From bbfa68d4e4446a8b2aede0001dd03a90a00cf53b Mon Sep 17 00:00:00 2001 From: Dsplib Date: Thu, 10 Oct 2019 21:02:33 +0300 Subject: [PATCH] ADDED Lapack and changed project structure --- .gitignore | 3 +- Makefile | 26 +- Makefile.dirs | 27 - Makefile.dspl | 40 +- Makefile.examples | 2 +- Makefile.verif | 2 +- dox/doxyfile_ru | 3 +- dspl.project.win.geany | 26 +- dspl/dspl_src/array.c | 5 +- dspl/dspl_src/blas.h | 366 +- dspl/dspl_src/matrix.c | 1 + dspl/{blas_obj => libblas}/.gitignore | 0 dspl/libblas/SRC/._Makefile | Bin 0 -> 227 bytes dspl/libblas/SRC/._caxpy.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ccopy.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._cdotc.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._cdotu.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._cgbmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._cgemm.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._cgemv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._cgerc.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._cgeru.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._chbmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._chemm.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._chemv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._cher.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._cher2.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._cher2k.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._cherk.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._chpmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._chpr.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._chpr2.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._crotg.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._cscal.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._csrot.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._csscal.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._cswap.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._csymm.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._csyr2k.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._csyrk.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ctbmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ctbsv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ctpmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ctpsv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ctrmm.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ctrmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ctrsm.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ctrsv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dasum.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._daxpy.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dcabs1.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dcopy.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ddot.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dgbmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dgemm.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dgemv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dger.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dnrm2.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._drot.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._drotg.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._drotm.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._drotmg.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dsbmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dscal.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dsdot.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dspmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dspr.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dspr2.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dswap.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dsymm.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dsymv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dsyr.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dsyr2.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dsyr2k.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dsyrk.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dtbmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dtbsv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dtpmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dtpsv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dtrmm.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dtrmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dtrsm.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dtrsv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dzasum.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._dznrm2.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._icamax.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._idamax.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._isamax.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._izamax.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._lsame.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._make.inc | Bin 0 -> 176 bytes dspl/libblas/SRC/._sasum.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._saxpy.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._scabs1.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._scasum.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._scnrm2.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._scopy.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._sdot.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._sdsdot.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._sgbmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._sgemm.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._sgemv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._sger.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._snrm2.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._srot.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._srotg.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._srotm.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._srotmg.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ssbmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._sscal.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._sspmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._sspr.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._sspr2.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._sswap.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ssymm.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ssymv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ssyr.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ssyr2.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ssyr2k.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ssyrk.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._stbmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._stbsv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._stpmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._stpsv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._strmm.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._strmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._strsm.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._strsv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._xerbla.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._xerbla_array.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zaxpy.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zcopy.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zdotc.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zdotu.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zdrot.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zdscal.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zgbmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zgemm.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zgemv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zgerc.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zgeru.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zhbmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zhemm.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zhemv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zher.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zher2.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zher2k.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zherk.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zhpmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zhpr.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zhpr2.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zrotg.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zscal.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zswap.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zsymm.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zsyr2k.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._zsyrk.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ztbmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ztbsv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ztpmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ztpsv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ztrmm.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ztrmv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ztrsm.f | Bin 0 -> 176 bytes dspl/libblas/SRC/._ztrsv.f | Bin 0 -> 176 bytes dspl/libblas/SRC/.gitignore | 9 + dspl/{blas_src => libblas/SRC}/Makefile | 0 dspl/{blas_src => libblas/SRC}/caxpy.f | 0 dspl/{blas_src => libblas/SRC}/ccopy.f | 0 dspl/{blas_src => libblas/SRC}/cdotc.f | 0 dspl/{blas_src => libblas/SRC}/cdotu.f | 0 dspl/{blas_src => libblas/SRC}/cgbmv.f | 0 dspl/{blas_src => libblas/SRC}/cgemm.f | 0 dspl/{blas_src => libblas/SRC}/cgemv.f | 0 dspl/{blas_src => libblas/SRC}/cgerc.f | 0 dspl/{blas_src => libblas/SRC}/cgeru.f | 0 dspl/{blas_src => libblas/SRC}/chbmv.f | 0 dspl/{blas_src => libblas/SRC}/chemm.f | 0 dspl/{blas_src => libblas/SRC}/chemv.f | 0 dspl/{blas_src => libblas/SRC}/cher.f | 0 dspl/{blas_src => libblas/SRC}/cher2.f | 0 dspl/{blas_src => libblas/SRC}/cher2k.f | 0 dspl/{blas_src => libblas/SRC}/cherk.f | 0 dspl/{blas_src => libblas/SRC}/chpmv.f | 0 dspl/{blas_src => libblas/SRC}/chpr.f | 0 dspl/{blas_src => libblas/SRC}/chpr2.f | 0 dspl/{blas_src => libblas/SRC}/crotg.f | 0 dspl/{blas_src => libblas/SRC}/cscal.f | 0 dspl/{blas_src => libblas/SRC}/csrot.f | 0 dspl/{blas_src => libblas/SRC}/csscal.f | 0 dspl/{blas_src => libblas/SRC}/cswap.f | 0 dspl/{blas_src => libblas/SRC}/csymm.f | 0 dspl/{blas_src => libblas/SRC}/csyr2k.f | 0 dspl/{blas_src => libblas/SRC}/csyrk.f | 0 dspl/{blas_src => libblas/SRC}/ctbmv.f | 0 dspl/{blas_src => libblas/SRC}/ctbsv.f | 0 dspl/{blas_src => libblas/SRC}/ctpmv.f | 0 dspl/{blas_src => libblas/SRC}/ctpsv.f | 0 dspl/{blas_src => libblas/SRC}/ctrmm.f | 0 dspl/{blas_src => libblas/SRC}/ctrmv.f | 0 dspl/{blas_src => libblas/SRC}/ctrsm.f | 0 dspl/{blas_src => libblas/SRC}/ctrsv.f | 0 dspl/{blas_src => libblas/SRC}/dasum.f | 0 dspl/{blas_src => libblas/SRC}/daxpy.f | 0 dspl/{blas_src => libblas/SRC}/dcabs1.f | 0 dspl/{blas_src => libblas/SRC}/dcopy.f | 0 dspl/{blas_src => libblas/SRC}/ddot.f | 0 dspl/{blas_src => libblas/SRC}/dgbmv.f | 0 dspl/{blas_src => libblas/SRC}/dgemm.f | 0 dspl/{blas_src => libblas/SRC}/dgemv.f | 0 dspl/{blas_src => libblas/SRC}/dger.f | 0 dspl/{blas_src => libblas/SRC}/dnrm2.f | 0 dspl/{blas_src => libblas/SRC}/drot.f | 0 dspl/{blas_src => libblas/SRC}/drotg.f | 0 dspl/{blas_src => libblas/SRC}/drotm.f | 0 dspl/{blas_src => libblas/SRC}/drotmg.f | 0 dspl/{blas_src => libblas/SRC}/dsbmv.f | 0 dspl/{blas_src => libblas/SRC}/dscal.f | 0 dspl/{blas_src => libblas/SRC}/dsdot.f | 0 dspl/{blas_src => libblas/SRC}/dspmv.f | 0 dspl/{blas_src => libblas/SRC}/dspr.f | 0 dspl/{blas_src => libblas/SRC}/dspr2.f | 0 dspl/{blas_src => libblas/SRC}/dswap.f | 0 dspl/{blas_src => libblas/SRC}/dsymm.f | 0 dspl/{blas_src => libblas/SRC}/dsymv.f | 0 dspl/{blas_src => libblas/SRC}/dsyr.f | 0 dspl/{blas_src => libblas/SRC}/dsyr2.f | 0 dspl/{blas_src => libblas/SRC}/dsyr2k.f | 0 dspl/{blas_src => libblas/SRC}/dsyrk.f | 0 dspl/{blas_src => libblas/SRC}/dtbmv.f | 0 dspl/{blas_src => libblas/SRC}/dtbsv.f | 0 dspl/{blas_src => libblas/SRC}/dtpmv.f | 0 dspl/{blas_src => libblas/SRC}/dtpsv.f | 0 dspl/{blas_src => libblas/SRC}/dtrmm.f | 0 dspl/{blas_src => libblas/SRC}/dtrmv.f | 0 dspl/{blas_src => libblas/SRC}/dtrsm.f | 0 dspl/{blas_src => libblas/SRC}/dtrsv.f | 0 dspl/{blas_src => libblas/SRC}/dzasum.f | 0 dspl/{blas_src => libblas/SRC}/dznrm2.f | 0 dspl/{blas_src => libblas/SRC}/icamax.f | 0 dspl/{blas_src => libblas/SRC}/idamax.f | 0 dspl/{blas_src => libblas/SRC}/isamax.f | 0 dspl/{blas_src => libblas/SRC}/izamax.f | 0 dspl/{blas_src => libblas/SRC}/lsame.f | 0 dspl/{blas_src => libblas/SRC}/make.inc | 2 +- dspl/{blas_src => libblas/SRC}/sasum.f | 0 dspl/{blas_src => libblas/SRC}/saxpy.f | 0 dspl/{blas_src => libblas/SRC}/scabs1.f | 0 dspl/{blas_src => libblas/SRC}/scasum.f | 0 dspl/{blas_src => libblas/SRC}/scnrm2.f | 0 dspl/{blas_src => libblas/SRC}/scopy.f | 0 dspl/{blas_src => libblas/SRC}/sdot.f | 0 dspl/{blas_src => libblas/SRC}/sdsdot.f | 0 dspl/{blas_src => libblas/SRC}/sgbmv.f | 0 dspl/{blas_src => libblas/SRC}/sgemm.f | 0 dspl/{blas_src => libblas/SRC}/sgemv.f | 0 dspl/{blas_src => libblas/SRC}/sger.f | 0 dspl/{blas_src => libblas/SRC}/snrm2.f | 0 dspl/{blas_src => libblas/SRC}/srot.f | 0 dspl/{blas_src => libblas/SRC}/srotg.f | 0 dspl/{blas_src => libblas/SRC}/srotm.f | 0 dspl/{blas_src => libblas/SRC}/srotmg.f | 0 dspl/{blas_src => libblas/SRC}/ssbmv.f | 0 dspl/{blas_src => libblas/SRC}/sscal.f | 0 dspl/{blas_src => libblas/SRC}/sspmv.f | 0 dspl/{blas_src => libblas/SRC}/sspr.f | 0 dspl/{blas_src => libblas/SRC}/sspr2.f | 0 dspl/{blas_src => libblas/SRC}/sswap.f | 0 dspl/{blas_src => libblas/SRC}/ssymm.f | 0 dspl/{blas_src => libblas/SRC}/ssymv.f | 0 dspl/{blas_src => libblas/SRC}/ssyr.f | 0 dspl/{blas_src => libblas/SRC}/ssyr2.f | 0 dspl/{blas_src => libblas/SRC}/ssyr2k.f | 0 dspl/{blas_src => libblas/SRC}/ssyrk.f | 0 dspl/{blas_src => libblas/SRC}/stbmv.f | 0 dspl/{blas_src => libblas/SRC}/stbsv.f | 0 dspl/{blas_src => libblas/SRC}/stpmv.f | 0 dspl/{blas_src => libblas/SRC}/stpsv.f | 0 dspl/{blas_src => libblas/SRC}/strmm.f | 0 dspl/{blas_src => libblas/SRC}/strmv.f | 0 dspl/{blas_src => libblas/SRC}/strsm.f | 0 dspl/{blas_src => libblas/SRC}/strsv.f | 0 dspl/{blas_src => libblas/SRC}/xerbla.f | 0 dspl/{blas_src => libblas/SRC}/xerbla_array.f | 0 dspl/{blas_src => libblas/SRC}/zaxpy.f | 0 dspl/{blas_src => libblas/SRC}/zcopy.f | 0 dspl/{blas_src => libblas/SRC}/zdotc.f | 0 dspl/{blas_src => libblas/SRC}/zdotu.f | 0 dspl/{blas_src => libblas/SRC}/zdrot.f | 0 dspl/{blas_src => libblas/SRC}/zdscal.f | 0 dspl/{blas_src => libblas/SRC}/zgbmv.f | 0 dspl/{blas_src => libblas/SRC}/zgemm.f | 0 dspl/{blas_src => libblas/SRC}/zgemv.f | 0 dspl/{blas_src => libblas/SRC}/zgerc.f | 0 dspl/{blas_src => libblas/SRC}/zgeru.f | 0 dspl/{blas_src => libblas/SRC}/zhbmv.f | 0 dspl/{blas_src => libblas/SRC}/zhemm.f | 0 dspl/{blas_src => libblas/SRC}/zhemv.f | 0 dspl/{blas_src => libblas/SRC}/zher.f | 0 dspl/{blas_src => libblas/SRC}/zher2.f | 0 dspl/{blas_src => libblas/SRC}/zher2k.f | 0 dspl/{blas_src => libblas/SRC}/zherk.f | 0 dspl/{blas_src => libblas/SRC}/zhpmv.f | 0 dspl/{blas_src => libblas/SRC}/zhpr.f | 0 dspl/{blas_src => libblas/SRC}/zhpr2.f | 0 dspl/{blas_src => libblas/SRC}/zrotg.f | 0 dspl/{blas_src => libblas/SRC}/zscal.f | 0 dspl/{blas_src => libblas/SRC}/zswap.f | 0 dspl/{blas_src => libblas/SRC}/zsymm.f | 0 dspl/{blas_src => libblas/SRC}/zsyr2k.f | 0 dspl/{blas_src => libblas/SRC}/zsyrk.f | 0 dspl/{blas_src => libblas/SRC}/ztbmv.f | 0 dspl/{blas_src => libblas/SRC}/ztbsv.f | 0 dspl/{blas_src => libblas/SRC}/ztpmv.f | 0 dspl/{blas_src => libblas/SRC}/ztpsv.f | 0 dspl/{blas_src => libblas/SRC}/ztrmm.f | 0 dspl/{blas_src => libblas/SRC}/ztrmv.f | 0 dspl/{blas_src => libblas/SRC}/ztrsm.f | 0 dspl/{blas_src => libblas/SRC}/ztrsv.f | 0 dspl/liblapack/INSTALL/.gitignore | 9 + dspl/liblapack/INSTALL/LAPACK_version.f | 41 + dspl/liblapack/INSTALL/Makefile | 50 + dspl/liblapack/INSTALL/dlamch.f | 189 + dspl/liblapack/INSTALL/dlamchf77.f | 919 ++++ dspl/liblapack/INSTALL/dlamchtst.f | 67 + dspl/liblapack/INSTALL/dsecnd_EXT_ETIME.f | 64 + dspl/liblapack/INSTALL/dsecnd_EXT_ETIME_.f | 61 + dspl/liblapack/INSTALL/dsecnd_INT_CPU_TIME.f | 61 + dspl/liblapack/INSTALL/dsecnd_INT_ETIME.f | 63 + dspl/liblapack/INSTALL/dsecnd_NONE.f | 52 + dspl/liblapack/INSTALL/dsecndtst.f | 127 + dspl/liblapack/INSTALL/ilaver.f | 66 + dspl/liblapack/INSTALL/lsame.f | 125 + dspl/liblapack/INSTALL/lsametst.f | 88 + dspl/liblapack/INSTALL/make.inc.ALPHA | 82 + dspl/liblapack/INSTALL/make.inc.HPPA | 82 + dspl/liblapack/INSTALL/make.inc.IRIX64 | 85 + dspl/liblapack/INSTALL/make.inc.O2K | 86 + dspl/liblapack/INSTALL/make.inc.SGI5 | 82 + dspl/liblapack/INSTALL/make.inc.SUN4 | 82 + dspl/liblapack/INSTALL/make.inc.SUN4SOL2 | 87 + dspl/liblapack/INSTALL/make.inc.XLF | 83 + dspl/liblapack/INSTALL/make.inc.gfortran | 85 + .../liblapack/INSTALL/make.inc.gfortran_debug | 85 + dspl/liblapack/INSTALL/make.inc.ifort | 81 + dspl/liblapack/INSTALL/make.inc.pgf95 | 81 + dspl/liblapack/INSTALL/make.inc.pghpf | 82 + dspl/liblapack/INSTALL/second_EXT_ETIME.f | 63 + dspl/liblapack/INSTALL/second_EXT_ETIME_.f | 62 + dspl/liblapack/INSTALL/second_INT_CPU_TIME.f | 61 + dspl/liblapack/INSTALL/second_INT_ETIME.f | 63 + dspl/liblapack/INSTALL/second_NONE.f | 52 + dspl/liblapack/INSTALL/secondtst.f | 123 + dspl/liblapack/INSTALL/slamch.f | 192 + dspl/liblapack/INSTALL/slamchf77.f | 924 ++++ dspl/liblapack/INSTALL/slamchtst.f | 63 + dspl/liblapack/INSTALL/tstiee.f | 770 ++++ dspl/liblapack/SRC/.gitignore | 9 + dspl/liblapack/SRC/DEPRECATED/cgegs.f | 531 +++ dspl/liblapack/SRC/DEPRECATED/cgegv.f | 706 ++++ dspl/liblapack/SRC/DEPRECATED/cgelsx.f | 447 ++ dspl/liblapack/SRC/DEPRECATED/cgeqpf.f | 313 ++ dspl/liblapack/SRC/DEPRECATED/cggsvd.f | 466 +++ dspl/liblapack/SRC/DEPRECATED/cggsvp.f | 536 +++ dspl/liblapack/SRC/DEPRECATED/clahrd.f | 292 ++ dspl/liblapack/SRC/DEPRECATED/clatzm.f | 225 + dspl/liblapack/SRC/DEPRECATED/ctzrqf.f | 241 ++ dspl/liblapack/SRC/DEPRECATED/dgegs.f | 541 +++ dspl/liblapack/SRC/DEPRECATED/dgegv.f | 769 ++++ dspl/liblapack/SRC/DEPRECATED/dgelsx.f | 435 ++ dspl/liblapack/SRC/DEPRECATED/dgeqpf.f | 306 ++ dspl/liblapack/SRC/DEPRECATED/dggsvd.f | 464 +++ dspl/liblapack/SRC/DEPRECATED/dggsvp.f | 522 +++ dspl/liblapack/SRC/DEPRECATED/dlahrd.f | 286 ++ dspl/liblapack/SRC/DEPRECATED/dlatzm.f | 221 + dspl/liblapack/SRC/DEPRECATED/dtzrqf.f | 232 ++ dspl/liblapack/SRC/DEPRECATED/sgegs.f | 541 +++ dspl/liblapack/SRC/DEPRECATED/sgegv.f | 769 ++++ dspl/liblapack/SRC/DEPRECATED/sgelsx.f | 435 ++ dspl/liblapack/SRC/DEPRECATED/sgeqpf.f | 306 ++ dspl/liblapack/SRC/DEPRECATED/sggsvd.f | 464 +++ dspl/liblapack/SRC/DEPRECATED/sggsvp.f | 522 +++ dspl/liblapack/SRC/DEPRECATED/slahrd.f | 286 ++ dspl/liblapack/SRC/DEPRECATED/slatzm.f | 221 + dspl/liblapack/SRC/DEPRECATED/stzrqf.f | 232 ++ dspl/liblapack/SRC/DEPRECATED/zgegs.f | 531 +++ dspl/liblapack/SRC/DEPRECATED/zgegv.f | 706 ++++ dspl/liblapack/SRC/DEPRECATED/zgelsx.f | 447 ++ dspl/liblapack/SRC/DEPRECATED/zgeqpf.f | 313 ++ dspl/liblapack/SRC/DEPRECATED/zggsvd.f | 465 +++ dspl/liblapack/SRC/DEPRECATED/zggsvp.f | 539 +++ dspl/liblapack/SRC/DEPRECATED/zlahrd.f | 292 ++ dspl/liblapack/SRC/DEPRECATED/zlatzm.f | 225 + dspl/liblapack/SRC/DEPRECATED/ztzrqf.f | 241 ++ dspl/liblapack/SRC/Makefile | 572 +++ dspl/liblapack/SRC/VARIANTS/Makefile | 66 + dspl/liblapack/SRC/VARIANTS/README | 84 + .../SRC/VARIANTS/cholesky/RL/cpotrf.f | 243 ++ .../SRC/VARIANTS/cholesky/RL/dpotrf.f | 242 ++ .../SRC/VARIANTS/cholesky/RL/spotrf.f | 242 ++ .../SRC/VARIANTS/cholesky/RL/zpotrf.f | 243 ++ .../SRC/VARIANTS/cholesky/TOP/cpotrf.f | 237 ++ .../SRC/VARIANTS/cholesky/TOP/dpotrf.f | 238 ++ .../SRC/VARIANTS/cholesky/TOP/spotrf.f | 237 ++ .../SRC/VARIANTS/cholesky/TOP/zpotrf.f | 237 ++ dspl/liblapack/SRC/VARIANTS/lu/CR/cgetrf.f | 223 + dspl/liblapack/SRC/VARIANTS/lu/CR/dgetrf.f | 223 + dspl/liblapack/SRC/VARIANTS/lu/CR/sgetrf.f | 223 + dspl/liblapack/SRC/VARIANTS/lu/CR/zgetrf.f | 223 + dspl/liblapack/SRC/VARIANTS/lu/LL/cgetrf.f | 248 ++ dspl/liblapack/SRC/VARIANTS/lu/LL/dgetrf.f | 247 ++ dspl/liblapack/SRC/VARIANTS/lu/LL/sgetrf.f | 248 ++ dspl/liblapack/SRC/VARIANTS/lu/LL/zgetrf.f | 248 ++ dspl/liblapack/SRC/VARIANTS/lu/REC/cgetrf.f | 281 ++ dspl/liblapack/SRC/VARIANTS/lu/REC/dgetrf.f | 277 ++ dspl/liblapack/SRC/VARIANTS/lu/REC/sgetrf.f | 277 ++ dspl/liblapack/SRC/VARIANTS/lu/REC/zgetrf.f | 281 ++ dspl/liblapack/SRC/VARIANTS/qr/LL/cgeqrf.f | 416 ++ dspl/liblapack/SRC/VARIANTS/qr/LL/dgeqrf.f | 417 ++ dspl/liblapack/SRC/VARIANTS/qr/LL/sceil.f | 87 + dspl/liblapack/SRC/VARIANTS/qr/LL/sgeqrf.f | 416 ++ dspl/liblapack/SRC/VARIANTS/qr/LL/zgeqrf.f | 416 ++ dspl/liblapack/SRC/cbbcsd.f | 1086 +++++ dspl/liblapack/SRC/cbdsqr.f | 842 ++++ dspl/liblapack/SRC/cgbbrd.f | 573 +++ dspl/liblapack/SRC/cgbcon.f | 320 ++ dspl/liblapack/SRC/cgbequ.f | 333 ++ dspl/liblapack/SRC/cgbequb.f | 350 ++ dspl/liblapack/SRC/cgbrfs.f | 475 +++ dspl/liblapack/SRC/cgbrfsx.f | 763 ++++ dspl/liblapack/SRC/cgbsv.f | 223 + dspl/liblapack/SRC/cgbsvx.f | 647 +++ dspl/liblapack/SRC/cgbsvxx.f | 803 ++++ dspl/liblapack/SRC/cgbtf2.f | 277 ++ dspl/liblapack/SRC/cgbtrf.f | 517 +++ dspl/liblapack/SRC/cgbtrs.f | 297 ++ dspl/liblapack/SRC/cgebak.f | 270 ++ dspl/liblapack/SRC/cgebal.f | 399 ++ dspl/liblapack/SRC/cgebd2.f | 332 ++ dspl/liblapack/SRC/cgebrd.f | 354 ++ dspl/liblapack/SRC/cgecon.f | 269 ++ dspl/liblapack/SRC/cgeequ.f | 313 ++ dspl/liblapack/SRC/cgeequb.f | 330 ++ dspl/liblapack/SRC/cgees.f | 424 ++ dspl/liblapack/SRC/cgeesx.f | 499 +++ dspl/liblapack/SRC/cgeev.f | 503 +++ dspl/liblapack/SRC/cgeevx.f | 667 +++ dspl/liblapack/SRC/cgehd2.f | 224 + dspl/liblapack/SRC/cgehrd.f | 356 ++ dspl/liblapack/SRC/cgejsv.f | 2235 ++++++++++ dspl/liblapack/SRC/cgelq.f | 306 ++ dspl/liblapack/SRC/cgelq2.f | 194 + dspl/liblapack/SRC/cgelqf.f | 269 ++ dspl/liblapack/SRC/cgelqt.f | 194 + dspl/liblapack/SRC/cgelqt3.f | 244 ++ dspl/liblapack/SRC/cgels.f | 505 +++ dspl/liblapack/SRC/cgelsd.f | 666 +++ dspl/liblapack/SRC/cgelss.f | 771 ++++ dspl/liblapack/SRC/cgelsy.f | 477 +++ dspl/liblapack/SRC/cgemlq.f | 283 ++ dspl/liblapack/SRC/cgemlqt.f | 272 ++ dspl/liblapack/SRC/cgemqr.f | 285 ++ dspl/liblapack/SRC/cgemqrt.f | 291 ++ dspl/liblapack/SRC/cgeql2.f | 192 + dspl/liblapack/SRC/cgeqlf.f | 287 ++ dspl/liblapack/SRC/cgeqp3.f | 372 ++ dspl/liblapack/SRC/cgeqr.f | 307 ++ dspl/liblapack/SRC/cgeqr2.f | 192 + dspl/liblapack/SRC/cgeqr2p.f | 195 + dspl/liblapack/SRC/cgeqrf.f | 270 ++ dspl/liblapack/SRC/cgeqrfp.f | 273 ++ dspl/liblapack/SRC/cgeqrt.f | 218 + dspl/liblapack/SRC/cgeqrt2.f | 227 + dspl/liblapack/SRC/cgeqrt3.f | 257 ++ dspl/liblapack/SRC/cgerfs.f | 448 ++ dspl/liblapack/SRC/cgerfsx.f | 734 ++++ dspl/liblapack/SRC/cgerq2.f | 195 + dspl/liblapack/SRC/cgerqf.f | 287 ++ dspl/liblapack/SRC/cgesc2.f | 202 + dspl/liblapack/SRC/cgesdd.f | 2220 ++++++++++ dspl/liblapack/SRC/cgesv.f | 179 + dspl/liblapack/SRC/cgesvd.f | 3706 +++++++++++++++++ dspl/liblapack/SRC/cgesvdx.f | 858 ++++ dspl/liblapack/SRC/cgesvj.f | 1442 +++++++ dspl/liblapack/SRC/cgesvx.f | 605 +++ dspl/liblapack/SRC/cgesvxx.f | 772 ++++ dspl/liblapack/SRC/cgetc2.f | 234 ++ dspl/liblapack/SRC/cgetf2.f | 214 + dspl/liblapack/SRC/cgetrf.f | 225 + dspl/liblapack/SRC/cgetrf2.f | 274 ++ dspl/liblapack/SRC/cgetri.f | 262 ++ dspl/liblapack/SRC/cgetrs.f | 225 + dspl/liblapack/SRC/cgetsls.f | 497 +++ dspl/liblapack/SRC/cggbak.f | 307 ++ dspl/liblapack/SRC/cggbal.f | 572 +++ dspl/liblapack/SRC/cgges.f | 599 +++ dspl/liblapack/SRC/cgges3.f | 597 +++ dspl/liblapack/SRC/cggesx.f | 718 ++++ dspl/liblapack/SRC/cggev.f | 558 +++ dspl/liblapack/SRC/cggev3.f | 560 +++ dspl/liblapack/SRC/cggevx.f | 804 ++++ dspl/liblapack/SRC/cggglm.f | 349 ++ dspl/liblapack/SRC/cgghd3.f | 900 ++++ dspl/liblapack/SRC/cgghrd.f | 361 ++ dspl/liblapack/SRC/cgglse.f | 355 ++ dspl/liblapack/SRC/cggqrf.f | 299 ++ dspl/liblapack/SRC/cggrqf.f | 299 ++ dspl/liblapack/SRC/cggsvd3.f | 506 +++ dspl/liblapack/SRC/cggsvp3.f | 579 +++ dspl/liblapack/SRC/cgsvj0.f | 935 +++++ dspl/liblapack/SRC/cgsvj1.f | 705 ++++ dspl/liblapack/SRC/cgtcon.f | 253 ++ dspl/liblapack/SRC/cgtrfs.f | 487 +++ dspl/liblapack/SRC/cgtsv.f | 244 ++ dspl/liblapack/SRC/cgtsvx.f | 416 ++ dspl/liblapack/SRC/cgttrf.f | 243 ++ dspl/liblapack/SRC/cgttrs.f | 225 + dspl/liblapack/SRC/cgtts2.f | 349 ++ dspl/liblapack/SRC/chb2st_kernels.f | 377 ++ dspl/liblapack/SRC/chbev.f | 294 ++ dspl/liblapack/SRC/chbev_2stage.f | 389 ++ dspl/liblapack/SRC/chbevd.f | 401 ++ dspl/liblapack/SRC/chbevd_2stage.f | 458 ++ dspl/liblapack/SRC/chbevx.f | 553 +++ dspl/liblapack/SRC/chbevx_2stage.f | 649 +++ dspl/liblapack/SRC/chbgst.f | 1469 +++++++ dspl/liblapack/SRC/chbgv.f | 287 ++ dspl/liblapack/SRC/chbgvd.f | 407 ++ dspl/liblapack/SRC/chbgvx.f | 535 +++ dspl/liblapack/SRC/chbtrd.f | 677 +++ dspl/liblapack/SRC/checon.f | 239 ++ dspl/liblapack/SRC/checon_3.f | 285 ++ dspl/liblapack/SRC/checon_rook.f | 253 ++ dspl/liblapack/SRC/cheequb.f | 343 ++ dspl/liblapack/SRC/cheev.f | 298 ++ dspl/liblapack/SRC/cheev_2stage.f | 355 ++ dspl/liblapack/SRC/cheevd.f | 398 ++ dspl/liblapack/SRC/cheevd_2stage.f | 455 ++ dspl/liblapack/SRC/cheevr.f | 724 ++++ dspl/liblapack/SRC/cheevr_2stage.f | 779 ++++ dspl/liblapack/SRC/cheevx.f | 564 +++ dspl/liblapack/SRC/cheevx_2stage.f | 622 +++ dspl/liblapack/SRC/chegs2.f | 296 ++ dspl/liblapack/SRC/chegst.f | 331 ++ dspl/liblapack/SRC/chegv.f | 321 ++ dspl/liblapack/SRC/chegv_2stage.f | 379 ++ dspl/liblapack/SRC/chegvd.f | 412 ++ dspl/liblapack/SRC/chegvx.f | 476 +++ dspl/liblapack/SRC/cherfs.f | 446 ++ dspl/liblapack/SRC/cherfsx.f | 700 ++++ dspl/liblapack/SRC/chesv.f | 271 ++ dspl/liblapack/SRC/chesv_aa.f | 252 ++ dspl/liblapack/SRC/chesv_aa_2stage.f | 276 ++ dspl/liblapack/SRC/chesv_rk.f | 316 ++ dspl/liblapack/SRC/chesv_rook.f | 295 ++ dspl/liblapack/SRC/chesvx.f | 417 ++ dspl/liblapack/SRC/chesvxx.f | 700 ++++ dspl/liblapack/SRC/cheswapr.f | 203 + dspl/liblapack/SRC/chetd2.f | 334 ++ dspl/liblapack/SRC/chetf2.f | 634 +++ dspl/liblapack/SRC/chetf2_rk.f | 1039 +++++ dspl/liblapack/SRC/chetf2_rook.f | 910 ++++ dspl/liblapack/SRC/chetrd.f | 378 ++ dspl/liblapack/SRC/chetrd_2stage.f | 337 ++ dspl/liblapack/SRC/chetrd_hb2st.F | 587 +++ dspl/liblapack/SRC/chetrd_he2hb.f | 517 +++ dspl/liblapack/SRC/chetrf.f | 357 ++ dspl/liblapack/SRC/chetrf_aa.f | 469 +++ dspl/liblapack/SRC/chetrf_aa_2stage.f | 664 +++ dspl/liblapack/SRC/chetrf_rk.f | 498 +++ dspl/liblapack/SRC/chetrf_rook.f | 397 ++ dspl/liblapack/SRC/chetri.f | 397 ++ dspl/liblapack/SRC/chetri2.f | 205 + dspl/liblapack/SRC/chetri2x.f | 590 +++ dspl/liblapack/SRC/chetri_3.f | 248 ++ dspl/liblapack/SRC/chetri_3x.f | 649 +++ dspl/liblapack/SRC/chetri_rook.f | 516 +++ dspl/liblapack/SRC/chetrs.f | 469 +++ dspl/liblapack/SRC/chetrs2.f | 359 ++ dspl/liblapack/SRC/chetrs_3.f | 374 ++ dspl/liblapack/SRC/chetrs_aa.f | 295 ++ dspl/liblapack/SRC/chetrs_aa_2stage.f | 283 ++ dspl/liblapack/SRC/chetrs_rook.f | 503 +++ dspl/liblapack/SRC/chfrk.f | 553 +++ dspl/liblapack/SRC/chgeqz.f | 875 ++++ dspl/liblapack/SRC/chla_transtype.f | 91 + dspl/liblapack/SRC/chpcon.f | 231 + dspl/liblapack/SRC/chpev.f | 276 ++ dspl/liblapack/SRC/chpevd.f | 378 ++ dspl/liblapack/SRC/chpevx.f | 507 +++ dspl/liblapack/SRC/chpgst.f | 281 ++ dspl/liblapack/SRC/chpgv.f | 282 ++ dspl/liblapack/SRC/chpgvd.f | 393 ++ dspl/liblapack/SRC/chpgvx.f | 421 ++ dspl/liblapack/SRC/chprfs.f | 438 ++ dspl/liblapack/SRC/chpsv.f | 224 + dspl/liblapack/SRC/chpsvx.f | 387 ++ dspl/liblapack/SRC/chptrd.f | 310 ++ dspl/liblapack/SRC/chptrf.f | 649 +++ dspl/liblapack/SRC/chptri.f | 410 ++ dspl/liblapack/SRC/chptrs.f | 474 +++ dspl/liblapack/SRC/chsein.f | 468 +++ dspl/liblapack/SRC/chseqr.f | 498 +++ dspl/liblapack/SRC/cla_gbamv.f | 422 ++ dspl/liblapack/SRC/cla_gbrcond_c.f | 342 ++ dspl/liblapack/SRC/cla_gbrcond_x.f | 319 ++ dspl/liblapack/SRC/cla_gbrfsx_extended.f | 713 ++++ dspl/liblapack/SRC/cla_gbrpvgrw.f | 167 + dspl/liblapack/SRC/cla_geamv.f | 406 ++ dspl/liblapack/SRC/cla_gercond_c.f | 317 ++ dspl/liblapack/SRC/cla_gercond_x.f | 293 ++ dspl/liblapack/SRC/cla_gerfsx_extended.f | 698 ++++ dspl/liblapack/SRC/cla_gerpvgrw.f | 147 + dspl/liblapack/SRC/cla_heamv.f | 426 ++ dspl/liblapack/SRC/cla_hercond_c.f | 327 ++ dspl/liblapack/SRC/cla_hercond_x.f | 297 ++ dspl/liblapack/SRC/cla_herfsx_extended.f | 716 ++++ dspl/liblapack/SRC/cla_herpvgrw.f | 330 ++ dspl/liblapack/SRC/cla_lin_berr.f | 160 + dspl/liblapack/SRC/cla_porcond_c.f | 319 ++ dspl/liblapack/SRC/cla_porcond_x.f | 288 ++ dspl/liblapack/SRC/cla_porfsx_extended.f | 687 +++ dspl/liblapack/SRC/cla_porpvgrw.f | 216 + dspl/liblapack/SRC/cla_syamv.f | 428 ++ dspl/liblapack/SRC/cla_syrcond_c.f | 328 ++ dspl/liblapack/SRC/cla_syrcond_x.f | 298 ++ dspl/liblapack/SRC/cla_syrfsx_extended.f | 716 ++++ dspl/liblapack/SRC/cla_syrpvgrw.f | 331 ++ dspl/liblapack/SRC/cla_wwaddw.f | 111 + dspl/liblapack/SRC/clabrd.f | 420 ++ dspl/liblapack/SRC/clacgv.f | 116 + dspl/liblapack/SRC/clacn2.f | 298 ++ dspl/liblapack/SRC/clacon.f | 281 ++ dspl/liblapack/SRC/clacp2.f | 161 + dspl/liblapack/SRC/clacpy.f | 159 + dspl/liblapack/SRC/clacrm.f | 185 + dspl/liblapack/SRC/clacrt.f | 160 + dspl/liblapack/SRC/cladiv.f | 97 + dspl/liblapack/SRC/claed0.f | 371 ++ dspl/liblapack/SRC/claed7.f | 385 ++ dspl/liblapack/SRC/claed8.f | 486 +++ dspl/liblapack/SRC/claein.f | 354 ++ dspl/liblapack/SRC/claesy.f | 221 + dspl/liblapack/SRC/claev2.f | 165 + dspl/liblapack/SRC/clag2z.f | 136 + dspl/liblapack/SRC/clags2.f | 395 ++ dspl/liblapack/SRC/clagtm.f | 321 ++ dspl/liblapack/SRC/clahef.f | 973 +++++ dspl/liblapack/SRC/clahef_aa.f | 501 +++ dspl/liblapack/SRC/clahef_rk.f | 1234 ++++++ dspl/liblapack/SRC/clahef_rook.f | 1176 ++++++ dspl/liblapack/SRC/clahqr.f | 570 +++ dspl/liblapack/SRC/clahr2.f | 328 ++ dspl/liblapack/SRC/claic1.f | 371 ++ dspl/liblapack/SRC/clals0.f | 556 +++ dspl/liblapack/SRC/clalsa.f | 635 +++ dspl/liblapack/SRC/clalsd.f | 690 +++ dspl/liblapack/SRC/clamswlq.f | 417 ++ dspl/liblapack/SRC/clamtsqr.f | 414 ++ dspl/liblapack/SRC/clangb.f | 226 + dspl/liblapack/SRC/clange.f | 213 + dspl/liblapack/SRC/clangt.f | 208 + dspl/liblapack/SRC/clanhb.f | 276 ++ dspl/liblapack/SRC/clanhe.f | 258 ++ dspl/liblapack/SRC/clanhf.f | 1573 +++++++ dspl/liblapack/SRC/clanhp.f | 269 ++ dspl/liblapack/SRC/clanhs.f | 207 + dspl/liblapack/SRC/clanht.f | 188 + dspl/liblapack/SRC/clansb.f | 260 ++ dspl/liblapack/SRC/clansp.f | 272 ++ dspl/liblapack/SRC/clansy.f | 243 ++ dspl/liblapack/SRC/clantb.f | 363 ++ dspl/liblapack/SRC/clantp.f | 357 ++ dspl/liblapack/SRC/clantr.f | 355 ++ dspl/liblapack/SRC/clapll.f | 169 + dspl/liblapack/SRC/clapmr.f | 204 + dspl/liblapack/SRC/clapmt.f | 203 + dspl/liblapack/SRC/claqgb.f | 258 ++ dspl/liblapack/SRC/claqge.f | 238 ++ dspl/liblapack/SRC/claqhb.f | 230 + dspl/liblapack/SRC/claqhe.f | 223 + dspl/liblapack/SRC/claqhp.f | 219 + dspl/liblapack/SRC/claqp2.f | 266 ++ dspl/liblapack/SRC/claqps.f | 371 ++ dspl/liblapack/SRC/claqr0.f | 701 ++++ dspl/liblapack/SRC/claqr1.f | 172 + dspl/liblapack/SRC/claqr2.f | 566 +++ dspl/liblapack/SRC/claqr3.f | 577 +++ dspl/liblapack/SRC/claqr4.f | 705 ++++ dspl/liblapack/SRC/claqr5.f | 907 ++++ dspl/liblapack/SRC/claqsb.f | 228 + dspl/liblapack/SRC/claqsp.f | 214 + dspl/liblapack/SRC/claqsy.f | 218 + dspl/liblapack/SRC/clar1v.f | 488 +++ dspl/liblapack/SRC/clar2v.f | 169 + dspl/liblapack/SRC/clarcm.f | 185 + dspl/liblapack/SRC/clarf.f | 232 ++ dspl/liblapack/SRC/clarfb.f | 731 ++++ dspl/liblapack/SRC/clarfg.f | 203 + dspl/liblapack/SRC/clarfgp.f | 272 ++ dspl/liblapack/SRC/clarft.f | 328 ++ dspl/liblapack/SRC/clarfx.f | 699 ++++ dspl/liblapack/SRC/clarfy.f | 163 + dspl/liblapack/SRC/clargv.f | 299 ++ dspl/liblapack/SRC/clarnv.f | 193 + dspl/liblapack/SRC/clarrv.f | 1060 +++++ dspl/liblapack/SRC/clarscl2.f | 122 + dspl/liblapack/SRC/clartg.f | 250 ++ dspl/liblapack/SRC/clartv.f | 150 + dspl/liblapack/SRC/clarz.f | 241 ++ dspl/liblapack/SRC/clarzb.f | 337 ++ dspl/liblapack/SRC/clarzt.f | 266 ++ dspl/liblapack/SRC/clascl.f | 368 ++ dspl/liblapack/SRC/clascl2.f | 122 + dspl/liblapack/SRC/claset.f | 184 + dspl/liblapack/SRC/clasr.f | 439 ++ dspl/liblapack/SRC/classq.f | 168 + dspl/liblapack/SRC/claswlq.f | 262 ++ dspl/liblapack/SRC/claswp.f | 193 + dspl/liblapack/SRC/clasyf.f | 832 ++++ dspl/liblapack/SRC/clasyf_aa.f | 493 +++ dspl/liblapack/SRC/clasyf_rk.f | 974 +++++ dspl/liblapack/SRC/clasyf_rook.f | 900 ++++ dspl/liblapack/SRC/clatbs.f | 998 +++++ dspl/liblapack/SRC/clatdf.f | 323 ++ dspl/liblapack/SRC/clatps.f | 978 +++++ dspl/liblapack/SRC/clatrd.f | 358 ++ dspl/liblapack/SRC/clatrs.f | 966 +++++ dspl/liblapack/SRC/clatrz.f | 206 + dspl/liblapack/SRC/clatsqr.f | 255 ++ dspl/liblapack/SRC/clauu2.f | 206 + dspl/liblapack/SRC/clauum.f | 223 + dspl/liblapack/SRC/cpbcon.f | 277 ++ dspl/liblapack/SRC/cpbequ.f | 244 ++ dspl/liblapack/SRC/cpbrfs.f | 448 ++ dspl/liblapack/SRC/cpbstf.f | 332 ++ dspl/liblapack/SRC/cpbsv.f | 229 + dspl/liblapack/SRC/cpbsvx.f | 543 +++ dspl/liblapack/SRC/cpbtf2.f | 269 ++ dspl/liblapack/SRC/cpbtrf.f | 442 ++ dspl/liblapack/SRC/cpbtrs.f | 220 + dspl/liblapack/SRC/cpftrf.f | 471 +++ dspl/liblapack/SRC/cpftri.f | 445 ++ dspl/liblapack/SRC/cpftrs.f | 301 ++ dspl/liblapack/SRC/cpocon.f | 260 ++ dspl/liblapack/SRC/cpoequ.f | 207 + dspl/liblapack/SRC/cpoequb.f | 223 + dspl/liblapack/SRC/cporfs.f | 436 ++ dspl/liblapack/SRC/cporfsx.f | 693 +++ dspl/liblapack/SRC/cposv.f | 193 + dspl/liblapack/SRC/cposvx.f | 492 +++ dspl/liblapack/SRC/cposvxx.f | 683 +++ dspl/liblapack/SRC/cpotf2.f | 237 ++ dspl/liblapack/SRC/cpotrf.f | 249 ++ dspl/liblapack/SRC/cpotrf2.f | 246 ++ dspl/liblapack/SRC/cpotri.f | 159 + dspl/liblapack/SRC/cpotrs.f | 204 + dspl/liblapack/SRC/cppcon.f | 255 ++ dspl/liblapack/SRC/cppequ.f | 240 ++ dspl/liblapack/SRC/cpprfs.f | 428 ++ dspl/liblapack/SRC/cppsv.f | 205 + dspl/liblapack/SRC/cppsvx.f | 493 +++ dspl/liblapack/SRC/cpptrf.f | 241 ++ dspl/liblapack/SRC/cpptri.f | 190 + dspl/liblapack/SRC/cpptrs.f | 203 + dspl/liblapack/SRC/cpstf2.f | 406 ++ dspl/liblapack/SRC/cpstrf.f | 463 ++ dspl/liblapack/SRC/cptcon.f | 223 + dspl/liblapack/SRC/cpteqr.f | 263 ++ dspl/liblapack/SRC/cptrfs.f | 468 +++ dspl/liblapack/SRC/cptsv.f | 169 + dspl/liblapack/SRC/cptsvx.f | 343 ++ dspl/liblapack/SRC/cpttrf.f | 228 + dspl/liblapack/SRC/cpttrs.f | 208 + dspl/liblapack/SRC/cptts2.f | 245 ++ dspl/liblapack/SRC/crot.f | 162 + dspl/liblapack/SRC/cspcon.f | 231 + dspl/liblapack/SRC/cspmv.f | 340 ++ dspl/liblapack/SRC/cspr.f | 280 ++ dspl/liblapack/SRC/csprfs.f | 437 ++ dspl/liblapack/SRC/cspsv.f | 224 + dspl/liblapack/SRC/cspsvx.f | 387 ++ dspl/liblapack/SRC/csptrf.f | 619 +++ dspl/liblapack/SRC/csptri.f | 404 ++ dspl/liblapack/SRC/csptrs.f | 450 ++ dspl/liblapack/SRC/csrscl.f | 174 + dspl/liblapack/SRC/cstedc.f | 486 +++ dspl/liblapack/SRC/cstegr.f | 302 ++ dspl/liblapack/SRC/cstein.f | 469 +++ dspl/liblapack/SRC/cstemr.f | 791 ++++ dspl/liblapack/SRC/csteqr.f | 576 +++ dspl/liblapack/SRC/csycon.f | 239 ++ dspl/liblapack/SRC/csycon_3.f | 287 ++ dspl/liblapack/SRC/csycon_rook.f | 255 ++ dspl/liblapack/SRC/csyconv.f | 366 ++ dspl/liblapack/SRC/csyconvf.f | 562 +++ dspl/liblapack/SRC/csyconvf_rook.f | 547 +++ dspl/liblapack/SRC/csyequb.f | 343 ++ dspl/liblapack/SRC/csymv.f | 343 ++ dspl/liblapack/SRC/csyr.f | 268 ++ dspl/liblapack/SRC/csyrfs.f | 446 ++ dspl/liblapack/SRC/csyrfsx.f | 703 ++++ dspl/liblapack/SRC/csysv.f | 270 ++ dspl/liblapack/SRC/csysv_aa.f | 254 ++ dspl/liblapack/SRC/csysv_aa_2stage.f | 276 ++ dspl/liblapack/SRC/csysv_rk.f | 316 ++ dspl/liblapack/SRC/csysv_rook.f | 293 ++ dspl/liblapack/SRC/csysvx.f | 417 ++ dspl/liblapack/SRC/csysvxx.f | 701 ++++ dspl/liblapack/SRC/csyswapr.f | 193 + dspl/liblapack/SRC/csytf2.f | 611 +++ dspl/liblapack/SRC/csytf2_rk.f | 952 +++++ dspl/liblapack/SRC/csytf2_rook.f | 821 ++++ dspl/liblapack/SRC/csytrf.f | 362 ++ dspl/liblapack/SRC/csytrf_aa.f | 467 +++ dspl/liblapack/SRC/csytrf_aa_2stage.f | 668 +++ dspl/liblapack/SRC/csytrf_rk.f | 498 +++ dspl/liblapack/SRC/csytrf_rook.f | 393 ++ dspl/liblapack/SRC/csytri.f | 383 ++ dspl/liblapack/SRC/csytri2.f | 205 + dspl/liblapack/SRC/csytri2x.f | 588 +++ dspl/liblapack/SRC/csytri_3.f | 248 ++ dspl/liblapack/SRC/csytri_3x.f | 647 +++ dspl/liblapack/SRC/csytri_rook.f | 451 ++ dspl/liblapack/SRC/csytrs.f | 445 ++ dspl/liblapack/SRC/csytrs2.f | 361 ++ dspl/liblapack/SRC/csytrs_3.f | 371 ++ dspl/liblapack/SRC/csytrs_aa.f | 285 ++ dspl/liblapack/SRC/csytrs_aa_2stage.f | 281 ++ dspl/liblapack/SRC/csytrs_rook.f | 484 +++ dspl/liblapack/SRC/ctbcon.f | 291 ++ dspl/liblapack/SRC/ctbrfs.f | 497 +++ dspl/liblapack/SRC/ctbtrs.f | 244 ++ dspl/liblapack/SRC/ctfsm.f | 1026 +++++ dspl/liblapack/SRC/ctftri.f | 492 +++ dspl/liblapack/SRC/ctfttp.f | 543 +++ dspl/liblapack/SRC/ctfttr.f | 538 +++ dspl/liblapack/SRC/ctgevc.f | 737 ++++ dspl/liblapack/SRC/ctgex2.f | 363 ++ dspl/liblapack/SRC/ctgexc.f | 300 ++ dspl/liblapack/SRC/ctgsen.f | 784 ++++ dspl/liblapack/SRC/ctgsja.f | 665 +++ dspl/liblapack/SRC/ctgsna.f | 519 +++ dspl/liblapack/SRC/ctgsy2.f | 472 +++ dspl/liblapack/SRC/ctgsyl.f | 695 ++++ dspl/liblapack/SRC/ctpcon.f | 274 ++ dspl/liblapack/SRC/ctplqt.f | 253 ++ dspl/liblapack/SRC/ctplqt2.f | 316 ++ dspl/liblapack/SRC/ctpmlqt.f | 349 ++ dspl/liblapack/SRC/ctpmqrt.f | 368 ++ dspl/liblapack/SRC/ctpqrt.f | 270 ++ dspl/liblapack/SRC/ctpqrt2.f | 302 ++ dspl/liblapack/SRC/ctprfb.f | 814 ++++ dspl/liblapack/SRC/ctprfs.f | 484 +++ dspl/liblapack/SRC/ctptri.f | 242 ++ dspl/liblapack/SRC/ctptrs.f | 228 + dspl/liblapack/SRC/ctpttf.f | 539 +++ dspl/liblapack/SRC/ctpttr.f | 176 + dspl/liblapack/SRC/ctrcon.f | 283 ++ dspl/liblapack/SRC/ctrevc.f | 486 +++ dspl/liblapack/SRC/ctrevc3.f | 631 +++ dspl/liblapack/SRC/ctrexc.f | 240 ++ dspl/liblapack/SRC/ctrrfs.f | 479 +++ dspl/liblapack/SRC/ctrsen.f | 456 ++ dspl/liblapack/SRC/ctrsna.f | 464 +++ dspl/liblapack/SRC/ctrsyl.f | 454 ++ dspl/liblapack/SRC/ctrti2.f | 212 + dspl/liblapack/SRC/ctrtri.f | 243 ++ dspl/liblapack/SRC/ctrtrs.f | 227 + dspl/liblapack/SRC/ctrttf.f | 537 +++ dspl/liblapack/SRC/ctrttp.f | 176 + dspl/liblapack/SRC/ctzrzf.f | 313 ++ dspl/liblapack/SRC/cunbdb.f | 687 +++ dspl/liblapack/SRC/cunbdb1.f | 326 ++ dspl/liblapack/SRC/cunbdb2.f | 338 ++ dspl/liblapack/SRC/cunbdb3.f | 336 ++ dspl/liblapack/SRC/cunbdb4.f | 385 ++ dspl/liblapack/SRC/cunbdb5.f | 274 ++ dspl/liblapack/SRC/cunbdb6.f | 313 ++ dspl/liblapack/SRC/cuncsd.f | 658 +++ dspl/liblapack/SRC/cuncsd2by1.f | 774 ++++ dspl/liblapack/SRC/cung2l.f | 199 + dspl/liblapack/SRC/cung2r.f | 201 + dspl/liblapack/SRC/cungbr.f | 338 ++ dspl/liblapack/SRC/cunghr.f | 241 ++ dspl/liblapack/SRC/cungl2.f | 207 + dspl/liblapack/SRC/cunglq.f | 289 ++ dspl/liblapack/SRC/cungql.f | 296 ++ dspl/liblapack/SRC/cungqr.f | 290 ++ dspl/liblapack/SRC/cungr2.f | 205 + dspl/liblapack/SRC/cungrq.f | 297 ++ dspl/liblapack/SRC/cungtr.f | 256 ++ dspl/liblapack/SRC/cunm22.f | 440 ++ dspl/liblapack/SRC/cunm2l.f | 281 ++ dspl/liblapack/SRC/cunm2r.f | 286 ++ dspl/liblapack/SRC/cunmbr.f | 381 ++ dspl/liblapack/SRC/cunmhr.f | 296 ++ dspl/liblapack/SRC/cunml2.f | 290 ++ dspl/liblapack/SRC/cunmlq.f | 354 ++ dspl/liblapack/SRC/cunmql.f | 343 ++ dspl/liblapack/SRC/cunmqr.f | 342 ++ dspl/liblapack/SRC/cunmr2.f | 283 ++ dspl/liblapack/SRC/cunmr3.f | 305 ++ dspl/liblapack/SRC/cunmrq.f | 348 ++ dspl/liblapack/SRC/cunmrz.f | 383 ++ dspl/liblapack/SRC/cunmtr.f | 312 ++ dspl/liblapack/SRC/cupgtr.f | 233 ++ dspl/liblapack/SRC/cupmtr.f | 349 ++ dspl/liblapack/SRC/dbbcsd.f | 1080 +++++ dspl/liblapack/SRC/dbdsdc.f | 524 +++ dspl/liblapack/SRC/dbdsqr.f | 867 ++++ dspl/liblapack/SRC/dbdsvdx.f | 792 ++++ dspl/liblapack/SRC/ddisna.f | 245 ++ dspl/liblapack/SRC/dgbbrd.f | 547 +++ dspl/liblapack/SRC/dgbcon.f | 311 ++ dspl/liblapack/SRC/dgbequ.f | 324 ++ dspl/liblapack/SRC/dgbequb.f | 340 ++ dspl/liblapack/SRC/dgbrfs.f | 464 +++ dspl/liblapack/SRC/dgbrfsx.f | 765 ++++ dspl/liblapack/SRC/dgbsv.f | 223 + dspl/liblapack/SRC/dgbsvx.f | 642 +++ dspl/liblapack/SRC/dgbsvxx.f | 799 ++++ dspl/liblapack/SRC/dgbtf2.f | 277 ++ dspl/liblapack/SRC/dgbtrf.f | 516 +++ dspl/liblapack/SRC/dgbtrs.f | 269 ++ dspl/liblapack/SRC/dgebak.f | 268 ++ dspl/liblapack/SRC/dgebal.f | 398 ++ dspl/liblapack/SRC/dgebd2.f | 320 ++ dspl/liblapack/SRC/dgebrd.f | 352 ++ dspl/liblapack/SRC/dgecon.f | 261 ++ dspl/liblapack/SRC/dgeequ.f | 304 ++ dspl/liblapack/SRC/dgeequb.f | 321 ++ dspl/liblapack/SRC/dgees.f | 535 +++ dspl/liblapack/SRC/dgeesx.f | 649 +++ dspl/liblapack/SRC/dgeev.f | 529 +++ dspl/liblapack/SRC/dgeevx.f | 694 +++ dspl/liblapack/SRC/dgehd2.f | 225 + dspl/liblapack/SRC/dgehrd.f | 356 ++ dspl/liblapack/SRC/dgejsv.f | 1783 ++++++++ dspl/liblapack/SRC/dgelq.f | 306 ++ dspl/liblapack/SRC/dgelq2.f | 192 + dspl/liblapack/SRC/dgelqf.f | 269 ++ dspl/liblapack/SRC/dgelqt.f | 210 + dspl/liblapack/SRC/dgelqt3.f | 259 ++ dspl/liblapack/SRC/dgels.f | 504 +++ dspl/liblapack/SRC/dgelsd.f | 629 +++ dspl/liblapack/SRC/dgelss.f | 747 ++++ dspl/liblapack/SRC/dgelsy.f | 479 +++ dspl/liblapack/SRC/dgemlq.f | 284 ++ dspl/liblapack/SRC/dgemlqt.f | 289 ++ dspl/liblapack/SRC/dgemqr.f | 285 ++ dspl/liblapack/SRC/dgemqrt.f | 291 ++ dspl/liblapack/SRC/dgeql2.f | 193 + dspl/liblapack/SRC/dgeqlf.f | 287 ++ dspl/liblapack/SRC/dgeqp3.f | 361 ++ dspl/liblapack/SRC/dgeqr.f | 307 ++ dspl/liblapack/SRC/dgeqr2.f | 192 + dspl/liblapack/SRC/dgeqr2p.f | 195 + dspl/liblapack/SRC/dgeqrf.f | 270 ++ dspl/liblapack/SRC/dgeqrfp.f | 273 ++ dspl/liblapack/SRC/dgeqrt.f | 218 + dspl/liblapack/SRC/dgeqrt2.f | 227 + dspl/liblapack/SRC/dgeqrt3.f | 257 ++ dspl/liblapack/SRC/dgerfs.f | 438 ++ dspl/liblapack/SRC/dgerfsx.f | 731 ++++ dspl/liblapack/SRC/dgerq2.f | 193 + dspl/liblapack/SRC/dgerqf.f | 287 ++ dspl/liblapack/SRC/dgesc2.f | 201 + dspl/liblapack/SRC/dgesdd.f | 1548 +++++++ dspl/liblapack/SRC/dgesv.f | 179 + dspl/liblapack/SRC/dgesvd.f | 3504 ++++++++++++++++ dspl/liblapack/SRC/dgesvdx.f | 834 ++++ dspl/liblapack/SRC/dgesvj.f | 1615 +++++++ dspl/liblapack/SRC/dgesvx.f | 602 +++ dspl/liblapack/SRC/dgesvxx.f | 769 ++++ dspl/liblapack/SRC/dgetc2.f | 234 ++ dspl/liblapack/SRC/dgetf2.f | 213 + dspl/liblapack/SRC/dgetrf.f | 225 + dspl/liblapack/SRC/dgetrf2.f | 272 ++ dspl/liblapack/SRC/dgetri.f | 261 ++ dspl/liblapack/SRC/dgetrs.f | 225 + dspl/liblapack/SRC/dgetsls.f | 494 +++ dspl/liblapack/SRC/dggbak.f | 306 ++ dspl/liblapack/SRC/dggbal.f | 559 +++ dspl/liblapack/SRC/dgges.f | 682 +++ dspl/liblapack/SRC/dgges3.f | 674 +++ dspl/liblapack/SRC/dggesx.f | 820 ++++ dspl/liblapack/SRC/dggev.f | 592 +++ dspl/liblapack/SRC/dggev3.f | 594 +++ dspl/liblapack/SRC/dggevx.f | 868 ++++ dspl/liblapack/SRC/dggglm.f | 348 ++ dspl/liblapack/SRC/dgghd3.f | 897 ++++ dspl/liblapack/SRC/dgghrd.f | 361 ++ dspl/liblapack/SRC/dgglse.f | 354 ++ dspl/liblapack/SRC/dggqrf.f | 299 ++ dspl/liblapack/SRC/dggrqf.f | 299 ++ dspl/liblapack/SRC/dggsvd3.f | 503 +++ dspl/liblapack/SRC/dggsvp3.f | 571 +++ dspl/liblapack/SRC/dgsvj0.f | 1079 +++++ dspl/liblapack/SRC/dgsvj1.f | 784 ++++ dspl/liblapack/SRC/dgtcon.f | 255 ++ dspl/liblapack/SRC/dgtrfs.f | 474 +++ dspl/liblapack/SRC/dgtsv.f | 333 ++ dspl/liblapack/SRC/dgtsvx.f | 414 ++ dspl/liblapack/SRC/dgttrf.f | 237 ++ dspl/liblapack/SRC/dgttrs.f | 223 + dspl/liblapack/SRC/dgtts2.f | 274 ++ dspl/liblapack/SRC/dhgeqz.f | 1367 ++++++ dspl/liblapack/SRC/dhsein.f | 530 +++ dspl/liblapack/SRC/dhseqr.f | 516 +++ dspl/liblapack/SRC/disnan.f | 80 + dspl/liblapack/SRC/dla_gbamv.f | 411 ++ dspl/liblapack/SRC/dla_gbrcond.f | 353 ++ dspl/liblapack/SRC/dla_gbrfsx_extended.f | 707 ++++ dspl/liblapack/SRC/dla_gbrpvgrw.f | 160 + dspl/liblapack/SRC/dla_geamv.f | 396 ++ dspl/liblapack/SRC/dla_gercond.f | 329 ++ dspl/liblapack/SRC/dla_gerfsx_extended.f | 685 +++ dspl/liblapack/SRC/dla_gerpvgrw.f | 142 + dspl/liblapack/SRC/dla_lin_berr.f | 153 + dspl/liblapack/SRC/dla_porcond.f | 328 ++ dspl/liblapack/SRC/dla_porfsx_extended.f | 679 +++ dspl/liblapack/SRC/dla_porpvgrw.f | 210 + dspl/liblapack/SRC/dla_syamv.f | 417 ++ dspl/liblapack/SRC/dla_syrcond.f | 341 ++ dspl/liblapack/SRC/dla_syrfsx_extended.f | 708 ++++ dspl/liblapack/SRC/dla_syrpvgrw.f | 320 ++ dspl/liblapack/SRC/dla_wwaddw.f | 111 + dspl/liblapack/SRC/dlabad.f | 105 + dspl/liblapack/SRC/dlabrd.f | 381 ++ dspl/liblapack/SRC/dlacn2.f | 294 ++ dspl/liblapack/SRC/dlacon.f | 275 ++ dspl/liblapack/SRC/dlacpy.f | 156 + dspl/liblapack/SRC/dladiv.f | 256 ++ dspl/liblapack/SRC/dlae2.f | 185 + dspl/liblapack/SRC/dlaebz.f | 649 +++ dspl/liblapack/SRC/dlaed0.f | 434 ++ dspl/liblapack/SRC/dlaed1.f | 274 ++ dspl/liblapack/SRC/dlaed2.f | 539 +++ dspl/liblapack/SRC/dlaed3.f | 353 ++ dspl/liblapack/SRC/dlaed4.f | 917 ++++ dspl/liblapack/SRC/dlaed5.f | 189 + dspl/liblapack/SRC/dlaed6.f | 410 ++ dspl/liblapack/SRC/dlaed7.f | 407 ++ dspl/liblapack/SRC/dlaed8.f | 524 +++ dspl/liblapack/SRC/dlaed9.f | 294 ++ dspl/liblapack/SRC/dlaeda.f | 308 ++ dspl/liblapack/SRC/dlaein.f | 632 +++ dspl/liblapack/SRC/dlaev2.f | 238 ++ dspl/liblapack/SRC/dlaexc.f | 436 ++ dspl/liblapack/SRC/dlag2.f | 379 ++ dspl/liblapack/SRC/dlag2s.f | 152 + dspl/liblapack/SRC/dlags2.f | 362 ++ dspl/liblapack/SRC/dlagtf.f | 266 ++ dspl/liblapack/SRC/dlagtm.f | 278 ++ dspl/liblapack/SRC/dlagts.f | 383 ++ dspl/liblapack/SRC/dlagv2.f | 374 ++ dspl/liblapack/SRC/dlahqr.f | 613 +++ dspl/liblapack/SRC/dlahr2.f | 326 ++ dspl/liblapack/SRC/dlaic1.f | 367 ++ dspl/liblapack/SRC/dlaisnan.f | 91 + dspl/liblapack/SRC/dlaln2.f | 611 +++ dspl/liblapack/SRC/dlals0.f | 499 +++ dspl/liblapack/SRC/dlalsa.f | 493 +++ dspl/liblapack/SRC/dlalsd.f | 523 +++ dspl/liblapack/SRC/dlamrg.f | 171 + dspl/liblapack/SRC/dlamswlq.f | 418 ++ dspl/liblapack/SRC/dlamtsqr.f | 415 ++ dspl/liblapack/SRC/dlaneg.f | 227 + dspl/liblapack/SRC/dlangb.f | 225 + dspl/liblapack/SRC/dlange.f | 211 + dspl/liblapack/SRC/dlangt.f | 208 + dspl/liblapack/SRC/dlanhs.f | 205 + dspl/liblapack/SRC/dlansb.f | 258 ++ dspl/liblapack/SRC/dlansf.f | 963 +++++ dspl/liblapack/SRC/dlansp.f | 261 ++ dspl/liblapack/SRC/dlanst.f | 186 + dspl/liblapack/SRC/dlansy.f | 241 ++ dspl/liblapack/SRC/dlantb.f | 361 ++ dspl/liblapack/SRC/dlantp.f | 355 ++ dspl/liblapack/SRC/dlantr.f | 353 ++ dspl/liblapack/SRC/dlanv2.f | 289 ++ dspl/liblapack/SRC/dlapll.f | 165 + dspl/liblapack/SRC/dlapmr.f | 204 + dspl/liblapack/SRC/dlapmt.f | 203 + dspl/liblapack/SRC/dlapy2.f | 116 + dspl/liblapack/SRC/dlapy3.f | 111 + dspl/liblapack/SRC/dlaqgb.f | 256 ++ dspl/liblapack/SRC/dlaqge.f | 236 ++ dspl/liblapack/SRC/dlaqp2.f | 262 ++ dspl/liblapack/SRC/dlaqps.f | 358 ++ dspl/liblapack/SRC/dlaqr0.f | 740 ++++ dspl/liblapack/SRC/dlaqr1.f | 179 + dspl/liblapack/SRC/dlaqr2.f | 684 +++ dspl/liblapack/SRC/dlaqr3.f | 695 ++++ dspl/liblapack/SRC/dlaqr4.f | 739 ++++ dspl/liblapack/SRC/dlaqr5.f | 919 ++++ dspl/liblapack/SRC/dlaqsb.f | 226 + dspl/liblapack/SRC/dlaqsp.f | 212 + dspl/liblapack/SRC/dlaqsy.f | 216 + dspl/liblapack/SRC/dlaqtr.f | 748 ++++ dspl/liblapack/SRC/dlar1v.f | 486 +++ dspl/liblapack/SRC/dlar2v.f | 157 + dspl/liblapack/SRC/dlarf.f | 227 + dspl/liblapack/SRC/dlarfb.f | 710 ++++ dspl/liblapack/SRC/dlarfg.f | 196 + dspl/liblapack/SRC/dlarfgp.f | 242 ++ dspl/liblapack/SRC/dlarft.f | 326 ++ dspl/liblapack/SRC/dlarfx.f | 697 ++++ dspl/liblapack/SRC/dlarfy.f | 161 + dspl/liblapack/SRC/dlargv.f | 167 + dspl/liblapack/SRC/dlarnv.f | 178 + dspl/liblapack/SRC/dlarra.f | 210 + dspl/liblapack/SRC/dlarrb.f | 407 ++ dspl/liblapack/SRC/dlarrc.f | 251 ++ dspl/liblapack/SRC/dlarrd.f | 869 ++++ dspl/liblapack/SRC/dlarre.f | 904 ++++ dspl/liblapack/SRC/dlarrf.f | 495 +++ dspl/liblapack/SRC/dlarrj.f | 379 ++ dspl/liblapack/SRC/dlarrk.f | 256 ++ dspl/liblapack/SRC/dlarrr.f | 211 + dspl/liblapack/SRC/dlarrv.f | 1045 +++++ dspl/liblapack/SRC/dlarscl2.f | 119 + dspl/liblapack/SRC/dlartg.f | 204 + dspl/liblapack/SRC/dlartgp.f | 202 + dspl/liblapack/SRC/dlartgs.f | 161 + dspl/liblapack/SRC/dlartv.f | 147 + dspl/liblapack/SRC/dlaruv.f | 446 ++ dspl/liblapack/SRC/dlarz.f | 236 ++ dspl/liblapack/SRC/dlarzb.f | 323 ++ dspl/liblapack/SRC/dlarzt.f | 264 ++ dspl/liblapack/SRC/dlas2.f | 183 + dspl/liblapack/SRC/dlascl.f | 368 ++ dspl/liblapack/SRC/dlascl2.f | 119 + dspl/liblapack/SRC/dlasd0.f | 316 ++ dspl/liblapack/SRC/dlasd1.f | 326 ++ dspl/liblapack/SRC/dlasd2.f | 634 +++ dspl/liblapack/SRC/dlasd3.f | 469 +++ dspl/liblapack/SRC/dlasd4.f | 1061 +++++ dspl/liblapack/SRC/dlasd5.f | 231 + dspl/liblapack/SRC/dlasd6.f | 443 ++ dspl/liblapack/SRC/dlasd7.f | 580 +++ dspl/liblapack/SRC/dlasd8.f | 342 ++ dspl/liblapack/SRC/dlasda.f | 514 +++ dspl/liblapack/SRC/dlasdq.f | 413 ++ dspl/liblapack/SRC/dlasdt.f | 172 + dspl/liblapack/SRC/dlaset.f | 184 + dspl/liblapack/SRC/dlasq1.f | 224 + dspl/liblapack/SRC/dlasq2.f | 582 +++ dspl/liblapack/SRC/dlasq3.f | 421 ++ dspl/liblapack/SRC/dlasq4.f | 424 ++ dspl/liblapack/SRC/dlasq5.f | 410 ++ dspl/liblapack/SRC/dlasq6.f | 254 ++ dspl/liblapack/SRC/dlasr.f | 436 ++ dspl/liblapack/SRC/dlasrt.f | 303 ++ dspl/liblapack/SRC/dlassq.f | 155 + dspl/liblapack/SRC/dlasv2.f | 325 ++ dspl/liblapack/SRC/dlaswlq.f | 258 ++ dspl/liblapack/SRC/dlaswp.f | 193 + dspl/liblapack/SRC/dlasy2.f | 482 +++ dspl/liblapack/SRC/dlasyf.f | 822 ++++ dspl/liblapack/SRC/dlasyf_aa.f | 493 +++ dspl/liblapack/SRC/dlasyf_rk.f | 965 +++++ dspl/liblapack/SRC/dlasyf_rook.f | 892 ++++ dspl/liblapack/SRC/dlat2s.f | 173 + dspl/liblapack/SRC/dlatbs.f | 812 ++++ dspl/liblapack/SRC/dlatdf.f | 323 ++ dspl/liblapack/SRC/dlatps.f | 795 ++++ dspl/liblapack/SRC/dlatrd.f | 336 ++ dspl/liblapack/SRC/dlatrs.f | 787 ++++ dspl/liblapack/SRC/dlatrz.f | 200 + dspl/liblapack/SRC/dlatsqr.f | 256 ++ dspl/liblapack/SRC/dlauu2.f | 198 + dspl/liblapack/SRC/dlauum.f | 218 + dspl/liblapack/SRC/dopgtr.f | 232 ++ dspl/liblapack/SRC/dopmtr.f | 339 ++ dspl/liblapack/SRC/dorbdb.f | 687 +++ dspl/liblapack/SRC/dorbdb1.f | 323 ++ dspl/liblapack/SRC/dorbdb2.f | 333 ++ dspl/liblapack/SRC/dorbdb3.f | 332 ++ dspl/liblapack/SRC/dorbdb4.f | 377 ++ dspl/liblapack/SRC/dorbdb5.f | 274 ++ dspl/liblapack/SRC/dorbdb6.f | 312 ++ dspl/liblapack/SRC/dorcsd.f | 616 +++ dspl/liblapack/SRC/dorcsd2by1.f | 740 ++++ dspl/liblapack/SRC/dorg2l.f | 198 + dspl/liblapack/SRC/dorg2r.f | 200 + dspl/liblapack/SRC/dorgbr.f | 337 ++ dspl/liblapack/SRC/dorghr.f | 240 ++ dspl/liblapack/SRC/dorgl2.f | 204 + dspl/liblapack/SRC/dorglq.f | 289 ++ dspl/liblapack/SRC/dorgql.f | 296 ++ dspl/liblapack/SRC/dorgqr.f | 290 ++ dspl/liblapack/SRC/dorgr2.f | 202 + dspl/liblapack/SRC/dorgrq.f | 296 ++ dspl/liblapack/SRC/dorgtr.f | 255 ++ dspl/liblapack/SRC/dorm22.f | 441 ++ dspl/liblapack/SRC/dorm2l.f | 278 ++ dspl/liblapack/SRC/dorm2r.f | 282 ++ dspl/liblapack/SRC/dormbr.f | 372 ++ dspl/liblapack/SRC/dormhr.f | 294 ++ dspl/liblapack/SRC/dorml2.f | 282 ++ dspl/liblapack/SRC/dormlq.f | 347 ++ dspl/liblapack/SRC/dormql.f | 339 ++ dspl/liblapack/SRC/dormqr.f | 340 ++ dspl/liblapack/SRC/dormr2.f | 278 ++ dspl/liblapack/SRC/dormr3.f | 299 ++ dspl/liblapack/SRC/dormrq.f | 346 ++ dspl/liblapack/SRC/dormrz.f | 380 ++ dspl/liblapack/SRC/dormtr.f | 310 ++ dspl/liblapack/SRC/dpbcon.f | 271 ++ dspl/liblapack/SRC/dpbequ.f | 242 ++ dspl/liblapack/SRC/dpbrfs.f | 443 ++ dspl/liblapack/SRC/dpbstf.f | 319 ++ dspl/liblapack/SRC/dpbsv.f | 229 + dspl/liblapack/SRC/dpbsvx.f | 545 +++ dspl/liblapack/SRC/dpbtf2.f | 263 ++ dspl/liblapack/SRC/dpbtrf.f | 435 ++ dspl/liblapack/SRC/dpbtrs.f | 220 + dspl/liblapack/SRC/dpftrf.f | 457 ++ dspl/liblapack/SRC/dpftri.f | 423 ++ dspl/liblapack/SRC/dpftrs.f | 280 ++ dspl/liblapack/SRC/dpocon.f | 253 ++ dspl/liblapack/SRC/dpoequ.f | 205 + dspl/liblapack/SRC/dpoequb.f | 221 + dspl/liblapack/SRC/dporfs.f | 430 ++ dspl/liblapack/SRC/dporfsx.f | 693 +++ dspl/liblapack/SRC/dposv.f | 193 + dspl/liblapack/SRC/dposvx.f | 494 +++ dspl/liblapack/SRC/dposvxx.f | 683 +++ dspl/liblapack/SRC/dpotf2.f | 230 + dspl/liblapack/SRC/dpotrf.f | 246 ++ dspl/liblapack/SRC/dpotrf2.f | 237 ++ dspl/liblapack/SRC/dpotri.f | 159 + dspl/liblapack/SRC/dpotrs.f | 204 + dspl/liblapack/SRC/dppcon.f | 248 ++ dspl/liblapack/SRC/dppequ.f | 238 ++ dspl/liblapack/SRC/dpprfs.f | 421 ++ dspl/liblapack/SRC/dppsv.f | 205 + dspl/liblapack/SRC/dppsvx.f | 493 +++ dspl/liblapack/SRC/dpptrf.f | 240 ++ dspl/liblapack/SRC/dpptri.f | 188 + dspl/liblapack/SRC/dpptrs.f | 203 + dspl/liblapack/SRC/dpstf2.f | 386 ++ dspl/liblapack/SRC/dpstrf.f | 445 ++ dspl/liblapack/SRC/dptcon.f | 221 + dspl/liblapack/SRC/dpteqr.f | 261 ++ dspl/liblapack/SRC/dptrfs.f | 395 ++ dspl/liblapack/SRC/dptsv.f | 167 + dspl/liblapack/SRC/dptsvx.f | 336 ++ dspl/liblapack/SRC/dpttrf.f | 211 + dspl/liblapack/SRC/dpttrs.f | 182 + dspl/liblapack/SRC/dptts2.f | 158 + dspl/liblapack/SRC/drscl.f | 174 + dspl/liblapack/SRC/dsb2st_kernels.f | 377 ++ dspl/liblapack/SRC/dsbev.f | 287 ++ dspl/liblapack/SRC/dsbev_2stage.f | 380 ++ dspl/liblapack/SRC/dsbevd.f | 360 ++ dspl/liblapack/SRC/dsbevd_2stage.f | 412 ++ dspl/liblapack/SRC/dsbevx.f | 543 +++ dspl/liblapack/SRC/dsbevx_2stage.f | 636 +++ dspl/liblapack/SRC/dsbgst.f | 1434 +++++++ dspl/liblapack/SRC/dsbgv.f | 280 ++ dspl/liblapack/SRC/dsbgvd.f | 372 ++ dspl/liblapack/SRC/dsbgvx.f | 522 +++ dspl/liblapack/SRC/dsbtrd.f | 641 +++ dspl/liblapack/SRC/dsfrk.f | 544 +++ dspl/liblapack/SRC/dsgesv.f | 433 ++ dspl/liblapack/SRC/dspcon.f | 238 ++ dspl/liblapack/SRC/dspev.f | 262 ++ dspl/liblapack/SRC/dspevd.f | 337 ++ dspl/liblapack/SRC/dspevx.f | 496 +++ dspl/liblapack/SRC/dspgst.f | 274 ++ dspl/liblapack/SRC/dspgv.f | 277 ++ dspl/liblapack/SRC/dspgvd.f | 364 ++ dspl/liblapack/SRC/dspgvx.f | 417 ++ dspl/liblapack/SRC/dsposv.f | 439 ++ dspl/liblapack/SRC/dsprfs.f | 431 ++ dspl/liblapack/SRC/dspsv.f | 224 + dspl/liblapack/SRC/dspsvx.f | 385 ++ dspl/liblapack/SRC/dsptrd.f | 300 ++ dspl/liblapack/SRC/dsptrf.f | 616 +++ dspl/liblapack/SRC/dsptri.f | 401 ++ dspl/liblapack/SRC/dsptrs.f | 450 ++ dspl/liblapack/SRC/dstebz.f | 771 ++++ dspl/liblapack/SRC/dstedc.f | 482 +++ dspl/liblapack/SRC/dstegr.f | 302 ++ dspl/liblapack/SRC/dstein.f | 453 ++ dspl/liblapack/SRC/dstemr.f | 777 ++++ dspl/liblapack/SRC/dsteqr.f | 572 +++ dspl/liblapack/SRC/dsterf.f | 426 ++ dspl/liblapack/SRC/dstev.f | 235 ++ dspl/liblapack/SRC/dstevd.f | 302 ++ dspl/liblapack/SRC/dstevr.f | 584 +++ dspl/liblapack/SRC/dstevx.f | 464 +++ dspl/liblapack/SRC/dsycon.f | 244 ++ dspl/liblapack/SRC/dsycon_3.f | 285 ++ dspl/liblapack/SRC/dsycon_rook.f | 258 ++ dspl/liblapack/SRC/dsyconv.f | 366 ++ dspl/liblapack/SRC/dsyconvf.f | 559 +++ dspl/liblapack/SRC/dsyconvf_rook.f | 544 +++ dspl/liblapack/SRC/dsyequb.f | 334 ++ dspl/liblapack/SRC/dsyev.f | 286 ++ dspl/liblapack/SRC/dsyev_2stage.f | 348 ++ dspl/liblapack/SRC/dsyevd.f | 357 ++ dspl/liblapack/SRC/dsyevd_2stage.f | 410 ++ dspl/liblapack/SRC/dsyevr.f | 681 +++ dspl/liblapack/SRC/dsyevr_2stage.f | 740 ++++ dspl/liblapack/SRC/dsyevx.f | 554 +++ dspl/liblapack/SRC/dsyevx_2stage.f | 612 +++ dspl/liblapack/SRC/dsygs2.f | 283 ++ dspl/liblapack/SRC/dsygst.f | 321 ++ dspl/liblapack/SRC/dsygv.f | 314 ++ dspl/liblapack/SRC/dsygv_2stage.f | 370 ++ dspl/liblapack/SRC/dsygvd.f | 380 ++ dspl/liblapack/SRC/dsygvx.f | 465 +++ dspl/liblapack/SRC/dsyrfs.f | 441 ++ dspl/liblapack/SRC/dsyrfsx.f | 700 ++++ dspl/liblapack/SRC/dsysv.f | 270 ++ dspl/liblapack/SRC/dsysv_aa.f | 254 ++ dspl/liblapack/SRC/dsysv_aa_2stage.f | 280 ++ dspl/liblapack/SRC/dsysv_rk.f | 317 ++ dspl/liblapack/SRC/dsysv_rook.f | 293 ++ dspl/liblapack/SRC/dsysvx.f | 416 ++ dspl/liblapack/SRC/dsysvxx.f | 696 ++++ dspl/liblapack/SRC/dsyswapr.f | 193 + dspl/liblapack/SRC/dsytd2.f | 323 ++ dspl/liblapack/SRC/dsytf2.f | 610 +++ dspl/liblapack/SRC/dsytf2_rk.f | 943 +++++ dspl/liblapack/SRC/dsytf2_rook.f | 813 ++++ dspl/liblapack/SRC/dsytrd.f | 376 ++ dspl/liblapack/SRC/dsytrd_2stage.f | 337 ++ dspl/liblapack/SRC/dsytrd_sb2st.F | 556 +++ dspl/liblapack/SRC/dsytrd_sy2sb.f | 517 +++ dspl/liblapack/SRC/dsytrf.f | 363 ++ dspl/liblapack/SRC/dsytrf_aa.f | 467 +++ dspl/liblapack/SRC/dsytrf_aa_2stage.f | 647 +++ dspl/liblapack/SRC/dsytrf_rk.f | 498 +++ dspl/liblapack/SRC/dsytrf_rook.f | 393 ++ dspl/liblapack/SRC/dsytri.f | 382 ++ dspl/liblapack/SRC/dsytri2.f | 205 + dspl/liblapack/SRC/dsytri2x.f | 591 +++ dspl/liblapack/SRC/dsytri_3.f | 248 ++ dspl/liblapack/SRC/dsytri_3x.f | 645 +++ dspl/liblapack/SRC/dsytri_rook.f | 450 ++ dspl/liblapack/SRC/dsytrs.f | 445 ++ dspl/liblapack/SRC/dsytrs2.f | 361 ++ dspl/liblapack/SRC/dsytrs_3.f | 371 ++ dspl/liblapack/SRC/dsytrs_aa.f | 285 ++ dspl/liblapack/SRC/dsytrs_aa_2stage.f | 281 ++ dspl/liblapack/SRC/dsytrs_rook.f | 484 +++ dspl/liblapack/SRC/dtbcon.f | 284 ++ dspl/liblapack/SRC/dtbrfs.f | 485 +++ dspl/liblapack/SRC/dtbtrs.f | 244 ++ dspl/liblapack/SRC/dtfsm.f | 1006 +++++ dspl/liblapack/SRC/dtftri.f | 472 +++ dspl/liblapack/SRC/dtfttp.f | 517 +++ dspl/liblapack/SRC/dtfttr.f | 495 +++ dspl/liblapack/SRC/dtgevc.f | 1211 ++++++ dspl/liblapack/SRC/dtgex2.f | 697 ++++ dspl/liblapack/SRC/dtgexc.f | 544 +++ dspl/liblapack/SRC/dtgsen.f | 865 ++++ dspl/liblapack/SRC/dtgsja.f | 655 +++ dspl/liblapack/SRC/dtgsna.f | 700 ++++ dspl/liblapack/SRC/dtgsy2.f | 1075 +++++ dspl/liblapack/SRC/dtgsyl.f | 682 +++ dspl/liblapack/SRC/dtpcon.f | 267 ++ dspl/liblapack/SRC/dtplqt.f | 270 ++ dspl/liblapack/SRC/dtplqt2.f | 312 ++ dspl/liblapack/SRC/dtpmlqt.f | 366 ++ dspl/liblapack/SRC/dtpmqrt.f | 368 ++ dspl/liblapack/SRC/dtpqrt.f | 270 ++ dspl/liblapack/SRC/dtpqrt2.f | 302 ++ dspl/liblapack/SRC/dtprfb.f | 811 ++++ dspl/liblapack/SRC/dtprfs.f | 473 +++ dspl/liblapack/SRC/dtptri.f | 241 ++ dspl/liblapack/SRC/dtptrs.f | 228 + dspl/liblapack/SRC/dtpttf.f | 502 +++ dspl/liblapack/SRC/dtpttr.f | 176 + dspl/liblapack/SRC/dtrcon.f | 276 ++ dspl/liblapack/SRC/dtrevc.f | 1077 +++++ dspl/liblapack/SRC/dtrevc3.f | 1304 ++++++ dspl/liblapack/SRC/dtrexc.f | 428 ++ dspl/liblapack/SRC/dtrrfs.f | 472 +++ dspl/liblapack/SRC/dtrsen.f | 570 +++ dspl/liblapack/SRC/dtrsna.f | 603 +++ dspl/liblapack/SRC/dtrsyl.f | 1002 +++++ dspl/liblapack/SRC/dtrti2.f | 212 + dspl/liblapack/SRC/dtrtri.f | 242 ++ dspl/liblapack/SRC/dtrtrs.f | 226 + dspl/liblapack/SRC/dtrttf.f | 492 +++ dspl/liblapack/SRC/dtrttp.f | 176 + dspl/liblapack/SRC/dtzrzf.f | 313 ++ dspl/liblapack/SRC/dzsum1.f | 140 + dspl/liblapack/SRC/icmax1.f | 141 + dspl/liblapack/SRC/ieeeck.f | 203 + dspl/liblapack/SRC/ilaclc.f | 118 + dspl/liblapack/SRC/ilaclr.f | 121 + dspl/liblapack/SRC/iladiag.f | 92 + dspl/liblapack/SRC/iladlc.f | 118 + dspl/liblapack/SRC/iladlr.f | 121 + dspl/liblapack/SRC/ilaenv.f | 709 ++++ dspl/liblapack/SRC/ilaenv2stage.f | 191 + dspl/liblapack/SRC/ilaprec.f | 98 + dspl/liblapack/SRC/ilaslc.f | 118 + dspl/liblapack/SRC/ilaslr.f | 121 + dspl/liblapack/SRC/ilatrans.f | 95 + dspl/liblapack/SRC/ilauplo.f | 92 + dspl/liblapack/SRC/ilazlc.f | 118 + dspl/liblapack/SRC/ilazlr.f | 121 + dspl/liblapack/SRC/iparam2stage.F | 388 ++ dspl/liblapack/SRC/iparmq.f | 395 ++ dspl/liblapack/SRC/izmax1.f | 141 + dspl/liblapack/SRC/lsamen.f | 122 + dspl/liblapack/SRC/sbbcsd.f | 1080 +++++ dspl/liblapack/SRC/sbdsdc.f | 524 +++ dspl/liblapack/SRC/sbdsqr.f | 866 ++++ dspl/liblapack/SRC/sbdsvdx.f | 792 ++++ dspl/liblapack/SRC/scsum1.f | 140 + dspl/liblapack/SRC/sdisna.f | 245 ++ dspl/liblapack/SRC/sgbbrd.f | 547 +++ dspl/liblapack/SRC/sgbcon.f | 311 ++ dspl/liblapack/SRC/sgbequ.f | 324 ++ dspl/liblapack/SRC/sgbequb.f | 340 ++ dspl/liblapack/SRC/sgbrfs.f | 464 +++ dspl/liblapack/SRC/sgbrfsx.f | 765 ++++ dspl/liblapack/SRC/sgbsv.f | 223 + dspl/liblapack/SRC/sgbsvx.f | 644 +++ dspl/liblapack/SRC/sgbsvxx.f | 802 ++++ dspl/liblapack/SRC/sgbtf2.f | 277 ++ dspl/liblapack/SRC/sgbtrf.f | 516 +++ dspl/liblapack/SRC/sgbtrs.f | 269 ++ dspl/liblapack/SRC/sgebak.f | 268 ++ dspl/liblapack/SRC/sgebal.f | 397 ++ dspl/liblapack/SRC/sgebd2.f | 320 ++ dspl/liblapack/SRC/sgebrd.f | 352 ++ dspl/liblapack/SRC/sgecon.f | 261 ++ dspl/liblapack/SRC/sgeequ.f | 304 ++ dspl/liblapack/SRC/sgeequb.f | 321 ++ dspl/liblapack/SRC/sgees.f | 535 +++ dspl/liblapack/SRC/sgeesx.f | 649 +++ dspl/liblapack/SRC/sgeev.f | 529 +++ dspl/liblapack/SRC/sgeevx.f | 694 +++ dspl/liblapack/SRC/sgehd2.f | 225 + dspl/liblapack/SRC/sgehrd.f | 356 ++ dspl/liblapack/SRC/sgejsv.f | 1783 ++++++++ dspl/liblapack/SRC/sgelq.f | 305 ++ dspl/liblapack/SRC/sgelq2.f | 192 + dspl/liblapack/SRC/sgelqf.f | 269 ++ dspl/liblapack/SRC/sgelqt.f | 193 + dspl/liblapack/SRC/sgelqt3.f | 242 ++ dspl/liblapack/SRC/sgels.f | 504 +++ dspl/liblapack/SRC/sgelsd.f | 633 +++ dspl/liblapack/SRC/sgelss.f | 743 ++++ dspl/liblapack/SRC/sgelsy.f | 479 +++ dspl/liblapack/SRC/sgemlq.f | 283 ++ dspl/liblapack/SRC/sgemlqt.f | 272 ++ dspl/liblapack/SRC/sgemqr.f | 285 ++ dspl/liblapack/SRC/sgemqrt.f | 291 ++ dspl/liblapack/SRC/sgeql2.f | 193 + dspl/liblapack/SRC/sgeqlf.f | 287 ++ dspl/liblapack/SRC/sgeqp3.f | 358 ++ dspl/liblapack/SRC/sgeqr.f | 307 ++ dspl/liblapack/SRC/sgeqr2.f | 192 + dspl/liblapack/SRC/sgeqr2p.f | 195 + dspl/liblapack/SRC/sgeqrf.f | 270 ++ dspl/liblapack/SRC/sgeqrfp.f | 273 ++ dspl/liblapack/SRC/sgeqrt.f | 218 + dspl/liblapack/SRC/sgeqrt2.f | 227 + dspl/liblapack/SRC/sgeqrt3.f | 257 ++ dspl/liblapack/SRC/sgerfs.f | 438 ++ dspl/liblapack/SRC/sgerfsx.f | 731 ++++ dspl/liblapack/SRC/sgerq2.f | 193 + dspl/liblapack/SRC/sgerqf.f | 290 ++ dspl/liblapack/SRC/sgesc2.f | 201 + dspl/liblapack/SRC/sgesdd.f | 1548 +++++++ dspl/liblapack/SRC/sgesv.f | 179 + dspl/liblapack/SRC/sgesvd.f | 3505 ++++++++++++++++ dspl/liblapack/SRC/sgesvdx.f | 834 ++++ dspl/liblapack/SRC/sgesvj.f | 1601 +++++++ dspl/liblapack/SRC/sgesvx.f | 602 +++ dspl/liblapack/SRC/sgesvxx.f | 772 ++++ dspl/liblapack/SRC/sgetc2.f | 234 ++ dspl/liblapack/SRC/sgetf2.f | 213 + dspl/liblapack/SRC/sgetrf.f | 225 + dspl/liblapack/SRC/sgetrf2.f | 272 ++ dspl/liblapack/SRC/sgetri.f | 261 ++ dspl/liblapack/SRC/sgetrs.f | 225 + dspl/liblapack/SRC/sgetsls.f | 494 +++ dspl/liblapack/SRC/sggbak.f | 306 ++ dspl/liblapack/SRC/sggbal.f | 559 +++ dspl/liblapack/SRC/sgges.f | 680 +++ dspl/liblapack/SRC/sgges3.f | 671 +++ dspl/liblapack/SRC/sggesx.f | 820 ++++ dspl/liblapack/SRC/sggev.f | 592 +++ dspl/liblapack/SRC/sggev3.f | 589 +++ dspl/liblapack/SRC/sggevx.f | 866 ++++ dspl/liblapack/SRC/sggglm.f | 348 ++ dspl/liblapack/SRC/sgghd3.f | 897 ++++ dspl/liblapack/SRC/sgghrd.f | 361 ++ dspl/liblapack/SRC/sgglse.f | 354 ++ dspl/liblapack/SRC/sggqrf.f | 299 ++ dspl/liblapack/SRC/sggrqf.f | 299 ++ dspl/liblapack/SRC/sggsvd3.f | 503 +++ dspl/liblapack/SRC/sggsvp3.f | 571 +++ dspl/liblapack/SRC/sgsvj0.f | 1079 +++++ dspl/liblapack/SRC/sgsvj1.f | 784 ++++ dspl/liblapack/SRC/sgtcon.f | 255 ++ dspl/liblapack/SRC/sgtrfs.f | 474 +++ dspl/liblapack/SRC/sgtsv.f | 333 ++ dspl/liblapack/SRC/sgtsvx.f | 414 ++ dspl/liblapack/SRC/sgttrf.f | 237 ++ dspl/liblapack/SRC/sgttrs.f | 223 + dspl/liblapack/SRC/sgtts2.f | 274 ++ dspl/liblapack/SRC/shgeqz.f | 1367 ++++++ dspl/liblapack/SRC/shsein.f | 530 +++ dspl/liblapack/SRC/shseqr.f | 516 +++ dspl/liblapack/SRC/sisnan.f | 80 + dspl/liblapack/SRC/sla_gbamv.f | 410 ++ dspl/liblapack/SRC/sla_gbrcond.f | 351 ++ dspl/liblapack/SRC/sla_gbrfsx_extended.f | 706 ++++ dspl/liblapack/SRC/sla_gbrpvgrw.f | 160 + dspl/liblapack/SRC/sla_geamv.f | 396 ++ dspl/liblapack/SRC/sla_gercond.f | 327 ++ dspl/liblapack/SRC/sla_gerfsx_extended.f | 689 +++ dspl/liblapack/SRC/sla_gerpvgrw.f | 139 + dspl/liblapack/SRC/sla_lin_berr.f | 153 + dspl/liblapack/SRC/sla_porcond.f | 326 ++ dspl/liblapack/SRC/sla_porfsx_extended.f | 678 +++ dspl/liblapack/SRC/sla_porpvgrw.f | 208 + dspl/liblapack/SRC/sla_syamv.f | 417 ++ dspl/liblapack/SRC/sla_syrcond.f | 339 ++ dspl/liblapack/SRC/sla_syrfsx_extended.f | 707 ++++ dspl/liblapack/SRC/sla_syrpvgrw.f | 320 ++ dspl/liblapack/SRC/sla_wwaddw.f | 111 + dspl/liblapack/SRC/slabad.f | 105 + dspl/liblapack/SRC/slabrd.f | 381 ++ dspl/liblapack/SRC/slacn2.f | 294 ++ dspl/liblapack/SRC/slacon.f | 275 ++ dspl/liblapack/SRC/slacpy.f | 156 + dspl/liblapack/SRC/sladiv.f | 256 ++ dspl/liblapack/SRC/slae2.f | 185 + dspl/liblapack/SRC/slaebz.f | 649 +++ dspl/liblapack/SRC/slaed0.f | 434 ++ dspl/liblapack/SRC/slaed1.f | 274 ++ dspl/liblapack/SRC/slaed2.f | 539 +++ dspl/liblapack/SRC/slaed3.f | 353 ++ dspl/liblapack/SRC/slaed4.f | 917 ++++ dspl/liblapack/SRC/slaed5.f | 189 + dspl/liblapack/SRC/slaed6.f | 410 ++ dspl/liblapack/SRC/slaed7.f | 407 ++ dspl/liblapack/SRC/slaed8.f | 524 +++ dspl/liblapack/SRC/slaed9.f | 294 ++ dspl/liblapack/SRC/slaeda.f | 308 ++ dspl/liblapack/SRC/slaein.f | 632 +++ dspl/liblapack/SRC/slaev2.f | 238 ++ dspl/liblapack/SRC/slaexc.f | 435 ++ dspl/liblapack/SRC/slag2.f | 379 ++ dspl/liblapack/SRC/slag2d.f | 137 + dspl/liblapack/SRC/slags2.f | 362 ++ dspl/liblapack/SRC/slagtf.f | 266 ++ dspl/liblapack/SRC/slagtm.f | 278 ++ dspl/liblapack/SRC/slagts.f | 383 ++ dspl/liblapack/SRC/slagv2.f | 374 ++ dspl/liblapack/SRC/slahqr.f | 613 +++ dspl/liblapack/SRC/slahr2.f | 326 ++ dspl/liblapack/SRC/slaic1.f | 367 ++ dspl/liblapack/SRC/slaisnan.f | 91 + dspl/liblapack/SRC/slaln2.f | 611 +++ dspl/liblapack/SRC/slals0.f | 499 +++ dspl/liblapack/SRC/slalsa.f | 493 +++ dspl/liblapack/SRC/slalsd.f | 523 +++ dspl/liblapack/SRC/slamrg.f | 171 + dspl/liblapack/SRC/slamswlq.f | 418 ++ dspl/liblapack/SRC/slamtsqr.f | 415 ++ dspl/liblapack/SRC/slaneg.f | 227 + dspl/liblapack/SRC/slangb.f | 225 + dspl/liblapack/SRC/slange.f | 211 + dspl/liblapack/SRC/slangt.f | 208 + dspl/liblapack/SRC/slanhs.f | 205 + dspl/liblapack/SRC/slansb.f | 258 ++ dspl/liblapack/SRC/slansf.f | 964 +++++ dspl/liblapack/SRC/slansp.f | 261 ++ dspl/liblapack/SRC/slanst.f | 186 + dspl/liblapack/SRC/slansy.f | 241 ++ dspl/liblapack/SRC/slantb.f | 361 ++ dspl/liblapack/SRC/slantp.f | 355 ++ dspl/liblapack/SRC/slantr.f | 353 ++ dspl/liblapack/SRC/slanv2.f | 289 ++ dspl/liblapack/SRC/slapll.f | 165 + dspl/liblapack/SRC/slapmr.f | 204 + dspl/liblapack/SRC/slapmt.f | 203 + dspl/liblapack/SRC/slapy2.f | 119 + dspl/liblapack/SRC/slapy3.f | 111 + dspl/liblapack/SRC/slaqgb.f | 256 ++ dspl/liblapack/SRC/slaqge.f | 236 ++ dspl/liblapack/SRC/slaqp2.f | 262 ++ dspl/liblapack/SRC/slaqps.f | 359 ++ dspl/liblapack/SRC/slaqr0.f | 739 ++++ dspl/liblapack/SRC/slaqr1.f | 179 + dspl/liblapack/SRC/slaqr2.f | 684 +++ dspl/liblapack/SRC/slaqr3.f | 695 ++++ dspl/liblapack/SRC/slaqr4.f | 742 ++++ dspl/liblapack/SRC/slaqr5.f | 919 ++++ dspl/liblapack/SRC/slaqsb.f | 226 + dspl/liblapack/SRC/slaqsp.f | 212 + dspl/liblapack/SRC/slaqsy.f | 216 + dspl/liblapack/SRC/slaqtr.f | 748 ++++ dspl/liblapack/SRC/slar1v.f | 486 +++ dspl/liblapack/SRC/slar2v.f | 157 + dspl/liblapack/SRC/slarf.f | 227 + dspl/liblapack/SRC/slarfb.f | 710 ++++ dspl/liblapack/SRC/slarfg.f | 196 + dspl/liblapack/SRC/slarfgp.f | 242 ++ dspl/liblapack/SRC/slarft.f | 326 ++ dspl/liblapack/SRC/slarfx.f | 696 ++++ dspl/liblapack/SRC/slarfy.f | 161 + dspl/liblapack/SRC/slargv.f | 167 + dspl/liblapack/SRC/slarnv.f | 178 + dspl/liblapack/SRC/slarra.f | 210 + dspl/liblapack/SRC/slarrb.f | 407 ++ dspl/liblapack/SRC/slarrc.f | 251 ++ dspl/liblapack/SRC/slarrd.f | 869 ++++ dspl/liblapack/SRC/slarre.f | 908 ++++ dspl/liblapack/SRC/slarrf.f | 495 +++ dspl/liblapack/SRC/slarrj.f | 379 ++ dspl/liblapack/SRC/slarrk.f | 256 ++ dspl/liblapack/SRC/slarrr.f | 211 + dspl/liblapack/SRC/slarrv.f | 1045 +++++ dspl/liblapack/SRC/slarscl2.f | 119 + dspl/liblapack/SRC/slartg.f | 204 + dspl/liblapack/SRC/slartgp.f | 202 + dspl/liblapack/SRC/slartgs.f | 161 + dspl/liblapack/SRC/slartv.f | 147 + dspl/liblapack/SRC/slaruv.f | 447 ++ dspl/liblapack/SRC/slarz.f | 236 ++ dspl/liblapack/SRC/slarzb.f | 323 ++ dspl/liblapack/SRC/slarzt.f | 264 ++ dspl/liblapack/SRC/slas2.f | 183 + dspl/liblapack/SRC/slascl.f | 368 ++ dspl/liblapack/SRC/slascl2.f | 119 + dspl/liblapack/SRC/slasd0.f | 316 ++ dspl/liblapack/SRC/slasd1.f | 326 ++ dspl/liblapack/SRC/slasd2.f | 634 +++ dspl/liblapack/SRC/slasd3.f | 469 +++ dspl/liblapack/SRC/slasd4.f | 1061 +++++ dspl/liblapack/SRC/slasd5.f | 231 + dspl/liblapack/SRC/slasd6.f | 443 ++ dspl/liblapack/SRC/slasd7.f | 580 +++ dspl/liblapack/SRC/slasd8.f | 342 ++ dspl/liblapack/SRC/slasda.f | 514 +++ dspl/liblapack/SRC/slasdq.f | 413 ++ dspl/liblapack/SRC/slasdt.f | 172 + dspl/liblapack/SRC/slaset.f | 184 + dspl/liblapack/SRC/slasq1.f | 224 + dspl/liblapack/SRC/slasq2.f | 588 +++ dspl/liblapack/SRC/slasq3.f | 421 ++ dspl/liblapack/SRC/slasq4.f | 424 ++ dspl/liblapack/SRC/slasq5.f | 411 ++ dspl/liblapack/SRC/slasq6.f | 254 ++ dspl/liblapack/SRC/slasr.f | 436 ++ dspl/liblapack/SRC/slasrt.f | 303 ++ dspl/liblapack/SRC/slassq.f | 155 + dspl/liblapack/SRC/slasv2.f | 325 ++ dspl/liblapack/SRC/slaswlq.f | 258 ++ dspl/liblapack/SRC/slaswp.f | 193 + dspl/liblapack/SRC/slasy2.f | 482 +++ dspl/liblapack/SRC/slasyf.f | 822 ++++ dspl/liblapack/SRC/slasyf_aa.f | 493 +++ dspl/liblapack/SRC/slasyf_rk.f | 965 +++++ dspl/liblapack/SRC/slasyf_rook.f | 892 ++++ dspl/liblapack/SRC/slatbs.f | 812 ++++ dspl/liblapack/SRC/slatdf.f | 323 ++ dspl/liblapack/SRC/slatps.f | 795 ++++ dspl/liblapack/SRC/slatrd.f | 336 ++ dspl/liblapack/SRC/slatrs.f | 787 ++++ dspl/liblapack/SRC/slatrz.f | 200 + dspl/liblapack/SRC/slatsqr.f | 255 ++ dspl/liblapack/SRC/slauu2.f | 198 + dspl/liblapack/SRC/slauum.f | 218 + dspl/liblapack/SRC/sopgtr.f | 232 ++ dspl/liblapack/SRC/sopmtr.f | 339 ++ dspl/liblapack/SRC/sorbdb.f | 689 +++ dspl/liblapack/SRC/sorbdb1.f | 323 ++ dspl/liblapack/SRC/sorbdb2.f | 332 ++ dspl/liblapack/SRC/sorbdb3.f | 333 ++ dspl/liblapack/SRC/sorbdb4.f | 378 ++ dspl/liblapack/SRC/sorbdb5.f | 274 ++ dspl/liblapack/SRC/sorbdb6.f | 312 ++ dspl/liblapack/SRC/sorcsd.f | 615 +++ dspl/liblapack/SRC/sorcsd2by1.f | 740 ++++ dspl/liblapack/SRC/sorg2l.f | 198 + dspl/liblapack/SRC/sorg2r.f | 200 + dspl/liblapack/SRC/sorgbr.f | 337 ++ dspl/liblapack/SRC/sorghr.f | 240 ++ dspl/liblapack/SRC/sorgl2.f | 204 + dspl/liblapack/SRC/sorglq.f | 289 ++ dspl/liblapack/SRC/sorgql.f | 296 ++ dspl/liblapack/SRC/sorgqr.f | 290 ++ dspl/liblapack/SRC/sorgr2.f | 202 + dspl/liblapack/SRC/sorgrq.f | 296 ++ dspl/liblapack/SRC/sorgtr.f | 255 ++ dspl/liblapack/SRC/sorm22.f | 441 ++ dspl/liblapack/SRC/sorm2l.f | 278 ++ dspl/liblapack/SRC/sorm2r.f | 282 ++ dspl/liblapack/SRC/sormbr.f | 374 ++ dspl/liblapack/SRC/sormhr.f | 296 ++ dspl/liblapack/SRC/sorml2.f | 282 ++ dspl/liblapack/SRC/sormlq.f | 349 ++ dspl/liblapack/SRC/sormql.f | 341 ++ dspl/liblapack/SRC/sormqr.f | 342 ++ dspl/liblapack/SRC/sormr2.f | 278 ++ dspl/liblapack/SRC/sormr3.f | 299 ++ dspl/liblapack/SRC/sormrq.f | 348 ++ dspl/liblapack/SRC/sormrz.f | 379 ++ dspl/liblapack/SRC/sormtr.f | 312 ++ dspl/liblapack/SRC/spbcon.f | 271 ++ dspl/liblapack/SRC/spbequ.f | 242 ++ dspl/liblapack/SRC/spbrfs.f | 443 ++ dspl/liblapack/SRC/spbstf.f | 319 ++ dspl/liblapack/SRC/spbsv.f | 229 + dspl/liblapack/SRC/spbsvx.f | 545 +++ dspl/liblapack/SRC/spbtf2.f | 263 ++ dspl/liblapack/SRC/spbtrf.f | 435 ++ dspl/liblapack/SRC/spbtrs.f | 220 + dspl/liblapack/SRC/spftrf.f | 457 ++ dspl/liblapack/SRC/spftri.f | 423 ++ dspl/liblapack/SRC/spftrs.f | 280 ++ dspl/liblapack/SRC/spocon.f | 253 ++ dspl/liblapack/SRC/spoequ.f | 205 + dspl/liblapack/SRC/spoequb.f | 221 + dspl/liblapack/SRC/sporfs.f | 430 ++ dspl/liblapack/SRC/sporfsx.f | 693 +++ dspl/liblapack/SRC/sposv.f | 193 + dspl/liblapack/SRC/sposvx.f | 494 +++ dspl/liblapack/SRC/sposvxx.f | 686 +++ dspl/liblapack/SRC/spotf2.f | 230 + dspl/liblapack/SRC/spotrf.f | 246 ++ dspl/liblapack/SRC/spotrf2.f | 237 ++ dspl/liblapack/SRC/spotri.f | 159 + dspl/liblapack/SRC/spotrs.f | 204 + dspl/liblapack/SRC/sppcon.f | 248 ++ dspl/liblapack/SRC/sppequ.f | 238 ++ dspl/liblapack/SRC/spprfs.f | 421 ++ dspl/liblapack/SRC/sppsv.f | 205 + dspl/liblapack/SRC/sppsvx.f | 493 +++ dspl/liblapack/SRC/spptrf.f | 240 ++ dspl/liblapack/SRC/spptri.f | 188 + dspl/liblapack/SRC/spptrs.f | 203 + dspl/liblapack/SRC/spstf2.f | 386 ++ dspl/liblapack/SRC/spstrf.f | 444 ++ dspl/liblapack/SRC/sptcon.f | 221 + dspl/liblapack/SRC/spteqr.f | 261 ++ dspl/liblapack/SRC/sptrfs.f | 395 ++ dspl/liblapack/SRC/sptsv.f | 167 + dspl/liblapack/SRC/sptsvx.f | 336 ++ dspl/liblapack/SRC/spttrf.f | 211 + dspl/liblapack/SRC/spttrs.f | 182 + dspl/liblapack/SRC/sptts2.f | 158 + dspl/liblapack/SRC/srscl.f | 174 + dspl/liblapack/SRC/ssb2st_kernels.f | 380 ++ dspl/liblapack/SRC/ssbev.f | 287 ++ dspl/liblapack/SRC/ssbev_2stage.f | 380 ++ dspl/liblapack/SRC/ssbevd.f | 360 ++ dspl/liblapack/SRC/ssbevd_2stage.f | 412 ++ dspl/liblapack/SRC/ssbevx.f | 543 +++ dspl/liblapack/SRC/ssbevx_2stage.f | 636 +++ dspl/liblapack/SRC/ssbgst.f | 1434 +++++++ dspl/liblapack/SRC/ssbgv.f | 280 ++ dspl/liblapack/SRC/ssbgvd.f | 372 ++ dspl/liblapack/SRC/ssbgvx.f | 522 +++ dspl/liblapack/SRC/ssbtrd.f | 641 +++ dspl/liblapack/SRC/ssfrk.f | 543 +++ dspl/liblapack/SRC/sspcon.f | 238 ++ dspl/liblapack/SRC/sspev.f | 262 ++ dspl/liblapack/SRC/sspevd.f | 337 ++ dspl/liblapack/SRC/sspevx.f | 496 +++ dspl/liblapack/SRC/sspgst.f | 274 ++ dspl/liblapack/SRC/sspgv.f | 277 ++ dspl/liblapack/SRC/sspgvd.f | 364 ++ dspl/liblapack/SRC/sspgvx.f | 417 ++ dspl/liblapack/SRC/ssprfs.f | 431 ++ dspl/liblapack/SRC/sspsv.f | 224 + dspl/liblapack/SRC/sspsvx.f | 385 ++ dspl/liblapack/SRC/ssptrd.f | 299 ++ dspl/liblapack/SRC/ssptrf.f | 614 +++ dspl/liblapack/SRC/ssptri.f | 401 ++ dspl/liblapack/SRC/ssptrs.f | 450 ++ dspl/liblapack/SRC/sstebz.f | 770 ++++ dspl/liblapack/SRC/sstedc.f | 482 +++ dspl/liblapack/SRC/sstegr.f | 302 ++ dspl/liblapack/SRC/sstein.f | 453 ++ dspl/liblapack/SRC/sstemr.f | 772 ++++ dspl/liblapack/SRC/ssteqr.f | 572 +++ dspl/liblapack/SRC/ssterf.f | 425 ++ dspl/liblapack/SRC/sstev.f | 235 ++ dspl/liblapack/SRC/sstevd.f | 302 ++ dspl/liblapack/SRC/sstevr.f | 582 +++ dspl/liblapack/SRC/sstevx.f | 464 +++ dspl/liblapack/SRC/ssycon.f | 244 ++ dspl/liblapack/SRC/ssycon_3.f | 285 ++ dspl/liblapack/SRC/ssycon_rook.f | 258 ++ dspl/liblapack/SRC/ssyconv.f | 366 ++ dspl/liblapack/SRC/ssyconvf.f | 559 +++ dspl/liblapack/SRC/ssyconvf_rook.f | 544 +++ dspl/liblapack/SRC/ssyequb.f | 334 ++ dspl/liblapack/SRC/ssyev.f | 286 ++ dspl/liblapack/SRC/ssyev_2stage.f | 348 ++ dspl/liblapack/SRC/ssyevd.f | 355 ++ dspl/liblapack/SRC/ssyevd_2stage.f | 410 ++ dspl/liblapack/SRC/ssyevr.f | 690 +++ dspl/liblapack/SRC/ssyevr_2stage.f | 745 ++++ dspl/liblapack/SRC/ssyevx.f | 554 +++ dspl/liblapack/SRC/ssyevx_2stage.f | 612 +++ dspl/liblapack/SRC/ssygs2.f | 283 ++ dspl/liblapack/SRC/ssygst.f | 321 ++ dspl/liblapack/SRC/ssygv.f | 314 ++ dspl/liblapack/SRC/ssygv_2stage.f | 370 ++ dspl/liblapack/SRC/ssygvd.f | 380 ++ dspl/liblapack/SRC/ssygvx.f | 465 +++ dspl/liblapack/SRC/ssyrfs.f | 441 ++ dspl/liblapack/SRC/ssyrfsx.f | 700 ++++ dspl/liblapack/SRC/ssysv.f | 270 ++ dspl/liblapack/SRC/ssysv_aa.f | 253 ++ dspl/liblapack/SRC/ssysv_aa_2stage.f | 279 ++ dspl/liblapack/SRC/ssysv_rk.f | 317 ++ dspl/liblapack/SRC/ssysv_rook.f | 293 ++ dspl/liblapack/SRC/ssysvx.f | 416 ++ dspl/liblapack/SRC/ssysvxx.f | 699 ++++ dspl/liblapack/SRC/ssyswapr.f | 193 + dspl/liblapack/SRC/ssytd2.f | 322 ++ dspl/liblapack/SRC/ssytf2.f | 611 +++ dspl/liblapack/SRC/ssytf2_rk.f | 943 +++++ dspl/liblapack/SRC/ssytf2_rook.f | 813 ++++ dspl/liblapack/SRC/ssytrd.f | 376 ++ dspl/liblapack/SRC/ssytrd_2stage.f | 337 ++ dspl/liblapack/SRC/ssytrd_sb2st.F | 556 +++ dspl/liblapack/SRC/ssytrd_sy2sb.f | 517 +++ dspl/liblapack/SRC/ssytrf.f | 363 ++ dspl/liblapack/SRC/ssytrf_aa.f | 467 +++ dspl/liblapack/SRC/ssytrf_aa_2stage.f | 647 +++ dspl/liblapack/SRC/ssytrf_rk.f | 498 +++ dspl/liblapack/SRC/ssytrf_rook.f | 393 ++ dspl/liblapack/SRC/ssytri.f | 382 ++ dspl/liblapack/SRC/ssytri2.f | 205 + dspl/liblapack/SRC/ssytri2x.f | 590 +++ dspl/liblapack/SRC/ssytri_3.f | 248 ++ dspl/liblapack/SRC/ssytri_3x.f | 645 +++ dspl/liblapack/SRC/ssytri_rook.f | 450 ++ dspl/liblapack/SRC/ssytrs.f | 445 ++ dspl/liblapack/SRC/ssytrs2.f | 361 ++ dspl/liblapack/SRC/ssytrs_3.f | 371 ++ dspl/liblapack/SRC/ssytrs_aa.f | 294 ++ dspl/liblapack/SRC/ssytrs_aa_2stage.f | 281 ++ dspl/liblapack/SRC/ssytrs_rook.f | 484 +++ dspl/liblapack/SRC/stbcon.f | 284 ++ dspl/liblapack/SRC/stbrfs.f | 485 +++ dspl/liblapack/SRC/stbtrs.f | 244 ++ dspl/liblapack/SRC/stfsm.f | 1005 +++++ dspl/liblapack/SRC/stftri.f | 472 +++ dspl/liblapack/SRC/stfttp.f | 517 +++ dspl/liblapack/SRC/stfttr.f | 495 +++ dspl/liblapack/SRC/stgevc.f | 1211 ++++++ dspl/liblapack/SRC/stgex2.f | 697 ++++ dspl/liblapack/SRC/stgexc.f | 544 +++ dspl/liblapack/SRC/stgsen.f | 865 ++++ dspl/liblapack/SRC/stgsja.f | 655 +++ dspl/liblapack/SRC/stgsna.f | 700 ++++ dspl/liblapack/SRC/stgsy2.f | 1075 +++++ dspl/liblapack/SRC/stgsyl.f | 682 +++ dspl/liblapack/SRC/stpcon.f | 267 ++ dspl/liblapack/SRC/stplqt.f | 270 ++ dspl/liblapack/SRC/stplqt2.f | 312 ++ dspl/liblapack/SRC/stpmlqt.f | 366 ++ dspl/liblapack/SRC/stpmqrt.f | 368 ++ dspl/liblapack/SRC/stpqrt.f | 270 ++ dspl/liblapack/SRC/stpqrt2.f | 302 ++ dspl/liblapack/SRC/stprfb.f | 811 ++++ dspl/liblapack/SRC/stprfs.f | 473 +++ dspl/liblapack/SRC/stptri.f | 241 ++ dspl/liblapack/SRC/stptrs.f | 228 + dspl/liblapack/SRC/stpttf.f | 502 +++ dspl/liblapack/SRC/stpttr.f | 176 + dspl/liblapack/SRC/strcon.f | 276 ++ dspl/liblapack/SRC/strevc.f | 1077 +++++ dspl/liblapack/SRC/strevc3.f | 1304 ++++++ dspl/liblapack/SRC/strexc.f | 428 ++ dspl/liblapack/SRC/strrfs.f | 472 +++ dspl/liblapack/SRC/strsen.f | 571 +++ dspl/liblapack/SRC/strsna.f | 603 +++ dspl/liblapack/SRC/strsyl.f | 1002 +++++ dspl/liblapack/SRC/strti2.f | 212 + dspl/liblapack/SRC/strtri.f | 242 ++ dspl/liblapack/SRC/strtrs.f | 226 + dspl/liblapack/SRC/strttf.f | 492 +++ dspl/liblapack/SRC/strttp.f | 175 + dspl/liblapack/SRC/stzrzf.f | 313 ++ dspl/liblapack/SRC/xerbla.f | 99 + dspl/liblapack/SRC/xerbla_array.f | 129 + dspl/liblapack/SRC/zbbcsd.f | 1085 +++++ dspl/liblapack/SRC/zbdsqr.f | 842 ++++ dspl/liblapack/SRC/zcgesv.f | 448 ++ dspl/liblapack/SRC/zcposv.f | 457 ++ dspl/liblapack/SRC/zdrscl.f | 174 + dspl/liblapack/SRC/zgbbrd.f | 573 +++ dspl/liblapack/SRC/zgbcon.f | 320 ++ dspl/liblapack/SRC/zgbequ.f | 333 ++ dspl/liblapack/SRC/zgbequb.f | 350 ++ dspl/liblapack/SRC/zgbrfs.f | 475 +++ dspl/liblapack/SRC/zgbrfsx.f | 763 ++++ dspl/liblapack/SRC/zgbsv.f | 223 + dspl/liblapack/SRC/zgbsvx.f | 647 +++ dspl/liblapack/SRC/zgbsvxx.f | 800 ++++ dspl/liblapack/SRC/zgbtf2.f | 277 ++ dspl/liblapack/SRC/zgbtrf.f | 517 +++ dspl/liblapack/SRC/zgbtrs.f | 297 ++ dspl/liblapack/SRC/zgebak.f | 270 ++ dspl/liblapack/SRC/zgebal.f | 400 ++ dspl/liblapack/SRC/zgebd2.f | 331 ++ dspl/liblapack/SRC/zgebrd.f | 352 ++ dspl/liblapack/SRC/zgecon.f | 269 ++ dspl/liblapack/SRC/zgeequ.f | 313 ++ dspl/liblapack/SRC/zgeequb.f | 330 ++ dspl/liblapack/SRC/zgees.f | 424 ++ dspl/liblapack/SRC/zgeesx.f | 499 +++ dspl/liblapack/SRC/zgeev.f | 503 +++ dspl/liblapack/SRC/zgeevx.f | 667 +++ dspl/liblapack/SRC/zgehd2.f | 224 + dspl/liblapack/SRC/zgehrd.f | 356 ++ dspl/liblapack/SRC/zgejsv.f | 2237 ++++++++++ dspl/liblapack/SRC/zgelq.f | 306 ++ dspl/liblapack/SRC/zgelq2.f | 194 + dspl/liblapack/SRC/zgelqf.f | 269 ++ dspl/liblapack/SRC/zgelqt.f | 210 + dspl/liblapack/SRC/zgelqt3.f | 261 ++ dspl/liblapack/SRC/zgels.f | 505 +++ dspl/liblapack/SRC/zgelsd.f | 665 +++ dspl/liblapack/SRC/zgelss.f | 771 ++++ dspl/liblapack/SRC/zgelsy.f | 477 +++ dspl/liblapack/SRC/zgemlq.f | 282 ++ dspl/liblapack/SRC/zgemlqt.f | 289 ++ dspl/liblapack/SRC/zgemqr.f | 285 ++ dspl/liblapack/SRC/zgemqrt.f | 291 ++ dspl/liblapack/SRC/zgeql2.f | 192 + dspl/liblapack/SRC/zgeqlf.f | 287 ++ dspl/liblapack/SRC/zgeqp3.f | 372 ++ dspl/liblapack/SRC/zgeqr.f | 307 ++ dspl/liblapack/SRC/zgeqr2.f | 192 + dspl/liblapack/SRC/zgeqr2p.f | 195 + dspl/liblapack/SRC/zgeqrf.f | 270 ++ dspl/liblapack/SRC/zgeqrfp.f | 273 ++ dspl/liblapack/SRC/zgeqrt.f | 218 + dspl/liblapack/SRC/zgeqrt2.f | 227 + dspl/liblapack/SRC/zgeqrt3.f | 257 ++ dspl/liblapack/SRC/zgerfs.f | 448 ++ dspl/liblapack/SRC/zgerfsx.f | 734 ++++ dspl/liblapack/SRC/zgerq2.f | 194 + dspl/liblapack/SRC/zgerqf.f | 287 ++ dspl/liblapack/SRC/zgesc2.f | 202 + dspl/liblapack/SRC/zgesdd.f | 2220 ++++++++++ dspl/liblapack/SRC/zgesv.f | 179 + dspl/liblapack/SRC/zgesvd.f | 3705 ++++++++++++++++ dspl/liblapack/SRC/zgesvdx.f | 857 ++++ dspl/liblapack/SRC/zgesvj.f | 1443 +++++++ dspl/liblapack/SRC/zgesvx.f | 605 +++ dspl/liblapack/SRC/zgesvxx.f | 769 ++++ dspl/liblapack/SRC/zgetc2.f | 234 ++ dspl/liblapack/SRC/zgetf2.f | 214 + dspl/liblapack/SRC/zgetrf.f | 225 + dspl/liblapack/SRC/zgetrf2.f | 274 ++ dspl/liblapack/SRC/zgetri.f | 262 ++ dspl/liblapack/SRC/zgetrs.f | 225 + dspl/liblapack/SRC/zgetsls.f | 497 +++ dspl/liblapack/SRC/zggbak.f | 307 ++ dspl/liblapack/SRC/zggbal.f | 572 +++ dspl/liblapack/SRC/zgges.f | 599 +++ dspl/liblapack/SRC/zgges3.f | 595 +++ dspl/liblapack/SRC/zggesx.f | 718 ++++ dspl/liblapack/SRC/zggev.f | 558 +++ dspl/liblapack/SRC/zggev3.f | 559 +++ dspl/liblapack/SRC/zggevx.f | 804 ++++ dspl/liblapack/SRC/zggglm.f | 349 ++ dspl/liblapack/SRC/zgghd3.f | 895 ++++ dspl/liblapack/SRC/zgghrd.f | 361 ++ dspl/liblapack/SRC/zgglse.f | 355 ++ dspl/liblapack/SRC/zggqrf.f | 299 ++ dspl/liblapack/SRC/zggrqf.f | 299 ++ dspl/liblapack/SRC/zggsvd3.f | 505 +++ dspl/liblapack/SRC/zggsvp3.f | 579 +++ dspl/liblapack/SRC/zgsvj0.f | 935 +++++ dspl/liblapack/SRC/zgsvj1.f | 706 ++++ dspl/liblapack/SRC/zgtcon.f | 253 ++ dspl/liblapack/SRC/zgtrfs.f | 487 +++ dspl/liblapack/SRC/zgtsv.f | 244 ++ dspl/liblapack/SRC/zgtsvx.f | 416 ++ dspl/liblapack/SRC/zgttrf.f | 243 ++ dspl/liblapack/SRC/zgttrs.f | 225 + dspl/liblapack/SRC/zgtts2.f | 349 ++ dspl/liblapack/SRC/zhb2st_kernels.f | 377 ++ dspl/liblapack/SRC/zhbev.f | 294 ++ dspl/liblapack/SRC/zhbev_2stage.f | 389 ++ dspl/liblapack/SRC/zhbevd.f | 401 ++ dspl/liblapack/SRC/zhbevd_2stage.f | 458 ++ dspl/liblapack/SRC/zhbevx.f | 553 +++ dspl/liblapack/SRC/zhbevx_2stage.f | 649 +++ dspl/liblapack/SRC/zhbgst.f | 1470 +++++++ dspl/liblapack/SRC/zhbgv.f | 287 ++ dspl/liblapack/SRC/zhbgvd.f | 407 ++ dspl/liblapack/SRC/zhbgvx.f | 535 +++ dspl/liblapack/SRC/zhbtrd.f | 677 +++ dspl/liblapack/SRC/zhecon.f | 239 ++ dspl/liblapack/SRC/zhecon_3.f | 285 ++ dspl/liblapack/SRC/zhecon_rook.f | 253 ++ dspl/liblapack/SRC/zheequb.f | 343 ++ dspl/liblapack/SRC/zheev.f | 298 ++ dspl/liblapack/SRC/zheev_2stage.f | 355 ++ dspl/liblapack/SRC/zheevd.f | 398 ++ dspl/liblapack/SRC/zheevd_2stage.f | 455 ++ dspl/liblapack/SRC/zheevr.f | 724 ++++ dspl/liblapack/SRC/zheevr_2stage.f | 779 ++++ dspl/liblapack/SRC/zheevx.f | 564 +++ dspl/liblapack/SRC/zheevx_2stage.f | 622 +++ dspl/liblapack/SRC/zhegs2.f | 296 ++ dspl/liblapack/SRC/zhegst.f | 331 ++ dspl/liblapack/SRC/zhegv.f | 321 ++ dspl/liblapack/SRC/zhegv_2stage.f | 379 ++ dspl/liblapack/SRC/zhegvd.f | 412 ++ dspl/liblapack/SRC/zhegvx.f | 476 +++ dspl/liblapack/SRC/zherfs.f | 446 ++ dspl/liblapack/SRC/zherfsx.f | 700 ++++ dspl/liblapack/SRC/zhesv.f | 271 ++ dspl/liblapack/SRC/zhesv_aa.f | 252 ++ dspl/liblapack/SRC/zhesv_aa_2stage.f | 284 ++ dspl/liblapack/SRC/zhesv_rk.f | 317 ++ dspl/liblapack/SRC/zhesv_rook.f | 295 ++ dspl/liblapack/SRC/zhesvx.f | 417 ++ dspl/liblapack/SRC/zhesvxx.f | 697 ++++ dspl/liblapack/SRC/zheswapr.f | 203 + dspl/liblapack/SRC/zhetd2.f | 334 ++ dspl/liblapack/SRC/zhetf2.f | 661 +++ dspl/liblapack/SRC/zhetf2_rk.f | 1039 +++++ dspl/liblapack/SRC/zhetf2_rook.f | 910 ++++ dspl/liblapack/SRC/zhetrd.f | 378 ++ dspl/liblapack/SRC/zhetrd_2stage.f | 337 ++ dspl/liblapack/SRC/zhetrd_hb2st.F | 587 +++ dspl/liblapack/SRC/zhetrd_he2hb.f | 517 +++ dspl/liblapack/SRC/zhetrf.f | 357 ++ dspl/liblapack/SRC/zhetrf_aa.f | 469 +++ dspl/liblapack/SRC/zhetrf_aa_2stage.f | 663 +++ dspl/liblapack/SRC/zhetrf_rk.f | 498 +++ dspl/liblapack/SRC/zhetrf_rook.f | 397 ++ dspl/liblapack/SRC/zhetri.f | 397 ++ dspl/liblapack/SRC/zhetri2.f | 205 + dspl/liblapack/SRC/zhetri2x.f | 590 +++ dspl/liblapack/SRC/zhetri_3.f | 248 ++ dspl/liblapack/SRC/zhetri_3x.f | 649 +++ dspl/liblapack/SRC/zhetri_rook.f | 516 +++ dspl/liblapack/SRC/zhetrs.f | 469 +++ dspl/liblapack/SRC/zhetrs2.f | 359 ++ dspl/liblapack/SRC/zhetrs_3.f | 374 ++ dspl/liblapack/SRC/zhetrs_aa.f | 288 ++ dspl/liblapack/SRC/zhetrs_aa_2stage.f | 283 ++ dspl/liblapack/SRC/zhetrs_rook.f | 503 +++ dspl/liblapack/SRC/zhfrk.f | 552 +++ dspl/liblapack/SRC/zhgeqz.f | 876 ++++ dspl/liblapack/SRC/zhpcon.f | 231 + dspl/liblapack/SRC/zhpev.f | 276 ++ dspl/liblapack/SRC/zhpevd.f | 378 ++ dspl/liblapack/SRC/zhpevx.f | 507 +++ dspl/liblapack/SRC/zhpgst.f | 281 ++ dspl/liblapack/SRC/zhpgv.f | 282 ++ dspl/liblapack/SRC/zhpgvd.f | 393 ++ dspl/liblapack/SRC/zhpgvx.f | 421 ++ dspl/liblapack/SRC/zhprfs.f | 438 ++ dspl/liblapack/SRC/zhpsv.f | 224 + dspl/liblapack/SRC/zhpsvx.f | 387 ++ dspl/liblapack/SRC/zhptrd.f | 310 ++ dspl/liblapack/SRC/zhptrf.f | 650 +++ dspl/liblapack/SRC/zhptri.f | 410 ++ dspl/liblapack/SRC/zhptrs.f | 474 +++ dspl/liblapack/SRC/zhsein.f | 468 +++ dspl/liblapack/SRC/zhseqr.f | 498 +++ dspl/liblapack/SRC/zla_gbamv.f | 422 ++ dspl/liblapack/SRC/zla_gbrcond_c.f | 344 ++ dspl/liblapack/SRC/zla_gbrcond_x.f | 321 ++ dspl/liblapack/SRC/zla_gbrfsx_extended.f | 713 ++++ dspl/liblapack/SRC/zla_gbrpvgrw.f | 167 + dspl/liblapack/SRC/zla_geamv.f | 406 ++ dspl/liblapack/SRC/zla_gercond_c.f | 318 ++ dspl/liblapack/SRC/zla_gercond_x.f | 294 ++ dspl/liblapack/SRC/zla_gerfsx_extended.f | 697 ++++ dspl/liblapack/SRC/zla_gerpvgrw.f | 149 + dspl/liblapack/SRC/zla_heamv.f | 426 ++ dspl/liblapack/SRC/zla_hercond_c.f | 329 ++ dspl/liblapack/SRC/zla_hercond_x.f | 299 ++ dspl/liblapack/SRC/zla_herfsx_extended.f | 717 ++++ dspl/liblapack/SRC/zla_herpvgrw.f | 330 ++ dspl/liblapack/SRC/zla_lin_berr.f | 160 + dspl/liblapack/SRC/zla_porcond_c.f | 321 ++ dspl/liblapack/SRC/zla_porcond_x.f | 290 ++ dspl/liblapack/SRC/zla_porfsx_extended.f | 688 +++ dspl/liblapack/SRC/zla_porpvgrw.f | 218 + dspl/liblapack/SRC/zla_syamv.f | 428 ++ dspl/liblapack/SRC/zla_syrcond_c.f | 330 ++ dspl/liblapack/SRC/zla_syrcond_x.f | 300 ++ dspl/liblapack/SRC/zla_syrfsx_extended.f | 717 ++++ dspl/liblapack/SRC/zla_syrpvgrw.f | 331 ++ dspl/liblapack/SRC/zla_wwaddw.f | 110 + dspl/liblapack/SRC/zlabrd.f | 420 ++ dspl/liblapack/SRC/zlacgv.f | 116 + dspl/liblapack/SRC/zlacn2.f | 298 ++ dspl/liblapack/SRC/zlacon.f | 281 ++ dspl/liblapack/SRC/zlacp2.f | 161 + dspl/liblapack/SRC/zlacpy.f | 159 + dspl/liblapack/SRC/zlacrm.f | 185 + dspl/liblapack/SRC/zlacrt.f | 160 + dspl/liblapack/SRC/zladiv.f | 97 + dspl/liblapack/SRC/zlaed0.f | 371 ++ dspl/liblapack/SRC/zlaed7.f | 385 ++ dspl/liblapack/SRC/zlaed8.f | 486 +++ dspl/liblapack/SRC/zlaein.f | 354 ++ dspl/liblapack/SRC/zlaesy.f | 221 + dspl/liblapack/SRC/zlaev2.f | 165 + dspl/liblapack/SRC/zlag2c.f | 157 + dspl/liblapack/SRC/zlags2.f | 399 ++ dspl/liblapack/SRC/zlagtm.f | 321 ++ dspl/liblapack/SRC/zlahef.f | 972 +++++ dspl/liblapack/SRC/zlahef_aa.f | 501 +++ dspl/liblapack/SRC/zlahef_rk.f | 1234 ++++++ dspl/liblapack/SRC/zlahef_rook.f | 1176 ++++++ dspl/liblapack/SRC/zlahqr.f | 571 +++ dspl/liblapack/SRC/zlahr2.f | 328 ++ dspl/liblapack/SRC/zlaic1.f | 371 ++ dspl/liblapack/SRC/zlals0.f | 556 +++ dspl/liblapack/SRC/zlalsa.f | 635 +++ dspl/liblapack/SRC/zlalsd.f | 693 +++ dspl/liblapack/SRC/zlamswlq.f | 419 ++ dspl/liblapack/SRC/zlamtsqr.f | 415 ++ dspl/liblapack/SRC/zlangb.f | 226 + dspl/liblapack/SRC/zlange.f | 213 + dspl/liblapack/SRC/zlangt.f | 208 + dspl/liblapack/SRC/zlanhb.f | 276 ++ dspl/liblapack/SRC/zlanhe.f | 258 ++ dspl/liblapack/SRC/zlanhf.f | 1573 +++++++ dspl/liblapack/SRC/zlanhp.f | 269 ++ dspl/liblapack/SRC/zlanhs.f | 207 + dspl/liblapack/SRC/zlanht.f | 188 + dspl/liblapack/SRC/zlansb.f | 260 ++ dspl/liblapack/SRC/zlansp.f | 272 ++ dspl/liblapack/SRC/zlansy.f | 243 ++ dspl/liblapack/SRC/zlantb.f | 363 ++ dspl/liblapack/SRC/zlantp.f | 357 ++ dspl/liblapack/SRC/zlantr.f | 355 ++ dspl/liblapack/SRC/zlapll.f | 169 + dspl/liblapack/SRC/zlapmr.f | 204 + dspl/liblapack/SRC/zlapmt.f | 203 + dspl/liblapack/SRC/zlaqgb.f | 258 ++ dspl/liblapack/SRC/zlaqge.f | 238 ++ dspl/liblapack/SRC/zlaqhb.f | 230 + dspl/liblapack/SRC/zlaqhe.f | 223 + dspl/liblapack/SRC/zlaqhp.f | 219 + dspl/liblapack/SRC/zlaqp2.f | 266 ++ dspl/liblapack/SRC/zlaqps.f | 370 ++ dspl/liblapack/SRC/zlaqr0.f | 703 ++++ dspl/liblapack/SRC/zlaqr1.f | 172 + dspl/liblapack/SRC/zlaqr2.f | 567 +++ dspl/liblapack/SRC/zlaqr3.f | 578 +++ dspl/liblapack/SRC/zlaqr4.f | 703 ++++ dspl/liblapack/SRC/zlaqr5.f | 908 ++++ dspl/liblapack/SRC/zlaqsb.f | 228 + dspl/liblapack/SRC/zlaqsp.f | 214 + dspl/liblapack/SRC/zlaqsy.f | 218 + dspl/liblapack/SRC/zlar1v.f | 488 +++ dspl/liblapack/SRC/zlar2v.f | 169 + dspl/liblapack/SRC/zlarcm.f | 185 + dspl/liblapack/SRC/zlarf.f | 232 ++ dspl/liblapack/SRC/zlarfb.f | 731 ++++ dspl/liblapack/SRC/zlarfg.f | 203 + dspl/liblapack/SRC/zlarfgp.f | 272 ++ dspl/liblapack/SRC/zlarft.f | 327 ++ dspl/liblapack/SRC/zlarfx.f | 700 ++++ dspl/liblapack/SRC/zlarfy.f | 163 + dspl/liblapack/SRC/zlargv.f | 300 ++ dspl/liblapack/SRC/zlarnv.f | 193 + dspl/liblapack/SRC/zlarrv.f | 1060 +++++ dspl/liblapack/SRC/zlarscl2.f | 122 + dspl/liblapack/SRC/zlartg.f | 250 ++ dspl/liblapack/SRC/zlartv.f | 150 + dspl/liblapack/SRC/zlarz.f | 241 ++ dspl/liblapack/SRC/zlarzb.f | 337 ++ dspl/liblapack/SRC/zlarzt.f | 266 ++ dspl/liblapack/SRC/zlascl.f | 368 ++ dspl/liblapack/SRC/zlascl2.f | 122 + dspl/liblapack/SRC/zlaset.f | 184 + dspl/liblapack/SRC/zlasr.f | 439 ++ dspl/liblapack/SRC/zlassq.f | 168 + dspl/liblapack/SRC/zlaswlq.f | 258 ++ dspl/liblapack/SRC/zlaswp.f | 193 + dspl/liblapack/SRC/zlasyf.f | 830 ++++ dspl/liblapack/SRC/zlasyf_aa.f | 493 +++ dspl/liblapack/SRC/zlasyf_rk.f | 974 +++++ dspl/liblapack/SRC/zlasyf_rook.f | 900 ++++ dspl/liblapack/SRC/zlat2c.f | 180 + dspl/liblapack/SRC/zlatbs.f | 998 +++++ dspl/liblapack/SRC/zlatdf.f | 323 ++ dspl/liblapack/SRC/zlatps.f | 978 +++++ dspl/liblapack/SRC/zlatrd.f | 358 ++ dspl/liblapack/SRC/zlatrs.f | 966 +++++ dspl/liblapack/SRC/zlatrz.f | 206 + dspl/liblapack/SRC/zlatsqr.f | 255 ++ dspl/liblapack/SRC/zlauu2.f | 206 + dspl/liblapack/SRC/zlauum.f | 223 + dspl/liblapack/SRC/zpbcon.f | 277 ++ dspl/liblapack/SRC/zpbequ.f | 244 ++ dspl/liblapack/SRC/zpbrfs.f | 448 ++ dspl/liblapack/SRC/zpbstf.f | 332 ++ dspl/liblapack/SRC/zpbsv.f | 229 + dspl/liblapack/SRC/zpbsvx.f | 543 +++ dspl/liblapack/SRC/zpbtf2.f | 269 ++ dspl/liblapack/SRC/zpbtrf.f | 442 ++ dspl/liblapack/SRC/zpbtrs.f | 220 + dspl/liblapack/SRC/zpftrf.f | 471 +++ dspl/liblapack/SRC/zpftri.f | 445 ++ dspl/liblapack/SRC/zpftrs.f | 301 ++ dspl/liblapack/SRC/zpocon.f | 260 ++ dspl/liblapack/SRC/zpoequ.f | 207 + dspl/liblapack/SRC/zpoequb.f | 223 + dspl/liblapack/SRC/zporfs.f | 436 ++ dspl/liblapack/SRC/zporfsx.f | 693 +++ dspl/liblapack/SRC/zposv.f | 193 + dspl/liblapack/SRC/zposvx.f | 492 +++ dspl/liblapack/SRC/zposvxx.f | 680 +++ dspl/liblapack/SRC/zpotf2.f | 237 ++ dspl/liblapack/SRC/zpotrf.f | 249 ++ dspl/liblapack/SRC/zpotrf2.f | 241 ++ dspl/liblapack/SRC/zpotri.f | 159 + dspl/liblapack/SRC/zpotrs.f | 204 + dspl/liblapack/SRC/zppcon.f | 255 ++ dspl/liblapack/SRC/zppequ.f | 240 ++ dspl/liblapack/SRC/zpprfs.f | 428 ++ dspl/liblapack/SRC/zppsv.f | 205 + dspl/liblapack/SRC/zppsvx.f | 493 +++ dspl/liblapack/SRC/zpptrf.f | 241 ++ dspl/liblapack/SRC/zpptri.f | 190 + dspl/liblapack/SRC/zpptrs.f | 203 + dspl/liblapack/SRC/zpstf2.f | 406 ++ dspl/liblapack/SRC/zpstrf.f | 464 +++ dspl/liblapack/SRC/zptcon.f | 223 + dspl/liblapack/SRC/zpteqr.f | 263 ++ dspl/liblapack/SRC/zptrfs.f | 468 +++ dspl/liblapack/SRC/zptsv.f | 169 + dspl/liblapack/SRC/zptsvx.f | 343 ++ dspl/liblapack/SRC/zpttrf.f | 228 + dspl/liblapack/SRC/zpttrs.f | 208 + dspl/liblapack/SRC/zptts2.f | 245 ++ dspl/liblapack/SRC/zrot.f | 162 + dspl/liblapack/SRC/zspcon.f | 231 + dspl/liblapack/SRC/zspmv.f | 340 ++ dspl/liblapack/SRC/zspr.f | 280 ++ dspl/liblapack/SRC/zsprfs.f | 437 ++ dspl/liblapack/SRC/zspsv.f | 224 + dspl/liblapack/SRC/zspsvx.f | 387 ++ dspl/liblapack/SRC/zsptrf.f | 619 +++ dspl/liblapack/SRC/zsptri.f | 404 ++ dspl/liblapack/SRC/zsptrs.f | 450 ++ dspl/liblapack/SRC/zstedc.f | 486 +++ dspl/liblapack/SRC/zstegr.f | 302 ++ dspl/liblapack/SRC/zstein.f | 469 +++ dspl/liblapack/SRC/zstemr.f | 791 ++++ dspl/liblapack/SRC/zsteqr.f | 576 +++ dspl/liblapack/SRC/zsycon.f | 239 ++ dspl/liblapack/SRC/zsycon_3.f | 287 ++ dspl/liblapack/SRC/zsycon_rook.f | 255 ++ dspl/liblapack/SRC/zsyconv.f | 366 ++ dspl/liblapack/SRC/zsyconvf.f | 562 +++ dspl/liblapack/SRC/zsyconvf_rook.f | 547 +++ dspl/liblapack/SRC/zsyequb.f | 343 ++ dspl/liblapack/SRC/zsymv.f | 343 ++ dspl/liblapack/SRC/zsyr.f | 268 ++ dspl/liblapack/SRC/zsyrfs.f | 446 ++ dspl/liblapack/SRC/zsyrfsx.f | 703 ++++ dspl/liblapack/SRC/zsysv.f | 270 ++ dspl/liblapack/SRC/zsysv_aa.f | 254 ++ dspl/liblapack/SRC/zsysv_aa_2stage.f | 276 ++ dspl/liblapack/SRC/zsysv_rk.f | 317 ++ dspl/liblapack/SRC/zsysv_rook.f | 293 ++ dspl/liblapack/SRC/zsysvx.f | 417 ++ dspl/liblapack/SRC/zsysvxx.f | 698 ++++ dspl/liblapack/SRC/zsyswapr.f | 193 + dspl/liblapack/SRC/zsytf2.f | 611 +++ dspl/liblapack/SRC/zsytf2_rk.f | 952 +++++ dspl/liblapack/SRC/zsytf2_rook.f | 821 ++++ dspl/liblapack/SRC/zsytrf.f | 362 ++ dspl/liblapack/SRC/zsytrf_aa.f | 467 +++ dspl/liblapack/SRC/zsytrf_aa_2stage.f | 668 +++ dspl/liblapack/SRC/zsytrf_rk.f | 498 +++ dspl/liblapack/SRC/zsytrf_rook.f | 393 ++ dspl/liblapack/SRC/zsytri.f | 383 ++ dspl/liblapack/SRC/zsytri2.f | 205 + dspl/liblapack/SRC/zsytri2x.f | 589 +++ dspl/liblapack/SRC/zsytri_3.f | 248 ++ dspl/liblapack/SRC/zsytri_3x.f | 647 +++ dspl/liblapack/SRC/zsytri_rook.f | 451 ++ dspl/liblapack/SRC/zsytrs.f | 445 ++ dspl/liblapack/SRC/zsytrs2.f | 361 ++ dspl/liblapack/SRC/zsytrs_3.f | 371 ++ dspl/liblapack/SRC/zsytrs_aa.f | 285 ++ dspl/liblapack/SRC/zsytrs_aa_2stage.f | 281 ++ dspl/liblapack/SRC/zsytrs_rook.f | 484 +++ dspl/liblapack/SRC/ztbcon.f | 291 ++ dspl/liblapack/SRC/ztbrfs.f | 497 +++ dspl/liblapack/SRC/ztbtrs.f | 244 ++ dspl/liblapack/SRC/ztfsm.f | 1026 +++++ dspl/liblapack/SRC/ztftri.f | 492 +++ dspl/liblapack/SRC/ztfttp.f | 543 +++ dspl/liblapack/SRC/ztfttr.f | 538 +++ dspl/liblapack/SRC/ztgevc.f | 737 ++++ dspl/liblapack/SRC/ztgex2.f | 367 ++ dspl/liblapack/SRC/ztgexc.f | 300 ++ dspl/liblapack/SRC/ztgsen.f | 787 ++++ dspl/liblapack/SRC/ztgsja.f | 666 +++ dspl/liblapack/SRC/ztgsna.f | 519 +++ dspl/liblapack/SRC/ztgsy2.f | 472 +++ dspl/liblapack/SRC/ztgsyl.f | 697 ++++ dspl/liblapack/SRC/ztpcon.f | 274 ++ dspl/liblapack/SRC/ztplqt.f | 270 ++ dspl/liblapack/SRC/ztplqt2.f | 333 ++ dspl/liblapack/SRC/ztpmlqt.f | 366 ++ dspl/liblapack/SRC/ztpmqrt.f | 368 ++ dspl/liblapack/SRC/ztpqrt.f | 270 ++ dspl/liblapack/SRC/ztpqrt2.f | 302 ++ dspl/liblapack/SRC/ztprfb.f | 814 ++++ dspl/liblapack/SRC/ztprfs.f | 484 +++ dspl/liblapack/SRC/ztptri.f | 242 ++ dspl/liblapack/SRC/ztptrs.f | 228 + dspl/liblapack/SRC/ztpttf.f | 539 +++ dspl/liblapack/SRC/ztpttr.f | 176 + dspl/liblapack/SRC/ztrcon.f | 283 ++ dspl/liblapack/SRC/ztrevc.f | 486 +++ dspl/liblapack/SRC/ztrevc3.f | 631 +++ dspl/liblapack/SRC/ztrexc.f | 241 ++ dspl/liblapack/SRC/ztrrfs.f | 479 +++ dspl/liblapack/SRC/ztrsen.f | 456 ++ dspl/liblapack/SRC/ztrsna.f | 464 +++ dspl/liblapack/SRC/ztrsyl.f | 454 ++ dspl/liblapack/SRC/ztrti2.f | 212 + dspl/liblapack/SRC/ztrtri.f | 243 ++ dspl/liblapack/SRC/ztrtrs.f | 227 + dspl/liblapack/SRC/ztrttf.f | 537 +++ dspl/liblapack/SRC/ztrttp.f | 176 + dspl/liblapack/SRC/ztzrzf.f | 313 ++ dspl/liblapack/SRC/zunbdb.f | 689 +++ dspl/liblapack/SRC/zunbdb1.f | 327 ++ dspl/liblapack/SRC/zunbdb2.f | 337 ++ dspl/liblapack/SRC/zunbdb3.f | 336 ++ dspl/liblapack/SRC/zunbdb4.f | 385 ++ dspl/liblapack/SRC/zunbdb5.f | 274 ++ dspl/liblapack/SRC/zunbdb6.f | 313 ++ dspl/liblapack/SRC/zuncsd.f | 658 +++ dspl/liblapack/SRC/zuncsd2by1.f | 772 ++++ dspl/liblapack/SRC/zung2l.f | 199 + dspl/liblapack/SRC/zung2r.f | 201 + dspl/liblapack/SRC/zungbr.f | 338 ++ dspl/liblapack/SRC/zunghr.f | 241 ++ dspl/liblapack/SRC/zungl2.f | 207 + dspl/liblapack/SRC/zunglq.f | 289 ++ dspl/liblapack/SRC/zungql.f | 296 ++ dspl/liblapack/SRC/zungqr.f | 290 ++ dspl/liblapack/SRC/zungr2.f | 205 + dspl/liblapack/SRC/zungrq.f | 297 ++ dspl/liblapack/SRC/zungtr.f | 256 ++ dspl/liblapack/SRC/zunm22.f | 440 ++ dspl/liblapack/SRC/zunm2l.f | 281 ++ dspl/liblapack/SRC/zunm2r.f | 286 ++ dspl/liblapack/SRC/zunmbr.f | 379 ++ dspl/liblapack/SRC/zunmhr.f | 294 ++ dspl/liblapack/SRC/zunml2.f | 290 ++ dspl/liblapack/SRC/zunmlq.f | 347 ++ dspl/liblapack/SRC/zunmql.f | 339 ++ dspl/liblapack/SRC/zunmqr.f | 340 ++ dspl/liblapack/SRC/zunmr2.f | 283 ++ dspl/liblapack/SRC/zunmr3.f | 305 ++ dspl/liblapack/SRC/zunmrq.f | 346 ++ dspl/liblapack/SRC/zunmrz.f | 384 ++ dspl/liblapack/SRC/zunmtr.f | 310 ++ dspl/liblapack/SRC/zupgtr.f | 233 ++ dspl/liblapack/SRC/zupmtr.f | 349 ++ dspl/liblapack/make.inc | 85 + make.inc | 59 + 2389 files changed, 845766 insertions(+), 99 deletions(-) delete mode 100644 Makefile.dirs rename dspl/{blas_obj => libblas}/.gitignore (100%) create mode 100644 dspl/libblas/SRC/._Makefile create mode 100644 dspl/libblas/SRC/._caxpy.f create mode 100644 dspl/libblas/SRC/._ccopy.f create mode 100644 dspl/libblas/SRC/._cdotc.f create mode 100644 dspl/libblas/SRC/._cdotu.f create mode 100644 dspl/libblas/SRC/._cgbmv.f create mode 100644 dspl/libblas/SRC/._cgemm.f create mode 100644 dspl/libblas/SRC/._cgemv.f create mode 100644 dspl/libblas/SRC/._cgerc.f create mode 100644 dspl/libblas/SRC/._cgeru.f create mode 100644 dspl/libblas/SRC/._chbmv.f create mode 100644 dspl/libblas/SRC/._chemm.f create mode 100644 dspl/libblas/SRC/._chemv.f create mode 100644 dspl/libblas/SRC/._cher.f create mode 100644 dspl/libblas/SRC/._cher2.f create mode 100644 dspl/libblas/SRC/._cher2k.f create mode 100644 dspl/libblas/SRC/._cherk.f create mode 100644 dspl/libblas/SRC/._chpmv.f create mode 100644 dspl/libblas/SRC/._chpr.f create mode 100644 dspl/libblas/SRC/._chpr2.f create mode 100644 dspl/libblas/SRC/._crotg.f create mode 100644 dspl/libblas/SRC/._cscal.f create mode 100644 dspl/libblas/SRC/._csrot.f create mode 100644 dspl/libblas/SRC/._csscal.f create mode 100644 dspl/libblas/SRC/._cswap.f create mode 100644 dspl/libblas/SRC/._csymm.f create mode 100644 dspl/libblas/SRC/._csyr2k.f create mode 100644 dspl/libblas/SRC/._csyrk.f create mode 100644 dspl/libblas/SRC/._ctbmv.f create mode 100644 dspl/libblas/SRC/._ctbsv.f create mode 100644 dspl/libblas/SRC/._ctpmv.f create mode 100644 dspl/libblas/SRC/._ctpsv.f create mode 100644 dspl/libblas/SRC/._ctrmm.f create mode 100644 dspl/libblas/SRC/._ctrmv.f create mode 100644 dspl/libblas/SRC/._ctrsm.f create mode 100644 dspl/libblas/SRC/._ctrsv.f create mode 100644 dspl/libblas/SRC/._dasum.f create mode 100644 dspl/libblas/SRC/._daxpy.f create mode 100644 dspl/libblas/SRC/._dcabs1.f create mode 100644 dspl/libblas/SRC/._dcopy.f create mode 100644 dspl/libblas/SRC/._ddot.f create mode 100644 dspl/libblas/SRC/._dgbmv.f create mode 100644 dspl/libblas/SRC/._dgemm.f create mode 100644 dspl/libblas/SRC/._dgemv.f create mode 100644 dspl/libblas/SRC/._dger.f create mode 100644 dspl/libblas/SRC/._dnrm2.f create mode 100644 dspl/libblas/SRC/._drot.f create mode 100644 dspl/libblas/SRC/._drotg.f create mode 100644 dspl/libblas/SRC/._drotm.f create mode 100644 dspl/libblas/SRC/._drotmg.f create mode 100644 dspl/libblas/SRC/._dsbmv.f create mode 100644 dspl/libblas/SRC/._dscal.f create mode 100644 dspl/libblas/SRC/._dsdot.f create mode 100644 dspl/libblas/SRC/._dspmv.f create mode 100644 dspl/libblas/SRC/._dspr.f create mode 100644 dspl/libblas/SRC/._dspr2.f create mode 100644 dspl/libblas/SRC/._dswap.f create mode 100644 dspl/libblas/SRC/._dsymm.f create mode 100644 dspl/libblas/SRC/._dsymv.f create mode 100644 dspl/libblas/SRC/._dsyr.f create mode 100644 dspl/libblas/SRC/._dsyr2.f create mode 100644 dspl/libblas/SRC/._dsyr2k.f create mode 100644 dspl/libblas/SRC/._dsyrk.f create mode 100644 dspl/libblas/SRC/._dtbmv.f create mode 100644 dspl/libblas/SRC/._dtbsv.f create mode 100644 dspl/libblas/SRC/._dtpmv.f create mode 100644 dspl/libblas/SRC/._dtpsv.f create mode 100644 dspl/libblas/SRC/._dtrmm.f create mode 100644 dspl/libblas/SRC/._dtrmv.f create mode 100644 dspl/libblas/SRC/._dtrsm.f create mode 100644 dspl/libblas/SRC/._dtrsv.f create mode 100644 dspl/libblas/SRC/._dzasum.f create mode 100644 dspl/libblas/SRC/._dznrm2.f create mode 100644 dspl/libblas/SRC/._icamax.f create mode 100644 dspl/libblas/SRC/._idamax.f create mode 100644 dspl/libblas/SRC/._isamax.f create mode 100644 dspl/libblas/SRC/._izamax.f create mode 100644 dspl/libblas/SRC/._lsame.f create mode 100644 dspl/libblas/SRC/._make.inc create mode 100644 dspl/libblas/SRC/._sasum.f create mode 100644 dspl/libblas/SRC/._saxpy.f create mode 100644 dspl/libblas/SRC/._scabs1.f create mode 100644 dspl/libblas/SRC/._scasum.f create mode 100644 dspl/libblas/SRC/._scnrm2.f create mode 100644 dspl/libblas/SRC/._scopy.f create mode 100644 dspl/libblas/SRC/._sdot.f create mode 100644 dspl/libblas/SRC/._sdsdot.f create mode 100644 dspl/libblas/SRC/._sgbmv.f create mode 100644 dspl/libblas/SRC/._sgemm.f create mode 100644 dspl/libblas/SRC/._sgemv.f create mode 100644 dspl/libblas/SRC/._sger.f create mode 100644 dspl/libblas/SRC/._snrm2.f create mode 100644 dspl/libblas/SRC/._srot.f create mode 100644 dspl/libblas/SRC/._srotg.f create mode 100644 dspl/libblas/SRC/._srotm.f create mode 100644 dspl/libblas/SRC/._srotmg.f create mode 100644 dspl/libblas/SRC/._ssbmv.f create mode 100644 dspl/libblas/SRC/._sscal.f create mode 100644 dspl/libblas/SRC/._sspmv.f create mode 100644 dspl/libblas/SRC/._sspr.f create mode 100644 dspl/libblas/SRC/._sspr2.f create mode 100644 dspl/libblas/SRC/._sswap.f create mode 100644 dspl/libblas/SRC/._ssymm.f create mode 100644 dspl/libblas/SRC/._ssymv.f create mode 100644 dspl/libblas/SRC/._ssyr.f create mode 100644 dspl/libblas/SRC/._ssyr2.f create mode 100644 dspl/libblas/SRC/._ssyr2k.f create mode 100644 dspl/libblas/SRC/._ssyrk.f create mode 100644 dspl/libblas/SRC/._stbmv.f create mode 100644 dspl/libblas/SRC/._stbsv.f create mode 100644 dspl/libblas/SRC/._stpmv.f create mode 100644 dspl/libblas/SRC/._stpsv.f create mode 100644 dspl/libblas/SRC/._strmm.f create mode 100644 dspl/libblas/SRC/._strmv.f create mode 100644 dspl/libblas/SRC/._strsm.f create mode 100644 dspl/libblas/SRC/._strsv.f create mode 100644 dspl/libblas/SRC/._xerbla.f create mode 100644 dspl/libblas/SRC/._xerbla_array.f create mode 100644 dspl/libblas/SRC/._zaxpy.f create mode 100644 dspl/libblas/SRC/._zcopy.f create mode 100644 dspl/libblas/SRC/._zdotc.f create mode 100644 dspl/libblas/SRC/._zdotu.f create mode 100644 dspl/libblas/SRC/._zdrot.f create mode 100644 dspl/libblas/SRC/._zdscal.f create mode 100644 dspl/libblas/SRC/._zgbmv.f create mode 100644 dspl/libblas/SRC/._zgemm.f create mode 100644 dspl/libblas/SRC/._zgemv.f create mode 100644 dspl/libblas/SRC/._zgerc.f create mode 100644 dspl/libblas/SRC/._zgeru.f create mode 100644 dspl/libblas/SRC/._zhbmv.f create mode 100644 dspl/libblas/SRC/._zhemm.f create mode 100644 dspl/libblas/SRC/._zhemv.f create mode 100644 dspl/libblas/SRC/._zher.f create mode 100644 dspl/libblas/SRC/._zher2.f create mode 100644 dspl/libblas/SRC/._zher2k.f create mode 100644 dspl/libblas/SRC/._zherk.f create mode 100644 dspl/libblas/SRC/._zhpmv.f create mode 100644 dspl/libblas/SRC/._zhpr.f create mode 100644 dspl/libblas/SRC/._zhpr2.f create mode 100644 dspl/libblas/SRC/._zrotg.f create mode 100644 dspl/libblas/SRC/._zscal.f create mode 100644 dspl/libblas/SRC/._zswap.f create mode 100644 dspl/libblas/SRC/._zsymm.f create mode 100644 dspl/libblas/SRC/._zsyr2k.f create mode 100644 dspl/libblas/SRC/._zsyrk.f create mode 100644 dspl/libblas/SRC/._ztbmv.f create mode 100644 dspl/libblas/SRC/._ztbsv.f create mode 100644 dspl/libblas/SRC/._ztpmv.f create mode 100644 dspl/libblas/SRC/._ztpsv.f create mode 100644 dspl/libblas/SRC/._ztrmm.f create mode 100644 dspl/libblas/SRC/._ztrmv.f create mode 100644 dspl/libblas/SRC/._ztrsm.f create mode 100644 dspl/libblas/SRC/._ztrsv.f create mode 100644 dspl/libblas/SRC/.gitignore rename dspl/{blas_src => libblas/SRC}/Makefile (100%) rename dspl/{blas_src => libblas/SRC}/caxpy.f (100%) rename dspl/{blas_src => libblas/SRC}/ccopy.f (100%) rename dspl/{blas_src => libblas/SRC}/cdotc.f (100%) rename dspl/{blas_src => libblas/SRC}/cdotu.f (100%) rename dspl/{blas_src => libblas/SRC}/cgbmv.f (100%) rename dspl/{blas_src => libblas/SRC}/cgemm.f (100%) rename dspl/{blas_src => libblas/SRC}/cgemv.f (100%) rename dspl/{blas_src => libblas/SRC}/cgerc.f (100%) rename dspl/{blas_src => libblas/SRC}/cgeru.f (100%) rename dspl/{blas_src => libblas/SRC}/chbmv.f (100%) rename dspl/{blas_src => libblas/SRC}/chemm.f (100%) rename dspl/{blas_src => libblas/SRC}/chemv.f (100%) rename dspl/{blas_src => libblas/SRC}/cher.f (100%) rename dspl/{blas_src => libblas/SRC}/cher2.f (100%) rename dspl/{blas_src => libblas/SRC}/cher2k.f (100%) rename dspl/{blas_src => libblas/SRC}/cherk.f (100%) rename dspl/{blas_src => libblas/SRC}/chpmv.f (100%) rename dspl/{blas_src => libblas/SRC}/chpr.f (100%) rename dspl/{blas_src => libblas/SRC}/chpr2.f (100%) rename dspl/{blas_src => libblas/SRC}/crotg.f (100%) rename dspl/{blas_src => libblas/SRC}/cscal.f (100%) rename dspl/{blas_src => libblas/SRC}/csrot.f (100%) rename dspl/{blas_src => libblas/SRC}/csscal.f (100%) rename dspl/{blas_src => libblas/SRC}/cswap.f (100%) rename dspl/{blas_src => libblas/SRC}/csymm.f (100%) rename dspl/{blas_src => libblas/SRC}/csyr2k.f (100%) rename dspl/{blas_src => libblas/SRC}/csyrk.f (100%) rename dspl/{blas_src => libblas/SRC}/ctbmv.f (100%) rename dspl/{blas_src => libblas/SRC}/ctbsv.f (100%) rename dspl/{blas_src => libblas/SRC}/ctpmv.f (100%) rename dspl/{blas_src => libblas/SRC}/ctpsv.f (100%) rename dspl/{blas_src => libblas/SRC}/ctrmm.f (100%) rename dspl/{blas_src => libblas/SRC}/ctrmv.f (100%) rename dspl/{blas_src => libblas/SRC}/ctrsm.f (100%) rename dspl/{blas_src => libblas/SRC}/ctrsv.f (100%) rename dspl/{blas_src => libblas/SRC}/dasum.f (100%) rename dspl/{blas_src => libblas/SRC}/daxpy.f (100%) rename dspl/{blas_src => libblas/SRC}/dcabs1.f (100%) rename dspl/{blas_src => libblas/SRC}/dcopy.f (100%) rename dspl/{blas_src => libblas/SRC}/ddot.f (100%) rename dspl/{blas_src => libblas/SRC}/dgbmv.f (100%) rename dspl/{blas_src => libblas/SRC}/dgemm.f (100%) rename dspl/{blas_src => libblas/SRC}/dgemv.f (100%) rename dspl/{blas_src => libblas/SRC}/dger.f (100%) rename dspl/{blas_src => libblas/SRC}/dnrm2.f (100%) rename dspl/{blas_src => libblas/SRC}/drot.f (100%) rename dspl/{blas_src => libblas/SRC}/drotg.f (100%) rename dspl/{blas_src => libblas/SRC}/drotm.f (100%) rename dspl/{blas_src => libblas/SRC}/drotmg.f (100%) rename dspl/{blas_src => libblas/SRC}/dsbmv.f (100%) rename dspl/{blas_src => libblas/SRC}/dscal.f (100%) rename dspl/{blas_src => libblas/SRC}/dsdot.f (100%) rename dspl/{blas_src => libblas/SRC}/dspmv.f (100%) rename dspl/{blas_src => libblas/SRC}/dspr.f (100%) rename dspl/{blas_src => libblas/SRC}/dspr2.f (100%) rename dspl/{blas_src => libblas/SRC}/dswap.f (100%) rename dspl/{blas_src => libblas/SRC}/dsymm.f (100%) rename dspl/{blas_src => libblas/SRC}/dsymv.f (100%) rename dspl/{blas_src => libblas/SRC}/dsyr.f (100%) rename dspl/{blas_src => libblas/SRC}/dsyr2.f (100%) rename dspl/{blas_src => libblas/SRC}/dsyr2k.f (100%) rename dspl/{blas_src => libblas/SRC}/dsyrk.f (100%) rename dspl/{blas_src => libblas/SRC}/dtbmv.f (100%) rename dspl/{blas_src => libblas/SRC}/dtbsv.f (100%) rename dspl/{blas_src => libblas/SRC}/dtpmv.f (100%) rename dspl/{blas_src => libblas/SRC}/dtpsv.f (100%) rename dspl/{blas_src => libblas/SRC}/dtrmm.f (100%) rename dspl/{blas_src => libblas/SRC}/dtrmv.f (100%) rename dspl/{blas_src => libblas/SRC}/dtrsm.f (100%) rename dspl/{blas_src => libblas/SRC}/dtrsv.f (100%) rename dspl/{blas_src => libblas/SRC}/dzasum.f (100%) rename dspl/{blas_src => libblas/SRC}/dznrm2.f (100%) rename dspl/{blas_src => libblas/SRC}/icamax.f (100%) rename dspl/{blas_src => libblas/SRC}/idamax.f (100%) rename dspl/{blas_src => libblas/SRC}/isamax.f (100%) rename dspl/{blas_src => libblas/SRC}/izamax.f (100%) rename dspl/{blas_src => libblas/SRC}/lsame.f (100%) rename dspl/{blas_src => libblas/SRC}/make.inc (97%) rename dspl/{blas_src => libblas/SRC}/sasum.f (100%) rename dspl/{blas_src => libblas/SRC}/saxpy.f (100%) rename dspl/{blas_src => libblas/SRC}/scabs1.f (100%) rename dspl/{blas_src => libblas/SRC}/scasum.f (100%) rename dspl/{blas_src => libblas/SRC}/scnrm2.f (100%) rename dspl/{blas_src => libblas/SRC}/scopy.f (100%) rename dspl/{blas_src => libblas/SRC}/sdot.f (100%) rename dspl/{blas_src => libblas/SRC}/sdsdot.f (100%) rename dspl/{blas_src => libblas/SRC}/sgbmv.f (100%) rename dspl/{blas_src => libblas/SRC}/sgemm.f (100%) rename dspl/{blas_src => libblas/SRC}/sgemv.f (100%) rename dspl/{blas_src => libblas/SRC}/sger.f (100%) rename dspl/{blas_src => libblas/SRC}/snrm2.f (100%) rename dspl/{blas_src => libblas/SRC}/srot.f (100%) rename dspl/{blas_src => libblas/SRC}/srotg.f (100%) rename dspl/{blas_src => libblas/SRC}/srotm.f (100%) rename dspl/{blas_src => libblas/SRC}/srotmg.f (100%) rename dspl/{blas_src => libblas/SRC}/ssbmv.f (100%) rename dspl/{blas_src => libblas/SRC}/sscal.f (100%) rename dspl/{blas_src => libblas/SRC}/sspmv.f (100%) rename dspl/{blas_src => libblas/SRC}/sspr.f (100%) rename dspl/{blas_src => libblas/SRC}/sspr2.f (100%) rename dspl/{blas_src => libblas/SRC}/sswap.f (100%) rename dspl/{blas_src => libblas/SRC}/ssymm.f (100%) rename dspl/{blas_src => libblas/SRC}/ssymv.f (100%) rename dspl/{blas_src => libblas/SRC}/ssyr.f (100%) rename dspl/{blas_src => libblas/SRC}/ssyr2.f (100%) rename dspl/{blas_src => libblas/SRC}/ssyr2k.f (100%) rename dspl/{blas_src => libblas/SRC}/ssyrk.f (100%) rename dspl/{blas_src => libblas/SRC}/stbmv.f (100%) rename dspl/{blas_src => libblas/SRC}/stbsv.f (100%) rename dspl/{blas_src => libblas/SRC}/stpmv.f (100%) rename dspl/{blas_src => libblas/SRC}/stpsv.f (100%) rename dspl/{blas_src => libblas/SRC}/strmm.f (100%) rename dspl/{blas_src => libblas/SRC}/strmv.f (100%) rename dspl/{blas_src => libblas/SRC}/strsm.f (100%) rename dspl/{blas_src => libblas/SRC}/strsv.f (100%) rename dspl/{blas_src => libblas/SRC}/xerbla.f (100%) rename dspl/{blas_src => libblas/SRC}/xerbla_array.f (100%) rename dspl/{blas_src => libblas/SRC}/zaxpy.f (100%) rename dspl/{blas_src => libblas/SRC}/zcopy.f (100%) rename dspl/{blas_src => libblas/SRC}/zdotc.f (100%) rename dspl/{blas_src => libblas/SRC}/zdotu.f (100%) rename dspl/{blas_src => libblas/SRC}/zdrot.f (100%) rename dspl/{blas_src => libblas/SRC}/zdscal.f (100%) rename dspl/{blas_src => libblas/SRC}/zgbmv.f (100%) rename dspl/{blas_src => libblas/SRC}/zgemm.f (100%) rename dspl/{blas_src => libblas/SRC}/zgemv.f (100%) rename dspl/{blas_src => libblas/SRC}/zgerc.f (100%) rename dspl/{blas_src => libblas/SRC}/zgeru.f (100%) rename dspl/{blas_src => libblas/SRC}/zhbmv.f (100%) rename dspl/{blas_src => libblas/SRC}/zhemm.f (100%) rename dspl/{blas_src => libblas/SRC}/zhemv.f (100%) rename dspl/{blas_src => libblas/SRC}/zher.f (100%) rename dspl/{blas_src => libblas/SRC}/zher2.f (100%) rename dspl/{blas_src => libblas/SRC}/zher2k.f (100%) rename dspl/{blas_src => libblas/SRC}/zherk.f (100%) rename dspl/{blas_src => libblas/SRC}/zhpmv.f (100%) rename dspl/{blas_src => libblas/SRC}/zhpr.f (100%) rename dspl/{blas_src => libblas/SRC}/zhpr2.f (100%) rename dspl/{blas_src => libblas/SRC}/zrotg.f (100%) rename dspl/{blas_src => libblas/SRC}/zscal.f (100%) rename dspl/{blas_src => libblas/SRC}/zswap.f (100%) rename dspl/{blas_src => libblas/SRC}/zsymm.f (100%) rename dspl/{blas_src => libblas/SRC}/zsyr2k.f (100%) rename dspl/{blas_src => libblas/SRC}/zsyrk.f (100%) rename dspl/{blas_src => libblas/SRC}/ztbmv.f (100%) rename dspl/{blas_src => libblas/SRC}/ztbsv.f (100%) rename dspl/{blas_src => libblas/SRC}/ztpmv.f (100%) rename dspl/{blas_src => libblas/SRC}/ztpsv.f (100%) rename dspl/{blas_src => libblas/SRC}/ztrmm.f (100%) rename dspl/{blas_src => libblas/SRC}/ztrmv.f (100%) rename dspl/{blas_src => libblas/SRC}/ztrsm.f (100%) rename dspl/{blas_src => libblas/SRC}/ztrsv.f (100%) create mode 100644 dspl/liblapack/INSTALL/.gitignore create mode 100644 dspl/liblapack/INSTALL/LAPACK_version.f create mode 100644 dspl/liblapack/INSTALL/Makefile create mode 100644 dspl/liblapack/INSTALL/dlamch.f create mode 100644 dspl/liblapack/INSTALL/dlamchf77.f create mode 100644 dspl/liblapack/INSTALL/dlamchtst.f create mode 100644 dspl/liblapack/INSTALL/dsecnd_EXT_ETIME.f create mode 100644 dspl/liblapack/INSTALL/dsecnd_EXT_ETIME_.f create mode 100644 dspl/liblapack/INSTALL/dsecnd_INT_CPU_TIME.f create mode 100644 dspl/liblapack/INSTALL/dsecnd_INT_ETIME.f create mode 100644 dspl/liblapack/INSTALL/dsecnd_NONE.f create mode 100644 dspl/liblapack/INSTALL/dsecndtst.f create mode 100644 dspl/liblapack/INSTALL/ilaver.f create mode 100644 dspl/liblapack/INSTALL/lsame.f create mode 100644 dspl/liblapack/INSTALL/lsametst.f create mode 100644 dspl/liblapack/INSTALL/make.inc.ALPHA create mode 100644 dspl/liblapack/INSTALL/make.inc.HPPA create mode 100644 dspl/liblapack/INSTALL/make.inc.IRIX64 create mode 100644 dspl/liblapack/INSTALL/make.inc.O2K create mode 100644 dspl/liblapack/INSTALL/make.inc.SGI5 create mode 100644 dspl/liblapack/INSTALL/make.inc.SUN4 create mode 100644 dspl/liblapack/INSTALL/make.inc.SUN4SOL2 create mode 100644 dspl/liblapack/INSTALL/make.inc.XLF create mode 100644 dspl/liblapack/INSTALL/make.inc.gfortran create mode 100644 dspl/liblapack/INSTALL/make.inc.gfortran_debug create mode 100644 dspl/liblapack/INSTALL/make.inc.ifort create mode 100644 dspl/liblapack/INSTALL/make.inc.pgf95 create mode 100644 dspl/liblapack/INSTALL/make.inc.pghpf create mode 100644 dspl/liblapack/INSTALL/second_EXT_ETIME.f create mode 100644 dspl/liblapack/INSTALL/second_EXT_ETIME_.f create mode 100644 dspl/liblapack/INSTALL/second_INT_CPU_TIME.f create mode 100644 dspl/liblapack/INSTALL/second_INT_ETIME.f create mode 100644 dspl/liblapack/INSTALL/second_NONE.f create mode 100644 dspl/liblapack/INSTALL/secondtst.f create mode 100644 dspl/liblapack/INSTALL/slamch.f create mode 100644 dspl/liblapack/INSTALL/slamchf77.f create mode 100644 dspl/liblapack/INSTALL/slamchtst.f create mode 100644 dspl/liblapack/INSTALL/tstiee.f create mode 100644 dspl/liblapack/SRC/.gitignore create mode 100644 dspl/liblapack/SRC/DEPRECATED/cgegs.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/cgegv.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/cgelsx.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/cgeqpf.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/cggsvd.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/cggsvp.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/clahrd.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/clatzm.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/ctzrqf.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/dgegs.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/dgegv.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/dgelsx.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/dgeqpf.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/dggsvd.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/dggsvp.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/dlahrd.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/dlatzm.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/dtzrqf.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/sgegs.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/sgegv.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/sgelsx.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/sgeqpf.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/sggsvd.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/sggsvp.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/slahrd.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/slatzm.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/stzrqf.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/zgegs.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/zgegv.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/zgelsx.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/zgeqpf.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/zggsvd.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/zggsvp.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/zlahrd.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/zlatzm.f create mode 100644 dspl/liblapack/SRC/DEPRECATED/ztzrqf.f create mode 100644 dspl/liblapack/SRC/Makefile create mode 100644 dspl/liblapack/SRC/VARIANTS/Makefile create mode 100644 dspl/liblapack/SRC/VARIANTS/README create mode 100644 dspl/liblapack/SRC/VARIANTS/cholesky/RL/cpotrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/cholesky/RL/dpotrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/cholesky/RL/spotrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/cholesky/RL/zpotrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/cholesky/TOP/cpotrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/cholesky/TOP/dpotrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/cholesky/TOP/spotrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/cholesky/TOP/zpotrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/lu/CR/cgetrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/lu/CR/dgetrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/lu/CR/sgetrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/lu/CR/zgetrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/lu/LL/cgetrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/lu/LL/dgetrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/lu/LL/sgetrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/lu/LL/zgetrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/lu/REC/cgetrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/lu/REC/dgetrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/lu/REC/sgetrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/lu/REC/zgetrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/qr/LL/cgeqrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/qr/LL/dgeqrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/qr/LL/sceil.f create mode 100644 dspl/liblapack/SRC/VARIANTS/qr/LL/sgeqrf.f create mode 100644 dspl/liblapack/SRC/VARIANTS/qr/LL/zgeqrf.f create mode 100644 dspl/liblapack/SRC/cbbcsd.f create mode 100644 dspl/liblapack/SRC/cbdsqr.f create mode 100644 dspl/liblapack/SRC/cgbbrd.f create mode 100644 dspl/liblapack/SRC/cgbcon.f create mode 100644 dspl/liblapack/SRC/cgbequ.f create mode 100644 dspl/liblapack/SRC/cgbequb.f create mode 100644 dspl/liblapack/SRC/cgbrfs.f create mode 100644 dspl/liblapack/SRC/cgbrfsx.f create mode 100644 dspl/liblapack/SRC/cgbsv.f create mode 100644 dspl/liblapack/SRC/cgbsvx.f create mode 100644 dspl/liblapack/SRC/cgbsvxx.f create mode 100644 dspl/liblapack/SRC/cgbtf2.f create mode 100644 dspl/liblapack/SRC/cgbtrf.f create mode 100644 dspl/liblapack/SRC/cgbtrs.f create mode 100644 dspl/liblapack/SRC/cgebak.f create mode 100644 dspl/liblapack/SRC/cgebal.f create mode 100644 dspl/liblapack/SRC/cgebd2.f create mode 100644 dspl/liblapack/SRC/cgebrd.f create mode 100644 dspl/liblapack/SRC/cgecon.f create mode 100644 dspl/liblapack/SRC/cgeequ.f create mode 100644 dspl/liblapack/SRC/cgeequb.f create mode 100644 dspl/liblapack/SRC/cgees.f create mode 100644 dspl/liblapack/SRC/cgeesx.f create mode 100644 dspl/liblapack/SRC/cgeev.f create mode 100644 dspl/liblapack/SRC/cgeevx.f create mode 100644 dspl/liblapack/SRC/cgehd2.f create mode 100644 dspl/liblapack/SRC/cgehrd.f create mode 100644 dspl/liblapack/SRC/cgejsv.f create mode 100644 dspl/liblapack/SRC/cgelq.f create mode 100644 dspl/liblapack/SRC/cgelq2.f create mode 100644 dspl/liblapack/SRC/cgelqf.f create mode 100644 dspl/liblapack/SRC/cgelqt.f create mode 100644 dspl/liblapack/SRC/cgelqt3.f create mode 100644 dspl/liblapack/SRC/cgels.f create mode 100644 dspl/liblapack/SRC/cgelsd.f create mode 100644 dspl/liblapack/SRC/cgelss.f create mode 100644 dspl/liblapack/SRC/cgelsy.f create mode 100644 dspl/liblapack/SRC/cgemlq.f create mode 100644 dspl/liblapack/SRC/cgemlqt.f create mode 100644 dspl/liblapack/SRC/cgemqr.f create mode 100644 dspl/liblapack/SRC/cgemqrt.f create mode 100644 dspl/liblapack/SRC/cgeql2.f create mode 100644 dspl/liblapack/SRC/cgeqlf.f create mode 100644 dspl/liblapack/SRC/cgeqp3.f create mode 100644 dspl/liblapack/SRC/cgeqr.f create mode 100644 dspl/liblapack/SRC/cgeqr2.f create mode 100644 dspl/liblapack/SRC/cgeqr2p.f create mode 100644 dspl/liblapack/SRC/cgeqrf.f create mode 100644 dspl/liblapack/SRC/cgeqrfp.f create mode 100644 dspl/liblapack/SRC/cgeqrt.f create mode 100644 dspl/liblapack/SRC/cgeqrt2.f create mode 100644 dspl/liblapack/SRC/cgeqrt3.f create mode 100644 dspl/liblapack/SRC/cgerfs.f create mode 100644 dspl/liblapack/SRC/cgerfsx.f create mode 100644 dspl/liblapack/SRC/cgerq2.f create mode 100644 dspl/liblapack/SRC/cgerqf.f create mode 100644 dspl/liblapack/SRC/cgesc2.f create mode 100644 dspl/liblapack/SRC/cgesdd.f create mode 100644 dspl/liblapack/SRC/cgesv.f create mode 100644 dspl/liblapack/SRC/cgesvd.f create mode 100644 dspl/liblapack/SRC/cgesvdx.f create mode 100644 dspl/liblapack/SRC/cgesvj.f create mode 100644 dspl/liblapack/SRC/cgesvx.f create mode 100644 dspl/liblapack/SRC/cgesvxx.f create mode 100644 dspl/liblapack/SRC/cgetc2.f create mode 100644 dspl/liblapack/SRC/cgetf2.f create mode 100644 dspl/liblapack/SRC/cgetrf.f create mode 100644 dspl/liblapack/SRC/cgetrf2.f create mode 100644 dspl/liblapack/SRC/cgetri.f create mode 100644 dspl/liblapack/SRC/cgetrs.f create mode 100644 dspl/liblapack/SRC/cgetsls.f create mode 100644 dspl/liblapack/SRC/cggbak.f create mode 100644 dspl/liblapack/SRC/cggbal.f create mode 100644 dspl/liblapack/SRC/cgges.f create mode 100644 dspl/liblapack/SRC/cgges3.f create mode 100644 dspl/liblapack/SRC/cggesx.f create mode 100644 dspl/liblapack/SRC/cggev.f create mode 100644 dspl/liblapack/SRC/cggev3.f create mode 100644 dspl/liblapack/SRC/cggevx.f create mode 100644 dspl/liblapack/SRC/cggglm.f create mode 100644 dspl/liblapack/SRC/cgghd3.f create mode 100644 dspl/liblapack/SRC/cgghrd.f create mode 100644 dspl/liblapack/SRC/cgglse.f create mode 100644 dspl/liblapack/SRC/cggqrf.f create mode 100644 dspl/liblapack/SRC/cggrqf.f create mode 100644 dspl/liblapack/SRC/cggsvd3.f create mode 100644 dspl/liblapack/SRC/cggsvp3.f create mode 100644 dspl/liblapack/SRC/cgsvj0.f create mode 100644 dspl/liblapack/SRC/cgsvj1.f create mode 100644 dspl/liblapack/SRC/cgtcon.f create mode 100644 dspl/liblapack/SRC/cgtrfs.f create mode 100644 dspl/liblapack/SRC/cgtsv.f create mode 100644 dspl/liblapack/SRC/cgtsvx.f create mode 100644 dspl/liblapack/SRC/cgttrf.f create mode 100644 dspl/liblapack/SRC/cgttrs.f create mode 100644 dspl/liblapack/SRC/cgtts2.f create mode 100644 dspl/liblapack/SRC/chb2st_kernels.f create mode 100644 dspl/liblapack/SRC/chbev.f create mode 100644 dspl/liblapack/SRC/chbev_2stage.f create mode 100644 dspl/liblapack/SRC/chbevd.f create mode 100644 dspl/liblapack/SRC/chbevd_2stage.f create mode 100644 dspl/liblapack/SRC/chbevx.f create mode 100644 dspl/liblapack/SRC/chbevx_2stage.f create mode 100644 dspl/liblapack/SRC/chbgst.f create mode 100644 dspl/liblapack/SRC/chbgv.f create mode 100644 dspl/liblapack/SRC/chbgvd.f create mode 100644 dspl/liblapack/SRC/chbgvx.f create mode 100644 dspl/liblapack/SRC/chbtrd.f create mode 100644 dspl/liblapack/SRC/checon.f create mode 100644 dspl/liblapack/SRC/checon_3.f create mode 100644 dspl/liblapack/SRC/checon_rook.f create mode 100644 dspl/liblapack/SRC/cheequb.f create mode 100644 dspl/liblapack/SRC/cheev.f create mode 100644 dspl/liblapack/SRC/cheev_2stage.f create mode 100644 dspl/liblapack/SRC/cheevd.f create mode 100644 dspl/liblapack/SRC/cheevd_2stage.f create mode 100644 dspl/liblapack/SRC/cheevr.f create mode 100644 dspl/liblapack/SRC/cheevr_2stage.f create mode 100644 dspl/liblapack/SRC/cheevx.f create mode 100644 dspl/liblapack/SRC/cheevx_2stage.f create mode 100644 dspl/liblapack/SRC/chegs2.f create mode 100644 dspl/liblapack/SRC/chegst.f create mode 100644 dspl/liblapack/SRC/chegv.f create mode 100644 dspl/liblapack/SRC/chegv_2stage.f create mode 100644 dspl/liblapack/SRC/chegvd.f create mode 100644 dspl/liblapack/SRC/chegvx.f create mode 100644 dspl/liblapack/SRC/cherfs.f create mode 100644 dspl/liblapack/SRC/cherfsx.f create mode 100644 dspl/liblapack/SRC/chesv.f create mode 100644 dspl/liblapack/SRC/chesv_aa.f create mode 100644 dspl/liblapack/SRC/chesv_aa_2stage.f create mode 100644 dspl/liblapack/SRC/chesv_rk.f create mode 100644 dspl/liblapack/SRC/chesv_rook.f create mode 100644 dspl/liblapack/SRC/chesvx.f create mode 100644 dspl/liblapack/SRC/chesvxx.f create mode 100644 dspl/liblapack/SRC/cheswapr.f create mode 100644 dspl/liblapack/SRC/chetd2.f create mode 100644 dspl/liblapack/SRC/chetf2.f create mode 100644 dspl/liblapack/SRC/chetf2_rk.f create mode 100644 dspl/liblapack/SRC/chetf2_rook.f create mode 100644 dspl/liblapack/SRC/chetrd.f create mode 100644 dspl/liblapack/SRC/chetrd_2stage.f create mode 100644 dspl/liblapack/SRC/chetrd_hb2st.F create mode 100644 dspl/liblapack/SRC/chetrd_he2hb.f create mode 100644 dspl/liblapack/SRC/chetrf.f create mode 100644 dspl/liblapack/SRC/chetrf_aa.f create mode 100644 dspl/liblapack/SRC/chetrf_aa_2stage.f create mode 100644 dspl/liblapack/SRC/chetrf_rk.f create mode 100644 dspl/liblapack/SRC/chetrf_rook.f create mode 100644 dspl/liblapack/SRC/chetri.f create mode 100644 dspl/liblapack/SRC/chetri2.f create mode 100644 dspl/liblapack/SRC/chetri2x.f create mode 100644 dspl/liblapack/SRC/chetri_3.f create mode 100644 dspl/liblapack/SRC/chetri_3x.f create mode 100644 dspl/liblapack/SRC/chetri_rook.f create mode 100644 dspl/liblapack/SRC/chetrs.f create mode 100644 dspl/liblapack/SRC/chetrs2.f create mode 100644 dspl/liblapack/SRC/chetrs_3.f create mode 100644 dspl/liblapack/SRC/chetrs_aa.f create mode 100644 dspl/liblapack/SRC/chetrs_aa_2stage.f create mode 100644 dspl/liblapack/SRC/chetrs_rook.f create mode 100644 dspl/liblapack/SRC/chfrk.f create mode 100644 dspl/liblapack/SRC/chgeqz.f create mode 100644 dspl/liblapack/SRC/chla_transtype.f create mode 100644 dspl/liblapack/SRC/chpcon.f create mode 100644 dspl/liblapack/SRC/chpev.f create mode 100644 dspl/liblapack/SRC/chpevd.f create mode 100644 dspl/liblapack/SRC/chpevx.f create mode 100644 dspl/liblapack/SRC/chpgst.f create mode 100644 dspl/liblapack/SRC/chpgv.f create mode 100644 dspl/liblapack/SRC/chpgvd.f create mode 100644 dspl/liblapack/SRC/chpgvx.f create mode 100644 dspl/liblapack/SRC/chprfs.f create mode 100644 dspl/liblapack/SRC/chpsv.f create mode 100644 dspl/liblapack/SRC/chpsvx.f create mode 100644 dspl/liblapack/SRC/chptrd.f create mode 100644 dspl/liblapack/SRC/chptrf.f create mode 100644 dspl/liblapack/SRC/chptri.f create mode 100644 dspl/liblapack/SRC/chptrs.f create mode 100644 dspl/liblapack/SRC/chsein.f create mode 100644 dspl/liblapack/SRC/chseqr.f create mode 100644 dspl/liblapack/SRC/cla_gbamv.f create mode 100644 dspl/liblapack/SRC/cla_gbrcond_c.f create mode 100644 dspl/liblapack/SRC/cla_gbrcond_x.f create mode 100644 dspl/liblapack/SRC/cla_gbrfsx_extended.f create mode 100644 dspl/liblapack/SRC/cla_gbrpvgrw.f create mode 100644 dspl/liblapack/SRC/cla_geamv.f create mode 100644 dspl/liblapack/SRC/cla_gercond_c.f create mode 100644 dspl/liblapack/SRC/cla_gercond_x.f create mode 100644 dspl/liblapack/SRC/cla_gerfsx_extended.f create mode 100644 dspl/liblapack/SRC/cla_gerpvgrw.f create mode 100644 dspl/liblapack/SRC/cla_heamv.f create mode 100644 dspl/liblapack/SRC/cla_hercond_c.f create mode 100644 dspl/liblapack/SRC/cla_hercond_x.f create mode 100644 dspl/liblapack/SRC/cla_herfsx_extended.f create mode 100644 dspl/liblapack/SRC/cla_herpvgrw.f create mode 100644 dspl/liblapack/SRC/cla_lin_berr.f create mode 100644 dspl/liblapack/SRC/cla_porcond_c.f create mode 100644 dspl/liblapack/SRC/cla_porcond_x.f create mode 100644 dspl/liblapack/SRC/cla_porfsx_extended.f create mode 100644 dspl/liblapack/SRC/cla_porpvgrw.f create mode 100644 dspl/liblapack/SRC/cla_syamv.f create mode 100644 dspl/liblapack/SRC/cla_syrcond_c.f create mode 100644 dspl/liblapack/SRC/cla_syrcond_x.f create mode 100644 dspl/liblapack/SRC/cla_syrfsx_extended.f create mode 100644 dspl/liblapack/SRC/cla_syrpvgrw.f create mode 100644 dspl/liblapack/SRC/cla_wwaddw.f create mode 100644 dspl/liblapack/SRC/clabrd.f create mode 100644 dspl/liblapack/SRC/clacgv.f create mode 100644 dspl/liblapack/SRC/clacn2.f create mode 100644 dspl/liblapack/SRC/clacon.f create mode 100644 dspl/liblapack/SRC/clacp2.f create mode 100644 dspl/liblapack/SRC/clacpy.f create mode 100644 dspl/liblapack/SRC/clacrm.f create mode 100644 dspl/liblapack/SRC/clacrt.f create mode 100644 dspl/liblapack/SRC/cladiv.f create mode 100644 dspl/liblapack/SRC/claed0.f create mode 100644 dspl/liblapack/SRC/claed7.f create mode 100644 dspl/liblapack/SRC/claed8.f create mode 100644 dspl/liblapack/SRC/claein.f create mode 100644 dspl/liblapack/SRC/claesy.f create mode 100644 dspl/liblapack/SRC/claev2.f create mode 100644 dspl/liblapack/SRC/clag2z.f create mode 100644 dspl/liblapack/SRC/clags2.f create mode 100644 dspl/liblapack/SRC/clagtm.f create mode 100644 dspl/liblapack/SRC/clahef.f create mode 100644 dspl/liblapack/SRC/clahef_aa.f create mode 100644 dspl/liblapack/SRC/clahef_rk.f create mode 100644 dspl/liblapack/SRC/clahef_rook.f create mode 100644 dspl/liblapack/SRC/clahqr.f create mode 100644 dspl/liblapack/SRC/clahr2.f create mode 100644 dspl/liblapack/SRC/claic1.f create mode 100644 dspl/liblapack/SRC/clals0.f create mode 100644 dspl/liblapack/SRC/clalsa.f create mode 100644 dspl/liblapack/SRC/clalsd.f create mode 100644 dspl/liblapack/SRC/clamswlq.f create mode 100644 dspl/liblapack/SRC/clamtsqr.f create mode 100644 dspl/liblapack/SRC/clangb.f create mode 100644 dspl/liblapack/SRC/clange.f create mode 100644 dspl/liblapack/SRC/clangt.f create mode 100644 dspl/liblapack/SRC/clanhb.f create mode 100644 dspl/liblapack/SRC/clanhe.f create mode 100644 dspl/liblapack/SRC/clanhf.f create mode 100644 dspl/liblapack/SRC/clanhp.f create mode 100644 dspl/liblapack/SRC/clanhs.f create mode 100644 dspl/liblapack/SRC/clanht.f create mode 100644 dspl/liblapack/SRC/clansb.f create mode 100644 dspl/liblapack/SRC/clansp.f create mode 100644 dspl/liblapack/SRC/clansy.f create mode 100644 dspl/liblapack/SRC/clantb.f create mode 100644 dspl/liblapack/SRC/clantp.f create mode 100644 dspl/liblapack/SRC/clantr.f create mode 100644 dspl/liblapack/SRC/clapll.f create mode 100644 dspl/liblapack/SRC/clapmr.f create mode 100644 dspl/liblapack/SRC/clapmt.f create mode 100644 dspl/liblapack/SRC/claqgb.f create mode 100644 dspl/liblapack/SRC/claqge.f create mode 100644 dspl/liblapack/SRC/claqhb.f create mode 100644 dspl/liblapack/SRC/claqhe.f create mode 100644 dspl/liblapack/SRC/claqhp.f create mode 100644 dspl/liblapack/SRC/claqp2.f create mode 100644 dspl/liblapack/SRC/claqps.f create mode 100644 dspl/liblapack/SRC/claqr0.f create mode 100644 dspl/liblapack/SRC/claqr1.f create mode 100644 dspl/liblapack/SRC/claqr2.f create mode 100644 dspl/liblapack/SRC/claqr3.f create mode 100644 dspl/liblapack/SRC/claqr4.f create mode 100644 dspl/liblapack/SRC/claqr5.f create mode 100644 dspl/liblapack/SRC/claqsb.f create mode 100644 dspl/liblapack/SRC/claqsp.f create mode 100644 dspl/liblapack/SRC/claqsy.f create mode 100644 dspl/liblapack/SRC/clar1v.f create mode 100644 dspl/liblapack/SRC/clar2v.f create mode 100644 dspl/liblapack/SRC/clarcm.f create mode 100644 dspl/liblapack/SRC/clarf.f create mode 100644 dspl/liblapack/SRC/clarfb.f create mode 100644 dspl/liblapack/SRC/clarfg.f create mode 100644 dspl/liblapack/SRC/clarfgp.f create mode 100644 dspl/liblapack/SRC/clarft.f create mode 100644 dspl/liblapack/SRC/clarfx.f create mode 100644 dspl/liblapack/SRC/clarfy.f create mode 100644 dspl/liblapack/SRC/clargv.f create mode 100644 dspl/liblapack/SRC/clarnv.f create mode 100644 dspl/liblapack/SRC/clarrv.f create mode 100644 dspl/liblapack/SRC/clarscl2.f create mode 100644 dspl/liblapack/SRC/clartg.f create mode 100644 dspl/liblapack/SRC/clartv.f create mode 100644 dspl/liblapack/SRC/clarz.f create mode 100644 dspl/liblapack/SRC/clarzb.f create mode 100644 dspl/liblapack/SRC/clarzt.f create mode 100644 dspl/liblapack/SRC/clascl.f create mode 100644 dspl/liblapack/SRC/clascl2.f create mode 100644 dspl/liblapack/SRC/claset.f create mode 100644 dspl/liblapack/SRC/clasr.f create mode 100644 dspl/liblapack/SRC/classq.f create mode 100644 dspl/liblapack/SRC/claswlq.f create mode 100644 dspl/liblapack/SRC/claswp.f create mode 100644 dspl/liblapack/SRC/clasyf.f create mode 100644 dspl/liblapack/SRC/clasyf_aa.f create mode 100644 dspl/liblapack/SRC/clasyf_rk.f create mode 100644 dspl/liblapack/SRC/clasyf_rook.f create mode 100644 dspl/liblapack/SRC/clatbs.f create mode 100644 dspl/liblapack/SRC/clatdf.f create mode 100644 dspl/liblapack/SRC/clatps.f create mode 100644 dspl/liblapack/SRC/clatrd.f create mode 100644 dspl/liblapack/SRC/clatrs.f create mode 100644 dspl/liblapack/SRC/clatrz.f create mode 100644 dspl/liblapack/SRC/clatsqr.f create mode 100644 dspl/liblapack/SRC/clauu2.f create mode 100644 dspl/liblapack/SRC/clauum.f create mode 100644 dspl/liblapack/SRC/cpbcon.f create mode 100644 dspl/liblapack/SRC/cpbequ.f create mode 100644 dspl/liblapack/SRC/cpbrfs.f create mode 100644 dspl/liblapack/SRC/cpbstf.f create mode 100644 dspl/liblapack/SRC/cpbsv.f create mode 100644 dspl/liblapack/SRC/cpbsvx.f create mode 100644 dspl/liblapack/SRC/cpbtf2.f create mode 100644 dspl/liblapack/SRC/cpbtrf.f create mode 100644 dspl/liblapack/SRC/cpbtrs.f create mode 100644 dspl/liblapack/SRC/cpftrf.f create mode 100644 dspl/liblapack/SRC/cpftri.f create mode 100644 dspl/liblapack/SRC/cpftrs.f create mode 100644 dspl/liblapack/SRC/cpocon.f create mode 100644 dspl/liblapack/SRC/cpoequ.f create mode 100644 dspl/liblapack/SRC/cpoequb.f create mode 100644 dspl/liblapack/SRC/cporfs.f create mode 100644 dspl/liblapack/SRC/cporfsx.f create mode 100644 dspl/liblapack/SRC/cposv.f create mode 100644 dspl/liblapack/SRC/cposvx.f create mode 100644 dspl/liblapack/SRC/cposvxx.f create mode 100644 dspl/liblapack/SRC/cpotf2.f create mode 100644 dspl/liblapack/SRC/cpotrf.f create mode 100644 dspl/liblapack/SRC/cpotrf2.f create mode 100644 dspl/liblapack/SRC/cpotri.f create mode 100644 dspl/liblapack/SRC/cpotrs.f create mode 100644 dspl/liblapack/SRC/cppcon.f create mode 100644 dspl/liblapack/SRC/cppequ.f create mode 100644 dspl/liblapack/SRC/cpprfs.f create mode 100644 dspl/liblapack/SRC/cppsv.f create mode 100644 dspl/liblapack/SRC/cppsvx.f create mode 100644 dspl/liblapack/SRC/cpptrf.f create mode 100644 dspl/liblapack/SRC/cpptri.f create mode 100644 dspl/liblapack/SRC/cpptrs.f create mode 100644 dspl/liblapack/SRC/cpstf2.f create mode 100644 dspl/liblapack/SRC/cpstrf.f create mode 100644 dspl/liblapack/SRC/cptcon.f create mode 100644 dspl/liblapack/SRC/cpteqr.f create mode 100644 dspl/liblapack/SRC/cptrfs.f create mode 100644 dspl/liblapack/SRC/cptsv.f create mode 100644 dspl/liblapack/SRC/cptsvx.f create mode 100644 dspl/liblapack/SRC/cpttrf.f create mode 100644 dspl/liblapack/SRC/cpttrs.f create mode 100644 dspl/liblapack/SRC/cptts2.f create mode 100644 dspl/liblapack/SRC/crot.f create mode 100644 dspl/liblapack/SRC/cspcon.f create mode 100644 dspl/liblapack/SRC/cspmv.f create mode 100644 dspl/liblapack/SRC/cspr.f create mode 100644 dspl/liblapack/SRC/csprfs.f create mode 100644 dspl/liblapack/SRC/cspsv.f create mode 100644 dspl/liblapack/SRC/cspsvx.f create mode 100644 dspl/liblapack/SRC/csptrf.f create mode 100644 dspl/liblapack/SRC/csptri.f create mode 100644 dspl/liblapack/SRC/csptrs.f create mode 100644 dspl/liblapack/SRC/csrscl.f create mode 100644 dspl/liblapack/SRC/cstedc.f create mode 100644 dspl/liblapack/SRC/cstegr.f create mode 100644 dspl/liblapack/SRC/cstein.f create mode 100644 dspl/liblapack/SRC/cstemr.f create mode 100644 dspl/liblapack/SRC/csteqr.f create mode 100644 dspl/liblapack/SRC/csycon.f create mode 100644 dspl/liblapack/SRC/csycon_3.f create mode 100644 dspl/liblapack/SRC/csycon_rook.f create mode 100644 dspl/liblapack/SRC/csyconv.f create mode 100644 dspl/liblapack/SRC/csyconvf.f create mode 100644 dspl/liblapack/SRC/csyconvf_rook.f create mode 100644 dspl/liblapack/SRC/csyequb.f create mode 100644 dspl/liblapack/SRC/csymv.f create mode 100644 dspl/liblapack/SRC/csyr.f create mode 100644 dspl/liblapack/SRC/csyrfs.f create mode 100644 dspl/liblapack/SRC/csyrfsx.f create mode 100644 dspl/liblapack/SRC/csysv.f create mode 100644 dspl/liblapack/SRC/csysv_aa.f create mode 100644 dspl/liblapack/SRC/csysv_aa_2stage.f create mode 100644 dspl/liblapack/SRC/csysv_rk.f create mode 100644 dspl/liblapack/SRC/csysv_rook.f create mode 100644 dspl/liblapack/SRC/csysvx.f create mode 100644 dspl/liblapack/SRC/csysvxx.f create mode 100644 dspl/liblapack/SRC/csyswapr.f create mode 100644 dspl/liblapack/SRC/csytf2.f create mode 100644 dspl/liblapack/SRC/csytf2_rk.f create mode 100644 dspl/liblapack/SRC/csytf2_rook.f create mode 100644 dspl/liblapack/SRC/csytrf.f create mode 100644 dspl/liblapack/SRC/csytrf_aa.f create mode 100644 dspl/liblapack/SRC/csytrf_aa_2stage.f create mode 100644 dspl/liblapack/SRC/csytrf_rk.f create mode 100644 dspl/liblapack/SRC/csytrf_rook.f create mode 100644 dspl/liblapack/SRC/csytri.f create mode 100644 dspl/liblapack/SRC/csytri2.f create mode 100644 dspl/liblapack/SRC/csytri2x.f create mode 100644 dspl/liblapack/SRC/csytri_3.f create mode 100644 dspl/liblapack/SRC/csytri_3x.f create mode 100644 dspl/liblapack/SRC/csytri_rook.f create mode 100644 dspl/liblapack/SRC/csytrs.f create mode 100644 dspl/liblapack/SRC/csytrs2.f create mode 100644 dspl/liblapack/SRC/csytrs_3.f create mode 100644 dspl/liblapack/SRC/csytrs_aa.f create mode 100644 dspl/liblapack/SRC/csytrs_aa_2stage.f create mode 100644 dspl/liblapack/SRC/csytrs_rook.f create mode 100644 dspl/liblapack/SRC/ctbcon.f create mode 100644 dspl/liblapack/SRC/ctbrfs.f create mode 100644 dspl/liblapack/SRC/ctbtrs.f create mode 100644 dspl/liblapack/SRC/ctfsm.f create mode 100644 dspl/liblapack/SRC/ctftri.f create mode 100644 dspl/liblapack/SRC/ctfttp.f create mode 100644 dspl/liblapack/SRC/ctfttr.f create mode 100644 dspl/liblapack/SRC/ctgevc.f create mode 100644 dspl/liblapack/SRC/ctgex2.f create mode 100644 dspl/liblapack/SRC/ctgexc.f create mode 100644 dspl/liblapack/SRC/ctgsen.f create mode 100644 dspl/liblapack/SRC/ctgsja.f create mode 100644 dspl/liblapack/SRC/ctgsna.f create mode 100644 dspl/liblapack/SRC/ctgsy2.f create mode 100644 dspl/liblapack/SRC/ctgsyl.f create mode 100644 dspl/liblapack/SRC/ctpcon.f create mode 100644 dspl/liblapack/SRC/ctplqt.f create mode 100644 dspl/liblapack/SRC/ctplqt2.f create mode 100644 dspl/liblapack/SRC/ctpmlqt.f create mode 100644 dspl/liblapack/SRC/ctpmqrt.f create mode 100644 dspl/liblapack/SRC/ctpqrt.f create mode 100644 dspl/liblapack/SRC/ctpqrt2.f create mode 100644 dspl/liblapack/SRC/ctprfb.f create mode 100644 dspl/liblapack/SRC/ctprfs.f create mode 100644 dspl/liblapack/SRC/ctptri.f create mode 100644 dspl/liblapack/SRC/ctptrs.f create mode 100644 dspl/liblapack/SRC/ctpttf.f create mode 100644 dspl/liblapack/SRC/ctpttr.f create mode 100644 dspl/liblapack/SRC/ctrcon.f create mode 100644 dspl/liblapack/SRC/ctrevc.f create mode 100644 dspl/liblapack/SRC/ctrevc3.f create mode 100644 dspl/liblapack/SRC/ctrexc.f create mode 100644 dspl/liblapack/SRC/ctrrfs.f create mode 100644 dspl/liblapack/SRC/ctrsen.f create mode 100644 dspl/liblapack/SRC/ctrsna.f create mode 100644 dspl/liblapack/SRC/ctrsyl.f create mode 100644 dspl/liblapack/SRC/ctrti2.f create mode 100644 dspl/liblapack/SRC/ctrtri.f create mode 100644 dspl/liblapack/SRC/ctrtrs.f create mode 100644 dspl/liblapack/SRC/ctrttf.f create mode 100644 dspl/liblapack/SRC/ctrttp.f create mode 100644 dspl/liblapack/SRC/ctzrzf.f create mode 100644 dspl/liblapack/SRC/cunbdb.f create mode 100644 dspl/liblapack/SRC/cunbdb1.f create mode 100644 dspl/liblapack/SRC/cunbdb2.f create mode 100644 dspl/liblapack/SRC/cunbdb3.f create mode 100644 dspl/liblapack/SRC/cunbdb4.f create mode 100644 dspl/liblapack/SRC/cunbdb5.f create mode 100644 dspl/liblapack/SRC/cunbdb6.f create mode 100644 dspl/liblapack/SRC/cuncsd.f create mode 100644 dspl/liblapack/SRC/cuncsd2by1.f create mode 100644 dspl/liblapack/SRC/cung2l.f create mode 100644 dspl/liblapack/SRC/cung2r.f create mode 100644 dspl/liblapack/SRC/cungbr.f create mode 100644 dspl/liblapack/SRC/cunghr.f create mode 100644 dspl/liblapack/SRC/cungl2.f create mode 100644 dspl/liblapack/SRC/cunglq.f create mode 100644 dspl/liblapack/SRC/cungql.f create mode 100644 dspl/liblapack/SRC/cungqr.f create mode 100644 dspl/liblapack/SRC/cungr2.f create mode 100644 dspl/liblapack/SRC/cungrq.f create mode 100644 dspl/liblapack/SRC/cungtr.f create mode 100644 dspl/liblapack/SRC/cunm22.f create mode 100644 dspl/liblapack/SRC/cunm2l.f create mode 100644 dspl/liblapack/SRC/cunm2r.f create mode 100644 dspl/liblapack/SRC/cunmbr.f create mode 100644 dspl/liblapack/SRC/cunmhr.f create mode 100644 dspl/liblapack/SRC/cunml2.f create mode 100644 dspl/liblapack/SRC/cunmlq.f create mode 100644 dspl/liblapack/SRC/cunmql.f create mode 100644 dspl/liblapack/SRC/cunmqr.f create mode 100644 dspl/liblapack/SRC/cunmr2.f create mode 100644 dspl/liblapack/SRC/cunmr3.f create mode 100644 dspl/liblapack/SRC/cunmrq.f create mode 100644 dspl/liblapack/SRC/cunmrz.f create mode 100644 dspl/liblapack/SRC/cunmtr.f create mode 100644 dspl/liblapack/SRC/cupgtr.f create mode 100644 dspl/liblapack/SRC/cupmtr.f create mode 100644 dspl/liblapack/SRC/dbbcsd.f create mode 100644 dspl/liblapack/SRC/dbdsdc.f create mode 100644 dspl/liblapack/SRC/dbdsqr.f create mode 100644 dspl/liblapack/SRC/dbdsvdx.f create mode 100644 dspl/liblapack/SRC/ddisna.f create mode 100644 dspl/liblapack/SRC/dgbbrd.f create mode 100644 dspl/liblapack/SRC/dgbcon.f create mode 100644 dspl/liblapack/SRC/dgbequ.f create mode 100644 dspl/liblapack/SRC/dgbequb.f create mode 100644 dspl/liblapack/SRC/dgbrfs.f create mode 100644 dspl/liblapack/SRC/dgbrfsx.f create mode 100644 dspl/liblapack/SRC/dgbsv.f create mode 100644 dspl/liblapack/SRC/dgbsvx.f create mode 100644 dspl/liblapack/SRC/dgbsvxx.f create mode 100644 dspl/liblapack/SRC/dgbtf2.f create mode 100644 dspl/liblapack/SRC/dgbtrf.f create mode 100644 dspl/liblapack/SRC/dgbtrs.f create mode 100644 dspl/liblapack/SRC/dgebak.f create mode 100644 dspl/liblapack/SRC/dgebal.f create mode 100644 dspl/liblapack/SRC/dgebd2.f create mode 100644 dspl/liblapack/SRC/dgebrd.f create mode 100644 dspl/liblapack/SRC/dgecon.f create mode 100644 dspl/liblapack/SRC/dgeequ.f create mode 100644 dspl/liblapack/SRC/dgeequb.f create mode 100644 dspl/liblapack/SRC/dgees.f create mode 100644 dspl/liblapack/SRC/dgeesx.f create mode 100644 dspl/liblapack/SRC/dgeev.f create mode 100644 dspl/liblapack/SRC/dgeevx.f create mode 100644 dspl/liblapack/SRC/dgehd2.f create mode 100644 dspl/liblapack/SRC/dgehrd.f create mode 100644 dspl/liblapack/SRC/dgejsv.f create mode 100644 dspl/liblapack/SRC/dgelq.f create mode 100644 dspl/liblapack/SRC/dgelq2.f create mode 100644 dspl/liblapack/SRC/dgelqf.f create mode 100644 dspl/liblapack/SRC/dgelqt.f create mode 100644 dspl/liblapack/SRC/dgelqt3.f create mode 100644 dspl/liblapack/SRC/dgels.f create mode 100644 dspl/liblapack/SRC/dgelsd.f create mode 100644 dspl/liblapack/SRC/dgelss.f create mode 100644 dspl/liblapack/SRC/dgelsy.f create mode 100644 dspl/liblapack/SRC/dgemlq.f create mode 100644 dspl/liblapack/SRC/dgemlqt.f create mode 100644 dspl/liblapack/SRC/dgemqr.f create mode 100644 dspl/liblapack/SRC/dgemqrt.f create mode 100644 dspl/liblapack/SRC/dgeql2.f create mode 100644 dspl/liblapack/SRC/dgeqlf.f create mode 100644 dspl/liblapack/SRC/dgeqp3.f create mode 100644 dspl/liblapack/SRC/dgeqr.f create mode 100644 dspl/liblapack/SRC/dgeqr2.f create mode 100644 dspl/liblapack/SRC/dgeqr2p.f create mode 100644 dspl/liblapack/SRC/dgeqrf.f create mode 100644 dspl/liblapack/SRC/dgeqrfp.f create mode 100644 dspl/liblapack/SRC/dgeqrt.f create mode 100644 dspl/liblapack/SRC/dgeqrt2.f create mode 100644 dspl/liblapack/SRC/dgeqrt3.f create mode 100644 dspl/liblapack/SRC/dgerfs.f create mode 100644 dspl/liblapack/SRC/dgerfsx.f create mode 100644 dspl/liblapack/SRC/dgerq2.f create mode 100644 dspl/liblapack/SRC/dgerqf.f create mode 100644 dspl/liblapack/SRC/dgesc2.f create mode 100644 dspl/liblapack/SRC/dgesdd.f create mode 100644 dspl/liblapack/SRC/dgesv.f create mode 100644 dspl/liblapack/SRC/dgesvd.f create mode 100644 dspl/liblapack/SRC/dgesvdx.f create mode 100644 dspl/liblapack/SRC/dgesvj.f create mode 100644 dspl/liblapack/SRC/dgesvx.f create mode 100644 dspl/liblapack/SRC/dgesvxx.f create mode 100644 dspl/liblapack/SRC/dgetc2.f create mode 100644 dspl/liblapack/SRC/dgetf2.f create mode 100644 dspl/liblapack/SRC/dgetrf.f create mode 100644 dspl/liblapack/SRC/dgetrf2.f create mode 100644 dspl/liblapack/SRC/dgetri.f create mode 100644 dspl/liblapack/SRC/dgetrs.f create mode 100644 dspl/liblapack/SRC/dgetsls.f create mode 100644 dspl/liblapack/SRC/dggbak.f create mode 100644 dspl/liblapack/SRC/dggbal.f create mode 100644 dspl/liblapack/SRC/dgges.f create mode 100644 dspl/liblapack/SRC/dgges3.f create mode 100644 dspl/liblapack/SRC/dggesx.f create mode 100644 dspl/liblapack/SRC/dggev.f create mode 100644 dspl/liblapack/SRC/dggev3.f create mode 100644 dspl/liblapack/SRC/dggevx.f create mode 100644 dspl/liblapack/SRC/dggglm.f create mode 100644 dspl/liblapack/SRC/dgghd3.f create mode 100644 dspl/liblapack/SRC/dgghrd.f create mode 100644 dspl/liblapack/SRC/dgglse.f create mode 100644 dspl/liblapack/SRC/dggqrf.f create mode 100644 dspl/liblapack/SRC/dggrqf.f create mode 100644 dspl/liblapack/SRC/dggsvd3.f create mode 100644 dspl/liblapack/SRC/dggsvp3.f create mode 100644 dspl/liblapack/SRC/dgsvj0.f create mode 100644 dspl/liblapack/SRC/dgsvj1.f create mode 100644 dspl/liblapack/SRC/dgtcon.f create mode 100644 dspl/liblapack/SRC/dgtrfs.f create mode 100644 dspl/liblapack/SRC/dgtsv.f create mode 100644 dspl/liblapack/SRC/dgtsvx.f create mode 100644 dspl/liblapack/SRC/dgttrf.f create mode 100644 dspl/liblapack/SRC/dgttrs.f create mode 100644 dspl/liblapack/SRC/dgtts2.f create mode 100644 dspl/liblapack/SRC/dhgeqz.f create mode 100644 dspl/liblapack/SRC/dhsein.f create mode 100644 dspl/liblapack/SRC/dhseqr.f create mode 100644 dspl/liblapack/SRC/disnan.f create mode 100644 dspl/liblapack/SRC/dla_gbamv.f create mode 100644 dspl/liblapack/SRC/dla_gbrcond.f create mode 100644 dspl/liblapack/SRC/dla_gbrfsx_extended.f create mode 100644 dspl/liblapack/SRC/dla_gbrpvgrw.f create mode 100644 dspl/liblapack/SRC/dla_geamv.f create mode 100644 dspl/liblapack/SRC/dla_gercond.f create mode 100644 dspl/liblapack/SRC/dla_gerfsx_extended.f create mode 100644 dspl/liblapack/SRC/dla_gerpvgrw.f create mode 100644 dspl/liblapack/SRC/dla_lin_berr.f create mode 100644 dspl/liblapack/SRC/dla_porcond.f create mode 100644 dspl/liblapack/SRC/dla_porfsx_extended.f create mode 100644 dspl/liblapack/SRC/dla_porpvgrw.f create mode 100644 dspl/liblapack/SRC/dla_syamv.f create mode 100644 dspl/liblapack/SRC/dla_syrcond.f create mode 100644 dspl/liblapack/SRC/dla_syrfsx_extended.f create mode 100644 dspl/liblapack/SRC/dla_syrpvgrw.f create mode 100644 dspl/liblapack/SRC/dla_wwaddw.f create mode 100644 dspl/liblapack/SRC/dlabad.f create mode 100644 dspl/liblapack/SRC/dlabrd.f create mode 100644 dspl/liblapack/SRC/dlacn2.f create mode 100644 dspl/liblapack/SRC/dlacon.f create mode 100644 dspl/liblapack/SRC/dlacpy.f create mode 100644 dspl/liblapack/SRC/dladiv.f create mode 100644 dspl/liblapack/SRC/dlae2.f create mode 100644 dspl/liblapack/SRC/dlaebz.f create mode 100644 dspl/liblapack/SRC/dlaed0.f create mode 100644 dspl/liblapack/SRC/dlaed1.f create mode 100644 dspl/liblapack/SRC/dlaed2.f create mode 100644 dspl/liblapack/SRC/dlaed3.f create mode 100644 dspl/liblapack/SRC/dlaed4.f create mode 100644 dspl/liblapack/SRC/dlaed5.f create mode 100644 dspl/liblapack/SRC/dlaed6.f create mode 100644 dspl/liblapack/SRC/dlaed7.f create mode 100644 dspl/liblapack/SRC/dlaed8.f create mode 100644 dspl/liblapack/SRC/dlaed9.f create mode 100644 dspl/liblapack/SRC/dlaeda.f create mode 100644 dspl/liblapack/SRC/dlaein.f create mode 100644 dspl/liblapack/SRC/dlaev2.f create mode 100644 dspl/liblapack/SRC/dlaexc.f create mode 100644 dspl/liblapack/SRC/dlag2.f create mode 100644 dspl/liblapack/SRC/dlag2s.f create mode 100644 dspl/liblapack/SRC/dlags2.f create mode 100644 dspl/liblapack/SRC/dlagtf.f create mode 100644 dspl/liblapack/SRC/dlagtm.f create mode 100644 dspl/liblapack/SRC/dlagts.f create mode 100644 dspl/liblapack/SRC/dlagv2.f create mode 100644 dspl/liblapack/SRC/dlahqr.f create mode 100644 dspl/liblapack/SRC/dlahr2.f create mode 100644 dspl/liblapack/SRC/dlaic1.f create mode 100644 dspl/liblapack/SRC/dlaisnan.f create mode 100644 dspl/liblapack/SRC/dlaln2.f create mode 100644 dspl/liblapack/SRC/dlals0.f create mode 100644 dspl/liblapack/SRC/dlalsa.f create mode 100644 dspl/liblapack/SRC/dlalsd.f create mode 100644 dspl/liblapack/SRC/dlamrg.f create mode 100644 dspl/liblapack/SRC/dlamswlq.f create mode 100644 dspl/liblapack/SRC/dlamtsqr.f create mode 100644 dspl/liblapack/SRC/dlaneg.f create mode 100644 dspl/liblapack/SRC/dlangb.f create mode 100644 dspl/liblapack/SRC/dlange.f create mode 100644 dspl/liblapack/SRC/dlangt.f create mode 100644 dspl/liblapack/SRC/dlanhs.f create mode 100644 dspl/liblapack/SRC/dlansb.f create mode 100644 dspl/liblapack/SRC/dlansf.f create mode 100644 dspl/liblapack/SRC/dlansp.f create mode 100644 dspl/liblapack/SRC/dlanst.f create mode 100644 dspl/liblapack/SRC/dlansy.f create mode 100644 dspl/liblapack/SRC/dlantb.f create mode 100644 dspl/liblapack/SRC/dlantp.f create mode 100644 dspl/liblapack/SRC/dlantr.f create mode 100644 dspl/liblapack/SRC/dlanv2.f create mode 100644 dspl/liblapack/SRC/dlapll.f create mode 100644 dspl/liblapack/SRC/dlapmr.f create mode 100644 dspl/liblapack/SRC/dlapmt.f create mode 100644 dspl/liblapack/SRC/dlapy2.f create mode 100644 dspl/liblapack/SRC/dlapy3.f create mode 100644 dspl/liblapack/SRC/dlaqgb.f create mode 100644 dspl/liblapack/SRC/dlaqge.f create mode 100644 dspl/liblapack/SRC/dlaqp2.f create mode 100644 dspl/liblapack/SRC/dlaqps.f create mode 100644 dspl/liblapack/SRC/dlaqr0.f create mode 100644 dspl/liblapack/SRC/dlaqr1.f create mode 100644 dspl/liblapack/SRC/dlaqr2.f create mode 100644 dspl/liblapack/SRC/dlaqr3.f create mode 100644 dspl/liblapack/SRC/dlaqr4.f create mode 100644 dspl/liblapack/SRC/dlaqr5.f create mode 100644 dspl/liblapack/SRC/dlaqsb.f create mode 100644 dspl/liblapack/SRC/dlaqsp.f create mode 100644 dspl/liblapack/SRC/dlaqsy.f create mode 100644 dspl/liblapack/SRC/dlaqtr.f create mode 100644 dspl/liblapack/SRC/dlar1v.f create mode 100644 dspl/liblapack/SRC/dlar2v.f create mode 100644 dspl/liblapack/SRC/dlarf.f create mode 100644 dspl/liblapack/SRC/dlarfb.f create mode 100644 dspl/liblapack/SRC/dlarfg.f create mode 100644 dspl/liblapack/SRC/dlarfgp.f create mode 100644 dspl/liblapack/SRC/dlarft.f create mode 100644 dspl/liblapack/SRC/dlarfx.f create mode 100644 dspl/liblapack/SRC/dlarfy.f create mode 100644 dspl/liblapack/SRC/dlargv.f create mode 100644 dspl/liblapack/SRC/dlarnv.f create mode 100644 dspl/liblapack/SRC/dlarra.f create mode 100644 dspl/liblapack/SRC/dlarrb.f create mode 100644 dspl/liblapack/SRC/dlarrc.f create mode 100644 dspl/liblapack/SRC/dlarrd.f create mode 100644 dspl/liblapack/SRC/dlarre.f create mode 100644 dspl/liblapack/SRC/dlarrf.f create mode 100644 dspl/liblapack/SRC/dlarrj.f create mode 100644 dspl/liblapack/SRC/dlarrk.f create mode 100644 dspl/liblapack/SRC/dlarrr.f create mode 100644 dspl/liblapack/SRC/dlarrv.f create mode 100644 dspl/liblapack/SRC/dlarscl2.f create mode 100644 dspl/liblapack/SRC/dlartg.f create mode 100644 dspl/liblapack/SRC/dlartgp.f create mode 100644 dspl/liblapack/SRC/dlartgs.f create mode 100644 dspl/liblapack/SRC/dlartv.f create mode 100644 dspl/liblapack/SRC/dlaruv.f create mode 100644 dspl/liblapack/SRC/dlarz.f create mode 100644 dspl/liblapack/SRC/dlarzb.f create mode 100644 dspl/liblapack/SRC/dlarzt.f create mode 100644 dspl/liblapack/SRC/dlas2.f create mode 100644 dspl/liblapack/SRC/dlascl.f create mode 100644 dspl/liblapack/SRC/dlascl2.f create mode 100644 dspl/liblapack/SRC/dlasd0.f create mode 100644 dspl/liblapack/SRC/dlasd1.f create mode 100644 dspl/liblapack/SRC/dlasd2.f create mode 100644 dspl/liblapack/SRC/dlasd3.f create mode 100644 dspl/liblapack/SRC/dlasd4.f create mode 100644 dspl/liblapack/SRC/dlasd5.f create mode 100644 dspl/liblapack/SRC/dlasd6.f create mode 100644 dspl/liblapack/SRC/dlasd7.f create mode 100644 dspl/liblapack/SRC/dlasd8.f create mode 100644 dspl/liblapack/SRC/dlasda.f create mode 100644 dspl/liblapack/SRC/dlasdq.f create mode 100644 dspl/liblapack/SRC/dlasdt.f create mode 100644 dspl/liblapack/SRC/dlaset.f create mode 100644 dspl/liblapack/SRC/dlasq1.f create mode 100644 dspl/liblapack/SRC/dlasq2.f create mode 100644 dspl/liblapack/SRC/dlasq3.f create mode 100644 dspl/liblapack/SRC/dlasq4.f create mode 100644 dspl/liblapack/SRC/dlasq5.f create mode 100644 dspl/liblapack/SRC/dlasq6.f create mode 100644 dspl/liblapack/SRC/dlasr.f create mode 100644 dspl/liblapack/SRC/dlasrt.f create mode 100644 dspl/liblapack/SRC/dlassq.f create mode 100644 dspl/liblapack/SRC/dlasv2.f create mode 100644 dspl/liblapack/SRC/dlaswlq.f create mode 100644 dspl/liblapack/SRC/dlaswp.f create mode 100644 dspl/liblapack/SRC/dlasy2.f create mode 100644 dspl/liblapack/SRC/dlasyf.f create mode 100644 dspl/liblapack/SRC/dlasyf_aa.f create mode 100644 dspl/liblapack/SRC/dlasyf_rk.f create mode 100644 dspl/liblapack/SRC/dlasyf_rook.f create mode 100644 dspl/liblapack/SRC/dlat2s.f create mode 100644 dspl/liblapack/SRC/dlatbs.f create mode 100644 dspl/liblapack/SRC/dlatdf.f create mode 100644 dspl/liblapack/SRC/dlatps.f create mode 100644 dspl/liblapack/SRC/dlatrd.f create mode 100644 dspl/liblapack/SRC/dlatrs.f create mode 100644 dspl/liblapack/SRC/dlatrz.f create mode 100644 dspl/liblapack/SRC/dlatsqr.f create mode 100644 dspl/liblapack/SRC/dlauu2.f create mode 100644 dspl/liblapack/SRC/dlauum.f create mode 100644 dspl/liblapack/SRC/dopgtr.f create mode 100644 dspl/liblapack/SRC/dopmtr.f create mode 100644 dspl/liblapack/SRC/dorbdb.f create mode 100644 dspl/liblapack/SRC/dorbdb1.f create mode 100644 dspl/liblapack/SRC/dorbdb2.f create mode 100644 dspl/liblapack/SRC/dorbdb3.f create mode 100644 dspl/liblapack/SRC/dorbdb4.f create mode 100644 dspl/liblapack/SRC/dorbdb5.f create mode 100644 dspl/liblapack/SRC/dorbdb6.f create mode 100644 dspl/liblapack/SRC/dorcsd.f create mode 100644 dspl/liblapack/SRC/dorcsd2by1.f create mode 100644 dspl/liblapack/SRC/dorg2l.f create mode 100644 dspl/liblapack/SRC/dorg2r.f create mode 100644 dspl/liblapack/SRC/dorgbr.f create mode 100644 dspl/liblapack/SRC/dorghr.f create mode 100644 dspl/liblapack/SRC/dorgl2.f create mode 100644 dspl/liblapack/SRC/dorglq.f create mode 100644 dspl/liblapack/SRC/dorgql.f create mode 100644 dspl/liblapack/SRC/dorgqr.f create mode 100644 dspl/liblapack/SRC/dorgr2.f create mode 100644 dspl/liblapack/SRC/dorgrq.f create mode 100644 dspl/liblapack/SRC/dorgtr.f create mode 100644 dspl/liblapack/SRC/dorm22.f create mode 100644 dspl/liblapack/SRC/dorm2l.f create mode 100644 dspl/liblapack/SRC/dorm2r.f create mode 100644 dspl/liblapack/SRC/dormbr.f create mode 100644 dspl/liblapack/SRC/dormhr.f create mode 100644 dspl/liblapack/SRC/dorml2.f create mode 100644 dspl/liblapack/SRC/dormlq.f create mode 100644 dspl/liblapack/SRC/dormql.f create mode 100644 dspl/liblapack/SRC/dormqr.f create mode 100644 dspl/liblapack/SRC/dormr2.f create mode 100644 dspl/liblapack/SRC/dormr3.f create mode 100644 dspl/liblapack/SRC/dormrq.f create mode 100644 dspl/liblapack/SRC/dormrz.f create mode 100644 dspl/liblapack/SRC/dormtr.f create mode 100644 dspl/liblapack/SRC/dpbcon.f create mode 100644 dspl/liblapack/SRC/dpbequ.f create mode 100644 dspl/liblapack/SRC/dpbrfs.f create mode 100644 dspl/liblapack/SRC/dpbstf.f create mode 100644 dspl/liblapack/SRC/dpbsv.f create mode 100644 dspl/liblapack/SRC/dpbsvx.f create mode 100644 dspl/liblapack/SRC/dpbtf2.f create mode 100644 dspl/liblapack/SRC/dpbtrf.f create mode 100644 dspl/liblapack/SRC/dpbtrs.f create mode 100644 dspl/liblapack/SRC/dpftrf.f create mode 100644 dspl/liblapack/SRC/dpftri.f create mode 100644 dspl/liblapack/SRC/dpftrs.f create mode 100644 dspl/liblapack/SRC/dpocon.f create mode 100644 dspl/liblapack/SRC/dpoequ.f create mode 100644 dspl/liblapack/SRC/dpoequb.f create mode 100644 dspl/liblapack/SRC/dporfs.f create mode 100644 dspl/liblapack/SRC/dporfsx.f create mode 100644 dspl/liblapack/SRC/dposv.f create mode 100644 dspl/liblapack/SRC/dposvx.f create mode 100644 dspl/liblapack/SRC/dposvxx.f create mode 100644 dspl/liblapack/SRC/dpotf2.f create mode 100644 dspl/liblapack/SRC/dpotrf.f create mode 100644 dspl/liblapack/SRC/dpotrf2.f create mode 100644 dspl/liblapack/SRC/dpotri.f create mode 100644 dspl/liblapack/SRC/dpotrs.f create mode 100644 dspl/liblapack/SRC/dppcon.f create mode 100644 dspl/liblapack/SRC/dppequ.f create mode 100644 dspl/liblapack/SRC/dpprfs.f create mode 100644 dspl/liblapack/SRC/dppsv.f create mode 100644 dspl/liblapack/SRC/dppsvx.f create mode 100644 dspl/liblapack/SRC/dpptrf.f create mode 100644 dspl/liblapack/SRC/dpptri.f create mode 100644 dspl/liblapack/SRC/dpptrs.f create mode 100644 dspl/liblapack/SRC/dpstf2.f create mode 100644 dspl/liblapack/SRC/dpstrf.f create mode 100644 dspl/liblapack/SRC/dptcon.f create mode 100644 dspl/liblapack/SRC/dpteqr.f create mode 100644 dspl/liblapack/SRC/dptrfs.f create mode 100644 dspl/liblapack/SRC/dptsv.f create mode 100644 dspl/liblapack/SRC/dptsvx.f create mode 100644 dspl/liblapack/SRC/dpttrf.f create mode 100644 dspl/liblapack/SRC/dpttrs.f create mode 100644 dspl/liblapack/SRC/dptts2.f create mode 100644 dspl/liblapack/SRC/drscl.f create mode 100644 dspl/liblapack/SRC/dsb2st_kernels.f create mode 100644 dspl/liblapack/SRC/dsbev.f create mode 100644 dspl/liblapack/SRC/dsbev_2stage.f create mode 100644 dspl/liblapack/SRC/dsbevd.f create mode 100644 dspl/liblapack/SRC/dsbevd_2stage.f create mode 100644 dspl/liblapack/SRC/dsbevx.f create mode 100644 dspl/liblapack/SRC/dsbevx_2stage.f create mode 100644 dspl/liblapack/SRC/dsbgst.f create mode 100644 dspl/liblapack/SRC/dsbgv.f create mode 100644 dspl/liblapack/SRC/dsbgvd.f create mode 100644 dspl/liblapack/SRC/dsbgvx.f create mode 100644 dspl/liblapack/SRC/dsbtrd.f create mode 100644 dspl/liblapack/SRC/dsfrk.f create mode 100644 dspl/liblapack/SRC/dsgesv.f create mode 100644 dspl/liblapack/SRC/dspcon.f create mode 100644 dspl/liblapack/SRC/dspev.f create mode 100644 dspl/liblapack/SRC/dspevd.f create mode 100644 dspl/liblapack/SRC/dspevx.f create mode 100644 dspl/liblapack/SRC/dspgst.f create mode 100644 dspl/liblapack/SRC/dspgv.f create mode 100644 dspl/liblapack/SRC/dspgvd.f create mode 100644 dspl/liblapack/SRC/dspgvx.f create mode 100644 dspl/liblapack/SRC/dsposv.f create mode 100644 dspl/liblapack/SRC/dsprfs.f create mode 100644 dspl/liblapack/SRC/dspsv.f create mode 100644 dspl/liblapack/SRC/dspsvx.f create mode 100644 dspl/liblapack/SRC/dsptrd.f create mode 100644 dspl/liblapack/SRC/dsptrf.f create mode 100644 dspl/liblapack/SRC/dsptri.f create mode 100644 dspl/liblapack/SRC/dsptrs.f create mode 100644 dspl/liblapack/SRC/dstebz.f create mode 100644 dspl/liblapack/SRC/dstedc.f create mode 100644 dspl/liblapack/SRC/dstegr.f create mode 100644 dspl/liblapack/SRC/dstein.f create mode 100644 dspl/liblapack/SRC/dstemr.f create mode 100644 dspl/liblapack/SRC/dsteqr.f create mode 100644 dspl/liblapack/SRC/dsterf.f create mode 100644 dspl/liblapack/SRC/dstev.f create mode 100644 dspl/liblapack/SRC/dstevd.f create mode 100644 dspl/liblapack/SRC/dstevr.f create mode 100644 dspl/liblapack/SRC/dstevx.f create mode 100644 dspl/liblapack/SRC/dsycon.f create mode 100644 dspl/liblapack/SRC/dsycon_3.f create mode 100644 dspl/liblapack/SRC/dsycon_rook.f create mode 100644 dspl/liblapack/SRC/dsyconv.f create mode 100644 dspl/liblapack/SRC/dsyconvf.f create mode 100644 dspl/liblapack/SRC/dsyconvf_rook.f create mode 100644 dspl/liblapack/SRC/dsyequb.f create mode 100644 dspl/liblapack/SRC/dsyev.f create mode 100644 dspl/liblapack/SRC/dsyev_2stage.f create mode 100644 dspl/liblapack/SRC/dsyevd.f create mode 100644 dspl/liblapack/SRC/dsyevd_2stage.f create mode 100644 dspl/liblapack/SRC/dsyevr.f create mode 100644 dspl/liblapack/SRC/dsyevr_2stage.f create mode 100644 dspl/liblapack/SRC/dsyevx.f create mode 100644 dspl/liblapack/SRC/dsyevx_2stage.f create mode 100644 dspl/liblapack/SRC/dsygs2.f create mode 100644 dspl/liblapack/SRC/dsygst.f create mode 100644 dspl/liblapack/SRC/dsygv.f create mode 100644 dspl/liblapack/SRC/dsygv_2stage.f create mode 100644 dspl/liblapack/SRC/dsygvd.f create mode 100644 dspl/liblapack/SRC/dsygvx.f create mode 100644 dspl/liblapack/SRC/dsyrfs.f create mode 100644 dspl/liblapack/SRC/dsyrfsx.f create mode 100644 dspl/liblapack/SRC/dsysv.f create mode 100644 dspl/liblapack/SRC/dsysv_aa.f create mode 100644 dspl/liblapack/SRC/dsysv_aa_2stage.f create mode 100644 dspl/liblapack/SRC/dsysv_rk.f create mode 100644 dspl/liblapack/SRC/dsysv_rook.f create mode 100644 dspl/liblapack/SRC/dsysvx.f create mode 100644 dspl/liblapack/SRC/dsysvxx.f create mode 100644 dspl/liblapack/SRC/dsyswapr.f create mode 100644 dspl/liblapack/SRC/dsytd2.f create mode 100644 dspl/liblapack/SRC/dsytf2.f create mode 100644 dspl/liblapack/SRC/dsytf2_rk.f create mode 100644 dspl/liblapack/SRC/dsytf2_rook.f create mode 100644 dspl/liblapack/SRC/dsytrd.f create mode 100644 dspl/liblapack/SRC/dsytrd_2stage.f create mode 100644 dspl/liblapack/SRC/dsytrd_sb2st.F create mode 100644 dspl/liblapack/SRC/dsytrd_sy2sb.f create mode 100644 dspl/liblapack/SRC/dsytrf.f create mode 100644 dspl/liblapack/SRC/dsytrf_aa.f create mode 100644 dspl/liblapack/SRC/dsytrf_aa_2stage.f create mode 100644 dspl/liblapack/SRC/dsytrf_rk.f create mode 100644 dspl/liblapack/SRC/dsytrf_rook.f create mode 100644 dspl/liblapack/SRC/dsytri.f create mode 100644 dspl/liblapack/SRC/dsytri2.f create mode 100644 dspl/liblapack/SRC/dsytri2x.f create mode 100644 dspl/liblapack/SRC/dsytri_3.f create mode 100644 dspl/liblapack/SRC/dsytri_3x.f create mode 100644 dspl/liblapack/SRC/dsytri_rook.f create mode 100644 dspl/liblapack/SRC/dsytrs.f create mode 100644 dspl/liblapack/SRC/dsytrs2.f create mode 100644 dspl/liblapack/SRC/dsytrs_3.f create mode 100644 dspl/liblapack/SRC/dsytrs_aa.f create mode 100644 dspl/liblapack/SRC/dsytrs_aa_2stage.f create mode 100644 dspl/liblapack/SRC/dsytrs_rook.f create mode 100644 dspl/liblapack/SRC/dtbcon.f create mode 100644 dspl/liblapack/SRC/dtbrfs.f create mode 100644 dspl/liblapack/SRC/dtbtrs.f create mode 100644 dspl/liblapack/SRC/dtfsm.f create mode 100644 dspl/liblapack/SRC/dtftri.f create mode 100644 dspl/liblapack/SRC/dtfttp.f create mode 100644 dspl/liblapack/SRC/dtfttr.f create mode 100644 dspl/liblapack/SRC/dtgevc.f create mode 100644 dspl/liblapack/SRC/dtgex2.f create mode 100644 dspl/liblapack/SRC/dtgexc.f create mode 100644 dspl/liblapack/SRC/dtgsen.f create mode 100644 dspl/liblapack/SRC/dtgsja.f create mode 100644 dspl/liblapack/SRC/dtgsna.f create mode 100644 dspl/liblapack/SRC/dtgsy2.f create mode 100644 dspl/liblapack/SRC/dtgsyl.f create mode 100644 dspl/liblapack/SRC/dtpcon.f create mode 100644 dspl/liblapack/SRC/dtplqt.f create mode 100644 dspl/liblapack/SRC/dtplqt2.f create mode 100644 dspl/liblapack/SRC/dtpmlqt.f create mode 100644 dspl/liblapack/SRC/dtpmqrt.f create mode 100644 dspl/liblapack/SRC/dtpqrt.f create mode 100644 dspl/liblapack/SRC/dtpqrt2.f create mode 100644 dspl/liblapack/SRC/dtprfb.f create mode 100644 dspl/liblapack/SRC/dtprfs.f create mode 100644 dspl/liblapack/SRC/dtptri.f create mode 100644 dspl/liblapack/SRC/dtptrs.f create mode 100644 dspl/liblapack/SRC/dtpttf.f create mode 100644 dspl/liblapack/SRC/dtpttr.f create mode 100644 dspl/liblapack/SRC/dtrcon.f create mode 100644 dspl/liblapack/SRC/dtrevc.f create mode 100644 dspl/liblapack/SRC/dtrevc3.f create mode 100644 dspl/liblapack/SRC/dtrexc.f create mode 100644 dspl/liblapack/SRC/dtrrfs.f create mode 100644 dspl/liblapack/SRC/dtrsen.f create mode 100644 dspl/liblapack/SRC/dtrsna.f create mode 100644 dspl/liblapack/SRC/dtrsyl.f create mode 100644 dspl/liblapack/SRC/dtrti2.f create mode 100644 dspl/liblapack/SRC/dtrtri.f create mode 100644 dspl/liblapack/SRC/dtrtrs.f create mode 100644 dspl/liblapack/SRC/dtrttf.f create mode 100644 dspl/liblapack/SRC/dtrttp.f create mode 100644 dspl/liblapack/SRC/dtzrzf.f create mode 100644 dspl/liblapack/SRC/dzsum1.f create mode 100644 dspl/liblapack/SRC/icmax1.f create mode 100644 dspl/liblapack/SRC/ieeeck.f create mode 100644 dspl/liblapack/SRC/ilaclc.f create mode 100644 dspl/liblapack/SRC/ilaclr.f create mode 100644 dspl/liblapack/SRC/iladiag.f create mode 100644 dspl/liblapack/SRC/iladlc.f create mode 100644 dspl/liblapack/SRC/iladlr.f create mode 100644 dspl/liblapack/SRC/ilaenv.f create mode 100644 dspl/liblapack/SRC/ilaenv2stage.f create mode 100644 dspl/liblapack/SRC/ilaprec.f create mode 100644 dspl/liblapack/SRC/ilaslc.f create mode 100644 dspl/liblapack/SRC/ilaslr.f create mode 100644 dspl/liblapack/SRC/ilatrans.f create mode 100644 dspl/liblapack/SRC/ilauplo.f create mode 100644 dspl/liblapack/SRC/ilazlc.f create mode 100644 dspl/liblapack/SRC/ilazlr.f create mode 100644 dspl/liblapack/SRC/iparam2stage.F create mode 100644 dspl/liblapack/SRC/iparmq.f create mode 100644 dspl/liblapack/SRC/izmax1.f create mode 100644 dspl/liblapack/SRC/lsamen.f create mode 100644 dspl/liblapack/SRC/sbbcsd.f create mode 100644 dspl/liblapack/SRC/sbdsdc.f create mode 100644 dspl/liblapack/SRC/sbdsqr.f create mode 100644 dspl/liblapack/SRC/sbdsvdx.f create mode 100644 dspl/liblapack/SRC/scsum1.f create mode 100644 dspl/liblapack/SRC/sdisna.f create mode 100644 dspl/liblapack/SRC/sgbbrd.f create mode 100644 dspl/liblapack/SRC/sgbcon.f create mode 100644 dspl/liblapack/SRC/sgbequ.f create mode 100644 dspl/liblapack/SRC/sgbequb.f create mode 100644 dspl/liblapack/SRC/sgbrfs.f create mode 100644 dspl/liblapack/SRC/sgbrfsx.f create mode 100644 dspl/liblapack/SRC/sgbsv.f create mode 100644 dspl/liblapack/SRC/sgbsvx.f create mode 100644 dspl/liblapack/SRC/sgbsvxx.f create mode 100644 dspl/liblapack/SRC/sgbtf2.f create mode 100644 dspl/liblapack/SRC/sgbtrf.f create mode 100644 dspl/liblapack/SRC/sgbtrs.f create mode 100644 dspl/liblapack/SRC/sgebak.f create mode 100644 dspl/liblapack/SRC/sgebal.f create mode 100644 dspl/liblapack/SRC/sgebd2.f create mode 100644 dspl/liblapack/SRC/sgebrd.f create mode 100644 dspl/liblapack/SRC/sgecon.f create mode 100644 dspl/liblapack/SRC/sgeequ.f create mode 100644 dspl/liblapack/SRC/sgeequb.f create mode 100644 dspl/liblapack/SRC/sgees.f create mode 100644 dspl/liblapack/SRC/sgeesx.f create mode 100644 dspl/liblapack/SRC/sgeev.f create mode 100644 dspl/liblapack/SRC/sgeevx.f create mode 100644 dspl/liblapack/SRC/sgehd2.f create mode 100644 dspl/liblapack/SRC/sgehrd.f create mode 100644 dspl/liblapack/SRC/sgejsv.f create mode 100644 dspl/liblapack/SRC/sgelq.f create mode 100644 dspl/liblapack/SRC/sgelq2.f create mode 100644 dspl/liblapack/SRC/sgelqf.f create mode 100644 dspl/liblapack/SRC/sgelqt.f create mode 100644 dspl/liblapack/SRC/sgelqt3.f create mode 100644 dspl/liblapack/SRC/sgels.f create mode 100644 dspl/liblapack/SRC/sgelsd.f create mode 100644 dspl/liblapack/SRC/sgelss.f create mode 100644 dspl/liblapack/SRC/sgelsy.f create mode 100644 dspl/liblapack/SRC/sgemlq.f create mode 100644 dspl/liblapack/SRC/sgemlqt.f create mode 100644 dspl/liblapack/SRC/sgemqr.f create mode 100644 dspl/liblapack/SRC/sgemqrt.f create mode 100644 dspl/liblapack/SRC/sgeql2.f create mode 100644 dspl/liblapack/SRC/sgeqlf.f create mode 100644 dspl/liblapack/SRC/sgeqp3.f create mode 100644 dspl/liblapack/SRC/sgeqr.f create mode 100644 dspl/liblapack/SRC/sgeqr2.f create mode 100644 dspl/liblapack/SRC/sgeqr2p.f create mode 100644 dspl/liblapack/SRC/sgeqrf.f create mode 100644 dspl/liblapack/SRC/sgeqrfp.f create mode 100644 dspl/liblapack/SRC/sgeqrt.f create mode 100644 dspl/liblapack/SRC/sgeqrt2.f create mode 100644 dspl/liblapack/SRC/sgeqrt3.f create mode 100644 dspl/liblapack/SRC/sgerfs.f create mode 100644 dspl/liblapack/SRC/sgerfsx.f create mode 100644 dspl/liblapack/SRC/sgerq2.f create mode 100644 dspl/liblapack/SRC/sgerqf.f create mode 100644 dspl/liblapack/SRC/sgesc2.f create mode 100644 dspl/liblapack/SRC/sgesdd.f create mode 100644 dspl/liblapack/SRC/sgesv.f create mode 100644 dspl/liblapack/SRC/sgesvd.f create mode 100644 dspl/liblapack/SRC/sgesvdx.f create mode 100644 dspl/liblapack/SRC/sgesvj.f create mode 100644 dspl/liblapack/SRC/sgesvx.f create mode 100644 dspl/liblapack/SRC/sgesvxx.f create mode 100644 dspl/liblapack/SRC/sgetc2.f create mode 100644 dspl/liblapack/SRC/sgetf2.f create mode 100644 dspl/liblapack/SRC/sgetrf.f create mode 100644 dspl/liblapack/SRC/sgetrf2.f create mode 100644 dspl/liblapack/SRC/sgetri.f create mode 100644 dspl/liblapack/SRC/sgetrs.f create mode 100644 dspl/liblapack/SRC/sgetsls.f create mode 100644 dspl/liblapack/SRC/sggbak.f create mode 100644 dspl/liblapack/SRC/sggbal.f create mode 100644 dspl/liblapack/SRC/sgges.f create mode 100644 dspl/liblapack/SRC/sgges3.f create mode 100644 dspl/liblapack/SRC/sggesx.f create mode 100644 dspl/liblapack/SRC/sggev.f create mode 100644 dspl/liblapack/SRC/sggev3.f create mode 100644 dspl/liblapack/SRC/sggevx.f create mode 100644 dspl/liblapack/SRC/sggglm.f create mode 100644 dspl/liblapack/SRC/sgghd3.f create mode 100644 dspl/liblapack/SRC/sgghrd.f create mode 100644 dspl/liblapack/SRC/sgglse.f create mode 100644 dspl/liblapack/SRC/sggqrf.f create mode 100644 dspl/liblapack/SRC/sggrqf.f create mode 100644 dspl/liblapack/SRC/sggsvd3.f create mode 100644 dspl/liblapack/SRC/sggsvp3.f create mode 100644 dspl/liblapack/SRC/sgsvj0.f create mode 100644 dspl/liblapack/SRC/sgsvj1.f create mode 100644 dspl/liblapack/SRC/sgtcon.f create mode 100644 dspl/liblapack/SRC/sgtrfs.f create mode 100644 dspl/liblapack/SRC/sgtsv.f create mode 100644 dspl/liblapack/SRC/sgtsvx.f create mode 100644 dspl/liblapack/SRC/sgttrf.f create mode 100644 dspl/liblapack/SRC/sgttrs.f create mode 100644 dspl/liblapack/SRC/sgtts2.f create mode 100644 dspl/liblapack/SRC/shgeqz.f create mode 100644 dspl/liblapack/SRC/shsein.f create mode 100644 dspl/liblapack/SRC/shseqr.f create mode 100644 dspl/liblapack/SRC/sisnan.f create mode 100644 dspl/liblapack/SRC/sla_gbamv.f create mode 100644 dspl/liblapack/SRC/sla_gbrcond.f create mode 100644 dspl/liblapack/SRC/sla_gbrfsx_extended.f create mode 100644 dspl/liblapack/SRC/sla_gbrpvgrw.f create mode 100644 dspl/liblapack/SRC/sla_geamv.f create mode 100644 dspl/liblapack/SRC/sla_gercond.f create mode 100644 dspl/liblapack/SRC/sla_gerfsx_extended.f create mode 100644 dspl/liblapack/SRC/sla_gerpvgrw.f create mode 100644 dspl/liblapack/SRC/sla_lin_berr.f create mode 100644 dspl/liblapack/SRC/sla_porcond.f create mode 100644 dspl/liblapack/SRC/sla_porfsx_extended.f create mode 100644 dspl/liblapack/SRC/sla_porpvgrw.f create mode 100644 dspl/liblapack/SRC/sla_syamv.f create mode 100644 dspl/liblapack/SRC/sla_syrcond.f create mode 100644 dspl/liblapack/SRC/sla_syrfsx_extended.f create mode 100644 dspl/liblapack/SRC/sla_syrpvgrw.f create mode 100644 dspl/liblapack/SRC/sla_wwaddw.f create mode 100644 dspl/liblapack/SRC/slabad.f create mode 100644 dspl/liblapack/SRC/slabrd.f create mode 100644 dspl/liblapack/SRC/slacn2.f create mode 100644 dspl/liblapack/SRC/slacon.f create mode 100644 dspl/liblapack/SRC/slacpy.f create mode 100644 dspl/liblapack/SRC/sladiv.f create mode 100644 dspl/liblapack/SRC/slae2.f create mode 100644 dspl/liblapack/SRC/slaebz.f create mode 100644 dspl/liblapack/SRC/slaed0.f create mode 100644 dspl/liblapack/SRC/slaed1.f create mode 100644 dspl/liblapack/SRC/slaed2.f create mode 100644 dspl/liblapack/SRC/slaed3.f create mode 100644 dspl/liblapack/SRC/slaed4.f create mode 100644 dspl/liblapack/SRC/slaed5.f create mode 100644 dspl/liblapack/SRC/slaed6.f create mode 100644 dspl/liblapack/SRC/slaed7.f create mode 100644 dspl/liblapack/SRC/slaed8.f create mode 100644 dspl/liblapack/SRC/slaed9.f create mode 100644 dspl/liblapack/SRC/slaeda.f create mode 100644 dspl/liblapack/SRC/slaein.f create mode 100644 dspl/liblapack/SRC/slaev2.f create mode 100644 dspl/liblapack/SRC/slaexc.f create mode 100644 dspl/liblapack/SRC/slag2.f create mode 100644 dspl/liblapack/SRC/slag2d.f create mode 100644 dspl/liblapack/SRC/slags2.f create mode 100644 dspl/liblapack/SRC/slagtf.f create mode 100644 dspl/liblapack/SRC/slagtm.f create mode 100644 dspl/liblapack/SRC/slagts.f create mode 100644 dspl/liblapack/SRC/slagv2.f create mode 100644 dspl/liblapack/SRC/slahqr.f create mode 100644 dspl/liblapack/SRC/slahr2.f create mode 100644 dspl/liblapack/SRC/slaic1.f create mode 100644 dspl/liblapack/SRC/slaisnan.f create mode 100644 dspl/liblapack/SRC/slaln2.f create mode 100644 dspl/liblapack/SRC/slals0.f create mode 100644 dspl/liblapack/SRC/slalsa.f create mode 100644 dspl/liblapack/SRC/slalsd.f create mode 100644 dspl/liblapack/SRC/slamrg.f create mode 100644 dspl/liblapack/SRC/slamswlq.f create mode 100644 dspl/liblapack/SRC/slamtsqr.f create mode 100644 dspl/liblapack/SRC/slaneg.f create mode 100644 dspl/liblapack/SRC/slangb.f create mode 100644 dspl/liblapack/SRC/slange.f create mode 100644 dspl/liblapack/SRC/slangt.f create mode 100644 dspl/liblapack/SRC/slanhs.f create mode 100644 dspl/liblapack/SRC/slansb.f create mode 100644 dspl/liblapack/SRC/slansf.f create mode 100644 dspl/liblapack/SRC/slansp.f create mode 100644 dspl/liblapack/SRC/slanst.f create mode 100644 dspl/liblapack/SRC/slansy.f create mode 100644 dspl/liblapack/SRC/slantb.f create mode 100644 dspl/liblapack/SRC/slantp.f create mode 100644 dspl/liblapack/SRC/slantr.f create mode 100644 dspl/liblapack/SRC/slanv2.f create mode 100644 dspl/liblapack/SRC/slapll.f create mode 100644 dspl/liblapack/SRC/slapmr.f create mode 100644 dspl/liblapack/SRC/slapmt.f create mode 100644 dspl/liblapack/SRC/slapy2.f create mode 100644 dspl/liblapack/SRC/slapy3.f create mode 100644 dspl/liblapack/SRC/slaqgb.f create mode 100644 dspl/liblapack/SRC/slaqge.f create mode 100644 dspl/liblapack/SRC/slaqp2.f create mode 100644 dspl/liblapack/SRC/slaqps.f create mode 100644 dspl/liblapack/SRC/slaqr0.f create mode 100644 dspl/liblapack/SRC/slaqr1.f create mode 100644 dspl/liblapack/SRC/slaqr2.f create mode 100644 dspl/liblapack/SRC/slaqr3.f create mode 100644 dspl/liblapack/SRC/slaqr4.f create mode 100644 dspl/liblapack/SRC/slaqr5.f create mode 100644 dspl/liblapack/SRC/slaqsb.f create mode 100644 dspl/liblapack/SRC/slaqsp.f create mode 100644 dspl/liblapack/SRC/slaqsy.f create mode 100644 dspl/liblapack/SRC/slaqtr.f create mode 100644 dspl/liblapack/SRC/slar1v.f create mode 100644 dspl/liblapack/SRC/slar2v.f create mode 100644 dspl/liblapack/SRC/slarf.f create mode 100644 dspl/liblapack/SRC/slarfb.f create mode 100644 dspl/liblapack/SRC/slarfg.f create mode 100644 dspl/liblapack/SRC/slarfgp.f create mode 100644 dspl/liblapack/SRC/slarft.f create mode 100644 dspl/liblapack/SRC/slarfx.f create mode 100644 dspl/liblapack/SRC/slarfy.f create mode 100644 dspl/liblapack/SRC/slargv.f create mode 100644 dspl/liblapack/SRC/slarnv.f create mode 100644 dspl/liblapack/SRC/slarra.f create mode 100644 dspl/liblapack/SRC/slarrb.f create mode 100644 dspl/liblapack/SRC/slarrc.f create mode 100644 dspl/liblapack/SRC/slarrd.f create mode 100644 dspl/liblapack/SRC/slarre.f create mode 100644 dspl/liblapack/SRC/slarrf.f create mode 100644 dspl/liblapack/SRC/slarrj.f create mode 100644 dspl/liblapack/SRC/slarrk.f create mode 100644 dspl/liblapack/SRC/slarrr.f create mode 100644 dspl/liblapack/SRC/slarrv.f create mode 100644 dspl/liblapack/SRC/slarscl2.f create mode 100644 dspl/liblapack/SRC/slartg.f create mode 100644 dspl/liblapack/SRC/slartgp.f create mode 100644 dspl/liblapack/SRC/slartgs.f create mode 100644 dspl/liblapack/SRC/slartv.f create mode 100644 dspl/liblapack/SRC/slaruv.f create mode 100644 dspl/liblapack/SRC/slarz.f create mode 100644 dspl/liblapack/SRC/slarzb.f create mode 100644 dspl/liblapack/SRC/slarzt.f create mode 100644 dspl/liblapack/SRC/slas2.f create mode 100644 dspl/liblapack/SRC/slascl.f create mode 100644 dspl/liblapack/SRC/slascl2.f create mode 100644 dspl/liblapack/SRC/slasd0.f create mode 100644 dspl/liblapack/SRC/slasd1.f create mode 100644 dspl/liblapack/SRC/slasd2.f create mode 100644 dspl/liblapack/SRC/slasd3.f create mode 100644 dspl/liblapack/SRC/slasd4.f create mode 100644 dspl/liblapack/SRC/slasd5.f create mode 100644 dspl/liblapack/SRC/slasd6.f create mode 100644 dspl/liblapack/SRC/slasd7.f create mode 100644 dspl/liblapack/SRC/slasd8.f create mode 100644 dspl/liblapack/SRC/slasda.f create mode 100644 dspl/liblapack/SRC/slasdq.f create mode 100644 dspl/liblapack/SRC/slasdt.f create mode 100644 dspl/liblapack/SRC/slaset.f create mode 100644 dspl/liblapack/SRC/slasq1.f create mode 100644 dspl/liblapack/SRC/slasq2.f create mode 100644 dspl/liblapack/SRC/slasq3.f create mode 100644 dspl/liblapack/SRC/slasq4.f create mode 100644 dspl/liblapack/SRC/slasq5.f create mode 100644 dspl/liblapack/SRC/slasq6.f create mode 100644 dspl/liblapack/SRC/slasr.f create mode 100644 dspl/liblapack/SRC/slasrt.f create mode 100644 dspl/liblapack/SRC/slassq.f create mode 100644 dspl/liblapack/SRC/slasv2.f create mode 100644 dspl/liblapack/SRC/slaswlq.f create mode 100644 dspl/liblapack/SRC/slaswp.f create mode 100644 dspl/liblapack/SRC/slasy2.f create mode 100644 dspl/liblapack/SRC/slasyf.f create mode 100644 dspl/liblapack/SRC/slasyf_aa.f create mode 100644 dspl/liblapack/SRC/slasyf_rk.f create mode 100644 dspl/liblapack/SRC/slasyf_rook.f create mode 100644 dspl/liblapack/SRC/slatbs.f create mode 100644 dspl/liblapack/SRC/slatdf.f create mode 100644 dspl/liblapack/SRC/slatps.f create mode 100644 dspl/liblapack/SRC/slatrd.f create mode 100644 dspl/liblapack/SRC/slatrs.f create mode 100644 dspl/liblapack/SRC/slatrz.f create mode 100644 dspl/liblapack/SRC/slatsqr.f create mode 100644 dspl/liblapack/SRC/slauu2.f create mode 100644 dspl/liblapack/SRC/slauum.f create mode 100644 dspl/liblapack/SRC/sopgtr.f create mode 100644 dspl/liblapack/SRC/sopmtr.f create mode 100644 dspl/liblapack/SRC/sorbdb.f create mode 100644 dspl/liblapack/SRC/sorbdb1.f create mode 100644 dspl/liblapack/SRC/sorbdb2.f create mode 100644 dspl/liblapack/SRC/sorbdb3.f create mode 100644 dspl/liblapack/SRC/sorbdb4.f create mode 100644 dspl/liblapack/SRC/sorbdb5.f create mode 100644 dspl/liblapack/SRC/sorbdb6.f create mode 100644 dspl/liblapack/SRC/sorcsd.f create mode 100644 dspl/liblapack/SRC/sorcsd2by1.f create mode 100644 dspl/liblapack/SRC/sorg2l.f create mode 100644 dspl/liblapack/SRC/sorg2r.f create mode 100644 dspl/liblapack/SRC/sorgbr.f create mode 100644 dspl/liblapack/SRC/sorghr.f create mode 100644 dspl/liblapack/SRC/sorgl2.f create mode 100644 dspl/liblapack/SRC/sorglq.f create mode 100644 dspl/liblapack/SRC/sorgql.f create mode 100644 dspl/liblapack/SRC/sorgqr.f create mode 100644 dspl/liblapack/SRC/sorgr2.f create mode 100644 dspl/liblapack/SRC/sorgrq.f create mode 100644 dspl/liblapack/SRC/sorgtr.f create mode 100644 dspl/liblapack/SRC/sorm22.f create mode 100644 dspl/liblapack/SRC/sorm2l.f create mode 100644 dspl/liblapack/SRC/sorm2r.f create mode 100644 dspl/liblapack/SRC/sormbr.f create mode 100644 dspl/liblapack/SRC/sormhr.f create mode 100644 dspl/liblapack/SRC/sorml2.f create mode 100644 dspl/liblapack/SRC/sormlq.f create mode 100644 dspl/liblapack/SRC/sormql.f create mode 100644 dspl/liblapack/SRC/sormqr.f create mode 100644 dspl/liblapack/SRC/sormr2.f create mode 100644 dspl/liblapack/SRC/sormr3.f create mode 100644 dspl/liblapack/SRC/sormrq.f create mode 100644 dspl/liblapack/SRC/sormrz.f create mode 100644 dspl/liblapack/SRC/sormtr.f create mode 100644 dspl/liblapack/SRC/spbcon.f create mode 100644 dspl/liblapack/SRC/spbequ.f create mode 100644 dspl/liblapack/SRC/spbrfs.f create mode 100644 dspl/liblapack/SRC/spbstf.f create mode 100644 dspl/liblapack/SRC/spbsv.f create mode 100644 dspl/liblapack/SRC/spbsvx.f create mode 100644 dspl/liblapack/SRC/spbtf2.f create mode 100644 dspl/liblapack/SRC/spbtrf.f create mode 100644 dspl/liblapack/SRC/spbtrs.f create mode 100644 dspl/liblapack/SRC/spftrf.f create mode 100644 dspl/liblapack/SRC/spftri.f create mode 100644 dspl/liblapack/SRC/spftrs.f create mode 100644 dspl/liblapack/SRC/spocon.f create mode 100644 dspl/liblapack/SRC/spoequ.f create mode 100644 dspl/liblapack/SRC/spoequb.f create mode 100644 dspl/liblapack/SRC/sporfs.f create mode 100644 dspl/liblapack/SRC/sporfsx.f create mode 100644 dspl/liblapack/SRC/sposv.f create mode 100644 dspl/liblapack/SRC/sposvx.f create mode 100644 dspl/liblapack/SRC/sposvxx.f create mode 100644 dspl/liblapack/SRC/spotf2.f create mode 100644 dspl/liblapack/SRC/spotrf.f create mode 100644 dspl/liblapack/SRC/spotrf2.f create mode 100644 dspl/liblapack/SRC/spotri.f create mode 100644 dspl/liblapack/SRC/spotrs.f create mode 100644 dspl/liblapack/SRC/sppcon.f create mode 100644 dspl/liblapack/SRC/sppequ.f create mode 100644 dspl/liblapack/SRC/spprfs.f create mode 100644 dspl/liblapack/SRC/sppsv.f create mode 100644 dspl/liblapack/SRC/sppsvx.f create mode 100644 dspl/liblapack/SRC/spptrf.f create mode 100644 dspl/liblapack/SRC/spptri.f create mode 100644 dspl/liblapack/SRC/spptrs.f create mode 100644 dspl/liblapack/SRC/spstf2.f create mode 100644 dspl/liblapack/SRC/spstrf.f create mode 100644 dspl/liblapack/SRC/sptcon.f create mode 100644 dspl/liblapack/SRC/spteqr.f create mode 100644 dspl/liblapack/SRC/sptrfs.f create mode 100644 dspl/liblapack/SRC/sptsv.f create mode 100644 dspl/liblapack/SRC/sptsvx.f create mode 100644 dspl/liblapack/SRC/spttrf.f create mode 100644 dspl/liblapack/SRC/spttrs.f create mode 100644 dspl/liblapack/SRC/sptts2.f create mode 100644 dspl/liblapack/SRC/srscl.f create mode 100644 dspl/liblapack/SRC/ssb2st_kernels.f create mode 100644 dspl/liblapack/SRC/ssbev.f create mode 100644 dspl/liblapack/SRC/ssbev_2stage.f create mode 100644 dspl/liblapack/SRC/ssbevd.f create mode 100644 dspl/liblapack/SRC/ssbevd_2stage.f create mode 100644 dspl/liblapack/SRC/ssbevx.f create mode 100644 dspl/liblapack/SRC/ssbevx_2stage.f create mode 100644 dspl/liblapack/SRC/ssbgst.f create mode 100644 dspl/liblapack/SRC/ssbgv.f create mode 100644 dspl/liblapack/SRC/ssbgvd.f create mode 100644 dspl/liblapack/SRC/ssbgvx.f create mode 100644 dspl/liblapack/SRC/ssbtrd.f create mode 100644 dspl/liblapack/SRC/ssfrk.f create mode 100644 dspl/liblapack/SRC/sspcon.f create mode 100644 dspl/liblapack/SRC/sspev.f create mode 100644 dspl/liblapack/SRC/sspevd.f create mode 100644 dspl/liblapack/SRC/sspevx.f create mode 100644 dspl/liblapack/SRC/sspgst.f create mode 100644 dspl/liblapack/SRC/sspgv.f create mode 100644 dspl/liblapack/SRC/sspgvd.f create mode 100644 dspl/liblapack/SRC/sspgvx.f create mode 100644 dspl/liblapack/SRC/ssprfs.f create mode 100644 dspl/liblapack/SRC/sspsv.f create mode 100644 dspl/liblapack/SRC/sspsvx.f create mode 100644 dspl/liblapack/SRC/ssptrd.f create mode 100644 dspl/liblapack/SRC/ssptrf.f create mode 100644 dspl/liblapack/SRC/ssptri.f create mode 100644 dspl/liblapack/SRC/ssptrs.f create mode 100644 dspl/liblapack/SRC/sstebz.f create mode 100644 dspl/liblapack/SRC/sstedc.f create mode 100644 dspl/liblapack/SRC/sstegr.f create mode 100644 dspl/liblapack/SRC/sstein.f create mode 100644 dspl/liblapack/SRC/sstemr.f create mode 100644 dspl/liblapack/SRC/ssteqr.f create mode 100644 dspl/liblapack/SRC/ssterf.f create mode 100644 dspl/liblapack/SRC/sstev.f create mode 100644 dspl/liblapack/SRC/sstevd.f create mode 100644 dspl/liblapack/SRC/sstevr.f create mode 100644 dspl/liblapack/SRC/sstevx.f create mode 100644 dspl/liblapack/SRC/ssycon.f create mode 100644 dspl/liblapack/SRC/ssycon_3.f create mode 100644 dspl/liblapack/SRC/ssycon_rook.f create mode 100644 dspl/liblapack/SRC/ssyconv.f create mode 100644 dspl/liblapack/SRC/ssyconvf.f create mode 100644 dspl/liblapack/SRC/ssyconvf_rook.f create mode 100644 dspl/liblapack/SRC/ssyequb.f create mode 100644 dspl/liblapack/SRC/ssyev.f create mode 100644 dspl/liblapack/SRC/ssyev_2stage.f create mode 100644 dspl/liblapack/SRC/ssyevd.f create mode 100644 dspl/liblapack/SRC/ssyevd_2stage.f create mode 100644 dspl/liblapack/SRC/ssyevr.f create mode 100644 dspl/liblapack/SRC/ssyevr_2stage.f create mode 100644 dspl/liblapack/SRC/ssyevx.f create mode 100644 dspl/liblapack/SRC/ssyevx_2stage.f create mode 100644 dspl/liblapack/SRC/ssygs2.f create mode 100644 dspl/liblapack/SRC/ssygst.f create mode 100644 dspl/liblapack/SRC/ssygv.f create mode 100644 dspl/liblapack/SRC/ssygv_2stage.f create mode 100644 dspl/liblapack/SRC/ssygvd.f create mode 100644 dspl/liblapack/SRC/ssygvx.f create mode 100644 dspl/liblapack/SRC/ssyrfs.f create mode 100644 dspl/liblapack/SRC/ssyrfsx.f create mode 100644 dspl/liblapack/SRC/ssysv.f create mode 100644 dspl/liblapack/SRC/ssysv_aa.f create mode 100644 dspl/liblapack/SRC/ssysv_aa_2stage.f create mode 100644 dspl/liblapack/SRC/ssysv_rk.f create mode 100644 dspl/liblapack/SRC/ssysv_rook.f create mode 100644 dspl/liblapack/SRC/ssysvx.f create mode 100644 dspl/liblapack/SRC/ssysvxx.f create mode 100644 dspl/liblapack/SRC/ssyswapr.f create mode 100644 dspl/liblapack/SRC/ssytd2.f create mode 100644 dspl/liblapack/SRC/ssytf2.f create mode 100644 dspl/liblapack/SRC/ssytf2_rk.f create mode 100644 dspl/liblapack/SRC/ssytf2_rook.f create mode 100644 dspl/liblapack/SRC/ssytrd.f create mode 100644 dspl/liblapack/SRC/ssytrd_2stage.f create mode 100644 dspl/liblapack/SRC/ssytrd_sb2st.F create mode 100644 dspl/liblapack/SRC/ssytrd_sy2sb.f create mode 100644 dspl/liblapack/SRC/ssytrf.f create mode 100644 dspl/liblapack/SRC/ssytrf_aa.f create mode 100644 dspl/liblapack/SRC/ssytrf_aa_2stage.f create mode 100644 dspl/liblapack/SRC/ssytrf_rk.f create mode 100644 dspl/liblapack/SRC/ssytrf_rook.f create mode 100644 dspl/liblapack/SRC/ssytri.f create mode 100644 dspl/liblapack/SRC/ssytri2.f create mode 100644 dspl/liblapack/SRC/ssytri2x.f create mode 100644 dspl/liblapack/SRC/ssytri_3.f create mode 100644 dspl/liblapack/SRC/ssytri_3x.f create mode 100644 dspl/liblapack/SRC/ssytri_rook.f create mode 100644 dspl/liblapack/SRC/ssytrs.f create mode 100644 dspl/liblapack/SRC/ssytrs2.f create mode 100644 dspl/liblapack/SRC/ssytrs_3.f create mode 100644 dspl/liblapack/SRC/ssytrs_aa.f create mode 100644 dspl/liblapack/SRC/ssytrs_aa_2stage.f create mode 100644 dspl/liblapack/SRC/ssytrs_rook.f create mode 100644 dspl/liblapack/SRC/stbcon.f create mode 100644 dspl/liblapack/SRC/stbrfs.f create mode 100644 dspl/liblapack/SRC/stbtrs.f create mode 100644 dspl/liblapack/SRC/stfsm.f create mode 100644 dspl/liblapack/SRC/stftri.f create mode 100644 dspl/liblapack/SRC/stfttp.f create mode 100644 dspl/liblapack/SRC/stfttr.f create mode 100644 dspl/liblapack/SRC/stgevc.f create mode 100644 dspl/liblapack/SRC/stgex2.f create mode 100644 dspl/liblapack/SRC/stgexc.f create mode 100644 dspl/liblapack/SRC/stgsen.f create mode 100644 dspl/liblapack/SRC/stgsja.f create mode 100644 dspl/liblapack/SRC/stgsna.f create mode 100644 dspl/liblapack/SRC/stgsy2.f create mode 100644 dspl/liblapack/SRC/stgsyl.f create mode 100644 dspl/liblapack/SRC/stpcon.f create mode 100644 dspl/liblapack/SRC/stplqt.f create mode 100644 dspl/liblapack/SRC/stplqt2.f create mode 100644 dspl/liblapack/SRC/stpmlqt.f create mode 100644 dspl/liblapack/SRC/stpmqrt.f create mode 100644 dspl/liblapack/SRC/stpqrt.f create mode 100644 dspl/liblapack/SRC/stpqrt2.f create mode 100644 dspl/liblapack/SRC/stprfb.f create mode 100644 dspl/liblapack/SRC/stprfs.f create mode 100644 dspl/liblapack/SRC/stptri.f create mode 100644 dspl/liblapack/SRC/stptrs.f create mode 100644 dspl/liblapack/SRC/stpttf.f create mode 100644 dspl/liblapack/SRC/stpttr.f create mode 100644 dspl/liblapack/SRC/strcon.f create mode 100644 dspl/liblapack/SRC/strevc.f create mode 100644 dspl/liblapack/SRC/strevc3.f create mode 100644 dspl/liblapack/SRC/strexc.f create mode 100644 dspl/liblapack/SRC/strrfs.f create mode 100644 dspl/liblapack/SRC/strsen.f create mode 100644 dspl/liblapack/SRC/strsna.f create mode 100644 dspl/liblapack/SRC/strsyl.f create mode 100644 dspl/liblapack/SRC/strti2.f create mode 100644 dspl/liblapack/SRC/strtri.f create mode 100644 dspl/liblapack/SRC/strtrs.f create mode 100644 dspl/liblapack/SRC/strttf.f create mode 100644 dspl/liblapack/SRC/strttp.f create mode 100644 dspl/liblapack/SRC/stzrzf.f create mode 100644 dspl/liblapack/SRC/xerbla.f create mode 100644 dspl/liblapack/SRC/xerbla_array.f create mode 100644 dspl/liblapack/SRC/zbbcsd.f create mode 100644 dspl/liblapack/SRC/zbdsqr.f create mode 100644 dspl/liblapack/SRC/zcgesv.f create mode 100644 dspl/liblapack/SRC/zcposv.f create mode 100644 dspl/liblapack/SRC/zdrscl.f create mode 100644 dspl/liblapack/SRC/zgbbrd.f create mode 100644 dspl/liblapack/SRC/zgbcon.f create mode 100644 dspl/liblapack/SRC/zgbequ.f create mode 100644 dspl/liblapack/SRC/zgbequb.f create mode 100644 dspl/liblapack/SRC/zgbrfs.f create mode 100644 dspl/liblapack/SRC/zgbrfsx.f create mode 100644 dspl/liblapack/SRC/zgbsv.f create mode 100644 dspl/liblapack/SRC/zgbsvx.f create mode 100644 dspl/liblapack/SRC/zgbsvxx.f create mode 100644 dspl/liblapack/SRC/zgbtf2.f create mode 100644 dspl/liblapack/SRC/zgbtrf.f create mode 100644 dspl/liblapack/SRC/zgbtrs.f create mode 100644 dspl/liblapack/SRC/zgebak.f create mode 100644 dspl/liblapack/SRC/zgebal.f create mode 100644 dspl/liblapack/SRC/zgebd2.f create mode 100644 dspl/liblapack/SRC/zgebrd.f create mode 100644 dspl/liblapack/SRC/zgecon.f create mode 100644 dspl/liblapack/SRC/zgeequ.f create mode 100644 dspl/liblapack/SRC/zgeequb.f create mode 100644 dspl/liblapack/SRC/zgees.f create mode 100644 dspl/liblapack/SRC/zgeesx.f create mode 100644 dspl/liblapack/SRC/zgeev.f create mode 100644 dspl/liblapack/SRC/zgeevx.f create mode 100644 dspl/liblapack/SRC/zgehd2.f create mode 100644 dspl/liblapack/SRC/zgehrd.f create mode 100644 dspl/liblapack/SRC/zgejsv.f create mode 100644 dspl/liblapack/SRC/zgelq.f create mode 100644 dspl/liblapack/SRC/zgelq2.f create mode 100644 dspl/liblapack/SRC/zgelqf.f create mode 100644 dspl/liblapack/SRC/zgelqt.f create mode 100644 dspl/liblapack/SRC/zgelqt3.f create mode 100644 dspl/liblapack/SRC/zgels.f create mode 100644 dspl/liblapack/SRC/zgelsd.f create mode 100644 dspl/liblapack/SRC/zgelss.f create mode 100644 dspl/liblapack/SRC/zgelsy.f create mode 100644 dspl/liblapack/SRC/zgemlq.f create mode 100644 dspl/liblapack/SRC/zgemlqt.f create mode 100644 dspl/liblapack/SRC/zgemqr.f create mode 100644 dspl/liblapack/SRC/zgemqrt.f create mode 100644 dspl/liblapack/SRC/zgeql2.f create mode 100644 dspl/liblapack/SRC/zgeqlf.f create mode 100644 dspl/liblapack/SRC/zgeqp3.f create mode 100644 dspl/liblapack/SRC/zgeqr.f create mode 100644 dspl/liblapack/SRC/zgeqr2.f create mode 100644 dspl/liblapack/SRC/zgeqr2p.f create mode 100644 dspl/liblapack/SRC/zgeqrf.f create mode 100644 dspl/liblapack/SRC/zgeqrfp.f create mode 100644 dspl/liblapack/SRC/zgeqrt.f create mode 100644 dspl/liblapack/SRC/zgeqrt2.f create mode 100644 dspl/liblapack/SRC/zgeqrt3.f create mode 100644 dspl/liblapack/SRC/zgerfs.f create mode 100644 dspl/liblapack/SRC/zgerfsx.f create mode 100644 dspl/liblapack/SRC/zgerq2.f create mode 100644 dspl/liblapack/SRC/zgerqf.f create mode 100644 dspl/liblapack/SRC/zgesc2.f create mode 100644 dspl/liblapack/SRC/zgesdd.f create mode 100644 dspl/liblapack/SRC/zgesv.f create mode 100644 dspl/liblapack/SRC/zgesvd.f create mode 100644 dspl/liblapack/SRC/zgesvdx.f create mode 100644 dspl/liblapack/SRC/zgesvj.f create mode 100644 dspl/liblapack/SRC/zgesvx.f create mode 100644 dspl/liblapack/SRC/zgesvxx.f create mode 100644 dspl/liblapack/SRC/zgetc2.f create mode 100644 dspl/liblapack/SRC/zgetf2.f create mode 100644 dspl/liblapack/SRC/zgetrf.f create mode 100644 dspl/liblapack/SRC/zgetrf2.f create mode 100644 dspl/liblapack/SRC/zgetri.f create mode 100644 dspl/liblapack/SRC/zgetrs.f create mode 100644 dspl/liblapack/SRC/zgetsls.f create mode 100644 dspl/liblapack/SRC/zggbak.f create mode 100644 dspl/liblapack/SRC/zggbal.f create mode 100644 dspl/liblapack/SRC/zgges.f create mode 100644 dspl/liblapack/SRC/zgges3.f create mode 100644 dspl/liblapack/SRC/zggesx.f create mode 100644 dspl/liblapack/SRC/zggev.f create mode 100644 dspl/liblapack/SRC/zggev3.f create mode 100644 dspl/liblapack/SRC/zggevx.f create mode 100644 dspl/liblapack/SRC/zggglm.f create mode 100644 dspl/liblapack/SRC/zgghd3.f create mode 100644 dspl/liblapack/SRC/zgghrd.f create mode 100644 dspl/liblapack/SRC/zgglse.f create mode 100644 dspl/liblapack/SRC/zggqrf.f create mode 100644 dspl/liblapack/SRC/zggrqf.f create mode 100644 dspl/liblapack/SRC/zggsvd3.f create mode 100644 dspl/liblapack/SRC/zggsvp3.f create mode 100644 dspl/liblapack/SRC/zgsvj0.f create mode 100644 dspl/liblapack/SRC/zgsvj1.f create mode 100644 dspl/liblapack/SRC/zgtcon.f create mode 100644 dspl/liblapack/SRC/zgtrfs.f create mode 100644 dspl/liblapack/SRC/zgtsv.f create mode 100644 dspl/liblapack/SRC/zgtsvx.f create mode 100644 dspl/liblapack/SRC/zgttrf.f create mode 100644 dspl/liblapack/SRC/zgttrs.f create mode 100644 dspl/liblapack/SRC/zgtts2.f create mode 100644 dspl/liblapack/SRC/zhb2st_kernels.f create mode 100644 dspl/liblapack/SRC/zhbev.f create mode 100644 dspl/liblapack/SRC/zhbev_2stage.f create mode 100644 dspl/liblapack/SRC/zhbevd.f create mode 100644 dspl/liblapack/SRC/zhbevd_2stage.f create mode 100644 dspl/liblapack/SRC/zhbevx.f create mode 100644 dspl/liblapack/SRC/zhbevx_2stage.f create mode 100644 dspl/liblapack/SRC/zhbgst.f create mode 100644 dspl/liblapack/SRC/zhbgv.f create mode 100644 dspl/liblapack/SRC/zhbgvd.f create mode 100644 dspl/liblapack/SRC/zhbgvx.f create mode 100644 dspl/liblapack/SRC/zhbtrd.f create mode 100644 dspl/liblapack/SRC/zhecon.f create mode 100644 dspl/liblapack/SRC/zhecon_3.f create mode 100644 dspl/liblapack/SRC/zhecon_rook.f create mode 100644 dspl/liblapack/SRC/zheequb.f create mode 100644 dspl/liblapack/SRC/zheev.f create mode 100644 dspl/liblapack/SRC/zheev_2stage.f create mode 100644 dspl/liblapack/SRC/zheevd.f create mode 100644 dspl/liblapack/SRC/zheevd_2stage.f create mode 100644 dspl/liblapack/SRC/zheevr.f create mode 100644 dspl/liblapack/SRC/zheevr_2stage.f create mode 100644 dspl/liblapack/SRC/zheevx.f create mode 100644 dspl/liblapack/SRC/zheevx_2stage.f create mode 100644 dspl/liblapack/SRC/zhegs2.f create mode 100644 dspl/liblapack/SRC/zhegst.f create mode 100644 dspl/liblapack/SRC/zhegv.f create mode 100644 dspl/liblapack/SRC/zhegv_2stage.f create mode 100644 dspl/liblapack/SRC/zhegvd.f create mode 100644 dspl/liblapack/SRC/zhegvx.f create mode 100644 dspl/liblapack/SRC/zherfs.f create mode 100644 dspl/liblapack/SRC/zherfsx.f create mode 100644 dspl/liblapack/SRC/zhesv.f create mode 100644 dspl/liblapack/SRC/zhesv_aa.f create mode 100644 dspl/liblapack/SRC/zhesv_aa_2stage.f create mode 100644 dspl/liblapack/SRC/zhesv_rk.f create mode 100644 dspl/liblapack/SRC/zhesv_rook.f create mode 100644 dspl/liblapack/SRC/zhesvx.f create mode 100644 dspl/liblapack/SRC/zhesvxx.f create mode 100644 dspl/liblapack/SRC/zheswapr.f create mode 100644 dspl/liblapack/SRC/zhetd2.f create mode 100644 dspl/liblapack/SRC/zhetf2.f create mode 100644 dspl/liblapack/SRC/zhetf2_rk.f create mode 100644 dspl/liblapack/SRC/zhetf2_rook.f create mode 100644 dspl/liblapack/SRC/zhetrd.f create mode 100644 dspl/liblapack/SRC/zhetrd_2stage.f create mode 100644 dspl/liblapack/SRC/zhetrd_hb2st.F create mode 100644 dspl/liblapack/SRC/zhetrd_he2hb.f create mode 100644 dspl/liblapack/SRC/zhetrf.f create mode 100644 dspl/liblapack/SRC/zhetrf_aa.f create mode 100644 dspl/liblapack/SRC/zhetrf_aa_2stage.f create mode 100644 dspl/liblapack/SRC/zhetrf_rk.f create mode 100644 dspl/liblapack/SRC/zhetrf_rook.f create mode 100644 dspl/liblapack/SRC/zhetri.f create mode 100644 dspl/liblapack/SRC/zhetri2.f create mode 100644 dspl/liblapack/SRC/zhetri2x.f create mode 100644 dspl/liblapack/SRC/zhetri_3.f create mode 100644 dspl/liblapack/SRC/zhetri_3x.f create mode 100644 dspl/liblapack/SRC/zhetri_rook.f create mode 100644 dspl/liblapack/SRC/zhetrs.f create mode 100644 dspl/liblapack/SRC/zhetrs2.f create mode 100644 dspl/liblapack/SRC/zhetrs_3.f create mode 100644 dspl/liblapack/SRC/zhetrs_aa.f create mode 100644 dspl/liblapack/SRC/zhetrs_aa_2stage.f create mode 100644 dspl/liblapack/SRC/zhetrs_rook.f create mode 100644 dspl/liblapack/SRC/zhfrk.f create mode 100644 dspl/liblapack/SRC/zhgeqz.f create mode 100644 dspl/liblapack/SRC/zhpcon.f create mode 100644 dspl/liblapack/SRC/zhpev.f create mode 100644 dspl/liblapack/SRC/zhpevd.f create mode 100644 dspl/liblapack/SRC/zhpevx.f create mode 100644 dspl/liblapack/SRC/zhpgst.f create mode 100644 dspl/liblapack/SRC/zhpgv.f create mode 100644 dspl/liblapack/SRC/zhpgvd.f create mode 100644 dspl/liblapack/SRC/zhpgvx.f create mode 100644 dspl/liblapack/SRC/zhprfs.f create mode 100644 dspl/liblapack/SRC/zhpsv.f create mode 100644 dspl/liblapack/SRC/zhpsvx.f create mode 100644 dspl/liblapack/SRC/zhptrd.f create mode 100644 dspl/liblapack/SRC/zhptrf.f create mode 100644 dspl/liblapack/SRC/zhptri.f create mode 100644 dspl/liblapack/SRC/zhptrs.f create mode 100644 dspl/liblapack/SRC/zhsein.f create mode 100644 dspl/liblapack/SRC/zhseqr.f create mode 100644 dspl/liblapack/SRC/zla_gbamv.f create mode 100644 dspl/liblapack/SRC/zla_gbrcond_c.f create mode 100644 dspl/liblapack/SRC/zla_gbrcond_x.f create mode 100644 dspl/liblapack/SRC/zla_gbrfsx_extended.f create mode 100644 dspl/liblapack/SRC/zla_gbrpvgrw.f create mode 100644 dspl/liblapack/SRC/zla_geamv.f create mode 100644 dspl/liblapack/SRC/zla_gercond_c.f create mode 100644 dspl/liblapack/SRC/zla_gercond_x.f create mode 100644 dspl/liblapack/SRC/zla_gerfsx_extended.f create mode 100644 dspl/liblapack/SRC/zla_gerpvgrw.f create mode 100644 dspl/liblapack/SRC/zla_heamv.f create mode 100644 dspl/liblapack/SRC/zla_hercond_c.f create mode 100644 dspl/liblapack/SRC/zla_hercond_x.f create mode 100644 dspl/liblapack/SRC/zla_herfsx_extended.f create mode 100644 dspl/liblapack/SRC/zla_herpvgrw.f create mode 100644 dspl/liblapack/SRC/zla_lin_berr.f create mode 100644 dspl/liblapack/SRC/zla_porcond_c.f create mode 100644 dspl/liblapack/SRC/zla_porcond_x.f create mode 100644 dspl/liblapack/SRC/zla_porfsx_extended.f create mode 100644 dspl/liblapack/SRC/zla_porpvgrw.f create mode 100644 dspl/liblapack/SRC/zla_syamv.f create mode 100644 dspl/liblapack/SRC/zla_syrcond_c.f create mode 100644 dspl/liblapack/SRC/zla_syrcond_x.f create mode 100644 dspl/liblapack/SRC/zla_syrfsx_extended.f create mode 100644 dspl/liblapack/SRC/zla_syrpvgrw.f create mode 100644 dspl/liblapack/SRC/zla_wwaddw.f create mode 100644 dspl/liblapack/SRC/zlabrd.f create mode 100644 dspl/liblapack/SRC/zlacgv.f create mode 100644 dspl/liblapack/SRC/zlacn2.f create mode 100644 dspl/liblapack/SRC/zlacon.f create mode 100644 dspl/liblapack/SRC/zlacp2.f create mode 100644 dspl/liblapack/SRC/zlacpy.f create mode 100644 dspl/liblapack/SRC/zlacrm.f create mode 100644 dspl/liblapack/SRC/zlacrt.f create mode 100644 dspl/liblapack/SRC/zladiv.f create mode 100644 dspl/liblapack/SRC/zlaed0.f create mode 100644 dspl/liblapack/SRC/zlaed7.f create mode 100644 dspl/liblapack/SRC/zlaed8.f create mode 100644 dspl/liblapack/SRC/zlaein.f create mode 100644 dspl/liblapack/SRC/zlaesy.f create mode 100644 dspl/liblapack/SRC/zlaev2.f create mode 100644 dspl/liblapack/SRC/zlag2c.f create mode 100644 dspl/liblapack/SRC/zlags2.f create mode 100644 dspl/liblapack/SRC/zlagtm.f create mode 100644 dspl/liblapack/SRC/zlahef.f create mode 100644 dspl/liblapack/SRC/zlahef_aa.f create mode 100644 dspl/liblapack/SRC/zlahef_rk.f create mode 100644 dspl/liblapack/SRC/zlahef_rook.f create mode 100644 dspl/liblapack/SRC/zlahqr.f create mode 100644 dspl/liblapack/SRC/zlahr2.f create mode 100644 dspl/liblapack/SRC/zlaic1.f create mode 100644 dspl/liblapack/SRC/zlals0.f create mode 100644 dspl/liblapack/SRC/zlalsa.f create mode 100644 dspl/liblapack/SRC/zlalsd.f create mode 100644 dspl/liblapack/SRC/zlamswlq.f create mode 100644 dspl/liblapack/SRC/zlamtsqr.f create mode 100644 dspl/liblapack/SRC/zlangb.f create mode 100644 dspl/liblapack/SRC/zlange.f create mode 100644 dspl/liblapack/SRC/zlangt.f create mode 100644 dspl/liblapack/SRC/zlanhb.f create mode 100644 dspl/liblapack/SRC/zlanhe.f create mode 100644 dspl/liblapack/SRC/zlanhf.f create mode 100644 dspl/liblapack/SRC/zlanhp.f create mode 100644 dspl/liblapack/SRC/zlanhs.f create mode 100644 dspl/liblapack/SRC/zlanht.f create mode 100644 dspl/liblapack/SRC/zlansb.f create mode 100644 dspl/liblapack/SRC/zlansp.f create mode 100644 dspl/liblapack/SRC/zlansy.f create mode 100644 dspl/liblapack/SRC/zlantb.f create mode 100644 dspl/liblapack/SRC/zlantp.f create mode 100644 dspl/liblapack/SRC/zlantr.f create mode 100644 dspl/liblapack/SRC/zlapll.f create mode 100644 dspl/liblapack/SRC/zlapmr.f create mode 100644 dspl/liblapack/SRC/zlapmt.f create mode 100644 dspl/liblapack/SRC/zlaqgb.f create mode 100644 dspl/liblapack/SRC/zlaqge.f create mode 100644 dspl/liblapack/SRC/zlaqhb.f create mode 100644 dspl/liblapack/SRC/zlaqhe.f create mode 100644 dspl/liblapack/SRC/zlaqhp.f create mode 100644 dspl/liblapack/SRC/zlaqp2.f create mode 100644 dspl/liblapack/SRC/zlaqps.f create mode 100644 dspl/liblapack/SRC/zlaqr0.f create mode 100644 dspl/liblapack/SRC/zlaqr1.f create mode 100644 dspl/liblapack/SRC/zlaqr2.f create mode 100644 dspl/liblapack/SRC/zlaqr3.f create mode 100644 dspl/liblapack/SRC/zlaqr4.f create mode 100644 dspl/liblapack/SRC/zlaqr5.f create mode 100644 dspl/liblapack/SRC/zlaqsb.f create mode 100644 dspl/liblapack/SRC/zlaqsp.f create mode 100644 dspl/liblapack/SRC/zlaqsy.f create mode 100644 dspl/liblapack/SRC/zlar1v.f create mode 100644 dspl/liblapack/SRC/zlar2v.f create mode 100644 dspl/liblapack/SRC/zlarcm.f create mode 100644 dspl/liblapack/SRC/zlarf.f create mode 100644 dspl/liblapack/SRC/zlarfb.f create mode 100644 dspl/liblapack/SRC/zlarfg.f create mode 100644 dspl/liblapack/SRC/zlarfgp.f create mode 100644 dspl/liblapack/SRC/zlarft.f create mode 100644 dspl/liblapack/SRC/zlarfx.f create mode 100644 dspl/liblapack/SRC/zlarfy.f create mode 100644 dspl/liblapack/SRC/zlargv.f create mode 100644 dspl/liblapack/SRC/zlarnv.f create mode 100644 dspl/liblapack/SRC/zlarrv.f create mode 100644 dspl/liblapack/SRC/zlarscl2.f create mode 100644 dspl/liblapack/SRC/zlartg.f create mode 100644 dspl/liblapack/SRC/zlartv.f create mode 100644 dspl/liblapack/SRC/zlarz.f create mode 100644 dspl/liblapack/SRC/zlarzb.f create mode 100644 dspl/liblapack/SRC/zlarzt.f create mode 100644 dspl/liblapack/SRC/zlascl.f create mode 100644 dspl/liblapack/SRC/zlascl2.f create mode 100644 dspl/liblapack/SRC/zlaset.f create mode 100644 dspl/liblapack/SRC/zlasr.f create mode 100644 dspl/liblapack/SRC/zlassq.f create mode 100644 dspl/liblapack/SRC/zlaswlq.f create mode 100644 dspl/liblapack/SRC/zlaswp.f create mode 100644 dspl/liblapack/SRC/zlasyf.f create mode 100644 dspl/liblapack/SRC/zlasyf_aa.f create mode 100644 dspl/liblapack/SRC/zlasyf_rk.f create mode 100644 dspl/liblapack/SRC/zlasyf_rook.f create mode 100644 dspl/liblapack/SRC/zlat2c.f create mode 100644 dspl/liblapack/SRC/zlatbs.f create mode 100644 dspl/liblapack/SRC/zlatdf.f create mode 100644 dspl/liblapack/SRC/zlatps.f create mode 100644 dspl/liblapack/SRC/zlatrd.f create mode 100644 dspl/liblapack/SRC/zlatrs.f create mode 100644 dspl/liblapack/SRC/zlatrz.f create mode 100644 dspl/liblapack/SRC/zlatsqr.f create mode 100644 dspl/liblapack/SRC/zlauu2.f create mode 100644 dspl/liblapack/SRC/zlauum.f create mode 100644 dspl/liblapack/SRC/zpbcon.f create mode 100644 dspl/liblapack/SRC/zpbequ.f create mode 100644 dspl/liblapack/SRC/zpbrfs.f create mode 100644 dspl/liblapack/SRC/zpbstf.f create mode 100644 dspl/liblapack/SRC/zpbsv.f create mode 100644 dspl/liblapack/SRC/zpbsvx.f create mode 100644 dspl/liblapack/SRC/zpbtf2.f create mode 100644 dspl/liblapack/SRC/zpbtrf.f create mode 100644 dspl/liblapack/SRC/zpbtrs.f create mode 100644 dspl/liblapack/SRC/zpftrf.f create mode 100644 dspl/liblapack/SRC/zpftri.f create mode 100644 dspl/liblapack/SRC/zpftrs.f create mode 100644 dspl/liblapack/SRC/zpocon.f create mode 100644 dspl/liblapack/SRC/zpoequ.f create mode 100644 dspl/liblapack/SRC/zpoequb.f create mode 100644 dspl/liblapack/SRC/zporfs.f create mode 100644 dspl/liblapack/SRC/zporfsx.f create mode 100644 dspl/liblapack/SRC/zposv.f create mode 100644 dspl/liblapack/SRC/zposvx.f create mode 100644 dspl/liblapack/SRC/zposvxx.f create mode 100644 dspl/liblapack/SRC/zpotf2.f create mode 100644 dspl/liblapack/SRC/zpotrf.f create mode 100644 dspl/liblapack/SRC/zpotrf2.f create mode 100644 dspl/liblapack/SRC/zpotri.f create mode 100644 dspl/liblapack/SRC/zpotrs.f create mode 100644 dspl/liblapack/SRC/zppcon.f create mode 100644 dspl/liblapack/SRC/zppequ.f create mode 100644 dspl/liblapack/SRC/zpprfs.f create mode 100644 dspl/liblapack/SRC/zppsv.f create mode 100644 dspl/liblapack/SRC/zppsvx.f create mode 100644 dspl/liblapack/SRC/zpptrf.f create mode 100644 dspl/liblapack/SRC/zpptri.f create mode 100644 dspl/liblapack/SRC/zpptrs.f create mode 100644 dspl/liblapack/SRC/zpstf2.f create mode 100644 dspl/liblapack/SRC/zpstrf.f create mode 100644 dspl/liblapack/SRC/zptcon.f create mode 100644 dspl/liblapack/SRC/zpteqr.f create mode 100644 dspl/liblapack/SRC/zptrfs.f create mode 100644 dspl/liblapack/SRC/zptsv.f create mode 100644 dspl/liblapack/SRC/zptsvx.f create mode 100644 dspl/liblapack/SRC/zpttrf.f create mode 100644 dspl/liblapack/SRC/zpttrs.f create mode 100644 dspl/liblapack/SRC/zptts2.f create mode 100644 dspl/liblapack/SRC/zrot.f create mode 100644 dspl/liblapack/SRC/zspcon.f create mode 100644 dspl/liblapack/SRC/zspmv.f create mode 100644 dspl/liblapack/SRC/zspr.f create mode 100644 dspl/liblapack/SRC/zsprfs.f create mode 100644 dspl/liblapack/SRC/zspsv.f create mode 100644 dspl/liblapack/SRC/zspsvx.f create mode 100644 dspl/liblapack/SRC/zsptrf.f create mode 100644 dspl/liblapack/SRC/zsptri.f create mode 100644 dspl/liblapack/SRC/zsptrs.f create mode 100644 dspl/liblapack/SRC/zstedc.f create mode 100644 dspl/liblapack/SRC/zstegr.f create mode 100644 dspl/liblapack/SRC/zstein.f create mode 100644 dspl/liblapack/SRC/zstemr.f create mode 100644 dspl/liblapack/SRC/zsteqr.f create mode 100644 dspl/liblapack/SRC/zsycon.f create mode 100644 dspl/liblapack/SRC/zsycon_3.f create mode 100644 dspl/liblapack/SRC/zsycon_rook.f create mode 100644 dspl/liblapack/SRC/zsyconv.f create mode 100644 dspl/liblapack/SRC/zsyconvf.f create mode 100644 dspl/liblapack/SRC/zsyconvf_rook.f create mode 100644 dspl/liblapack/SRC/zsyequb.f create mode 100644 dspl/liblapack/SRC/zsymv.f create mode 100644 dspl/liblapack/SRC/zsyr.f create mode 100644 dspl/liblapack/SRC/zsyrfs.f create mode 100644 dspl/liblapack/SRC/zsyrfsx.f create mode 100644 dspl/liblapack/SRC/zsysv.f create mode 100644 dspl/liblapack/SRC/zsysv_aa.f create mode 100644 dspl/liblapack/SRC/zsysv_aa_2stage.f create mode 100644 dspl/liblapack/SRC/zsysv_rk.f create mode 100644 dspl/liblapack/SRC/zsysv_rook.f create mode 100644 dspl/liblapack/SRC/zsysvx.f create mode 100644 dspl/liblapack/SRC/zsysvxx.f create mode 100644 dspl/liblapack/SRC/zsyswapr.f create mode 100644 dspl/liblapack/SRC/zsytf2.f create mode 100644 dspl/liblapack/SRC/zsytf2_rk.f create mode 100644 dspl/liblapack/SRC/zsytf2_rook.f create mode 100644 dspl/liblapack/SRC/zsytrf.f create mode 100644 dspl/liblapack/SRC/zsytrf_aa.f create mode 100644 dspl/liblapack/SRC/zsytrf_aa_2stage.f create mode 100644 dspl/liblapack/SRC/zsytrf_rk.f create mode 100644 dspl/liblapack/SRC/zsytrf_rook.f create mode 100644 dspl/liblapack/SRC/zsytri.f create mode 100644 dspl/liblapack/SRC/zsytri2.f create mode 100644 dspl/liblapack/SRC/zsytri2x.f create mode 100644 dspl/liblapack/SRC/zsytri_3.f create mode 100644 dspl/liblapack/SRC/zsytri_3x.f create mode 100644 dspl/liblapack/SRC/zsytri_rook.f create mode 100644 dspl/liblapack/SRC/zsytrs.f create mode 100644 dspl/liblapack/SRC/zsytrs2.f create mode 100644 dspl/liblapack/SRC/zsytrs_3.f create mode 100644 dspl/liblapack/SRC/zsytrs_aa.f create mode 100644 dspl/liblapack/SRC/zsytrs_aa_2stage.f create mode 100644 dspl/liblapack/SRC/zsytrs_rook.f create mode 100644 dspl/liblapack/SRC/ztbcon.f create mode 100644 dspl/liblapack/SRC/ztbrfs.f create mode 100644 dspl/liblapack/SRC/ztbtrs.f create mode 100644 dspl/liblapack/SRC/ztfsm.f create mode 100644 dspl/liblapack/SRC/ztftri.f create mode 100644 dspl/liblapack/SRC/ztfttp.f create mode 100644 dspl/liblapack/SRC/ztfttr.f create mode 100644 dspl/liblapack/SRC/ztgevc.f create mode 100644 dspl/liblapack/SRC/ztgex2.f create mode 100644 dspl/liblapack/SRC/ztgexc.f create mode 100644 dspl/liblapack/SRC/ztgsen.f create mode 100644 dspl/liblapack/SRC/ztgsja.f create mode 100644 dspl/liblapack/SRC/ztgsna.f create mode 100644 dspl/liblapack/SRC/ztgsy2.f create mode 100644 dspl/liblapack/SRC/ztgsyl.f create mode 100644 dspl/liblapack/SRC/ztpcon.f create mode 100644 dspl/liblapack/SRC/ztplqt.f create mode 100644 dspl/liblapack/SRC/ztplqt2.f create mode 100644 dspl/liblapack/SRC/ztpmlqt.f create mode 100644 dspl/liblapack/SRC/ztpmqrt.f create mode 100644 dspl/liblapack/SRC/ztpqrt.f create mode 100644 dspl/liblapack/SRC/ztpqrt2.f create mode 100644 dspl/liblapack/SRC/ztprfb.f create mode 100644 dspl/liblapack/SRC/ztprfs.f create mode 100644 dspl/liblapack/SRC/ztptri.f create mode 100644 dspl/liblapack/SRC/ztptrs.f create mode 100644 dspl/liblapack/SRC/ztpttf.f create mode 100644 dspl/liblapack/SRC/ztpttr.f create mode 100644 dspl/liblapack/SRC/ztrcon.f create mode 100644 dspl/liblapack/SRC/ztrevc.f create mode 100644 dspl/liblapack/SRC/ztrevc3.f create mode 100644 dspl/liblapack/SRC/ztrexc.f create mode 100644 dspl/liblapack/SRC/ztrrfs.f create mode 100644 dspl/liblapack/SRC/ztrsen.f create mode 100644 dspl/liblapack/SRC/ztrsna.f create mode 100644 dspl/liblapack/SRC/ztrsyl.f create mode 100644 dspl/liblapack/SRC/ztrti2.f create mode 100644 dspl/liblapack/SRC/ztrtri.f create mode 100644 dspl/liblapack/SRC/ztrtrs.f create mode 100644 dspl/liblapack/SRC/ztrttf.f create mode 100644 dspl/liblapack/SRC/ztrttp.f create mode 100644 dspl/liblapack/SRC/ztzrzf.f create mode 100644 dspl/liblapack/SRC/zunbdb.f create mode 100644 dspl/liblapack/SRC/zunbdb1.f create mode 100644 dspl/liblapack/SRC/zunbdb2.f create mode 100644 dspl/liblapack/SRC/zunbdb3.f create mode 100644 dspl/liblapack/SRC/zunbdb4.f create mode 100644 dspl/liblapack/SRC/zunbdb5.f create mode 100644 dspl/liblapack/SRC/zunbdb6.f create mode 100644 dspl/liblapack/SRC/zuncsd.f create mode 100644 dspl/liblapack/SRC/zuncsd2by1.f create mode 100644 dspl/liblapack/SRC/zung2l.f create mode 100644 dspl/liblapack/SRC/zung2r.f create mode 100644 dspl/liblapack/SRC/zungbr.f create mode 100644 dspl/liblapack/SRC/zunghr.f create mode 100644 dspl/liblapack/SRC/zungl2.f create mode 100644 dspl/liblapack/SRC/zunglq.f create mode 100644 dspl/liblapack/SRC/zungql.f create mode 100644 dspl/liblapack/SRC/zungqr.f create mode 100644 dspl/liblapack/SRC/zungr2.f create mode 100644 dspl/liblapack/SRC/zungrq.f create mode 100644 dspl/liblapack/SRC/zungtr.f create mode 100644 dspl/liblapack/SRC/zunm22.f create mode 100644 dspl/liblapack/SRC/zunm2l.f create mode 100644 dspl/liblapack/SRC/zunm2r.f create mode 100644 dspl/liblapack/SRC/zunmbr.f create mode 100644 dspl/liblapack/SRC/zunmhr.f create mode 100644 dspl/liblapack/SRC/zunml2.f create mode 100644 dspl/liblapack/SRC/zunmlq.f create mode 100644 dspl/liblapack/SRC/zunmql.f create mode 100644 dspl/liblapack/SRC/zunmqr.f create mode 100644 dspl/liblapack/SRC/zunmr2.f create mode 100644 dspl/liblapack/SRC/zunmr3.f create mode 100644 dspl/liblapack/SRC/zunmrq.f create mode 100644 dspl/liblapack/SRC/zunmrz.f create mode 100644 dspl/liblapack/SRC/zunmtr.f create mode 100644 dspl/liblapack/SRC/zupgtr.f create mode 100644 dspl/liblapack/SRC/zupmtr.f create mode 100644 dspl/liblapack/make.inc create mode 100644 make.inc diff --git a/.gitignore b/.gitignore index d739953..819556b 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ *.txt *.dat *.bin -*.csv \ No newline at end of file +*.csv +*.a \ No newline at end of file diff --git a/Makefile b/Makefile index a21095a..a551ca6 100644 --- a/Makefile +++ b/Makefile @@ -1,22 +1,5 @@ - - - -ifeq ($(OS),Windows_NT) - MAKE = mingw32-make -else - UNAME_S := $(shell uname -s) - ifeq ($(UNAME_S),Linux) - MAKE = make - else ifeq ($(UNAME_S),Darwin) - MAKE = make - endif -endif - -include Makefile.dirs - - - +include make.inc all: $(MAKE) -f Makefile.dspl @@ -34,5 +17,12 @@ clean: $(MAKE) -f Makefile.dspl clean $(MAKE) -f Makefile.verif clean $(MAKE) -f Makefile.examples clean + rm -f $(BLAS_SRC_DIR)/*.o + rm -f $(LAPACK_SRC_DIR)/*.o +clean_all: + $(MAKE) clean + rm -f $(BLAS_LIB_DIR)/*.a + rm -f $(LAPACK_LIB_DIR)/*.a + diff --git a/Makefile.dirs b/Makefile.dirs deleted file mode 100644 index 563a55f..0000000 --- a/Makefile.dirs +++ /dev/null @@ -1,27 +0,0 @@ -CC = gcc -FORTRAN = gfortran -AR = ar - - -INC_DIR = include -RELEASE_DIR = release/lib - -ifeq ($(OS),Windows_NT) - DSPL_LIBNAME = libdspl.dll - DEF_OS = WIN_OS - LFLAGS = -lm -else - UNAME_S := $(shell uname -s) - UNAME_P := $(shell uname -p) - ifeq ($(UNAME_S),Linux) - DSPL_LIBNAME = libdspl.so - DEF_OS = LINUX_OS - LFLAGS = -lm -ldl - else ifeq ($(UNAME_S),Darwin) - DSPL_LIBNAME = libdspl.so - DEF_OS = LINUX_OS - LFLAGS = -lm -ldl - endif -endif - - diff --git a/Makefile.dspl b/Makefile.dspl index c9c1df4..27f8129 100644 --- a/Makefile.dspl +++ b/Makefile.dspl @@ -1,56 +1,30 @@ -LIB_DIR = dspl -include Makefile.dirs +include make.inc -# DSPL source and obj file path -DSPL_SRC_DIR = $(LIB_DIR)/dspl_src -DSPL_OBJ_DIR = $(LIB_DIR)/dspl_obj - -# BLAS source and obj file path -BLAS_SRC_DIR = $(LIB_DIR)/blas_src -BLAS_OBJ_DIR = $(LIB_DIR)/blas_obj - -#common source for DSPL and examples -COMMON_SRC_DIR = $(COMMON_DIR)/src - -LIB_NAME = $(DSPL_LIBNAME) # C-compiler flags CFLAGS = -c -fPIC -Wall -O3 -I$(INC_DIR) -DBUILD_LIB -D$(DEF_OS) -#fortran compiler flags -FFLAGS = -O3 - # DSPL src and obj files list DSPL_SRC_FILES = $(wildcard $(DSPL_SRC_DIR)/*.c) DSPL_OBJ_FILES = $(addprefix $(DSPL_OBJ_DIR)/,$(notdir $(DSPL_SRC_FILES:.c=.o))) -# BLAS src and obj files list -BLAS_SRC_FILES = $(wildcard $(BLAS_SRC_DIR)/*.f) -BLAS_OBJ_FILES = $(addprefix $(BLAS_OBJ_DIR)/,$(notdir $(BLAS_SRC_FILES:.f=.o))) - -BLAS_LIB = $(BLAS_OBJ_DIR)/libblas.a - - all: $(RELEASE_DIR)/$(LIB_NAME) -$(RELEASE_DIR)/$(LIB_NAME): $(DSPL_OBJ_FILES) $(BLAS_LIB) - $(CC) -shared -o $(RELEASE_DIR)/$(LIB_NAME) $(DSPL_OBJ_FILES) -lm -L$(BLAS_OBJ_DIR) -lblas +$(RELEASE_DIR)/$(LIB_NAME): $(DSPL_OBJ_FILES) $(BLAS_LIB_NAME) $(LAPACK_LIB_NAME) + $(CC) -shared -o $(RELEASE_DIR)/$(LIB_NAME) $(DSPL_OBJ_FILES) -lm -L$(BLAS_LIB_DIR) -lblas -L$(LAPACK_LIB_DIR) -llapack $(DSPL_OBJ_DIR)/%.o:$(DSPL_SRC_DIR)/%.c $(CC) $(CFLAGS) $< -o $@ -lm +$(BLAS_LIB_NAME): + $(MAKE) -C $(BLAS_SRC_DIR) - -$(BLAS_OBJ_DIR)/libblas.a: $(BLAS_OBJ_FILES) - $(AR) rcs $(BLAS_LIB) $(BLAS_OBJ_FILES) - - -$(BLAS_OBJ_DIR)/%.o:$(BLAS_SRC_DIR)/%.f - $(FORTRAN) $(FFLAGS) -c $< -o $@ +$(LAPACK_LIB_NAME): + $(MAKE) -C $(LAPACK_SRC_DIR) clean: diff --git a/Makefile.examples b/Makefile.examples index 47036b6..b68125b 100644 --- a/Makefile.examples +++ b/Makefile.examples @@ -3,7 +3,7 @@ SRC_DIR = $(PRJ_DIR)/src BIN_DIR = $(PRJ_DIR)/bin -include Makefile.dirs +include make.inc DSPL_C_FILE = $(INC_DIR)/dspl.c DSPL_O_FILE = $(PRJ_DIR)/obj/dspl.o diff --git a/Makefile.verif b/Makefile.verif index 6608af3..5cec1f1 100644 --- a/Makefile.verif +++ b/Makefile.verif @@ -3,7 +3,7 @@ SRC_DIR = $(PRJ_DIR)/src BIN_DIR = $(PRJ_DIR)/bin -include Makefile.dirs +include make.inc DSPL_C_FILE = $(INC_DIR)/dspl.c DSPL_O_FILE = $(PRJ_DIR)/obj/dspl.o diff --git a/dox/doxyfile_ru b/dox/doxyfile_ru index 9dab857..f497024 100644 --- a/dox/doxyfile_ru +++ b/dox/doxyfile_ru @@ -819,7 +819,8 @@ INPUT = ru \ ../examples/src \ ../examples/bin/gnuplot \ ../examples/bin/img \ - ../dspl/dspl_dox/ru + ../dspl/dspl_dox/ru \ + ../dspl/blas_src # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses diff --git a/dspl.project.win.geany b/dspl.project.win.geany index dcc9851..3329281 100644 --- a/dspl.project.win.geany +++ b/dspl.project.win.geany @@ -28,17 +28,21 @@ long_line_behaviour=1 long_line_column=72 [files] -current_page=4 -FILE_NAME_0=0;C;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Clibdspl-2.0%5Cdspl%5Csrc%5Cfilter_ap.c;0;2 -FILE_NAME_1=360;C;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Clibdspl-2.0%5Cexamples%5Csrc%5Ccheby_poly1_test.c;0;2 -FILE_NAME_2=3040;C;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Clibdspl-2.0%5Cdspl%5Csrc%5Ccheby.c;0;2 -FILE_NAME_3=507;C;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Clibdspl-2.0%5Cexamples%5Csrc%5Ciir_test.c;0;2 -FILE_NAME_4=80;None;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Clibdspl-2.0%5Cexamples%5Cbin%5Cgnuplot%5Ciir_test.plt;0;2 -FILE_NAME_5=1097;C;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Cru%5Ccontent%5Cfourier_series%5Cc%5Cfourier_series_dirichlet_ex.c;0;2 -FILE_NAME_6=2672;C;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Cru%5Ccontent%5Cfourier_series%5Cc%5Cfourier_series_pimp_spectrum.c;0;2 -FILE_NAME_7=0;C;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Cru%5Ccontent%5Cfourier_series%5Cc%5Cfourier_series_rec.c;0;2 -FILE_NAME_8=4814;C;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Clibdspl-2.0%5Cdspl%5Csrc%5Cfourier_series.c;0;2 -FILE_NAME_9=5801;None;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Clibdspl-2.0%5Cdspl%5Cdox%5Cru%5Cfourier_series.dox;0;2 +current_page=7 +FILE_NAME_0=360;C;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Clibdspl-2.0%5Cexamples%5Csrc%5Ccheby_poly1_test.c;0;2 +FILE_NAME_1=507;C;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Clibdspl-2.0%5Cexamples%5Csrc%5Ciir_test.c;0;2 +FILE_NAME_2=80;None;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Clibdspl-2.0%5Cexamples%5Cbin%5Cgnuplot%5Ciir_test.plt;0;2 +FILE_NAME_3=1097;C;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Cru%5Ccontent%5Cfourier_series%5Cc%5Cfourier_series_dirichlet_ex.c;0;2 +FILE_NAME_4=2672;C;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Cru%5Ccontent%5Cfourier_series%5Cc%5Cfourier_series_pimp_spectrum.c;0;2 +FILE_NAME_5=0;C;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Cru%5Ccontent%5Cfourier_series%5Cc%5Cfourier_series_rec.c;0;2 +FILE_NAME_6=12388;C;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Clibdspl-2.0%5Cdspl%5Cdspl_src%5Carray.c;0;2 +FILE_NAME_7=424;C++;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Clibdspl-2.0%5Cdspl%5Cdspl_src%5Cblas.h;0;2 +FILE_NAME_8=1737;F77;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Clibdspl-2.0%5Cdspl%5Cblas_src%5Cddot.f;0;2 +FILE_NAME_9=51351;C++;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Clibdspl-2.0%5Cinclude%5Cdspl.h;0;2 +FILE_NAME_10=13061;C;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Clibdspl-2.0%5Cinclude%5Cdspl.c;0;2 +FILE_NAME_11=323;C;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Clibdspl-2.0%5Cexamples%5Csrc%5Carray_test.c;0;2 +FILE_NAME_12=1454;F77;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Clibdspl-2.0%5Cdspl%5Cblas_src%5Cdasum.f;0;2 +FILE_NAME_13=419;F77;0;EUTF-8;0;1;0;F%3A%5Cdsplib.org%5Clibdspl-2.0%5Cdspl%5Cblas_src%5Cdaxpy.f;0;2 [build-menu] NF_00_LB=_Собрать diff --git a/dspl/dspl_src/array.c b/dspl/dspl_src/array.c index d65697d..d8fbb1a 100644 --- a/dspl/dspl_src/array.c +++ b/dspl/dspl_src/array.c @@ -451,10 +451,9 @@ int DSPL_API vector_dot(double* x, double* y, int n, double* p) return ERROR_PTR; if(n<1) return ERROR_SIZE; - - /* double ddot_(int* n, double* dx, int* incx, double* dy, int* incy); */ + *p = ddot_(&n, x, &inc, y, &inc); - //*p = 0; + return RES_OK; } diff --git a/dspl/dspl_src/blas.h b/dspl/dspl_src/blas.h index d80092a..4506ac9 100644 --- a/dspl/dspl_src/blas.h +++ b/dspl/dspl_src/blas.h @@ -1,5 +1,367 @@ #include #include +#ifndef BLAS_H +#define BLAS_H + + +#define FORTRAN_FUNC(FUNC) FUNC##_ + +int FORTRAN_FUNC(xerbla)(const char*, int*info, int); + +float FORTRAN_FUNC(sdot) (int*, float*, int*, float*, int*); +float FORTRAN_FUNC(sdsdot)(int*, float*, float*, int*, float*, int*); + +double FORTRAN_FUNC(dsdot) (int*, float*, int*, float*, int*); +double FORTRAN_FUNC(ddot) (int*, double*, int*, double*, int*); +double FORTRAN_FUNC(qdot) (int*, double*, int*, double*, int*); + +int FORTRAN_FUNC(cdotuw) (int*, float*, int*, float*, int*, float*); +int FORTRAN_FUNC(cdotcw) (int*, float*, int*, float*, int*, float*); +int FORTRAN_FUNC(zdotuw) (int*, double*, int*, double*, int*, double*); +int FORTRAN_FUNC(zdotcw) (int*, double*, int*, double*, int*, double*); + +int FORTRAN_FUNC(saxpy) (const int*, const float*, const float*, const int*, float*, const int*); +int FORTRAN_FUNC(daxpy) (const int*, const double*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(qaxpy) (const int*, const double*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(caxpy) (const int*, const float*, const float*, const int*, float*, const int*); +int FORTRAN_FUNC(zaxpy) (const int*, const double*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(xaxpy) (const int*, const double*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(caxpyc)(const int*, const float*, const float*, const int*, float*, const int*); +int FORTRAN_FUNC(zaxpyc)(const int*, const double*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(xaxpyc)(const int*, const double*, const double*, const int*, double*, const int*); + +int FORTRAN_FUNC(scopy) (int*, float*, int*, float*, int*); +int FORTRAN_FUNC(dcopy) (int*, double*, int*, double*, int*); +int FORTRAN_FUNC(qcopy) (int*, double*, int*, double*, int*); +int FORTRAN_FUNC(ccopy) (int*, float*, int*, float*, int*); +int FORTRAN_FUNC(zcopy) (int*, double*, int*, double*, int*); +int FORTRAN_FUNC(xcopy) (int*, double*, int*, double*, int*); + +int FORTRAN_FUNC(sswap) (int*, float*, int*, float*, int*); +int FORTRAN_FUNC(dswap) (int*, double*, int*, double*, int*); +int FORTRAN_FUNC(qswap) (int*, double*, int*, double*, int*); +int FORTRAN_FUNC(cswap) (int*, float*, int*, float*, int*); +int FORTRAN_FUNC(zswap) (int*, double*, int*, double*, int*); +int FORTRAN_FUNC(xswap) (int*, double*, int*, double*, int*); + +float FORTRAN_FUNC(sasum) (int*, float*, int*); +float FORTRAN_FUNC(scasum)(int*, float*, int*); +double FORTRAN_FUNC(dasum) (int*, double*, int*); +double FORTRAN_FUNC(qasum) (int*, double*, int*); +double FORTRAN_FUNC(dzasum)(int*, double*, int*); +double FORTRAN_FUNC(qxasum)(int*, double*, int*); + +int FORTRAN_FUNC(isamax)(int*, float*, int*); +int FORTRAN_FUNC(idamax)(int*, double*, int*); +int FORTRAN_FUNC(iqamax)(int*, double*, int*); +int FORTRAN_FUNC(icamax)(int*, float*, int*); +int FORTRAN_FUNC(izamax)(int*, double*, int*); +int FORTRAN_FUNC(ixamax)(int*, double*, int*); + +int FORTRAN_FUNC(ismax) (int*, float*, int*); +int FORTRAN_FUNC(idmax) (int*, double*, int*); +int FORTRAN_FUNC(iqmax) (int*, double*, int*); +int FORTRAN_FUNC(icmax) (int*, float*, int*); +int FORTRAN_FUNC(izmax) (int*, double*, int*); +int FORTRAN_FUNC(ixmax) (int*, double*, int*); + +int FORTRAN_FUNC(isamin)(int*, float*, int*); +int FORTRAN_FUNC(idamin)(int*, double*, int*); +int FORTRAN_FUNC(iqamin)(int*, double*, int*); +int FORTRAN_FUNC(icamin)(int*, float*, int*); +int FORTRAN_FUNC(izamin)(int*, double*, int*); +int FORTRAN_FUNC(ixamin)(int*, double*, int*); + +int FORTRAN_FUNC(ismin)(int*, float*, int*); +int FORTRAN_FUNC(idmin)(int*, double*, int*); +int FORTRAN_FUNC(iqmin)(int*, double*, int*); +int FORTRAN_FUNC(icmin)(int*, float*, int*); +int FORTRAN_FUNC(izmin)(int*, double*, int*); +int FORTRAN_FUNC(ixmin)(int*, double*, int*); + +float FORTRAN_FUNC(samax) (int*, float*, int*); +double FORTRAN_FUNC(damax) (int*, double*, int*); +double FORTRAN_FUNC(qamax) (int*, double*, int*); +float FORTRAN_FUNC(scamax)(int*, float*, int*); +double FORTRAN_FUNC(dzamax)(int*, double*, int*); +double FORTRAN_FUNC(qxamax)(int*, double*, int*); + +float FORTRAN_FUNC(samin) (int*, float*, int*); +double FORTRAN_FUNC(damin) (int*, double*, int*); +double FORTRAN_FUNC(qamin) (int*, double*, int*); +float FORTRAN_FUNC(scamin)(int*, float*, int*); +double FORTRAN_FUNC(dzamin)(int*, double*, int*); +double FORTRAN_FUNC(qxamin)(int*, double*, int*); + +float FORTRAN_FUNC(smax) (int*, float*, int*); +double FORTRAN_FUNC(dmax) (int*, double*, int*); +double FORTRAN_FUNC(qmax) (int*, double*, int*); +float FORTRAN_FUNC(scmax) (int*, float*, int*); +double FORTRAN_FUNC(dzmax) (int*, double*, int*); +double FORTRAN_FUNC(qxmax) (int*, double*, int*); + +float FORTRAN_FUNC(smin) (int*, float*, int*); +double FORTRAN_FUNC(dmin) (int*, double*, int*); +double FORTRAN_FUNC(qmin) (int*, double*, int*); +float FORTRAN_FUNC(scmin) (int*, float*, int*); +double FORTRAN_FUNC(dzmin) (int*, double*, int*); +double FORTRAN_FUNC(qxmin) (int*, double*, int*); + +int FORTRAN_FUNC(sscal) (int*, float*, float*, int*); +int FORTRAN_FUNC(dscal) (int*, double*, double*, int*); +int FORTRAN_FUNC(qscal) (int*, double*, double*, int*); +int FORTRAN_FUNC(cscal) (int*, float*, float*, int*); +int FORTRAN_FUNC(zscal) (int*, double*, double*, int*); +int FORTRAN_FUNC(xscal) (int*, double*, double*, int*); +int FORTRAN_FUNC(csscal)(int*, float*, float*, int*); +int FORTRAN_FUNC(zdscal)(int*, double*, double*, int*); +int FORTRAN_FUNC(xqscal)(int*, double*, double*, int*); + +float FORTRAN_FUNC(snrm2) (int*, float*, int*); +float FORTRAN_FUNC(scnrm2)(int*, float*, int*); + +double FORTRAN_FUNC(dnrm2) (int*, double*, int*); +double FORTRAN_FUNC(qnrm2) (int*, double*, int*); +double FORTRAN_FUNC(dznrm2)(int*, double*, int*); +double FORTRAN_FUNC(qxnrm2)(int*, double*, int*); + +int FORTRAN_FUNC(srot) (int*, float*, int*, float*, int*, float*, float*); +int FORTRAN_FUNC(drot) (int*, double*, int*, double*, int*, double*, double*); +int FORTRAN_FUNC(qrot) (int*, double*, int*, double*, int*, double*, double*); +int FORTRAN_FUNC(csrot) (int*, float*, int*, float*, int*, float*, float*); +int FORTRAN_FUNC(zdrot) (int*, double*, int*, double*, int*, double*, double*); +int FORTRAN_FUNC(xqrot) (int*, double*, int*, double*, int*, double*, double*); + +int FORTRAN_FUNC(srotg) (float*, float*, float*, float*); +int FORTRAN_FUNC(drotg) (double*, double*, double*, double*); +int FORTRAN_FUNC(qrotg) (double*, double*, double*, double*); +int FORTRAN_FUNC(crotg) (float*, float*, float*, float*); +int FORTRAN_FUNC(zrotg) (double*, double*, double*, double*); +int FORTRAN_FUNC(xrotg) (double*, double*, double*, double*); + +int FORTRAN_FUNC(srotmg)(float*, float*, float*, float*, float*); +int FORTRAN_FUNC(drotmg)(double*, double*, double*, double*, double*); + +int FORTRAN_FUNC(srotm) (int*, float*, int*, float*, int*, float*); +int FORTRAN_FUNC(drotm) (int*, double*, int*, double*, int*, double*); +int FORTRAN_FUNC(qrotm) (int*, double*, int*, double*, int*, double*); + +/* Level 2 routines*/ + +int FORTRAN_FUNC(sger)(int*, int*, float*, float*, int*, float*, int*, float*, int*); +int FORTRAN_FUNC(dger)(int*, int*, double*, double*, int*, double*, int*, double*, int*); +int FORTRAN_FUNC(qger)(int*, int*, double*, double*, int*, double*, int*, double*, int*); +int FORTRAN_FUNC(cgeru)(int*, int*, float*, float*, int*, float*, int*, float*, int*); +int FORTRAN_FUNC(cgerc)(int*, int*, float*, float*, int*, float*, int*, float*, int*); +int FORTRAN_FUNC(zgeru)(int*, int*, double*, double*, int*, double*, int*, double*, int*); +int FORTRAN_FUNC(zgerc)(int*, int*, double*, double*, int*, double*, int*, double*, int*); +int FORTRAN_FUNC(xgeru)(int*, int*, double*, double*, int*, double*, int*, double*, int*); +int FORTRAN_FUNC(xgerc)(int*, int*, double*, double*, int*, double*, int*, double*, int*); + +int FORTRAN_FUNC(sgemv)(const char*, const int*, const int*, const float*, const float*, const int*, const float*, const int*, const float*, float*, const int*); +int FORTRAN_FUNC(dgemv)(const char*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(qgemv)(const char*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(cgemv)(const char*, const int*, const int*, const float*, const float*, const int*, const float*, const int*, const float*, float*, const int*); +int FORTRAN_FUNC(zgemv)(const char*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(xgemv)(const char*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); + +int FORTRAN_FUNC(strsv) (const char*, const char*, const char*, const int*, const float*, const int*, float*, const int*); +int FORTRAN_FUNC(dtrsv) (const char*, const char*, const char*, const int*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(qtrsv) (const char*, const char*, const char*, const int*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(ctrsv) (const char*, const char*, const char*, const int*, const float*, const int*, float*, const int*); +int FORTRAN_FUNC(ztrsv) (const char*, const char*, const char*, const int*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(xtrsv) (const char*, const char*, const char*, const int*, const double*, const int*, double*, const int*); + +int FORTRAN_FUNC(stpsv) (char*, char*, char*, int*, float*, float*, int*); +int FORTRAN_FUNC(dtpsv) (char*, char*, char*, int*, double*, double*, int*); +int FORTRAN_FUNC(qtpsv) (char*, char*, char*, int*, double*, double*, int*); +int FORTRAN_FUNC(ctpsv) (char*, char*, char*, int*, float*, float*, int*); +int FORTRAN_FUNC(ztpsv) (char*, char*, char*, int*, double*, double*, int*); +int FORTRAN_FUNC(xtpsv) (char*, char*, char*, int*, double*, double*, int*); + +int FORTRAN_FUNC(strmv) (const char*, const char*, const char*, const int*, const float*, const int*, float*, const int*); +int FORTRAN_FUNC(dtrmv) (const char*, const char*, const char*, const int*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(qtrmv) (const char*, const char*, const char*, const int*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(ctrmv) (const char*, const char*, const char*, const int*, const float*, const int*, float*, const int*); +int FORTRAN_FUNC(ztrmv) (const char*, const char*, const char*, const int*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(xtrmv) (const char*, const char*, const char*, const int*, const double*, const int*, double*, const int*); + +int FORTRAN_FUNC(stpmv) (char*, char*, char*, int*, float*, float*, int*); +int FORTRAN_FUNC(dtpmv) (char*, char*, char*, int*, double*, double*, int*); +int FORTRAN_FUNC(qtpmv) (char*, char*, char*, int*, double*, double*, int*); +int FORTRAN_FUNC(ctpmv) (char*, char*, char*, int*, float*, float*, int*); +int FORTRAN_FUNC(ztpmv) (char*, char*, char*, int*, double*, double*, int*); +int FORTRAN_FUNC(xtpmv) (char*, char*, char*, int*, double*, double*, int*); + +int FORTRAN_FUNC(stbmv) (char*, char*, char*, int*, int*, float*, int*, float*, int*); +int FORTRAN_FUNC(dtbmv) (char*, char*, char*, int*, int*, double*, int*, double*, int*); +int FORTRAN_FUNC(qtbmv) (char*, char*, char*, int*, int*, double*, int*, double*, int*); +int FORTRAN_FUNC(ctbmv) (char*, char*, char*, int*, int*, float*, int*, float*, int*); +int FORTRAN_FUNC(ztbmv) (char*, char*, char*, int*, int*, double*, int*, double*, int*); +int FORTRAN_FUNC(xtbmv) (char*, char*, char*, int*, int*, double*, int*, double*, int*); + +int FORTRAN_FUNC(stbsv) (char*, char*, char*, int*, int*, float*, int*, float*, int*); +int FORTRAN_FUNC(dtbsv) (char*, char*, char*, int*, int*, double*, int*, double*, int*); +int FORTRAN_FUNC(qtbsv) (char*, char*, char*, int*, int*, double*, int*, double*, int*); +int FORTRAN_FUNC(ctbsv) (char*, char*, char*, int*, int*, float*, int*, float*, int*); +int FORTRAN_FUNC(ztbsv) (char*, char*, char*, int*, int*, double*, int*, double*, int*); +int FORTRAN_FUNC(xtbsv) (char*, char*, char*, int*, int*, double*, int*, double*, int*); + +int FORTRAN_FUNC(ssymv) (const char*, const int*, const float*, const float*, const int*, const float*, const int*, const float*, float*, const int*); +int FORTRAN_FUNC(dsymv) (const char*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(qsymv) (const char*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); + +int FORTRAN_FUNC(sspmv) (char*, int*, float*, float*, float*, int*, float*, float*, int*); +int FORTRAN_FUNC(dspmv) (char*, int*, double*, double*, double*, int*, double*, double*, int*); +int FORTRAN_FUNC(qspmv) (char*, int*, double*, double*, double*, int*, double*, double*, int*); + +int FORTRAN_FUNC(ssyr) (const char*, const int*, const float *, const float*, const int*, float*, const int*); +int FORTRAN_FUNC(dsyr) (const char*, const int*, const double*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(qsyr) (const char*, const int*, const double*, const double*, const int*, double*, const int*); + +int FORTRAN_FUNC(ssyr2) (const char*, const int*, const float *, const float*, const int*, const float*, const int*, float*, const int*); +int FORTRAN_FUNC(dsyr2) (const char*, const int*, const double*, const double*, const int*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(qsyr2) (const char*, const int*, const double*, const double*, const int*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(csyr2) (const char*, const int*, const float *, const float*, const int*, const float*, const int*, float*, const int*); +int FORTRAN_FUNC(zsyr2) (const char*, const int*, const double*, const double*, const int*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(xsyr2) (const char*, const int*, const double*, const double*, const int*, const double*, const int*, double*, const int*); + +int FORTRAN_FUNC(sspr) (char*, int*, float *, float*, int*, float*); +int FORTRAN_FUNC(dspr) (char*, int*, double*, double*, int*, double*); +int FORTRAN_FUNC(qspr) (char*, int*, double*, double*, int*, double*); + +int FORTRAN_FUNC(sspr2) (char*, int*, float *, float*, int*, float*, int*, float*); +int FORTRAN_FUNC(dspr2) (char*, int*, double*, double*, int*, double*, int*, double*); +int FORTRAN_FUNC(qspr2) (char*, int*, double*, double*, int*, double*, int*, double*); +int FORTRAN_FUNC(cspr2) (char*, int*, float *, float*, int*, float*, int*, float*); +int FORTRAN_FUNC(zspr2) (char*, int*, double*, double*, int*, double*, int*, double*); +int FORTRAN_FUNC(xspr2) (char*, int*, double*, double*, int*, double*, int*, double*); + +int FORTRAN_FUNC(cher) (char*, int*, float *, float*, int*, float*, int*); +int FORTRAN_FUNC(zher) (char*, int*, double*, double*, int*, double*, int*); +int FORTRAN_FUNC(xher) (char*, int*, double*, double*, int*, double*, int*); + +int FORTRAN_FUNC(chpr) (char*, int*, float *, float*, int*, float*); +int FORTRAN_FUNC(zhpr) (char*, int*, double*, double*, int*, double*); +int FORTRAN_FUNC(xhpr) (char*, int*, double*, double*, int*, double*); + +int FORTRAN_FUNC(cher2) (char*, int*, float *, float*, int*, float*, int*, float*, int*); +int FORTRAN_FUNC(zher2) (char*, int*, double*, double*, int*, double*, int*, double*, int*); +int FORTRAN_FUNC(xher2) (char*, int*, double*, double*, int*, double*, int*, double*, int*); + +int FORTRAN_FUNC(chpr2) (char*, int*, float *, float*, int*, float*, int*, float*); +int FORTRAN_FUNC(zhpr2) (char*, int*, double*, double*, int*, double*, int*, double*); +int FORTRAN_FUNC(xhpr2) (char*, int*, double*, double*, int*, double*, int*, double*); + +int FORTRAN_FUNC(chemv) (const char*, const int*, const float*, const float*, const int*, const float*, const int*, const float*, float*, const int*); +int FORTRAN_FUNC(zhemv) (const char*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(xhemv) (const char*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); + +int FORTRAN_FUNC(chpmv) (char*, int*, float*, float*, float*, int*, float*, float*, int*); +int FORTRAN_FUNC(zhpmv) (char*, int*, double*, double*, double*, int*, double*, double*, int*); +int FORTRAN_FUNC(xhpmv) (char*, int*, double*, double*, double*, int*, double*, double*, int*); + +int FORTRAN_FUNC(snorm)(char*, int*, int*, float*, int*); +int FORTRAN_FUNC(dnorm)(char*, int*, int*, double*, int*); +int FORTRAN_FUNC(cnorm)(char*, int*, int*, float*, int*); +int FORTRAN_FUNC(znorm)(char*, int*, int*, double*, int*); + +int FORTRAN_FUNC(sgbmv)(char*, int*, int*, int*, int*, float*, float*, int*, float*, int*, float*, float*, int*); +int FORTRAN_FUNC(dgbmv)(char*, int*, int*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); +int FORTRAN_FUNC(qgbmv)(char*, int*, int*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); +int FORTRAN_FUNC(cgbmv)(char*, int*, int*, int*, int*, float*, float*, int*, float*, int*, float*, float*, int*); +int FORTRAN_FUNC(zgbmv)(char*, int*, int*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); +int FORTRAN_FUNC(xgbmv)(char*, int*, int*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); + +int FORTRAN_FUNC(ssbmv)(char*, int*, int*, float*, float*, int*, float*, int*, float*, float*, int*); +int FORTRAN_FUNC(dsbmv)(char*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); +int FORTRAN_FUNC(qsbmv)(char*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); +int FORTRAN_FUNC(csbmv)(char*, int*, int*, float*, float*, int*, float*, int*, float*, float*, int*); +int FORTRAN_FUNC(zsbmv)(char*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); +int FORTRAN_FUNC(xsbmv)(char*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); + +int FORTRAN_FUNC(chbmv)(char*, int*, int*, float*, float*, int*, float*, int*, float*, float*, int*); +int FORTRAN_FUNC(zhbmv)(char*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); +int FORTRAN_FUNC(xhbmv)(char*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); + +/* Level 3 routines*/ + +int FORTRAN_FUNC(sgemm)(const char*, const char*, const int*, const int*, const int*, const float*, const float*, const int*, const float*, const int*, const float*, float*, const int*); +int FORTRAN_FUNC(dgemm)(const char*, const char*, const int*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(qgemm)(const char*, const char*, const int*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(cgemm)(const char*, const char*, const int*, const int*, const int*, const float*, const float*, const int*, const float*, const int*, const float*, float*, const int*); +int FORTRAN_FUNC(zgemm)(const char*, const char*, const int*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(xgemm)(const char*, const char*, const int*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); + +int FORTRAN_FUNC(cgemm3m)(char*, char*, int*, int*, int*, float*, float*, int*, float*, int*, float*, float*, int*); +int FORTRAN_FUNC(zgemm3m)(char*, char*, int*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); +int FORTRAN_FUNC(xgemm3m)(char*, char*, int*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); + +int FORTRAN_FUNC(sge2mm)(char*, char*, char*, int*, int*, float*, float*, int*, float*, int*, float*, float*, int*); +int FORTRAN_FUNC(dge2mm)(char*, char*, char*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); +int FORTRAN_FUNC(cge2mm)(char*, char*, char*, int*, int*, float*, float*, int*, float*, int*, float*, float*, int*); +int FORTRAN_FUNC(zge2mm)(char*, char*, char*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); + +int FORTRAN_FUNC(strsm)(const char*, const char*, const char*, const char*, const int*, const int*, const float*, const float*, const int*, float*, const int*); +int FORTRAN_FUNC(dtrsm)(const char*, const char*, const char*, const char*, const int*, const int*, const double*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(qtrsm)(const char*, const char*, const char*, const char*, const int*, const int*, const double*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(ctrsm)(const char*, const char*, const char*, const char*, const int*, const int*, const float*, const float*, const int*, float*, const int*); +int FORTRAN_FUNC(ztrsm)(const char*, const char*, const char*, const char*, const int*, const int*, const double*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(xtrsm)(const char*, const char*, const char*, const char*, const int*, const int*, const double*, const double*, const int*, double*, const int*); + +int FORTRAN_FUNC(strmm)(const char*, const char*, const char*, const char*, const int*, const int*, const float*, const float*, const int*, float*, const int*); +int FORTRAN_FUNC(dtrmm)(const char*, const char*, const char*, const char*, const int*, const int*, const double*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(qtrmm)(const char*, const char*, const char*, const char*, const int*, const int*, const double*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(ctrmm)(const char*, const char*, const char*, const char*, const int*, const int*, const float*, const float*, const int*, float*, const int*); +int FORTRAN_FUNC(ztrmm)(const char*, const char*, const char*, const char*, const int*, const int*, const double*, const double*, const int*, double*, const int*); +int FORTRAN_FUNC(xtrmm)(const char*, const char*, const char*, const char*, const int*, const int*, const double*, const double*, const int*, double*, const int*); + +int FORTRAN_FUNC(ssymm)(const char*, const char*, const int*, const int*, const float*, const float*, const int*, const float*, const int*, const float*, float*, const int*); +int FORTRAN_FUNC(dsymm)(const char*, const char*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(qsymm)(const char*, const char*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(csymm)(const char*, const char*, const int*, const int*, const float*, const float*, const int*, const float*, const int*, const float*, float*, const int*); +int FORTRAN_FUNC(zsymm)(const char*, const char*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(xsymm)(const char*, const char*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); + +int FORTRAN_FUNC(csymm3m)(char*, char*, int*, int*, float*, float*, int*, float*, int*, float*, float*, int*); +int FORTRAN_FUNC(zsymm3m)(char*, char*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); +int FORTRAN_FUNC(xsymm3m)(char*, char*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); + +int FORTRAN_FUNC(ssyrk)(const char*, const char*, const int*, const int*, const float*, const float*, const int*, const float*, float*, const int*); +int FORTRAN_FUNC(dsyrk)(const char*, const char*, const int*, const int*, const double*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(qsyrk)(const char*, const char*, const int*, const int*, const double*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(csyrk)(const char*, const char*, const int*, const int*, const float*, const float*, const int*, const float*, float*, const int*); +int FORTRAN_FUNC(zsyrk)(const char*, const char*, const int*, const int*, const double*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(xsyrk)(const char*, const char*, const int*, const int*, const double*, const double*, const int*, const double*, double*, const int*); + +int FORTRAN_FUNC(ssyr2k)(const char*, const char*, const int*, const int*, const float*, const float*, const int*, const float*, const int*, const float*, float*, const int*); +int FORTRAN_FUNC(dsyr2k)(const char*, const char*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(qsyr2k)(const char*, const char*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(csyr2k)(const char*, const char*, const int*, const int*, const float*, const float*, const int*, const float*, const int*, const float*, float*, const int*); +int FORTRAN_FUNC(zsyr2k)(const char*, const char*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(xsyr2k)(const char*, const char*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); + +int FORTRAN_FUNC(chemm)(const char*, const char*, const int*, const int*, const float*, const float*, const int*, const float*, const int*, const float*, float*, const int*); +int FORTRAN_FUNC(zhemm)(const char*, const char*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(xhemm)(const char*, const char*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); + +int FORTRAN_FUNC(chemm3m)(char*, char*, int*, int*, float*, float*, int*, float*, int*, float*, float*, int*); +int FORTRAN_FUNC(zhemm3m)(char*, char*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); +int FORTRAN_FUNC(xhemm3m)(char*, char*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int*); + +int FORTRAN_FUNC(cherk)(const char*, const char*, const int*, const int*, const float*, const float*, const int*, const float*, float*, const int*); +int FORTRAN_FUNC(zherk)(const char*, const char*, const int*, const int*, const double*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(xherk)(const char*, const char*, const int*, const int*, const double*, const double*, const int*, const double*, double*, const int*); + +int FORTRAN_FUNC(cher2k)(const char*, const char*, const int*, const int*, const float*, const float*, const int*, const float*, const int*, const float*, float*, const int*); +int FORTRAN_FUNC(zher2k)(const char*, const char*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(xher2k)(const char*, const char*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(cher2m)(const char*, const char*, const char*, const int*, const int*, const float*, const float*, const int*, const float*, const int*, const float*, float*, const int*); +int FORTRAN_FUNC(zher2m)(const char*, const char*, const char*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); +int FORTRAN_FUNC(xher2m)(const char*, const char*, const char*, const int*, const int*, const double*, const double*, const int*, const double*, const int*, const double*, double*, const int*); + + + + +#endif -/* DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY) */ -double ddot_(int* n, double* dx, int* incx, double* dy, int* incy); diff --git a/dspl/dspl_src/matrix.c b/dspl/dspl_src/matrix.c index a9fedea..11249f7 100644 --- a/dspl/dspl_src/matrix.c +++ b/dspl/dspl_src/matrix.c @@ -22,6 +22,7 @@ #include #include "dspl.h" #include "dspl_internal.h" +#include "blas.h" diff --git a/dspl/blas_obj/.gitignore b/dspl/libblas/.gitignore similarity index 100% rename from dspl/blas_obj/.gitignore rename to dspl/libblas/.gitignore diff --git a/dspl/libblas/SRC/._Makefile b/dspl/libblas/SRC/._Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d167b72c0eacaa2189ff8dc5e5666594abd446de GIT binary patch literal 227 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}@kSs9@gD;*Lx^ic2#CfF9795apt2wx zdw^ISO*P0gkT^dBgLra&u3ln6K~Ab(NNPoiYhH4GN@iX$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ccopy.f b/dspl/libblas/SRC/._ccopy.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._cdotc.f b/dspl/libblas/SRC/._cdotc.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._cdotu.f b/dspl/libblas/SRC/._cdotu.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._cgbmv.f b/dspl/libblas/SRC/._cgbmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._cgemm.f b/dspl/libblas/SRC/._cgemm.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._cgemv.f b/dspl/libblas/SRC/._cgemv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._cgerc.f b/dspl/libblas/SRC/._cgerc.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._cgeru.f b/dspl/libblas/SRC/._cgeru.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._chbmv.f b/dspl/libblas/SRC/._chbmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._chemm.f b/dspl/libblas/SRC/._chemm.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._chemv.f b/dspl/libblas/SRC/._chemv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._cher.f b/dspl/libblas/SRC/._cher.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._cher2.f b/dspl/libblas/SRC/._cher2.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._cher2k.f b/dspl/libblas/SRC/._cher2k.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._cherk.f b/dspl/libblas/SRC/._cherk.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._chpmv.f b/dspl/libblas/SRC/._chpmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._chpr.f b/dspl/libblas/SRC/._chpr.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._chpr2.f b/dspl/libblas/SRC/._chpr2.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._crotg.f b/dspl/libblas/SRC/._crotg.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._cscal.f b/dspl/libblas/SRC/._cscal.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._csrot.f b/dspl/libblas/SRC/._csrot.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._csscal.f b/dspl/libblas/SRC/._csscal.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._cswap.f b/dspl/libblas/SRC/._cswap.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._csymm.f b/dspl/libblas/SRC/._csymm.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._csyr2k.f b/dspl/libblas/SRC/._csyr2k.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._csyrk.f b/dspl/libblas/SRC/._csyrk.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ctbmv.f b/dspl/libblas/SRC/._ctbmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ctbsv.f b/dspl/libblas/SRC/._ctbsv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ctpmv.f b/dspl/libblas/SRC/._ctpmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ctpsv.f b/dspl/libblas/SRC/._ctpsv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ctrmm.f b/dspl/libblas/SRC/._ctrmm.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ctrmv.f b/dspl/libblas/SRC/._ctrmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ctrsm.f b/dspl/libblas/SRC/._ctrsm.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ctrsv.f b/dspl/libblas/SRC/._ctrsv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dasum.f b/dspl/libblas/SRC/._dasum.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._daxpy.f b/dspl/libblas/SRC/._daxpy.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dcabs1.f b/dspl/libblas/SRC/._dcabs1.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dcopy.f b/dspl/libblas/SRC/._dcopy.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ddot.f b/dspl/libblas/SRC/._ddot.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dgbmv.f b/dspl/libblas/SRC/._dgbmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dgemm.f b/dspl/libblas/SRC/._dgemm.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dgemv.f b/dspl/libblas/SRC/._dgemv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dger.f b/dspl/libblas/SRC/._dger.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dnrm2.f b/dspl/libblas/SRC/._dnrm2.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._drot.f b/dspl/libblas/SRC/._drot.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._drotg.f b/dspl/libblas/SRC/._drotg.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._drotm.f b/dspl/libblas/SRC/._drotm.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._drotmg.f b/dspl/libblas/SRC/._drotmg.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dsbmv.f b/dspl/libblas/SRC/._dsbmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dscal.f b/dspl/libblas/SRC/._dscal.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dsdot.f b/dspl/libblas/SRC/._dsdot.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dspmv.f b/dspl/libblas/SRC/._dspmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dspr.f b/dspl/libblas/SRC/._dspr.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dspr2.f b/dspl/libblas/SRC/._dspr2.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dswap.f b/dspl/libblas/SRC/._dswap.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dsymm.f b/dspl/libblas/SRC/._dsymm.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dsymv.f b/dspl/libblas/SRC/._dsymv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dsyr.f b/dspl/libblas/SRC/._dsyr.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dsyr2.f b/dspl/libblas/SRC/._dsyr2.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dsyr2k.f b/dspl/libblas/SRC/._dsyr2k.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dsyrk.f b/dspl/libblas/SRC/._dsyrk.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dtbmv.f b/dspl/libblas/SRC/._dtbmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dtbsv.f b/dspl/libblas/SRC/._dtbsv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dtpmv.f b/dspl/libblas/SRC/._dtpmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dtpsv.f b/dspl/libblas/SRC/._dtpsv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dtrmm.f b/dspl/libblas/SRC/._dtrmm.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dtrmv.f b/dspl/libblas/SRC/._dtrmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dtrsm.f b/dspl/libblas/SRC/._dtrsm.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dtrsv.f b/dspl/libblas/SRC/._dtrsv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dzasum.f b/dspl/libblas/SRC/._dzasum.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._dznrm2.f b/dspl/libblas/SRC/._dznrm2.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._icamax.f b/dspl/libblas/SRC/._icamax.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._idamax.f b/dspl/libblas/SRC/._idamax.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._isamax.f b/dspl/libblas/SRC/._isamax.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._izamax.f b/dspl/libblas/SRC/._izamax.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._lsame.f b/dspl/libblas/SRC/._lsame.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._make.inc b/dspl/libblas/SRC/._make.inc new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._sasum.f b/dspl/libblas/SRC/._sasum.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._saxpy.f b/dspl/libblas/SRC/._saxpy.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._scabs1.f b/dspl/libblas/SRC/._scabs1.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._scasum.f b/dspl/libblas/SRC/._scasum.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._scnrm2.f b/dspl/libblas/SRC/._scnrm2.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._scopy.f b/dspl/libblas/SRC/._scopy.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._sdot.f b/dspl/libblas/SRC/._sdot.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._sdsdot.f b/dspl/libblas/SRC/._sdsdot.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._sgbmv.f b/dspl/libblas/SRC/._sgbmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._sgemm.f b/dspl/libblas/SRC/._sgemm.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._sgemv.f b/dspl/libblas/SRC/._sgemv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._sger.f b/dspl/libblas/SRC/._sger.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._snrm2.f b/dspl/libblas/SRC/._snrm2.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._srot.f b/dspl/libblas/SRC/._srot.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._srotg.f b/dspl/libblas/SRC/._srotg.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._srotm.f b/dspl/libblas/SRC/._srotm.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._srotmg.f b/dspl/libblas/SRC/._srotmg.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ssbmv.f b/dspl/libblas/SRC/._ssbmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._sscal.f b/dspl/libblas/SRC/._sscal.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._sspmv.f b/dspl/libblas/SRC/._sspmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._sspr.f b/dspl/libblas/SRC/._sspr.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._sspr2.f b/dspl/libblas/SRC/._sspr2.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._sswap.f b/dspl/libblas/SRC/._sswap.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ssymm.f b/dspl/libblas/SRC/._ssymm.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ssymv.f b/dspl/libblas/SRC/._ssymv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ssyr.f b/dspl/libblas/SRC/._ssyr.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ssyr2.f b/dspl/libblas/SRC/._ssyr2.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ssyr2k.f b/dspl/libblas/SRC/._ssyr2k.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ssyrk.f b/dspl/libblas/SRC/._ssyrk.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._stbmv.f b/dspl/libblas/SRC/._stbmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._stbsv.f b/dspl/libblas/SRC/._stbsv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._stpmv.f b/dspl/libblas/SRC/._stpmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._stpsv.f b/dspl/libblas/SRC/._stpsv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._strmm.f b/dspl/libblas/SRC/._strmm.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._strmv.f b/dspl/libblas/SRC/._strmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._strsm.f b/dspl/libblas/SRC/._strsm.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._strsv.f b/dspl/libblas/SRC/._strsv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._xerbla.f b/dspl/libblas/SRC/._xerbla.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._xerbla_array.f b/dspl/libblas/SRC/._xerbla_array.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zaxpy.f b/dspl/libblas/SRC/._zaxpy.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zcopy.f b/dspl/libblas/SRC/._zcopy.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zdotc.f b/dspl/libblas/SRC/._zdotc.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zdotu.f b/dspl/libblas/SRC/._zdotu.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zdrot.f b/dspl/libblas/SRC/._zdrot.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zdscal.f b/dspl/libblas/SRC/._zdscal.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zgbmv.f b/dspl/libblas/SRC/._zgbmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zgemm.f b/dspl/libblas/SRC/._zgemm.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zgemv.f b/dspl/libblas/SRC/._zgemv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zgerc.f b/dspl/libblas/SRC/._zgerc.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zgeru.f b/dspl/libblas/SRC/._zgeru.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zhbmv.f b/dspl/libblas/SRC/._zhbmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zhemm.f b/dspl/libblas/SRC/._zhemm.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zhemv.f b/dspl/libblas/SRC/._zhemv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zher.f b/dspl/libblas/SRC/._zher.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zher2.f b/dspl/libblas/SRC/._zher2.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zher2k.f b/dspl/libblas/SRC/._zher2k.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zherk.f b/dspl/libblas/SRC/._zherk.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zhpmv.f b/dspl/libblas/SRC/._zhpmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zhpr.f b/dspl/libblas/SRC/._zhpr.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zhpr2.f b/dspl/libblas/SRC/._zhpr2.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zrotg.f b/dspl/libblas/SRC/._zrotg.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zscal.f b/dspl/libblas/SRC/._zscal.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zswap.f b/dspl/libblas/SRC/._zswap.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zsymm.f b/dspl/libblas/SRC/._zsymm.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zsyr2k.f b/dspl/libblas/SRC/._zsyr2k.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._zsyrk.f b/dspl/libblas/SRC/._zsyrk.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ztbmv.f b/dspl/libblas/SRC/._ztbmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ztbsv.f b/dspl/libblas/SRC/._ztbsv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ztpmv.f b/dspl/libblas/SRC/._ztpmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ztpsv.f b/dspl/libblas/SRC/._ztpsv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ztrmm.f b/dspl/libblas/SRC/._ztrmm.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ztrmv.f b/dspl/libblas/SRC/._ztrmv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ztrsm.f b/dspl/libblas/SRC/._ztrsm.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/._ztrsv.f b/dspl/libblas/SRC/._ztrsv.f new file mode 100644 index 0000000000000000000000000000000000000000..f5c130cac2e64d7ccb98a5f430be796c7d5f93e8 GIT binary patch literal 176 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDI}aUBqY_#1$j2;dkJ5(HHS(lG;wCD61n zBE&_L^Kydi!!Yl09tky A!~g&Q literal 0 HcmV?d00001 diff --git a/dspl/libblas/SRC/.gitignore b/dspl/libblas/SRC/.gitignore new file mode 100644 index 0000000..819556b --- /dev/null +++ b/dspl/libblas/SRC/.gitignore @@ -0,0 +1,9 @@ +*.o +*.so +*.dll +*.exe +*.txt +*.dat +*.bin +*.csv +*.a \ No newline at end of file diff --git a/dspl/blas_src/Makefile b/dspl/libblas/SRC/Makefile similarity index 100% rename from dspl/blas_src/Makefile rename to dspl/libblas/SRC/Makefile diff --git a/dspl/blas_src/caxpy.f b/dspl/libblas/SRC/caxpy.f similarity index 100% rename from dspl/blas_src/caxpy.f rename to dspl/libblas/SRC/caxpy.f diff --git a/dspl/blas_src/ccopy.f b/dspl/libblas/SRC/ccopy.f similarity index 100% rename from dspl/blas_src/ccopy.f rename to dspl/libblas/SRC/ccopy.f diff --git a/dspl/blas_src/cdotc.f b/dspl/libblas/SRC/cdotc.f similarity index 100% rename from dspl/blas_src/cdotc.f rename to dspl/libblas/SRC/cdotc.f diff --git a/dspl/blas_src/cdotu.f b/dspl/libblas/SRC/cdotu.f similarity index 100% rename from dspl/blas_src/cdotu.f rename to dspl/libblas/SRC/cdotu.f diff --git a/dspl/blas_src/cgbmv.f b/dspl/libblas/SRC/cgbmv.f similarity index 100% rename from dspl/blas_src/cgbmv.f rename to dspl/libblas/SRC/cgbmv.f diff --git a/dspl/blas_src/cgemm.f b/dspl/libblas/SRC/cgemm.f similarity index 100% rename from dspl/blas_src/cgemm.f rename to dspl/libblas/SRC/cgemm.f diff --git a/dspl/blas_src/cgemv.f b/dspl/libblas/SRC/cgemv.f similarity index 100% rename from dspl/blas_src/cgemv.f rename to dspl/libblas/SRC/cgemv.f diff --git a/dspl/blas_src/cgerc.f b/dspl/libblas/SRC/cgerc.f similarity index 100% rename from dspl/blas_src/cgerc.f rename to dspl/libblas/SRC/cgerc.f diff --git a/dspl/blas_src/cgeru.f b/dspl/libblas/SRC/cgeru.f similarity index 100% rename from dspl/blas_src/cgeru.f rename to dspl/libblas/SRC/cgeru.f diff --git a/dspl/blas_src/chbmv.f b/dspl/libblas/SRC/chbmv.f similarity index 100% rename from dspl/blas_src/chbmv.f rename to dspl/libblas/SRC/chbmv.f diff --git a/dspl/blas_src/chemm.f b/dspl/libblas/SRC/chemm.f similarity index 100% rename from dspl/blas_src/chemm.f rename to dspl/libblas/SRC/chemm.f diff --git a/dspl/blas_src/chemv.f b/dspl/libblas/SRC/chemv.f similarity index 100% rename from dspl/blas_src/chemv.f rename to dspl/libblas/SRC/chemv.f diff --git a/dspl/blas_src/cher.f b/dspl/libblas/SRC/cher.f similarity index 100% rename from dspl/blas_src/cher.f rename to dspl/libblas/SRC/cher.f diff --git a/dspl/blas_src/cher2.f b/dspl/libblas/SRC/cher2.f similarity index 100% rename from dspl/blas_src/cher2.f rename to dspl/libblas/SRC/cher2.f diff --git a/dspl/blas_src/cher2k.f b/dspl/libblas/SRC/cher2k.f similarity index 100% rename from dspl/blas_src/cher2k.f rename to dspl/libblas/SRC/cher2k.f diff --git a/dspl/blas_src/cherk.f b/dspl/libblas/SRC/cherk.f similarity index 100% rename from dspl/blas_src/cherk.f rename to dspl/libblas/SRC/cherk.f diff --git a/dspl/blas_src/chpmv.f b/dspl/libblas/SRC/chpmv.f similarity index 100% rename from dspl/blas_src/chpmv.f rename to dspl/libblas/SRC/chpmv.f diff --git a/dspl/blas_src/chpr.f b/dspl/libblas/SRC/chpr.f similarity index 100% rename from dspl/blas_src/chpr.f rename to dspl/libblas/SRC/chpr.f diff --git a/dspl/blas_src/chpr2.f b/dspl/libblas/SRC/chpr2.f similarity index 100% rename from dspl/blas_src/chpr2.f rename to dspl/libblas/SRC/chpr2.f diff --git a/dspl/blas_src/crotg.f b/dspl/libblas/SRC/crotg.f similarity index 100% rename from dspl/blas_src/crotg.f rename to dspl/libblas/SRC/crotg.f diff --git a/dspl/blas_src/cscal.f b/dspl/libblas/SRC/cscal.f similarity index 100% rename from dspl/blas_src/cscal.f rename to dspl/libblas/SRC/cscal.f diff --git a/dspl/blas_src/csrot.f b/dspl/libblas/SRC/csrot.f similarity index 100% rename from dspl/blas_src/csrot.f rename to dspl/libblas/SRC/csrot.f diff --git a/dspl/blas_src/csscal.f b/dspl/libblas/SRC/csscal.f similarity index 100% rename from dspl/blas_src/csscal.f rename to dspl/libblas/SRC/csscal.f diff --git a/dspl/blas_src/cswap.f b/dspl/libblas/SRC/cswap.f similarity index 100% rename from dspl/blas_src/cswap.f rename to dspl/libblas/SRC/cswap.f diff --git a/dspl/blas_src/csymm.f b/dspl/libblas/SRC/csymm.f similarity index 100% rename from dspl/blas_src/csymm.f rename to dspl/libblas/SRC/csymm.f diff --git a/dspl/blas_src/csyr2k.f b/dspl/libblas/SRC/csyr2k.f similarity index 100% rename from dspl/blas_src/csyr2k.f rename to dspl/libblas/SRC/csyr2k.f diff --git a/dspl/blas_src/csyrk.f b/dspl/libblas/SRC/csyrk.f similarity index 100% rename from dspl/blas_src/csyrk.f rename to dspl/libblas/SRC/csyrk.f diff --git a/dspl/blas_src/ctbmv.f b/dspl/libblas/SRC/ctbmv.f similarity index 100% rename from dspl/blas_src/ctbmv.f rename to dspl/libblas/SRC/ctbmv.f diff --git a/dspl/blas_src/ctbsv.f b/dspl/libblas/SRC/ctbsv.f similarity index 100% rename from dspl/blas_src/ctbsv.f rename to dspl/libblas/SRC/ctbsv.f diff --git a/dspl/blas_src/ctpmv.f b/dspl/libblas/SRC/ctpmv.f similarity index 100% rename from dspl/blas_src/ctpmv.f rename to dspl/libblas/SRC/ctpmv.f diff --git a/dspl/blas_src/ctpsv.f b/dspl/libblas/SRC/ctpsv.f similarity index 100% rename from dspl/blas_src/ctpsv.f rename to dspl/libblas/SRC/ctpsv.f diff --git a/dspl/blas_src/ctrmm.f b/dspl/libblas/SRC/ctrmm.f similarity index 100% rename from dspl/blas_src/ctrmm.f rename to dspl/libblas/SRC/ctrmm.f diff --git a/dspl/blas_src/ctrmv.f b/dspl/libblas/SRC/ctrmv.f similarity index 100% rename from dspl/blas_src/ctrmv.f rename to dspl/libblas/SRC/ctrmv.f diff --git a/dspl/blas_src/ctrsm.f b/dspl/libblas/SRC/ctrsm.f similarity index 100% rename from dspl/blas_src/ctrsm.f rename to dspl/libblas/SRC/ctrsm.f diff --git a/dspl/blas_src/ctrsv.f b/dspl/libblas/SRC/ctrsv.f similarity index 100% rename from dspl/blas_src/ctrsv.f rename to dspl/libblas/SRC/ctrsv.f diff --git a/dspl/blas_src/dasum.f b/dspl/libblas/SRC/dasum.f similarity index 100% rename from dspl/blas_src/dasum.f rename to dspl/libblas/SRC/dasum.f diff --git a/dspl/blas_src/daxpy.f b/dspl/libblas/SRC/daxpy.f similarity index 100% rename from dspl/blas_src/daxpy.f rename to dspl/libblas/SRC/daxpy.f diff --git a/dspl/blas_src/dcabs1.f b/dspl/libblas/SRC/dcabs1.f similarity index 100% rename from dspl/blas_src/dcabs1.f rename to dspl/libblas/SRC/dcabs1.f diff --git a/dspl/blas_src/dcopy.f b/dspl/libblas/SRC/dcopy.f similarity index 100% rename from dspl/blas_src/dcopy.f rename to dspl/libblas/SRC/dcopy.f diff --git a/dspl/blas_src/ddot.f b/dspl/libblas/SRC/ddot.f similarity index 100% rename from dspl/blas_src/ddot.f rename to dspl/libblas/SRC/ddot.f diff --git a/dspl/blas_src/dgbmv.f b/dspl/libblas/SRC/dgbmv.f similarity index 100% rename from dspl/blas_src/dgbmv.f rename to dspl/libblas/SRC/dgbmv.f diff --git a/dspl/blas_src/dgemm.f b/dspl/libblas/SRC/dgemm.f similarity index 100% rename from dspl/blas_src/dgemm.f rename to dspl/libblas/SRC/dgemm.f diff --git a/dspl/blas_src/dgemv.f b/dspl/libblas/SRC/dgemv.f similarity index 100% rename from dspl/blas_src/dgemv.f rename to dspl/libblas/SRC/dgemv.f diff --git a/dspl/blas_src/dger.f b/dspl/libblas/SRC/dger.f similarity index 100% rename from dspl/blas_src/dger.f rename to dspl/libblas/SRC/dger.f diff --git a/dspl/blas_src/dnrm2.f b/dspl/libblas/SRC/dnrm2.f similarity index 100% rename from dspl/blas_src/dnrm2.f rename to dspl/libblas/SRC/dnrm2.f diff --git a/dspl/blas_src/drot.f b/dspl/libblas/SRC/drot.f similarity index 100% rename from dspl/blas_src/drot.f rename to dspl/libblas/SRC/drot.f diff --git a/dspl/blas_src/drotg.f b/dspl/libblas/SRC/drotg.f similarity index 100% rename from dspl/blas_src/drotg.f rename to dspl/libblas/SRC/drotg.f diff --git a/dspl/blas_src/drotm.f b/dspl/libblas/SRC/drotm.f similarity index 100% rename from dspl/blas_src/drotm.f rename to dspl/libblas/SRC/drotm.f diff --git a/dspl/blas_src/drotmg.f b/dspl/libblas/SRC/drotmg.f similarity index 100% rename from dspl/blas_src/drotmg.f rename to dspl/libblas/SRC/drotmg.f diff --git a/dspl/blas_src/dsbmv.f b/dspl/libblas/SRC/dsbmv.f similarity index 100% rename from dspl/blas_src/dsbmv.f rename to dspl/libblas/SRC/dsbmv.f diff --git a/dspl/blas_src/dscal.f b/dspl/libblas/SRC/dscal.f similarity index 100% rename from dspl/blas_src/dscal.f rename to dspl/libblas/SRC/dscal.f diff --git a/dspl/blas_src/dsdot.f b/dspl/libblas/SRC/dsdot.f similarity index 100% rename from dspl/blas_src/dsdot.f rename to dspl/libblas/SRC/dsdot.f diff --git a/dspl/blas_src/dspmv.f b/dspl/libblas/SRC/dspmv.f similarity index 100% rename from dspl/blas_src/dspmv.f rename to dspl/libblas/SRC/dspmv.f diff --git a/dspl/blas_src/dspr.f b/dspl/libblas/SRC/dspr.f similarity index 100% rename from dspl/blas_src/dspr.f rename to dspl/libblas/SRC/dspr.f diff --git a/dspl/blas_src/dspr2.f b/dspl/libblas/SRC/dspr2.f similarity index 100% rename from dspl/blas_src/dspr2.f rename to dspl/libblas/SRC/dspr2.f diff --git a/dspl/blas_src/dswap.f b/dspl/libblas/SRC/dswap.f similarity index 100% rename from dspl/blas_src/dswap.f rename to dspl/libblas/SRC/dswap.f diff --git a/dspl/blas_src/dsymm.f b/dspl/libblas/SRC/dsymm.f similarity index 100% rename from dspl/blas_src/dsymm.f rename to dspl/libblas/SRC/dsymm.f diff --git a/dspl/blas_src/dsymv.f b/dspl/libblas/SRC/dsymv.f similarity index 100% rename from dspl/blas_src/dsymv.f rename to dspl/libblas/SRC/dsymv.f diff --git a/dspl/blas_src/dsyr.f b/dspl/libblas/SRC/dsyr.f similarity index 100% rename from dspl/blas_src/dsyr.f rename to dspl/libblas/SRC/dsyr.f diff --git a/dspl/blas_src/dsyr2.f b/dspl/libblas/SRC/dsyr2.f similarity index 100% rename from dspl/blas_src/dsyr2.f rename to dspl/libblas/SRC/dsyr2.f diff --git a/dspl/blas_src/dsyr2k.f b/dspl/libblas/SRC/dsyr2k.f similarity index 100% rename from dspl/blas_src/dsyr2k.f rename to dspl/libblas/SRC/dsyr2k.f diff --git a/dspl/blas_src/dsyrk.f b/dspl/libblas/SRC/dsyrk.f similarity index 100% rename from dspl/blas_src/dsyrk.f rename to dspl/libblas/SRC/dsyrk.f diff --git a/dspl/blas_src/dtbmv.f b/dspl/libblas/SRC/dtbmv.f similarity index 100% rename from dspl/blas_src/dtbmv.f rename to dspl/libblas/SRC/dtbmv.f diff --git a/dspl/blas_src/dtbsv.f b/dspl/libblas/SRC/dtbsv.f similarity index 100% rename from dspl/blas_src/dtbsv.f rename to dspl/libblas/SRC/dtbsv.f diff --git a/dspl/blas_src/dtpmv.f b/dspl/libblas/SRC/dtpmv.f similarity index 100% rename from dspl/blas_src/dtpmv.f rename to dspl/libblas/SRC/dtpmv.f diff --git a/dspl/blas_src/dtpsv.f b/dspl/libblas/SRC/dtpsv.f similarity index 100% rename from dspl/blas_src/dtpsv.f rename to dspl/libblas/SRC/dtpsv.f diff --git a/dspl/blas_src/dtrmm.f b/dspl/libblas/SRC/dtrmm.f similarity index 100% rename from dspl/blas_src/dtrmm.f rename to dspl/libblas/SRC/dtrmm.f diff --git a/dspl/blas_src/dtrmv.f b/dspl/libblas/SRC/dtrmv.f similarity index 100% rename from dspl/blas_src/dtrmv.f rename to dspl/libblas/SRC/dtrmv.f diff --git a/dspl/blas_src/dtrsm.f b/dspl/libblas/SRC/dtrsm.f similarity index 100% rename from dspl/blas_src/dtrsm.f rename to dspl/libblas/SRC/dtrsm.f diff --git a/dspl/blas_src/dtrsv.f b/dspl/libblas/SRC/dtrsv.f similarity index 100% rename from dspl/blas_src/dtrsv.f rename to dspl/libblas/SRC/dtrsv.f diff --git a/dspl/blas_src/dzasum.f b/dspl/libblas/SRC/dzasum.f similarity index 100% rename from dspl/blas_src/dzasum.f rename to dspl/libblas/SRC/dzasum.f diff --git a/dspl/blas_src/dznrm2.f b/dspl/libblas/SRC/dznrm2.f similarity index 100% rename from dspl/blas_src/dznrm2.f rename to dspl/libblas/SRC/dznrm2.f diff --git a/dspl/blas_src/icamax.f b/dspl/libblas/SRC/icamax.f similarity index 100% rename from dspl/blas_src/icamax.f rename to dspl/libblas/SRC/icamax.f diff --git a/dspl/blas_src/idamax.f b/dspl/libblas/SRC/idamax.f similarity index 100% rename from dspl/blas_src/idamax.f rename to dspl/libblas/SRC/idamax.f diff --git a/dspl/blas_src/isamax.f b/dspl/libblas/SRC/isamax.f similarity index 100% rename from dspl/blas_src/isamax.f rename to dspl/libblas/SRC/isamax.f diff --git a/dspl/blas_src/izamax.f b/dspl/libblas/SRC/izamax.f similarity index 100% rename from dspl/blas_src/izamax.f rename to dspl/libblas/SRC/izamax.f diff --git a/dspl/blas_src/lsame.f b/dspl/libblas/SRC/lsame.f similarity index 100% rename from dspl/blas_src/lsame.f rename to dspl/libblas/SRC/lsame.f diff --git a/dspl/blas_src/make.inc b/dspl/libblas/SRC/make.inc similarity index 97% rename from dspl/blas_src/make.inc rename to dspl/libblas/SRC/make.inc index 7ca5f47..e3f9610 100644 --- a/dspl/blas_src/make.inc +++ b/dspl/libblas/SRC/make.inc @@ -31,4 +31,4 @@ RANLIB = ranlib # # The location and name of the Reference BLAS library. # -BLASLIB = blas$(PLAT).a +BLASLIB = ../libblas.a diff --git a/dspl/blas_src/sasum.f b/dspl/libblas/SRC/sasum.f similarity index 100% rename from dspl/blas_src/sasum.f rename to dspl/libblas/SRC/sasum.f diff --git a/dspl/blas_src/saxpy.f b/dspl/libblas/SRC/saxpy.f similarity index 100% rename from dspl/blas_src/saxpy.f rename to dspl/libblas/SRC/saxpy.f diff --git a/dspl/blas_src/scabs1.f b/dspl/libblas/SRC/scabs1.f similarity index 100% rename from dspl/blas_src/scabs1.f rename to dspl/libblas/SRC/scabs1.f diff --git a/dspl/blas_src/scasum.f b/dspl/libblas/SRC/scasum.f similarity index 100% rename from dspl/blas_src/scasum.f rename to dspl/libblas/SRC/scasum.f diff --git a/dspl/blas_src/scnrm2.f b/dspl/libblas/SRC/scnrm2.f similarity index 100% rename from dspl/blas_src/scnrm2.f rename to dspl/libblas/SRC/scnrm2.f diff --git a/dspl/blas_src/scopy.f b/dspl/libblas/SRC/scopy.f similarity index 100% rename from dspl/blas_src/scopy.f rename to dspl/libblas/SRC/scopy.f diff --git a/dspl/blas_src/sdot.f b/dspl/libblas/SRC/sdot.f similarity index 100% rename from dspl/blas_src/sdot.f rename to dspl/libblas/SRC/sdot.f diff --git a/dspl/blas_src/sdsdot.f b/dspl/libblas/SRC/sdsdot.f similarity index 100% rename from dspl/blas_src/sdsdot.f rename to dspl/libblas/SRC/sdsdot.f diff --git a/dspl/blas_src/sgbmv.f b/dspl/libblas/SRC/sgbmv.f similarity index 100% rename from dspl/blas_src/sgbmv.f rename to dspl/libblas/SRC/sgbmv.f diff --git a/dspl/blas_src/sgemm.f b/dspl/libblas/SRC/sgemm.f similarity index 100% rename from dspl/blas_src/sgemm.f rename to dspl/libblas/SRC/sgemm.f diff --git a/dspl/blas_src/sgemv.f b/dspl/libblas/SRC/sgemv.f similarity index 100% rename from dspl/blas_src/sgemv.f rename to dspl/libblas/SRC/sgemv.f diff --git a/dspl/blas_src/sger.f b/dspl/libblas/SRC/sger.f similarity index 100% rename from dspl/blas_src/sger.f rename to dspl/libblas/SRC/sger.f diff --git a/dspl/blas_src/snrm2.f b/dspl/libblas/SRC/snrm2.f similarity index 100% rename from dspl/blas_src/snrm2.f rename to dspl/libblas/SRC/snrm2.f diff --git a/dspl/blas_src/srot.f b/dspl/libblas/SRC/srot.f similarity index 100% rename from dspl/blas_src/srot.f rename to dspl/libblas/SRC/srot.f diff --git a/dspl/blas_src/srotg.f b/dspl/libblas/SRC/srotg.f similarity index 100% rename from dspl/blas_src/srotg.f rename to dspl/libblas/SRC/srotg.f diff --git a/dspl/blas_src/srotm.f b/dspl/libblas/SRC/srotm.f similarity index 100% rename from dspl/blas_src/srotm.f rename to dspl/libblas/SRC/srotm.f diff --git a/dspl/blas_src/srotmg.f b/dspl/libblas/SRC/srotmg.f similarity index 100% rename from dspl/blas_src/srotmg.f rename to dspl/libblas/SRC/srotmg.f diff --git a/dspl/blas_src/ssbmv.f b/dspl/libblas/SRC/ssbmv.f similarity index 100% rename from dspl/blas_src/ssbmv.f rename to dspl/libblas/SRC/ssbmv.f diff --git a/dspl/blas_src/sscal.f b/dspl/libblas/SRC/sscal.f similarity index 100% rename from dspl/blas_src/sscal.f rename to dspl/libblas/SRC/sscal.f diff --git a/dspl/blas_src/sspmv.f b/dspl/libblas/SRC/sspmv.f similarity index 100% rename from dspl/blas_src/sspmv.f rename to dspl/libblas/SRC/sspmv.f diff --git a/dspl/blas_src/sspr.f b/dspl/libblas/SRC/sspr.f similarity index 100% rename from dspl/blas_src/sspr.f rename to dspl/libblas/SRC/sspr.f diff --git a/dspl/blas_src/sspr2.f b/dspl/libblas/SRC/sspr2.f similarity index 100% rename from dspl/blas_src/sspr2.f rename to dspl/libblas/SRC/sspr2.f diff --git a/dspl/blas_src/sswap.f b/dspl/libblas/SRC/sswap.f similarity index 100% rename from dspl/blas_src/sswap.f rename to dspl/libblas/SRC/sswap.f diff --git a/dspl/blas_src/ssymm.f b/dspl/libblas/SRC/ssymm.f similarity index 100% rename from dspl/blas_src/ssymm.f rename to dspl/libblas/SRC/ssymm.f diff --git a/dspl/blas_src/ssymv.f b/dspl/libblas/SRC/ssymv.f similarity index 100% rename from dspl/blas_src/ssymv.f rename to dspl/libblas/SRC/ssymv.f diff --git a/dspl/blas_src/ssyr.f b/dspl/libblas/SRC/ssyr.f similarity index 100% rename from dspl/blas_src/ssyr.f rename to dspl/libblas/SRC/ssyr.f diff --git a/dspl/blas_src/ssyr2.f b/dspl/libblas/SRC/ssyr2.f similarity index 100% rename from dspl/blas_src/ssyr2.f rename to dspl/libblas/SRC/ssyr2.f diff --git a/dspl/blas_src/ssyr2k.f b/dspl/libblas/SRC/ssyr2k.f similarity index 100% rename from dspl/blas_src/ssyr2k.f rename to dspl/libblas/SRC/ssyr2k.f diff --git a/dspl/blas_src/ssyrk.f b/dspl/libblas/SRC/ssyrk.f similarity index 100% rename from dspl/blas_src/ssyrk.f rename to dspl/libblas/SRC/ssyrk.f diff --git a/dspl/blas_src/stbmv.f b/dspl/libblas/SRC/stbmv.f similarity index 100% rename from dspl/blas_src/stbmv.f rename to dspl/libblas/SRC/stbmv.f diff --git a/dspl/blas_src/stbsv.f b/dspl/libblas/SRC/stbsv.f similarity index 100% rename from dspl/blas_src/stbsv.f rename to dspl/libblas/SRC/stbsv.f diff --git a/dspl/blas_src/stpmv.f b/dspl/libblas/SRC/stpmv.f similarity index 100% rename from dspl/blas_src/stpmv.f rename to dspl/libblas/SRC/stpmv.f diff --git a/dspl/blas_src/stpsv.f b/dspl/libblas/SRC/stpsv.f similarity index 100% rename from dspl/blas_src/stpsv.f rename to dspl/libblas/SRC/stpsv.f diff --git a/dspl/blas_src/strmm.f b/dspl/libblas/SRC/strmm.f similarity index 100% rename from dspl/blas_src/strmm.f rename to dspl/libblas/SRC/strmm.f diff --git a/dspl/blas_src/strmv.f b/dspl/libblas/SRC/strmv.f similarity index 100% rename from dspl/blas_src/strmv.f rename to dspl/libblas/SRC/strmv.f diff --git a/dspl/blas_src/strsm.f b/dspl/libblas/SRC/strsm.f similarity index 100% rename from dspl/blas_src/strsm.f rename to dspl/libblas/SRC/strsm.f diff --git a/dspl/blas_src/strsv.f b/dspl/libblas/SRC/strsv.f similarity index 100% rename from dspl/blas_src/strsv.f rename to dspl/libblas/SRC/strsv.f diff --git a/dspl/blas_src/xerbla.f b/dspl/libblas/SRC/xerbla.f similarity index 100% rename from dspl/blas_src/xerbla.f rename to dspl/libblas/SRC/xerbla.f diff --git a/dspl/blas_src/xerbla_array.f b/dspl/libblas/SRC/xerbla_array.f similarity index 100% rename from dspl/blas_src/xerbla_array.f rename to dspl/libblas/SRC/xerbla_array.f diff --git a/dspl/blas_src/zaxpy.f b/dspl/libblas/SRC/zaxpy.f similarity index 100% rename from dspl/blas_src/zaxpy.f rename to dspl/libblas/SRC/zaxpy.f diff --git a/dspl/blas_src/zcopy.f b/dspl/libblas/SRC/zcopy.f similarity index 100% rename from dspl/blas_src/zcopy.f rename to dspl/libblas/SRC/zcopy.f diff --git a/dspl/blas_src/zdotc.f b/dspl/libblas/SRC/zdotc.f similarity index 100% rename from dspl/blas_src/zdotc.f rename to dspl/libblas/SRC/zdotc.f diff --git a/dspl/blas_src/zdotu.f b/dspl/libblas/SRC/zdotu.f similarity index 100% rename from dspl/blas_src/zdotu.f rename to dspl/libblas/SRC/zdotu.f diff --git a/dspl/blas_src/zdrot.f b/dspl/libblas/SRC/zdrot.f similarity index 100% rename from dspl/blas_src/zdrot.f rename to dspl/libblas/SRC/zdrot.f diff --git a/dspl/blas_src/zdscal.f b/dspl/libblas/SRC/zdscal.f similarity index 100% rename from dspl/blas_src/zdscal.f rename to dspl/libblas/SRC/zdscal.f diff --git a/dspl/blas_src/zgbmv.f b/dspl/libblas/SRC/zgbmv.f similarity index 100% rename from dspl/blas_src/zgbmv.f rename to dspl/libblas/SRC/zgbmv.f diff --git a/dspl/blas_src/zgemm.f b/dspl/libblas/SRC/zgemm.f similarity index 100% rename from dspl/blas_src/zgemm.f rename to dspl/libblas/SRC/zgemm.f diff --git a/dspl/blas_src/zgemv.f b/dspl/libblas/SRC/zgemv.f similarity index 100% rename from dspl/blas_src/zgemv.f rename to dspl/libblas/SRC/zgemv.f diff --git a/dspl/blas_src/zgerc.f b/dspl/libblas/SRC/zgerc.f similarity index 100% rename from dspl/blas_src/zgerc.f rename to dspl/libblas/SRC/zgerc.f diff --git a/dspl/blas_src/zgeru.f b/dspl/libblas/SRC/zgeru.f similarity index 100% rename from dspl/blas_src/zgeru.f rename to dspl/libblas/SRC/zgeru.f diff --git a/dspl/blas_src/zhbmv.f b/dspl/libblas/SRC/zhbmv.f similarity index 100% rename from dspl/blas_src/zhbmv.f rename to dspl/libblas/SRC/zhbmv.f diff --git a/dspl/blas_src/zhemm.f b/dspl/libblas/SRC/zhemm.f similarity index 100% rename from dspl/blas_src/zhemm.f rename to dspl/libblas/SRC/zhemm.f diff --git a/dspl/blas_src/zhemv.f b/dspl/libblas/SRC/zhemv.f similarity index 100% rename from dspl/blas_src/zhemv.f rename to dspl/libblas/SRC/zhemv.f diff --git a/dspl/blas_src/zher.f b/dspl/libblas/SRC/zher.f similarity index 100% rename from dspl/blas_src/zher.f rename to dspl/libblas/SRC/zher.f diff --git a/dspl/blas_src/zher2.f b/dspl/libblas/SRC/zher2.f similarity index 100% rename from dspl/blas_src/zher2.f rename to dspl/libblas/SRC/zher2.f diff --git a/dspl/blas_src/zher2k.f b/dspl/libblas/SRC/zher2k.f similarity index 100% rename from dspl/blas_src/zher2k.f rename to dspl/libblas/SRC/zher2k.f diff --git a/dspl/blas_src/zherk.f b/dspl/libblas/SRC/zherk.f similarity index 100% rename from dspl/blas_src/zherk.f rename to dspl/libblas/SRC/zherk.f diff --git a/dspl/blas_src/zhpmv.f b/dspl/libblas/SRC/zhpmv.f similarity index 100% rename from dspl/blas_src/zhpmv.f rename to dspl/libblas/SRC/zhpmv.f diff --git a/dspl/blas_src/zhpr.f b/dspl/libblas/SRC/zhpr.f similarity index 100% rename from dspl/blas_src/zhpr.f rename to dspl/libblas/SRC/zhpr.f diff --git a/dspl/blas_src/zhpr2.f b/dspl/libblas/SRC/zhpr2.f similarity index 100% rename from dspl/blas_src/zhpr2.f rename to dspl/libblas/SRC/zhpr2.f diff --git a/dspl/blas_src/zrotg.f b/dspl/libblas/SRC/zrotg.f similarity index 100% rename from dspl/blas_src/zrotg.f rename to dspl/libblas/SRC/zrotg.f diff --git a/dspl/blas_src/zscal.f b/dspl/libblas/SRC/zscal.f similarity index 100% rename from dspl/blas_src/zscal.f rename to dspl/libblas/SRC/zscal.f diff --git a/dspl/blas_src/zswap.f b/dspl/libblas/SRC/zswap.f similarity index 100% rename from dspl/blas_src/zswap.f rename to dspl/libblas/SRC/zswap.f diff --git a/dspl/blas_src/zsymm.f b/dspl/libblas/SRC/zsymm.f similarity index 100% rename from dspl/blas_src/zsymm.f rename to dspl/libblas/SRC/zsymm.f diff --git a/dspl/blas_src/zsyr2k.f b/dspl/libblas/SRC/zsyr2k.f similarity index 100% rename from dspl/blas_src/zsyr2k.f rename to dspl/libblas/SRC/zsyr2k.f diff --git a/dspl/blas_src/zsyrk.f b/dspl/libblas/SRC/zsyrk.f similarity index 100% rename from dspl/blas_src/zsyrk.f rename to dspl/libblas/SRC/zsyrk.f diff --git a/dspl/blas_src/ztbmv.f b/dspl/libblas/SRC/ztbmv.f similarity index 100% rename from dspl/blas_src/ztbmv.f rename to dspl/libblas/SRC/ztbmv.f diff --git a/dspl/blas_src/ztbsv.f b/dspl/libblas/SRC/ztbsv.f similarity index 100% rename from dspl/blas_src/ztbsv.f rename to dspl/libblas/SRC/ztbsv.f diff --git a/dspl/blas_src/ztpmv.f b/dspl/libblas/SRC/ztpmv.f similarity index 100% rename from dspl/blas_src/ztpmv.f rename to dspl/libblas/SRC/ztpmv.f diff --git a/dspl/blas_src/ztpsv.f b/dspl/libblas/SRC/ztpsv.f similarity index 100% rename from dspl/blas_src/ztpsv.f rename to dspl/libblas/SRC/ztpsv.f diff --git a/dspl/blas_src/ztrmm.f b/dspl/libblas/SRC/ztrmm.f similarity index 100% rename from dspl/blas_src/ztrmm.f rename to dspl/libblas/SRC/ztrmm.f diff --git a/dspl/blas_src/ztrmv.f b/dspl/libblas/SRC/ztrmv.f similarity index 100% rename from dspl/blas_src/ztrmv.f rename to dspl/libblas/SRC/ztrmv.f diff --git a/dspl/blas_src/ztrsm.f b/dspl/libblas/SRC/ztrsm.f similarity index 100% rename from dspl/blas_src/ztrsm.f rename to dspl/libblas/SRC/ztrsm.f diff --git a/dspl/blas_src/ztrsv.f b/dspl/libblas/SRC/ztrsv.f similarity index 100% rename from dspl/blas_src/ztrsv.f rename to dspl/libblas/SRC/ztrsv.f diff --git a/dspl/liblapack/INSTALL/.gitignore b/dspl/liblapack/INSTALL/.gitignore new file mode 100644 index 0000000..819556b --- /dev/null +++ b/dspl/liblapack/INSTALL/.gitignore @@ -0,0 +1,9 @@ +*.o +*.so +*.dll +*.exe +*.txt +*.dat +*.bin +*.csv +*.a \ No newline at end of file diff --git a/dspl/liblapack/INSTALL/LAPACK_version.f b/dspl/liblapack/INSTALL/LAPACK_version.f new file mode 100644 index 0000000..0902e12 --- /dev/null +++ b/dspl/liblapack/INSTALL/LAPACK_version.f @@ -0,0 +1,41 @@ +*> \brief \b LAPACK_VERSION +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM LAPACK_VERSION +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + PROGRAM LAPACK_VERSION +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + INTEGER MAJOR, MINOR, PATCH +* .. +* .. External Subroutines .. + EXTERNAL ILAVER +* + CALL ILAVER ( MAJOR, MINOR, PATCH ) + WRITE(*,*) "LAPACK ",MAJOR,".",MINOR,".",PATCH +* + END diff --git a/dspl/liblapack/INSTALL/Makefile b/dspl/liblapack/INSTALL/Makefile new file mode 100644 index 0000000..150a061 --- /dev/null +++ b/dspl/liblapack/INSTALL/Makefile @@ -0,0 +1,50 @@ +include ../make.inc + +all: testlsame testslamch testdlamch testsecond testdsecnd testieee testversion + +testlsame: lsame.o lsametst.o + $(LOADER) $(LOADOPTS) -o $@ $^ + +testslamch: slamch.o lsame.o slamchtst.o + $(LOADER) $(LOADOPTS) -o $@ $^ + +testdlamch: dlamch.o lsame.o dlamchtst.o + $(LOADER) $(LOADOPTS) -o $@ $^ + +testsecond: second_$(TIMER).o secondtst.o + @echo "[INFO] : TIMER value: $(TIMER) (given by make.inc)" + $(LOADER) $(LOADOPTS) -o $@ $^ + +testdsecnd: dsecnd_$(TIMER).o dsecndtst.o + @echo "[INFO] : TIMER value: $(TIMER) (given by make.inc)" + $(LOADER) $(LOADOPTS) -o $@ $^ + +testieee: tstiee.o + $(LOADER) $(LOADOPTS) -o $@ $^ + +testversion: ilaver.o LAPACK_version.o + $(LOADER) $(LOADOPTS) -o $@ $^ + +run: all + ./testlsame + ./testslamch + ./testdlamch + ./testsecond + ./testdsecnd + ./testieee + ./testversion + +clean: cleanobj cleanexe cleantest +cleanobj: + rm -f *.o +cleanexe: + rm -f test* +cleantest: + rm -f core + +.SUFFIXES: .o .f +.f.o: + $(FORTRAN) $(OPTS) -c -o $@ $< + +slamch.o: slamch.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< +dlamch.o: dlamch.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< diff --git a/dspl/liblapack/INSTALL/dlamch.f b/dspl/liblapack/INSTALL/dlamch.f new file mode 100644 index 0000000..76f875c --- /dev/null +++ b/dspl/liblapack/INSTALL/dlamch.f @@ -0,0 +1,189 @@ +*> \brief \b DLAMCH +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLAMCH( CMACH ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAMCH determines double precision machine parameters. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] CMACH +*> \verbatim +*> Specifies the value to be returned by DLAMCH: +*> = 'E' or 'e', DLAMCH := eps +*> = 'S' or 's , DLAMCH := sfmin +*> = 'B' or 'b', DLAMCH := base +*> = 'P' or 'p', DLAMCH := eps*base +*> = 'N' or 'n', DLAMCH := t +*> = 'R' or 'r', DLAMCH := rnd +*> = 'M' or 'm', DLAMCH := emin +*> = 'U' or 'u', DLAMCH := rmin +*> = 'L' or 'l', DLAMCH := emax +*> = 'O' or 'o', DLAMCH := rmax +*> where +*> eps = relative machine precision +*> sfmin = safe minimum, such that 1/sfmin does not overflow +*> base = base of the machine +*> prec = eps*base +*> t = number of (base) digits in the mantissa +*> rnd = 1.0 when rounding occurs in addition, 0.0 otherwise +*> emin = minimum exponent before (gradual) underflow +*> rmin = underflow threshold - base**(emin-1) +*> emax = largest exponent before overflow +*> rmax = overflow threshold - (base**emax)*(1-eps) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLAMCH( CMACH ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER CMACH +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION RND, EPS, SFMIN, SMALL, RMACH +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC DIGITS, EPSILON, HUGE, MAXEXPONENT, + $ MINEXPONENT, RADIX, TINY +* .. +* .. Executable Statements .. +* +* +* Assume rounding, not chopping. Always. +* + RND = ONE +* + IF( ONE.EQ.RND ) THEN + EPS = EPSILON(ZERO) * 0.5 + ELSE + EPS = EPSILON(ZERO) + END IF +* + IF( LSAME( CMACH, 'E' ) ) THEN + RMACH = EPS + ELSE IF( LSAME( CMACH, 'S' ) ) THEN + SFMIN = TINY(ZERO) + SMALL = ONE / HUGE(ZERO) + IF( SMALL.GE.SFMIN ) THEN +* +* Use SMALL plus a bit, to avoid the possibility of rounding +* causing overflow when computing 1/sfmin. +* + SFMIN = SMALL*( ONE+EPS ) + END IF + RMACH = SFMIN + ELSE IF( LSAME( CMACH, 'B' ) ) THEN + RMACH = RADIX(ZERO) + ELSE IF( LSAME( CMACH, 'P' ) ) THEN + RMACH = EPS * RADIX(ZERO) + ELSE IF( LSAME( CMACH, 'N' ) ) THEN + RMACH = DIGITS(ZERO) + ELSE IF( LSAME( CMACH, 'R' ) ) THEN + RMACH = RND + ELSE IF( LSAME( CMACH, 'M' ) ) THEN + RMACH = MINEXPONENT(ZERO) + ELSE IF( LSAME( CMACH, 'U' ) ) THEN + RMACH = tiny(zero) + ELSE IF( LSAME( CMACH, 'L' ) ) THEN + RMACH = MAXEXPONENT(ZERO) + ELSE IF( LSAME( CMACH, 'O' ) ) THEN + RMACH = HUGE(ZERO) + ELSE + RMACH = ZERO + END IF +* + DLAMCH = RMACH + RETURN +* +* End of DLAMCH +* + END +************************************************************************ +*> \brief \b DLAMC3 +*> \details +*> \b Purpose: +*> \verbatim +*> DLAMC3 is intended to force A and B to be stored prior to doing +*> the addition of A and B , for use in situations where optimizers +*> might hold one of these in a register. +*> \endverbatim +*> \author LAPACK is a software package provided by Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. +*> \date December 2016 +*> \ingroup auxOTHERauxiliary +*> +*> \param[in] A +*> \verbatim +*> A is a DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is a DOUBLE PRECISION +*> The values A and B. +*> \endverbatim +*> + DOUBLE PRECISION FUNCTION DLAMC3( A, B ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2010 +* +* .. Scalar Arguments .. + DOUBLE PRECISION A, B +* .. +* ===================================================================== +* +* .. Executable Statements .. +* + DLAMC3 = A + B +* + RETURN +* +* End of DLAMC3 +* + END +* +************************************************************************ diff --git a/dspl/liblapack/INSTALL/dlamchf77.f b/dspl/liblapack/INSTALL/dlamchf77.f new file mode 100644 index 0000000..3efd215 --- /dev/null +++ b/dspl/liblapack/INSTALL/dlamchf77.f @@ -0,0 +1,919 @@ +*> \brief \b DLAMCHF77 deprecated +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLAMCH( CMACH ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAMCHF77 determines double precision machine parameters. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] CMACH +*> \verbatim +*> Specifies the value to be returned by DLAMCH: +*> = 'E' or 'e', DLAMCH := eps +*> = 'S' or 's , DLAMCH := sfmin +*> = 'B' or 'b', DLAMCH := base +*> = 'P' or 'p', DLAMCH := eps*base +*> = 'N' or 'n', DLAMCH := t +*> = 'R' or 'r', DLAMCH := rnd +*> = 'M' or 'm', DLAMCH := emin +*> = 'U' or 'u', DLAMCH := rmin +*> = 'L' or 'l', DLAMCH := emax +*> = 'O' or 'o', DLAMCH := rmax +*> where +*> eps = relative machine precision +*> sfmin = safe minimum, such that 1/sfmin does not overflow +*> base = base of the machine +*> prec = eps*base +*> t = number of (base) digits in the mantissa +*> rnd = 1.0 when rounding occurs in addition, 0.0 otherwise +*> emin = minimum exponent before (gradual) underflow +*> rmin = underflow threshold - base**(emin-1) +*> emax = largest exponent before overflow +*> rmax = overflow threshold - (base**emax)*(1-eps) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLAMCH( CMACH ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER CMACH +* .. +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL FIRST, LRND + INTEGER BETA, IMAX, IMIN, IT + DOUBLE PRECISION BASE, EMAX, EMIN, EPS, PREC, RMACH, RMAX, RMIN, + $ RND, SFMIN, SMALL, T +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLAMC2 +* .. +* .. Save statement .. + SAVE FIRST, EPS, SFMIN, BASE, T, RND, EMIN, RMIN, + $ EMAX, RMAX, PREC +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + CALL DLAMC2( BETA, IT, LRND, EPS, IMIN, RMIN, IMAX, RMAX ) + BASE = BETA + T = IT + IF( LRND ) THEN + RND = ONE + EPS = ( BASE**( 1-IT ) ) / 2 + ELSE + RND = ZERO + EPS = BASE**( 1-IT ) + END IF + PREC = EPS*BASE + EMIN = IMIN + EMAX = IMAX + SFMIN = RMIN + SMALL = ONE / RMAX + IF( SMALL.GE.SFMIN ) THEN +* +* Use SMALL plus a bit, to avoid the possibility of rounding +* causing overflow when computing 1/sfmin. +* + SFMIN = SMALL*( ONE+EPS ) + END IF + END IF +* + IF( LSAME( CMACH, 'E' ) ) THEN + RMACH = EPS + ELSE IF( LSAME( CMACH, 'S' ) ) THEN + RMACH = SFMIN + ELSE IF( LSAME( CMACH, 'B' ) ) THEN + RMACH = BASE + ELSE IF( LSAME( CMACH, 'P' ) ) THEN + RMACH = PREC + ELSE IF( LSAME( CMACH, 'N' ) ) THEN + RMACH = T + ELSE IF( LSAME( CMACH, 'R' ) ) THEN + RMACH = RND + ELSE IF( LSAME( CMACH, 'M' ) ) THEN + RMACH = EMIN + ELSE IF( LSAME( CMACH, 'U' ) ) THEN + RMACH = RMIN + ELSE IF( LSAME( CMACH, 'L' ) ) THEN + RMACH = EMAX + ELSE IF( LSAME( CMACH, 'O' ) ) THEN + RMACH = RMAX + END IF +* + DLAMCH = RMACH + FIRST = .FALSE. + RETURN +* +* End of DLAMCH +* + END +* +************************************************************************ +* +*> \brief \b DLAMC1 +*> \details +*> \b Purpose: +*> \verbatim +*> DLAMC1 determines the machine parameters given by BETA, T, RND, and +*> IEEE1. +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> The base of the machine. +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> The number of ( BETA ) digits in the mantissa. +*> \endverbatim +*> +*> \param[out] RND +*> \verbatim +*> Specifies whether proper rounding ( RND = .TRUE. ) or +*> chopping ( RND = .FALSE. ) occurs in addition. This may not +*> be a reliable guide to the way in which the machine performs +*> its arithmetic. +*> \endverbatim +*> +*> \param[out] IEEE1 +*> \verbatim +*> Specifies whether rounding appears to be done in the IEEE +*> 'round to nearest' style. +*> \endverbatim +*> \author LAPACK is a software package provided by Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. +*> \date April 2012 +*> \ingroup auxOTHERauxiliary +*> +*> \details \b Further \b Details +*> \verbatim +*> +*> The routine is based on the routine ENVRON by Malcolm and +*> incorporates suggestions by Gentleman and Marovich. See +*> +*> Malcolm M. A. (1972) Algorithms to reveal properties of +*> floating-point arithmetic. Comms. of the ACM, 15, 949-951. +*> +*> Gentleman W. M. and Marovich S. B. (1974) More on algorithms +*> that reveal properties of floating point arithmetic units. +*> Comms. of the ACM, 17, 276-277. +*> \endverbatim +*> + SUBROUTINE DLAMC1( BETA, T, RND, IEEE1 ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2010 +* +* .. Scalar Arguments .. + LOGICAL IEEE1, RND + INTEGER BETA, T +* .. +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL FIRST, LIEEE1, LRND + INTEGER LBETA, LT + DOUBLE PRECISION A, B, C, F, ONE, QTR, SAVEC, T1, T2 +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3 + EXTERNAL DLAMC3 +* .. +* .. Save statement .. + SAVE FIRST, LIEEE1, LBETA, LRND, LT +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + ONE = 1 +* +* LBETA, LIEEE1, LT and LRND are the local values of BETA, +* IEEE1, T and RND. +* +* Throughout this routine we use the function DLAMC3 to ensure +* that relevant values are stored and not held in registers, or +* are not affected by optimizers. +* +* Compute a = 2.0**m with the smallest positive integer m such +* that +* +* fl( a + 1.0 ) = a. +* + A = 1 + C = 1 +* +*+ WHILE( C.EQ.ONE )LOOP + 10 CONTINUE + IF( C.EQ.ONE ) THEN + A = 2*A + C = DLAMC3( A, ONE ) + C = DLAMC3( C, -A ) + GO TO 10 + END IF +*+ END WHILE +* +* Now compute b = 2.0**m with the smallest positive integer m +* such that +* +* fl( a + b ) .gt. a. +* + B = 1 + C = DLAMC3( A, B ) +* +*+ WHILE( C.EQ.A )LOOP + 20 CONTINUE + IF( C.EQ.A ) THEN + B = 2*B + C = DLAMC3( A, B ) + GO TO 20 + END IF +*+ END WHILE +* +* Now compute the base. a and c are neighbouring floating point +* numbers in the interval ( beta**t, beta**( t + 1 ) ) and so +* their difference is beta. Adding 0.25 to c is to ensure that it +* is truncated to beta and not ( beta - 1 ). +* + QTR = ONE / 4 + SAVEC = C + C = DLAMC3( C, -A ) + LBETA = C + QTR +* +* Now determine whether rounding or chopping occurs, by adding a +* bit less than beta/2 and a bit more than beta/2 to a. +* + B = LBETA + F = DLAMC3( B / 2, -B / 100 ) + C = DLAMC3( F, A ) + IF( C.EQ.A ) THEN + LRND = .TRUE. + ELSE + LRND = .FALSE. + END IF + F = DLAMC3( B / 2, B / 100 ) + C = DLAMC3( F, A ) + IF( ( LRND ) .AND. ( C.EQ.A ) ) + $ LRND = .FALSE. +* +* Try and decide whether rounding is done in the IEEE 'round to +* nearest' style. B/2 is half a unit in the last place of the two +* numbers A and SAVEC. Furthermore, A is even, i.e. has last bit +* zero, and SAVEC is odd. Thus adding B/2 to A should not change +* A, but adding B/2 to SAVEC should change SAVEC. +* + T1 = DLAMC3( B / 2, A ) + T2 = DLAMC3( B / 2, SAVEC ) + LIEEE1 = ( T1.EQ.A ) .AND. ( T2.GT.SAVEC ) .AND. LRND +* +* Now find the mantissa, t. It should be the integer part of +* log to the base beta of a, however it is safer to determine t +* by powering. So we find t as the smallest positive integer for +* which +* +* fl( beta**t + 1.0 ) = 1.0. +* + LT = 0 + A = 1 + C = 1 +* +*+ WHILE( C.EQ.ONE )LOOP + 30 CONTINUE + IF( C.EQ.ONE ) THEN + LT = LT + 1 + A = A*LBETA + C = DLAMC3( A, ONE ) + C = DLAMC3( C, -A ) + GO TO 30 + END IF +*+ END WHILE +* + END IF +* + BETA = LBETA + T = LT + RND = LRND + IEEE1 = LIEEE1 + FIRST = .FALSE. + RETURN +* +* End of DLAMC1 +* + END +* +************************************************************************ +* +*> \brief \b DLAMC2 +*> \details +*> \b Purpose: +*> \verbatim +*> DLAMC2 determines the machine parameters specified in its argument +*> list. +*> \endverbatim +*> \author LAPACK is a software package provided by Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. +*> \date April 2012 +*> \ingroup auxOTHERauxiliary +*> +*> \param[out] BETA +*> \verbatim +*> The base of the machine. +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> The number of ( BETA ) digits in the mantissa. +*> \endverbatim +*> +*> \param[out] RND +*> \verbatim +*> Specifies whether proper rounding ( RND = .TRUE. ) or +*> chopping ( RND = .FALSE. ) occurs in addition. This may not +*> be a reliable guide to the way in which the machine performs +*> its arithmetic. +*> \endverbatim +*> +*> \param[out] EPS +*> \verbatim +*> The smallest positive number such that +*> fl( 1.0 - EPS ) .LT. 1.0, +*> where fl denotes the computed value. +*> \endverbatim +*> +*> \param[out] EMIN +*> \verbatim +*> The minimum exponent before (gradual) underflow occurs. +*> \endverbatim +*> +*> \param[out] RMIN +*> \verbatim +*> The smallest normalized number for the machine, given by +*> BASE**( EMIN - 1 ), where BASE is the floating point value +*> of BETA. +*> \endverbatim +*> +*> \param[out] EMAX +*> \verbatim +*> The maximum exponent before overflow occurs. +*> \endverbatim +*> +*> \param[out] RMAX +*> \verbatim +*> The largest positive number for the machine, given by +*> BASE**EMAX * ( 1 - EPS ), where BASE is the floating point +*> value of BETA. +*> \endverbatim +*> +*> \details \b Further \b Details +*> \verbatim +*> +*> The computation of EPS is based on a routine PARANOIA by +*> W. Kahan of the University of California at Berkeley. +*> \endverbatim + SUBROUTINE DLAMC2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2010 +* +* .. Scalar Arguments .. + LOGICAL RND + INTEGER BETA, EMAX, EMIN, T + DOUBLE PRECISION EPS, RMAX, RMIN +* .. +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL FIRST, IEEE, IWARN, LIEEE1, LRND + INTEGER GNMIN, GPMIN, I, LBETA, LEMAX, LEMIN, LT, + $ NGNMIN, NGPMIN + DOUBLE PRECISION A, B, C, HALF, LEPS, LRMAX, LRMIN, ONE, RBASE, + $ SIXTH, SMALL, THIRD, TWO, ZERO +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3 + EXTERNAL DLAMC3 +* .. +* .. External Subroutines .. + EXTERNAL DLAMC1, DLAMC4, DLAMC5 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Save statement .. + SAVE FIRST, IWARN, LBETA, LEMAX, LEMIN, LEPS, LRMAX, + $ LRMIN, LT +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / , IWARN / .FALSE. / +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + ZERO = 0 + ONE = 1 + TWO = 2 +* +* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of +* BETA, T, RND, EPS, EMIN and RMIN. +* +* Throughout this routine we use the function DLAMC3 to ensure +* that relevant values are stored and not held in registers, or +* are not affected by optimizers. +* +* DLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. +* + CALL DLAMC1( LBETA, LT, LRND, LIEEE1 ) +* +* Start to find EPS. +* + B = LBETA + A = B**( -LT ) + LEPS = A +* +* Try some tricks to see whether or not this is the correct EPS. +* + B = TWO / 3 + HALF = ONE / 2 + SIXTH = DLAMC3( B, -HALF ) + THIRD = DLAMC3( SIXTH, SIXTH ) + B = DLAMC3( THIRD, -HALF ) + B = DLAMC3( B, SIXTH ) + B = ABS( B ) + IF( B.LT.LEPS ) + $ B = LEPS +* + LEPS = 1 +* +*+ WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP + 10 CONTINUE + IF( ( LEPS.GT.B ) .AND. ( B.GT.ZERO ) ) THEN + LEPS = B + C = DLAMC3( HALF*LEPS, ( TWO**5 )*( LEPS**2 ) ) + C = DLAMC3( HALF, -C ) + B = DLAMC3( HALF, C ) + C = DLAMC3( HALF, -B ) + B = DLAMC3( HALF, C ) + GO TO 10 + END IF +*+ END WHILE +* + IF( A.LT.LEPS ) + $ LEPS = A +* +* Computation of EPS complete. +* +* Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). +* Keep dividing A by BETA until (gradual) underflow occurs. This +* is detected when we cannot recover the previous A. +* + RBASE = ONE / LBETA + SMALL = ONE + DO 20 I = 1, 3 + SMALL = DLAMC3( SMALL*RBASE, ZERO ) + 20 CONTINUE + A = DLAMC3( ONE, SMALL ) + CALL DLAMC4( NGPMIN, ONE, LBETA ) + CALL DLAMC4( NGNMIN, -ONE, LBETA ) + CALL DLAMC4( GPMIN, A, LBETA ) + CALL DLAMC4( GNMIN, -A, LBETA ) + IEEE = .FALSE. +* + IF( ( NGPMIN.EQ.NGNMIN ) .AND. ( GPMIN.EQ.GNMIN ) ) THEN + IF( NGPMIN.EQ.GPMIN ) THEN + LEMIN = NGPMIN +* ( Non twos-complement machines, no gradual underflow; +* e.g., VAX ) + ELSE IF( ( GPMIN-NGPMIN ).EQ.3 ) THEN + LEMIN = NGPMIN - 1 + LT + IEEE = .TRUE. +* ( Non twos-complement machines, with gradual underflow; +* e.g., IEEE standard followers ) + ELSE + LEMIN = MIN( NGPMIN, GPMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF +* + ELSE IF( ( NGPMIN.EQ.GPMIN ) .AND. ( NGNMIN.EQ.GNMIN ) ) THEN + IF( ABS( NGPMIN-NGNMIN ).EQ.1 ) THEN + LEMIN = MAX( NGPMIN, NGNMIN ) +* ( Twos-complement machines, no gradual underflow; +* e.g., CYBER 205 ) + ELSE + LEMIN = MIN( NGPMIN, NGNMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF +* + ELSE IF( ( ABS( NGPMIN-NGNMIN ).EQ.1 ) .AND. + $ ( GPMIN.EQ.GNMIN ) ) THEN + IF( ( GPMIN-MIN( NGPMIN, NGNMIN ) ).EQ.3 ) THEN + LEMIN = MAX( NGPMIN, NGNMIN ) - 1 + LT +* ( Twos-complement machines with gradual underflow; +* no known machine ) + ELSE + LEMIN = MIN( NGPMIN, NGNMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF +* + ELSE + LEMIN = MIN( NGPMIN, NGNMIN, GPMIN, GNMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF + FIRST = .FALSE. +*** +* Comment out this if block if EMIN is ok + IF( IWARN ) THEN + FIRST = .TRUE. + WRITE( 6, FMT = 9999 )LEMIN + END IF +*** +* +* Assume IEEE arithmetic if we found denormalised numbers above, +* or if arithmetic seems to round in the IEEE style, determined +* in routine DLAMC1. A true IEEE machine should have both things +* true; however, faulty machines may have one or the other. +* + IEEE = IEEE .OR. LIEEE1 +* +* Compute RMIN by successive division by BETA. We could compute +* RMIN as BASE**( EMIN - 1 ), but some machines underflow during +* this computation. +* + LRMIN = 1 + DO 30 I = 1, 1 - LEMIN + LRMIN = DLAMC3( LRMIN*RBASE, ZERO ) + 30 CONTINUE +* +* Finally, call DLAMC5 to compute EMAX and RMAX. +* + CALL DLAMC5( LBETA, LT, LEMIN, IEEE, LEMAX, LRMAX ) + END IF +* + BETA = LBETA + T = LT + RND = LRND + EPS = LEPS + EMIN = LEMIN + RMIN = LRMIN + EMAX = LEMAX + RMAX = LRMAX +* + RETURN +* + 9999 FORMAT( / / ' WARNING. The value EMIN may be incorrect:-', + $ ' EMIN = ', I8, / + $ ' If, after inspection, the value EMIN looks', + $ ' acceptable please comment out ', + $ / ' the IF block as marked within the code of routine', + $ ' DLAMC2,', / ' otherwise supply EMIN explicitly.', / ) +* +* End of DLAMC2 +* + END +* +************************************************************************ +* +*> \brief \b DLAMC3 +*> \details +*> \b Purpose: +*> \verbatim +*> DLAMC3 is intended to force A and B to be stored prior to doing +*> the addition of A and B , for use in situations where optimizers +*> might hold one of these in a register. +*> \endverbatim +*> +*> \param[in] A +*> +*> \param[in] B +*> \verbatim +*> The values A and B. +*> \endverbatim + + DOUBLE PRECISION FUNCTION DLAMC3( A, B ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2010 +* +* .. Scalar Arguments .. + DOUBLE PRECISION A, B +* .. +* ===================================================================== +* +* .. Executable Statements .. +* + DLAMC3 = A + B +* + RETURN +* +* End of DLAMC3 +* + END +* +************************************************************************ +* +*> \brief \b DLAMC4 +*> \details +*> \b Purpose: +*> \verbatim +*> DLAMC4 is a service routine for DLAMC2. +*> \endverbatim +*> +*> \param[out] EMIN +*> \verbatim +*> The minimum exponent before (gradual) underflow, computed by +*> setting A = START and dividing by BASE until the previous A +*> can not be recovered. +*> \endverbatim +*> +*> \param[in] START +*> \verbatim +*> The starting point for determining EMIN. +*> \endverbatim +*> +*> \param[in] BASE +*> \verbatim +*> The base of the machine. +*> \endverbatim +*> + SUBROUTINE DLAMC4( EMIN, START, BASE ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2010 +* +* .. Scalar Arguments .. + INTEGER BASE, EMIN + DOUBLE PRECISION START +* .. +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION A, B1, B2, C1, C2, D1, D2, ONE, RBASE, ZERO +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3 + EXTERNAL DLAMC3 +* .. +* .. Executable Statements .. +* + A = START + ONE = 1 + RBASE = ONE / BASE + ZERO = 0 + EMIN = 1 + B1 = DLAMC3( A*RBASE, ZERO ) + C1 = A + C2 = A + D1 = A + D2 = A +*+ WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. +* $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP + 10 CONTINUE + IF( ( C1.EQ.A ) .AND. ( C2.EQ.A ) .AND. ( D1.EQ.A ) .AND. + $ ( D2.EQ.A ) ) THEN + EMIN = EMIN - 1 + A = B1 + B1 = DLAMC3( A / BASE, ZERO ) + C1 = DLAMC3( B1*BASE, ZERO ) + D1 = ZERO + DO 20 I = 1, BASE + D1 = D1 + B1 + 20 CONTINUE + B2 = DLAMC3( A*RBASE, ZERO ) + C2 = DLAMC3( B2 / RBASE, ZERO ) + D2 = ZERO + DO 30 I = 1, BASE + D2 = D2 + B2 + 30 CONTINUE + GO TO 10 + END IF +*+ END WHILE +* + RETURN +* +* End of DLAMC4 +* + END +* +************************************************************************ +* +*> \brief \b DLAMC5 +*> \details +*> \b Purpose: +*> \verbatim +*> DLAMC5 attempts to compute RMAX, the largest machine floating-point +*> number, without overflow. It assumes that EMAX + abs(EMIN) sum +*> approximately to a power of 2. It will fail on machines where this +*> assumption does not hold, for example, the Cyber 205 (EMIN = -28625, +*> EMAX = 28718). It will also fail if the value supplied for EMIN is +*> too large (i.e. too close to zero), probably with overflow. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> The base of floating-point arithmetic. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> The number of base BETA digits in the mantissa of a +*> floating-point value. +*> \endverbatim +*> +*> \param[in] EMIN +*> \verbatim +*> The minimum exponent before (gradual) underflow. +*> \endverbatim +*> +*> \param[in] IEEE +*> \verbatim +*> A logical flag specifying whether or not the arithmetic +*> system is thought to comply with the IEEE standard. +*> \endverbatim +*> +*> \param[out] EMAX +*> \verbatim +*> The largest exponent before overflow +*> \endverbatim +*> +*> \param[out] RMAX +*> \verbatim +*> The largest machine floating-point number. +*> \endverbatim +*> + SUBROUTINE DLAMC5( BETA, P, EMIN, IEEE, EMAX, RMAX ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2010 +* +* .. Scalar Arguments .. + LOGICAL IEEE + INTEGER BETA, EMAX, EMIN, P + DOUBLE PRECISION RMAX +* .. +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + INTEGER EXBITS, EXPSUM, I, LEXP, NBITS, TRY, UEXP + DOUBLE PRECISION OLDY, RECBAS, Y, Z +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3 + EXTERNAL DLAMC3 +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* First compute LEXP and UEXP, two powers of 2 that bound +* abs(EMIN). We then assume that EMAX + abs(EMIN) will sum +* approximately to the bound that is closest to abs(EMIN). +* (EMAX is the exponent of the required number RMAX). +* + LEXP = 1 + EXBITS = 1 + 10 CONTINUE + TRY = LEXP*2 + IF( TRY.LE.( -EMIN ) ) THEN + LEXP = TRY + EXBITS = EXBITS + 1 + GO TO 10 + END IF + IF( LEXP.EQ.-EMIN ) THEN + UEXP = LEXP + ELSE + UEXP = TRY + EXBITS = EXBITS + 1 + END IF +* +* Now -LEXP is less than or equal to EMIN, and -UEXP is greater +* than or equal to EMIN. EXBITS is the number of bits needed to +* store the exponent. +* + IF( ( UEXP+EMIN ).GT.( -LEXP-EMIN ) ) THEN + EXPSUM = 2*LEXP + ELSE + EXPSUM = 2*UEXP + END IF +* +* EXPSUM is the exponent range, approximately equal to +* EMAX - EMIN + 1 . +* + EMAX = EXPSUM + EMIN - 1 + NBITS = 1 + EXBITS + P +* +* NBITS is the total number of bits needed to store a +* floating-point number. +* + IF( ( MOD( NBITS, 2 ).EQ.1 ) .AND. ( BETA.EQ.2 ) ) THEN +* +* Either there are an odd number of bits used to store a +* floating-point number, which is unlikely, or some bits are +* not used in the representation of numbers, which is possible, +* (e.g. Cray machines) or the mantissa has an implicit bit, +* (e.g. IEEE machines, Dec Vax machines), which is perhaps the +* most likely. We have to assume the last alternative. +* If this is true, then we need to reduce EMAX by one because +* there must be some way of representing zero in an implicit-bit +* system. On machines like Cray, we are reducing EMAX by one +* unnecessarily. +* + EMAX = EMAX - 1 + END IF +* + IF( IEEE ) THEN +* +* Assume we are on an IEEE machine which reserves one exponent +* for infinity and NaN. +* + EMAX = EMAX - 1 + END IF +* +* Now create RMAX, the largest machine number, which should +* be equal to (1.0 - BETA**(-P)) * BETA**EMAX . +* +* First compute 1.0 - BETA**(-P), being careful that the +* result is less than 1.0 . +* + RECBAS = ONE / BETA + Z = BETA - ONE + Y = ZERO + DO 20 I = 1, P + Z = Z*RECBAS + IF( Y.LT.ONE ) + $ OLDY = Y + Y = DLAMC3( Y, Z ) + 20 CONTINUE + IF( Y.GE.ONE ) + $ Y = OLDY +* +* Now multiply by BETA**EMAX to get RMAX. +* + DO 30 I = 1, EMAX + Y = DLAMC3( Y*BETA, ZERO ) + 30 CONTINUE +* + RMAX = Y + RETURN +* +* End of DLAMC5 +* + END diff --git a/dspl/liblapack/INSTALL/dlamchtst.f b/dspl/liblapack/INSTALL/dlamchtst.f new file mode 100644 index 0000000..7395688 --- /dev/null +++ b/dspl/liblapack/INSTALL/dlamchtst.f @@ -0,0 +1,67 @@ +*> \brief \b DLAMCHTST +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM DLAMCHTST +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== PROGRAM DLAMCHTST +* +* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* ===================================================================== +* +* .. Local Scalars .. + DOUBLE PRECISION BASE, EMAX, EMIN, EPS, PREC, RMAX, RMIN, RND, + $ SFMIN, T +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Executable Statements .. +* + EPS = DLAMCH( 'Epsilon' ) + SFMIN = DLAMCH( 'Safe minimum' ) + BASE = DLAMCH( 'Base' ) + PREC = DLAMCH( 'Precision' ) + T = DLAMCH( 'Number of digits in mantissa' ) + RND = DLAMCH( 'Rounding mode' ) + EMIN = DLAMCH( 'Minimum exponent' ) + RMIN = DLAMCH( 'Underflow threshold' ) + EMAX = DLAMCH( 'Largest exponent' ) + RMAX = DLAMCH( 'Overflow threshold' ) +* + WRITE( 6, * )' Epsilon = ', EPS + WRITE( 6, * )' Safe minimum = ', SFMIN + WRITE( 6, * )' Base = ', BASE + WRITE( 6, * )' Precision = ', PREC + WRITE( 6, * )' Number of digits in mantissa = ', T + WRITE( 6, * )' Rounding mode = ', RND + WRITE( 6, * )' Minimum exponent = ', EMIN + WRITE( 6, * )' Underflow threshold = ', RMIN + WRITE( 6, * )' Largest exponent = ', EMAX + WRITE( 6, * )' Overflow threshold = ', RMAX + WRITE( 6, * )' Reciprocal of safe minimum = ', 1 / SFMIN +* + END diff --git a/dspl/liblapack/INSTALL/dsecnd_EXT_ETIME.f b/dspl/liblapack/INSTALL/dsecnd_EXT_ETIME.f new file mode 100644 index 0000000..3537764 --- /dev/null +++ b/dspl/liblapack/INSTALL/dsecnd_EXT_ETIME.f @@ -0,0 +1,64 @@ +*> \brief \b DSECND Using ETIME +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DSECND( ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSECND returns the user time for a process in seconds. +*> This version gets the time from the EXTERNAL system function ETIME. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DSECND( ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* +* ===================================================================== +* +* .. Local Scalars .. + REAL T1 +* .. +* .. Local Arrays .. + REAL TARRAY( 2 ) +* .. +* .. External Functions .. + REAL ETIME + EXTERNAL ETIME +* .. +* .. Executable Statements .. +* + T1 = ETIME( TARRAY ) + DSECND = TARRAY( 1 ) + RETURN +* +* End of DSECND +* + END diff --git a/dspl/liblapack/INSTALL/dsecnd_EXT_ETIME_.f b/dspl/liblapack/INSTALL/dsecnd_EXT_ETIME_.f new file mode 100644 index 0000000..d0a3945 --- /dev/null +++ b/dspl/liblapack/INSTALL/dsecnd_EXT_ETIME_.f @@ -0,0 +1,61 @@ +*> \brief \b DSECND Using ETIME_ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DSECND( ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSECND returns the user time for a process in seconds. +*> This version gets the time from the system function ETIME_. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DSECND( ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* February 2007 +* ===================================================================== +* +* .. Local Scalars .. + REAL T1 +* .. +* .. Local Arrays .. + REAL TARRAY( 2 ) +* .. +* .. External Functions .. + REAL ETIME_ + EXTERNAL ETIME_ +* .. +* .. Executable Statements .. +* + T1 = ETIME_( TARRAY ) + DSECND = TARRAY( 1 ) + RETURN +* +* End of DSECND +* + END diff --git a/dspl/liblapack/INSTALL/dsecnd_INT_CPU_TIME.f b/dspl/liblapack/INSTALL/dsecnd_INT_CPU_TIME.f new file mode 100644 index 0000000..26f8cb1 --- /dev/null +++ b/dspl/liblapack/INSTALL/dsecnd_INT_CPU_TIME.f @@ -0,0 +1,61 @@ +*> \brief \b DSECND Using INTERNAL function CPU_TIME. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DSECND( ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSECND returns the user time for a process in seconds. +*> This version gets the time from the INTERNAL function CPU_TIME. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DSECND( ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* ===================================================================== +* +* .. Local Scalars .. +* + REAL T +* +* .. Intrinsic Functions .. +* + INTRINSIC CPU_TIME +* +* .. Executable Statements .. * +* + CALL CPU_TIME( T ) + DSECND = T + RETURN +* +* End of DSECND +* + END diff --git a/dspl/liblapack/INSTALL/dsecnd_INT_ETIME.f b/dspl/liblapack/INSTALL/dsecnd_INT_ETIME.f new file mode 100644 index 0000000..22df7b4 --- /dev/null +++ b/dspl/liblapack/INSTALL/dsecnd_INT_ETIME.f @@ -0,0 +1,63 @@ +*> \brief \b DSECND Using the INTERNAL function ETIME. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DSECND( ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSECND returns the user time for a process in seconds. +*> This version gets the time from the INTERNAL function ETIME. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DSECND( ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* ===================================================================== +* +* .. Local Scalars .. + REAL T1 +* .. +* .. Local Arrays .. + REAL TARRAY( 2 ) +* .. +* .. Intrinsic Functions .. + REAL ETIME + INTRINSIC ETIME +* .. +* .. Executable Statements .. +* + T1 = ETIME( TARRAY ) + DSECND = TARRAY( 1 ) + RETURN +* +* End of DSECND +* + END diff --git a/dspl/liblapack/INSTALL/dsecnd_NONE.f b/dspl/liblapack/INSTALL/dsecnd_NONE.f new file mode 100644 index 0000000..7635a96 --- /dev/null +++ b/dspl/liblapack/INSTALL/dsecnd_NONE.f @@ -0,0 +1,52 @@ +*> \brief \b DSECND returns nothing +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DSECND( ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSECND returns nothing instead of returning the user time for a process in seconds. +*> If you are using that routine, it means that neither EXTERNAL ETIME, +*> EXTERNAL ETIME_, INTERNAL ETIME, INTERNAL CPU_TIME is available on +*> your machine. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DSECND( ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* ===================================================================== +* + DSECND = 0.0D+0 + RETURN +* +* End of DSECND +* + END diff --git a/dspl/liblapack/INSTALL/dsecndtst.f b/dspl/liblapack/INSTALL/dsecndtst.f new file mode 100644 index 0000000..430a46a --- /dev/null +++ b/dspl/liblapack/INSTALL/dsecndtst.f @@ -0,0 +1,127 @@ +*> \brief \b DSECNDTST +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM DSECNDTST +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== PROGRAM DSECNDTST +* +* -- LAPACK test routine (version 3.8.0) -- +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NMAX, ITS + PARAMETER ( NMAX = 1000, ITS = 50000 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION ALPHA, AVG, T1, T2, TNOSEC, TOTAL +* .. +* .. Local Arrays .. + DOUBLE PRECISION X( NMAX ), Y( NMAX ) +* .. +* .. External Functions .. + DOUBLE PRECISION DSECND + EXTERNAL DSECND +* .. +* .. External Subroutines .. + EXTERNAL MYSUB +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE +* .. +* .. Executable Statements .. +* +* .. Figure TOTAL flops .. + TOTAL = DBLE(NMAX) * DBLE(ITS) * 2.0 +* +* Initialize X and Y +* + DO 10 I = 1, NMAX + X( I ) = DBLE( 1 ) / DBLE( I ) + Y( I ) = DBLE( NMAX-I ) / DBLE( NMAX ) + 10 CONTINUE + ALPHA = 0.315D0 +* +* Time TOTAL SAXPY operations +* + T1 = DSECND( ) + DO 30 J = 1, ITS + DO 20 I = 1, NMAX + Y( I ) = Y( I ) + ALPHA*X( I ) + 20 CONTINUE + ALPHA = -ALPHA + 30 CONTINUE + T2 = DSECND( ) + TNOSEC = T2 - T1 + WRITE( 6, 9999 )TOTAL, TNOSEC + IF( TNOSEC.GT.0.0 ) THEN + WRITE( 6, 9998 )(TOTAL/1.0D6)/TNOSEC + ELSE + WRITE( 6, 9994 ) + END IF +* +* Time TOTAL DAXPY operations with DSECND in the outer loop +* + T1 = DSECND( ) + DO 50 J = 1, ITS + DO 40 I = 1, NMAX + Y( I ) = Y( I ) + ALPHA*X( I ) + 40 CONTINUE + ALPHA = -ALPHA + T2 = DSECND( ) + 50 CONTINUE +* +* Compute the time used in milliseconds used by an average call +* to DSECND. +* + WRITE( 6, 9997 )T2 - T1 + AVG = ( ( T2-T1 ) - TNOSEC ) * 1000.0D+00/DBLE( ITS ) + IF( AVG.GT.0.0) + $ WRITE( 6, 9996 )AVG +* +* Compute the equivalent number of floating point operations used +* by an average call to DSECND. +* + IF(( AVG.GT.0.0 ).AND.( TNOSEC.GT.0.0 )) + $ WRITE( 6, 9995 )(AVG/1000) * TOTAL / TNOSEC +* + 9999 FORMAT( ' Time for ', G10.3,' DAXPY ops = ', G10.3, ' seconds' ) + 9998 FORMAT( ' DAXPY performance rate = ', G10.3, ' mflops ' ) + 9997 FORMAT( ' Including DSECND, time = ', G10.3, ' seconds' ) + 9996 FORMAT( ' Average time for DSECND = ', G10.3, + $ ' milliseconds' ) + 9995 FORMAT( ' Equivalent floating point ops = ', G10.3, ' ops' ) + 9994 FORMAT( ' *** Warning: Time for operations was less or equal', + $ ' than zero => timing in TESTING might be dubious' ) + CALL MYSUB(NMAX,X,Y) + END + SUBROUTINE MYSUB(N,X,Y) + INTEGER N + DOUBLE PRECISION X(N), Y(N) + RETURN + END diff --git a/dspl/liblapack/INSTALL/ilaver.f b/dspl/liblapack/INSTALL/ilaver.f new file mode 100644 index 0000000..e1d59f4 --- /dev/null +++ b/dspl/liblapack/INSTALL/ilaver.f @@ -0,0 +1,66 @@ +*> \brief \b ILAVER returns the LAPACK version. +** +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH ) +* +* INTEGER VERS_MAJOR, VERS_MINOR, VERS_PATCH +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This subroutine returns the LAPACK version. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[out] VERS_MAJOR +*> return the lapack major version +*> +*> \param[out] VERS_MINOR +*> return the lapack minor version from the major version +*> +*> \param[out] VERS_PATCH +*> return the lapack patch version from the minor version +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + SUBROUTINE ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* ===================================================================== +* + INTEGER VERS_MAJOR, VERS_MINOR, VERS_PATCH +* ===================================================================== + VERS_MAJOR = 3 + VERS_MINOR = 8 + VERS_PATCH = 0 +* ===================================================================== +* + RETURN + END diff --git a/dspl/liblapack/INSTALL/lsame.f b/dspl/liblapack/INSTALL/lsame.f new file mode 100644 index 0000000..cead2c5 --- /dev/null +++ b/dspl/liblapack/INSTALL/lsame.f @@ -0,0 +1,125 @@ +*> \brief \b LSAME +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* LOGICAL FUNCTION LSAME( CA, CB ) +* +* .. Scalar Arguments .. +* CHARACTER CA, CB +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> LSAME returns .TRUE. if CA is the same letter as CB regardless of +*> case. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] CA +*> \verbatim +*> \endverbatim +*> +*> \param[in] CB +*> \verbatim +*> CA and CB specify the single characters to be compared. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + LOGICAL FUNCTION LSAME( CA, CB ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER CA, CB +* .. +* +* ===================================================================== +* +* .. Intrinsic Functions .. + INTRINSIC ICHAR +* .. +* .. Local Scalars .. + INTEGER INTA, INTB, ZCODE +* .. +* .. Executable Statements .. +* +* Test if the characters are equal +* + LSAME = CA.EQ.CB + IF( LSAME ) + $ RETURN +* +* Now test for equivalence if both characters are alphabetic. +* + ZCODE = ICHAR( 'Z' ) +* +* Use 'Z' rather than 'A' so that ASCII can be detected on Prime +* machines, on which ICHAR returns a value with bit 8 set. +* ICHAR('A') on Prime machines returns 193 which is the same as +* ICHAR('A') on an EBCDIC machine. +* + INTA = ICHAR( CA ) + INTB = ICHAR( CB ) +* + IF( ZCODE.EQ.90 .OR. ZCODE.EQ.122 ) THEN +* +* ASCII is assumed - ZCODE is the ASCII code of either lower or +* upper case 'Z'. +* + IF( INTA.GE.97 .AND. INTA.LE.122 ) INTA = INTA - 32 + IF( INTB.GE.97 .AND. INTB.LE.122 ) INTB = INTB - 32 +* + ELSE IF( ZCODE.EQ.233 .OR. ZCODE.EQ.169 ) THEN +* +* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or +* upper case 'Z'. +* + IF( INTA.GE.129 .AND. INTA.LE.137 .OR. + $ INTA.GE.145 .AND. INTA.LE.153 .OR. + $ INTA.GE.162 .AND. INTA.LE.169 ) INTA = INTA + 64 + IF( INTB.GE.129 .AND. INTB.LE.137 .OR. + $ INTB.GE.145 .AND. INTB.LE.153 .OR. + $ INTB.GE.162 .AND. INTB.LE.169 ) INTB = INTB + 64 +* + ELSE IF( ZCODE.EQ.218 .OR. ZCODE.EQ.250 ) THEN +* +* ASCII is assumed, on Prime machines - ZCODE is the ASCII code +* plus 128 of either lower or upper case 'Z'. +* + IF( INTA.GE.225 .AND. INTA.LE.250 ) INTA = INTA - 32 + IF( INTB.GE.225 .AND. INTB.LE.250 ) INTB = INTB - 32 + END IF + LSAME = INTA.EQ.INTB +* +* RETURN +* +* End of LSAME +* + END diff --git a/dspl/liblapack/INSTALL/lsametst.f b/dspl/liblapack/INSTALL/lsametst.f new file mode 100644 index 0000000..aad0e58 --- /dev/null +++ b/dspl/liblapack/INSTALL/lsametst.f @@ -0,0 +1,88 @@ +*> \brief \b LSAMETST +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* PROGRAM LSAMETST +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== PROGRAM LSAMETST +* +* -- LAPACK test routine (version 3.7.0) -- +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* ===================================================================== +* .. Local Scalars .. + INTEGER I1, I2 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ICHAR +* .. +* .. Executable Statements .. +* +* +* Determine the character set. +* + I1 = ICHAR( 'A' ) + I2 = ICHAR( 'a' ) + IF( I2-I1.EQ.32 ) THEN + WRITE( *, * ) ' ASCII character set' + ELSE + WRITE( *, * ) ' Non-ASCII character set, IOFF should be ',I2-I1 + END IF +* +* Test LSAME. +* + IF( .NOT. LSAME( 'A', 'A' ) ) + $ WRITE( *, 9999 )'A', 'A' + IF( .NOT. LSAME( 'A', 'a' ) ) + $ WRITE( *, 9999 )'A', 'a' + IF( .NOT. LSAME( 'a', 'A' ) ) + $ WRITE( *, 9999 )'a', 'A' + IF( .NOT. LSAME( 'a', 'a' ) ) + $ WRITE( *, 9999 )'a', 'a' + IF( LSAME( 'A', 'B' ) ) + $ WRITE( *, 9998 )'A', 'B' + IF( LSAME( 'A', 'b' ) ) + $ WRITE( *, 9998 )'A', 'b' + IF( LSAME( 'a', 'B' ) ) + $ WRITE( *, 9998 )'a', 'B' + IF( LSAME( 'a', 'b' ) ) + $ WRITE( *, 9998 )'a', 'b' + IF( LSAME( 'O', '/' ) ) + $ WRITE( *, 9998 )'O', '/' + IF( LSAME( '/', 'O' ) ) + $ WRITE( *, 9998 )'/', 'O' + IF( LSAME( 'o', '/' ) ) + $ WRITE( *, 9998 )'o', '/' + IF( LSAME( '/', 'o' ) ) + $ WRITE( *, 9998 )'/', 'o' + WRITE( *, * )' Tests completed' +* + 9999 FORMAT( ' *** Error: LSAME( ', A1, ', ', A1, ') is .FALSE.' ) + 9998 FORMAT( ' *** Error: LSAME( ', A1, ', ', A1, ') is .TRUE.' ) + END diff --git a/dspl/liblapack/INSTALL/make.inc.ALPHA b/dspl/liblapack/INSTALL/make.inc.ALPHA new file mode 100644 index 0000000..0ceeaa1 --- /dev/null +++ b/dspl/liblapack/INSTALL/make.inc.ALPHA @@ -0,0 +1,82 @@ +#################################################################### +# LAPACK make include file. # +# LAPACK, Version 3.7.0 # +# December 2016 # +#################################################################### + +SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = cc +CFLAGS = -O4 + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. +# +FORTRAN = f77 +OPTS = -O4 -fpe1 +DRVOPTS = $(OPTS) +NOOPT = + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. +# +LOADER = f77 +LOADOPTS = + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. +# +ARCH = ar +ARCHFLAGS = cr +RANLIB = ranlib + +# Timer for the SECOND and DSECND routines +# +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. +# +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. +# +#LAPACKE_WITH_TMG = Yes + +# Location of the extended-precision BLAS (XBLAS) Fortran library +# used for building and testing extended-precision routines. The +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. +# +#USEXBLAS = Yes +#XBLASLIB = -lxblas + +# The location of the libraries to which you will link. (The +# machine-specific, optimized BLAS library should be used whenever +# possible.) +# +#BLASLIB = ../../librefblas.a +BLASLIB = -ldxml +CBLASLIB = ../../libcblas.a +LAPACKLIB = liblapack.a +TMGLIB = libtmglib.a +LAPACKELIB = liblapacke.a diff --git a/dspl/liblapack/INSTALL/make.inc.HPPA b/dspl/liblapack/INSTALL/make.inc.HPPA new file mode 100644 index 0000000..8eabbbd --- /dev/null +++ b/dspl/liblapack/INSTALL/make.inc.HPPA @@ -0,0 +1,82 @@ +#################################################################### +# LAPACK make include file. # +# LAPACK, Version 3.7.0 # +# December 2016 # +#################################################################### + +SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = cc +CFLAGS = + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. +# +FORTRAN = f77 +OPTS = +O4 +U77 +DRVOPTS = $(OPTS) -K +NOOPT = +U77 + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. +# +LOADER = f77 +LOADOPTS = -Aa +U77 + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. +# +ARCH = ar +ARCHFLAGS = cr +RANLIB = echo + +# Timer for the SECOND and DSECND routines +# +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. +# +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. +# +#LAPACKE_WITH_TMG = Yes + +# Location of the extended-precision BLAS (XBLAS) Fortran library +# used for building and testing extended-precision routines. The +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. +# +#USEXBLAS = Yes +#XBLASLIB = -lxblas + +# The location of the libraries to which you will link. (The +# machine-specific, optimized BLAS library should be used whenever +# possible.) +# +#BLASLIB = ../../librefblas.a +BLASLIB = -lblas +CBLASLIB = ../../libcblas.a +LAPACKLIB = liblapack.a +TMGLIB = libtmglib.a +LAPACKELIB = liblapacke.a diff --git a/dspl/liblapack/INSTALL/make.inc.IRIX64 b/dspl/liblapack/INSTALL/make.inc.IRIX64 new file mode 100644 index 0000000..d9e71e1 --- /dev/null +++ b/dspl/liblapack/INSTALL/make.inc.IRIX64 @@ -0,0 +1,85 @@ +#################################################################### +# LAPACK make include file. # +# LAPACK, Version 3.7.0 # +# December 2016 # +#################################################################### + +SHELL = /sbin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = cc +CFLAGS = -O3 + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. +# +FORTRAN = f77 +OPTS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON +#OPTS = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON +DRVOPTS = $(OPTS) -static +NOOPT = -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON +#NOOPT = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. +# +LOADER = f77 +LOADOPTS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON +#LOADOPTS = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. +# +ARCH = ar +ARCHFLAGS = cr +RANLIB = echo + +# Timer for the SECOND and DSECND routines +# +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. +# +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. +# +#LAPACKE_WITH_TMG = Yes + +# Location of the extended-precision BLAS (XBLAS) Fortran library +# used for building and testing extended-precision routines. The +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. +# +#USEXBLAS = Yes +#XBLASLIB = -lxblas + +# The location of the libraries to which you will link. (The +# machine-specific, optimized BLAS library should be used whenever +# possible.) +# +#BLASLIB = -lblas +BLASLIB = ../../librefblas.a +CBLASLIB = ../../libcblas.a +LAPACKLIB = liblapack.a +TMGLIB = libtmglib.a +LAPACKELIB = liblapacke.a diff --git a/dspl/liblapack/INSTALL/make.inc.O2K b/dspl/liblapack/INSTALL/make.inc.O2K new file mode 100644 index 0000000..3ffcada --- /dev/null +++ b/dspl/liblapack/INSTALL/make.inc.O2K @@ -0,0 +1,86 @@ +#################################################################### +# LAPACK make include file. # +# LAPACK, Version 3.7.0 # +# December 2016 # +#################################################################### + +SHELL = /sbin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = cc +CFLAGS = -O3 + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. +# +FORTRAN = f77 +OPTS = -O3 -64 -mips4 -r10000 +#OPTS = -O3 -64 -mips4 -r10000 -mp +DRVOPTS = $(OPTS) -static +NOOPT = -64 -mips4 -r10000 +#NOOPT = -64 -mips4 -r10000 -mp + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. +# +LOADER = f77 +LOADOPTS = -O3 -64 -mips4 -r10000 +#LOADOPTS = -O3 -64 -mips4 -r10000 -mp + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. +# +ARCH = ar +ARCHFLAGS = cr +RANLIB = echo + +# Timer for the SECOND and DSECND routines +# +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. +# +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. +# +#LAPACKE_WITH_TMG = Yes + +# Location of the extended-precision BLAS (XBLAS) Fortran library +# used for building and testing extended-precision routines. The +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. +# +#USEXBLAS = Yes +#XBLASLIB = -lxblas + +# The location of the libraries to which you will link. (The +# machine-specific, optimized BLAS library should be used whenever +# possible.) +# +BLASLIB = -lblas +#BLASLIB = -lblas_mp +#BLASLIB = ../../librefblas.a +CBLASLIB = ../../libcblas.a +LAPACKLIB = liblapack.a +TMGLIB = libtmglib.a +LAPACKELIB = liblapacke.a diff --git a/dspl/liblapack/INSTALL/make.inc.SGI5 b/dspl/liblapack/INSTALL/make.inc.SGI5 new file mode 100644 index 0000000..c7019ac --- /dev/null +++ b/dspl/liblapack/INSTALL/make.inc.SGI5 @@ -0,0 +1,82 @@ +#################################################################### +# LAPACK make include file. # +# LAPACK, Version 3.7.0 # +# December 2016 # +#################################################################### + +SHELL = /sbin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = cc +CFLAGS = -O4 + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. +# +FORTRAN = f77 +OPTS = -O4 +DRVOPTS = $(OPTS) -static +NOOPT = + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. +# +LOADER = f77 +LOADOPTS = + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. +# +ARCH = ar +ARCHFLAGS = cr +RANLIB = echo + +# Timer for the SECOND and DSECND routines +# +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. +# +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. +# +#LAPACKE_WITH_TMG = Yes + +# Location of the extended-precision BLAS (XBLAS) Fortran library +# used for building and testing extended-precision routines. The +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. +# +#USEXBLAS = Yes +#XBLASLIB = -lxblas + +# The location of the libraries to which you will link. (The +# machine-specific, optimized BLAS library should be used whenever +# possible.) +# +#BLASLIB = -lblas +BLASLIB = ../../librefblas.a +CBLASLIB = ../../libcblas.a +LAPACKLIB = liblapack.a +TMGLIB = libtmglib.a +LAPACKELIB = liblapacke.a diff --git a/dspl/liblapack/INSTALL/make.inc.SUN4 b/dspl/liblapack/INSTALL/make.inc.SUN4 new file mode 100644 index 0000000..4e44f1b --- /dev/null +++ b/dspl/liblapack/INSTALL/make.inc.SUN4 @@ -0,0 +1,82 @@ +#################################################################### +# LAPACK make include file. # +# LAPACK, Version 3.7.0 # +# December 2016 # +#################################################################### + +SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = cc +CFLAGS = -O3 + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. +# +FORTRAN = f77 +OPTS = -dalign -O4 -fast +DRVOPTS = $(OPTS) +NOOPT = + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. +# +LOADER = f77 +LOADOPTS = -dalign -O4 -fast + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. +# +ARCH = ar +ARCHFLAGS = cr +RANLIB = ranlib + +# Timer for the SECOND and DSECND routines +# +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. +# +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. +# +#LAPACKE_WITH_TMG = Yes + +# Location of the extended-precision BLAS (XBLAS) Fortran library +# used for building and testing extended-precision routines. The +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. +# +#USEXBLAS = Yes +#XBLASLIB = -lxblas + +# The location of the libraries to which you will link. (The +# machine-specific, optimized BLAS library should be used whenever +# possible.) +# +#BLASLIB = -lblas +BLASLIB = ../../librefblas.a +CBLASLIB = ../../libcblas.a +LAPACKLIB = liblapack.a +TMGLIB = libtmglib.a +LAPACKELIB = liblapacke.a diff --git a/dspl/liblapack/INSTALL/make.inc.SUN4SOL2 b/dspl/liblapack/INSTALL/make.inc.SUN4SOL2 new file mode 100644 index 0000000..e6d79ad --- /dev/null +++ b/dspl/liblapack/INSTALL/make.inc.SUN4SOL2 @@ -0,0 +1,87 @@ +#################################################################### +# LAPACK make include file. # +# LAPACK, Version 3.7.0 # +# December 2016 # +#################################################################### + +SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = cc +CFLAGS = -O3 + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. +# +FORTRAN = f77 +#OPTS = -O4 -u -f -mt +#OPTS = -u -f -dalign -native -xO5 -xarch=v8plusa +OPTS = -u -f -dalign -native -xO2 -xarch=v8plusa +DRVOPTS = $(OPTS) +NOOPT = -u -f +#NOOPT = -u -f -mt + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. +# +LOADER = f77 +#LOADOPTS = -mt +LOADOPTS = -f -dalign -native -xO2 -xarch=v8plusa + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. +# +ARCH = ar +ARCHFLAGS = cr +RANLIB = echo + +# Timer for the SECOND and DSECND routines +# +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. +# +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. +# +#LAPACKE_WITH_TMG = Yes + +# Location of the extended-precision BLAS (XBLAS) Fortran library +# used for building and testing extended-precision routines. The +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. +# +#USEXBLAS = Yes +#XBLASLIB = -lxblas + +# The location of the libraries to which you will link. (The +# machine-specific, optimized BLAS library should be used whenever +# possible.) +# +#BLASLIB = ../../librefblas.a +#BLASLIB = -xlic_lib=sunperf_mt +BLASLIB = -xlic_lib=sunperf +CBLASLIB = ../../libcblas.a +LAPACKLIB = liblapack.a +TMGLIB = libtmglib.a +LAPACKELIB = liblapacke.a diff --git a/dspl/liblapack/INSTALL/make.inc.XLF b/dspl/liblapack/INSTALL/make.inc.XLF new file mode 100644 index 0000000..9466ee3 --- /dev/null +++ b/dspl/liblapack/INSTALL/make.inc.XLF @@ -0,0 +1,83 @@ +#################################################################### +# LAPACK make include file. # +# LAPACK, Version 3.7.0 # +# December 2016 # +#################################################################### + +SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = xlc +CFLAGS = -O3 -qnosave + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. +# +FORTRAN = xlf +OPTS = -O3 -qfixed -qnosave +# For -O2, add -qstrict=none +DRVOPTS = $(OPTS) +NOOPT = -O0 -qfixed -qnosave + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. +# +LOADER = xlf +LOADOPTS = -qnosave + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. +# +ARCH = ar +ARCHFLAGS = cr +RANLIB = ranlib + +# Timer for the SECOND and DSECND routines +# +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +#TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. +# +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. +# +#LAPACKE_WITH_TMG = Yes + +# Location of the extended-precision BLAS (XBLAS) Fortran library +# used for building and testing extended-precision routines. The +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. +# +#USEXBLAS = Yes +#XBLASLIB = -lxblas + +# The location of the libraries to which you will link. (The +# machine-specific, optimized BLAS library should be used whenever +# possible.) +# +#BLASLIB = ../../librefblas.a +BLASLIB = -lessl +CBLASLIB = ../../libcblas.a +LAPACKLIB = liblapack.a +TMGLIB = libtmglib.a +LAPACKELIB = liblapacke.a diff --git a/dspl/liblapack/INSTALL/make.inc.gfortran b/dspl/liblapack/INSTALL/make.inc.gfortran new file mode 100644 index 0000000..39d98d4 --- /dev/null +++ b/dspl/liblapack/INSTALL/make.inc.gfortran @@ -0,0 +1,85 @@ +#################################################################### +# LAPACK make include file. # +# LAPACK, Version 3.7.0 # +# November 2017 # +#################################################################### + +SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = gcc +CFLAGS = -O3 + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. +# +# Note: During a regular execution, LAPACK might create NaN and Inf +# and handle these quantities appropriately. As a consequence, one +# should not compile LAPACK with flags such as -ffpe-trap=overflow. +# +FORTRAN = gfortran +OPTS = -O2 -frecursive +DRVOPTS = $(OPTS) +NOOPT = -O0 -frecursive + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. +# +LOADER = gfortran +LOADOPTS = + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. +# +ARCH = ar +ARCHFLAGS = cr +RANLIB = ranlib + +# Timer for the SECOND and DSECND routines +# +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +#TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. +# +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. +# +#LAPACKE_WITH_TMG = Yes + +# Location of the extended-precision BLAS (XBLAS) Fortran library +# used for building and testing extended-precision routines. The +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. +# +#USEXBLAS = Yes +#XBLASLIB = -lxblas + +# The location of the libraries to which you will link. (The +# machine-specific, optimized BLAS library should be used whenever +# possible.) +# +BLASLIB = ../../librefblas.a +CBLASLIB = ../../libcblas.a +LAPACKLIB = liblapack.a +TMGLIB = libtmglib.a +LAPACKELIB = liblapacke.a diff --git a/dspl/liblapack/INSTALL/make.inc.gfortran_debug b/dspl/liblapack/INSTALL/make.inc.gfortran_debug new file mode 100644 index 0000000..10e6381 --- /dev/null +++ b/dspl/liblapack/INSTALL/make.inc.gfortran_debug @@ -0,0 +1,85 @@ +#################################################################### +# LAPACK make include file. # +# LAPACK, Version 3.7.0 # +# November 2017 # +#################################################################### + +SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = gcc +CFLAGS = -g + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. +# +# Note: During a regular execution, LAPACK might create NaN and Inf +# and handle these quantities appropriately. As a consequence, one +# should not compile LAPACK with flags such as -ffpe-trap=overflow. +# +FORTRAN = gfortran -fimplicit-none -g -frecursive +OPTS = +DRVOPTS = $(OPTS) +NOOPT = -g -O0 -frecursive + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. +# +LOADER = gfortran -g +LOADOPTS = + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. +# +ARCH = ar +ARCHFLAGS = cr +RANLIB = ranlib + +# Timer for the SECOND and DSECND routines +# +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +#TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. +# +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. +# +#LAPACKE_WITH_TMG = Yes + +# Location of the extended-precision BLAS (XBLAS) Fortran library +# used for building and testing extended-precision routines. The +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. +# +#USEXBLAS = Yes +#XBLASLIB = -lxblas + +# The location of the libraries to which you will link. (The +# machine-specific, optimized BLAS library should be used whenever +# possible.) +# +BLASLIB = ../../librefblas.a +CBLASLIB = ../../libcblas.a +LAPACKLIB = liblapack.a +TMGLIB = libtmglib.a +LAPACKELIB = liblapacke.a diff --git a/dspl/liblapack/INSTALL/make.inc.ifort b/dspl/liblapack/INSTALL/make.inc.ifort new file mode 100644 index 0000000..b067bd4 --- /dev/null +++ b/dspl/liblapack/INSTALL/make.inc.ifort @@ -0,0 +1,81 @@ +#################################################################### +# LAPACK make include file. # +# LAPACK, Version 3.7.0 # +# December 2016 # +#################################################################### + +SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = icc +CFLAGS = -O3 + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. +# +FORTRAN = ifort +OPTS = -O3 -fp-model strict -assume protect_parens +DRVOPTS = $(OPTS) +NOOPT = -O0 -fp-model strict -assume protect_parens + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. +# +LOADER = ifort +LOADOPTS = + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. +# +ARCH = ar +ARCHFLAGS = cr +RANLIB = ranlib + +# Timer for the SECOND and DSECND routines +# +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. +# +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. +# +#LAPACKE_WITH_TMG = Yes + +# Location of the extended-precision BLAS (XBLAS) Fortran library +# used for building and testing extended-precision routines. The +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. +# +#USEXBLAS = Yes +#XBLASLIB = -lxblas + +# The location of the libraries to which you will link. (The +# machine-specific, optimized BLAS library should be used whenever +# possible.) +# +BLASLIB = ../../librefblas.a +CBLASLIB = ../../libcblas.a +LAPACKLIB = liblapack.a +TMGLIB = libtmglib.a +LAPACKELIB = liblapacke.a diff --git a/dspl/liblapack/INSTALL/make.inc.pgf95 b/dspl/liblapack/INSTALL/make.inc.pgf95 new file mode 100644 index 0000000..a9a5cec --- /dev/null +++ b/dspl/liblapack/INSTALL/make.inc.pgf95 @@ -0,0 +1,81 @@ +#################################################################### +# LAPACK make include file. # +# LAPACK, Version 3.7.0 # +# December 2016 # +#################################################################### + +SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = pgcc +CFLAGS = + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. +# +FORTRAN = pgf95 +OPTS = -O3 +DRVOPTS = $(OPTS) +NOOPT = -O0 + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. +# +LOADER = $(FORTRAN) +LOADOPTS = + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. +# +ARCH = ar +ARCHFLAGS = cr +RANLIB = echo + +# Timer for the SECOND and DSECND routines +# +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +#TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. +# +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. +# +#LAPACKE_WITH_TMG = Yes + +# Location of the extended-precision BLAS (XBLAS) Fortran library +# used for building and testing extended-precision routines. The +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. +# +#USEXBLAS = Yes +#XBLASLIB = -lxblas + +# The location of the libraries to which you will link. (The +# machine-specific, optimized BLAS library should be used whenever +# possible.) +# +BLASLIB = ../../librefblas.a +CBLASLIB = ../../libcblas.a +LAPACKLIB = liblapack.a +TMGLIB = libtmglib.a +LAPACKELIB = liblapacke.a diff --git a/dspl/liblapack/INSTALL/make.inc.pghpf b/dspl/liblapack/INSTALL/make.inc.pghpf new file mode 100644 index 0000000..1d9bf54 --- /dev/null +++ b/dspl/liblapack/INSTALL/make.inc.pghpf @@ -0,0 +1,82 @@ +#################################################################### +# LAPACK make include file. # +# LAPACK, Version 3.7.0 # +# December 2016 # +#################################################################### + +SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = pghpc +CFLAGS = + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. +# +FORTRAN = pghpf +OPTS = -O4 -Mnohpfc -Mdclchk +DRVOPTS = $(OPTS) +NOOPT = -Mnohpfc -Mdclchk + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. +# +LOADER = pghpf +LOADOPTS = + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. +# +ARCH = ar +ARCHFLAGS = cr +RANLIB = echo + +# Timer for the SECOND and DSECND routines +# +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +#TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. +# +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. +# +#LAPACKE_WITH_TMG = Yes + +# Location of the extended-precision BLAS (XBLAS) Fortran library +# used for building and testing extended-precision routines. The +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. +# +#USEXBLAS = Yes +#XBLASLIB = -lxblas + +# The location of the libraries to which you will link. (The +# machine-specific, optimized BLAS library should be used whenever +# possible.) +# +#BLASLIB = -lessl +BLASLIB = ../../librefblas.a +CBLASLIB = ../../libcblas.a +LAPACKLIB = liblapack.a +TMGLIB = libtmglib.a +LAPACKELIB = liblapacke.a diff --git a/dspl/liblapack/INSTALL/second_EXT_ETIME.f b/dspl/liblapack/INSTALL/second_EXT_ETIME.f new file mode 100644 index 0000000..43044cd --- /dev/null +++ b/dspl/liblapack/INSTALL/second_EXT_ETIME.f @@ -0,0 +1,63 @@ +*> \brief \b SECOND Using ETIME +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* REAL FUNCTION SECOND( ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SECOND returns the user time for a process in seconds. +*> This version gets the time from the EXTERNAL system function ETIME. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SECOND( ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* ===================================================================== +* +* .. Local Scalars .. + REAL T1 +* .. +* .. Local Arrays .. + REAL TARRAY( 2 ) +* .. +* .. External Functions .. + REAL ETIME + EXTERNAL ETIME +* .. +* .. Executable Statements .. +* + T1 = ETIME( TARRAY ) + SECOND = TARRAY( 1 ) + RETURN +* +* End of SECOND +* + END diff --git a/dspl/liblapack/INSTALL/second_EXT_ETIME_.f b/dspl/liblapack/INSTALL/second_EXT_ETIME_.f new file mode 100644 index 0000000..cb7869e --- /dev/null +++ b/dspl/liblapack/INSTALL/second_EXT_ETIME_.f @@ -0,0 +1,62 @@ +*> \brief \b SECOND Using ETIME_ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* REAL FUNCTION SECOND( ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SECOND returns the user time for a process in seconds. +*> This version gets the time from the system function ETIME_. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SECOND( ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* February 2007 +* ===================================================================== +* +* .. Local Scalars .. + REAL T1 +* .. +* .. Local Arrays .. + REAL TARRAY( 2 ) +* .. +* .. External Functions .. + REAL ETIME_ + EXTERNAL ETIME_ +* .. +* .. Executable Statements .. +* + T1 = ETIME_( TARRAY ) + SECOND = TARRAY( 1 ) + RETURN +* +* End of SECOND +* + END + diff --git a/dspl/liblapack/INSTALL/second_INT_CPU_TIME.f b/dspl/liblapack/INSTALL/second_INT_CPU_TIME.f new file mode 100644 index 0000000..067151a --- /dev/null +++ b/dspl/liblapack/INSTALL/second_INT_CPU_TIME.f @@ -0,0 +1,61 @@ +*> \brief \b SECOND Using INTERNAL function CPU_TIME. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* REAL FUNCTION SECOND( ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SECOND returns the user time for a process in seconds. +*> This version gets the time from the INTERNAL function CPU_TIME. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SECOND( ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* ===================================================================== +* +* .. Local Scalars .. +* + REAL T +* +* .. Intrinsic Functions .. +* + INTRINSIC CPU_TIME +* +* .. Executable Statements .. * +* + CALL CPU_TIME( T ) + SECOND = T + RETURN +* +* End of SECOND +* + END diff --git a/dspl/liblapack/INSTALL/second_INT_ETIME.f b/dspl/liblapack/INSTALL/second_INT_ETIME.f new file mode 100644 index 0000000..454718d --- /dev/null +++ b/dspl/liblapack/INSTALL/second_INT_ETIME.f @@ -0,0 +1,63 @@ +*> \brief \b SECOND Using the INTERNAL function ETIME. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* REAL FUNCTION SECOND( ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SECOND returns the user time for a process in seconds. +*> This version gets the time from the INTERNAL function ETIME. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SECOND( ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* ===================================================================== +* +* .. Local Scalars .. + REAL T1 +* .. +* .. Local Arrays .. + REAL TARRAY( 2 ) +* .. +* .. Intrinsic Functions .. + REAL ETIME + INTRINSIC ETIME +* .. +* .. Executable Statements .. +* + T1 = ETIME( TARRAY ) + SECOND = TARRAY( 1 ) + RETURN +* +* End of SECOND +* + END diff --git a/dspl/liblapack/INSTALL/second_NONE.f b/dspl/liblapack/INSTALL/second_NONE.f new file mode 100644 index 0000000..ea983b6 --- /dev/null +++ b/dspl/liblapack/INSTALL/second_NONE.f @@ -0,0 +1,52 @@ +*> \brief \b SECOND returns nothing +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* REAL FUNCTION SECOND( ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SECOND returns nothing instead of returning the user time for a process in seconds. +*> If you are using that routine, it means that neither EXTERNAL ETIME, +*> EXTERNAL ETIME_, INTERNAL ETIME, INTERNAL CPU_TIME is available on +*> your machine. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SECOND( ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* ===================================================================== +* + SECOND = 0.0E+0 + RETURN +* +* End of SECOND +* + END diff --git a/dspl/liblapack/INSTALL/secondtst.f b/dspl/liblapack/INSTALL/secondtst.f new file mode 100644 index 0000000..9eaa181 --- /dev/null +++ b/dspl/liblapack/INSTALL/secondtst.f @@ -0,0 +1,123 @@ +*> \brief \b SECONDTST +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== PROGRAM SECONDTST +* +* -- LAPACK test routine (version 3.8.0) -- +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NMAX, ITS + PARAMETER ( NMAX = 1000, ITS = 50000 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL ALPHA, AVG, T1, T2, TNOSEC, TOTAL +* .. +* .. Local Arrays .. + REAL X( NMAX ), Y( NMAX ) +* .. +* .. External Functions .. + REAL SECOND + EXTERNAL SECOND +* .. +* .. External Subroutines .. + EXTERNAL MYSUB +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL +* .. +* .. Executable Statements .. +* +* .. Figure TOTAL flops .. + TOTAL = REAL(NMAX) * REAL(ITS) * 2.0 +* +* Initialize X and Y +* + DO 10 I = 1, NMAX + X( I ) = REAL( 1 ) / REAL( I ) + Y( I ) = REAL( NMAX-I ) / REAL( NMAX ) + 10 CONTINUE + ALPHA = 0.315 +* +* Time TOTAL SAXPY operations +* + T1 = SECOND( ) + DO 30 J = 1, ITS + DO 20 I = 1, NMAX + Y( I ) = Y( I ) + ALPHA*X( I ) + 20 CONTINUE + ALPHA = -ALPHA + 30 CONTINUE + T2 = SECOND( ) + TNOSEC = T2 - T1 + WRITE( 6, 9999 )TOTAL, TNOSEC + IF( TNOSEC.GT.0.0 ) THEN + WRITE( 6, 9998 )(TOTAL/1.0E6)/TNOSEC + ELSE + WRITE( 6, 9994 ) + END IF +* +* Time TOTAL SAXPY operations with SECOND in the outer loop +* + T1 = SECOND( ) + DO 50 J = 1, ITS + DO 40 I = 1, NMAX + Y( I ) = Y( I ) + ALPHA*X( I ) + 40 CONTINUE + ALPHA = -ALPHA + T2 = SECOND( ) + 50 CONTINUE +* +* Compute the time used in milliseconds used by an average call +* to SECOND. +* + WRITE( 6, 9997 )T2 - T1 + AVG = ( ( T2-T1 ) - TNOSEC ) * 1000.0E+00/REAL( ITS ) + IF( AVG.GT.0.0) + $ WRITE( 6, 9996 )AVG +* +* Compute the equivalent number of floating point operations used +* by an average call to SECOND. +* + IF(( AVG.GT.0.0 ).AND.( TNOSEC.GT.0.0 )) + $ WRITE( 6, 9995 )(AVG/1000) * TOTAL / TNOSEC +* + 9999 FORMAT( ' Time for ', G10.3,' SAXPY ops = ', G10.3, ' seconds' ) + 9998 FORMAT( ' SAXPY performance rate = ', G10.3, ' mflops ' ) + 9997 FORMAT( ' Including SECOND, time = ', G10.3, ' seconds' ) + 9996 FORMAT( ' Average time for SECOND = ', G10.3, + $ ' milliseconds' ) + 9995 FORMAT( ' Equivalent floating point ops = ', G10.3, ' ops' ) + 9994 FORMAT( ' *** Warning: Time for operations was less or equal', + $ ' than zero => timing in TESTING might be dubious' ) + CALL MYSUB(NMAX,X,Y) + END + SUBROUTINE MYSUB(N,X,Y) + INTEGER N + REAL X(N), Y(N) + RETURN + END diff --git a/dspl/liblapack/INSTALL/slamch.f b/dspl/liblapack/INSTALL/slamch.f new file mode 100644 index 0000000..3282fa6 --- /dev/null +++ b/dspl/liblapack/INSTALL/slamch.f @@ -0,0 +1,192 @@ +*> \brief \b SLAMCH +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* REAL FUNCTION SLAMCH( CMACH ) +* +* .. Scalar Arguments .. +* CHARACTER CMACH +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAMCH determines single precision machine parameters. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] CMACH +*> \verbatim +*> Specifies the value to be returned by SLAMCH: +*> = 'E' or 'e', SLAMCH := eps +*> = 'S' or 's , SLAMCH := sfmin +*> = 'B' or 'b', SLAMCH := base +*> = 'P' or 'p', SLAMCH := eps*base +*> = 'N' or 'n', SLAMCH := t +*> = 'R' or 'r', SLAMCH := rnd +*> = 'M' or 'm', SLAMCH := emin +*> = 'U' or 'u', SLAMCH := rmin +*> = 'L' or 'l', SLAMCH := emax +*> = 'O' or 'o', SLAMCH := rmax +*> where +*> eps = relative machine precision +*> sfmin = safe minimum, such that 1/sfmin does not overflow +*> base = base of the machine +*> prec = eps*base +*> t = number of (base) digits in the mantissa +*> rnd = 1.0 when rounding occurs in addition, 0.0 otherwise +*> emin = minimum exponent before (gradual) underflow +*> rmin = underflow threshold - base**(emin-1) +*> emax = largest exponent before overflow +*> rmax = overflow threshold - (base**emax)*(1-eps) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SLAMCH( CMACH ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER CMACH +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + REAL RND, EPS, SFMIN, SMALL, RMACH +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC DIGITS, EPSILON, HUGE, MAXEXPONENT, + $ MINEXPONENT, RADIX, TINY +* .. +* .. Executable Statements .. +* +* +* Assume rounding, not chopping. Always. +* + RND = ONE +* + IF( ONE.EQ.RND ) THEN + EPS = EPSILON(ZERO) * 0.5 + ELSE + EPS = EPSILON(ZERO) + END IF +* + IF( LSAME( CMACH, 'E' ) ) THEN + RMACH = EPS + ELSE IF( LSAME( CMACH, 'S' ) ) THEN + SFMIN = TINY(ZERO) + SMALL = ONE / HUGE(ZERO) + IF( SMALL.GE.SFMIN ) THEN +* +* Use SMALL plus a bit, to avoid the possibility of rounding +* causing overflow when computing 1/sfmin. +* + SFMIN = SMALL*( ONE+EPS ) + END IF + RMACH = SFMIN + ELSE IF( LSAME( CMACH, 'B' ) ) THEN + RMACH = RADIX(ZERO) + ELSE IF( LSAME( CMACH, 'P' ) ) THEN + RMACH = EPS * RADIX(ZERO) + ELSE IF( LSAME( CMACH, 'N' ) ) THEN + RMACH = DIGITS(ZERO) + ELSE IF( LSAME( CMACH, 'R' ) ) THEN + RMACH = RND + ELSE IF( LSAME( CMACH, 'M' ) ) THEN + RMACH = MINEXPONENT(ZERO) + ELSE IF( LSAME( CMACH, 'U' ) ) THEN + RMACH = tiny(zero) + ELSE IF( LSAME( CMACH, 'L' ) ) THEN + RMACH = MAXEXPONENT(ZERO) + ELSE IF( LSAME( CMACH, 'O' ) ) THEN + RMACH = HUGE(ZERO) + ELSE + RMACH = ZERO + END IF +* + SLAMCH = RMACH + RETURN +* +* End of SLAMCH +* + END +************************************************************************ +*> \brief \b SLAMC3 +*> \details +*> \b Purpose: +*> \verbatim +*> SLAMC3 is intended to force A and B to be stored prior to doing +*> the addition of A and B , for use in situations where optimizers +*> might hold one of these in a register. +*> \endverbatim +*> \author LAPACK is a software package provided by Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. +*> \date December 2016 +*> \ingroup auxOTHERauxiliary +*> +*> \param[in] A +*> \verbatim +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> The values A and B. +*> \endverbatim +*> +* + REAL FUNCTION SLAMC3( A, B ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2010 +* +* .. Scalar Arguments .. + REAL A, B +* .. +* ===================================================================== +* +* .. Executable Statements .. +* + SLAMC3 = A + B +* + RETURN +* +* End of SLAMC3 +* + END +* +************************************************************************ diff --git a/dspl/liblapack/INSTALL/slamchf77.f b/dspl/liblapack/INSTALL/slamchf77.f new file mode 100644 index 0000000..fe7cc4e --- /dev/null +++ b/dspl/liblapack/INSTALL/slamchf77.f @@ -0,0 +1,924 @@ +*> \brief \b SLAMCHF77 deprecated +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* REAL FUNCTION SLAMCH( CMACH ) +* +* .. Scalar Arguments .. +* CHARACTER CMACH +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAMCH determines single precision machine parameters. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] CMACH +*> \verbatim +*> Specifies the value to be returned by SLAMCH: +*> = 'E' or 'e', SLAMCH := eps +*> = 'S' or 's , SLAMCH := sfmin +*> = 'B' or 'b', SLAMCH := base +*> = 'P' or 'p', SLAMCH := eps*base +*> = 'N' or 'n', SLAMCH := t +*> = 'R' or 'r', SLAMCH := rnd +*> = 'M' or 'm', SLAMCH := emin +*> = 'U' or 'u', SLAMCH := rmin +*> = 'L' or 'l', SLAMCH := emax +*> = 'O' or 'o', SLAMCH := rmax +*> where +*> eps = relative machine precision +*> sfmin = safe minimum, such that 1/sfmin does not overflow +*> base = base of the machine +*> prec = eps*base +*> t = number of (base) digits in the mantissa +*> rnd = 1.0 when rounding occurs in addition, 0.0 otherwise +*> emin = minimum exponent before (gradual) underflow +*> rmin = underflow threshold - base**(emin-1) +*> emax = largest exponent before overflow +*> rmax = overflow threshold - (base**emax)*(1-eps) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SLAMCH( CMACH ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER CMACH +* .. +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL FIRST, LRND + INTEGER BETA, IMAX, IMIN, IT + REAL BASE, EMAX, EMIN, EPS, PREC, RMACH, RMAX, RMIN, + $ RND, SFMIN, SMALL, T +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLAMC2 +* .. +* .. Save statement .. + SAVE FIRST, EPS, SFMIN, BASE, T, RND, EMIN, RMIN, + $ EMAX, RMAX, PREC +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + CALL SLAMC2( BETA, IT, LRND, EPS, IMIN, RMIN, IMAX, RMAX ) + BASE = BETA + T = IT + IF( LRND ) THEN + RND = ONE + EPS = ( BASE**( 1-IT ) ) / 2 + ELSE + RND = ZERO + EPS = BASE**( 1-IT ) + END IF + PREC = EPS*BASE + EMIN = IMIN + EMAX = IMAX + SFMIN = RMIN + SMALL = ONE / RMAX + IF( SMALL.GE.SFMIN ) THEN +* +* Use SMALL plus a bit, to avoid the possibility of rounding +* causing overflow when computing 1/sfmin. +* + SFMIN = SMALL*( ONE+EPS ) + END IF + END IF +* + IF( LSAME( CMACH, 'E' ) ) THEN + RMACH = EPS + ELSE IF( LSAME( CMACH, 'S' ) ) THEN + RMACH = SFMIN + ELSE IF( LSAME( CMACH, 'B' ) ) THEN + RMACH = BASE + ELSE IF( LSAME( CMACH, 'P' ) ) THEN + RMACH = PREC + ELSE IF( LSAME( CMACH, 'N' ) ) THEN + RMACH = T + ELSE IF( LSAME( CMACH, 'R' ) ) THEN + RMACH = RND + ELSE IF( LSAME( CMACH, 'M' ) ) THEN + RMACH = EMIN + ELSE IF( LSAME( CMACH, 'U' ) ) THEN + RMACH = RMIN + ELSE IF( LSAME( CMACH, 'L' ) ) THEN + RMACH = EMAX + ELSE IF( LSAME( CMACH, 'O' ) ) THEN + RMACH = RMAX + END IF +* + SLAMCH = RMACH + FIRST = .FALSE. + RETURN +* +* End of SLAMCH +* + END +* +************************************************************************ +* +*> \brief \b SLAMC1 +*> \details +*> \b Purpose: +*> \verbatim +*> SLAMC1 determines the machine parameters given by BETA, T, RND, and +*> IEEE1. +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> The base of the machine. +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> The number of ( BETA ) digits in the mantissa. +*> \endverbatim +*> +*> \param[out] RND +*> \verbatim +*> Specifies whether proper rounding ( RND = .TRUE. ) or +*> chopping ( RND = .FALSE. ) occurs in addition. This may not +*> be a reliable guide to the way in which the machine performs +*> its arithmetic. +*> \endverbatim +*> +*> \param[out] IEEE1 +*> \verbatim +*> Specifies whether rounding appears to be done in the IEEE +*> 'round to nearest' style. +*> \endverbatim +*> \author LAPACK is a software package provided by Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. +*> \date April 2012 +*> \ingroup auxOTHERauxiliary +*> +*> \details \b Further \b Details +*> \verbatim +*> +*> The routine is based on the routine ENVRON by Malcolm and +*> incorporates suggestions by Gentleman and Marovich. See +*> +*> Malcolm M. A. (1972) Algorithms to reveal properties of +*> floating-point arithmetic. Comms. of the ACM, 15, 949-951. +*> +*> Gentleman W. M. and Marovich S. B. (1974) More on algorithms +*> that reveal properties of floating point arithmetic units. +*> Comms. of the ACM, 17, 276-277. +*> \endverbatim +*> + SUBROUTINE SLAMC1( BETA, T, RND, IEEE1 ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2010 +* +* .. Scalar Arguments .. + LOGICAL IEEE1, RND + INTEGER BETA, T +* .. +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL FIRST, LIEEE1, LRND + INTEGER LBETA, LT + REAL A, B, C, F, ONE, QTR, SAVEC, T1, T2 +* .. +* .. External Functions .. + REAL SLAMC3 + EXTERNAL SLAMC3 +* .. +* .. Save statement .. + SAVE FIRST, LIEEE1, LBETA, LRND, LT +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + ONE = 1 +* +* LBETA, LIEEE1, LT and LRND are the local values of BETA, +* IEEE1, T and RND. +* +* Throughout this routine we use the function SLAMC3 to ensure +* that relevant values are stored and not held in registers, or +* are not affected by optimizers. +* +* Compute a = 2.0**m with the smallest positive integer m such +* that +* +* fl( a + 1.0 ) = a. +* + A = 1 + C = 1 +* +*+ WHILE( C.EQ.ONE )LOOP + 10 CONTINUE + IF( C.EQ.ONE ) THEN + A = 2*A + C = SLAMC3( A, ONE ) + C = SLAMC3( C, -A ) + GO TO 10 + END IF +*+ END WHILE +* +* Now compute b = 2.0**m with the smallest positive integer m +* such that +* +* fl( a + b ) .gt. a. +* + B = 1 + C = SLAMC3( A, B ) +* +*+ WHILE( C.EQ.A )LOOP + 20 CONTINUE + IF( C.EQ.A ) THEN + B = 2*B + C = SLAMC3( A, B ) + GO TO 20 + END IF +*+ END WHILE +* +* Now compute the base. a and c are neighbouring floating point +* numbers in the interval ( beta**t, beta**( t + 1 ) ) and so +* their difference is beta. Adding 0.25 to c is to ensure that it +* is truncated to beta and not ( beta - 1 ). +* + QTR = ONE / 4 + SAVEC = C + C = SLAMC3( C, -A ) + LBETA = C + QTR +* +* Now determine whether rounding or chopping occurs, by adding a +* bit less than beta/2 and a bit more than beta/2 to a. +* + B = LBETA + F = SLAMC3( B / 2, -B / 100 ) + C = SLAMC3( F, A ) + IF( C.EQ.A ) THEN + LRND = .TRUE. + ELSE + LRND = .FALSE. + END IF + F = SLAMC3( B / 2, B / 100 ) + C = SLAMC3( F, A ) + IF( ( LRND ) .AND. ( C.EQ.A ) ) + $ LRND = .FALSE. +* +* Try and decide whether rounding is done in the IEEE 'round to +* nearest' style. B/2 is half a unit in the last place of the two +* numbers A and SAVEC. Furthermore, A is even, i.e. has last bit +* zero, and SAVEC is odd. Thus adding B/2 to A should not change +* A, but adding B/2 to SAVEC should change SAVEC. +* + T1 = SLAMC3( B / 2, A ) + T2 = SLAMC3( B / 2, SAVEC ) + LIEEE1 = ( T1.EQ.A ) .AND. ( T2.GT.SAVEC ) .AND. LRND +* +* Now find the mantissa, t. It should be the integer part of +* log to the base beta of a, however it is safer to determine t +* by powering. So we find t as the smallest positive integer for +* which +* +* fl( beta**t + 1.0 ) = 1.0. +* + LT = 0 + A = 1 + C = 1 +* +*+ WHILE( C.EQ.ONE )LOOP + 30 CONTINUE + IF( C.EQ.ONE ) THEN + LT = LT + 1 + A = A*LBETA + C = SLAMC3( A, ONE ) + C = SLAMC3( C, -A ) + GO TO 30 + END IF +*+ END WHILE +* + END IF +* + BETA = LBETA + T = LT + RND = LRND + IEEE1 = LIEEE1 + FIRST = .FALSE. + RETURN +* +* End of SLAMC1 +* + END +* +************************************************************************ +* +*> \brief \b SLAMC2 +*> \details +*> \b Purpose: +*> \verbatim +*> SLAMC2 determines the machine parameters specified in its argument +*> list. +*> \endverbatim +*> \author LAPACK is a software package provided by Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. +*> \date April 2012 +*> \ingroup auxOTHERauxiliary +*> +*> \param[out] BETA +*> \verbatim +*> The base of the machine. +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> The number of ( BETA ) digits in the mantissa. +*> \endverbatim +*> +*> \param[out] RND +*> \verbatim +*> Specifies whether proper rounding ( RND = .TRUE. ) or +*> chopping ( RND = .FALSE. ) occurs in addition. This may not +*> be a reliable guide to the way in which the machine performs +*> its arithmetic. +*> \endverbatim +*> +*> \param[out] EPS +*> \verbatim +*> The smallest positive number such that +*> fl( 1.0 - EPS ) .LT. 1.0, +*> where fl denotes the computed value. +*> \endverbatim +*> +*> \param[out] EMIN +*> \verbatim +*> The minimum exponent before (gradual) underflow occurs. +*> \endverbatim +*> +*> \param[out] RMIN +*> \verbatim +*> The smallest normalized number for the machine, given by +*> BASE**( EMIN - 1 ), where BASE is the floating point value +*> of BETA. +*> \endverbatim +*> +*> \param[out] EMAX +*> \verbatim +*> The maximum exponent before overflow occurs. +*> \endverbatim +*> +*> \param[out] RMAX +*> \verbatim +*> The largest positive number for the machine, given by +*> BASE**EMAX * ( 1 - EPS ), where BASE is the floating point +*> value of BETA. +*> \endverbatim +*> +*> \details \b Further \b Details +*> \verbatim +*> +*> The computation of EPS is based on a routine PARANOIA by +*> W. Kahan of the University of California at Berkeley. +*> \endverbatim + SUBROUTINE SLAMC2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2010 +* +* .. Scalar Arguments .. + LOGICAL RND + INTEGER BETA, EMAX, EMIN, T + REAL EPS, RMAX, RMIN +* .. +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL FIRST, IEEE, IWARN, LIEEE1, LRND + INTEGER GNMIN, GPMIN, I, LBETA, LEMAX, LEMIN, LT, + $ NGNMIN, NGPMIN + REAL A, B, C, HALF, LEPS, LRMAX, LRMIN, ONE, RBASE, + $ SIXTH, SMALL, THIRD, TWO, ZERO +* .. +* .. External Functions .. + REAL SLAMC3 + EXTERNAL SLAMC3 +* .. +* .. External Subroutines .. + EXTERNAL SLAMC1, SLAMC4, SLAMC5 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Save statement .. + SAVE FIRST, IWARN, LBETA, LEMAX, LEMIN, LEPS, LRMAX, + $ LRMIN, LT +* .. +* .. Data statements .. + DATA FIRST / .TRUE. / , IWARN / .FALSE. / +* .. +* .. Executable Statements .. +* + IF( FIRST ) THEN + ZERO = 0 + ONE = 1 + TWO = 2 +* +* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of +* BETA, T, RND, EPS, EMIN and RMIN. +* +* Throughout this routine we use the function SLAMC3 to ensure +* that relevant values are stored and not held in registers, or +* are not affected by optimizers. +* +* SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. +* + CALL SLAMC1( LBETA, LT, LRND, LIEEE1 ) +* +* Start to find EPS. +* + B = LBETA + A = B**( -LT ) + LEPS = A +* +* Try some tricks to see whether or not this is the correct EPS. +* + B = TWO / 3 + HALF = ONE / 2 + SIXTH = SLAMC3( B, -HALF ) + THIRD = SLAMC3( SIXTH, SIXTH ) + B = SLAMC3( THIRD, -HALF ) + B = SLAMC3( B, SIXTH ) + B = ABS( B ) + IF( B.LT.LEPS ) + $ B = LEPS +* + LEPS = 1 +* +*+ WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP + 10 CONTINUE + IF( ( LEPS.GT.B ) .AND. ( B.GT.ZERO ) ) THEN + LEPS = B + C = SLAMC3( HALF*LEPS, ( TWO**5 )*( LEPS**2 ) ) + C = SLAMC3( HALF, -C ) + B = SLAMC3( HALF, C ) + C = SLAMC3( HALF, -B ) + B = SLAMC3( HALF, C ) + GO TO 10 + END IF +*+ END WHILE +* + IF( A.LT.LEPS ) + $ LEPS = A +* +* Computation of EPS complete. +* +* Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). +* Keep dividing A by BETA until (gradual) underflow occurs. This +* is detected when we cannot recover the previous A. +* + RBASE = ONE / LBETA + SMALL = ONE + DO 20 I = 1, 3 + SMALL = SLAMC3( SMALL*RBASE, ZERO ) + 20 CONTINUE + A = SLAMC3( ONE, SMALL ) + CALL SLAMC4( NGPMIN, ONE, LBETA ) + CALL SLAMC4( NGNMIN, -ONE, LBETA ) + CALL SLAMC4( GPMIN, A, LBETA ) + CALL SLAMC4( GNMIN, -A, LBETA ) + IEEE = .FALSE. +* + IF( ( NGPMIN.EQ.NGNMIN ) .AND. ( GPMIN.EQ.GNMIN ) ) THEN + IF( NGPMIN.EQ.GPMIN ) THEN + LEMIN = NGPMIN +* ( Non twos-complement machines, no gradual underflow; +* e.g., VAX ) + ELSE IF( ( GPMIN-NGPMIN ).EQ.3 ) THEN + LEMIN = NGPMIN - 1 + LT + IEEE = .TRUE. +* ( Non twos-complement machines, with gradual underflow; +* e.g., IEEE standard followers ) + ELSE + LEMIN = MIN( NGPMIN, GPMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF +* + ELSE IF( ( NGPMIN.EQ.GPMIN ) .AND. ( NGNMIN.EQ.GNMIN ) ) THEN + IF( ABS( NGPMIN-NGNMIN ).EQ.1 ) THEN + LEMIN = MAX( NGPMIN, NGNMIN ) +* ( Twos-complement machines, no gradual underflow; +* e.g., CYBER 205 ) + ELSE + LEMIN = MIN( NGPMIN, NGNMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF +* + ELSE IF( ( ABS( NGPMIN-NGNMIN ).EQ.1 ) .AND. + $ ( GPMIN.EQ.GNMIN ) ) THEN + IF( ( GPMIN-MIN( NGPMIN, NGNMIN ) ).EQ.3 ) THEN + LEMIN = MAX( NGPMIN, NGNMIN ) - 1 + LT +* ( Twos-complement machines with gradual underflow; +* no known machine ) + ELSE + LEMIN = MIN( NGPMIN, NGNMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF +* + ELSE + LEMIN = MIN( NGPMIN, NGNMIN, GPMIN, GNMIN ) +* ( A guess; no known machine ) + IWARN = .TRUE. + END IF + FIRST = .FALSE. +*** +* Comment out this if block if EMIN is ok + IF( IWARN ) THEN + FIRST = .TRUE. + WRITE( 6, FMT = 9999 )LEMIN + END IF +*** +* +* Assume IEEE arithmetic if we found denormalised numbers above, +* or if arithmetic seems to round in the IEEE style, determined +* in routine SLAMC1. A true IEEE machine should have both things +* true; however, faulty machines may have one or the other. +* + IEEE = IEEE .OR. LIEEE1 +* +* Compute RMIN by successive division by BETA. We could compute +* RMIN as BASE**( EMIN - 1 ), but some machines underflow during +* this computation. +* + LRMIN = 1 + DO 30 I = 1, 1 - LEMIN + LRMIN = SLAMC3( LRMIN*RBASE, ZERO ) + 30 CONTINUE +* +* Finally, call SLAMC5 to compute EMAX and RMAX. +* + CALL SLAMC5( LBETA, LT, LEMIN, IEEE, LEMAX, LRMAX ) + END IF +* + BETA = LBETA + T = LT + RND = LRND + EPS = LEPS + EMIN = LEMIN + RMIN = LRMIN + EMAX = LEMAX + RMAX = LRMAX +* + RETURN +* + 9999 FORMAT( / / ' WARNING. The value EMIN may be incorrect:-', + $ ' EMIN = ', I8, / + $ ' If, after inspection, the value EMIN looks', + $ ' acceptable please comment out ', + $ / ' the IF block as marked within the code of routine', + $ ' SLAMC2,', / ' otherwise supply EMIN explicitly.', / ) +* +* End of SLAMC2 +* + END +* +************************************************************************ +* +*> \brief \b SLAMC3 +*> \details +*> \b Purpose: +*> \verbatim +*> SLAMC3 is intended to force A and B to be stored prior to doing +*> the addition of A and B , for use in situations where optimizers +*> might hold one of these in a register. +*> \endverbatim +*> +*> \param[in] A +*> +*> \param[in] B +*> \verbatim +*> The values A and B. +*> \endverbatim + + REAL FUNCTION SLAMC3( A, B ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2010 +* +* .. Scalar Arguments .. + REAL A, B +* .. +* ===================================================================== +* +* .. Executable Statements .. +* + SLAMC3 = A + B +* + RETURN +* +* End of SLAMC3 +* + END +* +************************************************************************ +* +*> \brief \b SLAMC4 +*> \details +*> \b Purpose: +*> \verbatim +*> SLAMC4 is a service routine for SLAMC2. +*> \endverbatim +*> +*> \param[out] EMIN +*> \verbatim +*> The minimum exponent before (gradual) underflow, computed by +*> setting A = START and dividing by BASE until the previous A +*> can not be recovered. +*> \endverbatim +*> +*> \param[in] START +*> \verbatim +*> The starting point for determining EMIN. +*> \endverbatim +*> +*> \param[in] BASE +*> \verbatim +*> The base of the machine. +*> \endverbatim +*> + SUBROUTINE SLAMC4( EMIN, START, BASE ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2010 +* +* .. Scalar Arguments .. + INTEGER BASE + INTEGER EMIN + REAL START +* .. +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I + REAL A, B1, B2, C1, C2, D1, D2, ONE, RBASE, ZERO +* .. +* .. External Functions .. + REAL SLAMC3 + EXTERNAL SLAMC3 +* .. +* .. Executable Statements .. +* + A = START + ONE = 1 + RBASE = ONE / BASE + ZERO = 0 + EMIN = 1 + B1 = SLAMC3( A*RBASE, ZERO ) + C1 = A + C2 = A + D1 = A + D2 = A +*+ WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. +* $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP + 10 CONTINUE + IF( ( C1.EQ.A ) .AND. ( C2.EQ.A ) .AND. ( D1.EQ.A ) .AND. + $ ( D2.EQ.A ) ) THEN + EMIN = EMIN - 1 + A = B1 + B1 = SLAMC3( A / BASE, ZERO ) + C1 = SLAMC3( B1*BASE, ZERO ) + D1 = ZERO + DO 20 I = 1, BASE + D1 = D1 + B1 + 20 CONTINUE + B2 = SLAMC3( A*RBASE, ZERO ) + C2 = SLAMC3( B2 / RBASE, ZERO ) + D2 = ZERO + DO 30 I = 1, BASE + D2 = D2 + B2 + 30 CONTINUE + GO TO 10 + END IF +*+ END WHILE +* + RETURN +* +* End of SLAMC4 +* + END +* +************************************************************************ +* +*> \brief \b SLAMC5 +*> \details +*> \b Purpose: +*> \verbatim +*> SLAMC5 attempts to compute RMAX, the largest machine floating-point +*> number, without overflow. It assumes that EMAX + abs(EMIN) sum +*> approximately to a power of 2. It will fail on machines where this +*> assumption does not hold, for example, the Cyber 205 (EMIN = -28625, +*> EMAX = 28718). It will also fail if the value supplied for EMIN is +*> too large (i.e. too close to zero), probably with overflow. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> The base of floating-point arithmetic. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> The number of base BETA digits in the mantissa of a +*> floating-point value. +*> \endverbatim +*> +*> \param[in] EMIN +*> \verbatim +*> The minimum exponent before (gradual) underflow. +*> \endverbatim +*> +*> \param[in] IEEE +*> \verbatim +*> A logical flag specifying whether or not the arithmetic +*> system is thought to comply with the IEEE standard. +*> \endverbatim +*> +*> \param[out] EMAX +*> \verbatim +*> The largest exponent before overflow +*> \endverbatim +*> +*> \param[out] RMAX +*> \verbatim +*> The largest machine floating-point number. +*> \endverbatim +*> + SUBROUTINE SLAMC5( BETA, P, EMIN, IEEE, EMAX, RMAX ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2010 +* +* .. Scalar Arguments .. + LOGICAL IEEE + INTEGER BETA, EMAX, EMIN, P + REAL RMAX +* .. +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER EXBITS, EXPSUM, I, LEXP, NBITS, TRY, UEXP + REAL OLDY, RECBAS, Y, Z +* .. +* .. External Functions .. + REAL SLAMC3 + EXTERNAL SLAMC3 +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* First compute LEXP and UEXP, two powers of 2 that bound +* abs(EMIN). We then assume that EMAX + abs(EMIN) will sum +* approximately to the bound that is closest to abs(EMIN). +* (EMAX is the exponent of the required number RMAX). +* + LEXP = 1 + EXBITS = 1 + 10 CONTINUE + TRY = LEXP*2 + IF( TRY.LE.( -EMIN ) ) THEN + LEXP = TRY + EXBITS = EXBITS + 1 + GO TO 10 + END IF + IF( LEXP.EQ.-EMIN ) THEN + UEXP = LEXP + ELSE + UEXP = TRY + EXBITS = EXBITS + 1 + END IF +* +* Now -LEXP is less than or equal to EMIN, and -UEXP is greater +* than or equal to EMIN. EXBITS is the number of bits needed to +* store the exponent. +* + IF( ( UEXP+EMIN ).GT.( -LEXP-EMIN ) ) THEN + EXPSUM = 2*LEXP + ELSE + EXPSUM = 2*UEXP + END IF +* +* EXPSUM is the exponent range, approximately equal to +* EMAX - EMIN + 1 . +* + EMAX = EXPSUM + EMIN - 1 + NBITS = 1 + EXBITS + P +* +* NBITS is the total number of bits needed to store a +* floating-point number. +* + IF( ( MOD( NBITS, 2 ).EQ.1 ) .AND. ( BETA.EQ.2 ) ) THEN +* +* Either there are an odd number of bits used to store a +* floating-point number, which is unlikely, or some bits are +* not used in the representation of numbers, which is possible, +* (e.g. Cray machines) or the mantissa has an implicit bit, +* (e.g. IEEE machines, Dec Vax machines), which is perhaps the +* most likely. We have to assume the last alternative. +* If this is true, then we need to reduce EMAX by one because +* there must be some way of representing zero in an implicit-bit +* system. On machines like Cray, we are reducing EMAX by one +* unnecessarily. +* + EMAX = EMAX - 1 + END IF +* + IF( IEEE ) THEN +* +* Assume we are on an IEEE machine which reserves one exponent +* for infinity and NaN. +* + EMAX = EMAX - 1 + END IF +* +* Now create RMAX, the largest machine number, which should +* be equal to (1.0 - BETA**(-P)) * BETA**EMAX . +* +* First compute 1.0 - BETA**(-P), being careful that the +* result is less than 1.0 . +* + RECBAS = ONE / BETA + Z = BETA - ONE + Y = ZERO + DO 20 I = 1, P + Z = Z*RECBAS + IF( Y.LT.ONE ) + $ OLDY = Y + Y = SLAMC3( Y, Z ) + 20 CONTINUE + IF( Y.GE.ONE ) + $ Y = OLDY +* +* Now multiply by BETA**EMAX to get RMAX. +* + DO 30 I = 1, EMAX + Y = SLAMC3( Y*BETA, ZERO ) + 30 CONTINUE +* + RMAX = Y + RETURN +* +* End of SLAMC5 +* + END diff --git a/dspl/liblapack/INSTALL/slamchtst.f b/dspl/liblapack/INSTALL/slamchtst.f new file mode 100644 index 0000000..da8bc99 --- /dev/null +++ b/dspl/liblapack/INSTALL/slamchtst.f @@ -0,0 +1,63 @@ +*> \brief \b SLAMCHTST +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== PROGRAM SLAMCHTST +* +* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* ===================================================================== +* +* .. Local Scalars .. + REAL BASE, EMAX, EMIN, EPS, RMAX, RMIN, RND, SFMIN, + $ T, PREC +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Executable Statements .. +* + EPS = SLAMCH( 'Epsilon' ) + SFMIN = SLAMCH( 'Safe minimum' ) + BASE = SLAMCH( 'Base' ) + PREC = SLAMCH( 'Precision' ) + T = SLAMCH( 'Number of digits in mantissa' ) + RND = SLAMCH( 'Rounding mode' ) + EMIN = SLAMCH( 'Minimum exponent' ) + RMIN = SLAMCH( 'Underflow threshold' ) + EMAX = SLAMCH( 'Largest exponent' ) + RMAX = SLAMCH( 'Overflow threshold' ) +* + WRITE( 6, * )' Epsilon = ', EPS + WRITE( 6, * )' Safe minimum = ', SFMIN + WRITE( 6, * )' Base = ', BASE + WRITE( 6, * )' Precision = ', PREC + WRITE( 6, * )' Number of digits in mantissa = ', T + WRITE( 6, * )' Rounding mode = ', RND + WRITE( 6, * )' Minimum exponent = ', EMIN + WRITE( 6, * )' Underflow threshold = ', RMIN + WRITE( 6, * )' Largest exponent = ', EMAX + WRITE( 6, * )' Overflow threshold = ', RMAX + WRITE( 6, * )' Reciprocal of safe minimum = ', 1 / SFMIN +* + END diff --git a/dspl/liblapack/INSTALL/tstiee.f b/dspl/liblapack/INSTALL/tstiee.f new file mode 100644 index 0000000..210abe4 --- /dev/null +++ b/dspl/liblapack/INSTALL/tstiee.f @@ -0,0 +1,770 @@ +*> \brief \b TSTIEE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + PROGRAM TSTIEE +* +* -- LAPACK test routine (version 3.7.0) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Local Scalars .. + INTEGER IEEEOK +* .. +* .. Executable Statements .. +* + WRITE( 6, FMT = * ) + $ 'We are about to check whether infinity arithmetic' + WRITE( 6, FMT = * )'can be trusted. If this test hangs, set' + WRITE( 6, FMT = * ) + $ 'ILAENV = 0 for ISPEC = 10 in LAPACK/SRC/ilaenv.f' +* + IEEEOK = ILAENV( 10, 'ILAENV', 'N', 1, 2, 3, 4 ) + WRITE( 6, FMT = * ) +* + IF( IEEEOK.EQ.0 ) THEN + WRITE( 6, FMT = * ) + $ 'Infinity arithmetic did not perform per the ieee spec' + ELSE + WRITE( 6, FMT = * ) + $ 'Infinity arithmetic performed as per the ieee spec.' + WRITE( 6, FMT = * ) + $ 'However, this is not an exhaustive test and does not' + WRITE( 6, FMT = * ) + $ 'guarantee that infinity arithmetic meets the', + $ ' ieee spec.' + END IF +* + WRITE( 6, FMT = * ) + WRITE( 6, FMT = * ) + $ 'We are about to check whether NaN arithmetic' + WRITE( 6, FMT = * )'can be trusted. If this test hangs, set' + WRITE( 6, FMT = * ) + $ 'ILAENV = 0 for ISPEC = 11 in LAPACK/SRC/ilaenv.f' + IEEEOK = ILAENV( 11, 'ILAENV', 'N', 1, 2, 3, 4 ) +* + WRITE( 6, FMT = * ) + IF( IEEEOK.EQ.0 ) THEN + WRITE( 6, FMT = * ) + $ 'NaN arithmetic did not perform per the ieee spec' + ELSE + WRITE( 6, FMT = * )'NaN arithmetic performed as per the ieee', + $ ' spec.' + WRITE( 6, FMT = * ) + $ 'However, this is not an exhaustive test and does not' + WRITE( 6, FMT = * )'guarantee that NaN arithmetic meets the', + $ ' ieee spec.' + END IF + WRITE( 6, FMT = * ) +* + END + INTEGER FUNCTION ILAENV( ISPEC, NAME, OPTS, N1, N2, N3, + $ N4 ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + CHARACTER*( * ) NAME, OPTS + INTEGER ISPEC, N1, N2, N3, N4 +* .. +* +* Purpose +* ======= +* +* ILAENV is called from the LAPACK routines to choose problem-dependent +* parameters for the local environment. See ISPEC for a description of +* the parameters. +* +* This version provides a set of parameters which should give good, +* but not optimal, performance on many of the currently available +* computers. Users are encouraged to modify this subroutine to set +* the tuning parameters for their particular machine using the option +* and problem size information in the arguments. +* +* This routine will not function correctly if it is converted to all +* lower case. Converting it to all upper case is allowed. +* +* Arguments: +* ========== +* +* ISPEC (input) INTEGER +* Specifies the parameter to be returned as the value of +* ILAENV. +* = 1: the optimal blocksize; if this value is 1, an unblocked +* algorithm will give the best performance. +* = 2: the minimum block size for which the block routine +* should be used; if the usable block size is less than +* this value, an unblocked routine should be used. +* = 3: the crossover point (in a block routine, for N less +* than this value, an unblocked routine should be used) +* = 4: the number of shifts, used in the nonsymmetric +* eigenvalue routines +* = 5: the minimum column dimension for blocking to be used; +* rectangular blocks must have dimension at least k by m, +* where k is given by ILAENV(2,...) and m by ILAENV(5,...) +* = 6: the crossover point for the SVD (when reducing an m by n +* matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds +* this value, a QR factorization is used first to reduce +* the matrix to a triangular form.) +* = 7: the number of processors +* = 8: the crossover point for the multishift QR and QZ methods +* for nonsymmetric eigenvalue problems. +* = 9: maximum size of the subproblems at the bottom of the +* computation tree in the divide-and-conquer algorithm +* (used by xGELSD and xGESDD) +* =10: ieee NaN arithmetic can be trusted not to trap +* =11: infinity arithmetic can be trusted not to trap +* +* NAME (input) CHARACTER*(*) +* The name of the calling subroutine, in either upper case or +* lower case. +* +* OPTS (input) CHARACTER*(*) +* The character options to the subroutine NAME, concatenated +* into a single character string. For example, UPLO = 'U', +* TRANS = 'T', and DIAG = 'N' for a triangular routine would +* be specified as OPTS = 'UTN'. +* +* N1 (input) INTEGER +* N2 (input) INTEGER +* N3 (input) INTEGER +* N4 (input) INTEGER +* Problem dimensions for the subroutine NAME; these may not all +* be required. +* +* (ILAENV) (output) INTEGER +* >= 0: the value of the parameter specified by ISPEC +* < 0: if ILAENV = -k, the k-th argument had an illegal value. +* +* Further Details +* =============== +* +* The following conventions have been used when calling ILAENV from the +* LAPACK routines: +* 1) OPTS is a concatenation of all of the character options to +* subroutine NAME, in the same order that they appear in the +* argument list for NAME, even if they are not used in determining +* the value of the parameter specified by ISPEC. +* 2) The problem dimensions N1, N2, N3, N4 are specified in the order +* that they appear in the argument list for NAME. N1 is used +* first, N2 second, and so on, and unused problem dimensions are +* passed a value of -1. +* 3) The parameter value returned by ILAENV is checked for validity in +* the calling subroutine. For example, ILAENV is used to retrieve +* the optimal blocksize for STRTRI as follows: +* +* NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) +* IF( NB.LE.1 ) NB = MAX( 1, N ) +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL CNAME, SNAME + CHARACTER*1 C1 + CHARACTER*2 C2, C4 + CHARACTER*3 C3 + CHARACTER*6 SUBNAM + INTEGER I, IC, IZ, NB, NBMIN, NX +* .. +* .. Intrinsic Functions .. + INTRINSIC CHAR, ICHAR, INT, MIN, REAL +* .. +* .. External Functions .. + INTEGER IEEECK + EXTERNAL IEEECK +* .. +* .. Executable Statements .. +* + GO TO ( 100, 100, 100, 400, 500, 600, 700, 800, 900, 1000, + $ 1100 ) ISPEC +* +* Invalid value for ISPEC +* + ILAENV = -1 + RETURN +* + 100 CONTINUE +* +* Convert NAME to upper case if the first character is lower case. +* + ILAENV = 1 + SUBNAM = NAME + IC = ICHAR( SUBNAM( 1:1 ) ) + IZ = ICHAR( 'Z' ) + IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN +* +* ASCII character set +* + IF( IC.GE.97 .AND. IC.LE.122 ) THEN + SUBNAM( 1:1 ) = CHAR( IC-32 ) + DO 10 I = 2, 6 + IC = ICHAR( SUBNAM( I:I ) ) + IF( IC.GE.97 .AND. IC.LE.122 ) + $ SUBNAM( I:I ) = CHAR( IC-32 ) + 10 CONTINUE + END IF +* + ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN +* +* EBCDIC character set +* + IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. + $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. + $ ( IC.GE.162 .AND. IC.LE.169 ) ) THEN + SUBNAM( 1:1 ) = CHAR( IC+64 ) + DO 20 I = 2, 6 + IC = ICHAR( SUBNAM( I:I ) ) + IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. + $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. + $ ( IC.GE.162 .AND. IC.LE.169 ) ) + $ SUBNAM( I:I ) = CHAR( IC+64 ) + 20 CONTINUE + END IF +* + ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN +* +* Prime machines: ASCII+128 +* + IF( IC.GE.225 .AND. IC.LE.250 ) THEN + SUBNAM( 1:1 ) = CHAR( IC-32 ) + DO 30 I = 2, 6 + IC = ICHAR( SUBNAM( I:I ) ) + IF( IC.GE.225 .AND. IC.LE.250 ) + $ SUBNAM( I:I ) = CHAR( IC-32 ) + 30 CONTINUE + END IF + END IF +* + C1 = SUBNAM( 1:1 ) + SNAME = C1.EQ.'S' .OR. C1.EQ.'D' + CNAME = C1.EQ.'C' .OR. C1.EQ.'Z' + IF( .NOT.( CNAME .OR. SNAME ) ) + $ RETURN + C2 = SUBNAM( 2:3 ) + C3 = SUBNAM( 4:6 ) + C4 = C3( 2:3 ) +* + GO TO ( 110, 200, 300 ) ISPEC +* + 110 CONTINUE +* +* ISPEC = 1: block size +* +* In these examples, separate code is provided for setting NB for +* real and complex. We assume that NB will take the same value in +* single or double precision. +* + NB = 1 +* + IF( C2.EQ.'GE' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + ELSE IF( C3.EQ.'QRF' .OR. C3.EQ.'RQF' .OR. C3.EQ.'LQF' .OR. + $ C3.EQ.'QLF' ) THEN + IF( SNAME ) THEN + NB = 32 + ELSE + NB = 32 + END IF + ELSE IF( C3.EQ.'HRD' ) THEN + IF( SNAME ) THEN + NB = 32 + ELSE + NB = 32 + END IF + ELSE IF( C3.EQ.'BRD' ) THEN + IF( SNAME ) THEN + NB = 32 + ELSE + NB = 32 + END IF + ELSE IF( C3.EQ.'TRI' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + END IF + ELSE IF( C2.EQ.'PO' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + END IF + ELSE IF( C2.EQ.'SY' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + ELSE IF( SNAME .AND. C3.EQ.'TRD' ) THEN + NB = 32 + ELSE IF( SNAME .AND. C3.EQ.'GST' ) THEN + NB = 64 + END IF + ELSE IF( CNAME .AND. C2.EQ.'HE' ) THEN + IF( C3.EQ.'TRF' ) THEN + NB = 64 + ELSE IF( C3.EQ.'TRD' ) THEN + NB = 32 + ELSE IF( C3.EQ.'GST' ) THEN + NB = 64 + END IF + ELSE IF( SNAME .AND. C2.EQ.'OR' ) THEN + IF( C3( 1:1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. + $ C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. + $ C4.EQ.'BR' ) THEN + NB = 32 + END IF + ELSE IF( C3( 1:1 ).EQ.'M' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. + $ C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. + $ C4.EQ.'BR' ) THEN + NB = 32 + END IF + END IF + ELSE IF( CNAME .AND. C2.EQ.'UN' ) THEN + IF( C3( 1:1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. + $ C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. + $ C4.EQ.'BR' ) THEN + NB = 32 + END IF + ELSE IF( C3( 1:1 ).EQ.'M' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. + $ C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. + $ C4.EQ.'BR' ) THEN + NB = 32 + END IF + END IF + ELSE IF( C2.EQ.'GB' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + IF( N4.LE.64 ) THEN + NB = 1 + ELSE + NB = 32 + END IF + ELSE + IF( N4.LE.64 ) THEN + NB = 1 + ELSE + NB = 32 + END IF + END IF + END IF + ELSE IF( C2.EQ.'PB' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + IF( N2.LE.64 ) THEN + NB = 1 + ELSE + NB = 32 + END IF + ELSE + IF( N2.LE.64 ) THEN + NB = 1 + ELSE + NB = 32 + END IF + END IF + END IF + ELSE IF( C2.EQ.'TR' ) THEN + IF( C3.EQ.'TRI' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + END IF + ELSE IF( C2.EQ.'LA' ) THEN + IF( C3.EQ.'UUM' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + END IF + ELSE IF( SNAME .AND. C2.EQ.'ST' ) THEN + IF( C3.EQ.'EBZ' ) THEN + NB = 1 + END IF + END IF + ILAENV = NB + RETURN +* + 200 CONTINUE +* +* ISPEC = 2: minimum block size +* + NBMIN = 2 + IF( C2.EQ.'GE' ) THEN + IF( C3.EQ.'QRF' .OR. C3.EQ.'RQF' .OR. C3.EQ.'LQF' .OR. + $ C3.EQ.'QLF' ) THEN + IF( SNAME ) THEN + NBMIN = 2 + ELSE + NBMIN = 2 + END IF + ELSE IF( C3.EQ.'HRD' ) THEN + IF( SNAME ) THEN + NBMIN = 2 + ELSE + NBMIN = 2 + END IF + ELSE IF( C3.EQ.'BRD' ) THEN + IF( SNAME ) THEN + NBMIN = 2 + ELSE + NBMIN = 2 + END IF + ELSE IF( C3.EQ.'TRI' ) THEN + IF( SNAME ) THEN + NBMIN = 2 + ELSE + NBMIN = 2 + END IF + END IF + ELSE IF( C2.EQ.'SY' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + NBMIN = 8 + ELSE + NBMIN = 8 + END IF + ELSE IF( SNAME .AND. C3.EQ.'TRD' ) THEN + NBMIN = 2 + END IF + ELSE IF( CNAME .AND. C2.EQ.'HE' ) THEN + IF( C3.EQ.'TRD' ) THEN + NBMIN = 2 + END IF + ELSE IF( SNAME .AND. C2.EQ.'OR' ) THEN + IF( C3( 1:1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. + $ C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. + $ C4.EQ.'BR' ) THEN + NBMIN = 2 + END IF + ELSE IF( C3( 1:1 ).EQ.'M' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. + $ C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. + $ C4.EQ.'BR' ) THEN + NBMIN = 2 + END IF + END IF + ELSE IF( CNAME .AND. C2.EQ.'UN' ) THEN + IF( C3( 1:1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. + $ C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. + $ C4.EQ.'BR' ) THEN + NBMIN = 2 + END IF + ELSE IF( C3( 1:1 ).EQ.'M' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. + $ C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. + $ C4.EQ.'BR' ) THEN + NBMIN = 2 + END IF + END IF + END IF + ILAENV = NBMIN + RETURN +* + 300 CONTINUE +* +* ISPEC = 3: crossover point +* + NX = 0 + IF( C2.EQ.'GE' ) THEN + IF( C3.EQ.'QRF' .OR. C3.EQ.'RQF' .OR. C3.EQ.'LQF' .OR. + $ C3.EQ.'QLF' ) THEN + IF( SNAME ) THEN + NX = 128 + ELSE + NX = 128 + END IF + ELSE IF( C3.EQ.'HRD' ) THEN + IF( SNAME ) THEN + NX = 128 + ELSE + NX = 128 + END IF + ELSE IF( C3.EQ.'BRD' ) THEN + IF( SNAME ) THEN + NX = 128 + ELSE + NX = 128 + END IF + END IF + ELSE IF( C2.EQ.'SY' ) THEN + IF( SNAME .AND. C3.EQ.'TRD' ) THEN + NX = 32 + END IF + ELSE IF( CNAME .AND. C2.EQ.'HE' ) THEN + IF( C3.EQ.'TRD' ) THEN + NX = 32 + END IF + ELSE IF( SNAME .AND. C2.EQ.'OR' ) THEN + IF( C3( 1:1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. + $ C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. + $ C4.EQ.'BR' ) THEN + NX = 128 + END IF + END IF + ELSE IF( CNAME .AND. C2.EQ.'UN' ) THEN + IF( C3( 1:1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. + $ C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. + $ C4.EQ.'BR' ) THEN + NX = 128 + END IF + END IF + END IF + ILAENV = NX + RETURN +* + 400 CONTINUE +* +* ISPEC = 4: number of shifts (used by xHSEQR) +* + ILAENV = 6 + RETURN +* + 500 CONTINUE +* +* ISPEC = 5: minimum column dimension (not used) +* + ILAENV = 2 + RETURN +* + 600 CONTINUE +* +* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) +* + ILAENV = INT( REAL( MIN( N1, N2 ) )*1.6E0 ) + RETURN +* + 700 CONTINUE +* +* ISPEC = 7: number of processors (not used) +* + ILAENV = 1 + RETURN +* + 800 CONTINUE +* +* ISPEC = 8: crossover point for multishift (used by xHSEQR) +* + ILAENV = 50 + RETURN +* + 900 CONTINUE +* +* ISPEC = 9: maximum size of the subproblems at the bottom of the +* computation tree in the divide-and-conquer algorithm +* (used by xGELSD and xGESDD) +* + ILAENV = 25 + RETURN +* + 1000 CONTINUE +* +* ISPEC = 10: ieee NaN arithmetic can be trusted not to trap +* + ILAENV = 1 + IF (ILAENV .EQ. 1) THEN + ILAENV = IEEECK( 0, 0.0, 1.0 ) + ENDIF + RETURN +* + 1100 CONTINUE +* +* ISPEC = 11: infinity arithmetic can be trusted not to trap +* + ILAENV = 1 + IF (ILAENV .EQ. 1) THEN + ILAENV = IEEECK( 1, 0.0, 1.0 ) + ENDIF + RETURN +* +* End of ILAENV +* + END + INTEGER FUNCTION IEEECK( ISPEC, ZERO, ONE ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. +* November 2006 +* +* .. Scalar Arguments .. + INTEGER ISPEC + REAL ZERO, ONE +* .. +* +* Purpose +* ======= +* +* IEEECK is called from the ILAENV to verify that Inifinity and +* possibly NaN arithmetic is safe (i.e. will not trap). +* +* Arguments: +* ========== +* +* ISPEC (input) INTEGER +* Specifies whether to test just for inifinity arithmetic +* or whether to test for infinity and NaN arithmetic. +* = 0: Verify infinity arithmetic only. +* = 1: Verify infinity and NaN arithmetic. +* +* ZERO (input) REAL +* Must contain the value 0.0 +* This is passed to prevent the compiler from optimizing +* away this code. +* +* ONE (input) REAL +* Must contain the value 1.0 +* This is passed to prevent the compiler from optimizing +* away this code. +* +* RETURN VALUE: INTEGER +* = 0: Arithmetic failed to produce the correct answers +* = 1: Arithmetic produced the correct answers +* +* .. Local Scalars .. + REAL POSINF, NEGINF, NAN1, NAN2, NAN3, NAN4, NAN5, NAN6, NEGZRO, + $ NEWZRO +* .. +* .. Executable Statements .. + IEEECK = 1 + + POSINF = ONE /ZERO + IF ( POSINF .LE. ONE ) THEN + IEEECK = 0 + RETURN + ENDIF + + NEGINF = -ONE / ZERO + IF ( NEGINF .GE. ZERO ) THEN + IEEECK = 0 + RETURN + ENDIF + + NEGZRO = ONE / ( NEGINF + ONE ) + IF ( NEGZRO .NE. ZERO ) THEN + IEEECK = 0 + RETURN + ENDIF + + NEGINF = ONE / NEGZRO + IF ( NEGINF .GE. ZERO ) THEN + IEEECK = 0 + RETURN + ENDIF + + NEWZRO = NEGZRO + ZERO + IF ( NEWZRO .NE. ZERO ) THEN + IEEECK = 0 + RETURN + ENDIF + + POSINF = ONE / NEWZRO + IF ( POSINF .LE. ONE ) THEN + IEEECK = 0 + RETURN + ENDIF + + NEGINF = NEGINF * POSINF + IF ( NEGINF .GE. ZERO ) THEN + IEEECK = 0 + RETURN + ENDIF + + POSINF = POSINF * POSINF + IF ( POSINF .LE. ONE ) THEN + IEEECK = 0 + RETURN + ENDIF + + + +* +* Return if we were only asked to check infinity arithmetic +* + IF (ISPEC .EQ. 0 ) RETURN + + NAN1 = POSINF + NEGINF + + NAN2 = POSINF / NEGINF + + NAN3 = POSINF / POSINF + + NAN4 = POSINF * ZERO + + NAN5 = NEGINF * NEGZRO + + NAN6 = NAN5 * 0.0 + + IF ( NAN1 .EQ. NAN1 ) THEN + IEEECK = 0 + RETURN + ENDIF + + IF ( NAN2 .EQ. NAN2 ) THEN + IEEECK = 0 + RETURN + ENDIF + + IF ( NAN3 .EQ. NAN3 ) THEN + IEEECK = 0 + RETURN + ENDIF + + IF ( NAN4 .EQ. NAN4 ) THEN + IEEECK = 0 + RETURN + ENDIF + + IF ( NAN5 .EQ. NAN5 ) THEN + IEEECK = 0 + RETURN + ENDIF + + IF ( NAN6 .EQ. NAN6 ) THEN + IEEECK = 0 + RETURN + ENDIF + + RETURN + END diff --git a/dspl/liblapack/SRC/.gitignore b/dspl/liblapack/SRC/.gitignore new file mode 100644 index 0000000..819556b --- /dev/null +++ b/dspl/liblapack/SRC/.gitignore @@ -0,0 +1,9 @@ +*.o +*.so +*.dll +*.exe +*.txt +*.dat +*.bin +*.csv +*.a \ No newline at end of file diff --git a/dspl/liblapack/SRC/DEPRECATED/cgegs.f b/dspl/liblapack/SRC/DEPRECATED/cgegs.f new file mode 100644 index 0000000..4e75e20 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/cgegs.f @@ -0,0 +1,531 @@ +*> \brief CGEEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEGS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEGS( JOBVSL, JOBVSR, N, A, LDA, B, LDB, ALPHA, BETA, +* VSL, LDVSL, VSR, LDVSR, WORK, LWORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVSL, JOBVSR +* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N +* .. +* .. Array Arguments .. +* REAL RWORK( * ) +* COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine CGGES. +*> +*> CGEGS computes the eigenvalues, Schur form, and, optionally, the +*> left and or/right Schur vectors of a complex matrix pair (A,B). +*> Given two square matrices A and B, the generalized Schur +*> factorization has the form +*> +*> A = Q*S*Z**H, B = Q*T*Z**H +*> +*> where Q and Z are unitary matrices and S and T are upper triangular. +*> The columns of Q are the left Schur vectors +*> and the columns of Z are the right Schur vectors. +*> +*> If only the eigenvalues of (A,B) are needed, the driver routine +*> CGEGV should be used instead. See CGEGV for a description of the +*> eigenvalues of the generalized nonsymmetric eigenvalue problem +*> (GNEP). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVSL +*> \verbatim +*> JOBVSL is CHARACTER*1 +*> = 'N': do not compute the left Schur vectors; +*> = 'V': compute the left Schur vectors (returned in VSL). +*> \endverbatim +*> +*> \param[in] JOBVSR +*> \verbatim +*> JOBVSR is CHARACTER*1 +*> = 'N': do not compute the right Schur vectors; +*> = 'V': compute the right Schur vectors (returned in VSR). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VSL, and VSR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the matrix A. +*> On exit, the upper triangular matrix S from the generalized +*> Schur factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB, N) +*> On entry, the matrix B. +*> On exit, the upper triangular matrix T from the generalized +*> Schur factorization. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX array, dimension (N) +*> The complex scalars alpha that define the eigenvalues of +*> GNEP. ALPHA(j) = S(j,j), the diagonal element of the Schur +*> form of A. +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is COMPLEX array, dimension (N) +*> The non-negative real scalars beta that define the +*> eigenvalues of GNEP. BETA(j) = T(j,j), the diagonal element +*> of the triangular factor T. +*> +*> Together, the quantities alpha = ALPHA(j) and beta = BETA(j) +*> represent the j-th eigenvalue of the matrix pair (A,B), in +*> one of the forms lambda = alpha/beta or mu = beta/alpha. +*> Since either lambda or mu may overflow, they should not, +*> in general, be computed. +*> \endverbatim +*> +*> \param[out] VSL +*> \verbatim +*> VSL is COMPLEX array, dimension (LDVSL,N) +*> If JOBVSL = 'V', the matrix of left Schur vectors Q. +*> Not referenced if JOBVSL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSL +*> \verbatim +*> LDVSL is INTEGER +*> The leading dimension of the matrix VSL. LDVSL >= 1, and +*> if JOBVSL = 'V', LDVSL >= N. +*> \endverbatim +*> +*> \param[out] VSR +*> \verbatim +*> VSR is COMPLEX array, dimension (LDVSR,N) +*> If JOBVSR = 'V', the matrix of right Schur vectors Z. +*> Not referenced if JOBVSR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSR +*> \verbatim +*> LDVSR is INTEGER +*> The leading dimension of the matrix VSR. LDVSR >= 1, and +*> if JOBVSR = 'V', LDVSR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,2*N). +*> For good performance, LWORK must generally be larger. +*> To compute the optimal value of LWORK, call ILAENV to get +*> blocksizes (for CGEQRF, CUNMQR, and CUNGQR.) Then compute: +*> NB -- MAX of the blocksizes for CGEQRF, CUNMQR, and CUNGQR; +*> the optimal LWORK is N*(NB+1). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> =1,...,N: +*> The QZ iteration failed. (A,B) are not in Schur +*> form, but ALPHA(j) and BETA(j) should be correct for +*> j=INFO+1,...,N. +*> > N: errors that usually indicate LAPACK problems: +*> =N+1: error return from CGGBAL +*> =N+2: error return from CGEQRF +*> =N+3: error return from CUNMQR +*> =N+4: error return from CUNGQR +*> =N+5: error return from CGGHRD +*> =N+6: error return from CHGEQZ (other than failed +*> iteration) +*> =N+7: error return from CGGBAK (computing VSL) +*> =N+8: error return from CGGBAK (computing VSR) +*> =N+9: error return from CLASCL (various places) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEeigen +* +* ===================================================================== + SUBROUTINE CGEGS( JOBVSL, JOBVSR, N, A, LDA, B, LDB, ALPHA, BETA, + $ VSL, LDVSL, VSR, LDVSR, WORK, LWORK, RWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILVSL, ILVSR, LQUERY + INTEGER ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, + $ ILO, IRIGHT, IROWS, IRWORK, ITAU, IWORK, + $ LOPT, LWKMIN, LWKOPT, NB, NB1, NB2, NB3 + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SAFMIN, SMLNUM +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY, + $ CLASCL, CLASET, CUNGQR, CUNMQR, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL ILAENV, LSAME, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* +* Test the input arguments +* + LWKMIN = MAX( 2*N, 1 ) + LWKOPT = LWKMIN + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF +* + IF( INFO.EQ.0 ) THEN + NB1 = ILAENV( 1, 'CGEQRF', ' ', N, N, -1, -1 ) + NB2 = ILAENV( 1, 'CUNMQR', ' ', N, N, N, -1 ) + NB3 = ILAENV( 1, 'CUNGQR', ' ', N, N, N, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LOPT = N*(NB+1) + WORK( 1 ) = LOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEGS ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'E' )*SLAMCH( 'B' ) + SAFMIN = SLAMCH( 'S' ) + SMLNUM = N*SAFMIN / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF +* + IF( ILASCL ) THEN + CALL CLASCL( 'G', -1, -1, ANRM, ANRMTO, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = CLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF +* + IF( ILBSCL ) THEN + CALL CLASCL( 'G', -1, -1, BNRM, BNRMTO, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* +* Permute the matrix to make it more nearly triangular +* + ILEFT = 1 + IRIGHT = N + 1 + IRWORK = IRIGHT + N + IWORK = 1 + CALL CGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWORK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 1 + GO TO 10 + END IF +* +* Reduce B to triangular form, and initialize VSL and/or VSR +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = IWORK + IWORK = ITAU + IROWS + CALL CGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 2 + GO TO 10 + END IF +* + CALL CUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ), + $ LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 3 + GO TO 10 + END IF +* + IF( ILVSL ) THEN + CALL CLASET( 'Full', N, N, CZERO, CONE, VSL, LDVSL ) + CALL CLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + CALL CUNGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK, + $ IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 4 + GO TO 10 + END IF + END IF +* + IF( ILVSR ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* + CALL CGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 5 + GO TO 10 + END IF +* +* Perform QZ algorithm, computing Schur vectors if desired +* + IWORK = ITAU + CALL CHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK( IWORK ), + $ LWORK+1-IWORK, RWORK( IRWORK ), IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN + INFO = IINFO + ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN + INFO = IINFO - N + ELSE + INFO = N + 6 + END IF + GO TO 10 + END IF +* +* Apply permutation to VSL and VSR +* + IF( ILVSL ) THEN + CALL CGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSL, LDVSL, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 7 + GO TO 10 + END IF + END IF + IF( ILVSR ) THEN + CALL CGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSR, LDVSR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 8 + GO TO 10 + END IF + END IF +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL CLASCL( 'U', -1, -1, ANRMTO, ANRM, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + CALL CLASCL( 'G', -1, -1, ANRMTO, ANRM, N, 1, ALPHA, N, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* + IF( ILBSCL ) THEN + CALL CLASCL( 'U', -1, -1, BNRMTO, BNRM, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + CALL CLASCL( 'G', -1, -1, BNRMTO, BNRM, N, 1, BETA, N, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* + 10 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CGEGS +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/cgegv.f b/dspl/liblapack/SRC/DEPRECATED/cgegv.f new file mode 100644 index 0000000..81def0d --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/cgegv.f @@ -0,0 +1,706 @@ +*> \brief CGEEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEGV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, +* VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVL, JOBVR +* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. +* REAL RWORK( * ) +* COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine CGGEV. +*> +*> CGEGV computes the eigenvalues and, optionally, the left and/or right +*> eigenvectors of a complex matrix pair (A,B). +*> Given two square matrices A and B, +*> the generalized nonsymmetric eigenvalue problem (GNEP) is to find the +*> eigenvalues lambda and corresponding (non-zero) eigenvectors x such +*> that +*> A*x = lambda*B*x. +*> +*> An alternate form is to find the eigenvalues mu and corresponding +*> eigenvectors y such that +*> mu*A*y = B*y. +*> +*> These two forms are equivalent with mu = 1/lambda and x = y if +*> neither lambda nor mu is zero. In order to deal with the case that +*> lambda or mu is zero or small, two values alpha and beta are returned +*> for each eigenvalue, such that lambda = alpha/beta and +*> mu = beta/alpha. +*> +*> The vectors x and y in the above equations are right eigenvectors of +*> the matrix pair (A,B). Vectors u and v satisfying +*> u**H*A = lambda*u**H*B or mu*v**H*A = v**H*B +*> are left eigenvectors of (A,B). +*> +*> Note: this routine performs "full balancing" on A and B +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': do not compute the left generalized eigenvectors; +*> = 'V': compute the left generalized eigenvectors (returned +*> in VL). +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': do not compute the right generalized eigenvectors; +*> = 'V': compute the right generalized eigenvectors (returned +*> in VR). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VL, and VR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the matrix A. +*> If JOBVL = 'V' or JOBVR = 'V', then on exit A +*> contains the Schur form of A from the generalized Schur +*> factorization of the pair (A,B) after balancing. If no +*> eigenvectors were computed, then only the diagonal elements +*> of the Schur form will be correct. See CGGHRD and CHGEQZ +*> for details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB, N) +*> On entry, the matrix B. +*> If JOBVL = 'V' or JOBVR = 'V', then on exit B contains the +*> upper triangular matrix obtained from B in the generalized +*> Schur factorization of the pair (A,B) after balancing. +*> If no eigenvectors were computed, then only the diagonal +*> elements of B will be correct. See CGGHRD and CHGEQZ for +*> details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX array, dimension (N) +*> The complex scalars alpha that define the eigenvalues of +*> GNEP. +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is COMPLEX array, dimension (N) +*> The complex scalars beta that define the eigenvalues of GNEP. +*> +*> Together, the quantities alpha = ALPHA(j) and beta = BETA(j) +*> represent the j-th eigenvalue of the matrix pair (A,B), in +*> one of the forms lambda = alpha/beta or mu = beta/alpha. +*> Since either lambda or mu may overflow, they should not, +*> in general, be computed. +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is COMPLEX array, dimension (LDVL,N) +*> If JOBVL = 'V', the left eigenvectors u(j) are stored +*> in the columns of VL, in the same order as their eigenvalues. +*> Each eigenvector is scaled so that its largest component has +*> abs(real part) + abs(imag. part) = 1, except for eigenvectors +*> corresponding to an eigenvalue with alpha = beta = 0, which +*> are set to zero. +*> Not referenced if JOBVL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the matrix VL. LDVL >= 1, and +*> if JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is COMPLEX array, dimension (LDVR,N) +*> If JOBVR = 'V', the right eigenvectors x(j) are stored +*> in the columns of VR, in the same order as their eigenvalues. +*> Each eigenvector is scaled so that its largest component has +*> abs(real part) + abs(imag. part) = 1, except for eigenvectors +*> corresponding to an eigenvalue with alpha = beta = 0, which +*> are set to zero. +*> Not referenced if JOBVR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the matrix VR. LDVR >= 1, and +*> if JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,2*N). +*> For good performance, LWORK must generally be larger. +*> To compute the optimal value of LWORK, call ILAENV to get +*> blocksizes (for CGEQRF, CUNMQR, and CUNGQR.) Then compute: +*> NB -- MAX of the blocksizes for CGEQRF, CUNMQR, and CUNGQR; +*> The optimal LWORK is MAX( 2*N, N*(NB+1) ). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (8*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> =1,...,N: +*> The QZ iteration failed. No eigenvectors have been +*> calculated, but ALPHA(j) and BETA(j) should be +*> correct for j=INFO+1,...,N. +*> > N: errors that usually indicate LAPACK problems: +*> =N+1: error return from CGGBAL +*> =N+2: error return from CGEQRF +*> =N+3: error return from CUNMQR +*> =N+4: error return from CUNGQR +*> =N+5: error return from CGGHRD +*> =N+6: error return from CHGEQZ (other than failed +*> iteration) +*> =N+7: error return from CTGEVC +*> =N+8: error return from CGGBAK (computing VL) +*> =N+9: error return from CGGBAK (computing VR) +*> =N+10: error return from CLASCL (various calls) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Balancing +*> --------- +*> +*> This driver calls CGGBAL to both permute and scale rows and columns +*> of A and B. The permutations PL and PR are chosen so that PL*A*PR +*> and PL*B*R will be upper triangular except for the diagonal blocks +*> A(i:j,i:j) and B(i:j,i:j), with i and j as close together as +*> possible. The diagonal scaling matrices DL and DR are chosen so +*> that the pair DL*PL*A*PR*DR, DL*PL*B*PR*DR have elements close to +*> one (except for the elements that start out zero.) +*> +*> After the eigenvalues and eigenvectors of the balanced matrices +*> have been computed, CGGBAK transforms the eigenvectors back to what +*> they would have been (in perfect arithmetic) if they had not been +*> balanced. +*> +*> Contents of A and B on Exit +*> -------- -- - --- - -- ---- +*> +*> If any eigenvectors are computed (either JOBVL='V' or JOBVR='V' or +*> both), then on exit the arrays A and B will contain the complex Schur +*> form[*] of the "balanced" versions of A and B. If no eigenvectors +*> are computed, then only the diagonal blocks will be correct. +*> +*> [*] In other words, upper triangular form. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGEGV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, + $ VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILIMIT, ILV, ILVL, ILVR, LQUERY + CHARACTER CHTEMP + INTEGER ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, ILO, + $ IN, IRIGHT, IROWS, IRWORK, ITAU, IWORK, JC, JR, + $ LOPT, LWKMIN, LWKOPT, NB, NB1, NB2, NB3 + REAL ABSAI, ABSAR, ABSB, ANRM, ANRM1, ANRM2, BNRM, + $ BNRM1, BNRM2, EPS, SAFMAX, SAFMIN, SALFAI, + $ SALFAR, SBETA, SCALE, TEMP + COMPLEX X +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY, + $ CLASCL, CLASET, CTGEVC, CUNGQR, CUNMQR, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL ILAENV, LSAME, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, INT, MAX, REAL +* .. +* .. Statement Functions .. + REAL ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( REAL( X ) ) + ABS( AIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* +* Test the input arguments +* + LWKMIN = MAX( 2*N, 1 ) + LWKOPT = LWKMIN + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF +* + IF( INFO.EQ.0 ) THEN + NB1 = ILAENV( 1, 'CGEQRF', ' ', N, N, -1, -1 ) + NB2 = ILAENV( 1, 'CUNMQR', ' ', N, N, N, -1 ) + NB3 = ILAENV( 1, 'CUNGQR', ' ', N, N, N, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LOPT = MAX( 2*N, N*(NB+1) ) + WORK( 1 ) = LOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEGV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'E' )*SLAMCH( 'B' ) + SAFMIN = SLAMCH( 'S' ) + SAFMIN = SAFMIN + SAFMIN + SAFMAX = ONE / SAFMIN +* +* Scale A +* + ANRM = CLANGE( 'M', N, N, A, LDA, RWORK ) + ANRM1 = ANRM + ANRM2 = ONE + IF( ANRM.LT.ONE ) THEN + IF( SAFMAX*ANRM.LT.ONE ) THEN + ANRM1 = SAFMIN + ANRM2 = SAFMAX*ANRM + END IF + END IF +* + IF( ANRM.GT.ZERO ) THEN + CALL CLASCL( 'G', -1, -1, ANRM, ONE, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 10 + RETURN + END IF + END IF +* +* Scale B +* + BNRM = CLANGE( 'M', N, N, B, LDB, RWORK ) + BNRM1 = BNRM + BNRM2 = ONE + IF( BNRM.LT.ONE ) THEN + IF( SAFMAX*BNRM.LT.ONE ) THEN + BNRM1 = SAFMIN + BNRM2 = SAFMAX*BNRM + END IF + END IF +* + IF( BNRM.GT.ZERO ) THEN + CALL CLASCL( 'G', -1, -1, BNRM, ONE, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 10 + RETURN + END IF + END IF +* +* Permute the matrix to make it more nearly triangular +* Also "balance" the matrix. +* + ILEFT = 1 + IRIGHT = N + 1 + IRWORK = IRIGHT + N + CALL CGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWORK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 1 + GO TO 80 + END IF +* +* Reduce B to triangular form, and initialize VL and/or VR +* + IROWS = IHI + 1 - ILO + IF( ILV ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = 1 + IWORK = ITAU + IROWS + CALL CGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 2 + GO TO 80 + END IF +* + CALL CUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ), + $ LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 3 + GO TO 80 + END IF +* + IF( ILVL ) THEN + CALL CLASET( 'Full', N, N, CZERO, CONE, VL, LDVL ) + CALL CLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + CALL CUNGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK, + $ IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 4 + GO TO 80 + END IF + END IF +* + IF( ILVR ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* + IF( ILV ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL CGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IINFO ) + ELSE + CALL CGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IINFO ) + END IF + IF( IINFO.NE.0 ) THEN + INFO = N + 5 + GO TO 80 + END IF +* +* Perform QZ algorithm +* + IWORK = ITAU + IF( ILV ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF + CALL CHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK( IWORK ), + $ LWORK+1-IWORK, RWORK( IRWORK ), IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN + INFO = IINFO + ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN + INFO = IINFO - N + ELSE + INFO = N + 6 + END IF + GO TO 80 + END IF +* + IF( ILV ) THEN +* +* Compute Eigenvectors +* + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL CTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, N, IN, WORK( IWORK ), RWORK( IRWORK ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 7 + GO TO 80 + END IF +* +* Undo balancing on VL and VR, rescale +* + IF( ILVL ) THEN + CALL CGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VL, LDVL, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 8 + GO TO 80 + END IF + DO 30 JC = 1, N + TEMP = ZERO + DO 10 JR = 1, N + TEMP = MAX( TEMP, ABS1( VL( JR, JC ) ) ) + 10 CONTINUE + IF( TEMP.LT.SAFMIN ) + $ GO TO 30 + TEMP = ONE / TEMP + DO 20 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 20 CONTINUE + 30 CONTINUE + END IF + IF( ILVR ) THEN + CALL CGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VR, LDVR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + GO TO 80 + END IF + DO 60 JC = 1, N + TEMP = ZERO + DO 40 JR = 1, N + TEMP = MAX( TEMP, ABS1( VR( JR, JC ) ) ) + 40 CONTINUE + IF( TEMP.LT.SAFMIN ) + $ GO TO 60 + TEMP = ONE / TEMP + DO 50 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 50 CONTINUE + 60 CONTINUE + END IF +* +* End of eigenvector calculation +* + END IF +* +* Undo scaling in alpha, beta +* +* Note: this does not give the alpha and beta for the unscaled +* problem. +* +* Un-scaling is limited to avoid underflow in alpha and beta +* if they are significant. +* + DO 70 JC = 1, N + ABSAR = ABS( REAL( ALPHA( JC ) ) ) + ABSAI = ABS( AIMAG( ALPHA( JC ) ) ) + ABSB = ABS( REAL( BETA( JC ) ) ) + SALFAR = ANRM*REAL( ALPHA( JC ) ) + SALFAI = ANRM*AIMAG( ALPHA( JC ) ) + SBETA = BNRM*REAL( BETA( JC ) ) + ILIMIT = .FALSE. + SCALE = ONE +* +* Check for significant underflow in imaginary part of ALPHA +* + IF( ABS( SALFAI ).LT.SAFMIN .AND. ABSAI.GE. + $ MAX( SAFMIN, EPS*ABSAR, EPS*ABSB ) ) THEN + ILIMIT = .TRUE. + SCALE = ( SAFMIN / ANRM1 ) / MAX( SAFMIN, ANRM2*ABSAI ) + END IF +* +* Check for significant underflow in real part of ALPHA +* + IF( ABS( SALFAR ).LT.SAFMIN .AND. ABSAR.GE. + $ MAX( SAFMIN, EPS*ABSAI, EPS*ABSB ) ) THEN + ILIMIT = .TRUE. + SCALE = MAX( SCALE, ( SAFMIN / ANRM1 ) / + $ MAX( SAFMIN, ANRM2*ABSAR ) ) + END IF +* +* Check for significant underflow in BETA +* + IF( ABS( SBETA ).LT.SAFMIN .AND. ABSB.GE. + $ MAX( SAFMIN, EPS*ABSAR, EPS*ABSAI ) ) THEN + ILIMIT = .TRUE. + SCALE = MAX( SCALE, ( SAFMIN / BNRM1 ) / + $ MAX( SAFMIN, BNRM2*ABSB ) ) + END IF +* +* Check for possible overflow when limiting scaling +* + IF( ILIMIT ) THEN + TEMP = ( SCALE*SAFMIN )*MAX( ABS( SALFAR ), ABS( SALFAI ), + $ ABS( SBETA ) ) + IF( TEMP.GT.ONE ) + $ SCALE = SCALE / TEMP + IF( SCALE.LT.ONE ) + $ ILIMIT = .FALSE. + END IF +* +* Recompute un-scaled ALPHA, BETA if necessary. +* + IF( ILIMIT ) THEN + SALFAR = ( SCALE*REAL( ALPHA( JC ) ) )*ANRM + SALFAI = ( SCALE*AIMAG( ALPHA( JC ) ) )*ANRM + SBETA = ( SCALE*BETA( JC ) )*BNRM + END IF + ALPHA( JC ) = CMPLX( SALFAR, SALFAI ) + BETA( JC ) = SBETA + 70 CONTINUE +* + 80 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CGEGV +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/cgelsx.f b/dspl/liblapack/SRC/DEPRECATED/cgelsx.f new file mode 100644 index 0000000..22d2358 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/cgelsx.f @@ -0,0 +1,447 @@ +*> \brief CGELSX solves overdetermined or underdetermined systems for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGELSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGELSX( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, +* WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, M, N, NRHS, RANK +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* REAL RWORK( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine CGELSY. +*> +*> CGELSX computes the minimum-norm solution to a complex linear least +*> squares problem: +*> minimize || A * X - B || +*> using a complete orthogonal factorization of A. A is an M-by-N +*> matrix which may be rank-deficient. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> +*> The routine first computes a QR factorization with column pivoting: +*> A * P = Q * [ R11 R12 ] +*> [ 0 R22 ] +*> with R11 defined as the largest leading submatrix whose estimated +*> condition number is less than 1/RCOND. The order of R11, RANK, +*> is the effective rank of A. +*> +*> Then, R22 is considered to be negligible, and R12 is annihilated +*> by unitary transformations from the right, arriving at the +*> complete orthogonal factorization: +*> A * P = Q * [ T11 0 ] * Z +*> [ 0 0 ] +*> The minimum-norm solution is then +*> X = P * Z**H [ inv(T11)*Q1**H*B ] +*> [ 0 ] +*> where Q1 consists of the first RANK columns of Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A has been overwritten by details of its +*> complete orthogonal factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the M-by-NRHS right hand side matrix B. +*> On exit, the N-by-NRHS solution matrix X. +*> If m >= n and RANK = n, the residual sum-of-squares for +*> the solution in the i-th column is given by the sum of +*> squares of elements N+1:M in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M,N). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> On entry, if JPVT(i) .ne. 0, the i-th column of A is an +*> initial column, otherwise it is a free column. Before +*> the QR factorization of A, all initial columns are +*> permuted to the leading positions; only the remaining +*> free columns are moved as a result of column pivoting +*> during the factorization. +*> On exit, if JPVT(i) = k, then the i-th column of A*P +*> was the k-th column of A. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is REAL +*> RCOND is used to determine the effective rank of A, which +*> is defined as the order of the largest leading triangular +*> submatrix R11 in the QR factorization with pivoting of A, +*> whose estimated condition number < 1/RCOND. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The effective rank of A, i.e., the order of the submatrix +*> R11. This is the same as the order of the submatrix T11 +*> in the complete orthogonal factorization of A. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension +*> (min(M,N) + max( N, 2*min(M,N)+NRHS )), +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEsolve +* +* ===================================================================== + SUBROUTINE CGELSX( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, + $ WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IMAX, IMIN + PARAMETER ( IMAX = 1, IMIN = 2 ) + REAL ZERO, ONE, DONE, NTDONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, DONE = ZERO, + $ NTDONE = ONE ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, IASCL, IBSCL, ISMAX, ISMIN, J, K, MN + REAL ANRM, BIGNUM, BNRM, SMAX, SMAXPR, SMIN, SMINPR, + $ SMLNUM + COMPLEX C1, C2, S1, S2, T1, T2 +* .. +* .. External Subroutines .. + EXTERNAL CGEQPF, CLAIC1, CLASCL, CLASET, CLATZM, CTRSM, + $ CTZRQF, CUNM2R, SLABAD, XERBLA +* .. +* .. External Functions .. + REAL CLANGE, SLAMCH + EXTERNAL CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, MIN +* .. +* .. Executable Statements .. +* + MN = MIN( M, N ) + ISMIN = MN + 1 + ISMAX = 2*MN + 1 +* +* Test the input arguments. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELSX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max elements outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + RANK = 0 + GO TO 100 + END IF +* + BNRM = CLANGE( 'M', M, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Compute QR factorization with column pivoting of A: +* A * P = Q * R +* + CALL CGEQPF( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), RWORK, + $ INFO ) +* +* complex workspace MN+N. Real workspace 2*N. Details of Householder +* rotations stored in WORK(1:MN). +* +* Determine RANK using incremental condition estimation +* + WORK( ISMIN ) = CONE + WORK( ISMAX ) = CONE + SMAX = ABS( A( 1, 1 ) ) + SMIN = SMAX + IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN + RANK = 0 + CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + GO TO 100 + ELSE + RANK = 1 + END IF +* + 10 CONTINUE + IF( RANK.LT.MN ) THEN + I = RANK + 1 + CALL CLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ), + $ A( I, I ), SMINPR, S1, C1 ) + CALL CLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ), + $ A( I, I ), SMAXPR, S2, C2 ) +* + IF( SMAXPR*RCOND.LE.SMINPR ) THEN + DO 20 I = 1, RANK + WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 ) + WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 ) + 20 CONTINUE + WORK( ISMIN+RANK ) = C1 + WORK( ISMAX+RANK ) = C2 + SMIN = SMINPR + SMAX = SMAXPR + RANK = RANK + 1 + GO TO 10 + END IF + END IF +* +* Logically partition R = [ R11 R12 ] +* [ 0 R22 ] +* where R11 = R(1:RANK,1:RANK) +* +* [R11,R12] = [ T11, 0 ] * Y +* + IF( RANK.LT.N ) + $ CALL CTZRQF( RANK, N, A, LDA, WORK( MN+1 ), INFO ) +* +* Details of Householder rotations stored in WORK(MN+1:2*MN) +* +* B(1:M,1:NRHS) := Q**H * B(1:M,1:NRHS) +* + CALL CUNM2R( 'Left', 'Conjugate transpose', M, NRHS, MN, A, LDA, + $ WORK( 1 ), B, LDB, WORK( 2*MN+1 ), INFO ) +* +* workspace NRHS +* +* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) +* + CALL CTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK, + $ NRHS, CONE, A, LDA, B, LDB ) +* + DO 40 I = RANK + 1, N + DO 30 J = 1, NRHS + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Y**H * B(1:N,1:NRHS) +* + IF( RANK.LT.N ) THEN + DO 50 I = 1, RANK + CALL CLATZM( 'Left', N-RANK+1, NRHS, A( I, RANK+1 ), LDA, + $ CONJG( WORK( MN+I ) ), B( I, 1 ), + $ B( RANK+1, 1 ), LDB, WORK( 2*MN+1 ) ) + 50 CONTINUE + END IF +* +* workspace NRHS +* +* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) +* + DO 90 J = 1, NRHS + DO 60 I = 1, N + WORK( 2*MN+I ) = NTDONE + 60 CONTINUE + DO 80 I = 1, N + IF( WORK( 2*MN+I ).EQ.NTDONE ) THEN + IF( JPVT( I ).NE.I ) THEN + K = I + T1 = B( K, J ) + T2 = B( JPVT( K ), J ) + 70 CONTINUE + B( JPVT( K ), J ) = T1 + WORK( 2*MN+K ) = DONE + T1 = T2 + K = JPVT( K ) + T2 = B( JPVT( K ), J ) + IF( JPVT( K ).NE.I ) + $ GO TO 70 + B( I, J ) = T1 + WORK( 2*MN+K ) = DONE + END IF + END IF + 80 CONTINUE + 90 CONTINUE +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL CLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL CLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 100 CONTINUE +* + RETURN +* +* End of CGELSX +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/cgeqpf.f b/dspl/liblapack/SRC/DEPRECATED/cgeqpf.f new file mode 100644 index 0000000..2cbd195 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/cgeqpf.f @@ -0,0 +1,313 @@ +*> \brief \b CGEQPF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEQPF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEQPF( M, N, A, LDA, JPVT, TAU, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* REAL RWORK( * ) +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine CGEQP3. +*> +*> CGEQPF computes a QR factorization with column pivoting of a +*> complex M-by-N matrix A: A*P = Q*R. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0 +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the upper triangle of the array contains the +*> min(M,N)-by-N upper triangular matrix R; the elements +*> below the diagonal, together with the array TAU, +*> represent the unitary matrix Q as a product of +*> min(m,n) elementary reflectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +*> to the front of A*P (a leading column); if JPVT(i) = 0, +*> the i-th column of A is a free column. +*> On exit, if JPVT(i) = k, then the i-th column of A*P +*> was the k-th column of A. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(n) +*> +*> Each H(i) has the form +*> +*> H = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i). +*> +*> The matrix P is represented in jpvt as follows: If +*> jpvt(j) = i +*> then the jth column of P is the ith canonical unit vector. +*> +*> Partial column norm updating strategy modified by +*> Z. Drmac and Z. Bujanovic, Dept. of Mathematics, +*> University of Zagreb, Croatia. +*> -- April 2011 -- +*> For more details see LAPACK Working Note 176. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGEQPF( M, N, A, LDA, JPVT, TAU, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, ITEMP, J, MA, MN, PVT + REAL TEMP, TEMP2, TOL3Z + COMPLEX AII +* .. +* .. External Subroutines .. + EXTERNAL CGEQR2, CLARF, CLARFG, CSWAP, CUNM2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CMPLX, CONJG, MAX, MIN, SQRT +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SCNRM2, SLAMCH + EXTERNAL ISAMAX, SCNRM2, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQPF', -INFO ) + RETURN + END IF +* + MN = MIN( M, N ) + TOL3Z = SQRT(SLAMCH('Epsilon')) +* +* Move initial columns up front +* + ITEMP = 1 + DO 10 I = 1, N + IF( JPVT( I ).NE.0 ) THEN + IF( I.NE.ITEMP ) THEN + CALL CSWAP( M, A( 1, I ), 1, A( 1, ITEMP ), 1 ) + JPVT( I ) = JPVT( ITEMP ) + JPVT( ITEMP ) = I + ELSE + JPVT( I ) = I + END IF + ITEMP = ITEMP + 1 + ELSE + JPVT( I ) = I + END IF + 10 CONTINUE + ITEMP = ITEMP - 1 +* +* Compute the QR factorization and update remaining columns +* + IF( ITEMP.GT.0 ) THEN + MA = MIN( ITEMP, M ) + CALL CGEQR2( M, MA, A, LDA, TAU, WORK, INFO ) + IF( MA.LT.N ) THEN + CALL CUNM2R( 'Left', 'Conjugate transpose', M, N-MA, MA, A, + $ LDA, TAU, A( 1, MA+1 ), LDA, WORK, INFO ) + END IF + END IF +* + IF( ITEMP.LT.MN ) THEN +* +* Initialize partial column norms. The first n elements of +* work store the exact column norms. +* + DO 20 I = ITEMP + 1, N + RWORK( I ) = SCNRM2( M-ITEMP, A( ITEMP+1, I ), 1 ) + RWORK( N+I ) = RWORK( I ) + 20 CONTINUE +* +* Compute factorization +* + DO 40 I = ITEMP + 1, MN +* +* Determine ith pivot column and swap if necessary +* + PVT = ( I-1 ) + ISAMAX( N-I+1, RWORK( I ), 1 ) +* + IF( PVT.NE.I ) THEN + CALL CSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( I ) + JPVT( I ) = ITEMP + RWORK( PVT ) = RWORK( I ) + RWORK( N+PVT ) = RWORK( N+I ) + END IF +* +* Generate elementary reflector H(i) +* + AII = A( I, I ) + CALL CLARFG( M-I+1, AII, A( MIN( I+1, M ), I ), 1, + $ TAU( I ) ) + A( I, I ) = AII +* + IF( I.LT.N ) THEN +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + AII = A( I, I ) + A( I, I ) = CMPLX( ONE ) + CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1, + $ CONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK ) + A( I, I ) = AII + END IF +* +* Update partial column norms +* + DO 30 J = I + 1, N + IF( RWORK( J ).NE.ZERO ) THEN +* +* NOTE: The following 4 lines follow from the analysis in +* Lapack Working Note 176. +* + TEMP = ABS( A( I, J ) ) / RWORK( J ) + TEMP = MAX( ZERO, ( ONE+TEMP )*( ONE-TEMP ) ) + TEMP2 = TEMP*( RWORK( J ) / RWORK( N+J ) )**2 + IF( TEMP2 .LE. TOL3Z ) THEN + IF( M-I.GT.0 ) THEN + RWORK( J ) = SCNRM2( M-I, A( I+1, J ), 1 ) + RWORK( N+J ) = RWORK( J ) + ELSE + RWORK( J ) = ZERO + RWORK( N+J ) = ZERO + END IF + ELSE + RWORK( J ) = RWORK( J )*SQRT( TEMP ) + END IF + END IF + 30 CONTINUE +* + 40 CONTINUE + END IF + RETURN +* +* End of CGEQPF +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/cggsvd.f b/dspl/liblapack/SRC/DEPRECATED/cggsvd.f new file mode 100644 index 0000000..e7ece14 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/cggsvd.f @@ -0,0 +1,466 @@ +*> \brief CGGSVD computes the singular value decomposition (SVD) for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGGSVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGGSVD( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, +* LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, +* RWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBQ, JOBU, JOBV +* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL ALPHA( * ), BETA( * ), RWORK( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine CGGSVD3. +*> +*> CGGSVD computes the generalized singular value decomposition (GSVD) +*> of an M-by-N complex matrix A and P-by-N complex matrix B: +*> +*> U**H*A*Q = D1*( 0 R ), V**H*B*Q = D2*( 0 R ) +*> +*> where U, V and Q are unitary matrices. +*> Let K+L = the effective numerical rank of the +*> matrix (A**H,B**H)**H, then R is a (K+L)-by-(K+L) nonsingular upper +*> triangular matrix, D1 and D2 are M-by-(K+L) and P-by-(K+L) "diagonal" +*> matrices and of the following structures, respectively: +*> +*> If M-K-L >= 0, +*> +*> K L +*> D1 = K ( I 0 ) +*> L ( 0 C ) +*> M-K-L ( 0 0 ) +*> +*> K L +*> D2 = L ( 0 S ) +*> P-L ( 0 0 ) +*> +*> N-K-L K L +*> ( 0 R ) = K ( 0 R11 R12 ) +*> L ( 0 0 R22 ) +*> +*> where +*> +*> C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), +*> S = diag( BETA(K+1), ... , BETA(K+L) ), +*> C**2 + S**2 = I. +*> +*> R is stored in A(1:K+L,N-K-L+1:N) on exit. +*> +*> If M-K-L < 0, +*> +*> K M-K K+L-M +*> D1 = K ( I 0 0 ) +*> M-K ( 0 C 0 ) +*> +*> K M-K K+L-M +*> D2 = M-K ( 0 S 0 ) +*> K+L-M ( 0 0 I ) +*> P-L ( 0 0 0 ) +*> +*> N-K-L K M-K K+L-M +*> ( 0 R ) = K ( 0 R11 R12 R13 ) +*> M-K ( 0 0 R22 R23 ) +*> K+L-M ( 0 0 0 R33 ) +*> +*> where +*> +*> C = diag( ALPHA(K+1), ... , ALPHA(M) ), +*> S = diag( BETA(K+1), ... , BETA(M) ), +*> C**2 + S**2 = I. +*> +*> (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored +*> ( 0 R22 R23 ) +*> in B(M-K+1:L,N+M-K-L+1:N) on exit. +*> +*> The routine computes C, S, R, and optionally the unitary +*> transformation matrices U, V and Q. +*> +*> In particular, if B is an N-by-N nonsingular matrix, then the GSVD of +*> A and B implicitly gives the SVD of A*inv(B): +*> A*inv(B) = U*(D1*inv(D2))*V**H. +*> If ( A**H,B**H)**H has orthnormal columns, then the GSVD of A and B is also +*> equal to the CS decomposition of A and B. Furthermore, the GSVD can +*> be used to derive the solution of the eigenvalue problem: +*> A**H*A x = lambda* B**H*B x. +*> In some literature, the GSVD of A and B is presented in the form +*> U**H*A*X = ( 0 D1 ), V**H*B*X = ( 0 D2 ) +*> where U and V are orthogonal and X is nonsingular, and D1 and D2 are +*> ``diagonal''. The former GSVD form can be converted to the latter +*> form by taking the nonsingular matrix X as +*> +*> X = Q*( I 0 ) +*> ( 0 inv(R) ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> = 'U': Unitary matrix U is computed; +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> = 'V': Unitary matrix V is computed; +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBQ +*> \verbatim +*> JOBQ is CHARACTER*1 +*> = 'Q': Unitary matrix Q is computed; +*> = 'N': Q is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> \endverbatim +*> +*> \param[out] L +*> \verbatim +*> L is INTEGER +*> +*> On exit, K and L specify the dimension of the subblocks +*> described in Purpose. +*> K + L = effective numerical rank of (A**H,B**H)**H. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A contains the triangular matrix R, or part of R. +*> See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, B contains part of the triangular matrix R if +*> M-K-L < 0. See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is REAL array, dimension (N) +*> +*> On exit, ALPHA and BETA contain the generalized singular +*> value pairs of A and B; +*> ALPHA(1:K) = 1, +*> BETA(1:K) = 0, +*> and if M-K-L >= 0, +*> ALPHA(K+1:K+L) = C, +*> BETA(K+1:K+L) = S, +*> or if M-K-L < 0, +*> ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0 +*> BETA(K+1:M) =S, BETA(M+1:K+L) =1 +*> and +*> ALPHA(K+L+1:N) = 0 +*> BETA(K+L+1:N) = 0 +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX array, dimension (LDU,M) +*> If JOBU = 'U', U contains the M-by-M unitary matrix U. +*> If JOBU = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,M) if +*> JOBU = 'U'; LDU >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX array, dimension (LDV,P) +*> If JOBV = 'V', V contains the P-by-P unitary matrix V. +*> If JOBV = 'N', V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,P) if +*> JOBV = 'V'; LDV >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ,N) +*> If JOBQ = 'Q', Q contains the N-by-N unitary matrix Q. +*> If JOBQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N) if +*> JOBQ = 'Q'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (max(3*N,M,P)+N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> On exit, IWORK stores the sorting information. More +*> precisely, the following loop will sort ALPHA +*> for I = K+1, min(M,K+L) +*> swap ALPHA(I) and ALPHA(IWORK(I)) +*> endfor +*> such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, the Jacobi-type procedure failed to +*> converge. For further details, see subroutine CTGSJA. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> TOLA REAL +*> TOLB REAL +*> TOLA and TOLB are the thresholds to determine the effective +*> rank of (A**H,B**H)**H. Generally, they are set to +*> TOLA = MAX(M,N)*norm(A)*MACHEPS, +*> TOLB = MAX(P,N)*norm(B)*MACHEPS. +*> The size of TOLA and TOLB may affect the size of backward +*> errors of the decomposition. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERsing +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE CGGSVD( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, + $ LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, + $ RWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL ALPHA( * ), BETA( * ), RWORK( * ) + COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL WANTQ, WANTU, WANTV + INTEGER I, IBND, ISUB, J, NCYCLE + REAL ANORM, BNORM, SMAX, TEMP, TOLA, TOLB, ULP, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANGE, SLAMCH + EXTERNAL LSAME, CLANGE, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CGGSVP, CTGSJA, SCOPY, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( P.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -12 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGSVD', -INFO ) + RETURN + END IF +* +* Compute the Frobenius norm of matrices A and B +* + ANORM = CLANGE( '1', M, N, A, LDA, RWORK ) + BNORM = CLANGE( '1', P, N, B, LDB, RWORK ) +* +* Get machine precision and set up threshold for determining +* the effective numerical rank of the matrices A and B. +* + ULP = SLAMCH( 'Precision' ) + UNFL = SLAMCH( 'Safe Minimum' ) + TOLA = MAX( M, N )*MAX( ANORM, UNFL )*ULP + TOLB = MAX( P, N )*MAX( BNORM, UNFL )*ULP +* + CALL CGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, TOLA, + $ TOLB, K, L, U, LDU, V, LDV, Q, LDQ, IWORK, RWORK, + $ WORK, WORK( N+1 ), INFO ) +* +* Compute the GSVD of two upper "triangular" matrices +* + CALL CTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, LDB, + $ TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, + $ WORK, NCYCLE, INFO ) +* +* Sort the singular values and store the pivot indices in IWORK +* Copy ALPHA to RWORK, then sort ALPHA in RWORK +* + CALL SCOPY( N, ALPHA, 1, RWORK, 1 ) + IBND = MIN( L, M-K ) + DO 20 I = 1, IBND +* +* Scan for largest ALPHA(K+I) +* + ISUB = I + SMAX = RWORK( K+I ) + DO 10 J = I + 1, IBND + TEMP = RWORK( K+J ) + IF( TEMP.GT.SMAX ) THEN + ISUB = J + SMAX = TEMP + END IF + 10 CONTINUE + IF( ISUB.NE.I ) THEN + RWORK( K+ISUB ) = RWORK( K+I ) + RWORK( K+I ) = SMAX + IWORK( K+I ) = K + ISUB + ELSE + IWORK( K+I ) = K + I + END IF + 20 CONTINUE +* + RETURN +* +* End of CGGSVD +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/cggsvp.f b/dspl/liblapack/SRC/DEPRECATED/cggsvp.f new file mode 100644 index 0000000..24bb09a --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/cggsvp.f @@ -0,0 +1,536 @@ +*> \brief \b CGGSVP +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGGSVP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, +* TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, +* IWORK, RWORK, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBQ, JOBU, JOBV +* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P +* REAL TOLA, TOLB +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL RWORK( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine CGGSVP3. +*> +*> CGGSVP computes unitary matrices U, V and Q such that +*> +*> N-K-L K L +*> U**H*A*Q = K ( 0 A12 A13 ) if M-K-L >= 0; +*> L ( 0 0 A23 ) +*> M-K-L ( 0 0 0 ) +*> +*> N-K-L K L +*> = K ( 0 A12 A13 ) if M-K-L < 0; +*> M-K ( 0 0 A23 ) +*> +*> N-K-L K L +*> V**H*B*Q = L ( 0 0 B13 ) +*> P-L ( 0 0 0 ) +*> +*> where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular +*> upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, +*> otherwise A23 is (M-K)-by-L upper trapezoidal. K+L = the effective +*> numerical rank of the (M+P)-by-N matrix (A**H,B**H)**H. +*> +*> This decomposition is the preprocessing step for computing the +*> Generalized Singular Value Decomposition (GSVD), see subroutine +*> CGGSVD. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> = 'U': Unitary matrix U is computed; +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> = 'V': Unitary matrix V is computed; +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBQ +*> \verbatim +*> JOBQ is CHARACTER*1 +*> = 'Q': Unitary matrix Q is computed; +*> = 'N': Q is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A contains the triangular (or trapezoidal) matrix +*> described in the Purpose section. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, B contains the triangular matrix described in +*> the Purpose section. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[in] TOLA +*> \verbatim +*> TOLA is REAL +*> \endverbatim +*> +*> \param[in] TOLB +*> \verbatim +*> TOLB is REAL +*> +*> TOLA and TOLB are the thresholds to determine the effective +*> numerical rank of matrix B and a subblock of A. Generally, +*> they are set to +*> TOLA = MAX(M,N)*norm(A)*MACHEPS, +*> TOLB = MAX(P,N)*norm(B)*MACHEPS. +*> The size of TOLA and TOLB may affect the size of backward +*> errors of the decomposition. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> \endverbatim +*> +*> \param[out] L +*> \verbatim +*> L is INTEGER +*> +*> On exit, K and L specify the dimension of the subblocks +*> described in Purpose section. +*> K + L = effective numerical rank of (A**H,B**H)**H. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX array, dimension (LDU,M) +*> If JOBU = 'U', U contains the unitary matrix U. +*> If JOBU = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,M) if +*> JOBU = 'U'; LDU >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX array, dimension (LDV,P) +*> If JOBV = 'V', V contains the unitary matrix V. +*> If JOBV = 'N', V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,P) if +*> JOBV = 'V'; LDV >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ,N) +*> If JOBQ = 'Q', Q contains the unitary matrix Q. +*> If JOBQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N) if +*> JOBQ = 'Q'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (max(3*N,M,P)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> The subroutine uses LAPACK subroutine CGEQPF for the QR factorization +*> with column pivoting to detect the effective numerical rank of the +*> a matrix. It may be replaced by a better rank determination strategy. +*> +* ===================================================================== + SUBROUTINE CGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, + $ TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, + $ IWORK, RWORK, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P + REAL TOLA, TOLB +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL FORWRD, WANTQ, WANTU, WANTV + INTEGER I, J + COMPLEX T +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEQPF, CGEQR2, CGERQ2, CLACPY, CLAPMT, CLASET, + $ CUNG2R, CUNM2R, CUNMR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( T ) = ABS( REAL( T ) ) + ABS( AIMAG( T ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) + FORWRD = .TRUE. +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( P.LT.0 ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -10 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGSVP', -INFO ) + RETURN + END IF +* +* QR with column pivoting of B: B*P = V*( S11 S12 ) +* ( 0 0 ) +* + DO 10 I = 1, N + IWORK( I ) = 0 + 10 CONTINUE + CALL CGEQPF( P, N, B, LDB, IWORK, TAU, WORK, RWORK, INFO ) +* +* Update A := A*P +* + CALL CLAPMT( FORWRD, M, N, A, LDA, IWORK ) +* +* Determine the effective rank of matrix B. +* + L = 0 + DO 20 I = 1, MIN( P, N ) + IF( CABS1( B( I, I ) ).GT.TOLB ) + $ L = L + 1 + 20 CONTINUE +* + IF( WANTV ) THEN +* +* Copy the details of V, and form V. +* + CALL CLASET( 'Full', P, P, CZERO, CZERO, V, LDV ) + IF( P.GT.1 ) + $ CALL CLACPY( 'Lower', P-1, N, B( 2, 1 ), LDB, V( 2, 1 ), + $ LDV ) + CALL CUNG2R( P, P, MIN( P, N ), V, LDV, TAU, WORK, INFO ) + END IF +* +* Clean up B +* + DO 40 J = 1, L - 1 + DO 30 I = J + 1, L + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE + IF( P.GT.L ) + $ CALL CLASET( 'Full', P-L, N, CZERO, CZERO, B( L+1, 1 ), LDB ) +* + IF( WANTQ ) THEN +* +* Set Q = I and Update Q := Q*P +* + CALL CLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) + CALL CLAPMT( FORWRD, N, N, Q, LDQ, IWORK ) + END IF +* + IF( P.GE.L .AND. N.NE.L ) THEN +* +* RQ factorization of ( S11 S12 ) = ( 0 S12 )*Z +* + CALL CGERQ2( L, N, B, LDB, TAU, WORK, INFO ) +* +* Update A := A*Z**H +* + CALL CUNMR2( 'Right', 'Conjugate transpose', M, N, L, B, LDB, + $ TAU, A, LDA, WORK, INFO ) + IF( WANTQ ) THEN +* +* Update Q := Q*Z**H +* + CALL CUNMR2( 'Right', 'Conjugate transpose', N, N, L, B, + $ LDB, TAU, Q, LDQ, WORK, INFO ) + END IF +* +* Clean up B +* + CALL CLASET( 'Full', L, N-L, CZERO, CZERO, B, LDB ) + DO 60 J = N - L + 1, N + DO 50 I = J - N + L + 1, L + B( I, J ) = CZERO + 50 CONTINUE + 60 CONTINUE +* + END IF +* +* Let N-L L +* A = ( A11 A12 ) M, +* +* then the following does the complete QR decomposition of A11: +* +* A11 = U*( 0 T12 )*P1**H +* ( 0 0 ) +* + DO 70 I = 1, N - L + IWORK( I ) = 0 + 70 CONTINUE + CALL CGEQPF( M, N-L, A, LDA, IWORK, TAU, WORK, RWORK, INFO ) +* +* Determine the effective rank of A11 +* + K = 0 + DO 80 I = 1, MIN( M, N-L ) + IF( CABS1( A( I, I ) ).GT.TOLA ) + $ K = K + 1 + 80 CONTINUE +* +* Update A12 := U**H*A12, where A12 = A( 1:M, N-L+1:N ) +* + CALL CUNM2R( 'Left', 'Conjugate transpose', M, L, MIN( M, N-L ), + $ A, LDA, TAU, A( 1, N-L+1 ), LDA, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Copy the details of U, and form U +* + CALL CLASET( 'Full', M, M, CZERO, CZERO, U, LDU ) + IF( M.GT.1 ) + $ CALL CLACPY( 'Lower', M-1, N-L, A( 2, 1 ), LDA, U( 2, 1 ), + $ LDU ) + CALL CUNG2R( M, M, MIN( M, N-L ), U, LDU, TAU, WORK, INFO ) + END IF +* + IF( WANTQ ) THEN +* +* Update Q( 1:N, 1:N-L ) = Q( 1:N, 1:N-L )*P1 +* + CALL CLAPMT( FORWRD, N, N-L, Q, LDQ, IWORK ) + END IF +* +* Clean up A: set the strictly lower triangular part of +* A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. +* + DO 100 J = 1, K - 1 + DO 90 I = J + 1, K + A( I, J ) = CZERO + 90 CONTINUE + 100 CONTINUE + IF( M.GT.K ) + $ CALL CLASET( 'Full', M-K, N-L, CZERO, CZERO, A( K+1, 1 ), LDA ) +* + IF( N-L.GT.K ) THEN +* +* RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 +* + CALL CGERQ2( K, N-L, A, LDA, TAU, WORK, INFO ) +* + IF( WANTQ ) THEN +* +* Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1**H +* + CALL CUNMR2( 'Right', 'Conjugate transpose', N, N-L, K, A, + $ LDA, TAU, Q, LDQ, WORK, INFO ) + END IF +* +* Clean up A +* + CALL CLASET( 'Full', K, N-L-K, CZERO, CZERO, A, LDA ) + DO 120 J = N - L - K + 1, N - L + DO 110 I = J - N + L + K + 1, K + A( I, J ) = CZERO + 110 CONTINUE + 120 CONTINUE +* + END IF +* + IF( M.GT.K ) THEN +* +* QR factorization of A( K+1:M,N-L+1:N ) +* + CALL CGEQR2( M-K, L, A( K+1, N-L+1 ), LDA, TAU, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Update U(:,K+1:M) := U(:,K+1:M)*U1 +* + CALL CUNM2R( 'Right', 'No transpose', M, M-K, MIN( M-K, L ), + $ A( K+1, N-L+1 ), LDA, TAU, U( 1, K+1 ), LDU, + $ WORK, INFO ) + END IF +* +* Clean up +* + DO 140 J = N - L + 1, N + DO 130 I = J - N + K + L + 1, M + A( I, J ) = CZERO + 130 CONTINUE + 140 CONTINUE +* + END IF +* + RETURN +* +* End of CGGSVP +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/clahrd.f b/dspl/liblapack/SRC/DEPRECATED/clahrd.f new file mode 100644 index 0000000..7be3f2a --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/clahrd.f @@ -0,0 +1,292 @@ +*> \brief \b CLAHRD reduces the first nb columns of a general rectangular matrix A so that elements below the k-th subdiagonal are zero, and returns auxiliary matrices which are needed to apply the transformation to the unreduced part of A. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAHRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAHRD( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* .. Scalar Arguments .. +* INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), T( LDT, NB ), TAU( NB ), +* $ Y( LDY, NB ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine CLAHR2. +*> +*> CLAHRD reduces the first NB columns of a complex general n-by-(n-k+1) +*> matrix A so that elements below the k-th subdiagonal are zero. The +*> reduction is performed by a unitary similarity transformation +*> Q**H * A * Q. The routine returns the matrices V and T which determine +*> Q as a block reflector I - V*T*V**H, and also the matrix Y = A * V * T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The offset for the reduction. Elements below the k-th +*> subdiagonal in the first NB columns are reduced to zero. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of columns to be reduced. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N-K+1) +*> On entry, the n-by-(n-k+1) general matrix A. +*> On exit, the elements on and above the k-th subdiagonal in +*> the first NB columns are overwritten with the corresponding +*> elements of the reduced matrix; the elements below the k-th +*> subdiagonal, with the array TAU, represent the matrix Q as a +*> product of elementary reflectors. The other columns of A are +*> unchanged. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (NB) +*> The scalar factors of the elementary reflectors. See Further +*> Details. +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,NB) +*> The upper triangular matrix T. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[out] Y +*> \verbatim +*> Y is COMPLEX array, dimension (LDY,NB) +*> The n-by-nb matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of nb elementary reflectors +*> +*> Q = H(1) H(2) . . . H(nb). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in +*> A(i+k+1:n,i), and tau in TAU(i). +*> +*> The elements of the vectors v together form the (n-k+1)-by-nb matrix +*> V which is needed, with T and Y, to apply the transformation to the +*> unreduced part of the matrix, using an update of the form: +*> A := (I - V*T*V**H) * (A - Y*V**H). +*> +*> The contents of A on exit are illustrated by the following example +*> with n = 7, k = 3 and nb = 2: +*> +*> ( a h a a a ) +*> ( a h a a a ) +*> ( a h a a a ) +*> ( h h a a a ) +*> ( v1 h a a a ) +*> ( v1 v2 a a a ) +*> ( v1 v2 a a a ) +*> +*> where a denotes an element of the original matrix A, h denotes a +*> modified element of the upper Hessenberg matrix H, and vi denotes an +*> element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLAHRD( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), T( LDT, NB ), TAU( NB ), + $ Y( LDY, NB ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX EI +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGEMV, CLACGV, CLARFG, CSCAL, + $ CTRMV +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, NB + IF( I.GT.1 ) THEN +* +* Update A(1:n,i) +* +* Compute i-th column of A - Y * V**H +* + CALL CLACGV( I-1, A( K+I-1, 1 ), LDA ) + CALL CGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, + $ A( K+I-1, 1 ), LDA, ONE, A( 1, I ), 1 ) + CALL CLACGV( I-1, A( K+I-1, 1 ), LDA ) +* +* Apply I - V * T**H * V**H to this column (call it b) from the +* left, using the last column of T as workspace +* +* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) +* ( V2 ) ( b2 ) +* +* where V1 is unit lower triangular +* +* w := V1**H * b1 +* + CALL CCOPY( I-1, A( K+1, I ), 1, T( 1, NB ), 1 ) + CALL CTRMV( 'Lower', 'Conjugate transpose', 'Unit', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) +* +* w := w + V2**H *b2 +* + CALL CGEMV( 'Conjugate transpose', N-K-I+1, I-1, ONE, + $ A( K+I, 1 ), LDA, A( K+I, I ), 1, ONE, + $ T( 1, NB ), 1 ) +* +* w := T**H *w +* + CALL CTRMV( 'Upper', 'Conjugate transpose', 'Non-unit', I-1, + $ T, LDT, T( 1, NB ), 1 ) +* +* b2 := b2 - V2*w +* + CALL CGEMV( 'No transpose', N-K-I+1, I-1, -ONE, A( K+I, 1 ), + $ LDA, T( 1, NB ), 1, ONE, A( K+I, I ), 1 ) +* +* b1 := b1 - V1*w +* + CALL CTRMV( 'Lower', 'No transpose', 'Unit', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) + CALL CAXPY( I-1, -ONE, T( 1, NB ), 1, A( K+1, I ), 1 ) +* + A( K+I-1, I-1 ) = EI + END IF +* +* Generate the elementary reflector H(i) to annihilate +* A(k+i+1:n,i) +* + EI = A( K+I, I ) + CALL CLARFG( N-K-I+1, EI, A( MIN( K+I+1, N ), I ), 1, + $ TAU( I ) ) + A( K+I, I ) = ONE +* +* Compute Y(1:n,i) +* + CALL CGEMV( 'No transpose', N, N-K-I+1, ONE, A( 1, I+1 ), LDA, + $ A( K+I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', N-K-I+1, I-1, ONE, + $ A( K+I, 1 ), LDA, A( K+I, I ), 1, ZERO, T( 1, I ), + $ 1 ) + CALL CGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, T( 1, I ), 1, + $ ONE, Y( 1, I ), 1 ) + CALL CSCAL( N, TAU( I ), Y( 1, I ), 1 ) +* +* Compute T(1:i,i) +* + CALL CSCAL( I-1, -TAU( I ), T( 1, I ), 1 ) + CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, LDT, + $ T( 1, I ), 1 ) + T( I, I ) = TAU( I ) +* + 10 CONTINUE + A( K+NB, NB ) = EI +* + RETURN +* +* End of CLAHRD +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/clatzm.f b/dspl/liblapack/SRC/DEPRECATED/clatzm.f new file mode 100644 index 0000000..377eef7 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/clatzm.f @@ -0,0 +1,225 @@ +*> \brief \b CLATZM +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLATZM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLATZM( SIDE, M, N, V, INCV, TAU, C1, C2, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* COMPLEX TAU +* .. +* .. Array Arguments .. +* COMPLEX C1( LDC, * ), C2( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine CUNMRZ. +*> +*> CLATZM applies a Householder matrix generated by CTZRQF to a matrix. +*> +*> Let P = I - tau*u*u**H, u = ( 1 ), +*> ( v ) +*> where v is an (m-1) vector if SIDE = 'L', or a (n-1) vector if +*> SIDE = 'R'. +*> +*> If SIDE equals 'L', let +*> C = [ C1 ] 1 +*> [ C2 ] m-1 +*> n +*> Then C is overwritten by P*C. +*> +*> If SIDE equals 'R', let +*> C = [ C1, C2 ] m +*> 1 n-1 +*> Then C is overwritten by C*P. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form P * C +*> = 'R': form C * P +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of P. V is not used +*> if TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0 +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX +*> The value tau in the representation of P. +*> \endverbatim +*> +*> \param[in,out] C1 +*> \verbatim +*> C1 is COMPLEX array, dimension +*> (LDC,N) if SIDE = 'L' +*> (M,1) if SIDE = 'R' +*> On entry, the n-vector C1 if SIDE = 'L', or the m-vector C1 +*> if SIDE = 'R'. +*> +*> On exit, the first row of P*C if SIDE = 'L', or the first +*> column of C*P if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in,out] C2 +*> \verbatim +*> C2 is COMPLEX array, dimension +*> (LDC, N) if SIDE = 'L' +*> (LDC, N-1) if SIDE = 'R' +*> On entry, the (m - 1) x n matrix C2 if SIDE = 'L', or the +*> m x (n - 1) matrix C2 if SIDE = 'R'. +*> +*> On exit, rows 2:m of P*C if SIDE = 'L', or columns 2:m of C*P +*> if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the arrays C1 and C2. +*> LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension +*> (N) if SIDE = 'L' +*> (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CLATZM( SIDE, M, N, V, INCV, TAU, C1, C2, LDC, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + COMPLEX TAU +* .. +* .. Array Arguments .. + COMPLEX C1( LDC, * ), C2( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGEMV, CGERC, CGERU, CLACGV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( ( MIN( M, N ).EQ.0 ) .OR. ( TAU.EQ.ZERO ) ) + $ RETURN +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* w := ( C1 + v**H * C2 )**H +* + CALL CCOPY( N, C1, LDC, WORK, 1 ) + CALL CLACGV( N, WORK, 1 ) + CALL CGEMV( 'Conjugate transpose', M-1, N, ONE, C2, LDC, V, + $ INCV, ONE, WORK, 1 ) +* +* [ C1 ] := [ C1 ] - tau* [ 1 ] * w**H +* [ C2 ] [ C2 ] [ v ] +* + CALL CLACGV( N, WORK, 1 ) + CALL CAXPY( N, -TAU, WORK, 1, C1, LDC ) + CALL CGERU( M-1, N, -TAU, V, INCV, WORK, 1, C2, LDC ) +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* w := C1 + C2 * v +* + CALL CCOPY( M, C1, 1, WORK, 1 ) + CALL CGEMV( 'No transpose', M, N-1, ONE, C2, LDC, V, INCV, ONE, + $ WORK, 1 ) +* +* [ C1, C2 ] := [ C1, C2 ] - tau* w * [ 1 , v**H] +* + CALL CAXPY( M, -TAU, WORK, 1, C1, 1 ) + CALL CGERC( M, N-1, -TAU, WORK, 1, V, INCV, C2, LDC ) + END IF +* + RETURN +* +* End of CLATZM +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/ctzrqf.f b/dspl/liblapack/SRC/DEPRECATED/ctzrqf.f new file mode 100644 index 0000000..5ed47d7 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/ctzrqf.f @@ -0,0 +1,241 @@ +*> \brief \b CTZRQF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTZRQF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTZRQF( M, N, A, LDA, TAU, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine CTZRZF. +*> +*> CTZRQF reduces the M-by-N ( M<=N ) complex upper trapezoidal matrix A +*> to upper triangular form by means of unitary transformations. +*> +*> The upper trapezoidal matrix A is factored as +*> +*> A = ( R 0 ) * Z, +*> +*> where Z is an N-by-N unitary matrix and R is an M-by-M upper +*> triangular matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= M. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the leading M-by-N upper trapezoidal part of the +*> array A must contain the matrix to be factorized. +*> On exit, the leading M-by-M upper triangular part of A +*> contains the upper triangular matrix R, and elements M+1 to +*> N of the first M rows of A, with the array TAU, represent the +*> unitary matrix Z as a product of M elementary reflectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (M) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The factorization is obtained by Householder's method. The kth +*> transformation matrix, Z( k ), whose conjugate transpose is used to +*> introduce zeros into the (m - k + 1)th row of A, is given in the form +*> +*> Z( k ) = ( I 0 ), +*> ( 0 T( k ) ) +*> +*> where +*> +*> T( k ) = I - tau*u( k )*u( k )**H, u( k ) = ( 1 ), +*> ( 0 ) +*> ( z( k ) ) +*> +*> tau is a scalar and z( k ) is an ( n - m ) element vector. +*> tau and z( k ) are chosen to annihilate the elements of the kth row +*> of X. +*> +*> The scalar tau is returned in the kth element of TAU and the vector +*> u( k ) in the kth row of A, such that the elements of z( k ) are +*> in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in +*> the upper triangular part of A. +*> +*> Z is given by +*> +*> Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTZRQF( M, N, A, LDA, TAU, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE, CZERO + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), + $ CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K, M1 + COMPLEX ALPHA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGEMV, CGERC, CLACGV, CLARFG, + $ XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTZRQF', -INFO ) + RETURN + END IF +* +* Perform the factorization. +* + IF( M.EQ.0 ) + $ RETURN + IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = CZERO + 10 CONTINUE + ELSE + M1 = MIN( M+1, N ) + DO 20 K = M, 1, -1 +* +* Use a Householder reflection to zero the kth row of A. +* First set up the reflection. +* + A( K, K ) = CONJG( A( K, K ) ) + CALL CLACGV( N-M, A( K, M1 ), LDA ) + ALPHA = A( K, K ) + CALL CLARFG( N-M+1, ALPHA, A( K, M1 ), LDA, TAU( K ) ) + A( K, K ) = ALPHA + TAU( K ) = CONJG( TAU( K ) ) +* + IF( TAU( K ).NE.CZERO .AND. K.GT.1 ) THEN +* +* We now perform the operation A := A*P( k )**H. +* +* Use the first ( k - 1 ) elements of TAU to store a( k ), +* where a( k ) consists of the first ( k - 1 ) elements of +* the kth column of A. Also let B denote the first +* ( k - 1 ) rows of the last ( n - m ) columns of A. +* + CALL CCOPY( K-1, A( 1, K ), 1, TAU, 1 ) +* +* Form w = a( k ) + B*z( k ) in TAU. +* + CALL CGEMV( 'No transpose', K-1, N-M, CONE, A( 1, M1 ), + $ LDA, A( K, M1 ), LDA, CONE, TAU, 1 ) +* +* Now form a( k ) := a( k ) - conjg(tau)*w +* and B := B - conjg(tau)*w*z( k )**H. +* + CALL CAXPY( K-1, -CONJG( TAU( K ) ), TAU, 1, A( 1, K ), + $ 1 ) + CALL CGERC( K-1, N-M, -CONJG( TAU( K ) ), TAU, 1, + $ A( K, M1 ), LDA, A( 1, M1 ), LDA ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of CTZRQF +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/dgegs.f b/dspl/liblapack/SRC/DEPRECATED/dgegs.f new file mode 100644 index 0000000..eb40448 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/dgegs.f @@ -0,0 +1,541 @@ +*> \brief DGEEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEGS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEGS( JOBVSL, JOBVSR, N, A, LDA, B, LDB, ALPHAR, +* ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVSL, JOBVSR +* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), +* $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), +* $ VSR( LDVSR, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine DGGES. +*> +*> DGEGS computes the eigenvalues, real Schur form, and, optionally, +*> left and or/right Schur vectors of a real matrix pair (A,B). +*> Given two square matrices A and B, the generalized real Schur +*> factorization has the form +*> +*> A = Q*S*Z**T, B = Q*T*Z**T +*> +*> where Q and Z are orthogonal matrices, T is upper triangular, and S +*> is an upper quasi-triangular matrix with 1-by-1 and 2-by-2 diagonal +*> blocks, the 2-by-2 blocks corresponding to complex conjugate pairs +*> of eigenvalues of (A,B). The columns of Q are the left Schur vectors +*> and the columns of Z are the right Schur vectors. +*> +*> If only the eigenvalues of (A,B) are needed, the driver routine +*> DGEGV should be used instead. See DGEGV for a description of the +*> eigenvalues of the generalized nonsymmetric eigenvalue problem +*> (GNEP). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVSL +*> \verbatim +*> JOBVSL is CHARACTER*1 +*> = 'N': do not compute the left Schur vectors; +*> = 'V': compute the left Schur vectors (returned in VSL). +*> \endverbatim +*> +*> \param[in] JOBVSR +*> \verbatim +*> JOBVSR is CHARACTER*1 +*> = 'N': do not compute the right Schur vectors; +*> = 'V': compute the right Schur vectors (returned in VSR). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VSL, and VSR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the matrix A. +*> On exit, the upper quasi-triangular matrix S from the +*> generalized real Schur factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, N) +*> On entry, the matrix B. +*> On exit, the upper triangular matrix T from the generalized +*> real Schur factorization. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is DOUBLE PRECISION array, dimension (N) +*> The real parts of each scalar alpha defining an eigenvalue +*> of GNEP. +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is DOUBLE PRECISION array, dimension (N) +*> The imaginary parts of each scalar alpha defining an +*> eigenvalue of GNEP. If ALPHAI(j) is zero, then the j-th +*> eigenvalue is real; if positive, then the j-th and (j+1)-st +*> eigenvalues are a complex conjugate pair, with +*> ALPHAI(j+1) = -ALPHAI(j). +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION array, dimension (N) +*> The scalars beta that define the eigenvalues of GNEP. +*> Together, the quantities alpha = (ALPHAR(j),ALPHAI(j)) and +*> beta = BETA(j) represent the j-th eigenvalue of the matrix +*> pair (A,B), in one of the forms lambda = alpha/beta or +*> mu = beta/alpha. Since either lambda or mu may overflow, +*> they should not, in general, be computed. +*> \endverbatim +*> +*> \param[out] VSL +*> \verbatim +*> VSL is DOUBLE PRECISION array, dimension (LDVSL,N) +*> If JOBVSL = 'V', the matrix of left Schur vectors Q. +*> Not referenced if JOBVSL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSL +*> \verbatim +*> LDVSL is INTEGER +*> The leading dimension of the matrix VSL. LDVSL >=1, and +*> if JOBVSL = 'V', LDVSL >= N. +*> \endverbatim +*> +*> \param[out] VSR +*> \verbatim +*> VSR is DOUBLE PRECISION array, dimension (LDVSR,N) +*> If JOBVSR = 'V', the matrix of right Schur vectors Z. +*> Not referenced if JOBVSR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSR +*> \verbatim +*> LDVSR is INTEGER +*> The leading dimension of the matrix VSR. LDVSR >= 1, and +*> if JOBVSR = 'V', LDVSR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,4*N). +*> For good performance, LWORK must generally be larger. +*> To compute the optimal value of LWORK, call ILAENV to get +*> blocksizes (for DGEQRF, DORMQR, and DORGQR.) Then compute: +*> NB -- MAX of the blocksizes for DGEQRF, DORMQR, and DORGQR +*> The optimal LWORK is 2*N + N*(NB+1). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1,...,N: +*> The QZ iteration failed. (A,B) are not in Schur +*> form, but ALPHAR(j), ALPHAI(j), and BETA(j) should +*> be correct for j=INFO+1,...,N. +*> > N: errors that usually indicate LAPACK problems: +*> =N+1: error return from DGGBAL +*> =N+2: error return from DGEQRF +*> =N+3: error return from DORMQR +*> =N+4: error return from DORGQR +*> =N+5: error return from DGGHRD +*> =N+6: error return from DHGEQZ (other than failed +*> iteration) +*> =N+7: error return from DGGBAK (computing VSL) +*> =N+8: error return from DGGBAK (computing VSR) +*> =N+9: error return from DLASCL (various places) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEeigen +* +* ===================================================================== + SUBROUTINE DGEGS( JOBVSL, JOBVSR, N, A, LDA, B, LDB, ALPHAR, + $ ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), + $ VSR( LDVSR, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILVSL, ILVSR, LQUERY + INTEGER ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, ILO, + $ IRIGHT, IROWS, ITAU, IWORK, LOPT, LWKMIN, + $ LWKOPT, NB, NB1, NB2, NB3 + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SAFMIN, SMLNUM +* .. +* .. External Subroutines .. + EXTERNAL DGEQRF, DGGBAK, DGGBAL, DGGHRD, DHGEQZ, DLACPY, + $ DLASCL, DLASET, DORGQR, DORMQR, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* +* Test the input arguments +* + LWKMIN = MAX( 4*N, 1 ) + LWKOPT = LWKMIN + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -12 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -14 + ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -16 + END IF +* + IF( INFO.EQ.0 ) THEN + NB1 = ILAENV( 1, 'DGEQRF', ' ', N, N, -1, -1 ) + NB2 = ILAENV( 1, 'DORMQR', ' ', N, N, N, -1 ) + NB3 = ILAENV( 1, 'DORGQR', ' ', N, N, N, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LOPT = 2*N + N*( NB+1 ) + WORK( 1 ) = LOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEGS ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'E' )*DLAMCH( 'B' ) + SAFMIN = DLAMCH( 'S' ) + SMLNUM = N*SAFMIN / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF +* + IF( ILASCL ) THEN + CALL DLASCL( 'G', -1, -1, ANRM, ANRMTO, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = DLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF +* + IF( ILBSCL ) THEN + CALL DLASCL( 'G', -1, -1, BNRM, BNRMTO, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* +* Permute the matrix to make it more nearly triangular +* Workspace layout: (2*N words -- "work..." not actually used) +* left_permutation, right_permutation, work... +* + ILEFT = 1 + IRIGHT = N + 1 + IWORK = IRIGHT + N + CALL DGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWORK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 1 + GO TO 10 + END IF +* +* Reduce B to triangular form, and initialize VSL and/or VSR +* Workspace layout: ("work..." must have at least N words) +* left_permutation, right_permutation, tau, work... +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = IWORK + IWORK = ITAU + IROWS + CALL DGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 2 + GO TO 10 + END IF +* + CALL DORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ), + $ LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 3 + GO TO 10 + END IF +* + IF( ILVSL ) THEN + CALL DLASET( 'Full', N, N, ZERO, ONE, VSL, LDVSL ) + CALL DLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + CALL DORGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK, + $ IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 4 + GO TO 10 + END IF + END IF +* + IF( ILVSR ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* + CALL DGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 5 + GO TO 10 + END IF +* +* Perform QZ algorithm, computing Schur vectors if desired +* Workspace layout: ("work..." must have at least 1 word) +* left_permutation, right_permutation, work... +* + IWORK = ITAU + CALL DHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN + INFO = IINFO + ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN + INFO = IINFO - N + ELSE + INFO = N + 6 + END IF + GO TO 10 + END IF +* +* Apply permutation to VSL and VSR +* + IF( ILVSL ) THEN + CALL DGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSL, LDVSL, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 7 + GO TO 10 + END IF + END IF + IF( ILVSR ) THEN + CALL DGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSR, LDVSR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 8 + GO TO 10 + END IF + END IF +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL DLASCL( 'H', -1, -1, ANRMTO, ANRM, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + CALL DLASCL( 'G', -1, -1, ANRMTO, ANRM, N, 1, ALPHAR, N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + CALL DLASCL( 'G', -1, -1, ANRMTO, ANRM, N, 1, ALPHAI, N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* + IF( ILBSCL ) THEN + CALL DLASCL( 'U', -1, -1, BNRMTO, BNRM, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + CALL DLASCL( 'G', -1, -1, BNRMTO, BNRM, N, 1, BETA, N, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* + 10 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DGEGS +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/dgegv.f b/dspl/liblapack/SRC/DEPRECATED/dgegv.f new file mode 100644 index 0000000..e4d469a --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/dgegv.f @@ -0,0 +1,769 @@ +*> \brief DGEEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEGV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI, +* BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVL, JOBVR +* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), +* $ B( LDB, * ), BETA( * ), VL( LDVL, * ), +* $ VR( LDVR, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine DGGEV. +*> +*> DGEGV computes the eigenvalues and, optionally, the left and/or right +*> eigenvectors of a real matrix pair (A,B). +*> Given two square matrices A and B, +*> the generalized nonsymmetric eigenvalue problem (GNEP) is to find the +*> eigenvalues lambda and corresponding (non-zero) eigenvectors x such +*> that +*> +*> A*x = lambda*B*x. +*> +*> An alternate form is to find the eigenvalues mu and corresponding +*> eigenvectors y such that +*> +*> mu*A*y = B*y. +*> +*> These two forms are equivalent with mu = 1/lambda and x = y if +*> neither lambda nor mu is zero. In order to deal with the case that +*> lambda or mu is zero or small, two values alpha and beta are returned +*> for each eigenvalue, such that lambda = alpha/beta and +*> mu = beta/alpha. +*> +*> The vectors x and y in the above equations are right eigenvectors of +*> the matrix pair (A,B). Vectors u and v satisfying +*> +*> u**H*A = lambda*u**H*B or mu*v**H*A = v**H*B +*> +*> are left eigenvectors of (A,B). +*> +*> Note: this routine performs "full balancing" on A and B +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': do not compute the left generalized eigenvectors; +*> = 'V': compute the left generalized eigenvectors (returned +*> in VL). +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': do not compute the right generalized eigenvectors; +*> = 'V': compute the right generalized eigenvectors (returned +*> in VR). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VL, and VR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the matrix A. +*> If JOBVL = 'V' or JOBVR = 'V', then on exit A +*> contains the real Schur form of A from the generalized Schur +*> factorization of the pair (A,B) after balancing. +*> If no eigenvectors were computed, then only the diagonal +*> blocks from the Schur form will be correct. See DGGHRD and +*> DHGEQZ for details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, N) +*> On entry, the matrix B. +*> If JOBVL = 'V' or JOBVR = 'V', then on exit B contains the +*> upper triangular matrix obtained from B in the generalized +*> Schur factorization of the pair (A,B) after balancing. +*> If no eigenvectors were computed, then only those elements of +*> B corresponding to the diagonal blocks from the Schur form of +*> A will be correct. See DGGHRD and DHGEQZ for details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is DOUBLE PRECISION array, dimension (N) +*> The real parts of each scalar alpha defining an eigenvalue of +*> GNEP. +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is DOUBLE PRECISION array, dimension (N) +*> The imaginary parts of each scalar alpha defining an +*> eigenvalue of GNEP. If ALPHAI(j) is zero, then the j-th +*> eigenvalue is real; if positive, then the j-th and +*> (j+1)-st eigenvalues are a complex conjugate pair, with +*> ALPHAI(j+1) = -ALPHAI(j). +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION array, dimension (N) +*> The scalars beta that define the eigenvalues of GNEP. +*> +*> Together, the quantities alpha = (ALPHAR(j),ALPHAI(j)) and +*> beta = BETA(j) represent the j-th eigenvalue of the matrix +*> pair (A,B), in one of the forms lambda = alpha/beta or +*> mu = beta/alpha. Since either lambda or mu may overflow, +*> they should not, in general, be computed. +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is DOUBLE PRECISION array, dimension (LDVL,N) +*> If JOBVL = 'V', the left eigenvectors u(j) are stored +*> in the columns of VL, in the same order as their eigenvalues. +*> If the j-th eigenvalue is real, then u(j) = VL(:,j). +*> If the j-th and (j+1)-st eigenvalues form a complex conjugate +*> pair, then +*> u(j) = VL(:,j) + i*VL(:,j+1) +*> and +*> u(j+1) = VL(:,j) - i*VL(:,j+1). +*> +*> Each eigenvector is scaled so that its largest component has +*> abs(real part) + abs(imag. part) = 1, except for eigenvectors +*> corresponding to an eigenvalue with alpha = beta = 0, which +*> are set to zero. +*> Not referenced if JOBVL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the matrix VL. LDVL >= 1, and +*> if JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is DOUBLE PRECISION array, dimension (LDVR,N) +*> If JOBVR = 'V', the right eigenvectors x(j) are stored +*> in the columns of VR, in the same order as their eigenvalues. +*> If the j-th eigenvalue is real, then x(j) = VR(:,j). +*> If the j-th and (j+1)-st eigenvalues form a complex conjugate +*> pair, then +*> x(j) = VR(:,j) + i*VR(:,j+1) +*> and +*> x(j+1) = VR(:,j) - i*VR(:,j+1). +*> +*> Each eigenvector is scaled so that its largest component has +*> abs(real part) + abs(imag. part) = 1, except for eigenvalues +*> corresponding to an eigenvalue with alpha = beta = 0, which +*> are set to zero. +*> Not referenced if JOBVR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the matrix VR. LDVR >= 1, and +*> if JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,8*N). +*> For good performance, LWORK must generally be larger. +*> To compute the optimal value of LWORK, call ILAENV to get +*> blocksizes (for DGEQRF, DORMQR, and DORGQR.) Then compute: +*> NB -- MAX of the blocksizes for DGEQRF, DORMQR, and DORGQR; +*> The optimal LWORK is: +*> 2*N + MAX( 6*N, N*(NB+1) ). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1,...,N: +*> The QZ iteration failed. No eigenvectors have been +*> calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) +*> should be correct for j=INFO+1,...,N. +*> > N: errors that usually indicate LAPACK problems: +*> =N+1: error return from DGGBAL +*> =N+2: error return from DGEQRF +*> =N+3: error return from DORMQR +*> =N+4: error return from DORGQR +*> =N+5: error return from DGGHRD +*> =N+6: error return from DHGEQZ (other than failed +*> iteration) +*> =N+7: error return from DTGEVC +*> =N+8: error return from DGGBAK (computing VL) +*> =N+9: error return from DGGBAK (computing VR) +*> =N+10: error return from DLASCL (various calls) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Balancing +*> --------- +*> +*> This driver calls DGGBAL to both permute and scale rows and columns +*> of A and B. The permutations PL and PR are chosen so that PL*A*PR +*> and PL*B*R will be upper triangular except for the diagonal blocks +*> A(i:j,i:j) and B(i:j,i:j), with i and j as close together as +*> possible. The diagonal scaling matrices DL and DR are chosen so +*> that the pair DL*PL*A*PR*DR, DL*PL*B*PR*DR have elements close to +*> one (except for the elements that start out zero.) +*> +*> After the eigenvalues and eigenvectors of the balanced matrices +*> have been computed, DGGBAK transforms the eigenvectors back to what +*> they would have been (in perfect arithmetic) if they had not been +*> balanced. +*> +*> Contents of A and B on Exit +*> -------- -- - --- - -- ---- +*> +*> If any eigenvectors are computed (either JOBVL='V' or JOBVR='V' or +*> both), then on exit the arrays A and B will contain the real Schur +*> form[*] of the "balanced" versions of A and B. If no eigenvectors +*> are computed, then only the diagonal blocks will be correct. +*> +*> [*] See DHGEQZ, DGEGS, or read the book "Matrix Computations", +*> by Golub & van Loan, pub. by Johns Hopkins U. Press. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEGV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI, + $ BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL ILIMIT, ILV, ILVL, ILVR, LQUERY + CHARACTER CHTEMP + INTEGER ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, ILO, + $ IN, IRIGHT, IROWS, ITAU, IWORK, JC, JR, LOPT, + $ LWKMIN, LWKOPT, NB, NB1, NB2, NB3 + DOUBLE PRECISION ABSAI, ABSAR, ABSB, ANRM, ANRM1, ANRM2, BNRM, + $ BNRM1, BNRM2, EPS, ONEPLS, SAFMAX, SAFMIN, + $ SALFAI, SALFAR, SBETA, SCALE, TEMP +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DGEQRF, DGGBAK, DGGBAL, DGGHRD, DHGEQZ, DLACPY, + $ DLASCL, DLASET, DORGQR, DORMQR, DTGEVC, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, MAX +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* +* Test the input arguments +* + LWKMIN = MAX( 8*N, 1 ) + LWKOPT = LWKMIN + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -12 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -14 + ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -16 + END IF +* + IF( INFO.EQ.0 ) THEN + NB1 = ILAENV( 1, 'DGEQRF', ' ', N, N, -1, -1 ) + NB2 = ILAENV( 1, 'DORMQR', ' ', N, N, N, -1 ) + NB3 = ILAENV( 1, 'DORGQR', ' ', N, N, N, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LOPT = 2*N + MAX( 6*N, N*( NB+1 ) ) + WORK( 1 ) = LOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEGV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'E' )*DLAMCH( 'B' ) + SAFMIN = DLAMCH( 'S' ) + SAFMIN = SAFMIN + SAFMIN + SAFMAX = ONE / SAFMIN + ONEPLS = ONE + ( 4*EPS ) +* +* Scale A +* + ANRM = DLANGE( 'M', N, N, A, LDA, WORK ) + ANRM1 = ANRM + ANRM2 = ONE + IF( ANRM.LT.ONE ) THEN + IF( SAFMAX*ANRM.LT.ONE ) THEN + ANRM1 = SAFMIN + ANRM2 = SAFMAX*ANRM + END IF + END IF +* + IF( ANRM.GT.ZERO ) THEN + CALL DLASCL( 'G', -1, -1, ANRM, ONE, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 10 + RETURN + END IF + END IF +* +* Scale B +* + BNRM = DLANGE( 'M', N, N, B, LDB, WORK ) + BNRM1 = BNRM + BNRM2 = ONE + IF( BNRM.LT.ONE ) THEN + IF( SAFMAX*BNRM.LT.ONE ) THEN + BNRM1 = SAFMIN + BNRM2 = SAFMAX*BNRM + END IF + END IF +* + IF( BNRM.GT.ZERO ) THEN + CALL DLASCL( 'G', -1, -1, BNRM, ONE, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 10 + RETURN + END IF + END IF +* +* Permute the matrix to make it more nearly triangular +* Workspace layout: (8*N words -- "work" requires 6*N words) +* left_permutation, right_permutation, work... +* + ILEFT = 1 + IRIGHT = N + 1 + IWORK = IRIGHT + N + CALL DGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWORK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 1 + GO TO 120 + END IF +* +* Reduce B to triangular form, and initialize VL and/or VR +* Workspace layout: ("work..." must have at least N words) +* left_permutation, right_permutation, tau, work... +* + IROWS = IHI + 1 - ILO + IF( ILV ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = IWORK + IWORK = ITAU + IROWS + CALL DGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 2 + GO TO 120 + END IF +* + CALL DORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ), + $ LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 3 + GO TO 120 + END IF +* + IF( ILVL ) THEN + CALL DLASET( 'Full', N, N, ZERO, ONE, VL, LDVL ) + CALL DLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + CALL DORGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK, + $ IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 4 + GO TO 120 + END IF + END IF +* + IF( ILVR ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* + IF( ILV ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL DGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IINFO ) + ELSE + CALL DGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IINFO ) + END IF + IF( IINFO.NE.0 ) THEN + INFO = N + 5 + GO TO 120 + END IF +* +* Perform QZ algorithm +* Workspace layout: ("work..." must have at least 1 word) +* left_permutation, right_permutation, work... +* + IWORK = ITAU + IF( ILV ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF + CALL DHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN + INFO = IINFO + ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN + INFO = IINFO - N + ELSE + INFO = N + 6 + END IF + GO TO 120 + END IF +* + IF( ILV ) THEN +* +* Compute Eigenvectors (DTGEVC requires 6*N words of workspace) +* + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL DTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, N, IN, WORK( IWORK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 7 + GO TO 120 + END IF +* +* Undo balancing on VL and VR, rescale +* + IF( ILVL ) THEN + CALL DGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VL, LDVL, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 8 + GO TO 120 + END IF + DO 50 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 50 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 10 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) ) ) + 10 CONTINUE + ELSE + DO 20 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) )+ + $ ABS( VL( JR, JC+1 ) ) ) + 20 CONTINUE + END IF + IF( TEMP.LT.SAFMIN ) + $ GO TO 50 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 30 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 30 CONTINUE + ELSE + DO 40 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + VL( JR, JC+1 ) = VL( JR, JC+1 )*TEMP + 40 CONTINUE + END IF + 50 CONTINUE + END IF + IF( ILVR ) THEN + CALL DGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VR, LDVR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + GO TO 120 + END IF + DO 100 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 100 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 60 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) ) ) + 60 CONTINUE + ELSE + DO 70 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) )+ + $ ABS( VR( JR, JC+1 ) ) ) + 70 CONTINUE + END IF + IF( TEMP.LT.SAFMIN ) + $ GO TO 100 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 80 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 80 CONTINUE + ELSE + DO 90 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + VR( JR, JC+1 ) = VR( JR, JC+1 )*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + END IF +* +* End of eigenvector calculation +* + END IF +* +* Undo scaling in alpha, beta +* +* Note: this does not give the alpha and beta for the unscaled +* problem. +* +* Un-scaling is limited to avoid underflow in alpha and beta +* if they are significant. +* + DO 110 JC = 1, N + ABSAR = ABS( ALPHAR( JC ) ) + ABSAI = ABS( ALPHAI( JC ) ) + ABSB = ABS( BETA( JC ) ) + SALFAR = ANRM*ALPHAR( JC ) + SALFAI = ANRM*ALPHAI( JC ) + SBETA = BNRM*BETA( JC ) + ILIMIT = .FALSE. + SCALE = ONE +* +* Check for significant underflow in ALPHAI +* + IF( ABS( SALFAI ).LT.SAFMIN .AND. ABSAI.GE. + $ MAX( SAFMIN, EPS*ABSAR, EPS*ABSB ) ) THEN + ILIMIT = .TRUE. + SCALE = ( ONEPLS*SAFMIN / ANRM1 ) / + $ MAX( ONEPLS*SAFMIN, ANRM2*ABSAI ) +* + ELSE IF( SALFAI.EQ.ZERO ) THEN +* +* If insignificant underflow in ALPHAI, then make the +* conjugate eigenvalue real. +* + IF( ALPHAI( JC ).LT.ZERO .AND. JC.GT.1 ) THEN + ALPHAI( JC-1 ) = ZERO + ELSE IF( ALPHAI( JC ).GT.ZERO .AND. JC.LT.N ) THEN + ALPHAI( JC+1 ) = ZERO + END IF + END IF +* +* Check for significant underflow in ALPHAR +* + IF( ABS( SALFAR ).LT.SAFMIN .AND. ABSAR.GE. + $ MAX( SAFMIN, EPS*ABSAI, EPS*ABSB ) ) THEN + ILIMIT = .TRUE. + SCALE = MAX( SCALE, ( ONEPLS*SAFMIN / ANRM1 ) / + $ MAX( ONEPLS*SAFMIN, ANRM2*ABSAR ) ) + END IF +* +* Check for significant underflow in BETA +* + IF( ABS( SBETA ).LT.SAFMIN .AND. ABSB.GE. + $ MAX( SAFMIN, EPS*ABSAR, EPS*ABSAI ) ) THEN + ILIMIT = .TRUE. + SCALE = MAX( SCALE, ( ONEPLS*SAFMIN / BNRM1 ) / + $ MAX( ONEPLS*SAFMIN, BNRM2*ABSB ) ) + END IF +* +* Check for possible overflow when limiting scaling +* + IF( ILIMIT ) THEN + TEMP = ( SCALE*SAFMIN )*MAX( ABS( SALFAR ), ABS( SALFAI ), + $ ABS( SBETA ) ) + IF( TEMP.GT.ONE ) + $ SCALE = SCALE / TEMP + IF( SCALE.LT.ONE ) + $ ILIMIT = .FALSE. + END IF +* +* Recompute un-scaled ALPHAR, ALPHAI, BETA if necessary. +* + IF( ILIMIT ) THEN + SALFAR = ( SCALE*ALPHAR( JC ) )*ANRM + SALFAI = ( SCALE*ALPHAI( JC ) )*ANRM + SBETA = ( SCALE*BETA( JC ) )*BNRM + END IF + ALPHAR( JC ) = SALFAR + ALPHAI( JC ) = SALFAI + BETA( JC ) = SBETA + 110 CONTINUE +* + 120 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DGEGV +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/dgelsx.f b/dspl/liblapack/SRC/DEPRECATED/dgelsx.f new file mode 100644 index 0000000..70e5ce7 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/dgelsx.f @@ -0,0 +1,435 @@ +*> \brief DGELSX solves overdetermined or underdetermined systems for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGELSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGELSX( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, M, N, NRHS, RANK +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine DGELSY. +*> +*> DGELSX computes the minimum-norm solution to a real linear least +*> squares problem: +*> minimize || A * X - B || +*> using a complete orthogonal factorization of A. A is an M-by-N +*> matrix which may be rank-deficient. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> +*> The routine first computes a QR factorization with column pivoting: +*> A * P = Q * [ R11 R12 ] +*> [ 0 R22 ] +*> with R11 defined as the largest leading submatrix whose estimated +*> condition number is less than 1/RCOND. The order of R11, RANK, +*> is the effective rank of A. +*> +*> Then, R22 is considered to be negligible, and R12 is annihilated +*> by orthogonal transformations from the right, arriving at the +*> complete orthogonal factorization: +*> A * P = Q * [ T11 0 ] * Z +*> [ 0 0 ] +*> The minimum-norm solution is then +*> X = P * Z**T [ inv(T11)*Q1**T*B ] +*> [ 0 ] +*> where Q1 consists of the first RANK columns of Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A has been overwritten by details of its +*> complete orthogonal factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the M-by-NRHS right hand side matrix B. +*> On exit, the N-by-NRHS solution matrix X. +*> If m >= n and RANK = n, the residual sum-of-squares for +*> the solution in the i-th column is given by the sum of +*> squares of elements N+1:M in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M,N). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> On entry, if JPVT(i) .ne. 0, the i-th column of A is an +*> initial column, otherwise it is a free column. Before +*> the QR factorization of A, all initial columns are +*> permuted to the leading positions; only the remaining +*> free columns are moved as a result of column pivoting +*> during the factorization. +*> On exit, if JPVT(i) = k, then the i-th column of A*P +*> was the k-th column of A. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> RCOND is used to determine the effective rank of A, which +*> is defined as the order of the largest leading triangular +*> submatrix R11 in the QR factorization with pivoting of A, +*> whose estimated condition number < 1/RCOND. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The effective rank of A, i.e., the order of the submatrix +*> R11. This is the same as the order of the submatrix T11 +*> in the complete orthogonal factorization of A. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension +*> (max( min(M,N)+3*N, 2*min(M,N)+NRHS )), +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEsolve +* +* ===================================================================== + SUBROUTINE DGELSX( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, + $ WORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, M, N, NRHS, RANK + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IMAX, IMIN + PARAMETER ( IMAX = 1, IMIN = 2 ) + DOUBLE PRECISION ZERO, ONE, DONE, NTDONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, DONE = ZERO, + $ NTDONE = ONE ) +* .. +* .. Local Scalars .. + INTEGER I, IASCL, IBSCL, ISMAX, ISMIN, J, K, MN + DOUBLE PRECISION ANRM, BIGNUM, BNRM, C1, C2, S1, S2, SMAX, + $ SMAXPR, SMIN, SMINPR, SMLNUM, T1, T2 +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL DLAMCH, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DGEQPF, DLAIC1, DLASCL, DLASET, DLATZM, DORM2R, + $ DTRSM, DTZRQF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + MN = MIN( M, N ) + ISMIN = MN + 1 + ISMAX = 2*MN + 1 +* +* Test the input arguments. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELSX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max elements outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + RANK = 0 + GO TO 100 + END IF +* + BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Compute QR factorization with column pivoting of A: +* A * P = Q * R +* + CALL DGEQPF( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), INFO ) +* +* workspace 3*N. Details of Householder rotations stored +* in WORK(1:MN). +* +* Determine RANK using incremental condition estimation +* + WORK( ISMIN ) = ONE + WORK( ISMAX ) = ONE + SMAX = ABS( A( 1, 1 ) ) + SMIN = SMAX + IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN + RANK = 0 + CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + GO TO 100 + ELSE + RANK = 1 + END IF +* + 10 CONTINUE + IF( RANK.LT.MN ) THEN + I = RANK + 1 + CALL DLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ), + $ A( I, I ), SMINPR, S1, C1 ) + CALL DLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ), + $ A( I, I ), SMAXPR, S2, C2 ) +* + IF( SMAXPR*RCOND.LE.SMINPR ) THEN + DO 20 I = 1, RANK + WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 ) + WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 ) + 20 CONTINUE + WORK( ISMIN+RANK ) = C1 + WORK( ISMAX+RANK ) = C2 + SMIN = SMINPR + SMAX = SMAXPR + RANK = RANK + 1 + GO TO 10 + END IF + END IF +* +* Logically partition R = [ R11 R12 ] +* [ 0 R22 ] +* where R11 = R(1:RANK,1:RANK) +* +* [R11,R12] = [ T11, 0 ] * Y +* + IF( RANK.LT.N ) + $ CALL DTZRQF( RANK, N, A, LDA, WORK( MN+1 ), INFO ) +* +* Details of Householder rotations stored in WORK(MN+1:2*MN) +* +* B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) +* + CALL DORM2R( 'Left', 'Transpose', M, NRHS, MN, A, LDA, WORK( 1 ), + $ B, LDB, WORK( 2*MN+1 ), INFO ) +* +* workspace NRHS +* +* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) +* + CALL DTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK, + $ NRHS, ONE, A, LDA, B, LDB ) +* + DO 40 I = RANK + 1, N + DO 30 J = 1, NRHS + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Y**T * B(1:N,1:NRHS) +* + IF( RANK.LT.N ) THEN + DO 50 I = 1, RANK + CALL DLATZM( 'Left', N-RANK+1, NRHS, A( I, RANK+1 ), LDA, + $ WORK( MN+I ), B( I, 1 ), B( RANK+1, 1 ), LDB, + $ WORK( 2*MN+1 ) ) + 50 CONTINUE + END IF +* +* workspace NRHS +* +* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) +* + DO 90 J = 1, NRHS + DO 60 I = 1, N + WORK( 2*MN+I ) = NTDONE + 60 CONTINUE + DO 80 I = 1, N + IF( WORK( 2*MN+I ).EQ.NTDONE ) THEN + IF( JPVT( I ).NE.I ) THEN + K = I + T1 = B( K, J ) + T2 = B( JPVT( K ), J ) + 70 CONTINUE + B( JPVT( K ), J ) = T1 + WORK( 2*MN+K ) = DONE + T1 = T2 + K = JPVT( K ) + T2 = B( JPVT( K ), J ) + IF( JPVT( K ).NE.I ) + $ GO TO 70 + B( I, J ) = T1 + WORK( 2*MN+K ) = DONE + END IF + END IF + 80 CONTINUE + 90 CONTINUE +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 100 CONTINUE +* + RETURN +* +* End of DGELSX +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/dgeqpf.f b/dspl/liblapack/SRC/DEPRECATED/dgeqpf.f new file mode 100644 index 0000000..58ef560 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/dgeqpf.f @@ -0,0 +1,306 @@ +*> \brief \b DGEQPF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEQPF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEQPF( M, N, A, LDA, JPVT, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine DGEQP3. +*> +*> DGEQPF computes a QR factorization with column pivoting of a +*> real M-by-N matrix A: A*P = Q*R. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0 +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the upper triangle of the array contains the +*> min(M,N)-by-N upper triangular matrix R; the elements +*> below the diagonal, together with the array TAU, +*> represent the orthogonal matrix Q as a product of +*> min(m,n) elementary reflectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +*> to the front of A*P (a leading column); if JPVT(i) = 0, +*> the i-th column of A is a free column. +*> On exit, if JPVT(i) = k, then the i-th column of A*P +*> was the k-th column of A. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(n) +*> +*> Each H(i) has the form +*> +*> H = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i). +*> +*> The matrix P is represented in jpvt as follows: If +*> jpvt(j) = i +*> then the jth column of P is the ith canonical unit vector. +*> +*> Partial column norm updating strategy modified by +*> Z. Drmac and Z. Bujanovic, Dept. of Mathematics, +*> University of Zagreb, Croatia. +*> -- April 2011 -- +*> For more details see LAPACK Working Note 176. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEQPF( M, N, A, LDA, JPVT, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, ITEMP, J, MA, MN, PVT + DOUBLE PRECISION AII, TEMP, TEMP2, TOL3Z +* .. +* .. External Subroutines .. + EXTERNAL DGEQR2, DLARF, DLARFG, DORM2R, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DNRM2 + EXTERNAL IDAMAX, DLAMCH, DNRM2 +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQPF', -INFO ) + RETURN + END IF +* + MN = MIN( M, N ) + TOL3Z = SQRT(DLAMCH('Epsilon')) +* +* Move initial columns up front +* + ITEMP = 1 + DO 10 I = 1, N + IF( JPVT( I ).NE.0 ) THEN + IF( I.NE.ITEMP ) THEN + CALL DSWAP( M, A( 1, I ), 1, A( 1, ITEMP ), 1 ) + JPVT( I ) = JPVT( ITEMP ) + JPVT( ITEMP ) = I + ELSE + JPVT( I ) = I + END IF + ITEMP = ITEMP + 1 + ELSE + JPVT( I ) = I + END IF + 10 CONTINUE + ITEMP = ITEMP - 1 +* +* Compute the QR factorization and update remaining columns +* + IF( ITEMP.GT.0 ) THEN + MA = MIN( ITEMP, M ) + CALL DGEQR2( M, MA, A, LDA, TAU, WORK, INFO ) + IF( MA.LT.N ) THEN + CALL DORM2R( 'Left', 'Transpose', M, N-MA, MA, A, LDA, TAU, + $ A( 1, MA+1 ), LDA, WORK, INFO ) + END IF + END IF +* + IF( ITEMP.LT.MN ) THEN +* +* Initialize partial column norms. The first n elements of +* work store the exact column norms. +* + DO 20 I = ITEMP + 1, N + WORK( I ) = DNRM2( M-ITEMP, A( ITEMP+1, I ), 1 ) + WORK( N+I ) = WORK( I ) + 20 CONTINUE +* +* Compute factorization +* + DO 40 I = ITEMP + 1, MN +* +* Determine ith pivot column and swap if necessary +* + PVT = ( I-1 ) + IDAMAX( N-I+1, WORK( I ), 1 ) +* + IF( PVT.NE.I ) THEN + CALL DSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( I ) + JPVT( I ) = ITEMP + WORK( PVT ) = WORK( I ) + WORK( N+PVT ) = WORK( N+I ) + END IF +* +* Generate elementary reflector H(i) +* + IF( I.LT.M ) THEN + CALL DLARFG( M-I+1, A( I, I ), A( I+1, I ), 1, TAU( I ) ) + ELSE + CALL DLARFG( 1, A( M, M ), A( M, M ), 1, TAU( M ) ) + END IF +* + IF( I.LT.N ) THEN +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + AII = A( I, I ) + A( I, I ) = ONE + CALL DLARF( 'LEFT', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK( 2*N+1 ) ) + A( I, I ) = AII + END IF +* +* Update partial column norms +* + DO 30 J = I + 1, N + IF( WORK( J ).NE.ZERO ) THEN +* +* NOTE: The following 4 lines follow from the analysis in +* Lapack Working Note 176. +* + TEMP = ABS( A( I, J ) ) / WORK( J ) + TEMP = MAX( ZERO, ( ONE+TEMP )*( ONE-TEMP ) ) + TEMP2 = TEMP*( WORK( J ) / WORK( N+J ) )**2 + IF( TEMP2 .LE. TOL3Z ) THEN + IF( M-I.GT.0 ) THEN + WORK( J ) = DNRM2( M-I, A( I+1, J ), 1 ) + WORK( N+J ) = WORK( J ) + ELSE + WORK( J ) = ZERO + WORK( N+J ) = ZERO + END IF + ELSE + WORK( J ) = WORK( J )*SQRT( TEMP ) + END IF + END IF + 30 CONTINUE +* + 40 CONTINUE + END IF + RETURN +* +* End of DGEQPF +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/dggsvd.f b/dspl/liblapack/SRC/DEPRECATED/dggsvd.f new file mode 100644 index 0000000..32d232c --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/dggsvd.f @@ -0,0 +1,464 @@ +*> \brief DGGSVD computes the singular value decomposition (SVD) for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGGSVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGGSVD( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, +* LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBQ, JOBU, JOBV +* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), Q( LDQ, * ), U( LDU, * ), +* $ V( LDV, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine DGGSVD3. +*> +*> DGGSVD computes the generalized singular value decomposition (GSVD) +*> of an M-by-N real matrix A and P-by-N real matrix B: +*> +*> U**T*A*Q = D1*( 0 R ), V**T*B*Q = D2*( 0 R ) +*> +*> where U, V and Q are orthogonal matrices. +*> Let K+L = the effective numerical rank of the matrix (A**T,B**T)**T, +*> then R is a K+L-by-K+L nonsingular upper triangular matrix, D1 and +*> D2 are M-by-(K+L) and P-by-(K+L) "diagonal" matrices and of the +*> following structures, respectively: +*> +*> If M-K-L >= 0, +*> +*> K L +*> D1 = K ( I 0 ) +*> L ( 0 C ) +*> M-K-L ( 0 0 ) +*> +*> K L +*> D2 = L ( 0 S ) +*> P-L ( 0 0 ) +*> +*> N-K-L K L +*> ( 0 R ) = K ( 0 R11 R12 ) +*> L ( 0 0 R22 ) +*> +*> where +*> +*> C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), +*> S = diag( BETA(K+1), ... , BETA(K+L) ), +*> C**2 + S**2 = I. +*> +*> R is stored in A(1:K+L,N-K-L+1:N) on exit. +*> +*> If M-K-L < 0, +*> +*> K M-K K+L-M +*> D1 = K ( I 0 0 ) +*> M-K ( 0 C 0 ) +*> +*> K M-K K+L-M +*> D2 = M-K ( 0 S 0 ) +*> K+L-M ( 0 0 I ) +*> P-L ( 0 0 0 ) +*> +*> N-K-L K M-K K+L-M +*> ( 0 R ) = K ( 0 R11 R12 R13 ) +*> M-K ( 0 0 R22 R23 ) +*> K+L-M ( 0 0 0 R33 ) +*> +*> where +*> +*> C = diag( ALPHA(K+1), ... , ALPHA(M) ), +*> S = diag( BETA(K+1), ... , BETA(M) ), +*> C**2 + S**2 = I. +*> +*> (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored +*> ( 0 R22 R23 ) +*> in B(M-K+1:L,N+M-K-L+1:N) on exit. +*> +*> The routine computes C, S, R, and optionally the orthogonal +*> transformation matrices U, V and Q. +*> +*> In particular, if B is an N-by-N nonsingular matrix, then the GSVD of +*> A and B implicitly gives the SVD of A*inv(B): +*> A*inv(B) = U*(D1*inv(D2))*V**T. +*> If ( A**T,B**T)**T has orthonormal columns, then the GSVD of A and B is +*> also equal to the CS decomposition of A and B. Furthermore, the GSVD +*> can be used to derive the solution of the eigenvalue problem: +*> A**T*A x = lambda* B**T*B x. +*> In some literature, the GSVD of A and B is presented in the form +*> U**T*A*X = ( 0 D1 ), V**T*B*X = ( 0 D2 ) +*> where U and V are orthogonal and X is nonsingular, D1 and D2 are +*> ``diagonal''. The former GSVD form can be converted to the latter +*> form by taking the nonsingular matrix X as +*> +*> X = Q*( I 0 ) +*> ( 0 inv(R) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> = 'U': Orthogonal matrix U is computed; +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> = 'V': Orthogonal matrix V is computed; +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBQ +*> \verbatim +*> JOBQ is CHARACTER*1 +*> = 'Q': Orthogonal matrix Q is computed; +*> = 'N': Q is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> \endverbatim +*> +*> \param[out] L +*> \verbatim +*> L is INTEGER +*> +*> On exit, K and L specify the dimension of the subblocks +*> described in Purpose. +*> K + L = effective numerical rank of (A**T,B**T)**T. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A contains the triangular matrix R, or part of R. +*> See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, B contains the triangular matrix R if M-K-L < 0. +*> See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION array, dimension (N) +*> +*> On exit, ALPHA and BETA contain the generalized singular +*> value pairs of A and B; +*> ALPHA(1:K) = 1, +*> BETA(1:K) = 0, +*> and if M-K-L >= 0, +*> ALPHA(K+1:K+L) = C, +*> BETA(K+1:K+L) = S, +*> or if M-K-L < 0, +*> ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0 +*> BETA(K+1:M) =S, BETA(M+1:K+L) =1 +*> and +*> ALPHA(K+L+1:N) = 0 +*> BETA(K+L+1:N) = 0 +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension (LDU,M) +*> If JOBU = 'U', U contains the M-by-M orthogonal matrix U. +*> If JOBU = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,M) if +*> JOBU = 'U'; LDU >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (LDV,P) +*> If JOBV = 'V', V contains the P-by-P orthogonal matrix V. +*> If JOBV = 'N', V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,P) if +*> JOBV = 'V'; LDV >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> If JOBQ = 'Q', Q contains the N-by-N orthogonal matrix Q. +*> If JOBQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N) if +*> JOBQ = 'Q'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, +*> dimension (max(3*N,M,P)+N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> On exit, IWORK stores the sorting information. More +*> precisely, the following loop will sort ALPHA +*> for I = K+1, min(M,K+L) +*> swap ALPHA(I) and ALPHA(IWORK(I)) +*> endfor +*> such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, the Jacobi-type procedure failed to +*> converge. For further details, see subroutine DTGSJA. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> TOLA DOUBLE PRECISION +*> TOLB DOUBLE PRECISION +*> TOLA and TOLB are the thresholds to determine the effective +*> rank of (A',B')**T. Generally, they are set to +*> TOLA = MAX(M,N)*norm(A)*MAZHEPS, +*> TOLB = MAX(P,N)*norm(B)*MAZHEPS. +*> The size of TOLA and TOLB may affect the size of backward +*> errors of the decomposition. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERsing +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DGGSVD( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, + $ LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, + $ IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), Q( LDQ, * ), U( LDU, * ), + $ V( LDV, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL WANTQ, WANTU, WANTV + INTEGER I, IBND, ISUB, J, NCYCLE + DOUBLE PRECISION ANORM, BNORM, SMAX, TEMP, TOLA, TOLB, ULP, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, DLAMCH, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGGSVP, DTGSJA, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( P.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -12 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGSVD', -INFO ) + RETURN + END IF +* +* Compute the Frobenius norm of matrices A and B +* + ANORM = DLANGE( '1', M, N, A, LDA, WORK ) + BNORM = DLANGE( '1', P, N, B, LDB, WORK ) +* +* Get machine precision and set up threshold for determining +* the effective numerical rank of the matrices A and B. +* + ULP = DLAMCH( 'Precision' ) + UNFL = DLAMCH( 'Safe Minimum' ) + TOLA = MAX( M, N )*MAX( ANORM, UNFL )*ULP + TOLB = MAX( P, N )*MAX( BNORM, UNFL )*ULP +* +* Preprocessing +* + CALL DGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, TOLA, + $ TOLB, K, L, U, LDU, V, LDV, Q, LDQ, IWORK, WORK, + $ WORK( N+1 ), INFO ) +* +* Compute the GSVD of two upper "triangular" matrices +* + CALL DTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, LDB, + $ TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, + $ WORK, NCYCLE, INFO ) +* +* Sort the singular values and store the pivot indices in IWORK +* Copy ALPHA to WORK, then sort ALPHA in WORK +* + CALL DCOPY( N, ALPHA, 1, WORK, 1 ) + IBND = MIN( L, M-K ) + DO 20 I = 1, IBND +* +* Scan for largest ALPHA(K+I) +* + ISUB = I + SMAX = WORK( K+I ) + DO 10 J = I + 1, IBND + TEMP = WORK( K+J ) + IF( TEMP.GT.SMAX ) THEN + ISUB = J + SMAX = TEMP + END IF + 10 CONTINUE + IF( ISUB.NE.I ) THEN + WORK( K+ISUB ) = WORK( K+I ) + WORK( K+I ) = SMAX + IWORK( K+I ) = K + ISUB + ELSE + IWORK( K+I ) = K + I + END IF + 20 CONTINUE +* + RETURN +* +* End of DGGSVD +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/dggsvp.f b/dspl/liblapack/SRC/DEPRECATED/dggsvp.f new file mode 100644 index 0000000..1cc52b7 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/dggsvp.f @@ -0,0 +1,522 @@ +*> \brief \b DGGSVP +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGGSVP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, +* TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, +* IWORK, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBQ, JOBU, JOBV +* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P +* DOUBLE PRECISION TOLA, TOLB +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine DGGSVP3. +*> +*> DGGSVP computes orthogonal matrices U, V and Q such that +*> +*> N-K-L K L +*> U**T*A*Q = K ( 0 A12 A13 ) if M-K-L >= 0; +*> L ( 0 0 A23 ) +*> M-K-L ( 0 0 0 ) +*> +*> N-K-L K L +*> = K ( 0 A12 A13 ) if M-K-L < 0; +*> M-K ( 0 0 A23 ) +*> +*> N-K-L K L +*> V**T*B*Q = L ( 0 0 B13 ) +*> P-L ( 0 0 0 ) +*> +*> where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular +*> upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, +*> otherwise A23 is (M-K)-by-L upper trapezoidal. K+L = the effective +*> numerical rank of the (M+P)-by-N matrix (A**T,B**T)**T. +*> +*> This decomposition is the preprocessing step for computing the +*> Generalized Singular Value Decomposition (GSVD), see subroutine +*> DGGSVD. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> = 'U': Orthogonal matrix U is computed; +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> = 'V': Orthogonal matrix V is computed; +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBQ +*> \verbatim +*> JOBQ is CHARACTER*1 +*> = 'Q': Orthogonal matrix Q is computed; +*> = 'N': Q is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A contains the triangular (or trapezoidal) matrix +*> described in the Purpose section. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, B contains the triangular matrix described in +*> the Purpose section. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[in] TOLA +*> \verbatim +*> TOLA is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] TOLB +*> \verbatim +*> TOLB is DOUBLE PRECISION +*> +*> TOLA and TOLB are the thresholds to determine the effective +*> numerical rank of matrix B and a subblock of A. Generally, +*> they are set to +*> TOLA = MAX(M,N)*norm(A)*MACHEPS, +*> TOLB = MAX(P,N)*norm(B)*MACHEPS. +*> The size of TOLA and TOLB may affect the size of backward +*> errors of the decomposition. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> \endverbatim +*> +*> \param[out] L +*> \verbatim +*> L is INTEGER +*> +*> On exit, K and L specify the dimension of the subblocks +*> described in Purpose section. +*> K + L = effective numerical rank of (A**T,B**T)**T. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension (LDU,M) +*> If JOBU = 'U', U contains the orthogonal matrix U. +*> If JOBU = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,M) if +*> JOBU = 'U'; LDU >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (LDV,P) +*> If JOBV = 'V', V contains the orthogonal matrix V. +*> If JOBV = 'N', V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,P) if +*> JOBV = 'V'; LDV >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> If JOBQ = 'Q', Q contains the orthogonal matrix Q. +*> If JOBQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N) if +*> JOBQ = 'Q'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (max(3*N,M,P)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> The subroutine uses LAPACK subroutine DGEQPF for the QR factorization +*> with column pivoting to detect the effective numerical rank of the +*> a matrix. It may be replaced by a better rank determination strategy. +*> +* ===================================================================== + SUBROUTINE DGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, + $ TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, + $ IWORK, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P + DOUBLE PRECISION TOLA, TOLB +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL FORWRD, WANTQ, WANTU, WANTV + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGEQPF, DGEQR2, DGERQ2, DLACPY, DLAPMT, DLASET, + $ DORG2R, DORM2R, DORMR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) + FORWRD = .TRUE. +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( P.LT.0 ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -10 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGSVP', -INFO ) + RETURN + END IF +* +* QR with column pivoting of B: B*P = V*( S11 S12 ) +* ( 0 0 ) +* + DO 10 I = 1, N + IWORK( I ) = 0 + 10 CONTINUE + CALL DGEQPF( P, N, B, LDB, IWORK, TAU, WORK, INFO ) +* +* Update A := A*P +* + CALL DLAPMT( FORWRD, M, N, A, LDA, IWORK ) +* +* Determine the effective rank of matrix B. +* + L = 0 + DO 20 I = 1, MIN( P, N ) + IF( ABS( B( I, I ) ).GT.TOLB ) + $ L = L + 1 + 20 CONTINUE +* + IF( WANTV ) THEN +* +* Copy the details of V, and form V. +* + CALL DLASET( 'Full', P, P, ZERO, ZERO, V, LDV ) + IF( P.GT.1 ) + $ CALL DLACPY( 'Lower', P-1, N, B( 2, 1 ), LDB, V( 2, 1 ), + $ LDV ) + CALL DORG2R( P, P, MIN( P, N ), V, LDV, TAU, WORK, INFO ) + END IF +* +* Clean up B +* + DO 40 J = 1, L - 1 + DO 30 I = J + 1, L + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + IF( P.GT.L ) + $ CALL DLASET( 'Full', P-L, N, ZERO, ZERO, B( L+1, 1 ), LDB ) +* + IF( WANTQ ) THEN +* +* Set Q = I and Update Q := Q*P +* + CALL DLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) + CALL DLAPMT( FORWRD, N, N, Q, LDQ, IWORK ) + END IF +* + IF( P.GE.L .AND. N.NE.L ) THEN +* +* RQ factorization of (S11 S12): ( S11 S12 ) = ( 0 S12 )*Z +* + CALL DGERQ2( L, N, B, LDB, TAU, WORK, INFO ) +* +* Update A := A*Z**T +* + CALL DORMR2( 'Right', 'Transpose', M, N, L, B, LDB, TAU, A, + $ LDA, WORK, INFO ) +* + IF( WANTQ ) THEN +* +* Update Q := Q*Z**T +* + CALL DORMR2( 'Right', 'Transpose', N, N, L, B, LDB, TAU, Q, + $ LDQ, WORK, INFO ) + END IF +* +* Clean up B +* + CALL DLASET( 'Full', L, N-L, ZERO, ZERO, B, LDB ) + DO 60 J = N - L + 1, N + DO 50 I = J - N + L + 1, L + B( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE +* + END IF +* +* Let N-L L +* A = ( A11 A12 ) M, +* +* then the following does the complete QR decomposition of A11: +* +* A11 = U*( 0 T12 )*P1**T +* ( 0 0 ) +* + DO 70 I = 1, N - L + IWORK( I ) = 0 + 70 CONTINUE + CALL DGEQPF( M, N-L, A, LDA, IWORK, TAU, WORK, INFO ) +* +* Determine the effective rank of A11 +* + K = 0 + DO 80 I = 1, MIN( M, N-L ) + IF( ABS( A( I, I ) ).GT.TOLA ) + $ K = K + 1 + 80 CONTINUE +* +* Update A12 := U**T*A12, where A12 = A( 1:M, N-L+1:N ) +* + CALL DORM2R( 'Left', 'Transpose', M, L, MIN( M, N-L ), A, LDA, + $ TAU, A( 1, N-L+1 ), LDA, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Copy the details of U, and form U +* + CALL DLASET( 'Full', M, M, ZERO, ZERO, U, LDU ) + IF( M.GT.1 ) + $ CALL DLACPY( 'Lower', M-1, N-L, A( 2, 1 ), LDA, U( 2, 1 ), + $ LDU ) + CALL DORG2R( M, M, MIN( M, N-L ), U, LDU, TAU, WORK, INFO ) + END IF +* + IF( WANTQ ) THEN +* +* Update Q( 1:N, 1:N-L ) = Q( 1:N, 1:N-L )*P1 +* + CALL DLAPMT( FORWRD, N, N-L, Q, LDQ, IWORK ) + END IF +* +* Clean up A: set the strictly lower triangular part of +* A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. +* + DO 100 J = 1, K - 1 + DO 90 I = J + 1, K + A( I, J ) = ZERO + 90 CONTINUE + 100 CONTINUE + IF( M.GT.K ) + $ CALL DLASET( 'Full', M-K, N-L, ZERO, ZERO, A( K+1, 1 ), LDA ) +* + IF( N-L.GT.K ) THEN +* +* RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 +* + CALL DGERQ2( K, N-L, A, LDA, TAU, WORK, INFO ) +* + IF( WANTQ ) THEN +* +* Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1**T +* + CALL DORMR2( 'Right', 'Transpose', N, N-L, K, A, LDA, TAU, + $ Q, LDQ, WORK, INFO ) + END IF +* +* Clean up A +* + CALL DLASET( 'Full', K, N-L-K, ZERO, ZERO, A, LDA ) + DO 120 J = N - L - K + 1, N - L + DO 110 I = J - N + L + K + 1, K + A( I, J ) = ZERO + 110 CONTINUE + 120 CONTINUE +* + END IF +* + IF( M.GT.K ) THEN +* +* QR factorization of A( K+1:M,N-L+1:N ) +* + CALL DGEQR2( M-K, L, A( K+1, N-L+1 ), LDA, TAU, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Update U(:,K+1:M) := U(:,K+1:M)*U1 +* + CALL DORM2R( 'Right', 'No transpose', M, M-K, MIN( M-K, L ), + $ A( K+1, N-L+1 ), LDA, TAU, U( 1, K+1 ), LDU, + $ WORK, INFO ) + END IF +* +* Clean up +* + DO 140 J = N - L + 1, N + DO 130 I = J - N + K + L + 1, M + A( I, J ) = ZERO + 130 CONTINUE + 140 CONTINUE +* + END IF +* + RETURN +* +* End of DGGSVP +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/dlahrd.f b/dspl/liblapack/SRC/DEPRECATED/dlahrd.f new file mode 100644 index 0000000..9aa04ee --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/dlahrd.f @@ -0,0 +1,286 @@ +*> \brief \b DLAHRD reduces the first nb columns of a general rectangular matrix A so that elements below the k-th subdiagonal are zero, and returns auxiliary matrices which are needed to apply the transformation to the unreduced part of A. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAHRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAHRD( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* .. Scalar Arguments .. +* INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), T( LDT, NB ), TAU( NB ), +* $ Y( LDY, NB ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine DLAHR2. +*> +*> DLAHRD reduces the first NB columns of a real general n-by-(n-k+1) +*> matrix A so that elements below the k-th subdiagonal are zero. The +*> reduction is performed by an orthogonal similarity transformation +*> Q**T * A * Q. The routine returns the matrices V and T which determine +*> Q as a block reflector I - V*T*V**T, and also the matrix Y = A * V * T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The offset for the reduction. Elements below the k-th +*> subdiagonal in the first NB columns are reduced to zero. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of columns to be reduced. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N-K+1) +*> On entry, the n-by-(n-k+1) general matrix A. +*> On exit, the elements on and above the k-th subdiagonal in +*> the first NB columns are overwritten with the corresponding +*> elements of the reduced matrix; the elements below the k-th +*> subdiagonal, with the array TAU, represent the matrix Q as a +*> product of elementary reflectors. The other columns of A are +*> unchanged. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (NB) +*> The scalar factors of the elementary reflectors. See Further +*> Details. +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,NB) +*> The upper triangular matrix T. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, dimension (LDY,NB) +*> The n-by-nb matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= N. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of nb elementary reflectors +*> +*> Q = H(1) H(2) . . . H(nb). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in +*> A(i+k+1:n,i), and tau in TAU(i). +*> +*> The elements of the vectors v together form the (n-k+1)-by-nb matrix +*> V which is needed, with T and Y, to apply the transformation to the +*> unreduced part of the matrix, using an update of the form: +*> A := (I - V*T*V**T) * (A - Y*V**T). +*> +*> The contents of A on exit are illustrated by the following example +*> with n = 7, k = 3 and nb = 2: +*> +*> ( a h a a a ) +*> ( a h a a a ) +*> ( a h a a a ) +*> ( h h a a a ) +*> ( v1 h a a a ) +*> ( v1 v2 a a a ) +*> ( v1 v2 a a a ) +*> +*> where a denotes an element of the original matrix A, h denotes a +*> modified element of the upper Hessenberg matrix H, and vi denotes an +*> element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLAHRD( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), T( LDT, NB ), TAU( NB ), + $ Y( LDY, NB ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION EI +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGEMV, DLARFG, DSCAL, DTRMV +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, NB + IF( I.GT.1 ) THEN +* +* Update A(1:n,i) +* +* Compute i-th column of A - Y * V**T +* + CALL DGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, + $ A( K+I-1, 1 ), LDA, ONE, A( 1, I ), 1 ) +* +* Apply I - V * T**T * V**T to this column (call it b) from the +* left, using the last column of T as workspace +* +* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) +* ( V2 ) ( b2 ) +* +* where V1 is unit lower triangular +* +* w := V1**T * b1 +* + CALL DCOPY( I-1, A( K+1, I ), 1, T( 1, NB ), 1 ) + CALL DTRMV( 'Lower', 'Transpose', 'Unit', I-1, A( K+1, 1 ), + $ LDA, T( 1, NB ), 1 ) +* +* w := w + V2**T *b2 +* + CALL DGEMV( 'Transpose', N-K-I+1, I-1, ONE, A( K+I, 1 ), + $ LDA, A( K+I, I ), 1, ONE, T( 1, NB ), 1 ) +* +* w := T**T *w +* + CALL DTRMV( 'Upper', 'Transpose', 'Non-unit', I-1, T, LDT, + $ T( 1, NB ), 1 ) +* +* b2 := b2 - V2*w +* + CALL DGEMV( 'No transpose', N-K-I+1, I-1, -ONE, A( K+I, 1 ), + $ LDA, T( 1, NB ), 1, ONE, A( K+I, I ), 1 ) +* +* b1 := b1 - V1*w +* + CALL DTRMV( 'Lower', 'No transpose', 'Unit', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) + CALL DAXPY( I-1, -ONE, T( 1, NB ), 1, A( K+1, I ), 1 ) +* + A( K+I-1, I-1 ) = EI + END IF +* +* Generate the elementary reflector H(i) to annihilate +* A(k+i+1:n,i) +* + CALL DLARFG( N-K-I+1, A( K+I, I ), A( MIN( K+I+1, N ), I ), 1, + $ TAU( I ) ) + EI = A( K+I, I ) + A( K+I, I ) = ONE +* +* Compute Y(1:n,i) +* + CALL DGEMV( 'No transpose', N, N-K-I+1, ONE, A( 1, I+1 ), LDA, + $ A( K+I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL DGEMV( 'Transpose', N-K-I+1, I-1, ONE, A( K+I, 1 ), LDA, + $ A( K+I, I ), 1, ZERO, T( 1, I ), 1 ) + CALL DGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, T( 1, I ), 1, + $ ONE, Y( 1, I ), 1 ) + CALL DSCAL( N, TAU( I ), Y( 1, I ), 1 ) +* +* Compute T(1:i,i) +* + CALL DSCAL( I-1, -TAU( I ), T( 1, I ), 1 ) + CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, LDT, + $ T( 1, I ), 1 ) + T( I, I ) = TAU( I ) +* + 10 CONTINUE + A( K+NB, NB ) = EI +* + RETURN +* +* End of DLAHRD +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/dlatzm.f b/dspl/liblapack/SRC/DEPRECATED/dlatzm.f new file mode 100644 index 0000000..73434a2 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/dlatzm.f @@ -0,0 +1,221 @@ +*> \brief \b DLATZM +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLATZM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLATZM( SIDE, M, N, V, INCV, TAU, C1, C2, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C1( LDC, * ), C2( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine DORMRZ. +*> +*> DLATZM applies a Householder matrix generated by DTZRQF to a matrix. +*> +*> Let P = I - tau*u*u**T, u = ( 1 ), +*> ( v ) +*> where v is an (m-1) vector if SIDE = 'L', or a (n-1) vector if +*> SIDE = 'R'. +*> +*> If SIDE equals 'L', let +*> C = [ C1 ] 1 +*> [ C2 ] m-1 +*> n +*> Then C is overwritten by P*C. +*> +*> If SIDE equals 'R', let +*> C = [ C1, C2 ] m +*> 1 n-1 +*> Then C is overwritten by C*P. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form P * C +*> = 'R': form C * P +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of P. V is not used +*> if TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0 +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> The value tau in the representation of P. +*> \endverbatim +*> +*> \param[in,out] C1 +*> \verbatim +*> C1 is DOUBLE PRECISION array, dimension +*> (LDC,N) if SIDE = 'L' +*> (M,1) if SIDE = 'R' +*> On entry, the n-vector C1 if SIDE = 'L', or the m-vector C1 +*> if SIDE = 'R'. +*> +*> On exit, the first row of P*C if SIDE = 'L', or the first +*> column of C*P if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in,out] C2 +*> \verbatim +*> C2 is DOUBLE PRECISION array, dimension +*> (LDC, N) if SIDE = 'L' +*> (LDC, N-1) if SIDE = 'R' +*> On entry, the (m - 1) x n matrix C2 if SIDE = 'L', or the +*> m x (n - 1) matrix C2 if SIDE = 'R'. +*> +*> On exit, rows 2:m of P*C if SIDE = 'L', or columns 2:m of C*P +*> if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the arrays C1 and C2. LDC >= (1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension +*> (N) if SIDE = 'L' +*> (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DLATZM( SIDE, M, N, V, INCV, TAU, C1, C2, LDC, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION C1( LDC, * ), C2( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGEMV, DGER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( ( MIN( M, N ).EQ.0 ) .OR. ( TAU.EQ.ZERO ) ) + $ RETURN +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* w := (C1 + v**T * C2)**T +* + CALL DCOPY( N, C1, LDC, WORK, 1 ) + CALL DGEMV( 'Transpose', M-1, N, ONE, C2, LDC, V, INCV, ONE, + $ WORK, 1 ) +* +* [ C1 ] := [ C1 ] - tau* [ 1 ] * w**T +* [ C2 ] [ C2 ] [ v ] +* + CALL DAXPY( N, -TAU, WORK, 1, C1, LDC ) + CALL DGER( M-1, N, -TAU, V, INCV, WORK, 1, C2, LDC ) +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* w := C1 + C2 * v +* + CALL DCOPY( M, C1, 1, WORK, 1 ) + CALL DGEMV( 'No transpose', M, N-1, ONE, C2, LDC, V, INCV, ONE, + $ WORK, 1 ) +* +* [ C1, C2 ] := [ C1, C2 ] - tau* w * [ 1 , v**T] +* + CALL DAXPY( M, -TAU, WORK, 1, C1, 1 ) + CALL DGER( M, N-1, -TAU, WORK, 1, V, INCV, C2, LDC ) + END IF +* + RETURN +* +* End of DLATZM +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/dtzrqf.f b/dspl/liblapack/SRC/DEPRECATED/dtzrqf.f new file mode 100644 index 0000000..c1cbdc6 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/dtzrqf.f @@ -0,0 +1,232 @@ +*> \brief \b DTZRQF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTZRQF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTZRQF( M, N, A, LDA, TAU, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine DTZRZF. +*> +*> DTZRQF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A +*> to upper triangular form by means of orthogonal transformations. +*> +*> The upper trapezoidal matrix A is factored as +*> +*> A = ( R 0 ) * Z, +*> +*> where Z is an N-by-N orthogonal matrix and R is an M-by-M upper +*> triangular matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= M. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the leading M-by-N upper trapezoidal part of the +*> array A must contain the matrix to be factorized. +*> On exit, the leading M-by-M upper triangular part of A +*> contains the upper triangular matrix R, and elements M+1 to +*> N of the first M rows of A, with the array TAU, represent the +*> orthogonal matrix Z as a product of M elementary reflectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (M) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The factorization is obtained by Householder's method. The kth +*> transformation matrix, Z( k ), which is used to introduce zeros into +*> the ( m - k + 1 )th row of A, is given in the form +*> +*> Z( k ) = ( I 0 ), +*> ( 0 T( k ) ) +*> +*> where +*> +*> T( k ) = I - tau*u( k )*u( k )**T, u( k ) = ( 1 ), +*> ( 0 ) +*> ( z( k ) ) +*> +*> tau is a scalar and z( k ) is an ( n - m ) element vector. +*> tau and z( k ) are chosen to annihilate the elements of the kth row +*> of X. +*> +*> The scalar tau is returned in the kth element of TAU and the vector +*> u( k ) in the kth row of A, such that the elements of z( k ) are +*> in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in +*> the upper triangular part of A. +*> +*> Z is given by +*> +*> Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTZRQF( M, N, A, LDA, TAU, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K, M1 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGEMV, DGER, DLARFG, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTZRQF', -INFO ) + RETURN + END IF +* +* Perform the factorization. +* + IF( M.EQ.0 ) + $ RETURN + IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + ELSE + M1 = MIN( M+1, N ) + DO 20 K = M, 1, -1 +* +* Use a Householder reflection to zero the kth row of A. +* First set up the reflection. +* + CALL DLARFG( N-M+1, A( K, K ), A( K, M1 ), LDA, TAU( K ) ) +* + IF( ( TAU( K ).NE.ZERO ) .AND. ( K.GT.1 ) ) THEN +* +* We now perform the operation A := A*P( k ). +* +* Use the first ( k - 1 ) elements of TAU to store a( k ), +* where a( k ) consists of the first ( k - 1 ) elements of +* the kth column of A. Also let B denote the first +* ( k - 1 ) rows of the last ( n - m ) columns of A. +* + CALL DCOPY( K-1, A( 1, K ), 1, TAU, 1 ) +* +* Form w = a( k ) + B*z( k ) in TAU. +* + CALL DGEMV( 'No transpose', K-1, N-M, ONE, A( 1, M1 ), + $ LDA, A( K, M1 ), LDA, ONE, TAU, 1 ) +* +* Now form a( k ) := a( k ) - tau*w +* and B := B - tau*w*z( k )**T. +* + CALL DAXPY( K-1, -TAU( K ), TAU, 1, A( 1, K ), 1 ) + CALL DGER( K-1, N-M, -TAU( K ), TAU, 1, A( K, M1 ), LDA, + $ A( 1, M1 ), LDA ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of DTZRQF +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/sgegs.f b/dspl/liblapack/SRC/DEPRECATED/sgegs.f new file mode 100644 index 0000000..bf4fbc7 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/sgegs.f @@ -0,0 +1,541 @@ +*> \brief SGEGS computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEGS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEGS( JOBVSL, JOBVSR, N, A, LDA, B, LDB, ALPHAR, +* ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVSL, JOBVSR +* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), +* $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), +* $ VSR( LDVSR, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine SGGES. +*> +*> SGEGS computes the eigenvalues, real Schur form, and, optionally, +*> left and or/right Schur vectors of a real matrix pair (A,B). +*> Given two square matrices A and B, the generalized real Schur +*> factorization has the form +*> +*> A = Q*S*Z**T, B = Q*T*Z**T +*> +*> where Q and Z are orthogonal matrices, T is upper triangular, and S +*> is an upper quasi-triangular matrix with 1-by-1 and 2-by-2 diagonal +*> blocks, the 2-by-2 blocks corresponding to complex conjugate pairs +*> of eigenvalues of (A,B). The columns of Q are the left Schur vectors +*> and the columns of Z are the right Schur vectors. +*> +*> If only the eigenvalues of (A,B) are needed, the driver routine +*> SGEGV should be used instead. See SGEGV for a description of the +*> eigenvalues of the generalized nonsymmetric eigenvalue problem +*> (GNEP). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVSL +*> \verbatim +*> JOBVSL is CHARACTER*1 +*> = 'N': do not compute the left Schur vectors; +*> = 'V': compute the left Schur vectors (returned in VSL). +*> \endverbatim +*> +*> \param[in] JOBVSR +*> \verbatim +*> JOBVSR is CHARACTER*1 +*> = 'N': do not compute the right Schur vectors; +*> = 'V': compute the right Schur vectors (returned in VSR). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VSL, and VSR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the matrix A. +*> On exit, the upper quasi-triangular matrix S from the +*> generalized real Schur factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB, N) +*> On entry, the matrix B. +*> On exit, the upper triangular matrix T from the generalized +*> real Schur factorization. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is REAL array, dimension (N) +*> The real parts of each scalar alpha defining an eigenvalue +*> of GNEP. +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is REAL array, dimension (N) +*> The imaginary parts of each scalar alpha defining an +*> eigenvalue of GNEP. If ALPHAI(j) is zero, then the j-th +*> eigenvalue is real; if positive, then the j-th and (j+1)-st +*> eigenvalues are a complex conjugate pair, with +*> ALPHAI(j+1) = -ALPHAI(j). +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is REAL array, dimension (N) +*> The scalars beta that define the eigenvalues of GNEP. +*> Together, the quantities alpha = (ALPHAR(j),ALPHAI(j)) and +*> beta = BETA(j) represent the j-th eigenvalue of the matrix +*> pair (A,B), in one of the forms lambda = alpha/beta or +*> mu = beta/alpha. Since either lambda or mu may overflow, +*> they should not, in general, be computed. +*> \endverbatim +*> +*> \param[out] VSL +*> \verbatim +*> VSL is REAL array, dimension (LDVSL,N) +*> If JOBVSL = 'V', the matrix of left Schur vectors Q. +*> Not referenced if JOBVSL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSL +*> \verbatim +*> LDVSL is INTEGER +*> The leading dimension of the matrix VSL. LDVSL >=1, and +*> if JOBVSL = 'V', LDVSL >= N. +*> \endverbatim +*> +*> \param[out] VSR +*> \verbatim +*> VSR is REAL array, dimension (LDVSR,N) +*> If JOBVSR = 'V', the matrix of right Schur vectors Z. +*> Not referenced if JOBVSR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSR +*> \verbatim +*> LDVSR is INTEGER +*> The leading dimension of the matrix VSR. LDVSR >= 1, and +*> if JOBVSR = 'V', LDVSR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,4*N). +*> For good performance, LWORK must generally be larger. +*> To compute the optimal value of LWORK, call ILAENV to get +*> blocksizes (for SGEQRF, SORMQR, and SORGQR.) Then compute: +*> NB -- MAX of the blocksizes for SGEQRF, SORMQR, and SORGQR +*> The optimal LWORK is 2*N + N*(NB+1). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1,...,N: +*> The QZ iteration failed. (A,B) are not in Schur +*> form, but ALPHAR(j), ALPHAI(j), and BETA(j) should +*> be correct for j=INFO+1,...,N. +*> > N: errors that usually indicate LAPACK problems: +*> =N+1: error return from SGGBAL +*> =N+2: error return from SGEQRF +*> =N+3: error return from SORMQR +*> =N+4: error return from SORGQR +*> =N+5: error return from SGGHRD +*> =N+6: error return from SHGEQZ (other than failed +*> iteration) +*> =N+7: error return from SGGBAK (computing VSL) +*> =N+8: error return from SGGBAK (computing VSR) +*> =N+9: error return from SLASCL (various places) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEeigen +* +* ===================================================================== + SUBROUTINE SGEGS( JOBVSL, JOBVSR, N, A, LDA, B, LDB, ALPHAR, + $ ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), + $ VSR( LDVSR, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILVSL, ILVSR, LQUERY + INTEGER ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, + $ ILO, IRIGHT, IROWS, ITAU, IWORK, LOPT, LWKMIN, + $ LWKOPT, NB, NB1, NB2, NB3 + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SAFMIN, SMLNUM +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLACPY, + $ SLASCL, SLASET, SORGQR, SORMQR, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL ILAENV, LSAME, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* +* Test the input arguments +* + LWKMIN = MAX( 4*N, 1 ) + LWKOPT = LWKMIN + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -12 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -14 + ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -16 + END IF +* + IF( INFO.EQ.0 ) THEN + NB1 = ILAENV( 1, 'SGEQRF', ' ', N, N, -1, -1 ) + NB2 = ILAENV( 1, 'SORMQR', ' ', N, N, N, -1 ) + NB3 = ILAENV( 1, 'SORGQR', ' ', N, N, N, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LOPT = 2*N+N*(NB+1) + WORK( 1 ) = LOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEGS ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'E' )*SLAMCH( 'B' ) + SAFMIN = SLAMCH( 'S' ) + SMLNUM = N*SAFMIN / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF +* + IF( ILASCL ) THEN + CALL SLASCL( 'G', -1, -1, ANRM, ANRMTO, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = SLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF +* + IF( ILBSCL ) THEN + CALL SLASCL( 'G', -1, -1, BNRM, BNRMTO, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* +* Permute the matrix to make it more nearly triangular +* Workspace layout: (2*N words -- "work..." not actually used) +* left_permutation, right_permutation, work... +* + ILEFT = 1 + IRIGHT = N + 1 + IWORK = IRIGHT + N + CALL SGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWORK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 1 + GO TO 10 + END IF +* +* Reduce B to triangular form, and initialize VSL and/or VSR +* Workspace layout: ("work..." must have at least N words) +* left_permutation, right_permutation, tau, work... +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = IWORK + IWORK = ITAU + IROWS + CALL SGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 2 + GO TO 10 + END IF +* + CALL SORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ), + $ LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 3 + GO TO 10 + END IF +* + IF( ILVSL ) THEN + CALL SLASET( 'Full', N, N, ZERO, ONE, VSL, LDVSL ) + CALL SLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + CALL SORGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK, + $ IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 4 + GO TO 10 + END IF + END IF +* + IF( ILVSR ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* + CALL SGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 5 + GO TO 10 + END IF +* +* Perform QZ algorithm, computing Schur vectors if desired +* Workspace layout: ("work..." must have at least 1 word) +* left_permutation, right_permutation, work... +* + IWORK = ITAU + CALL SHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN + INFO = IINFO + ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN + INFO = IINFO - N + ELSE + INFO = N + 6 + END IF + GO TO 10 + END IF +* +* Apply permutation to VSL and VSR +* + IF( ILVSL ) THEN + CALL SGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSL, LDVSL, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 7 + GO TO 10 + END IF + END IF + IF( ILVSR ) THEN + CALL SGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSR, LDVSR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 8 + GO TO 10 + END IF + END IF +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL SLASCL( 'H', -1, -1, ANRMTO, ANRM, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + CALL SLASCL( 'G', -1, -1, ANRMTO, ANRM, N, 1, ALPHAR, N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + CALL SLASCL( 'G', -1, -1, ANRMTO, ANRM, N, 1, ALPHAI, N, + $ IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* + IF( ILBSCL ) THEN + CALL SLASCL( 'U', -1, -1, BNRMTO, BNRM, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + CALL SLASCL( 'G', -1, -1, BNRMTO, BNRM, N, 1, BETA, N, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* + 10 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SGEGS +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/sgegv.f b/dspl/liblapack/SRC/DEPRECATED/sgegv.f new file mode 100644 index 0000000..9ffda66 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/sgegv.f @@ -0,0 +1,769 @@ +*> \brief SGEEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEGV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI, +* BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVL, JOBVR +* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), +* $ B( LDB, * ), BETA( * ), VL( LDVL, * ), +* $ VR( LDVR, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine SGGEV. +*> +*> SGEGV computes the eigenvalues and, optionally, the left and/or right +*> eigenvectors of a real matrix pair (A,B). +*> Given two square matrices A and B, +*> the generalized nonsymmetric eigenvalue problem (GNEP) is to find the +*> eigenvalues lambda and corresponding (non-zero) eigenvectors x such +*> that +*> +*> A*x = lambda*B*x. +*> +*> An alternate form is to find the eigenvalues mu and corresponding +*> eigenvectors y such that +*> +*> mu*A*y = B*y. +*> +*> These two forms are equivalent with mu = 1/lambda and x = y if +*> neither lambda nor mu is zero. In order to deal with the case that +*> lambda or mu is zero or small, two values alpha and beta are returned +*> for each eigenvalue, such that lambda = alpha/beta and +*> mu = beta/alpha. +*> +*> The vectors x and y in the above equations are right eigenvectors of +*> the matrix pair (A,B). Vectors u and v satisfying +*> +*> u**H*A = lambda*u**H*B or mu*v**H*A = v**H*B +*> +*> are left eigenvectors of (A,B). +*> +*> Note: this routine performs "full balancing" on A and B +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': do not compute the left generalized eigenvectors; +*> = 'V': compute the left generalized eigenvectors (returned +*> in VL). +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': do not compute the right generalized eigenvectors; +*> = 'V': compute the right generalized eigenvectors (returned +*> in VR). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VL, and VR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the matrix A. +*> If JOBVL = 'V' or JOBVR = 'V', then on exit A +*> contains the real Schur form of A from the generalized Schur +*> factorization of the pair (A,B) after balancing. +*> If no eigenvectors were computed, then only the diagonal +*> blocks from the Schur form will be correct. See SGGHRD and +*> SHGEQZ for details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB, N) +*> On entry, the matrix B. +*> If JOBVL = 'V' or JOBVR = 'V', then on exit B contains the +*> upper triangular matrix obtained from B in the generalized +*> Schur factorization of the pair (A,B) after balancing. +*> If no eigenvectors were computed, then only those elements of +*> B corresponding to the diagonal blocks from the Schur form of +*> A will be correct. See SGGHRD and SHGEQZ for details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is REAL array, dimension (N) +*> The real parts of each scalar alpha defining an eigenvalue of +*> GNEP. +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is REAL array, dimension (N) +*> The imaginary parts of each scalar alpha defining an +*> eigenvalue of GNEP. If ALPHAI(j) is zero, then the j-th +*> eigenvalue is real; if positive, then the j-th and +*> (j+1)-st eigenvalues are a complex conjugate pair, with +*> ALPHAI(j+1) = -ALPHAI(j). +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is REAL array, dimension (N) +*> The scalars beta that define the eigenvalues of GNEP. +*> +*> Together, the quantities alpha = (ALPHAR(j),ALPHAI(j)) and +*> beta = BETA(j) represent the j-th eigenvalue of the matrix +*> pair (A,B), in one of the forms lambda = alpha/beta or +*> mu = beta/alpha. Since either lambda or mu may overflow, +*> they should not, in general, be computed. +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is REAL array, dimension (LDVL,N) +*> If JOBVL = 'V', the left eigenvectors u(j) are stored +*> in the columns of VL, in the same order as their eigenvalues. +*> If the j-th eigenvalue is real, then u(j) = VL(:,j). +*> If the j-th and (j+1)-st eigenvalues form a complex conjugate +*> pair, then +*> u(j) = VL(:,j) + i*VL(:,j+1) +*> and +*> u(j+1) = VL(:,j) - i*VL(:,j+1). +*> +*> Each eigenvector is scaled so that its largest component has +*> abs(real part) + abs(imag. part) = 1, except for eigenvectors +*> corresponding to an eigenvalue with alpha = beta = 0, which +*> are set to zero. +*> Not referenced if JOBVL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the matrix VL. LDVL >= 1, and +*> if JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is REAL array, dimension (LDVR,N) +*> If JOBVR = 'V', the right eigenvectors x(j) are stored +*> in the columns of VR, in the same order as their eigenvalues. +*> If the j-th eigenvalue is real, then x(j) = VR(:,j). +*> If the j-th and (j+1)-st eigenvalues form a complex conjugate +*> pair, then +*> x(j) = VR(:,j) + i*VR(:,j+1) +*> and +*> x(j+1) = VR(:,j) - i*VR(:,j+1). +*> +*> Each eigenvector is scaled so that its largest component has +*> abs(real part) + abs(imag. part) = 1, except for eigenvalues +*> corresponding to an eigenvalue with alpha = beta = 0, which +*> are set to zero. +*> Not referenced if JOBVR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the matrix VR. LDVR >= 1, and +*> if JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,8*N). +*> For good performance, LWORK must generally be larger. +*> To compute the optimal value of LWORK, call ILAENV to get +*> blocksizes (for SGEQRF, SORMQR, and SORGQR.) Then compute: +*> NB -- MAX of the blocksizes for SGEQRF, SORMQR, and SORGQR; +*> The optimal LWORK is: +*> 2*N + MAX( 6*N, N*(NB+1) ). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1,...,N: +*> The QZ iteration failed. No eigenvectors have been +*> calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) +*> should be correct for j=INFO+1,...,N. +*> > N: errors that usually indicate LAPACK problems: +*> =N+1: error return from SGGBAL +*> =N+2: error return from SGEQRF +*> =N+3: error return from SORMQR +*> =N+4: error return from SORGQR +*> =N+5: error return from SGGHRD +*> =N+6: error return from SHGEQZ (other than failed +*> iteration) +*> =N+7: error return from STGEVC +*> =N+8: error return from SGGBAK (computing VL) +*> =N+9: error return from SGGBAK (computing VR) +*> =N+10: error return from SLASCL (various calls) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Balancing +*> --------- +*> +*> This driver calls SGGBAL to both permute and scale rows and columns +*> of A and B. The permutations PL and PR are chosen so that PL*A*PR +*> and PL*B*R will be upper triangular except for the diagonal blocks +*> A(i:j,i:j) and B(i:j,i:j), with i and j as close together as +*> possible. The diagonal scaling matrices DL and DR are chosen so +*> that the pair DL*PL*A*PR*DR, DL*PL*B*PR*DR have elements close to +*> one (except for the elements that start out zero.) +*> +*> After the eigenvalues and eigenvectors of the balanced matrices +*> have been computed, SGGBAK transforms the eigenvectors back to what +*> they would have been (in perfect arithmetic) if they had not been +*> balanced. +*> +*> Contents of A and B on Exit +*> -------- -- - --- - -- ---- +*> +*> If any eigenvectors are computed (either JOBVL='V' or JOBVR='V' or +*> both), then on exit the arrays A and B will contain the real Schur +*> form[*] of the "balanced" versions of A and B. If no eigenvectors +*> are computed, then only the diagonal blocks will be correct. +*> +*> [*] See SHGEQZ, SGEGS, or read the book "Matrix Computations", +*> by Golub & van Loan, pub. by Johns Hopkins U. Press. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGEGV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI, + $ BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL ILIMIT, ILV, ILVL, ILVR, LQUERY + CHARACTER CHTEMP + INTEGER ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, ILO, + $ IN, IRIGHT, IROWS, ITAU, IWORK, JC, JR, LOPT, + $ LWKMIN, LWKOPT, NB, NB1, NB2, NB3 + REAL ABSAI, ABSAR, ABSB, ANRM, ANRM1, ANRM2, BNRM, + $ BNRM1, BNRM2, EPS, ONEPLS, SAFMAX, SAFMIN, + $ SALFAI, SALFAR, SBETA, SCALE, TEMP +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLACPY, + $ SLASCL, SLASET, SORGQR, SORMQR, STGEVC, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL ILAENV, LSAME, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, MAX +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* +* Test the input arguments +* + LWKMIN = MAX( 8*N, 1 ) + LWKOPT = LWKMIN + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -12 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -14 + ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -16 + END IF +* + IF( INFO.EQ.0 ) THEN + NB1 = ILAENV( 1, 'SGEQRF', ' ', N, N, -1, -1 ) + NB2 = ILAENV( 1, 'SORMQR', ' ', N, N, N, -1 ) + NB3 = ILAENV( 1, 'SORGQR', ' ', N, N, N, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LOPT = 2*N + MAX( 6*N, N*(NB+1) ) + WORK( 1 ) = LOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEGV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'E' )*SLAMCH( 'B' ) + SAFMIN = SLAMCH( 'S' ) + SAFMIN = SAFMIN + SAFMIN + SAFMAX = ONE / SAFMIN + ONEPLS = ONE + ( 4*EPS ) +* +* Scale A +* + ANRM = SLANGE( 'M', N, N, A, LDA, WORK ) + ANRM1 = ANRM + ANRM2 = ONE + IF( ANRM.LT.ONE ) THEN + IF( SAFMAX*ANRM.LT.ONE ) THEN + ANRM1 = SAFMIN + ANRM2 = SAFMAX*ANRM + END IF + END IF +* + IF( ANRM.GT.ZERO ) THEN + CALL SLASCL( 'G', -1, -1, ANRM, ONE, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 10 + RETURN + END IF + END IF +* +* Scale B +* + BNRM = SLANGE( 'M', N, N, B, LDB, WORK ) + BNRM1 = BNRM + BNRM2 = ONE + IF( BNRM.LT.ONE ) THEN + IF( SAFMAX*BNRM.LT.ONE ) THEN + BNRM1 = SAFMIN + BNRM2 = SAFMAX*BNRM + END IF + END IF +* + IF( BNRM.GT.ZERO ) THEN + CALL SLASCL( 'G', -1, -1, BNRM, ONE, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 10 + RETURN + END IF + END IF +* +* Permute the matrix to make it more nearly triangular +* Workspace layout: (8*N words -- "work" requires 6*N words) +* left_permutation, right_permutation, work... +* + ILEFT = 1 + IRIGHT = N + 1 + IWORK = IRIGHT + N + CALL SGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWORK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 1 + GO TO 120 + END IF +* +* Reduce B to triangular form, and initialize VL and/or VR +* Workspace layout: ("work..." must have at least N words) +* left_permutation, right_permutation, tau, work... +* + IROWS = IHI + 1 - ILO + IF( ILV ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = IWORK + IWORK = ITAU + IROWS + CALL SGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 2 + GO TO 120 + END IF +* + CALL SORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ), + $ LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 3 + GO TO 120 + END IF +* + IF( ILVL ) THEN + CALL SLASET( 'Full', N, N, ZERO, ONE, VL, LDVL ) + CALL SLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + CALL SORGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK, + $ IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 4 + GO TO 120 + END IF + END IF +* + IF( ILVR ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* + IF( ILV ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL SGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IINFO ) + ELSE + CALL SGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IINFO ) + END IF + IF( IINFO.NE.0 ) THEN + INFO = N + 5 + GO TO 120 + END IF +* +* Perform QZ algorithm +* Workspace layout: ("work..." must have at least 1 word) +* left_permutation, right_permutation, work... +* + IWORK = ITAU + IF( ILV ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF + CALL SHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN + INFO = IINFO + ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN + INFO = IINFO - N + ELSE + INFO = N + 6 + END IF + GO TO 120 + END IF +* + IF( ILV ) THEN +* +* Compute Eigenvectors (STGEVC requires 6*N words of workspace) +* + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL STGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, N, IN, WORK( IWORK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 7 + GO TO 120 + END IF +* +* Undo balancing on VL and VR, rescale +* + IF( ILVL ) THEN + CALL SGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VL, LDVL, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 8 + GO TO 120 + END IF + DO 50 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 50 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 10 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) ) ) + 10 CONTINUE + ELSE + DO 20 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) )+ + $ ABS( VL( JR, JC+1 ) ) ) + 20 CONTINUE + END IF + IF( TEMP.LT.SAFMIN ) + $ GO TO 50 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 30 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 30 CONTINUE + ELSE + DO 40 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + VL( JR, JC+1 ) = VL( JR, JC+1 )*TEMP + 40 CONTINUE + END IF + 50 CONTINUE + END IF + IF( ILVR ) THEN + CALL SGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VR, LDVR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + GO TO 120 + END IF + DO 100 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 100 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 60 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) ) ) + 60 CONTINUE + ELSE + DO 70 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) )+ + $ ABS( VR( JR, JC+1 ) ) ) + 70 CONTINUE + END IF + IF( TEMP.LT.SAFMIN ) + $ GO TO 100 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 80 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 80 CONTINUE + ELSE + DO 90 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + VR( JR, JC+1 ) = VR( JR, JC+1 )*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + END IF +* +* End of eigenvector calculation +* + END IF +* +* Undo scaling in alpha, beta +* +* Note: this does not give the alpha and beta for the unscaled +* problem. +* +* Un-scaling is limited to avoid underflow in alpha and beta +* if they are significant. +* + DO 110 JC = 1, N + ABSAR = ABS( ALPHAR( JC ) ) + ABSAI = ABS( ALPHAI( JC ) ) + ABSB = ABS( BETA( JC ) ) + SALFAR = ANRM*ALPHAR( JC ) + SALFAI = ANRM*ALPHAI( JC ) + SBETA = BNRM*BETA( JC ) + ILIMIT = .FALSE. + SCALE = ONE +* +* Check for significant underflow in ALPHAI +* + IF( ABS( SALFAI ).LT.SAFMIN .AND. ABSAI.GE. + $ MAX( SAFMIN, EPS*ABSAR, EPS*ABSB ) ) THEN + ILIMIT = .TRUE. + SCALE = ( ONEPLS*SAFMIN / ANRM1 ) / + $ MAX( ONEPLS*SAFMIN, ANRM2*ABSAI ) +* + ELSE IF( SALFAI.EQ.ZERO ) THEN +* +* If insignificant underflow in ALPHAI, then make the +* conjugate eigenvalue real. +* + IF( ALPHAI( JC ).LT.ZERO .AND. JC.GT.1 ) THEN + ALPHAI( JC-1 ) = ZERO + ELSE IF( ALPHAI( JC ).GT.ZERO .AND. JC.LT.N ) THEN + ALPHAI( JC+1 ) = ZERO + END IF + END IF +* +* Check for significant underflow in ALPHAR +* + IF( ABS( SALFAR ).LT.SAFMIN .AND. ABSAR.GE. + $ MAX( SAFMIN, EPS*ABSAI, EPS*ABSB ) ) THEN + ILIMIT = .TRUE. + SCALE = MAX( SCALE, ( ONEPLS*SAFMIN / ANRM1 ) / + $ MAX( ONEPLS*SAFMIN, ANRM2*ABSAR ) ) + END IF +* +* Check for significant underflow in BETA +* + IF( ABS( SBETA ).LT.SAFMIN .AND. ABSB.GE. + $ MAX( SAFMIN, EPS*ABSAR, EPS*ABSAI ) ) THEN + ILIMIT = .TRUE. + SCALE = MAX( SCALE, ( ONEPLS*SAFMIN / BNRM1 ) / + $ MAX( ONEPLS*SAFMIN, BNRM2*ABSB ) ) + END IF +* +* Check for possible overflow when limiting scaling +* + IF( ILIMIT ) THEN + TEMP = ( SCALE*SAFMIN )*MAX( ABS( SALFAR ), ABS( SALFAI ), + $ ABS( SBETA ) ) + IF( TEMP.GT.ONE ) + $ SCALE = SCALE / TEMP + IF( SCALE.LT.ONE ) + $ ILIMIT = .FALSE. + END IF +* +* Recompute un-scaled ALPHAR, ALPHAI, BETA if necessary. +* + IF( ILIMIT ) THEN + SALFAR = ( SCALE*ALPHAR( JC ) )*ANRM + SALFAI = ( SCALE*ALPHAI( JC ) )*ANRM + SBETA = ( SCALE*BETA( JC ) )*BNRM + END IF + ALPHAR( JC ) = SALFAR + ALPHAI( JC ) = SALFAI + BETA( JC ) = SBETA + 110 CONTINUE +* + 120 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SGEGV +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/sgelsx.f b/dspl/liblapack/SRC/DEPRECATED/sgelsx.f new file mode 100644 index 0000000..f45ff0f --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/sgelsx.f @@ -0,0 +1,435 @@ +*> \brief SGELSX solves overdetermined or underdetermined systems for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGELSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGELSX( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, M, N, NRHS, RANK +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine SGELSY. +*> +*> SGELSX computes the minimum-norm solution to a real linear least +*> squares problem: +*> minimize || A * X - B || +*> using a complete orthogonal factorization of A. A is an M-by-N +*> matrix which may be rank-deficient. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> +*> The routine first computes a QR factorization with column pivoting: +*> A * P = Q * [ R11 R12 ] +*> [ 0 R22 ] +*> with R11 defined as the largest leading submatrix whose estimated +*> condition number is less than 1/RCOND. The order of R11, RANK, +*> is the effective rank of A. +*> +*> Then, R22 is considered to be negligible, and R12 is annihilated +*> by orthogonal transformations from the right, arriving at the +*> complete orthogonal factorization: +*> A * P = Q * [ T11 0 ] * Z +*> [ 0 0 ] +*> The minimum-norm solution is then +*> X = P * Z**T [ inv(T11)*Q1**T*B ] +*> [ 0 ] +*> where Q1 consists of the first RANK columns of Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A has been overwritten by details of its +*> complete orthogonal factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the M-by-NRHS right hand side matrix B. +*> On exit, the N-by-NRHS solution matrix X. +*> If m >= n and RANK = n, the residual sum-of-squares for +*> the solution in the i-th column is given by the sum of +*> squares of elements N+1:M in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M,N). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> On entry, if JPVT(i) .ne. 0, the i-th column of A is an +*> initial column, otherwise it is a free column. Before +*> the QR factorization of A, all initial columns are +*> permuted to the leading positions; only the remaining +*> free columns are moved as a result of column pivoting +*> during the factorization. +*> On exit, if JPVT(i) = k, then the i-th column of A*P +*> was the k-th column of A. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is REAL +*> RCOND is used to determine the effective rank of A, which +*> is defined as the order of the largest leading triangular +*> submatrix R11 in the QR factorization with pivoting of A, +*> whose estimated condition number < 1/RCOND. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The effective rank of A, i.e., the order of the submatrix +*> R11. This is the same as the order of the submatrix T11 +*> in the complete orthogonal factorization of A. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension +*> (max( min(M,N)+3*N, 2*min(M,N)+NRHS )), +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEsolve +* +* ===================================================================== + SUBROUTINE SGELSX( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, + $ WORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IMAX, IMIN + PARAMETER ( IMAX = 1, IMIN = 2 ) + REAL ZERO, ONE, DONE, NTDONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, DONE = ZERO, + $ NTDONE = ONE ) +* .. +* .. Local Scalars .. + INTEGER I, IASCL, IBSCL, ISMAX, ISMIN, J, K, MN + REAL ANRM, BIGNUM, BNRM, C1, C2, S1, S2, SMAX, + $ SMAXPR, SMIN, SMINPR, SMLNUM, T1, T2 +* .. +* .. External Functions .. + REAL SLAMCH, SLANGE + EXTERNAL SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SGEQPF, SLABAD, SLAIC1, SLASCL, SLASET, SLATZM, + $ SORM2R, STRSM, STZRQF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + MN = MIN( M, N ) + ISMIN = MN + 1 + ISMAX = 2*MN + 1 +* +* Test the input arguments. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELSX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max elements outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + RANK = 0 + GO TO 100 + END IF +* + BNRM = SLANGE( 'M', M, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Compute QR factorization with column pivoting of A: +* A * P = Q * R +* + CALL SGEQPF( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), INFO ) +* +* workspace 3*N. Details of Householder rotations stored +* in WORK(1:MN). +* +* Determine RANK using incremental condition estimation +* + WORK( ISMIN ) = ONE + WORK( ISMAX ) = ONE + SMAX = ABS( A( 1, 1 ) ) + SMIN = SMAX + IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN + RANK = 0 + CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + GO TO 100 + ELSE + RANK = 1 + END IF +* + 10 CONTINUE + IF( RANK.LT.MN ) THEN + I = RANK + 1 + CALL SLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ), + $ A( I, I ), SMINPR, S1, C1 ) + CALL SLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ), + $ A( I, I ), SMAXPR, S2, C2 ) +* + IF( SMAXPR*RCOND.LE.SMINPR ) THEN + DO 20 I = 1, RANK + WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 ) + WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 ) + 20 CONTINUE + WORK( ISMIN+RANK ) = C1 + WORK( ISMAX+RANK ) = C2 + SMIN = SMINPR + SMAX = SMAXPR + RANK = RANK + 1 + GO TO 10 + END IF + END IF +* +* Logically partition R = [ R11 R12 ] +* [ 0 R22 ] +* where R11 = R(1:RANK,1:RANK) +* +* [R11,R12] = [ T11, 0 ] * Y +* + IF( RANK.LT.N ) + $ CALL STZRQF( RANK, N, A, LDA, WORK( MN+1 ), INFO ) +* +* Details of Householder rotations stored in WORK(MN+1:2*MN) +* +* B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) +* + CALL SORM2R( 'Left', 'Transpose', M, NRHS, MN, A, LDA, WORK( 1 ), + $ B, LDB, WORK( 2*MN+1 ), INFO ) +* +* workspace NRHS +* +* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) +* + CALL STRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK, + $ NRHS, ONE, A, LDA, B, LDB ) +* + DO 40 I = RANK + 1, N + DO 30 J = 1, NRHS + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Y**T * B(1:N,1:NRHS) +* + IF( RANK.LT.N ) THEN + DO 50 I = 1, RANK + CALL SLATZM( 'Left', N-RANK+1, NRHS, A( I, RANK+1 ), LDA, + $ WORK( MN+I ), B( I, 1 ), B( RANK+1, 1 ), LDB, + $ WORK( 2*MN+1 ) ) + 50 CONTINUE + END IF +* +* workspace NRHS +* +* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) +* + DO 90 J = 1, NRHS + DO 60 I = 1, N + WORK( 2*MN+I ) = NTDONE + 60 CONTINUE + DO 80 I = 1, N + IF( WORK( 2*MN+I ).EQ.NTDONE ) THEN + IF( JPVT( I ).NE.I ) THEN + K = I + T1 = B( K, J ) + T2 = B( JPVT( K ), J ) + 70 CONTINUE + B( JPVT( K ), J ) = T1 + WORK( 2*MN+K ) = DONE + T1 = T2 + K = JPVT( K ) + T2 = B( JPVT( K ), J ) + IF( JPVT( K ).NE.I ) + $ GO TO 70 + B( I, J ) = T1 + WORK( 2*MN+K ) = DONE + END IF + END IF + 80 CONTINUE + 90 CONTINUE +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 100 CONTINUE +* + RETURN +* +* End of SGELSX +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/sgeqpf.f b/dspl/liblapack/SRC/DEPRECATED/sgeqpf.f new file mode 100644 index 0000000..45828d2 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/sgeqpf.f @@ -0,0 +1,306 @@ +*> \brief \b SGEQPF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEQPF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEQPF( M, N, A, LDA, JPVT, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine SGEQP3. +*> +*> SGEQPF computes a QR factorization with column pivoting of a +*> real M-by-N matrix A: A*P = Q*R. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0 +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the upper triangle of the array contains the +*> min(M,N)-by-N upper triangular matrix R; the elements +*> below the diagonal, together with the array TAU, +*> represent the orthogonal matrix Q as a product of +*> min(m,n) elementary reflectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +*> to the front of A*P (a leading column); if JPVT(i) = 0, +*> the i-th column of A is a free column. +*> On exit, if JPVT(i) = k, then the i-th column of A*P +*> was the k-th column of A. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(n) +*> +*> Each H(i) has the form +*> +*> H = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i). +*> +*> The matrix P is represented in jpvt as follows: If +*> jpvt(j) = i +*> then the jth column of P is the ith canonical unit vector. +*> +*> Partial column norm updating strategy modified by +*> Z. Drmac and Z. Bujanovic, Dept. of Mathematics, +*> University of Zagreb, Croatia. +*> -- April 2011 -- +*> For more details see LAPACK Working Note 176. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGEQPF( M, N, A, LDA, JPVT, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, ITEMP, J, MA, MN, PVT + REAL AII, TEMP, TEMP2, TOL3Z +* .. +* .. External Subroutines .. + EXTERNAL SGEQR2, SLARF, SLARFG, SORM2R, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH, SNRM2 + EXTERNAL ISAMAX, SLAMCH, SNRM2 +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQPF', -INFO ) + RETURN + END IF +* + MN = MIN( M, N ) + TOL3Z = SQRT(SLAMCH('Epsilon')) +* +* Move initial columns up front +* + ITEMP = 1 + DO 10 I = 1, N + IF( JPVT( I ).NE.0 ) THEN + IF( I.NE.ITEMP ) THEN + CALL SSWAP( M, A( 1, I ), 1, A( 1, ITEMP ), 1 ) + JPVT( I ) = JPVT( ITEMP ) + JPVT( ITEMP ) = I + ELSE + JPVT( I ) = I + END IF + ITEMP = ITEMP + 1 + ELSE + JPVT( I ) = I + END IF + 10 CONTINUE + ITEMP = ITEMP - 1 +* +* Compute the QR factorization and update remaining columns +* + IF( ITEMP.GT.0 ) THEN + MA = MIN( ITEMP, M ) + CALL SGEQR2( M, MA, A, LDA, TAU, WORK, INFO ) + IF( MA.LT.N ) THEN + CALL SORM2R( 'Left', 'Transpose', M, N-MA, MA, A, LDA, TAU, + $ A( 1, MA+1 ), LDA, WORK, INFO ) + END IF + END IF +* + IF( ITEMP.LT.MN ) THEN +* +* Initialize partial column norms. The first n elements of +* work store the exact column norms. +* + DO 20 I = ITEMP + 1, N + WORK( I ) = SNRM2( M-ITEMP, A( ITEMP+1, I ), 1 ) + WORK( N+I ) = WORK( I ) + 20 CONTINUE +* +* Compute factorization +* + DO 40 I = ITEMP + 1, MN +* +* Determine ith pivot column and swap if necessary +* + PVT = ( I-1 ) + ISAMAX( N-I+1, WORK( I ), 1 ) +* + IF( PVT.NE.I ) THEN + CALL SSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( I ) + JPVT( I ) = ITEMP + WORK( PVT ) = WORK( I ) + WORK( N+PVT ) = WORK( N+I ) + END IF +* +* Generate elementary reflector H(i) +* + IF( I.LT.M ) THEN + CALL SLARFG( M-I+1, A( I, I ), A( I+1, I ), 1, TAU( I ) ) + ELSE + CALL SLARFG( 1, A( M, M ), A( M, M ), 1, TAU( M ) ) + END IF +* + IF( I.LT.N ) THEN +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + AII = A( I, I ) + A( I, I ) = ONE + CALL SLARF( 'LEFT', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK( 2*N+1 ) ) + A( I, I ) = AII + END IF +* +* Update partial column norms +* + DO 30 J = I + 1, N + IF( WORK( J ).NE.ZERO ) THEN +* +* NOTE: The following 4 lines follow from the analysis in +* Lapack Working Note 176. +* + TEMP = ABS( A( I, J ) ) / WORK( J ) + TEMP = MAX( ZERO, ( ONE+TEMP )*( ONE-TEMP ) ) + TEMP2 = TEMP*( WORK( J ) / WORK( N+J ) )**2 + IF( TEMP2 .LE. TOL3Z ) THEN + IF( M-I.GT.0 ) THEN + WORK( J ) = SNRM2( M-I, A( I+1, J ), 1 ) + WORK( N+J ) = WORK( J ) + ELSE + WORK( J ) = ZERO + WORK( N+J ) = ZERO + END IF + ELSE + WORK( J ) = WORK( J )*SQRT( TEMP ) + END IF + END IF + 30 CONTINUE +* + 40 CONTINUE + END IF + RETURN +* +* End of SGEQPF +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/sggsvd.f b/dspl/liblapack/SRC/DEPRECATED/sggsvd.f new file mode 100644 index 0000000..9ab0011 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/sggsvd.f @@ -0,0 +1,464 @@ +*> \brief SGGSVD computes the singular value decomposition (SVD) for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGGSVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGGSVD( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, +* LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBQ, JOBU, JOBV +* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), Q( LDQ, * ), U( LDU, * ), +* $ V( LDV, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine SGGSVD3. +*> +*> SGGSVD computes the generalized singular value decomposition (GSVD) +*> of an M-by-N real matrix A and P-by-N real matrix B: +*> +*> U**T*A*Q = D1*( 0 R ), V**T*B*Q = D2*( 0 R ) +*> +*> where U, V and Q are orthogonal matrices. +*> Let K+L = the effective numerical rank of the matrix (A**T,B**T)**T, +*> then R is a K+L-by-K+L nonsingular upper triangular matrix, D1 and +*> D2 are M-by-(K+L) and P-by-(K+L) "diagonal" matrices and of the +*> following structures, respectively: +*> +*> If M-K-L >= 0, +*> +*> K L +*> D1 = K ( I 0 ) +*> L ( 0 C ) +*> M-K-L ( 0 0 ) +*> +*> K L +*> D2 = L ( 0 S ) +*> P-L ( 0 0 ) +*> +*> N-K-L K L +*> ( 0 R ) = K ( 0 R11 R12 ) +*> L ( 0 0 R22 ) +*> +*> where +*> +*> C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), +*> S = diag( BETA(K+1), ... , BETA(K+L) ), +*> C**2 + S**2 = I. +*> +*> R is stored in A(1:K+L,N-K-L+1:N) on exit. +*> +*> If M-K-L < 0, +*> +*> K M-K K+L-M +*> D1 = K ( I 0 0 ) +*> M-K ( 0 C 0 ) +*> +*> K M-K K+L-M +*> D2 = M-K ( 0 S 0 ) +*> K+L-M ( 0 0 I ) +*> P-L ( 0 0 0 ) +*> +*> N-K-L K M-K K+L-M +*> ( 0 R ) = K ( 0 R11 R12 R13 ) +*> M-K ( 0 0 R22 R23 ) +*> K+L-M ( 0 0 0 R33 ) +*> +*> where +*> +*> C = diag( ALPHA(K+1), ... , ALPHA(M) ), +*> S = diag( BETA(K+1), ... , BETA(M) ), +*> C**2 + S**2 = I. +*> +*> (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored +*> ( 0 R22 R23 ) +*> in B(M-K+1:L,N+M-K-L+1:N) on exit. +*> +*> The routine computes C, S, R, and optionally the orthogonal +*> transformation matrices U, V and Q. +*> +*> In particular, if B is an N-by-N nonsingular matrix, then the GSVD of +*> A and B implicitly gives the SVD of A*inv(B): +*> A*inv(B) = U*(D1*inv(D2))*V**T. +*> If ( A**T,B**T)**T has orthonormal columns, then the GSVD of A and B is +*> also equal to the CS decomposition of A and B. Furthermore, the GSVD +*> can be used to derive the solution of the eigenvalue problem: +*> A**T*A x = lambda* B**T*B x. +*> In some literature, the GSVD of A and B is presented in the form +*> U**T*A*X = ( 0 D1 ), V**T*B*X = ( 0 D2 ) +*> where U and V are orthogonal and X is nonsingular, D1 and D2 are +*> ``diagonal''. The former GSVD form can be converted to the latter +*> form by taking the nonsingular matrix X as +*> +*> X = Q*( I 0 ) +*> ( 0 inv(R) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> = 'U': Orthogonal matrix U is computed; +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> = 'V': Orthogonal matrix V is computed; +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBQ +*> \verbatim +*> JOBQ is CHARACTER*1 +*> = 'Q': Orthogonal matrix Q is computed; +*> = 'N': Q is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> \endverbatim +*> +*> \param[out] L +*> \verbatim +*> L is INTEGER +*> +*> On exit, K and L specify the dimension of the subblocks +*> described in Purpose. +*> K + L = effective numerical rank of (A**T,B**T)**T. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A contains the triangular matrix R, or part of R. +*> See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, B contains the triangular matrix R if M-K-L < 0. +*> See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is REAL array, dimension (N) +*> +*> On exit, ALPHA and BETA contain the generalized singular +*> value pairs of A and B; +*> ALPHA(1:K) = 1, +*> BETA(1:K) = 0, +*> and if M-K-L >= 0, +*> ALPHA(K+1:K+L) = C, +*> BETA(K+1:K+L) = S, +*> or if M-K-L < 0, +*> ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0 +*> BETA(K+1:M) =S, BETA(M+1:K+L) =1 +*> and +*> ALPHA(K+L+1:N) = 0 +*> BETA(K+L+1:N) = 0 +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is REAL array, dimension (LDU,M) +*> If JOBU = 'U', U contains the M-by-M orthogonal matrix U. +*> If JOBU = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,M) if +*> JOBU = 'U'; LDU >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is REAL array, dimension (LDV,P) +*> If JOBV = 'V', V contains the P-by-P orthogonal matrix V. +*> If JOBV = 'N', V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,P) if +*> JOBV = 'V'; LDV >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ,N) +*> If JOBQ = 'Q', Q contains the N-by-N orthogonal matrix Q. +*> If JOBQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N) if +*> JOBQ = 'Q'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, +*> dimension (max(3*N,M,P)+N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> On exit, IWORK stores the sorting information. More +*> precisely, the following loop will sort ALPHA +*> for I = K+1, min(M,K+L) +*> swap ALPHA(I) and ALPHA(IWORK(I)) +*> endfor +*> such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, the Jacobi-type procedure failed to +*> converge. For further details, see subroutine STGSJA. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> TOLA REAL +*> TOLB REAL +*> TOLA and TOLB are the thresholds to determine the effective +*> rank of (A**T,B**T)**T. Generally, they are set to +*> TOLA = MAX(M,N)*norm(A)*MACHEPS, +*> TOLB = MAX(P,N)*norm(B)*MACHEPS. +*> The size of TOLA and TOLB may affect the size of backward +*> errors of the decomposition. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERsing +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE SGGSVD( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, + $ LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, + $ IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), Q( LDQ, * ), U( LDU, * ), + $ V( LDV, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL WANTQ, WANTU, WANTV + INTEGER I, IBND, ISUB, J, NCYCLE + REAL ANORM, BNORM, SMAX, TEMP, TOLA, TOLB, ULP, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANGE + EXTERNAL LSAME, SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGGSVP, STGSJA, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( P.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -12 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGSVD', -INFO ) + RETURN + END IF +* +* Compute the Frobenius norm of matrices A and B +* + ANORM = SLANGE( '1', M, N, A, LDA, WORK ) + BNORM = SLANGE( '1', P, N, B, LDB, WORK ) +* +* Get machine precision and set up threshold for determining +* the effective numerical rank of the matrices A and B. +* + ULP = SLAMCH( 'Precision' ) + UNFL = SLAMCH( 'Safe Minimum' ) + TOLA = MAX( M, N )*MAX( ANORM, UNFL )*ULP + TOLB = MAX( P, N )*MAX( BNORM, UNFL )*ULP +* +* Preprocessing +* + CALL SGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, TOLA, + $ TOLB, K, L, U, LDU, V, LDV, Q, LDQ, IWORK, WORK, + $ WORK( N+1 ), INFO ) +* +* Compute the GSVD of two upper "triangular" matrices +* + CALL STGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, LDB, + $ TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, + $ WORK, NCYCLE, INFO ) +* +* Sort the singular values and store the pivot indices in IWORK +* Copy ALPHA to WORK, then sort ALPHA in WORK +* + CALL SCOPY( N, ALPHA, 1, WORK, 1 ) + IBND = MIN( L, M-K ) + DO 20 I = 1, IBND +* +* Scan for largest ALPHA(K+I) +* + ISUB = I + SMAX = WORK( K+I ) + DO 10 J = I + 1, IBND + TEMP = WORK( K+J ) + IF( TEMP.GT.SMAX ) THEN + ISUB = J + SMAX = TEMP + END IF + 10 CONTINUE + IF( ISUB.NE.I ) THEN + WORK( K+ISUB ) = WORK( K+I ) + WORK( K+I ) = SMAX + IWORK( K+I ) = K + ISUB + ELSE + IWORK( K+I ) = K + I + END IF + 20 CONTINUE +* + RETURN +* +* End of SGGSVD +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/sggsvp.f b/dspl/liblapack/SRC/DEPRECATED/sggsvp.f new file mode 100644 index 0000000..b2a71a8 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/sggsvp.f @@ -0,0 +1,522 @@ +*> \brief \b SGGSVP +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGGSVP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, +* TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, +* IWORK, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBQ, JOBU, JOBV +* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P +* REAL TOLA, TOLB +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine SGGSVP3. +*> +*> SGGSVP computes orthogonal matrices U, V and Q such that +*> +*> N-K-L K L +*> U**T*A*Q = K ( 0 A12 A13 ) if M-K-L >= 0; +*> L ( 0 0 A23 ) +*> M-K-L ( 0 0 0 ) +*> +*> N-K-L K L +*> = K ( 0 A12 A13 ) if M-K-L < 0; +*> M-K ( 0 0 A23 ) +*> +*> N-K-L K L +*> V**T*B*Q = L ( 0 0 B13 ) +*> P-L ( 0 0 0 ) +*> +*> where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular +*> upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, +*> otherwise A23 is (M-K)-by-L upper trapezoidal. K+L = the effective +*> numerical rank of the (M+P)-by-N matrix (A**T,B**T)**T. +*> +*> This decomposition is the preprocessing step for computing the +*> Generalized Singular Value Decomposition (GSVD), see subroutine +*> SGGSVD. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> = 'U': Orthogonal matrix U is computed; +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> = 'V': Orthogonal matrix V is computed; +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBQ +*> \verbatim +*> JOBQ is CHARACTER*1 +*> = 'Q': Orthogonal matrix Q is computed; +*> = 'N': Q is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A contains the triangular (or trapezoidal) matrix +*> described in the Purpose section. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, B contains the triangular matrix described in +*> the Purpose section. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[in] TOLA +*> \verbatim +*> TOLA is REAL +*> \endverbatim +*> +*> \param[in] TOLB +*> \verbatim +*> TOLB is REAL +*> +*> TOLA and TOLB are the thresholds to determine the effective +*> numerical rank of matrix B and a subblock of A. Generally, +*> they are set to +*> TOLA = MAX(M,N)*norm(A)*MACHEPS, +*> TOLB = MAX(P,N)*norm(B)*MACHEPS. +*> The size of TOLA and TOLB may affect the size of backward +*> errors of the decomposition. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> \endverbatim +*> +*> \param[out] L +*> \verbatim +*> L is INTEGER +*> +*> On exit, K and L specify the dimension of the subblocks +*> described in Purpose section. +*> K + L = effective numerical rank of (A**T,B**T)**T. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is REAL array, dimension (LDU,M) +*> If JOBU = 'U', U contains the orthogonal matrix U. +*> If JOBU = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,M) if +*> JOBU = 'U'; LDU >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is REAL array, dimension (LDV,P) +*> If JOBV = 'V', V contains the orthogonal matrix V. +*> If JOBV = 'N', V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,P) if +*> JOBV = 'V'; LDV >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ,N) +*> If JOBQ = 'Q', Q contains the orthogonal matrix Q. +*> If JOBQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N) if +*> JOBQ = 'Q'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (max(3*N,M,P)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> The subroutine uses LAPACK subroutine SGEQPF for the QR factorization +*> with column pivoting to detect the effective numerical rank of the +*> a matrix. It may be replaced by a better rank determination strategy. +*> +* ===================================================================== + SUBROUTINE SGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, + $ TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, + $ IWORK, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P + REAL TOLA, TOLB +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL FORWRD, WANTQ, WANTU, WANTV + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEQPF, SGEQR2, SGERQ2, SLACPY, SLAPMT, SLASET, + $ SORG2R, SORM2R, SORMR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) + FORWRD = .TRUE. +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( P.LT.0 ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -10 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGSVP', -INFO ) + RETURN + END IF +* +* QR with column pivoting of B: B*P = V*( S11 S12 ) +* ( 0 0 ) +* + DO 10 I = 1, N + IWORK( I ) = 0 + 10 CONTINUE + CALL SGEQPF( P, N, B, LDB, IWORK, TAU, WORK, INFO ) +* +* Update A := A*P +* + CALL SLAPMT( FORWRD, M, N, A, LDA, IWORK ) +* +* Determine the effective rank of matrix B. +* + L = 0 + DO 20 I = 1, MIN( P, N ) + IF( ABS( B( I, I ) ).GT.TOLB ) + $ L = L + 1 + 20 CONTINUE +* + IF( WANTV ) THEN +* +* Copy the details of V, and form V. +* + CALL SLASET( 'Full', P, P, ZERO, ZERO, V, LDV ) + IF( P.GT.1 ) + $ CALL SLACPY( 'Lower', P-1, N, B( 2, 1 ), LDB, V( 2, 1 ), + $ LDV ) + CALL SORG2R( P, P, MIN( P, N ), V, LDV, TAU, WORK, INFO ) + END IF +* +* Clean up B +* + DO 40 J = 1, L - 1 + DO 30 I = J + 1, L + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + IF( P.GT.L ) + $ CALL SLASET( 'Full', P-L, N, ZERO, ZERO, B( L+1, 1 ), LDB ) +* + IF( WANTQ ) THEN +* +* Set Q = I and Update Q := Q*P +* + CALL SLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) + CALL SLAPMT( FORWRD, N, N, Q, LDQ, IWORK ) + END IF +* + IF( P.GE.L .AND. N.NE.L ) THEN +* +* RQ factorization of (S11 S12): ( S11 S12 ) = ( 0 S12 )*Z +* + CALL SGERQ2( L, N, B, LDB, TAU, WORK, INFO ) +* +* Update A := A*Z**T +* + CALL SORMR2( 'Right', 'Transpose', M, N, L, B, LDB, TAU, A, + $ LDA, WORK, INFO ) +* + IF( WANTQ ) THEN +* +* Update Q := Q*Z**T +* + CALL SORMR2( 'Right', 'Transpose', N, N, L, B, LDB, TAU, Q, + $ LDQ, WORK, INFO ) + END IF +* +* Clean up B +* + CALL SLASET( 'Full', L, N-L, ZERO, ZERO, B, LDB ) + DO 60 J = N - L + 1, N + DO 50 I = J - N + L + 1, L + B( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE +* + END IF +* +* Let N-L L +* A = ( A11 A12 ) M, +* +* then the following does the complete QR decomposition of A11: +* +* A11 = U*( 0 T12 )*P1**T +* ( 0 0 ) +* + DO 70 I = 1, N - L + IWORK( I ) = 0 + 70 CONTINUE + CALL SGEQPF( M, N-L, A, LDA, IWORK, TAU, WORK, INFO ) +* +* Determine the effective rank of A11 +* + K = 0 + DO 80 I = 1, MIN( M, N-L ) + IF( ABS( A( I, I ) ).GT.TOLA ) + $ K = K + 1 + 80 CONTINUE +* +* Update A12 := U**T*A12, where A12 = A( 1:M, N-L+1:N ) +* + CALL SORM2R( 'Left', 'Transpose', M, L, MIN( M, N-L ), A, LDA, + $ TAU, A( 1, N-L+1 ), LDA, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Copy the details of U, and form U +* + CALL SLASET( 'Full', M, M, ZERO, ZERO, U, LDU ) + IF( M.GT.1 ) + $ CALL SLACPY( 'Lower', M-1, N-L, A( 2, 1 ), LDA, U( 2, 1 ), + $ LDU ) + CALL SORG2R( M, M, MIN( M, N-L ), U, LDU, TAU, WORK, INFO ) + END IF +* + IF( WANTQ ) THEN +* +* Update Q( 1:N, 1:N-L ) = Q( 1:N, 1:N-L )*P1 +* + CALL SLAPMT( FORWRD, N, N-L, Q, LDQ, IWORK ) + END IF +* +* Clean up A: set the strictly lower triangular part of +* A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. +* + DO 100 J = 1, K - 1 + DO 90 I = J + 1, K + A( I, J ) = ZERO + 90 CONTINUE + 100 CONTINUE + IF( M.GT.K ) + $ CALL SLASET( 'Full', M-K, N-L, ZERO, ZERO, A( K+1, 1 ), LDA ) +* + IF( N-L.GT.K ) THEN +* +* RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 +* + CALL SGERQ2( K, N-L, A, LDA, TAU, WORK, INFO ) +* + IF( WANTQ ) THEN +* +* Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1**T +* + CALL SORMR2( 'Right', 'Transpose', N, N-L, K, A, LDA, TAU, + $ Q, LDQ, WORK, INFO ) + END IF +* +* Clean up A +* + CALL SLASET( 'Full', K, N-L-K, ZERO, ZERO, A, LDA ) + DO 120 J = N - L - K + 1, N - L + DO 110 I = J - N + L + K + 1, K + A( I, J ) = ZERO + 110 CONTINUE + 120 CONTINUE +* + END IF +* + IF( M.GT.K ) THEN +* +* QR factorization of A( K+1:M,N-L+1:N ) +* + CALL SGEQR2( M-K, L, A( K+1, N-L+1 ), LDA, TAU, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Update U(:,K+1:M) := U(:,K+1:M)*U1 +* + CALL SORM2R( 'Right', 'No transpose', M, M-K, MIN( M-K, L ), + $ A( K+1, N-L+1 ), LDA, TAU, U( 1, K+1 ), LDU, + $ WORK, INFO ) + END IF +* +* Clean up +* + DO 140 J = N - L + 1, N + DO 130 I = J - N + K + L + 1, M + A( I, J ) = ZERO + 130 CONTINUE + 140 CONTINUE +* + END IF +* + RETURN +* +* End of SGGSVP +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/slahrd.f b/dspl/liblapack/SRC/DEPRECATED/slahrd.f new file mode 100644 index 0000000..54503de --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/slahrd.f @@ -0,0 +1,286 @@ +*> \brief \b SLAHRD reduces the first nb columns of a general rectangular matrix A so that elements below the k-th subdiagonal are zero, and returns auxiliary matrices which are needed to apply the transformation to the unreduced part of A. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAHRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAHRD( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* .. Scalar Arguments .. +* INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), T( LDT, NB ), TAU( NB ), +* $ Y( LDY, NB ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine SLAHR2. +*> +*> SLAHRD reduces the first NB columns of a real general n-by-(n-k+1) +*> matrix A so that elements below the k-th subdiagonal are zero. The +*> reduction is performed by an orthogonal similarity transformation +*> Q**T * A * Q. The routine returns the matrices V and T which determine +*> Q as a block reflector I - V*T*V**T, and also the matrix Y = A * V * T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The offset for the reduction. Elements below the k-th +*> subdiagonal in the first NB columns are reduced to zero. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of columns to be reduced. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N-K+1) +*> On entry, the n-by-(n-k+1) general matrix A. +*> On exit, the elements on and above the k-th subdiagonal in +*> the first NB columns are overwritten with the corresponding +*> elements of the reduced matrix; the elements below the k-th +*> subdiagonal, with the array TAU, represent the matrix Q as a +*> product of elementary reflectors. The other columns of A are +*> unchanged. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (NB) +*> The scalar factors of the elementary reflectors. See Further +*> Details. +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (LDT,NB) +*> The upper triangular matrix T. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[out] Y +*> \verbatim +*> Y is REAL array, dimension (LDY,NB) +*> The n-by-nb matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= N. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of nb elementary reflectors +*> +*> Q = H(1) H(2) . . . H(nb). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in +*> A(i+k+1:n,i), and tau in TAU(i). +*> +*> The elements of the vectors v together form the (n-k+1)-by-nb matrix +*> V which is needed, with T and Y, to apply the transformation to the +*> unreduced part of the matrix, using an update of the form: +*> A := (I - V*T*V**T) * (A - Y*V**T). +*> +*> The contents of A on exit are illustrated by the following example +*> with n = 7, k = 3 and nb = 2: +*> +*> ( a h a a a ) +*> ( a h a a a ) +*> ( a h a a a ) +*> ( h h a a a ) +*> ( v1 h a a a ) +*> ( v1 v2 a a a ) +*> ( v1 v2 a a a ) +*> +*> where a denotes an element of the original matrix A, h denotes a +*> modified element of the upper Hessenberg matrix H, and vi denotes an +*> element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLAHRD( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. + REAL A( LDA, * ), T( LDT, NB ), TAU( NB ), + $ Y( LDY, NB ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL EI +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMV, SLARFG, SSCAL, STRMV +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, NB + IF( I.GT.1 ) THEN +* +* Update A(1:n,i) +* +* Compute i-th column of A - Y * V**T +* + CALL SGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, + $ A( K+I-1, 1 ), LDA, ONE, A( 1, I ), 1 ) +* +* Apply I - V * T**T * V**T to this column (call it b) from the +* left, using the last column of T as workspace +* +* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) +* ( V2 ) ( b2 ) +* +* where V1 is unit lower triangular +* +* w := V1**T * b1 +* + CALL SCOPY( I-1, A( K+1, I ), 1, T( 1, NB ), 1 ) + CALL STRMV( 'Lower', 'Transpose', 'Unit', I-1, A( K+1, 1 ), + $ LDA, T( 1, NB ), 1 ) +* +* w := w + V2**T *b2 +* + CALL SGEMV( 'Transpose', N-K-I+1, I-1, ONE, A( K+I, 1 ), + $ LDA, A( K+I, I ), 1, ONE, T( 1, NB ), 1 ) +* +* w := T**T *w +* + CALL STRMV( 'Upper', 'Transpose', 'Non-unit', I-1, T, LDT, + $ T( 1, NB ), 1 ) +* +* b2 := b2 - V2*w +* + CALL SGEMV( 'No transpose', N-K-I+1, I-1, -ONE, A( K+I, 1 ), + $ LDA, T( 1, NB ), 1, ONE, A( K+I, I ), 1 ) +* +* b1 := b1 - V1*w +* + CALL STRMV( 'Lower', 'No transpose', 'Unit', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) + CALL SAXPY( I-1, -ONE, T( 1, NB ), 1, A( K+1, I ), 1 ) +* + A( K+I-1, I-1 ) = EI + END IF +* +* Generate the elementary reflector H(i) to annihilate +* A(k+i+1:n,i) +* + CALL SLARFG( N-K-I+1, A( K+I, I ), A( MIN( K+I+1, N ), I ), 1, + $ TAU( I ) ) + EI = A( K+I, I ) + A( K+I, I ) = ONE +* +* Compute Y(1:n,i) +* + CALL SGEMV( 'No transpose', N, N-K-I+1, ONE, A( 1, I+1 ), LDA, + $ A( K+I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL SGEMV( 'Transpose', N-K-I+1, I-1, ONE, A( K+I, 1 ), LDA, + $ A( K+I, I ), 1, ZERO, T( 1, I ), 1 ) + CALL SGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, T( 1, I ), 1, + $ ONE, Y( 1, I ), 1 ) + CALL SSCAL( N, TAU( I ), Y( 1, I ), 1 ) +* +* Compute T(1:i,i) +* + CALL SSCAL( I-1, -TAU( I ), T( 1, I ), 1 ) + CALL STRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, LDT, + $ T( 1, I ), 1 ) + T( I, I ) = TAU( I ) +* + 10 CONTINUE + A( K+NB, NB ) = EI +* + RETURN +* +* End of SLAHRD +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/slatzm.f b/dspl/liblapack/SRC/DEPRECATED/slatzm.f new file mode 100644 index 0000000..d523f3a --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/slatzm.f @@ -0,0 +1,221 @@ +*> \brief \b SLATZM +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLATZM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLATZM( SIDE, M, N, V, INCV, TAU, C1, C2, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* REAL TAU +* .. +* .. Array Arguments .. +* REAL C1( LDC, * ), C2( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine SORMRZ. +*> +*> SLATZM applies a Householder matrix generated by STZRQF to a matrix. +*> +*> Let P = I - tau*u*u**T, u = ( 1 ), +*> ( v ) +*> where v is an (m-1) vector if SIDE = 'L', or a (n-1) vector if +*> SIDE = 'R'. +*> +*> If SIDE equals 'L', let +*> C = [ C1 ] 1 +*> [ C2 ] m-1 +*> n +*> Then C is overwritten by P*C. +*> +*> If SIDE equals 'R', let +*> C = [ C1, C2 ] m +*> 1 n-1 +*> Then C is overwritten by C*P. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form P * C +*> = 'R': form C * P +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of P. V is not used +*> if TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0 +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL +*> The value tau in the representation of P. +*> \endverbatim +*> +*> \param[in,out] C1 +*> \verbatim +*> C1 is REAL array, dimension +*> (LDC,N) if SIDE = 'L' +*> (M,1) if SIDE = 'R' +*> On entry, the n-vector C1 if SIDE = 'L', or the m-vector C1 +*> if SIDE = 'R'. +*> +*> On exit, the first row of P*C if SIDE = 'L', or the first +*> column of C*P if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in,out] C2 +*> \verbatim +*> C2 is REAL array, dimension +*> (LDC, N) if SIDE = 'L' +*> (LDC, N-1) if SIDE = 'R' +*> On entry, the (m - 1) x n matrix C2 if SIDE = 'L', or the +*> m x (n - 1) matrix C2 if SIDE = 'R'. +*> +*> On exit, rows 2:m of P*C if SIDE = 'L', or columns 2:m of C*P +*> if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the arrays C1 and C2. LDC >= (1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension +*> (N) if SIDE = 'L' +*> (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SLATZM( SIDE, M, N, V, INCV, TAU, C1, C2, LDC, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + REAL TAU +* .. +* .. Array Arguments .. + REAL C1( LDC, * ), C2( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMV, SGER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( ( MIN( M, N ).EQ.0 ) .OR. ( TAU.EQ.ZERO ) ) + $ RETURN +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* w := (C1 + v**T * C2)**T +* + CALL SCOPY( N, C1, LDC, WORK, 1 ) + CALL SGEMV( 'Transpose', M-1, N, ONE, C2, LDC, V, INCV, ONE, + $ WORK, 1 ) +* +* [ C1 ] := [ C1 ] - tau* [ 1 ] * w**T +* [ C2 ] [ C2 ] [ v ] +* + CALL SAXPY( N, -TAU, WORK, 1, C1, LDC ) + CALL SGER( M-1, N, -TAU, V, INCV, WORK, 1, C2, LDC ) +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* w := C1 + C2 * v +* + CALL SCOPY( M, C1, 1, WORK, 1 ) + CALL SGEMV( 'No transpose', M, N-1, ONE, C2, LDC, V, INCV, ONE, + $ WORK, 1 ) +* +* [ C1, C2 ] := [ C1, C2 ] - tau* w * [ 1 , v**T] +* + CALL SAXPY( M, -TAU, WORK, 1, C1, 1 ) + CALL SGER( M, N-1, -TAU, WORK, 1, V, INCV, C2, LDC ) + END IF +* + RETURN +* +* End of SLATZM +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/stzrqf.f b/dspl/liblapack/SRC/DEPRECATED/stzrqf.f new file mode 100644 index 0000000..57c4419 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/stzrqf.f @@ -0,0 +1,232 @@ +*> \brief \b STZRQF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STZRQF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STZRQF( M, N, A, LDA, TAU, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine STZRZF. +*> +*> STZRQF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A +*> to upper triangular form by means of orthogonal transformations. +*> +*> The upper trapezoidal matrix A is factored as +*> +*> A = ( R 0 ) * Z, +*> +*> where Z is an N-by-N orthogonal matrix and R is an M-by-M upper +*> triangular matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= M. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the leading M-by-N upper trapezoidal part of the +*> array A must contain the matrix to be factorized. +*> On exit, the leading M-by-M upper triangular part of A +*> contains the upper triangular matrix R, and elements M+1 to +*> N of the first M rows of A, with the array TAU, represent the +*> orthogonal matrix Z as a product of M elementary reflectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (M) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The factorization is obtained by Householder's method. The kth +*> transformation matrix, Z( k ), which is used to introduce zeros into +*> the ( m - k + 1 )th row of A, is given in the form +*> +*> Z( k ) = ( I 0 ), +*> ( 0 T( k ) ) +*> +*> where +*> +*> T( k ) = I - tau*u( k )*u( k )**T, u( k ) = ( 1 ), +*> ( 0 ) +*> ( z( k ) ) +*> +*> tau is a scalar and z( k ) is an ( n - m ) element vector. +*> tau and z( k ) are chosen to annihilate the elements of the kth row +*> of X. +*> +*> The scalar tau is returned in the kth element of TAU and the vector +*> u( k ) in the kth row of A, such that the elements of z( k ) are +*> in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in +*> the upper triangular part of A. +*> +*> Z is given by +*> +*> Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STZRQF( M, N, A, LDA, TAU, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K, M1 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMV, SGER, SLARFG, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STZRQF', -INFO ) + RETURN + END IF +* +* Perform the factorization. +* + IF( M.EQ.0 ) + $ RETURN + IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + ELSE + M1 = MIN( M+1, N ) + DO 20 K = M, 1, -1 +* +* Use a Householder reflection to zero the kth row of A. +* First set up the reflection. +* + CALL SLARFG( N-M+1, A( K, K ), A( K, M1 ), LDA, TAU( K ) ) +* + IF( ( TAU( K ).NE.ZERO ) .AND. ( K.GT.1 ) ) THEN +* +* We now perform the operation A := A*P( k ). +* +* Use the first ( k - 1 ) elements of TAU to store a( k ), +* where a( k ) consists of the first ( k - 1 ) elements of +* the kth column of A. Also let B denote the first +* ( k - 1 ) rows of the last ( n - m ) columns of A. +* + CALL SCOPY( K-1, A( 1, K ), 1, TAU, 1 ) +* +* Form w = a( k ) + B*z( k ) in TAU. +* + CALL SGEMV( 'No transpose', K-1, N-M, ONE, A( 1, M1 ), + $ LDA, A( K, M1 ), LDA, ONE, TAU, 1 ) +* +* Now form a( k ) := a( k ) - tau*w +* and B := B - tau*w*z( k )**T. +* + CALL SAXPY( K-1, -TAU( K ), TAU, 1, A( 1, K ), 1 ) + CALL SGER( K-1, N-M, -TAU( K ), TAU, 1, A( K, M1 ), LDA, + $ A( 1, M1 ), LDA ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of STZRQF +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/zgegs.f b/dspl/liblapack/SRC/DEPRECATED/zgegs.f new file mode 100644 index 0000000..038e952 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/zgegs.f @@ -0,0 +1,531 @@ +*> \brief ZGEEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEGS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEGS( JOBVSL, JOBVSR, N, A, LDA, B, LDB, ALPHA, BETA, +* VSL, LDVSL, VSR, LDVSR, WORK, LWORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVSL, JOBVSR +* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine ZGGES. +*> +*> ZGEGS computes the eigenvalues, Schur form, and, optionally, the +*> left and or/right Schur vectors of a complex matrix pair (A,B). +*> Given two square matrices A and B, the generalized Schur +*> factorization has the form +*> +*> A = Q*S*Z**H, B = Q*T*Z**H +*> +*> where Q and Z are unitary matrices and S and T are upper triangular. +*> The columns of Q are the left Schur vectors +*> and the columns of Z are the right Schur vectors. +*> +*> If only the eigenvalues of (A,B) are needed, the driver routine +*> ZGEGV should be used instead. See ZGEGV for a description of the +*> eigenvalues of the generalized nonsymmetric eigenvalue problem +*> (GNEP). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVSL +*> \verbatim +*> JOBVSL is CHARACTER*1 +*> = 'N': do not compute the left Schur vectors; +*> = 'V': compute the left Schur vectors (returned in VSL). +*> \endverbatim +*> +*> \param[in] JOBVSR +*> \verbatim +*> JOBVSR is CHARACTER*1 +*> = 'N': do not compute the right Schur vectors; +*> = 'V': compute the right Schur vectors (returned in VSR). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VSL, and VSR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the matrix A. +*> On exit, the upper triangular matrix S from the generalized +*> Schur factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB, N) +*> On entry, the matrix B. +*> On exit, the upper triangular matrix T from the generalized +*> Schur factorization. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 array, dimension (N) +*> The complex scalars alpha that define the eigenvalues of +*> GNEP. ALPHA(j) = S(j,j), the diagonal element of the Schur +*> form of A. +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is COMPLEX*16 array, dimension (N) +*> The non-negative real scalars beta that define the +*> eigenvalues of GNEP. BETA(j) = T(j,j), the diagonal element +*> of the triangular factor T. +*> +*> Together, the quantities alpha = ALPHA(j) and beta = BETA(j) +*> represent the j-th eigenvalue of the matrix pair (A,B), in +*> one of the forms lambda = alpha/beta or mu = beta/alpha. +*> Since either lambda or mu may overflow, they should not, +*> in general, be computed. +*> \endverbatim +*> +*> \param[out] VSL +*> \verbatim +*> VSL is COMPLEX*16 array, dimension (LDVSL,N) +*> If JOBVSL = 'V', the matrix of left Schur vectors Q. +*> Not referenced if JOBVSL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSL +*> \verbatim +*> LDVSL is INTEGER +*> The leading dimension of the matrix VSL. LDVSL >= 1, and +*> if JOBVSL = 'V', LDVSL >= N. +*> \endverbatim +*> +*> \param[out] VSR +*> \verbatim +*> VSR is COMPLEX*16 array, dimension (LDVSR,N) +*> If JOBVSR = 'V', the matrix of right Schur vectors Z. +*> Not referenced if JOBVSR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSR +*> \verbatim +*> LDVSR is INTEGER +*> The leading dimension of the matrix VSR. LDVSR >= 1, and +*> if JOBVSR = 'V', LDVSR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,2*N). +*> For good performance, LWORK must generally be larger. +*> To compute the optimal value of LWORK, call ILAENV to get +*> blocksizes (for ZGEQRF, ZUNMQR, and CUNGQR.) Then compute: +*> NB -- MAX of the blocksizes for ZGEQRF, ZUNMQR, and CUNGQR; +*> the optimal LWORK is N*(NB+1). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> =1,...,N: +*> The QZ iteration failed. (A,B) are not in Schur +*> form, but ALPHA(j) and BETA(j) should be correct for +*> j=INFO+1,...,N. +*> > N: errors that usually indicate LAPACK problems: +*> =N+1: error return from ZGGBAL +*> =N+2: error return from ZGEQRF +*> =N+3: error return from ZUNMQR +*> =N+4: error return from ZUNGQR +*> =N+5: error return from ZGGHRD +*> =N+6: error return from ZHGEQZ (other than failed +*> iteration) +*> =N+7: error return from ZGGBAK (computing VSL) +*> =N+8: error return from ZGGBAK (computing VSR) +*> =N+9: error return from ZLASCL (various places) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEeigen +* +* ===================================================================== + SUBROUTINE ZGEGS( JOBVSL, JOBVSR, N, A, LDA, B, LDB, ALPHA, BETA, + $ VSL, LDVSL, VSR, LDVSR, WORK, LWORK, RWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILVSL, ILVSR, LQUERY + INTEGER ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, ILO, + $ IRIGHT, IROWS, IRWORK, ITAU, IWORK, LOPT, + $ LWKMIN, LWKOPT, NB, NB1, NB2, NB3 + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SAFMIN, SMLNUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQRF, ZGGBAK, ZGGBAL, ZGGHRD, ZHGEQZ, + $ ZLACPY, ZLASCL, ZLASET, ZUNGQR, ZUNMQR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* +* Test the input arguments +* + LWKMIN = MAX( 2*N, 1 ) + LWKOPT = LWKMIN + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF +* + IF( INFO.EQ.0 ) THEN + NB1 = ILAENV( 1, 'ZGEQRF', ' ', N, N, -1, -1 ) + NB2 = ILAENV( 1, 'ZUNMQR', ' ', N, N, N, -1 ) + NB3 = ILAENV( 1, 'ZUNGQR', ' ', N, N, N, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LOPT = N*( NB+1 ) + WORK( 1 ) = LOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEGS ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'E' )*DLAMCH( 'B' ) + SAFMIN = DLAMCH( 'S' ) + SMLNUM = N*SAFMIN / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF +* + IF( ILASCL ) THEN + CALL ZLASCL( 'G', -1, -1, ANRM, ANRMTO, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = ZLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF +* + IF( ILBSCL ) THEN + CALL ZLASCL( 'G', -1, -1, BNRM, BNRMTO, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* +* Permute the matrix to make it more nearly triangular +* + ILEFT = 1 + IRIGHT = N + 1 + IRWORK = IRIGHT + N + IWORK = 1 + CALL ZGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWORK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 1 + GO TO 10 + END IF +* +* Reduce B to triangular form, and initialize VSL and/or VSR +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = IWORK + IWORK = ITAU + IROWS + CALL ZGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 2 + GO TO 10 + END IF +* + CALL ZUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ), + $ LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 3 + GO TO 10 + END IF +* + IF( ILVSL ) THEN + CALL ZLASET( 'Full', N, N, CZERO, CONE, VSL, LDVSL ) + CALL ZLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + CALL ZUNGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK, + $ IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 4 + GO TO 10 + END IF + END IF +* + IF( ILVSR ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* + CALL ZGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 5 + GO TO 10 + END IF +* +* Perform QZ algorithm, computing Schur vectors if desired +* + IWORK = ITAU + CALL ZHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK( IWORK ), + $ LWORK+1-IWORK, RWORK( IRWORK ), IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN + INFO = IINFO + ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN + INFO = IINFO - N + ELSE + INFO = N + 6 + END IF + GO TO 10 + END IF +* +* Apply permutation to VSL and VSR +* + IF( ILVSL ) THEN + CALL ZGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSL, LDVSL, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 7 + GO TO 10 + END IF + END IF + IF( ILVSR ) THEN + CALL ZGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSR, LDVSR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 8 + GO TO 10 + END IF + END IF +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL ZLASCL( 'U', -1, -1, ANRMTO, ANRM, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + CALL ZLASCL( 'G', -1, -1, ANRMTO, ANRM, N, 1, ALPHA, N, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* + IF( ILBSCL ) THEN + CALL ZLASCL( 'U', -1, -1, BNRMTO, BNRM, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + CALL ZLASCL( 'G', -1, -1, BNRMTO, BNRM, N, 1, BETA, N, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + RETURN + END IF + END IF +* + 10 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZGEGS +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/zgegv.f b/dspl/liblapack/SRC/DEPRECATED/zgegv.f new file mode 100644 index 0000000..0f66295 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/zgegv.f @@ -0,0 +1,706 @@ +*> \brief ZGEEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEGV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, +* VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVL, JOBVR +* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine ZGGEV. +*> +*> ZGEGV computes the eigenvalues and, optionally, the left and/or right +*> eigenvectors of a complex matrix pair (A,B). +*> Given two square matrices A and B, +*> the generalized nonsymmetric eigenvalue problem (GNEP) is to find the +*> eigenvalues lambda and corresponding (non-zero) eigenvectors x such +*> that +*> A*x = lambda*B*x. +*> +*> An alternate form is to find the eigenvalues mu and corresponding +*> eigenvectors y such that +*> mu*A*y = B*y. +*> +*> These two forms are equivalent with mu = 1/lambda and x = y if +*> neither lambda nor mu is zero. In order to deal with the case that +*> lambda or mu is zero or small, two values alpha and beta are returned +*> for each eigenvalue, such that lambda = alpha/beta and +*> mu = beta/alpha. +*> +*> The vectors x and y in the above equations are right eigenvectors of +*> the matrix pair (A,B). Vectors u and v satisfying +*> u**H*A = lambda*u**H*B or mu*v**H*A = v**H*B +*> are left eigenvectors of (A,B). +*> +*> Note: this routine performs "full balancing" on A and B +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': do not compute the left generalized eigenvectors; +*> = 'V': compute the left generalized eigenvectors (returned +*> in VL). +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': do not compute the right generalized eigenvectors; +*> = 'V': compute the right generalized eigenvectors (returned +*> in VR). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VL, and VR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the matrix A. +*> If JOBVL = 'V' or JOBVR = 'V', then on exit A +*> contains the Schur form of A from the generalized Schur +*> factorization of the pair (A,B) after balancing. If no +*> eigenvectors were computed, then only the diagonal elements +*> of the Schur form will be correct. See ZGGHRD and ZHGEQZ +*> for details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB, N) +*> On entry, the matrix B. +*> If JOBVL = 'V' or JOBVR = 'V', then on exit B contains the +*> upper triangular matrix obtained from B in the generalized +*> Schur factorization of the pair (A,B) after balancing. +*> If no eigenvectors were computed, then only the diagonal +*> elements of B will be correct. See ZGGHRD and ZHGEQZ for +*> details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 array, dimension (N) +*> The complex scalars alpha that define the eigenvalues of +*> GNEP. +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is COMPLEX*16 array, dimension (N) +*> The complex scalars beta that define the eigenvalues of GNEP. +*> +*> Together, the quantities alpha = ALPHA(j) and beta = BETA(j) +*> represent the j-th eigenvalue of the matrix pair (A,B), in +*> one of the forms lambda = alpha/beta or mu = beta/alpha. +*> Since either lambda or mu may overflow, they should not, +*> in general, be computed. +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is COMPLEX*16 array, dimension (LDVL,N) +*> If JOBVL = 'V', the left eigenvectors u(j) are stored +*> in the columns of VL, in the same order as their eigenvalues. +*> Each eigenvector is scaled so that its largest component has +*> abs(real part) + abs(imag. part) = 1, except for eigenvectors +*> corresponding to an eigenvalue with alpha = beta = 0, which +*> are set to zero. +*> Not referenced if JOBVL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the matrix VL. LDVL >= 1, and +*> if JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is COMPLEX*16 array, dimension (LDVR,N) +*> If JOBVR = 'V', the right eigenvectors x(j) are stored +*> in the columns of VR, in the same order as their eigenvalues. +*> Each eigenvector is scaled so that its largest component has +*> abs(real part) + abs(imag. part) = 1, except for eigenvectors +*> corresponding to an eigenvalue with alpha = beta = 0, which +*> are set to zero. +*> Not referenced if JOBVR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the matrix VR. LDVR >= 1, and +*> if JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,2*N). +*> For good performance, LWORK must generally be larger. +*> To compute the optimal value of LWORK, call ILAENV to get +*> blocksizes (for ZGEQRF, ZUNMQR, and ZUNGQR.) Then compute: +*> NB -- MAX of the blocksizes for ZGEQRF, ZUNMQR, and ZUNGQR; +*> The optimal LWORK is MAX( 2*N, N*(NB+1) ). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (8*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> =1,...,N: +*> The QZ iteration failed. No eigenvectors have been +*> calculated, but ALPHA(j) and BETA(j) should be +*> correct for j=INFO+1,...,N. +*> > N: errors that usually indicate LAPACK problems: +*> =N+1: error return from ZGGBAL +*> =N+2: error return from ZGEQRF +*> =N+3: error return from ZUNMQR +*> =N+4: error return from ZUNGQR +*> =N+5: error return from ZGGHRD +*> =N+6: error return from ZHGEQZ (other than failed +*> iteration) +*> =N+7: error return from ZTGEVC +*> =N+8: error return from ZGGBAK (computing VL) +*> =N+9: error return from ZGGBAK (computing VR) +*> =N+10: error return from ZLASCL (various calls) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Balancing +*> --------- +*> +*> This driver calls ZGGBAL to both permute and scale rows and columns +*> of A and B. The permutations PL and PR are chosen so that PL*A*PR +*> and PL*B*R will be upper triangular except for the diagonal blocks +*> A(i:j,i:j) and B(i:j,i:j), with i and j as close together as +*> possible. The diagonal scaling matrices DL and DR are chosen so +*> that the pair DL*PL*A*PR*DR, DL*PL*B*PR*DR have elements close to +*> one (except for the elements that start out zero.) +*> +*> After the eigenvalues and eigenvectors of the balanced matrices +*> have been computed, ZGGBAK transforms the eigenvectors back to what +*> they would have been (in perfect arithmetic) if they had not been +*> balanced. +*> +*> Contents of A and B on Exit +*> -------- -- - --- - -- ---- +*> +*> If any eigenvectors are computed (either JOBVL='V' or JOBVR='V' or +*> both), then on exit the arrays A and B will contain the complex Schur +*> form[*] of the "balanced" versions of A and B. If no eigenvectors +*> are computed, then only the diagonal blocks will be correct. +*> +*> [*] In other words, upper triangular form. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGEGV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, + $ VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILIMIT, ILV, ILVL, ILVR, LQUERY + CHARACTER CHTEMP + INTEGER ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, ILO, + $ IN, IRIGHT, IROWS, IRWORK, ITAU, IWORK, JC, JR, + $ LOPT, LWKMIN, LWKOPT, NB, NB1, NB2, NB3 + DOUBLE PRECISION ABSAI, ABSAR, ABSB, ANRM, ANRM1, ANRM2, BNRM, + $ BNRM1, BNRM2, EPS, SAFMAX, SAFMIN, SALFAI, + $ SALFAR, SBETA, SCALE, TEMP + COMPLEX*16 X +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQRF, ZGGBAK, ZGGBAL, ZGGHRD, ZHGEQZ, + $ ZLACPY, ZLASCL, ZLASET, ZTGEVC, ZUNGQR, ZUNMQR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DIMAG, INT, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( DBLE( X ) ) + ABS( DIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* +* Test the input arguments +* + LWKMIN = MAX( 2*N, 1 ) + LWKOPT = LWKMIN + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF +* + IF( INFO.EQ.0 ) THEN + NB1 = ILAENV( 1, 'ZGEQRF', ' ', N, N, -1, -1 ) + NB2 = ILAENV( 1, 'ZUNMQR', ' ', N, N, N, -1 ) + NB3 = ILAENV( 1, 'ZUNGQR', ' ', N, N, N, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LOPT = MAX( 2*N, N*( NB+1 ) ) + WORK( 1 ) = LOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEGV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'E' )*DLAMCH( 'B' ) + SAFMIN = DLAMCH( 'S' ) + SAFMIN = SAFMIN + SAFMIN + SAFMAX = ONE / SAFMIN +* +* Scale A +* + ANRM = ZLANGE( 'M', N, N, A, LDA, RWORK ) + ANRM1 = ANRM + ANRM2 = ONE + IF( ANRM.LT.ONE ) THEN + IF( SAFMAX*ANRM.LT.ONE ) THEN + ANRM1 = SAFMIN + ANRM2 = SAFMAX*ANRM + END IF + END IF +* + IF( ANRM.GT.ZERO ) THEN + CALL ZLASCL( 'G', -1, -1, ANRM, ONE, N, N, A, LDA, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 10 + RETURN + END IF + END IF +* +* Scale B +* + BNRM = ZLANGE( 'M', N, N, B, LDB, RWORK ) + BNRM1 = BNRM + BNRM2 = ONE + IF( BNRM.LT.ONE ) THEN + IF( SAFMAX*BNRM.LT.ONE ) THEN + BNRM1 = SAFMIN + BNRM2 = SAFMAX*BNRM + END IF + END IF +* + IF( BNRM.GT.ZERO ) THEN + CALL ZLASCL( 'G', -1, -1, BNRM, ONE, N, N, B, LDB, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 10 + RETURN + END IF + END IF +* +* Permute the matrix to make it more nearly triangular +* Also "balance" the matrix. +* + ILEFT = 1 + IRIGHT = N + 1 + IRWORK = IRIGHT + N + CALL ZGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWORK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 1 + GO TO 80 + END IF +* +* Reduce B to triangular form, and initialize VL and/or VR +* + IROWS = IHI + 1 - ILO + IF( ILV ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = 1 + IWORK = ITAU + IROWS + CALL ZGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWORK ), LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 2 + GO TO 80 + END IF +* + CALL ZUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ), + $ LWORK+1-IWORK, IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 3 + GO TO 80 + END IF +* + IF( ILVL ) THEN + CALL ZLASET( 'Full', N, N, CZERO, CONE, VL, LDVL ) + CALL ZLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + CALL ZUNGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK, + $ IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + INFO = N + 4 + GO TO 80 + END IF + END IF +* + IF( ILVR ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* + IF( ILV ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL ZGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IINFO ) + ELSE + CALL ZGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IINFO ) + END IF + IF( IINFO.NE.0 ) THEN + INFO = N + 5 + GO TO 80 + END IF +* +* Perform QZ algorithm +* + IWORK = ITAU + IF( ILV ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF + CALL ZHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK( IWORK ), + $ LWORK+1-IWORK, RWORK( IRWORK ), IINFO ) + IF( IINFO.GE.0 ) + $ LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 ) + IF( IINFO.NE.0 ) THEN + IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN + INFO = IINFO + ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN + INFO = IINFO - N + ELSE + INFO = N + 6 + END IF + GO TO 80 + END IF +* + IF( ILV ) THEN +* +* Compute Eigenvectors +* + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL ZTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, N, IN, WORK( IWORK ), RWORK( IRWORK ), + $ IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 7 + GO TO 80 + END IF +* +* Undo balancing on VL and VR, rescale +* + IF( ILVL ) THEN + CALL ZGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VL, LDVL, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 8 + GO TO 80 + END IF + DO 30 JC = 1, N + TEMP = ZERO + DO 10 JR = 1, N + TEMP = MAX( TEMP, ABS1( VL( JR, JC ) ) ) + 10 CONTINUE + IF( TEMP.LT.SAFMIN ) + $ GO TO 30 + TEMP = ONE / TEMP + DO 20 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 20 CONTINUE + 30 CONTINUE + END IF + IF( ILVR ) THEN + CALL ZGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VR, LDVR, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = N + 9 + GO TO 80 + END IF + DO 60 JC = 1, N + TEMP = ZERO + DO 40 JR = 1, N + TEMP = MAX( TEMP, ABS1( VR( JR, JC ) ) ) + 40 CONTINUE + IF( TEMP.LT.SAFMIN ) + $ GO TO 60 + TEMP = ONE / TEMP + DO 50 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 50 CONTINUE + 60 CONTINUE + END IF +* +* End of eigenvector calculation +* + END IF +* +* Undo scaling in alpha, beta +* +* Note: this does not give the alpha and beta for the unscaled +* problem. +* +* Un-scaling is limited to avoid underflow in alpha and beta +* if they are significant. +* + DO 70 JC = 1, N + ABSAR = ABS( DBLE( ALPHA( JC ) ) ) + ABSAI = ABS( DIMAG( ALPHA( JC ) ) ) + ABSB = ABS( DBLE( BETA( JC ) ) ) + SALFAR = ANRM*DBLE( ALPHA( JC ) ) + SALFAI = ANRM*DIMAG( ALPHA( JC ) ) + SBETA = BNRM*DBLE( BETA( JC ) ) + ILIMIT = .FALSE. + SCALE = ONE +* +* Check for significant underflow in imaginary part of ALPHA +* + IF( ABS( SALFAI ).LT.SAFMIN .AND. ABSAI.GE. + $ MAX( SAFMIN, EPS*ABSAR, EPS*ABSB ) ) THEN + ILIMIT = .TRUE. + SCALE = ( SAFMIN / ANRM1 ) / MAX( SAFMIN, ANRM2*ABSAI ) + END IF +* +* Check for significant underflow in real part of ALPHA +* + IF( ABS( SALFAR ).LT.SAFMIN .AND. ABSAR.GE. + $ MAX( SAFMIN, EPS*ABSAI, EPS*ABSB ) ) THEN + ILIMIT = .TRUE. + SCALE = MAX( SCALE, ( SAFMIN / ANRM1 ) / + $ MAX( SAFMIN, ANRM2*ABSAR ) ) + END IF +* +* Check for significant underflow in BETA +* + IF( ABS( SBETA ).LT.SAFMIN .AND. ABSB.GE. + $ MAX( SAFMIN, EPS*ABSAR, EPS*ABSAI ) ) THEN + ILIMIT = .TRUE. + SCALE = MAX( SCALE, ( SAFMIN / BNRM1 ) / + $ MAX( SAFMIN, BNRM2*ABSB ) ) + END IF +* +* Check for possible overflow when limiting scaling +* + IF( ILIMIT ) THEN + TEMP = ( SCALE*SAFMIN )*MAX( ABS( SALFAR ), ABS( SALFAI ), + $ ABS( SBETA ) ) + IF( TEMP.GT.ONE ) + $ SCALE = SCALE / TEMP + IF( SCALE.LT.ONE ) + $ ILIMIT = .FALSE. + END IF +* +* Recompute un-scaled ALPHA, BETA if necessary. +* + IF( ILIMIT ) THEN + SALFAR = ( SCALE*DBLE( ALPHA( JC ) ) )*ANRM + SALFAI = ( SCALE*DIMAG( ALPHA( JC ) ) )*ANRM + SBETA = ( SCALE*BETA( JC ) )*BNRM + END IF + ALPHA( JC ) = DCMPLX( SALFAR, SALFAI ) + BETA( JC ) = SBETA + 70 CONTINUE +* + 80 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZGEGV +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/zgelsx.f b/dspl/liblapack/SRC/DEPRECATED/zgelsx.f new file mode 100644 index 0000000..fc8d288 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/zgelsx.f @@ -0,0 +1,447 @@ +*> \brief ZGELSX solves overdetermined or underdetermined systems for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGELSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGELSX( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, +* WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, M, N, NRHS, RANK +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine ZGELSY. +*> +*> ZGELSX computes the minimum-norm solution to a complex linear least +*> squares problem: +*> minimize || A * X - B || +*> using a complete orthogonal factorization of A. A is an M-by-N +*> matrix which may be rank-deficient. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> +*> The routine first computes a QR factorization with column pivoting: +*> A * P = Q * [ R11 R12 ] +*> [ 0 R22 ] +*> with R11 defined as the largest leading submatrix whose estimated +*> condition number is less than 1/RCOND. The order of R11, RANK, +*> is the effective rank of A. +*> +*> Then, R22 is considered to be negligible, and R12 is annihilated +*> by unitary transformations from the right, arriving at the +*> complete orthogonal factorization: +*> A * P = Q * [ T11 0 ] * Z +*> [ 0 0 ] +*> The minimum-norm solution is then +*> X = P * Z**H [ inv(T11)*Q1**H*B ] +*> [ 0 ] +*> where Q1 consists of the first RANK columns of Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A has been overwritten by details of its +*> complete orthogonal factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the M-by-NRHS right hand side matrix B. +*> On exit, the N-by-NRHS solution matrix X. +*> If m >= n and RANK = n, the residual sum-of-squares for +*> the solution in the i-th column is given by the sum of +*> squares of elements N+1:M in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M,N). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> On entry, if JPVT(i) .ne. 0, the i-th column of A is an +*> initial column, otherwise it is a free column. Before +*> the QR factorization of A, all initial columns are +*> permuted to the leading positions; only the remaining +*> free columns are moved as a result of column pivoting +*> during the factorization. +*> On exit, if JPVT(i) = k, then the i-th column of A*P +*> was the k-th column of A. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> RCOND is used to determine the effective rank of A, which +*> is defined as the order of the largest leading triangular +*> submatrix R11 in the QR factorization with pivoting of A, +*> whose estimated condition number < 1/RCOND. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The effective rank of A, i.e., the order of the submatrix +*> R11. This is the same as the order of the submatrix T11 +*> in the complete orthogonal factorization of A. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension +*> (min(M,N) + max( N, 2*min(M,N)+NRHS )), +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEsolve +* +* ===================================================================== + SUBROUTINE ZGELSX( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, + $ WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, M, N, NRHS, RANK + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IMAX, IMIN + PARAMETER ( IMAX = 1, IMIN = 2 ) + DOUBLE PRECISION ZERO, ONE, DONE, NTDONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, DONE = ZERO, + $ NTDONE = ONE ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, IASCL, IBSCL, ISMAX, ISMIN, J, K, MN + DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMAX, SMAXPR, SMIN, SMINPR, + $ SMLNUM + COMPLEX*16 C1, C2, S1, S2, T1, T2 +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQPF, ZLAIC1, ZLASCL, ZLASET, ZLATZM, + $ ZTRSM, ZTZRQF, ZUNM2R +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* + MN = MIN( M, N ) + ISMIN = MN + 1 + ISMAX = 2*MN + 1 +* +* Test the input arguments. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGELSX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max elements outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + RANK = 0 + GO TO 100 + END IF +* + BNRM = ZLANGE( 'M', M, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Compute QR factorization with column pivoting of A: +* A * P = Q * R +* + CALL ZGEQPF( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), RWORK, + $ INFO ) +* +* complex workspace MN+N. Real workspace 2*N. Details of Householder +* rotations stored in WORK(1:MN). +* +* Determine RANK using incremental condition estimation +* + WORK( ISMIN ) = CONE + WORK( ISMAX ) = CONE + SMAX = ABS( A( 1, 1 ) ) + SMIN = SMAX + IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN + RANK = 0 + CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + GO TO 100 + ELSE + RANK = 1 + END IF +* + 10 CONTINUE + IF( RANK.LT.MN ) THEN + I = RANK + 1 + CALL ZLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ), + $ A( I, I ), SMINPR, S1, C1 ) + CALL ZLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ), + $ A( I, I ), SMAXPR, S2, C2 ) +* + IF( SMAXPR*RCOND.LE.SMINPR ) THEN + DO 20 I = 1, RANK + WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 ) + WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 ) + 20 CONTINUE + WORK( ISMIN+RANK ) = C1 + WORK( ISMAX+RANK ) = C2 + SMIN = SMINPR + SMAX = SMAXPR + RANK = RANK + 1 + GO TO 10 + END IF + END IF +* +* Logically partition R = [ R11 R12 ] +* [ 0 R22 ] +* where R11 = R(1:RANK,1:RANK) +* +* [R11,R12] = [ T11, 0 ] * Y +* + IF( RANK.LT.N ) + $ CALL ZTZRQF( RANK, N, A, LDA, WORK( MN+1 ), INFO ) +* +* Details of Householder rotations stored in WORK(MN+1:2*MN) +* +* B(1:M,1:NRHS) := Q**H * B(1:M,1:NRHS) +* + CALL ZUNM2R( 'Left', 'Conjugate transpose', M, NRHS, MN, A, LDA, + $ WORK( 1 ), B, LDB, WORK( 2*MN+1 ), INFO ) +* +* workspace NRHS +* +* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) +* + CALL ZTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK, + $ NRHS, CONE, A, LDA, B, LDB ) +* + DO 40 I = RANK + 1, N + DO 30 J = 1, NRHS + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Y**H * B(1:N,1:NRHS) +* + IF( RANK.LT.N ) THEN + DO 50 I = 1, RANK + CALL ZLATZM( 'Left', N-RANK+1, NRHS, A( I, RANK+1 ), LDA, + $ DCONJG( WORK( MN+I ) ), B( I, 1 ), + $ B( RANK+1, 1 ), LDB, WORK( 2*MN+1 ) ) + 50 CONTINUE + END IF +* +* workspace NRHS +* +* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) +* + DO 90 J = 1, NRHS + DO 60 I = 1, N + WORK( 2*MN+I ) = NTDONE + 60 CONTINUE + DO 80 I = 1, N + IF( WORK( 2*MN+I ).EQ.NTDONE ) THEN + IF( JPVT( I ).NE.I ) THEN + K = I + T1 = B( K, J ) + T2 = B( JPVT( K ), J ) + 70 CONTINUE + B( JPVT( K ), J ) = T1 + WORK( 2*MN+K ) = DONE + T1 = T2 + K = JPVT( K ) + T2 = B( JPVT( K ), J ) + IF( JPVT( K ).NE.I ) + $ GO TO 70 + B( I, J ) = T1 + WORK( 2*MN+K ) = DONE + END IF + END IF + 80 CONTINUE + 90 CONTINUE +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL ZLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL ZLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 100 CONTINUE +* + RETURN +* +* End of ZGELSX +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/zgeqpf.f b/dspl/liblapack/SRC/DEPRECATED/zgeqpf.f new file mode 100644 index 0000000..7dffc7a --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/zgeqpf.f @@ -0,0 +1,313 @@ +*> \brief \b ZGEQPF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEQPF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEQPF( M, N, A, LDA, JPVT, TAU, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine ZGEQP3. +*> +*> ZGEQPF computes a QR factorization with column pivoting of a +*> complex M-by-N matrix A: A*P = Q*R. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0 +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the upper triangle of the array contains the +*> min(M,N)-by-N upper triangular matrix R; the elements +*> below the diagonal, together with the array TAU, +*> represent the unitary matrix Q as a product of +*> min(m,n) elementary reflectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +*> to the front of A*P (a leading column); if JPVT(i) = 0, +*> the i-th column of A is a free column. +*> On exit, if JPVT(i) = k, then the i-th column of A*P +*> was the k-th column of A. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(n) +*> +*> Each H(i) has the form +*> +*> H = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i). +*> +*> The matrix P is represented in jpvt as follows: If +*> jpvt(j) = i +*> then the jth column of P is the ith canonical unit vector. +*> +*> Partial column norm updating strategy modified by +*> Z. Drmac and Z. Bujanovic, Dept. of Mathematics, +*> University of Zagreb, Croatia. +*> -- April 2011 -- +*> For more details see LAPACK Working Note 176. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGEQPF( M, N, A, LDA, JPVT, TAU, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, ITEMP, J, MA, MN, PVT + DOUBLE PRECISION TEMP, TEMP2, TOL3Z + COMPLEX*16 AII +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQR2, ZLARF, ZLARFG, ZSWAP, ZUNM2R +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DCMPLX, DCONJG, MAX, MIN, SQRT +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DZNRM2 + EXTERNAL IDAMAX, DLAMCH, DZNRM2 +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEQPF', -INFO ) + RETURN + END IF +* + MN = MIN( M, N ) + TOL3Z = SQRT(DLAMCH('Epsilon')) +* +* Move initial columns up front +* + ITEMP = 1 + DO 10 I = 1, N + IF( JPVT( I ).NE.0 ) THEN + IF( I.NE.ITEMP ) THEN + CALL ZSWAP( M, A( 1, I ), 1, A( 1, ITEMP ), 1 ) + JPVT( I ) = JPVT( ITEMP ) + JPVT( ITEMP ) = I + ELSE + JPVT( I ) = I + END IF + ITEMP = ITEMP + 1 + ELSE + JPVT( I ) = I + END IF + 10 CONTINUE + ITEMP = ITEMP - 1 +* +* Compute the QR factorization and update remaining columns +* + IF( ITEMP.GT.0 ) THEN + MA = MIN( ITEMP, M ) + CALL ZGEQR2( M, MA, A, LDA, TAU, WORK, INFO ) + IF( MA.LT.N ) THEN + CALL ZUNM2R( 'Left', 'Conjugate transpose', M, N-MA, MA, A, + $ LDA, TAU, A( 1, MA+1 ), LDA, WORK, INFO ) + END IF + END IF +* + IF( ITEMP.LT.MN ) THEN +* +* Initialize partial column norms. The first n elements of +* work store the exact column norms. +* + DO 20 I = ITEMP + 1, N + RWORK( I ) = DZNRM2( M-ITEMP, A( ITEMP+1, I ), 1 ) + RWORK( N+I ) = RWORK( I ) + 20 CONTINUE +* +* Compute factorization +* + DO 40 I = ITEMP + 1, MN +* +* Determine ith pivot column and swap if necessary +* + PVT = ( I-1 ) + IDAMAX( N-I+1, RWORK( I ), 1 ) +* + IF( PVT.NE.I ) THEN + CALL ZSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( I ) + JPVT( I ) = ITEMP + RWORK( PVT ) = RWORK( I ) + RWORK( N+PVT ) = RWORK( N+I ) + END IF +* +* Generate elementary reflector H(i) +* + AII = A( I, I ) + CALL ZLARFG( M-I+1, AII, A( MIN( I+1, M ), I ), 1, + $ TAU( I ) ) + A( I, I ) = AII +* + IF( I.LT.N ) THEN +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + AII = A( I, I ) + A( I, I ) = DCMPLX( ONE ) + CALL ZLARF( 'Left', M-I+1, N-I, A( I, I ), 1, + $ DCONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK ) + A( I, I ) = AII + END IF +* +* Update partial column norms +* + DO 30 J = I + 1, N + IF( RWORK( J ).NE.ZERO ) THEN +* +* NOTE: The following 4 lines follow from the analysis in +* Lapack Working Note 176. +* + TEMP = ABS( A( I, J ) ) / RWORK( J ) + TEMP = MAX( ZERO, ( ONE+TEMP )*( ONE-TEMP ) ) + TEMP2 = TEMP*( RWORK( J ) / RWORK( N+J ) )**2 + IF( TEMP2 .LE. TOL3Z ) THEN + IF( M-I.GT.0 ) THEN + RWORK( J ) = DZNRM2( M-I, A( I+1, J ), 1 ) + RWORK( N+J ) = RWORK( J ) + ELSE + RWORK( J ) = ZERO + RWORK( N+J ) = ZERO + END IF + ELSE + RWORK( J ) = RWORK( J )*SQRT( TEMP ) + END IF + END IF + 30 CONTINUE +* + 40 CONTINUE + END IF + RETURN +* +* End of ZGEQPF +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/zggsvd.f b/dspl/liblapack/SRC/DEPRECATED/zggsvd.f new file mode 100644 index 0000000..dcbb892 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/zggsvd.f @@ -0,0 +1,465 @@ +*> \brief ZGGSVD computes the singular value decomposition (SVD) for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGGSVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGGSVD( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, +* LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, +* RWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBQ, JOBU, JOBV +* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION ALPHA( * ), BETA( * ), RWORK( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine ZGGSVD3. +*> +*> ZGGSVD computes the generalized singular value decomposition (GSVD) +*> of an M-by-N complex matrix A and P-by-N complex matrix B: +*> +*> U**H*A*Q = D1*( 0 R ), V**H*B*Q = D2*( 0 R ) +*> +*> where U, V and Q are unitary matrices. +*> Let K+L = the effective numerical rank of the +*> matrix (A**H,B**H)**H, then R is a (K+L)-by-(K+L) nonsingular upper +*> triangular matrix, D1 and D2 are M-by-(K+L) and P-by-(K+L) "diagonal" +*> matrices and of the following structures, respectively: +*> +*> If M-K-L >= 0, +*> +*> K L +*> D1 = K ( I 0 ) +*> L ( 0 C ) +*> M-K-L ( 0 0 ) +*> +*> K L +*> D2 = L ( 0 S ) +*> P-L ( 0 0 ) +*> +*> N-K-L K L +*> ( 0 R ) = K ( 0 R11 R12 ) +*> L ( 0 0 R22 ) +*> where +*> +*> C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), +*> S = diag( BETA(K+1), ... , BETA(K+L) ), +*> C**2 + S**2 = I. +*> +*> R is stored in A(1:K+L,N-K-L+1:N) on exit. +*> +*> If M-K-L < 0, +*> +*> K M-K K+L-M +*> D1 = K ( I 0 0 ) +*> M-K ( 0 C 0 ) +*> +*> K M-K K+L-M +*> D2 = M-K ( 0 S 0 ) +*> K+L-M ( 0 0 I ) +*> P-L ( 0 0 0 ) +*> +*> N-K-L K M-K K+L-M +*> ( 0 R ) = K ( 0 R11 R12 R13 ) +*> M-K ( 0 0 R22 R23 ) +*> K+L-M ( 0 0 0 R33 ) +*> +*> where +*> +*> C = diag( ALPHA(K+1), ... , ALPHA(M) ), +*> S = diag( BETA(K+1), ... , BETA(M) ), +*> C**2 + S**2 = I. +*> +*> (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored +*> ( 0 R22 R23 ) +*> in B(M-K+1:L,N+M-K-L+1:N) on exit. +*> +*> The routine computes C, S, R, and optionally the unitary +*> transformation matrices U, V and Q. +*> +*> In particular, if B is an N-by-N nonsingular matrix, then the GSVD of +*> A and B implicitly gives the SVD of A*inv(B): +*> A*inv(B) = U*(D1*inv(D2))*V**H. +*> If ( A**H,B**H)**H has orthnormal columns, then the GSVD of A and B is also +*> equal to the CS decomposition of A and B. Furthermore, the GSVD can +*> be used to derive the solution of the eigenvalue problem: +*> A**H*A x = lambda* B**H*B x. +*> In some literature, the GSVD of A and B is presented in the form +*> U**H*A*X = ( 0 D1 ), V**H*B*X = ( 0 D2 ) +*> where U and V are orthogonal and X is nonsingular, and D1 and D2 are +*> ``diagonal''. The former GSVD form can be converted to the latter +*> form by taking the nonsingular matrix X as +*> +*> X = Q*( I 0 ) +*> ( 0 inv(R) ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> = 'U': Unitary matrix U is computed; +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> = 'V': Unitary matrix V is computed; +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBQ +*> \verbatim +*> JOBQ is CHARACTER*1 +*> = 'Q': Unitary matrix Q is computed; +*> = 'N': Q is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> \endverbatim +*> +*> \param[out] L +*> \verbatim +*> L is INTEGER +*> +*> On exit, K and L specify the dimension of the subblocks +*> described in Purpose. +*> K + L = effective numerical rank of (A**H,B**H)**H. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A contains the triangular matrix R, or part of R. +*> See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, B contains part of the triangular matrix R if +*> M-K-L < 0. See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION array, dimension (N) +*> +*> On exit, ALPHA and BETA contain the generalized singular +*> value pairs of A and B; +*> ALPHA(1:K) = 1, +*> BETA(1:K) = 0, +*> and if M-K-L >= 0, +*> ALPHA(K+1:K+L) = C, +*> BETA(K+1:K+L) = S, +*> or if M-K-L < 0, +*> ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0 +*> BETA(K+1:M) =S, BETA(M+1:K+L) =1 +*> and +*> ALPHA(K+L+1:N) = 0 +*> BETA(K+L+1:N) = 0 +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX*16 array, dimension (LDU,M) +*> If JOBU = 'U', U contains the M-by-M unitary matrix U. +*> If JOBU = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,M) if +*> JOBU = 'U'; LDU >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (LDV,P) +*> If JOBV = 'V', V contains the P-by-P unitary matrix V. +*> If JOBV = 'N', V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,P) if +*> JOBV = 'V'; LDV >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ,N) +*> If JOBQ = 'Q', Q contains the N-by-N unitary matrix Q. +*> If JOBQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N) if +*> JOBQ = 'Q'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (max(3*N,M,P)+N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> On exit, IWORK stores the sorting information. More +*> precisely, the following loop will sort ALPHA +*> for I = K+1, min(M,K+L) +*> swap ALPHA(I) and ALPHA(IWORK(I)) +*> endfor +*> such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, the Jacobi-type procedure failed to +*> converge. For further details, see subroutine ZTGSJA. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> TOLA DOUBLE PRECISION +*> TOLB DOUBLE PRECISION +*> TOLA and TOLB are the thresholds to determine the effective +*> rank of (A**H,B**H)**H. Generally, they are set to +*> TOLA = MAX(M,N)*norm(A)*MAZHEPS, +*> TOLB = MAX(P,N)*norm(B)*MAZHEPS. +*> The size of TOLA and TOLB may affect the size of backward +*> errors of the decomposition. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERsing +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE ZGGSVD( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, + $ LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, + $ RWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION ALPHA( * ), BETA( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL WANTQ, WANTU, WANTV + INTEGER I, IBND, ISUB, J, NCYCLE + DOUBLE PRECISION ANORM, BNORM, SMAX, TEMP, TOLA, TOLB, ULP, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, DLAMCH, ZLANGE +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, XERBLA, ZGGSVP, ZTGSJA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( P.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -12 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGSVD', -INFO ) + RETURN + END IF +* +* Compute the Frobenius norm of matrices A and B +* + ANORM = ZLANGE( '1', M, N, A, LDA, RWORK ) + BNORM = ZLANGE( '1', P, N, B, LDB, RWORK ) +* +* Get machine precision and set up threshold for determining +* the effective numerical rank of the matrices A and B. +* + ULP = DLAMCH( 'Precision' ) + UNFL = DLAMCH( 'Safe Minimum' ) + TOLA = MAX( M, N )*MAX( ANORM, UNFL )*ULP + TOLB = MAX( P, N )*MAX( BNORM, UNFL )*ULP +* + CALL ZGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, TOLA, + $ TOLB, K, L, U, LDU, V, LDV, Q, LDQ, IWORK, RWORK, + $ WORK, WORK( N+1 ), INFO ) +* +* Compute the GSVD of two upper "triangular" matrices +* + CALL ZTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, LDB, + $ TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, + $ WORK, NCYCLE, INFO ) +* +* Sort the singular values and store the pivot indices in IWORK +* Copy ALPHA to RWORK, then sort ALPHA in RWORK +* + CALL DCOPY( N, ALPHA, 1, RWORK, 1 ) + IBND = MIN( L, M-K ) + DO 20 I = 1, IBND +* +* Scan for largest ALPHA(K+I) +* + ISUB = I + SMAX = RWORK( K+I ) + DO 10 J = I + 1, IBND + TEMP = RWORK( K+J ) + IF( TEMP.GT.SMAX ) THEN + ISUB = J + SMAX = TEMP + END IF + 10 CONTINUE + IF( ISUB.NE.I ) THEN + RWORK( K+ISUB ) = RWORK( K+I ) + RWORK( K+I ) = SMAX + IWORK( K+I ) = K + ISUB + ELSE + IWORK( K+I ) = K + I + END IF + 20 CONTINUE +* + RETURN +* +* End of ZGGSVD +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/zggsvp.f b/dspl/liblapack/SRC/DEPRECATED/zggsvp.f new file mode 100644 index 0000000..f65de92 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/zggsvp.f @@ -0,0 +1,539 @@ +*> \brief \b ZGGSVP +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGGSVP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, +* TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, +* IWORK, RWORK, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBQ, JOBU, JOBV +* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P +* DOUBLE PRECISION TOLA, TOLB +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine ZGGSVP3. +*> +*> ZGGSVP computes unitary matrices U, V and Q such that +*> +*> N-K-L K L +*> U**H*A*Q = K ( 0 A12 A13 ) if M-K-L >= 0; +*> L ( 0 0 A23 ) +*> M-K-L ( 0 0 0 ) +*> +*> N-K-L K L +*> = K ( 0 A12 A13 ) if M-K-L < 0; +*> M-K ( 0 0 A23 ) +*> +*> N-K-L K L +*> V**H*B*Q = L ( 0 0 B13 ) +*> P-L ( 0 0 0 ) +*> +*> where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular +*> upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, +*> otherwise A23 is (M-K)-by-L upper trapezoidal. K+L = the effective +*> numerical rank of the (M+P)-by-N matrix (A**H,B**H)**H. +*> +*> This decomposition is the preprocessing step for computing the +*> Generalized Singular Value Decomposition (GSVD), see subroutine +*> ZGGSVD. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> = 'U': Unitary matrix U is computed; +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> = 'V': Unitary matrix V is computed; +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBQ +*> \verbatim +*> JOBQ is CHARACTER*1 +*> = 'Q': Unitary matrix Q is computed; +*> = 'N': Q is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A contains the triangular (or trapezoidal) matrix +*> described in the Purpose section. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, B contains the triangular matrix described in +*> the Purpose section. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[in] TOLA +*> \verbatim +*> TOLA is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] TOLB +*> \verbatim +*> TOLB is DOUBLE PRECISION +*> +*> TOLA and TOLB are the thresholds to determine the effective +*> numerical rank of matrix B and a subblock of A. Generally, +*> they are set to +*> TOLA = MAX(M,N)*norm(A)*MAZHEPS, +*> TOLB = MAX(P,N)*norm(B)*MAZHEPS. +*> The size of TOLA and TOLB may affect the size of backward +*> errors of the decomposition. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> \endverbatim +*> +*> \param[out] L +*> \verbatim +*> L is INTEGER +*> +*> On exit, K and L specify the dimension of the subblocks +*> described in Purpose section. +*> K + L = effective numerical rank of (A**H,B**H)**H. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX*16 array, dimension (LDU,M) +*> If JOBU = 'U', U contains the unitary matrix U. +*> If JOBU = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,M) if +*> JOBU = 'U'; LDU >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (LDV,P) +*> If JOBV = 'V', V contains the unitary matrix V. +*> If JOBV = 'N', V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,P) if +*> JOBV = 'V'; LDV >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ,N) +*> If JOBQ = 'Q', Q contains the unitary matrix Q. +*> If JOBQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N) if +*> JOBQ = 'Q'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (max(3*N,M,P)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The subroutine uses LAPACK subroutine ZGEQPF for the QR factorization +*> with column pivoting to detect the effective numerical rank of the +*> a matrix. It may be replaced by a better rank determination strategy. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, + $ TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, + $ IWORK, RWORK, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P + DOUBLE PRECISION TOLA, TOLB +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL FORWRD, WANTQ, WANTU, WANTV + INTEGER I, J + COMPLEX*16 T +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQPF, ZGEQR2, ZGERQ2, ZLACPY, ZLAPMT, + $ ZLASET, ZUNG2R, ZUNM2R, ZUNMR2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( T ) = ABS( DBLE( T ) ) + ABS( DIMAG( T ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) + FORWRD = .TRUE. +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( P.LT.0 ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -10 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGSVP', -INFO ) + RETURN + END IF +* +* QR with column pivoting of B: B*P = V*( S11 S12 ) +* ( 0 0 ) +* + DO 10 I = 1, N + IWORK( I ) = 0 + 10 CONTINUE + CALL ZGEQPF( P, N, B, LDB, IWORK, TAU, WORK, RWORK, INFO ) +* +* Update A := A*P +* + CALL ZLAPMT( FORWRD, M, N, A, LDA, IWORK ) +* +* Determine the effective rank of matrix B. +* + L = 0 + DO 20 I = 1, MIN( P, N ) + IF( CABS1( B( I, I ) ).GT.TOLB ) + $ L = L + 1 + 20 CONTINUE +* + IF( WANTV ) THEN +* +* Copy the details of V, and form V. +* + CALL ZLASET( 'Full', P, P, CZERO, CZERO, V, LDV ) + IF( P.GT.1 ) + $ CALL ZLACPY( 'Lower', P-1, N, B( 2, 1 ), LDB, V( 2, 1 ), + $ LDV ) + CALL ZUNG2R( P, P, MIN( P, N ), V, LDV, TAU, WORK, INFO ) + END IF +* +* Clean up B +* + DO 40 J = 1, L - 1 + DO 30 I = J + 1, L + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE + IF( P.GT.L ) + $ CALL ZLASET( 'Full', P-L, N, CZERO, CZERO, B( L+1, 1 ), LDB ) +* + IF( WANTQ ) THEN +* +* Set Q = I and Update Q := Q*P +* + CALL ZLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) + CALL ZLAPMT( FORWRD, N, N, Q, LDQ, IWORK ) + END IF +* + IF( P.GE.L .AND. N.NE.L ) THEN +* +* RQ factorization of ( S11 S12 ) = ( 0 S12 )*Z +* + CALL ZGERQ2( L, N, B, LDB, TAU, WORK, INFO ) +* +* Update A := A*Z**H +* + CALL ZUNMR2( 'Right', 'Conjugate transpose', M, N, L, B, LDB, + $ TAU, A, LDA, WORK, INFO ) + IF( WANTQ ) THEN +* +* Update Q := Q*Z**H +* + CALL ZUNMR2( 'Right', 'Conjugate transpose', N, N, L, B, + $ LDB, TAU, Q, LDQ, WORK, INFO ) + END IF +* +* Clean up B +* + CALL ZLASET( 'Full', L, N-L, CZERO, CZERO, B, LDB ) + DO 60 J = N - L + 1, N + DO 50 I = J - N + L + 1, L + B( I, J ) = CZERO + 50 CONTINUE + 60 CONTINUE +* + END IF +* +* Let N-L L +* A = ( A11 A12 ) M, +* +* then the following does the complete QR decomposition of A11: +* +* A11 = U*( 0 T12 )*P1**H +* ( 0 0 ) +* + DO 70 I = 1, N - L + IWORK( I ) = 0 + 70 CONTINUE + CALL ZGEQPF( M, N-L, A, LDA, IWORK, TAU, WORK, RWORK, INFO ) +* +* Determine the effective rank of A11 +* + K = 0 + DO 80 I = 1, MIN( M, N-L ) + IF( CABS1( A( I, I ) ).GT.TOLA ) + $ K = K + 1 + 80 CONTINUE +* +* Update A12 := U**H*A12, where A12 = A( 1:M, N-L+1:N ) +* + CALL ZUNM2R( 'Left', 'Conjugate transpose', M, L, MIN( M, N-L ), + $ A, LDA, TAU, A( 1, N-L+1 ), LDA, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Copy the details of U, and form U +* + CALL ZLASET( 'Full', M, M, CZERO, CZERO, U, LDU ) + IF( M.GT.1 ) + $ CALL ZLACPY( 'Lower', M-1, N-L, A( 2, 1 ), LDA, U( 2, 1 ), + $ LDU ) + CALL ZUNG2R( M, M, MIN( M, N-L ), U, LDU, TAU, WORK, INFO ) + END IF +* + IF( WANTQ ) THEN +* +* Update Q( 1:N, 1:N-L ) = Q( 1:N, 1:N-L )*P1 +* + CALL ZLAPMT( FORWRD, N, N-L, Q, LDQ, IWORK ) + END IF +* +* Clean up A: set the strictly lower triangular part of +* A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. +* + DO 100 J = 1, K - 1 + DO 90 I = J + 1, K + A( I, J ) = CZERO + 90 CONTINUE + 100 CONTINUE + IF( M.GT.K ) + $ CALL ZLASET( 'Full', M-K, N-L, CZERO, CZERO, A( K+1, 1 ), LDA ) +* + IF( N-L.GT.K ) THEN +* +* RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 +* + CALL ZGERQ2( K, N-L, A, LDA, TAU, WORK, INFO ) +* + IF( WANTQ ) THEN +* +* Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1**H +* + CALL ZUNMR2( 'Right', 'Conjugate transpose', N, N-L, K, A, + $ LDA, TAU, Q, LDQ, WORK, INFO ) + END IF +* +* Clean up A +* + CALL ZLASET( 'Full', K, N-L-K, CZERO, CZERO, A, LDA ) + DO 120 J = N - L - K + 1, N - L + DO 110 I = J - N + L + K + 1, K + A( I, J ) = CZERO + 110 CONTINUE + 120 CONTINUE +* + END IF +* + IF( M.GT.K ) THEN +* +* QR factorization of A( K+1:M,N-L+1:N ) +* + CALL ZGEQR2( M-K, L, A( K+1, N-L+1 ), LDA, TAU, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Update U(:,K+1:M) := U(:,K+1:M)*U1 +* + CALL ZUNM2R( 'Right', 'No transpose', M, M-K, MIN( M-K, L ), + $ A( K+1, N-L+1 ), LDA, TAU, U( 1, K+1 ), LDU, + $ WORK, INFO ) + END IF +* +* Clean up +* + DO 140 J = N - L + 1, N + DO 130 I = J - N + K + L + 1, M + A( I, J ) = CZERO + 130 CONTINUE + 140 CONTINUE +* + END IF +* + RETURN +* +* End of ZGGSVP +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/zlahrd.f b/dspl/liblapack/SRC/DEPRECATED/zlahrd.f new file mode 100644 index 0000000..fb63e96 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/zlahrd.f @@ -0,0 +1,292 @@ +*> \brief \b ZLAHRD reduces the first nb columns of a general rectangular matrix A so that elements below the k-th subdiagonal are zero, and returns auxiliary matrices which are needed to apply the transformation to the unreduced part of A. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAHRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAHRD( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* .. Scalar Arguments .. +* INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), T( LDT, NB ), TAU( NB ), +* $ Y( LDY, NB ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine ZLAHR2. +*> +*> ZLAHRD reduces the first NB columns of a complex general n-by-(n-k+1) +*> matrix A so that elements below the k-th subdiagonal are zero. The +*> reduction is performed by a unitary similarity transformation +*> Q**H * A * Q. The routine returns the matrices V and T which determine +*> Q as a block reflector I - V*T*V**H, and also the matrix Y = A * V * T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The offset for the reduction. Elements below the k-th +*> subdiagonal in the first NB columns are reduced to zero. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of columns to be reduced. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N-K+1) +*> On entry, the n-by-(n-k+1) general matrix A. +*> On exit, the elements on and above the k-th subdiagonal in +*> the first NB columns are overwritten with the corresponding +*> elements of the reduced matrix; the elements below the k-th +*> subdiagonal, with the array TAU, represent the matrix Q as a +*> product of elementary reflectors. The other columns of A are +*> unchanged. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (NB) +*> The scalar factors of the elementary reflectors. See Further +*> Details. +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,NB) +*> The upper triangular matrix T. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[out] Y +*> \verbatim +*> Y is COMPLEX*16 array, dimension (LDY,NB) +*> The n-by-nb matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of nb elementary reflectors +*> +*> Q = H(1) H(2) . . . H(nb). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in +*> A(i+k+1:n,i), and tau in TAU(i). +*> +*> The elements of the vectors v together form the (n-k+1)-by-nb matrix +*> V which is needed, with T and Y, to apply the transformation to the +*> unreduced part of the matrix, using an update of the form: +*> A := (I - V*T*V**H) * (A - Y*V**H). +*> +*> The contents of A on exit are illustrated by the following example +*> with n = 7, k = 3 and nb = 2: +*> +*> ( a h a a a ) +*> ( a h a a a ) +*> ( a h a a a ) +*> ( h h a a a ) +*> ( v1 h a a a ) +*> ( v1 v2 a a a ) +*> ( v1 v2 a a a ) +*> +*> where a denotes an element of the original matrix A, h denotes a +*> modified element of the upper Hessenberg matrix H, and vi denotes an +*> element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLAHRD( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), T( LDT, NB ), TAU( NB ), + $ Y( LDY, NB ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX*16 EI +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZCOPY, ZGEMV, ZLACGV, ZLARFG, ZSCAL, + $ ZTRMV +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, NB + IF( I.GT.1 ) THEN +* +* Update A(1:n,i) +* +* Compute i-th column of A - Y * V**H +* + CALL ZLACGV( I-1, A( K+I-1, 1 ), LDA ) + CALL ZGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, + $ A( K+I-1, 1 ), LDA, ONE, A( 1, I ), 1 ) + CALL ZLACGV( I-1, A( K+I-1, 1 ), LDA ) +* +* Apply I - V * T**H * V**H to this column (call it b) from the +* left, using the last column of T as workspace +* +* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) +* ( V2 ) ( b2 ) +* +* where V1 is unit lower triangular +* +* w := V1**H * b1 +* + CALL ZCOPY( I-1, A( K+1, I ), 1, T( 1, NB ), 1 ) + CALL ZTRMV( 'Lower', 'Conjugate transpose', 'Unit', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) +* +* w := w + V2**H *b2 +* + CALL ZGEMV( 'Conjugate transpose', N-K-I+1, I-1, ONE, + $ A( K+I, 1 ), LDA, A( K+I, I ), 1, ONE, + $ T( 1, NB ), 1 ) +* +* w := T**H *w +* + CALL ZTRMV( 'Upper', 'Conjugate transpose', 'Non-unit', I-1, + $ T, LDT, T( 1, NB ), 1 ) +* +* b2 := b2 - V2*w +* + CALL ZGEMV( 'No transpose', N-K-I+1, I-1, -ONE, A( K+I, 1 ), + $ LDA, T( 1, NB ), 1, ONE, A( K+I, I ), 1 ) +* +* b1 := b1 - V1*w +* + CALL ZTRMV( 'Lower', 'No transpose', 'Unit', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) + CALL ZAXPY( I-1, -ONE, T( 1, NB ), 1, A( K+1, I ), 1 ) +* + A( K+I-1, I-1 ) = EI + END IF +* +* Generate the elementary reflector H(i) to annihilate +* A(k+i+1:n,i) +* + EI = A( K+I, I ) + CALL ZLARFG( N-K-I+1, EI, A( MIN( K+I+1, N ), I ), 1, + $ TAU( I ) ) + A( K+I, I ) = ONE +* +* Compute Y(1:n,i) +* + CALL ZGEMV( 'No transpose', N, N-K-I+1, ONE, A( 1, I+1 ), LDA, + $ A( K+I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', N-K-I+1, I-1, ONE, + $ A( K+I, 1 ), LDA, A( K+I, I ), 1, ZERO, T( 1, I ), + $ 1 ) + CALL ZGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, T( 1, I ), 1, + $ ONE, Y( 1, I ), 1 ) + CALL ZSCAL( N, TAU( I ), Y( 1, I ), 1 ) +* +* Compute T(1:i,i) +* + CALL ZSCAL( I-1, -TAU( I ), T( 1, I ), 1 ) + CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, LDT, + $ T( 1, I ), 1 ) + T( I, I ) = TAU( I ) +* + 10 CONTINUE + A( K+NB, NB ) = EI +* + RETURN +* +* End of ZLAHRD +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/zlatzm.f b/dspl/liblapack/SRC/DEPRECATED/zlatzm.f new file mode 100644 index 0000000..f53010a --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/zlatzm.f @@ -0,0 +1,225 @@ +*> \brief \b ZLATZM +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLATZM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLATZM( SIDE, M, N, V, INCV, TAU, C1, C2, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* COMPLEX*16 TAU +* .. +* .. Array Arguments .. +* COMPLEX*16 C1( LDC, * ), C2( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine ZUNMRZ. +*> +*> ZLATZM applies a Householder matrix generated by ZTZRQF to a matrix. +*> +*> Let P = I - tau*u*u**H, u = ( 1 ), +*> ( v ) +*> where v is an (m-1) vector if SIDE = 'L', or a (n-1) vector if +*> SIDE = 'R'. +*> +*> If SIDE equals 'L', let +*> C = [ C1 ] 1 +*> [ C2 ] m-1 +*> n +*> Then C is overwritten by P*C. +*> +*> If SIDE equals 'R', let +*> C = [ C1, C2 ] m +*> 1 n-1 +*> Then C is overwritten by C*P. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form P * C +*> = 'R': form C * P +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of P. V is not used +*> if TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0 +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 +*> The value tau in the representation of P. +*> \endverbatim +*> +*> \param[in,out] C1 +*> \verbatim +*> C1 is COMPLEX*16 array, dimension +*> (LDC,N) if SIDE = 'L' +*> (M,1) if SIDE = 'R' +*> On entry, the n-vector C1 if SIDE = 'L', or the m-vector C1 +*> if SIDE = 'R'. +*> +*> On exit, the first row of P*C if SIDE = 'L', or the first +*> column of C*P if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in,out] C2 +*> \verbatim +*> C2 is COMPLEX*16 array, dimension +*> (LDC, N) if SIDE = 'L' +*> (LDC, N-1) if SIDE = 'R' +*> On entry, the (m - 1) x n matrix C2 if SIDE = 'L', or the +*> m x (n - 1) matrix C2 if SIDE = 'R'. +*> +*> On exit, rows 2:m of P*C if SIDE = 'L', or columns 2:m of C*P +*> if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the arrays C1 and C2. +*> LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension +*> (N) if SIDE = 'L' +*> (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZLATZM( SIDE, M, N, V, INCV, TAU, C1, C2, LDC, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + COMPLEX*16 TAU +* .. +* .. Array Arguments .. + COMPLEX*16 C1( LDC, * ), C2( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZCOPY, ZGEMV, ZGERC, ZGERU, ZLACGV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( ( MIN( M, N ).EQ.0 ) .OR. ( TAU.EQ.ZERO ) ) + $ RETURN +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* w := ( C1 + v**H * C2 )**H +* + CALL ZCOPY( N, C1, LDC, WORK, 1 ) + CALL ZLACGV( N, WORK, 1 ) + CALL ZGEMV( 'Conjugate transpose', M-1, N, ONE, C2, LDC, V, + $ INCV, ONE, WORK, 1 ) +* +* [ C1 ] := [ C1 ] - tau* [ 1 ] * w**H +* [ C2 ] [ C2 ] [ v ] +* + CALL ZLACGV( N, WORK, 1 ) + CALL ZAXPY( N, -TAU, WORK, 1, C1, LDC ) + CALL ZGERU( M-1, N, -TAU, V, INCV, WORK, 1, C2, LDC ) +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* w := C1 + C2 * v +* + CALL ZCOPY( M, C1, 1, WORK, 1 ) + CALL ZGEMV( 'No transpose', M, N-1, ONE, C2, LDC, V, INCV, ONE, + $ WORK, 1 ) +* +* [ C1, C2 ] := [ C1, C2 ] - tau* w * [ 1 , v**H] +* + CALL ZAXPY( M, -TAU, WORK, 1, C1, 1 ) + CALL ZGERC( M, N-1, -TAU, WORK, 1, V, INCV, C2, LDC ) + END IF +* + RETURN +* +* End of ZLATZM +* + END diff --git a/dspl/liblapack/SRC/DEPRECATED/ztzrqf.f b/dspl/liblapack/SRC/DEPRECATED/ztzrqf.f new file mode 100644 index 0000000..7bfb2e0 --- /dev/null +++ b/dspl/liblapack/SRC/DEPRECATED/ztzrqf.f @@ -0,0 +1,241 @@ +*> \brief \b ZTZRQF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTZRQF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTZRQF( M, N, A, LDA, TAU, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is deprecated and has been replaced by routine ZTZRZF. +*> +*> ZTZRQF reduces the M-by-N ( M<=N ) complex upper trapezoidal matrix A +*> to upper triangular form by means of unitary transformations. +*> +*> The upper trapezoidal matrix A is factored as +*> +*> A = ( R 0 ) * Z, +*> +*> where Z is an N-by-N unitary matrix and R is an M-by-M upper +*> triangular matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= M. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the leading M-by-N upper trapezoidal part of the +*> array A must contain the matrix to be factorized. +*> On exit, the leading M-by-M upper triangular part of A +*> contains the upper triangular matrix R, and elements M+1 to +*> N of the first M rows of A, with the array TAU, represent the +*> unitary matrix Z as a product of M elementary reflectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (M) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The factorization is obtained by Householder's method. The kth +*> transformation matrix, Z( k ), whose conjugate transpose is used to +*> introduce zeros into the (m - k + 1)th row of A, is given in the form +*> +*> Z( k ) = ( I 0 ), +*> ( 0 T( k ) ) +*> +*> where +*> +*> T( k ) = I - tau*u( k )*u( k )**H, u( k ) = ( 1 ), +*> ( 0 ) +*> ( z( k ) ) +*> +*> tau is a scalar and z( k ) is an ( n - m ) element vector. +*> tau and z( k ) are chosen to annihilate the elements of the kth row +*> of X. +*> +*> The scalar tau is returned in the kth element of TAU and the vector +*> u( k ) in the kth row of A, such that the elements of z( k ) are +*> in a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in +*> the upper triangular part of A. +*> +*> Z is given by +*> +*> Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTZRQF( M, N, A, LDA, TAU, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE, CZERO + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), + $ CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K, M1 + COMPLEX*16 ALPHA +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZGEMV, ZGERC, ZLACGV, + $ ZLARFG +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTZRQF', -INFO ) + RETURN + END IF +* +* Perform the factorization. +* + IF( M.EQ.0 ) + $ RETURN + IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = CZERO + 10 CONTINUE + ELSE + M1 = MIN( M+1, N ) + DO 20 K = M, 1, -1 +* +* Use a Householder reflection to zero the kth row of A. +* First set up the reflection. +* + A( K, K ) = DCONJG( A( K, K ) ) + CALL ZLACGV( N-M, A( K, M1 ), LDA ) + ALPHA = A( K, K ) + CALL ZLARFG( N-M+1, ALPHA, A( K, M1 ), LDA, TAU( K ) ) + A( K, K ) = ALPHA + TAU( K ) = DCONJG( TAU( K ) ) +* + IF( TAU( K ).NE.CZERO .AND. K.GT.1 ) THEN +* +* We now perform the operation A := A*P( k )**H. +* +* Use the first ( k - 1 ) elements of TAU to store a( k ), +* where a( k ) consists of the first ( k - 1 ) elements of +* the kth column of A. Also let B denote the first +* ( k - 1 ) rows of the last ( n - m ) columns of A. +* + CALL ZCOPY( K-1, A( 1, K ), 1, TAU, 1 ) +* +* Form w = a( k ) + B*z( k ) in TAU. +* + CALL ZGEMV( 'No transpose', K-1, N-M, CONE, A( 1, M1 ), + $ LDA, A( K, M1 ), LDA, CONE, TAU, 1 ) +* +* Now form a( k ) := a( k ) - conjg(tau)*w +* and B := B - conjg(tau)*w*z( k )**H. +* + CALL ZAXPY( K-1, -DCONJG( TAU( K ) ), TAU, 1, A( 1, K ), + $ 1 ) + CALL ZGERC( K-1, N-M, -DCONJG( TAU( K ) ), TAU, 1, + $ A( K, M1 ), LDA, A( 1, M1 ), LDA ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of ZTZRQF +* + END diff --git a/dspl/liblapack/SRC/Makefile b/dspl/liblapack/SRC/Makefile new file mode 100644 index 0000000..90fb8c6 --- /dev/null +++ b/dspl/liblapack/SRC/Makefile @@ -0,0 +1,572 @@ +include ../make.inc + +####################################################################### +# This is the makefile to create a library for LAPACK. +# The files are organized as follows: +# +# ALLAUX -- Auxiliary routines called from all precisions +# SCLAUX -- Auxiliary routines called from single precision +# DZLAUX -- Auxiliary routines called from double precision +# +# DSLASRC -- Double-single mixed precision real routines called from +# single, single-extra and double precision real LAPACK +# routines (i.e. from SLASRC, SXLASRC, DLASRC). +# ZCLASRC -- Double-single mixed precision complex routines called from +# single, single-extra and double precision complex LAPACK +# routines (i.e. from CLASRC, CXLASRC, ZLASRC). +# +# SLASRC -- Single precision real LAPACK routines +# SXLASRC -- Single precision real LAPACK routines using extra +# precision. +# CLASRC -- Single precision complex LAPACK routines +# CXLASRC -- Single precision complex LAPACK routines using extra +# precision. +# DLASRC -- Double precision real LAPACK routines +# DXLASRC -- Double precision real LAPACK routines using extra +# precision. +# ZLASRC -- Double precision complex LAPACK routines +# ZXLASRC -- Double precision complex LAPACK routines using extra +# precision. +# +# DEPRECATED -- Deprecated routines in all precisions +# +# The library can be set up to include routines for any combination +# of the four precisions. To create or add to the library, enter make +# followed by one or more of the precisions desired. Some examples: +# make single +# make single complex +# make single double complex complex16 +# Alternatively, the command +# make +# without any arguments creates a library of all four precisions. +# The library is called +# lapack.a +# and is created at the next higher directory level. +# +# To remove the object files after the library is created, enter +# make cleanobj +# On some systems, you can force the source files to be recompiled by +# entering (for example) +# make single FRC=FRC +# +# ***Note*** +# The functions lsame, second, dsecnd, slamch, and dlamch may have +# to be installed before compiling the library. Refer to the +# installation guide, LAPACK Working Note 41, for instructions. +# +####################################################################### + +ALLAUX = ilaenv.o ilaenv2stage.o ieeeck.o lsamen.o xerbla.o xerbla_array.o \ + iparmq.o iparam2stage.o \ + ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \ + ../INSTALL/ilaver.o ../INSTALL/lsame.o ../INSTALL/slamch.o + +SCLAUX = \ + sbdsdc.o \ + sbdsqr.o sdisna.o slabad.o slacpy.o sladiv.o slae2.o slaebz.o \ + slaed0.o slaed1.o slaed2.o slaed3.o slaed4.o slaed5.o slaed6.o \ + slaed7.o slaed8.o slaed9.o slaeda.o slaev2.o slagtf.o \ + slagts.o slamrg.o slanst.o \ + slapy2.o slapy3.o slarnv.o \ + slarra.o slarrb.o slarrc.o slarrd.o slarre.o slarrf.o slarrj.o \ + slarrk.o slarrr.o slaneg.o \ + slartg.o slaruv.o slas2.o slascl.o \ + slasd0.o slasd1.o slasd2.o slasd3.o slasd4.o slasd5.o slasd6.o \ + slasd7.o slasd8.o slasda.o slasdq.o slasdt.o \ + slaset.o slasq1.o slasq2.o slasq3.o slasq4.o slasq5.o slasq6.o \ + slasr.o slasrt.o slassq.o slasv2.o spttrf.o sstebz.o sstedc.o \ + ssteqr.o ssterf.o slaisnan.o sisnan.o \ + slartgp.o slartgs.o \ + ../INSTALL/second_$(TIMER).o + +DZLAUX = \ + dbdsdc.o \ + dbdsqr.o ddisna.o dlabad.o dlacpy.o dladiv.o dlae2.o dlaebz.o \ + dlaed0.o dlaed1.o dlaed2.o dlaed3.o dlaed4.o dlaed5.o dlaed6.o \ + dlaed7.o dlaed8.o dlaed9.o dlaeda.o dlaev2.o dlagtf.o \ + dlagts.o dlamrg.o dlanst.o \ + dlapy2.o dlapy3.o dlarnv.o \ + dlarra.o dlarrb.o dlarrc.o dlarrd.o dlarre.o dlarrf.o dlarrj.o \ + dlarrk.o dlarrr.o dlaneg.o \ + dlartg.o dlaruv.o dlas2.o dlascl.o \ + dlasd0.o dlasd1.o dlasd2.o dlasd3.o dlasd4.o dlasd5.o dlasd6.o \ + dlasd7.o dlasd8.o dlasda.o dlasdq.o dlasdt.o \ + dlaset.o dlasq1.o dlasq2.o dlasq3.o dlasq4.o dlasq5.o dlasq6.o \ + dlasr.o dlasrt.o dlassq.o dlasv2.o dpttrf.o dstebz.o dstedc.o \ + dsteqr.o dsterf.o dlaisnan.o disnan.o \ + dlartgp.o dlartgs.o \ + ../INSTALL/dlamch.o ../INSTALL/dsecnd_$(TIMER).o + +SLASRC = \ + sbdsvdx.o spotrf2.o sgetrf2.o \ + sgbbrd.o sgbcon.o sgbequ.o sgbrfs.o sgbsv.o \ + sgbsvx.o sgbtf2.o sgbtrf.o sgbtrs.o sgebak.o sgebal.o sgebd2.o \ + sgebrd.o sgecon.o sgeequ.o sgees.o sgeesx.o sgeev.o sgeevx.o \ + sgehd2.o sgehrd.o sgelq2.o sgelqf.o \ + sgels.o sgelsd.o sgelss.o sgelsy.o sgeql2.o sgeqlf.o \ + sgeqp3.o sgeqr2.o sgeqr2p.o sgeqrf.o sgeqrfp.o sgerfs.o \ + sgerq2.o sgerqf.o sgesc2.o sgesdd.o sgesv.o sgesvd.o sgesvdx.o sgesvx.o \ + sgetc2.o sgetf2.o sgetri.o \ + sggbak.o sggbal.o sgges.o sgges3.o sggesx.o \ + sggev.o sggev3.o sggevx.o \ + sggglm.o sgghrd.o sgghd3.o sgglse.o sggqrf.o \ + sggrqf.o sggsvd3.o sggsvp3.o sgtcon.o sgtrfs.o sgtsv.o \ + sgtsvx.o sgttrf.o sgttrs.o sgtts2.o shgeqz.o \ + shsein.o shseqr.o slabrd.o slacon.o slacn2.o \ + slaein.o slaexc.o slag2.o slags2.o slagtm.o slagv2.o slahqr.o \ + slahr2.o slaic1.o slaln2.o slals0.o slalsa.o slalsd.o \ + slangb.o slange.o slangt.o slanhs.o slansb.o slansp.o \ + slansy.o slantb.o slantp.o slantr.o slanv2.o \ + slapll.o slapmt.o \ + slaqgb.o slaqge.o slaqp2.o slaqps.o slaqsb.o slaqsp.o slaqsy.o \ + slaqr0.o slaqr1.o slaqr2.o slaqr3.o slaqr4.o slaqr5.o \ + slaqtr.o slar1v.o slar2v.o ilaslr.o ilaslc.o \ + slarf.o slarfb.o slarfg.o slarfgp.o slarft.o slarfx.o slarfy.o slargv.o \ + slarrv.o slartv.o \ + slarz.o slarzb.o slarzt.o slaswp.o slasy2.o slasyf.o slasyf_rook.o \ + slasyf_rk.o \ + slatbs.o slatdf.o slatps.o slatrd.o slatrs.o slatrz.o \ + slauu2.o slauum.o sopgtr.o sopmtr.o sorg2l.o sorg2r.o \ + sorgbr.o sorghr.o sorgl2.o sorglq.o sorgql.o sorgqr.o sorgr2.o \ + sorgrq.o sorgtr.o sorm2l.o sorm2r.o sorm22.o \ + sormbr.o sormhr.o sorml2.o sormlq.o sormql.o sormqr.o sormr2.o \ + sormr3.o sormrq.o sormrz.o sormtr.o spbcon.o spbequ.o spbrfs.o \ + spbstf.o spbsv.o spbsvx.o \ + spbtf2.o spbtrf.o spbtrs.o spocon.o spoequ.o sporfs.o sposv.o \ + sposvx.o spotf2.o spotri.o spstrf.o spstf2.o \ + sppcon.o sppequ.o \ + spprfs.o sppsv.o sppsvx.o spptrf.o spptri.o spptrs.o sptcon.o \ + spteqr.o sptrfs.o sptsv.o sptsvx.o spttrs.o sptts2.o srscl.o \ + ssbev.o ssbevd.o ssbevx.o ssbgst.o ssbgv.o ssbgvd.o ssbgvx.o \ + ssbtrd.o sspcon.o sspev.o sspevd.o sspevx.o sspgst.o \ + sspgv.o sspgvd.o sspgvx.o ssprfs.o sspsv.o sspsvx.o ssptrd.o \ + ssptrf.o ssptri.o ssptrs.o sstegr.o sstein.o sstev.o sstevd.o sstevr.o \ + sstevx.o \ + ssycon.o ssyev.o ssyevd.o ssyevr.o ssyevx.o ssygs2.o \ + ssygst.o ssygv.o ssygvd.o ssygvx.o ssyrfs.o ssysv.o ssysvx.o \ + ssytd2.o ssytf2.o ssytrd.o ssytrf.o ssytri.o ssytri2.o ssytri2x.o \ + ssyswapr.o ssytrs.o ssytrs2.o \ + ssyconv.o ssyconvf.o ssyconvf_rook.o \ + ssytf2_rook.o ssytrf_rook.o ssytrs_rook.o \ + ssytri_rook.o ssycon_rook.o ssysv_rook.o \ + ssytf2_rk.o ssytrf_rk.o ssytrs_3.o \ + ssytri_3.o ssytri_3x.o ssycon_3.o ssysv_rk.o \ + slasyf_aa.o ssysv_aa.o ssytrf_aa.o ssytrs_aa.o \ + ssysv_aa_2stage.o ssytrf_aa_2stage.o ssytrs_aa_2stage.o \ + stbcon.o \ + stbrfs.o stbtrs.o stgevc.o stgex2.o stgexc.o stgsen.o \ + stgsja.o stgsna.o stgsy2.o stgsyl.o stpcon.o stprfs.o stptri.o \ + stptrs.o \ + strcon.o strevc.o strevc3.o strexc.o strrfs.o strsen.o strsna.o strsyl.o \ + strti2.o strtri.o strtrs.o stzrzf.o sstemr.o \ + slansf.o spftrf.o spftri.o spftrs.o ssfrk.o stfsm.o stftri.o stfttp.o \ + stfttr.o stpttf.o stpttr.o strttf.o strttp.o \ + sgejsv.o sgesvj.o sgsvj0.o sgsvj1.o \ + sgeequb.o ssyequb.o spoequb.o sgbequb.o \ + sbbcsd.o slapmr.o sorbdb.o sorbdb1.o sorbdb2.o sorbdb3.o sorbdb4.o \ + sorbdb5.o sorbdb6.o sorcsd.o sorcsd2by1.o \ + sgeqrt.o sgeqrt2.o sgeqrt3.o sgemqrt.o \ + stpqrt.o stpqrt2.o stpmqrt.o stprfb.o \ + sgelqt.o sgelqt3.o sgemlqt.o \ + sgetsls.o sgeqr.o slatsqr.o slamtsqr.o sgemqr.o \ + sgelq.o slaswlq.o slamswlq.o sgemlq.o \ + stplqt.o stplqt2.o stpmlqt.o \ + ssytrd_2stage.o ssytrd_sy2sb.o ssytrd_sb2st.o ssb2st_kernels.o \ + ssyevd_2stage.o ssyev_2stage.o ssyevx_2stage.o ssyevr_2stage.o \ + ssbev_2stage.o ssbevx_2stage.o ssbevd_2stage.o ssygv_2stage.o + +DSLASRC = spotrs.o sgetrs.o spotrf.o sgetrf.o + +ifdef USEXBLAS +SXLASRC = sgesvxx.o sgerfsx.o sla_gerfsx_extended.o sla_geamv.o \ + sla_gercond.o sla_gerpvgrw.o ssysvxx.o ssyrfsx.o \ + sla_syrfsx_extended.o sla_syamv.o sla_syrcond.o sla_syrpvgrw.o \ + sposvxx.o sporfsx.o sla_porfsx_extended.o sla_porcond.o \ + sla_porpvgrw.o sgbsvxx.o sgbrfsx.o sla_gbrfsx_extended.o \ + sla_gbamv.o sla_gbrcond.o sla_gbrpvgrw.o sla_lin_berr.o slarscl2.o \ + slascl2.o sla_wwaddw.o +endif + +CLASRC = \ + cpotrf2.o cgetrf2.o \ + cbdsqr.o cgbbrd.o cgbcon.o cgbequ.o cgbrfs.o cgbsv.o cgbsvx.o \ + cgbtf2.o cgbtrf.o cgbtrs.o cgebak.o cgebal.o cgebd2.o cgebrd.o \ + cgecon.o cgeequ.o cgees.o cgeesx.o cgeev.o cgeevx.o \ + cgehd2.o cgehrd.o cgelq2.o cgelqf.o \ + cgels.o cgelsd.o cgelss.o cgelsy.o cgeql2.o cgeqlf.o cgeqp3.o \ + cgeqr2.o cgeqr2p.o cgeqrf.o cgeqrfp.o cgerfs.o \ + cgerq2.o cgerqf.o cgesc2.o cgesdd.o cgesv.o cgesvd.o cgesvdx.o \ + cgesvj.o cgejsv.o cgsvj0.o cgsvj1.o \ + cgesvx.o cgetc2.o cgetf2.o cgetri.o \ + cggbak.o cggbal.o cgges.o cgges3.o cggesx.o \ + cggev.o cggev3.o cggevx.o cggglm.o \ + cgghrd.o cgghd3.o cgglse.o cggqrf.o cggrqf.o \ + cggsvd3.o cggsvp3.o \ + cgtcon.o cgtrfs.o cgtsv.o cgtsvx.o cgttrf.o cgttrs.o cgtts2.o chbev.o \ + chbevd.o chbevx.o chbgst.o chbgv.o chbgvd.o chbgvx.o chbtrd.o \ + checon.o cheev.o cheevd.o cheevr.o cheevx.o chegs2.o chegst.o \ + chegv.o chegvd.o chegvx.o cherfs.o chesv.o chesvx.o chetd2.o \ + chetf2.o chetrd.o \ + chetrf.o chetri.o chetri2.o chetri2x.o cheswapr.o \ + chetrs.o chetrs2.o \ + chetf2_rook.o chetrf_rook.o chetri_rook.o \ + chetrs_rook.o checon_rook.o chesv_rook.o \ + chetf2_rk.o chetrf_rk.o chetri_3.o chetri_3x.o \ + chetrs_3.o checon_3.o chesv_rk.o \ + chesv_aa.o chetrf_aa.o chetrs_aa.o clahef_aa.o \ + chesv_aa_2stage.o chetrf_aa_2stage.o chetrs_aa_2stage.o \ + chgeqz.o chpcon.o chpev.o chpevd.o \ + chpevx.o chpgst.o chpgv.o chpgvd.o chpgvx.o chprfs.o chpsv.o \ + chpsvx.o \ + chptrd.o chptrf.o chptri.o chptrs.o chsein.o chseqr.o clabrd.o \ + clacgv.o clacon.o clacn2.o clacp2.o clacpy.o clacrm.o clacrt.o cladiv.o \ + claed0.o claed7.o claed8.o \ + claein.o claesy.o claev2.o clags2.o clagtm.o \ + clahef.o clahef_rook.o clahef_rk.o clahqr.o \ + clahr2.o claic1.o clals0.o clalsa.o clalsd.o clangb.o clange.o clangt.o \ + clanhb.o clanhe.o \ + clanhp.o clanhs.o clanht.o clansb.o clansp.o clansy.o clantb.o \ + clantp.o clantr.o clapll.o clapmt.o clarcm.o claqgb.o claqge.o \ + claqhb.o claqhe.o claqhp.o claqp2.o claqps.o claqsb.o \ + claqr0.o claqr1.o claqr2.o claqr3.o claqr4.o claqr5.o \ + claqsp.o claqsy.o clar1v.o clar2v.o ilaclr.o ilaclc.o \ + clarf.o clarfb.o clarfg.o clarft.o clarfgp.o \ + clarfx.o clarfy.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \ + clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \ + claswp.o clasyf.o clasyf_rook.o clasyf_rk.o clasyf_aa.o \ + clatbs.o clatdf.o clatps.o clatrd.o clatrs.o clatrz.o \ + clauu2.o clauum.o cpbcon.o cpbequ.o cpbrfs.o cpbstf.o cpbsv.o \ + cpbsvx.o cpbtf2.o cpbtrf.o cpbtrs.o cpocon.o cpoequ.o cporfs.o \ + cposv.o cposvx.o cpotf2.o cpotri.o cpstrf.o cpstf2.o \ + cppcon.o cppequ.o cpprfs.o cppsv.o cppsvx.o cpptrf.o cpptri.o cpptrs.o \ + cptcon.o cpteqr.o cptrfs.o cptsv.o cptsvx.o cpttrf.o cpttrs.o cptts2.o \ + crot.o cspcon.o cspmv.o cspr.o csprfs.o cspsv.o \ + cspsvx.o csptrf.o csptri.o csptrs.o csrscl.o cstedc.o \ + cstegr.o cstein.o csteqr.o \ + csycon.o csymv.o \ + csyr.o csyrfs.o csysv.o csysvx.o csytf2.o csytrf.o csytri.o csytri2.o csytri2x.o \ + csyswapr.o csytrs.o csytrs2.o \ + csyconv.o csyconvf.o csyconvf_rook.o \ + csytf2_rook.o csytrf_rook.o csytrs_rook.o \ + csytri_rook.o csycon_rook.o csysv_rook.o \ + csytf2_rk.o csytrf_rk.o csytrf_aa.o csytrs_3.o csytrs_aa.o \ + csytri_3.o csytri_3x.o csycon_3.o csysv_rk.o csysv_aa.o \ + csysv_aa_2stage.o csytrf_aa_2stage.o csytrs_aa_2stage.o \ + ctbcon.o ctbrfs.o ctbtrs.o ctgevc.o ctgex2.o \ + ctgexc.o ctgsen.o ctgsja.o ctgsna.o ctgsy2.o ctgsyl.o ctpcon.o \ + ctprfs.o ctptri.o \ + ctptrs.o ctrcon.o ctrevc.o ctrevc3.o ctrexc.o ctrrfs.o ctrsen.o ctrsna.o \ + ctrsyl.o ctrti2.o ctrtri.o ctrtrs.o ctzrzf.o cung2l.o cung2r.o \ + cungbr.o cunghr.o cungl2.o cunglq.o cungql.o cungqr.o cungr2.o \ + cungrq.o cungtr.o cunm2l.o cunm2r.o cunmbr.o cunmhr.o cunml2.o cunm22.o \ + cunmlq.o cunmql.o cunmqr.o cunmr2.o cunmr3.o cunmrq.o cunmrz.o \ + cunmtr.o cupgtr.o cupmtr.o icmax1.o scsum1.o cstemr.o \ + chfrk.o ctfttp.o clanhf.o cpftrf.o cpftri.o cpftrs.o ctfsm.o ctftri.o \ + ctfttr.o ctpttf.o ctpttr.o ctrttf.o ctrttp.o \ + cgeequb.o cgbequb.o csyequb.o cpoequb.o cheequb.o \ + cbbcsd.o clapmr.o cunbdb.o cunbdb1.o cunbdb2.o cunbdb3.o cunbdb4.o \ + cunbdb5.o cunbdb6.o cuncsd.o cuncsd2by1.o \ + cgeqrt.o cgeqrt2.o cgeqrt3.o cgemqrt.o \ + ctpqrt.o ctpqrt2.o ctpmqrt.o ctprfb.o \ + cgelqt.o cgelqt3.o cgemlqt.o \ + cgetsls.o cgeqr.o clatsqr.o clamtsqr.o cgemqr.o \ + cgelq.o claswlq.o clamswlq.o cgemlq.o \ + ctplqt.o ctplqt2.o ctpmlqt.o \ + chetrd_2stage.o chetrd_he2hb.o chetrd_hb2st.o chb2st_kernels.o \ + cheevd_2stage.o cheev_2stage.o cheevx_2stage.o cheevr_2stage.o \ + chbev_2stage.o chbevx_2stage.o chbevd_2stage.o chegv_2stage.o + +ifdef USEXBLAS +CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \ + cla_gercond_c.o cla_gercond_x.o cla_gerpvgrw.o \ + csysvxx.o csyrfsx.o cla_syrfsx_extended.o cla_syamv.o \ + cla_syrcond_c.o cla_syrcond_x.o cla_syrpvgrw.o \ + cposvxx.o cporfsx.o cla_porfsx_extended.o \ + cla_porcond_c.o cla_porcond_x.o cla_porpvgrw.o \ + cgbsvxx.o cgbrfsx.o cla_gbrfsx_extended.o cla_gbamv.o \ + cla_gbrcond_c.o cla_gbrcond_x.o cla_gbrpvgrw.o \ + chesvxx.o cherfsx.o cla_herfsx_extended.o cla_heamv.o \ + cla_hercond_c.o cla_hercond_x.o cla_herpvgrw.o \ + cla_lin_berr.o clarscl2.o clascl2.o cla_wwaddw.o +endif + +ZCLASRC = cpotrs.o cgetrs.o cpotrf.o cgetrf.o + +DLASRC = \ + dpotrf2.o dgetrf2.o \ + dbdsvdx.o \ + dgbbrd.o dgbcon.o dgbequ.o dgbrfs.o dgbsv.o \ + dgbsvx.o dgbtf2.o dgbtrf.o dgbtrs.o dgebak.o dgebal.o dgebd2.o \ + dgebrd.o dgecon.o dgeequ.o dgees.o dgeesx.o dgeev.o dgeevx.o \ + dgehd2.o dgehrd.o dgelq2.o dgelqf.o \ + dgels.o dgelsd.o dgelss.o dgelsy.o dgeql2.o dgeqlf.o \ + dgeqp3.o dgeqr2.o dgeqr2p.o dgeqrf.o dgeqrfp.o dgerfs.o \ + dgerq2.o dgerqf.o dgesc2.o dgesdd.o dgesv.o dgesvd.o dgesvdx.o dgesvx.o \ + dgetc2.o dgetf2.o dgetrf.o dgetri.o \ + dgetrs.o dggbak.o dggbal.o dgges.o dgges3.o dggesx.o \ + dggev.o dggev3.o dggevx.o \ + dggglm.o dgghrd.o dgghd3.o dgglse.o dggqrf.o \ + dggrqf.o dggsvd3.o dggsvp3.o dgtcon.o dgtrfs.o dgtsv.o \ + dgtsvx.o dgttrf.o dgttrs.o dgtts2.o dhgeqz.o \ + dhsein.o dhseqr.o dlabrd.o dlacon.o dlacn2.o \ + dlaein.o dlaexc.o dlag2.o dlags2.o dlagtm.o dlagv2.o dlahqr.o \ + dlahr2.o dlaic1.o dlaln2.o dlals0.o dlalsa.o dlalsd.o \ + dlangb.o dlange.o dlangt.o dlanhs.o dlansb.o dlansp.o \ + dlansy.o dlantb.o dlantp.o dlantr.o dlanv2.o \ + dlapll.o dlapmt.o \ + dlaqgb.o dlaqge.o dlaqp2.o dlaqps.o dlaqsb.o dlaqsp.o dlaqsy.o \ + dlaqr0.o dlaqr1.o dlaqr2.o dlaqr3.o dlaqr4.o dlaqr5.o \ + dlaqtr.o dlar1v.o dlar2v.o iladlr.o iladlc.o \ + dlarf.o dlarfb.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o dlarfy.o \ + dlargv.o dlarrv.o dlartv.o \ + dlarz.o dlarzb.o dlarzt.o dlaswp.o dlasy2.o \ + dlasyf.o dlasyf_rook.o dlasyf_rk.o \ + dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlauu2.o \ + dlauum.o dopgtr.o dopmtr.o dorg2l.o dorg2r.o \ + dorgbr.o dorghr.o dorgl2.o dorglq.o dorgql.o dorgqr.o dorgr2.o \ + dorgrq.o dorgtr.o dorm2l.o dorm2r.o dorm22.o \ + dormbr.o dormhr.o dorml2.o dormlq.o dormql.o dormqr.o dormr2.o \ + dormr3.o dormrq.o dormrz.o dormtr.o dpbcon.o dpbequ.o dpbrfs.o \ + dpbstf.o dpbsv.o dpbsvx.o \ + dpbtf2.o dpbtrf.o dpbtrs.o dpocon.o dpoequ.o dporfs.o dposv.o \ + dposvx.o dpotf2.o dpotrf.o dpotri.o dpotrs.o dpstrf.o dpstf2.o \ + dppcon.o dppequ.o \ + dpprfs.o dppsv.o dppsvx.o dpptrf.o dpptri.o dpptrs.o dptcon.o \ + dpteqr.o dptrfs.o dptsv.o dptsvx.o dpttrs.o dptts2.o drscl.o \ + dsbev.o dsbevd.o dsbevx.o dsbgst.o dsbgv.o dsbgvd.o dsbgvx.o \ + dsbtrd.o dspcon.o dspev.o dspevd.o dspevx.o dspgst.o \ + dspgv.o dspgvd.o dspgvx.o dsprfs.o dspsv.o dspsvx.o dsptrd.o \ + dsptrf.o dsptri.o dsptrs.o dstegr.o dstein.o dstev.o dstevd.o dstevr.o \ + dstevx.o \ + dsycon.o dsyev.o dsyevd.o dsyevr.o \ + dsyevx.o dsygs2.o dsygst.o dsygv.o dsygvd.o dsygvx.o dsyrfs.o \ + dsysv.o dsysvx.o \ + dsytd2.o dsytf2.o dsytrd.o dsytrf.o dsytri.o dsytri2.o dsytri2x.o \ + dsyswapr.o dsytrs.o dsytrs2.o \ + dsyconv.o dsyconvf.o dsyconvf_rook.o \ + dsytf2_rook.o dsytrf_rook.o dsytrs_rook.o \ + dsytri_rook.o dsycon_rook.o dsysv_rook.o \ + dsytf2_rk.o dsytrf_rk.o dsytrs_3.o \ + dsytri_3.o dsytri_3x.o dsycon_3.o dsysv_rk.o \ + dlasyf_aa.o dsysv_aa.o dsytrf_aa.o dsytrs_aa.o \ + dsysv_aa_2stage.o dsytrf_aa_2stage.o dsytrs_aa_2stage.o \ + dtbcon.o dtbrfs.o dtbtrs.o dtgevc.o dtgex2.o dtgexc.o dtgsen.o \ + dtgsja.o dtgsna.o dtgsy2.o dtgsyl.o dtpcon.o dtprfs.o dtptri.o \ + dtptrs.o \ + dtrcon.o dtrevc.o dtrevc3.o dtrexc.o dtrrfs.o dtrsen.o dtrsna.o dtrsyl.o \ + dtrti2.o dtrtri.o dtrtrs.o dtzrzf.o dstemr.o \ + dsgesv.o dsposv.o dlag2s.o slag2d.o dlat2s.o \ + dlansf.o dpftrf.o dpftri.o dpftrs.o dsfrk.o dtfsm.o dtftri.o dtfttp.o \ + dtfttr.o dtpttf.o dtpttr.o dtrttf.o dtrttp.o \ + dgejsv.o dgesvj.o dgsvj0.o dgsvj1.o \ + dgeequb.o dsyequb.o dpoequb.o dgbequb.o \ + dbbcsd.o dlapmr.o dorbdb.o dorbdb1.o dorbdb2.o dorbdb3.o dorbdb4.o \ + dorbdb5.o dorbdb6.o dorcsd.o dorcsd2by1.o \ + dgeqrt.o dgeqrt2.o dgeqrt3.o dgemqrt.o \ + dtpqrt.o dtpqrt2.o dtpmqrt.o dtprfb.o \ + dgelqt.o dgelqt3.o dgemlqt.o \ + dgetsls.o dgeqr.o dlatsqr.o dlamtsqr.o dgemqr.o \ + dgelq.o dlaswlq.o dlamswlq.o dgemlq.o \ + dtplqt.o dtplqt2.o dtpmlqt.o \ + dsytrd_2stage.o dsytrd_sy2sb.o dsytrd_sb2st.o dsb2st_kernels.o \ + dsyevd_2stage.o dsyev_2stage.o dsyevx_2stage.o dsyevr_2stage.o \ + dsbev_2stage.o dsbevx_2stage.o dsbevd_2stage.o dsygv_2stage.o + +ifdef USEXBLAS +DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \ + dla_gercond.o dla_gerpvgrw.o dsysvxx.o dsyrfsx.o \ + dla_syrfsx_extended.o dla_syamv.o dla_syrcond.o dla_syrpvgrw.o \ + dposvxx.o dporfsx.o dla_porfsx_extended.o dla_porcond.o \ + dla_porpvgrw.o dgbsvxx.o dgbrfsx.o dla_gbrfsx_extended.o \ + dla_gbamv.o dla_gbrcond.o dla_gbrpvgrw.o dla_lin_berr.o dlarscl2.o \ + dlascl2.o dla_wwaddw.o +endif + +ZLASRC = \ + zpotrf2.o zgetrf2.o \ + zbdsqr.o zgbbrd.o zgbcon.o zgbequ.o zgbrfs.o zgbsv.o zgbsvx.o \ + zgbtf2.o zgbtrf.o zgbtrs.o zgebak.o zgebal.o zgebd2.o zgebrd.o \ + zgecon.o zgeequ.o zgees.o zgeesx.o zgeev.o zgeevx.o \ + zgehd2.o zgehrd.o zgelq2.o zgelqf.o \ + zgels.o zgelsd.o zgelss.o zgelsy.o zgeql2.o zgeqlf.o zgeqp3.o \ + zgeqr2.o zgeqr2p.o zgeqrf.o zgeqrfp.o zgerfs.o zgerq2.o zgerqf.o \ + zgesc2.o zgesdd.o zgesv.o zgesvd.o zgesvdx.o \ + zgesvj.o zgejsv.o zgsvj0.o zgsvj1.o \ + zgesvx.o zgetc2.o zgetf2.o zgetrf.o \ + zgetri.o zgetrs.o \ + zggbak.o zggbal.o zgges.o zgges3.o zggesx.o \ + zggev.o zggev3.o zggevx.o zggglm.o \ + zgghrd.o zgghd3.o zgglse.o zggqrf.o zggrqf.o \ + zggsvd3.o zggsvp3.o \ + zgtcon.o zgtrfs.o zgtsv.o zgtsvx.o zgttrf.o zgttrs.o zgtts2.o zhbev.o \ + zhbevd.o zhbevx.o zhbgst.o zhbgv.o zhbgvd.o zhbgvx.o zhbtrd.o \ + zhecon.o zheev.o zheevd.o zheevr.o zheevx.o zhegs2.o zhegst.o \ + zhegv.o zhegvd.o zhegvx.o zherfs.o zhesv.o zhesvx.o zhetd2.o \ + zhetf2.o zhetrd.o \ + zhetrf.o zhetri.o zhetri2.o zhetri2x.o zheswapr.o \ + zhetrs.o zhetrs2.o \ + zhetf2_rook.o zhetrf_rook.o zhetri_rook.o \ + zhetrs_rook.o zhecon_rook.o zhesv_rook.o \ + zhetf2_rk.o zhetrf_rk.o zhetri_3.o zhetri_3x.o \ + zhetrs_3.o zhecon_3.o zhesv_rk.o \ + zhesv_aa.o zhetrf_aa.o zhetrs_aa.o zlahef_aa.o \ + zhesv_aa_2stage.o zhetrf_aa_2stage.o zhetrs_aa_2stage.o \ + zhgeqz.o zhpcon.o zhpev.o zhpevd.o \ + zhpevx.o zhpgst.o zhpgv.o zhpgvd.o zhpgvx.o zhprfs.o zhpsv.o \ + zhpsvx.o \ + zhptrd.o zhptrf.o zhptri.o zhptrs.o zhsein.o zhseqr.o zlabrd.o \ + zlacgv.o zlacon.o zlacn2.o zlacp2.o zlacpy.o zlacrm.o zlacrt.o zladiv.o \ + zlaed0.o zlaed7.o zlaed8.o \ + zlaein.o zlaesy.o zlaev2.o zlags2.o zlagtm.o \ + zlahef.o zlahef_rook.o zlahef_rk.o zlahqr.o \ + zlahr2.o zlaic1.o zlals0.o zlalsa.o zlalsd.o zlangb.o zlange.o \ + zlangt.o zlanhb.o \ + zlanhe.o \ + zlanhp.o zlanhs.o zlanht.o zlansb.o zlansp.o zlansy.o zlantb.o \ + zlantp.o zlantr.o zlapll.o zlapmt.o zlaqgb.o zlaqge.o \ + zlaqhb.o zlaqhe.o zlaqhp.o zlaqp2.o zlaqps.o zlaqsb.o \ + zlaqr0.o zlaqr1.o zlaqr2.o zlaqr3.o zlaqr4.o zlaqr5.o \ + zlaqsp.o zlaqsy.o zlar1v.o zlar2v.o ilazlr.o ilazlc.o \ + zlarcm.o zlarf.o zlarfb.o \ + zlarfg.o zlarft.o zlarfgp.o \ + zlarfx.o zlarfy.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \ + zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \ + zlassq.o zlaswp.o zlasyf.o zlasyf_rook.o zlasyf_rk.o zlasyf_aa.o \ + zlatbs.o zlatdf.o zlatps.o zlatrd.o zlatrs.o zlatrz.o zlauu2.o \ + zlauum.o zpbcon.o zpbequ.o zpbrfs.o zpbstf.o zpbsv.o \ + zpbsvx.o zpbtf2.o zpbtrf.o zpbtrs.o zpocon.o zpoequ.o zporfs.o \ + zposv.o zposvx.o zpotf2.o zpotrf.o zpotri.o zpotrs.o zpstrf.o zpstf2.o \ + zppcon.o zppequ.o zpprfs.o zppsv.o zppsvx.o zpptrf.o zpptri.o zpptrs.o \ + zptcon.o zpteqr.o zptrfs.o zptsv.o zptsvx.o zpttrf.o zpttrs.o zptts2.o \ + zrot.o zspcon.o zspmv.o zspr.o zsprfs.o zspsv.o \ + zspsvx.o zsptrf.o zsptri.o zsptrs.o zdrscl.o zstedc.o \ + zstegr.o zstein.o zsteqr.o \ + zsycon.o zsymv.o \ + zsyr.o zsyrfs.o zsysv.o zsysvx.o zsytf2.o zsytrf.o zsytri.o zsytri2.o zsytri2x.o \ + zsyswapr.o zsytrs.o zsytrs2.o \ + zsyconv.o zsyconvf.o zsyconvf_rook.o \ + zsytf2_rook.o zsytrf_rook.o zsytrs_rook.o zsytrs_aa.o \ + zsytri_rook.o zsycon_rook.o zsysv_rook.o \ + zsysv_aa_2stage.o zsytrf_aa_2stage.o zsytrs_aa_2stage.o \ + zsytf2_rk.o zsytrf_rk.o zsytrf_aa.o zsytrs_3.o \ + zsytri_3.o zsytri_3x.o zsycon_3.o zsysv_rk.o zsysv_aa.o \ + ztbcon.o ztbrfs.o ztbtrs.o ztgevc.o ztgex2.o \ + ztgexc.o ztgsen.o ztgsja.o ztgsna.o ztgsy2.o ztgsyl.o ztpcon.o \ + ztprfs.o ztptri.o \ + ztptrs.o ztrcon.o ztrevc.o ztrevc3.o ztrexc.o ztrrfs.o ztrsen.o ztrsna.o \ + ztrsyl.o ztrti2.o ztrtri.o ztrtrs.o ztzrzf.o zung2l.o \ + zung2r.o zungbr.o zunghr.o zungl2.o zunglq.o zungql.o zungqr.o zungr2.o \ + zungrq.o zungtr.o zunm2l.o zunm2r.o zunmbr.o zunmhr.o zunml2.o zunm22.o \ + zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \ + zunmtr.o zupgtr.o \ + zupmtr.o izmax1.o dzsum1.o zstemr.o \ + zcgesv.o zcposv.o zlag2c.o clag2z.o zlat2c.o \ + zhfrk.o ztfttp.o zlanhf.o zpftrf.o zpftri.o zpftrs.o ztfsm.o ztftri.o \ + ztfttr.o ztpttf.o ztpttr.o ztrttf.o ztrttp.o \ + zgeequb.o zgbequb.o zsyequb.o zpoequb.o zheequb.o \ + zbbcsd.o zlapmr.o zunbdb.o zunbdb1.o zunbdb2.o zunbdb3.o zunbdb4.o \ + zunbdb5.o zunbdb6.o zuncsd.o zuncsd2by1.o \ + zgeqrt.o zgeqrt2.o zgeqrt3.o zgemqrt.o \ + ztpqrt.o ztpqrt2.o ztpmqrt.o ztprfb.o \ + ztplqt.o ztplqt2.o ztpmlqt.o \ + zgelqt.o zgelqt3.o zgemlqt.o \ + zgetsls.o zgeqr.o zlatsqr.o zlamtsqr.o zgemqr.o \ + zgelq.o zlaswlq.o zlamswlq.o zgemlq.o \ + zhetrd_2stage.o zhetrd_he2hb.o zhetrd_hb2st.o zhb2st_kernels.o \ + zheevd_2stage.o zheev_2stage.o zheevx_2stage.o zheevr_2stage.o \ + zhbev_2stage.o zhbevx_2stage.o zhbevd_2stage.o zhegv_2stage.o + +ifdef USEXBLAS +ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \ + zla_gercond_c.o zla_gercond_x.o zla_gerpvgrw.o zsysvxx.o zsyrfsx.o \ + zla_syrfsx_extended.o zla_syamv.o zla_syrcond_c.o zla_syrcond_x.o \ + zla_syrpvgrw.o zposvxx.o zporfsx.o zla_porfsx_extended.o \ + zla_porcond_c.o zla_porcond_x.o zla_porpvgrw.o zgbsvxx.o zgbrfsx.o \ + zla_gbrfsx_extended.o zla_gbamv.o zla_gbrcond_c.o zla_gbrcond_x.o \ + zla_gbrpvgrw.o zhesvxx.o zherfsx.o zla_herfsx_extended.o \ + zla_heamv.o zla_hercond_c.o zla_hercond_x.o zla_herpvgrw.o \ + zla_lin_berr.o zlarscl2.o zlascl2.o zla_wwaddw.o +endif + +DEPRECSRC = DEPRECATED/cgegs.o DEPRECATED/cgegv.o DEPRECATED/cgelsx.o \ + DEPRECATED/cgeqpf.o DEPRECATED/cggsvd.o DEPRECATED/cggsvp.o \ + DEPRECATED/clahrd.o DEPRECATED/clatzm.o DEPRECATED/ctzrqf.o \ + DEPRECATED/dgegs.o DEPRECATED/dgegv.o DEPRECATED/dgelsx.o \ + DEPRECATED/dgeqpf.o DEPRECATED/dggsvd.o DEPRECATED/dggsvp.o \ + DEPRECATED/dlahrd.o DEPRECATED/dlatzm.o DEPRECATED/dtzrqf.o \ + DEPRECATED/sgegs.o DEPRECATED/sgegv.o DEPRECATED/sgelsx.o \ + DEPRECATED/sgeqpf.o DEPRECATED/sggsvd.o DEPRECATED/sggsvp.o \ + DEPRECATED/slahrd.o DEPRECATED/slatzm.o DEPRECATED/stzrqf.o \ + DEPRECATED/zgegs.o DEPRECATED/zgegv.o DEPRECATED/zgelsx.o \ + DEPRECATED/zgeqpf.o DEPRECATED/zggsvd.o DEPRECATED/zggsvp.o \ + DEPRECATED/zlahrd.o DEPRECATED/zlatzm.o DEPRECATED/ztzrqf.o + +ALLOBJ = $(SLASRC) $(DLASRC) $(DSLASRC) $(CLASRC) $(ZLASRC) $(ZCLASRC) \ + $(SCLAUX) $(DZLAUX) $(ALLAUX) + +ifdef USEXBLAS +ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC) +endif + +ifdef BUILD_DEPRECATED +DEPRECATED = $(DEPRECSRC) +endif + +all: ../$(LAPACKLIB) + +../$(LAPACKLIB): $(ALLOBJ) $(ALLXOBJ) $(DEPRECATED) + $(ARCH) $(ARCHFLAGS) $@ $^ + $(RANLIB) $@ + +single: $(SLASRC) $(DSLASRC) $(SXLASRC) $(SCLAUX) $(ALLAUX) + $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $^ + $(RANLIB) ../$(LAPACKLIB) + +complex: $(CLASRC) $(ZCLASRC) $(CXLASRC) $(SCLAUX) $(ALLAUX) + $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $^ + $(RANLIB) ../$(LAPACKLIB) + +double: $(DLASRC) $(DSLASRC) $(DXLASRC) $(DZLAUX) $(ALLAUX) + $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $^ + $(RANLIB) ../$(LAPACKLIB) + +complex16: $(ZLASRC) $(ZCLASRC) $(ZXLASRC) $(DZLAUX) $(ALLAUX) + $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $^ + $(RANLIB) ../$(LAPACKLIB) + +$(ALLAUX): $(FRC) +$(SCLAUX): $(FRC) +$(DZLAUX): $(FRC) +$(SLASRC): $(FRC) +$(CLASRC): $(FRC) +$(DLASRC): $(FRC) +$(ZLASRC): $(FRC) +$(ZCLASRC): $(FRC) +$(DSLASRC): $(FRC) +ifdef USEXBLAS +$(SXLASRC): $(FRC) +$(CXLASRC): $(FRC) +$(DXLASRC): $(FRC) +$(ZXLASRC): $(FRC) +endif + +FRC: + @FRC=$(FRC) + +clean: cleanobj cleanlib +cleanobj: + rm -f *.o DEPRECATED/*.o +cleanlib: + rm -f ../$(LAPACKLIB) + +.f.o: + $(FORTRAN) $(OPTS) -c -o $@ $< +.F.o: + $(FORTRAN) $(OPTS) -c -o $@ $< + +slaruv.o: slaruv.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< +dlaruv.o: dlaruv.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< +sla_wwaddw.o: sla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< +dla_wwaddw.o: dla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< +cla_wwaddw.o: cla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< +zla_wwaddw.o: zla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c -o $@ $< diff --git a/dspl/liblapack/SRC/VARIANTS/Makefile b/dspl/liblapack/SRC/VARIANTS/Makefile new file mode 100644 index 0000000..9f14107 --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/Makefile @@ -0,0 +1,66 @@ +include ../../make.inc + +####################################################################### +# This is the makefile to create a the variants libraries for LAPACK. +# The files are organized as follows: +# CHOLRL -- Right looking block version of the algorithm, calling Level 3 BLAS +# CHOLTOP -- Top looking block version of the algorithm, calling Level 3 BLAS +# LUCR -- Crout Level 3 BLAS version of LU factorization +# LULL -- left-looking Level 3 BLAS version of LU factorization +# QRLL -- left-looking Level 3 BLAS version of QR factorization +# LUREC -- an iterative version of Sivan Toledo's recursive LU algorithm[1]. +# For square matrices, this iterative versions should +# be within a factor of two of the optimum number of memory transfers. +# +# [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with +# Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997), +# 1065-1081. http://dx.doi.org/10.1137/S0895479896297744 +####################################################################### + +CHOLRL = cholesky/RL/cpotrf.o cholesky/RL/dpotrf.o cholesky/RL/spotrf.o cholesky/RL/zpotrf.o + +CHOLTOP = cholesky/TOP/cpotrf.o cholesky/TOP/dpotrf.o cholesky/TOP/spotrf.o cholesky/TOP/zpotrf.o + +LUCR = lu/CR/cgetrf.o lu/CR/dgetrf.o lu/CR/sgetrf.o lu/CR/zgetrf.o + +LULL = lu/LL/cgetrf.o lu/LL/dgetrf.o lu/LL/sgetrf.o lu/LL/zgetrf.o + +LUREC = lu/REC/cgetrf.o lu/REC/dgetrf.o lu/REC/sgetrf.o lu/REC/zgetrf.o + +QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o qr/LL/sceil.o + + +all: cholrl.a choltop.a lucr.a lull.a lurec.a qrll.a + +cholrl.a: $(CHOLRL) + $(ARCH) $(ARCHFLAGS) $@ $^ + $(RANLIB) $@ + +choltop.a: $(CHOLTOP) + $(ARCH) $(ARCHFLAGS) $@ $^ + $(RANLIB) $@ + +lucr.a: $(LUCR) + $(ARCH) $(ARCHFLAGS) $@ $^ + $(RANLIB) $@ + +lull.a: $(LULL) + $(ARCH) $(ARCHFLAGS) $@ $^ + $(RANLIB) $@ + +lurec.a: $(LUREC) + $(ARCH) $(ARCHFLAGS) $@ $^ + $(RANLIB) $@ + +qrll.a: $(QRLL) + $(ARCH) $(ARCHFLAGS) $@ $^ + $(RANLIB) $@ + +clean: cleanobj cleanlib +cleanobj: + rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL) +cleanlib: + rm -f *.a + +.f.o: + $(FORTRAN) $(OPTS) -c -o $@ $< diff --git a/dspl/liblapack/SRC/VARIANTS/README b/dspl/liblapack/SRC/VARIANTS/README new file mode 100644 index 0000000..4d301cc --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/README @@ -0,0 +1,84 @@ + =============== + = README File = + =============== + +This README File is for the LAPACK driver variants. +It is composed of 5 sections: + - Description: contents a quick description of each of the variants. For a more detailed description please refer to LAWN XXX. + - Build + - Testing + - Linking your program + - Support + +Author: Peng DU and Julie LANGOU, May 2008 + +=============== += DESCRIPTION = +=============== + +This directory contains several variants of LAPACK routines in single/double/complex/double complex precision: + - [sdcz]getrf with LU Crout Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/lu/CR + - [sdcz]getrf with LU Left Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/lu/LL + - [sdcz]getrf with Sivan Toledo's recursive LU algorithm [1] - Directory: SRC/VARIANTS/lu/REC + - [sdcz]geqrf with QR Left Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/qr/LL + - [sdcz]potrf with Cholesky Right Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/RL + - [sdcz]potrf with Cholesky Top Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/TOP + +References:For a more detailed description please refer to + - [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997), + 1065-1081. http://dx.doi.org/10.1137/S0895479896297744 + - [2]LAWN XXX + +========= += BUILD = +========= + +These variants are compiled by default in the build process but they are not tested by default. +The build process creates one new library per variants in the four arithmetics (single real/double real/single complex/double complex). +The libraries are in the SRC/VARIANTS directory. + +Corresponding libraries created in SRC/VARIANTS: + - LU Crout : lucr.a + - LU Left Looking : lull.a + - LU Sivan Toledo's recursive : lurec.a + - QR Left Looking : qrll.a + - Cholesky Right Looking : cholrl.a + - Cholesky Top : choltop.a + + +=========== += TESTING = +=========== + +To test these variants you can type 'make variants-testing' +This will rerun the linear methods testings once per variants and append the short name of the variants to the output files. +You should then see the following files in the TESTING directory: +[scdz]test_cholrl.out +[scdz]test_choltop.out +[scdz]test_lucr.out +[scdz]test_lull.out +[scdz]test_lurec.out +[scdz]test_qrll.out + +======================== += LINKING YOUR PROGRAM = +======================== + +You just need to add the variants methods library in your linking sequence before your lapack libary. +Here is a quick example for LU + +Default using LU Right Looking version: + $(FORTRAN) -c myprog.f + $(FORTRAN) -o myexe myprog.o $(LAPACKLIB) $(BLASLIB) + +Using LU Left Looking version: + $(FORTRAN) -c myprog.f + $(FORTRAN) -o myexe myprog.o $(PATH TO LAPACK/SRC/VARIANTS)/lull.a $(LAPACKLIB) $(BLASLIB) + +=========== += SUPPORT = +=========== + +You can use either LAPACK forum or the LAPACK mailing list to get support. +LAPACK forum : http://icl.cs.utk.edu/lapack-forum +LAPACK mailing list : lapack@cs.utk.edu diff --git a/dspl/liblapack/SRC/VARIANTS/cholesky/RL/cpotrf.f b/dspl/liblapack/SRC/VARIANTS/cholesky/RL/cpotrf.f new file mode 100644 index 0000000..8cec880 --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/cholesky/RL/cpotrf.f @@ -0,0 +1,243 @@ +C> \brief \b CPOTRF VARIANT: right looking block version of the algorithm, calling Level 3 BLAS. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE CPOTRF ( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> CPOTRF computes the Cholesky factorization of a real Hermitian +C> positive definite matrix A. +C> +C> The factorization has the form +C> A = U**H * U, if UPLO = 'U', or +C> A = L * L**H, if UPLO = 'L', +C> where U is an upper triangular matrix and L is lower triangular. +C> +C> This is the right looking block version of the algorithm, calling Level 3 BLAS. +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] UPLO +C> \verbatim +C> UPLO is CHARACTER*1 +C> = 'U': Upper triangle of A is stored; +C> = 'L': Lower triangle of A is stored. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The order of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is COMPLEX array, dimension (LDA,N) +C> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +C> N-by-N upper triangular part of A contains the upper +C> triangular part of the matrix A, and the strictly lower +C> triangular part of A is not referenced. If UPLO = 'L', the +C> leading N-by-N lower triangular part of A contains the lower +C> triangular part of the matrix A, and the strictly upper +C> triangular part of A is not referenced. +C> \endverbatim +C> \verbatim +C> On exit, if INFO = 0, the factor U or L from the Cholesky +C> factorization A = U**H*U or A = L*L**H. +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,N). +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> > 0: if INFO = i, the leading minor of order i is not +C> positive definite, and the factorization could not be +C> completed. +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsPOcomputational +* +* ===================================================================== + SUBROUTINE CPOTRF ( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + COMPLEX CONE + PARAMETER ( ONE = 1.0E+0, CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CPOTF2, CHERK, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'CPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + CALL CPOTF2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code. +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + + CALL CPOTF2( 'Upper', JB, A( J, J ), LDA, INFO ) + + IF( INFO.NE.0 ) + $ GO TO 30 + + IF( J+JB.LE.N ) THEN +* +* Updating the trailing submatrix. +* + CALL CTRSM( 'Left', 'Upper', 'Conjugate Transpose', + $ 'Non-unit', JB, N-J-JB+1, CONE, A( J, J ), + $ LDA, A( J, J+JB ), LDA ) + CALL CHERK( 'Upper', 'Conjugate transpose', N-J-JB+1, + $ JB, -ONE, A( J, J+JB ), LDA, + $ ONE, A( J+JB, J+JB ), LDA ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + + CALL CPOTF2( 'Lower', JB, A( J, J ), LDA, INFO ) + + IF( INFO.NE.0 ) + $ GO TO 30 + + IF( J+JB.LE.N ) THEN +* +* Updating the trailing submatrix. +* + CALL CTRSM( 'Right', 'Lower', 'Conjugate Transpose', + $ 'Non-unit', N-J-JB+1, JB, CONE, A( J, J ), + $ LDA, A( J+JB, J ), LDA ) + + CALL CHERK( 'Lower', 'No Transpose', N-J-JB+1, JB, + $ -ONE, A( J+JB, J ), LDA, + $ ONE, A( J+JB, J+JB ), LDA ) + END IF + 20 CONTINUE + END IF + END IF + GO TO 40 +* + 30 CONTINUE + INFO = INFO + J - 1 +* + 40 CONTINUE + RETURN +* +* End of CPOTRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/cholesky/RL/dpotrf.f b/dspl/liblapack/SRC/VARIANTS/cholesky/RL/dpotrf.f new file mode 100644 index 0000000..400fbf7 --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/cholesky/RL/dpotrf.f @@ -0,0 +1,242 @@ +C> \brief \b DPOTRF VARIANT: right looking block version of the algorithm, calling Level 3 BLAS. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DPOTRF ( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> DPOTRF computes the Cholesky factorization of a real symmetric +C> positive definite matrix A. +C> +C> The factorization has the form +C> A = U**T * U, if UPLO = 'U', or +C> A = L * L**T, if UPLO = 'L', +C> where U is an upper triangular matrix and L is lower triangular. +C> +C> This is the right looking block version of the algorithm, calling Level 3 BLAS. +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] UPLO +C> \verbatim +C> UPLO is CHARACTER*1 +C> = 'U': Upper triangle of A is stored; +C> = 'L': Lower triangle of A is stored. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The order of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is DOUBLE PRECISION array, dimension (LDA,N) +C> On entry, the symmetric matrix A. If UPLO = 'U', the leading +C> N-by-N upper triangular part of A contains the upper +C> triangular part of the matrix A, and the strictly lower +C> triangular part of A is not referenced. If UPLO = 'L', the +C> leading N-by-N lower triangular part of A contains the lower +C> triangular part of the matrix A, and the strictly upper +C> triangular part of A is not referenced. +C> \endverbatim +C> \verbatim +C> On exit, if INFO = 0, the factor U or L from the Cholesky +C> factorization A = U**T*U or A = L*L**T. +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,N). +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> > 0: if INFO = i, the leading minor of order i is not +C> positive definite, and the factorization could not be +C> completed. +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsPOcomputational +* +* ===================================================================== + SUBROUTINE DPOTRF ( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DPOTF2, DSYRK, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'DPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + CALL DPOTF2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code. +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + + CALL DPOTF2( 'Upper', JB, A( J, J ), LDA, INFO ) + + IF( INFO.NE.0 ) + $ GO TO 30 + + IF( J+JB.LE.N ) THEN +* +* Updating the trailing submatrix. +* + CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', + $ JB, N-J-JB+1, ONE, A( J, J ), LDA, + $ A( J, J+JB ), LDA ) + CALL DSYRK( 'Upper', 'Transpose', N-J-JB+1, JB, -ONE, + $ A( J, J+JB ), LDA, + $ ONE, A( J+JB, J+JB ), LDA ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + + CALL DPOTF2( 'Lower', JB, A( J, J ), LDA, INFO ) + + IF( INFO.NE.0 ) + $ GO TO 30 + + IF( J+JB.LE.N ) THEN +* +* Updating the trailing submatrix. +* + CALL DTRSM( 'Right', 'Lower', 'Transpose', 'Non-unit', + $ N-J-JB+1, JB, ONE, A( J, J ), LDA, + $ A( J+JB, J ), LDA ) + + CALL DSYRK( 'Lower', 'No Transpose', N-J-JB+1, JB, + $ -ONE, A( J+JB, J ), LDA, + $ ONE, A( J+JB, J+JB ), LDA ) + END IF + 20 CONTINUE + END IF + END IF + GO TO 40 +* + 30 CONTINUE + INFO = INFO + J - 1 +* + 40 CONTINUE + RETURN +* +* End of DPOTRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/cholesky/RL/spotrf.f b/dspl/liblapack/SRC/VARIANTS/cholesky/RL/spotrf.f new file mode 100644 index 0000000..090e528 --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/cholesky/RL/spotrf.f @@ -0,0 +1,242 @@ +C> \brief \b SPOTRF VARIANT: right looking block version of the algorithm, calling Level 3 BLAS. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE SPOTRF ( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> SPOTRF computes the Cholesky factorization of a real symmetric +C> positive definite matrix A. +C> +C> The factorization has the form +C> A = U**T * U, if UPLO = 'U', or +C> A = L * L**T, if UPLO = 'L', +C> where U is an upper triangular matrix and L is lower triangular. +C> +C> This is the right looking block version of the algorithm, calling Level 3 BLAS. +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] UPLO +C> \verbatim +C> UPLO is CHARACTER*1 +C> = 'U': Upper triangle of A is stored; +C> = 'L': Lower triangle of A is stored. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The order of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is REAL array, dimension (LDA,N) +C> On entry, the symmetric matrix A. If UPLO = 'U', the leading +C> N-by-N upper triangular part of A contains the upper +C> triangular part of the matrix A, and the strictly lower +C> triangular part of A is not referenced. If UPLO = 'L', the +C> leading N-by-N lower triangular part of A contains the lower +C> triangular part of the matrix A, and the strictly upper +C> triangular part of A is not referenced. +C> \endverbatim +C> \verbatim +C> On exit, if INFO = 0, the factor U or L from the Cholesky +C> factorization A = U**T*U or A = L*L**T. +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,N). +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> > 0: if INFO = i, the leading minor of order i is not +C> positive definite, and the factorization could not be +C> completed. +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsPOcomputational +* +* ===================================================================== + SUBROUTINE SPOTRF ( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SPOTF2, SSYRK, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'SPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + CALL SPOTF2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code. +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + + CALL SPOTF2( 'Upper', JB, A( J, J ), LDA, INFO ) + + IF( INFO.NE.0 ) + $ GO TO 30 + + IF( J+JB.LE.N ) THEN +* +* Updating the trailing submatrix. +* + CALL STRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', + $ JB, N-J-JB+1, ONE, A( J, J ), LDA, + $ A( J, J+JB ), LDA ) + CALL SSYRK( 'Upper', 'Transpose', N-J-JB+1, JB, -ONE, + $ A( J, J+JB ), LDA, + $ ONE, A( J+JB, J+JB ), LDA ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + + CALL SPOTF2( 'Lower', JB, A( J, J ), LDA, INFO ) + + IF( INFO.NE.0 ) + $ GO TO 30 + + IF( J+JB.LE.N ) THEN +* +* Updating the trailing submatrix. +* + CALL STRSM( 'Right', 'Lower', 'Transpose', 'Non-unit', + $ N-J-JB+1, JB, ONE, A( J, J ), LDA, + $ A( J+JB, J ), LDA ) + + CALL SSYRK( 'Lower', 'No Transpose', N-J-JB+1, JB, + $ -ONE, A( J+JB, J ), LDA, + $ ONE, A( J+JB, J+JB ), LDA ) + END IF + 20 CONTINUE + END IF + END IF + GO TO 40 +* + 30 CONTINUE + INFO = INFO + J - 1 +* + 40 CONTINUE + RETURN +* +* End of SPOTRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/cholesky/RL/zpotrf.f b/dspl/liblapack/SRC/VARIANTS/cholesky/RL/zpotrf.f new file mode 100644 index 0000000..149eaac --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/cholesky/RL/zpotrf.f @@ -0,0 +1,243 @@ +C> \brief \b ZPOTRF VARIANT: right looking block version of the algorithm, calling Level 3 BLAS. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZPOTRF ( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> ZPOTRF computes the Cholesky factorization of a real Hermitian +C> positive definite matrix A. +C> +C> The factorization has the form +C> A = U**H * U, if UPLO = 'U', or +C> A = L * L**H, if UPLO = 'L', +C> where U is an upper triangular matrix and L is lower triangular. +C> +C> This is the right looking block version of the algorithm, calling Level 3 BLAS. +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] UPLO +C> \verbatim +C> UPLO is CHARACTER*1 +C> = 'U': Upper triangle of A is stored; +C> = 'L': Lower triangle of A is stored. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The order of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is COMPLEX*16 array, dimension (LDA,N) +C> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +C> N-by-N upper triangular part of A contains the upper +C> triangular part of the matrix A, and the strictly lower +C> triangular part of A is not referenced. If UPLO = 'L', the +C> leading N-by-N lower triangular part of A contains the lower +C> triangular part of the matrix A, and the strictly upper +C> triangular part of A is not referenced. +C> \endverbatim +C> \verbatim +C> On exit, if INFO = 0, the factor U or L from the Cholesky +C> factorization A = U**H*U or A = L*L**H. +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,N). +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> > 0: if INFO = i, the leading minor of order i is not +C> positive definite, and the factorization could not be +C> completed. +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsPOcomputational +* +* ===================================================================== + SUBROUTINE ZPOTRF ( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + COMPLEX*16 CONE + PARAMETER ( ONE = 1.0D+0, CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL ZGEMM, ZPOTF2, ZHERK, ZTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'ZPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + CALL ZPOTF2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code. +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + + CALL ZPOTF2( 'Upper', JB, A( J, J ), LDA, INFO ) + + IF( INFO.NE.0 ) + $ GO TO 30 + + IF( J+JB.LE.N ) THEN +* +* Updating the trailing submatrix. +* + CALL ZTRSM( 'Left', 'Upper', 'Conjugate Transpose', + $ 'Non-unit', JB, N-J-JB+1, CONE, A( J, J ), + $ LDA, A( J, J+JB ), LDA ) + CALL ZHERK( 'Upper', 'Conjugate transpose', N-J-JB+1, + $ JB, -ONE, A( J, J+JB ), LDA, + $ ONE, A( J+JB, J+JB ), LDA ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + + CALL ZPOTF2( 'Lower', JB, A( J, J ), LDA, INFO ) + + IF( INFO.NE.0 ) + $ GO TO 30 + + IF( J+JB.LE.N ) THEN +* +* Updating the trailing submatrix. +* + CALL ZTRSM( 'Right', 'Lower', 'Conjugate Transpose', + $ 'Non-unit', N-J-JB+1, JB, CONE, A( J, J ), + $ LDA, A( J+JB, J ), LDA ) + + CALL ZHERK( 'Lower', 'No Transpose', N-J-JB+1, JB, + $ -ONE, A( J+JB, J ), LDA, + $ ONE, A( J+JB, J+JB ), LDA ) + END IF + 20 CONTINUE + END IF + END IF + GO TO 40 +* + 30 CONTINUE + INFO = INFO + J - 1 +* + 40 CONTINUE + RETURN +* +* End of ZPOTRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/cholesky/TOP/cpotrf.f b/dspl/liblapack/SRC/VARIANTS/cholesky/TOP/cpotrf.f new file mode 100644 index 0000000..fd2b13e --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/cholesky/TOP/cpotrf.f @@ -0,0 +1,237 @@ +C> \brief \b CPOTRF VARIANT: top-looking block version of the algorithm, calling Level 3 BLAS. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE CPOTRF ( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> CPOTRF computes the Cholesky factorization of a real symmetric +C> positive definite matrix A. +C> +C> The factorization has the form +C> A = U**H * U, if UPLO = 'U', or +C> A = L * L**H, if UPLO = 'L', +C> where U is an upper triangular matrix and L is lower triangular. +C> +C> This is the top-looking block version of the algorithm, calling Level 3 BLAS. +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] UPLO +C> \verbatim +C> UPLO is CHARACTER*1 +C> = 'U': Upper triangle of A is stored; +C> = 'L': Lower triangle of A is stored. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The order of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is COMPLEX array, dimension (LDA,N) +C> On entry, the symmetric matrix A. If UPLO = 'U', the leading +C> N-by-N upper triangular part of A contains the upper +C> triangular part of the matrix A, and the strictly lower +C> triangular part of A is not referenced. If UPLO = 'L', the +C> leading N-by-N lower triangular part of A contains the lower +C> triangular part of the matrix A, and the strictly upper +C> triangular part of A is not referenced. +C> \endverbatim +C> \verbatim +C> On exit, if INFO = 0, the factor U or L from the Cholesky +C> factorization A = U**H*U or A = L*L**H. +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,N). +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> > 0: if INFO = i, the leading minor of order i is not +C> positive definite, and the factorization could not be +C> completed. +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsPOcomputational +* +* ===================================================================== + SUBROUTINE CPOTRF ( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + COMPLEX CONE + PARAMETER ( ONE = 1.0E+0, CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CPOTF2, CHERK, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'CPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + CALL CPOTF2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code. +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N, NB + + JB = MIN( NB, N-J+1 ) +* +* Compute the current block. +* + CALL CTRSM( 'Left', 'Upper', 'Conjugate Transpose', + $ 'Non-unit', J-1, JB, CONE, A( 1, 1 ), LDA, + $ A( 1, J ), LDA ) + + CALL CHERK( 'Upper', 'Conjugate Transpose', JB, J-1, + $ -ONE, A( 1, J ), LDA, ONE, A( J, J ), LDA ) +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + CALL CPOTF2( 'Upper', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + + 10 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N, NB + + JB = MIN( NB, N-J+1 ) +* +* Compute the current block. +* + CALL CTRSM( 'Right', 'Lower', 'Conjugate Transpose', + $ 'Non-unit', JB, J-1, CONE, A( 1, 1 ), LDA, + $ A( J, 1 ), LDA ) + + CALL CHERK( 'Lower', 'No Transpose', JB, J-1, + $ -ONE, A( J, 1 ), LDA, + $ ONE, A( J, J ), LDA ) +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + CALL CPOTF2( 'Lower', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + + 20 CONTINUE + END IF + END IF + GO TO 40 +* + 30 CONTINUE + INFO = INFO + J - 1 +* + 40 CONTINUE + RETURN +* +* End of CPOTRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/cholesky/TOP/dpotrf.f b/dspl/liblapack/SRC/VARIANTS/cholesky/TOP/dpotrf.f new file mode 100644 index 0000000..dbb51c4 --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/cholesky/TOP/dpotrf.f @@ -0,0 +1,238 @@ +C> \brief \b DPOTRF VARIANT: top-looking block version of the algorithm, calling Level 3 BLAS. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DPOTRF ( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> DPOTRF computes the Cholesky factorization of a real symmetric +C> positive definite matrix A. +C> +C> The factorization has the form +C> A = U**T * U, if UPLO = 'U', or +C> A = L * L**T, if UPLO = 'L', +C> where U is an upper triangular matrix and L is lower triangular. +C> +C> This is the top-looking block version of the algorithm, calling Level 3 BLAS. +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] UPLO +C> \verbatim +C> UPLO is CHARACTER*1 +C> = 'U': Upper triangle of A is stored; +C> = 'L': Lower triangle of A is stored. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The order of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is DOUBLE PRECISION array, dimension (LDA,N) +C> On entry, the symmetric matrix A. If UPLO = 'U', the leading +C> N-by-N upper triangular part of A contains the upper +C> triangular part of the matrix A, and the strictly lower +C> triangular part of A is not referenced. If UPLO = 'L', the +C> leading N-by-N lower triangular part of A contains the lower +C> triangular part of the matrix A, and the strictly upper +C> triangular part of A is not referenced. +C> \endverbatim +C> \verbatim +C> On exit, if INFO = 0, the factor U or L from the Cholesky +C> factorization A = U**T*U or A = L*L**T. +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,N). +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> > 0: if INFO = i, the leading minor of order i is not +C> positive definite, and the factorization could not be +C> completed. +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsPOcomputational +* +* ===================================================================== + SUBROUTINE DPOTRF ( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DPOTF2, DSYRK, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'DPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + CALL DPOTF2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code. +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N, NB + + JB = MIN( NB, N-J+1 ) +* +* Compute the current block. +* + CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', + $ J-1, JB, ONE, A( 1, 1 ), LDA, + $ A( 1, J ), LDA ) + + CALL DSYRK( 'Upper', 'Transpose', JB, J-1, -ONE, + $ A( 1, J ), LDA, + $ ONE, A( J, J ), LDA ) +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + CALL DPOTF2( 'Upper', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + + 10 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N, NB + + JB = MIN( NB, N-J+1 ) +* +* Compute the current block. +* + CALL DTRSM( 'Right', 'Lower', 'Transpose', 'Non-unit', + $ JB, J-1, ONE, A( 1, 1 ), LDA, + $ A( J, 1 ), LDA ) + + CALL DSYRK( 'Lower', 'No Transpose', JB, J-1, + $ -ONE, A( J, 1 ), LDA, + $ ONE, A( J, J ), LDA ) + +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + CALL DPOTF2( 'Lower', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + + 20 CONTINUE + END IF + END IF + GO TO 40 +* + 30 CONTINUE + INFO = INFO + J - 1 +* + 40 CONTINUE + RETURN +* +* End of DPOTRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/cholesky/TOP/spotrf.f b/dspl/liblapack/SRC/VARIANTS/cholesky/TOP/spotrf.f new file mode 100644 index 0000000..81034df --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/cholesky/TOP/spotrf.f @@ -0,0 +1,237 @@ +C> \brief \b SPOTRF VARIANT: top-looking block version of the algorithm, calling Level 3 BLAS. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE SPOTRF ( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> SPOTRF computes the Cholesky factorization of a real symmetric +C> positive definite matrix A. +C> +C> The factorization has the form +C> A = U**T * U, if UPLO = 'U', or +C> A = L * L**T, if UPLO = 'L', +C> where U is an upper triangular matrix and L is lower triangular. +C> +C> This is the top-looking block version of the algorithm, calling Level 3 BLAS. +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] UPLO +C> \verbatim +C> UPLO is CHARACTER*1 +C> = 'U': Upper triangle of A is stored; +C> = 'L': Lower triangle of A is stored. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The order of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is REAL array, dimension (LDA,N) +C> On entry, the symmetric matrix A. If UPLO = 'U', the leading +C> N-by-N upper triangular part of A contains the upper +C> triangular part of the matrix A, and the strictly lower +C> triangular part of A is not referenced. If UPLO = 'L', the +C> leading N-by-N lower triangular part of A contains the lower +C> triangular part of the matrix A, and the strictly upper +C> triangular part of A is not referenced. +C> \endverbatim +C> \verbatim +C> On exit, if INFO = 0, the factor U or L from the Cholesky +C> factorization A = U**T*U or A = L*L**T. +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,N). +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> > 0: if INFO = i, the leading minor of order i is not +C> positive definite, and the factorization could not be +C> completed. +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsPOcomputational +* +* ===================================================================== + SUBROUTINE SPOTRF ( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SPOTF2, SSYRK, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'SPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + CALL SPOTF2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code. +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N, NB + + JB = MIN( NB, N-J+1 ) +* +* Compute the current block. +* + CALL STRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', + $ J-1, JB, ONE, A( 1, 1 ), LDA, + $ A( 1, J ), LDA ) + + CALL SSYRK( 'Upper', 'Transpose', JB, J-1, -ONE, + $ A( 1, J ), LDA, + $ ONE, A( J, J ), LDA ) +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + CALL SPOTF2( 'Upper', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + + 10 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N, NB + + JB = MIN( NB, N-J+1 ) +* +* Compute the current block. +* + CALL STRSM( 'Right', 'Lower', 'Transpose', 'Non-unit', + $ JB, J-1, ONE, A( 1, 1 ), LDA, + $ A( J, 1 ), LDA ) + + CALL SSYRK( 'Lower', 'No Transpose', JB, J-1, + $ -ONE, A( J, 1 ), LDA, + $ ONE, A( J, J ), LDA ) +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + CALL SPOTF2( 'Lower', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + + 20 CONTINUE + END IF + END IF + GO TO 40 +* + 30 CONTINUE + INFO = INFO + J - 1 +* + 40 CONTINUE + RETURN +* +* End of SPOTRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/cholesky/TOP/zpotrf.f b/dspl/liblapack/SRC/VARIANTS/cholesky/TOP/zpotrf.f new file mode 100644 index 0000000..0096670 --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/cholesky/TOP/zpotrf.f @@ -0,0 +1,237 @@ +C> \brief \b ZPOTRF VARIANT: top-looking block version of the algorithm, calling Level 3 BLAS. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZPOTRF ( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> ZPOTRF computes the Cholesky factorization of a real symmetric +C> positive definite matrix A. +C> +C> The factorization has the form +C> A = U**H * U, if UPLO = 'U', or +C> A = L * L**H, if UPLO = 'L', +C> where U is an upper triangular matrix and L is lower triangular. +C> +C> This is the top-looking block version of the algorithm, calling Level 3 BLAS. +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] UPLO +C> \verbatim +C> UPLO is CHARACTER*1 +C> = 'U': Upper triangle of A is stored; +C> = 'L': Lower triangle of A is stored. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The order of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is COMPLEX*16 array, dimension (LDA,N) +C> On entry, the symmetric matrix A. If UPLO = 'U', the leading +C> N-by-N upper triangular part of A contains the upper +C> triangular part of the matrix A, and the strictly lower +C> triangular part of A is not referenced. If UPLO = 'L', the +C> leading N-by-N lower triangular part of A contains the lower +C> triangular part of the matrix A, and the strictly upper +C> triangular part of A is not referenced. +C> \endverbatim +C> \verbatim +C> On exit, if INFO = 0, the factor U or L from the Cholesky +C> factorization A = U**H*U or A = L*L**H. +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,N). +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> > 0: if INFO = i, the leading minor of order i is not +C> positive definite, and the factorization could not be +C> completed. +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsPOcomputational +* +* ===================================================================== + SUBROUTINE ZPOTRF ( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + COMPLEX*16 CONE + PARAMETER ( ONE = 1.0D+0, CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL ZGEMM, ZPOTF2, ZHERK, ZTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'ZPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + CALL ZPOTF2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code. +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U'*U. +* + DO 10 J = 1, N, NB + + JB = MIN( NB, N-J+1 ) +* +* Compute the current block. +* + CALL ZTRSM( 'Left', 'Upper', 'Conjugate Transpose', + $ 'Non-unit', J-1, JB, CONE, A( 1, 1 ), LDA, + $ A( 1, J ), LDA ) + + CALL ZHERK( 'Upper', 'Conjugate Transpose', JB, J-1, + $ -ONE, A( 1, J ), LDA, ONE, A( J, J ), LDA ) +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + CALL ZPOTF2( 'Upper', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + + 10 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization A = L*L'. +* + DO 20 J = 1, N, NB + + JB = MIN( NB, N-J+1 ) +* +* Compute the current block. +* + CALL ZTRSM( 'Right', 'Lower', 'Conjugate Transpose', + $ 'Non-unit', JB, J-1, CONE, A( 1, 1 ), LDA, + $ A( J, 1 ), LDA ) + + CALL ZHERK( 'Lower', 'No Transpose', JB, J-1, + $ -ONE, A( J, 1 ), LDA, + $ ONE, A( J, J ), LDA ) +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + CALL ZPOTF2( 'Lower', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + + 20 CONTINUE + END IF + END IF + GO TO 40 +* + 30 CONTINUE + INFO = INFO + J - 1 +* + 40 CONTINUE + RETURN +* +* End of ZPOTRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/lu/CR/cgetrf.f b/dspl/liblapack/SRC/VARIANTS/lu/CR/cgetrf.f new file mode 100644 index 0000000..2bddb44 --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/lu/CR/cgetrf.f @@ -0,0 +1,223 @@ +C> \brief \b CGETRF VARIANT: Crout Level 3 BLAS version of the algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE CGETRF ( M, N, A, LDA, IPIV, INFO) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> CGETRF computes an LU factorization of a general M-by-N matrix A +C> using partial pivoting with row interchanges. +C> +C> The factorization has the form +C> A = P * L * U +C> where P is a permutation matrix, L is lower triangular with unit +C> diagonal elements (lower trapezoidal if m > n), and U is upper +C> triangular (upper trapezoidal if m < n). +C> +C> This is the Crout Level 3 BLAS version of the algorithm. +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] M +C> \verbatim +C> M is INTEGER +C> The number of rows of the matrix A. M >= 0. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The number of columns of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is COMPLEX array, dimension (LDA,N) +C> On entry, the M-by-N matrix to be factored. +C> On exit, the factors L and U from the factorization +C> A = P*L*U; the unit diagonal elements of L are not stored. +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,M). +C> \endverbatim +C> +C> \param[out] IPIV +C> \verbatim +C> IPIV is INTEGER array, dimension (min(M,N)) +C> The pivot indices; for 1 <= i <= min(M,N), row i of the +C> matrix was interchanged with row IPIV(i). +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +C> has been completed, but the factor U is exactly +C> singular, and division by zero will occur if it is used +C> to solve a system of equations. +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsGEcomputational +* +* ===================================================================== + SUBROUTINE CGETRF ( M, N, A, LDA, IPIV, INFO) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO, J, JB, NB +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CGETF2, CLASWP, CTRSM, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'CGETRF', ' ', M, N, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN +* +* Use unblocked code. +* + CALL CGETF2( M, N, A, LDA, IPIV, INFO ) + ELSE +* +* Use blocked code. +* + DO 20 J = 1, MIN( M, N ), NB + JB = MIN( MIN( M, N )-J+1, NB ) +* +* Update current block. +* + CALL CGEMM( 'No transpose', 'No transpose', + $ M-J+1, JB, J-1, -ONE, + $ A( J, 1 ), LDA, A( 1, J ), LDA, ONE, + $ A( J, J ), LDA ) + +* +* Factor diagonal and subdiagonal blocks and test for exact +* singularity. +* + CALL CGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO ) +* +* Adjust INFO and the pivot indices. +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + J - 1 + DO 10 I = J, MIN( M, J+JB-1 ) + IPIV( I ) = J - 1 + IPIV( I ) + 10 CONTINUE +* +* Apply interchanges to column 1:J-1 +* + CALL CLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 ) +* + IF ( J+JB.LE.N ) THEN +* +* Apply interchanges to column J+JB:N +* + CALL CLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1, + $ IPIV, 1 ) +* + CALL CGEMM( 'No transpose', 'No transpose', + $ JB, N-J-JB+1, J-1, -ONE, + $ A( J, 1 ), LDA, A( 1, J+JB ), LDA, ONE, + $ A( J, J+JB ), LDA ) +* +* Compute block row of U. +* + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, N-J-JB+1, ONE, A( J, J ), LDA, + $ A( J, J+JB ), LDA ) + END IF + + 20 CONTINUE + + END IF + RETURN +* +* End of CGETRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/lu/CR/dgetrf.f b/dspl/liblapack/SRC/VARIANTS/lu/CR/dgetrf.f new file mode 100644 index 0000000..ce0ab22 --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/lu/CR/dgetrf.f @@ -0,0 +1,223 @@ +C> \brief \b DGETRF VARIANT: Crout Level 3 BLAS version of the algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DGETRF ( M, N, A, LDA, IPIV, INFO) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> DGETRF computes an LU factorization of a general M-by-N matrix A +C> using partial pivoting with row interchanges. +C> +C> The factorization has the form +C> A = P * L * U +C> where P is a permutation matrix, L is lower triangular with unit +C> diagonal elements (lower trapezoidal if m > n), and U is upper +C> triangular (upper trapezoidal if m < n). +C> +C> This is the Crout Level 3 BLAS version of the algorithm. +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] M +C> \verbatim +C> M is INTEGER +C> The number of rows of the matrix A. M >= 0. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The number of columns of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is DOUBLE PRECISION array, dimension (LDA,N) +C> On entry, the M-by-N matrix to be factored. +C> On exit, the factors L and U from the factorization +C> A = P*L*U; the unit diagonal elements of L are not stored. +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,M). +C> \endverbatim +C> +C> \param[out] IPIV +C> \verbatim +C> IPIV is INTEGER array, dimension (min(M,N)) +C> The pivot indices; for 1 <= i <= min(M,N), row i of the +C> matrix was interchanged with row IPIV(i). +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +C> has been completed, but the factor U is exactly +C> singular, and division by zero will occur if it is used +C> to solve a system of equations. +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsGEcomputational +* +* ===================================================================== + SUBROUTINE DGETRF ( M, N, A, LDA, IPIV, INFO) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO, J, JB, NB +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DGETF2, DLASWP, DTRSM, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'DGETRF', ' ', M, N, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN +* +* Use unblocked code. +* + CALL DGETF2( M, N, A, LDA, IPIV, INFO ) + ELSE +* +* Use blocked code. +* + DO 20 J = 1, MIN( M, N ), NB + JB = MIN( MIN( M, N )-J+1, NB ) +* +* Update current block. +* + CALL DGEMM( 'No transpose', 'No transpose', + $ M-J+1, JB, J-1, -ONE, + $ A( J, 1 ), LDA, A( 1, J ), LDA, ONE, + $ A( J, J ), LDA ) + +* +* Factor diagonal and subdiagonal blocks and test for exact +* singularity. +* + CALL DGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO ) +* +* Adjust INFO and the pivot indices. +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + J - 1 + DO 10 I = J, MIN( M, J+JB-1 ) + IPIV( I ) = J - 1 + IPIV( I ) + 10 CONTINUE +* +* Apply interchanges to column 1:J-1 +* + CALL DLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 ) +* + IF ( J+JB.LE.N ) THEN +* +* Apply interchanges to column J+JB:N +* + CALL DLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1, + $ IPIV, 1 ) +* + CALL DGEMM( 'No transpose', 'No transpose', + $ JB, N-J-JB+1, J-1, -ONE, + $ A( J, 1 ), LDA, A( 1, J+JB ), LDA, ONE, + $ A( J, J+JB ), LDA ) +* +* Compute block row of U. +* + CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, N-J-JB+1, ONE, A( J, J ), LDA, + $ A( J, J+JB ), LDA ) + END IF + + 20 CONTINUE + + END IF + RETURN +* +* End of DGETRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/lu/CR/sgetrf.f b/dspl/liblapack/SRC/VARIANTS/lu/CR/sgetrf.f new file mode 100644 index 0000000..bb65431 --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/lu/CR/sgetrf.f @@ -0,0 +1,223 @@ +C> \brief \b SGETRF VARIANT: Crout Level 3 BLAS version of the algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE SGETRF ( M, N, A, LDA, IPIV, INFO) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> SGETRF computes an LU factorization of a general M-by-N matrix A +C> using partial pivoting with row interchanges. +C> +C> The factorization has the form +C> A = P * L * U +C> where P is a permutation matrix, L is lower triangular with unit +C> diagonal elements (lower trapezoidal if m > n), and U is upper +C> triangular (upper trapezoidal if m < n). +C> +C> This is the Crout Level 3 BLAS version of the algorithm. +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] M +C> \verbatim +C> M is INTEGER +C> The number of rows of the matrix A. M >= 0. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The number of columns of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is REAL array, dimension (LDA,N) +C> On entry, the M-by-N matrix to be factored. +C> On exit, the factors L and U from the factorization +C> A = P*L*U; the unit diagonal elements of L are not stored. +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,M). +C> \endverbatim +C> +C> \param[out] IPIV +C> \verbatim +C> IPIV is INTEGER array, dimension (min(M,N)) +C> The pivot indices; for 1 <= i <= min(M,N), row i of the +C> matrix was interchanged with row IPIV(i). +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +C> has been completed, but the factor U is exactly +C> singular, and division by zero will occur if it is used +C> to solve a system of equations. +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsGEcomputational +* +* ===================================================================== + SUBROUTINE SGETRF ( M, N, A, LDA, IPIV, INFO) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO, J, JB, NB +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SGETF2, SLASWP, STRSM, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'SGETRF', ' ', M, N, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN +* +* Use unblocked code. +* + CALL SGETF2( M, N, A, LDA, IPIV, INFO ) + ELSE +* +* Use blocked code. +* + DO 20 J = 1, MIN( M, N ), NB + JB = MIN( MIN( M, N )-J+1, NB ) +* +* Update current block. +* + CALL SGEMM( 'No transpose', 'No transpose', + $ M-J+1, JB, J-1, -ONE, + $ A( J, 1 ), LDA, A( 1, J ), LDA, ONE, + $ A( J, J ), LDA ) + +* +* Factor diagonal and subdiagonal blocks and test for exact +* singularity. +* + CALL SGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO ) +* +* Adjust INFO and the pivot indices. +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + J - 1 + DO 10 I = J, MIN( M, J+JB-1 ) + IPIV( I ) = J - 1 + IPIV( I ) + 10 CONTINUE +* +* Apply interchanges to column 1:J-1 +* + CALL SLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 ) +* + IF ( J+JB.LE.N ) THEN +* +* Apply interchanges to column J+JB:N +* + CALL SLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1, + $ IPIV, 1 ) +* + CALL SGEMM( 'No transpose', 'No transpose', + $ JB, N-J-JB+1, J-1, -ONE, + $ A( J, 1 ), LDA, A( 1, J+JB ), LDA, ONE, + $ A( J, J+JB ), LDA ) +* +* Compute block row of U. +* + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, N-J-JB+1, ONE, A( J, J ), LDA, + $ A( J, J+JB ), LDA ) + END IF + + 20 CONTINUE + + END IF + RETURN +* +* End of SGETRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/lu/CR/zgetrf.f b/dspl/liblapack/SRC/VARIANTS/lu/CR/zgetrf.f new file mode 100644 index 0000000..39090e6 --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/lu/CR/zgetrf.f @@ -0,0 +1,223 @@ +C> \brief \b ZGETRF VARIANT: Crout Level 3 BLAS version of the algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZGETRF ( M, N, A, LDA, IPIV, INFO) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> ZGETRF computes an LU factorization of a general M-by-N matrix A +C> using partial pivoting with row interchanges. +C> +C> The factorization has the form +C> A = P * L * U +C> where P is a permutation matrix, L is lower triangular with unit +C> diagonal elements (lower trapezoidal if m > n), and U is upper +C> triangular (upper trapezoidal if m < n). +C> +C> This is the Crout Level 3 BLAS version of the algorithm. +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] M +C> \verbatim +C> M is INTEGER +C> The number of rows of the matrix A. M >= 0. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The number of columns of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is COMPLEX*16 array, dimension (LDA,N) +C> On entry, the M-by-N matrix to be factored. +C> On exit, the factors L and U from the factorization +C> A = P*L*U; the unit diagonal elements of L are not stored. +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,M). +C> \endverbatim +C> +C> \param[out] IPIV +C> \verbatim +C> IPIV is INTEGER array, dimension (min(M,N)) +C> The pivot indices; for 1 <= i <= min(M,N), row i of the +C> matrix was interchanged with row IPIV(i). +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +C> has been completed, but the factor U is exactly +C> singular, and division by zero will occur if it is used +C> to solve a system of equations. +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsGEcomputational +* +* ===================================================================== + SUBROUTINE ZGETRF ( M, N, A, LDA, IPIV, INFO) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO, J, JB, NB +* .. +* .. External Subroutines .. + EXTERNAL ZGEMM, ZGETF2, ZLASWP, ZTRSM, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'ZGETRF', ' ', M, N, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN +* +* Use unblocked code. +* + CALL ZGETF2( M, N, A, LDA, IPIV, INFO ) + ELSE +* +* Use blocked code. +* + DO 20 J = 1, MIN( M, N ), NB + JB = MIN( MIN( M, N )-J+1, NB ) +* +* Update current block. +* + CALL ZGEMM( 'No transpose', 'No transpose', + $ M-J+1, JB, J-1, -ONE, + $ A( J, 1 ), LDA, A( 1, J ), LDA, ONE, + $ A( J, J ), LDA ) + +* +* Factor diagonal and subdiagonal blocks and test for exact +* singularity. +* + CALL ZGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO ) +* +* Adjust INFO and the pivot indices. +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + J - 1 + DO 10 I = J, MIN( M, J+JB-1 ) + IPIV( I ) = J - 1 + IPIV( I ) + 10 CONTINUE +* +* Apply interchanges to column 1:J-1 +* + CALL ZLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 ) +* + IF ( J+JB.LE.N ) THEN +* +* Apply interchanges to column J+JB:N +* + CALL ZLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1, + $ IPIV, 1 ) +* + CALL ZGEMM( 'No transpose', 'No transpose', + $ JB, N-J-JB+1, J-1, -ONE, + $ A( J, 1 ), LDA, A( 1, J+JB ), LDA, ONE, + $ A( J, J+JB ), LDA ) +* +* Compute block row of U. +* + CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, N-J-JB+1, ONE, A( J, J ), LDA, + $ A( J, J+JB ), LDA ) + END IF + + 20 CONTINUE + + END IF + RETURN +* +* End of ZGETRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/lu/LL/cgetrf.f b/dspl/liblapack/SRC/VARIANTS/lu/LL/cgetrf.f new file mode 100644 index 0000000..77c7472 --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/lu/LL/cgetrf.f @@ -0,0 +1,248 @@ +C> \brief \b CGETRF VARIANT: left-looking Level 3 BLAS version of the algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE CGETRF ( M, N, A, LDA, IPIV, INFO) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> CGETRF computes an LU factorization of a general M-by-N matrix A +C> using partial pivoting with row interchanges. +C> +C> The factorization has the form +C> A = P * L * U +C> where P is a permutation matrix, L is lower triangular with unit +C> diagonal elements (lower trapezoidal if m > n), and U is upper +C> triangular (upper trapezoidal if m < n). +C> +C> This is the left-looking Level 3 BLAS version of the algorithm. +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] M +C> \verbatim +C> M is INTEGER +C> The number of rows of the matrix A. M >= 0. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The number of columns of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is COMPLEX array, dimension (LDA,N) +C> On entry, the M-by-N matrix to be factored. +C> On exit, the factors L and U from the factorization +C> A = P*L*U; the unit diagonal elements of L are not stored. +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,M). +C> \endverbatim +C> +C> \param[out] IPIV +C> \verbatim +C> IPIV is INTEGER array, dimension (min(M,N)) +C> The pivot indices; for 1 <= i <= min(M,N), row i of the +C> matrix was interchanged with row IPIV(i). +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +C> has been completed, but the factor U is exactly +C> singular, and division by zero will occur if it is used +C> to solve a system of equations. +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsGEcomputational +* +* ===================================================================== + SUBROUTINE CGETRF ( M, N, A, LDA, IPIV, INFO) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = (1.0E+0, 0.0E+0) ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO, J, JB, K, NB +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CGETF2, CLASWP, CTRSM, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'CGETRF', ' ', M, N, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN +* +* Use unblocked code. +* + CALL CGETF2( M, N, A, LDA, IPIV, INFO ) + + ELSE +* +* Use blocked code. +* + DO 20 J = 1, MIN( M, N ), NB + JB = MIN( MIN( M, N )-J+1, NB ) +* +* +* Update before factoring the current panel +* + DO 30 K = 1, J-NB, NB +* +* Apply interchanges to rows K:K+NB-1. +* + CALL CLASWP( JB, A(1, J), LDA, K, K+NB-1, IPIV, 1 ) +* +* Compute block row of U. +* + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ NB, JB, ONE, A( K, K ), LDA, + $ A( K, J ), LDA ) +* +* Update trailing submatrix. +* + CALL CGEMM( 'No transpose', 'No transpose', + $ M-K-NB+1, JB, NB, -ONE, + $ A( K+NB, K ), LDA, A( K, J ), LDA, ONE, + $ A( K+NB, J ), LDA ) + 30 CONTINUE +* +* Factor diagonal and subdiagonal blocks and test for exact +* singularity. +* + CALL CGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO ) +* +* Adjust INFO and the pivot indices. +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + J - 1 + DO 10 I = J, MIN( M, J+JB-1 ) + IPIV( I ) = J - 1 + IPIV( I ) + 10 CONTINUE +* + 20 CONTINUE + +* +* Apply interchanges to the left-overs +* + DO 40 K = 1, MIN( M, N ), NB + CALL CLASWP( K-1, A( 1, 1 ), LDA, K, + $ MIN (K+NB-1, MIN ( M, N )), IPIV, 1 ) + 40 CONTINUE +* +* Apply update to the M+1:N columns when N > M +* + IF ( N.GT.M ) THEN + + CALL CLASWP( N-M, A(1, M+1), LDA, 1, M, IPIV, 1 ) + + DO 50 K = 1, M, NB + + JB = MIN( M-K+1, NB ) +* + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, N-M, ONE, A( K, K ), LDA, + $ A( K, M+1 ), LDA ) + +* + IF ( K+NB.LE.M ) THEN + CALL CGEMM( 'No transpose', 'No transpose', + $ M-K-NB+1, N-M, NB, -ONE, + $ A( K+NB, K ), LDA, A( K, M+1 ), LDA, ONE, + $ A( K+NB, M+1 ), LDA ) + END IF + 50 CONTINUE + END IF +* + END IF + RETURN +* +* End of CGETRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/lu/LL/dgetrf.f b/dspl/liblapack/SRC/VARIANTS/lu/LL/dgetrf.f new file mode 100644 index 0000000..9f25abd --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/lu/LL/dgetrf.f @@ -0,0 +1,247 @@ +C> \brief \b DGETRF VARIANT: left-looking Level 3 BLAS version of the algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DGETRF ( M, N, A, LDA, IPIV, INFO) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> DGETRF computes an LU factorization of a general M-by-N matrix A +C> using partial pivoting with row interchanges. +C> +C> The factorization has the form +C> A = P * L * U +C> where P is a permutation matrix, L is lower triangular with unit +C> diagonal elements (lower trapezoidal if m > n), and U is upper +C> triangular (upper trapezoidal if m < n). +C> +C> This is the left-looking Level 3 BLAS version of the algorithm. +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] M +C> \verbatim +C> M is INTEGER +C> The number of rows of the matrix A. M >= 0. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The number of columns of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is DOUBLE PRECISION array, dimension (LDA,N) +C> On entry, the M-by-N matrix to be factored. +C> On exit, the factors L and U from the factorization +C> A = P*L*U; the unit diagonal elements of L are not stored. +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,M). +C> \endverbatim +C> +C> \param[out] IPIV +C> \verbatim +C> IPIV is INTEGER array, dimension (min(M,N)) +C> The pivot indices; for 1 <= i <= min(M,N), row i of the +C> matrix was interchanged with row IPIV(i). +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +C> has been completed, but the factor U is exactly +C> singular, and division by zero will occur if it is used +C> to solve a system of equations. +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsGEcomputational +* +* ===================================================================== + SUBROUTINE DGETRF ( M, N, A, LDA, IPIV, INFO) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO, J, JB, K, NB +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DGETF2, DLASWP, DTRSM, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'DGETRF', ' ', M, N, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN +* +* Use unblocked code. +* + CALL DGETF2( M, N, A, LDA, IPIV, INFO ) + + ELSE +* +* Use blocked code. +* + DO 20 J = 1, MIN( M, N ), NB + JB = MIN( MIN( M, N )-J+1, NB ) +* +* Update before factoring the current panel +* + DO 30 K = 1, J-NB, NB +* +* Apply interchanges to rows K:K+NB-1. +* + CALL DLASWP( JB, A(1, J), LDA, K, K+NB-1, IPIV, 1 ) +* +* Compute block row of U. +* + CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ NB, JB, ONE, A( K, K ), LDA, + $ A( K, J ), LDA ) +* +* Update trailing submatrix. +* + CALL DGEMM( 'No transpose', 'No transpose', + $ M-K-NB+1, JB, NB, -ONE, + $ A( K+NB, K ), LDA, A( K, J ), LDA, ONE, + $ A( K+NB, J ), LDA ) + 30 CONTINUE +* +* Factor diagonal and subdiagonal blocks and test for exact +* singularity. +* + CALL DGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO ) +* +* Adjust INFO and the pivot indices. +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + J - 1 + DO 10 I = J, MIN( M, J+JB-1 ) + IPIV( I ) = J - 1 + IPIV( I ) + 10 CONTINUE +* + 20 CONTINUE + +* +* Apply interchanges to the left-overs +* + DO 40 K = 1, MIN( M, N ), NB + CALL DLASWP( K-1, A( 1, 1 ), LDA, K, + $ MIN (K+NB-1, MIN ( M, N )), IPIV, 1 ) + 40 CONTINUE +* +* Apply update to the M+1:N columns when N > M +* + IF ( N.GT.M ) THEN + + CALL DLASWP( N-M, A(1, M+1), LDA, 1, M, IPIV, 1 ) + + DO 50 K = 1, M, NB + + JB = MIN( M-K+1, NB ) +* + CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, N-M, ONE, A( K, K ), LDA, + $ A( K, M+1 ), LDA ) + +* + IF ( K+NB.LE.M ) THEN + CALL DGEMM( 'No transpose', 'No transpose', + $ M-K-NB+1, N-M, NB, -ONE, + $ A( K+NB, K ), LDA, A( K, M+1 ), LDA, ONE, + $ A( K+NB, M+1 ), LDA ) + END IF + 50 CONTINUE + END IF +* + END IF + RETURN +* +* End of DGETRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/lu/LL/sgetrf.f b/dspl/liblapack/SRC/VARIANTS/lu/LL/sgetrf.f new file mode 100644 index 0000000..765cacc --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/lu/LL/sgetrf.f @@ -0,0 +1,248 @@ +C> \brief \b SGETRF VARIANT: left-looking Level 3 BLAS version of the algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE SGETRF ( M, N, A, LDA, IPIV, INFO) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> SGETRF computes an LU factorization of a general M-by-N matrix A +C> using partial pivoting with row interchanges. +C> +C> The factorization has the form +C> A = P * L * U +C> where P is a permutation matrix, L is lower triangular with unit +C> diagonal elements (lower trapezoidal if m > n), and U is upper +C> triangular (upper trapezoidal if m < n). +C> +C> This is the left-looking Level 3 BLAS version of the algorithm. +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] M +C> \verbatim +C> M is INTEGER +C> The number of rows of the matrix A. M >= 0. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The number of columns of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is REAL array, dimension (LDA,N) +C> On entry, the M-by-N matrix to be factored. +C> On exit, the factors L and U from the factorization +C> A = P*L*U; the unit diagonal elements of L are not stored. +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,M). +C> \endverbatim +C> +C> \param[out] IPIV +C> \verbatim +C> IPIV is INTEGER array, dimension (min(M,N)) +C> The pivot indices; for 1 <= i <= min(M,N), row i of the +C> matrix was interchanged with row IPIV(i). +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +C> has been completed, but the factor U is exactly +C> singular, and division by zero will occur if it is used +C> to solve a system of equations. +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsGEcomputational +* +* ===================================================================== + SUBROUTINE SGETRF ( M, N, A, LDA, IPIV, INFO) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO, J, JB, K, NB +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SGETF2, SLASWP, STRSM, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'SGETRF', ' ', M, N, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN +* +* Use unblocked code. +* + CALL SGETF2( M, N, A, LDA, IPIV, INFO ) + + ELSE +* +* Use blocked code. +* + DO 20 J = 1, MIN( M, N ), NB + JB = MIN( MIN( M, N )-J+1, NB ) +* +* +* Update before factoring the current panel +* + DO 30 K = 1, J-NB, NB +* +* Apply interchanges to rows K:K+NB-1. +* + CALL SLASWP( JB, A(1, J), LDA, K, K+NB-1, IPIV, 1 ) +* +* Compute block row of U. +* + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ NB, JB, ONE, A( K, K ), LDA, + $ A( K, J ), LDA ) +* +* Update trailing submatrix. +* + CALL SGEMM( 'No transpose', 'No transpose', + $ M-K-NB+1, JB, NB, -ONE, + $ A( K+NB, K ), LDA, A( K, J ), LDA, ONE, + $ A( K+NB, J ), LDA ) + 30 CONTINUE +* +* Factor diagonal and subdiagonal blocks and test for exact +* singularity. +* + CALL SGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO ) +* +* Adjust INFO and the pivot indices. +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + J - 1 + DO 10 I = J, MIN( M, J+JB-1 ) + IPIV( I ) = J - 1 + IPIV( I ) + 10 CONTINUE +* + 20 CONTINUE + +* +* Apply interchanges to the left-overs +* + DO 40 K = 1, MIN( M, N ), NB + CALL SLASWP( K-1, A( 1, 1 ), LDA, K, + $ MIN (K+NB-1, MIN ( M, N )), IPIV, 1 ) + 40 CONTINUE +* +* Apply update to the M+1:N columns when N > M +* + IF ( N.GT.M ) THEN + + CALL SLASWP( N-M, A(1, M+1), LDA, 1, M, IPIV, 1 ) + + DO 50 K = 1, M, NB + + JB = MIN( M-K+1, NB ) +* + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, N-M, ONE, A( K, K ), LDA, + $ A( K, M+1 ), LDA ) + +* + IF ( K+NB.LE.M ) THEN + CALL SGEMM( 'No transpose', 'No transpose', + $ M-K-NB+1, N-M, NB, -ONE, + $ A( K+NB, K ), LDA, A( K, M+1 ), LDA, ONE, + $ A( K+NB, M+1 ), LDA ) + END IF + 50 CONTINUE + END IF +* + END IF + RETURN +* +* End of SGETRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/lu/LL/zgetrf.f b/dspl/liblapack/SRC/VARIANTS/lu/LL/zgetrf.f new file mode 100644 index 0000000..ef3fbf4 --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/lu/LL/zgetrf.f @@ -0,0 +1,248 @@ +C> \brief \b ZGETRF VARIANT: left-looking Level 3 BLAS version of the algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZGETRF ( M, N, A, LDA, IPIV, INFO) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> ZGETRF computes an LU factorization of a general M-by-N matrix A +C> using partial pivoting with row interchanges. +C> +C> The factorization has the form +C> A = P * L * U +C> where P is a permutation matrix, L is lower triangular with unit +C> diagonal elements (lower trapezoidal if m > n), and U is upper +C> triangular (upper trapezoidal if m < n). +C> +C> This is the left-looking Level 3 BLAS version of the algorithm. +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] M +C> \verbatim +C> M is INTEGER +C> The number of rows of the matrix A. M >= 0. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The number of columns of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is COMPLEX*16 array, dimension (LDA,N) +C> On entry, the M-by-N matrix to be factored. +C> On exit, the factors L and U from the factorization +C> A = P*L*U; the unit diagonal elements of L are not stored. +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,M). +C> \endverbatim +C> +C> \param[out] IPIV +C> \verbatim +C> IPIV is INTEGER array, dimension (min(M,N)) +C> The pivot indices; for 1 <= i <= min(M,N), row i of the +C> matrix was interchanged with row IPIV(i). +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +C> has been completed, but the factor U is exactly +C> singular, and division by zero will occur if it is used +C> to solve a system of equations. +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsGEcomputational +* +* ===================================================================== + SUBROUTINE ZGETRF ( M, N, A, LDA, IPIV, INFO) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = (1.0D+0, 0.0D+0) ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO, J, JB, K, NB +* .. +* .. External Subroutines .. + EXTERNAL ZGEMM, ZGETF2, ZLASWP, ZTRSM, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'ZGETRF', ' ', M, N, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN +* +* Use unblocked code. +* + CALL ZGETF2( M, N, A, LDA, IPIV, INFO ) + + ELSE +* +* Use blocked code. +* + DO 20 J = 1, MIN( M, N ), NB + JB = MIN( MIN( M, N )-J+1, NB ) +* +* +* Update before factoring the current panel +* + DO 30 K = 1, J-NB, NB +* +* Apply interchanges to rows K:K+NB-1. +* + CALL ZLASWP( JB, A(1, J), LDA, K, K+NB-1, IPIV, 1 ) +* +* Compute block row of U. +* + CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ NB, JB, ONE, A( K, K ), LDA, + $ A( K, J ), LDA ) +* +* Update trailing submatrix. +* + CALL ZGEMM( 'No transpose', 'No transpose', + $ M-K-NB+1, JB, NB, -ONE, + $ A( K+NB, K ), LDA, A( K, J ), LDA, ONE, + $ A( K+NB, J ), LDA ) + 30 CONTINUE +* +* Factor diagonal and subdiagonal blocks and test for exact +* singularity. +* + CALL ZGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO ) +* +* Adjust INFO and the pivot indices. +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + J - 1 + DO 10 I = J, MIN( M, J+JB-1 ) + IPIV( I ) = J - 1 + IPIV( I ) + 10 CONTINUE +* + 20 CONTINUE + +* +* Apply interchanges to the left-overs +* + DO 40 K = 1, MIN( M, N ), NB + CALL ZLASWP( K-1, A( 1, 1 ), LDA, K, + $ MIN (K+NB-1, MIN ( M, N )), IPIV, 1 ) + 40 CONTINUE +* +* Apply update to the M+1:N columns when N > M +* + IF ( N.GT.M ) THEN + + CALL ZLASWP( N-M, A(1, M+1), LDA, 1, M, IPIV, 1 ) + + DO 50 K = 1, M, NB + + JB = MIN( M-K+1, NB ) +* + CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, N-M, ONE, A( K, K ), LDA, + $ A( K, M+1 ), LDA ) + +* + IF ( K+NB.LE.M ) THEN + CALL ZGEMM( 'No transpose', 'No transpose', + $ M-K-NB+1, N-M, NB, -ONE, + $ A( K+NB, K ), LDA, A( K, M+1 ), LDA, ONE, + $ A( K+NB, M+1 ), LDA ) + END IF + 50 CONTINUE + END IF +* + END IF + RETURN +* +* End of ZGETRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/lu/REC/cgetrf.f b/dspl/liblapack/SRC/VARIANTS/lu/REC/cgetrf.f new file mode 100644 index 0000000..9daab38 --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/lu/REC/cgetrf.f @@ -0,0 +1,281 @@ +C> \brief \b CGETRF VARIANT: iterative version of Sivan Toledo's recursive LU algorithm +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE CGETRF( M, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> CGETRF computes an LU factorization of a general M-by-N matrix A +C> using partial pivoting with row interchanges. +C> +C> The factorization has the form +C> A = P * L * U +C> where P is a permutation matrix, L is lower triangular with unit +C> diagonal elements (lower trapezoidal if m > n), and U is upper +C> triangular (upper trapezoidal if m < n). +C> +C> This code implements an iterative version of Sivan Toledo's recursive +C> LU algorithm[1]. For square matrices, this iterative versions should +C> be within a factor of two of the optimum number of memory transfers. +C> +C> The pattern is as follows, with the large blocks of U being updated +C> in one call to DTRSM, and the dotted lines denoting sections that +C> have had all pending permutations applied: +C> +C> 1 2 3 4 5 6 7 8 +C> +-+-+---+-------+------ +C> | |1| | | +C> |.+-+ 2 | | +C> | | | | | +C> |.|.+-+-+ 4 | +C> | | | |1| | +C> | | |.+-+ | +C> | | | | | | +C> |.|.|.|.+-+-+---+ 8 +C> | | | | | |1| | +C> | | | | |.+-+ 2 | +C> | | | | | | | | +C> | | | | |.|.+-+-+ +C> | | | | | | | |1| +C> | | | | | | |.+-+ +C> | | | | | | | | | +C> |.|.|.|.|.|.|.|.+----- +C> | | | | | | | | | +C> +C> The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in +C> the binary expansion of the current column. Each Schur update is +C> applied as soon as the necessary portion of U is available. +C> +C> [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with +C> Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997), +C> 1065-1081. http://dx.doi.org/10.1137/S0895479896297744 +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] M +C> \verbatim +C> M is INTEGER +C> The number of rows of the matrix A. M >= 0. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The number of columns of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is COMPLEX array, dimension (LDA,N) +C> On entry, the M-by-N matrix to be factored. +C> On exit, the factors L and U from the factorization +C> A = P*L*U; the unit diagonal elements of L are not stored. +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,M). +C> \endverbatim +C> +C> \param[out] IPIV +C> \verbatim +C> IPIV is INTEGER array, dimension (min(M,N)) +C> The pivot indices; for 1 <= i <= min(M,N), row i of the +C> matrix was interchanged with row IPIV(i). +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +C> has been completed, but the factor U is exactly +C> singular, and division by zero will occur if it is used +C> to solve a system of equations. +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsGEcomputational +* +* ===================================================================== + SUBROUTINE CGETRF( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.X) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, NEGONE + REAL ZERO + PARAMETER ( ONE = (1.0E+0, 0.0E+0) ) + PARAMETER ( NEGONE = (-1.0E+0, 0.0E+0) ) + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + REAL SFMIN, PIVMAG + COMPLEX TMP + INTEGER I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD + INTEGER KSTART, IPIVSTART, JPIVSTART, KCOLS +* .. +* .. External Functions .. + REAL SLAMCH + INTEGER ICAMAX + LOGICAL SISNAN + EXTERNAL SLAMCH, ICAMAX, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CTRSM, CSCAL, XERBLA, CLASWP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, IAND, ABS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Compute machine safe minimum +* + SFMIN = SLAMCH( 'S' ) +* + NSTEP = MIN( M, N ) + DO J = 1, NSTEP + KAHEAD = IAND( J, -J ) + KSTART = J + 1 - KAHEAD + KCOLS = MIN( KAHEAD, M-J ) +* +* Find pivot. +* + JP = J - 1 + ICAMAX( M-J+1, A( J, J ), 1 ) + IPIV( J ) = JP + +* Permute just this column. + IF (JP .NE. J) THEN + TMP = A( J, J ) + A( J, J ) = A( JP, J ) + A( JP, J ) = TMP + END IF + +* Apply pending permutations to L + NTOPIV = 1 + IPIVSTART = J + JPIVSTART = J - NTOPIV + DO WHILE ( NTOPIV .LT. KAHEAD ) + CALL CLASWP( NTOPIV, A( 1, JPIVSTART ), LDA, IPIVSTART, J, + $ IPIV, 1 ) + IPIVSTART = IPIVSTART - NTOPIV; + NTOPIV = NTOPIV * 2; + JPIVSTART = JPIVSTART - NTOPIV; + END DO + +* Permute U block to match L + CALL CLASWP( KCOLS, A( 1,J+1 ), LDA, KSTART, J, IPIV, 1 ) + +* Factor the current column + PIVMAG = ABS( A( J, J ) ) + IF( PIVMAG.NE.ZERO .AND. .NOT.SISNAN( PIVMAG ) ) THEN + IF( PIVMAG .GE. SFMIN ) THEN + CALL CSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 ) + ELSE + DO I = 1, M-J + A( J+I, J ) = A( J+I, J ) / A( J, J ) + END DO + END IF + ELSE IF( PIVMAG .EQ. ZERO .AND. INFO .EQ. 0 ) THEN + INFO = J + END IF + +* Solve for U block. + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', KAHEAD, + $ KCOLS, ONE, A( KSTART, KSTART ), LDA, + $ A( KSTART, J+1 ), LDA ) +* Schur complement. + CALL CGEMM( 'No transpose', 'No transpose', M-J, + $ KCOLS, KAHEAD, NEGONE, A( J+1, KSTART ), LDA, + $ A( KSTART, J+1 ), LDA, ONE, A( J+1, J+1 ), LDA ) + END DO + +* Handle pivot permutations on the way out of the recursion + NPIVED = IAND( NSTEP, -NSTEP ) + J = NSTEP - NPIVED + DO WHILE ( J .GT. 0 ) + NTOPIV = IAND( J, -J ) + CALL CLASWP( NTOPIV, A( 1, J-NTOPIV+1 ), LDA, J+1, NSTEP, + $ IPIV, 1 ) + J = J - NTOPIV + END DO + +* If short and wide, handle the rest of the columns. + IF ( M .LT. N ) THEN + CALL CLASWP( N-M, A( 1, M+KCOLS+1 ), LDA, 1, M, IPIV, 1 ) + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', M, + $ N-M, ONE, A, LDA, A( 1,M+KCOLS+1 ), LDA ) + END IF + + RETURN +* +* End of CGETRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/lu/REC/dgetrf.f b/dspl/liblapack/SRC/VARIANTS/lu/REC/dgetrf.f new file mode 100644 index 0000000..db6cece --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/lu/REC/dgetrf.f @@ -0,0 +1,277 @@ +C> \brief \b DGETRF VARIANT: iterative version of Sivan Toledo's recursive LU algorithm +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> DGETRF computes an LU factorization of a general M-by-N matrix A +C> using partial pivoting with row interchanges. +C> +C> The factorization has the form +C> A = P * L * U +C> where P is a permutation matrix, L is lower triangular with unit +C> diagonal elements (lower trapezoidal if m > n), and U is upper +C> triangular (upper trapezoidal if m < n). +C> +C> This code implements an iterative version of Sivan Toledo's recursive +C> LU algorithm[1]. For square matrices, this iterative versions should +C> be within a factor of two of the optimum number of memory transfers. +C> +C> The pattern is as follows, with the large blocks of U being updated +C> in one call to DTRSM, and the dotted lines denoting sections that +C> have had all pending permutations applied: +C> +C> 1 2 3 4 5 6 7 8 +C> +-+-+---+-------+------ +C> | |1| | | +C> |.+-+ 2 | | +C> | | | | | +C> |.|.+-+-+ 4 | +C> | | | |1| | +C> | | |.+-+ | +C> | | | | | | +C> |.|.|.|.+-+-+---+ 8 +C> | | | | | |1| | +C> | | | | |.+-+ 2 | +C> | | | | | | | | +C> | | | | |.|.+-+-+ +C> | | | | | | | |1| +C> | | | | | | |.+-+ +C> | | | | | | | | | +C> |.|.|.|.|.|.|.|.+----- +C> | | | | | | | | | +C> +C> The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in +C> the binary expansion of the current column. Each Schur update is +C> applied as soon as the necessary portion of U is available. +C> +C> [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with +C> Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997), +C> 1065-1081. http://dx.doi.org/10.1137/S0895479896297744 +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] M +C> \verbatim +C> M is INTEGER +C> The number of rows of the matrix A. M >= 0. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The number of columns of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is DOUBLE PRECISION array, dimension (LDA,N) +C> On entry, the M-by-N matrix to be factored. +C> On exit, the factors L and U from the factorization +C> A = P*L*U; the unit diagonal elements of L are not stored. +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,M). +C> \endverbatim +C> +C> \param[out] IPIV +C> \verbatim +C> IPIV is INTEGER array, dimension (min(M,N)) +C> The pivot indices; for 1 <= i <= min(M,N), row i of the +C> matrix was interchanged with row IPIV(i). +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +C> has been completed, but the factor U is exactly +C> singular, and division by zero will occur if it is used +C> to solve a system of equations. +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsGEcomputational +* +* ===================================================================== + SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.X) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO, NEGONE + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) + PARAMETER ( NEGONE = -1.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION SFMIN, TMP + INTEGER I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD + INTEGER KSTART, IPIVSTART, JPIVSTART, KCOLS +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + INTEGER IDAMAX + LOGICAL DISNAN + EXTERNAL DLAMCH, IDAMAX, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL DTRSM, DSCAL, XERBLA, DLASWP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, IAND +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Compute machine safe minimum +* + SFMIN = DLAMCH( 'S' ) +* + NSTEP = MIN( M, N ) + DO J = 1, NSTEP + KAHEAD = IAND( J, -J ) + KSTART = J + 1 - KAHEAD + KCOLS = MIN( KAHEAD, M-J ) +* +* Find pivot. +* + JP = J - 1 + IDAMAX( M-J+1, A( J, J ), 1 ) + IPIV( J ) = JP + +* Permute just this column. + IF (JP .NE. J) THEN + TMP = A( J, J ) + A( J, J ) = A( JP, J ) + A( JP, J ) = TMP + END IF + +* Apply pending permutations to L + NTOPIV = 1 + IPIVSTART = J + JPIVSTART = J - NTOPIV + DO WHILE ( NTOPIV .LT. KAHEAD ) + CALL DLASWP( NTOPIV, A( 1, JPIVSTART ), LDA, IPIVSTART, J, + $ IPIV, 1 ) + IPIVSTART = IPIVSTART - NTOPIV; + NTOPIV = NTOPIV * 2; + JPIVSTART = JPIVSTART - NTOPIV; + END DO + +* Permute U block to match L + CALL DLASWP( KCOLS, A( 1,J+1 ), LDA, KSTART, J, IPIV, 1 ) + +* Factor the current column + IF( A( J, J ).NE.ZERO .AND. .NOT.DISNAN( A( J, J ) ) ) THEN + IF( ABS(A( J, J )) .GE. SFMIN ) THEN + CALL DSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 ) + ELSE + DO I = 1, M-J + A( J+I, J ) = A( J+I, J ) / A( J, J ) + END DO + END IF + ELSE IF( A( J,J ) .EQ. ZERO .AND. INFO .EQ. 0 ) THEN + INFO = J + END IF + +* Solve for U block. + CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', KAHEAD, + $ KCOLS, ONE, A( KSTART, KSTART ), LDA, + $ A( KSTART, J+1 ), LDA ) +* Schur complement. + CALL DGEMM( 'No transpose', 'No transpose', M-J, + $ KCOLS, KAHEAD, NEGONE, A( J+1, KSTART ), LDA, + $ A( KSTART, J+1 ), LDA, ONE, A( J+1, J+1 ), LDA ) + END DO + +* Handle pivot permutations on the way out of the recursion + NPIVED = IAND( NSTEP, -NSTEP ) + J = NSTEP - NPIVED + DO WHILE ( J .GT. 0 ) + NTOPIV = IAND( J, -J ) + CALL DLASWP( NTOPIV, A( 1, J-NTOPIV+1 ), LDA, J+1, NSTEP, + $ IPIV, 1 ) + J = J - NTOPIV + END DO + +* If short and wide, handle the rest of the columns. + IF ( M .LT. N ) THEN + CALL DLASWP( N-M, A( 1, M+KCOLS+1 ), LDA, 1, M, IPIV, 1 ) + CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', M, + $ N-M, ONE, A, LDA, A( 1,M+KCOLS+1 ), LDA ) + END IF + + RETURN +* +* End of DGETRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/lu/REC/sgetrf.f b/dspl/liblapack/SRC/VARIANTS/lu/REC/sgetrf.f new file mode 100644 index 0000000..379ad1c --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/lu/REC/sgetrf.f @@ -0,0 +1,277 @@ +C> \brief \b SGETRF VARIANT: iterative version of Sivan Toledo's recursive LU algorithm +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE SGETRF( M, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> SGETRF computes an LU factorization of a general M-by-N matrix A +C> using partial pivoting with row interchanges. +C> +C> The factorization has the form +C> A = P * L * U +C> where P is a permutation matrix, L is lower triangular with unit +C> diagonal elements (lower trapezoidal if m > n), and U is upper +C> triangular (upper trapezoidal if m < n). +C> +C> This code implements an iterative version of Sivan Toledo's recursive +C> LU algorithm[1]. For square matrices, this iterative versions should +C> be within a factor of two of the optimum number of memory transfers. +C> +C> The pattern is as follows, with the large blocks of U being updated +C> in one call to STRSM, and the dotted lines denoting sections that +C> have had all pending permutations applied: +C> +C> 1 2 3 4 5 6 7 8 +C> +-+-+---+-------+------ +C> | |1| | | +C> |.+-+ 2 | | +C> | | | | | +C> |.|.+-+-+ 4 | +C> | | | |1| | +C> | | |.+-+ | +C> | | | | | | +C> |.|.|.|.+-+-+---+ 8 +C> | | | | | |1| | +C> | | | | |.+-+ 2 | +C> | | | | | | | | +C> | | | | |.|.+-+-+ +C> | | | | | | | |1| +C> | | | | | | |.+-+ +C> | | | | | | | | | +C> |.|.|.|.|.|.|.|.+----- +C> | | | | | | | | | +C> +C> The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in +C> the binary expansion of the current column. Each Schur update is +C> applied as soon as the necessary portion of U is available. +C> +C> [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with +C> Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997), +C> 1065-1081. http://dx.doi.org/10.1137/S0895479896297744 +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] M +C> \verbatim +C> M is INTEGER +C> The number of rows of the matrix A. M >= 0. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The number of columns of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is REAL array, dimension (LDA,N) +C> On entry, the M-by-N matrix to be factored. +C> On exit, the factors L and U from the factorization +C> A = P*L*U; the unit diagonal elements of L are not stored. +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,M). +C> \endverbatim +C> +C> \param[out] IPIV +C> \verbatim +C> IPIV is INTEGER array, dimension (min(M,N)) +C> The pivot indices; for 1 <= i <= min(M,N), row i of the +C> matrix was interchanged with row IPIV(i). +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +C> has been completed, but the factor U is exactly +C> singular, and division by zero will occur if it is used +C> to solve a system of equations. +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsGEcomputational +* +* ===================================================================== + SUBROUTINE SGETRF( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.X) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO, NEGONE + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + PARAMETER ( NEGONE = -1.0E+0 ) +* .. +* .. Local Scalars .. + REAL SFMIN, TMP + INTEGER I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD + INTEGER KSTART, IPIVSTART, JPIVSTART, KCOLS +* .. +* .. External Functions .. + REAL SLAMCH + INTEGER ISAMAX + LOGICAL SISNAN + EXTERNAL SLAMCH, ISAMAX, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL STRSM, SSCAL, XERBLA, SLASWP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, IAND +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Compute machine safe minimum +* + SFMIN = SLAMCH( 'S' ) +* + NSTEP = MIN( M, N ) + DO J = 1, NSTEP + KAHEAD = IAND( J, -J ) + KSTART = J + 1 - KAHEAD + KCOLS = MIN( KAHEAD, M-J ) +* +* Find pivot. +* + JP = J - 1 + ISAMAX( M-J+1, A( J, J ), 1 ) + IPIV( J ) = JP + +! Permute just this column. + IF (JP .NE. J) THEN + TMP = A( J, J ) + A( J, J ) = A( JP, J ) + A( JP, J ) = TMP + END IF + +! Apply pending permutations to L + NTOPIV = 1 + IPIVSTART = J + JPIVSTART = J - NTOPIV + DO WHILE ( NTOPIV .LT. KAHEAD ) + CALL SLASWP( NTOPIV, A( 1, JPIVSTART ), LDA, IPIVSTART, J, + $ IPIV, 1 ) + IPIVSTART = IPIVSTART - NTOPIV; + NTOPIV = NTOPIV * 2; + JPIVSTART = JPIVSTART - NTOPIV; + END DO + +! Permute U block to match L + CALL SLASWP( KCOLS, A( 1,J+1 ), LDA, KSTART, J, IPIV, 1 ) + +! Factor the current column + IF( A( J, J ).NE.ZERO .AND. .NOT.SISNAN( A( J, J ) ) ) THEN + IF( ABS(A( J, J )) .GE. SFMIN ) THEN + CALL SSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 ) + ELSE + DO I = 1, M-J + A( J+I, J ) = A( J+I, J ) / A( J, J ) + END DO + END IF + ELSE IF( A( J,J ) .EQ. ZERO .AND. INFO .EQ. 0 ) THEN + INFO = J + END IF + +! Solve for U block. + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', KAHEAD, + $ KCOLS, ONE, A( KSTART, KSTART ), LDA, + $ A( KSTART, J+1 ), LDA ) +! Schur complement. + CALL SGEMM( 'No transpose', 'No transpose', M-J, + $ KCOLS, KAHEAD, NEGONE, A( J+1, KSTART ), LDA, + $ A( KSTART, J+1 ), LDA, ONE, A( J+1, J+1 ), LDA ) + END DO + +! Handle pivot permutations on the way out of the recursion + NPIVED = IAND( NSTEP, -NSTEP ) + J = NSTEP - NPIVED + DO WHILE ( J .GT. 0 ) + NTOPIV = IAND( J, -J ) + CALL SLASWP( NTOPIV, A( 1, J-NTOPIV+1 ), LDA, J+1, NSTEP, + $ IPIV, 1 ) + J = J - NTOPIV + END DO + +! If short and wide, handle the rest of the columns. + IF ( M .LT. N ) THEN + CALL SLASWP( N-M, A( 1, M+KCOLS+1 ), LDA, 1, M, IPIV, 1 ) + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', M, + $ N-M, ONE, A, LDA, A( 1,M+KCOLS+1 ), LDA ) + END IF + + RETURN +* +* End of SGETRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/lu/REC/zgetrf.f b/dspl/liblapack/SRC/VARIANTS/lu/REC/zgetrf.f new file mode 100644 index 0000000..dc42c83 --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/lu/REC/zgetrf.f @@ -0,0 +1,281 @@ +C> \brief \b ZGETRF VARIANT: iterative version of Sivan Toledo's recursive LU algorithm +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZGETRF( M, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> ZGETRF computes an LU factorization of a general M-by-N matrix A +C> using partial pivoting with row interchanges. +C> +C> The factorization has the form +C> A = P * L * U +C> where P is a permutation matrix, L is lower triangular with unit +C> diagonal elements (lower trapezoidal if m > n), and U is upper +C> triangular (upper trapezoidal if m < n). +C> +C> This code implements an iterative version of Sivan Toledo's recursive +C> LU algorithm[1]. For square matrices, this iterative versions should +C> be within a factor of two of the optimum number of memory transfers. +C> +C> The pattern is as follows, with the large blocks of U being updated +C> in one call to DTRSM, and the dotted lines denoting sections that +C> have had all pending permutations applied: +C> +C> 1 2 3 4 5 6 7 8 +C> +-+-+---+-------+------ +C> | |1| | | +C> |.+-+ 2 | | +C> | | | | | +C> |.|.+-+-+ 4 | +C> | | | |1| | +C> | | |.+-+ | +C> | | | | | | +C> |.|.|.|.+-+-+---+ 8 +C> | | | | | |1| | +C> | | | | |.+-+ 2 | +C> | | | | | | | | +C> | | | | |.|.+-+-+ +C> | | | | | | | |1| +C> | | | | | | |.+-+ +C> | | | | | | | | | +C> |.|.|.|.|.|.|.|.+----- +C> | | | | | | | | | +C> +C> The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in +C> the binary expansion of the current column. Each Schur update is +C> applied as soon as the necessary portion of U is available. +C> +C> [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with +C> Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997), +C> 1065-1081. http://dx.doi.org/10.1137/S0895479896297744 +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] M +C> \verbatim +C> M is INTEGER +C> The number of rows of the matrix A. M >= 0. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The number of columns of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is COMPLEX*16 array, dimension (LDA,N) +C> On entry, the M-by-N matrix to be factored. +C> On exit, the factors L and U from the factorization +C> A = P*L*U; the unit diagonal elements of L are not stored. +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,M). +C> \endverbatim +C> +C> \param[out] IPIV +C> \verbatim +C> IPIV is INTEGER array, dimension (min(M,N)) +C> The pivot indices; for 1 <= i <= min(M,N), row i of the +C> matrix was interchanged with row IPIV(i). +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +C> has been completed, but the factor U is exactly +C> singular, and division by zero will occur if it is used +C> to solve a system of equations. +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsGEcomputational +* +* ===================================================================== + SUBROUTINE ZGETRF( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.X) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, NEGONE + DOUBLE PRECISION ZERO + PARAMETER ( ONE = (1.0D+0, 0.0D+0) ) + PARAMETER ( NEGONE = (-1.0D+0, 0.0D+0) ) + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION SFMIN, PIVMAG + COMPLEX*16 TMP + INTEGER I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD + INTEGER KSTART, IPIVSTART, JPIVSTART, KCOLS +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + INTEGER IZAMAX + LOGICAL DISNAN + EXTERNAL DLAMCH, IZAMAX, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL ZTRSM, ZSCAL, XERBLA, ZLASWP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, IAND, ABS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Compute machine safe minimum +* + SFMIN = DLAMCH( 'S' ) +* + NSTEP = MIN( M, N ) + DO J = 1, NSTEP + KAHEAD = IAND( J, -J ) + KSTART = J + 1 - KAHEAD + KCOLS = MIN( KAHEAD, M-J ) +* +* Find pivot. +* + JP = J - 1 + IZAMAX( M-J+1, A( J, J ), 1 ) + IPIV( J ) = JP + +! Permute just this column. + IF (JP .NE. J) THEN + TMP = A( J, J ) + A( J, J ) = A( JP, J ) + A( JP, J ) = TMP + END IF + +! Apply pending permutations to L + NTOPIV = 1 + IPIVSTART = J + JPIVSTART = J - NTOPIV + DO WHILE ( NTOPIV .LT. KAHEAD ) + CALL ZLASWP( NTOPIV, A( 1, JPIVSTART ), LDA, IPIVSTART, J, + $ IPIV, 1 ) + IPIVSTART = IPIVSTART - NTOPIV; + NTOPIV = NTOPIV * 2; + JPIVSTART = JPIVSTART - NTOPIV; + END DO + +! Permute U block to match L + CALL ZLASWP( KCOLS, A( 1,J+1 ), LDA, KSTART, J, IPIV, 1 ) + +! Factor the current column + PIVMAG = ABS( A( J, J ) ) + IF( PIVMAG.NE.ZERO .AND. .NOT.DISNAN( PIVMAG ) ) THEN + IF( PIVMAG .GE. SFMIN ) THEN + CALL ZSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 ) + ELSE + DO I = 1, M-J + A( J+I, J ) = A( J+I, J ) / A( J, J ) + END DO + END IF + ELSE IF( PIVMAG .EQ. ZERO .AND. INFO .EQ. 0 ) THEN + INFO = J + END IF + +! Solve for U block. + CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', KAHEAD, + $ KCOLS, ONE, A( KSTART, KSTART ), LDA, + $ A( KSTART, J+1 ), LDA ) +! Schur complement. + CALL ZGEMM( 'No transpose', 'No transpose', M-J, + $ KCOLS, KAHEAD, NEGONE, A( J+1, KSTART ), LDA, + $ A( KSTART, J+1 ), LDA, ONE, A( J+1, J+1 ), LDA ) + END DO + +! Handle pivot permutations on the way out of the recursion + NPIVED = IAND( NSTEP, -NSTEP ) + J = NSTEP - NPIVED + DO WHILE ( J .GT. 0 ) + NTOPIV = IAND( J, -J ) + CALL ZLASWP( NTOPIV, A( 1, J-NTOPIV+1 ), LDA, J+1, NSTEP, + $ IPIV, 1 ) + J = J - NTOPIV + END DO + +! If short and wide, handle the rest of the columns. + IF ( M .LT. N ) THEN + CALL ZLASWP( N-M, A( 1, M+KCOLS+1 ), LDA, 1, M, IPIV, 1 ) + CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', M, + $ N-M, ONE, A, LDA, A( 1,M+KCOLS+1 ), LDA ) + END IF + + RETURN +* +* End of ZGETRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/qr/LL/cgeqrf.f b/dspl/liblapack/SRC/VARIANTS/qr/LL/cgeqrf.f new file mode 100644 index 0000000..3cbec13 --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/qr/LL/cgeqrf.f @@ -0,0 +1,416 @@ +C> \brief \b CGEQRF VARIANT: left-looking Level 3 BLAS version of the algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE CGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> CGEQRF computes a QR factorization of a real M-by-N matrix A: +C> A = Q * R. +C> +C> This is the left-looking Level 3 BLAS version of the algorithm. +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] M +C> \verbatim +C> M is INTEGER +C> The number of rows of the matrix A. M >= 0. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The number of columns of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is COMPLEX array, dimension (LDA,N) +C> On entry, the M-by-N matrix A. +C> On exit, the elements on and above the diagonal of the array +C> contain the min(M,N)-by-N upper trapezoidal matrix R (R is +C> upper triangular if m >= n); the elements below the diagonal, +C> with the array TAU, represent the orthogonal matrix Q as a +C> product of min(m,n) elementary reflectors (see Further +C> Details). +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,M). +C> \endverbatim +C> +C> \param[out] TAU +C> \verbatim +C> TAU is COMPLEX array, dimension (min(M,N)) +C> The scalar factors of the elementary reflectors (see Further +C> Details). +C> \endverbatim +C> +C> \param[out] WORK +C> \verbatim +C> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +C> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +C> \endverbatim +C> +C> \param[in] LWORK +C> \verbatim +C> LWORK is INTEGER +C> \endverbatim +C> \verbatim +C> The dimension of the array WORK. The dimension can be divided into three parts. +C> \endverbatim +C> \verbatim +C> 1) The part for the triangular factor T. If the very last T is not bigger +C> than any of the rest, then this part is NB x ceiling(K/NB), otherwise, +C> NB x (K-NT), where K = min(M,N) and NT is the dimension of the very last T +C> \endverbatim +C> \verbatim +C> 2) The part for the very last T when T is bigger than any of the rest T. +C> The size of this part is NT x NT, where NT = K - ceiling ((K-NX)/NB) x NB, +C> where K = min(M,N), NX is calculated by +C> NX = MAX( 0, ILAENV( 3, 'CGEQRF', ' ', M, N, -1, -1 ) ) +C> \endverbatim +C> \verbatim +C> 3) The part for dlarfb is of size max((N-M)*K, (N-M)*NB, K*NB, NB*NB) +C> \endverbatim +C> \verbatim +C> So LWORK = part1 + part2 + part3 +C> \endverbatim +C> \verbatim +C> If LWORK = -1, then a workspace query is assumed; the routine +C> only calculates the optimal size of the WORK array, returns +C> this value as the first entry of the WORK array, and no error +C> message related to LWORK is issued by XERBLA. +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsGEcomputational +* +* Further Details +* =============== +C>\details \b Further \b Details +C> \verbatim +C> +C> The matrix Q is represented as a product of elementary reflectors +C> +C> Q = H(1) H(2) . . . H(k), where k = min(m,n). +C> +C> Each H(i) has the form +C> +C> H(i) = I - tau * v * v' +C> +C> where tau is a real scalar, and v is a real vector with +C> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +C> and tau in TAU(i). +C> +C> \endverbatim +C> +* ===================================================================== + SUBROUTINE CGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, K, LWKOPT, NB, + $ NBMIN, NX, LBWORK, NT, LLWORK +* .. +* .. External Subroutines .. + EXTERNAL CGEQR2, CLARFB, CLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SCEIL + EXTERNAL ILAENV, SCEIL +* .. +* .. Executable Statements .. + + INFO = 0 + NBMIN = 2 + NX = 0 + IWS = N + K = MIN( M, N ) + NB = ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CGEQRF', ' ', M, N, -1, -1 ) ) + END IF +* +* Get NT, the size of the very last T, which is the left-over from in-between K-NX and K to K, eg.: +* +* NB=3 2NB=6 K=10 +* | | | +* 1--2--3--4--5--6--7--8--9--10 +* | \________/ +* K-NX=5 NT=4 +* +* So here 4 x 4 is the last T stored in the workspace +* + NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB + +* +* optimal workspace = space for dlarfb + space for normal T's + space for the last T +* + LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB)) + LLWORK = SCEIL(REAL(LLWORK)/REAL(NB)) + + IF ( NT.GT.NB ) THEN + + LBWORK = K-NT +* +* Optimal workspace for dlarfb = MAX(1,N)*NT +* + LWKOPT = (LBWORK+LLWORK)*NB + WORK( 1 ) = (LWKOPT+NT*NT) + + ELSE + + LBWORK = SCEIL(REAL(K)/REAL(NB))*NB + LWKOPT = (LBWORK+LLWORK-NB)*NB + WORK( 1 ) = LWKOPT + + END IF + +* +* Test the input arguments +* + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( NB.GT.1 .AND. NB.LT.K ) THEN + + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + IF ( NT.LE.NB ) THEN + IWS = (LBWORK+LLWORK-NB)*NB + ELSE + IWS = (LBWORK+LLWORK)*NB+NT*NT + END IF + + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + IF ( NT.LE.NB ) THEN + NB = LWORK / (LLWORK+(LBWORK-NB)) + ELSE + NB = (LWORK-NT*NT)/(LBWORK+LLWORK) + END IF + + NBMIN = MAX( 2, ILAENV( 2, 'CGEQRF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Update the current column using old T's +* + DO 20 J = 1, I - NB, NB +* +* Apply H' to A(J:M,I:I+IB-1) from the left +* + CALL CLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-J+1, IB, NB, + $ A( J, J ), LDA, WORK(J), LBWORK, + $ A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1), + $ IB) + +20 CONTINUE +* +* Compute the QR factorization of the current block +* A(I:M,I:I+IB-1) +* + CALL CGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), + $ WORK(LBWORK*NB+NT*NT+1), IINFO ) + + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL CLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), + $ WORK(I), LBWORK ) +* + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) THEN + + IF ( I .NE. 1 ) THEN + + DO 30 J = 1, I - NB, NB +* +* Apply H' to A(J:M,I:K) from the left +* + CALL CLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-J+1, K-I+1, NB, + $ A( J, J ), LDA, WORK(J), LBWORK, + $ A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1), + $ K-I+1) +30 CONTINUE + + CALL CGEQR2( M-I+1, K-I+1, A( I, I ), LDA, TAU( I ), + $ WORK(LBWORK*NB+NT*NT+1),IINFO ) + + ELSE +* +* Use unblocked code to factor the last or only block. +* + CALL CGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), + $ WORK,IINFO ) + + END IF + END IF + + +* +* Apply update to the column M+1:N when N > M +* + IF ( M.LT.N .AND. I.NE.1) THEN +* +* Form the last triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + IF ( NT .LE. NB ) THEN + CALL CLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1, + $ A( I, I ), LDA, TAU( I ), WORK(I), LBWORK ) + ELSE + CALL CLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1, + $ A( I, I ), LDA, TAU( I ), + $ WORK(LBWORK*NB+1), NT ) + END IF + +* +* Apply H' to A(1:M,M+1:N) from the left +* + DO 40 J = 1, K-NX, NB + + IB = MIN( K-J+1, NB ) + + CALL CLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-J+1, N-M, IB, + $ A( J, J ), LDA, WORK(J), LBWORK, + $ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1), + $ N-M) + +40 CONTINUE + + IF ( NT.LE.NB ) THEN + CALL CLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-J+1, N-M, K-J+1, + $ A( J, J ), LDA, WORK(J), LBWORK, + $ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1), + $ N-M) + ELSE + CALL CLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-J+1, N-M, K-J+1, + $ A( J, J ), LDA, + $ WORK(LBWORK*NB+1), + $ NT, A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1), + $ N-M) + END IF + + END IF + + WORK( 1 ) = IWS + RETURN +* +* End of CGEQRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/qr/LL/dgeqrf.f b/dspl/liblapack/SRC/VARIANTS/qr/LL/dgeqrf.f new file mode 100644 index 0000000..8f1979d --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/qr/LL/dgeqrf.f @@ -0,0 +1,417 @@ +C> \brief \b DGEQRF VARIANT: left-looking Level 3 BLAS version of the algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> DGEQRF computes a QR factorization of a real M-by-N matrix A: +C> A = Q * R. +C> +C> This is the left-looking Level 3 BLAS version of the algorithm. +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] M +C> \verbatim +C> M is INTEGER +C> The number of rows of the matrix A. M >= 0. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The number of columns of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is DOUBLE PRECISION array, dimension (LDA,N) +C> On entry, the M-by-N matrix A. +C> On exit, the elements on and above the diagonal of the array +C> contain the min(M,N)-by-N upper trapezoidal matrix R (R is +C> upper triangular if m >= n); the elements below the diagonal, +C> with the array TAU, represent the orthogonal matrix Q as a +C> product of min(m,n) elementary reflectors (see Further +C> Details). +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,M). +C> \endverbatim +C> +C> \param[out] TAU +C> \verbatim +C> TAU is DOUBLE PRECISION array, dimension (min(M,N)) +C> The scalar factors of the elementary reflectors (see Further +C> Details). +C> \endverbatim +C> +C> \param[out] WORK +C> \verbatim +C> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +C> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +C> \endverbatim +C> +C> \param[in] LWORK +C> \verbatim +C> LWORK is INTEGER +C> \endverbatim +C> \verbatim +C> The dimension of the array WORK. The dimension can be divided into three parts. +C> \endverbatim +C> \verbatim +C> 1) The part for the triangular factor T. If the very last T is not bigger +C> than any of the rest, then this part is NB x ceiling(K/NB), otherwise, +C> NB x (K-NT), where K = min(M,N) and NT is the dimension of the very last T +C> \endverbatim +C> \verbatim +C> 2) The part for the very last T when T is bigger than any of the rest T. +C> The size of this part is NT x NT, where NT = K - ceiling ((K-NX)/NB) x NB, +C> where K = min(M,N), NX is calculated by +C> NX = MAX( 0, ILAENV( 3, 'DGEQRF', ' ', M, N, -1, -1 ) ) +C> \endverbatim +C> \verbatim +C> 3) The part for dlarfb is of size max((N-M)*K, (N-M)*NB, K*NB, NB*NB) +C> \endverbatim +C> \verbatim +C> So LWORK = part1 + part2 + part3 +C> \endverbatim +C> \verbatim +C> If LWORK = -1, then a workspace query is assumed; the routine +C> only calculates the optimal size of the WORK array, returns +C> this value as the first entry of the WORK array, and no error +C> message related to LWORK is issued by XERBLA. +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsGEcomputational +* +* Further Details +* =============== +C>\details \b Further \b Details +C> \verbatim +C> +C> The matrix Q is represented as a product of elementary reflectors +C> +C> Q = H(1) H(2) . . . H(k), where k = min(m,n). +C> +C> Each H(i) has the form +C> +C> H(i) = I - tau * v * v' +C> +C> where tau is a real scalar, and v is a real vector with +C> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +C> and tau in TAU(i). +C> +C> \endverbatim +C> +* ===================================================================== + SUBROUTINE DGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, K, LWKOPT, NB, + $ NBMIN, NX, LBWORK, NT, LLWORK +* .. +* .. External Subroutines .. + EXTERNAL DGEQR2, DLARFB, DLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SCEIL + EXTERNAL ILAENV, SCEIL +* .. +* .. Executable Statements .. + + INFO = 0 + NBMIN = 2 + NX = 0 + IWS = N + K = MIN( M, N ) + NB = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DGEQRF', ' ', M, N, -1, -1 ) ) + END IF +* +* Get NT, the size of the very last T, which is the left-over from in-between K-NX and K to K, eg.: +* +* NB=3 2NB=6 K=10 +* | | | +* 1--2--3--4--5--6--7--8--9--10 +* | \________/ +* K-NX=5 NT=4 +* +* So here 4 x 4 is the last T stored in the workspace +* + NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB + +* +* optimal workspace = space for dlarfb + space for normal T's + space for the last T +* + LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB)) + LLWORK = SCEIL(REAL(LLWORK)/REAL(NB)) + + IF ( NT.GT.NB ) THEN + + LBWORK = K-NT +* +* Optimal workspace for dlarfb = MAX(1,N)*NT +* + LWKOPT = (LBWORK+LLWORK)*NB + WORK( 1 ) = (LWKOPT+NT*NT) + + ELSE + + LBWORK = SCEIL(REAL(K)/REAL(NB))*NB + LWKOPT = (LBWORK+LLWORK-NB)*NB + WORK( 1 ) = LWKOPT + + END IF + +* +* Test the input arguments +* + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( NB.GT.1 .AND. NB.LT.K ) THEN + + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + IF ( NT.LE.NB ) THEN + IWS = (LBWORK+LLWORK-NB)*NB + ELSE + IWS = (LBWORK+LLWORK)*NB+NT*NT + END IF + + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + IF ( NT.LE.NB ) THEN + NB = LWORK / (LLWORK+(LBWORK-NB)) + ELSE + NB = (LWORK-NT*NT)/(LBWORK+LLWORK) + END IF + + NBMIN = MAX( 2, ILAENV( 2, 'DGEQRF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Update the current column using old T's +* + DO 20 J = 1, I - NB, NB +* +* Apply H' to A(J:M,I:I+IB-1) from the left +* + CALL DLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-J+1, IB, NB, + $ A( J, J ), LDA, WORK(J), LBWORK, + $ A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1), + $ IB) + +20 CONTINUE +* +* Compute the QR factorization of the current block +* A(I:M,I:I+IB-1) +* + CALL DGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), + $ WORK(LBWORK*NB+NT*NT+1), IINFO ) + + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), + $ WORK(I), LBWORK ) +* + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) THEN + + IF ( I .NE. 1 ) THEN + + DO 30 J = 1, I - NB, NB +* +* Apply H' to A(J:M,I:K) from the left +* + CALL DLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-J+1, K-I+1, NB, + $ A( J, J ), LDA, WORK(J), LBWORK, + $ A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1), + $ K-I+1) +30 CONTINUE + + CALL DGEQR2( M-I+1, K-I+1, A( I, I ), LDA, TAU( I ), + $ WORK(LBWORK*NB+NT*NT+1),IINFO ) + + ELSE +* +* Use unblocked code to factor the last or only block. +* + CALL DGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), + $ WORK,IINFO ) + + END IF + END IF + + +* +* Apply update to the column M+1:N when N > M +* + IF ( M.LT.N .AND. I.NE.1) THEN +* +* Form the last triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + IF ( NT .LE. NB ) THEN + CALL DLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1, + $ A( I, I ), LDA, TAU( I ), WORK(I), LBWORK ) + ELSE + CALL DLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1, + $ A( I, I ), LDA, TAU( I ), + $ WORK(LBWORK*NB+1), NT ) + END IF + +* +* Apply H' to A(1:M,M+1:N) from the left +* + DO 40 J = 1, K-NX, NB + + IB = MIN( K-J+1, NB ) + + CALL DLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-J+1, N-M, IB, + $ A( J, J ), LDA, WORK(J), LBWORK, + $ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1), + $ N-M) + +40 CONTINUE + + IF ( NT.LE.NB ) THEN + CALL DLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-J+1, N-M, K-J+1, + $ A( J, J ), LDA, WORK(J), LBWORK, + $ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1), + $ N-M) + ELSE + CALL DLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-J+1, N-M, K-J+1, + $ A( J, J ), LDA, + $ WORK(LBWORK*NB+1), + $ NT, A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1), + $ N-M) + END IF + + END IF + + WORK( 1 ) = IWS + RETURN +* +* End of DGEQRF +* + END + diff --git a/dspl/liblapack/SRC/VARIANTS/qr/LL/sceil.f b/dspl/liblapack/SRC/VARIANTS/qr/LL/sceil.f new file mode 100644 index 0000000..86394cc --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/qr/LL/sceil.f @@ -0,0 +1,87 @@ +C> \brief \b SCEIL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* REAL FUNCTION SCEIL( A ) +* +* .. Scalar Arguments .. +* REAL A +* .. +* +* ===================================================================== +* +* .. Intrinsic Functions .. +* INTRINSIC INT +* .. +* .. Executable Statements ..* +* +* IF (A-INT(A).EQ.0) THEN +* SCEIL = A +* ELSE IF (A.GT.0) THEN +* SCEIL = INT(A)+1; +* ELSE +* SCEIL = INT(A) +* END IF +* +* RETURN +* +* END +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C>\endverbatim +* +* Arguments: +* ========== +* +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsOTHERcomputational +* +* ===================================================================== + REAL FUNCTION SCEIL( A ) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments ..* + REAL A +* .. +* +* ===================================================================== +* +* .. Intrinsic Functions .. + INTRINSIC INT +* .. +* .. Executable Statements ..* +* + IF (A-INT(A).EQ.0) THEN + SCEIL = A + ELSE IF (A.GT.0) THEN + SCEIL = INT(A)+1; + ELSE + SCEIL = INT(A) + END IF + + RETURN +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/qr/LL/sgeqrf.f b/dspl/liblapack/SRC/VARIANTS/qr/LL/sgeqrf.f new file mode 100644 index 0000000..7b02271 --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/qr/LL/sgeqrf.f @@ -0,0 +1,416 @@ +C> \brief \b SGEQRF VARIANT: left-looking Level 3 BLAS version of the algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE SGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> SGEQRF computes a QR factorization of a real M-by-N matrix A: +C> A = Q * R. +C> +C> This is the left-looking Level 3 BLAS version of the algorithm. +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] M +C> \verbatim +C> M is INTEGER +C> The number of rows of the matrix A. M >= 0. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The number of columns of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is REAL array, dimension (LDA,N) +C> On entry, the M-by-N matrix A. +C> On exit, the elements on and above the diagonal of the array +C> contain the min(M,N)-by-N upper trapezoidal matrix R (R is +C> upper triangular if m >= n); the elements below the diagonal, +C> with the array TAU, represent the orthogonal matrix Q as a +C> product of min(m,n) elementary reflectors (see Further +C> Details). +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,M). +C> \endverbatim +C> +C> \param[out] TAU +C> \verbatim +C> TAU is REAL array, dimension (min(M,N)) +C> The scalar factors of the elementary reflectors (see Further +C> Details). +C> \endverbatim +C> +C> \param[out] WORK +C> \verbatim +C> WORK is REAL array, dimension (MAX(1,LWORK)) +C> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +C> \endverbatim +C> +C> \param[in] LWORK +C> \verbatim +C> LWORK is INTEGER +C> \endverbatim +C> \verbatim +C> The dimension of the array WORK. The dimension can be divided into three parts. +C> \endverbatim +C> \verbatim +C> 1) The part for the triangular factor T. If the very last T is not bigger +C> than any of the rest, then this part is NB x ceiling(K/NB), otherwise, +C> NB x (K-NT), where K = min(M,N) and NT is the dimension of the very last T +C> \endverbatim +C> \verbatim +C> 2) The part for the very last T when T is bigger than any of the rest T. +C> The size of this part is NT x NT, where NT = K - ceiling ((K-NX)/NB) x NB, +C> where K = min(M,N), NX is calculated by +C> NX = MAX( 0, ILAENV( 3, 'SGEQRF', ' ', M, N, -1, -1 ) ) +C> \endverbatim +C> \verbatim +C> 3) The part for dlarfb is of size max((N-M)*K, (N-M)*NB, K*NB, NB*NB) +C> \endverbatim +C> \verbatim +C> So LWORK = part1 + part2 + part3 +C> \endverbatim +C> \verbatim +C> If LWORK = -1, then a workspace query is assumed; the routine +C> only calculates the optimal size of the WORK array, returns +C> this value as the first entry of the WORK array, and no error +C> message related to LWORK is issued by XERBLA. +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsGEcomputational +* +* Further Details +* =============== +C>\details \b Further \b Details +C> \verbatim +C> +C> The matrix Q is represented as a product of elementary reflectors +C> +C> Q = H(1) H(2) . . . H(k), where k = min(m,n). +C> +C> Each H(i) has the form +C> +C> H(i) = I - tau * v * v' +C> +C> where tau is a real scalar, and v is a real vector with +C> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +C> and tau in TAU(i). +C> +C> \endverbatim +C> +* ===================================================================== + SUBROUTINE SGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, K, LWKOPT, NB, + $ NBMIN, NX, LBWORK, NT, LLWORK +* .. +* .. External Subroutines .. + EXTERNAL SGEQR2, SLARFB, SLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SCEIL + EXTERNAL ILAENV, SCEIL +* .. +* .. Executable Statements .. + + INFO = 0 + NBMIN = 2 + NX = 0 + IWS = N + K = MIN( M, N ) + NB = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SGEQRF', ' ', M, N, -1, -1 ) ) + END IF +* +* Get NT, the size of the very last T, which is the left-over from in-between K-NX and K to K, eg.: +* +* NB=3 2NB=6 K=10 +* | | | +* 1--2--3--4--5--6--7--8--9--10 +* | \________/ +* K-NX=5 NT=4 +* +* So here 4 x 4 is the last T stored in the workspace +* + NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB + +* +* optimal workspace = space for dlarfb + space for normal T's + space for the last T +* + LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB)) + LLWORK = SCEIL(REAL(LLWORK)/REAL(NB)) + + IF ( NT.GT.NB ) THEN + + LBWORK = K-NT +* +* Optimal workspace for dlarfb = MAX(1,N)*NT +* + LWKOPT = (LBWORK+LLWORK)*NB + WORK( 1 ) = (LWKOPT+NT*NT) + + ELSE + + LBWORK = SCEIL(REAL(K)/REAL(NB))*NB + LWKOPT = (LBWORK+LLWORK-NB)*NB + WORK( 1 ) = LWKOPT + + END IF + +* +* Test the input arguments +* + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( NB.GT.1 .AND. NB.LT.K ) THEN + + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + IF ( NT.LE.NB ) THEN + IWS = (LBWORK+LLWORK-NB)*NB + ELSE + IWS = (LBWORK+LLWORK)*NB+NT*NT + END IF + + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + IF ( NT.LE.NB ) THEN + NB = LWORK / (LLWORK+(LBWORK-NB)) + ELSE + NB = (LWORK-NT*NT)/(LBWORK+LLWORK) + END IF + + NBMIN = MAX( 2, ILAENV( 2, 'SGEQRF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Update the current column using old T's +* + DO 20 J = 1, I - NB, NB +* +* Apply H' to A(J:M,I:I+IB-1) from the left +* + CALL SLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-J+1, IB, NB, + $ A( J, J ), LDA, WORK(J), LBWORK, + $ A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1), + $ IB) + +20 CONTINUE +* +* Compute the QR factorization of the current block +* A(I:M,I:I+IB-1) +* + CALL SGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), + $ WORK(LBWORK*NB+NT*NT+1), IINFO ) + + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL SLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), + $ WORK(I), LBWORK ) +* + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) THEN + + IF ( I .NE. 1 ) THEN + + DO 30 J = 1, I - NB, NB +* +* Apply H' to A(J:M,I:K) from the left +* + CALL SLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-J+1, K-I+1, NB, + $ A( J, J ), LDA, WORK(J), LBWORK, + $ A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1), + $ K-I+1) +30 CONTINUE + + CALL SGEQR2( M-I+1, K-I+1, A( I, I ), LDA, TAU( I ), + $ WORK(LBWORK*NB+NT*NT+1),IINFO ) + + ELSE +* +* Use unblocked code to factor the last or only block. +* + CALL SGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), + $ WORK,IINFO ) + + END IF + END IF + + +* +* Apply update to the column M+1:N when N > M +* + IF ( M.LT.N .AND. I.NE.1) THEN +* +* Form the last triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + IF ( NT .LE. NB ) THEN + CALL SLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1, + $ A( I, I ), LDA, TAU( I ), WORK(I), LBWORK ) + ELSE + CALL SLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1, + $ A( I, I ), LDA, TAU( I ), + $ WORK(LBWORK*NB+1), NT ) + END IF + +* +* Apply H' to A(1:M,M+1:N) from the left +* + DO 40 J = 1, K-NX, NB + + IB = MIN( K-J+1, NB ) + + CALL SLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-J+1, N-M, IB, + $ A( J, J ), LDA, WORK(J), LBWORK, + $ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1), + $ N-M) + +40 CONTINUE + + IF ( NT.LE.NB ) THEN + CALL SLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-J+1, N-M, K-J+1, + $ A( J, J ), LDA, WORK(J), LBWORK, + $ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1), + $ N-M) + ELSE + CALL SLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-J+1, N-M, K-J+1, + $ A( J, J ), LDA, + $ WORK(LBWORK*NB+1), + $ NT, A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1), + $ N-M) + END IF + + END IF + + WORK( 1 ) = IWS + RETURN +* +* End of SGEQRF +* + END diff --git a/dspl/liblapack/SRC/VARIANTS/qr/LL/zgeqrf.f b/dspl/liblapack/SRC/VARIANTS/qr/LL/zgeqrf.f new file mode 100644 index 0000000..4ddad00 --- /dev/null +++ b/dspl/liblapack/SRC/VARIANTS/qr/LL/zgeqrf.f @@ -0,0 +1,416 @@ +C> \brief \b ZGEQRF VARIANT: left-looking Level 3 BLAS of the algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* Purpose +* ======= +* +C>\details \b Purpose: +C>\verbatim +C> +C> ZGEQRF computes a QR factorization of a real M-by-N matrix A: +C> A = Q * R. +C> +C> This is the left-looking Level 3 BLAS version of the algorithm. +C> +C>\endverbatim +* +* Arguments: +* ========== +* +C> \param[in] M +C> \verbatim +C> M is INTEGER +C> The number of rows of the matrix A. M >= 0. +C> \endverbatim +C> +C> \param[in] N +C> \verbatim +C> N is INTEGER +C> The number of columns of the matrix A. N >= 0. +C> \endverbatim +C> +C> \param[in,out] A +C> \verbatim +C> A is COMPLEX*16 array, dimension (LDA,N) +C> On entry, the M-by-N matrix A. +C> On exit, the elements on and above the diagonal of the array +C> contain the min(M,N)-by-N upper trapezoidal matrix R (R is +C> upper triangular if m >= n); the elements below the diagonal, +C> with the array TAU, represent the orthogonal matrix Q as a +C> product of min(m,n) elementary reflectors (see Further +C> Details). +C> \endverbatim +C> +C> \param[in] LDA +C> \verbatim +C> LDA is INTEGER +C> The leading dimension of the array A. LDA >= max(1,M). +C> \endverbatim +C> +C> \param[out] TAU +C> \verbatim +C> TAU is COMPLEX*16 array, dimension (min(M,N)) +C> The scalar factors of the elementary reflectors (see Further +C> Details). +C> \endverbatim +C> +C> \param[out] WORK +C> \verbatim +C> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +C> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +C> \endverbatim +C> +C> \param[in] LWORK +C> \verbatim +C> LWORK is INTEGER +C> \endverbatim +C> \verbatim +C> The dimension of the array WORK. The dimension can be divided into three parts. +C> \endverbatim +C> \verbatim +C> 1) The part for the triangular factor T. If the very last T is not bigger +C> than any of the rest, then this part is NB x ceiling(K/NB), otherwise, +C> NB x (K-NT), where K = min(M,N) and NT is the dimension of the very last T +C> \endverbatim +C> \verbatim +C> 2) The part for the very last T when T is bigger than any of the rest T. +C> The size of this part is NT x NT, where NT = K - ceiling ((K-NX)/NB) x NB, +C> where K = min(M,N), NX is calculated by +C> NX = MAX( 0, ILAENV( 3, 'ZGEQRF', ' ', M, N, -1, -1 ) ) +C> \endverbatim +C> \verbatim +C> 3) The part for dlarfb is of size max((N-M)*K, (N-M)*NB, K*NB, NB*NB) +C> \endverbatim +C> \verbatim +C> So LWORK = part1 + part2 + part3 +C> \endverbatim +C> \verbatim +C> If LWORK = -1, then a workspace query is assumed; the routine +C> only calculates the optimal size of the WORK array, returns +C> this value as the first entry of the WORK array, and no error +C> message related to LWORK is issued by XERBLA. +C> \endverbatim +C> +C> \param[out] INFO +C> \verbatim +C> INFO is INTEGER +C> = 0: successful exit +C> < 0: if INFO = -i, the i-th argument had an illegal value +C> \endverbatim +C> +* +* Authors: +* ======== +* +C> \author Univ. of Tennessee +C> \author Univ. of California Berkeley +C> \author Univ. of Colorado Denver +C> \author NAG Ltd. +* +C> \date December 2016 +* +C> \ingroup variantsGEcomputational +* +* Further Details +* =============== +C>\details \b Further \b Details +C> \verbatim +C> +C> The matrix Q is represented as a product of elementary reflectors +C> +C> Q = H(1) H(2) . . . H(k), where k = min(m,n). +C> +C> Each H(i) has the form +C> +C> H(i) = I - tau * v * v' +C> +C> where tau is a real scalar, and v is a real vector with +C> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +C> and tau in TAU(i). +C> +C> \endverbatim +C> +* ===================================================================== + SUBROUTINE ZGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, K, LWKOPT, NB, + $ NBMIN, NX, LBWORK, NT, LLWORK +* .. +* .. External Subroutines .. + EXTERNAL ZGEQR2, ZLARFB, ZLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SCEIL + EXTERNAL ILAENV, SCEIL +* .. +* .. Executable Statements .. + + INFO = 0 + NBMIN = 2 + NX = 0 + IWS = N + K = MIN( M, N ) + NB = ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZGEQRF', ' ', M, N, -1, -1 ) ) + END IF +* +* Get NT, the size of the very last T, which is the left-over from in-between K-NX and K to K, eg.: +* +* NB=3 2NB=6 K=10 +* | | | +* 1--2--3--4--5--6--7--8--9--10 +* | \________/ +* K-NX=5 NT=4 +* +* So here 4 x 4 is the last T stored in the workspace +* + NT = K-SCEIL(REAL(K-NX)/REAL(NB))*NB + +* +* optimal workspace = space for dlarfb + space for normal T's + space for the last T +* + LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB)) + LLWORK = SCEIL(REAL(LLWORK)/REAL(NB)) + + IF ( NT.GT.NB ) THEN + + LBWORK = K-NT +* +* Optimal workspace for dlarfb = MAX(1,N)*NT +* + LWKOPT = (LBWORK+LLWORK)*NB + WORK( 1 ) = (LWKOPT+NT*NT) + + ELSE + + LBWORK = SCEIL(REAL(K)/REAL(NB))*NB + LWKOPT = (LBWORK+LLWORK-NB)*NB + WORK( 1 ) = LWKOPT + + END IF + +* +* Test the input arguments +* + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( NB.GT.1 .AND. NB.LT.K ) THEN + + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + IF ( NT.LE.NB ) THEN + IWS = (LBWORK+LLWORK-NB)*NB + ELSE + IWS = (LBWORK+LLWORK)*NB+NT*NT + END IF + + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + IF ( NT.LE.NB ) THEN + NB = LWORK / (LLWORK+(LBWORK-NB)) + ELSE + NB = (LWORK-NT*NT)/(LBWORK+LLWORK) + END IF + + NBMIN = MAX( 2, ILAENV( 2, 'ZGEQRF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Update the current column using old T's +* + DO 20 J = 1, I - NB, NB +* +* Apply H' to A(J:M,I:I+IB-1) from the left +* + CALL ZLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-J+1, IB, NB, + $ A( J, J ), LDA, WORK(J), LBWORK, + $ A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1), + $ IB) + +20 CONTINUE +* +* Compute the QR factorization of the current block +* A(I:M,I:I+IB-1) +* + CALL ZGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), + $ WORK(LBWORK*NB+NT*NT+1), IINFO ) + + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL ZLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), + $ WORK(I), LBWORK ) +* + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) THEN + + IF ( I .NE. 1 ) THEN + + DO 30 J = 1, I - NB, NB +* +* Apply H' to A(J:M,I:K) from the left +* + CALL ZLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-J+1, K-I+1, NB, + $ A( J, J ), LDA, WORK(J), LBWORK, + $ A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1), + $ K-I+1) +30 CONTINUE + + CALL ZGEQR2( M-I+1, K-I+1, A( I, I ), LDA, TAU( I ), + $ WORK(LBWORK*NB+NT*NT+1),IINFO ) + + ELSE +* +* Use unblocked code to factor the last or only block. +* + CALL ZGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), + $ WORK,IINFO ) + + END IF + END IF + + +* +* Apply update to the column M+1:N when N > M +* + IF ( M.LT.N .AND. I.NE.1) THEN +* +* Form the last triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + IF ( NT .LE. NB ) THEN + CALL ZLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1, + $ A( I, I ), LDA, TAU( I ), WORK(I), LBWORK ) + ELSE + CALL ZLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1, + $ A( I, I ), LDA, TAU( I ), + $ WORK(LBWORK*NB+1), NT ) + END IF + +* +* Apply H' to A(1:M,M+1:N) from the left +* + DO 40 J = 1, K-NX, NB + + IB = MIN( K-J+1, NB ) + + CALL ZLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-J+1, N-M, IB, + $ A( J, J ), LDA, WORK(J), LBWORK, + $ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1), + $ N-M) + +40 CONTINUE + + IF ( NT.LE.NB ) THEN + CALL ZLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-J+1, N-M, K-J+1, + $ A( J, J ), LDA, WORK(J), LBWORK, + $ A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1), + $ N-M) + ELSE + CALL ZLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-J+1, N-M, K-J+1, + $ A( J, J ), LDA, + $ WORK(LBWORK*NB+1), + $ NT, A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1), + $ N-M) + END IF + + END IF + + WORK( 1 ) = IWS + RETURN +* +* End of ZGEQRF +* + END diff --git a/dspl/liblapack/SRC/cbbcsd.f b/dspl/liblapack/SRC/cbbcsd.f new file mode 100644 index 0000000..b5a18d5 --- /dev/null +++ b/dspl/liblapack/SRC/cbbcsd.f @@ -0,0 +1,1086 @@ +*> \brief \b CBBCSD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CBBCSD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, +* THETA, PHI, U1, LDU1, U2, LDU2, V1T, LDV1T, +* V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E, +* B22D, B22E, RWORK, LRWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS +* INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LRWORK, M, P, Q +* .. +* .. Array Arguments .. +* REAL B11D( * ), B11E( * ), B12D( * ), B12E( * ), +* $ B21D( * ), B21E( * ), B22D( * ), B22E( * ), +* $ PHI( * ), THETA( * ), RWORK( * ) +* COMPLEX U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ), +* $ V2T( LDV2T, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CBBCSD computes the CS decomposition of a unitary matrix in +*> bidiagonal-block form, +*> +*> +*> [ B11 | B12 0 0 ] +*> [ 0 | 0 -I 0 ] +*> X = [----------------] +*> [ B21 | B22 0 0 ] +*> [ 0 | 0 0 I ] +*> +*> [ C | -S 0 0 ] +*> [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**H +*> = [---------] [---------------] [---------] . +*> [ | U2 ] [ S | C 0 0 ] [ | V2 ] +*> [ 0 | 0 0 I ] +*> +*> X is M-by-M, its top-left block is P-by-Q, and Q must be no larger +*> than P, M-P, or M-Q. (If Q is not the smallest index, then X must be +*> transposed and/or permuted. This can be done in constant time using +*> the TRANS and SIGNS options. See CUNCSD for details.) +*> +*> The bidiagonal matrices B11, B12, B21, and B22 are represented +*> implicitly by angles THETA(1:Q) and PHI(1:Q-1). +*> +*> The unitary matrices U1, U2, V1T, and V2T are input/output. +*> The input matrices are pre- or post-multiplied by the appropriate +*> singular vector matrices. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU1 +*> \verbatim +*> JOBU1 is CHARACTER +*> = 'Y': U1 is updated; +*> otherwise: U1 is not updated. +*> \endverbatim +*> +*> \param[in] JOBU2 +*> \verbatim +*> JOBU2 is CHARACTER +*> = 'Y': U2 is updated; +*> otherwise: U2 is not updated. +*> \endverbatim +*> +*> \param[in] JOBV1T +*> \verbatim +*> JOBV1T is CHARACTER +*> = 'Y': V1T is updated; +*> otherwise: V1T is not updated. +*> \endverbatim +*> +*> \param[in] JOBV2T +*> \verbatim +*> JOBV2T is CHARACTER +*> = 'Y': V2T is updated; +*> otherwise: V2T is not updated. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER +*> = 'T': X, U1, U2, V1T, and V2T are stored in row-major +*> order; +*> otherwise: X, U1, U2, V1T, and V2T are stored in column- +*> major order. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows and columns in X, the unitary matrix in +*> bidiagonal-block form. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in the top-left block of X. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in the top-left block of X. +*> 0 <= Q <= MIN(P,M-P,M-Q). +*> \endverbatim +*> +*> \param[in,out] THETA +*> \verbatim +*> THETA is REAL array, dimension (Q) +*> On entry, the angles THETA(1),...,THETA(Q) that, along with +*> PHI(1), ...,PHI(Q-1), define the matrix in bidiagonal-block +*> form. On exit, the angles whose cosines and sines define the +*> diagonal blocks in the CS decomposition. +*> \endverbatim +*> +*> \param[in,out] PHI +*> \verbatim +*> PHI is REAL array, dimension (Q-1) +*> The angles PHI(1),...,PHI(Q-1) that, along with THETA(1),..., +*> THETA(Q), define the matrix in bidiagonal-block form. +*> \endverbatim +*> +*> \param[in,out] U1 +*> \verbatim +*> U1 is COMPLEX array, dimension (LDU1,P) +*> On entry, a P-by-P matrix. On exit, U1 is postmultiplied +*> by the left singular vector matrix common to [ B11 ; 0 ] and +*> [ B12 0 0 ; 0 -I 0 0 ]. +*> \endverbatim +*> +*> \param[in] LDU1 +*> \verbatim +*> LDU1 is INTEGER +*> The leading dimension of the array U1, LDU1 >= MAX(1,P). +*> \endverbatim +*> +*> \param[in,out] U2 +*> \verbatim +*> U2 is COMPLEX array, dimension (LDU2,M-P) +*> On entry, an (M-P)-by-(M-P) matrix. On exit, U2 is +*> postmultiplied by the left singular vector matrix common to +*> [ B21 ; 0 ] and [ B22 0 0 ; 0 0 I ]. +*> \endverbatim +*> +*> \param[in] LDU2 +*> \verbatim +*> LDU2 is INTEGER +*> The leading dimension of the array U2, LDU2 >= MAX(1,M-P). +*> \endverbatim +*> +*> \param[in,out] V1T +*> \verbatim +*> V1T is COMPLEX array, dimension (LDV1T,Q) +*> On entry, a Q-by-Q matrix. On exit, V1T is premultiplied +*> by the conjugate transpose of the right singular vector +*> matrix common to [ B11 ; 0 ] and [ B21 ; 0 ]. +*> \endverbatim +*> +*> \param[in] LDV1T +*> \verbatim +*> LDV1T is INTEGER +*> The leading dimension of the array V1T, LDV1T >= MAX(1,Q). +*> \endverbatim +*> +*> \param[in,out] V2T +*> \verbatim +*> V2T is COMPLEX array, dimension (LDV2T,M-Q) +*> On entry, an (M-Q)-by-(M-Q) matrix. On exit, V2T is +*> premultiplied by the conjugate transpose of the right +*> singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and +*> [ B22 0 0 ; 0 0 I ]. +*> \endverbatim +*> +*> \param[in] LDV2T +*> \verbatim +*> LDV2T is INTEGER +*> The leading dimension of the array V2T, LDV2T >= MAX(1,M-Q). +*> \endverbatim +*> +*> \param[out] B11D +*> \verbatim +*> B11D is REAL array, dimension (Q) +*> When CBBCSD converges, B11D contains the cosines of THETA(1), +*> ..., THETA(Q). If CBBCSD fails to converge, then B11D +*> contains the diagonal of the partially reduced top-left +*> block. +*> \endverbatim +*> +*> \param[out] B11E +*> \verbatim +*> B11E is REAL array, dimension (Q-1) +*> When CBBCSD converges, B11E contains zeros. If CBBCSD fails +*> to converge, then B11E contains the superdiagonal of the +*> partially reduced top-left block. +*> \endverbatim +*> +*> \param[out] B12D +*> \verbatim +*> B12D is REAL array, dimension (Q) +*> When CBBCSD converges, B12D contains the negative sines of +*> THETA(1), ..., THETA(Q). If CBBCSD fails to converge, then +*> B12D contains the diagonal of the partially reduced top-right +*> block. +*> \endverbatim +*> +*> \param[out] B12E +*> \verbatim +*> B12E is REAL array, dimension (Q-1) +*> When CBBCSD converges, B12E contains zeros. If CBBCSD fails +*> to converge, then B12E contains the subdiagonal of the +*> partially reduced top-right block. +*> \endverbatim +*> +*> \param[out] B21D +*> \verbatim +*> B21D is REAL array, dimension (Q) +*> When CBBCSD converges, B21D contains the negative sines of +*> THETA(1), ..., THETA(Q). If CBBCSD fails to converge, then +*> B21D contains the diagonal of the partially reduced bottom-left +*> block. +*> \endverbatim +*> +*> \param[out] B21E +*> \verbatim +*> B21E is REAL array, dimension (Q-1) +*> When CBBCSD converges, B21E contains zeros. If CBBCSD fails +*> to converge, then B21E contains the subdiagonal of the +*> partially reduced bottom-left block. +*> \endverbatim +*> +*> \param[out] B22D +*> \verbatim +*> B22D is REAL array, dimension (Q) +*> When CBBCSD converges, B22D contains the negative sines of +*> THETA(1), ..., THETA(Q). If CBBCSD fails to converge, then +*> B22D contains the diagonal of the partially reduced bottom-right +*> block. +*> \endverbatim +*> +*> \param[out] B22E +*> \verbatim +*> B22E is REAL array, dimension (Q-1) +*> When CBBCSD converges, B22E contains zeros. If CBBCSD fails +*> to converge, then B22E contains the subdiagonal of the +*> partially reduced bottom-right block. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (MAX(1,LRWORK)) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of the array RWORK. LRWORK >= MAX(1,8*Q). +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the RWORK array, +*> returns this value as the first entry of the work array, and +*> no error message related to LRWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if CBBCSD did not converge, INFO specifies the number +*> of nonzero entries in PHI, and B11D, B11E, etc., +*> contain the partially reduced matrix. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> TOLMUL REAL, default = MAX(10,MIN(100,EPS**(-1/8))) +*> TOLMUL controls the convergence criterion of the QR loop. +*> Angles THETA(i), PHI(i) are rounded to 0 or PI/2 when they +*> are within TOLMUL*EPS of either bound. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, + $ THETA, PHI, U1, LDU1, U2, LDU2, V1T, LDV1T, + $ V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E, + $ B22D, B22E, RWORK, LRWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS + INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LRWORK, M, P, Q +* .. +* .. Array Arguments .. + REAL B11D( * ), B11E( * ), B12D( * ), B12E( * ), + $ B21D( * ), B21E( * ), B22D( * ), B22E( * ), + $ PHI( * ), THETA( * ), RWORK( * ) + COMPLEX U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ), + $ V2T( LDV2T, * ) +* .. +* +* =================================================================== +* +* .. Parameters .. + INTEGER MAXITR + PARAMETER ( MAXITR = 6 ) + REAL HUNDRED, MEIGHTH, ONE, PIOVER2, TEN, ZERO + PARAMETER ( HUNDRED = 100.0E0, MEIGHTH = -0.125E0, + $ ONE = 1.0E0, PIOVER2 = 1.57079632679489662E0, + $ TEN = 10.0E0, ZERO = 0.0E0 ) + COMPLEX NEGONECOMPLEX + PARAMETER ( NEGONECOMPLEX = (-1.0E0,0.0E0) ) +* .. +* .. Local Scalars .. + LOGICAL COLMAJOR, LQUERY, RESTART11, RESTART12, + $ RESTART21, RESTART22, WANTU1, WANTU2, WANTV1T, + $ WANTV2T + INTEGER I, IMIN, IMAX, ITER, IU1CS, IU1SN, IU2CS, + $ IU2SN, IV1TCS, IV1TSN, IV2TCS, IV2TSN, J, + $ LRWORKMIN, LRWORKOPT, MAXIT, MINI + REAL B11BULGE, B12BULGE, B21BULGE, B22BULGE, DUMMY, + $ EPS, MU, NU, R, SIGMA11, SIGMA21, + $ TEMP, THETAMAX, THETAMIN, THRESH, TOL, TOLMUL, + $ UNFL, X1, X2, Y1, Y2 +* +* .. External Subroutines .. + EXTERNAL CLASR, CSCAL, CSWAP, SLARTGP, SLARTGS, SLAS2, + $ XERBLA +* .. +* .. External Functions .. + REAL SLAMCH + LOGICAL LSAME + EXTERNAL LSAME, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, ATAN2, COS, MAX, MIN, SIN, SQRT +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + LQUERY = LRWORK .EQ. -1 + WANTU1 = LSAME( JOBU1, 'Y' ) + WANTU2 = LSAME( JOBU2, 'Y' ) + WANTV1T = LSAME( JOBV1T, 'Y' ) + WANTV2T = LSAME( JOBV2T, 'Y' ) + COLMAJOR = .NOT. LSAME( TRANS, 'T' ) +* + IF( M .LT. 0 ) THEN + INFO = -6 + ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN + INFO = -7 + ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN + INFO = -8 + ELSE IF( Q .GT. P .OR. Q .GT. M-P .OR. Q .GT. M-Q ) THEN + INFO = -8 + ELSE IF( WANTU1 .AND. LDU1 .LT. P ) THEN + INFO = -12 + ELSE IF( WANTU2 .AND. LDU2 .LT. M-P ) THEN + INFO = -14 + ELSE IF( WANTV1T .AND. LDV1T .LT. Q ) THEN + INFO = -16 + ELSE IF( WANTV2T .AND. LDV2T .LT. M-Q ) THEN + INFO = -18 + END IF +* +* Quick return if Q = 0 +* + IF( INFO .EQ. 0 .AND. Q .EQ. 0 ) THEN + LRWORKMIN = 1 + RWORK(1) = LRWORKMIN + RETURN + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + IU1CS = 1 + IU1SN = IU1CS + Q + IU2CS = IU1SN + Q + IU2SN = IU2CS + Q + IV1TCS = IU2SN + Q + IV1TSN = IV1TCS + Q + IV2TCS = IV1TSN + Q + IV2TSN = IV2TCS + Q + LRWORKOPT = IV2TSN + Q - 1 + LRWORKMIN = LRWORKOPT + RWORK(1) = LRWORKOPT + IF( LRWORK .LT. LRWORKMIN .AND. .NOT. LQUERY ) THEN + INFO = -28 + END IF + END IF +* + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'CBBCSD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'Epsilon' ) + UNFL = SLAMCH( 'Safe minimum' ) + TOLMUL = MAX( TEN, MIN( HUNDRED, EPS**MEIGHTH ) ) + TOL = TOLMUL*EPS + THRESH = MAX( TOL, MAXITR*Q*Q*UNFL ) +* +* Test for negligible sines or cosines +* + DO I = 1, Q + IF( THETA(I) .LT. THRESH ) THEN + THETA(I) = ZERO + ELSE IF( THETA(I) .GT. PIOVER2-THRESH ) THEN + THETA(I) = PIOVER2 + END IF + END DO + DO I = 1, Q-1 + IF( PHI(I) .LT. THRESH ) THEN + PHI(I) = ZERO + ELSE IF( PHI(I) .GT. PIOVER2-THRESH ) THEN + PHI(I) = PIOVER2 + END IF + END DO +* +* Initial deflation +* + IMAX = Q + DO WHILE( IMAX .GT. 1 ) + IF( PHI(IMAX-1) .NE. ZERO ) THEN + EXIT + END IF + IMAX = IMAX - 1 + END DO + IMIN = IMAX - 1 + IF ( IMIN .GT. 1 ) THEN + DO WHILE( PHI(IMIN-1) .NE. ZERO ) + IMIN = IMIN - 1 + IF ( IMIN .LE. 1 ) EXIT + END DO + END IF +* +* Initialize iteration counter +* + MAXIT = MAXITR*Q*Q + ITER = 0 +* +* Begin main iteration loop +* + DO WHILE( IMAX .GT. 1 ) +* +* Compute the matrix entries +* + B11D(IMIN) = COS( THETA(IMIN) ) + B21D(IMIN) = -SIN( THETA(IMIN) ) + DO I = IMIN, IMAX - 1 + B11E(I) = -SIN( THETA(I) ) * SIN( PHI(I) ) + B11D(I+1) = COS( THETA(I+1) ) * COS( PHI(I) ) + B12D(I) = SIN( THETA(I) ) * COS( PHI(I) ) + B12E(I) = COS( THETA(I+1) ) * SIN( PHI(I) ) + B21E(I) = -COS( THETA(I) ) * SIN( PHI(I) ) + B21D(I+1) = -SIN( THETA(I+1) ) * COS( PHI(I) ) + B22D(I) = COS( THETA(I) ) * COS( PHI(I) ) + B22E(I) = -SIN( THETA(I+1) ) * SIN( PHI(I) ) + END DO + B12D(IMAX) = SIN( THETA(IMAX) ) + B22D(IMAX) = COS( THETA(IMAX) ) +* +* Abort if not converging; otherwise, increment ITER +* + IF( ITER .GT. MAXIT ) THEN + INFO = 0 + DO I = 1, Q + IF( PHI(I) .NE. ZERO ) + $ INFO = INFO + 1 + END DO + RETURN + END IF +* + ITER = ITER + IMAX - IMIN +* +* Compute shifts +* + THETAMAX = THETA(IMIN) + THETAMIN = THETA(IMIN) + DO I = IMIN+1, IMAX + IF( THETA(I) > THETAMAX ) + $ THETAMAX = THETA(I) + IF( THETA(I) < THETAMIN ) + $ THETAMIN = THETA(I) + END DO +* + IF( THETAMAX .GT. PIOVER2 - THRESH ) THEN +* +* Zero on diagonals of B11 and B22; induce deflation with a +* zero shift +* + MU = ZERO + NU = ONE +* + ELSE IF( THETAMIN .LT. THRESH ) THEN +* +* Zero on diagonals of B12 and B22; induce deflation with a +* zero shift +* + MU = ONE + NU = ZERO +* + ELSE +* +* Compute shifts for B11 and B21 and use the lesser +* + CALL SLAS2( B11D(IMAX-1), B11E(IMAX-1), B11D(IMAX), SIGMA11, + $ DUMMY ) + CALL SLAS2( B21D(IMAX-1), B21E(IMAX-1), B21D(IMAX), SIGMA21, + $ DUMMY ) +* + IF( SIGMA11 .LE. SIGMA21 ) THEN + MU = SIGMA11 + NU = SQRT( ONE - MU**2 ) + IF( MU .LT. THRESH ) THEN + MU = ZERO + NU = ONE + END IF + ELSE + NU = SIGMA21 + MU = SQRT( 1.0 - NU**2 ) + IF( NU .LT. THRESH ) THEN + MU = ONE + NU = ZERO + END IF + END IF + END IF +* +* Rotate to produce bulges in B11 and B21 +* + IF( MU .LE. NU ) THEN + CALL SLARTGS( B11D(IMIN), B11E(IMIN), MU, + $ RWORK(IV1TCS+IMIN-1), RWORK(IV1TSN+IMIN-1) ) + ELSE + CALL SLARTGS( B21D(IMIN), B21E(IMIN), NU, + $ RWORK(IV1TCS+IMIN-1), RWORK(IV1TSN+IMIN-1) ) + END IF +* + TEMP = RWORK(IV1TCS+IMIN-1)*B11D(IMIN) + + $ RWORK(IV1TSN+IMIN-1)*B11E(IMIN) + B11E(IMIN) = RWORK(IV1TCS+IMIN-1)*B11E(IMIN) - + $ RWORK(IV1TSN+IMIN-1)*B11D(IMIN) + B11D(IMIN) = TEMP + B11BULGE = RWORK(IV1TSN+IMIN-1)*B11D(IMIN+1) + B11D(IMIN+1) = RWORK(IV1TCS+IMIN-1)*B11D(IMIN+1) + TEMP = RWORK(IV1TCS+IMIN-1)*B21D(IMIN) + + $ RWORK(IV1TSN+IMIN-1)*B21E(IMIN) + B21E(IMIN) = RWORK(IV1TCS+IMIN-1)*B21E(IMIN) - + $ RWORK(IV1TSN+IMIN-1)*B21D(IMIN) + B21D(IMIN) = TEMP + B21BULGE = RWORK(IV1TSN+IMIN-1)*B21D(IMIN+1) + B21D(IMIN+1) = RWORK(IV1TCS+IMIN-1)*B21D(IMIN+1) +* +* Compute THETA(IMIN) +* + THETA( IMIN ) = ATAN2( SQRT( B21D(IMIN)**2+B21BULGE**2 ), + $ SQRT( B11D(IMIN)**2+B11BULGE**2 ) ) +* +* Chase the bulges in B11(IMIN+1,IMIN) and B21(IMIN+1,IMIN) +* + IF( B11D(IMIN)**2+B11BULGE**2 .GT. THRESH**2 ) THEN + CALL SLARTGP( B11BULGE, B11D(IMIN), RWORK(IU1SN+IMIN-1), + $ RWORK(IU1CS+IMIN-1), R ) + ELSE IF( MU .LE. NU ) THEN + CALL SLARTGS( B11E( IMIN ), B11D( IMIN + 1 ), MU, + $ RWORK(IU1CS+IMIN-1), RWORK(IU1SN+IMIN-1) ) + ELSE + CALL SLARTGS( B12D( IMIN ), B12E( IMIN ), NU, + $ RWORK(IU1CS+IMIN-1), RWORK(IU1SN+IMIN-1) ) + END IF + IF( B21D(IMIN)**2+B21BULGE**2 .GT. THRESH**2 ) THEN + CALL SLARTGP( B21BULGE, B21D(IMIN), RWORK(IU2SN+IMIN-1), + $ RWORK(IU2CS+IMIN-1), R ) + ELSE IF( NU .LT. MU ) THEN + CALL SLARTGS( B21E( IMIN ), B21D( IMIN + 1 ), NU, + $ RWORK(IU2CS+IMIN-1), RWORK(IU2SN+IMIN-1) ) + ELSE + CALL SLARTGS( B22D(IMIN), B22E(IMIN), MU, + $ RWORK(IU2CS+IMIN-1), RWORK(IU2SN+IMIN-1) ) + END IF + RWORK(IU2CS+IMIN-1) = -RWORK(IU2CS+IMIN-1) + RWORK(IU2SN+IMIN-1) = -RWORK(IU2SN+IMIN-1) +* + TEMP = RWORK(IU1CS+IMIN-1)*B11E(IMIN) + + $ RWORK(IU1SN+IMIN-1)*B11D(IMIN+1) + B11D(IMIN+1) = RWORK(IU1CS+IMIN-1)*B11D(IMIN+1) - + $ RWORK(IU1SN+IMIN-1)*B11E(IMIN) + B11E(IMIN) = TEMP + IF( IMAX .GT. IMIN+1 ) THEN + B11BULGE = RWORK(IU1SN+IMIN-1)*B11E(IMIN+1) + B11E(IMIN+1) = RWORK(IU1CS+IMIN-1)*B11E(IMIN+1) + END IF + TEMP = RWORK(IU1CS+IMIN-1)*B12D(IMIN) + + $ RWORK(IU1SN+IMIN-1)*B12E(IMIN) + B12E(IMIN) = RWORK(IU1CS+IMIN-1)*B12E(IMIN) - + $ RWORK(IU1SN+IMIN-1)*B12D(IMIN) + B12D(IMIN) = TEMP + B12BULGE = RWORK(IU1SN+IMIN-1)*B12D(IMIN+1) + B12D(IMIN+1) = RWORK(IU1CS+IMIN-1)*B12D(IMIN+1) + TEMP = RWORK(IU2CS+IMIN-1)*B21E(IMIN) + + $ RWORK(IU2SN+IMIN-1)*B21D(IMIN+1) + B21D(IMIN+1) = RWORK(IU2CS+IMIN-1)*B21D(IMIN+1) - + $ RWORK(IU2SN+IMIN-1)*B21E(IMIN) + B21E(IMIN) = TEMP + IF( IMAX .GT. IMIN+1 ) THEN + B21BULGE = RWORK(IU2SN+IMIN-1)*B21E(IMIN+1) + B21E(IMIN+1) = RWORK(IU2CS+IMIN-1)*B21E(IMIN+1) + END IF + TEMP = RWORK(IU2CS+IMIN-1)*B22D(IMIN) + + $ RWORK(IU2SN+IMIN-1)*B22E(IMIN) + B22E(IMIN) = RWORK(IU2CS+IMIN-1)*B22E(IMIN) - + $ RWORK(IU2SN+IMIN-1)*B22D(IMIN) + B22D(IMIN) = TEMP + B22BULGE = RWORK(IU2SN+IMIN-1)*B22D(IMIN+1) + B22D(IMIN+1) = RWORK(IU2CS+IMIN-1)*B22D(IMIN+1) +* +* Inner loop: chase bulges from B11(IMIN,IMIN+2), +* B12(IMIN,IMIN+1), B21(IMIN,IMIN+2), and B22(IMIN,IMIN+1) to +* bottom-right +* + DO I = IMIN+1, IMAX-1 +* +* Compute PHI(I-1) +* + X1 = SIN(THETA(I-1))*B11E(I-1) + COS(THETA(I-1))*B21E(I-1) + X2 = SIN(THETA(I-1))*B11BULGE + COS(THETA(I-1))*B21BULGE + Y1 = SIN(THETA(I-1))*B12D(I-1) + COS(THETA(I-1))*B22D(I-1) + Y2 = SIN(THETA(I-1))*B12BULGE + COS(THETA(I-1))*B22BULGE +* + PHI(I-1) = ATAN2( SQRT(X1**2+X2**2), SQRT(Y1**2+Y2**2) ) +* +* Determine if there are bulges to chase or if a new direct +* summand has been reached +* + RESTART11 = B11E(I-1)**2 + B11BULGE**2 .LE. THRESH**2 + RESTART21 = B21E(I-1)**2 + B21BULGE**2 .LE. THRESH**2 + RESTART12 = B12D(I-1)**2 + B12BULGE**2 .LE. THRESH**2 + RESTART22 = B22D(I-1)**2 + B22BULGE**2 .LE. THRESH**2 +* +* If possible, chase bulges from B11(I-1,I+1), B12(I-1,I), +* B21(I-1,I+1), and B22(I-1,I). If necessary, restart bulge- +* chasing by applying the original shift again. +* + IF( .NOT. RESTART11 .AND. .NOT. RESTART21 ) THEN + CALL SLARTGP( X2, X1, RWORK(IV1TSN+I-1), + $ RWORK(IV1TCS+I-1), R ) + ELSE IF( .NOT. RESTART11 .AND. RESTART21 ) THEN + CALL SLARTGP( B11BULGE, B11E(I-1), RWORK(IV1TSN+I-1), + $ RWORK(IV1TCS+I-1), R ) + ELSE IF( RESTART11 .AND. .NOT. RESTART21 ) THEN + CALL SLARTGP( B21BULGE, B21E(I-1), RWORK(IV1TSN+I-1), + $ RWORK(IV1TCS+I-1), R ) + ELSE IF( MU .LE. NU ) THEN + CALL SLARTGS( B11D(I), B11E(I), MU, RWORK(IV1TCS+I-1), + $ RWORK(IV1TSN+I-1) ) + ELSE + CALL SLARTGS( B21D(I), B21E(I), NU, RWORK(IV1TCS+I-1), + $ RWORK(IV1TSN+I-1) ) + END IF + RWORK(IV1TCS+I-1) = -RWORK(IV1TCS+I-1) + RWORK(IV1TSN+I-1) = -RWORK(IV1TSN+I-1) + IF( .NOT. RESTART12 .AND. .NOT. RESTART22 ) THEN + CALL SLARTGP( Y2, Y1, RWORK(IV2TSN+I-1-1), + $ RWORK(IV2TCS+I-1-1), R ) + ELSE IF( .NOT. RESTART12 .AND. RESTART22 ) THEN + CALL SLARTGP( B12BULGE, B12D(I-1), RWORK(IV2TSN+I-1-1), + $ RWORK(IV2TCS+I-1-1), R ) + ELSE IF( RESTART12 .AND. .NOT. RESTART22 ) THEN + CALL SLARTGP( B22BULGE, B22D(I-1), RWORK(IV2TSN+I-1-1), + $ RWORK(IV2TCS+I-1-1), R ) + ELSE IF( NU .LT. MU ) THEN + CALL SLARTGS( B12E(I-1), B12D(I), NU, + $ RWORK(IV2TCS+I-1-1), RWORK(IV2TSN+I-1-1) ) + ELSE + CALL SLARTGS( B22E(I-1), B22D(I), MU, + $ RWORK(IV2TCS+I-1-1), RWORK(IV2TSN+I-1-1) ) + END IF +* + TEMP = RWORK(IV1TCS+I-1)*B11D(I) + RWORK(IV1TSN+I-1)*B11E(I) + B11E(I) = RWORK(IV1TCS+I-1)*B11E(I) - + $ RWORK(IV1TSN+I-1)*B11D(I) + B11D(I) = TEMP + B11BULGE = RWORK(IV1TSN+I-1)*B11D(I+1) + B11D(I+1) = RWORK(IV1TCS+I-1)*B11D(I+1) + TEMP = RWORK(IV1TCS+I-1)*B21D(I) + RWORK(IV1TSN+I-1)*B21E(I) + B21E(I) = RWORK(IV1TCS+I-1)*B21E(I) - + $ RWORK(IV1TSN+I-1)*B21D(I) + B21D(I) = TEMP + B21BULGE = RWORK(IV1TSN+I-1)*B21D(I+1) + B21D(I+1) = RWORK(IV1TCS+I-1)*B21D(I+1) + TEMP = RWORK(IV2TCS+I-1-1)*B12E(I-1) + + $ RWORK(IV2TSN+I-1-1)*B12D(I) + B12D(I) = RWORK(IV2TCS+I-1-1)*B12D(I) - + $ RWORK(IV2TSN+I-1-1)*B12E(I-1) + B12E(I-1) = TEMP + B12BULGE = RWORK(IV2TSN+I-1-1)*B12E(I) + B12E(I) = RWORK(IV2TCS+I-1-1)*B12E(I) + TEMP = RWORK(IV2TCS+I-1-1)*B22E(I-1) + + $ RWORK(IV2TSN+I-1-1)*B22D(I) + B22D(I) = RWORK(IV2TCS+I-1-1)*B22D(I) - + $ RWORK(IV2TSN+I-1-1)*B22E(I-1) + B22E(I-1) = TEMP + B22BULGE = RWORK(IV2TSN+I-1-1)*B22E(I) + B22E(I) = RWORK(IV2TCS+I-1-1)*B22E(I) +* +* Compute THETA(I) +* + X1 = COS(PHI(I-1))*B11D(I) + SIN(PHI(I-1))*B12E(I-1) + X2 = COS(PHI(I-1))*B11BULGE + SIN(PHI(I-1))*B12BULGE + Y1 = COS(PHI(I-1))*B21D(I) + SIN(PHI(I-1))*B22E(I-1) + Y2 = COS(PHI(I-1))*B21BULGE + SIN(PHI(I-1))*B22BULGE +* + THETA(I) = ATAN2( SQRT(Y1**2+Y2**2), SQRT(X1**2+X2**2) ) +* +* Determine if there are bulges to chase or if a new direct +* summand has been reached +* + RESTART11 = B11D(I)**2 + B11BULGE**2 .LE. THRESH**2 + RESTART12 = B12E(I-1)**2 + B12BULGE**2 .LE. THRESH**2 + RESTART21 = B21D(I)**2 + B21BULGE**2 .LE. THRESH**2 + RESTART22 = B22E(I-1)**2 + B22BULGE**2 .LE. THRESH**2 +* +* If possible, chase bulges from B11(I+1,I), B12(I+1,I-1), +* B21(I+1,I), and B22(I+1,I-1). If necessary, restart bulge- +* chasing by applying the original shift again. +* + IF( .NOT. RESTART11 .AND. .NOT. RESTART12 ) THEN + CALL SLARTGP( X2, X1, RWORK(IU1SN+I-1), RWORK(IU1CS+I-1), + $ R ) + ELSE IF( .NOT. RESTART11 .AND. RESTART12 ) THEN + CALL SLARTGP( B11BULGE, B11D(I), RWORK(IU1SN+I-1), + $ RWORK(IU1CS+I-1), R ) + ELSE IF( RESTART11 .AND. .NOT. RESTART12 ) THEN + CALL SLARTGP( B12BULGE, B12E(I-1), RWORK(IU1SN+I-1), + $ RWORK(IU1CS+I-1), R ) + ELSE IF( MU .LE. NU ) THEN + CALL SLARTGS( B11E(I), B11D(I+1), MU, RWORK(IU1CS+I-1), + $ RWORK(IU1SN+I-1) ) + ELSE + CALL SLARTGS( B12D(I), B12E(I), NU, RWORK(IU1CS+I-1), + $ RWORK(IU1SN+I-1) ) + END IF + IF( .NOT. RESTART21 .AND. .NOT. RESTART22 ) THEN + CALL SLARTGP( Y2, Y1, RWORK(IU2SN+I-1), RWORK(IU2CS+I-1), + $ R ) + ELSE IF( .NOT. RESTART21 .AND. RESTART22 ) THEN + CALL SLARTGP( B21BULGE, B21D(I), RWORK(IU2SN+I-1), + $ RWORK(IU2CS+I-1), R ) + ELSE IF( RESTART21 .AND. .NOT. RESTART22 ) THEN + CALL SLARTGP( B22BULGE, B22E(I-1), RWORK(IU2SN+I-1), + $ RWORK(IU2CS+I-1), R ) + ELSE IF( NU .LT. MU ) THEN + CALL SLARTGS( B21E(I), B21E(I+1), NU, RWORK(IU2CS+I-1), + $ RWORK(IU2SN+I-1) ) + ELSE + CALL SLARTGS( B22D(I), B22E(I), MU, RWORK(IU2CS+I-1), + $ RWORK(IU2SN+I-1) ) + END IF + RWORK(IU2CS+I-1) = -RWORK(IU2CS+I-1) + RWORK(IU2SN+I-1) = -RWORK(IU2SN+I-1) +* + TEMP = RWORK(IU1CS+I-1)*B11E(I) + RWORK(IU1SN+I-1)*B11D(I+1) + B11D(I+1) = RWORK(IU1CS+I-1)*B11D(I+1) - + $ RWORK(IU1SN+I-1)*B11E(I) + B11E(I) = TEMP + IF( I .LT. IMAX - 1 ) THEN + B11BULGE = RWORK(IU1SN+I-1)*B11E(I+1) + B11E(I+1) = RWORK(IU1CS+I-1)*B11E(I+1) + END IF + TEMP = RWORK(IU2CS+I-1)*B21E(I) + RWORK(IU2SN+I-1)*B21D(I+1) + B21D(I+1) = RWORK(IU2CS+I-1)*B21D(I+1) - + $ RWORK(IU2SN+I-1)*B21E(I) + B21E(I) = TEMP + IF( I .LT. IMAX - 1 ) THEN + B21BULGE = RWORK(IU2SN+I-1)*B21E(I+1) + B21E(I+1) = RWORK(IU2CS+I-1)*B21E(I+1) + END IF + TEMP = RWORK(IU1CS+I-1)*B12D(I) + RWORK(IU1SN+I-1)*B12E(I) + B12E(I) = RWORK(IU1CS+I-1)*B12E(I) - + $ RWORK(IU1SN+I-1)*B12D(I) + B12D(I) = TEMP + B12BULGE = RWORK(IU1SN+I-1)*B12D(I+1) + B12D(I+1) = RWORK(IU1CS+I-1)*B12D(I+1) + TEMP = RWORK(IU2CS+I-1)*B22D(I) + RWORK(IU2SN+I-1)*B22E(I) + B22E(I) = RWORK(IU2CS+I-1)*B22E(I) - + $ RWORK(IU2SN+I-1)*B22D(I) + B22D(I) = TEMP + B22BULGE = RWORK(IU2SN+I-1)*B22D(I+1) + B22D(I+1) = RWORK(IU2CS+I-1)*B22D(I+1) +* + END DO +* +* Compute PHI(IMAX-1) +* + X1 = SIN(THETA(IMAX-1))*B11E(IMAX-1) + + $ COS(THETA(IMAX-1))*B21E(IMAX-1) + Y1 = SIN(THETA(IMAX-1))*B12D(IMAX-1) + + $ COS(THETA(IMAX-1))*B22D(IMAX-1) + Y2 = SIN(THETA(IMAX-1))*B12BULGE + COS(THETA(IMAX-1))*B22BULGE +* + PHI(IMAX-1) = ATAN2( ABS(X1), SQRT(Y1**2+Y2**2) ) +* +* Chase bulges from B12(IMAX-1,IMAX) and B22(IMAX-1,IMAX) +* + RESTART12 = B12D(IMAX-1)**2 + B12BULGE**2 .LE. THRESH**2 + RESTART22 = B22D(IMAX-1)**2 + B22BULGE**2 .LE. THRESH**2 +* + IF( .NOT. RESTART12 .AND. .NOT. RESTART22 ) THEN + CALL SLARTGP( Y2, Y1, RWORK(IV2TSN+IMAX-1-1), + $ RWORK(IV2TCS+IMAX-1-1), R ) + ELSE IF( .NOT. RESTART12 .AND. RESTART22 ) THEN + CALL SLARTGP( B12BULGE, B12D(IMAX-1), + $ RWORK(IV2TSN+IMAX-1-1), + $ RWORK(IV2TCS+IMAX-1-1), R ) + ELSE IF( RESTART12 .AND. .NOT. RESTART22 ) THEN + CALL SLARTGP( B22BULGE, B22D(IMAX-1), + $ RWORK(IV2TSN+IMAX-1-1), + $ RWORK(IV2TCS+IMAX-1-1), R ) + ELSE IF( NU .LT. MU ) THEN + CALL SLARTGS( B12E(IMAX-1), B12D(IMAX), NU, + $ RWORK(IV2TCS+IMAX-1-1), + $ RWORK(IV2TSN+IMAX-1-1) ) + ELSE + CALL SLARTGS( B22E(IMAX-1), B22D(IMAX), MU, + $ RWORK(IV2TCS+IMAX-1-1), + $ RWORK(IV2TSN+IMAX-1-1) ) + END IF +* + TEMP = RWORK(IV2TCS+IMAX-1-1)*B12E(IMAX-1) + + $ RWORK(IV2TSN+IMAX-1-1)*B12D(IMAX) + B12D(IMAX) = RWORK(IV2TCS+IMAX-1-1)*B12D(IMAX) - + $ RWORK(IV2TSN+IMAX-1-1)*B12E(IMAX-1) + B12E(IMAX-1) = TEMP + TEMP = RWORK(IV2TCS+IMAX-1-1)*B22E(IMAX-1) + + $ RWORK(IV2TSN+IMAX-1-1)*B22D(IMAX) + B22D(IMAX) = RWORK(IV2TCS+IMAX-1-1)*B22D(IMAX) - + $ RWORK(IV2TSN+IMAX-1-1)*B22E(IMAX-1) + B22E(IMAX-1) = TEMP +* +* Update singular vectors +* + IF( WANTU1 ) THEN + IF( COLMAJOR ) THEN + CALL CLASR( 'R', 'V', 'F', P, IMAX-IMIN+1, + $ RWORK(IU1CS+IMIN-1), RWORK(IU1SN+IMIN-1), + $ U1(1,IMIN), LDU1 ) + ELSE + CALL CLASR( 'L', 'V', 'F', IMAX-IMIN+1, P, + $ RWORK(IU1CS+IMIN-1), RWORK(IU1SN+IMIN-1), + $ U1(IMIN,1), LDU1 ) + END IF + END IF + IF( WANTU2 ) THEN + IF( COLMAJOR ) THEN + CALL CLASR( 'R', 'V', 'F', M-P, IMAX-IMIN+1, + $ RWORK(IU2CS+IMIN-1), RWORK(IU2SN+IMIN-1), + $ U2(1,IMIN), LDU2 ) + ELSE + CALL CLASR( 'L', 'V', 'F', IMAX-IMIN+1, M-P, + $ RWORK(IU2CS+IMIN-1), RWORK(IU2SN+IMIN-1), + $ U2(IMIN,1), LDU2 ) + END IF + END IF + IF( WANTV1T ) THEN + IF( COLMAJOR ) THEN + CALL CLASR( 'L', 'V', 'F', IMAX-IMIN+1, Q, + $ RWORK(IV1TCS+IMIN-1), RWORK(IV1TSN+IMIN-1), + $ V1T(IMIN,1), LDV1T ) + ELSE + CALL CLASR( 'R', 'V', 'F', Q, IMAX-IMIN+1, + $ RWORK(IV1TCS+IMIN-1), RWORK(IV1TSN+IMIN-1), + $ V1T(1,IMIN), LDV1T ) + END IF + END IF + IF( WANTV2T ) THEN + IF( COLMAJOR ) THEN + CALL CLASR( 'L', 'V', 'F', IMAX-IMIN+1, M-Q, + $ RWORK(IV2TCS+IMIN-1), RWORK(IV2TSN+IMIN-1), + $ V2T(IMIN,1), LDV2T ) + ELSE + CALL CLASR( 'R', 'V', 'F', M-Q, IMAX-IMIN+1, + $ RWORK(IV2TCS+IMIN-1), RWORK(IV2TSN+IMIN-1), + $ V2T(1,IMIN), LDV2T ) + END IF + END IF +* +* Fix signs on B11(IMAX-1,IMAX) and B21(IMAX-1,IMAX) +* + IF( B11E(IMAX-1)+B21E(IMAX-1) .GT. 0 ) THEN + B11D(IMAX) = -B11D(IMAX) + B21D(IMAX) = -B21D(IMAX) + IF( WANTV1T ) THEN + IF( COLMAJOR ) THEN + CALL CSCAL( Q, NEGONECOMPLEX, V1T(IMAX,1), LDV1T ) + ELSE + CALL CSCAL( Q, NEGONECOMPLEX, V1T(1,IMAX), 1 ) + END IF + END IF + END IF +* +* Compute THETA(IMAX) +* + X1 = COS(PHI(IMAX-1))*B11D(IMAX) + + $ SIN(PHI(IMAX-1))*B12E(IMAX-1) + Y1 = COS(PHI(IMAX-1))*B21D(IMAX) + + $ SIN(PHI(IMAX-1))*B22E(IMAX-1) +* + THETA(IMAX) = ATAN2( ABS(Y1), ABS(X1) ) +* +* Fix signs on B11(IMAX,IMAX), B12(IMAX,IMAX-1), B21(IMAX,IMAX), +* and B22(IMAX,IMAX-1) +* + IF( B11D(IMAX)+B12E(IMAX-1) .LT. 0 ) THEN + B12D(IMAX) = -B12D(IMAX) + IF( WANTU1 ) THEN + IF( COLMAJOR ) THEN + CALL CSCAL( P, NEGONECOMPLEX, U1(1,IMAX), 1 ) + ELSE + CALL CSCAL( P, NEGONECOMPLEX, U1(IMAX,1), LDU1 ) + END IF + END IF + END IF + IF( B21D(IMAX)+B22E(IMAX-1) .GT. 0 ) THEN + B22D(IMAX) = -B22D(IMAX) + IF( WANTU2 ) THEN + IF( COLMAJOR ) THEN + CALL CSCAL( M-P, NEGONECOMPLEX, U2(1,IMAX), 1 ) + ELSE + CALL CSCAL( M-P, NEGONECOMPLEX, U2(IMAX,1), LDU2 ) + END IF + END IF + END IF +* +* Fix signs on B12(IMAX,IMAX) and B22(IMAX,IMAX) +* + IF( B12D(IMAX)+B22D(IMAX) .LT. 0 ) THEN + IF( WANTV2T ) THEN + IF( COLMAJOR ) THEN + CALL CSCAL( M-Q, NEGONECOMPLEX, V2T(IMAX,1), LDV2T ) + ELSE + CALL CSCAL( M-Q, NEGONECOMPLEX, V2T(1,IMAX), 1 ) + END IF + END IF + END IF +* +* Test for negligible sines or cosines +* + DO I = IMIN, IMAX + IF( THETA(I) .LT. THRESH ) THEN + THETA(I) = ZERO + ELSE IF( THETA(I) .GT. PIOVER2-THRESH ) THEN + THETA(I) = PIOVER2 + END IF + END DO + DO I = IMIN, IMAX-1 + IF( PHI(I) .LT. THRESH ) THEN + PHI(I) = ZERO + ELSE IF( PHI(I) .GT. PIOVER2-THRESH ) THEN + PHI(I) = PIOVER2 + END IF + END DO +* +* Deflate +* + IF (IMAX .GT. 1) THEN + DO WHILE( PHI(IMAX-1) .EQ. ZERO ) + IMAX = IMAX - 1 + IF (IMAX .LE. 1) EXIT + END DO + END IF + IF( IMIN .GT. IMAX - 1 ) + $ IMIN = IMAX - 1 + IF (IMIN .GT. 1) THEN + DO WHILE (PHI(IMIN-1) .NE. ZERO) + IMIN = IMIN - 1 + IF (IMIN .LE. 1) EXIT + END DO + END IF +* +* Repeat main iteration loop +* + END DO +* +* Postprocessing: order THETA from least to greatest +* + DO I = 1, Q +* + MINI = I + THETAMIN = THETA(I) + DO J = I+1, Q + IF( THETA(J) .LT. THETAMIN ) THEN + MINI = J + THETAMIN = THETA(J) + END IF + END DO +* + IF( MINI .NE. I ) THEN + THETA(MINI) = THETA(I) + THETA(I) = THETAMIN + IF( COLMAJOR ) THEN + IF( WANTU1 ) + $ CALL CSWAP( P, U1(1,I), 1, U1(1,MINI), 1 ) + IF( WANTU2 ) + $ CALL CSWAP( M-P, U2(1,I), 1, U2(1,MINI), 1 ) + IF( WANTV1T ) + $ CALL CSWAP( Q, V1T(I,1), LDV1T, V1T(MINI,1), LDV1T ) + IF( WANTV2T ) + $ CALL CSWAP( M-Q, V2T(I,1), LDV2T, V2T(MINI,1), + $ LDV2T ) + ELSE + IF( WANTU1 ) + $ CALL CSWAP( P, U1(I,1), LDU1, U1(MINI,1), LDU1 ) + IF( WANTU2 ) + $ CALL CSWAP( M-P, U2(I,1), LDU2, U2(MINI,1), LDU2 ) + IF( WANTV1T ) + $ CALL CSWAP( Q, V1T(1,I), 1, V1T(1,MINI), 1 ) + IF( WANTV2T ) + $ CALL CSWAP( M-Q, V2T(1,I), 1, V2T(1,MINI), 1 ) + END IF + END IF +* + END DO +* + RETURN +* +* End of CBBCSD +* + END + diff --git a/dspl/liblapack/SRC/cbdsqr.f b/dspl/liblapack/SRC/cbdsqr.f new file mode 100644 index 0000000..0bda3a3 --- /dev/null +++ b/dspl/liblapack/SRC/cbdsqr.f @@ -0,0 +1,842 @@ +*> \brief \b CBDSQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CBDSQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, +* LDU, C, LDC, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ), RWORK( * ) +* COMPLEX C( LDC, * ), U( LDU, * ), VT( LDVT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CBDSQR computes the singular values and, optionally, the right and/or +*> left singular vectors from the singular value decomposition (SVD) of +*> a real N-by-N (upper or lower) bidiagonal matrix B using the implicit +*> zero-shift QR algorithm. The SVD of B has the form +*> +*> B = Q * S * P**H +*> +*> where S is the diagonal matrix of singular values, Q is an orthogonal +*> matrix of left singular vectors, and P is an orthogonal matrix of +*> right singular vectors. If left singular vectors are requested, this +*> subroutine actually returns U*Q instead of Q, and, if right singular +*> vectors are requested, this subroutine returns P**H*VT instead of +*> P**H, for given complex input matrices U and VT. When U and VT are +*> the unitary matrices that reduce a general matrix A to bidiagonal +*> form: A = U*B*VT, as computed by CGEBRD, then +*> +*> A = (U*Q) * S * (P**H*VT) +*> +*> is the SVD of A. Optionally, the subroutine may also compute Q**H*C +*> for a given complex input matrix C. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices With +*> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, +*> LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, +*> no. 5, pp. 873-912, Sept 1990) and +*> "Accurate singular values and differential qd algorithms," by +*> B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics +*> Department, University of California at Berkeley, July 1992 +*> for a detailed description of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': B is upper bidiagonal; +*> = 'L': B is lower bidiagonal. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix B. N >= 0. +*> \endverbatim +*> +*> \param[in] NCVT +*> \verbatim +*> NCVT is INTEGER +*> The number of columns of the matrix VT. NCVT >= 0. +*> \endverbatim +*> +*> \param[in] NRU +*> \verbatim +*> NRU is INTEGER +*> The number of rows of the matrix U. NRU >= 0. +*> \endverbatim +*> +*> \param[in] NCC +*> \verbatim +*> NCC is INTEGER +*> The number of columns of the matrix C. NCC >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the n diagonal elements of the bidiagonal matrix B. +*> On exit, if INFO=0, the singular values of B in decreasing +*> order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> On entry, the N-1 offdiagonal elements of the bidiagonal +*> matrix B. +*> On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E +*> will contain the diagonal and superdiagonal elements of a +*> bidiagonal matrix orthogonally equivalent to the one given +*> as input. +*> \endverbatim +*> +*> \param[in,out] VT +*> \verbatim +*> VT is COMPLEX array, dimension (LDVT, NCVT) +*> On entry, an N-by-NCVT matrix VT. +*> On exit, VT is overwritten by P**H * VT. +*> Not referenced if NCVT = 0. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. +*> LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. +*> \endverbatim +*> +*> \param[in,out] U +*> \verbatim +*> U is COMPLEX array, dimension (LDU, N) +*> On entry, an NRU-by-N matrix U. +*> On exit, U is overwritten by U * Q. +*> Not referenced if NRU = 0. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,NRU). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC, NCC) +*> On entry, an N-by-NCC matrix C. +*> On exit, C is overwritten by Q**H * C. +*> Not referenced if NCC = 0. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. +*> LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0: the algorithm did not converge; D and E contain the +*> elements of a bidiagonal matrix which is orthogonally +*> similar to the input matrix B; if INFO = i, i +*> elements of E have not converged to zero. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> TOLMUL REAL, default = max(10,min(100,EPS**(-1/8))) +*> TOLMUL controls the convergence criterion of the QR loop. +*> If it is positive, TOLMUL*EPS is the desired relative +*> precision in the computed singular values. +*> If it is negative, abs(TOLMUL*EPS*sigma_max) is the +*> desired absolute accuracy in the computed singular +*> values (corresponds to relative accuracy +*> abs(TOLMUL*EPS) in the largest singular value. +*> abs(TOLMUL) should be between 1 and 1/EPS, and preferably +*> between 10 (for fast convergence) and .1/EPS +*> (for there to be some accuracy in the results). +*> Default is to lose at either one eighth or 2 of the +*> available decimal digits in each computed singular value +*> (whichever is smaller). +*> +*> MAXITR INTEGER, default = 6 +*> MAXITR controls the maximum number of passes of the +*> algorithm through its inner loop. The algorithms stops +*> (and so fails to converge) if the number of passes +*> through the inner loop exceeds MAXITR*N**2. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, + $ LDU, C, LDC, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), RWORK( * ) + COMPLEX C( LDC, * ), U( LDU, * ), VT( LDVT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL NEGONE + PARAMETER ( NEGONE = -1.0E0 ) + REAL HNDRTH + PARAMETER ( HNDRTH = 0.01E0 ) + REAL TEN + PARAMETER ( TEN = 10.0E0 ) + REAL HNDRD + PARAMETER ( HNDRD = 100.0E0 ) + REAL MEIGTH + PARAMETER ( MEIGTH = -0.125E0 ) + INTEGER MAXITR + PARAMETER ( MAXITR = 6 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, ROTATE + INTEGER I, IDIR, ISUB, ITER, J, LL, LLL, M, MAXIT, NM1, + $ NM12, NM13, OLDLL, OLDM + REAL ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU, + $ OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL, + $ SINR, SLL, SMAX, SMIN, SMINL, SMINOA, + $ SN, THRESH, TOL, TOLMUL, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLASR, CSROT, CSSCAL, CSWAP, SLARTG, SLAS2, + $ SLASQ1, SLASV2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NCVT.LT.0 ) THEN + INFO = -3 + ELSE IF( NRU.LT.0 ) THEN + INFO = -4 + ELSE IF( NCC.LT.0 ) THEN + INFO = -5 + ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR. + $ ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN + INFO = -9 + ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN + INFO = -11 + ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR. + $ ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CBDSQR', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN + IF( N.EQ.1 ) + $ GO TO 160 +* +* ROTATE is true if any singular vectors desired, false otherwise +* + ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 ) +* +* If no singular vectors desired, use qd algorithm +* + IF( .NOT.ROTATE ) THEN + CALL SLASQ1( N, D, E, RWORK, INFO ) +* +* If INFO equals 2, dqds didn't finish, try to finish +* + IF( INFO .NE. 2 ) RETURN + INFO = 0 + END IF +* + NM1 = N - 1 + NM12 = NM1 + NM1 + NM13 = NM12 + NM1 + IDIR = 0 +* +* Get machine constants +* + EPS = SLAMCH( 'Epsilon' ) + UNFL = SLAMCH( 'Safe minimum' ) +* +* If matrix lower bidiagonal, rotate to be upper bidiagonal +* by applying Givens rotations on the left +* + IF( LOWER ) THEN + DO 10 I = 1, N - 1 + CALL SLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + RWORK( I ) = CS + RWORK( NM1+I ) = SN + 10 CONTINUE +* +* Update singular vectors if desired +* + IF( NRU.GT.0 ) + $ CALL CLASR( 'R', 'V', 'F', NRU, N, RWORK( 1 ), RWORK( N ), + $ U, LDU ) + IF( NCC.GT.0 ) + $ CALL CLASR( 'L', 'V', 'F', N, NCC, RWORK( 1 ), RWORK( N ), + $ C, LDC ) + END IF +* +* Compute singular values to relative accuracy TOL +* (By setting TOL to be negative, algorithm will compute +* singular values to absolute accuracy ABS(TOL)*norm(input matrix)) +* + TOLMUL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) ) + TOL = TOLMUL*EPS +* +* Compute approximate maximum, minimum singular values +* + SMAX = ZERO + DO 20 I = 1, N + SMAX = MAX( SMAX, ABS( D( I ) ) ) + 20 CONTINUE + DO 30 I = 1, N - 1 + SMAX = MAX( SMAX, ABS( E( I ) ) ) + 30 CONTINUE + SMINL = ZERO + IF( TOL.GE.ZERO ) THEN +* +* Relative accuracy desired +* + SMINOA = ABS( D( 1 ) ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + MU = SMINOA + DO 40 I = 2, N + MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) ) + SMINOA = MIN( SMINOA, MU ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE + SMINOA = SMINOA / SQRT( REAL( N ) ) + THRESH = MAX( TOL*SMINOA, MAXITR*N*N*UNFL ) + ELSE +* +* Absolute accuracy desired +* + THRESH = MAX( ABS( TOL )*SMAX, MAXITR*N*N*UNFL ) + END IF +* +* Prepare for main iteration loop for the singular values +* (MAXIT is the maximum number of passes through the inner +* loop permitted before nonconvergence signalled.) +* + MAXIT = MAXITR*N*N + ITER = 0 + OLDLL = -1 + OLDM = -1 +* +* M points to last element of unconverged part of matrix +* + M = N +* +* Begin main iteration loop +* + 60 CONTINUE +* +* Check for convergence or exceeding iteration count +* + IF( M.LE.1 ) + $ GO TO 160 + IF( ITER.GT.MAXIT ) + $ GO TO 200 +* +* Find diagonal block of matrix to work on +* + IF( TOL.LT.ZERO .AND. ABS( D( M ) ).LE.THRESH ) + $ D( M ) = ZERO + SMAX = ABS( D( M ) ) + SMIN = SMAX + DO 70 LLL = 1, M - 1 + LL = M - LLL + ABSS = ABS( D( LL ) ) + ABSE = ABS( E( LL ) ) + IF( TOL.LT.ZERO .AND. ABSS.LE.THRESH ) + $ D( LL ) = ZERO + IF( ABSE.LE.THRESH ) + $ GO TO 80 + SMIN = MIN( SMIN, ABSS ) + SMAX = MAX( SMAX, ABSS, ABSE ) + 70 CONTINUE + LL = 0 + GO TO 90 + 80 CONTINUE + E( LL ) = ZERO +* +* Matrix splits since E(LL) = 0 +* + IF( LL.EQ.M-1 ) THEN +* +* Convergence of bottom singular value, return to top of loop +* + M = M - 1 + GO TO 60 + END IF + 90 CONTINUE + LL = LL + 1 +* +* E(LL) through E(M-1) are nonzero, E(LL-1) is zero +* + IF( LL.EQ.M-1 ) THEN +* +* 2 by 2 block, handle separately +* + CALL SLASV2( D( M-1 ), E( M-1 ), D( M ), SIGMN, SIGMX, SINR, + $ COSR, SINL, COSL ) + D( M-1 ) = SIGMX + E( M-1 ) = ZERO + D( M ) = SIGMN +* +* Compute singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL CSROT( NCVT, VT( M-1, 1 ), LDVT, VT( M, 1 ), LDVT, + $ COSR, SINR ) + IF( NRU.GT.0 ) + $ CALL CSROT( NRU, U( 1, M-1 ), 1, U( 1, M ), 1, COSL, SINL ) + IF( NCC.GT.0 ) + $ CALL CSROT( NCC, C( M-1, 1 ), LDC, C( M, 1 ), LDC, COSL, + $ SINL ) + M = M - 2 + GO TO 60 + END IF +* +* If working on new submatrix, choose shift direction +* (from larger end diagonal element towards smaller) +* + IF( LL.GT.OLDM .OR. M.LT.OLDLL ) THEN + IF( ABS( D( LL ) ).GE.ABS( D( M ) ) ) THEN +* +* Chase bulge from top (big end) to bottom (small end) +* + IDIR = 1 + ELSE +* +* Chase bulge from bottom (big end) to top (small end) +* + IDIR = 2 + END IF + END IF +* +* Apply convergence tests +* + IF( IDIR.EQ.1 ) THEN +* +* Run convergence test in forward direction +* First apply standard test to bottom of matrix +* + IF( ABS( E( M-1 ) ).LE.ABS( TOL )*ABS( D( M ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( M-1 ) ).LE.THRESH ) ) THEN + E( M-1 ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion forward +* + MU = ABS( D( LL ) ) + SMINL = MU + DO 100 LLL = LL, M - 1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + MU = ABS( D( LLL+1 ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 100 CONTINUE + END IF +* + ELSE +* +* Run convergence test in backward direction +* First apply standard test to top of matrix +* + IF( ABS( E( LL ) ).LE.ABS( TOL )*ABS( D( LL ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( LL ) ).LE.THRESH ) ) THEN + E( LL ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion backward +* + MU = ABS( D( M ) ) + SMINL = MU + DO 110 LLL = M - 1, LL, -1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + MU = ABS( D( LLL ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 110 CONTINUE + END IF + END IF + OLDLL = LL + OLDM = M +* +* Compute shift. First, test if shifting would ruin relative +* accuracy, and if so set the shift to zero. +* + IF( TOL.GE.ZERO .AND. N*TOL*( SMINL / SMAX ).LE. + $ MAX( EPS, HNDRTH*TOL ) ) THEN +* +* Use a zero shift to avoid loss of relative accuracy +* + SHIFT = ZERO + ELSE +* +* Compute the shift from 2-by-2 block at end of matrix +* + IF( IDIR.EQ.1 ) THEN + SLL = ABS( D( LL ) ) + CALL SLAS2( D( M-1 ), E( M-1 ), D( M ), SHIFT, R ) + ELSE + SLL = ABS( D( M ) ) + CALL SLAS2( D( LL ), E( LL ), D( LL+1 ), SHIFT, R ) + END IF +* +* Test if shift negligible, and if so set to zero +* + IF( SLL.GT.ZERO ) THEN + IF( ( SHIFT / SLL )**2.LT.EPS ) + $ SHIFT = ZERO + END IF + END IF +* +* Increment iteration count +* + ITER = ITER + M - LL +* +* If SHIFT = 0, do simplified QR iteration +* + IF( SHIFT.EQ.ZERO ) THEN + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 120 I = LL, M - 1 + CALL SLARTG( D( I )*CS, E( I ), CS, SN, R ) + IF( I.GT.LL ) + $ E( I-1 ) = OLDSN*R + CALL SLARTG( OLDCS*R, D( I+1 )*SN, OLDCS, OLDSN, D( I ) ) + RWORK( I-LL+1 ) = CS + RWORK( I-LL+1+NM1 ) = SN + RWORK( I-LL+1+NM12 ) = OLDCS + RWORK( I-LL+1+NM13 ) = OLDSN + 120 CONTINUE + H = D( M )*CS + D( M ) = H*OLDCS + E( M-1 ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL CLASR( 'L', 'V', 'F', M-LL+1, NCVT, RWORK( 1 ), + $ RWORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL CLASR( 'R', 'V', 'F', NRU, M-LL+1, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL CLASR( 'L', 'V', 'F', M-LL+1, NCC, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 130 I = M, LL + 1, -1 + CALL SLARTG( D( I )*CS, E( I-1 ), CS, SN, R ) + IF( I.LT.M ) + $ E( I ) = OLDSN*R + CALL SLARTG( OLDCS*R, D( I-1 )*SN, OLDCS, OLDSN, D( I ) ) + RWORK( I-LL ) = CS + RWORK( I-LL+NM1 ) = -SN + RWORK( I-LL+NM12 ) = OLDCS + RWORK( I-LL+NM13 ) = -OLDSN + 130 CONTINUE + H = D( LL )*CS + D( LL ) = H*OLDCS + E( LL ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL CLASR( 'L', 'V', 'B', M-LL+1, NCVT, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL CLASR( 'R', 'V', 'B', NRU, M-LL+1, RWORK( 1 ), + $ RWORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL CLASR( 'L', 'V', 'B', M-LL+1, NCC, RWORK( 1 ), + $ RWORK( N ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO + END IF + ELSE +* +* Use nonzero shift +* + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( LL ) )-SHIFT )* + $ ( SIGN( ONE, D( LL ) )+SHIFT / D( LL ) ) + G = E( LL ) + DO 140 I = LL, M - 1 + CALL SLARTG( F, G, COSR, SINR, R ) + IF( I.GT.LL ) + $ E( I-1 ) = R + F = COSR*D( I ) + SINR*E( I ) + E( I ) = COSR*E( I ) - SINR*D( I ) + G = SINR*D( I+1 ) + D( I+1 ) = COSR*D( I+1 ) + CALL SLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I ) + SINL*D( I+1 ) + D( I+1 ) = COSL*D( I+1 ) - SINL*E( I ) + IF( I.LT.M-1 ) THEN + G = SINL*E( I+1 ) + E( I+1 ) = COSL*E( I+1 ) + END IF + RWORK( I-LL+1 ) = COSR + RWORK( I-LL+1+NM1 ) = SINR + RWORK( I-LL+1+NM12 ) = COSL + RWORK( I-LL+1+NM13 ) = SINL + 140 CONTINUE + E( M-1 ) = F +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL CLASR( 'L', 'V', 'F', M-LL+1, NCVT, RWORK( 1 ), + $ RWORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL CLASR( 'R', 'V', 'F', NRU, M-LL+1, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL CLASR( 'L', 'V', 'F', M-LL+1, NCC, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( M ) )-SHIFT )*( SIGN( ONE, D( M ) )+SHIFT / + $ D( M ) ) + G = E( M-1 ) + DO 150 I = M, LL + 1, -1 + CALL SLARTG( F, G, COSR, SINR, R ) + IF( I.LT.M ) + $ E( I ) = R + F = COSR*D( I ) + SINR*E( I-1 ) + E( I-1 ) = COSR*E( I-1 ) - SINR*D( I ) + G = SINR*D( I-1 ) + D( I-1 ) = COSR*D( I-1 ) + CALL SLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I-1 ) + SINL*D( I-1 ) + D( I-1 ) = COSL*D( I-1 ) - SINL*E( I-1 ) + IF( I.GT.LL+1 ) THEN + G = SINL*E( I-2 ) + E( I-2 ) = COSL*E( I-2 ) + END IF + RWORK( I-LL ) = COSR + RWORK( I-LL+NM1 ) = -SINR + RWORK( I-LL+NM12 ) = COSL + RWORK( I-LL+NM13 ) = -SINL + 150 CONTINUE + E( LL ) = F +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO +* +* Update singular vectors if desired +* + IF( NCVT.GT.0 ) + $ CALL CLASR( 'L', 'V', 'B', M-LL+1, NCVT, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL CLASR( 'R', 'V', 'B', NRU, M-LL+1, RWORK( 1 ), + $ RWORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL CLASR( 'L', 'V', 'B', M-LL+1, NCC, RWORK( 1 ), + $ RWORK( N ), C( LL, 1 ), LDC ) + END IF + END IF +* +* QR iteration finished, go back and check convergence +* + GO TO 60 +* +* All singular values converged, so make them positive +* + 160 CONTINUE + DO 170 I = 1, N + IF( D( I ).LT.ZERO ) THEN + D( I ) = -D( I ) +* +* Change sign of singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL CSSCAL( NCVT, NEGONE, VT( I, 1 ), LDVT ) + END IF + 170 CONTINUE +* +* Sort the singular values into decreasing order (insertion sort on +* singular values, but only one transposition per singular vector) +* + DO 190 I = 1, N - 1 +* +* Scan for smallest D(I) +* + ISUB = 1 + SMIN = D( 1 ) + DO 180 J = 2, N + 1 - I + IF( D( J ).LE.SMIN ) THEN + ISUB = J + SMIN = D( J ) + END IF + 180 CONTINUE + IF( ISUB.NE.N+1-I ) THEN +* +* Swap singular values and vectors +* + D( ISUB ) = D( N+1-I ) + D( N+1-I ) = SMIN + IF( NCVT.GT.0 ) + $ CALL CSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( N+1-I, 1 ), + $ LDVT ) + IF( NRU.GT.0 ) + $ CALL CSWAP( NRU, U( 1, ISUB ), 1, U( 1, N+1-I ), 1 ) + IF( NCC.GT.0 ) + $ CALL CSWAP( NCC, C( ISUB, 1 ), LDC, C( N+1-I, 1 ), LDC ) + END IF + 190 CONTINUE + GO TO 220 +* +* Maximum number of iterations exceeded, failure to converge +* + 200 CONTINUE + INFO = 0 + DO 210 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 210 CONTINUE + 220 CONTINUE + RETURN +* +* End of CBDSQR +* + END diff --git a/dspl/liblapack/SRC/cgbbrd.f b/dspl/liblapack/SRC/cgbbrd.f new file mode 100644 index 0000000..e4e820a --- /dev/null +++ b/dspl/liblapack/SRC/cgbbrd.f @@ -0,0 +1,573 @@ +*> \brief \b CGBBRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGBBRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGBBRD( VECT, M, N, NCC, KL, KU, AB, LDAB, D, E, Q, +* LDQ, PT, LDPT, C, LDC, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER VECT +* INTEGER INFO, KL, KU, LDAB, LDC, LDPT, LDQ, M, N, NCC +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ), RWORK( * ) +* COMPLEX AB( LDAB, * ), C( LDC, * ), PT( LDPT, * ), +* $ Q( LDQ, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGBBRD reduces a complex general m-by-n band matrix A to real upper +*> bidiagonal form B by a unitary transformation: Q**H * A * P = B. +*> +*> The routine computes B, and optionally forms Q or P**H, or computes +*> Q**H*C for a given matrix C. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> Specifies whether or not the matrices Q and P**H are to be +*> formed. +*> = 'N': do not form Q or P**H; +*> = 'Q': form Q only; +*> = 'P': form P**H only; +*> = 'B': form both. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NCC +*> \verbatim +*> NCC is INTEGER +*> The number of columns of the matrix C. NCC >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals of the matrix A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals of the matrix A. KU >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the m-by-n band matrix A, stored in rows 1 to +*> KL+KU+1. The j-th column of A is stored in the j-th column of +*> the array AB as follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). +*> On exit, A is overwritten by values generated during the +*> reduction. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array A. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (min(M,N)) +*> The diagonal elements of the bidiagonal matrix B. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (min(M,N)-1) +*> The superdiagonal elements of the bidiagonal matrix B. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ,M) +*> If VECT = 'Q' or 'B', the m-by-m unitary matrix Q. +*> If VECT = 'N' or 'P', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] PT +*> \verbatim +*> PT is COMPLEX array, dimension (LDPT,N) +*> If VECT = 'P' or 'B', the n-by-n unitary matrix P'. +*> If VECT = 'N' or 'Q', the array PT is not referenced. +*> \endverbatim +*> +*> \param[in] LDPT +*> \verbatim +*> LDPT is INTEGER +*> The leading dimension of the array PT. +*> LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,NCC) +*> On entry, an m-by-ncc matrix C. +*> On exit, C is overwritten by Q**H*C. +*> C is not referenced if NCC = 0. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. +*> LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (max(M,N)) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (max(M,N)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGBcomputational +* +* ===================================================================== + SUBROUTINE CGBBRD( VECT, M, N, NCC, KL, KU, AB, LDAB, D, E, Q, + $ LDQ, PT, LDPT, C, LDC, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER VECT + INTEGER INFO, KL, KU, LDAB, LDC, LDPT, LDQ, M, N, NCC +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), RWORK( * ) + COMPLEX AB( LDAB, * ), C( LDC, * ), PT( LDPT, * ), + $ Q( LDQ, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL WANTB, WANTC, WANTPT, WANTQ + INTEGER I, INCA, J, J1, J2, KB, KB1, KK, KLM, KLU1, + $ KUN, L, MINMN, ML, ML0, MU, MU0, NR, NRT + REAL ABST, RC + COMPLEX RA, RB, RS, T +* .. +* .. External Subroutines .. + EXTERNAL CLARGV, CLARTG, CLARTV, CLASET, CROT, CSCAL, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTB = LSAME( VECT, 'B' ) + WANTQ = LSAME( VECT, 'Q' ) .OR. WANTB + WANTPT = LSAME( VECT, 'P' ) .OR. WANTB + WANTC = NCC.GT.0 + KLU1 = KL + KU + 1 + INFO = 0 + IF( .NOT.WANTQ .AND. .NOT.WANTPT .AND. .NOT.LSAME( VECT, 'N' ) ) + $ THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NCC.LT.0 ) THEN + INFO = -4 + ELSE IF( KL.LT.0 ) THEN + INFO = -5 + ELSE IF( KU.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KLU1 ) THEN + INFO = -8 + ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. LDQ.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDPT.LT.1 .OR. WANTPT .AND. LDPT.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDC.LT.1 .OR. WANTC .AND. LDC.LT.MAX( 1, M ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBBRD', -INFO ) + RETURN + END IF +* +* Initialize Q and P**H to the unit matrix, if needed +* + IF( WANTQ ) + $ CALL CLASET( 'Full', M, M, CZERO, CONE, Q, LDQ ) + IF( WANTPT ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, PT, LDPT ) +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + MINMN = MIN( M, N ) +* + IF( KL+KU.GT.1 ) THEN +* +* Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce +* first to lower bidiagonal form and then transform to upper +* bidiagonal +* + IF( KU.GT.0 ) THEN + ML0 = 1 + MU0 = 2 + ELSE + ML0 = 2 + MU0 = 1 + END IF +* +* Wherever possible, plane rotations are generated and applied in +* vector operations of length NR over the index set J1:J2:KLU1. +* +* The complex sines of the plane rotations are stored in WORK, +* and the real cosines in RWORK. +* + KLM = MIN( M-1, KL ) + KUN = MIN( N-1, KU ) + KB = KLM + KUN + KB1 = KB + 1 + INCA = KB1*LDAB + NR = 0 + J1 = KLM + 2 + J2 = 1 - KUN +* + DO 90 I = 1, MINMN +* +* Reduce i-th column and i-th row of matrix to bidiagonal form +* + ML = KLM + 1 + MU = KUN + 1 + DO 80 KK = 1, KB + J1 = J1 + KB + J2 = J2 + KB +* +* generate plane rotations to annihilate nonzero elements +* which have been created below the band +* + IF( NR.GT.0 ) + $ CALL CLARGV( NR, AB( KLU1, J1-KLM-1 ), INCA, + $ WORK( J1 ), KB1, RWORK( J1 ), KB1 ) +* +* apply plane rotations from the left +* + DO 10 L = 1, KB + IF( J2-KLM+L-1.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( KLU1-L, J1-KLM+L-1 ), INCA, + $ AB( KLU1-L+1, J1-KLM+L-1 ), INCA, + $ RWORK( J1 ), WORK( J1 ), KB1 ) + 10 CONTINUE +* + IF( ML.GT.ML0 ) THEN + IF( ML.LE.M-I+1 ) THEN +* +* generate plane rotation to annihilate a(i+ml-1,i) +* within the band, and apply rotation from the left +* + CALL CLARTG( AB( KU+ML-1, I ), AB( KU+ML, I ), + $ RWORK( I+ML-1 ), WORK( I+ML-1 ), RA ) + AB( KU+ML-1, I ) = RA + IF( I.LT.N ) + $ CALL CROT( MIN( KU+ML-2, N-I ), + $ AB( KU+ML-2, I+1 ), LDAB-1, + $ AB( KU+ML-1, I+1 ), LDAB-1, + $ RWORK( I+ML-1 ), WORK( I+ML-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KB1 + END IF +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + DO 20 J = J1, J2, KB1 + CALL CROT( M, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ RWORK( J ), CONJG( WORK( J ) ) ) + 20 CONTINUE + END IF +* + IF( WANTC ) THEN +* +* apply plane rotations to C +* + DO 30 J = J1, J2, KB1 + CALL CROT( NCC, C( J-1, 1 ), LDC, C( J, 1 ), LDC, + $ RWORK( J ), WORK( J ) ) + 30 CONTINUE + END IF +* + IF( J2+KUN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KB1 + END IF +* + DO 40 J = J1, J2, KB1 +* +* create nonzero element a(j-1,j+ku) above the band +* and store it in WORK(n+1:2*n) +* + WORK( J+KUN ) = WORK( J )*AB( 1, J+KUN ) + AB( 1, J+KUN ) = RWORK( J )*AB( 1, J+KUN ) + 40 CONTINUE +* +* generate plane rotations to annihilate nonzero elements +* which have been generated above the band +* + IF( NR.GT.0 ) + $ CALL CLARGV( NR, AB( 1, J1+KUN-1 ), INCA, + $ WORK( J1+KUN ), KB1, RWORK( J1+KUN ), + $ KB1 ) +* +* apply plane rotations from the right +* + DO 50 L = 1, KB + IF( J2+L-1.GT.M ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( L+1, J1+KUN-1 ), INCA, + $ AB( L, J1+KUN ), INCA, + $ RWORK( J1+KUN ), WORK( J1+KUN ), KB1 ) + 50 CONTINUE +* + IF( ML.EQ.ML0 .AND. MU.GT.MU0 ) THEN + IF( MU.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i,i+mu-1) +* within the band, and apply rotation from the right +* + CALL CLARTG( AB( KU-MU+3, I+MU-2 ), + $ AB( KU-MU+2, I+MU-1 ), + $ RWORK( I+MU-1 ), WORK( I+MU-1 ), RA ) + AB( KU-MU+3, I+MU-2 ) = RA + CALL CROT( MIN( KL+MU-2, M-I ), + $ AB( KU-MU+4, I+MU-2 ), 1, + $ AB( KU-MU+3, I+MU-1 ), 1, + $ RWORK( I+MU-1 ), WORK( I+MU-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KB1 + END IF +* + IF( WANTPT ) THEN +* +* accumulate product of plane rotations in P**H +* + DO 60 J = J1, J2, KB1 + CALL CROT( N, PT( J+KUN-1, 1 ), LDPT, + $ PT( J+KUN, 1 ), LDPT, RWORK( J+KUN ), + $ CONJG( WORK( J+KUN ) ) ) + 60 CONTINUE + END IF +* + IF( J2+KB.GT.M ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KB1 + END IF +* + DO 70 J = J1, J2, KB1 +* +* create nonzero element a(j+kl+ku,j+ku-1) below the +* band and store it in WORK(1:n) +* + WORK( J+KB ) = WORK( J+KUN )*AB( KLU1, J+KUN ) + AB( KLU1, J+KUN ) = RWORK( J+KUN )*AB( KLU1, J+KUN ) + 70 CONTINUE +* + IF( ML.GT.ML0 ) THEN + ML = ML - 1 + ELSE + MU = MU - 1 + END IF + 80 CONTINUE + 90 CONTINUE + END IF +* + IF( KU.EQ.0 .AND. KL.GT.0 ) THEN +* +* A has been reduced to complex lower bidiagonal form +* +* Transform lower bidiagonal form to upper bidiagonal by applying +* plane rotations from the left, overwriting superdiagonal +* elements on subdiagonal elements +* + DO 100 I = 1, MIN( M-1, N ) + CALL CLARTG( AB( 1, I ), AB( 2, I ), RC, RS, RA ) + AB( 1, I ) = RA + IF( I.LT.N ) THEN + AB( 2, I ) = RS*AB( 1, I+1 ) + AB( 1, I+1 ) = RC*AB( 1, I+1 ) + END IF + IF( WANTQ ) + $ CALL CROT( M, Q( 1, I ), 1, Q( 1, I+1 ), 1, RC, + $ CONJG( RS ) ) + IF( WANTC ) + $ CALL CROT( NCC, C( I, 1 ), LDC, C( I+1, 1 ), LDC, RC, + $ RS ) + 100 CONTINUE + ELSE +* +* A has been reduced to complex upper bidiagonal form or is +* diagonal +* + IF( KU.GT.0 .AND. M.LT.N ) THEN +* +* Annihilate a(m,m+1) by applying plane rotations from the +* right +* + RB = AB( KU, M+1 ) + DO 110 I = M, 1, -1 + CALL CLARTG( AB( KU+1, I ), RB, RC, RS, RA ) + AB( KU+1, I ) = RA + IF( I.GT.1 ) THEN + RB = -CONJG( RS )*AB( KU, I ) + AB( KU, I ) = RC*AB( KU, I ) + END IF + IF( WANTPT ) + $ CALL CROT( N, PT( I, 1 ), LDPT, PT( M+1, 1 ), LDPT, + $ RC, CONJG( RS ) ) + 110 CONTINUE + END IF + END IF +* +* Make diagonal and superdiagonal elements real, storing them in D +* and E +* + T = AB( KU+1, 1 ) + DO 120 I = 1, MINMN + ABST = ABS( T ) + D( I ) = ABST + IF( ABST.NE.ZERO ) THEN + T = T / ABST + ELSE + T = CONE + END IF + IF( WANTQ ) + $ CALL CSCAL( M, T, Q( 1, I ), 1 ) + IF( WANTC ) + $ CALL CSCAL( NCC, CONJG( T ), C( I, 1 ), LDC ) + IF( I.LT.MINMN ) THEN + IF( KU.EQ.0 .AND. KL.EQ.0 ) THEN + E( I ) = ZERO + T = AB( 1, I+1 ) + ELSE + IF( KU.EQ.0 ) THEN + T = AB( 2, I )*CONJG( T ) + ELSE + T = AB( KU, I+1 )*CONJG( T ) + END IF + ABST = ABS( T ) + E( I ) = ABST + IF( ABST.NE.ZERO ) THEN + T = T / ABST + ELSE + T = CONE + END IF + IF( WANTPT ) + $ CALL CSCAL( N, T, PT( I+1, 1 ), LDPT ) + T = AB( KU+1, I+1 )*CONJG( T ) + END IF + END IF + 120 CONTINUE + RETURN +* +* End of CGBBRD +* + END diff --git a/dspl/liblapack/SRC/cgbcon.f b/dspl/liblapack/SRC/cgbcon.f new file mode 100644 index 0000000..1d32a68 --- /dev/null +++ b/dspl/liblapack/SRC/cgbcon.f @@ -0,0 +1,320 @@ +*> \brief \b CGBCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGBCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND, +* WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER INFO, KL, KU, LDAB, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL RWORK( * ) +* COMPLEX AB( LDAB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGBCON estimates the reciprocal of the condition number of a complex +*> general band matrix A, in either the 1-norm or the infinity-norm, +*> using the LU factorization computed by CGBTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as +*> RCOND = 1 / ( norm(A) * norm(inv(A)) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by CGBTRF. U is stored as an upper triangular band +*> matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +*> the multipliers used during the factorization are stored in +*> rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= N, row i of the matrix was +*> interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> If NORM = '1' or 'O', the 1-norm of the original matrix A. +*> If NORM = 'I', the infinity-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(norm(A) * norm(inv(A))). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGBcomputational +* +* ===================================================================== + SUBROUTINE CGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND, + $ WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, KL, KU, LDAB, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL RWORK( * ) + COMPLEX AB( LDAB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LNOTI, ONENRM + CHARACTER NORMIN + INTEGER IX, J, JP, KASE, KASE1, KD, LM + REAL AINVNM, SCALE, SMLNUM + COMPLEX T, ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + COMPLEX CDOTC + EXTERNAL LSAME, ICAMAX, SLAMCH, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CLACN2, CLATBS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN + INFO = -6 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KD = KL + KU + 1 + LNOTI = KL.GT.0 + KASE = 0 + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + IF( LNOTI ) THEN + DO 20 J = 1, N - 1 + LM = MIN( KL, N-J ) + JP = IPIV( J ) + T = WORK( JP ) + IF( JP.NE.J ) THEN + WORK( JP ) = WORK( J ) + WORK( J ) = T + END IF + CALL CAXPY( LM, -T, AB( KD+1, J ), 1, WORK( J+1 ), 1 ) + 20 CONTINUE + END IF +* +* Multiply by inv(U). +* + CALL CLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ KL+KU, AB, LDAB, WORK, SCALE, RWORK, INFO ) + ELSE +* +* Multiply by inv(U**H). +* + CALL CLATBS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, KL+KU, AB, LDAB, WORK, SCALE, RWORK, + $ INFO ) +* +* Multiply by inv(L**H). +* + IF( LNOTI ) THEN + DO 30 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + WORK( J ) = WORK( J ) - CDOTC( LM, AB( KD+1, J ), 1, + $ WORK( J+1 ), 1 ) + JP = IPIV( J ) + IF( JP.NE.J ) THEN + T = WORK( JP ) + WORK( JP ) = WORK( J ) + WORK( J ) = T + END IF + 30 CONTINUE + END IF + END IF +* +* Divide X by 1/SCALE if doing so will not cause overflow. +* + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 40 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 40 CONTINUE + RETURN +* +* End of CGBCON +* + END diff --git a/dspl/liblapack/SRC/cgbequ.f b/dspl/liblapack/SRC/cgbequ.f new file mode 100644 index 0000000..100f920 --- /dev/null +++ b/dspl/liblapack/SRC/cgbequ.f @@ -0,0 +1,333 @@ +*> \brief \b CGBEQU +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGBEQU + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGBEQU( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, +* AMAX, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, KL, KU, LDAB, M, N +* REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* REAL C( * ), R( * ) +* COMPLEX AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGBEQU computes row and column scalings intended to equilibrate an +*> M-by-N band matrix A and reduce its condition number. R returns the +*> row scale factors and C the column scale factors, chosen to try to +*> make the largest element in each row and column of the matrix B with +*> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. +*> +*> R(i) and C(j) are restricted to be between SMLNUM = smallest safe +*> number and BIGNUM = largest safe number. Use of these scaling +*> factors is not guaranteed to reduce the condition number of A but +*> works well in practice. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> The band matrix A, stored in rows 1 to KL+KU+1. The j-th +*> column of A is stored in the j-th column of the array AB as +*> follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is REAL array, dimension (M) +*> If INFO = 0, or INFO > M, R contains the row scale factors +*> for A. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is REAL array, dimension (N) +*> If INFO = 0, C contains the column scale factors for A. +*> \endverbatim +*> +*> \param[out] ROWCND +*> \verbatim +*> ROWCND is REAL +*> If INFO = 0 or INFO > M, ROWCND contains the ratio of the +*> smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +*> AMAX is neither too large nor too small, it is not worth +*> scaling by R. +*> \endverbatim +*> +*> \param[out] COLCND +*> \verbatim +*> COLCND is REAL +*> If INFO = 0, COLCND contains the ratio of the smallest +*> C(i) to the largest C(i). If COLCND >= 0.1, it is not +*> worth scaling by C. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= M: the i-th row of A is exactly zero +*> > M: the (i-M)-th column of A is exactly zero +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGBcomputational +* +* ===================================================================== + SUBROUTINE CGBEQU( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N + REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + REAL C( * ), R( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, KD + REAL BIGNUM, RCMAX, RCMIN, SMLNUM + COMPLEX ZDUM +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + KD = KU + 1 + DO 30 J = 1, N + DO 20 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + R( I ) = MAX( R( I ), CABS1( AB( KD+I-J, J ) ) ) + 20 CONTINUE + 30 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)) +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + KD = KU + 1 + DO 90 J = 1, N + DO 80 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + C( J ) = MAX( C( J ), CABS1( AB( KD+I-J, J ) )*R( I ) ) + 80 CONTINUE + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)) +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of CGBEQU +* + END diff --git a/dspl/liblapack/SRC/cgbequb.f b/dspl/liblapack/SRC/cgbequb.f new file mode 100644 index 0000000..fd69a57 --- /dev/null +++ b/dspl/liblapack/SRC/cgbequb.f @@ -0,0 +1,350 @@ +*> \brief \b CGBEQUB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGBEQUB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGBEQUB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, +* AMAX, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, KL, KU, LDAB, M, N +* REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* REAL C( * ), R( * ) +* COMPLEX AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGBEQUB computes row and column scalings intended to equilibrate an +*> M-by-N matrix A and reduce its condition number. R returns the row +*> scale factors and C the column scale factors, chosen to try to make +*> the largest element in each row and column of the matrix B with +*> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most +*> the radix. +*> +*> R(i) and C(j) are restricted to be a power of the radix between +*> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use +*> of these scaling factors is not guaranteed to reduce the condition +*> number of A but works well in practice. +*> +*> This routine differs from CGEEQU by restricting the scaling factors +*> to a power of the radix. Barring over- and underflow, scaling by +*> these factors introduces no additional rounding errors. However, the +*> scaled entries' magnitudes are no longer approximately 1 but lie +*> between sqrt(radix) and 1/sqrt(radix). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array A. LDAB >= max(1,M). +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is REAL array, dimension (M) +*> If INFO = 0 or INFO > M, R contains the row scale factors +*> for A. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is REAL array, dimension (N) +*> If INFO = 0, C contains the column scale factors for A. +*> \endverbatim +*> +*> \param[out] ROWCND +*> \verbatim +*> ROWCND is REAL +*> If INFO = 0 or INFO > M, ROWCND contains the ratio of the +*> smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +*> AMAX is neither too large nor too small, it is not worth +*> scaling by R. +*> \endverbatim +*> +*> \param[out] COLCND +*> \verbatim +*> COLCND is REAL +*> If INFO = 0, COLCND contains the ratio of the smallest +*> C(i) to the largest C(i). If COLCND >= 0.1, it is not +*> worth scaling by C. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= M: the i-th row of A is exactly zero +*> > M: the (i-M)-th column of A is exactly zero +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexGBcomputational +* +* ===================================================================== + SUBROUTINE CGBEQUB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N + REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + REAL C( * ), R( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, KD + REAL BIGNUM, RCMAX, RCMIN, SMLNUM, RADIX, + $ LOGRDX + COMPLEX ZDUM +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, LOG, REAL, AIMAG +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBEQUB', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. Assume SMLNUM is a power of the radix. +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + RADIX = SLAMCH( 'B' ) + LOGRDX = LOG(RADIX) +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + KD = KU + 1 + DO 30 J = 1, N + DO 20 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + R( I ) = MAX( R( I ), CABS1( AB( KD+I-J, J ) ) ) + 20 CONTINUE + 30 CONTINUE + DO I = 1, M + IF( R( I ).GT.ZERO ) THEN + R( I ) = RADIX**INT( LOG( R( I ) ) / LOGRDX ) + END IF + END DO +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)). +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors. +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + DO 90 J = 1, N + DO 80 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + C( J ) = MAX( C( J ), CABS1( AB( KD+I-J, J ) )*R( I ) ) + 80 CONTINUE + IF( C( J ).GT.ZERO ) THEN + C( J ) = RADIX**INT( LOG( C( J ) ) / LOGRDX ) + END IF + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)). +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of CGBEQUB +* + END diff --git a/dspl/liblapack/SRC/cgbrfs.f b/dspl/liblapack/SRC/cgbrfs.f new file mode 100644 index 0000000..464128b --- /dev/null +++ b/dspl/liblapack/SRC/cgbrfs.f @@ -0,0 +1,475 @@ +*> \brief \b CGBRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGBRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, +* IPIV, B, LDB, X, LDX, FERR, BERR, WORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGBRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is banded, and provides +*> error bounds and backward error estimates for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> The original band matrix A, stored in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is COMPLEX array, dimension (LDAFB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by CGBTRF. U is stored as an upper triangular band +*> matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +*> the multipliers used during the factorization are stored in +*> rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL*KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from CGBTRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by CGBTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGBcomputational +* +* ===================================================================== + SUBROUTINE CGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, + $ IPIV, B, LDB, X, LDX, FERR, BERR, WORK, RWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANSN, TRANST + INTEGER COUNT, I, J, K, KASE, KK, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGBMV, CGBTRS, CLACN2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, MIN, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -7 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -9 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = MIN( KL+KU+2, N+1 ) + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL CCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL CGBMV( TRANS, N, N, KL, KU, -CONE, AB, LDAB, X( 1, J ), 1, + $ CONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(op(A))*abs(X) + abs(B). +* + IF( NOTRAN ) THEN + DO 50 K = 1, N + KK = KU + 1 - K + XK = CABS1( X( K, J ) ) + DO 40 I = MAX( 1, K-KU ), MIN( N, K+KL ) + RWORK( I ) = RWORK( I ) + CABS1( AB( KK+I, K ) )*XK + 40 CONTINUE + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + KK = KU + 1 - K + DO 60 I = MAX( 1, K-KU ), MIN( N, K+KL ) + S = S + CABS1( AB( KK+I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV, WORK, N, + $ INFO ) + CALL CAXPY( N, CONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL CGBTRS( TRANST, N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL CGBTRS( TRANSN, N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of CGBRFS +* + END diff --git a/dspl/liblapack/SRC/cgbrfsx.f b/dspl/liblapack/SRC/cgbrfsx.f new file mode 100644 index 0000000..041b6a1 --- /dev/null +++ b/dspl/liblapack/SRC/cgbrfsx.f @@ -0,0 +1,763 @@ +*> \brief \b CGBRFSX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGBRFSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGBRFSX( TRANS, EQUED, N, KL, KU, NRHS, AB, LDAB, AFB, +* LDAFB, IPIV, R, C, B, LDB, X, LDX, RCOND, +* BERR, N_ERR_BNDS, ERR_BNDS_NORM, +* ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS, EQUED +* INTEGER INFO, LDAB, LDAFB, LDB, LDX, N, KL, KU, NRHS, +* $ NPARAMS, N_ERR_BNDS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ X( LDX , * ),WORK( * ) +* REAL R( * ), C( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGBRFSX improves the computed solution to a system of linear +*> equations and provides error bounds and backward error estimates +*> for the solution. In addition to normwise error bound, the code +*> provides maximum componentwise error bound if possible. See +*> comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the +*> error bounds. +*> +*> The original system of linear equations may have been equilibrated +*> before calling this routine, as described by arguments EQUED, R +*> and C below. In this case, the solution and error bounds returned +*> are for the original unequilibrated system. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done to A +*> before calling this routine. This is needed to compute +*> the solution and error bounds correctly. +*> = 'N': No equilibration +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> The right hand side B has been changed accordingly. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> The original band matrix A, stored in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is COMPLEX array, dimension (LDAFB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by DGBTRF. U is stored as an upper triangular band +*> matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +*> the multipliers used during the factorization are stored in +*> rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL*KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from SGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is REAL array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. R is an input argument if FACT = 'F'; +*> otherwise, R is an output argument. If FACT = 'F' and +*> EQUED = 'R' or 'B', each element of R must be positive. +*> If R is output, each element of R is a power of the radix. +*> If R is input, each element of R should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. C is an input argument if FACT = 'F'; +*> otherwise, C is an output argument. If FACT = 'F' and +*> EQUED = 'C' or 'B', each element of C must be positive. +*> If C is output, each element of C is a power of the radix. +*> If C is input, each element of C should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by SGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is REAL array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexGBcomputational +* +* ===================================================================== + SUBROUTINE CGBRFSX( TRANS, EQUED, N, KL, KU, NRHS, AB, LDAB, AFB, + $ LDAFB, IPIV, R, C, B, LDB, X, LDX, RCOND, + $ BERR, N_ERR_BNDS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, RWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER TRANS, EQUED + INTEGER INFO, LDAB, LDAFB, LDB, LDX, N, KL, KU, NRHS, + $ NPARAMS, N_ERR_BNDS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ X( LDX , * ),WORK( * ) + REAL R( * ), C( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL ITREF_DEFAULT, ITHRESH_DEFAULT, + $ COMPONENTWISE_DEFAULT + REAL RTHRESH_DEFAULT, DZTHRESH_DEFAULT + PARAMETER ( ITREF_DEFAULT = 1.0 ) + PARAMETER ( ITHRESH_DEFAULT = 10.0 ) + PARAMETER ( COMPONENTWISE_DEFAULT = 1.0 ) + PARAMETER ( RTHRESH_DEFAULT = 0.5 ) + PARAMETER ( DZTHRESH_DEFAULT = 0.25 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. Local Scalars .. + CHARACTER(1) NORM + LOGICAL ROWEQU, COLEQU, NOTRAN, IGNORE_CWISE + INTEGER J, TRANS_TYPE, PREC_TYPE, REF_TYPE, N_NORMS, + $ ITHRESH + REAL ANORM, RCOND_TMP, ILLRCOND_THRESH, ERR_LBND, + $ CWISE_WRONG, RTHRESH, UNSTABLE_THRESH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CGBCON, CLA_GBRFSX_EXTENDED +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT, TRANSFER +* .. +* .. External Functions .. + EXTERNAL LSAME, ILATRANS, ILAPREC + EXTERNAL SLAMCH, CLANGB, CLA_GBRCOND_X, CLA_GBRCOND_C + REAL SLAMCH, CLANGB, CLA_GBRCOND_X, CLA_GBRCOND_C + LOGICAL LSAME + INTEGER ILATRANS, ILAPREC +* .. +* .. Executable Statements .. +* +* Check the input parameters. +* + INFO = 0 + TRANS_TYPE = ILATRANS( TRANS ) + REF_TYPE = INT( ITREF_DEFAULT ) + IF ( NPARAMS .GE. LA_LINRX_ITREF_I ) THEN + IF ( PARAMS( LA_LINRX_ITREF_I ) .LT. 0.0 ) THEN + PARAMS( LA_LINRX_ITREF_I ) = ITREF_DEFAULT + ELSE + REF_TYPE = PARAMS( LA_LINRX_ITREF_I ) + END IF + END IF +* +* Set default parameters. +* + ILLRCOND_THRESH = REAL( N ) * SLAMCH( 'Epsilon' ) + ITHRESH = INT( ITHRESH_DEFAULT ) + RTHRESH = RTHRESH_DEFAULT + UNSTABLE_THRESH = DZTHRESH_DEFAULT + IGNORE_CWISE = COMPONENTWISE_DEFAULT .EQ. 0.0 +* + IF ( NPARAMS.GE.LA_LINRX_ITHRESH_I ) THEN + IF ( PARAMS( LA_LINRX_ITHRESH_I ).LT.0.0 ) THEN + PARAMS( LA_LINRX_ITHRESH_I ) = ITHRESH + ELSE + ITHRESH = INT( PARAMS( LA_LINRX_ITHRESH_I ) ) + END IF + END IF + IF ( NPARAMS.GE.LA_LINRX_CWISE_I ) THEN + IF ( PARAMS( LA_LINRX_CWISE_I ).LT.0.0 ) THEN + IF ( IGNORE_CWISE ) THEN + PARAMS( LA_LINRX_CWISE_I ) = 0.0 + ELSE + PARAMS( LA_LINRX_CWISE_I ) = 1.0 + END IF + ELSE + IGNORE_CWISE = PARAMS( LA_LINRX_CWISE_I ) .EQ. 0.0 + END IF + END IF + IF ( REF_TYPE .EQ. 0 .OR. N_ERR_BNDS .EQ. 0 ) THEN + N_NORMS = 0 + ELSE IF ( IGNORE_CWISE ) THEN + N_NORMS = 1 + ELSE + N_NORMS = 2 + END IF +* + NOTRAN = LSAME( TRANS, 'N' ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) +* +* Test input parameters. +* + IF( TRANS_TYPE.EQ.-1 ) THEN + INFO = -1 + ELSE IF( .NOT.ROWEQU .AND. .NOT.COLEQU .AND. + $ .NOT.LSAME( EQUED, 'N' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KL.LT.0 ) THEN + INFO = -4 + ELSE IF( KU.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -8 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBRFSX', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + RCOND = 1.0 + DO J = 1, NRHS + BERR( J ) = 0.0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 0.0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 0.0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 1.0 + END IF + END DO + RETURN + END IF +* +* Default to failure. +* + RCOND = 0.0 + DO J = 1, NRHS + BERR( J ) = 1.0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 0.0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 0.0 + END IF + END DO +* +* Compute the norm of A and the reciprocal of the condition +* number of A. +* + IF( NOTRAN ) THEN + NORM = 'I' + ELSE + NORM = '1' + END IF + ANORM = CLANGB( NORM, N, KL, KU, AB, LDAB, RWORK ) + CALL CGBCON( NORM, N, KL, KU, AFB, LDAFB, IPIV, ANORM, RCOND, + $ WORK, RWORK, INFO ) +* +* Perform refinement on each right-hand side +* + IF ( REF_TYPE .NE. 0 .AND. INFO .EQ. 0 ) THEN + + PREC_TYPE = ILAPREC( 'D' ) + + IF ( NOTRAN ) THEN + CALL CLA_GBRFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, KL, KU, + $ NRHS, AB, LDAB, AFB, LDAFB, IPIV, COLEQU, C, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, WORK, RWORK, WORK(N+1), + $ TRANSFER (RWORK(1:2*N), (/ (ZERO, ZERO) /), N), + $ RCOND, ITHRESH, RTHRESH, UNSTABLE_THRESH, IGNORE_CWISE, + $ INFO ) + ELSE + CALL CLA_GBRFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, KL, KU, + $ NRHS, AB, LDAB, AFB, LDAFB, IPIV, ROWEQU, R, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, WORK, RWORK, WORK(N+1), + $ TRANSFER (RWORK(1:2*N), (/ (ZERO, ZERO) /), N), + $ RCOND, ITHRESH, RTHRESH, UNSTABLE_THRESH, IGNORE_CWISE, + $ INFO ) + END IF + END IF + + ERR_LBND = MAX( 10.0, SQRT( REAL( N ) ) ) * SLAMCH( 'Epsilon' ) + IF (N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 1) THEN +* +* Compute scaled normwise condition number cond(A*C). +* + IF ( COLEQU .AND. NOTRAN ) THEN + RCOND_TMP = CLA_GBRCOND_C( TRANS, N, KL, KU, AB, LDAB, AFB, + $ LDAFB, IPIV, C, .TRUE., INFO, WORK, RWORK ) + ELSE IF ( ROWEQU .AND. .NOT. NOTRAN ) THEN + RCOND_TMP = CLA_GBRCOND_C( TRANS, N, KL, KU, AB, LDAB, AFB, + $ LDAFB, IPIV, R, .TRUE., INFO, WORK, RWORK ) + ELSE + RCOND_TMP = CLA_GBRCOND_C( TRANS, N, KL, KU, AB, LDAB, AFB, + $ LDAFB, IPIV, C, .FALSE., INFO, WORK, RWORK ) + END IF + DO J = 1, NRHS +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .GT. 1.0) + $ ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 0.0 + IF ( INFO .LE. N ) INFO = N + J + ELSE IF ( ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .LT. ERR_LBND ) + $ THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF + + IF (N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 2) THEN +* +* Compute componentwise condition number cond(A*diag(Y(:,J))) for +* each right-hand side using the current solution as an estimate of +* the true solution. If the componentwise error estimate is too +* large, then the solution is a lousy estimate of truth and the +* estimated RCOND may be too optimistic. To avoid misleading users, +* the inverse condition number is set to 0.0 when the estimated +* cwise error is at least CWISE_WRONG. +* + CWISE_WRONG = SQRT( SLAMCH( 'Epsilon' ) ) + DO J = 1, NRHS + IF (ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .LT. CWISE_WRONG ) + $ THEN + RCOND_TMP = CLA_GBRCOND_X( TRANS, N, KL, KU, AB, LDAB, + $ AFB, LDAFB, IPIV, X( 1, J ), INFO, WORK, RWORK ) + ELSE + RCOND_TMP = 0.0 + END IF +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .GT. 1.0 ) + $ ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 0.0 + IF ( PARAMS( LA_LINRX_CWISE_I ) .EQ. 1.0 + $ .AND. INFO.LT.N + J ) INFO = N + J + ELSE IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) + $ .LT. ERR_LBND ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF +* + RETURN +* +* End of CGBRFSX +* + END diff --git a/dspl/liblapack/SRC/cgbsv.f b/dspl/liblapack/SRC/cgbsv.f new file mode 100644 index 0000000..7368783 --- /dev/null +++ b/dspl/liblapack/SRC/cgbsv.f @@ -0,0 +1,223 @@ +*> \brief CGBSV computes the solution to system of linear equations A * X = B for GB matrices (simple driver) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGBSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX AB( LDAB, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGBSV computes the solution to a complex system of linear equations +*> A * X = B, where A is a band matrix of order N with KL subdiagonals +*> and KU superdiagonals, and X and B are N-by-NRHS matrices. +*> +*> The LU decomposition with partial pivoting and row interchanges is +*> used to factor A as A = L * U, where L is a product of permutation +*> and unit lower triangular matrices with KL subdiagonals, and U is +*> upper triangular with KL+KU superdiagonals. The factored form of A +*> is then used to solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows KL+1 to +*> 2*KL+KU+1; rows 1 to KL of the array need not be set. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KL+KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+KL) +*> On exit, details of the factorization: U is stored as an +*> upper triangular band matrix with KL+KU superdiagonals in +*> rows 1 to KL+KU+1, and the multipliers used during the +*> factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices that define the permutation matrix P; +*> row i of the matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and the solution has not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGBsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> M = N = 6, KL = 2, KU = 1: +*> +*> On entry: On exit: +*> +*> * * * + + + * * * u14 u25 u36 +*> * * + + + + * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +*> a31 a42 a53 a64 * * m31 m42 m53 m64 * * +*> +*> Array elements marked * are not used by the routine; elements marked +*> + need not be set on entry, but are required by the routine to store +*> elements of U because of fill-in resulting from the row interchanges. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AB( LDAB, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL CGBTRF, CGBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( KL.LT.0 ) THEN + INFO = -2 + ELSE IF( KU.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBSV ', -INFO ) + RETURN + END IF +* +* Compute the LU factorization of the band matrix A. +* + CALL CGBTRF( N, N, KL, KU, AB, LDAB, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CGBTRS( 'No transpose', N, KL, KU, NRHS, AB, LDAB, IPIV, + $ B, LDB, INFO ) + END IF + RETURN +* +* End of CGBSV +* + END diff --git a/dspl/liblapack/SRC/cgbsvx.f b/dspl/liblapack/SRC/cgbsvx.f new file mode 100644 index 0000000..1c102cb --- /dev/null +++ b/dspl/liblapack/SRC/cgbsvx.f @@ -0,0 +1,647 @@ +*> \brief CGBSVX computes the solution to system of linear equations A * X = B for GB matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGBSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGBSVX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, +* LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, +* RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, TRANS +* INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL BERR( * ), C( * ), FERR( * ), R( * ), +* $ RWORK( * ) +* COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGBSVX uses the LU factorization to compute the solution to a complex +*> system of linear equations A * X = B, A**T * X = B, or A**H * X = B, +*> where A is a band matrix of order N with KL subdiagonals and KU +*> superdiagonals, and X and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed by this subroutine: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +*> TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +*> TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +*> or diag(C)*B (if TRANS = 'T' or 'C'). +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the +*> matrix A (after equilibration if FACT = 'E') as +*> A = L * U, +*> where L is a product of permutation and unit lower triangular +*> matrices with KL subdiagonals, and U is upper triangular with +*> KL+KU superdiagonals. +*> +*> 3. If some U(i,i)=0, so that U is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +*> that it solves the original system before equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AFB and IPIV contain the factored form of +*> A. If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by R and C. +*> AB, AFB, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AFB and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AFB and factored. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations. +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> +*> If FACT = 'F' and EQUED is not 'N', then A must have been +*> equilibrated by the scaling factors in R and/or C. AB is not +*> modified if FACT = 'F' or 'N', or if FACT = 'E' and +*> EQUED = 'N' on exit. +*> +*> On exit, if EQUED .ne. 'N', A is scaled as follows: +*> EQUED = 'R': A := diag(R) * A +*> EQUED = 'C': A := A * diag(C) +*> EQUED = 'B': A := diag(R) * A * diag(C). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in,out] AFB +*> \verbatim +*> AFB is COMPLEX array, dimension (LDAFB,N) +*> If FACT = 'F', then AFB is an input argument and on entry +*> contains details of the LU factorization of the band matrix +*> A, as computed by CGBTRF. U is stored as an upper triangular +*> band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +*> and the multipliers used during the factorization are stored +*> in rows KL+KU+2 to 2*KL+KU+1. If EQUED .ne. 'N', then AFB is +*> the factored form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AFB is an output argument and on exit +*> returns details of the LU factorization of A. +*> +*> If FACT = 'E', then AFB is an output argument and on exit +*> returns details of the LU factorization of the equilibrated +*> matrix A (see the description of AB for the form of the +*> equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains the pivot indices from the factorization A = L*U +*> as computed by CGBTRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = L*U +*> of the equilibrated matrix A. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is REAL array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. R is an input argument if FACT = 'F'; +*> otherwise, R is an output argument. If FACT = 'F' and +*> EQUED = 'R' or 'B', each element of R must be positive. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. C is an input argument if FACT = 'F'; +*> otherwise, C is an output argument. If FACT = 'F' and +*> EQUED = 'C' or 'B', each element of C must be positive. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +*> diag(R)*B; +*> if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +*> overwritten by diag(C)*B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X +*> to the original system of equations. Note that A and B are +*> modified on exit if EQUED .ne. 'N', and the solution to the +*> equilibrated system is inv(diag(C))*X if TRANS = 'N' and +*> EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' +*> and EQUED = 'R' or 'B'. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The estimate of the reciprocal condition number of the matrix +*> A after equilibration (if done). If RCOND is less than the +*> machine precision (in particular, if RCOND = 0), the matrix +*> is singular to working precision. This condition is +*> indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> On exit, RWORK(1) contains the reciprocal pivot growth +*> factor norm(A)/norm(U). The "max absolute element" norm is +*> used. If RWORK(1) is much less than 1, then the stability +*> of the LU factorization of the (equilibrated) matrix A +*> could be poor. This also means that the solution X, condition +*> estimator RCOND, and forward error bound FERR could be +*> unreliable. If factorization fails with 0 RWORK(1) contains the reciprocal pivot growth factor for the +*> leading INFO columns of A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, so the solution and error bounds +*> could not be computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexGBsolve +* +* ===================================================================== + SUBROUTINE CGBSVX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, + $ LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, + $ RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), C( * ), FERR( * ), R( * ), + $ RWORK( * ) + COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* Moved setting of INFO = N+1 so INFO does not subsequently get +* overwritten. Sven, 17 Mar 05. +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + CHARACTER NORM + INTEGER I, INFEQU, J, J1, J2 + REAL AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, RPVGRW, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANGB, CLANTB, SLAMCH + EXTERNAL LSAME, CLANGB, CLANTB, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGBCON, CGBEQU, CGBRFS, CGBTRF, CGBTRS, + $ CLACPY, CLAQGB, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KL.LT.0 ) THEN + INFO = -4 + ELSE IF( KU.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -8 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -10 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -12 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -13 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -14 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -16 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -18 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL CGBEQU( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL CLAQGB( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = R( I )*B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( COLEQU ) THEN + DO 60 J = 1, NRHS + DO 50 I = 1, N + B( I, J ) = C( I )*B( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of the band matrix A. +* + DO 70 J = 1, N + J1 = MAX( J-KU, 1 ) + J2 = MIN( J+KL, N ) + CALL CCOPY( J2-J1+1, AB( KU+1-J+J1, J ), 1, + $ AFB( KL+KU+1-J+J1, J ), 1 ) + 70 CONTINUE +* + CALL CGBTRF( N, N, KL, KU, AFB, LDAFB, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + ANORM = ZERO + DO 90 J = 1, INFO + DO 80 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) + ANORM = MAX( ANORM, ABS( AB( I, J ) ) ) + 80 CONTINUE + 90 CONTINUE + RPVGRW = CLANTB( 'M', 'U', 'N', INFO, MIN( INFO-1, KL+KU ), + $ AFB( MAX( 1, KL+KU+2-INFO ), 1 ), LDAFB, + $ RWORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = ANORM / RPVGRW + END IF + RWORK( 1 ) = RPVGRW + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A and the +* reciprocal pivot growth factor RPVGRW. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = CLANGB( NORM, N, KL, KU, AB, LDAB, RWORK ) + RPVGRW = CLANTB( 'M', 'U', 'N', N, KL+KU, AFB, LDAFB, RWORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = CLANGB( 'M', N, KL, KU, AB, LDAB, RWORK ) / RPVGRW + END IF +* +* Compute the reciprocal of the condition number of A. +* + CALL CGBCON( NORM, N, KL, KU, AFB, LDAFB, IPIV, ANORM, RCOND, + $ WORK, RWORK, INFO ) +* +* Compute the solution matrix X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CGBTRS( TRANS, N, KL, KU, NRHS, AFB, LDAFB, IPIV, X, LDX, + $ INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL CGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, IPIV, + $ B, LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( NOTRAN ) THEN + IF( COLEQU ) THEN + DO 110 J = 1, NRHS + DO 100 I = 1, N + X( I, J ) = C( I )*X( I, J ) + 100 CONTINUE + 110 CONTINUE + DO 120 J = 1, NRHS + FERR( J ) = FERR( J ) / COLCND + 120 CONTINUE + END IF + ELSE IF( ROWEQU ) THEN + DO 140 J = 1, NRHS + DO 130 I = 1, N + X( I, J ) = R( I )*X( I, J ) + 130 CONTINUE + 140 CONTINUE + DO 150 J = 1, NRHS + FERR( J ) = FERR( J ) / ROWCND + 150 CONTINUE + END IF +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RWORK( 1 ) = RPVGRW + RETURN +* +* End of CGBSVX +* + END diff --git a/dspl/liblapack/SRC/cgbsvxx.f b/dspl/liblapack/SRC/cgbsvxx.f new file mode 100644 index 0000000..2e113f9 --- /dev/null +++ b/dspl/liblapack/SRC/cgbsvxx.f @@ -0,0 +1,803 @@ +*> \brief CGBSVXX computes the solution to system of linear equations A * X = B for GB matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGBSVXX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGBSVXX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, +* LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, +* RCOND, RPVGRW, BERR, N_ERR_BNDS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, +* WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, TRANS +* INTEGER INFO, LDAB, LDAFB, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* REAL RCOND, RPVGRW +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ X( LDX , * ),WORK( * ) +* REAL R( * ), C( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGBSVXX uses the LU factorization to compute the solution to a +*> complex system of linear equations A * X = B, where A is an +*> N-by-N matrix and X and B are N-by-NRHS matrices. +*> +*> If requested, both normwise and maximum componentwise error bounds +*> are returned. CGBSVXX will return a solution with a tiny +*> guaranteed error (O(eps) where eps is the working machine +*> precision) unless the matrix is very ill-conditioned, in which +*> case a warning is returned. Relevant condition numbers also are +*> calculated and returned. +*> +*> CGBSVXX accepts user-provided factorizations and equilibration +*> factors; see the definitions of the FACT and EQUED options. +*> Solving with refinement and using a factorization from a previous +*> CGBSVXX call will also produce a solution with either O(eps) +*> errors or warnings, but we cannot make that claim for general +*> user-provided factorizations and equilibration factors if they +*> differ from what CGBSVXX would itself produce. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> +*> TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +*> TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +*> TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +*> +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +*> or diag(C)*B (if TRANS = 'T' or 'C'). +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor +*> the matrix A (after equilibration if FACT = 'E') as +*> +*> A = P * L * U, +*> +*> where P is a permutation matrix, L is a unit lower triangular +*> matrix, and U is upper triangular. +*> +*> 3. If some U(i,i)=0, so that U is exactly singular, then the +*> routine returns with INFO = i. Otherwise, the factored form of A +*> is used to estimate the condition number of the matrix A (see +*> argument RCOND). If the reciprocal of the condition number is less +*> than machine precision, the routine still goes on to solve for X +*> and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), +*> the routine will use iterative refinement to try to get a small +*> error and error bounds. Refinement calculates the residual to at +*> least twice the working precision. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +*> that it solves the original system before equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF and IPIV contain the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by R and C. +*> A, AF, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate Transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> +*> If FACT = 'F' and EQUED is not 'N', then AB must have been +*> equilibrated by the scaling factors in R and/or C. AB is not +*> modified if FACT = 'F' or 'N', or if FACT = 'E' and +*> EQUED = 'N' on exit. +*> +*> On exit, if EQUED .ne. 'N', A is scaled as follows: +*> EQUED = 'R': A := diag(R) * A +*> EQUED = 'C': A := A * diag(C) +*> EQUED = 'B': A := diag(R) * A * diag(C). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in,out] AFB +*> \verbatim +*> AFB is COMPLEX array, dimension (LDAFB,N) +*> If FACT = 'F', then AFB is an input argument and on entry +*> contains details of the LU factorization of the band matrix +*> A, as computed by CGBTRF. U is stored as an upper triangular +*> band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +*> and the multipliers used during the factorization are stored +*> in rows KL+KU+2 to 2*KL+KU+1. If EQUED .ne. 'N', then AFB is +*> the factored form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the equilibrated matrix A (see the description of A for +*> the form of the equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains the pivot indices from the factorization A = P*L*U +*> as computed by SGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the equilibrated matrix A. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is REAL array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. R is an input argument if FACT = 'F'; +*> otherwise, R is an output argument. If FACT = 'F' and +*> EQUED = 'R' or 'B', each element of R must be positive. +*> If R is output, each element of R is a power of the radix. +*> If R is input, each element of R should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. C is an input argument if FACT = 'F'; +*> otherwise, C is an output argument. If FACT = 'F' and +*> EQUED = 'C' or 'B', each element of C must be positive. +*> If C is output, each element of C is a power of the radix. +*> If C is input, each element of C should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +*> diag(R)*B; +*> if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +*> overwritten by diag(C)*B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X to the original +*> system of equations. Note that A and B are modified on exit +*> if EQUED .ne. 'N', and the solution to the equilibrated system is +*> inv(diag(C))*X if TRANS = 'N' and EQUED = 'C' or 'B', or +*> inv(diag(R))*X if TRANS = 'T' or 'C' and EQUED = 'R' or 'B'. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] RPVGRW +*> \verbatim +*> RPVGRW is REAL +*> Reciprocal pivot growth. On exit, this contains the reciprocal +*> pivot growth factor norm(A)/norm(U). The "max absolute element" +*> norm is used. If this is much less than 1, then the stability of +*> the LU factorization of the (equilibrated) matrix A could be poor. +*> This also means that the solution X, estimated condition numbers, +*> and error bounds could be unreliable. If factorization fails with +*> 0 for the leading INFO columns of A. In SGESVX, this quantity is +*> returned in WORK(1). +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is REAL array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexGBsolve +* +* ===================================================================== + SUBROUTINE CGBSVXX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, + $ LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, + $ RCOND, RPVGRW, BERR, N_ERR_BNDS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, LDAB, LDAFB, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + REAL RCOND, RPVGRW +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ X( LDX , * ),WORK( * ) + REAL R( * ), C( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + INTEGER INFEQU, I, J, KL, KU + REAL AMAX, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, SMLNUM +* .. +* .. External Functions .. + EXTERNAL LSAME, SLAMCH, CLA_GBRPVGRW + LOGICAL LSAME + REAL SLAMCH, CLA_GBRPVGRW +* .. +* .. External Subroutines .. + EXTERNAL CGBEQUB, CGBTRF, CGBTRS, CLACPY, CLAQGB, + $ XERBLA, CLASCL2, CGBRFSX +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF +* +* Default is failure. If an input parameter is wrong or +* factorization fails, make everything look horrible. Only the +* pivot growth is set here, the rest is initialized in CGBRFSX. +* + RPVGRW = ZERO +* +* Test the input parameters. PARAMS is not tested until SGERFSX. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT. + $ LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KL.LT.0 ) THEN + INFO = -4 + ELSE IF( KU.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -8 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -10 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -12 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -13 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -14 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -15 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBSVXX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL CGBEQUB( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL CLAQGB( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF +* +* If the scaling factors are not applied, set them to 1.0. +* + IF ( .NOT.ROWEQU ) THEN + DO J = 1, N + R( J ) = 1.0 + END DO + END IF + IF ( .NOT.COLEQU ) THEN + DO J = 1, N + C( J ) = 1.0 + END DO + END IF + END IF +* +* Scale the right-hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) CALL CLASCL2( N, NRHS, R, B, LDB ) + ELSE + IF( COLEQU ) CALL CLASCL2( N, NRHS, C, B, LDB ) + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of A. +* + DO 40, J = 1, N + DO 30, I = KL+1, 2*KL+KU+1 + AFB( I, J ) = AB( I-KL, J ) + 30 CONTINUE + 40 CONTINUE + CALL CGBTRF( N, N, KL, KU, AFB, LDAFB, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Pivot in column INFO is exactly 0 +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + RPVGRW = CLA_GBRPVGRW( N, KL, KU, INFO, AB, LDAB, AFB, + $ LDAFB ) + RETURN + END IF + END IF +* +* Compute the reciprocal pivot growth factor RPVGRW. +* + RPVGRW = CLA_GBRPVGRW( N, KL, KU, N, AB, LDAB, AFB, LDAFB ) +* +* Compute the solution matrix X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CGBTRS( TRANS, N, KL, KU, NRHS, AFB, LDAFB, IPIV, X, LDX, + $ INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL CGBRFSX( TRANS, EQUED, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, + $ IPIV, R, C, B, LDB, X, LDX, RCOND, BERR, + $ N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, RWORK, INFO ) + +* +* Scale solutions. +* + IF ( COLEQU .AND. NOTRAN ) THEN + CALL CLASCL2( N, NRHS, C, X, LDX ) + ELSE IF ( ROWEQU .AND. .NOT.NOTRAN ) THEN + CALL CLASCL2( N, NRHS, R, X, LDX ) + END IF +* + RETURN +* +* End of CGBSVXX +* + END diff --git a/dspl/liblapack/SRC/cgbtf2.f b/dspl/liblapack/SRC/cgbtf2.f new file mode 100644 index 0000000..cd34d28 --- /dev/null +++ b/dspl/liblapack/SRC/cgbtf2.f @@ -0,0 +1,277 @@ +*> \brief \b CGBTF2 computes the LU factorization of a general band matrix using the unblocked version of the algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGBTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGBTF2 computes an LU factorization of a complex m-by-n band matrix +*> A using partial pivoting with row interchanges. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows KL+1 to +*> 2*KL+KU+1; rows 1 to KL of the array need not be set. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +*> +*> On exit, details of the factorization: U is stored as an +*> upper triangular band matrix with KL+KU superdiagonals in +*> rows 1 to KL+KU+1, and the multipliers used during the +*> factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = +i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGBcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> M = N = 6, KL = 2, KU = 1: +*> +*> On entry: On exit: +*> +*> * * * + + + * * * u14 u25 u36 +*> * * + + + + * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +*> a31 a42 a53 a64 * * m31 m42 m53 m64 * * +*> +*> Array elements marked * are not used by the routine; elements marked +*> + need not be set on entry, but are required by the routine to store +*> elements of U, because of fill-in resulting from the row +*> interchanges. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, JP, JU, KM, KV +* .. +* .. External Functions .. + INTEGER ICAMAX + EXTERNAL ICAMAX +* .. +* .. External Subroutines .. + EXTERNAL CGERU, CSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* KV is the number of superdiagonals in the factor U, allowing for +* fill-in. +* + KV = KU + KL +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KV+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Gaussian elimination with partial pivoting +* +* Set fill-in elements in columns KU+2 to KV to zero. +* + DO 20 J = KU + 2, MIN( KV, N ) + DO 10 I = KV - J + 2, KL + AB( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* JU is the index of the last column affected by the current stage +* of the factorization. +* + JU = 1 +* + DO 40 J = 1, MIN( M, N ) +* +* Set fill-in elements in column J+KV to zero. +* + IF( J+KV.LE.N ) THEN + DO 30 I = 1, KL + AB( I, J+KV ) = ZERO + 30 CONTINUE + END IF +* +* Find pivot and test for singularity. KM is the number of +* subdiagonal elements in the current column. +* + KM = MIN( KL, M-J ) + JP = ICAMAX( KM+1, AB( KV+1, J ), 1 ) + IPIV( J ) = JP + J - 1 + IF( AB( KV+JP, J ).NE.ZERO ) THEN + JU = MAX( JU, MIN( J+KU+JP-1, N ) ) +* +* Apply interchange to columns J to JU. +* + IF( JP.NE.1 ) + $ CALL CSWAP( JU-J+1, AB( KV+JP, J ), LDAB-1, + $ AB( KV+1, J ), LDAB-1 ) + IF( KM.GT.0 ) THEN +* +* Compute multipliers. +* + CALL CSCAL( KM, ONE / AB( KV+1, J ), AB( KV+2, J ), 1 ) +* +* Update trailing submatrix within the band. +* + IF( JU.GT.J ) + $ CALL CGERU( KM, JU-J, -ONE, AB( KV+2, J ), 1, + $ AB( KV, J+1 ), LDAB-1, AB( KV+1, J+1 ), + $ LDAB-1 ) + END IF + ELSE +* +* If pivot is zero, set INFO to the index of the pivot +* unless a zero pivot has already been found. +* + IF( INFO.EQ.0 ) + $ INFO = J + END IF + 40 CONTINUE + RETURN +* +* End of CGBTF2 +* + END diff --git a/dspl/liblapack/SRC/cgbtrf.f b/dspl/liblapack/SRC/cgbtrf.f new file mode 100644 index 0000000..601d063 --- /dev/null +++ b/dspl/liblapack/SRC/cgbtrf.f @@ -0,0 +1,517 @@ +*> \brief \b CGBTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGBTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGBTRF computes an LU factorization of a complex m-by-n band matrix A +*> using partial pivoting with row interchanges. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows KL+1 to +*> 2*KL+KU+1; rows 1 to KL of the array need not be set. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +*> +*> On exit, details of the factorization: U is stored as an +*> upper triangular band matrix with KL+KU superdiagonals in +*> rows 1 to KL+KU+1, and the multipliers used during the +*> factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = +i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGBcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> M = N = 6, KL = 2, KU = 1: +*> +*> On entry: On exit: +*> +*> * * * + + + * * * u14 u25 u36 +*> * * + + + + * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +*> a31 a42 a53 a64 * * m31 m42 m53 m64 * * +*> +*> Array elements marked * are not used by the routine; elements marked +*> + need not be set on entry, but are required by the routine to store +*> elements of U because of fill-in resulting from the row interchanges. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) + INTEGER NBMAX, LDWORK + PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 ) +* .. +* .. Local Scalars .. + INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP, + $ JU, K2, KM, KV, NB, NW + COMPLEX TEMP +* .. +* .. Local Arrays .. + COMPLEX WORK13( LDWORK, NBMAX ), + $ WORK31( LDWORK, NBMAX ) +* .. +* .. External Functions .. + INTEGER ICAMAX, ILAENV + EXTERNAL ICAMAX, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGBTF2, CGEMM, CGERU, CLASWP, CSCAL, + $ CSWAP, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* KV is the number of superdiagonals in the factor U, allowing for +* fill-in +* + KV = KU + KL +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KV+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment +* + NB = ILAENV( 1, 'CGBTRF', ' ', M, N, KL, KU ) +* +* The block size must not exceed the limit set by the size of the +* local arrays WORK13 and WORK31. +* + NB = MIN( NB, NBMAX ) +* + IF( NB.LE.1 .OR. NB.GT.KL ) THEN +* +* Use unblocked code +* + CALL CGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) + ELSE +* +* Use blocked code +* +* Zero the superdiagonal elements of the work array WORK13 +* + DO 20 J = 1, NB + DO 10 I = 1, J - 1 + WORK13( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Zero the subdiagonal elements of the work array WORK31 +* + DO 40 J = 1, NB + DO 30 I = J + 1, NB + WORK31( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Gaussian elimination with partial pivoting +* +* Set fill-in elements in columns KU+2 to KV to zero +* + DO 60 J = KU + 2, MIN( KV, N ) + DO 50 I = KV - J + 2, KL + AB( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE +* +* JU is the index of the last column affected by the current +* stage of the factorization +* + JU = 1 +* + DO 180 J = 1, MIN( M, N ), NB + JB = MIN( NB, MIN( M, N )-J+1 ) +* +* The active part of the matrix is partitioned +* +* A11 A12 A13 +* A21 A22 A23 +* A31 A32 A33 +* +* Here A11, A21 and A31 denote the current block of JB columns +* which is about to be factorized. The number of rows in the +* partitioning are JB, I2, I3 respectively, and the numbers +* of columns are JB, J2, J3. The superdiagonal elements of A13 +* and the subdiagonal elements of A31 lie outside the band. +* + I2 = MIN( KL-JB, M-J-JB+1 ) + I3 = MIN( JB, M-J-KL+1 ) +* +* J2 and J3 are computed after JU has been updated. +* +* Factorize the current block of JB columns +* + DO 80 JJ = J, J + JB - 1 +* +* Set fill-in elements in column JJ+KV to zero +* + IF( JJ+KV.LE.N ) THEN + DO 70 I = 1, KL + AB( I, JJ+KV ) = ZERO + 70 CONTINUE + END IF +* +* Find pivot and test for singularity. KM is the number of +* subdiagonal elements in the current column. +* + KM = MIN( KL, M-JJ ) + JP = ICAMAX( KM+1, AB( KV+1, JJ ), 1 ) + IPIV( JJ ) = JP + JJ - J + IF( AB( KV+JP, JJ ).NE.ZERO ) THEN + JU = MAX( JU, MIN( JJ+KU+JP-1, N ) ) + IF( JP.NE.1 ) THEN +* +* Apply interchange to columns J to J+JB-1 +* + IF( JP+JJ-1.LT.J+KL ) THEN +* + CALL CSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1, + $ AB( KV+JP+JJ-J, J ), LDAB-1 ) + ELSE +* +* The interchange affects columns J to JJ-1 of A31 +* which are stored in the work array WORK31 +* + CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) + CALL CSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1, + $ AB( KV+JP, JJ ), LDAB-1 ) + END IF + END IF +* +* Compute multipliers +* + CALL CSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ), + $ 1 ) +* +* Update trailing submatrix within the band and within +* the current block. JM is the index of the last column +* which needs to be updated. +* + JM = MIN( JU, J+JB-1 ) + IF( JM.GT.JJ ) + $ CALL CGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1, + $ AB( KV, JJ+1 ), LDAB-1, + $ AB( KV+1, JJ+1 ), LDAB-1 ) + ELSE +* +* If pivot is zero, set INFO to the index of the pivot +* unless a zero pivot has already been found. +* + IF( INFO.EQ.0 ) + $ INFO = JJ + END IF +* +* Copy current column of A31 into the work array WORK31 +* + NW = MIN( JJ-J+1, I3 ) + IF( NW.GT.0 ) + $ CALL CCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1, + $ WORK31( 1, JJ-J+1 ), 1 ) + 80 CONTINUE + IF( J+JB.LE.N ) THEN +* +* Apply the row interchanges to the other blocks. +* + J2 = MIN( JU-J+1, KV ) - JB + J3 = MAX( 0, JU-J-KV+1 ) +* +* Use CLASWP to apply the row interchanges to A12, A22, and +* A32. +* + CALL CLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB, + $ IPIV( J ), 1 ) +* +* Adjust the pivot indices. +* + DO 90 I = J, J + JB - 1 + IPIV( I ) = IPIV( I ) + J - 1 + 90 CONTINUE +* +* Apply the row interchanges to A13, A23, and A33 +* columnwise. +* + K2 = J - 1 + JB + J2 + DO 110 I = 1, J3 + JJ = K2 + I + DO 100 II = J + I - 1, J + JB - 1 + IP = IPIV( II ) + IF( IP.NE.II ) THEN + TEMP = AB( KV+1+II-JJ, JJ ) + AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ ) + AB( KV+1+IP-JJ, JJ ) = TEMP + END IF + 100 CONTINUE + 110 CONTINUE +* +* Update the relevant part of the trailing submatrix +* + IF( J2.GT.0 ) THEN +* +* Update A12 +* + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, J2, ONE, AB( KV+1, J ), LDAB-1, + $ AB( KV+1-JB, J+JB ), LDAB-1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A22 +* + CALL CGEMM( 'No transpose', 'No transpose', I2, J2, + $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, + $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, + $ AB( KV+1, J+JB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Update A32 +* + CALL CGEMM( 'No transpose', 'No transpose', I3, J2, + $ JB, -ONE, WORK31, LDWORK, + $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, + $ AB( KV+KL+1-JB, J+JB ), LDAB-1 ) + END IF + END IF +* + IF( J3.GT.0 ) THEN +* +* Copy the lower triangle of A13 into the work array +* WORK13 +* + DO 130 JJ = 1, J3 + DO 120 II = JJ, JB + WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 ) + 120 CONTINUE + 130 CONTINUE +* +* Update A13 in the work array +* + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, J3, ONE, AB( KV+1, J ), LDAB-1, + $ WORK13, LDWORK ) +* + IF( I2.GT.0 ) THEN +* +* Update A23 +* + CALL CGEMM( 'No transpose', 'No transpose', I2, J3, + $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, + $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ), + $ LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Update A33 +* + CALL CGEMM( 'No transpose', 'No transpose', I3, J3, + $ JB, -ONE, WORK31, LDWORK, WORK13, + $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 ) + END IF +* +* Copy the lower triangle of A13 back into place +* + DO 150 JJ = 1, J3 + DO 140 II = JJ, JB + AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ ) + 140 CONTINUE + 150 CONTINUE + END IF + ELSE +* +* Adjust the pivot indices. +* + DO 160 I = J, J + JB - 1 + IPIV( I ) = IPIV( I ) + J - 1 + 160 CONTINUE + END IF +* +* Partially undo the interchanges in the current block to +* restore the upper triangular form of A31 and copy the upper +* triangle of A31 back into place +* + DO 170 JJ = J + JB - 1, J, -1 + JP = IPIV( JJ ) - JJ + 1 + IF( JP.NE.1 ) THEN +* +* Apply interchange to columns J to JJ-1 +* + IF( JP+JJ-1.LT.J+KL ) THEN +* +* The interchange does not affect A31 +* + CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ AB( KV+JP+JJ-J, J ), LDAB-1 ) + ELSE +* +* The interchange does affect A31 +* + CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) + END IF + END IF +* +* Copy the current column of A31 back into place +* + NW = MIN( I3, JJ-J+1 ) + IF( NW.GT.0 ) + $ CALL CCOPY( NW, WORK31( 1, JJ-J+1 ), 1, + $ AB( KV+KL+1-JJ+J, JJ ), 1 ) + 170 CONTINUE + 180 CONTINUE + END IF +* + RETURN +* +* End of CGBTRF +* + END diff --git a/dspl/liblapack/SRC/cgbtrs.f b/dspl/liblapack/SRC/cgbtrs.f new file mode 100644 index 0000000..75d1953 --- /dev/null +++ b/dspl/liblapack/SRC/cgbtrs.f @@ -0,0 +1,297 @@ +*> \brief \b CGBTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGBTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX AB( LDAB, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGBTRS solves a system of linear equations +*> A * X = B, A**T * X = B, or A**H * X = B +*> with a general band matrix A using the LU factorization computed +*> by CGBTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations. +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by CGBTRF. U is stored as an upper triangular band +*> matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +*> the multipliers used during the factorization are stored in +*> rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= N, row i of the matrix was +*> interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGBcomputational +* +* ===================================================================== + SUBROUTINE CGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AB( LDAB, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LNOTI, NOTRAN + INTEGER I, J, KD, L, LM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CGERU, CLACGV, CSWAP, CTBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.( 2*KL+KU+1 ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + KD = KU + KL + 1 + LNOTI = KL.GT.0 +* + IF( NOTRAN ) THEN +* +* Solve A*X = B. +* +* Solve L*X = B, overwriting B with X. +* +* L is represented as a product of permutations and unit lower +* triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1), +* where each transformation L(i) is a rank-one modification of +* the identity matrix. +* + IF( LNOTI ) THEN + DO 10 J = 1, N - 1 + LM = MIN( KL, N-J ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL CSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + CALL CGERU( LM, NRHS, -ONE, AB( KD+1, J ), 1, B( J, 1 ), + $ LDB, B( J+1, 1 ), LDB ) + 10 CONTINUE + END IF +* + DO 20 I = 1, NRHS +* +* Solve U*X = B, overwriting B with X. +* + CALL CTBSV( 'Upper', 'No transpose', 'Non-unit', N, KL+KU, + $ AB, LDAB, B( 1, I ), 1 ) + 20 CONTINUE +* + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Solve A**T * X = B. +* + DO 30 I = 1, NRHS +* +* Solve U**T * X = B, overwriting B with X. +* + CALL CTBSV( 'Upper', 'Transpose', 'Non-unit', N, KL+KU, AB, + $ LDAB, B( 1, I ), 1 ) + 30 CONTINUE +* +* Solve L**T * X = B, overwriting B with X. +* + IF( LNOTI ) THEN + DO 40 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + CALL CGEMV( 'Transpose', LM, NRHS, -ONE, B( J+1, 1 ), + $ LDB, AB( KD+1, J ), 1, ONE, B( J, 1 ), LDB ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL CSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + 40 CONTINUE + END IF +* + ELSE +* +* Solve A**H * X = B. +* + DO 50 I = 1, NRHS +* +* Solve U**H * X = B, overwriting B with X. +* + CALL CTBSV( 'Upper', 'Conjugate transpose', 'Non-unit', N, + $ KL+KU, AB, LDAB, B( 1, I ), 1 ) + 50 CONTINUE +* +* Solve L**H * X = B, overwriting B with X. +* + IF( LNOTI ) THEN + DO 60 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + CALL CLACGV( NRHS, B( J, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', LM, NRHS, -ONE, + $ B( J+1, 1 ), LDB, AB( KD+1, J ), 1, ONE, + $ B( J, 1 ), LDB ) + CALL CLACGV( NRHS, B( J, 1 ), LDB ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL CSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + 60 CONTINUE + END IF + END IF + RETURN +* +* End of CGBTRS +* + END diff --git a/dspl/liblapack/SRC/cgebak.f b/dspl/liblapack/SRC/cgebak.f new file mode 100644 index 0000000..63c73bf --- /dev/null +++ b/dspl/liblapack/SRC/cgebak.f @@ -0,0 +1,270 @@ +*> \brief \b CGEBAK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEBAK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOB, SIDE +* INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. +* REAL SCALE( * ) +* COMPLEX V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEBAK forms the right or left eigenvectors of a complex general +*> matrix by backward transformation on the computed eigenvectors of the +*> balanced matrix output by CGEBAL. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies the type of backward transformation required: +*> = 'N', do nothing, return immediately; +*> = 'P', do backward transformation for permutation only; +*> = 'S', do backward transformation for scaling only; +*> = 'B', do backward transformations for both permutation and +*> scaling. +*> JOB must be the same as the argument JOB supplied to CGEBAL. +*> \endverbatim +*> +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': V contains right eigenvectors; +*> = 'L': V contains left eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows of the matrix V. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> The integers ILO and IHI determined by CGEBAL. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in] SCALE +*> \verbatim +*> SCALE is REAL array, dimension (N) +*> Details of the permutation and scaling factors, as returned +*> by CGEBAL. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of columns of the matrix V. M >= 0. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is COMPLEX array, dimension (LDV,M) +*> On entry, the matrix of right or left eigenvectors to be +*> transformed, as returned by CHSEIN or CTREVC. +*> On exit, V is overwritten by the transformed eigenvectors. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +* ===================================================================== + SUBROUTINE CGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOB, SIDE + INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. + REAL SCALE( * ) + COMPLEX V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFTV, RIGHTV + INTEGER I, II, K + REAL S +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and Test the input parameters +* + RIGHTV = LSAME( SIDE, 'R' ) + LEFTV = LSAME( SIDE, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -7 + ELSE IF( LDV.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEBAK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( M.EQ.0 ) + $ RETURN + IF( LSAME( JOB, 'N' ) ) + $ RETURN +* + IF( ILO.EQ.IHI ) + $ GO TO 30 +* +* Backward balance +* + IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN +* + IF( RIGHTV ) THEN + DO 10 I = ILO, IHI + S = SCALE( I ) + CALL CSSCAL( M, S, V( I, 1 ), LDV ) + 10 CONTINUE + END IF +* + IF( LEFTV ) THEN + DO 20 I = ILO, IHI + S = ONE / SCALE( I ) + CALL CSSCAL( M, S, V( I, 1 ), LDV ) + 20 CONTINUE + END IF +* + END IF +* +* Backward permutation +* +* For I = ILO-1 step -1 until 1, +* IHI+1 step 1 until N do -- +* + 30 CONTINUE + IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN + IF( RIGHTV ) THEN + DO 40 II = 1, N + I = II + IF( I.GE.ILO .AND. I.LE.IHI ) + $ GO TO 40 + IF( I.LT.ILO ) + $ I = ILO - II + K = SCALE( I ) + IF( K.EQ.I ) + $ GO TO 40 + CALL CSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 40 CONTINUE + END IF +* + IF( LEFTV ) THEN + DO 50 II = 1, N + I = II + IF( I.GE.ILO .AND. I.LE.IHI ) + $ GO TO 50 + IF( I.LT.ILO ) + $ I = ILO - II + K = SCALE( I ) + IF( K.EQ.I ) + $ GO TO 50 + CALL CSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 50 CONTINUE + END IF + END IF +* + RETURN +* +* End of CGEBAK +* + END diff --git a/dspl/liblapack/SRC/cgebal.f b/dspl/liblapack/SRC/cgebal.f new file mode 100644 index 0000000..9f3c25c --- /dev/null +++ b/dspl/liblapack/SRC/cgebal.f @@ -0,0 +1,399 @@ +*> \brief \b CGEBAL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEBAL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOB +* INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. +* REAL SCALE( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEBAL balances a general complex matrix A. This involves, first, +*> permuting A by a similarity transformation to isolate eigenvalues +*> in the first 1 to ILO-1 and last IHI+1 to N elements on the +*> diagonal; and second, applying a diagonal similarity transformation +*> to rows and columns ILO to IHI to make the rows and columns as +*> close in norm as possible. Both steps are optional. +*> +*> Balancing may reduce the 1-norm of the matrix, and improve the +*> accuracy of the computed eigenvalues and/or eigenvectors. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies the operations to be performed on A: +*> = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 +*> for i = 1,...,N; +*> = 'P': permute only; +*> = 'S': scale only; +*> = 'B': both permute and scale. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the input matrix A. +*> On exit, A is overwritten by the balanced matrix. +*> If JOB = 'N', A is not referenced. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> \param[out] IHI +*> \verbatim +*> IHI is INTEGER +*> ILO and IHI are set to integers such that on exit +*> A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. +*> If JOB = 'N' or 'S', ILO = 1 and IHI = N. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL array, dimension (N) +*> Details of the permutations and scaling factors applied to +*> A. If P(j) is the index of the row and column interchanged +*> with row and column j and D(j) is the scaling factor +*> applied to row and column j, then +*> SCALE(j) = P(j) for j = 1,...,ILO-1 +*> = D(j) for j = ILO,...,IHI +*> = P(j) for j = IHI+1,...,N. +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The permutations consist of row and column interchanges which put +*> the matrix in the form +*> +*> ( T1 X Y ) +*> P A P = ( 0 B Z ) +*> ( 0 0 T2 ) +*> +*> where T1 and T2 are upper triangular matrices whose eigenvalues lie +*> along the diagonal. The column indices ILO and IHI mark the starting +*> and ending columns of the submatrix B. Balancing consists of applying +*> a diagonal similarity transformation inv(D) * B * D to make the +*> 1-norms of each row of B and its corresponding column nearly equal. +*> The output matrix is +*> +*> ( T1 X*D Y ) +*> ( 0 inv(D)*B*D inv(D)*Z ). +*> ( 0 0 T2 ) +*> +*> Information about the permutations P and the diagonal matrix D is +*> returned in the vector SCALE. +*> +*> This subroutine is based on the EISPACK routine CBAL. +*> +*> Modified by Tzu-Yi Chen, Computer Science Division, University of +*> California at Berkeley, USA +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. + REAL SCALE( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL SCLFAC + PARAMETER ( SCLFAC = 2.0E+0 ) + REAL FACTOR + PARAMETER ( FACTOR = 0.95E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOCONV + INTEGER I, ICA, IEXC, IRA, J, K, L, M + REAL C, CA, F, G, R, RA, S, SFMAX1, SFMAX2, SFMIN1, + $ SFMIN2 +* .. +* .. External Functions .. + LOGICAL SISNAN, LSAME + INTEGER ICAMAX + REAL SLAMCH, SCNRM2 + EXTERNAL SISNAN, LSAME, ICAMAX, SLAMCH, SCNRM2 +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, MIN, REAL +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEBAL', -INFO ) + RETURN + END IF +* + K = 1 + L = N +* + IF( N.EQ.0 ) + $ GO TO 210 +* + IF( LSAME( JOB, 'N' ) ) THEN + DO 10 I = 1, N + SCALE( I ) = ONE + 10 CONTINUE + GO TO 210 + END IF +* + IF( LSAME( JOB, 'S' ) ) + $ GO TO 120 +* +* Permutation to isolate eigenvalues if possible +* + GO TO 50 +* +* Row and column exchange. +* + 20 CONTINUE + SCALE( M ) = J + IF( J.EQ.M ) + $ GO TO 30 +* + CALL CSWAP( L, A( 1, J ), 1, A( 1, M ), 1 ) + CALL CSWAP( N-K+1, A( J, K ), LDA, A( M, K ), LDA ) +* + 30 CONTINUE + GO TO ( 40, 80 )IEXC +* +* Search for rows isolating an eigenvalue and push them down. +* + 40 CONTINUE + IF( L.EQ.1 ) + $ GO TO 210 + L = L - 1 +* + 50 CONTINUE + DO 70 J = L, 1, -1 +* + DO 60 I = 1, L + IF( I.EQ.J ) + $ GO TO 60 + IF( REAL( A( J, I ) ).NE.ZERO .OR. AIMAG( A( J, I ) ).NE. + $ ZERO )GO TO 70 + 60 CONTINUE +* + M = L + IEXC = 1 + GO TO 20 + 70 CONTINUE +* + GO TO 90 +* +* Search for columns isolating an eigenvalue and push them left. +* + 80 CONTINUE + K = K + 1 +* + 90 CONTINUE + DO 110 J = K, L +* + DO 100 I = K, L + IF( I.EQ.J ) + $ GO TO 100 + IF( REAL( A( I, J ) ).NE.ZERO .OR. AIMAG( A( I, J ) ).NE. + $ ZERO )GO TO 110 + 100 CONTINUE +* + M = K + IEXC = 2 + GO TO 20 + 110 CONTINUE +* + 120 CONTINUE + DO 130 I = K, L + SCALE( I ) = ONE + 130 CONTINUE +* + IF( LSAME( JOB, 'P' ) ) + $ GO TO 210 +* +* Balance the submatrix in rows K to L. +* +* Iterative loop for norm reduction +* + SFMIN1 = SLAMCH( 'S' ) / SLAMCH( 'P' ) + SFMAX1 = ONE / SFMIN1 + SFMIN2 = SFMIN1*SCLFAC + SFMAX2 = ONE / SFMIN2 + 140 CONTINUE + NOCONV = .FALSE. +* + DO 200 I = K, L +* + C = SCNRM2( L-K+1, A( K, I ), 1 ) + R = SCNRM2( L-K+1, A( I , K ), LDA ) + ICA = ICAMAX( L, A( 1, I ), 1 ) + CA = ABS( A( ICA, I ) ) + IRA = ICAMAX( N-K+1, A( I, K ), LDA ) + RA = ABS( A( I, IRA+K-1 ) ) +* +* Guard against zero C or R due to underflow. +* + IF( C.EQ.ZERO .OR. R.EQ.ZERO ) + $ GO TO 200 + G = R / SCLFAC + F = ONE + S = C + R + 160 CONTINUE + IF( C.GE.G .OR. MAX( F, C, CA ).GE.SFMAX2 .OR. + $ MIN( R, G, RA ).LE.SFMIN2 )GO TO 170 + IF( SISNAN( C+F+CA+R+G+RA ) ) THEN +* +* Exit if NaN to avoid infinite loop +* + INFO = -3 + CALL XERBLA( 'CGEBAL', -INFO ) + RETURN + END IF + F = F*SCLFAC + C = C*SCLFAC + CA = CA*SCLFAC + R = R / SCLFAC + G = G / SCLFAC + RA = RA / SCLFAC + GO TO 160 +* + 170 CONTINUE + G = C / SCLFAC + 180 CONTINUE + IF( G.LT.R .OR. MAX( R, RA ).GE.SFMAX2 .OR. + $ MIN( F, C, G, CA ).LE.SFMIN2 )GO TO 190 + F = F / SCLFAC + C = C / SCLFAC + G = G / SCLFAC + CA = CA / SCLFAC + R = R*SCLFAC + RA = RA*SCLFAC + GO TO 180 +* +* Now balance. +* + 190 CONTINUE + IF( ( C+R ).GE.FACTOR*S ) + $ GO TO 200 + IF( F.LT.ONE .AND. SCALE( I ).LT.ONE ) THEN + IF( F*SCALE( I ).LE.SFMIN1 ) + $ GO TO 200 + END IF + IF( F.GT.ONE .AND. SCALE( I ).GT.ONE ) THEN + IF( SCALE( I ).GE.SFMAX1 / F ) + $ GO TO 200 + END IF + G = ONE / F + SCALE( I ) = SCALE( I )*F + NOCONV = .TRUE. +* + CALL CSSCAL( N-K+1, G, A( I, K ), LDA ) + CALL CSSCAL( L, F, A( 1, I ), 1 ) +* + 200 CONTINUE +* + IF( NOCONV ) + $ GO TO 140 +* + 210 CONTINUE + ILO = K + IHI = L +* + RETURN +* +* End of CGEBAL +* + END diff --git a/dspl/liblapack/SRC/cgebd2.f b/dspl/liblapack/SRC/cgebd2.f new file mode 100644 index 0000000..a234f36 --- /dev/null +++ b/dspl/liblapack/SRC/cgebd2.f @@ -0,0 +1,332 @@ +*> \brief \b CGEBD2 reduces a general matrix to bidiagonal form using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEBD2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ) +* COMPLEX A( LDA, * ), TAUP( * ), TAUQ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEBD2 reduces a complex general m by n matrix A to upper or lower +*> real bidiagonal form B by a unitary transformation: Q**H * A * P = B. +*> +*> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows in the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the m by n general matrix to be reduced. +*> On exit, +*> if m >= n, the diagonal and the first superdiagonal are +*> overwritten with the upper bidiagonal matrix B; the +*> elements below the diagonal, with the array TAUQ, represent +*> the unitary matrix Q as a product of elementary +*> reflectors, and the elements above the first superdiagonal, +*> with the array TAUP, represent the unitary matrix P as +*> a product of elementary reflectors; +*> if m < n, the diagonal and the first subdiagonal are +*> overwritten with the lower bidiagonal matrix B; the +*> elements below the first subdiagonal, with the array TAUQ, +*> represent the unitary matrix Q as a product of +*> elementary reflectors, and the elements above the diagonal, +*> with the array TAUP, represent the unitary matrix P as +*> a product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (min(M,N)) +*> The diagonal elements of the bidiagonal matrix B: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (min(M,N)-1) +*> The off-diagonal elements of the bidiagonal matrix B: +*> if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +*> if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +*> \endverbatim +*> +*> \param[out] TAUQ +*> \verbatim +*> TAUQ is COMPLEX array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the unitary matrix Q. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP +*> \verbatim +*> TAUP is COMPLEX array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the unitary matrix P. See Further Details. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (max(M,N)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexGEcomputational +* @precisions normal c -> s d z +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrices Q and P are represented as products of elementary +*> reflectors: +*> +*> If m >= n, +*> +*> Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**H and G(i) = I - taup * u * u**H +*> +*> where tauq and taup are complex scalars, and v and u are complex +*> vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in +*> A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in +*> A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> If m < n, +*> +*> Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**H and G(i) = I - taup * u * u**H +*> +*> where tauq and taup are complex scalars, v and u are complex vectors; +*> v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); +*> u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); +*> tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> The contents of A on exit are illustrated by the following examples: +*> +*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +*> +*> ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +*> ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +*> ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +*> ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +*> ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +*> ( v1 v2 v3 v4 v5 ) +*> +*> where d and e denote diagonal and off-diagonal elements of B, vi +*> denotes an element of the vector defining H(i), and ui an element of +*> the vector defining G(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX A( LDA, * ), TAUP( * ), TAUQ( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARF, CLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'CGEBD2', -INFO ) + RETURN + END IF +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, N +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + ALPHA = A( I, I ) + CALL CLARFG( M-I+1, ALPHA, A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = ALPHA + A( I, I ) = ONE +* +* Apply H(i)**H to A(i:m,i+1:n) from the left +* + IF( I.LT.N ) + $ CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1, + $ CONJG( TAUQ( I ) ), A( I, I+1 ), LDA, WORK ) + A( I, I ) = D( I ) +* + IF( I.LT.N ) THEN +* +* Generate elementary reflector G(i) to annihilate +* A(i,i+2:n) +* + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + ALPHA = A( I, I+1 ) + CALL CLARFG( N-I, ALPHA, A( I, MIN( I+2, N ) ), + $ LDA, TAUP( I ) ) + E( I ) = ALPHA + A( I, I+1 ) = ONE +* +* Apply G(i) to A(i+1:m,i+1:n) from the right +* + CALL CLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA, + $ TAUP( I ), A( I+1, I+1 ), LDA, WORK ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + A( I, I+1 ) = E( I ) + ELSE + TAUP( I ) = ZERO + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, M +* +* Generate elementary reflector G(i) to annihilate A(i,i+1:n) +* + CALL CLACGV( N-I+1, A( I, I ), LDA ) + ALPHA = A( I, I ) + CALL CLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = ALPHA + A( I, I ) = ONE +* +* Apply G(i) to A(i+1:m,i:n) from the right +* + IF( I.LT.M ) + $ CALL CLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ TAUP( I ), A( I+1, I ), LDA, WORK ) + CALL CLACGV( N-I+1, A( I, I ), LDA ) + A( I, I ) = D( I ) +* + IF( I.LT.M ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:m,i) +* + ALPHA = A( I+1, I ) + CALL CLARFG( M-I, ALPHA, A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = ALPHA + A( I+1, I ) = ONE +* +* Apply H(i)**H to A(i+1:m,i+1:n) from the left +* + CALL CLARF( 'Left', M-I, N-I, A( I+1, I ), 1, + $ CONJG( TAUQ( I ) ), A( I+1, I+1 ), LDA, + $ WORK ) + A( I+1, I ) = E( I ) + ELSE + TAUQ( I ) = ZERO + END IF + 20 CONTINUE + END IF + RETURN +* +* End of CGEBD2 +* + END diff --git a/dspl/liblapack/SRC/cgebrd.f b/dspl/liblapack/SRC/cgebrd.f new file mode 100644 index 0000000..eebd26f --- /dev/null +++ b/dspl/liblapack/SRC/cgebrd.f @@ -0,0 +1,354 @@ +*> \brief \b CGEBRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEBRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ) +* COMPLEX A( LDA, * ), TAUP( * ), TAUQ( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEBRD reduces a general complex M-by-N matrix A to upper or lower +*> bidiagonal form B by a unitary transformation: Q**H * A * P = B. +*> +*> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows in the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N general matrix to be reduced. +*> On exit, +*> if m >= n, the diagonal and the first superdiagonal are +*> overwritten with the upper bidiagonal matrix B; the +*> elements below the diagonal, with the array TAUQ, represent +*> the unitary matrix Q as a product of elementary +*> reflectors, and the elements above the first superdiagonal, +*> with the array TAUP, represent the unitary matrix P as +*> a product of elementary reflectors; +*> if m < n, the diagonal and the first subdiagonal are +*> overwritten with the lower bidiagonal matrix B; the +*> elements below the first subdiagonal, with the array TAUQ, +*> represent the unitary matrix Q as a product of +*> elementary reflectors, and the elements above the diagonal, +*> with the array TAUP, represent the unitary matrix P as +*> a product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (min(M,N)) +*> The diagonal elements of the bidiagonal matrix B: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (min(M,N)-1) +*> The off-diagonal elements of the bidiagonal matrix B: +*> if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +*> if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +*> \endverbatim +*> +*> \param[out] TAUQ +*> \verbatim +*> TAUQ is COMPLEX array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the unitary matrix Q. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP +*> \verbatim +*> TAUP is COMPLEX array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the unitary matrix P. See Further Details. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,M,N). +*> For optimum performance LWORK >= (M+N)*NB, where NB +*> is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrices Q and P are represented as products of elementary +*> reflectors: +*> +*> If m >= n, +*> +*> Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**H and G(i) = I - taup * u * u**H +*> +*> where tauq and taup are complex scalars, and v and u are complex +*> vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in +*> A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in +*> A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> If m < n, +*> +*> Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**H and G(i) = I - taup * u * u**H +*> +*> where tauq and taup are complex scalars, and v and u are complex +*> vectors; v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in +*> A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in +*> A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> The contents of A on exit are illustrated by the following examples: +*> +*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +*> +*> ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +*> ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +*> ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +*> ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +*> ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +*> ( v1 v2 v3 v4 v5 ) +*> +*> where d and e denote diagonal and off-diagonal elements of B, vi +*> denotes an element of the vector defining H(i), and ui an element of +*> the vector defining G(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX A( LDA, * ), TAUP( * ), TAUQ( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB, + $ NBMIN, NX, WS +* .. +* .. External Subroutines .. + EXTERNAL CGEBD2, CGEMM, CLABRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB = MAX( 1, ILAENV( 1, 'CGEBRD', ' ', M, N, -1, -1 ) ) + LWKOPT = ( M+N )*NB + WORK( 1 ) = REAL( LWKOPT ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M, N ) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'CGEBRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + MINMN = MIN( M, N ) + IF( MINMN.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + WS = MAX( M, N ) + LDWRKX = M + LDWRKY = N +* + IF( NB.GT.1 .AND. NB.LT.MINMN ) THEN +* +* Set the crossover point NX. +* + NX = MAX( NB, ILAENV( 3, 'CGEBRD', ' ', M, N, -1, -1 ) ) +* +* Determine when to switch from blocked to unblocked code. +* + IF( NX.LT.MINMN ) THEN + WS = ( M+N )*NB + IF( LWORK.LT.WS ) THEN +* +* Not enough work space for the optimal NB, consider using +* a smaller block size. +* + NBMIN = ILAENV( 2, 'CGEBRD', ' ', M, N, -1, -1 ) + IF( LWORK.GE.( M+N )*NBMIN ) THEN + NB = LWORK / ( M+N ) + ELSE + NB = 1 + NX = MINMN + END IF + END IF + END IF + ELSE + NX = MINMN + END IF +* + DO 30 I = 1, MINMN - NX, NB +* +* Reduce rows and columns i:i+ib-1 to bidiagonal form and return +* the matrices X and Y which are needed to update the unreduced +* part of the matrix +* + CALL CLABRD( M-I+1, N-I+1, NB, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, LDWRKX, + $ WORK( LDWRKX*NB+1 ), LDWRKY ) +* +* Update the trailing submatrix A(i+ib:m,i+ib:n), using +* an update of the form A := A - V*Y**H - X*U**H +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', M-I-NB+1, + $ N-I-NB+1, NB, -ONE, A( I+NB, I ), LDA, + $ WORK( LDWRKX*NB+NB+1 ), LDWRKY, ONE, + $ A( I+NB, I+NB ), LDA ) + CALL CGEMM( 'No transpose', 'No transpose', M-I-NB+1, N-I-NB+1, + $ NB, -ONE, WORK( NB+1 ), LDWRKX, A( I, I+NB ), LDA, + $ ONE, A( I+NB, I+NB ), LDA ) +* +* Copy diagonal and off-diagonal elements of B back into A +* + IF( M.GE.N ) THEN + DO 10 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J, J+1 ) = E( J ) + 10 CONTINUE + ELSE + DO 20 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J+1, J ) = E( J ) + 20 CONTINUE + END IF + 30 CONTINUE +* +* Use unblocked code to reduce the remainder of the matrix +* + CALL CGEBD2( M-I+1, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, IINFO ) + WORK( 1 ) = WS + RETURN +* +* End of CGEBRD +* + END diff --git a/dspl/liblapack/SRC/cgecon.f b/dspl/liblapack/SRC/cgecon.f new file mode 100644 index 0000000..fb57da2 --- /dev/null +++ b/dspl/liblapack/SRC/cgecon.f @@ -0,0 +1,269 @@ +*> \brief \b CGECON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGECON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER INFO, LDA, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* REAL RWORK( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGECON estimates the reciprocal of the condition number of a general +*> complex matrix A, in either the 1-norm or the infinity-norm, using +*> the LU factorization computed by CGETRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as +*> RCOND = 1 / ( norm(A) * norm(inv(A)) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The factors L and U from the factorization A = P*L*U +*> as computed by CGETRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> If NORM = '1' or 'O', the 1-norm of the original matrix A. +*> If NORM = 'I', the infinity-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(norm(A) * norm(inv(A))). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +* ===================================================================== + SUBROUTINE CGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + REAL AINVNM, SCALE, SL, SMLNUM, SU + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CLATRS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGECON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + CALL CLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A, + $ LDA, WORK, SL, RWORK, INFO ) +* +* Multiply by inv(U). +* + CALL CLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SU, RWORK( N+1 ), INFO ) + ELSE +* +* Multiply by inv(U**H). +* + CALL CLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, A, LDA, WORK, SU, RWORK( N+1 ), + $ INFO ) +* +* Multiply by inv(L**H). +* + CALL CLATRS( 'Lower', 'Conjugate transpose', 'Unit', NORMIN, + $ N, A, LDA, WORK, SL, RWORK, INFO ) + END IF +* +* Divide X by 1/(SL*SU) if doing so will not cause overflow. +* + SCALE = SL*SU + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of CGECON +* + END diff --git a/dspl/liblapack/SRC/cgeequ.f b/dspl/liblapack/SRC/cgeequ.f new file mode 100644 index 0000000..75aa2d7 --- /dev/null +++ b/dspl/liblapack/SRC/cgeequ.f @@ -0,0 +1,313 @@ +*> \brief \b CGEEQU +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEEQU + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* REAL C( * ), R( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEEQU computes row and column scalings intended to equilibrate an +*> M-by-N matrix A and reduce its condition number. R returns the row +*> scale factors and C the column scale factors, chosen to try to make +*> the largest element in each row and column of the matrix B with +*> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. +*> +*> R(i) and C(j) are restricted to be between SMLNUM = smallest safe +*> number and BIGNUM = largest safe number. Use of these scaling +*> factors is not guaranteed to reduce the condition number of A but +*> works well in practice. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The M-by-N matrix whose equilibration factors are +*> to be computed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is REAL array, dimension (M) +*> If INFO = 0 or INFO > M, R contains the row scale factors +*> for A. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is REAL array, dimension (N) +*> If INFO = 0, C contains the column scale factors for A. +*> \endverbatim +*> +*> \param[out] ROWCND +*> \verbatim +*> ROWCND is REAL +*> If INFO = 0 or INFO > M, ROWCND contains the ratio of the +*> smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +*> AMAX is neither too large nor too small, it is not worth +*> scaling by R. +*> \endverbatim +*> +*> \param[out] COLCND +*> \verbatim +*> COLCND is REAL +*> If INFO = 0, COLCND contains the ratio of the smallest +*> C(i) to the largest C(i). If COLCND >= 0.1, it is not +*> worth scaling by C. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= M: the i-th row of A is exactly zero +*> > M: the (i-M)-th column of A is exactly zero +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +* ===================================================================== + SUBROUTINE CGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N + REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + REAL C( * ), R( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL BIGNUM, RCMAX, RCMIN, SMLNUM + COMPLEX ZDUM +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + DO 30 J = 1, N + DO 20 I = 1, M + R( I ) = MAX( R( I ), CABS1( A( I, J ) ) ) + 20 CONTINUE + 30 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)) +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + DO 90 J = 1, N + DO 80 I = 1, M + C( J ) = MAX( C( J ), CABS1( A( I, J ) )*R( I ) ) + 80 CONTINUE + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)) +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of CGEEQU +* + END diff --git a/dspl/liblapack/SRC/cgeequb.f b/dspl/liblapack/SRC/cgeequb.f new file mode 100644 index 0000000..3f738b8 --- /dev/null +++ b/dspl/liblapack/SRC/cgeequb.f @@ -0,0 +1,330 @@ +*> \brief \b CGEEQUB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEEQUB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEEQUB( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* REAL C( * ), R( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEEQUB computes row and column scalings intended to equilibrate an +*> M-by-N matrix A and reduce its condition number. R returns the row +*> scale factors and C the column scale factors, chosen to try to make +*> the largest element in each row and column of the matrix B with +*> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most +*> the radix. +*> +*> R(i) and C(j) are restricted to be a power of the radix between +*> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use +*> of these scaling factors is not guaranteed to reduce the condition +*> number of A but works well in practice. +*> +*> This routine differs from CGEEQU by restricting the scaling factors +*> to a power of the radix. Barring over- and underflow, scaling by +*> these factors introduces no additional rounding errors. However, the +*> scaled entries' magnitudes are no longer approximately 1 but lie +*> between sqrt(radix) and 1/sqrt(radix). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The M-by-N matrix whose equilibration factors are +*> to be computed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is REAL array, dimension (M) +*> If INFO = 0 or INFO > M, R contains the row scale factors +*> for A. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is REAL array, dimension (N) +*> If INFO = 0, C contains the column scale factors for A. +*> \endverbatim +*> +*> \param[out] ROWCND +*> \verbatim +*> ROWCND is REAL +*> If INFO = 0 or INFO > M, ROWCND contains the ratio of the +*> smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +*> AMAX is neither too large nor too small, it is not worth +*> scaling by R. +*> \endverbatim +*> +*> \param[out] COLCND +*> \verbatim +*> COLCND is REAL +*> If INFO = 0, COLCND contains the ratio of the smallest +*> C(i) to the largest C(i). If COLCND >= 0.1, it is not +*> worth scaling by C. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= M: the i-th row of A is exactly zero +*> > M: the (i-M)-th column of A is exactly zero +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +* ===================================================================== + SUBROUTINE CGEEQUB( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N + REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + REAL C( * ), R( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL BIGNUM, RCMAX, RCMIN, SMLNUM, RADIX, LOGRDX + COMPLEX ZDUM +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, LOG, REAL, AIMAG +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEEQUB', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. Assume SMLNUM is a power of the radix. +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + RADIX = SLAMCH( 'B' ) + LOGRDX = LOG( RADIX ) +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + DO 30 J = 1, N + DO 20 I = 1, M + R( I ) = MAX( R( I ), CABS1( A( I, J ) ) ) + 20 CONTINUE + 30 CONTINUE + DO I = 1, M + IF( R( I ).GT.ZERO ) THEN + R( I ) = RADIX**INT( LOG(R( I ) ) / LOGRDX ) + END IF + END DO +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)). +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors. +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + DO 90 J = 1, N + DO 80 I = 1, M + C( J ) = MAX( C( J ), CABS1( A( I, J ) )*R( I ) ) + 80 CONTINUE + IF( C( J ).GT.ZERO ) THEN + C( J ) = RADIX**INT( LOG( C( J ) ) / LOGRDX ) + END IF + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)). +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of CGEEQUB +* + END diff --git a/dspl/liblapack/SRC/cgees.f b/dspl/liblapack/SRC/cgees.f new file mode 100644 index 0000000..4da2cdf --- /dev/null +++ b/dspl/liblapack/SRC/cgees.f @@ -0,0 +1,424 @@ +*> \brief CGEES computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEES + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, W, VS, +* LDVS, WORK, LWORK, RWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVS, SORT +* INTEGER INFO, LDA, LDVS, LWORK, N, SDIM +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* REAL RWORK( * ) +* COMPLEX A( LDA, * ), VS( LDVS, * ), W( * ), WORK( * ) +* .. +* .. Function Arguments .. +* LOGICAL SELECT +* EXTERNAL SELECT +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEES computes for an N-by-N complex nonsymmetric matrix A, the +*> eigenvalues, the Schur form T, and, optionally, the matrix of Schur +*> vectors Z. This gives the Schur factorization A = Z*T*(Z**H). +*> +*> Optionally, it also orders the eigenvalues on the diagonal of the +*> Schur form so that selected eigenvalues are at the top left. +*> The leading columns of Z then form an orthonormal basis for the +*> invariant subspace corresponding to the selected eigenvalues. +*> +*> A complex matrix is in Schur form if it is upper triangular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVS +*> \verbatim +*> JOBVS is CHARACTER*1 +*> = 'N': Schur vectors are not computed; +*> = 'V': Schur vectors are computed. +*> \endverbatim +*> +*> \param[in] SORT +*> \verbatim +*> SORT is CHARACTER*1 +*> Specifies whether or not to order the eigenvalues on the +*> diagonal of the Schur form. +*> = 'N': Eigenvalues are not ordered: +*> = 'S': Eigenvalues are ordered (see SELECT). +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is a LOGICAL FUNCTION of one COMPLEX argument +*> SELECT must be declared EXTERNAL in the calling subroutine. +*> If SORT = 'S', SELECT is used to select eigenvalues to order +*> to the top left of the Schur form. +*> IF SORT = 'N', SELECT is not referenced. +*> The eigenvalue W(j) is selected if SELECT(W(j)) is true. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> On exit, A has been overwritten by its Schur form T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] SDIM +*> \verbatim +*> SDIM is INTEGER +*> If SORT = 'N', SDIM = 0. +*> If SORT = 'S', SDIM = number of eigenvalues for which +*> SELECT is true. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX array, dimension (N) +*> W contains the computed eigenvalues, in the same order that +*> they appear on the diagonal of the output Schur form T. +*> \endverbatim +*> +*> \param[out] VS +*> \verbatim +*> VS is COMPLEX array, dimension (LDVS,N) +*> If JOBVS = 'V', VS contains the unitary matrix Z of Schur +*> vectors. +*> If JOBVS = 'N', VS is not referenced. +*> \endverbatim +*> +*> \param[in] LDVS +*> \verbatim +*> LDVS is INTEGER +*> The leading dimension of the array VS. LDVS >= 1; if +*> JOBVS = 'V', LDVS >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,2*N). +*> For good performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> Not referenced if SORT = 'N'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, and i is +*> <= N: the QR algorithm failed to compute all the +*> eigenvalues; elements 1:ILO-1 and i+1:N of W +*> contain those eigenvalues which have converged; +*> if JOBVS = 'V', VS contains the matrix which +*> reduces A to its partially converged Schur form. +*> = N+1: the eigenvalues could not be reordered because +*> some eigenvalues were too close to separate (the +*> problem is very ill-conditioned); +*> = N+2: after reordering, roundoff changed values of +*> some complex eigenvalues so that leading +*> eigenvalues in the Schur form no longer satisfy +*> SELECT = .TRUE.. This could also be caused by +*> underflow due to scaling. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEeigen +* +* ===================================================================== + SUBROUTINE CGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, W, VS, + $ LDVS, WORK, LWORK, RWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVS, SORT + INTEGER INFO, LDA, LDVS, LWORK, N, SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), VS( LDVS, * ), W( * ), WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELECT + EXTERNAL SELECT +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTST, WANTVS + INTEGER HSWORK, I, IBAL, ICOND, IERR, IEVAL, IHI, ILO, + $ ITAU, IWRK, MAXWRK, MINWRK + REAL ANRM, BIGNUM, CSCALE, EPS, S, SEP, SMLNUM +* .. +* .. Local Arrays .. + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEBAK, CGEBAL, CGEHRD, CHSEQR, CLACPY, + $ CLASCL, CTRSEN, CUNGHR, SLABAD, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVS = LSAME( JOBVS, 'V' ) + WANTST = LSAME( SORT, 'S' ) + IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN + INFO = -10 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to real +* workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by CHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + ELSE + MAXWRK = N + N*ILAENV( 1, 'CGEHRD', ' ', N, 1, N, 0 ) + MINWRK = 2*N +* + CALL CHSEQR( 'S', JOBVS, N, 1, N, A, LDA, W, VS, LDVS, + $ WORK, -1, IEVAL ) + HSWORK = WORK( 1 ) +* + IF( .NOT.WANTVS ) THEN + MAXWRK = MAX( MAXWRK, HSWORK ) + ELSE + MAXWRK = MAX( MAXWRK, N + ( N - 1 )*ILAENV( 1, 'CUNGHR', + $ ' ', N, 1, N, -1 ) ) + MAXWRK = MAX( MAXWRK, HSWORK ) + END IF + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEES ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL CLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (CWorkspace: none) +* (RWorkspace: need N) +* + IBAL = 1 + CALL CGEBAL( 'P', N, A, LDA, ILO, IHI, RWORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + ITAU = 1 + IWRK = N + ITAU + CALL CGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVS ) THEN +* +* Copy Householder vectors to VS +* + CALL CLACPY( 'L', N, N, A, LDA, VS, LDVS ) +* +* Generate unitary matrix in VS +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL CUNGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) + END IF +* + SDIM = 0 +* +* Perform QR iteration, accumulating Schur vectors in VS if desired +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL CHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, W, VS, LDVS, + $ WORK( IWRK ), LWORK-IWRK+1, IEVAL ) + IF( IEVAL.GT.0 ) + $ INFO = IEVAL +* +* Sort eigenvalues if desired +* + IF( WANTST .AND. INFO.EQ.0 ) THEN + IF( SCALEA ) + $ CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, W, N, IERR ) + DO 10 I = 1, N + BWORK( I ) = SELECT( W( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues and transform Schur vectors +* (CWorkspace: none) +* (RWorkspace: none) +* + CALL CTRSEN( 'N', JOBVS, BWORK, N, A, LDA, VS, LDVS, W, SDIM, + $ S, SEP, WORK( IWRK ), LWORK-IWRK+1, ICOND ) + END IF +* + IF( WANTVS ) THEN +* +* Undo balancing +* (CWorkspace: none) +* (RWorkspace: need N) +* + CALL CGEBAK( 'P', 'R', N, ILO, IHI, RWORK( IBAL ), N, VS, LDVS, + $ IERR ) + END IF +* + IF( SCALEA ) THEN +* +* Undo scaling for the Schur form of A +* + CALL CLASCL( 'U', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR ) + CALL CCOPY( N, A, LDA+1, W, 1 ) + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of CGEES +* + END diff --git a/dspl/liblapack/SRC/cgeesx.f b/dspl/liblapack/SRC/cgeesx.f new file mode 100644 index 0000000..dd833ae --- /dev/null +++ b/dspl/liblapack/SRC/cgeesx.f @@ -0,0 +1,499 @@ +*> \brief CGEESX computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEESX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, W, +* VS, LDVS, RCONDE, RCONDV, WORK, LWORK, RWORK, +* BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVS, SENSE, SORT +* INTEGER INFO, LDA, LDVS, LWORK, N, SDIM +* REAL RCONDE, RCONDV +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* REAL RWORK( * ) +* COMPLEX A( LDA, * ), VS( LDVS, * ), W( * ), WORK( * ) +* .. +* .. Function Arguments .. +* LOGICAL SELECT +* EXTERNAL SELECT +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEESX computes for an N-by-N complex nonsymmetric matrix A, the +*> eigenvalues, the Schur form T, and, optionally, the matrix of Schur +*> vectors Z. This gives the Schur factorization A = Z*T*(Z**H). +*> +*> Optionally, it also orders the eigenvalues on the diagonal of the +*> Schur form so that selected eigenvalues are at the top left; +*> computes a reciprocal condition number for the average of the +*> selected eigenvalues (RCONDE); and computes a reciprocal condition +*> number for the right invariant subspace corresponding to the +*> selected eigenvalues (RCONDV). The leading columns of Z form an +*> orthonormal basis for this invariant subspace. +*> +*> For further explanation of the reciprocal condition numbers RCONDE +*> and RCONDV, see Section 4.10 of the LAPACK Users' Guide (where +*> these quantities are called s and sep respectively). +*> +*> A complex matrix is in Schur form if it is upper triangular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVS +*> \verbatim +*> JOBVS is CHARACTER*1 +*> = 'N': Schur vectors are not computed; +*> = 'V': Schur vectors are computed. +*> \endverbatim +*> +*> \param[in] SORT +*> \verbatim +*> SORT is CHARACTER*1 +*> Specifies whether or not to order the eigenvalues on the +*> diagonal of the Schur form. +*> = 'N': Eigenvalues are not ordered; +*> = 'S': Eigenvalues are ordered (see SELECT). +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is a LOGICAL FUNCTION of one COMPLEX argument +*> SELECT must be declared EXTERNAL in the calling subroutine. +*> If SORT = 'S', SELECT is used to select eigenvalues to order +*> to the top left of the Schur form. +*> If SORT = 'N', SELECT is not referenced. +*> An eigenvalue W(j) is selected if SELECT(W(j)) is true. +*> \endverbatim +*> +*> \param[in] SENSE +*> \verbatim +*> SENSE is CHARACTER*1 +*> Determines which reciprocal condition numbers are computed. +*> = 'N': None are computed; +*> = 'E': Computed for average of selected eigenvalues only; +*> = 'V': Computed for selected right invariant subspace only; +*> = 'B': Computed for both. +*> If SENSE = 'E', 'V' or 'B', SORT must equal 'S'. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the N-by-N matrix A. +*> On exit, A is overwritten by its Schur form T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] SDIM +*> \verbatim +*> SDIM is INTEGER +*> If SORT = 'N', SDIM = 0. +*> If SORT = 'S', SDIM = number of eigenvalues for which +*> SELECT is true. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX array, dimension (N) +*> W contains the computed eigenvalues, in the same order +*> that they appear on the diagonal of the output Schur form T. +*> \endverbatim +*> +*> \param[out] VS +*> \verbatim +*> VS is COMPLEX array, dimension (LDVS,N) +*> If JOBVS = 'V', VS contains the unitary matrix Z of Schur +*> vectors. +*> If JOBVS = 'N', VS is not referenced. +*> \endverbatim +*> +*> \param[in] LDVS +*> \verbatim +*> LDVS is INTEGER +*> The leading dimension of the array VS. LDVS >= 1, and if +*> JOBVS = 'V', LDVS >= N. +*> \endverbatim +*> +*> \param[out] RCONDE +*> \verbatim +*> RCONDE is REAL +*> If SENSE = 'E' or 'B', RCONDE contains the reciprocal +*> condition number for the average of the selected eigenvalues. +*> Not referenced if SENSE = 'N' or 'V'. +*> \endverbatim +*> +*> \param[out] RCONDV +*> \verbatim +*> RCONDV is REAL +*> If SENSE = 'V' or 'B', RCONDV contains the reciprocal +*> condition number for the selected right invariant subspace. +*> Not referenced if SENSE = 'N' or 'E'. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,2*N). +*> Also, if SENSE = 'E' or 'V' or 'B', LWORK >= 2*SDIM*(N-SDIM), +*> where SDIM is the number of selected eigenvalues computed by +*> this routine. Note that 2*SDIM*(N-SDIM) <= N*N/2. Note also +*> that an error is only returned if LWORK < max(1,2*N), but if +*> SENSE = 'E' or 'V' or 'B' this may not be large enough. +*> For good performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates upper bound on the optimal size of the +*> array WORK, returns this value as the first entry of the WORK +*> array, and no error message related to LWORK is issued by +*> XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> Not referenced if SORT = 'N'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, and i is +*> <= N: the QR algorithm failed to compute all the +*> eigenvalues; elements 1:ILO-1 and i+1:N of W +*> contain those eigenvalues which have converged; if +*> JOBVS = 'V', VS contains the transformation which +*> reduces A to its partially converged Schur form. +*> = N+1: the eigenvalues could not be reordered because some +*> eigenvalues were too close to separate (the problem +*> is very ill-conditioned); +*> = N+2: after reordering, roundoff changed values of some +*> complex eigenvalues so that leading eigenvalues in +*> the Schur form no longer satisfy SELECT=.TRUE. This +*> could also be caused by underflow due to scaling. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexGEeigen +* +* ===================================================================== + SUBROUTINE CGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, W, + $ VS, LDVS, RCONDE, RCONDV, WORK, LWORK, RWORK, + $ BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVS, SENSE, SORT + INTEGER INFO, LDA, LDVS, LWORK, N, SDIM + REAL RCONDE, RCONDV +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), VS( LDVS, * ), W( * ), WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELECT + EXTERNAL SELECT +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTSB, WANTSE, WANTSN, WANTST, + $ WANTSV, WANTVS + INTEGER HSWORK, I, IBAL, ICOND, IERR, IEVAL, IHI, ILO, + $ ITAU, IWRK, LWRK, MAXWRK, MINWRK + REAL ANRM, BIGNUM, CSCALE, EPS, SMLNUM +* .. +* .. Local Arrays .. + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEBAK, CGEBAL, CGEHRD, CHSEQR, CLACPY, + $ CLASCL, CTRSEN, CUNGHR, SLABAD, SLASCL, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTVS = LSAME( JOBVS, 'V' ) + WANTST = LSAME( SORT, 'S' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR. + $ ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of real workspace needed at that point in the +* code, as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to real +* workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by CHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case. +* If SENSE = 'E', 'V' or 'B', then the amount of workspace needed +* depends on SDIM, which is computed by the routine CTRSEN later +* in the code.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + LWRK = 1 + ELSE + MAXWRK = N + N*ILAENV( 1, 'CGEHRD', ' ', N, 1, N, 0 ) + MINWRK = 2*N +* + CALL CHSEQR( 'S', JOBVS, N, 1, N, A, LDA, W, VS, LDVS, + $ WORK, -1, IEVAL ) + HSWORK = WORK( 1 ) +* + IF( .NOT.WANTVS ) THEN + MAXWRK = MAX( MAXWRK, HSWORK ) + ELSE + MAXWRK = MAX( MAXWRK, N + ( N - 1 )*ILAENV( 1, 'CUNGHR', + $ ' ', N, 1, N, -1 ) ) + MAXWRK = MAX( MAXWRK, HSWORK ) + END IF + LWRK = MAXWRK + IF( .NOT.WANTSN ) + $ LWRK = MAX( LWRK, ( N*N )/2 ) + END IF + WORK( 1 ) = LWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEESX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL CLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* +* Permute the matrix to make it more nearly triangular +* (CWorkspace: none) +* (RWorkspace: need N) +* + IBAL = 1 + CALL CGEBAL( 'P', N, A, LDA, ILO, IHI, RWORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + ITAU = 1 + IWRK = N + ITAU + CALL CGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVS ) THEN +* +* Copy Householder vectors to VS +* + CALL CLACPY( 'L', N, N, A, LDA, VS, LDVS ) +* +* Generate unitary matrix in VS +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL CUNGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) + END IF +* + SDIM = 0 +* +* Perform QR iteration, accumulating Schur vectors in VS if desired +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL CHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, W, VS, LDVS, + $ WORK( IWRK ), LWORK-IWRK+1, IEVAL ) + IF( IEVAL.GT.0 ) + $ INFO = IEVAL +* +* Sort eigenvalues if desired +* + IF( WANTST .AND. INFO.EQ.0 ) THEN + IF( SCALEA ) + $ CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, W, N, IERR ) + DO 10 I = 1, N + BWORK( I ) = SELECT( W( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues, transform Schur vectors, and compute +* reciprocal condition numbers +* (CWorkspace: if SENSE is not 'N', need 2*SDIM*(N-SDIM) +* otherwise, need none ) +* (RWorkspace: none) +* + CALL CTRSEN( SENSE, JOBVS, BWORK, N, A, LDA, VS, LDVS, W, SDIM, + $ RCONDE, RCONDV, WORK( IWRK ), LWORK-IWRK+1, + $ ICOND ) + IF( .NOT.WANTSN ) + $ MAXWRK = MAX( MAXWRK, 2*SDIM*( N-SDIM ) ) + IF( ICOND.EQ.-14 ) THEN +* +* Not enough complex workspace +* + INFO = -15 + END IF + END IF +* + IF( WANTVS ) THEN +* +* Undo balancing +* (CWorkspace: none) +* (RWorkspace: need N) +* + CALL CGEBAK( 'P', 'R', N, ILO, IHI, RWORK( IBAL ), N, VS, LDVS, + $ IERR ) + END IF +* + IF( SCALEA ) THEN +* +* Undo scaling for the Schur form of A +* + CALL CLASCL( 'U', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR ) + CALL CCOPY( N, A, LDA+1, W, 1 ) + IF( ( WANTSV .OR. WANTSB ) .AND. INFO.EQ.0 ) THEN + DUM( 1 ) = RCONDV + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR ) + RCONDV = DUM( 1 ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of CGEESX +* + END diff --git a/dspl/liblapack/SRC/cgeev.f b/dspl/liblapack/SRC/cgeev.f new file mode 100644 index 0000000..bdd75e4 --- /dev/null +++ b/dspl/liblapack/SRC/cgeev.f @@ -0,0 +1,503 @@ +*> \brief CGEEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEEV( JOBVL, JOBVR, N, A, LDA, W, VL, LDVL, VR, LDVR, +* WORK, LWORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVL, JOBVR +* INTEGER INFO, LDA, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. +* REAL RWORK( * ) +* COMPLEX A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), +* $ W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEEV computes for an N-by-N complex nonsymmetric matrix A, the +*> eigenvalues and, optionally, the left and/or right eigenvectors. +*> +*> The right eigenvector v(j) of A satisfies +*> A * v(j) = lambda(j) * v(j) +*> where lambda(j) is its eigenvalue. +*> The left eigenvector u(j) of A satisfies +*> u(j)**H * A = lambda(j) * u(j)**H +*> where u(j)**H denotes the conjugate transpose of u(j). +*> +*> The computed eigenvectors are normalized to have Euclidean norm +*> equal to 1 and largest component real. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': left eigenvectors of A are not computed; +*> = 'V': left eigenvectors of are computed. +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': right eigenvectors of A are not computed; +*> = 'V': right eigenvectors of A are computed. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> On exit, A has been overwritten. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX array, dimension (N) +*> W contains the computed eigenvalues. +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is COMPLEX array, dimension (LDVL,N) +*> If JOBVL = 'V', the left eigenvectors u(j) are stored one +*> after another in the columns of VL, in the same order +*> as their eigenvalues. +*> If JOBVL = 'N', VL is not referenced. +*> u(j) = VL(:,j), the j-th column of VL. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. LDVL >= 1; if +*> JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is COMPLEX array, dimension (LDVR,N) +*> If JOBVR = 'V', the right eigenvectors v(j) are stored one +*> after another in the columns of VR, in the same order +*> as their eigenvalues. +*> If JOBVR = 'N', VR is not referenced. +*> v(j) = VR(:,j), the j-th column of VR. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. LDVR >= 1; if +*> JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,2*N). +*> For good performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the QR algorithm failed to compute all the +*> eigenvalues, and no eigenvectors have been computed; +*> elements and i+1:N of W contain eigenvalues which have +*> converged. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +* @generated from zgeev.f, fortran z -> c, Tue Apr 19 01:47:44 2016 +* +*> \ingroup complexGEeigen +* +* ===================================================================== + SUBROUTINE CGEEV( JOBVL, JOBVR, N, A, LDA, W, VL, LDVL, VR, LDVR, + $ WORK, LWORK, RWORK, INFO ) + implicit none +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), + $ W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTVL, WANTVR + CHARACTER SIDE + INTEGER HSWORK, I, IBAL, IERR, IHI, ILO, IRWORK, ITAU, + $ IWRK, K, LWORK_TREVC, MAXWRK, MINWRK, NOUT + REAL ANRM, BIGNUM, CSCALE, EPS, SCL, SMLNUM + COMPLEX TMP +* .. +* .. Local Arrays .. + LOGICAL SELECT( 1 ) + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SLABAD, XERBLA, CSSCAL, CGEBAK, CGEBAL, CGEHRD, + $ CHSEQR, CLACPY, CLASCL, CSCAL, CTREVC3, CUNGHR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX, ILAENV + REAL SLAMCH, SCNRM2, CLANGE + EXTERNAL LSAME, ISAMAX, ILAENV, SLAMCH, SCNRM2, CLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, CMPLX, CONJG, AIMAG, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVL = LSAME( JOBVL, 'V' ) + WANTVR = LSAME( JOBVR, 'V' ) + IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN + INFO = -10 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to real +* workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by CHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + ELSE + MAXWRK = N + N*ILAENV( 1, 'CGEHRD', ' ', N, 1, N, 0 ) + MINWRK = 2*N + IF( WANTVL ) THEN + MAXWRK = MAX( MAXWRK, N + ( N - 1 )*ILAENV( 1, 'CUNGHR', + $ ' ', N, 1, N, -1 ) ) + CALL CTREVC3( 'L', 'B', SELECT, N, A, LDA, + $ VL, LDVL, VR, LDVR, + $ N, NOUT, WORK, -1, RWORK, -1, IERR ) + LWORK_TREVC = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, N + LWORK_TREVC ) + CALL CHSEQR( 'S', 'V', N, 1, N, A, LDA, W, VL, LDVL, + $ WORK, -1, INFO ) + ELSE IF( WANTVR ) THEN + MAXWRK = MAX( MAXWRK, N + ( N - 1 )*ILAENV( 1, 'CUNGHR', + $ ' ', N, 1, N, -1 ) ) + CALL CTREVC3( 'R', 'B', SELECT, N, A, LDA, + $ VL, LDVL, VR, LDVR, + $ N, NOUT, WORK, -1, RWORK, -1, IERR ) + LWORK_TREVC = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, N + LWORK_TREVC ) + CALL CHSEQR( 'S', 'V', N, 1, N, A, LDA, W, VR, LDVR, + $ WORK, -1, INFO ) + ELSE + CALL CHSEQR( 'E', 'N', N, 1, N, A, LDA, W, VR, LDVR, + $ WORK, -1, INFO ) + END IF + HSWORK = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, HSWORK, MINWRK ) + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL CLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Balance the matrix +* (CWorkspace: none) +* (RWorkspace: need N) +* + IBAL = 1 + CALL CGEBAL( 'B', N, A, LDA, ILO, IHI, RWORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + ITAU = 1 + IWRK = ITAU + N + CALL CGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVL ) THEN +* +* Want left eigenvectors +* Copy Householder vectors to VL +* + SIDE = 'L' + CALL CLACPY( 'L', N, N, A, LDA, VL, LDVL ) +* +* Generate unitary matrix in VL +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL CUNGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VL +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL CHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VL, LDVL, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + IF( WANTVR ) THEN +* +* Want left and right eigenvectors +* Copy Schur vectors to VR +* + SIDE = 'B' + CALL CLACPY( 'F', N, N, VL, LDVL, VR, LDVR ) + END IF +* + ELSE IF( WANTVR ) THEN +* +* Want right eigenvectors +* Copy Householder vectors to VR +* + SIDE = 'R' + CALL CLACPY( 'L', N, N, A, LDA, VR, LDVR ) +* +* Generate unitary matrix in VR +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL CUNGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VR +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL CHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + ELSE +* +* Compute eigenvalues only +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL CHSEQR( 'E', 'N', N, ILO, IHI, A, LDA, W, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) + END IF +* +* If INFO .NE. 0 from CHSEQR, then quit +* + IF( INFO.NE.0 ) + $ GO TO 50 +* + IF( WANTVL .OR. WANTVR ) THEN +* +* Compute left and/or right eigenvectors +* (CWorkspace: need 2*N, prefer N + 2*N*NB) +* (RWorkspace: need 2*N) +* + IRWORK = IBAL + N + CALL CTREVC3( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ N, NOUT, WORK( IWRK ), LWORK-IWRK+1, + $ RWORK( IRWORK ), N, IERR ) + END IF +* + IF( WANTVL ) THEN +* +* Undo balancing of left eigenvectors +* (CWorkspace: none) +* (RWorkspace: need N) +* + CALL CGEBAK( 'B', 'L', N, ILO, IHI, RWORK( IBAL ), N, VL, LDVL, + $ IERR ) +* +* Normalize left eigenvectors and make largest component real +* + DO 20 I = 1, N + SCL = ONE / SCNRM2( N, VL( 1, I ), 1 ) + CALL CSSCAL( N, SCL, VL( 1, I ), 1 ) + DO 10 K = 1, N + RWORK( IRWORK+K-1 ) = REAL( VL( K, I ) )**2 + + $ AIMAG( VL( K, I ) )**2 + 10 CONTINUE + K = ISAMAX( N, RWORK( IRWORK ), 1 ) + TMP = CONJG( VL( K, I ) ) / SQRT( RWORK( IRWORK+K-1 ) ) + CALL CSCAL( N, TMP, VL( 1, I ), 1 ) + VL( K, I ) = CMPLX( REAL( VL( K, I ) ), ZERO ) + 20 CONTINUE + END IF +* + IF( WANTVR ) THEN +* +* Undo balancing of right eigenvectors +* (CWorkspace: none) +* (RWorkspace: need N) +* + CALL CGEBAK( 'B', 'R', N, ILO, IHI, RWORK( IBAL ), N, VR, LDVR, + $ IERR ) +* +* Normalize right eigenvectors and make largest component real +* + DO 40 I = 1, N + SCL = ONE / SCNRM2( N, VR( 1, I ), 1 ) + CALL CSSCAL( N, SCL, VR( 1, I ), 1 ) + DO 30 K = 1, N + RWORK( IRWORK+K-1 ) = REAL( VR( K, I ) )**2 + + $ AIMAG( VR( K, I ) )**2 + 30 CONTINUE + K = ISAMAX( N, RWORK( IRWORK ), 1 ) + TMP = CONJG( VR( K, I ) ) / SQRT( RWORK( IRWORK+K-1 ) ) + CALL CSCAL( N, TMP, VR( 1, I ), 1 ) + VR( K, I ) = CMPLX( REAL( VR( K, I ) ), ZERO ) + 40 CONTINUE + END IF +* +* Undo scaling if necessary +* + 50 CONTINUE + IF( SCALEA ) THEN + CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, W( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + IF( INFO.GT.0 ) THEN + CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, W, N, IERR ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of CGEEV +* + END diff --git a/dspl/liblapack/SRC/cgeevx.f b/dspl/liblapack/SRC/cgeevx.f new file mode 100644 index 0000000..b1ff3cc --- /dev/null +++ b/dspl/liblapack/SRC/cgeevx.f @@ -0,0 +1,667 @@ +*> \brief CGEEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, W, VL, +* LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, RCONDE, +* RCONDV, WORK, LWORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER BALANC, JOBVL, JOBVR, SENSE +* INTEGER IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N +* REAL ABNRM +* .. +* .. Array Arguments .. +* REAL RCONDE( * ), RCONDV( * ), RWORK( * ), +* $ SCALE( * ) +* COMPLEX A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), +* $ W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEEVX computes for an N-by-N complex nonsymmetric matrix A, the +*> eigenvalues and, optionally, the left and/or right eigenvectors. +*> +*> Optionally also, it computes a balancing transformation to improve +*> the conditioning of the eigenvalues and eigenvectors (ILO, IHI, +*> SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues +*> (RCONDE), and reciprocal condition numbers for the right +*> eigenvectors (RCONDV). +*> +*> The right eigenvector v(j) of A satisfies +*> A * v(j) = lambda(j) * v(j) +*> where lambda(j) is its eigenvalue. +*> The left eigenvector u(j) of A satisfies +*> u(j)**H * A = lambda(j) * u(j)**H +*> where u(j)**H denotes the conjugate transpose of u(j). +*> +*> The computed eigenvectors are normalized to have Euclidean norm +*> equal to 1 and largest component real. +*> +*> Balancing a matrix means permuting the rows and columns to make it +*> more nearly upper triangular, and applying a diagonal similarity +*> transformation D * A * D**(-1), where D is a diagonal matrix, to +*> make its rows and columns closer in norm and the condition numbers +*> of its eigenvalues and eigenvectors smaller. The computed +*> reciprocal condition numbers correspond to the balanced matrix. +*> Permuting rows and columns will not change the condition numbers +*> (in exact arithmetic) but diagonal scaling will. For further +*> explanation of balancing, see section 4.10.2 of the LAPACK +*> Users' Guide. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] BALANC +*> \verbatim +*> BALANC is CHARACTER*1 +*> Indicates how the input matrix should be diagonally scaled +*> and/or permuted to improve the conditioning of its +*> eigenvalues. +*> = 'N': Do not diagonally scale or permute; +*> = 'P': Perform permutations to make the matrix more nearly +*> upper triangular. Do not diagonally scale; +*> = 'S': Diagonally scale the matrix, ie. replace A by +*> D*A*D**(-1), where D is a diagonal matrix chosen +*> to make the rows and columns of A more equal in +*> norm. Do not permute; +*> = 'B': Both diagonally scale and permute A. +*> +*> Computed reciprocal condition numbers will be for the matrix +*> after balancing and/or permuting. Permuting does not change +*> condition numbers (in exact arithmetic), but balancing does. +*> \endverbatim +*> +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': left eigenvectors of A are not computed; +*> = 'V': left eigenvectors of A are computed. +*> If SENSE = 'E' or 'B', JOBVL must = 'V'. +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': right eigenvectors of A are not computed; +*> = 'V': right eigenvectors of A are computed. +*> If SENSE = 'E' or 'B', JOBVR must = 'V'. +*> \endverbatim +*> +*> \param[in] SENSE +*> \verbatim +*> SENSE is CHARACTER*1 +*> Determines which reciprocal condition numbers are computed. +*> = 'N': None are computed; +*> = 'E': Computed for eigenvalues only; +*> = 'V': Computed for right eigenvectors only; +*> = 'B': Computed for eigenvalues and right eigenvectors. +*> +*> If SENSE = 'E' or 'B', both left and right eigenvectors +*> must also be computed (JOBVL = 'V' and JOBVR = 'V'). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> On exit, A has been overwritten. If JOBVL = 'V' or +*> JOBVR = 'V', A contains the Schur form of the balanced +*> version of the matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX array, dimension (N) +*> W contains the computed eigenvalues. +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is COMPLEX array, dimension (LDVL,N) +*> If JOBVL = 'V', the left eigenvectors u(j) are stored one +*> after another in the columns of VL, in the same order +*> as their eigenvalues. +*> If JOBVL = 'N', VL is not referenced. +*> u(j) = VL(:,j), the j-th column of VL. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. LDVL >= 1; if +*> JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is COMPLEX array, dimension (LDVR,N) +*> If JOBVR = 'V', the right eigenvectors v(j) are stored one +*> after another in the columns of VR, in the same order +*> as their eigenvalues. +*> If JOBVR = 'N', VR is not referenced. +*> v(j) = VR(:,j), the j-th column of VR. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. LDVR >= 1; if +*> JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[out] IHI +*> \verbatim +*> IHI is INTEGER +*> ILO and IHI are integer values determined when A was +*> balanced. The balanced A(i,j) = 0 if I > J and +*> J = 1,...,ILO-1 or I = IHI+1,...,N. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL array, dimension (N) +*> Details of the permutations and scaling factors applied +*> when balancing A. If P(j) is the index of the row and column +*> interchanged with row and column j, and D(j) is the scaling +*> factor applied to row and column j, then +*> SCALE(J) = P(J), for J = 1,...,ILO-1 +*> = D(J), for J = ILO,...,IHI +*> = P(J) for J = IHI+1,...,N. +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] ABNRM +*> \verbatim +*> ABNRM is REAL +*> The one-norm of the balanced matrix (the maximum +*> of the sum of absolute values of elements of any column). +*> \endverbatim +*> +*> \param[out] RCONDE +*> \verbatim +*> RCONDE is REAL array, dimension (N) +*> RCONDE(j) is the reciprocal condition number of the j-th +*> eigenvalue. +*> \endverbatim +*> +*> \param[out] RCONDV +*> \verbatim +*> RCONDV is REAL array, dimension (N) +*> RCONDV(j) is the reciprocal condition number of the j-th +*> right eigenvector. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. If SENSE = 'N' or 'E', +*> LWORK >= max(1,2*N), and if SENSE = 'V' or 'B', +*> LWORK >= N*N+2*N. +*> For good performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the QR algorithm failed to compute all the +*> eigenvalues, and no eigenvectors or condition numbers +*> have been computed; elements 1:ILO-1 and i+1:N of W +*> contain eigenvalues which have converged. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +* @generated from zgeevx.f, fortran z -> c, Tue Apr 19 01:47:44 2016 +* +*> \ingroup complexGEeigen +* +* ===================================================================== + SUBROUTINE CGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, W, VL, + $ LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, RCONDE, + $ RCONDV, WORK, LWORK, RWORK, INFO ) + implicit none +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER BALANC, JOBVL, JOBVR, SENSE + INTEGER IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N + REAL ABNRM +* .. +* .. Array Arguments .. + REAL RCONDE( * ), RCONDV( * ), RWORK( * ), + $ SCALE( * ) + COMPLEX A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), + $ W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTVL, WANTVR, WNTSNB, WNTSNE, + $ WNTSNN, WNTSNV + CHARACTER JOB, SIDE + INTEGER HSWORK, I, ICOND, IERR, ITAU, IWRK, K, + $ LWORK_TREVC, MAXWRK, MINWRK, NOUT + REAL ANRM, BIGNUM, CSCALE, EPS, SCL, SMLNUM + COMPLEX TMP +* .. +* .. Local Arrays .. + LOGICAL SELECT( 1 ) + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SLABAD, SLASCL, XERBLA, CSSCAL, CGEBAK, CGEBAL, + $ CGEHRD, CHSEQR, CLACPY, CLASCL, CSCAL, CTREVC3, + $ CTRSNA, CUNGHR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX, ILAENV + REAL SLAMCH, SCNRM2, CLANGE + EXTERNAL LSAME, ISAMAX, ILAENV, SLAMCH, SCNRM2, CLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, CMPLX, CONJG, AIMAG, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVL = LSAME( JOBVL, 'V' ) + WANTVR = LSAME( JOBVR, 'V' ) + WNTSNN = LSAME( SENSE, 'N' ) + WNTSNE = LSAME( SENSE, 'E' ) + WNTSNV = LSAME( SENSE, 'V' ) + WNTSNB = LSAME( SENSE, 'B' ) + IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC, 'S' ) .OR. + $ LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( .NOT.( WNTSNN .OR. WNTSNE .OR. WNTSNB .OR. WNTSNV ) .OR. + $ ( ( WNTSNE .OR. WNTSNB ) .AND. .NOT.( WANTVL .AND. + $ WANTVR ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN + INFO = -10 + ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN + INFO = -12 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to real +* workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by CHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + ELSE + MAXWRK = N + N*ILAENV( 1, 'CGEHRD', ' ', N, 1, N, 0 ) +* + IF( WANTVL ) THEN + CALL CTREVC3( 'L', 'B', SELECT, N, A, LDA, + $ VL, LDVL, VR, LDVR, + $ N, NOUT, WORK, -1, RWORK, -1, IERR ) + LWORK_TREVC = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, LWORK_TREVC ) + CALL CHSEQR( 'S', 'V', N, 1, N, A, LDA, W, VL, LDVL, + $ WORK, -1, INFO ) + ELSE IF( WANTVR ) THEN + CALL CTREVC3( 'R', 'B', SELECT, N, A, LDA, + $ VL, LDVL, VR, LDVR, + $ N, NOUT, WORK, -1, RWORK, -1, IERR ) + LWORK_TREVC = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, LWORK_TREVC ) + CALL CHSEQR( 'S', 'V', N, 1, N, A, LDA, W, VR, LDVR, + $ WORK, -1, INFO ) + ELSE + IF( WNTSNN ) THEN + CALL CHSEQR( 'E', 'N', N, 1, N, A, LDA, W, VR, LDVR, + $ WORK, -1, INFO ) + ELSE + CALL CHSEQR( 'S', 'N', N, 1, N, A, LDA, W, VR, LDVR, + $ WORK, -1, INFO ) + END IF + END IF + HSWORK = INT( WORK(1) ) +* + IF( ( .NOT.WANTVL ) .AND. ( .NOT.WANTVR ) ) THEN + MINWRK = 2*N + IF( .NOT.( WNTSNN .OR. WNTSNE ) ) + $ MINWRK = MAX( MINWRK, N*N + 2*N ) + MAXWRK = MAX( MAXWRK, HSWORK ) + IF( .NOT.( WNTSNN .OR. WNTSNE ) ) + $ MAXWRK = MAX( MAXWRK, N*N + 2*N ) + ELSE + MINWRK = 2*N + IF( .NOT.( WNTSNN .OR. WNTSNE ) ) + $ MINWRK = MAX( MINWRK, N*N + 2*N ) + MAXWRK = MAX( MAXWRK, HSWORK ) + MAXWRK = MAX( MAXWRK, N + ( N - 1 )*ILAENV( 1, 'CUNGHR', + $ ' ', N, 1, N, -1 ) ) + IF( .NOT.( WNTSNN .OR. WNTSNE ) ) + $ MAXWRK = MAX( MAXWRK, N*N + 2*N ) + MAXWRK = MAX( MAXWRK, 2*N ) + END IF + MAXWRK = MAX( MAXWRK, MINWRK ) + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ICOND = 0 + ANRM = CLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL CLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Balance the matrix and compute ABNRM +* + CALL CGEBAL( BALANC, N, A, LDA, ILO, IHI, SCALE, IERR ) + ABNRM = CLANGE( '1', N, N, A, LDA, DUM ) + IF( SCALEA ) THEN + DUM( 1 ) = ABNRM + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR ) + ABNRM = DUM( 1 ) + END IF +* +* Reduce to upper Hessenberg form +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + ITAU = 1 + IWRK = ITAU + N + CALL CGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVL ) THEN +* +* Want left eigenvectors +* Copy Householder vectors to VL +* + SIDE = 'L' + CALL CLACPY( 'L', N, N, A, LDA, VL, LDVL ) +* +* Generate unitary matrix in VL +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL CUNGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VL +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL CHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VL, LDVL, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + IF( WANTVR ) THEN +* +* Want left and right eigenvectors +* Copy Schur vectors to VR +* + SIDE = 'B' + CALL CLACPY( 'F', N, N, VL, LDVL, VR, LDVR ) + END IF +* + ELSE IF( WANTVR ) THEN +* +* Want right eigenvectors +* Copy Householder vectors to VR +* + SIDE = 'R' + CALL CLACPY( 'L', N, N, A, LDA, VR, LDVR ) +* +* Generate unitary matrix in VR +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL CUNGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VR +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL CHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + ELSE +* +* Compute eigenvalues only +* If condition numbers desired, compute Schur form +* + IF( WNTSNN ) THEN + JOB = 'E' + ELSE + JOB = 'S' + END IF +* +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL CHSEQR( JOB, 'N', N, ILO, IHI, A, LDA, W, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) + END IF +* +* If INFO .NE. 0 from CHSEQR, then quit +* + IF( INFO.NE.0 ) + $ GO TO 50 +* + IF( WANTVL .OR. WANTVR ) THEN +* +* Compute left and/or right eigenvectors +* (CWorkspace: need 2*N, prefer N + 2*N*NB) +* (RWorkspace: need N) +* + CALL CTREVC3( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ N, NOUT, WORK( IWRK ), LWORK-IWRK+1, + $ RWORK, N, IERR ) + END IF +* +* Compute condition numbers if desired +* (CWorkspace: need N*N+2*N unless SENSE = 'E') +* (RWorkspace: need 2*N unless SENSE = 'E') +* + IF( .NOT.WNTSNN ) THEN + CALL CTRSNA( SENSE, 'A', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ RCONDE, RCONDV, N, NOUT, WORK( IWRK ), N, RWORK, + $ ICOND ) + END IF +* + IF( WANTVL ) THEN +* +* Undo balancing of left eigenvectors +* + CALL CGEBAK( BALANC, 'L', N, ILO, IHI, SCALE, N, VL, LDVL, + $ IERR ) +* +* Normalize left eigenvectors and make largest component real +* + DO 20 I = 1, N + SCL = ONE / SCNRM2( N, VL( 1, I ), 1 ) + CALL CSSCAL( N, SCL, VL( 1, I ), 1 ) + DO 10 K = 1, N + RWORK( K ) = REAL( VL( K, I ) )**2 + + $ AIMAG( VL( K, I ) )**2 + 10 CONTINUE + K = ISAMAX( N, RWORK, 1 ) + TMP = CONJG( VL( K, I ) ) / SQRT( RWORK( K ) ) + CALL CSCAL( N, TMP, VL( 1, I ), 1 ) + VL( K, I ) = CMPLX( REAL( VL( K, I ) ), ZERO ) + 20 CONTINUE + END IF +* + IF( WANTVR ) THEN +* +* Undo balancing of right eigenvectors +* + CALL CGEBAK( BALANC, 'R', N, ILO, IHI, SCALE, N, VR, LDVR, + $ IERR ) +* +* Normalize right eigenvectors and make largest component real +* + DO 40 I = 1, N + SCL = ONE / SCNRM2( N, VR( 1, I ), 1 ) + CALL CSSCAL( N, SCL, VR( 1, I ), 1 ) + DO 30 K = 1, N + RWORK( K ) = REAL( VR( K, I ) )**2 + + $ AIMAG( VR( K, I ) )**2 + 30 CONTINUE + K = ISAMAX( N, RWORK, 1 ) + TMP = CONJG( VR( K, I ) ) / SQRT( RWORK( K ) ) + CALL CSCAL( N, TMP, VR( 1, I ), 1 ) + VR( K, I ) = CMPLX( REAL( VR( K, I ) ), ZERO ) + 40 CONTINUE + END IF +* +* Undo scaling if necessary +* + 50 CONTINUE + IF( SCALEA ) THEN + CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, W( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + IF( INFO.EQ.0 ) THEN + IF( ( WNTSNV .OR. WNTSNB ) .AND. ICOND.EQ.0 ) + $ CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, RCONDV, N, + $ IERR ) + ELSE + CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, W, N, IERR ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of CGEEVX +* + END diff --git a/dspl/liblapack/SRC/cgehd2.f b/dspl/liblapack/SRC/cgehd2.f new file mode 100644 index 0000000..d4d78b4 --- /dev/null +++ b/dspl/liblapack/SRC/cgehd2.f @@ -0,0 +1,224 @@ +*> \brief \b CGEHD2 reduces a general square matrix to upper Hessenberg form using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEHD2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEHD2 reduces a complex general matrix A to upper Hessenberg form H +*> by a unitary similarity transformation: Q**H * A * Q = H . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> It is assumed that A is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +*> set by a previous call to CGEBAL; otherwise they should be +*> set to 1 and N respectively. See Further Details. +*> 1 <= ILO <= IHI <= max(1,N). +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the n by n general matrix to be reduced. +*> On exit, the upper triangle and the first subdiagonal of A +*> are overwritten with the upper Hessenberg matrix H, and the +*> elements below the first subdiagonal, with the array TAU, +*> represent the unitary matrix Q as a product of elementary +*> reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of (ihi-ilo) elementary +*> reflectors +*> +*> Q = H(ilo) H(ilo+1) . . . H(ihi-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +*> exit in A(i+2:ihi,i), and tau in TAU(i). +*> +*> The contents of A are illustrated by the following example, with +*> n = 7, ilo = 2 and ihi = 6: +*> +*> on entry, on exit, +*> +*> ( a a a a a a a ) ( a a h h h h a ) +*> ( a a a a a a ) ( a h h h h a ) +*> ( a a a a a a ) ( h h h h h h ) +*> ( a a a a a a ) ( v2 h h h h h ) +*> ( a a a a a a ) ( v2 v3 h h h h ) +*> ( a a a a a a ) ( v2 v3 v4 h h h ) +*> ( a ) ( a ) +*> +*> where a denotes an element of the original matrix A, h denotes a +*> modified element of the upper Hessenberg matrix H, and vi denotes an +*> element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEHD2', -INFO ) + RETURN + END IF +* + DO 10 I = ILO, IHI - 1 +* +* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) +* + ALPHA = A( I+1, I ) + CALL CLARFG( IHI-I, ALPHA, A( MIN( I+2, N ), I ), 1, TAU( I ) ) + A( I+1, I ) = ONE +* +* Apply H(i) to A(1:ihi,i+1:ihi) from the right +* + CALL CLARF( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ), + $ A( 1, I+1 ), LDA, WORK ) +* +* Apply H(i)**H to A(i+1:ihi,i+1:n) from the left +* + CALL CLARF( 'Left', IHI-I, N-I, A( I+1, I ), 1, + $ CONJG( TAU( I ) ), A( I+1, I+1 ), LDA, WORK ) +* + A( I+1, I ) = ALPHA + 10 CONTINUE +* + RETURN +* +* End of CGEHD2 +* + END diff --git a/dspl/liblapack/SRC/cgehrd.f b/dspl/liblapack/SRC/cgehrd.f new file mode 100644 index 0000000..ca0f2d5 --- /dev/null +++ b/dspl/liblapack/SRC/cgehrd.f @@ -0,0 +1,356 @@ +*> \brief \b CGEHRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEHRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEHRD reduces a complex general matrix A to upper Hessenberg form H by +*> an unitary similarity transformation: Q**H * A * Q = H . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> It is assumed that A is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +*> set by a previous call to CGEBAL; otherwise they should be +*> set to 1 and N respectively. See Further Details. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N general matrix to be reduced. +*> On exit, the upper triangle and the first subdiagonal of A +*> are overwritten with the upper Hessenberg matrix H, and the +*> elements below the first subdiagonal, with the array TAU, +*> represent the unitary matrix Q as a product of elementary +*> reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to +*> zero. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,N). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of (ihi-ilo) elementary +*> reflectors +*> +*> Q = H(ilo) H(ilo+1) . . . H(ihi-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +*> exit in A(i+2:ihi,i), and tau in TAU(i). +*> +*> The contents of A are illustrated by the following example, with +*> n = 7, ilo = 2 and ihi = 6: +*> +*> on entry, on exit, +*> +*> ( a a a a a a a ) ( a a h h h h a ) +*> ( a a a a a a ) ( a h h h h a ) +*> ( a a a a a a ) ( h h h h h h ) +*> ( a a a a a a ) ( v2 h h h h h ) +*> ( a a a a a a ) ( v2 v3 h h h h ) +*> ( a a a a a a ) ( v2 v3 v4 h h h ) +*> ( a ) ( a ) +*> +*> where a denotes an element of the original matrix A, h denotes a +*> modified element of the upper Hessenberg matrix H, and vi denotes an +*> element of the vector defining H(i). +*> +*> This file is a slight modification of LAPACK-3.0's DGEHRD +*> subroutine incorporating improvements proposed by Quintana-Orti and +*> Van de Geijn (2006). (See DLAHR2.) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWT, J, LDWORK, LWKOPT, NB, + $ NBMIN, NH, NX + COMPLEX EI +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CGEHD2, CGEMM, CLAHR2, CLARFB, CTRMM, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + NB = MIN( NBMAX, ILAENV( 1, 'CGEHRD', ' ', N, ILO, IHI, -1 ) ) + LWKOPT = N*NB + TSIZE + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEHRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero +* + DO 10 I = 1, ILO - 1 + TAU( I ) = ZERO + 10 CONTINUE + DO 20 I = MAX( 1, IHI ), N - 1 + TAU( I ) = ZERO + 20 CONTINUE +* +* Quick return if possible +* + NH = IHI - ILO + 1 + IF( NH.LE.1 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine the block size +* + NB = MIN( NBMAX, ILAENV( 1, 'CGEHRD', ' ', N, ILO, IHI, -1 ) ) + NBMIN = 2 + IF( NB.GT.1 .AND. NB.LT.NH ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code) +* + NX = MAX( NB, ILAENV( 3, 'CGEHRD', ' ', N, ILO, IHI, -1 ) ) + IF( NX.LT.NH ) THEN +* +* Determine if workspace is large enough for blocked code +* + IF( LWORK.LT.N*NB+TSIZE ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code +* + NBMIN = MAX( 2, ILAENV( 2, 'CGEHRD', ' ', N, ILO, IHI, + $ -1 ) ) + IF( LWORK.GE.(N*NBMIN+TSIZE) ) THEN + NB = (LWORK-TSIZE) / N + ELSE + NB = 1 + END IF + END IF + END IF + END IF + LDWORK = N +* + IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN +* +* Use unblocked code below +* + I = ILO +* + ELSE +* +* Use blocked code +* + IWT = 1 + N*NB + DO 40 I = ILO, IHI - 1 - NX, NB + IB = MIN( NB, IHI-I ) +* +* Reduce columns i:i+ib-1 to Hessenberg form, returning the +* matrices V and T of the block reflector H = I - V*T*V**H +* which performs the reduction, and also the matrix Y = A*V*T +* + CALL CLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), + $ WORK( IWT ), LDT, WORK, LDWORK ) +* +* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the +* right, computing A := A - Y * V**H. V(i+ib,ib-1) must be set +* to 1 +* + EI = A( I+IB, I+IB-1 ) + A( I+IB, I+IB-1 ) = ONE + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ IHI, IHI-I-IB+1, + $ IB, -ONE, WORK, LDWORK, A( I+IB, I ), LDA, ONE, + $ A( 1, I+IB ), LDA ) + A( I+IB, I+IB-1 ) = EI +* +* Apply the block reflector H to A(1:i,i+1:i+ib-1) from the +* right +* + CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', I, IB-1, + $ ONE, A( I+1, I ), LDA, WORK, LDWORK ) + DO 30 J = 0, IB-2 + CALL CAXPY( I, -ONE, WORK( LDWORK*J+1 ), 1, + $ A( 1, I+J+1 ), 1 ) + 30 CONTINUE +* +* Apply the block reflector H to A(i+1:ihi,i+ib:n) from the +* left +* + CALL CLARFB( 'Left', 'Conjugate transpose', 'Forward', + $ 'Columnwise', + $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, + $ WORK( IWT ), LDT, A( I+1, I+IB ), LDA, + $ WORK, LDWORK ) + 40 CONTINUE + END IF +* +* Use unblocked code to reduce the rest of the matrix +* + CALL CGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO ) + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CGEHRD +* + END diff --git a/dspl/liblapack/SRC/cgejsv.f b/dspl/liblapack/SRC/cgejsv.f new file mode 100644 index 0000000..8eb43cf --- /dev/null +++ b/dspl/liblapack/SRC/cgejsv.f @@ -0,0 +1,2235 @@ +*> \brief \b CGEJSV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEJSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, +* M, N, A, LDA, SVA, U, LDU, V, LDV, +* CWORK, LWORK, RWORK, LRWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* IMPLICIT NONE +* INTEGER INFO, LDA, LDU, LDV, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), U( LDU, * ), V( LDV, * ), CWORK( LWORK ) +* REAL SVA( N ), RWORK( LRWORK ) +* INTEGER IWORK( * ) +* CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEJSV computes the singular value decomposition (SVD) of a complex M-by-N +*> matrix [A], where M >= N. The SVD of [A] is written as +*> +*> [A] = [U] * [SIGMA] * [V]^*, +*> +*> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N +*> diagonal elements, [U] is an M-by-N (or M-by-M) unitary matrix, and +*> [V] is an N-by-N unitary matrix. The diagonal elements of [SIGMA] are +*> the singular values of [A]. The columns of [U] and [V] are the left and +*> the right singular vectors of [A], respectively. The matrices [U] and [V] +*> are computed and stored in the arrays U and V, respectively. The diagonal +*> of [SIGMA] is computed and stored in the array SVA. +*> \endverbatim +*> +*> Arguments: +*> ========== +*> +*> \param[in] JOBA +*> \verbatim +*> JOBA is CHARACTER*1 +*> Specifies the level of accuracy: +*> = 'C': This option works well (high relative accuracy) if A = B * D, +*> with well-conditioned B and arbitrary diagonal matrix D. +*> The accuracy cannot be spoiled by COLUMN scaling. The +*> accuracy of the computed output depends on the condition of +*> B, and the procedure aims at the best theoretical accuracy. +*> The relative error max_{i=1:N}|d sigma_i| / sigma_i is +*> bounded by f(M,N)*epsilon* cond(B), independent of D. +*> The input matrix is preprocessed with the QRF with column +*> pivoting. This initial preprocessing and preconditioning by +*> a rank revealing QR factorization is common for all values of +*> JOBA. Additional actions are specified as follows: +*> = 'E': Computation as with 'C' with an additional estimate of the +*> condition number of B. It provides a realistic error bound. +*> = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings +*> D1, D2, and well-conditioned matrix C, this option gives +*> higher accuracy than the 'C' option. If the structure of the +*> input matrix is not known, and relative accuracy is +*> desirable, then this option is advisable. The input matrix A +*> is preprocessed with QR factorization with FULL (row and +*> column) pivoting. +*> = 'G' Computation as with 'F' with an additional estimate of the +*> condition number of B, where A=B*D. If A has heavily weighted +*> rows, then using this condition number gives too pessimistic +*> error bound. +*> = 'A': Small singular values are not well determined by the data +*> and are considered as noisy; the matrix is treated as +*> numerically rank defficient. The error in the computed +*> singular values is bounded by f(m,n)*epsilon*||A||. +*> The computed SVD A = U * S * V^* restores A up to +*> f(m,n)*epsilon*||A||. +*> This gives the procedure the licence to discard (set to zero) +*> all singular values below N*epsilon*||A||. +*> = 'R': Similar as in 'A'. Rank revealing property of the initial +*> QR factorization is used do reveal (using triangular factor) +*> a gap sigma_{r+1} < epsilon * sigma_r in which case the +*> numerical RANK is declared to be r. The SVD is computed with +*> absolute error bounds, but more accurately than with 'A'. +*> \endverbatim +*> +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> Specifies whether to compute the columns of U: +*> = 'U': N columns of U are returned in the array U. +*> = 'F': full set of M left sing. vectors is returned in the array U. +*> = 'W': U may be used as workspace of length M*N. See the description +*> of U. +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> Specifies whether to compute the matrix V: +*> = 'V': N columns of V are returned in the array V; Jacobi rotations +*> are not explicitly accumulated. +*> = 'J': N columns of V are returned in the array V, but they are +*> computed as the product of Jacobi rotations, if JOBT .EQ. 'N'. +*> = 'W': V may be used as workspace of length N*N. See the description +*> of V. +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBR +*> \verbatim +*> JOBR is CHARACTER*1 +*> Specifies the RANGE for the singular values. Issues the licence to +*> set to zero small positive singular values if they are outside +*> specified range. If A .NE. 0 is scaled so that the largest singular +*> value of c*A is around SQRT(BIG), BIG=SLAMCH('O'), then JOBR issues +*> the licence to kill columns of A whose norm in c*A is less than +*> SQRT(SFMIN) (for JOBR.EQ.'R'), or less than SMALL=SFMIN/EPSLN, +*> where SFMIN=SLAMCH('S'), EPSLN=SLAMCH('E'). +*> = 'N': Do not kill small columns of c*A. This option assumes that +*> BLAS and QR factorizations and triangular solvers are +*> implemented to work in that range. If the condition of A +*> is greater than BIG, use CGESVJ. +*> = 'R': RESTRICTED range for sigma(c*A) is [SQRT(SFMIN), SQRT(BIG)] +*> (roughly, as described above). This option is recommended. +*> =========================== +*> For computing the singular values in the FULL range [SFMIN,BIG] +*> use CGESVJ. +*> \endverbatim +*> +*> \param[in] JOBT +*> \verbatim +*> JOBT is CHARACTER*1 +*> If the matrix is square then the procedure may determine to use +*> transposed A if A^* seems to be better with respect to convergence. +*> If the matrix is not square, JOBT is ignored. +*> The decision is based on two values of entropy over the adjoint +*> orbit of A^* * A. See the descriptions of WORK(6) and WORK(7). +*> = 'T': transpose if entropy test indicates possibly faster +*> convergence of Jacobi process if A^* is taken as input. If A is +*> replaced with A^*, then the row pivoting is included automatically. +*> = 'N': do not speculate. +*> The option 'T' can be used to compute only the singular values, or +*> the full SVD (U, SIGMA and V). For only one set of singular vectors +*> (U or V), the caller should provide both U and V, as one of the +*> matrices is used as workspace if the matrix A is transposed. +*> The implementer can easily remove this constraint and make the +*> code more complicated. See the descriptions of U and V. +*> In general, this option is considered experimental, and 'N'; should +*> be preferred. This is subject to changes in the future. +*> \endverbatim +*> +*> \param[in] JOBP +*> \verbatim +*> JOBP is CHARACTER*1 +*> Issues the licence to introduce structured perturbations to drown +*> denormalized numbers. This licence should be active if the +*> denormals are poorly implemented, causing slow computation, +*> especially in cases of fast convergence (!). For details see [1,2]. +*> For the sake of simplicity, this perturbations are included only +*> when the full SVD or only the singular values are requested. The +*> implementer/user can easily add the perturbation for the cases of +*> computing one set of singular vectors. +*> = 'P': introduce perturbation +*> = 'N': do not perturb +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. M >= N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] SVA +*> \verbatim +*> SVA is REAL array, dimension (N) +*> On exit, +*> - For WORK(1)/WORK(2) = ONE: The singular values of A. During the +*> computation SVA contains Euclidean column norms of the +*> iterated matrices in the array A. +*> - For WORK(1) .NE. WORK(2): The singular values of A are +*> (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if +*> sigma_max(A) overflows or if small singular values have been +*> saved from underflow by scaling the input matrix A. +*> - If JOBR='R' then some of the singular values may be returned +*> as exact zeros obtained by "set to zero" because they are +*> below the numerical rank threshold or are denormalized numbers. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX array, dimension ( LDU, N ) or ( LDU, M ) +*> If JOBU = 'U', then U contains on exit the M-by-N matrix of +*> the left singular vectors. +*> If JOBU = 'F', then U contains on exit the M-by-M matrix of +*> the left singular vectors, including an ONB +*> of the orthogonal complement of the Range(A). +*> If JOBU = 'W' .AND. (JOBV.EQ.'V' .AND. JOBT.EQ.'T' .AND. M.EQ.N), +*> then U is used as workspace if the procedure +*> replaces A with A^*. In that case, [V] is computed +*> in U as left singular vectors of A^* and then +*> copied back to the V array. This 'W' option is just +*> a reminder to the caller that in this case U is +*> reserved as workspace of length N*N. +*> If JOBU = 'N' U is not referenced, unless JOBT='T'. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U, LDU >= 1. +*> IF JOBU = 'U' or 'F' or 'W', then LDU >= M. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX array, dimension ( LDV, N ) +*> If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of +*> the right singular vectors; +*> If JOBV = 'W', AND (JOBU.EQ.'U' AND JOBT.EQ.'T' AND M.EQ.N), +*> then V is used as workspace if the pprocedure +*> replaces A with A^*. In that case, [U] is computed +*> in V as right singular vectors of A^* and then +*> copied back to the U array. This 'W' option is just +*> a reminder to the caller that in this case V is +*> reserved as workspace of length N*N. +*> If JOBV = 'N' V is not referenced, unless JOBT='T'. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V, LDV >= 1. +*> If JOBV = 'V' or 'J' or 'W', then LDV >= N. +*> \endverbatim +*> +*> \param[out] CWORK +*> \verbatim +*> CWORK is COMPLEX array, dimension (MAX(2,LWORK)) +*> If the call to CGEJSV is a workspace query (indicated by LWORK=-1 or +*> LRWORK=-1), then on exit CWORK(1) contains the required length of +*> CWORK for the job parameters used in the call. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> Length of CWORK to confirm proper allocation of workspace. +*> LWORK depends on the job: +*> +*> 1. If only SIGMA is needed ( JOBU.EQ.'N', JOBV.EQ.'N' ) and +*> 1.1 .. no scaled condition estimate required (JOBA.NE.'E'.AND.JOBA.NE.'G'): +*> LWORK >= 2*N+1. This is the minimal requirement. +*> ->> For optimal performance (blocked code) the optimal value +*> is LWORK >= N + (N+1)*NB. Here NB is the optimal +*> block size for CGEQP3 and CGEQRF. +*> In general, optimal LWORK is computed as +*> LWORK >= max(N+LWORK(CGEQP3),N+LWORK(CGEQRF), LWORK(CGESVJ)). +*> 1.2. .. an estimate of the scaled condition number of A is +*> required (JOBA='E', or 'G'). In this case, LWORK the minimal +*> requirement is LWORK >= N*N + 2*N. +*> ->> For optimal performance (blocked code) the optimal value +*> is LWORK >= max(N+(N+1)*NB, N*N+2*N)=N**2+2*N. +*> In general, the optimal length LWORK is computed as +*> LWORK >= max(N+LWORK(CGEQP3),N+LWORK(CGEQRF), LWORK(CGESVJ), +*> N*N+LWORK(CPOCON)). +*> 2. If SIGMA and the right singular vectors are needed (JOBV.EQ.'V'), +*> (JOBU.EQ.'N') +*> 2.1 .. no scaled condition estimate requested (JOBE.EQ.'N'): +*> -> the minimal requirement is LWORK >= 3*N. +*> -> For optimal performance, +*> LWORK >= max(N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, +*> where NB is the optimal block size for CGEQP3, CGEQRF, CGELQ, +*> CUNMLQ. In general, the optimal length LWORK is computed as +*> LWORK >= max(N+LWORK(CGEQP3), N+LWORK(CGESVJ), +*> N+LWORK(CGELQF), 2*N+LWORK(CGEQRF), N+LWORK(CUNMLQ)). +*> 2.2 .. an estimate of the scaled condition number of A is +*> required (JOBA='E', or 'G'). +*> -> the minimal requirement is LWORK >= 3*N. +*> -> For optimal performance, +*> LWORK >= max(N+(N+1)*NB, 2*N,2*N+N*NB)=2*N+N*NB, +*> where NB is the optimal block size for CGEQP3, CGEQRF, CGELQ, +*> CUNMLQ. In general, the optimal length LWORK is computed as +*> LWORK >= max(N+LWORK(CGEQP3), LWORK(CPOCON), N+LWORK(CGESVJ), +*> N+LWORK(CGELQF), 2*N+LWORK(CGEQRF), N+LWORK(CUNMLQ)). +*> 3. If SIGMA and the left singular vectors are needed +*> 3.1 .. no scaled condition estimate requested (JOBE.EQ.'N'): +*> -> the minimal requirement is LWORK >= 3*N. +*> -> For optimal performance: +*> if JOBU.EQ.'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, +*> where NB is the optimal block size for CGEQP3, CGEQRF, CUNMQR. +*> In general, the optimal length LWORK is computed as +*> LWORK >= max(N+LWORK(CGEQP3), 2*N+LWORK(CGEQRF), N+LWORK(CUNMQR)). +*> 3.2 .. an estimate of the scaled condition number of A is +*> required (JOBA='E', or 'G'). +*> -> the minimal requirement is LWORK >= 3*N. +*> -> For optimal performance: +*> if JOBU.EQ.'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, +*> where NB is the optimal block size for CGEQP3, CGEQRF, CUNMQR. +*> In general, the optimal length LWORK is computed as +*> LWORK >= max(N+LWORK(CGEQP3),N+LWORK(CPOCON), +*> 2*N+LWORK(CGEQRF), N+LWORK(CUNMQR)). +*> +*> 4. If the full SVD is needed: (JOBU.EQ.'U' or JOBU.EQ.'F') and +*> 4.1. if JOBV.EQ.'V' +*> the minimal requirement is LWORK >= 5*N+2*N*N. +*> 4.2. if JOBV.EQ.'J' the minimal requirement is +*> LWORK >= 4*N+N*N. +*> In both cases, the allocated CWORK can accommodate blocked runs +*> of CGEQP3, CGEQRF, CGELQF, CUNMQR, CUNMLQ. +*> +*> If the call to CGEJSV is a workspace query (indicated by LWORK=-1 or +*> LRWORK=-1), then on exit CWORK(1) contains the optimal and CWORK(2) contains the +*> minimal length of CWORK for the job parameters used in the call. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (MAX(7,LWORK)) +*> On exit, +*> RWORK(1) = Determines the scaling factor SCALE = RWORK(2) / RWORK(1) +*> such that SCALE*SVA(1:N) are the computed singular values +*> of A. (See the description of SVA().) +*> RWORK(2) = See the description of RWORK(1). +*> RWORK(3) = SCONDA is an estimate for the condition number of +*> column equilibrated A. (If JOBA .EQ. 'E' or 'G') +*> SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1). +*> It is computed using SPOCON. It holds +*> N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA +*> where R is the triangular factor from the QRF of A. +*> However, if R is truncated and the numerical rank is +*> determined to be strictly smaller than N, SCONDA is +*> returned as -1, thus indicating that the smallest +*> singular values might be lost. +*> +*> If full SVD is needed, the following two condition numbers are +*> useful for the analysis of the algorithm. They are provied for +*> a developer/implementer who is familiar with the details of +*> the method. +*> +*> RWORK(4) = an estimate of the scaled condition number of the +*> triangular factor in the first QR factorization. +*> RWORK(5) = an estimate of the scaled condition number of the +*> triangular factor in the second QR factorization. +*> The following two parameters are computed if JOBT .EQ. 'T'. +*> They are provided for a developer/implementer who is familiar +*> with the details of the method. +*> RWORK(6) = the entropy of A^* * A :: this is the Shannon entropy +*> of diag(A^* * A) / Trace(A^* * A) taken as point in the +*> probability simplex. +*> RWORK(7) = the entropy of A * A^*. (See the description of RWORK(6).) +*> If the call to CGEJSV is a workspace query (indicated by LWORK=-1 or +*> LRWORK=-1), then on exit RWORK(1) contains the required length of +*> RWORK for the job parameters used in the call. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> Length of RWORK to confirm proper allocation of workspace. +*> LRWORK depends on the job: +*> +*> 1. If only the singular values are requested i.e. if +*> LSAME(JOBU,'N') .AND. LSAME(JOBV,'N') +*> then: +*> 1.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), +*> then: LRWORK = max( 7, 2 * M ). +*> 1.2. Otherwise, LRWORK = max( 7, N ). +*> 2. If singular values with the right singular vectors are requested +*> i.e. if +*> (LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) .AND. +*> .NOT.(LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) +*> then: +*> 2.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), +*> then LRWORK = max( 7, 2 * M ). +*> 2.2. Otherwise, LRWORK = max( 7, N ). +*> 3. If singular values with the left singular vectors are requested, i.e. if +*> (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND. +*> .NOT.(LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) +*> then: +*> 3.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), +*> then LRWORK = max( 7, 2 * M ). +*> 3.2. Otherwise, LRWORK = max( 7, N ). +*> 4. If singular values with both the left and the right singular vectors +*> are requested, i.e. if +*> (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND. +*> (LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) +*> then: +*> 4.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), +*> then LRWORK = max( 7, 2 * M ). +*> 4.2. Otherwise, LRWORK = max( 7, N ). +*> +*> If, on entry, LRWORK = -1 or LWORK=-1, a workspace query is assumed and +*> the length of RWORK is returned in RWORK(1). +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, of dimension at least 4, that further depends +*> on the job: +*> +*> 1. If only the singular values are requested then: +*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) +*> then the length of IWORK is N+M; otherwise the length of IWORK is N. +*> 2. If the singular values and the right singular vectors are requested then: +*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) +*> then the length of IWORK is N+M; otherwise the length of IWORK is N. +*> 3. If the singular values and the left singular vectors are requested then: +*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) +*> then the length of IWORK is N+M; otherwise the length of IWORK is N. +*> 4. If the singular values with both the left and the right singular vectors +*> are requested, then: +*> 4.1. If LSAME(JOBV,'J') the length of IWORK is determined as follows: +*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) +*> then the length of IWORK is N+M; otherwise the length of IWORK is N. +*> 4.2. If LSAME(JOBV,'V') the length of IWORK is determined as follows: +*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) +*> then the length of IWORK is 2*N+M; otherwise the length of IWORK is 2*N. +*> +*> On exit, +*> IWORK(1) = the numerical rank determined after the initial +*> QR factorization with pivoting. See the descriptions +*> of JOBA and JOBR. +*> IWORK(2) = the number of the computed nonzero singular values +*> IWORK(3) = if nonzero, a warning message: +*> If IWORK(3).EQ.1 then some of the column norms of A +*> were denormalized floats. The requested high accuracy +*> is not warranted by the data. +*> IWORK(4) = 1 or -1. If IWORK(4) .EQ. 1, then the procedure used A^* to +*> do the job as specified by the JOB parameters. +*> If the call to CGEJSV is a workspace query (indicated by LWORK .EQ. -1 and +*> LRWORK .EQ. -1), then on exit IWORK(1) contains the required length of +*> IWORK for the job parameters used in the call. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> < 0 : if INFO = -i, then the i-th argument had an illegal value. +*> = 0 : successful exit; +*> > 0 : CGEJSV did not converge in the maximal allowed number +*> of sweeps. The computed values may be inaccurate. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexGEsing +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> CGEJSV implements a preconditioned Jacobi SVD algorithm. It uses CGEQP3, +*> CGEQRF, and CGELQF as preprocessors and preconditioners. Optionally, an +*> additional row pivoting can be used as a preprocessor, which in some +*> cases results in much higher accuracy. An example is matrix A with the +*> structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned +*> diagonal matrices and C is well-conditioned matrix. In that case, complete +*> pivoting in the first QR factorizations provides accuracy dependent on the +*> condition number of C, and independent of D1, D2. Such higher accuracy is +*> not completely understood theoretically, but it works well in practice. +*> Further, if A can be written as A = B*D, with well-conditioned B and some +*> diagonal D, then the high accuracy is guaranteed, both theoretically and +*> in software, independent of D. For more details see [1], [2]. +*> The computational range for the singular values can be the full range +*> ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS +*> & LAPACK routines called by CGEJSV are implemented to work in that range. +*> If that is not the case, then the restriction for safe computation with +*> the singular values in the range of normalized IEEE numbers is that the +*> spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not +*> overflow. This code (CGEJSV) is best used in this restricted range, +*> meaning that singular values of magnitude below ||A||_2 / SLAMCH('O') are +*> returned as zeros. See JOBR for details on this. +*> Further, this implementation is somewhat slower than the one described +*> in [1,2] due to replacement of some non-LAPACK components, and because +*> the choice of some tuning parameters in the iterative part (CGESVJ) is +*> left to the implementer on a particular machine. +*> The rank revealing QR factorization (in this code: CGEQP3) should be +*> implemented as in [3]. We have a new version of CGEQP3 under development +*> that is more robust than the current one in LAPACK, with a cleaner cut in +*> rank deficient cases. It will be available in the SIGMA library [4]. +*> If M is much larger than N, it is obvious that the initial QRF with +*> column pivoting can be preprocessed by the QRF without pivoting. That +*> well known trick is not used in CGEJSV because in some cases heavy row +*> weighting can be treated with complete pivoting. The overhead in cases +*> M much larger than N is then only due to pivoting, but the benefits in +*> terms of accuracy have prevailed. The implementer/user can incorporate +*> this extra QRF step easily. The implementer can also improve data movement +*> (matrix transpose, matrix copy, matrix transposed copy) - this +*> implementation of CGEJSV uses only the simplest, naive data movement. +*> \endverbatim +* +*> \par Contributor: +* ================== +*> +*> Zlatko Drmac (Zagreb, Croatia) +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. +*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. +*> LAPACK Working note 169. +*> [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. +*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. +*> LAPACK Working note 170. +*> [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR +*> factorization software - a case study. +*> ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28. +*> LAPACK Working note 176. +*> [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, +*> QSVD, (H,K)-SVD computations. +*> Department of Mathematics, University of Zagreb, 2008, 2016. +*> \endverbatim +* +*> \par Bugs, examples and comments: +* ================================= +*> +*> Please report all bugs and send interesting examples and/or comments to +*> drmac@math.hr. Thank you. +*> +* ===================================================================== + SUBROUTINE CGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, + $ M, N, A, LDA, SVA, U, LDU, V, LDV, + $ CWORK, LWORK, RWORK, LRWORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + IMPLICIT NONE + INTEGER INFO, LDA, LDU, LDV, LWORK, LRWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), U( LDU, * ), V( LDV, * ), CWORK( LWORK ) + REAL SVA( N ), RWORK( LRWORK ) + INTEGER IWORK( * ) + CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV +* .. +* +* =========================================================================== +* +* .. Local Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + COMPLEX CTEMP + REAL AAPP, AAQQ, AATMAX, AATMIN, BIG, BIG1, COND_OK, + $ CONDR1, CONDR2, ENTRA, ENTRAT, EPSLN, MAXPRJ, SCALEM, + $ SCONDA, SFMIN, SMALL, TEMP1, USCAL1, USCAL2, XSC + INTEGER IERR, N1, NR, NUMRANK, p, q, WARNING + LOGICAL ALMORT, DEFR, ERREST, GOSCAL, JRACC, KILL, LQUERY, + $ LSVEC, L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN, NOSCAL, + $ ROWPIV, RSVEC, TRANSP +* + INTEGER OPTWRK, MINWRK, MINRWRK, MINIWRK + INTEGER LWCON, LWLQF, LWQP3, LWQRF, LWUNMLQ, LWUNMQR, LWUNMQRM, + $ LWSVDJ, LWSVDJV, LRWQP3, LRWCON, LRWSVDJ, IWOFF + INTEGER LWRK_CGELQF, LWRK_CGEQP3, LWRK_CGEQP3N, LWRK_CGEQRF, + $ LWRK_CGESVJ, LWRK_CGESVJV, LWRK_CGESVJU, LWRK_CUNMLQ, + $ LWRK_CUNMQR, LWRK_CUNMQRM +* .. +* .. Local Arrays + COMPLEX CDUMMY(1) + REAL RDUMMY(1) +* +* .. Intrinsic Functions .. + INTRINSIC ABS, CMPLX, CONJG, ALOG, MAX, MIN, REAL, NINT, SQRT +* .. +* .. External Functions .. + REAL SLAMCH, SCNRM2 + INTEGER ISAMAX, ICAMAX + LOGICAL LSAME + EXTERNAL ISAMAX, ICAMAX, LSAME, SLAMCH, SCNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ, CCOPY, CGELQF, CGEQP3, CGEQRF, CLACPY, CLAPMR, + $ CLASCL, SLASCL, CLASET, CLASSQ, CLASWP, CUNGQR, CUNMLQ, + $ CUNMQR, CPOCON, SSCAL, CSSCAL, CSWAP, CTRSM, CLACGV, + $ XERBLA +* + EXTERNAL CGESVJ +* .. +* +* Test the input arguments +* + LSVEC = LSAME( JOBU, 'U' ) .OR. LSAME( JOBU, 'F' ) + JRACC = LSAME( JOBV, 'J' ) + RSVEC = LSAME( JOBV, 'V' ) .OR. JRACC + ROWPIV = LSAME( JOBA, 'F' ) .OR. LSAME( JOBA, 'G' ) + L2RANK = LSAME( JOBA, 'R' ) + L2ABER = LSAME( JOBA, 'A' ) + ERREST = LSAME( JOBA, 'E' ) .OR. LSAME( JOBA, 'G' ) + L2TRAN = LSAME( JOBT, 'T' ) .AND. ( M .EQ. N ) + L2KILL = LSAME( JOBR, 'R' ) + DEFR = LSAME( JOBR, 'N' ) + L2PERT = LSAME( JOBP, 'P' ) +* + LQUERY = ( LWORK .EQ. -1 ) .OR. ( LRWORK .EQ. -1 ) +* + IF ( .NOT.(ROWPIV .OR. L2RANK .OR. L2ABER .OR. + $ ERREST .OR. LSAME( JOBA, 'C' ) )) THEN + INFO = - 1 + ELSE IF ( .NOT.( LSVEC .OR. LSAME( JOBU, 'N' ) .OR. + $ ( LSAME( JOBU, 'W' ) .AND. RSVEC .AND. L2TRAN ) ) ) THEN + INFO = - 2 + ELSE IF ( .NOT.( RSVEC .OR. LSAME( JOBV, 'N' ) .OR. + $ ( LSAME( JOBV, 'W' ) .AND. LSVEC .AND. L2TRAN ) ) ) THEN + INFO = - 3 + ELSE IF ( .NOT. ( L2KILL .OR. DEFR ) ) THEN + INFO = - 4 + ELSE IF ( .NOT. ( LSAME(JOBT,'T') .OR. LSAME(JOBT,'N') ) ) THEN + INFO = - 5 + ELSE IF ( .NOT. ( L2PERT .OR. LSAME( JOBP, 'N' ) ) ) THEN + INFO = - 6 + ELSE IF ( M .LT. 0 ) THEN + INFO = - 7 + ELSE IF ( ( N .LT. 0 ) .OR. ( N .GT. M ) ) THEN + INFO = - 8 + ELSE IF ( LDA .LT. M ) THEN + INFO = - 10 + ELSE IF ( LSVEC .AND. ( LDU .LT. M ) ) THEN + INFO = - 13 + ELSE IF ( RSVEC .AND. ( LDV .LT. N ) ) THEN + INFO = - 15 + ELSE +* #:) + INFO = 0 + END IF +* + IF ( INFO .EQ. 0 ) THEN +* .. compute the minimal and the optimal workspace lengths +* [[The expressions for computing the minimal and the optimal +* values of LCWORK, LRWORK are written with a lot of redundancy and +* can be simplified. However, this verbose form is useful for +* maintenance and modifications of the code.]] +* +* .. minimal workspace length for CGEQP3 of an M x N matrix, +* CGEQRF of an N x N matrix, CGELQF of an N x N matrix, +* CUNMLQ for computing N x N matrix, CUNMQR for computing N x N +* matrix, CUNMQR for computing M x N matrix, respectively. + LWQP3 = N+1 + LWQRF = MAX( 1, N ) + LWLQF = MAX( 1, N ) + LWUNMLQ = MAX( 1, N ) + LWUNMQR = MAX( 1, N ) + LWUNMQRM = MAX( 1, M ) +* .. minimal workspace length for CPOCON of an N x N matrix + LWCON = 2 * N +* .. minimal workspace length for CGESVJ of an N x N matrix, +* without and with explicit accumulation of Jacobi rotations + LWSVDJ = MAX( 2 * N, 1 ) + LWSVDJV = MAX( 2 * N, 1 ) +* .. minimal REAL workspace length for CGEQP3, CPOCON, CGESVJ + LRWQP3 = N + LRWCON = N + LRWSVDJ = N + IF ( LQUERY ) THEN + CALL CGEQP3( M, N, A, LDA, IWORK, CDUMMY, CDUMMY, -1, + $ RDUMMY, IERR ) + LWRK_CGEQP3 = CDUMMY(1) + CALL CGEQRF( N, N, A, LDA, CDUMMY, CDUMMY,-1, IERR ) + LWRK_CGEQRF = CDUMMY(1) + CALL CGELQF( N, N, A, LDA, CDUMMY, CDUMMY,-1, IERR ) + LWRK_CGELQF = CDUMMY(1) + END IF + MINWRK = 2 + OPTWRK = 2 + MINIWRK = N + IF ( .NOT. (LSVEC .OR. RSVEC ) ) THEN +* .. minimal and optimal sizes of the complex workspace if +* only the singular values are requested + IF ( ERREST ) THEN + MINWRK = MAX( N+LWQP3, N**2+LWCON, N+LWQRF, LWSVDJ ) + ELSE + MINWRK = MAX( N+LWQP3, N+LWQRF, LWSVDJ ) + END IF + IF ( LQUERY ) THEN + CALL CGESVJ( 'L', 'N', 'N', N, N, A, LDA, SVA, N, V, + $ LDV, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_CGESVJ = CDUMMY(1) + IF ( ERREST ) THEN + OPTWRK = MAX( N+LWRK_CGEQP3, N**2+LWCON, + $ N+LWRK_CGEQRF, LWRK_CGESVJ ) + ELSE + OPTWRK = MAX( N+LWRK_CGEQP3, N+LWRK_CGEQRF, + $ LWRK_CGESVJ ) + END IF + END IF + IF ( L2TRAN .OR. ROWPIV ) THEN + IF ( ERREST ) THEN + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWCON, LRWSVDJ ) + ELSE + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ ) + END IF + ELSE + IF ( ERREST ) THEN + MINRWRK = MAX( 7, LRWQP3, LRWCON, LRWSVDJ ) + ELSE + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ ) + END IF + END IF + IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M + ELSE IF ( RSVEC .AND. (.NOT.LSVEC) ) THEN +* .. minimal and optimal sizes of the complex workspace if the +* singular values and the right singular vectors are requested + IF ( ERREST ) THEN + MINWRK = MAX( N+LWQP3, LWCON, LWSVDJ, N+LWLQF, + $ 2*N+LWQRF, N+LWSVDJ, N+LWUNMLQ ) + ELSE + MINWRK = MAX( N+LWQP3, LWSVDJ, N+LWLQF, 2*N+LWQRF, + $ N+LWSVDJ, N+LWUNMLQ ) + END IF + IF ( LQUERY ) THEN + CALL CGESVJ( 'L', 'U', 'N', N,N, U, LDU, SVA, N, A, + $ LDA, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_CGESVJ = CDUMMY(1) + CALL CUNMLQ( 'L', 'C', N, N, N, A, LDA, CDUMMY, + $ V, LDV, CDUMMY, -1, IERR ) + LWRK_CUNMLQ = CDUMMY(1) + IF ( ERREST ) THEN + OPTWRK = MAX( N+LWRK_CGEQP3, LWCON, LWRK_CGESVJ, + $ N+LWRK_CGELQF, 2*N+LWRK_CGEQRF, + $ N+LWRK_CGESVJ, N+LWRK_CUNMLQ ) + ELSE + OPTWRK = MAX( N+LWRK_CGEQP3, LWRK_CGESVJ,N+LWRK_CGELQF, + $ 2*N+LWRK_CGEQRF, N+LWRK_CGESVJ, + $ N+LWRK_CUNMLQ ) + END IF + END IF + IF ( L2TRAN .OR. ROWPIV ) THEN + IF ( ERREST ) THEN + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ, LRWCON ) + ELSE + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ ) + END IF + ELSE + IF ( ERREST ) THEN + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ, LRWCON ) + ELSE + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ ) + END IF + END IF + IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M + ELSE IF ( LSVEC .AND. (.NOT.RSVEC) ) THEN +* .. minimal and optimal sizes of the complex workspace if the +* singular values and the left singular vectors are requested + IF ( ERREST ) THEN + MINWRK = N + MAX( LWQP3,LWCON,N+LWQRF,LWSVDJ,LWUNMQRM ) + ELSE + MINWRK = N + MAX( LWQP3, N+LWQRF, LWSVDJ, LWUNMQRM ) + END IF + IF ( LQUERY ) THEN + CALL CGESVJ( 'L', 'U', 'N', N,N, U, LDU, SVA, N, A, + $ LDA, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_CGESVJ = CDUMMY(1) + CALL CUNMQR( 'L', 'N', M, N, N, A, LDA, CDUMMY, U, + $ LDU, CDUMMY, -1, IERR ) + LWRK_CUNMQRM = CDUMMY(1) + IF ( ERREST ) THEN + OPTWRK = N + MAX( LWRK_CGEQP3, LWCON, N+LWRK_CGEQRF, + $ LWRK_CGESVJ, LWRK_CUNMQRM ) + ELSE + OPTWRK = N + MAX( LWRK_CGEQP3, N+LWRK_CGEQRF, + $ LWRK_CGESVJ, LWRK_CUNMQRM ) + END IF + END IF + IF ( L2TRAN .OR. ROWPIV ) THEN + IF ( ERREST ) THEN + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ, LRWCON ) + ELSE + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ ) + END IF + ELSE + IF ( ERREST ) THEN + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ, LRWCON ) + ELSE + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ ) + END IF + END IF + IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M + ELSE +* .. minimal and optimal sizes of the complex workspace if the +* full SVD is requested + IF ( .NOT. JRACC ) THEN + IF ( ERREST ) THEN + MINWRK = MAX( N+LWQP3, N+LWCON, 2*N+N**2+LWCON, + $ 2*N+LWQRF, 2*N+LWQP3, + $ 2*N+N**2+N+LWLQF, 2*N+N**2+N+N**2+LWCON, + $ 2*N+N**2+N+LWSVDJ, 2*N+N**2+N+LWSVDJV, + $ 2*N+N**2+N+LWUNMQR,2*N+N**2+N+LWUNMLQ, + $ N+N**2+LWSVDJ, N+LWUNMQRM ) + ELSE + MINWRK = MAX( N+LWQP3, 2*N+N**2+LWCON, + $ 2*N+LWQRF, 2*N+LWQP3, + $ 2*N+N**2+N+LWLQF, 2*N+N**2+N+N**2+LWCON, + $ 2*N+N**2+N+LWSVDJ, 2*N+N**2+N+LWSVDJV, + $ 2*N+N**2+N+LWUNMQR,2*N+N**2+N+LWUNMLQ, + $ N+N**2+LWSVDJ, N+LWUNMQRM ) + END IF + MINIWRK = MINIWRK + N + IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M + ELSE + IF ( ERREST ) THEN + MINWRK = MAX( N+LWQP3, N+LWCON, 2*N+LWQRF, + $ 2*N+N**2+LWSVDJV, 2*N+N**2+N+LWUNMQR, + $ N+LWUNMQRM ) + ELSE + MINWRK = MAX( N+LWQP3, 2*N+LWQRF, + $ 2*N+N**2+LWSVDJV, 2*N+N**2+N+LWUNMQR, + $ N+LWUNMQRM ) + END IF + IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M + END IF + IF ( LQUERY ) THEN + CALL CUNMQR( 'L', 'N', M, N, N, A, LDA, CDUMMY, U, + $ LDU, CDUMMY, -1, IERR ) + LWRK_CUNMQRM = CDUMMY(1) + CALL CUNMQR( 'L', 'N', N, N, N, A, LDA, CDUMMY, U, + $ LDU, CDUMMY, -1, IERR ) + LWRK_CUNMQR = CDUMMY(1) + IF ( .NOT. JRACC ) THEN + CALL CGEQP3( N,N, A, LDA, IWORK, CDUMMY,CDUMMY, -1, + $ RDUMMY, IERR ) + LWRK_CGEQP3N = CDUMMY(1) + CALL CGESVJ( 'L', 'U', 'N', N, N, U, LDU, SVA, + $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_CGESVJ = CDUMMY(1) + CALL CGESVJ( 'U', 'U', 'N', N, N, U, LDU, SVA, + $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_CGESVJU = CDUMMY(1) + CALL CGESVJ( 'L', 'U', 'V', N, N, U, LDU, SVA, + $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_CGESVJV = CDUMMY(1) + CALL CUNMLQ( 'L', 'C', N, N, N, A, LDA, CDUMMY, + $ V, LDV, CDUMMY, -1, IERR ) + LWRK_CUNMLQ = CDUMMY(1) + IF ( ERREST ) THEN + OPTWRK = MAX( N+LWRK_CGEQP3, N+LWCON, + $ 2*N+N**2+LWCON, 2*N+LWRK_CGEQRF, + $ 2*N+LWRK_CGEQP3N, + $ 2*N+N**2+N+LWRK_CGELQF, + $ 2*N+N**2+N+N**2+LWCON, + $ 2*N+N**2+N+LWRK_CGESVJ, + $ 2*N+N**2+N+LWRK_CGESVJV, + $ 2*N+N**2+N+LWRK_CUNMQR, + $ 2*N+N**2+N+LWRK_CUNMLQ, + $ N+N**2+LWRK_CGESVJU, + $ N+LWRK_CUNMQRM ) + ELSE + OPTWRK = MAX( N+LWRK_CGEQP3, + $ 2*N+N**2+LWCON, 2*N+LWRK_CGEQRF, + $ 2*N+LWRK_CGEQP3N, + $ 2*N+N**2+N+LWRK_CGELQF, + $ 2*N+N**2+N+N**2+LWCON, + $ 2*N+N**2+N+LWRK_CGESVJ, + $ 2*N+N**2+N+LWRK_CGESVJV, + $ 2*N+N**2+N+LWRK_CUNMQR, + $ 2*N+N**2+N+LWRK_CUNMLQ, + $ N+N**2+LWRK_CGESVJU, + $ N+LWRK_CUNMQRM ) + END IF + ELSE + CALL CGESVJ( 'L', 'U', 'V', N, N, U, LDU, SVA, + $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_CGESVJV = CDUMMY(1) + CALL CUNMQR( 'L', 'N', N, N, N, CDUMMY, N, CDUMMY, + $ V, LDV, CDUMMY, -1, IERR ) + LWRK_CUNMQR = CDUMMY(1) + CALL CUNMQR( 'L', 'N', M, N, N, A, LDA, CDUMMY, U, + $ LDU, CDUMMY, -1, IERR ) + LWRK_CUNMQRM = CDUMMY(1) + IF ( ERREST ) THEN + OPTWRK = MAX( N+LWRK_CGEQP3, N+LWCON, + $ 2*N+LWRK_CGEQRF, 2*N+N**2, + $ 2*N+N**2+LWRK_CGESVJV, + $ 2*N+N**2+N+LWRK_CUNMQR,N+LWRK_CUNMQRM ) + ELSE + OPTWRK = MAX( N+LWRK_CGEQP3, 2*N+LWRK_CGEQRF, + $ 2*N+N**2, 2*N+N**2+LWRK_CGESVJV, + $ 2*N+N**2+N+LWRK_CUNMQR, + $ N+LWRK_CUNMQRM ) + END IF + END IF + END IF + IF ( L2TRAN .OR. ROWPIV ) THEN + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ, LRWCON ) + ELSE + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ, LRWCON ) + END IF + END IF + MINWRK = MAX( 2, MINWRK ) + OPTWRK = MAX( 2, OPTWRK ) + IF ( LWORK .LT. MINWRK .AND. (.NOT.LQUERY) ) INFO = - 17 + IF ( LRWORK .LT. MINRWRK .AND. (.NOT.LQUERY) ) INFO = - 19 + END IF +* + IF ( INFO .NE. 0 ) THEN +* #:( + CALL XERBLA( 'CGEJSV', - INFO ) + RETURN + ELSE IF ( LQUERY ) THEN + CWORK(1) = OPTWRK + CWORK(2) = MINWRK + RWORK(1) = MINRWRK + IWORK(1) = MAX( 4, MINIWRK ) + RETURN + END IF +* +* Quick return for void matrix (Y3K safe) +* #:) + IF ( ( M .EQ. 0 ) .OR. ( N .EQ. 0 ) ) THEN + IWORK(1:4) = 0 + RWORK(1:7) = 0 + RETURN + ENDIF +* +* Determine whether the matrix U should be M x N or M x M +* + IF ( LSVEC ) THEN + N1 = N + IF ( LSAME( JOBU, 'F' ) ) N1 = M + END IF +* +* Set numerical parameters +* +*! NOTE: Make sure SLAMCH() does not fail on the target architecture. +* + EPSLN = SLAMCH('Epsilon') + SFMIN = SLAMCH('SafeMinimum') + SMALL = SFMIN / EPSLN + BIG = SLAMCH('O') +* BIG = ONE / SFMIN +* +* Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N +* +*(!) If necessary, scale SVA() to protect the largest norm from +* overflow. It is possible that this scaling pushes the smallest +* column norm left from the underflow threshold (extreme case). +* + SCALEM = ONE / SQRT(REAL(M)*REAL(N)) + NOSCAL = .TRUE. + GOSCAL = .TRUE. + DO 1874 p = 1, N + AAPP = ZERO + AAQQ = ONE + CALL CLASSQ( M, A(1,p), 1, AAPP, AAQQ ) + IF ( AAPP .GT. BIG ) THEN + INFO = - 9 + CALL XERBLA( 'CGEJSV', -INFO ) + RETURN + END IF + AAQQ = SQRT(AAQQ) + IF ( ( AAPP .LT. (BIG / AAQQ) ) .AND. NOSCAL ) THEN + SVA(p) = AAPP * AAQQ + ELSE + NOSCAL = .FALSE. + SVA(p) = AAPP * ( AAQQ * SCALEM ) + IF ( GOSCAL ) THEN + GOSCAL = .FALSE. + CALL SSCAL( p-1, SCALEM, SVA, 1 ) + END IF + END IF + 1874 CONTINUE +* + IF ( NOSCAL ) SCALEM = ONE +* + AAPP = ZERO + AAQQ = BIG + DO 4781 p = 1, N + AAPP = MAX( AAPP, SVA(p) ) + IF ( SVA(p) .NE. ZERO ) AAQQ = MIN( AAQQ, SVA(p) ) + 4781 CONTINUE +* +* Quick return for zero M x N matrix +* #:) + IF ( AAPP .EQ. ZERO ) THEN + IF ( LSVEC ) CALL CLASET( 'G', M, N1, CZERO, CONE, U, LDU ) + IF ( RSVEC ) CALL CLASET( 'G', N, N, CZERO, CONE, V, LDV ) + RWORK(1) = ONE + RWORK(2) = ONE + IF ( ERREST ) RWORK(3) = ONE + IF ( LSVEC .AND. RSVEC ) THEN + RWORK(4) = ONE + RWORK(5) = ONE + END IF + IF ( L2TRAN ) THEN + RWORK(6) = ZERO + RWORK(7) = ZERO + END IF + IWORK(1) = 0 + IWORK(2) = 0 + IWORK(3) = 0 + IWORK(4) = -1 + RETURN + END IF +* +* Issue warning if denormalized column norms detected. Override the +* high relative accuracy request. Issue licence to kill nonzero columns +* (set them to zero) whose norm is less than sigma_max / BIG (roughly). +* #:( + WARNING = 0 + IF ( AAQQ .LE. SFMIN ) THEN + L2RANK = .TRUE. + L2KILL = .TRUE. + WARNING = 1 + END IF +* +* Quick return for one-column matrix +* #:) + IF ( N .EQ. 1 ) THEN +* + IF ( LSVEC ) THEN + CALL CLASCL( 'G',0,0,SVA(1),SCALEM, M,1,A(1,1),LDA,IERR ) + CALL CLACPY( 'A', M, 1, A, LDA, U, LDU ) +* computing all M left singular vectors of the M x 1 matrix + IF ( N1 .NE. N ) THEN + CALL CGEQRF( M, N, U,LDU, CWORK, CWORK(N+1),LWORK-N,IERR ) + CALL CUNGQR( M,N1,1, U,LDU,CWORK,CWORK(N+1),LWORK-N,IERR ) + CALL CCOPY( M, A(1,1), 1, U(1,1), 1 ) + END IF + END IF + IF ( RSVEC ) THEN + V(1,1) = CONE + END IF + IF ( SVA(1) .LT. (BIG*SCALEM) ) THEN + SVA(1) = SVA(1) / SCALEM + SCALEM = ONE + END IF + RWORK(1) = ONE / SCALEM + RWORK(2) = ONE + IF ( SVA(1) .NE. ZERO ) THEN + IWORK(1) = 1 + IF ( ( SVA(1) / SCALEM) .GE. SFMIN ) THEN + IWORK(2) = 1 + ELSE + IWORK(2) = 0 + END IF + ELSE + IWORK(1) = 0 + IWORK(2) = 0 + END IF + IWORK(3) = 0 + IWORK(4) = -1 + IF ( ERREST ) RWORK(3) = ONE + IF ( LSVEC .AND. RSVEC ) THEN + RWORK(4) = ONE + RWORK(5) = ONE + END IF + IF ( L2TRAN ) THEN + RWORK(6) = ZERO + RWORK(7) = ZERO + END IF + RETURN +* + END IF +* + TRANSP = .FALSE. +* + AATMAX = -ONE + AATMIN = BIG + IF ( ROWPIV .OR. L2TRAN ) THEN +* +* Compute the row norms, needed to determine row pivoting sequence +* (in the case of heavily row weighted A, row pivoting is strongly +* advised) and to collect information needed to compare the +* structures of A * A^* and A^* * A (in the case L2TRAN.EQ..TRUE.). +* + IF ( L2TRAN ) THEN + DO 1950 p = 1, M + XSC = ZERO + TEMP1 = ONE + CALL CLASSQ( N, A(p,1), LDA, XSC, TEMP1 ) +* CLASSQ gets both the ell_2 and the ell_infinity norm +* in one pass through the vector + RWORK(M+p) = XSC * SCALEM + RWORK(p) = XSC * (SCALEM*SQRT(TEMP1)) + AATMAX = MAX( AATMAX, RWORK(p) ) + IF (RWORK(p) .NE. ZERO) + $ AATMIN = MIN(AATMIN,RWORK(p)) + 1950 CONTINUE + ELSE + DO 1904 p = 1, M + RWORK(M+p) = SCALEM*ABS( A(p,ICAMAX(N,A(p,1),LDA)) ) + AATMAX = MAX( AATMAX, RWORK(M+p) ) + AATMIN = MIN( AATMIN, RWORK(M+p) ) + 1904 CONTINUE + END IF +* + END IF +* +* For square matrix A try to determine whether A^* would be better +* input for the preconditioned Jacobi SVD, with faster convergence. +* The decision is based on an O(N) function of the vector of column +* and row norms of A, based on the Shannon entropy. This should give +* the right choice in most cases when the difference actually matters. +* It may fail and pick the slower converging side. +* + ENTRA = ZERO + ENTRAT = ZERO + IF ( L2TRAN ) THEN +* + XSC = ZERO + TEMP1 = ONE + CALL SLASSQ( N, SVA, 1, XSC, TEMP1 ) + TEMP1 = ONE / TEMP1 +* + ENTRA = ZERO + DO 1113 p = 1, N + BIG1 = ( ( SVA(p) / XSC )**2 ) * TEMP1 + IF ( BIG1 .NE. ZERO ) ENTRA = ENTRA + BIG1 * ALOG(BIG1) + 1113 CONTINUE + ENTRA = - ENTRA / ALOG(REAL(N)) +* +* Now, SVA().^2/Trace(A^* * A) is a point in the probability simplex. +* It is derived from the diagonal of A^* * A. Do the same with the +* diagonal of A * A^*, compute the entropy of the corresponding +* probability distribution. Note that A * A^* and A^* * A have the +* same trace. +* + ENTRAT = ZERO + DO 1114 p = 1, M + BIG1 = ( ( RWORK(p) / XSC )**2 ) * TEMP1 + IF ( BIG1 .NE. ZERO ) ENTRAT = ENTRAT + BIG1 * ALOG(BIG1) + 1114 CONTINUE + ENTRAT = - ENTRAT / ALOG(REAL(M)) +* +* Analyze the entropies and decide A or A^*. Smaller entropy +* usually means better input for the algorithm. +* + TRANSP = ( ENTRAT .LT. ENTRA ) +* +* If A^* is better than A, take the adjoint of A. This is allowed +* only for square matrices, M=N. + IF ( TRANSP ) THEN +* In an optimal implementation, this trivial transpose +* should be replaced with faster transpose. + DO 1115 p = 1, N - 1 + A(p,p) = CONJG(A(p,p)) + DO 1116 q = p + 1, N + CTEMP = CONJG(A(q,p)) + A(q,p) = CONJG(A(p,q)) + A(p,q) = CTEMP + 1116 CONTINUE + 1115 CONTINUE + A(N,N) = CONJG(A(N,N)) + DO 1117 p = 1, N + RWORK(M+p) = SVA(p) + SVA(p) = RWORK(p) +* previously computed row 2-norms are now column 2-norms +* of the transposed matrix + 1117 CONTINUE + TEMP1 = AAPP + AAPP = AATMAX + AATMAX = TEMP1 + TEMP1 = AAQQ + AAQQ = AATMIN + AATMIN = TEMP1 + KILL = LSVEC + LSVEC = RSVEC + RSVEC = KILL + IF ( LSVEC ) N1 = N +* + ROWPIV = .TRUE. + END IF +* + END IF +* END IF L2TRAN +* +* Scale the matrix so that its maximal singular value remains less +* than SQRT(BIG) -- the matrix is scaled so that its maximal column +* has Euclidean norm equal to SQRT(BIG/N). The only reason to keep +* SQRT(BIG) instead of BIG is the fact that CGEJSV uses LAPACK and +* BLAS routines that, in some implementations, are not capable of +* working in the full interval [SFMIN,BIG] and that they may provoke +* overflows in the intermediate results. If the singular values spread +* from SFMIN to BIG, then CGESVJ will compute them. So, in that case, +* one should use CGESVJ instead of CGEJSV. + BIG1 = SQRT( BIG ) + TEMP1 = SQRT( BIG / REAL(N) ) +* >> for future updates: allow bigger range, i.e. the largest column +* will be allowed up to BIG/N and CGESVJ will do the rest. However, for +* this all other (LAPACK) components must allow such a range. +* TEMP1 = BIG/REAL(N) +* TEMP1 = BIG * EPSLN this should 'almost' work with current LAPACK components + CALL SLASCL( 'G', 0, 0, AAPP, TEMP1, N, 1, SVA, N, IERR ) + IF ( AAQQ .GT. (AAPP * SFMIN) ) THEN + AAQQ = ( AAQQ / AAPP ) * TEMP1 + ELSE + AAQQ = ( AAQQ * TEMP1 ) / AAPP + END IF + TEMP1 = TEMP1 * SCALEM + CALL CLASCL( 'G', 0, 0, AAPP, TEMP1, M, N, A, LDA, IERR ) +* +* To undo scaling at the end of this procedure, multiply the +* computed singular values with USCAL2 / USCAL1. +* + USCAL1 = TEMP1 + USCAL2 = AAPP +* + IF ( L2KILL ) THEN +* L2KILL enforces computation of nonzero singular values in +* the restricted range of condition number of the initial A, +* sigma_max(A) / sigma_min(A) approx. SQRT(BIG)/SQRT(SFMIN). + XSC = SQRT( SFMIN ) + ELSE + XSC = SMALL +* +* Now, if the condition number of A is too big, +* sigma_max(A) / sigma_min(A) .GT. SQRT(BIG/N) * EPSLN / SFMIN, +* as a precaution measure, the full SVD is computed using CGESVJ +* with accumulated Jacobi rotations. This provides numerically +* more robust computation, at the cost of slightly increased run +* time. Depending on the concrete implementation of BLAS and LAPACK +* (i.e. how they behave in presence of extreme ill-conditioning) the +* implementor may decide to remove this switch. + IF ( ( AAQQ.LT.SQRT(SFMIN) ) .AND. LSVEC .AND. RSVEC ) THEN + JRACC = .TRUE. + END IF +* + END IF + IF ( AAQQ .LT. XSC ) THEN + DO 700 p = 1, N + IF ( SVA(p) .LT. XSC ) THEN + CALL CLASET( 'A', M, 1, CZERO, CZERO, A(1,p), LDA ) + SVA(p) = ZERO + END IF + 700 CONTINUE + END IF +* +* Preconditioning using QR factorization with pivoting +* + IF ( ROWPIV ) THEN +* Optional row permutation (Bjoerck row pivoting): +* A result by Cox and Higham shows that the Bjoerck's +* row pivoting combined with standard column pivoting +* has similar effect as Powell-Reid complete pivoting. +* The ell-infinity norms of A are made nonincreasing. + IF ( ( LSVEC .AND. RSVEC ) .AND. .NOT.( JRACC ) ) THEN + IWOFF = 2*N + ELSE + IWOFF = N + END IF + DO 1952 p = 1, M - 1 + q = ISAMAX( M-p+1, RWORK(M+p), 1 ) + p - 1 + IWORK(IWOFF+p) = q + IF ( p .NE. q ) THEN + TEMP1 = RWORK(M+p) + RWORK(M+p) = RWORK(M+q) + RWORK(M+q) = TEMP1 + END IF + 1952 CONTINUE + CALL CLASWP( N, A, LDA, 1, M-1, IWORK(IWOFF+1), 1 ) + END IF +* +* End of the preparation phase (scaling, optional sorting and +* transposing, optional flushing of small columns). +* +* Preconditioning +* +* If the full SVD is needed, the right singular vectors are computed +* from a matrix equation, and for that we need theoretical analysis +* of the Businger-Golub pivoting. So we use CGEQP3 as the first RR QRF. +* In all other cases the first RR QRF can be chosen by other criteria +* (eg speed by replacing global with restricted window pivoting, such +* as in xGEQPX from TOMS # 782). Good results will be obtained using +* xGEQPX with properly (!) chosen numerical parameters. +* Any improvement of CGEQP3 improves overal performance of CGEJSV. +* +* A * P1 = Q1 * [ R1^* 0]^*: + DO 1963 p = 1, N +* .. all columns are free columns + IWORK(p) = 0 + 1963 CONTINUE + CALL CGEQP3( M, N, A, LDA, IWORK, CWORK, CWORK(N+1), LWORK-N, + $ RWORK, IERR ) +* +* The upper triangular matrix R1 from the first QRF is inspected for +* rank deficiency and possibilities for deflation, or possible +* ill-conditioning. Depending on the user specified flag L2RANK, +* the procedure explores possibilities to reduce the numerical +* rank by inspecting the computed upper triangular factor. If +* L2RANK or L2ABER are up, then CGEJSV will compute the SVD of +* A + dA, where ||dA|| <= f(M,N)*EPSLN. +* + NR = 1 + IF ( L2ABER ) THEN +* Standard absolute error bound suffices. All sigma_i with +* sigma_i < N*EPSLN*||A|| are flushed to zero. This is an +* agressive enforcement of lower numerical rank by introducing a +* backward error of the order of N*EPSLN*||A||. + TEMP1 = SQRT(REAL(N))*EPSLN + DO 3001 p = 2, N + IF ( ABS(A(p,p)) .GE. (TEMP1*ABS(A(1,1))) ) THEN + NR = NR + 1 + ELSE + GO TO 3002 + END IF + 3001 CONTINUE + 3002 CONTINUE + ELSE IF ( L2RANK ) THEN +* .. similarly as above, only slightly more gentle (less agressive). +* Sudden drop on the diagonal of R1 is used as the criterion for +* close-to-rank-defficient. + TEMP1 = SQRT(SFMIN) + DO 3401 p = 2, N + IF ( ( ABS(A(p,p)) .LT. (EPSLN*ABS(A(p-1,p-1))) ) .OR. + $ ( ABS(A(p,p)) .LT. SMALL ) .OR. + $ ( L2KILL .AND. (ABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3402 + NR = NR + 1 + 3401 CONTINUE + 3402 CONTINUE +* + ELSE +* The goal is high relative accuracy. However, if the matrix +* has high scaled condition number the relative accuracy is in +* general not feasible. Later on, a condition number estimator +* will be deployed to estimate the scaled condition number. +* Here we just remove the underflowed part of the triangular +* factor. This prevents the situation in which the code is +* working hard to get the accuracy not warranted by the data. + TEMP1 = SQRT(SFMIN) + DO 3301 p = 2, N + IF ( ( ABS(A(p,p)) .LT. SMALL ) .OR. + $ ( L2KILL .AND. (ABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3302 + NR = NR + 1 + 3301 CONTINUE + 3302 CONTINUE +* + END IF +* + ALMORT = .FALSE. + IF ( NR .EQ. N ) THEN + MAXPRJ = ONE + DO 3051 p = 2, N + TEMP1 = ABS(A(p,p)) / SVA(IWORK(p)) + MAXPRJ = MIN( MAXPRJ, TEMP1 ) + 3051 CONTINUE + IF ( MAXPRJ**2 .GE. ONE - REAL(N)*EPSLN ) ALMORT = .TRUE. + END IF +* +* + SCONDA = - ONE + CONDR1 = - ONE + CONDR2 = - ONE +* + IF ( ERREST ) THEN + IF ( N .EQ. NR ) THEN + IF ( RSVEC ) THEN +* .. V is available as workspace + CALL CLACPY( 'U', N, N, A, LDA, V, LDV ) + DO 3053 p = 1, N + TEMP1 = SVA(IWORK(p)) + CALL CSSCAL( p, ONE/TEMP1, V(1,p), 1 ) + 3053 CONTINUE + IF ( LSVEC )THEN + CALL CPOCON( 'U', N, V, LDV, ONE, TEMP1, + $ CWORK(N+1), RWORK, IERR ) + ELSE + CALL CPOCON( 'U', N, V, LDV, ONE, TEMP1, + $ CWORK, RWORK, IERR ) + END IF +* + ELSE IF ( LSVEC ) THEN +* .. U is available as workspace + CALL CLACPY( 'U', N, N, A, LDA, U, LDU ) + DO 3054 p = 1, N + TEMP1 = SVA(IWORK(p)) + CALL CSSCAL( p, ONE/TEMP1, U(1,p), 1 ) + 3054 CONTINUE + CALL CPOCON( 'U', N, U, LDU, ONE, TEMP1, + $ CWORK(N+1), RWORK, IERR ) + ELSE + CALL CLACPY( 'U', N, N, A, LDA, CWORK, N ) +*[] CALL CLACPY( 'U', N, N, A, LDA, CWORK(N+1), N ) +* Change: here index shifted by N to the left, CWORK(1:N) +* not needed for SIGMA only computation + DO 3052 p = 1, N + TEMP1 = SVA(IWORK(p)) +*[] CALL CSSCAL( p, ONE/TEMP1, CWORK(N+(p-1)*N+1), 1 ) + CALL CSSCAL( p, ONE/TEMP1, CWORK((p-1)*N+1), 1 ) + 3052 CONTINUE +* .. the columns of R are scaled to have unit Euclidean lengths. +*[] CALL CPOCON( 'U', N, CWORK(N+1), N, ONE, TEMP1, +*[] $ CWORK(N+N*N+1), RWORK, IERR ) + CALL CPOCON( 'U', N, CWORK, N, ONE, TEMP1, + $ CWORK(N*N+1), RWORK, IERR ) +* + END IF + IF ( TEMP1 .NE. ZERO ) THEN + SCONDA = ONE / SQRT(TEMP1) + ELSE + SCONDA = - ONE + END IF +* SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1). +* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA + ELSE + SCONDA = - ONE + END IF + END IF +* + L2PERT = L2PERT .AND. ( ABS( A(1,1)/A(NR,NR) ) .GT. SQRT(BIG1) ) +* If there is no violent scaling, artificial perturbation is not needed. +* +* Phase 3: +* + IF ( .NOT. ( RSVEC .OR. LSVEC ) ) THEN +* +* Singular Values only +* +* .. transpose A(1:NR,1:N) + DO 1946 p = 1, MIN( N-1, NR ) + CALL CCOPY( N-p, A(p,p+1), LDA, A(p+1,p), 1 ) + CALL CLACGV( N-p+1, A(p,p), 1 ) + 1946 CONTINUE + IF ( NR .EQ. N ) A(N,N) = CONJG(A(N,N)) +* +* The following two DO-loops introduce small relative perturbation +* into the strict upper triangle of the lower triangular matrix. +* Small entries below the main diagonal are also changed. +* This modification is useful if the computing environment does not +* provide/allow FLUSH TO ZERO underflow, for it prevents many +* annoying denormalized numbers in case of strongly scaled matrices. +* The perturbation is structured so that it does not introduce any +* new perturbation of the singular values, and it does not destroy +* the job done by the preconditioner. +* The licence for this perturbation is in the variable L2PERT, which +* should be .FALSE. if FLUSH TO ZERO underflow is active. +* + IF ( .NOT. ALMORT ) THEN +* + IF ( L2PERT ) THEN +* XSC = SQRT(SMALL) + XSC = EPSLN / REAL(N) + DO 4947 q = 1, NR + CTEMP = CMPLX(XSC*ABS(A(q,q)),ZERO) + DO 4949 p = 1, N + IF ( ( (p.GT.q) .AND. (ABS(A(p,q)).LE.TEMP1) ) + $ .OR. ( p .LT. q ) ) +* $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) ) + $ A(p,q) = CTEMP + 4949 CONTINUE + 4947 CONTINUE + ELSE + CALL CLASET( 'U', NR-1,NR-1, CZERO,CZERO, A(1,2),LDA ) + END IF +* +* .. second preconditioning using the QR factorization +* + CALL CGEQRF( N,NR, A,LDA, CWORK, CWORK(N+1),LWORK-N, IERR ) +* +* .. and transpose upper to lower triangular + DO 1948 p = 1, NR - 1 + CALL CCOPY( NR-p, A(p,p+1), LDA, A(p+1,p), 1 ) + CALL CLACGV( NR-p+1, A(p,p), 1 ) + 1948 CONTINUE +* + END IF +* +* Row-cyclic Jacobi SVD algorithm with column pivoting +* +* .. again some perturbation (a "background noise") is added +* to drown denormals + IF ( L2PERT ) THEN +* XSC = SQRT(SMALL) + XSC = EPSLN / REAL(N) + DO 1947 q = 1, NR + CTEMP = CMPLX(XSC*ABS(A(q,q)),ZERO) + DO 1949 p = 1, NR + IF ( ( (p.GT.q) .AND. (ABS(A(p,q)).LE.TEMP1) ) + $ .OR. ( p .LT. q ) ) +* $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) ) + $ A(p,q) = CTEMP + 1949 CONTINUE + 1947 CONTINUE + ELSE + CALL CLASET( 'U', NR-1, NR-1, CZERO, CZERO, A(1,2), LDA ) + END IF +* +* .. and one-sided Jacobi rotations are started on a lower +* triangular matrix (plus perturbation which is ignored in +* the part which destroys triangular form (confusing?!)) +* + CALL CGESVJ( 'L', 'N', 'N', NR, NR, A, LDA, SVA, + $ N, V, LDV, CWORK, LWORK, RWORK, LRWORK, INFO ) +* + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) +* +* + ELSE IF ( ( RSVEC .AND. ( .NOT. LSVEC ) .AND. ( .NOT. JRACC ) ) + $ .OR. + $ ( JRACC .AND. ( .NOT. LSVEC ) .AND. ( NR .NE. N ) ) ) THEN +* +* -> Singular Values and Right Singular Vectors <- +* + IF ( ALMORT ) THEN +* +* .. in this case NR equals N + DO 1998 p = 1, NR + CALL CCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) + CALL CLACGV( N-p+1, V(p,p), 1 ) + 1998 CONTINUE + CALL CLASET( 'U', NR-1,NR-1, CZERO, CZERO, V(1,2), LDV ) +* + CALL CGESVJ( 'L','U','N', N, NR, V, LDV, SVA, NR, A, LDA, + $ CWORK, LWORK, RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + + ELSE +* +* .. two more QR factorizations ( one QRF is not enough, two require +* accumulated product of Jacobi rotations, three are perfect ) +* + CALL CLASET( 'L', NR-1,NR-1, CZERO, CZERO, A(2,1), LDA ) + CALL CGELQF( NR,N, A, LDA, CWORK, CWORK(N+1), LWORK-N, IERR) + CALL CLACPY( 'L', NR, NR, A, LDA, V, LDV ) + CALL CLASET( 'U', NR-1,NR-1, CZERO, CZERO, V(1,2), LDV ) + CALL CGEQRF( NR, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), + $ LWORK-2*N, IERR ) + DO 8998 p = 1, NR + CALL CCOPY( NR-p+1, V(p,p), LDV, V(p,p), 1 ) + CALL CLACGV( NR-p+1, V(p,p), 1 ) + 8998 CONTINUE + CALL CLASET('U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV) +* + CALL CGESVJ( 'L', 'U','N', NR, NR, V,LDV, SVA, NR, U, + $ LDU, CWORK(N+1), LWORK-N, RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + IF ( NR .LT. N ) THEN + CALL CLASET( 'A',N-NR, NR, CZERO,CZERO, V(NR+1,1), LDV ) + CALL CLASET( 'A',NR, N-NR, CZERO,CZERO, V(1,NR+1), LDV ) + CALL CLASET( 'A',N-NR,N-NR,CZERO,CONE, V(NR+1,NR+1),LDV ) + END IF +* + CALL CUNMLQ( 'L', 'C', N, N, NR, A, LDA, CWORK, + $ V, LDV, CWORK(N+1), LWORK-N, IERR ) +* + END IF +* .. permute the rows of V +* DO 8991 p = 1, N +* CALL CCOPY( N, V(p,1), LDV, A(IWORK(p),1), LDA ) +* 8991 CONTINUE +* CALL CLACPY( 'All', N, N, A, LDA, V, LDV ) + CALL CLAPMR( .FALSE., N, N, V, LDV, IWORK ) +* + IF ( TRANSP ) THEN + CALL CLACPY( 'A', N, N, V, LDV, U, LDU ) + END IF +* + ELSE IF ( JRACC .AND. (.NOT. LSVEC) .AND. ( NR.EQ. N ) ) THEN +* + CALL CLASET( 'L', N-1,N-1, CZERO, CZERO, A(2,1), LDA ) +* + CALL CGESVJ( 'U','N','V', N, N, A, LDA, SVA, N, V, LDV, + $ CWORK, LWORK, RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + CALL CLAPMR( .FALSE., N, N, V, LDV, IWORK ) +* + ELSE IF ( LSVEC .AND. ( .NOT. RSVEC ) ) THEN +* +* .. Singular Values and Left Singular Vectors .. +* +* .. second preconditioning step to avoid need to accumulate +* Jacobi rotations in the Jacobi iterations. + DO 1965 p = 1, NR + CALL CCOPY( N-p+1, A(p,p), LDA, U(p,p), 1 ) + CALL CLACGV( N-p+1, U(p,p), 1 ) + 1965 CONTINUE + CALL CLASET( 'U', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU ) +* + CALL CGEQRF( N, NR, U, LDU, CWORK(N+1), CWORK(2*N+1), + $ LWORK-2*N, IERR ) +* + DO 1967 p = 1, NR - 1 + CALL CCOPY( NR-p, U(p,p+1), LDU, U(p+1,p), 1 ) + CALL CLACGV( N-p+1, U(p,p), 1 ) + 1967 CONTINUE + CALL CLASET( 'U', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU ) +* + CALL CGESVJ( 'L', 'U', 'N', NR,NR, U, LDU, SVA, NR, A, + $ LDA, CWORK(N+1), LWORK-N, RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) +* + IF ( NR .LT. M ) THEN + CALL CLASET( 'A', M-NR, NR,CZERO, CZERO, U(NR+1,1), LDU ) + IF ( NR .LT. N1 ) THEN + CALL CLASET( 'A',NR, N1-NR, CZERO, CZERO, U(1,NR+1),LDU ) + CALL CLASET( 'A',M-NR,N1-NR,CZERO,CONE,U(NR+1,NR+1),LDU ) + END IF + END IF +* + CALL CUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, + $ LDU, CWORK(N+1), LWORK-N, IERR ) +* + IF ( ROWPIV ) + $ CALL CLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) +* + DO 1974 p = 1, N1 + XSC = ONE / SCNRM2( M, U(1,p), 1 ) + CALL CSSCAL( M, XSC, U(1,p), 1 ) + 1974 CONTINUE +* + IF ( TRANSP ) THEN + CALL CLACPY( 'A', N, N, U, LDU, V, LDV ) + END IF +* + ELSE +* +* .. Full SVD .. +* + IF ( .NOT. JRACC ) THEN +* + IF ( .NOT. ALMORT ) THEN +* +* Second Preconditioning Step (QRF [with pivoting]) +* Note that the composition of TRANSPOSE, QRF and TRANSPOSE is +* equivalent to an LQF CALL. Since in many libraries the QRF +* seems to be better optimized than the LQF, we do explicit +* transpose and use the QRF. This is subject to changes in an +* optimized implementation of CGEJSV. +* + DO 1968 p = 1, NR + CALL CCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) + CALL CLACGV( N-p+1, V(p,p), 1 ) + 1968 CONTINUE +* +* .. the following two loops perturb small entries to avoid +* denormals in the second QR factorization, where they are +* as good as zeros. This is done to avoid painfully slow +* computation with denormals. The relative size of the perturbation +* is a parameter that can be changed by the implementer. +* This perturbation device will be obsolete on machines with +* properly implemented arithmetic. +* To switch it off, set L2PERT=.FALSE. To remove it from the +* code, remove the action under L2PERT=.TRUE., leave the ELSE part. +* The following two loops should be blocked and fused with the +* transposed copy above. +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 2969 q = 1, NR + CTEMP = CMPLX(XSC*ABS( V(q,q) ),ZERO) + DO 2968 p = 1, N + IF ( ( p .GT. q ) .AND. ( ABS(V(p,q)) .LE. TEMP1 ) + $ .OR. ( p .LT. q ) ) +* $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) ) + $ V(p,q) = CTEMP + IF ( p .LT. q ) V(p,q) = - V(p,q) + 2968 CONTINUE + 2969 CONTINUE + ELSE + CALL CLASET( 'U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV ) + END IF +* +* Estimate the row scaled condition number of R1 +* (If R1 is rectangular, N > NR, then the condition number +* of the leading NR x NR submatrix is estimated.) +* + CALL CLACPY( 'L', NR, NR, V, LDV, CWORK(2*N+1), NR ) + DO 3950 p = 1, NR + TEMP1 = SCNRM2(NR-p+1,CWORK(2*N+(p-1)*NR+p),1) + CALL CSSCAL(NR-p+1,ONE/TEMP1,CWORK(2*N+(p-1)*NR+p),1) + 3950 CONTINUE + CALL CPOCON('L',NR,CWORK(2*N+1),NR,ONE,TEMP1, + $ CWORK(2*N+NR*NR+1),RWORK,IERR) + CONDR1 = ONE / SQRT(TEMP1) +* .. here need a second oppinion on the condition number +* .. then assume worst case scenario +* R1 is OK for inverse <=> CONDR1 .LT. REAL(N) +* more conservative <=> CONDR1 .LT. SQRT(REAL(N)) +* + COND_OK = SQRT(SQRT(REAL(NR))) +*[TP] COND_OK is a tuning parameter. +* + IF ( CONDR1 .LT. COND_OK ) THEN +* .. the second QRF without pivoting. Note: in an optimized +* implementation, this QRF should be implemented as the QRF +* of a lower triangular matrix. +* R1^* = Q2 * R2 + CALL CGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), + $ LWORK-2*N, IERR ) +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL)/EPSLN + DO 3959 p = 2, NR + DO 3958 q = 1, p - 1 + CTEMP=CMPLX(XSC*MIN(ABS(V(p,p)),ABS(V(q,q))), + $ ZERO) + IF ( ABS(V(q,p)) .LE. TEMP1 ) +* $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) ) + $ V(q,p) = CTEMP + 3958 CONTINUE + 3959 CONTINUE + END IF +* + IF ( NR .NE. N ) + $ CALL CLACPY( 'A', N, NR, V, LDV, CWORK(2*N+1), N ) +* .. save ... +* +* .. this transposed copy should be better than naive + DO 1969 p = 1, NR - 1 + CALL CCOPY( NR-p, V(p,p+1), LDV, V(p+1,p), 1 ) + CALL CLACGV(NR-p+1, V(p,p), 1 ) + 1969 CONTINUE + V(NR,NR)=CONJG(V(NR,NR)) +* + CONDR2 = CONDR1 +* + ELSE +* +* .. ill-conditioned case: second QRF with pivoting +* Note that windowed pivoting would be equaly good +* numerically, and more run-time efficient. So, in +* an optimal implementation, the next call to CGEQP3 +* should be replaced with eg. CALL CGEQPX (ACM TOMS #782) +* with properly (carefully) chosen parameters. +* +* R1^* * P2 = Q2 * R2 + DO 3003 p = 1, NR + IWORK(N+p) = 0 + 3003 CONTINUE + CALL CGEQP3( N, NR, V, LDV, IWORK(N+1), CWORK(N+1), + $ CWORK(2*N+1), LWORK-2*N, RWORK, IERR ) +** CALL CGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), +** $ LWORK-2*N, IERR ) + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 3969 p = 2, NR + DO 3968 q = 1, p - 1 + CTEMP=CMPLX(XSC*MIN(ABS(V(p,p)),ABS(V(q,q))), + $ ZERO) + IF ( ABS(V(q,p)) .LE. TEMP1 ) +* $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) ) + $ V(q,p) = CTEMP + 3968 CONTINUE + 3969 CONTINUE + END IF +* + CALL CLACPY( 'A', N, NR, V, LDV, CWORK(2*N+1), N ) +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 8970 p = 2, NR + DO 8971 q = 1, p - 1 + CTEMP=CMPLX(XSC*MIN(ABS(V(p,p)),ABS(V(q,q))), + $ ZERO) +* V(p,q) = - TEMP1*( V(q,p) / ABS(V(q,p)) ) + V(p,q) = - CTEMP + 8971 CONTINUE + 8970 CONTINUE + ELSE + CALL CLASET( 'L',NR-1,NR-1,CZERO,CZERO,V(2,1),LDV ) + END IF +* Now, compute R2 = L3 * Q3, the LQ factorization. + CALL CGELQF( NR, NR, V, LDV, CWORK(2*N+N*NR+1), + $ CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, IERR ) +* .. and estimate the condition number + CALL CLACPY( 'L',NR,NR,V,LDV,CWORK(2*N+N*NR+NR+1),NR ) + DO 4950 p = 1, NR + TEMP1 = SCNRM2( p, CWORK(2*N+N*NR+NR+p), NR ) + CALL CSSCAL( p, ONE/TEMP1, CWORK(2*N+N*NR+NR+p), NR ) + 4950 CONTINUE + CALL CPOCON( 'L',NR,CWORK(2*N+N*NR+NR+1),NR,ONE,TEMP1, + $ CWORK(2*N+N*NR+NR+NR*NR+1),RWORK,IERR ) + CONDR2 = ONE / SQRT(TEMP1) +* +* + IF ( CONDR2 .GE. COND_OK ) THEN +* .. save the Householder vectors used for Q3 +* (this overwrittes the copy of R2, as it will not be +* needed in this branch, but it does not overwritte the +* Huseholder vectors of Q2.). + CALL CLACPY( 'U', NR, NR, V, LDV, CWORK(2*N+1), N ) +* .. and the rest of the information on Q3 is in +* WORK(2*N+N*NR+1:2*N+N*NR+N) + END IF +* + END IF +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 4968 q = 2, NR + CTEMP = XSC * V(q,q) + DO 4969 p = 1, q - 1 +* V(p,q) = - TEMP1*( V(p,q) / ABS(V(p,q)) ) + V(p,q) = - CTEMP + 4969 CONTINUE + 4968 CONTINUE + ELSE + CALL CLASET( 'U', NR-1,NR-1, CZERO,CZERO, V(1,2), LDV ) + END IF +* +* Second preconditioning finished; continue with Jacobi SVD +* The input matrix is lower trinagular. +* +* Recover the right singular vectors as solution of a well +* conditioned triangular matrix equation. +* + IF ( CONDR1 .LT. COND_OK ) THEN +* + CALL CGESVJ( 'L','U','N',NR,NR,V,LDV,SVA,NR,U, LDU, + $ CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,RWORK, + $ LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + DO 3970 p = 1, NR + CALL CCOPY( NR, V(1,p), 1, U(1,p), 1 ) + CALL CSSCAL( NR, SVA(p), V(1,p), 1 ) + 3970 CONTINUE + +* .. pick the right matrix equation and solve it +* + IF ( NR .EQ. N ) THEN +* :)) .. best case, R1 is inverted. The solution of this matrix +* equation is Q2*V2 = the product of the Jacobi rotations +* used in CGESVJ, premultiplied with the orthogonal matrix +* from the second QR factorization. + CALL CTRSM('L','U','N','N', NR,NR,CONE, A,LDA, V,LDV) + ELSE +* .. R1 is well conditioned, but non-square. Adjoint of R2 +* is inverted to get the product of the Jacobi rotations +* used in CGESVJ. The Q-factor from the second QR +* factorization is then built in explicitly. + CALL CTRSM('L','U','C','N',NR,NR,CONE,CWORK(2*N+1), + $ N,V,LDV) + IF ( NR .LT. N ) THEN + CALL CLASET('A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV) + CALL CLASET('A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV) + CALL CLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV) + END IF + CALL CUNMQR('L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), + $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR) + END IF +* + ELSE IF ( CONDR2 .LT. COND_OK ) THEN +* +* The matrix R2 is inverted. The solution of the matrix equation +* is Q3^* * V3 = the product of the Jacobi rotations (appplied to +* the lower triangular L3 from the LQ factorization of +* R2=L3*Q3), pre-multiplied with the transposed Q3. + CALL CGESVJ( 'L', 'U', 'N', NR, NR, V, LDV, SVA, NR, U, + $ LDU, CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, + $ RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + DO 3870 p = 1, NR + CALL CCOPY( NR, V(1,p), 1, U(1,p), 1 ) + CALL CSSCAL( NR, SVA(p), U(1,p), 1 ) + 3870 CONTINUE + CALL CTRSM('L','U','N','N',NR,NR,CONE,CWORK(2*N+1),N, + $ U,LDU) +* .. apply the permutation from the second QR factorization + DO 873 q = 1, NR + DO 872 p = 1, NR + CWORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q) + 872 CONTINUE + DO 874 p = 1, NR + U(p,q) = CWORK(2*N+N*NR+NR+p) + 874 CONTINUE + 873 CONTINUE + IF ( NR .LT. N ) THEN + CALL CLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV ) + CALL CLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV ) + CALL CLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV) + END IF + CALL CUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), + $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) + ELSE +* Last line of defense. +* #:( This is a rather pathological case: no scaled condition +* improvement after two pivoted QR factorizations. Other +* possibility is that the rank revealing QR factorization +* or the condition estimator has failed, or the COND_OK +* is set very close to ONE (which is unnecessary). Normally, +* this branch should never be executed, but in rare cases of +* failure of the RRQR or condition estimator, the last line of +* defense ensures that CGEJSV completes the task. +* Compute the full SVD of L3 using CGESVJ with explicit +* accumulation of Jacobi rotations. + CALL CGESVJ( 'L', 'U', 'V', NR, NR, V, LDV, SVA, NR, U, + $ LDU, CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, + $ RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + IF ( NR .LT. N ) THEN + CALL CLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV ) + CALL CLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV ) + CALL CLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV) + END IF + CALL CUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), + $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) +* + CALL CUNMLQ( 'L', 'C', NR, NR, NR, CWORK(2*N+1), N, + $ CWORK(2*N+N*NR+1), U, LDU, CWORK(2*N+N*NR+NR+1), + $ LWORK-2*N-N*NR-NR, IERR ) + DO 773 q = 1, NR + DO 772 p = 1, NR + CWORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q) + 772 CONTINUE + DO 774 p = 1, NR + U(p,q) = CWORK(2*N+N*NR+NR+p) + 774 CONTINUE + 773 CONTINUE +* + END IF +* +* Permute the rows of V using the (column) permutation from the +* first QRF. Also, scale the columns to make them unit in +* Euclidean norm. This applies to all cases. +* + TEMP1 = SQRT(REAL(N)) * EPSLN + DO 1972 q = 1, N + DO 972 p = 1, N + CWORK(2*N+N*NR+NR+IWORK(p)) = V(p,q) + 972 CONTINUE + DO 973 p = 1, N + V(p,q) = CWORK(2*N+N*NR+NR+p) + 973 CONTINUE + XSC = ONE / SCNRM2( N, V(1,q), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL CSSCAL( N, XSC, V(1,q), 1 ) + 1972 CONTINUE +* At this moment, V contains the right singular vectors of A. +* Next, assemble the left singular vector matrix U (M x N). + IF ( NR .LT. M ) THEN + CALL CLASET('A', M-NR, NR, CZERO, CZERO, U(NR+1,1), LDU) + IF ( NR .LT. N1 ) THEN + CALL CLASET('A',NR,N1-NR,CZERO,CZERO,U(1,NR+1),LDU) + CALL CLASET('A',M-NR,N1-NR,CZERO,CONE, + $ U(NR+1,NR+1),LDU) + END IF + END IF +* +* The Q matrix from the first QRF is built into the left singular +* matrix U. This applies to all cases. +* + CALL CUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, + $ LDU, CWORK(N+1), LWORK-N, IERR ) + +* The columns of U are normalized. The cost is O(M*N) flops. + TEMP1 = SQRT(REAL(M)) * EPSLN + DO 1973 p = 1, NR + XSC = ONE / SCNRM2( M, U(1,p), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL CSSCAL( M, XSC, U(1,p), 1 ) + 1973 CONTINUE +* +* If the initial QRF is computed with row pivoting, the left +* singular vectors must be adjusted. +* + IF ( ROWPIV ) + $ CALL CLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) +* + ELSE +* +* .. the initial matrix A has almost orthogonal columns and +* the second QRF is not needed +* + CALL CLACPY( 'U', N, N, A, LDA, CWORK(N+1), N ) + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 5970 p = 2, N + CTEMP = XSC * CWORK( N + (p-1)*N + p ) + DO 5971 q = 1, p - 1 +* CWORK(N+(q-1)*N+p)=-TEMP1 * ( CWORK(N+(p-1)*N+q) / +* $ ABS(CWORK(N+(p-1)*N+q)) ) + CWORK(N+(q-1)*N+p)=-CTEMP + 5971 CONTINUE + 5970 CONTINUE + ELSE + CALL CLASET( 'L',N-1,N-1,CZERO,CZERO,CWORK(N+2),N ) + END IF +* + CALL CGESVJ( 'U', 'U', 'N', N, N, CWORK(N+1), N, SVA, + $ N, U, LDU, CWORK(N+N*N+1), LWORK-N-N*N, RWORK, LRWORK, + $ INFO ) +* + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + DO 6970 p = 1, N + CALL CCOPY( N, CWORK(N+(p-1)*N+1), 1, U(1,p), 1 ) + CALL CSSCAL( N, SVA(p), CWORK(N+(p-1)*N+1), 1 ) + 6970 CONTINUE +* + CALL CTRSM( 'L', 'U', 'N', 'N', N, N, + $ CONE, A, LDA, CWORK(N+1), N ) + DO 6972 p = 1, N + CALL CCOPY( N, CWORK(N+p), N, V(IWORK(p),1), LDV ) + 6972 CONTINUE + TEMP1 = SQRT(REAL(N))*EPSLN + DO 6971 p = 1, N + XSC = ONE / SCNRM2( N, V(1,p), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL CSSCAL( N, XSC, V(1,p), 1 ) + 6971 CONTINUE +* +* Assemble the left singular vector matrix U (M x N). +* + IF ( N .LT. M ) THEN + CALL CLASET( 'A', M-N, N, CZERO, CZERO, U(N+1,1), LDU ) + IF ( N .LT. N1 ) THEN + CALL CLASET('A',N, N1-N, CZERO, CZERO, U(1,N+1),LDU) + CALL CLASET( 'A',M-N,N1-N, CZERO, CONE,U(N+1,N+1),LDU) + END IF + END IF + CALL CUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, + $ LDU, CWORK(N+1), LWORK-N, IERR ) + TEMP1 = SQRT(REAL(M))*EPSLN + DO 6973 p = 1, N1 + XSC = ONE / SCNRM2( M, U(1,p), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL CSSCAL( M, XSC, U(1,p), 1 ) + 6973 CONTINUE +* + IF ( ROWPIV ) + $ CALL CLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) +* + END IF +* +* end of the >> almost orthogonal case << in the full SVD +* + ELSE +* +* This branch deploys a preconditioned Jacobi SVD with explicitly +* accumulated rotations. It is included as optional, mainly for +* experimental purposes. It does perfom well, and can also be used. +* In this implementation, this branch will be automatically activated +* if the condition number sigma_max(A) / sigma_min(A) is predicted +* to be greater than the overflow threshold. This is because the +* a posteriori computation of the singular vectors assumes robust +* implementation of BLAS and some LAPACK procedures, capable of working +* in presence of extreme values, e.g. when the singular values spread from +* the underflow to the overflow threshold. +* + DO 7968 p = 1, NR + CALL CCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) + CALL CLACGV( N-p+1, V(p,p), 1 ) + 7968 CONTINUE +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL/EPSLN) + DO 5969 q = 1, NR + CTEMP = CMPLX(XSC*ABS( V(q,q) ),ZERO) + DO 5968 p = 1, N + IF ( ( p .GT. q ) .AND. ( ABS(V(p,q)) .LE. TEMP1 ) + $ .OR. ( p .LT. q ) ) +* $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) ) + $ V(p,q) = CTEMP + IF ( p .LT. q ) V(p,q) = - V(p,q) + 5968 CONTINUE + 5969 CONTINUE + ELSE + CALL CLASET( 'U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV ) + END IF + + CALL CGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), + $ LWORK-2*N, IERR ) + CALL CLACPY( 'L', N, NR, V, LDV, CWORK(2*N+1), N ) +* + DO 7969 p = 1, NR + CALL CCOPY( NR-p+1, V(p,p), LDV, U(p,p), 1 ) + CALL CLACGV( NR-p+1, U(p,p), 1 ) + 7969 CONTINUE + + IF ( L2PERT ) THEN + XSC = SQRT(SMALL/EPSLN) + DO 9970 q = 2, NR + DO 9971 p = 1, q - 1 + CTEMP = CMPLX(XSC * MIN(ABS(U(p,p)),ABS(U(q,q))), + $ ZERO) +* U(p,q) = - TEMP1 * ( U(q,p) / ABS(U(q,p)) ) + U(p,q) = - CTEMP + 9971 CONTINUE + 9970 CONTINUE + ELSE + CALL CLASET('U', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU ) + END IF + + CALL CGESVJ( 'L', 'U', 'V', NR, NR, U, LDU, SVA, + $ N, V, LDV, CWORK(2*N+N*NR+1), LWORK-2*N-N*NR, + $ RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + + IF ( NR .LT. N ) THEN + CALL CLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV ) + CALL CLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV ) + CALL CLASET( 'A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV ) + END IF + + CALL CUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), + $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) +* +* Permute the rows of V using the (column) permutation from the +* first QRF. Also, scale the columns to make them unit in +* Euclidean norm. This applies to all cases. +* + TEMP1 = SQRT(REAL(N)) * EPSLN + DO 7972 q = 1, N + DO 8972 p = 1, N + CWORK(2*N+N*NR+NR+IWORK(p)) = V(p,q) + 8972 CONTINUE + DO 8973 p = 1, N + V(p,q) = CWORK(2*N+N*NR+NR+p) + 8973 CONTINUE + XSC = ONE / SCNRM2( N, V(1,q), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL CSSCAL( N, XSC, V(1,q), 1 ) + 7972 CONTINUE +* +* At this moment, V contains the right singular vectors of A. +* Next, assemble the left singular vector matrix U (M x N). +* + IF ( NR .LT. M ) THEN + CALL CLASET( 'A', M-NR, NR, CZERO, CZERO, U(NR+1,1), LDU ) + IF ( NR .LT. N1 ) THEN + CALL CLASET('A',NR, N1-NR, CZERO, CZERO, U(1,NR+1),LDU) + CALL CLASET('A',M-NR,N1-NR, CZERO, CONE,U(NR+1,NR+1),LDU) + END IF + END IF +* + CALL CUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, + $ LDU, CWORK(N+1), LWORK-N, IERR ) +* + IF ( ROWPIV ) + $ CALL CLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) +* +* + END IF + IF ( TRANSP ) THEN +* .. swap U and V because the procedure worked on A^* + DO 6974 p = 1, N + CALL CSWAP( N, U(1,p), 1, V(1,p), 1 ) + 6974 CONTINUE + END IF +* + END IF +* end of the full SVD +* +* Undo scaling, if necessary (and possible) +* + IF ( USCAL2 .LE. (BIG/SVA(1))*USCAL1 ) THEN + CALL SLASCL( 'G', 0, 0, USCAL1, USCAL2, NR, 1, SVA, N, IERR ) + USCAL1 = ONE + USCAL2 = ONE + END IF +* + IF ( NR .LT. N ) THEN + DO 3004 p = NR+1, N + SVA(p) = ZERO + 3004 CONTINUE + END IF +* + RWORK(1) = USCAL2 * SCALEM + RWORK(2) = USCAL1 + IF ( ERREST ) RWORK(3) = SCONDA + IF ( LSVEC .AND. RSVEC ) THEN + RWORK(4) = CONDR1 + RWORK(5) = CONDR2 + END IF + IF ( L2TRAN ) THEN + RWORK(6) = ENTRA + RWORK(7) = ENTRAT + END IF +* + IWORK(1) = NR + IWORK(2) = NUMRANK + IWORK(3) = WARNING + IF ( TRANSP ) THEN + IWORK(4) = 1 + ELSE + IWORK(4) = -1 + END IF + +* + RETURN +* .. +* .. END OF CGEJSV +* .. + END +* diff --git a/dspl/liblapack/SRC/cgelq.f b/dspl/liblapack/SRC/cgelq.f new file mode 100644 index 0000000..909162e --- /dev/null +++ b/dspl/liblapack/SRC/cgelq.f @@ -0,0 +1,306 @@ +* +* Definition: +* =========== +* +* SUBROUTINE CGELQ( M, N, A, LDA, T, TSIZE, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, TSIZE, LWORK +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), T( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> CGELQ computes a LQ factorization of an M-by-N matrix A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the M-by-min(M,N) lower trapezoidal matrix L +*> (L is lower triangular if M <= N); +*> the elements above the diagonal are used to store part of the +*> data structure to represent Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (MAX(5,TSIZE)) +*> On exit, if INFO = 0, T(1) returns optimal (or either minimal +*> or optimal, if query is assumed) TSIZE. See TSIZE for details. +*> Remaining T contains part of the data structure used to represent Q. +*> If one wants to apply or construct Q, then one needs to keep T +*> (in addition to A) and pass it to further subroutines. +*> \endverbatim +*> +*> \param[in] TSIZE +*> \verbatim +*> TSIZE is INTEGER +*> If TSIZE >= 5, the dimension of the array T. +*> If TSIZE = -1 or -2, then a workspace query is assumed. The routine +*> only calculates the sizes of the T and WORK arrays, returns these +*> values as the first entries of the T and WORK arrays, and no error +*> message related to T or WORK is issued by XERBLA. +*> If TSIZE = -1, the routine calculates optimal size of T for the +*> optimum performance and returns this value in T(1). +*> If TSIZE = -2, the routine calculates minimal size of T and +*> returns this value in T(1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) contains optimal (or either minimal +*> or optimal, if query was assumed) LWORK. +*> See LWORK for details. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If LWORK = -1 or -2, then a workspace query is assumed. The routine +*> only calculates the sizes of the T and WORK arrays, returns these +*> values as the first entries of the T and WORK arrays, and no error +*> message related to T or WORK is issued by XERBLA. +*> If LWORK = -1, the routine calculates optimal size of WORK for the +*> optimal performance and returns this value in WORK(1). +*> If LWORK = -2, the routine calculates minimal size of WORK and +*> returns this value in WORK(1). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details +* ==================== +*> +*> \verbatim +*> +*> The goal of the interface is to give maximum freedom to the developers for +*> creating any LQ factorization algorithm they wish. The triangular +*> (trapezoidal) L has to be stored in the lower part of A. The lower part of A +*> and the array T can be used to store any relevant information for applying or +*> constructing the Q factor. The WORK array can safely be discarded after exit. +*> +*> Caution: One should not expect the sizes of T and WORK to be the same from one +*> LAPACK implementation to the other, or even from one execution to the other. +*> A workspace query (for T and WORK) is needed at each execution. However, +*> for a given execution, the size of T and WORK are fixed and will not change +*> from one query to the next. +*> +*> \endverbatim +*> +*> \par Further Details particular to this LAPACK implementation: +* ============================================================== +*> +*> \verbatim +*> +*> These details are particular for this LAPACK implementation. Users should not +*> take them for granted. These details may change in the future, and are unlikely not +*> true for another LAPACK implementation. These details are relevant if one wants +*> to try to understand the code. They are not part of the interface. +*> +*> In this version, +*> +*> T(2): row block size (MB) +*> T(3): column block size (NB) +*> T(6:TSIZE): data structure needed for Q, computed by +*> CLASWLQ or CGELQT +*> +*> Depending on the matrix dimensions M and N, and row and column +*> block sizes MB and NB returned by ILAENV, CGELQ will use either +*> CLASWLQ (if the matrix is short-and-wide) or CGELQT to compute +*> the LQ factorization. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGELQ( M, N, A, LDA, T, TSIZE, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, TSIZE, LWORK +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), T( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LQUERY, LMINWS, MINT, MINW + INTEGER MB, NB, MINTSZ, NBLCKS +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGELQT, CLASWLQ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, MOD +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 +* + LQUERY = ( TSIZE.EQ.-1 .OR. TSIZE.EQ.-2 .OR. + $ LWORK.EQ.-1 .OR. LWORK.EQ.-2 ) +* + MINT = .FALSE. + MINW = .FALSE. + IF( TSIZE.EQ.-2 .OR. LWORK.EQ.-2 ) THEN + IF( TSIZE.NE.-1 ) MINT = .TRUE. + IF( LWORK.NE.-1 ) MINW = .TRUE. + END IF +* +* Determine the block size +* + IF( MIN( M, N ).GT.0 ) THEN + MB = ILAENV( 1, 'CGELQ ', ' ', M, N, 1, -1 ) + NB = ILAENV( 1, 'CGELQ ', ' ', M, N, 2, -1 ) + ELSE + MB = 1 + NB = N + END IF + IF( MB.GT.MIN( M, N ) .OR. MB.LT.1 ) MB = 1 + IF( NB.GT.N .OR. NB.LE.M ) NB = N + MINTSZ = M + 5 + IF( NB.GT.M .AND. N.GT.M ) THEN + IF( MOD( N - M, NB - M ).EQ.0 ) THEN + NBLCKS = ( N - M ) / ( NB - M ) + ELSE + NBLCKS = ( N - M ) / ( NB - M ) + 1 + END IF + ELSE + NBLCKS = 1 + END IF +* +* Determine if the workspace size satisfies minimal size +* + LMINWS = .FALSE. + IF( ( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) .OR. LWORK.LT.MB*M ) + $ .AND. ( LWORK.GE.M ) .AND. ( TSIZE.GE.MINTSZ ) + $ .AND. ( .NOT.LQUERY ) ) THEN + IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) ) THEN + LMINWS = .TRUE. + MB = 1 + NB = N + END IF + IF( LWORK.LT.MB*M ) THEN + LMINWS = .TRUE. + MB = 1 + END IF + END IF +* + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) + $ .AND. ( .NOT.LQUERY ) .AND. ( .NOT.LMINWS ) ) THEN + INFO = -6 + ELSE IF( ( LWORK.LT.MAX( 1, M*MB ) ) .AND .( .NOT.LQUERY ) + $ .AND. ( .NOT.LMINWS ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( MINT ) THEN + T( 1 ) = MINTSZ + ELSE + T( 1 ) = MB*M*NBLCKS + 5 + END IF + T( 2 ) = MB + T( 3 ) = NB + IF( MINW ) THEN + WORK( 1 ) = MAX( 1, N ) + ELSE + WORK( 1 ) = MAX( 1, MB*M ) + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N ).EQ.0 ) THEN + RETURN + END IF +* +* The LQ Decomposition +* + IF( ( N.LE.M ) .OR. ( NB.LE.M ) .OR. ( NB.GE.N ) ) THEN + CALL CGELQT( M, N, MB, A, LDA, T( 6 ), MB, WORK, INFO ) + ELSE + CALL CLASWLQ( M, N, MB, NB, A, LDA, T( 6 ), MB, WORK, + $ LWORK, INFO ) + END IF +* + WORK( 1 ) = MAX( 1, MB*M ) +* + RETURN +* +* End of CGELQ +* + END diff --git a/dspl/liblapack/SRC/cgelq2.f b/dspl/liblapack/SRC/cgelq2.f new file mode 100644 index 0000000..9742d35 --- /dev/null +++ b/dspl/liblapack/SRC/cgelq2.f @@ -0,0 +1,194 @@ +*> \brief \b CGELQ2 computes the LQ factorization of a general rectangular matrix using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGELQ2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGELQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGELQ2 computes an LQ factorization of a complex m by n matrix A: +*> A = L * Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the m by min(m,n) lower trapezoidal matrix L (L is +*> lower triangular if m <= n); the elements above the diagonal, +*> with the array TAU, represent the unitary matrix Q as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(k)**H . . . H(2)**H H(1)**H, where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in +*> A(i,i+1:n), and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGELQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARF, CLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELQ2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i,i+1:n) +* + CALL CLACGV( N-I+1, A( I, I ), LDA ) + ALPHA = A( I, I ) + CALL CLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA, + $ TAU( I ) ) + IF( I.LT.M ) THEN +* +* Apply H(i) to A(i+1:m,i:n) from the right +* + A( I, I ) = ONE + CALL CLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ), + $ A( I+1, I ), LDA, WORK ) + END IF + A( I, I ) = ALPHA + CALL CLACGV( N-I+1, A( I, I ), LDA ) + 10 CONTINUE + RETURN +* +* End of CGELQ2 +* + END diff --git a/dspl/liblapack/SRC/cgelqf.f b/dspl/liblapack/SRC/cgelqf.f new file mode 100644 index 0000000..216630e --- /dev/null +++ b/dspl/liblapack/SRC/cgelqf.f @@ -0,0 +1,269 @@ +*> \brief \b CGELQF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGELQF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGELQF computes an LQ factorization of a complex M-by-N matrix A: +*> A = L * Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the m-by-min(m,n) lower trapezoidal matrix L (L is +*> lower triangular if m <= n); the elements above the diagonal, +*> with the array TAU, represent the unitary matrix Q as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(k)**H . . . H(2)**H H(1)**H, where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in +*> A(i,i+1:n), and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CGELQ2, CLARFB, CLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CGELQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CGELQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the LQ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL CGELQ2( IB, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL CLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i+ib:m,i:n) from the right +* + CALL CLARFB( 'Right', 'No transpose', 'Forward', + $ 'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ), + $ LDA, WORK, LDWORK, A( I+IB, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL CGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of CGELQF +* + END diff --git a/dspl/liblapack/SRC/cgelqt.f b/dspl/liblapack/SRC/cgelqt.f new file mode 100644 index 0000000..e151f10 --- /dev/null +++ b/dspl/liblapack/SRC/cgelqt.f @@ -0,0 +1,194 @@ +* +* Definition: +* =========== +* +* SUBROUTINE CGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDT, M, N, MB +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGELQT computes a blocked LQ factorization of a complex M-by-N matrix A +*> using the compact WY representation of Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The block size to be used in the blocked QR. MIN(M,N) >= MB >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the M-by-MIN(M,N) lower trapezoidal matrix L (L is +*> lower triangular if M <= N); the elements above the diagonal +*> are the rows of V. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,MIN(M,N)) +*> The upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See below +*> for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MB*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix V stores the elementary reflectors H(i) in the i-th row +*> above the diagonal. For example, if M=5 and N=3, the matrix V is +*> +*> V = ( 1 v1 v1 v1 v1 ) +*> ( 1 v2 v2 v2 ) +*> ( 1 v3 v3 ) +*> +*> +*> where the vi's represent the vectors which define H(i), which are returned +*> in the matrix A. The 1's along the diagonal of V are not stored in A. +*> Let K=MIN(M,N). The number of blocks is B = ceiling(K/MB), where each +*> block is of order MB except for the last block, which is of order +*> IB = K - (B-1)*MB. For each of the B blocks, a upper triangular block +*> reflector factor is computed: T1, T2, ..., TB. The MB-by-MB (and IB-by-IB +*> for the last block) T's are stored in the MB-by-K matrix T as +*> +*> T = (T1 T2 ... TB). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDT, M, N, MB +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + INTEGER I, IB, IINFO, K +* .. +* .. External Subroutines .. + EXTERNAL CGELQT3, CLARFB, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( MB.LT.1 .OR. (MB.GT.MIN(M,N) .AND. MIN(M,N).GT.0 ))THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDT.LT.MB ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELQT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) RETURN +* +* Blocked loop of length K +* + DO I = 1, K, MB + IB = MIN( K-I+1, MB ) +* +* Compute the LQ factorization of the current block A(I:M,I:I+IB-1) +* + CALL CGELQT3( IB, N-I+1, A(I,I), LDA, T(1,I), LDT, IINFO ) + IF( I+IB.LE.M ) THEN +* +* Update by applying H**T to A(I:M,I+IB:N) from the right +* + CALL CLARFB( 'R', 'N', 'F', 'R', M-I-IB+1, N-I+1, IB, + $ A( I, I ), LDA, T( 1, I ), LDT, + $ A( I+IB, I ), LDA, WORK , M-I-IB+1 ) + END IF + END DO + RETURN +* +* End of CGELQT +* + END diff --git a/dspl/liblapack/SRC/cgelqt3.f b/dspl/liblapack/SRC/cgelqt3.f new file mode 100644 index 0000000..f643797 --- /dev/null +++ b/dspl/liblapack/SRC/cgelqt3.f @@ -0,0 +1,244 @@ +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE CGELQT3( M, N, A, LDA, T, LDT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, LDT +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGELQT3 recursively computes a LQ factorization of a complex M-by-N +*> matrix A, using the compact WY representation of Q. +*> +*> Based on the algorithm of Elmroth and Gustavson, +*> IBM J. Res. Develop. Vol 44 No. 4 July 2000. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M =< N. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the real M-by-N matrix A. On exit, the elements on and +*> below the diagonal contain the N-by-N lower triangular matrix L; the +*> elements above the diagonal are the rows of V. See below for +*> further details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,N) +*> The N-by-N upper triangular factor of the block reflector. +*> The elements on and above the diagonal contain the block +*> reflector T; the elements below the diagonal are not used. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix V stores the elementary reflectors H(i) in the i-th row +*> above the diagonal. For example, if M=5 and N=3, the matrix V is +*> +*> V = ( 1 v1 v1 v1 v1 ) +*> ( 1 v2 v2 v2 ) +*> ( 1 v3 v3 v3 ) +*> +*> +*> where the vi's represent the vectors which define H(i), which are returned +*> in the matrix A. The 1's along the diagonal of V are not stored in A. The +*> block reflector H is then given by +*> +*> H = I - V * T * V**T +*> +*> where V**T is the transpose of V. +*> +*> For details of the algorithm, see Elmroth and Gustavson (cited above). +*> \endverbatim +*> +* ===================================================================== + RECURSIVE SUBROUTINE CGELQT3( M, N, A, LDA, T, LDT, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, LDT +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = (1.0E+00,0.0E+00) ) + PARAMETER ( ZERO = (0.0E+00,0.0E+00)) +* .. +* .. Local Scalars .. + INTEGER I, I1, J, J1, M1, M2, IINFO +* .. +* .. External Subroutines .. + EXTERNAL CLARFG, CTRMM, CGEMM, XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( M .LT. 0 ) THEN + INFO = -1 + ELSE IF( N .LT. M ) THEN + INFO = -2 + ELSE IF( LDA .LT. MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LDT .LT. MAX( 1, M ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELQT3', -INFO ) + RETURN + END IF +* + IF( M.EQ.1 ) THEN +* +* Compute Householder transform when N=1 +* + CALL CLARFG( N, A, A( 1, MIN( 2, N ) ), LDA, T ) + T(1,1)=CONJG(T(1,1)) +* + ELSE +* +* Otherwise, split A into blocks... +* + M1 = M/2 + M2 = M-M1 + I1 = MIN( M1+1, M ) + J1 = MIN( M+1, N ) +* +* Compute A(1:M1,1:N) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1^H +* + CALL CGELQT3( M1, N, A, LDA, T, LDT, IINFO ) +* +* Compute A(J1:M,1:N) = A(J1:M,1:N) Q1^H [workspace: T(1:N1,J1:N)] +* + DO I=1,M2 + DO J=1,M1 + T( I+M1, J ) = A( I+M1, J ) + END DO + END DO + CALL CTRMM( 'R', 'U', 'C', 'U', M2, M1, ONE, + & A, LDA, T( I1, 1 ), LDT ) +* + CALL CGEMM( 'N', 'C', M2, M1, N-M1, ONE, A( I1, I1 ), LDA, + & A( 1, I1 ), LDA, ONE, T( I1, 1 ), LDT) +* + CALL CTRMM( 'R', 'U', 'N', 'N', M2, M1, ONE, + & T, LDT, T( I1, 1 ), LDT ) +* + CALL CGEMM( 'N', 'N', M2, N-M1, M1, -ONE, T( I1, 1 ), LDT, + & A( 1, I1 ), LDA, ONE, A( I1, I1 ), LDA ) +* + CALL CTRMM( 'R', 'U', 'N', 'U', M2, M1 , ONE, + & A, LDA, T( I1, 1 ), LDT ) +* + DO I=1,M2 + DO J=1,M1 + A( I+M1, J ) = A( I+M1, J ) - T( I+M1, J ) + T( I+M1, J )= ZERO + END DO + END DO +* +* Compute A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2^H +* + CALL CGELQT3( M2, N-M1, A( I1, I1 ), LDA, + & T( I1, I1 ), LDT, IINFO ) +* +* Compute T3 = T(J1:N1,1:N) = -T1 Y1^H Y2 T2 +* + DO I=1,M2 + DO J=1,M1 + T( J, I+M1 ) = (A( J, I+M1 )) + END DO + END DO +* + CALL CTRMM( 'R', 'U', 'C', 'U', M1, M2, ONE, + & A( I1, I1 ), LDA, T( 1, I1 ), LDT ) +* + CALL CGEMM( 'N', 'C', M1, M2, N-M, ONE, A( 1, J1 ), LDA, + & A( I1, J1 ), LDA, ONE, T( 1, I1 ), LDT ) +* + CALL CTRMM( 'L', 'U', 'N', 'N', M1, M2, -ONE, T, LDT, + & T( 1, I1 ), LDT ) +* + CALL CTRMM( 'R', 'U', 'N', 'N', M1, M2, ONE, + & T( I1, I1 ), LDT, T( 1, I1 ), LDT ) +* +* +* +* Y = (Y1,Y2); L = [ L1 0 ]; T = [T1 T3] +* [ A(1:N1,J1:N) L2 ] [ 0 T2] +* + END IF +* + RETURN +* +* End of CGELQT3 +* + END diff --git a/dspl/liblapack/SRC/cgels.f b/dspl/liblapack/SRC/cgels.f new file mode 100644 index 0000000..dbef9fa --- /dev/null +++ b/dspl/liblapack/SRC/cgels.f @@ -0,0 +1,505 @@ +*> \brief CGELS solves overdetermined or underdetermined systems for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGELS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGELS solves overdetermined or underdetermined complex linear systems +*> involving an M-by-N matrix A, or its conjugate-transpose, using a QR +*> or LQ factorization of A. It is assumed that A has full rank. +*> +*> The following options are provided: +*> +*> 1. If TRANS = 'N' and m >= n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A*X ||. +*> +*> 2. If TRANS = 'N' and m < n: find the minimum norm solution of +*> an underdetermined system A * X = B. +*> +*> 3. If TRANS = 'C' and m >= n: find the minimum norm solution of +*> an underdetermined system A**H * X = B. +*> +*> 4. If TRANS = 'C' and m < n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A**H * X ||. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': the linear system involves A; +*> = 'C': the linear system involves A**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> if M >= N, A is overwritten by details of its QR +*> factorization as returned by CGEQRF; +*> if M < N, A is overwritten by details of its LQ +*> factorization as returned by CGELQF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the matrix B of right hand side vectors, stored +*> columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +*> if TRANS = 'C'. +*> On exit, if INFO = 0, B is overwritten by the solution +*> vectors, stored columnwise: +*> if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +*> squares solution vectors; the residual sum of squares for the +*> solution in each column is given by the sum of squares of the +*> modulus of elements N+1 to M in that column; +*> if TRANS = 'N' and m < n, rows 1 to N of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'C' and m >= n, rows 1 to M of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'C' and m < n, rows 1 to M of B contain the +*> least squares solution vectors; the residual sum of squares +*> for the solution in each column is given by the sum of +*> squares of the modulus of elements M+1 to N in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= MAX(1,M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= max( 1, MN + max( MN, NRHS ) ). +*> For optimal performance, +*> LWORK >= max( 1, MN + max( MN, NRHS )*NB ). +*> where MN = min(M,N) and NB is the optimum block size. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of the +*> triangular factor of A is zero, so that A does not have +*> full rank; the least squares solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEsolve +* +* ===================================================================== + SUBROUTINE CGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TPSD + INTEGER BROW, I, IASCL, IBSCL, J, MN, NB, SCLLEN, WSIZE + REAL ANRM, BIGNUM, BNRM, SMLNUM +* .. +* .. Local Arrays .. + REAL RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CGELQF, CGEQRF, CLASCL, CLASET, CTRTRS, CUNMLQ, + $ CUNMQR, SLABAD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'C' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND. + $ .NOT.LQUERY ) THEN + INFO = -10 + END IF +* +* Figure out optimal block size +* + IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN +* + TPSD = .TRUE. + IF( LSAME( TRANS, 'N' ) ) + $ TPSD = .FALSE. +* + IF( M.GE.N ) THEN + NB = ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + IF( TPSD ) THEN + NB = MAX( NB, ILAENV( 1, 'CUNMQR', 'LN', M, NRHS, N, + $ -1 ) ) + ELSE + NB = MAX( NB, ILAENV( 1, 'CUNMQR', 'LC', M, NRHS, N, + $ -1 ) ) + END IF + ELSE + NB = ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 ) + IF( TPSD ) THEN + NB = MAX( NB, ILAENV( 1, 'CUNMLQ', 'LC', N, NRHS, M, + $ -1 ) ) + ELSE + NB = MAX( NB, ILAENV( 1, 'CUNMLQ', 'LN', N, NRHS, M, + $ -1 ) ) + END IF + END IF +* + WSIZE = MAX( 1, MN + MAX( MN, NRHS )*NB ) + WORK( 1 ) = REAL( WSIZE ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELS ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL CLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + GO TO 50 + END IF +* + BROW = M + IF( TPSD ) + $ BROW = N + BNRM = CLANGE( 'M', BROW, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF( M.GE.N ) THEN +* +* compute QR factorization of A +* + CALL CGEQRF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least N, optimally N*NB +* + IF( .NOT.TPSD ) THEN +* +* Least-Squares Problem min || A * X - B || +* +* B(1:M,1:NRHS) := Q**H * B(1:M,1:NRHS) +* + CALL CUNMQR( 'Left', 'Conjugate transpose', M, NRHS, N, A, + $ LDA, WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* +* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL CTRTRS( 'Upper', 'No transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = N +* + ELSE +* +* Underdetermined system of equations A**T * X = B +* +* B(1:N,1:NRHS) := inv(R**H) * B(1:N,1:NRHS) +* + CALL CTRTRS( 'Upper', 'Conjugate transpose','Non-unit', + $ N, NRHS, A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* B(N+1:M,1:NRHS) = ZERO +* + DO 20 J = 1, NRHS + DO 10 I = N + 1, M + B( I, J ) = CZERO + 10 CONTINUE + 20 CONTINUE +* +* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) +* + CALL CUNMQR( 'Left', 'No transpose', M, NRHS, N, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* + SCLLEN = M +* + END IF +* + ELSE +* +* Compute LQ factorization of A +* + CALL CGELQF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least M, optimally M*NB. +* + IF( .NOT.TPSD ) THEN +* +* underdetermined system of equations A * X = B +* +* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL CTRTRS( 'Lower', 'No transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* B(M+1:N,1:NRHS) = 0 +* + DO 40 J = 1, NRHS + DO 30 I = M + 1, N + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Q(1:N,:)**H * B(1:M,1:NRHS) +* + CALL CUNMLQ( 'Left', 'Conjugate transpose', N, NRHS, M, A, + $ LDA, WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* + SCLLEN = N +* + ELSE +* +* overdetermined system min || A**H * X - B || +* +* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) +* + CALL CUNMLQ( 'Left', 'No transpose', N, NRHS, M, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* +* B(1:M,1:NRHS) := inv(L**H) * B(1:M,1:NRHS) +* + CALL CTRTRS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ M, NRHS, A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + 50 CONTINUE + WORK( 1 ) = REAL( WSIZE ) +* + RETURN +* +* End of CGELS +* + END diff --git a/dspl/liblapack/SRC/cgelsd.f b/dspl/liblapack/SRC/cgelsd.f new file mode 100644 index 0000000..2c29c3d --- /dev/null +++ b/dspl/liblapack/SRC/cgelsd.f @@ -0,0 +1,666 @@ +*> \brief CGELSD computes the minimum-norm solution to a linear least squares problem for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGELSD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, +* WORK, LWORK, RWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL RWORK( * ), S( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGELSD computes the minimum-norm solution to a real linear least +*> squares problem: +*> minimize 2-norm(| b - A*x |) +*> using the singular value decomposition (SVD) of A. A is an M-by-N +*> matrix which may be rank-deficient. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> +*> The problem is solved in three steps: +*> (1) Reduce the coefficient matrix A to bidiagonal form with +*> Householder transformations, reducing the original problem +*> into a "bidiagonal least squares problem" (BLS) +*> (2) Solve the BLS using a divide and conquer approach. +*> (3) Apply back all the Householder transformations to solve +*> the original least squares problem. +*> +*> The effective rank of A is determined by treating as zero those +*> singular values which are less than RCOND times the largest singular +*> value. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A has been destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the M-by-NRHS right hand side matrix B. +*> On exit, B is overwritten by the N-by-NRHS solution matrix X. +*> If m >= n and RANK = n, the residual sum-of-squares for +*> the solution in the i-th column is given by the sum of +*> squares of the modulus of elements n+1:m in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M,N). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (min(M,N)) +*> The singular values of A in decreasing order. +*> The condition number of A in the 2-norm = S(1)/S(min(m,n)). +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is REAL +*> RCOND is used to determine the effective rank of A. +*> Singular values S(i) <= RCOND*S(1) are treated as zero. +*> If RCOND < 0, machine precision is used instead. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The effective rank of A, i.e., the number of singular values +*> which are greater than RCOND*S(1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK must be at least 1. +*> The exact minimum amount of workspace needed depends on M, +*> N and NRHS. As long as LWORK is at least +*> 2 * N + N * NRHS +*> if M is greater than or equal to N or +*> 2 * M + M * NRHS +*> if M is less than N, the code will execute correctly. +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the array WORK and the +*> minimum sizes of the arrays RWORK and IWORK, and returns +*> these values as the first entries of the WORK, RWORK and +*> IWORK arrays, and no error message related to LWORK is issued +*> by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (MAX(1,LRWORK)) +*> LRWORK >= +*> 10*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + +*> MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS ) +*> if M is greater than or equal to N or +*> 10*M + 2*M*SMLSIZ + 8*M*NLVL + 3*SMLSIZ*NRHS + +*> MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS ) +*> if M is less than N, the code will execute correctly. +*> SMLSIZ is returned by ILAENV and is equal to the maximum +*> size of the subproblems at the bottom of the computation +*> tree (usually about 25), and +*> NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) +*> On exit, if INFO = 0, RWORK(1) returns the minimum LRWORK. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> LIWORK >= max(1, 3*MINMN*NLVL + 11*MINMN), +*> where MINMN = MIN( M,N ). +*> On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: the algorithm for computing the SVD failed to converge; +*> if INFO = i, i off-diagonal elements of an intermediate +*> bidiagonal form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEsolve +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Ren-Cang Li, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +* +* ===================================================================== + SUBROUTINE CGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, + $ WORK, LWORK, RWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), S( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER IASCL, IBSCL, IE, IL, ITAU, ITAUP, ITAUQ, + $ LDWORK, LIWORK, LRWORK, MAXMN, MAXWRK, MINMN, + $ MINWRK, MM, MNTHR, NLVL, NRWORK, NWORK, SMLSIZ + REAL ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM +* .. +* .. External Subroutines .. + EXTERNAL CGEBRD, CGELQF, CGEQRF, CLACPY, + $ CLALSD, CLASCL, CLASET, CUNMBR, + $ CUNMLQ, CUNMQR, SLABAD, SLASCL, + $ SLASET, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL CLANGE, SLAMCH, ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, LOG, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN + INFO = -7 + END IF +* +* Compute workspace. +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + LIWORK = 1 + LRWORK = 1 + IF( MINMN.GT.0 ) THEN + SMLSIZ = ILAENV( 9, 'CGELSD', ' ', 0, 0, 0, 0 ) + MNTHR = ILAENV( 6, 'CGELSD', ' ', M, N, NRHS, -1 ) + NLVL = MAX( INT( LOG( REAL( MINMN ) / REAL( SMLSIZ + 1 ) ) / + $ LOG( TWO ) ) + 1, 0 ) + LIWORK = 3*MINMN*NLVL + 11*MINMN + MM = M + IF( M.GE.N .AND. M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than +* columns. +* + MM = N + MAXWRK = MAX( MAXWRK, N*ILAENV( 1, 'CGEQRF', ' ', M, N, + $ -1, -1 ) ) + MAXWRK = MAX( MAXWRK, NRHS*ILAENV( 1, 'CUNMQR', 'LC', M, + $ NRHS, N, -1 ) ) + END IF + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined. +* + LRWORK = 10*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + + $ MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS ) + MAXWRK = MAX( MAXWRK, 2*N + ( MM + N )*ILAENV( 1, + $ 'CGEBRD', ' ', MM, N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N + NRHS*ILAENV( 1, 'CUNMBR', + $ 'QLC', MM, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N + ( N - 1 )*ILAENV( 1, + $ 'CUNMBR', 'PLN', N, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N + N*NRHS ) + MINWRK = MAX( 2*N + MM, 2*N + N*NRHS ) + END IF + IF( N.GT.M ) THEN + LRWORK = 10*M + 2*M*SMLSIZ + 8*M*NLVL + 3*SMLSIZ*NRHS + + $ MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS ) + IF( N.GE.MNTHR ) THEN +* +* Path 2a - underdetermined, with many more columns +* than rows. +* + MAXWRK = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + 2*M*ILAENV( 1, + $ 'CGEBRD', ' ', M, M, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + NRHS*ILAENV( 1, + $ 'CUNMBR', 'QLC', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + ( M - 1 )*ILAENV( 1, + $ 'CUNMLQ', 'LC', N, NRHS, M, -1 ) ) + IF( NRHS.GT.1 ) THEN + MAXWRK = MAX( MAXWRK, M*M + M + M*NRHS ) + ELSE + MAXWRK = MAX( MAXWRK, M*M + 2*M ) + END IF + MAXWRK = MAX( MAXWRK, M*M + 4*M + M*NRHS ) +! XXX: Ensure the Path 2a case below is triggered. The workspace +! calculation should use queries for all routines eventually. + MAXWRK = MAX( MAXWRK, + $ 4*M+M*M+MAX( M, 2*M-4, NRHS, N-3*M ) ) + ELSE +* +* Path 2 - underdetermined. +* + MAXWRK = 2*M + ( N + M )*ILAENV( 1, 'CGEBRD', ' ', M, + $ N, -1, -1 ) + MAXWRK = MAX( MAXWRK, 2*M + NRHS*ILAENV( 1, 'CUNMBR', + $ 'QLC', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M + M*ILAENV( 1, 'CUNMBR', + $ 'PLN', N, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M + M*NRHS ) + END IF + MINWRK = MAX( 2*M + N, 2*M + M*NRHS ) + END IF + END IF + MINWRK = MIN( MINWRK, MAXWRK ) + WORK( 1 ) = MAXWRK + IWORK( 1 ) = LIWORK + RWORK( 1 ) = LRWORK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELSD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters. +* + EPS = SLAMCH( 'P' ) + SFMIN = SLAMCH( 'S' ) + SMLNUM = SFMIN / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A if max entry outside range [SMLNUM,BIGNUM]. +* + ANRM = CLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM. +* + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + CALL SLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 ) + RANK = 0 + GO TO 10 + END IF +* +* Scale B if max entry outside range [SMLNUM,BIGNUM]. +* + BNRM = CLANGE( 'M', M, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM. +* + CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM. +* + CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* If M < N make sure B(M+1:N,:) = 0 +* + IF( M.LT.N ) + $ CALL CLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB ) +* +* Overdetermined case. +* + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined. +* + MM = M + IF( M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns +* + MM = N + ITAU = 1 + NWORK = ITAU + N +* +* Compute A=Q*R. +* (RWorkspace: need N) +* (CWorkspace: need N, prefer N*NB) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Multiply B by transpose(Q). +* (RWorkspace: need N) +* (CWorkspace: need NRHS, prefer NRHS*NB) +* + CALL CUNMQR( 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Zero out below R. +* + IF( N.GT.1 ) THEN + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + END IF + END IF +* + ITAUQ = 1 + ITAUP = ITAUQ + N + NWORK = ITAUP + N + IE = 1 + NRWORK = IE + N +* +* Bidiagonalize R in A. +* (RWorkspace: need N) +* (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB) +* + CALL CGEBRD( MM, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of R. +* (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB) +* + CALL CUNMBR( 'Q', 'L', 'C', MM, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL CLALSD( 'U', SMLSIZ, N, NRHS, S, RWORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ), + $ IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of R. +* + CALL CUNMBR( 'P', 'L', 'N', N, NRHS, N, A, LDA, WORK( ITAUP ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+ + $ MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN +* +* Path 2a - underdetermined, with many more columns than rows +* and sufficient workspace for an efficient algorithm. +* + LDWORK = M + IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ), + $ M*LDA+M+M*NRHS ) )LDWORK = LDA + ITAU = 1 + NWORK = M + 1 +* +* Compute A=L*Q. +* (CWorkspace: need 2*M, prefer M+M*NB) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) + IL = NWORK +* +* Copy L to WORK(IL), zeroing out above its diagonal. +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, WORK( IL+LDWORK ), + $ LDWORK ) + ITAUQ = IL + LDWORK*M + ITAUP = ITAUQ + M + NWORK = ITAUP + M + IE = 1 + NRWORK = IE + M +* +* Bidiagonalize L in WORK(IL). +* (RWorkspace: need M) +* (CWorkspace: need M*M+4*M, prefer M*M+4*M+2*M*NB) +* + CALL CGEBRD( M, M, WORK( IL ), LDWORK, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of L. +* (CWorkspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +* + CALL CUNMBR( 'Q', 'L', 'C', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUQ ), B, LDB, WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL CLALSD( 'U', SMLSIZ, M, NRHS, S, RWORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ), + $ IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of L. +* + CALL CUNMBR( 'P', 'L', 'N', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUP ), B, LDB, WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Zero out below first M rows of B. +* + CALL CLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB ) + NWORK = ITAU + M +* +* Multiply transpose(Q) by B. +* (CWorkspace: need NRHS, prefer NRHS*NB) +* + CALL CUNMLQ( 'L', 'C', N, NRHS, M, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + ELSE +* +* Path 2 - remaining underdetermined cases. +* + ITAUQ = 1 + ITAUP = ITAUQ + M + NWORK = ITAUP + M + IE = 1 + NRWORK = IE + M +* +* Bidiagonalize A. +* (RWorkspace: need M) +* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors. +* (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB) +* + CALL CUNMBR( 'Q', 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL CLALSD( 'L', SMLSIZ, M, NRHS, S, RWORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ), + $ IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of A. +* + CALL CUNMBR( 'P', 'L', 'N', N, NRHS, M, A, LDA, WORK( ITAUP ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + END IF +* +* Undo scaling. +* + IF( IASCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 10 CONTINUE + WORK( 1 ) = MAXWRK + IWORK( 1 ) = LIWORK + RWORK( 1 ) = LRWORK + RETURN +* +* End of CGELSD +* + END diff --git a/dspl/liblapack/SRC/cgelss.f b/dspl/liblapack/SRC/cgelss.f new file mode 100644 index 0000000..84faa29 --- /dev/null +++ b/dspl/liblapack/SRC/cgelss.f @@ -0,0 +1,771 @@ +*> \brief CGELSS solves overdetermined or underdetermined systems for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGELSS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, +* WORK, LWORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK +* REAL RCOND +* .. +* .. Array Arguments .. +* REAL RWORK( * ), S( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGELSS computes the minimum norm solution to a complex linear +*> least squares problem: +*> +*> Minimize 2-norm(| b - A*x |). +*> +*> using the singular value decomposition (SVD) of A. A is an M-by-N +*> matrix which may be rank-deficient. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix +*> X. +*> +*> The effective rank of A is determined by treating as zero those +*> singular values which are less than RCOND times the largest singular +*> value. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the first min(m,n) rows of A are overwritten with +*> its right singular vectors, stored rowwise. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the M-by-NRHS right hand side matrix B. +*> On exit, B is overwritten by the N-by-NRHS solution matrix X. +*> If m >= n and RANK = n, the residual sum-of-squares for +*> the solution in the i-th column is given by the sum of +*> squares of the modulus of elements n+1:m in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M,N). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (min(M,N)) +*> The singular values of A in decreasing order. +*> The condition number of A in the 2-norm = S(1)/S(min(m,n)). +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is REAL +*> RCOND is used to determine the effective rank of A. +*> Singular values S(i) <= RCOND*S(1) are treated as zero. +*> If RCOND < 0, machine precision is used instead. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The effective rank of A, i.e., the number of singular values +*> which are greater than RCOND*S(1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 1, and also: +*> LWORK >= 2*min(M,N) + max(M,N,NRHS) +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (5*min(M,N)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: the algorithm for computing the SVD failed to converge; +*> if INFO = i, i off-diagonal elements of an intermediate +*> bidiagonal form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexGEsolve +* +* ===================================================================== + SUBROUTINE CGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, + $ WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ), S( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER BL, CHUNK, I, IASCL, IBSCL, IE, IL, IRWORK, + $ ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN, + $ MAXWRK, MINMN, MINWRK, MM, MNTHR + INTEGER LWORK_CGEQRF, LWORK_CUNMQR, LWORK_CGEBRD, + $ LWORK_CUNMBR, LWORK_CUNGBR, LWORK_CUNMLQ, + $ LWORK_CGELQF + REAL ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR +* .. +* .. Local Arrays .. + COMPLEX DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CBDSQR, CCOPY, CGEBRD, CGELQF, CGEMM, CGEMV, + $ CGEQRF, CLACPY, CLASCL, CLASET, CSRSCL, CUNGBR, + $ CUNMBR, CUNMLQ, CUNMQR, SLABAD, SLASCL, SLASET, + $ XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL ILAENV, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace refers +* to real workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + IF( MINMN.GT.0 ) THEN + MM = M + MNTHR = ILAENV( 6, 'CGELSS', ' ', M, N, NRHS, -1 ) + IF( M.GE.N .AND. M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than +* columns +* +* Compute space needed for CGEQRF + CALL CGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, INFO ) + LWORK_CGEQRF=DUM(1) +* Compute space needed for CUNMQR + CALL CUNMQR( 'L', 'C', M, NRHS, N, A, LDA, DUM(1), B, + $ LDB, DUM(1), -1, INFO ) + LWORK_CUNMQR=DUM(1) + MM = N + MAXWRK = MAX( MAXWRK, N + N*ILAENV( 1, 'CGEQRF', ' ', M, + $ N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, N + NRHS*ILAENV( 1, 'CUNMQR', 'LC', + $ M, NRHS, N, -1 ) ) + END IF + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined +* +* Compute space needed for CGEBRD + CALL CGEBRD( MM, N, A, LDA, S, S, DUM(1), DUM(1), DUM(1), + $ -1, INFO ) + LWORK_CGEBRD=DUM(1) +* Compute space needed for CUNMBR + CALL CUNMBR( 'Q', 'L', 'C', MM, NRHS, N, A, LDA, DUM(1), + $ B, LDB, DUM(1), -1, INFO ) + LWORK_CUNMBR=DUM(1) +* Compute space needed for CUNGBR + CALL CUNGBR( 'P', N, N, N, A, LDA, DUM(1), + $ DUM(1), -1, INFO ) + LWORK_CUNGBR=DUM(1) +* Compute total workspace needed + MAXWRK = MAX( MAXWRK, 2*N + LWORK_CGEBRD ) + MAXWRK = MAX( MAXWRK, 2*N + LWORK_CUNMBR ) + MAXWRK = MAX( MAXWRK, 2*N + LWORK_CUNGBR ) + MAXWRK = MAX( MAXWRK, N*NRHS ) + MINWRK = 2*N + MAX( NRHS, M ) + END IF + IF( N.GT.M ) THEN + MINWRK = 2*M + MAX( NRHS, N ) + IF( N.GE.MNTHR ) THEN +* +* Path 2a - underdetermined, with many more columns +* than rows +* +* Compute space needed for CGELQF + CALL CGELQF( M, N, A, LDA, DUM(1), DUM(1), + $ -1, INFO ) + LWORK_CGELQF=DUM(1) +* Compute space needed for CGEBRD + CALL CGEBRD( M, M, A, LDA, S, S, DUM(1), DUM(1), + $ DUM(1), -1, INFO ) + LWORK_CGEBRD=DUM(1) +* Compute space needed for CUNMBR + CALL CUNMBR( 'Q', 'L', 'C', M, NRHS, N, A, LDA, + $ DUM(1), B, LDB, DUM(1), -1, INFO ) + LWORK_CUNMBR=DUM(1) +* Compute space needed for CUNGBR + CALL CUNGBR( 'P', M, M, M, A, LDA, DUM(1), + $ DUM(1), -1, INFO ) + LWORK_CUNGBR=DUM(1) +* Compute space needed for CUNMLQ + CALL CUNMLQ( 'L', 'C', N, NRHS, M, A, LDA, DUM(1), + $ B, LDB, DUM(1), -1, INFO ) + LWORK_CUNMLQ=DUM(1) +* Compute total workspace needed + MAXWRK = M + LWORK_CGELQF + MAXWRK = MAX( MAXWRK, 3*M + M*M + LWORK_CGEBRD ) + MAXWRK = MAX( MAXWRK, 3*M + M*M + LWORK_CUNMBR ) + MAXWRK = MAX( MAXWRK, 3*M + M*M + LWORK_CUNGBR ) + IF( NRHS.GT.1 ) THEN + MAXWRK = MAX( MAXWRK, M*M + M + M*NRHS ) + ELSE + MAXWRK = MAX( MAXWRK, M*M + 2*M ) + END IF + MAXWRK = MAX( MAXWRK, M + LWORK_CUNMLQ ) + ELSE +* +* Path 2 - underdetermined +* +* Compute space needed for CGEBRD + CALL CGEBRD( M, N, A, LDA, S, S, DUM(1), DUM(1), + $ DUM(1), -1, INFO ) + LWORK_CGEBRD=DUM(1) +* Compute space needed for CUNMBR + CALL CUNMBR( 'Q', 'L', 'C', M, NRHS, M, A, LDA, + $ DUM(1), B, LDB, DUM(1), -1, INFO ) + LWORK_CUNMBR=DUM(1) +* Compute space needed for CUNGBR + CALL CUNGBR( 'P', M, N, M, A, LDA, DUM(1), + $ DUM(1), -1, INFO ) + LWORK_CUNGBR=DUM(1) + MAXWRK = 2*M + LWORK_CGEBRD + MAXWRK = MAX( MAXWRK, 2*M + LWORK_CUNMBR ) + MAXWRK = MAX( MAXWRK, 2*M + LWORK_CUNGBR ) + MAXWRK = MAX( MAXWRK, N*NRHS ) + END IF + END IF + MAXWRK = MAX( MINWRK, MAXWRK ) + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) + $ INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELSS', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + EPS = SLAMCH( 'P' ) + SFMIN = SLAMCH( 'S' ) + SMLNUM = SFMIN / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + CALL SLASET( 'F', MINMN, 1, ZERO, ZERO, S, MINMN ) + RANK = 0 + GO TO 70 + END IF +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = CLANGE( 'M', M, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Overdetermined case +* + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined +* + MM = M + IF( M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns +* + MM = N + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Multiply B by transpose(Q) +* (CWorkspace: need N+NRHS, prefer N+NRHS*NB) +* (RWorkspace: none) +* + CALL CUNMQR( 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Zero out below R +* + IF( N.GT.1 ) + $ CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + END IF +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( MM, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of R +* (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB) +* (RWorkspace: none) +* + CALL CUNMBR( 'Q', 'L', 'C', MM, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors of R in A +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: none) +* + CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration +* multiply B by transpose of left singular vectors +* compute right singular vectors in A +* (CWorkspace: none) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, 0, NRHS, S, RWORK( IE ), A, LDA, DUM, + $ 1, B, LDB, RWORK( IRWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 10 I = 1, N + IF( S( I ).GT.THR ) THEN + CALL CSRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL CLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB ) + END IF + 10 CONTINUE +* +* Multiply B by right singular vectors +* (CWorkspace: need N, prefer N*NRHS) +* (RWorkspace: none) +* + IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN + CALL CGEMM( 'C', 'N', N, NRHS, N, CONE, A, LDA, B, LDB, + $ CZERO, WORK, LDB ) + CALL CLACPY( 'G', N, NRHS, WORK, LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = LWORK / N + DO 20 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL CGEMM( 'C', 'N', N, BL, N, CONE, A, LDA, B( 1, I ), + $ LDB, CZERO, WORK, N ) + CALL CLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB ) + 20 CONTINUE + ELSE + CALL CGEMV( 'C', N, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 ) + CALL CCOPY( N, WORK, 1, B, 1 ) + END IF +* + ELSE IF( N.GE.MNTHR .AND. LWORK.GE.3*M+M*M+MAX( M, NRHS, N-2*M ) ) + $ THEN +* +* Underdetermined case, M much less than N +* +* Path 2a - underdetermined, with many more columns than rows +* and sufficient workspace for an efficient algorithm +* + LDWORK = M + IF( LWORK.GE.3*M+M*LDA+MAX( M, NRHS, N-2*M ) ) + $ LDWORK = LDA + ITAU = 1 + IWORK = M + 1 +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: none) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) + IL = IWORK +* +* Copy L to WORK(IL), zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, WORK( IL+LDWORK ), + $ LDWORK ) + IE = 1 + ITAUQ = IL + LDWORK*M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* (CWorkspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IL ), LDWORK, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of L +* (CWorkspace: need M*M+3*M+NRHS, prefer M*M+3*M+NRHS*NB) +* (RWorkspace: none) +* + CALL CUNMBR( 'Q', 'L', 'C', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUQ ), B, LDB, WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors of R in WORK(IL) +* (CWorkspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) +* (RWorkspace: none) +* + CALL CUNGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right singular +* vectors of L in WORK(IL) and multiplying B by transpose of +* left singular vectors +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, 0, NRHS, S, RWORK( IE ), WORK( IL ), + $ LDWORK, A, LDA, B, LDB, RWORK( IRWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 30 I = 1, M + IF( S( I ).GT.THR ) THEN + CALL CSRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL CLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB ) + END IF + 30 CONTINUE + IWORK = IL + M*LDWORK +* +* Multiply B by right singular vectors of L in WORK(IL) +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NRHS) +* (RWorkspace: none) +* + IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN + CALL CGEMM( 'C', 'N', M, NRHS, M, CONE, WORK( IL ), LDWORK, + $ B, LDB, CZERO, WORK( IWORK ), LDB ) + CALL CLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = ( LWORK-IWORK+1 ) / M + DO 40 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL CGEMM( 'C', 'N', M, BL, M, CONE, WORK( IL ), LDWORK, + $ B( 1, I ), LDB, CZERO, WORK( IWORK ), M ) + CALL CLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ), + $ LDB ) + 40 CONTINUE + ELSE + CALL CGEMV( 'C', M, M, CONE, WORK( IL ), LDWORK, B( 1, 1 ), + $ 1, CZERO, WORK( IWORK ), 1 ) + CALL CCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 ) + END IF +* +* Zero out below first M rows of B +* + CALL CLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB ) + IWORK = ITAU + M +* +* Multiply transpose(Q) by B +* (CWorkspace: need M+NRHS, prefer M+NHRS*NB) +* (RWorkspace: none) +* + CALL CUNMLQ( 'L', 'C', N, NRHS, M, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* + ELSE +* +* Path 2 - remaining underdetermined cases +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (CWorkspace: need 3*M, prefer 2*M+(M+N)*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors +* (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB) +* (RWorkspace: none) +* + CALL CUNMBR( 'Q', 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors in A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: none) +* + CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, +* computing right singular vectors of A in A and +* multiplying B by transpose of left singular vectors +* (CWorkspace: none) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'L', M, N, 0, NRHS, S, RWORK( IE ), A, LDA, DUM, + $ 1, B, LDB, RWORK( IRWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 50 I = 1, M + IF( S( I ).GT.THR ) THEN + CALL CSRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL CLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB ) + END IF + 50 CONTINUE +* +* Multiply B by right singular vectors of A +* (CWorkspace: need N, prefer N*NRHS) +* (RWorkspace: none) +* + IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN + CALL CGEMM( 'C', 'N', N, NRHS, M, CONE, A, LDA, B, LDB, + $ CZERO, WORK, LDB ) + CALL CLACPY( 'G', N, NRHS, WORK, LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = LWORK / N + DO 60 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL CGEMM( 'C', 'N', N, BL, M, CONE, A, LDA, B( 1, I ), + $ LDB, CZERO, WORK, N ) + CALL CLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB ) + 60 CONTINUE + ELSE + CALL CGEMV( 'C', M, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 ) + CALL CCOPY( N, WORK, 1, B, 1 ) + END IF + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF + 70 CONTINUE + WORK( 1 ) = MAXWRK + RETURN +* +* End of CGELSS +* + END diff --git a/dspl/liblapack/SRC/cgelsy.f b/dspl/liblapack/SRC/cgelsy.f new file mode 100644 index 0000000..d235087 --- /dev/null +++ b/dspl/liblapack/SRC/cgelsy.f @@ -0,0 +1,477 @@ +*> \brief CGELSY solves overdetermined or underdetermined systems for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGELSY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, +* WORK, LWORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* REAL RWORK( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGELSY computes the minimum-norm solution to a complex linear least +*> squares problem: +*> minimize || A * X - B || +*> using a complete orthogonal factorization of A. A is an M-by-N +*> matrix which may be rank-deficient. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> +*> The routine first computes a QR factorization with column pivoting: +*> A * P = Q * [ R11 R12 ] +*> [ 0 R22 ] +*> with R11 defined as the largest leading submatrix whose estimated +*> condition number is less than 1/RCOND. The order of R11, RANK, +*> is the effective rank of A. +*> +*> Then, R22 is considered to be negligible, and R12 is annihilated +*> by unitary transformations from the right, arriving at the +*> complete orthogonal factorization: +*> A * P = Q * [ T11 0 ] * Z +*> [ 0 0 ] +*> The minimum-norm solution is then +*> X = P * Z**H [ inv(T11)*Q1**H*B ] +*> [ 0 ] +*> where Q1 consists of the first RANK columns of Q. +*> +*> This routine is basically identical to the original xGELSX except +*> three differences: +*> o The permutation of matrix B (the right hand side) is faster and +*> more simple. +*> o The call to the subroutine xGEQPF has been substituted by the +*> the call to the subroutine xGEQP3. This subroutine is a Blas-3 +*> version of the QR factorization with column pivoting. +*> o Matrix B (the right hand side) is updated with Blas-3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A has been overwritten by details of its +*> complete orthogonal factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the M-by-NRHS right hand side matrix B. +*> On exit, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M,N). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +*> to the front of AP, otherwise column i is a free column. +*> On exit, if JPVT(i) = k, then the i-th column of A*P +*> was the k-th column of A. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is REAL +*> RCOND is used to determine the effective rank of A, which +*> is defined as the order of the largest leading triangular +*> submatrix R11 in the QR factorization with pivoting of A, +*> whose estimated condition number < 1/RCOND. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The effective rank of A, i.e., the order of the submatrix +*> R11. This is the same as the order of the submatrix T11 +*> in the complete orthogonal factorization of A. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> The unblocked strategy requires that: +*> LWORK >= MN + MAX( 2*MN, N+1, MN+NRHS ) +*> where MN = min(M,N). +*> The block algorithm requires that: +*> LWORK >= MN + MAX( 2*MN, NB*(N+1), MN+MN*NB, MN+NB*NRHS ) +*> where NB is an upper bound on the blocksize returned +*> by ILAENV for the routines CGEQP3, CTZRZF, CTZRQF, CUNMQR, +*> and CUNMRZ. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEsolve +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA \n +*> E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain \n +*> G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain \n +*> +* ===================================================================== + SUBROUTINE CGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, + $ WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IMAX, IMIN + PARAMETER ( IMAX = 1, IMIN = 2 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IASCL, IBSCL, ISMAX, ISMIN, J, LWKOPT, MN, + $ NB, NB1, NB2, NB3, NB4 + REAL ANRM, BIGNUM, BNRM, SMAX, SMAXPR, SMIN, SMINPR, + $ SMLNUM, WSIZE + COMPLEX C1, C2, S1, S2 +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEQP3, CLAIC1, CLASCL, CLASET, CTRSM, + $ CTZRZF, CUNMQR, CUNMRZ, SLABAD, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL CLANGE, ILAENV, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, CMPLX +* .. +* .. Executable Statements .. +* + MN = MIN( M, N ) + ISMIN = MN + 1 + ISMAX = 2*MN + 1 +* +* Test the input arguments. +* + INFO = 0 + NB1 = ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'CGERQF', ' ', M, N, -1, -1 ) + NB3 = ILAENV( 1, 'CUNMQR', ' ', M, N, NRHS, -1 ) + NB4 = ILAENV( 1, 'CUNMRQ', ' ', M, N, NRHS, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKOPT = MAX( 1, MN+2*N+NB*(N+1), 2*MN+NB*NRHS ) + WORK( 1 ) = CMPLX( LWKOPT ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.( MN+MAX( 2*MN, N+1, MN+NRHS ) ) .AND. + $ .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGELSY', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max entries outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + RANK = 0 + GO TO 70 + END IF +* + BNRM = CLANGE( 'M', M, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Compute QR factorization with column pivoting of A: +* A * P = Q * R +* + CALL CGEQP3( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), + $ LWORK-MN, RWORK, INFO ) + WSIZE = MN + REAL( WORK( MN+1 ) ) +* +* complex workspace: MN+NB*(N+1). real workspace 2*N. +* Details of Householder rotations stored in WORK(1:MN). +* +* Determine RANK using incremental condition estimation +* + WORK( ISMIN ) = CONE + WORK( ISMAX ) = CONE + SMAX = ABS( A( 1, 1 ) ) + SMIN = SMAX + IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN + RANK = 0 + CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + GO TO 70 + ELSE + RANK = 1 + END IF +* + 10 CONTINUE + IF( RANK.LT.MN ) THEN + I = RANK + 1 + CALL CLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ), + $ A( I, I ), SMINPR, S1, C1 ) + CALL CLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ), + $ A( I, I ), SMAXPR, S2, C2 ) +* + IF( SMAXPR*RCOND.LE.SMINPR ) THEN + DO 20 I = 1, RANK + WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 ) + WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 ) + 20 CONTINUE + WORK( ISMIN+RANK ) = C1 + WORK( ISMAX+RANK ) = C2 + SMIN = SMINPR + SMAX = SMAXPR + RANK = RANK + 1 + GO TO 10 + END IF + END IF +* +* complex workspace: 3*MN. +* +* Logically partition R = [ R11 R12 ] +* [ 0 R22 ] +* where R11 = R(1:RANK,1:RANK) +* +* [R11,R12] = [ T11, 0 ] * Y +* + IF( RANK.LT.N ) + $ CALL CTZRZF( RANK, N, A, LDA, WORK( MN+1 ), WORK( 2*MN+1 ), + $ LWORK-2*MN, INFO ) +* +* complex workspace: 2*MN. +* Details of Householder rotations stored in WORK(MN+1:2*MN) +* +* B(1:M,1:NRHS) := Q**H * B(1:M,1:NRHS) +* + CALL CUNMQR( 'Left', 'Conjugate transpose', M, NRHS, MN, A, LDA, + $ WORK( 1 ), B, LDB, WORK( 2*MN+1 ), LWORK-2*MN, INFO ) + WSIZE = MAX( WSIZE, 2*MN+REAL( WORK( 2*MN+1 ) ) ) +* +* complex workspace: 2*MN+NB*NRHS. +* +* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) +* + CALL CTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK, + $ NRHS, CONE, A, LDA, B, LDB ) +* + DO 40 J = 1, NRHS + DO 30 I = RANK + 1, N + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Y**H * B(1:N,1:NRHS) +* + IF( RANK.LT.N ) THEN + CALL CUNMRZ( 'Left', 'Conjugate transpose', N, NRHS, RANK, + $ N-RANK, A, LDA, WORK( MN+1 ), B, LDB, + $ WORK( 2*MN+1 ), LWORK-2*MN, INFO ) + END IF +* +* complex workspace: 2*MN+NRHS. +* +* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) +* + DO 60 J = 1, NRHS + DO 50 I = 1, N + WORK( JPVT( I ) ) = B( I, J ) + 50 CONTINUE + CALL CCOPY( N, WORK( 1 ), 1, B( 1, J ), 1 ) + 60 CONTINUE +* +* complex workspace: N. +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL CLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL CLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 70 CONTINUE + WORK( 1 ) = CMPLX( LWKOPT ) +* + RETURN +* +* End of CGELSY +* + END diff --git a/dspl/liblapack/SRC/cgemlq.f b/dspl/liblapack/SRC/cgemlq.f new file mode 100644 index 0000000..2f44e7c --- /dev/null +++ b/dspl/liblapack/SRC/cgemlq.f @@ -0,0 +1,283 @@ +* +* Definition: +* =========== +* +* SUBROUTINE CGEMLQ( SIDE, TRANS, M, N, K, A, LDA, T, +* $ TSIZE, C, LDC, WORK, LWORK, INFO ) +* +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, LDA, M, N, K, LDT, TSIZE, LWORK, LDC +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), T( * ), C(LDC, * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEMLQ overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> where Q is a complex unitary matrix defined as the product +*> of blocked elementary reflectors computed by short wide +*> LQ factorization (CGELQ) +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >=0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> Part of the data structure to represent Q as returned by CGELQ. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX array, dimension (MAX(5,TSIZE)). +*> Part of the data structure to represent Q as returned by CGELQ. +*> \endverbatim +*> +*> \param[in] TSIZE +*> \verbatim +*> TSIZE is INTEGER +*> The dimension of the array T. TSIZE >= 5. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If LWORK = -1, then a workspace query is assumed. The routine +*> only calculates the size of the WORK array, returns this +*> value as WORK(1), and no error message related to WORK +*> is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details +* ==================== +*> +*> \verbatim +*> +*> These details are particular for this LAPACK implementation. Users should not +*> take them for granted. These details may change in the future, and are unlikely not +*> true for another LAPACK implementation. These details are relevant if one wants +*> to try to understand the code. They are not part of the interface. +*> +*> In this version, +*> +*> T(2): row block size (MB) +*> T(3): column block size (NB) +*> T(6:TSIZE): data structure needed for Q, computed by +*> CLASWQR or CGELQT +*> +*> Depending on the matrix dimensions M and N, and row and column +*> block sizes MB and NB returned by ILAENV, CGELQ will use either +*> CLASWLQ (if the matrix is wide-and-short) or CGELQT to compute +*> the LQ factorization. +*> This version of CGEMLQ will use either CLAMSWLQ or CGEMLQT to +*> multiply matrix Q by another matrix. +*> Further Details in CLAMSWLQ or CGEMLQT. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGEMLQ( SIDE, TRANS, M, N, K, A, LDA, T, TSIZE, + $ C, LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, LDA, M, N, K, TSIZE, LWORK, LDC +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), T( * ), C( LDC, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN, LQUERY + INTEGER MB, NB, LW, NBLCKS, MN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLAMSWLQ, CGEMLQT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN, MOD +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + LQUERY = LWORK.EQ.-1 + NOTRAN = LSAME( TRANS, 'N' ) + TRAN = LSAME( TRANS, 'C' ) + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) +* + MB = INT( T( 2 ) ) + NB = INT( T( 3 ) ) + IF( LEFT ) THEN + LW = N * MB + MN = M + ELSE + LW = M * MB + MN = N + END IF +* + IF( ( NB.GT.K ) .AND. ( MN.GT.K ) ) THEN + IF( MOD( MN - K, NB - K ) .EQ. 0 ) THEN + NBLCKS = ( MN - K ) / ( NB - K ) + ELSE + NBLCKS = ( MN - K ) / ( NB - K ) + 1 + END IF + ELSE + NBLCKS = 1 + END IF +* + INFO = 0 + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.MN ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( TSIZE.LT.5 ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( ( LWORK.LT.MAX( 1, LW ) ) .AND. ( .NOT.LQUERY ) ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = REAL( LW ) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEMLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, K ).EQ.0 ) THEN + RETURN + END IF +* + IF( ( LEFT .AND. M.LE.K ) .OR. ( RIGHT .AND. N.LE.K ) + $ .OR. ( NB.LE.K ) .OR. ( NB.GE.MAX( M, N, K ) ) ) THEN + CALL CGEMLQT( SIDE, TRANS, M, N, K, MB, A, LDA, + $ T( 6 ), MB, C, LDC, WORK, INFO ) + ELSE + CALL CLAMSWLQ( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T( 6 ), + $ MB, C, LDC, WORK, LWORK, INFO ) + END IF +* + WORK( 1 ) = REAL( LW ) +* + RETURN +* +* End of CGEMLQ +* + END diff --git a/dspl/liblapack/SRC/cgemlqt.f b/dspl/liblapack/SRC/cgemlqt.f new file mode 100644 index 0000000..e35e421 --- /dev/null +++ b/dspl/liblapack/SRC/cgemlqt.f @@ -0,0 +1,272 @@ +* Definition: +* =========== +* +* SUBROUTINE CGEMLQT( SIDE, TRANS, M, N, K, MB, V, LDV, T, LDT, +* C, LDC, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDV, LDC, M, N, MB, LDT +* .. +* .. Array Arguments .. +* COMPLEX V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEMLQT overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q C C Q +*> TRANS = 'C': Q**H C C Q**H +*> +*> where Q is a complex orthogonal matrix defined as the product of K +*> elementary reflectors: +*> +*> Q = H(1) H(2) . . . H(K) = I - V T V**H +*> +*> generated using the compact WY representation as returned by CGELQT. +*> +*> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The block size used for the storage of T. K >= MB >= 1. +*> This must be the same value of MB used to generate T +*> in DGELQT. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension +*> (LDV,M) if SIDE = 'L', +*> (LDV,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DGELQT in the first K rows of its array argument A. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,K). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,K) +*> The upper triangular factors of the block reflectors +*> as returned by DGELQT, stored as a MB-by-K matrix. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q C, Q**H C, C Q**H or C Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array. The dimension of +*> WORK is N*MB if SIDE = 'L', or M*MB if SIDE = 'R'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + SUBROUTINE CGEMLQT( SIDE, TRANS, M, N, K, MB, V, LDV, T, LDT, + $ C, LDC, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDV, LDC, M, N, MB, LDT +* .. +* .. Array Arguments .. + COMPLEX V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN + INTEGER I, IB, LDWORK, KF +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CLARFB +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* .. Test the input arguments .. +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + TRAN = LSAME( TRANS, 'C' ) + NOTRAN = LSAME( TRANS, 'N' ) +* + IF( LEFT ) THEN + LDWORK = MAX( 1, N ) + ELSE IF ( RIGHT ) THEN + LDWORK = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0) THEN + INFO = -5 + ELSE IF( MB.LT.1 .OR. (MB.GT.K .AND. K.GT.0)) THEN + INFO = -6 + ELSE IF( LDV.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDT.LT.MB ) THEN + INFO = -10 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEMLQT', -INFO ) + RETURN + END IF +* +* .. Quick return if possible .. +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) RETURN +* + IF( LEFT .AND. NOTRAN ) THEN +* + DO I = 1, K, MB + IB = MIN( MB, K-I+1 ) + CALL CLARFB( 'L', 'C', 'F', 'R', M-I+1, N, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( I, 1 ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( RIGHT .AND. TRAN ) THEN +* + DO I = 1, K, MB + IB = MIN( MB, K-I+1 ) + CALL CLARFB( 'R', 'N', 'F', 'R', M, N-I+1, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( 1, I ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( LEFT .AND. TRAN ) THEN +* + KF = ((K-1)/MB)*MB+1 + DO I = KF, 1, -MB + IB = MIN( MB, K-I+1 ) + CALL CLARFB( 'L', 'N', 'F', 'R', M-I+1, N, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( I, 1 ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( RIGHT .AND. NOTRAN ) THEN +* + KF = ((K-1)/MB)*MB+1 + DO I = KF, 1, -MB + IB = MIN( MB, K-I+1 ) + CALL CLARFB( 'R', 'C', 'F', 'R', M, N-I+1, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( 1, I ), LDC, WORK, LDWORK ) + END DO +* + END IF +* + RETURN +* +* End of CGEMLQT +* + END diff --git a/dspl/liblapack/SRC/cgemqr.f b/dspl/liblapack/SRC/cgemqr.f new file mode 100644 index 0000000..a43d7be --- /dev/null +++ b/dspl/liblapack/SRC/cgemqr.f @@ -0,0 +1,285 @@ +* +* Definition: +* =========== +* +* SUBROUTINE CGEMQR( SIDE, TRANS, M, N, K, A, LDA, T, +* $ TSIZE, C, LDC, WORK, LWORK, INFO ) +* +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, LDA, M, N, K, LDT, TSIZE, LWORK, LDC +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), T( * ), C( LDC, * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEMQR overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**H * C C * Q**H +*> +*> where Q is a complex unitary matrix defined as the product +*> of blocked elementary reflectors computed by tall skinny +*> QR factorization (CGEQR) +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >=0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,K) +*> Part of the data structure to represent Q as returned by CGEQR. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX array, dimension (MAX(5,TSIZE)). +*> Part of the data structure to represent Q as returned by CGEQR. +*> \endverbatim +*> +*> \param[in] TSIZE +*> \verbatim +*> TSIZE is INTEGER +*> The dimension of the array T. TSIZE >= 5. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If LWORK = -1, then a workspace query is assumed. The routine +*> only calculates the size of the WORK array, returns this +*> value as WORK(1), and no error message related to WORK +*> is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details +* ==================== +*> +*> \verbatim +*> +*> These details are particular for this LAPACK implementation. Users should not +*> take them for granted. These details may change in the future, and are unlikely not +*> true for another LAPACK implementation. These details are relevant if one wants +*> to try to understand the code. They are not part of the interface. +*> +*> In this version, +*> +*> T(2): row block size (MB) +*> T(3): column block size (NB) +*> T(6:TSIZE): data structure needed for Q, computed by +*> CLATSQR or CGEQRT +*> +*> Depending on the matrix dimensions M and N, and row and column +*> block sizes MB and NB returned by ILAENV, CGEQR will use either +*> CLATSQR (if the matrix is tall-and-skinny) or CGEQRT to compute +*> the QR factorization. +*> This version of CGEMQR will use either CLAMTSQR or CGEMQRT to +*> multiply matrix Q by another matrix. +*> Further Details in CLAMTSQR or CGEMQRT. +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGEMQR( SIDE, TRANS, M, N, K, A, LDA, T, TSIZE, + $ C, LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, LDA, M, N, K, TSIZE, LWORK, LDC +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), T( * ), C( LDC, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN, LQUERY + INTEGER MB, NB, LW, NBLCKS, MN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMQRT, CLAMTSQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN, MOD +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + LQUERY = LWORK.EQ.-1 + NOTRAN = LSAME( TRANS, 'N' ) + TRAN = LSAME( TRANS, 'C' ) + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) +* + MB = INT( T( 2 ) ) + NB = INT( T( 3 ) ) + IF( LEFT ) THEN + LW = N * NB + MN = M + ELSE + LW = MB * NB + MN = N + END IF +* + IF( ( MB.GT.K ) .AND. ( MN.GT.K ) ) THEN + IF( MOD( MN - K, MB - K ).EQ.0 ) THEN + NBLCKS = ( MN - K ) / ( MB - K ) + ELSE + NBLCKS = ( MN - K ) / ( MB - K ) + 1 + END IF + ELSE + NBLCKS = 1 + END IF +* + INFO = 0 + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.MN ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, MN ) ) THEN + INFO = -7 + ELSE IF( TSIZE.LT.5 ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( ( LWORK.LT.MAX( 1, LW ) ) .AND. ( .NOT.LQUERY ) ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LW + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEMQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, K ).EQ.0 ) THEN + RETURN + END IF +* + IF( ( LEFT .AND. M.LE.K ) .OR. ( RIGHT .AND. N.LE.K ) + $ .OR. ( MB.LE.K ) .OR. ( MB.GE.MAX( M, N, K ) ) ) THEN + CALL CGEMQRT( SIDE, TRANS, M, N, K, NB, A, LDA, T( 6 ), + $ NB, C, LDC, WORK, INFO ) + ELSE + CALL CLAMTSQR( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T( 6 ), + $ NB, C, LDC, WORK, LWORK, INFO ) + END IF +* + WORK( 1 ) = LW +* + RETURN +* +* End of CGEMQR +* + END diff --git a/dspl/liblapack/SRC/cgemqrt.f b/dspl/liblapack/SRC/cgemqrt.f new file mode 100644 index 0000000..4926fb7 --- /dev/null +++ b/dspl/liblapack/SRC/cgemqrt.f @@ -0,0 +1,291 @@ +*> \brief \b CGEMQRT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEMQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEMQRT( SIDE, TRANS, M, N, K, NB, V, LDV, T, LDT, +* C, LDC, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDV, LDC, M, N, NB, LDT +* .. +* .. Array Arguments .. +* COMPLEX V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEMQRT overwrites the general complex M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q C C Q +*> TRANS = 'C': Q**H C C Q**H +*> +*> where Q is a complex orthogonal matrix defined as the product of K +*> elementary reflectors: +*> +*> Q = H(1) H(2) . . . H(K) = I - V T V**H +*> +*> generated using the compact WY representation as returned by CGEQRT. +*> +*> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The block size used for the storage of T. K >= NB >= 1. +*> This must be the same value of NB used to generate T +*> in CGEQRT. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension (LDV,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> CGEQRT in the first K columns of its array argument A. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,K) +*> The upper triangular factors of the block reflectors +*> as returned by CGEQRT, stored as a NB-by-N matrix. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q C, Q**H C, C Q**H or C Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array. The dimension of WORK is +*> N*NB if SIDE = 'L', or M*NB if SIDE = 'R'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +* ===================================================================== + SUBROUTINE CGEMQRT( SIDE, TRANS, M, N, K, NB, V, LDV, T, LDT, + $ C, LDC, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDV, LDC, M, N, NB, LDT +* .. +* .. Array Arguments .. + COMPLEX V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN + INTEGER I, IB, LDWORK, KF, Q +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CLARFB +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* .. Test the input arguments .. +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + TRAN = LSAME( TRANS, 'C' ) + NOTRAN = LSAME( TRANS, 'N' ) +* + IF( LEFT ) THEN + LDWORK = MAX( 1, N ) + Q = M + ELSE IF ( RIGHT ) THEN + LDWORK = MAX( 1, M ) + Q = N + END IF + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.Q ) THEN + INFO = -5 + ELSE IF( NB.LT.1 .OR. (NB.GT.K .AND. K.GT.0)) THEN + INFO = -6 + ELSE IF( LDV.LT.MAX( 1, Q ) ) THEN + INFO = -8 + ELSE IF( LDT.LT.NB ) THEN + INFO = -10 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEMQRT', -INFO ) + RETURN + END IF +* +* .. Quick return if possible .. +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) RETURN +* + IF( LEFT .AND. TRAN ) THEN +* + DO I = 1, K, NB + IB = MIN( NB, K-I+1 ) + CALL CLARFB( 'L', 'C', 'F', 'C', M-I+1, N, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( I, 1 ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( RIGHT .AND. NOTRAN ) THEN +* + DO I = 1, K, NB + IB = MIN( NB, K-I+1 ) + CALL CLARFB( 'R', 'N', 'F', 'C', M, N-I+1, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( 1, I ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( LEFT .AND. NOTRAN ) THEN +* + KF = ((K-1)/NB)*NB+1 + DO I = KF, 1, -NB + IB = MIN( NB, K-I+1 ) + CALL CLARFB( 'L', 'N', 'F', 'C', M-I+1, N, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( I, 1 ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( RIGHT .AND. TRAN ) THEN +* + KF = ((K-1)/NB)*NB+1 + DO I = KF, 1, -NB + IB = MIN( NB, K-I+1 ) + CALL CLARFB( 'R', 'C', 'F', 'C', M, N-I+1, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( 1, I ), LDC, WORK, LDWORK ) + END DO +* + END IF +* + RETURN +* +* End of CGEMQRT +* + END diff --git a/dspl/liblapack/SRC/cgeql2.f b/dspl/liblapack/SRC/cgeql2.f new file mode 100644 index 0000000..b9521ba --- /dev/null +++ b/dspl/liblapack/SRC/cgeql2.f @@ -0,0 +1,192 @@ +*> \brief \b CGEQL2 computes the QL factorization of a general rectangular matrix using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEQL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEQL2( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEQL2 computes a QL factorization of a complex m by n matrix A: +*> A = Q * L. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, if m >= n, the lower triangle of the subarray +*> A(m-n+1:m,1:n) contains the n by n lower triangular matrix L; +*> if m <= n, the elements on and below the (n-m)-th +*> superdiagonal contain the m by n lower trapezoidal matrix L; +*> the remaining elements, with the array TAU, represent the +*> unitary matrix Q as a product of elementary reflectors +*> (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in +*> A(1:m-k+i-1,n-k+i), and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGEQL2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQL2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = K, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* A(1:m-k+i-1,n-k+i) +* + ALPHA = A( M-K+I, N-K+I ) + CALL CLARFG( M-K+I, ALPHA, A( 1, N-K+I ), 1, TAU( I ) ) +* +* Apply H(i)**H to A(1:m-k+i,1:n-k+i-1) from the left +* + A( M-K+I, N-K+I ) = ONE + CALL CLARF( 'Left', M-K+I, N-K+I-1, A( 1, N-K+I ), 1, + $ CONJG( TAU( I ) ), A, LDA, WORK ) + A( M-K+I, N-K+I ) = ALPHA + 10 CONTINUE + RETURN +* +* End of CGEQL2 +* + END diff --git a/dspl/liblapack/SRC/cgeqlf.f b/dspl/liblapack/SRC/cgeqlf.f new file mode 100644 index 0000000..fdd03f9 --- /dev/null +++ b/dspl/liblapack/SRC/cgeqlf.f @@ -0,0 +1,287 @@ +*> \brief \b CGEQLF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEQLF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEQLF computes a QL factorization of a complex M-by-N matrix A: +*> A = Q * L. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if m >= n, the lower triangle of the subarray +*> A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L; +*> if m <= n, the elements on and below the (n-m)-th +*> superdiagonal contain the M-by-N lower trapezoidal matrix L; +*> the remaining elements, with the array TAU, represent the +*> unitary matrix Q as a product of elementary reflectors +*> (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in +*> A(1:m-k+i-1,n-k+i), and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT, + $ MU, NB, NBMIN, NU, NX +* .. +* .. External Subroutines .. + EXTERNAL CGEQL2, CLARFB, CLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF +* + IF( INFO.EQ.0 ) THEN + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'CGEQLF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQLF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CGEQLF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CGEQLF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially. +* The last kk columns are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* + DO 10 I = K - KK + KI + 1, K - KK + 1, -NB + IB = MIN( K-I+1, NB ) +* +* Compute the QL factorization of the current block +* A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1) +* + CALL CGEQL2( M-K+I+IB-1, IB, A( 1, N-K+I ), LDA, TAU( I ), + $ WORK, IINFO ) + IF( N-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL CLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left +* + CALL CLARFB( 'Left', 'Conjugate transpose', 'Backward', + $ 'Columnwise', M-K+I+IB-1, N-K+I-1, IB, + $ A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + MU = M - K + I + NB - 1 + NU = N - K + I + NB - 1 + ELSE + MU = M + NU = N + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 .AND. NU.GT.0 ) + $ CALL CGEQL2( MU, NU, A, LDA, TAU, WORK, IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of CGEQLF +* + END diff --git a/dspl/liblapack/SRC/cgeqp3.f b/dspl/liblapack/SRC/cgeqp3.f new file mode 100644 index 0000000..e3d109d --- /dev/null +++ b/dspl/liblapack/SRC/cgeqp3.f @@ -0,0 +1,372 @@ +*> \brief \b CGEQP3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEQP3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* REAL RWORK( * ) +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEQP3 computes a QR factorization with column pivoting of a +*> matrix A: A*P = Q*R using Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the upper triangle of the array contains the +*> min(M,N)-by-N upper trapezoidal matrix R; the elements below +*> the diagonal, together with the array TAU, represent the +*> unitary matrix Q as a product of min(M,N) elementary +*> reflectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> On entry, if JPVT(J).ne.0, the J-th column of A is permuted +*> to the front of A*P (a leading column); if JPVT(J)=0, +*> the J-th column of A is a free column. +*> On exit, if JPVT(J)=K, then the J-th column of A*P was the +*> the K-th column of A. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO=0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= N+1. +*> For optimal performance LWORK >= ( N+1 )*NB, where NB +*> is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a real/complex vector +*> with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in +*> A(i+1:m,i), and tau in TAU(i). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +*> X. Sun, Computer Science Dept., Duke University, USA +*> +* ===================================================================== + SUBROUTINE CGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, RWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER INB, INBMIN, IXOVER + PARAMETER ( INB = 1, INBMIN = 2, IXOVER = 3 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER FJB, IWS, J, JB, LWKOPT, MINMN, MINWS, NA, NB, + $ NBMIN, NFXD, NX, SM, SMINMN, SN, TOPBMN +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CLAQP2, CLAQPS, CSWAP, CUNMQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SCNRM2 + EXTERNAL ILAENV, SCNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test input arguments +* ==================== +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF +* + IF( INFO.EQ.0 ) THEN + MINMN = MIN( M, N ) + IF( MINMN.EQ.0 ) THEN + IWS = 1 + LWKOPT = 1 + ELSE + IWS = N + 1 + NB = ILAENV( INB, 'CGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = ( N + 1 )*NB + END IF + WORK( 1 ) = CMPLX( LWKOPT ) +* + IF( ( LWORK.LT.IWS ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQP3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Move initial columns up front. +* + NFXD = 1 + DO 10 J = 1, N + IF( JPVT( J ).NE.0 ) THEN + IF( J.NE.NFXD ) THEN + CALL CSWAP( M, A( 1, J ), 1, A( 1, NFXD ), 1 ) + JPVT( J ) = JPVT( NFXD ) + JPVT( NFXD ) = J + ELSE + JPVT( J ) = J + END IF + NFXD = NFXD + 1 + ELSE + JPVT( J ) = J + END IF + 10 CONTINUE + NFXD = NFXD - 1 +* +* Factorize fixed columns +* ======================= +* +* Compute the QR factorization of fixed columns and update +* remaining columns. +* + IF( NFXD.GT.0 ) THEN + NA = MIN( M, NFXD ) +*CC CALL CGEQR2( M, NA, A, LDA, TAU, WORK, INFO ) + CALL CGEQRF( M, NA, A, LDA, TAU, WORK, LWORK, INFO ) + IWS = MAX( IWS, INT( WORK( 1 ) ) ) + IF( NA.LT.N ) THEN +*CC CALL CUNM2R( 'Left', 'Conjugate Transpose', M, N-NA, +*CC $ NA, A, LDA, TAU, A( 1, NA+1 ), LDA, WORK, +*CC $ INFO ) + CALL CUNMQR( 'Left', 'Conjugate Transpose', M, N-NA, NA, A, + $ LDA, TAU, A( 1, NA+1 ), LDA, WORK, LWORK, + $ INFO ) + IWS = MAX( IWS, INT( WORK( 1 ) ) ) + END IF + END IF +* +* Factorize free columns +* ====================== +* + IF( NFXD.LT.MINMN ) THEN +* + SM = M - NFXD + SN = N - NFXD + SMINMN = MINMN - NFXD +* +* Determine the block size. +* + NB = ILAENV( INB, 'CGEQRF', ' ', SM, SN, -1, -1 ) + NBMIN = 2 + NX = 0 +* + IF( ( NB.GT.1 ) .AND. ( NB.LT.SMINMN ) ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( IXOVER, 'CGEQRF', ' ', SM, SN, -1, + $ -1 ) ) +* +* + IF( NX.LT.SMINMN ) THEN +* +* Determine if workspace is large enough for blocked code. +* + MINWS = ( SN+1 )*NB + IWS = MAX( IWS, MINWS ) + IF( LWORK.LT.MINWS ) THEN +* +* Not enough workspace to use optimal NB: Reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / ( SN+1 ) + NBMIN = MAX( 2, ILAENV( INBMIN, 'CGEQRF', ' ', SM, SN, + $ -1, -1 ) ) +* +* + END IF + END IF + END IF +* +* Initialize partial column norms. The first N elements of work +* store the exact column norms. +* + DO 20 J = NFXD + 1, N + RWORK( J ) = SCNRM2( SM, A( NFXD+1, J ), 1 ) + RWORK( N+J ) = RWORK( J ) + 20 CONTINUE +* + IF( ( NB.GE.NBMIN ) .AND. ( NB.LT.SMINMN ) .AND. + $ ( NX.LT.SMINMN ) ) THEN +* +* Use blocked code initially. +* + J = NFXD + 1 +* +* Compute factorization: while loop. +* +* + TOPBMN = MINMN - NX + 30 CONTINUE + IF( J.LE.TOPBMN ) THEN + JB = MIN( NB, TOPBMN-J+1 ) +* +* Factorize JB columns among columns J:N. +* + CALL CLAQPS( M, N-J+1, J-1, JB, FJB, A( 1, J ), LDA, + $ JPVT( J ), TAU( J ), RWORK( J ), + $ RWORK( N+J ), WORK( 1 ), WORK( JB+1 ), + $ N-J+1 ) +* + J = J + FJB + GO TO 30 + END IF + ELSE + J = NFXD + 1 + END IF +* +* Use unblocked code to factor the last or only block. +* +* + IF( J.LE.MINMN ) + $ CALL CLAQP2( M, N-J+1, J-1, A( 1, J ), LDA, JPVT( J ), + $ TAU( J ), RWORK( J ), RWORK( N+J ), WORK( 1 ) ) +* + END IF +* + WORK( 1 ) = CMPLX( LWKOPT ) + RETURN +* +* End of CGEQP3 +* + END diff --git a/dspl/liblapack/SRC/cgeqr.f b/dspl/liblapack/SRC/cgeqr.f new file mode 100644 index 0000000..a00ef45 --- /dev/null +++ b/dspl/liblapack/SRC/cgeqr.f @@ -0,0 +1,307 @@ +* +* Definition: +* =========== +* +* SUBROUTINE CGEQR( M, N, A, LDA, T, TSIZE, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, TSIZE, LWORK +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), T( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> CGEQR computes a QR factorization of an M-by-N matrix A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix R +*> (R is upper triangular if M >= N); +*> the elements below the diagonal are used to store part of the +*> data structure to represent Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (MAX(5,TSIZE)) +*> On exit, if INFO = 0, T(1) returns optimal (or either minimal +*> or optimal, if query is assumed) TSIZE. See TSIZE for details. +*> Remaining T contains part of the data structure used to represent Q. +*> If one wants to apply or construct Q, then one needs to keep T +*> (in addition to A) and pass it to further subroutines. +*> \endverbatim +*> +*> \param[in] TSIZE +*> \verbatim +*> TSIZE is INTEGER +*> If TSIZE >= 5, the dimension of the array T. +*> If TSIZE = -1 or -2, then a workspace query is assumed. The routine +*> only calculates the sizes of the T and WORK arrays, returns these +*> values as the first entries of the T and WORK arrays, and no error +*> message related to T or WORK is issued by XERBLA. +*> If TSIZE = -1, the routine calculates optimal size of T for the +*> optimum performance and returns this value in T(1). +*> If TSIZE = -2, the routine calculates minimal size of T and +*> returns this value in T(1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) contains optimal (or either minimal +*> or optimal, if query was assumed) LWORK. +*> See LWORK for details. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If LWORK = -1 or -2, then a workspace query is assumed. The routine +*> only calculates the sizes of the T and WORK arrays, returns these +*> values as the first entries of the T and WORK arrays, and no error +*> message related to T or WORK is issued by XERBLA. +*> If LWORK = -1, the routine calculates optimal size of WORK for the +*> optimal performance and returns this value in WORK(1). +*> If LWORK = -2, the routine calculates minimal size of WORK and +*> returns this value in WORK(1). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details +* ==================== +*> +*> \verbatim +*> +*> The goal of the interface is to give maximum freedom to the developers for +*> creating any QR factorization algorithm they wish. The triangular +*> (trapezoidal) R has to be stored in the upper part of A. The lower part of A +*> and the array T can be used to store any relevant information for applying or +*> constructing the Q factor. The WORK array can safely be discarded after exit. +*> +*> Caution: One should not expect the sizes of T and WORK to be the same from one +*> LAPACK implementation to the other, or even from one execution to the other. +*> A workspace query (for T and WORK) is needed at each execution. However, +*> for a given execution, the size of T and WORK are fixed and will not change +*> from one query to the next. +*> +*> \endverbatim +*> +*> \par Further Details particular to this LAPACK implementation: +* ============================================================== +*> +*> \verbatim +*> +*> These details are particular for this LAPACK implementation. Users should not +*> take them for granted. These details may change in the future, and are unlikely not +*> true for another LAPACK implementation. These details are relevant if one wants +*> to try to understand the code. They are not part of the interface. +*> +*> In this version, +*> +*> T(2): row block size (MB) +*> T(3): column block size (NB) +*> T(6:TSIZE): data structure needed for Q, computed by +*> CLATSQR or CGEQRT +*> +*> Depending on the matrix dimensions M and N, and row and column +*> block sizes MB and NB returned by ILAENV, CGEQR will use either +*> CLATSQR (if the matrix is tall-and-skinny) or CGEQRT to compute +*> the QR factorization. +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGEQR( M, N, A, LDA, T, TSIZE, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, TSIZE, LWORK +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), T( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LQUERY, LMINWS, MINT, MINW + INTEGER MB, NB, MINTSZ, NBLCKS +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLATSQR, CGEQRT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, MOD +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 +* + LQUERY = ( TSIZE.EQ.-1 .OR. TSIZE.EQ.-2 .OR. + $ LWORK.EQ.-1 .OR. LWORK.EQ.-2 ) +* + MINT = .FALSE. + MINW = .FALSE. + IF( TSIZE.EQ.-2 .OR. LWORK.EQ.-2 ) THEN + IF( TSIZE.NE.-1 ) MINT = .TRUE. + IF( LWORK.NE.-1 ) MINW = .TRUE. + END IF +* +* Determine the block size +* + IF( MIN( M, N ).GT.0 ) THEN + MB = ILAENV( 1, 'CGEQR ', ' ', M, N, 1, -1 ) + NB = ILAENV( 1, 'CGEQR ', ' ', M, N, 2, -1 ) + ELSE + MB = M + NB = 1 + END IF + IF( MB.GT.M .OR. MB.LE.N ) MB = M + IF( NB.GT.MIN( M, N ) .OR. NB.LT.1 ) NB = 1 + MINTSZ = N + 5 + IF( MB.GT.N .AND. M.GT.N ) THEN + IF( MOD( M - N, MB - N ).EQ.0 ) THEN + NBLCKS = ( M - N ) / ( MB - N ) + ELSE + NBLCKS = ( M - N ) / ( MB - N ) + 1 + END IF + ELSE + NBLCKS = 1 + END IF +* +* Determine if the workspace size satisfies minimal size +* + LMINWS = .FALSE. + IF( ( TSIZE.LT.MAX( 1, NB*N*NBLCKS + 5 ) .OR. LWORK.LT.NB*N ) + $ .AND. ( LWORK.GE.N ) .AND. ( TSIZE.GE.MINTSZ ) + $ .AND. ( .NOT.LQUERY ) ) THEN + IF( TSIZE.LT.MAX( 1, NB*N*NBLCKS + 5 ) ) THEN + LMINWS = .TRUE. + NB = 1 + MB = M + END IF + IF( LWORK.LT.NB*N ) THEN + LMINWS = .TRUE. + NB = 1 + END IF + END IF +* + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( TSIZE.LT.MAX( 1, NB*N*NBLCKS + 5 ) + $ .AND. ( .NOT.LQUERY ) .AND. ( .NOT.LMINWS ) ) THEN + INFO = -6 + ELSE IF( ( LWORK.LT.MAX( 1, N*NB ) ) .AND. ( .NOT.LQUERY ) + $ .AND. ( .NOT.LMINWS ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( MINT ) THEN + T( 1 ) = MINTSZ + ELSE + T( 1 ) = NB*N*NBLCKS + 5 + END IF + T( 2 ) = MB + T( 3 ) = NB + IF( MINW ) THEN + WORK( 1 ) = MAX( 1, N ) + ELSE + WORK( 1 ) = MAX( 1, NB*N ) + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N ).EQ.0 ) THEN + RETURN + END IF +* +* The QR Decomposition +* + IF( ( M.LE.N ) .OR. ( MB.LE.N ) .OR. ( MB.GE.M ) ) THEN + CALL CGEQRT( M, N, NB, A, LDA, T( 6 ), NB, WORK, INFO ) + ELSE + CALL CLATSQR( M, N, MB, NB, A, LDA, T( 6 ), NB, WORK, + $ LWORK, INFO ) + END IF +* + WORK( 1 ) = MAX( 1, NB*N ) +* + RETURN +* +* End of CGEQR +* + END diff --git a/dspl/liblapack/SRC/cgeqr2.f b/dspl/liblapack/SRC/cgeqr2.f new file mode 100644 index 0000000..1b2030b --- /dev/null +++ b/dspl/liblapack/SRC/cgeqr2.f @@ -0,0 +1,192 @@ +*> \brief \b CGEQR2 computes the QR factorization of a general rectangular matrix using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEQR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEQR2( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEQR2 computes a QR factorization of a complex m by n matrix A: +*> A = Q * R. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(m,n) by n upper trapezoidal matrix R (R is +*> upper triangular if m >= n); the elements below the diagonal, +*> with the array TAU, represent the unitary matrix Q as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +*> and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGEQR2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQR2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL CLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAU( I ) ) + IF( I.LT.N ) THEN +* +* Apply H(i)**H to A(i:m,i+1:n) from the left +* + ALPHA = A( I, I ) + A( I, I ) = ONE + CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1, + $ CONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK ) + A( I, I ) = ALPHA + END IF + 10 CONTINUE + RETURN +* +* End of CGEQR2 +* + END diff --git a/dspl/liblapack/SRC/cgeqr2p.f b/dspl/liblapack/SRC/cgeqr2p.f new file mode 100644 index 0000000..3c64255 --- /dev/null +++ b/dspl/liblapack/SRC/cgeqr2p.f @@ -0,0 +1,195 @@ +*> \brief \b CGEQR2P computes the QR factorization of a general rectangular matrix with non-negative diagonal elements using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEQR2P + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEQR2P computes a QR factorization of a complex m by n matrix A: +*> A = Q * R. The diagonal entries of R are real and nonnegative. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(m,n) by n upper trapezoidal matrix R (R is +*> upper triangular if m >= n). The diagonal entries of R are +*> real and nonnegative; the elements below the diagonal, +*> with the array TAU, represent the unitary matrix Q as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +*> and tau in TAU(i). +*> +*> See Lapack Working Note 203 for details +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CLARFGP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQR2P', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL CLARFGP( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAU( I ) ) + IF( I.LT.N ) THEN +* +* Apply H(i)**H to A(i:m,i+1:n) from the left +* + ALPHA = A( I, I ) + A( I, I ) = ONE + CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1, + $ CONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK ) + A( I, I ) = ALPHA + END IF + 10 CONTINUE + RETURN +* +* End of CGEQR2P +* + END diff --git a/dspl/liblapack/SRC/cgeqrf.f b/dspl/liblapack/SRC/cgeqrf.f new file mode 100644 index 0000000..8333847 --- /dev/null +++ b/dspl/liblapack/SRC/cgeqrf.f @@ -0,0 +1,270 @@ +*> \brief \b CGEQRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEQRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEQRF computes a QR factorization of a complex M-by-N matrix A: +*> A = Q * R. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix R (R is +*> upper triangular if m >= n); the elements below the diagonal, +*> with the array TAU, represent the unitary matrix Q as a +*> product of min(m,n) elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +*> and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CGEQR2, CLARFB, CLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CGEQRF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CGEQRF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the QR factorization of the current block +* A(i:m,i:i+ib-1) +* + CALL CGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL CLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**H to A(i:m,i+ib:n) from the left +* + CALL CLARFB( 'Left', 'Conjugate transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL CGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of CGEQRF +* + END diff --git a/dspl/liblapack/SRC/cgeqrfp.f b/dspl/liblapack/SRC/cgeqrfp.f new file mode 100644 index 0000000..a56508b --- /dev/null +++ b/dspl/liblapack/SRC/cgeqrfp.f @@ -0,0 +1,273 @@ +*> \brief \b CGEQRFP +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEQRFP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEQRFP computes a QR factorization of a complex M-by-N matrix A: +*> A = Q * R. The diagonal entries of R are real and nonnegative. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix R (R is +*> upper triangular if m >= n). The diagonal entries of R +*> are real and nonnegative; the elements below the diagonal, +*> with the array TAU, represent the unitary matrix Q as a +*> product of min(m,n) elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +*> and tau in TAU(i). +*> +*> See Lapack Working Note 203 for details +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CGEQR2P, CLARFB, CLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQRFP', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CGEQRF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CGEQRF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the QR factorization of the current block +* A(i:m,i:i+ib-1) +* + CALL CGEQR2P( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL CLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**H to A(i:m,i+ib:n) from the left +* + CALL CLARFB( 'Left', 'Conjugate transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL CGEQR2P( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of CGEQRFP +* + END diff --git a/dspl/liblapack/SRC/cgeqrt.f b/dspl/liblapack/SRC/cgeqrt.f new file mode 100644 index 0000000..ef9c1c3 --- /dev/null +++ b/dspl/liblapack/SRC/cgeqrt.f @@ -0,0 +1,218 @@ +*> \brief \b CGEQRT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDT, M, N, NB +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEQRT computes a blocked QR factorization of a complex M-by-N matrix A +*> using the compact WY representation of Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The block size to be used in the blocked QR. MIN(M,N) >= NB >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix R (R is +*> upper triangular if M >= N); the elements below the diagonal +*> are the columns of V. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,MIN(M,N)) +*> The upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See below +*> for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (NB*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix V stores the elementary reflectors H(i) in the i-th column +*> below the diagonal. For example, if M=5 and N=3, the matrix V is +*> +*> V = ( 1 ) +*> ( v1 1 ) +*> ( v1 v2 1 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> where the vi's represent the vectors which define H(i), which are returned +*> in the matrix A. The 1's along the diagonal of V are not stored in A. +*> +*> Let K=MIN(M,N). The number of blocks is B = ceiling(K/NB), where each +*> block is of order NB except for the last block, which is of order +*> IB = K - (B-1)*NB. For each of the B blocks, a upper triangular block +*> reflector factor is computed: T1, T2, ..., TB. The NB-by-NB (and IB-by-IB +*> for the last block) T's are stored in the NB-by-K matrix T as +*> +*> T = (T1 T2 ... TB). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDT, M, N, NB +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + INTEGER I, IB, IINFO, K + LOGICAL USE_RECURSIVE_QR + PARAMETER( USE_RECURSIVE_QR=.TRUE. ) +* .. +* .. External Subroutines .. + EXTERNAL CGEQRT2, CGEQRT3, CLARFB, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NB.LT.1 .OR. ( NB.GT.MIN(M,N) .AND. MIN(M,N).GT.0 ) )THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDT.LT.NB ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQRT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) RETURN +* +* Blocked loop of length K +* + DO I = 1, K, NB + IB = MIN( K-I+1, NB ) +* +* Compute the QR factorization of the current block A(I:M,I:I+IB-1) +* + IF( USE_RECURSIVE_QR ) THEN + CALL CGEQRT3( M-I+1, IB, A(I,I), LDA, T(1,I), LDT, IINFO ) + ELSE + CALL CGEQRT2( M-I+1, IB, A(I,I), LDA, T(1,I), LDT, IINFO ) + END IF + IF( I+IB.LE.N ) THEN +* +* Update by applying H**H to A(I:M,I+IB:N) from the left +* + CALL CLARFB( 'L', 'C', 'F', 'C', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, T( 1, I ), LDT, + $ A( I, I+IB ), LDA, WORK , N-I-IB+1 ) + END IF + END DO + RETURN +* +* End of CGEQRT +* + END diff --git a/dspl/liblapack/SRC/cgeqrt2.f b/dspl/liblapack/SRC/cgeqrt2.f new file mode 100644 index 0000000..9ee3e4f --- /dev/null +++ b/dspl/liblapack/SRC/cgeqrt2.f @@ -0,0 +1,227 @@ +*> \brief \b CGEQRT2 computes a QR factorization of a general real or complex matrix using the compact WY representation of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEQRT2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGEQRT2( M, N, A, LDA, T, LDT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDT, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEQRT2 computes a QR factorization of a complex M-by-N matrix A, +*> using the compact WY representation of Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= N. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the complex M-by-N matrix A. On exit, the elements on and +*> above the diagonal contain the N-by-N upper triangular matrix R; the +*> elements below the diagonal are the columns of V. See below for +*> further details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,N) +*> The N-by-N upper triangular factor of the block reflector. +*> The elements on and above the diagonal contain the block +*> reflector T; the elements below the diagonal are not used. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix V stores the elementary reflectors H(i) in the i-th column +*> below the diagonal. For example, if M=5 and N=3, the matrix V is +*> +*> V = ( 1 ) +*> ( v1 1 ) +*> ( v1 v2 1 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> where the vi's represent the vectors which define H(i), which are returned +*> in the matrix A. The 1's along the diagonal of V are not stored in A. The +*> block reflector H is then given by +*> +*> H = I - V * T * V**H +*> +*> where V**H is the conjugate transpose of V. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGEQRT2( M, N, A, LDA, T, LDT, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDT, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER( ONE = (1.0,0.0), ZERO = (0.0,0.0) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX AII, ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLARFG, CGEMV, CGERC, CTRMV, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQRT2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO I = 1, K +* +* Generate elem. refl. H(i) to annihilate A(i+1:m,i), tau(I) -> T(I,1) +* + CALL CLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ T( I, 1 ) ) + IF( I.LT.N ) THEN +* +* Apply H(i) to A(I:M,I+1:N) from the left +* + AII = A( I, I ) + A( I, I ) = ONE +* +* W(1:N-I) := A(I:M,I+1:N)**H * A(I:M,I) [W = T(:,N)] +* + CALL CGEMV( 'C',M-I+1, N-I, ONE, A( I, I+1 ), LDA, + $ A( I, I ), 1, ZERO, T( 1, N ), 1 ) +* +* A(I:M,I+1:N) = A(I:m,I+1:N) + alpha*A(I:M,I)*W(1:N-1)**H +* + ALPHA = -CONJG(T( I, 1 )) + CALL CGERC( M-I+1, N-I, ALPHA, A( I, I ), 1, + $ T( 1, N ), 1, A( I, I+1 ), LDA ) + A( I, I ) = AII + END IF + END DO +* + DO I = 2, N + AII = A( I, I ) + A( I, I ) = ONE +* +* T(1:I-1,I) := alpha * A(I:M,1:I-1)**H * A(I:M,I) +* + ALPHA = -T( I, 1 ) + CALL CGEMV( 'C', M-I+1, I-1, ALPHA, A( I, 1 ), LDA, + $ A( I, I ), 1, ZERO, T( 1, I ), 1 ) + A( I, I ) = AII +* +* T(1:I-1,I) := T(1:I-1,1:I-1) * T(1:I-1,I) +* + CALL CTRMV( 'U', 'N', 'N', I-1, T, LDT, T( 1, I ), 1 ) +* +* T(I,I) = tau(I) +* + T( I, I ) = T( I, 1 ) + T( I, 1) = ZERO + END DO + +* +* End of CGEQRT2 +* + END diff --git a/dspl/liblapack/SRC/cgeqrt3.f b/dspl/liblapack/SRC/cgeqrt3.f new file mode 100644 index 0000000..e3cfeea --- /dev/null +++ b/dspl/liblapack/SRC/cgeqrt3.f @@ -0,0 +1,257 @@ +*> \brief CGEQRT3 recursively computes a QR factorization of a general real or complex matrix using the compact WY representation of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGEQRT3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE CGEQRT3( M, N, A, LDA, T, LDT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, LDT +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGEQRT3 recursively computes a QR factorization of a complex M-by-N matrix A, +*> using the compact WY representation of Q. +*> +*> Based on the algorithm of Elmroth and Gustavson, +*> IBM J. Res. Develop. Vol 44 No. 4 July 2000. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= N. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the complex M-by-N matrix A. On exit, the elements on and +*> above the diagonal contain the N-by-N upper triangular matrix R; the +*> elements below the diagonal are the columns of V. See below for +*> further details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,N) +*> The N-by-N upper triangular factor of the block reflector. +*> The elements on and above the diagonal contain the block +*> reflector T; the elements below the diagonal are not used. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix V stores the elementary reflectors H(i) in the i-th column +*> below the diagonal. For example, if M=5 and N=3, the matrix V is +*> +*> V = ( 1 ) +*> ( v1 1 ) +*> ( v1 v2 1 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> where the vi's represent the vectors which define H(i), which are returned +*> in the matrix A. The 1's along the diagonal of V are not stored in A. The +*> block reflector H is then given by +*> +*> H = I - V * T * V**H +*> +*> where V**H is the conjugate transpose of V. +*> +*> For details of the algorithm, see Elmroth and Gustavson (cited above). +*> \endverbatim +*> +* ===================================================================== + RECURSIVE SUBROUTINE CGEQRT3( M, N, A, LDA, T, LDT, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, LDT +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = (1.0,0.0) ) +* .. +* .. Local Scalars .. + INTEGER I, I1, J, J1, N1, N2, IINFO +* .. +* .. External Subroutines .. + EXTERNAL CLARFG, CTRMM, CGEMM, XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N .LT. 0 ) THEN + INFO = -2 + ELSE IF( M .LT. N ) THEN + INFO = -1 + ELSE IF( LDA .LT. MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LDT .LT. MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGEQRT3', -INFO ) + RETURN + END IF +* + IF( N.EQ.1 ) THEN +* +* Compute Householder transform when N=1 +* + CALL CLARFG( M, A(1,1), A( MIN( 2, M ), 1 ), 1, T(1,1) ) +* + ELSE +* +* Otherwise, split A into blocks... +* + N1 = N/2 + N2 = N-N1 + J1 = MIN( N1+1, N ) + I1 = MIN( N+1, M ) +* +* Compute A(1:M,1:N1) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1**H +* + CALL CGEQRT3( M, N1, A, LDA, T, LDT, IINFO ) +* +* Compute A(1:M,J1:N) = Q1**H A(1:M,J1:N) [workspace: T(1:N1,J1:N)] +* + DO J=1,N2 + DO I=1,N1 + T( I, J+N1 ) = A( I, J+N1 ) + END DO + END DO + CALL CTRMM( 'L', 'L', 'C', 'U', N1, N2, ONE, + & A, LDA, T( 1, J1 ), LDT ) +* + CALL CGEMM( 'C', 'N', N1, N2, M-N1, ONE, A( J1, 1 ), LDA, + & A( J1, J1 ), LDA, ONE, T( 1, J1 ), LDT) +* + CALL CTRMM( 'L', 'U', 'C', 'N', N1, N2, ONE, + & T, LDT, T( 1, J1 ), LDT ) +* + CALL CGEMM( 'N', 'N', M-N1, N2, N1, -ONE, A( J1, 1 ), LDA, + & T( 1, J1 ), LDT, ONE, A( J1, J1 ), LDA ) +* + CALL CTRMM( 'L', 'L', 'N', 'U', N1, N2, ONE, + & A, LDA, T( 1, J1 ), LDT ) +* + DO J=1,N2 + DO I=1,N1 + A( I, J+N1 ) = A( I, J+N1 ) - T( I, J+N1 ) + END DO + END DO +* +* Compute A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2**H +* + CALL CGEQRT3( M-N1, N2, A( J1, J1 ), LDA, + & T( J1, J1 ), LDT, IINFO ) +* +* Compute T3 = T(1:N1,J1:N) = -T1 Y1**H Y2 T2 +* + DO I=1,N1 + DO J=1,N2 + T( I, J+N1 ) = CONJG(A( J+N1, I )) + END DO + END DO +* + CALL CTRMM( 'R', 'L', 'N', 'U', N1, N2, ONE, + & A( J1, J1 ), LDA, T( 1, J1 ), LDT ) +* + CALL CGEMM( 'C', 'N', N1, N2, M-N, ONE, A( I1, 1 ), LDA, + & A( I1, J1 ), LDA, ONE, T( 1, J1 ), LDT ) +* + CALL CTRMM( 'L', 'U', 'N', 'N', N1, N2, -ONE, T, LDT, + & T( 1, J1 ), LDT ) +* + CALL CTRMM( 'R', 'U', 'N', 'N', N1, N2, ONE, + & T( J1, J1 ), LDT, T( 1, J1 ), LDT ) +* +* Y = (Y1,Y2); R = [ R1 A(1:N1,J1:N) ]; T = [T1 T3] +* [ 0 R2 ] [ 0 T2] +* + END IF +* + RETURN +* +* End of CGEQRT3 +* + END diff --git a/dspl/liblapack/SRC/cgerfs.f b/dspl/liblapack/SRC/cgerfs.f new file mode 100644 index 0000000..6178321 --- /dev/null +++ b/dspl/liblapack/SRC/cgerfs.f @@ -0,0 +1,448 @@ +*> \brief \b CGERFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGERFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, +* X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGERFS improves the computed solution to a system of linear +*> equations and provides error bounds and backward error estimates for +*> the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The original N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The factors L and U from the factorization A = P*L*U +*> as computed by CGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from CGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by CGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +* ===================================================================== + SUBROUTINE CGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, + $ X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANSN, TRANST + INTEGER COUNT, I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGEMV, CGETRS, CLACN2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGERFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL CCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL CGEMV( TRANS, N, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, + $ 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(op(A))*abs(X) + abs(B). +* + IF( NOTRAN ) THEN + DO 50 K = 1, N + XK = CABS1( X( K, J ) ) + DO 40 I = 1, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + 40 CONTINUE + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + DO 60 I = 1, N + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CGETRS( TRANS, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL CGETRS( TRANST, N, 1, AF, LDAF, IPIV, WORK, N, + $ INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL CGETRS( TRANSN, N, 1, AF, LDAF, IPIV, WORK, N, + $ INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of CGERFS +* + END diff --git a/dspl/liblapack/SRC/cgerfsx.f b/dspl/liblapack/SRC/cgerfsx.f new file mode 100644 index 0000000..7b72f9c --- /dev/null +++ b/dspl/liblapack/SRC/cgerfsx.f @@ -0,0 +1,734 @@ +*> \brief \b CGERFSX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGERFSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGERFSX( TRANS, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, +* R, C, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, +* WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS, EQUED +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX , * ), WORK( * ) +* REAL R( * ), C( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGERFSX improves the computed solution to a system of linear +*> equations and provides error bounds and backward error estimates +*> for the solution. In addition to normwise error bound, the code +*> provides maximum componentwise error bound if possible. See +*> comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the +*> error bounds. +*> +*> The original system of linear equations may have been equilibrated +*> before calling this routine, as described by arguments EQUED, R +*> and C below. In this case, the solution and error bounds returned +*> are for the original unequilibrated system. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done to A +*> before calling this routine. This is needed to compute +*> the solution and error bounds correctly. +*> = 'N': No equilibration +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> The right hand side B has been changed accordingly. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The original N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The factors L and U from the factorization A = P*L*U +*> as computed by CGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from CGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] R +*> \verbatim +*> R is REAL array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. +*> If R is accessed, each element of R should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. +*> If C is accessed, each element of C should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by CGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is REAL array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +* ===================================================================== + SUBROUTINE CGERFSX( TRANS, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ R, C, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS, EQUED + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX , * ), WORK( * ) + REAL R( * ), C( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL ITREF_DEFAULT, ITHRESH_DEFAULT, + $ COMPONENTWISE_DEFAULT + REAL RTHRESH_DEFAULT, DZTHRESH_DEFAULT + PARAMETER ( ITREF_DEFAULT = 1.0 ) + PARAMETER ( ITHRESH_DEFAULT = 10.0 ) + PARAMETER ( COMPONENTWISE_DEFAULT = 1.0 ) + PARAMETER ( RTHRESH_DEFAULT = 0.5 ) + PARAMETER ( DZTHRESH_DEFAULT = 0.25 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. Local Scalars .. + CHARACTER(1) NORM + LOGICAL ROWEQU, COLEQU, NOTRAN + INTEGER J, TRANS_TYPE, PREC_TYPE, REF_TYPE + INTEGER N_NORMS + REAL ANORM, RCOND_TMP + REAL ILLRCOND_THRESH, ERR_LBND, CWISE_WRONG + LOGICAL IGNORE_CWISE + INTEGER ITHRESH + REAL RTHRESH, UNSTABLE_THRESH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CGECON, CLA_GERFSX_EXTENDED +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT, TRANSFER +* .. +* .. External Functions .. + EXTERNAL LSAME, ILATRANS, ILAPREC + EXTERNAL SLAMCH, CLANGE, CLA_GERCOND_X, CLA_GERCOND_C + REAL SLAMCH, CLANGE, CLA_GERCOND_X, CLA_GERCOND_C + LOGICAL LSAME + INTEGER ILATRANS, ILAPREC +* .. +* .. Executable Statements .. +* +* Check the input parameters. +* + INFO = 0 + TRANS_TYPE = ILATRANS( TRANS ) + REF_TYPE = INT( ITREF_DEFAULT ) + IF ( NPARAMS .GE. LA_LINRX_ITREF_I ) THEN + IF ( PARAMS( LA_LINRX_ITREF_I ) .LT. 0.0 ) THEN + PARAMS( LA_LINRX_ITREF_I ) = ITREF_DEFAULT + ELSE + REF_TYPE = PARAMS( LA_LINRX_ITREF_I ) + END IF + END IF +* +* Set default parameters. +* + ILLRCOND_THRESH = REAL( N ) * SLAMCH( 'Epsilon' ) + ITHRESH = INT( ITHRESH_DEFAULT ) + RTHRESH = RTHRESH_DEFAULT + UNSTABLE_THRESH = DZTHRESH_DEFAULT + IGNORE_CWISE = COMPONENTWISE_DEFAULT .EQ. 0.0 +* + IF ( NPARAMS.GE.LA_LINRX_ITHRESH_I ) THEN + IF ( PARAMS( LA_LINRX_ITHRESH_I ).LT.0.0 ) THEN + PARAMS(LA_LINRX_ITHRESH_I) = ITHRESH + ELSE + ITHRESH = INT( PARAMS( LA_LINRX_ITHRESH_I ) ) + END IF + END IF + IF ( NPARAMS.GE.LA_LINRX_CWISE_I ) THEN + IF ( PARAMS( LA_LINRX_CWISE_I ).LT.0.0 ) THEN + IF ( IGNORE_CWISE ) THEN + PARAMS( LA_LINRX_CWISE_I ) = 0.0 + ELSE + PARAMS( LA_LINRX_CWISE_I ) = 1.0 + END IF + ELSE + IGNORE_CWISE = PARAMS( LA_LINRX_CWISE_I ) .EQ. 0.0 + END IF + END IF + IF ( REF_TYPE .EQ. 0 .OR. N_ERR_BNDS .EQ. 0 ) THEN + N_NORMS = 0 + ELSE IF ( IGNORE_CWISE ) THEN + N_NORMS = 1 + ELSE + N_NORMS = 2 + END IF +* + NOTRAN = LSAME( TRANS, 'N' ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) +* +* Test input parameters. +* + IF( TRANS_TYPE.EQ.-1 ) THEN + INFO = -1 + ELSE IF( .NOT.ROWEQU .AND. .NOT.COLEQU .AND. + $ .NOT.LSAME( EQUED, 'N' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGERFSX', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + RCOND = 1.0 + DO J = 1, NRHS + BERR( J ) = 0.0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 0.0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 0.0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 1.0 + END IF + END DO + RETURN + END IF +* +* Default to failure. +* + RCOND = 0.0 + DO J = 1, NRHS + BERR( J ) = 1.0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 0.0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 0.0 + END IF + END DO +* +* Compute the norm of A and the reciprocal of the condition +* number of A. +* + IF( NOTRAN ) THEN + NORM = 'I' + ELSE + NORM = '1' + END IF + ANORM = CLANGE( NORM, N, N, A, LDA, RWORK ) + CALL CGECON( NORM, N, AF, LDAF, ANORM, RCOND, WORK, RWORK, INFO ) +* +* Perform refinement on each right-hand side +* + IF ( REF_TYPE .NE. 0 ) THEN + + PREC_TYPE = ILAPREC( 'D' ) + + IF ( NOTRAN ) THEN + CALL CLA_GERFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, + $ NRHS, A, LDA, AF, LDAF, IPIV, COLEQU, C, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, WORK, RWORK, WORK(N+1), + $ TRANSFER (RWORK(1:2*N), (/ (ZERO, ZERO) /), N), + $ RCOND, ITHRESH, RTHRESH, UNSTABLE_THRESH, IGNORE_CWISE, + $ INFO ) + ELSE + CALL CLA_GERFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, + $ NRHS, A, LDA, AF, LDAF, IPIV, ROWEQU, R, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, WORK, RWORK, WORK(N+1), + $ TRANSFER (RWORK(1:2*N), (/ (ZERO, ZERO) /), N), + $ RCOND, ITHRESH, RTHRESH, UNSTABLE_THRESH, IGNORE_CWISE, + $ INFO ) + END IF + END IF + + ERR_LBND = MAX( 10.0, SQRT( REAL( N ) ) ) * SLAMCH( 'Epsilon' ) + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 1 ) THEN +* +* Compute scaled normwise condition number cond(A*C). +* + IF ( COLEQU .AND. NOTRAN ) THEN + RCOND_TMP = CLA_GERCOND_C( TRANS, N, A, LDA, AF, LDAF, IPIV, + $ C, .TRUE., INFO, WORK, RWORK ) + ELSE IF ( ROWEQU .AND. .NOT. NOTRAN ) THEN + RCOND_TMP = CLA_GERCOND_C( TRANS, N, A, LDA, AF, LDAF, IPIV, + $ R, .TRUE., INFO, WORK, RWORK ) + ELSE + RCOND_TMP = CLA_GERCOND_C( TRANS, N, A, LDA, AF, LDAF, IPIV, + $ C, .FALSE., INFO, WORK, RWORK ) + END IF + DO J = 1, NRHS +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .GT. 1.0 ) + $ ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 0.0 + IF ( INFO .LE. N ) INFO = N + J + ELSE IF (ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .LT. ERR_LBND) + $ THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + END DO + END IF + + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 2 ) THEN +* +* Compute componentwise condition number cond(A*diag(Y(:,J))) for +* each right-hand side using the current solution as an estimate of +* the true solution. If the componentwise error estimate is too +* large, then the solution is a lousy estimate of truth and the +* estimated RCOND may be too optimistic. To avoid misleading users, +* the inverse condition number is set to 0.0 when the estimated +* cwise error is at least CWISE_WRONG. +* + CWISE_WRONG = SQRT( SLAMCH( 'Epsilon' ) ) + DO J = 1, NRHS + IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .LT. CWISE_WRONG ) + $ THEN + RCOND_TMP = CLA_GERCOND_X( TRANS, N, A, LDA, AF, LDAF, + $ IPIV, X(1,J), INFO, WORK, RWORK ) + ELSE + RCOND_TMP = 0.0 + END IF +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .GT. 1.0 ) + $ ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 0.0 + IF ( PARAMS( LA_LINRX_CWISE_I ) .EQ. 1.0 + $ .AND. INFO.LT.N + J ) INFO = N + J + ELSE IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) + $ .LT. ERR_LBND ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF +* + RETURN +* +* End of CGERFSX +* + END diff --git a/dspl/liblapack/SRC/cgerq2.f b/dspl/liblapack/SRC/cgerq2.f new file mode 100644 index 0000000..b0844ea --- /dev/null +++ b/dspl/liblapack/SRC/cgerq2.f @@ -0,0 +1,195 @@ +*> \brief \b CGERQ2 computes the RQ factorization of a general rectangular matrix using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGERQ2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGERQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGERQ2 computes an RQ factorization of a complex m by n matrix A: +*> A = R * Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, if m <= n, the upper triangle of the subarray +*> A(1:m,n-m+1:n) contains the m by m upper triangular matrix R; +*> if m >= n, the elements on and above the (m-n)-th subdiagonal +*> contain the m by n upper trapezoidal matrix R; the remaining +*> elements, with the array TAU, represent the unitary matrix +*> Q as a product of elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1)**H H(2)**H . . . H(k)**H, where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on +*> exit in A(m-k+i,1:n-k+i-1), and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGERQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARF, CLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGERQ2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = K, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* A(m-k+i,1:n-k+i-1) +* + CALL CLACGV( N-K+I, A( M-K+I, 1 ), LDA ) + ALPHA = A( M-K+I, N-K+I ) + CALL CLARFG( N-K+I, ALPHA, A( M-K+I, 1 ), LDA, + $ TAU( I ) ) +* +* Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right +* + A( M-K+I, N-K+I ) = ONE + CALL CLARF( 'Right', M-K+I-1, N-K+I, A( M-K+I, 1 ), LDA, + $ TAU( I ), A, LDA, WORK ) + A( M-K+I, N-K+I ) = ALPHA + CALL CLACGV( N-K+I-1, A( M-K+I, 1 ), LDA ) + 10 CONTINUE + RETURN +* +* End of CGERQ2 +* + END diff --git a/dspl/liblapack/SRC/cgerqf.f b/dspl/liblapack/SRC/cgerqf.f new file mode 100644 index 0000000..4ae26e8 --- /dev/null +++ b/dspl/liblapack/SRC/cgerqf.f @@ -0,0 +1,287 @@ +*> \brief \b CGERQF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGERQF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGERQF computes an RQ factorization of a complex M-by-N matrix A: +*> A = R * Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if m <= n, the upper triangle of the subarray +*> A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R; +*> if m >= n, the elements on and above the (m-n)-th subdiagonal +*> contain the M-by-N upper trapezoidal matrix R; +*> the remaining elements, with the array TAU, represent the +*> unitary matrix Q as a product of min(m,n) elementary +*> reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1)**H H(2)**H . . . H(k)**H, where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on +*> exit in A(m-k+i,1:n-k+i-1), and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT, + $ MU, NB, NBMIN, NU, NX +* .. +* .. External Subroutines .. + EXTERNAL CGERQ2, CLARFB, CLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF +* + IF( INFO.EQ.0 ) THEN + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'CGERQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGERQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CGERQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CGERQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially. +* The last kk rows are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* + DO 10 I = K - KK + KI + 1, K - KK + 1, -NB + IB = MIN( K-I+1, NB ) +* +* Compute the RQ factorization of the current block +* A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1) +* + CALL CGERQ2( IB, N-K+I+IB-1, A( M-K+I, 1 ), LDA, TAU( I ), + $ WORK, IINFO ) + IF( M-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL CLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, + $ A( M-K+I, 1 ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right +* + CALL CLARFB( 'Right', 'No transpose', 'Backward', + $ 'Rowwise', M-K+I-1, N-K+I+IB-1, IB, + $ A( M-K+I, 1 ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + MU = M - K + I + NB - 1 + NU = N - K + I + NB - 1 + ELSE + MU = M + NU = N + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 .AND. NU.GT.0 ) + $ CALL CGERQ2( MU, NU, A, LDA, TAU, WORK, IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of CGERQF +* + END diff --git a/dspl/liblapack/SRC/cgesc2.f b/dspl/liblapack/SRC/cgesc2.f new file mode 100644 index 0000000..c0b9110 --- /dev/null +++ b/dspl/liblapack/SRC/cgesc2.f @@ -0,0 +1,202 @@ +*> \brief \b CGESC2 solves a system of linear equations using the LU factorization with complete pivoting computed by sgetc2. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGESC2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) +* +* .. Scalar Arguments .. +* INTEGER LDA, N +* REAL SCALE +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), JPIV( * ) +* COMPLEX A( LDA, * ), RHS( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGESC2 solves a system of linear equations +*> +*> A * X = scale* RHS +*> +*> with a general N-by-N matrix A using the LU factorization with +*> complete pivoting computed by CGETC2. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the LU part of the factorization of the n-by-n +*> matrix A computed by CGETC2: A = P * L * U * Q +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] RHS +*> \verbatim +*> RHS is COMPLEX array, dimension N. +*> On entry, the right hand side vector b. +*> On exit, the solution vector X. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N). +*> The pivot indices; for 1 <= i <= N, row i of the +*> matrix has been interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] JPIV +*> \verbatim +*> JPIV is INTEGER array, dimension (N). +*> The pivot indices; for 1 <= j <= N, column j of the +*> matrix has been interchanged with column JPIV(j). +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL +*> On exit, SCALE contains the scale factor. SCALE is chosen +*> 0 <= SCALE <= 1 to prevent owerflow in the solution. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEauxiliary +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== + SUBROUTINE CGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER LDA, N + REAL SCALE +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + COMPLEX A( LDA, * ), RHS( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL BIGNUM, EPS, SMLNUM + COMPLEX TEMP +* .. +* .. External Subroutines .. + EXTERNAL CLASWP, CSCAL, SLABAD +* .. +* .. External Functions .. + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL ICAMAX, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CMPLX, REAL +* .. +* .. Executable Statements .. +* +* Set constant to control overflow +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Apply permutations IPIV to RHS +* + CALL CLASWP( 1, RHS, LDA, 1, N-1, IPIV, 1 ) +* +* Solve for L part +* + DO 20 I = 1, N - 1 + DO 10 J = I + 1, N + RHS( J ) = RHS( J ) - A( J, I )*RHS( I ) + 10 CONTINUE + 20 CONTINUE +* +* Solve for U part +* + SCALE = ONE +* +* Check for scaling +* + I = ICAMAX( N, RHS, 1 ) + IF( TWO*SMLNUM*ABS( RHS( I ) ).GT.ABS( A( N, N ) ) ) THEN + TEMP = CMPLX( ONE / TWO, ZERO ) / ABS( RHS( I ) ) + CALL CSCAL( N, TEMP, RHS( 1 ), 1 ) + SCALE = SCALE*REAL( TEMP ) + END IF + DO 40 I = N, 1, -1 + TEMP = CMPLX( ONE, ZERO ) / A( I, I ) + RHS( I ) = RHS( I )*TEMP + DO 30 J = I + 1, N + RHS( I ) = RHS( I ) - RHS( J )*( A( I, J )*TEMP ) + 30 CONTINUE + 40 CONTINUE +* +* Apply permutations JPIV to the solution (RHS) +* + CALL CLASWP( 1, RHS, LDA, 1, N-1, JPIV, -1 ) + RETURN +* +* End of CGESC2 +* + END diff --git a/dspl/liblapack/SRC/cgesdd.f b/dspl/liblapack/SRC/cgesdd.f new file mode 100644 index 0000000..0734159 --- /dev/null +++ b/dspl/liblapack/SRC/cgesdd.f @@ -0,0 +1,2220 @@ +*> \brief \b CGESDD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGESDD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, +* WORK, LWORK, RWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ +* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL RWORK( * ), S( * ) +* COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGESDD computes the singular value decomposition (SVD) of a complex +*> M-by-N matrix A, optionally computing the left and/or right singular +*> vectors, by using divide-and-conquer method. The SVD is written +*> +*> A = U * SIGMA * conjugate-transpose(V) +*> +*> where SIGMA is an M-by-N matrix which is zero except for its +*> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and +*> V is an N-by-N unitary matrix. The diagonal elements of SIGMA +*> are the singular values of A; they are real and non-negative, and +*> are returned in descending order. The first min(m,n) columns of +*> U and V are the left and right singular vectors of A. +*> +*> Note that the routine returns VT = V**H, not V. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> Specifies options for computing all or part of the matrix U: +*> = 'A': all M columns of U and all N rows of V**H are +*> returned in the arrays U and VT; +*> = 'S': the first min(M,N) columns of U and the first +*> min(M,N) rows of V**H are returned in the arrays U +*> and VT; +*> = 'O': If M >= N, the first N columns of U are overwritten +*> in the array A and all rows of V**H are returned in +*> the array VT; +*> otherwise, all columns of U are returned in the +*> array U and the first M rows of V**H are overwritten +*> in the array A; +*> = 'N': no columns of U or rows of V**H are computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if JOBZ = 'O', A is overwritten with the first N columns +*> of U (the left singular vectors, stored +*> columnwise) if M >= N; +*> A is overwritten with the first M rows +*> of V**H (the right singular vectors, stored +*> rowwise) otherwise. +*> if JOBZ .ne. 'O', the contents of A are destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (min(M,N)) +*> The singular values of A, sorted so that S(i) >= S(i+1). +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX array, dimension (LDU,UCOL) +*> UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; +*> UCOL = min(M,N) if JOBZ = 'S'. +*> If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M +*> unitary matrix U; +*> if JOBZ = 'S', U contains the first min(M,N) columns of U +*> (the left singular vectors, stored columnwise); +*> if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= 1; +*> if JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is COMPLEX array, dimension (LDVT,N) +*> If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the +*> N-by-N unitary matrix V**H; +*> if JOBZ = 'S', VT contains the first min(M,N) rows of +*> V**H (the right singular vectors, stored rowwise); +*> if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= 1; +*> if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; +*> if JOBZ = 'S', LDVT >= min(M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 1. +*> If LWORK = -1, a workspace query is assumed. The optimal +*> size for the WORK array is calculated and stored in WORK(1), +*> and no other work except argument checking is performed. +*> +*> Let mx = max(M,N) and mn = min(M,N). +*> If JOBZ = 'N', LWORK >= 2*mn + mx. +*> If JOBZ = 'O', LWORK >= 2*mn*mn + 2*mn + mx. +*> If JOBZ = 'S', LWORK >= mn*mn + 3*mn. +*> If JOBZ = 'A', LWORK >= mn*mn + 2*mn + mx. +*> These are not tight minimums in all cases; see comments inside code. +*> For good performance, LWORK should generally be larger; +*> a query is recommended. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (MAX(1,LRWORK)) +*> Let mx = max(M,N) and mn = min(M,N). +*> If JOBZ = 'N', LRWORK >= 5*mn (LAPACK <= 3.6 needs 7*mn); +*> else if mx >> mn, LRWORK >= 5*mn*mn + 5*mn; +*> else LRWORK >= max( 5*mn*mn + 5*mn, +*> 2*mx*mn + 2*mn*mn + mn ). +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (8*min(M,N)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: The updating process of SBDSDC did not converge. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexGEsing +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE CGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, + $ WORK, LWORK, RWORK, IWORK, INFO ) + implicit none +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ + INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), S( * ) + COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS + INTEGER BLK, CHUNK, I, IE, IERR, IL, IR, IRU, IRVT, + $ ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT, + $ LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK, + $ MNTHR1, MNTHR2, NRWORK, NWORK, WRKBL + INTEGER LWORK_CGEBRD_MN, LWORK_CGEBRD_MM, + $ LWORK_CGEBRD_NN, LWORK_CGELQF_MN, + $ LWORK_CGEQRF_MN, + $ LWORK_CUNGBR_P_MN, LWORK_CUNGBR_P_NN, + $ LWORK_CUNGBR_Q_MN, LWORK_CUNGBR_Q_MM, + $ LWORK_CUNGLQ_MN, LWORK_CUNGLQ_NN, + $ LWORK_CUNGQR_MM, LWORK_CUNGQR_MN, + $ LWORK_CUNMBR_PRC_MM, LWORK_CUNMBR_QLN_MM, + $ LWORK_CUNMBR_PRC_MN, LWORK_CUNMBR_QLN_MN, + $ LWORK_CUNMBR_PRC_NN, LWORK_CUNMBR_QLN_NN + REAL ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + REAL DUM( 1 ) + COMPLEX CDUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CGEBRD, CGELQF, CGEMM, CGEQRF, CLACP2, CLACPY, + $ CLACRM, CLARCM, CLASCL, CLASET, CUNGBR, CUNGLQ, + $ CUNGQR, CUNMBR, SBDSDC, SLASCL, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, CLANGE + EXTERNAL LSAME, SLAMCH, CLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + MNTHR1 = INT( MINMN*17.0E0 / 9.0E0 ) + MNTHR2 = INT( MINMN*5.0E0 / 3.0E0 ) + WNTQA = LSAME( JOBZ, 'A' ) + WNTQS = LSAME( JOBZ, 'S' ) + WNTQAS = WNTQA .OR. WNTQS + WNTQO = LSAME( JOBZ, 'O' ) + WNTQN = LSAME( JOBZ, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) + MINWRK = 1 + MAXWRK = 1 +* + IF( .NOT.( WNTQA .OR. WNTQS .OR. WNTQO .OR. WNTQN ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDU.LT.1 .OR. ( WNTQAS .AND. LDU.LT.M ) .OR. + $ ( WNTQO .AND. M.LT.N .AND. LDU.LT.M ) ) THEN + INFO = -8 + ELSE IF( LDVT.LT.1 .OR. ( WNTQA .AND. LDVT.LT.N ) .OR. + $ ( WNTQS .AND. LDVT.LT.MINMN ) .OR. + $ ( WNTQO .AND. M.GE.N .AND. LDVT.LT.N ) ) THEN + INFO = -10 + END IF +* +* Compute workspace +* Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace allocated at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to +* real workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + IF( M.GE.N .AND. MINMN.GT.0 ) THEN +* +* There is no complex work space needed for bidiagonal SVD +* The real work space needed for bidiagonal SVD (sbdsdc) is +* BDSPAC = 3*N*N + 4*N for singular values and vectors; +* BDSPAC = 4*N for singular values only; +* not including e, RU, and RVT matrices. +* +* Compute space preferred for each routine + CALL CGEBRD( M, N, CDUM(1), M, DUM(1), DUM(1), CDUM(1), + $ CDUM(1), CDUM(1), -1, IERR ) + LWORK_CGEBRD_MN = INT( CDUM(1) ) +* + CALL CGEBRD( N, N, CDUM(1), N, DUM(1), DUM(1), CDUM(1), + $ CDUM(1), CDUM(1), -1, IERR ) + LWORK_CGEBRD_NN = INT( CDUM(1) ) +* + CALL CGEQRF( M, N, CDUM(1), M, CDUM(1), CDUM(1), -1, IERR ) + LWORK_CGEQRF_MN = INT( CDUM(1) ) +* + CALL CUNGBR( 'P', N, N, N, CDUM(1), N, CDUM(1), CDUM(1), + $ -1, IERR ) + LWORK_CUNGBR_P_NN = INT( CDUM(1) ) +* + CALL CUNGBR( 'Q', M, M, N, CDUM(1), M, CDUM(1), CDUM(1), + $ -1, IERR ) + LWORK_CUNGBR_Q_MM = INT( CDUM(1) ) +* + CALL CUNGBR( 'Q', M, N, N, CDUM(1), M, CDUM(1), CDUM(1), + $ -1, IERR ) + LWORK_CUNGBR_Q_MN = INT( CDUM(1) ) +* + CALL CUNGQR( M, M, N, CDUM(1), M, CDUM(1), CDUM(1), + $ -1, IERR ) + LWORK_CUNGQR_MM = INT( CDUM(1) ) +* + CALL CUNGQR( M, N, N, CDUM(1), M, CDUM(1), CDUM(1), + $ -1, IERR ) + LWORK_CUNGQR_MN = INT( CDUM(1) ) +* + CALL CUNMBR( 'P', 'R', 'C', N, N, N, CDUM(1), N, CDUM(1), + $ CDUM(1), N, CDUM(1), -1, IERR ) + LWORK_CUNMBR_PRC_NN = INT( CDUM(1) ) +* + CALL CUNMBR( 'Q', 'L', 'N', M, M, N, CDUM(1), M, CDUM(1), + $ CDUM(1), M, CDUM(1), -1, IERR ) + LWORK_CUNMBR_QLN_MM = INT( CDUM(1) ) +* + CALL CUNMBR( 'Q', 'L', 'N', M, N, N, CDUM(1), M, CDUM(1), + $ CDUM(1), M, CDUM(1), -1, IERR ) + LWORK_CUNMBR_QLN_MN = INT( CDUM(1) ) +* + CALL CUNMBR( 'Q', 'L', 'N', N, N, N, CDUM(1), N, CDUM(1), + $ CDUM(1), N, CDUM(1), -1, IERR ) + LWORK_CUNMBR_QLN_NN = INT( CDUM(1) ) +* + IF( M.GE.MNTHR1 ) THEN + IF( WNTQN ) THEN +* +* Path 1 (M >> N, JOBZ='N') +* + MAXWRK = N + LWORK_CGEQRF_MN + MAXWRK = MAX( MAXWRK, 2*N + LWORK_CGEBRD_NN ) + MINWRK = 3*N + ELSE IF( WNTQO ) THEN +* +* Path 2 (M >> N, JOBZ='O') +* + WRKBL = N + LWORK_CGEQRF_MN + WRKBL = MAX( WRKBL, N + LWORK_CUNGQR_MN ) + WRKBL = MAX( WRKBL, 2*N + LWORK_CGEBRD_NN ) + WRKBL = MAX( WRKBL, 2*N + LWORK_CUNMBR_QLN_NN ) + WRKBL = MAX( WRKBL, 2*N + LWORK_CUNMBR_PRC_NN ) + MAXWRK = M*N + N*N + WRKBL + MINWRK = 2*N*N + 3*N + ELSE IF( WNTQS ) THEN +* +* Path 3 (M >> N, JOBZ='S') +* + WRKBL = N + LWORK_CGEQRF_MN + WRKBL = MAX( WRKBL, N + LWORK_CUNGQR_MN ) + WRKBL = MAX( WRKBL, 2*N + LWORK_CGEBRD_NN ) + WRKBL = MAX( WRKBL, 2*N + LWORK_CUNMBR_QLN_NN ) + WRKBL = MAX( WRKBL, 2*N + LWORK_CUNMBR_PRC_NN ) + MAXWRK = N*N + WRKBL + MINWRK = N*N + 3*N + ELSE IF( WNTQA ) THEN +* +* Path 4 (M >> N, JOBZ='A') +* + WRKBL = N + LWORK_CGEQRF_MN + WRKBL = MAX( WRKBL, N + LWORK_CUNGQR_MM ) + WRKBL = MAX( WRKBL, 2*N + LWORK_CGEBRD_NN ) + WRKBL = MAX( WRKBL, 2*N + LWORK_CUNMBR_QLN_NN ) + WRKBL = MAX( WRKBL, 2*N + LWORK_CUNMBR_PRC_NN ) + MAXWRK = N*N + WRKBL + MINWRK = N*N + MAX( 3*N, N + M ) + END IF + ELSE IF( M.GE.MNTHR2 ) THEN +* +* Path 5 (M >> N, but not as much as MNTHR1) +* + MAXWRK = 2*N + LWORK_CGEBRD_MN + MINWRK = 2*N + M + IF( WNTQO ) THEN +* Path 5o (M >> N, JOBZ='O') + MAXWRK = MAX( MAXWRK, 2*N + LWORK_CUNGBR_P_NN ) + MAXWRK = MAX( MAXWRK, 2*N + LWORK_CUNGBR_Q_MN ) + MAXWRK = MAXWRK + M*N + MINWRK = MINWRK + N*N + ELSE IF( WNTQS ) THEN +* Path 5s (M >> N, JOBZ='S') + MAXWRK = MAX( MAXWRK, 2*N + LWORK_CUNGBR_P_NN ) + MAXWRK = MAX( MAXWRK, 2*N + LWORK_CUNGBR_Q_MN ) + ELSE IF( WNTQA ) THEN +* Path 5a (M >> N, JOBZ='A') + MAXWRK = MAX( MAXWRK, 2*N + LWORK_CUNGBR_P_NN ) + MAXWRK = MAX( MAXWRK, 2*N + LWORK_CUNGBR_Q_MM ) + END IF + ELSE +* +* Path 6 (M >= N, but not much larger) +* + MAXWRK = 2*N + LWORK_CGEBRD_MN + MINWRK = 2*N + M + IF( WNTQO ) THEN +* Path 6o (M >= N, JOBZ='O') + MAXWRK = MAX( MAXWRK, 2*N + LWORK_CUNMBR_PRC_NN ) + MAXWRK = MAX( MAXWRK, 2*N + LWORK_CUNMBR_QLN_MN ) + MAXWRK = MAXWRK + M*N + MINWRK = MINWRK + N*N + ELSE IF( WNTQS ) THEN +* Path 6s (M >= N, JOBZ='S') + MAXWRK = MAX( MAXWRK, 2*N + LWORK_CUNMBR_QLN_MN ) + MAXWRK = MAX( MAXWRK, 2*N + LWORK_CUNMBR_PRC_NN ) + ELSE IF( WNTQA ) THEN +* Path 6a (M >= N, JOBZ='A') + MAXWRK = MAX( MAXWRK, 2*N + LWORK_CUNMBR_QLN_MM ) + MAXWRK = MAX( MAXWRK, 2*N + LWORK_CUNMBR_PRC_NN ) + END IF + END IF + ELSE IF( MINMN.GT.0 ) THEN +* +* There is no complex work space needed for bidiagonal SVD +* The real work space needed for bidiagonal SVD (sbdsdc) is +* BDSPAC = 3*M*M + 4*M for singular values and vectors; +* BDSPAC = 4*M for singular values only; +* not including e, RU, and RVT matrices. +* +* Compute space preferred for each routine + CALL CGEBRD( M, N, CDUM(1), M, DUM(1), DUM(1), CDUM(1), + $ CDUM(1), CDUM(1), -1, IERR ) + LWORK_CGEBRD_MN = INT( CDUM(1) ) +* + CALL CGEBRD( M, M, CDUM(1), M, DUM(1), DUM(1), CDUM(1), + $ CDUM(1), CDUM(1), -1, IERR ) + LWORK_CGEBRD_MM = INT( CDUM(1) ) +* + CALL CGELQF( M, N, CDUM(1), M, CDUM(1), CDUM(1), -1, IERR ) + LWORK_CGELQF_MN = INT( CDUM(1) ) +* + CALL CUNGBR( 'P', M, N, M, CDUM(1), M, CDUM(1), CDUM(1), + $ -1, IERR ) + LWORK_CUNGBR_P_MN = INT( CDUM(1) ) +* + CALL CUNGBR( 'P', N, N, M, CDUM(1), N, CDUM(1), CDUM(1), + $ -1, IERR ) + LWORK_CUNGBR_P_NN = INT( CDUM(1) ) +* + CALL CUNGBR( 'Q', M, M, N, CDUM(1), M, CDUM(1), CDUM(1), + $ -1, IERR ) + LWORK_CUNGBR_Q_MM = INT( CDUM(1) ) +* + CALL CUNGLQ( M, N, M, CDUM(1), M, CDUM(1), CDUM(1), + $ -1, IERR ) + LWORK_CUNGLQ_MN = INT( CDUM(1) ) +* + CALL CUNGLQ( N, N, M, CDUM(1), N, CDUM(1), CDUM(1), + $ -1, IERR ) + LWORK_CUNGLQ_NN = INT( CDUM(1) ) +* + CALL CUNMBR( 'P', 'R', 'C', M, M, M, CDUM(1), M, CDUM(1), + $ CDUM(1), M, CDUM(1), -1, IERR ) + LWORK_CUNMBR_PRC_MM = INT( CDUM(1) ) +* + CALL CUNMBR( 'P', 'R', 'C', M, N, M, CDUM(1), M, CDUM(1), + $ CDUM(1), M, CDUM(1), -1, IERR ) + LWORK_CUNMBR_PRC_MN = INT( CDUM(1) ) +* + CALL CUNMBR( 'P', 'R', 'C', N, N, M, CDUM(1), N, CDUM(1), + $ CDUM(1), N, CDUM(1), -1, IERR ) + LWORK_CUNMBR_PRC_NN = INT( CDUM(1) ) +* + CALL CUNMBR( 'Q', 'L', 'N', M, M, M, CDUM(1), M, CDUM(1), + $ CDUM(1), M, CDUM(1), -1, IERR ) + LWORK_CUNMBR_QLN_MM = INT( CDUM(1) ) +* + IF( N.GE.MNTHR1 ) THEN + IF( WNTQN ) THEN +* +* Path 1t (N >> M, JOBZ='N') +* + MAXWRK = M + LWORK_CGELQF_MN + MAXWRK = MAX( MAXWRK, 2*M + LWORK_CGEBRD_MM ) + MINWRK = 3*M + ELSE IF( WNTQO ) THEN +* +* Path 2t (N >> M, JOBZ='O') +* + WRKBL = M + LWORK_CGELQF_MN + WRKBL = MAX( WRKBL, M + LWORK_CUNGLQ_MN ) + WRKBL = MAX( WRKBL, 2*M + LWORK_CGEBRD_MM ) + WRKBL = MAX( WRKBL, 2*M + LWORK_CUNMBR_QLN_MM ) + WRKBL = MAX( WRKBL, 2*M + LWORK_CUNMBR_PRC_MM ) + MAXWRK = M*N + M*M + WRKBL + MINWRK = 2*M*M + 3*M + ELSE IF( WNTQS ) THEN +* +* Path 3t (N >> M, JOBZ='S') +* + WRKBL = M + LWORK_CGELQF_MN + WRKBL = MAX( WRKBL, M + LWORK_CUNGLQ_MN ) + WRKBL = MAX( WRKBL, 2*M + LWORK_CGEBRD_MM ) + WRKBL = MAX( WRKBL, 2*M + LWORK_CUNMBR_QLN_MM ) + WRKBL = MAX( WRKBL, 2*M + LWORK_CUNMBR_PRC_MM ) + MAXWRK = M*M + WRKBL + MINWRK = M*M + 3*M + ELSE IF( WNTQA ) THEN +* +* Path 4t (N >> M, JOBZ='A') +* + WRKBL = M + LWORK_CGELQF_MN + WRKBL = MAX( WRKBL, M + LWORK_CUNGLQ_NN ) + WRKBL = MAX( WRKBL, 2*M + LWORK_CGEBRD_MM ) + WRKBL = MAX( WRKBL, 2*M + LWORK_CUNMBR_QLN_MM ) + WRKBL = MAX( WRKBL, 2*M + LWORK_CUNMBR_PRC_MM ) + MAXWRK = M*M + WRKBL + MINWRK = M*M + MAX( 3*M, M + N ) + END IF + ELSE IF( N.GE.MNTHR2 ) THEN +* +* Path 5t (N >> M, but not as much as MNTHR1) +* + MAXWRK = 2*M + LWORK_CGEBRD_MN + MINWRK = 2*M + N + IF( WNTQO ) THEN +* Path 5to (N >> M, JOBZ='O') + MAXWRK = MAX( MAXWRK, 2*M + LWORK_CUNGBR_Q_MM ) + MAXWRK = MAX( MAXWRK, 2*M + LWORK_CUNGBR_P_MN ) + MAXWRK = MAXWRK + M*N + MINWRK = MINWRK + M*M + ELSE IF( WNTQS ) THEN +* Path 5ts (N >> M, JOBZ='S') + MAXWRK = MAX( MAXWRK, 2*M + LWORK_CUNGBR_Q_MM ) + MAXWRK = MAX( MAXWRK, 2*M + LWORK_CUNGBR_P_MN ) + ELSE IF( WNTQA ) THEN +* Path 5ta (N >> M, JOBZ='A') + MAXWRK = MAX( MAXWRK, 2*M + LWORK_CUNGBR_Q_MM ) + MAXWRK = MAX( MAXWRK, 2*M + LWORK_CUNGBR_P_NN ) + END IF + ELSE +* +* Path 6t (N > M, but not much larger) +* + MAXWRK = 2*M + LWORK_CGEBRD_MN + MINWRK = 2*M + N + IF( WNTQO ) THEN +* Path 6to (N > M, JOBZ='O') + MAXWRK = MAX( MAXWRK, 2*M + LWORK_CUNMBR_QLN_MM ) + MAXWRK = MAX( MAXWRK, 2*M + LWORK_CUNMBR_PRC_MN ) + MAXWRK = MAXWRK + M*N + MINWRK = MINWRK + M*M + ELSE IF( WNTQS ) THEN +* Path 6ts (N > M, JOBZ='S') + MAXWRK = MAX( MAXWRK, 2*M + LWORK_CUNMBR_QLN_MM ) + MAXWRK = MAX( MAXWRK, 2*M + LWORK_CUNMBR_PRC_MN ) + ELSE IF( WNTQA ) THEN +* Path 6ta (N > M, JOBZ='A') + MAXWRK = MAX( MAXWRK, 2*M + LWORK_CUNMBR_QLN_MM ) + MAXWRK = MAX( MAXWRK, 2*M + LWORK_CUNMBR_PRC_NN ) + END IF + END IF + END IF + MAXWRK = MAX( MAXWRK, MINWRK ) + END IF + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = MAXWRK + IF( LWORK.LT.MINWRK .AND. .NOT. LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGESDD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce using the QR +* decomposition (if sufficient workspace available) +* + IF( M.GE.MNTHR1 ) THEN +* + IF( WNTQN ) THEN +* +* Path 1 (M >> N, JOBZ='N') +* No singular vectors to be computed +* + ITAU = 1 + NWORK = ITAU + N +* +* Compute A=Q*R +* CWorkspace: need N [tau] + N [work] +* CWorkspace: prefer N [tau] + N*NB [work] +* RWorkspace: need 0 +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Zero out below R +* + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A +* CWorkspace: need 2*N [tauq, taup] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + 2*N*NB [work] +* RWorkspace: need N [e] +* + CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + NRWORK = IE + N +* +* Perform bidiagonal SVD, compute singular values only +* CWorkspace: need 0 +* RWorkspace: need N [e] + BDSPAC +* + CALL SBDSDC( 'U', 'N', N, S, RWORK( IE ), DUM,1,DUM,1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) +* + ELSE IF( WNTQO ) THEN +* +* Path 2 (M >> N, JOBZ='O') +* N left singular vectors to be overwritten on A and +* N right singular vectors to be computed in VT +* + IU = 1 +* +* WORK(IU) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + IF( LWORK .GE. M*N + N*N + 3*N ) THEN +* +* WORK(IR) is M by N +* + LDWRKR = M + ELSE + LDWRKR = ( LWORK - N*N - 3*N ) / N + END IF + ITAU = IR + LDWRKR*N + NWORK = ITAU + N +* +* Compute A=Q*R +* CWorkspace: need N*N [U] + N*N [R] + N [tau] + N [work] +* CWorkspace: prefer N*N [U] + N*N [R] + N [tau] + N*NB [work] +* RWorkspace: need 0 +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy R to WORK( IR ), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, WORK( IR+1 ), + $ LDWRKR ) +* +* Generate Q in A +* CWorkspace: need N*N [U] + N*N [R] + N [tau] + N [work] +* CWorkspace: prefer N*N [U] + N*N [R] + N [tau] + N*NB [work] +* RWorkspace: need 0 +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* CWorkspace: need N*N [U] + N*N [R] + 2*N [tauq, taup] + N [work] +* CWorkspace: prefer N*N [U] + N*N [R] + 2*N [tauq, taup] + 2*N*NB [work] +* RWorkspace: need N [e] +* + CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of R in WORK(IRU) and computing right singular vectors +* of R in WORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC +* + IRU = IE + N + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix WORK(IU) +* Overwrite WORK(IU) by the left singular vectors of R +* CWorkspace: need N*N [U] + N*N [R] + 2*N [tauq, taup] + N [work] +* CWorkspace: prefer N*N [U] + N*N [R] + 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL CLACP2( 'F', N, N, RWORK( IRU ), N, WORK( IU ), + $ LDWRKU ) + CALL CUNMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IU ), LDWRKU, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by the right singular vectors of R +* CWorkspace: need N*N [U] + N*N [R] + 2*N [tauq, taup] + N [work] +* CWorkspace: prefer N*N [U] + N*N [R] + 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL CLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL CUNMBR( 'P', 'R', 'C', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in WORK(IR) and copying to A +* CWorkspace: need N*N [U] + N*N [R] +* CWorkspace: prefer N*N [U] + M*N [R] +* RWorkspace: need 0 +* + DO 10 I = 1, M, LDWRKR + CHUNK = MIN( M-I+1, LDWRKR ) + CALL CGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ), + $ LDA, WORK( IU ), LDWRKU, CZERO, + $ WORK( IR ), LDWRKR ) + CALL CLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR, + $ A( I, 1 ), LDA ) + 10 CONTINUE +* + ELSE IF( WNTQS ) THEN +* +* Path 3 (M >> N, JOBZ='S') +* N left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IR = 1 +* +* WORK(IR) is N by N +* + LDWRKR = N + ITAU = IR + LDWRKR*N + NWORK = ITAU + N +* +* Compute A=Q*R +* CWorkspace: need N*N [R] + N [tau] + N [work] +* CWorkspace: prefer N*N [R] + N [tau] + N*NB [work] +* RWorkspace: need 0 +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, WORK( IR+1 ), + $ LDWRKR ) +* +* Generate Q in A +* CWorkspace: need N*N [R] + N [tau] + N [work] +* CWorkspace: prefer N*N [R] + N [tau] + N*NB [work] +* RWorkspace: need 0 +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* CWorkspace: need N*N [R] + 2*N [tauq, taup] + N [work] +* CWorkspace: prefer N*N [R] + 2*N [tauq, taup] + 2*N*NB [work] +* RWorkspace: need N [e] +* + CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC +* + IRU = IE + N + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of R +* CWorkspace: need N*N [R] + 2*N [tauq, taup] + N [work] +* CWorkspace: prefer N*N [R] + 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL CLACP2( 'F', N, N, RWORK( IRU ), N, U, LDU ) + CALL CUNMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of R +* CWorkspace: need N*N [R] + 2*N [tauq, taup] + N [work] +* CWorkspace: prefer N*N [R] + 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL CLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL CUNMBR( 'P', 'R', 'C', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in U +* CWorkspace: need N*N [R] +* RWorkspace: need 0 +* + CALL CLACPY( 'F', N, N, U, LDU, WORK( IR ), LDWRKR ) + CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA, WORK( IR ), + $ LDWRKR, CZERO, U, LDU ) +* + ELSE IF( WNTQA ) THEN +* +* Path 4 (M >> N, JOBZ='A') +* M left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IU = 1 +* +* WORK(IU) is N by N +* + LDWRKU = N + ITAU = IU + LDWRKU*N + NWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* CWorkspace: need N*N [U] + N [tau] + N [work] +* CWorkspace: prefer N*N [U] + N [tau] + N*NB [work] +* RWorkspace: need 0 +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* CWorkspace: need N*N [U] + N [tau] + M [work] +* CWorkspace: prefer N*N [U] + N [tau] + M*NB [work] +* RWorkspace: need 0 +* + CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Produce R in A, zeroing out below it +* + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A +* CWorkspace: need N*N [U] + 2*N [tauq, taup] + N [work] +* CWorkspace: prefer N*N [U] + 2*N [tauq, taup] + 2*N*NB [work] +* RWorkspace: need N [e] +* + CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IRU = IE + N + IRVT = IRU + N*N + NRWORK = IRVT + N*N +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC +* + CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix WORK(IU) +* Overwrite WORK(IU) by left singular vectors of R +* CWorkspace: need N*N [U] + 2*N [tauq, taup] + N [work] +* CWorkspace: prefer N*N [U] + 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL CLACP2( 'F', N, N, RWORK( IRU ), N, WORK( IU ), + $ LDWRKU ) + CALL CUNMBR( 'Q', 'L', 'N', N, N, N, A, LDA, + $ WORK( ITAUQ ), WORK( IU ), LDWRKU, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of R +* CWorkspace: need N*N [U] + 2*N [tauq, taup] + N [work] +* CWorkspace: prefer N*N [U] + 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL CLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL CUNMBR( 'P', 'R', 'C', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* CWorkspace: need N*N [U] +* RWorkspace: need 0 +* + CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU, WORK( IU ), + $ LDWRKU, CZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL CLACPY( 'F', M, N, A, LDA, U, LDU ) +* + END IF +* + ELSE IF( M.GE.MNTHR2 ) THEN +* +* MNTHR2 <= M < MNTHR1 +* +* Path 5 (M >> N, but not as much as MNTHR1) +* Reduce to bidiagonal form without QR decomposition, use +* CUNGBR and matrix multiplication to compute singular vectors +* + IE = 1 + NRWORK = IE + N + ITAUQ = 1 + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize A +* CWorkspace: need 2*N [tauq, taup] + M [work] +* CWorkspace: prefer 2*N [tauq, taup] + (M+N)*NB [work] +* RWorkspace: need N [e] +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IF( WNTQN ) THEN +* +* Path 5n (M >> N, JOBZ='N') +* Compute singular values only +* CWorkspace: need 0 +* RWorkspace: need N [e] + BDSPAC +* + CALL SBDSDC( 'U', 'N', N, S, RWORK( IE ), DUM, 1,DUM,1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN + IU = NWORK + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N +* +* Path 5o (M >> N, JOBZ='O') +* Copy A to VT, generate P**H +* CWorkspace: need 2*N [tauq, taup] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Generate Q in A +* CWorkspace: need 2*N [tauq, taup] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* + IF( LWORK .GE. M*N + 3*N ) THEN +* +* WORK( IU ) is M by N +* + LDWRKU = M + ELSE +* +* WORK(IU) is LDWRKU by N +* + LDWRKU = ( LWORK - 3*N ) / N + END IF + NWORK = IU + LDWRKU*N +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC +* + CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply real matrix RWORK(IRVT) by P**H in VT, +* storing the result in WORK(IU), copying to VT +* CWorkspace: need 2*N [tauq, taup] + N*N [U] +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + 2*N*N [rwork] +* + CALL CLARCM( N, N, RWORK( IRVT ), N, VT, LDVT, + $ WORK( IU ), LDWRKU, RWORK( NRWORK ) ) + CALL CLACPY( 'F', N, N, WORK( IU ), LDWRKU, VT, LDVT ) +* +* Multiply Q in A by real matrix RWORK(IRU), storing the +* result in WORK(IU), copying to A +* CWorkspace: need 2*N [tauq, taup] + N*N [U] +* CWorkspace: prefer 2*N [tauq, taup] + M*N [U] +* RWorkspace: need N [e] + N*N [RU] + 2*N*N [rwork] +* RWorkspace: prefer N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here +* + NRWORK = IRVT + DO 20 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL CLACRM( CHUNK, N, A( I, 1 ), LDA, RWORK( IRU ), + $ N, WORK( IU ), LDWRKU, RWORK( NRWORK ) ) + CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 20 CONTINUE +* + ELSE IF( WNTQS ) THEN +* +* Path 5s (M >> N, JOBZ='S') +* Copy A to VT, generate P**H +* CWorkspace: need 2*N [tauq, taup] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy A to U, generate Q +* CWorkspace: need 2*N [tauq, taup] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) + CALL CUNGBR( 'Q', M, N, N, U, LDU, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC +* + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply real matrix RWORK(IRVT) by P**H in VT, +* storing the result in A, copying to VT +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + 2*N*N [rwork] +* + CALL CLARCM( N, N, RWORK( IRVT ), N, VT, LDVT, A, LDA, + $ RWORK( NRWORK ) ) + CALL CLACPY( 'F', N, N, A, LDA, VT, LDVT ) +* +* Multiply Q in U by real matrix RWORK(IRU), storing the +* result in A, copying to U +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here +* + NRWORK = IRVT + CALL CLACRM( M, N, U, LDU, RWORK( IRU ), N, A, LDA, + $ RWORK( NRWORK ) ) + CALL CLACPY( 'F', M, N, A, LDA, U, LDU ) + ELSE +* +* Path 5a (M >> N, JOBZ='A') +* Copy A to VT, generate P**H +* CWorkspace: need 2*N [tauq, taup] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy A to U, generate Q +* CWorkspace: need 2*N [tauq, taup] + M [work] +* CWorkspace: prefer 2*N [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) + CALL CUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC +* + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply real matrix RWORK(IRVT) by P**H in VT, +* storing the result in A, copying to VT +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + 2*N*N [rwork] +* + CALL CLARCM( N, N, RWORK( IRVT ), N, VT, LDVT, A, LDA, + $ RWORK( NRWORK ) ) + CALL CLACPY( 'F', N, N, A, LDA, VT, LDVT ) +* +* Multiply Q in U by real matrix RWORK(IRU), storing the +* result in A, copying to U +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here +* + NRWORK = IRVT + CALL CLACRM( M, N, U, LDU, RWORK( IRU ), N, A, LDA, + $ RWORK( NRWORK ) ) + CALL CLACPY( 'F', M, N, A, LDA, U, LDU ) + END IF +* + ELSE +* +* M .LT. MNTHR2 +* +* Path 6 (M >= N, but not much larger) +* Reduce to bidiagonal form without QR decomposition +* Use CUNMBR to compute singular vectors +* + IE = 1 + NRWORK = IE + N + ITAUQ = 1 + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize A +* CWorkspace: need 2*N [tauq, taup] + M [work] +* CWorkspace: prefer 2*N [tauq, taup] + (M+N)*NB [work] +* RWorkspace: need N [e] +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IF( WNTQN ) THEN +* +* Path 6n (M >= N, JOBZ='N') +* Compute singular values only +* CWorkspace: need 0 +* RWorkspace: need N [e] + BDSPAC +* + CALL SBDSDC( 'U', 'N', N, S, RWORK( IE ), DUM,1,DUM,1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN + IU = NWORK + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N + IF( LWORK .GE. M*N + 3*N ) THEN +* +* WORK( IU ) is M by N +* + LDWRKU = M + ELSE +* +* WORK( IU ) is LDWRKU by N +* + LDWRKU = ( LWORK - 3*N ) / N + END IF + NWORK = IU + LDWRKU*N +* +* Path 6o (M >= N, JOBZ='O') +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC +* + CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of A +* CWorkspace: need 2*N [tauq, taup] + N*N [U] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + N*N [U] + N*NB [work] +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] +* + CALL CLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL CUNMBR( 'P', 'R', 'C', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* + IF( LWORK .GE. M*N + 3*N ) THEN +* +* Path 6o-fast +* Copy real matrix RWORK(IRU) to complex matrix WORK(IU) +* Overwrite WORK(IU) by left singular vectors of A, copying +* to A +* CWorkspace: need 2*N [tauq, taup] + M*N [U] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + M*N [U] + N*NB [work] +* RWorkspace: need N [e] + N*N [RU] +* + CALL CLASET( 'F', M, N, CZERO, CZERO, WORK( IU ), + $ LDWRKU ) + CALL CLACP2( 'F', N, N, RWORK( IRU ), N, WORK( IU ), + $ LDWRKU ) + CALL CUNMBR( 'Q', 'L', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), WORK( IU ), LDWRKU, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + CALL CLACPY( 'F', M, N, WORK( IU ), LDWRKU, A, LDA ) + ELSE +* +* Path 6o-slow +* Generate Q in A +* CWorkspace: need 2*N [tauq, taup] + N*N [U] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + N*N [U] + N*NB [work] +* RWorkspace: need 0 +* + CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by real matrix RWORK(IRU), storing the +* result in WORK(IU), copying to A +* CWorkspace: need 2*N [tauq, taup] + N*N [U] +* CWorkspace: prefer 2*N [tauq, taup] + M*N [U] +* RWorkspace: need N [e] + N*N [RU] + 2*N*N [rwork] +* RWorkspace: prefer N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here +* + NRWORK = IRVT + DO 30 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL CLACRM( CHUNK, N, A( I, 1 ), LDA, + $ RWORK( IRU ), N, WORK( IU ), LDWRKU, + $ RWORK( NRWORK ) ) + CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 30 CONTINUE + END IF +* + ELSE IF( WNTQS ) THEN +* +* Path 6s (M >= N, JOBZ='S') +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC +* + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of A +* CWorkspace: need 2*N [tauq, taup] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] +* + CALL CLASET( 'F', M, N, CZERO, CZERO, U, LDU ) + CALL CLACP2( 'F', N, N, RWORK( IRU ), N, U, LDU ) + CALL CUNMBR( 'Q', 'L', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of A +* CWorkspace: need 2*N [tauq, taup] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] +* + CALL CLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL CUNMBR( 'P', 'R', 'C', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + ELSE +* +* Path 6a (M >= N, JOBZ='A') +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC +* + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Set the right corner of U to identity matrix +* + CALL CLASET( 'F', M, M, CZERO, CZERO, U, LDU ) + IF( M.GT.N ) THEN + CALL CLASET( 'F', M-N, M-N, CZERO, CONE, + $ U( N+1, N+1 ), LDU ) + END IF +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of A +* CWorkspace: need 2*N [tauq, taup] + M [work] +* CWorkspace: prefer 2*N [tauq, taup] + M*NB [work] +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] +* + CALL CLACP2( 'F', N, N, RWORK( IRU ), N, U, LDU ) + CALL CUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of A +* CWorkspace: need 2*N [tauq, taup] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] +* + CALL CLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL CUNMBR( 'P', 'R', 'C', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + END IF +* + END IF +* + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce using the LQ decomposition (if +* sufficient workspace available) +* + IF( N.GE.MNTHR1 ) THEN +* + IF( WNTQN ) THEN +* +* Path 1t (N >> M, JOBZ='N') +* No singular vectors to be computed +* + ITAU = 1 + NWORK = ITAU + M +* +* Compute A=L*Q +* CWorkspace: need M [tau] + M [work] +* CWorkspace: prefer M [tau] + M*NB [work] +* RWorkspace: need 0 +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Zero out above L +* + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ), + $ LDA ) + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in A +* CWorkspace: need 2*M [tauq, taup] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + 2*M*NB [work] +* RWorkspace: need M [e] +* + CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + NRWORK = IE + M +* +* Perform bidiagonal SVD, compute singular values only +* CWorkspace: need 0 +* RWorkspace: need M [e] + BDSPAC +* + CALL SBDSDC( 'U', 'N', M, S, RWORK( IE ), DUM,1,DUM,1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) +* + ELSE IF( WNTQO ) THEN +* +* Path 2t (N >> M, JOBZ='O') +* M right singular vectors to be overwritten on A and +* M left singular vectors to be computed in U +* + IVT = 1 + LDWKVT = M +* +* WORK(IVT) is M by M +* + IL = IVT + LDWKVT*M + IF( LWORK .GE. M*N + M*M + 3*M ) THEN +* +* WORK(IL) M by N +* + LDWRKL = M + CHUNK = N + ELSE +* +* WORK(IL) is M by CHUNK +* + LDWRKL = M + CHUNK = ( LWORK - M*M - 3*M ) / M + END IF + ITAU = IL + LDWRKL*CHUNK + NWORK = ITAU + M +* +* Compute A=L*Q +* CWorkspace: need M*M [VT] + M*M [L] + M [tau] + M [work] +* CWorkspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work] +* RWorkspace: need 0 +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy L to WORK(IL), zeroing about above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IL+LDWRKL ), LDWRKL ) +* +* Generate Q in A +* CWorkspace: need M*M [VT] + M*M [L] + M [tau] + M [work] +* CWorkspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work] +* RWorkspace: need 0 +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* CWorkspace: need M*M [VT] + M*M [L] + 2*M [tauq, taup] + M [work] +* CWorkspace: prefer M*M [VT] + M*M [L] + 2*M [tauq, taup] + 2*M*NB [work] +* RWorkspace: need M [e] +* + CALL CGEBRD( M, M, WORK( IL ), LDWRKL, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RU] + M*M [RVT] + BDSPAC +* + IRU = IE + M + IRVT = IRU + M*M + NRWORK = IRVT + M*M + CALL SBDSDC( 'U', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix WORK(IU) +* Overwrite WORK(IU) by the left singular vectors of L +* CWorkspace: need M*M [VT] + M*M [L] + 2*M [tauq, taup] + M [work] +* CWorkspace: prefer M*M [VT] + M*M [L] + 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL CLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL CUNMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) +* Overwrite WORK(IVT) by the right singular vectors of L +* CWorkspace: need M*M [VT] + M*M [L] + 2*M [tauq, taup] + M [work] +* CWorkspace: prefer M*M [VT] + M*M [L] + 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL CLACP2( 'F', M, M, RWORK( IRVT ), M, WORK( IVT ), + $ LDWKVT ) + CALL CUNMBR( 'P', 'R', 'C', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUP ), WORK( IVT ), LDWKVT, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply right singular vectors of L in WORK(IL) by Q +* in A, storing result in WORK(IL) and copying to A +* CWorkspace: need M*M [VT] + M*M [L] +* CWorkspace: prefer M*M [VT] + M*N [L] +* RWorkspace: need 0 +* + DO 40 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL CGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IVT ), M, + $ A( 1, I ), LDA, CZERO, WORK( IL ), + $ LDWRKL ) + CALL CLACPY( 'F', M, BLK, WORK( IL ), LDWRKL, + $ A( 1, I ), LDA ) + 40 CONTINUE +* + ELSE IF( WNTQS ) THEN +* +* Path 3t (N >> M, JOBZ='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IL = 1 +* +* WORK(IL) is M by M +* + LDWRKL = M + ITAU = IL + LDWRKL*M + NWORK = ITAU + M +* +* Compute A=L*Q +* CWorkspace: need M*M [L] + M [tau] + M [work] +* CWorkspace: prefer M*M [L] + M [tau] + M*NB [work] +* RWorkspace: need 0 +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy L to WORK(IL), zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IL+LDWRKL ), LDWRKL ) +* +* Generate Q in A +* CWorkspace: need M*M [L] + M [tau] + M [work] +* CWorkspace: prefer M*M [L] + M [tau] + M*NB [work] +* RWorkspace: need 0 +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* CWorkspace: need M*M [L] + 2*M [tauq, taup] + M [work] +* CWorkspace: prefer M*M [L] + 2*M [tauq, taup] + 2*M*NB [work] +* RWorkspace: need M [e] +* + CALL CGEBRD( M, M, WORK( IL ), LDWRKL, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RU] + M*M [RVT] + BDSPAC +* + IRU = IE + M + IRVT = IRU + M*M + NRWORK = IRVT + M*M + CALL SBDSDC( 'U', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of L +* CWorkspace: need M*M [L] + 2*M [tauq, taup] + M [work] +* CWorkspace: prefer M*M [L] + 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL CLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL CUNMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by left singular vectors of L +* CWorkspace: need M*M [L] + 2*M [tauq, taup] + M [work] +* CWorkspace: prefer M*M [L] + 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL CLACP2( 'F', M, M, RWORK( IRVT ), M, VT, LDVT ) + CALL CUNMBR( 'P', 'R', 'C', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy VT to WORK(IL), multiply right singular vectors of L +* in WORK(IL) by Q in A, storing result in VT +* CWorkspace: need M*M [L] +* RWorkspace: need 0 +* + CALL CLACPY( 'F', M, M, VT, LDVT, WORK( IL ), LDWRKL ) + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IL ), LDWRKL, + $ A, LDA, CZERO, VT, LDVT ) +* + ELSE IF( WNTQA ) THEN +* +* Path 4t (N >> M, JOBZ='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IVT = 1 +* +* WORK(IVT) is M by M +* + LDWKVT = M + ITAU = IVT + LDWKVT*M + NWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* CWorkspace: need M*M [VT] + M [tau] + M [work] +* CWorkspace: prefer M*M [VT] + M [tau] + M*NB [work] +* RWorkspace: need 0 +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* CWorkspace: need M*M [VT] + M [tau] + N [work] +* CWorkspace: prefer M*M [VT] + M [tau] + N*NB [work] +* RWorkspace: need 0 +* + CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Produce L in A, zeroing out above it +* + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ), + $ LDA ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in A +* CWorkspace: need M*M [VT] + 2*M [tauq, taup] + M [work] +* CWorkspace: prefer M*M [VT] + 2*M [tauq, taup] + 2*M*NB [work] +* RWorkspace: need M [e] +* + CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RU] + M*M [RVT] + BDSPAC +* + IRU = IE + M + IRVT = IRU + M*M + NRWORK = IRVT + M*M + CALL SBDSDC( 'U', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of L +* CWorkspace: need M*M [VT] + 2*M [tauq, taup] + M [work] +* CWorkspace: prefer M*M [VT] + 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL CLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL CUNMBR( 'Q', 'L', 'N', M, M, M, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) +* Overwrite WORK(IVT) by right singular vectors of L +* CWorkspace: need M*M [VT] + 2*M [tauq, taup] + M [work] +* CWorkspace: prefer M*M [VT] + 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL CLACP2( 'F', M, M, RWORK( IRVT ), M, WORK( IVT ), + $ LDWKVT ) + CALL CUNMBR( 'P', 'R', 'C', M, M, M, A, LDA, + $ WORK( ITAUP ), WORK( IVT ), LDWKVT, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply right singular vectors of L in WORK(IVT) by +* Q in VT, storing result in A +* CWorkspace: need M*M [VT] +* RWorkspace: need 0 +* + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IVT ), LDWKVT, + $ VT, LDVT, CZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + END IF +* + ELSE IF( N.GE.MNTHR2 ) THEN +* +* MNTHR2 <= N < MNTHR1 +* +* Path 5t (N >> M, but not as much as MNTHR1) +* Reduce to bidiagonal form without QR decomposition, use +* CUNGBR and matrix multiplication to compute singular vectors +* + IE = 1 + NRWORK = IE + M + ITAUQ = 1 + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize A +* CWorkspace: need 2*M [tauq, taup] + N [work] +* CWorkspace: prefer 2*M [tauq, taup] + (M+N)*NB [work] +* RWorkspace: need M [e] +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) +* + IF( WNTQN ) THEN +* +* Path 5tn (N >> M, JOBZ='N') +* Compute singular values only +* CWorkspace: need 0 +* RWorkspace: need M [e] + BDSPAC +* + CALL SBDSDC( 'L', 'N', M, S, RWORK( IE ), DUM,1,DUM,1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M + IVT = NWORK +* +* Path 5to (N >> M, JOBZ='O') +* Copy A to U, generate Q +* CWorkspace: need 2*M [tauq, taup] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Generate P**H in A +* CWorkspace: need 2*M [tauq, taup] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* + LDWKVT = M + IF( LWORK .GE. M*N + 3*M ) THEN +* +* WORK( IVT ) is M by N +* + NWORK = IVT + LDWKVT*N + CHUNK = N + ELSE +* +* WORK( IVT ) is M by CHUNK +* + CHUNK = ( LWORK - 3*M ) / M + NWORK = IVT + LDWKVT*CHUNK + END IF +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC +* + CALL SBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply Q in U by real matrix RWORK(IRVT) +* storing the result in WORK(IVT), copying to U +* CWorkspace: need 2*M [tauq, taup] + M*M [VT] +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + 2*M*M [rwork] +* + CALL CLACRM( M, M, U, LDU, RWORK( IRU ), M, WORK( IVT ), + $ LDWKVT, RWORK( NRWORK ) ) + CALL CLACPY( 'F', M, M, WORK( IVT ), LDWKVT, U, LDU ) +* +* Multiply RWORK(IRVT) by P**H in A, storing the +* result in WORK(IVT), copying to A +* CWorkspace: need 2*M [tauq, taup] + M*M [VT] +* CWorkspace: prefer 2*M [tauq, taup] + M*N [VT] +* RWorkspace: need M [e] + M*M [RVT] + 2*M*M [rwork] +* RWorkspace: prefer M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here +* + NRWORK = IRU + DO 50 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL CLARCM( M, BLK, RWORK( IRVT ), M, A( 1, I ), LDA, + $ WORK( IVT ), LDWKVT, RWORK( NRWORK ) ) + CALL CLACPY( 'F', M, BLK, WORK( IVT ), LDWKVT, + $ A( 1, I ), LDA ) + 50 CONTINUE + ELSE IF( WNTQS ) THEN +* +* Path 5ts (N >> M, JOBZ='S') +* Copy A to U, generate Q +* CWorkspace: need 2*M [tauq, taup] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy A to VT, generate P**H +* CWorkspace: need 2*M [tauq, taup] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) + CALL CUNGBR( 'P', M, N, M, VT, LDVT, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC +* + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M + CALL SBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply Q in U by real matrix RWORK(IRU), storing the +* result in A, copying to U +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + 2*M*M [rwork] +* + CALL CLACRM( M, M, U, LDU, RWORK( IRU ), M, A, LDA, + $ RWORK( NRWORK ) ) + CALL CLACPY( 'F', M, M, A, LDA, U, LDU ) +* +* Multiply real matrix RWORK(IRVT) by P**H in VT, +* storing the result in A, copying to VT +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here +* + NRWORK = IRU + CALL CLARCM( M, N, RWORK( IRVT ), M, VT, LDVT, A, LDA, + $ RWORK( NRWORK ) ) + CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT ) + ELSE +* +* Path 5ta (N >> M, JOBZ='A') +* Copy A to U, generate Q +* CWorkspace: need 2*M [tauq, taup] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy A to VT, generate P**H +* CWorkspace: need 2*M [tauq, taup] + N [work] +* CWorkspace: prefer 2*M [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) + CALL CUNGBR( 'P', N, N, M, VT, LDVT, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC +* + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M + CALL SBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply Q in U by real matrix RWORK(IRU), storing the +* result in A, copying to U +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + 2*M*M [rwork] +* + CALL CLACRM( M, M, U, LDU, RWORK( IRU ), M, A, LDA, + $ RWORK( NRWORK ) ) + CALL CLACPY( 'F', M, M, A, LDA, U, LDU ) +* +* Multiply real matrix RWORK(IRVT) by P**H in VT, +* storing the result in A, copying to VT +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here +* + NRWORK = IRU + CALL CLARCM( M, N, RWORK( IRVT ), M, VT, LDVT, A, LDA, + $ RWORK( NRWORK ) ) + CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT ) + END IF +* + ELSE +* +* N .LT. MNTHR2 +* +* Path 6t (N > M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* Use CUNMBR to compute singular vectors +* + IE = 1 + NRWORK = IE + M + ITAUQ = 1 + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize A +* CWorkspace: need 2*M [tauq, taup] + N [work] +* CWorkspace: prefer 2*M [tauq, taup] + (M+N)*NB [work] +* RWorkspace: need M [e] +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IF( WNTQN ) THEN +* +* Path 6tn (N > M, JOBZ='N') +* Compute singular values only +* CWorkspace: need 0 +* RWorkspace: need M [e] + BDSPAC +* + CALL SBDSDC( 'L', 'N', M, S, RWORK( IE ), DUM,1,DUM,1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN +* Path 6to (N > M, JOBZ='O') + LDWKVT = M + IVT = NWORK + IF( LWORK .GE. M*N + 3*M ) THEN +* +* WORK( IVT ) is M by N +* + CALL CLASET( 'F', M, N, CZERO, CZERO, WORK( IVT ), + $ LDWKVT ) + NWORK = IVT + LDWKVT*N + ELSE +* +* WORK( IVT ) is M by CHUNK +* + CHUNK = ( LWORK - 3*M ) / M + NWORK = IVT + LDWKVT*CHUNK + END IF +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC +* + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M + CALL SBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of A +* CWorkspace: need 2*M [tauq, taup] + M*M [VT] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*M [VT] + M*NB [work] +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] +* + CALL CLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL CUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* + IF( LWORK .GE. M*N + 3*M ) THEN +* +* Path 6to-fast +* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) +* Overwrite WORK(IVT) by right singular vectors of A, +* copying to A +* CWorkspace: need 2*M [tauq, taup] + M*N [VT] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*N [VT] + M*NB [work] +* RWorkspace: need M [e] + M*M [RVT] +* + CALL CLACP2( 'F', M, M, RWORK( IRVT ), M, WORK( IVT ), + $ LDWKVT ) + CALL CUNMBR( 'P', 'R', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), WORK( IVT ), LDWKVT, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + CALL CLACPY( 'F', M, N, WORK( IVT ), LDWKVT, A, LDA ) + ELSE +* +* Path 6to-slow +* Generate P**H in A +* CWorkspace: need 2*M [tauq, taup] + M*M [VT] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*M [VT] + M*NB [work] +* RWorkspace: need 0 +* + CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by real matrix RWORK(IRU), storing the +* result in WORK(IU), copying to A +* CWorkspace: need 2*M [tauq, taup] + M*M [VT] +* CWorkspace: prefer 2*M [tauq, taup] + M*N [VT] +* RWorkspace: need M [e] + M*M [RVT] + 2*M*M [rwork] +* RWorkspace: prefer M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here +* + NRWORK = IRU + DO 60 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL CLARCM( M, BLK, RWORK( IRVT ), M, A( 1, I ), + $ LDA, WORK( IVT ), LDWKVT, + $ RWORK( NRWORK ) ) + CALL CLACPY( 'F', M, BLK, WORK( IVT ), LDWKVT, + $ A( 1, I ), LDA ) + 60 CONTINUE + END IF + ELSE IF( WNTQS ) THEN +* +* Path 6ts (N > M, JOBZ='S') +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC +* + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M + CALL SBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of A +* CWorkspace: need 2*M [tauq, taup] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] +* + CALL CLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL CUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of A +* CWorkspace: need 2*M [tauq, taup] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need M [e] + M*M [RVT] +* + CALL CLASET( 'F', M, N, CZERO, CZERO, VT, LDVT ) + CALL CLACP2( 'F', M, M, RWORK( IRVT ), M, VT, LDVT ) + CALL CUNMBR( 'P', 'R', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + ELSE +* +* Path 6ta (N > M, JOBZ='A') +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC +* + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M +* + CALL SBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of A +* CWorkspace: need 2*M [tauq, taup] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] +* + CALL CLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL CUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Set all of VT to identity matrix +* + CALL CLASET( 'F', N, N, CZERO, CONE, VT, LDVT ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of A +* CWorkspace: need 2*M [tauq, taup] + N [work] +* CWorkspace: prefer 2*M [tauq, taup] + N*NB [work] +* RWorkspace: need M [e] + M*M [RVT] +* + CALL CLACP2( 'F', M, M, RWORK( IRVT ), M, VT, LDVT ) + CALL CUNMBR( 'P', 'R', 'C', N, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + END IF +* + END IF +* + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM ) + $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, + $ RWORK( IE ), MINMN, IERR ) + IF( ANRM.LT.SMLNUM ) + $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM ) + $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, + $ RWORK( IE ), MINMN, IERR ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of CGESDD +* + END diff --git a/dspl/liblapack/SRC/cgesv.f b/dspl/liblapack/SRC/cgesv.f new file mode 100644 index 0000000..2b0fb21 --- /dev/null +++ b/dspl/liblapack/SRC/cgesv.f @@ -0,0 +1,179 @@ +*> \brief CGESV computes the solution to system of linear equations A * X = B for GE matrices (simple driver) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGESV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGESV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +*> +*> The LU decomposition with partial pivoting and row interchanges is +*> used to factor A as +*> A = P * L * U, +*> where P is a permutation matrix, L is unit lower triangular, and U is +*> upper triangular. The factored form of A is then used to solve the +*> system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N coefficient matrix A. +*> On exit, the factors L and U from the factorization +*> A = P*L*U; the unit diagonal elements of L are not stored. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices that define the permutation matrix P; +*> row i of the matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS matrix of right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexGEsolve +* +* ===================================================================== + SUBROUTINE CGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL CGETRF, CGETRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGESV ', -INFO ) + RETURN + END IF +* +* Compute the LU factorization of A. +* + CALL CGETRF( N, N, A, LDA, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB, + $ INFO ) + END IF + RETURN +* +* End of CGESV +* + END diff --git a/dspl/liblapack/SRC/cgesvd.f b/dspl/liblapack/SRC/cgesvd.f new file mode 100644 index 0000000..cdcf9db --- /dev/null +++ b/dspl/liblapack/SRC/cgesvd.f @@ -0,0 +1,3706 @@ +*> \brief CGESVD computes the singular value decomposition (SVD) for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGESVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, +* WORK, LWORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBU, JOBVT +* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL RWORK( * ), S( * ) +* COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGESVD computes the singular value decomposition (SVD) of a complex +*> M-by-N matrix A, optionally computing the left and/or right singular +*> vectors. The SVD is written +*> +*> A = U * SIGMA * conjugate-transpose(V) +*> +*> where SIGMA is an M-by-N matrix which is zero except for its +*> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and +*> V is an N-by-N unitary matrix. The diagonal elements of SIGMA +*> are the singular values of A; they are real and non-negative, and +*> are returned in descending order. The first min(m,n) columns of +*> U and V are the left and right singular vectors of A. +*> +*> Note that the routine returns V**H, not V. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> Specifies options for computing all or part of the matrix U: +*> = 'A': all M columns of U are returned in array U: +*> = 'S': the first min(m,n) columns of U (the left singular +*> vectors) are returned in the array U; +*> = 'O': the first min(m,n) columns of U (the left singular +*> vectors) are overwritten on the array A; +*> = 'N': no columns of U (no left singular vectors) are +*> computed. +*> \endverbatim +*> +*> \param[in] JOBVT +*> \verbatim +*> JOBVT is CHARACTER*1 +*> Specifies options for computing all or part of the matrix +*> V**H: +*> = 'A': all N rows of V**H are returned in the array VT; +*> = 'S': the first min(m,n) rows of V**H (the right singular +*> vectors) are returned in the array VT; +*> = 'O': the first min(m,n) rows of V**H (the right singular +*> vectors) are overwritten on the array A; +*> = 'N': no rows of V**H (no right singular vectors) are +*> computed. +*> +*> JOBVT and JOBU cannot both be 'O'. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if JOBU = 'O', A is overwritten with the first min(m,n) +*> columns of U (the left singular vectors, +*> stored columnwise); +*> if JOBVT = 'O', A is overwritten with the first min(m,n) +*> rows of V**H (the right singular vectors, +*> stored rowwise); +*> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A +*> are destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (min(M,N)) +*> The singular values of A, sorted so that S(i) >= S(i+1). +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX array, dimension (LDU,UCOL) +*> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. +*> If JOBU = 'A', U contains the M-by-M unitary matrix U; +*> if JOBU = 'S', U contains the first min(m,n) columns of U +*> (the left singular vectors, stored columnwise); +*> if JOBU = 'N' or 'O', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= 1; if +*> JOBU = 'S' or 'A', LDU >= M. +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is COMPLEX array, dimension (LDVT,N) +*> If JOBVT = 'A', VT contains the N-by-N unitary matrix +*> V**H; +*> if JOBVT = 'S', VT contains the first min(m,n) rows of +*> V**H (the right singular vectors, stored rowwise); +*> if JOBVT = 'N' or 'O', VT is not referenced. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= 1; if +*> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= MAX(1,2*MIN(M,N)+MAX(M,N)). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (5*min(M,N)) +*> On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the +*> unconverged superdiagonal elements of an upper bidiagonal +*> matrix B whose diagonal is in S (not necessarily sorted). +*> B satisfies A = U * B * VT, so it has the same singular +*> values as A, and singular vectors related by U and VT. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if CBDSQR did not converge, INFO specifies how many +*> superdiagonals of an intermediate bidiagonal form B +*> did not converge to zero. See the description of RWORK +*> above for details. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexGEsing +* +* ===================================================================== + SUBROUTINE CGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, + $ WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER JOBU, JOBVT + INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. + REAL RWORK( * ), S( * ) + COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS, + $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS + INTEGER BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL, + $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU, + $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU, + $ NRVT, WRKBL + INTEGER LWORK_CGEQRF, LWORK_CUNGQR_N, LWORK_CUNGQR_M, + $ LWORK_CGEBRD, LWORK_CUNGBR_P, LWORK_CUNGBR_Q, + $ LWORK_CGELQF, LWORK_CUNGLQ_N, LWORK_CUNGLQ_M + REAL ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + REAL DUM( 1 ) + COMPLEX CDUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CBDSQR, CGEBRD, CGELQF, CGEMM, CGEQRF, CLACPY, + $ CLASCL, CLASET, CUNGBR, CUNGLQ, CUNGQR, CUNMBR, + $ SLASCL, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + WNTUA = LSAME( JOBU, 'A' ) + WNTUS = LSAME( JOBU, 'S' ) + WNTUAS = WNTUA .OR. WNTUS + WNTUO = LSAME( JOBU, 'O' ) + WNTUN = LSAME( JOBU, 'N' ) + WNTVA = LSAME( JOBVT, 'A' ) + WNTVS = LSAME( JOBVT, 'S' ) + WNTVAS = WNTVA .OR. WNTVS + WNTVO = LSAME( JOBVT, 'O' ) + WNTVN = LSAME( JOBVT, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR. + $ ( WNTVO .AND. WNTUO ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN + INFO = -9 + ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR. + $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to +* real workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + IF( M.GE.N .AND. MINMN.GT.0 ) THEN +* +* Space needed for ZBDSQR is BDSPAC = 5*N +* + MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 ) +* Compute space needed for CGEQRF + CALL CGEQRF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) + LWORK_CGEQRF = INT( CDUM(1) ) +* Compute space needed for CUNGQR + CALL CUNGQR( M, N, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) + LWORK_CUNGQR_N = INT( CDUM(1) ) + CALL CUNGQR( M, M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) + LWORK_CUNGQR_M = INT( CDUM(1) ) +* Compute space needed for CGEBRD + CALL CGEBRD( N, N, A, LDA, S, DUM(1), CDUM(1), + $ CDUM(1), CDUM(1), -1, IERR ) + LWORK_CGEBRD = INT( CDUM(1) ) +* Compute space needed for CUNGBR + CALL CUNGBR( 'P', N, N, N, A, LDA, CDUM(1), + $ CDUM(1), -1, IERR ) + LWORK_CUNGBR_P = INT( CDUM(1) ) + CALL CUNGBR( 'Q', N, N, N, A, LDA, CDUM(1), + $ CDUM(1), -1, IERR ) + LWORK_CUNGBR_Q = INT( CDUM(1) ) +* + MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 ) + IF( M.GE.MNTHR ) THEN + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* + MAXWRK = N + LWORK_CGEQRF + MAXWRK = MAX( MAXWRK, 2*N+LWORK_CGEBRD ) + IF( WNTVO .OR. WNTVAS ) + $ MAXWRK = MAX( MAXWRK, 2*N+LWORK_CUNGBR_P ) + MINWRK = 3*N + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* + WRKBL = N + LWORK_CGEQRF + WRKBL = MAX( WRKBL, N+LWORK_CUNGQR_N ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CGEBRD ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_Q ) + MAXWRK = MAX( N*N+WRKBL, N*N+M*N ) + MINWRK = 2*N + M + ELSE IF( WNTUO .AND. WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or +* 'A') +* + WRKBL = N + LWORK_CGEQRF + WRKBL = MAX( WRKBL, N+LWORK_CUNGQR_N ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CGEBRD ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_Q ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_P ) + MAXWRK = MAX( N*N+WRKBL, N*N+M*N ) + MINWRK = 2*N + M + ELSE IF( WNTUS .AND. WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* + WRKBL = N + LWORK_CGEQRF + WRKBL = MAX( WRKBL, N+LWORK_CUNGQR_N ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CGEBRD ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_Q ) + MAXWRK = N*N + WRKBL + MINWRK = 2*N + M + ELSE IF( WNTUS .AND. WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* + WRKBL = N + LWORK_CGEQRF + WRKBL = MAX( WRKBL, N+LWORK_CUNGQR_N ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CGEBRD ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_Q ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_P ) + MAXWRK = 2*N*N + WRKBL + MINWRK = 2*N + M + ELSE IF( WNTUS .AND. WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or +* 'A') +* + WRKBL = N + LWORK_CGEQRF + WRKBL = MAX( WRKBL, N+LWORK_CUNGQR_N ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CGEBRD ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_Q ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_P ) + MAXWRK = N*N + WRKBL + MINWRK = 2*N + M + ELSE IF( WNTUA .AND. WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* + WRKBL = N + LWORK_CGEQRF + WRKBL = MAX( WRKBL, N+LWORK_CUNGQR_M ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CGEBRD ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_Q ) + MAXWRK = N*N + WRKBL + MINWRK = 2*N + M + ELSE IF( WNTUA .AND. WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* + WRKBL = N + LWORK_CGEQRF + WRKBL = MAX( WRKBL, N+LWORK_CUNGQR_M ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CGEBRD ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_Q ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_P ) + MAXWRK = 2*N*N + WRKBL + MINWRK = 2*N + M + ELSE IF( WNTUA .AND. WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or +* 'A') +* + WRKBL = N + LWORK_CGEQRF + WRKBL = MAX( WRKBL, N+LWORK_CUNGQR_M ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CGEBRD ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_Q ) + WRKBL = MAX( WRKBL, 2*N+LWORK_CUNGBR_P ) + MAXWRK = N*N + WRKBL + MINWRK = 2*N + M + END IF + ELSE +* +* Path 10 (M at least N, but not much larger) +* + CALL CGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1), + $ CDUM(1), CDUM(1), -1, IERR ) + LWORK_CGEBRD = INT( CDUM(1) ) + MAXWRK = 2*N + LWORK_CGEBRD + IF( WNTUS .OR. WNTUO ) THEN + CALL CUNGBR( 'Q', M, N, N, A, LDA, CDUM(1), + $ CDUM(1), -1, IERR ) + LWORK_CUNGBR_Q = INT( CDUM(1) ) + MAXWRK = MAX( MAXWRK, 2*N+LWORK_CUNGBR_Q ) + END IF + IF( WNTUA ) THEN + CALL CUNGBR( 'Q', M, M, N, A, LDA, CDUM(1), + $ CDUM(1), -1, IERR ) + LWORK_CUNGBR_Q = INT( CDUM(1) ) + MAXWRK = MAX( MAXWRK, 2*N+LWORK_CUNGBR_Q ) + END IF + IF( .NOT.WNTVN ) THEN + MAXWRK = MAX( MAXWRK, 2*N+LWORK_CUNGBR_P ) + END IF + MINWRK = 2*N + M + END IF + ELSE IF( MINMN.GT.0 ) THEN +* +* Space needed for CBDSQR is BDSPAC = 5*M +* + MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 ) +* Compute space needed for CGELQF + CALL CGELQF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) + LWORK_CGELQF = INT( CDUM(1) ) +* Compute space needed for CUNGLQ + CALL CUNGLQ( N, N, M, CDUM(1), N, CDUM(1), CDUM(1), -1, + $ IERR ) + LWORK_CUNGLQ_N = INT( CDUM(1) ) + CALL CUNGLQ( M, N, M, A, LDA, CDUM(1), CDUM(1), -1, IERR ) + LWORK_CUNGLQ_M = INT( CDUM(1) ) +* Compute space needed for CGEBRD + CALL CGEBRD( M, M, A, LDA, S, DUM(1), CDUM(1), + $ CDUM(1), CDUM(1), -1, IERR ) + LWORK_CGEBRD = INT( CDUM(1) ) +* Compute space needed for CUNGBR P + CALL CUNGBR( 'P', M, M, M, A, N, CDUM(1), + $ CDUM(1), -1, IERR ) + LWORK_CUNGBR_P = INT( CDUM(1) ) +* Compute space needed for CUNGBR Q + CALL CUNGBR( 'Q', M, M, M, A, N, CDUM(1), + $ CDUM(1), -1, IERR ) + LWORK_CUNGBR_Q = INT( CDUM(1) ) + IF( N.GE.MNTHR ) THEN + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* + MAXWRK = M + LWORK_CGELQF + MAXWRK = MAX( MAXWRK, 2*M+LWORK_CGEBRD ) + IF( WNTUO .OR. WNTUAS ) + $ MAXWRK = MAX( MAXWRK, 2*M+LWORK_CUNGBR_Q ) + MINWRK = 3*M + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* + WRKBL = M + LWORK_CGELQF + WRKBL = MAX( WRKBL, M+LWORK_CUNGLQ_M ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CGEBRD ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_P ) + MAXWRK = MAX( M*M+WRKBL, M*M+M*N ) + MINWRK = 2*M + N + ELSE IF( WNTVO .AND. WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', +* JOBVT='O') +* + WRKBL = M + LWORK_CGELQF + WRKBL = MAX( WRKBL, M+LWORK_CUNGLQ_M ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CGEBRD ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_P ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_Q ) + MAXWRK = MAX( M*M+WRKBL, M*M+M*N ) + MINWRK = 2*M + N + ELSE IF( WNTVS .AND. WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* + WRKBL = M + LWORK_CGELQF + WRKBL = MAX( WRKBL, M+LWORK_CUNGLQ_M ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CGEBRD ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_P ) + MAXWRK = M*M + WRKBL + MINWRK = 2*M + N + ELSE IF( WNTVS .AND. WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* + WRKBL = M + LWORK_CGELQF + WRKBL = MAX( WRKBL, M+LWORK_CUNGLQ_M ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CGEBRD ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_P ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_Q ) + MAXWRK = 2*M*M + WRKBL + MINWRK = 2*M + N + ELSE IF( WNTVS .AND. WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* + WRKBL = M + LWORK_CGELQF + WRKBL = MAX( WRKBL, M+LWORK_CUNGLQ_M ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CGEBRD ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_P ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_Q ) + MAXWRK = M*M + WRKBL + MINWRK = 2*M + N + ELSE IF( WNTVA .AND. WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* + WRKBL = M + LWORK_CGELQF + WRKBL = MAX( WRKBL, M+LWORK_CUNGLQ_N ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CGEBRD ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_P ) + MAXWRK = M*M + WRKBL + MINWRK = 2*M + N + ELSE IF( WNTVA .AND. WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* + WRKBL = M + LWORK_CGELQF + WRKBL = MAX( WRKBL, M+LWORK_CUNGLQ_N ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CGEBRD ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_P ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_Q ) + MAXWRK = 2*M*M + WRKBL + MINWRK = 2*M + N + ELSE IF( WNTVA .AND. WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* + WRKBL = M + LWORK_CGELQF + WRKBL = MAX( WRKBL, M+LWORK_CUNGLQ_N ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CGEBRD ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_P ) + WRKBL = MAX( WRKBL, 2*M+LWORK_CUNGBR_Q ) + MAXWRK = M*M + WRKBL + MINWRK = 2*M + N + END IF + ELSE +* +* Path 10t(N greater than M, but not much larger) +* + CALL CGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1), + $ CDUM(1), CDUM(1), -1, IERR ) + LWORK_CGEBRD = INT( CDUM(1) ) + MAXWRK = 2*M + LWORK_CGEBRD + IF( WNTVS .OR. WNTVO ) THEN +* Compute space needed for CUNGBR P + CALL CUNGBR( 'P', M, N, M, A, N, CDUM(1), + $ CDUM(1), -1, IERR ) + LWORK_CUNGBR_P = INT( CDUM(1) ) + MAXWRK = MAX( MAXWRK, 2*M+LWORK_CUNGBR_P ) + END IF + IF( WNTVA ) THEN + CALL CUNGBR( 'P', N, N, M, A, N, CDUM(1), + $ CDUM(1), -1, IERR ) + LWORK_CUNGBR_P = INT( CDUM(1) ) + MAXWRK = MAX( MAXWRK, 2*M+LWORK_CUNGBR_P ) + END IF + IF( .NOT.WNTUN ) THEN + MAXWRK = MAX( MAXWRK, 2*M+LWORK_CUNGBR_Q ) + END IF + MINWRK = 2*M + N + END IF + END IF + MAXWRK = MAX( MINWRK, MAXWRK ) + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGESVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce using the QR +* decomposition (if sufficient workspace available) +* + IF( M.GE.MNTHR ) THEN +* + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* No left singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: need 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out below R +* + IF( N .GT. 1 ) THEN + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + END IF + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + NCVT = 0 + IF( WNTVO .OR. WNTVAS ) THEN +* +* If right singular vectors desired, generate P'. +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + NCVT = N + END IF + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A if desired +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, NCVT, 0, 0, S, RWORK( IE ), A, LDA, + $ CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO ) +* +* If right singular vectors desired in VT, copy them there +* + IF( WNTVAS ) + $ CALL CLACPY( 'F', N, N, A, LDA, VT, LDVT ) +* + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* N left singular vectors to be overwritten on A and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N, WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR) and zero out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: need 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, 1, + $ WORK( IR ), LDWRKR, CDUM, 1, + $ RWORK( IRWORK ), INFO ) + IU = ITAUQ +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (CWorkspace: need N*N+N, prefer N*N+M*N) +* (RWorkspace: 0) +* + DO 10 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL CGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, CZERO, + $ WORK( IU ), LDWRKU ) + CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 10 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) +* (RWorkspace: N) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing A +* (CWorkspace: need 3*N, prefer 2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, 1, + $ A, LDA, CDUM, 1, RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO .AND. WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') +* N left singular vectors to be overwritten on A and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N and WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ VT( 2, 1 ), LDVT ) +* +* Generate Q in A +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT, copying result to WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) and computing right +* singular vectors of R in VT +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT, + $ LDVT, WORK( IR ), LDWRKR, CDUM, 1, + $ RWORK( IRWORK ), INFO ) + IU = ITAUQ +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (CWorkspace: need N*N+N, prefer N*N+M*N) +* (RWorkspace: 0) +* + DO 20 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL CGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, CZERO, + $ WORK( IU ), LDWRKU ) + CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 20 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ VT( 2, 1 ), LDVT ) +* +* Generate Q in A +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: N) +* + CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in A by left vectors bidiagonalizing R +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUS ) THEN +* + IF( WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* N left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, + $ 1, WORK( IR ), LDWRKR, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in U +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA, + $ WORK( IR ), LDWRKR, CZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + IF( N .GT. 1 ) THEN + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ A( 2, 1 ), LDA ) + END IF +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, + $ 1, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* N left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (CWorkspace: need 2*N*N+3*N, +* prefer 2*N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need 2*N*N+3*N-1, +* prefer 2*N*N+2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (CWorkspace: need 2*N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA, + $ WORK( IU ), LDWRKU, CZERO, U, LDU ) +* +* Copy right singular vectors of R to A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + IF( N .GT. 1 ) THEN + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ A( 2, 1 ), LDA ) + END IF +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in A +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A, + $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' +* or 'A') +* N left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (CWorkspace: need N*N+3*N-1, +* prefer N*N+2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA, + $ WORK( IU ), LDWRKU, CZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ VT( 2, 1 ), LDVT ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTUA ) THEN +* + IF( WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* M left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in U +* (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, + $ 1, WORK( IR ), LDWRKR, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IR), storing result in A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU, + $ WORK( IR ), LDWRKR, CZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL CLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N+M, prefer N+M*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + IF( N .GT. 1 ) THEN + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ A( 2, 1 ), LDA ) + END IF +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, + $ 1, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* M left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+MAX( N+M, 3*N ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (CWorkspace: need 2*N*N+3*N, +* prefer 2*N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need 2*N*N+3*N-1, +* prefer 2*N*N+2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (CWorkspace: need 2*N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU, + $ WORK( IU ), LDWRKU, CZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL CLACPY( 'F', M, N, A, LDA, U, LDU ) +* +* Copy right singular vectors of R from WORK(IR) to A +* + CALL CLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N+M, prefer N+M*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + IF( N .GT. 1 ) THEN + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ A( 2, 1 ), LDA ) + END IF +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in A +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A, + $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' +* or 'A') +* M left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (CWorkspace: need N*N+3*N-1, +* prefer N*N+2*N+(N-1)*NB) +* (RWorkspace: need 0) +* + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU, + $ WORK( IU ), LDWRKU, CZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL CLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N+M, prefer N+M*NB) +* (RWorkspace: 0) +* + CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R from A to VT, zeroing out below it +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ VT( 2, 1 ), LDVT ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* M .LT. MNTHR +* +* Path 10 (M at least N, but not much larger) +* Reduce to bidiagonal form without QR decomposition +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) +* (RWorkspace: need N) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB) +* (RWorkspace: 0) +* + CALL CLACPY( 'L', M, N, A, LDA, U, LDU ) + IF( WNTUS ) + $ NCU = N + IF( WNTUA ) + $ NCU = M + CALL CUNGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (CWorkspace: need 3*N, prefer 2*N+N*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IRWORK = IE + N + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), A, + $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + END IF +* + END IF +* + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce using the LQ decomposition (if +* sufficient workspace available) +* + IF( N.GE.MNTHR ) THEN +* + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* No right singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out above L +* + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ), + $ LDA ) + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUO .OR. WNTUAS ) THEN +* +* If left singular vectors desired, generate Q +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IRWORK = IE + M + NRU = 0 + IF( WNTUO .OR. WNTUAS ) + $ NRU = M +* +* Perform bidiagonal QR iteration, computing left singular +* vectors of A in A if desired +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, 0, NRU, 0, S, RWORK( IE ), CDUM, 1, + $ A, LDA, CDUM, 1, RWORK( IRWORK ), INFO ) +* +* If left singular vectors desired in U, copy them there +* + IF( WNTUAS ) + $ CALL CLACPY( 'F', M, M, A, LDA, U, LDU ) +* + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* M right singular vectors to be overwritten on A and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR) and zero out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L +* (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) + IU = ITAUQ +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (CWorkspace: need M*M+M, prefer M*M+M*N) +* (RWorkspace: 0) +* + DO 30 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL CGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, CZERO, + $ WORK( IU ), LDWRKU ) + CALL CLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 30 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'L', M, N, 0, 0, S, RWORK( IE ), A, LDA, + $ CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVO .AND. WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') +* M right singular vectors to be overwritten on A and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing about above it +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U, copying result to WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR ) +* +* Generate right vectors bidiagonalizing L in WORK(IR) +* (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U, and computing right +* singular vectors of L in WORK(IR) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) + IU = ITAUQ +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (CWorkspace: need M*M+M, prefer M*M+M*N)) +* (RWorkspace: 0) +* + DO 40 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL CGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, CZERO, + $ WORK( IU ), LDWRKU ) + CALL CLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 40 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in A +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU, + $ WORK( ITAUP ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), A, LDA, + $ U, LDU, CDUM, 1, RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVS ) THEN +* + IF( WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* M right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L in +* WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in A, storing result in VT +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ), + $ LDWRKR, A, LDA, CZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy result to VT +* + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ A( 1, 2 ), LDA ) +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT, + $ LDVT, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out below it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (CWorkspace: need 2*M*M+3*M, +* prefer 2*M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need 2*M*M+3*M-1, +* prefer 2*M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (CWorkspace: need 2*M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), + $ LDWRKU, A, LDA, CZERO, VT, LDVT ) +* +* Copy left singular vectors of L to A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ A( 1, 2 ), LDA ) +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors of L in A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is LDA by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need M*M+3*M-1, +* prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), + $ LDWRKU, A, LDA, CZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ U( 1, 2 ), LDU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTVA ) THEN +* + IF( WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* N right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in VT +* (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need M*M+3*M-1, +* prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in VT, storing result in A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ), + $ LDWRKR, VT, LDVT, CZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M+N, prefer M+N*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ A( 1, 2 ), LDA ) +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT, + $ LDVT, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+MAX( N+M, 3*M ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (CWorkspace: need 2*M*M+3*M, +* prefer 2*M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need 2*M*M+3*M-1, +* prefer 2*M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (CWorkspace: need 2*M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), + $ LDWRKU, VT, LDVT, CZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* +* Copy left singular vectors of A from WORK(IR) to A +* + CALL CLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M+N, prefer M+N*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ A( 1, 2 ), LDA ) +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by M +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is M by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), + $ LDWRKU, VT, LDVT, CZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M+N, prefer M+N*NB) +* (RWorkspace: 0) +* + CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ U( 1, 2 ), LDU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* N .LT. MNTHR +* +* Path 10t(N greater than M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* (RWorkspace: M) +* + CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL CUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB) +* (RWorkspace: 0) +* + CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT ) + IF( WNTVA ) + $ NRVT = N + IF( WNTVS ) + $ NRVT = M + CALL CUNGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IRWORK = IE + M + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), A, + $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + END IF +* + END IF +* + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM ) + $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, + $ RWORK( IE ), MINMN, IERR ) + IF( ANRM.LT.SMLNUM ) + $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM ) + $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, + $ RWORK( IE ), MINMN, IERR ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of CGESVD +* + END diff --git a/dspl/liblapack/SRC/cgesvdx.f b/dspl/liblapack/SRC/cgesvdx.f new file mode 100644 index 0000000..5b08d57 --- /dev/null +++ b/dspl/liblapack/SRC/cgesvdx.f @@ -0,0 +1,858 @@ +*> \brief CGESVDX computes the singular value decomposition (SVD) for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGESVDX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGESVDX( JOBU, JOBVT, RANGE, M, N, A, LDA, VL, VU, +* $ IL, IU, NS, S, U, LDU, VT, LDVT, WORK, +* $ LWORK, RWORK, IWORK, INFO ) +* +* +* .. Scalar Arguments .. +* CHARACTER JOBU, JOBVT, RANGE +* INTEGER IL, INFO, IU, LDA, LDU, LDVT, LWORK, M, N, NS +* REAL VL, VU +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL S( * ), RWORK( * ) +* COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGESVDX computes the singular value decomposition (SVD) of a complex +*> M-by-N matrix A, optionally computing the left and/or right singular +*> vectors. The SVD is written +*> +*> A = U * SIGMA * transpose(V) +*> +*> where SIGMA is an M-by-N matrix which is zero except for its +*> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and +*> V is an N-by-N unitary matrix. The diagonal elements of SIGMA +*> are the singular values of A; they are real and non-negative, and +*> are returned in descending order. The first min(m,n) columns of +*> U and V are the left and right singular vectors of A. +*> +*> CGESVDX uses an eigenvalue problem for obtaining the SVD, which +*> allows for the computation of a subset of singular values and +*> vectors. See SBDSVDX for details. +*> +*> Note that the routine returns V**T, not V. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> Specifies options for computing all or part of the matrix U: +*> = 'V': the first min(m,n) columns of U (the left singular +*> vectors) or as specified by RANGE are returned in +*> the array U; +*> = 'N': no columns of U (no left singular vectors) are +*> computed. +*> \endverbatim +*> +*> \param[in] JOBVT +*> \verbatim +*> JOBVT is CHARACTER*1 +*> Specifies options for computing all or part of the matrix +*> V**T: +*> = 'V': the first min(m,n) rows of V**T (the right singular +*> vectors) or as specified by RANGE are returned in +*> the array VT; +*> = 'N': no rows of V**T (no right singular vectors) are +*> computed. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all singular values will be found. +*> = 'V': all singular values in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th singular values will be found. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the contents of A are destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for singular values. VU > VL. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for singular values. VU > VL. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest singular value to be returned. +*> 1 <= IL <= IU <= min(M,N), if min(M,N) > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest singular value to be returned. +*> 1 <= IL <= IU <= min(M,N), if min(M,N) > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[out] NS +*> \verbatim +*> NS is INTEGER +*> The total number of singular values found, +*> 0 <= NS <= min(M,N). +*> If RANGE = 'A', NS = min(M,N); if RANGE = 'I', NS = IU-IL+1. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (min(M,N)) +*> The singular values of A, sorted so that S(i) >= S(i+1). +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX array, dimension (LDU,UCOL) +*> If JOBU = 'V', U contains columns of U (the left singular +*> vectors, stored columnwise) as specified by RANGE; if +*> JOBU = 'N', U is not referenced. +*> Note: The user must ensure that UCOL >= NS; if RANGE = 'V', +*> the exact value of NS is not known in advance and an upper +*> bound must be used. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= 1; if +*> JOBU = 'V', LDU >= M. +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is COMPLEX array, dimension (LDVT,N) +*> If JOBVT = 'V', VT contains the rows of V**T (the right singular +*> vectors, stored rowwise) as specified by RANGE; if JOBVT = 'N', +*> VT is not referenced. +*> Note: The user must ensure that LDVT >= NS; if RANGE = 'V', +*> the exact value of NS is not known in advance and an upper +*> bound must be used. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= 1; if +*> JOBVT = 'V', LDVT >= NS (see above). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK; +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= MAX(1,MIN(M,N)*(MIN(M,N)+4)) for the paths (see +*> comments inside the code): +*> - PATH 1 (M much larger than N) +*> - PATH 1t (N much larger than M) +*> LWORK >= MAX(1,MIN(M,N)*2+MAX(M,N)) for the other paths. +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (MAX(1,LRWORK)) +*> LRWORK >= MIN(M,N)*(MIN(M,N)*2+15*MIN(M,N)). +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (12*MIN(M,N)) +*> If INFO = 0, the first NS elements of IWORK are zero. If INFO > 0, +*> then IWORK contains the indices of the eigenvectors that failed +*> to converge in SBDSVDX/SSTEVX. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge +*> in SBDSVDX/SSTEVX. +*> if INFO = N*2 + 1, an internal error occurred in +*> SBDSVDX +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexGEsing +* +* ===================================================================== + SUBROUTINE CGESVDX( JOBU, JOBVT, RANGE, M, N, A, LDA, VL, VU, + $ IL, IU, NS, S, U, LDU, VT, LDVT, WORK, + $ LWORK, RWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBU, JOBVT, RANGE + INTEGER IL, INFO, IU, LDA, LDU, LDVT, LWORK, M, N, NS + REAL VL, VU +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL S( * ), RWORK( * ) + COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + CHARACTER JOBZ, RNGTGK + LOGICAL ALLS, INDS, LQUERY, VALS, WANTU, WANTVT + INTEGER I, ID, IE, IERR, ILQF, ILTGK, IQRF, ISCL, + $ ITAU, ITAUP, ITAUQ, ITEMP, ITEMPR, ITGKZ, + $ IUTGK, J, K, MAXWRK, MINMN, MINWRK, MNTHR + REAL ABSTOL, ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CGEBRD, CGELQF, CGEQRF, CLASCL, CLASET, + $ CUNMBR, CUNMQR, CUNMLQ, CLACPY, + $ SBDSVDX, SLASCL, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, CLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, CLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + NS = 0 + INFO = 0 + ABSTOL = 2*SLAMCH('S') + LQUERY = ( LWORK.EQ.-1 ) + MINMN = MIN( M, N ) + + WANTU = LSAME( JOBU, 'V' ) + WANTVT = LSAME( JOBVT, 'V' ) + IF( WANTU .OR. WANTVT ) THEN + JOBZ = 'V' + ELSE + JOBZ = 'N' + END IF + ALLS = LSAME( RANGE, 'A' ) + VALS = LSAME( RANGE, 'V' ) + INDS = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.LSAME( JOBU, 'V' ) .AND. + $ .NOT.LSAME( JOBU, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( JOBVT, 'V' ) .AND. + $ .NOT.LSAME( JOBVT, 'N' ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLS .OR. VALS .OR. INDS ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( M.GT.LDA ) THEN + INFO = -7 + ELSE IF( MINMN.GT.0 ) THEN + IF( VALS ) THEN + IF( VL.LT.ZERO ) THEN + INFO = -8 + ELSE IF( VU.LE.VL ) THEN + INFO = -9 + END IF + ELSE IF( INDS ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, MINMN ) ) THEN + INFO = -10 + ELSE IF( IU.LT.MIN( MINMN, IL ) .OR. IU.GT.MINMN ) THEN + INFO = -11 + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( WANTU .AND. LDU.LT.M ) THEN + INFO = -15 + ELSE IF( WANTVT ) THEN + IF( INDS ) THEN + IF( LDVT.LT.IU-IL+1 ) THEN + INFO = -17 + END IF + ELSE IF( LDVT.LT.MINMN ) THEN + INFO = -17 + END IF + END IF + END IF + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + IF( MINMN.GT.0 ) THEN + IF( M.GE.N ) THEN + MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 ) + IF( M.GE.MNTHR ) THEN +* +* Path 1 (M much larger than N) +* + MINWRK = N*(N+5) + MAXWRK = N + N*ILAENV(1,'CGEQRF',' ',M,N,-1,-1) + MAXWRK = MAX(MAXWRK, + $ N*N+2*N+2*N*ILAENV(1,'CGEBRD',' ',N,N,-1,-1)) + IF (WANTU .OR. WANTVT) THEN + MAXWRK = MAX(MAXWRK, + $ N*N+2*N+N*ILAENV(1,'CUNMQR','LN',N,N,N,-1)) + END IF + ELSE +* +* Path 2 (M at least N, but not much larger) +* + MINWRK = 3*N + M + MAXWRK = 2*N + (M+N)*ILAENV(1,'CGEBRD',' ',M,N,-1,-1) + IF (WANTU .OR. WANTVT) THEN + MAXWRK = MAX(MAXWRK, + $ 2*N+N*ILAENV(1,'CUNMQR','LN',N,N,N,-1)) + END IF + END IF + ELSE + MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 ) + IF( N.GE.MNTHR ) THEN +* +* Path 1t (N much larger than M) +* + MINWRK = M*(M+5) + MAXWRK = M + M*ILAENV(1,'CGELQF',' ',M,N,-1,-1) + MAXWRK = MAX(MAXWRK, + $ M*M+2*M+2*M*ILAENV(1,'CGEBRD',' ',M,M,-1,-1)) + IF (WANTU .OR. WANTVT) THEN + MAXWRK = MAX(MAXWRK, + $ M*M+2*M+M*ILAENV(1,'CUNMQR','LN',M,M,M,-1)) + END IF + ELSE +* +* Path 2t (N greater than M, but not much larger) +* +* + MINWRK = 3*M + N + MAXWRK = 2*M + (M+N)*ILAENV(1,'CGEBRD',' ',M,N,-1,-1) + IF (WANTU .OR. WANTVT) THEN + MAXWRK = MAX(MAXWRK, + $ 2*M+M*ILAENV(1,'CUNMQR','LN',M,M,M,-1)) + END IF + END IF + END IF + END IF + MAXWRK = MAX( MAXWRK, MINWRK ) + WORK( 1 ) = CMPLX( REAL( MAXWRK ), ZERO ) +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGESVDX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* +* Set singular values indices accord to RANGE='A'. +* + IF( ALLS ) THEN + RNGTGK = 'I' + ILTGK = 1 + IUTGK = MIN( M, N ) + ELSE IF( INDS ) THEN + RNGTGK = 'I' + ILTGK = IL + IUTGK = IU + ELSE + RNGTGK = 'V' + ILTGK = 0 + IUTGK = 0 + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce A using the QR +* decomposition. +* + IF( M.GE.MNTHR ) THEN +* +* Path 1 (M much larger than N): +* A = Q * R = Q * ( QB * B * PB**T ) +* = Q * ( QB * ( UB * S * VB**T ) * PB**T ) +* U = Q * QB * UB; V**T = VB**T * PB**T +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + ITAU = 1 + ITEMP = ITAU + N + CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) +* +* Copy R into WORK and bidiagonalize it: +* (Workspace: need N*N+3*N, prefer N*N+N+2*N*NB) +* + IQRF = ITEMP + ITAUQ = ITEMP + N*N + ITAUP = ITAUQ + N + ITEMP = ITAUP + N + ID = 1 + IE = ID + N + ITGKZ = IE + N + CALL CLACPY( 'U', N, N, A, LDA, WORK( IQRF ), N ) + CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IQRF+1 ), N ) + CALL CGEBRD( N, N, WORK( IQRF ), N, RWORK( ID ), + $ RWORK( IE ), WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( ITEMP ), LWORK-ITEMP+1, INFO ) + ITEMPR = ITGKZ + N*(N*2+1) +* +* Solve eigenvalue problem TGK*Z=Z*S. +* (Workspace: need 2*N*N+14*N) +* + CALL SBDSVDX( 'U', JOBZ, RNGTGK, N, RWORK( ID ), + $ RWORK( IE ), VL, VU, ILTGK, IUTGK, NS, S, + $ RWORK( ITGKZ ), N*2, RWORK( ITEMPR ), + $ IWORK, INFO) +* +* If needed, compute left singular vectors. +* + IF( WANTU ) THEN + K = ITGKZ + DO I = 1, NS + DO J = 1, N + U( J, I ) = CMPLX( RWORK( K ), ZERO ) + K = K + 1 + END DO + K = K + N + END DO + CALL CLASET( 'A', M-N, NS, CZERO, CZERO, U( N+1,1 ), LDU) +* +* Call CUNMBR to compute QB*UB. +* (Workspace in WORK( ITEMP ): need N, prefer N*NB) +* + CALL CUNMBR( 'Q', 'L', 'N', N, NS, N, WORK( IQRF ), N, + $ WORK( ITAUQ ), U, LDU, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) +* +* Call CUNMQR to compute Q*(QB*UB). +* (Workspace in WORK( ITEMP ): need N, prefer N*NB) +* + CALL CUNMQR( 'L', 'N', M, NS, N, A, LDA, + $ WORK( ITAU ), U, LDU, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF +* +* If needed, compute right singular vectors. +* + IF( WANTVT) THEN + K = ITGKZ + N + DO I = 1, NS + DO J = 1, N + VT( I, J ) = CMPLX( RWORK( K ), ZERO ) + K = K + 1 + END DO + K = K + N + END DO +* +* Call CUNMBR to compute VB**T * PB**T +* (Workspace in WORK( ITEMP ): need N, prefer N*NB) +* + CALL CUNMBR( 'P', 'R', 'C', NS, N, N, WORK( IQRF ), N, + $ WORK( ITAUP ), VT, LDVT, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF + ELSE +* +* Path 2 (M at least N, but not much larger) +* Reduce A to bidiagonal form without QR decomposition +* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T +* U = QB * UB; V**T = VB**T * PB**T +* +* Bidiagonalize A +* (Workspace: need 2*N+M, prefer 2*N+(M+N)*NB) +* + ITAUQ = 1 + ITAUP = ITAUQ + N + ITEMP = ITAUP + N + ID = 1 + IE = ID + N + ITGKZ = IE + N + CALL CGEBRD( M, N, A, LDA, RWORK( ID ), RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + ITEMPR = ITGKZ + N*(N*2+1) +* +* Solve eigenvalue problem TGK*Z=Z*S. +* (Workspace: need 2*N*N+14*N) +* + CALL SBDSVDX( 'U', JOBZ, RNGTGK, N, RWORK( ID ), + $ RWORK( IE ), VL, VU, ILTGK, IUTGK, NS, S, + $ RWORK( ITGKZ ), N*2, RWORK( ITEMPR ), + $ IWORK, INFO) +* +* If needed, compute left singular vectors. +* + IF( WANTU ) THEN + K = ITGKZ + DO I = 1, NS + DO J = 1, N + U( J, I ) = CMPLX( RWORK( K ), ZERO ) + K = K + 1 + END DO + K = K + N + END DO + CALL CLASET( 'A', M-N, NS, CZERO, CZERO, U( N+1,1 ), LDU) +* +* Call CUNMBR to compute QB*UB. +* (Workspace in WORK( ITEMP ): need N, prefer N*NB) +* + CALL CUNMBR( 'Q', 'L', 'N', M, NS, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( ITEMP ), + $ LWORK-ITEMP+1, IERR ) + END IF +* +* If needed, compute right singular vectors. +* + IF( WANTVT) THEN + K = ITGKZ + N + DO I = 1, NS + DO J = 1, N + VT( I, J ) = CMPLX( RWORK( K ), ZERO ) + K = K + 1 + END DO + K = K + N + END DO +* +* Call CUNMBR to compute VB**T * PB**T +* (Workspace in WORK( ITEMP ): need N, prefer N*NB) +* + CALL CUNMBR( 'P', 'R', 'C', NS, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( ITEMP ), + $ LWORK-ITEMP+1, IERR ) + END IF + END IF + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce A using the LQ decomposition. +* + IF( N.GE.MNTHR ) THEN +* +* Path 1t (N much larger than M): +* A = L * Q = ( QB * B * PB**T ) * Q +* = ( QB * ( UB * S * VB**T ) * PB**T ) * Q +* U = QB * UB ; V**T = VB**T * PB**T * Q +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + ITAU = 1 + ITEMP = ITAU + M + CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + +* Copy L into WORK and bidiagonalize it: +* (Workspace in WORK( ITEMP ): need M*M+3*M, prefer M*M+M+2*M*NB) +* + ILQF = ITEMP + ITAUQ = ILQF + M*M + ITAUP = ITAUQ + M + ITEMP = ITAUP + M + ID = 1 + IE = ID + M + ITGKZ = IE + M + CALL CLACPY( 'L', M, M, A, LDA, WORK( ILQF ), M ) + CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( ILQF+M ), M ) + CALL CGEBRD( M, M, WORK( ILQF ), M, RWORK( ID ), + $ RWORK( IE ), WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( ITEMP ), LWORK-ITEMP+1, INFO ) + ITEMPR = ITGKZ + M*(M*2+1) +* +* Solve eigenvalue problem TGK*Z=Z*S. +* (Workspace: need 2*M*M+14*M) +* + CALL SBDSVDX( 'U', JOBZ, RNGTGK, M, RWORK( ID ), + $ RWORK( IE ), VL, VU, ILTGK, IUTGK, NS, S, + $ RWORK( ITGKZ ), M*2, RWORK( ITEMPR ), + $ IWORK, INFO) +* +* If needed, compute left singular vectors. +* + IF( WANTU ) THEN + K = ITGKZ + DO I = 1, NS + DO J = 1, M + U( J, I ) = CMPLX( RWORK( K ), ZERO ) + K = K + 1 + END DO + K = K + M + END DO +* +* Call CUNMBR to compute QB*UB. +* (Workspace in WORK( ITEMP ): need M, prefer M*NB) +* + CALL CUNMBR( 'Q', 'L', 'N', M, NS, M, WORK( ILQF ), M, + $ WORK( ITAUQ ), U, LDU, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF +* +* If needed, compute right singular vectors. +* + IF( WANTVT) THEN + K = ITGKZ + M + DO I = 1, NS + DO J = 1, M + VT( I, J ) = CMPLX( RWORK( K ), ZERO ) + K = K + 1 + END DO + K = K + M + END DO + CALL CLASET( 'A', NS, N-M, CZERO, CZERO, + $ VT( 1,M+1 ), LDVT ) +* +* Call CUNMBR to compute (VB**T)*(PB**T) +* (Workspace in WORK( ITEMP ): need M, prefer M*NB) +* + CALL CUNMBR( 'P', 'R', 'C', NS, M, M, WORK( ILQF ), M, + $ WORK( ITAUP ), VT, LDVT, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) +* +* Call CUNMLQ to compute ((VB**T)*(PB**T))*Q. +* (Workspace in WORK( ITEMP ): need M, prefer M*NB) +* + CALL CUNMLQ( 'R', 'N', NS, N, M, A, LDA, + $ WORK( ITAU ), VT, LDVT, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF + ELSE +* +* Path 2t (N greater than M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T +* U = QB * UB; V**T = VB**T * PB**T +* +* Bidiagonalize A +* (Workspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* + ITAUQ = 1 + ITAUP = ITAUQ + M + ITEMP = ITAUP + M + ID = 1 + IE = ID + M + ITGKZ = IE + M + CALL CGEBRD( M, N, A, LDA, RWORK( ID ), RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + ITEMPR = ITGKZ + M*(M*2+1) +* +* Solve eigenvalue problem TGK*Z=Z*S. +* (Workspace: need 2*M*M+14*M) +* + CALL SBDSVDX( 'L', JOBZ, RNGTGK, M, RWORK( ID ), + $ RWORK( IE ), VL, VU, ILTGK, IUTGK, NS, S, + $ RWORK( ITGKZ ), M*2, RWORK( ITEMPR ), + $ IWORK, INFO) +* +* If needed, compute left singular vectors. +* + IF( WANTU ) THEN + K = ITGKZ + DO I = 1, NS + DO J = 1, M + U( J, I ) = CMPLX( RWORK( K ), ZERO ) + K = K + 1 + END DO + K = K + M + END DO +* +* Call CUNMBR to compute QB*UB. +* (Workspace in WORK( ITEMP ): need M, prefer M*NB) +* + CALL CUNMBR( 'Q', 'L', 'N', M, NS, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF +* +* If needed, compute right singular vectors. +* + IF( WANTVT) THEN + K = ITGKZ + M + DO I = 1, NS + DO J = 1, M + VT( I, J ) = CMPLX( RWORK( K ), ZERO ) + K = K + 1 + END DO + K = K + M + END DO + CALL CLASET( 'A', NS, N-M, CZERO, CZERO, + $ VT( 1,M+1 ), LDVT ) +* +* Call CUNMBR to compute VB**T * PB**T +* (Workspace in WORK( ITEMP ): need M, prefer M*NB) +* + CALL CUNMBR( 'P', 'R', 'C', NS, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF + END IF + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, + $ S, MINMN, INFO ) + IF( ANRM.LT.SMLNUM ) + $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, + $ S, MINMN, INFO ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = CMPLX( REAL( MAXWRK ), ZERO ) +* + RETURN +* +* End of CGESVDX +* + END diff --git a/dspl/liblapack/SRC/cgesvj.f b/dspl/liblapack/SRC/cgesvj.f new file mode 100644 index 0000000..2a5ced2 --- /dev/null +++ b/dspl/liblapack/SRC/cgesvj.f @@ -0,0 +1,1442 @@ +*> \brief CGESVJ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGESVJ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, +* LDV, CWORK, LWORK, RWORK, LRWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDV, LWORK, LRWORK, M, MV, N +* CHARACTER*1 JOBA, JOBU, JOBV +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), V( LDV, * ), CWORK( LWORK ) +* REAL RWORK( LRWORK ), SVA( N ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGESVJ computes the singular value decomposition (SVD) of a complex +*> M-by-N matrix A, where M >= N. The SVD of A is written as +*> [++] [xx] [x0] [xx] +*> A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] +*> [++] [xx] +*> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal +*> matrix, and V is an N-by-N unitary matrix. The diagonal elements +*> of SIGMA are the singular values of A. The columns of U and V are the +*> left and the right singular vectors of A, respectively. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBA +*> \verbatim +*> JOBA is CHARACTER*1 +*> Specifies the structure of A. +*> = 'L': The input matrix A is lower triangular; +*> = 'U': The input matrix A is upper triangular; +*> = 'G': The input matrix A is general M-by-N matrix, M >= N. +*> \endverbatim +*> +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> Specifies whether to compute the left singular vectors +*> (columns of U): +*> = 'U' or 'F': The left singular vectors corresponding to the nonzero +*> singular values are computed and returned in the leading +*> columns of A. See more details in the description of A. +*> The default numerical orthogonality threshold is set to +*> approximately TOL=CTOL*EPS, CTOL=SQRT(M), EPS=SLAMCH('E'). +*> = 'C': Analogous to JOBU='U', except that user can control the +*> level of numerical orthogonality of the computed left +*> singular vectors. TOL can be set to TOL = CTOL*EPS, where +*> CTOL is given on input in the array WORK. +*> No CTOL smaller than ONE is allowed. CTOL greater +*> than 1 / EPS is meaningless. The option 'C' +*> can be used if M*EPS is satisfactory orthogonality +*> of the computed left singular vectors, so CTOL=M could +*> save few sweeps of Jacobi rotations. +*> See the descriptions of A and WORK(1). +*> = 'N': The matrix U is not computed. However, see the +*> description of A. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> Specifies whether to compute the right singular vectors, that +*> is, the matrix V: +*> = 'V' or 'J': the matrix V is computed and returned in the array V +*> = 'A' : the Jacobi rotations are applied to the MV-by-N +*> array V. In other words, the right singular vector +*> matrix V is not computed explicitly; instead it is +*> applied to an MV-by-N matrix initially stored in the +*> first MV rows of V. +*> = 'N' : the matrix V is not computed and the array V is not +*> referenced +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. 1/SLAMCH('E') > M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. +*> M >= N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> If JOBU .EQ. 'U' .OR. JOBU .EQ. 'C': +*> If INFO .EQ. 0 : +*> RANKA orthonormal columns of U are returned in the +*> leading RANKA columns of the array A. Here RANKA <= N +*> is the number of computed singular values of A that are +*> above the underflow threshold SLAMCH('S'). The singular +*> vectors corresponding to underflowed or zero singular +*> values are not computed. The value of RANKA is returned +*> in the array RWORK as RANKA=NINT(RWORK(2)). Also see the +*> descriptions of SVA and RWORK. The computed columns of U +*> are mutually numerically orthogonal up to approximately +*> TOL=SQRT(M)*EPS (default); or TOL=CTOL*EPS (JOBU.EQ.'C'), +*> see the description of JOBU. +*> If INFO .GT. 0, +*> the procedure CGESVJ did not converge in the given number +*> of iterations (sweeps). In that case, the computed +*> columns of U may not be orthogonal up to TOL. The output +*> U (stored in A), SIGMA (given by the computed singular +*> values in SVA(1:N)) and V is still a decomposition of the +*> input matrix A in the sense that the residual +*> || A - SCALE * U * SIGMA * V^* ||_2 / ||A||_2 is small. +*> If JOBU .EQ. 'N': +*> If INFO .EQ. 0 : +*> Note that the left singular vectors are 'for free' in the +*> one-sided Jacobi SVD algorithm. However, if only the +*> singular values are needed, the level of numerical +*> orthogonality of U is not an issue and iterations are +*> stopped when the columns of the iterated matrix are +*> numerically orthogonal up to approximately M*EPS. Thus, +*> on exit, A contains the columns of U scaled with the +*> corresponding singular values. +*> If INFO .GT. 0 : +*> the procedure CGESVJ did not converge in the given number +*> of iterations (sweeps). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] SVA +*> \verbatim +*> SVA is REAL array, dimension (N) +*> On exit, +*> If INFO .EQ. 0 : +*> depending on the value SCALE = RWORK(1), we have: +*> If SCALE .EQ. ONE: +*> SVA(1:N) contains the computed singular values of A. +*> During the computation SVA contains the Euclidean column +*> norms of the iterated matrices in the array A. +*> If SCALE .NE. ONE: +*> The singular values of A are SCALE*SVA(1:N), and this +*> factored representation is due to the fact that some of the +*> singular values of A might underflow or overflow. +*> +*> If INFO .GT. 0 : +*> the procedure CGESVJ did not converge in the given number of +*> iterations (sweeps) and SCALE*SVA(1:N) may not be accurate. +*> \endverbatim +*> +*> \param[in] MV +*> \verbatim +*> MV is INTEGER +*> If JOBV .EQ. 'A', then the product of Jacobi rotations in CGESVJ +*> is applied to the first MV rows of V. See the description of JOBV. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is COMPLEX array, dimension (LDV,N) +*> If JOBV = 'V', then V contains on exit the N-by-N matrix of +*> the right singular vectors; +*> If JOBV = 'A', then V contains the product of the computed right +*> singular vector matrix and the initial matrix in +*> the array V. +*> If JOBV = 'N', then V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V, LDV .GE. 1. +*> If JOBV .EQ. 'V', then LDV .GE. max(1,N). +*> If JOBV .EQ. 'A', then LDV .GE. max(1,MV) . +*> \endverbatim +*> +*> \param[in,out] CWORK +*> \verbatim +*> CWORK is COMPLEX array, dimension (max(1,LWORK)) +*> Used as workspace. +*> If on entry LWORK .EQ. -1, then a workspace query is assumed and +*> no computation is done; CWORK(1) is set to the minial (and optimal) +*> length of CWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER. +*> Length of CWORK, LWORK >= M+N. +*> \endverbatim +*> +*> \param[in,out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (max(6,LRWORK)) +*> On entry, +*> If JOBU .EQ. 'C' : +*> RWORK(1) = CTOL, where CTOL defines the threshold for convergence. +*> The process stops if all columns of A are mutually +*> orthogonal up to CTOL*EPS, EPS=SLAMCH('E'). +*> It is required that CTOL >= ONE, i.e. it is not +*> allowed to force the routine to obtain orthogonality +*> below EPSILON. +*> On exit, +*> RWORK(1) = SCALE is the scaling factor such that SCALE*SVA(1:N) +*> are the computed singular values of A. +*> (See description of SVA().) +*> RWORK(2) = NINT(RWORK(2)) is the number of the computed nonzero +*> singular values. +*> RWORK(3) = NINT(RWORK(3)) is the number of the computed singular +*> values that are larger than the underflow threshold. +*> RWORK(4) = NINT(RWORK(4)) is the number of sweeps of Jacobi +*> rotations needed for numerical convergence. +*> RWORK(5) = max_{i.NE.j} |COS(A(:,i),A(:,j))| in the last sweep. +*> This is useful information in cases when CGESVJ did +*> not converge, as it can be used to estimate whether +*> the output is stil useful and for post festum analysis. +*> RWORK(6) = the largest absolute value over all sines of the +*> Jacobi rotation angles in the last sweep. It can be +*> useful for a post festum analysis. +*> If on entry LRWORK .EQ. -1, then a workspace query is assumed and +*> no computation is done; RWORK(1) is set to the minial (and optimal) +*> length of RWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> Length of RWORK, LRWORK >= MAX(6,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0 : successful exit. +*> < 0 : if INFO = -i, then the i-th argument had an illegal value +*> > 0 : CGESVJ did not converge in the maximal allowed number +*> (NSWEEP=30) of sweeps. The output may still be useful. +*> See the description of RWORK. +*> \endverbatim +*> +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The orthogonal N-by-N matrix V is obtained as a product of Jacobi plane +*> rotations. In the case of underflow of the tangent of the Jacobi angle, a +*> modified Jacobi transformation of Drmac [3] is used. Pivot strategy uses +*> column interchanges of de Rijk [1]. The relative accuracy of the computed +*> singular values and the accuracy of the computed singular vectors (in +*> angle metric) is as guaranteed by the theory of Demmel and Veselic [2]. +*> The condition number that determines the accuracy in the full rank case +*> is essentially min_{D=diag} kappa(A*D), where kappa(.) is the +*> spectral condition number. The best performance of this Jacobi SVD +*> procedure is achieved if used in an accelerated version of Drmac and +*> Veselic [4,5], and it is the kernel routine in the SIGMA library [6]. +*> Some tunning parameters (marked with [TP]) are available for the +*> implementer. +*> The computational range for the nonzero singular values is the machine +*> number interval ( UNDERFLOW , OVERFLOW ). In extreme cases, even +*> denormalized singular values can be computed with the corresponding +*> gradual loss of accurate digits. +*> \endverbatim +* +*> \par Contributor: +* ================== +*> +*> \verbatim +*> +*> ============ +*> +*> Zlatko Drmac (Zagreb, Croatia) +*> +*> \endverbatim +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] P. P. M. De Rijk: A one-sided Jacobi algorithm for computing the +*> singular value decomposition on a vector computer. +*> SIAM J. Sci. Stat. Comp., Vol. 10 (1998), pp. 359-371. +*> [2] J. Demmel and K. Veselic: Jacobi method is more accurate than QR. +*> [3] Z. Drmac: Implementation of Jacobi rotations for accurate singular +*> value computation in floating point arithmetic. +*> SIAM J. Sci. Comp., Vol. 18 (1997), pp. 1200-1222. +*> [4] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. +*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. +*> LAPACK Working note 169. +*> [5] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. +*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. +*> LAPACK Working note 170. +*> [6] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, +*> QSVD, (H,K)-SVD computations. +*> Department of Mathematics, University of Zagreb, 2008, 2015. +*> \endverbatim +* +*> \par Bugs, examples and comments: +* ================================= +*> +*> \verbatim +*> =========================== +*> Please report all bugs and send interesting test examples and comments to +*> drmac@math.hr. Thank you. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, + $ LDV, CWORK, LWORK, RWORK, LRWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* + IMPLICIT NONE +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDV, LWORK, LRWORK, M, MV, N + CHARACTER*1 JOBA, JOBU, JOBV +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), V( LDV, * ), CWORK( LWORK ) + REAL RWORK( LRWORK ), SVA( N ) +* .. +* +* ===================================================================== +* +* .. Local Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E0, HALF = 0.5E0, ONE = 1.0E0) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = (0.0E0, 0.0E0), CONE = (1.0E0, 0.0E0) ) + INTEGER NSWEEP + PARAMETER ( NSWEEP = 30 ) +* .. +* .. Local Scalars .. + COMPLEX AAPQ, OMPQ + REAL AAPP, AAPP0, AAPQ1, AAQQ, APOAQ, AQOAP, BIG, + $ BIGTHETA, CS, CTOL, EPSLN, MXAAPQ, + $ MXSINJ, ROOTBIG, ROOTEPS, ROOTSFMIN, ROOTTOL, + $ SKL, SFMIN, SMALL, SN, T, TEMP1, THETA, THSIGN, TOL + INTEGER BLSKIP, EMPTSW, i, ibr, IERR, igl, IJBLSK, ir1, + $ ISWROT, jbc, jgl, KBL, LKAHEAD, MVL, N2, N34, + $ N4, NBL, NOTROT, p, PSKIPPED, q, ROWSKIP, SWBAND + LOGICAL APPLV, GOSCALE, LOWER, LQUERY, LSVEC, NOSCALE, ROTOK, + $ RSVEC, UCTOL, UPPER +* .. +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, CONJG, REAL, SIGN, SQRT +* .. +* .. External Functions .. +* .. +* from BLAS + REAL SCNRM2 + COMPLEX CDOTC + EXTERNAL CDOTC, SCNRM2 + INTEGER ISAMAX + EXTERNAL ISAMAX +* from LAPACK + REAL SLAMCH + EXTERNAL SLAMCH + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. +* .. +* from BLAS + EXTERNAL CCOPY, CROT, CSSCAL, CSWAP, CAXPY +* from LAPACK + EXTERNAL CLASCL, CLASET, CLASSQ, SLASCL, XERBLA + EXTERNAL CGSVJ0, CGSVJ1 +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + LSVEC = LSAME( JOBU, 'U' ) .OR. LSAME( JOBU, 'F' ) + UCTOL = LSAME( JOBU, 'C' ) + RSVEC = LSAME( JOBV, 'V' ) .OR. LSAME( JOBV, 'J' ) + APPLV = LSAME( JOBV, 'A' ) + UPPER = LSAME( JOBA, 'U' ) + LOWER = LSAME( JOBA, 'L' ) +* + LQUERY = ( LWORK .EQ. -1 ) .OR. ( LRWORK .EQ. -1 ) + IF( .NOT.( UPPER .OR. LOWER .OR. LSAME( JOBA, 'G' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LSVEC .OR. UCTOL .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( RSVEC .OR. APPLV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( ( N.LT.0 ) .OR. ( N.GT.M ) ) THEN + INFO = -5 + ELSE IF( LDA.LT.M ) THEN + INFO = -7 + ELSE IF( MV.LT.0 ) THEN + INFO = -9 + ELSE IF( ( RSVEC .AND. ( LDV.LT.N ) ) .OR. + $ ( APPLV .AND. ( LDV.LT.MV ) ) ) THEN + INFO = -11 + ELSE IF( UCTOL .AND. ( RWORK( 1 ).LE.ONE ) ) THEN + INFO = -12 + ELSE IF( LWORK.LT.( M+N ) .AND. ( .NOT.LQUERY ) ) THEN + INFO = -13 + ELSE IF( LRWORK.LT.MAX( N, 6 ) .AND. ( .NOT.LQUERY ) ) THEN + INFO = -15 + ELSE + INFO = 0 + END IF +* +* #:( + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGESVJ', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN + CWORK(1) = M + N + RWORK(1) = MAX( N, 6 ) + RETURN + END IF +* +* #:) Quick return for void matrix +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) )RETURN +* +* Set numerical parameters +* The stopping criterion for Jacobi rotations is +* +* max_{i<>j}|A(:,i)^* * A(:,j)| / (||A(:,i)||*||A(:,j)||) < CTOL*EPS +* +* where EPS is the round-off and CTOL is defined as follows: +* + IF( UCTOL ) THEN +* ... user controlled + CTOL = RWORK( 1 ) + ELSE +* ... default + IF( LSVEC .OR. RSVEC .OR. APPLV ) THEN + CTOL = SQRT( REAL( M ) ) + ELSE + CTOL = REAL( M ) + END IF + END IF +* ... and the machine dependent parameters are +*[!] (Make sure that SLAMCH() works properly on the target machine.) +* + EPSLN = SLAMCH( 'Epsilon' ) + ROOTEPS = SQRT( EPSLN ) + SFMIN = SLAMCH( 'SafeMinimum' ) + ROOTSFMIN = SQRT( SFMIN ) + SMALL = SFMIN / EPSLN +* BIG = SLAMCH( 'Overflow' ) + BIG = ONE / SFMIN + ROOTBIG = ONE / ROOTSFMIN +* LARGE = BIG / SQRT( REAL( M*N ) ) + BIGTHETA = ONE / ROOTEPS +* + TOL = CTOL*EPSLN + ROOTTOL = SQRT( TOL ) +* + IF( REAL( M )*EPSLN.GE.ONE ) THEN + INFO = -4 + CALL XERBLA( 'CGESVJ', -INFO ) + RETURN + END IF +* +* Initialize the right singular vector matrix. +* + IF( RSVEC ) THEN + MVL = N + CALL CLASET( 'A', MVL, N, CZERO, CONE, V, LDV ) + ELSE IF( APPLV ) THEN + MVL = MV + END IF + RSVEC = RSVEC .OR. APPLV +* +* Initialize SVA( 1:N ) = ( ||A e_i||_2, i = 1:N ) +*(!) If necessary, scale A to protect the largest singular value +* from overflow. It is possible that saving the largest singular +* value destroys the information about the small ones. +* This initial scaling is almost minimal in the sense that the +* goal is to make sure that no column norm overflows, and that +* SQRT(N)*max_i SVA(i) does not overflow. If INFinite entries +* in A are detected, the procedure returns with INFO=-6. +* + SKL = ONE / SQRT( REAL( M )*REAL( N ) ) + NOSCALE = .TRUE. + GOSCALE = .TRUE. +* + IF( LOWER ) THEN +* the input matrix is M-by-N lower triangular (trapezoidal) + DO 1874 p = 1, N + AAPP = ZERO + AAQQ = ONE + CALL CLASSQ( M-p+1, A( p, p ), 1, AAPP, AAQQ ) + IF( AAPP.GT.BIG ) THEN + INFO = -6 + CALL XERBLA( 'CGESVJ', -INFO ) + RETURN + END IF + AAQQ = SQRT( AAQQ ) + IF( ( AAPP.LT.( BIG / AAQQ ) ) .AND. NOSCALE ) THEN + SVA( p ) = AAPP*AAQQ + ELSE + NOSCALE = .FALSE. + SVA( p ) = AAPP*( AAQQ*SKL ) + IF( GOSCALE ) THEN + GOSCALE = .FALSE. + DO 1873 q = 1, p - 1 + SVA( q ) = SVA( q )*SKL + 1873 CONTINUE + END IF + END IF + 1874 CONTINUE + ELSE IF( UPPER ) THEN +* the input matrix is M-by-N upper triangular (trapezoidal) + DO 2874 p = 1, N + AAPP = ZERO + AAQQ = ONE + CALL CLASSQ( p, A( 1, p ), 1, AAPP, AAQQ ) + IF( AAPP.GT.BIG ) THEN + INFO = -6 + CALL XERBLA( 'CGESVJ', -INFO ) + RETURN + END IF + AAQQ = SQRT( AAQQ ) + IF( ( AAPP.LT.( BIG / AAQQ ) ) .AND. NOSCALE ) THEN + SVA( p ) = AAPP*AAQQ + ELSE + NOSCALE = .FALSE. + SVA( p ) = AAPP*( AAQQ*SKL ) + IF( GOSCALE ) THEN + GOSCALE = .FALSE. + DO 2873 q = 1, p - 1 + SVA( q ) = SVA( q )*SKL + 2873 CONTINUE + END IF + END IF + 2874 CONTINUE + ELSE +* the input matrix is M-by-N general dense + DO 3874 p = 1, N + AAPP = ZERO + AAQQ = ONE + CALL CLASSQ( M, A( 1, p ), 1, AAPP, AAQQ ) + IF( AAPP.GT.BIG ) THEN + INFO = -6 + CALL XERBLA( 'CGESVJ', -INFO ) + RETURN + END IF + AAQQ = SQRT( AAQQ ) + IF( ( AAPP.LT.( BIG / AAQQ ) ) .AND. NOSCALE ) THEN + SVA( p ) = AAPP*AAQQ + ELSE + NOSCALE = .FALSE. + SVA( p ) = AAPP*( AAQQ*SKL ) + IF( GOSCALE ) THEN + GOSCALE = .FALSE. + DO 3873 q = 1, p - 1 + SVA( q ) = SVA( q )*SKL + 3873 CONTINUE + END IF + END IF + 3874 CONTINUE + END IF +* + IF( NOSCALE )SKL = ONE +* +* Move the smaller part of the spectrum from the underflow threshold +*(!) Start by determining the position of the nonzero entries of the +* array SVA() relative to ( SFMIN, BIG ). +* + AAPP = ZERO + AAQQ = BIG + DO 4781 p = 1, N + IF( SVA( p ).NE.ZERO )AAQQ = MIN( AAQQ, SVA( p ) ) + AAPP = MAX( AAPP, SVA( p ) ) + 4781 CONTINUE +* +* #:) Quick return for zero matrix +* + IF( AAPP.EQ.ZERO ) THEN + IF( LSVEC )CALL CLASET( 'G', M, N, CZERO, CONE, A, LDA ) + RWORK( 1 ) = ONE + RWORK( 2 ) = ZERO + RWORK( 3 ) = ZERO + RWORK( 4 ) = ZERO + RWORK( 5 ) = ZERO + RWORK( 6 ) = ZERO + RETURN + END IF +* +* #:) Quick return for one-column matrix +* + IF( N.EQ.1 ) THEN + IF( LSVEC )CALL CLASCL( 'G', 0, 0, SVA( 1 ), SKL, M, 1, + $ A( 1, 1 ), LDA, IERR ) + RWORK( 1 ) = ONE / SKL + IF( SVA( 1 ).GE.SFMIN ) THEN + RWORK( 2 ) = ONE + ELSE + RWORK( 2 ) = ZERO + END IF + RWORK( 3 ) = ZERO + RWORK( 4 ) = ZERO + RWORK( 5 ) = ZERO + RWORK( 6 ) = ZERO + RETURN + END IF +* +* Protect small singular values from underflow, and try to +* avoid underflows/overflows in computing Jacobi rotations. +* + SN = SQRT( SFMIN / EPSLN ) + TEMP1 = SQRT( BIG / REAL( N ) ) + IF( ( AAPP.LE.SN ) .OR. ( AAQQ.GE.TEMP1 ) .OR. + $ ( ( SN.LE.AAQQ ) .AND. ( AAPP.LE.TEMP1 ) ) ) THEN + TEMP1 = MIN( BIG, TEMP1 / AAPP ) +* AAQQ = AAQQ*TEMP1 +* AAPP = AAPP*TEMP1 + ELSE IF( ( AAQQ.LE.SN ) .AND. ( AAPP.LE.TEMP1 ) ) THEN + TEMP1 = MIN( SN / AAQQ, BIG / ( AAPP*SQRT( REAL( N ) ) ) ) +* AAQQ = AAQQ*TEMP1 +* AAPP = AAPP*TEMP1 + ELSE IF( ( AAQQ.GE.SN ) .AND. ( AAPP.GE.TEMP1 ) ) THEN + TEMP1 = MAX( SN / AAQQ, TEMP1 / AAPP ) +* AAQQ = AAQQ*TEMP1 +* AAPP = AAPP*TEMP1 + ELSE IF( ( AAQQ.LE.SN ) .AND. ( AAPP.GE.TEMP1 ) ) THEN + TEMP1 = MIN( SN / AAQQ, BIG / ( SQRT( REAL( N ) )*AAPP ) ) +* AAQQ = AAQQ*TEMP1 +* AAPP = AAPP*TEMP1 + ELSE + TEMP1 = ONE + END IF +* +* Scale, if necessary +* + IF( TEMP1.NE.ONE ) THEN + CALL SLASCL( 'G', 0, 0, ONE, TEMP1, N, 1, SVA, N, IERR ) + END IF + SKL = TEMP1*SKL + IF( SKL.NE.ONE ) THEN + CALL CLASCL( JOBA, 0, 0, ONE, SKL, M, N, A, LDA, IERR ) + SKL = ONE / SKL + END IF +* +* Row-cyclic Jacobi SVD algorithm with column pivoting +* + EMPTSW = ( N*( N-1 ) ) / 2 + NOTROT = 0 + + DO 1868 q = 1, N + CWORK( q ) = CONE + 1868 CONTINUE +* +* +* + SWBAND = 3 +*[TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective +* if CGESVJ is used as a computational routine in the preconditioned +* Jacobi SVD algorithm CGEJSV. For sweeps i=1:SWBAND the procedure +* works on pivots inside a band-like region around the diagonal. +* The boundaries are determined dynamically, based on the number of +* pivots above a threshold. +* + KBL = MIN( 8, N ) +*[TP] KBL is a tuning parameter that defines the tile size in the +* tiling of the p-q loops of pivot pairs. In general, an optimal +* value of KBL depends on the matrix dimensions and on the +* parameters of the computer's memory. +* + NBL = N / KBL + IF( ( NBL*KBL ).NE.N )NBL = NBL + 1 +* + BLSKIP = KBL**2 +*[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. +* + ROWSKIP = MIN( 5, KBL ) +*[TP] ROWSKIP is a tuning parameter. +* + LKAHEAD = 1 +*[TP] LKAHEAD is a tuning parameter. +* +* Quasi block transformations, using the lower (upper) triangular +* structure of the input matrix. The quasi-block-cycling usually +* invokes cubic convergence. Big part of this cycle is done inside +* canonical subspaces of dimensions less than M. +* + IF( ( LOWER .OR. UPPER ) .AND. ( N.GT.MAX( 64, 4*KBL ) ) ) THEN +*[TP] The number of partition levels and the actual partition are +* tuning parameters. + N4 = N / 4 + N2 = N / 2 + N34 = 3*N4 + IF( APPLV ) THEN + q = 0 + ELSE + q = 1 + END IF +* + IF( LOWER ) THEN +* +* This works very well on lower triangular matrices, in particular +* in the framework of the preconditioned Jacobi SVD (xGEJSV). +* The idea is simple: +* [+ 0 0 0] Note that Jacobi transformations of [0 0] +* [+ + 0 0] [0 0] +* [+ + x 0] actually work on [x 0] [x 0] +* [+ + x x] [x x]. [x x] +* + CALL CGSVJ0( JOBV, M-N34, N-N34, A( N34+1, N34+1 ), LDA, + $ CWORK( N34+1 ), SVA( N34+1 ), MVL, + $ V( N34*q+1, N34+1 ), LDV, EPSLN, SFMIN, TOL, + $ 2, CWORK( N+1 ), LWORK-N, IERR ) + + CALL CGSVJ0( JOBV, M-N2, N34-N2, A( N2+1, N2+1 ), LDA, + $ CWORK( N2+1 ), SVA( N2+1 ), MVL, + $ V( N2*q+1, N2+1 ), LDV, EPSLN, SFMIN, TOL, 2, + $ CWORK( N+1 ), LWORK-N, IERR ) + + CALL CGSVJ1( JOBV, M-N2, N-N2, N4, A( N2+1, N2+1 ), LDA, + $ CWORK( N2+1 ), SVA( N2+1 ), MVL, + $ V( N2*q+1, N2+1 ), LDV, EPSLN, SFMIN, TOL, 1, + $ CWORK( N+1 ), LWORK-N, IERR ) +* + CALL CGSVJ0( JOBV, M-N4, N2-N4, A( N4+1, N4+1 ), LDA, + $ CWORK( N4+1 ), SVA( N4+1 ), MVL, + $ V( N4*q+1, N4+1 ), LDV, EPSLN, SFMIN, TOL, 1, + $ CWORK( N+1 ), LWORK-N, IERR ) +* + CALL CGSVJ0( JOBV, M, N4, A, LDA, CWORK, SVA, MVL, V, LDV, + $ EPSLN, SFMIN, TOL, 1, CWORK( N+1 ), LWORK-N, + $ IERR ) +* + CALL CGSVJ1( JOBV, M, N2, N4, A, LDA, CWORK, SVA, MVL, V, + $ LDV, EPSLN, SFMIN, TOL, 1, CWORK( N+1 ), + $ LWORK-N, IERR ) +* +* + ELSE IF( UPPER ) THEN +* +* + CALL CGSVJ0( JOBV, N4, N4, A, LDA, CWORK, SVA, MVL, V, LDV, + $ EPSLN, SFMIN, TOL, 2, CWORK( N+1 ), LWORK-N, + $ IERR ) +* + CALL CGSVJ0( JOBV, N2, N4, A( 1, N4+1 ), LDA, CWORK( N4+1 ), + $ SVA( N4+1 ), MVL, V( N4*q+1, N4+1 ), LDV, + $ EPSLN, SFMIN, TOL, 1, CWORK( N+1 ), LWORK-N, + $ IERR ) +* + CALL CGSVJ1( JOBV, N2, N2, N4, A, LDA, CWORK, SVA, MVL, V, + $ LDV, EPSLN, SFMIN, TOL, 1, CWORK( N+1 ), + $ LWORK-N, IERR ) +* + CALL CGSVJ0( JOBV, N2+N4, N4, A( 1, N2+1 ), LDA, + $ CWORK( N2+1 ), SVA( N2+1 ), MVL, + $ V( N2*q+1, N2+1 ), LDV, EPSLN, SFMIN, TOL, 1, + $ CWORK( N+1 ), LWORK-N, IERR ) + + END IF +* + END IF +* +* .. Row-cyclic pivot strategy with de Rijk's pivoting .. +* + DO 1993 i = 1, NSWEEP +* +* .. go go go ... +* + MXAAPQ = ZERO + MXSINJ = ZERO + ISWROT = 0 +* + NOTROT = 0 + PSKIPPED = 0 +* +* Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs +* 1 <= p < q <= N. This is the first step toward a blocked implementation +* of the rotations. New implementation, based on block transformations, +* is under development. +* + DO 2000 ibr = 1, NBL +* + igl = ( ibr-1 )*KBL + 1 +* + DO 1002 ir1 = 0, MIN( LKAHEAD, NBL-ibr ) +* + igl = igl + ir1*KBL +* + DO 2001 p = igl, MIN( igl+KBL-1, N-1 ) +* +* .. de Rijk's pivoting +* + q = ISAMAX( N-p+1, SVA( p ), 1 ) + p - 1 + IF( p.NE.q ) THEN + CALL CSWAP( M, A( 1, p ), 1, A( 1, q ), 1 ) + IF( RSVEC )CALL CSWAP( MVL, V( 1, p ), 1, + $ V( 1, q ), 1 ) + TEMP1 = SVA( p ) + SVA( p ) = SVA( q ) + SVA( q ) = TEMP1 + AAPQ = CWORK(p) + CWORK(p) = CWORK(q) + CWORK(q) = AAPQ + END IF +* + IF( ir1.EQ.0 ) THEN +* +* Column norms are periodically updated by explicit +* norm computation. +*[!] Caveat: +* Unfortunately, some BLAS implementations compute SCNRM2(M,A(1,p),1) +* as SQRT(S=CDOTC(M,A(1,p),1,A(1,p),1)), which may cause the result to +* overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and to +* underflow for ||A(:,p)||_2 < SQRT(underflow_threshold). +* Hence, SCNRM2 cannot be trusted, not even in the case when +* the true norm is far from the under(over)flow boundaries. +* If properly implemented SCNRM2 is available, the IF-THEN-ELSE-END IF +* below should be replaced with "AAPP = SCNRM2( M, A(1,p), 1 )". +* + IF( ( SVA( p ).LT.ROOTBIG ) .AND. + $ ( SVA( p ).GT.ROOTSFMIN ) ) THEN + SVA( p ) = SCNRM2( M, A( 1, p ), 1 ) + ELSE + TEMP1 = ZERO + AAPP = ONE + CALL CLASSQ( M, A( 1, p ), 1, TEMP1, AAPP ) + SVA( p ) = TEMP1*SQRT( AAPP ) + END IF + AAPP = SVA( p ) + ELSE + AAPP = SVA( p ) + END IF +* + IF( AAPP.GT.ZERO ) THEN +* + PSKIPPED = 0 +* + DO 2002 q = p + 1, MIN( igl+KBL-1, N ) +* + AAQQ = SVA( q ) +* + IF( AAQQ.GT.ZERO ) THEN +* + AAPP0 = AAPP + IF( AAQQ.GE.ONE ) THEN + ROTOK = ( SMALL*AAPP ).LE.AAQQ + IF( AAPP.LT.( BIG / AAQQ ) ) THEN + AAPQ = ( CDOTC( M, A( 1, p ), 1, + $ A( 1, q ), 1 ) / AAQQ ) / AAPP + ELSE + CALL CCOPY( M, A( 1, p ), 1, + $ CWORK(N+1), 1 ) + CALL CLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, CWORK(N+1), LDA, IERR ) + AAPQ = CDOTC( M, CWORK(N+1), 1, + $ A( 1, q ), 1 ) / AAQQ + END IF + ELSE + ROTOK = AAPP.LE.( AAQQ / SMALL ) + IF( AAPP.GT.( SMALL / AAQQ ) ) THEN + AAPQ = ( CDOTC( M, A( 1, p ), 1, + $ A( 1, q ), 1 ) / AAPP ) / AAQQ + ELSE + CALL CCOPY( M, A( 1, q ), 1, + $ CWORK(N+1), 1 ) + CALL CLASCL( 'G', 0, 0, AAQQ, + $ ONE, M, 1, + $ CWORK(N+1), LDA, IERR ) + AAPQ = CDOTC( M, A(1, p ), 1, + $ CWORK(N+1), 1 ) / AAPP + END IF + END IF +* +* AAPQ = AAPQ * CONJG( CWORK(p) ) * CWORK(q) + AAPQ1 = -ABS(AAPQ) + MXAAPQ = MAX( MXAAPQ, -AAPQ1 ) +* +* TO rotate or NOT to rotate, THAT is the question ... +* + IF( ABS( AAPQ1 ).GT.TOL ) THEN + OMPQ = AAPQ / ABS(AAPQ) +* +* .. rotate +*[RTD] ROTATED = ROTATED + ONE +* + IF( ir1.EQ.0 ) THEN + NOTROT = 0 + PSKIPPED = 0 + ISWROT = ISWROT + 1 + END IF +* + IF( ROTOK ) THEN +* + AQOAP = AAQQ / AAPP + APOAQ = AAPP / AAQQ + THETA = -HALF*ABS( AQOAP-APOAQ )/AAPQ1 +* + IF( ABS( THETA ).GT.BIGTHETA ) THEN +* + T = HALF / THETA + CS = ONE + + CALL CROT( M, A(1,p), 1, A(1,q), 1, + $ CS, CONJG(OMPQ)*T ) + IF ( RSVEC ) THEN + CALL CROT( MVL, V(1,p), 1, + $ V(1,q), 1, CS, CONJG(OMPQ)*T ) + END IF + + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ1 ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, ABS( T ) ) +* + ELSE +* +* .. choose correct signum for THETA and rotate +* + THSIGN = -SIGN( ONE, AAPQ1 ) + T = ONE / ( THETA+THSIGN* + $ SQRT( ONE+THETA*THETA ) ) + CS = SQRT( ONE / ( ONE+T*T ) ) + SN = T*CS +* + MXSINJ = MAX( MXSINJ, ABS( SN ) ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ1 ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ1 ) ) +* + CALL CROT( M, A(1,p), 1, A(1,q), 1, + $ CS, CONJG(OMPQ)*SN ) + IF ( RSVEC ) THEN + CALL CROT( MVL, V(1,p), 1, + $ V(1,q), 1, CS, CONJG(OMPQ)*SN ) + END IF + END IF + CWORK(p) = -CWORK(q) * OMPQ +* + ELSE +* .. have to use modified Gram-Schmidt like transformation + CALL CCOPY( M, A( 1, p ), 1, + $ CWORK(N+1), 1 ) + CALL CLASCL( 'G', 0, 0, AAPP, ONE, M, + $ 1, CWORK(N+1), LDA, + $ IERR ) + CALL CLASCL( 'G', 0, 0, AAQQ, ONE, M, + $ 1, A( 1, q ), LDA, IERR ) + CALL CAXPY( M, -AAPQ, CWORK(N+1), 1, + $ A( 1, q ), 1 ) + CALL CLASCL( 'G', 0, 0, ONE, AAQQ, M, + $ 1, A( 1, q ), LDA, IERR ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE-AAPQ1*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + END IF +* END IF ROTOK THEN ... ELSE +* +* In the case of cancellation in updating SVA(q), SVA(p) +* recompute SVA(q), SVA(p). +* + IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS ) + $ THEN + IF( ( AAQQ.LT.ROOTBIG ) .AND. + $ ( AAQQ.GT.ROOTSFMIN ) ) THEN + SVA( q ) = SCNRM2( M, A( 1, q ), 1 ) + ELSE + T = ZERO + AAQQ = ONE + CALL CLASSQ( M, A( 1, q ), 1, T, + $ AAQQ ) + SVA( q ) = T*SQRT( AAQQ ) + END IF + END IF + IF( ( AAPP / AAPP0 ).LE.ROOTEPS ) THEN + IF( ( AAPP.LT.ROOTBIG ) .AND. + $ ( AAPP.GT.ROOTSFMIN ) ) THEN + AAPP = SCNRM2( M, A( 1, p ), 1 ) + ELSE + T = ZERO + AAPP = ONE + CALL CLASSQ( M, A( 1, p ), 1, T, + $ AAPP ) + AAPP = T*SQRT( AAPP ) + END IF + SVA( p ) = AAPP + END IF +* + ELSE +* A(:,p) and A(:,q) already numerically orthogonal + IF( ir1.EQ.0 )NOTROT = NOTROT + 1 +*[RTD] SKIPPED = SKIPPED + 1 + PSKIPPED = PSKIPPED + 1 + END IF + ELSE +* A(:,q) is zero column + IF( ir1.EQ.0 )NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + END IF +* + IF( ( i.LE.SWBAND ) .AND. + $ ( PSKIPPED.GT.ROWSKIP ) ) THEN + IF( ir1.EQ.0 )AAPP = -AAPP + NOTROT = 0 + GO TO 2103 + END IF +* + 2002 CONTINUE +* END q-LOOP +* + 2103 CONTINUE +* bailed out of q-loop +* + SVA( p ) = AAPP +* + ELSE + SVA( p ) = AAPP + IF( ( ir1.EQ.0 ) .AND. ( AAPP.EQ.ZERO ) ) + $ NOTROT = NOTROT + MIN( igl+KBL-1, N ) - p + END IF +* + 2001 CONTINUE +* end of the p-loop +* end of doing the block ( ibr, ibr ) + 1002 CONTINUE +* end of ir1-loop +* +* ... go to the off diagonal blocks +* + igl = ( ibr-1 )*KBL + 1 +* + DO 2010 jbc = ibr + 1, NBL +* + jgl = ( jbc-1 )*KBL + 1 +* +* doing the block at ( ibr, jbc ) +* + IJBLSK = 0 + DO 2100 p = igl, MIN( igl+KBL-1, N ) +* + AAPP = SVA( p ) + IF( AAPP.GT.ZERO ) THEN +* + PSKIPPED = 0 +* + DO 2200 q = jgl, MIN( jgl+KBL-1, N ) +* + AAQQ = SVA( q ) + IF( AAQQ.GT.ZERO ) THEN + AAPP0 = AAPP +* +* .. M x 2 Jacobi SVD .. +* +* Safe Gram matrix computation +* + IF( AAQQ.GE.ONE ) THEN + IF( AAPP.GE.AAQQ ) THEN + ROTOK = ( SMALL*AAPP ).LE.AAQQ + ELSE + ROTOK = ( SMALL*AAQQ ).LE.AAPP + END IF + IF( AAPP.LT.( BIG / AAQQ ) ) THEN + AAPQ = ( CDOTC( M, A( 1, p ), 1, + $ A( 1, q ), 1 ) / AAQQ ) / AAPP + ELSE + CALL CCOPY( M, A( 1, p ), 1, + $ CWORK(N+1), 1 ) + CALL CLASCL( 'G', 0, 0, AAPP, + $ ONE, M, 1, + $ CWORK(N+1), LDA, IERR ) + AAPQ = CDOTC( M, CWORK(N+1), 1, + $ A( 1, q ), 1 ) / AAQQ + END IF + ELSE + IF( AAPP.GE.AAQQ ) THEN + ROTOK = AAPP.LE.( AAQQ / SMALL ) + ELSE + ROTOK = AAQQ.LE.( AAPP / SMALL ) + END IF + IF( AAPP.GT.( SMALL / AAQQ ) ) THEN + AAPQ = ( CDOTC( M, A( 1, p ), 1, + $ A( 1, q ), 1 ) / MAX(AAQQ,AAPP) ) + $ / MIN(AAQQ,AAPP) + ELSE + CALL CCOPY( M, A( 1, q ), 1, + $ CWORK(N+1), 1 ) + CALL CLASCL( 'G', 0, 0, AAQQ, + $ ONE, M, 1, + $ CWORK(N+1), LDA, IERR ) + AAPQ = CDOTC( M, A( 1, p ), 1, + $ CWORK(N+1), 1 ) / AAPP + END IF + END IF +* +* AAPQ = AAPQ * CONJG(CWORK(p))*CWORK(q) + AAPQ1 = -ABS(AAPQ) + MXAAPQ = MAX( MXAAPQ, -AAPQ1 ) +* +* TO rotate or NOT to rotate, THAT is the question ... +* + IF( ABS( AAPQ1 ).GT.TOL ) THEN + OMPQ = AAPQ / ABS(AAPQ) + NOTROT = 0 +*[RTD] ROTATED = ROTATED + 1 + PSKIPPED = 0 + ISWROT = ISWROT + 1 +* + IF( ROTOK ) THEN +* + AQOAP = AAQQ / AAPP + APOAQ = AAPP / AAQQ + THETA = -HALF*ABS( AQOAP-APOAQ )/ AAPQ1 + IF( AAQQ.GT.AAPP0 )THETA = -THETA +* + IF( ABS( THETA ).GT.BIGTHETA ) THEN + T = HALF / THETA + CS = ONE + CALL CROT( M, A(1,p), 1, A(1,q), 1, + $ CS, CONJG(OMPQ)*T ) + IF( RSVEC ) THEN + CALL CROT( MVL, V(1,p), 1, + $ V(1,q), 1, CS, CONJG(OMPQ)*T ) + END IF + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ1 ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, ABS( T ) ) + ELSE +* +* .. choose correct signum for THETA and rotate +* + THSIGN = -SIGN( ONE, AAPQ1 ) + IF( AAQQ.GT.AAPP0 )THSIGN = -THSIGN + T = ONE / ( THETA+THSIGN* + $ SQRT( ONE+THETA*THETA ) ) + CS = SQRT( ONE / ( ONE+T*T ) ) + SN = T*CS + MXSINJ = MAX( MXSINJ, ABS( SN ) ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ1 ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ1 ) ) +* + CALL CROT( M, A(1,p), 1, A(1,q), 1, + $ CS, CONJG(OMPQ)*SN ) + IF( RSVEC ) THEN + CALL CROT( MVL, V(1,p), 1, + $ V(1,q), 1, CS, CONJG(OMPQ)*SN ) + END IF + END IF + CWORK(p) = -CWORK(q) * OMPQ +* + ELSE +* .. have to use modified Gram-Schmidt like transformation + IF( AAPP.GT.AAQQ ) THEN + CALL CCOPY( M, A( 1, p ), 1, + $ CWORK(N+1), 1 ) + CALL CLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, CWORK(N+1),LDA, + $ IERR ) + CALL CLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + CALL CAXPY( M, -AAPQ, CWORK(N+1), + $ 1, A( 1, q ), 1 ) + CALL CLASCL( 'G', 0, 0, ONE, AAQQ, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE-AAPQ1*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + ELSE + CALL CCOPY( M, A( 1, q ), 1, + $ CWORK(N+1), 1 ) + CALL CLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, CWORK(N+1),LDA, + $ IERR ) + CALL CLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + CALL CAXPY( M, -CONJG(AAPQ), + $ CWORK(N+1), 1, A( 1, p ), 1 ) + CALL CLASCL( 'G', 0, 0, ONE, AAPP, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + SVA( p ) = AAPP*SQRT( MAX( ZERO, + $ ONE-AAPQ1*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + END IF + END IF +* END IF ROTOK THEN ... ELSE +* +* In the case of cancellation in updating SVA(q), SVA(p) +* .. recompute SVA(q), SVA(p) + IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS ) + $ THEN + IF( ( AAQQ.LT.ROOTBIG ) .AND. + $ ( AAQQ.GT.ROOTSFMIN ) ) THEN + SVA( q ) = SCNRM2( M, A( 1, q ), 1) + ELSE + T = ZERO + AAQQ = ONE + CALL CLASSQ( M, A( 1, q ), 1, T, + $ AAQQ ) + SVA( q ) = T*SQRT( AAQQ ) + END IF + END IF + IF( ( AAPP / AAPP0 )**2.LE.ROOTEPS ) THEN + IF( ( AAPP.LT.ROOTBIG ) .AND. + $ ( AAPP.GT.ROOTSFMIN ) ) THEN + AAPP = SCNRM2( M, A( 1, p ), 1 ) + ELSE + T = ZERO + AAPP = ONE + CALL CLASSQ( M, A( 1, p ), 1, T, + $ AAPP ) + AAPP = T*SQRT( AAPP ) + END IF + SVA( p ) = AAPP + END IF +* end of OK rotation + ELSE + NOTROT = NOTROT + 1 +*[RTD] SKIPPED = SKIPPED + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF + ELSE + NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF +* + IF( ( i.LE.SWBAND ) .AND. ( IJBLSK.GE.BLSKIP ) ) + $ THEN + SVA( p ) = AAPP + NOTROT = 0 + GO TO 2011 + END IF + IF( ( i.LE.SWBAND ) .AND. + $ ( PSKIPPED.GT.ROWSKIP ) ) THEN + AAPP = -AAPP + NOTROT = 0 + GO TO 2203 + END IF +* + 2200 CONTINUE +* end of the q-loop + 2203 CONTINUE +* + SVA( p ) = AAPP +* + ELSE +* + IF( AAPP.EQ.ZERO )NOTROT = NOTROT + + $ MIN( jgl+KBL-1, N ) - jgl + 1 + IF( AAPP.LT.ZERO )NOTROT = 0 +* + END IF +* + 2100 CONTINUE +* end of the p-loop + 2010 CONTINUE +* end of the jbc-loop + 2011 CONTINUE +*2011 bailed out of the jbc-loop + DO 2012 p = igl, MIN( igl+KBL-1, N ) + SVA( p ) = ABS( SVA( p ) ) + 2012 CONTINUE +*** + 2000 CONTINUE +*2000 :: end of the ibr-loop +* +* .. update SVA(N) + IF( ( SVA( N ).LT.ROOTBIG ) .AND. ( SVA( N ).GT.ROOTSFMIN ) ) + $ THEN + SVA( N ) = SCNRM2( M, A( 1, N ), 1 ) + ELSE + T = ZERO + AAPP = ONE + CALL CLASSQ( M, A( 1, N ), 1, T, AAPP ) + SVA( N ) = T*SQRT( AAPP ) + END IF +* +* Additional steering devices +* + IF( ( i.LT.SWBAND ) .AND. ( ( MXAAPQ.LE.ROOTTOL ) .OR. + $ ( ISWROT.LE.N ) ) )SWBAND = i +* + IF( ( i.GT.SWBAND+1 ) .AND. ( MXAAPQ.LT.SQRT( REAL( N ) )* + $ TOL ) .AND. ( REAL( N )*MXAAPQ*MXSINJ.LT.TOL ) ) THEN + GO TO 1994 + END IF +* + IF( NOTROT.GE.EMPTSW )GO TO 1994 +* + 1993 CONTINUE +* end i=1:NSWEEP loop +* +* #:( Reaching this point means that the procedure has not converged. + INFO = NSWEEP - 1 + GO TO 1995 +* + 1994 CONTINUE +* #:) Reaching this point means numerical convergence after the i-th +* sweep. +* + INFO = 0 +* #:) INFO = 0 confirms successful iterations. + 1995 CONTINUE +* +* Sort the singular values and find how many are above +* the underflow threshold. +* + N2 = 0 + N4 = 0 + DO 5991 p = 1, N - 1 + q = ISAMAX( N-p+1, SVA( p ), 1 ) + p - 1 + IF( p.NE.q ) THEN + TEMP1 = SVA( p ) + SVA( p ) = SVA( q ) + SVA( q ) = TEMP1 + CALL CSWAP( M, A( 1, p ), 1, A( 1, q ), 1 ) + IF( RSVEC )CALL CSWAP( MVL, V( 1, p ), 1, V( 1, q ), 1 ) + END IF + IF( SVA( p ).NE.ZERO ) THEN + N4 = N4 + 1 + IF( SVA( p )*SKL.GT.SFMIN )N2 = N2 + 1 + END IF + 5991 CONTINUE + IF( SVA( N ).NE.ZERO ) THEN + N4 = N4 + 1 + IF( SVA( N )*SKL.GT.SFMIN )N2 = N2 + 1 + END IF +* +* Normalize the left singular vectors. +* + IF( LSVEC .OR. UCTOL ) THEN + DO 1998 p = 1, N4 +* CALL CSSCAL( M, ONE / SVA( p ), A( 1, p ), 1 ) + CALL CLASCL( 'G',0,0, SVA(p), ONE, M, 1, A(1,p), M, IERR ) + 1998 CONTINUE + END IF +* +* Scale the product of Jacobi rotations. +* + IF( RSVEC ) THEN + DO 2399 p = 1, N + TEMP1 = ONE / SCNRM2( MVL, V( 1, p ), 1 ) + CALL CSSCAL( MVL, TEMP1, V( 1, p ), 1 ) + 2399 CONTINUE + END IF +* +* Undo scaling, if necessary (and possible). + IF( ( ( SKL.GT.ONE ) .AND. ( SVA( 1 ).LT.( BIG / SKL ) ) ) + $ .OR. ( ( SKL.LT.ONE ) .AND. ( SVA( MAX( N2, 1 ) ) .GT. + $ ( SFMIN / SKL ) ) ) ) THEN + DO 2400 p = 1, N + SVA( P ) = SKL*SVA( P ) + 2400 CONTINUE + SKL = ONE + END IF +* + RWORK( 1 ) = SKL +* The singular values of A are SKL*SVA(1:N). If SKL.NE.ONE +* then some of the singular values may overflow or underflow and +* the spectrum is given in this factored representation. +* + RWORK( 2 ) = REAL( N4 ) +* N4 is the number of computed nonzero singular values of A. +* + RWORK( 3 ) = REAL( N2 ) +* N2 is the number of singular values of A greater than SFMIN. +* If N2 \brief CGESVX computes the solution to system of linear equations A * X = B for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGESVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, +* EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR, +* WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, TRANS +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL BERR( * ), C( * ), FERR( * ), R( * ), +* $ RWORK( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGESVX uses the LU factorization to compute the solution to a complex +*> system of linear equations +*> A * X = B, +*> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +*> TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +*> TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +*> or diag(C)*B (if TRANS = 'T' or 'C'). +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the +*> matrix A (after equilibration if FACT = 'E') as +*> A = P * L * U, +*> where P is a permutation matrix, L is a unit lower triangular +*> matrix, and U is upper triangular. +*> +*> 3. If some U(i,i)=0, so that U is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +*> that it solves the original system before equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF and IPIV contain the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by R and C. +*> A, AF, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is +*> not 'N', then A must have been equilibrated by the scaling +*> factors in R and/or C. A is not modified if FACT = 'F' or +*> 'N', or if FACT = 'E' and EQUED = 'N' on exit. +*> +*> On exit, if EQUED .ne. 'N', A is scaled as follows: +*> EQUED = 'R': A := diag(R) * A +*> EQUED = 'C': A := A * diag(C) +*> EQUED = 'B': A := diag(R) * A * diag(C). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the factors L and U from the factorization +*> A = P*L*U as computed by CGETRF. If EQUED .ne. 'N', then +*> AF is the factored form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the equilibrated matrix A (see the description of A for +*> the form of the equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains the pivot indices from the factorization A = P*L*U +*> as computed by CGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the equilibrated matrix A. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is REAL array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. R is an input argument if FACT = 'F'; +*> otherwise, R is an output argument. If FACT = 'F' and +*> EQUED = 'R' or 'B', each element of R must be positive. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. C is an input argument if FACT = 'F'; +*> otherwise, C is an output argument. If FACT = 'F' and +*> EQUED = 'C' or 'B', each element of C must be positive. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +*> diag(R)*B; +*> if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +*> overwritten by diag(C)*B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X +*> to the original system of equations. Note that A and B are +*> modified on exit if EQUED .ne. 'N', and the solution to the +*> equilibrated system is inv(diag(C))*X if TRANS = 'N' and +*> EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' +*> and EQUED = 'R' or 'B'. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The estimate of the reciprocal condition number of the matrix +*> A after equilibration (if done). If RCOND is less than the +*> machine precision (in particular, if RCOND = 0), the matrix +*> is singular to working precision. This condition is +*> indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> On exit, RWORK(1) contains the reciprocal pivot growth +*> factor norm(A)/norm(U). The "max absolute element" norm is +*> used. If RWORK(1) is much less than 1, then the stability +*> of the LU factorization of the (equilibrated) matrix A +*> could be poor. This also means that the solution X, condition +*> estimator RCOND, and forward error bound FERR could be +*> unreliable. If factorization fails with 0 RWORK(1) contains the reciprocal pivot growth factor for the +*> leading INFO columns of A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: U(i,i) is exactly zero. The factorization has +*> been completed, but the factor U is exactly +*> singular, so the solution and error bounds +*> could not be computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexGEsolve +* +* ===================================================================== + SUBROUTINE CGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), C( * ), FERR( * ), R( * ), + $ RWORK( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + CHARACTER NORM + INTEGER I, INFEQU, J + REAL AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, RPVGRW, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANGE, CLANTR, SLAMCH + EXTERNAL LSAME, CLANGE, CLANTR, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CGECON, CGEEQU, CGERFS, CGETRF, CGETRS, CLACPY, + $ CLAQGE, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -12 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGESVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL CGEEQU( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL CLAQGE( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = R( I )*B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( COLEQU ) THEN + DO 60 J = 1, NRHS + DO 50 I = 1, N + B( I, J ) = C( I )*B( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of A. +* + CALL CLACPY( 'Full', N, N, A, LDA, AF, LDAF ) + CALL CGETRF( N, N, AF, LDAF, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + RPVGRW = CLANTR( 'M', 'U', 'N', INFO, INFO, AF, LDAF, + $ RWORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = CLANGE( 'M', N, INFO, A, LDA, RWORK ) / + $ RPVGRW + END IF + RWORK( 1 ) = RPVGRW + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A and the +* reciprocal pivot growth factor RPVGRW. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = CLANGE( NORM, N, N, A, LDA, RWORK ) + RPVGRW = CLANTR( 'M', 'U', 'N', N, N, AF, LDAF, RWORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = CLANGE( 'M', N, N, A, LDA, RWORK ) / RPVGRW + END IF +* +* Compute the reciprocal of the condition number of A. +* + CALL CGECON( NORM, N, AF, LDAF, ANORM, RCOND, WORK, RWORK, INFO ) +* +* Compute the solution matrix X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CGETRS( TRANS, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL CGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( NOTRAN ) THEN + IF( COLEQU ) THEN + DO 80 J = 1, NRHS + DO 70 I = 1, N + X( I, J ) = C( I )*X( I, J ) + 70 CONTINUE + 80 CONTINUE + DO 90 J = 1, NRHS + FERR( J ) = FERR( J ) / COLCND + 90 CONTINUE + END IF + ELSE IF( ROWEQU ) THEN + DO 110 J = 1, NRHS + DO 100 I = 1, N + X( I, J ) = R( I )*X( I, J ) + 100 CONTINUE + 110 CONTINUE + DO 120 J = 1, NRHS + FERR( J ) = FERR( J ) / ROWCND + 120 CONTINUE + END IF +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RWORK( 1 ) = RPVGRW + RETURN +* +* End of CGESVX +* + END diff --git a/dspl/liblapack/SRC/cgesvxx.f b/dspl/liblapack/SRC/cgesvxx.f new file mode 100644 index 0000000..30d1beb --- /dev/null +++ b/dspl/liblapack/SRC/cgesvxx.f @@ -0,0 +1,772 @@ +*> \brief CGESVXX computes the solution to system of linear equations A * X = B for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGESVXX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGESVXX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, +* EQUED, R, C, B, LDB, X, LDX, RCOND, RPVGRW, +* BERR, N_ERR_BNDS, ERR_BNDS_NORM, +* ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, TRANS +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* REAL RCOND, RPVGRW +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX , * ),WORK( * ) +* REAL R( * ), C( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGESVXX uses the LU factorization to compute the solution to a +*> complex system of linear equations A * X = B, where A is an +*> N-by-N matrix and X and B are N-by-NRHS matrices. +*> +*> If requested, both normwise and maximum componentwise error bounds +*> are returned. CGESVXX will return a solution with a tiny +*> guaranteed error (O(eps) where eps is the working machine +*> precision) unless the matrix is very ill-conditioned, in which +*> case a warning is returned. Relevant condition numbers also are +*> calculated and returned. +*> +*> CGESVXX accepts user-provided factorizations and equilibration +*> factors; see the definitions of the FACT and EQUED options. +*> Solving with refinement and using a factorization from a previous +*> CGESVXX call will also produce a solution with either O(eps) +*> errors or warnings, but we cannot make that claim for general +*> user-provided factorizations and equilibration factors if they +*> differ from what CGESVXX would itself produce. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> +*> TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +*> TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +*> TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +*> +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +*> or diag(C)*B (if TRANS = 'T' or 'C'). +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor +*> the matrix A (after equilibration if FACT = 'E') as +*> +*> A = P * L * U, +*> +*> where P is a permutation matrix, L is a unit lower triangular +*> matrix, and U is upper triangular. +*> +*> 3. If some U(i,i)=0, so that U is exactly singular, then the +*> routine returns with INFO = i. Otherwise, the factored form of A +*> is used to estimate the condition number of the matrix A (see +*> argument RCOND). If the reciprocal of the condition number is less +*> than machine precision, the routine still goes on to solve for X +*> and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), +*> the routine will use iterative refinement to try to get a small +*> error and error bounds. Refinement calculates the residual to at +*> least twice the working precision. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +*> that it solves the original system before equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF and IPIV contain the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by R and C. +*> A, AF, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is +*> not 'N', then A must have been equilibrated by the scaling +*> factors in R and/or C. A is not modified if FACT = 'F' or +*> 'N', or if FACT = 'E' and EQUED = 'N' on exit. +*> +*> On exit, if EQUED .ne. 'N', A is scaled as follows: +*> EQUED = 'R': A := diag(R) * A +*> EQUED = 'C': A := A * diag(C) +*> EQUED = 'B': A := diag(R) * A * diag(C). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the factors L and U from the factorization +*> A = P*L*U as computed by CGETRF. If EQUED .ne. 'N', then +*> AF is the factored form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the equilibrated matrix A (see the description of A for +*> the form of the equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains the pivot indices from the factorization A = P*L*U +*> as computed by CGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the equilibrated matrix A. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is REAL array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. R is an input argument if FACT = 'F'; +*> otherwise, R is an output argument. If FACT = 'F' and +*> EQUED = 'R' or 'B', each element of R must be positive. +*> If R is output, each element of R is a power of the radix. +*> If R is input, each element of R should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. C is an input argument if FACT = 'F'; +*> otherwise, C is an output argument. If FACT = 'F' and +*> EQUED = 'C' or 'B', each element of C must be positive. +*> If C is output, each element of C is a power of the radix. +*> If C is input, each element of C should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +*> diag(R)*B; +*> if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +*> overwritten by diag(C)*B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X to the original +*> system of equations. Note that A and B are modified on exit +*> if EQUED .ne. 'N', and the solution to the equilibrated system is +*> inv(diag(C))*X if TRANS = 'N' and EQUED = 'C' or 'B', or +*> inv(diag(R))*X if TRANS = 'T' or 'C' and EQUED = 'R' or 'B'. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] RPVGRW +*> \verbatim +*> RPVGRW is REAL +*> Reciprocal pivot growth. On exit, this contains the reciprocal +*> pivot growth factor norm(A)/norm(U). The "max absolute element" +*> norm is used. If this is much less than 1, then the stability of +*> the LU factorization of the (equilibrated) matrix A could be poor. +*> This also means that the solution X, estimated condition numbers, +*> and error bounds could be unreliable. If factorization fails with +*> 0 for the leading INFO columns of A. In CGESVX, this quantity is +*> returned in WORK(1). +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is REAL array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexGEsolve +* +* ===================================================================== + SUBROUTINE CGESVXX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ EQUED, R, C, B, LDB, X, LDX, RCOND, RPVGRW, + $ BERR, N_ERR_BNDS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, RWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + REAL RCOND, RPVGRW +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX , * ),WORK( * ) + REAL R( * ), C( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + INTEGER INFEQU, J + REAL AMAX, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, SMLNUM +* .. +* .. External Functions .. + EXTERNAL LSAME, SLAMCH, CLA_GERPVGRW + LOGICAL LSAME + REAL SLAMCH, CLA_GERPVGRW +* .. +* .. External Subroutines .. + EXTERNAL CGEEQUB, CGETRF, CGETRS, CLACPY, CLAQGE, + $ XERBLA, CLASCL2, CGERFSX +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF +* +* Default is failure. If an input parameter is wrong or +* factorization fails, make everything look horrible. Only the +* pivot growth is set here, the rest is initialized in CGERFSX. +* + RPVGRW = ZERO +* +* Test the input parameters. PARAMS is not tested until CGERFSX. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT. + $ LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -12 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGESVXX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL CGEEQUB( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL CLAQGE( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF +* +* If the scaling factors are not applied, set them to 1.0. +* + IF ( .NOT.ROWEQU ) THEN + DO J = 1, N + R( J ) = 1.0 + END DO + END IF + IF ( .NOT.COLEQU ) THEN + DO J = 1, N + C( J ) = 1.0 + END DO + END IF + END IF +* +* Scale the right-hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) CALL CLASCL2( N, NRHS, R, B, LDB ) + ELSE + IF( COLEQU ) CALL CLASCL2( N, NRHS, C, B, LDB ) + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of A. +* + CALL CLACPY( 'Full', N, N, A, LDA, AF, LDAF ) + CALL CGETRF( N, N, AF, LDAF, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Pivot in column INFO is exactly 0 +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + RPVGRW = CLA_GERPVGRW( N, INFO, A, LDA, AF, LDAF ) + RETURN + END IF + END IF +* +* Compute the reciprocal pivot growth factor RPVGRW. +* + RPVGRW = CLA_GERPVGRW( N, N, A, LDA, AF, LDAF ) +* +* Compute the solution matrix X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CGETRS( TRANS, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL CGERFSX( TRANS, EQUED, N, NRHS, A, LDA, AF, LDAF, + $ IPIV, R, C, B, LDB, X, LDX, RCOND, BERR, + $ N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, RWORK, INFO ) +* +* Scale solutions. +* + IF ( COLEQU .AND. NOTRAN ) THEN + CALL CLASCL2 ( N, NRHS, C, X, LDX ) + ELSE IF ( ROWEQU .AND. .NOT.NOTRAN ) THEN + CALL CLASCL2 ( N, NRHS, R, X, LDX ) + END IF +* + RETURN +* +* End of CGESVXX +* + END diff --git a/dspl/liblapack/SRC/cgetc2.f b/dspl/liblapack/SRC/cgetc2.f new file mode 100644 index 0000000..e4a414f --- /dev/null +++ b/dspl/liblapack/SRC/cgetc2.f @@ -0,0 +1,234 @@ +*> \brief \b CGETC2 computes the LU factorization with complete pivoting of the general n-by-n matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGETC2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGETC2( N, A, LDA, IPIV, JPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), JPIV( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGETC2 computes an LU factorization, using complete pivoting, of the +*> n-by-n matrix A. The factorization has the form A = P * L * U * Q, +*> where P and Q are permutation matrices, L is lower triangular with +*> unit diagonal elements and U is upper triangular. +*> +*> This is a level 1 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the n-by-n matrix to be factored. +*> On exit, the factors L and U from the factorization +*> A = P*L*U*Q; the unit diagonal elements of L are not stored. +*> If U(k, k) appears to be less than SMIN, U(k, k) is given the +*> value of SMIN, giving a nonsingular perturbed system. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1, N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N). +*> The pivot indices; for 1 <= i <= N, row i of the +*> matrix has been interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] JPIV +*> \verbatim +*> JPIV is INTEGER array, dimension (N). +*> The pivot indices; for 1 <= j <= N, column j of the +*> matrix has been interchanged with column JPIV(j). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = k, U(k, k) is likely to produce overflow if +*> one tries to solve for x in Ax = b. So U is perturbed +*> to avoid the overflow. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexGEauxiliary +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== + SUBROUTINE CGETC2( N, A, LDA, IPIV, JPIV, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IP, IPV, J, JP, JPV + REAL BIGNUM, EPS, SMIN, SMLNUM, XMAX +* .. +* .. External Subroutines .. + EXTERNAL CGERU, CSWAP, SLABAD +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CMPLX, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Handle the case N=1 by itself +* + IF( N.EQ.1 ) THEN + IPIV( 1 ) = 1 + JPIV( 1 ) = 1 + IF( ABS( A( 1, 1 ) ).LT.SMLNUM ) THEN + INFO = 1 + A( 1, 1 ) = CMPLX( SMLNUM, ZERO ) + END IF + RETURN + END IF +* +* Factorize A using complete pivoting. +* Set pivots less than SMIN to SMIN +* + DO 40 I = 1, N - 1 +* +* Find max element in matrix A +* + XMAX = ZERO + DO 20 IP = I, N + DO 10 JP = I, N + IF( ABS( A( IP, JP ) ).GE.XMAX ) THEN + XMAX = ABS( A( IP, JP ) ) + IPV = IP + JPV = JP + END IF + 10 CONTINUE + 20 CONTINUE + IF( I.EQ.1 ) + $ SMIN = MAX( EPS*XMAX, SMLNUM ) +* +* Swap rows +* + IF( IPV.NE.I ) + $ CALL CSWAP( N, A( IPV, 1 ), LDA, A( I, 1 ), LDA ) + IPIV( I ) = IPV +* +* Swap columns +* + IF( JPV.NE.I ) + $ CALL CSWAP( N, A( 1, JPV ), 1, A( 1, I ), 1 ) + JPIV( I ) = JPV +* +* Check for singularity +* + IF( ABS( A( I, I ) ).LT.SMIN ) THEN + INFO = I + A( I, I ) = CMPLX( SMIN, ZERO ) + END IF + DO 30 J = I + 1, N + A( J, I ) = A( J, I ) / A( I, I ) + 30 CONTINUE + CALL CGERU( N-I, N-I, -CMPLX( ONE ), A( I+1, I ), 1, + $ A( I, I+1 ), LDA, A( I+1, I+1 ), LDA ) + 40 CONTINUE +* + IF( ABS( A( N, N ) ).LT.SMIN ) THEN + INFO = N + A( N, N ) = CMPLX( SMIN, ZERO ) + END IF +* +* Set last pivots to N +* + IPIV( N ) = N + JPIV( N ) = N +* + RETURN +* +* End of CGETC2 +* + END diff --git a/dspl/liblapack/SRC/cgetf2.f b/dspl/liblapack/SRC/cgetf2.f new file mode 100644 index 0000000..083e6cc --- /dev/null +++ b/dspl/liblapack/SRC/cgetf2.f @@ -0,0 +1,214 @@ +*> \brief \b CGETF2 computes the LU factorization of a general m-by-n matrix using partial pivoting with row interchanges (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGETF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGETF2( M, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGETF2 computes an LU factorization of a general m-by-n matrix A +*> using partial pivoting with row interchanges. +*> +*> The factorization has the form +*> A = P * L * U +*> where P is a permutation matrix, L is lower triangular with unit +*> diagonal elements (lower trapezoidal if m > n), and U is upper +*> triangular (upper trapezoidal if m < n). +*> +*> This is the right-looking Level 2 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the m by n matrix to be factored. +*> On exit, the factors L and U from the factorization +*> A = P*L*U; the unit diagonal elements of L are not stored. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, U(k,k) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +* ===================================================================== + SUBROUTINE CGETF2( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + REAL SFMIN + INTEGER I, J, JP +* .. +* .. External Functions .. + REAL SLAMCH + INTEGER ICAMAX + EXTERNAL SLAMCH, ICAMAX +* .. +* .. External Subroutines .. + EXTERNAL CGERU, CSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGETF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Compute machine safe minimum +* + SFMIN = SLAMCH('S') +* + DO 10 J = 1, MIN( M, N ) +* +* Find pivot and test for singularity. +* + JP = J - 1 + ICAMAX( M-J+1, A( J, J ), 1 ) + IPIV( J ) = JP + IF( A( JP, J ).NE.ZERO ) THEN +* +* Apply the interchange to columns 1:N. +* + IF( JP.NE.J ) + $ CALL CSWAP( N, A( J, 1 ), LDA, A( JP, 1 ), LDA ) +* +* Compute elements J+1:M of J-th column. +* + IF( J.LT.M ) THEN + IF( ABS(A( J, J )) .GE. SFMIN ) THEN + CALL CSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 ) + ELSE + DO 20 I = 1, M-J + A( J+I, J ) = A( J+I, J ) / A( J, J ) + 20 CONTINUE + END IF + END IF +* + ELSE IF( INFO.EQ.0 ) THEN +* + INFO = J + END IF +* + IF( J.LT.MIN( M, N ) ) THEN +* +* Update trailing submatrix. +* + CALL CGERU( M-J, N-J, -ONE, A( J+1, J ), 1, A( J, J+1 ), + $ LDA, A( J+1, J+1 ), LDA ) + END IF + 10 CONTINUE + RETURN +* +* End of CGETF2 +* + END diff --git a/dspl/liblapack/SRC/cgetrf.f b/dspl/liblapack/SRC/cgetrf.f new file mode 100644 index 0000000..4e72995 --- /dev/null +++ b/dspl/liblapack/SRC/cgetrf.f @@ -0,0 +1,225 @@ +*> \brief \b CGETRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGETRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGETRF( M, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGETRF computes an LU factorization of a general M-by-N matrix A +*> using partial pivoting with row interchanges. +*> +*> The factorization has the form +*> A = P * L * U +*> where P is a permutation matrix, L is lower triangular with unit +*> diagonal elements (lower trapezoidal if m > n), and U is upper +*> triangular (upper trapezoidal if m < n). +*> +*> This is the right-looking Level 3 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix to be factored. +*> On exit, the factors L and U from the factorization +*> A = P*L*U; the unit diagonal elements of L are not stored. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +* ===================================================================== + SUBROUTINE CGETRF( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO, J, JB, NB +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CGETRF2, CLASWP, CTRSM, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'CGETRF', ' ', M, N, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN +* +* Use unblocked code. +* + CALL CGETRF2( M, N, A, LDA, IPIV, INFO ) + ELSE +* +* Use blocked code. +* + DO 20 J = 1, MIN( M, N ), NB + JB = MIN( MIN( M, N )-J+1, NB ) +* +* Factor diagonal and subdiagonal blocks and test for exact +* singularity. +* + CALL CGETRF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO ) +* +* Adjust INFO and the pivot indices. +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + J - 1 + DO 10 I = J, MIN( M, J+JB-1 ) + IPIV( I ) = J - 1 + IPIV( I ) + 10 CONTINUE +* +* Apply interchanges to columns 1:J-1. +* + CALL CLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 ) +* + IF( J+JB.LE.N ) THEN +* +* Apply interchanges to columns J+JB:N. +* + CALL CLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1, + $ IPIV, 1 ) +* +* Compute block row of U. +* + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB, + $ N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ), + $ LDA ) + IF( J+JB.LE.M ) THEN +* +* Update trailing submatrix. +* + CALL CGEMM( 'No transpose', 'No transpose', M-J-JB+1, + $ N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA, + $ A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ), + $ LDA ) + END IF + END IF + 20 CONTINUE + END IF + RETURN +* +* End of CGETRF +* + END diff --git a/dspl/liblapack/SRC/cgetrf2.f b/dspl/liblapack/SRC/cgetrf2.f new file mode 100644 index 0000000..eda77e2 --- /dev/null +++ b/dspl/liblapack/SRC/cgetrf2.f @@ -0,0 +1,274 @@ +*> \brief \b CGETRF2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE CGETRF2( M, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGETRF2 computes an LU factorization of a general M-by-N matrix A +*> using partial pivoting with row interchanges. +*> +*> The factorization has the form +*> A = P * L * U +*> where P is a permutation matrix, L is lower triangular with unit +*> diagonal elements (lower trapezoidal if m > n), and U is upper +*> triangular (upper trapezoidal if m < n). +*> +*> This is the recursive version of the algorithm. It divides +*> the matrix into four submatrices: +*> +*> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 +*> A = [ -----|----- ] with n1 = min(m,n)/2 +*> [ A21 | A22 ] n2 = n-n1 +*> +*> [ A11 ] +*> The subroutine calls itself to factor [ --- ], +*> [ A12 ] +*> [ A12 ] +*> do the swaps on [ --- ], solve A12, update A22, +*> [ A22 ] +*> +*> then calls itself to factor A22 and do the swaps on A21. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix to be factored. +*> On exit, the factors L and U from the factorization +*> A = P*L*U; the unit diagonal elements of L are not stored. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexGEcomputational +* +* ===================================================================== + RECURSIVE SUBROUTINE CGETRF2( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + REAL SFMIN + COMPLEX TEMP + INTEGER I, IINFO, N1, N2 +* .. +* .. External Functions .. + REAL SLAMCH + INTEGER ICAMAX + EXTERNAL SLAMCH, ICAMAX +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CSCAL, CLASWP, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGETRF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN + + IF ( M.EQ.1 ) THEN +* +* Use unblocked code for one row case +* Just need to handle IPIV and INFO +* + IPIV( 1 ) = 1 + IF ( A(1,1).EQ.ZERO ) + $ INFO = 1 +* + ELSE IF( N.EQ.1 ) THEN +* +* Use unblocked code for one column case +* +* +* Compute machine safe minimum +* + SFMIN = SLAMCH('S') +* +* Find pivot and test for singularity +* + I = ICAMAX( M, A( 1, 1 ), 1 ) + IPIV( 1 ) = I + IF( A( I, 1 ).NE.ZERO ) THEN +* +* Apply the interchange +* + IF( I.NE.1 ) THEN + TEMP = A( 1, 1 ) + A( 1, 1 ) = A( I, 1 ) + A( I, 1 ) = TEMP + END IF +* +* Compute elements 2:M of the column +* + IF( ABS(A( 1, 1 )) .GE. SFMIN ) THEN + CALL CSCAL( M-1, ONE / A( 1, 1 ), A( 2, 1 ), 1 ) + ELSE + DO 10 I = 1, M-1 + A( 1+I, 1 ) = A( 1+I, 1 ) / A( 1, 1 ) + 10 CONTINUE + END IF +* + ELSE + INFO = 1 + END IF +* + ELSE +* +* Use recursive code +* + N1 = MIN( M, N ) / 2 + N2 = N-N1 +* +* [ A11 ] +* Factor [ --- ] +* [ A21 ] +* + CALL CGETRF2( M, N1, A, LDA, IPIV, IINFO ) + + IF ( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* [ A12 ] +* Apply interchanges to [ --- ] +* [ A22 ] +* + CALL CLASWP( N2, A( 1, N1+1 ), LDA, 1, N1, IPIV, 1 ) +* +* Solve A12 +* + CALL CTRSM( 'L', 'L', 'N', 'U', N1, N2, ONE, A, LDA, + $ A( 1, N1+1 ), LDA ) +* +* Update A22 +* + CALL CGEMM( 'N', 'N', M-N1, N2, N1, -ONE, A( N1+1, 1 ), LDA, + $ A( 1, N1+1 ), LDA, ONE, A( N1+1, N1+1 ), LDA ) +* +* Factor A22 +* + CALL CGETRF2( M-N1, N2, A( N1+1, N1+1 ), LDA, IPIV( N1+1 ), + $ IINFO ) +* +* Adjust INFO and the pivot indices +* + IF ( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + N1 + DO 20 I = N1+1, MIN( M, N ) + IPIV( I ) = IPIV( I ) + N1 + 20 CONTINUE +* +* Apply interchanges to A21 +* + CALL CLASWP( N1, A( 1, 1 ), LDA, N1+1, MIN( M, N), IPIV, 1 ) +* + END IF + RETURN +* +* End of CGETRF2 +* + END diff --git a/dspl/liblapack/SRC/cgetri.f b/dspl/liblapack/SRC/cgetri.f new file mode 100644 index 0000000..bc83f74 --- /dev/null +++ b/dspl/liblapack/SRC/cgetri.f @@ -0,0 +1,262 @@ +*> \brief \b CGETRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGETRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGETRI computes the inverse of a matrix using the LU factorization +*> computed by CGETRF. +*> +*> This method inverts U and then computes inv(A) by solving the system +*> inv(A)*L = inv(U) for inv(A). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the factors L and U from the factorization +*> A = P*L*U as computed by CGETRF. +*> On exit, if INFO = 0, the inverse of the original matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from CGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO=0, then WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimal performance LWORK >= N*NB, where NB is +*> the optimal blocksize returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero; the matrix is +*> singular and its inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +* ===================================================================== + SUBROUTINE CGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB, + $ NBMIN, NN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CGEMV, CSWAP, CTRSM, CTRTRI, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NB = ILAENV( 1, 'CGETRI', ' ', N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGETRI', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form inv(U). If INFO > 0 from CTRTRI, then U is singular, +* and the inverse is not computed. +* + CALL CTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = MAX( LDWORK*NB, 1 ) + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CGETRI', ' ', N, -1, -1, -1 ) ) + END IF + ELSE + IWS = N + END IF +* +* Solve the equation inv(A)*L = inv(U) for inv(A). +* + IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + DO 20 J = N, 1, -1 +* +* Copy current column of L to WORK and replace with zeros. +* + DO 10 I = J + 1, N + WORK( I ) = A( I, J ) + A( I, J ) = ZERO + 10 CONTINUE +* +* Compute current column of inv(A). +* + IF( J.LT.N ) + $ CALL CGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ), + $ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 ) + 20 CONTINUE + ELSE +* +* Use blocked code. +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 50 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) +* +* Copy current block column of L to WORK and replace with +* zeros. +* + DO 40 JJ = J, J + JB - 1 + DO 30 I = JJ + 1, N + WORK( I+( JJ-J )*LDWORK ) = A( I, JJ ) + A( I, JJ ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Compute current block column of inv(A). +* + IF( J+JB.LE.N ) + $ CALL CGEMM( 'No transpose', 'No transpose', N, JB, + $ N-J-JB+1, -ONE, A( 1, J+JB ), LDA, + $ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA ) + CALL CTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, + $ ONE, WORK( J ), LDWORK, A( 1, J ), LDA ) + 50 CONTINUE + END IF +* +* Apply column interchanges. +* + DO 60 J = N - 1, 1, -1 + JP = IPIV( J ) + IF( JP.NE.J ) + $ CALL CSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 ) + 60 CONTINUE +* + WORK( 1 ) = IWS + RETURN +* +* End of CGETRI +* + END diff --git a/dspl/liblapack/SRC/cgetrs.f b/dspl/liblapack/SRC/cgetrs.f new file mode 100644 index 0000000..6dc63e7 --- /dev/null +++ b/dspl/liblapack/SRC/cgetrs.f @@ -0,0 +1,225 @@ +*> \brief \b CGETRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGETRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGETRS solves a system of linear equations +*> A * X = B, A**T * X = B, or A**H * X = B +*> with a general N-by-N matrix A using the LU factorization computed +*> by CGETRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The factors L and U from the factorization A = P*L*U +*> as computed by CGETRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from CGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +* ===================================================================== + SUBROUTINE CGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLASWP, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGETRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( NOTRAN ) THEN +* +* Solve A * X = B. +* +* Apply row interchanges to the right hand sides. +* + CALL CLASWP( NRHS, B, LDB, 1, N, IPIV, 1 ) +* +* Solve L*X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A**T * X = B or A**H * X = B. +* +* Solve U**T *X = B or U**H *X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Upper', TRANS, 'Non-unit', N, NRHS, ONE, + $ A, LDA, B, LDB ) +* +* Solve L**T *X = B, or L**H *X = B overwriting B with X. +* + CALL CTRSM( 'Left', 'Lower', TRANS, 'Unit', N, NRHS, ONE, A, + $ LDA, B, LDB ) +* +* Apply row interchanges to the solution vectors. +* + CALL CLASWP( NRHS, B, LDB, 1, N, IPIV, -1 ) + END IF +* + RETURN +* +* End of CGETRS +* + END diff --git a/dspl/liblapack/SRC/cgetsls.f b/dspl/liblapack/SRC/cgetsls.f new file mode 100644 index 0000000..e7c5d81 --- /dev/null +++ b/dspl/liblapack/SRC/cgetsls.f @@ -0,0 +1,497 @@ +* Definition: +* =========== +* +* SUBROUTINE CGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, +* $ WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGETSLS solves overdetermined or underdetermined complex linear systems +*> involving an M-by-N matrix A, using a tall skinny QR or short wide LQ +*> factorization of A. It is assumed that A has full rank. +*> +*> +*> +*> The following options are provided: +*> +*> 1. If TRANS = 'N' and m >= n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A*X ||. +*> +*> 2. If TRANS = 'N' and m < n: find the minimum norm solution of +*> an underdetermined system A * X = B. +*> +*> 3. If TRANS = 'C' and m >= n: find the minimum norm solution of +*> an undetermined system A**T * X = B. +*> +*> 4. If TRANS = 'C' and m < n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A**T * X ||. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': the linear system involves A; +*> = 'C': the linear system involves A**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of the matrices B and X. NRHS >=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> A is overwritten by details of its QR or LQ +*> factorization as returned by CGEQR or CGELQ. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the matrix B of right hand side vectors, stored +*> columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +*> if TRANS = 'C'. +*> On exit, if INFO = 0, B is overwritten by the solution +*> vectors, stored columnwise: +*> if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +*> squares solution vectors. +*> if TRANS = 'N' and m < n, rows 1 to N of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'C' and m >= n, rows 1 to M of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'C' and m < n, rows 1 to M of B contain the +*> least squares solution vectors. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= MAX(1,M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) contains optimal (or either minimal +*> or optimal, if query was assumed) LWORK. +*> See LWORK for details. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If LWORK = -1 or -2, then a workspace query is assumed. +*> If LWORK = -1, the routine calculates optimal size of WORK for the +*> optimal performance and returns this value in WORK(1). +*> If LWORK = -2, the routine calculates minimal size of WORK and +*> returns this value in WORK(1). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of the +*> triangular factor of A is zero, so that A does not have +*> full rank; the least squares solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexGEsolve +* +* ===================================================================== + SUBROUTINE CGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, + $ WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TRAN + INTEGER I, IASCL, IBSCL, J, MINMN, MAXMN, BROW, + $ SCLLEN, MNK, TSZO, TSZM, LWO, LWM, LW1, LW2, + $ WSIZEO, WSIZEM, INFO2 + REAL ANRM, BIGNUM, BNRM, SMLNUM, DUM( 1 ) + COMPLEX TQ( 5 ), WORKQ( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, CLANGE + EXTERNAL LSAME, ILAENV, SLABAD, SLAMCH, CLANGE +* .. +* .. External Subroutines .. + EXTERNAL CGEQR, CGEMQR, CLASCL, CLASET, + $ CTRTRS, XERBLA, CGELQ, CGEMLQ +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, MIN, INT +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + MNK = MAX( MINMN, NRHS ) + TRAN = LSAME( TRANS, 'C' ) +* + LQUERY = ( LWORK.EQ.-1 .OR. LWORK.EQ.-2 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. + $ LSAME( TRANS, 'C' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size and minimum LWORK +* + IF( M.GE.N ) THEN + CALL CGEQR( M, N, A, LDA, TQ, -1, WORKQ, -1, INFO2 ) + TSZO = INT( TQ( 1 ) ) + LWO = INT( WORKQ( 1 ) ) + CALL CGEMQR( 'L', TRANS, M, NRHS, N, A, LDA, TQ, + $ TSZO, B, LDB, WORKQ, -1, INFO2 ) + LWO = MAX( LWO, INT( WORKQ( 1 ) ) ) + CALL CGEQR( M, N, A, LDA, TQ, -2, WORKQ, -2, INFO2 ) + TSZM = INT( TQ( 1 ) ) + LWM = INT( WORKQ( 1 ) ) + CALL CGEMQR( 'L', TRANS, M, NRHS, N, A, LDA, TQ, + $ TSZM, B, LDB, WORKQ, -1, INFO2 ) + LWM = MAX( LWM, INT( WORKQ( 1 ) ) ) + WSIZEO = TSZO + LWO + WSIZEM = TSZM + LWM + ELSE + CALL CGELQ( M, N, A, LDA, TQ, -1, WORKQ, -1, INFO2 ) + TSZO = INT( TQ( 1 ) ) + LWO = INT( WORKQ( 1 ) ) + CALL CGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ, + $ TSZO, B, LDB, WORKQ, -1, INFO2 ) + LWO = MAX( LWO, INT( WORKQ( 1 ) ) ) + CALL CGELQ( M, N, A, LDA, TQ, -2, WORKQ, -2, INFO2 ) + TSZM = INT( TQ( 1 ) ) + LWM = INT( WORKQ( 1 ) ) + CALL CGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ, + $ TSZO, B, LDB, WORKQ, -1, INFO2 ) + LWM = MAX( LWM, INT( WORKQ( 1 ) ) ) + WSIZEO = TSZO + LWO + WSIZEM = TSZM + LWM + END IF +* + IF( ( LWORK.LT.WSIZEM ).AND.( .NOT.LQUERY ) ) THEN + INFO = -10 + END IF +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGETSLS', -INFO ) + WORK( 1 ) = REAL( WSIZEO ) + RETURN + END IF + IF( LQUERY ) THEN + IF( LWORK.EQ.-1 ) WORK( 1 ) = REAL( WSIZEO ) + IF( LWORK.EQ.-2 ) WORK( 1 ) = REAL( WSIZEM ) + RETURN + END IF + IF( LWORK.LT.WSIZEO ) THEN + LW1 = TSZM + LW2 = LWM + ELSE + LW1 = TSZO + LW2 = LWO + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL CLASET( 'FULL', MAX( M, N ), NRHS, CZERO, CZERO, + $ B, LDB ) + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', M, N, A, LDA, DUM ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL CLASET( 'F', MAXMN, NRHS, CZERO, CZERO, B, LDB ) + GO TO 50 + END IF +* + BROW = M + IF ( TRAN ) THEN + BROW = N + END IF + BNRM = CLANGE( 'M', BROW, NRHS, B, LDB, DUM ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF ( M.GE.N ) THEN +* +* compute QR factorization of A +* + CALL CGEQR( M, N, A, LDA, WORK( LW2+1 ), LW1, + $ WORK( 1 ), LW2, INFO ) + IF ( .NOT.TRAN ) THEN +* +* Least-Squares Problem min || A * X - B || +* +* B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) +* + CALL CGEMQR( 'L' , 'C', M, NRHS, N, A, LDA, + $ WORK( LW2+1 ), LW1, B, LDB, WORK( 1 ), LW2, + $ INFO ) +* +* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL CTRTRS( 'U', 'N', 'N', N, NRHS, + $ A, LDA, B, LDB, INFO ) + IF( INFO.GT.0 ) THEN + RETURN + END IF + SCLLEN = N + ELSE +* +* Overdetermined system of equations A**T * X = B +* +* B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) +* + CALL CTRTRS( 'U', 'C', 'N', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* B(N+1:M,1:NRHS) = CZERO +* + DO 20 J = 1, NRHS + DO 10 I = N + 1, M + B( I, J ) = CZERO + 10 CONTINUE + 20 CONTINUE +* +* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) +* + CALL CGEMQR( 'L', 'N', M, NRHS, N, A, LDA, + $ WORK( LW2+1 ), LW1, B, LDB, WORK( 1 ), LW2, + $ INFO ) +* + SCLLEN = M +* + END IF +* + ELSE +* +* Compute LQ factorization of A +* + CALL CGELQ( M, N, A, LDA, WORK( LW2+1 ), LW1, + $ WORK( 1 ), LW2, INFO ) +* +* workspace at least M, optimally M*NB. +* + IF( .NOT.TRAN ) THEN +* +* underdetermined system of equations A * X = B +* +* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL CTRTRS( 'L', 'N', 'N', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* B(M+1:N,1:NRHS) = 0 +* + DO 40 J = 1, NRHS + DO 30 I = M + 1, N + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS) +* + CALL CGEMLQ( 'L', 'C', N, NRHS, M, A, LDA, + $ WORK( LW2+1 ), LW1, B, LDB, WORK( 1 ), LW2, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* + SCLLEN = N +* + ELSE +* +* overdetermined system min || A**T * X - B || +* +* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) +* + CALL CGEMLQ( 'L', 'N', N, NRHS, M, A, LDA, + $ WORK( LW2+1 ), LW1, B, LDB, WORK( 1 ), LW2, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* +* B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) +* + CALL CTRTRS( 'L', 'C', 'N', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + 50 CONTINUE + WORK( 1 ) = REAL( TSZO + LWO ) + RETURN +* +* End of ZGETSLS +* + END diff --git a/dspl/liblapack/SRC/cggbak.f b/dspl/liblapack/SRC/cggbak.f new file mode 100644 index 0000000..3626ecb --- /dev/null +++ b/dspl/liblapack/SRC/cggbak.f @@ -0,0 +1,307 @@ +*> \brief \b CGGBAK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGGBAK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V, +* LDV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOB, SIDE +* INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. +* REAL LSCALE( * ), RSCALE( * ) +* COMPLEX V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGGBAK forms the right or left eigenvectors of a complex generalized +*> eigenvalue problem A*x = lambda*B*x, by backward transformation on +*> the computed eigenvectors of the balanced pair of matrices output by +*> CGGBAL. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies the type of backward transformation required: +*> = 'N': do nothing, return immediately; +*> = 'P': do backward transformation for permutation only; +*> = 'S': do backward transformation for scaling only; +*> = 'B': do backward transformations for both permutation and +*> scaling. +*> JOB must be the same as the argument JOB supplied to CGGBAL. +*> \endverbatim +*> +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': V contains right eigenvectors; +*> = 'L': V contains left eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows of the matrix V. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> The integers ILO and IHI determined by CGGBAL. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in] LSCALE +*> \verbatim +*> LSCALE is REAL array, dimension (N) +*> Details of the permutations and/or scaling factors applied +*> to the left side of A and B, as returned by CGGBAL. +*> \endverbatim +*> +*> \param[in] RSCALE +*> \verbatim +*> RSCALE is REAL array, dimension (N) +*> Details of the permutations and/or scaling factors applied +*> to the right side of A and B, as returned by CGGBAL. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of columns of the matrix V. M >= 0. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is COMPLEX array, dimension (LDV,M) +*> On entry, the matrix of right or left eigenvectors to be +*> transformed, as returned by CTGEVC. +*> On exit, V is overwritten by the transformed eigenvectors. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the matrix V. LDV >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGBcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> See R.C. Ward, Balancing the generalized eigenvalue problem, +*> SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V, + $ LDV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOB, SIDE + INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. + REAL LSCALE( * ), RSCALE( * ) + COMPLEX V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFTV, RIGHTV + INTEGER I, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + RIGHTV = LSAME( SIDE, 'R' ) + LEFTV = LSAME( SIDE, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( N.EQ.0 .AND. IHI.EQ.0 .AND. ILO.NE.1 ) THEN + INFO = -4 + ELSE IF( N.GT.0 .AND. ( IHI.LT.ILO .OR. IHI.GT.MAX( 1, N ) ) ) + $ THEN + INFO = -5 + ELSE IF( N.EQ.0 .AND. ILO.EQ.1 .AND. IHI.NE.0 ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -8 + ELSE IF( LDV.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGBAK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( M.EQ.0 ) + $ RETURN + IF( LSAME( JOB, 'N' ) ) + $ RETURN +* + IF( ILO.EQ.IHI ) + $ GO TO 30 +* +* Backward balance +* + IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN +* +* Backward transformation on right eigenvectors +* + IF( RIGHTV ) THEN + DO 10 I = ILO, IHI + CALL CSSCAL( M, RSCALE( I ), V( I, 1 ), LDV ) + 10 CONTINUE + END IF +* +* Backward transformation on left eigenvectors +* + IF( LEFTV ) THEN + DO 20 I = ILO, IHI + CALL CSSCAL( M, LSCALE( I ), V( I, 1 ), LDV ) + 20 CONTINUE + END IF + END IF +* +* Backward permutation +* + 30 CONTINUE + IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN +* +* Backward permutation on right eigenvectors +* + IF( RIGHTV ) THEN + IF( ILO.EQ.1 ) + $ GO TO 50 + DO 40 I = ILO - 1, 1, -1 + K = RSCALE( I ) + IF( K.EQ.I ) + $ GO TO 40 + CALL CSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 40 CONTINUE +* + 50 CONTINUE + IF( IHI.EQ.N ) + $ GO TO 70 + DO 60 I = IHI + 1, N + K = RSCALE( I ) + IF( K.EQ.I ) + $ GO TO 60 + CALL CSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 60 CONTINUE + END IF +* +* Backward permutation on left eigenvectors +* + 70 CONTINUE + IF( LEFTV ) THEN + IF( ILO.EQ.1 ) + $ GO TO 90 + DO 80 I = ILO - 1, 1, -1 + K = LSCALE( I ) + IF( K.EQ.I ) + $ GO TO 80 + CALL CSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 80 CONTINUE +* + 90 CONTINUE + IF( IHI.EQ.N ) + $ GO TO 110 + DO 100 I = IHI + 1, N + K = LSCALE( I ) + IF( K.EQ.I ) + $ GO TO 100 + CALL CSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 100 CONTINUE + END IF + END IF +* + 110 CONTINUE +* + RETURN +* +* End of CGGBAK +* + END diff --git a/dspl/liblapack/SRC/cggbal.f b/dspl/liblapack/SRC/cggbal.f new file mode 100644 index 0000000..8c7324c --- /dev/null +++ b/dspl/liblapack/SRC/cggbal.f @@ -0,0 +1,572 @@ +*> \brief \b CGGBAL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGGBAL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE, +* RSCALE, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOB +* INTEGER IHI, ILO, INFO, LDA, LDB, N +* .. +* .. Array Arguments .. +* REAL LSCALE( * ), RSCALE( * ), WORK( * ) +* COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGGBAL balances a pair of general complex matrices (A,B). This +*> involves, first, permuting A and B by similarity transformations to +*> isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N +*> elements on the diagonal; and second, applying a diagonal similarity +*> transformation to rows and columns ILO to IHI to make the rows +*> and columns as close in norm as possible. Both steps are optional. +*> +*> Balancing may reduce the 1-norm of the matrices, and improve the +*> accuracy of the computed eigenvalues and/or eigenvectors in the +*> generalized eigenvalue problem A*x = lambda*B*x. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies the operations to be performed on A and B: +*> = 'N': none: simply set ILO = 1, IHI = N, LSCALE(I) = 1.0 +*> and RSCALE(I) = 1.0 for i=1,...,N; +*> = 'P': permute only; +*> = 'S': scale only; +*> = 'B': both permute and scale. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the input matrix A. +*> On exit, A is overwritten by the balanced matrix. +*> If JOB = 'N', A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> On entry, the input matrix B. +*> On exit, B is overwritten by the balanced matrix. +*> If JOB = 'N', B is not referenced. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[out] IHI +*> \verbatim +*> IHI is INTEGER +*> ILO and IHI are set to integers such that on exit +*> A(i,j) = 0 and B(i,j) = 0 if i > j and +*> j = 1,...,ILO-1 or i = IHI+1,...,N. +*> If JOB = 'N' or 'S', ILO = 1 and IHI = N. +*> \endverbatim +*> +*> \param[out] LSCALE +*> \verbatim +*> LSCALE is REAL array, dimension (N) +*> Details of the permutations and scaling factors applied +*> to the left side of A and B. If P(j) is the index of the +*> row interchanged with row j, and D(j) is the scaling factor +*> applied to row j, then +*> LSCALE(j) = P(j) for J = 1,...,ILO-1 +*> = D(j) for J = ILO,...,IHI +*> = P(j) for J = IHI+1,...,N. +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] RSCALE +*> \verbatim +*> RSCALE is REAL array, dimension (N) +*> Details of the permutations and scaling factors applied +*> to the right side of A and B. If P(j) is the index of the +*> column interchanged with column j, and D(j) is the scaling +*> factor applied to column j, then +*> RSCALE(j) = P(j) for J = 1,...,ILO-1 +*> = D(j) for J = ILO,...,IHI +*> = P(j) for J = IHI+1,...,N. +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (lwork) +*> lwork must be at least max(1,6*N) when JOB = 'S' or 'B', and +*> at least 1 when JOB = 'N' or 'P'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGBcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> See R.C. WARD, Balancing the generalized eigenvalue problem, +*> SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE, + $ RSCALE, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER IHI, ILO, INFO, LDA, LDB, N +* .. +* .. Array Arguments .. + REAL LSCALE( * ), RSCALE( * ), WORK( * ) + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 ) + REAL THREE, SCLFAC + PARAMETER ( THREE = 3.0E+0, SCLFAC = 1.0E+1 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, ICAB, IFLOW, IP1, IR, IRAB, IT, J, JC, JP1, + $ K, KOUNT, L, LCAB, LM1, LRAB, LSFMAX, LSFMIN, + $ M, NR, NRP2 + REAL ALPHA, BASL, BETA, CAB, CMAX, COEF, COEF2, + $ COEF5, COR, EW, EWC, GAMMA, PGAMMA, RAB, SFMAX, + $ SFMIN, SUM, T, TA, TB, TC + COMPLEX CDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SDOT, SLAMCH + EXTERNAL LSAME, ICAMAX, SDOT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL, CSWAP, SAXPY, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, INT, LOG10, MAX, MIN, REAL, SIGN +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGBAL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + ILO = 1 + IHI = N + RETURN + END IF +* + IF( N.EQ.1 ) THEN + ILO = 1 + IHI = N + LSCALE( 1 ) = ONE + RSCALE( 1 ) = ONE + RETURN + END IF +* + IF( LSAME( JOB, 'N' ) ) THEN + ILO = 1 + IHI = N + DO 10 I = 1, N + LSCALE( I ) = ONE + RSCALE( I ) = ONE + 10 CONTINUE + RETURN + END IF +* + K = 1 + L = N + IF( LSAME( JOB, 'S' ) ) + $ GO TO 190 +* + GO TO 30 +* +* Permute the matrices A and B to isolate the eigenvalues. +* +* Find row with one nonzero in columns 1 through L +* + 20 CONTINUE + L = LM1 + IF( L.NE.1 ) + $ GO TO 30 +* + RSCALE( 1 ) = ONE + LSCALE( 1 ) = ONE + GO TO 190 +* + 30 CONTINUE + LM1 = L - 1 + DO 80 I = L, 1, -1 + DO 40 J = 1, LM1 + JP1 = J + 1 + IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO ) + $ GO TO 50 + 40 CONTINUE + J = L + GO TO 70 +* + 50 CONTINUE + DO 60 J = JP1, L + IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO ) + $ GO TO 80 + 60 CONTINUE + J = JP1 - 1 +* + 70 CONTINUE + M = L + IFLOW = 1 + GO TO 160 + 80 CONTINUE + GO TO 100 +* +* Find column with one nonzero in rows K through N +* + 90 CONTINUE + K = K + 1 +* + 100 CONTINUE + DO 150 J = K, L + DO 110 I = K, LM1 + IP1 = I + 1 + IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO ) + $ GO TO 120 + 110 CONTINUE + I = L + GO TO 140 + 120 CONTINUE + DO 130 I = IP1, L + IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO ) + $ GO TO 150 + 130 CONTINUE + I = IP1 - 1 + 140 CONTINUE + M = K + IFLOW = 2 + GO TO 160 + 150 CONTINUE + GO TO 190 +* +* Permute rows M and I +* + 160 CONTINUE + LSCALE( M ) = I + IF( I.EQ.M ) + $ GO TO 170 + CALL CSWAP( N-K+1, A( I, K ), LDA, A( M, K ), LDA ) + CALL CSWAP( N-K+1, B( I, K ), LDB, B( M, K ), LDB ) +* +* Permute columns M and J +* + 170 CONTINUE + RSCALE( M ) = J + IF( J.EQ.M ) + $ GO TO 180 + CALL CSWAP( L, A( 1, J ), 1, A( 1, M ), 1 ) + CALL CSWAP( L, B( 1, J ), 1, B( 1, M ), 1 ) +* + 180 CONTINUE + GO TO ( 20, 90 )IFLOW +* + 190 CONTINUE + ILO = K + IHI = L +* + IF( LSAME( JOB, 'P' ) ) THEN + DO 195 I = ILO, IHI + LSCALE( I ) = ONE + RSCALE( I ) = ONE + 195 CONTINUE + RETURN + END IF +* + IF( ILO.EQ.IHI ) + $ RETURN +* +* Balance the submatrix in rows ILO to IHI. +* + NR = IHI - ILO + 1 + DO 200 I = ILO, IHI + RSCALE( I ) = ZERO + LSCALE( I ) = ZERO +* + WORK( I ) = ZERO + WORK( I+N ) = ZERO + WORK( I+2*N ) = ZERO + WORK( I+3*N ) = ZERO + WORK( I+4*N ) = ZERO + WORK( I+5*N ) = ZERO + 200 CONTINUE +* +* Compute right side vector in resulting linear equations +* + BASL = LOG10( SCLFAC ) + DO 240 I = ILO, IHI + DO 230 J = ILO, IHI + IF( A( I, J ).EQ.CZERO ) THEN + TA = ZERO + GO TO 210 + END IF + TA = LOG10( CABS1( A( I, J ) ) ) / BASL +* + 210 CONTINUE + IF( B( I, J ).EQ.CZERO ) THEN + TB = ZERO + GO TO 220 + END IF + TB = LOG10( CABS1( B( I, J ) ) ) / BASL +* + 220 CONTINUE + WORK( I+4*N ) = WORK( I+4*N ) - TA - TB + WORK( J+5*N ) = WORK( J+5*N ) - TA - TB + 230 CONTINUE + 240 CONTINUE +* + COEF = ONE / REAL( 2*NR ) + COEF2 = COEF*COEF + COEF5 = HALF*COEF2 + NRP2 = NR + 2 + BETA = ZERO + IT = 1 +* +* Start generalized conjugate gradient iteration +* + 250 CONTINUE +* + GAMMA = SDOT( NR, WORK( ILO+4*N ), 1, WORK( ILO+4*N ), 1 ) + + $ SDOT( NR, WORK( ILO+5*N ), 1, WORK( ILO+5*N ), 1 ) +* + EW = ZERO + EWC = ZERO + DO 260 I = ILO, IHI + EW = EW + WORK( I+4*N ) + EWC = EWC + WORK( I+5*N ) + 260 CONTINUE +* + GAMMA = COEF*GAMMA - COEF2*( EW**2+EWC**2 ) - COEF5*( EW-EWC )**2 + IF( GAMMA.EQ.ZERO ) + $ GO TO 350 + IF( IT.NE.1 ) + $ BETA = GAMMA / PGAMMA + T = COEF5*( EWC-THREE*EW ) + TC = COEF5*( EW-THREE*EWC ) +* + CALL SSCAL( NR, BETA, WORK( ILO ), 1 ) + CALL SSCAL( NR, BETA, WORK( ILO+N ), 1 ) +* + CALL SAXPY( NR, COEF, WORK( ILO+4*N ), 1, WORK( ILO+N ), 1 ) + CALL SAXPY( NR, COEF, WORK( ILO+5*N ), 1, WORK( ILO ), 1 ) +* + DO 270 I = ILO, IHI + WORK( I ) = WORK( I ) + TC + WORK( I+N ) = WORK( I+N ) + T + 270 CONTINUE +* +* Apply matrix to vector +* + DO 300 I = ILO, IHI + KOUNT = 0 + SUM = ZERO + DO 290 J = ILO, IHI + IF( A( I, J ).EQ.CZERO ) + $ GO TO 280 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( J ) + 280 CONTINUE + IF( B( I, J ).EQ.CZERO ) + $ GO TO 290 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( J ) + 290 CONTINUE + WORK( I+2*N ) = REAL( KOUNT )*WORK( I+N ) + SUM + 300 CONTINUE +* + DO 330 J = ILO, IHI + KOUNT = 0 + SUM = ZERO + DO 320 I = ILO, IHI + IF( A( I, J ).EQ.CZERO ) + $ GO TO 310 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( I+N ) + 310 CONTINUE + IF( B( I, J ).EQ.CZERO ) + $ GO TO 320 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( I+N ) + 320 CONTINUE + WORK( J+3*N ) = REAL( KOUNT )*WORK( J ) + SUM + 330 CONTINUE +* + SUM = SDOT( NR, WORK( ILO+N ), 1, WORK( ILO+2*N ), 1 ) + + $ SDOT( NR, WORK( ILO ), 1, WORK( ILO+3*N ), 1 ) + ALPHA = GAMMA / SUM +* +* Determine correction to current iteration +* + CMAX = ZERO + DO 340 I = ILO, IHI + COR = ALPHA*WORK( I+N ) + IF( ABS( COR ).GT.CMAX ) + $ CMAX = ABS( COR ) + LSCALE( I ) = LSCALE( I ) + COR + COR = ALPHA*WORK( I ) + IF( ABS( COR ).GT.CMAX ) + $ CMAX = ABS( COR ) + RSCALE( I ) = RSCALE( I ) + COR + 340 CONTINUE + IF( CMAX.LT.HALF ) + $ GO TO 350 +* + CALL SAXPY( NR, -ALPHA, WORK( ILO+2*N ), 1, WORK( ILO+4*N ), 1 ) + CALL SAXPY( NR, -ALPHA, WORK( ILO+3*N ), 1, WORK( ILO+5*N ), 1 ) +* + PGAMMA = GAMMA + IT = IT + 1 + IF( IT.LE.NRP2 ) + $ GO TO 250 +* +* End generalized conjugate gradient iteration +* + 350 CONTINUE + SFMIN = SLAMCH( 'S' ) + SFMAX = ONE / SFMIN + LSFMIN = INT( LOG10( SFMIN ) / BASL+ONE ) + LSFMAX = INT( LOG10( SFMAX ) / BASL ) + DO 360 I = ILO, IHI + IRAB = ICAMAX( N-ILO+1, A( I, ILO ), LDA ) + RAB = ABS( A( I, IRAB+ILO-1 ) ) + IRAB = ICAMAX( N-ILO+1, B( I, ILO ), LDB ) + RAB = MAX( RAB, ABS( B( I, IRAB+ILO-1 ) ) ) + LRAB = INT( LOG10( RAB+SFMIN ) / BASL+ONE ) + IR = LSCALE( I ) + SIGN( HALF, LSCALE( I ) ) + IR = MIN( MAX( IR, LSFMIN ), LSFMAX, LSFMAX-LRAB ) + LSCALE( I ) = SCLFAC**IR + ICAB = ICAMAX( IHI, A( 1, I ), 1 ) + CAB = ABS( A( ICAB, I ) ) + ICAB = ICAMAX( IHI, B( 1, I ), 1 ) + CAB = MAX( CAB, ABS( B( ICAB, I ) ) ) + LCAB = INT( LOG10( CAB+SFMIN ) / BASL+ONE ) + JC = RSCALE( I ) + SIGN( HALF, RSCALE( I ) ) + JC = MIN( MAX( JC, LSFMIN ), LSFMAX, LSFMAX-LCAB ) + RSCALE( I ) = SCLFAC**JC + 360 CONTINUE +* +* Row scaling of matrices A and B +* + DO 370 I = ILO, IHI + CALL CSSCAL( N-ILO+1, LSCALE( I ), A( I, ILO ), LDA ) + CALL CSSCAL( N-ILO+1, LSCALE( I ), B( I, ILO ), LDB ) + 370 CONTINUE +* +* Column scaling of matrices A and B +* + DO 380 J = ILO, IHI + CALL CSSCAL( IHI, RSCALE( J ), A( 1, J ), 1 ) + CALL CSSCAL( IHI, RSCALE( J ), B( 1, J ), 1 ) + 380 CONTINUE +* + RETURN +* +* End of CGGBAL +* + END diff --git a/dspl/liblapack/SRC/cgges.f b/dspl/liblapack/SRC/cgges.f new file mode 100644 index 0000000..988b8a8 --- /dev/null +++ b/dspl/liblapack/SRC/cgges.f @@ -0,0 +1,599 @@ +*> \brief CGGES computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGGES + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGGES( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, LDB, +* SDIM, ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK, +* LWORK, RWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVSL, JOBVSR, SORT +* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* REAL RWORK( * ) +* COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), +* $ WORK( * ) +* .. +* .. Function Arguments .. +* LOGICAL SELCTG +* EXTERNAL SELCTG +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGGES computes for a pair of N-by-N complex nonsymmetric matrices +*> (A,B), the generalized eigenvalues, the generalized complex Schur +*> form (S, T), and optionally left and/or right Schur vectors (VSL +*> and VSR). This gives the generalized Schur factorization +*> +*> (A,B) = ( (VSL)*S*(VSR)**H, (VSL)*T*(VSR)**H ) +*> +*> where (VSR)**H is the conjugate-transpose of VSR. +*> +*> Optionally, it also orders the eigenvalues so that a selected cluster +*> of eigenvalues appears in the leading diagonal blocks of the upper +*> triangular matrix S and the upper triangular matrix T. The leading +*> columns of VSL and VSR then form an unitary basis for the +*> corresponding left and right eigenspaces (deflating subspaces). +*> +*> (If only the generalized eigenvalues are needed, use the driver +*> CGGEV instead, which is faster.) +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w +*> or a ratio alpha/beta = w, such that A - w*B is singular. It is +*> usually represented as the pair (alpha,beta), as there is a +*> reasonable interpretation for beta=0, and even for both being zero. +*> +*> A pair of matrices (S,T) is in generalized complex Schur form if S +*> and T are upper triangular and, in addition, the diagonal elements +*> of T are non-negative real numbers. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVSL +*> \verbatim +*> JOBVSL is CHARACTER*1 +*> = 'N': do not compute the left Schur vectors; +*> = 'V': compute the left Schur vectors. +*> \endverbatim +*> +*> \param[in] JOBVSR +*> \verbatim +*> JOBVSR is CHARACTER*1 +*> = 'N': do not compute the right Schur vectors; +*> = 'V': compute the right Schur vectors. +*> \endverbatim +*> +*> \param[in] SORT +*> \verbatim +*> SORT is CHARACTER*1 +*> Specifies whether or not to order the eigenvalues on the +*> diagonal of the generalized Schur form. +*> = 'N': Eigenvalues are not ordered; +*> = 'S': Eigenvalues are ordered (see SELCTG). +*> \endverbatim +*> +*> \param[in] SELCTG +*> \verbatim +*> SELCTG is a LOGICAL FUNCTION of two COMPLEX arguments +*> SELCTG must be declared EXTERNAL in the calling subroutine. +*> If SORT = 'N', SELCTG is not referenced. +*> If SORT = 'S', SELCTG is used to select eigenvalues to sort +*> to the top left of the Schur form. +*> An eigenvalue ALPHA(j)/BETA(j) is selected if +*> SELCTG(ALPHA(j),BETA(j)) is true. +*> +*> Note that a selected complex eigenvalue may no longer satisfy +*> SELCTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since +*> ordering may change the value of complex eigenvalues +*> (especially if the eigenvalue is ill-conditioned), in this +*> case INFO is set to N+2 (See INFO below). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VSL, and VSR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the first of the pair of matrices. +*> On exit, A has been overwritten by its generalized Schur +*> form S. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB, N) +*> On entry, the second of the pair of matrices. +*> On exit, B has been overwritten by its generalized Schur +*> form T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] SDIM +*> \verbatim +*> SDIM is INTEGER +*> If SORT = 'N', SDIM = 0. +*> If SORT = 'S', SDIM = number of eigenvalues (after sorting) +*> for which SELCTG is true. +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is COMPLEX array, dimension (N) +*> On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the +*> generalized eigenvalues. ALPHA(j), j=1,...,N and BETA(j), +*> j=1,...,N are the diagonals of the complex Schur form (A,B) +*> output by CGGES. The BETA(j) will be non-negative real. +*> +*> Note: the quotients ALPHA(j)/BETA(j) may easily over- or +*> underflow, and BETA(j) may even be zero. Thus, the user +*> should avoid naively computing the ratio alpha/beta. +*> However, ALPHA will be always less than and usually +*> comparable with norm(A) in magnitude, and BETA always less +*> than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VSL +*> \verbatim +*> VSL is COMPLEX array, dimension (LDVSL,N) +*> If JOBVSL = 'V', VSL will contain the left Schur vectors. +*> Not referenced if JOBVSL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSL +*> \verbatim +*> LDVSL is INTEGER +*> The leading dimension of the matrix VSL. LDVSL >= 1, and +*> if JOBVSL = 'V', LDVSL >= N. +*> \endverbatim +*> +*> \param[out] VSR +*> \verbatim +*> VSR is COMPLEX array, dimension (LDVSR,N) +*> If JOBVSR = 'V', VSR will contain the right Schur vectors. +*> Not referenced if JOBVSR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSR +*> \verbatim +*> LDVSR is INTEGER +*> The leading dimension of the matrix VSR. LDVSR >= 1, and +*> if JOBVSR = 'V', LDVSR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,2*N). +*> For good performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (8*N) +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> Not referenced if SORT = 'N'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> =1,...,N: +*> The QZ iteration failed. (A,B) are not in Schur +*> form, but ALPHA(j) and BETA(j) should be correct for +*> j=INFO+1,...,N. +*> > N: =N+1: other than QZ iteration failed in CHGEQZ +*> =N+2: after reordering, roundoff changed values of +*> some complex eigenvalues so that leading +*> eigenvalues in the Generalized Schur form no +*> longer satisfy SELCTG=.TRUE. This could also +*> be caused due to scaling. +*> =N+3: reordering failed in CTGSEN. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEeigen +* +* ===================================================================== + SUBROUTINE CGGES( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, LDB, + $ SDIM, ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK, + $ LWORK, RWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR, SORT + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), + $ WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELCTG + EXTERNAL SELCTG +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL, + $ LQUERY, WANTST + INTEGER I, ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, + $ ILO, IRIGHT, IROWS, IRWRK, ITAU, IWRK, LWKMIN, + $ LWKOPT + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PVSL, + $ PVSR, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + REAL DIF( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY, + $ CLASCL, CLASET, CTGSEN, CUNGQR, CUNMQR, SLABAD, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* + WANTST = LSAME( SORT, 'S' ) +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -14 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -16 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + LWKMIN = MAX( 1, 2*N ) + LWKOPT = MAX( 1, N + N*ILAENV( 1, 'CGEQRF', ' ', N, 1, N, 0 ) ) + LWKOPT = MAX( LWKOPT, N + + $ N*ILAENV( 1, 'CUNMQR', ' ', N, 1, N, -1 ) ) + IF( ILVSL ) THEN + LWKOPT = MAX( LWKOPT, N + + $ N*ILAENV( 1, 'CUNGQR', ' ', N, 1, N, -1 ) ) + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) + $ INFO = -18 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGES ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF +* + IF( ILASCL ) + $ CALL CLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = CLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF +* + IF( ILBSCL ) + $ CALL CLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (Real Workspace: need 6*N) +* + ILEFT = 1 + IRIGHT = N + 1 + IRWRK = IRIGHT + N + CALL CGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Complex Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = 1 + IWRK = ITAU + IROWS + CALL CGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* (Complex Workspace: need N, prefer N*NB) +* + CALL CUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VSL +* (Complex Workspace: need N, prefer N*NB) +* + IF( ILVSL ) THEN + CALL CLASET( 'Full', N, N, CZERO, CONE, VSL, LDVSL ) + IF( IROWS.GT.1 ) THEN + CALL CLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + END IF + CALL CUNGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VSR +* + IF( ILVSR ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + CALL CGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IERR ) +* + SDIM = 0 +* +* Perform QZ algorithm, computing Schur vectors if desired +* (Complex Workspace: need N) +* (Real Workspace: need N) +* + IWRK = ITAU + CALL CHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK( IWRK ), + $ LWORK+1-IWRK, RWORK( IRWRK ), IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 30 + END IF +* +* Sort eigenvalues ALPHA/BETA if desired +* (Workspace: none needed) +* + IF( WANTST ) THEN +* +* Undo scaling on eigenvalues before selecting +* + IF( ILASCL ) + $ CALL CLASCL( 'G', 0, 0, ANRM, ANRMTO, N, 1, ALPHA, N, IERR ) + IF( ILBSCL ) + $ CALL CLASCL( 'G', 0, 0, BNRM, BNRMTO, N, 1, BETA, N, IERR ) +* +* Select eigenvalues +* + DO 10 I = 1, N + BWORK( I ) = SELCTG( ALPHA( I ), BETA( I ) ) + 10 CONTINUE +* + CALL CTGSEN( 0, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, ALPHA, + $ BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PVSL, PVSR, + $ DIF, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1, IERR ) + IF( IERR.EQ.1 ) + $ INFO = N + 3 +* + END IF +* +* Apply back-permutation to VSL and VSR +* (Workspace: none needed) +* + IF( ILVSL ) + $ CALL CGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSL, LDVSL, IERR ) + IF( ILVSR ) + $ CALL CGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSR, LDVSR, IERR ) +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL CLASCL( 'U', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR ) + CALL CLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL CLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR ) + CALL CLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + IF( WANTST ) THEN +* +* Check if reordering is correct +* + LASTSL = .TRUE. + SDIM = 0 + DO 20 I = 1, N + CURSL = SELCTG( ALPHA( I ), BETA( I ) ) + IF( CURSL ) + $ SDIM = SDIM + 1 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + LASTSL = CURSL + 20 CONTINUE +* + END IF +* + 30 CONTINUE +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CGGES +* + END diff --git a/dspl/liblapack/SRC/cgges3.f b/dspl/liblapack/SRC/cgges3.f new file mode 100644 index 0000000..876a26d --- /dev/null +++ b/dspl/liblapack/SRC/cgges3.f @@ -0,0 +1,597 @@ +*> \brief CGGES3 computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors for GE matrices (blocked algorithm) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGGES3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGGES3( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, +* $ LDB, SDIM, ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, +* $ WORK, LWORK, RWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVSL, JOBVSR, SORT +* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* REAL RWORK( * ) +* COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), +* $ WORK( * ) +* .. +* .. Function Arguments .. +* LOGICAL SELCTG +* EXTERNAL SELCTG +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGGES3 computes for a pair of N-by-N complex nonsymmetric matrices +*> (A,B), the generalized eigenvalues, the generalized complex Schur +*> form (S, T), and optionally left and/or right Schur vectors (VSL +*> and VSR). This gives the generalized Schur factorization +*> +*> (A,B) = ( (VSL)*S*(VSR)**H, (VSL)*T*(VSR)**H ) +*> +*> where (VSR)**H is the conjugate-transpose of VSR. +*> +*> Optionally, it also orders the eigenvalues so that a selected cluster +*> of eigenvalues appears in the leading diagonal blocks of the upper +*> triangular matrix S and the upper triangular matrix T. The leading +*> columns of VSL and VSR then form an unitary basis for the +*> corresponding left and right eigenspaces (deflating subspaces). +*> +*> (If only the generalized eigenvalues are needed, use the driver +*> CGGEV instead, which is faster.) +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w +*> or a ratio alpha/beta = w, such that A - w*B is singular. It is +*> usually represented as the pair (alpha,beta), as there is a +*> reasonable interpretation for beta=0, and even for both being zero. +*> +*> A pair of matrices (S,T) is in generalized complex Schur form if S +*> and T are upper triangular and, in addition, the diagonal elements +*> of T are non-negative real numbers. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVSL +*> \verbatim +*> JOBVSL is CHARACTER*1 +*> = 'N': do not compute the left Schur vectors; +*> = 'V': compute the left Schur vectors. +*> \endverbatim +*> +*> \param[in] JOBVSR +*> \verbatim +*> JOBVSR is CHARACTER*1 +*> = 'N': do not compute the right Schur vectors; +*> = 'V': compute the right Schur vectors. +*> \endverbatim +*> +*> \param[in] SORT +*> \verbatim +*> SORT is CHARACTER*1 +*> Specifies whether or not to order the eigenvalues on the +*> diagonal of the generalized Schur form. +*> = 'N': Eigenvalues are not ordered; +*> = 'S': Eigenvalues are ordered (see SELCTG). +*> \endverbatim +*> +*> \param[in] SELCTG +*> \verbatim +*> SELCTG is a LOGICAL FUNCTION of two COMPLEX arguments +*> SELCTG must be declared EXTERNAL in the calling subroutine. +*> If SORT = 'N', SELCTG is not referenced. +*> If SORT = 'S', SELCTG is used to select eigenvalues to sort +*> to the top left of the Schur form. +*> An eigenvalue ALPHA(j)/BETA(j) is selected if +*> SELCTG(ALPHA(j),BETA(j)) is true. +*> +*> Note that a selected complex eigenvalue may no longer satisfy +*> SELCTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since +*> ordering may change the value of complex eigenvalues +*> (especially if the eigenvalue is ill-conditioned), in this +*> case INFO is set to N+2 (See INFO below). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VSL, and VSR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the first of the pair of matrices. +*> On exit, A has been overwritten by its generalized Schur +*> form S. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB, N) +*> On entry, the second of the pair of matrices. +*> On exit, B has been overwritten by its generalized Schur +*> form T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] SDIM +*> \verbatim +*> SDIM is INTEGER +*> If SORT = 'N', SDIM = 0. +*> If SORT = 'S', SDIM = number of eigenvalues (after sorting) +*> for which SELCTG is true. +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is COMPLEX array, dimension (N) +*> On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the +*> generalized eigenvalues. ALPHA(j), j=1,...,N and BETA(j), +*> j=1,...,N are the diagonals of the complex Schur form (A,B) +*> output by CGGES3. The BETA(j) will be non-negative real. +*> +*> Note: the quotients ALPHA(j)/BETA(j) may easily over- or +*> underflow, and BETA(j) may even be zero. Thus, the user +*> should avoid naively computing the ratio alpha/beta. +*> However, ALPHA will be always less than and usually +*> comparable with norm(A) in magnitude, and BETA always less +*> than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VSL +*> \verbatim +*> VSL is COMPLEX array, dimension (LDVSL,N) +*> If JOBVSL = 'V', VSL will contain the left Schur vectors. +*> Not referenced if JOBVSL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSL +*> \verbatim +*> LDVSL is INTEGER +*> The leading dimension of the matrix VSL. LDVSL >= 1, and +*> if JOBVSL = 'V', LDVSL >= N. +*> \endverbatim +*> +*> \param[out] VSR +*> \verbatim +*> VSR is COMPLEX array, dimension (LDVSR,N) +*> If JOBVSR = 'V', VSR will contain the right Schur vectors. +*> Not referenced if JOBVSR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSR +*> \verbatim +*> LDVSR is INTEGER +*> The leading dimension of the matrix VSR. LDVSR >= 1, and +*> if JOBVSR = 'V', LDVSR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (8*N) +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> Not referenced if SORT = 'N'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> =1,...,N: +*> The QZ iteration failed. (A,B) are not in Schur +*> form, but ALPHA(j) and BETA(j) should be correct for +*> j=INFO+1,...,N. +*> > N: =N+1: other than QZ iteration failed in CHGEQZ +*> =N+2: after reordering, roundoff changed values of +*> some complex eigenvalues so that leading +*> eigenvalues in the Generalized Schur form no +*> longer satisfy SELCTG=.TRUE. This could also +*> be caused due to scaling. +*> =N+3: reordering failed in CTGSEN. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date January 2015 +* +*> \ingroup complexGEeigen +* +* ===================================================================== + SUBROUTINE CGGES3( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, + $ LDB, SDIM, ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, + $ WORK, LWORK, RWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.6.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2015 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR, SORT + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), + $ WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELCTG + EXTERNAL SELCTG +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL, + $ LQUERY, WANTST + INTEGER I, ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, + $ ILO, IRIGHT, IROWS, IRWRK, ITAU, IWRK, LWKOPT + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PVSL, + $ PVSR, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + REAL DIF( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CGGBAK, CGGBAL, CGGHD3, CHGEQZ, CLACPY, + $ CLASCL, CLASET, CTGSEN, CUNGQR, CUNMQR, SLABAD, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANGE, SLAMCH + EXTERNAL LSAME, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* + WANTST = LSAME( SORT, 'S' ) +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -14 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -16 + ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF +* +* Compute workspace +* + IF( INFO.EQ.0 ) THEN + CALL CGEQRF( N, N, B, LDB, WORK, WORK, -1, IERR ) + LWKOPT = MAX( 1, N + INT ( WORK( 1 ) ) ) + CALL CUNMQR( 'L', 'C', N, N, N, B, LDB, WORK, A, LDA, WORK, + $ -1, IERR ) + LWKOPT = MAX( LWKOPT, N + INT ( WORK( 1 ) ) ) + IF( ILVSL ) THEN + CALL CUNGQR( N, N, N, VSL, LDVSL, WORK, WORK, -1, + $ IERR ) + LWKOPT = MAX( LWKOPT, N + INT ( WORK( 1 ) ) ) + END IF + CALL CGGHD3( JOBVSL, JOBVSR, N, 1, N, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, N + INT ( WORK( 1 ) ) ) + CALL CHGEQZ( 'S', JOBVSL, JOBVSR, N, 1, N, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK, -1, + $ RWORK, IERR ) + LWKOPT = MAX( LWKOPT, INT ( WORK( 1 ) ) ) + IF( WANTST ) THEN + CALL CTGSEN( 0, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, SDIM, + $ PVSL, PVSR, DIF, WORK, -1, IDUM, 1, IERR ) + LWKOPT = MAX( LWKOPT, INT ( WORK( 1 ) ) ) + END IF + WORK( 1 ) = CMPLX( LWKOPT ) + END IF + +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGES3 ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF +* + IF( ILASCL ) + $ CALL CLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = CLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF +* + IF( ILBSCL ) + $ CALL CLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrix to make it more nearly triangular +* + ILEFT = 1 + IRIGHT = N + 1 + IRWRK = IRIGHT + N + CALL CGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = 1 + IWRK = ITAU + IROWS + CALL CGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* + CALL CUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VSL +* + IF( ILVSL ) THEN + CALL CLASET( 'Full', N, N, CZERO, CONE, VSL, LDVSL ) + IF( IROWS.GT.1 ) THEN + CALL CLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + END IF + CALL CUNGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VSR +* + IF( ILVSR ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* + CALL CGGHD3( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, WORK( IWRK ), LWORK+1-IWRK, IERR ) +* + SDIM = 0 +* +* Perform QZ algorithm, computing Schur vectors if desired +* + IWRK = ITAU + CALL CHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK( IWRK ), + $ LWORK+1-IWRK, RWORK( IRWRK ), IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 30 + END IF +* +* Sort eigenvalues ALPHA/BETA if desired +* + IF( WANTST ) THEN +* +* Undo scaling on eigenvalues before selecting +* + IF( ILASCL ) + $ CALL CLASCL( 'G', 0, 0, ANRM, ANRMTO, N, 1, ALPHA, N, IERR ) + IF( ILBSCL ) + $ CALL CLASCL( 'G', 0, 0, BNRM, BNRMTO, N, 1, BETA, N, IERR ) +* +* Select eigenvalues +* + DO 10 I = 1, N + BWORK( I ) = SELCTG( ALPHA( I ), BETA( I ) ) + 10 CONTINUE +* + CALL CTGSEN( 0, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, ALPHA, + $ BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PVSL, PVSR, + $ DIF, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1, IERR ) + IF( IERR.EQ.1 ) + $ INFO = N + 3 +* + END IF +* +* Apply back-permutation to VSL and VSR +* + IF( ILVSL ) + $ CALL CGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSL, LDVSL, IERR ) + IF( ILVSR ) + $ CALL CGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSR, LDVSR, IERR ) +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL CLASCL( 'U', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR ) + CALL CLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL CLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR ) + CALL CLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + IF( WANTST ) THEN +* +* Check if reordering is correct +* + LASTSL = .TRUE. + SDIM = 0 + DO 20 I = 1, N + CURSL = SELCTG( ALPHA( I ), BETA( I ) ) + IF( CURSL ) + $ SDIM = SDIM + 1 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + LASTSL = CURSL + 20 CONTINUE +* + END IF +* + 30 CONTINUE +* + WORK( 1 ) = CMPLX( LWKOPT ) +* + RETURN +* +* End of CGGES3 +* + END diff --git a/dspl/liblapack/SRC/cggesx.f b/dspl/liblapack/SRC/cggesx.f new file mode 100644 index 0000000..74169ff --- /dev/null +++ b/dspl/liblapack/SRC/cggesx.f @@ -0,0 +1,718 @@ +*> \brief CGGESX computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGGESX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGGESX( JOBVSL, JOBVSR, SORT, SELCTG, SENSE, N, A, LDA, +* B, LDB, SDIM, ALPHA, BETA, VSL, LDVSL, VSR, +* LDVSR, RCONDE, RCONDV, WORK, LWORK, RWORK, +* IWORK, LIWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVSL, JOBVSR, SENSE, SORT +* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LIWORK, LWORK, N, +* $ SDIM +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* INTEGER IWORK( * ) +* REAL RCONDE( 2 ), RCONDV( 2 ), RWORK( * ) +* COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), +* $ WORK( * ) +* .. +* .. Function Arguments .. +* LOGICAL SELCTG +* EXTERNAL SELCTG +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGGESX computes for a pair of N-by-N complex nonsymmetric matrices +*> (A,B), the generalized eigenvalues, the complex Schur form (S,T), +*> and, optionally, the left and/or right matrices of Schur vectors (VSL +*> and VSR). This gives the generalized Schur factorization +*> +*> (A,B) = ( (VSL) S (VSR)**H, (VSL) T (VSR)**H ) +*> +*> where (VSR)**H is the conjugate-transpose of VSR. +*> +*> Optionally, it also orders the eigenvalues so that a selected cluster +*> of eigenvalues appears in the leading diagonal blocks of the upper +*> triangular matrix S and the upper triangular matrix T; computes +*> a reciprocal condition number for the average of the selected +*> eigenvalues (RCONDE); and computes a reciprocal condition number for +*> the right and left deflating subspaces corresponding to the selected +*> eigenvalues (RCONDV). The leading columns of VSL and VSR then form +*> an orthonormal basis for the corresponding left and right eigenspaces +*> (deflating subspaces). +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w +*> or a ratio alpha/beta = w, such that A - w*B is singular. It is +*> usually represented as the pair (alpha,beta), as there is a +*> reasonable interpretation for beta=0 or for both being zero. +*> +*> A pair of matrices (S,T) is in generalized complex Schur form if T is +*> upper triangular with non-negative diagonal and S is upper +*> triangular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVSL +*> \verbatim +*> JOBVSL is CHARACTER*1 +*> = 'N': do not compute the left Schur vectors; +*> = 'V': compute the left Schur vectors. +*> \endverbatim +*> +*> \param[in] JOBVSR +*> \verbatim +*> JOBVSR is CHARACTER*1 +*> = 'N': do not compute the right Schur vectors; +*> = 'V': compute the right Schur vectors. +*> \endverbatim +*> +*> \param[in] SORT +*> \verbatim +*> SORT is CHARACTER*1 +*> Specifies whether or not to order the eigenvalues on the +*> diagonal of the generalized Schur form. +*> = 'N': Eigenvalues are not ordered; +*> = 'S': Eigenvalues are ordered (see SELCTG). +*> \endverbatim +*> +*> \param[in] SELCTG +*> \verbatim +*> SELCTG is a LOGICAL FUNCTION of two COMPLEX arguments +*> SELCTG must be declared EXTERNAL in the calling subroutine. +*> If SORT = 'N', SELCTG is not referenced. +*> If SORT = 'S', SELCTG is used to select eigenvalues to sort +*> to the top left of the Schur form. +*> Note that a selected complex eigenvalue may no longer satisfy +*> SELCTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since +*> ordering may change the value of complex eigenvalues +*> (especially if the eigenvalue is ill-conditioned), in this +*> case INFO is set to N+3 see INFO below). +*> \endverbatim +*> +*> \param[in] SENSE +*> \verbatim +*> SENSE is CHARACTER*1 +*> Determines which reciprocal condition numbers are computed. +*> = 'N' : None are computed; +*> = 'E' : Computed for average of selected eigenvalues only; +*> = 'V' : Computed for selected deflating subspaces only; +*> = 'B' : Computed for both. +*> If SENSE = 'E', 'V', or 'B', SORT must equal 'S'. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VSL, and VSR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the first of the pair of matrices. +*> On exit, A has been overwritten by its generalized Schur +*> form S. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB, N) +*> On entry, the second of the pair of matrices. +*> On exit, B has been overwritten by its generalized Schur +*> form T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] SDIM +*> \verbatim +*> SDIM is INTEGER +*> If SORT = 'N', SDIM = 0. +*> If SORT = 'S', SDIM = number of eigenvalues (after sorting) +*> for which SELCTG is true. +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is COMPLEX array, dimension (N) +*> On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the +*> generalized eigenvalues. ALPHA(j) and BETA(j),j=1,...,N are +*> the diagonals of the complex Schur form (S,T). BETA(j) will +*> be non-negative real. +*> +*> Note: the quotients ALPHA(j)/BETA(j) may easily over- or +*> underflow, and BETA(j) may even be zero. Thus, the user +*> should avoid naively computing the ratio alpha/beta. +*> However, ALPHA will be always less than and usually +*> comparable with norm(A) in magnitude, and BETA always less +*> than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VSL +*> \verbatim +*> VSL is COMPLEX array, dimension (LDVSL,N) +*> If JOBVSL = 'V', VSL will contain the left Schur vectors. +*> Not referenced if JOBVSL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSL +*> \verbatim +*> LDVSL is INTEGER +*> The leading dimension of the matrix VSL. LDVSL >=1, and +*> if JOBVSL = 'V', LDVSL >= N. +*> \endverbatim +*> +*> \param[out] VSR +*> \verbatim +*> VSR is COMPLEX array, dimension (LDVSR,N) +*> If JOBVSR = 'V', VSR will contain the right Schur vectors. +*> Not referenced if JOBVSR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSR +*> \verbatim +*> LDVSR is INTEGER +*> The leading dimension of the matrix VSR. LDVSR >= 1, and +*> if JOBVSR = 'V', LDVSR >= N. +*> \endverbatim +*> +*> \param[out] RCONDE +*> \verbatim +*> RCONDE is REAL array, dimension ( 2 ) +*> If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the +*> reciprocal condition numbers for the average of the selected +*> eigenvalues. +*> Not referenced if SENSE = 'N' or 'V'. +*> \endverbatim +*> +*> \param[out] RCONDV +*> \verbatim +*> RCONDV is REAL array, dimension ( 2 ) +*> If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the +*> reciprocal condition number for the selected deflating +*> subspaces. +*> Not referenced if SENSE = 'N' or 'E'. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N = 0, LWORK >= 1, else if SENSE = 'E', 'V', or 'B', +*> LWORK >= MAX(1,2*N,2*SDIM*(N-SDIM)), else +*> LWORK >= MAX(1,2*N). Note that 2*SDIM*(N-SDIM) <= N*N/2. +*> Note also that an error is only returned if +*> LWORK < MAX(1,2*N), but if SENSE = 'E' or 'V' or 'B' this may +*> not be large enough. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the bound on the optimal size of the WORK +*> array and the minimum size of the IWORK array, returns these +*> values as the first entries of the WORK and IWORK arrays, and +*> no error message related to LWORK or LIWORK is issued by +*> XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension ( 8*N ) +*> Real workspace. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array WORK. +*> If SENSE = 'N' or N = 0, LIWORK >= 1, otherwise +*> LIWORK >= N+2. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the bound on the optimal size of the +*> WORK array and the minimum size of the IWORK array, returns +*> these values as the first entries of the WORK and IWORK +*> arrays, and no error message related to LWORK or LIWORK is +*> issued by XERBLA. +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> Not referenced if SORT = 'N'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1,...,N: +*> The QZ iteration failed. (A,B) are not in Schur +*> form, but ALPHA(j) and BETA(j) should be correct for +*> j=INFO+1,...,N. +*> > N: =N+1: other than QZ iteration failed in CHGEQZ +*> =N+2: after reordering, roundoff changed values of +*> some complex eigenvalues so that leading +*> eigenvalues in the Generalized Schur form no +*> longer satisfy SELCTG=.TRUE. This could also +*> be caused due to scaling. +*> =N+3: reordering failed in CTGSEN. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexGEeigen +* +* ===================================================================== + SUBROUTINE CGGESX( JOBVSL, JOBVSR, SORT, SELCTG, SENSE, N, A, LDA, + $ B, LDB, SDIM, ALPHA, BETA, VSL, LDVSL, VSR, + $ LDVSR, RCONDE, RCONDV, WORK, LWORK, RWORK, + $ IWORK, LIWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR, SENSE, SORT + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LIWORK, LWORK, N, + $ SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + REAL RCONDE( 2 ), RCONDV( 2 ), RWORK( * ) + COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), + $ WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELCTG + EXTERNAL SELCTG +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL, + $ LQUERY, WANTSB, WANTSE, WANTSN, WANTST, WANTSV + INTEGER I, ICOLS, IERR, IHI, IJOB, IJOBVL, IJOBVR, + $ ILEFT, ILO, IRIGHT, IROWS, IRWRK, ITAU, IWRK, + $ LIWMIN, LWRK, MAXWRK, MINWRK + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PL, + $ PR, SMLNUM +* .. +* .. Local Arrays .. + REAL DIF( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY, + $ CLASCL, CLASET, CTGSEN, CUNGQR, CUNMQR, SLABAD, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* + WANTST = LSAME( SORT, 'S' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) + IF( WANTSN ) THEN + IJOB = 0 + ELSE IF( WANTSE ) THEN + IJOB = 1 + ELSE IF( WANTSV ) THEN + IJOB = 2 + ELSE IF( WANTSB ) THEN + IJOB = 4 + END IF +* +* Test the input arguments +* + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR. + $ ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -15 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -17 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + IF( N.GT.0) THEN + MINWRK = 2*N + MAXWRK = N*(1 + ILAENV( 1, 'CGEQRF', ' ', N, 1, N, 0 ) ) + MAXWRK = MAX( MAXWRK, N*( 1 + + $ ILAENV( 1, 'CUNMQR', ' ', N, 1, N, -1 ) ) ) + IF( ILVSL ) THEN + MAXWRK = MAX( MAXWRK, N*( 1 + + $ ILAENV( 1, 'CUNGQR', ' ', N, 1, N, -1 ) ) ) + END IF + LWRK = MAXWRK + IF( IJOB.GE.1 ) + $ LWRK = MAX( LWRK, N*N/2 ) + ELSE + MINWRK = 1 + MAXWRK = 1 + LWRK = 1 + END IF + WORK( 1 ) = LWRK + IF( WANTSN .OR. N.EQ.0 ) THEN + LIWMIN = 1 + ELSE + LIWMIN = N + 2 + END IF + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -21 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY) THEN + INFO = -24 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGESX', -INFO ) + RETURN + ELSE IF (LQUERY) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL CLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = CLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL CLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (Real Workspace: need 6*N) +* + ILEFT = 1 + IRIGHT = N + 1 + IRWRK = IRIGHT + N + CALL CGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Complex Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = 1 + IWRK = ITAU + IROWS + CALL CGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the unitary transformation to matrix A +* (Complex Workspace: need N, prefer N*NB) +* + CALL CUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VSL +* (Complex Workspace: need N, prefer N*NB) +* + IF( ILVSL ) THEN + CALL CLASET( 'Full', N, N, CZERO, CONE, VSL, LDVSL ) + IF( IROWS.GT.1 ) THEN + CALL CLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + END IF + CALL CUNGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VSR +* + IF( ILVSR ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + CALL CGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IERR ) +* + SDIM = 0 +* +* Perform QZ algorithm, computing Schur vectors if desired +* (Complex Workspace: need N) +* (Real Workspace: need N) +* + IWRK = ITAU + CALL CHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK( IWRK ), + $ LWORK+1-IWRK, RWORK( IRWRK ), IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 40 + END IF +* +* Sort eigenvalues ALPHA/BETA and compute the reciprocal of +* condition number(s) +* + IF( WANTST ) THEN +* +* Undo scaling on eigenvalues before SELCTGing +* + IF( ILASCL ) + $ CALL CLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) + IF( ILBSCL ) + $ CALL CLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* +* Select eigenvalues +* + DO 10 I = 1, N + BWORK( I ) = SELCTG( ALPHA( I ), BETA( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues, transform Generalized Schur vectors, and +* compute reciprocal condition numbers +* (Complex Workspace: If IJOB >= 1, need MAX(1, 2*SDIM*(N-SDIM)) +* otherwise, need 1 ) +* + CALL CTGSEN( IJOB, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PL, PR, + $ DIF, WORK( IWRK ), LWORK-IWRK+1, IWORK, LIWORK, + $ IERR ) +* + IF( IJOB.GE.1 ) + $ MAXWRK = MAX( MAXWRK, 2*SDIM*( N-SDIM ) ) + IF( IERR.EQ.-21 ) THEN +* +* not enough complex workspace +* + INFO = -21 + ELSE + IF( IJOB.EQ.1 .OR. IJOB.EQ.4 ) THEN + RCONDE( 1 ) = PL + RCONDE( 2 ) = PR + END IF + IF( IJOB.EQ.2 .OR. IJOB.EQ.4 ) THEN + RCONDV( 1 ) = DIF( 1 ) + RCONDV( 2 ) = DIF( 2 ) + END IF + IF( IERR.EQ.1 ) + $ INFO = N + 3 + END IF +* + END IF +* +* Apply permutation to VSL and VSR +* (Workspace: none needed) +* + IF( ILVSL ) + $ CALL CGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSL, LDVSL, IERR ) +* + IF( ILVSR ) + $ CALL CGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSR, LDVSR, IERR ) +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL CLASCL( 'U', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR ) + CALL CLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL CLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR ) + CALL CLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + IF( WANTST ) THEN +* +* Check if reordering is correct +* + LASTSL = .TRUE. + SDIM = 0 + DO 30 I = 1, N + CURSL = SELCTG( ALPHA( I ), BETA( I ) ) + IF( CURSL ) + $ SDIM = SDIM + 1 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + LASTSL = CURSL + 30 CONTINUE +* + END IF +* + 40 CONTINUE +* + WORK( 1 ) = MAXWRK + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of CGGESX +* + END diff --git a/dspl/liblapack/SRC/cggev.f b/dspl/liblapack/SRC/cggev.f new file mode 100644 index 0000000..678a0e5 --- /dev/null +++ b/dspl/liblapack/SRC/cggev.f @@ -0,0 +1,558 @@ +*> \brief CGGEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGGEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, +* VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVL, JOBVR +* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. +* REAL RWORK( * ) +* COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGGEV computes for a pair of N-by-N complex nonsymmetric matrices +*> (A,B), the generalized eigenvalues, and optionally, the left and/or +*> right generalized eigenvectors. +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar +*> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is +*> singular. It is usually represented as the pair (alpha,beta), as +*> there is a reasonable interpretation for beta=0, and even for both +*> being zero. +*> +*> The right generalized eigenvector v(j) corresponding to the +*> generalized eigenvalue lambda(j) of (A,B) satisfies +*> +*> A * v(j) = lambda(j) * B * v(j). +*> +*> The left generalized eigenvector u(j) corresponding to the +*> generalized eigenvalues lambda(j) of (A,B) satisfies +*> +*> u(j)**H * A = lambda(j) * u(j)**H * B +*> +*> where u(j)**H is the conjugate-transpose of u(j). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': do not compute the left generalized eigenvectors; +*> = 'V': compute the left generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': do not compute the right generalized eigenvectors; +*> = 'V': compute the right generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VL, and VR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the matrix A in the pair (A,B). +*> On exit, A has been overwritten. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB, N) +*> On entry, the matrix B in the pair (A,B). +*> On exit, B has been overwritten. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is COMPLEX array, dimension (N) +*> On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the +*> generalized eigenvalues. +*> +*> Note: the quotients ALPHA(j)/BETA(j) may easily over- or +*> underflow, and BETA(j) may even be zero. Thus, the user +*> should avoid naively computing the ratio alpha/beta. +*> However, ALPHA will be always less than and usually +*> comparable with norm(A) in magnitude, and BETA always less +*> than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is COMPLEX array, dimension (LDVL,N) +*> If JOBVL = 'V', the left generalized eigenvectors u(j) are +*> stored one after another in the columns of VL, in the same +*> order as their eigenvalues. +*> Each eigenvector is scaled so the largest component has +*> abs(real part) + abs(imag. part) = 1. +*> Not referenced if JOBVL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the matrix VL. LDVL >= 1, and +*> if JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is COMPLEX array, dimension (LDVR,N) +*> If JOBVR = 'V', the right generalized eigenvectors v(j) are +*> stored one after another in the columns of VR, in the same +*> order as their eigenvalues. +*> Each eigenvector is scaled so the largest component has +*> abs(real part) + abs(imag. part) = 1. +*> Not referenced if JOBVR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the matrix VR. LDVR >= 1, and +*> if JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,2*N). +*> For good performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (8*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> =1,...,N: +*> The QZ iteration failed. No eigenvectors have been +*> calculated, but ALPHA(j) and BETA(j) should be +*> correct for j=INFO+1,...,N. +*> > N: =N+1: other then QZ iteration failed in SHGEQZ, +*> =N+2: error return from STGEVC. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexGEeigen +* +* ===================================================================== + SUBROUTINE CGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, + $ VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY + CHARACTER CHTEMP + INTEGER ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, ILO, + $ IN, IRIGHT, IROWS, IRWRK, ITAU, IWRK, JC, JR, + $ LWKMIN, LWKOPT + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SMLNUM, TEMP + COMPLEX X +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY, + $ CLASCL, CLASET, CTGEVC, CUNGQR, CUNMQR, SLABAD, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL, SQRT +* .. +* .. Statement Functions .. + REAL ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( REAL( X ) ) + ABS( AIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -13 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. The workspace is +* computed assuming ILO = 1 and IHI = N, the worst case.) +* + IF( INFO.EQ.0 ) THEN + LWKMIN = MAX( 1, 2*N ) + LWKOPT = MAX( 1, N + N*ILAENV( 1, 'CGEQRF', ' ', N, 1, N, 0 ) ) + LWKOPT = MAX( LWKOPT, N + + $ N*ILAENV( 1, 'CUNMQR', ' ', N, 1, N, 0 ) ) + IF( ILVL ) THEN + LWKOPT = MAX( LWKOPT, N + + $ N*ILAENV( 1, 'CUNGQR', ' ', N, 1, N, -1 ) ) + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) + $ INFO = -15 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'E' )*SLAMCH( 'B' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL CLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = CLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL CLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrices A, B to isolate eigenvalues if possible +* (Real Workspace: need 6*N) +* + ILEFT = 1 + IRIGHT = N + 1 + IRWRK = IRIGHT + N + CALL CGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Complex Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + IF( ILV ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = 1 + IWRK = ITAU + IROWS + CALL CGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* (Complex Workspace: need N, prefer N*NB) +* + CALL CUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VL +* (Complex Workspace: need N, prefer N*NB) +* + IF( ILVL ) THEN + CALL CLASET( 'Full', N, N, CZERO, CONE, VL, LDVL ) + IF( IROWS.GT.1 ) THEN + CALL CLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + END IF + CALL CUNGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VR +* + IF( ILVR ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* + IF( ILV ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL CGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IERR ) + ELSE + CALL CGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR ) + END IF +* +* Perform QZ algorithm (Compute eigenvalues, and optionally, the +* Schur form and Schur vectors) +* (Complex Workspace: need N) +* (Real Workspace: need N) +* + IWRK = ITAU + IF( ILV ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF + CALL CHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK( IWRK ), + $ LWORK+1-IWRK, RWORK( IRWRK ), IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 70 + END IF +* +* Compute Eigenvectors +* (Real Workspace: need 2*N) +* (Complex Workspace: need 2*N) +* + IF( ILV ) THEN + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL CTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, N, IN, WORK( IWRK ), RWORK( IRWRK ), + $ IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 70 + END IF +* +* Undo balancing on VL and VR and normalization +* (Workspace: none needed) +* + IF( ILVL ) THEN + CALL CGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VL, LDVL, IERR ) + DO 30 JC = 1, N + TEMP = ZERO + DO 10 JR = 1, N + TEMP = MAX( TEMP, ABS1( VL( JR, JC ) ) ) + 10 CONTINUE + IF( TEMP.LT.SMLNUM ) + $ GO TO 30 + TEMP = ONE / TEMP + DO 20 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 20 CONTINUE + 30 CONTINUE + END IF + IF( ILVR ) THEN + CALL CGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VR, LDVR, IERR ) + DO 60 JC = 1, N + TEMP = ZERO + DO 40 JR = 1, N + TEMP = MAX( TEMP, ABS1( VR( JR, JC ) ) ) + 40 CONTINUE + IF( TEMP.LT.SMLNUM ) + $ GO TO 60 + TEMP = ONE / TEMP + DO 50 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 50 CONTINUE + 60 CONTINUE + END IF + END IF +* +* Undo scaling if necessary +* + 70 CONTINUE +* + IF( ILASCL ) + $ CALL CLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) +* + IF( ILBSCL ) + $ CALL CLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of CGGEV +* + END diff --git a/dspl/liblapack/SRC/cggev3.f b/dspl/liblapack/SRC/cggev3.f new file mode 100644 index 0000000..f34b8f2 --- /dev/null +++ b/dspl/liblapack/SRC/cggev3.f @@ -0,0 +1,560 @@ +*> \brief CGGEV3 computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices (blocked algorithm) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGGEV3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, +* $ VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVL, JOBVR +* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. +* REAL RWORK( * ) +* COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGGEV3 computes for a pair of N-by-N complex nonsymmetric matrices +*> (A,B), the generalized eigenvalues, and optionally, the left and/or +*> right generalized eigenvectors. +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar +*> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is +*> singular. It is usually represented as the pair (alpha,beta), as +*> there is a reasonable interpretation for beta=0, and even for both +*> being zero. +*> +*> The right generalized eigenvector v(j) corresponding to the +*> generalized eigenvalue lambda(j) of (A,B) satisfies +*> +*> A * v(j) = lambda(j) * B * v(j). +*> +*> The left generalized eigenvector u(j) corresponding to the +*> generalized eigenvalues lambda(j) of (A,B) satisfies +*> +*> u(j)**H * A = lambda(j) * u(j)**H * B +*> +*> where u(j)**H is the conjugate-transpose of u(j). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': do not compute the left generalized eigenvectors; +*> = 'V': compute the left generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': do not compute the right generalized eigenvectors; +*> = 'V': compute the right generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VL, and VR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the matrix A in the pair (A,B). +*> On exit, A has been overwritten. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB, N) +*> On entry, the matrix B in the pair (A,B). +*> On exit, B has been overwritten. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is COMPLEX array, dimension (N) +*> On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the +*> generalized eigenvalues. +*> +*> Note: the quotients ALPHA(j)/BETA(j) may easily over- or +*> underflow, and BETA(j) may even be zero. Thus, the user +*> should avoid naively computing the ratio alpha/beta. +*> However, ALPHA will be always less than and usually +*> comparable with norm(A) in magnitude, and BETA always less +*> than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is COMPLEX array, dimension (LDVL,N) +*> If JOBVL = 'V', the left generalized eigenvectors u(j) are +*> stored one after another in the columns of VL, in the same +*> order as their eigenvalues. +*> Each eigenvector is scaled so the largest component has +*> abs(real part) + abs(imag. part) = 1. +*> Not referenced if JOBVL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the matrix VL. LDVL >= 1, and +*> if JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is COMPLEX array, dimension (LDVR,N) +*> If JOBVR = 'V', the right generalized eigenvectors v(j) are +*> stored one after another in the columns of VR, in the same +*> order as their eigenvalues. +*> Each eigenvector is scaled so the largest component has +*> abs(real part) + abs(imag. part) = 1. +*> Not referenced if JOBVR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the matrix VR. LDVR >= 1, and +*> if JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (8*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> =1,...,N: +*> The QZ iteration failed. No eigenvectors have been +*> calculated, but ALPHA(j) and BETA(j) should be +*> correct for j=INFO+1,...,N. +*> > N: =N+1: other then QZ iteration failed in SHGEQZ, +*> =N+2: error return from STGEVC. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date January 2015 +* +*> \ingroup complexGEeigen +* +* ===================================================================== + SUBROUTINE CGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, + $ VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.6.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2015 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY + CHARACTER CHTEMP + INTEGER ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, ILO, + $ IN, IRIGHT, IROWS, IRWRK, ITAU, IWRK, JC, JR, + $ LWKOPT + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SMLNUM, TEMP + COMPLEX X +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CGGBAK, CGGBAL, CGGHD3, CHGEQZ, CLACPY, + $ CLASCL, CLASET, CTGEVC, CUNGQR, CUNMQR, SLABAD, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANGE, SLAMCH + EXTERNAL LSAME, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL, SQRT +* .. +* .. Statement Functions .. + REAL ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( REAL( X ) ) + ABS( AIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF +* +* Compute workspace +* + IF( INFO.EQ.0 ) THEN + CALL CGEQRF( N, N, B, LDB, WORK, WORK, -1, IERR ) + LWKOPT = MAX( N, N+INT( WORK( 1 ) ) ) + CALL CUNMQR( 'L', 'C', N, N, N, B, LDB, WORK, A, LDA, WORK, + $ -1, IERR ) + LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) ) + IF( ILVL ) THEN + CALL CUNGQR( N, N, N, VL, LDVL, WORK, WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) ) + END IF + IF( ILV ) THEN + CALL CGGHD3( JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) ) + CALL CHGEQZ( 'S', JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK, -1, + $ RWORK, IERR ) + LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) ) + ELSE + CALL CGGHD3( 'N', 'N', N, 1, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) ) + CALL CHGEQZ( 'E', JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK, -1, + $ RWORK, IERR ) + LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) ) + END IF + WORK( 1 ) = CMPLX( LWKOPT ) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGEV3 ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'E' )*SLAMCH( 'B' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL CLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = CLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL CLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrices A, B to isolate eigenvalues if possible +* + ILEFT = 1 + IRIGHT = N + 1 + IRWRK = IRIGHT + N + CALL CGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* + IROWS = IHI + 1 - ILO + IF( ILV ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = 1 + IWRK = ITAU + IROWS + CALL CGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* + CALL CUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VL +* + IF( ILVL ) THEN + CALL CLASET( 'Full', N, N, CZERO, CONE, VL, LDVL ) + IF( IROWS.GT.1 ) THEN + CALL CLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + END IF + CALL CUNGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VR +* + IF( ILVR ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* + IF( ILV ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL CGGHD3( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, WORK( IWRK ), LWORK+1-IWRK, + $ IERR ) + ELSE + CALL CGGHD3( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Perform QZ algorithm (Compute eigenvalues, and optionally, the +* Schur form and Schur vectors) +* + IWRK = ITAU + IF( ILV ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF + CALL CHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK( IWRK ), + $ LWORK+1-IWRK, RWORK( IRWRK ), IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 70 + END IF +* +* Compute Eigenvectors +* + IF( ILV ) THEN + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL CTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, N, IN, WORK( IWRK ), RWORK( IRWRK ), + $ IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 70 + END IF +* +* Undo balancing on VL and VR and normalization +* + IF( ILVL ) THEN + CALL CGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VL, LDVL, IERR ) + DO 30 JC = 1, N + TEMP = ZERO + DO 10 JR = 1, N + TEMP = MAX( TEMP, ABS1( VL( JR, JC ) ) ) + 10 CONTINUE + IF( TEMP.LT.SMLNUM ) + $ GO TO 30 + TEMP = ONE / TEMP + DO 20 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 20 CONTINUE + 30 CONTINUE + END IF + IF( ILVR ) THEN + CALL CGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VR, LDVR, IERR ) + DO 60 JC = 1, N + TEMP = ZERO + DO 40 JR = 1, N + TEMP = MAX( TEMP, ABS1( VR( JR, JC ) ) ) + 40 CONTINUE + IF( TEMP.LT.SMLNUM ) + $ GO TO 60 + TEMP = ONE / TEMP + DO 50 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 50 CONTINUE + 60 CONTINUE + END IF + END IF +* +* Undo scaling if necessary +* + 70 CONTINUE +* + IF( ILASCL ) + $ CALL CLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) +* + IF( ILBSCL ) + $ CALL CLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* + WORK( 1 ) = CMPLX( LWKOPT ) + RETURN +* +* End of CGGEV3 +* + END diff --git a/dspl/liblapack/SRC/cggevx.f b/dspl/liblapack/SRC/cggevx.f new file mode 100644 index 0000000..c5fb37e --- /dev/null +++ b/dspl/liblapack/SRC/cggevx.f @@ -0,0 +1,804 @@ +*> \brief CGGEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGGEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGGEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, B, LDB, +* ALPHA, BETA, VL, LDVL, VR, LDVR, ILO, IHI, +* LSCALE, RSCALE, ABNRM, BBNRM, RCONDE, RCONDV, +* WORK, LWORK, RWORK, IWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER BALANC, JOBVL, JOBVR, SENSE +* INTEGER IHI, ILO, INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* REAL ABNRM, BBNRM +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* INTEGER IWORK( * ) +* REAL LSCALE( * ), RCONDE( * ), RCONDV( * ), +* $ RSCALE( * ), RWORK( * ) +* COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGGEVX computes for a pair of N-by-N complex nonsymmetric matrices +*> (A,B) the generalized eigenvalues, and optionally, the left and/or +*> right generalized eigenvectors. +*> +*> Optionally, it also computes a balancing transformation to improve +*> the conditioning of the eigenvalues and eigenvectors (ILO, IHI, +*> LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for +*> the eigenvalues (RCONDE), and reciprocal condition numbers for the +*> right eigenvectors (RCONDV). +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar +*> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is +*> singular. It is usually represented as the pair (alpha,beta), as +*> there is a reasonable interpretation for beta=0, and even for both +*> being zero. +*> +*> The right eigenvector v(j) corresponding to the eigenvalue lambda(j) +*> of (A,B) satisfies +*> A * v(j) = lambda(j) * B * v(j) . +*> The left eigenvector u(j) corresponding to the eigenvalue lambda(j) +*> of (A,B) satisfies +*> u(j)**H * A = lambda(j) * u(j)**H * B. +*> where u(j)**H is the conjugate-transpose of u(j). +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] BALANC +*> \verbatim +*> BALANC is CHARACTER*1 +*> Specifies the balance option to be performed: +*> = 'N': do not diagonally scale or permute; +*> = 'P': permute only; +*> = 'S': scale only; +*> = 'B': both permute and scale. +*> Computed reciprocal condition numbers will be for the +*> matrices after permuting and/or balancing. Permuting does +*> not change condition numbers (in exact arithmetic), but +*> balancing does. +*> \endverbatim +*> +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': do not compute the left generalized eigenvectors; +*> = 'V': compute the left generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': do not compute the right generalized eigenvectors; +*> = 'V': compute the right generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] SENSE +*> \verbatim +*> SENSE is CHARACTER*1 +*> Determines which reciprocal condition numbers are computed. +*> = 'N': none are computed; +*> = 'E': computed for eigenvalues only; +*> = 'V': computed for eigenvectors only; +*> = 'B': computed for eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VL, and VR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the matrix A in the pair (A,B). +*> On exit, A has been overwritten. If JOBVL='V' or JOBVR='V' +*> or both, then A contains the first part of the complex Schur +*> form of the "balanced" versions of the input A and B. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB, N) +*> On entry, the matrix B in the pair (A,B). +*> On exit, B has been overwritten. If JOBVL='V' or JOBVR='V' +*> or both, then B contains the second part of the complex +*> Schur form of the "balanced" versions of the input A and B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is COMPLEX array, dimension (N) +*> On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the generalized +*> eigenvalues. +*> +*> Note: the quotient ALPHA(j)/BETA(j) ) may easily over- or +*> underflow, and BETA(j) may even be zero. Thus, the user +*> should avoid naively computing the ratio ALPHA/BETA. +*> However, ALPHA will be always less than and usually +*> comparable with norm(A) in magnitude, and BETA always less +*> than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is COMPLEX array, dimension (LDVL,N) +*> If JOBVL = 'V', the left generalized eigenvectors u(j) are +*> stored one after another in the columns of VL, in the same +*> order as their eigenvalues. +*> Each eigenvector will be scaled so the largest component +*> will have abs(real part) + abs(imag. part) = 1. +*> Not referenced if JOBVL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the matrix VL. LDVL >= 1, and +*> if JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is COMPLEX array, dimension (LDVR,N) +*> If JOBVR = 'V', the right generalized eigenvectors v(j) are +*> stored one after another in the columns of VR, in the same +*> order as their eigenvalues. +*> Each eigenvector will be scaled so the largest component +*> will have abs(real part) + abs(imag. part) = 1. +*> Not referenced if JOBVR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the matrix VR. LDVR >= 1, and +*> if JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[out] IHI +*> \verbatim +*> IHI is INTEGER +*> ILO and IHI are integer values such that on exit +*> A(i,j) = 0 and B(i,j) = 0 if i > j and +*> j = 1,...,ILO-1 or i = IHI+1,...,N. +*> If BALANC = 'N' or 'S', ILO = 1 and IHI = N. +*> \endverbatim +*> +*> \param[out] LSCALE +*> \verbatim +*> LSCALE is REAL array, dimension (N) +*> Details of the permutations and scaling factors applied +*> to the left side of A and B. If PL(j) is the index of the +*> row interchanged with row j, and DL(j) is the scaling +*> factor applied to row j, then +*> LSCALE(j) = PL(j) for j = 1,...,ILO-1 +*> = DL(j) for j = ILO,...,IHI +*> = PL(j) for j = IHI+1,...,N. +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] RSCALE +*> \verbatim +*> RSCALE is REAL array, dimension (N) +*> Details of the permutations and scaling factors applied +*> to the right side of A and B. If PR(j) is the index of the +*> column interchanged with column j, and DR(j) is the scaling +*> factor applied to column j, then +*> RSCALE(j) = PR(j) for j = 1,...,ILO-1 +*> = DR(j) for j = ILO,...,IHI +*> = PR(j) for j = IHI+1,...,N +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] ABNRM +*> \verbatim +*> ABNRM is REAL +*> The one-norm of the balanced matrix A. +*> \endverbatim +*> +*> \param[out] BBNRM +*> \verbatim +*> BBNRM is REAL +*> The one-norm of the balanced matrix B. +*> \endverbatim +*> +*> \param[out] RCONDE +*> \verbatim +*> RCONDE is REAL array, dimension (N) +*> If SENSE = 'E' or 'B', the reciprocal condition numbers of +*> the eigenvalues, stored in consecutive elements of the array. +*> If SENSE = 'N' or 'V', RCONDE is not referenced. +*> \endverbatim +*> +*> \param[out] RCONDV +*> \verbatim +*> RCONDV is REAL array, dimension (N) +*> If SENSE = 'V' or 'B', the estimated reciprocal condition +*> numbers of the eigenvectors, stored in consecutive elements +*> of the array. If the eigenvalues cannot be reordered to +*> compute RCONDV(j), RCONDV(j) is set to 0; this can only occur +*> when the true value would be very small anyway. +*> If SENSE = 'N' or 'E', RCONDV is not referenced. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,2*N). +*> If SENSE = 'E', LWORK >= max(1,4*N). +*> If SENSE = 'V' or 'B', LWORK >= max(1,2*N*N+2*N). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (lrwork) +*> lrwork must be at least max(1,6*N) if BALANC = 'S' or 'B', +*> and at least max(1,2*N) otherwise. +*> Real workspace. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N+2) +*> If SENSE = 'E', IWORK is not referenced. +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> If SENSE = 'N', BWORK is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1,...,N: +*> The QZ iteration failed. No eigenvectors have been +*> calculated, but ALPHA(j) and BETA(j) should be correct +*> for j=INFO+1,...,N. +*> > N: =N+1: other than QZ iteration failed in CHGEQZ. +*> =N+2: error return from CTGEVC. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexGEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Balancing a matrix pair (A,B) includes, first, permuting rows and +*> columns to isolate eigenvalues, second, applying diagonal similarity +*> transformation to the rows and columns to make the rows and columns +*> as close in norm as possible. The computed reciprocal condition +*> numbers correspond to the balanced matrix. Permuting rows and columns +*> will not change the condition numbers (in exact arithmetic) but +*> diagonal scaling will. For further explanation of balancing, see +*> section 4.11.1.2 of LAPACK Users' Guide. +*> +*> An approximate error bound on the chordal distance between the i-th +*> computed generalized eigenvalue w and the corresponding exact +*> eigenvalue lambda is +*> +*> chord(w, lambda) <= EPS * norm(ABNRM, BBNRM) / RCONDE(I) +*> +*> An approximate error bound for the angle between the i-th computed +*> eigenvector VL(i) or VR(i) is given by +*> +*> EPS * norm(ABNRM, BBNRM) / DIF(i). +*> +*> For further explanation of the reciprocal condition numbers RCONDE +*> and RCONDV, see section 4.11 of LAPACK User's Guide. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGGEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, ILO, IHI, + $ LSCALE, RSCALE, ABNRM, BBNRM, RCONDE, RCONDV, + $ WORK, LWORK, RWORK, IWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER BALANC, JOBVL, JOBVR, SENSE + INTEGER IHI, ILO, INFO, LDA, LDB, LDVL, LDVR, LWORK, N + REAL ABNRM, BBNRM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + REAL LSCALE( * ), RCONDE( * ), RCONDV( * ), + $ RSCALE( * ), RWORK( * ) + COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY, NOSCL, + $ WANTSB, WANTSE, WANTSN, WANTSV + CHARACTER CHTEMP + INTEGER I, ICOLS, IERR, IJOBVL, IJOBVR, IN, IROWS, + $ ITAU, IWRK, IWRK1, J, JC, JR, M, MAXWRK, MINWRK + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SMLNUM, TEMP + COMPLEX X +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY, + $ CLASCL, CLASET, CTGEVC, CTGSNA, CUNGQR, CUNMQR, + $ SLABAD, SLASCL, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANGE, SLAMCH + EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL, SQRT +* .. +* .. Statement Functions .. + REAL ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( REAL( X ) ) + ABS( AIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* + NOSCL = LSAME( BALANC, 'N' ) .OR. LSAME( BALANC, 'P' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( NOSCL .OR. LSAME( BALANC,'S' ) .OR. + $ LSAME( BALANC, 'B' ) ) ) THEN + INFO = -1 + ELSE IF( IJOBVL.LE.0 ) THEN + INFO = -2 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -3 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSB .OR. WANTSV ) ) + $ THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -13 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -15 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. The workspace is +* computed assuming ILO = 1 and IHI = N, the worst case.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + ELSE + MINWRK = 2*N + IF( WANTSE ) THEN + MINWRK = 4*N + ELSE IF( WANTSV .OR. WANTSB ) THEN + MINWRK = 2*N*( N + 1) + END IF + MAXWRK = MINWRK + MAXWRK = MAX( MAXWRK, + $ N + N*ILAENV( 1, 'CGEQRF', ' ', N, 1, N, 0 ) ) + MAXWRK = MAX( MAXWRK, + $ N + N*ILAENV( 1, 'CUNMQR', ' ', N, 1, N, 0 ) ) + IF( ILVL ) THEN + MAXWRK = MAX( MAXWRK, N + + $ N*ILAENV( 1, 'CUNGQR', ' ', N, 1, N, 0 ) ) + END IF + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -25 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = CLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL CLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = CLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL CLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute and/or balance the matrix pair (A,B) +* (Real Workspace: need 6*N if BALANC = 'S' or 'B', 1 otherwise) +* + CALL CGGBAL( BALANC, N, A, LDA, B, LDB, ILO, IHI, LSCALE, RSCALE, + $ RWORK, IERR ) +* +* Compute ABNRM and BBNRM +* + ABNRM = CLANGE( '1', N, N, A, LDA, RWORK( 1 ) ) + IF( ILASCL ) THEN + RWORK( 1 ) = ABNRM + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, 1, 1, RWORK( 1 ), 1, + $ IERR ) + ABNRM = RWORK( 1 ) + END IF +* + BBNRM = CLANGE( '1', N, N, B, LDB, RWORK( 1 ) ) + IF( ILBSCL ) THEN + RWORK( 1 ) = BBNRM + CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, 1, 1, RWORK( 1 ), 1, + $ IERR ) + BBNRM = RWORK( 1 ) + END IF +* +* Reduce B to triangular form (QR decomposition of B) +* (Complex Workspace: need N, prefer N*NB ) +* + IROWS = IHI + 1 - ILO + IF( ILV .OR. .NOT.WANTSN ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = 1 + IWRK = ITAU + IROWS + CALL CGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the unitary transformation to A +* (Complex Workspace: need N, prefer N*NB) +* + CALL CUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VL and/or VR +* (Workspace: need N, prefer N*NB) +* + IF( ILVL ) THEN + CALL CLASET( 'Full', N, N, CZERO, CONE, VL, LDVL ) + IF( IROWS.GT.1 ) THEN + CALL CLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + END IF + CALL CUNGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* + IF( ILVR ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + IF( ILV .OR. .NOT.WANTSN ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL CGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IERR ) + ELSE + CALL CGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR ) + END IF +* +* Perform QZ algorithm (Compute eigenvalues, and optionally, the +* Schur forms and Schur vectors) +* (Complex Workspace: need N) +* (Real Workspace: need N) +* + IWRK = ITAU + IF( ILV .OR. .NOT.WANTSN ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF +* + CALL CHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK( IWRK ), + $ LWORK+1-IWRK, RWORK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 90 + END IF +* +* Compute Eigenvectors and estimate condition numbers if desired +* CTGEVC: (Complex Workspace: need 2*N ) +* (Real Workspace: need 2*N ) +* CTGSNA: (Complex Workspace: need 2*N*N if SENSE='V' or 'B') +* (Integer Workspace: need N+2 ) +* + IF( ILV .OR. .NOT.WANTSN ) THEN + IF( ILV ) THEN + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL CTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, N, IN, WORK( IWRK ), RWORK, + $ IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 90 + END IF + END IF +* + IF( .NOT.WANTSN ) THEN +* +* compute eigenvectors (STGEVC) and estimate condition +* numbers (STGSNA). Note that the definition of the condition +* number is not invariant under transformation (u,v) to +* (Q*u, Z*v), where (u,v) are eigenvectors of the generalized +* Schur form (S,T), Q and Z are orthogonal matrices. In order +* to avoid using extra 2*N*N workspace, we have to +* re-calculate eigenvectors and estimate the condition numbers +* one at a time. +* + DO 20 I = 1, N +* + DO 10 J = 1, N + BWORK( J ) = .FALSE. + 10 CONTINUE + BWORK( I ) = .TRUE. +* + IWRK = N + 1 + IWRK1 = IWRK + N +* + IF( WANTSE .OR. WANTSB ) THEN + CALL CTGEVC( 'B', 'S', BWORK, N, A, LDA, B, LDB, + $ WORK( 1 ), N, WORK( IWRK ), N, 1, M, + $ WORK( IWRK1 ), RWORK, IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 90 + END IF + END IF +* + CALL CTGSNA( SENSE, 'S', BWORK, N, A, LDA, B, LDB, + $ WORK( 1 ), N, WORK( IWRK ), N, RCONDE( I ), + $ RCONDV( I ), 1, M, WORK( IWRK1 ), + $ LWORK-IWRK1+1, IWORK, IERR ) +* + 20 CONTINUE + END IF + END IF +* +* Undo balancing on VL and VR and normalization +* (Workspace: none needed) +* + IF( ILVL ) THEN + CALL CGGBAK( BALANC, 'L', N, ILO, IHI, LSCALE, RSCALE, N, VL, + $ LDVL, IERR ) +* + DO 50 JC = 1, N + TEMP = ZERO + DO 30 JR = 1, N + TEMP = MAX( TEMP, ABS1( VL( JR, JC ) ) ) + 30 CONTINUE + IF( TEMP.LT.SMLNUM ) + $ GO TO 50 + TEMP = ONE / TEMP + DO 40 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 40 CONTINUE + 50 CONTINUE + END IF +* + IF( ILVR ) THEN + CALL CGGBAK( BALANC, 'R', N, ILO, IHI, LSCALE, RSCALE, N, VR, + $ LDVR, IERR ) + DO 80 JC = 1, N + TEMP = ZERO + DO 60 JR = 1, N + TEMP = MAX( TEMP, ABS1( VR( JR, JC ) ) ) + 60 CONTINUE + IF( TEMP.LT.SMLNUM ) + $ GO TO 80 + TEMP = ONE / TEMP + DO 70 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 70 CONTINUE + 80 CONTINUE + END IF +* +* Undo scaling if necessary +* + 90 CONTINUE +* + IF( ILASCL ) + $ CALL CLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) +* + IF( ILBSCL ) + $ CALL CLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of CGGEVX +* + END diff --git a/dspl/liblapack/SRC/cggglm.f b/dspl/liblapack/SRC/cggglm.f new file mode 100644 index 0000000..336f419 --- /dev/null +++ b/dspl/liblapack/SRC/cggglm.f @@ -0,0 +1,349 @@ +*> \brief \b CGGGLM +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGGGLM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), D( * ), WORK( * ), +* $ X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGGGLM solves a general Gauss-Markov linear model (GLM) problem: +*> +*> minimize || y ||_2 subject to d = A*x + B*y +*> x +*> +*> where A is an N-by-M matrix, B is an N-by-P matrix, and d is a +*> given N-vector. It is assumed that M <= N <= M+P, and +*> +*> rank(A) = M and rank( A B ) = N. +*> +*> Under these assumptions, the constrained equation is always +*> consistent, and there is a unique solution x and a minimal 2-norm +*> solution y, which is obtained using a generalized QR factorization +*> of the matrices (A, B) given by +*> +*> A = Q*(R), B = Q*T*Z. +*> (0) +*> +*> In particular, if matrix B is square nonsingular, then the problem +*> GLM is equivalent to the following weighted linear least squares +*> problem +*> +*> minimize || inv(B)*(d-A*x) ||_2 +*> x +*> +*> where inv(B) denotes the inverse of B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of columns of the matrix A. 0 <= M <= N. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of columns of the matrix B. P >= N-M. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,M) +*> On entry, the N-by-M matrix A. +*> On exit, the upper triangular part of the array A contains +*> the M-by-M upper triangular matrix R. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,P) +*> On entry, the N-by-P matrix B. +*> On exit, if N <= P, the upper triangle of the subarray +*> B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; +*> if N > P, the elements on and above the (N-P)th subdiagonal +*> contain the N-by-P upper trapezoidal matrix T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is COMPLEX array, dimension (N) +*> On entry, D is the left hand side of the GLM equation. +*> On exit, D is destroyed. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (M) +*> \endverbatim +*> +*> \param[out] Y +*> \verbatim +*> Y is COMPLEX array, dimension (P) +*> +*> On exit, X and Y are the solutions of the GLM problem. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N+M+P). +*> For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB, +*> where NB is an upper bound for the optimal blocksizes for +*> CGEQRF, CGERQF, CUNMQR and CUNMRQ. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1: the upper triangular factor R associated with A in the +*> generalized QR factorization of the pair (A, B) is +*> singular, so that rank(A) < M; the least squares +*> solution could not be computed. +*> = 2: the bottom (N-M) by (N-M) part of the upper trapezoidal +*> factor T associated with B in the generalized QR +*> factorization of the pair (A, B) is singular, so that +*> rank( A B ) < N; the least squares solution could not +*> be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHEReigen +* +* ===================================================================== + SUBROUTINE CGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), D( * ), WORK( * ), + $ X( * ), Y( * ) +* .. +* +* =================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, LOPT, LWKMIN, LWKOPT, NB, NB1, NB2, NB3, + $ NB4, NP +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMV, CGGQRF, CTRTRS, CUNMQR, CUNMRQ, + $ XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NP = MIN( N, P ) + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 .OR. M.GT.N ) THEN + INFO = -2 + ELSE IF( P.LT.0 .OR. P.LT.N-M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF +* +* Calculate workspace +* + IF( INFO.EQ.0) THEN + IF( N.EQ.0 ) THEN + LWKMIN = 1 + LWKOPT = 1 + ELSE + NB1 = ILAENV( 1, 'CGEQRF', ' ', N, M, -1, -1 ) + NB2 = ILAENV( 1, 'CGERQF', ' ', N, M, -1, -1 ) + NB3 = ILAENV( 1, 'CUNMQR', ' ', N, M, P, -1 ) + NB4 = ILAENV( 1, 'CUNMRQ', ' ', N, M, P, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKMIN = M + N + P + LWKOPT = M + NP + MAX( N, P )*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGGLM', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the GQR factorization of matrices A and B: +* +* Q**H*A = ( R11 ) M, Q**H*B*Z**H = ( T11 T12 ) M +* ( 0 ) N-M ( 0 T22 ) N-M +* M M+P-N N-M +* +* where R11 and T22 are upper triangular, and Q and Z are +* unitary. +* + CALL CGGQRF( N, M, P, A, LDA, WORK, B, LDB, WORK( M+1 ), + $ WORK( M+NP+1 ), LWORK-M-NP, INFO ) + LOPT = WORK( M+NP+1 ) +* +* Update left-hand-side vector d = Q**H*d = ( d1 ) M +* ( d2 ) N-M +* + CALL CUNMQR( 'Left', 'Conjugate transpose', N, 1, M, A, LDA, WORK, + $ D, MAX( 1, N ), WORK( M+NP+1 ), LWORK-M-NP, INFO ) + LOPT = MAX( LOPT, INT( WORK( M+NP+1 ) ) ) +* +* Solve T22*y2 = d2 for y2 +* + IF( N.GT.M ) THEN + CALL CTRTRS( 'Upper', 'No transpose', 'Non unit', N-M, 1, + $ B( M+1, M+P-N+1 ), LDB, D( M+1 ), N-M, INFO ) +* + IF( INFO.GT.0 ) THEN + INFO = 1 + RETURN + END IF +* + CALL CCOPY( N-M, D( M+1 ), 1, Y( M+P-N+1 ), 1 ) + END IF +* +* Set y1 = 0 +* + DO 10 I = 1, M + P - N + Y( I ) = CZERO + 10 CONTINUE +* +* Update d1 = d1 - T12*y2 +* + CALL CGEMV( 'No transpose', M, N-M, -CONE, B( 1, M+P-N+1 ), LDB, + $ Y( M+P-N+1 ), 1, CONE, D, 1 ) +* +* Solve triangular system: R11*x = d1 +* + IF( M.GT.0 ) THEN + CALL CTRTRS( 'Upper', 'No Transpose', 'Non unit', M, 1, A, LDA, + $ D, M, INFO ) +* + IF( INFO.GT.0 ) THEN + INFO = 2 + RETURN + END IF +* +* Copy D to X +* + CALL CCOPY( M, D, 1, X, 1 ) + END IF +* +* Backward transformation y = Z**H *y +* + CALL CUNMRQ( 'Left', 'Conjugate transpose', P, 1, NP, + $ B( MAX( 1, N-P+1 ), 1 ), LDB, WORK( M+1 ), Y, + $ MAX( 1, P ), WORK( M+NP+1 ), LWORK-M-NP, INFO ) + WORK( 1 ) = M + NP + MAX( LOPT, INT( WORK( M+NP+1 ) ) ) +* + RETURN +* +* End of CGGGLM +* + END diff --git a/dspl/liblapack/SRC/cgghd3.f b/dspl/liblapack/SRC/cgghd3.f new file mode 100644 index 0000000..b86c9dc --- /dev/null +++ b/dspl/liblapack/SRC/cgghd3.f @@ -0,0 +1,900 @@ +*> \brief \b CGGHD3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGGHD3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, +* $ LDQ, Z, LDZ, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ, COMPZ +* INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ Z( LDZ, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> CGGHD3 reduces a pair of complex matrices (A,B) to generalized upper +*> Hessenberg form using unitary transformations, where A is a +*> general matrix and B is upper triangular. The form of the +*> generalized eigenvalue problem is +*> A*x = lambda*B*x, +*> and B is typically made upper triangular by computing its QR +*> factorization and moving the unitary matrix Q to the left side +*> of the equation. +*> +*> This subroutine simultaneously reduces A to a Hessenberg matrix H: +*> Q**H*A*Z = H +*> and transforms B to another upper triangular matrix T: +*> Q**H*B*Z = T +*> in order to reduce the problem to its standard form +*> H*y = lambda*T*y +*> where y = Z**H*x. +*> +*> The unitary matrices Q and Z are determined as products of Givens +*> rotations. They may either be formed explicitly, or they may be +*> postmultiplied into input matrices Q1 and Z1, so that +*> +*> Q1 * A * Z1**H = (Q1*Q) * H * (Z1*Z)**H +*> +*> Q1 * B * Z1**H = (Q1*Q) * T * (Z1*Z)**H +*> +*> If Q1 is the unitary matrix from the QR factorization of B in the +*> original equation A*x = lambda*B*x, then CGGHD3 reduces the original +*> problem to generalized Hessenberg form. +*> +*> This is a blocked variant of CGGHRD, using matrix-matrix +*> multiplications for parts of the computation to enhance performance. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> = 'N': do not compute Q; +*> = 'I': Q is initialized to the unit matrix, and the +*> unitary matrix Q is returned; +*> = 'V': Q must contain a unitary matrix Q1 on entry, +*> and the product Q1*Q is returned. +*> \endverbatim +*> +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': do not compute Z; +*> = 'I': Z is initialized to the unit matrix, and the +*> unitary matrix Z is returned; +*> = 'V': Z must contain a unitary matrix Z1 on entry, +*> and the product Z1*Z is returned. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> ILO and IHI mark the rows and columns of A which are to be +*> reduced. It is assumed that A is already upper triangular +*> in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are +*> normally set by a previous call to CGGBAL; otherwise they +*> should be set to 1 and N respectively. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the N-by-N general matrix to be reduced. +*> On exit, the upper triangle and the first subdiagonal of A +*> are overwritten with the upper Hessenberg matrix H, and the +*> rest is set to zero. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB, N) +*> On entry, the N-by-N upper triangular matrix B. +*> On exit, the upper triangular matrix T = Q**H B Z. The +*> elements below the diagonal are set to zero. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ, N) +*> On entry, if COMPQ = 'V', the unitary matrix Q1, typically +*> from the QR factorization of B. +*> On exit, if COMPQ='I', the unitary matrix Q, and if +*> COMPQ = 'V', the product Q1*Q. +*> Not referenced if COMPQ='N'. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', the unitary matrix Z1. +*> On exit, if COMPZ='I', the unitary matrix Z, and if +*> COMPZ = 'V', the product Z1*Z. +*> Not referenced if COMPZ='N'. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. +*> LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1. +*> For optimum performance LWORK >= 6*N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date January 2015 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> This routine reduces A to Hessenberg form and maintains B in +*> using a blocked variant of Moler and Stewart's original algorithm, +*> as described by Kagstrom, Kressner, Quintana-Orti, and Quintana-Orti +*> (BIT 2008). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, + $ LDQ, Z, LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2015 +* +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ + INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE, CZERO + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), + $ CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL BLK22, INITQ, INITZ, LQUERY, WANTQ, WANTZ + CHARACTER*1 COMPQ2, COMPZ2 + INTEGER COLA, I, IERR, J, J0, JCOL, JJ, JROW, K, + $ KACC22, LEN, LWKOPT, N2NB, NB, NBLST, NBMIN, + $ NH, NNB, NX, PPW, PPWO, PW, TOP, TOPQ + REAL C + COMPLEX C1, C2, CTEMP, S, S1, S2, TEMP, TEMP1, TEMP2, + $ TEMP3 +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGGHRD, CLARTG, CLASET, CUNM22, CROT, CGEMM, + $ CGEMV, CTRMV, CLACPY, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, CMPLX, CONJG, MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + INFO = 0 + NB = ILAENV( 1, 'CGGHD3', ' ', N, ILO, IHI, -1 ) + LWKOPT = MAX( 6*N*NB, 1 ) + WORK( 1 ) = CMPLX( LWKOPT ) + INITQ = LSAME( COMPQ, 'I' ) + WANTQ = INITQ .OR. LSAME( COMPQ, 'V' ) + INITZ = LSAME( COMPZ, 'I' ) + WANTZ = INITZ .OR. LSAME( COMPZ, 'V' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPZ, 'N' ) .AND. .NOT.WANTZ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( ( WANTQ .AND. LDQ.LT.N ) .OR. LDQ.LT.1 ) THEN + INFO = -11 + ELSE IF( ( WANTZ .AND. LDZ.LT.N ) .OR. LDZ.LT.1 ) THEN + INFO = -13 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGHD3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Initialize Q and Z if desired. +* + IF( INITQ ) + $ CALL CLASET( 'All', N, N, CZERO, CONE, Q, LDQ ) + IF( INITZ ) + $ CALL CLASET( 'All', N, N, CZERO, CONE, Z, LDZ ) +* +* Zero out lower triangle of B. +* + IF( N.GT.1 ) + $ CALL CLASET( 'Lower', N-1, N-1, CZERO, CZERO, B(2, 1), LDB ) +* +* Quick return if possible +* + NH = IHI - ILO + 1 + IF( NH.LE.1 ) THEN + WORK( 1 ) = CONE + RETURN + END IF +* +* Determine the blocksize. +* + NBMIN = ILAENV( 2, 'CGGHD3', ' ', N, ILO, IHI, -1 ) + IF( NB.GT.1 .AND. NB.LT.NH ) THEN +* +* Determine when to use unblocked instead of blocked code. +* + NX = MAX( NB, ILAENV( 3, 'CGGHD3', ' ', N, ILO, IHI, -1 ) ) + IF( NX.LT.NH ) THEN +* +* Determine if workspace is large enough for blocked code. +* + IF( LWORK.LT.LWKOPT ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code. +* + NBMIN = MAX( 2, ILAENV( 2, 'CGGHD3', ' ', N, ILO, IHI, + $ -1 ) ) + IF( LWORK.GE.6*N*NBMIN ) THEN + NB = LWORK / ( 6*N ) + ELSE + NB = 1 + END IF + END IF + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN +* +* Use unblocked code below +* + JCOL = ILO +* + ELSE +* +* Use blocked code +* + KACC22 = ILAENV( 16, 'CGGHD3', ' ', N, ILO, IHI, -1 ) + BLK22 = KACC22.EQ.2 + DO JCOL = ILO, IHI-2, NB + NNB = MIN( NB, IHI-JCOL-1 ) +* +* Initialize small unitary factors that will hold the +* accumulated Givens rotations in workspace. +* N2NB denotes the number of 2*NNB-by-2*NNB factors +* NBLST denotes the (possibly smaller) order of the last +* factor. +* + N2NB = ( IHI-JCOL-1 ) / NNB - 1 + NBLST = IHI - JCOL - N2NB*NNB + CALL CLASET( 'All', NBLST, NBLST, CZERO, CONE, WORK, NBLST ) + PW = NBLST * NBLST + 1 + DO I = 1, N2NB + CALL CLASET( 'All', 2*NNB, 2*NNB, CZERO, CONE, + $ WORK( PW ), 2*NNB ) + PW = PW + 4*NNB*NNB + END DO +* +* Reduce columns JCOL:JCOL+NNB-1 of A to Hessenberg form. +* + DO J = JCOL, JCOL+NNB-1 +* +* Reduce Jth column of A. Store cosines and sines in Jth +* column of A and B, respectively. +* + DO I = IHI, J+2, -1 + TEMP = A( I-1, J ) + CALL CLARTG( TEMP, A( I, J ), C, S, A( I-1, J ) ) + A( I, J ) = CMPLX( C ) + B( I, J ) = S + END DO +* +* Accumulate Givens rotations into workspace array. +* + PPW = ( NBLST + 1 )*( NBLST - 2 ) - J + JCOL + 1 + LEN = 2 + J - JCOL + JROW = J + N2NB*NNB + 2 + DO I = IHI, JROW, -1 + CTEMP = A( I, J ) + S = B( I, J ) + DO JJ = PPW, PPW+LEN-1 + TEMP = WORK( JJ + NBLST ) + WORK( JJ + NBLST ) = CTEMP*TEMP - S*WORK( JJ ) + WORK( JJ ) = CONJG( S )*TEMP + CTEMP*WORK( JJ ) + END DO + LEN = LEN + 1 + PPW = PPW - NBLST - 1 + END DO +* + PPWO = NBLST*NBLST + ( NNB+J-JCOL-1 )*2*NNB + NNB + J0 = JROW - NNB + DO JROW = J0, J+2, -NNB + PPW = PPWO + LEN = 2 + J - JCOL + DO I = JROW+NNB-1, JROW, -1 + CTEMP = A( I, J ) + S = B( I, J ) + DO JJ = PPW, PPW+LEN-1 + TEMP = WORK( JJ + 2*NNB ) + WORK( JJ + 2*NNB ) = CTEMP*TEMP - S*WORK( JJ ) + WORK( JJ ) = CONJG( S )*TEMP + CTEMP*WORK( JJ ) + END DO + LEN = LEN + 1 + PPW = PPW - 2*NNB - 1 + END DO + PPWO = PPWO + 4*NNB*NNB + END DO +* +* TOP denotes the number of top rows in A and B that will +* not be updated during the next steps. +* + IF( JCOL.LE.2 ) THEN + TOP = 0 + ELSE + TOP = JCOL + END IF +* +* Propagate transformations through B and replace stored +* left sines/cosines by right sines/cosines. +* + DO JJ = N, J+1, -1 +* +* Update JJth column of B. +* + DO I = MIN( JJ+1, IHI ), J+2, -1 + CTEMP = A( I, J ) + S = B( I, J ) + TEMP = B( I, JJ ) + B( I, JJ ) = CTEMP*TEMP - CONJG( S )*B( I-1, JJ ) + B( I-1, JJ ) = S*TEMP + CTEMP*B( I-1, JJ ) + END DO +* +* Annihilate B( JJ+1, JJ ). +* + IF( JJ.LT.IHI ) THEN + TEMP = B( JJ+1, JJ+1 ) + CALL CLARTG( TEMP, B( JJ+1, JJ ), C, S, + $ B( JJ+1, JJ+1 ) ) + B( JJ+1, JJ ) = CZERO + CALL CROT( JJ-TOP, B( TOP+1, JJ+1 ), 1, + $ B( TOP+1, JJ ), 1, C, S ) + A( JJ+1, J ) = CMPLX( C ) + B( JJ+1, J ) = -CONJG( S ) + END IF + END DO +* +* Update A by transformations from right. +* + JJ = MOD( IHI-J-1, 3 ) + DO I = IHI-J-3, JJ+1, -3 + CTEMP = A( J+1+I, J ) + S = -B( J+1+I, J ) + C1 = A( J+2+I, J ) + S1 = -B( J+2+I, J ) + C2 = A( J+3+I, J ) + S2 = -B( J+3+I, J ) +* + DO K = TOP+1, IHI + TEMP = A( K, J+I ) + TEMP1 = A( K, J+I+1 ) + TEMP2 = A( K, J+I+2 ) + TEMP3 = A( K, J+I+3 ) + A( K, J+I+3 ) = C2*TEMP3 + CONJG( S2 )*TEMP2 + TEMP2 = -S2*TEMP3 + C2*TEMP2 + A( K, J+I+2 ) = C1*TEMP2 + CONJG( S1 )*TEMP1 + TEMP1 = -S1*TEMP2 + C1*TEMP1 + A( K, J+I+1 ) = CTEMP*TEMP1 + CONJG( S )*TEMP + A( K, J+I ) = -S*TEMP1 + CTEMP*TEMP + END DO + END DO +* + IF( JJ.GT.0 ) THEN + DO I = JJ, 1, -1 + C = DBLE( A( J+1+I, J ) ) + CALL CROT( IHI-TOP, A( TOP+1, J+I+1 ), 1, + $ A( TOP+1, J+I ), 1, C, + $ -CONJG( B( J+1+I, J ) ) ) + END DO + END IF +* +* Update (J+1)th column of A by transformations from left. +* + IF ( J .LT. JCOL + NNB - 1 ) THEN + LEN = 1 + J - JCOL +* +* Multiply with the trailing accumulated unitary +* matrix, which takes the form +* +* [ U11 U12 ] +* U = [ ], +* [ U21 U22 ] +* +* where U21 is a LEN-by-LEN matrix and U12 is lower +* triangular. +* + JROW = IHI - NBLST + 1 + CALL CGEMV( 'Conjugate', NBLST, LEN, CONE, WORK, + $ NBLST, A( JROW, J+1 ), 1, CZERO, + $ WORK( PW ), 1 ) + PPW = PW + LEN + DO I = JROW, JROW+NBLST-LEN-1 + WORK( PPW ) = A( I, J+1 ) + PPW = PPW + 1 + END DO + CALL CTRMV( 'Lower', 'Conjugate', 'Non-unit', + $ NBLST-LEN, WORK( LEN*NBLST + 1 ), NBLST, + $ WORK( PW+LEN ), 1 ) + CALL CGEMV( 'Conjugate', LEN, NBLST-LEN, CONE, + $ WORK( (LEN+1)*NBLST - LEN + 1 ), NBLST, + $ A( JROW+NBLST-LEN, J+1 ), 1, CONE, + $ WORK( PW+LEN ), 1 ) + PPW = PW + DO I = JROW, JROW+NBLST-1 + A( I, J+1 ) = WORK( PPW ) + PPW = PPW + 1 + END DO +* +* Multiply with the other accumulated unitary +* matrices, which take the form +* +* [ U11 U12 0 ] +* [ ] +* U = [ U21 U22 0 ], +* [ ] +* [ 0 0 I ] +* +* where I denotes the (NNB-LEN)-by-(NNB-LEN) identity +* matrix, U21 is a LEN-by-LEN upper triangular matrix +* and U12 is an NNB-by-NNB lower triangular matrix. +* + PPWO = 1 + NBLST*NBLST + J0 = JROW - NNB + DO JROW = J0, JCOL+1, -NNB + PPW = PW + LEN + DO I = JROW, JROW+NNB-1 + WORK( PPW ) = A( I, J+1 ) + PPW = PPW + 1 + END DO + PPW = PW + DO I = JROW+NNB, JROW+NNB+LEN-1 + WORK( PPW ) = A( I, J+1 ) + PPW = PPW + 1 + END DO + CALL CTRMV( 'Upper', 'Conjugate', 'Non-unit', LEN, + $ WORK( PPWO + NNB ), 2*NNB, WORK( PW ), + $ 1 ) + CALL CTRMV( 'Lower', 'Conjugate', 'Non-unit', NNB, + $ WORK( PPWO + 2*LEN*NNB ), + $ 2*NNB, WORK( PW + LEN ), 1 ) + CALL CGEMV( 'Conjugate', NNB, LEN, CONE, + $ WORK( PPWO ), 2*NNB, A( JROW, J+1 ), 1, + $ CONE, WORK( PW ), 1 ) + CALL CGEMV( 'Conjugate', LEN, NNB, CONE, + $ WORK( PPWO + 2*LEN*NNB + NNB ), 2*NNB, + $ A( JROW+NNB, J+1 ), 1, CONE, + $ WORK( PW+LEN ), 1 ) + PPW = PW + DO I = JROW, JROW+LEN+NNB-1 + A( I, J+1 ) = WORK( PPW ) + PPW = PPW + 1 + END DO + PPWO = PPWO + 4*NNB*NNB + END DO + END IF + END DO +* +* Apply accumulated unitary matrices to A. +* + COLA = N - JCOL - NNB + 1 + J = IHI - NBLST + 1 + CALL CGEMM( 'Conjugate', 'No Transpose', NBLST, + $ COLA, NBLST, CONE, WORK, NBLST, + $ A( J, JCOL+NNB ), LDA, CZERO, WORK( PW ), + $ NBLST ) + CALL CLACPY( 'All', NBLST, COLA, WORK( PW ), NBLST, + $ A( J, JCOL+NNB ), LDA ) + PPWO = NBLST*NBLST + 1 + J0 = J - NNB + DO J = J0, JCOL+1, -NNB + IF ( BLK22 ) THEN +* +* Exploit the structure of +* +* [ U11 U12 ] +* U = [ ] +* [ U21 U22 ], +* +* where all blocks are NNB-by-NNB, U21 is upper +* triangular and U12 is lower triangular. +* + CALL CUNM22( 'Left', 'Conjugate', 2*NNB, COLA, NNB, + $ NNB, WORK( PPWO ), 2*NNB, + $ A( J, JCOL+NNB ), LDA, WORK( PW ), + $ LWORK-PW+1, IERR ) + ELSE +* +* Ignore the structure of U. +* + CALL CGEMM( 'Conjugate', 'No Transpose', 2*NNB, + $ COLA, 2*NNB, CONE, WORK( PPWO ), 2*NNB, + $ A( J, JCOL+NNB ), LDA, CZERO, WORK( PW ), + $ 2*NNB ) + CALL CLACPY( 'All', 2*NNB, COLA, WORK( PW ), 2*NNB, + $ A( J, JCOL+NNB ), LDA ) + END IF + PPWO = PPWO + 4*NNB*NNB + END DO +* +* Apply accumulated unitary matrices to Q. +* + IF( WANTQ ) THEN + J = IHI - NBLST + 1 + IF ( INITQ ) THEN + TOPQ = MAX( 2, J - JCOL + 1 ) + NH = IHI - TOPQ + 1 + ELSE + TOPQ = 1 + NH = N + END IF + CALL CGEMM( 'No Transpose', 'No Transpose', NH, + $ NBLST, NBLST, CONE, Q( TOPQ, J ), LDQ, + $ WORK, NBLST, CZERO, WORK( PW ), NH ) + CALL CLACPY( 'All', NH, NBLST, WORK( PW ), NH, + $ Q( TOPQ, J ), LDQ ) + PPWO = NBLST*NBLST + 1 + J0 = J - NNB + DO J = J0, JCOL+1, -NNB + IF ( INITQ ) THEN + TOPQ = MAX( 2, J - JCOL + 1 ) + NH = IHI - TOPQ + 1 + END IF + IF ( BLK22 ) THEN +* +* Exploit the structure of U. +* + CALL CUNM22( 'Right', 'No Transpose', NH, 2*NNB, + $ NNB, NNB, WORK( PPWO ), 2*NNB, + $ Q( TOPQ, J ), LDQ, WORK( PW ), + $ LWORK-PW+1, IERR ) + ELSE +* +* Ignore the structure of U. +* + CALL CGEMM( 'No Transpose', 'No Transpose', NH, + $ 2*NNB, 2*NNB, CONE, Q( TOPQ, J ), LDQ, + $ WORK( PPWO ), 2*NNB, CZERO, WORK( PW ), + $ NH ) + CALL CLACPY( 'All', NH, 2*NNB, WORK( PW ), NH, + $ Q( TOPQ, J ), LDQ ) + END IF + PPWO = PPWO + 4*NNB*NNB + END DO + END IF +* +* Accumulate right Givens rotations if required. +* + IF ( WANTZ .OR. TOP.GT.0 ) THEN +* +* Initialize small unitary factors that will hold the +* accumulated Givens rotations in workspace. +* + CALL CLASET( 'All', NBLST, NBLST, CZERO, CONE, WORK, + $ NBLST ) + PW = NBLST * NBLST + 1 + DO I = 1, N2NB + CALL CLASET( 'All', 2*NNB, 2*NNB, CZERO, CONE, + $ WORK( PW ), 2*NNB ) + PW = PW + 4*NNB*NNB + END DO +* +* Accumulate Givens rotations into workspace array. +* + DO J = JCOL, JCOL+NNB-1 + PPW = ( NBLST + 1 )*( NBLST - 2 ) - J + JCOL + 1 + LEN = 2 + J - JCOL + JROW = J + N2NB*NNB + 2 + DO I = IHI, JROW, -1 + CTEMP = A( I, J ) + A( I, J ) = CZERO + S = B( I, J ) + B( I, J ) = CZERO + DO JJ = PPW, PPW+LEN-1 + TEMP = WORK( JJ + NBLST ) + WORK( JJ + NBLST ) = CTEMP*TEMP - + $ CONJG( S )*WORK( JJ ) + WORK( JJ ) = S*TEMP + CTEMP*WORK( JJ ) + END DO + LEN = LEN + 1 + PPW = PPW - NBLST - 1 + END DO +* + PPWO = NBLST*NBLST + ( NNB+J-JCOL-1 )*2*NNB + NNB + J0 = JROW - NNB + DO JROW = J0, J+2, -NNB + PPW = PPWO + LEN = 2 + J - JCOL + DO I = JROW+NNB-1, JROW, -1 + CTEMP = A( I, J ) + A( I, J ) = CZERO + S = B( I, J ) + B( I, J ) = CZERO + DO JJ = PPW, PPW+LEN-1 + TEMP = WORK( JJ + 2*NNB ) + WORK( JJ + 2*NNB ) = CTEMP*TEMP - + $ CONJG( S )*WORK( JJ ) + WORK( JJ ) = S*TEMP + CTEMP*WORK( JJ ) + END DO + LEN = LEN + 1 + PPW = PPW - 2*NNB - 1 + END DO + PPWO = PPWO + 4*NNB*NNB + END DO + END DO + ELSE +* + CALL CLASET( 'Lower', IHI - JCOL - 1, NNB, CZERO, CZERO, + $ A( JCOL + 2, JCOL ), LDA ) + CALL CLASET( 'Lower', IHI - JCOL - 1, NNB, CZERO, CZERO, + $ B( JCOL + 2, JCOL ), LDB ) + END IF +* +* Apply accumulated unitary matrices to A and B. +* + IF ( TOP.GT.0 ) THEN + J = IHI - NBLST + 1 + CALL CGEMM( 'No Transpose', 'No Transpose', TOP, + $ NBLST, NBLST, CONE, A( 1, J ), LDA, + $ WORK, NBLST, CZERO, WORK( PW ), TOP ) + CALL CLACPY( 'All', TOP, NBLST, WORK( PW ), TOP, + $ A( 1, J ), LDA ) + PPWO = NBLST*NBLST + 1 + J0 = J - NNB + DO J = J0, JCOL+1, -NNB + IF ( BLK22 ) THEN +* +* Exploit the structure of U. +* + CALL CUNM22( 'Right', 'No Transpose', TOP, 2*NNB, + $ NNB, NNB, WORK( PPWO ), 2*NNB, + $ A( 1, J ), LDA, WORK( PW ), + $ LWORK-PW+1, IERR ) + ELSE +* +* Ignore the structure of U. +* + CALL CGEMM( 'No Transpose', 'No Transpose', TOP, + $ 2*NNB, 2*NNB, CONE, A( 1, J ), LDA, + $ WORK( PPWO ), 2*NNB, CZERO, + $ WORK( PW ), TOP ) + CALL CLACPY( 'All', TOP, 2*NNB, WORK( PW ), TOP, + $ A( 1, J ), LDA ) + END IF + PPWO = PPWO + 4*NNB*NNB + END DO +* + J = IHI - NBLST + 1 + CALL CGEMM( 'No Transpose', 'No Transpose', TOP, + $ NBLST, NBLST, CONE, B( 1, J ), LDB, + $ WORK, NBLST, CZERO, WORK( PW ), TOP ) + CALL CLACPY( 'All', TOP, NBLST, WORK( PW ), TOP, + $ B( 1, J ), LDB ) + PPWO = NBLST*NBLST + 1 + J0 = J - NNB + DO J = J0, JCOL+1, -NNB + IF ( BLK22 ) THEN +* +* Exploit the structure of U. +* + CALL CUNM22( 'Right', 'No Transpose', TOP, 2*NNB, + $ NNB, NNB, WORK( PPWO ), 2*NNB, + $ B( 1, J ), LDB, WORK( PW ), + $ LWORK-PW+1, IERR ) + ELSE +* +* Ignore the structure of U. +* + CALL CGEMM( 'No Transpose', 'No Transpose', TOP, + $ 2*NNB, 2*NNB, CONE, B( 1, J ), LDB, + $ WORK( PPWO ), 2*NNB, CZERO, + $ WORK( PW ), TOP ) + CALL CLACPY( 'All', TOP, 2*NNB, WORK( PW ), TOP, + $ B( 1, J ), LDB ) + END IF + PPWO = PPWO + 4*NNB*NNB + END DO + END IF +* +* Apply accumulated unitary matrices to Z. +* + IF( WANTZ ) THEN + J = IHI - NBLST + 1 + IF ( INITQ ) THEN + TOPQ = MAX( 2, J - JCOL + 1 ) + NH = IHI - TOPQ + 1 + ELSE + TOPQ = 1 + NH = N + END IF + CALL CGEMM( 'No Transpose', 'No Transpose', NH, + $ NBLST, NBLST, CONE, Z( TOPQ, J ), LDZ, + $ WORK, NBLST, CZERO, WORK( PW ), NH ) + CALL CLACPY( 'All', NH, NBLST, WORK( PW ), NH, + $ Z( TOPQ, J ), LDZ ) + PPWO = NBLST*NBLST + 1 + J0 = J - NNB + DO J = J0, JCOL+1, -NNB + IF ( INITQ ) THEN + TOPQ = MAX( 2, J - JCOL + 1 ) + NH = IHI - TOPQ + 1 + END IF + IF ( BLK22 ) THEN +* +* Exploit the structure of U. +* + CALL CUNM22( 'Right', 'No Transpose', NH, 2*NNB, + $ NNB, NNB, WORK( PPWO ), 2*NNB, + $ Z( TOPQ, J ), LDZ, WORK( PW ), + $ LWORK-PW+1, IERR ) + ELSE +* +* Ignore the structure of U. +* + CALL CGEMM( 'No Transpose', 'No Transpose', NH, + $ 2*NNB, 2*NNB, CONE, Z( TOPQ, J ), LDZ, + $ WORK( PPWO ), 2*NNB, CZERO, WORK( PW ), + $ NH ) + CALL CLACPY( 'All', NH, 2*NNB, WORK( PW ), NH, + $ Z( TOPQ, J ), LDZ ) + END IF + PPWO = PPWO + 4*NNB*NNB + END DO + END IF + END DO + END IF +* +* Use unblocked code to reduce the rest of the matrix +* Avoid re-initialization of modified Q and Z. +* + COMPQ2 = COMPQ + COMPZ2 = COMPZ + IF ( JCOL.NE.ILO ) THEN + IF ( WANTQ ) + $ COMPQ2 = 'V' + IF ( WANTZ ) + $ COMPZ2 = 'V' + END IF +* + IF ( JCOL.LT.IHI ) + $ CALL CGGHRD( COMPQ2, COMPZ2, N, JCOL, IHI, A, LDA, B, LDB, Q, + $ LDQ, Z, LDZ, IERR ) + WORK( 1 ) = CMPLX( LWKOPT ) +* + RETURN +* +* End of CGGHD3 +* + END diff --git a/dspl/liblapack/SRC/cgghrd.f b/dspl/liblapack/SRC/cgghrd.f new file mode 100644 index 0000000..e615e3f --- /dev/null +++ b/dspl/liblapack/SRC/cgghrd.f @@ -0,0 +1,361 @@ +*> \brief \b CGGHRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGGHRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, +* LDQ, Z, LDZ, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ, COMPZ +* INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGGHRD reduces a pair of complex matrices (A,B) to generalized upper +*> Hessenberg form using unitary transformations, where A is a +*> general matrix and B is upper triangular. The form of the generalized +*> eigenvalue problem is +*> A*x = lambda*B*x, +*> and B is typically made upper triangular by computing its QR +*> factorization and moving the unitary matrix Q to the left side +*> of the equation. +*> +*> This subroutine simultaneously reduces A to a Hessenberg matrix H: +*> Q**H*A*Z = H +*> and transforms B to another upper triangular matrix T: +*> Q**H*B*Z = T +*> in order to reduce the problem to its standard form +*> H*y = lambda*T*y +*> where y = Z**H*x. +*> +*> The unitary matrices Q and Z are determined as products of Givens +*> rotations. They may either be formed explicitly, or they may be +*> postmultiplied into input matrices Q1 and Z1, so that +*> Q1 * A * Z1**H = (Q1*Q) * H * (Z1*Z)**H +*> Q1 * B * Z1**H = (Q1*Q) * T * (Z1*Z)**H +*> If Q1 is the unitary matrix from the QR factorization of B in the +*> original equation A*x = lambda*B*x, then CGGHRD reduces the original +*> problem to generalized Hessenberg form. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> = 'N': do not compute Q; +*> = 'I': Q is initialized to the unit matrix, and the +*> unitary matrix Q is returned; +*> = 'V': Q must contain a unitary matrix Q1 on entry, +*> and the product Q1*Q is returned. +*> \endverbatim +*> +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': do not compute Z; +*> = 'I': Z is initialized to the unit matrix, and the +*> unitary matrix Z is returned; +*> = 'V': Z must contain a unitary matrix Z1 on entry, +*> and the product Z1*Z is returned. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> ILO and IHI mark the rows and columns of A which are to be +*> reduced. It is assumed that A is already upper triangular +*> in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are +*> normally set by a previous call to CGGBAL; otherwise they +*> should be set to 1 and N respectively. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the N-by-N general matrix to be reduced. +*> On exit, the upper triangle and the first subdiagonal of A +*> are overwritten with the upper Hessenberg matrix H, and the +*> rest is set to zero. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB, N) +*> On entry, the N-by-N upper triangular matrix B. +*> On exit, the upper triangular matrix T = Q**H B Z. The +*> elements below the diagonal are set to zero. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ, N) +*> On entry, if COMPQ = 'V', the unitary matrix Q1, typically +*> from the QR factorization of B. +*> On exit, if COMPQ='I', the unitary matrix Q, and if +*> COMPQ = 'V', the product Q1*Q. +*> Not referenced if COMPQ='N'. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', the unitary matrix Z1. +*> On exit, if COMPZ='I', the unitary matrix Z, and if +*> COMPZ = 'V', the product Z1*Z. +*> Not referenced if COMPZ='N'. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. +*> LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> This routine reduces A to Hessenberg and B to triangular form by +*> an unblocked reduction, as described in _Matrix_Computations_, +*> by Golub and van Loan (Johns Hopkins Press). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, + $ LDQ, Z, LDZ, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ + INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE, CZERO + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), + $ CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILQ, ILZ + INTEGER ICOMPQ, ICOMPZ, JCOL, JROW + REAL C + COMPLEX CTEMP, S +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLARTG, CLASET, CROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Decode COMPQ +* + IF( LSAME( COMPQ, 'N' ) ) THEN + ILQ = .FALSE. + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'V' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 2 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 3 + ELSE + ICOMPQ = 0 + END IF +* +* Decode COMPZ +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ILZ = .FALSE. + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 2 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 3 + ELSE + ICOMPZ = 0 + END IF +* +* Test the input parameters. +* + INFO = 0 + IF( ICOMPQ.LE.0 ) THEN + INFO = -1 + ELSE IF( ICOMPZ.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( ( ILQ .AND. LDQ.LT.N ) .OR. LDQ.LT.1 ) THEN + INFO = -11 + ELSE IF( ( ILZ .AND. LDZ.LT.N ) .OR. LDZ.LT.1 ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGHRD', -INFO ) + RETURN + END IF +* +* Initialize Q and Z if desired. +* + IF( ICOMPQ.EQ.3 ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) + IF( ICOMPZ.EQ.3 ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, Z, LDZ ) +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* +* Zero out lower triangle of B +* + DO 20 JCOL = 1, N - 1 + DO 10 JROW = JCOL + 1, N + B( JROW, JCOL ) = CZERO + 10 CONTINUE + 20 CONTINUE +* +* Reduce A and B +* + DO 40 JCOL = ILO, IHI - 2 +* + DO 30 JROW = IHI, JCOL + 2, -1 +* +* Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL) +* + CTEMP = A( JROW-1, JCOL ) + CALL CLARTG( CTEMP, A( JROW, JCOL ), C, S, + $ A( JROW-1, JCOL ) ) + A( JROW, JCOL ) = CZERO + CALL CROT( N-JCOL, A( JROW-1, JCOL+1 ), LDA, + $ A( JROW, JCOL+1 ), LDA, C, S ) + CALL CROT( N+2-JROW, B( JROW-1, JROW-1 ), LDB, + $ B( JROW, JROW-1 ), LDB, C, S ) + IF( ILQ ) + $ CALL CROT( N, Q( 1, JROW-1 ), 1, Q( 1, JROW ), 1, C, + $ CONJG( S ) ) +* +* Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1) +* + CTEMP = B( JROW, JROW ) + CALL CLARTG( CTEMP, B( JROW, JROW-1 ), C, S, + $ B( JROW, JROW ) ) + B( JROW, JROW-1 ) = CZERO + CALL CROT( IHI, A( 1, JROW ), 1, A( 1, JROW-1 ), 1, C, S ) + CALL CROT( JROW-1, B( 1, JROW ), 1, B( 1, JROW-1 ), 1, C, + $ S ) + IF( ILZ ) + $ CALL CROT( N, Z( 1, JROW ), 1, Z( 1, JROW-1 ), 1, C, S ) + 30 CONTINUE + 40 CONTINUE +* + RETURN +* +* End of CGGHRD +* + END diff --git a/dspl/liblapack/SRC/cgglse.f b/dspl/liblapack/SRC/cgglse.f new file mode 100644 index 0000000..b84dbec --- /dev/null +++ b/dspl/liblapack/SRC/cgglse.f @@ -0,0 +1,355 @@ +*> \brief CGGLSE solves overdetermined or underdetermined systems for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGGLSE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), C( * ), D( * ), +* $ WORK( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGGLSE solves the linear equality-constrained least squares (LSE) +*> problem: +*> +*> minimize || c - A*x ||_2 subject to B*x = d +*> +*> where A is an M-by-N matrix, B is a P-by-N matrix, c is a given +*> M-vector, and d is a given P-vector. It is assumed that +*> P <= N <= M+P, and +*> +*> rank(B) = P and rank( (A) ) = N. +*> ( (B) ) +*> +*> These conditions ensure that the LSE problem has a unique solution, +*> which is obtained using a generalized RQ factorization of the +*> matrices (B, A) given by +*> +*> B = (0 R)*Q, A = Z*T*Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. 0 <= P <= N <= M+P. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, the upper triangle of the subarray B(1:P,N-P+1:N) +*> contains the P-by-P upper triangular matrix R. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (M) +*> On entry, C contains the right hand side vector for the +*> least squares part of the LSE problem. +*> On exit, the residual sum of squares for the solution +*> is given by the sum of squares of elements N-P+1 to M of +*> vector C. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is COMPLEX array, dimension (P) +*> On entry, D contains the right hand side vector for the +*> constrained equation. +*> On exit, D is destroyed. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (N) +*> On exit, X is the solution of the LSE problem. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M+N+P). +*> For optimum performance LWORK >= P+min(M,N)+max(M,N)*NB, +*> where NB is an upper bound for the optimal blocksizes for +*> CGEQRF, CGERQF, CUNMQR and CUNMRQ. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1: the upper triangular factor R associated with B in the +*> generalized RQ factorization of the pair (B, A) is +*> singular, so that rank(B) < P; the least squares +*> solution could not be computed. +*> = 2: the (N-P) by (N-P) part of the upper trapezoidal factor +*> T associated with A in the generalized RQ factorization +*> of the pair (B, A) is singular, so that +*> rank( (A) ) < N; the least squares solution could not +*> ( (B) ) +*> be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERsolve +* +* ===================================================================== + SUBROUTINE CGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), C( * ), D( * ), + $ WORK( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKMIN, LWKOPT, MN, NB, NB1, NB2, NB3, + $ NB4, NR +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGEMV, CGGRQF, CTRMV, CTRTRS, + $ CUNMQR, CUNMRQ, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( P.LT.0 .OR. P.GT.N .OR. P.LT.N-M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -7 + END IF +* +* Calculate workspace +* + IF( INFO.EQ.0) THEN + IF( N.EQ.0 ) THEN + LWKMIN = 1 + LWKOPT = 1 + ELSE + NB1 = ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'CGERQF', ' ', M, N, -1, -1 ) + NB3 = ILAENV( 1, 'CUNMQR', ' ', M, N, P, -1 ) + NB4 = ILAENV( 1, 'CUNMRQ', ' ', M, N, P, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKMIN = M + N + P + LWKOPT = P + MN + MAX( M, N )*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGLSE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the GRQ factorization of matrices B and A: +* +* B*Q**H = ( 0 T12 ) P Z**H*A*Q**H = ( R11 R12 ) N-P +* N-P P ( 0 R22 ) M+P-N +* N-P P +* +* where T12 and R11 are upper triangular, and Q and Z are +* unitary. +* + CALL CGGRQF( P, M, N, B, LDB, WORK, A, LDA, WORK( P+1 ), + $ WORK( P+MN+1 ), LWORK-P-MN, INFO ) + LOPT = WORK( P+MN+1 ) +* +* Update c = Z**H *c = ( c1 ) N-P +* ( c2 ) M+P-N +* + CALL CUNMQR( 'Left', 'Conjugate Transpose', M, 1, MN, A, LDA, + $ WORK( P+1 ), C, MAX( 1, M ), WORK( P+MN+1 ), + $ LWORK-P-MN, INFO ) + LOPT = MAX( LOPT, INT( WORK( P+MN+1 ) ) ) +* +* Solve T12*x2 = d for x2 +* + IF( P.GT.0 ) THEN + CALL CTRTRS( 'Upper', 'No transpose', 'Non-unit', P, 1, + $ B( 1, N-P+1 ), LDB, D, P, INFO ) +* + IF( INFO.GT.0 ) THEN + INFO = 1 + RETURN + END IF +* +* Put the solution in X +* + CALL CCOPY( P, D, 1, X( N-P+1 ), 1 ) +* +* Update c1 +* + CALL CGEMV( 'No transpose', N-P, P, -CONE, A( 1, N-P+1 ), LDA, + $ D, 1, CONE, C, 1 ) + END IF +* +* Solve R11*x1 = c1 for x1 +* + IF( N.GT.P ) THEN + CALL CTRTRS( 'Upper', 'No transpose', 'Non-unit', N-P, 1, + $ A, LDA, C, N-P, INFO ) +* + IF( INFO.GT.0 ) THEN + INFO = 2 + RETURN + END IF +* +* Put the solutions in X +* + CALL CCOPY( N-P, C, 1, X, 1 ) + END IF +* +* Compute the residual vector: +* + IF( M.LT.N ) THEN + NR = M + P - N + IF( NR.GT.0 ) + $ CALL CGEMV( 'No transpose', NR, N-M, -CONE, A( N-P+1, M+1 ), + $ LDA, D( NR+1 ), 1, CONE, C( N-P+1 ), 1 ) + ELSE + NR = P + END IF + IF( NR.GT.0 ) THEN + CALL CTRMV( 'Upper', 'No transpose', 'Non unit', NR, + $ A( N-P+1, N-P+1 ), LDA, D, 1 ) + CALL CAXPY( NR, -CONE, D, 1, C( N-P+1 ), 1 ) + END IF +* +* Backward transformation x = Q**H*x +* + CALL CUNMRQ( 'Left', 'Conjugate Transpose', N, 1, P, B, LDB, + $ WORK( 1 ), X, N, WORK( P+MN+1 ), LWORK-P-MN, INFO ) + WORK( 1 ) = P + MN + MAX( LOPT, INT( WORK( P+MN+1 ) ) ) +* + RETURN +* +* End of CGGLSE +* + END diff --git a/dspl/liblapack/SRC/cggqrf.f b/dspl/liblapack/SRC/cggqrf.f new file mode 100644 index 0000000..6d3496b --- /dev/null +++ b/dspl/liblapack/SRC/cggqrf.f @@ -0,0 +1,299 @@ +*> \brief \b CGGQRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGGQRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGGQRF computes a generalized QR factorization of an N-by-M matrix A +*> and an N-by-P matrix B: +*> +*> A = Q*R, B = Q*T*Z, +*> +*> where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix, +*> and R and T assume one of the forms: +*> +*> if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, +*> ( 0 ) N-M N M-N +*> M +*> +*> where R11 is upper triangular, and +*> +*> if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, +*> P-N N ( T21 ) P +*> P +*> +*> where T12 or T21 is upper triangular. +*> +*> In particular, if B is square and nonsingular, the GQR factorization +*> of A and B implicitly gives the QR factorization of inv(B)*A: +*> +*> inv(B)*A = Z**H * (inv(T)*R) +*> +*> where inv(B) denotes the inverse of the matrix B, and Z' denotes the +*> conjugate transpose of matrix Z. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of columns of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of columns of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,M) +*> On entry, the N-by-M matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(N,M)-by-M upper trapezoidal matrix R (R is +*> upper triangular if N >= M); the elements below the diagonal, +*> with the array TAUA, represent the unitary matrix Q as a +*> product of min(N,M) elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAUA +*> \verbatim +*> TAUA is COMPLEX array, dimension (min(N,M)) +*> The scalar factors of the elementary reflectors which +*> represent the unitary matrix Q (see Further Details). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,P) +*> On entry, the N-by-P matrix B. +*> On exit, if N <= P, the upper triangle of the subarray +*> B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; +*> if N > P, the elements on and above the (N-P)-th subdiagonal +*> contain the N-by-P upper trapezoidal matrix T; the remaining +*> elements, with the array TAUB, represent the unitary +*> matrix Z as a product of elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAUB +*> \verbatim +*> TAUB is COMPLEX array, dimension (min(N,P)) +*> The scalar factors of the elementary reflectors which +*> represent the unitary matrix Z (see Further Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N,M,P). +*> For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3), +*> where NB1 is the optimal blocksize for the QR factorization +*> of an N-by-M matrix, NB2 is the optimal blocksize for the +*> RQ factorization of an N-by-P matrix, and NB3 is the optimal +*> blocksize for a call of CUNMQR. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(n,m). +*> +*> Each H(i) has the form +*> +*> H(i) = I - taua * v * v**H +*> +*> where taua is a complex scalar, and v is a complex vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), +*> and taua in TAUA(i). +*> To form Q explicitly, use LAPACK subroutine CUNGQR. +*> To use Q to update another matrix, use LAPACK subroutine CUNMQR. +*> +*> The matrix Z is represented as a product of elementary reflectors +*> +*> Z = H(1) H(2) . . . H(k), where k = min(n,p). +*> +*> Each H(i) has the form +*> +*> H(i) = I - taub * v * v**H +*> +*> where taub is a complex scalar, and v is a complex vector with +*> v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in +*> B(n-k+i,1:p-k+i-1), and taub in TAUB(i). +*> To form Z explicitly, use LAPACK subroutine CUNGRQ. +*> To use Z to update another matrix, use LAPACK subroutine CUNMRQ. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKOPT, NB, NB1, NB2, NB3 +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CGERQF, CUNMQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB1 = ILAENV( 1, 'CGEQRF', ' ', N, M, -1, -1 ) + NB2 = ILAENV( 1, 'CGERQF', ' ', N, P, -1, -1 ) + NB3 = ILAENV( 1, 'CUNMQR', ' ', N, M, P, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LWKOPT = MAX( N, M, P)*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( P.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, N, M, P ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* QR factorization of N-by-M matrix A: A = Q*R +* + CALL CGEQRF( N, M, A, LDA, TAUA, WORK, LWORK, INFO ) + LOPT = WORK( 1 ) +* +* Update B := Q**H*B. +* + CALL CUNMQR( 'Left', 'Conjugate Transpose', N, P, MIN( N, M ), A, + $ LDA, TAUA, B, LDB, WORK, LWORK, INFO ) + LOPT = MAX( LOPT, INT( WORK( 1 ) ) ) +* +* RQ factorization of N-by-P matrix B: B = T*Z. +* + CALL CGERQF( N, P, B, LDB, TAUB, WORK, LWORK, INFO ) + WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) ) +* + RETURN +* +* End of CGGQRF +* + END diff --git a/dspl/liblapack/SRC/cggrqf.f b/dspl/liblapack/SRC/cggrqf.f new file mode 100644 index 0000000..cad1cc0 --- /dev/null +++ b/dspl/liblapack/SRC/cggrqf.f @@ -0,0 +1,299 @@ +*> \brief \b CGGRQF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGGRQF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGGRQF computes a generalized RQ factorization of an M-by-N matrix A +*> and a P-by-N matrix B: +*> +*> A = R*Q, B = Z*T*Q, +*> +*> where Q is an N-by-N unitary matrix, Z is a P-by-P unitary +*> matrix, and R and T assume one of the forms: +*> +*> if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, +*> N-M M ( R21 ) N +*> N +*> +*> where R12 or R21 is upper triangular, and +*> +*> if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, +*> ( 0 ) P-N P N-P +*> N +*> +*> where T11 is upper triangular. +*> +*> In particular, if B is square and nonsingular, the GRQ factorization +*> of A and B implicitly gives the RQ factorization of A*inv(B): +*> +*> A*inv(B) = (R*inv(T))*Z**H +*> +*> where inv(B) denotes the inverse of the matrix B, and Z**H denotes the +*> conjugate transpose of the matrix Z. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, if M <= N, the upper triangle of the subarray +*> A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R; +*> if M > N, the elements on and above the (M-N)-th subdiagonal +*> contain the M-by-N upper trapezoidal matrix R; the remaining +*> elements, with the array TAUA, represent the unitary +*> matrix Q as a product of elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAUA +*> \verbatim +*> TAUA is COMPLEX array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the unitary matrix Q (see Further Details). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(P,N)-by-N upper trapezoidal matrix T (T is +*> upper triangular if P >= N); the elements below the diagonal, +*> with the array TAUB, represent the unitary matrix Z as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[out] TAUB +*> \verbatim +*> TAUB is COMPLEX array, dimension (min(P,N)) +*> The scalar factors of the elementary reflectors which +*> represent the unitary matrix Z (see Further Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N,M,P). +*> For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3), +*> where NB1 is the optimal blocksize for the RQ factorization +*> of an M-by-N matrix, NB2 is the optimal blocksize for the +*> QR factorization of a P-by-N matrix, and NB3 is the optimal +*> blocksize for a call of CUNMRQ. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO=-i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - taua * v * v**H +*> +*> where taua is a complex scalar, and v is a complex vector with +*> v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in +*> A(m-k+i,1:n-k+i-1), and taua in TAUA(i). +*> To form Q explicitly, use LAPACK subroutine CUNGRQ. +*> To use Q to update another matrix, use LAPACK subroutine CUNMRQ. +*> +*> The matrix Z is represented as a product of elementary reflectors +*> +*> Z = H(1) H(2) . . . H(k), where k = min(p,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - taub * v * v**H +*> +*> where taub is a complex scalar, and v is a complex vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i), +*> and taub in TAUB(i). +*> To form Z explicitly, use LAPACK subroutine CUNGQR. +*> To use Z to update another matrix, use LAPACK subroutine CUNMQR. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKOPT, NB, NB1, NB2, NB3 +* .. +* .. External Subroutines .. + EXTERNAL CGEQRF, CGERQF, CUNMRQ, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB1 = ILAENV( 1, 'CGERQF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'CGEQRF', ' ', P, N, -1, -1 ) + NB3 = ILAENV( 1, 'CUNMRQ', ' ', M, N, P, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LWKOPT = MAX( N, M, P)*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( P.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, M, P, N ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGRQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* RQ factorization of M-by-N matrix A: A = R*Q +* + CALL CGERQF( M, N, A, LDA, TAUA, WORK, LWORK, INFO ) + LOPT = WORK( 1 ) +* +* Update B := B*Q**H +* + CALL CUNMRQ( 'Right', 'Conjugate Transpose', P, N, MIN( M, N ), + $ A( MAX( 1, M-N+1 ), 1 ), LDA, TAUA, B, LDB, WORK, + $ LWORK, INFO ) + LOPT = MAX( LOPT, INT( WORK( 1 ) ) ) +* +* QR factorization of P-by-N matrix B: B = Z*T +* + CALL CGEQRF( P, N, B, LDB, TAUB, WORK, LWORK, INFO ) + WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) ) +* + RETURN +* +* End of CGGRQF +* + END diff --git a/dspl/liblapack/SRC/cggsvd3.f b/dspl/liblapack/SRC/cggsvd3.f new file mode 100644 index 0000000..c9b4262 --- /dev/null +++ b/dspl/liblapack/SRC/cggsvd3.f @@ -0,0 +1,506 @@ +*> \brief CGGSVD3 computes the singular value decomposition (SVD) for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGGSVD3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGGSVD3( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, +* LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, +* LWORK, RWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBQ, JOBU, JOBV +* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, LWORK +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL ALPHA( * ), BETA( * ), RWORK( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGGSVD3 computes the generalized singular value decomposition (GSVD) +*> of an M-by-N complex matrix A and P-by-N complex matrix B: +*> +*> U**H*A*Q = D1*( 0 R ), V**H*B*Q = D2*( 0 R ) +*> +*> where U, V and Q are unitary matrices. +*> Let K+L = the effective numerical rank of the +*> matrix (A**H,B**H)**H, then R is a (K+L)-by-(K+L) nonsingular upper +*> triangular matrix, D1 and D2 are M-by-(K+L) and P-by-(K+L) "diagonal" +*> matrices and of the following structures, respectively: +*> +*> If M-K-L >= 0, +*> +*> K L +*> D1 = K ( I 0 ) +*> L ( 0 C ) +*> M-K-L ( 0 0 ) +*> +*> K L +*> D2 = L ( 0 S ) +*> P-L ( 0 0 ) +*> +*> N-K-L K L +*> ( 0 R ) = K ( 0 R11 R12 ) +*> L ( 0 0 R22 ) +*> +*> where +*> +*> C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), +*> S = diag( BETA(K+1), ... , BETA(K+L) ), +*> C**2 + S**2 = I. +*> +*> R is stored in A(1:K+L,N-K-L+1:N) on exit. +*> +*> If M-K-L < 0, +*> +*> K M-K K+L-M +*> D1 = K ( I 0 0 ) +*> M-K ( 0 C 0 ) +*> +*> K M-K K+L-M +*> D2 = M-K ( 0 S 0 ) +*> K+L-M ( 0 0 I ) +*> P-L ( 0 0 0 ) +*> +*> N-K-L K M-K K+L-M +*> ( 0 R ) = K ( 0 R11 R12 R13 ) +*> M-K ( 0 0 R22 R23 ) +*> K+L-M ( 0 0 0 R33 ) +*> +*> where +*> +*> C = diag( ALPHA(K+1), ... , ALPHA(M) ), +*> S = diag( BETA(K+1), ... , BETA(M) ), +*> C**2 + S**2 = I. +*> +*> (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored +*> ( 0 R22 R23 ) +*> in B(M-K+1:L,N+M-K-L+1:N) on exit. +*> +*> The routine computes C, S, R, and optionally the unitary +*> transformation matrices U, V and Q. +*> +*> In particular, if B is an N-by-N nonsingular matrix, then the GSVD of +*> A and B implicitly gives the SVD of A*inv(B): +*> A*inv(B) = U*(D1*inv(D2))*V**H. +*> If ( A**H,B**H)**H has orthonormal columns, then the GSVD of A and B is also +*> equal to the CS decomposition of A and B. Furthermore, the GSVD can +*> be used to derive the solution of the eigenvalue problem: +*> A**H*A x = lambda* B**H*B x. +*> In some literature, the GSVD of A and B is presented in the form +*> U**H*A*X = ( 0 D1 ), V**H*B*X = ( 0 D2 ) +*> where U and V are orthogonal and X is nonsingular, and D1 and D2 are +*> ``diagonal''. The former GSVD form can be converted to the latter +*> form by taking the nonsingular matrix X as +*> +*> X = Q*( I 0 ) +*> ( 0 inv(R) ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> = 'U': Unitary matrix U is computed; +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> = 'V': Unitary matrix V is computed; +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBQ +*> \verbatim +*> JOBQ is CHARACTER*1 +*> = 'Q': Unitary matrix Q is computed; +*> = 'N': Q is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> \endverbatim +*> +*> \param[out] L +*> \verbatim +*> L is INTEGER +*> +*> On exit, K and L specify the dimension of the subblocks +*> described in Purpose. +*> K + L = effective numerical rank of (A**H,B**H)**H. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A contains the triangular matrix R, or part of R. +*> See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, B contains part of the triangular matrix R if +*> M-K-L < 0. See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is REAL array, dimension (N) +*> +*> On exit, ALPHA and BETA contain the generalized singular +*> value pairs of A and B; +*> ALPHA(1:K) = 1, +*> BETA(1:K) = 0, +*> and if M-K-L >= 0, +*> ALPHA(K+1:K+L) = C, +*> BETA(K+1:K+L) = S, +*> or if M-K-L < 0, +*> ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0 +*> BETA(K+1:M) =S, BETA(M+1:K+L) =1 +*> and +*> ALPHA(K+L+1:N) = 0 +*> BETA(K+L+1:N) = 0 +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX array, dimension (LDU,M) +*> If JOBU = 'U', U contains the M-by-M unitary matrix U. +*> If JOBU = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,M) if +*> JOBU = 'U'; LDU >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX array, dimension (LDV,P) +*> If JOBV = 'V', V contains the P-by-P unitary matrix V. +*> If JOBV = 'N', V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,P) if +*> JOBV = 'V'; LDV >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ,N) +*> If JOBQ = 'Q', Q contains the N-by-N unitary matrix Q. +*> If JOBQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N) if +*> JOBQ = 'Q'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> On exit, IWORK stores the sorting information. More +*> precisely, the following loop will sort ALPHA +*> for I = K+1, min(M,K+L) +*> swap ALPHA(I) and ALPHA(IWORK(I)) +*> endfor +*> such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, the Jacobi-type procedure failed to +*> converge. For further details, see subroutine CTGSJA. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> TOLA REAL +*> TOLB REAL +*> TOLA and TOLB are the thresholds to determine the effective +*> rank of (A**H,B**H)**H. Generally, they are set to +*> TOLA = MAX(M,N)*norm(A)*MACHEPS, +*> TOLB = MAX(P,N)*norm(B)*MACHEPS. +*> The size of TOLA and TOLB may affect the size of backward +*> errors of the decomposition. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date August 2015 +* +*> \ingroup complexGEsing +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* +*> \par Further Details: +* ===================== +*> +*> CGGSVD3 replaces the deprecated subroutine CGGSVD. +*> +* ===================================================================== + SUBROUTINE CGGSVD3( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, + $ LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, + $ WORK, LWORK, RWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* August 2015 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, + $ LWORK +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL ALPHA( * ), BETA( * ), RWORK( * ) + COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL WANTQ, WANTU, WANTV, LQUERY + INTEGER I, IBND, ISUB, J, NCYCLE, LWKOPT + REAL ANORM, BNORM, SMAX, TEMP, TOLA, TOLB, ULP, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANGE, SLAMCH + EXTERNAL LSAME, CLANGE, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CGGSVP3, CTGSJA, SCOPY, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) + LQUERY = ( LWORK.EQ.-1 ) + LWKOPT = 1 +* +* Test the input arguments +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( P.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -12 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -24 + END IF +* +* Compute workspace +* + IF( INFO.EQ.0 ) THEN + CALL CGGSVP3( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, TOLA, + $ TOLB, K, L, U, LDU, V, LDV, Q, LDQ, IWORK, RWORK, + $ WORK, WORK, -1, INFO ) + LWKOPT = N + INT( WORK( 1 ) ) + LWKOPT = MAX( 2*N, LWKOPT ) + LWKOPT = MAX( 1, LWKOPT ) + WORK( 1 ) = CMPLX( LWKOPT ) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGSVD3', -INFO ) + RETURN + END IF + IF( LQUERY ) THEN + RETURN + ENDIF +* +* Compute the Frobenius norm of matrices A and B +* + ANORM = CLANGE( '1', M, N, A, LDA, RWORK ) + BNORM = CLANGE( '1', P, N, B, LDB, RWORK ) +* +* Get machine precision and set up threshold for determining +* the effective numerical rank of the matrices A and B. +* + ULP = SLAMCH( 'Precision' ) + UNFL = SLAMCH( 'Safe Minimum' ) + TOLA = MAX( M, N )*MAX( ANORM, UNFL )*ULP + TOLB = MAX( P, N )*MAX( BNORM, UNFL )*ULP +* + CALL CGGSVP3( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, TOLA, + $ TOLB, K, L, U, LDU, V, LDV, Q, LDQ, IWORK, RWORK, + $ WORK, WORK( N+1 ), LWORK-N, INFO ) +* +* Compute the GSVD of two upper "triangular" matrices +* + CALL CTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, LDB, + $ TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, + $ WORK, NCYCLE, INFO ) +* +* Sort the singular values and store the pivot indices in IWORK +* Copy ALPHA to RWORK, then sort ALPHA in RWORK +* + CALL SCOPY( N, ALPHA, 1, RWORK, 1 ) + IBND = MIN( L, M-K ) + DO 20 I = 1, IBND +* +* Scan for largest ALPHA(K+I) +* + ISUB = I + SMAX = RWORK( K+I ) + DO 10 J = I + 1, IBND + TEMP = RWORK( K+J ) + IF( TEMP.GT.SMAX ) THEN + ISUB = J + SMAX = TEMP + END IF + 10 CONTINUE + IF( ISUB.NE.I ) THEN + RWORK( K+ISUB ) = RWORK( K+I ) + RWORK( K+I ) = SMAX + IWORK( K+I ) = K + ISUB + ELSE + IWORK( K+I ) = K + I + END IF + 20 CONTINUE +* + WORK( 1 ) = CMPLX( LWKOPT ) + RETURN +* +* End of CGGSVD3 +* + END diff --git a/dspl/liblapack/SRC/cggsvp3.f b/dspl/liblapack/SRC/cggsvp3.f new file mode 100644 index 0000000..8ea6036 --- /dev/null +++ b/dspl/liblapack/SRC/cggsvp3.f @@ -0,0 +1,579 @@ +*> \brief \b CGGSVP3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGGSVP3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGGSVP3( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, +* TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, +* IWORK, RWORK, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBQ, JOBU, JOBV +* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, LWORK +* REAL TOLA, TOLB +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL RWORK( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGGSVP3 computes unitary matrices U, V and Q such that +*> +*> N-K-L K L +*> U**H*A*Q = K ( 0 A12 A13 ) if M-K-L >= 0; +*> L ( 0 0 A23 ) +*> M-K-L ( 0 0 0 ) +*> +*> N-K-L K L +*> = K ( 0 A12 A13 ) if M-K-L < 0; +*> M-K ( 0 0 A23 ) +*> +*> N-K-L K L +*> V**H*B*Q = L ( 0 0 B13 ) +*> P-L ( 0 0 0 ) +*> +*> where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular +*> upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, +*> otherwise A23 is (M-K)-by-L upper trapezoidal. K+L = the effective +*> numerical rank of the (M+P)-by-N matrix (A**H,B**H)**H. +*> +*> This decomposition is the preprocessing step for computing the +*> Generalized Singular Value Decomposition (GSVD), see subroutine +*> CGGSVD3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> = 'U': Unitary matrix U is computed; +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> = 'V': Unitary matrix V is computed; +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBQ +*> \verbatim +*> JOBQ is CHARACTER*1 +*> = 'Q': Unitary matrix Q is computed; +*> = 'N': Q is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A contains the triangular (or trapezoidal) matrix +*> described in the Purpose section. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, B contains the triangular matrix described in +*> the Purpose section. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[in] TOLA +*> \verbatim +*> TOLA is REAL +*> \endverbatim +*> +*> \param[in] TOLB +*> \verbatim +*> TOLB is REAL +*> +*> TOLA and TOLB are the thresholds to determine the effective +*> numerical rank of matrix B and a subblock of A. Generally, +*> they are set to +*> TOLA = MAX(M,N)*norm(A)*MACHEPS, +*> TOLB = MAX(P,N)*norm(B)*MACHEPS. +*> The size of TOLA and TOLB may affect the size of backward +*> errors of the decomposition. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> \endverbatim +*> +*> \param[out] L +*> \verbatim +*> L is INTEGER +*> +*> On exit, K and L specify the dimension of the subblocks +*> described in Purpose section. +*> K + L = effective numerical rank of (A**H,B**H)**H. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX array, dimension (LDU,M) +*> If JOBU = 'U', U contains the unitary matrix U. +*> If JOBU = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,M) if +*> JOBU = 'U'; LDU >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX array, dimension (LDV,P) +*> If JOBV = 'V', V contains the unitary matrix V. +*> If JOBV = 'N', V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,P) if +*> JOBV = 'V'; LDV >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ,N) +*> If JOBQ = 'Q', Q contains the unitary matrix Q. +*> If JOBQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N) if +*> JOBQ = 'Q'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date August 2015 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The subroutine uses LAPACK subroutine CGEQP3 for the QR factorization +*> with column pivoting to detect the effective numerical rank of the +*> a matrix. It may be replaced by a better rank determination strategy. +*> +*> CGGSVP3 replaces the deprecated subroutine CGGSVP. +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CGGSVP3( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, + $ TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, + $ IWORK, RWORK, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* August 2015 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, + $ LWORK + REAL TOLA, TOLB +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ) + COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL FORWRD, WANTQ, WANTU, WANTV, LQUERY + INTEGER I, J, LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEQP3, CGEQR2, CGERQ2, CLACPY, CLAPMT, + $ CLASET, CUNG2R, CUNM2R, CUNMR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) + FORWRD = .TRUE. + LQUERY = ( LWORK.EQ.-1 ) + LWKOPT = 1 +* +* Test the input arguments +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( P.LT.0 ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -10 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -24 + END IF +* +* Compute workspace +* + IF( INFO.EQ.0 ) THEN + CALL CGEQP3( P, N, B, LDB, IWORK, TAU, WORK, -1, RWORK, INFO ) + LWKOPT = INT( WORK ( 1 ) ) + IF( WANTV ) THEN + LWKOPT = MAX( LWKOPT, P ) + END IF + LWKOPT = MAX( LWKOPT, MIN( N, P ) ) + LWKOPT = MAX( LWKOPT, M ) + IF( WANTQ ) THEN + LWKOPT = MAX( LWKOPT, N ) + END IF + CALL CGEQP3( M, N, A, LDA, IWORK, TAU, WORK, -1, RWORK, INFO ) + LWKOPT = MAX( LWKOPT, INT( WORK ( 1 ) ) ) + LWKOPT = MAX( 1, LWKOPT ) + WORK( 1 ) = CMPLX( LWKOPT ) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGGSVP3', -INFO ) + RETURN + END IF + IF( LQUERY ) THEN + RETURN + ENDIF +* +* QR with column pivoting of B: B*P = V*( S11 S12 ) +* ( 0 0 ) +* + DO 10 I = 1, N + IWORK( I ) = 0 + 10 CONTINUE + CALL CGEQP3( P, N, B, LDB, IWORK, TAU, WORK, LWORK, RWORK, INFO ) +* +* Update A := A*P +* + CALL CLAPMT( FORWRD, M, N, A, LDA, IWORK ) +* +* Determine the effective rank of matrix B. +* + L = 0 + DO 20 I = 1, MIN( P, N ) + IF( ABS( B( I, I ) ).GT.TOLB ) + $ L = L + 1 + 20 CONTINUE +* + IF( WANTV ) THEN +* +* Copy the details of V, and form V. +* + CALL CLASET( 'Full', P, P, CZERO, CZERO, V, LDV ) + IF( P.GT.1 ) + $ CALL CLACPY( 'Lower', P-1, N, B( 2, 1 ), LDB, V( 2, 1 ), + $ LDV ) + CALL CUNG2R( P, P, MIN( P, N ), V, LDV, TAU, WORK, INFO ) + END IF +* +* Clean up B +* + DO 40 J = 1, L - 1 + DO 30 I = J + 1, L + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE + IF( P.GT.L ) + $ CALL CLASET( 'Full', P-L, N, CZERO, CZERO, B( L+1, 1 ), LDB ) +* + IF( WANTQ ) THEN +* +* Set Q = I and Update Q := Q*P +* + CALL CLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) + CALL CLAPMT( FORWRD, N, N, Q, LDQ, IWORK ) + END IF +* + IF( P.GE.L .AND. N.NE.L ) THEN +* +* RQ factorization of ( S11 S12 ) = ( 0 S12 )*Z +* + CALL CGERQ2( L, N, B, LDB, TAU, WORK, INFO ) +* +* Update A := A*Z**H +* + CALL CUNMR2( 'Right', 'Conjugate transpose', M, N, L, B, LDB, + $ TAU, A, LDA, WORK, INFO ) + IF( WANTQ ) THEN +* +* Update Q := Q*Z**H +* + CALL CUNMR2( 'Right', 'Conjugate transpose', N, N, L, B, + $ LDB, TAU, Q, LDQ, WORK, INFO ) + END IF +* +* Clean up B +* + CALL CLASET( 'Full', L, N-L, CZERO, CZERO, B, LDB ) + DO 60 J = N - L + 1, N + DO 50 I = J - N + L + 1, L + B( I, J ) = CZERO + 50 CONTINUE + 60 CONTINUE +* + END IF +* +* Let N-L L +* A = ( A11 A12 ) M, +* +* then the following does the complete QR decomposition of A11: +* +* A11 = U*( 0 T12 )*P1**H +* ( 0 0 ) +* + DO 70 I = 1, N - L + IWORK( I ) = 0 + 70 CONTINUE + CALL CGEQP3( M, N-L, A, LDA, IWORK, TAU, WORK, LWORK, RWORK, + $ INFO ) +* +* Determine the effective rank of A11 +* + K = 0 + DO 80 I = 1, MIN( M, N-L ) + IF( ABS( A( I, I ) ).GT.TOLA ) + $ K = K + 1 + 80 CONTINUE +* +* Update A12 := U**H*A12, where A12 = A( 1:M, N-L+1:N ) +* + CALL CUNM2R( 'Left', 'Conjugate transpose', M, L, MIN( M, N-L ), + $ A, LDA, TAU, A( 1, N-L+1 ), LDA, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Copy the details of U, and form U +* + CALL CLASET( 'Full', M, M, CZERO, CZERO, U, LDU ) + IF( M.GT.1 ) + $ CALL CLACPY( 'Lower', M-1, N-L, A( 2, 1 ), LDA, U( 2, 1 ), + $ LDU ) + CALL CUNG2R( M, M, MIN( M, N-L ), U, LDU, TAU, WORK, INFO ) + END IF +* + IF( WANTQ ) THEN +* +* Update Q( 1:N, 1:N-L ) = Q( 1:N, 1:N-L )*P1 +* + CALL CLAPMT( FORWRD, N, N-L, Q, LDQ, IWORK ) + END IF +* +* Clean up A: set the strictly lower triangular part of +* A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. +* + DO 100 J = 1, K - 1 + DO 90 I = J + 1, K + A( I, J ) = CZERO + 90 CONTINUE + 100 CONTINUE + IF( M.GT.K ) + $ CALL CLASET( 'Full', M-K, N-L, CZERO, CZERO, A( K+1, 1 ), LDA ) +* + IF( N-L.GT.K ) THEN +* +* RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 +* + CALL CGERQ2( K, N-L, A, LDA, TAU, WORK, INFO ) +* + IF( WANTQ ) THEN +* +* Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1**H +* + CALL CUNMR2( 'Right', 'Conjugate transpose', N, N-L, K, A, + $ LDA, TAU, Q, LDQ, WORK, INFO ) + END IF +* +* Clean up A +* + CALL CLASET( 'Full', K, N-L-K, CZERO, CZERO, A, LDA ) + DO 120 J = N - L - K + 1, N - L + DO 110 I = J - N + L + K + 1, K + A( I, J ) = CZERO + 110 CONTINUE + 120 CONTINUE +* + END IF +* + IF( M.GT.K ) THEN +* +* QR factorization of A( K+1:M,N-L+1:N ) +* + CALL CGEQR2( M-K, L, A( K+1, N-L+1 ), LDA, TAU, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Update U(:,K+1:M) := U(:,K+1:M)*U1 +* + CALL CUNM2R( 'Right', 'No transpose', M, M-K, MIN( M-K, L ), + $ A( K+1, N-L+1 ), LDA, TAU, U( 1, K+1 ), LDU, + $ WORK, INFO ) + END IF +* +* Clean up +* + DO 140 J = N - L + 1, N + DO 130 I = J - N + K + L + 1, M + A( I, J ) = CZERO + 130 CONTINUE + 140 CONTINUE +* + END IF +* + WORK( 1 ) = CMPLX( LWKOPT ) + RETURN +* +* End of CGGSVP3 +* + END diff --git a/dspl/liblapack/SRC/cgsvj0.f b/dspl/liblapack/SRC/cgsvj0.f new file mode 100644 index 0000000..80e67a0 --- /dev/null +++ b/dspl/liblapack/SRC/cgsvj0.f @@ -0,0 +1,935 @@ +*> \brief \b CGSVJ0 pre-processor for the routine cgesvj. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGSVJ0 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS, +* SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP +* REAL EPS, SFMIN, TOL +* CHARACTER*1 JOBV +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), D( N ), V( LDV, * ), WORK( LWORK ) +* REAL SVA( N ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGSVJ0 is called from CGESVJ as a pre-processor and that is its main +*> purpose. It applies Jacobi rotations in the same way as CGESVJ does, but +*> it does not check convergence (stopping criterion). Few tuning +*> parameters (marked by [TP]) are available for the implementer. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> Specifies whether the output from this procedure is used +*> to compute the matrix V: +*> = 'V': the product of the Jacobi rotations is accumulated +*> by postmulyiplying the N-by-N array V. +*> (See the description of V.) +*> = 'A': the product of the Jacobi rotations is accumulated +*> by postmulyiplying the MV-by-N array V. +*> (See the descriptions of MV and V.) +*> = 'N': the Jacobi rotations are not accumulated. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. +*> M >= N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, M-by-N matrix A, such that A*diag(D) represents +*> the input matrix. +*> On exit, +*> A_onexit * diag(D_onexit) represents the input matrix A*diag(D) +*> post-multiplied by a sequence of Jacobi rotations, where the +*> rotation threshold and the total number of sweeps are given in +*> TOL and NSWEEP, respectively. +*> (See the descriptions of D, TOL and NSWEEP.) +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is COMPLEX array, dimension (N) +*> The array D accumulates the scaling factors from the complex scaled +*> Jacobi rotations. +*> On entry, A*diag(D) represents the input matrix. +*> On exit, A_onexit*diag(D_onexit) represents the input matrix +*> post-multiplied by a sequence of Jacobi rotations, where the +*> rotation threshold and the total number of sweeps are given in +*> TOL and NSWEEP, respectively. +*> (See the descriptions of A, TOL and NSWEEP.) +*> \endverbatim +*> +*> \param[in,out] SVA +*> \verbatim +*> SVA is REAL array, dimension (N) +*> On entry, SVA contains the Euclidean norms of the columns of +*> the matrix A*diag(D). +*> On exit, SVA contains the Euclidean norms of the columns of +*> the matrix A_onexit*diag(D_onexit). +*> \endverbatim +*> +*> \param[in] MV +*> \verbatim +*> MV is INTEGER +*> If JOBV .EQ. 'A', then MV rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV = 'N', then MV is not referenced. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is COMPLEX array, dimension (LDV,N) +*> If JOBV .EQ. 'V' then N rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV .EQ. 'A' then MV rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV = 'N', then V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V, LDV >= 1. +*> If JOBV = 'V', LDV .GE. N. +*> If JOBV = 'A', LDV .GE. MV. +*> \endverbatim +*> +*> \param[in] EPS +*> \verbatim +*> EPS is REAL +*> EPS = SLAMCH('Epsilon') +*> \endverbatim +*> +*> \param[in] SFMIN +*> \verbatim +*> SFMIN is REAL +*> SFMIN = SLAMCH('Safe Minimum') +*> \endverbatim +*> +*> \param[in] TOL +*> \verbatim +*> TOL is REAL +*> TOL is the threshold for Jacobi rotations. For a pair +*> A(:,p), A(:,q) of pivot columns, the Jacobi rotation is +*> applied only if ABS(COS(angle(A(:,p),A(:,q)))) .GT. TOL. +*> \endverbatim +*> +*> \param[in] NSWEEP +*> \verbatim +*> NSWEEP is INTEGER +*> NSWEEP is the number of sweeps of Jacobi rotations to be +*> performed. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> LWORK is the dimension of WORK. LWORK .GE. M. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0 : successful exit. +*> < 0 : if INFO = -i, then the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> CGSVJ0 is used just to enable CGESVJ to call a simplified version of +*> itself to work on a submatrix of the original matrix. +*> +*> \par Contributor: +* ================== +*> +*> Zlatko Drmac (Zagreb, Croatia) +*> +*> \par Bugs, Examples and Comments: +* ================================= +*> +*> Please report all bugs and send interesting test examples and comments to +*> drmac@math.hr. Thank you. +* +* ===================================================================== + SUBROUTINE CGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS, + $ SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* + IMPLICIT NONE +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP + REAL EPS, SFMIN, TOL + CHARACTER*1 JOBV +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), D( N ), V( LDV, * ), WORK( LWORK ) + REAL SVA( N ) +* .. +* +* ===================================================================== +* +* .. Local Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E0, HALF = 0.5E0, ONE = 1.0E0) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = (0.0E0, 0.0E0), CONE = (1.0E0, 0.0E0) ) +* .. +* .. Local Scalars .. + COMPLEX AAPQ, OMPQ + REAL AAPP, AAPP0, AAPQ1, AAQQ, APOAQ, AQOAP, BIG, + $ BIGTHETA, CS, MXAAPQ, MXSINJ, ROOTBIG, ROOTEPS, + $ ROOTSFMIN, ROOTTOL, SMALL, SN, T, TEMP1, THETA, + $ THSIGN + INTEGER BLSKIP, EMPTSW, i, ibr, IERR, igl, IJBLSK, ir1, + $ ISWROT, jbc, jgl, KBL, LKAHEAD, MVL, NBL, + $ NOTROT, p, PSKIPPED, q, ROWSKIP, SWBAND + LOGICAL APPLV, ROTOK, RSVEC +* .. +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, CONJG, REAL, MIN, SIGN, SQRT +* .. +* .. External Functions .. + REAL SCNRM2 + COMPLEX CDOTC + INTEGER ISAMAX + LOGICAL LSAME + EXTERNAL ISAMAX, LSAME, CDOTC, SCNRM2 +* .. +* .. +* .. External Subroutines .. +* .. +* from BLAS + EXTERNAL CCOPY, CROT, CSWAP, CAXPY +* from LAPACK + EXTERNAL CLASCL, CLASSQ, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + APPLV = LSAME( JOBV, 'A' ) + RSVEC = LSAME( JOBV, 'V' ) + IF( .NOT.( RSVEC .OR. APPLV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( ( N.LT.0 ) .OR. ( N.GT.M ) ) THEN + INFO = -3 + ELSE IF( LDA.LT.M ) THEN + INFO = -5 + ELSE IF( ( RSVEC.OR.APPLV ) .AND. ( MV.LT.0 ) ) THEN + INFO = -8 + ELSE IF( ( RSVEC.AND.( LDV.LT.N ) ).OR. + $ ( APPLV.AND.( LDV.LT.MV ) ) ) THEN + INFO = -10 + ELSE IF( TOL.LE.EPS ) THEN + INFO = -13 + ELSE IF( NSWEEP.LT.0 ) THEN + INFO = -14 + ELSE IF( LWORK.LT.M ) THEN + INFO = -16 + ELSE + INFO = 0 + END IF +* +* #:( + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGSVJ0', -INFO ) + RETURN + END IF +* + IF( RSVEC ) THEN + MVL = N + ELSE IF( APPLV ) THEN + MVL = MV + END IF + RSVEC = RSVEC .OR. APPLV + + ROOTEPS = SQRT( EPS ) + ROOTSFMIN = SQRT( SFMIN ) + SMALL = SFMIN / EPS + BIG = ONE / SFMIN + ROOTBIG = ONE / ROOTSFMIN + BIGTHETA = ONE / ROOTEPS + ROOTTOL = SQRT( TOL ) +* +* .. Row-cyclic Jacobi SVD algorithm with column pivoting .. +* + EMPTSW = ( N*( N-1 ) ) / 2 + NOTROT = 0 +* +* .. Row-cyclic pivot strategy with de Rijk's pivoting .. +* + + SWBAND = 0 +*[TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective +* if CGESVJ is used as a computational routine in the preconditioned +* Jacobi SVD algorithm CGEJSV. For sweeps i=1:SWBAND the procedure +* works on pivots inside a band-like region around the diagonal. +* The boundaries are determined dynamically, based on the number of +* pivots above a threshold. +* + KBL = MIN( 8, N ) +*[TP] KBL is a tuning parameter that defines the tile size in the +* tiling of the p-q loops of pivot pairs. In general, an optimal +* value of KBL depends on the matrix dimensions and on the +* parameters of the computer's memory. +* + NBL = N / KBL + IF( ( NBL*KBL ).NE.N )NBL = NBL + 1 +* + BLSKIP = KBL**2 +*[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. +* + ROWSKIP = MIN( 5, KBL ) +*[TP] ROWSKIP is a tuning parameter. +* + LKAHEAD = 1 +*[TP] LKAHEAD is a tuning parameter. +* +* Quasi block transformations, using the lower (upper) triangular +* structure of the input matrix. The quasi-block-cycling usually +* invokes cubic convergence. Big part of this cycle is done inside +* canonical subspaces of dimensions less than M. +* +* +* .. Row-cyclic pivot strategy with de Rijk's pivoting .. +* + DO 1993 i = 1, NSWEEP +* +* .. go go go ... +* + MXAAPQ = ZERO + MXSINJ = ZERO + ISWROT = 0 +* + NOTROT = 0 + PSKIPPED = 0 +* +* Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs +* 1 <= p < q <= N. This is the first step toward a blocked implementation +* of the rotations. New implementation, based on block transformations, +* is under development. +* + DO 2000 ibr = 1, NBL +* + igl = ( ibr-1 )*KBL + 1 +* + DO 1002 ir1 = 0, MIN( LKAHEAD, NBL-ibr ) +* + igl = igl + ir1*KBL +* + DO 2001 p = igl, MIN( igl+KBL-1, N-1 ) +* +* .. de Rijk's pivoting +* + q = ISAMAX( N-p+1, SVA( p ), 1 ) + p - 1 + IF( p.NE.q ) THEN + CALL CSWAP( M, A( 1, p ), 1, A( 1, q ), 1 ) + IF( RSVEC )CALL CSWAP( MVL, V( 1, p ), 1, + $ V( 1, q ), 1 ) + TEMP1 = SVA( p ) + SVA( p ) = SVA( q ) + SVA( q ) = TEMP1 + AAPQ = D(p) + D(p) = D(q) + D(q) = AAPQ + END IF +* + IF( ir1.EQ.0 ) THEN +* +* Column norms are periodically updated by explicit +* norm computation. +* Caveat: +* Unfortunately, some BLAS implementations compute SNCRM2(M,A(1,p),1) +* as SQRT(S=CDOTC(M,A(1,p),1,A(1,p),1)), which may cause the result to +* overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and to +* underflow for ||A(:,p)||_2 < SQRT(underflow_threshold). +* Hence, SCNRM2 cannot be trusted, not even in the case when +* the true norm is far from the under(over)flow boundaries. +* If properly implemented SCNRM2 is available, the IF-THEN-ELSE-END IF +* below should be replaced with "AAPP = SCNRM2( M, A(1,p), 1 )". +* + IF( ( SVA( p ).LT.ROOTBIG ) .AND. + $ ( SVA( p ).GT.ROOTSFMIN ) ) THEN + SVA( p ) = SCNRM2( M, A( 1, p ), 1 ) + ELSE + TEMP1 = ZERO + AAPP = ONE + CALL CLASSQ( M, A( 1, p ), 1, TEMP1, AAPP ) + SVA( p ) = TEMP1*SQRT( AAPP ) + END IF + AAPP = SVA( p ) + ELSE + AAPP = SVA( p ) + END IF +* + IF( AAPP.GT.ZERO ) THEN +* + PSKIPPED = 0 +* + DO 2002 q = p + 1, MIN( igl+KBL-1, N ) +* + AAQQ = SVA( q ) +* + IF( AAQQ.GT.ZERO ) THEN +* + AAPP0 = AAPP + IF( AAQQ.GE.ONE ) THEN + ROTOK = ( SMALL*AAPP ).LE.AAQQ + IF( AAPP.LT.( BIG / AAQQ ) ) THEN + AAPQ = ( CDOTC( M, A( 1, p ), 1, + $ A( 1, q ), 1 ) / AAQQ ) / AAPP + ELSE + CALL CCOPY( M, A( 1, p ), 1, + $ WORK, 1 ) + CALL CLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, WORK, LDA, IERR ) + AAPQ = CDOTC( M, WORK, 1, + $ A( 1, q ), 1 ) / AAQQ + END IF + ELSE + ROTOK = AAPP.LE.( AAQQ / SMALL ) + IF( AAPP.GT.( SMALL / AAQQ ) ) THEN + AAPQ = ( CDOTC( M, A( 1, p ), 1, + $ A( 1, q ), 1 ) / AAPP ) / AAQQ + ELSE + CALL CCOPY( M, A( 1, q ), 1, + $ WORK, 1 ) + CALL CLASCL( 'G', 0, 0, AAQQ, + $ ONE, M, 1, + $ WORK, LDA, IERR ) + AAPQ = CDOTC( M, A( 1, p ), 1, + $ WORK, 1 ) / AAPP + END IF + END IF +* +* AAPQ = AAPQ * CONJG( CWORK(p) ) * CWORK(q) + AAPQ1 = -ABS(AAPQ) + MXAAPQ = MAX( MXAAPQ, -AAPQ1 ) +* +* TO rotate or NOT to rotate, THAT is the question ... +* + IF( ABS( AAPQ1 ).GT.TOL ) THEN + OMPQ = AAPQ / ABS(AAPQ) +* +* .. rotate +*[RTD] ROTATED = ROTATED + ONE +* + IF( ir1.EQ.0 ) THEN + NOTROT = 0 + PSKIPPED = 0 + ISWROT = ISWROT + 1 + END IF +* + IF( ROTOK ) THEN +* + AQOAP = AAQQ / AAPP + APOAQ = AAPP / AAQQ + THETA = -HALF*ABS( AQOAP-APOAQ )/AAPQ1 +* + IF( ABS( THETA ).GT.BIGTHETA ) THEN +* + T = HALF / THETA + CS = ONE + + CALL CROT( M, A(1,p), 1, A(1,q), 1, + $ CS, CONJG(OMPQ)*T ) + IF ( RSVEC ) THEN + CALL CROT( MVL, V(1,p), 1, + $ V(1,q), 1, CS, CONJG(OMPQ)*T ) + END IF + + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ1 ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, ABS( T ) ) +* + ELSE +* +* .. choose correct signum for THETA and rotate +* + THSIGN = -SIGN( ONE, AAPQ1 ) + T = ONE / ( THETA+THSIGN* + $ SQRT( ONE+THETA*THETA ) ) + CS = SQRT( ONE / ( ONE+T*T ) ) + SN = T*CS +* + MXSINJ = MAX( MXSINJ, ABS( SN ) ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ1 ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ1 ) ) +* + CALL CROT( M, A(1,p), 1, A(1,q), 1, + $ CS, CONJG(OMPQ)*SN ) + IF ( RSVEC ) THEN + CALL CROT( MVL, V(1,p), 1, + $ V(1,q), 1, CS, CONJG(OMPQ)*SN ) + END IF + END IF + D(p) = -D(q) * OMPQ +* + ELSE +* .. have to use modified Gram-Schmidt like transformation + CALL CCOPY( M, A( 1, p ), 1, + $ WORK, 1 ) + CALL CLASCL( 'G', 0, 0, AAPP, ONE, M, + $ 1, WORK, LDA, + $ IERR ) + CALL CLASCL( 'G', 0, 0, AAQQ, ONE, M, + $ 1, A( 1, q ), LDA, IERR ) + CALL CAXPY( M, -AAPQ, WORK, 1, + $ A( 1, q ), 1 ) + CALL CLASCL( 'G', 0, 0, ONE, AAQQ, M, + $ 1, A( 1, q ), LDA, IERR ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE-AAPQ1*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + END IF +* END IF ROTOK THEN ... ELSE +* +* In the case of cancellation in updating SVA(q), SVA(p) +* recompute SVA(q), SVA(p). +* + IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS ) + $ THEN + IF( ( AAQQ.LT.ROOTBIG ) .AND. + $ ( AAQQ.GT.ROOTSFMIN ) ) THEN + SVA( q ) = SCNRM2( M, A( 1, q ), 1 ) + ELSE + T = ZERO + AAQQ = ONE + CALL CLASSQ( M, A( 1, q ), 1, T, + $ AAQQ ) + SVA( q ) = T*SQRT( AAQQ ) + END IF + END IF + IF( ( AAPP / AAPP0 ).LE.ROOTEPS ) THEN + IF( ( AAPP.LT.ROOTBIG ) .AND. + $ ( AAPP.GT.ROOTSFMIN ) ) THEN + AAPP = SCNRM2( M, A( 1, p ), 1 ) + ELSE + T = ZERO + AAPP = ONE + CALL CLASSQ( M, A( 1, p ), 1, T, + $ AAPP ) + AAPP = T*SQRT( AAPP ) + END IF + SVA( p ) = AAPP + END IF +* + ELSE +* A(:,p) and A(:,q) already numerically orthogonal + IF( ir1.EQ.0 )NOTROT = NOTROT + 1 +*[RTD] SKIPPED = SKIPPED + 1 + PSKIPPED = PSKIPPED + 1 + END IF + ELSE +* A(:,q) is zero column + IF( ir1.EQ.0 )NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + END IF +* + IF( ( i.LE.SWBAND ) .AND. + $ ( PSKIPPED.GT.ROWSKIP ) ) THEN + IF( ir1.EQ.0 )AAPP = -AAPP + NOTROT = 0 + GO TO 2103 + END IF +* + 2002 CONTINUE +* END q-LOOP +* + 2103 CONTINUE +* bailed out of q-loop +* + SVA( p ) = AAPP +* + ELSE + SVA( p ) = AAPP + IF( ( ir1.EQ.0 ) .AND. ( AAPP.EQ.ZERO ) ) + $ NOTROT = NOTROT + MIN( igl+KBL-1, N ) - p + END IF +* + 2001 CONTINUE +* end of the p-loop +* end of doing the block ( ibr, ibr ) + 1002 CONTINUE +* end of ir1-loop +* +* ... go to the off diagonal blocks +* + igl = ( ibr-1 )*KBL + 1 +* + DO 2010 jbc = ibr + 1, NBL +* + jgl = ( jbc-1 )*KBL + 1 +* +* doing the block at ( ibr, jbc ) +* + IJBLSK = 0 + DO 2100 p = igl, MIN( igl+KBL-1, N ) +* + AAPP = SVA( p ) + IF( AAPP.GT.ZERO ) THEN +* + PSKIPPED = 0 +* + DO 2200 q = jgl, MIN( jgl+KBL-1, N ) +* + AAQQ = SVA( q ) + IF( AAQQ.GT.ZERO ) THEN + AAPP0 = AAPP +* +* .. M x 2 Jacobi SVD .. +* +* Safe Gram matrix computation +* + IF( AAQQ.GE.ONE ) THEN + IF( AAPP.GE.AAQQ ) THEN + ROTOK = ( SMALL*AAPP ).LE.AAQQ + ELSE + ROTOK = ( SMALL*AAQQ ).LE.AAPP + END IF + IF( AAPP.LT.( BIG / AAQQ ) ) THEN + AAPQ = ( CDOTC( M, A( 1, p ), 1, + $ A( 1, q ), 1 ) / AAQQ ) / AAPP + ELSE + CALL CCOPY( M, A( 1, p ), 1, + $ WORK, 1 ) + CALL CLASCL( 'G', 0, 0, AAPP, + $ ONE, M, 1, + $ WORK, LDA, IERR ) + AAPQ = CDOTC( M, WORK, 1, + $ A( 1, q ), 1 ) / AAQQ + END IF + ELSE + IF( AAPP.GE.AAQQ ) THEN + ROTOK = AAPP.LE.( AAQQ / SMALL ) + ELSE + ROTOK = AAQQ.LE.( AAPP / SMALL ) + END IF + IF( AAPP.GT.( SMALL / AAQQ ) ) THEN + AAPQ = ( CDOTC( M, A( 1, p ), 1, + $ A( 1, q ), 1 ) / MAX(AAQQ,AAPP) ) + $ / MIN(AAQQ,AAPP) + ELSE + CALL CCOPY( M, A( 1, q ), 1, + $ WORK, 1 ) + CALL CLASCL( 'G', 0, 0, AAQQ, + $ ONE, M, 1, + $ WORK, LDA, IERR ) + AAPQ = CDOTC( M, A( 1, p ), 1, + $ WORK, 1 ) / AAPP + END IF + END IF +* +* AAPQ = AAPQ * CONJG(CWORK(p))*CWORK(q) + AAPQ1 = -ABS(AAPQ) + MXAAPQ = MAX( MXAAPQ, -AAPQ1 ) +* +* TO rotate or NOT to rotate, THAT is the question ... +* + IF( ABS( AAPQ1 ).GT.TOL ) THEN + OMPQ = AAPQ / ABS(AAPQ) + NOTROT = 0 +*[RTD] ROTATED = ROTATED + 1 + PSKIPPED = 0 + ISWROT = ISWROT + 1 +* + IF( ROTOK ) THEN +* + AQOAP = AAQQ / AAPP + APOAQ = AAPP / AAQQ + THETA = -HALF*ABS( AQOAP-APOAQ )/ AAPQ1 + IF( AAQQ.GT.AAPP0 )THETA = -THETA +* + IF( ABS( THETA ).GT.BIGTHETA ) THEN + T = HALF / THETA + CS = ONE + CALL CROT( M, A(1,p), 1, A(1,q), 1, + $ CS, CONJG(OMPQ)*T ) + IF( RSVEC ) THEN + CALL CROT( MVL, V(1,p), 1, + $ V(1,q), 1, CS, CONJG(OMPQ)*T ) + END IF + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ1 ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, ABS( T ) ) + ELSE +* +* .. choose correct signum for THETA and rotate +* + THSIGN = -SIGN( ONE, AAPQ1 ) + IF( AAQQ.GT.AAPP0 )THSIGN = -THSIGN + T = ONE / ( THETA+THSIGN* + $ SQRT( ONE+THETA*THETA ) ) + CS = SQRT( ONE / ( ONE+T*T ) ) + SN = T*CS + MXSINJ = MAX( MXSINJ, ABS( SN ) ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ1 ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ1 ) ) +* + CALL CROT( M, A(1,p), 1, A(1,q), 1, + $ CS, CONJG(OMPQ)*SN ) + IF( RSVEC ) THEN + CALL CROT( MVL, V(1,p), 1, + $ V(1,q), 1, CS, CONJG(OMPQ)*SN ) + END IF + END IF + D(p) = -D(q) * OMPQ +* + ELSE +* .. have to use modified Gram-Schmidt like transformation + IF( AAPP.GT.AAQQ ) THEN + CALL CCOPY( M, A( 1, p ), 1, + $ WORK, 1 ) + CALL CLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, WORK,LDA, + $ IERR ) + CALL CLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + CALL CAXPY( M, -AAPQ, WORK, + $ 1, A( 1, q ), 1 ) + CALL CLASCL( 'G', 0, 0, ONE, AAQQ, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE-AAPQ1*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + ELSE + CALL CCOPY( M, A( 1, q ), 1, + $ WORK, 1 ) + CALL CLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, WORK,LDA, + $ IERR ) + CALL CLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + CALL CAXPY( M, -CONJG(AAPQ), + $ WORK, 1, A( 1, p ), 1 ) + CALL CLASCL( 'G', 0, 0, ONE, AAPP, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + SVA( p ) = AAPP*SQRT( MAX( ZERO, + $ ONE-AAPQ1*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + END IF + END IF +* END IF ROTOK THEN ... ELSE +* +* In the case of cancellation in updating SVA(q), SVA(p) +* .. recompute SVA(q), SVA(p) + IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS ) + $ THEN + IF( ( AAQQ.LT.ROOTBIG ) .AND. + $ ( AAQQ.GT.ROOTSFMIN ) ) THEN + SVA( q ) = SCNRM2( M, A( 1, q ), 1) + ELSE + T = ZERO + AAQQ = ONE + CALL CLASSQ( M, A( 1, q ), 1, T, + $ AAQQ ) + SVA( q ) = T*SQRT( AAQQ ) + END IF + END IF + IF( ( AAPP / AAPP0 )**2.LE.ROOTEPS ) THEN + IF( ( AAPP.LT.ROOTBIG ) .AND. + $ ( AAPP.GT.ROOTSFMIN ) ) THEN + AAPP = SCNRM2( M, A( 1, p ), 1 ) + ELSE + T = ZERO + AAPP = ONE + CALL CLASSQ( M, A( 1, p ), 1, T, + $ AAPP ) + AAPP = T*SQRT( AAPP ) + END IF + SVA( p ) = AAPP + END IF +* end of OK rotation + ELSE + NOTROT = NOTROT + 1 +*[RTD] SKIPPED = SKIPPED + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF + ELSE + NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF +* + IF( ( i.LE.SWBAND ) .AND. ( IJBLSK.GE.BLSKIP ) ) + $ THEN + SVA( p ) = AAPP + NOTROT = 0 + GO TO 2011 + END IF + IF( ( i.LE.SWBAND ) .AND. + $ ( PSKIPPED.GT.ROWSKIP ) ) THEN + AAPP = -AAPP + NOTROT = 0 + GO TO 2203 + END IF +* + 2200 CONTINUE +* end of the q-loop + 2203 CONTINUE +* + SVA( p ) = AAPP +* + ELSE +* + IF( AAPP.EQ.ZERO )NOTROT = NOTROT + + $ MIN( jgl+KBL-1, N ) - jgl + 1 + IF( AAPP.LT.ZERO )NOTROT = 0 +* + END IF +* + 2100 CONTINUE +* end of the p-loop + 2010 CONTINUE +* end of the jbc-loop + 2011 CONTINUE +*2011 bailed out of the jbc-loop + DO 2012 p = igl, MIN( igl+KBL-1, N ) + SVA( p ) = ABS( SVA( p ) ) + 2012 CONTINUE +*** + 2000 CONTINUE +*2000 :: end of the ibr-loop +* +* .. update SVA(N) + IF( ( SVA( N ).LT.ROOTBIG ) .AND. ( SVA( N ).GT.ROOTSFMIN ) ) + $ THEN + SVA( N ) = SCNRM2( M, A( 1, N ), 1 ) + ELSE + T = ZERO + AAPP = ONE + CALL CLASSQ( M, A( 1, N ), 1, T, AAPP ) + SVA( N ) = T*SQRT( AAPP ) + END IF +* +* Additional steering devices +* + IF( ( i.LT.SWBAND ) .AND. ( ( MXAAPQ.LE.ROOTTOL ) .OR. + $ ( ISWROT.LE.N ) ) )SWBAND = i +* + IF( ( i.GT.SWBAND+1 ) .AND. ( MXAAPQ.LT.SQRT( REAL( N ) )* + $ TOL ) .AND. ( REAL( N )*MXAAPQ*MXSINJ.LT.TOL ) ) THEN + GO TO 1994 + END IF +* + IF( NOTROT.GE.EMPTSW )GO TO 1994 +* + 1993 CONTINUE +* end i=1:NSWEEP loop +* +* #:( Reaching this point means that the procedure has not converged. + INFO = NSWEEP - 1 + GO TO 1995 +* + 1994 CONTINUE +* #:) Reaching this point means numerical convergence after the i-th +* sweep. +* + INFO = 0 +* #:) INFO = 0 confirms successful iterations. + 1995 CONTINUE +* +* Sort the vector SVA() of column norms. + DO 5991 p = 1, N - 1 + q = ISAMAX( N-p+1, SVA( p ), 1 ) + p - 1 + IF( p.NE.q ) THEN + TEMP1 = SVA( p ) + SVA( p ) = SVA( q ) + SVA( q ) = TEMP1 + AAPQ = D( p ) + D( p ) = D( q ) + D( q ) = AAPQ + CALL CSWAP( M, A( 1, p ), 1, A( 1, q ), 1 ) + IF( RSVEC )CALL CSWAP( MVL, V( 1, p ), 1, V( 1, q ), 1 ) + END IF + 5991 CONTINUE +* + RETURN +* .. +* .. END OF CGSVJ0 +* .. + END diff --git a/dspl/liblapack/SRC/cgsvj1.f b/dspl/liblapack/SRC/cgsvj1.f new file mode 100644 index 0000000..bebcd5c --- /dev/null +++ b/dspl/liblapack/SRC/cgsvj1.f @@ -0,0 +1,705 @@ +*> \brief \b CGSVJ1 pre-processor for the routine cgesvj, applies Jacobi rotations targeting only particular pivots. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGSVJ1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV, +* EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* REAL EPS, SFMIN, TOL +* INTEGER INFO, LDA, LDV, LWORK, M, MV, N, N1, NSWEEP +* CHARACTER*1 JOBV +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), D( N ), V( LDV, * ), WORK( LWORK ) +* REAL SVA( N ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGSVJ1 is called from CGESVJ as a pre-processor and that is its main +*> purpose. It applies Jacobi rotations in the same way as CGESVJ does, but +*> it targets only particular pivots and it does not check convergence +*> (stopping criterion). Few tunning parameters (marked by [TP]) are +*> available for the implementer. +*> +*> Further Details +*> ~~~~~~~~~~~~~~~ +*> CGSVJ1 applies few sweeps of Jacobi rotations in the column space of +*> the input M-by-N matrix A. The pivot pairs are taken from the (1,2) +*> off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The +*> block-entries (tiles) of the (1,2) off-diagonal block are marked by the +*> [x]'s in the following scheme: +*> +*> | * * * [x] [x] [x]| +*> | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. +*> | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. +*> |[x] [x] [x] * * * | +*> |[x] [x] [x] * * * | +*> |[x] [x] [x] * * * | +*> +*> In terms of the columns of A, the first N1 columns are rotated 'against' +*> the remaining N-N1 columns, trying to increase the angle between the +*> corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is +*> tiled using quadratic tiles of side KBL. Here, KBL is a tunning parmeter. +*> The number of sweeps is given in NSWEEP and the orthogonality threshold +*> is given in TOL. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> Specifies whether the output from this procedure is used +*> to compute the matrix V: +*> = 'V': the product of the Jacobi rotations is accumulated +*> by postmulyiplying the N-by-N array V. +*> (See the description of V.) +*> = 'A': the product of the Jacobi rotations is accumulated +*> by postmulyiplying the MV-by-N array V. +*> (See the descriptions of MV and V.) +*> = 'N': the Jacobi rotations are not accumulated. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. +*> M >= N >= 0. +*> \endverbatim +*> +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> N1 specifies the 2 x 2 block partition, the first N1 columns are +*> rotated 'against' the remaining N-N1 columns of A. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, M-by-N matrix A, such that A*diag(D) represents +*> the input matrix. +*> On exit, +*> A_onexit * D_onexit represents the input matrix A*diag(D) +*> post-multiplied by a sequence of Jacobi rotations, where the +*> rotation threshold and the total number of sweeps are given in +*> TOL and NSWEEP, respectively. +*> (See the descriptions of N1, D, TOL and NSWEEP.) +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is COMPLEX array, dimension (N) +*> The array D accumulates the scaling factors from the fast scaled +*> Jacobi rotations. +*> On entry, A*diag(D) represents the input matrix. +*> On exit, A_onexit*diag(D_onexit) represents the input matrix +*> post-multiplied by a sequence of Jacobi rotations, where the +*> rotation threshold and the total number of sweeps are given in +*> TOL and NSWEEP, respectively. +*> (See the descriptions of N1, A, TOL and NSWEEP.) +*> \endverbatim +*> +*> \param[in,out] SVA +*> \verbatim +*> SVA is REAL array, dimension (N) +*> On entry, SVA contains the Euclidean norms of the columns of +*> the matrix A*diag(D). +*> On exit, SVA contains the Euclidean norms of the columns of +*> the matrix onexit*diag(D_onexit). +*> \endverbatim +*> +*> \param[in] MV +*> \verbatim +*> MV is INTEGER +*> If JOBV .EQ. 'A', then MV rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV = 'N', then MV is not referenced. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is COMPLEX array, dimension (LDV,N) +*> If JOBV .EQ. 'V' then N rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV .EQ. 'A' then MV rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV = 'N', then V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V, LDV >= 1. +*> If JOBV = 'V', LDV .GE. N. +*> If JOBV = 'A', LDV .GE. MV. +*> \endverbatim +*> +*> \param[in] EPS +*> \verbatim +*> EPS is REAL +*> EPS = SLAMCH('Epsilon') +*> \endverbatim +*> +*> \param[in] SFMIN +*> \verbatim +*> SFMIN is REAL +*> SFMIN = SLAMCH('Safe Minimum') +*> \endverbatim +*> +*> \param[in] TOL +*> \verbatim +*> TOL is REAL +*> TOL is the threshold for Jacobi rotations. For a pair +*> A(:,p), A(:,q) of pivot columns, the Jacobi rotation is +*> applied only if ABS(COS(angle(A(:,p),A(:,q)))) .GT. TOL. +*> \endverbatim +*> +*> \param[in] NSWEEP +*> \verbatim +*> NSWEEP is INTEGER +*> NSWEEP is the number of sweeps of Jacobi rotations to be +*> performed. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> LWORK is the dimension of WORK. LWORK .GE. M. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0 : successful exit. +*> < 0 : if INFO = -i, then the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Contributor: +* ================== +*> +*> Zlatko Drmac (Zagreb, Croatia) +* +* ===================================================================== + SUBROUTINE CGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV, + $ EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + REAL EPS, SFMIN, TOL + INTEGER INFO, LDA, LDV, LWORK, M, MV, N, N1, NSWEEP + CHARACTER*1 JOBV +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), D( N ), V( LDV, * ), WORK( LWORK ) + REAL SVA( N ) +* .. +* +* ===================================================================== +* +* .. Local Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E0, HALF = 0.5E0, ONE = 1.0E0) +* .. +* .. Local Scalars .. + COMPLEX AAPQ, OMPQ + REAL AAPP, AAPP0, AAPQ1, AAQQ, APOAQ, AQOAP, BIG, + $ BIGTHETA, CS, MXAAPQ, MXSINJ, ROOTBIG, + $ ROOTEPS, ROOTSFMIN, ROOTTOL, SMALL, SN, T, + $ TEMP1, THETA, THSIGN + INTEGER BLSKIP, EMPTSW, i, ibr, igl, IERR, IJBLSK, + $ ISWROT, jbc, jgl, KBL, MVL, NOTROT, nblc, nblr, + $ p, PSKIPPED, q, ROWSKIP, SWBAND + LOGICAL APPLV, ROTOK, RSVEC +* .. +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, CONJG, REAL, MIN, SIGN, SQRT +* .. +* .. External Functions .. + REAL SCNRM2 + COMPLEX CDOTC + INTEGER ISAMAX + LOGICAL LSAME + EXTERNAL ISAMAX, LSAME, CDOTC, SCNRM2 +* .. +* .. External Subroutines .. +* .. from BLAS + EXTERNAL CCOPY, CROT, CSWAP, CAXPY +* .. from LAPACK + EXTERNAL CLASCL, CLASSQ, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + APPLV = LSAME( JOBV, 'A' ) + RSVEC = LSAME( JOBV, 'V' ) + IF( .NOT.( RSVEC .OR. APPLV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( ( N.LT.0 ) .OR. ( N.GT.M ) ) THEN + INFO = -3 + ELSE IF( N1.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.M ) THEN + INFO = -6 + ELSE IF( ( RSVEC.OR.APPLV ) .AND. ( MV.LT.0 ) ) THEN + INFO = -9 + ELSE IF( ( RSVEC.AND.( LDV.LT.N ) ).OR. + $ ( APPLV.AND.( LDV.LT.MV ) ) ) THEN + INFO = -11 + ELSE IF( TOL.LE.EPS ) THEN + INFO = -14 + ELSE IF( NSWEEP.LT.0 ) THEN + INFO = -15 + ELSE IF( LWORK.LT.M ) THEN + INFO = -17 + ELSE + INFO = 0 + END IF +* +* #:( + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGSVJ1', -INFO ) + RETURN + END IF +* + IF( RSVEC ) THEN + MVL = N + ELSE IF( APPLV ) THEN + MVL = MV + END IF + RSVEC = RSVEC .OR. APPLV + + ROOTEPS = SQRT( EPS ) + ROOTSFMIN = SQRT( SFMIN ) + SMALL = SFMIN / EPS + BIG = ONE / SFMIN + ROOTBIG = ONE / ROOTSFMIN +* LARGE = BIG / SQRT( REAL( M*N ) ) + BIGTHETA = ONE / ROOTEPS + ROOTTOL = SQRT( TOL ) +* +* .. Initialize the right singular vector matrix .. +* +* RSVEC = LSAME( JOBV, 'Y' ) +* + EMPTSW = N1*( N-N1 ) + NOTROT = 0 +* +* .. Row-cyclic pivot strategy with de Rijk's pivoting .. +* + KBL = MIN( 8, N ) + NBLR = N1 / KBL + IF( ( NBLR*KBL ).NE.N1 )NBLR = NBLR + 1 + +* .. the tiling is nblr-by-nblc [tiles] + + NBLC = ( N-N1 ) / KBL + IF( ( NBLC*KBL ).NE.( N-N1 ) )NBLC = NBLC + 1 + BLSKIP = ( KBL**2 ) + 1 +*[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. + + ROWSKIP = MIN( 5, KBL ) +*[TP] ROWSKIP is a tuning parameter. + SWBAND = 0 +*[TP] SWBAND is a tuning parameter. It is meaningful and effective +* if CGESVJ is used as a computational routine in the preconditioned +* Jacobi SVD algorithm CGEJSV. +* +* +* | * * * [x] [x] [x]| +* | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. +* | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. +* |[x] [x] [x] * * * | +* |[x] [x] [x] * * * | +* |[x] [x] [x] * * * | +* +* + DO 1993 i = 1, NSWEEP +* +* .. go go go ... +* + MXAAPQ = ZERO + MXSINJ = ZERO + ISWROT = 0 +* + NOTROT = 0 + PSKIPPED = 0 +* +* Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs +* 1 <= p < q <= N. This is the first step toward a blocked implementation +* of the rotations. New implementation, based on block transformations, +* is under development. +* + DO 2000 ibr = 1, NBLR +* + igl = ( ibr-1 )*KBL + 1 +* + +* +* ... go to the off diagonal blocks +* + igl = ( ibr-1 )*KBL + 1 +* +* DO 2010 jbc = ibr + 1, NBL + DO 2010 jbc = 1, NBLC +* + jgl = ( jbc-1 )*KBL + N1 + 1 +* +* doing the block at ( ibr, jbc ) +* + IJBLSK = 0 + DO 2100 p = igl, MIN( igl+KBL-1, N1 ) +* + AAPP = SVA( p ) + IF( AAPP.GT.ZERO ) THEN +* + PSKIPPED = 0 +* + DO 2200 q = jgl, MIN( jgl+KBL-1, N ) +* + AAQQ = SVA( q ) + IF( AAQQ.GT.ZERO ) THEN + AAPP0 = AAPP +* +* .. M x 2 Jacobi SVD .. +* +* Safe Gram matrix computation +* + IF( AAQQ.GE.ONE ) THEN + IF( AAPP.GE.AAQQ ) THEN + ROTOK = ( SMALL*AAPP ).LE.AAQQ + ELSE + ROTOK = ( SMALL*AAQQ ).LE.AAPP + END IF + IF( AAPP.LT.( BIG / AAQQ ) ) THEN + AAPQ = ( CDOTC( M, A( 1, p ), 1, + $ A( 1, q ), 1 ) / AAQQ ) / AAPP + ELSE + CALL CCOPY( M, A( 1, p ), 1, + $ WORK, 1 ) + CALL CLASCL( 'G', 0, 0, AAPP, + $ ONE, M, 1, + $ WORK, LDA, IERR ) + AAPQ = CDOTC( M, WORK, 1, + $ A( 1, q ), 1 ) / AAQQ + END IF + ELSE + IF( AAPP.GE.AAQQ ) THEN + ROTOK = AAPP.LE.( AAQQ / SMALL ) + ELSE + ROTOK = AAQQ.LE.( AAPP / SMALL ) + END IF + IF( AAPP.GT.( SMALL / AAQQ ) ) THEN + AAPQ = ( CDOTC( M, A( 1, p ), 1, + $ A( 1, q ), 1 ) / MAX(AAQQ,AAPP) ) + $ / MIN(AAQQ,AAPP) + ELSE + CALL CCOPY( M, A( 1, q ), 1, + $ WORK, 1 ) + CALL CLASCL( 'G', 0, 0, AAQQ, + $ ONE, M, 1, + $ WORK, LDA, IERR ) + AAPQ = CDOTC( M, A( 1, p ), 1, + $ WORK, 1 ) / AAPP + END IF + END IF +* +* AAPQ = AAPQ * CONJG(CWORK(p))*CWORK(q) + AAPQ1 = -ABS(AAPQ) + MXAAPQ = MAX( MXAAPQ, -AAPQ1 ) +* +* TO rotate or NOT to rotate, THAT is the question ... +* + IF( ABS( AAPQ1 ).GT.TOL ) THEN + OMPQ = AAPQ / ABS(AAPQ) + NOTROT = 0 +*[RTD] ROTATED = ROTATED + 1 + PSKIPPED = 0 + ISWROT = ISWROT + 1 +* + IF( ROTOK ) THEN +* + AQOAP = AAQQ / AAPP + APOAQ = AAPP / AAQQ + THETA = -HALF*ABS( AQOAP-APOAQ )/ AAPQ1 + IF( AAQQ.GT.AAPP0 )THETA = -THETA +* + IF( ABS( THETA ).GT.BIGTHETA ) THEN + T = HALF / THETA + CS = ONE + CALL CROT( M, A(1,p), 1, A(1,q), 1, + $ CS, CONJG(OMPQ)*T ) + IF( RSVEC ) THEN + CALL CROT( MVL, V(1,p), 1, + $ V(1,q), 1, CS, CONJG(OMPQ)*T ) + END IF + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ1 ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, ABS( T ) ) + ELSE +* +* .. choose correct signum for THETA and rotate +* + THSIGN = -SIGN( ONE, AAPQ1 ) + IF( AAQQ.GT.AAPP0 )THSIGN = -THSIGN + T = ONE / ( THETA+THSIGN* + $ SQRT( ONE+THETA*THETA ) ) + CS = SQRT( ONE / ( ONE+T*T ) ) + SN = T*CS + MXSINJ = MAX( MXSINJ, ABS( SN ) ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ1 ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ1 ) ) +* + CALL CROT( M, A(1,p), 1, A(1,q), 1, + $ CS, CONJG(OMPQ)*SN ) + IF( RSVEC ) THEN + CALL CROT( MVL, V(1,p), 1, + $ V(1,q), 1, CS, CONJG(OMPQ)*SN ) + END IF + END IF + D(p) = -D(q) * OMPQ +* + ELSE +* .. have to use modified Gram-Schmidt like transformation + IF( AAPP.GT.AAQQ ) THEN + CALL CCOPY( M, A( 1, p ), 1, + $ WORK, 1 ) + CALL CLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, WORK,LDA, + $ IERR ) + CALL CLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + CALL CAXPY( M, -AAPQ, WORK, + $ 1, A( 1, q ), 1 ) + CALL CLASCL( 'G', 0, 0, ONE, AAQQ, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE-AAPQ1*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + ELSE + CALL CCOPY( M, A( 1, q ), 1, + $ WORK, 1 ) + CALL CLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, WORK,LDA, + $ IERR ) + CALL CLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + CALL CAXPY( M, -CONJG(AAPQ), + $ WORK, 1, A( 1, p ), 1 ) + CALL CLASCL( 'G', 0, 0, ONE, AAPP, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + SVA( p ) = AAPP*SQRT( MAX( ZERO, + $ ONE-AAPQ1*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + END IF + END IF +* END IF ROTOK THEN ... ELSE +* +* In the case of cancellation in updating SVA(q), SVA(p) +* .. recompute SVA(q), SVA(p) + IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS ) + $ THEN + IF( ( AAQQ.LT.ROOTBIG ) .AND. + $ ( AAQQ.GT.ROOTSFMIN ) ) THEN + SVA( q ) = SCNRM2( M, A( 1, q ), 1) + ELSE + T = ZERO + AAQQ = ONE + CALL CLASSQ( M, A( 1, q ), 1, T, + $ AAQQ ) + SVA( q ) = T*SQRT( AAQQ ) + END IF + END IF + IF( ( AAPP / AAPP0 )**2.LE.ROOTEPS ) THEN + IF( ( AAPP.LT.ROOTBIG ) .AND. + $ ( AAPP.GT.ROOTSFMIN ) ) THEN + AAPP = SCNRM2( M, A( 1, p ), 1 ) + ELSE + T = ZERO + AAPP = ONE + CALL CLASSQ( M, A( 1, p ), 1, T, + $ AAPP ) + AAPP = T*SQRT( AAPP ) + END IF + SVA( p ) = AAPP + END IF +* end of OK rotation + ELSE + NOTROT = NOTROT + 1 +*[RTD] SKIPPED = SKIPPED + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF + ELSE + NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF +* + IF( ( i.LE.SWBAND ) .AND. ( IJBLSK.GE.BLSKIP ) ) + $ THEN + SVA( p ) = AAPP + NOTROT = 0 + GO TO 2011 + END IF + IF( ( i.LE.SWBAND ) .AND. + $ ( PSKIPPED.GT.ROWSKIP ) ) THEN + AAPP = -AAPP + NOTROT = 0 + GO TO 2203 + END IF +* + 2200 CONTINUE +* end of the q-loop + 2203 CONTINUE +* + SVA( p ) = AAPP +* + ELSE +* + IF( AAPP.EQ.ZERO )NOTROT = NOTROT + + $ MIN( jgl+KBL-1, N ) - jgl + 1 + IF( AAPP.LT.ZERO )NOTROT = 0 +* + END IF +* + 2100 CONTINUE +* end of the p-loop + 2010 CONTINUE +* end of the jbc-loop + 2011 CONTINUE +*2011 bailed out of the jbc-loop + DO 2012 p = igl, MIN( igl+KBL-1, N ) + SVA( p ) = ABS( SVA( p ) ) + 2012 CONTINUE +*** + 2000 CONTINUE +*2000 :: end of the ibr-loop +* +* .. update SVA(N) + IF( ( SVA( N ).LT.ROOTBIG ) .AND. ( SVA( N ).GT.ROOTSFMIN ) ) + $ THEN + SVA( N ) = SCNRM2( M, A( 1, N ), 1 ) + ELSE + T = ZERO + AAPP = ONE + CALL CLASSQ( M, A( 1, N ), 1, T, AAPP ) + SVA( N ) = T*SQRT( AAPP ) + END IF +* +* Additional steering devices +* + IF( ( i.LT.SWBAND ) .AND. ( ( MXAAPQ.LE.ROOTTOL ) .OR. + $ ( ISWROT.LE.N ) ) )SWBAND = i +* + IF( ( i.GT.SWBAND+1 ) .AND. ( MXAAPQ.LT.SQRT( REAL( N ) )* + $ TOL ) .AND. ( REAL( N )*MXAAPQ*MXSINJ.LT.TOL ) ) THEN + GO TO 1994 + END IF +* + IF( NOTROT.GE.EMPTSW )GO TO 1994 +* + 1993 CONTINUE +* end i=1:NSWEEP loop +* +* #:( Reaching this point means that the procedure has not converged. + INFO = NSWEEP - 1 + GO TO 1995 +* + 1994 CONTINUE +* #:) Reaching this point means numerical convergence after the i-th +* sweep. +* + INFO = 0 +* #:) INFO = 0 confirms successful iterations. + 1995 CONTINUE +* +* Sort the vector SVA() of column norms. + DO 5991 p = 1, N - 1 + q = ISAMAX( N-p+1, SVA( p ), 1 ) + p - 1 + IF( p.NE.q ) THEN + TEMP1 = SVA( p ) + SVA( p ) = SVA( q ) + SVA( q ) = TEMP1 + AAPQ = D( p ) + D( p ) = D( q ) + D( q ) = AAPQ + CALL CSWAP( M, A( 1, p ), 1, A( 1, q ), 1 ) + IF( RSVEC )CALL CSWAP( MVL, V( 1, p ), 1, V( 1, q ), 1 ) + END IF + 5991 CONTINUE +* +* + RETURN +* .. +* .. END OF CGSVJ1 +* .. + END diff --git a/dspl/liblapack/SRC/cgtcon.f b/dspl/liblapack/SRC/cgtcon.f new file mode 100644 index 0000000..ed32605 --- /dev/null +++ b/dspl/liblapack/SRC/cgtcon.f @@ -0,0 +1,253 @@ +*> \brief \b CGTCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGTCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER INFO, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX D( * ), DL( * ), DU( * ), DU2( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGTCON estimates the reciprocal of the condition number of a complex +*> tridiagonal matrix A using the LU factorization as computed by +*> CGTTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is COMPLEX array, dimension (N-1) +*> The (n-1) multipliers that define the matrix L from the +*> LU factorization of A as computed by CGTTRF. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is COMPLEX array, dimension (N) +*> The n diagonal elements of the upper triangular matrix U from +*> the LU factorization of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is COMPLEX array, dimension (N-1) +*> The (n-1) elements of the first superdiagonal of U. +*> \endverbatim +*> +*> \param[in] DU2 +*> \verbatim +*> DU2 is COMPLEX array, dimension (N-2) +*> The (n-2) elements of the second superdiagonal of U. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= n, row i of the matrix was +*> interchanged with row IPIV(i). IPIV(i) will always be either +*> i or i+1; IPIV(i) = i indicates a row interchange was not +*> required. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> If NORM = '1' or 'O', the 1-norm of the original matrix A. +*> If NORM = 'I', the infinity-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGTcomputational +* +* ===================================================================== + SUBROUTINE CGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX D( * ), DL( * ), DU( * ), DU2( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + INTEGER I, KASE, KASE1 + REAL AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGTTRS, CLACN2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGTCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* +* Check that D(1:N) is non-zero. +* + DO 10 I = 1, N + IF( D( I ).EQ.CMPLX( ZERO ) ) + $ RETURN + 10 CONTINUE +* + AINVNM = ZERO + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 20 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(U)*inv(L). +* + CALL CGTTRS( 'No transpose', N, 1, DL, D, DU, DU2, IPIV, + $ WORK, N, INFO ) + ELSE +* +* Multiply by inv(L**H)*inv(U**H). +* + CALL CGTTRS( 'Conjugate transpose', N, 1, DL, D, DU, DU2, + $ IPIV, WORK, N, INFO ) + END IF + GO TO 20 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of CGTCON +* + END diff --git a/dspl/liblapack/SRC/cgtrfs.f b/dspl/liblapack/SRC/cgtrfs.f new file mode 100644 index 0000000..22979a7 --- /dev/null +++ b/dspl/liblapack/SRC/cgtrfs.f @@ -0,0 +1,487 @@ +*> \brief \b CGTRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGTRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, +* IPIV, B, LDB, X, LDX, FERR, BERR, WORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX B( LDB, * ), D( * ), DF( * ), DL( * ), +* $ DLF( * ), DU( * ), DU2( * ), DUF( * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGTRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is tridiagonal, and provides +*> error bounds and backward error estimates for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is COMPLEX array, dimension (N-1) +*> The (n-1) subdiagonal elements of A. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is COMPLEX array, dimension (N) +*> The diagonal elements of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is COMPLEX array, dimension (N-1) +*> The (n-1) superdiagonal elements of A. +*> \endverbatim +*> +*> \param[in] DLF +*> \verbatim +*> DLF is COMPLEX array, dimension (N-1) +*> The (n-1) multipliers that define the matrix L from the +*> LU factorization of A as computed by CGTTRF. +*> \endverbatim +*> +*> \param[in] DF +*> \verbatim +*> DF is COMPLEX array, dimension (N) +*> The n diagonal elements of the upper triangular matrix U from +*> the LU factorization of A. +*> \endverbatim +*> +*> \param[in] DUF +*> \verbatim +*> DUF is COMPLEX array, dimension (N-1) +*> The (n-1) elements of the first superdiagonal of U. +*> \endverbatim +*> +*> \param[in] DU2 +*> \verbatim +*> DU2 is COMPLEX array, dimension (N-2) +*> The (n-2) elements of the second superdiagonal of U. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= n, row i of the matrix was +*> interchanged with row IPIV(i). IPIV(i) will always be either +*> i or i+1; IPIV(i) = i indicates a row interchange was not +*> required. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by CGTTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGTcomputational +* +* ===================================================================== + SUBROUTINE CGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, + $ IPIV, B, LDB, X, LDX, FERR, BERR, WORK, RWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX B( LDB, * ), D( * ), DF( * ), DL( * ), + $ DLF( * ), DU( * ), DU2( * ), DUF( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANSN, TRANST + INTEGER COUNT, I, J, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGTTRS, CLACN2, CLAGTM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, MAX, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGTRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = 4 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 110 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL CCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL CLAGTM( TRANS, N, 1, -ONE, DL, D, DU, X( 1, J ), LDX, ONE, + $ WORK, N ) +* +* Compute abs(op(A))*abs(x) + abs(b) for use in the backward +* error bound. +* + IF( NOTRAN ) THEN + IF( N.EQ.1 ) THEN + RWORK( 1 ) = CABS1( B( 1, J ) ) + + $ CABS1( D( 1 ) )*CABS1( X( 1, J ) ) + ELSE + RWORK( 1 ) = CABS1( B( 1, J ) ) + + $ CABS1( D( 1 ) )*CABS1( X( 1, J ) ) + + $ CABS1( DU( 1 ) )*CABS1( X( 2, J ) ) + DO 30 I = 2, N - 1 + RWORK( I ) = CABS1( B( I, J ) ) + + $ CABS1( DL( I-1 ) )*CABS1( X( I-1, J ) ) + + $ CABS1( D( I ) )*CABS1( X( I, J ) ) + + $ CABS1( DU( I ) )*CABS1( X( I+1, J ) ) + 30 CONTINUE + RWORK( N ) = CABS1( B( N, J ) ) + + $ CABS1( DL( N-1 ) )*CABS1( X( N-1, J ) ) + + $ CABS1( D( N ) )*CABS1( X( N, J ) ) + END IF + ELSE + IF( N.EQ.1 ) THEN + RWORK( 1 ) = CABS1( B( 1, J ) ) + + $ CABS1( D( 1 ) )*CABS1( X( 1, J ) ) + ELSE + RWORK( 1 ) = CABS1( B( 1, J ) ) + + $ CABS1( D( 1 ) )*CABS1( X( 1, J ) ) + + $ CABS1( DL( 1 ) )*CABS1( X( 2, J ) ) + DO 40 I = 2, N - 1 + RWORK( I ) = CABS1( B( I, J ) ) + + $ CABS1( DU( I-1 ) )*CABS1( X( I-1, J ) ) + + $ CABS1( D( I ) )*CABS1( X( I, J ) ) + + $ CABS1( DL( I ) )*CABS1( X( I+1, J ) ) + 40 CONTINUE + RWORK( N ) = CABS1( B( N, J ) ) + + $ CABS1( DU( N-1 ) )*CABS1( X( N-1, J ) ) + + $ CABS1( D( N ) )*CABS1( X( N, J ) ) + END IF + END IF +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + S = ZERO + DO 50 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 50 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CGTTRS( TRANS, N, 1, DLF, DF, DUF, DU2, IPIV, WORK, N, + $ INFO ) + CALL CAXPY( N, CMPLX( ONE ), WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 60 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 60 CONTINUE +* + KASE = 0 + 70 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL CGTTRS( TRANST, N, 1, DLF, DF, DUF, DU2, IPIV, WORK, + $ N, INFO ) + DO 80 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 80 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 90 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 90 CONTINUE + CALL CGTTRS( TRANSN, N, 1, DLF, DF, DUF, DU2, IPIV, WORK, + $ N, INFO ) + END IF + GO TO 70 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 100 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 100 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 110 CONTINUE +* + RETURN +* +* End of CGTRFS +* + END diff --git a/dspl/liblapack/SRC/cgtsv.f b/dspl/liblapack/SRC/cgtsv.f new file mode 100644 index 0000000..6063db0 --- /dev/null +++ b/dspl/liblapack/SRC/cgtsv.f @@ -0,0 +1,244 @@ +*> \brief CGTSV computes the solution to system of linear equations A * X = B for GT matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGTSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX B( LDB, * ), D( * ), DL( * ), DU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGTSV solves the equation +*> +*> A*X = B, +*> +*> where A is an N-by-N tridiagonal matrix, by Gaussian elimination with +*> partial pivoting. +*> +*> Note that the equation A**T *X = B may be solved by interchanging the +*> order of the arguments DU and DL. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] DL +*> \verbatim +*> DL is COMPLEX array, dimension (N-1) +*> On entry, DL must contain the (n-1) subdiagonal elements of +*> A. +*> On exit, DL is overwritten by the (n-2) elements of the +*> second superdiagonal of the upper triangular matrix U from +*> the LU factorization of A, in DL(1), ..., DL(n-2). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is COMPLEX array, dimension (N) +*> On entry, D must contain the diagonal elements of A. +*> On exit, D is overwritten by the n diagonal elements of U. +*> \endverbatim +*> +*> \param[in,out] DU +*> \verbatim +*> DU is COMPLEX array, dimension (N-1) +*> On entry, DU must contain the (n-1) superdiagonal elements +*> of A. +*> On exit, DU is overwritten by the (n-1) elements of the first +*> superdiagonal of U. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero, and the solution +*> has not been computed. The factorization has not been +*> completed unless i = N. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGTsolve +* +* ===================================================================== + SUBROUTINE CGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX B( LDB, * ), D( * ), DL( * ), DU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER J, K + COMPLEX MULT, TEMP, ZDUM +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGTSV ', -INFO ) + RETURN + END IF +* + IF( N.EQ.0 ) + $ RETURN +* + DO 30 K = 1, N - 1 + IF( DL( K ).EQ.ZERO ) THEN +* +* Subdiagonal is zero, no elimination is required. +* + IF( D( K ).EQ.ZERO ) THEN +* +* Diagonal is zero: set INFO = K and return; a unique +* solution can not be found. +* + INFO = K + RETURN + END IF + ELSE IF( CABS1( D( K ) ).GE.CABS1( DL( K ) ) ) THEN +* +* No row interchange required +* + MULT = DL( K ) / D( K ) + D( K+1 ) = D( K+1 ) - MULT*DU( K ) + DO 10 J = 1, NRHS + B( K+1, J ) = B( K+1, J ) - MULT*B( K, J ) + 10 CONTINUE + IF( K.LT.( N-1 ) ) + $ DL( K ) = ZERO + ELSE +* +* Interchange rows K and K+1 +* + MULT = D( K ) / DL( K ) + D( K ) = DL( K ) + TEMP = D( K+1 ) + D( K+1 ) = DU( K ) - MULT*TEMP + IF( K.LT.( N-1 ) ) THEN + DL( K ) = DU( K+1 ) + DU( K+1 ) = -MULT*DL( K ) + END IF + DU( K ) = TEMP + DO 20 J = 1, NRHS + TEMP = B( K, J ) + B( K, J ) = B( K+1, J ) + B( K+1, J ) = TEMP - MULT*B( K+1, J ) + 20 CONTINUE + END IF + 30 CONTINUE + IF( D( N ).EQ.ZERO ) THEN + INFO = N + RETURN + END IF +* +* Back solve with the matrix U from the factorization. +* + DO 50 J = 1, NRHS + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / D( N-1 ) + DO 40 K = N - 2, 1, -1 + B( K, J ) = ( B( K, J )-DU( K )*B( K+1, J )-DL( K )* + $ B( K+2, J ) ) / D( K ) + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of CGTSV +* + END diff --git a/dspl/liblapack/SRC/cgtsvx.f b/dspl/liblapack/SRC/cgtsvx.f new file mode 100644 index 0000000..5ebb25d --- /dev/null +++ b/dspl/liblapack/SRC/cgtsvx.f @@ -0,0 +1,416 @@ +*> \brief CGTSVX computes the solution to system of linear equations A * X = B for GT matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGTSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, +* DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR, +* WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER FACT, TRANS +* INTEGER INFO, LDB, LDX, N, NRHS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX B( LDB, * ), D( * ), DF( * ), DL( * ), +* $ DLF( * ), DU( * ), DU2( * ), DUF( * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGTSVX uses the LU factorization to compute the solution to a complex +*> system of linear equations A * X = B, A**T * X = B, or A**H * X = B, +*> where A is a tridiagonal matrix of order N and X and B are N-by-NRHS +*> matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'N', the LU decomposition is used to factor the matrix A +*> as A = L * U, where L is a product of permutation and unit lower +*> bidiagonal matrices and U is upper triangular with nonzeros in +*> only the main diagonal and first two superdiagonals. +*> +*> 2. If some U(i,i)=0, so that U is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 3. The system of equations is solved for X using the factored form +*> of A. +*> +*> 4. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of A has been +*> supplied on entry. +*> = 'F': DLF, DF, DUF, DU2, and IPIV contain the factored form +*> of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV will not +*> be modified. +*> = 'N': The matrix will be copied to DLF, DF, and DUF +*> and factored. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is COMPLEX array, dimension (N-1) +*> The (n-1) subdiagonal elements of A. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is COMPLEX array, dimension (N) +*> The n diagonal elements of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is COMPLEX array, dimension (N-1) +*> The (n-1) superdiagonal elements of A. +*> \endverbatim +*> +*> \param[in,out] DLF +*> \verbatim +*> DLF is COMPLEX array, dimension (N-1) +*> If FACT = 'F', then DLF is an input argument and on entry +*> contains the (n-1) multipliers that define the matrix L from +*> the LU factorization of A as computed by CGTTRF. +*> +*> If FACT = 'N', then DLF is an output argument and on exit +*> contains the (n-1) multipliers that define the matrix L from +*> the LU factorization of A. +*> \endverbatim +*> +*> \param[in,out] DF +*> \verbatim +*> DF is COMPLEX array, dimension (N) +*> If FACT = 'F', then DF is an input argument and on entry +*> contains the n diagonal elements of the upper triangular +*> matrix U from the LU factorization of A. +*> +*> If FACT = 'N', then DF is an output argument and on exit +*> contains the n diagonal elements of the upper triangular +*> matrix U from the LU factorization of A. +*> \endverbatim +*> +*> \param[in,out] DUF +*> \verbatim +*> DUF is COMPLEX array, dimension (N-1) +*> If FACT = 'F', then DUF is an input argument and on entry +*> contains the (n-1) elements of the first superdiagonal of U. +*> +*> If FACT = 'N', then DUF is an output argument and on exit +*> contains the (n-1) elements of the first superdiagonal of U. +*> \endverbatim +*> +*> \param[in,out] DU2 +*> \verbatim +*> DU2 is COMPLEX array, dimension (N-2) +*> If FACT = 'F', then DU2 is an input argument and on entry +*> contains the (n-2) elements of the second superdiagonal of +*> U. +*> +*> If FACT = 'N', then DU2 is an output argument and on exit +*> contains the (n-2) elements of the second superdiagonal of +*> U. +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains the pivot indices from the LU factorization of A as +*> computed by CGTTRF. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains the pivot indices from the LU factorization of A; +*> row i of the matrix was interchanged with row IPIV(i). +*> IPIV(i) will always be either i or i+1; IPIV(i) = i indicates +*> a row interchange was not required. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The estimate of the reciprocal condition number of the matrix +*> A. If RCOND is less than the machine precision (in +*> particular, if RCOND = 0), the matrix is singular to working +*> precision. This condition is indicated by a return code of +*> INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: U(i,i) is exactly zero. The factorization +*> has not been completed unless i = N, but the +*> factor U is exactly singular, so the solution +*> and error bounds could not be computed. +*> RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGTsolve +* +* ===================================================================== + SUBROUTINE CGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, + $ DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER FACT, TRANS + INTEGER INFO, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX B( LDB, * ), D( * ), DF( * ), DL( * ), + $ DLF( * ), DU( * ), DU2( * ), DUF( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT, NOTRAN + CHARACTER NORM + REAL ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANGT, SLAMCH + EXTERNAL LSAME, CLANGT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGTCON, CGTRFS, CGTTRF, CGTTRS, CLACPY, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGTSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the LU factorization of A. +* + CALL CCOPY( N, D, 1, DF, 1 ) + IF( N.GT.1 ) THEN + CALL CCOPY( N-1, DL, 1, DLF, 1 ) + CALL CCOPY( N-1, DU, 1, DUF, 1 ) + END IF + CALL CGTTRF( N, DLF, DF, DUF, DU2, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = CLANGT( NORM, N, DL, D, DU ) +* +* Compute the reciprocal of the condition number of A. +* + CALL CGTCON( NORM, N, DLF, DF, DUF, DU2, IPIV, ANORM, RCOND, WORK, + $ INFO ) +* +* Compute the solution vectors X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CGTTRS( TRANS, N, NRHS, DLF, DF, DUF, DU2, IPIV, X, LDX, + $ INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL CGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, IPIV, + $ B, LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of CGTSVX +* + END diff --git a/dspl/liblapack/SRC/cgttrf.f b/dspl/liblapack/SRC/cgttrf.f new file mode 100644 index 0000000..04a7a3f --- /dev/null +++ b/dspl/liblapack/SRC/cgttrf.f @@ -0,0 +1,243 @@ +*> \brief \b CGTTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGTTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGTTRF computes an LU factorization of a complex tridiagonal matrix A +*> using elimination with partial pivoting and row interchanges. +*> +*> The factorization has the form +*> A = L * U +*> where L is a product of permutation and unit lower bidiagonal +*> matrices and U is upper triangular with nonzeros in only the main +*> diagonal and first two superdiagonals. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in,out] DL +*> \verbatim +*> DL is COMPLEX array, dimension (N-1) +*> On entry, DL must contain the (n-1) sub-diagonal elements of +*> A. +*> +*> On exit, DL is overwritten by the (n-1) multipliers that +*> define the matrix L from the LU factorization of A. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is COMPLEX array, dimension (N) +*> On entry, D must contain the diagonal elements of A. +*> +*> On exit, D is overwritten by the n diagonal elements of the +*> upper triangular matrix U from the LU factorization of A. +*> \endverbatim +*> +*> \param[in,out] DU +*> \verbatim +*> DU is COMPLEX array, dimension (N-1) +*> On entry, DU must contain the (n-1) super-diagonal elements +*> of A. +*> +*> On exit, DU is overwritten by the (n-1) elements of the first +*> super-diagonal of U. +*> \endverbatim +*> +*> \param[out] DU2 +*> \verbatim +*> DU2 is COMPLEX array, dimension (N-2) +*> On exit, DU2 is overwritten by the (n-2) elements of the +*> second super-diagonal of U. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= n, row i of the matrix was +*> interchanged with row IPIV(i). IPIV(i) will always be either +*> i or i+1; IPIV(i) = i indicates a row interchange was not +*> required. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, U(k,k) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGTcomputational +* +* ===================================================================== + SUBROUTINE CGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX FACT, TEMP, ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'CGTTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize IPIV(i) = i and DU2(i) = 0 +* + DO 10 I = 1, N + IPIV( I ) = I + 10 CONTINUE + DO 20 I = 1, N - 2 + DU2( I ) = ZERO + 20 CONTINUE +* + DO 30 I = 1, N - 2 + IF( CABS1( D( I ) ).GE.CABS1( DL( I ) ) ) THEN +* +* No row interchange required, eliminate DL(I) +* + IF( CABS1( D( I ) ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + DL( I ) = FACT + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + END IF + ELSE +* +* Interchange rows I and I+1, eliminate DL(I) +* + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + DL( I ) = FACT + TEMP = DU( I ) + DU( I ) = D( I+1 ) + D( I+1 ) = TEMP - FACT*D( I+1 ) + DU2( I ) = DU( I+1 ) + DU( I+1 ) = -FACT*DU( I+1 ) + IPIV( I ) = I + 1 + END IF + 30 CONTINUE + IF( N.GT.1 ) THEN + I = N - 1 + IF( CABS1( D( I ) ).GE.CABS1( DL( I ) ) ) THEN + IF( CABS1( D( I ) ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + DL( I ) = FACT + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + END IF + ELSE + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + DL( I ) = FACT + TEMP = DU( I ) + DU( I ) = D( I+1 ) + D( I+1 ) = TEMP - FACT*D( I+1 ) + IPIV( I ) = I + 1 + END IF + END IF +* +* Check for a zero on the diagonal of U. +* + DO 40 I = 1, N + IF( CABS1( D( I ) ).EQ.ZERO ) THEN + INFO = I + GO TO 50 + END IF + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of CGTTRF +* + END diff --git a/dspl/liblapack/SRC/cgttrs.f b/dspl/liblapack/SRC/cgttrs.f new file mode 100644 index 0000000..64d675c --- /dev/null +++ b/dspl/liblapack/SRC/cgttrs.f @@ -0,0 +1,225 @@ +*> \brief \b CGTTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGTTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGTTRS solves one of the systems of equations +*> A * X = B, A**T * X = B, or A**H * X = B, +*> with a tridiagonal matrix A using the LU factorization computed +*> by CGTTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations. +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is COMPLEX array, dimension (N-1) +*> The (n-1) multipliers that define the matrix L from the +*> LU factorization of A. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is COMPLEX array, dimension (N) +*> The n diagonal elements of the upper triangular matrix U from +*> the LU factorization of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is COMPLEX array, dimension (N-1) +*> The (n-1) elements of the first super-diagonal of U. +*> \endverbatim +*> +*> \param[in] DU2 +*> \verbatim +*> DU2 is COMPLEX array, dimension (N-2) +*> The (n-2) elements of the second super-diagonal of U. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= n, row i of the matrix was +*> interchanged with row IPIV(i). IPIV(i) will always be either +*> i or i+1; IPIV(i) = i indicates a row interchange was not +*> required. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the matrix of right hand side vectors B. +*> On exit, B is overwritten by the solution vectors X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGTcomputational +* +* ===================================================================== + SUBROUTINE CGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER ITRANS, J, JB, NB +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CGTTS2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOTRAN = ( TRANS.EQ.'N' .OR. TRANS.EQ.'n' ) + IF( .NOT.NOTRAN .AND. .NOT.( TRANS.EQ.'T' .OR. TRANS.EQ. + $ 't' ) .AND. .NOT.( TRANS.EQ.'C' .OR. TRANS.EQ.'c' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CGTTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Decode TRANS +* + IF( NOTRAN ) THEN + ITRANS = 0 + ELSE IF( TRANS.EQ.'T' .OR. TRANS.EQ.'t' ) THEN + ITRANS = 1 + ELSE + ITRANS = 2 + END IF +* +* Determine the number of right-hand sides to solve at a time. +* + IF( NRHS.EQ.1 ) THEN + NB = 1 + ELSE + NB = MAX( 1, ILAENV( 1, 'CGTTRS', TRANS, N, NRHS, -1, -1 ) ) + END IF +* + IF( NB.GE.NRHS ) THEN + CALL CGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) + ELSE + DO 10 J = 1, NRHS, NB + JB = MIN( NRHS-J+1, NB ) + CALL CGTTS2( ITRANS, N, JB, DL, D, DU, DU2, IPIV, B( 1, J ), + $ LDB ) + 10 CONTINUE + END IF +* +* End of CGTTRS +* + END diff --git a/dspl/liblapack/SRC/cgtts2.f b/dspl/liblapack/SRC/cgtts2.f new file mode 100644 index 0000000..68d81ca --- /dev/null +++ b/dspl/liblapack/SRC/cgtts2.f @@ -0,0 +1,349 @@ +*> \brief \b CGTTS2 solves a system of linear equations with a tridiagonal matrix using the LU factorization computed by sgttrf. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CGTTS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) +* +* .. Scalar Arguments .. +* INTEGER ITRANS, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CGTTS2 solves one of the systems of equations +*> A * X = B, A**T * X = B, or A**H * X = B, +*> with a tridiagonal matrix A using the LU factorization computed +*> by CGTTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITRANS +*> \verbatim +*> ITRANS is INTEGER +*> Specifies the form of the system of equations. +*> = 0: A * X = B (No transpose) +*> = 1: A**T * X = B (Transpose) +*> = 2: A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is COMPLEX array, dimension (N-1) +*> The (n-1) multipliers that define the matrix L from the +*> LU factorization of A. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is COMPLEX array, dimension (N) +*> The n diagonal elements of the upper triangular matrix U from +*> the LU factorization of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is COMPLEX array, dimension (N-1) +*> The (n-1) elements of the first super-diagonal of U. +*> \endverbatim +*> +*> \param[in] DU2 +*> \verbatim +*> DU2 is COMPLEX array, dimension (N-2) +*> The (n-2) elements of the second super-diagonal of U. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= n, row i of the matrix was +*> interchanged with row IPIV(i). IPIV(i) will always be either +*> i or i+1; IPIV(i) = i indicates a row interchange was not +*> required. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the matrix of right hand side vectors B. +*> On exit, B is overwritten by the solution vectors X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGTcomputational +* +* ===================================================================== + SUBROUTINE CGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER ITRANS, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + COMPLEX TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( ITRANS.EQ.0 ) THEN +* +* Solve A*X = B using the LU factorization of A, +* overwriting each right hand side vector with its solution. +* + IF( NRHS.LE.1 ) THEN + J = 1 + 10 CONTINUE +* +* Solve L*x = b. +* + DO 20 I = 1, N - 1 + IF( IPIV( I ).EQ.I ) THEN + B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J ) + ELSE + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - DL( I )*B( I, J ) + END IF + 20 CONTINUE +* +* Solve U*x = b. +* + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 30 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )* + $ B( I+2, J ) ) / D( I ) + 30 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 10 + END IF + ELSE + DO 60 J = 1, NRHS +* +* Solve L*x = b. +* + DO 40 I = 1, N - 1 + IF( IPIV( I ).EQ.I ) THEN + B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J ) + ELSE + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - DL( I )*B( I, J ) + END IF + 40 CONTINUE +* +* Solve U*x = b. +* + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 50 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )* + $ B( I+2, J ) ) / D( I ) + 50 CONTINUE + 60 CONTINUE + END IF + ELSE IF( ITRANS.EQ.1 ) THEN +* +* Solve A**T * X = B. +* + IF( NRHS.LE.1 ) THEN + J = 1 + 70 CONTINUE +* +* Solve U**T * x = b. +* + B( 1, J ) = B( 1, J ) / D( 1 ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) + DO 80 I = 3, N + B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )-DU2( I-2 )* + $ B( I-2, J ) ) / D( I ) + 80 CONTINUE +* +* Solve L**T * x = b. +* + DO 90 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - DL( I )*B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - DL( I )*TEMP + B( I, J ) = TEMP + END IF + 90 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 70 + END IF + ELSE + DO 120 J = 1, NRHS +* +* Solve U**T * x = b. +* + B( 1, J ) = B( 1, J ) / D( 1 ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) + DO 100 I = 3, N + B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )- + $ DU2( I-2 )*B( I-2, J ) ) / D( I ) + 100 CONTINUE +* +* Solve L**T * x = b. +* + DO 110 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - DL( I )*B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - DL( I )*TEMP + B( I, J ) = TEMP + END IF + 110 CONTINUE + 120 CONTINUE + END IF + ELSE +* +* Solve A**H * X = B. +* + IF( NRHS.LE.1 ) THEN + J = 1 + 130 CONTINUE +* +* Solve U**H * x = b. +* + B( 1, J ) = B( 1, J ) / CONJG( D( 1 ) ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-CONJG( DU( 1 ) )*B( 1, J ) ) / + $ CONJG( D( 2 ) ) + DO 140 I = 3, N + B( I, J ) = ( B( I, J )-CONJG( DU( I-1 ) )*B( I-1, J )- + $ CONJG( DU2( I-2 ) )*B( I-2, J ) ) / + $ CONJG( D( I ) ) + 140 CONTINUE +* +* Solve L**H * x = b. +* + DO 150 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - CONJG( DL( I ) )*B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - CONJG( DL( I ) )*TEMP + B( I, J ) = TEMP + END IF + 150 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 130 + END IF + ELSE + DO 180 J = 1, NRHS +* +* Solve U**H * x = b. +* + B( 1, J ) = B( 1, J ) / CONJG( D( 1 ) ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-CONJG( DU( 1 ) )*B( 1, J ) ) / + $ CONJG( D( 2 ) ) + DO 160 I = 3, N + B( I, J ) = ( B( I, J )-CONJG( DU( I-1 ) )* + $ B( I-1, J )-CONJG( DU2( I-2 ) )* + $ B( I-2, J ) ) / CONJG( D( I ) ) + 160 CONTINUE +* +* Solve L**H * x = b. +* + DO 170 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - CONJG( DL( I ) )* + $ B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - CONJG( DL( I ) )*TEMP + B( I, J ) = TEMP + END IF + 170 CONTINUE + 180 CONTINUE + END IF + END IF +* +* End of CGTTS2 +* + END diff --git a/dspl/liblapack/SRC/chb2st_kernels.f b/dspl/liblapack/SRC/chb2st_kernels.f new file mode 100644 index 0000000..25c9ab7 --- /dev/null +++ b/dspl/liblapack/SRC/chb2st_kernels.f @@ -0,0 +1,377 @@ +*> \brief \b CHB2ST_KERNELS +* +* @generated from zhb2st_kernels.f, fortran z -> c, Wed Dec 7 08:22:40 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHB2ST_KERNELS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHB2ST_KERNELS( UPLO, WANTZ, TTYPE, +* ST, ED, SWEEP, N, NB, IB, +* A, LDA, V, TAU, LDVT, WORK) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* LOGICAL WANTZ +* INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), V( * ), +* TAU( * ), WORK( * ) +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHB2ST_KERNELS is an internal routine used by the CHETRD_HB2ST +*> subroutine. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL which indicate if Eigenvalue are requested or both +*> Eigenvalue/Eigenvectors. +*> \endverbatim +*> +*> \param[in] TTYPE +*> \verbatim +*> TTYPE is INTEGER +*> \endverbatim +*> +*> \param[in] ST +*> \verbatim +*> ST is INTEGER +*> internal parameter for indices. +*> \endverbatim +*> +*> \param[in] ED +*> \verbatim +*> ED is INTEGER +*> internal parameter for indices. +*> \endverbatim +*> +*> \param[in] SWEEP +*> \verbatim +*> SWEEP is INTEGER +*> internal parameter for indices. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER. The order of the matrix A. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER. The size of the band. +*> \endverbatim +*> +*> \param[in] IB +*> \verbatim +*> IB is INTEGER. +*> \endverbatim +*> +*> \param[in, out] A +*> \verbatim +*> A is COMPLEX array. A pointer to the matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER. The leading dimension of the matrix A. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX array, dimension 2*n if eigenvalues only are +*> requested or to be queried for vectors. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (2*n). +*> The scalar factors of the Householder reflectors are stored +*> in this array. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX array. Workspace of size nb. +*> \endverbatim +*> +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CHB2ST_KERNELS( UPLO, WANTZ, TTYPE, + $ ST, ED, SWEEP, N, NB, IB, + $ A, LDA, V, TAU, LDVT, WORK) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + LOGICAL WANTZ + INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), V( * ), + $ TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS, + $ DPOS, OFDPOS, AJETER + COMPLEX CTMP +* .. +* .. External Subroutines .. + EXTERNAL CLARFG, CLARFX, CLARFY +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MOD +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. +* .. Executable Statements .. +* + AJETER = IB + LDVT + UPPER = LSAME( UPLO, 'U' ) + + IF( UPPER ) THEN + DPOS = 2 * NB + 1 + OFDPOS = 2 * NB + ELSE + DPOS = 1 + OFDPOS = 2 + ENDIF + +* +* Upper case +* + IF( UPPER ) THEN +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ENDIF +* + IF( TTYPE.EQ.1 ) THEN + LM = ED - ST + 1 +* + V( VPOS ) = ONE + DO 10 I = 1, LM-1 + V( VPOS+I ) = CONJG( A( OFDPOS-I, ST+I ) ) + A( OFDPOS-I, ST+I ) = ZERO + 10 CONTINUE + CTMP = CONJG( A( OFDPOS, ST ) ) + CALL CLARFG( LM, CTMP, V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) + A( OFDPOS, ST ) = CTMP +* + LM = ED - ST + 1 + CALL CLARFY( UPLO, LM, V( VPOS ), 1, + $ CONJG( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + ENDIF +* + IF( TTYPE.EQ.3 ) THEN +* + LM = ED - ST + 1 + CALL CLARFY( UPLO, LM, V( VPOS ), 1, + $ CONJG( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + ENDIF +* + IF( TTYPE.EQ.2 ) THEN + J1 = ED+1 + J2 = MIN( ED+NB, N ) + LN = ED-ST+1 + LM = J2-J1+1 + IF( LM.GT.0) THEN + CALL CLARFX( 'Left', LN, LM, V( VPOS ), + $ CONJG( TAU( TAUPOS ) ), + $ A( DPOS-NB, J1 ), LDA-1, WORK) +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ENDIF +* + V( VPOS ) = ONE + DO 30 I = 1, LM-1 + V( VPOS+I ) = + $ CONJG( A( DPOS-NB-I, J1+I ) ) + A( DPOS-NB-I, J1+I ) = ZERO + 30 CONTINUE + CTMP = CONJG( A( DPOS-NB, J1 ) ) + CALL CLARFG( LM, CTMP, V( VPOS+1 ), 1, TAU( TAUPOS ) ) + A( DPOS-NB, J1 ) = CTMP +* + CALL CLARFX( 'Right', LN-1, LM, V( VPOS ), + $ TAU( TAUPOS ), + $ A( DPOS-NB+1, J1 ), LDA-1, WORK) + ENDIF + ENDIF +* +* Lower case +* + ELSE +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ENDIF +* + IF( TTYPE.EQ.1 ) THEN + LM = ED - ST + 1 +* + V( VPOS ) = ONE + DO 20 I = 1, LM-1 + V( VPOS+I ) = A( OFDPOS+I, ST-1 ) + A( OFDPOS+I, ST-1 ) = ZERO + 20 CONTINUE + CALL CLARFG( LM, A( OFDPOS, ST-1 ), V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) +* + LM = ED - ST + 1 +* + CALL CLARFY( UPLO, LM, V( VPOS ), 1, + $ CONJG( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + + ENDIF +* + IF( TTYPE.EQ.3 ) THEN + LM = ED - ST + 1 +* + CALL CLARFY( UPLO, LM, V( VPOS ), 1, + $ CONJG( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + + ENDIF +* + IF( TTYPE.EQ.2 ) THEN + J1 = ED+1 + J2 = MIN( ED+NB, N ) + LN = ED-ST+1 + LM = J2-J1+1 +* + IF( LM.GT.0) THEN + CALL CLARFX( 'Right', LM, LN, V( VPOS ), + $ TAU( TAUPOS ), A( DPOS+NB, ST ), + $ LDA-1, WORK) +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ENDIF +* + V( VPOS ) = ONE + DO 40 I = 1, LM-1 + V( VPOS+I ) = A( DPOS+NB+I, ST ) + A( DPOS+NB+I, ST ) = ZERO + 40 CONTINUE + CALL CLARFG( LM, A( DPOS+NB, ST ), V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) +* + CALL CLARFX( 'Left', LM, LN-1, V( VPOS ), + $ CONJG( TAU( TAUPOS ) ), + $ A( DPOS+NB-1, ST+1 ), LDA-1, WORK) + + ENDIF + ENDIF + ENDIF +* + RETURN +* +* END OF CHB2ST_KERNELS +* + END diff --git a/dspl/liblapack/SRC/chbev.f b/dspl/liblapack/SRC/chbev.f new file mode 100644 index 0000000..2e7022f --- /dev/null +++ b/dspl/liblapack/SRC/chbev.f @@ -0,0 +1,294 @@ +*> \brief CHBEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHBEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHBEV( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, +* RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, N +* .. +* .. Array Arguments .. +* REAL RWORK( * ), W( * ) +* COMPLEX AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHBEV computes all the eigenvalues and, optionally, eigenvectors of +*> a complex Hermitian band matrix A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (max(1,3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHEReigen +* +* ===================================================================== + SUBROUTINE CHBEV( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, + $ RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, N +* .. +* .. Array Arguments .. + REAL RWORK( * ), W( * ) + COMPLEX AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, WANTZ + INTEGER IINFO, IMAX, INDE, INDRWK, ISCALE + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHB, SLAMCH + EXTERNAL LSAME, CLANHB, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CHBTRD, CLASCL, CSTEQR, SSCAL, SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBEV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( LOWER ) THEN + W( 1 ) = AB( 1, 1 ) + ELSE + W( 1 ) = AB( KD+1, 1 ) + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL CLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL CLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call CHBTRD to reduce Hermitian band matrix to tridiagonal form. +* + INDE = 1 + CALL CHBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, RWORK( INDE ), Z, + $ LDZ, WORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call CSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + INDRWK = INDE + N + CALL CSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + RETURN +* +* End of CHBEV +* + END diff --git a/dspl/liblapack/SRC/chbev_2stage.f b/dspl/liblapack/SRC/chbev_2stage.f new file mode 100644 index 0000000..179fb5a --- /dev/null +++ b/dspl/liblapack/SRC/chbev_2stage.f @@ -0,0 +1,389 @@ +*> \brief CHBEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* @generated from zhbev_2stage.f, fortran z -> c, Sat Nov 5 23:18:20 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHBEV_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHBEV_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, +* WORK, LWORK, RWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, N, LWORK +* .. +* .. Array Arguments .. +* REAL RWORK( * ), W( * ) +* COMPLEX AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHBEV_2STAGE computes all the eigenvalues and, optionally, eigenvectors of +*> a complex Hermitian band matrix A using the 2stage technique for +*> the reduction to tridiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension LWORK +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (max(1,3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexOTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHBEV_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, + $ WORK, LWORK, RWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, N, LWORK +* .. +* .. Array Arguments .. + REAL RWORK( * ), W( * ) + COMPLEX AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, WANTZ, LQUERY + INTEGER IINFO, IMAX, INDE, INDWRK, INDRWK, ISCALE, + $ LLWORK, LWMIN, LHTRD, LWTRD, IB, INDHOUS + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + REAL SLAMCH, CLANHB + EXTERNAL LSAME, SLAMCH, CLANHB, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSTERF, XERBLA, CLASCL, CSTEQR, + $ CHETRD_2STAGE, CHETRD_HB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + IB = ILAENV2STAGE( 2, 'CHETRD_HB2ST', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'CHETRD_HB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'CHETRD_HB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWMIN = LHTRD + LWTRD + WORK( 1 ) = LWMIN + ENDIF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -11 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBEV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( LOWER ) THEN + W( 1 ) = REAL( AB( 1, 1 ) ) + ELSE + W( 1 ) = REAL( AB( KD+1, 1 ) ) + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL CLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL CLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call CHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. +* + INDE = 1 + INDHOUS = 1 + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL CHETRD_HB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, + $ RWORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call CSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + INDRWK = INDE + N + CALL CSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of CHBEV_2STAGE +* + END diff --git a/dspl/liblapack/SRC/chbevd.f b/dspl/liblapack/SRC/chbevd.f new file mode 100644 index 0000000..a54da43 --- /dev/null +++ b/dspl/liblapack/SRC/chbevd.f @@ -0,0 +1,401 @@ +*> \brief CHBEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHBEVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, +* LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHBEVD computes all the eigenvalues and, optionally, eigenvectors of +*> a complex Hermitian band matrix A. If eigenvectors are desired, it +*> uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LWORK must be at least N. +*> If JOBZ = 'V' and N > 1, LWORK must be at least 2*N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, +*> dimension (LRWORK) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of array RWORK. +*> If N <= 1, LRWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LRWORK must be at least N. +*> If JOBZ = 'V' and N > 1, LRWORK must be at least +*> 1 + 5*N + 2*N**2. +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N . +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHEReigen +* +* ===================================================================== + SUBROUTINE CHBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, + $ LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDWK2, INDWRK, ISCALE, + $ LIWMIN, LLRWK, LLWK2, LRWMIN, LWMIN + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHB, SLAMCH + EXTERNAL LSAME, CLANHB, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CHBTRD, CLACPY, CLASCL, CSTEDC, SSCAL, + $ SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + ELSE + IF( WANTZ ) THEN + LWMIN = 2*N**2 + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + LRWMIN = N + LIWMIN = 1 + END IF + END IF + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AB( 1, 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL CLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL CLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call CHBTRD to reduce Hermitian band matrix to tridiagonal form. +* + INDE = 1 + INDWRK = INDE + N + INDWK2 = 1 + N*N + LLWK2 = LWORK - INDWK2 + 1 + LLRWK = LRWORK - INDWRK + 1 + CALL CHBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, RWORK( INDE ), Z, + $ LDZ, WORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call CSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL CSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ), + $ LLWK2, RWORK( INDWRK ), LLRWK, IWORK, LIWORK, + $ INFO ) + CALL CGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO, + $ WORK( INDWK2 ), N ) + CALL CLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of CHBEVD +* + END diff --git a/dspl/liblapack/SRC/chbevd_2stage.f b/dspl/liblapack/SRC/chbevd_2stage.f new file mode 100644 index 0000000..4002339 --- /dev/null +++ b/dspl/liblapack/SRC/chbevd_2stage.f @@ -0,0 +1,458 @@ +*> \brief CHBEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* @generated from zhbevd_2stage.f, fortran z -> c, Sat Nov 5 23:18:17 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHBEVD_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, +* WORK, LWORK, RWORK, LRWORK, IWORK, +* LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHBEVD_2STAGE computes all the eigenvalues and, optionally, eigenvectors of +*> a complex Hermitian band matrix A using the 2stage technique for +*> the reduction to tridiagonal. If eigenvectors are desired, it +*> uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, +*> dimension (LRWORK) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of array RWORK. +*> If N <= 1, LRWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LRWORK must be at least N. +*> If JOBZ = 'V' and N > 1, LRWORK must be at least +*> 1 + 5*N + 2*N**2. +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N . +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexOTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, + $ WORK, LWORK, RWORK, LRWORK, IWORK, + $ LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDWK2, INDRWK, ISCALE, + $ LLWORK, INDWK, LHTRD, LWTRD, IB, INDHOUS, + $ LIWMIN, LLRWK, LLWK2, LRWMIN, LWMIN + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + REAL SLAMCH, CLANHB + EXTERNAL LSAME, SLAMCH, CLANHB, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSTERF, XERBLA, CGEMM, CLACPY, + $ CLASCL, CSTEDC, CHETRD_HB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + ELSE + IB = ILAENV2STAGE( 2, 'CHETRD_HB2ST', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'CHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'CHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + IF( WANTZ ) THEN + LWMIN = 2*N**2 + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = MAX( N, LHTRD + LWTRD ) + LRWMIN = N + LIWMIN = 1 + END IF + END IF + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBEVD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = REAL( AB( 1, 1 ) ) + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL CLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL CLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call CHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. +* + INDE = 1 + INDRWK = INDE + N + LLRWK = LRWORK - INDRWK + 1 + INDHOUS = 1 + INDWK = INDHOUS + LHTRD + LLWORK = LWORK - INDWK + 1 + INDWK2 = INDWK + N*N + LLWK2 = LWORK - INDWK2 + 1 +* + CALL CHETRD_HB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, + $ RWORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call CSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL CSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ), + $ LLWK2, RWORK( INDRWK ), LLRWK, IWORK, LIWORK, + $ INFO ) + CALL CGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO, + $ WORK( INDWK2 ), N ) + CALL CLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of CHBEVD_2STAGE +* + END diff --git a/dspl/liblapack/SRC/chbevx.f b/dspl/liblapack/SRC/chbevx.f new file mode 100644 index 0000000..c7060bb --- /dev/null +++ b/dspl/liblapack/SRC/chbevx.f @@ -0,0 +1,553 @@ +*> \brief CHBEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHBEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHBEVX( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, LDQ, VL, +* VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, RWORK, +* IWORK, IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX AB( LDAB, * ), Q( LDQ, * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHBEVX computes selected eigenvalues and, optionally, eigenvectors +*> of a complex Hermitian band matrix A. Eigenvalues and eigenvectors +*> can be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found; +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found; +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ, N) +*> If JOBZ = 'V', the N-by-N unitary matrix used in the +*> reduction to tridiagonal form. +*> If JOBZ = 'N', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. If JOBZ = 'V', then +*> LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing AB to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHEReigen +* +* ===================================================================== + SUBROUTINE CHBEVX( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, LDQ, VL, + $ VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, RWORK, + $ IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX AB( LDAB, * ), Q( LDQ, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, TEST, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWK, INDRWK, INDWRK, ISCALE, ITMP1, + $ J, JJ, NSPLIT + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU + COMPLEX CTMP1 +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHB, SLAMCH + EXTERNAL LSAME, CLANHB, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMV, CHBTRD, CLACPY, CLASCL, CSTEIN, + $ CSTEQR, CSWAP, SCOPY, SSCAL, SSTEBZ, SSTERF, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LOWER = LSAME( UPLO, 'L' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( WANTZ .AND. LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -13 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -18 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBEVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + M = 1 + IF( LOWER ) THEN + CTMP1 = AB( 1, 1 ) + ELSE + CTMP1 = AB( KD+1, 1 ) + END IF + TMP1 = REAL( CTMP1 ) + IF( VALEIG ) THEN + IF( .NOT.( VL.LT.TMP1 .AND. VU.GE.TMP1 ) ) + $ M = 0 + END IF + IF( M.EQ.1 ) THEN + W( 1 ) = CTMP1 + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF ( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + ENDIF + ANRM = CLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL CLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL CLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call CHBTRD to reduce Hermitian band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N + INDWRK = 1 + CALL CHBTRD( JOBZ, UPLO, N, KD, AB, LDAB, RWORK( INDD ), + $ RWORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call SSTERF or CSTEQR. If this fails for some +* eigenvalue, then try SSTEBZ. +* + TEST = .FALSE. + IF (INDEIG) THEN + IF (IL.EQ.1 .AND. IU.EQ.N) THEN + TEST = .TRUE. + END IF + END IF + IF ((ALLEIG .OR. TEST) .AND. (ABSTOL.LE.ZERO)) THEN + CALL SCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL SSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL CLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL CSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL CSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by CSTEIN. +* + DO 20 J = 1, M + CALL CCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL CGEMV( 'N', N, N, CONE, Q, LDQ, WORK, 1, CZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* + RETURN +* +* End of CHBEVX +* + END diff --git a/dspl/liblapack/SRC/chbevx_2stage.f b/dspl/liblapack/SRC/chbevx_2stage.f new file mode 100644 index 0000000..ddc95e1 --- /dev/null +++ b/dspl/liblapack/SRC/chbevx_2stage.f @@ -0,0 +1,649 @@ +*> \brief CHBEVX_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* @generated from zhbevx_2stage.f, fortran z -> c, Sat Nov 5 23:18:22 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHBEVX_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHBEVX_2STAGE( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, +* Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, +* Z, LDZ, WORK, LWORK, RWORK, IWORK, +* IFAIL, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N, LWORK +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX AB( LDAB, * ), Q( LDQ, * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHBEVX_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a complex Hermitian band matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors +*> can be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found; +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found; +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ, N) +*> If JOBZ = 'V', the N-by-N unitary matrix used in the +*> reduction to tridiagonal form. +*> If JOBZ = 'N', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. If JOBZ = 'V', then +*> LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing AB to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHBEVX_2STAGE( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, + $ Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, + $ Z, LDZ, WORK, LWORK, RWORK, IWORK, + $ IFAIL, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N, LWORK + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX AB( LDAB, * ), Q( LDQ, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, TEST, VALEIG, WANTZ, + $ LQUERY + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWK, INDRWK, INDWRK, ISCALE, ITMP1, + $ LLWORK, LWMIN, LHTRD, LWTRD, IB, INDHOUS, + $ J, JJ, NSPLIT + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU + COMPLEX CTMP1 +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + REAL SLAMCH, CLANHB + EXTERNAL LSAME, SLAMCH, CLANHB, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SSCAL, SSTEBZ, SSTERF, XERBLA, CCOPY, + $ CGEMV, CLACPY, CLASCL, CSTEIN, CSTEQR, + $ CSWAP, CHETRD_HB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( WANTZ .AND. LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -13 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + IB = ILAENV2STAGE( 2, 'CHETRD_HB2ST', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'CHETRD_HB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'CHETRD_HB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWMIN = LHTRD + LWTRD + WORK( 1 ) = LWMIN + ENDIF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -20 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBEVX_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + M = 1 + IF( LOWER ) THEN + CTMP1 = AB( 1, 1 ) + ELSE + CTMP1 = AB( KD+1, 1 ) + END IF + TMP1 = REAL( CTMP1 ) + IF( VALEIG ) THEN + IF( .NOT.( VL.LT.TMP1 .AND. VU.GE.TMP1 ) ) + $ M = 0 + END IF + IF( M.EQ.1 ) THEN + W( 1 ) = REAL( CTMP1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + END IF + ANRM = CLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL CLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL CLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call CHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N +* + INDHOUS = 1 + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL CHETRD_HB2ST( 'N', JOBZ, UPLO, N, KD, AB, LDAB, + $ RWORK( INDD ), RWORK( INDE ), WORK( INDHOUS ), + $ LHTRD, WORK( INDWRK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call SSTERF or CSTEQR. If this fails for some +* eigenvalue, then try SSTEBZ. +* + TEST = .FALSE. + IF (INDEIG) THEN + IF (IL.EQ.1 .AND. IU.EQ.N) THEN + TEST = .TRUE. + END IF + END IF + IF ((ALLEIG .OR. TEST) .AND. (ABSTOL.LE.ZERO)) THEN + CALL SCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL SSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL CLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL CSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL CSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by CSTEIN. +* + DO 20 J = 1, M + CALL CCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL CGEMV( 'N', N, N, CONE, Q, LDQ, WORK, 1, CZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of CHBEVX_2STAGE +* + END diff --git a/dspl/liblapack/SRC/chbgst.f b/dspl/liblapack/SRC/chbgst.f new file mode 100644 index 0000000..c849998 --- /dev/null +++ b/dspl/liblapack/SRC/chbgst.f @@ -0,0 +1,1469 @@ +*> \brief \b CHBGST +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHBGST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHBGST( VECT, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, X, +* LDX, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, VECT +* INTEGER INFO, KA, KB, LDAB, LDBB, LDX, N +* .. +* .. Array Arguments .. +* REAL RWORK( * ) +* COMPLEX AB( LDAB, * ), BB( LDBB, * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHBGST reduces a complex Hermitian-definite banded generalized +*> eigenproblem A*x = lambda*B*x to standard form C*y = lambda*y, +*> such that C has the same bandwidth as A. +*> +*> B must have been previously factorized as S**H*S by CPBSTF, using a +*> split Cholesky factorization. A is overwritten by C = X**H*A*X, where +*> X = S**(-1)*Q and Q is a unitary matrix chosen to preserve the +*> bandwidth of A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': do not form the transformation matrix X; +*> = 'V': form X. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] KA +*> \verbatim +*> KA is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KA >= 0. +*> \endverbatim +*> +*> \param[in] KB +*> \verbatim +*> KB is INTEGER +*> The number of superdiagonals of the matrix B if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KA >= KB >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first ka+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +*> +*> On exit, the transformed matrix X**H*A*X, stored in the same +*> format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KA+1. +*> \endverbatim +*> +*> \param[in] BB +*> \verbatim +*> BB is COMPLEX array, dimension (LDBB,N) +*> The banded factor S from the split Cholesky factorization of +*> B, as returned by CPBSTF, stored in the first kb+1 rows of +*> the array. +*> \endverbatim +*> +*> \param[in] LDBB +*> \verbatim +*> LDBB is INTEGER +*> The leading dimension of the array BB. LDBB >= KB+1. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,N) +*> If VECT = 'V', the n-by-n matrix X. +*> If VECT = 'N', the array X is not referenced. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. +*> LDX >= max(1,N) if VECT = 'V'; LDX >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CHBGST( VECT, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, X, + $ LDX, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO, VECT + INTEGER INFO, KA, KB, LDAB, LDBB, LDX, N +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX AB( LDAB, * ), BB( LDBB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + REAL ONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ), ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPDATE, UPPER, WANTX + INTEGER I, I0, I1, I2, INCA, J, J1, J1T, J2, J2T, K, + $ KA1, KB1, KBT, L, M, NR, NRT, NX + REAL BII + COMPLEX RA, RA1, T +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGERC, CGERU, CLACGV, CLAR2V, CLARGV, CLARTG, + $ CLARTV, CLASET, CROT, CSSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTX = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + KA1 = KA + 1 + KB1 = KB + 1 + INFO = 0 + IF( .NOT.WANTX .AND. .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDX.LT.1 .OR. WANTX .AND. LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBGST', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + INCA = LDAB*KA1 +* +* Initialize X to the unit matrix, if needed +* + IF( WANTX ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, X, LDX ) +* +* Set M to the splitting point m. It must be the same value as is +* used in CPBSTF. The chosen value allows the arrays WORK and RWORK +* to be of dimension (N). +* + M = ( N+KB ) / 2 +* +* The routine works in two phases, corresponding to the two halves +* of the split Cholesky factorization of B as S**H*S where +* +* S = ( U ) +* ( M L ) +* +* with U upper triangular of order m, and L lower triangular of +* order n-m. S has the same bandwidth as B. +* +* S is treated as a product of elementary matrices: +* +* S = S(m)*S(m-1)*...*S(2)*S(1)*S(m+1)*S(m+2)*...*S(n-1)*S(n) +* +* where S(i) is determined by the i-th row of S. +* +* In phase 1, the index i takes the values n, n-1, ... , m+1; +* in phase 2, it takes the values 1, 2, ... , m. +* +* For each value of i, the current matrix A is updated by forming +* inv(S(i))**H*A*inv(S(i)). This creates a triangular bulge outside +* the band of A. The bulge is then pushed down toward the bottom of +* A in phase 1, and up toward the top of A in phase 2, by applying +* plane rotations. +* +* There are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1 +* of them are linearly independent, so annihilating a bulge requires +* only 2*kb-1 plane rotations. The rotations are divided into a 1st +* set of kb-1 rotations, and a 2nd set of kb rotations. +* +* Wherever possible, rotations are generated and applied in vector +* operations of length NR between the indices J1 and J2 (sometimes +* replaced by modified values NRT, J1T or J2T). +* +* The real cosines and complex sines of the rotations are stored in +* the arrays RWORK and WORK, those of the 1st set in elements +* 2:m-kb-1, and those of the 2nd set in elements m-kb+1:n. +* +* The bulges are not formed explicitly; nonzero elements outside the +* band are created only when they are required for generating new +* rotations; they are stored in the array WORK, in positions where +* they are later overwritten by the sines of the rotations which +* annihilate them. +* +* **************************** Phase 1 ***************************** +* +* The logical structure of this phase is: +* +* UPDATE = .TRUE. +* DO I = N, M + 1, -1 +* use S(i) to update A and create a new bulge +* apply rotations to push all bulges KA positions downward +* END DO +* UPDATE = .FALSE. +* DO I = M + KA + 1, N - 1 +* apply rotations to push all bulges KA positions downward +* END DO +* +* To avoid duplicating code, the two loops are merged. +* + UPDATE = .TRUE. + I = N + 1 + 10 CONTINUE + IF( UPDATE ) THEN + I = I - 1 + KBT = MIN( KB, I-1 ) + I0 = I - 1 + I1 = MIN( N, I+KA ) + I2 = I - KBT + KA1 + IF( I.LT.M+1 ) THEN + UPDATE = .FALSE. + I = I + 1 + I0 = M + IF( KA.EQ.0 ) + $ GO TO 480 + GO TO 10 + END IF + ELSE + I = I + KA + IF( I.GT.N-1 ) + $ GO TO 480 + END IF +* + IF( UPPER ) THEN +* +* Transform A, working with the upper triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**H * A * inv(S(i)) +* + BII = REAL( BB( KB1, I ) ) + AB( KA1, I ) = ( REAL( AB( KA1, I ) ) / BII ) / BII + DO 20 J = I + 1, I1 + AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII + 20 CONTINUE + DO 30 J = MAX( 1, I-KA ), I - 1 + AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII + 30 CONTINUE + DO 60 K = I - KBT, I - 1 + DO 40 J = I - KBT, K + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ BB( J-I+KB1, I )* + $ CONJG( AB( K-I+KA1, I ) ) - + $ CONJG( BB( K-I+KB1, I ) )* + $ AB( J-I+KA1, I ) + + $ REAL( AB( KA1, I ) )* + $ BB( J-I+KB1, I )* + $ CONJG( BB( K-I+KB1, I ) ) + 40 CONTINUE + DO 50 J = MAX( 1, I-KA ), I - KBT - 1 + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ CONJG( BB( K-I+KB1, I ) )* + $ AB( J-I+KA1, I ) + 50 CONTINUE + 60 CONTINUE + DO 80 J = I, I1 + DO 70 K = MAX( J-KA, I-KBT ), I - 1 + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ BB( K-I+KB1, I )*AB( I-J+KA1, J ) + 70 CONTINUE + 80 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL CSSCAL( N-M, ONE / BII, X( M+1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL CGERC( N-M, KBT, -CONE, X( M+1, I ), 1, + $ BB( KB1-KBT, I ), 1, X( M+1, I-KBT ), + $ LDX ) + END IF +* +* store a(i,i1) in RA1 for use in next loop over K +* + RA1 = AB( I-I1+KA1, I1 ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions down toward the bottom of the +* band +* + DO 130 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN +* +* generate rotation to annihilate a(i,i-k+ka+1) +* + CALL CLARTG( AB( K+1, I-K+KA ), RA1, + $ RWORK( I-K+KA-M ), WORK( I-K+KA-M ), RA ) +* +* create nonzero element a(i-k,i-k+ka+1) outside the +* band and store it in WORK(i-k) +* + T = -BB( KB1-K, I )*RA1 + WORK( I-K ) = RWORK( I-K+KA-M )*T - + $ CONJG( WORK( I-K+KA-M ) )* + $ AB( 1, I-K+KA ) + AB( 1, I-K+KA ) = WORK( I-K+KA-M )*T + + $ RWORK( I-K+KA-M )*AB( 1, I-K+KA ) + RA1 = RA + END IF + END IF + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MAX( J2, I+2*KA-K+1 ) + ELSE + J2T = J2 + END IF + NRT = ( N-J2T+KA ) / KA1 + DO 90 J = J2T, J1, KA1 +* +* create nonzero element a(j-ka,j+1) outside the band +* and store it in WORK(j-m) +* + WORK( J-M ) = WORK( J-M )*AB( 1, J+1 ) + AB( 1, J+1 ) = RWORK( J-M )*AB( 1, J+1 ) + 90 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL CLARGV( NRT, AB( 1, J2T ), INCA, WORK( J2T-M ), KA1, + $ RWORK( J2T-M ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the right +* + DO 100 L = 1, KA - 1 + CALL CLARTV( NR, AB( KA1-L, J2 ), INCA, + $ AB( KA-L, J2+1 ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 100 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL CLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ), + $ AB( KA, J2+1 ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) +* + CALL CLACGV( NR, WORK( J2-M ), KA1 ) + END IF +* +* start applying rotations in 1st set from the left +* + DO 110 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 110 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 120 J = J2, J1, KA1 + CALL CROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ RWORK( J-M ), CONJG( WORK( J-M ) ) ) + 120 CONTINUE + END IF + 130 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.LE.N .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i-kbt,i-kbt+ka+1) outside the +* band and store it in WORK(i-kbt) +* + WORK( I-KBT ) = -BB( KB1-KBT, I )*RA1 + END IF + END IF +* + DO 170 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1 + ELSE + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + END IF +* +* finish applying rotations in 2nd set from the left +* + DO 140 L = KB - K, 1, -1 + NRT = ( N-J2+KA+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( L, J2-L+1 ), INCA, + $ AB( L+1, J2-L+1 ), INCA, RWORK( J2-KA ), + $ WORK( J2-KA ), KA1 ) + 140 CONTINUE + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + DO 150 J = J1, J2, -KA1 + WORK( J ) = WORK( J-KA ) + RWORK( J ) = RWORK( J-KA ) + 150 CONTINUE + DO 160 J = J2, J1, KA1 +* +* create nonzero element a(j-ka,j+1) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( 1, J+1 ) + AB( 1, J+1 ) = RWORK( J )*AB( 1, J+1 ) + 160 CONTINUE + IF( UPDATE ) THEN + IF( I-K.LT.N-KA .AND. K.LE.KBT ) + $ WORK( I-K+KA ) = WORK( I-K ) + END IF + 170 CONTINUE +* + DO 210 K = KB, 1, -1 + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL CLARGV( NR, AB( 1, J2 ), INCA, WORK( J2 ), KA1, + $ RWORK( J2 ), KA1 ) +* +* apply rotations in 2nd set from the right +* + DO 180 L = 1, KA - 1 + CALL CLARTV( NR, AB( KA1-L, J2 ), INCA, + $ AB( KA-L, J2+1 ), INCA, RWORK( J2 ), + $ WORK( J2 ), KA1 ) + 180 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL CLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ), + $ AB( KA, J2+1 ), INCA, RWORK( J2 ), + $ WORK( J2 ), KA1 ) +* + CALL CLACGV( NR, WORK( J2 ), KA1 ) + END IF +* +* start applying rotations in 2nd set from the left +* + DO 190 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, RWORK( J2 ), + $ WORK( J2 ), KA1 ) + 190 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 200 J = J2, J1, KA1 + CALL CROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ RWORK( J ), CONJG( WORK( J ) ) ) + 200 CONTINUE + END IF + 210 CONTINUE +* + DO 230 K = 1, KB - 1 + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 +* +* finish applying rotations in 1st set from the left +* + DO 220 L = KB - K, 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 220 CONTINUE + 230 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 240 J = N - 1, J2 + KA, -1 + RWORK( J-M ) = RWORK( J-KA-M ) + WORK( J-M ) = WORK( J-KA-M ) + 240 CONTINUE + END IF +* + ELSE +* +* Transform A, working with the lower triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**H * A * inv(S(i)) +* + BII = REAL( BB( 1, I ) ) + AB( 1, I ) = ( REAL( AB( 1, I ) ) / BII ) / BII + DO 250 J = I + 1, I1 + AB( J-I+1, I ) = AB( J-I+1, I ) / BII + 250 CONTINUE + DO 260 J = MAX( 1, I-KA ), I - 1 + AB( I-J+1, J ) = AB( I-J+1, J ) / BII + 260 CONTINUE + DO 290 K = I - KBT, I - 1 + DO 270 J = I - KBT, K + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ BB( I-J+1, J )*CONJG( AB( I-K+1, + $ K ) ) - CONJG( BB( I-K+1, K ) )* + $ AB( I-J+1, J ) + REAL( AB( 1, I ) )* + $ BB( I-J+1, J )*CONJG( BB( I-K+1, + $ K ) ) + 270 CONTINUE + DO 280 J = MAX( 1, I-KA ), I - KBT - 1 + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ CONJG( BB( I-K+1, K ) )* + $ AB( I-J+1, J ) + 280 CONTINUE + 290 CONTINUE + DO 310 J = I, I1 + DO 300 K = MAX( J-KA, I-KBT ), I - 1 + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ BB( I-K+1, K )*AB( J-I+1, I ) + 300 CONTINUE + 310 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL CSSCAL( N-M, ONE / BII, X( M+1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL CGERU( N-M, KBT, -CONE, X( M+1, I ), 1, + $ BB( KBT+1, I-KBT ), LDBB-1, + $ X( M+1, I-KBT ), LDX ) + END IF +* +* store a(i1,i) in RA1 for use in next loop over K +* + RA1 = AB( I1-I+1, I ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions down toward the bottom of the +* band +* + DO 360 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN +* +* generate rotation to annihilate a(i-k+ka+1,i) +* + CALL CLARTG( AB( KA1-K, I ), RA1, RWORK( I-K+KA-M ), + $ WORK( I-K+KA-M ), RA ) +* +* create nonzero element a(i-k+ka+1,i-k) outside the +* band and store it in WORK(i-k) +* + T = -BB( K+1, I-K )*RA1 + WORK( I-K ) = RWORK( I-K+KA-M )*T - + $ CONJG( WORK( I-K+KA-M ) )*AB( KA1, I-K ) + AB( KA1, I-K ) = WORK( I-K+KA-M )*T + + $ RWORK( I-K+KA-M )*AB( KA1, I-K ) + RA1 = RA + END IF + END IF + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MAX( J2, I+2*KA-K+1 ) + ELSE + J2T = J2 + END IF + NRT = ( N-J2T+KA ) / KA1 + DO 320 J = J2T, J1, KA1 +* +* create nonzero element a(j+1,j-ka) outside the band +* and store it in WORK(j-m) +* + WORK( J-M ) = WORK( J-M )*AB( KA1, J-KA+1 ) + AB( KA1, J-KA+1 ) = RWORK( J-M )*AB( KA1, J-KA+1 ) + 320 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL CLARGV( NRT, AB( KA1, J2T-KA ), INCA, WORK( J2T-M ), + $ KA1, RWORK( J2T-M ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the left +* + DO 330 L = 1, KA - 1 + CALL CLARTV( NR, AB( L+1, J2-L ), INCA, + $ AB( L+2, J2-L ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 330 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL CLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ), + $ INCA, RWORK( J2-M ), WORK( J2-M ), KA1 ) +* + CALL CLACGV( NR, WORK( J2-M ), KA1 ) + END IF +* +* start applying rotations in 1st set from the right +* + DO 340 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 340 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 350 J = J2, J1, KA1 + CALL CROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ RWORK( J-M ), WORK( J-M ) ) + 350 CONTINUE + END IF + 360 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.LE.N .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i-kbt+ka+1,i-kbt) outside the +* band and store it in WORK(i-kbt) +* + WORK( I-KBT ) = -BB( KBT+1, I-KBT )*RA1 + END IF + END IF +* + DO 400 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1 + ELSE + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + END IF +* +* finish applying rotations in 2nd set from the right +* + DO 370 L = KB - K, 1, -1 + NRT = ( N-J2+KA+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( KA1-L+1, J2-KA ), INCA, + $ AB( KA1-L, J2-KA+1 ), INCA, + $ RWORK( J2-KA ), WORK( J2-KA ), KA1 ) + 370 CONTINUE + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + DO 380 J = J1, J2, -KA1 + WORK( J ) = WORK( J-KA ) + RWORK( J ) = RWORK( J-KA ) + 380 CONTINUE + DO 390 J = J2, J1, KA1 +* +* create nonzero element a(j+1,j-ka) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( KA1, J-KA+1 ) + AB( KA1, J-KA+1 ) = RWORK( J )*AB( KA1, J-KA+1 ) + 390 CONTINUE + IF( UPDATE ) THEN + IF( I-K.LT.N-KA .AND. K.LE.KBT ) + $ WORK( I-K+KA ) = WORK( I-K ) + END IF + 400 CONTINUE +* + DO 440 K = KB, 1, -1 + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL CLARGV( NR, AB( KA1, J2-KA ), INCA, WORK( J2 ), KA1, + $ RWORK( J2 ), KA1 ) +* +* apply rotations in 2nd set from the left +* + DO 410 L = 1, KA - 1 + CALL CLARTV( NR, AB( L+1, J2-L ), INCA, + $ AB( L+2, J2-L ), INCA, RWORK( J2 ), + $ WORK( J2 ), KA1 ) + 410 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL CLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ), + $ INCA, RWORK( J2 ), WORK( J2 ), KA1 ) +* + CALL CLACGV( NR, WORK( J2 ), KA1 ) + END IF +* +* start applying rotations in 2nd set from the right +* + DO 420 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, RWORK( J2 ), + $ WORK( J2 ), KA1 ) + 420 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 430 J = J2, J1, KA1 + CALL CROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ RWORK( J ), WORK( J ) ) + 430 CONTINUE + END IF + 440 CONTINUE +* + DO 460 K = 1, KB - 1 + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 +* +* finish applying rotations in 1st set from the right +* + DO 450 L = KB - K, 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 450 CONTINUE + 460 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 470 J = N - 1, J2 + KA, -1 + RWORK( J-M ) = RWORK( J-KA-M ) + WORK( J-M ) = WORK( J-KA-M ) + 470 CONTINUE + END IF +* + END IF +* + GO TO 10 +* + 480 CONTINUE +* +* **************************** Phase 2 ***************************** +* +* The logical structure of this phase is: +* +* UPDATE = .TRUE. +* DO I = 1, M +* use S(i) to update A and create a new bulge +* apply rotations to push all bulges KA positions upward +* END DO +* UPDATE = .FALSE. +* DO I = M - KA - 1, 2, -1 +* apply rotations to push all bulges KA positions upward +* END DO +* +* To avoid duplicating code, the two loops are merged. +* + UPDATE = .TRUE. + I = 0 + 490 CONTINUE + IF( UPDATE ) THEN + I = I + 1 + KBT = MIN( KB, M-I ) + I0 = I + 1 + I1 = MAX( 1, I-KA ) + I2 = I + KBT - KA1 + IF( I.GT.M ) THEN + UPDATE = .FALSE. + I = I - 1 + I0 = M + 1 + IF( KA.EQ.0 ) + $ RETURN + GO TO 490 + END IF + ELSE + I = I - KA + IF( I.LT.2 ) + $ RETURN + END IF +* + IF( I.LT.M-KBT ) THEN + NX = M + ELSE + NX = N + END IF +* + IF( UPPER ) THEN +* +* Transform A, working with the upper triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**H * A * inv(S(i)) +* + BII = REAL( BB( KB1, I ) ) + AB( KA1, I ) = ( REAL( AB( KA1, I ) ) / BII ) / BII + DO 500 J = I1, I - 1 + AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII + 500 CONTINUE + DO 510 J = I + 1, MIN( N, I+KA ) + AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII + 510 CONTINUE + DO 540 K = I + 1, I + KBT + DO 520 J = K, I + KBT + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ BB( I-J+KB1, J )* + $ CONJG( AB( I-K+KA1, K ) ) - + $ CONJG( BB( I-K+KB1, K ) )* + $ AB( I-J+KA1, J ) + + $ REAL( AB( KA1, I ) )* + $ BB( I-J+KB1, J )* + $ CONJG( BB( I-K+KB1, K ) ) + 520 CONTINUE + DO 530 J = I + KBT + 1, MIN( N, I+KA ) + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ CONJG( BB( I-K+KB1, K ) )* + $ AB( I-J+KA1, J ) + 530 CONTINUE + 540 CONTINUE + DO 560 J = I1, I + DO 550 K = I + 1, MIN( J+KA, I+KBT ) + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ BB( I-K+KB1, K )*AB( J-I+KA1, I ) + 550 CONTINUE + 560 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL CSSCAL( NX, ONE / BII, X( 1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL CGERU( NX, KBT, -CONE, X( 1, I ), 1, + $ BB( KB, I+1 ), LDBB-1, X( 1, I+1 ), LDX ) + END IF +* +* store a(i1,i) in RA1 for use in next loop over K +* + RA1 = AB( I1-I+KA1, I ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions up toward the top of the band +* + DO 610 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN +* +* generate rotation to annihilate a(i+k-ka-1,i) +* + CALL CLARTG( AB( K+1, I ), RA1, RWORK( I+K-KA ), + $ WORK( I+K-KA ), RA ) +* +* create nonzero element a(i+k-ka-1,i+k) outside the +* band and store it in WORK(m-kb+i+k) +* + T = -BB( KB1-K, I+K )*RA1 + WORK( M-KB+I+K ) = RWORK( I+K-KA )*T - + $ CONJG( WORK( I+K-KA ) )* + $ AB( 1, I+K ) + AB( 1, I+K ) = WORK( I+K-KA )*T + + $ RWORK( I+K-KA )*AB( 1, I+K ) + RA1 = RA + END IF + END IF + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MIN( J2, I-2*KA+K-1 ) + ELSE + J2T = J2 + END IF + NRT = ( J2T+KA-1 ) / KA1 + DO 570 J = J1, J2T, KA1 +* +* create nonzero element a(j-1,j+ka) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( 1, J+KA-1 ) + AB( 1, J+KA-1 ) = RWORK( J )*AB( 1, J+KA-1 ) + 570 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL CLARGV( NRT, AB( 1, J1+KA ), INCA, WORK( J1 ), KA1, + $ RWORK( J1 ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the left +* + DO 580 L = 1, KA - 1 + CALL CLARTV( NR, AB( KA1-L, J1+L ), INCA, + $ AB( KA-L, J1+L ), INCA, RWORK( J1 ), + $ WORK( J1 ), KA1 ) + 580 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL CLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ), + $ AB( KA, J1 ), INCA, RWORK( J1 ), WORK( J1 ), + $ KA1 ) +* + CALL CLACGV( NR, WORK( J1 ), KA1 ) + END IF +* +* start applying rotations in 1st set from the right +* + DO 590 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, RWORK( J1T ), + $ WORK( J1T ), KA1 ) + 590 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 600 J = J1, J2, KA1 + CALL CROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ RWORK( J ), WORK( J ) ) + 600 CONTINUE + END IF + 610 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.GT.0 .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i+kbt-ka-1,i+kbt) outside the +* band and store it in WORK(m-kb+i+kbt) +* + WORK( M-KB+I+KBT ) = -BB( KB1-KBT, I+KBT )*RA1 + END IF + END IF +* + DO 650 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1 + ELSE + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + END IF +* +* finish applying rotations in 2nd set from the right +* + DO 620 L = KB - K, 1, -1 + NRT = ( J2+KA+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( L, J1T+KA ), INCA, + $ AB( L+1, J1T+KA-1 ), INCA, + $ RWORK( M-KB+J1T+KA ), + $ WORK( M-KB+J1T+KA ), KA1 ) + 620 CONTINUE + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + DO 630 J = J1, J2, KA1 + WORK( M-KB+J ) = WORK( M-KB+J+KA ) + RWORK( M-KB+J ) = RWORK( M-KB+J+KA ) + 630 CONTINUE + DO 640 J = J1, J2, KA1 +* +* create nonzero element a(j-1,j+ka) outside the band +* and store it in WORK(m-kb+j) +* + WORK( M-KB+J ) = WORK( M-KB+J )*AB( 1, J+KA-1 ) + AB( 1, J+KA-1 ) = RWORK( M-KB+J )*AB( 1, J+KA-1 ) + 640 CONTINUE + IF( UPDATE ) THEN + IF( I+K.GT.KA1 .AND. K.LE.KBT ) + $ WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K ) + END IF + 650 CONTINUE +* + DO 690 K = KB, 1, -1 + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL CLARGV( NR, AB( 1, J1+KA ), INCA, WORK( M-KB+J1 ), + $ KA1, RWORK( M-KB+J1 ), KA1 ) +* +* apply rotations in 2nd set from the left +* + DO 660 L = 1, KA - 1 + CALL CLARTV( NR, AB( KA1-L, J1+L ), INCA, + $ AB( KA-L, J1+L ), INCA, RWORK( M-KB+J1 ), + $ WORK( M-KB+J1 ), KA1 ) + 660 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL CLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ), + $ AB( KA, J1 ), INCA, RWORK( M-KB+J1 ), + $ WORK( M-KB+J1 ), KA1 ) +* + CALL CLACGV( NR, WORK( M-KB+J1 ), KA1 ) + END IF +* +* start applying rotations in 2nd set from the right +* + DO 670 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, + $ RWORK( M-KB+J1T ), WORK( M-KB+J1T ), + $ KA1 ) + 670 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 680 J = J1, J2, KA1 + CALL CROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ RWORK( M-KB+J ), WORK( M-KB+J ) ) + 680 CONTINUE + END IF + 690 CONTINUE +* + DO 710 K = 1, KB - 1 + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 +* +* finish applying rotations in 1st set from the right +* + DO 700 L = KB - K, 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, RWORK( J1T ), + $ WORK( J1T ), KA1 ) + 700 CONTINUE + 710 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 720 J = 2, I2 - KA + RWORK( J ) = RWORK( J+KA ) + WORK( J ) = WORK( J+KA ) + 720 CONTINUE + END IF +* + ELSE +* +* Transform A, working with the lower triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**H * A * inv(S(i)) +* + BII = REAL( BB( 1, I ) ) + AB( 1, I ) = ( REAL( AB( 1, I ) ) / BII ) / BII + DO 730 J = I1, I - 1 + AB( I-J+1, J ) = AB( I-J+1, J ) / BII + 730 CONTINUE + DO 740 J = I + 1, MIN( N, I+KA ) + AB( J-I+1, I ) = AB( J-I+1, I ) / BII + 740 CONTINUE + DO 770 K = I + 1, I + KBT + DO 750 J = K, I + KBT + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ BB( J-I+1, I )*CONJG( AB( K-I+1, + $ I ) ) - CONJG( BB( K-I+1, I ) )* + $ AB( J-I+1, I ) + REAL( AB( 1, I ) )* + $ BB( J-I+1, I )*CONJG( BB( K-I+1, + $ I ) ) + 750 CONTINUE + DO 760 J = I + KBT + 1, MIN( N, I+KA ) + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ CONJG( BB( K-I+1, I ) )* + $ AB( J-I+1, I ) + 760 CONTINUE + 770 CONTINUE + DO 790 J = I1, I + DO 780 K = I + 1, MIN( J+KA, I+KBT ) + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ BB( K-I+1, I )*AB( I-J+1, J ) + 780 CONTINUE + 790 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL CSSCAL( NX, ONE / BII, X( 1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL CGERC( NX, KBT, -CONE, X( 1, I ), 1, BB( 2, I ), + $ 1, X( 1, I+1 ), LDX ) + END IF +* +* store a(i,i1) in RA1 for use in next loop over K +* + RA1 = AB( I-I1+1, I1 ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions up toward the top of the band +* + DO 840 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN +* +* generate rotation to annihilate a(i,i+k-ka-1) +* + CALL CLARTG( AB( KA1-K, I+K-KA ), RA1, + $ RWORK( I+K-KA ), WORK( I+K-KA ), RA ) +* +* create nonzero element a(i+k,i+k-ka-1) outside the +* band and store it in WORK(m-kb+i+k) +* + T = -BB( K+1, I )*RA1 + WORK( M-KB+I+K ) = RWORK( I+K-KA )*T - + $ CONJG( WORK( I+K-KA ) )* + $ AB( KA1, I+K-KA ) + AB( KA1, I+K-KA ) = WORK( I+K-KA )*T + + $ RWORK( I+K-KA )*AB( KA1, I+K-KA ) + RA1 = RA + END IF + END IF + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MIN( J2, I-2*KA+K-1 ) + ELSE + J2T = J2 + END IF + NRT = ( J2T+KA-1 ) / KA1 + DO 800 J = J1, J2T, KA1 +* +* create nonzero element a(j+ka,j-1) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( KA1, J-1 ) + AB( KA1, J-1 ) = RWORK( J )*AB( KA1, J-1 ) + 800 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL CLARGV( NRT, AB( KA1, J1 ), INCA, WORK( J1 ), KA1, + $ RWORK( J1 ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the right +* + DO 810 L = 1, KA - 1 + CALL CLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ), + $ INCA, RWORK( J1 ), WORK( J1 ), KA1 ) + 810 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL CLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ), + $ AB( 2, J1-1 ), INCA, RWORK( J1 ), + $ WORK( J1 ), KA1 ) +* + CALL CLACGV( NR, WORK( J1 ), KA1 ) + END IF +* +* start applying rotations in 1st set from the left +* + DO 820 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ RWORK( J1T ), WORK( J1T ), KA1 ) + 820 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 830 J = J1, J2, KA1 + CALL CROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ RWORK( J ), CONJG( WORK( J ) ) ) + 830 CONTINUE + END IF + 840 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.GT.0 .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i+kbt,i+kbt-ka-1) outside the +* band and store it in WORK(m-kb+i+kbt) +* + WORK( M-KB+I+KBT ) = -BB( KBT+1, I )*RA1 + END IF + END IF +* + DO 880 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1 + ELSE + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + END IF +* +* finish applying rotations in 2nd set from the left +* + DO 850 L = KB - K, 1, -1 + NRT = ( J2+KA+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( KA1-L+1, J1T+L-1 ), INCA, + $ AB( KA1-L, J1T+L-1 ), INCA, + $ RWORK( M-KB+J1T+KA ), + $ WORK( M-KB+J1T+KA ), KA1 ) + 850 CONTINUE + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + DO 860 J = J1, J2, KA1 + WORK( M-KB+J ) = WORK( M-KB+J+KA ) + RWORK( M-KB+J ) = RWORK( M-KB+J+KA ) + 860 CONTINUE + DO 870 J = J1, J2, KA1 +* +* create nonzero element a(j+ka,j-1) outside the band +* and store it in WORK(m-kb+j) +* + WORK( M-KB+J ) = WORK( M-KB+J )*AB( KA1, J-1 ) + AB( KA1, J-1 ) = RWORK( M-KB+J )*AB( KA1, J-1 ) + 870 CONTINUE + IF( UPDATE ) THEN + IF( I+K.GT.KA1 .AND. K.LE.KBT ) + $ WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K ) + END IF + 880 CONTINUE +* + DO 920 K = KB, 1, -1 + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL CLARGV( NR, AB( KA1, J1 ), INCA, WORK( M-KB+J1 ), + $ KA1, RWORK( M-KB+J1 ), KA1 ) +* +* apply rotations in 2nd set from the right +* + DO 890 L = 1, KA - 1 + CALL CLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ), + $ INCA, RWORK( M-KB+J1 ), WORK( M-KB+J1 ), + $ KA1 ) + 890 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL CLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ), + $ AB( 2, J1-1 ), INCA, RWORK( M-KB+J1 ), + $ WORK( M-KB+J1 ), KA1 ) +* + CALL CLACGV( NR, WORK( M-KB+J1 ), KA1 ) + END IF +* +* start applying rotations in 2nd set from the left +* + DO 900 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ RWORK( M-KB+J1T ), WORK( M-KB+J1T ), + $ KA1 ) + 900 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 910 J = J1, J2, KA1 + CALL CROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ RWORK( M-KB+J ), CONJG( WORK( M-KB+J ) ) ) + 910 CONTINUE + END IF + 920 CONTINUE +* + DO 940 K = 1, KB - 1 + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 +* +* finish applying rotations in 1st set from the left +* + DO 930 L = KB - K, 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ RWORK( J1T ), WORK( J1T ), KA1 ) + 930 CONTINUE + 940 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 950 J = 2, I2 - KA + RWORK( J ) = RWORK( J+KA ) + WORK( J ) = WORK( J+KA ) + 950 CONTINUE + END IF +* + END IF +* + GO TO 490 +* +* End of CHBGST +* + END diff --git a/dspl/liblapack/SRC/chbgv.f b/dspl/liblapack/SRC/chbgv.f new file mode 100644 index 0000000..d20372e --- /dev/null +++ b/dspl/liblapack/SRC/chbgv.f @@ -0,0 +1,287 @@ +*> \brief \b CHBGV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHBGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, +* LDZ, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, N +* .. +* .. Array Arguments .. +* REAL RWORK( * ), W( * ) +* COMPLEX AB( LDAB, * ), BB( LDBB, * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHBGV computes all the eigenvalues, and optionally, the eigenvectors +*> of a complex generalized Hermitian-definite banded eigenproblem, of +*> the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian +*> and banded, and B is also positive definite. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] KA +*> \verbatim +*> KA is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KA >= 0. +*> \endverbatim +*> +*> \param[in] KB +*> \verbatim +*> KB is INTEGER +*> The number of superdiagonals of the matrix B if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KB >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first ka+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +*> +*> On exit, the contents of AB are destroyed. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KA+1. +*> \endverbatim +*> +*> \param[in,out] BB +*> \verbatim +*> BB is COMPLEX array, dimension (LDBB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix B, stored in the first kb+1 rows of the array. The +*> j-th column of B is stored in the j-th column of the array BB +*> as follows: +*> if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +*> if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +*> +*> On exit, the factor S from the split Cholesky factorization +*> B = S**H*S, as returned by CPBSTF. +*> \endverbatim +*> +*> \param[in] LDBB +*> \verbatim +*> LDBB is INTEGER +*> The leading dimension of the array BB. LDBB >= KB+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +*> eigenvectors, with the i-th column of Z holding the +*> eigenvector associated with W(i). The eigenvectors are +*> normalized so that Z**H*B*Z = I. +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is: +*> <= N: the algorithm failed to converge: +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then CPBSTF +*> returned INFO = i: B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHEReigen +* +* ===================================================================== + SUBROUTINE CHBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, + $ LDZ, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, N +* .. +* .. Array Arguments .. + REAL RWORK( * ), W( * ) + COMPLEX AB( LDAB, * ), BB( LDBB, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, WANTZ + CHARACTER VECT + INTEGER IINFO, INDE, INDWRK +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHBGST, CHBTRD, CPBSTF, CSTEQR, SSTERF, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBGV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL CPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + INDE = 1 + INDWRK = INDE + N + CALL CHBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ, + $ WORK, RWORK( INDWRK ), IINFO ) +* +* Reduce to tridiagonal form. +* + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL CHBTRD( VECT, UPLO, N, KA, AB, LDAB, W, RWORK( INDE ), Z, + $ LDZ, WORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call CSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL CSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ, + $ RWORK( INDWRK ), INFO ) + END IF + RETURN +* +* End of CHBGV +* + END diff --git a/dspl/liblapack/SRC/chbgvd.f b/dspl/liblapack/SRC/chbgvd.f new file mode 100644 index 0000000..a6d38f0 --- /dev/null +++ b/dspl/liblapack/SRC/chbgvd.f @@ -0,0 +1,407 @@ +*> \brief \b CHBGVD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHBGVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, +* Z, LDZ, WORK, LWORK, RWORK, LRWORK, IWORK, +* LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, LIWORK, LRWORK, +* $ LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX AB( LDAB, * ), BB( LDBB, * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHBGVD computes all the eigenvalues, and optionally, the eigenvectors +*> of a complex generalized Hermitian-definite banded eigenproblem, of +*> the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian +*> and banded, and B is also positive definite. If eigenvectors are +*> desired, it uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] KA +*> \verbatim +*> KA is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KA >= 0. +*> \endverbatim +*> +*> \param[in] KB +*> \verbatim +*> KB is INTEGER +*> The number of superdiagonals of the matrix B if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KB >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first ka+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +*> +*> On exit, the contents of AB are destroyed. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KA+1. +*> \endverbatim +*> +*> \param[in,out] BB +*> \verbatim +*> BB is COMPLEX array, dimension (LDBB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix B, stored in the first kb+1 rows of the array. The +*> j-th column of B is stored in the j-th column of the array BB +*> as follows: +*> if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +*> if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +*> +*> On exit, the factor S from the split Cholesky factorization +*> B = S**H*S, as returned by CPBSTF. +*> \endverbatim +*> +*> \param[in] LDBB +*> \verbatim +*> LDBB is INTEGER +*> The leading dimension of the array BB. LDBB >= KB+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +*> eigenvectors, with the i-th column of Z holding the +*> eigenvector associated with W(i). The eigenvectors are +*> normalized so that Z**H*B*Z = I. +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO=0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK >= 1. +*> If JOBZ = 'N' and N > 1, LWORK >= N. +*> If JOBZ = 'V' and N > 1, LWORK >= 2*N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (MAX(1,LRWORK)) +*> On exit, if INFO=0, RWORK(1) returns the optimal LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of array RWORK. +*> If N <= 1, LRWORK >= 1. +*> If JOBZ = 'N' and N > 1, LRWORK >= N. +*> If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO=0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK >= 1. +*> If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is: +*> <= N: the algorithm failed to converge: +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then CPBSTF +*> returned INFO = i: B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHEReigen +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE CHBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, + $ Z, LDZ, WORK, LWORK, RWORK, LRWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, LIWORK, LRWORK, + $ LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX AB( LDAB, * ), BB( LDBB, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE, CZERO + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), + $ CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER VECT + INTEGER IINFO, INDE, INDWK2, INDWRK, LIWMIN, LLRWK, + $ LLWK2, LRWMIN, LWMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSTERF, XERBLA, CGEMM, CHBGST, CHBTRD, CLACPY, + $ CPBSTF, CSTEDC +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1+N + LRWMIN = 1+N + LIWMIN = 1 + ELSE IF( WANTZ ) THEN + LWMIN = 2*N**2 + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + LRWMIN = N + LIWMIN = 1 + END IF + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -16 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL CPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + INDE = 1 + INDWRK = INDE + N + INDWK2 = 1 + N*N + LLWK2 = LWORK - INDWK2 + 2 + LLRWK = LRWORK - INDWRK + 2 + CALL CHBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ, + $ WORK, RWORK, IINFO ) +* +* Reduce Hermitian band matrix to tridiagonal form. +* + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL CHBTRD( VECT, UPLO, N, KA, AB, LDAB, W, RWORK( INDE ), Z, + $ LDZ, WORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call CSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL CSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ), + $ LLWK2, RWORK( INDWRK ), LLRWK, IWORK, LIWORK, + $ INFO ) + CALL CGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO, + $ WORK( INDWK2 ), N ) + CALL CLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of CHBGVD +* + END diff --git a/dspl/liblapack/SRC/chbgvx.f b/dspl/liblapack/SRC/chbgvx.f new file mode 100644 index 0000000..db4632e --- /dev/null +++ b/dspl/liblapack/SRC/chbgvx.f @@ -0,0 +1,535 @@ +*> \brief \b CHBGVX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHBGVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHBGVX( JOBZ, RANGE, UPLO, N, KA, KB, AB, LDAB, BB, +* LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, +* LDZ, WORK, RWORK, IWORK, IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, KA, KB, LDAB, LDBB, LDQ, LDZ, M, +* $ N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX AB( LDAB, * ), BB( LDBB, * ), Q( LDQ, * ), +* $ WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHBGVX computes all the eigenvalues, and optionally, the eigenvectors +*> of a complex generalized Hermitian-definite banded eigenproblem, of +*> the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian +*> and banded, and B is also positive definite. Eigenvalues and +*> eigenvectors can be selected by specifying either all eigenvalues, +*> a range of values or a range of indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found; +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found; +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] KA +*> \verbatim +*> KA is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KA >= 0. +*> \endverbatim +*> +*> \param[in] KB +*> \verbatim +*> KB is INTEGER +*> The number of superdiagonals of the matrix B if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KB >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first ka+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +*> +*> On exit, the contents of AB are destroyed. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KA+1. +*> \endverbatim +*> +*> \param[in,out] BB +*> \verbatim +*> BB is COMPLEX array, dimension (LDBB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix B, stored in the first kb+1 rows of the array. The +*> j-th column of B is stored in the j-th column of the array BB +*> as follows: +*> if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +*> if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +*> +*> On exit, the factor S from the split Cholesky factorization +*> B = S**H*S, as returned by CPBSTF. +*> \endverbatim +*> +*> \param[in] LDBB +*> \verbatim +*> LDBB is INTEGER +*> The leading dimension of the array BB. LDBB >= KB+1. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ, N) +*> If JOBZ = 'V', the n-by-n matrix used in the reduction of +*> A*x = (lambda)*B*x to standard form, i.e. C*x = (lambda)*x, +*> and consequently C to tridiagonal form. +*> If JOBZ = 'N', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. If JOBZ = 'N', +*> LDQ >= 1. If JOBZ = 'V', LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing AP to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +*> eigenvectors, with the i-th column of Z holding the +*> eigenvector associated with W(i). The eigenvectors are +*> normalized so that Z**H*B*Z = I. +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is: +*> <= N: then i eigenvectors failed to converge. Their +*> indices are stored in array IFAIL. +*> > N: if INFO = N + i, for 1 <= i <= N, then CPBSTF +*> returned INFO = i: B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHEReigen +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE CHBGVX( JOBZ, RANGE, UPLO, N, KA, KB, AB, LDAB, BB, + $ LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, + $ LDZ, WORK, RWORK, IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KA, KB, LDAB, LDBB, LDQ, LDZ, M, + $ N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX AB( LDAB, * ), BB( LDBB, * ), Q( LDQ, * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, TEST, UPPER, VALEIG, WANTZ + CHARACTER ORDER, VECT + INTEGER I, IINFO, INDD, INDE, INDEE, INDIBL, INDISP, + $ INDIWK, INDRWK, INDWRK, ITMP1, J, JJ, NSPLIT + REAL TMP1 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMV, CHBGST, CHBTRD, CLACPY, CPBSTF, + $ CSTEIN, CSTEQR, CSWAP, SCOPY, SSTEBZ, SSTERF, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KA.LT.0 ) THEN + INFO = -5 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -8 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -10 + ELSE IF( LDQ.LT.1 .OR. ( WANTZ .AND. LDQ.LT.N ) ) THEN + INFO = -12 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -14 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -15 + ELSE IF ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -16 + END IF + END IF + END IF + IF( INFO.EQ.0) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -21 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBGVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL CPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + CALL CHBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Q, LDQ, + $ WORK, RWORK, IINFO ) +* +* Solve the standard eigenvalue problem. +* Reduce Hermitian band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N + INDWRK = 1 + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL CHBTRD( VECT, UPLO, N, KA, AB, LDAB, RWORK( INDD ), + $ RWORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call SSTERF or CSTEQR. If this fails for some +* eigenvalue, then try SSTEBZ. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG .OR. TEST ) .AND. ( ABSTOL.LE.ZERO ) ) THEN + CALL SCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL CLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL CSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, +* call CSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL CSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by CSTEIN. +* + DO 20 J = 1, M + CALL CCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL CGEMV( 'N', N, N, CONE, Q, LDQ, WORK, 1, CZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* + 30 CONTINUE +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* + RETURN +* +* End of CHBGVX +* + END diff --git a/dspl/liblapack/SRC/chbtrd.f b/dspl/liblapack/SRC/chbtrd.f new file mode 100644 index 0000000..126cc12 --- /dev/null +++ b/dspl/liblapack/SRC/chbtrd.f @@ -0,0 +1,677 @@ +*> \brief \b CHBTRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHBTRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHBTRD( VECT, UPLO, N, KD, AB, LDAB, D, E, Q, LDQ, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, VECT +* INTEGER INFO, KD, LDAB, LDQ, N +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ) +* COMPLEX AB( LDAB, * ), Q( LDQ, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHBTRD reduces a complex Hermitian band matrix A to real symmetric +*> tridiagonal form T by a unitary similarity transformation: +*> Q**H * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': do not form Q; +*> = 'V': form Q; +*> = 'U': update a matrix X, by forming X*Q. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> On exit, the diagonal elements of AB are overwritten by the +*> diagonal elements of the tridiagonal matrix T; if KD > 0, the +*> elements on the first superdiagonal (if UPLO = 'U') or the +*> first subdiagonal (if UPLO = 'L') are overwritten by the +*> off-diagonal elements of T; the rest of AB is overwritten by +*> values generated during the reduction. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ,N) +*> On entry, if VECT = 'U', then Q must contain an N-by-N +*> matrix X; if VECT = 'N' or 'V', then Q need not be set. +*> +*> On exit: +*> if VECT = 'V', Q contains the N-by-N unitary matrix Q; +*> if VECT = 'U', Q contains the product X*Q; +*> if VECT = 'N', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= 1, and LDQ >= N if VECT = 'V' or 'U'. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Modified by Linda Kaufman, Bell Labs. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CHBTRD( VECT, UPLO, N, KD, AB, LDAB, D, E, Q, LDQ, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO, VECT + INTEGER INFO, KD, LDAB, LDQ, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX AB( LDAB, * ), Q( LDQ, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL INITQ, UPPER, WANTQ + INTEGER I, I2, IBL, INCA, INCX, IQAEND, IQB, IQEND, J, + $ J1, J1END, J1INC, J2, JEND, JIN, JINC, K, KD1, + $ KDM1, KDN, L, LAST, LEND, NQ, NR, NRT + REAL ABST + COMPLEX T, TEMP +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLAR2V, CLARGV, CLARTG, CLARTV, CLASET, + $ CROT, CSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, MIN, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INITQ = LSAME( VECT, 'V' ) + WANTQ = INITQ .OR. LSAME( VECT, 'U' ) + UPPER = LSAME( UPLO, 'U' ) + KD1 = KD + 1 + KDM1 = KD - 1 + INCX = LDAB - 1 + IQEND = 1 +* + INFO = 0 + IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD1 ) THEN + INFO = -6 + ELSE IF( LDQ.LT.MAX( 1, N ) .AND. WANTQ ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHBTRD', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize Q to the unit matrix, if needed +* + IF( INITQ ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) +* +* Wherever possible, plane rotations are generated and applied in +* vector operations of length NR over the index set J1:J2:KD1. +* +* The real cosines and complex sines of the plane rotations are +* stored in the arrays D and WORK. +* + INCA = KD1*LDAB + KDN = MIN( N-1, KD ) + IF( UPPER ) THEN +* + IF( KD.GT.1 ) THEN +* +* Reduce to complex Hermitian tridiagonal form, working with +* the upper triangle +* + NR = 0 + J1 = KDN + 2 + J2 = 1 +* + AB( KD1, 1 ) = REAL( AB( KD1, 1 ) ) + DO 90 I = 1, N - 2 +* +* Reduce i-th row of matrix to tridiagonal form +* + DO 80 K = KDN + 1, 2, -1 + J1 = J1 + KDN + J2 = J2 + KDN +* + IF( NR.GT.0 ) THEN +* +* generate plane rotations to annihilate nonzero +* elements which have been created outside the band +* + CALL CLARGV( NR, AB( 1, J1-1 ), INCA, WORK( J1 ), + $ KD1, D( J1 ), KD1 ) +* +* apply rotations from the right +* +* +* Dependent on the the number of diagonals either +* CLARTV or CROT is used +* + IF( NR.GE.2*KD-1 ) THEN + DO 10 L = 1, KD - 1 + CALL CLARTV( NR, AB( L+1, J1-1 ), INCA, + $ AB( L, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) + 10 CONTINUE +* + ELSE + JEND = J1 + ( NR-1 )*KD1 + DO 20 JINC = J1, JEND, KD1 + CALL CROT( KDM1, AB( 2, JINC-1 ), 1, + $ AB( 1, JINC ), 1, D( JINC ), + $ WORK( JINC ) ) + 20 CONTINUE + END IF + END IF +* +* + IF( K.GT.2 ) THEN + IF( K.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i,i+k-1) +* within the band +* + CALL CLARTG( AB( KD-K+3, I+K-2 ), + $ AB( KD-K+2, I+K-1 ), D( I+K-1 ), + $ WORK( I+K-1 ), TEMP ) + AB( KD-K+3, I+K-2 ) = TEMP +* +* apply rotation from the right +* + CALL CROT( K-3, AB( KD-K+4, I+K-2 ), 1, + $ AB( KD-K+3, I+K-1 ), 1, D( I+K-1 ), + $ WORK( I+K-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KDN - 1 + END IF +* +* apply plane rotations from both sides to diagonal +* blocks +* + IF( NR.GT.0 ) + $ CALL CLAR2V( NR, AB( KD1, J1-1 ), AB( KD1, J1 ), + $ AB( KD, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) +* +* apply plane rotations from the left +* + IF( NR.GT.0 ) THEN + CALL CLACGV( NR, WORK( J1 ), KD1 ) + IF( 2*KD-1.LT.NR ) THEN +* +* Dependent on the the number of diagonals either +* CLARTV or CROT is used +* + DO 30 L = 1, KD - 1 + IF( J2+L.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( KD-L, J1+L ), INCA, + $ AB( KD-L+1, J1+L ), INCA, + $ D( J1 ), WORK( J1 ), KD1 ) + 30 CONTINUE + ELSE + J1END = J1 + KD1*( NR-2 ) + IF( J1END.GE.J1 ) THEN + DO 40 JIN = J1, J1END, KD1 + CALL CROT( KD-1, AB( KD-1, JIN+1 ), INCX, + $ AB( KD, JIN+1 ), INCX, + $ D( JIN ), WORK( JIN ) ) + 40 CONTINUE + END IF + LEND = MIN( KDM1, N-J2 ) + LAST = J1END + KD1 + IF( LEND.GT.0 ) + $ CALL CROT( LEND, AB( KD-1, LAST+1 ), INCX, + $ AB( KD, LAST+1 ), INCX, D( LAST ), + $ WORK( LAST ) ) + END IF + END IF +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + IF( INITQ ) THEN +* +* take advantage of the fact that Q was +* initially the Identity matrix +* + IQEND = MAX( IQEND, J2 ) + I2 = MAX( 0, K-3 ) + IQAEND = 1 + I*KD + IF( K.EQ.2 ) + $ IQAEND = IQAEND + KD + IQAEND = MIN( IQAEND, IQEND ) + DO 50 J = J1, J2, KD1 + IBL = I - I2 / KDM1 + I2 = I2 + 1 + IQB = MAX( 1, J-IBL ) + NQ = 1 + IQAEND - IQB + IQAEND = MIN( IQAEND+KD, IQEND ) + CALL CROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ), + $ 1, D( J ), CONJG( WORK( J ) ) ) + 50 CONTINUE + ELSE +* + DO 60 J = J1, J2, KD1 + CALL CROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ D( J ), CONJG( WORK( J ) ) ) + 60 CONTINUE + END IF +* + END IF +* + IF( J2+KDN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KDN - 1 + END IF +* + DO 70 J = J1, J2, KD1 +* +* create nonzero element a(j-1,j+kd) outside the band +* and store it in WORK +* + WORK( J+KD ) = WORK( J )*AB( 1, J+KD ) + AB( 1, J+KD ) = D( J )*AB( 1, J+KD ) + 70 CONTINUE + 80 CONTINUE + 90 CONTINUE + END IF +* + IF( KD.GT.0 ) THEN +* +* make off-diagonal elements real and copy them to E +* + DO 100 I = 1, N - 1 + T = AB( KD, I+1 ) + ABST = ABS( T ) + AB( KD, I+1 ) = ABST + E( I ) = ABST + IF( ABST.NE.ZERO ) THEN + T = T / ABST + ELSE + T = CONE + END IF + IF( I.LT.N-1 ) + $ AB( KD, I+2 ) = AB( KD, I+2 )*T + IF( WANTQ ) THEN + CALL CSCAL( N, CONJG( T ), Q( 1, I+1 ), 1 ) + END IF + 100 CONTINUE + ELSE +* +* set E to zero if original matrix was diagonal +* + DO 110 I = 1, N - 1 + E( I ) = ZERO + 110 CONTINUE + END IF +* +* copy diagonal elements to D +* + DO 120 I = 1, N + D( I ) = AB( KD1, I ) + 120 CONTINUE +* + ELSE +* + IF( KD.GT.1 ) THEN +* +* Reduce to complex Hermitian tridiagonal form, working with +* the lower triangle +* + NR = 0 + J1 = KDN + 2 + J2 = 1 +* + AB( 1, 1 ) = REAL( AB( 1, 1 ) ) + DO 210 I = 1, N - 2 +* +* Reduce i-th column of matrix to tridiagonal form +* + DO 200 K = KDN + 1, 2, -1 + J1 = J1 + KDN + J2 = J2 + KDN +* + IF( NR.GT.0 ) THEN +* +* generate plane rotations to annihilate nonzero +* elements which have been created outside the band +* + CALL CLARGV( NR, AB( KD1, J1-KD1 ), INCA, + $ WORK( J1 ), KD1, D( J1 ), KD1 ) +* +* apply plane rotations from one side +* +* +* Dependent on the the number of diagonals either +* CLARTV or CROT is used +* + IF( NR.GT.2*KD-1 ) THEN + DO 130 L = 1, KD - 1 + CALL CLARTV( NR, AB( KD1-L, J1-KD1+L ), INCA, + $ AB( KD1-L+1, J1-KD1+L ), INCA, + $ D( J1 ), WORK( J1 ), KD1 ) + 130 CONTINUE + ELSE + JEND = J1 + KD1*( NR-1 ) + DO 140 JINC = J1, JEND, KD1 + CALL CROT( KDM1, AB( KD, JINC-KD ), INCX, + $ AB( KD1, JINC-KD ), INCX, + $ D( JINC ), WORK( JINC ) ) + 140 CONTINUE + END IF +* + END IF +* + IF( K.GT.2 ) THEN + IF( K.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i+k-1,i) +* within the band +* + CALL CLARTG( AB( K-1, I ), AB( K, I ), + $ D( I+K-1 ), WORK( I+K-1 ), TEMP ) + AB( K-1, I ) = TEMP +* +* apply rotation from the left +* + CALL CROT( K-3, AB( K-2, I+1 ), LDAB-1, + $ AB( K-1, I+1 ), LDAB-1, D( I+K-1 ), + $ WORK( I+K-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KDN - 1 + END IF +* +* apply plane rotations from both sides to diagonal +* blocks +* + IF( NR.GT.0 ) + $ CALL CLAR2V( NR, AB( 1, J1-1 ), AB( 1, J1 ), + $ AB( 2, J1-1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) +* +* apply plane rotations from the right +* +* +* Dependent on the the number of diagonals either +* CLARTV or CROT is used +* + IF( NR.GT.0 ) THEN + CALL CLACGV( NR, WORK( J1 ), KD1 ) + IF( NR.GT.2*KD-1 ) THEN + DO 150 L = 1, KD - 1 + IF( J2+L.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL CLARTV( NRT, AB( L+2, J1-1 ), INCA, + $ AB( L+1, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) + 150 CONTINUE + ELSE + J1END = J1 + KD1*( NR-2 ) + IF( J1END.GE.J1 ) THEN + DO 160 J1INC = J1, J1END, KD1 + CALL CROT( KDM1, AB( 3, J1INC-1 ), 1, + $ AB( 2, J1INC ), 1, D( J1INC ), + $ WORK( J1INC ) ) + 160 CONTINUE + END IF + LEND = MIN( KDM1, N-J2 ) + LAST = J1END + KD1 + IF( LEND.GT.0 ) + $ CALL CROT( LEND, AB( 3, LAST-1 ), 1, + $ AB( 2, LAST ), 1, D( LAST ), + $ WORK( LAST ) ) + END IF + END IF +* +* +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + IF( INITQ ) THEN +* +* take advantage of the fact that Q was +* initially the Identity matrix +* + IQEND = MAX( IQEND, J2 ) + I2 = MAX( 0, K-3 ) + IQAEND = 1 + I*KD + IF( K.EQ.2 ) + $ IQAEND = IQAEND + KD + IQAEND = MIN( IQAEND, IQEND ) + DO 170 J = J1, J2, KD1 + IBL = I - I2 / KDM1 + I2 = I2 + 1 + IQB = MAX( 1, J-IBL ) + NQ = 1 + IQAEND - IQB + IQAEND = MIN( IQAEND+KD, IQEND ) + CALL CROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ), + $ 1, D( J ), WORK( J ) ) + 170 CONTINUE + ELSE +* + DO 180 J = J1, J2, KD1 + CALL CROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ D( J ), WORK( J ) ) + 180 CONTINUE + END IF + END IF +* + IF( J2+KDN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KDN - 1 + END IF +* + DO 190 J = J1, J2, KD1 +* +* create nonzero element a(j+kd,j-1) outside the +* band and store it in WORK +* + WORK( J+KD ) = WORK( J )*AB( KD1, J ) + AB( KD1, J ) = D( J )*AB( KD1, J ) + 190 CONTINUE + 200 CONTINUE + 210 CONTINUE + END IF +* + IF( KD.GT.0 ) THEN +* +* make off-diagonal elements real and copy them to E +* + DO 220 I = 1, N - 1 + T = AB( 2, I ) + ABST = ABS( T ) + AB( 2, I ) = ABST + E( I ) = ABST + IF( ABST.NE.ZERO ) THEN + T = T / ABST + ELSE + T = CONE + END IF + IF( I.LT.N-1 ) + $ AB( 2, I+1 ) = AB( 2, I+1 )*T + IF( WANTQ ) THEN + CALL CSCAL( N, T, Q( 1, I+1 ), 1 ) + END IF + 220 CONTINUE + ELSE +* +* set E to zero if original matrix was diagonal +* + DO 230 I = 1, N - 1 + E( I ) = ZERO + 230 CONTINUE + END IF +* +* copy diagonal elements to D +* + DO 240 I = 1, N + D( I ) = AB( 1, I ) + 240 CONTINUE + END IF +* + RETURN +* +* End of CHBTRD +* + END diff --git a/dspl/liblapack/SRC/checon.f b/dspl/liblapack/SRC/checon.f new file mode 100644 index 0000000..d305232 --- /dev/null +++ b/dspl/liblapack/SRC/checon.f @@ -0,0 +1,239 @@ +*> \brief \b CHECON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHECON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHECON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHECON estimates the reciprocal of the condition number of a complex +*> Hermitian matrix A using the factorization A = U*D*U**H or +*> A = L*D*L**H computed by CHETRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CHETRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHETRF. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEcomputational +* +* ===================================================================== + SUBROUTINE CHECON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + REAL AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHETRS, CLACN2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHECON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**H) or inv(U*D*U**H). +* + CALL CHETRS( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of CHECON +* + END diff --git a/dspl/liblapack/SRC/checon_3.f b/dspl/liblapack/SRC/checon_3.f new file mode 100644 index 0000000..6427dd5 --- /dev/null +++ b/dspl/liblapack/SRC/checon_3.f @@ -0,0 +1,285 @@ +*> \brief \b CHECON_3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHECON_3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHECON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* COMPLEX A( LDA, * ), E ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> CHECON_3 estimates the reciprocal of the condition number (in the +*> 1-norm) of a complex Hermitian matrix A using the factorization +*> computed by CHETRF_RK or CHETRF_BK: +*> +*> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**H (or L**H) is the conjugate of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is Hermitian and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> This routine uses BLAS3 solver CHETRS_3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix: +*> = 'U': Upper triangular, form is A = P*U*D*(U**H)*(P**T); +*> = 'L': Lower triangular, form is A = P*L*D*(L**H)*(P**T). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> Diagonal of the block diagonal matrix D and factors U or L +*> as computed by CHETRF_RK and CHETRF_BK: +*> a) ONLY diagonal elements of the Hermitian block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the Hermitian block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHETRF_RK or CHETRF_BK. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexHEcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> June 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHECON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + REAL AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHETRS_3, CLACN2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHECON_3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + END DO + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + END DO + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**H) or inv(U*D*U**H). +* + CALL CHETRS_3( UPLO, N, 1, A, LDA, E, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of CHECON_3 +* + END diff --git a/dspl/liblapack/SRC/checon_rook.f b/dspl/liblapack/SRC/checon_rook.f new file mode 100644 index 0000000..0fe4ffe --- /dev/null +++ b/dspl/liblapack/SRC/checon_rook.f @@ -0,0 +1,253 @@ +*> \brief CHECON_ROOK estimates the reciprocal of the condition number fort HE matrices using factorization obtained with one of the bounded diagonal pivoting methods (max 2 interchanges) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHECON_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHECON_ROOK( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHECON_ROOK estimates the reciprocal of the condition number of a complex +*> Hermitian matrix A using the factorization A = U*D*U**H or +*> A = L*D*L**H computed by CHETRF_ROOK. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CHETRF_ROOK. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHETRF_ROOK. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHECON_ROOK( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + REAL AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHETRS_ROOK, CLACN2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHECON_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**H) or inv(U*D*U**H). +* + CALL CHETRS_ROOK( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of CHECON_ROOK +* + END diff --git a/dspl/liblapack/SRC/cheequb.f b/dspl/liblapack/SRC/cheequb.f new file mode 100644 index 0000000..3c6085e --- /dev/null +++ b/dspl/liblapack/SRC/cheequb.f @@ -0,0 +1,343 @@ +*> \brief \b CHEEQUB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHEEQUB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHEEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, N +* REAL AMAX, SCOND +* CHARACTER UPLO +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), WORK( * ) +* REAL S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHEEQUB computes row and column scalings intended to equilibrate a +*> Hermitian matrix A (with respect to the Euclidean norm) and reduce +*> its condition number. The scale factors S are computed by the BIN +*> algorithm (see references) so that the scaled matrix B with elements +*> B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of +*> the smallest possible condition number over all possible diagonal +*> scalings. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The N-by-N Hermitian matrix whose scaling factors are to be +*> computed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is REAL +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is REAL +*> Largest absolute value of any matrix element. If AMAX is +*> very close to overflow or very close to underflow, the +*> matrix should be scaled. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexHEcomputational +* +*> \par References: +* ================ +*> +*> Livne, O.E. and Golub, G.H., "Scaling by Binormalization", \n +*> Numerical Algorithms, vol. 35, no. 1, pp. 97-120, January 2004. \n +*> DOI 10.1023/B:NUMA.0000016606.32820.69 \n +*> Tech report version: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.3.1679 +*> +* ===================================================================== + SUBROUTINE CHEEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N + REAL AMAX, SCOND + CHARACTER UPLO +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), WORK( * ) + REAL S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0 ) + INTEGER MAX_ITER + PARAMETER ( MAX_ITER = 100 ) +* .. +* .. Local Scalars .. + INTEGER I, J, ITER + REAL AVG, STD, TOL, C0, C1, C2, T, U, SI, D, BASE, + $ SMIN, SMAX, SMLNUM, BIGNUM, SCALE, SUMSQ + LOGICAL UP + COMPLEX ZDUM +* .. +* .. External Functions .. + REAL SLAMCH + LOGICAL LSAME + EXTERNAL LSAME, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, INT, LOG, MAX, MIN, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT. ( LSAME( UPLO, 'U' ) .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -1 + ELSE IF ( N .LT. 0 ) THEN + INFO = -2 + ELSE IF ( LDA .LT. MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF ( INFO .NE. 0 ) THEN + CALL XERBLA( 'CHEEQUB', -INFO ) + RETURN + END IF + + UP = LSAME( UPLO, 'U' ) + AMAX = ZERO +* +* Quick return if possible. +* + IF ( N .EQ. 0 ) THEN + SCOND = ONE + RETURN + END IF + + DO I = 1, N + S( I ) = ZERO + END DO + + AMAX = ZERO + IF ( UP ) THEN + DO J = 1, N + DO I = 1, J-1 + S( I ) = MAX( S( I ), CABS1( A( I, J ) ) ) + S( J ) = MAX( S( J ), CABS1( A( I, J ) ) ) + AMAX = MAX( AMAX, CABS1( A( I, J ) ) ) + END DO + S( J ) = MAX( S( J ), CABS1( A( J, J ) ) ) + AMAX = MAX( AMAX, CABS1( A( J, J ) ) ) + END DO + ELSE + DO J = 1, N + S( J ) = MAX( S( J ), CABS1( A( J, J ) ) ) + AMAX = MAX( AMAX, CABS1( A( J, J ) ) ) + DO I = J+1, N + S( I ) = MAX( S( I ), CABS1( A( I, J ) ) ) + S( J ) = MAX( S( J ), CABS1( A( I, J ) ) ) + AMAX = MAX( AMAX, CABS1( A( I, J ) ) ) + END DO + END DO + END IF + DO J = 1, N + S( J ) = 1.0E0 / S( J ) + END DO + + TOL = ONE / SQRT( 2.0E0 * N ) + + DO ITER = 1, MAX_ITER + SCALE = 0.0E0 + SUMSQ = 0.0E0 +* beta = |A|s + DO I = 1, N + WORK( I ) = ZERO + END DO + IF ( UP ) THEN + DO J = 1, N + DO I = 1, J-1 + WORK( I ) = WORK( I ) + CABS1( A( I, J ) ) * S( J ) + WORK( J ) = WORK( J ) + CABS1( A( I, J ) ) * S( I ) + END DO + WORK( J ) = WORK( J ) + CABS1( A( J, J ) ) * S( J ) + END DO + ELSE + DO J = 1, N + WORK( J ) = WORK( J ) + CABS1( A( J, J ) ) * S( J ) + DO I = J+1, N + WORK( I ) = WORK( I ) + CABS1( A( I, J ) ) * S( J ) + WORK( J ) = WORK( J ) + CABS1( A( I, J ) ) * S( I ) + END DO + END DO + END IF + +* avg = s^T beta / n + AVG = 0.0E0 + DO I = 1, N + AVG = AVG + S( I )*WORK( I ) + END DO + AVG = AVG / N + + STD = 0.0E0 + DO I = N+1, 2*N + WORK( I ) = S( I-N ) * WORK( I-N ) - AVG + END DO + CALL CLASSQ( N, WORK( N+1 ), 1, SCALE, SUMSQ ) + STD = SCALE * SQRT( SUMSQ / N ) + + IF ( STD .LT. TOL * AVG ) GOTO 999 + + DO I = 1, N + T = CABS1( A( I, I ) ) + SI = S( I ) + C2 = ( N-1 ) * T + C1 = ( N-2 ) * ( WORK( I ) - T*SI ) + C0 = -(T*SI)*SI + 2*WORK( I )*SI - N*AVG + D = C1*C1 - 4*C0*C2 + + IF ( D .LE. 0 ) THEN + INFO = -1 + RETURN + END IF + SI = -2*C0 / ( C1 + SQRT( D ) ) + + D = SI - S( I ) + U = ZERO + IF ( UP ) THEN + DO J = 1, I + T = CABS1( A( J, I ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + DO J = I+1,N + T = CABS1( A( I, J ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + ELSE + DO J = 1, I + T = CABS1( A( I, J ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + DO J = I+1,N + T = CABS1( A( J, I ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + END IF + + AVG = AVG + ( U + WORK( I ) ) * D / N + S( I ) = SI + END DO + END DO + + 999 CONTINUE + + SMLNUM = SLAMCH( 'SAFEMIN' ) + BIGNUM = ONE / SMLNUM + SMIN = BIGNUM + SMAX = ZERO + T = ONE / SQRT( AVG ) + BASE = SLAMCH( 'B' ) + U = ONE / LOG( BASE ) + DO I = 1, N + S( I ) = BASE ** INT( U * LOG( S( I ) * T ) ) + SMIN = MIN( SMIN, S( I ) ) + SMAX = MAX( SMAX, S( I ) ) + END DO + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) +* + END diff --git a/dspl/liblapack/SRC/cheev.f b/dspl/liblapack/SRC/cheev.f new file mode 100644 index 0000000..913c032 --- /dev/null +++ b/dspl/liblapack/SRC/cheev.f @@ -0,0 +1,298 @@ +*> \brief CHEEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHEEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* REAL RWORK( * ), W( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHEEV computes all eigenvalues and, optionally, eigenvectors of a +*> complex Hermitian matrix A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,2*N-1). +*> For optimal efficiency, LWORK >= (NB+1)*N, +*> where NB is the blocksize for CHETRD returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (max(1, 3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEeigen +* +* ===================================================================== + SUBROUTINE CHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE, + $ LLWORK, LWKOPT, NB + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANHE, SLAMCH + EXTERNAL ILAENV, LSAME, CLANHE, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CHETRD, CLASCL, CSTEQR, CUNGTR, SSCAL, SSTERF, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, ( NB+1 )*N ) + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) + $ INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + WORK( 1 ) = 1 + IF( WANTZ ) + $ A( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHE( 'M', UPLO, N, A, LDA, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL CLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call CHETRD to reduce Hermitian matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + CALL CHETRD( UPLO, N, A, LDA, W, RWORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* CUNGTR to generate the unitary matrix, then call CSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL CUNGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ), + $ LLWORK, IINFO ) + INDWRK = INDE + N + CALL CSTEQR( JOBZ, N, W, RWORK( INDE ), A, LDA, + $ RWORK( INDWRK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CHEEV +* + END diff --git a/dspl/liblapack/SRC/cheev_2stage.f b/dspl/liblapack/SRC/cheev_2stage.f new file mode 100644 index 0000000..a9d04ab --- /dev/null +++ b/dspl/liblapack/SRC/cheev_2stage.f @@ -0,0 +1,355 @@ +*> \brief CHEEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices +* +* @generated from zheev_2stage.f, fortran z -> c, Sat Nov 5 23:18:06 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHEEV_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, +* RWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* REAL RWORK( * ), W( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHEEV_2STAGE computes all eigenvalues and, optionally, eigenvectors of a +*> complex Hermitian matrix A using the 2stage technique for +*> the reduction to tridiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (max(1, 3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexHEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, + $ RWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE, + $ LLWORK, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + REAL SLAMCH, CLANHE + EXTERNAL LSAME, SLAMCH, CLANHE, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSTERF, XERBLA, CLASCL, CSTEQR, + $ CUNGTR, CHETRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + KD = ILAENV2STAGE( 1, 'CHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'CHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = N + LHTRD + LWTRD + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEEV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + W( 1 ) = REAL( A( 1, 1 ) ) + WORK( 1 ) = 1 + IF( WANTZ ) + $ A( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHE( 'M', UPLO, N, A, LDA, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL CLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call CHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + INDHOUS = INDTAU + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL CHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, RWORK( INDE ), + $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* CUNGTR to generate the unitary matrix, then call CSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL CUNGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ), + $ LLWORK, IINFO ) + INDWRK = INDE + N + CALL CSTEQR( JOBZ, N, W, RWORK( INDE ), A, LDA, + $ RWORK( INDWRK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of CHEEV_2STAGE +* + END diff --git a/dspl/liblapack/SRC/cheevd.f b/dspl/liblapack/SRC/cheevd.f new file mode 100644 index 0000000..ff89c29 --- /dev/null +++ b/dspl/liblapack/SRC/cheevd.f @@ -0,0 +1,398 @@ +*> \brief CHEEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHEEVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, +* LRWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHEEVD computes all eigenvalues and, optionally, eigenvectors of a +*> complex Hermitian matrix A. If eigenvectors are desired, it uses a +*> divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. +*> If N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LWORK must be at least N + 1. +*> If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, +*> dimension (LRWORK) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of the array RWORK. +*> If N <= 1, LRWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LRWORK must be at least N. +*> If JOBZ = 'V' and N > 1, LRWORK must be at least +*> 1 + 5*N + 2*N**2. +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i and JOBZ = 'N', then the algorithm failed +*> to converge; i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> if INFO = i and JOBZ = 'V', then the algorithm failed +*> to compute an eigenvalue while working on the submatrix +*> lying in rows and columns INFO/(N+1) through +*> mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEeigen +* +*> \par Further Details: +* ===================== +*> +*> Modified description of INFO. Sven, 16 Feb 05. +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE CHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, + $ LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWK2, + $ INDWRK, ISCALE, LIOPT, LIWMIN, LLRWK, LLWORK, + $ LLWRK2, LOPT, LROPT, LRWMIN, LWMIN + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANHE, SLAMCH + EXTERNAL ILAENV, LSAME, CLANHE, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CHETRD, CLACPY, CLASCL, CSTEDC, CUNMTR, SSCAL, + $ SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + LOPT = LWMIN + LROPT = LRWMIN + LIOPT = LIWMIN + ELSE + IF( WANTZ ) THEN + LWMIN = 2*N + N*N + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + 1 + LRWMIN = N + LIWMIN = 1 + END IF + LOPT = MAX( LWMIN, N + + $ ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) ) + LROPT = LRWMIN + LIOPT = LIWMIN + END IF + WORK( 1 ) = LOPT + RWORK( 1 ) = LROPT + IWORK( 1 ) = LIOPT +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + IF( WANTZ ) + $ A( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHE( 'M', UPLO, N, A, LDA, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL CLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call CHETRD to reduce Hermitian matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + INDWRK = INDTAU + N + INDRWK = INDE + N + INDWK2 = INDWRK + N*N + LLWORK = LWORK - INDWRK + 1 + LLWRK2 = LWORK - INDWK2 + 1 + LLRWK = LRWORK - INDRWK + 1 + CALL CHETRD( UPLO, N, A, LDA, W, RWORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* CSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call CUNMTR to multiply it to the +* Householder transformations represented as Householder vectors in +* A. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL CSTEDC( 'I', N, W, RWORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, RWORK( INDRWK ), LLRWK, + $ IWORK, LIWORK, INFO ) + CALL CUNMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ), + $ WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO ) + CALL CLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + WORK( 1 ) = LOPT + RWORK( 1 ) = LROPT + IWORK( 1 ) = LIOPT +* + RETURN +* +* End of CHEEVD +* + END diff --git a/dspl/liblapack/SRC/cheevd_2stage.f b/dspl/liblapack/SRC/cheevd_2stage.f new file mode 100644 index 0000000..6b31d4b --- /dev/null +++ b/dspl/liblapack/SRC/cheevd_2stage.f @@ -0,0 +1,455 @@ +*> \brief CHEEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices +* +* @generated from zheevd_2stage.f, fortran z -> c, Sat Nov 5 23:18:14 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHEEVD_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, +* RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHEEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a +*> complex Hermitian matrix A using the 2stage technique for +*> the reduction to tridiagonal. If eigenvectors are desired, it uses a +*> divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + N+1 +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + N+1 +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2 +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, +*> dimension (LRWORK) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of the array RWORK. +*> If N <= 1, LRWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LRWORK must be at least N. +*> If JOBZ = 'V' and N > 1, LRWORK must be at least +*> 1 + 5*N + 2*N**2. +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i and JOBZ = 'N', then the algorithm failed +*> to converge; i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> if INFO = i and JOBZ = 'V', then the algorithm failed +*> to compute an eigenvalue while working on the submatrix +*> lying in rows and columns INFO/(N+1) through +*> mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexHEeigen +* +*> \par Further Details: +* ===================== +*> +*> Modified description of INFO. Sven, 16 Feb 05. +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +*> +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, + $ RWORK, LRWORK, IWORK, LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWK2, + $ INDWRK, ISCALE, LIWMIN, LLRWK, LLWORK, + $ LLWRK2, LRWMIN, LWMIN, + $ LHTRD, LWTRD, KD, IB, INDHOUS + + + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + REAL SLAMCH, CLANHE + EXTERNAL LSAME, SLAMCH, CLANHE, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSTERF, XERBLA, CLACPY, CLASCL, + $ CSTEDC, CUNMTR, CHETRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + ELSE + KD = ILAENV2STAGE( 1, 'CHETRD_2STAGE', JOBZ, + $ N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'CHETRD_2STAGE', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'CHETRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'CHETRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + IF( WANTZ ) THEN + LWMIN = 2*N + N*N + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + 1 + LHTRD + LWTRD + LRWMIN = N + LIWMIN = 1 + END IF + END IF + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEEVD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = REAL( A( 1, 1 ) ) + IF( WANTZ ) + $ A( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHE( 'M', UPLO, N, A, LDA, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL CLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call CHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. +* + INDE = 1 + INDRWK = INDE + N + LLRWK = LRWORK - INDRWK + 1 + INDTAU = 1 + INDHOUS = INDTAU + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 +* + CALL CHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, RWORK( INDE ), + $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* CSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call CUNMTR to multiply it to the +* Householder transformations represented as Householder vectors in +* A. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL CSTEDC( 'I', N, W, RWORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, RWORK( INDRWK ), LLRWK, + $ IWORK, LIWORK, INFO ) + CALL CUNMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ), + $ WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO ) + CALL CLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of CHEEVD_2STAGE +* + END diff --git a/dspl/liblapack/SRC/cheevr.f b/dspl/liblapack/SRC/cheevr.f new file mode 100644 index 0000000..0b055ba --- /dev/null +++ b/dspl/liblapack/SRC/cheevr.f @@ -0,0 +1,724 @@ +*> \brief CHEEVR computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHEEVR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHEEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, +* ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, +* RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK, +* $ M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHEEVR computes selected eigenvalues and, optionally, eigenvectors +*> of a complex Hermitian matrix A. Eigenvalues and eigenvectors can +*> be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> +*> CHEEVR first reduces the matrix A to tridiagonal form T with a call +*> to CHETRD. Then, whenever possible, CHEEVR calls CSTEMR to compute +*> the eigenspectrum using Relatively Robust Representations. CSTEMR +*> computes eigenvalues by the dqds algorithm, while orthogonal +*> eigenvectors are computed from various "good" L D L^T representations +*> (also known as Relatively Robust Representations). Gram-Schmidt +*> orthogonalization is avoided as far as possible. More specifically, +*> the various steps of the algorithm are as follows. +*> +*> For each unreduced block (submatrix) of T, +*> (a) Compute T - sigma I = L D L^T, so that L and D +*> define all the wanted eigenvalues to high relative accuracy. +*> This means that small relative changes in the entries of D and L +*> cause only small relative changes in the eigenvalues and +*> eigenvectors. The standard (unfactored) representation of the +*> tridiagonal matrix T does not have this property in general. +*> (b) Compute the eigenvalues to suitable accuracy. +*> If the eigenvectors are desired, the algorithm attains full +*> accuracy of the computed eigenvalues only right before +*> the corresponding vectors have to be computed, see steps c) and d). +*> (c) For each cluster of close eigenvalues, select a new +*> shift close to the cluster, find a new factorization, and refine +*> the shifted eigenvalues to suitable accuracy. +*> (d) For each eigenvalue with a large enough relative separation compute +*> the corresponding eigenvector by forming a rank revealing twisted +*> factorization. Go back to (c) for any clusters that remain. +*> +*> The desired accuracy of the output can be specified by the input +*> parameter ABSTOL. +*> +*> For more details, see DSTEMR's documentation and: +*> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations +*> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," +*> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. +*> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and +*> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, +*> 2004. Also LAPACK Working Note 154. +*> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric +*> tridiagonal eigenvalue/eigenvector problem", +*> Computer Science Division Technical Report No. UCB/CSD-97-971, +*> UC Berkeley, May 1997. +*> +*> +*> Note 1 : CHEEVR calls CSTEMR when the full spectrum is requested +*> on machines which conform to the ieee-754 floating point standard. +*> CHEEVR calls SSTEBZ and CSTEIN on non-ieee machines and +*> when partial spectrum requests are made. +*> +*> Normal execution of CSTEMR may create NaNs and infinities and +*> hence may abort due to a floating point exception in environments +*> which do not handle NaNs and infinities in the ieee standard default +*> manner. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> For RANGE = 'V' or 'I' and IU - IL < N - 1, SSTEBZ and +*> CSTEIN are called +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> +*> If high relative accuracy is important, set ABSTOL to +*> SLAMCH( 'Safe minimum' ). Doing so will guarantee that +*> eigenvalues are computed to high relative accuracy when +*> possible in future releases. The current code does not +*> make any guarantees about high relative accuracy, but +*> furutre releases will. See J. Barlow and J. Demmel, +*> "Computing Accurate Eigensystems of Scaled Diagonally +*> Dominant Matrices", LAPACK Working Note #7, for a discussion +*> of which matrices define their eigenvalues to high relative +*> accuracy. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). This is an output of CSTEMR (tridiagonal +*> matrix). The support of the eigenvectors of A is typically +*> 1:N because of the unitary transformations applied by CUNMTR. +*> Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,2*N). +*> For optimal efficiency, LWORK >= (NB+1)*N, +*> where NB is the max of the blocksize for CHETRD and for +*> CUNMTR as returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (MAX(1,LRWORK)) +*> On exit, if INFO = 0, RWORK(1) returns the optimal +*> (and minimal) LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The length of the array RWORK. LRWORK >= max(1,24*N). +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal +*> (and minimal) LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N). +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: Internal error +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexHEeigen +* +*> \par Contributors: +* ================== +*> +*> Inderjit Dhillon, IBM Almaden, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Ken Stanley, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Jason Riedy, Computer Science Division, University of +*> California at Berkeley, USA \n +*> +* ===================================================================== + SUBROUTINE CHEEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, + $ RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK, + $ M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, TEST, VALEIG, + $ WANTZ, TRYRAC + CHARACTER ORDER + INTEGER I, IEEEOK, IINFO, IMAX, INDIBL, INDIFL, INDISP, + $ INDIWO, INDRD, INDRDD, INDRE, INDREE, INDRWK, + $ INDTAU, INDWK, INDWKN, ISCALE, ITMP1, J, JJ, + $ LIWMIN, LLWORK, LLRWORK, LLWRKN, LRWMIN, + $ LWKOPT, LWMIN, NB, NSPLIT + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANSY, SLAMCH + EXTERNAL LSAME, ILAENV, CLANSY, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CHETRD, CSSCAL, CSTEMR, CSTEIN, CSWAP, CUNMTR, + $ SCOPY, SSCAL, SSTEBZ, SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IEEEOK = ILAENV( 10, 'CHEEVR', 'N', 1, 2, 3, 4 ) +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LRWORK.EQ.-1 ) .OR. + $ ( LIWORK.EQ.-1 ) ) +* + LRWMIN = MAX( 1, 24*N ) + LIWMIN = MAX( 1, 10*N ) + LWMIN = MAX( 1, 2*N ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) + NB = MAX( NB, ILAENV( 1, 'CUNMTR', UPLO, N, -1, -1, -1 ) ) + LWKOPT = MAX( ( NB+1 )*N, LWMIN ) + WORK( 1 ) = LWKOPT + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -22 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEEVR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + WORK( 1 ) = 2 + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = REAL( A( 1, 1 ) ) + ELSE + IF( VL.LT.REAL( A( 1, 1 ) ) .AND. VU.GE.REAL( A( 1, 1 ) ) ) + $ THEN + M = 1 + W( 1 ) = REAL( A( 1, 1 ) ) + END IF + END IF + IF( WANTZ ) THEN + Z( 1, 1 ) = ONE + ISUPPZ( 1 ) = 1 + ISUPPZ( 2 ) = 1 + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF (VALEIG) THEN + VLL = VL + VUU = VU + END IF + ANRM = CLANSY( 'M', UPLO, N, A, LDA, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL CSSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL CSSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF + +* Initialize indices into workspaces. Note: The IWORK indices are +* used only if SSTERF or CSTEMR fail. + +* WORK(INDTAU:INDTAU+N-1) stores the complex scalar factors of the +* elementary reflectors used in CHETRD. + INDTAU = 1 +* INDWK is the starting offset of the remaining complex workspace, +* and LLWORK is the remaining complex workspace size. + INDWK = INDTAU + N + LLWORK = LWORK - INDWK + 1 + +* RWORK(INDRD:INDRD+N-1) stores the real tridiagonal's diagonal +* entries. + INDRD = 1 +* RWORK(INDRE:INDRE+N-1) stores the off-diagonal entries of the +* tridiagonal matrix from CHETRD. + INDRE = INDRD + N +* RWORK(INDRDD:INDRDD+N-1) is a copy of the diagonal entries over +* -written by CSTEMR (the SSTERF path copies the diagonal to W). + INDRDD = INDRE + N +* RWORK(INDREE:INDREE+N-1) is a copy of the off-diagonal entries over +* -written while computing the eigenvalues in SSTERF and CSTEMR. + INDREE = INDRDD + N +* INDRWK is the starting offset of the left-over real workspace, and +* LLRWORK is the remaining workspace size. + INDRWK = INDREE + N + LLRWORK = LRWORK - INDRWK + 1 + +* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in SSTEBZ and +* stores the block indices of each of the M<=N eigenvalues. + INDIBL = 1 +* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in SSTEBZ and +* stores the starting and finishing indices of each block. + INDISP = INDIBL + N +* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors +* that corresponding to eigenvectors that fail to converge in +* SSTEIN. This information is discarded; if any fail, the driver +* returns INFO > 0. + INDIFL = INDISP + N +* INDIWO is the offset of the remaining integer workspace. + INDIWO = INDIFL + N + +* +* Call CHETRD to reduce Hermitian matrix to tridiagonal form. +* + CALL CHETRD( UPLO, N, A, LDA, RWORK( INDRD ), RWORK( INDRE ), + $ WORK( INDTAU ), WORK( INDWK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired +* then call SSTERF or CSTEMR and CUNMTR. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG.OR.TEST ) .AND. ( IEEEOK.EQ.1 ) ) THEN + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N, RWORK( INDRD ), 1, W, 1 ) + CALL SCOPY( N-1, RWORK( INDRE ), 1, RWORK( INDREE ), 1 ) + CALL SSTERF( N, W, RWORK( INDREE ), INFO ) + ELSE + CALL SCOPY( N-1, RWORK( INDRE ), 1, RWORK( INDREE ), 1 ) + CALL SCOPY( N, RWORK( INDRD ), 1, RWORK( INDRDD ), 1 ) +* + IF (ABSTOL .LE. TWO*N*EPS) THEN + TRYRAC = .TRUE. + ELSE + TRYRAC = .FALSE. + END IF + CALL CSTEMR( JOBZ, 'A', N, RWORK( INDRDD ), + $ RWORK( INDREE ), VL, VU, IL, IU, M, W, + $ Z, LDZ, N, ISUPPZ, TRYRAC, + $ RWORK( INDRWK ), LLRWORK, + $ IWORK, LIWORK, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by CSTEMR. +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN + INDWKN = INDWK + LLWRKN = LWORK - INDWKN + 1 + CALL CUNMTR( 'L', UPLO, 'N', N, M, A, LDA, + $ WORK( INDTAU ), Z, LDZ, WORK( INDWKN ), + $ LLWRKN, IINFO ) + END IF + END IF +* +* + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. +* Also call SSTEBZ and CSTEIN if CSTEMR fails. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDRD ), RWORK( INDRE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL CSTEIN( N, RWORK( INDRD ), RWORK( INDRE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWO ), IWORK( INDIFL ), + $ INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by CSTEIN. +* + INDWKN = INDWK + LLWRKN = LWORK - INDWKN + 1 + CALL CUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of CHEEVR +* + END diff --git a/dspl/liblapack/SRC/cheevr_2stage.f b/dspl/liblapack/SRC/cheevr_2stage.f new file mode 100644 index 0000000..20a1cb3 --- /dev/null +++ b/dspl/liblapack/SRC/cheevr_2stage.f @@ -0,0 +1,779 @@ +*> \brief CHEEVR_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices +* +* @generated from zheevr_2stage.f, fortran z -> c, Sat Nov 5 23:18:11 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHEEVR_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHEEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, +* IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, +* WORK, LWORK, RWORK, LRWORK, IWORK, +* LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK, +* $ M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHEEVR_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a complex Hermitian matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors can +*> be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> +*> CHEEVR_2STAGE first reduces the matrix A to tridiagonal form T with a call +*> to CHETRD. Then, whenever possible, CHEEVR_2STAGE calls CSTEMR to compute +*> eigenspectrum using Relatively Robust Representations. CSTEMR +*> computes eigenvalues by the dqds algorithm, while orthogonal +*> eigenvectors are computed from various "good" L D L^T representations +*> (also known as Relatively Robust Representations). Gram-Schmidt +*> orthogonalization is avoided as far as possible. More specifically, +*> the various steps of the algorithm are as follows. +*> +*> For each unreduced block (submatrix) of T, +*> (a) Compute T - sigma I = L D L^T, so that L and D +*> define all the wanted eigenvalues to high relative accuracy. +*> This means that small relative changes in the entries of D and L +*> cause only small relative changes in the eigenvalues and +*> eigenvectors. The standard (unfactored) representation of the +*> tridiagonal matrix T does not have this property in general. +*> (b) Compute the eigenvalues to suitable accuracy. +*> If the eigenvectors are desired, the algorithm attains full +*> accuracy of the computed eigenvalues only right before +*> the corresponding vectors have to be computed, see steps c) and d). +*> (c) For each cluster of close eigenvalues, select a new +*> shift close to the cluster, find a new factorization, and refine +*> the shifted eigenvalues to suitable accuracy. +*> (d) For each eigenvalue with a large enough relative separation compute +*> the corresponding eigenvector by forming a rank revealing twisted +*> factorization. Go back to (c) for any clusters that remain. +*> +*> The desired accuracy of the output can be specified by the input +*> parameter ABSTOL. +*> +*> For more details, see DSTEMR's documentation and: +*> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations +*> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," +*> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. +*> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and +*> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, +*> 2004. Also LAPACK Working Note 154. +*> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric +*> tridiagonal eigenvalue/eigenvector problem", +*> Computer Science Division Technical Report No. UCB/CSD-97-971, +*> UC Berkeley, May 1997. +*> +*> +*> Note 1 : CHEEVR_2STAGE calls CSTEMR when the full spectrum is requested +*> on machines which conform to the ieee-754 floating point standard. +*> CHEEVR_2STAGE calls SSTEBZ and CSTEIN on non-ieee machines and +*> when partial spectrum requests are made. +*> +*> Normal execution of CSTEMR may create NaNs and infinities and +*> hence may abort due to a floating point exception in environments +*> which do not handle NaNs and infinities in the ieee standard default +*> manner. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> For RANGE = 'V' or 'I' and IU - IL < N - 1, SSTEBZ and +*> CSTEIN are called +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> +*> If high relative accuracy is important, set ABSTOL to +*> SLAMCH( 'Safe minimum' ). Doing so will guarantee that +*> eigenvalues are computed to high relative accuracy when +*> possible in future releases. The current code does not +*> make any guarantees about high relative accuracy, but +*> furutre releases will. See J. Barlow and J. Demmel, +*> "Computing Accurate Eigensystems of Scaled Diagonally +*> Dominant Matrices", LAPACK Working Note #7, for a discussion +*> of which matrices define their eigenvalues to high relative +*> accuracy. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). This is an output of CSTEMR (tridiagonal +*> matrix). The support of the eigenvectors of A is typically +*> 1:N because of the unitary transformations applied by CUNMTR. +*> Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, 26*N, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (MAX(1,LRWORK)) +*> On exit, if INFO = 0, RWORK(1) returns the optimal +*> (and minimal) LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The length of the array RWORK. LRWORK >= max(1,24*N). +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal +*> (and minimal) LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N). +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: Internal error +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexHEeigen +* +*> \par Contributors: +* ================== +*> +*> Inderjit Dhillon, IBM Almaden, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Ken Stanley, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Jason Riedy, Computer Science Division, University of +*> California at Berkeley, USA \n +*> +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHEEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, + $ WORK, LWORK, RWORK, LRWORK, IWORK, + $ LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK, + $ M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, TEST, VALEIG, + $ WANTZ, TRYRAC + CHARACTER ORDER + INTEGER I, IEEEOK, IINFO, IMAX, INDIBL, INDIFL, INDISP, + $ INDIWO, INDRD, INDRDD, INDRE, INDREE, INDRWK, + $ INDTAU, INDWK, INDWKN, ISCALE, ITMP1, J, JJ, + $ LIWMIN, LLWORK, LLRWORK, LLWRKN, LRWMIN, + $ LWMIN, NSPLIT, LHTRD, LWTRD, KD, IB, INDHOUS + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV, ILAENV2STAGE + REAL SLAMCH, CLANSY + EXTERNAL LSAME, SLAMCH, CLANSY, ILAENV, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SSCAL, SSTEBZ, SSTERF, XERBLA, CSSCAL, + $ CHETRD_2STAGE, CSTEMR, CSTEIN, CSWAP, CUNMTR +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IEEEOK = ILAENV( 10, 'CHEEVR', 'N', 1, 2, 3, 4 ) +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LRWORK.EQ.-1 ) .OR. + $ ( LIWORK.EQ.-1 ) ) +* + KD = ILAENV2STAGE( 1, 'CHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'CHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = N + LHTRD + LWTRD + LRWMIN = MAX( 1, 24*N ) + LIWMIN = MAX( 1, 10*N ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -22 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEEVR_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + WORK( 1 ) = 2 + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = REAL( A( 1, 1 ) ) + ELSE + IF( VL.LT.REAL( A( 1, 1 ) ) .AND. VU.GE.REAL( A( 1, 1 ) ) ) + $ THEN + M = 1 + W( 1 ) = REAL( A( 1, 1 ) ) + END IF + END IF + IF( WANTZ ) THEN + Z( 1, 1 ) = ONE + ISUPPZ( 1 ) = 1 + ISUPPZ( 2 ) = 1 + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF (VALEIG) THEN + VLL = VL + VUU = VU + END IF + ANRM = CLANSY( 'M', UPLO, N, A, LDA, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL CSSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL CSSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF + +* Initialize indices into workspaces. Note: The IWORK indices are +* used only if SSTERF or CSTEMR fail. + +* WORK(INDTAU:INDTAU+N-1) stores the complex scalar factors of the +* elementary reflectors used in CHETRD. + INDTAU = 1 +* INDWK is the starting offset of the remaining complex workspace, +* and LLWORK is the remaining complex workspace size. + INDHOUS = INDTAU + N + INDWK = INDHOUS + LHTRD + LLWORK = LWORK - INDWK + 1 + +* RWORK(INDRD:INDRD+N-1) stores the real tridiagonal's diagonal +* entries. + INDRD = 1 +* RWORK(INDRE:INDRE+N-1) stores the off-diagonal entries of the +* tridiagonal matrix from CHETRD. + INDRE = INDRD + N +* RWORK(INDRDD:INDRDD+N-1) is a copy of the diagonal entries over +* -written by CSTEMR (the SSTERF path copies the diagonal to W). + INDRDD = INDRE + N +* RWORK(INDREE:INDREE+N-1) is a copy of the off-diagonal entries over +* -written while computing the eigenvalues in SSTERF and CSTEMR. + INDREE = INDRDD + N +* INDRWK is the starting offset of the left-over real workspace, and +* LLRWORK is the remaining workspace size. + INDRWK = INDREE + N + LLRWORK = LRWORK - INDRWK + 1 + +* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in SSTEBZ and +* stores the block indices of each of the M<=N eigenvalues. + INDIBL = 1 +* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in SSTEBZ and +* stores the starting and finishing indices of each block. + INDISP = INDIBL + N +* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors +* that corresponding to eigenvectors that fail to converge in +* CSTEIN. This information is discarded; if any fail, the driver +* returns INFO > 0. + INDIFL = INDISP + N +* INDIWO is the offset of the remaining integer workspace. + INDIWO = INDIFL + N + +* +* Call CHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. +* + CALL CHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, RWORK( INDRD ), + $ RWORK( INDRE ), WORK( INDTAU ), + $ WORK( INDHOUS ), LHTRD, + $ WORK( INDWK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired +* then call SSTERF or CSTEMR and CUNMTR. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG.OR.TEST ) .AND. ( IEEEOK.EQ.1 ) ) THEN + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N, RWORK( INDRD ), 1, W, 1 ) + CALL SCOPY( N-1, RWORK( INDRE ), 1, RWORK( INDREE ), 1 ) + CALL SSTERF( N, W, RWORK( INDREE ), INFO ) + ELSE + CALL SCOPY( N-1, RWORK( INDRE ), 1, RWORK( INDREE ), 1 ) + CALL SCOPY( N, RWORK( INDRD ), 1, RWORK( INDRDD ), 1 ) +* + IF (ABSTOL .LE. TWO*N*EPS) THEN + TRYRAC = .TRUE. + ELSE + TRYRAC = .FALSE. + END IF + CALL CSTEMR( JOBZ, 'A', N, RWORK( INDRDD ), + $ RWORK( INDREE ), VL, VU, IL, IU, M, W, + $ Z, LDZ, N, ISUPPZ, TRYRAC, + $ RWORK( INDRWK ), LLRWORK, + $ IWORK, LIWORK, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by CSTEMR. +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN + INDWKN = INDWK + LLWRKN = LWORK - INDWKN + 1 + CALL CUNMTR( 'L', UPLO, 'N', N, M, A, LDA, + $ WORK( INDTAU ), Z, LDZ, WORK( INDWKN ), + $ LLWRKN, IINFO ) + END IF + END IF +* +* + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. +* Also call SSTEBZ and CSTEIN if CSTEMR fails. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDRD ), RWORK( INDRE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL CSTEIN( N, RWORK( INDRD ), RWORK( INDRE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWO ), IWORK( INDIFL ), + $ INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by CSTEIN. +* + INDWKN = INDWK + LLWRKN = LWORK - INDWKN + 1 + CALL CUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of CHEEVR_2STAGE +* + END diff --git a/dspl/liblapack/SRC/cheevx.f b/dspl/liblapack/SRC/cheevx.f new file mode 100644 index 0000000..e2a2c3d --- /dev/null +++ b/dspl/liblapack/SRC/cheevx.f @@ -0,0 +1,564 @@ +*> \brief CHEEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHEEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHEEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, +* ABSTOL, M, W, Z, LDZ, WORK, LWORK, RWORK, +* IWORK, IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHEEVX computes selected eigenvalues and, optionally, eigenvectors +*> of a complex Hermitian matrix A. Eigenvalues and eigenvectors can +*> be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> On normal exit, the first M elements contain the selected +*> eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise 2*N. +*> For optimal efficiency, LWORK >= (NB+1)*N, +*> where NB is the max of the blocksize for CHETRD and for +*> CUNMTR as returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexHEeigen +* +* ===================================================================== + SUBROUTINE CHEEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, WORK, LWORK, RWORK, + $ IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, TEST, VALEIG, + $ WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWK, INDRWK, INDTAU, INDWRK, ISCALE, + $ ITMP1, J, JJ, LLWORK, LWKMIN, LWKOPT, NB, + $ NSPLIT + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, CLANHE + EXTERNAL LSAME, ILAENV, SLAMCH, CLANHE +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SSCAL, SSTEBZ, SSTERF, XERBLA, CSSCAL, + $ CHETRD, CLACPY, CSTEIN, CSTEQR, CSWAP, CUNGTR, + $ CUNMTR +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWKMIN = 1 + WORK( 1 ) = LWKMIN + ELSE + LWKMIN = 2*N + NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) + NB = MAX( NB, ILAENV( 1, 'CUNMTR', UPLO, N, -1, -1, -1 ) ) + LWKOPT = MAX( 1, ( NB + 1 )*N ) + WORK( 1 ) = LWKOPT + END IF +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) + $ INFO = -17 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + ELSE IF( VALEIG ) THEN + IF( VL.LT.REAL( A( 1, 1 ) ) .AND. VU.GE.REAL( A( 1, 1 ) ) ) + $ THEN + M = 1 + W( 1 ) = A( 1, 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + END IF + ANRM = CLANHE( 'M', UPLO, N, A, LDA, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL CSSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL CSSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call CHETRD to reduce Hermitian matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N + INDTAU = 1 + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + CALL CHETRD( UPLO, N, A, LDA, RWORK( INDD ), RWORK( INDE ), + $ WORK( INDTAU ), WORK( INDWRK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal to +* zero, then call SSTERF or CUNGTR and CSTEQR. If this fails for +* some eigenvalue, then try SSTEBZ. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG .OR. TEST ) .AND. ( ABSTOL.LE.ZERO ) ) THEN + CALL SCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL SSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL CLACPY( 'A', N, N, A, LDA, Z, LDZ ) + CALL CUNGTR( UPLO, N, Z, LDZ, WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL CSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + IFAIL( I ) = 0 + 30 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 40 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL CSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by CSTEIN. +* + CALL CUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWRK ), LLWORK, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 40 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 60 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 50 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 60 CONTINUE + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CHEEVX +* + END diff --git a/dspl/liblapack/SRC/cheevx_2stage.f b/dspl/liblapack/SRC/cheevx_2stage.f new file mode 100644 index 0000000..59c2ae8 --- /dev/null +++ b/dspl/liblapack/SRC/cheevx_2stage.f @@ -0,0 +1,622 @@ +*> \brief CHEEVX_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices +* +* @generated from zheevx_2stage.f, fortran z -> c, Sat Nov 5 23:18:09 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHEEVX_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHEEVX_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, +* IL, IU, ABSTOL, M, W, Z, LDZ, WORK, +* LWORK, RWORK, IWORK, IFAIL, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHEEVX_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a complex Hermitian matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors can +*> be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> On normal exit, the first M elements contain the selected +*> eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, 8*N, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexHEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHEEVX_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, WORK, + $ LWORK, RWORK, IWORK, IFAIL, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, TEST, VALEIG, + $ WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWK, INDRWK, INDTAU, INDWRK, ISCALE, + $ ITMP1, J, JJ, LLWORK, + $ NSPLIT, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + REAL SLAMCH, CLANHE + EXTERNAL LSAME, SLAMCH, CLANHE, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SSCAL, SSTEBZ, SSTERF, XERBLA, CSSCAL, + $ CLACPY, CSTEIN, CSTEQR, CSWAP, CUNGTR, CUNMTR, + $ CHETRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + KD = ILAENV2STAGE( 1, 'CHETRD_2STAGE', JOBZ, + $ N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'CHETRD_2STAGE', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'CHETRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'CHETRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + LWMIN = N + LHTRD + LWTRD + WORK( 1 ) = LWMIN + END IF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -17 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEEVX_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = REAL( A( 1, 1 ) ) + ELSE IF( VALEIG ) THEN + IF( VL.LT.REAL( A( 1, 1 ) ) .AND. VU.GE.REAL( A( 1, 1 ) ) ) + $ THEN + M = 1 + W( 1 ) = REAL( A( 1, 1 ) ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + END IF + ANRM = CLANHE( 'M', UPLO, N, A, LDA, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL CSSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL CSSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call CHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N + INDTAU = 1 + INDHOUS = INDTAU + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL CHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, RWORK( INDD ), + $ RWORK( INDE ), WORK( INDTAU ), + $ WORK( INDHOUS ), LHTRD, WORK( INDWRK ), + $ LLWORK, IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal to +* zero, then call SSTERF or CUNGTR and CSTEQR. If this fails for +* some eigenvalue, then try SSTEBZ. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG .OR. TEST ) .AND. ( ABSTOL.LE.ZERO ) ) THEN + CALL SCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL SSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL CLACPY( 'A', N, N, A, LDA, Z, LDZ ) + CALL CUNGTR( UPLO, N, Z, LDZ, WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL CSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + IFAIL( I ) = 0 + 30 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 40 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL CSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by CSTEIN. +* + CALL CUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWRK ), LLWORK, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 40 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 60 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 50 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 60 CONTINUE + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of CHEEVX_2STAGE +* + END diff --git a/dspl/liblapack/SRC/chegs2.f b/dspl/liblapack/SRC/chegs2.f new file mode 100644 index 0000000..68d2f66 --- /dev/null +++ b/dspl/liblapack/SRC/chegs2.f @@ -0,0 +1,296 @@ +*> \brief \b CHEGS2 reduces a Hermitian definite generalized eigenproblem to standard form, using the factorization results obtained from cpotrf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHEGS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHEGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHEGS2 reduces a complex Hermitian-definite generalized +*> eigenproblem to standard form. +*> +*> If ITYPE = 1, the problem is A*x = lambda*B*x, +*> and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) +*> +*> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +*> B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H *A*L. +*> +*> B must have been previously factorized as U**H *U or L*L**H by ZPOTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H); +*> = 2 or 3: compute U*A*U**H or L**H *A*L. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored, and how B has been factorized. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the transformed matrix, stored in the +*> same format as A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> The triangular factor from the Cholesky factorization of B, +*> as returned by CPOTRF. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEcomputational +* +* ===================================================================== + SUBROUTINE CHEGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, HALF + PARAMETER ( ONE = 1.0E+0, HALF = 0.5E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K + REAL AKK, BKK + COMPLEX CT +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CHER2, CLACGV, CSSCAL, CTRMV, CTRSV, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEGS2', -INFO ) + RETURN + END IF +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U**H)*A*inv(U) +* + DO 10 K = 1, N +* +* Update the upper triangle of A(k:n,k:n) +* + AKK = A( K, K ) + BKK = B( K, K ) + AKK = AKK / BKK**2 + A( K, K ) = AKK + IF( K.LT.N ) THEN + CALL CSSCAL( N-K, ONE / BKK, A( K, K+1 ), LDA ) + CT = -HALF*AKK + CALL CLACGV( N-K, A( K, K+1 ), LDA ) + CALL CLACGV( N-K, B( K, K+1 ), LDB ) + CALL CAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL CHER2( UPLO, N-K, -CONE, A( K, K+1 ), LDA, + $ B( K, K+1 ), LDB, A( K+1, K+1 ), LDA ) + CALL CAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL CLACGV( N-K, B( K, K+1 ), LDB ) + CALL CTRSV( UPLO, 'Conjugate transpose', 'Non-unit', + $ N-K, B( K+1, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL CLACGV( N-K, A( K, K+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L**H) +* + DO 20 K = 1, N +* +* Update the lower triangle of A(k:n,k:n) +* + AKK = A( K, K ) + BKK = B( K, K ) + AKK = AKK / BKK**2 + A( K, K ) = AKK + IF( K.LT.N ) THEN + CALL CSSCAL( N-K, ONE / BKK, A( K+1, K ), 1 ) + CT = -HALF*AKK + CALL CAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 ) + CALL CHER2( UPLO, N-K, -CONE, A( K+1, K ), 1, + $ B( K+1, K ), 1, A( K+1, K+1 ), LDA ) + CALL CAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 ) + CALL CTRSV( UPLO, 'No transpose', 'Non-unit', N-K, + $ B( K+1, K+1 ), LDB, A( K+1, K ), 1 ) + END IF + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U**H +* + DO 30 K = 1, N +* +* Update the upper triangle of A(1:k,1:k) +* + AKK = A( K, K ) + BKK = B( K, K ) + CALL CTRMV( UPLO, 'No transpose', 'Non-unit', K-1, B, + $ LDB, A( 1, K ), 1 ) + CT = HALF*AKK + CALL CAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 ) + CALL CHER2( UPLO, K-1, CONE, A( 1, K ), 1, B( 1, K ), 1, + $ A, LDA ) + CALL CAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 ) + CALL CSSCAL( K-1, BKK, A( 1, K ), 1 ) + A( K, K ) = AKK*BKK**2 + 30 CONTINUE + ELSE +* +* Compute L**H *A*L +* + DO 40 K = 1, N +* +* Update the lower triangle of A(1:k,1:k) +* + AKK = A( K, K ) + BKK = B( K, K ) + CALL CLACGV( K-1, A( K, 1 ), LDA ) + CALL CTRMV( UPLO, 'Conjugate transpose', 'Non-unit', K-1, + $ B, LDB, A( K, 1 ), LDA ) + CT = HALF*AKK + CALL CLACGV( K-1, B( K, 1 ), LDB ) + CALL CAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA ) + CALL CHER2( UPLO, K-1, CONE, A( K, 1 ), LDA, B( K, 1 ), + $ LDB, A, LDA ) + CALL CAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA ) + CALL CLACGV( K-1, B( K, 1 ), LDB ) + CALL CSSCAL( K-1, BKK, A( K, 1 ), LDA ) + CALL CLACGV( K-1, A( K, 1 ), LDA ) + A( K, K ) = AKK*BKK**2 + 40 CONTINUE + END IF + END IF + RETURN +* +* End of CHEGS2 +* + END diff --git a/dspl/liblapack/SRC/chegst.f b/dspl/liblapack/SRC/chegst.f new file mode 100644 index 0000000..2f93372 --- /dev/null +++ b/dspl/liblapack/SRC/chegst.f @@ -0,0 +1,331 @@ +*> \brief \b CHEGST +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHEGST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHEGST reduces a complex Hermitian-definite generalized +*> eigenproblem to standard form. +*> +*> If ITYPE = 1, the problem is A*x = lambda*B*x, +*> and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) +*> +*> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +*> B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. +*> +*> B must have been previously factorized as U**H*U or L*L**H by CPOTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H); +*> = 2 or 3: compute U*A*U**H or L**H*A*L. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored and B is factored as +*> U**H*U; +*> = 'L': Lower triangle of A is stored and B is factored as +*> L*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the transformed matrix, stored in the +*> same format as A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> The triangular factor from the Cholesky factorization of B, +*> as returned by CPOTRF. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEcomputational +* +* ===================================================================== + SUBROUTINE CHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + COMPLEX CONE, HALF + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), + $ HALF = ( 0.5E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KB, NB +* .. +* .. External Subroutines .. + EXTERNAL CHEGS2, CHEMM, CHER2K, CTRMM, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEGST', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'CHEGST', UPLO, N, -1, -1, -1 ) +* + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL CHEGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + ELSE +* +* Use blocked code +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U**H)*A*inv(U) +* + DO 10 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the upper triangle of A(k:n,k:n) +* + CALL CHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + IF( K+KB.LE.N ) THEN + CALL CTRSM( 'Left', UPLO, 'Conjugate transpose', + $ 'Non-unit', KB, N-K-KB+1, CONE, + $ B( K, K ), LDB, A( K, K+KB ), LDA ) + CALL CHEMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, + $ A( K, K ), LDA, B( K, K+KB ), LDB, + $ CONE, A( K, K+KB ), LDA ) + CALL CHER2K( UPLO, 'Conjugate transpose', N-K-KB+1, + $ KB, -CONE, A( K, K+KB ), LDA, + $ B( K, K+KB ), LDB, ONE, + $ A( K+KB, K+KB ), LDA ) + CALL CHEMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, + $ A( K, K ), LDA, B( K, K+KB ), LDB, + $ CONE, A( K, K+KB ), LDA ) + CALL CTRSM( 'Right', UPLO, 'No transpose', + $ 'Non-unit', KB, N-K-KB+1, CONE, + $ B( K+KB, K+KB ), LDB, A( K, K+KB ), + $ LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L**H) +* + DO 20 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the lower triangle of A(k:n,k:n) +* + CALL CHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + IF( K+KB.LE.N ) THEN + CALL CTRSM( 'Right', UPLO, 'Conjugate transpose', + $ 'Non-unit', N-K-KB+1, KB, CONE, + $ B( K, K ), LDB, A( K+KB, K ), LDA ) + CALL CHEMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, + $ A( K, K ), LDA, B( K+KB, K ), LDB, + $ CONE, A( K+KB, K ), LDA ) + CALL CHER2K( UPLO, 'No transpose', N-K-KB+1, KB, + $ -CONE, A( K+KB, K ), LDA, + $ B( K+KB, K ), LDB, ONE, + $ A( K+KB, K+KB ), LDA ) + CALL CHEMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, + $ A( K, K ), LDA, B( K+KB, K ), LDB, + $ CONE, A( K+KB, K ), LDA ) + CALL CTRSM( 'Left', UPLO, 'No transpose', + $ 'Non-unit', N-K-KB+1, KB, CONE, + $ B( K+KB, K+KB ), LDB, A( K+KB, K ), + $ LDA ) + END IF + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U**H +* + DO 30 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) +* + CALL CTRMM( 'Left', UPLO, 'No transpose', 'Non-unit', + $ K-1, KB, CONE, B, LDB, A( 1, K ), LDA ) + CALL CHEMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ), + $ LDA, B( 1, K ), LDB, CONE, A( 1, K ), + $ LDA ) + CALL CHER2K( UPLO, 'No transpose', K-1, KB, CONE, + $ A( 1, K ), LDA, B( 1, K ), LDB, ONE, A, + $ LDA ) + CALL CHEMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ), + $ LDA, B( 1, K ), LDB, CONE, A( 1, K ), + $ LDA ) + CALL CTRMM( 'Right', UPLO, 'Conjugate transpose', + $ 'Non-unit', K-1, KB, CONE, B( K, K ), LDB, + $ A( 1, K ), LDA ) + CALL CHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + 30 CONTINUE + ELSE +* +* Compute L**H*A*L +* + DO 40 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) +* + CALL CTRMM( 'Right', UPLO, 'No transpose', 'Non-unit', + $ KB, K-1, CONE, B, LDB, A( K, 1 ), LDA ) + CALL CHEMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ), + $ LDA, B( K, 1 ), LDB, CONE, A( K, 1 ), + $ LDA ) + CALL CHER2K( UPLO, 'Conjugate transpose', K-1, KB, + $ CONE, A( K, 1 ), LDA, B( K, 1 ), LDB, + $ ONE, A, LDA ) + CALL CHEMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ), + $ LDA, B( K, 1 ), LDB, CONE, A( K, 1 ), + $ LDA ) + CALL CTRMM( 'Left', UPLO, 'Conjugate transpose', + $ 'Non-unit', KB, K-1, CONE, B( K, K ), LDB, + $ A( K, 1 ), LDA ) + CALL CHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + 40 CONTINUE + END IF + END IF + END IF + RETURN +* +* End of CHEGST +* + END diff --git a/dspl/liblapack/SRC/chegv.f b/dspl/liblapack/SRC/chegv.f new file mode 100644 index 0000000..16b3a43 --- /dev/null +++ b/dspl/liblapack/SRC/chegv.f @@ -0,0 +1,321 @@ +*> \brief \b CHEGV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHEGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHEGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, +* LWORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. +* REAL RWORK( * ), W( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHEGV computes all the eigenvalues, and optionally, the eigenvectors +*> of a complex generalized Hermitian-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +*> Here A and B are assumed to be Hermitian and B is also +*> positive definite. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> matrix Z of eigenvectors. The eigenvectors are normalized +*> as follows: +*> if ITYPE = 1 or 2, Z**H*B*Z = I; +*> if ITYPE = 3, Z**H*inv(B)*Z = I. +*> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +*> or the lower triangle (if UPLO='L') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB, N) +*> On entry, the Hermitian positive definite matrix B. +*> If UPLO = 'U', the leading N-by-N upper triangular part of B +*> contains the upper triangular part of the matrix B. +*> If UPLO = 'L', the leading N-by-N lower triangular part of B +*> contains the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**H*U or B = L*L**H. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,2*N-1). +*> For optimal efficiency, LWORK >= (NB+1)*N, +*> where NB is the blocksize for CHETRD returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (max(1, 3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: CPOTRF or CHEEV returned an error code: +*> <= N: if INFO = i, CHEEV failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEeigen +* +* ===================================================================== + SUBROUTINE CHEGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, + $ LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER LWKOPT, NB, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHEEV, CHEGST, CPOTRF, CTRMM, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ. -1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, ( NB + 1 )*N ) + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEGV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL CPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL CHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL CHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**H*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + CALL CTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**H*y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + CALL CTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CHEGV +* + END diff --git a/dspl/liblapack/SRC/chegv_2stage.f b/dspl/liblapack/SRC/chegv_2stage.f new file mode 100644 index 0000000..77bc7fc --- /dev/null +++ b/dspl/liblapack/SRC/chegv_2stage.f @@ -0,0 +1,379 @@ +*> \brief \b CHEGV_2STAGE +* +* @generated from zhegv_2stage.f, fortran z -> c, Sun Nov 6 13:09:52 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHEGV_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHEGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, +* WORK, LWORK, RWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. +* REAL RWORK( * ), W( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHEGV_2STAGE computes all the eigenvalues, and optionally, the eigenvectors +*> of a complex generalized Hermitian-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +*> Here A and B are assumed to be Hermitian and B is also +*> positive definite. +*> This routine use the 2stage technique for the reduction to tridiagonal +*> which showed higher performance on recent architecture and for large +*> sizes N>2000. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> matrix Z of eigenvectors. The eigenvectors are normalized +*> as follows: +*> if ITYPE = 1 or 2, Z**H*B*Z = I; +*> if ITYPE = 3, Z**H*inv(B)*Z = I. +*> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +*> or the lower triangle (if UPLO='L') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB, N) +*> On entry, the Hermitian positive definite matrix B. +*> If UPLO = 'U', the leading N-by-N upper triangular part of B +*> contains the upper triangular part of the matrix B. +*> If UPLO = 'L', the leading N-by-N lower triangular part of B +*> contains the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**H*U or B = L*L**H. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (max(1, 3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: CPOTRF or CHEEV returned an error code: +*> <= N: if INFO = i, CHEEV failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexHEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHEGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, + $ WORK, LWORK, RWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER NEIG, LWMIN, LHTRD, LWTRD, KD, IB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + EXTERNAL LSAME, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CHEGST, CPOTRF, CTRMM, CTRSM, + $ CHEEV_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + KD = ILAENV2STAGE( 1, 'CHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'CHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'CHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = N + LHTRD + LWTRD + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEGV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL CPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL CHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL CHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, + $ WORK, LWORK, RWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + CALL CTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**H *y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + CALL CTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of CHEGV_2STAGE +* + END diff --git a/dspl/liblapack/SRC/chegvd.f b/dspl/liblapack/SRC/chegvd.f new file mode 100644 index 0000000..38b2341 --- /dev/null +++ b/dspl/liblapack/SRC/chegvd.f @@ -0,0 +1,412 @@ +*> \brief \b CHEGVD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHEGVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, +* LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDA, LDB, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHEGVD computes all the eigenvalues, and optionally, the eigenvectors +*> of a complex generalized Hermitian-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +*> B are assumed to be Hermitian and B is also positive definite. +*> If eigenvectors are desired, it uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> matrix Z of eigenvectors. The eigenvectors are normalized +*> as follows: +*> if ITYPE = 1 or 2, Z**H*B*Z = I; +*> if ITYPE = 3, Z**H*inv(B)*Z = I. +*> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +*> or the lower triangle (if UPLO='L') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB, N) +*> On entry, the Hermitian matrix B. If UPLO = 'U', the +*> leading N-by-N upper triangular part of B contains the +*> upper triangular part of the matrix B. If UPLO = 'L', +*> the leading N-by-N lower triangular part of B contains +*> the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**H*U or B = L*L**H. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. +*> If N <= 1, LWORK >= 1. +*> If JOBZ = 'N' and N > 1, LWORK >= N + 1. +*> If JOBZ = 'V' and N > 1, LWORK >= 2*N + N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (MAX(1,LRWORK)) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of the array RWORK. +*> If N <= 1, LRWORK >= 1. +*> If JOBZ = 'N' and N > 1, LRWORK >= N. +*> If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If N <= 1, LIWORK >= 1. +*> If JOBZ = 'N' and N > 1, LIWORK >= 1. +*> If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: CPOTRF or CHEEVD returned an error code: +*> <= N: if INFO = i and JOBZ = 'N', then the algorithm +*> failed to converge; i off-diagonal elements of an +*> intermediate tridiagonal form did not converge to +*> zero; +*> if INFO = i and JOBZ = 'V', then the algorithm +*> failed to compute an eigenvalue while working on +*> the submatrix lying in rows and columns INFO/(N+1) +*> through mod(INFO,N+1); +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Modified so that no backsubstitution is performed if CHEEVD fails to +*> converge (NEIG in old code could be greater than N causing out of +*> bounds reference to A - reported by Ralf Meyer). Also corrected the +*> description of INFO and the test on ITYPE. Sven, 16 Feb 05. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +*> +* ===================================================================== + SUBROUTINE CHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, + $ LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER LIOPT, LIWMIN, LOPT, LROPT, LRWMIN, LWMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHEEVD, CHEGST, CPOTRF, CTRMM, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + ELSE IF( WANTZ ) THEN + LWMIN = 2*N + N*N + LRWMIN = 1 + 5*N + 2*N*N + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + 1 + LRWMIN = N + LIWMIN = 1 + END IF + LOPT = LWMIN + LROPT = LRWMIN + LIOPT = LIWMIN + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LOPT + RWORK( 1 ) = LROPT + IWORK( 1 ) = LIOPT +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL CPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL CHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL CHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, LRWORK, + $ IWORK, LIWORK, INFO ) + LOPT = MAX( REAL( LOPT ), REAL( WORK( 1 ) ) ) + LROPT = MAX( REAL( LROPT ), REAL( RWORK( 1 ) ) ) + LIOPT = MAX( REAL( LIOPT ), REAL( IWORK( 1 ) ) ) +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + CALL CTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, N, CONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**H *y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + CALL CTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, N, CONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LOPT + RWORK( 1 ) = LROPT + IWORK( 1 ) = LIOPT +* + RETURN +* +* End of CHEGVD +* + END diff --git a/dspl/liblapack/SRC/chegvx.f b/dspl/liblapack/SRC/chegvx.f new file mode 100644 index 0000000..bf153f5 --- /dev/null +++ b/dspl/liblapack/SRC/chegvx.f @@ -0,0 +1,476 @@ +*> \brief \b CHEGVX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHEGVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHEGVX( ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB, +* VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, +* LWORK, RWORK, IWORK, IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, ITYPE, IU, LDA, LDB, LDZ, LWORK, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHEGVX computes selected eigenvalues, and optionally, eigenvectors +*> of a complex generalized Hermitian-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +*> B are assumed to be Hermitian and B is also positive definite. +*> Eigenvalues and eigenvectors can be selected by specifying either a +*> range of values or a range of indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB, N) +*> On entry, the Hermitian matrix B. If UPLO = 'U', the +*> leading N-by-N upper triangular part of B contains the +*> upper triangular part of the matrix B. If UPLO = 'L', +*> the leading N-by-N lower triangular part of B contains +*> the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**H*U or B = L*L**H. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing C to tridiagonal form, where C is the symmetric +*> matrix of the standard symmetric problem to which the +*> generalized problem is transformed. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements contain the selected +*> eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'N', then Z is not referenced. +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> The eigenvectors are normalized as follows: +*> if ITYPE = 1 or 2, Z**T*B*Z = I; +*> if ITYPE = 3, Z**T*inv(B)*Z = I. +*> +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,2*N). +*> For optimal efficiency, LWORK >= (NB+1)*N, +*> where NB is the blocksize for CHETRD returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: CPOTRF or CHEEVX returned an error code: +*> <= N: if INFO = i, CHEEVX failed to converge; +*> i eigenvectors failed to converge. Their indices +*> are stored in array IFAIL. +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexHEeigen +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE CHEGVX( ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB, + $ VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, + $ LWORK, RWORK, IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, ITYPE, IU, LDA, LDB, LDZ, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ + CHARACTER TRANS + INTEGER LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHEEVX, CHEGST, CPOTRF, CTRMM, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -3 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -13 + END IF + END IF + END IF + IF (INFO.EQ.0) THEN + IF (LDZ.LT.1 .OR. (WANTZ .AND. LDZ.LT.N)) THEN + INFO = -18 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, ( NB + 1 )*N ) + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHEGVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + RETURN + END IF +* +* Form a Cholesky factorization of B. +* + CALL CPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL CHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL CHEEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, ABSTOL, + $ M, W, Z, LDZ, WORK, LWORK, RWORK, IWORK, IFAIL, + $ INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( INFO.GT.0 ) + $ M = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**H*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + CALL CTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, M, CONE, B, + $ LDB, Z, LDZ ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**H*y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + CALL CTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, M, CONE, B, + $ LDB, Z, LDZ ) + END IF + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CHEGVX +* + END diff --git a/dspl/liblapack/SRC/cherfs.f b/dspl/liblapack/SRC/cherfs.f new file mode 100644 index 0000000..1484482 --- /dev/null +++ b/dspl/liblapack/SRC/cherfs.f @@ -0,0 +1,446 @@ +*> \brief \b CHERFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHERFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHERFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, +* X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHERFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is Hermitian indefinite, and +*> provides error bounds and backward error estimates for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The Hermitian matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The factored form of the matrix A. AF contains the block +*> diagonal matrix D and the multipliers used to obtain the +*> factor U or L from the factorization A = U*D*U**H or +*> A = L*D*L**H as computed by CHETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHETRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by CHETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEcomputational +* +* ===================================================================== + SUBROUTINE CHERFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, + $ X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CHEMV, CHETRS, CLACN2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHERFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL CCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL CHEMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 40 CONTINUE + RWORK( K ) = RWORK( K ) + ABS( REAL( A( K, K ) ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + ABS( REAL( A( K, K ) ) )*XK + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CHETRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**H). +* + CALL CHETRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL CHETRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of CHERFS +* + END diff --git a/dspl/liblapack/SRC/cherfsx.f b/dspl/liblapack/SRC/cherfsx.f new file mode 100644 index 0000000..4ed2c99 --- /dev/null +++ b/dspl/liblapack/SRC/cherfsx.f @@ -0,0 +1,700 @@ +*> \brief \b CHERFSX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHERFSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHERFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, +* S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, +* WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, EQUED +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX, * ), WORK( * ) +* REAL S( * ), PARAMS( * ), BERR( * ), RWORK( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHERFSX improves the computed solution to a system of linear +*> equations when the coefficient matrix is Hermitian indefinite, and +*> provides error bounds and backward error estimates for the +*> solution. In addition to normwise error bound, the code provides +*> maximum componentwise error bound if possible. See comments for +*> ERR_BNDS_NORM and ERR_BNDS_COMP for details of the error bounds. +*> +*> The original system of linear equations may have been equilibrated +*> before calling this routine, as described by arguments EQUED and S +*> below. In this case, the solution and error bounds returned are +*> for the original unequilibrated system. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done to A +*> before calling this routine. This is needed to compute +*> the solution and error bounds correctly. +*> = 'N': No equilibration +*> = 'Y': Both row and column equilibration, i.e., A has been +*> replaced by diag(S) * A * diag(S). +*> The right hand side B has been changed accordingly. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular +*> part of the matrix A, and the strictly lower triangular +*> part of A is not referenced. If UPLO = 'L', the leading +*> N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The factored form of the matrix A. AF contains the block +*> diagonal matrix D and the multipliers used to obtain the +*> factor U or L from the factorization A = U*D*U**T or A = +*> L*D*L**T as computed by SSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSYTRF. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A. If EQUED = 'Y', A is multiplied on +*> the left and right by diag(S). S is an input argument if FACT = +*> 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED +*> = 'Y', each element of S must be positive. If S is output, each +*> element of S is a power of the radix. If S is input, each element +*> of S should be a power of the radix to ensure a reliable solution +*> and error estimates. Scaling by powers of the radix does not cause +*> rounding errors unless the result underflows or overflows. +*> Rounding errors during scaling lead to refining with a matrix that +*> is not equivalent to the input matrix, producing error estimates +*> that may not be reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by SGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is REAL array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexHEcomputational +* +* ===================================================================== + SUBROUTINE CHERFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO, EQUED + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX, * ), WORK( * ) + REAL S( * ), PARAMS( * ), BERR( * ), RWORK( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* +* ================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL ITREF_DEFAULT, ITHRESH_DEFAULT, + $ COMPONENTWISE_DEFAULT + REAL RTHRESH_DEFAULT, DZTHRESH_DEFAULT + PARAMETER ( ITREF_DEFAULT = 1.0 ) + PARAMETER ( ITHRESH_DEFAULT = 10.0 ) + PARAMETER ( COMPONENTWISE_DEFAULT = 1.0 ) + PARAMETER ( RTHRESH_DEFAULT = 0.5 ) + PARAMETER ( DZTHRESH_DEFAULT = 0.25 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. Local Scalars .. + CHARACTER(1) NORM + LOGICAL RCEQU + INTEGER J, PREC_TYPE, REF_TYPE + INTEGER N_NORMS + REAL ANORM, RCOND_TMP + REAL ILLRCOND_THRESH, ERR_LBND, CWISE_WRONG + LOGICAL IGNORE_CWISE + INTEGER ITHRESH + REAL RTHRESH, UNSTABLE_THRESH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CHECON, CLA_HERFSX_EXTENDED +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT, TRANSFER +* .. +* .. External Functions .. + EXTERNAL LSAME, ILAPREC + EXTERNAL SLAMCH, CLANHE, CLA_HERCOND_X, CLA_HERCOND_C + REAL SLAMCH, CLANHE, CLA_HERCOND_X, CLA_HERCOND_C + LOGICAL LSAME + INTEGER ILAPREC +* .. +* .. Executable Statements .. +* +* Check the input parameters. +* + INFO = 0 + REF_TYPE = INT( ITREF_DEFAULT ) + IF ( NPARAMS .GE. LA_LINRX_ITREF_I ) THEN + IF ( PARAMS( LA_LINRX_ITREF_I ) .LT. 0.0 ) THEN + PARAMS( LA_LINRX_ITREF_I ) = ITREF_DEFAULT + ELSE + REF_TYPE = PARAMS( LA_LINRX_ITREF_I ) + END IF + END IF +* +* Set default parameters. +* + ILLRCOND_THRESH = REAL( N ) * SLAMCH( 'Epsilon' ) + ITHRESH = INT( ITHRESH_DEFAULT ) + RTHRESH = RTHRESH_DEFAULT + UNSTABLE_THRESH = DZTHRESH_DEFAULT + IGNORE_CWISE = COMPONENTWISE_DEFAULT .EQ. 0.0 +* + IF ( NPARAMS.GE.LA_LINRX_ITHRESH_I ) THEN + IF ( PARAMS( LA_LINRX_ITHRESH_I ).LT.0.0 ) THEN + PARAMS( LA_LINRX_ITHRESH_I ) = ITHRESH + ELSE + ITHRESH = INT( PARAMS( LA_LINRX_ITHRESH_I ) ) + END IF + END IF + IF ( NPARAMS.GE.LA_LINRX_CWISE_I ) THEN + IF ( PARAMS(LA_LINRX_CWISE_I ).LT.0.0 ) THEN + IF ( IGNORE_CWISE ) THEN + PARAMS( LA_LINRX_CWISE_I ) = 0.0 + ELSE + PARAMS( LA_LINRX_CWISE_I ) = 1.0 + END IF + ELSE + IGNORE_CWISE = PARAMS( LA_LINRX_CWISE_I ) .EQ. 0.0 + END IF + END IF + IF ( REF_TYPE .EQ. 0 .OR. N_ERR_BNDS .EQ. 0 ) THEN + N_NORMS = 0 + ELSE IF ( IGNORE_CWISE ) THEN + N_NORMS = 1 + ELSE + N_NORMS = 2 + END IF +* + RCEQU = LSAME( EQUED, 'Y' ) +* +* Test input parameters. +* + IF (.NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RCEQU .AND. .NOT.LSAME( EQUED, 'N' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHERFSX', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + RCOND = 1.0 + DO J = 1, NRHS + BERR( J ) = 0.0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 0.0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 0.0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 1.0 + END IF + END DO + RETURN + END IF +* +* Default to failure. +* + RCOND = 0.0 + DO J = 1, NRHS + BERR( J ) = 1.0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 0.0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 0.0 + END IF + END DO +* +* Compute the norm of A and the reciprocal of the condition +* number of A. +* + NORM = 'I' + ANORM = CLANHE( NORM, UPLO, N, A, LDA, RWORK ) + CALL CHECON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, + $ INFO ) +* +* Perform refinement on each right-hand side +* + IF ( REF_TYPE .NE. 0 ) THEN + + PREC_TYPE = ILAPREC( 'D' ) + + CALL CLA_HERFSX_EXTENDED( PREC_TYPE, UPLO, N, + $ NRHS, A, LDA, AF, LDAF, IPIV, RCEQU, S, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, ERR_BNDS_COMP, + $ WORK, RWORK, WORK(N+1), + $ TRANSFER (RWORK(1:2*N), (/ (ZERO, ZERO) /), N), RCOND, + $ ITHRESH, RTHRESH, UNSTABLE_THRESH, IGNORE_CWISE, + $ INFO ) + END IF + + ERR_LBND = MAX( 10.0, SQRT( REAL( N ) ) ) * SLAMCH( 'Epsilon' ) + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 1 ) THEN +* +* Compute scaled normwise condition number cond(A*C). +* + IF ( RCEQU ) THEN + RCOND_TMP = CLA_HERCOND_C( UPLO, N, A, LDA, AF, LDAF, IPIV, + $ S, .TRUE., INFO, WORK, RWORK ) + ELSE + RCOND_TMP = CLA_HERCOND_C( UPLO, N, A, LDA, AF, LDAF, IPIV, + $ S, .FALSE., INFO, WORK, RWORK ) + END IF + DO J = 1, NRHS +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .GT. 1.0 ) + $ ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 +* +* Threshold the error (see LAWN). +* + IF (RCOND_TMP .LT. ILLRCOND_THRESH) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 0.0 + IF ( INFO .LE. N ) INFO = N + J + ELSE IF ( ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .LT. ERR_LBND ) + $ THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + END DO + END IF + + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 2 ) THEN +* +* Compute componentwise condition number cond(A*diag(Y(:,J))) for +* each right-hand side using the current solution as an estimate of +* the true solution. If the componentwise error estimate is too +* large, then the solution is a lousy estimate of truth and the +* estimated RCOND may be too optimistic. To avoid misleading users, +* the inverse condition number is set to 0.0 when the estimated +* cwise error is at least CWISE_WRONG. +* + CWISE_WRONG = SQRT( SLAMCH( 'Epsilon' ) ) + DO J = 1, NRHS + IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .LT. CWISE_WRONG ) + $ THEN + RCOND_TMP = CLA_HERCOND_X( UPLO, N, A, LDA, AF, LDAF, + $ IPIV, X( 1, J ), INFO, WORK, RWORK ) + ELSE + RCOND_TMP = 0.0 + END IF +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .GT. 1.0 ) + $ ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 0.0 + IF ( .NOT. IGNORE_CWISE + $ .AND. INFO.LT.N + J ) INFO = N + J + ELSE IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) + $ .LT. ERR_LBND ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF +* + RETURN +* +* End of CHERFSX +* + END diff --git a/dspl/liblapack/SRC/chesv.f b/dspl/liblapack/SRC/chesv.f new file mode 100644 index 0000000..261cbbe --- /dev/null +++ b/dspl/liblapack/SRC/chesv.f @@ -0,0 +1,271 @@ +*> \brief CHESV computes the solution to system of linear equations A * X = B for HE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHESV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHESV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHESV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS +*> matrices. +*> +*> The diagonal pivoting method is used to factor A as +*> A = U * D * U**H, if UPLO = 'U', or +*> A = L * D * L**H, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is Hermitian and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then +*> used to solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the block diagonal matrix D and the +*> multipliers used to obtain the factor U or L from the +*> factorization A = U*D*U**H or A = L*D*L**H as computed by +*> CHETRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, as +*> determined by CHETRF. If IPIV(k) > 0, then rows and columns +*> k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +*> diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +*> then rows and columns k-1 and -IPIV(k) were interchanged and +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +*> IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +*> -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +*> diagonal block. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= 1, and for best performance +*> LWORK >= max(1,N*NB), where NB is the optimal blocksize for +*> CHETRF. +*> for LWORK < N, TRS will be done with Level BLAS 2 +*> for LWORK >= N, TRS will be done with Level BLAS 3 +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEsolve +* +* ===================================================================== + SUBROUTINE CHESV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CHETRF, CHETRS, CHETRS2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'CHETRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHESV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*D*U**H or A = L*D*L**H. +* + CALL CHETRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + IF ( LWORK.LT.N ) THEN +* +* Solve with TRS ( Use Level BLAS 2) +* + CALL CHETRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* + ELSE +* +* Solve with TRS2 ( Use Level BLAS 3) +* + CALL CHETRS2( UPLO,N,NRHS,A,LDA,IPIV,B,LDB,WORK,INFO ) +* + END IF +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CHESV +* + END diff --git a/dspl/liblapack/SRC/chesv_aa.f b/dspl/liblapack/SRC/chesv_aa.f new file mode 100644 index 0000000..0bf636b --- /dev/null +++ b/dspl/liblapack/SRC/chesv_aa.f @@ -0,0 +1,252 @@ +*> \brief CHESV_AA computes the solution to system of linear equations A * X = B for HE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHESV_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHESV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHESV_AA computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Aasen's algorithm is used to factor A as +*> A = U * T * U**H, if UPLO = 'U', or +*> A = L * T * L**H, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is Hermitian and tridiagonal. The factored form +*> of A is then used to solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the tridiagonal matrix T and the +*> multipliers used to obtain the factor U or L from the +*> factorization A = U*T*U**H or A = L*T*L**H as computed by +*> CHETRF_AA. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= MAX(1,2*N,3*N-2), and for best +*> performance LWORK >= MAX(1,N*NB), where NB is the optimal +*> blocksize for CHETRF. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexHEsolve +* +* ===================================================================== + SUBROUTINE CHESV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT, LWKOPT_HETRF, LWKOPT_HETRS +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CHETRF_AA, CHETRS_AA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + CALL CHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, -1, INFO ) + LWKOPT_HETRF = INT( WORK(1) ) + CALL CHETRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ -1, INFO ) + LWKOPT_HETRS = INT( WORK(1) ) + LWKOPT = MAX( LWKOPT_HETRF, LWKOPT_HETRS ) + WORK( 1 ) = LWKOPT + IF( LWORK.LT.LWKOPT .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHESV_AA ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*T*U**H or A = L*T*L**H. +* + CALL CHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CHETRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CHESV_AA +* + END diff --git a/dspl/liblapack/SRC/chesv_aa_2stage.f b/dspl/liblapack/SRC/chesv_aa_2stage.f new file mode 100644 index 0000000..057d9c5 --- /dev/null +++ b/dspl/liblapack/SRC/chesv_aa_2stage.f @@ -0,0 +1,276 @@ +*> \brief CHESV_AA_2STAGE computes the solution to system of linear equations A * X = B for HE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHESV_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHESV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, +* IPIV, IPIV2, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX A( LDA, * ), TB( * ), B( LDB, *), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHESV_AA_2STAGE computes the solution to a complex system of +*> linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Aasen's 2-stage algorithm is used to factor A as +*> A = U * T * U**H, if UPLO = 'U', or +*> A = L * T * L**H, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is Hermitian and band. The matrix T is +*> then LU-factored with partial pivoting. The factored form of A +*> is then used to solve the system of equations A * X = B. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CHESV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LDB, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX A( LDA, * ), B( LDB, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHETRF_AA_2STAGE, CHETRS_AA_2STAGE, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + CALL CHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, + $ IPIV2, WORK, -1, INFO ) + LWKOPT = INT( WORK(1) ) + IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHESV_AA_2STAGE', -INFO ) + RETURN + ELSE IF( WQUERY .OR. TQUERY ) THEN + RETURN + END IF +* +* +* Compute the factorization A = U*T*U**H or A = L*T*L**H. +* + CALL CHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, IPIV2, + $ WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CHETRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, + $ IPIV2, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* +* End of CHESV_AA_2STAGE +* + END diff --git a/dspl/liblapack/SRC/chesv_rk.f b/dspl/liblapack/SRC/chesv_rk.f new file mode 100644 index 0000000..f1ff523 --- /dev/null +++ b/dspl/liblapack/SRC/chesv_rk.f @@ -0,0 +1,316 @@ +*> \brief CHESV_RK computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHESV_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHESV_RK( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), E( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> CHESV_RK computes the solution to a complex system of linear +*> equations A * X = B, where A is an N-by-N Hermitian matrix +*> and X and B are N-by-NRHS matrices. +*> +*> The bounded Bunch-Kaufman (rook) diagonal pivoting method is used +*> to factor A as +*> A = P*U*D*(U**H)*(P**T), if UPLO = 'U', or +*> A = P*L*D*(L**H)*(P**T), if UPLO = 'L', +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**H (or L**H) is the conjugate of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is Hermitian and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> CHETRF_RK is called to compute the factorization of a complex +*> Hermitian matrix. The factored form of A is then used to solve +*> the system of equations A * X = B by calling BLAS3 routine CHETRS_3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, if INFO = 0, diagonal of the block diagonal +*> matrix D and factors U or L as computed by CHETRF_RK: +*> a) ONLY diagonal elements of the Hermitian block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> For more info see the description of CHETRF_RK routine. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is COMPLEX array, dimension (N) +*> On exit, contains the output computed by the factorization +*> routine CHETRF_RK, i.e. the superdiagonal (or subdiagonal) +*> elements of the Hermitian block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> +*> For more info see the description of CHETRF_RK routine. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, +*> as determined by CHETRF_RK. +*> +*> For more info see the description of CHETRF_RK routine. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension ( MAX(1,LWORK) ). +*> Work array used in the factorization stage. +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= 1. For best performance +*> of factorization stage LWORK >= max(1,N*NB), where NB is +*> the optimal blocksize for CHETRF_RK. +*> +*> If LWORK = -1, then a workspace query is assumed; +*> the routine only calculates the optimal size of the WORK +*> array for factorization stage, returns this value as +*> the first entry of the WORK array, and no error message +*> related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHESV_RK( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CHETRF_RK, CHETRS_3 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + CALL CHETRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, -1, INFO ) + LWKOPT = WORK(1) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHESV_RK ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL CHETRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, INFO ) +* + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B with BLAS3 solver, overwriting B with X. +* + CALL CHETRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CHESV_RK +* + END diff --git a/dspl/liblapack/SRC/chesv_rook.f b/dspl/liblapack/SRC/chesv_rook.f new file mode 100644 index 0000000..76a6f5e --- /dev/null +++ b/dspl/liblapack/SRC/chesv_rook.f @@ -0,0 +1,295 @@ +*> \brief \b CHESV_ROOK computes the solution to a system of linear equations A * X = B for HE matrices using the bounded Bunch-Kaufman ("rook") diagonal pivoting method +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHESV_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHESV_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHESV_ROOK computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS +*> matrices. +*> +*> The bounded Bunch-Kaufman ("rook") diagonal pivoting method is used +*> to factor A as +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is Hermitian and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> CHETRF_ROOK is called to compute the factorization of a complex +*> Hermition matrix A using the bounded Bunch-Kaufman ("rook") diagonal +*> pivoting method. +*> +*> The factored form of A is then used to solve the system +*> of equations A * X = B by calling CHETRS_ROOK (uses BLAS 2). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the block diagonal matrix D and the +*> multipliers used to obtain the factor U or L from the +*> factorization A = U*D*U**H or A = L*D*L**H as computed by +*> CHETRF_ROOK. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> Only the last KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> Only the first KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= 1, and for best performance +*> LWORK >= max(1,N*NB), where NB is the optimal blocksize for +*> CHETRF_ROOK. +*> for LWORK < N, TRS will be done with Level BLAS 2 +*> for LWORK >= N, TRS will be done with Level BLAS 3 +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup complexHEsolve +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* +* ===================================================================== + SUBROUTINE CHESV_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CHETRF_ROOK, CHETRS_ROOK +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'CHETRF_ROOK', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHESV_ROOK ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*D*U**H or A = L*D*L**H. +* + CALL CHETRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* +* Solve with TRS ( Use Level BLAS 2) +* + CALL CHETRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CHESV_ROOK +* + END diff --git a/dspl/liblapack/SRC/chesvx.f b/dspl/liblapack/SRC/chesvx.f new file mode 100644 index 0000000..b3b1e9c --- /dev/null +++ b/dspl/liblapack/SRC/chesvx.f @@ -0,0 +1,417 @@ +*> \brief CHESVX computes the solution to system of linear equations A * X = B for HE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHESVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHESVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, +* LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, +* RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER FACT, UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHESVX uses the diagonal pivoting factorization to compute the +*> solution to a complex system of linear equations A * X = B, +*> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'N', the diagonal pivoting method is used to factor A. +*> The form of the factorization is +*> A = U * D * U**H, if UPLO = 'U', or +*> A = L * D * L**H, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is Hermitian and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> 2. If some D(i,i)=0, so that D is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 3. The system of equations is solved for X using the factored form +*> of A. +*> +*> 4. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of A has been +*> supplied on entry. +*> = 'F': On entry, AF and IPIV contain the factored form +*> of A. A, AF and IPIV will not be modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The Hermitian matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**H or A = L*D*L**H as computed by CHETRF. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**H or A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains details of the interchanges and the block structure +*> of D, as determined by CHETRF. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains details of the interchanges and the block structure +*> of D, as determined by CHETRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The estimate of the reciprocal condition number of the matrix +*> A. If RCOND is less than the machine precision (in +*> particular, if RCOND = 0), the matrix is singular to working +*> precision. This condition is indicated by a return code of +*> INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= max(1,2*N), and for best +*> performance, when FACT = 'N', LWORK >= max(1,2*N,N*NB), where +*> NB is the optimal blocksize for CHETRF. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: D(i,i) is exactly zero. The factorization +*> has been completed but the factor D is exactly +*> singular, so the solution and error bounds could +*> not be computed. RCOND = 0 is returned. +*> = N+1: D is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexHEsolve +* +* ===================================================================== + SUBROUTINE CHESVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, + $ LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, + $ RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOFACT + INTEGER LWKOPT, NB + REAL ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANHE, SLAMCH + EXTERNAL ILAENV, LSAME, CLANHE, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CHECON, CHERFS, CHETRF, CHETRS, CLACPY, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKOPT = MAX( 1, 2*N ) + IF( NOFACT ) THEN + NB = ILAENV( 1, 'CHETRF', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( LWKOPT, N*NB ) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHESVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U**H or A = L*D*L**H. +* + CALL CLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL CHETRF( UPLO, N, AF, LDAF, IPIV, WORK, LWORK, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = CLANHE( 'I', UPLO, N, A, LDA, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL CHECON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, INFO ) +* +* Compute the solution vectors X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CHETRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL CHERFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CHESVX +* + END diff --git a/dspl/liblapack/SRC/chesvxx.f b/dspl/liblapack/SRC/chesvxx.f new file mode 100644 index 0000000..3f4466d --- /dev/null +++ b/dspl/liblapack/SRC/chesvxx.f @@ -0,0 +1,700 @@ +*> \brief CHESVXX computes the solution to system of linear equations A * X = B for HE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHESVXX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHESVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, +* EQUED, S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, +* N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, +* NPARAMS, PARAMS, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* REAL RCOND, RPVGRW +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* REAL S( * ), PARAMS( * ), BERR( * ), RWORK( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHESVXX uses the diagonal pivoting factorization to compute the +*> solution to a complex system of linear equations A * X = B, where +*> A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> If requested, both normwise and maximum componentwise error bounds +*> are returned. CHESVXX will return a solution with a tiny +*> guaranteed error (O(eps) where eps is the working machine +*> precision) unless the matrix is very ill-conditioned, in which +*> case a warning is returned. Relevant condition numbers also are +*> calculated and returned. +*> +*> CHESVXX accepts user-provided factorizations and equilibration +*> factors; see the definitions of the FACT and EQUED options. +*> Solving with refinement and using a factorization from a previous +*> CHESVXX call will also produce a solution with either O(eps) +*> errors or warnings, but we cannot make that claim for general +*> user-provided factorizations and equilibration factors if they +*> differ from what CHESVXX would itself produce. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> +*> diag(S)*A*diag(S) *inv(diag(S))*X = diag(S)*B +*> +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor +*> the matrix A (after equilibration if FACT = 'E') as +*> +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> 3. If some D(i,i)=0, so that D is exactly singular, then the +*> routine returns with INFO = i. Otherwise, the factored form of A +*> is used to estimate the condition number of the matrix A (see +*> argument RCOND). If the reciprocal of the condition number is +*> less than machine precision, the routine still goes on to solve +*> for X and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), +*> the routine will use iterative refinement to try to get a small +*> error and error bounds. Refinement calculates the residual to at +*> least twice the working precision. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(R) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF and IPIV contain the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by S. +*> A, AF, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular +*> part of the matrix A, and the strictly lower triangular +*> part of A is not referenced. If UPLO = 'L', the leading +*> N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L from the factorization A = +*> U*D*U**T or A = L*D*L**T as computed by SSYTRF. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L from the factorization A = +*> U*D*U**T or A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains details of the interchanges and the block +*> structure of D, as determined by CHETRF. If IPIV(k) > 0, +*> then rows and columns k and IPIV(k) were interchanged and +*> D(k,k) is a 1-by-1 diagonal block. If UPLO = 'U' and +*> IPIV(k) = IPIV(k-1) < 0, then rows and columns k-1 and +*> -IPIV(k) were interchanged and D(k-1:k,k-1:k) is a 2-by-2 +*> diagonal block. If UPLO = 'L' and IPIV(k) = IPIV(k+1) < 0, +*> then rows and columns k+1 and -IPIV(k) were interchanged +*> and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains details of the interchanges and the block +*> structure of D, as determined by CHETRF. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Both row and column equilibration, i.e., A has been +*> replaced by diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A. If EQUED = 'Y', A is multiplied on +*> the left and right by diag(S). S is an input argument if FACT = +*> 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED +*> = 'Y', each element of S must be positive. If S is output, each +*> element of S is a power of the radix. If S is input, each element +*> of S should be a power of the radix to ensure a reliable solution +*> and error estimates. Scaling by powers of the radix does not cause +*> rounding errors unless the result underflows or overflows. +*> Rounding errors during scaling lead to refining with a matrix that +*> is not equivalent to the input matrix, producing error estimates +*> that may not be reliable. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if EQUED = 'Y', B is overwritten by diag(S)*B; +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X to the original +*> system of equations. Note that A and B are modified on exit if +*> EQUED .ne. 'N', and the solution to the equilibrated system is +*> inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] RPVGRW +*> \verbatim +*> RPVGRW is REAL +*> Reciprocal pivot growth. On exit, this contains the reciprocal +*> pivot growth factor norm(A)/norm(U). The "max absolute element" +*> norm is used. If this is much less than 1, then the stability of +*> the LU factorization of the (equilibrated) matrix A could be poor. +*> This also means that the solution X, estimated condition numbers, +*> and error bounds could be unreliable. If factorization fails with +*> 0 for the leading INFO columns of A. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is REAL array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexHEsolve +* +* ===================================================================== + SUBROUTINE CHESVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ EQUED, S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, + $ N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, + $ NPARAMS, PARAMS, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + REAL RCOND, RPVGRW +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) + REAL S( * ), PARAMS( * ), BERR( * ), RWORK( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER INFEQU, J + REAL AMAX, BIGNUM, SMIN, SMAX, SCOND, SMLNUM +* .. +* .. External Functions .. + EXTERNAL LSAME, SLAMCH, CLA_HERPVGRW + LOGICAL LSAME + REAL SLAMCH, CLA_HERPVGRW +* .. +* .. External Subroutines .. + EXTERNAL CHEEQUB, CHETRF, CHETRS, CLACPY, + $ CLAQHE, XERBLA, CLASCL2, CHERFSX +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + ENDIF +* +* Default is failure. If an input parameter is wrong or +* factorization fails, make everything look horrible. Only the +* pivot growth is set here, the rest is initialized in CHERFSX. +* + RPVGRW = ZERO +* +* Test the input parameters. PARAMS is not tested until CHERFSX. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT. + $ LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. + $ .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -9 + ELSE + IF ( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -10 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHESVXX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL CHEEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL CLAQHE( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) CALL CLASCL2( N, NRHS, S, B, LDB ) +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LDL^T or UDU^T factorization of A. +* + CALL CLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL CHETRF( UPLO, N, AF, LDAF, IPIV, WORK, 5*MAX(1,N), INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Pivot in column INFO is exactly 0 +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + IF( N.GT.0 ) + $ RPVGRW = CLA_HERPVGRW( UPLO, N, INFO, A, LDA, AF, LDAF, + $ IPIV, RWORK ) + RETURN + END IF + END IF +* +* Compute the reciprocal pivot growth factor RPVGRW. +* + IF( N.GT.0 ) + $ RPVGRW = CLA_HERPVGRW( UPLO, N, INFO, A, LDA, AF, LDAF, IPIV, + $ RWORK ) +* +* Compute the solution matrix X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CHETRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL CHERFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, RWORK, INFO ) +* +* Scale solutions. +* + IF ( RCEQU ) THEN + CALL CLASCL2 ( N, NRHS, S, X, LDX ) + END IF +* + RETURN +* +* End of CHESVXX +* + END diff --git a/dspl/liblapack/SRC/cheswapr.f b/dspl/liblapack/SRC/cheswapr.f new file mode 100644 index 0000000..f251b92 --- /dev/null +++ b/dspl/liblapack/SRC/cheswapr.f @@ -0,0 +1,203 @@ +*> \brief \b CHESWAPR applies an elementary permutation on the rows and columns of a Hermitian matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHESWAPR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHESWAPR( UPLO, N, A, LDA, I1, I2) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER I1, I2, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, N ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHESWAPR applies an elementary permutation on the rows and the columns of +*> a hermitian matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the NB diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by CSYTRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] I1 +*> \verbatim +*> I1 is INTEGER +*> Index of the first row to swap +*> \endverbatim +*> +*> \param[in] I2 +*> \verbatim +*> I2 is INTEGER +*> Index of the second row to swap +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEauxiliary +* +* ===================================================================== + SUBROUTINE CHESWAPR( UPLO, N, A, LDA, I1, I2) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER I1, I2, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, N ) +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + COMPLEX TMP +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSWAP +* .. +* .. Executable Statements .. +* + UPPER = LSAME( UPLO, 'U' ) + IF (UPPER) THEN +* +* UPPER +* first swap +* - swap column I1 and I2 from I1 to I1-1 + CALL CSWAP( I1-1, A(1,I1), 1, A(1,I2), 1 ) +* +* second swap : +* - swap A(I1,I1) and A(I2,I2) +* - swap row I1 from I1+1 to I2-1 with col I2 from I1+1 to I2-1 +* - swap A(I2,I1) and A(I1,I2) + + TMP=A(I1,I1) + A(I1,I1)=A(I2,I2) + A(I2,I2)=TMP +* + DO I=1,I2-I1-1 + TMP=A(I1,I1+I) + A(I1,I1+I)=CONJG(A(I1+I,I2)) + A(I1+I,I2)=CONJG(TMP) + END DO +* + A(I1,I2)=CONJG(A(I1,I2)) + +* +* third swap +* - swap row I1 and I2 from I2+1 to N + DO I=I2+1,N + TMP=A(I1,I) + A(I1,I)=A(I2,I) + A(I2,I)=TMP + END DO +* + ELSE +* +* LOWER +* first swap +* - swap row I1 and I2 from 1 to I1-1 + CALL CSWAP ( I1-1, A(I1,1), LDA, A(I2,1), LDA ) +* +* second swap : +* - swap A(I1,I1) and A(I2,I2) +* - swap col I1 from I1+1 to I2-1 with row I2 from I1+1 to I2-1 +* - swap A(I2,I1) and A(I1,I2) + + TMP=A(I1,I1) + A(I1,I1)=A(I2,I2) + A(I2,I2)=TMP +* + DO I=1,I2-I1-1 + TMP=A(I1+I,I1) + A(I1+I,I1)=CONJG(A(I2,I1+I)) + A(I2,I1+I)=CONJG(TMP) + END DO +* + A(I2,I1)=CONJG(A(I2,I1)) +* +* third swap +* - swap col I1 and I2 from I2+1 to N + DO I=I2+1,N + TMP=A(I,I1) + A(I,I1)=A(I,I2) + A(I,I2)=TMP + END DO +* + ENDIF + + END SUBROUTINE CHESWAPR + diff --git a/dspl/liblapack/SRC/chetd2.f b/dspl/liblapack/SRC/chetd2.f new file mode 100644 index 0000000..b80bfec --- /dev/null +++ b/dspl/liblapack/SRC/chetd2.f @@ -0,0 +1,334 @@ +*> \brief \b CHETD2 reduces a Hermitian matrix to real symmetric tridiagonal form by an unitary similarity transformation (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETD2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETD2( UPLO, N, A, LDA, D, E, TAU, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ) +* COMPLEX A( LDA, * ), TAU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETD2 reduces a complex Hermitian matrix A to real symmetric +*> tridiagonal form T by a unitary similarity transformation: +*> Q**H * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the unitary +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the unitary matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n-1) . . . H(2) H(1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +*> A(1:i-1,i+1), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(n-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +*> and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( d e v2 v3 v4 ) ( d ) +*> ( d e v3 v4 ) ( e d ) +*> ( d e v4 ) ( v1 e d ) +*> ( d e ) ( v1 v2 e d ) +*> ( d ) ( v1 v2 v3 e d ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CHETD2( UPLO, N, A, LDA, D, E, TAU, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX A( LDA, * ), TAU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO, HALF + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ), + $ HALF = ( 0.5E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + COMPLEX ALPHA, TAUI +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CHEMV, CHER2, CLARFG, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETD2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A +* + A( N, N ) = REAL( A( N, N ) ) + DO 10 I = N - 1, 1, -1 +* +* Generate elementary reflector H(i) = I - tau * v * v**H +* to annihilate A(1:i-1,i+1) +* + ALPHA = A( I, I+1 ) + CALL CLARFG( I, ALPHA, A( 1, I+1 ), 1, TAUI ) + E( I ) = ALPHA +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(1:i,1:i) +* + A( I, I+1 ) = ONE +* +* Compute x := tau * A * v storing x in TAU(1:i) +* + CALL CHEMV( UPLO, I, TAUI, A, LDA, A( 1, I+1 ), 1, ZERO, + $ TAU, 1 ) +* +* Compute w := x - 1/2 * tau * (x**H * v) * v +* + ALPHA = -HALF*TAUI*CDOTC( I, TAU, 1, A( 1, I+1 ), 1 ) + CALL CAXPY( I, ALPHA, A( 1, I+1 ), 1, TAU, 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w**H - w * v**H +* + CALL CHER2( UPLO, I, -ONE, A( 1, I+1 ), 1, TAU, 1, A, + $ LDA ) +* + ELSE + A( I, I ) = REAL( A( I, I ) ) + END IF + A( I, I+1 ) = E( I ) + D( I+1 ) = A( I+1, I+1 ) + TAU( I ) = TAUI + 10 CONTINUE + D( 1 ) = A( 1, 1 ) + ELSE +* +* Reduce the lower triangle of A +* + A( 1, 1 ) = REAL( A( 1, 1 ) ) + DO 20 I = 1, N - 1 +* +* Generate elementary reflector H(i) = I - tau * v * v**H +* to annihilate A(i+2:n,i) +* + ALPHA = A( I+1, I ) + CALL CLARFG( N-I, ALPHA, A( MIN( I+2, N ), I ), 1, TAUI ) + E( I ) = ALPHA +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(i+1:n,i+1:n) +* + A( I+1, I ) = ONE +* +* Compute x := tau * A * v storing y in TAU(i:n-1) +* + CALL CHEMV( UPLO, N-I, TAUI, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, TAU( I ), 1 ) +* +* Compute w := x - 1/2 * tau * (x**H * v) * v +* + ALPHA = -HALF*TAUI*CDOTC( N-I, TAU( I ), 1, A( I+1, I ), + $ 1 ) + CALL CAXPY( N-I, ALPHA, A( I+1, I ), 1, TAU( I ), 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w**H - w * v**H +* + CALL CHER2( UPLO, N-I, -ONE, A( I+1, I ), 1, TAU( I ), 1, + $ A( I+1, I+1 ), LDA ) +* + ELSE + A( I+1, I+1 ) = REAL( A( I+1, I+1 ) ) + END IF + A( I+1, I ) = E( I ) + D( I ) = A( I, I ) + TAU( I ) = TAUI + 20 CONTINUE + D( N ) = A( N, N ) + END IF +* + RETURN +* +* End of CHETD2 +* + END diff --git a/dspl/liblapack/SRC/chetf2.f b/dspl/liblapack/SRC/chetf2.f new file mode 100644 index 0000000..15585bd --- /dev/null +++ b/dspl/liblapack/SRC/chetf2.f @@ -0,0 +1,634 @@ +*> \brief \b CHETF2 computes the factorization of a complex Hermitian matrix, using the diagonal pivoting method (unblocked algorithm calling Level 2 BLAS). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETF2( UPLO, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETF2 computes the factorization of a complex Hermitian matrix A +*> using the Bunch-Kaufman diagonal pivoting method: +*> +*> A = U*D*U**H or A = L*D*L**H +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, U**H is the conjugate transpose of U, and D is +*> Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k-1) < 0, then rows and columns +*> k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k+1) < 0, then rows and columns +*> k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1) +*> is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> 09-29-06 - patch from +*> Bobby Cheng, MathWorks +*> +*> Replace l.210 and l.392 +*> IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +*> by +*> IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) .OR. SISNAN(ABSAKK) ) THEN +*> +*> 01-01-96 - Based on modifications by +*> J. Lewis, Boeing Computer Services Company +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +*> +*> If UPLO = 'U', then A = U*D*U**H, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**H, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CHETF2( UPLO, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KK, KP, KSTEP + REAL ABSAKK, ALPHA, COLMAX, D, D11, D22, R1, ROWMAX, + $ TT + COMPLEX D12, D21, T, WK, WKM1, WKP1, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + INTEGER ICAMAX + REAL SLAPY2 + EXTERNAL LSAME, ICAMAX, SLAPY2, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CHER, CSSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETF2', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**H using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 90 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( REAL( A( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = ICAMAX( K-1, A( 1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) .OR. SISNAN(ABSAKK) ) THEN +* +* Column K is or underflow, or contains a NaN: +* set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = REAL( A( K, K ) ) + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + ICAMAX( K-IMAX, A( IMAX, IMAX+1 ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + IF( IMAX.GT.1 ) THEN + JMAX = ICAMAX( IMAX-1, A( 1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( REAL( A( IMAX, IMAX ) ) ).GE.ALPHA*ROWMAX ) + $ THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL CSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) + DO 20 J = KP + 1, KK - 1 + T = CONJG( A( J, KK ) ) + A( J, KK ) = CONJG( A( KP, J ) ) + A( KP, J ) = T + 20 CONTINUE + A( KP, KK ) = CONJG( A( KP, KK ) ) + R1 = REAL( A( KK, KK ) ) + A( KK, KK ) = REAL( A( KP, KP ) ) + A( KP, KP ) = R1 + IF( KSTEP.EQ.2 ) THEN + A( K, K ) = REAL( A( K, K ) ) + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + ELSE + A( K, K ) = REAL( A( K, K ) ) + IF( KSTEP.EQ.2 ) + $ A( K-1, K-1 ) = REAL( A( K-1, K-1 ) ) + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)**H = A - W(k)*1/D(k)*W(k)**H +* + R1 = ONE / REAL( A( K, K ) ) + CALL CHER( UPLO, K-1, -R1, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL CSSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**H +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )**H +* + IF( K.GT.2 ) THEN +* + D = SLAPY2( REAL( A( K-1, K ) ), + $ AIMAG( A( K-1, K ) ) ) + D22 = REAL( A( K-1, K-1 ) ) / D + D11 = REAL( A( K, K ) ) / D + TT = ONE / ( D11*D22-ONE ) + D12 = A( K-1, K ) / D + D = TT / D +* + DO 40 J = K - 2, 1, -1 + WKM1 = D*( D11*A( J, K-1 )-CONJG( D12 )*A( J, K ) ) + WK = D*( D22*A( J, K )-D12*A( J, K-1 ) ) + DO 30 I = J, 1, -1 + A( I, J ) = A( I, J ) - A( I, K )*CONJG( WK ) - + $ A( I, K-1 )*CONJG( WKM1 ) + 30 CONTINUE + A( J, K ) = WK + A( J, K-1 ) = WKM1 + A( J, J ) = CMPLX( REAL( A( J, J ) ), 0.0E+0 ) + 40 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**H using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 50 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 90 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( REAL( A( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + ICAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) .OR. SISNAN(ABSAKK) ) THEN +* +* Column K is zero or underflow, contains a NaN: +* set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = REAL( A( K, K ) ) + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + ICAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + ICAMAX( N-IMAX, A( IMAX+1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( REAL( A( IMAX, IMAX ) ) ).GE.ALPHA*ROWMAX ) + $ THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) + DO 60 J = KK + 1, KP - 1 + T = CONJG( A( J, KK ) ) + A( J, KK ) = CONJG( A( KP, J ) ) + A( KP, J ) = T + 60 CONTINUE + A( KP, KK ) = CONJG( A( KP, KK ) ) + R1 = REAL( A( KK, KK ) ) + A( KK, KK ) = REAL( A( KP, KP ) ) + A( KP, KP ) = R1 + IF( KSTEP.EQ.2 ) THEN + A( K, K ) = REAL( A( K, K ) ) + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + ELSE + A( K, K ) = REAL( A( K, K ) ) + IF( KSTEP.EQ.2 ) + $ A( K+1, K+1 ) = REAL( A( K+1, K+1 ) ) + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)**H = A - W(k)*(1/D(k))*W(k)**H +* + R1 = ONE / REAL( A( K, K ) ) + CALL CHER( UPLO, N-K, -R1, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column K +* + CALL CSSCAL( N-K, R1, A( K+1, K ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k) +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )**H +* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )**H +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D = SLAPY2( REAL( A( K+1, K ) ), + $ AIMAG( A( K+1, K ) ) ) + D11 = REAL( A( K+1, K+1 ) ) / D + D22 = REAL( A( K, K ) ) / D + TT = ONE / ( D11*D22-ONE ) + D21 = A( K+1, K ) / D + D = TT / D +* + DO 80 J = K + 2, N + WK = D*( D11*A( J, K )-D21*A( J, K+1 ) ) + WKP1 = D*( D22*A( J, K+1 )-CONJG( D21 )*A( J, K ) ) + DO 70 I = J, N + A( I, J ) = A( I, J ) - A( I, K )*CONJG( WK ) - + $ A( I, K+1 )*CONJG( WKP1 ) + 70 CONTINUE + A( J, K ) = WK + A( J, K+1 ) = WKP1 + A( J, J ) = CMPLX( REAL( A( J, J ) ), 0.0E+0 ) + 80 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 50 +* + END IF +* + 90 CONTINUE + RETURN +* +* End of CHETF2 +* + END diff --git a/dspl/liblapack/SRC/chetf2_rk.f b/dspl/liblapack/SRC/chetf2_rk.f new file mode 100644 index 0000000..38a0ce3 --- /dev/null +++ b/dspl/liblapack/SRC/chetf2_rk.f @@ -0,0 +1,1039 @@ +*> \brief \b CHETF2_RK computes the factorization of a complex Hermitian indefinite matrix using the bounded Bunch-Kaufman (rook) diagonal pivoting method (BLAS2 unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETF2_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETF2_RK( UPLO, N, A, LDA, E, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), E ( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> CHETF2_RK computes the factorization of a complex Hermitian matrix A +*> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: +*> +*> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**H (or L**H) is the conjugate of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is Hermitian and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> For more information see Further Details section. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, contains: +*> a) ONLY diagonal elements of the Hermitian block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is COMPLEX array, dimension (N) +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the Hermitian block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> IPIV describes the permutation matrix P in the factorization +*> of matrix A as follows. The absolute value of IPIV(k) +*> represents the index of row and column that were +*> interchanged with the k-th row and column. The value of UPLO +*> describes the order in which the interchanges were applied. +*> Also, the sign of IPIV represents the block structure of +*> the Hermitian block diagonal matrix D with 1-by-1 or 2-by-2 +*> diagonal blocks which correspond to 1 or 2 interchanges +*> at each factorization step. For more info see Further +*> Details section. +*> +*> If UPLO = 'U', +*> ( in factorization order, k decreases from N to 1 ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N); +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k-1) < 0 means: +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k-1) != k-1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k-1) = k-1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) <= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> +*> If UPLO = 'L', +*> ( in factorization order, k increases from 1 to N ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N). +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k+1) < 0 means: +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k+1) != k+1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k+1) = k+1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) >= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> TODO: put further details +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> 01-01-96 - Based on modifications by +*> J. Lewis, Boeing Computer Services Company +*> A. Petitet, Computer Science Dept., +*> Univ. of Tenn., Knoxville abd , USA +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHETF2_RK( UPLO, N, A, LDA, E, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), E( * ) +* .. +* +* ====================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL DONE, UPPER + INTEGER I, II, IMAX, ITEMP, J, JMAX, K, KK, KP, KSTEP, + $ P + REAL ABSAKK, ALPHA, COLMAX, D, D11, D22, R1, STEMP, + $ ROWMAX, TT, SFMIN + COMPLEX D12, D21, T, WK, WKM1, WKP1, Z +* .. +* .. External Functions .. +* + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH, SLAPY2 + EXTERNAL LSAME, ICAMAX, SLAMCH, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CSSCAL, CHER, CSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( REAL( Z ) ) + ABS( AIMAG( Z ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETF2_RK', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = SLAMCH( 'S' ) +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**H using the upper triangle of A +* +* Initilize the first entry of array E, where superdiagonal +* elements of D are stored +* + E( 1 ) = CZERO +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 34 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( REAL( A( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = ICAMAX( K-1, A( 1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( ( MAX( ABSAKK, COLMAX ).EQ.ZERO ) ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = REAL( A( K, K ) ) +* +* Set E( K ) to zero +* + IF( K.GT.1 ) + $ E( K ) = CZERO +* + ELSE +* +* ============================================================ +* +* BEGIN pivot search +* +* Case(1) +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 12 CONTINUE +* +* BEGIN pivot search loop body +* +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + ICAMAX( K-IMAX, A( IMAX, IMAX+1 ), + $ LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = ICAMAX( IMAX-1, A( 1, IMAX ), 1 ) + STEMP = CABS1( A( ITEMP, IMAX ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Case(2) +* Equivalent to testing for +* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABS( REAL( A( IMAX, IMAX ) ) ) + $ .LT.ALPHA*ROWMAX ) ) THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Case(3) +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K-1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. +* +* Case(4) + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* END pivot search loop body +* + IF( .NOT.DONE ) GOTO 12 +* + END IF +* +* END pivot search +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K - KSTEP + 1 +* +* For only a 2x2 pivot, interchange rows and columns K and P +* in the leading submatrix A(1:k,1:k) +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* (1) Swap columnar parts + IF( P.GT.1 ) + $ CALL CSWAP( P-1, A( 1, K ), 1, A( 1, P ), 1 ) +* (2) Swap and conjugate middle parts + DO 14 J = P + 1, K - 1 + T = CONJG( A( J, K ) ) + A( J, K ) = CONJG( A( P, J ) ) + A( P, J ) = T + 14 CONTINUE +* (3) Swap and conjugate corner elements at row-col interserction + A( P, K ) = CONJG( A( P, K ) ) +* (4) Swap diagonal elements at row-col intersection + R1 = REAL( A( K, K ) ) + A( K, K ) = REAL( A( P, P ) ) + A( P, P ) = R1 +* +* Convert upper triangle of A into U form by applying +* the interchanges in columns k+1:N. +* + IF( K.LT.N ) + $ CALL CSWAP( N-K, A( K, K+1 ), LDA, A( P, K+1 ), LDA ) +* + END IF +* +* For both 1x1 and 2x2 pivots, interchange rows and +* columns KK and KP in the leading submatrix A(1:k,1:k) +* + IF( KP.NE.KK ) THEN +* (1) Swap columnar parts + IF( KP.GT.1 ) + $ CALL CSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) +* (2) Swap and conjugate middle parts + DO 15 J = KP + 1, KK - 1 + T = CONJG( A( J, KK ) ) + A( J, KK ) = CONJG( A( KP, J ) ) + A( KP, J ) = T + 15 CONTINUE +* (3) Swap and conjugate corner elements at row-col interserction + A( KP, KK ) = CONJG( A( KP, KK ) ) +* (4) Swap diagonal elements at row-col intersection + R1 = REAL( A( KK, KK ) ) + A( KK, KK ) = REAL( A( KP, KP ) ) + A( KP, KP ) = R1 +* + IF( KSTEP.EQ.2 ) THEN +* (*) Make sure that diagonal element of pivot is real + A( K, K ) = REAL( A( K, K ) ) +* (5) Swap row elements + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF +* +* Convert upper triangle of A into U form by applying +* the interchanges in columns k+1:N. +* + IF( K.LT.N ) + $ CALL CSWAP( N-K, A( KK, K+1 ), LDA, A( KP, K+1 ), + $ LDA ) +* + ELSE +* (*) Make sure that diagonal element of pivot is real + A( K, K ) = REAL( A( K, K ) ) + IF( KSTEP.EQ.2 ) + $ A( K-1, K-1 ) = REAL( A( K-1, K-1 ) ) + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* + IF( K.GT.1 ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) and +* store U(k) in column k +* + IF( ABS( REAL( A( K, K ) ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*1/D(k)*W(k)**T +* + D11 = ONE / REAL( A( K, K ) ) + CALL CHER( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL CSSCAL( K-1, D11, A( 1, K ), 1 ) + ELSE +* +* Store L(k) in column K +* + D11 = REAL( A( K, K ) ) + DO 16 II = 1, K - 1 + A( II, K ) = A( II, K ) / D11 + 16 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL CHER( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) + END IF +* +* Store the superdiagonal element of D in array E +* + E( K ) = CZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T +* = A - ( ( A(k-1)A(k) )*inv(D(k)) ) * ( A(k-1)A(k) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.GT.2 ) THEN +* D = |A12| + D = SLAPY2( REAL( A( K-1, K ) ), + $ AIMAG( A( K-1, K ) ) ) + D11 = A( K, K ) / D + D22 = A( K-1, K-1 ) / D + D12 = A( K-1, K ) / D + TT = ONE / ( D11*D22-ONE ) +* + DO 30 J = K - 2, 1, -1 +* +* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J +* + WKM1 = TT*( D11*A( J, K-1 )-CONJG( D12 )* + $ A( J, K ) ) + WK = TT*( D22*A( J, K )-D12*A( J, K-1 ) ) +* +* Perform a rank-2 update of A(1:k-2,1:k-2) +* + DO 20 I = J, 1, -1 + A( I, J ) = A( I, J ) - + $ ( A( I, K ) / D )*CONJG( WK ) - + $ ( A( I, K-1 ) / D )*CONJG( WKM1 ) + 20 CONTINUE +* +* Store U(k) and U(k-1) in cols k and k-1 for row J +* + A( J, K ) = WK / D + A( J, K-1 ) = WKM1 / D +* (*) Make sure that diagonal element of pivot is real + A( J, J ) = CMPLX( REAL( A( J, J ) ), ZERO ) +* + 30 CONTINUE +* + END IF +* +* Copy superdiagonal elements of D(K) to E(K) and +* ZERO out superdiagonal entry of A +* + E( K ) = A( K-1, K ) + E( K-1 ) = CZERO + A( K-1, K ) = CZERO +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 34 CONTINUE +* + ELSE +* +* Factorize A as L*D*L**H using the lower triangle of A +* +* Initilize the unused last entry of the subdiagonal array E. +* + E( N ) = CZERO +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 64 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( REAL( A( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + ICAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = REAL( A( K, K ) ) +* +* Set E( K ) to zero +* + IF( K.LT.N ) + $ E( K ) = CZERO +* + ELSE +* +* ============================================================ +* +* BEGIN pivot search +* +* Case(1) +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 42 CONTINUE +* +* BEGIN pivot search loop body +* +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + ICAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + ICAMAX( N-IMAX, A( IMAX+1, IMAX ), + $ 1 ) + STEMP = CABS1( A( ITEMP, IMAX ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Case(2) +* Equivalent to testing for +* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABS( REAL( A( IMAX, IMAX ) ) ) + $ .LT.ALPHA*ROWMAX ) ) THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Case(3) +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. +* +* Case(4) + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* +* END pivot search loop body +* + IF( .NOT.DONE ) GOTO 42 +* + END IF +* +* END pivot search +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K + KSTEP - 1 +* +* For only a 2x2 pivot, interchange rows and columns K and P +* in the trailing submatrix A(k:n,k:n) +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* (1) Swap columnar parts + IF( P.LT.N ) + $ CALL CSWAP( N-P, A( P+1, K ), 1, A( P+1, P ), 1 ) +* (2) Swap and conjugate middle parts + DO 44 J = K + 1, P - 1 + T = CONJG( A( J, K ) ) + A( J, K ) = CONJG( A( P, J ) ) + A( P, J ) = T + 44 CONTINUE +* (3) Swap and conjugate corner elements at row-col interserction + A( P, K ) = CONJG( A( P, K ) ) +* (4) Swap diagonal elements at row-col intersection + R1 = REAL( A( K, K ) ) + A( K, K ) = REAL( A( P, P ) ) + A( P, P ) = R1 +* +* Convert lower triangle of A into L form by applying +* the interchanges in columns 1:k-1. +* + IF ( K.GT.1 ) + $ CALL CSWAP( K-1, A( K, 1 ), LDA, A( P, 1 ), LDA ) +* + END IF +* +* For both 1x1 and 2x2 pivots, interchange rows and +* columns KK and KP in the trailing submatrix A(k:n,k:n) +* + IF( KP.NE.KK ) THEN +* (1) Swap columnar parts + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) +* (2) Swap and conjugate middle parts + DO 45 J = KK + 1, KP - 1 + T = CONJG( A( J, KK ) ) + A( J, KK ) = CONJG( A( KP, J ) ) + A( KP, J ) = T + 45 CONTINUE +* (3) Swap and conjugate corner elements at row-col interserction + A( KP, KK ) = CONJG( A( KP, KK ) ) +* (4) Swap diagonal elements at row-col intersection + R1 = REAL( A( KK, KK ) ) + A( KK, KK ) = REAL( A( KP, KP ) ) + A( KP, KP ) = R1 +* + IF( KSTEP.EQ.2 ) THEN +* (*) Make sure that diagonal element of pivot is real + A( K, K ) = REAL( A( K, K ) ) +* (5) Swap row elements + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF +* +* Convert lower triangle of A into L form by applying +* the interchanges in columns 1:k-1. +* + IF ( K.GT.1 ) + $ CALL CSWAP( K-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) +* + ELSE +* (*) Make sure that diagonal element of pivot is real + A( K, K ) = REAL( A( K, K ) ) + IF( KSTEP.EQ.2 ) + $ A( K+1, K+1 ) = REAL( A( K+1, K+1 ) ) + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of A now holds +* +* W(k) = L(k)*D(k), +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) and +* store L(k) in column k +* +* Handle division by a small number +* + IF( ABS( REAL( A( K, K ) ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* + D11 = ONE / REAL( A( K, K ) ) + CALL CHER( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column k +* + CALL CSSCAL( N-K, D11, A( K+1, K ), 1 ) + ELSE +* +* Store L(k) in column k +* + D11 = REAL( A( K, K ) ) + DO 46 II = K + 1, N + A( II, K ) = A( II, K ) / D11 + 46 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL CHER( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) + END IF +* +* Store the subdiagonal element of D in array E +* + E( K ) = CZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) ) * D(k) * ( L(k) L(k+1) )**T +* = A - ( ( A(k)A(k+1) )*inv(D(k) ) * ( A(k)A(k+1) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.LT.N-1 ) THEN +* D = |A21| + D = SLAPY2( REAL( A( K+1, K ) ), + $ AIMAG( A( K+1, K ) ) ) + D11 = REAL( A( K+1, K+1 ) ) / D + D22 = REAL( A( K, K ) ) / D + D21 = A( K+1, K ) / D + TT = ONE / ( D11*D22-ONE ) +* + DO 60 J = K + 2, N +* +* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J +* + WK = TT*( D11*A( J, K )-D21*A( J, K+1 ) ) + WKP1 = TT*( D22*A( J, K+1 )-CONJG( D21 )* + $ A( J, K ) ) +* +* Perform a rank-2 update of A(k+2:n,k+2:n) +* + DO 50 I = J, N + A( I, J ) = A( I, J ) - + $ ( A( I, K ) / D )*CONJG( WK ) - + $ ( A( I, K+1 ) / D )*CONJG( WKP1 ) + 50 CONTINUE +* +* Store L(k) and L(k+1) in cols k and k+1 for row J +* + A( J, K ) = WK / D + A( J, K+1 ) = WKP1 / D +* (*) Make sure that diagonal element of pivot is real + A( J, J ) = CMPLX( REAL( A( J, J ) ), ZERO ) +* + 60 CONTINUE +* + END IF +* +* Copy subdiagonal elements of D(K) to E(K) and +* ZERO out subdiagonal entry of A +* + E( K ) = A( K+1, K ) + E( K+1 ) = CZERO + A( K+1, K ) = CZERO +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 40 +* + 64 CONTINUE +* + END IF +* + RETURN +* +* End of CHETF2_RK +* + END diff --git a/dspl/liblapack/SRC/chetf2_rook.f b/dspl/liblapack/SRC/chetf2_rook.f new file mode 100644 index 0000000..ccd6a7f --- /dev/null +++ b/dspl/liblapack/SRC/chetf2_rook.f @@ -0,0 +1,910 @@ +*> \brief \b CHETF2_ROOK computes the factorization of a complex Hermitian indefinite matrix using the bounded Bunch-Kaufman ("rook") diagonal pivoting method (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETF2_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETF2_ROOK( UPLO, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETF2_ROOK computes the factorization of a complex Hermitian matrix A +*> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method: +*> +*> A = U*D*U**H or A = L*D*L**H +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, U**H is the conjugate transpose of U, and D is +*> Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup complexHEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**H, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**H, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> 01-01-96 - Based on modifications by +*> J. Lewis, Boeing Computer Services Company +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHETF2_ROOK( UPLO, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* ====================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE, UPPER + INTEGER I, II, IMAX, ITEMP, J, JMAX, K, KK, KP, KSTEP, + $ P + REAL ABSAKK, ALPHA, COLMAX, D, D11, D22, R1, STEMP, + $ ROWMAX, TT, SFMIN + COMPLEX D12, D21, T, WK, WKM1, WKP1, Z +* .. +* .. External Functions .. +* + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH, SLAPY2 + EXTERNAL LSAME, ICAMAX, SLAMCH, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CSSCAL, CHER, CSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( REAL( Z ) ) + ABS( AIMAG( Z ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETF2_ROOK', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = SLAMCH( 'S' ) +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**H using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 70 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( REAL( A( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = ICAMAX( K-1, A( 1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( ( MAX( ABSAKK, COLMAX ).EQ.ZERO ) ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = REAL( A( K, K ) ) + ELSE +* +* ============================================================ +* +* BEGIN pivot search +* +* Case(1) +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 12 CONTINUE +* +* BEGIN pivot search loop body +* +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + ICAMAX( K-IMAX, A( IMAX, IMAX+1 ), + $ LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = ICAMAX( IMAX-1, A( 1, IMAX ), 1 ) + STEMP = CABS1( A( ITEMP, IMAX ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Case(2) +* Equivalent to testing for +* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABS( REAL( A( IMAX, IMAX ) ) ) + $ .LT.ALPHA*ROWMAX ) ) THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Case(3) +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K-1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. +* +* Case(4) + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* END pivot search loop body +* + IF( .NOT.DONE ) GOTO 12 +* + END IF +* +* END pivot search +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K - KSTEP + 1 +* +* For only a 2x2 pivot, interchange rows and columns K and P +* in the leading submatrix A(1:k,1:k) +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* (1) Swap columnar parts + IF( P.GT.1 ) + $ CALL CSWAP( P-1, A( 1, K ), 1, A( 1, P ), 1 ) +* (2) Swap and conjugate middle parts + DO 14 J = P + 1, K - 1 + T = CONJG( A( J, K ) ) + A( J, K ) = CONJG( A( P, J ) ) + A( P, J ) = T + 14 CONTINUE +* (3) Swap and conjugate corner elements at row-col interserction + A( P, K ) = CONJG( A( P, K ) ) +* (4) Swap diagonal elements at row-col intersection + R1 = REAL( A( K, K ) ) + A( K, K ) = REAL( A( P, P ) ) + A( P, P ) = R1 + END IF +* +* For both 1x1 and 2x2 pivots, interchange rows and +* columns KK and KP in the leading submatrix A(1:k,1:k) +* + IF( KP.NE.KK ) THEN +* (1) Swap columnar parts + IF( KP.GT.1 ) + $ CALL CSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) +* (2) Swap and conjugate middle parts + DO 15 J = KP + 1, KK - 1 + T = CONJG( A( J, KK ) ) + A( J, KK ) = CONJG( A( KP, J ) ) + A( KP, J ) = T + 15 CONTINUE +* (3) Swap and conjugate corner elements at row-col interserction + A( KP, KK ) = CONJG( A( KP, KK ) ) +* (4) Swap diagonal elements at row-col intersection + R1 = REAL( A( KK, KK ) ) + A( KK, KK ) = REAL( A( KP, KP ) ) + A( KP, KP ) = R1 +* + IF( KSTEP.EQ.2 ) THEN +* (*) Make sure that diagonal element of pivot is real + A( K, K ) = REAL( A( K, K ) ) +* (5) Swap row elements + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + ELSE +* (*) Make sure that diagonal element of pivot is real + A( K, K ) = REAL( A( K, K ) ) + IF( KSTEP.EQ.2 ) + $ A( K-1, K-1 ) = REAL( A( K-1, K-1 ) ) + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* + IF( K.GT.1 ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) and +* store U(k) in column k +* + IF( ABS( REAL( A( K, K ) ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*1/D(k)*W(k)**T +* + D11 = ONE / REAL( A( K, K ) ) + CALL CHER( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL CSSCAL( K-1, D11, A( 1, K ), 1 ) + ELSE +* +* Store L(k) in column K +* + D11 = REAL( A( K, K ) ) + DO 16 II = 1, K - 1 + A( II, K ) = A( II, K ) / D11 + 16 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL CHER( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T +* = A - ( ( A(k-1)A(k) )*inv(D(k)) ) * ( A(k-1)A(k) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.GT.2 ) THEN +* D = |A12| + D = SLAPY2( REAL( A( K-1, K ) ), + $ AIMAG( A( K-1, K ) ) ) + D11 = A( K, K ) / D + D22 = A( K-1, K-1 ) / D + D12 = A( K-1, K ) / D + TT = ONE / ( D11*D22-ONE ) +* + DO 30 J = K - 2, 1, -1 +* +* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J +* + WKM1 = TT*( D11*A( J, K-1 )-CONJG( D12 )* + $ A( J, K ) ) + WK = TT*( D22*A( J, K )-D12*A( J, K-1 ) ) +* +* Perform a rank-2 update of A(1:k-2,1:k-2) +* + DO 20 I = J, 1, -1 + A( I, J ) = A( I, J ) - + $ ( A( I, K ) / D )*CONJG( WK ) - + $ ( A( I, K-1 ) / D )*CONJG( WKM1 ) + 20 CONTINUE +* +* Store U(k) and U(k-1) in cols k and k-1 for row J +* + A( J, K ) = WK / D + A( J, K-1 ) = WKM1 / D +* (*) Make sure that diagonal element of pivot is real + A( J, J ) = CMPLX( REAL( A( J, J ) ), ZERO ) +* + 30 CONTINUE +* + END IF +* + END IF +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**H using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 70 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( REAL( A( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + ICAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = REAL( A( K, K ) ) + ELSE +* +* ============================================================ +* +* BEGIN pivot search +* +* Case(1) +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 42 CONTINUE +* +* BEGIN pivot search loop body +* +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + ICAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + ICAMAX( N-IMAX, A( IMAX+1, IMAX ), + $ 1 ) + STEMP = CABS1( A( ITEMP, IMAX ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Case(2) +* Equivalent to testing for +* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABS( REAL( A( IMAX, IMAX ) ) ) + $ .LT.ALPHA*ROWMAX ) ) THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Case(3) +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. +* +* Case(4) + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* +* END pivot search loop body +* + IF( .NOT.DONE ) GOTO 42 +* + END IF +* +* END pivot search +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K + KSTEP - 1 +* +* For only a 2x2 pivot, interchange rows and columns K and P +* in the trailing submatrix A(k:n,k:n) +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* (1) Swap columnar parts + IF( P.LT.N ) + $ CALL CSWAP( N-P, A( P+1, K ), 1, A( P+1, P ), 1 ) +* (2) Swap and conjugate middle parts + DO 44 J = K + 1, P - 1 + T = CONJG( A( J, K ) ) + A( J, K ) = CONJG( A( P, J ) ) + A( P, J ) = T + 44 CONTINUE +* (3) Swap and conjugate corner elements at row-col interserction + A( P, K ) = CONJG( A( P, K ) ) +* (4) Swap diagonal elements at row-col intersection + R1 = REAL( A( K, K ) ) + A( K, K ) = REAL( A( P, P ) ) + A( P, P ) = R1 + END IF +* +* For both 1x1 and 2x2 pivots, interchange rows and +* columns KK and KP in the trailing submatrix A(k:n,k:n) +* + IF( KP.NE.KK ) THEN +* (1) Swap columnar parts + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) +* (2) Swap and conjugate middle parts + DO 45 J = KK + 1, KP - 1 + T = CONJG( A( J, KK ) ) + A( J, KK ) = CONJG( A( KP, J ) ) + A( KP, J ) = T + 45 CONTINUE +* (3) Swap and conjugate corner elements at row-col interserction + A( KP, KK ) = CONJG( A( KP, KK ) ) +* (4) Swap diagonal elements at row-col intersection + R1 = REAL( A( KK, KK ) ) + A( KK, KK ) = REAL( A( KP, KP ) ) + A( KP, KP ) = R1 +* + IF( KSTEP.EQ.2 ) THEN +* (*) Make sure that diagonal element of pivot is real + A( K, K ) = REAL( A( K, K ) ) +* (5) Swap row elements + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + ELSE +* (*) Make sure that diagonal element of pivot is real + A( K, K ) = REAL( A( K, K ) ) + IF( KSTEP.EQ.2 ) + $ A( K+1, K+1 ) = REAL( A( K+1, K+1 ) ) + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of A now holds +* +* W(k) = L(k)*D(k), +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) and +* store L(k) in column k +* +* Handle division by a small number +* + IF( ABS( REAL( A( K, K ) ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* + D11 = ONE / REAL( A( K, K ) ) + CALL CHER( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column k +* + CALL CSSCAL( N-K, D11, A( K+1, K ), 1 ) + ELSE +* +* Store L(k) in column k +* + D11 = REAL( A( K, K ) ) + DO 46 II = K + 1, N + A( II, K ) = A( II, K ) / D11 + 46 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL CHER( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) ) * D(k) * ( L(k) L(k+1) )**T +* = A - ( ( A(k)A(k+1) )*inv(D(k) ) * ( A(k)A(k+1) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.LT.N-1 ) THEN +* D = |A21| + D = SLAPY2( REAL( A( K+1, K ) ), + $ AIMAG( A( K+1, K ) ) ) + D11 = REAL( A( K+1, K+1 ) ) / D + D22 = REAL( A( K, K ) ) / D + D21 = A( K+1, K ) / D + TT = ONE / ( D11*D22-ONE ) +* + DO 60 J = K + 2, N +* +* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J +* + WK = TT*( D11*A( J, K )-D21*A( J, K+1 ) ) + WKP1 = TT*( D22*A( J, K+1 )-CONJG( D21 )* + $ A( J, K ) ) +* +* Perform a rank-2 update of A(k+2:n,k+2:n) +* + DO 50 I = J, N + A( I, J ) = A( I, J ) - + $ ( A( I, K ) / D )*CONJG( WK ) - + $ ( A( I, K+1 ) / D )*CONJG( WKP1 ) + 50 CONTINUE +* +* Store L(k) and L(k+1) in cols k and k+1 for row J +* + A( J, K ) = WK / D + A( J, K+1 ) = WKP1 / D +* (*) Make sure that diagonal element of pivot is real + A( J, J ) = CMPLX( REAL( A( J, J ) ), ZERO ) +* + 60 CONTINUE +* + END IF +* + END IF +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 40 +* + END IF +* + 70 CONTINUE +* + RETURN +* +* End of CHETF2_ROOK +* + END diff --git a/dspl/liblapack/SRC/chetrd.f b/dspl/liblapack/SRC/chetrd.f new file mode 100644 index 0000000..22ea351 --- /dev/null +++ b/dspl/liblapack/SRC/chetrd.f @@ -0,0 +1,378 @@ +*> \brief \b CHETRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ) +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRD reduces a complex Hermitian matrix A to real symmetric +*> tridiagonal form T by a unitary similarity transformation: +*> Q**H * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the unitary +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the unitary matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 1. +*> For optimum performance LWORK >= N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n-1) . . . H(2) H(1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +*> A(1:i-1,i+1), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(n-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +*> and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( d e v2 v3 v4 ) ( d ) +*> ( d e v3 v4 ) ( e d ) +*> ( d e v4 ) ( v1 e d ) +*> ( d e ) ( v1 v2 e d ) +*> ( d ) ( v1 v2 v3 e d ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CHETRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, IWS, J, KK, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CHER2K, CHETD2, CLATRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. +* + NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NX = N + IWS = 1 + IF( NB.GT.1 .AND. NB.LT.N ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code). +* + NX = MAX( NB, ILAENV( 3, 'CHETRD', UPLO, N, -1, -1, -1 ) ) + IF( NX.LT.N ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code by setting NX = N. +* + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = ILAENV( 2, 'CHETRD', UPLO, N, -1, -1, -1 ) + IF( NB.LT.NBMIN ) + $ NX = N + END IF + ELSE + NX = N + END IF + ELSE + NB = 1 + END IF +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A. +* Columns 1:kk are handled by the unblocked method. +* + KK = N - ( ( N-NX+NB-1 ) / NB )*NB + DO 20 I = N - NB + 1, KK + 1, -NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL CLATRD( UPLO, I+NB-1, NB, A, LDA, E, TAU, WORK, + $ LDWORK ) +* +* Update the unreduced submatrix A(1:i-1,1:i-1), using an +* update of the form: A := A - V*W**H - W*V**H +* + CALL CHER2K( UPLO, 'No transpose', I-1, NB, -CONE, + $ A( 1, I ), LDA, WORK, LDWORK, ONE, A, LDA ) +* +* Copy superdiagonal elements back into A, and diagonal +* elements into D +* + DO 10 J = I, I + NB - 1 + A( J-1, J ) = E( J-1 ) + D( J ) = A( J, J ) + 10 CONTINUE + 20 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL CHETD2( UPLO, KK, A, LDA, D, E, TAU, IINFO ) + ELSE +* +* Reduce the lower triangle of A +* + DO 40 I = 1, N - NX, NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL CLATRD( UPLO, N-I+1, NB, A( I, I ), LDA, E( I ), + $ TAU( I ), WORK, LDWORK ) +* +* Update the unreduced submatrix A(i+nb:n,i+nb:n), using +* an update of the form: A := A - V*W**H - W*V**H +* + CALL CHER2K( UPLO, 'No transpose', N-I-NB+1, NB, -CONE, + $ A( I+NB, I ), LDA, WORK( NB+1 ), LDWORK, ONE, + $ A( I+NB, I+NB ), LDA ) +* +* Copy subdiagonal elements back into A, and diagonal +* elements into D +* + DO 30 J = I, I + NB - 1 + A( J+1, J ) = E( J ) + D( J ) = A( J, J ) + 30 CONTINUE + 40 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL CHETD2( UPLO, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAU( I ), IINFO ) + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of CHETRD +* + END diff --git a/dspl/liblapack/SRC/chetrd_2stage.f b/dspl/liblapack/SRC/chetrd_2stage.f new file mode 100644 index 0000000..e7370a4 --- /dev/null +++ b/dspl/liblapack/SRC/chetrd_2stage.f @@ -0,0 +1,337 @@ +*> \brief \b CHETRD_2STAGE +* +* @generated from zhetrd_2stage.f, fortran z -> c, Sun Nov 6 19:34:06 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRD_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU, +* HOUS2, LHOUS2, WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER VECT, UPLO +* INTEGER N, LDA, LWORK, LHOUS2, INFO +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ) +* COMPLEX A( LDA, * ), TAU( * ), +* HOUS2( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRD_2STAGE reduces a complex Hermitian matrix A to real symmetric +*> tridiagonal form T by a unitary similarity transformation: +*> Q1**H Q2**H* A * Q2 * Q1 = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': No need for the Housholder representation, +*> in particular for the second stage (Band to +*> tridiagonal) and thus LHOUS2 is of size max(1, 4*N); +*> = 'V': the Householder representation is needed to +*> either generate Q1 Q2 or to apply Q1 Q2, +*> then LHOUS2 is to be queried and computed. +*> (NOT AVAILABLE IN THIS RELEASE). +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the band superdiagonal +*> of A are overwritten by the corresponding elements of the +*> internal band-diagonal matrix AB, and the elements above +*> the KD superdiagonal, with the array TAU, represent the unitary +*> matrix Q1 as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and band subdiagonal of A are over- +*> written by the corresponding elements of the internal band-diagonal +*> matrix AB, and the elements below the KD subdiagonal, with +*> the array TAU, represent the unitary matrix Q1 as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (N-KD) +*> The scalar factors of the elementary reflectors of +*> the first stage (see Further Details). +*> \endverbatim +*> +*> \param[out] HOUS2 +*> \verbatim +*> HOUS2 is COMPLEX array, dimension LHOUS2, that +*> store the Householder representation of the stage2 +*> band to tridiagonal. +*> \endverbatim +*> +*> \param[in] LHOUS2 +*> \verbatim +*> LHOUS2 is INTEGER +*> The dimension of the array HOUS2. LHOUS2 = MAX(1, dimension) +*> If LWORK = -1, or LHOUS2=-1, +*> then a query is assumed; the routine +*> only calculates the optimal size of the HOUS2 array, returns +*> this value as the first entry of the HOUS2 array, and no error +*> message related to LHOUS2 is issued by XERBLA. +*> LHOUS2 = MAX(1, dimension) where +*> dimension = 4*N if VECT='N' +*> not available now if VECT='H' +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK = MAX(1, dimension) +*> If LWORK = -1, or LHOUS2=-1, +*> then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexHEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CHETRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU, + $ HOUS2, LHOUS2, WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER VECT, UPLO + INTEGER N, LDA, LWORK, LHOUS2, INFO +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX A( LDA, * ), TAU( * ), + $ HOUS2( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTQ + INTEGER KD, IB, LWMIN, LHMIN, LWRK, LDAB, WPOS, ABPOS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CHETRD_HE2HB, CHETRD_HB2ST +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + EXTERNAL LSAME, ILAENV2STAGE +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + WANTQ = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) .OR. ( LHOUS2.EQ.-1 ) +* +* Determine the block size, the workspace size and the hous size. +* + KD = ILAENV2STAGE( 1, 'CHETRD_2STAGE', VECT, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'CHETRD_2STAGE', VECT, N, KD, -1, -1 ) + LHMIN = ILAENV2STAGE( 3, 'CHETRD_2STAGE', VECT, N, KD, IB, -1 ) + LWMIN = ILAENV2STAGE( 4, 'CHETRD_2STAGE', VECT, N, KD, IB, -1 ) +* WRITE(*,*),'CHETRD_2STAGE N KD UPLO LHMIN LWMIN ',N, KD, UPLO, +* $ LHMIN, LWMIN +* + IF( .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LHOUS2.LT.LHMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + HOUS2( 1 ) = LHMIN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine pointer position +* + LDAB = KD+1 + LWRK = LWORK-LDAB*N + ABPOS = 1 + WPOS = ABPOS + LDAB*N + CALL CHETRD_HE2HB( UPLO, N, KD, A, LDA, WORK( ABPOS ), LDAB, + $ TAU, WORK( WPOS ), LWRK, INFO ) + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRD_HE2HB', -INFO ) + RETURN + END IF + CALL CHETRD_HB2ST( 'Y', VECT, UPLO, N, KD, + $ WORK( ABPOS ), LDAB, D, E, + $ HOUS2, LHOUS2, WORK( WPOS ), LWRK, INFO ) + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRD_HB2ST', -INFO ) + RETURN + END IF +* +* + HOUS2( 1 ) = LHMIN + WORK( 1 ) = LWMIN + RETURN +* +* End of CHETRD_2STAGE +* + END diff --git a/dspl/liblapack/SRC/chetrd_hb2st.F b/dspl/liblapack/SRC/chetrd_hb2st.F new file mode 100644 index 0000000..b96e5db --- /dev/null +++ b/dspl/liblapack/SRC/chetrd_hb2st.F @@ -0,0 +1,587 @@ +*> \brief \b CHBTRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric tridiagonal form T +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHBTRD_HB2ST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, +* D, E, HOUS, LHOUS, WORK, LWORK, INFO ) +* +* #if defined(_OPENMP) +* use omp_lib +* #endif +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER STAGE1, UPLO, VECT +* INTEGER N, KD, IB, LDAB, LHOUS, LWORK, INFO +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ) +* COMPLEX AB( LDAB, * ), HOUS( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric +*> tridiagonal form T by a unitary similarity transformation: +*> Q**H * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] STAGE +*> \verbatim +*> STAGE is CHARACTER*1 +*> = 'N': "No": to mention that the stage 1 of the reduction +*> from dense to band using the chetrd_he2hb routine +*> was not called before this routine to reproduce AB. +*> In other term this routine is called as standalone. +*> = 'Y': "Yes": to mention that the stage 1 of the +*> reduction from dense to band using the chetrd_he2hb +*> routine has been called to produce AB (e.g., AB is +*> the output of chetrd_he2hb. +*> \endverbatim +*> +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': No need for the Housholder representation, +*> and thus LHOUS is of size max(1, 4*N); +*> = 'V': the Householder representation is needed to +*> either generate or to apply Q later on, +*> then LHOUS is to be queried and computed. +*> (NOT AVAILABLE IN THIS RELEASE). +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> On exit, the diagonal elements of AB are overwritten by the +*> diagonal elements of the tridiagonal matrix T; if KD > 0, the +*> elements on the first superdiagonal (if UPLO = 'U') or the +*> first subdiagonal (if UPLO = 'L') are overwritten by the +*> off-diagonal elements of T; the rest of AB is overwritten by +*> values generated during the reduction. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] HOUS +*> \verbatim +*> HOUS is COMPLEX array, dimension LHOUS, that +*> store the Householder representation. +*> \endverbatim +*> +*> \param[in] LHOUS +*> \verbatim +*> LHOUS is INTEGER +*> The dimension of the array HOUS. LHOUS = MAX(1, dimension) +*> If LWORK = -1, or LHOUS=-1, +*> then a query is assumed; the routine +*> only calculates the optimal size of the HOUS array, returns +*> this value as the first entry of the HOUS array, and no error +*> message related to LHOUS is issued by XERBLA. +*> LHOUS = MAX(1, dimension) where +*> dimension = 4*N if VECT='N' +*> not available now if VECT='H' +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK = MAX(1, dimension) +*> If LWORK = -1, or LHOUS=-1, +*> then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, + $ D, E, HOUS, LHOUS, WORK, LWORK, INFO ) +* +* +#if defined(_OPENMP) + use omp_lib +#endif +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER STAGE1, UPLO, VECT + INTEGER N, KD, LDAB, LHOUS, LWORK, INFO +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX AB( LDAB, * ), HOUS( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL RZERO + COMPLEX ZERO, ONE + PARAMETER ( RZERO = 0.0E+0, + $ ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTQ, UPPER, AFTERS1 + INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST, + $ ED, STIND, EDIND, BLKLASTIND, COLPT, THED, + $ STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID, + $ NBTILES, TTYPE, TID, NTHREADS, DEBUG, + $ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS, + $ INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU, + $ SICEV, SIZETAU, LDV, LHMIN, LWMIN + REAL ABSTMP + COMPLEX TMP +* .. +* .. External Subroutines .. + EXTERNAL CHB2ST_KERNELS, CLACPY, CLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX, CEILING, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Determine the minimal workspace size required. +* Test the input parameters +* + DEBUG = 0 + INFO = 0 + AFTERS1 = LSAME( STAGE1, 'Y' ) + WANTQ = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) .OR. ( LHOUS.EQ.-1 ) +* +* Determine the block size, the workspace size and the hous size. +* + IB = ILAENV( 18, 'CHETRD_HB2ST', VECT, N, KD, -1, -1 ) + LHMIN = ILAENV( 19, 'CHETRD_HB2ST', VECT, N, KD, IB, -1 ) + LWMIN = ILAENV( 20, 'CHETRD_HB2ST', VECT, N, KD, IB, -1 ) +* + IF( .NOT.AFTERS1 .AND. .NOT.LSAME( STAGE1, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -2 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.(KD+1) ) THEN + INFO = -7 + ELSE IF( LHOUS.LT.LHMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + HOUS( 1 ) = LHMIN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRD_HB2ST', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + HOUS( 1 ) = 1 + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine pointer position +* + LDV = KD + IB + SIZETAU = 2 * N + SICEV = 2 * N + INDTAU = 1 + INDV = INDTAU + SIZETAU + LDA = 2 * KD + 1 + SIZEA = LDA * N + INDA = 1 + INDW = INDA + SIZEA + NTHREADS = 1 + TID = 0 +* + IF( UPPER ) THEN + APOS = INDA + KD + AWPOS = INDA + DPOS = APOS + KD + OFDPOS = DPOS - 1 + ABDPOS = KD + 1 + ABOFDPOS = KD + ELSE + APOS = INDA + AWPOS = INDA + KD + 1 + DPOS = APOS + OFDPOS = DPOS + 1 + ABDPOS = 1 + ABOFDPOS = 2 + + ENDIF +* +* Case KD=0: +* The matrix is diagonal. We just copy it (convert to "real" for +* complex because D is double and the imaginary part should be 0) +* and store it in D. A sequential code here is better or +* in a parallel environment it might need two cores for D and E +* + IF( KD.EQ.0 ) THEN + DO 30 I = 1, N + D( I ) = REAL( AB( ABDPOS, I ) ) + 30 CONTINUE + DO 40 I = 1, N-1 + E( I ) = RZERO + 40 CONTINUE +* + HOUS( 1 ) = 1 + WORK( 1 ) = 1 + RETURN + END IF +* +* Case KD=1: +* The matrix is already Tridiagonal. We have to make diagonal +* and offdiagonal elements real, and store them in D and E. +* For that, for real precision just copy the diag and offdiag +* to D and E while for the COMPLEX case the bulge chasing is +* performed to convert the hermetian tridiagonal to symmetric +* tridiagonal. A simpler coversion formula might be used, but then +* updating the Q matrix will be required and based if Q is generated +* or not this might complicate the story. +* + IF( KD.EQ.1 ) THEN + DO 50 I = 1, N + D( I ) = REAL( AB( ABDPOS, I ) ) + 50 CONTINUE +* +* make off-diagonal elements real and copy them to E +* + IF( UPPER ) THEN + DO 60 I = 1, N - 1 + TMP = AB( ABOFDPOS, I+1 ) + ABSTMP = ABS( TMP ) + AB( ABOFDPOS, I+1 ) = ABSTMP + E( I ) = ABSTMP + IF( ABSTMP.NE.RZERO ) THEN + TMP = TMP / ABSTMP + ELSE + TMP = ONE + END IF + IF( I.LT.N-1 ) + $ AB( ABOFDPOS, I+2 ) = AB( ABOFDPOS, I+2 )*TMP +C IF( WANTZ ) THEN +C CALL CSCAL( N, CONJG( TMP ), Q( 1, I+1 ), 1 ) +C END IF + 60 CONTINUE + ELSE + DO 70 I = 1, N - 1 + TMP = AB( ABOFDPOS, I ) + ABSTMP = ABS( TMP ) + AB( ABOFDPOS, I ) = ABSTMP + E( I ) = ABSTMP + IF( ABSTMP.NE.RZERO ) THEN + TMP = TMP / ABSTMP + ELSE + TMP = ONE + END IF + IF( I.LT.N-1 ) + $ AB( ABOFDPOS, I+1 ) = AB( ABOFDPOS, I+1 )*TMP +C IF( WANTQ ) THEN +C CALL CSCAL( N, TMP, Q( 1, I+1 ), 1 ) +C END IF + 70 CONTINUE + ENDIF +* + HOUS( 1 ) = 1 + WORK( 1 ) = 1 + RETURN + END IF +* +* Main code start here. +* Reduce the hermitian band of A to a tridiagonal matrix. +* + THGRSIZ = N + GRSIZ = 1 + SHIFT = 3 + NBTILES = CEILING( REAL(N)/REAL(KD) ) + STEPERCOL = CEILING( REAL(SHIFT)/REAL(GRSIZ) ) + THGRNB = CEILING( REAL(N-1)/REAL(THGRSIZ) ) +* + CALL CLACPY( "A", KD+1, N, AB, LDAB, WORK( APOS ), LDA ) + CALL CLASET( "A", KD, N, ZERO, ZERO, WORK( AWPOS ), LDA ) +* +* +* openMP parallelisation start here +* +#if defined(_OPENMP) +!$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND ) +!$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID ) +!$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND ) +!$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK) +!$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA ) +!$OMP$ SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT ) +!$OMP MASTER +#endif +* +* main bulge chasing loop +* + DO 100 THGRID = 1, THGRNB + STT = (THGRID-1)*THGRSIZ+1 + THED = MIN( (STT + THGRSIZ -1), (N-1)) + DO 110 I = STT, N-1 + ED = MIN( I, THED ) + IF( STT.GT.ED ) EXIT + DO 120 M = 1, STEPERCOL + ST = STT + DO 130 SWEEPID = ST, ED + DO 140 K = 1, GRSIZ + MYID = (I-SWEEPID)*(STEPERCOL*GRSIZ) + $ + (M-1)*GRSIZ + K + IF ( MYID.EQ.1 ) THEN + TTYPE = 1 + ELSE + TTYPE = MOD( MYID, 2 ) + 2 + ENDIF + + IF( TTYPE.EQ.2 ) THEN + COLPT = (MYID/2)*KD + SWEEPID + STIND = COLPT-KD+1 + EDIND = MIN(COLPT,N) + BLKLASTIND = COLPT + ELSE + COLPT = ((MYID+1)/2)*KD + SWEEPID + STIND = COLPT-KD+1 + EDIND = MIN(COLPT,N) + IF( ( STIND.GE.EDIND-1 ).AND. + $ ( EDIND.EQ.N ) ) THEN + BLKLASTIND = N + ELSE + BLKLASTIND = 0 + ENDIF + ENDIF +* +* Call the kernel +* +#if defined(_OPENMP) + IF( TTYPE.NE.1 ) THEN +!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) +!$OMP$ DEPEND(in:WORK(MYID-1)) +!$OMP$ DEPEND(out:WORK(MYID)) + TID = OMP_GET_THREAD_NUM() + CALL CHB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +!$OMP END TASK + ELSE +!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) +!$OMP$ DEPEND(out:WORK(MYID)) + TID = OMP_GET_THREAD_NUM() + CALL CHB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +!$OMP END TASK + ENDIF +#else + CALL CHB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +#endif + IF ( BLKLASTIND.GE.(N-1) ) THEN + STT = STT + 1 + EXIT + ENDIF + 140 CONTINUE + 130 CONTINUE + 120 CONTINUE + 110 CONTINUE + 100 CONTINUE +* +#if defined(_OPENMP) +!$OMP END MASTER +!$OMP END PARALLEL +#endif +* +* Copy the diagonal from A to D. Note that D is REAL thus only +* the Real part is needed, the imaginary part should be zero. +* + DO 150 I = 1, N + D( I ) = REAL( WORK( DPOS+(I-1)*LDA ) ) + 150 CONTINUE +* +* Copy the off diagonal from A to E. Note that E is REAL thus only +* the Real part is needed, the imaginary part should be zero. +* + IF( UPPER ) THEN + DO 160 I = 1, N-1 + E( I ) = REAL( WORK( OFDPOS+I*LDA ) ) + 160 CONTINUE + ELSE + DO 170 I = 1, N-1 + E( I ) = REAL( WORK( OFDPOS+(I-1)*LDA ) ) + 170 CONTINUE + ENDIF +* + HOUS( 1 ) = LHMIN + WORK( 1 ) = LWMIN + RETURN +* +* End of CHETRD_HB2ST +* + END + diff --git a/dspl/liblapack/SRC/chetrd_he2hb.f b/dspl/liblapack/SRC/chetrd_he2hb.f new file mode 100644 index 0000000..fd8c3fb --- /dev/null +++ b/dspl/liblapack/SRC/chetrd_he2hb.f @@ -0,0 +1,517 @@ +*> \brief \b CHETRD_HE2HB +* +* @generated from zhetrd_he2hb.f, fortran z -> c, Wed Dec 7 08:22:40 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRD_HE2HB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, +* WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDAB, LWORK, N, KD +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), AB( LDAB, * ), +* TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian +*> band-diagonal form AB by a unitary similarity transformation: +*> Q**H * A * Q = AB. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the reduced matrix if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> The reduced matrix is stored in the array AB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the unitary +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the unitary matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On exit, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (N-KD) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LWORK) +*> On exit, if INFO = 0, or if LWORK=-1, +*> WORK(1) returns the size of LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK which should be calculated +*> by a workspace query. LWORK = MAX(1, LWORK_QUERY) +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD +*> where FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice otherwise +*> putting LWORK=-1 will provide the size of WORK. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexHEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(k)**H . . . H(2)**H H(1)**H, where k = n-kd. +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in +*> A(i,i+kd+1:n), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = n-kd. +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in +*> A(i+kd+2:n,i), and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( ab ab/v1 v1 v1 v1 ) ( ab ) +*> ( ab ab/v2 v2 v2 ) ( ab/v1 ab ) +*> ( ab ab/v3 v3 ) ( v1 ab/v2 ab ) +*> ( ab ab/v4 ) ( v1 v2 ab/v3 ab ) +*> ( ab ) ( v1 v2 v3 ab/v4 ab ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, + $ WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAB, LWORK, N, KD +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), AB( LDAB, * ), + $ TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL RONE + COMPLEX ZERO, ONE, HALF + PARAMETER ( RONE = 1.0E+0, + $ ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ), + $ HALF = ( 0.5E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, J, IINFO, LWMIN, PN, PK, LK, + $ LDT, LDW, LDS2, LDS1, + $ LS2, LS1, LW, LT, + $ TPOS, WPOS, S2POS, S1POS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CHER2K, CHEMM, CGEMM, CCOPY, + $ CLARFT, CGELQF, CGEQRF, CLASET +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Determine the minimal workspace size required +* and test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + LWMIN = ILAENV( 20, 'CHETRD_HE2HB', '', N, KD, -1, -1 ) + + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAB.LT.MAX( 1, KD+1 ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRD_HE2HB', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK( 1 ) = LWMIN + RETURN + END IF +* +* Quick return if possible +* Copy the upper/lower portion of A into AB +* + IF( N.LE.KD+1 ) THEN + IF( UPPER ) THEN + DO 100 I = 1, N + LK = MIN( KD+1, I ) + CALL CCOPY( LK, A( I-LK+1, I ), 1, + $ AB( KD+1-LK+1, I ), 1 ) + 100 CONTINUE + ELSE + DO 110 I = 1, N + LK = MIN( KD+1, N-I+1 ) + CALL CCOPY( LK, A( I, I ), 1, AB( 1, I ), 1 ) + 110 CONTINUE + ENDIF + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine the pointer position for the workspace +* + LDT = KD + LDS1 = KD + LT = LDT*KD + LW = N*KD + LS1 = LDS1*KD + LS2 = LWMIN - LT - LW - LS1 +* LS2 = N*MAX(KD,FACTOPTNB) + TPOS = 1 + WPOS = TPOS + LT + S1POS = WPOS + LW + S2POS = S1POS + LS1 + IF( UPPER ) THEN + LDW = KD + LDS2 = KD + ELSE + LDW = N + LDS2 = N + ENDIF +* +* +* Set the workspace of the triangular matrix T to zero once such a +* way everytime T is generated the upper/lower portion will be always zero +* + CALL CLASET( "A", LDT, KD, ZERO, ZERO, WORK( TPOS ), LDT ) +* + IF( UPPER ) THEN + DO 10 I = 1, N - KD, KD + PN = N-I-KD+1 + PK = MIN( N-I-KD+1, KD ) +* +* Compute the LQ factorization of the current block +* + CALL CGELQF( KD, PN, A( I, I+KD ), LDA, + $ TAU( I ), WORK( S2POS ), LS2, IINFO ) +* +* Copy the upper portion of A into AB +* + DO 20 J = I, I+PK-1 + LK = MIN( KD, N-J ) + 1 + CALL CCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 ) + 20 CONTINUE +* + CALL CLASET( 'Lower', PK, PK, ZERO, ONE, + $ A( I, I+KD ), LDA ) +* +* Form the matrix T +* + CALL CLARFT( 'Forward', 'Rowwise', PN, PK, + $ A( I, I+KD ), LDA, TAU( I ), + $ WORK( TPOS ), LDT ) +* +* Compute W: +* + CALL CGEMM( 'Conjugate', 'No transpose', PK, PN, PK, + $ ONE, WORK( TPOS ), LDT, + $ A( I, I+KD ), LDA, + $ ZERO, WORK( S2POS ), LDS2 ) +* + CALL CHEMM( 'Right', UPLO, PK, PN, + $ ONE, A( I+KD, I+KD ), LDA, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( WPOS ), LDW ) +* + CALL CGEMM( 'No transpose', 'Conjugate', PK, PK, PN, + $ ONE, WORK( WPOS ), LDW, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( S1POS ), LDS1 ) +* + CALL CGEMM( 'No transpose', 'No transpose', PK, PN, PK, + $ -HALF, WORK( S1POS ), LDS1, + $ A( I, I+KD ), LDA, + $ ONE, WORK( WPOS ), LDW ) +* +* +* Update the unreduced submatrix A(i+kd:n,i+kd:n), using +* an update of the form: A := A - V'*W - W'*V +* + CALL CHER2K( UPLO, 'Conjugate', PN, PK, + $ -ONE, A( I, I+KD ), LDA, + $ WORK( WPOS ), LDW, + $ RONE, A( I+KD, I+KD ), LDA ) + 10 CONTINUE +* +* Copy the upper band to AB which is the band storage matrix +* + DO 30 J = N-KD+1, N + LK = MIN(KD, N-J) + 1 + CALL CCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 ) + 30 CONTINUE +* + ELSE +* +* Reduce the lower triangle of A to lower band matrix +* + DO 40 I = 1, N - KD, KD + PN = N-I-KD+1 + PK = MIN( N-I-KD+1, KD ) +* +* Compute the QR factorization of the current block +* + CALL CGEQRF( PN, KD, A( I+KD, I ), LDA, + $ TAU( I ), WORK( S2POS ), LS2, IINFO ) +* +* Copy the upper portion of A into AB +* + DO 50 J = I, I+PK-1 + LK = MIN( KD, N-J ) + 1 + CALL CCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 ) + 50 CONTINUE +* + CALL CLASET( 'Upper', PK, PK, ZERO, ONE, + $ A( I+KD, I ), LDA ) +* +* Form the matrix T +* + CALL CLARFT( 'Forward', 'Columnwise', PN, PK, + $ A( I+KD, I ), LDA, TAU( I ), + $ WORK( TPOS ), LDT ) +* +* Compute W: +* + CALL CGEMM( 'No transpose', 'No transpose', PN, PK, PK, + $ ONE, A( I+KD, I ), LDA, + $ WORK( TPOS ), LDT, + $ ZERO, WORK( S2POS ), LDS2 ) +* + CALL CHEMM( 'Left', UPLO, PN, PK, + $ ONE, A( I+KD, I+KD ), LDA, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( WPOS ), LDW ) +* + CALL CGEMM( 'Conjugate', 'No transpose', PK, PK, PN, + $ ONE, WORK( S2POS ), LDS2, + $ WORK( WPOS ), LDW, + $ ZERO, WORK( S1POS ), LDS1 ) +* + CALL CGEMM( 'No transpose', 'No transpose', PN, PK, PK, + $ -HALF, A( I+KD, I ), LDA, + $ WORK( S1POS ), LDS1, + $ ONE, WORK( WPOS ), LDW ) +* +* +* Update the unreduced submatrix A(i+kd:n,i+kd:n), using +* an update of the form: A := A - V*W' - W*V' +* + CALL CHER2K( UPLO, 'No transpose', PN, PK, + $ -ONE, A( I+KD, I ), LDA, + $ WORK( WPOS ), LDW, + $ RONE, A( I+KD, I+KD ), LDA ) +* ================================================================== +* RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED +* DO 45 J = I, I+PK-1 +* LK = MIN( KD, N-J ) + 1 +* CALL CCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 ) +* 45 CONTINUE +* ================================================================== + 40 CONTINUE +* +* Copy the lower band to AB which is the band storage matrix +* + DO 60 J = N-KD+1, N + LK = MIN(KD, N-J) + 1 + CALL CCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 ) + 60 CONTINUE + + END IF +* + WORK( 1 ) = LWMIN + RETURN +* +* End of CHETRD_HE2HB +* + END diff --git a/dspl/liblapack/SRC/chetrf.f b/dspl/liblapack/SRC/chetrf.f new file mode 100644 index 0000000..aa8f7f2 --- /dev/null +++ b/dspl/liblapack/SRC/chetrf.f @@ -0,0 +1,357 @@ +*> \brief \b CHETRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRF computes the factorization of a complex Hermitian matrix A +*> using the Bunch-Kaufman diagonal pivoting method. The form of the +*> factorization is +*> +*> A = U*D*U**H or A = L*D*L**H +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is Hermitian and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=1. For best performance +*> LWORK >= N*NB, where NB is the block size returned by ILAENV. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**H, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**H, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CHETRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CHETF2, CLAHEF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'CHETRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'CHETRF', UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**H using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by CLAHEF; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 40 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL CLAHEF( UPLO, K, NB, KB, A, LDA, IPIV, WORK, N, IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL CHETF2( UPLO, K, A, LDA, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**H using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by CLAHEF; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL CLAHEF( UPLO, N-K+1, NB, KB, A( K, K ), LDA, IPIV( K ), + $ WORK, N, IINFO ) + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL CHETF2( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), IINFO ) + KB = N - K + 1 + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO 30 J = K, K + KB - 1 + IF( IPIV( J ).GT.0 ) THEN + IPIV( J ) = IPIV( J ) + K - 1 + ELSE + IPIV( J ) = IPIV( J ) - K + 1 + END IF + 30 CONTINUE +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* + END IF +* + 40 CONTINUE + WORK( 1 ) = LWKOPT + RETURN +* +* End of CHETRF +* + END diff --git a/dspl/liblapack/SRC/chetrf_aa.f b/dspl/liblapack/SRC/chetrf_aa.f new file mode 100644 index 0000000..2c55648 --- /dev/null +++ b/dspl/liblapack/SRC/chetrf_aa.f @@ -0,0 +1,469 @@ +*> \brief \b CHETRF_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRF_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRF_AA computes the factorization of a complex hermitian matrix A +*> using the Aasen's algorithm. The form of the factorization is +*> +*> A = U*T*U**H or A = L*T*L**H +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is a hermitian tridiagonal matrix. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the tridiagonal matrix is stored in the diagonals +*> and the subdiagonals of A just below (or above) the diagonals, +*> and L is stored below (or above) the subdiaonals, when UPLO +*> is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= 2*N. For optimum performance +*> LWORK >= N*(1+NB), where NB is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexHEcomputational +* +* ===================================================================== + SUBROUTINE CHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = (0.0E+0, 0.0E+0), ONE = (1.0E+0, 0.0E+0) ) +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER J, LWKOPT + INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB + COMPLEX ALPHA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLAHEF_AA, CGEMM, CCOPY, CSWAP, CSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, CONJG, MAX +* .. +* .. Executable Statements .. +* +* Determine the block size +* + NB = ILAENV( 1, 'CHETRF_AA', UPLO, N, -1, -1, -1 ) +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.( 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKOPT = (NB+1)*N + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRF_AA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return +* + IF ( N.EQ.0 ) THEN + RETURN + ENDIF + IPIV( 1 ) = 1 + IF ( N.EQ.1 ) THEN + A( 1, 1 ) = REAL( A( 1, 1 ) ) + RETURN + END IF +* +* Adjust block size based on the workspace size +* + IF( LWORK.LT.((1+NB)*N) ) THEN + NB = ( LWORK-N ) / N + END IF +* + IF( UPPER ) THEN +* +* ..................................................... +* Factorize A as L*D*L**H using the upper triangle of A +* ..................................................... +* +* copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N)) +* + CALL CCOPY( N, A( 1, 1 ), LDA, WORK( 1 ), 1 ) +* +* J is the main loop index, increasing from 1 to N in steps of +* JB, where JB is the number of columns factorized by CLAHEF; +* JB is either NB, or N-J+1 for the last block +* + J = 0 + 10 CONTINUE + IF( J.GE.N ) + $ GO TO 20 +* +* each step of the main loop +* J is the last column of the previous panel +* J1 is the first column of the current panel +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 for the first panel, and +* K1=0 for the rest +* + J1 = J + 1 + JB = MIN( N-J1+1, NB ) + K1 = MAX(1, J)-J +* +* Panel factorization +* + CALL CLAHEF_AA( UPLO, 2-K1, N-J, JB, + $ A( MAX(1, J), J+1 ), LDA, + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) +* +* Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) +* + DO J2 = J+2, MIN(N, J+JB+1) + IPIV( J2 ) = IPIV( J2 ) + J + IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN + CALL CSWAP( J1-K1-2, A( 1, J2 ), 1, + $ A( 1, IPIV(J2) ), 1 ) + END IF + END DO + J = J + JB +* +* Trailing submatrix update, where +* the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and +* WORK stores the current block of the auxiriarly matrix H +* + IF( J.LT.N ) THEN +* +* if the first panel and JB=1 (NB=1), then nothing to do +* + IF( J1.GT.1 .OR. JB.GT.1 ) THEN +* +* Merge rank-1 update with BLAS-3 update +* + ALPHA = CONJG( A( J, J+1 ) ) + A( J, J+1 ) = ONE + CALL CCOPY( N-J, A( J-1, J+1 ), LDA, + $ WORK( (J+1-J1+1)+JB*N ), 1 ) + CALL CSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 ) +* +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=0 and K2=1 for the first panel, +* and K1=1 and K2=0 for the rest +* + IF( J1.GT.1 ) THEN +* +* Not first panel +* + K2 = 1 + ELSE +* +* First panel +* + K2 = 0 +* +* First update skips the first column +* + JB = JB - 1 + END IF +* + DO J2 = J+1, N, NB + NJ = MIN( NB, N-J2+1 ) +* +* Update (J2, J2) diagonal block with CGEMV +* + J3 = J2 + DO MJ = NJ-1, 1, -1 + CALL CGEMM( 'Conjugate transpose', 'Transpose', + $ 1, MJ, JB+1, + $ -ONE, A( J1-K2, J3 ), LDA, + $ WORK( (J3-J1+1)+K1*N ), N, + $ ONE, A( J3, J3 ), LDA ) + J3 = J3 + 1 + END DO +* +* Update off-diagonal block of J2-th block row with CGEMM +* + CALL CGEMM( 'Conjugate transpose', 'Transpose', + $ NJ, N-J3+1, JB+1, + $ -ONE, A( J1-K2, J2 ), LDA, + $ WORK( (J3-J1+1)+K1*N ), N, + $ ONE, A( J2, J3 ), LDA ) + END DO +* +* Recover T( J, J+1 ) +* + A( J, J+1 ) = CONJG( ALPHA ) + END IF +* +* WORK(J+1, 1) stores H(J+1, 1) +* + CALL CCOPY( N-J, A( J+1, J+1 ), LDA, WORK( 1 ), 1 ) + END IF + GO TO 10 + ELSE +* +* ..................................................... +* Factorize A as L*D*L**H using the lower triangle of A +* ..................................................... +* +* copy first column A(1:N, 1) into H(1:N, 1) +* (stored in WORK(1:N)) +* + CALL CCOPY( N, A( 1, 1 ), 1, WORK( 1 ), 1 ) +* +* J is the main loop index, increasing from 1 to N in steps of +* JB, where JB is the number of columns factorized by CLAHEF; +* JB is either NB, or N-J+1 for the last block +* + J = 0 + 11 CONTINUE + IF( J.GE.N ) + $ GO TO 20 +* +* each step of the main loop +* J is the last column of the previous panel +* J1 is the first column of the current panel +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 for the first panel, and +* K1=0 for the rest +* + J1 = J+1 + JB = MIN( N-J1+1, NB ) + K1 = MAX(1, J)-J +* +* Panel factorization +* + CALL CLAHEF_AA( UPLO, 2-K1, N-J, JB, + $ A( J+1, MAX(1, J) ), LDA, + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) +* +* Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) +* + DO J2 = J+2, MIN(N, J+JB+1) + IPIV( J2 ) = IPIV( J2 ) + J + IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN + CALL CSWAP( J1-K1-2, A( J2, 1 ), LDA, + $ A( IPIV(J2), 1 ), LDA ) + END IF + END DO + J = J + JB +* +* Trailing submatrix update, where +* A(J2+1, J1-1) stores L(J2+1, J1) and +* WORK(J2+1, 1) stores H(J2+1, 1) +* + IF( J.LT.N ) THEN +* +* if the first panel and JB=1 (NB=1), then nothing to do +* + IF( J1.GT.1 .OR. JB.GT.1 ) THEN +* +* Merge rank-1 update with BLAS-3 update +* + ALPHA = CONJG( A( J+1, J ) ) + A( J+1, J ) = ONE + CALL CCOPY( N-J, A( J+1, J-1 ), 1, + $ WORK( (J+1-J1+1)+JB*N ), 1 ) + CALL CSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 ) +* +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=0 and K2=1 for the first panel, +* and K1=1 and K2=0 for the rest +* + IF( J1.GT.1 ) THEN +* +* Not first panel +* + K2 = 1 + ELSE +* +* First panel +* + K2 = 0 +* +* First update skips the first column +* + JB = JB - 1 + END IF +* + DO J2 = J+1, N, NB + NJ = MIN( NB, N-J2+1 ) +* +* Update (J2, J2) diagonal block with CGEMV +* + J3 = J2 + DO MJ = NJ-1, 1, -1 + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ MJ, 1, JB+1, + $ -ONE, WORK( (J3-J1+1)+K1*N ), N, + $ A( J3, J1-K2 ), LDA, + $ ONE, A( J3, J3 ), LDA ) + J3 = J3 + 1 + END DO +* +* Update off-diagonal block of J2-th block column with CGEMM +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ N-J3+1, NJ, JB+1, + $ -ONE, WORK( (J3-J1+1)+K1*N ), N, + $ A( J2, J1-K2 ), LDA, + $ ONE, A( J3, J2 ), LDA ) + END DO +* +* Recover T( J+1, J ) +* + A( J+1, J ) = CONJG( ALPHA ) + END IF +* +* WORK(J+1, 1) stores H(J+1, 1) +* + CALL CCOPY( N-J, A( J+1, J+1 ), 1, WORK( 1 ), 1 ) + END IF + GO TO 11 + END IF +* + 20 CONTINUE + RETURN +* +* End of CHETRF_AA +* + END diff --git a/dspl/liblapack/SRC/chetrf_aa_2stage.f b/dspl/liblapack/SRC/chetrf_aa_2stage.f new file mode 100644 index 0000000..0fa2ae3 --- /dev/null +++ b/dspl/liblapack/SRC/chetrf_aa_2stage.f @@ -0,0 +1,664 @@ +*> \brief \b CHETRF_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRF_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, +* IPIV2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX A( LDA, * ), TB( * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRF_AA_2STAGE computes the factorization of a real hermitian matrix A +*> using the Aasen's algorithm. The form of the factorization is +*> +*> A = U*T*U**T or A = L*T*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is a hermitian band matrix with the +*> bandwidth of NB (NB is internally selected and stored in TB( 1 ), and T is +*> LU factorized with partial pivoting). +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, + $ IPIV2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX A( LDA, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER I, J, K, I1, I2, TD + INTEGER LDTB, NB, KB, JB, NT, IINFO + COMPLEX PIV +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV + +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CCOPY, CLACGV, CLACPY, + $ CLASET, CGBTRF, CGEMM, CGETRF, + $ CHEGST, CSWAP, CTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MIN, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF ( LTB .LT. 4*N .AND. .NOT.TQUERY ) THEN + INFO = -6 + ELSE IF ( LWORK .LT. N .AND. .NOT.WQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRF_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Answer the query +* + NB = ILAENV( 1, 'CHETRF_AA_2STAGE', UPLO, N, -1, -1, -1 ) + IF( INFO.EQ.0 ) THEN + IF( TQUERY ) THEN + TB( 1 ) = (3*NB+1)*N + END IF + IF( WQUERY ) THEN + WORK( 1 ) = N*NB + END IF + END IF + IF( TQUERY .OR. WQUERY ) THEN + RETURN + END IF +* +* Quick return +* + IF ( N.EQ.0 ) THEN + RETURN + ENDIF +* +* Determine the number of the block size +* + LDTB = LTB/N + IF( LDTB .LT. 3*NB+1 ) THEN + NB = (LDTB-1)/3 + END IF + IF( LWORK .LT. NB*N ) THEN + NB = LWORK/N + END IF +* +* Determine the number of the block columns +* + NT = (N+NB-1)/NB + TD = 2*NB + KB = MIN(NB, N) +* +* Initialize vectors/matrices +* + DO J = 1, KB + IPIV( J ) = J + END DO +* +* Save NB +* + TB( 1 ) = NB +* + IF( UPPER ) THEN +* +* ..................................................... +* Factorize A as L*D*L**T using the upper triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*U(I,J) + T(I+1,I)*U(I+1,J) + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( (I-1)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J) + IF( I .EQ. (J-1) ) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (I-2)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL CLACPY( 'Upper', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = U(1:J,J)'*H(1:J) + CALL CGEMM( 'Conjugate transpose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -ONE, A( 1, J*NB+1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += U(J,J)'*T(J,J-1)*U(J-1,J) + CALL CGEMM( 'Conjugate transpose', 'NoTranspose', + $ KB, NB, KB, + $ ONE, A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ ZERO, WORK( 1 ), N ) + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB, + $ -ONE, WORK( 1 ), N, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF + IF( J.GT.0 ) THEN + CALL CHEGST( 1, 'Upper', KB, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, IINFO ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + TB( TD+1 + (J*NB+I-1)*LDTB ) + $ = REAL( TB( TD+1 + (J*NB+I-1)*LDTB ) ) + DO K = I+1, KB + TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + $ = CONJG( TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) ) + END DO + END DO +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, KB, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + ELSE + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB+KB, + $ ONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL CGEMM( 'Conjugate transpose', 'NoTranspose', + $ NB, N-(J+1)*NB, J*NB, + $ -ONE, WORK( NB+1 ), N, + $ A( 1, (J+1)*NB+1 ), LDA, + $ ONE, A( J*NB+1, (J+1)*NB+1 ), LDA ) + END IF +* +* Copy panel to workspace to call CGETRF +* + DO K = 1, NB + CALL CCOPY( N-(J+1)*NB, + $ A( J*NB+K, (J+1)*NB+1 ), LDA, + $ WORK( 1+(K-1)*N ), 1 ) + END DO +* +* Factorize panel +* + CALL CGETRF( N-(J+1)*NB, NB, + $ WORK, N, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Copy panel back +* + DO K = 1, NB +* +* Copy only L-factor +* + CALL CCOPY( N-K-(J+1)*NB, + $ WORK( K+1+(K-1)*N ), 1, + $ A( J*NB+K, (J+1)*NB+K+1 ), LDA ) +* +* Transpose U-factor to be copied back into T(J+1, J) +* + CALL CLACGV( K, WORK( 1+(K-1)*N ), 1 ) + END DO +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL CLASET( 'Full', KB, NB, ZERO, ZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 ) + CALL CLACPY( 'Upper', KB, NB, + $ WORK, N, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL CTRSM( 'R', 'U', 'N', 'U', KB, NB, ONE, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J,J+1) into T(J+1, J), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) + $ = CONJG( TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) ) + END DO + END DO + CALL CLASET( 'Lower', KB, NB, ZERO, ONE, + $ A( J*NB+1, (J+1)*NB+1), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL CSWAP( K-1, A( (J+1)*NB+1, I1 ), 1, + $ A( (J+1)*NB+1, I2 ), 1 ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL CSWAP( I2-I1-1, A( I1, I1+1 ), LDA, + $ A( I1+1, I2 ), 1 ) + CALL CLACGV( I2-I1, A( I1, I1+1 ), LDA ) + CALL CLACGV( I2-I1-1, A( I1+1, I2 ), 1 ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL CSWAP( N-I2, A( I1, I2+1 ), LDA, + $ A( I2, I2+1 ), LDA ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL CSWAP( J*NB, A( 1, I1 ), 1, + $ A( 1, I2 ), 1 ) + END IF + ENDIF + END DO + END IF + END DO + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*L(J,I)' + T(I+1,I)'*L(J,I+1)' + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL CGEMM( 'NoTranspose', 'Conjugate transpose', + $ NB, KB, JB, + $ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (I-1)*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)' + IF( I .EQ. (J-1) ) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL CGEMM( 'NoTranspose', 'Conjugate transpose', + $ NB, KB, JB, + $ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (I-2)*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL CLACPY( 'Lower', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = L(J,1:J)*H(1:J) + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -ONE, A( J*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += L(J,J)*T(J,J-1)*L(J,J-1)' + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ KB, NB, KB, + $ ONE, A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ ZERO, WORK( 1 ), N ) + CALL CGEMM( 'NoTranspose', 'Conjugate transpose', + $ KB, KB, NB, + $ -ONE, WORK( 1 ), N, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF + IF( J.GT.0 ) THEN + CALL CHEGST( 1, 'Lower', KB, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, IINFO ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + TB( TD+1 + (J*NB+I-1)*LDTB ) + $ = REAL( TB( TD+1 + (J*NB+I-1)*LDTB ) ) + DO K = I+1, KB + TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + $ = CONJG( TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) ) + END DO + END DO +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL CGEMM( 'NoTranspose', 'Conjugate transpose', + $ KB, KB, KB, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + ELSE + CALL CGEMM( 'NoTranspose', 'Conjugate transpose', + $ KB, KB, NB+KB, + $ ONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ N-(J+1)*NB, NB, J*NB, + $ -ONE, A( (J+1)*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, A( (J+1)*NB+1, J*NB+1 ), LDA ) + END IF +* +* Factorize panel +* + CALL CGETRF( N-(J+1)*NB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL CLASET( 'Full', KB, NB, ZERO, ZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 ) + CALL CLACPY( 'Upper', KB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL CTRSM( 'R', 'L', 'C', 'U', KB, NB, ONE, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J+1,J) into T(J, J+1), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) + $ = CONJG( TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) ) + END DO + END DO + CALL CLASET( 'Upper', KB, NB, ZERO, ONE, + $ A( (J+1)*NB+1, J*NB+1), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL CSWAP( K-1, A( I1, (J+1)*NB+1 ), LDA, + $ A( I2, (J+1)*NB+1 ), LDA ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL CSWAP( I2-I1-1, A( I1+1, I1 ), 1, + $ A( I2, I1+1 ), LDA ) + CALL CLACGV( I2-I1, A( I1+1, I1 ), 1 ) + CALL CLACGV( I2-I1-1, A( I2, I1+1 ), LDA ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL CSWAP( N-I2, A( I2+1, I1 ), 1, + $ A( I2+1, I2 ), 1 ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL CSWAP( J*NB, A( I1, 1 ), LDA, + $ A( I2, 1 ), LDA ) + END IF + ENDIF + END DO +* +* Apply pivots to previous columns of L +* +c CALL CLASWP( J*NB, A( 1, 1 ), LDA, +c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) + END IF + END DO + END IF +* +* Factor the band matrix + CALL CGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) +* +* End of CHETRF_AA_2STAGE +* + END diff --git a/dspl/liblapack/SRC/chetrf_rk.f b/dspl/liblapack/SRC/chetrf_rk.f new file mode 100644 index 0000000..9e42b57 --- /dev/null +++ b/dspl/liblapack/SRC/chetrf_rk.f @@ -0,0 +1,498 @@ +*> \brief \b CHETRF_RK computes the factorization of a complex Hermitian indefinite matrix using the bounded Bunch-Kaufman (rook) diagonal pivoting method (BLAS3 blocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRF_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), E ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> CHETRF_RK computes the factorization of a complex Hermitian matrix A +*> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: +*> +*> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**H (or L**H) is the conjugate of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is Hermitian and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> For more information see Further Details section. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, contains: +*> a) ONLY diagonal elements of the Hermitian block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is COMPLEX array, dimension (N) +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the Hermitian block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> IPIV describes the permutation matrix P in the factorization +*> of matrix A as follows. The absolute value of IPIV(k) +*> represents the index of row and column that were +*> interchanged with the k-th row and column. The value of UPLO +*> describes the order in which the interchanges were applied. +*> Also, the sign of IPIV represents the block structure of +*> the Hermitian block diagonal matrix D with 1-by-1 or 2-by-2 +*> diagonal blocks which correspond to 1 or 2 interchanges +*> at each factorization step. For more info see Further +*> Details section. +*> +*> If UPLO = 'U', +*> ( in factorization order, k decreases from N to 1 ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N); +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k-1) < 0 means: +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k-1) != k-1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k-1) = k-1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) <= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> +*> If UPLO = 'L', +*> ( in factorization order, k increases from 1 to N ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N). +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k+1) < 0 means: +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k+1) != k+1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k+1) = k+1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) >= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension ( MAX(1,LWORK) ). +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=1. For best performance +*> LWORK >= N*NB, where NB is the block size returned +*> by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; +*> the routine only calculates the optimal size of the WORK +*> array, returns this value as the first entry of the WORK +*> array, and no error message related to LWORK is issued +*> by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> TODO: put correct description +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHETRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, IP, IWS, K, KB, LDWORK, LWKOPT, + $ NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLAHEF_RK, CHETF2_RK, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'CHETRF_RK', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRF_RK', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'CHETRF_RK', + $ UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by CLAHEF_RK; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 15 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL CLAHEF_RK( UPLO, K, NB, KB, A, LDA, E, + $ IPIV, WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL CHETF2_RK( UPLO, K, A, LDA, E, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* No need to adjust IPIV +* +* +* Apply permutations to the leading panel 1:k-1 +* +* Read IPIV from the last block factored, i.e. +* indices k-kb+1:k and apply row permutations to the +* last k+1 colunms k+1:N after that block +* (We can do the simple loop over IPIV with decrement -1, +* since the ABS value of IPIV( I ) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + IF( K.LT.N ) THEN + DO I = K, ( K - KB + 1 ), -1 + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + CALL CSWAP( N-K, A( I, K+1 ), LDA, + $ A( IP, K+1 ), LDA ) + END IF + END DO + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* +* This label is the exit from main loop over K decreasing +* from N to 1 in steps of KB +* + 15 CONTINUE +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by CLAHEF_RK; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 35 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL CLAHEF_RK( UPLO, N-K+1, NB, KB, A( K, K ), LDA, E( K ), + $ IPIV( K ), WORK, LDWORK, IINFO ) + + + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL CHETF2_RK( UPLO, N-K+1, A( K, K ), LDA, E( K ), + $ IPIV( K ), IINFO ) + KB = N - K + 1 +* + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO I = K, K + KB - 1 + IF( IPIV( I ).GT.0 ) THEN + IPIV( I ) = IPIV( I ) + K - 1 + ELSE + IPIV( I ) = IPIV( I ) - K + 1 + END IF + END DO +* +* Apply permutations to the leading panel 1:k-1 +* +* Read IPIV from the last block factored, i.e. +* indices k:k+kb-1 and apply row permutations to the +* first k-1 colunms 1:k-1 before that block +* (We can do the simple loop over IPIV with increment 1, +* since the ABS value of IPIV( I ) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + IF( K.GT.1 ) THEN + DO I = K, ( K + KB - 1 ), 1 + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + CALL CSWAP( K-1, A( I, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + END DO + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* +* This label is the exit from main loop over K increasing +* from 1 to N in steps of KB +* + 35 CONTINUE +* +* End Lower +* + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of CHETRF_RK +* + END diff --git a/dspl/liblapack/SRC/chetrf_rook.f b/dspl/liblapack/SRC/chetrf_rook.f new file mode 100644 index 0000000..0217150 --- /dev/null +++ b/dspl/liblapack/SRC/chetrf_rook.f @@ -0,0 +1,397 @@ +*> \brief \b CHETRF_ROOK computes the factorization of a complex Hermitian indefinite matrix using the bounded Bunch-Kaufman ("rook") diagonal pivoting method (blocked algorithm, calling Level 3 BLAS). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRF_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRF_ROOK computes the factorization of a comlex Hermitian matrix A +*> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. +*> The form of the factorization is +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is Hermitian and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> Only the last KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> Only the first KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)). +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=1. For best performance +*> LWORK >= N*NB, where NB is the block size returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexHEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> June 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHETRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.6.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLAHEF_ROOK, CHETF2_ROOK, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'CHETRF_ROOK', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, N*NB ) + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRF_ROOK', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'CHETRF_ROOK', + $ UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by CLAHEF_ROOK; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 40 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL CLAHEF_ROOK( UPLO, K, NB, KB, A, LDA, + $ IPIV, WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL CHETF2_ROOK( UPLO, K, A, LDA, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* No need to adjust IPIV +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by CLAHEF_ROOK; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL CLAHEF_ROOK( UPLO, N-K+1, NB, KB, A( K, K ), LDA, + $ IPIV( K ), WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL CHETF2_ROOK( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), + $ IINFO ) + KB = N - K + 1 + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO 30 J = K, K + KB - 1 + IF( IPIV( J ).GT.0 ) THEN + IPIV( J ) = IPIV( J ) + K - 1 + ELSE + IPIV( J ) = IPIV( J ) - K + 1 + END IF + 30 CONTINUE +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* + END IF +* + 40 CONTINUE + WORK( 1 ) = LWKOPT + RETURN +* +* End of CHETRF_ROOK +* + END diff --git a/dspl/liblapack/SRC/chetri.f b/dspl/liblapack/SRC/chetri.f new file mode 100644 index 0000000..a5762b2 --- /dev/null +++ b/dspl/liblapack/SRC/chetri.f @@ -0,0 +1,397 @@ +*> \brief \b CHETRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRI computes the inverse of a complex Hermitian indefinite matrix +*> A using the factorization A = U*D*U**H or A = L*D*L**H computed by +*> CHETRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by CHETRF. +*> +*> On exit, if INFO = 0, the (Hermitian) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHETRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEcomputational +* +* ===================================================================== + SUBROUTINE CHETRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + COMPLEX CONE, ZERO + PARAMETER ( ONE = 1.0E+0, CONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP, KSTEP + REAL AK, AKP1, D, T + COMPLEX AKKP1, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CHEMV, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U**H. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / REAL( A( K, K ) ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL CCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL CHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - REAL( CDOTC( K-1, WORK, 1, A( 1, + $ K ), 1 ) ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K+1 ) ) + AK = REAL( A( K, K ) ) / T + AKP1 = REAL( A( K+1, K+1 ) ) / T + AKKP1 = A( K, K+1 ) / T + D = T*( AK*AKP1-ONE ) + A( K, K ) = AKP1 / D + A( K+1, K+1 ) = AK / D + A( K, K+1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL CCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL CHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - REAL( CDOTC( K-1, WORK, 1, A( 1, + $ K ), 1 ) ) + A( K, K+1 ) = A( K, K+1 ) - + $ CDOTC( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 ) + CALL CCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 ) + CALL CHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K+1 ), 1 ) + A( K+1, K+1 ) = A( K+1, K+1 ) - + $ REAL( CDOTC( K-1, WORK, 1, A( 1, K+1 ), + $ 1 ) ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + CALL CSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + DO 40 J = KP + 1, K - 1 + TEMP = CONJG( A( J, K ) ) + A( J, K ) = CONJG( A( KP, J ) ) + A( KP, J ) = TEMP + 40 CONTINUE + A( KP, K ) = CONJG( A( KP, K ) ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K+1 ) + A( K, K+1 ) = A( KP, K+1 ) + A( KP, K+1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + GO TO 30 + 50 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L**H. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 60 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / REAL( A( K, K ) ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL CCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL CHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK, + $ 1, ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - REAL( CDOTC( N-K, WORK, 1, + $ A( K+1, K ), 1 ) ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K-1 ) ) + AK = REAL( A( K-1, K-1 ) ) / T + AKP1 = REAL( A( K, K ) ) / T + AKKP1 = A( K, K-1 ) / T + D = T*( AK*AKP1-ONE ) + A( K-1, K-1 ) = AKP1 / D + A( K, K ) = AK / D + A( K, K-1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL CCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL CHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK, + $ 1, ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - REAL( CDOTC( N-K, WORK, 1, + $ A( K+1, K ), 1 ) ) + A( K, K-1 ) = A( K, K-1 ) - + $ CDOTC( N-K, A( K+1, K ), 1, A( K+1, K-1 ), + $ 1 ) + CALL CCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 ) + CALL CHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK, + $ 1, ZERO, A( K+1, K-1 ), 1 ) + A( K-1, K-1 ) = A( K-1, K-1 ) - + $ REAL( CDOTC( N-K, WORK, 1, A( K+1, K-1 ), + $ 1 ) ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + DO 70 J = K + 1, KP - 1 + TEMP = CONJG( A( J, K ) ) + A( J, K ) = CONJG( A( KP, J ) ) + A( KP, J ) = TEMP + 70 CONTINUE + A( KP, K ) = CONJG( A( KP, K ) ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K-1 ) + A( K, K-1 ) = A( KP, K-1 ) + A( KP, K-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + GO TO 60 + 80 CONTINUE + END IF +* + RETURN +* +* End of CHETRI +* + END diff --git a/dspl/liblapack/SRC/chetri2.f b/dspl/liblapack/SRC/chetri2.f new file mode 100644 index 0000000..722d130 --- /dev/null +++ b/dspl/liblapack/SRC/chetri2.f @@ -0,0 +1,205 @@ +*> \brief \b CHETRI2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRI2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRI2 computes the inverse of a COMPLEX hermitian indefinite matrix +*> A using the factorization A = U*D*U**T or A = L*D*L**T computed by +*> CHETRF. CHETRI2 set the LEADING DIMENSION of the workspace +*> before calling CHETRI2X that actually computes the inverse. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the NB diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by CHETRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the NB structure of D +*> as determined by CHETRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N+NB+1)*(NB+3) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> WORK is size >= (N+NB+1)*(NB+3) +*> If LWORK = -1, then a workspace query is assumed; the routine +*> calculates: +*> - the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, +*> - and no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexHEcomputational +* +* ===================================================================== + SUBROUTINE CHETRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, LQUERY + INTEGER MINSIZE, NBMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CHETRI2X, CHETRI, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* Get blocksize + NBMAX = ILAENV( 1, 'CHETRF', UPLO, N, -1, -1, -1 ) + IF ( NBMAX .GE. N ) THEN + MINSIZE = N + ELSE + MINSIZE = (N+NBMAX+1)*(NBMAX+3) + END IF +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF (LWORK .LT. MINSIZE .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* +* Quick return if possible +* +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRI2', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK(1)=MINSIZE + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN + + IF( NBMAX .GE. N ) THEN + CALL CHETRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) + ELSE + CALL CHETRI2X( UPLO, N, A, LDA, IPIV, WORK, NBMAX, INFO ) + END IF + RETURN +* +* End of CHETRI2 +* + END diff --git a/dspl/liblapack/SRC/chetri2x.f b/dspl/liblapack/SRC/chetri2x.f new file mode 100644 index 0000000..2ac18b5 --- /dev/null +++ b/dspl/liblapack/SRC/chetri2x.f @@ -0,0 +1,590 @@ +*> \brief \b CHETRI2X +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRI2X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRI2X( UPLO, N, A, LDA, IPIV, WORK, NB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), WORK( N+NB+1,* ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRI2X computes the inverse of a complex Hermitian indefinite matrix +*> A using the factorization A = U*D*U**H or A = L*D*L**H computed by +*> CHETRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the NNB diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by CHETRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the NNB structure of D +*> as determined by CHETRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N+NB+1,NB+3) +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> Block size +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEcomputational +* +* ===================================================================== + SUBROUTINE CHETRI2X( UPLO, N, A, LDA, IPIV, WORK, NB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( N+NB+1,* ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + COMPLEX CONE, ZERO + PARAMETER ( ONE = 1.0E+0, + $ CONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IINFO, IP, K, CUT, NNB + INTEGER COUNT + INTEGER J, U11, INVD + + COMPLEX AK, AKKP1, AKP1, D, T + COMPLEX U01_I_J, U01_IP1_J + COMPLEX U11_I_J, U11_IP1_J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSYCONV, XERBLA, CTRTRI + EXTERNAL CGEMM, CTRMM, CHESWAPR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF +* +* Quick return if possible +* +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRI2X', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN +* +* Convert A +* Workspace got Non-diag elements of D +* + CALL CSYCONV( UPLO, 'C', N, A, LDA, IPIV, WORK, IINFO ) +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + END DO + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + END DO + END IF + INFO = 0 +* +* Splitting Workspace +* U01 is a block (N,NB+1) +* The first element of U01 is in WORK(1,1) +* U11 is a block (NB+1,NB+1) +* The first element of U11 is in WORK(N+1,1) + U11 = N +* INVD is a block (N,2) +* The first element of INVD is in WORK(1,INVD) + INVD = NB+2 + + IF( UPPER ) THEN +* +* invA = P * inv(U**H)*inv(D)*inv(U)*P**H. +* + CALL CTRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D)*inv(U) +* + K=1 + DO WHILE ( K .LE. N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK(K,INVD) = ONE / REAL ( A( K, K ) ) + WORK(K,INVD+1) = 0 + K=K+1 + ELSE +* 2 x 2 diagonal NNB + T = ABS ( WORK(K+1,1) ) + AK = REAL ( A( K, K ) ) / T + AKP1 = REAL ( A( K+1, K+1 ) ) / T + AKKP1 = WORK(K+1,1) / T + D = T*( AK*AKP1-ONE ) + WORK(K,INVD) = AKP1 / D + WORK(K+1,INVD+1) = AK / D + WORK(K,INVD+1) = -AKKP1 / D + WORK(K+1,INVD) = CONJG (WORK(K,INVD+1) ) + K=K+2 + END IF + END DO +* +* inv(U**H) = (inv(U))**H +* +* inv(U**H)*inv(D)*inv(U) +* + CUT=N + DO WHILE (CUT .GT. 0) + NNB=NB + IF (CUT .LE. NNB) THEN + NNB=CUT + ELSE + COUNT = 0 +* count negative elements, + DO I=CUT+1-NNB,CUT + IF (IPIV(I) .LT. 0) COUNT=COUNT+1 + END DO +* need a even number for a clear cut + IF (MOD(COUNT,2) .EQ. 1) NNB=NNB+1 + END IF + + CUT=CUT-NNB +* +* U01 Block +* + DO I=1,CUT + DO J=1,NNB + WORK(I,J)=A(I,CUT+J) + END DO + END DO +* +* U11 Block +* + DO I=1,NNB + WORK(U11+I,I)=CONE + DO J=1,I-1 + WORK(U11+I,J)=ZERO + END DO + DO J=I+1,NNB + WORK(U11+I,J)=A(CUT+I,CUT+J) + END DO + END DO +* +* invD*U01 +* + I=1 + DO WHILE (I .LE. CUT) + IF (IPIV(I) > 0) THEN + DO J=1,NNB + WORK(I,J)=WORK(I,INVD)*WORK(I,J) + END DO + I=I+1 + ELSE + DO J=1,NNB + U01_I_J = WORK(I,J) + U01_IP1_J = WORK(I+1,J) + WORK(I,J)=WORK(I,INVD)*U01_I_J+ + $ WORK(I,INVD+1)*U01_IP1_J + WORK(I+1,J)=WORK(I+1,INVD)*U01_I_J+ + $ WORK(I+1,INVD+1)*U01_IP1_J + END DO + I=I+2 + END IF + END DO +* +* invD1*U11 +* + I=1 + DO WHILE (I .LE. NNB) + IF (IPIV(CUT+I) > 0) THEN + DO J=I,NNB + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + END DO + I=I+1 + ELSE + DO J=I,NNB + U11_I_J = WORK(U11+I,J) + U11_IP1_J = WORK(U11+I+1,J) + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + + $ WORK(CUT+I,INVD+1)*WORK(U11+I+1,J) + WORK(U11+I+1,J)=WORK(CUT+I+1,INVD)*U11_I_J+ + $ WORK(CUT+I+1,INVD+1)*U11_IP1_J + END DO + I=I+2 + END IF + END DO +* +* U11**H*invD1*U11->U11 +* + CALL CTRMM('L','U','C','U',NNB, NNB, + $ CONE,A(CUT+1,CUT+1),LDA,WORK(U11+1,1),N+NB+1) +* + DO I=1,NNB + DO J=I,NNB + A(CUT+I,CUT+J)=WORK(U11+I,J) + END DO + END DO +* +* U01**H*invD*U01->A(CUT+I,CUT+J) +* + CALL CGEMM('C','N',NNB,NNB,CUT,CONE,A(1,CUT+1),LDA, + $ WORK,N+NB+1, ZERO, WORK(U11+1,1), N+NB+1) +* +* U11 = U11**H*invD1*U11 + U01**H*invD*U01 +* + DO I=1,NNB + DO J=I,NNB + A(CUT+I,CUT+J)=A(CUT+I,CUT+J)+WORK(U11+I,J) + END DO + END DO +* +* U01 = U00**H*invD0*U01 +* + CALL CTRMM('L',UPLO,'C','U',CUT, NNB, + $ CONE,A,LDA,WORK,N+NB+1) + +* +* Update U01 +* + DO I=1,CUT + DO J=1,NNB + A(I,CUT+J)=WORK(I,J) + END DO + END DO +* +* Next Block +* + END DO +* +* Apply PERMUTATIONS P and P**H: P * inv(U**H)*inv(D)*inv(U) *P**H +* + I=1 + DO WHILE ( I .LE. N ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF (I .LT. IP) CALL CHESWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL CHESWAPR( UPLO, N, A, LDA, IP ,I ) + ELSE + IP=-IPIV(I) + I=I+1 + IF ( (I-1) .LT. IP) + $ CALL CHESWAPR( UPLO, N, A, LDA, I-1 ,IP ) + IF ( (I-1) .GT. IP) + $ CALL CHESWAPR( UPLO, N, A, LDA, IP ,I-1 ) + ENDIF + I=I+1 + END DO + ELSE +* +* LOWER... +* +* invA = P * inv(U**H)*inv(D)*inv(U)*P**H. +* + CALL CTRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D)*inv(U) +* + K=N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK(K,INVD) = ONE / REAL ( A( K, K ) ) + WORK(K,INVD+1) = 0 + K=K-1 + ELSE +* 2 x 2 diagonal NNB + T = ABS ( WORK(K-1,1) ) + AK = REAL ( A( K-1, K-1 ) ) / T + AKP1 = REAL ( A( K, K ) ) / T + AKKP1 = WORK(K-1,1) / T + D = T*( AK*AKP1-ONE ) + WORK(K-1,INVD) = AKP1 / D + WORK(K,INVD) = AK / D + WORK(K,INVD+1) = -AKKP1 / D + WORK(K-1,INVD+1) = CONJG (WORK(K,INVD+1) ) + K=K-2 + END IF + END DO +* +* inv(U**H) = (inv(U))**H +* +* inv(U**H)*inv(D)*inv(U) +* + CUT=0 + DO WHILE (CUT .LT. N) + NNB=NB + IF (CUT + NNB .GE. N) THEN + NNB=N-CUT + ELSE + COUNT = 0 +* count negative elements, + DO I=CUT+1,CUT+NNB + IF (IPIV(I) .LT. 0) COUNT=COUNT+1 + END DO +* need a even number for a clear cut + IF (MOD(COUNT,2) .EQ. 1) NNB=NNB+1 + END IF +* L21 Block + DO I=1,N-CUT-NNB + DO J=1,NNB + WORK(I,J)=A(CUT+NNB+I,CUT+J) + END DO + END DO +* L11 Block + DO I=1,NNB + WORK(U11+I,I)=CONE + DO J=I+1,NNB + WORK(U11+I,J)=ZERO + END DO + DO J=1,I-1 + WORK(U11+I,J)=A(CUT+I,CUT+J) + END DO + END DO +* +* invD*L21 +* + I=N-CUT-NNB + DO WHILE (I .GE. 1) + IF (IPIV(CUT+NNB+I) > 0) THEN + DO J=1,NNB + WORK(I,J)=WORK(CUT+NNB+I,INVD)*WORK(I,J) + END DO + I=I-1 + ELSE + DO J=1,NNB + U01_I_J = WORK(I,J) + U01_IP1_J = WORK(I-1,J) + WORK(I,J)=WORK(CUT+NNB+I,INVD)*U01_I_J+ + $ WORK(CUT+NNB+I,INVD+1)*U01_IP1_J + WORK(I-1,J)=WORK(CUT+NNB+I-1,INVD+1)*U01_I_J+ + $ WORK(CUT+NNB+I-1,INVD)*U01_IP1_J + END DO + I=I-2 + END IF + END DO +* +* invD1*L11 +* + I=NNB + DO WHILE (I .GE. 1) + IF (IPIV(CUT+I) > 0) THEN + DO J=1,NNB + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + END DO + I=I-1 + ELSE + DO J=1,NNB + U11_I_J = WORK(U11+I,J) + U11_IP1_J = WORK(U11+I-1,J) + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + + $ WORK(CUT+I,INVD+1)*U11_IP1_J + WORK(U11+I-1,J)=WORK(CUT+I-1,INVD+1)*U11_I_J+ + $ WORK(CUT+I-1,INVD)*U11_IP1_J + END DO + I=I-2 + END IF + END DO +* +* L11**H*invD1*L11->L11 +* + CALL CTRMM('L',UPLO,'C','U',NNB, NNB, + $ CONE,A(CUT+1,CUT+1),LDA,WORK(U11+1,1),N+NB+1) +* + DO I=1,NNB + DO J=1,I + A(CUT+I,CUT+J)=WORK(U11+I,J) + END DO + END DO +* + IF ( (CUT+NNB) .LT. N ) THEN +* +* L21**H*invD2*L21->A(CUT+I,CUT+J) +* + CALL CGEMM('C','N',NNB,NNB,N-NNB-CUT,CONE,A(CUT+NNB+1,CUT+1) + $ ,LDA,WORK,N+NB+1, ZERO, WORK(U11+1,1), N+NB+1) + +* +* L11 = L11**H*invD1*L11 + U01**H*invD*U01 +* + DO I=1,NNB + DO J=1,I + A(CUT+I,CUT+J)=A(CUT+I,CUT+J)+WORK(U11+I,J) + END DO + END DO +* +* L01 = L22**H*invD2*L21 +* + CALL CTRMM('L',UPLO,'C','U', N-NNB-CUT, NNB, + $ CONE,A(CUT+NNB+1,CUT+NNB+1),LDA,WORK,N+NB+1) + +* Update L21 + DO I=1,N-CUT-NNB + DO J=1,NNB + A(CUT+NNB+I,CUT+J)=WORK(I,J) + END DO + END DO + ELSE +* +* L11 = L11**H*invD1*L11 +* + DO I=1,NNB + DO J=1,I + A(CUT+I,CUT+J)=WORK(U11+I,J) + END DO + END DO + END IF +* +* Next Block +* + CUT=CUT+NNB + END DO +* +* Apply PERMUTATIONS P and P**H: P * inv(U**H)*inv(D)*inv(U) *P**H +* + I=N + DO WHILE ( I .GE. 1 ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF (I .LT. IP) CALL CHESWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL CHESWAPR( UPLO, N, A, LDA, IP ,I ) + ELSE + IP=-IPIV(I) + IF ( I .LT. IP) CALL CHESWAPR( UPLO, N, A, LDA, I ,IP ) + IF ( I .GT. IP) CALL CHESWAPR( UPLO, N, A, LDA, IP ,I ) + I=I-1 + ENDIF + I=I-1 + END DO + END IF +* + RETURN +* +* End of CHETRI2X +* + END + diff --git a/dspl/liblapack/SRC/chetri_3.f b/dspl/liblapack/SRC/chetri_3.f new file mode 100644 index 0000000..0b31f53 --- /dev/null +++ b/dspl/liblapack/SRC/chetri_3.f @@ -0,0 +1,248 @@ +*> \brief \b CHETRI_3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRI_3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRI_3( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), E( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> CHETRI_3 computes the inverse of a complex Hermitian indefinite +*> matrix A using the factorization computed by CHETRF_RK or CHETRF_BK: +*> +*> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**H (or L**H) is the conjugate of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is Hermitian and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> CHETRI_3 sets the leading dimension of the workspace before calling +*> CHETRI_3X that actually computes the inverse. This is the blocked +*> version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix. +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, diagonal of the block diagonal matrix D and +*> factors U or L as computed by CHETRF_RK and CHETRF_BK: +*> a) ONLY diagonal elements of the Hermitian block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> On exit, if INFO = 0, the Hermitian inverse of the original +*> matrix. +*> If UPLO = 'U': the upper triangular part of the inverse +*> is formed and the part of A below the diagonal is not +*> referenced; +*> If UPLO = 'L': the lower triangular part of the inverse +*> is formed and the part of A above the diagonal is not +*> referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the Hermitian block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHETRF_RK or CHETRF_BK. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N+NB+1)*(NB+3). +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= (N+NB+1)*(NB+3). +*> +*> If LDWORK = -1, then a workspace query is assumed; +*> the routine only calculates the optimal size of the optimal +*> size of the WORK array, returns this value as the first +*> entry of the WORK array, and no error message related to +*> LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexHEcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> November 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHETRI_3( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, LQUERY + INTEGER LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CHETRI_3X, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* Determine the block size +* + NB = MAX( 1, ILAENV( 1, 'CHETRI_3', UPLO, N, -1, -1, -1 ) ) + LWKOPT = ( N+NB+1 ) * ( NB+3 ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF ( LWORK .LT. LWKOPT .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRI_3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK( 1 ) = LWKOPT + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + CALL CHETRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CHETRI_3 +* + END diff --git a/dspl/liblapack/SRC/chetri_3x.f b/dspl/liblapack/SRC/chetri_3x.f new file mode 100644 index 0000000..d4cddc1 --- /dev/null +++ b/dspl/liblapack/SRC/chetri_3x.f @@ -0,0 +1,649 @@ +*> \brief \b CHETRI_3X +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRI_3X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), E( * ), WORK( N+NB+1, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> CHETRI_3X computes the inverse of a complex Hermitian indefinite +*> matrix A using the factorization computed by CHETRF_RK or CHETRF_BK: +*> +*> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**H (or L**H) is the conjugate of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is Hermitian and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix. +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, diagonal of the block diagonal matrix D and +*> factors U or L as computed by CHETRF_RK and CHETRF_BK: +*> a) ONLY diagonal elements of the Hermitian block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> On exit, if INFO = 0, the Hermitian inverse of the original +*> matrix. +*> If UPLO = 'U': the upper triangular part of the inverse +*> is formed and the part of A below the diagonal is not +*> referenced; +*> If UPLO = 'L': the lower triangular part of the inverse +*> is formed and the part of A above the diagonal is not +*> referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the Hermitian block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHETRF_RK or CHETRF_BK. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N+NB+1,NB+3). +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> Block size. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexHEcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> June 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHETRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), E( * ), WORK( N+NB+1, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + COMPLEX CONE, CZERO + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), + $ CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11 + REAL AK, AKP1, T + COMPLEX AKKP1, D, U01_I_J, U01_IP1_J, U11_I_J, + $ U11_IP1_J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CHESWAPR, CTRTRI, CTRMM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF +* +* Quick return if possible +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRI_3X', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN +* +* Workspace got Non-diag elements of D +* + DO K = 1, N + WORK( K, 1 ) = E( K ) + END DO +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO ) + $ RETURN + END DO + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO ) + $ RETURN + END DO + END IF +* + INFO = 0 +* +* Splitting Workspace +* U01 is a block ( N, NB+1 ) +* The first element of U01 is in WORK( 1, 1 ) +* U11 is a block ( NB+1, NB+1 ) +* The first element of U11 is in WORK( N+1, 1 ) +* + U11 = N +* +* INVD is a block ( N, 2 ) +* The first element of INVD is in WORK( 1, INVD ) +* + INVD = NB + 2 + + IF( UPPER ) THEN +* +* Begin Upper +* +* invA = P * inv(U**H) * inv(D) * inv(U) * P**T. +* + CALL CTRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D) * inv(U) +* + K = 1 + DO WHILE( K.LE.N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK( K, INVD ) = ONE / REAL( A( K, K ) ) + WORK( K, INVD+1 ) = CZERO + ELSE +* 2 x 2 diagonal NNB + T = ABS( WORK( K+1, 1 ) ) + AK = REAL( A( K, K ) ) / T + AKP1 = REAL( A( K+1, K+1 ) ) / T + AKKP1 = WORK( K+1, 1 ) / T + D = T*( AK*AKP1-CONE ) + WORK( K, INVD ) = AKP1 / D + WORK( K+1, INVD+1 ) = AK / D + WORK( K, INVD+1 ) = -AKKP1 / D + WORK( K+1, INVD ) = CONJG( WORK( K, INVD+1 ) ) + K = K + 1 + END IF + K = K + 1 + END DO +* +* inv(U**H) = (inv(U))**H +* +* inv(U**H) * inv(D) * inv(U) +* + CUT = N + DO WHILE( CUT.GT.0 ) + NNB = NB + IF( CUT.LE.NNB ) THEN + NNB = CUT + ELSE + ICOUNT = 0 +* count negative elements, + DO I = CUT+1-NNB, CUT + IF( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1 + END DO +* need a even number for a clear cut + IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1 + END IF + + CUT = CUT - NNB +* +* U01 Block +* + DO I = 1, CUT + DO J = 1, NNB + WORK( I, J ) = A( I, CUT+J ) + END DO + END DO +* +* U11 Block +* + DO I = 1, NNB + WORK( U11+I, I ) = CONE + DO J = 1, I-1 + WORK( U11+I, J ) = CZERO + END DO + DO J = I+1, NNB + WORK( U11+I, J ) = A( CUT+I, CUT+J ) + END DO + END DO +* +* invD * U01 +* + I = 1 + DO WHILE( I.LE.CUT ) + IF( IPIV( I ).GT.0 ) THEN + DO J = 1, NNB + WORK( I, J ) = WORK( I, INVD ) * WORK( I, J ) + END DO + ELSE + DO J = 1, NNB + U01_I_J = WORK( I, J ) + U01_IP1_J = WORK( I+1, J ) + WORK( I, J ) = WORK( I, INVD ) * U01_I_J + $ + WORK( I, INVD+1 ) * U01_IP1_J + WORK( I+1, J ) = WORK( I+1, INVD ) * U01_I_J + $ + WORK( I+1, INVD+1 ) * U01_IP1_J + END DO + I = I + 1 + END IF + I = I + 1 + END DO +* +* invD1 * U11 +* + I = 1 + DO WHILE ( I.LE.NNB ) + IF( IPIV( CUT+I ).GT.0 ) THEN + DO J = I, NNB + WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) + END DO + ELSE + DO J = I, NNB + U11_I_J = WORK(U11+I,J) + U11_IP1_J = WORK(U11+I+1,J) + WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) + $ + WORK(CUT+I,INVD+1) * WORK(U11+I+1,J) + WORK( U11+I+1, J ) = WORK(CUT+I+1,INVD) * U11_I_J + $ + WORK(CUT+I+1,INVD+1) * U11_IP1_J + END DO + I = I + 1 + END IF + I = I + 1 + END DO +* +* U11**H * invD1 * U11 -> U11 +* + CALL CTRMM( 'L', 'U', 'C', 'U', NNB, NNB, + $ CONE, A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ), + $ N+NB+1 ) +* + DO I = 1, NNB + DO J = I, NNB + A( CUT+I, CUT+J ) = WORK( U11+I, J ) + END DO + END DO +* +* U01**H * invD * U01 -> A( CUT+I, CUT+J ) +* + CALL CGEMM( 'C', 'N', NNB, NNB, CUT, CONE, A( 1, CUT+1 ), + $ LDA, WORK, N+NB+1, CZERO, WORK(U11+1,1), + $ N+NB+1 ) + +* +* U11 = U11**H * invD1 * U11 + U01**H * invD * U01 +* + DO I = 1, NNB + DO J = I, NNB + A( CUT+I, CUT+J ) = A( CUT+I, CUT+J ) + WORK(U11+I,J) + END DO + END DO +* +* U01 = U00**H * invD0 * U01 +* + CALL CTRMM( 'L', UPLO, 'C', 'U', CUT, NNB, + $ CONE, A, LDA, WORK, N+NB+1 ) + +* +* Update U01 +* + DO I = 1, CUT + DO J = 1, NNB + A( I, CUT+J ) = WORK( I, J ) + END DO + END DO +* +* Next Block +* + END DO +* +* Apply PERMUTATIONS P and P**T: +* P * inv(U**H) * inv(D) * inv(U) * P**T. +* Interchange rows and columns I and IPIV(I) in reverse order +* from the formation order of IPIV vector for Upper case. +* +* ( We can use a loop over IPIV with increment 1, +* since the ABS value of IPIV(I) represents the row (column) +* index of the interchange with row (column) i in both 1x1 +* and 2x2 pivot cases, i.e. we don't need separate code branches +* for 1x1 and 2x2 pivot cases ) +* + DO I = 1, N + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + IF (I .LT. IP) CALL CHESWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL CHESWAPR( UPLO, N, A, LDA, IP ,I ) + END IF + END DO +* + ELSE +* +* Begin Lower +* +* inv A = P * inv(L**H) * inv(D) * inv(L) * P**T. +* + CALL CTRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D) * inv(L) +* + K = N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK( K, INVD ) = ONE / REAL( A( K, K ) ) + WORK( K, INVD+1 ) = CZERO + ELSE +* 2 x 2 diagonal NNB + T = ABS( WORK( K-1, 1 ) ) + AK = REAL( A( K-1, K-1 ) ) / T + AKP1 = REAL( A( K, K ) ) / T + AKKP1 = WORK( K-1, 1 ) / T + D = T*( AK*AKP1-CONE ) + WORK( K-1, INVD ) = AKP1 / D + WORK( K, INVD ) = AK / D + WORK( K, INVD+1 ) = -AKKP1 / D + WORK( K-1, INVD+1 ) = CONJG( WORK( K, INVD+1 ) ) + K = K - 1 + END IF + K = K - 1 + END DO +* +* inv(L**H) = (inv(L))**H +* +* inv(L**H) * inv(D) * inv(L) +* + CUT = 0 + DO WHILE( CUT.LT.N ) + NNB = NB + IF( (CUT + NNB).GT.N ) THEN + NNB = N - CUT + ELSE + ICOUNT = 0 +* count negative elements, + DO I = CUT + 1, CUT+NNB + IF ( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1 + END DO +* need a even number for a clear cut + IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1 + END IF +* +* L21 Block +* + DO I = 1, N-CUT-NNB + DO J = 1, NNB + WORK( I, J ) = A( CUT+NNB+I, CUT+J ) + END DO + END DO +* +* L11 Block +* + DO I = 1, NNB + WORK( U11+I, I) = CONE + DO J = I+1, NNB + WORK( U11+I, J ) = CZERO + END DO + DO J = 1, I-1 + WORK( U11+I, J ) = A( CUT+I, CUT+J ) + END DO + END DO +* +* invD*L21 +* + I = N-CUT-NNB + DO WHILE( I.GE.1 ) + IF( IPIV( CUT+NNB+I ).GT.0 ) THEN + DO J = 1, NNB + WORK( I, J ) = WORK( CUT+NNB+I, INVD) * WORK( I, J) + END DO + ELSE + DO J = 1, NNB + U01_I_J = WORK(I,J) + U01_IP1_J = WORK(I-1,J) + WORK(I,J)=WORK(CUT+NNB+I,INVD)*U01_I_J+ + $ WORK(CUT+NNB+I,INVD+1)*U01_IP1_J + WORK(I-1,J)=WORK(CUT+NNB+I-1,INVD+1)*U01_I_J+ + $ WORK(CUT+NNB+I-1,INVD)*U01_IP1_J + END DO + I = I - 1 + END IF + I = I - 1 + END DO +* +* invD1*L11 +* + I = NNB + DO WHILE( I.GE.1 ) + IF( IPIV( CUT+I ).GT.0 ) THEN + DO J = 1, NNB + WORK( U11+I, J ) = WORK( CUT+I, INVD)*WORK(U11+I,J) + END DO + + ELSE + DO J = 1, NNB + U11_I_J = WORK( U11+I, J ) + U11_IP1_J = WORK( U11+I-1, J ) + WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) + $ + WORK(CUT+I,INVD+1) * U11_IP1_J + WORK( U11+I-1, J ) = WORK(CUT+I-1,INVD+1) * U11_I_J + $ + WORK(CUT+I-1,INVD) * U11_IP1_J + END DO + I = I - 1 + END IF + I = I - 1 + END DO +* +* L11**H * invD1 * L11 -> L11 +* + CALL CTRMM( 'L', UPLO, 'C', 'U', NNB, NNB, CONE, + $ A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ), + $ N+NB+1 ) + +* + DO I = 1, NNB + DO J = 1, I + A( CUT+I, CUT+J ) = WORK( U11+I, J ) + END DO + END DO +* + IF( (CUT+NNB).LT.N ) THEN +* +* L21**H * invD2*L21 -> A( CUT+I, CUT+J ) +* + CALL CGEMM( 'C', 'N', NNB, NNB, N-NNB-CUT, CONE, + $ A( CUT+NNB+1, CUT+1 ), LDA, WORK, N+NB+1, + $ CZERO, WORK( U11+1, 1 ), N+NB+1 ) + +* +* L11 = L11**H * invD1 * L11 + U01**H * invD * U01 +* + DO I = 1, NNB + DO J = 1, I + A( CUT+I, CUT+J ) = A( CUT+I, CUT+J )+WORK(U11+I,J) + END DO + END DO +* +* L01 = L22**H * invD2 * L21 +* + CALL CTRMM( 'L', UPLO, 'C', 'U', N-NNB-CUT, NNB, CONE, + $ A( CUT+NNB+1, CUT+NNB+1 ), LDA, WORK, + $ N+NB+1 ) +* +* Update L21 +* + DO I = 1, N-CUT-NNB + DO J = 1, NNB + A( CUT+NNB+I, CUT+J ) = WORK( I, J ) + END DO + END DO +* + ELSE +* +* L11 = L11**H * invD1 * L11 +* + DO I = 1, NNB + DO J = 1, I + A( CUT+I, CUT+J ) = WORK( U11+I, J ) + END DO + END DO + END IF +* +* Next Block +* + CUT = CUT + NNB +* + END DO +* +* Apply PERMUTATIONS P and P**T: +* P * inv(L**H) * inv(D) * inv(L) * P**T. +* Interchange rows and columns I and IPIV(I) in reverse order +* from the formation order of IPIV vector for Lower case. +* +* ( We can use a loop over IPIV with increment -1, +* since the ABS value of IPIV(I) represents the row (column) +* index of the interchange with row (column) i in both 1x1 +* and 2x2 pivot cases, i.e. we don't need separate code branches +* for 1x1 and 2x2 pivot cases ) +* + DO I = N, 1, -1 + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + IF (I .LT. IP) CALL CHESWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL CHESWAPR( UPLO, N, A, LDA, IP ,I ) + END IF + END DO +* + END IF +* + RETURN +* +* End of CHETRI_3X +* + END diff --git a/dspl/liblapack/SRC/chetri_rook.f b/dspl/liblapack/SRC/chetri_rook.f new file mode 100644 index 0000000..00d99e2 --- /dev/null +++ b/dspl/liblapack/SRC/chetri_rook.f @@ -0,0 +1,516 @@ +*> \brief \b CHETRI_ROOK computes the inverse of HE matrix using the factorization obtained with the bounded Bunch-Kaufman ("rook") diagonal pivoting method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRI_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRI_ROOK( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRI_ROOK computes the inverse of a complex Hermitian indefinite matrix +*> A using the factorization A = U*D*U**H or A = L*D*L**H computed by +*> CHETRF_ROOK. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by CHETRF_ROOK. +*> +*> On exit, if INFO = 0, the (Hermitian) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHETRF_ROOK. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup complexHEcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHETRI_ROOK( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + COMPLEX CONE, CZERO + PARAMETER ( ONE = 1.0E+0, CONE = ( 1.0E+0, 0.0E+0 ), + $ CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP, KSTEP + REAL AK, AKP1, D, T + COMPLEX AKKP1, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CHEMV, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRI_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO ) + $ RETURN + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U**H. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 70 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / REAL( A( K, K ) ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL CCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL CHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, CZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - REAL( CDOTC( K-1, WORK, 1, A( 1, + $ K ), 1 ) ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K+1 ) ) + AK = REAL( A( K, K ) ) / T + AKP1 = REAL( A( K+1, K+1 ) ) / T + AKKP1 = A( K, K+1 ) / T + D = T*( AK*AKP1-ONE ) + A( K, K ) = AKP1 / D + A( K+1, K+1 ) = AK / D + A( K, K+1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL CCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL CHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, CZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - REAL( CDOTC( K-1, WORK, 1, A( 1, + $ K ), 1 ) ) + A( K, K+1 ) = A( K, K+1 ) - + $ CDOTC( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 ) + CALL CCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 ) + CALL CHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, CZERO, + $ A( 1, K+1 ), 1 ) + A( K+1, K+1 ) = A( K+1, K+1 ) - + $ REAL( CDOTC( K-1, WORK, 1, A( 1, K+1 ), + $ 1 ) ) + END IF + KSTEP = 2 + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* Interchange rows and columns K and IPIV(K) in the leading +* submatrix A(1:k,1:k) +* + KP = IPIV( K ) + IF( KP.NE.K ) THEN +* + IF( KP.GT.1 ) + $ CALL CSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) +* + DO 40 J = KP + 1, K - 1 + TEMP = CONJG( A( J, K ) ) + A( J, K ) = CONJG( A( KP, J ) ) + A( KP, J ) = TEMP + 40 CONTINUE +* + A( KP, K ) = CONJG( A( KP, K ) ) +* + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + ELSE +* +* Interchange rows and columns K and K+1 with -IPIV(K) and +* -IPIV(K+1) in the leading submatrix A(k+1:n,k+1:n) +* +* (1) Interchange rows and columns K and -IPIV(K) +* + KP = -IPIV( K ) + IF( KP.NE.K ) THEN +* + IF( KP.GT.1 ) + $ CALL CSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) +* + DO 50 J = KP + 1, K - 1 + TEMP = CONJG( A( J, K ) ) + A( J, K ) = CONJG( A( KP, J ) ) + A( KP, J ) = TEMP + 50 CONTINUE +* + A( KP, K ) = CONJG( A( KP, K ) ) +* + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP +* + TEMP = A( K, K+1 ) + A( K, K+1 ) = A( KP, K+1 ) + A( KP, K+1 ) = TEMP + END IF +* +* (2) Interchange rows and columns K+1 and -IPIV(K+1) +* + K = K + 1 + KP = -IPIV( K ) + IF( KP.NE.K ) THEN +* + IF( KP.GT.1 ) + $ CALL CSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) +* + DO 60 J = KP + 1, K - 1 + TEMP = CONJG( A( J, K ) ) + A( J, K ) = CONJG( A( KP, J ) ) + A( KP, J ) = TEMP + 60 CONTINUE +* + A( KP, K ) = CONJG( A( KP, K ) ) +* + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + END IF +* + K = K + 1 + GO TO 30 + 70 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L**H. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 80 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 120 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / REAL( A( K, K ) ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL CCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL CHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK, + $ 1, CZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - REAL( CDOTC( N-K, WORK, 1, + $ A( K+1, K ), 1 ) ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K-1 ) ) + AK = REAL( A( K-1, K-1 ) ) / T + AKP1 = REAL( A( K, K ) ) / T + AKKP1 = A( K, K-1 ) / T + D = T*( AK*AKP1-ONE ) + A( K-1, K-1 ) = AKP1 / D + A( K, K ) = AK / D + A( K, K-1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL CCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL CHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK, + $ 1, CZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - REAL( CDOTC( N-K, WORK, 1, + $ A( K+1, K ), 1 ) ) + A( K, K-1 ) = A( K, K-1 ) - + $ CDOTC( N-K, A( K+1, K ), 1, A( K+1, K-1 ), + $ 1 ) + CALL CCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 ) + CALL CHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK, + $ 1, CZERO, A( K+1, K-1 ), 1 ) + A( K-1, K-1 ) = A( K-1, K-1 ) - + $ REAL( CDOTC( N-K, WORK, 1, A( K+1, K-1 ), + $ 1 ) ) + END IF + KSTEP = 2 + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* Interchange rows and columns K and IPIV(K) in the trailing +* submatrix A(k:n,k:n) +* + KP = IPIV( K ) + IF( KP.NE.K ) THEN +* + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) +* + DO 90 J = K + 1, KP - 1 + TEMP = CONJG( A( J, K ) ) + A( J, K ) = CONJG( A( KP, J ) ) + A( KP, J ) = TEMP + 90 CONTINUE +* + A( KP, K ) = CONJG( A( KP, K ) ) +* + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + ELSE +* +* Interchange rows and columns K and K-1 with -IPIV(K) and +* -IPIV(K-1) in the trailing submatrix A(k-1:n,k-1:n) +* +* (1) Interchange rows and columns K and -IPIV(K) +* + KP = -IPIV( K ) + IF( KP.NE.K ) THEN +* + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) +* + DO 100 J = K + 1, KP - 1 + TEMP = CONJG( A( J, K ) ) + A( J, K ) = CONJG( A( KP, J ) ) + A( KP, J ) = TEMP + 100 CONTINUE +* + A( KP, K ) = CONJG( A( KP, K ) ) +* + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP +* + TEMP = A( K, K-1 ) + A( K, K-1 ) = A( KP, K-1 ) + A( KP, K-1 ) = TEMP + END IF +* +* (2) Interchange rows and columns K-1 and -IPIV(K-1) +* + K = K - 1 + KP = -IPIV( K ) + IF( KP.NE.K ) THEN +* + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) +* + DO 110 J = K + 1, KP - 1 + TEMP = CONJG( A( J, K ) ) + A( J, K ) = CONJG( A( KP, J ) ) + A( KP, J ) = TEMP + 110 CONTINUE +* + A( KP, K ) = CONJG( A( KP, K ) ) +* + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + END IF +* + K = K - 1 + GO TO 80 + 120 CONTINUE + END IF +* + RETURN +* +* End of CHETRI_ROOK +* + END diff --git a/dspl/liblapack/SRC/chetrs.f b/dspl/liblapack/SRC/chetrs.f new file mode 100644 index 0000000..a786462 --- /dev/null +++ b/dspl/liblapack/SRC/chetrs.f @@ -0,0 +1,469 @@ +*> \brief \b CHETRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRS solves a system of linear equations A*X = B with a complex +*> Hermitian matrix A using the factorization A = U*D*U**H or +*> A = L*D*L**H computed by CHETRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CHETRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHETRF. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEcomputational +* +* ===================================================================== + SUBROUTINE CHETRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP + REAL S + COMPLEX AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CGERU, CLACGV, CSSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, REAL +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**H. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL CGERU( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + S = REAL( ONE ) / REAL( A( K, K ) ) + CALL CSSCAL( NRHS, S, B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL CSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL CGERU( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL CGERU( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ), + $ LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K-1, K ) + AKM1 = A( K-1, K-1 ) / AKM1K + AK = A( K, K ) / CONJG( AKM1K ) + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / CONJG( AKM1K ) + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U**H *X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U**H(K)), where U(K) is the transformation +* stored in column K of A. +* + IF( K.GT.1 ) THEN + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + END IF +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U**H(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.GT.1 ) THEN + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL CLACGV( NRHS, B( K, 1 ), LDB ) +* + CALL CLACGV( NRHS, B( K+1, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB ) + CALL CLACGV( NRHS, B( K+1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L**H. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL CGERU( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + S = REAL( ONE ) / REAL( A( K, K ) ) + CALL CSSCAL( NRHS, S, B( K, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL CSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL CGERU( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL CGERU( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K+1, K ) + AKM1 = A( K, K ) / CONJG( AKM1K ) + AK = A( K+1, K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / CONJG( AKM1K ) + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L**H *X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L**H(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) THEN + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, A( K+1, K ), 1, ONE, + $ B( K, 1 ), LDB ) + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + END IF +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L**H(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, A( K+1, K ), 1, ONE, + $ B( K, 1 ), LDB ) + CALL CLACGV( NRHS, B( K, 1 ), LDB ) +* + CALL CLACGV( NRHS, B( K-1, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, A( K+1, K-1 ), 1, ONE, + $ B( K-1, 1 ), LDB ) + CALL CLACGV( NRHS, B( K-1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of CHETRS +* + END diff --git a/dspl/liblapack/SRC/chetrs2.f b/dspl/liblapack/SRC/chetrs2.f new file mode 100644 index 0000000..7041568 --- /dev/null +++ b/dspl/liblapack/SRC/chetrs2.f @@ -0,0 +1,359 @@ +*> \brief \b CHETRS2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRS2( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRS2 solves a system of linear equations A*X = B with a complex +*> Hermitian matrix A using the factorization A = U*D*U**H or +*> A = L*D*L**H computed by CHETRF and converted by CSYCONV. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CHETRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHETRF. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEcomputational +* +* ===================================================================== + SUBROUTINE CHETRS2( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = (1.0E+0,0.0E+0) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IINFO, J, K, KP + REAL S + COMPLEX AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL, CSYCONV, CSWAP, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, REAL +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRS2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Convert A +* + CALL CSYCONV( UPLO, 'C', N, A, LDA, IPIV, WORK, IINFO ) +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**H. +* +* P**T * B + K=N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K-1 and -IPIV(K). + KP = -IPIV( K ) + IF( KP.EQ.-IPIV( K-1 ) ) + $ CALL CSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-2 + END IF + END DO +* +* Compute (U \P**T * B) -> B [ (U \P**T * B) ] +* + CALL CTRSM('L','U','N','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* Compute D \ B -> B [ D \ (U \P**T * B) ] +* + I=N + DO WHILE ( I .GE. 1 ) + IF( IPIV(I) .GT. 0 ) THEN + S = REAL( ONE ) / REAL( A( I, I ) ) + CALL CSSCAL( NRHS, S, B( I, 1 ), LDB ) + ELSEIF ( I .GT. 1) THEN + IF ( IPIV(I-1) .EQ. IPIV(I) ) THEN + AKM1K = WORK(I) + AKM1 = A( I-1, I-1 ) / AKM1K + AK = A( I, I ) / CONJG( AKM1K ) + DENOM = AKM1*AK - ONE + DO 15 J = 1, NRHS + BKM1 = B( I-1, J ) / AKM1K + BK = B( I, J ) / CONJG( AKM1K ) + B( I-1, J ) = ( AK*BKM1-BK ) / DENOM + B( I, J ) = ( AKM1*BK-BKM1 ) / DENOM + 15 CONTINUE + I = I - 1 + ENDIF + ENDIF + I = I - 1 + END DO +* +* Compute (U**H \ B) -> B [ U**H \ (D \ (U \P**T * B) ) ] +* + CALL CTRSM('L','U','C','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* P * B [ P * (U**H \ (D \ (U \P**T * B) )) ] +* + K=1 + DO WHILE ( K .LE. N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K-1 and -IPIV(K). + KP = -IPIV( K ) + IF( K .LT. N .AND. KP.EQ.-IPIV( K+1 ) ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+2 + ENDIF + END DO +* + ELSE +* +* Solve A*X = B, where A = L*D*L**H. +* +* P**T * B + K=1 + DO WHILE ( K .LE. N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K and -IPIV(K+1). + KP = -IPIV( K+1 ) + IF( KP.EQ.-IPIV( K ) ) + $ CALL CSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+2 + ENDIF + END DO +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL CTRSM('L','L','N','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* Compute D \ B -> B [ D \ (L \P**T * B) ] +* + I=1 + DO WHILE ( I .LE. N ) + IF( IPIV(I) .GT. 0 ) THEN + S = REAL( ONE ) / REAL( A( I, I ) ) + CALL CSSCAL( NRHS, S, B( I, 1 ), LDB ) + ELSE + AKM1K = WORK(I) + AKM1 = A( I, I ) / CONJG( AKM1K ) + AK = A( I+1, I+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 25 J = 1, NRHS + BKM1 = B( I, J ) / CONJG( AKM1K ) + BK = B( I+1, J ) / AKM1K + B( I, J ) = ( AK*BKM1-BK ) / DENOM + B( I+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 25 CONTINUE + I = I + 1 + ENDIF + I = I + 1 + END DO +* +* Compute (L**H \ B) -> B [ L**H \ (D \ (L \P**T * B) ) ] +* + CALL CTRSM('L','L','C','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* P * B [ P * (L**H \ (D \ (L \P**T * B) )) ] +* + K=N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K-1 and -IPIV(K). + KP = -IPIV( K ) + IF( K.GT.1 .AND. KP.EQ.-IPIV( K-1 ) ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-2 + ENDIF + END DO +* + END IF +* +* Revert A +* + CALL CSYCONV( UPLO, 'R', N, A, LDA, IPIV, WORK, IINFO ) +* + RETURN +* +* End of CHETRS2 +* + END diff --git a/dspl/liblapack/SRC/chetrs_3.f b/dspl/liblapack/SRC/chetrs_3.f new file mode 100644 index 0000000..c7c77b9 --- /dev/null +++ b/dspl/liblapack/SRC/chetrs_3.f @@ -0,0 +1,374 @@ +*> \brief \b CHETRS_3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRS_3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> CHETRS_3 solves a system of linear equations A * X = B with a complex +*> Hermitian matrix A using the factorization computed +*> by CHETRF_RK or CHETRF_BK: +*> +*> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**H (or L**H) is the conjugate of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is Hermitian and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This algorithm is using Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix: +*> = 'U': Upper triangular, form is A = P*U*D*(U**H)*(P**T); +*> = 'L': Lower triangular, form is A = P*L*D*(L**H)*(P**T). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> Diagonal of the block diagonal matrix D and factors U or L +*> as computed by CHETRF_RK and CHETRF_BK: +*> a) ONLY diagonal elements of the Hermitian block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the Hermitian block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHETRF_RK or CHETRF_BK. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexHEcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> June 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHETRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0,0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J, K, KP + REAL S + COMPLEX AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL, CSWAP, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, REAL +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRS_3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Begin Upper +* +* Solve A*X = B, where A = U*D*U**H. +* +* P**T * B +* +* Interchange rows K and IPIV(K) of matrix B in the same order +* that the formation order of IPIV(I) vector for Upper case. +* +* (We can do the simple loop over IPIV with decrement -1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = N, 1, -1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* +* Compute (U \P**T * B) -> B [ (U \P**T * B) ] +* + CALL CTRSM( 'L', 'U', 'N', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* Compute D \ B -> B [ D \ (U \P**T * B) ] +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN + S = REAL( ONE ) / REAL( A( I, I ) ) + CALL CSSCAL( NRHS, S, B( I, 1 ), LDB ) + ELSE IF ( I.GT.1 ) THEN + AKM1K = E( I ) + AKM1 = A( I-1, I-1 ) / AKM1K + AK = A( I, I ) / CONJG( AKM1K ) + DENOM = AKM1*AK - ONE + DO J = 1, NRHS + BKM1 = B( I-1, J ) / AKM1K + BK = B( I, J ) / CONJG( AKM1K ) + B( I-1, J ) = ( AK*BKM1-BK ) / DENOM + B( I, J ) = ( AKM1*BK-BKM1 ) / DENOM + END DO + I = I - 1 + END IF + I = I - 1 + END DO +* +* Compute (U**H \ B) -> B [ U**H \ (D \ (U \P**T * B) ) ] +* + CALL CTRSM( 'L', 'U', 'C', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* P * B [ P * (U**H \ (D \ (U \P**T * B) )) ] +* +* Interchange rows K and IPIV(K) of matrix B in reverse order +* from the formation order of IPIV(I) vector for Upper case. +* +* (We can do the simple loop over IPIV with increment 1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = 1, N, 1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* + ELSE +* +* Begin Lower +* +* Solve A*X = B, where A = L*D*L**H. +* +* P**T * B +* Interchange rows K and IPIV(K) of matrix B in the same order +* that the formation order of IPIV(I) vector for Lower case. +* +* (We can do the simple loop over IPIV with increment 1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = 1, N, 1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL CTRSM( 'L', 'L', 'N', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* Compute D \ B -> B [ D \ (L \P**T * B) ] +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN + S = REAL( ONE ) / REAL( A( I, I ) ) + CALL CSSCAL( NRHS, S, B( I, 1 ), LDB ) + ELSE IF( I.LT.N ) THEN + AKM1K = E( I ) + AKM1 = A( I, I ) / CONJG( AKM1K ) + AK = A( I+1, I+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO J = 1, NRHS + BKM1 = B( I, J ) / CONJG( AKM1K ) + BK = B( I+1, J ) / AKM1K + B( I, J ) = ( AK*BKM1-BK ) / DENOM + B( I+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + END DO + I = I + 1 + END IF + I = I + 1 + END DO +* +* Compute (L**H \ B) -> B [ L**H \ (D \ (L \P**T * B) ) ] +* + CALL CTRSM('L', 'L', 'C', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* P * B [ P * (L**H \ (D \ (L \P**T * B) )) ] +* +* Interchange rows K and IPIV(K) of matrix B in reverse order +* from the formation order of IPIV(I) vector for Lower case. +* +* (We can do the simple loop over IPIV with decrement -1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = N, 1, -1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* +* END Lower +* + END IF +* + RETURN +* +* End of CHETRS_3 +* + END diff --git a/dspl/liblapack/SRC/chetrs_aa.f b/dspl/liblapack/SRC/chetrs_aa.f new file mode 100644 index 0000000..50e5692 --- /dev/null +++ b/dspl/liblapack/SRC/chetrs_aa.f @@ -0,0 +1,295 @@ +*> \brief \b CHETRS_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRS_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRS_AA solves a system of linear equations A*X = B with a complex +*> hermitian matrix A using the factorization A = U*T*U**H or +*> A = L*T*L**H computed by CHETRF_AA. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*T*U**H; +*> = 'L': Lower triangular, form is A = L*T*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> Details of factors computed by CHETRF_AA. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges as computed by CHETRF_AA. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is DOUBLE array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER, LWORK >= MAX(1,3*N-2). +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexHEcomputational +* +* ===================================================================== + SUBROUTINE CHETRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* + COMPLEX ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER K, KP, LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACPY, CLACGV, CGTSV, CSWAP, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, 3*N-2 ) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRS_AA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + LWKOPT = (3*N-2) + WORK( 1 ) = LWKOPT + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*T*U**T. +* +* P**T * B +* + K = 1 + DO WHILE ( K.LE.N ) + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + END DO +* +* Compute (U \P**T * B) -> B [ (U \P**T * B) ] +* + CALL CTRSM('L', 'U', 'C', 'U', N-1, NRHS, ONE, A( 1, 2 ), LDA, + $ B( 2, 1 ), LDB) +* +* Compute T \ B -> B [ T \ (U \P**T * B) ] +* + CALL CLACPY( 'F', 1, N, A(1, 1), LDA+1, WORK(N), 1) + IF( N.GT.1 ) THEN + CALL CLACPY( 'F', 1, N-1, A( 1, 2 ), LDA+1, WORK( 2*N ), 1) + CALL CLACPY( 'F', 1, N-1, A( 1, 2 ), LDA+1, WORK( 1 ), 1) + CALL CLACGV( N-1, WORK( 1 ), 1 ) + END IF + CALL CGTSV(N, NRHS, WORK(1), WORK(N), WORK(2*N), B, LDB, + $ INFO) +* +* Compute (U**T \ B) -> B [ U**T \ (T \ (U \P**T * B) ) ] +* + CALL CTRSM( 'L', 'U', 'N', 'U', N-1, NRHS, ONE, A( 1, 2 ), LDA, + $ B(2, 1), LDB) +* +* Pivot, P * B [ P * (U**T \ (T \ (U \P**T * B) )) ] +* + K = N + DO WHILE ( K.GE.1 ) + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + END DO +* + ELSE +* +* Solve A*X = B, where A = L*T*L**T. +* +* Pivot, P**T * B +* + K = 1 + DO WHILE ( K.LE.N ) + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + END DO +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL CTRSM( 'L', 'L', 'N', 'U', N-1, NRHS, ONE, A( 2, 1), LDA, + $ B(2, 1), LDB) +* +* Compute T \ B -> B [ T \ (L \P**T * B) ] +* + CALL CLACPY( 'F', 1, N, A(1, 1), LDA+1, WORK(N), 1) + IF( N.GT.1 ) THEN + CALL CLACPY( 'F', 1, N-1, A( 2, 1 ), LDA+1, WORK( 1 ), 1) + CALL CLACPY( 'F', 1, N-1, A( 2, 1 ), LDA+1, WORK( 2*N ), 1) + CALL CLACGV( N-1, WORK( 2*N ), 1 ) + END IF + CALL CGTSV(N, NRHS, WORK(1), WORK(N), WORK(2*N), B, LDB, + $ INFO) +* +* Compute (L**T \ B) -> B [ L**T \ (T \ (L \P**T * B) ) ] +* + CALL CTRSM( 'L', 'L', 'C', 'U', N-1, NRHS, ONE, A( 2, 1 ), LDA, + $ B( 2, 1 ), LDB) +* +* Pivot, P * B [ P * (L**T \ (T \ (L \P**T * B) )) ] +* + K = N + DO WHILE ( K.GE.1 ) + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + END DO +* + END IF +* + RETURN +* +* End of CHETRS_AA +* + END diff --git a/dspl/liblapack/SRC/chetrs_aa_2stage.f b/dspl/liblapack/SRC/chetrs_aa_2stage.f new file mode 100644 index 0000000..3f85766 --- /dev/null +++ b/dspl/liblapack/SRC/chetrs_aa_2stage.f @@ -0,0 +1,283 @@ +*> \brief \b CHETRS_AA_2STAGE +* +* @generated from SRC/dsytrs_aa_2stage.f, fortran d -> c, Mon Oct 30 11:59:02 2017 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRS_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, +* IPIV2, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRS_AA_2STAGE solves a system of linear equations A*X = B with a real +*> hermitian matrix A using the factorization A = U*T*U**T or +*> A = L*T*L**T computed by CHETRF_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*T*U**T; +*> = 'L': Lower triangular, form is A = L*T*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> Details of factors computed by CHETRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX array, dimension (LTB) +*> Details of factors computed by CHETRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> CHETRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] IPIV2 +*> \verbatim +*> IPIV2 is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> CHETRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CHETRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER LDTB, NB + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGBTRS, CLASWP, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LTB.LT.( 4*N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRS_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Read NB and compute LDTB +* + NB = INT( TB( 1 ) ) + LDTB = LTB/N +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*T*U**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL CLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (U**T \P**T * B) -> B [ (U**T \P**T * B) ] +* + CALL CTRSM( 'L', 'U', 'C', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (U**T \P**T * B) ] +* + CALL CGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (U \ B) -> B [ U \ (T \ (U**T \P**T * B) ) ] +* + CALL CTRSM( 'L', 'U', 'N', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (U \ (T \ (U**T \P**T * B) )) ] +* + CALL CLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF +* + ELSE +* +* Solve A*X = B, where A = L*T*L**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL CLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL CTRSM( 'L', 'L', 'N', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (L \P**T * B) ] +* + CALL CGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (L**T \ B) -> B [ L**T \ (T \ (L \P**T * B) ) ] +* + CALL CTRSM( 'L', 'L', 'C', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (L**T \ (T \ (L \P**T * B) )) ] +* + CALL CLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF + END IF +* + RETURN +* +* End of CHETRS_AA_2STAGE +* + END diff --git a/dspl/liblapack/SRC/chetrs_rook.f b/dspl/liblapack/SRC/chetrs_rook.f new file mode 100644 index 0000000..2b0cc6d --- /dev/null +++ b/dspl/liblapack/SRC/chetrs_rook.f @@ -0,0 +1,503 @@ +*> \brief \b CHETRS_ROOK computes the solution to a system of linear equations A * X = B for HE matrices using factorization obtained with one of the bounded diagonal pivoting methods (max 2 interchanges) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHETRS_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHETRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHETRS_ROOK solves a system of linear equations A*X = B with a complex +*> Hermitian matrix A using the factorization A = U*D*U**H or +*> A = L*D*L**H computed by CHETRF_ROOK. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CHETRF_ROOK. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHETRF_ROOK. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup complexHEcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CHETRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP + REAL S + COMPLEX AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CGERU, CLACGV, CSSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, REAL +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHETRS_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**H. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL CGERU( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + S = REAL( ONE ) / REAL( A( K, K ) ) + CALL CSSCAL( NRHS, S, B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K and -IPIV(K), then K-1 and -IPIV(K-1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K-1) + IF( KP.NE.K-1 ) + $ CALL CSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL CGERU( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL CGERU( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ), + $ LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K-1, K ) + AKM1 = A( K-1, K-1 ) / AKM1K + AK = A( K, K ) / CONJG( AKM1K ) + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / CONJG( AKM1K ) + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U**H *X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U**H(K)), where U(K) is the transformation +* stored in column K of A. +* + IF( K.GT.1 ) THEN + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + END IF +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U**H(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.GT.1 ) THEN + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL CLACGV( NRHS, B( K, 1 ), LDB ) +* + CALL CLACGV( NRHS, B( K+1, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB ) + CALL CLACGV( NRHS, B( K+1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K), then K+1 and -IPIV(K+1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K+1 ) + IF( KP.NE.K+1 ) + $ CALL CSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L**H. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL CGERU( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + S = REAL( ONE ) / REAL( A( K, K ) ) + CALL CSSCAL( NRHS, S, B( K, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K and -IPIV(K), then K+1 and -IPIV(K+1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K+1 ) + IF( KP.NE.K+1 ) + $ CALL CSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL CGERU( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL CGERU( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K+1, K ) + AKM1 = A( K, K ) / CONJG( AKM1K ) + AK = A( K+1, K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / CONJG( AKM1K ) + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L**H *X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L**H(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) THEN + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, A( K+1, K ), 1, ONE, + $ B( K, 1 ), LDB ) + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + END IF +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L**H(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, A( K+1, K ), 1, ONE, + $ B( K, 1 ), LDB ) + CALL CLACGV( NRHS, B( K, 1 ), LDB ) +* + CALL CLACGV( NRHS, B( K-1, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, A( K+1, K-1 ), 1, ONE, + $ B( K-1, 1 ), LDB ) + CALL CLACGV( NRHS, B( K-1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K), then K-1 and -IPIV(K-1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K-1 ) + IF( KP.NE.K-1 ) + $ CALL CSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of CHETRS_ROOK +* + END diff --git a/dspl/liblapack/SRC/chfrk.f b/dspl/liblapack/SRC/chfrk.f new file mode 100644 index 0000000..3378e6f --- /dev/null +++ b/dspl/liblapack/SRC/chfrk.f @@ -0,0 +1,553 @@ +*> \brief \b CHFRK performs a Hermitian rank-k operation for matrix in RFP format. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHFRK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHFRK( TRANSR, UPLO, TRANS, N, K, ALPHA, A, LDA, BETA, +* C ) +* +* .. Scalar Arguments .. +* REAL ALPHA, BETA +* INTEGER K, LDA, N +* CHARACTER TRANS, TRANSR, UPLO +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), C( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Level 3 BLAS like routine for C in RFP Format. +*> +*> CHFRK performs one of the Hermitian rank--k operations +*> +*> C := alpha*A*A**H + beta*C, +*> +*> or +*> +*> C := alpha*A**H*A + beta*C, +*> +*> where alpha and beta are real scalars, C is an n--by--n Hermitian +*> matrix and A is an n--by--k matrix in the first case and a k--by--n +*> matrix in the second case. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal Form of RFP A is stored; +*> = 'C': The Conjugate-transpose Form of RFP A is stored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array C is to be referenced as +*> follows: +*> +*> UPLO = 'U' or 'u' Only the upper triangular part of C +*> is to be referenced. +*> +*> UPLO = 'L' or 'l' Only the lower triangular part of C +*> is to be referenced. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> On entry, TRANS specifies the operation to be performed as +*> follows: +*> +*> TRANS = 'N' or 'n' C := alpha*A*A**H + beta*C. +*> +*> TRANS = 'C' or 'c' C := alpha*A**H*A + beta*C. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of the matrix C. N must be +*> at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> On entry with TRANS = 'N' or 'n', K specifies the number +*> of columns of the matrix A, and on entry with +*> TRANS = 'C' or 'c', K specifies the number of rows of the +*> matrix A. K must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is REAL +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,ka) +*> where KA +*> is K when TRANS = 'N' or 'n', and is N otherwise. Before +*> entry with TRANS = 'N' or 'n', the leading N--by--K part of +*> the array A must contain the matrix A, otherwise the leading +*> K--by--N part of the array A must contain the matrix A. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. When TRANS = 'N' or 'n' +*> then LDA must be at least max( 1, n ), otherwise LDA must +*> be at least max( 1, k ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is REAL +*> On entry, BETA specifies the scalar beta. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the matrix A in RFP Format. RFP Format is +*> described by TRANSR, UPLO and N. Note that the imaginary +*> parts of the diagonal elements need not be set, they are +*> assumed to be zero, and on exit they are set to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CHFRK( TRANSR, UPLO, TRANS, N, K, ALPHA, A, LDA, BETA, + $ C ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + REAL ALPHA, BETA + INTEGER K, LDA, N + CHARACTER TRANS, TRANSR, UPLO +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Parameters .. + REAL ONE, ZERO + COMPLEX CZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, NORMALTRANSR, NISODD, NOTRANS + INTEGER INFO, NROWA, J, NK, N1, N2 + COMPLEX CALPHA, CBETA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CHERK, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, CMPLX +* .. +* .. Executable Statements .. +* +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + NOTRANS = LSAME( TRANS, 'N' ) +* + IF( NOTRANS ) THEN + NROWA = N + ELSE + NROWA = K + END IF +* + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRANS .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NROWA ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHFRK ', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* +* The quick return case: ((ALPHA.EQ.0).AND.(BETA.NE.ZERO)) is not +* done (it is in CHERK for example) and left in the general case. +* + IF( ( N.EQ.0 ) .OR. ( ( ( ALPHA.EQ.ZERO ) .OR. ( K.EQ.0 ) ) .AND. + $ ( BETA.EQ.ONE ) ) )RETURN +* + IF( ( ALPHA.EQ.ZERO ) .AND. ( BETA.EQ.ZERO ) ) THEN + DO J = 1, ( ( N*( N+1 ) ) / 2 ) + C( J ) = CZERO + END DO + RETURN + END IF +* + CALPHA = CMPLX( ALPHA, ZERO ) + CBETA = CMPLX( BETA, ZERO ) +* +* C is N-by-N. +* If N is odd, set NISODD = .TRUE., and N1 and N2. +* If N is even, NISODD = .FALSE., and NK. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + NISODD = .FALSE. + NK = N / 2 + ELSE + NISODD = .TRUE. + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF + END IF +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* N is odd, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* N is odd, TRANSR = 'N', UPLO = 'L', and TRANS = 'N' +* + CALL CHERK( 'L', 'N', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 1 ), N ) + CALL CHERK( 'U', 'N', N2, K, ALPHA, A( N1+1, 1 ), LDA, + $ BETA, C( N+1 ), N ) + CALL CGEMM( 'N', 'C', N2, N1, K, CALPHA, A( N1+1, 1 ), + $ LDA, A( 1, 1 ), LDA, CBETA, C( N1+1 ), N ) +* + ELSE +* +* N is odd, TRANSR = 'N', UPLO = 'L', and TRANS = 'C' +* + CALL CHERK( 'L', 'C', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 1 ), N ) + CALL CHERK( 'U', 'C', N2, K, ALPHA, A( 1, N1+1 ), LDA, + $ BETA, C( N+1 ), N ) + CALL CGEMM( 'C', 'N', N2, N1, K, CALPHA, A( 1, N1+1 ), + $ LDA, A( 1, 1 ), LDA, CBETA, C( N1+1 ), N ) +* + END IF +* + ELSE +* +* N is odd, TRANSR = 'N', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* N is odd, TRANSR = 'N', UPLO = 'U', and TRANS = 'N' +* + CALL CHERK( 'L', 'N', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( N2+1 ), N ) + CALL CHERK( 'U', 'N', N2, K, ALPHA, A( N2, 1 ), LDA, + $ BETA, C( N1+1 ), N ) + CALL CGEMM( 'N', 'C', N1, N2, K, CALPHA, A( 1, 1 ), + $ LDA, A( N2, 1 ), LDA, CBETA, C( 1 ), N ) +* + ELSE +* +* N is odd, TRANSR = 'N', UPLO = 'U', and TRANS = 'C' +* + CALL CHERK( 'L', 'C', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( N2+1 ), N ) + CALL CHERK( 'U', 'C', N2, K, ALPHA, A( 1, N2 ), LDA, + $ BETA, C( N1+1 ), N ) + CALL CGEMM( 'C', 'N', N1, N2, K, CALPHA, A( 1, 1 ), + $ LDA, A( 1, N2 ), LDA, CBETA, C( 1 ), N ) +* + END IF +* + END IF +* + ELSE +* +* N is odd, and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* N is odd, TRANSR = 'C', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* N is odd, TRANSR = 'C', UPLO = 'L', and TRANS = 'N' +* + CALL CHERK( 'U', 'N', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 1 ), N1 ) + CALL CHERK( 'L', 'N', N2, K, ALPHA, A( N1+1, 1 ), LDA, + $ BETA, C( 2 ), N1 ) + CALL CGEMM( 'N', 'C', N1, N2, K, CALPHA, A( 1, 1 ), + $ LDA, A( N1+1, 1 ), LDA, CBETA, + $ C( N1*N1+1 ), N1 ) +* + ELSE +* +* N is odd, TRANSR = 'C', UPLO = 'L', and TRANS = 'C' +* + CALL CHERK( 'U', 'C', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 1 ), N1 ) + CALL CHERK( 'L', 'C', N2, K, ALPHA, A( 1, N1+1 ), LDA, + $ BETA, C( 2 ), N1 ) + CALL CGEMM( 'C', 'N', N1, N2, K, CALPHA, A( 1, 1 ), + $ LDA, A( 1, N1+1 ), LDA, CBETA, + $ C( N1*N1+1 ), N1 ) +* + END IF +* + ELSE +* +* N is odd, TRANSR = 'C', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* N is odd, TRANSR = 'C', UPLO = 'U', and TRANS = 'N' +* + CALL CHERK( 'U', 'N', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( N2*N2+1 ), N2 ) + CALL CHERK( 'L', 'N', N2, K, ALPHA, A( N1+1, 1 ), LDA, + $ BETA, C( N1*N2+1 ), N2 ) + CALL CGEMM( 'N', 'C', N2, N1, K, CALPHA, A( N1+1, 1 ), + $ LDA, A( 1, 1 ), LDA, CBETA, C( 1 ), N2 ) +* + ELSE +* +* N is odd, TRANSR = 'C', UPLO = 'U', and TRANS = 'C' +* + CALL CHERK( 'U', 'C', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( N2*N2+1 ), N2 ) + CALL CHERK( 'L', 'C', N2, K, ALPHA, A( 1, N1+1 ), LDA, + $ BETA, C( N1*N2+1 ), N2 ) + CALL CGEMM( 'C', 'N', N2, N1, K, CALPHA, A( 1, N1+1 ), + $ LDA, A( 1, 1 ), LDA, CBETA, C( 1 ), N2 ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* N is even, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* N is even, TRANSR = 'N', UPLO = 'L', and TRANS = 'N' +* + CALL CHERK( 'L', 'N', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 2 ), N+1 ) + CALL CHERK( 'U', 'N', NK, K, ALPHA, A( NK+1, 1 ), LDA, + $ BETA, C( 1 ), N+1 ) + CALL CGEMM( 'N', 'C', NK, NK, K, CALPHA, A( NK+1, 1 ), + $ LDA, A( 1, 1 ), LDA, CBETA, C( NK+2 ), + $ N+1 ) +* + ELSE +* +* N is even, TRANSR = 'N', UPLO = 'L', and TRANS = 'C' +* + CALL CHERK( 'L', 'C', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 2 ), N+1 ) + CALL CHERK( 'U', 'C', NK, K, ALPHA, A( 1, NK+1 ), LDA, + $ BETA, C( 1 ), N+1 ) + CALL CGEMM( 'C', 'N', NK, NK, K, CALPHA, A( 1, NK+1 ), + $ LDA, A( 1, 1 ), LDA, CBETA, C( NK+2 ), + $ N+1 ) +* + END IF +* + ELSE +* +* N is even, TRANSR = 'N', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* N is even, TRANSR = 'N', UPLO = 'U', and TRANS = 'N' +* + CALL CHERK( 'L', 'N', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK+2 ), N+1 ) + CALL CHERK( 'U', 'N', NK, K, ALPHA, A( NK+1, 1 ), LDA, + $ BETA, C( NK+1 ), N+1 ) + CALL CGEMM( 'N', 'C', NK, NK, K, CALPHA, A( 1, 1 ), + $ LDA, A( NK+1, 1 ), LDA, CBETA, C( 1 ), + $ N+1 ) +* + ELSE +* +* N is even, TRANSR = 'N', UPLO = 'U', and TRANS = 'C' +* + CALL CHERK( 'L', 'C', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK+2 ), N+1 ) + CALL CHERK( 'U', 'C', NK, K, ALPHA, A( 1, NK+1 ), LDA, + $ BETA, C( NK+1 ), N+1 ) + CALL CGEMM( 'C', 'N', NK, NK, K, CALPHA, A( 1, 1 ), + $ LDA, A( 1, NK+1 ), LDA, CBETA, C( 1 ), + $ N+1 ) +* + END IF +* + END IF +* + ELSE +* +* N is even, and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* N is even, TRANSR = 'C', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* N is even, TRANSR = 'C', UPLO = 'L', and TRANS = 'N' +* + CALL CHERK( 'U', 'N', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK+1 ), NK ) + CALL CHERK( 'L', 'N', NK, K, ALPHA, A( NK+1, 1 ), LDA, + $ BETA, C( 1 ), NK ) + CALL CGEMM( 'N', 'C', NK, NK, K, CALPHA, A( 1, 1 ), + $ LDA, A( NK+1, 1 ), LDA, CBETA, + $ C( ( ( NK+1 )*NK )+1 ), NK ) +* + ELSE +* +* N is even, TRANSR = 'C', UPLO = 'L', and TRANS = 'C' +* + CALL CHERK( 'U', 'C', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK+1 ), NK ) + CALL CHERK( 'L', 'C', NK, K, ALPHA, A( 1, NK+1 ), LDA, + $ BETA, C( 1 ), NK ) + CALL CGEMM( 'C', 'N', NK, NK, K, CALPHA, A( 1, 1 ), + $ LDA, A( 1, NK+1 ), LDA, CBETA, + $ C( ( ( NK+1 )*NK )+1 ), NK ) +* + END IF +* + ELSE +* +* N is even, TRANSR = 'C', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* N is even, TRANSR = 'C', UPLO = 'U', and TRANS = 'N' +* + CALL CHERK( 'U', 'N', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK*( NK+1 )+1 ), NK ) + CALL CHERK( 'L', 'N', NK, K, ALPHA, A( NK+1, 1 ), LDA, + $ BETA, C( NK*NK+1 ), NK ) + CALL CGEMM( 'N', 'C', NK, NK, K, CALPHA, A( NK+1, 1 ), + $ LDA, A( 1, 1 ), LDA, CBETA, C( 1 ), NK ) +* + ELSE +* +* N is even, TRANSR = 'C', UPLO = 'U', and TRANS = 'C' +* + CALL CHERK( 'U', 'C', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK*( NK+1 )+1 ), NK ) + CALL CHERK( 'L', 'C', NK, K, ALPHA, A( 1, NK+1 ), LDA, + $ BETA, C( NK*NK+1 ), NK ) + CALL CGEMM( 'C', 'N', NK, NK, K, CALPHA, A( 1, NK+1 ), + $ LDA, A( 1, 1 ), LDA, CBETA, C( 1 ), NK ) +* + END IF +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of CHFRK +* + END diff --git a/dspl/liblapack/SRC/chgeqz.f b/dspl/liblapack/SRC/chgeqz.f new file mode 100644 index 0000000..73d3562 --- /dev/null +++ b/dspl/liblapack/SRC/chgeqz.f @@ -0,0 +1,875 @@ +*> \brief \b CHGEQZ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHGEQZ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, +* ALPHA, BETA, Q, LDQ, Z, LDZ, WORK, LWORK, +* RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ, COMPZ, JOB +* INTEGER IHI, ILO, INFO, LDH, LDQ, LDT, LDZ, LWORK, N +* .. +* .. Array Arguments .. +* REAL RWORK( * ) +* COMPLEX ALPHA( * ), BETA( * ), H( LDH, * ), +* $ Q( LDQ, * ), T( LDT, * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHGEQZ computes the eigenvalues of a complex matrix pair (H,T), +*> where H is an upper Hessenberg matrix and T is upper triangular, +*> using the single-shift QZ method. +*> Matrix pairs of this type are produced by the reduction to +*> generalized upper Hessenberg form of a complex matrix pair (A,B): +*> +*> A = Q1*H*Z1**H, B = Q1*T*Z1**H, +*> +*> as computed by CGGHRD. +*> +*> If JOB='S', then the Hessenberg-triangular pair (H,T) is +*> also reduced to generalized Schur form, +*> +*> H = Q*S*Z**H, T = Q*P*Z**H, +*> +*> where Q and Z are unitary matrices and S and P are upper triangular. +*> +*> Optionally, the unitary matrix Q from the generalized Schur +*> factorization may be postmultiplied into an input matrix Q1, and the +*> unitary matrix Z may be postmultiplied into an input matrix Z1. +*> If Q1 and Z1 are the unitary matrices from CGGHRD that reduced +*> the matrix pair (A,B) to generalized Hessenberg form, then the output +*> matrices Q1*Q and Z1*Z are the unitary factors from the generalized +*> Schur factorization of (A,B): +*> +*> A = (Q1*Q)*S*(Z1*Z)**H, B = (Q1*Q)*P*(Z1*Z)**H. +*> +*> To avoid overflow, eigenvalues of the matrix pair (H,T) +*> (equivalently, of (A,B)) are computed as a pair of complex values +*> (alpha,beta). If beta is nonzero, lambda = alpha / beta is an +*> eigenvalue of the generalized nonsymmetric eigenvalue problem (GNEP) +*> A*x = lambda*B*x +*> and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the +*> alternate form of the GNEP +*> mu*A*y = B*y. +*> The values of alpha and beta for the i-th eigenvalue can be read +*> directly from the generalized Schur form: alpha = S(i,i), +*> beta = P(i,i). +*> +*> Ref: C.B. Moler & G.W. Stewart, "An Algorithm for Generalized Matrix +*> Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), +*> pp. 241--256. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> = 'E': Compute eigenvalues only; +*> = 'S': Computer eigenvalues and the Schur form. +*> \endverbatim +*> +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> = 'N': Left Schur vectors (Q) are not computed; +*> = 'I': Q is initialized to the unit matrix and the matrix Q +*> of left Schur vectors of (H,T) is returned; +*> = 'V': Q must contain a unitary matrix Q1 on entry and +*> the product Q1*Q is returned. +*> \endverbatim +*> +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': Right Schur vectors (Z) are not computed; +*> = 'I': Q is initialized to the unit matrix and the matrix Z +*> of right Schur vectors of (H,T) is returned; +*> = 'V': Z must contain a unitary matrix Z1 on entry and +*> the product Z1*Z is returned. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices H, T, Q, and Z. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> ILO and IHI mark the rows and columns of H which are in +*> Hessenberg form. It is assumed that A is already upper +*> triangular in rows and columns 1:ILO-1 and IHI+1:N. +*> If N > 0, 1 <= ILO <= IHI <= N; if N = 0, ILO=1 and IHI=0. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is COMPLEX array, dimension (LDH, N) +*> On entry, the N-by-N upper Hessenberg matrix H. +*> On exit, if JOB = 'S', H contains the upper triangular +*> matrix S from the generalized Schur factorization. +*> If JOB = 'E', the diagonal of H matches that of S, but +*> the rest of H is unspecified. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH >= max( 1, N ). +*> \endverbatim +*> +*> \param[in,out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT, N) +*> On entry, the N-by-N upper triangular matrix T. +*> On exit, if JOB = 'S', T contains the upper triangular +*> matrix P from the generalized Schur factorization. +*> If JOB = 'E', the diagonal of T matches that of P, but +*> the rest of T is unspecified. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max( 1, N ). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX array, dimension (N) +*> The complex scalars alpha that define the eigenvalues of +*> GNEP. ALPHA(i) = S(i,i) in the generalized Schur +*> factorization. +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is COMPLEX array, dimension (N) +*> The real non-negative scalars beta that define the +*> eigenvalues of GNEP. BETA(i) = P(i,i) in the generalized +*> Schur factorization. +*> +*> Together, the quantities alpha = ALPHA(j) and beta = BETA(j) +*> represent the j-th eigenvalue of the matrix pair (A,B), in +*> one of the forms lambda = alpha/beta or mu = beta/alpha. +*> Since either lambda or mu may overflow, they should not, +*> in general, be computed. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ, N) +*> On entry, if COMPQ = 'V', the unitary matrix Q1 used in the +*> reduction of (A,B) to generalized Hessenberg form. +*> On exit, if COMPQ = 'I', the unitary matrix of left Schur +*> vectors of (H,T), and if COMPQ = 'V', the unitary matrix of +*> left Schur vectors of (A,B). +*> Not referenced if COMPQ = 'N'. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= 1. +*> If COMPQ='V' or 'I', then LDQ >= N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', the unitary matrix Z1 used in the +*> reduction of (A,B) to generalized Hessenberg form. +*> On exit, if COMPZ = 'I', the unitary matrix of right Schur +*> vectors of (H,T), and if COMPZ = 'V', the unitary matrix of +*> right Schur vectors of (A,B). +*> Not referenced if COMPZ = 'N'. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1. +*> If COMPZ='V' or 'I', then LDZ >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO >= 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> = 1,...,N: the QZ iteration did not converge. (H,T) is not +*> in Schur form, but ALPHA(i) and BETA(i), +*> i=INFO+1,...,N should be correct. +*> = N+1,...,2*N: the shift calculation failed. (H,T) is not +*> in Schur form, but ALPHA(i) and BETA(i), +*> i=INFO-N+1,...,N should be correct. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We assume that complex ABS works as long as its value is less than +*> overflow. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, + $ ALPHA, BETA, Q, LDQ, Z, LDZ, WORK, LWORK, + $ RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ, JOB + INTEGER IHI, ILO, INFO, LDH, LDQ, LDT, LDZ, LWORK, N +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX ALPHA( * ), BETA( * ), H( LDH, * ), + $ Q( LDQ, * ), T( LDT, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL HALF + PARAMETER ( HALF = 0.5E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ILAZR2, ILAZRO, ILQ, ILSCHR, ILZ, LQUERY + INTEGER ICOMPQ, ICOMPZ, IFIRST, IFRSTM, IITER, ILAST, + $ ILASTM, IN, ISCHUR, ISTART, J, JC, JCH, JITER, + $ JR, MAXIT + REAL ABSB, ANORM, ASCALE, ATOL, BNORM, BSCALE, BTOL, + $ C, SAFMIN, TEMP, TEMP2, TEMPR, ULP + COMPLEX ABI22, AD11, AD12, AD21, AD22, CTEMP, CTEMP2, + $ CTEMP3, ESHIFT, RTDISC, S, SHIFT, SIGNBC, T1, + $ U12, X +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHS, SLAMCH + EXTERNAL LSAME, CLANHS, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLARTG, CLASET, CROT, CSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL, SQRT +* .. +* .. Statement Functions .. + REAL ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( REAL( X ) ) + ABS( AIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode JOB, COMPQ, COMPZ +* + IF( LSAME( JOB, 'E' ) ) THEN + ILSCHR = .FALSE. + ISCHUR = 1 + ELSE IF( LSAME( JOB, 'S' ) ) THEN + ILSCHR = .TRUE. + ISCHUR = 2 + ELSE + ISCHUR = 0 + END IF +* + IF( LSAME( COMPQ, 'N' ) ) THEN + ILQ = .FALSE. + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'V' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 2 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 3 + ELSE + ICOMPQ = 0 + END IF +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ILZ = .FALSE. + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 2 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 3 + ELSE + ICOMPZ = 0 + END IF +* +* Check Argument Values +* + INFO = 0 + WORK( 1 ) = MAX( 1, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( ISCHUR.EQ.0 ) THEN + INFO = -1 + ELSE IF( ICOMPQ.EQ.0 ) THEN + INFO = -2 + ELSE IF( ICOMPZ.EQ.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( ILO.LT.1 ) THEN + INFO = -5 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -6 + ELSE IF( LDH.LT.N ) THEN + INFO = -8 + ELSE IF( LDT.LT.N ) THEN + INFO = -10 + ELSE IF( LDQ.LT.1 .OR. ( ILQ .AND. LDQ.LT.N ) ) THEN + INFO = -14 + ELSE IF( LDZ.LT.1 .OR. ( ILZ .AND. LDZ.LT.N ) ) THEN + INFO = -16 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHGEQZ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* +* WORK( 1 ) = CMPLX( 1 ) + IF( N.LE.0 ) THEN + WORK( 1 ) = CMPLX( 1 ) + RETURN + END IF +* +* Initialize Q and Z +* + IF( ICOMPQ.EQ.3 ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) + IF( ICOMPZ.EQ.3 ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, Z, LDZ ) +* +* Machine Constants +* + IN = IHI + 1 - ILO + SAFMIN = SLAMCH( 'S' ) + ULP = SLAMCH( 'E' )*SLAMCH( 'B' ) + ANORM = CLANHS( 'F', IN, H( ILO, ILO ), LDH, RWORK ) + BNORM = CLANHS( 'F', IN, T( ILO, ILO ), LDT, RWORK ) + ATOL = MAX( SAFMIN, ULP*ANORM ) + BTOL = MAX( SAFMIN, ULP*BNORM ) + ASCALE = ONE / MAX( SAFMIN, ANORM ) + BSCALE = ONE / MAX( SAFMIN, BNORM ) +* +* +* Set Eigenvalues IHI+1:N +* + DO 10 J = IHI + 1, N + ABSB = ABS( T( J, J ) ) + IF( ABSB.GT.SAFMIN ) THEN + SIGNBC = CONJG( T( J, J ) / ABSB ) + T( J, J ) = ABSB + IF( ILSCHR ) THEN + CALL CSCAL( J-1, SIGNBC, T( 1, J ), 1 ) + CALL CSCAL( J, SIGNBC, H( 1, J ), 1 ) + ELSE + CALL CSCAL( 1, SIGNBC, H( J, J ), 1 ) + END IF + IF( ILZ ) + $ CALL CSCAL( N, SIGNBC, Z( 1, J ), 1 ) + ELSE + T( J, J ) = CZERO + END IF + ALPHA( J ) = H( J, J ) + BETA( J ) = T( J, J ) + 10 CONTINUE +* +* If IHI < ILO, skip QZ steps +* + IF( IHI.LT.ILO ) + $ GO TO 190 +* +* MAIN QZ ITERATION LOOP +* +* Initialize dynamic indices +* +* Eigenvalues ILAST+1:N have been found. +* Column operations modify rows IFRSTM:whatever +* Row operations modify columns whatever:ILASTM +* +* If only eigenvalues are being computed, then +* IFRSTM is the row of the last splitting row above row ILAST; +* this is always at least ILO. +* IITER counts iterations since the last eigenvalue was found, +* to tell when to use an extraordinary shift. +* MAXIT is the maximum number of QZ sweeps allowed. +* + ILAST = IHI + IF( ILSCHR ) THEN + IFRSTM = 1 + ILASTM = N + ELSE + IFRSTM = ILO + ILASTM = IHI + END IF + IITER = 0 + ESHIFT = CZERO + MAXIT = 30*( IHI-ILO+1 ) +* + DO 170 JITER = 1, MAXIT +* +* Check for too many iterations. +* + IF( JITER.GT.MAXIT ) + $ GO TO 180 +* +* Split the matrix if possible. +* +* Two tests: +* 1: H(j,j-1)=0 or j=ILO +* 2: T(j,j)=0 +* +* Special case: j=ILAST +* + IF( ILAST.EQ.ILO ) THEN + GO TO 60 + ELSE + IF( ABS1( H( ILAST, ILAST-1 ) ).LE.ATOL ) THEN + H( ILAST, ILAST-1 ) = CZERO + GO TO 60 + END IF + END IF +* + IF( ABS( T( ILAST, ILAST ) ).LE.BTOL ) THEN + T( ILAST, ILAST ) = CZERO + GO TO 50 + END IF +* +* General case: j \brief \b CHLA_TRANSTYPE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHLA_TRANSTYPE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* CHARACTER*1 FUNCTION CHLA_TRANSTYPE( TRANS ) +* +* .. Scalar Arguments .. +* INTEGER TRANS +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This subroutine translates from a BLAST-specified integer constant to +*> the character string specifying a transposition operation. +*> +*> CHLA_TRANSTYPE returns an CHARACTER*1. If CHLA_TRANSTYPE is 'X', +*> then input is not an integer indicating a transposition operator. +*> Otherwise CHLA_TRANSTYPE returns the constant value corresponding to +*> TRANS. +*> \endverbatim +* +* Arguments: +* ========== +* +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + CHARACTER*1 FUNCTION CHLA_TRANSTYPE( TRANS ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER TRANS +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER BLAS_NO_TRANS, BLAS_TRANS, BLAS_CONJ_TRANS + PARAMETER ( BLAS_NO_TRANS = 111, BLAS_TRANS = 112, + $ BLAS_CONJ_TRANS = 113 ) +* .. +* .. Executable Statements .. + IF( TRANS.EQ.BLAS_NO_TRANS ) THEN + CHLA_TRANSTYPE = 'N' + ELSE IF( TRANS.EQ.BLAS_TRANS ) THEN + CHLA_TRANSTYPE = 'T' + ELSE IF( TRANS.EQ.BLAS_CONJ_TRANS ) THEN + CHLA_TRANSTYPE = 'C' + ELSE + CHLA_TRANSTYPE = 'X' + END IF + RETURN +* +* End of CHLA_TRANSTYPE +* + END diff --git a/dspl/liblapack/SRC/chpcon.f b/dspl/liblapack/SRC/chpcon.f new file mode 100644 index 0000000..d48a8bd --- /dev/null +++ b/dspl/liblapack/SRC/chpcon.f @@ -0,0 +1,231 @@ +*> \brief \b CHPCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHPCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHPCON estimates the reciprocal of the condition number of a complex +*> Hermitian packed matrix A using the factorization A = U*D*U**H or +*> A = L*D*L**H computed by CHPTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CHPTRF, stored as a +*> packed triangular matrix. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHPTRF. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CHPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IP, KASE + REAL AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHPTRS, CLACN2, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + IP = N*( N+1 ) / 2 + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP - I + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + IP = 1 + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP + N - I + 1 + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**H) or inv(U*D*U**H). +* + CALL CHPTRS( UPLO, N, 1, AP, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of CHPCON +* + END diff --git a/dspl/liblapack/SRC/chpev.f b/dspl/liblapack/SRC/chpev.f new file mode 100644 index 0000000..4897090 --- /dev/null +++ b/dspl/liblapack/SRC/chpev.f @@ -0,0 +1,276 @@ +*> \brief CHPEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHPEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. +* REAL RWORK( * ), W( * ) +* COMPLEX AP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHPEV computes all the eigenvalues and, optionally, eigenvectors of a +*> complex Hermitian matrix in packed storage. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, AP is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the diagonal +*> and first superdiagonal of the tridiagonal matrix T overwrite +*> the corresponding elements of A, and if UPLO = 'L', the +*> diagonal and first subdiagonal of T overwrite the +*> corresponding elements of A. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (max(1, 2*N-1)) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (max(1, 3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHEReigen +* +* ===================================================================== + SUBROUTINE CHPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, RWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + REAL RWORK( * ), W( * ) + COMPLEX AP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTZ + INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWRK, + $ ISCALE + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHP, SLAMCH + EXTERNAL LSAME, CLANHP, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CHPTRD, CSSCAL, CSTEQR, CUPGTR, SSCAL, SSTERF, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPEV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AP( 1 ) + RWORK( 1 ) = 1 + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHP( 'M', UPLO, N, AP, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL CSSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + END IF +* +* Call CHPTRD to reduce Hermitian packed matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + CALL CHPTRD( UPLO, N, AP, W, RWORK( INDE ), WORK( INDTAU ), + $ IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* CUPGTR to generate the orthogonal matrix, then call CSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + INDWRK = INDTAU + N + CALL CUPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + INDRWK = INDE + N + CALL CSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + RETURN +* +* End of CHPEV +* + END diff --git a/dspl/liblapack/SRC/chpevd.f b/dspl/liblapack/SRC/chpevd.f new file mode 100644 index 0000000..b9c8720 --- /dev/null +++ b/dspl/liblapack/SRC/chpevd.f @@ -0,0 +1,378 @@ +*> \brief CHPEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHPEVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, +* RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX AP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHPEVD computes all the eigenvalues and, optionally, eigenvectors of +*> a complex Hermitian matrix A in packed storage. If eigenvectors are +*> desired, it uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, AP is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the diagonal +*> and first superdiagonal of the tridiagonal matrix T overwrite +*> the corresponding elements of A, and if UPLO = 'L', the +*> diagonal and first subdiagonal of T overwrite the +*> corresponding elements of A. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the required LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of array WORK. +*> If N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LWORK must be at least N. +*> If JOBZ = 'V' and N > 1, LWORK must be at least 2*N. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the required sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (MAX(1,LRWORK)) +*> On exit, if INFO = 0, RWORK(1) returns the required LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of array RWORK. +*> If N <= 1, LRWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LRWORK must be at least N. +*> If JOBZ = 'V' and N > 1, LRWORK must be at least +*> 1 + 5*N + 2*N**2. +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the required sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the required LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the required sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHEReigen +* +* ===================================================================== + SUBROUTINE CHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, + $ RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX AP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWRK, + $ ISCALE, LIWMIN, LLRWK, LLWRK, LRWMIN, LWMIN + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHP, SLAMCH + EXTERNAL LSAME, CLANHP, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CHPTRD, CSSCAL, CSTEDC, CUPMTR, SSCAL, SSTERF, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + LIWMIN = 1 + LRWMIN = 1 + ELSE + IF( WANTZ ) THEN + LWMIN = 2*N + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + LRWMIN = N + LIWMIN = 1 + END IF + END IF + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -9 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AP( 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = CLANHP( 'M', UPLO, N, AP, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL CSSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + END IF +* +* Call CHPTRD to reduce Hermitian packed matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + INDRWK = INDE + N + INDWRK = INDTAU + N + LLWRK = LWORK - INDWRK + 1 + LLRWK = LRWORK - INDRWK + 1 + CALL CHPTRD( UPLO, N, AP, W, RWORK( INDE ), WORK( INDTAU ), + $ IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* CUPGTR to generate the orthogonal matrix, then call CSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL CSTEDC( 'I', N, W, RWORK( INDE ), Z, LDZ, WORK( INDWRK ), + $ LLWRK, RWORK( INDRWK ), LLRWK, IWORK, LIWORK, + $ INFO ) + CALL CUPMTR( 'L', UPLO, 'N', N, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of CHPEVD +* + END diff --git a/dspl/liblapack/SRC/chpevx.f b/dspl/liblapack/SRC/chpevx.f new file mode 100644 index 0000000..6a111f0 --- /dev/null +++ b/dspl/liblapack/SRC/chpevx.f @@ -0,0 +1,507 @@ +*> \brief CHPEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHPEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, +* ABSTOL, M, W, Z, LDZ, WORK, RWORK, IWORK, +* IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDZ, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX AP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHPEVX computes selected eigenvalues and, optionally, eigenvectors +*> of a complex Hermitian matrix A in packed storage. +*> Eigenvalues/vectors can be selected by specifying either a range of +*> values or a range of indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found; +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found; +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, AP is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the diagonal +*> and first superdiagonal of the tridiagonal matrix T overwrite +*> the corresponding elements of A, and if UPLO = 'L', the +*> diagonal and first subdiagonal of T overwrite the +*> corresponding elements of A. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing AP to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the selected eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and +*> the index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHEReigen +* +* ===================================================================== + SUBROUTINE CHPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, WORK, RWORK, IWORK, + $ IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDZ, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX AP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, TEST, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWK, INDRWK, INDTAU, INDWRK, ISCALE, + $ ITMP1, J, JJ, NSPLIT + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHP, SLAMCH + EXTERNAL LSAME, CLANHP, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CHPTRD, CSSCAL, CSTEIN, CSTEQR, CSWAP, CUPGTR, + $ CUPMTR, SCOPY, SSCAL, SSTEBZ, SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) ) + $ THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -7 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -9 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -14 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPEVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = AP( 1 ) + ELSE + IF( VL.LT.REAL( AP( 1 ) ) .AND. VU.GE.REAL( AP( 1 ) ) ) THEN + M = 1 + W( 1 ) = AP( 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF ( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + ENDIF + ANRM = CLANHP( 'M', UPLO, N, AP, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL CSSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call CHPTRD to reduce Hermitian packed matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N + INDTAU = 1 + INDWRK = INDTAU + N + CALL CHPTRD( UPLO, N, AP, RWORK( INDD ), RWORK( INDE ), + $ WORK( INDTAU ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call SSTERF or CUPGTR and CSTEQR. If this fails +* for some eigenvalue, then try SSTEBZ. +* + TEST = .FALSE. + IF (INDEIG) THEN + IF (IL.EQ.1 .AND. IU.EQ.N) THEN + TEST = .TRUE. + END IF + END IF + IF ((ALLEIG .OR. TEST) .AND. (ABSTOL.LE.ZERO)) THEN + CALL SCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL SSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL CUPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL CSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 20 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL CSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by CSTEIN. +* + INDWRK = INDTAU + N + CALL CUPMTR( 'L', UPLO, 'N', N, M, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 20 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 40 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 30 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 30 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 40 CONTINUE + END IF +* + RETURN +* +* End of CHPEVX +* + END diff --git a/dspl/liblapack/SRC/chpgst.f b/dspl/liblapack/SRC/chpgst.f new file mode 100644 index 0000000..3813914 --- /dev/null +++ b/dspl/liblapack/SRC/chpgst.f @@ -0,0 +1,281 @@ +*> \brief \b CHPGST +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHPGST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHPGST( ITYPE, UPLO, N, AP, BP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, ITYPE, N +* .. +* .. Array Arguments .. +* COMPLEX AP( * ), BP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHPGST reduces a complex Hermitian-definite generalized +*> eigenproblem to standard form, using packed storage. +*> +*> If ITYPE = 1, the problem is A*x = lambda*B*x, +*> and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) +*> +*> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +*> B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. +*> +*> B must have been previously factorized as U**H*U or L*L**H by CPPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H); +*> = 2 or 3: compute U*A*U**H or L**H*A*L. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored and B is factored as +*> U**H*U; +*> = 'L': Lower triangle of A is stored and B is factored as +*> L*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, if INFO = 0, the transformed matrix, stored in the +*> same format as A. +*> \endverbatim +*> +*> \param[in] BP +*> \verbatim +*> BP is COMPLEX array, dimension (N*(N+1)/2) +*> The triangular factor from the Cholesky factorization of B, +*> stored in the same format as A, as returned by CPPTRF. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CHPGST( ITYPE, UPLO, N, AP, BP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, N +* .. +* .. Array Arguments .. + COMPLEX AP( * ), BP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, HALF + PARAMETER ( ONE = 1.0E+0, HALF = 0.5E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, J1, J1J1, JJ, K, K1, K1K1, KK + REAL AJJ, AKK, BJJ, BKK + COMPLEX CT +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CHPMV, CHPR2, CSSCAL, CTPMV, CTPSV, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPGST', -INFO ) + RETURN + END IF +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U**H)*A*inv(U) +* +* J1 and JJ are the indices of A(1,j) and A(j,j) +* + JJ = 0 + DO 10 J = 1, N + J1 = JJ + 1 + JJ = JJ + J +* +* Compute the j-th column of the upper triangle of A +* + AP( JJ ) = REAL( AP( JJ ) ) + BJJ = BP( JJ ) + CALL CTPSV( UPLO, 'Conjugate transpose', 'Non-unit', J, + $ BP, AP( J1 ), 1 ) + CALL CHPMV( UPLO, J-1, -CONE, AP, BP( J1 ), 1, CONE, + $ AP( J1 ), 1 ) + CALL CSSCAL( J-1, ONE / BJJ, AP( J1 ), 1 ) + AP( JJ ) = ( AP( JJ )-CDOTC( J-1, AP( J1 ), 1, BP( J1 ), + $ 1 ) ) / BJJ + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L**H) +* +* KK and K1K1 are the indices of A(k,k) and A(k+1,k+1) +* + KK = 1 + DO 20 K = 1, N + K1K1 = KK + N - K + 1 +* +* Update the lower triangle of A(k:n,k:n) +* + AKK = AP( KK ) + BKK = BP( KK ) + AKK = AKK / BKK**2 + AP( KK ) = AKK + IF( K.LT.N ) THEN + CALL CSSCAL( N-K, ONE / BKK, AP( KK+1 ), 1 ) + CT = -HALF*AKK + CALL CAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 ) + CALL CHPR2( UPLO, N-K, -CONE, AP( KK+1 ), 1, + $ BP( KK+1 ), 1, AP( K1K1 ) ) + CALL CAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 ) + CALL CTPSV( UPLO, 'No transpose', 'Non-unit', N-K, + $ BP( K1K1 ), AP( KK+1 ), 1 ) + END IF + KK = K1K1 + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U**H +* +* K1 and KK are the indices of A(1,k) and A(k,k) +* + KK = 0 + DO 30 K = 1, N + K1 = KK + 1 + KK = KK + K +* +* Update the upper triangle of A(1:k,1:k) +* + AKK = AP( KK ) + BKK = BP( KK ) + CALL CTPMV( UPLO, 'No transpose', 'Non-unit', K-1, BP, + $ AP( K1 ), 1 ) + CT = HALF*AKK + CALL CAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 ) + CALL CHPR2( UPLO, K-1, CONE, AP( K1 ), 1, BP( K1 ), 1, + $ AP ) + CALL CAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 ) + CALL CSSCAL( K-1, BKK, AP( K1 ), 1 ) + AP( KK ) = AKK*BKK**2 + 30 CONTINUE + ELSE +* +* Compute L**H *A*L +* +* JJ and J1J1 are the indices of A(j,j) and A(j+1,j+1) +* + JJ = 1 + DO 40 J = 1, N + J1J1 = JJ + N - J + 1 +* +* Compute the j-th column of the lower triangle of A +* + AJJ = AP( JJ ) + BJJ = BP( JJ ) + AP( JJ ) = AJJ*BJJ + CDOTC( N-J, AP( JJ+1 ), 1, + $ BP( JJ+1 ), 1 ) + CALL CSSCAL( N-J, BJJ, AP( JJ+1 ), 1 ) + CALL CHPMV( UPLO, N-J, CONE, AP( J1J1 ), BP( JJ+1 ), 1, + $ CONE, AP( JJ+1 ), 1 ) + CALL CTPMV( UPLO, 'Conjugate transpose', 'Non-unit', + $ N-J+1, BP( JJ ), AP( JJ ), 1 ) + JJ = J1J1 + 40 CONTINUE + END IF + END IF + RETURN +* +* End of CHPGST +* + END diff --git a/dspl/liblapack/SRC/chpgv.f b/dspl/liblapack/SRC/chpgv.f new file mode 100644 index 0000000..999031d --- /dev/null +++ b/dspl/liblapack/SRC/chpgv.f @@ -0,0 +1,282 @@ +*> \brief \b CHPGV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHPGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHPGV( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, +* RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDZ, N +* .. +* .. Array Arguments .. +* REAL RWORK( * ), W( * ) +* COMPLEX AP( * ), BP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHPGV computes all the eigenvalues and, optionally, the eigenvectors +*> of a complex generalized Hermitian-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +*> Here A and B are assumed to be Hermitian, stored in packed format, +*> and B is also positive definite. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the contents of AP are destroyed. +*> \endverbatim +*> +*> \param[in,out] BP +*> \verbatim +*> BP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> B, packed columnwise in a linear array. The j-th column of B +*> is stored in the array BP as follows: +*> if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +*> if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +*> +*> On exit, the triangular factor U or L from the Cholesky +*> factorization B = U**H*U or B = L*L**H, in the same storage +*> format as B. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +*> eigenvectors. The eigenvectors are normalized as follows: +*> if ITYPE = 1 or 2, Z**H*B*Z = I; +*> if ITYPE = 3, Z**H*inv(B)*Z = I. +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (max(1, 2*N-1)) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (max(1, 3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: CPPTRF or CHPEV returned an error code: +*> <= N: if INFO = i, CHPEV failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not convergeto zero; +*> > N: if INFO = N + i, for 1 <= i <= n, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHEReigen +* +* ===================================================================== + SUBROUTINE CHPGV( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, + $ RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDZ, N +* .. +* .. Array Arguments .. + REAL RWORK( * ), W( * ) + COMPLEX AP( * ), BP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, WANTZ + CHARACTER TRANS + INTEGER J, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHPEV, CHPGST, CPPTRF, CTPMV, CTPSV, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPGV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL CPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL CHPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL CHPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, RWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**H*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + DO 10 J = 1, NEIG + CALL CTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**H*y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, NEIG + CALL CTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF + RETURN +* +* End of CHPGV +* + END diff --git a/dspl/liblapack/SRC/chpgvd.f b/dspl/liblapack/SRC/chpgvd.f new file mode 100644 index 0000000..6c1ef32 --- /dev/null +++ b/dspl/liblapack/SRC/chpgvd.f @@ -0,0 +1,393 @@ +*> \brief \b CHPGVD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHPGVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, +* LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX AP( * ), BP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHPGVD computes all the eigenvalues and, optionally, the eigenvectors +*> of a complex generalized Hermitian-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +*> B are assumed to be Hermitian, stored in packed format, and B is also +*> positive definite. +*> If eigenvectors are desired, it uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the contents of AP are destroyed. +*> \endverbatim +*> +*> \param[in,out] BP +*> \verbatim +*> BP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> B, packed columnwise in a linear array. The j-th column of B +*> is stored in the array BP as follows: +*> if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +*> if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +*> +*> On exit, the triangular factor U or L from the Cholesky +*> factorization B = U**H*U or B = L*L**H, in the same storage +*> format as B. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +*> eigenvectors. The eigenvectors are normalized as follows: +*> if ITYPE = 1 or 2, Z**H*B*Z = I; +*> if ITYPE = 3, Z**H*inv(B)*Z = I. +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the required LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of array WORK. +*> If N <= 1, LWORK >= 1. +*> If JOBZ = 'N' and N > 1, LWORK >= N. +*> If JOBZ = 'V' and N > 1, LWORK >= 2*N. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the required sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (MAX(1,LRWORK)) +*> On exit, if INFO = 0, RWORK(1) returns the required LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of array RWORK. +*> If N <= 1, LRWORK >= 1. +*> If JOBZ = 'N' and N > 1, LRWORK >= N. +*> If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the required sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the required LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK >= 1. +*> If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the required sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: CPPTRF or CHPEVD returned an error code: +*> <= N: if INFO = i, CHPEVD failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not convergeto zero; +*> > N: if INFO = N + i, for 1 <= i <= n, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHEReigen +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE CHPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, + $ LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX AP( * ), BP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER J, LIWMIN, LRWMIN, LWMIN, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHPEVD, CHPGST, CPPTRF, CTPMV, CTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + LIWMIN = 1 + LRWMIN = 1 + ELSE + IF( WANTZ ) THEN + LWMIN = 2*N + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + LRWMIN = N + LIWMIN = 1 + END IF + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL CPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL CHPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL CHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, RWORK, + $ LRWORK, IWORK, LIWORK, INFO ) + LWMIN = MAX( REAL( LWMIN ), REAL( WORK( 1 ) ) ) + LRWMIN = MAX( REAL( LRWMIN ), REAL( RWORK( 1 ) ) ) + LIWMIN = MAX( REAL( LIWMIN ), REAL( IWORK( 1 ) ) ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + DO 10 J = 1, NEIG + CALL CTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**H *y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, NEIG + CALL CTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of CHPGVD +* + END diff --git a/dspl/liblapack/SRC/chpgvx.f b/dspl/liblapack/SRC/chpgvx.f new file mode 100644 index 0000000..06a0879 --- /dev/null +++ b/dspl/liblapack/SRC/chpgvx.f @@ -0,0 +1,421 @@ +*> \brief \b CHPGVX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHPGVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHPGVX( ITYPE, JOBZ, RANGE, UPLO, N, AP, BP, VL, VU, +* IL, IU, ABSTOL, M, W, Z, LDZ, WORK, RWORK, +* IWORK, IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, ITYPE, IU, LDZ, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL RWORK( * ), W( * ) +* COMPLEX AP( * ), BP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHPGVX computes selected eigenvalues and, optionally, eigenvectors +*> of a complex generalized Hermitian-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +*> B are assumed to be Hermitian, stored in packed format, and B is also +*> positive definite. Eigenvalues and eigenvectors can be selected by +*> specifying either a range of values or a range of indices for the +*> desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found; +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found; +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the contents of AP are destroyed. +*> \endverbatim +*> +*> \param[in,out] BP +*> \verbatim +*> BP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> B, packed columnwise in a linear array. The j-th column of B +*> is stored in the array BP as follows: +*> if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +*> if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +*> +*> On exit, the triangular factor U or L from the Cholesky +*> factorization B = U**H*U or B = L*L**H, in the same storage +*> format as B. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing AP to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> On normal exit, the first M elements contain the selected +*> eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, N) +*> If JOBZ = 'N', then Z is not referenced. +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> The eigenvectors are normalized as follows: +*> if ITYPE = 1 or 2, Z**H*B*Z = I; +*> if ITYPE = 3, Z**H*inv(B)*Z = I. +*> +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: CPPTRF or CHPEVX returned an error code: +*> <= N: if INFO = i, CHPEVX failed to converge; +*> i eigenvectors failed to converge. Their indices +*> are stored in array IFAIL. +*> > N: if INFO = N + i, for 1 <= i <= n, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHEReigen +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE CHPGVX( ITYPE, JOBZ, RANGE, UPLO, N, AP, BP, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, WORK, RWORK, + $ IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, ITYPE, IU, LDZ, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL RWORK( * ), W( * ) + COMPLEX AP( * ), BP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, UPPER, VALEIG, WANTZ + CHARACTER TRANS + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHPEVX, CHPGST, CPPTRF, CTPMV, CTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -3 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) THEN + INFO = -9 + END IF + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 ) THEN + INFO = -10 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -11 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -16 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPGVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL CPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL CHPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL CHPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, ABSTOL, M, + $ W, Z, LDZ, WORK, RWORK, IWORK, IFAIL, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( INFO.GT.0 ) + $ M = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**H*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + DO 10 J = 1, M + CALL CTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**H*y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, M + CALL CTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF +* + RETURN +* +* End of CHPGVX +* + END diff --git a/dspl/liblapack/SRC/chprfs.f b/dspl/liblapack/SRC/chprfs.f new file mode 100644 index 0000000..9fe0532 --- /dev/null +++ b/dspl/liblapack/SRC/chprfs.f @@ -0,0 +1,438 @@ +*> \brief \b CHPRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHPRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, +* FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX AFP( * ), AP( * ), B( LDB, * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHPRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is Hermitian indefinite +*> and packed, and provides error bounds and backward error estimates +*> for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the Hermitian matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in] AFP +*> \verbatim +*> AFP is COMPLEX array, dimension (N*(N+1)/2) +*> The factored form of the matrix A. AFP contains the block +*> diagonal matrix D and the multipliers used to obtain the +*> factor U or L from the factorization A = U*D*U**H or +*> A = L*D*L**H as computed by CHPTRF, stored as a packed +*> triangular matrix. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHPTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by CHPTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CHPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, + $ FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IK, J, K, KASE, KK, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CHPMV, CHPTRS, CLACN2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL CCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL CHPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + KK = 1 + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + IK = KK + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 40 CONTINUE + RWORK( K ) = RWORK( K ) + ABS( REAL( AP( KK+K-1 ) ) )* + $ XK + S + KK = KK + K + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + ABS( REAL( AP( KK ) ) )*XK + IK = KK + 1 + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + KK = KK + ( N-K+1 ) + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CHPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**H). +* + CALL CHPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL CHPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of CHPRFS +* + END diff --git a/dspl/liblapack/SRC/chpsv.f b/dspl/liblapack/SRC/chpsv.f new file mode 100644 index 0000000..af62b5a --- /dev/null +++ b/dspl/liblapack/SRC/chpsv.f @@ -0,0 +1,224 @@ +*> \brief CHPSV computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHPSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHPSV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian matrix stored in packed format and X +*> and B are N-by-NRHS matrices. +*> +*> The diagonal pivoting method is used to factor A as +*> A = U * D * U**H, if UPLO = 'U', or +*> A = L * D * L**H, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, D is Hermitian and block diagonal with 1-by-1 +*> and 2-by-2 diagonal blocks. The factored form of A is then used to +*> solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**H or A = L*D*L**H as computed by CHPTRF, stored as +*> a packed triangular matrix in the same storage format as A. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, as +*> determined by CHPTRF. If IPIV(k) > 0, then rows and columns +*> k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +*> diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +*> then rows and columns k-1 and -IPIV(k) were interchanged and +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +*> IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +*> -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +*> diagonal block. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the Hermitian matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = conjg(aji)) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CHPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHPTRF, CHPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPSV ', -INFO ) + RETURN + END IF +* +* Compute the factorization A = U*D*U**H or A = L*D*L**H. +* + CALL CHPTRF( UPLO, N, AP, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CHPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* + END IF + RETURN +* +* End of CHPSV +* + END diff --git a/dspl/liblapack/SRC/chpsvx.f b/dspl/liblapack/SRC/chpsvx.f new file mode 100644 index 0000000..5783a18 --- /dev/null +++ b/dspl/liblapack/SRC/chpsvx.f @@ -0,0 +1,387 @@ +*> \brief CHPSVX computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHPSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, +* LDX, RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER FACT, UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX AFP( * ), AP( * ), B( LDB, * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHPSVX uses the diagonal pivoting factorization A = U*D*U**H or +*> A = L*D*L**H to compute the solution to a complex system of linear +*> equations A * X = B, where A is an N-by-N Hermitian matrix stored +*> in packed format and X and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'N', the diagonal pivoting method is used to factor A as +*> A = U * D * U**H, if UPLO = 'U', or +*> A = L * D * L**H, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices and D is Hermitian and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> 2. If some D(i,i)=0, so that D is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 3. The system of equations is solved for X using the factored form +*> of A. +*> +*> 4. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of A has been +*> supplied on entry. +*> = 'F': On entry, AFP and IPIV contain the factored form of +*> A. AFP and IPIV will not be modified. +*> = 'N': The matrix A will be copied to AFP and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the Hermitian matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> \endverbatim +*> +*> \param[in,out] AFP +*> \verbatim +*> AFP is COMPLEX array, dimension (N*(N+1)/2) +*> If FACT = 'F', then AFP is an input argument and on entry +*> contains the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**H or A = L*D*L**H as computed by CHPTRF, stored as +*> a packed triangular matrix in the same storage format as A. +*> +*> If FACT = 'N', then AFP is an output argument and on exit +*> contains the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**H or A = L*D*L**H as computed by CHPTRF, stored as +*> a packed triangular matrix in the same storage format as A. +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains details of the interchanges and the block structure +*> of D, as determined by CHPTRF. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains details of the interchanges and the block structure +*> of D, as determined by CHPTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The estimate of the reciprocal condition number of the matrix +*> A. If RCOND is less than the machine precision (in +*> particular, if RCOND = 0), the matrix is singular to working +*> precision. This condition is indicated by a return code of +*> INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: D(i,i) is exactly zero. The factorization +*> has been completed but the factor D is exactly +*> singular, so the solution and error bounds could +*> not be computed. RCOND = 0 is returned. +*> = N+1: D is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexOTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the Hermitian matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = conjg(aji)) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CHPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, + $ LDX, RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT + REAL ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHP, SLAMCH + EXTERNAL LSAME, CLANHP, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CHPCON, CHPRFS, CHPTRF, CHPTRS, CLACPY, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U**H or A = L*D*L**H. +* + CALL CCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 ) + CALL CHPTRF( UPLO, N, AFP, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = CLANHP( 'I', UPLO, N, AP, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL CHPCON( UPLO, N, AFP, IPIV, ANORM, RCOND, WORK, INFO ) +* +* Compute the solution vectors X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CHPTRS( UPLO, N, NRHS, AFP, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL CHPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, FERR, + $ BERR, WORK, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of CHPSVX +* + END diff --git a/dspl/liblapack/SRC/chptrd.f b/dspl/liblapack/SRC/chptrd.f new file mode 100644 index 0000000..e5caab7 --- /dev/null +++ b/dspl/liblapack/SRC/chptrd.f @@ -0,0 +1,310 @@ +*> \brief \b CHPTRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHPTRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHPTRD( UPLO, N, AP, D, E, TAU, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ) +* COMPLEX AP( * ), TAU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHPTRD reduces a complex Hermitian matrix A stored in packed form to +*> real symmetric tridiagonal form T by a unitary similarity +*> transformation: Q**H * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the unitary +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the unitary matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n-1) . . . H(2) H(1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in AP, +*> overwriting A(1:i-1,i+1), and tau is stored in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(n-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in AP, +*> overwriting A(i+2:n,i), and tau is stored in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CHPTRD( UPLO, N, AP, D, E, TAU, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX AP( * ), TAU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO, HALF + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ), + $ HALF = ( 0.5E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, I1, I1I1, II + COMPLEX ALPHA, TAUI +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CHPMV, CHPR2, CLARFG, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPTRD', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A. +* I1 is the index in AP of A(1,I+1). +* + I1 = N*( N-1 ) / 2 + 1 + AP( I1+N-1 ) = REAL( AP( I1+N-1 ) ) + DO 10 I = N - 1, 1, -1 +* +* Generate elementary reflector H(i) = I - tau * v * v**H +* to annihilate A(1:i-1,i+1) +* + ALPHA = AP( I1+I-1 ) + CALL CLARFG( I, ALPHA, AP( I1 ), 1, TAUI ) + E( I ) = ALPHA +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(1:i,1:i) +* + AP( I1+I-1 ) = ONE +* +* Compute y := tau * A * v storing y in TAU(1:i) +* + CALL CHPMV( UPLO, I, TAUI, AP, AP( I1 ), 1, ZERO, TAU, + $ 1 ) +* +* Compute w := y - 1/2 * tau * (y**H *v) * v +* + ALPHA = -HALF*TAUI*CDOTC( I, TAU, 1, AP( I1 ), 1 ) + CALL CAXPY( I, ALPHA, AP( I1 ), 1, TAU, 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w**H - w * v**H +* + CALL CHPR2( UPLO, I, -ONE, AP( I1 ), 1, TAU, 1, AP ) +* + END IF + AP( I1+I-1 ) = E( I ) + D( I+1 ) = AP( I1+I ) + TAU( I ) = TAUI + I1 = I1 - I + 10 CONTINUE + D( 1 ) = AP( 1 ) + ELSE +* +* Reduce the lower triangle of A. II is the index in AP of +* A(i,i) and I1I1 is the index of A(i+1,i+1). +* + II = 1 + AP( 1 ) = REAL( AP( 1 ) ) + DO 20 I = 1, N - 1 + I1I1 = II + N - I + 1 +* +* Generate elementary reflector H(i) = I - tau * v * v**H +* to annihilate A(i+2:n,i) +* + ALPHA = AP( II+1 ) + CALL CLARFG( N-I, ALPHA, AP( II+2 ), 1, TAUI ) + E( I ) = ALPHA +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(i+1:n,i+1:n) +* + AP( II+1 ) = ONE +* +* Compute y := tau * A * v storing y in TAU(i:n-1) +* + CALL CHPMV( UPLO, N-I, TAUI, AP( I1I1 ), AP( II+1 ), 1, + $ ZERO, TAU( I ), 1 ) +* +* Compute w := y - 1/2 * tau * (y**H *v) * v +* + ALPHA = -HALF*TAUI*CDOTC( N-I, TAU( I ), 1, AP( II+1 ), + $ 1 ) + CALL CAXPY( N-I, ALPHA, AP( II+1 ), 1, TAU( I ), 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w**H - w * v**H +* + CALL CHPR2( UPLO, N-I, -ONE, AP( II+1 ), 1, TAU( I ), 1, + $ AP( I1I1 ) ) +* + END IF + AP( II+1 ) = E( I ) + D( I ) = AP( II ) + TAU( I ) = TAUI + II = I1I1 + 20 CONTINUE + D( N ) = AP( II ) + END IF +* + RETURN +* +* End of CHPTRD +* + END diff --git a/dspl/liblapack/SRC/chptrf.f b/dspl/liblapack/SRC/chptrf.f new file mode 100644 index 0000000..97c4e96 --- /dev/null +++ b/dspl/liblapack/SRC/chptrf.f @@ -0,0 +1,649 @@ +*> \brief \b CHPTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHPTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHPTRF( UPLO, N, AP, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHPTRF computes the factorization of a complex Hermitian packed +*> matrix A using the Bunch-Kaufman diagonal pivoting method: +*> +*> A = U*D*U**H or A = L*D*L**H +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is Hermitian and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L, stored as a packed triangular +*> matrix overwriting A (see below for further details). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**H, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**H, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> J. Lewis, Boeing Computer Services Company +*> +* ===================================================================== + SUBROUTINE CHPTRF( UPLO, N, AP, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KC, KK, KNC, KP, KPC, + $ KSTEP, KX, NPP + REAL ABSAKK, ALPHA, COLMAX, D, D11, D22, R1, ROWMAX, + $ TT + COMPLEX D12, D21, T, WK, WKM1, WKP1, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAPY2 + EXTERNAL LSAME, ICAMAX, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL CHPR, CSSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPTRF', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**H using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + KC = ( N-1 )*N / 2 + 1 + 10 CONTINUE + KNC = KC +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( REAL( AP( KC+K-1 ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = ICAMAX( K-1, AP( KC ), 1 ) + COLMAX = CABS1( AP( KC+IMAX-1 ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + AP( KC+K-1 ) = REAL( AP( KC+K-1 ) ) + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + ROWMAX = ZERO + JMAX = IMAX + KX = IMAX*( IMAX+1 ) / 2 + IMAX + DO 20 J = IMAX + 1, K + IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = CABS1( AP( KX ) ) + JMAX = J + END IF + KX = KX + J + 20 CONTINUE + KPC = ( IMAX-1 )*IMAX / 2 + 1 + IF( IMAX.GT.1 ) THEN + JMAX = ICAMAX( IMAX-1, AP( KPC ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( REAL( AP( KPC+IMAX-1 ) ) ).GE.ALPHA* + $ ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL CSWAP( KP-1, AP( KNC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 30 J = KP + 1, KK - 1 + KX = KX + J - 1 + T = CONJG( AP( KNC+J-1 ) ) + AP( KNC+J-1 ) = CONJG( AP( KX ) ) + AP( KX ) = T + 30 CONTINUE + AP( KX+KK-1 ) = CONJG( AP( KX+KK-1 ) ) + R1 = REAL( AP( KNC+KK-1 ) ) + AP( KNC+KK-1 ) = REAL( AP( KPC+KP-1 ) ) + AP( KPC+KP-1 ) = R1 + IF( KSTEP.EQ.2 ) THEN + AP( KC+K-1 ) = REAL( AP( KC+K-1 ) ) + T = AP( KC+K-2 ) + AP( KC+K-2 ) = AP( KC+KP-1 ) + AP( KC+KP-1 ) = T + END IF + ELSE + AP( KC+K-1 ) = REAL( AP( KC+K-1 ) ) + IF( KSTEP.EQ.2 ) + $ AP( KC-1 ) = REAL( AP( KC-1 ) ) + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)**H = A - W(k)*1/D(k)*W(k)**H +* + R1 = ONE / REAL( AP( KC+K-1 ) ) + CALL CHPR( UPLO, K-1, -R1, AP( KC ), 1, AP ) +* +* Store U(k) in column k +* + CALL CSSCAL( K-1, R1, AP( KC ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**H +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )**H +* + IF( K.GT.2 ) THEN +* + D = SLAPY2( REAL( AP( K-1+( K-1 )*K / 2 ) ), + $ AIMAG( AP( K-1+( K-1 )*K / 2 ) ) ) + D22 = REAL( AP( K-1+( K-2 )*( K-1 ) / 2 ) ) / D + D11 = REAL( AP( K+( K-1 )*K / 2 ) ) / D + TT = ONE / ( D11*D22-ONE ) + D12 = AP( K-1+( K-1 )*K / 2 ) / D + D = TT / D +* + DO 50 J = K - 2, 1, -1 + WKM1 = D*( D11*AP( J+( K-2 )*( K-1 ) / 2 )- + $ CONJG( D12 )*AP( J+( K-1 )*K / 2 ) ) + WK = D*( D22*AP( J+( K-1 )*K / 2 )-D12* + $ AP( J+( K-2 )*( K-1 ) / 2 ) ) + DO 40 I = J, 1, -1 + AP( I+( J-1 )*J / 2 ) = AP( I+( J-1 )*J / 2 ) - + $ AP( I+( K-1 )*K / 2 )*CONJG( WK ) - + $ AP( I+( K-2 )*( K-1 ) / 2 )*CONJG( WKM1 ) + 40 CONTINUE + AP( J+( K-1 )*K / 2 ) = WK + AP( J+( K-2 )*( K-1 ) / 2 ) = WKM1 + AP( J+( J-1 )*J / 2 ) = CMPLX( REAL( AP( J+( J-1 )* + $ J / 2 ) ), 0.0E+0 ) + 50 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + KC = KNC - K + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**H using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + KC = 1 + NPP = N*( N+1 ) / 2 + 60 CONTINUE + KNC = KC +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( REAL( AP( KC ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + ICAMAX( N-K, AP( KC+1 ), 1 ) + COLMAX = CABS1( AP( KC+IMAX-K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + AP( KC ) = REAL( AP( KC ) ) + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + ROWMAX = ZERO + KX = KC + IMAX - K + DO 70 J = K, IMAX - 1 + IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = CABS1( AP( KX ) ) + JMAX = J + END IF + KX = KX + N - J + 70 CONTINUE + KPC = NPP - ( N-IMAX+1 )*( N-IMAX+2 ) / 2 + 1 + IF( IMAX.LT.N ) THEN + JMAX = IMAX + ICAMAX( N-IMAX, AP( KPC+1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( REAL( AP( KPC ) ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC + N - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, AP( KNC+KP-KK+1 ), 1, AP( KPC+1 ), + $ 1 ) + KX = KNC + KP - KK + DO 80 J = KK + 1, KP - 1 + KX = KX + N - J + 1 + T = CONJG( AP( KNC+J-KK ) ) + AP( KNC+J-KK ) = CONJG( AP( KX ) ) + AP( KX ) = T + 80 CONTINUE + AP( KNC+KP-KK ) = CONJG( AP( KNC+KP-KK ) ) + R1 = REAL( AP( KNC ) ) + AP( KNC ) = REAL( AP( KPC ) ) + AP( KPC ) = R1 + IF( KSTEP.EQ.2 ) THEN + AP( KC ) = REAL( AP( KC ) ) + T = AP( KC+1 ) + AP( KC+1 ) = AP( KC+KP-K ) + AP( KC+KP-K ) = T + END IF + ELSE + AP( KC ) = REAL( AP( KC ) ) + IF( KSTEP.EQ.2 ) + $ AP( KNC ) = REAL( AP( KNC ) ) + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)**H = A - W(k)*(1/D(k))*W(k)**H +* + R1 = ONE / REAL( AP( KC ) ) + CALL CHPR( UPLO, N-K, -R1, AP( KC+1 ), 1, + $ AP( KC+N-K+1 ) ) +* +* Store L(k) in column K +* + CALL CSSCAL( N-K, R1, AP( KC+1 ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k): columns K and K+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )**H +* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )**H +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D = SLAPY2( REAL( AP( K+1+( K-1 )*( 2*N-K ) / 2 ) ), + $ AIMAG( AP( K+1+( K-1 )*( 2*N-K ) / 2 ) ) ) + D11 = REAL( AP( K+1+K*( 2*N-K-1 ) / 2 ) ) / D + D22 = REAL( AP( K+( K-1 )*( 2*N-K ) / 2 ) ) / D + TT = ONE / ( D11*D22-ONE ) + D21 = AP( K+1+( K-1 )*( 2*N-K ) / 2 ) / D + D = TT / D +* + DO 100 J = K + 2, N + WK = D*( D11*AP( J+( K-1 )*( 2*N-K ) / 2 )-D21* + $ AP( J+K*( 2*N-K-1 ) / 2 ) ) + WKP1 = D*( D22*AP( J+K*( 2*N-K-1 ) / 2 )- + $ CONJG( D21 )*AP( J+( K-1 )*( 2*N-K ) / 2 ) ) + DO 90 I = J, N + AP( I+( J-1 )*( 2*N-J ) / 2 ) = AP( I+( J-1 )* + $ ( 2*N-J ) / 2 ) - AP( I+( K-1 )*( 2*N-K ) / + $ 2 )*CONJG( WK ) - AP( I+K*( 2*N-K-1 ) / 2 )* + $ CONJG( WKP1 ) + 90 CONTINUE + AP( J+( K-1 )*( 2*N-K ) / 2 ) = WK + AP( J+K*( 2*N-K-1 ) / 2 ) = WKP1 + AP( J+( J-1 )*( 2*N-J ) / 2 ) + $ = CMPLX( REAL( AP( J+( J-1 )*( 2*N-J ) / 2 ) ), + $ 0.0E+0 ) + 100 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + KC = KNC + N - K + 2 + GO TO 60 +* + END IF +* + 110 CONTINUE + RETURN +* +* End of CHPTRF +* + END diff --git a/dspl/liblapack/SRC/chptri.f b/dspl/liblapack/SRC/chptri.f new file mode 100644 index 0000000..878c881 --- /dev/null +++ b/dspl/liblapack/SRC/chptri.f @@ -0,0 +1,410 @@ +*> \brief \b CHPTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHPTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHPTRI( UPLO, N, AP, IPIV, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHPTRI computes the inverse of a complex Hermitian indefinite matrix +*> A in packed storage using the factorization A = U*D*U**H or +*> A = L*D*L**H computed by CHPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by CHPTRF, +*> stored as a packed triangular matrix. +*> +*> On exit, if INFO = 0, the (Hermitian) inverse of the original +*> matrix, stored as a packed triangular matrix. The j-th column +*> of inv(A) is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j; +*> if UPLO = 'L', +*> AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHPTRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CHPTRI( UPLO, N, AP, IPIV, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + COMPLEX CONE, ZERO + PARAMETER ( ONE = 1.0E+0, CONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KCNEXT, KP, KPC, KSTEP, KX, NPP + REAL AK, AKP1, D, T + COMPLEX AKKP1, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CHPMV, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + KP = N*( N+1 ) / 2 + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP - INFO + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + KP = 1 + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP + N - INFO + 1 + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U**H. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + KCNEXT = KC + K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC+K-1 ) = ONE / REAL( AP( KC+K-1 ) ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL CCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL CHPMV( UPLO, K-1, -CONE, AP, WORK, 1, ZERO, + $ AP( KC ), 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ REAL( CDOTC( K-1, WORK, 1, AP( KC ), 1 ) ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( AP( KCNEXT+K-1 ) ) + AK = REAL( AP( KC+K-1 ) ) / T + AKP1 = REAL( AP( KCNEXT+K ) ) / T + AKKP1 = AP( KCNEXT+K-1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KC+K-1 ) = AKP1 / D + AP( KCNEXT+K ) = AK / D + AP( KCNEXT+K-1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL CCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL CHPMV( UPLO, K-1, -CONE, AP, WORK, 1, ZERO, + $ AP( KC ), 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ REAL( CDOTC( K-1, WORK, 1, AP( KC ), 1 ) ) + AP( KCNEXT+K-1 ) = AP( KCNEXT+K-1 ) - + $ CDOTC( K-1, AP( KC ), 1, AP( KCNEXT ), + $ 1 ) + CALL CCOPY( K-1, AP( KCNEXT ), 1, WORK, 1 ) + CALL CHPMV( UPLO, K-1, -CONE, AP, WORK, 1, ZERO, + $ AP( KCNEXT ), 1 ) + AP( KCNEXT+K ) = AP( KCNEXT+K ) - + $ REAL( CDOTC( K-1, WORK, 1, AP( KCNEXT ), + $ 1 ) ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT + K + 1 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + KPC = ( KP-1 )*KP / 2 + 1 + CALL CSWAP( KP-1, AP( KC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 40 J = KP + 1, K - 1 + KX = KX + J - 1 + TEMP = CONJG( AP( KC+J-1 ) ) + AP( KC+J-1 ) = CONJG( AP( KX ) ) + AP( KX ) = TEMP + 40 CONTINUE + AP( KC+KP-1 ) = CONJG( AP( KC+KP-1 ) ) + TEMP = AP( KC+K-1 ) + AP( KC+K-1 ) = AP( KPC+KP-1 ) + AP( KPC+KP-1 ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC+K+K-1 ) + AP( KC+K+K-1 ) = AP( KC+K+KP-1 ) + AP( KC+K+KP-1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + KC = KCNEXT + GO TO 30 + 50 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L**H. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + NPP = N*( N+1 ) / 2 + K = N + KC = NPP + 60 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 80 +* + KCNEXT = KC - ( N-K+2 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC ) = ONE / REAL( AP( KC ) ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL CCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL CHPMV( UPLO, N-K, -CONE, AP( KC+N-K+1 ), WORK, 1, + $ ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - REAL( CDOTC( N-K, WORK, 1, + $ AP( KC+1 ), 1 ) ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( AP( KCNEXT+1 ) ) + AK = REAL( AP( KCNEXT ) ) / T + AKP1 = REAL( AP( KC ) ) / T + AKKP1 = AP( KCNEXT+1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KCNEXT ) = AKP1 / D + AP( KC ) = AK / D + AP( KCNEXT+1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL CCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL CHPMV( UPLO, N-K, -CONE, AP( KC+( N-K+1 ) ), WORK, + $ 1, ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - REAL( CDOTC( N-K, WORK, 1, + $ AP( KC+1 ), 1 ) ) + AP( KCNEXT+1 ) = AP( KCNEXT+1 ) - + $ CDOTC( N-K, AP( KC+1 ), 1, + $ AP( KCNEXT+2 ), 1 ) + CALL CCOPY( N-K, AP( KCNEXT+2 ), 1, WORK, 1 ) + CALL CHPMV( UPLO, N-K, -CONE, AP( KC+( N-K+1 ) ), WORK, + $ 1, ZERO, AP( KCNEXT+2 ), 1 ) + AP( KCNEXT ) = AP( KCNEXT ) - + $ REAL( CDOTC( N-K, WORK, 1, AP( KCNEXT+2 ), + $ 1 ) ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT - ( N-K+3 ) + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + KPC = NPP - ( N-KP+1 )*( N-KP+2 ) / 2 + 1 + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, AP( KC+KP-K+1 ), 1, AP( KPC+1 ), 1 ) + KX = KC + KP - K + DO 70 J = K + 1, KP - 1 + KX = KX + N - J + 1 + TEMP = CONJG( AP( KC+J-K ) ) + AP( KC+J-K ) = CONJG( AP( KX ) ) + AP( KX ) = TEMP + 70 CONTINUE + AP( KC+KP-K ) = CONJG( AP( KC+KP-K ) ) + TEMP = AP( KC ) + AP( KC ) = AP( KPC ) + AP( KPC ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC-N+K-1 ) + AP( KC-N+K-1 ) = AP( KC-N+KP-1 ) + AP( KC-N+KP-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + KC = KCNEXT + GO TO 60 + 80 CONTINUE + END IF +* + RETURN +* +* End of CHPTRI +* + END diff --git a/dspl/liblapack/SRC/chptrs.f b/dspl/liblapack/SRC/chptrs.f new file mode 100644 index 0000000..bfb8777 --- /dev/null +++ b/dspl/liblapack/SRC/chptrs.f @@ -0,0 +1,474 @@ +*> \brief \b CHPTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHPTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHPTRS solves a system of linear equations A*X = B with a complex +*> Hermitian matrix A stored in packed format using the factorization +*> A = U*D*U**H or A = L*D*L**H computed by CHPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CHPTRF, stored as a +*> packed triangular matrix. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHPTRF. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CHPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KP + REAL S + COMPLEX AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CGERU, CLACGV, CSSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, REAL +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**H. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + KC = KC - K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL CGERU( K-1, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + S = REAL( ONE ) / REAL( AP( KC+K-1 ) ) + CALL CSSCAL( NRHS, S, B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL CSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL CGERU( K-2, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL CGERU( K-2, NRHS, -ONE, AP( KC-( K-1 ) ), 1, + $ B( K-1, 1 ), LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+K-2 ) + AKM1 = AP( KC-1 ) / AKM1K + AK = AP( KC+K-1 ) / CONJG( AKM1K ) + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / CONJG( AKM1K ) + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + KC = KC - K + 1 + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U**H *X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U**H(K)), where U(K) is the transformation +* stored in column K of A. +* + IF( K.GT.1 ) THEN + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, AP( KC ), 1, ONE, B( K, 1 ), LDB ) + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + END IF +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + K + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U**H(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.GT.1 ) THEN + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, AP( KC ), 1, ONE, B( K, 1 ), LDB ) + CALL CLACGV( NRHS, B( K, 1 ), LDB ) +* + CALL CLACGV( NRHS, B( K+1, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, AP( KC+K ), 1, ONE, B( K+1, 1 ), LDB ) + CALL CLACGV( NRHS, B( K+1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + 2*K + 1 + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L**H. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL CGERU( N-K, NRHS, -ONE, AP( KC+1 ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + S = REAL( ONE ) / REAL( AP( KC ) ) + CALL CSSCAL( NRHS, S, B( K, 1 ), LDB ) + KC = KC + N - K + 1 + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL CSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL CGERU( N-K-1, NRHS, -ONE, AP( KC+2 ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL CGERU( N-K-1, NRHS, -ONE, AP( KC+N-K+2 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+1 ) + AKM1 = AP( KC ) / CONJG( AKM1K ) + AK = AP( KC+N-K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / CONJG( AKM1K ) + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + KC = KC + 2*( N-K ) + 1 + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L**H *X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + KC = KC - ( N-K+1 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L**H(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) THEN + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, AP( KC+1 ), 1, ONE, + $ B( K, 1 ), LDB ) + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + END IF +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L**H(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL CLACGV( NRHS, B( K, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, AP( KC+1 ), 1, ONE, + $ B( K, 1 ), LDB ) + CALL CLACGV( NRHS, B( K, 1 ), LDB ) +* + CALL CLACGV( NRHS, B( K-1, 1 ), LDB ) + CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, AP( KC-( N-K ) ), 1, ONE, + $ B( K-1, 1 ), LDB ) + CALL CLACGV( NRHS, B( K-1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC - ( N-K+2 ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of CHPTRS +* + END diff --git a/dspl/liblapack/SRC/chsein.f b/dspl/liblapack/SRC/chsein.f new file mode 100644 index 0000000..5c15b0b --- /dev/null +++ b/dspl/liblapack/SRC/chsein.f @@ -0,0 +1,468 @@ +*> \brief \b CHSEIN +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHSEIN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHSEIN( SIDE, EIGSRC, INITV, SELECT, N, H, LDH, W, VL, +* LDVL, VR, LDVR, MM, M, WORK, RWORK, IFAILL, +* IFAILR, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EIGSRC, INITV, SIDE +* INTEGER INFO, LDH, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* INTEGER IFAILL( * ), IFAILR( * ) +* REAL RWORK( * ) +* COMPLEX H( LDH, * ), VL( LDVL, * ), VR( LDVR, * ), +* $ W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHSEIN uses inverse iteration to find specified right and/or left +*> eigenvectors of a complex upper Hessenberg matrix H. +*> +*> The right eigenvector x and the left eigenvector y of the matrix H +*> corresponding to an eigenvalue w are defined by: +*> +*> H * x = w * x, y**h * H = w * y**h +*> +*> where y**h denotes the conjugate transpose of the vector y. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': compute right eigenvectors only; +*> = 'L': compute left eigenvectors only; +*> = 'B': compute both right and left eigenvectors. +*> \endverbatim +*> +*> \param[in] EIGSRC +*> \verbatim +*> EIGSRC is CHARACTER*1 +*> Specifies the source of eigenvalues supplied in W: +*> = 'Q': the eigenvalues were found using CHSEQR; thus, if +*> H has zero subdiagonal elements, and so is +*> block-triangular, then the j-th eigenvalue can be +*> assumed to be an eigenvalue of the block containing +*> the j-th row/column. This property allows CHSEIN to +*> perform inverse iteration on just one diagonal block. +*> = 'N': no assumptions are made on the correspondence +*> between eigenvalues and diagonal blocks. In this +*> case, CHSEIN must always perform inverse iteration +*> using the whole matrix H. +*> \endverbatim +*> +*> \param[in] INITV +*> \verbatim +*> INITV is CHARACTER*1 +*> = 'N': no initial vectors are supplied; +*> = 'U': user-supplied initial vectors are stored in the arrays +*> VL and/or VR. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> Specifies the eigenvectors to be computed. To select the +*> eigenvector corresponding to the eigenvalue W(j), +*> SELECT(j) must be set to .TRUE.. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N >= 0. +*> \endverbatim +*> +*> \param[in] H +*> \verbatim +*> H is COMPLEX array, dimension (LDH,N) +*> The upper Hessenberg matrix H. +*> If a NaN is detected in H, the routine will return with INFO=-6. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] W +*> \verbatim +*> W is COMPLEX array, dimension (N) +*> On entry, the eigenvalues of H. +*> On exit, the real parts of W may have been altered since +*> close eigenvalues are perturbed slightly in searching for +*> independent eigenvectors. +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is COMPLEX array, dimension (LDVL,MM) +*> On entry, if INITV = 'U' and SIDE = 'L' or 'B', VL must +*> contain starting vectors for the inverse iteration for the +*> left eigenvectors; the starting vector for each eigenvector +*> must be in the same column in which the eigenvector will be +*> stored. +*> On exit, if SIDE = 'L' or 'B', the left eigenvectors +*> specified by SELECT will be stored consecutively in the +*> columns of VL, in the same order as their eigenvalues. +*> If SIDE = 'R', VL is not referenced. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. +*> LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] VR +*> \verbatim +*> VR is COMPLEX array, dimension (LDVR,MM) +*> On entry, if INITV = 'U' and SIDE = 'R' or 'B', VR must +*> contain starting vectors for the inverse iteration for the +*> right eigenvectors; the starting vector for each eigenvector +*> must be in the same column in which the eigenvector will be +*> stored. +*> On exit, if SIDE = 'R' or 'B', the right eigenvectors +*> specified by SELECT will be stored consecutively in the +*> columns of VR, in the same order as their eigenvalues. +*> If SIDE = 'L', VR is not referenced. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. +*> LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of columns in the arrays VL and/or VR. MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of columns in the arrays VL and/or VR required to +*> store the eigenvectors (= the number of .TRUE. elements in +*> SELECT). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] IFAILL +*> \verbatim +*> IFAILL is INTEGER array, dimension (MM) +*> If SIDE = 'L' or 'B', IFAILL(i) = j > 0 if the left +*> eigenvector in the i-th column of VL (corresponding to the +*> eigenvalue w(j)) failed to converge; IFAILL(i) = 0 if the +*> eigenvector converged satisfactorily. +*> If SIDE = 'R', IFAILL is not referenced. +*> \endverbatim +*> +*> \param[out] IFAILR +*> \verbatim +*> IFAILR is INTEGER array, dimension (MM) +*> If SIDE = 'R' or 'B', IFAILR(i) = j > 0 if the right +*> eigenvector in the i-th column of VR (corresponding to the +*> eigenvalue w(j)) failed to converge; IFAILR(i) = 0 if the +*> eigenvector converged satisfactorily. +*> If SIDE = 'L', IFAILR is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, i is the number of eigenvectors which +*> failed to converge; see IFAILL and IFAILR for further +*> details. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Each eigenvector is normalized so that the element of largest +*> magnitude has magnitude 1; here the magnitude of a complex number +*> (x,y) is taken to be |x|+|y|. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CHSEIN( SIDE, EIGSRC, INITV, SELECT, N, H, LDH, W, VL, + $ LDVL, VR, LDVR, MM, M, WORK, RWORK, IFAILL, + $ IFAILR, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EIGSRC, INITV, SIDE + INTEGER INFO, LDH, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IFAILL( * ), IFAILR( * ) + REAL RWORK( * ) + COMPLEX H( LDH, * ), VL( LDVL, * ), VR( LDVR, * ), + $ W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) + REAL RZERO + PARAMETER ( RZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL BOTHV, FROMQR, LEFTV, NOINIT, RIGHTV + INTEGER I, IINFO, K, KL, KLN, KR, KS, LDWORK + REAL EPS3, HNORM, SMLNUM, ULP, UNFL + COMPLEX CDUM, WK +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + REAL CLANHS, SLAMCH + EXTERNAL LSAME, CLANHS, SLAMCH, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CLAEIN, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + FROMQR = LSAME( EIGSRC, 'Q' ) +* + NOINIT = LSAME( INITV, 'N' ) +* +* Set M to the number of columns required to store the selected +* eigenvectors. +* + M = 0 + DO 10 K = 1, N + IF( SELECT( K ) ) + $ M = M + 1 + 10 CONTINUE +* + INFO = 0 + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.FROMQR .AND. .NOT.LSAME( EIGSRC, 'N' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOINIT .AND. .NOT.LSAME( INITV, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDH.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -10 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -12 + ELSE IF( MM.LT.M ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CHSEIN', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Set machine-dependent constants. +* + UNFL = SLAMCH( 'Safe minimum' ) + ULP = SLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) +* + LDWORK = N +* + KL = 1 + KLN = 0 + IF( FROMQR ) THEN + KR = 0 + ELSE + KR = N + END IF + KS = 1 +* + DO 100 K = 1, N + IF( SELECT( K ) ) THEN +* +* Compute eigenvector(s) corresponding to W(K). +* + IF( FROMQR ) THEN +* +* If affiliation of eigenvalues is known, check whether +* the matrix splits. +* +* Determine KL and KR such that 1 <= KL <= K <= KR <= N +* and H(KL,KL-1) and H(KR+1,KR) are zero (or KL = 1 or +* KR = N). +* +* Then inverse iteration can be performed with the +* submatrix H(KL:N,KL:N) for a left eigenvector, and with +* the submatrix H(1:KR,1:KR) for a right eigenvector. +* + DO 20 I = K, KL + 1, -1 + IF( H( I, I-1 ).EQ.ZERO ) + $ GO TO 30 + 20 CONTINUE + 30 CONTINUE + KL = I + IF( K.GT.KR ) THEN + DO 40 I = K, N - 1 + IF( H( I+1, I ).EQ.ZERO ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE + KR = I + END IF + END IF +* + IF( KL.NE.KLN ) THEN + KLN = KL +* +* Compute infinity-norm of submatrix H(KL:KR,KL:KR) if it +* has not ben computed before. +* + HNORM = CLANHS( 'I', KR-KL+1, H( KL, KL ), LDH, RWORK ) + IF( SISNAN( HNORM ) ) THEN + INFO = -6 + RETURN + ELSE IF( (HNORM.GT.RZERO) ) THEN + EPS3 = HNORM*ULP + ELSE + EPS3 = SMLNUM + END IF + END IF +* +* Perturb eigenvalue if it is close to any previous +* selected eigenvalues affiliated to the submatrix +* H(KL:KR,KL:KR). Close roots are modified by EPS3. +* + WK = W( K ) + 60 CONTINUE + DO 70 I = K - 1, KL, -1 + IF( SELECT( I ) .AND. CABS1( W( I )-WK ).LT.EPS3 ) THEN + WK = WK + EPS3 + GO TO 60 + END IF + 70 CONTINUE + W( K ) = WK +* + IF( LEFTV ) THEN +* +* Compute left eigenvector. +* + CALL CLAEIN( .FALSE., NOINIT, N-KL+1, H( KL, KL ), LDH, + $ WK, VL( KL, KS ), WORK, LDWORK, RWORK, EPS3, + $ SMLNUM, IINFO ) + IF( IINFO.GT.0 ) THEN + INFO = INFO + 1 + IFAILL( KS ) = K + ELSE + IFAILL( KS ) = 0 + END IF + DO 80 I = 1, KL - 1 + VL( I, KS ) = ZERO + 80 CONTINUE + END IF + IF( RIGHTV ) THEN +* +* Compute right eigenvector. +* + CALL CLAEIN( .TRUE., NOINIT, KR, H, LDH, WK, VR( 1, KS ), + $ WORK, LDWORK, RWORK, EPS3, SMLNUM, IINFO ) + IF( IINFO.GT.0 ) THEN + INFO = INFO + 1 + IFAILR( KS ) = K + ELSE + IFAILR( KS ) = 0 + END IF + DO 90 I = KR + 1, N + VR( I, KS ) = ZERO + 90 CONTINUE + END IF + KS = KS + 1 + END IF + 100 CONTINUE +* + RETURN +* +* End of CHSEIN +* + END diff --git a/dspl/liblapack/SRC/chseqr.f b/dspl/liblapack/SRC/chseqr.f new file mode 100644 index 0000000..34bf492 --- /dev/null +++ b/dspl/liblapack/SRC/chseqr.f @@ -0,0 +1,498 @@ +*> \brief \b CHSEQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CHSEQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, W, Z, LDZ, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, ILO, INFO, LDH, LDZ, LWORK, N +* CHARACTER COMPZ, JOB +* .. +* .. Array Arguments .. +* COMPLEX H( LDH, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CHSEQR computes the eigenvalues of a Hessenberg matrix H +*> and, optionally, the matrices T and Z from the Schur decomposition +*> H = Z T Z**H, where T is an upper triangular matrix (the +*> Schur form), and Z is the unitary matrix of Schur vectors. +*> +*> Optionally Z may be postmultiplied into an input unitary +*> matrix Q so that this routine can give the Schur factorization +*> of a matrix A which has been reduced to the Hessenberg form H +*> by the unitary matrix Q: A = Q*H*Q**H = (QZ)*T*(QZ)**H. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> = 'E': compute eigenvalues only; +*> = 'S': compute eigenvalues and the Schur form T. +*> \endverbatim +*> +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': no Schur vectors are computed; +*> = 'I': Z is initialized to the unit matrix and the matrix Z +*> of Schur vectors of H is returned; +*> = 'V': Z must contain an unitary matrix Q on entry, and +*> the product Q*Z is returned. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N .GE. 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> It is assumed that H is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +*> set by a previous call to CGEBAL, and then passed to ZGEHRD +*> when the matrix output by CGEBAL is reduced to Hessenberg +*> form. Otherwise ILO and IHI should be set to 1 and N +*> respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. +*> If N = 0, then ILO = 1 and IHI = 0. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is COMPLEX array, dimension (LDH,N) +*> On entry, the upper Hessenberg matrix H. +*> On exit, if INFO = 0 and JOB = 'S', H contains the upper +*> triangular matrix T from the Schur decomposition (the +*> Schur form). If INFO = 0 and JOB = 'E', the contents of +*> H are unspecified on exit. (The output value of H when +*> INFO.GT.0 is given under the description of INFO below.) +*> +*> Unlike earlier versions of CHSEQR, this subroutine may +*> explicitly H(i,j) = 0 for i.GT.j and j = 1, 2, ... ILO-1 +*> or j = IHI+1, IHI+2, ... N. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH .GE. max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX array, dimension (N) +*> The computed eigenvalues. If JOB = 'S', the eigenvalues are +*> stored in the same order as on the diagonal of the Schur +*> form returned in H, with W(i) = H(i,i). +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ,N) +*> If COMPZ = 'N', Z is not referenced. +*> If COMPZ = 'I', on entry Z need not be set and on exit, +*> if INFO = 0, Z contains the unitary matrix Z of the Schur +*> vectors of H. If COMPZ = 'V', on entry Z must contain an +*> N-by-N matrix Q, which is assumed to be equal to the unit +*> matrix except for the submatrix Z(ILO:IHI,ILO:IHI). On exit, +*> if INFO = 0, Z contains Q*Z. +*> Normally Q is the unitary matrix generated by CUNGHR +*> after the call to CGEHRD which formed the Hessenberg matrix +*> H. (The output value of Z when INFO.GT.0 is given under +*> the description of INFO below.) +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. if COMPZ = 'I' or +*> COMPZ = 'V', then LDZ.GE.MAX(1,N). Otherwize, LDZ.GE.1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns an estimate of +*> the optimal value for LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK .GE. max(1,N) +*> is sufficient and delivers very good and sometimes +*> optimal performance. However, LWORK as large as 11*N +*> may be required for optimal performance. A workspace +*> query is recommended to determine the optimal workspace +*> size. +*> +*> If LWORK = -1, then CHSEQR does a workspace query. +*> In this case, CHSEQR checks the input parameters and +*> estimates the optimal workspace size for the given +*> values of N, ILO and IHI. The estimate is returned +*> in WORK(1). No error message related to LWORK is +*> issued by XERBLA. Neither H nor Z are accessed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> .LT. 0: if INFO = -i, the i-th argument had an illegal +*> value +*> .GT. 0: if INFO = i, CHSEQR failed to compute all of +*> the eigenvalues. Elements 1:ilo-1 and i+1:n of WR +*> and WI contain those eigenvalues which have been +*> successfully computed. (Failures are rare.) +*> +*> If INFO .GT. 0 and JOB = 'E', then on exit, the +*> remaining unconverged eigenvalues are the eigen- +*> values of the upper Hessenberg matrix rows and +*> columns ILO through INFO of the final, output +*> value of H. +*> +*> If INFO .GT. 0 and JOB = 'S', then on exit +*> +*> (*) (initial value of H)*U = U*(final value of H) +*> +*> where U is a unitary matrix. The final +*> value of H is upper Hessenberg and triangular in +*> rows and columns INFO+1 through IHI. +*> +*> If INFO .GT. 0 and COMPZ = 'V', then on exit +*> +*> (final value of Z) = (initial value of Z)*U +*> +*> where U is the unitary matrix in (*) (regard- +*> less of the value of JOB.) +*> +*> If INFO .GT. 0 and COMPZ = 'I', then on exit +*> (final value of Z) = U +*> where U is the unitary matrix in (*) (regard- +*> less of the value of JOB.) +*> +*> If INFO .GT. 0 and COMPZ = 'N', then Z is not +*> accessed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Default values supplied by +*> ILAENV(ISPEC,'CHSEQR',JOB(:1)//COMPZ(:1),N,ILO,IHI,LWORK). +*> It is suggested that these defaults be adjusted in order +*> to attain best performance in each particular +*> computational environment. +*> +*> ISPEC=12: The CLAHQR vs CLAQR0 crossover point. +*> Default: 75. (Must be at least 11.) +*> +*> ISPEC=13: Recommended deflation window size. +*> This depends on ILO, IHI and NS. NS is the +*> number of simultaneous shifts returned +*> by ILAENV(ISPEC=15). (See ISPEC=15 below.) +*> The default for (IHI-ILO+1).LE.500 is NS. +*> The default for (IHI-ILO+1).GT.500 is 3*NS/2. +*> +*> ISPEC=14: Nibble crossover point. (See IPARMQ for +*> details.) Default: 14% of deflation window +*> size. +*> +*> ISPEC=15: Number of simultaneous shifts in a multishift +*> QR iteration. +*> +*> If IHI-ILO+1 is ... +*> +*> greater than ...but less ... the +*> or equal to ... than default is +*> +*> 1 30 NS = 2(+) +*> 30 60 NS = 4(+) +*> 60 150 NS = 10(+) +*> 150 590 NS = ** +*> 590 3000 NS = 64 +*> 3000 6000 NS = 128 +*> 6000 infinity NS = 256 +*> +*> (+) By default some or all matrices of this order +*> are passed to the implicit double shift routine +*> CLAHQR and this parameter is ignored. See +*> ISPEC=12 above and comments in IPARMQ for +*> details. +*> +*> (**) The asterisks (**) indicate an ad-hoc +*> function of N increasing from 10 to 64. +*> +*> ISPEC=16: Select structured matrix multiply. +*> If the number of simultaneous shifts (specified +*> by ISPEC=15) is less than 14, then the default +*> for ISPEC=16 is 0. Otherwise the default for +*> ISPEC=16 is 2. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages +*> 929--947, 2002. +*> \n +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part II: Aggressive Early Deflation, SIAM Journal +*> of Matrix Analysis, volume 23, pages 948--973, 2002. +* +* ===================================================================== + SUBROUTINE CHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, W, Z, LDZ, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDH, LDZ, LWORK, N + CHARACTER COMPZ, JOB +* .. +* .. Array Arguments .. + COMPLEX H( LDH, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* +* ==== Matrices of order NTINY or smaller must be processed by +* . CLAHQR because of insufficient subdiagonal scratch space. +* . (This is a hard limit.) ==== + INTEGER NTINY + PARAMETER ( NTINY = 11 ) +* +* ==== NL allocates some local workspace to help small matrices +* . through a rare CLAHQR failure. NL .GT. NTINY = 11 is +* . required and NL .LE. NMIN = ILAENV(ISPEC=12,...) is recom- +* . mended. (The default value of NMIN is 75.) Using NL = 49 +* . allows up to six simultaneous shifts and a 16-by-16 +* . deflation window. ==== + INTEGER NL + PARAMETER ( NL = 49 ) + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0e0, 0.0e0 ), + $ ONE = ( 1.0e0, 0.0e0 ) ) + REAL RZERO + PARAMETER ( RZERO = 0.0e0 ) +* .. +* .. Local Arrays .. + COMPLEX HL( NL, NL ), WORKL( NL ) +* .. +* .. Local Scalars .. + INTEGER KBOT, NMIN + LOGICAL INITZ, LQUERY, WANTT, WANTZ +* .. +* .. External Functions .. + INTEGER ILAENV + LOGICAL LSAME + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLACPY, CLAHQR, CLAQR0, CLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* ==== Decode and check the input parameters. ==== +* + WANTT = LSAME( JOB, 'S' ) + INITZ = LSAME( COMPZ, 'I' ) + WANTZ = INITZ .OR. LSAME( COMPZ, 'V' ) + WORK( 1 ) = CMPLX( REAL( MAX( 1, N ) ), RZERO ) + LQUERY = LWORK.EQ.-1 +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'E' ) .AND. .NOT.WANTT ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPZ, 'N' ) .AND. .NOT.WANTZ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -5 + ELSE IF( LDH.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.MAX( 1, N ) ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN +* +* ==== Quick return in case of invalid argument. ==== +* + CALL XERBLA( 'CHSEQR', -INFO ) + RETURN +* + ELSE IF( N.EQ.0 ) THEN +* +* ==== Quick return in case N = 0; nothing to do. ==== +* + RETURN +* + ELSE IF( LQUERY ) THEN +* +* ==== Quick return in case of a workspace query ==== +* + CALL CLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILO, IHI, Z, + $ LDZ, WORK, LWORK, INFO ) +* ==== Ensure reported workspace size is backward-compatible with +* . previous LAPACK versions. ==== + WORK( 1 ) = CMPLX( MAX( REAL( WORK( 1 ) ), REAL( MAX( 1, + $ N ) ) ), RZERO ) + RETURN +* + ELSE +* +* ==== copy eigenvalues isolated by CGEBAL ==== +* + IF( ILO.GT.1 ) + $ CALL CCOPY( ILO-1, H, LDH+1, W, 1 ) + IF( IHI.LT.N ) + $ CALL CCOPY( N-IHI, H( IHI+1, IHI+1 ), LDH+1, W( IHI+1 ), 1 ) +* +* ==== Initialize Z, if requested ==== +* + IF( INITZ ) + $ CALL CLASET( 'A', N, N, ZERO, ONE, Z, LDZ ) +* +* ==== Quick return if possible ==== +* + IF( ILO.EQ.IHI ) THEN + W( ILO ) = H( ILO, ILO ) + RETURN + END IF +* +* ==== CLAHQR/CLAQR0 crossover point ==== +* + NMIN = ILAENV( 12, 'CHSEQR', JOB( : 1 ) // COMPZ( : 1 ), N, + $ ILO, IHI, LWORK ) + NMIN = MAX( NTINY, NMIN ) +* +* ==== CLAQR0 for big matrices; CLAHQR for small ones ==== +* + IF( N.GT.NMIN ) THEN + CALL CLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILO, IHI, + $ Z, LDZ, WORK, LWORK, INFO ) + ELSE +* +* ==== Small matrix ==== +* + CALL CLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILO, IHI, + $ Z, LDZ, INFO ) +* + IF( INFO.GT.0 ) THEN +* +* ==== A rare CLAHQR failure! CLAQR0 sometimes succeeds +* . when CLAHQR fails. ==== +* + KBOT = INFO +* + IF( N.GE.NL ) THEN +* +* ==== Larger matrices have enough subdiagonal scratch +* . space to call CLAQR0 directly. ==== +* + CALL CLAQR0( WANTT, WANTZ, N, ILO, KBOT, H, LDH, W, + $ ILO, IHI, Z, LDZ, WORK, LWORK, INFO ) +* + ELSE +* +* ==== Tiny matrices don't have enough subdiagonal +* . scratch space to benefit from CLAQR0. Hence, +* . tiny matrices must be copied into a larger +* . array before calling CLAQR0. ==== +* + CALL CLACPY( 'A', N, N, H, LDH, HL, NL ) + HL( N+1, N ) = ZERO + CALL CLASET( 'A', NL, NL-N, ZERO, ZERO, HL( 1, N+1 ), + $ NL ) + CALL CLAQR0( WANTT, WANTZ, NL, ILO, KBOT, HL, NL, W, + $ ILO, IHI, Z, LDZ, WORKL, NL, INFO ) + IF( WANTT .OR. INFO.NE.0 ) + $ CALL CLACPY( 'A', N, N, HL, NL, H, LDH ) + END IF + END IF + END IF +* +* ==== Clear out the trash, if necessary. ==== +* + IF( ( WANTT .OR. INFO.NE.0 ) .AND. N.GT.2 ) + $ CALL CLASET( 'L', N-2, N-2, ZERO, ZERO, H( 3, 1 ), LDH ) +* +* ==== Ensure reported workspace size is backward-compatible with +* . previous LAPACK versions. ==== +* + WORK( 1 ) = CMPLX( MAX( REAL( MAX( 1, N ) ), + $ REAL( WORK( 1 ) ) ), RZERO ) + END IF +* +* ==== End of CHSEQR ==== +* + END diff --git a/dspl/liblapack/SRC/cla_gbamv.f b/dspl/liblapack/SRC/cla_gbamv.f new file mode 100644 index 0000000..e043d74 --- /dev/null +++ b/dspl/liblapack/SRC/cla_gbamv.f @@ -0,0 +1,422 @@ +*> \brief \b CLA_GBAMV performs a matrix-vector operation to calculate error bounds. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_GBAMV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLA_GBAMV( TRANS, M, N, KL, KU, ALPHA, AB, LDAB, X, +* INCX, BETA, Y, INCY ) +* +* .. Scalar Arguments .. +* REAL ALPHA, BETA +* INTEGER INCX, INCY, LDAB, M, N, KL, KU, TRANS +* .. +* .. Array Arguments .. +* COMPLEX AB( LDAB, * ), X( * ) +* REAL Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLA_GBAMV performs one of the matrix-vector operations +*> +*> y := alpha*abs(A)*abs(x) + beta*abs(y), +*> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), +*> +*> where alpha and beta are scalars, x and y are vectors and A is an +*> m by n matrix. +*> +*> This function is primarily used in calculating error bounds. +*> To protect against underflow during evaluation, components in +*> the resulting vector are perturbed away from zero by (N+1) +*> times the underflow threshold. To prevent unnecessarily large +*> errors for block-structure embedded in general matrices, +*> "symbolically" zero components are not perturbed. A zero +*> entry is considered "symbolic" if all multiplications involved +*> in computing that entry have at least one zero multiplicand. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is INTEGER +*> On entry, TRANS specifies the operation to be performed as +*> follows: +*> +*> BLAS_NO_TRANS y := alpha*abs(A)*abs(x) + beta*abs(y) +*> BLAS_TRANS y := alpha*abs(A**T)*abs(x) + beta*abs(y) +*> BLAS_CONJ_TRANS y := alpha*abs(A**T)*abs(x) + beta*abs(y) +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the number of rows of the matrix A. +*> M must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is REAL +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,n) +*> Before entry, the leading m by n part of the array AB must +*> contain the matrix of coefficients. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> On entry, LDAB specifies the first dimension of AB as declared +*> in the calling (sub) program. LDAB must be at least +*> max( 1, m ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX array, dimension +*> ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +*> and at least +*> ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +*> Before entry, the incremented array X must contain the +*> vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is REAL +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is REAL array, dimension +*> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +*> and at least +*> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +*> Before entry with BETA non-zero, the incremented array Y +*> must contain the vector y. On exit, Y is overwritten by the +*> updated vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> Unchanged on exit. +*> +*> Level 2 Blas routine. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexGBcomputational +* +* ===================================================================== + SUBROUTINE CLA_GBAMV( TRANS, M, N, KL, KU, ALPHA, AB, LDAB, X, + $ INCX, BETA, Y, INCY ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + REAL ALPHA, BETA + INTEGER INCX, INCY, LDAB, M, N, KL, KU, TRANS +* .. +* .. Array Arguments .. + COMPLEX AB( LDAB, * ), X( * ) + REAL Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL SYMB_ZERO + REAL TEMP, SAFE1 + INTEGER I, INFO, IY, J, JX, KX, KY, LENX, LENY, KD, KE + COMPLEX CDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SLAMCH + REAL SLAMCH +* .. +* .. External Functions .. + EXTERNAL ILATRANS + INTEGER ILATRANS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, ABS, REAL, AIMAG, SIGN +* .. +* .. Statement Functions + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.( ( TRANS.EQ.ILATRANS( 'N' ) ) + $ .OR. ( TRANS.EQ.ILATRANS( 'T' ) ) + $ .OR. ( TRANS.EQ.ILATRANS( 'C' ) ) ) ) THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( KL.LT.0 .OR. KL.GT.M-1 ) THEN + INFO = 4 + ELSE IF( KU.LT.0 .OR. KU.GT.N-1 ) THEN + INFO = 5 + ELSE IF( LDAB.LT.KL+KU+1 )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CLA_GBAMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Set SAFE1 essentially to be the underflow threshold times the +* number of additions in each row. +* + SAFE1 = SLAMCH( 'Safe minimum' ) + SAFE1 = (N+1)*SAFE1 +* +* Form y := alpha*abs(A)*abs(x) + beta*abs(y). +* +* The O(M*N) SYMB_ZERO tests could be replaced by O(N) queries to +* the inexact flag. Still doesn't help change the iteration order +* to per-column. +* + KD = KU + 1 + KE = KL + 1 + IY = KY + IF ( INCX.EQ.1 ) THEN + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + DO I = 1, LENY + IF ( BETA .EQ. 0.0 ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. 0.0 ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. 0.0 ) THEN + DO J = MAX( I-KL, 1 ), MIN( I+KU, LENX ) + TEMP = CABS1( AB( KD+I-J, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, LENY + IF ( BETA .EQ. 0.0 ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. 0.0 ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. 0.0 ) THEN + DO J = MAX( I-KL, 1 ), MIN( I+KU, LENX ) + TEMP = CABS1( AB( KE-I+J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + ELSE + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + DO I = 1, LENY + IF ( BETA .EQ. 0.0 ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. 0.0 ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. 0.0 ) THEN + JX = KX + DO J = MAX( I-KL, 1 ), MIN( I+KU, LENX ) + TEMP = CABS1( AB( KD+I-J, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( JX ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, LENY + IF ( BETA .EQ. 0.0 ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. 0.0 ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. 0.0 ) THEN + JX = KX + DO J = MAX( I-KL, 1 ), MIN( I+KU, LENX ) + TEMP = CABS1( AB( KE-I+J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( JX ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + + END IF +* + RETURN +* +* End of CLA_GBAMV +* + END diff --git a/dspl/liblapack/SRC/cla_gbrcond_c.f b/dspl/liblapack/SRC/cla_gbrcond_c.f new file mode 100644 index 0000000..123aee2 --- /dev/null +++ b/dspl/liblapack/SRC/cla_gbrcond_c.f @@ -0,0 +1,342 @@ +*> \brief \b CLA_GBRCOND_C computes the infinity norm condition number of op(A)*inv(diag(c)) for general banded matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_GBRCOND_C + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLA_GBRCOND_C( TRANS, N, KL, KU, AB, LDAB, AFB, +* LDAFB, IPIV, C, CAPPLY, INFO, WORK, +* RWORK ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* LOGICAL CAPPLY +* INTEGER N, KL, KU, KD, KE, LDAB, LDAFB, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), WORK( * ) +* REAL C( * ), RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLA_GBRCOND_C Computes the infinity norm condition number of +*> op(A) * inv(diag(C)) where C is a REAL vector. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate Transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is COMPLEX array, dimension (LDAFB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by CGBTRF. U is stored as an upper triangular +*> band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +*> and the multipliers used during the factorization are stored +*> in rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from the factorization A = P*L*U +*> as computed by CGBTRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The vector C in the formula op(A) * inv(diag(C)). +*> \endverbatim +*> +*> \param[in] CAPPLY +*> \verbatim +*> CAPPLY is LOGICAL +*> If .TRUE. then access the vector C in the formula above. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGBcomputational +* +* ===================================================================== + REAL FUNCTION CLA_GBRCOND_C( TRANS, N, KL, KU, AB, LDAB, AFB, + $ LDAFB, IPIV, C, CAPPLY, INFO, WORK, + $ RWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + LOGICAL CAPPLY + INTEGER N, KL, KU, KD, KE, LDAB, LDAFB, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), WORK( * ) + REAL C( * ), RWORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRANS + INTEGER KASE, I, J + REAL AINVNM, ANORM, TMP + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CGBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. + CLA_GBRCOND_C = 0.0E+0 +* + INFO = 0 + NOTRANS = LSAME( TRANS, 'N' ) + IF ( .NOT. NOTRANS .AND. .NOT. LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 .OR. KL.GT.N-1 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 .OR. KU.GT.N-1 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -6 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLA_GBRCOND_C', -INFO ) + RETURN + END IF +* +* Compute norm of op(A)*op2(C). +* + ANORM = 0.0E+0 + KD = KU + 1 + KE = KL + 1 + IF ( NOTRANS ) THEN + DO I = 1, N + TMP = 0.0E+0 + IF ( CAPPLY ) THEN + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + CABS1( AB( KD+I-J, J ) ) / C( J ) + END DO + ELSE + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + CABS1( AB( KD+I-J, J ) ) + END DO + END IF + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + ELSE + DO I = 1, N + TMP = 0.0E+0 + IF ( CAPPLY ) THEN + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + CABS1( AB( KE-I+J, I ) ) / C( J ) + END DO + ELSE + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + CABS1( AB( KE-I+J, I ) ) + END DO + END IF + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + CLA_GBRCOND_C = 1.0E+0 + RETURN + ELSE IF( ANORM .EQ. 0.0E+0 ) THEN + RETURN + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0E+0 +* + KASE = 0 + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO +* + IF ( NOTRANS ) THEN + CALL CGBTRS( 'No transpose', N, KL, KU, 1, AFB, LDAFB, + $ IPIV, WORK, N, INFO ) + ELSE + CALL CGBTRS( 'Conjugate transpose', N, KL, KU, 1, AFB, + $ LDAFB, IPIV, WORK, N, INFO ) + ENDIF +* +* Multiply by inv(C). +* + IF ( CAPPLY ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + ELSE +* +* Multiply by inv(C**H). +* + IF ( CAPPLY ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF +* + IF ( NOTRANS ) THEN + CALL CGBTRS( 'Conjugate transpose', N, KL, KU, 1, AFB, + $ LDAFB, IPIV, WORK, N, INFO ) + ELSE + CALL CGBTRS( 'No transpose', N, KL, KU, 1, AFB, LDAFB, + $ IPIV, WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0E+0 ) + $ CLA_GBRCOND_C = 1.0E+0 / AINVNM +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/cla_gbrcond_x.f b/dspl/liblapack/SRC/cla_gbrcond_x.f new file mode 100644 index 0000000..d04aa7f --- /dev/null +++ b/dspl/liblapack/SRC/cla_gbrcond_x.f @@ -0,0 +1,319 @@ +*> \brief \b CLA_GBRCOND_X computes the infinity norm condition number of op(A)*diag(x) for general banded matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_GBRCOND_X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLA_GBRCOND_X( TRANS, N, KL, KU, AB, LDAB, AFB, +* LDAFB, IPIV, X, INFO, WORK, RWORK ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER N, KL, KU, KD, KE, LDAB, LDAFB, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), WORK( * ), +* $ X( * ) +* REAL RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLA_GBRCOND_X Computes the infinity norm condition number of +*> op(A) * diag(X) where X is a COMPLEX vector. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate Transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is COMPLEX array, dimension (LDAFB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by CGBTRF. U is stored as an upper triangular +*> band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +*> and the multipliers used during the factorization are stored +*> in rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from the factorization A = P*L*U +*> as computed by CGBTRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX array, dimension (N) +*> The vector X in the formula op(A) * diag(X). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGBcomputational +* +* ===================================================================== + REAL FUNCTION CLA_GBRCOND_X( TRANS, N, KL, KU, AB, LDAB, AFB, + $ LDAFB, IPIV, X, INFO, WORK, RWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER N, KL, KU, KD, KE, LDAB, LDAFB, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), WORK( * ), + $ X( * ) + REAL RWORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRANS + INTEGER KASE, I, J + REAL AINVNM, ANORM, TMP + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CGBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + CLA_GBRCOND_X = 0.0E+0 +* + INFO = 0 + NOTRANS = LSAME( TRANS, 'N' ) + IF ( .NOT. NOTRANS .AND. .NOT. LSAME(TRANS, 'T') .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 .OR. KL.GT.N-1 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 .OR. KU.GT.N-1 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -6 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLA_GBRCOND_X', -INFO ) + RETURN + END IF +* +* Compute norm of op(A)*op2(C). +* + KD = KU + 1 + KE = KL + 1 + ANORM = 0.0 + IF ( NOTRANS ) THEN + DO I = 1, N + TMP = 0.0E+0 + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + CABS1( AB( KD+I-J, J) * X( J ) ) + END DO + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + ELSE + DO I = 1, N + TMP = 0.0E+0 + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + CABS1( AB( KE-I+J, I ) * X( J ) ) + END DO + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + CLA_GBRCOND_X = 1.0E+0 + RETURN + ELSE IF( ANORM .EQ. 0.0E+0 ) THEN + RETURN + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0E+0 +* + KASE = 0 + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO +* + IF ( NOTRANS ) THEN + CALL CGBTRS( 'No transpose', N, KL, KU, 1, AFB, LDAFB, + $ IPIV, WORK, N, INFO ) + ELSE + CALL CGBTRS( 'Conjugate transpose', N, KL, KU, 1, AFB, + $ LDAFB, IPIV, WORK, N, INFO ) + ENDIF +* +* Multiply by inv(X). +* + DO I = 1, N + WORK( I ) = WORK( I ) / X( I ) + END DO + ELSE +* +* Multiply by inv(X**H). +* + DO I = 1, N + WORK( I ) = WORK( I ) / X( I ) + END DO +* + IF ( NOTRANS ) THEN + CALL CGBTRS( 'Conjugate transpose', N, KL, KU, 1, AFB, + $ LDAFB, IPIV, WORK, N, INFO ) + ELSE + CALL CGBTRS( 'No transpose', N, KL, KU, 1, AFB, LDAFB, + $ IPIV, WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0E+0 ) + $ CLA_GBRCOND_X = 1.0E+0 / AINVNM +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/cla_gbrfsx_extended.f b/dspl/liblapack/SRC/cla_gbrfsx_extended.f new file mode 100644 index 0000000..888ecd4 --- /dev/null +++ b/dspl/liblapack/SRC/cla_gbrfsx_extended.f @@ -0,0 +1,713 @@ +*> \brief \b CLA_GBRFSX_EXTENDED improves the computed solution to a system of linear equations for general banded matrices by performing extra-precise iterative refinement and provides error bounds and backward error estimates for the solution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_GBRFSX_EXTENDED + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLA_GBRFSX_EXTENDED ( PREC_TYPE, TRANS_TYPE, N, KL, KU, +* NRHS, AB, LDAB, AFB, LDAFB, IPIV, +* COLEQU, C, B, LDB, Y, LDY, +* BERR_OUT, N_NORMS, ERR_BNDS_NORM, +* ERR_BNDS_COMP, RES, AYB, DY, +* Y_TAIL, RCOND, ITHRESH, RTHRESH, +* DZ_UB, IGNORE_CWISE, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDAB, LDAFB, LDB, LDY, N, KL, KU, NRHS, +* $ PREC_TYPE, TRANS_TYPE, N_NORMS, ITHRESH +* LOGICAL COLEQU, IGNORE_CWISE +* REAL RTHRESH, DZ_UB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) +* REAL C( * ), AYB(*), RCOND, BERR_OUT( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLA_GBRFSX_EXTENDED improves the computed solution to a system of +*> linear equations by performing extra-precise iterative refinement +*> and provides error bounds and backward error estimates for the solution. +*> This subroutine is called by CGBRFSX to perform iterative refinement. +*> In addition to normwise error bound, the code provides maximum +*> componentwise error bound if possible. See comments for ERR_BNDS_NORM +*> and ERR_BNDS_COMP for details of the error bounds. Note that this +*> subroutine is only resonsible for setting the second fields of +*> ERR_BNDS_NORM and ERR_BNDS_COMP. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] PREC_TYPE +*> \verbatim +*> PREC_TYPE is INTEGER +*> Specifies the intermediate precision to be used in refinement. +*> The value is defined by ILAPREC(P) where P is a CHARACTER and +*> P = 'S': Single +*> = 'D': Double +*> = 'I': Indigenous +*> = 'X', 'E': Extra +*> \endverbatim +*> +*> \param[in] TRANS_TYPE +*> \verbatim +*> TRANS_TYPE is INTEGER +*> Specifies the transposition operation on A. +*> The value is defined by ILATRANS(T) where T is a CHARACTER and +*> T = 'N': No transpose +*> = 'T': Transpose +*> = 'C': Conjugate transpose +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0 +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand-sides, i.e., the number of columns of the +*> matrix B. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the N-by-N matrix AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= max(1,N). +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is COMPLEX array, dimension (LDAF,N) +*> The factors L and U from the factorization +*> A = P*L*U as computed by CGBTRF. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from the factorization A = P*L*U +*> as computed by CGBTRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> \endverbatim +*> +*> \param[in] COLEQU +*> \verbatim +*> COLEQU is LOGICAL +*> If .TRUE. then column equilibration was done to A before calling +*> this routine. This is needed to compute the solution and error +*> bounds correctly. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If COLEQU = .FALSE., C +*> is not accessed. If C is input, each element of C should be a power +*> of the radix to ensure a reliable solution and error estimates. +*> Scaling by powers of the radix does not cause rounding errors unless +*> the result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right-hand-side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX array, dimension (LDY,NRHS) +*> On entry, the solution matrix X, as computed by CGBTRS. +*> On exit, the improved solution matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +*> +*> \param[out] BERR_OUT +*> \verbatim +*> BERR_OUT is REAL array, dimension (NRHS) +*> On exit, BERR_OUT(j) contains the componentwise relative backward +*> error for right-hand-side j from the formula +*> max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. This is computed by CLA_LIN_BERR. +*> \endverbatim +*> +*> \param[in] N_NORMS +*> \verbatim +*> N_NORMS is INTEGER +*> Determines which error bounds to return (see ERR_BNDS_NORM +*> and ERR_BNDS_COMP). +*> If N_NORMS >= 1 return normwise error bounds. +*> If N_NORMS >= 2 return componentwise error bounds. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is COMPLEX array, dimension (N) +*> Workspace to hold the intermediate residual. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is REAL array, dimension (N) +*> Workspace. +*> \endverbatim +*> +*> \param[in] DY +*> \verbatim +*> DY is COMPLEX array, dimension (N) +*> Workspace to hold the intermediate solution. +*> \endverbatim +*> +*> \param[in] Y_TAIL +*> \verbatim +*> Y_TAIL is COMPLEX array, dimension (N) +*> Workspace to hold the trailing bits of the intermediate solution. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[in] ITHRESH +*> \verbatim +*> ITHRESH is INTEGER +*> The maximum number of residual computations allowed for +*> refinement. The default is 10. For 'aggressive' set to 100 to +*> permit convergence using approximate factorizations or +*> factorizations other than LU. If the factorization uses a +*> technique other than Gaussian elimination, the guarantees in +*> ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. +*> \endverbatim +*> +*> \param[in] RTHRESH +*> \verbatim +*> RTHRESH is REAL +*> Determines when to stop refinement if the error estimate stops +*> decreasing. Refinement will stop when the next solution no longer +*> satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is +*> the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The +*> default value is 0.5. For 'aggressive' set to 0.9 to permit +*> convergence on extremely ill-conditioned matrices. See LAWN 165 +*> for more details. +*> \endverbatim +*> +*> \param[in] DZ_UB +*> \verbatim +*> DZ_UB is REAL +*> Determines when to start considering componentwise convergence. +*> Componentwise convergence is only considered after each component +*> of the solution Y is stable, which we definte as the relative +*> change in each component being less than DZ_UB. The default value +*> is 0.25, requiring the first bit to be stable. See LAWN 165 for +*> more details. +*> \endverbatim +*> +*> \param[in] IGNORE_CWISE +*> \verbatim +*> IGNORE_CWISE is LOGICAL +*> If .TRUE. then ignore componentwise convergence. Default value +*> is .FALSE.. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> < 0: if INFO = -i, the ith argument to CGBTRS had an illegal +*> value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexGBcomputational +* +* ===================================================================== + SUBROUTINE CLA_GBRFSX_EXTENDED ( PREC_TYPE, TRANS_TYPE, N, KL, KU, + $ NRHS, AB, LDAB, AFB, LDAFB, IPIV, + $ COLEQU, C, B, LDB, Y, LDY, + $ BERR_OUT, N_NORMS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, RES, AYB, DY, + $ Y_TAIL, RCOND, ITHRESH, RTHRESH, + $ DZ_UB, IGNORE_CWISE, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDAB, LDAFB, LDB, LDY, N, KL, KU, NRHS, + $ PREC_TYPE, TRANS_TYPE, N_NORMS, ITHRESH + LOGICAL COLEQU, IGNORE_CWISE + REAL RTHRESH, DZ_UB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) + REAL C( * ), AYB(*), RCOND, BERR_OUT( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + CHARACTER TRANS + INTEGER CNT, I, J, M, X_STATE, Z_STATE, Y_PREC_STATE + REAL YK, DYK, YMIN, NORMY, NORMX, NORMDX, DXRAT, + $ DZRAT, PREVNORMDX, PREV_DZ_Z, DXRATMAX, + $ DZRATMAX, DX_X, DZ_Z, FINAL_DX_X, FINAL_DZ_Z, + $ EPS, HUGEVAL, INCR_THRESH + LOGICAL INCR_PREC + COMPLEX ZDUM +* .. +* .. Parameters .. + INTEGER UNSTABLE_STATE, WORKING_STATE, CONV_STATE, + $ NOPROG_STATE, BASE_RESIDUAL, EXTRA_RESIDUAL, + $ EXTRA_Y + PARAMETER ( UNSTABLE_STATE = 0, WORKING_STATE = 1, + $ CONV_STATE = 2, NOPROG_STATE = 3 ) + PARAMETER ( BASE_RESIDUAL = 0, EXTRA_RESIDUAL = 1, + $ EXTRA_Y = 2 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGBTRS, CGBMV, BLAS_CGBMV_X, + $ BLAS_CGBMV2_X, CLA_GBAMV, CLA_WWADDW, SLAMCH, + $ CHLA_TRANSTYPE, CLA_LIN_BERR + REAL SLAMCH + CHARACTER CHLA_TRANSTYPE +* .. +* .. Intrinsic Functions.. + INTRINSIC ABS, MAX, MIN +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + IF (INFO.NE.0) RETURN + TRANS = CHLA_TRANSTYPE(TRANS_TYPE) + EPS = SLAMCH( 'Epsilon' ) + HUGEVAL = SLAMCH( 'Overflow' ) +* Force HUGEVAL to Inf + HUGEVAL = HUGEVAL * HUGEVAL +* Using HUGEVAL may lead to spurious underflows. + INCR_THRESH = REAL( N ) * EPS + M = KL+KU+1 + + DO J = 1, NRHS + Y_PREC_STATE = EXTRA_RESIDUAL + IF ( Y_PREC_STATE .EQ. EXTRA_Y ) then + DO I = 1, N + Y_TAIL( I ) = 0.0 + END DO + END IF + + DXRAT = 0.0E+0 + DXRATMAX = 0.0E+0 + DZRAT = 0.0E+0 + DZRATMAX = 0.0E+0 + FINAL_DX_X = HUGEVAL + FINAL_DZ_Z = HUGEVAL + PREVNORMDX = HUGEVAL + PREV_DZ_Z = HUGEVAL + DZ_Z = HUGEVAL + DX_X = HUGEVAL + + X_STATE = WORKING_STATE + Z_STATE = UNSTABLE_STATE + INCR_PREC = .FALSE. + + DO CNT = 1, ITHRESH +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL CCOPY( N, B( 1, J ), 1, RES, 1 ) + IF ( Y_PREC_STATE .EQ. BASE_RESIDUAL ) THEN + CALL CGBMV( TRANS, M, N, KL, KU, (-1.0E+0,0.0E+0), AB, + $ LDAB, Y( 1, J ), 1, (1.0E+0,0.0E+0), RES, 1 ) + ELSE IF ( Y_PREC_STATE .EQ. EXTRA_RESIDUAL ) THEN + CALL BLAS_CGBMV_X( TRANS_TYPE, N, N, KL, KU, + $ (-1.0E+0,0.0E+0), AB, LDAB, Y( 1, J ), 1, + $ (1.0E+0,0.0E+0), RES, 1, PREC_TYPE ) + ELSE + CALL BLAS_CGBMV2_X( TRANS_TYPE, N, N, KL, KU, + $ (-1.0E+0,0.0E+0), AB, LDAB, Y( 1, J ), Y_TAIL, 1, + $ (1.0E+0,0.0E+0), RES, 1, PREC_TYPE ) + END IF + +! XXX: RES is no longer needed. + CALL CCOPY( N, RES, 1, DY, 1 ) + CALL CGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV, DY, N, + $ INFO ) +* +* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. +* + NORMX = 0.0E+0 + NORMY = 0.0E+0 + NORMDX = 0.0E+0 + DZ_Z = 0.0E+0 + YMIN = HUGEVAL + + DO I = 1, N + YK = CABS1( Y( I, J ) ) + DYK = CABS1( DY( I ) ) + + IF (YK .NE. 0.0) THEN + DZ_Z = MAX( DZ_Z, DYK / YK ) + ELSE IF ( DYK .NE. 0.0 ) THEN + DZ_Z = HUGEVAL + END IF + + YMIN = MIN( YMIN, YK ) + + NORMY = MAX( NORMY, YK ) + + IF ( COLEQU ) THEN + NORMX = MAX( NORMX, YK * C( I ) ) + NORMDX = MAX(NORMDX, DYK * C(I)) + ELSE + NORMX = NORMY + NORMDX = MAX( NORMDX, DYK ) + END IF + END DO + + IF ( NORMX .NE. 0.0 ) THEN + DX_X = NORMDX / NORMX + ELSE IF ( NORMDX .EQ. 0.0 ) THEN + DX_X = 0.0 + ELSE + DX_X = HUGEVAL + END IF + + DXRAT = NORMDX / PREVNORMDX + DZRAT = DZ_Z / PREV_DZ_Z +* +* Check termination criteria. +* + IF (.NOT.IGNORE_CWISE + $ .AND. YMIN*RCOND .LT. INCR_THRESH*NORMY + $ .AND. Y_PREC_STATE .LT. EXTRA_Y ) + $ INCR_PREC = .TRUE. + + IF ( X_STATE .EQ. NOPROG_STATE .AND. DXRAT .LE. RTHRESH ) + $ X_STATE = WORKING_STATE + IF ( X_STATE .EQ. WORKING_STATE ) THEN + IF ( DX_X .LE. EPS ) THEN + X_STATE = CONV_STATE + ELSE IF ( DXRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + X_STATE = NOPROG_STATE + END IF + ELSE + IF ( DXRAT .GT. DXRATMAX ) DXRATMAX = DXRAT + END IF + IF ( X_STATE .GT. WORKING_STATE ) FINAL_DX_X = DX_X + END IF + + IF ( Z_STATE .EQ. UNSTABLE_STATE .AND. DZ_Z .LE. DZ_UB ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. NOPROG_STATE .AND. DZRAT .LE. RTHRESH ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. WORKING_STATE ) THEN + IF ( DZ_Z .LE. EPS ) THEN + Z_STATE = CONV_STATE + ELSE IF ( DZ_Z .GT. DZ_UB ) THEN + Z_STATE = UNSTABLE_STATE + DZRATMAX = 0.0 + FINAL_DZ_Z = HUGEVAL + ELSE IF ( DZRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + Z_STATE = NOPROG_STATE + END IF + ELSE + IF ( DZRAT .GT. DZRATMAX ) DZRATMAX = DZRAT + END IF + IF ( Z_STATE .GT. WORKING_STATE ) FINAL_DZ_Z = DZ_Z + END IF +* +* Exit if both normwise and componentwise stopped working, +* but if componentwise is unstable, let it go at least two +* iterations. +* + IF ( X_STATE.NE.WORKING_STATE ) THEN + IF ( IGNORE_CWISE ) GOTO 666 + IF ( Z_STATE.EQ.NOPROG_STATE .OR. Z_STATE.EQ.CONV_STATE ) + $ GOTO 666 + IF ( Z_STATE.EQ.UNSTABLE_STATE .AND. CNT.GT.1 ) GOTO 666 + END IF + + IF ( INCR_PREC ) THEN + INCR_PREC = .FALSE. + Y_PREC_STATE = Y_PREC_STATE + 1 + DO I = 1, N + Y_TAIL( I ) = 0.0 + END DO + END IF + + PREVNORMDX = NORMDX + PREV_DZ_Z = DZ_Z +* +* Update soluton. +* + IF ( Y_PREC_STATE .LT. EXTRA_Y ) THEN + CALL CAXPY( N, (1.0E+0,0.0E+0), DY, 1, Y(1,J), 1 ) + ELSE + CALL CLA_WWADDW( N, Y(1,J), Y_TAIL, DY ) + END IF + + END DO +* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. + 666 CONTINUE +* +* Set final_* when cnt hits ithresh. +* + IF ( X_STATE .EQ. WORKING_STATE ) FINAL_DX_X = DX_X + IF ( Z_STATE .EQ. WORKING_STATE ) FINAL_DZ_Z = DZ_Z +* +* Compute error bounds. +* + IF ( N_NORMS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = + $ FINAL_DX_X / (1 - DXRATMAX) + END IF + IF ( N_NORMS .GE. 2 ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = + $ FINAL_DZ_Z / (1 - DZRATMAX) + END IF +* +* Compute componentwise relative backward error from formula +* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL CCOPY( N, B( 1, J ), 1, RES, 1 ) + CALL CGBMV( TRANS, N, N, KL, KU, (-1.0E+0,0.0E+0), AB, LDAB, + $ Y(1,J), 1, (1.0E+0,0.0E+0), RES, 1 ) + + DO I = 1, N + AYB( I ) = CABS1( B( I, J ) ) + END DO +* +* Compute abs(op(A_s))*abs(Y) + abs(B_s). +* + CALL CLA_GBAMV( TRANS_TYPE, N, N, KL, KU, 1.0E+0, + $ AB, LDAB, Y(1, J), 1, 1.0E+0, AYB, 1 ) + + CALL CLA_LIN_BERR( N, N, 1, RES, AYB, BERR_OUT( J ) ) +* +* End of loop for each RHS. +* + END DO +* + RETURN + END diff --git a/dspl/liblapack/SRC/cla_gbrpvgrw.f b/dspl/liblapack/SRC/cla_gbrpvgrw.f new file mode 100644 index 0000000..f60b714 --- /dev/null +++ b/dspl/liblapack/SRC/cla_gbrpvgrw.f @@ -0,0 +1,167 @@ +*> \brief \b CLA_GBRPVGRW computes the reciprocal pivot growth factor norm(A)/norm(U) for a general banded matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_GBRPVGRW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLA_GBRPVGRW( N, KL, KU, NCOLS, AB, LDAB, AFB, +* LDAFB ) +* +* .. Scalar Arguments .. +* INTEGER N, KL, KU, NCOLS, LDAB, LDAFB +* .. +* .. Array Arguments .. +* COMPLEX AB( LDAB, * ), AFB( LDAFB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLA_GBRPVGRW computes the reciprocal pivot growth factor +*> norm(A)/norm(U). The "max absolute element" norm is used. If this is +*> much less than 1, the stability of the LU factorization of the +*> (equilibrated) matrix A could be poor. This also means that the +*> solution X, estimated condition numbers, and error bounds could be +*> unreliable. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NCOLS +*> \verbatim +*> NCOLS is INTEGER +*> The number of columns of the matrix A. NCOLS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is COMPLEX array, dimension (LDAFB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by CGBTRF. U is stored as an upper triangular +*> band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +*> and the multipliers used during the factorization are stored +*> in rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGBcomputational +* +* ===================================================================== + REAL FUNCTION CLA_GBRPVGRW( N, KL, KU, NCOLS, AB, LDAB, AFB, + $ LDAFB ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER N, KL, KU, NCOLS, LDAB, LDAFB +* .. +* .. Array Arguments .. + COMPLEX AB( LDAB, * ), AFB( LDAFB, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J, KD + REAL AMAX, UMAX, RPVGRW + COMPLEX ZDUM +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, AIMAG +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + RPVGRW = 1.0 + + KD = KU + 1 + DO J = 1, NCOLS + AMAX = 0.0 + UMAX = 0.0 + DO I = MAX( J-KU, 1 ), MIN( J+KL, N ) + AMAX = MAX( CABS1( AB( KD+I-J, J ) ), AMAX ) + END DO + DO I = MAX( J-KU, 1 ), J + UMAX = MAX( CABS1( AFB( KD+I-J, J ) ), UMAX ) + END DO + IF ( UMAX /= 0.0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + CLA_GBRPVGRW = RPVGRW + END diff --git a/dspl/liblapack/SRC/cla_geamv.f b/dspl/liblapack/SRC/cla_geamv.f new file mode 100644 index 0000000..cbbcca8 --- /dev/null +++ b/dspl/liblapack/SRC/cla_geamv.f @@ -0,0 +1,406 @@ +*> \brief \b CLA_GEAMV computes a matrix-vector product using a general matrix to calculate error bounds. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_GEAMV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLA_GEAMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, +* Y, INCY ) +* +* .. Scalar Arguments .. +* REAL ALPHA, BETA +* INTEGER INCX, INCY, LDA, M, N +* INTEGER TRANS +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), X( * ) +* REAL Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLA_GEAMV performs one of the matrix-vector operations +*> +*> y := alpha*abs(A)*abs(x) + beta*abs(y), +*> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), +*> +*> where alpha and beta are scalars, x and y are vectors and A is an +*> m by n matrix. +*> +*> This function is primarily used in calculating error bounds. +*> To protect against underflow during evaluation, components in +*> the resulting vector are perturbed away from zero by (N+1) +*> times the underflow threshold. To prevent unnecessarily large +*> errors for block-structure embedded in general matrices, +*> "symbolically" zero components are not perturbed. A zero +*> entry is considered "symbolic" if all multiplications involved +*> in computing that entry have at least one zero multiplicand. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is INTEGER +*> On entry, TRANS specifies the operation to be performed as +*> follows: +*> +*> BLAS_NO_TRANS y := alpha*abs(A)*abs(x) + beta*abs(y) +*> BLAS_TRANS y := alpha*abs(A**T)*abs(x) + beta*abs(y) +*> BLAS_CONJ_TRANS y := alpha*abs(A**T)*abs(x) + beta*abs(y) +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the number of rows of the matrix A. +*> M must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is REAL +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,n) +*> Before entry, the leading m by n part of the array A must +*> contain the matrix of coefficients. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, m ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX array, dimension +*> ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +*> and at least +*> ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +*> Before entry, the incremented array X must contain the +*> vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is REAL +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is REAL array, dimension +*> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +*> and at least +*> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +*> Before entry with BETA non-zero, the incremented array Y +*> must contain the vector y. On exit, Y is overwritten by the +*> updated vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> Unchanged on exit. +*> +*> Level 2 Blas routine. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +* ===================================================================== + SUBROUTINE CLA_GEAMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, + $ Y, INCY ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + REAL ALPHA, BETA + INTEGER INCX, INCY, LDA, M, N + INTEGER TRANS +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ) + REAL Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL SYMB_ZERO + REAL TEMP, SAFE1 + INTEGER I, INFO, IY, J, JX, KX, KY, LENX, LENY + COMPLEX CDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SLAMCH + REAL SLAMCH +* .. +* .. External Functions .. + EXTERNAL ILATRANS + INTEGER ILATRANS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, ABS, REAL, AIMAG, SIGN +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.( ( TRANS.EQ.ILATRANS( 'N' ) ) + $ .OR. ( TRANS.EQ.ILATRANS( 'T' ) ) + $ .OR. ( TRANS.EQ.ILATRANS( 'C' ) ) ) ) THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CLA_GEAMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Set SAFE1 essentially to be the underflow threshold times the +* number of additions in each row. +* + SAFE1 = SLAMCH( 'Safe minimum' ) + SAFE1 = (N+1)*SAFE1 +* +* Form y := alpha*abs(A)*abs(x) + beta*abs(y). +* +* The O(M*N) SYMB_ZERO tests could be replaced by O(N) queries to +* the inexact flag. Still doesn't help change the iteration order +* to per-column. +* + IY = KY + IF ( INCX.EQ.1 ) THEN + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + DO I = 1, LENY + IF ( BETA .EQ. 0.0 ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. 0.0 ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. 0.0 ) THEN + DO J = 1, LENX + TEMP = CABS1( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) Y( IY ) = + $ Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, LENY + IF ( BETA .EQ. 0.0 ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. 0.0 ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. 0.0 ) THEN + DO J = 1, LENX + TEMP = CABS1( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) Y( IY ) = + $ Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + ELSE + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + DO I = 1, LENY + IF ( BETA .EQ. 0.0 ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. 0.0 ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. 0.0 ) THEN + JX = KX + DO J = 1, LENX + TEMP = CABS1( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( JX ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) Y( IY ) = + $ Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, LENY + IF ( BETA .EQ. 0.0 ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. 0.0 ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. 0.0 ) THEN + JX = KX + DO J = 1, LENX + TEMP = CABS1( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( JX ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) Y( IY ) = + $ Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + + END IF +* + RETURN +* +* End of CLA_GEAMV +* + END diff --git a/dspl/liblapack/SRC/cla_gercond_c.f b/dspl/liblapack/SRC/cla_gercond_c.f new file mode 100644 index 0000000..aabdc0b --- /dev/null +++ b/dspl/liblapack/SRC/cla_gercond_c.f @@ -0,0 +1,317 @@ +*> \brief \b CLA_GERCOND_C computes the infinity norm condition number of op(A)*inv(diag(c)) for general matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_GERCOND_C + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLA_GERCOND_C( TRANS, N, A, LDA, AF, LDAF, IPIV, C, +* CAPPLY, INFO, WORK, RWORK ) +* +* .. Scalar Aguments .. +* CHARACTER TRANS +* LOGICAL CAPPLY +* INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), WORK( * ) +* REAL C( * ), RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> CLA_GERCOND_C computes the infinity norm condition number of +*> op(A) * inv(diag(C)) where C is a REAL vector. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate Transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The factors L and U from the factorization +*> A = P*L*U as computed by CGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from the factorization A = P*L*U +*> as computed by CGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The vector C in the formula op(A) * inv(diag(C)). +*> \endverbatim +*> +*> \param[in] CAPPLY +*> \verbatim +*> CAPPLY is LOGICAL +*> If .TRUE. then access the vector C in the formula above. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +* ===================================================================== + REAL FUNCTION CLA_GERCOND_C( TRANS, N, A, LDA, AF, LDAF, IPIV, C, + $ CAPPLY, INFO, WORK, RWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Aguments .. + CHARACTER TRANS + LOGICAL CAPPLY + INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), WORK( * ) + REAL C( * ), RWORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRANS + INTEGER KASE, I, J + REAL AINVNM, ANORM, TMP + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CGETRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL, AIMAG +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. + CLA_GERCOND_C = 0.0E+0 +* + INFO = 0 + NOTRANS = LSAME( TRANS, 'N' ) + IF ( .NOT. NOTRANS .AND. .NOT. LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLA_GERCOND_C', -INFO ) + RETURN + END IF +* +* Compute norm of op(A)*op2(C). +* + ANORM = 0.0E+0 + IF ( NOTRANS ) THEN + DO I = 1, N + TMP = 0.0E+0 + IF ( CAPPLY ) THEN + DO J = 1, N + TMP = TMP + CABS1( A( I, J ) ) / C( J ) + END DO + ELSE + DO J = 1, N + TMP = TMP + CABS1( A( I, J ) ) + END DO + END IF + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + ELSE + DO I = 1, N + TMP = 0.0E+0 + IF ( CAPPLY ) THEN + DO J = 1, N + TMP = TMP + CABS1( A( J, I ) ) / C( J ) + END DO + ELSE + DO J = 1, N + TMP = TMP + CABS1( A( J, I ) ) + END DO + END IF + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + CLA_GERCOND_C = 1.0E+0 + RETURN + ELSE IF( ANORM .EQ. 0.0E+0 ) THEN + RETURN + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0E+0 +* + KASE = 0 + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO +* + IF (NOTRANS) THEN + CALL CGETRS( 'No transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL CGETRS( 'Conjugate transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ENDIF +* +* Multiply by inv(C). +* + IF ( CAPPLY ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + ELSE +* +* Multiply by inv(C**H). +* + IF ( CAPPLY ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF +* + IF ( NOTRANS ) THEN + CALL CGETRS( 'Conjugate transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL CGETRS( 'No transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0E+0 ) + $ CLA_GERCOND_C = 1.0E+0 / AINVNM +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/cla_gercond_x.f b/dspl/liblapack/SRC/cla_gercond_x.f new file mode 100644 index 0000000..6dce99f --- /dev/null +++ b/dspl/liblapack/SRC/cla_gercond_x.f @@ -0,0 +1,293 @@ +*> \brief \b CLA_GERCOND_X computes the infinity norm condition number of op(A)*diag(x) for general matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_GERCOND_X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLA_GERCOND_X( TRANS, N, A, LDA, AF, LDAF, IPIV, X, +* INFO, WORK, RWORK ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), WORK( * ), X( * ) +* REAL RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> CLA_GERCOND_X computes the infinity norm condition number of +*> op(A) * diag(X) where X is a COMPLEX vector. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate Transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The factors L and U from the factorization +*> A = P*L*U as computed by CGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from the factorization A = P*L*U +*> as computed by CGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX array, dimension (N) +*> The vector X in the formula op(A) * diag(X). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +* ===================================================================== + REAL FUNCTION CLA_GERCOND_X( TRANS, N, A, LDA, AF, LDAF, IPIV, X, + $ INFO, WORK, RWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), WORK( * ), X( * ) + REAL RWORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRANS + INTEGER KASE + REAL AINVNM, ANORM, TMP + INTEGER I, J + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CGETRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL, AIMAG +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + CLA_GERCOND_X = 0.0E+0 +* + INFO = 0 + NOTRANS = LSAME( TRANS, 'N' ) + IF ( .NOT. NOTRANS .AND. .NOT. LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLA_GERCOND_X', -INFO ) + RETURN + END IF +* +* Compute norm of op(A)*op2(C). +* + ANORM = 0.0 + IF ( NOTRANS ) THEN + DO I = 1, N + TMP = 0.0E+0 + DO J = 1, N + TMP = TMP + CABS1( A( I, J ) * X( J ) ) + END DO + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + ELSE + DO I = 1, N + TMP = 0.0E+0 + DO J = 1, N + TMP = TMP + CABS1( A( J, I ) * X( J ) ) + END DO + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + CLA_GERCOND_X = 1.0E+0 + RETURN + ELSE IF( ANORM .EQ. 0.0E+0 ) THEN + RETURN + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0E+0 +* + KASE = 0 + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* Multiply by R. + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO +* + IF ( NOTRANS ) THEN + CALL CGETRS( 'No transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL CGETRS( 'Conjugate transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ENDIF +* +* Multiply by inv(X). +* + DO I = 1, N + WORK( I ) = WORK( I ) / X( I ) + END DO + ELSE +* +* Multiply by inv(X**H). +* + DO I = 1, N + WORK( I ) = WORK( I ) / X( I ) + END DO +* + IF ( NOTRANS ) THEN + CALL CGETRS( 'Conjugate transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL CGETRS( 'No transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0E+0 ) + $ CLA_GERCOND_X = 1.0E+0 / AINVNM +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/cla_gerfsx_extended.f b/dspl/liblapack/SRC/cla_gerfsx_extended.f new file mode 100644 index 0000000..2e05963 --- /dev/null +++ b/dspl/liblapack/SRC/cla_gerfsx_extended.f @@ -0,0 +1,698 @@ +*> \brief \b CLA_GERFSX_EXTENDED +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_GERFSX_EXTENDED + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLA_GERFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, NRHS, A, +* LDA, AF, LDAF, IPIV, COLEQU, C, B, +* LDB, Y, LDY, BERR_OUT, N_NORMS, +* ERRS_N, ERRS_C, RES, AYB, DY, +* Y_TAIL, RCOND, ITHRESH, RTHRESH, +* DZ_UB, IGNORE_CWISE, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, +* $ TRANS_TYPE, N_NORMS +* LOGICAL COLEQU, IGNORE_CWISE +* INTEGER ITHRESH +* REAL RTHRESH, DZ_UB +* .. +* .. Array Arguments +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) +* REAL C( * ), AYB( * ), RCOND, BERR_OUT( * ), +* $ ERRS_N( NRHS, * ), ERRS_C( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> CLA_GERFSX_EXTENDED improves the computed solution to a system of +*> linear equations by performing extra-precise iterative refinement +*> and provides error bounds and backward error estimates for the solution. +*> This subroutine is called by CGERFSX to perform iterative refinement. +*> In addition to normwise error bound, the code provides maximum +*> componentwise error bound if possible. See comments for ERRS_N +*> and ERRS_C for details of the error bounds. Note that this +*> subroutine is only resonsible for setting the second fields of +*> ERRS_N and ERRS_C. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] PREC_TYPE +*> \verbatim +*> PREC_TYPE is INTEGER +*> Specifies the intermediate precision to be used in refinement. +*> The value is defined by ILAPREC(P) where P is a CHARACTER and +*> P = 'S': Single +*> = 'D': Double +*> = 'I': Indigenous +*> = 'X', 'E': Extra +*> \endverbatim +*> +*> \param[in] TRANS_TYPE +*> \verbatim +*> TRANS_TYPE is INTEGER +*> Specifies the transposition operation on A. +*> The value is defined by ILATRANS(T) where T is a CHARACTER and +*> T = 'N': No transpose +*> = 'T': Transpose +*> = 'C': Conjugate transpose +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand-sides, i.e., the number of columns of the +*> matrix B. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The factors L and U from the factorization +*> A = P*L*U as computed by CGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from the factorization A = P*L*U +*> as computed by CGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> \endverbatim +*> +*> \param[in] COLEQU +*> \verbatim +*> COLEQU is LOGICAL +*> If .TRUE. then column equilibration was done to A before calling +*> this routine. This is needed to compute the solution and error +*> bounds correctly. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If COLEQU = .FALSE., C +*> is not accessed. If C is input, each element of C should be a power +*> of the radix to ensure a reliable solution and error estimates. +*> Scaling by powers of the radix does not cause rounding errors unless +*> the result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right-hand-side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX array, dimension (LDY,NRHS) +*> On entry, the solution matrix X, as computed by CGETRS. +*> On exit, the improved solution matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +*> +*> \param[out] BERR_OUT +*> \verbatim +*> BERR_OUT is REAL array, dimension (NRHS) +*> On exit, BERR_OUT(j) contains the componentwise relative backward +*> error for right-hand-side j from the formula +*> max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. This is computed by CLA_LIN_BERR. +*> \endverbatim +*> +*> \param[in] N_NORMS +*> \verbatim +*> N_NORMS is INTEGER +*> Determines which error bounds to return (see ERRS_N +*> and ERRS_C). +*> If N_NORMS >= 1 return normwise error bounds. +*> If N_NORMS >= 2 return componentwise error bounds. +*> \endverbatim +*> +*> \param[in,out] ERRS_N +*> \verbatim +*> ERRS_N is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERRS_N(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERRS_N(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in,out] ERRS_C +*> \verbatim +*> ERRS_C is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERRS_C is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERRS_C(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERRS_C(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is COMPLEX array, dimension (N) +*> Workspace to hold the intermediate residual. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is REAL array, dimension (N) +*> Workspace. +*> \endverbatim +*> +*> \param[in] DY +*> \verbatim +*> DY is COMPLEX array, dimension (N) +*> Workspace to hold the intermediate solution. +*> \endverbatim +*> +*> \param[in] Y_TAIL +*> \verbatim +*> Y_TAIL is COMPLEX array, dimension (N) +*> Workspace to hold the trailing bits of the intermediate solution. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[in] ITHRESH +*> \verbatim +*> ITHRESH is INTEGER +*> The maximum number of residual computations allowed for +*> refinement. The default is 10. For 'aggressive' set to 100 to +*> permit convergence using approximate factorizations or +*> factorizations other than LU. If the factorization uses a +*> technique other than Gaussian elimination, the guarantees in +*> ERRS_N and ERRS_C may no longer be trustworthy. +*> \endverbatim +*> +*> \param[in] RTHRESH +*> \verbatim +*> RTHRESH is REAL +*> Determines when to stop refinement if the error estimate stops +*> decreasing. Refinement will stop when the next solution no longer +*> satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is +*> the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The +*> default value is 0.5. For 'aggressive' set to 0.9 to permit +*> convergence on extremely ill-conditioned matrices. See LAWN 165 +*> for more details. +*> \endverbatim +*> +*> \param[in] DZ_UB +*> \verbatim +*> DZ_UB is REAL +*> Determines when to start considering componentwise convergence. +*> Componentwise convergence is only considered after each component +*> of the solution Y is stable, which we definte as the relative +*> change in each component being less than DZ_UB. The default value +*> is 0.25, requiring the first bit to be stable. See LAWN 165 for +*> more details. +*> \endverbatim +*> +*> \param[in] IGNORE_CWISE +*> \verbatim +*> IGNORE_CWISE is LOGICAL +*> If .TRUE. then ignore componentwise convergence. Default value +*> is .FALSE.. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> < 0: if INFO = -i, the ith argument to CGETRS had an illegal +*> value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +* ===================================================================== + SUBROUTINE CLA_GERFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, NRHS, A, + $ LDA, AF, LDAF, IPIV, COLEQU, C, B, + $ LDB, Y, LDY, BERR_OUT, N_NORMS, + $ ERRS_N, ERRS_C, RES, AYB, DY, + $ Y_TAIL, RCOND, ITHRESH, RTHRESH, + $ DZ_UB, IGNORE_CWISE, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, + $ TRANS_TYPE, N_NORMS + LOGICAL COLEQU, IGNORE_CWISE + INTEGER ITHRESH + REAL RTHRESH, DZ_UB +* .. +* .. Array Arguments + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) + REAL C( * ), AYB( * ), RCOND, BERR_OUT( * ), + $ ERRS_N( NRHS, * ), ERRS_C( NRHS, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + CHARACTER TRANS + INTEGER CNT, I, J, X_STATE, Z_STATE, Y_PREC_STATE + REAL YK, DYK, YMIN, NORMY, NORMX, NORMDX, DXRAT, + $ DZRAT, PREVNORMDX, PREV_DZ_Z, DXRATMAX, + $ DZRATMAX, DX_X, DZ_Z, FINAL_DX_X, FINAL_DZ_Z, + $ EPS, HUGEVAL, INCR_THRESH + LOGICAL INCR_PREC + COMPLEX ZDUM +* .. +* .. Parameters .. + INTEGER UNSTABLE_STATE, WORKING_STATE, CONV_STATE, + $ NOPROG_STATE, BASE_RESIDUAL, EXTRA_RESIDUAL, + $ EXTRA_Y + PARAMETER ( UNSTABLE_STATE = 0, WORKING_STATE = 1, + $ CONV_STATE = 2, + $ NOPROG_STATE = 3 ) + PARAMETER ( BASE_RESIDUAL = 0, EXTRA_RESIDUAL = 1, + $ EXTRA_Y = 2 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGETRS, CGEMV, BLAS_CGEMV_X, + $ BLAS_CGEMV2_X, CLA_GEAMV, CLA_WWADDW, SLAMCH, + $ CHLA_TRANSTYPE, CLA_LIN_BERR + REAL SLAMCH + CHARACTER CHLA_TRANSTYPE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + IF ( INFO.NE.0 ) RETURN + TRANS = CHLA_TRANSTYPE(TRANS_TYPE) + EPS = SLAMCH( 'Epsilon' ) + HUGEVAL = SLAMCH( 'Overflow' ) +* Force HUGEVAL to Inf + HUGEVAL = HUGEVAL * HUGEVAL +* Using HUGEVAL may lead to spurious underflows. + INCR_THRESH = REAL( N ) * EPS +* + DO J = 1, NRHS + Y_PREC_STATE = EXTRA_RESIDUAL + IF ( Y_PREC_STATE .EQ. EXTRA_Y ) THEN + DO I = 1, N + Y_TAIL( I ) = 0.0 + END DO + END IF + + DXRAT = 0.0 + DXRATMAX = 0.0 + DZRAT = 0.0 + DZRATMAX = 0.0 + FINAL_DX_X = HUGEVAL + FINAL_DZ_Z = HUGEVAL + PREVNORMDX = HUGEVAL + PREV_DZ_Z = HUGEVAL + DZ_Z = HUGEVAL + DX_X = HUGEVAL + + X_STATE = WORKING_STATE + Z_STATE = UNSTABLE_STATE + INCR_PREC = .FALSE. + + DO CNT = 1, ITHRESH +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL CCOPY( N, B( 1, J ), 1, RES, 1 ) + IF ( Y_PREC_STATE .EQ. BASE_RESIDUAL ) THEN + CALL CGEMV( TRANS, N, N, (-1.0E+0,0.0E+0), A, LDA, + $ Y( 1, J ), 1, (1.0E+0,0.0E+0), RES, 1) + ELSE IF (Y_PREC_STATE .EQ. EXTRA_RESIDUAL) THEN + CALL BLAS_CGEMV_X( TRANS_TYPE, N, N, (-1.0E+0,0.0E+0), A, + $ LDA, Y( 1, J ), 1, (1.0E+0,0.0E+0), + $ RES, 1, PREC_TYPE ) + ELSE + CALL BLAS_CGEMV2_X( TRANS_TYPE, N, N, (-1.0E+0,0.0E+0), + $ A, LDA, Y(1, J), Y_TAIL, 1, (1.0E+0,0.0E+0), RES, 1, + $ PREC_TYPE) + END IF + +! XXX: RES is no longer needed. + CALL CCOPY( N, RES, 1, DY, 1 ) + CALL CGETRS( TRANS, N, 1, AF, LDAF, IPIV, DY, N, INFO ) +* +* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. +* + NORMX = 0.0E+0 + NORMY = 0.0E+0 + NORMDX = 0.0E+0 + DZ_Z = 0.0E+0 + YMIN = HUGEVAL +* + DO I = 1, N + YK = CABS1( Y( I, J ) ) + DYK = CABS1( DY( I ) ) + + IF ( YK .NE. 0.0E+0 ) THEN + DZ_Z = MAX( DZ_Z, DYK / YK ) + ELSE IF ( DYK .NE. 0.0 ) THEN + DZ_Z = HUGEVAL + END IF + + YMIN = MIN( YMIN, YK ) + + NORMY = MAX( NORMY, YK ) + + IF ( COLEQU ) THEN + NORMX = MAX( NORMX, YK * C( I ) ) + NORMDX = MAX( NORMDX, DYK * C( I ) ) + ELSE + NORMX = NORMY + NORMDX = MAX(NORMDX, DYK) + END IF + END DO + + IF ( NORMX .NE. 0.0 ) THEN + DX_X = NORMDX / NORMX + ELSE IF ( NORMDX .EQ. 0.0 ) THEN + DX_X = 0.0 + ELSE + DX_X = HUGEVAL + END IF + + DXRAT = NORMDX / PREVNORMDX + DZRAT = DZ_Z / PREV_DZ_Z +* +* Check termination criteria +* + IF (.NOT.IGNORE_CWISE + $ .AND. YMIN*RCOND .LT. INCR_THRESH*NORMY + $ .AND. Y_PREC_STATE .LT. EXTRA_Y ) + $ INCR_PREC = .TRUE. + + IF ( X_STATE .EQ. NOPROG_STATE .AND. DXRAT .LE. RTHRESH ) + $ X_STATE = WORKING_STATE + IF ( X_STATE .EQ. WORKING_STATE ) THEN + IF (DX_X .LE. EPS) THEN + X_STATE = CONV_STATE + ELSE IF ( DXRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + X_STATE = NOPROG_STATE + END IF + ELSE + IF ( DXRAT .GT. DXRATMAX ) DXRATMAX = DXRAT + END IF + IF ( X_STATE .GT. WORKING_STATE ) FINAL_DX_X = DX_X + END IF + + IF ( Z_STATE .EQ. UNSTABLE_STATE .AND. DZ_Z .LE. DZ_UB ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. NOPROG_STATE .AND. DZRAT .LE. RTHRESH ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. WORKING_STATE ) THEN + IF ( DZ_Z .LE. EPS ) THEN + Z_STATE = CONV_STATE + ELSE IF ( DZ_Z .GT. DZ_UB ) THEN + Z_STATE = UNSTABLE_STATE + DZRATMAX = 0.0 + FINAL_DZ_Z = HUGEVAL + ELSE IF ( DZRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + Z_STATE = NOPROG_STATE + END IF + ELSE + IF ( DZRAT .GT. DZRATMAX ) DZRATMAX = DZRAT + END IF + IF ( Z_STATE .GT. WORKING_STATE ) FINAL_DZ_Z = DZ_Z + END IF +* +* Exit if both normwise and componentwise stopped working, +* but if componentwise is unstable, let it go at least two +* iterations. +* + IF ( X_STATE.NE.WORKING_STATE ) THEN + IF ( IGNORE_CWISE ) GOTO 666 + IF ( Z_STATE.EQ.NOPROG_STATE .OR. Z_STATE.EQ.CONV_STATE ) + $ GOTO 666 + IF ( Z_STATE.EQ.UNSTABLE_STATE .AND. CNT.GT.1 ) GOTO 666 + END IF + + IF ( INCR_PREC ) THEN + INCR_PREC = .FALSE. + Y_PREC_STATE = Y_PREC_STATE + 1 + DO I = 1, N + Y_TAIL( I ) = 0.0 + END DO + END IF + + PREVNORMDX = NORMDX + PREV_DZ_Z = DZ_Z +* +* Update soluton. +* + IF ( Y_PREC_STATE .LT. EXTRA_Y ) THEN + CALL CAXPY( N, (1.0E+0,0.0E+0), DY, 1, Y(1,J), 1 ) + ELSE + CALL CLA_WWADDW( N, Y( 1, J ), Y_TAIL, DY ) + END IF + + END DO +* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. + 666 CONTINUE +* +* Set final_* when cnt hits ithresh +* + IF ( X_STATE .EQ. WORKING_STATE ) FINAL_DX_X = DX_X + IF ( Z_STATE .EQ. WORKING_STATE ) FINAL_DZ_Z = DZ_Z +* +* Compute error bounds +* + IF (N_NORMS .GE. 1) THEN + ERRS_N( J, LA_LINRX_ERR_I ) = FINAL_DX_X / (1 - DXRATMAX) + + END IF + IF ( N_NORMS .GE. 2 ) THEN + ERRS_C( J, LA_LINRX_ERR_I ) = FINAL_DZ_Z / (1 - DZRATMAX) + END IF +* +* Compute componentwise relative backward error from formula +* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL CCOPY( N, B( 1, J ), 1, RES, 1 ) + CALL CGEMV( TRANS, N, N, (-1.0E+0,0.0E+0), A, LDA, Y(1,J), 1, + $ (1.0E+0,0.0E+0), RES, 1 ) + + DO I = 1, N + AYB( I ) = CABS1( B( I, J ) ) + END DO +* +* Compute abs(op(A_s))*abs(Y) + abs(B_s). +* + CALL CLA_GEAMV ( TRANS_TYPE, N, N, 1.0E+0, + $ A, LDA, Y(1, J), 1, 1.0E+0, AYB, 1 ) + + CALL CLA_LIN_BERR ( N, N, 1, RES, AYB, BERR_OUT( J ) ) +* +* End of loop for each RHS. +* + END DO +* + RETURN + END diff --git a/dspl/liblapack/SRC/cla_gerpvgrw.f b/dspl/liblapack/SRC/cla_gerpvgrw.f new file mode 100644 index 0000000..f6d27c8 --- /dev/null +++ b/dspl/liblapack/SRC/cla_gerpvgrw.f @@ -0,0 +1,147 @@ +*> \brief \b CLA_GERPVGRW multiplies a square real matrix by a complex matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_GERPVGRW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLA_GERPVGRW( N, NCOLS, A, LDA, AF, LDAF ) +* +* .. Scalar Arguments .. +* INTEGER N, NCOLS, LDA, LDAF +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), AF( LDAF, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> CLA_GERPVGRW computes the reciprocal pivot growth factor +*> norm(A)/norm(U). The "max absolute element" norm is used. If this is +*> much less than 1, the stability of the LU factorization of the +*> (equilibrated) matrix A could be poor. This also means that the +*> solution X, estimated condition numbers, and error bounds could be +*> unreliable. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NCOLS +*> \verbatim +*> NCOLS is INTEGER +*> The number of columns of the matrix A. NCOLS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The factors L and U from the factorization +*> A = P*L*U as computed by CGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +* ===================================================================== + REAL FUNCTION CLA_GERPVGRW( N, NCOLS, A, LDA, AF, LDAF ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER N, NCOLS, LDA, LDAF +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), AF( LDAF, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + REAL AMAX, UMAX, RPVGRW + COMPLEX ZDUM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, ABS, REAL, AIMAG +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + RPVGRW = 1.0 + + DO J = 1, NCOLS + AMAX = 0.0 + UMAX = 0.0 + DO I = 1, N + AMAX = MAX( CABS1( A( I, J ) ), AMAX ) + END DO + DO I = 1, J + UMAX = MAX( CABS1( AF( I, J ) ), UMAX ) + END DO + IF ( UMAX /= 0.0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + CLA_GERPVGRW = RPVGRW + END diff --git a/dspl/liblapack/SRC/cla_heamv.f b/dspl/liblapack/SRC/cla_heamv.f new file mode 100644 index 0000000..38a9492 --- /dev/null +++ b/dspl/liblapack/SRC/cla_heamv.f @@ -0,0 +1,426 @@ +*> \brief \b CLA_HEAMV computes a matrix-vector product using a Hermitian indefinite matrix to calculate error bounds. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_HEAMV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLA_HEAMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, +* INCY ) +* +* .. Scalar Arguments .. +* REAL ALPHA, BETA +* INTEGER INCX, INCY, LDA, N, UPLO +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), X( * ) +* REAL Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLA_SYAMV performs the matrix-vector operation +*> +*> y := alpha*abs(A)*abs(x) + beta*abs(y), +*> +*> where alpha and beta are scalars, x and y are vectors and A is an +*> n by n symmetric matrix. +*> +*> This function is primarily used in calculating error bounds. +*> To protect against underflow during evaluation, components in +*> the resulting vector are perturbed away from zero by (N+1) +*> times the underflow threshold. To prevent unnecessarily large +*> errors for block-structure embedded in general matrices, +*> "symbolically" zero components are not perturbed. A zero +*> entry is considered "symbolic" if all multiplications involved +*> in computing that entry have at least one zero multiplicand. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is INTEGER +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array A is to be referenced as +*> follows: +*> +*> UPLO = BLAS_UPPER Only the upper triangular part of A +*> is to be referenced. +*> +*> UPLO = BLAS_LOWER Only the lower triangular part of A +*> is to be referenced. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is REAL . +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension ( LDA, n ). +*> Before entry, the leading m by n part of the array A must +*> contain the matrix of coefficients. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, n ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX array, dimension +*> ( 1 + ( n - 1 )*abs( INCX ) ) +*> Before entry, the incremented array X must contain the +*> vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is REAL . +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is REAL array, dimension +*> ( 1 + ( n - 1 )*abs( INCY ) ) +*> Before entry with BETA non-zero, the incremented array Y +*> must contain the vector y. On exit, Y is overwritten by the +*> updated vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> Unchanged on exit. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexHEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 2 Blas routine. +*> +*> -- Written on 22-October-1986. +*> Jack Dongarra, Argonne National Lab. +*> Jeremy Du Croz, Nag Central Office. +*> Sven Hammarling, Nag Central Office. +*> Richard Hanson, Sandia National Labs. +*> -- Modified for the absolute-value product, April 2006 +*> Jason Riedy, UC Berkeley +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLA_HEAMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, + $ INCY ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + REAL ALPHA, BETA + INTEGER INCX, INCY, LDA, N, UPLO +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ) + REAL Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL SYMB_ZERO + REAL TEMP, SAFE1 + INTEGER I, INFO, IY, J, JX, KX, KY + COMPLEX ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SLAMCH + REAL SLAMCH +* .. +* .. External Functions .. + EXTERNAL ILAUPLO + INTEGER ILAUPLO +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, ABS, SIGN, REAL, AIMAG +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL ( ZDUM ) ) + ABS( AIMAG ( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( UPLO.NE.ILAUPLO( 'U' ) .AND. + $ UPLO.NE.ILAUPLO( 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 5 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + ELSE IF( INCY.EQ.0 )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'CHEMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Set SAFE1 essentially to be the underflow threshold times the +* number of additions in each row. +* + SAFE1 = SLAMCH( 'Safe minimum' ) + SAFE1 = (N+1)*SAFE1 +* +* Form y := alpha*abs(A)*abs(x) + beta*abs(y). +* +* The O(N^2) SYMB_ZERO tests could be replaced by O(N) queries to +* the inexact flag. Still doesn't help change the iteration order +* to per-column. +* + IY = KY + IF ( INCX.EQ.1 ) THEN + IF ( UPLO .EQ. ILAUPLO( 'U' ) ) THEN + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = CABS1( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + DO J = I+1, N + TEMP = CABS1( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + END IF + + IF (.NOT.SYMB_ZERO) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = CABS1( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + DO J = I+1, N + TEMP = CABS1( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + END IF + + IF (.NOT.SYMB_ZERO) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + ELSE + IF ( UPLO .EQ. ILAUPLO( 'U' ) ) THEN + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + JX = KX + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = CABS1( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + DO J = I+1, N + TEMP = CABS1( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + JX = KX + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = CABS1( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + DO J = I+1, N + TEMP = CABS1( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + + END IF +* + RETURN +* +* End of CLA_HEAMV +* + END diff --git a/dspl/liblapack/SRC/cla_hercond_c.f b/dspl/liblapack/SRC/cla_hercond_c.f new file mode 100644 index 0000000..a5ebaf8 --- /dev/null +++ b/dspl/liblapack/SRC/cla_hercond_c.f @@ -0,0 +1,327 @@ +*> \brief \b CLA_HERCOND_C computes the infinity norm condition number of op(A)*inv(diag(c)) for Hermitian indefinite matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_HERCOND_C + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLA_HERCOND_C( UPLO, N, A, LDA, AF, LDAF, IPIV, C, +* CAPPLY, INFO, WORK, RWORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* LOGICAL CAPPLY +* INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), WORK( * ) +* REAL C ( * ), RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLA_HERCOND_C computes the infinity norm condition number of +*> op(A) * inv(diag(C)) where C is a REAL vector. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CHETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHETRF. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The vector C in the formula op(A) * inv(diag(C)). +*> \endverbatim +*> +*> \param[in] CAPPLY +*> \verbatim +*> CAPPLY is LOGICAL +*> If .TRUE. then access the vector C in the formula above. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEcomputational +* +* ===================================================================== + REAL FUNCTION CLA_HERCOND_C( UPLO, N, A, LDA, AF, LDAF, IPIV, C, + $ CAPPLY, INFO, WORK, RWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + LOGICAL CAPPLY + INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), WORK( * ) + REAL C ( * ), RWORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER KASE, I, J + REAL AINVNM, ANORM, TMP + LOGICAL UP, UPPER + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CHETRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + CLA_HERCOND_C = 0.0E+0 +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLA_HERCOND_C', -INFO ) + RETURN + END IF + UP = .FALSE. + IF ( LSAME( UPLO, 'U' ) ) UP = .TRUE. +* +* Compute norm of op(A)*op2(C). +* + ANORM = 0.0E+0 + IF ( UP ) THEN + DO I = 1, N + TMP = 0.0E+0 + IF ( CAPPLY ) THEN + DO J = 1, I + TMP = TMP + CABS1( A( J, I ) ) / C( J ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( I, J ) ) / C( J ) + END DO + ELSE + DO J = 1, I + TMP = TMP + CABS1( A( J, I ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( I, J ) ) + END DO + END IF + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + ELSE + DO I = 1, N + TMP = 0.0E+0 + IF ( CAPPLY ) THEN + DO J = 1, I + TMP = TMP + CABS1( A( I, J ) ) / C( J ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( J, I ) ) / C( J ) + END DO + ELSE + DO J = 1, I + TMP = TMP + CABS1( A( I, J ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( J, I ) ) + END DO + END IF + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + CLA_HERCOND_C = 1.0E+0 + RETURN + ELSE IF( ANORM .EQ. 0.0E+0 ) THEN + RETURN + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0E+0 +* + KASE = 0 + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO +* + IF ( UP ) THEN + CALL CHETRS( 'U', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL CHETRS( 'L', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ENDIF +* +* Multiply by inv(C). +* + IF ( CAPPLY ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + ELSE +* +* Multiply by inv(C**H). +* + IF ( CAPPLY ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF +* + IF ( UP ) THEN + CALL CHETRS( 'U', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL CHETRS( 'L', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0E+0 ) + $ CLA_HERCOND_C = 1.0E+0 / AINVNM +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/cla_hercond_x.f b/dspl/liblapack/SRC/cla_hercond_x.f new file mode 100644 index 0000000..f000410 --- /dev/null +++ b/dspl/liblapack/SRC/cla_hercond_x.f @@ -0,0 +1,297 @@ +*> \brief \b CLA_HERCOND_X computes the infinity norm condition number of op(A)*diag(x) for Hermitian indefinite matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_HERCOND_X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLA_HERCOND_X( UPLO, N, A, LDA, AF, LDAF, IPIV, X, +* INFO, WORK, RWORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), WORK( * ), X( * ) +* REAL RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLA_HERCOND_X computes the infinity norm condition number of +*> op(A) * diag(X) where X is a COMPLEX vector. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CHETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHETRF. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX array, dimension (N) +*> The vector X in the formula op(A) * diag(X). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEcomputational +* +* ===================================================================== + REAL FUNCTION CLA_HERCOND_X( UPLO, N, A, LDA, AF, LDAF, IPIV, X, + $ INFO, WORK, RWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), WORK( * ), X( * ) + REAL RWORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER KASE, I, J + REAL AINVNM, ANORM, TMP + LOGICAL UP, UPPER + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CHETRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + CLA_HERCOND_X = 0.0E+0 +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF ( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLA_HERCOND_X', -INFO ) + RETURN + END IF + UP = .FALSE. + IF ( LSAME( UPLO, 'U' ) ) UP = .TRUE. +* +* Compute norm of op(A)*op2(C). +* + ANORM = 0.0 + IF ( UP ) THEN + DO I = 1, N + TMP = 0.0E+0 + DO J = 1, I + TMP = TMP + CABS1( A( J, I ) * X( J ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( I, J ) * X( J ) ) + END DO + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + ELSE + DO I = 1, N + TMP = 0.0E+0 + DO J = 1, I + TMP = TMP + CABS1( A( I, J ) * X( J ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( J, I ) * X( J ) ) + END DO + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + CLA_HERCOND_X = 1.0E+0 + RETURN + ELSE IF( ANORM .EQ. 0.0E+0 ) THEN + RETURN + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0E+0 +* + KASE = 0 + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO +* + IF ( UP ) THEN + CALL CHETRS( 'U', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL CHETRS( 'L', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ENDIF +* +* Multiply by inv(X). +* + DO I = 1, N + WORK( I ) = WORK( I ) / X( I ) + END DO + ELSE +* +* Multiply by inv(X**H). +* + DO I = 1, N + WORK( I ) = WORK( I ) / X( I ) + END DO +* + IF ( UP ) THEN + CALL CHETRS( 'U', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL CHETRS( 'L', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0E+0 ) + $ CLA_HERCOND_X = 1.0E+0 / AINVNM +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/cla_herfsx_extended.f b/dspl/liblapack/SRC/cla_herfsx_extended.f new file mode 100644 index 0000000..c69589d --- /dev/null +++ b/dspl/liblapack/SRC/cla_herfsx_extended.f @@ -0,0 +1,716 @@ +*> \brief \b CLA_HERFSX_EXTENDED improves the computed solution to a system of linear equations for Hermitian indefinite matrices by performing extra-precise iterative refinement and provides error bounds and backward error estimates for the solution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_HERFSX_EXTENDED + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLA_HERFSX_EXTENDED( PREC_TYPE, UPLO, N, NRHS, A, LDA, +* AF, LDAF, IPIV, COLEQU, C, B, LDB, +* Y, LDY, BERR_OUT, N_NORMS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, RES, +* AYB, DY, Y_TAIL, RCOND, ITHRESH, +* RTHRESH, DZ_UB, IGNORE_CWISE, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, +* $ N_NORMS, ITHRESH +* CHARACTER UPLO +* LOGICAL COLEQU, IGNORE_CWISE +* REAL RTHRESH, DZ_UB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) +* REAL C( * ), AYB( * ), RCOND, BERR_OUT( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLA_HERFSX_EXTENDED improves the computed solution to a system of +*> linear equations by performing extra-precise iterative refinement +*> and provides error bounds and backward error estimates for the solution. +*> This subroutine is called by CHERFSX to perform iterative refinement. +*> In addition to normwise error bound, the code provides maximum +*> componentwise error bound if possible. See comments for ERR_BNDS_NORM +*> and ERR_BNDS_COMP for details of the error bounds. Note that this +*> subroutine is only resonsible for setting the second fields of +*> ERR_BNDS_NORM and ERR_BNDS_COMP. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] PREC_TYPE +*> \verbatim +*> PREC_TYPE is INTEGER +*> Specifies the intermediate precision to be used in refinement. +*> The value is defined by ILAPREC(P) where P is a CHARACTER and +*> P = 'S': Single +*> = 'D': Double +*> = 'I': Indigenous +*> = 'X', 'E': Extra +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand-sides, i.e., the number of columns of the +*> matrix B. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CHETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHETRF. +*> \endverbatim +*> +*> \param[in] COLEQU +*> \verbatim +*> COLEQU is LOGICAL +*> If .TRUE. then column equilibration was done to A before calling +*> this routine. This is needed to compute the solution and error +*> bounds correctly. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If COLEQU = .FALSE., C +*> is not accessed. If C is input, each element of C should be a power +*> of the radix to ensure a reliable solution and error estimates. +*> Scaling by powers of the radix does not cause rounding errors unless +*> the result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right-hand-side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX array, dimension (LDY,NRHS) +*> On entry, the solution matrix X, as computed by CHETRS. +*> On exit, the improved solution matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +*> +*> \param[out] BERR_OUT +*> \verbatim +*> BERR_OUT is REAL array, dimension (NRHS) +*> On exit, BERR_OUT(j) contains the componentwise relative backward +*> error for right-hand-side j from the formula +*> max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. This is computed by CLA_LIN_BERR. +*> \endverbatim +*> +*> \param[in] N_NORMS +*> \verbatim +*> N_NORMS is INTEGER +*> Determines which error bounds to return (see ERR_BNDS_NORM +*> and ERR_BNDS_COMP). +*> If N_NORMS >= 1 return normwise error bounds. +*> If N_NORMS >= 2 return componentwise error bounds. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is COMPLEX array, dimension (N) +*> Workspace to hold the intermediate residual. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is REAL array, dimension (N) +*> Workspace. +*> \endverbatim +*> +*> \param[in] DY +*> \verbatim +*> DY is COMPLEX array, dimension (N) +*> Workspace to hold the intermediate solution. +*> \endverbatim +*> +*> \param[in] Y_TAIL +*> \verbatim +*> Y_TAIL is COMPLEX array, dimension (N) +*> Workspace to hold the trailing bits of the intermediate solution. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[in] ITHRESH +*> \verbatim +*> ITHRESH is INTEGER +*> The maximum number of residual computations allowed for +*> refinement. The default is 10. For 'aggressive' set to 100 to +*> permit convergence using approximate factorizations or +*> factorizations other than LU. If the factorization uses a +*> technique other than Gaussian elimination, the guarantees in +*> ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. +*> \endverbatim +*> +*> \param[in] RTHRESH +*> \verbatim +*> RTHRESH is REAL +*> Determines when to stop refinement if the error estimate stops +*> decreasing. Refinement will stop when the next solution no longer +*> satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is +*> the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The +*> default value is 0.5. For 'aggressive' set to 0.9 to permit +*> convergence on extremely ill-conditioned matrices. See LAWN 165 +*> for more details. +*> \endverbatim +*> +*> \param[in] DZ_UB +*> \verbatim +*> DZ_UB is REAL +*> Determines when to start considering componentwise convergence. +*> Componentwise convergence is only considered after each component +*> of the solution Y is stable, which we definte as the relative +*> change in each component being less than DZ_UB. The default value +*> is 0.25, requiring the first bit to be stable. See LAWN 165 for +*> more details. +*> \endverbatim +*> +*> \param[in] IGNORE_CWISE +*> \verbatim +*> IGNORE_CWISE is LOGICAL +*> If .TRUE. then ignore componentwise convergence. Default value +*> is .FALSE.. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> < 0: if INFO = -i, the ith argument to CLA_HERFSX_EXTENDED had an illegal +*> value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexHEcomputational +* +* ===================================================================== + SUBROUTINE CLA_HERFSX_EXTENDED( PREC_TYPE, UPLO, N, NRHS, A, LDA, + $ AF, LDAF, IPIV, COLEQU, C, B, LDB, + $ Y, LDY, BERR_OUT, N_NORMS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, RES, + $ AYB, DY, Y_TAIL, RCOND, ITHRESH, + $ RTHRESH, DZ_UB, IGNORE_CWISE, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, + $ N_NORMS, ITHRESH + CHARACTER UPLO + LOGICAL COLEQU, IGNORE_CWISE + REAL RTHRESH, DZ_UB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) + REAL C( * ), AYB( * ), RCOND, BERR_OUT( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER UPLO2, CNT, I, J, X_STATE, Z_STATE, + $ Y_PREC_STATE + REAL YK, DYK, YMIN, NORMY, NORMX, NORMDX, DXRAT, + $ DZRAT, PREVNORMDX, PREV_DZ_Z, DXRATMAX, + $ DZRATMAX, DX_X, DZ_Z, FINAL_DX_X, FINAL_DZ_Z, + $ EPS, HUGEVAL, INCR_THRESH + LOGICAL INCR_PREC, UPPER + COMPLEX ZDUM +* .. +* .. Parameters .. + INTEGER UNSTABLE_STATE, WORKING_STATE, CONV_STATE, + $ NOPROG_STATE, BASE_RESIDUAL, EXTRA_RESIDUAL, + $ EXTRA_Y + PARAMETER ( UNSTABLE_STATE = 0, WORKING_STATE = 1, + $ CONV_STATE = 2, NOPROG_STATE = 3 ) + PARAMETER ( BASE_RESIDUAL = 0, EXTRA_RESIDUAL = 1, + $ EXTRA_Y = 2 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL ILAUPLO + INTEGER ILAUPLO +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CHETRS, CHEMV, BLAS_CHEMV_X, + $ BLAS_CHEMV2_X, CLA_HEAMV, CLA_WWADDW, + $ CLA_LIN_BERR + REAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, AIMAG, MAX, MIN +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDY.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLA_HERFSX_EXTENDED', -INFO ) + RETURN + END IF + EPS = SLAMCH( 'Epsilon' ) + HUGEVAL = SLAMCH( 'Overflow' ) +* Force HUGEVAL to Inf + HUGEVAL = HUGEVAL * HUGEVAL +* Using HUGEVAL may lead to spurious underflows. + INCR_THRESH = REAL( N ) * EPS + + IF ( LSAME ( UPLO, 'L' ) ) THEN + UPLO2 = ILAUPLO( 'L' ) + ELSE + UPLO2 = ILAUPLO( 'U' ) + ENDIF + + DO J = 1, NRHS + Y_PREC_STATE = EXTRA_RESIDUAL + IF ( Y_PREC_STATE .EQ. EXTRA_Y ) THEN + DO I = 1, N + Y_TAIL( I ) = 0.0 + END DO + END IF + + DXRAT = 0.0 + DXRATMAX = 0.0 + DZRAT = 0.0 + DZRATMAX = 0.0 + FINAL_DX_X = HUGEVAL + FINAL_DZ_Z = HUGEVAL + PREVNORMDX = HUGEVAL + PREV_DZ_Z = HUGEVAL + DZ_Z = HUGEVAL + DX_X = HUGEVAL + + X_STATE = WORKING_STATE + Z_STATE = UNSTABLE_STATE + INCR_PREC = .FALSE. + + DO CNT = 1, ITHRESH +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL CCOPY( N, B( 1, J ), 1, RES, 1 ) + IF ( Y_PREC_STATE .EQ. BASE_RESIDUAL ) THEN + CALL CHEMV( UPLO, N, CMPLX(-1.0), A, LDA, Y( 1, J ), 1, + $ CMPLX(1.0), RES, 1 ) + ELSE IF ( Y_PREC_STATE .EQ. EXTRA_RESIDUAL ) THEN + CALL BLAS_CHEMV_X( UPLO2, N, CMPLX(-1.0), A, LDA, + $ Y( 1, J ), 1, CMPLX(1.0), RES, 1, PREC_TYPE) + ELSE + CALL BLAS_CHEMV2_X(UPLO2, N, CMPLX(-1.0), A, LDA, + $ Y(1, J), Y_TAIL, 1, CMPLX(1.0), RES, 1, PREC_TYPE) + END IF + +! XXX: RES is no longer needed. + CALL CCOPY( N, RES, 1, DY, 1 ) + CALL CHETRS( UPLO, N, 1, AF, LDAF, IPIV, DY, N, INFO ) +* +* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. +* + NORMX = 0.0 + NORMY = 0.0 + NORMDX = 0.0 + DZ_Z = 0.0 + YMIN = HUGEVAL + + DO I = 1, N + YK = CABS1( Y( I, J ) ) + DYK = CABS1( DY( I ) ) + + IF (YK .NE. 0.0) THEN + DZ_Z = MAX( DZ_Z, DYK / YK ) + ELSE IF ( DYK .NE. 0.0 ) THEN + DZ_Z = HUGEVAL + END IF + + YMIN = MIN( YMIN, YK ) + + NORMY = MAX( NORMY, YK ) + + IF ( COLEQU ) THEN + NORMX = MAX( NORMX, YK * C( I ) ) + NORMDX = MAX( NORMDX, DYK * C( I ) ) + ELSE + NORMX = NORMY + NORMDX = MAX( NORMDX, DYK ) + END IF + END DO + + IF ( NORMX .NE. 0.0 ) THEN + DX_X = NORMDX / NORMX + ELSE IF ( NORMDX .EQ. 0.0 ) THEN + DX_X = 0.0 + ELSE + DX_X = HUGEVAL + END IF + + DXRAT = NORMDX / PREVNORMDX + DZRAT = DZ_Z / PREV_DZ_Z +* +* Check termination criteria. +* + IF ( YMIN*RCOND .LT. INCR_THRESH*NORMY + $ .AND. Y_PREC_STATE .LT. EXTRA_Y ) + $ INCR_PREC = .TRUE. + + IF ( X_STATE .EQ. NOPROG_STATE .AND. DXRAT .LE. RTHRESH ) + $ X_STATE = WORKING_STATE + IF ( X_STATE .EQ. WORKING_STATE ) THEN + IF ( DX_X .LE. EPS ) THEN + X_STATE = CONV_STATE + ELSE IF ( DXRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + X_STATE = NOPROG_STATE + END IF + ELSE + IF (DXRAT .GT. DXRATMAX) DXRATMAX = DXRAT + END IF + IF ( X_STATE .GT. WORKING_STATE ) FINAL_DX_X = DX_X + END IF + + IF ( Z_STATE .EQ. UNSTABLE_STATE .AND. DZ_Z .LE. DZ_UB ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. NOPROG_STATE .AND. DZRAT .LE. RTHRESH ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. WORKING_STATE ) THEN + IF ( DZ_Z .LE. EPS ) THEN + Z_STATE = CONV_STATE + ELSE IF ( DZ_Z .GT. DZ_UB ) THEN + Z_STATE = UNSTABLE_STATE + DZRATMAX = 0.0 + FINAL_DZ_Z = HUGEVAL + ELSE IF ( DZRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + Z_STATE = NOPROG_STATE + END IF + ELSE + IF ( DZRAT .GT. DZRATMAX ) DZRATMAX = DZRAT + END IF + IF ( Z_STATE .GT. WORKING_STATE ) FINAL_DZ_Z = DZ_Z + END IF + + IF ( X_STATE.NE.WORKING_STATE.AND. + $ ( IGNORE_CWISE.OR.Z_STATE.NE.WORKING_STATE ) ) + $ GOTO 666 + + IF ( INCR_PREC ) THEN + INCR_PREC = .FALSE. + Y_PREC_STATE = Y_PREC_STATE + 1 + DO I = 1, N + Y_TAIL( I ) = 0.0 + END DO + END IF + + PREVNORMDX = NORMDX + PREV_DZ_Z = DZ_Z +* +* Update soluton. +* + IF ( Y_PREC_STATE .LT. EXTRA_Y ) THEN + CALL CAXPY( N, CMPLX(1.0), DY, 1, Y(1,J), 1 ) + ELSE + CALL CLA_WWADDW( N, Y(1,J), Y_TAIL, DY ) + END IF + + END DO +* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. + 666 CONTINUE +* +* Set final_* when cnt hits ithresh. +* + IF ( X_STATE .EQ. WORKING_STATE ) FINAL_DX_X = DX_X + IF ( Z_STATE .EQ. WORKING_STATE ) FINAL_DZ_Z = DZ_Z +* +* Compute error bounds. +* + IF ( N_NORMS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = + $ FINAL_DX_X / (1 - DXRATMAX) + END IF + IF (N_NORMS .GE. 2) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = + $ FINAL_DZ_Z / (1 - DZRATMAX) + END IF +* +* Compute componentwise relative backward error from formula +* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL CCOPY( N, B( 1, J ), 1, RES, 1 ) + CALL CHEMV( UPLO, N, CMPLX(-1.0), A, LDA, Y(1,J), 1, + $ CMPLX(1.0), RES, 1 ) + + DO I = 1, N + AYB( I ) = CABS1( B( I, J ) ) + END DO +* +* Compute abs(op(A_s))*abs(Y) + abs(B_s). +* + CALL CLA_HEAMV( UPLO2, N, 1.0, + $ A, LDA, Y(1, J), 1, 1.0, AYB, 1 ) + + CALL CLA_LIN_BERR( N, N, 1, RES, AYB, BERR_OUT( J ) ) +* +* End of loop for each RHS. +* + END DO +* + RETURN + END diff --git a/dspl/liblapack/SRC/cla_herpvgrw.f b/dspl/liblapack/SRC/cla_herpvgrw.f new file mode 100644 index 0000000..0fa26d9 --- /dev/null +++ b/dspl/liblapack/SRC/cla_herpvgrw.f @@ -0,0 +1,330 @@ +*> \brief \b CLA_HERPVGRW +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_HERPVGRW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLA_HERPVGRW( UPLO, N, INFO, A, LDA, AF, LDAF, IPIV, +* WORK ) +* +* .. Scalar Arguments .. +* CHARACTER*1 UPLO +* INTEGER N, INFO, LDA, LDAF +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ) +* REAL WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> CLA_HERPVGRW computes the reciprocal pivot growth factor +*> norm(A)/norm(U). The "max absolute element" norm is used. If this is +*> much less than 1, the stability of the LU factorization of the +*> (equilibrated) matrix A could be poor. This also means that the +*> solution X, estimated condition numbers, and error bounds could be +*> unreliable. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] INFO +*> \verbatim +*> INFO is INTEGER +*> The value of INFO returned from SSYTRF, .i.e., the pivot in +*> column INFO is exactly 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CHETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHETRF. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexHEcomputational +* +* ===================================================================== + REAL FUNCTION CLA_HERPVGRW( UPLO, N, INFO, A, LDA, AF, LDAF, IPIV, + $ WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER*1 UPLO + INTEGER N, INFO, LDA, LDAF +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ) + REAL WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER NCOLS, I, J, K, KP + REAL AMAX, UMAX, RPVGRW, TMP + LOGICAL UPPER, LSAME + COMPLEX ZDUM +* .. +* .. External Functions .. + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, AIMAG, MAX, MIN +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL ( ZDUM ) ) + ABS( AIMAG ( ZDUM ) ) +* .. +* .. Executable Statements .. +* + UPPER = LSAME( 'Upper', UPLO ) + IF ( INFO.EQ.0 ) THEN + IF (UPPER) THEN + NCOLS = 1 + ELSE + NCOLS = N + END IF + ELSE + NCOLS = INFO + END IF + + RPVGRW = 1.0 + DO I = 1, 2*N + WORK( I ) = 0.0 + END DO +* +* Find the max magnitude entry of each column of A. Compute the max +* for all N columns so we can apply the pivot permutation while +* looping below. Assume a full factorization is the common case. +* + IF ( UPPER ) THEN + DO J = 1, N + DO I = 1, J + WORK( N+I ) = MAX( CABS1( A( I,J ) ), WORK( N+I ) ) + WORK( N+J ) = MAX( CABS1( A( I,J ) ), WORK( N+J ) ) + END DO + END DO + ELSE + DO J = 1, N + DO I = J, N + WORK( N+I ) = MAX( CABS1( A( I, J ) ), WORK( N+I ) ) + WORK( N+J ) = MAX( CABS1( A( I, J ) ), WORK( N+J ) ) + END DO + END DO + END IF +* +* Now find the max magnitude entry of each column of U or L. Also +* permute the magnitudes of A above so they're in the same order as +* the factor. +* +* The iteration orders and permutations were copied from csytrs. +* Calls to SSWAP would be severe overkill. +* + IF ( UPPER ) THEN + K = N + DO WHILE ( K .LT. NCOLS .AND. K.GT.0 ) + IF ( IPIV( K ).GT.0 ) THEN +! 1x1 pivot + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + DO I = 1, K + WORK( K ) = MAX( CABS1( AF( I, K ) ), WORK( K ) ) + END DO + K = K - 1 + ELSE +! 2x2 pivot + KP = -IPIV( K ) + TMP = WORK( N+K-1 ) + WORK( N+K-1 ) = WORK( N+KP ) + WORK( N+KP ) = TMP + DO I = 1, K-1 + WORK( K ) = MAX( CABS1( AF( I, K ) ), WORK( K ) ) + WORK( K-1 ) = + $ MAX( CABS1( AF( I, K-1 ) ), WORK( K-1 ) ) + END DO + WORK( K ) = MAX( CABS1( AF( K, K ) ), WORK( K ) ) + K = K - 2 + END IF + END DO + K = NCOLS + DO WHILE ( K .LE. N ) + IF ( IPIV( K ).GT.0 ) THEN + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + K = K + 1 + ELSE + KP = -IPIV( K ) + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + K = K + 2 + END IF + END DO + ELSE + K = 1 + DO WHILE ( K .LE. NCOLS ) + IF ( IPIV( K ).GT.0 ) THEN +! 1x1 pivot + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + DO I = K, N + WORK( K ) = MAX( CABS1( AF( I, K ) ), WORK( K ) ) + END DO + K = K + 1 + ELSE +! 2x2 pivot + KP = -IPIV( K ) + TMP = WORK( N+K+1 ) + WORK( N+K+1 ) = WORK( N+KP ) + WORK( N+KP ) = TMP + DO I = K+1, N + WORK( K ) = MAX( CABS1( AF( I, K ) ), WORK( K ) ) + WORK( K+1 ) = + $ MAX( CABS1( AF( I, K+1 ) ) , WORK( K+1 ) ) + END DO + WORK(K) = MAX( CABS1( AF( K, K ) ), WORK( K ) ) + K = K + 2 + END IF + END DO + K = NCOLS + DO WHILE ( K .GE. 1 ) + IF ( IPIV( K ).GT.0 ) THEN + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + K = K - 1 + ELSE + KP = -IPIV( K ) + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + K = K - 2 + ENDIF + END DO + END IF +* +* Compute the *inverse* of the max element growth factor. Dividing +* by zero would imply the largest entry of the factor's column is +* zero. Than can happen when either the column of A is zero or +* massive pivots made the factor underflow to zero. Neither counts +* as growth in itself, so simply ignore terms with zero +* denominators. +* + IF ( UPPER ) THEN + DO I = NCOLS, N + UMAX = WORK( I ) + AMAX = WORK( N+I ) + IF ( UMAX /= 0.0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + ELSE + DO I = 1, NCOLS + UMAX = WORK( I ) + AMAX = WORK( N+I ) + IF ( UMAX /= 0.0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + END IF + + CLA_HERPVGRW = RPVGRW + END diff --git a/dspl/liblapack/SRC/cla_lin_berr.f b/dspl/liblapack/SRC/cla_lin_berr.f new file mode 100644 index 0000000..c892d8b --- /dev/null +++ b/dspl/liblapack/SRC/cla_lin_berr.f @@ -0,0 +1,160 @@ +*> \brief \b CLA_LIN_BERR computes a component-wise relative backward error. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_LIN_BERR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLA_LIN_BERR ( N, NZ, NRHS, RES, AYB, BERR ) +* +* .. Scalar Arguments .. +* INTEGER N, NZ, NRHS +* .. +* .. Array Arguments .. +* REAL AYB( N, NRHS ), BERR( NRHS ) +* COMPLEX RES( N, NRHS ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLA_LIN_BERR computes componentwise relative backward error from +*> the formula +*> max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NZ +*> \verbatim +*> NZ is INTEGER +*> We add (NZ+1)*SLAMCH( 'Safe minimum' ) to R(i) in the numerator to +*> guard against spuriously zero residuals. Default value is N. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices AYB, RES, and BERR. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is COMPLEX array, dimension (N,NRHS) +*> The residual matrix, i.e., the matrix R in the relative backward +*> error formula above. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is REAL array, dimension (N, NRHS) +*> The denominator in the relative backward error formula above, i.e., +*> the matrix abs(op(A_s))*abs(Y) + abs(B_s). The matrices A, Y, and B +*> are from iterative refinement (see cla_gerfsx_extended.f). +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error from the formula above. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CLA_LIN_BERR ( N, NZ, NRHS, RES, AYB, BERR ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER N, NZ, NRHS +* .. +* .. Array Arguments .. + REAL AYB( N, NRHS ), BERR( NRHS ) + COMPLEX RES( N, NRHS ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + REAL TMP + INTEGER I, J + COMPLEX CDUM +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, AIMAG, MAX +* .. +* .. External Functions .. + EXTERNAL SLAMCH + REAL SLAMCH + REAL SAFE1 +* .. +* .. Statement Functions .. + COMPLEX CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Adding SAFE1 to the numerator guards against spuriously zero +* residuals. A similar safeguard is in the CLA_yyAMV routine used +* to compute AYB. +* + SAFE1 = SLAMCH( 'Safe minimum' ) + SAFE1 = (NZ+1)*SAFE1 + + DO J = 1, NRHS + BERR(J) = 0.0 + DO I = 1, N + IF (AYB(I,J) .NE. 0.0) THEN + TMP = (SAFE1 + CABS1(RES(I,J)))/AYB(I,J) + BERR(J) = MAX( BERR(J), TMP ) + END IF +* +* If AYB is exactly 0.0 (and if computed by CLA_yyAMV), then we know +* the true residual also must be exactly 0.0. +* + END DO + END DO + END diff --git a/dspl/liblapack/SRC/cla_porcond_c.f b/dspl/liblapack/SRC/cla_porcond_c.f new file mode 100644 index 0000000..7a2bcfe --- /dev/null +++ b/dspl/liblapack/SRC/cla_porcond_c.f @@ -0,0 +1,319 @@ +*> \brief \b CLA_PORCOND_C computes the infinity norm condition number of op(A)*inv(diag(c)) for Hermitian positive-definite matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_PORCOND_C + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLA_PORCOND_C( UPLO, N, A, LDA, AF, LDAF, C, CAPPLY, +* INFO, WORK, RWORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* LOGICAL CAPPLY +* INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), AF( LDAF, * ), WORK( * ) +* REAL C( * ), RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLA_PORCOND_C Computes the infinity norm condition number of +*> op(A) * inv(diag(C)) where C is a REAL vector +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H*U or A = L*L**H, as computed by CPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The vector C in the formula op(A) * inv(diag(C)). +*> \endverbatim +*> +*> \param[in] CAPPLY +*> \verbatim +*> CAPPLY is LOGICAL +*> If .TRUE. then access the vector C in the formula above. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexPOcomputational +* +* ===================================================================== + REAL FUNCTION CLA_PORCOND_C( UPLO, N, A, LDA, AF, LDAF, C, CAPPLY, + $ INFO, WORK, RWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + LOGICAL CAPPLY + INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), AF( LDAF, * ), WORK( * ) + REAL C( * ), RWORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER KASE + REAL AINVNM, ANORM, TMP + INTEGER I, J + LOGICAL UP, UPPER + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CPOTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL, AIMAG +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + CLA_PORCOND_C = 0.0E+0 +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLA_PORCOND_C', -INFO ) + RETURN + END IF + UP = .FALSE. + IF ( LSAME( UPLO, 'U' ) ) UP = .TRUE. +* +* Compute norm of op(A)*op2(C). +* + ANORM = 0.0E+0 + IF ( UP ) THEN + DO I = 1, N + TMP = 0.0E+0 + IF ( CAPPLY ) THEN + DO J = 1, I + TMP = TMP + CABS1( A( J, I ) ) / C( J ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( I, J ) ) / C( J ) + END DO + ELSE + DO J = 1, I + TMP = TMP + CABS1( A( J, I ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( I, J ) ) + END DO + END IF + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + ELSE + DO I = 1, N + TMP = 0.0E+0 + IF ( CAPPLY ) THEN + DO J = 1, I + TMP = TMP + CABS1( A( I, J ) ) / C( J ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( J, I ) ) / C( J ) + END DO + ELSE + DO J = 1, I + TMP = TMP + CABS1( A( I, J ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( J, I ) ) + END DO + END IF + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + CLA_PORCOND_C = 1.0E+0 + RETURN + ELSE IF( ANORM .EQ. 0.0E+0 ) THEN + RETURN + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0E+0 +* + KASE = 0 + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO +* + IF ( UP ) THEN + CALL CPOTRS( 'U', N, 1, AF, LDAF, + $ WORK, N, INFO ) + ELSE + CALL CPOTRS( 'L', N, 1, AF, LDAF, + $ WORK, N, INFO ) + ENDIF +* +* Multiply by inv(C). +* + IF ( CAPPLY ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + ELSE +* +* Multiply by inv(C**H). +* + IF ( CAPPLY ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF +* + IF ( UP ) THEN + CALL CPOTRS( 'U', N, 1, AF, LDAF, + $ WORK, N, INFO ) + ELSE + CALL CPOTRS( 'L', N, 1, AF, LDAF, + $ WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0E+0 ) + $ CLA_PORCOND_C = 1.0E+0 / AINVNM +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/cla_porcond_x.f b/dspl/liblapack/SRC/cla_porcond_x.f new file mode 100644 index 0000000..f0844ec --- /dev/null +++ b/dspl/liblapack/SRC/cla_porcond_x.f @@ -0,0 +1,288 @@ +*> \brief \b CLA_PORCOND_X computes the infinity norm condition number of op(A)*diag(x) for Hermitian positive-definite matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_PORCOND_X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLA_PORCOND_X( UPLO, N, A, LDA, AF, LDAF, X, INFO, +* WORK, RWORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), AF( LDAF, * ), WORK( * ), X( * ) +* REAL RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLA_PORCOND_X Computes the infinity norm condition number of +*> op(A) * diag(X) where X is a COMPLEX vector. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H*U or A = L*L**H, as computed by CPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX array, dimension (N) +*> The vector X in the formula op(A) * diag(X). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexPOcomputational +* +* ===================================================================== + REAL FUNCTION CLA_PORCOND_X( UPLO, N, A, LDA, AF, LDAF, X, INFO, + $ WORK, RWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), AF( LDAF, * ), WORK( * ), X( * ) + REAL RWORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER KASE, I, J + REAL AINVNM, ANORM, TMP + LOGICAL UP, UPPER + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CPOTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL, AIMAG +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + CLA_PORCOND_X = 0.0E+0 +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF ( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLA_PORCOND_X', -INFO ) + RETURN + END IF + UP = .FALSE. + IF ( LSAME( UPLO, 'U' ) ) UP = .TRUE. +* +* Compute norm of op(A)*op2(C). +* + ANORM = 0.0 + IF ( UP ) THEN + DO I = 1, N + TMP = 0.0E+0 + DO J = 1, I + TMP = TMP + CABS1( A( J, I ) * X( J ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( I, J ) * X( J ) ) + END DO + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + ELSE + DO I = 1, N + TMP = 0.0E+0 + DO J = 1, I + TMP = TMP + CABS1( A( I, J ) * X( J ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( J, I ) * X( J ) ) + END DO + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + CLA_PORCOND_X = 1.0E+0 + RETURN + ELSE IF( ANORM .EQ. 0.0E+0 ) THEN + RETURN + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0E+0 +* + KASE = 0 + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO +* + IF ( UP ) THEN + CALL CPOTRS( 'U', N, 1, AF, LDAF, + $ WORK, N, INFO ) + ELSE + CALL CPOTRS( 'L', N, 1, AF, LDAF, + $ WORK, N, INFO ) + ENDIF +* +* Multiply by inv(X). +* + DO I = 1, N + WORK( I ) = WORK( I ) / X( I ) + END DO + ELSE +* +* Multiply by inv(X**H). +* + DO I = 1, N + WORK( I ) = WORK( I ) / X( I ) + END DO +* + IF ( UP ) THEN + CALL CPOTRS( 'U', N, 1, AF, LDAF, + $ WORK, N, INFO ) + ELSE + CALL CPOTRS( 'L', N, 1, AF, LDAF, + $ WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0E+0 ) + $ CLA_PORCOND_X = 1.0E+0 / AINVNM +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/cla_porfsx_extended.f b/dspl/liblapack/SRC/cla_porfsx_extended.f new file mode 100644 index 0000000..3a3409c --- /dev/null +++ b/dspl/liblapack/SRC/cla_porfsx_extended.f @@ -0,0 +1,687 @@ +*> \brief \b CLA_PORFSX_EXTENDED improves the computed solution to a system of linear equations for symmetric or Hermitian positive-definite matrices by performing extra-precise iterative refinement and provides error bounds and backward error estimates for the solution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_PORFSX_EXTENDED + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLA_PORFSX_EXTENDED( PREC_TYPE, UPLO, N, NRHS, A, LDA, +* AF, LDAF, COLEQU, C, B, LDB, Y, +* LDY, BERR_OUT, N_NORMS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, RES, +* AYB, DY, Y_TAIL, RCOND, ITHRESH, +* RTHRESH, DZ_UB, IGNORE_CWISE, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, +* $ N_NORMS, ITHRESH +* CHARACTER UPLO +* LOGICAL COLEQU, IGNORE_CWISE +* REAL RTHRESH, DZ_UB +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) +* REAL C( * ), AYB( * ), RCOND, BERR_OUT( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLA_PORFSX_EXTENDED improves the computed solution to a system of +*> linear equations by performing extra-precise iterative refinement +*> and provides error bounds and backward error estimates for the solution. +*> This subroutine is called by CPORFSX to perform iterative refinement. +*> In addition to normwise error bound, the code provides maximum +*> componentwise error bound if possible. See comments for ERR_BNDS_NORM +*> and ERR_BNDS_COMP for details of the error bounds. Note that this +*> subroutine is only resonsible for setting the second fields of +*> ERR_BNDS_NORM and ERR_BNDS_COMP. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] PREC_TYPE +*> \verbatim +*> PREC_TYPE is INTEGER +*> Specifies the intermediate precision to be used in refinement. +*> The value is defined by ILAPREC(P) where P is a CHARACTER and +*> P = 'S': Single +*> = 'D': Double +*> = 'I': Indigenous +*> = 'X', 'E': Extra +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand-sides, i.e., the number of columns of the +*> matrix B. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by CPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] COLEQU +*> \verbatim +*> COLEQU is LOGICAL +*> If .TRUE. then column equilibration was done to A before calling +*> this routine. This is needed to compute the solution and error +*> bounds correctly. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If COLEQU = .FALSE., C +*> is not accessed. If C is input, each element of C should be a power +*> of the radix to ensure a reliable solution and error estimates. +*> Scaling by powers of the radix does not cause rounding errors unless +*> the result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right-hand-side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX array, dimension (LDY,NRHS) +*> On entry, the solution matrix X, as computed by CPOTRS. +*> On exit, the improved solution matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +*> +*> \param[out] BERR_OUT +*> \verbatim +*> BERR_OUT is REAL array, dimension (NRHS) +*> On exit, BERR_OUT(j) contains the componentwise relative backward +*> error for right-hand-side j from the formula +*> max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. This is computed by CLA_LIN_BERR. +*> \endverbatim +*> +*> \param[in] N_NORMS +*> \verbatim +*> N_NORMS is INTEGER +*> Determines which error bounds to return (see ERR_BNDS_NORM +*> and ERR_BNDS_COMP). +*> If N_NORMS >= 1 return normwise error bounds. +*> If N_NORMS >= 2 return componentwise error bounds. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is COMPLEX array, dimension (N) +*> Workspace to hold the intermediate residual. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is REAL array, dimension (N) +*> Workspace. +*> \endverbatim +*> +*> \param[in] DY +*> \verbatim +*> DY is COMPLEX array, dimension (N) +*> Workspace to hold the intermediate solution. +*> \endverbatim +*> +*> \param[in] Y_TAIL +*> \verbatim +*> Y_TAIL is COMPLEX array, dimension (N) +*> Workspace to hold the trailing bits of the intermediate solution. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[in] ITHRESH +*> \verbatim +*> ITHRESH is INTEGER +*> The maximum number of residual computations allowed for +*> refinement. The default is 10. For 'aggressive' set to 100 to +*> permit convergence using approximate factorizations or +*> factorizations other than LU. If the factorization uses a +*> technique other than Gaussian elimination, the guarantees in +*> ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. +*> \endverbatim +*> +*> \param[in] RTHRESH +*> \verbatim +*> RTHRESH is REAL +*> Determines when to stop refinement if the error estimate stops +*> decreasing. Refinement will stop when the next solution no longer +*> satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is +*> the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The +*> default value is 0.5. For 'aggressive' set to 0.9 to permit +*> convergence on extremely ill-conditioned matrices. See LAWN 165 +*> for more details. +*> \endverbatim +*> +*> \param[in] DZ_UB +*> \verbatim +*> DZ_UB is REAL +*> Determines when to start considering componentwise convergence. +*> Componentwise convergence is only considered after each component +*> of the solution Y is stable, which we definte as the relative +*> change in each component being less than DZ_UB. The default value +*> is 0.25, requiring the first bit to be stable. See LAWN 165 for +*> more details. +*> \endverbatim +*> +*> \param[in] IGNORE_CWISE +*> \verbatim +*> IGNORE_CWISE is LOGICAL +*> If .TRUE. then ignore componentwise convergence. Default value +*> is .FALSE.. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> < 0: if INFO = -i, the ith argument to CPOTRS had an illegal +*> value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexPOcomputational +* +* ===================================================================== + SUBROUTINE CLA_PORFSX_EXTENDED( PREC_TYPE, UPLO, N, NRHS, A, LDA, + $ AF, LDAF, COLEQU, C, B, LDB, Y, + $ LDY, BERR_OUT, N_NORMS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, RES, + $ AYB, DY, Y_TAIL, RCOND, ITHRESH, + $ RTHRESH, DZ_UB, IGNORE_CWISE, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, + $ N_NORMS, ITHRESH + CHARACTER UPLO + LOGICAL COLEQU, IGNORE_CWISE + REAL RTHRESH, DZ_UB +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) + REAL C( * ), AYB( * ), RCOND, BERR_OUT( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER UPLO2, CNT, I, J, X_STATE, Z_STATE, + $ Y_PREC_STATE + REAL YK, DYK, YMIN, NORMY, NORMX, NORMDX, DXRAT, + $ DZRAT, PREVNORMDX, PREV_DZ_Z, DXRATMAX, + $ DZRATMAX, DX_X, DZ_Z, FINAL_DX_X, FINAL_DZ_Z, + $ EPS, HUGEVAL, INCR_THRESH + LOGICAL INCR_PREC + COMPLEX ZDUM +* .. +* .. Parameters .. + INTEGER UNSTABLE_STATE, WORKING_STATE, CONV_STATE, + $ NOPROG_STATE, BASE_RESIDUAL, EXTRA_RESIDUAL, + $ EXTRA_Y + PARAMETER ( UNSTABLE_STATE = 0, WORKING_STATE = 1, + $ CONV_STATE = 2, NOPROG_STATE = 3 ) + PARAMETER ( BASE_RESIDUAL = 0, EXTRA_RESIDUAL = 1, + $ EXTRA_Y = 2 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL ILAUPLO + INTEGER ILAUPLO +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CPOTRS, CHEMV, BLAS_CHEMV_X, + $ BLAS_CHEMV2_X, CLA_HEAMV, CLA_WWADDW, + $ CLA_LIN_BERR, SLAMCH + REAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, AIMAG, MAX, MIN +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + IF (INFO.NE.0) RETURN + EPS = SLAMCH( 'Epsilon' ) + HUGEVAL = SLAMCH( 'Overflow' ) +* Force HUGEVAL to Inf + HUGEVAL = HUGEVAL * HUGEVAL +* Using HUGEVAL may lead to spurious underflows. + INCR_THRESH = REAL(N) * EPS + + IF (LSAME (UPLO, 'L')) THEN + UPLO2 = ILAUPLO( 'L' ) + ELSE + UPLO2 = ILAUPLO( 'U' ) + ENDIF + + DO J = 1, NRHS + Y_PREC_STATE = EXTRA_RESIDUAL + IF (Y_PREC_STATE .EQ. EXTRA_Y) THEN + DO I = 1, N + Y_TAIL( I ) = 0.0 + END DO + END IF + + DXRAT = 0.0 + DXRATMAX = 0.0 + DZRAT = 0.0 + DZRATMAX = 0.0 + FINAL_DX_X = HUGEVAL + FINAL_DZ_Z = HUGEVAL + PREVNORMDX = HUGEVAL + PREV_DZ_Z = HUGEVAL + DZ_Z = HUGEVAL + DX_X = HUGEVAL + + X_STATE = WORKING_STATE + Z_STATE = UNSTABLE_STATE + INCR_PREC = .FALSE. + + DO CNT = 1, ITHRESH +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL CCOPY( N, B( 1, J ), 1, RES, 1 ) + IF (Y_PREC_STATE .EQ. BASE_RESIDUAL) THEN + CALL CHEMV(UPLO, N, CMPLX(-1.0), A, LDA, Y(1,J), 1, + $ CMPLX(1.0), RES, 1) + ELSE IF (Y_PREC_STATE .EQ. EXTRA_RESIDUAL) THEN + CALL BLAS_CHEMV_X(UPLO2, N, CMPLX(-1.0), A, LDA, + $ Y( 1, J ), 1, CMPLX(1.0), RES, 1, PREC_TYPE) + ELSE + CALL BLAS_CHEMV2_X(UPLO2, N, CMPLX(-1.0), A, LDA, + $ Y(1, J), Y_TAIL, 1, CMPLX(1.0), RES, 1, PREC_TYPE) + END IF + +! XXX: RES is no longer needed. + CALL CCOPY( N, RES, 1, DY, 1 ) + CALL CPOTRS( UPLO, N, 1, AF, LDAF, DY, N, INFO) +* +* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. +* + NORMX = 0.0 + NORMY = 0.0 + NORMDX = 0.0 + DZ_Z = 0.0 + YMIN = HUGEVAL + + DO I = 1, N + YK = CABS1(Y(I, J)) + DYK = CABS1(DY(I)) + + IF (YK .NE. 0.0) THEN + DZ_Z = MAX( DZ_Z, DYK / YK ) + ELSE IF (DYK .NE. 0.0) THEN + DZ_Z = HUGEVAL + END IF + + YMIN = MIN( YMIN, YK ) + + NORMY = MAX( NORMY, YK ) + + IF ( COLEQU ) THEN + NORMX = MAX(NORMX, YK * C(I)) + NORMDX = MAX(NORMDX, DYK * C(I)) + ELSE + NORMX = NORMY + NORMDX = MAX(NORMDX, DYK) + END IF + END DO + + IF (NORMX .NE. 0.0) THEN + DX_X = NORMDX / NORMX + ELSE IF (NORMDX .EQ. 0.0) THEN + DX_X = 0.0 + ELSE + DX_X = HUGEVAL + END IF + + DXRAT = NORMDX / PREVNORMDX + DZRAT = DZ_Z / PREV_DZ_Z +* +* Check termination criteria. +* + IF (YMIN*RCOND .LT. INCR_THRESH*NORMY + $ .AND. Y_PREC_STATE .LT. EXTRA_Y) + $ INCR_PREC = .TRUE. + + IF (X_STATE .EQ. NOPROG_STATE .AND. DXRAT .LE. RTHRESH) + $ X_STATE = WORKING_STATE + IF (X_STATE .EQ. WORKING_STATE) THEN + IF (DX_X .LE. EPS) THEN + X_STATE = CONV_STATE + ELSE IF (DXRAT .GT. RTHRESH) THEN + IF (Y_PREC_STATE .NE. EXTRA_Y) THEN + INCR_PREC = .TRUE. + ELSE + X_STATE = NOPROG_STATE + END IF + ELSE + IF (DXRAT .GT. DXRATMAX) DXRATMAX = DXRAT + END IF + IF (X_STATE .GT. WORKING_STATE) FINAL_DX_X = DX_X + END IF + + IF (Z_STATE .EQ. UNSTABLE_STATE .AND. DZ_Z .LE. DZ_UB) + $ Z_STATE = WORKING_STATE + IF (Z_STATE .EQ. NOPROG_STATE .AND. DZRAT .LE. RTHRESH) + $ Z_STATE = WORKING_STATE + IF (Z_STATE .EQ. WORKING_STATE) THEN + IF (DZ_Z .LE. EPS) THEN + Z_STATE = CONV_STATE + ELSE IF (DZ_Z .GT. DZ_UB) THEN + Z_STATE = UNSTABLE_STATE + DZRATMAX = 0.0 + FINAL_DZ_Z = HUGEVAL + ELSE IF (DZRAT .GT. RTHRESH) THEN + IF (Y_PREC_STATE .NE. EXTRA_Y) THEN + INCR_PREC = .TRUE. + ELSE + Z_STATE = NOPROG_STATE + END IF + ELSE + IF (DZRAT .GT. DZRATMAX) DZRATMAX = DZRAT + END IF + IF (Z_STATE .GT. WORKING_STATE) FINAL_DZ_Z = DZ_Z + END IF + + IF ( X_STATE.NE.WORKING_STATE.AND. + $ (IGNORE_CWISE.OR.Z_STATE.NE.WORKING_STATE) ) + $ GOTO 666 + + IF (INCR_PREC) THEN + INCR_PREC = .FALSE. + Y_PREC_STATE = Y_PREC_STATE + 1 + DO I = 1, N + Y_TAIL( I ) = 0.0 + END DO + END IF + + PREVNORMDX = NORMDX + PREV_DZ_Z = DZ_Z +* +* Update soluton. +* + IF (Y_PREC_STATE .LT. EXTRA_Y) THEN + CALL CAXPY( N, CMPLX(1.0), DY, 1, Y(1,J), 1 ) + ELSE + CALL CLA_WWADDW(N, Y(1,J), Y_TAIL, DY) + END IF + + END DO +* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. + 666 CONTINUE +* +* Set final_* when cnt hits ithresh. +* + IF (X_STATE .EQ. WORKING_STATE) FINAL_DX_X = DX_X + IF (Z_STATE .EQ. WORKING_STATE) FINAL_DZ_Z = DZ_Z +* +* Compute error bounds. +* + IF (N_NORMS .GE. 1) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = + $ FINAL_DX_X / (1 - DXRATMAX) + END IF + IF (N_NORMS .GE. 2) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = + $ FINAL_DZ_Z / (1 - DZRATMAX) + END IF +* +* Compute componentwise relative backward error from formula +* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL CCOPY( N, B( 1, J ), 1, RES, 1 ) + CALL CHEMV(UPLO, N, CMPLX(-1.0), A, LDA, Y(1,J), 1, CMPLX(1.0), + $ RES, 1) + + DO I = 1, N + AYB( I ) = CABS1( B( I, J ) ) + END DO +* +* Compute abs(op(A_s))*abs(Y) + abs(B_s). +* + CALL CLA_HEAMV (UPLO2, N, 1.0, + $ A, LDA, Y(1, J), 1, 1.0, AYB, 1) + + CALL CLA_LIN_BERR (N, N, 1, RES, AYB, BERR_OUT(J)) +* +* End of loop for each RHS. +* + END DO +* + RETURN + END diff --git a/dspl/liblapack/SRC/cla_porpvgrw.f b/dspl/liblapack/SRC/cla_porpvgrw.f new file mode 100644 index 0000000..bd2e7af --- /dev/null +++ b/dspl/liblapack/SRC/cla_porpvgrw.f @@ -0,0 +1,216 @@ +*> \brief \b CLA_PORPVGRW computes the reciprocal pivot growth factor norm(A)/norm(U) for a symmetric or Hermitian positive-definite matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_PORPVGRW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLA_PORPVGRW( UPLO, NCOLS, A, LDA, AF, LDAF, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER*1 UPLO +* INTEGER NCOLS, LDA, LDAF +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), AF( LDAF, * ) +* REAL WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> CLA_PORPVGRW computes the reciprocal pivot growth factor +*> norm(A)/norm(U). The "max absolute element" norm is used. If this is +*> much less than 1, the stability of the LU factorization of the +*> (equilibrated) matrix A could be poor. This also means that the +*> solution X, estimated condition numbers, and error bounds could be +*> unreliable. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] NCOLS +*> \verbatim +*> NCOLS is INTEGER +*> The number of columns of the matrix A. NCOLS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by CPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexPOcomputational +* +* ===================================================================== + REAL FUNCTION CLA_PORPVGRW( UPLO, NCOLS, A, LDA, AF, LDAF, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER*1 UPLO + INTEGER NCOLS, LDA, LDAF +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), AF( LDAF, * ) + REAL WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + REAL AMAX, UMAX, RPVGRW + LOGICAL UPPER + COMPLEX ZDUM +* .. +* .. External Functions .. + EXTERNAL LSAME + LOGICAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, AIMAG +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. + UPPER = LSAME( 'Upper', UPLO ) +* +* SPOTRF will have factored only the NCOLSxNCOLS leading minor, so +* we restrict the growth search to that minor and use only the first +* 2*NCOLS workspace entries. +* + RPVGRW = 1.0 + DO I = 1, 2*NCOLS + WORK( I ) = 0.0 + END DO +* +* Find the max magnitude entry of each column. +* + IF ( UPPER ) THEN + DO J = 1, NCOLS + DO I = 1, J + WORK( NCOLS+J ) = + $ MAX( CABS1( A( I, J ) ), WORK( NCOLS+J ) ) + END DO + END DO + ELSE + DO J = 1, NCOLS + DO I = J, NCOLS + WORK( NCOLS+J ) = + $ MAX( CABS1( A( I, J ) ), WORK( NCOLS+J ) ) + END DO + END DO + END IF +* +* Now find the max magnitude entry of each column of the factor in +* AF. No pivoting, so no permutations. +* + IF ( LSAME( 'Upper', UPLO ) ) THEN + DO J = 1, NCOLS + DO I = 1, J + WORK( J ) = MAX( CABS1( AF( I, J ) ), WORK( J ) ) + END DO + END DO + ELSE + DO J = 1, NCOLS + DO I = J, NCOLS + WORK( J ) = MAX( CABS1( AF( I, J ) ), WORK( J ) ) + END DO + END DO + END IF +* +* Compute the *inverse* of the max element growth factor. Dividing +* by zero would imply the largest entry of the factor's column is +* zero. Than can happen when either the column of A is zero or +* massive pivots made the factor underflow to zero. Neither counts +* as growth in itself, so simply ignore terms with zero +* denominators. +* + IF ( LSAME( 'Upper', UPLO ) ) THEN + DO I = 1, NCOLS + UMAX = WORK( I ) + AMAX = WORK( NCOLS+I ) + IF ( UMAX /= 0.0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + ELSE + DO I = 1, NCOLS + UMAX = WORK( I ) + AMAX = WORK( NCOLS+I ) + IF ( UMAX /= 0.0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + END IF + + CLA_PORPVGRW = RPVGRW + END diff --git a/dspl/liblapack/SRC/cla_syamv.f b/dspl/liblapack/SRC/cla_syamv.f new file mode 100644 index 0000000..e1d3df9 --- /dev/null +++ b/dspl/liblapack/SRC/cla_syamv.f @@ -0,0 +1,428 @@ +*> \brief \b CLA_SYAMV computes a matrix-vector product using a symmetric indefinite matrix to calculate error bounds. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_SYAMV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLA_SYAMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, +* INCY ) +* +* .. Scalar Arguments .. +* REAL ALPHA, BETA +* INTEGER INCX, INCY, LDA, N +* INTEGER UPLO +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), X( * ) +* REAL Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLA_SYAMV performs the matrix-vector operation +*> +*> y := alpha*abs(A)*abs(x) + beta*abs(y), +*> +*> where alpha and beta are scalars, x and y are vectors and A is an +*> n by n symmetric matrix. +*> +*> This function is primarily used in calculating error bounds. +*> To protect against underflow during evaluation, components in +*> the resulting vector are perturbed away from zero by (N+1) +*> times the underflow threshold. To prevent unnecessarily large +*> errors for block-structure embedded in general matrices, +*> "symbolically" zero components are not perturbed. A zero +*> entry is considered "symbolic" if all multiplications involved +*> in computing that entry have at least one zero multiplicand. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is INTEGER +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array A is to be referenced as +*> follows: +*> +*> UPLO = BLAS_UPPER Only the upper triangular part of A +*> is to be referenced. +*> +*> UPLO = BLAS_LOWER Only the lower triangular part of A +*> is to be referenced. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is REAL . +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension ( LDA, n ). +*> Before entry, the leading m by n part of the array A must +*> contain the matrix of coefficients. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, n ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX array, dimension +*> ( 1 + ( n - 1 )*abs( INCX ) ) +*> Before entry, the incremented array X must contain the +*> vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is REAL . +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is REAL array, dimension +*> ( 1 + ( n - 1 )*abs( INCY ) ) +*> Before entry with BETA non-zero, the incremented array Y +*> must contain the vector y. On exit, Y is overwritten by the +*> updated vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> Unchanged on exit. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 2 Blas routine. +*> +*> -- Written on 22-October-1986. +*> Jack Dongarra, Argonne National Lab. +*> Jeremy Du Croz, Nag Central Office. +*> Sven Hammarling, Nag Central Office. +*> Richard Hanson, Sandia National Labs. +*> -- Modified for the absolute-value product, April 2006 +*> Jason Riedy, UC Berkeley +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLA_SYAMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, + $ INCY ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + REAL ALPHA, BETA + INTEGER INCX, INCY, LDA, N + INTEGER UPLO +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ) + REAL Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL SYMB_ZERO + REAL TEMP, SAFE1 + INTEGER I, INFO, IY, J, JX, KX, KY + COMPLEX ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SLAMCH + REAL SLAMCH +* .. +* .. External Functions .. + EXTERNAL ILAUPLO + INTEGER ILAUPLO +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, ABS, SIGN, REAL, AIMAG +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL ( ZDUM ) ) + ABS( AIMAG ( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( UPLO.NE.ILAUPLO( 'U' ) .AND. + $ UPLO.NE.ILAUPLO( 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 5 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + ELSE IF( INCY.EQ.0 )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSYMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Set SAFE1 essentially to be the underflow threshold times the +* number of additions in each row. +* + SAFE1 = SLAMCH( 'Safe minimum' ) + SAFE1 = (N+1)*SAFE1 +* +* Form y := alpha*abs(A)*abs(x) + beta*abs(y). +* +* The O(N^2) SYMB_ZERO tests could be replaced by O(N) queries to +* the inexact flag. Still doesn't help change the iteration order +* to per-column. +* + IY = KY + IF ( INCX.EQ.1 ) THEN + IF ( UPLO .EQ. ILAUPLO( 'U' ) ) THEN + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = CABS1( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + DO J = I+1, N + TEMP = CABS1( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = CABS1( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + DO J = I+1, N + TEMP = CABS1( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + ELSE + IF ( UPLO .EQ. ILAUPLO( 'U' ) ) THEN + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + JX = KX + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = CABS1( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + DO J = I+1, N + TEMP = CABS1( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + JX = KX + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = CABS1( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + DO J = I+1, N + TEMP = CABS1( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + + END IF +* + RETURN +* +* End of CLA_SYAMV +* + END diff --git a/dspl/liblapack/SRC/cla_syrcond_c.f b/dspl/liblapack/SRC/cla_syrcond_c.f new file mode 100644 index 0000000..fc52bf2 --- /dev/null +++ b/dspl/liblapack/SRC/cla_syrcond_c.f @@ -0,0 +1,328 @@ +*> \brief \b CLA_SYRCOND_C computes the infinity norm condition number of op(A)*inv(diag(c)) for symmetric indefinite matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_SYRCOND_C + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLA_SYRCOND_C( UPLO, N, A, LDA, AF, LDAF, IPIV, C, +* CAPPLY, INFO, WORK, RWORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* LOGICAL CAPPLY +* INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), WORK( * ) +* REAL C( * ), RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLA_SYRCOND_C Computes the infinity norm condition number of +*> op(A) * inv(diag(C)) where C is a REAL vector. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSYTRF. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The vector C in the formula op(A) * inv(diag(C)). +*> \endverbatim +*> +*> \param[in] CAPPLY +*> \verbatim +*> CAPPLY is LOGICAL +*> If .TRUE. then access the vector C in the formula above. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + REAL FUNCTION CLA_SYRCOND_C( UPLO, N, A, LDA, AF, LDAF, IPIV, C, + $ CAPPLY, INFO, WORK, RWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + LOGICAL CAPPLY + INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), WORK( * ) + REAL C( * ), RWORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER KASE + REAL AINVNM, ANORM, TMP + INTEGER I, J + LOGICAL UP, UPPER + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + CLA_SYRCOND_C = 0.0E+0 +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLA_SYRCOND_C', -INFO ) + RETURN + END IF + UP = .FALSE. + IF ( LSAME( UPLO, 'U' ) ) UP = .TRUE. +* +* Compute norm of op(A)*op2(C). +* + ANORM = 0.0E+0 + IF ( UP ) THEN + DO I = 1, N + TMP = 0.0E+0 + IF ( CAPPLY ) THEN + DO J = 1, I + TMP = TMP + CABS1( A( J, I ) ) / C( J ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( I, J ) ) / C( J ) + END DO + ELSE + DO J = 1, I + TMP = TMP + CABS1( A( J, I ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( I, J ) ) + END DO + END IF + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + ELSE + DO I = 1, N + TMP = 0.0E+0 + IF ( CAPPLY ) THEN + DO J = 1, I + TMP = TMP + CABS1( A( I, J ) ) / C( J ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( J, I ) ) / C( J ) + END DO + ELSE + DO J = 1, I + TMP = TMP + CABS1( A( I, J ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( J, I ) ) + END DO + END IF + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + CLA_SYRCOND_C = 1.0E+0 + RETURN + ELSE IF( ANORM .EQ. 0.0E+0 ) THEN + RETURN + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0E+0 +* + KASE = 0 + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO +* + IF ( UP ) THEN + CALL CSYTRS( 'U', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL CSYTRS( 'L', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ENDIF +* +* Multiply by inv(C). +* + IF ( CAPPLY ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + ELSE +* +* Multiply by inv(C**T). +* + IF ( CAPPLY ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF +* + IF ( UP ) THEN + CALL CSYTRS( 'U', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL CSYTRS( 'L', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0E+0 ) + $ CLA_SYRCOND_C = 1.0E+0 / AINVNM +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/cla_syrcond_x.f b/dspl/liblapack/SRC/cla_syrcond_x.f new file mode 100644 index 0000000..f8fb566 --- /dev/null +++ b/dspl/liblapack/SRC/cla_syrcond_x.f @@ -0,0 +1,298 @@ +*> \brief \b CLA_SYRCOND_X computes the infinity norm condition number of op(A)*diag(x) for symmetric indefinite matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_SYRCOND_X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLA_SYRCOND_X( UPLO, N, A, LDA, AF, LDAF, IPIV, X, +* INFO, WORK, RWORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), WORK( * ), X( * ) +* REAL RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLA_SYRCOND_X Computes the infinity norm condition number of +*> op(A) * diag(X) where X is a COMPLEX vector. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSYTRF. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX array, dimension (N) +*> The vector X in the formula op(A) * diag(X). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + REAL FUNCTION CLA_SYRCOND_X( UPLO, N, A, LDA, AF, LDAF, IPIV, X, + $ INFO, WORK, RWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), WORK( * ), X( * ) + REAL RWORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER KASE + REAL AINVNM, ANORM, TMP + INTEGER I, J + LOGICAL UP, UPPER + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + CLA_SYRCOND_X = 0.0E+0 +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF ( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLA_SYRCOND_X', -INFO ) + RETURN + END IF + UP = .FALSE. + IF ( LSAME( UPLO, 'U' ) ) UP = .TRUE. +* +* Compute norm of op(A)*op2(C). +* + ANORM = 0.0 + IF ( UP ) THEN + DO I = 1, N + TMP = 0.0E+0 + DO J = 1, I + TMP = TMP + CABS1( A( J, I ) * X( J ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( I, J ) * X( J ) ) + END DO + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + ELSE + DO I = 1, N + TMP = 0.0E+0 + DO J = 1, I + TMP = TMP + CABS1( A( I, J ) * X( J ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( J, I ) * X( J ) ) + END DO + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + CLA_SYRCOND_X = 1.0E+0 + RETURN + ELSE IF( ANORM .EQ. 0.0E+0 ) THEN + RETURN + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0E+0 +* + KASE = 0 + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO +* + IF ( UP ) THEN + CALL CSYTRS( 'U', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL CSYTRS( 'L', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ENDIF +* +* Multiply by inv(X). +* + DO I = 1, N + WORK( I ) = WORK( I ) / X( I ) + END DO + ELSE +* +* Multiply by inv(X**T). +* + DO I = 1, N + WORK( I ) = WORK( I ) / X( I ) + END DO +* + IF ( UP ) THEN + CALL CSYTRS( 'U', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL CSYTRS( 'L', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0E+0 ) + $ CLA_SYRCOND_X = 1.0E+0 / AINVNM +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/cla_syrfsx_extended.f b/dspl/liblapack/SRC/cla_syrfsx_extended.f new file mode 100644 index 0000000..5d2fa0c --- /dev/null +++ b/dspl/liblapack/SRC/cla_syrfsx_extended.f @@ -0,0 +1,716 @@ +*> \brief \b CLA_SYRFSX_EXTENDED improves the computed solution to a system of linear equations for symmetric indefinite matrices by performing extra-precise iterative refinement and provides error bounds and backward error estimates for the solution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_SYRFSX_EXTENDED + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLA_SYRFSX_EXTENDED( PREC_TYPE, UPLO, N, NRHS, A, LDA, +* AF, LDAF, IPIV, COLEQU, C, B, LDB, +* Y, LDY, BERR_OUT, N_NORMS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, RES, +* AYB, DY, Y_TAIL, RCOND, ITHRESH, +* RTHRESH, DZ_UB, IGNORE_CWISE, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, +* $ N_NORMS, ITHRESH +* CHARACTER UPLO +* LOGICAL COLEQU, IGNORE_CWISE +* REAL RTHRESH, DZ_UB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) +* REAL C( * ), AYB( * ), RCOND, BERR_OUT( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLA_SYRFSX_EXTENDED improves the computed solution to a system of +*> linear equations by performing extra-precise iterative refinement +*> and provides error bounds and backward error estimates for the solution. +*> This subroutine is called by CSYRFSX to perform iterative refinement. +*> In addition to normwise error bound, the code provides maximum +*> componentwise error bound if possible. See comments for ERR_BNDS_NORM +*> and ERR_BNDS_COMP for details of the error bounds. Note that this +*> subroutine is only resonsible for setting the second fields of +*> ERR_BNDS_NORM and ERR_BNDS_COMP. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] PREC_TYPE +*> \verbatim +*> PREC_TYPE is INTEGER +*> Specifies the intermediate precision to be used in refinement. +*> The value is defined by ILAPREC(P) where P is a CHARACTER and +*> P = 'S': Single +*> = 'D': Double +*> = 'I': Indigenous +*> = 'X', 'E': Extra +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand-sides, i.e., the number of columns of the +*> matrix B. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSYTRF. +*> \endverbatim +*> +*> \param[in] COLEQU +*> \verbatim +*> COLEQU is LOGICAL +*> If .TRUE. then column equilibration was done to A before calling +*> this routine. This is needed to compute the solution and error +*> bounds correctly. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If COLEQU = .FALSE., C +*> is not accessed. If C is input, each element of C should be a power +*> of the radix to ensure a reliable solution and error estimates. +*> Scaling by powers of the radix does not cause rounding errors unless +*> the result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right-hand-side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX array, dimension (LDY,NRHS) +*> On entry, the solution matrix X, as computed by CSYTRS. +*> On exit, the improved solution matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +*> +*> \param[out] BERR_OUT +*> \verbatim +*> BERR_OUT is REAL array, dimension (NRHS) +*> On exit, BERR_OUT(j) contains the componentwise relative backward +*> error for right-hand-side j from the formula +*> max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. This is computed by CLA_LIN_BERR. +*> \endverbatim +*> +*> \param[in] N_NORMS +*> \verbatim +*> N_NORMS is INTEGER +*> Determines which error bounds to return (see ERR_BNDS_NORM +*> and ERR_BNDS_COMP). +*> If N_NORMS >= 1 return normwise error bounds. +*> If N_NORMS >= 2 return componentwise error bounds. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is COMPLEX array, dimension (N) +*> Workspace to hold the intermediate residual. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is REAL array, dimension (N) +*> Workspace. +*> \endverbatim +*> +*> \param[in] DY +*> \verbatim +*> DY is COMPLEX array, dimension (N) +*> Workspace to hold the intermediate solution. +*> \endverbatim +*> +*> \param[in] Y_TAIL +*> \verbatim +*> Y_TAIL is COMPLEX array, dimension (N) +*> Workspace to hold the trailing bits of the intermediate solution. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[in] ITHRESH +*> \verbatim +*> ITHRESH is INTEGER +*> The maximum number of residual computations allowed for +*> refinement. The default is 10. For 'aggressive' set to 100 to +*> permit convergence using approximate factorizations or +*> factorizations other than LU. If the factorization uses a +*> technique other than Gaussian elimination, the guarantees in +*> ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. +*> \endverbatim +*> +*> \param[in] RTHRESH +*> \verbatim +*> RTHRESH is REAL +*> Determines when to stop refinement if the error estimate stops +*> decreasing. Refinement will stop when the next solution no longer +*> satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is +*> the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The +*> default value is 0.5. For 'aggressive' set to 0.9 to permit +*> convergence on extremely ill-conditioned matrices. See LAWN 165 +*> for more details. +*> \endverbatim +*> +*> \param[in] DZ_UB +*> \verbatim +*> DZ_UB is REAL +*> Determines when to start considering componentwise convergence. +*> Componentwise convergence is only considered after each component +*> of the solution Y is stable, which we definte as the relative +*> change in each component being less than DZ_UB. The default value +*> is 0.25, requiring the first bit to be stable. See LAWN 165 for +*> more details. +*> \endverbatim +*> +*> \param[in] IGNORE_CWISE +*> \verbatim +*> IGNORE_CWISE is LOGICAL +*> If .TRUE. then ignore componentwise convergence. Default value +*> is .FALSE.. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> < 0: if INFO = -i, the ith argument to CLA_SYRFSX_EXTENDED had an illegal +*> value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CLA_SYRFSX_EXTENDED( PREC_TYPE, UPLO, N, NRHS, A, LDA, + $ AF, LDAF, IPIV, COLEQU, C, B, LDB, + $ Y, LDY, BERR_OUT, N_NORMS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, RES, + $ AYB, DY, Y_TAIL, RCOND, ITHRESH, + $ RTHRESH, DZ_UB, IGNORE_CWISE, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, + $ N_NORMS, ITHRESH + CHARACTER UPLO + LOGICAL COLEQU, IGNORE_CWISE + REAL RTHRESH, DZ_UB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) + REAL C( * ), AYB( * ), RCOND, BERR_OUT( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER UPLO2, CNT, I, J, X_STATE, Z_STATE, + $ Y_PREC_STATE + REAL YK, DYK, YMIN, NORMY, NORMX, NORMDX, DXRAT, + $ DZRAT, PREVNORMDX, PREV_DZ_Z, DXRATMAX, + $ DZRATMAX, DX_X, DZ_Z, FINAL_DX_X, FINAL_DZ_Z, + $ EPS, HUGEVAL, INCR_THRESH + LOGICAL INCR_PREC, UPPER + COMPLEX ZDUM +* .. +* .. Parameters .. + INTEGER UNSTABLE_STATE, WORKING_STATE, CONV_STATE, + $ NOPROG_STATE, BASE_RESIDUAL, EXTRA_RESIDUAL, + $ EXTRA_Y + PARAMETER ( UNSTABLE_STATE = 0, WORKING_STATE = 1, + $ CONV_STATE = 2, NOPROG_STATE = 3 ) + PARAMETER ( BASE_RESIDUAL = 0, EXTRA_RESIDUAL = 1, + $ EXTRA_Y = 2 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL ILAUPLO + INTEGER ILAUPLO +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CSYTRS, CSYMV, BLAS_CSYMV_X, + $ BLAS_CSYMV2_X, CLA_SYAMV, CLA_WWADDW, + $ CLA_LIN_BERR + REAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, AIMAG, MAX, MIN +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDY.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLA_SYRFSX_EXTENDED', -INFO ) + RETURN + END IF + EPS = SLAMCH( 'Epsilon' ) + HUGEVAL = SLAMCH( 'Overflow' ) +* Force HUGEVAL to Inf + HUGEVAL = HUGEVAL * HUGEVAL +* Using HUGEVAL may lead to spurious underflows. + INCR_THRESH = REAL( N ) * EPS + + IF ( LSAME ( UPLO, 'L' ) ) THEN + UPLO2 = ILAUPLO( 'L' ) + ELSE + UPLO2 = ILAUPLO( 'U' ) + ENDIF + + DO J = 1, NRHS + Y_PREC_STATE = EXTRA_RESIDUAL + IF ( Y_PREC_STATE .EQ. EXTRA_Y ) THEN + DO I = 1, N + Y_TAIL( I ) = 0.0 + END DO + END IF + + DXRAT = 0.0 + DXRATMAX = 0.0 + DZRAT = 0.0 + DZRATMAX = 0.0 + FINAL_DX_X = HUGEVAL + FINAL_DZ_Z = HUGEVAL + PREVNORMDX = HUGEVAL + PREV_DZ_Z = HUGEVAL + DZ_Z = HUGEVAL + DX_X = HUGEVAL + + X_STATE = WORKING_STATE + Z_STATE = UNSTABLE_STATE + INCR_PREC = .FALSE. + + DO CNT = 1, ITHRESH +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL CCOPY( N, B( 1, J ), 1, RES, 1 ) + IF ( Y_PREC_STATE .EQ. BASE_RESIDUAL ) THEN + CALL CSYMV( UPLO, N, CMPLX(-1.0), A, LDA, Y(1,J), 1, + $ CMPLX(1.0), RES, 1 ) + ELSE IF ( Y_PREC_STATE .EQ. EXTRA_RESIDUAL ) THEN + CALL BLAS_CSYMV_X( UPLO2, N, CMPLX(-1.0), A, LDA, + $ Y( 1, J ), 1, CMPLX(1.0), RES, 1, PREC_TYPE ) + ELSE + CALL BLAS_CSYMV2_X(UPLO2, N, CMPLX(-1.0), A, LDA, + $ Y(1, J), Y_TAIL, 1, CMPLX(1.0), RES, 1, PREC_TYPE) + END IF + +! XXX: RES is no longer needed. + CALL CCOPY( N, RES, 1, DY, 1 ) + CALL CSYTRS( UPLO, N, 1, AF, LDAF, IPIV, DY, N, INFO ) +* +* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. +* + NORMX = 0.0 + NORMY = 0.0 + NORMDX = 0.0 + DZ_Z = 0.0 + YMIN = HUGEVAL + + DO I = 1, N + YK = CABS1( Y( I, J ) ) + DYK = CABS1( DY( I ) ) + + IF ( YK .NE. 0.0 ) THEN + DZ_Z = MAX( DZ_Z, DYK / YK ) + ELSE IF ( DYK .NE. 0.0 ) THEN + DZ_Z = HUGEVAL + END IF + + YMIN = MIN( YMIN, YK ) + + NORMY = MAX( NORMY, YK ) + + IF ( COLEQU ) THEN + NORMX = MAX( NORMX, YK * C( I ) ) + NORMDX = MAX( NORMDX, DYK * C( I ) ) + ELSE + NORMX = NORMY + NORMDX = MAX( NORMDX, DYK ) + END IF + END DO + + IF ( NORMX .NE. 0.0 ) THEN + DX_X = NORMDX / NORMX + ELSE IF ( NORMDX .EQ. 0.0 ) THEN + DX_X = 0.0 + ELSE + DX_X = HUGEVAL + END IF + + DXRAT = NORMDX / PREVNORMDX + DZRAT = DZ_Z / PREV_DZ_Z +* +* Check termination criteria. +* + IF ( YMIN*RCOND .LT. INCR_THRESH*NORMY + $ .AND. Y_PREC_STATE .LT. EXTRA_Y ) + $ INCR_PREC = .TRUE. + + IF ( X_STATE .EQ. NOPROG_STATE .AND. DXRAT .LE. RTHRESH ) + $ X_STATE = WORKING_STATE + IF ( X_STATE .EQ. WORKING_STATE ) THEN + IF ( DX_X .LE. EPS ) THEN + X_STATE = CONV_STATE + ELSE IF ( DXRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + X_STATE = NOPROG_STATE + END IF + ELSE + IF (DXRAT .GT. DXRATMAX) DXRATMAX = DXRAT + END IF + IF ( X_STATE .GT. WORKING_STATE ) FINAL_DX_X = DX_X + END IF + + IF ( Z_STATE .EQ. UNSTABLE_STATE .AND. DZ_Z .LE. DZ_UB ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. NOPROG_STATE .AND. DZRAT .LE. RTHRESH ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. WORKING_STATE ) THEN + IF ( DZ_Z .LE. EPS ) THEN + Z_STATE = CONV_STATE + ELSE IF ( DZ_Z .GT. DZ_UB ) THEN + Z_STATE = UNSTABLE_STATE + DZRATMAX = 0.0 + FINAL_DZ_Z = HUGEVAL + ELSE IF ( DZRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + Z_STATE = NOPROG_STATE + END IF + ELSE + IF ( DZRAT .GT. DZRATMAX ) DZRATMAX = DZRAT + END IF + IF ( Z_STATE .GT. WORKING_STATE ) FINAL_DZ_Z = DZ_Z + END IF + + IF ( X_STATE.NE.WORKING_STATE.AND. + $ ( IGNORE_CWISE.OR.Z_STATE.NE.WORKING_STATE ) ) + $ GOTO 666 + + IF ( INCR_PREC ) THEN + INCR_PREC = .FALSE. + Y_PREC_STATE = Y_PREC_STATE + 1 + DO I = 1, N + Y_TAIL( I ) = 0.0 + END DO + END IF + + PREVNORMDX = NORMDX + PREV_DZ_Z = DZ_Z +* +* Update soluton. +* + IF ( Y_PREC_STATE .LT. EXTRA_Y ) THEN + CALL CAXPY( N, CMPLX(1.0), DY, 1, Y(1,J), 1 ) + ELSE + CALL CLA_WWADDW( N, Y(1,J), Y_TAIL, DY ) + END IF + + END DO +* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. + 666 CONTINUE +* +* Set final_* when cnt hits ithresh. +* + IF ( X_STATE .EQ. WORKING_STATE ) FINAL_DX_X = DX_X + IF ( Z_STATE .EQ. WORKING_STATE ) FINAL_DZ_Z = DZ_Z +* +* Compute error bounds. +* + IF ( N_NORMS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = + $ FINAL_DX_X / (1 - DXRATMAX) + END IF + IF ( N_NORMS .GE. 2 ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = + $ FINAL_DZ_Z / (1 - DZRATMAX) + END IF +* +* Compute componentwise relative backward error from formula +* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL CCOPY( N, B( 1, J ), 1, RES, 1 ) + CALL CSYMV( UPLO, N, CMPLX(-1.0), A, LDA, Y(1,J), 1, + $ CMPLX(1.0), RES, 1 ) + + DO I = 1, N + AYB( I ) = CABS1( B( I, J ) ) + END DO +* +* Compute abs(op(A_s))*abs(Y) + abs(B_s). +* + CALL CLA_SYAMV ( UPLO2, N, 1.0, + $ A, LDA, Y(1, J), 1, 1.0, AYB, 1 ) + + CALL CLA_LIN_BERR ( N, N, 1, RES, AYB, BERR_OUT( J ) ) +* +* End of loop for each RHS. +* + END DO +* + RETURN + END diff --git a/dspl/liblapack/SRC/cla_syrpvgrw.f b/dspl/liblapack/SRC/cla_syrpvgrw.f new file mode 100644 index 0000000..ccea462 --- /dev/null +++ b/dspl/liblapack/SRC/cla_syrpvgrw.f @@ -0,0 +1,331 @@ +*> \brief \b CLA_SYRPVGRW computes the reciprocal pivot growth factor norm(A)/norm(U) for a symmetric indefinite matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_SYRPVGRW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLA_SYRPVGRW( UPLO, N, INFO, A, LDA, AF, LDAF, IPIV, +* WORK ) +* +* .. Scalar Arguments .. +* CHARACTER*1 UPLO +* INTEGER N, INFO, LDA, LDAF +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), AF( LDAF, * ) +* REAL WORK( * ) +* INTEGER IPIV( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> CLA_SYRPVGRW computes the reciprocal pivot growth factor +*> norm(A)/norm(U). The "max absolute element" norm is used. If this is +*> much less than 1, the stability of the LU factorization of the +*> (equilibrated) matrix A could be poor. This also means that the +*> solution X, estimated condition numbers, and error bounds could be +*> unreliable. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] INFO +*> \verbatim +*> INFO is INTEGER +*> The value of INFO returned from CSYTRF, .i.e., the pivot in +*> column INFO is exactly 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSYTRF. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + REAL FUNCTION CLA_SYRPVGRW( UPLO, N, INFO, A, LDA, AF, LDAF, IPIV, + $ WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER*1 UPLO + INTEGER N, INFO, LDA, LDAF +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), AF( LDAF, * ) + REAL WORK( * ) + INTEGER IPIV( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER NCOLS, I, J, K, KP + REAL AMAX, UMAX, RPVGRW, TMP + LOGICAL UPPER + COMPLEX ZDUM +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, AIMAG, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL LSAME + LOGICAL LSAME +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL ( ZDUM ) ) + ABS( AIMAG ( ZDUM ) ) +* .. +* .. Executable Statements .. +* + UPPER = LSAME( 'Upper', UPLO ) + IF ( INFO.EQ.0 ) THEN + IF ( UPPER ) THEN + NCOLS = 1 + ELSE + NCOLS = N + END IF + ELSE + NCOLS = INFO + END IF + + RPVGRW = 1.0 + DO I = 1, 2*N + WORK( I ) = 0.0 + END DO +* +* Find the max magnitude entry of each column of A. Compute the max +* for all N columns so we can apply the pivot permutation while +* looping below. Assume a full factorization is the common case. +* + IF ( UPPER ) THEN + DO J = 1, N + DO I = 1, J + WORK( N+I ) = MAX( CABS1( A( I, J ) ), WORK( N+I ) ) + WORK( N+J ) = MAX( CABS1( A( I, J ) ), WORK( N+J ) ) + END DO + END DO + ELSE + DO J = 1, N + DO I = J, N + WORK( N+I ) = MAX( CABS1( A( I, J ) ), WORK( N+I ) ) + WORK( N+J ) = MAX( CABS1( A( I, J ) ), WORK( N+J ) ) + END DO + END DO + END IF +* +* Now find the max magnitude entry of each column of U or L. Also +* permute the magnitudes of A above so they're in the same order as +* the factor. +* +* The iteration orders and permutations were copied from csytrs. +* Calls to SSWAP would be severe overkill. +* + IF ( UPPER ) THEN + K = N + DO WHILE ( K .LT. NCOLS .AND. K.GT.0 ) + IF ( IPIV( K ).GT.0 ) THEN +! 1x1 pivot + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + DO I = 1, K + WORK( K ) = MAX( CABS1( AF( I, K ) ), WORK( K ) ) + END DO + K = K - 1 + ELSE +! 2x2 pivot + KP = -IPIV( K ) + TMP = WORK( N+K-1 ) + WORK( N+K-1 ) = WORK( N+KP ) + WORK( N+KP ) = TMP + DO I = 1, K-1 + WORK( K ) = MAX( CABS1( AF( I, K ) ), WORK( K ) ) + WORK( K-1 ) = + $ MAX( CABS1( AF( I, K-1 ) ), WORK( K-1 ) ) + END DO + WORK( K ) = MAX( CABS1( AF( K, K ) ), WORK( K ) ) + K = K - 2 + END IF + END DO + K = NCOLS + DO WHILE ( K .LE. N ) + IF ( IPIV( K ).GT.0 ) THEN + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + K = K + 1 + ELSE + KP = -IPIV( K ) + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + K = K + 2 + END IF + END DO + ELSE + K = 1 + DO WHILE ( K .LE. NCOLS ) + IF ( IPIV( K ).GT.0 ) THEN +! 1x1 pivot + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + DO I = K, N + WORK( K ) = MAX( CABS1( AF( I, K ) ), WORK( K ) ) + END DO + K = K + 1 + ELSE +! 2x2 pivot + KP = -IPIV( K ) + TMP = WORK( N+K+1 ) + WORK( N+K+1 ) = WORK( N+KP ) + WORK( N+KP ) = TMP + DO I = K+1, N + WORK( K ) = MAX( CABS1( AF( I, K ) ), WORK( K ) ) + WORK( K+1 ) = + $ MAX( CABS1( AF( I, K+1 ) ), WORK( K+1 ) ) + END DO + WORK( K ) = MAX( CABS1( AF( K, K ) ), WORK( K ) ) + K = K + 2 + END IF + END DO + K = NCOLS + DO WHILE ( K .GE. 1 ) + IF ( IPIV( K ).GT.0 ) THEN + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + K = K - 1 + ELSE + KP = -IPIV( K ) + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + K = K - 2 + ENDIF + END DO + END IF +* +* Compute the *inverse* of the max element growth factor. Dividing +* by zero would imply the largest entry of the factor's column is +* zero. Than can happen when either the column of A is zero or +* massive pivots made the factor underflow to zero. Neither counts +* as growth in itself, so simply ignore terms with zero +* denominators. +* + IF ( UPPER ) THEN + DO I = NCOLS, N + UMAX = WORK( I ) + AMAX = WORK( N+I ) + IF ( UMAX /= 0.0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + ELSE + DO I = 1, NCOLS + UMAX = WORK( I ) + AMAX = WORK( N+I ) + IF ( UMAX /= 0.0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + END IF + + CLA_SYRPVGRW = RPVGRW + END diff --git a/dspl/liblapack/SRC/cla_wwaddw.f b/dspl/liblapack/SRC/cla_wwaddw.f new file mode 100644 index 0000000..9267c6d --- /dev/null +++ b/dspl/liblapack/SRC/cla_wwaddw.f @@ -0,0 +1,111 @@ +*> \brief \b CLA_WWADDW adds a vector into a doubled-single vector. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLA_WWADDW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLA_WWADDW( N, X, Y, W ) +* +* .. Scalar Arguments .. +* INTEGER N +* .. +* .. Array Arguments .. +* COMPLEX X( * ), Y( * ), W( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLA_WWADDW adds a vector W into a doubled-single vector (X, Y). +*> +*> This works for all extant IBM's hex and binary floating point +*> arithmetics, but not for decimal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of vectors X, Y, and W. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (N) +*> The first part of the doubled-single accumulation vector. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX array, dimension (N) +*> The second part of the doubled-single accumulation vector. +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is COMPLEX array, dimension (N) +*> The vector to be added. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CLA_WWADDW( N, X, Y, W ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER N +* .. +* .. Array Arguments .. + COMPLEX X( * ), Y( * ), W( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + COMPLEX S + INTEGER I +* .. +* .. Executable Statements .. +* + DO 10 I = 1, N + S = X(I) + W(I) + S = (S + S) - S + Y(I) = ((X(I) - S) + W(I)) + Y(I) + X(I) = S + 10 CONTINUE + RETURN + END diff --git a/dspl/liblapack/SRC/clabrd.f b/dspl/liblapack/SRC/clabrd.f new file mode 100644 index 0000000..7830222 --- /dev/null +++ b/dspl/liblapack/SRC/clabrd.f @@ -0,0 +1,420 @@ +*> \brief \b CLABRD reduces the first nb rows and columns of a general matrix to a bidiagonal form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLABRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, +* LDY ) +* +* .. Scalar Arguments .. +* INTEGER LDA, LDX, LDY, M, N, NB +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ) +* COMPLEX A( LDA, * ), TAUP( * ), TAUQ( * ), X( LDX, * ), +* $ Y( LDY, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLABRD reduces the first NB rows and columns of a complex general +*> m by n matrix A to upper or lower real bidiagonal form by a unitary +*> transformation Q**H * A * P, and returns the matrices X and Y which +*> are needed to apply the transformation to the unreduced part of A. +*> +*> If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower +*> bidiagonal form. +*> +*> This is an auxiliary routine called by CGEBRD +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows in the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in the matrix A. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of leading rows and columns of A to be reduced. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the m by n general matrix to be reduced. +*> On exit, the first NB rows and columns of the matrix are +*> overwritten; the rest of the array is unchanged. +*> If m >= n, elements on and below the diagonal in the first NB +*> columns, with the array TAUQ, represent the unitary +*> matrix Q as a product of elementary reflectors; and +*> elements above the diagonal in the first NB rows, with the +*> array TAUP, represent the unitary matrix P as a product +*> of elementary reflectors. +*> If m < n, elements below the diagonal in the first NB +*> columns, with the array TAUQ, represent the unitary +*> matrix Q as a product of elementary reflectors, and +*> elements on and above the diagonal in the first NB rows, +*> with the array TAUP, represent the unitary matrix P as +*> a product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (NB) +*> The diagonal elements of the first NB rows and columns of +*> the reduced matrix. D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (NB) +*> The off-diagonal elements of the first NB rows and columns of +*> the reduced matrix. +*> \endverbatim +*> +*> \param[out] TAUQ +*> \verbatim +*> TAUQ is COMPLEX array, dimension (NB) +*> The scalar factors of the elementary reflectors which +*> represent the unitary matrix Q. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP +*> \verbatim +*> TAUP is COMPLEX array, dimension (NB) +*> The scalar factors of the elementary reflectors which +*> represent the unitary matrix P. See Further Details. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NB) +*> The m-by-nb matrix X required to update the unreduced part +*> of A. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,M). +*> \endverbatim +*> +*> \param[out] Y +*> \verbatim +*> Y is COMPLEX array, dimension (LDY,NB) +*> The n-by-nb matrix Y required to update the unreduced part +*> of A. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrices Q and P are represented as products of elementary +*> reflectors: +*> +*> Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**H and G(i) = I - taup * u * u**H +*> +*> where tauq and taup are complex scalars, and v and u are complex +*> vectors. +*> +*> If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in +*> A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in +*> A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in +*> A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in +*> A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> The elements of the vectors v and u together form the m-by-nb matrix +*> V and the nb-by-n matrix U**H which are needed, with X and Y, to apply +*> the transformation to the unreduced part of the matrix, using a block +*> update of the form: A := A - V*Y**H - X*U**H. +*> +*> The contents of A on exit are illustrated by the following examples +*> with nb = 2: +*> +*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +*> +*> ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) +*> ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) +*> ( v1 v2 a a a ) ( v1 1 a a a a ) +*> ( v1 v2 a a a ) ( v1 v2 a a a a ) +*> ( v1 v2 a a a ) ( v1 v2 a a a a ) +*> ( v1 v2 a a a ) +*> +*> where a denotes an element of the original matrix which is unchanged, +*> vi denotes an element of the vector defining H(i), and ui an element +*> of the vector defining G(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, + $ LDY ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER LDA, LDX, LDY, M, N, NB +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + COMPLEX A( LDA, * ), TAUP( * ), TAUQ( * ), X( LDX, * ), + $ Y( LDY, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CLACGV, CLARFG, CSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, NB +* +* Update A(i:m,i) +* + CALL CLACGV( I-1, Y( I, 1 ), LDY ) + CALL CGEMV( 'No transpose', M-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I, I ), 1 ) + CALL CLACGV( I-1, Y( I, 1 ), LDY ) + CALL CGEMV( 'No transpose', M-I+1, I-1, -ONE, X( I, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+1:m,i) +* + ALPHA = A( I, I ) + CALL CLARFG( M-I+1, ALPHA, A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = ALPHA + IF( I.LT.N ) THEN + A( I, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL CGEMV( 'Conjugate transpose', M-I+1, N-I, ONE, + $ A( I, I+1 ), LDA, A( I, I ), 1, ZERO, + $ Y( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', M-I+1, I-1, ONE, + $ A( I, 1 ), LDA, A( I, I ), 1, ZERO, + $ Y( 1, I ), 1 ) + CALL CGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', M-I+1, I-1, ONE, + $ X( I, 1 ), LDX, A( I, I ), 1, ZERO, + $ Y( 1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, Y( 1, I ), 1, ONE, + $ Y( I+1, I ), 1 ) + CALL CSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) +* +* Update A(i,i+1:n) +* + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + CALL CLACGV( I, A( I, 1 ), LDA ) + CALL CGEMV( 'No transpose', N-I, I, -ONE, Y( I+1, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I+1 ), LDA ) + CALL CLACGV( I, A( I, 1 ), LDA ) + CALL CLACGV( I-1, X( I, 1 ), LDX ) + CALL CGEMV( 'Conjugate transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, X( I, 1 ), LDX, ONE, + $ A( I, I+1 ), LDA ) + CALL CLACGV( I-1, X( I, 1 ), LDX ) +* +* Generate reflection P(i) to annihilate A(i,i+2:n) +* + ALPHA = A( I, I+1 ) + CALL CLARFG( N-I, ALPHA, A( I, MIN( I+2, N ) ), + $ LDA, TAUP( I ) ) + E( I ) = ALPHA + A( I, I+1 ) = ONE +* +* Compute X(i+1:m,i) +* + CALL CGEMV( 'No transpose', M-I, N-I, ONE, A( I+1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', N-I, I, ONE, + $ Y( I+1, 1 ), LDY, A( I, I+1 ), LDA, ZERO, + $ X( 1, I ), 1 ) + CALL CGEMV( 'No transpose', M-I, I, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL CGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 ) + CALL CGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL CSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, NB +* +* Update A(i,i:n) +* + CALL CLACGV( N-I+1, A( I, I ), LDA ) + CALL CLACGV( I-1, A( I, 1 ), LDA ) + CALL CGEMV( 'No transpose', N-I+1, I-1, -ONE, Y( I, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I ), LDA ) + CALL CLACGV( I-1, A( I, 1 ), LDA ) + CALL CLACGV( I-1, X( I, 1 ), LDX ) + CALL CGEMV( 'Conjugate transpose', I-1, N-I+1, -ONE, + $ A( 1, I ), LDA, X( I, 1 ), LDX, ONE, A( I, I ), + $ LDA ) + CALL CLACGV( I-1, X( I, 1 ), LDX ) +* +* Generate reflection P(i) to annihilate A(i,i+1:n) +* + ALPHA = A( I, I ) + CALL CLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = ALPHA + IF( I.LT.M ) THEN + A( I, I ) = ONE +* +* Compute X(i+1:m,i) +* + CALL CGEMV( 'No transpose', M-I, N-I+1, ONE, A( I+1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', N-I+1, I-1, ONE, + $ Y( I, 1 ), LDY, A( I, I ), LDA, ZERO, + $ X( 1, I ), 1 ) + CALL CGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL CGEMV( 'No transpose', I-1, N-I+1, ONE, A( 1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( 1, I ), 1 ) + CALL CGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL CSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) + CALL CLACGV( N-I+1, A( I, I ), LDA ) +* +* Update A(i+1:m,i) +* + CALL CLACGV( I-1, Y( I, 1 ), LDY ) + CALL CGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I+1, I ), 1 ) + CALL CLACGV( I-1, Y( I, 1 ), LDY ) + CALL CGEMV( 'No transpose', M-I, I, -ONE, X( I+1, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I+1, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+2:m,i) +* + ALPHA = A( I+1, I ) + CALL CLARFG( M-I, ALPHA, A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = ALPHA + A( I+1, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL CGEMV( 'Conjugate transpose', M-I, N-I, ONE, + $ A( I+1, I+1 ), LDA, A( I+1, I ), 1, ZERO, + $ Y( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', M-I, I-1, ONE, + $ A( I+1, 1 ), LDA, A( I+1, I ), 1, ZERO, + $ Y( 1, I ), 1 ) + CALL CGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', M-I, I, ONE, + $ X( I+1, 1 ), LDX, A( I+1, I ), 1, ZERO, + $ Y( 1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', I, N-I, -ONE, + $ A( 1, I+1 ), LDA, Y( 1, I ), 1, ONE, + $ Y( I+1, I ), 1 ) + CALL CSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) + ELSE + CALL CLACGV( N-I+1, A( I, I ), LDA ) + END IF + 20 CONTINUE + END IF + RETURN +* +* End of CLABRD +* + END diff --git a/dspl/liblapack/SRC/clacgv.f b/dspl/liblapack/SRC/clacgv.f new file mode 100644 index 0000000..81ecadc --- /dev/null +++ b/dspl/liblapack/SRC/clacgv.f @@ -0,0 +1,116 @@ +*> \brief \b CLACGV conjugates a complex vector. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLACGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLACGV( N, X, INCX ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* .. +* .. Array Arguments .. +* COMPLEX X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLACGV conjugates a complex vector of length N. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of the vector X. N >= 0. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension +*> (1+(N-1)*abs(INCX)) +*> On entry, the vector of length N to be conjugated. +*> On exit, X is overwritten with conjg(X). +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The spacing between successive elements of X. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLACGV( N, X, INCX ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCX, N +* .. +* .. Array Arguments .. + COMPLEX X( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IOFF +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* + IF( INCX.EQ.1 ) THEN + DO 10 I = 1, N + X( I ) = CONJG( X( I ) ) + 10 CONTINUE + ELSE + IOFF = 1 + IF( INCX.LT.0 ) + $ IOFF = 1 - ( N-1 )*INCX + DO 20 I = 1, N + X( IOFF ) = CONJG( X( IOFF ) ) + IOFF = IOFF + INCX + 20 CONTINUE + END IF + RETURN +* +* End of CLACGV +* + END diff --git a/dspl/liblapack/SRC/clacn2.f b/dspl/liblapack/SRC/clacn2.f new file mode 100644 index 0000000..132712d --- /dev/null +++ b/dspl/liblapack/SRC/clacn2.f @@ -0,0 +1,298 @@ +*> \brief \b CLACN2 estimates the 1-norm of a square matrix, using reverse communication for evaluating matrix-vector products. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLACN2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLACN2( N, V, X, EST, KASE, ISAVE ) +* +* .. Scalar Arguments .. +* INTEGER KASE, N +* REAL EST +* .. +* .. Array Arguments .. +* INTEGER ISAVE( 3 ) +* COMPLEX V( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLACN2 estimates the 1-norm of a square, complex matrix A. +*> Reverse communication is used for evaluating matrix-vector products. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 1. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX array, dimension (N) +*> On the final return, V = A*W, where EST = norm(V)/norm(W) +*> (W is not returned). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (N) +*> On an intermediate return, X should be overwritten by +*> A * X, if KASE=1, +*> A**H * X, if KASE=2, +*> where A**H is the conjugate transpose of A, and CLACN2 must be +*> re-called with all the other parameters unchanged. +*> \endverbatim +*> +*> \param[in,out] EST +*> \verbatim +*> EST is REAL +*> On entry with KASE = 1 or 2 and ISAVE(1) = 3, EST should be +*> unchanged from the previous call to CLACN2. +*> On exit, EST is an estimate (a lower bound) for norm(A). +*> \endverbatim +*> +*> \param[in,out] KASE +*> \verbatim +*> KASE is INTEGER +*> On the initial call to CLACN2, KASE should be 0. +*> On an intermediate return, KASE will be 1 or 2, indicating +*> whether X should be overwritten by A * X or A**H * X. +*> On the final return from CLACN2, KASE will again be 0. +*> \endverbatim +*> +*> \param[in,out] ISAVE +*> \verbatim +*> ISAVE is INTEGER array, dimension (3) +*> ISAVE is used to save variables between calls to SLACN2 +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Originally named CONEST, dated March 16, 1988. +*> +*> Last modified: April, 1999 +*> +*> This is a thread safe version of CLACON, which uses the array ISAVE +*> in place of a SAVE statement, as follows: +*> +*> CLACON CLACN2 +*> JUMP ISAVE(1) +*> J ISAVE(2) +*> ITER ISAVE(3) +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Nick Higham, University of Manchester +* +*> \par References: +* ================ +*> +*> N.J. Higham, "FORTRAN codes for estimating the one-norm of +*> a real or complex matrix, with applications to condition estimation", +*> ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. +*> +* ===================================================================== + SUBROUTINE CLACN2( N, V, X, EST, KASE, ISAVE ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER KASE, N + REAL EST +* .. +* .. Array Arguments .. + INTEGER ISAVE( 3 ) + COMPLEX V( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ONE, TWO + PARAMETER ( ONE = 1.0E0, TWO = 2.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, JLAST + REAL ABSXI, ALTSGN, ESTOLD, SAFMIN, TEMP +* .. +* .. External Functions .. + INTEGER ICMAX1 + REAL SCSUM1, SLAMCH + EXTERNAL ICMAX1, SCSUM1, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, REAL +* .. +* .. Executable Statements .. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + IF( KASE.EQ.0 ) THEN + DO 10 I = 1, N + X( I ) = CMPLX( ONE / REAL( N ) ) + 10 CONTINUE + KASE = 1 + ISAVE( 1 ) = 1 + RETURN + END IF +* + GO TO ( 20, 40, 70, 90, 120 )ISAVE( 1 ) +* +* ................ ENTRY (ISAVE( 1 ) = 1) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. +* + 20 CONTINUE + IF( N.EQ.1 ) THEN + V( 1 ) = X( 1 ) + EST = ABS( V( 1 ) ) +* ... QUIT + GO TO 130 + END IF + EST = SCSUM1( N, X, 1 ) +* + DO 30 I = 1, N + ABSXI = ABS( X( I ) ) + IF( ABSXI.GT.SAFMIN ) THEN + X( I ) = CMPLX( REAL( X( I ) ) / ABSXI, + $ AIMAG( X( I ) ) / ABSXI ) + ELSE + X( I ) = CONE + END IF + 30 CONTINUE + KASE = 2 + ISAVE( 1 ) = 2 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 2) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY CTRANS(A)*X. +* + 40 CONTINUE + ISAVE( 2 ) = ICMAX1( N, X, 1 ) + ISAVE( 3 ) = 2 +* +* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. +* + 50 CONTINUE + DO 60 I = 1, N + X( I ) = CZERO + 60 CONTINUE + X( ISAVE( 2 ) ) = CONE + KASE = 1 + ISAVE( 1 ) = 3 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 3) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 70 CONTINUE + CALL CCOPY( N, X, 1, V, 1 ) + ESTOLD = EST + EST = SCSUM1( N, V, 1 ) +* +* TEST FOR CYCLING. + IF( EST.LE.ESTOLD ) + $ GO TO 100 +* + DO 80 I = 1, N + ABSXI = ABS( X( I ) ) + IF( ABSXI.GT.SAFMIN ) THEN + X( I ) = CMPLX( REAL( X( I ) ) / ABSXI, + $ AIMAG( X( I ) ) / ABSXI ) + ELSE + X( I ) = CONE + END IF + 80 CONTINUE + KASE = 2 + ISAVE( 1 ) = 4 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 4) +* X HAS BEEN OVERWRITTEN BY CTRANS(A)*X. +* + 90 CONTINUE + JLAST = ISAVE( 2 ) + ISAVE( 2 ) = ICMAX1( N, X, 1 ) + IF( ( ABS( X( JLAST ) ).NE.ABS( X( ISAVE( 2 ) ) ) ) .AND. + $ ( ISAVE( 3 ).LT.ITMAX ) ) THEN + ISAVE( 3 ) = ISAVE( 3 ) + 1 + GO TO 50 + END IF +* +* ITERATION COMPLETE. FINAL STAGE. +* + 100 CONTINUE + ALTSGN = ONE + DO 110 I = 1, N + X( I ) = CMPLX( ALTSGN*( ONE + REAL( I-1 ) / REAL( N-1 ) ) ) + ALTSGN = -ALTSGN + 110 CONTINUE + KASE = 1 + ISAVE( 1 ) = 5 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 5) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 120 CONTINUE + TEMP = TWO*( SCSUM1( N, X, 1 ) / REAL( 3*N ) ) + IF( TEMP.GT.EST ) THEN + CALL CCOPY( N, X, 1, V, 1 ) + EST = TEMP + END IF +* + 130 CONTINUE + KASE = 0 + RETURN +* +* End of CLACN2 +* + END diff --git a/dspl/liblapack/SRC/clacon.f b/dspl/liblapack/SRC/clacon.f new file mode 100644 index 0000000..2d67b9e --- /dev/null +++ b/dspl/liblapack/SRC/clacon.f @@ -0,0 +1,281 @@ +*> \brief \b CLACON estimates the 1-norm of a square matrix, using reverse communication for evaluating matrix-vector products. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLACON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLACON( N, V, X, EST, KASE ) +* +* .. Scalar Arguments .. +* INTEGER KASE, N +* REAL EST +* .. +* .. Array Arguments .. +* COMPLEX V( N ), X( N ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLACON estimates the 1-norm of a square, complex matrix A. +*> Reverse communication is used for evaluating matrix-vector products. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 1. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX array, dimension (N) +*> On the final return, V = A*W, where EST = norm(V)/norm(W) +*> (W is not returned). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (N) +*> On an intermediate return, X should be overwritten by +*> A * X, if KASE=1, +*> A**H * X, if KASE=2, +*> where A**H is the conjugate transpose of A, and CLACON must be +*> re-called with all the other parameters unchanged. +*> \endverbatim +*> +*> \param[in,out] EST +*> \verbatim +*> EST is REAL +*> On entry with KASE = 1 or 2 and JUMP = 3, EST should be +*> unchanged from the previous call to CLACON. +*> On exit, EST is an estimate (a lower bound) for norm(A). +*> \endverbatim +*> +*> \param[in,out] KASE +*> \verbatim +*> KASE is INTEGER +*> On the initial call to CLACON, KASE should be 0. +*> On an intermediate return, KASE will be 1 or 2, indicating +*> whether X should be overwritten by A * X or A**H * X. +*> On the final return from CLACON, KASE will again be 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> Originally named CONEST, dated March 16, 1988. \n +*> Last modified: April, 1999 +* +*> \par Contributors: +* ================== +*> +*> Nick Higham, University of Manchester +* +*> \par References: +* ================ +*> +*> N.J. Higham, "FORTRAN codes for estimating the one-norm of +*> a real or complex matrix, with applications to condition estimation", +*> ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. +*> +* ===================================================================== + SUBROUTINE CLACON( N, V, X, EST, KASE ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER KASE, N + REAL EST +* .. +* .. Array Arguments .. + COMPLEX V( N ), X( N ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ONE, TWO + PARAMETER ( ONE = 1.0E0, TWO = 2.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, ITER, J, JLAST, JUMP + REAL ABSXI, ALTSGN, ESTOLD, SAFMIN, TEMP +* .. +* .. External Functions .. + INTEGER ICMAX1 + REAL SCSUM1, SLAMCH + EXTERNAL ICMAX1, SCSUM1, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, REAL +* .. +* .. Save statement .. + SAVE +* .. +* .. Executable Statements .. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + IF( KASE.EQ.0 ) THEN + DO 10 I = 1, N + X( I ) = CMPLX( ONE / REAL( N ) ) + 10 CONTINUE + KASE = 1 + JUMP = 1 + RETURN + END IF +* + GO TO ( 20, 40, 70, 90, 120 )JUMP +* +* ................ ENTRY (JUMP = 1) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. +* + 20 CONTINUE + IF( N.EQ.1 ) THEN + V( 1 ) = X( 1 ) + EST = ABS( V( 1 ) ) +* ... QUIT + GO TO 130 + END IF + EST = SCSUM1( N, X, 1 ) +* + DO 30 I = 1, N + ABSXI = ABS( X( I ) ) + IF( ABSXI.GT.SAFMIN ) THEN + X( I ) = CMPLX( REAL( X( I ) ) / ABSXI, + $ AIMAG( X( I ) ) / ABSXI ) + ELSE + X( I ) = CONE + END IF + 30 CONTINUE + KASE = 2 + JUMP = 2 + RETURN +* +* ................ ENTRY (JUMP = 2) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY CTRANS(A)*X. +* + 40 CONTINUE + J = ICMAX1( N, X, 1 ) + ITER = 2 +* +* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. +* + 50 CONTINUE + DO 60 I = 1, N + X( I ) = CZERO + 60 CONTINUE + X( J ) = CONE + KASE = 1 + JUMP = 3 + RETURN +* +* ................ ENTRY (JUMP = 3) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 70 CONTINUE + CALL CCOPY( N, X, 1, V, 1 ) + ESTOLD = EST + EST = SCSUM1( N, V, 1 ) +* +* TEST FOR CYCLING. + IF( EST.LE.ESTOLD ) + $ GO TO 100 +* + DO 80 I = 1, N + ABSXI = ABS( X( I ) ) + IF( ABSXI.GT.SAFMIN ) THEN + X( I ) = CMPLX( REAL( X( I ) ) / ABSXI, + $ AIMAG( X( I ) ) / ABSXI ) + ELSE + X( I ) = CONE + END IF + 80 CONTINUE + KASE = 2 + JUMP = 4 + RETURN +* +* ................ ENTRY (JUMP = 4) +* X HAS BEEN OVERWRITTEN BY CTRANS(A)*X. +* + 90 CONTINUE + JLAST = J + J = ICMAX1( N, X, 1 ) + IF( ( ABS( X( JLAST ) ).NE.ABS( X( J ) ) ) .AND. + $ ( ITER.LT.ITMAX ) ) THEN + ITER = ITER + 1 + GO TO 50 + END IF +* +* ITERATION COMPLETE. FINAL STAGE. +* + 100 CONTINUE + ALTSGN = ONE + DO 110 I = 1, N + X( I ) = CMPLX( ALTSGN*( ONE+REAL( I-1 ) / REAL( N-1 ) ) ) + ALTSGN = -ALTSGN + 110 CONTINUE + KASE = 1 + JUMP = 5 + RETURN +* +* ................ ENTRY (JUMP = 5) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 120 CONTINUE + TEMP = TWO*( SCSUM1( N, X, 1 ) / REAL( 3*N ) ) + IF( TEMP.GT.EST ) THEN + CALL CCOPY( N, X, 1, V, 1 ) + EST = TEMP + END IF +* + 130 CONTINUE + KASE = 0 + RETURN +* +* End of CLACON +* + END diff --git a/dspl/liblapack/SRC/clacp2.f b/dspl/liblapack/SRC/clacp2.f new file mode 100644 index 0000000..1d04962 --- /dev/null +++ b/dspl/liblapack/SRC/clacp2.f @@ -0,0 +1,161 @@ +*> \brief \b CLACP2 copies all or part of a real two-dimensional array to a complex array. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLACP2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLACP2( UPLO, M, N, A, LDA, B, LDB ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ) +* COMPLEX B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLACP2 copies all or part of a real two-dimensional matrix A to a +*> complex matrix B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies the part of the matrix A to be copied to B. +*> = 'U': Upper triangular part +*> = 'L': Lower triangular part +*> Otherwise: All of the matrix A +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The m by n matrix A. If UPLO = 'U', only the upper trapezium +*> is accessed; if UPLO = 'L', only the lower trapezium is +*> accessed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> On exit, B = A in the locations specified by UPLO. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLACP2( UPLO, M, N, A, LDA, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) + COMPLEX B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( J, M ) + B( I, J ) = A( I, J ) + 10 CONTINUE + 20 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN + DO 40 J = 1, N + DO 30 I = J, M + B( I, J ) = A( I, J ) + 30 CONTINUE + 40 CONTINUE +* + ELSE + DO 60 J = 1, N + DO 50 I = 1, M + B( I, J ) = A( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + RETURN +* +* End of CLACP2 +* + END diff --git a/dspl/liblapack/SRC/clacpy.f b/dspl/liblapack/SRC/clacpy.f new file mode 100644 index 0000000..26d5f37 --- /dev/null +++ b/dspl/liblapack/SRC/clacpy.f @@ -0,0 +1,159 @@ +*> \brief \b CLACPY copies all or part of one two-dimensional array to another. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLACPY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLACPY( UPLO, M, N, A, LDA, B, LDB ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLACPY copies all or part of a two-dimensional matrix A to another +*> matrix B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies the part of the matrix A to be copied to B. +*> = 'U': Upper triangular part +*> = 'L': Lower triangular part +*> Otherwise: All of the matrix A +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The m by n matrix A. If UPLO = 'U', only the upper trapezium +*> is accessed; if UPLO = 'L', only the lower trapezium is +*> accessed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> On exit, B = A in the locations specified by UPLO. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLACPY( UPLO, M, N, A, LDA, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( J, M ) + B( I, J ) = A( I, J ) + 10 CONTINUE + 20 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN + DO 40 J = 1, N + DO 30 I = J, M + B( I, J ) = A( I, J ) + 30 CONTINUE + 40 CONTINUE +* + ELSE + DO 60 J = 1, N + DO 50 I = 1, M + B( I, J ) = A( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + RETURN +* +* End of CLACPY +* + END diff --git a/dspl/liblapack/SRC/clacrm.f b/dspl/liblapack/SRC/clacrm.f new file mode 100644 index 0000000..47c8374 --- /dev/null +++ b/dspl/liblapack/SRC/clacrm.f @@ -0,0 +1,185 @@ +*> \brief \b CLACRM multiplies a complex matrix by a square real matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLACRM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLACRM( M, N, A, LDA, B, LDB, C, LDC, RWORK ) +* +* .. Scalar Arguments .. +* INTEGER LDA, LDB, LDC, M, N +* .. +* .. Array Arguments .. +* REAL B( LDB, * ), RWORK( * ) +* COMPLEX A( LDA, * ), C( LDC, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLACRM performs a very simple matrix-matrix multiplication: +*> C := A * B, +*> where A is M by N and complex; B is N by N and real; +*> C is M by N and complex. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A and of the matrix C. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns and rows of the matrix B and +*> the number of columns of the matrix C. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, N) +*> On entry, A contains the M by N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >=max(1,M). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB, N) +*> On entry, B contains the N by N matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >=max(1,N). +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC, N) +*> On exit, C contains the M by N matrix C. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >=max(1,N). +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*M*N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLACRM( M, N, A, LDA, B, LDB, C, LDC, RWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER LDA, LDB, LDC, M, N +* .. +* .. Array Arguments .. + REAL B( LDB, * ), RWORK( * ) + COMPLEX A( LDA, * ), C( LDC, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, REAL +* .. +* .. External Subroutines .. + EXTERNAL SGEMM +* .. +* .. Executable Statements .. +* +* Quick return if possible. +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN +* + DO 20 J = 1, N + DO 10 I = 1, M + RWORK( ( J-1 )*M+I ) = REAL( A( I, J ) ) + 10 CONTINUE + 20 CONTINUE +* + L = M*N + 1 + CALL SGEMM( 'N', 'N', M, N, N, ONE, RWORK, M, B, LDB, ZERO, + $ RWORK( L ), M ) + DO 40 J = 1, N + DO 30 I = 1, M + C( I, J ) = RWORK( L+( J-1 )*M+I-1 ) + 30 CONTINUE + 40 CONTINUE +* + DO 60 J = 1, N + DO 50 I = 1, M + RWORK( ( J-1 )*M+I ) = AIMAG( A( I, J ) ) + 50 CONTINUE + 60 CONTINUE + CALL SGEMM( 'N', 'N', M, N, N, ONE, RWORK, M, B, LDB, ZERO, + $ RWORK( L ), M ) + DO 80 J = 1, N + DO 70 I = 1, M + C( I, J ) = CMPLX( REAL( C( I, J ) ), + $ RWORK( L+( J-1 )*M+I-1 ) ) + 70 CONTINUE + 80 CONTINUE +* + RETURN +* +* End of CLACRM +* + END diff --git a/dspl/liblapack/SRC/clacrt.f b/dspl/liblapack/SRC/clacrt.f new file mode 100644 index 0000000..1f71817 --- /dev/null +++ b/dspl/liblapack/SRC/clacrt.f @@ -0,0 +1,160 @@ +*> \brief \b CLACRT performs a linear transformation of a pair of complex vectors. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLACRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLACRT( N, CX, INCX, CY, INCY, C, S ) +* +* .. Scalar Arguments .. +* INTEGER INCX, INCY, N +* COMPLEX C, S +* .. +* .. Array Arguments .. +* COMPLEX CX( * ), CY( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLACRT performs the operation +*> +*> ( c s )( x ) ==> ( x ) +*> ( -s c )( y ) ( y ) +*> +*> where c and s are complex and the vectors x and y are complex. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of elements in the vectors CX and CY. +*> \endverbatim +*> +*> \param[in,out] CX +*> \verbatim +*> CX is COMPLEX array, dimension (N) +*> On input, the vector x. +*> On output, CX is overwritten with c*x + s*y. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive values of CX. INCX <> 0. +*> \endverbatim +*> +*> \param[in,out] CY +*> \verbatim +*> CY is COMPLEX array, dimension (N) +*> On input, the vector y. +*> On output, CY is overwritten with -s*x + c*y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> The increment between successive values of CY. INCY <> 0. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is COMPLEX +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is COMPLEX +*> C and S define the matrix +*> [ C S ]. +*> [ -S C ] +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLACRT( N, CX, INCX, CY, INCY, C, S ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCX, INCY, N + COMPLEX C, S +* .. +* .. Array Arguments .. + COMPLEX CX( * ), CY( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IX, IY + COMPLEX CTEMP +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) + $ RETURN + IF( INCX.EQ.1 .AND. INCY.EQ.1 ) + $ GO TO 20 +* +* Code for unequal increments or equal increments not equal to 1 +* + IX = 1 + IY = 1 + IF( INCX.LT.0 ) + $ IX = ( -N+1 )*INCX + 1 + IF( INCY.LT.0 ) + $ IY = ( -N+1 )*INCY + 1 + DO 10 I = 1, N + CTEMP = C*CX( IX ) + S*CY( IY ) + CY( IY ) = C*CY( IY ) - S*CX( IX ) + CX( IX ) = CTEMP + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* Code for both increments equal to 1 +* + 20 CONTINUE + DO 30 I = 1, N + CTEMP = C*CX( I ) + S*CY( I ) + CY( I ) = C*CY( I ) - S*CX( I ) + CX( I ) = CTEMP + 30 CONTINUE + RETURN + END diff --git a/dspl/liblapack/SRC/cladiv.f b/dspl/liblapack/SRC/cladiv.f new file mode 100644 index 0000000..189ef21 --- /dev/null +++ b/dspl/liblapack/SRC/cladiv.f @@ -0,0 +1,97 @@ +*> \brief \b CLADIV performs complex division in real arithmetic, avoiding unnecessary overflow. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLADIV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* COMPLEX FUNCTION CLADIV( X, Y ) +* +* .. Scalar Arguments .. +* COMPLEX X, Y +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLADIV := X / Y, where X and Y are complex. The computation of X / Y +*> will not overflow on an intermediary step unless the results +*> overflows. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] X +*> \verbatim +*> X is COMPLEX +*> \endverbatim +*> +*> \param[in] Y +*> \verbatim +*> Y is COMPLEX +*> The complex scalars X and Y. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + COMPLEX FUNCTION CLADIV( X, Y ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + COMPLEX X, Y +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + REAL ZI, ZR +* .. +* .. External Subroutines .. + EXTERNAL SLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, REAL +* .. +* .. Executable Statements .. +* + CALL SLADIV( REAL( X ), AIMAG( X ), REAL( Y ), AIMAG( Y ), ZR, + $ ZI ) + CLADIV = CMPLX( ZR, ZI ) +* + RETURN +* +* End of CLADIV +* + END diff --git a/dspl/liblapack/SRC/claed0.f b/dspl/liblapack/SRC/claed0.f new file mode 100644 index 0000000..9907b10 --- /dev/null +++ b/dspl/liblapack/SRC/claed0.f @@ -0,0 +1,371 @@ +*> \brief \b CLAED0 used by sstedc. Computes all eigenvalues and corresponding eigenvectors of an unreduced symmetric tridiagonal matrix using the divide and conquer method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAED0 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAED0( QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS, RWORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDQ, LDQS, N, QSIZ +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL D( * ), E( * ), RWORK( * ) +* COMPLEX Q( LDQ, * ), QSTORE( LDQS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Using the divide and conquer method, CLAED0 computes all eigenvalues +*> of a symmetric tridiagonal matrix which is one diagonal block of +*> those from reducing a dense or band Hermitian matrix and +*> corresponding eigenvectors of the dense or band matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] QSIZ +*> \verbatim +*> QSIZ is INTEGER +*> The dimension of the unitary matrix used to reduce +*> the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the diagonal elements of the tridiagonal matrix. +*> On exit, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> On entry, the off-diagonal elements of the tridiagonal matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ,N) +*> On entry, Q must contain an QSIZ x N matrix whose columns +*> unitarily orthonormal. It is a part of the unitary matrix +*> that reduces the full dense Hermitian matrix to a +*> (reducible) symmetric tridiagonal matrix. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, +*> the dimension of IWORK must be at least +*> 6 + 6*N + 5*N*lg N +*> ( lg( N ) = smallest integer k +*> such that 2^k >= N ) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, +*> dimension (1 + 3*N + 2*N*lg N + 3*N**2) +*> ( lg( N ) = smallest integer k +*> such that 2^k >= N ) +*> \endverbatim +*> +*> \param[out] QSTORE +*> \verbatim +*> QSTORE is COMPLEX array, dimension (LDQS, N) +*> Used to store parts of +*> the eigenvector matrix when the updating matrix multiplies +*> take place. +*> \endverbatim +*> +*> \param[in] LDQS +*> \verbatim +*> LDQS is INTEGER +*> The leading dimension of the array QSTORE. +*> LDQS >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: The algorithm failed to compute an eigenvalue while +*> working on the submatrix lying in rows and columns +*> INFO/(N+1) through mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CLAED0( QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS, RWORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDQ, LDQS, N, QSIZ +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL D( * ), E( * ), RWORK( * ) + COMPLEX Q( LDQ, * ), QSTORE( LDQS, * ) +* .. +* +* ===================================================================== +* +* Warning: N could be as big as QSIZ! +* +* .. Parameters .. + REAL TWO + PARAMETER ( TWO = 2.E+0 ) +* .. +* .. Local Scalars .. + INTEGER CURLVL, CURPRB, CURR, I, IGIVCL, IGIVNM, + $ IGIVPT, INDXQ, IPERM, IPRMPT, IQ, IQPTR, IWREM, + $ J, K, LGN, LL, MATSIZ, MSD2, SMLSIZ, SMM1, + $ SPM1, SPM2, SUBMAT, SUBPBS, TLVLS + REAL TEMP +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLACRM, CLAED7, SCOPY, SSTEQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* +* IF( ICOMPQ .LT. 0 .OR. ICOMPQ .GT. 2 ) THEN +* INFO = -1 +* ELSE IF( ( ICOMPQ .EQ. 1 ) .AND. ( QSIZ .LT. MAX( 0, N ) ) ) +* $ THEN + IF( QSIZ.LT.MAX( 0, N ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDQS.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLAED0', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + SMLSIZ = ILAENV( 9, 'CLAED0', ' ', 0, 0, 0, 0 ) +* +* Determine the size and placement of the submatrices, and save in +* the leading elements of IWORK. +* + IWORK( 1 ) = N + SUBPBS = 1 + TLVLS = 0 + 10 CONTINUE + IF( IWORK( SUBPBS ).GT.SMLSIZ ) THEN + DO 20 J = SUBPBS, 1, -1 + IWORK( 2*J ) = ( IWORK( J )+1 ) / 2 + IWORK( 2*J-1 ) = IWORK( J ) / 2 + 20 CONTINUE + TLVLS = TLVLS + 1 + SUBPBS = 2*SUBPBS + GO TO 10 + END IF + DO 30 J = 2, SUBPBS + IWORK( J ) = IWORK( J ) + IWORK( J-1 ) + 30 CONTINUE +* +* Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 +* using rank-1 modifications (cuts). +* + SPM1 = SUBPBS - 1 + DO 40 I = 1, SPM1 + SUBMAT = IWORK( I ) + 1 + SMM1 = SUBMAT - 1 + D( SMM1 ) = D( SMM1 ) - ABS( E( SMM1 ) ) + D( SUBMAT ) = D( SUBMAT ) - ABS( E( SMM1 ) ) + 40 CONTINUE +* + INDXQ = 4*N + 3 +* +* Set up workspaces for eigenvalues only/accumulate new vectors +* routine +* + TEMP = LOG( REAL( N ) ) / LOG( TWO ) + LGN = INT( TEMP ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IPRMPT = INDXQ + N + 1 + IPERM = IPRMPT + N*LGN + IQPTR = IPERM + N*LGN + IGIVPT = IQPTR + N + 2 + IGIVCL = IGIVPT + N*LGN +* + IGIVNM = 1 + IQ = IGIVNM + 2*N*LGN + IWREM = IQ + N**2 + 1 +* Initialize pointers + DO 50 I = 0, SUBPBS + IWORK( IPRMPT+I ) = 1 + IWORK( IGIVPT+I ) = 1 + 50 CONTINUE + IWORK( IQPTR ) = 1 +* +* Solve each submatrix eigenproblem at the bottom of the divide and +* conquer tree. +* + CURR = 0 + DO 70 I = 0, SPM1 + IF( I.EQ.0 ) THEN + SUBMAT = 1 + MATSIZ = IWORK( 1 ) + ELSE + SUBMAT = IWORK( I ) + 1 + MATSIZ = IWORK( I+1 ) - IWORK( I ) + END IF + LL = IQ - 1 + IWORK( IQPTR+CURR ) + CALL SSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ), + $ RWORK( LL ), MATSIZ, RWORK, INFO ) + CALL CLACRM( QSIZ, MATSIZ, Q( 1, SUBMAT ), LDQ, RWORK( LL ), + $ MATSIZ, QSTORE( 1, SUBMAT ), LDQS, + $ RWORK( IWREM ) ) + IWORK( IQPTR+CURR+1 ) = IWORK( IQPTR+CURR ) + MATSIZ**2 + CURR = CURR + 1 + IF( INFO.GT.0 ) THEN + INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1 + RETURN + END IF + K = 1 + DO 60 J = SUBMAT, IWORK( I+1 ) + IWORK( INDXQ+J ) = K + K = K + 1 + 60 CONTINUE + 70 CONTINUE +* +* Successively merge eigensystems of adjacent submatrices +* into eigensystem for the corresponding larger matrix. +* +* while ( SUBPBS > 1 ) +* + CURLVL = 1 + 80 CONTINUE + IF( SUBPBS.GT.1 ) THEN + SPM2 = SUBPBS - 2 + DO 90 I = 0, SPM2, 2 + IF( I.EQ.0 ) THEN + SUBMAT = 1 + MATSIZ = IWORK( 2 ) + MSD2 = IWORK( 1 ) + CURPRB = 0 + ELSE + SUBMAT = IWORK( I ) + 1 + MATSIZ = IWORK( I+2 ) - IWORK( I ) + MSD2 = MATSIZ / 2 + CURPRB = CURPRB + 1 + END IF +* +* Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) +* into an eigensystem of size MATSIZ. CLAED7 handles the case +* when the eigenvectors of a full or band Hermitian matrix (which +* was reduced to tridiagonal form) are desired. +* +* I am free to use Q as a valuable working space until Loop 150. +* + CALL CLAED7( MATSIZ, MSD2, QSIZ, TLVLS, CURLVL, CURPRB, + $ D( SUBMAT ), QSTORE( 1, SUBMAT ), LDQS, + $ E( SUBMAT+MSD2-1 ), IWORK( INDXQ+SUBMAT ), + $ RWORK( IQ ), IWORK( IQPTR ), IWORK( IPRMPT ), + $ IWORK( IPERM ), IWORK( IGIVPT ), + $ IWORK( IGIVCL ), RWORK( IGIVNM ), + $ Q( 1, SUBMAT ), RWORK( IWREM ), + $ IWORK( SUBPBS+1 ), INFO ) + IF( INFO.GT.0 ) THEN + INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1 + RETURN + END IF + IWORK( I / 2+1 ) = IWORK( I+2 ) + 90 CONTINUE + SUBPBS = SUBPBS / 2 + CURLVL = CURLVL + 1 + GO TO 80 + END IF +* +* end while +* +* Re-merge the eigenvalues/vectors which were deflated at the final +* merge step. +* + DO 100 I = 1, N + J = IWORK( INDXQ+I ) + RWORK( I ) = D( J ) + CALL CCOPY( QSIZ, QSTORE( 1, J ), 1, Q( 1, I ), 1 ) + 100 CONTINUE + CALL SCOPY( N, RWORK, 1, D, 1 ) +* + RETURN +* +* End of CLAED0 +* + END diff --git a/dspl/liblapack/SRC/claed7.f b/dspl/liblapack/SRC/claed7.f new file mode 100644 index 0000000..45dd54c --- /dev/null +++ b/dspl/liblapack/SRC/claed7.f @@ -0,0 +1,385 @@ +*> \brief \b CLAED7 used by sstedc. Computes the updated eigensystem of a diagonal matrix after modification by a rank-one symmetric matrix. Used when the original matrix is dense. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAED7 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAED7( N, CUTPNT, QSIZ, TLVLS, CURLVL, CURPBM, D, Q, +* LDQ, RHO, INDXQ, QSTORE, QPTR, PRMPTR, PERM, +* GIVPTR, GIVCOL, GIVNUM, WORK, RWORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER CURLVL, CURPBM, CUTPNT, INFO, LDQ, N, QSIZ, +* $ TLVLS +* REAL RHO +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( 2, * ), GIVPTR( * ), INDXQ( * ), +* $ IWORK( * ), PERM( * ), PRMPTR( * ), QPTR( * ) +* REAL D( * ), GIVNUM( 2, * ), QSTORE( * ), RWORK( * ) +* COMPLEX Q( LDQ, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAED7 computes the updated eigensystem of a diagonal +*> matrix after modification by a rank-one symmetric matrix. This +*> routine is used only for the eigenproblem which requires all +*> eigenvalues and optionally eigenvectors of a dense or banded +*> Hermitian matrix that has been reduced to tridiagonal form. +*> +*> T = Q(in) ( D(in) + RHO * Z*Z**H ) Q**H(in) = Q(out) * D(out) * Q**H(out) +*> +*> where Z = Q**Hu, u is a vector of length N with ones in the +*> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. +*> +*> The eigenvectors of the original matrix are stored in Q, and the +*> eigenvalues are in D. The algorithm consists of three stages: +*> +*> The first stage consists of deflating the size of the problem +*> when there are multiple eigenvalues or if there is a zero in +*> the Z vector. For each such occurrence the dimension of the +*> secular equation problem is reduced by one. This stage is +*> performed by the routine SLAED2. +*> +*> The second stage consists of calculating the updated +*> eigenvalues. This is done by finding the roots of the secular +*> equation via the routine SLAED4 (as called by SLAED3). +*> This routine also calculates the eigenvectors of the current +*> problem. +*> +*> The final stage consists of computing the updated eigenvectors +*> directly using the updated eigenvalues. The eigenvectors for +*> the current problem are multiplied with the eigenvectors from +*> the overall problem. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] CUTPNT +*> \verbatim +*> CUTPNT is INTEGER +*> Contains the location of the last eigenvalue in the leading +*> sub-matrix. min(1,N) <= CUTPNT <= N. +*> \endverbatim +*> +*> \param[in] QSIZ +*> \verbatim +*> QSIZ is INTEGER +*> The dimension of the unitary matrix used to reduce +*> the full matrix to tridiagonal form. QSIZ >= N. +*> \endverbatim +*> +*> \param[in] TLVLS +*> \verbatim +*> TLVLS is INTEGER +*> The total number of merging levels in the overall divide and +*> conquer tree. +*> \endverbatim +*> +*> \param[in] CURLVL +*> \verbatim +*> CURLVL is INTEGER +*> The current level in the overall merge routine, +*> 0 <= curlvl <= tlvls. +*> \endverbatim +*> +*> \param[in] CURPBM +*> \verbatim +*> CURPBM is INTEGER +*> The current problem in the current level in the overall +*> merge routine (counting from upper left to lower right). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the eigenvalues of the rank-1-perturbed matrix. +*> On exit, the eigenvalues of the repaired matrix. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ,N) +*> On entry, the eigenvectors of the rank-1-perturbed matrix. +*> On exit, the eigenvectors of the repaired tridiagonal matrix. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is REAL +*> Contains the subdiagonal element used to create the rank-1 +*> modification. +*> \endverbatim +*> +*> \param[out] INDXQ +*> \verbatim +*> INDXQ is INTEGER array, dimension (N) +*> This contains the permutation which will reintegrate the +*> subproblem just solved back into sorted order, +*> ie. D( INDXQ( I = 1, N ) ) will be in ascending order. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, +*> dimension (3*N+2*QSIZ*N) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (QSIZ*N) +*> \endverbatim +*> +*> \param[in,out] QSTORE +*> \verbatim +*> QSTORE is REAL array, dimension (N**2+1) +*> Stores eigenvectors of submatrices encountered during +*> divide and conquer, packed together. QPTR points to +*> beginning of the submatrices. +*> \endverbatim +*> +*> \param[in,out] QPTR +*> \verbatim +*> QPTR is INTEGER array, dimension (N+2) +*> List of indices pointing to beginning of submatrices stored +*> in QSTORE. The submatrices are numbered starting at the +*> bottom left of the divide and conquer tree, from left to +*> right and bottom to top. +*> \endverbatim +*> +*> \param[in] PRMPTR +*> \verbatim +*> PRMPTR is INTEGER array, dimension (N lg N) +*> Contains a list of pointers which indicate where in PERM a +*> level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) +*> indicates the size of the permutation and also the size of +*> the full, non-deflated problem. +*> \endverbatim +*> +*> \param[in] PERM +*> \verbatim +*> PERM is INTEGER array, dimension (N lg N) +*> Contains the permutations (from deflation and sorting) to be +*> applied to each eigenblock. +*> \endverbatim +*> +*> \param[in] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER array, dimension (N lg N) +*> Contains a list of pointers which indicate where in GIVCOL a +*> level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) +*> indicates the number of Givens rotations. +*> \endverbatim +*> +*> \param[in] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension (2, N lg N) +*> Each pair of numbers indicates a pair of columns to take place +*> in a Givens rotation. +*> \endverbatim +*> +*> \param[in] GIVNUM +*> \verbatim +*> GIVNUM is REAL array, dimension (2, N lg N) +*> Each number indicates the S value to be used in the +*> corresponding Givens rotation. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, an eigenvalue did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CLAED7( N, CUTPNT, QSIZ, TLVLS, CURLVL, CURPBM, D, Q, + $ LDQ, RHO, INDXQ, QSTORE, QPTR, PRMPTR, PERM, + $ GIVPTR, GIVCOL, GIVNUM, WORK, RWORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER CURLVL, CURPBM, CUTPNT, INFO, LDQ, N, QSIZ, + $ TLVLS + REAL RHO +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), GIVPTR( * ), INDXQ( * ), + $ IWORK( * ), PERM( * ), PRMPTR( * ), QPTR( * ) + REAL D( * ), GIVNUM( 2, * ), QSTORE( * ), RWORK( * ) + COMPLEX Q( LDQ, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER COLTYP, CURR, I, IDLMDA, INDX, + $ INDXC, INDXP, IQ, IW, IZ, K, N1, N2, PTR +* .. +* .. External Subroutines .. + EXTERNAL CLACRM, CLAED8, SLAED9, SLAEDA, SLAMRG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* +* IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN +* INFO = -1 +* ELSE IF( N.LT.0 ) THEN + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( MIN( 1, N ).GT.CUTPNT .OR. N.LT.CUTPNT ) THEN + INFO = -2 + ELSE IF( QSIZ.LT.N ) THEN + INFO = -3 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLAED7', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* The following values are for bookkeeping purposes only. They are +* integer pointers which indicate the portion of the workspace +* used by a particular array in SLAED2 and SLAED3. +* + IZ = 1 + IDLMDA = IZ + N + IW = IDLMDA + N + IQ = IW + N +* + INDX = 1 + INDXC = INDX + N + COLTYP = INDXC + N + INDXP = COLTYP + N +* +* Form the z-vector which consists of the last row of Q_1 and the +* first row of Q_2. +* + PTR = 1 + 2**TLVLS + DO 10 I = 1, CURLVL - 1 + PTR = PTR + 2**( TLVLS-I ) + 10 CONTINUE + CURR = PTR + CURPBM + CALL SLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR, + $ GIVCOL, GIVNUM, QSTORE, QPTR, RWORK( IZ ), + $ RWORK( IZ+N ), INFO ) +* +* When solving the final problem, we no longer need the stored data, +* so we will overwrite the data from this level onto the previously +* used storage space. +* + IF( CURLVL.EQ.TLVLS ) THEN + QPTR( CURR ) = 1 + PRMPTR( CURR ) = 1 + GIVPTR( CURR ) = 1 + END IF +* +* Sort and Deflate eigenvalues. +* + CALL CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, RWORK( IZ ), + $ RWORK( IDLMDA ), WORK, QSIZ, RWORK( IW ), + $ IWORK( INDXP ), IWORK( INDX ), INDXQ, + $ PERM( PRMPTR( CURR ) ), GIVPTR( CURR+1 ), + $ GIVCOL( 1, GIVPTR( CURR ) ), + $ GIVNUM( 1, GIVPTR( CURR ) ), INFO ) + PRMPTR( CURR+1 ) = PRMPTR( CURR ) + N + GIVPTR( CURR+1 ) = GIVPTR( CURR+1 ) + GIVPTR( CURR ) +* +* Solve Secular Equation. +* + IF( K.NE.0 ) THEN + CALL SLAED9( K, 1, K, N, D, RWORK( IQ ), K, RHO, + $ RWORK( IDLMDA ), RWORK( IW ), + $ QSTORE( QPTR( CURR ) ), K, INFO ) + CALL CLACRM( QSIZ, K, WORK, QSIZ, QSTORE( QPTR( CURR ) ), K, Q, + $ LDQ, RWORK( IQ ) ) + QPTR( CURR+1 ) = QPTR( CURR ) + K**2 + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* Prepare the INDXQ sorting premutation. +* + N1 = K + N2 = N - K + CALL SLAMRG( N1, N2, D, 1, -1, INDXQ ) + ELSE + QPTR( CURR+1 ) = QPTR( CURR ) + DO 20 I = 1, N + INDXQ( I ) = I + 20 CONTINUE + END IF +* + RETURN +* +* End of CLAED7 +* + END diff --git a/dspl/liblapack/SRC/claed8.f b/dspl/liblapack/SRC/claed8.f new file mode 100644 index 0000000..d66bf80 --- /dev/null +++ b/dspl/liblapack/SRC/claed8.f @@ -0,0 +1,486 @@ +*> \brief \b CLAED8 used by sstedc. Merges eigenvalues and deflates secular equation. Used when the original matrix is dense. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAED8 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA, +* Q2, LDQ2, W, INDXP, INDX, INDXQ, PERM, GIVPTR, +* GIVCOL, GIVNUM, INFO ) +* +* .. Scalar Arguments .. +* INTEGER CUTPNT, GIVPTR, INFO, K, LDQ, LDQ2, N, QSIZ +* REAL RHO +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( 2, * ), INDX( * ), INDXP( * ), +* $ INDXQ( * ), PERM( * ) +* REAL D( * ), DLAMDA( * ), GIVNUM( 2, * ), W( * ), +* $ Z( * ) +* COMPLEX Q( LDQ, * ), Q2( LDQ2, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAED8 merges the two sets of eigenvalues together into a single +*> sorted set. Then it tries to deflate the size of the problem. +*> There are two ways in which deflation can occur: when two or more +*> eigenvalues are close together or if there is a tiny element in the +*> Z vector. For each such occurrence the order of the related secular +*> equation problem is reduced by one. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> Contains the number of non-deflated eigenvalues. +*> This is the order of the related secular equation. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] QSIZ +*> \verbatim +*> QSIZ is INTEGER +*> The dimension of the unitary matrix used to reduce +*> the dense or band matrix to tridiagonal form. +*> QSIZ >= N if ICOMPQ = 1. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ,N) +*> On entry, Q contains the eigenvectors of the partially solved +*> system which has been previously updated in matrix +*> multiplies with other partially solved eigensystems. +*> On exit, Q contains the trailing (N-K) updated eigenvectors +*> (those which were deflated) in its last N-K columns. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max( 1, N ). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, D contains the eigenvalues of the two submatrices to +*> be combined. On exit, D contains the trailing (N-K) updated +*> eigenvalues (those which were deflated) sorted into increasing +*> order. +*> \endverbatim +*> +*> \param[in,out] RHO +*> \verbatim +*> RHO is REAL +*> Contains the off diagonal element associated with the rank-1 +*> cut which originally split the two submatrices which are now +*> being recombined. RHO is modified during the computation to +*> the value required by SLAED3. +*> \endverbatim +*> +*> \param[in] CUTPNT +*> \verbatim +*> CUTPNT is INTEGER +*> Contains the location of the last eigenvalue in the leading +*> sub-matrix. MIN(1,N) <= CUTPNT <= N. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is REAL array, dimension (N) +*> On input this vector contains the updating vector (the last +*> row of the first sub-eigenvector matrix and the first row of +*> the second sub-eigenvector matrix). The contents of Z are +*> destroyed during the updating process. +*> \endverbatim +*> +*> \param[out] DLAMDA +*> \verbatim +*> DLAMDA is REAL array, dimension (N) +*> Contains a copy of the first K eigenvalues which will be used +*> by SLAED3 to form the secular equation. +*> \endverbatim +*> +*> \param[out] Q2 +*> \verbatim +*> Q2 is COMPLEX array, dimension (LDQ2,N) +*> If ICOMPQ = 0, Q2 is not referenced. Otherwise, +*> Contains a copy of the first K eigenvectors which will be used +*> by SLAED7 in a matrix multiply (SGEMM) to update the new +*> eigenvectors. +*> \endverbatim +*> +*> \param[in] LDQ2 +*> \verbatim +*> LDQ2 is INTEGER +*> The leading dimension of the array Q2. LDQ2 >= max( 1, N ). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> This will hold the first k values of the final +*> deflation-altered z-vector and will be passed to SLAED3. +*> \endverbatim +*> +*> \param[out] INDXP +*> \verbatim +*> INDXP is INTEGER array, dimension (N) +*> This will contain the permutation used to place deflated +*> values of D at the end of the array. On output INDXP(1:K) +*> points to the nondeflated D-values and INDXP(K+1:N) +*> points to the deflated eigenvalues. +*> \endverbatim +*> +*> \param[out] INDX +*> \verbatim +*> INDX is INTEGER array, dimension (N) +*> This will contain the permutation used to sort the contents of +*> D into ascending order. +*> \endverbatim +*> +*> \param[in] INDXQ +*> \verbatim +*> INDXQ is INTEGER array, dimension (N) +*> This contains the permutation which separately sorts the two +*> sub-problems in D into ascending order. Note that elements in +*> the second half of this permutation must first have CUTPNT +*> added to their values in order to be accurate. +*> \endverbatim +*> +*> \param[out] PERM +*> \verbatim +*> PERM is INTEGER array, dimension (N) +*> Contains the permutations (from deflation and sorting) to be +*> applied to each eigenblock. +*> \endverbatim +*> +*> \param[out] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER +*> Contains the number of Givens rotations which took place in +*> this subproblem. +*> \endverbatim +*> +*> \param[out] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension (2, N) +*> Each pair of numbers indicates a pair of columns to take place +*> in a Givens rotation. +*> \endverbatim +*> +*> \param[out] GIVNUM +*> \verbatim +*> GIVNUM is REAL array, dimension (2, N) +*> Each number indicates the S value to be used in the +*> corresponding Givens rotation. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA, + $ Q2, LDQ2, W, INDXP, INDX, INDXQ, PERM, GIVPTR, + $ GIVCOL, GIVNUM, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER CUTPNT, GIVPTR, INFO, K, LDQ, LDQ2, N, QSIZ + REAL RHO +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), INDX( * ), INDXP( * ), + $ INDXQ( * ), PERM( * ) + REAL D( * ), DLAMDA( * ), GIVNUM( 2, * ), W( * ), + $ Z( * ) + COMPLEX Q( LDQ, * ), Q2( LDQ2, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL MONE, ZERO, ONE, TWO, EIGHT + PARAMETER ( MONE = -1.0E0, ZERO = 0.0E0, ONE = 1.0E0, + $ TWO = 2.0E0, EIGHT = 8.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, IMAX, J, JLAM, JMAX, JP, K2, N1, N1P1, N2 + REAL C, EPS, S, T, TAU, TOL +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH, SLAPY2 + EXTERNAL ISAMAX, SLAMCH, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLACPY, CSROT, SCOPY, SLAMRG, SSCAL, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( QSIZ.LT.N ) THEN + INFO = -3 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( CUTPNT.LT.MIN( 1, N ) .OR. CUTPNT.GT.N ) THEN + INFO = -8 + ELSE IF( LDQ2.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLAED8', -INFO ) + RETURN + END IF +* +* Need to initialize GIVPTR to O here in case of quick exit +* to prevent an unspecified code behavior (usually sigfault) +* when IWORK array on entry to *stedc is not zeroed +* (or at least some IWORK entries which used in *laed7 for GIVPTR). +* + GIVPTR = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + N1 = CUTPNT + N2 = N - N1 + N1P1 = N1 + 1 +* + IF( RHO.LT.ZERO ) THEN + CALL SSCAL( N2, MONE, Z( N1P1 ), 1 ) + END IF +* +* Normalize z so that norm(z) = 1 +* + T = ONE / SQRT( TWO ) + DO 10 J = 1, N + INDX( J ) = J + 10 CONTINUE + CALL SSCAL( N, T, Z, 1 ) + RHO = ABS( TWO*RHO ) +* +* Sort the eigenvalues into increasing order +* + DO 20 I = CUTPNT + 1, N + INDXQ( I ) = INDXQ( I ) + CUTPNT + 20 CONTINUE + DO 30 I = 1, N + DLAMDA( I ) = D( INDXQ( I ) ) + W( I ) = Z( INDXQ( I ) ) + 30 CONTINUE + I = 1 + J = CUTPNT + 1 + CALL SLAMRG( N1, N2, DLAMDA, 1, 1, INDX ) + DO 40 I = 1, N + D( I ) = DLAMDA( INDX( I ) ) + Z( I ) = W( INDX( I ) ) + 40 CONTINUE +* +* Calculate the allowable deflation tolerance +* + IMAX = ISAMAX( N, Z, 1 ) + JMAX = ISAMAX( N, D, 1 ) + EPS = SLAMCH( 'Epsilon' ) + TOL = EIGHT*EPS*ABS( D( JMAX ) ) +* +* If the rank-1 modifier is small enough, no more needs to be done +* -- except to reorganize Q so that its columns correspond with the +* elements in D. +* + IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN + K = 0 + DO 50 J = 1, N + PERM( J ) = INDXQ( INDX( J ) ) + CALL CCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 ) + 50 CONTINUE + CALL CLACPY( 'A', QSIZ, N, Q2( 1, 1 ), LDQ2, Q( 1, 1 ), LDQ ) + RETURN + END IF +* +* If there are multiple eigenvalues then the problem deflates. Here +* the number of equal eigenvalues are found. As each equal +* eigenvalue is found, an elementary reflector is computed to rotate +* the corresponding eigensubspace so that the corresponding +* components of Z are zero in this new basis. +* + K = 0 + K2 = N + 1 + DO 60 J = 1, N + IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + INDXP( K2 ) = J + IF( J.EQ.N ) + $ GO TO 100 + ELSE + JLAM = J + GO TO 70 + END IF + 60 CONTINUE + 70 CONTINUE + J = J + 1 + IF( J.GT.N ) + $ GO TO 90 + IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + INDXP( K2 ) = J + ELSE +* +* Check if eigenvalues are close enough to allow deflation. +* + S = Z( JLAM ) + C = Z( J ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = SLAPY2( C, S ) + T = D( J ) - D( JLAM ) + C = C / TAU + S = -S / TAU + IF( ABS( T*C*S ).LE.TOL ) THEN +* +* Deflation is possible. +* + Z( J ) = TAU + Z( JLAM ) = ZERO +* +* Record the appropriate Givens rotation +* + GIVPTR = GIVPTR + 1 + GIVCOL( 1, GIVPTR ) = INDXQ( INDX( JLAM ) ) + GIVCOL( 2, GIVPTR ) = INDXQ( INDX( J ) ) + GIVNUM( 1, GIVPTR ) = C + GIVNUM( 2, GIVPTR ) = S + CALL CSROT( QSIZ, Q( 1, INDXQ( INDX( JLAM ) ) ), 1, + $ Q( 1, INDXQ( INDX( J ) ) ), 1, C, S ) + T = D( JLAM )*C*C + D( J )*S*S + D( J ) = D( JLAM )*S*S + D( J )*C*C + D( JLAM ) = T + K2 = K2 - 1 + I = 1 + 80 CONTINUE + IF( K2+I.LE.N ) THEN + IF( D( JLAM ).LT.D( INDXP( K2+I ) ) ) THEN + INDXP( K2+I-1 ) = INDXP( K2+I ) + INDXP( K2+I ) = JLAM + I = I + 1 + GO TO 80 + ELSE + INDXP( K2+I-1 ) = JLAM + END IF + ELSE + INDXP( K2+I-1 ) = JLAM + END IF + JLAM = J + ELSE + K = K + 1 + W( K ) = Z( JLAM ) + DLAMDA( K ) = D( JLAM ) + INDXP( K ) = JLAM + JLAM = J + END IF + END IF + GO TO 70 + 90 CONTINUE +* +* Record the last eigenvalue. +* + K = K + 1 + W( K ) = Z( JLAM ) + DLAMDA( K ) = D( JLAM ) + INDXP( K ) = JLAM +* + 100 CONTINUE +* +* Sort the eigenvalues and corresponding eigenvectors into DLAMDA +* and Q2 respectively. The eigenvalues/vectors which were not +* deflated go into the first K slots of DLAMDA and Q2 respectively, +* while those which were deflated go into the last N - K slots. +* + DO 110 J = 1, N + JP = INDXP( J ) + DLAMDA( J ) = D( JP ) + PERM( J ) = INDXQ( INDX( JP ) ) + CALL CCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 ) + 110 CONTINUE +* +* The deflated eigenvalues and their corresponding vectors go back +* into the last N - K slots of D and Q respectively. +* + IF( K.LT.N ) THEN + CALL SCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 ) + CALL CLACPY( 'A', QSIZ, N-K, Q2( 1, K+1 ), LDQ2, Q( 1, K+1 ), + $ LDQ ) + END IF +* + RETURN +* +* End of CLAED8 +* + END diff --git a/dspl/liblapack/SRC/claein.f b/dspl/liblapack/SRC/claein.f new file mode 100644 index 0000000..c4442d7 --- /dev/null +++ b/dspl/liblapack/SRC/claein.f @@ -0,0 +1,354 @@ +*> \brief \b CLAEIN computes a specified right or left eigenvector of an upper Hessenberg matrix by inverse iteration. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAEIN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAEIN( RIGHTV, NOINIT, N, H, LDH, W, V, B, LDB, RWORK, +* EPS3, SMLNUM, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL NOINIT, RIGHTV +* INTEGER INFO, LDB, LDH, N +* REAL EPS3, SMLNUM +* COMPLEX W +* .. +* .. Array Arguments .. +* REAL RWORK( * ) +* COMPLEX B( LDB, * ), H( LDH, * ), V( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAEIN uses inverse iteration to find a right or left eigenvector +*> corresponding to the eigenvalue W of a complex upper Hessenberg +*> matrix H. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] RIGHTV +*> \verbatim +*> RIGHTV is LOGICAL +*> = .TRUE. : compute right eigenvector; +*> = .FALSE.: compute left eigenvector. +*> \endverbatim +*> +*> \param[in] NOINIT +*> \verbatim +*> NOINIT is LOGICAL +*> = .TRUE. : no initial vector supplied in V +*> = .FALSE.: initial vector supplied in V. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N >= 0. +*> \endverbatim +*> +*> \param[in] H +*> \verbatim +*> H is COMPLEX array, dimension (LDH,N) +*> The upper Hessenberg matrix H. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH >= max(1,N). +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is COMPLEX +*> The eigenvalue of H whose corresponding right or left +*> eigenvector is to be computed. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is COMPLEX array, dimension (N) +*> On entry, if NOINIT = .FALSE., V must contain a starting +*> vector for inverse iteration; otherwise V need not be set. +*> On exit, V contains the computed eigenvector, normalized so +*> that the component of largest magnitude has magnitude 1; here +*> the magnitude of a complex number (x,y) is taken to be +*> |x| + |y|. +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[in] EPS3 +*> \verbatim +*> EPS3 is REAL +*> A small machine-dependent value which is used to perturb +*> close eigenvalues, and to replace zero pivots. +*> \endverbatim +*> +*> \param[in] SMLNUM +*> \verbatim +*> SMLNUM is REAL +*> A machine-dependent value close to the underflow threshold. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> = 1: inverse iteration did not converge; V is set to the +*> last iterate. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLAEIN( RIGHTV, NOINIT, N, H, LDH, W, V, B, LDB, RWORK, + $ EPS3, SMLNUM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL NOINIT, RIGHTV + INTEGER INFO, LDB, LDH, N + REAL EPS3, SMLNUM + COMPLEX W +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX B( LDB, * ), H( LDH, * ), V( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, TENTH + PARAMETER ( ONE = 1.0E+0, TENTH = 1.0E-1 ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + CHARACTER NORMIN, TRANS + INTEGER I, IERR, ITS, J + REAL GROWTO, NRMSML, ROOTN, RTEMP, SCALE, VNORM + COMPLEX CDUM, EI, EJ, TEMP, X +* .. +* .. External Functions .. + INTEGER ICAMAX + REAL SCASUM, SCNRM2 + COMPLEX CLADIV + EXTERNAL ICAMAX, SCASUM, SCNRM2, CLADIV +* .. +* .. External Subroutines .. + EXTERNAL CLATRS, CSSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* GROWTO is the threshold used in the acceptance test for an +* eigenvector. +* + ROOTN = SQRT( REAL( N ) ) + GROWTO = TENTH / ROOTN + NRMSML = MAX( ONE, EPS3*ROOTN )*SMLNUM +* +* Form B = H - W*I (except that the subdiagonal elements are not +* stored). +* + DO 20 J = 1, N + DO 10 I = 1, J - 1 + B( I, J ) = H( I, J ) + 10 CONTINUE + B( J, J ) = H( J, J ) - W + 20 CONTINUE +* + IF( NOINIT ) THEN +* +* Initialize V. +* + DO 30 I = 1, N + V( I ) = EPS3 + 30 CONTINUE + ELSE +* +* Scale supplied initial vector. +* + VNORM = SCNRM2( N, V, 1 ) + CALL CSSCAL( N, ( EPS3*ROOTN ) / MAX( VNORM, NRMSML ), V, 1 ) + END IF +* + IF( RIGHTV ) THEN +* +* LU decomposition with partial pivoting of B, replacing zero +* pivots by EPS3. +* + DO 60 I = 1, N - 1 + EI = H( I+1, I ) + IF( CABS1( B( I, I ) ).LT.CABS1( EI ) ) THEN +* +* Interchange rows and eliminate. +* + X = CLADIV( B( I, I ), EI ) + B( I, I ) = EI + DO 40 J = I + 1, N + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - X*TEMP + B( I, J ) = TEMP + 40 CONTINUE + ELSE +* +* Eliminate without interchange. +* + IF( B( I, I ).EQ.ZERO ) + $ B( I, I ) = EPS3 + X = CLADIV( EI, B( I, I ) ) + IF( X.NE.ZERO ) THEN + DO 50 J = I + 1, N + B( I+1, J ) = B( I+1, J ) - X*B( I, J ) + 50 CONTINUE + END IF + END IF + 60 CONTINUE + IF( B( N, N ).EQ.ZERO ) + $ B( N, N ) = EPS3 +* + TRANS = 'N' +* + ELSE +* +* UL decomposition with partial pivoting of B, replacing zero +* pivots by EPS3. +* + DO 90 J = N, 2, -1 + EJ = H( J, J-1 ) + IF( CABS1( B( J, J ) ).LT.CABS1( EJ ) ) THEN +* +* Interchange columns and eliminate. +* + X = CLADIV( B( J, J ), EJ ) + B( J, J ) = EJ + DO 70 I = 1, J - 1 + TEMP = B( I, J-1 ) + B( I, J-1 ) = B( I, J ) - X*TEMP + B( I, J ) = TEMP + 70 CONTINUE + ELSE +* +* Eliminate without interchange. +* + IF( B( J, J ).EQ.ZERO ) + $ B( J, J ) = EPS3 + X = CLADIV( EJ, B( J, J ) ) + IF( X.NE.ZERO ) THEN + DO 80 I = 1, J - 1 + B( I, J-1 ) = B( I, J-1 ) - X*B( I, J ) + 80 CONTINUE + END IF + END IF + 90 CONTINUE + IF( B( 1, 1 ).EQ.ZERO ) + $ B( 1, 1 ) = EPS3 +* + TRANS = 'C' +* + END IF +* + NORMIN = 'N' + DO 110 ITS = 1, N +* +* Solve U*x = scale*v for a right eigenvector +* or U**H *x = scale*v for a left eigenvector, +* overwriting x on v. +* + CALL CLATRS( 'Upper', TRANS, 'Nonunit', NORMIN, N, B, LDB, V, + $ SCALE, RWORK, IERR ) + NORMIN = 'Y' +* +* Test for sufficient growth in the norm of v. +* + VNORM = SCASUM( N, V, 1 ) + IF( VNORM.GE.GROWTO*SCALE ) + $ GO TO 120 +* +* Choose new orthogonal starting vector and try again. +* + RTEMP = EPS3 / ( ROOTN+ONE ) + V( 1 ) = EPS3 + DO 100 I = 2, N + V( I ) = RTEMP + 100 CONTINUE + V( N-ITS+1 ) = V( N-ITS+1 ) - EPS3*ROOTN + 110 CONTINUE +* +* Failure to find eigenvector in N iterations. +* + INFO = 1 +* + 120 CONTINUE +* +* Normalize eigenvector. +* + I = ICAMAX( N, V, 1 ) + CALL CSSCAL( N, ONE / CABS1( V( I ) ), V, 1 ) +* + RETURN +* +* End of CLAEIN +* + END diff --git a/dspl/liblapack/SRC/claesy.f b/dspl/liblapack/SRC/claesy.f new file mode 100644 index 0000000..c6a7b84 --- /dev/null +++ b/dspl/liblapack/SRC/claesy.f @@ -0,0 +1,221 @@ +*> \brief \b CLAESY computes the eigenvalues and eigenvectors of a 2-by-2 complex symmetric matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAESY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAESY( A, B, C, RT1, RT2, EVSCAL, CS1, SN1 ) +* +* .. Scalar Arguments .. +* COMPLEX A, B, C, CS1, EVSCAL, RT1, RT2, SN1 +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAESY computes the eigendecomposition of a 2-by-2 symmetric matrix +*> ( ( A, B );( B, C ) ) +*> provided the norm of the matrix of eigenvectors is larger than +*> some threshold value. +*> +*> RT1 is the eigenvalue of larger absolute value, and RT2 of +*> smaller absolute value. If the eigenvectors are computed, then +*> on return ( CS1, SN1 ) is the unit eigenvector for RT1, hence +*> +*> [ CS1 SN1 ] . [ A B ] . [ CS1 -SN1 ] = [ RT1 0 ] +*> [ -SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ] +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] A +*> \verbatim +*> A is COMPLEX +*> The ( 1, 1 ) element of input matrix. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX +*> The ( 1, 2 ) element of input matrix. The ( 2, 1 ) element +*> is also given by B, since the 2-by-2 matrix is symmetric. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is COMPLEX +*> The ( 2, 2 ) element of input matrix. +*> \endverbatim +*> +*> \param[out] RT1 +*> \verbatim +*> RT1 is COMPLEX +*> The eigenvalue of larger modulus. +*> \endverbatim +*> +*> \param[out] RT2 +*> \verbatim +*> RT2 is COMPLEX +*> The eigenvalue of smaller modulus. +*> \endverbatim +*> +*> \param[out] EVSCAL +*> \verbatim +*> EVSCAL is COMPLEX +*> The complex value by which the eigenvector matrix was scaled +*> to make it orthonormal. If EVSCAL is zero, the eigenvectors +*> were not computed. This means one of two things: the 2-by-2 +*> matrix could not be diagonalized, or the norm of the matrix +*> of eigenvectors before scaling was larger than the threshold +*> value THRESH (set below). +*> \endverbatim +*> +*> \param[out] CS1 +*> \verbatim +*> CS1 is COMPLEX +*> \endverbatim +*> +*> \param[out] SN1 +*> \verbatim +*> SN1 is COMPLEX +*> If EVSCAL .NE. 0, ( CS1, SN1 ) is the unit right eigenvector +*> for RT1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYauxiliary +* +* ===================================================================== + SUBROUTINE CLAESY( A, B, C, RT1, RT2, EVSCAL, CS1, SN1 ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + COMPLEX A, B, C, CS1, EVSCAL, RT1, RT2, SN1 +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E0, 0.0E0 ) ) + REAL HALF + PARAMETER ( HALF = 0.5E0 ) + REAL THRESH + PARAMETER ( THRESH = 0.1E0 ) +* .. +* .. Local Scalars .. + REAL BABS, EVNORM, TABS, Z + COMPLEX S, T, TMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* +* Special case: The matrix is actually diagonal. +* To avoid divide by zero later, we treat this case separately. +* + IF( ABS( B ).EQ.ZERO ) THEN + RT1 = A + RT2 = C + IF( ABS( RT1 ).LT.ABS( RT2 ) ) THEN + TMP = RT1 + RT1 = RT2 + RT2 = TMP + CS1 = ZERO + SN1 = ONE + ELSE + CS1 = ONE + SN1 = ZERO + END IF + ELSE +* +* Compute the eigenvalues and eigenvectors. +* The characteristic equation is +* lambda **2 - (A+C) lambda + (A*C - B*B) +* and we solve it using the quadratic formula. +* + S = ( A+C )*HALF + T = ( A-C )*HALF +* +* Take the square root carefully to avoid over/under flow. +* + BABS = ABS( B ) + TABS = ABS( T ) + Z = MAX( BABS, TABS ) + IF( Z.GT.ZERO ) + $ T = Z*SQRT( ( T / Z )**2+( B / Z )**2 ) +* +* Compute the two eigenvalues. RT1 and RT2 are exchanged +* if necessary so that RT1 will have the greater magnitude. +* + RT1 = S + T + RT2 = S - T + IF( ABS( RT1 ).LT.ABS( RT2 ) ) THEN + TMP = RT1 + RT1 = RT2 + RT2 = TMP + END IF +* +* Choose CS1 = 1 and SN1 to satisfy the first equation, then +* scale the components of this eigenvector so that the matrix +* of eigenvectors X satisfies X * X**T = I . (No scaling is +* done if the norm of the eigenvalue matrix is less than THRESH.) +* + SN1 = ( RT1-A ) / B + TABS = ABS( SN1 ) + IF( TABS.GT.ONE ) THEN + T = TABS*SQRT( ( ONE / TABS )**2+( SN1 / TABS )**2 ) + ELSE + T = SQRT( CONE+SN1*SN1 ) + END IF + EVNORM = ABS( T ) + IF( EVNORM.GE.THRESH ) THEN + EVSCAL = CONE / T + CS1 = EVSCAL + SN1 = SN1*EVSCAL + ELSE + EVSCAL = ZERO + END IF + END IF + RETURN +* +* End of CLAESY +* + END diff --git a/dspl/liblapack/SRC/claev2.f b/dspl/liblapack/SRC/claev2.f new file mode 100644 index 0000000..b80397f --- /dev/null +++ b/dspl/liblapack/SRC/claev2.f @@ -0,0 +1,165 @@ +*> \brief \b CLAEV2 computes the eigenvalues and eigenvectors of a 2-by-2 symmetric/Hermitian matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAEV2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAEV2( A, B, C, RT1, RT2, CS1, SN1 ) +* +* .. Scalar Arguments .. +* REAL CS1, RT1, RT2 +* COMPLEX A, B, C, SN1 +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAEV2 computes the eigendecomposition of a 2-by-2 Hermitian matrix +*> [ A B ] +*> [ CONJG(B) C ]. +*> On return, RT1 is the eigenvalue of larger absolute value, RT2 is the +*> eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right +*> eigenvector for RT1, giving the decomposition +*> +*> [ CS1 CONJG(SN1) ] [ A B ] [ CS1 -CONJG(SN1) ] = [ RT1 0 ] +*> [-SN1 CS1 ] [ CONJG(B) C ] [ SN1 CS1 ] [ 0 RT2 ]. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] A +*> \verbatim +*> A is COMPLEX +*> The (1,1) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX +*> The (1,2) element and the conjugate of the (2,1) element of +*> the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is COMPLEX +*> The (2,2) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[out] RT1 +*> \verbatim +*> RT1 is REAL +*> The eigenvalue of larger absolute value. +*> \endverbatim +*> +*> \param[out] RT2 +*> \verbatim +*> RT2 is REAL +*> The eigenvalue of smaller absolute value. +*> \endverbatim +*> +*> \param[out] CS1 +*> \verbatim +*> CS1 is REAL +*> \endverbatim +*> +*> \param[out] SN1 +*> \verbatim +*> SN1 is COMPLEX +*> The vector (CS1, SN1) is a unit right eigenvector for RT1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> RT1 is accurate to a few ulps barring over/underflow. +*> +*> RT2 may be inaccurate if there is massive cancellation in the +*> determinant A*C-B*B; higher precision or correctly rounded or +*> correctly truncated arithmetic would be needed to compute RT2 +*> accurately in all cases. +*> +*> CS1 and SN1 are accurate to a few ulps barring over/underflow. +*> +*> Overflow is possible only if RT1 is within a factor of 5 of overflow. +*> Underflow is harmless if the input data is 0 or exceeds +*> underflow_threshold / macheps. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLAEV2( A, B, C, RT1, RT2, CS1, SN1 ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + REAL CS1, RT1, RT2 + COMPLEX A, B, C, SN1 +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + REAL T + COMPLEX W +* .. +* .. External Subroutines .. + EXTERNAL SLAEV2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, REAL +* .. +* .. Executable Statements .. +* + IF( ABS( B ).EQ.ZERO ) THEN + W = ONE + ELSE + W = CONJG( B ) / ABS( B ) + END IF + CALL SLAEV2( REAL( A ), ABS( B ), REAL( C ), RT1, RT2, CS1, T ) + SN1 = W*T + RETURN +* +* End of CLAEV2 +* + END diff --git a/dspl/liblapack/SRC/clag2z.f b/dspl/liblapack/SRC/clag2z.f new file mode 100644 index 0000000..a1777f0 --- /dev/null +++ b/dspl/liblapack/SRC/clag2z.f @@ -0,0 +1,136 @@ +*> \brief \b CLAG2Z converts a complex single precision matrix to a complex double precision matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAG2Z + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAG2Z( M, N, SA, LDSA, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDSA, M, N +* .. +* .. Array Arguments .. +* COMPLEX SA( LDSA, * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAG2Z converts a COMPLEX matrix, SA, to a COMPLEX*16 matrix, A. +*> +*> Note that while it is possible to overflow while converting +*> from double to single, it is not possible to overflow when +*> converting from single to double. +*> +*> This is an auxiliary routine so there is no argument checking. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of lines of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] SA +*> \verbatim +*> SA is COMPLEX array, dimension (LDSA,N) +*> On entry, the M-by-N coefficient matrix SA. +*> \endverbatim +*> +*> \param[in] LDSA +*> \verbatim +*> LDSA is INTEGER +*> The leading dimension of the array SA. LDSA >= max(1,M). +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On exit, the M-by-N coefficient matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLAG2Z( M, N, SA, LDSA, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDSA, M, N +* .. +* .. Array Arguments .. + COMPLEX SA( LDSA, * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. Executable Statements .. +* + INFO = 0 + DO 20 J = 1, N + DO 10 I = 1, M + A( I, J ) = SA( I, J ) + 10 CONTINUE + 20 CONTINUE + RETURN +* +* End of CLAG2Z +* + END diff --git a/dspl/liblapack/SRC/clags2.f b/dspl/liblapack/SRC/clags2.f new file mode 100644 index 0000000..7f50770 --- /dev/null +++ b/dspl/liblapack/SRC/clags2.f @@ -0,0 +1,395 @@ +*> \brief \b CLAGS2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAGS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, CSV, +* SNV, CSQ, SNQ ) +* +* .. Scalar Arguments .. +* LOGICAL UPPER +* REAL A1, A3, B1, B3, CSQ, CSU, CSV +* COMPLEX A2, B2, SNQ, SNU, SNV +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAGS2 computes 2-by-2 unitary matrices U, V and Q, such +*> that if ( UPPER ) then +*> +*> U**H *A*Q = U**H *( A1 A2 )*Q = ( x 0 ) +*> ( 0 A3 ) ( x x ) +*> and +*> V**H*B*Q = V**H *( B1 B2 )*Q = ( x 0 ) +*> ( 0 B3 ) ( x x ) +*> +*> or if ( .NOT.UPPER ) then +*> +*> U**H *A*Q = U**H *( A1 0 )*Q = ( x x ) +*> ( A2 A3 ) ( 0 x ) +*> and +*> V**H *B*Q = V**H *( B1 0 )*Q = ( x x ) +*> ( B2 B3 ) ( 0 x ) +*> where +*> +*> U = ( CSU SNU ), V = ( CSV SNV ), +*> ( -SNU**H CSU ) ( -SNV**H CSV ) +*> +*> Q = ( CSQ SNQ ) +*> ( -SNQ**H CSQ ) +*> +*> The rows of the transformed A and B are parallel. Moreover, if the +*> input 2-by-2 matrix A is not zero, then the transformed (1,1) entry +*> of A is not zero. If the input matrices A and B are both not zero, +*> then the transformed (2,2) element of B is not zero, except when the +*> first rows of input A and B are parallel and the second rows are +*> zero. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPPER +*> \verbatim +*> UPPER is LOGICAL +*> = .TRUE.: the input matrices A and B are upper triangular. +*> = .FALSE.: the input matrices A and B are lower triangular. +*> \endverbatim +*> +*> \param[in] A1 +*> \verbatim +*> A1 is REAL +*> \endverbatim +*> +*> \param[in] A2 +*> \verbatim +*> A2 is COMPLEX +*> \endverbatim +*> +*> \param[in] A3 +*> \verbatim +*> A3 is REAL +*> On entry, A1, A2 and A3 are elements of the input 2-by-2 +*> upper (lower) triangular matrix A. +*> \endverbatim +*> +*> \param[in] B1 +*> \verbatim +*> B1 is REAL +*> \endverbatim +*> +*> \param[in] B2 +*> \verbatim +*> B2 is COMPLEX +*> \endverbatim +*> +*> \param[in] B3 +*> \verbatim +*> B3 is REAL +*> On entry, B1, B2 and B3 are elements of the input 2-by-2 +*> upper (lower) triangular matrix B. +*> \endverbatim +*> +*> \param[out] CSU +*> \verbatim +*> CSU is REAL +*> \endverbatim +*> +*> \param[out] SNU +*> \verbatim +*> SNU is COMPLEX +*> The desired unitary matrix U. +*> \endverbatim +*> +*> \param[out] CSV +*> \verbatim +*> CSV is REAL +*> \endverbatim +*> +*> \param[out] SNV +*> \verbatim +*> SNV is COMPLEX +*> The desired unitary matrix V. +*> \endverbatim +*> +*> \param[out] CSQ +*> \verbatim +*> CSQ is REAL +*> \endverbatim +*> +*> \param[out] SNQ +*> \verbatim +*> SNQ is COMPLEX +*> The desired unitary matrix Q. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, CSV, + $ SNV, CSQ, SNQ ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL UPPER + REAL A1, A3, B1, B3, CSQ, CSU, CSV + COMPLEX A2, B2, SNQ, SNU, SNV +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + REAL A, AUA11, AUA12, AUA21, AUA22, AVB11, AVB12, + $ AVB21, AVB22, CSL, CSR, D, FB, FC, S1, S2, SNL, + $ SNR, UA11R, UA22R, VB11R, VB22R + COMPLEX B, C, D1, R, T, UA11, UA12, UA21, UA22, VB11, + $ VB12, VB21, VB22 +* .. +* .. External Subroutines .. + EXTERNAL CLARTG, SLASV2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, REAL +* .. +* .. Statement Functions .. + REAL ABS1 +* .. +* .. Statement Function definitions .. + ABS1( T ) = ABS( REAL( T ) ) + ABS( AIMAG( T ) ) +* .. +* .. Executable Statements .. +* + IF( UPPER ) THEN +* +* Input matrices A and B are upper triangular matrices +* +* Form matrix C = A*adj(B) = ( a b ) +* ( 0 d ) +* + A = A1*B3 + D = A3*B1 + B = A2*B1 - A1*B2 + FB = ABS( B ) +* +* Transform complex 2-by-2 matrix C to real matrix by unitary +* diagonal matrix diag(1,D1). +* + D1 = ONE + IF( FB.NE.ZERO ) + $ D1 = B / FB +* +* The SVD of real 2 by 2 triangular C +* +* ( CSL -SNL )*( A B )*( CSR SNR ) = ( R 0 ) +* ( SNL CSL ) ( 0 D ) ( -SNR CSR ) ( 0 T ) +* + CALL SLASV2( A, FB, D, S1, S2, SNR, CSR, SNL, CSL ) +* + IF( ABS( CSL ).GE.ABS( SNL ) .OR. ABS( CSR ).GE.ABS( SNR ) ) + $ THEN +* +* Compute the (1,1) and (1,2) elements of U**H *A and V**H *B, +* and (1,2) element of |U|**H *|A| and |V|**H *|B|. +* + UA11R = CSL*A1 + UA12 = CSL*A2 + D1*SNL*A3 +* + VB11R = CSR*B1 + VB12 = CSR*B2 + D1*SNR*B3 +* + AUA12 = ABS( CSL )*ABS1( A2 ) + ABS( SNL )*ABS( A3 ) + AVB12 = ABS( CSR )*ABS1( B2 ) + ABS( SNR )*ABS( B3 ) +* +* zero (1,2) elements of U**H *A and V**H *B +* + IF( ( ABS( UA11R )+ABS1( UA12 ) ).EQ.ZERO ) THEN + CALL CLARTG( -CMPLX( VB11R ), CONJG( VB12 ), CSQ, SNQ, + $ R ) + ELSE IF( ( ABS( VB11R )+ABS1( VB12 ) ).EQ.ZERO ) THEN + CALL CLARTG( -CMPLX( UA11R ), CONJG( UA12 ), CSQ, SNQ, + $ R ) + ELSE IF( AUA12 / ( ABS( UA11R )+ABS1( UA12 ) ).LE.AVB12 / + $ ( ABS( VB11R )+ABS1( VB12 ) ) ) THEN + CALL CLARTG( -CMPLX( UA11R ), CONJG( UA12 ), CSQ, SNQ, + $ R ) + ELSE + CALL CLARTG( -CMPLX( VB11R ), CONJG( VB12 ), CSQ, SNQ, + $ R ) + END IF +* + CSU = CSL + SNU = -D1*SNL + CSV = CSR + SNV = -D1*SNR +* + ELSE +* +* Compute the (2,1) and (2,2) elements of U**H *A and V**H *B, +* and (2,2) element of |U|**H *|A| and |V|**H *|B|. +* + UA21 = -CONJG( D1 )*SNL*A1 + UA22 = -CONJG( D1 )*SNL*A2 + CSL*A3 +* + VB21 = -CONJG( D1 )*SNR*B1 + VB22 = -CONJG( D1 )*SNR*B2 + CSR*B3 +* + AUA22 = ABS( SNL )*ABS1( A2 ) + ABS( CSL )*ABS( A3 ) + AVB22 = ABS( SNR )*ABS1( B2 ) + ABS( CSR )*ABS( B3 ) +* +* zero (2,2) elements of U**H *A and V**H *B, and then swap. +* + IF( ( ABS1( UA21 )+ABS1( UA22 ) ).EQ.ZERO ) THEN + CALL CLARTG( -CONJG( VB21 ), CONJG( VB22 ), CSQ, SNQ, R ) + ELSE IF( ( ABS1( VB21 )+ABS( VB22 ) ).EQ.ZERO ) THEN + CALL CLARTG( -CONJG( UA21 ), CONJG( UA22 ), CSQ, SNQ, R ) + ELSE IF( AUA22 / ( ABS1( UA21 )+ABS1( UA22 ) ).LE.AVB22 / + $ ( ABS1( VB21 )+ABS1( VB22 ) ) ) THEN + CALL CLARTG( -CONJG( UA21 ), CONJG( UA22 ), CSQ, SNQ, R ) + ELSE + CALL CLARTG( -CONJG( VB21 ), CONJG( VB22 ), CSQ, SNQ, R ) + END IF +* + CSU = SNL + SNU = D1*CSL + CSV = SNR + SNV = D1*CSR +* + END IF +* + ELSE +* +* Input matrices A and B are lower triangular matrices +* +* Form matrix C = A*adj(B) = ( a 0 ) +* ( c d ) +* + A = A1*B3 + D = A3*B1 + C = A2*B3 - A3*B2 + FC = ABS( C ) +* +* Transform complex 2-by-2 matrix C to real matrix by unitary +* diagonal matrix diag(d1,1). +* + D1 = ONE + IF( FC.NE.ZERO ) + $ D1 = C / FC +* +* The SVD of real 2 by 2 triangular C +* +* ( CSL -SNL )*( A 0 )*( CSR SNR ) = ( R 0 ) +* ( SNL CSL ) ( C D ) ( -SNR CSR ) ( 0 T ) +* + CALL SLASV2( A, FC, D, S1, S2, SNR, CSR, SNL, CSL ) +* + IF( ABS( CSR ).GE.ABS( SNR ) .OR. ABS( CSL ).GE.ABS( SNL ) ) + $ THEN +* +* Compute the (2,1) and (2,2) elements of U**H *A and V**H *B, +* and (2,1) element of |U|**H *|A| and |V|**H *|B|. +* + UA21 = -D1*SNR*A1 + CSR*A2 + UA22R = CSR*A3 +* + VB21 = -D1*SNL*B1 + CSL*B2 + VB22R = CSL*B3 +* + AUA21 = ABS( SNR )*ABS( A1 ) + ABS( CSR )*ABS1( A2 ) + AVB21 = ABS( SNL )*ABS( B1 ) + ABS( CSL )*ABS1( B2 ) +* +* zero (2,1) elements of U**H *A and V**H *B. +* + IF( ( ABS1( UA21 )+ABS( UA22R ) ).EQ.ZERO ) THEN + CALL CLARTG( CMPLX( VB22R ), VB21, CSQ, SNQ, R ) + ELSE IF( ( ABS1( VB21 )+ABS( VB22R ) ).EQ.ZERO ) THEN + CALL CLARTG( CMPLX( UA22R ), UA21, CSQ, SNQ, R ) + ELSE IF( AUA21 / ( ABS1( UA21 )+ABS( UA22R ) ).LE.AVB21 / + $ ( ABS1( VB21 )+ABS( VB22R ) ) ) THEN + CALL CLARTG( CMPLX( UA22R ), UA21, CSQ, SNQ, R ) + ELSE + CALL CLARTG( CMPLX( VB22R ), VB21, CSQ, SNQ, R ) + END IF +* + CSU = CSR + SNU = -CONJG( D1 )*SNR + CSV = CSL + SNV = -CONJG( D1 )*SNL +* + ELSE +* +* Compute the (1,1) and (1,2) elements of U**H *A and V**H *B, +* and (1,1) element of |U|**H *|A| and |V|**H *|B|. +* + UA11 = CSR*A1 + CONJG( D1 )*SNR*A2 + UA12 = CONJG( D1 )*SNR*A3 +* + VB11 = CSL*B1 + CONJG( D1 )*SNL*B2 + VB12 = CONJG( D1 )*SNL*B3 +* + AUA11 = ABS( CSR )*ABS( A1 ) + ABS( SNR )*ABS1( A2 ) + AVB11 = ABS( CSL )*ABS( B1 ) + ABS( SNL )*ABS1( B2 ) +* +* zero (1,1) elements of U**H *A and V**H *B, and then swap. +* + IF( ( ABS1( UA11 )+ABS1( UA12 ) ).EQ.ZERO ) THEN + CALL CLARTG( VB12, VB11, CSQ, SNQ, R ) + ELSE IF( ( ABS1( VB11 )+ABS1( VB12 ) ).EQ.ZERO ) THEN + CALL CLARTG( UA12, UA11, CSQ, SNQ, R ) + ELSE IF( AUA11 / ( ABS1( UA11 )+ABS1( UA12 ) ).LE.AVB11 / + $ ( ABS1( VB11 )+ABS1( VB12 ) ) ) THEN + CALL CLARTG( UA12, UA11, CSQ, SNQ, R ) + ELSE + CALL CLARTG( VB12, VB11, CSQ, SNQ, R ) + END IF +* + CSU = SNR + SNU = CONJG( D1 )*CSR + CSV = SNL + SNV = CONJG( D1 )*CSL +* + END IF +* + END IF +* + RETURN +* +* End of CLAGS2 +* + END diff --git a/dspl/liblapack/SRC/clagtm.f b/dspl/liblapack/SRC/clagtm.f new file mode 100644 index 0000000..dcf47a1 --- /dev/null +++ b/dspl/liblapack/SRC/clagtm.f @@ -0,0 +1,321 @@ +*> \brief \b CLAGTM performs a matrix-matrix product of the form C = αAB+βC, where A is a tridiagonal matrix, B and C are rectangular matrices, and α and β are scalars, which may be 0, 1, or -1. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAGTM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAGTM( TRANS, N, NRHS, ALPHA, DL, D, DU, X, LDX, BETA, +* B, LDB ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER LDB, LDX, N, NRHS +* REAL ALPHA, BETA +* .. +* .. Array Arguments .. +* COMPLEX B( LDB, * ), D( * ), DL( * ), DU( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAGTM performs a matrix-vector product of the form +*> +*> B := alpha * A * X + beta * B +*> +*> where A is a tridiagonal matrix of order N, B and X are N by NRHS +*> matrices, and alpha and beta are real scalars, each of which may be +*> 0., 1., or -1. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the operation applied to A. +*> = 'N': No transpose, B := alpha * A * X + beta * B +*> = 'T': Transpose, B := alpha * A**T * X + beta * B +*> = 'C': Conjugate transpose, B := alpha * A**H * X + beta * B +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices X and B. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is REAL +*> The scalar alpha. ALPHA must be 0., 1., or -1.; otherwise, +*> it is assumed to be 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is COMPLEX array, dimension (N-1) +*> The (n-1) sub-diagonal elements of T. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is COMPLEX array, dimension (N) +*> The diagonal elements of T. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is COMPLEX array, dimension (N-1) +*> The (n-1) super-diagonal elements of T. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> The N by NRHS matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(N,1). +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is REAL +*> The scalar beta. BETA must be 0., 1., or -1.; otherwise, +*> it is assumed to be 1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N by NRHS matrix B. +*> On exit, B is overwritten by the matrix expression +*> B := alpha * A * X + beta * B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(N,1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLAGTM( TRANS, N, NRHS, ALPHA, DL, D, DU, X, LDX, BETA, + $ B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER LDB, LDX, N, NRHS + REAL ALPHA, BETA +* .. +* .. Array Arguments .. + COMPLEX B( LDB, * ), D( * ), DL( * ), DU( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) + $ RETURN +* +* Multiply B by BETA if BETA.NE.1. +* + IF( BETA.EQ.ZERO ) THEN + DO 20 J = 1, NRHS + DO 10 I = 1, N + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE IF( BETA.EQ.-ONE ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = -B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF +* + IF( ALPHA.EQ.ONE ) THEN + IF( LSAME( TRANS, 'N' ) ) THEN +* +* Compute B := B + A*X +* + DO 60 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + + $ DU( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) + DL( N-1 )*X( N-1, J ) + + $ D( N )*X( N, J ) + DO 50 I = 2, N - 1 + B( I, J ) = B( I, J ) + DL( I-1 )*X( I-1, J ) + + $ D( I )*X( I, J ) + DU( I )*X( I+1, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Compute B := B + A**T * X +* + DO 80 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + + $ DL( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) + DU( N-1 )*X( N-1, J ) + + $ D( N )*X( N, J ) + DO 70 I = 2, N - 1 + B( I, J ) = B( I, J ) + DU( I-1 )*X( I-1, J ) + + $ D( I )*X( I, J ) + DL( I )*X( I+1, J ) + 70 CONTINUE + END IF + 80 CONTINUE + ELSE IF( LSAME( TRANS, 'C' ) ) THEN +* +* Compute B := B + A**H * X +* + DO 100 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) + CONJG( D( 1 ) )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) + CONJG( D( 1 ) )*X( 1, J ) + + $ CONJG( DL( 1 ) )*X( 2, J ) + B( N, J ) = B( N, J ) + CONJG( DU( N-1 ) )* + $ X( N-1, J ) + CONJG( D( N ) )*X( N, J ) + DO 90 I = 2, N - 1 + B( I, J ) = B( I, J ) + CONJG( DU( I-1 ) )* + $ X( I-1, J ) + CONJG( D( I ) )* + $ X( I, J ) + CONJG( DL( I ) )* + $ X( I+1, J ) + 90 CONTINUE + END IF + 100 CONTINUE + END IF + ELSE IF( ALPHA.EQ.-ONE ) THEN + IF( LSAME( TRANS, 'N' ) ) THEN +* +* Compute B := B - A*X +* + DO 120 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) - + $ DU( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) - DL( N-1 )*X( N-1, J ) - + $ D( N )*X( N, J ) + DO 110 I = 2, N - 1 + B( I, J ) = B( I, J ) - DL( I-1 )*X( I-1, J ) - + $ D( I )*X( I, J ) - DU( I )*X( I+1, J ) + 110 CONTINUE + END IF + 120 CONTINUE + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Compute B := B - A**T*X +* + DO 140 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) - + $ DL( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) - DU( N-1 )*X( N-1, J ) - + $ D( N )*X( N, J ) + DO 130 I = 2, N - 1 + B( I, J ) = B( I, J ) - DU( I-1 )*X( I-1, J ) - + $ D( I )*X( I, J ) - DL( I )*X( I+1, J ) + 130 CONTINUE + END IF + 140 CONTINUE + ELSE IF( LSAME( TRANS, 'C' ) ) THEN +* +* Compute B := B - A**H*X +* + DO 160 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) - CONJG( D( 1 ) )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) - CONJG( D( 1 ) )*X( 1, J ) - + $ CONJG( DL( 1 ) )*X( 2, J ) + B( N, J ) = B( N, J ) - CONJG( DU( N-1 ) )* + $ X( N-1, J ) - CONJG( D( N ) )*X( N, J ) + DO 150 I = 2, N - 1 + B( I, J ) = B( I, J ) - CONJG( DU( I-1 ) )* + $ X( I-1, J ) - CONJG( D( I ) )* + $ X( I, J ) - CONJG( DL( I ) )* + $ X( I+1, J ) + 150 CONTINUE + END IF + 160 CONTINUE + END IF + END IF + RETURN +* +* End of CLAGTM +* + END diff --git a/dspl/liblapack/SRC/clahef.f b/dspl/liblapack/SRC/clahef.f new file mode 100644 index 0000000..f51de18 --- /dev/null +++ b/dspl/liblapack/SRC/clahef.f @@ -0,0 +1,973 @@ +*> \brief \b CLAHEF computes a partial factorization of a complex Hermitian indefinite matrix using the Bunch-Kaufman diagonal pivoting method (blocked algorithm, calling Level 3 BLAS). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAHEF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAHEF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAHEF computes a partial factorization of a complex Hermitian +*> matrix A using the Bunch-Kaufman diagonal pivoting method. The +*> partial factorization has the form: +*> +*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +*> ( 0 U22 ) ( 0 D ) ( U12**H U22**H ) +*> +*> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L' +*> ( L21 I ) ( 0 A22 ) ( 0 I ) +*> +*> where the order of D is at most NB. The actual order is returned in +*> the argument KB, and is either NB or NB-1, or N if N <= NB. +*> Note that U**H denotes the conjugate transpose of U. +*> +*> CLAHEF is an auxiliary routine called by CHETRF. It uses blocked code +*> (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or +*> A22 (if UPLO = 'L'). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The maximum number of columns of the matrix A that should be +*> factored. NB should be at least 2 to allow for 2-by-2 pivot +*> blocks. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns of A that were actually factored. +*> KB is either NB-1 or NB, or N if N <= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, A contains details of the partial factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> Only the last KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k-1) < 0, then rows and columns +*> k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> Only the first KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k+1) < 0, then rows and columns +*> k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1) +*> is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX array, dimension (LDW,NB) +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup complexHEcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CLAHEF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER IMAX, J, JB, JJ, JMAX, JP, K, KK, KKW, KP, + $ KSTEP, KW + REAL ABSAKK, ALPHA, COLMAX, R1, ROWMAX, T + COMPLEX D11, D21, D22, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + EXTERNAL LSAME, ICAMAX +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMM, CGEMV, CLACGV, CSSCAL, CSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CONJG, MAX, MIN, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( REAL( Z ) ) + ABS( AIMAG( Z ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 (note that conjg(W) is actually stored) +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* + K = N + 10 CONTINUE +* +* KW is the column of W which corresponds to column K of A +* + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* + KSTEP = 1 +* +* Copy column K of A to column KW of W and update it +* + CALL CCOPY( K-1, A( 1, K ), 1, W( 1, KW ), 1 ) + W( K, KW ) = REAL( A( K, K ) ) + IF( K.LT.N ) THEN + CALL CGEMV( 'No transpose', K, N-K, -CONE, A( 1, K+1 ), LDA, + $ W( K, KW+1 ), LDW, CONE, W( 1, KW ), 1 ) + W( K, KW ) = REAL( W( K, KW ) ) + END IF +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( REAL( W( K, KW ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = ICAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = CABS1( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = REAL( A( K, K ) ) + ELSE +* +* ============================================================ +* +* BEGIN pivot search +* +* Case(1) + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* BEGIN pivot search along IMAX row +* +* +* Copy column IMAX to column KW-1 of W and update it +* + CALL CCOPY( IMAX-1, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 ) + W( IMAX, KW-1 ) = REAL( A( IMAX, IMAX ) ) + CALL CCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) + CALL CLACGV( K-IMAX, W( IMAX+1, KW-1 ), 1 ) + IF( K.LT.N ) THEN + CALL CGEMV( 'No transpose', K, N-K, -CONE, + $ A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW, + $ CONE, W( 1, KW-1 ), 1 ) + W( IMAX, KW-1 ) = REAL( W( IMAX, KW-1 ) ) + END IF +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine only ROWMAX. +* + JMAX = IMAX + ICAMAX( K-IMAX, W( IMAX+1, KW-1 ), 1 ) + ROWMAX = CABS1( W( JMAX, KW-1 ) ) + IF( IMAX.GT.1 ) THEN + JMAX = ICAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, KW-1 ) ) ) + END IF +* +* Case(2) + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* +* Case(3) + ELSE IF( ABS( REAL( W( IMAX, KW-1 ) ) ).GE.ALPHA*ROWMAX ) + $ THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW of W +* + CALL CCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* +* Case(4) + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF +* +* +* END pivot search along IMAX row +* + END IF +* +* END pivot search +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K - KSTEP + 1 +* +* KKW is the column of W which corresponds to column KK of A +* + KKW = NB + KK - N +* +* Interchange rows and columns KP and KK. +* Updated column KP is already stored in column KKW of W. +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP of submatrix A +* at step K. No need to copy element into column K +* (or K and K-1 for 2-by-2 pivot) of A, since these columns +* will be later overwritten. +* + A( KP, KP ) = REAL( A( KK, KK ) ) + CALL CCOPY( KK-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + CALL CLACGV( KK-1-KP, A( KP, KP+1 ), LDA ) + IF( KP.GT.1 ) + $ CALL CCOPY( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last K+1 to N columns of A +* (columns K (or K and K-1 for 2-by-2 pivot) of A will be +* later overwritten). Interchange rows KK and KP +* in last KKW to NB columns of W. +* + IF( K.LT.N ) + $ CALL CSWAP( N-K, A( KK, K+1 ), LDA, A( KP, K+1 ), + $ LDA ) + CALL CSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column kw of W now holds +* +* W(kw) = U(k)*D(k), +* +* where U(k) is the k-th column of U +* +* (1) Store subdiag. elements of column U(k) +* and 1-by-1 block D(k) in column k of A. +* (NOTE: Diagonal element U(k,k) is a UNIT element +* and not stored) +* A(k,k) := D(k,k) = W(k,kw) +* A(1:k-1,k) := U(1:k-1,k) = W(1:k-1,kw)/D(k,k) +* +* (NOTE: No need to use for Hermitian matrix +* A( K, K ) = DBLE( W( K, K) ) to separately copy diagonal +* element D(k,k) from W (potentially saves only one load)) + CALL CCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + IF( K.GT.1 ) THEN +* +* (NOTE: No need to check if A(k,k) is NOT ZERO, +* since that was ensured earlier in pivot search: +* case A(k,k) = 0 falls into 2x2 pivot case(4)) +* + R1 = ONE / REAL( A( K, K ) ) + CALL CSSCAL( K-1, R1, A( 1, K ), 1 ) +* +* (2) Conjugate column W(kw) +* + CALL CLACGV( K-1, W( 1, KW ), 1 ) + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns kw and kw-1 of W now hold +* +* ( W(kw-1) W(kw) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* (1) Store U(1:k-2,k-1) and U(1:k-2,k) and 2-by-2 +* block D(k-1:k,k-1:k) in columns k-1 and k of A. +* (NOTE: 2-by-2 diagonal block U(k-1:k,k-1:k) is a UNIT +* block and not stored) +* A(k-1:k,k-1:k) := D(k-1:k,k-1:k) = W(k-1:k,kw-1:kw) +* A(1:k-2,k-1:k) := U(1:k-2,k:k-1:k) = +* = W(1:k-2,kw-1:kw) * ( D(k-1:k,k-1:k)**(-1) ) +* + IF( K.GT.2 ) THEN +* +* Factor out the columns of the inverse of 2-by-2 pivot +* block D, so that each column contains 1, to reduce the +* number of FLOPS when we multiply panel +* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1). +* +* D**(-1) = ( d11 cj(d21) )**(-1) = +* ( d21 d22 ) +* +* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = +* ( (-d21) ( d11 ) ) +* +* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * +* +* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = +* ( ( -1 ) ( d11/conj(d21) ) ) +* +* = 1/(|d21|**2) * 1/(D22*D11-1) * +* +* * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = ( conj(D21)*( D11 ) D21*( -1 ) ) +* ( ( -1 ) ( D22 ) ), +* +* where D11 = d22/d21, +* D22 = d11/conj(d21), +* D21 = T/d21, +* T = 1/(D22*D11-1). +* +* (NOTE: No need to check for division by ZERO, +* since that was ensured earlier in pivot search: +* (a) d21 != 0, since in 2x2 pivot case(4) +* |d21| should be larger than |d11| and |d22|; +* (b) (D22*D11 - 1) != 0, since from (a), +* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.) +* + D21 = W( K-1, KW ) + D11 = W( K, KW ) / CONJG( D21 ) + D22 = W( K-1, KW-1 ) / D21 + T = ONE / ( REAL( D11*D22 )-ONE ) + D21 = T / D21 +* +* Update elements in columns A(k-1) and A(k) as +* dot products of rows of ( W(kw-1) W(kw) ) and columns +* of D**(-1) +* + DO 20 J = 1, K - 2 + A( J, K-1 ) = D21*( D11*W( J, KW-1 )-W( J, KW ) ) + A( J, K ) = CONJG( D21 )* + $ ( D22*W( J, KW )-W( J, KW-1 ) ) + 20 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = W( K-1, KW ) + A( K, K ) = W( K, KW ) +* +* (2) Conjugate columns W(kw) and W(kw-1) +* + CALL CLACGV( K-1, W( 1, KW ), 1 ) + CALL CLACGV( K-2, W( 1, KW-1 ), 1 ) +* + END IF +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12**H = A11 - U12*W**H +* +* computing blocks of NB columns at a time (note that conjg(W) is +* actually stored) +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + A( JJ, JJ ) = REAL( A( JJ, JJ ) ) + CALL CGEMV( 'No transpose', JJ-J+1, N-K, -CONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, CONE, + $ A( J, JJ ), 1 ) + A( JJ, JJ ) = REAL( A( JJ, JJ ) ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + CALL CGEMM( 'No transpose', 'Transpose', J-1, JB, N-K, + $ -CONE, A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, + $ CONE, A( 1, J ), LDA ) + 50 CONTINUE +* +* Put U12 in standard form by partially undoing the interchanges +* in of rows in columns k+1:n looping backwards from k+1 to n +* + J = K + 1 + 60 CONTINUE +* +* Undo the interchanges (if any) of rows J and JP +* at each step J +* +* (Here, J is a diagonal index) + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP +* (Here, J is a diagonal index) + J = J + 1 + END IF +* (NOTE: Here, J is used to determine row length. Length N-J+1 +* of the rows to swap back doesn't include diagonal element) + J = J + 1 + IF( JP.NE.JJ .AND. J.LE.N ) + $ CALL CSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA ) + IF( J.LE.N ) + $ GO TO 60 +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 (note that conjg(W) is actually stored) +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* + KSTEP = 1 +* +* Copy column K of A to column K of W and update it +* + W( K, K ) = REAL( A( K, K ) ) + IF( K.LT.N ) + $ CALL CCOPY( N-K, A( K+1, K ), 1, W( K+1, K ), 1 ) + CALL CGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), LDA, + $ W( K, 1 ), LDW, CONE, W( K, K ), 1 ) + W( K, K ) = REAL( W( K, K ) ) +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( REAL( W( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + ICAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = CABS1( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = REAL( A( K, K ) ) + ELSE +* +* ============================================================ +* +* BEGIN pivot search +* +* Case(1) + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* BEGIN pivot search along IMAX row +* +* +* Copy column IMAX to column K+1 of W and update it +* + CALL CCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1 ) + CALL CLACGV( IMAX-K, W( K, K+1 ), 1 ) + W( IMAX, K+1 ) = REAL( A( IMAX, IMAX ) ) + IF( IMAX.LT.N ) + $ CALL CCOPY( N-IMAX, A( IMAX+1, IMAX ), 1, + $ W( IMAX+1, K+1 ), 1 ) + CALL CGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), + $ LDA, W( IMAX, 1 ), LDW, CONE, W( K, K+1 ), + $ 1 ) + W( IMAX, K+1 ) = REAL( W( IMAX, K+1 ) ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine only ROWMAX. +* + JMAX = K - 1 + ICAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = CABS1( W( JMAX, K+1 ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + ICAMAX( N-IMAX, W( IMAX+1, K+1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, K+1 ) ) ) + END IF +* +* Case(2) + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* +* Case(3) + ELSE IF( ABS( REAL( W( IMAX, K+1 ) ) ).GE.ALPHA*ROWMAX ) + $ THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K of W +* + CALL CCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* +* Case(4) + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF +* +* +* END pivot search along IMAX row +* + END IF +* +* END pivot search +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K + KSTEP - 1 +* +* Interchange rows and columns KP and KK. +* Updated column KP is already stored in column KK of W. +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP of submatrix A +* at step K. No need to copy element into column K +* (or K and K+1 for 2-by-2 pivot) of A, since these columns +* will be later overwritten. +* + A( KP, KP ) = REAL( A( KK, KK ) ) + CALL CCOPY( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + CALL CLACGV( KP-KK-1, A( KP, KK+1 ), LDA ) + IF( KP.LT.N ) + $ CALL CCOPY( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) +* +* Interchange rows KK and KP in first K-1 columns of A +* (columns K (or K and K+1 for 2-by-2 pivot) of A will be +* later overwritten). Interchange rows KK and KP +* in first KK columns of W. +* + IF( K.GT.1 ) + $ CALL CSWAP( K-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL CSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k), +* +* where L(k) is the k-th column of L +* +* (1) Store subdiag. elements of column L(k) +* and 1-by-1 block D(k) in column k of A. +* (NOTE: Diagonal element L(k,k) is a UNIT element +* and not stored) +* A(k,k) := D(k,k) = W(k,k) +* A(k+1:N,k) := L(k+1:N,k) = W(k+1:N,k)/D(k,k) +* +* (NOTE: No need to use for Hermitian matrix +* A( K, K ) = DBLE( W( K, K) ) to separately copy diagonal +* element D(k,k) from W (potentially saves only one load)) + CALL CCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN +* +* (NOTE: No need to check if A(k,k) is NOT ZERO, +* since that was ensured earlier in pivot search: +* case A(k,k) = 0 falls into 2x2 pivot case(4)) +* + R1 = ONE / REAL( A( K, K ) ) + CALL CSSCAL( N-K, R1, A( K+1, K ), 1 ) +* +* (2) Conjugate column W(k) +* + CALL CLACGV( N-K, W( K+1, K ), 1 ) + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* (1) Store L(k+2:N,k) and L(k+2:N,k+1) and 2-by-2 +* block D(k:k+1,k:k+1) in columns k and k+1 of A. +* (NOTE: 2-by-2 diagonal block L(k:k+1,k:k+1) is a UNIT +* block and not stored) +* A(k:k+1,k:k+1) := D(k:k+1,k:k+1) = W(k:k+1,k:k+1) +* A(k+2:N,k:k+1) := L(k+2:N,k:k+1) = +* = W(k+2:N,k:k+1) * ( D(k:k+1,k:k+1)**(-1) ) +* + IF( K.LT.N-1 ) THEN +* +* Factor out the columns of the inverse of 2-by-2 pivot +* block D, so that each column contains 1, to reduce the +* number of FLOPS when we multiply panel +* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1). +* +* D**(-1) = ( d11 cj(d21) )**(-1) = +* ( d21 d22 ) +* +* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = +* ( (-d21) ( d11 ) ) +* +* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * +* +* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = +* ( ( -1 ) ( d11/conj(d21) ) ) +* +* = 1/(|d21|**2) * 1/(D22*D11-1) * +* +* * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = ( conj(D21)*( D11 ) D21*( -1 ) ) +* ( ( -1 ) ( D22 ) ) +* +* where D11 = d22/d21, +* D22 = d11/conj(d21), +* D21 = T/d21, +* T = 1/(D22*D11-1). +* +* (NOTE: No need to check for division by ZERO, +* since that was ensured earlier in pivot search: +* (a) d21 != 0, since in 2x2 pivot case(4) +* |d21| should be larger than |d11| and |d22|; +* (b) (D22*D11 - 1) != 0, since from (a), +* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.) +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / CONJG( D21 ) + T = ONE / ( REAL( D11*D22 )-ONE ) + D21 = T / D21 +* +* Update elements in columns A(k) and A(k+1) as +* dot products of rows of ( W(k) W(k+1) ) and columns +* of D**(-1) +* + DO 80 J = K + 2, N + A( J, K ) = CONJG( D21 )* + $ ( D11*W( J, K )-W( J, K+1 ) ) + A( J, K+1 ) = D21*( D22*W( J, K+1 )-W( J, K ) ) + 80 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = W( K+1, K ) + A( K+1, K+1 ) = W( K+1, K+1 ) +* +* (2) Conjugate columns W(k) and W(k+1) +* + CALL CLACGV( N-K, W( K+1, K ), 1 ) + CALL CLACGV( N-K-1, W( K+2, K+1 ), 1 ) +* + END IF +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21**H = A22 - L21*W**H +* +* computing blocks of NB columns at a time (note that conjg(W) is +* actually stored) +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + A( JJ, JJ ) = REAL( A( JJ, JJ ) ) + CALL CGEMV( 'No transpose', J+JB-JJ, K-1, -CONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, CONE, + $ A( JJ, JJ ), 1 ) + A( JJ, JJ ) = REAL( A( JJ, JJ ) ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL CGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -CONE, A( J+JB, 1 ), LDA, W( J, 1 ), + $ LDW, CONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Put L21 in standard form by partially undoing the interchanges +* of rows in columns 1:k-1 looping backwards from k-1 to 1 +* + J = K - 1 + 120 CONTINUE +* +* Undo the interchanges (if any) of rows J and JP +* at each step J +* +* (Here, J is a diagonal index) + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP +* (Here, J is a diagonal index) + J = J - 1 + END IF +* (NOTE: Here, J is used to determine row length. Length J +* of the rows to swap back doesn't include diagonal element) + J = J - 1 + IF( JP.NE.JJ .AND. J.GE.1 ) + $ CALL CSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA ) + IF( J.GE.1 ) + $ GO TO 120 +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF + RETURN +* +* End of CLAHEF +* + END diff --git a/dspl/liblapack/SRC/clahef_aa.f b/dspl/liblapack/SRC/clahef_aa.f new file mode 100644 index 0000000..88bc3d2 --- /dev/null +++ b/dspl/liblapack/SRC/clahef_aa.f @@ -0,0 +1,501 @@ +*> \brief \b CLAHEF_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAHEF_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAHEF_AA( UPLO, J1, M, NB, A, LDA, IPIV, +* H, LDH, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER J1, M, NB, LDA, LDH +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), H( LDH, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAHEF_AA factorizes a panel of a complex hermitian matrix A using +*> the Aasen's algorithm. The panel consists of a set of NB rows of A +*> when UPLO is U, or a set of NB columns when UPLO is L. +*> +*> In order to factorize the panel, the Aasen's algorithm requires the +*> last row, or column, of the previous panel. The first row, or column, +*> of A is set to be the first row, or column, of an identity matrix, +*> which is used to factorize the first panel. +*> +*> The resulting J-th row of U, or J-th column of L, is stored in the +*> (J-1)-th row, or column, of A (without the unit diagonals), while +*> the diagonal and subdiagonal of A are overwritten by those of T. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] J1 +*> \verbatim +*> J1 is INTEGER +*> The location of the first row, or column, of the panel +*> within the submatrix of A, passed to this routine, e.g., +*> when called by CHETRF_AA, for the first panel, J1 is 1, +*> while for the remaining panels, J1 is 2. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The dimension of the submatrix. M >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The dimension of the panel to be facotorized. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,M) for +*> the first panel, while dimension (LDA,M+1) for the +*> remaining panels. +*> +*> On entry, A contains the last row, or column, of +*> the previous panel, and the trailing submatrix of A +*> to be factorized, except for the first panel, only +*> the panel is passed. +*> +*> On exit, the leading panel is factorized. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the row and column interchanges, +*> the row and column k were interchanged with the row and +*> column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is COMPLEX workspace, dimension (LDH,NB). +*> +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the workspace H. LDH >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX workspace, dimension (M). +*> \endverbatim +*> +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CLAHEF_AA( UPLO, J1, M, NB, A, LDA, IPIV, + $ H, LDH, WORK ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER M, NB, J1, LDA, LDH +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), H( LDH, * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = (0.0E+0, 0.0E+0), ONE = (1.0E+0, 0.0E+0) ) +* +* .. Local Scalars .. + INTEGER J, K, K1, I1, I2, MJ + COMPLEX PIV, ALPHA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX, ILAENV + EXTERNAL LSAME, ILAENV, ICAMAX +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CGEMV, CSCAL, CAXPY, CCOPY, CSWAP, CLASET, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, CONJG, MAX +* .. +* .. Executable Statements .. +* + J = 1 +* +* K1 is the first column of the panel to be factorized +* i.e., K1 is 2 for the first block column, and 1 for the rest of the blocks +* + K1 = (2-J1)+1 +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* ..................................................... +* Factorize A as U**T*D*U using the upper triangle of A +* ..................................................... +* + 10 CONTINUE + IF ( J.GT.MIN(M, NB) ) + $ GO TO 20 +* +* K is the column to be factorized +* when being called from CHETRF_AA, +* > for the first block column, J1 is 1, hence J1+J-1 is J, +* > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, +* + K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF +* +* H(J:N, J) := A(J, J:N) - H(J:N, 1:(J-1)) * L(J1:(J-1), J), +* where H(J:N, J) has been initialized to be A(J, J:N) +* + IF( K.GT.2 ) THEN +* +* K is the column to be factorized +* > for the first block column, K is J, skipping the first two +* columns +* > for the rest of the columns, K is J+1, skipping only the +* first column +* + CALL CLACGV( J-K1, A( 1, J ), 1 ) + CALL CGEMV( 'No transpose', MJ, J-K1, + $ -ONE, H( J, K1 ), LDH, + $ A( 1, J ), 1, + $ ONE, H( J, J ), 1 ) + CALL CLACGV( J-K1, A( 1, J ), 1 ) + END IF +* +* Copy H(i:n, i) into WORK +* + CALL CCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) +* + IF( J.GT.K1 ) THEN +* +* Compute WORK := WORK - L(J-1, J:N) * T(J-1,J), +* where A(J-1, J) stores T(J-1, J) and A(J-2, J:N) stores U(J-1, J:N) +* + ALPHA = -CONJG( A( K-1, J ) ) + CALL CAXPY( MJ, ALPHA, A( K-2, J ), LDA, WORK( 1 ), 1 ) + END IF +* +* Set A(J, J) = T(J, J) +* + A( K, J ) = REAL( WORK( 1 ) ) +* + IF( J.LT.M ) THEN +* +* Compute WORK(2:N) = T(J, J) L(J, (J+1):N) +* where A(J, J) stores T(J, J) and A(J-1, (J+1):N) stores U(J, (J+1):N) +* + IF( K.GT.1 ) THEN + ALPHA = -A( K, J ) + CALL CAXPY( M-J, ALPHA, A( K-1, J+1 ), LDA, + $ WORK( 2 ), 1 ) + ENDIF +* +* Find max(|WORK(2:n)|) +* + I2 = ICAMAX( M-J, WORK( 2 ), 1 ) + 1 + PIV = WORK( I2 ) +* +* Apply hermitian pivot +* + IF( (I2.NE.2) .AND. (PIV.NE.0) ) THEN +* +* Swap WORK(I1) and WORK(I2) +* + I1 = 2 + WORK( I2 ) = WORK( I1 ) + WORK( I1 ) = PIV +* +* Swap A(I1, I1+1:N) with A(I1+1:N, I2) +* + I1 = I1+J-1 + I2 = I2+J-1 + CALL CSWAP( I2-I1-1, A( J1+I1-1, I1+1 ), LDA, + $ A( J1+I1, I2 ), 1 ) + CALL CLACGV( I2-I1, A( J1+I1-1, I1+1 ), LDA ) + CALL CLACGV( I2-I1-1, A( J1+I1, I2 ), 1 ) +* +* Swap A(I1, I2+1:N) with A(I2, I2+1:N) +* + CALL CSWAP( M-I2, A( J1+I1-1, I2+1 ), LDA, + $ A( J1+I2-1, I2+1 ), LDA ) +* +* Swap A(I1, I1) with A(I2,I2) +* + PIV = A( I1+J1-1, I1 ) + A( J1+I1-1, I1 ) = A( J1+I2-1, I2 ) + A( J1+I2-1, I2 ) = PIV +* +* Swap H(I1, 1:J1) with H(I2, 1:J1) +* + CALL CSWAP( I1-1, H( I1, 1 ), LDH, H( I2, 1 ), LDH ) + IPIV( I1 ) = I2 +* + IF( I1.GT.(K1-1) ) THEN +* +* Swap L(1:I1-1, I1) with L(1:I1-1, I2), +* skipping the first column +* + CALL CSWAP( I1-K1+1, A( 1, I1 ), 1, + $ A( 1, I2 ), 1 ) + END IF + ELSE + IPIV( J+1 ) = J+1 + ENDIF +* +* Set A(J, J+1) = T(J, J+1) +* + A( K, J+1 ) = WORK( 2 ) +* + IF( J.LT.NB ) THEN +* +* Copy A(J+1:N, J+1) into H(J:N, J), +* + CALL CCOPY( M-J, A( K+1, J+1 ), LDA, + $ H( J+1, J+1 ), 1 ) + END IF +* +* Compute L(J+2, J+1) = WORK( 3:N ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:N, J) = L(J+2:N, J+1) +* + IF( A( K, J+1 ).NE.ZERO ) THEN + ALPHA = ONE / A( K, J+1 ) + CALL CCOPY( M-J-1, WORK( 3 ), 1, A( K, J+2 ), LDA ) + CALL CSCAL( M-J-1, ALPHA, A( K, J+2 ), LDA ) + ELSE + CALL CLASET( 'Full', 1, M-J-1, ZERO, ZERO, + $ A( K, J+2 ), LDA) + END IF + END IF + J = J + 1 + GO TO 10 + 20 CONTINUE +* + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* + 30 CONTINUE + IF( J.GT.MIN( M, NB ) ) + $ GO TO 40 +* +* K is the column to be factorized +* when being called from CHETRF_AA, +* > for the first block column, J1 is 1, hence J1+J-1 is J, +* > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, +* + K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF +* +* H(J:N, J) := A(J:N, J) - H(J:N, 1:(J-1)) * L(J, J1:(J-1))^T, +* where H(J:N, J) has been initialized to be A(J:N, J) +* + IF( K.GT.2 ) THEN +* +* K is the column to be factorized +* > for the first block column, K is J, skipping the first two +* columns +* > for the rest of the columns, K is J+1, skipping only the +* first column +* + CALL CLACGV( J-K1, A( J, 1 ), LDA ) + CALL CGEMV( 'No transpose', MJ, J-K1, + $ -ONE, H( J, K1 ), LDH, + $ A( J, 1 ), LDA, + $ ONE, H( J, J ), 1 ) + CALL CLACGV( J-K1, A( J, 1 ), LDA ) + END IF +* +* Copy H(J:N, J) into WORK +* + CALL CCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) +* + IF( J.GT.K1 ) THEN +* +* Compute WORK := WORK - L(J:N, J-1) * T(J-1,J), +* where A(J-1, J) = T(J-1, J) and A(J, J-2) = L(J, J-1) +* + ALPHA = -CONJG( A( J, K-1 ) ) + CALL CAXPY( MJ, ALPHA, A( J, K-2 ), 1, WORK( 1 ), 1 ) + END IF +* +* Set A(J, J) = T(J, J) +* + A( J, K ) = REAL( WORK( 1 ) ) +* + IF( J.LT.M ) THEN +* +* Compute WORK(2:N) = T(J, J) L((J+1):N, J) +* where A(J, J) = T(J, J) and A((J+1):N, J-1) = L((J+1):N, J) +* + IF( K.GT.1 ) THEN + ALPHA = -A( J, K ) + CALL CAXPY( M-J, ALPHA, A( J+1, K-1 ), 1, + $ WORK( 2 ), 1 ) + ENDIF +* +* Find max(|WORK(2:n)|) +* + I2 = ICAMAX( M-J, WORK( 2 ), 1 ) + 1 + PIV = WORK( I2 ) +* +* Apply hermitian pivot +* + IF( (I2.NE.2) .AND. (PIV.NE.0) ) THEN +* +* Swap WORK(I1) and WORK(I2) +* + I1 = 2 + WORK( I2 ) = WORK( I1 ) + WORK( I1 ) = PIV +* +* Swap A(I1+1:N, I1) with A(I2, I1+1:N) +* + I1 = I1+J-1 + I2 = I2+J-1 + CALL CSWAP( I2-I1-1, A( I1+1, J1+I1-1 ), 1, + $ A( I2, J1+I1 ), LDA ) + CALL CLACGV( I2-I1, A( I1+1, J1+I1-1 ), 1 ) + CALL CLACGV( I2-I1-1, A( I2, J1+I1 ), LDA ) +* +* Swap A(I2+1:N, I1) with A(I2+1:N, I2) +* + CALL CSWAP( M-I2, A( I2+1, J1+I1-1 ), 1, + $ A( I2+1, J1+I2-1 ), 1 ) +* +* Swap A(I1, I1) with A(I2, I2) +* + PIV = A( I1, J1+I1-1 ) + A( I1, J1+I1-1 ) = A( I2, J1+I2-1 ) + A( I2, J1+I2-1 ) = PIV +* +* Swap H(I1, I1:J1) with H(I2, I2:J1) +* + CALL CSWAP( I1-1, H( I1, 1 ), LDH, H( I2, 1 ), LDH ) + IPIV( I1 ) = I2 +* + IF( I1.GT.(K1-1) ) THEN +* +* Swap L(1:I1-1, I1) with L(1:I1-1, I2), +* skipping the first column +* + CALL CSWAP( I1-K1+1, A( I1, 1 ), LDA, + $ A( I2, 1 ), LDA ) + END IF + ELSE + IPIV( J+1 ) = J+1 + ENDIF +* +* Set A(J+1, J) = T(J+1, J) +* + A( J+1, K ) = WORK( 2 ) +* + IF( J.LT.NB ) THEN +* +* Copy A(J+1:N, J+1) into H(J+1:N, J), +* + CALL CCOPY( M-J, A( J+1, K+1 ), 1, + $ H( J+1, J+1 ), 1 ) + END IF +* +* Compute L(J+2, J+1) = WORK( 3:N ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:N, J) = L(J+2:N, J+1) +* + IF( A( J+1, K ).NE.ZERO ) THEN + ALPHA = ONE / A( J+1, K ) + CALL CCOPY( M-J-1, WORK( 3 ), 1, A( J+2, K ), 1 ) + CALL CSCAL( M-J-1, ALPHA, A( J+2, K ), 1 ) + ELSE + CALL CLASET( 'Full', M-J-1, 1, ZERO, ZERO, + $ A( J+2, K ), LDA ) + END IF + END IF + J = J + 1 + GO TO 30 + 40 CONTINUE + END IF + RETURN +* +* End of CLAHEF_AA +* + END diff --git a/dspl/liblapack/SRC/clahef_rk.f b/dspl/liblapack/SRC/clahef_rk.f new file mode 100644 index 0000000..4d9dfbe --- /dev/null +++ b/dspl/liblapack/SRC/clahef_rk.f @@ -0,0 +1,1234 @@ +*> \brief \b CLAHEF_RK computes a partial factorization of a complex Hermitian indefinite matrix using bounded Bunch-Kaufman (rook) diagonal pivoting method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAHEF_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAHEF_RK( UPLO, N, NB, KB, A, LDA, E, IPIV, W, LDW, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), E( * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> CLAHEF_RK computes a partial factorization of a complex Hermitian +*> matrix A using the bounded Bunch-Kaufman (rook) diagonal +*> pivoting method. The partial factorization has the form: +*> +*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +*> ( 0 U22 ) ( 0 D ) ( U12**H U22**H ) +*> +*> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L', +*> ( L21 I ) ( 0 A22 ) ( 0 I ) +*> +*> where the order of D is at most NB. The actual order is returned in +*> the argument KB, and is either NB or NB-1, or N if N <= NB. +*> +*> CLAHEF_RK is an auxiliary routine called by CHETRF_RK. It uses +*> blocked code (calling Level 3 BLAS) to update the submatrix +*> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The maximum number of columns of the matrix A that should be +*> factored. NB should be at least 2 to allow for 2-by-2 pivot +*> blocks. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns of A that were actually factored. +*> KB is either NB-1 or NB, or N if N <= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, contains: +*> a) ONLY diagonal elements of the Hermitian block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is COMPLEX array, dimension (N) +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the Hermitian block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> IPIV describes the permutation matrix P in the factorization +*> of matrix A as follows. The absolute value of IPIV(k) +*> represents the index of row and column that were +*> interchanged with the k-th row and column. The value of UPLO +*> describes the order in which the interchanges were applied. +*> Also, the sign of IPIV represents the block structure of +*> the Hermitian block diagonal matrix D with 1-by-1 or 2-by-2 +*> diagonal blocks which correspond to 1 or 2 interchanges +*> at each factorization step. +*> +*> If UPLO = 'U', +*> ( in factorization order, k decreases from N to 1 ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the submatrix A(1:N,N-KB+1:N); +*> If IPIV(k) = k, no interchange occurred. +*> +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k-1) < 0 means: +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,N-KB+1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k-1) != k-1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the submatrix A(1:N,N-KB+1:N). +*> If -IPIV(k-1) = k-1, no interchange occurred. +*> +*> c) In both cases a) and b) is always ABS( IPIV(k) ) <= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> +*> If UPLO = 'L', +*> ( in factorization order, k increases from 1 to N ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the submatrix A(1:N,1:KB). +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k+1) < 0 means: +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the submatrix A(1:N,1:KB). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k+1) != k+1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the submatrix A(1:N,1:KB). +*> If -IPIV(k+1) = k+1, no interchange occurred. +*> +*> c) In both cases a) and b) is always ABS( IPIV(k) ) >= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX array, dimension (LDW,NB) +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CLAHEF_RK( UPLO, N, NB, KB, A, LDA, E, IPIV, W, LDW, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), W( LDW, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) + COMPLEX CONE, CZERO + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), + $ CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER IMAX, ITEMP, II, J, JB, JJ, JMAX, K, KK, KKW, + $ KP, KSTEP, KW, P + REAL ABSAKK, ALPHA, COLMAX, STEMP, R1, ROWMAX, T, + $ SFMIN + COMPLEX D11, D21, D22, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CSSCAL, CGEMM, CGEMV, CLACGV, CSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, AIMAG, MAX, MIN, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( REAL( Z ) ) + ABS( AIMAG( Z ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = SLAMCH( 'S' ) +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 (note that conjg(W) is actually stored) +* +* Initilize the first entry of array E, where superdiagonal +* elements of D are stored +* + E( 1 ) = CZERO +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* + K = N + 10 CONTINUE +* +* KW is the column of W which corresponds to column K of A +* + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column KW of W and update it +* + IF( K.GT.1 ) + $ CALL CCOPY( K-1, A( 1, K ), 1, W( 1, KW ), 1 ) + W( K, KW ) = REAL( A( K, K ) ) + IF( K.LT.N ) THEN + CALL CGEMV( 'No transpose', K, N-K, -CONE, A( 1, K+1 ), LDA, + $ W( K, KW+1 ), LDW, CONE, W( 1, KW ), 1 ) + W( K, KW ) = REAL( W( K, KW ) ) + END IF +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( REAL( W( K, KW ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = ICAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = CABS1( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = REAL( W( K, KW ) ) + IF( K.GT.1 ) + $ CALL CCOPY( K-1, W( 1, KW ), 1, A( 1, K ), 1 ) +* +* Set E( K ) to zero +* + IF( K.GT.1 ) + $ E( K ) = CZERO +* + ELSE +* +* ============================================================ +* +* BEGIN pivot search +* +* Case(1) +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* +* Lop until pivot found +* + DONE = .FALSE. +* + 12 CONTINUE +* +* BEGIN pivot search loop body +* +* +* Copy column IMAX to column KW-1 of W and update it +* + IF( IMAX.GT.1 ) + $ CALL CCOPY( IMAX-1, A( 1, IMAX ), 1, W( 1, KW-1 ), + $ 1 ) + W( IMAX, KW-1 ) = REAL( A( IMAX, IMAX ) ) +* + CALL CCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) + CALL CLACGV( K-IMAX, W( IMAX+1, KW-1 ), 1 ) +* + IF( K.LT.N ) THEN + CALL CGEMV( 'No transpose', K, N-K, -CONE, + $ A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW, + $ CONE, W( 1, KW-1 ), 1 ) + W( IMAX, KW-1 ) = REAL( W( IMAX, KW-1 ) ) + END IF +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + ICAMAX( K-IMAX, W( IMAX+1, KW-1 ), + $ 1 ) + ROWMAX = CABS1( W( JMAX, KW-1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = ICAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + STEMP = CABS1( W( ITEMP, KW-1 ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Case(2) +* Equivalent to testing for +* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABS( REAL( W( IMAX,KW-1 ) ) ) + $ .LT.ALPHA*ROWMAX ) ) THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW of W +* + CALL CCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + DONE = .TRUE. +* +* Case(3) +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K-1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. +* +* Case(4) + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL CCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + END IF +* +* +* END pivot search loop body +* + IF( .NOT.DONE ) GOTO 12 +* + END IF +* +* END pivot search +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K - KSTEP + 1 +* +* KKW is the column of W which corresponds to column KK of A +* + KKW = NB + KK - N +* +* Interchange rows and columns P and K. +* Updated column P is already stored in column KW of W. +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column K to column P of submatrix A +* at step K. No need to copy element into columns +* K and K-1 of A for 2-by-2 pivot, since these columns +* will be later overwritten. +* + A( P, P ) = REAL( A( K, K ) ) + CALL CCOPY( K-1-P, A( P+1, K ), 1, A( P, P+1 ), + $ LDA ) + CALL CLACGV( K-1-P, A( P, P+1 ), LDA ) + IF( P.GT.1 ) + $ CALL CCOPY( P-1, A( 1, K ), 1, A( 1, P ), 1 ) +* +* Interchange rows K and P in the last K+1 to N columns of A +* (columns K and K-1 of A for 2-by-2 pivot will be +* later overwritten). Interchange rows K and P +* in last KKW to NB columns of W. +* + IF( K.LT.N ) + $ CALL CSWAP( N-K, A( K, K+1 ), LDA, A( P, K+1 ), + $ LDA ) + CALL CSWAP( N-KK+1, W( K, KKW ), LDW, W( P, KKW ), + $ LDW ) + END IF +* +* Interchange rows and columns KP and KK. +* Updated column KP is already stored in column KKW of W. +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP of submatrix A +* at step K. No need to copy element into column K +* (or K and K-1 for 2-by-2 pivot) of A, since these columns +* will be later overwritten. +* + A( KP, KP ) = REAL( A( KK, KK ) ) + CALL CCOPY( KK-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + CALL CLACGV( KK-1-KP, A( KP, KP+1 ), LDA ) + IF( KP.GT.1 ) + $ CALL CCOPY( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last K+1 to N columns of A +* (columns K (or K and K-1 for 2-by-2 pivot) of A will be +* later overwritten). Interchange rows KK and KP +* in last KKW to NB columns of W. +* + IF( K.LT.N ) + $ CALL CSWAP( N-K, A( KK, K+1 ), LDA, A( KP, K+1 ), + $ LDA ) + CALL CSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column kw of W now holds +* +* W(kw) = U(k)*D(k), +* +* where U(k) is the k-th column of U +* +* (1) Store subdiag. elements of column U(k) +* and 1-by-1 block D(k) in column k of A. +* (NOTE: Diagonal element U(k,k) is a UNIT element +* and not stored) +* A(k,k) := D(k,k) = W(k,kw) +* A(1:k-1,k) := U(1:k-1,k) = W(1:k-1,kw)/D(k,k) +* +* (NOTE: No need to use for Hermitian matrix +* A( K, K ) = REAL( W( K, K) ) to separately copy diagonal +* element D(k,k) from W (potentially saves only one load)) + CALL CCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + IF( K.GT.1 ) THEN +* +* (NOTE: No need to check if A(k,k) is NOT ZERO, +* since that was ensured earlier in pivot search: +* case A(k,k) = 0 falls into 2x2 pivot case(3)) +* +* Handle division by a small number +* + T = REAL( A( K, K ) ) + IF( ABS( T ).GE.SFMIN ) THEN + R1 = ONE / T + CALL CSSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE + DO 14 II = 1, K-1 + A( II, K ) = A( II, K ) / T + 14 CONTINUE + END IF +* +* (2) Conjugate column W(kw) +* + CALL CLACGV( K-1, W( 1, KW ), 1 ) +* +* Store the superdiagonal element of D in array E +* + E( K ) = CZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns kw and kw-1 of W now hold +* +* ( W(kw-1) W(kw) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* (1) Store U(1:k-2,k-1) and U(1:k-2,k) and 2-by-2 +* block D(k-1:k,k-1:k) in columns k-1 and k of A. +* (NOTE: 2-by-2 diagonal block U(k-1:k,k-1:k) is a UNIT +* block and not stored) +* A(k-1:k,k-1:k) := D(k-1:k,k-1:k) = W(k-1:k,kw-1:kw) +* A(1:k-2,k-1:k) := U(1:k-2,k:k-1:k) = +* = W(1:k-2,kw-1:kw) * ( D(k-1:k,k-1:k)**(-1) ) +* + IF( K.GT.2 ) THEN +* +* Factor out the columns of the inverse of 2-by-2 pivot +* block D, so that each column contains 1, to reduce the +* number of FLOPS when we multiply panel +* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1). +* +* D**(-1) = ( d11 cj(d21) )**(-1) = +* ( d21 d22 ) +* +* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = +* ( (-d21) ( d11 ) ) +* +* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * +* +* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = +* ( ( -1 ) ( d11/conj(d21) ) ) +* +* = 1/(|d21|**2) * 1/(D22*D11-1) * +* +* * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* Handle division by a small number. (NOTE: order of +* operations is important) +* +* = ( T*(( D11 )/conj(D21)) T*(( -1 )/D21 ) ) +* ( (( -1 ) ) (( D22 ) ) ), +* +* where D11 = d22/d21, +* D22 = d11/conj(d21), +* D21 = d21, +* T = 1/(D22*D11-1). +* +* (NOTE: No need to check for division by ZERO, +* since that was ensured earlier in pivot search: +* (a) d21 != 0 in 2x2 pivot case(4), +* since |d21| should be larger than |d11| and |d22|; +* (b) (D22*D11 - 1) != 0, since from (a), +* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.) +* + D21 = W( K-1, KW ) + D11 = W( K, KW ) / CONJG( D21 ) + D22 = W( K-1, KW-1 ) / D21 + T = ONE / ( REAL( D11*D22 )-ONE ) +* +* Update elements in columns A(k-1) and A(k) as +* dot products of rows of ( W(kw-1) W(kw) ) and columns +* of D**(-1) +* + DO 20 J = 1, K - 2 + A( J, K-1 ) = T*( ( D11*W( J, KW-1 )-W( J, KW ) ) / + $ D21 ) + A( J, K ) = T*( ( D22*W( J, KW )-W( J, KW-1 ) ) / + $ CONJG( D21 ) ) + 20 CONTINUE + END IF +* +* Copy diagonal elements of D(K) to A, +* copy superdiagonal element of D(K) to E(K) and +* ZERO out superdiagonal entry of A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = CZERO + A( K, K ) = W( K, KW ) + E( K ) = W( K-1, KW ) + E( K-1 ) = CZERO +* +* (2) Conjugate columns W(kw) and W(kw-1) +* + CALL CLACGV( K-1, W( 1, KW ), 1 ) + CALL CLACGV( K-2, W( 1, KW-1 ), 1 ) +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12**H = A11 - U12*W**H +* +* computing blocks of NB columns at a time (note that conjg(W) is +* actually stored) +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + A( JJ, JJ ) = REAL( A( JJ, JJ ) ) + CALL CGEMV( 'No transpose', JJ-J+1, N-K, -CONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, CONE, + $ A( J, JJ ), 1 ) + A( JJ, JJ ) = REAL( A( JJ, JJ ) ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + IF( J.GE.2 ) + $ CALL CGEMM( 'No transpose', 'Transpose', J-1, JB, N-K, + $ -CONE, A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, + $ CONE, A( 1, J ), LDA ) + 50 CONTINUE +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 (note that conjg(W) is actually stored) +* +* Initilize the unused last entry of the subdiagonal array E. +* + E( N ) = CZERO +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column K of W and update column K of W +* + W( K, K ) = REAL( A( K, K ) ) + IF( K.LT.N ) + $ CALL CCOPY( N-K, A( K+1, K ), 1, W( K+1, K ), 1 ) + IF( K.GT.1 ) THEN + CALL CGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), + $ LDA, W( K, 1 ), LDW, CONE, W( K, K ), 1 ) + W( K, K ) = REAL( W( K, K ) ) + END IF +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( REAL( W( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + ICAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = CABS1( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = REAL( W( K, K ) ) + IF( K.LT.N ) + $ CALL CCOPY( N-K, W( K+1, K ), 1, A( K+1, K ), 1 ) +* +* Set E( K ) to zero +* + IF( K.LT.N ) + $ E( K ) = CZERO +* + ELSE +* +* ============================================================ +* +* BEGIN pivot search +* +* Case(1) +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 72 CONTINUE +* +* BEGIN pivot search loop body +* +* +* Copy column IMAX to column k+1 of W and update it +* + CALL CCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1) + CALL CLACGV( IMAX-K, W( K, K+1 ), 1 ) + W( IMAX, K+1 ) = REAL( A( IMAX, IMAX ) ) +* + IF( IMAX.LT.N ) + $ CALL CCOPY( N-IMAX, A( IMAX+1, IMAX ), 1, + $ W( IMAX+1, K+1 ), 1 ) +* + IF( K.GT.1 ) THEN + CALL CGEMV( 'No transpose', N-K+1, K-1, -CONE, + $ A( K, 1 ), LDA, W( IMAX, 1 ), LDW, + $ CONE, W( K, K+1 ), 1 ) + W( IMAX, K+1 ) = REAL( W( IMAX, K+1 ) ) + END IF +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + ICAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = CABS1( W( JMAX, K+1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + ICAMAX( N-IMAX, W( IMAX+1, K+1 ), 1) + STEMP = CABS1( W( ITEMP, K+1 ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Case(2) +* Equivalent to testing for +* ABS( REAL( W( IMAX,K+1 ) ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABS( REAL( W( IMAX,K+1 ) ) ) + $ .LT.ALPHA*ROWMAX ) ) THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K of W +* + CALL CCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + DONE = .TRUE. +* +* Case(3) +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. +* +* Case(4) + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL CCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + END IF +* +* +* End pivot search loop body +* + IF( .NOT.DONE ) GOTO 72 +* + END IF +* +* END pivot search +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K + KSTEP - 1 +* +* Interchange rows and columns P and K (only for 2-by-2 pivot). +* Updated column P is already stored in column K of W. +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column KK-1 to column P of submatrix A +* at step K. No need to copy element into columns +* K and K+1 of A for 2-by-2 pivot, since these columns +* will be later overwritten. +* + A( P, P ) = REAL( A( K, K ) ) + CALL CCOPY( P-K-1, A( K+1, K ), 1, A( P, K+1 ), LDA ) + CALL CLACGV( P-K-1, A( P, K+1 ), LDA ) + IF( P.LT.N ) + $ CALL CCOPY( N-P, A( P+1, K ), 1, A( P+1, P ), 1 ) +* +* Interchange rows K and P in first K-1 columns of A +* (columns K and K+1 of A for 2-by-2 pivot will be +* later overwritten). Interchange rows K and P +* in first KK columns of W. +* + IF( K.GT.1 ) + $ CALL CSWAP( K-1, A( K, 1 ), LDA, A( P, 1 ), LDA ) + CALL CSWAP( KK, W( K, 1 ), LDW, W( P, 1 ), LDW ) + END IF +* +* Interchange rows and columns KP and KK. +* Updated column KP is already stored in column KK of W. +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP of submatrix A +* at step K. No need to copy element into column K +* (or K and K+1 for 2-by-2 pivot) of A, since these columns +* will be later overwritten. +* + A( KP, KP ) = REAL( A( KK, KK ) ) + CALL CCOPY( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + CALL CLACGV( KP-KK-1, A( KP, KK+1 ), LDA ) + IF( KP.LT.N ) + $ CALL CCOPY( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) +* +* Interchange rows KK and KP in first K-1 columns of A +* (column K (or K and K+1 for 2-by-2 pivot) of A will be +* later overwritten). Interchange rows KK and KP +* in first KK columns of W. +* + IF( K.GT.1 ) + $ CALL CSWAP( K-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL CSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k), +* +* where L(k) is the k-th column of L +* +* (1) Store subdiag. elements of column L(k) +* and 1-by-1 block D(k) in column k of A. +* (NOTE: Diagonal element L(k,k) is a UNIT element +* and not stored) +* A(k,k) := D(k,k) = W(k,k) +* A(k+1:N,k) := L(k+1:N,k) = W(k+1:N,k)/D(k,k) +* +* (NOTE: No need to use for Hermitian matrix +* A( K, K ) = REAL( W( K, K) ) to separately copy diagonal +* element D(k,k) from W (potentially saves only one load)) + CALL CCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN +* +* (NOTE: No need to check if A(k,k) is NOT ZERO, +* since that was ensured earlier in pivot search: +* case A(k,k) = 0 falls into 2x2 pivot case(3)) +* +* Handle division by a small number +* + T = REAL( A( K, K ) ) + IF( ABS( T ).GE.SFMIN ) THEN + R1 = ONE / T + CALL CSSCAL( N-K, R1, A( K+1, K ), 1 ) + ELSE + DO 74 II = K + 1, N + A( II, K ) = A( II, K ) / T + 74 CONTINUE + END IF +* +* (2) Conjugate column W(k) +* + CALL CLACGV( N-K, W( K+1, K ), 1 ) +* +* Store the subdiagonal element of D in array E +* + E( K ) = CZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* (1) Store L(k+2:N,k) and L(k+2:N,k+1) and 2-by-2 +* block D(k:k+1,k:k+1) in columns k and k+1 of A. +* NOTE: 2-by-2 diagonal block L(k:k+1,k:k+1) is a UNIT +* block and not stored. +* A(k:k+1,k:k+1) := D(k:k+1,k:k+1) = W(k:k+1,k:k+1) +* A(k+2:N,k:k+1) := L(k+2:N,k:k+1) = +* = W(k+2:N,k:k+1) * ( D(k:k+1,k:k+1)**(-1) ) +* + IF( K.LT.N-1 ) THEN +* +* Factor out the columns of the inverse of 2-by-2 pivot +* block D, so that each column contains 1, to reduce the +* number of FLOPS when we multiply panel +* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1). +* +* D**(-1) = ( d11 cj(d21) )**(-1) = +* ( d21 d22 ) +* +* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = +* ( (-d21) ( d11 ) ) +* +* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * +* +* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = +* ( ( -1 ) ( d11/conj(d21) ) ) +* +* = 1/(|d21|**2) * 1/(D22*D11-1) * +* +* * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* Handle division by a small number. (NOTE: order of +* operations is important) +* +* = ( T*(( D11 )/conj(D21)) T*(( -1 )/D21 ) ) +* ( (( -1 ) ) (( D22 ) ) ), +* +* where D11 = d22/d21, +* D22 = d11/conj(d21), +* D21 = d21, +* T = 1/(D22*D11-1). +* +* (NOTE: No need to check for division by ZERO, +* since that was ensured earlier in pivot search: +* (a) d21 != 0 in 2x2 pivot case(4), +* since |d21| should be larger than |d11| and |d22|; +* (b) (D22*D11 - 1) != 0, since from (a), +* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.) +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / CONJG( D21 ) + T = ONE / ( REAL( D11*D22 )-ONE ) +* +* Update elements in columns A(k) and A(k+1) as +* dot products of rows of ( W(k) W(k+1) ) and columns +* of D**(-1) +* + DO 80 J = K + 2, N + A( J, K ) = T*( ( D11*W( J, K )-W( J, K+1 ) ) / + $ CONJG( D21 ) ) + A( J, K+1 ) = T*( ( D22*W( J, K+1 )-W( J, K ) ) / + $ D21 ) + 80 CONTINUE + END IF +* +* Copy diagonal elements of D(K) to A, +* copy subdiagonal element of D(K) to E(K) and +* ZERO out subdiagonal entry of A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = CZERO + A( K+1, K+1 ) = W( K+1, K+1 ) + E( K ) = W( K+1, K ) + E( K+1 ) = CZERO +* +* (2) Conjugate columns W(k) and W(k+1) +* + CALL CLACGV( N-K, W( K+1, K ), 1 ) + CALL CLACGV( N-K-1, W( K+2, K+1 ), 1 ) +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21**H = A22 - L21*W**H +* +* computing blocks of NB columns at a time (note that conjg(W) is +* actually stored) +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + A( JJ, JJ ) = REAL( A( JJ, JJ ) ) + CALL CGEMV( 'No transpose', J+JB-JJ, K-1, -CONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, CONE, + $ A( JJ, JJ ), 1 ) + A( JJ, JJ ) = REAL( A( JJ, JJ ) ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL CGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -CONE, A( J+JB, 1 ), LDA, W( J, 1 ), + $ LDW, CONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF + RETURN +* +* End of CLAHEF_RK +* + END diff --git a/dspl/liblapack/SRC/clahef_rook.f b/dspl/liblapack/SRC/clahef_rook.f new file mode 100644 index 0000000..698df99 --- /dev/null +++ b/dspl/liblapack/SRC/clahef_rook.f @@ -0,0 +1,1176 @@ +* \brief \b CLAHEF_ROOK computes a partial factorization of a complex Hermitian indefinite matrix using the bounded Bunch-Kaufman ("rook") diagonal pivoting method (blocked algorithm, calling Level 3 BLAS). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAHEF_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAHEF_ROOK( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAHEF_ROOK computes a partial factorization of a complex Hermitian +*> matrix A using the bounded Bunch-Kaufman ("rook") diagonal pivoting +*> method. The partial factorization has the form: +*> +*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +*> ( 0 U22 ) ( 0 D ) ( U12**H U22**H ) +*> +*> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L' +*> ( L21 I ) ( 0 A22 ) ( 0 I ) +*> +*> where the order of D is at most NB. The actual order is returned in +*> the argument KB, and is either NB or NB-1, or N if N <= NB. +*> Note that U**H denotes the conjugate transpose of U. +*> +*> CLAHEF_ROOK is an auxiliary routine called by CHETRF_ROOK. It uses +*> blocked code (calling Level 3 BLAS) to update the submatrix +*> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The maximum number of columns of the matrix A that should be +*> factored. NB should be at least 2 to allow for 2-by-2 pivot +*> blocks. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns of A that were actually factored. +*> KB is either NB-1 or NB, or N if N <= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, A contains details of the partial factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> Only the last KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> Only the first KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX array, dimension (LDW,NB) +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup complexHEcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CLAHEF_ROOK( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, + $ INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER IMAX, ITEMP, II, J, JB, JJ, JMAX, JP1, JP2, K, + $ KK, KKW, KP, KSTEP, KW, P + REAL ABSAKK, ALPHA, COLMAX, STEMP, R1, ROWMAX, T, + $ SFMIN + COMPLEX D11, D21, D22, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CSSCAL, CGEMM, CGEMV, CLACGV, CSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, AIMAG, MAX, MIN, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( REAL( Z ) ) + ABS( AIMAG( Z ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = SLAMCH( 'S' ) +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 (note that conjg(W) is actually stored) +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* + K = N + 10 CONTINUE +* +* KW is the column of W which corresponds to column K of A +* + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column KW of W and update it +* + IF( K.GT.1 ) + $ CALL CCOPY( K-1, A( 1, K ), 1, W( 1, KW ), 1 ) + W( K, KW ) = REAL( A( K, K ) ) + IF( K.LT.N ) THEN + CALL CGEMV( 'No transpose', K, N-K, -CONE, A( 1, K+1 ), LDA, + $ W( K, KW+1 ), LDW, CONE, W( 1, KW ), 1 ) + W( K, KW ) = REAL( W( K, KW ) ) + END IF +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( REAL( W( K, KW ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = ICAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = CABS1( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = REAL( W( K, KW ) ) + IF( K.GT.1 ) + $ CALL CCOPY( K-1, W( 1, KW ), 1, A( 1, K ), 1 ) + ELSE +* +* ============================================================ +* +* BEGIN pivot search +* +* Case(1) +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* +* Lop until pivot found +* + DONE = .FALSE. +* + 12 CONTINUE +* +* BEGIN pivot search loop body +* +* +* Copy column IMAX to column KW-1 of W and update it +* + IF( IMAX.GT.1 ) + $ CALL CCOPY( IMAX-1, A( 1, IMAX ), 1, W( 1, KW-1 ), + $ 1 ) + W( IMAX, KW-1 ) = REAL( A( IMAX, IMAX ) ) +* + CALL CCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) + CALL CLACGV( K-IMAX, W( IMAX+1, KW-1 ), 1 ) +* + IF( K.LT.N ) THEN + CALL CGEMV( 'No transpose', K, N-K, -CONE, + $ A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW, + $ CONE, W( 1, KW-1 ), 1 ) + W( IMAX, KW-1 ) = REAL( W( IMAX, KW-1 ) ) + END IF +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + ICAMAX( K-IMAX, W( IMAX+1, KW-1 ), + $ 1 ) + ROWMAX = CABS1( W( JMAX, KW-1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = ICAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + STEMP = CABS1( W( ITEMP, KW-1 ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Case(2) +* Equivalent to testing for +* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABS( REAL( W( IMAX,KW-1 ) ) ) + $ .LT.ALPHA*ROWMAX ) ) THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW of W +* + CALL CCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + DONE = .TRUE. +* +* Case(3) +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K-1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. +* +* Case(4) + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL CCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + END IF +* +* +* END pivot search loop body +* + IF( .NOT.DONE ) GOTO 12 +* + END IF +* +* END pivot search +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K - KSTEP + 1 +* +* KKW is the column of W which corresponds to column KK of A +* + KKW = NB + KK - N +* +* Interchange rows and columns P and K. +* Updated column P is already stored in column KW of W. +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column K to column P of submatrix A +* at step K. No need to copy element into columns +* K and K-1 of A for 2-by-2 pivot, since these columns +* will be later overwritten. +* + A( P, P ) = REAL( A( K, K ) ) + CALL CCOPY( K-1-P, A( P+1, K ), 1, A( P, P+1 ), + $ LDA ) + CALL CLACGV( K-1-P, A( P, P+1 ), LDA ) + IF( P.GT.1 ) + $ CALL CCOPY( P-1, A( 1, K ), 1, A( 1, P ), 1 ) +* +* Interchange rows K and P in the last K+1 to N columns of A +* (columns K and K-1 of A for 2-by-2 pivot will be +* later overwritten). Interchange rows K and P +* in last KKW to NB columns of W. +* + IF( K.LT.N ) + $ CALL CSWAP( N-K, A( K, K+1 ), LDA, A( P, K+1 ), + $ LDA ) + CALL CSWAP( N-KK+1, W( K, KKW ), LDW, W( P, KKW ), + $ LDW ) + END IF +* +* Interchange rows and columns KP and KK. +* Updated column KP is already stored in column KKW of W. +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP of submatrix A +* at step K. No need to copy element into column K +* (or K and K-1 for 2-by-2 pivot) of A, since these columns +* will be later overwritten. +* + A( KP, KP ) = REAL( A( KK, KK ) ) + CALL CCOPY( KK-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + CALL CLACGV( KK-1-KP, A( KP, KP+1 ), LDA ) + IF( KP.GT.1 ) + $ CALL CCOPY( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last K+1 to N columns of A +* (columns K (or K and K-1 for 2-by-2 pivot) of A will be +* later overwritten). Interchange rows KK and KP +* in last KKW to NB columns of W. +* + IF( K.LT.N ) + $ CALL CSWAP( N-K, A( KK, K+1 ), LDA, A( KP, K+1 ), + $ LDA ) + CALL CSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column kw of W now holds +* +* W(kw) = U(k)*D(k), +* +* where U(k) is the k-th column of U +* +* (1) Store subdiag. elements of column U(k) +* and 1-by-1 block D(k) in column k of A. +* (NOTE: Diagonal element U(k,k) is a UNIT element +* and not stored) +* A(k,k) := D(k,k) = W(k,kw) +* A(1:k-1,k) := U(1:k-1,k) = W(1:k-1,kw)/D(k,k) +* +* (NOTE: No need to use for Hermitian matrix +* A( K, K ) = REAL( W( K, K) ) to separately copy diagonal +* element D(k,k) from W (potentially saves only one load)) + CALL CCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + IF( K.GT.1 ) THEN +* +* (NOTE: No need to check if A(k,k) is NOT ZERO, +* since that was ensured earlier in pivot search: +* case A(k,k) = 0 falls into 2x2 pivot case(3)) +* +* Handle division by a small number +* + T = REAL( A( K, K ) ) + IF( ABS( T ).GE.SFMIN ) THEN + R1 = ONE / T + CALL CSSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE + DO 14 II = 1, K-1 + A( II, K ) = A( II, K ) / T + 14 CONTINUE + END IF +* +* (2) Conjugate column W(kw) +* + CALL CLACGV( K-1, W( 1, KW ), 1 ) + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns kw and kw-1 of W now hold +* +* ( W(kw-1) W(kw) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* (1) Store U(1:k-2,k-1) and U(1:k-2,k) and 2-by-2 +* block D(k-1:k,k-1:k) in columns k-1 and k of A. +* (NOTE: 2-by-2 diagonal block U(k-1:k,k-1:k) is a UNIT +* block and not stored) +* A(k-1:k,k-1:k) := D(k-1:k,k-1:k) = W(k-1:k,kw-1:kw) +* A(1:k-2,k-1:k) := U(1:k-2,k:k-1:k) = +* = W(1:k-2,kw-1:kw) * ( D(k-1:k,k-1:k)**(-1) ) +* + IF( K.GT.2 ) THEN +* +* Factor out the columns of the inverse of 2-by-2 pivot +* block D, so that each column contains 1, to reduce the +* number of FLOPS when we multiply panel +* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1). +* +* D**(-1) = ( d11 cj(d21) )**(-1) = +* ( d21 d22 ) +* +* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = +* ( (-d21) ( d11 ) ) +* +* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * +* +* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = +* ( ( -1 ) ( d11/conj(d21) ) ) +* +* = 1/(|d21|**2) * 1/(D22*D11-1) * +* +* * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* Handle division by a small number. (NOTE: order of +* operations is important) +* +* = ( T*(( D11 )/conj(D21)) T*(( -1 )/D21 ) ) +* ( (( -1 ) ) (( D22 ) ) ), +* +* where D11 = d22/d21, +* D22 = d11/conj(d21), +* D21 = d21, +* T = 1/(D22*D11-1). +* +* (NOTE: No need to check for division by ZERO, +* since that was ensured earlier in pivot search: +* (a) d21 != 0 in 2x2 pivot case(4), +* since |d21| should be larger than |d11| and |d22|; +* (b) (D22*D11 - 1) != 0, since from (a), +* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.) +* + D21 = W( K-1, KW ) + D11 = W( K, KW ) / CONJG( D21 ) + D22 = W( K-1, KW-1 ) / D21 + T = ONE / ( REAL( D11*D22 )-ONE ) +* +* Update elements in columns A(k-1) and A(k) as +* dot products of rows of ( W(kw-1) W(kw) ) and columns +* of D**(-1) +* + DO 20 J = 1, K - 2 + A( J, K-1 ) = T*( ( D11*W( J, KW-1 )-W( J, KW ) ) / + $ D21 ) + A( J, K ) = T*( ( D22*W( J, KW )-W( J, KW-1 ) ) / + $ CONJG( D21 ) ) + 20 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = W( K-1, KW ) + A( K, K ) = W( K, KW ) +* +* (2) Conjugate columns W(kw) and W(kw-1) +* + CALL CLACGV( K-1, W( 1, KW ), 1 ) + CALL CLACGV( K-2, W( 1, KW-1 ), 1 ) +* + END IF +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12**H = A11 - U12*W**H +* +* computing blocks of NB columns at a time (note that conjg(W) is +* actually stored) +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + A( JJ, JJ ) = REAL( A( JJ, JJ ) ) + CALL CGEMV( 'No transpose', JJ-J+1, N-K, -CONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, CONE, + $ A( J, JJ ), 1 ) + A( JJ, JJ ) = REAL( A( JJ, JJ ) ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + IF( J.GE.2 ) + $ CALL CGEMM( 'No transpose', 'Transpose', J-1, JB, N-K, + $ -CONE, A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, + $ CONE, A( 1, J ), LDA ) + 50 CONTINUE +* +* Put U12 in standard form by partially undoing the interchanges +* in of rows in columns k+1:n looping backwards from k+1 to n +* + J = K + 1 + 60 CONTINUE +* +* Undo the interchanges (if any) of rows J and JP2 +* (or J and JP2, and J+1 and JP1) at each step J +* + KSTEP = 1 + JP1 = 1 +* (Here, J is a diagonal index) + JJ = J + JP2 = IPIV( J ) + IF( JP2.LT.0 ) THEN + JP2 = -JP2 +* (Here, J is a diagonal index) + J = J + 1 + JP1 = -IPIV( J ) + KSTEP = 2 + END IF +* (NOTE: Here, J is used to determine row length. Length N-J+1 +* of the rows to swap back doesn't include diagonal element) + J = J + 1 + IF( JP2.NE.JJ .AND. J.LE.N ) + $ CALL CSWAP( N-J+1, A( JP2, J ), LDA, A( JJ, J ), LDA ) + JJ = JJ + 1 + IF( KSTEP.EQ.2 .AND. JP1.NE.JJ .AND. J.LE.N ) + $ CALL CSWAP( N-J+1, A( JP1, J ), LDA, A( JJ, J ), LDA ) + IF( J.LT.N ) + $ GO TO 60 +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 (note that conjg(W) is actually stored) +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column K of W and update column K of W +* + W( K, K ) = REAL( A( K, K ) ) + IF( K.LT.N ) + $ CALL CCOPY( N-K, A( K+1, K ), 1, W( K+1, K ), 1 ) + IF( K.GT.1 ) THEN + CALL CGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), + $ LDA, W( K, 1 ), LDW, CONE, W( K, K ), 1 ) + W( K, K ) = REAL( W( K, K ) ) + END IF +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( REAL( W( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + ICAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = CABS1( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = REAL( W( K, K ) ) + IF( K.LT.N ) + $ CALL CCOPY( N-K, W( K+1, K ), 1, A( K+1, K ), 1 ) + ELSE +* +* ============================================================ +* +* BEGIN pivot search +* +* Case(1) +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 72 CONTINUE +* +* BEGIN pivot search loop body +* +* +* Copy column IMAX to column k+1 of W and update it +* + CALL CCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1) + CALL CLACGV( IMAX-K, W( K, K+1 ), 1 ) + W( IMAX, K+1 ) = REAL( A( IMAX, IMAX ) ) +* + IF( IMAX.LT.N ) + $ CALL CCOPY( N-IMAX, A( IMAX+1, IMAX ), 1, + $ W( IMAX+1, K+1 ), 1 ) +* + IF( K.GT.1 ) THEN + CALL CGEMV( 'No transpose', N-K+1, K-1, -CONE, + $ A( K, 1 ), LDA, W( IMAX, 1 ), LDW, + $ CONE, W( K, K+1 ), 1 ) + W( IMAX, K+1 ) = REAL( W( IMAX, K+1 ) ) + END IF +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + ICAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = CABS1( W( JMAX, K+1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + ICAMAX( N-IMAX, W( IMAX+1, K+1 ), 1) + STEMP = CABS1( W( ITEMP, K+1 ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Case(2) +* Equivalent to testing for +* ABS( REAL( W( IMAX,K+1 ) ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABS( REAL( W( IMAX,K+1 ) ) ) + $ .LT.ALPHA*ROWMAX ) ) THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K of W +* + CALL CCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + DONE = .TRUE. +* +* Case(3) +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. +* +* Case(4) + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL CCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + END IF +* +* +* End pivot search loop body +* + IF( .NOT.DONE ) GOTO 72 +* + END IF +* +* END pivot search +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K + KSTEP - 1 +* +* Interchange rows and columns P and K (only for 2-by-2 pivot). +* Updated column P is already stored in column K of W. +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column KK-1 to column P of submatrix A +* at step K. No need to copy element into columns +* K and K+1 of A for 2-by-2 pivot, since these columns +* will be later overwritten. +* + A( P, P ) = REAL( A( K, K ) ) + CALL CCOPY( P-K-1, A( K+1, K ), 1, A( P, K+1 ), LDA ) + CALL CLACGV( P-K-1, A( P, K+1 ), LDA ) + IF( P.LT.N ) + $ CALL CCOPY( N-P, A( P+1, K ), 1, A( P+1, P ), 1 ) +* +* Interchange rows K and P in first K-1 columns of A +* (columns K and K+1 of A for 2-by-2 pivot will be +* later overwritten). Interchange rows K and P +* in first KK columns of W. +* + IF( K.GT.1 ) + $ CALL CSWAP( K-1, A( K, 1 ), LDA, A( P, 1 ), LDA ) + CALL CSWAP( KK, W( K, 1 ), LDW, W( P, 1 ), LDW ) + END IF +* +* Interchange rows and columns KP and KK. +* Updated column KP is already stored in column KK of W. +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP of submatrix A +* at step K. No need to copy element into column K +* (or K and K+1 for 2-by-2 pivot) of A, since these columns +* will be later overwritten. +* + A( KP, KP ) = REAL( A( KK, KK ) ) + CALL CCOPY( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + CALL CLACGV( KP-KK-1, A( KP, KK+1 ), LDA ) + IF( KP.LT.N ) + $ CALL CCOPY( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) +* +* Interchange rows KK and KP in first K-1 columns of A +* (column K (or K and K+1 for 2-by-2 pivot) of A will be +* later overwritten). Interchange rows KK and KP +* in first KK columns of W. +* + IF( K.GT.1 ) + $ CALL CSWAP( K-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL CSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k), +* +* where L(k) is the k-th column of L +* +* (1) Store subdiag. elements of column L(k) +* and 1-by-1 block D(k) in column k of A. +* (NOTE: Diagonal element L(k,k) is a UNIT element +* and not stored) +* A(k,k) := D(k,k) = W(k,k) +* A(k+1:N,k) := L(k+1:N,k) = W(k+1:N,k)/D(k,k) +* +* (NOTE: No need to use for Hermitian matrix +* A( K, K ) = REAL( W( K, K) ) to separately copy diagonal +* element D(k,k) from W (potentially saves only one load)) + CALL CCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN +* +* (NOTE: No need to check if A(k,k) is NOT ZERO, +* since that was ensured earlier in pivot search: +* case A(k,k) = 0 falls into 2x2 pivot case(3)) +* +* Handle division by a small number +* + T = REAL( A( K, K ) ) + IF( ABS( T ).GE.SFMIN ) THEN + R1 = ONE / T + CALL CSSCAL( N-K, R1, A( K+1, K ), 1 ) + ELSE + DO 74 II = K + 1, N + A( II, K ) = A( II, K ) / T + 74 CONTINUE + END IF +* +* (2) Conjugate column W(k) +* + CALL CLACGV( N-K, W( K+1, K ), 1 ) + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* (1) Store L(k+2:N,k) and L(k+2:N,k+1) and 2-by-2 +* block D(k:k+1,k:k+1) in columns k and k+1 of A. +* NOTE: 2-by-2 diagonal block L(k:k+1,k:k+1) is a UNIT +* block and not stored. +* A(k:k+1,k:k+1) := D(k:k+1,k:k+1) = W(k:k+1,k:k+1) +* A(k+2:N,k:k+1) := L(k+2:N,k:k+1) = +* = W(k+2:N,k:k+1) * ( D(k:k+1,k:k+1)**(-1) ) +* + IF( K.LT.N-1 ) THEN +* +* Factor out the columns of the inverse of 2-by-2 pivot +* block D, so that each column contains 1, to reduce the +* number of FLOPS when we multiply panel +* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1). +* +* D**(-1) = ( d11 cj(d21) )**(-1) = +* ( d21 d22 ) +* +* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = +* ( (-d21) ( d11 ) ) +* +* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * +* +* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = +* ( ( -1 ) ( d11/conj(d21) ) ) +* +* = 1/(|d21|**2) * 1/(D22*D11-1) * +* +* * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* Handle division by a small number. (NOTE: order of +* operations is important) +* +* = ( T*(( D11 )/conj(D21)) T*(( -1 )/D21 ) ) +* ( (( -1 ) ) (( D22 ) ) ), +* +* where D11 = d22/d21, +* D22 = d11/conj(d21), +* D21 = d21, +* T = 1/(D22*D11-1). +* +* (NOTE: No need to check for division by ZERO, +* since that was ensured earlier in pivot search: +* (a) d21 != 0 in 2x2 pivot case(4), +* since |d21| should be larger than |d11| and |d22|; +* (b) (D22*D11 - 1) != 0, since from (a), +* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.) +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / CONJG( D21 ) + T = ONE / ( REAL( D11*D22 )-ONE ) +* +* Update elements in columns A(k) and A(k+1) as +* dot products of rows of ( W(k) W(k+1) ) and columns +* of D**(-1) +* + DO 80 J = K + 2, N + A( J, K ) = T*( ( D11*W( J, K )-W( J, K+1 ) ) / + $ CONJG( D21 ) ) + A( J, K+1 ) = T*( ( D22*W( J, K+1 )-W( J, K ) ) / + $ D21 ) + 80 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = W( K+1, K ) + A( K+1, K+1 ) = W( K+1, K+1 ) +* +* (2) Conjugate columns W(k) and W(k+1) +* + CALL CLACGV( N-K, W( K+1, K ), 1 ) + CALL CLACGV( N-K-1, W( K+2, K+1 ), 1 ) +* + END IF +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21**H = A22 - L21*W**H +* +* computing blocks of NB columns at a time (note that conjg(W) is +* actually stored) +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + A( JJ, JJ ) = REAL( A( JJ, JJ ) ) + CALL CGEMV( 'No transpose', J+JB-JJ, K-1, -CONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, CONE, + $ A( JJ, JJ ), 1 ) + A( JJ, JJ ) = REAL( A( JJ, JJ ) ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL CGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -CONE, A( J+JB, 1 ), LDA, W( J, 1 ), + $ LDW, CONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Put L21 in standard form by partially undoing the interchanges +* of rows in columns 1:k-1 looping backwards from k-1 to 1 +* + J = K - 1 + 120 CONTINUE +* +* Undo the interchanges (if any) of rows J and JP2 +* (or J and JP2, and J-1 and JP1) at each step J +* + KSTEP = 1 + JP1 = 1 +* (Here, J is a diagonal index) + JJ = J + JP2 = IPIV( J ) + IF( JP2.LT.0 ) THEN + JP2 = -JP2 +* (Here, J is a diagonal index) + J = J - 1 + JP1 = -IPIV( J ) + KSTEP = 2 + END IF +* (NOTE: Here, J is used to determine row length. Length J +* of the rows to swap back doesn't include diagonal element) + J = J - 1 + IF( JP2.NE.JJ .AND. J.GE.1 ) + $ CALL CSWAP( J, A( JP2, 1 ), LDA, A( JJ, 1 ), LDA ) + JJ = JJ -1 + IF( KSTEP.EQ.2 .AND. JP1.NE.JJ .AND. J.GE.1 ) + $ CALL CSWAP( J, A( JP1, 1 ), LDA, A( JJ, 1 ), LDA ) + IF( J.GT.1 ) + $ GO TO 120 +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF + RETURN +* +* End of CLAHEF_ROOK +* + END diff --git a/dspl/liblapack/SRC/clahqr.f b/dspl/liblapack/SRC/clahqr.f new file mode 100644 index 0000000..de2b393 --- /dev/null +++ b/dspl/liblapack/SRC/clahqr.f @@ -0,0 +1,570 @@ +*> \brief \b CLAHQR computes the eigenvalues and Schur factorization of an upper Hessenberg matrix, using the double-shift/single-shift QR algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAHQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, +* IHIZ, Z, LDZ, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* COMPLEX H( LDH, * ), W( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAHQR is an auxiliary routine called by CHSEQR to update the +*> eigenvalues and Schur decomposition already computed by CHSEQR, by +*> dealing with the Hessenberg submatrix in rows and columns ILO to +*> IHI. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> = .TRUE. : the full Schur form T is required; +*> = .FALSE.: only eigenvalues are required. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> = .TRUE. : the matrix of Schur vectors Z is required; +*> = .FALSE.: Schur vectors are not required. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> It is assumed that H is already upper triangular in rows and +*> columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1). +*> CLAHQR works primarily with the Hessenberg submatrix in rows +*> and columns ILO to IHI, but applies transformations to all of +*> H if WANTT is .TRUE.. +*> 1 <= ILO <= max(1,IHI); IHI <= N. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is COMPLEX array, dimension (LDH,N) +*> On entry, the upper Hessenberg matrix H. +*> On exit, if INFO is zero and if WANTT is .TRUE., then H +*> is upper triangular in rows and columns ILO:IHI. If INFO +*> is zero and if WANTT is .FALSE., then the contents of H +*> are unspecified on exit. The output state of H in case +*> INF is positive is below under the description of INFO. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX array, dimension (N) +*> The computed eigenvalues ILO to IHI are stored in the +*> corresponding elements of W. If WANTT is .TRUE., the +*> eigenvalues are stored in the same order as on the diagonal +*> of the Schur form returned in H, with W(i) = H(i,i). +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. +*> 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ,N) +*> If WANTZ is .TRUE., on entry Z must contain the current +*> matrix Z of transformations accumulated by CHSEQR, and on +*> exit Z has been updated; transformations are applied only to +*> the submatrix Z(ILOZ:IHIZ,ILO:IHI). +*> If WANTZ is .FALSE., Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> .GT. 0: if INFO = i, CLAHQR failed to compute all the +*> eigenvalues ILO to IHI in a total of 30 iterations +*> per eigenvalue; elements i+1:ihi of W contain +*> those eigenvalues which have been successfully +*> computed. +*> +*> If INFO .GT. 0 and WANTT is .FALSE., then on exit, +*> the remaining unconverged eigenvalues are the +*> eigenvalues of the upper Hessenberg matrix +*> rows and columns ILO thorugh INFO of the final, +*> output value of H. +*> +*> If INFO .GT. 0 and WANTT is .TRUE., then on exit +*> (*) (initial value of H)*U = U*(final value of H) +*> where U is an orthognal matrix. The final +*> value of H is upper Hessenberg and triangular in +*> rows and columns INFO+1 through IHI. +*> +*> If INFO .GT. 0 and WANTZ is .TRUE., then on exit +*> (final value of Z) = (initial value of Z)*U +*> where U is the orthogonal matrix in (*) +*> (regardless of the value of WANTT.) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> 02-96 Based on modifications by +*> David Day, Sandia National Laboratory, USA +*> +*> 12-04 Further modifications by +*> Ralph Byers, University of Kansas, USA +*> This is a modified version of CLAHQR from LAPACK version 3.0. +*> It is (1) more robust against overflow and underflow and +*> (2) adopts the more conservative Ahues & Tisseur stopping +*> criterion (LAWN 122, 1997). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, + $ IHIZ, Z, LDZ, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + COMPLEX H( LDH, * ), W( * ), Z( LDZ, * ) +* .. +* +* ========================================================= +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0e0, 0.0e0 ), + $ ONE = ( 1.0e0, 0.0e0 ) ) + REAL RZERO, RONE, HALF + PARAMETER ( RZERO = 0.0e0, RONE = 1.0e0, HALF = 0.5e0 ) + REAL DAT1 + PARAMETER ( DAT1 = 3.0e0 / 4.0e0 ) +* .. +* .. Local Scalars .. + COMPLEX CDUM, H11, H11S, H22, SC, SUM, T, T1, TEMP, U, + $ V2, X, Y + REAL AA, AB, BA, BB, H10, H21, RTEMP, S, SAFMAX, + $ SAFMIN, SMLNUM, SX, T2, TST, ULP + INTEGER I, I1, I2, ITS, ITMAX, J, JHI, JLO, K, L, M, + $ NH, NZ +* .. +* .. Local Arrays .. + COMPLEX V( 2 ) +* .. +* .. External Functions .. + COMPLEX CLADIV + REAL SLAMCH + EXTERNAL CLADIV, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLARFG, CSCAL, SLABAD +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CONJG, MAX, MIN, REAL, SQRT +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( ILO.EQ.IHI ) THEN + W( ILO ) = H( ILO, ILO ) + RETURN + END IF +* +* ==== clear out the trash ==== + DO 10 J = ILO, IHI - 3 + H( J+2, J ) = ZERO + H( J+3, J ) = ZERO + 10 CONTINUE + IF( ILO.LE.IHI-2 ) + $ H( IHI, IHI-2 ) = ZERO +* ==== ensure that subdiagonal entries are real ==== + IF( WANTT ) THEN + JLO = 1 + JHI = N + ELSE + JLO = ILO + JHI = IHI + END IF + DO 20 I = ILO + 1, IHI + IF( AIMAG( H( I, I-1 ) ).NE.RZERO ) THEN +* ==== The following redundant normalization +* . avoids problems with both gradual and +* . sudden underflow in ABS(H(I,I-1)) ==== + SC = H( I, I-1 ) / CABS1( H( I, I-1 ) ) + SC = CONJG( SC ) / ABS( SC ) + H( I, I-1 ) = ABS( H( I, I-1 ) ) + CALL CSCAL( JHI-I+1, SC, H( I, I ), LDH ) + CALL CSCAL( MIN( JHI, I+1 )-JLO+1, CONJG( SC ), H( JLO, I ), + $ 1 ) + IF( WANTZ ) + $ CALL CSCAL( IHIZ-ILOZ+1, CONJG( SC ), Z( ILOZ, I ), 1 ) + END IF + 20 CONTINUE +* + NH = IHI - ILO + 1 + NZ = IHIZ - ILOZ + 1 +* +* Set machine-dependent constants for the stopping criterion. +* + SAFMIN = SLAMCH( 'SAFE MINIMUM' ) + SAFMAX = RONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + ULP = SLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( REAL( NH ) / ULP ) +* +* I1 and I2 are the indices of the first row and last column of H +* to which transformations must be applied. If eigenvalues only are +* being computed, I1 and I2 are set inside the main loop. +* + IF( WANTT ) THEN + I1 = 1 + I2 = N + END IF +* +* ITMAX is the total number of QR iterations allowed. +* + ITMAX = 30 * MAX( 10, NH ) +* +* The main loop begins here. I is the loop index and decreases from +* IHI to ILO in steps of 1. Each iteration of the loop works +* with the active submatrix in rows and columns L to I. +* Eigenvalues I+1 to IHI have already converged. Either L = ILO, or +* H(L,L-1) is negligible so that the matrix splits. +* + I = IHI + 30 CONTINUE + IF( I.LT.ILO ) + $ GO TO 150 +* +* Perform QR iterations on rows and columns ILO to I until a +* submatrix of order 1 splits off at the bottom because a +* subdiagonal element has become negligible. +* + L = ILO + DO 130 ITS = 0, ITMAX +* +* Look for a single small subdiagonal element. +* + DO 40 K = I, L + 1, -1 + IF( CABS1( H( K, K-1 ) ).LE.SMLNUM ) + $ GO TO 50 + TST = CABS1( H( K-1, K-1 ) ) + CABS1( H( K, K ) ) + IF( TST.EQ.ZERO ) THEN + IF( K-2.GE.ILO ) + $ TST = TST + ABS( REAL( H( K-1, K-2 ) ) ) + IF( K+1.LE.IHI ) + $ TST = TST + ABS( REAL( H( K+1, K ) ) ) + END IF +* ==== The following is a conservative small subdiagonal +* . deflation criterion due to Ahues & Tisseur (LAWN 122, +* . 1997). It has better mathematical foundation and +* . improves accuracy in some examples. ==== + IF( ABS( REAL( H( K, K-1 ) ) ).LE.ULP*TST ) THEN + AB = MAX( CABS1( H( K, K-1 ) ), CABS1( H( K-1, K ) ) ) + BA = MIN( CABS1( H( K, K-1 ) ), CABS1( H( K-1, K ) ) ) + AA = MAX( CABS1( H( K, K ) ), + $ CABS1( H( K-1, K-1 )-H( K, K ) ) ) + BB = MIN( CABS1( H( K, K ) ), + $ CABS1( H( K-1, K-1 )-H( K, K ) ) ) + S = AA + AB + IF( BA*( AB / S ).LE.MAX( SMLNUM, + $ ULP*( BB*( AA / S ) ) ) )GO TO 50 + END IF + 40 CONTINUE + 50 CONTINUE + L = K + IF( L.GT.ILO ) THEN +* +* H(L,L-1) is negligible +* + H( L, L-1 ) = ZERO + END IF +* +* Exit from loop if a submatrix of order 1 has split off. +* + IF( L.GE.I ) + $ GO TO 140 +* +* Now the active submatrix is in rows and columns L to I. If +* eigenvalues only are being computed, only the active submatrix +* need be transformed. +* + IF( .NOT.WANTT ) THEN + I1 = L + I2 = I + END IF +* + IF( ITS.EQ.10 ) THEN +* +* Exceptional shift. +* + S = DAT1*ABS( REAL( H( L+1, L ) ) ) + T = S + H( L, L ) + ELSE IF( ITS.EQ.20 ) THEN +* +* Exceptional shift. +* + S = DAT1*ABS( REAL( H( I, I-1 ) ) ) + T = S + H( I, I ) + ELSE +* +* Wilkinson's shift. +* + T = H( I, I ) + U = SQRT( H( I-1, I ) )*SQRT( H( I, I-1 ) ) + S = CABS1( U ) + IF( S.NE.RZERO ) THEN + X = HALF*( H( I-1, I-1 )-T ) + SX = CABS1( X ) + S = MAX( S, CABS1( X ) ) + Y = S*SQRT( ( X / S )**2+( U / S )**2 ) + IF( SX.GT.RZERO ) THEN + IF( REAL( X / SX )*REAL( Y )+AIMAG( X / SX )* + $ AIMAG( Y ).LT.RZERO )Y = -Y + END IF + T = T - U*CLADIV( U, ( X+Y ) ) + END IF + END IF +* +* Look for two consecutive small subdiagonal elements. +* + DO 60 M = I - 1, L + 1, -1 +* +* Determine the effect of starting the single-shift QR +* iteration at row M, and see if this would make H(M,M-1) +* negligible. +* + H11 = H( M, M ) + H22 = H( M+1, M+1 ) + H11S = H11 - T + H21 = REAL( H( M+1, M ) ) + S = CABS1( H11S ) + ABS( H21 ) + H11S = H11S / S + H21 = H21 / S + V( 1 ) = H11S + V( 2 ) = H21 + H10 = REAL( H( M, M-1 ) ) + IF( ABS( H10 )*ABS( H21 ).LE.ULP* + $ ( CABS1( H11S )*( CABS1( H11 )+CABS1( H22 ) ) ) ) + $ GO TO 70 + 60 CONTINUE + H11 = H( L, L ) + H22 = H( L+1, L+1 ) + H11S = H11 - T + H21 = REAL( H( L+1, L ) ) + S = CABS1( H11S ) + ABS( H21 ) + H11S = H11S / S + H21 = H21 / S + V( 1 ) = H11S + V( 2 ) = H21 + 70 CONTINUE +* +* Single-shift QR step +* + DO 120 K = M, I - 1 +* +* The first iteration of this loop determines a reflection G +* from the vector V and applies it from left and right to H, +* thus creating a nonzero bulge below the subdiagonal. +* +* Each subsequent iteration determines a reflection G to +* restore the Hessenberg form in the (K-1)th column, and thus +* chases the bulge one step toward the bottom of the active +* submatrix. +* +* V(2) is always real before the call to CLARFG, and hence +* after the call T2 ( = T1*V(2) ) is also real. +* + IF( K.GT.M ) + $ CALL CCOPY( 2, H( K, K-1 ), 1, V, 1 ) + CALL CLARFG( 2, V( 1 ), V( 2 ), 1, T1 ) + IF( K.GT.M ) THEN + H( K, K-1 ) = V( 1 ) + H( K+1, K-1 ) = ZERO + END IF + V2 = V( 2 ) + T2 = REAL( T1*V2 ) +* +* Apply G from the left to transform the rows of the matrix +* in columns K to I2. +* + DO 80 J = K, I2 + SUM = CONJG( T1 )*H( K, J ) + T2*H( K+1, J ) + H( K, J ) = H( K, J ) - SUM + H( K+1, J ) = H( K+1, J ) - SUM*V2 + 80 CONTINUE +* +* Apply G from the right to transform the columns of the +* matrix in rows I1 to min(K+2,I). +* + DO 90 J = I1, MIN( K+2, I ) + SUM = T1*H( J, K ) + T2*H( J, K+1 ) + H( J, K ) = H( J, K ) - SUM + H( J, K+1 ) = H( J, K+1 ) - SUM*CONJG( V2 ) + 90 CONTINUE +* + IF( WANTZ ) THEN +* +* Accumulate transformations in the matrix Z +* + DO 100 J = ILOZ, IHIZ + SUM = T1*Z( J, K ) + T2*Z( J, K+1 ) + Z( J, K ) = Z( J, K ) - SUM + Z( J, K+1 ) = Z( J, K+1 ) - SUM*CONJG( V2 ) + 100 CONTINUE + END IF +* + IF( K.EQ.M .AND. M.GT.L ) THEN +* +* If the QR step was started at row M > L because two +* consecutive small subdiagonals were found, then extra +* scaling must be performed to ensure that H(M,M-1) remains +* real. +* + TEMP = ONE - T1 + TEMP = TEMP / ABS( TEMP ) + H( M+1, M ) = H( M+1, M )*CONJG( TEMP ) + IF( M+2.LE.I ) + $ H( M+2, M+1 ) = H( M+2, M+1 )*TEMP + DO 110 J = M, I + IF( J.NE.M+1 ) THEN + IF( I2.GT.J ) + $ CALL CSCAL( I2-J, TEMP, H( J, J+1 ), LDH ) + CALL CSCAL( J-I1, CONJG( TEMP ), H( I1, J ), 1 ) + IF( WANTZ ) THEN + CALL CSCAL( NZ, CONJG( TEMP ), Z( ILOZ, J ), 1 ) + END IF + END IF + 110 CONTINUE + END IF + 120 CONTINUE +* +* Ensure that H(I,I-1) is real. +* + TEMP = H( I, I-1 ) + IF( AIMAG( TEMP ).NE.RZERO ) THEN + RTEMP = ABS( TEMP ) + H( I, I-1 ) = RTEMP + TEMP = TEMP / RTEMP + IF( I2.GT.I ) + $ CALL CSCAL( I2-I, CONJG( TEMP ), H( I, I+1 ), LDH ) + CALL CSCAL( I-I1, TEMP, H( I1, I ), 1 ) + IF( WANTZ ) THEN + CALL CSCAL( NZ, TEMP, Z( ILOZ, I ), 1 ) + END IF + END IF +* + 130 CONTINUE +* +* Failure to converge in remaining number of iterations +* + INFO = I + RETURN +* + 140 CONTINUE +* +* H(I,I-1) is negligible: one eigenvalue has converged. +* + W( I ) = H( I, I ) +* +* return to start of the main loop with new value of I. +* + I = L - 1 + GO TO 30 +* + 150 CONTINUE + RETURN +* +* End of CLAHQR +* + END diff --git a/dspl/liblapack/SRC/clahr2.f b/dspl/liblapack/SRC/clahr2.f new file mode 100644 index 0000000..50547f2 --- /dev/null +++ b/dspl/liblapack/SRC/clahr2.f @@ -0,0 +1,328 @@ +*> \brief \b CLAHR2 reduces the specified number of first columns of a general rectangular matrix A so that elements below the specified subdiagonal are zero, and returns auxiliary matrices which are needed to apply the transformation to the unreduced part of A. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAHR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAHR2( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* .. Scalar Arguments .. +* INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), T( LDT, NB ), TAU( NB ), +* $ Y( LDY, NB ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAHR2 reduces the first NB columns of A complex general n-BY-(n-k+1) +*> matrix A so that elements below the k-th subdiagonal are zero. The +*> reduction is performed by an unitary similarity transformation +*> Q**H * A * Q. The routine returns the matrices V and T which determine +*> Q as a block reflector I - V*T*v**H, and also the matrix Y = A * V * T. +*> +*> This is an auxiliary routine called by CGEHRD. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The offset for the reduction. Elements below the k-th +*> subdiagonal in the first NB columns are reduced to zero. +*> K < N. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of columns to be reduced. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N-K+1) +*> On entry, the n-by-(n-k+1) general matrix A. +*> On exit, the elements on and above the k-th subdiagonal in +*> the first NB columns are overwritten with the corresponding +*> elements of the reduced matrix; the elements below the k-th +*> subdiagonal, with the array TAU, represent the matrix Q as a +*> product of elementary reflectors. The other columns of A are +*> unchanged. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (NB) +*> The scalar factors of the elementary reflectors. See Further +*> Details. +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,NB) +*> The upper triangular matrix T. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[out] Y +*> \verbatim +*> Y is COMPLEX array, dimension (LDY,NB) +*> The n-by-nb matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= N. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of nb elementary reflectors +*> +*> Q = H(1) H(2) . . . H(nb). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in +*> A(i+k+1:n,i), and tau in TAU(i). +*> +*> The elements of the vectors v together form the (n-k+1)-by-nb matrix +*> V which is needed, with T and Y, to apply the transformation to the +*> unreduced part of the matrix, using an update of the form: +*> A := (I - V*T*V**H) * (A - Y*V**H). +*> +*> The contents of A on exit are illustrated by the following example +*> with n = 7, k = 3 and nb = 2: +*> +*> ( a a a a a ) +*> ( a a a a a ) +*> ( a a a a a ) +*> ( h h a a a ) +*> ( v1 h a a a ) +*> ( v1 v2 a a a ) +*> ( v1 v2 a a a ) +*> +*> where a denotes an element of the original matrix A, h denotes a +*> modified element of the upper Hessenberg matrix H, and vi denotes an +*> element of the vector defining H(i). +*> +*> This subroutine is a slight modification of LAPACK-3.0's DLAHRD +*> incorporating improvements proposed by Quintana-Orti and Van de +*> Gejin. Note that the entries of A(1:K,2:NB) differ from those +*> returned by the original LAPACK-3.0's DLAHRD routine. (This +*> subroutine is not backward compatible with LAPACK-3.0's DLAHRD.) +*> \endverbatim +* +*> \par References: +* ================ +*> +*> Gregorio Quintana-Orti and Robert van de Geijn, "Improving the +*> performance of reduction to Hessenberg form," ACM Transactions on +*> Mathematical Software, 32(2):180-194, June 2006. +*> +* ===================================================================== + SUBROUTINE CLAHR2( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), T( LDT, NB ), TAU( NB ), + $ Y( LDY, NB ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX EI +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGEMM, CGEMV, CLACPY, + $ CLARFG, CSCAL, CTRMM, CTRMV, CLACGV +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, NB + IF( I.GT.1 ) THEN +* +* Update A(K+1:N,I) +* +* Update I-th column of A - Y * V**H +* + CALL CLACGV( I-1, A( K+I-1, 1 ), LDA ) + CALL CGEMV( 'NO TRANSPOSE', N-K, I-1, -ONE, Y(K+1,1), LDY, + $ A( K+I-1, 1 ), LDA, ONE, A( K+1, I ), 1 ) + CALL CLACGV( I-1, A( K+I-1, 1 ), LDA ) +* +* Apply I - V * T**H * V**H to this column (call it b) from the +* left, using the last column of T as workspace +* +* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) +* ( V2 ) ( b2 ) +* +* where V1 is unit lower triangular +* +* w := V1**H * b1 +* + CALL CCOPY( I-1, A( K+1, I ), 1, T( 1, NB ), 1 ) + CALL CTRMV( 'Lower', 'Conjugate transpose', 'UNIT', + $ I-1, A( K+1, 1 ), + $ LDA, T( 1, NB ), 1 ) +* +* w := w + V2**H * b2 +* + CALL CGEMV( 'Conjugate transpose', N-K-I+1, I-1, + $ ONE, A( K+I, 1 ), + $ LDA, A( K+I, I ), 1, ONE, T( 1, NB ), 1 ) +* +* w := T**H * w +* + CALL CTRMV( 'Upper', 'Conjugate transpose', 'NON-UNIT', + $ I-1, T, LDT, + $ T( 1, NB ), 1 ) +* +* b2 := b2 - V2*w +* + CALL CGEMV( 'NO TRANSPOSE', N-K-I+1, I-1, -ONE, + $ A( K+I, 1 ), + $ LDA, T( 1, NB ), 1, ONE, A( K+I, I ), 1 ) +* +* b1 := b1 - V1*w +* + CALL CTRMV( 'Lower', 'NO TRANSPOSE', + $ 'UNIT', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) + CALL CAXPY( I-1, -ONE, T( 1, NB ), 1, A( K+1, I ), 1 ) +* + A( K+I-1, I-1 ) = EI + END IF +* +* Generate the elementary reflector H(I) to annihilate +* A(K+I+1:N,I) +* + CALL CLARFG( N-K-I+1, A( K+I, I ), A( MIN( K+I+1, N ), I ), 1, + $ TAU( I ) ) + EI = A( K+I, I ) + A( K+I, I ) = ONE +* +* Compute Y(K+1:N,I) +* + CALL CGEMV( 'NO TRANSPOSE', N-K, N-K-I+1, + $ ONE, A( K+1, I+1 ), + $ LDA, A( K+I, I ), 1, ZERO, Y( K+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', N-K-I+1, I-1, + $ ONE, A( K+I, 1 ), LDA, + $ A( K+I, I ), 1, ZERO, T( 1, I ), 1 ) + CALL CGEMV( 'NO TRANSPOSE', N-K, I-1, -ONE, + $ Y( K+1, 1 ), LDY, + $ T( 1, I ), 1, ONE, Y( K+1, I ), 1 ) + CALL CSCAL( N-K, TAU( I ), Y( K+1, I ), 1 ) +* +* Compute T(1:I,I) +* + CALL CSCAL( I-1, -TAU( I ), T( 1, I ), 1 ) + CALL CTRMV( 'Upper', 'No Transpose', 'NON-UNIT', + $ I-1, T, LDT, + $ T( 1, I ), 1 ) + T( I, I ) = TAU( I ) +* + 10 CONTINUE + A( K+NB, NB ) = EI +* +* Compute Y(1:K,1:NB) +* + CALL CLACPY( 'ALL', K, NB, A( 1, 2 ), LDA, Y, LDY ) + CALL CTRMM( 'RIGHT', 'Lower', 'NO TRANSPOSE', + $ 'UNIT', K, NB, + $ ONE, A( K+1, 1 ), LDA, Y, LDY ) + IF( N.GT.K+NB ) + $ CALL CGEMM( 'NO TRANSPOSE', 'NO TRANSPOSE', K, + $ NB, N-K-NB, ONE, + $ A( 1, 2+NB ), LDA, A( K+1+NB, 1 ), LDA, ONE, Y, + $ LDY ) + CALL CTRMM( 'RIGHT', 'Upper', 'NO TRANSPOSE', + $ 'NON-UNIT', K, NB, + $ ONE, T, LDT, Y, LDY ) +* + RETURN +* +* End of CLAHR2 +* + END diff --git a/dspl/liblapack/SRC/claic1.f b/dspl/liblapack/SRC/claic1.f new file mode 100644 index 0000000..9aaecf4 --- /dev/null +++ b/dspl/liblapack/SRC/claic1.f @@ -0,0 +1,371 @@ +*> \brief \b CLAIC1 applies one step of incremental condition estimation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAIC1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C ) +* +* .. Scalar Arguments .. +* INTEGER J, JOB +* REAL SEST, SESTPR +* COMPLEX C, GAMMA, S +* .. +* .. Array Arguments .. +* COMPLEX W( J ), X( J ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAIC1 applies one step of incremental condition estimation in +*> its simplest version: +*> +*> Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j +*> lower triangular matrix L, such that +*> twonorm(L*x) = sest +*> Then CLAIC1 computes sestpr, s, c such that +*> the vector +*> [ s*x ] +*> xhat = [ c ] +*> is an approximate singular vector of +*> [ L 0 ] +*> Lhat = [ w**H gamma ] +*> in the sense that +*> twonorm(Lhat*xhat) = sestpr. +*> +*> Depending on JOB, an estimate for the largest or smallest singular +*> value is computed. +*> +*> Note that [s c]**H and sestpr**2 is an eigenpair of the system +*> +*> diag(sest*sest, 0) + [alpha gamma] * [ conjg(alpha) ] +*> [ conjg(gamma) ] +*> +*> where alpha = x**H*w. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is INTEGER +*> = 1: an estimate for the largest singular value is computed. +*> = 2: an estimate for the smallest singular value is computed. +*> \endverbatim +*> +*> \param[in] J +*> \verbatim +*> J is INTEGER +*> Length of X and W +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX array, dimension (J) +*> The j-vector x. +*> \endverbatim +*> +*> \param[in] SEST +*> \verbatim +*> SEST is REAL +*> Estimated singular value of j by j matrix L +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is COMPLEX array, dimension (J) +*> The j-vector w. +*> \endverbatim +*> +*> \param[in] GAMMA +*> \verbatim +*> GAMMA is COMPLEX +*> The diagonal element gamma. +*> \endverbatim +*> +*> \param[out] SESTPR +*> \verbatim +*> SESTPR is REAL +*> Estimated singular value of (j+1) by (j+1) matrix Lhat. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is COMPLEX +*> Sine needed in forming xhat. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is COMPLEX +*> Cosine needed in forming xhat. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER J, JOB + REAL SEST, SESTPR + COMPLEX C, GAMMA, S +* .. +* .. Array Arguments .. + COMPLEX W( J ), X( J ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 ) + REAL HALF, FOUR + PARAMETER ( HALF = 0.5E0, FOUR = 4.0E0 ) +* .. +* .. Local Scalars .. + REAL ABSALP, ABSEST, ABSGAM, B, EPS, NORMA, S1, S2, + $ SCL, T, TEST, TMP, ZETA1, ZETA2 + COMPLEX ALPHA, COSINE, SINE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, SQRT +* .. +* .. External Functions .. + REAL SLAMCH + COMPLEX CDOTC + EXTERNAL SLAMCH, CDOTC +* .. +* .. Executable Statements .. +* + EPS = SLAMCH( 'Epsilon' ) + ALPHA = CDOTC( J, X, 1, W, 1 ) +* + ABSALP = ABS( ALPHA ) + ABSGAM = ABS( GAMMA ) + ABSEST = ABS( SEST ) +* + IF( JOB.EQ.1 ) THEN +* +* Estimating largest singular value +* +* special cases +* + IF( SEST.EQ.ZERO ) THEN + S1 = MAX( ABSGAM, ABSALP ) + IF( S1.EQ.ZERO ) THEN + S = ZERO + C = ONE + SESTPR = ZERO + ELSE + S = ALPHA / S1 + C = GAMMA / S1 + TMP = SQRT( S*CONJG( S )+C*CONJG( C ) ) + S = S / TMP + C = C / TMP + SESTPR = S1*TMP + END IF + RETURN + ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN + S = ONE + C = ZERO + TMP = MAX( ABSEST, ABSALP ) + S1 = ABSEST / TMP + S2 = ABSALP / TMP + SESTPR = TMP*SQRT( S1*S1+S2*S2 ) + RETURN + ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN + S1 = ABSGAM + S2 = ABSEST + IF( S1.LE.S2 ) THEN + S = ONE + C = ZERO + SESTPR = S2 + ELSE + S = ZERO + C = ONE + SESTPR = S1 + END IF + RETURN + ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN + S1 = ABSGAM + S2 = ABSALP + IF( S1.LE.S2 ) THEN + TMP = S1 / S2 + SCL = SQRT( ONE+TMP*TMP ) + SESTPR = S2*SCL + S = ( ALPHA / S2 ) / SCL + C = ( GAMMA / S2 ) / SCL + ELSE + TMP = S2 / S1 + SCL = SQRT( ONE+TMP*TMP ) + SESTPR = S1*SCL + S = ( ALPHA / S1 ) / SCL + C = ( GAMMA / S1 ) / SCL + END IF + RETURN + ELSE +* +* normal case +* + ZETA1 = ABSALP / ABSEST + ZETA2 = ABSGAM / ABSEST +* + B = ( ONE-ZETA1*ZETA1-ZETA2*ZETA2 )*HALF + C = ZETA1*ZETA1 + IF( B.GT.ZERO ) THEN + T = C / ( B+SQRT( B*B+C ) ) + ELSE + T = SQRT( B*B+C ) - B + END IF +* + SINE = -( ALPHA / ABSEST ) / T + COSINE = -( GAMMA / ABSEST ) / ( ONE+T ) + TMP = SQRT( SINE*CONJG( SINE )+COSINE*CONJG( COSINE ) ) + S = SINE / TMP + C = COSINE / TMP + SESTPR = SQRT( T+ONE )*ABSEST + RETURN + END IF +* + ELSE IF( JOB.EQ.2 ) THEN +* +* Estimating smallest singular value +* +* special cases +* + IF( SEST.EQ.ZERO ) THEN + SESTPR = ZERO + IF( MAX( ABSGAM, ABSALP ).EQ.ZERO ) THEN + SINE = ONE + COSINE = ZERO + ELSE + SINE = -CONJG( GAMMA ) + COSINE = CONJG( ALPHA ) + END IF + S1 = MAX( ABS( SINE ), ABS( COSINE ) ) + S = SINE / S1 + C = COSINE / S1 + TMP = SQRT( S*CONJG( S )+C*CONJG( C ) ) + S = S / TMP + C = C / TMP + RETURN + ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN + S = ZERO + C = ONE + SESTPR = ABSGAM + RETURN + ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN + S1 = ABSGAM + S2 = ABSEST + IF( S1.LE.S2 ) THEN + S = ZERO + C = ONE + SESTPR = S1 + ELSE + S = ONE + C = ZERO + SESTPR = S2 + END IF + RETURN + ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN + S1 = ABSGAM + S2 = ABSALP + IF( S1.LE.S2 ) THEN + TMP = S1 / S2 + SCL = SQRT( ONE+TMP*TMP ) + SESTPR = ABSEST*( TMP / SCL ) + S = -( CONJG( GAMMA ) / S2 ) / SCL + C = ( CONJG( ALPHA ) / S2 ) / SCL + ELSE + TMP = S2 / S1 + SCL = SQRT( ONE+TMP*TMP ) + SESTPR = ABSEST / SCL + S = -( CONJG( GAMMA ) / S1 ) / SCL + C = ( CONJG( ALPHA ) / S1 ) / SCL + END IF + RETURN + ELSE +* +* normal case +* + ZETA1 = ABSALP / ABSEST + ZETA2 = ABSGAM / ABSEST +* + NORMA = MAX( ONE+ZETA1*ZETA1+ZETA1*ZETA2, + $ ZETA1*ZETA2+ZETA2*ZETA2 ) +* +* See if root is closer to zero or to ONE +* + TEST = ONE + TWO*( ZETA1-ZETA2 )*( ZETA1+ZETA2 ) + IF( TEST.GE.ZERO ) THEN +* +* root is close to zero, compute directly +* + B = ( ZETA1*ZETA1+ZETA2*ZETA2+ONE )*HALF + C = ZETA2*ZETA2 + T = C / ( B+SQRT( ABS( B*B-C ) ) ) + SINE = ( ALPHA / ABSEST ) / ( ONE-T ) + COSINE = -( GAMMA / ABSEST ) / T + SESTPR = SQRT( T+FOUR*EPS*EPS*NORMA )*ABSEST + ELSE +* +* root is closer to ONE, shift by that amount +* + B = ( ZETA2*ZETA2+ZETA1*ZETA1-ONE )*HALF + C = ZETA1*ZETA1 + IF( B.GE.ZERO ) THEN + T = -C / ( B+SQRT( B*B+C ) ) + ELSE + T = B - SQRT( B*B+C ) + END IF + SINE = -( ALPHA / ABSEST ) / T + COSINE = -( GAMMA / ABSEST ) / ( ONE+T ) + SESTPR = SQRT( ONE+T+FOUR*EPS*EPS*NORMA )*ABSEST + END IF + TMP = SQRT( SINE*CONJG( SINE )+COSINE*CONJG( COSINE ) ) + S = SINE / TMP + C = COSINE / TMP + RETURN +* + END IF + END IF + RETURN +* +* End of CLAIC1 +* + END diff --git a/dspl/liblapack/SRC/clals0.f b/dspl/liblapack/SRC/clals0.f new file mode 100644 index 0000000..90bff58 --- /dev/null +++ b/dspl/liblapack/SRC/clals0.f @@ -0,0 +1,556 @@ +*> \brief \b CLALS0 applies back multiplying factors in solving the least squares problem using divide and conquer SVD approach. Used by sgelsd. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLALS0 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX, +* PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, +* POLES, DIFL, DIFR, Z, K, C, S, RWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL, +* $ LDGNUM, NL, NR, NRHS, SQRE +* REAL C, S +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( LDGCOL, * ), PERM( * ) +* REAL DIFL( * ), DIFR( LDGNUM, * ), +* $ GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ), +* $ RWORK( * ), Z( * ) +* COMPLEX B( LDB, * ), BX( LDBX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLALS0 applies back the multiplying factors of either the left or the +*> right singular vector matrix of a diagonal matrix appended by a row +*> to the right hand side matrix B in solving the least squares problem +*> using the divide-and-conquer SVD approach. +*> +*> For the left singular vector matrix, three types of orthogonal +*> matrices are involved: +*> +*> (1L) Givens rotations: the number of such rotations is GIVPTR; the +*> pairs of columns/rows they were applied to are stored in GIVCOL; +*> and the C- and S-values of these rotations are stored in GIVNUM. +*> +*> (2L) Permutation. The (NL+1)-st row of B is to be moved to the first +*> row, and for J=2:N, PERM(J)-th row of B is to be moved to the +*> J-th row. +*> +*> (3L) The left singular vector matrix of the remaining matrix. +*> +*> For the right singular vector matrix, four types of orthogonal +*> matrices are involved: +*> +*> (1R) The right singular vector matrix of the remaining matrix. +*> +*> (2R) If SQRE = 1, one extra Givens rotation to generate the right +*> null space. +*> +*> (3R) The inverse transformation of (2L). +*> +*> (4R) The inverse transformation of (1L). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> Specifies whether singular vectors are to be computed in +*> factored form: +*> = 0: Left singular vector matrix. +*> = 1: Right singular vector matrix. +*> \endverbatim +*> +*> \param[in] NL +*> \verbatim +*> NL is INTEGER +*> The row dimension of the upper block. NL >= 1. +*> \endverbatim +*> +*> \param[in] NR +*> \verbatim +*> NR is INTEGER +*> The row dimension of the lower block. NR >= 1. +*> \endverbatim +*> +*> \param[in] SQRE +*> \verbatim +*> SQRE is INTEGER +*> = 0: the lower block is an NR-by-NR square matrix. +*> = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +*> +*> The bidiagonal matrix has row dimension N = NL + NR + 1, +*> and column dimension M = N + SQRE. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of columns of B and BX. NRHS must be at least 1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension ( LDB, NRHS ) +*> On input, B contains the right hand sides of the least +*> squares problem in rows 1 through M. On output, B contains +*> the solution X in rows 1 through N. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB must be at least +*> max(1,MAX( M, N ) ). +*> \endverbatim +*> +*> \param[out] BX +*> \verbatim +*> BX is COMPLEX array, dimension ( LDBX, NRHS ) +*> \endverbatim +*> +*> \param[in] LDBX +*> \verbatim +*> LDBX is INTEGER +*> The leading dimension of BX. +*> \endverbatim +*> +*> \param[in] PERM +*> \verbatim +*> PERM is INTEGER array, dimension ( N ) +*> The permutations (from deflation and sorting) applied +*> to the two blocks. +*> \endverbatim +*> +*> \param[in] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER +*> The number of Givens rotations which took place in this +*> subproblem. +*> \endverbatim +*> +*> \param[in] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension ( LDGCOL, 2 ) +*> Each pair of numbers indicates a pair of rows/columns +*> involved in a Givens rotation. +*> \endverbatim +*> +*> \param[in] LDGCOL +*> \verbatim +*> LDGCOL is INTEGER +*> The leading dimension of GIVCOL, must be at least N. +*> \endverbatim +*> +*> \param[in] GIVNUM +*> \verbatim +*> GIVNUM is REAL array, dimension ( LDGNUM, 2 ) +*> Each number indicates the C or S value used in the +*> corresponding Givens rotation. +*> \endverbatim +*> +*> \param[in] LDGNUM +*> \verbatim +*> LDGNUM is INTEGER +*> The leading dimension of arrays DIFR, POLES and +*> GIVNUM, must be at least K. +*> \endverbatim +*> +*> \param[in] POLES +*> \verbatim +*> POLES is REAL array, dimension ( LDGNUM, 2 ) +*> On entry, POLES(1:K, 1) contains the new singular +*> values obtained from solving the secular equation, and +*> POLES(1:K, 2) is an array containing the poles in the secular +*> equation. +*> \endverbatim +*> +*> \param[in] DIFL +*> \verbatim +*> DIFL is REAL array, dimension ( K ). +*> On entry, DIFL(I) is the distance between I-th updated +*> (undeflated) singular value and the I-th (undeflated) old +*> singular value. +*> \endverbatim +*> +*> \param[in] DIFR +*> \verbatim +*> DIFR is REAL array, dimension ( LDGNUM, 2 ). +*> On entry, DIFR(I, 1) contains the distances between I-th +*> updated (undeflated) singular value and the I+1-th +*> (undeflated) old singular value. And DIFR(I, 2) is the +*> normalizing factor for the I-th right singular vector. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is REAL array, dimension ( K ) +*> Contain the components of the deflation-adjusted updating row +*> vector. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> Contains the dimension of the non-deflated matrix, +*> This is the order of the related secular equation. 1 <= K <=N. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL +*> C contains garbage if SQRE =0 and the C-value of a Givens +*> rotation related to the right null space if SQRE = 1. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is REAL +*> S contains garbage if SQRE =0 and the S-value of a Givens +*> rotation related to the right null space if SQRE = 1. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension +*> ( K*(1+NRHS) + 2*NRHS ) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Ren-Cang Li, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +* +* ===================================================================== + SUBROUTINE CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX, + $ PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, + $ POLES, DIFL, DIFR, Z, K, C, S, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL, + $ LDGNUM, NL, NR, NRHS, SQRE + REAL C, S +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), PERM( * ) + REAL DIFL( * ), DIFR( LDGNUM, * ), + $ GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ), + $ RWORK( * ), Z( * ) + COMPLEX B( LDB, * ), BX( LDBX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO, NEGONE + PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0, NEGONE = -1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JCOL, JROW, M, N, NLP1 + REAL DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, TEMP +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLACPY, CLASCL, CSROT, CSSCAL, SGEMV, + $ XERBLA +* .. +* .. External Functions .. + REAL SLAMC3, SNRM2 + EXTERNAL SLAMC3, SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + N = NL + NR + 1 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( NL.LT.1 ) THEN + INFO = -2 + ELSE IF( NR.LT.1 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -5 + ELSE IF( LDB.LT.N ) THEN + INFO = -7 + ELSE IF( LDBX.LT.N ) THEN + INFO = -9 + ELSE IF( GIVPTR.LT.0 ) THEN + INFO = -11 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -13 + ELSE IF( LDGNUM.LT.N ) THEN + INFO = -15 + ELSE IF( K.LT.1 ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLALS0', -INFO ) + RETURN + END IF +* + M = N + SQRE + NLP1 = NL + 1 +* + IF( ICOMPQ.EQ.0 ) THEN +* +* Apply back orthogonal transformations from the left. +* +* Step (1L): apply back the Givens rotations performed. +* + DO 10 I = 1, GIVPTR + CALL CSROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB, + $ B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ), + $ GIVNUM( I, 1 ) ) + 10 CONTINUE +* +* Step (2L): permute rows of B. +* + CALL CCOPY( NRHS, B( NLP1, 1 ), LDB, BX( 1, 1 ), LDBX ) + DO 20 I = 2, N + CALL CCOPY( NRHS, B( PERM( I ), 1 ), LDB, BX( I, 1 ), LDBX ) + 20 CONTINUE +* +* Step (3L): apply the inverse of the left singular vector +* matrix to BX. +* + IF( K.EQ.1 ) THEN + CALL CCOPY( NRHS, BX, LDBX, B, LDB ) + IF( Z( 1 ).LT.ZERO ) THEN + CALL CSSCAL( NRHS, NEGONE, B, LDB ) + END IF + ELSE + DO 100 J = 1, K + DIFLJ = DIFL( J ) + DJ = POLES( J, 1 ) + DSIGJ = -POLES( J, 2 ) + IF( J.LT.K ) THEN + DIFRJ = -DIFR( J, 1 ) + DSIGJP = -POLES( J+1, 2 ) + END IF + IF( ( Z( J ).EQ.ZERO ) .OR. ( POLES( J, 2 ).EQ.ZERO ) ) + $ THEN + RWORK( J ) = ZERO + ELSE + RWORK( J ) = -POLES( J, 2 )*Z( J ) / DIFLJ / + $ ( POLES( J, 2 )+DJ ) + END IF + DO 30 I = 1, J - 1 + IF( ( Z( I ).EQ.ZERO ) .OR. + $ ( POLES( I, 2 ).EQ.ZERO ) ) THEN + RWORK( I ) = ZERO + ELSE + RWORK( I ) = POLES( I, 2 )*Z( I ) / + $ ( SLAMC3( POLES( I, 2 ), DSIGJ )- + $ DIFLJ ) / ( POLES( I, 2 )+DJ ) + END IF + 30 CONTINUE + DO 40 I = J + 1, K + IF( ( Z( I ).EQ.ZERO ) .OR. + $ ( POLES( I, 2 ).EQ.ZERO ) ) THEN + RWORK( I ) = ZERO + ELSE + RWORK( I ) = POLES( I, 2 )*Z( I ) / + $ ( SLAMC3( POLES( I, 2 ), DSIGJP )+ + $ DIFRJ ) / ( POLES( I, 2 )+DJ ) + END IF + 40 CONTINUE + RWORK( 1 ) = NEGONE + TEMP = SNRM2( K, RWORK, 1 ) +* +* Since B and BX are complex, the following call to SGEMV +* is performed in two steps (real and imaginary parts). +* +* CALL SGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO, +* $ B( J, 1 ), LDB ) +* + I = K + NRHS*2 + DO 60 JCOL = 1, NRHS + DO 50 JROW = 1, K + I = I + 1 + RWORK( I ) = REAL( BX( JROW, JCOL ) ) + 50 CONTINUE + 60 CONTINUE + CALL SGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K, + $ RWORK( 1 ), 1, ZERO, RWORK( 1+K ), 1 ) + I = K + NRHS*2 + DO 80 JCOL = 1, NRHS + DO 70 JROW = 1, K + I = I + 1 + RWORK( I ) = AIMAG( BX( JROW, JCOL ) ) + 70 CONTINUE + 80 CONTINUE + CALL SGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K, + $ RWORK( 1 ), 1, ZERO, RWORK( 1+K+NRHS ), 1 ) + DO 90 JCOL = 1, NRHS + B( J, JCOL ) = CMPLX( RWORK( JCOL+K ), + $ RWORK( JCOL+K+NRHS ) ) + 90 CONTINUE + CALL CLASCL( 'G', 0, 0, TEMP, ONE, 1, NRHS, B( J, 1 ), + $ LDB, INFO ) + 100 CONTINUE + END IF +* +* Move the deflated rows of BX to B also. +* + IF( K.LT.MAX( M, N ) ) + $ CALL CLACPY( 'A', N-K, NRHS, BX( K+1, 1 ), LDBX, + $ B( K+1, 1 ), LDB ) + ELSE +* +* Apply back the right orthogonal transformations. +* +* Step (1R): apply back the new right singular vector matrix +* to B. +* + IF( K.EQ.1 ) THEN + CALL CCOPY( NRHS, B, LDB, BX, LDBX ) + ELSE + DO 180 J = 1, K + DSIGJ = POLES( J, 2 ) + IF( Z( J ).EQ.ZERO ) THEN + RWORK( J ) = ZERO + ELSE + RWORK( J ) = -Z( J ) / DIFL( J ) / + $ ( DSIGJ+POLES( J, 1 ) ) / DIFR( J, 2 ) + END IF + DO 110 I = 1, J - 1 + IF( Z( J ).EQ.ZERO ) THEN + RWORK( I ) = ZERO + ELSE + RWORK( I ) = Z( J ) / ( SLAMC3( DSIGJ, -POLES( I+1, + $ 2 ) )-DIFR( I, 1 ) ) / + $ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 ) + END IF + 110 CONTINUE + DO 120 I = J + 1, K + IF( Z( J ).EQ.ZERO ) THEN + RWORK( I ) = ZERO + ELSE + RWORK( I ) = Z( J ) / ( SLAMC3( DSIGJ, -POLES( I, + $ 2 ) )-DIFL( I ) ) / + $ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 ) + END IF + 120 CONTINUE +* +* Since B and BX are complex, the following call to SGEMV +* is performed in two steps (real and imaginary parts). +* +* CALL SGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO, +* $ BX( J, 1 ), LDBX ) +* + I = K + NRHS*2 + DO 140 JCOL = 1, NRHS + DO 130 JROW = 1, K + I = I + 1 + RWORK( I ) = REAL( B( JROW, JCOL ) ) + 130 CONTINUE + 140 CONTINUE + CALL SGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K, + $ RWORK( 1 ), 1, ZERO, RWORK( 1+K ), 1 ) + I = K + NRHS*2 + DO 160 JCOL = 1, NRHS + DO 150 JROW = 1, K + I = I + 1 + RWORK( I ) = AIMAG( B( JROW, JCOL ) ) + 150 CONTINUE + 160 CONTINUE + CALL SGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K, + $ RWORK( 1 ), 1, ZERO, RWORK( 1+K+NRHS ), 1 ) + DO 170 JCOL = 1, NRHS + BX( J, JCOL ) = CMPLX( RWORK( JCOL+K ), + $ RWORK( JCOL+K+NRHS ) ) + 170 CONTINUE + 180 CONTINUE + END IF +* +* Step (2R): if SQRE = 1, apply back the rotation that is +* related to the right null space of the subproblem. +* + IF( SQRE.EQ.1 ) THEN + CALL CCOPY( NRHS, B( M, 1 ), LDB, BX( M, 1 ), LDBX ) + CALL CSROT( NRHS, BX( 1, 1 ), LDBX, BX( M, 1 ), LDBX, C, S ) + END IF + IF( K.LT.MAX( M, N ) ) + $ CALL CLACPY( 'A', N-K, NRHS, B( K+1, 1 ), LDB, + $ BX( K+1, 1 ), LDBX ) +* +* Step (3R): permute rows of B. +* + CALL CCOPY( NRHS, BX( 1, 1 ), LDBX, B( NLP1, 1 ), LDB ) + IF( SQRE.EQ.1 ) THEN + CALL CCOPY( NRHS, BX( M, 1 ), LDBX, B( M, 1 ), LDB ) + END IF + DO 190 I = 2, N + CALL CCOPY( NRHS, BX( I, 1 ), LDBX, B( PERM( I ), 1 ), LDB ) + 190 CONTINUE +* +* Step (4R): apply back the Givens rotations performed. +* + DO 200 I = GIVPTR, 1, -1 + CALL CSROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB, + $ B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ), + $ -GIVNUM( I, 1 ) ) + 200 CONTINUE + END IF +* + RETURN +* +* End of CLALS0 +* + END diff --git a/dspl/liblapack/SRC/clalsa.f b/dspl/liblapack/SRC/clalsa.f new file mode 100644 index 0000000..004d68f --- /dev/null +++ b/dspl/liblapack/SRC/clalsa.f @@ -0,0 +1,635 @@ +*> \brief \b CLALSA computes the SVD of the coefficient matrix in compact form. Used by sgelsd. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLALSA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U, +* LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR, +* GIVCOL, LDGCOL, PERM, GIVNUM, C, S, RWORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS, +* $ SMLSIZ +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), +* $ K( * ), PERM( LDGCOL, * ) +* REAL C( * ), DIFL( LDU, * ), DIFR( LDU, * ), +* $ GIVNUM( LDU, * ), POLES( LDU, * ), RWORK( * ), +* $ S( * ), U( LDU, * ), VT( LDU, * ), Z( LDU, * ) +* COMPLEX B( LDB, * ), BX( LDBX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLALSA is an itermediate step in solving the least squares problem +*> by computing the SVD of the coefficient matrix in compact form (The +*> singular vectors are computed as products of simple orthorgonal +*> matrices.). +*> +*> If ICOMPQ = 0, CLALSA applies the inverse of the left singular vector +*> matrix of an upper bidiagonal matrix to the right hand side; and if +*> ICOMPQ = 1, CLALSA applies the right singular vector matrix to the +*> right hand side. The singular vector matrices were generated in +*> compact form by CLALSA. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> Specifies whether the left or the right singular vector +*> matrix is involved. +*> = 0: Left singular vector matrix +*> = 1: Right singular vector matrix +*> \endverbatim +*> +*> \param[in] SMLSIZ +*> \verbatim +*> SMLSIZ is INTEGER +*> The maximum size of the subproblems at the bottom of the +*> computation tree. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The row and column dimensions of the upper bidiagonal matrix. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of columns of B and BX. NRHS must be at least 1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension ( LDB, NRHS ) +*> On input, B contains the right hand sides of the least +*> squares problem in rows 1 through M. +*> On output, B contains the solution X in rows 1 through N. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B in the calling subprogram. +*> LDB must be at least max(1,MAX( M, N ) ). +*> \endverbatim +*> +*> \param[out] BX +*> \verbatim +*> BX is COMPLEX array, dimension ( LDBX, NRHS ) +*> On exit, the result of applying the left or right singular +*> vector matrix to B. +*> \endverbatim +*> +*> \param[in] LDBX +*> \verbatim +*> LDBX is INTEGER +*> The leading dimension of BX. +*> \endverbatim +*> +*> \param[in] U +*> \verbatim +*> U is REAL array, dimension ( LDU, SMLSIZ ). +*> On entry, U contains the left singular vector matrices of all +*> subproblems at the bottom level. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER, LDU = > N. +*> The leading dimension of arrays U, VT, DIFL, DIFR, +*> POLES, GIVNUM, and Z. +*> \endverbatim +*> +*> \param[in] VT +*> \verbatim +*> VT is REAL array, dimension ( LDU, SMLSIZ+1 ). +*> On entry, VT**H contains the right singular vector matrices of +*> all subproblems at the bottom level. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER array, dimension ( N ). +*> \endverbatim +*> +*> \param[in] DIFL +*> \verbatim +*> DIFL is REAL array, dimension ( LDU, NLVL ). +*> where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. +*> \endverbatim +*> +*> \param[in] DIFR +*> \verbatim +*> DIFR is REAL array, dimension ( LDU, 2 * NLVL ). +*> On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record +*> distances between singular values on the I-th level and +*> singular values on the (I -1)-th level, and DIFR(*, 2 * I) +*> record the normalizing factors of the right singular vectors +*> matrices of subproblems on I-th level. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is REAL array, dimension ( LDU, NLVL ). +*> On entry, Z(1, I) contains the components of the deflation- +*> adjusted updating row vector for subproblems on the I-th +*> level. +*> \endverbatim +*> +*> \param[in] POLES +*> \verbatim +*> POLES is REAL array, dimension ( LDU, 2 * NLVL ). +*> On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old +*> singular values involved in the secular equations on the I-th +*> level. +*> \endverbatim +*> +*> \param[in] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER array, dimension ( N ). +*> On entry, GIVPTR( I ) records the number of Givens +*> rotations performed on the I-th problem on the computation +*> tree. +*> \endverbatim +*> +*> \param[in] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension ( LDGCOL, 2 * NLVL ). +*> On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the +*> locations of Givens rotations performed on the I-th level on +*> the computation tree. +*> \endverbatim +*> +*> \param[in] LDGCOL +*> \verbatim +*> LDGCOL is INTEGER, LDGCOL = > N. +*> The leading dimension of arrays GIVCOL and PERM. +*> \endverbatim +*> +*> \param[in] PERM +*> \verbatim +*> PERM is INTEGER array, dimension ( LDGCOL, NLVL ). +*> On entry, PERM(*, I) records permutations done on the I-th +*> level of the computation tree. +*> \endverbatim +*> +*> \param[in] GIVNUM +*> \verbatim +*> GIVNUM is REAL array, dimension ( LDU, 2 * NLVL ). +*> On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- +*> values of Givens rotations performed on the I-th level on the +*> computation tree. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension ( N ). +*> On entry, if the I-th subproblem is not square, +*> C( I ) contains the C-value of a Givens rotation related to +*> the right null space of the I-th subproblem. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is REAL array, dimension ( N ). +*> On entry, if the I-th subproblem is not square, +*> S( I ) contains the S-value of a Givens rotation related to +*> the right null space of the I-th subproblem. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension at least +*> MAX( (SMLSZ+1)*NRHS*3, N*(1+NRHS) + 2*NRHS ). +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Ren-Cang Li, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +* +* ===================================================================== + SUBROUTINE CLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U, + $ LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR, + $ GIVCOL, LDGCOL, PERM, GIVNUM, C, S, RWORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS, + $ SMLSIZ +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), + $ K( * ), PERM( LDGCOL, * ) + REAL C( * ), DIFL( LDU, * ), DIFR( LDU, * ), + $ GIVNUM( LDU, * ), POLES( LDU, * ), RWORK( * ), + $ S( * ), U( LDU, * ), VT( LDU, * ), Z( LDU, * ) + COMPLEX B( LDB, * ), BX( LDBX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, IC, IM1, INODE, J, JCOL, JIMAG, JREAL, + $ JROW, LF, LL, LVL, LVL2, ND, NDB1, NDIML, + $ NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQRE +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLALS0, SGEMM, SLASDT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( SMLSIZ.LT.3 ) THEN + INFO = -2 + ELSE IF( N.LT.SMLSIZ ) THEN + INFO = -3 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -4 + ELSE IF( LDB.LT.N ) THEN + INFO = -6 + ELSE IF( LDBX.LT.N ) THEN + INFO = -8 + ELSE IF( LDU.LT.N ) THEN + INFO = -10 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -19 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLALSA', -INFO ) + RETURN + END IF +* +* Book-keeping and setting up the computation tree. +* + INODE = 1 + NDIML = INODE + N + NDIMR = NDIML + N +* + CALL SLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ), + $ IWORK( NDIMR ), SMLSIZ ) +* +* The following code applies back the left singular vector factors. +* For applying back the right singular vector factors, go to 170. +* + IF( ICOMPQ.EQ.1 ) THEN + GO TO 170 + END IF +* +* The nodes on the bottom level of the tree were solved +* by SLASDQ. The corresponding left and right singular vector +* matrices are in explicit form. First apply back the left +* singular vector matrices. +* + NDB1 = ( ND+1 ) / 2 + DO 130 I = NDB1, ND +* +* IC : center row of each node +* NL : number of rows of left subproblem +* NR : number of rows of right subproblem +* NLF: starting row of the left subproblem +* NRF: starting row of the right subproblem +* + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NR = IWORK( NDIMR+I1 ) + NLF = IC - NL + NRF = IC + 1 +* +* Since B and BX are complex, the following call to SGEMM +* is performed in two steps (real and imaginary parts). +* +* CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, +* $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) +* + J = NL*NRHS*2 + DO 20 JCOL = 1, NRHS + DO 10 JROW = NLF, NLF + NL - 1 + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 10 CONTINUE + 20 CONTINUE + CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, + $ RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1 ), NL ) + J = NL*NRHS*2 + DO 40 JCOL = 1, NRHS + DO 30 JROW = NLF, NLF + NL - 1 + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 30 CONTINUE + 40 CONTINUE + CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, + $ RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1+NL*NRHS ), + $ NL ) + JREAL = 0 + JIMAG = NL*NRHS + DO 60 JCOL = 1, NRHS + DO 50 JROW = NLF, NLF + NL - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 50 CONTINUE + 60 CONTINUE +* +* Since B and BX are complex, the following call to SGEMM +* is performed in two steps (real and imaginary parts). +* +* CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, +* $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) +* + J = NR*NRHS*2 + DO 80 JCOL = 1, NRHS + DO 70 JROW = NRF, NRF + NR - 1 + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 70 CONTINUE + 80 CONTINUE + CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, + $ RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1 ), NR ) + J = NR*NRHS*2 + DO 100 JCOL = 1, NRHS + DO 90 JROW = NRF, NRF + NR - 1 + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 90 CONTINUE + 100 CONTINUE + CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, + $ RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1+NR*NRHS ), + $ NR ) + JREAL = 0 + JIMAG = NR*NRHS + DO 120 JCOL = 1, NRHS + DO 110 JROW = NRF, NRF + NR - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 110 CONTINUE + 120 CONTINUE +* + 130 CONTINUE +* +* Next copy the rows of B that correspond to unchanged rows +* in the bidiagonal matrix to BX. +* + DO 140 I = 1, ND + IC = IWORK( INODE+I-1 ) + CALL CCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX ) + 140 CONTINUE +* +* Finally go through the left singular vector matrices of all +* the other subproblems bottom-up on the tree. +* + J = 2**NLVL + SQRE = 0 +* + DO 160 LVL = NLVL, 1, -1 + LVL2 = 2*LVL - 1 +* +* find the first node LF and last node LL on +* the current level LVL +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 150 I = LF, LL + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + J = J - 1 + CALL CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX, + $ B( NLF, 1 ), LDB, PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ), + $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ), + $ Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK, + $ INFO ) + 150 CONTINUE + 160 CONTINUE + GO TO 330 +* +* ICOMPQ = 1: applying back the right singular vector factors. +* + 170 CONTINUE +* +* First now go through the right singular vector matrices of all +* the tree nodes top-down. +* + J = 0 + DO 190 LVL = 1, NLVL + LVL2 = 2*LVL - 1 +* +* Find the first node LF and last node LL on +* the current level LVL. +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 180 I = LL, LF, -1 + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + IF( I.EQ.LL ) THEN + SQRE = 0 + ELSE + SQRE = 1 + END IF + J = J + 1 + CALL CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB, + $ BX( NLF, 1 ), LDBX, PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ), + $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ), + $ Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK, + $ INFO ) + 180 CONTINUE + 190 CONTINUE +* +* The nodes on the bottom level of the tree were solved +* by SLASDQ. The corresponding right singular vector +* matrices are in explicit form. Apply them back. +* + NDB1 = ( ND+1 ) / 2 + DO 320 I = NDB1, ND + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NR = IWORK( NDIMR+I1 ) + NLP1 = NL + 1 + IF( I.EQ.ND ) THEN + NRP1 = NR + ELSE + NRP1 = NR + 1 + END IF + NLF = IC - NL + NRF = IC + 1 +* +* Since B and BX are complex, the following call to SGEMM is +* performed in two steps (real and imaginary parts). +* +* CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, +* $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) +* + J = NLP1*NRHS*2 + DO 210 JCOL = 1, NRHS + DO 200 JROW = NLF, NLF + NLP1 - 1 + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 200 CONTINUE + 210 CONTINUE + CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, + $ RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO, RWORK( 1 ), + $ NLP1 ) + J = NLP1*NRHS*2 + DO 230 JCOL = 1, NRHS + DO 220 JROW = NLF, NLF + NLP1 - 1 + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 220 CONTINUE + 230 CONTINUE + CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, + $ RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO, + $ RWORK( 1+NLP1*NRHS ), NLP1 ) + JREAL = 0 + JIMAG = NLP1*NRHS + DO 250 JCOL = 1, NRHS + DO 240 JROW = NLF, NLF + NLP1 - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 240 CONTINUE + 250 CONTINUE +* +* Since B and BX are complex, the following call to SGEMM is +* performed in two steps (real and imaginary parts). +* +* CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, +* $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) +* + J = NRP1*NRHS*2 + DO 270 JCOL = 1, NRHS + DO 260 JROW = NRF, NRF + NRP1 - 1 + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 260 CONTINUE + 270 CONTINUE + CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, + $ RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO, RWORK( 1 ), + $ NRP1 ) + J = NRP1*NRHS*2 + DO 290 JCOL = 1, NRHS + DO 280 JROW = NRF, NRF + NRP1 - 1 + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 280 CONTINUE + 290 CONTINUE + CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, + $ RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO, + $ RWORK( 1+NRP1*NRHS ), NRP1 ) + JREAL = 0 + JIMAG = NRP1*NRHS + DO 310 JCOL = 1, NRHS + DO 300 JROW = NRF, NRF + NRP1 - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 300 CONTINUE + 310 CONTINUE +* + 320 CONTINUE +* + 330 CONTINUE +* + RETURN +* +* End of CLALSA +* + END diff --git a/dspl/liblapack/SRC/clalsd.f b/dspl/liblapack/SRC/clalsd.f new file mode 100644 index 0000000..5ebbb23 --- /dev/null +++ b/dspl/liblapack/SRC/clalsd.f @@ -0,0 +1,690 @@ +*> \brief \b CLALSD uses the singular value decomposition of A to solve the least squares problem. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLALSD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND, +* RANK, WORK, RWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS, RANK, SMLSIZ +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL D( * ), E( * ), RWORK( * ) +* COMPLEX B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLALSD uses the singular value decomposition of A to solve the least +*> squares problem of finding X to minimize the Euclidean norm of each +*> column of A*X-B, where A is N-by-N upper bidiagonal, and X and B +*> are N-by-NRHS. The solution X overwrites B. +*> +*> The singular values of A smaller than RCOND times the largest +*> singular value are treated as zero in solving the least squares +*> problem; in this case a minimum norm solution is returned. +*> The actual singular values are returned in D in ascending order. +*> +*> This code makes very mild assumptions about floating point +*> arithmetic. It will work on machines with a guard digit in +*> add/subtract, or on those binary machines without guard digits +*> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. +*> It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': D and E define an upper bidiagonal matrix. +*> = 'L': D and E define a lower bidiagonal matrix. +*> \endverbatim +*> +*> \param[in] SMLSIZ +*> \verbatim +*> SMLSIZ is INTEGER +*> The maximum size of the subproblems at the bottom of the +*> computation tree. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the bidiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of columns of B. NRHS must be at least 1. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry D contains the main diagonal of the bidiagonal +*> matrix. On exit, if INFO = 0, D contains its singular values. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> Contains the super-diagonal entries of the bidiagonal matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On input, B contains the right hand sides of the least +*> squares problem. On output, B contains the solution X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B in the calling subprogram. +*> LDB must be at least max(1,N). +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is REAL +*> The singular values of A less than or equal to RCOND times +*> the largest singular value are treated as zero in solving +*> the least squares problem. If RCOND is negative, +*> machine precision is used instead. +*> For example, if diag(S)*X=B were the least squares problem, +*> where diag(S) is a diagonal matrix of singular values, the +*> solution would be X(i) = B(i) / S(i) if S(i) is greater than +*> RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to +*> RCOND*max(S). +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The number of singular values of A greater than RCOND times +*> the largest singular value. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N * NRHS). +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension at least +*> (9*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + +*> MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS ), +*> where +*> NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (3*N*NLVL + 11*N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: The algorithm failed to compute a singular value while +*> working on the submatrix lying in rows and columns +*> INFO/(N+1) through MOD(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Ren-Cang Li, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +* +* ===================================================================== + SUBROUTINE CLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND, + $ RANK, WORK, RWORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS, RANK, SMLSIZ + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL D( * ), E( * ), RWORK( * ) + COMPLEX B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ) ) +* .. +* .. Local Scalars .. + INTEGER BX, BXST, C, DIFL, DIFR, GIVCOL, GIVNUM, + $ GIVPTR, I, ICMPQ1, ICMPQ2, IRWB, IRWIB, IRWRB, + $ IRWU, IRWVT, IRWWRK, IWK, J, JCOL, JIMAG, + $ JREAL, JROW, K, NLVL, NM1, NRWORK, NSIZE, NSUB, + $ PERM, POLES, S, SIZEI, SMLSZP, SQRE, ST, ST1, + $ U, VT, Z + REAL CS, EPS, ORGNRM, R, RCND, SN, TOL +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH, SLANST + EXTERNAL ISAMAX, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLACPY, CLALSA, CLASCL, CLASET, CSROT, + $ SGEMM, SLARTG, SLASCL, SLASDA, SLASDQ, SLASET, + $ SLASRT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, INT, LOG, REAL, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -4 + ELSE IF( ( LDB.LT.1 ) .OR. ( LDB.LT.N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLALSD', -INFO ) + RETURN + END IF +* + EPS = SLAMCH( 'Epsilon' ) +* +* Set up the tolerance. +* + IF( ( RCOND.LE.ZERO ) .OR. ( RCOND.GE.ONE ) ) THEN + RCND = EPS + ELSE + RCND = RCOND + END IF +* + RANK = 0 +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + IF( D( 1 ).EQ.ZERO ) THEN + CALL CLASET( 'A', 1, NRHS, CZERO, CZERO, B, LDB ) + ELSE + RANK = 1 + CALL CLASCL( 'G', 0, 0, D( 1 ), ONE, 1, NRHS, B, LDB, INFO ) + D( 1 ) = ABS( D( 1 ) ) + END IF + RETURN + END IF +* +* Rotate the matrix if it is lower bidiagonal. +* + IF( UPLO.EQ.'L' ) THEN + DO 10 I = 1, N - 1 + CALL SLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( NRHS.EQ.1 ) THEN + CALL CSROT( 1, B( I, 1 ), 1, B( I+1, 1 ), 1, CS, SN ) + ELSE + RWORK( I*2-1 ) = CS + RWORK( I*2 ) = SN + END IF + 10 CONTINUE + IF( NRHS.GT.1 ) THEN + DO 30 I = 1, NRHS + DO 20 J = 1, N - 1 + CS = RWORK( J*2-1 ) + SN = RWORK( J*2 ) + CALL CSROT( 1, B( J, I ), 1, B( J+1, I ), 1, CS, SN ) + 20 CONTINUE + 30 CONTINUE + END IF + END IF +* +* Scale. +* + NM1 = N - 1 + ORGNRM = SLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) THEN + CALL CLASET( 'A', N, NRHS, CZERO, CZERO, B, LDB ) + RETURN + END IF +* + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, INFO ) +* +* If N is smaller than the minimum divide size SMLSIZ, then solve +* the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN + IRWU = 1 + IRWVT = IRWU + N*N + IRWWRK = IRWVT + N*N + IRWRB = IRWWRK + IRWIB = IRWRB + N*NRHS + IRWB = IRWIB + N*NRHS + CALL SLASET( 'A', N, N, ZERO, ONE, RWORK( IRWU ), N ) + CALL SLASET( 'A', N, N, ZERO, ONE, RWORK( IRWVT ), N ) + CALL SLASDQ( 'U', 0, N, N, N, 0, D, E, RWORK( IRWVT ), N, + $ RWORK( IRWU ), N, RWORK( IRWWRK ), 1, + $ RWORK( IRWWRK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* In the real version, B is passed to SLASDQ and multiplied +* internally by Q**H. Here B is complex and that product is +* computed below in two steps (real and imaginary parts). +* + J = IRWB - 1 + DO 50 JCOL = 1, NRHS + DO 40 JROW = 1, N + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 40 CONTINUE + 50 CONTINUE + CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWU ), N, + $ RWORK( IRWB ), N, ZERO, RWORK( IRWRB ), N ) + J = IRWB - 1 + DO 70 JCOL = 1, NRHS + DO 60 JROW = 1, N + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 60 CONTINUE + 70 CONTINUE + CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWU ), N, + $ RWORK( IRWB ), N, ZERO, RWORK( IRWIB ), N ) + JREAL = IRWRB - 1 + JIMAG = IRWIB - 1 + DO 90 JCOL = 1, NRHS + DO 80 JROW = 1, N + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + B( JROW, JCOL ) = CMPLX( RWORK( JREAL ), RWORK( JIMAG ) ) + 80 CONTINUE + 90 CONTINUE +* + TOL = RCND*ABS( D( ISAMAX( N, D, 1 ) ) ) + DO 100 I = 1, N + IF( D( I ).LE.TOL ) THEN + CALL CLASET( 'A', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB ) + ELSE + CALL CLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, B( I, 1 ), + $ LDB, INFO ) + RANK = RANK + 1 + END IF + 100 CONTINUE +* +* Since B is complex, the following call to SGEMM is performed +* in two steps (real and imaginary parts). That is for V * B +* (in the real version of the code V**H is stored in WORK). +* +* CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO, +* $ WORK( NWORK ), N ) +* + J = IRWB - 1 + DO 120 JCOL = 1, NRHS + DO 110 JROW = 1, N + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 110 CONTINUE + 120 CONTINUE + CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWVT ), N, + $ RWORK( IRWB ), N, ZERO, RWORK( IRWRB ), N ) + J = IRWB - 1 + DO 140 JCOL = 1, NRHS + DO 130 JROW = 1, N + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 130 CONTINUE + 140 CONTINUE + CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWVT ), N, + $ RWORK( IRWB ), N, ZERO, RWORK( IRWIB ), N ) + JREAL = IRWRB - 1 + JIMAG = IRWIB - 1 + DO 160 JCOL = 1, NRHS + DO 150 JROW = 1, N + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + B( JROW, JCOL ) = CMPLX( RWORK( JREAL ), RWORK( JIMAG ) ) + 150 CONTINUE + 160 CONTINUE +* +* Unscale. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) + CALL SLASRT( 'D', N, D, INFO ) + CALL CLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO ) +* + RETURN + END IF +* +* Book-keeping and setting up some constants. +* + NLVL = INT( LOG( REAL( N ) / REAL( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1 +* + SMLSZP = SMLSIZ + 1 +* + U = 1 + VT = 1 + SMLSIZ*N + DIFL = VT + SMLSZP*N + DIFR = DIFL + NLVL*N + Z = DIFR + NLVL*N*2 + C = Z + NLVL*N + S = C + N + POLES = S + N + GIVNUM = POLES + 2*NLVL*N + NRWORK = GIVNUM + 2*NLVL*N + BX = 1 +* + IRWRB = NRWORK + IRWIB = IRWRB + SMLSIZ*NRHS + IRWB = IRWIB + SMLSIZ*NRHS +* + SIZEI = 1 + N + K = SIZEI + N + GIVPTR = K + N + PERM = GIVPTR + N + GIVCOL = PERM + NLVL*N + IWK = GIVCOL + NLVL*N*2 +* + ST = 1 + SQRE = 0 + ICMPQ1 = 1 + ICMPQ2 = 0 + NSUB = 0 +* + DO 170 I = 1, N + IF( ABS( D( I ) ).LT.EPS ) THEN + D( I ) = SIGN( EPS, D( I ) ) + END IF + 170 CONTINUE +* + DO 240 I = 1, NM1 + IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN + NSUB = NSUB + 1 + IWORK( NSUB ) = ST +* +* Subproblem found. First determine its size and then +* apply divide and conquer on it. +* + IF( I.LT.NM1 ) THEN +* +* A subproblem with E(I) small for I < NM1. +* + NSIZE = I - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + ELSE IF( ABS( E( I ) ).GE.EPS ) THEN +* +* A subproblem with E(NM1) not too small but I = NM1. +* + NSIZE = N - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + ELSE +* +* A subproblem with E(NM1) small. This implies an +* 1-by-1 subproblem at D(N), which is not solved +* explicitly. +* + NSIZE = I - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + NSUB = NSUB + 1 + IWORK( NSUB ) = N + IWORK( SIZEI+NSUB-1 ) = 1 + CALL CCOPY( NRHS, B( N, 1 ), LDB, WORK( BX+NM1 ), N ) + END IF + ST1 = ST - 1 + IF( NSIZE.EQ.1 ) THEN +* +* This is a 1-by-1 subproblem and is not solved +* explicitly. +* + CALL CCOPY( NRHS, B( ST, 1 ), LDB, WORK( BX+ST1 ), N ) + ELSE IF( NSIZE.LE.SMLSIZ ) THEN +* +* This is a small subproblem and is solved by SLASDQ. +* + CALL SLASET( 'A', NSIZE, NSIZE, ZERO, ONE, + $ RWORK( VT+ST1 ), N ) + CALL SLASET( 'A', NSIZE, NSIZE, ZERO, ONE, + $ RWORK( U+ST1 ), N ) + CALL SLASDQ( 'U', 0, NSIZE, NSIZE, NSIZE, 0, D( ST ), + $ E( ST ), RWORK( VT+ST1 ), N, RWORK( U+ST1 ), + $ N, RWORK( NRWORK ), 1, RWORK( NRWORK ), + $ INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* In the real version, B is passed to SLASDQ and multiplied +* internally by Q**H. Here B is complex and that product is +* computed below in two steps (real and imaginary parts). +* + J = IRWB - 1 + DO 190 JCOL = 1, NRHS + DO 180 JROW = ST, ST + NSIZE - 1 + J = J + 1 + RWORK( J ) = REAL( B( JROW, JCOL ) ) + 180 CONTINUE + 190 CONTINUE + CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( U+ST1 ), N, RWORK( IRWB ), NSIZE, + $ ZERO, RWORK( IRWRB ), NSIZE ) + J = IRWB - 1 + DO 210 JCOL = 1, NRHS + DO 200 JROW = ST, ST + NSIZE - 1 + J = J + 1 + RWORK( J ) = AIMAG( B( JROW, JCOL ) ) + 200 CONTINUE + 210 CONTINUE + CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( U+ST1 ), N, RWORK( IRWB ), NSIZE, + $ ZERO, RWORK( IRWIB ), NSIZE ) + JREAL = IRWRB - 1 + JIMAG = IRWIB - 1 + DO 230 JCOL = 1, NRHS + DO 220 JROW = ST, ST + NSIZE - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + B( JROW, JCOL ) = CMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 220 CONTINUE + 230 CONTINUE +* + CALL CLACPY( 'A', NSIZE, NRHS, B( ST, 1 ), LDB, + $ WORK( BX+ST1 ), N ) + ELSE +* +* A large problem. Solve it using divide and conquer. +* + CALL SLASDA( ICMPQ1, SMLSIZ, NSIZE, SQRE, D( ST ), + $ E( ST ), RWORK( U+ST1 ), N, RWORK( VT+ST1 ), + $ IWORK( K+ST1 ), RWORK( DIFL+ST1 ), + $ RWORK( DIFR+ST1 ), RWORK( Z+ST1 ), + $ RWORK( POLES+ST1 ), IWORK( GIVPTR+ST1 ), + $ IWORK( GIVCOL+ST1 ), N, IWORK( PERM+ST1 ), + $ RWORK( GIVNUM+ST1 ), RWORK( C+ST1 ), + $ RWORK( S+ST1 ), RWORK( NRWORK ), + $ IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + BXST = BX + ST1 + CALL CLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, B( ST, 1 ), + $ LDB, WORK( BXST ), N, RWORK( U+ST1 ), N, + $ RWORK( VT+ST1 ), IWORK( K+ST1 ), + $ RWORK( DIFL+ST1 ), RWORK( DIFR+ST1 ), + $ RWORK( Z+ST1 ), RWORK( POLES+ST1 ), + $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N, + $ IWORK( PERM+ST1 ), RWORK( GIVNUM+ST1 ), + $ RWORK( C+ST1 ), RWORK( S+ST1 ), + $ RWORK( NRWORK ), IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + ST = I + 1 + END IF + 240 CONTINUE +* +* Apply the singular values and treat the tiny ones as zero. +* + TOL = RCND*ABS( D( ISAMAX( N, D, 1 ) ) ) +* + DO 250 I = 1, N +* +* Some of the elements in D can be negative because 1-by-1 +* subproblems were not solved explicitly. +* + IF( ABS( D( I ) ).LE.TOL ) THEN + CALL CLASET( 'A', 1, NRHS, CZERO, CZERO, WORK( BX+I-1 ), N ) + ELSE + RANK = RANK + 1 + CALL CLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, + $ WORK( BX+I-1 ), N, INFO ) + END IF + D( I ) = ABS( D( I ) ) + 250 CONTINUE +* +* Now apply back the right singular vectors. +* + ICMPQ2 = 1 + DO 320 I = 1, NSUB + ST = IWORK( I ) + ST1 = ST - 1 + NSIZE = IWORK( SIZEI+I-1 ) + BXST = BX + ST1 + IF( NSIZE.EQ.1 ) THEN + CALL CCOPY( NRHS, WORK( BXST ), N, B( ST, 1 ), LDB ) + ELSE IF( NSIZE.LE.SMLSIZ ) THEN +* +* Since B and BX are complex, the following call to SGEMM +* is performed in two steps (real and imaginary parts). +* +* CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, +* $ RWORK( VT+ST1 ), N, RWORK( BXST ), N, ZERO, +* $ B( ST, 1 ), LDB ) +* + J = BXST - N - 1 + JREAL = IRWB - 1 + DO 270 JCOL = 1, NRHS + J = J + N + DO 260 JROW = 1, NSIZE + JREAL = JREAL + 1 + RWORK( JREAL ) = REAL( WORK( J+JROW ) ) + 260 CONTINUE + 270 CONTINUE + CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( VT+ST1 ), N, RWORK( IRWB ), NSIZE, ZERO, + $ RWORK( IRWRB ), NSIZE ) + J = BXST - N - 1 + JIMAG = IRWB - 1 + DO 290 JCOL = 1, NRHS + J = J + N + DO 280 JROW = 1, NSIZE + JIMAG = JIMAG + 1 + RWORK( JIMAG ) = AIMAG( WORK( J+JROW ) ) + 280 CONTINUE + 290 CONTINUE + CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( VT+ST1 ), N, RWORK( IRWB ), NSIZE, ZERO, + $ RWORK( IRWIB ), NSIZE ) + JREAL = IRWRB - 1 + JIMAG = IRWIB - 1 + DO 310 JCOL = 1, NRHS + DO 300 JROW = ST, ST + NSIZE - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + B( JROW, JCOL ) = CMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 300 CONTINUE + 310 CONTINUE + ELSE + CALL CLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, WORK( BXST ), N, + $ B( ST, 1 ), LDB, RWORK( U+ST1 ), N, + $ RWORK( VT+ST1 ), IWORK( K+ST1 ), + $ RWORK( DIFL+ST1 ), RWORK( DIFR+ST1 ), + $ RWORK( Z+ST1 ), RWORK( POLES+ST1 ), + $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N, + $ IWORK( PERM+ST1 ), RWORK( GIVNUM+ST1 ), + $ RWORK( C+ST1 ), RWORK( S+ST1 ), + $ RWORK( NRWORK ), IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + 320 CONTINUE +* +* Unscale and sort the singular values. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) + CALL SLASRT( 'D', N, D, INFO ) + CALL CLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO ) +* + RETURN +* +* End of CLALSD +* + END diff --git a/dspl/liblapack/SRC/clamswlq.f b/dspl/liblapack/SRC/clamswlq.f new file mode 100644 index 0000000..f2f9ab7 --- /dev/null +++ b/dspl/liblapack/SRC/clamswlq.f @@ -0,0 +1,417 @@ +* +* Definition: +* =========== +* +* SUBROUTINE CLAMSWLQ( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, +* $ LDT, C, LDC, WORK, LWORK, INFO ) +* +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), WORK( * ), C(LDC, * ), +* $ T( LDT, * ) +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAMQRTS overwrites the general real M-by-N matrix C with +*> +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**H * C C * Q**H +*> where Q is a real orthogonal matrix defined as the product of blocked +*> elementary reflectors computed by short wide LQ +*> factorization (CLASWLQ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >=0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> M >= K >= 0; +*> +*> \endverbatim +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The row block size to be used in the blocked QR. +*> M >= MB >= 1 +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The column block size to be used in the blocked QR. +*> NB > M. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The block size to be used in the blocked QR. +*> MB > M. +*> +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the blocked +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> CLASWLQ in the first k rows of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX array, dimension +*> ( M * Number of blocks(CEIL(N-K/NB-K)), +*> The blocked upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See below +*> for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,NB) * MB; +*> if SIDE = 'R', LWORK >= max(1,M) * MB. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> Short-Wide LQ (SWLQ) performs LQ by a sequence of orthogonal transformations, +*> representing Q as a product of other orthogonal matrices +*> Q = Q(1) * Q(2) * . . . * Q(k) +*> where each Q(i) zeros out upper diagonal entries of a block of NB rows of A: +*> Q(1) zeros out the upper diagonal entries of rows 1:NB of A +*> Q(2) zeros out the bottom MB-N rows of rows [1:M,NB+1:2*NB-M] of A +*> Q(3) zeros out the bottom MB-N rows of rows [1:M,2*NB-M+1:3*NB-2*M] of A +*> . . . +*> +*> Q(1) is computed by GELQT, which represents Q(1) by Householder vectors +*> stored under the diagonal of rows 1:MB of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,1:N). +*> For more information see Further Details in GELQT. +*> +*> Q(i) for i>1 is computed by TPLQT, which represents Q(i) by Householder vectors +*> stored in columns [(i-1)*(NB-M)+M+1:i*(NB-M)+M] of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,(i-1)*M+1:i*M). +*> The last Q(k) may use fewer rows. +*> For more information see Further Details in TPQRT. +*> +*> For more details of the overall algorithm, see the description of +*> Sequential TSQR in Section 2.2 of [1]. +*> +*> [1] “Communication-Optimal Parallel and Sequential QR and LU Factorizations,” +*> J. Demmel, L. Grigori, M. Hoemmen, J. Langou, +*> SIAM J. Sci. Comput, vol. 34, no. 1, 2012 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLAMSWLQ( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, + $ LDT, C, LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), WORK( * ), C(LDC, * ), + $ T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN, LQUERY + INTEGER I, II, KK, LW, CTR +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL CTPMLQT, CGEMLQT, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + LQUERY = LWORK.LT.0 + NOTRAN = LSAME( TRANS, 'N' ) + TRAN = LSAME( TRANS, 'C' ) + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + IF (LEFT) THEN + LW = N * MB + ELSE + LW = M * MB + END IF +* + INFO = 0 + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -9 + ELSE IF( LDT.LT.MAX( 1, MB) ) THEN + INFO = -11 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -13 + ELSE IF(( LWORK.LT.MAX(1,LW)).AND.(.NOT.LQUERY)) THEN + INFO = -15 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLAMSWLQ', -INFO ) + WORK(1) = LW + RETURN + ELSE IF (LQUERY) THEN + WORK(1) = LW + RETURN + END IF +* +* Quick return if possible +* + IF( MIN(M,N,K).EQ.0 ) THEN + RETURN + END IF +* + IF((NB.LE.K).OR.(NB.GE.MAX(M,N,K))) THEN + CALL CGEMLQT( SIDE, TRANS, M, N, K, MB, A, LDA, + $ T, LDT, C, LDC, WORK, INFO) + RETURN + END IF +* + IF(LEFT.AND.TRAN) THEN +* +* Multiply Q to the last block of C +* + KK = MOD((M-K),(NB-K)) + CTR = (M-K)/(NB-K) + IF (KK.GT.0) THEN + II=M-KK+1 + CALL CTPMLQT('L','C',KK , N, K, 0, MB, A(1,II), LDA, + $ T(1,CTR*K+1), LDT, C(1,1), LDC, + $ C(II,1), LDC, WORK, INFO ) + ELSE + II=M+1 + END IF +* + DO I=II-(NB-K),NB+1,-(NB-K) +* +* Multiply Q to the current block of C (1:M,I:I+NB) +* + CTR = CTR - 1 + CALL CTPMLQT('L','C',NB-K , N, K, 0,MB, A(1,I), LDA, + $ T(1,CTR*K+1),LDT, C(1,1), LDC, + $ C(I,1), LDC, WORK, INFO ) + + END DO +* +* Multiply Q to the first block of C (1:M,1:NB) +* + CALL CGEMLQT('L','C',NB , N, K, MB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + ELSE IF (LEFT.AND.NOTRAN) THEN +* +* Multiply Q to the first block of C +* + KK = MOD((M-K),(NB-K)) + II = M-KK+1 + CTR = 1 + CALL CGEMLQT('L','N',NB , N, K, MB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + DO I=NB+1,II-NB+K,(NB-K) +* +* Multiply Q to the current block of C (I:I+NB,1:N) +* + CALL CTPMLQT('L','N',NB-K , N, K, 0,MB, A(1,I), LDA, + $ T(1, CTR *K+1), LDT, C(1,1), LDC, + $ C(I,1), LDC, WORK, INFO ) + CTR = CTR + 1 +* + END DO + IF(II.LE.M) THEN +* +* Multiply Q to the last block of C +* + CALL CTPMLQT('L','N',KK , N, K, 0, MB, A(1,II), LDA, + $ T(1, CTR*K+1), LDT, C(1,1), LDC, + $ C(II,1), LDC, WORK, INFO ) +* + END IF +* + ELSE IF(RIGHT.AND.NOTRAN) THEN +* +* Multiply Q to the last block of C +* + KK = MOD((N-K),(NB-K)) + CTR = (N-K)/(NB-K) + IF (KK.GT.0) THEN + II=N-KK+1 + CALL CTPMLQT('R','N',M , KK, K, 0, MB, A(1, II), LDA, + $ T(1,CTR*K+1), LDT, C(1,1), LDC, + $ C(1,II), LDC, WORK, INFO ) + ELSE + II=N+1 + END IF +* + DO I=II-(NB-K),NB+1,-(NB-K) +* +* Multiply Q to the current block of C (1:M,I:I+MB) +* + CTR = CTR - 1 + CALL CTPMLQT('R','N', M, NB-K, K, 0, MB, A(1, I), LDA, + $ T(1,CTR*K+1), LDT, C(1,1), LDC, + $ C(1,I), LDC, WORK, INFO ) + END DO +* +* Multiply Q to the first block of C (1:M,1:MB) +* + CALL CGEMLQT('R','N',M , NB, K, MB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + ELSE IF (RIGHT.AND.TRAN) THEN +* +* Multiply Q to the first block of C +* + KK = MOD((N-K),(NB-K)) + II=N-KK+1 + CTR = 1 + CALL CGEMLQT('R','C',M , NB, K, MB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + DO I=NB+1,II-NB+K,(NB-K) +* +* Multiply Q to the current block of C (1:M,I:I+MB) +* + CALL CTPMLQT('R','C',M , NB-K, K, 0,MB, A(1,I), LDA, + $ T(1,CTR*K+1), LDT, C(1,1), LDC, + $ C(1,I), LDC, WORK, INFO ) + CTR = CTR + 1 +* + END DO + IF(II.LE.N) THEN +* +* Multiply Q to the last block of C +* + CALL CTPMLQT('R','C',M , KK, K, 0,MB, A(1,II), LDA, + $ T(1,CTR*K+1),LDT, C(1,1), LDC, + $ C(1,II), LDC, WORK, INFO ) +* + END IF +* + END IF +* + WORK(1) = LW + RETURN +* +* End of CLAMSWLQ +* + END diff --git a/dspl/liblapack/SRC/clamtsqr.f b/dspl/liblapack/SRC/clamtsqr.f new file mode 100644 index 0000000..77d09a5 --- /dev/null +++ b/dspl/liblapack/SRC/clamtsqr.f @@ -0,0 +1,414 @@ +* +* Definition: +* =========== +* +* SUBROUTINE CLAMTSQR( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, +* $ LDT, C, LDC, WORK, LWORK, INFO ) +* +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), WORK( * ), C(LDC, * ), +* $ T( LDT, * ) +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAMTSQR overwrites the general complex M-by-N matrix C with +*> +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> where Q is a real orthogonal matrix defined as the product +*> of blocked elementary reflectors computed by tall skinny +*> QR factorization (CLATSQR) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Conjugate Transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >=0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> N >= K >= 0; +*> +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The block size to be used in the blocked QR. +*> MB > N. (must be the same as DLATSQR) +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The column block size to be used in the blocked QR. +*> N >= NB >= 1. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> blockedelementary reflector H(i), for i = 1,2,...,k, as +*> returned by DLATSQR in the first k columns of +*> its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX array, dimension +*> ( N * Number of blocks(CEIL(M-K/MB-K)), +*> The blocked upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See below +*> for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX array, dimension (MAX(1,LWORK)) +*> +*> \endverbatim +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If SIDE = 'L', LWORK >= max(1,N)*NB; +*> if SIDE = 'R', LWORK >= max(1,MB)*NB. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> +*> \endverbatim +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> Tall-Skinny QR (TSQR) performs QR by a sequence of orthogonal transformations, +*> representing Q as a product of other orthogonal matrices +*> Q = Q(1) * Q(2) * . . . * Q(k) +*> where each Q(i) zeros out subdiagonal entries of a block of MB rows of A: +*> Q(1) zeros out the subdiagonal entries of rows 1:MB of A +*> Q(2) zeros out the bottom MB-N rows of rows [1:N,MB+1:2*MB-N] of A +*> Q(3) zeros out the bottom MB-N rows of rows [1:N,2*MB-N+1:3*MB-2*N] of A +*> . . . +*> +*> Q(1) is computed by GEQRT, which represents Q(1) by Householder vectors +*> stored under the diagonal of rows 1:MB of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,1:N). +*> For more information see Further Details in GEQRT. +*> +*> Q(i) for i>1 is computed by TPQRT, which represents Q(i) by Householder vectors +*> stored in rows [(i-1)*(MB-N)+N+1:i*(MB-N)+N] of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,(i-1)*N+1:i*N). +*> The last Q(k) may use fewer rows. +*> For more information see Further Details in TPQRT. +*> +*> For more details of the overall algorithm, see the description of +*> Sequential TSQR in Section 2.2 of [1]. +*> +*> [1] “Communication-Optimal Parallel and Sequential QR and LU Factorizations,” +*> J. Demmel, L. Grigori, M. Hoemmen, J. Langou, +*> SIAM J. Sci. Comput, vol. 34, no. 1, 2012 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLAMTSQR( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, + $ LDT, C, LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), WORK( * ), C(LDC, * ), + $ T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN, LQUERY + INTEGER I, II, KK, LW, CTR +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL CGEMQRT, CTPMQRT, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + LQUERY = LWORK.LT.0 + NOTRAN = LSAME( TRANS, 'N' ) + TRAN = LSAME( TRANS, 'C' ) + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + IF (LEFT) THEN + LW = N * NB + ELSE + LW = M * NB + END IF +* + INFO = 0 + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -9 + ELSE IF( LDT.LT.MAX( 1, NB) ) THEN + INFO = -11 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -13 + ELSE IF(( LWORK.LT.MAX(1,LW)).AND.(.NOT.LQUERY)) THEN + INFO = -15 + END IF +* +* Determine the block size if it is tall skinny or short and wide +* + IF( INFO.EQ.0) THEN + WORK(1) = LW + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLAMTSQR', -INFO ) + RETURN + ELSE IF (LQUERY) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN(M,N,K).EQ.0 ) THEN + RETURN + END IF +* + IF((MB.LE.K).OR.(MB.GE.MAX(M,N,K))) THEN + CALL CGEMQRT( SIDE, TRANS, M, N, K, NB, A, LDA, + $ T, LDT, C, LDC, WORK, INFO) + RETURN + END IF +* + IF(LEFT.AND.NOTRAN) THEN +* +* Multiply Q to the last block of C +* + KK = MOD((M-K),(MB-K)) + CTR = (M-K)/(MB-K) + IF (KK.GT.0) THEN + II=M-KK+1 + CALL CTPMQRT('L','N',KK , N, K, 0, NB, A(II,1), LDA, + $ T(1, CTR*K+1),LDT , C(1,1), LDC, + $ C(II,1), LDC, WORK, INFO ) + ELSE + II=M+1 + END IF +* + DO I=II-(MB-K),MB+1,-(MB-K) +* +* Multiply Q to the current block of C (I:I+MB,1:N) +* + CTR = CTR - 1 + CALL CTPMQRT('L','N',MB-K , N, K, 0,NB, A(I,1), LDA, + $ T(1,CTR*K+1),LDT, C(1,1), LDC, + $ C(I,1), LDC, WORK, INFO ) + + END DO +* +* Multiply Q to the first block of C (1:MB,1:N) +* + CALL CGEMQRT('L','N',MB , N, K, NB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + ELSE IF (LEFT.AND.TRAN) THEN +* +* Multiply Q to the first block of C +* + KK = MOD((M-K),(MB-K)) + II=M-KK+1 + CTR = 1 + CALL CGEMQRT('L','C',MB , N, K, NB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + DO I=MB+1,II-MB+K,(MB-K) +* +* Multiply Q to the current block of C (I:I+MB,1:N) +* + CALL CTPMQRT('L','C',MB-K , N, K, 0,NB, A(I,1), LDA, + $ T(1, CTR*K+1),LDT, C(1,1), LDC, + $ C(I,1), LDC, WORK, INFO ) + CTR = CTR + 1 +* + END DO + IF(II.LE.M) THEN +* +* Multiply Q to the last block of C +* + CALL CTPMQRT('L','C',KK , N, K, 0,NB, A(II,1), LDA, + $ T(1,CTR*K+1), LDT, C(1,1), LDC, + $ C(II,1), LDC, WORK, INFO ) +* + END IF +* + ELSE IF(RIGHT.AND.TRAN) THEN +* +* Multiply Q to the last block of C +* + KK = MOD((N-K),(MB-K)) + CTR = (N-K)/(MB-K) + IF (KK.GT.0) THEN + II=N-KK+1 + CALL CTPMQRT('R','C',M , KK, K, 0, NB, A(II,1), LDA, + $ T(1, CTR*K+1), LDT, C(1,1), LDC, + $ C(1,II), LDC, WORK, INFO ) + ELSE + II=N+1 + END IF +* + DO I=II-(MB-K),MB+1,-(MB-K) +* +* Multiply Q to the current block of C (1:M,I:I+MB) +* + CTR = CTR - 1 + CALL CTPMQRT('R','C',M , MB-K, K, 0,NB, A(I,1), LDA, + $ T(1,CTR*K+1), LDT, C(1,1), LDC, + $ C(1,I), LDC, WORK, INFO ) + END DO +* +* Multiply Q to the first block of C (1:M,1:MB) +* + CALL CGEMQRT('R','C',M , MB, K, NB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + ELSE IF (RIGHT.AND.NOTRAN) THEN +* +* Multiply Q to the first block of C +* + KK = MOD((N-K),(MB-K)) + II=N-KK+1 + CTR = 1 + CALL CGEMQRT('R','N', M, MB , K, NB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + DO I=MB+1,II-MB+K,(MB-K) +* +* Multiply Q to the current block of C (1:M,I:I+MB) +* + CALL CTPMQRT('R','N', M, MB-K, K, 0,NB, A(I,1), LDA, + $ T(1,CTR*K+1),LDT, C(1,1), LDC, + $ C(1,I), LDC, WORK, INFO ) + CTR = CTR + 1 +* + END DO + IF(II.LE.N) THEN +* +* Multiply Q to the last block of C +* + CALL CTPMQRT('R','N', M, KK , K, 0,NB, A(II,1), LDA, + $ T(1,CTR*K+1),LDT, C(1,1), LDC, + $ C(1,II), LDC, WORK, INFO ) +* + END IF +* + END IF +* + WORK(1) = LW + RETURN +* +* End of CLAMTSQR +* + END diff --git a/dspl/liblapack/SRC/clangb.f b/dspl/liblapack/SRC/clangb.f new file mode 100644 index 0000000..14a163e --- /dev/null +++ b/dspl/liblapack/SRC/clangb.f @@ -0,0 +1,226 @@ +*> \brief \b CLANGB returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of general band matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLANGB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLANGB( NORM, N, KL, KU, AB, LDAB, +* WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER KL, KU, LDAB, N +* .. +* .. Array Arguments .. +* REAL WORK( * ) +* COMPLEX AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLANGB returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of an +*> n by n band matrix A, with kl sub-diagonals and ku super-diagonals. +*> \endverbatim +*> +*> \return CLANGB +*> \verbatim +*> +*> CLANGB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in CLANGB as described +*> above. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, CLANGB is +*> set to zero. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of sub-diagonals of the matrix A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of super-diagonals of the matrix A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> The band matrix A, stored in rows 1 to KL+KU+1. The j-th +*> column of A is stored in the j-th column of the array AB as +*> follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGBauxiliary +* +* ===================================================================== + REAL FUNCTION CLANGB( NORM, N, KL, KU, AB, LDAB, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER KL, KU, LDAB, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, K, L + REAL SCALE, SUM, VALUE, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) + TEMP = ABS( AB( I, J ) ) + IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 30 CONTINUE + IF( VALUE.LT.SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, N + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + K = KU + 1 - J + DO 60 I = MAX( 1, J-KU ), MIN( N, J+KL ) + WORK( I ) = WORK( I ) + ABS( AB( K+I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, N + TEMP = WORK( I ) + IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + L = MAX( 1, J-KU ) + K = KU + 1 - J + L + CALL CLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANGB = VALUE + RETURN +* +* End of CLANGB +* + END diff --git a/dspl/liblapack/SRC/clange.f b/dspl/liblapack/SRC/clange.f new file mode 100644 index 0000000..50f705a --- /dev/null +++ b/dspl/liblapack/SRC/clange.f @@ -0,0 +1,213 @@ +*> \brief \b CLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLANGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLANGE( NORM, M, N, A, LDA, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER LDA, M, N +* .. +* .. Array Arguments .. +* REAL WORK( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLANGE returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> complex matrix A. +*> \endverbatim +*> +*> \return CLANGE +*> \verbatim +*> +*> CLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in CLANGE as described +*> above. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. When M = 0, +*> CLANGE is set to zero. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. When N = 0, +*> CLANGE is set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(M,1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= M when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEauxiliary +* +* ===================================================================== + REAL FUNCTION CLANGE( NORM, M, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL SCALE, SUM, VALUE, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, M + TEMP = ABS( A( I, J ) ) + IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, M + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + IF( VALUE.LT.SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, M + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, M + TEMP = WORK( I ) + IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL CLASSQ( M, A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANGE = VALUE + RETURN +* +* End of CLANGE +* + END diff --git a/dspl/liblapack/SRC/clangt.f b/dspl/liblapack/SRC/clangt.f new file mode 100644 index 0000000..836e127 --- /dev/null +++ b/dspl/liblapack/SRC/clangt.f @@ -0,0 +1,208 @@ +*> \brief \b CLANGT returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of a general tridiagonal matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLANGT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLANGT( NORM, N, DL, D, DU ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER N +* .. +* .. Array Arguments .. +* COMPLEX D( * ), DL( * ), DU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLANGT returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> complex tridiagonal matrix A. +*> \endverbatim +*> +*> \return CLANGT +*> \verbatim +*> +*> CLANGT = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in CLANGT as described +*> above. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, CLANGT is +*> set to zero. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is COMPLEX array, dimension (N-1) +*> The (n-1) sub-diagonal elements of A. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is COMPLEX array, dimension (N) +*> The diagonal elements of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is COMPLEX array, dimension (N-1) +*> The (n-1) super-diagonal elements of A. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION CLANGT( NORM, N, DL, D, DU ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER N +* .. +* .. Array Arguments .. + COMPLEX D( * ), DL( * ), DU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL ANORM, SCALE, SUM, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + ANORM = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + ANORM = ABS( D( N ) ) + DO 10 I = 1, N - 1 + IF( ANORM.LT.ABS( DL( I ) ) .OR. SISNAN( ABS( DL( I ) ) ) ) + $ ANORM = ABS(DL(I)) + IF( ANORM.LT.ABS( D( I ) ) .OR. SISNAN( ABS( D( I ) ) ) ) + $ ANORM = ABS(D(I)) + IF( ANORM.LT.ABS( DU( I ) ) .OR. SISNAN (ABS( DU( I ) ) ) ) + $ ANORM = ABS(DU(I)) + 10 CONTINUE + ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN +* +* Find norm1(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = ABS( D( 1 ) )+ABS( DL( 1 ) ) + TEMP = ABS( D( N ) )+ABS( DU( N-1 ) ) + IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP + DO 20 I = 2, N - 1 + TEMP = ABS( D( I ) )+ABS( DL( I ) )+ABS( DU( I-1 ) ) + IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP + 20 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = ABS( D( 1 ) )+ABS( DU( 1 ) ) + TEMP = ABS( D( N ) )+ABS( DL( N-1 ) ) + IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP + DO 30 I = 2, N - 1 + TEMP = ABS( D( I ) )+ABS( DU( I ) )+ABS( DL( I-1 ) ) + IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP + 30 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + CALL CLASSQ( N, D, 1, SCALE, SUM ) + IF( N.GT.1 ) THEN + CALL CLASSQ( N-1, DL, 1, SCALE, SUM ) + CALL CLASSQ( N-1, DU, 1, SCALE, SUM ) + END IF + ANORM = SCALE*SQRT( SUM ) + END IF +* + CLANGT = ANORM + RETURN +* +* End of CLANGT +* + END diff --git a/dspl/liblapack/SRC/clanhb.f b/dspl/liblapack/SRC/clanhb.f new file mode 100644 index 0000000..2b034b1 --- /dev/null +++ b/dspl/liblapack/SRC/clanhb.f @@ -0,0 +1,276 @@ +*> \brief \b CLANHB returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a Hermitian band matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLANHB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLANHB( NORM, UPLO, N, K, AB, LDAB, +* WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, UPLO +* INTEGER K, LDAB, N +* .. +* .. Array Arguments .. +* REAL WORK( * ) +* COMPLEX AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLANHB returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of an +*> n by n hermitian band matrix A, with k super-diagonals. +*> \endverbatim +*> +*> \return CLANHB +*> \verbatim +*> +*> CLANHB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in CLANHB as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> band matrix A is supplied. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, CLANHB is +*> set to zero. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of super-diagonals or sub-diagonals of the +*> band matrix A. K >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> The upper or lower triangle of the hermitian band matrix A, +*> stored in the first K+1 rows of AB. The j-th column of A is +*> stored in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). +*> Note that the imaginary parts of the diagonal elements need +*> not be set and are assumed to be zero. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= K+1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION CLANHB( NORM, UPLO, N, K, AB, LDAB, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER K, LDAB, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L + REAL ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = MAX( K+2-J, 1 ), K + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + SUM = ABS( REAL( AB( K+1, J ) ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 20 CONTINUE + ELSE + DO 40 J = 1, N + SUM = ABS( REAL( AB( 1, J ) ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + DO 30 I = 2, MIN( N+1-J, K+1 ) + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is hermitian). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + L = K + 1 - J + DO 50 I = MAX( 1, J-K ), J - 1 + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( REAL( AB( K+1, J ) ) ) + 60 CONTINUE + DO 70 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( REAL( AB( 1, J ) ) ) + L = 1 - J + DO 90 I = J + 1, MIN( N, J+K ) + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( K.GT.0 ) THEN + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL CLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ), + $ 1, SCALE, SUM ) + 110 CONTINUE + L = K + 1 + ELSE + DO 120 J = 1, N - 1 + CALL CLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, + $ SUM ) + 120 CONTINUE + L = 1 + END IF + SUM = 2*SUM + ELSE + L = 1 + END IF + DO 130 J = 1, N + IF( REAL( AB( L, J ) ).NE.ZERO ) THEN + ABSA = ABS( REAL( AB( L, J ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANHB = VALUE + RETURN +* +* End of CLANHB +* + END diff --git a/dspl/liblapack/SRC/clanhe.f b/dspl/liblapack/SRC/clanhe.f new file mode 100644 index 0000000..101d778 --- /dev/null +++ b/dspl/liblapack/SRC/clanhe.f @@ -0,0 +1,258 @@ +*> \brief \b CLANHE returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a complex Hermitian matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLANHE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLANHE( NORM, UPLO, N, A, LDA, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, UPLO +* INTEGER LDA, N +* .. +* .. Array Arguments .. +* REAL WORK( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLANHE returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> complex hermitian matrix A. +*> \endverbatim +*> +*> \return CLANHE +*> \verbatim +*> +*> CLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in CLANHE as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> hermitian matrix A is to be referenced. +*> = 'U': Upper triangular part of A is referenced +*> = 'L': Lower triangular part of A is referenced +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, CLANHE is +*> set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The hermitian matrix A. If UPLO = 'U', the leading n by n +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading n by n lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. Note that the imaginary parts of the diagonal +*> elements need not be set and are assumed to be zero. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(N,1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEauxiliary +* +* ===================================================================== + REAL FUNCTION CLANHE( NORM, UPLO, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER LDA, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, J - 1 + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + SUM = ABS( REAL( A( J, J ) ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 20 CONTINUE + ELSE + DO 40 J = 1, N + SUM = ABS( REAL( A( J, J ) ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + DO 30 I = J + 1, N + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is hermitian). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( REAL( A( J, J ) ) ) + 60 CONTINUE + DO 70 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( REAL( A( J, J ) ) ) + DO 90 I = J + 1, N + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL CLASSQ( J-1, A( 1, J ), 1, SCALE, SUM ) + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL CLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM ) + 120 CONTINUE + END IF + SUM = 2*SUM + DO 130 I = 1, N + IF( REAL( A( I, I ) ).NE.ZERO ) THEN + ABSA = ABS( REAL( A( I, I ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANHE = VALUE + RETURN +* +* End of CLANHE +* + END diff --git a/dspl/liblapack/SRC/clanhf.f b/dspl/liblapack/SRC/clanhf.f new file mode 100644 index 0000000..13e5fe3 --- /dev/null +++ b/dspl/liblapack/SRC/clanhf.f @@ -0,0 +1,1573 @@ +*> \brief \b CLANHF returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a Hermitian matrix in RFP format. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLANHF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLANHF( NORM, TRANSR, UPLO, N, A, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, TRANSR, UPLO +* INTEGER N +* .. +* .. Array Arguments .. +* REAL WORK( 0: * ) +* COMPLEX A( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLANHF returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> complex Hermitian matrix A in RFP format. +*> \endverbatim +*> +*> \return CLANHF +*> \verbatim +*> +*> CLANHF = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER +*> Specifies the value to be returned in CLANHF as described +*> above. +*> \endverbatim +*> +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER +*> Specifies whether the RFP format of A is normal or +*> conjugate-transposed format. +*> = 'N': RFP format is Normal +*> = 'C': RFP format is Conjugate-transposed +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER +*> On entry, UPLO specifies whether the RFP matrix A came from +*> an upper or lower triangular matrix as follows: +*> +*> UPLO = 'U' or 'u' RFP A came from an upper triangular +*> matrix +*> +*> UPLO = 'L' or 'l' RFP A came from a lower triangular +*> matrix +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, CLANHF is +*> set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension ( N*(N+1)/2 ); +*> On entry, the matrix A in RFP Format. +*> RFP Format is described by TRANSR, UPLO and N as follows: +*> If TRANSR='N' then RFP A is (0:N,0:K-1) when N is even; +*> K=N/2. RFP A is (0:N-1,0:K) when N is odd; K=N/2. If +*> TRANSR = 'C' then RFP is the Conjugate-transpose of RFP A +*> as defined when TRANSR = 'N'. The contents of RFP A are +*> defined by UPLO as follows: If UPLO = 'U' the RFP A +*> contains the ( N*(N+1)/2 ) elements of upper packed A +*> either in normal or conjugate-transpose Format. If +*> UPLO = 'L' the RFP A contains the ( N*(N+1) /2 ) elements +*> of lower packed A either in normal or conjugate-transpose +*> Format. The LDA of RFP A is (N+1)/2 when TRANSR = 'C'. When +*> TRANSR is 'N' the LDA is N+1 when N is even and is N when +*> is odd. See the Note below for more details. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Standard Packed Format when N is even. +*> We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> conjugate-transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> conjugate-transpose of the last three columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- -- +*> 03 04 05 33 43 53 +*> -- -- +*> 13 14 15 00 44 54 +*> -- +*> 23 24 25 10 11 55 +*> +*> 33 34 35 20 21 22 +*> -- +*> 00 44 45 30 31 32 +*> -- -- +*> 01 11 55 40 41 42 +*> -- -- -- +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- -- +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We next consider Standard Packed Format when N is odd. +*> We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> conjugate-transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> conjugate-transpose of the last two columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- +*> 02 03 04 00 33 43 +*> -- +*> 12 13 14 10 11 44 +*> +*> 22 23 24 20 21 22 +*> -- +*> 00 33 34 30 31 32 +*> -- -- +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- +*> 02 12 22 00 01 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 11 33 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + REAL FUNCTION CLANHF( NORM, TRANSR, UPLO, N, A, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, TRANSR, UPLO + INTEGER N +* .. +* .. Array Arguments .. + REAL WORK( 0: * ) + COMPLEX A( 0: * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, IFM, ILU, NOE, N1, K, L, LDA + REAL SCALE, S, VALUE, AA, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + CLANHF = ZERO + RETURN + ELSE IF( N.EQ.1 ) THEN + CLANHF = ABS(REAL(A(0))) + RETURN + END IF +* +* set noe = 1 if n is odd. if n is even set noe=0 +* + NOE = 1 + IF( MOD( N, 2 ).EQ.0 ) + $ NOE = 0 +* +* set ifm = 0 when form='C' or 'c' and 1 otherwise +* + IFM = 1 + IF( LSAME( TRANSR, 'C' ) ) + $ IFM = 0 +* +* set ilu = 0 when uplo='U or 'u' and 1 otherwise +* + ILU = 1 + IF( LSAME( UPLO, 'U' ) ) + $ ILU = 0 +* +* set lda = (n+1)/2 when ifm = 0 +* set lda = n when ifm = 1 and noe = 1 +* set lda = n+1 when ifm = 1 and noe = 0 +* + IF( IFM.EQ.1 ) THEN + IF( NOE.EQ.1 ) THEN + LDA = N + ELSE +* noe=0 + LDA = N + 1 + END IF + ELSE +* ifm=0 + LDA = ( N+1 ) / 2 + END IF +* + IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + K = ( N+1 ) / 2 + VALUE = ZERO + IF( NOE.EQ.1 ) THEN +* n is odd & n = k + k - 1 + IF( IFM.EQ.1 ) THEN +* A is n by k + IF( ILU.EQ.1 ) THEN +* uplo ='L' + J = 0 +* -> L(0,0) + TEMP = ABS( REAL( A( J+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = 1, N - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + DO J = 1, K - 1 + DO I = 0, J - 2 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = J - 1 +* L(k+j,k+j) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + I = J +* -> L(j,j) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = J + 1, N - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + ELSE +* uplo = 'U' + DO J = 0, K - 2 + DO I = 0, K + J - 2 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = K + J - 1 +* -> U(i,i) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + I = I + 1 +* =k+j; i -> U(j,j) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = K + J + 1, N - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + DO I = 0, N - 2 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP +* j=k-1 + END DO +* i=n-1 -> U(n-1,n-1) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END IF + ELSE +* xpose case; A is k by n + IF( ILU.EQ.1 ) THEN +* uplo ='L' + DO J = 0, K - 2 + DO I = 0, J - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = J +* L(i,i) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + I = J + 1 +* L(j+k,j+k) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = J + 2, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + J = K - 1 + DO I = 0, K - 2 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = K - 1 +* -> L(i,i) is at A(i,j) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + DO J = K, N - 1 + DO I = 0, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + ELSE +* uplo = 'U' + DO J = 0, K - 2 + DO I = 0, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + J = K - 1 +* -> U(j,j) is at A(0,j) + TEMP = ABS( REAL( A( 0+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = 1, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + DO J = K, N - 1 + DO I = 0, J - K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = J - K +* -> U(i,i) at A(i,j) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + I = J - K + 1 +* U(j,j) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = J - K + 2, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + END IF + END IF + ELSE +* n is even & k = n/2 + IF( IFM.EQ.1 ) THEN +* A is n+1 by k + IF( ILU.EQ.1 ) THEN +* uplo ='L' + J = 0 +* -> L(k,k) & j=1 -> L(0,0) + TEMP = ABS( REAL( A( J+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + TEMP = ABS( REAL( A( J+1+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = 2, N + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + DO J = 1, K - 1 + DO I = 0, J - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = J +* L(k+j,k+j) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + I = J + 1 +* -> L(j,j) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = J + 2, N + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + ELSE +* uplo = 'U' + DO J = 0, K - 2 + DO I = 0, K + J - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = K + J +* -> U(i,i) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + I = I + 1 +* =k+j+1; i -> U(j,j) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = K + J + 2, N + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + DO I = 0, N - 2 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP +* j=k-1 + END DO +* i=n-1 -> U(n-1,n-1) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + I = N +* -> U(k-1,k-1) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END IF + ELSE +* xpose case; A is k by n+1 + IF( ILU.EQ.1 ) THEN +* uplo ='L' + J = 0 +* -> L(k,k) at A(0,0) + TEMP = ABS( REAL( A( J+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = 1, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + DO J = 1, K - 1 + DO I = 0, J - 2 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = J - 1 +* L(i,i) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + I = J +* L(j+k,j+k) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = J + 1, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + J = K + DO I = 0, K - 2 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = K - 1 +* -> L(i,i) is at A(i,j) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + DO J = K + 1, N + DO I = 0, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + ELSE +* uplo = 'U' + DO J = 0, K - 1 + DO I = 0, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + J = K +* -> U(j,j) is at A(0,j) + TEMP = ABS( REAL( A( 0+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = 1, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + DO J = K + 1, N - 1 + DO I = 0, J - K - 2 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = J - K - 1 +* -> U(i,i) at A(i,j) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + I = J - K +* U(j,j) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = J - K + 1, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + J = N + DO I = 0, K - 2 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = K - 1 +* U(k,k) at A(i,j) + TEMP = ABS( REAL( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END IF + END IF + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is Hermitian). +* + IF( IFM.EQ.1 ) THEN +* A is 'N' + K = N / 2 + IF( NOE.EQ.1 ) THEN +* n is odd & A is n by (n+1)/2 + IF( ILU.EQ.0 ) THEN +* uplo = 'U' + DO I = 0, K - 1 + WORK( I ) = ZERO + END DO + DO J = 0, K + S = ZERO + DO I = 0, K + J - 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(i,j+k) + S = S + AA + WORK( I ) = WORK( I ) + AA + END DO + AA = ABS( REAL( A( I+J*LDA ) ) ) +* -> A(j+k,j+k) + WORK( J+K ) = S + AA + IF( I.EQ.K+K ) + $ GO TO 10 + I = I + 1 + AA = ABS( REAL( A( I+J*LDA ) ) ) +* -> A(j,j) + WORK( J ) = WORK( J ) + AA + S = ZERO + DO L = J + 1, K - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(l,j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + 10 CONTINUE + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + ELSE +* ilu = 1 & uplo = 'L' + K = K + 1 +* k=(n+1)/2 for n odd and ilu=1 + DO I = K, N - 1 + WORK( I ) = ZERO + END DO + DO J = K - 1, 0, -1 + S = ZERO + DO I = 0, J - 2 + AA = ABS( A( I+J*LDA ) ) +* -> A(j+k,i+k) + S = S + AA + WORK( I+K ) = WORK( I+K ) + AA + END DO + IF( J.GT.0 ) THEN + AA = ABS( REAL( A( I+J*LDA ) ) ) +* -> A(j+k,j+k) + S = S + AA + WORK( I+K ) = WORK( I+K ) + S +* i=j + I = I + 1 + END IF + AA = ABS( REAL( A( I+J*LDA ) ) ) +* -> A(j,j) + WORK( J ) = AA + S = ZERO + DO L = J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(l,j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END IF + ELSE +* n is even & A is n+1 by k = n/2 + IF( ILU.EQ.0 ) THEN +* uplo = 'U' + DO I = 0, K - 1 + WORK( I ) = ZERO + END DO + DO J = 0, K - 1 + S = ZERO + DO I = 0, K + J - 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(i,j+k) + S = S + AA + WORK( I ) = WORK( I ) + AA + END DO + AA = ABS( REAL( A( I+J*LDA ) ) ) +* -> A(j+k,j+k) + WORK( J+K ) = S + AA + I = I + 1 + AA = ABS( REAL( A( I+J*LDA ) ) ) +* -> A(j,j) + WORK( J ) = WORK( J ) + AA + S = ZERO + DO L = J + 1, K - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(l,j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + ELSE +* ilu = 1 & uplo = 'L' + DO I = K, N - 1 + WORK( I ) = ZERO + END DO + DO J = K - 1, 0, -1 + S = ZERO + DO I = 0, J - 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(j+k,i+k) + S = S + AA + WORK( I+K ) = WORK( I+K ) + AA + END DO + AA = ABS( REAL( A( I+J*LDA ) ) ) +* -> A(j+k,j+k) + S = S + AA + WORK( I+K ) = WORK( I+K ) + S +* i=j + I = I + 1 + AA = ABS( REAL( A( I+J*LDA ) ) ) +* -> A(j,j) + WORK( J ) = AA + S = ZERO + DO L = J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(l,j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END IF + END IF + ELSE +* ifm=0 + K = N / 2 + IF( NOE.EQ.1 ) THEN +* n is odd & A is (n+1)/2 by n + IF( ILU.EQ.0 ) THEN +* uplo = 'U' + N1 = K +* n/2 + K = K + 1 +* k is the row size and lda + DO I = N1, N - 1 + WORK( I ) = ZERO + END DO + DO J = 0, N1 - 1 + S = ZERO + DO I = 0, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,n1+i) + WORK( I+N1 ) = WORK( I+N1 ) + AA + S = S + AA + END DO + WORK( J ) = S + END DO +* j=n1=k-1 is special + S = ABS( REAL( A( 0+J*LDA ) ) ) +* A(k-1,k-1) + DO I = 1, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(k-1,i+n1) + WORK( I+N1 ) = WORK( I+N1 ) + AA + S = S + AA + END DO + WORK( J ) = WORK( J ) + S + DO J = K, N - 1 + S = ZERO + DO I = 0, J - K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(i,j-k) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO +* i=j-k + AA = ABS( REAL( A( I+J*LDA ) ) ) +* A(j-k,j-k) + S = S + AA + WORK( J-K ) = WORK( J-K ) + S + I = I + 1 + S = ABS( REAL( A( I+J*LDA ) ) ) +* A(j,j) + DO L = J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,l) + WORK( L ) = WORK( L ) + AA + S = S + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + ELSE +* ilu=1 & uplo = 'L' + K = K + 1 +* k=(n+1)/2 for n odd and ilu=1 + DO I = K, N - 1 + WORK( I ) = ZERO + END DO + DO J = 0, K - 2 +* process + S = ZERO + DO I = 0, J - 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO + AA = ABS( REAL( A( I+J*LDA ) ) ) +* i=j so process of A(j,j) + S = S + AA + WORK( J ) = S +* is initialised here + I = I + 1 +* i=j process A(j+k,j+k) + AA = ABS( REAL( A( I+J*LDA ) ) ) + S = AA + DO L = K + J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* A(l,k+j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( K+J ) = WORK( K+J ) + S + END DO +* j=k-1 is special :process col A(k-1,0:k-1) + S = ZERO + DO I = 0, K - 2 + AA = ABS( A( I+J*LDA ) ) +* A(k,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO +* i=k-1 + AA = ABS( REAL( A( I+J*LDA ) ) ) +* A(k-1,k-1) + S = S + AA + WORK( I ) = S +* done with col j=k+1 + DO J = K, N - 1 +* process col j of A = A(j,0:k-1) + S = ZERO + DO I = 0, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END IF + ELSE +* n is even & A is k=n/2 by n+1 + IF( ILU.EQ.0 ) THEN +* uplo = 'U' + DO I = K, N - 1 + WORK( I ) = ZERO + END DO + DO J = 0, K - 1 + S = ZERO + DO I = 0, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,i+k) + WORK( I+K ) = WORK( I+K ) + AA + S = S + AA + END DO + WORK( J ) = S + END DO +* j=k + AA = ABS( REAL( A( 0+J*LDA ) ) ) +* A(k,k) + S = AA + DO I = 1, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(k,k+i) + WORK( I+K ) = WORK( I+K ) + AA + S = S + AA + END DO + WORK( J ) = WORK( J ) + S + DO J = K + 1, N - 1 + S = ZERO + DO I = 0, J - 2 - K + AA = ABS( A( I+J*LDA ) ) +* A(i,j-k-1) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO +* i=j-1-k + AA = ABS( REAL( A( I+J*LDA ) ) ) +* A(j-k-1,j-k-1) + S = S + AA + WORK( J-K-1 ) = WORK( J-K-1 ) + S + I = I + 1 + AA = ABS( REAL( A( I+J*LDA ) ) ) +* A(j,j) + S = AA + DO L = J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,l) + WORK( L ) = WORK( L ) + AA + S = S + AA + END DO + WORK( J ) = WORK( J ) + S + END DO +* j=n + S = ZERO + DO I = 0, K - 2 + AA = ABS( A( I+J*LDA ) ) +* A(i,k-1) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO +* i=k-1 + AA = ABS( REAL( A( I+J*LDA ) ) ) +* A(k-1,k-1) + S = S + AA + WORK( I ) = WORK( I ) + S + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + ELSE +* ilu=1 & uplo = 'L' + DO I = K, N - 1 + WORK( I ) = ZERO + END DO +* j=0 is special :process col A(k:n-1,k) + S = ABS( REAL( A( 0 ) ) ) +* A(k,k) + DO I = 1, K - 1 + AA = ABS( A( I ) ) +* A(k+i,k) + WORK( I+K ) = WORK( I+K ) + AA + S = S + AA + END DO + WORK( K ) = WORK( K ) + S + DO J = 1, K - 1 +* process + S = ZERO + DO I = 0, J - 2 + AA = ABS( A( I+J*LDA ) ) +* A(j-1,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO + AA = ABS( REAL( A( I+J*LDA ) ) ) +* i=j-1 so process of A(j-1,j-1) + S = S + AA + WORK( J-1 ) = S +* is initialised here + I = I + 1 +* i=j process A(j+k,j+k) + AA = ABS( REAL( A( I+J*LDA ) ) ) + S = AA + DO L = K + J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* A(l,k+j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( K+J ) = WORK( K+J ) + S + END DO +* j=k is special :process col A(k,0:k-1) + S = ZERO + DO I = 0, K - 2 + AA = ABS( A( I+J*LDA ) ) +* A(k,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO +* +* i=k-1 + AA = ABS( REAL( A( I+J*LDA ) ) ) +* A(k-1,k-1) + S = S + AA + WORK( I ) = S +* done with col j=k+1 + DO J = K + 1, N +* +* process col j-1 of A = A(j-1,0:k-1) + S = ZERO + DO I = 0, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(j-1,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO + WORK( J-1 ) = WORK( J-1 ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END IF + END IF + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + K = ( N+1 ) / 2 + SCALE = ZERO + S = ONE + IF( NOE.EQ.1 ) THEN +* n is odd + IF( IFM.EQ.1 ) THEN +* A is normal & A is n by k + IF( ILU.EQ.0 ) THEN +* A is upper + DO J = 0, K - 3 + CALL CLASSQ( K-J-2, A( K+J+1+J*LDA ), 1, SCALE, S ) +* L at A(k,0) + END DO + DO J = 0, K - 1 + CALL CLASSQ( K+J-1, A( 0+J*LDA ), 1, SCALE, S ) +* trap U at A(0,0) + END DO + S = S + S +* double s for the off diagonal elements + L = K - 1 +* -> U(k,k) at A(k-1,0) + DO I = 0, K - 2 + AA = REAL( A( L ) ) +* U(k+i,k+i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + AA = REAL( A( L+1 ) ) +* U(i,i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = L + LDA + 1 + END DO + AA = REAL( A( L ) ) +* U(n-1,n-1) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + ELSE +* ilu=1 & A is lower + DO J = 0, K - 1 + CALL CLASSQ( N-J-1, A( J+1+J*LDA ), 1, SCALE, S ) +* trap L at A(0,0) + END DO + DO J = 1, K - 2 + CALL CLASSQ( J, A( 0+( 1+J )*LDA ), 1, SCALE, S ) +* U at A(0,1) + END DO + S = S + S +* double s for the off diagonal elements + AA = REAL( A( 0 ) ) +* L(0,0) at A(0,0) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = LDA +* -> L(k,k) at A(0,1) + DO I = 1, K - 1 + AA = REAL( A( L ) ) +* L(k-1+i,k-1+i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + AA = REAL( A( L+1 ) ) +* L(i,i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = L + LDA + 1 + END DO + END IF + ELSE +* A is xpose & A is k by n + IF( ILU.EQ.0 ) THEN +* A**H is upper + DO J = 1, K - 2 + CALL CLASSQ( J, A( 0+( K+J )*LDA ), 1, SCALE, S ) +* U at A(0,k) + END DO + DO J = 0, K - 2 + CALL CLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) +* k by k-1 rect. at A(0,0) + END DO + DO J = 0, K - 2 + CALL CLASSQ( K-J-1, A( J+1+( J+K-1 )*LDA ), 1, + $ SCALE, S ) +* L at A(0,k-1) + END DO + S = S + S +* double s for the off diagonal elements + L = 0 + K*LDA - LDA +* -> U(k-1,k-1) at A(0,k-1) + AA = REAL( A( L ) ) +* U(k-1,k-1) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = L + LDA +* -> U(0,0) at A(0,k) + DO J = K, N - 1 + AA = REAL( A( L ) ) +* -> U(j-k,j-k) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + AA = REAL( A( L+1 ) ) +* -> U(j,j) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = L + LDA + 1 + END DO + ELSE +* A**H is lower + DO J = 1, K - 1 + CALL CLASSQ( J, A( 0+J*LDA ), 1, SCALE, S ) +* U at A(0,0) + END DO + DO J = K, N - 1 + CALL CLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) +* k by k-1 rect. at A(0,k) + END DO + DO J = 0, K - 3 + CALL CLASSQ( K-J-2, A( J+2+J*LDA ), 1, SCALE, S ) +* L at A(1,0) + END DO + S = S + S +* double s for the off diagonal elements + L = 0 +* -> L(0,0) at A(0,0) + DO I = 0, K - 2 + AA = REAL( A( L ) ) +* L(i,i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + AA = REAL( A( L+1 ) ) +* L(k+i,k+i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = L + LDA + 1 + END DO +* L-> k-1 + (k-1)*lda or L(k-1,k-1) at A(k-1,k-1) + AA = REAL( A( L ) ) +* L(k-1,k-1) at A(k-1,k-1) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + END IF + END IF + ELSE +* n is even + IF( IFM.EQ.1 ) THEN +* A is normal + IF( ILU.EQ.0 ) THEN +* A is upper + DO J = 0, K - 2 + CALL CLASSQ( K-J-1, A( K+J+2+J*LDA ), 1, SCALE, S ) +* L at A(k+1,0) + END DO + DO J = 0, K - 1 + CALL CLASSQ( K+J, A( 0+J*LDA ), 1, SCALE, S ) +* trap U at A(0,0) + END DO + S = S + S +* double s for the off diagonal elements + L = K +* -> U(k,k) at A(k,0) + DO I = 0, K - 1 + AA = REAL( A( L ) ) +* U(k+i,k+i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + AA = REAL( A( L+1 ) ) +* U(i,i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = L + LDA + 1 + END DO + ELSE +* ilu=1 & A is lower + DO J = 0, K - 1 + CALL CLASSQ( N-J-1, A( J+2+J*LDA ), 1, SCALE, S ) +* trap L at A(1,0) + END DO + DO J = 1, K - 1 + CALL CLASSQ( J, A( 0+J*LDA ), 1, SCALE, S ) +* U at A(0,0) + END DO + S = S + S +* double s for the off diagonal elements + L = 0 +* -> L(k,k) at A(0,0) + DO I = 0, K - 1 + AA = REAL( A( L ) ) +* L(k-1+i,k-1+i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + AA = REAL( A( L+1 ) ) +* L(i,i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = L + LDA + 1 + END DO + END IF + ELSE +* A is xpose + IF( ILU.EQ.0 ) THEN +* A**H is upper + DO J = 1, K - 1 + CALL CLASSQ( J, A( 0+( K+1+J )*LDA ), 1, SCALE, S ) +* U at A(0,k+1) + END DO + DO J = 0, K - 1 + CALL CLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) +* k by k rect. at A(0,0) + END DO + DO J = 0, K - 2 + CALL CLASSQ( K-J-1, A( J+1+( J+K )*LDA ), 1, SCALE, + $ S ) +* L at A(0,k) + END DO + S = S + S +* double s for the off diagonal elements + L = 0 + K*LDA +* -> U(k,k) at A(0,k) + AA = REAL( A( L ) ) +* U(k,k) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = L + LDA +* -> U(0,0) at A(0,k+1) + DO J = K + 1, N - 1 + AA = REAL( A( L ) ) +* -> U(j-k-1,j-k-1) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + AA = REAL( A( L+1 ) ) +* -> U(j,j) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = L + LDA + 1 + END DO +* L=k-1+n*lda +* -> U(k-1,k-1) at A(k-1,n) + AA = REAL( A( L ) ) +* U(k,k) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + ELSE +* A**H is lower + DO J = 1, K - 1 + CALL CLASSQ( J, A( 0+( J+1 )*LDA ), 1, SCALE, S ) +* U at A(0,1) + END DO + DO J = K + 1, N + CALL CLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) +* k by k rect. at A(0,k+1) + END DO + DO J = 0, K - 2 + CALL CLASSQ( K-J-1, A( J+1+J*LDA ), 1, SCALE, S ) +* L at A(0,0) + END DO + S = S + S +* double s for the off diagonal elements + L = 0 +* -> L(k,k) at A(0,0) + AA = REAL( A( L ) ) +* L(k,k) at A(0,0) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = LDA +* -> L(0,0) at A(0,1) + DO I = 0, K - 2 + AA = REAL( A( L ) ) +* L(i,i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + AA = REAL( A( L+1 ) ) +* L(k+i+1,k+i+1) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = L + LDA + 1 + END DO +* L-> k - 1 + k*lda or L(k-1,k-1) at A(k-1,k) + AA = REAL( A( L ) ) +* L(k-1,k-1) at A(k-1,k) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + END IF + END IF + END IF + VALUE = SCALE*SQRT( S ) + END IF +* + CLANHF = VALUE + RETURN +* +* End of CLANHF +* + END diff --git a/dspl/liblapack/SRC/clanhp.f b/dspl/liblapack/SRC/clanhp.f new file mode 100644 index 0000000..c8927d5 --- /dev/null +++ b/dspl/liblapack/SRC/clanhp.f @@ -0,0 +1,269 @@ +*> \brief \b CLANHP returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a complex Hermitian matrix supplied in packed form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLANHP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLANHP( NORM, UPLO, N, AP, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, UPLO +* INTEGER N +* .. +* .. Array Arguments .. +* REAL WORK( * ) +* COMPLEX AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLANHP returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> complex hermitian matrix A, supplied in packed form. +*> \endverbatim +*> +*> \return CLANHP +*> \verbatim +*> +*> CLANHP = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in CLANHP as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> hermitian matrix A is supplied. +*> = 'U': Upper triangular part of A is supplied +*> = 'L': Lower triangular part of A is supplied +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, CLANHP is +*> set to zero. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the hermitian matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> Note that the imaginary parts of the diagonal elements need +*> not be set and are assumed to be zero. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION CLANHP( NORM, UPLO, N, AP, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, K + REAL ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + K = 0 + DO 20 J = 1, N + DO 10 I = K + 1, K + J - 1 + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + K = K + J + SUM = ABS( REAL( AP( K ) ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 20 CONTINUE + ELSE + K = 1 + DO 40 J = 1, N + SUM = ABS( REAL( AP( K ) ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + DO 30 I = K + 1, K + N - J + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + K = K + N - J + 1 + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is hermitian). +* + VALUE = ZERO + K = 1 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 50 CONTINUE + WORK( J ) = SUM + ABS( REAL( AP( K ) ) ) + K = K + 1 + 60 CONTINUE + DO 70 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( REAL( AP( K ) ) ) + K = K + 1 + DO 90 I = J + 1, N + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 90 CONTINUE + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + K = 2 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL CLASSQ( J-1, AP( K ), 1, SCALE, SUM ) + K = K + J + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL CLASSQ( N-J, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 120 CONTINUE + END IF + SUM = 2*SUM + K = 1 + DO 130 I = 1, N + IF( REAL( AP( K ) ).NE.ZERO ) THEN + ABSA = ABS( REAL( AP( K ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + IF( LSAME( UPLO, 'U' ) ) THEN + K = K + I + 1 + ELSE + K = K + N - I + 1 + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANHP = VALUE + RETURN +* +* End of CLANHP +* + END diff --git a/dspl/liblapack/SRC/clanhs.f b/dspl/liblapack/SRC/clanhs.f new file mode 100644 index 0000000..35623b7 --- /dev/null +++ b/dspl/liblapack/SRC/clanhs.f @@ -0,0 +1,207 @@ +*> \brief \b CLANHS returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of an upper Hessenberg matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLANHS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLANHS( NORM, N, A, LDA, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER LDA, N +* .. +* .. Array Arguments .. +* REAL WORK( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLANHS returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> Hessenberg matrix A. +*> \endverbatim +*> +*> \return CLANHS +*> \verbatim +*> +*> CLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in CLANHS as described +*> above. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, CLANHS is +*> set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The n by n upper Hessenberg matrix A; the part of A below the +*> first sub-diagonal is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(N,1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION CLANHS( NORM, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, MIN( N, J+1 ) + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, MIN( N, J+1 ) + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, N + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, MIN( N, J+1 ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL CLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANHS = VALUE + RETURN +* +* End of CLANHS +* + END diff --git a/dspl/liblapack/SRC/clanht.f b/dspl/liblapack/SRC/clanht.f new file mode 100644 index 0000000..9e2be72 --- /dev/null +++ b/dspl/liblapack/SRC/clanht.f @@ -0,0 +1,188 @@ +*> \brief \b CLANHT returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a complex Hermitian tridiagonal matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLANHT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLANHT( NORM, N, D, E ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER N +* .. +* .. Array Arguments .. +* REAL D( * ) +* COMPLEX E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLANHT returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> complex Hermitian tridiagonal matrix A. +*> \endverbatim +*> +*> \return CLANHT +*> \verbatim +*> +*> CLANHT = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in CLANHT as described +*> above. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, CLANHT is +*> set to zero. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of A. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX array, dimension (N-1) +*> The (n-1) sub-diagonal or super-diagonal elements of A. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION CLANHT( NORM, N, D, E ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER N +* .. +* .. Array Arguments .. + REAL D( * ) + COMPLEX E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL ANORM, SCALE, SUM +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ, SLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + ANORM = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + ANORM = ABS( D( N ) ) + DO 10 I = 1, N - 1 + SUM = ABS( D( I ) ) + IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM + SUM = ABS( E( I ) ) + IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM + 10 CONTINUE + ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' .OR. + $ LSAME( NORM, 'I' ) ) THEN +* +* Find norm1(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = ABS( D( 1 ) )+ABS( E( 1 ) ) + SUM = ABS( E( N-1 ) )+ABS( D( N ) ) + IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM + DO 20 I = 2, N - 1 + SUM = ABS( D( I ) )+ABS( E( I ) )+ABS( E( I-1 ) ) + IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM + 20 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( N.GT.1 ) THEN + CALL CLASSQ( N-1, E, 1, SCALE, SUM ) + SUM = 2*SUM + END IF + CALL SLASSQ( N, D, 1, SCALE, SUM ) + ANORM = SCALE*SQRT( SUM ) + END IF +* + CLANHT = ANORM + RETURN +* +* End of CLANHT +* + END diff --git a/dspl/liblapack/SRC/clansb.f b/dspl/liblapack/SRC/clansb.f new file mode 100644 index 0000000..fbc5067 --- /dev/null +++ b/dspl/liblapack/SRC/clansb.f @@ -0,0 +1,260 @@ +*> \brief \b CLANSB returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a symmetric band matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLANSB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLANSB( NORM, UPLO, N, K, AB, LDAB, +* WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, UPLO +* INTEGER K, LDAB, N +* .. +* .. Array Arguments .. +* REAL WORK( * ) +* COMPLEX AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLANSB returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of an +*> n by n symmetric band matrix A, with k super-diagonals. +*> \endverbatim +*> +*> \return CLANSB +*> \verbatim +*> +*> CLANSB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in CLANSB as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> band matrix A is supplied. +*> = 'U': Upper triangular part is supplied +*> = 'L': Lower triangular part is supplied +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, CLANSB is +*> set to zero. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of super-diagonals or sub-diagonals of the +*> band matrix A. K >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> The upper or lower triangle of the symmetric band matrix A, +*> stored in the first K+1 rows of AB. The j-th column of A is +*> stored in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= K+1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION CLANSB( NORM, UPLO, N, K, AB, LDAB, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER K, LDAB, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L + REAL ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = MAX( K+2-J, 1 ), K + 1 + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = 1, MIN( N+1-J, K+1 ) + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + L = K + 1 - J + DO 50 I = MAX( 1, J-K ), J - 1 + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( AB( K+1, J ) ) + 60 CONTINUE + DO 70 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( AB( 1, J ) ) + L = 1 - J + DO 90 I = J + 1, MIN( N, J+K ) + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( K.GT.0 ) THEN + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL CLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ), + $ 1, SCALE, SUM ) + 110 CONTINUE + L = K + 1 + ELSE + DO 120 J = 1, N - 1 + CALL CLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, + $ SUM ) + 120 CONTINUE + L = 1 + END IF + SUM = 2*SUM + ELSE + L = 1 + END IF + CALL CLASSQ( N, AB( L, 1 ), LDAB, SCALE, SUM ) + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANSB = VALUE + RETURN +* +* End of CLANSB +* + END diff --git a/dspl/liblapack/SRC/clansp.f b/dspl/liblapack/SRC/clansp.f new file mode 100644 index 0000000..fd64366 --- /dev/null +++ b/dspl/liblapack/SRC/clansp.f @@ -0,0 +1,272 @@ +*> \brief \b CLANSP returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a symmetric matrix supplied in packed form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLANSP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLANSP( NORM, UPLO, N, AP, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, UPLO +* INTEGER N +* .. +* .. Array Arguments .. +* REAL WORK( * ) +* COMPLEX AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLANSP returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> complex symmetric matrix A, supplied in packed form. +*> \endverbatim +*> +*> \return CLANSP +*> \verbatim +*> +*> CLANSP = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in CLANSP as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is supplied. +*> = 'U': Upper triangular part of A is supplied +*> = 'L': Lower triangular part of A is supplied +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, CLANSP is +*> set to zero. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the symmetric matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION CLANSP( NORM, UPLO, N, AP, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, K + REAL ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, REAL, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + K = 1 + DO 20 J = 1, N + DO 10 I = K, K + J - 1 + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + K = K + J + 20 CONTINUE + ELSE + K = 1 + DO 40 J = 1, N + DO 30 I = K, K + N - J + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + K = K + N - J + 1 + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + K = 1 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 50 CONTINUE + WORK( J ) = SUM + ABS( AP( K ) ) + K = K + 1 + 60 CONTINUE + DO 70 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( AP( K ) ) + K = K + 1 + DO 90 I = J + 1, N + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 90 CONTINUE + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + K = 2 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL CLASSQ( J-1, AP( K ), 1, SCALE, SUM ) + K = K + J + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL CLASSQ( N-J, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 120 CONTINUE + END IF + SUM = 2*SUM + K = 1 + DO 130 I = 1, N + IF( REAL( AP( K ) ).NE.ZERO ) THEN + ABSA = ABS( REAL( AP( K ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + IF( AIMAG( AP( K ) ).NE.ZERO ) THEN + ABSA = ABS( AIMAG( AP( K ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + IF( LSAME( UPLO, 'U' ) ) THEN + K = K + I + 1 + ELSE + K = K + N - I + 1 + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANSP = VALUE + RETURN +* +* End of CLANSP +* + END diff --git a/dspl/liblapack/SRC/clansy.f b/dspl/liblapack/SRC/clansy.f new file mode 100644 index 0000000..3aa7874 --- /dev/null +++ b/dspl/liblapack/SRC/clansy.f @@ -0,0 +1,243 @@ +*> \brief \b CLANSY returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a complex symmetric matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLANSY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLANSY( NORM, UPLO, N, A, LDA, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, UPLO +* INTEGER LDA, N +* .. +* .. Array Arguments .. +* REAL WORK( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLANSY returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> complex symmetric matrix A. +*> \endverbatim +*> +*> \return CLANSY +*> \verbatim +*> +*> CLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in CLANSY as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is to be referenced. +*> = 'U': Upper triangular part of A is referenced +*> = 'L': Lower triangular part of A is referenced +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, CLANSY is +*> set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading n by n +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading n by n lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(N,1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYauxiliary +* +* ===================================================================== + REAL FUNCTION CLANSY( NORM, UPLO, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER LDA, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, J + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J, N + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( A( J, J ) ) + 60 CONTINUE + DO 70 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( A( J, J ) ) + DO 90 I = J + 1, N + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL CLASSQ( J-1, A( 1, J ), 1, SCALE, SUM ) + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL CLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM ) + 120 CONTINUE + END IF + SUM = 2*SUM + CALL CLASSQ( N, A, LDA+1, SCALE, SUM ) + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANSY = VALUE + RETURN +* +* End of CLANSY +* + END diff --git a/dspl/liblapack/SRC/clantb.f b/dspl/liblapack/SRC/clantb.f new file mode 100644 index 0000000..4b4361c --- /dev/null +++ b/dspl/liblapack/SRC/clantb.f @@ -0,0 +1,363 @@ +*> \brief \b CLANTB returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a triangular band matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLANTB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLANTB( NORM, UPLO, DIAG, N, K, AB, +* LDAB, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER K, LDAB, N +* .. +* .. Array Arguments .. +* REAL WORK( * ) +* COMPLEX AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLANTB returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of an +*> n by n triangular band matrix A, with ( k + 1 ) diagonals. +*> \endverbatim +*> +*> \return CLANTB +*> \verbatim +*> +*> CLANTB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in CLANTB as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, CLANTB is +*> set to zero. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of super-diagonals of the matrix A if UPLO = 'U', +*> or the number of sub-diagonals of the matrix A if UPLO = 'L'. +*> K >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> The upper or lower triangular band matrix A, stored in the +*> first k+1 rows of AB. The j-th column of A is stored +*> in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). +*> Note that when DIAG = 'U', the elements of the array AB +*> corresponding to the diagonal elements of the matrix A are +*> not referenced, but are assumed to be one. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= K+1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION CLANTB( NORM, UPLO, DIAG, N, K, AB, + $ LDAB, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER K, LDAB, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J, L + REAL SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = MAX( K+2-J, 1 ), K + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = 2, MIN( N+1-J, K+1 ) + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = MAX( K+2-J, 1 ), K + 1 + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = 1, MIN( N+1-J, K+1 ) + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 90 I = MAX( K+2-J, 1 ), K + SUM = SUM + ABS( AB( I, J ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = MAX( K+2-J, 1 ), K + 1 + SUM = SUM + ABS( AB( I, J ) ) + 100 CONTINUE + END IF + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = 2, MIN( N+1-J, K+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = 1, MIN( N+1-J, K+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 130 CONTINUE + END IF + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, N + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + L = K + 1 - J + DO 160 I = MAX( 1, J-K ), J - 1 + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 180 I = 1, N + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + L = K + 1 - J + DO 190 I = MAX( 1, J-K ), J + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 230 J = 1, N + L = 1 - J + DO 220 I = J + 1, MIN( N, J+K ) + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 220 CONTINUE + 230 CONTINUE + ELSE + DO 240 I = 1, N + WORK( I ) = ZERO + 240 CONTINUE + DO 260 J = 1, N + L = 1 - J + DO 250 I = J, MIN( N, J+K ) + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 250 CONTINUE + 260 CONTINUE + END IF + END IF + DO 270 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 270 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + IF( K.GT.0 ) THEN + DO 280 J = 2, N + CALL CLASSQ( MIN( J-1, K ), + $ AB( MAX( K+2-J, 1 ), J ), 1, SCALE, + $ SUM ) + 280 CONTINUE + END IF + ELSE + SCALE = ZERO + SUM = ONE + DO 290 J = 1, N + CALL CLASSQ( MIN( J, K+1 ), AB( MAX( K+2-J, 1 ), J ), + $ 1, SCALE, SUM ) + 290 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + IF( K.GT.0 ) THEN + DO 300 J = 1, N - 1 + CALL CLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, + $ SUM ) + 300 CONTINUE + END IF + ELSE + SCALE = ZERO + SUM = ONE + DO 310 J = 1, N + CALL CLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1, SCALE, + $ SUM ) + 310 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANTB = VALUE + RETURN +* +* End of CLANTB +* + END diff --git a/dspl/liblapack/SRC/clantp.f b/dspl/liblapack/SRC/clantp.f new file mode 100644 index 0000000..148ac54 --- /dev/null +++ b/dspl/liblapack/SRC/clantp.f @@ -0,0 +1,357 @@ +*> \brief \b CLANTP returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a triangular matrix supplied in packed form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLANTP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLANTP( NORM, UPLO, DIAG, N, AP, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER N +* .. +* .. Array Arguments .. +* REAL WORK( * ) +* COMPLEX AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLANTP returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> triangular matrix A, supplied in packed form. +*> \endverbatim +*> +*> \return CLANTP +*> \verbatim +*> +*> CLANTP = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in CLANTP as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, CLANTP is +*> set to zero. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> The upper or lower triangular matrix A, packed columnwise in +*> a linear array. The j-th column of A is stored in the array +*> AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> Note that when DIAG = 'U', the elements of the array AP +*> corresponding to the diagonal elements of the matrix A are +*> not referenced, but are assumed to be one. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION CLANTP( NORM, UPLO, DIAG, N, AP, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J, K + REAL SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + K = 1 + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = K, K + J - 2 + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + K = K + J + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = K + 1, K + N - J + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + K = K + N - J + 1 + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = K, K + J - 1 + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 50 CONTINUE + K = K + J + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = K, K + N - J + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + K = K + N - J + 1 + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + K = 1 + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 90 I = K, K + J - 2 + SUM = SUM + ABS( AP( I ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = K, K + J - 1 + SUM = SUM + ABS( AP( I ) ) + 100 CONTINUE + END IF + K = K + J + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = K + 1, K + N - J + SUM = SUM + ABS( AP( I ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = K, K + N - J + SUM = SUM + ABS( AP( I ) ) + 130 CONTINUE + END IF + K = K + N - J + 1 + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + K = 1 + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, N + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + DO 160 I = 1, J - 1 + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 160 CONTINUE + K = K + 1 + 170 CONTINUE + ELSE + DO 180 I = 1, N + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + DO 190 I = 1, J + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 230 J = 1, N + K = K + 1 + DO 220 I = J + 1, N + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 220 CONTINUE + 230 CONTINUE + ELSE + DO 240 I = 1, N + WORK( I ) = ZERO + 240 CONTINUE + DO 260 J = 1, N + DO 250 I = J, N + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 250 CONTINUE + 260 CONTINUE + END IF + END IF + VALUE = ZERO + DO 270 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 270 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + K = 2 + DO 280 J = 2, N + CALL CLASSQ( J-1, AP( K ), 1, SCALE, SUM ) + K = K + J + 280 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + K = 1 + DO 290 J = 1, N + CALL CLASSQ( J, AP( K ), 1, SCALE, SUM ) + K = K + J + 290 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + K = 2 + DO 300 J = 1, N - 1 + CALL CLASSQ( N-J, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 300 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + K = 1 + DO 310 J = 1, N + CALL CLASSQ( N-J+1, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 310 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANTP = VALUE + RETURN +* +* End of CLANTP +* + END diff --git a/dspl/liblapack/SRC/clantr.f b/dspl/liblapack/SRC/clantr.f new file mode 100644 index 0000000..4e1843d --- /dev/null +++ b/dspl/liblapack/SRC/clantr.f @@ -0,0 +1,355 @@ +*> \brief \b CLANTR returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a trapezoidal or triangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLANTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA, +* WORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER LDA, M, N +* .. +* .. Array Arguments .. +* REAL WORK( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLANTR returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> trapezoidal or triangular matrix A. +*> \endverbatim +*> +*> \return CLANTR +*> \verbatim +*> +*> CLANTR = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in CLANTR as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower trapezoidal. +*> = 'U': Upper trapezoidal +*> = 'L': Lower trapezoidal +*> Note that A is triangular instead of trapezoidal if M = N. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A has unit diagonal. +*> = 'N': Non-unit diagonal +*> = 'U': Unit diagonal +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0, and if +*> UPLO = 'U', M <= N. When M = 0, CLANTR is set to zero. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0, and if +*> UPLO = 'L', N <= M. When N = 0, CLANTR is set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The trapezoidal matrix A (A is triangular if M = N). +*> If UPLO = 'U', the leading m by n upper trapezoidal part of +*> the array A contains the upper trapezoidal matrix, and the +*> strictly lower triangular part of A is not referenced. +*> If UPLO = 'L', the leading m by n lower trapezoidal part of +*> the array A contains the lower trapezoidal matrix, and the +*> strictly upper triangular part of A is not referenced. Note +*> that when DIAG = 'U', the diagonal elements of A are not +*> referenced and are assumed to be one. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(M,1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= M when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + REAL WORK( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J + REAL SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( M, J-1 ) + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J + 1, M + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = 1, MIN( M, J ) + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = J, M + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( ( UDIAG ) .AND. ( J.LE.M ) ) THEN + SUM = ONE + DO 90 I = 1, J - 1 + SUM = SUM + ABS( A( I, J ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = 1, MIN( M, J ) + SUM = SUM + ABS( A( I, J ) ) + 100 CONTINUE + END IF + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = J + 1, M + SUM = SUM + ABS( A( I, J ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = J, M + SUM = SUM + ABS( A( I, J ) ) + 130 CONTINUE + END IF + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, M + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + DO 160 I = 1, MIN( M, J-1 ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 180 I = 1, M + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + DO 190 I = 1, MIN( M, J ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 220 I = N + 1, M + WORK( I ) = ZERO + 220 CONTINUE + DO 240 J = 1, N + DO 230 I = J + 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 230 CONTINUE + 240 CONTINUE + ELSE + DO 250 I = 1, M + WORK( I ) = ZERO + 250 CONTINUE + DO 270 J = 1, N + DO 260 I = J, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 260 CONTINUE + 270 CONTINUE + END IF + END IF + VALUE = ZERO + DO 280 I = 1, M + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 280 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = MIN( M, N ) + DO 290 J = 2, N + CALL CLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM ) + 290 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + DO 300 J = 1, N + CALL CLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM ) + 300 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = MIN( M, N ) + DO 310 J = 1, N + CALL CLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE, + $ SUM ) + 310 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + DO 320 J = 1, N + CALL CLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM ) + 320 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + CLANTR = VALUE + RETURN +* +* End of CLANTR +* + END diff --git a/dspl/liblapack/SRC/clapll.f b/dspl/liblapack/SRC/clapll.f new file mode 100644 index 0000000..5465372 --- /dev/null +++ b/dspl/liblapack/SRC/clapll.f @@ -0,0 +1,169 @@ +*> \brief \b CLAPLL measures the linear dependence of two vectors. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAPLL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAPLL( N, X, INCX, Y, INCY, SSMIN ) +* +* .. Scalar Arguments .. +* INTEGER INCX, INCY, N +* REAL SSMIN +* .. +* .. Array Arguments .. +* COMPLEX X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Given two column vectors X and Y, let +*> +*> A = ( X Y ). +*> +*> The subroutine first computes the QR factorization of A = Q*R, +*> and then computes the SVD of the 2-by-2 upper triangular matrix R. +*> The smaller singular value of R is returned in SSMIN, which is used +*> as the measurement of the linear dependency of the vectors X and Y. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of the vectors X and Y. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (1+(N-1)*INCX) +*> On entry, X contains the N-vector X. +*> On exit, X is overwritten. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX array, dimension (1+(N-1)*INCY) +*> On entry, Y contains the N-vector Y. +*> On exit, Y is overwritten. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> The increment between successive elements of Y. INCY > 0. +*> \endverbatim +*> +*> \param[out] SSMIN +*> \verbatim +*> SSMIN is REAL +*> The smallest singular value of the N-by-2 matrix A = ( X Y ). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLAPLL( N, X, INCX, Y, INCY, SSMIN ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCX, INCY, N + REAL SSMIN +* .. +* .. Array Arguments .. + COMPLEX X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + REAL SSMAX + COMPLEX A11, A12, A22, C, TAU +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG +* .. +* .. External Functions .. + COMPLEX CDOTC + EXTERNAL CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CLARFG, SLAS2 +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + SSMIN = ZERO + RETURN + END IF +* +* Compute the QR factorization of the N-by-2 matrix ( X Y ) +* + CALL CLARFG( N, X( 1 ), X( 1+INCX ), INCX, TAU ) + A11 = X( 1 ) + X( 1 ) = CONE +* + C = -CONJG( TAU )*CDOTC( N, X, INCX, Y, INCY ) + CALL CAXPY( N, C, X, INCX, Y, INCY ) +* + CALL CLARFG( N-1, Y( 1+INCY ), Y( 1+2*INCY ), INCY, TAU ) +* + A12 = Y( 1 ) + A22 = Y( 1+INCY ) +* +* Compute the SVD of 2-by-2 Upper triangular matrix. +* + CALL SLAS2( ABS( A11 ), ABS( A12 ), ABS( A22 ), SSMIN, SSMAX ) +* + RETURN +* +* End of CLAPLL +* + END diff --git a/dspl/liblapack/SRC/clapmr.f b/dspl/liblapack/SRC/clapmr.f new file mode 100644 index 0000000..e01d5bf --- /dev/null +++ b/dspl/liblapack/SRC/clapmr.f @@ -0,0 +1,204 @@ +*> \brief \b CLAPMR rearranges rows of a matrix as specified by a permutation vector. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAPMR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAPMR( FORWRD, M, N, X, LDX, K ) +* +* .. Scalar Arguments .. +* LOGICAL FORWRD +* INTEGER LDX, M, N +* .. +* .. Array Arguments .. +* INTEGER K( * ) +* COMPLEX X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAPMR rearranges the rows of the M by N matrix X as specified +*> by the permutation K(1),K(2),...,K(M) of the integers 1,...,M. +*> If FORWRD = .TRUE., forward permutation: +*> +*> X(K(I),*) is moved X(I,*) for I = 1,2,...,M. +*> +*> If FORWRD = .FALSE., backward permutation: +*> +*> X(I,*) is moved to X(K(I),*) for I = 1,2,...,M. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FORWRD +*> \verbatim +*> FORWRD is LOGICAL +*> = .TRUE., forward permutation +*> = .FALSE., backward permutation +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix X. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix X. N >= 0. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,N) +*> On entry, the M by N matrix X. +*> On exit, X contains the permuted matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X, LDX >= MAX(1,M). +*> \endverbatim +*> +*> \param[in,out] K +*> \verbatim +*> K is INTEGER array, dimension (M) +*> On entry, K contains the permutation vector. K is used as +*> internal workspace, but reset to its original value on +*> output. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLAPMR( FORWRD, M, N, X, LDX, K ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL FORWRD + INTEGER LDX, M, N +* .. +* .. Array Arguments .. + INTEGER K( * ) + COMPLEX X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IN, J, JJ + COMPLEX TEMP +* .. +* .. Executable Statements .. +* + IF( M.LE.1 ) + $ RETURN +* + DO 10 I = 1, M + K( I ) = -K( I ) + 10 CONTINUE +* + IF( FORWRD ) THEN +* +* Forward permutation +* + DO 50 I = 1, M +* + IF( K( I ).GT.0 ) + $ GO TO 40 +* + J = I + K( J ) = -K( J ) + IN = K( J ) +* + 20 CONTINUE + IF( K( IN ).GT.0 ) + $ GO TO 40 +* + DO 30 JJ = 1, N + TEMP = X( J, JJ ) + X( J, JJ ) = X( IN, JJ ) + X( IN, JJ ) = TEMP + 30 CONTINUE +* + K( IN ) = -K( IN ) + J = IN + IN = K( IN ) + GO TO 20 +* + 40 CONTINUE +* + 50 CONTINUE +* + ELSE +* +* Backward permutation +* + DO 90 I = 1, M +* + IF( K( I ).GT.0 ) + $ GO TO 80 +* + K( I ) = -K( I ) + J = K( I ) + 60 CONTINUE + IF( J.EQ.I ) + $ GO TO 80 +* + DO 70 JJ = 1, N + TEMP = X( I, JJ ) + X( I, JJ ) = X( J, JJ ) + X( J, JJ ) = TEMP + 70 CONTINUE +* + K( J ) = -K( J ) + J = K( J ) + GO TO 60 +* + 80 CONTINUE +* + 90 CONTINUE +* + END IF +* + RETURN +* +* End of ZLAPMT +* + END + diff --git a/dspl/liblapack/SRC/clapmt.f b/dspl/liblapack/SRC/clapmt.f new file mode 100644 index 0000000..3b22e09 --- /dev/null +++ b/dspl/liblapack/SRC/clapmt.f @@ -0,0 +1,203 @@ +*> \brief \b CLAPMT performs a forward or backward permutation of the columns of a matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAPMT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAPMT( FORWRD, M, N, X, LDX, K ) +* +* .. Scalar Arguments .. +* LOGICAL FORWRD +* INTEGER LDX, M, N +* .. +* .. Array Arguments .. +* INTEGER K( * ) +* COMPLEX X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAPMT rearranges the columns of the M by N matrix X as specified +*> by the permutation K(1),K(2),...,K(N) of the integers 1,...,N. +*> If FORWRD = .TRUE., forward permutation: +*> +*> X(*,K(J)) is moved X(*,J) for J = 1,2,...,N. +*> +*> If FORWRD = .FALSE., backward permutation: +*> +*> X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FORWRD +*> \verbatim +*> FORWRD is LOGICAL +*> = .TRUE., forward permutation +*> = .FALSE., backward permutation +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix X. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix X. N >= 0. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,N) +*> On entry, the M by N matrix X. +*> On exit, X contains the permuted matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X, LDX >= MAX(1,M). +*> \endverbatim +*> +*> \param[in,out] K +*> \verbatim +*> K is INTEGER array, dimension (N) +*> On entry, K contains the permutation vector. K is used as +*> internal workspace, but reset to its original value on +*> output. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLAPMT( FORWRD, M, N, X, LDX, K ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL FORWRD + INTEGER LDX, M, N +* .. +* .. Array Arguments .. + INTEGER K( * ) + COMPLEX X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, II, J, IN + COMPLEX TEMP +* .. +* .. Executable Statements .. +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, N + K( I ) = -K( I ) + 10 CONTINUE +* + IF( FORWRD ) THEN +* +* Forward permutation +* + DO 60 I = 1, N +* + IF( K( I ).GT.0 ) + $ GO TO 40 +* + J = I + K( J ) = -K( J ) + IN = K( J ) +* + 20 CONTINUE + IF( K( IN ).GT.0 ) + $ GO TO 40 +* + DO 30 II = 1, M + TEMP = X( II, J ) + X( II, J ) = X( II, IN ) + X( II, IN ) = TEMP + 30 CONTINUE +* + K( IN ) = -K( IN ) + J = IN + IN = K( IN ) + GO TO 20 +* + 40 CONTINUE +* + 60 CONTINUE +* + ELSE +* +* Backward permutation +* + DO 110 I = 1, N +* + IF( K( I ).GT.0 ) + $ GO TO 100 +* + K( I ) = -K( I ) + J = K( I ) + 80 CONTINUE + IF( J.EQ.I ) + $ GO TO 100 +* + DO 90 II = 1, M + TEMP = X( II, I ) + X( II, I ) = X( II, J ) + X( II, J ) = TEMP + 90 CONTINUE +* + K( J ) = -K( J ) + J = K( J ) + GO TO 80 +* + 100 CONTINUE + + 110 CONTINUE +* + END IF +* + RETURN +* +* End of CLAPMT +* + END diff --git a/dspl/liblapack/SRC/claqgb.f b/dspl/liblapack/SRC/claqgb.f new file mode 100644 index 0000000..636c684 --- /dev/null +++ b/dspl/liblapack/SRC/claqgb.f @@ -0,0 +1,258 @@ +*> \brief \b CLAQGB scales a general band matrix, using row and column scaling factors computed by sgbequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAQGB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAQGB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, +* AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED +* INTEGER KL, KU, LDAB, M, N +* REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* REAL C( * ), R( * ) +* COMPLEX AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAQGB equilibrates a general M by N band matrix A with KL +*> subdiagonals and KU superdiagonals using the row and scaling factors +*> in the vectors R and C. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +*> +*> On exit, the equilibrated matrix, in the same storage format +*> as A. See EQUED for the form of the equilibrated matrix. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDA >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] R +*> \verbatim +*> R is REAL array, dimension (M) +*> The row scale factors for A. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. +*> \endverbatim +*> +*> \param[in] ROWCND +*> \verbatim +*> ROWCND is REAL +*> Ratio of the smallest R(i) to the largest R(i). +*> \endverbatim +*> +*> \param[in] COLCND +*> \verbatim +*> COLCND is REAL +*> Ratio of the smallest C(i) to the largest C(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if row or column scaling +*> should be done based on the ratio of the row or column scaling +*> factors. If ROWCND < THRESH, row scaling is done, and if +*> COLCND < THRESH, column scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if row scaling +*> should be done based on the absolute size of the largest matrix +*> element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGBauxiliary +* +* ===================================================================== + SUBROUTINE CLAQGB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED + INTEGER KL, KU, LDAB, M, N + REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + REAL C( * ), R( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) + $ THEN +* +* No row scaling +* + IF( COLCND.GE.THRESH ) THEN +* +* No column scaling +* + EQUED = 'N' + ELSE +* +* Column scaling +* + DO 20 J = 1, N + CJ = C( J ) + DO 10 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = CJ*AB( KU+1+I-J, J ) + 10 CONTINUE + 20 CONTINUE + EQUED = 'C' + END IF + ELSE IF( COLCND.GE.THRESH ) THEN +* +* Row scaling, no column scaling +* + DO 40 J = 1, N + DO 30 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = R( I )*AB( KU+1+I-J, J ) + 30 CONTINUE + 40 CONTINUE + EQUED = 'R' + ELSE +* +* Row and column scaling +* + DO 60 J = 1, N + CJ = C( J ) + DO 50 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = CJ*R( I )*AB( KU+1+I-J, J ) + 50 CONTINUE + 60 CONTINUE + EQUED = 'B' + END IF +* + RETURN +* +* End of CLAQGB +* + END diff --git a/dspl/liblapack/SRC/claqge.f b/dspl/liblapack/SRC/claqge.f new file mode 100644 index 0000000..9d9efa3 --- /dev/null +++ b/dspl/liblapack/SRC/claqge.f @@ -0,0 +1,238 @@ +*> \brief \b CLAQGE scales a general rectangular matrix, using row and column scaling factors computed by sgeequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAQGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAQGE( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, +* EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED +* INTEGER LDA, M, N +* REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* REAL C( * ), R( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAQGE equilibrates a general M by N matrix A using the row and +*> column scaling factors in the vectors R and C. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M by N matrix A. +*> On exit, the equilibrated matrix. See EQUED for the form of +*> the equilibrated matrix. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(M,1). +*> \endverbatim +*> +*> \param[in] R +*> \verbatim +*> R is REAL array, dimension (M) +*> The row scale factors for A. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. +*> \endverbatim +*> +*> \param[in] ROWCND +*> \verbatim +*> ROWCND is REAL +*> Ratio of the smallest R(i) to the largest R(i). +*> \endverbatim +*> +*> \param[in] COLCND +*> \verbatim +*> COLCND is REAL +*> Ratio of the smallest C(i) to the largest C(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if row or column scaling +*> should be done based on the ratio of the row or column scaling +*> factors. If ROWCND < THRESH, row scaling is done, and if +*> COLCND < THRESH, column scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if row scaling +*> should be done based on the absolute size of the largest matrix +*> element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEauxiliary +* +* ===================================================================== + SUBROUTINE CLAQGE( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED + INTEGER LDA, M, N + REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + REAL C( * ), R( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) + $ THEN +* +* No row scaling +* + IF( COLCND.GE.THRESH ) THEN +* +* No column scaling +* + EQUED = 'N' + ELSE +* +* Column scaling +* + DO 20 J = 1, N + CJ = C( J ) + DO 10 I = 1, M + A( I, J ) = CJ*A( I, J ) + 10 CONTINUE + 20 CONTINUE + EQUED = 'C' + END IF + ELSE IF( COLCND.GE.THRESH ) THEN +* +* Row scaling, no column scaling +* + DO 40 J = 1, N + DO 30 I = 1, M + A( I, J ) = R( I )*A( I, J ) + 30 CONTINUE + 40 CONTINUE + EQUED = 'R' + ELSE +* +* Row and column scaling +* + DO 60 J = 1, N + CJ = C( J ) + DO 50 I = 1, M + A( I, J ) = CJ*R( I )*A( I, J ) + 50 CONTINUE + 60 CONTINUE + EQUED = 'B' + END IF +* + RETURN +* +* End of CLAQGE +* + END diff --git a/dspl/liblapack/SRC/claqhb.f b/dspl/liblapack/SRC/claqhb.f new file mode 100644 index 0000000..db69875 --- /dev/null +++ b/dspl/liblapack/SRC/claqhb.f @@ -0,0 +1,230 @@ +*> \brief \b CLAQHB scales a Hermitian band matrix, using scaling factors computed by cpbequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAQHB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAQHB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, UPLO +* INTEGER KD, LDAB, N +* REAL AMAX, SCOND +* .. +* .. Array Arguments .. +* REAL S( * ) +* COMPLEX AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAQHB equilibrates an Hermitian band matrix A using the scaling +*> factors in the vector S. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of super-diagonals of the matrix A if UPLO = 'U', +*> or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**H *U or A = L*L**H of the band +*> matrix A, in the same storage format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A. +*> \endverbatim +*> +*> \param[in] SCOND +*> \verbatim +*> SCOND is REAL +*> Ratio of the smallest S(i) to the largest S(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies whether or not equilibration was done. +*> = 'N': No equilibration. +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if scaling should be done +*> based on the ratio of the scaling factors. If SCOND < THRESH, +*> scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if scaling should +*> be done based on the absolute size of the largest matrix element. +*> If AMAX > LARGE or AMAX < SMALL, scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLAQHB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER KD, LDAB, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL S( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored in band format. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = MAX( 1, J-KD ), J - 1 + AB( KD+1+I-J, J ) = CJ*S( I )*AB( KD+1+I-J, J ) + 10 CONTINUE + AB( KD+1, J ) = CJ*CJ*REAL( AB( KD+1, J ) ) + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + AB( 1, J ) = CJ*CJ*REAL( AB( 1, J ) ) + DO 30 I = J + 1, MIN( N, J+KD ) + AB( 1+I-J, J ) = CJ*S( I )*AB( 1+I-J, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of CLAQHB +* + END diff --git a/dspl/liblapack/SRC/claqhe.f b/dspl/liblapack/SRC/claqhe.f new file mode 100644 index 0000000..14714b5 --- /dev/null +++ b/dspl/liblapack/SRC/claqhe.f @@ -0,0 +1,223 @@ +*> \brief \b CLAQHE scales a Hermitian matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAQHE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAQHE( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, UPLO +* INTEGER LDA, N +* REAL AMAX, SCOND +* .. +* .. Array Arguments .. +* REAL S( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAQHE equilibrates a Hermitian matrix A using the scaling factors +*> in the vector S. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if EQUED = 'Y', the equilibrated matrix: +*> diag(S) * A * diag(S). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(N,1). +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A. +*> \endverbatim +*> +*> \param[in] SCOND +*> \verbatim +*> SCOND is REAL +*> Ratio of the smallest S(i) to the largest S(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies whether or not equilibration was done. +*> = 'N': No equilibration. +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if scaling should be done +*> based on the ratio of the scaling factors. If SCOND < THRESH, +*> scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if scaling should +*> be done based on the absolute size of the largest matrix element. +*> If AMAX > LARGE or AMAX < SMALL, scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexHEauxiliary +* +* ===================================================================== + SUBROUTINE CLAQHE( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER LDA, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL S( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J - 1 + A( I, J ) = CJ*S( I )*A( I, J ) + 10 CONTINUE + A( J, J ) = CJ*CJ*REAL( A( J, J ) ) + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + A( J, J ) = CJ*CJ*REAL( A( J, J ) ) + DO 30 I = J + 1, N + A( I, J ) = CJ*S( I )*A( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of CLAQHE +* + END diff --git a/dspl/liblapack/SRC/claqhp.f b/dspl/liblapack/SRC/claqhp.f new file mode 100644 index 0000000..c41eb18 --- /dev/null +++ b/dspl/liblapack/SRC/claqhp.f @@ -0,0 +1,219 @@ +*> \brief \b CLAQHP scales a Hermitian matrix stored in packed form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAQHP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAQHP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, UPLO +* INTEGER N +* REAL AMAX, SCOND +* .. +* .. Array Arguments .. +* REAL S( * ) +* COMPLEX AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAQHP equilibrates a Hermitian matrix A using the scaling factors +*> in the vector S. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the equilibrated matrix: diag(S) * A * diag(S), in +*> the same storage format as A. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A. +*> \endverbatim +*> +*> \param[in] SCOND +*> \verbatim +*> SCOND is REAL +*> Ratio of the smallest S(i) to the largest S(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies whether or not equilibration was done. +*> = 'N': No equilibration. +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if scaling should be done +*> based on the ratio of the scaling factors. If SCOND < THRESH, +*> scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if scaling should +*> be done based on the absolute size of the largest matrix element. +*> If AMAX > LARGE or AMAX < SMALL, scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLAQHP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL S( * ) + COMPLEX AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JC + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + JC = 1 + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J - 1 + AP( JC+I-1 ) = CJ*S( I )*AP( JC+I-1 ) + 10 CONTINUE + AP( JC+J-1 ) = CJ*CJ*REAL( AP( JC+J-1 ) ) + JC = JC + J + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + JC = 1 + DO 40 J = 1, N + CJ = S( J ) + AP( JC ) = CJ*CJ*REAL( AP( JC ) ) + DO 30 I = J + 1, N + AP( JC+I-J ) = CJ*S( I )*AP( JC+I-J ) + 30 CONTINUE + JC = JC + N - J + 1 + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of CLAQHP +* + END diff --git a/dspl/liblapack/SRC/claqp2.f b/dspl/liblapack/SRC/claqp2.f new file mode 100644 index 0000000..33f2a8e --- /dev/null +++ b/dspl/liblapack/SRC/claqp2.f @@ -0,0 +1,266 @@ +*> \brief \b CLAQP2 computes a QR factorization with column pivoting of the matrix block. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAQP2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, +* WORK ) +* +* .. Scalar Arguments .. +* INTEGER LDA, M, N, OFFSET +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* REAL VN1( * ), VN2( * ) +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAQP2 computes a QR factorization with column pivoting of +*> the block A(OFFSET+1:M,1:N). +*> The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] OFFSET +*> \verbatim +*> OFFSET is INTEGER +*> The number of rows of the matrix A that must be pivoted +*> but no factorized. OFFSET >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the upper triangle of block A(OFFSET+1:M,1:N) is +*> the triangular factor obtained; the elements in block +*> A(OFFSET+1:M,1:N) below the diagonal, together with the +*> array TAU, represent the orthogonal matrix Q as a product of +*> elementary reflectors. Block A(1:OFFSET,1:N) has been +*> accordingly pivoted, but no factorized. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +*> to the front of A*P (a leading column); if JPVT(i) = 0, +*> the i-th column of A is a free column. +*> On exit, if JPVT(i) = k, then the i-th column of A*P +*> was the k-th column of A. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[in,out] VN1 +*> \verbatim +*> VN1 is REAL array, dimension (N) +*> The vector with the partial column norms. +*> \endverbatim +*> +*> \param[in,out] VN2 +*> \verbatim +*> VN2 is REAL array, dimension (N) +*> The vector with the exact column norms. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +*> X. Sun, Computer Science Dept., Duke University, USA +*> \n +*> Partial column norm updating strategy modified on April 2011 +*> Z. Drmac and Z. Bujanovic, Dept. of Mathematics, +*> University of Zagreb, Croatia. +* +*> \par References: +* ================ +*> +*> LAPACK Working Note 176 +* +*> \htmlonly +*> [PDF] +*> \endhtmlonly +* +* ===================================================================== + SUBROUTINE CLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER LDA, M, N, OFFSET +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL VN1( * ), VN2( * ) + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + COMPLEX CONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, ITEMP, J, MN, OFFPI, PVT + REAL TEMP, TEMP2, TOL3Z + COMPLEX AII +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CLARFG, CSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, MIN, SQRT +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SCNRM2, SLAMCH + EXTERNAL ISAMAX, SCNRM2, SLAMCH +* .. +* .. Executable Statements .. +* + MN = MIN( M-OFFSET, N ) + TOL3Z = SQRT(SLAMCH('Epsilon')) +* +* Compute factorization. +* + DO 20 I = 1, MN +* + OFFPI = OFFSET + I +* +* Determine ith pivot column and swap if necessary. +* + PVT = ( I-1 ) + ISAMAX( N-I+1, VN1( I ), 1 ) +* + IF( PVT.NE.I ) THEN + CALL CSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( I ) + JPVT( I ) = ITEMP + VN1( PVT ) = VN1( I ) + VN2( PVT ) = VN2( I ) + END IF +* +* Generate elementary reflector H(i). +* + IF( OFFPI.LT.M ) THEN + CALL CLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), 1, + $ TAU( I ) ) + ELSE + CALL CLARFG( 1, A( M, I ), A( M, I ), 1, TAU( I ) ) + END IF +* + IF( I.LT.N ) THEN +* +* Apply H(i)**H to A(offset+i:m,i+1:n) from the left. +* + AII = A( OFFPI, I ) + A( OFFPI, I ) = CONE + CALL CLARF( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1, + $ CONJG( TAU( I ) ), A( OFFPI, I+1 ), LDA, + $ WORK( 1 ) ) + A( OFFPI, I ) = AII + END IF +* +* Update partial column norms. +* + DO 10 J = I + 1, N + IF( VN1( J ).NE.ZERO ) THEN +* +* NOTE: The following 4 lines follow from the analysis in +* Lapack Working Note 176. +* + TEMP = ONE - ( ABS( A( OFFPI, J ) ) / VN1( J ) )**2 + TEMP = MAX( TEMP, ZERO ) + TEMP2 = TEMP*( VN1( J ) / VN2( J ) )**2 + IF( TEMP2 .LE. TOL3Z ) THEN + IF( OFFPI.LT.M ) THEN + VN1( J ) = SCNRM2( M-OFFPI, A( OFFPI+1, J ), 1 ) + VN2( J ) = VN1( J ) + ELSE + VN1( J ) = ZERO + VN2( J ) = ZERO + END IF + ELSE + VN1( J ) = VN1( J )*SQRT( TEMP ) + END IF + END IF + 10 CONTINUE +* + 20 CONTINUE +* + RETURN +* +* End of CLAQP2 +* + END diff --git a/dspl/liblapack/SRC/claqps.f b/dspl/liblapack/SRC/claqps.f new file mode 100644 index 0000000..f47e852 --- /dev/null +++ b/dspl/liblapack/SRC/claqps.f @@ -0,0 +1,371 @@ +*> \brief \b CLAQPS computes a step of QR factorization with column pivoting of a real m-by-n matrix A by using BLAS level 3. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAQPS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1, +* VN2, AUXV, F, LDF ) +* +* .. Scalar Arguments .. +* INTEGER KB, LDA, LDF, M, N, NB, OFFSET +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* REAL VN1( * ), VN2( * ) +* COMPLEX A( LDA, * ), AUXV( * ), F( LDF, * ), TAU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAQPS computes a step of QR factorization with column pivoting +*> of a complex M-by-N matrix A by using Blas-3. It tries to factorize +*> NB columns from A starting from the row OFFSET+1, and updates all +*> of the matrix with Blas-3 xGEMM. +*> +*> In some cases, due to catastrophic cancellations, it cannot +*> factorize NB columns. Hence, the actual number of factorized +*> columns is returned in KB. +*> +*> Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0 +*> \endverbatim +*> +*> \param[in] OFFSET +*> \verbatim +*> OFFSET is INTEGER +*> The number of rows of A that have been factorized in +*> previous steps. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of columns to factorize. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns actually factorized. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, block A(OFFSET+1:M,1:KB) is the triangular +*> factor obtained and block A(1:OFFSET,1:N) has been +*> accordingly pivoted, but no factorized. +*> The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has +*> been updated. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> JPVT(I) = K <==> Column K of the full matrix A has been +*> permuted into position I in AP. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (KB) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[in,out] VN1 +*> \verbatim +*> VN1 is REAL array, dimension (N) +*> The vector with the partial column norms. +*> \endverbatim +*> +*> \param[in,out] VN2 +*> \verbatim +*> VN2 is REAL array, dimension (N) +*> The vector with the exact column norms. +*> \endverbatim +*> +*> \param[in,out] AUXV +*> \verbatim +*> AUXV is COMPLEX array, dimension (NB) +*> Auxiliar vector. +*> \endverbatim +*> +*> \param[in,out] F +*> \verbatim +*> F is COMPLEX array, dimension (LDF,NB) +*> Matrix F**H = L * Y**H * A. +*> \endverbatim +*> +*> \param[in] LDF +*> \verbatim +*> LDF is INTEGER +*> The leading dimension of the array F. LDF >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +*> X. Sun, Computer Science Dept., Duke University, USA +*> +*> \n +*> Partial column norm updating strategy modified on April 2011 +*> Z. Drmac and Z. Bujanovic, Dept. of Mathematics, +*> University of Zagreb, Croatia. +* +*> \par References: +* ================ +*> +*> LAPACK Working Note 176 +* +*> \htmlonly +*> [PDF] +*> \endhtmlonly +* +* ===================================================================== + SUBROUTINE CLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1, + $ VN2, AUXV, F, LDF ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER KB, LDA, LDF, M, N, NB, OFFSET +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL VN1( * ), VN2( * ) + COMPLEX A( LDA, * ), AUXV( * ), F( LDF, * ), TAU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + COMPLEX CZERO, CONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, + $ CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER ITEMP, J, K, LASTRK, LSTICC, PVT, RK + REAL TEMP, TEMP2, TOL3Z + COMPLEX AKK +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CGEMV, CLARFG, CSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, MIN, NINT, REAL, SQRT +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SCNRM2, SLAMCH + EXTERNAL ISAMAX, SCNRM2, SLAMCH +* .. +* .. Executable Statements .. +* + LASTRK = MIN( M, N+OFFSET ) + LSTICC = 0 + K = 0 + TOL3Z = SQRT(SLAMCH('Epsilon')) +* +* Beginning of while loop. +* + 10 CONTINUE + IF( ( K.LT.NB ) .AND. ( LSTICC.EQ.0 ) ) THEN + K = K + 1 + RK = OFFSET + K +* +* Determine ith pivot column and swap if necessary +* + PVT = ( K-1 ) + ISAMAX( N-K+1, VN1( K ), 1 ) + IF( PVT.NE.K ) THEN + CALL CSWAP( M, A( 1, PVT ), 1, A( 1, K ), 1 ) + CALL CSWAP( K-1, F( PVT, 1 ), LDF, F( K, 1 ), LDF ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( K ) + JPVT( K ) = ITEMP + VN1( PVT ) = VN1( K ) + VN2( PVT ) = VN2( K ) + END IF +* +* Apply previous Householder reflectors to column K: +* A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)**H. +* + IF( K.GT.1 ) THEN + DO 20 J = 1, K - 1 + F( K, J ) = CONJG( F( K, J ) ) + 20 CONTINUE + CALL CGEMV( 'No transpose', M-RK+1, K-1, -CONE, A( RK, 1 ), + $ LDA, F( K, 1 ), LDF, CONE, A( RK, K ), 1 ) + DO 30 J = 1, K - 1 + F( K, J ) = CONJG( F( K, J ) ) + 30 CONTINUE + END IF +* +* Generate elementary reflector H(k). +* + IF( RK.LT.M ) THEN + CALL CLARFG( M-RK+1, A( RK, K ), A( RK+1, K ), 1, TAU( K ) ) + ELSE + CALL CLARFG( 1, A( RK, K ), A( RK, K ), 1, TAU( K ) ) + END IF +* + AKK = A( RK, K ) + A( RK, K ) = CONE +* +* Compute Kth column of F: +* +* Compute F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)**H*A(RK:M,K). +* + IF( K.LT.N ) THEN + CALL CGEMV( 'Conjugate transpose', M-RK+1, N-K, TAU( K ), + $ A( RK, K+1 ), LDA, A( RK, K ), 1, CZERO, + $ F( K+1, K ), 1 ) + END IF +* +* Padding F(1:K,K) with zeros. +* + DO 40 J = 1, K + F( J, K ) = CZERO + 40 CONTINUE +* +* Incremental updating of F: +* F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)**H +* *A(RK:M,K). +* + IF( K.GT.1 ) THEN + CALL CGEMV( 'Conjugate transpose', M-RK+1, K-1, -TAU( K ), + $ A( RK, 1 ), LDA, A( RK, K ), 1, CZERO, + $ AUXV( 1 ), 1 ) +* + CALL CGEMV( 'No transpose', N, K-1, CONE, F( 1, 1 ), LDF, + $ AUXV( 1 ), 1, CONE, F( 1, K ), 1 ) + END IF +* +* Update the current row of A: +* A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)**H. +* + IF( K.LT.N ) THEN + CALL CGEMM( 'No transpose', 'Conjugate transpose', 1, N-K, + $ K, -CONE, A( RK, 1 ), LDA, F( K+1, 1 ), LDF, + $ CONE, A( RK, K+1 ), LDA ) + END IF +* +* Update partial column norms. +* + IF( RK.LT.LASTRK ) THEN + DO 50 J = K + 1, N + IF( VN1( J ).NE.ZERO ) THEN +* +* NOTE: The following 4 lines follow from the analysis in +* Lapack Working Note 176. +* + TEMP = ABS( A( RK, J ) ) / VN1( J ) + TEMP = MAX( ZERO, ( ONE+TEMP )*( ONE-TEMP ) ) + TEMP2 = TEMP*( VN1( J ) / VN2( J ) )**2 + IF( TEMP2 .LE. TOL3Z ) THEN + VN2( J ) = REAL( LSTICC ) + LSTICC = J + ELSE + VN1( J ) = VN1( J )*SQRT( TEMP ) + END IF + END IF + 50 CONTINUE + END IF +* + A( RK, K ) = AKK +* +* End of while loop. +* + GO TO 10 + END IF + KB = K + RK = OFFSET + KB +* +* Apply the block reflector to the rest of the matrix: +* A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) - +* A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)**H. +* + IF( KB.LT.MIN( N, M-OFFSET ) ) THEN + CALL CGEMM( 'No transpose', 'Conjugate transpose', M-RK, N-KB, + $ KB, -CONE, A( RK+1, 1 ), LDA, F( KB+1, 1 ), LDF, + $ CONE, A( RK+1, KB+1 ), LDA ) + END IF +* +* Recomputation of difficult columns. +* + 60 CONTINUE + IF( LSTICC.GT.0 ) THEN + ITEMP = NINT( VN2( LSTICC ) ) + VN1( LSTICC ) = SCNRM2( M-RK, A( RK+1, LSTICC ), 1 ) +* +* NOTE: The computation of VN1( LSTICC ) relies on the fact that +* SNRM2 does not fail on vectors with norm below the value of +* SQRT(DLAMCH('S')) +* + VN2( LSTICC ) = VN1( LSTICC ) + LSTICC = ITEMP + GO TO 60 + END IF +* + RETURN +* +* End of CLAQPS +* + END diff --git a/dspl/liblapack/SRC/claqr0.f b/dspl/liblapack/SRC/claqr0.f new file mode 100644 index 0000000..b61c9f1 --- /dev/null +++ b/dspl/liblapack/SRC/claqr0.f @@ -0,0 +1,701 @@ +*> \brief \b CLAQR0 computes the eigenvalues of a Hessenberg matrix, and optionally the matrices from the Schur decomposition. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAQR0 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, +* IHIZ, Z, LDZ, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* COMPLEX H( LDH, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAQR0 computes the eigenvalues of a Hessenberg matrix H +*> and, optionally, the matrices T and Z from the Schur decomposition +*> H = Z T Z**H, where T is an upper triangular matrix (the +*> Schur form), and Z is the unitary matrix of Schur vectors. +*> +*> Optionally Z may be postmultiplied into an input unitary +*> matrix Q so that this routine can give the Schur factorization +*> of a matrix A which has been reduced to the Hessenberg form H +*> by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> = .TRUE. : the full Schur form T is required; +*> = .FALSE.: only eigenvalues are required. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> = .TRUE. : the matrix of Schur vectors Z is required; +*> = .FALSE.: Schur vectors are not required. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N .GE. 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> It is assumed that H is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, +*> H(ILO,ILO-1) is zero. ILO and IHI are normally set by a +*> previous call to CGEBAL, and then passed to CGEHRD when the +*> matrix output by CGEBAL is reduced to Hessenberg form. +*> Otherwise, ILO and IHI should be set to 1 and N, +*> respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. +*> If N = 0, then ILO = 1 and IHI = 0. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is COMPLEX array, dimension (LDH,N) +*> On entry, the upper Hessenberg matrix H. +*> On exit, if INFO = 0 and WANTT is .TRUE., then H +*> contains the upper triangular matrix T from the Schur +*> decomposition (the Schur form). If INFO = 0 and WANT is +*> .FALSE., then the contents of H are unspecified on exit. +*> (The output value of H when INFO.GT.0 is given under the +*> description of INFO below.) +*> +*> This subroutine may explicitly set H(i,j) = 0 for i.GT.j and +*> j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH .GE. max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX array, dimension (N) +*> The computed eigenvalues of H(ILO:IHI,ILO:IHI) are stored +*> in W(ILO:IHI). If WANTT is .TRUE., then the eigenvalues are +*> stored in the same order as on the diagonal of the Schur +*> form returned in H, with W(i) = H(i,i). +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. +*> 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ,IHI) +*> If WANTZ is .FALSE., then Z is not referenced. +*> If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is +*> replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the +*> orthogonal Schur factor of H(ILO:IHI,ILO:IHI). +*> (The output value of Z when INFO.GT.0 is given under +*> the description of INFO below.) +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. if WANTZ is .TRUE. +*> then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension LWORK +*> On exit, if LWORK = -1, WORK(1) returns an estimate of +*> the optimal value for LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK .GE. max(1,N) +*> is sufficient, but LWORK typically as large as 6*N may +*> be required for optimal performance. A workspace query +*> to determine the optimal workspace size is recommended. +*> +*> If LWORK = -1, then CLAQR0 does a workspace query. +*> In this case, CLAQR0 checks the input parameters and +*> estimates the optimal workspace size for the given +*> values of N, ILO and IHI. The estimate is returned +*> in WORK(1). No error message related to LWORK is +*> issued by XERBLA. Neither H nor Z are accessed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> .GT. 0: if INFO = i, CLAQR0 failed to compute all of +*> the eigenvalues. Elements 1:ilo-1 and i+1:n of WR +*> and WI contain those eigenvalues which have been +*> successfully computed. (Failures are rare.) +*> +*> If INFO .GT. 0 and WANT is .FALSE., then on exit, +*> the remaining unconverged eigenvalues are the eigen- +*> values of the upper Hessenberg matrix rows and +*> columns ILO through INFO of the final, output +*> value of H. +*> +*> If INFO .GT. 0 and WANTT is .TRUE., then on exit +*> +*> (*) (initial value of H)*U = U*(final value of H) +*> +*> where U is a unitary matrix. The final +*> value of H is upper Hessenberg and triangular in +*> rows and columns INFO+1 through IHI. +*> +*> If INFO .GT. 0 and WANTZ is .TRUE., then on exit +*> +*> (final value of Z(ILO:IHI,ILOZ:IHIZ) +*> = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U +*> +*> where U is the unitary matrix in (*) (regard- +*> less of the value of WANTT.) +*> +*> If INFO .GT. 0 and WANTZ is .FALSE., then Z is not +*> accessed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +* +*> \par References: +* ================ +*> +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages +*> 929--947, 2002. +*> \n +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part II: Aggressive Early Deflation, SIAM Journal +*> of Matrix Analysis, volume 23, pages 948--973, 2002. +*> +* ===================================================================== + SUBROUTINE CLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, + $ IHIZ, Z, LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + COMPLEX H( LDH, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ================================================================ +* .. Parameters .. +* +* ==== Matrices of order NTINY or smaller must be processed by +* . CLAHQR because of insufficient subdiagonal scratch space. +* . (This is a hard limit.) ==== + INTEGER NTINY + PARAMETER ( NTINY = 11 ) +* +* ==== Exceptional deflation windows: try to cure rare +* . slow convergence by varying the size of the +* . deflation window after KEXNW iterations. ==== + INTEGER KEXNW + PARAMETER ( KEXNW = 5 ) +* +* ==== Exceptional shifts: try to cure rare slow convergence +* . with ad-hoc exceptional shifts every KEXSH iterations. +* . ==== + INTEGER KEXSH + PARAMETER ( KEXSH = 6 ) +* +* ==== The constant WILK1 is used to form the exceptional +* . shifts. ==== + REAL WILK1 + PARAMETER ( WILK1 = 0.75e0 ) + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0e0, 0.0e0 ), + $ ONE = ( 1.0e0, 0.0e0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0e0 ) +* .. +* .. Local Scalars .. + COMPLEX AA, BB, CC, CDUM, DD, DET, RTDISC, SWAP, TR2 + REAL S + INTEGER I, INF, IT, ITMAX, K, KACC22, KBOT, KDU, KS, + $ KT, KTOP, KU, KV, KWH, KWTOP, KWV, LD, LS, + $ LWKOPT, NDEC, NDFL, NH, NHO, NIBBLE, NMIN, NS, + $ NSMAX, NSR, NVE, NW, NWMAX, NWR, NWUPBD + LOGICAL SORTED + CHARACTER JBCMPZ*2 +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Local Arrays .. + COMPLEX ZDUM( 1, 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CLACPY, CLAHQR, CLAQR3, CLAQR4, CLAQR5 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, INT, MAX, MIN, MOD, REAL, + $ SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. + INFO = 0 +* +* ==== Quick return for N = 0: nothing to do. ==== +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = ONE + RETURN + END IF +* + IF( N.LE.NTINY ) THEN +* +* ==== Tiny matrices must use CLAHQR. ==== +* + LWKOPT = 1 + IF( LWORK.NE.-1 ) + $ CALL CLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, + $ IHIZ, Z, LDZ, INFO ) + ELSE +* +* ==== Use small bulge multi-shift QR with aggressive early +* . deflation on larger-than-tiny matrices. ==== +* +* ==== Hope for the best. ==== +* + INFO = 0 +* +* ==== Set up job flags for ILAENV. ==== +* + IF( WANTT ) THEN + JBCMPZ( 1: 1 ) = 'S' + ELSE + JBCMPZ( 1: 1 ) = 'E' + END IF + IF( WANTZ ) THEN + JBCMPZ( 2: 2 ) = 'V' + ELSE + JBCMPZ( 2: 2 ) = 'N' + END IF +* +* ==== NWR = recommended deflation window size. At this +* . point, N .GT. NTINY = 11, so there is enough +* . subdiagonal workspace for NWR.GE.2 as required. +* . (In fact, there is enough subdiagonal space for +* . NWR.GE.3.) ==== +* + NWR = ILAENV( 13, 'CLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NWR = MAX( 2, NWR ) + NWR = MIN( IHI-ILO+1, ( N-1 ) / 3, NWR ) +* +* ==== NSR = recommended number of simultaneous shifts. +* . At this point N .GT. NTINY = 11, so there is at +* . enough subdiagonal workspace for NSR to be even +* . and greater than or equal to two as required. ==== +* + NSR = ILAENV( 15, 'CLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NSR = MIN( NSR, ( N+6 ) / 9, IHI-ILO ) + NSR = MAX( 2, NSR-MOD( NSR, 2 ) ) +* +* ==== Estimate optimal workspace ==== +* +* ==== Workspace query call to CLAQR3 ==== +* + CALL CLAQR3( WANTT, WANTZ, N, ILO, IHI, NWR+1, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, W, H, LDH, N, H, LDH, N, H, + $ LDH, WORK, -1 ) +* +* ==== Optimal workspace = MAX(CLAQR5, CLAQR3) ==== +* + LWKOPT = MAX( 3*NSR / 2, INT( WORK( 1 ) ) ) +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = CMPLX( LWKOPT, 0 ) + RETURN + END IF +* +* ==== CLAHQR/CLAQR0 crossover point ==== +* + NMIN = ILAENV( 12, 'CLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NMIN = MAX( NTINY, NMIN ) +* +* ==== Nibble crossover point ==== +* + NIBBLE = ILAENV( 14, 'CLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NIBBLE = MAX( 0, NIBBLE ) +* +* ==== Accumulate reflections during ttswp? Use block +* . 2-by-2 structure during matrix-matrix multiply? ==== +* + KACC22 = ILAENV( 16, 'CLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + KACC22 = MAX( 0, KACC22 ) + KACC22 = MIN( 2, KACC22 ) +* +* ==== NWMAX = the largest possible deflation window for +* . which there is sufficient workspace. ==== +* + NWMAX = MIN( ( N-1 ) / 3, LWORK / 2 ) + NW = NWMAX +* +* ==== NSMAX = the Largest number of simultaneous shifts +* . for which there is sufficient workspace. ==== +* + NSMAX = MIN( ( N+6 ) / 9, 2*LWORK / 3 ) + NSMAX = NSMAX - MOD( NSMAX, 2 ) +* +* ==== NDFL: an iteration count restarted at deflation. ==== +* + NDFL = 1 +* +* ==== ITMAX = iteration limit ==== +* + ITMAX = MAX( 30, 2*KEXSH )*MAX( 10, ( IHI-ILO+1 ) ) +* +* ==== Last row and column in the active block ==== +* + KBOT = IHI +* +* ==== Main Loop ==== +* + DO 70 IT = 1, ITMAX +* +* ==== Done when KBOT falls below ILO ==== +* + IF( KBOT.LT.ILO ) + $ GO TO 80 +* +* ==== Locate active block ==== +* + DO 10 K = KBOT, ILO + 1, -1 + IF( H( K, K-1 ).EQ.ZERO ) + $ GO TO 20 + 10 CONTINUE + K = ILO + 20 CONTINUE + KTOP = K +* +* ==== Select deflation window size: +* . Typical Case: +* . If possible and advisable, nibble the entire +* . active block. If not, use size MIN(NWR,NWMAX) +* . or MIN(NWR+1,NWMAX) depending upon which has +* . the smaller corresponding subdiagonal entry +* . (a heuristic). +* . +* . Exceptional Case: +* . If there have been no deflations in KEXNW or +* . more iterations, then vary the deflation window +* . size. At first, because, larger windows are, +* . in general, more powerful than smaller ones, +* . rapidly increase the window to the maximum possible. +* . Then, gradually reduce the window size. ==== +* + NH = KBOT - KTOP + 1 + NWUPBD = MIN( NH, NWMAX ) + IF( NDFL.LT.KEXNW ) THEN + NW = MIN( NWUPBD, NWR ) + ELSE + NW = MIN( NWUPBD, 2*NW ) + END IF + IF( NW.LT.NWMAX ) THEN + IF( NW.GE.NH-1 ) THEN + NW = NH + ELSE + KWTOP = KBOT - NW + 1 + IF( CABS1( H( KWTOP, KWTOP-1 ) ).GT. + $ CABS1( H( KWTOP-1, KWTOP-2 ) ) )NW = NW + 1 + END IF + END IF + IF( NDFL.LT.KEXNW ) THEN + NDEC = -1 + ELSE IF( NDEC.GE.0 .OR. NW.GE.NWUPBD ) THEN + NDEC = NDEC + 1 + IF( NW-NDEC.LT.2 ) + $ NDEC = 0 + NW = NW - NDEC + END IF +* +* ==== Aggressive early deflation: +* . split workspace under the subdiagonal into +* . - an nw-by-nw work array V in the lower +* . left-hand-corner, +* . - an NW-by-at-least-NW-but-more-is-better +* . (NW-by-NHO) horizontal work array along +* . the bottom edge, +* . - an at-least-NW-but-more-is-better (NHV-by-NW) +* . vertical work array along the left-hand-edge. +* . ==== +* + KV = N - NW + 1 + KT = NW + 1 + NHO = ( N-NW-1 ) - KT + 1 + KWV = NW + 2 + NVE = ( N-NW ) - KWV + 1 +* +* ==== Aggressive early deflation ==== +* + CALL CLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, W, H( KV, 1 ), LDH, NHO, + $ H( KV, KT ), LDH, NVE, H( KWV, 1 ), LDH, WORK, + $ LWORK ) +* +* ==== Adjust KBOT accounting for new deflations. ==== +* + KBOT = KBOT - LD +* +* ==== KS points to the shifts. ==== +* + KS = KBOT - LS + 1 +* +* ==== Skip an expensive QR sweep if there is a (partly +* . heuristic) reason to expect that many eigenvalues +* . will deflate without it. Here, the QR sweep is +* . skipped if many eigenvalues have just been deflated +* . or if the remaining active block is small. +* + IF( ( LD.EQ.0 ) .OR. ( ( 100*LD.LE.NW*NIBBLE ) .AND. ( KBOT- + $ KTOP+1.GT.MIN( NMIN, NWMAX ) ) ) ) THEN +* +* ==== NS = nominal number of simultaneous shifts. +* . This may be lowered (slightly) if CLAQR3 +* . did not provide that many shifts. ==== +* + NS = MIN( NSMAX, NSR, MAX( 2, KBOT-KTOP ) ) + NS = NS - MOD( NS, 2 ) +* +* ==== If there have been no deflations +* . in a multiple of KEXSH iterations, +* . then try exceptional shifts. +* . Otherwise use shifts provided by +* . CLAQR3 above or from the eigenvalues +* . of a trailing principal submatrix. ==== +* + IF( MOD( NDFL, KEXSH ).EQ.0 ) THEN + KS = KBOT - NS + 1 + DO 30 I = KBOT, KS + 1, -2 + W( I ) = H( I, I ) + WILK1*CABS1( H( I, I-1 ) ) + W( I-1 ) = W( I ) + 30 CONTINUE + ELSE +* +* ==== Got NS/2 or fewer shifts? Use CLAQR4 or +* . CLAHQR on a trailing principal submatrix to +* . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, +* . there is enough space below the subdiagonal +* . to fit an NS-by-NS scratch array.) ==== +* + IF( KBOT-KS+1.LE.NS / 2 ) THEN + KS = KBOT - NS + 1 + KT = N - NS + 1 + CALL CLACPY( 'A', NS, NS, H( KS, KS ), LDH, + $ H( KT, 1 ), LDH ) + IF( NS.GT.NMIN ) THEN + CALL CLAQR4( .false., .false., NS, 1, NS, + $ H( KT, 1 ), LDH, W( KS ), 1, 1, + $ ZDUM, 1, WORK, LWORK, INF ) + ELSE + CALL CLAHQR( .false., .false., NS, 1, NS, + $ H( KT, 1 ), LDH, W( KS ), 1, 1, + $ ZDUM, 1, INF ) + END IF + KS = KS + INF +* +* ==== In case of a rare QR failure use +* . eigenvalues of the trailing 2-by-2 +* . principal submatrix. Scale to avoid +* . overflows, underflows and subnormals. +* . (The scale factor S can not be zero, +* . because H(KBOT,KBOT-1) is nonzero.) ==== +* + IF( KS.GE.KBOT ) THEN + S = CABS1( H( KBOT-1, KBOT-1 ) ) + + $ CABS1( H( KBOT, KBOT-1 ) ) + + $ CABS1( H( KBOT-1, KBOT ) ) + + $ CABS1( H( KBOT, KBOT ) ) + AA = H( KBOT-1, KBOT-1 ) / S + CC = H( KBOT, KBOT-1 ) / S + BB = H( KBOT-1, KBOT ) / S + DD = H( KBOT, KBOT ) / S + TR2 = ( AA+DD ) / TWO + DET = ( AA-TR2 )*( DD-TR2 ) - BB*CC + RTDISC = SQRT( -DET ) + W( KBOT-1 ) = ( TR2+RTDISC )*S + W( KBOT ) = ( TR2-RTDISC )*S +* + KS = KBOT - 1 + END IF + END IF +* + IF( KBOT-KS+1.GT.NS ) THEN +* +* ==== Sort the shifts (Helps a little) ==== +* + SORTED = .false. + DO 50 K = KBOT, KS + 1, -1 + IF( SORTED ) + $ GO TO 60 + SORTED = .true. + DO 40 I = KS, K - 1 + IF( CABS1( W( I ) ).LT.CABS1( W( I+1 ) ) ) + $ THEN + SORTED = .false. + SWAP = W( I ) + W( I ) = W( I+1 ) + W( I+1 ) = SWAP + END IF + 40 CONTINUE + 50 CONTINUE + 60 CONTINUE + END IF + END IF +* +* ==== If there are only two shifts, then use +* . only one. ==== +* + IF( KBOT-KS+1.EQ.2 ) THEN + IF( CABS1( W( KBOT )-H( KBOT, KBOT ) ).LT. + $ CABS1( W( KBOT-1 )-H( KBOT, KBOT ) ) ) THEN + W( KBOT-1 ) = W( KBOT ) + ELSE + W( KBOT ) = W( KBOT-1 ) + END IF + END IF +* +* ==== Use up to NS of the the smallest magnatiude +* . shifts. If there aren't NS shifts available, +* . then use them all, possibly dropping one to +* . make the number of shifts even. ==== +* + NS = MIN( NS, KBOT-KS+1 ) + NS = NS - MOD( NS, 2 ) + KS = KBOT - NS + 1 +* +* ==== Small-bulge multi-shift QR sweep: +* . split workspace under the subdiagonal into +* . - a KDU-by-KDU work array U in the lower +* . left-hand-corner, +* . - a KDU-by-at-least-KDU-but-more-is-better +* . (KDU-by-NHo) horizontal work array WH along +* . the bottom edge, +* . - and an at-least-KDU-but-more-is-better-by-KDU +* . (NVE-by-KDU) vertical work WV arrow along +* . the left-hand-edge. ==== +* + KDU = 3*NS - 3 + KU = N - KDU + 1 + KWH = KDU + 1 + NHO = ( N-KDU+1-4 ) - ( KDU+1 ) + 1 + KWV = KDU + 4 + NVE = N - KDU - KWV + 1 +* +* ==== Small-bulge multi-shift QR sweep ==== +* + CALL CLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NS, + $ W( KS ), H, LDH, ILOZ, IHIZ, Z, LDZ, WORK, + $ 3, H( KU, 1 ), LDH, NVE, H( KWV, 1 ), LDH, + $ NHO, H( KU, KWH ), LDH ) + END IF +* +* ==== Note progress (or the lack of it). ==== +* + IF( LD.GT.0 ) THEN + NDFL = 1 + ELSE + NDFL = NDFL + 1 + END IF +* +* ==== End of main loop ==== + 70 CONTINUE +* +* ==== Iteration limit exceeded. Set INFO to show where +* . the problem occurred and exit. ==== +* + INFO = KBOT + 80 CONTINUE + END IF +* +* ==== Return the optimal value of LWORK. ==== +* + WORK( 1 ) = CMPLX( LWKOPT, 0 ) +* +* ==== End of CLAQR0 ==== +* + END diff --git a/dspl/liblapack/SRC/claqr1.f b/dspl/liblapack/SRC/claqr1.f new file mode 100644 index 0000000..b76bedf --- /dev/null +++ b/dspl/liblapack/SRC/claqr1.f @@ -0,0 +1,172 @@ +*> \brief \b CLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and specified shifts. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAQR1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAQR1( N, H, LDH, S1, S2, V ) +* +* .. Scalar Arguments .. +* COMPLEX S1, S2 +* INTEGER LDH, N +* .. +* .. Array Arguments .. +* COMPLEX H( LDH, * ), V( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Given a 2-by-2 or 3-by-3 matrix H, CLAQR1 sets v to a +*> scalar multiple of the first column of the product +*> +*> (*) K = (H - s1*I)*(H - s2*I) +*> +*> scaling to avoid overflows and most underflows. +*> +*> This is useful for starting double implicit shift bulges +*> in the QR algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> Order of the matrix H. N must be either 2 or 3. +*> \endverbatim +*> +*> \param[in] H +*> \verbatim +*> H is COMPLEX array, dimension (LDH,N) +*> The 2-by-2 or 3-by-3 matrix H in (*). +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of H as declared in +*> the calling procedure. LDH.GE.N +*> \endverbatim +*> +*> \param[in] S1 +*> \verbatim +*> S1 is COMPLEX +*> \endverbatim +*> +*> \param[in] S2 +*> \verbatim +*> S2 is COMPLEX +*> +*> S1 and S2 are the shifts defining K in (*) above. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX array, dimension (N) +*> A scalar multiple of the first column of the +*> matrix K in (*). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +*> +* ===================================================================== + SUBROUTINE CLAQR1( N, H, LDH, S1, S2, V ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + COMPLEX S1, S2 + INTEGER LDH, N +* .. +* .. Array Arguments .. + COMPLEX H( LDH, * ), V( * ) +* .. +* +* ================================================================ +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0e0, 0.0e0 ) ) + REAL RZERO + PARAMETER ( RZERO = 0.0e0 ) +* .. +* .. Local Scalars .. + COMPLEX CDUM, H21S, H31S + REAL S +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. + IF( N.EQ.2 ) THEN + S = CABS1( H( 1, 1 )-S2 ) + CABS1( H( 2, 1 ) ) + IF( S.EQ.RZERO ) THEN + V( 1 ) = ZERO + V( 2 ) = ZERO + ELSE + H21S = H( 2, 1 ) / S + V( 1 ) = H21S*H( 1, 2 ) + ( H( 1, 1 )-S1 )* + $ ( ( H( 1, 1 )-S2 ) / S ) + V( 2 ) = H21S*( H( 1, 1 )+H( 2, 2 )-S1-S2 ) + END IF + ELSE + S = CABS1( H( 1, 1 )-S2 ) + CABS1( H( 2, 1 ) ) + + $ CABS1( H( 3, 1 ) ) + IF( S.EQ.ZERO ) THEN + V( 1 ) = ZERO + V( 2 ) = ZERO + V( 3 ) = ZERO + ELSE + H21S = H( 2, 1 ) / S + H31S = H( 3, 1 ) / S + V( 1 ) = ( H( 1, 1 )-S1 )*( ( H( 1, 1 )-S2 ) / S ) + + $ H( 1, 2 )*H21S + H( 1, 3 )*H31S + V( 2 ) = H21S*( H( 1, 1 )+H( 2, 2 )-S1-S2 ) + H( 2, 3 )*H31S + V( 3 ) = H31S*( H( 1, 1 )+H( 3, 3 )-S1-S2 ) + H21S*H( 3, 2 ) + END IF + END IF + END diff --git a/dspl/liblapack/SRC/claqr2.f b/dspl/liblapack/SRC/claqr2.f new file mode 100644 index 0000000..03e9760 --- /dev/null +++ b/dspl/liblapack/SRC/claqr2.f @@ -0,0 +1,566 @@ +*> \brief \b CLAQR2 performs the unitary similarity transformation of a Hessenberg matrix to detect and deflate fully converged eigenvalues from a trailing principal submatrix (aggressive early deflation). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAQR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, +* IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT, +* NV, WV, LDWV, WORK, LWORK ) +* +* .. Scalar Arguments .. +* INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, +* $ LDZ, LWORK, N, ND, NH, NS, NV, NW +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* COMPLEX H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ), +* $ WORK( * ), WV( LDWV, * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAQR2 is identical to CLAQR3 except that it avoids +*> recursion by calling CLAHQR instead of CLAQR4. +*> +*> Aggressive early deflation: +*> +*> This subroutine accepts as input an upper Hessenberg matrix +*> H and performs an unitary similarity transformation +*> designed to detect and deflate fully converged eigenvalues from +*> a trailing principal submatrix. On output H has been over- +*> written by a new Hessenberg matrix that is a perturbation of +*> an unitary similarity transformation of H. It is to be +*> hoped that the final version of H has many zero subdiagonal +*> entries. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> If .TRUE., then the Hessenberg matrix H is fully updated +*> so that the triangular Schur factor may be +*> computed (in cooperation with the calling subroutine). +*> If .FALSE., then only enough of H is updated to preserve +*> the eigenvalues. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> If .TRUE., then the unitary matrix Z is updated so +*> so that the unitary Schur factor may be computed +*> (in cooperation with the calling subroutine). +*> If .FALSE., then Z is not referenced. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H and (if WANTZ is .TRUE.) the +*> order of the unitary matrix Z. +*> \endverbatim +*> +*> \param[in] KTOP +*> \verbatim +*> KTOP is INTEGER +*> It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. +*> KBOT and KTOP together determine an isolated block +*> along the diagonal of the Hessenberg matrix. +*> \endverbatim +*> +*> \param[in] KBOT +*> \verbatim +*> KBOT is INTEGER +*> It is assumed without a check that either +*> KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together +*> determine an isolated block along the diagonal of the +*> Hessenberg matrix. +*> \endverbatim +*> +*> \param[in] NW +*> \verbatim +*> NW is INTEGER +*> Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is COMPLEX array, dimension (LDH,N) +*> On input the initial N-by-N section of H stores the +*> Hessenberg matrix undergoing aggressive early deflation. +*> On output H has been transformed by a unitary +*> similarity transformation, perturbed, and the returned +*> to Hessenberg form that (it is to be hoped) has some +*> zero subdiagonal entries. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> Leading dimension of H just as declared in the calling +*> subroutine. N .LE. LDH +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ,N) +*> IF WANTZ is .TRUE., then on output, the unitary +*> similarity transformation mentioned above has been +*> accumulated into Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right. +*> If WANTZ is .FALSE., then Z is unreferenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of Z just as declared in the +*> calling subroutine. 1 .LE. LDZ. +*> \endverbatim +*> +*> \param[out] NS +*> \verbatim +*> NS is INTEGER +*> The number of unconverged (ie approximate) eigenvalues +*> returned in SR and SI that may be used as shifts by the +*> calling subroutine. +*> \endverbatim +*> +*> \param[out] ND +*> \verbatim +*> ND is INTEGER +*> The number of converged eigenvalues uncovered by this +*> subroutine. +*> \endverbatim +*> +*> \param[out] SH +*> \verbatim +*> SH is COMPLEX array, dimension (KBOT) +*> On output, approximate eigenvalues that may +*> be used for shifts are stored in SH(KBOT-ND-NS+1) +*> through SR(KBOT-ND). Converged eigenvalues are +*> stored in SH(KBOT-ND+1) through SH(KBOT). +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX array, dimension (LDV,NW) +*> An NW-by-NW work array. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of V just as declared in the +*> calling subroutine. NW .LE. LDV +*> \endverbatim +*> +*> \param[in] NH +*> \verbatim +*> NH is INTEGER +*> The number of columns of T. NH.GE.NW. +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,NW) +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of T just as declared in the +*> calling subroutine. NW .LE. LDT +*> \endverbatim +*> +*> \param[in] NV +*> \verbatim +*> NV is INTEGER +*> The number of rows of work array WV available for +*> workspace. NV.GE.NW. +*> \endverbatim +*> +*> \param[out] WV +*> \verbatim +*> WV is COMPLEX array, dimension (LDWV,NW) +*> \endverbatim +*> +*> \param[in] LDWV +*> \verbatim +*> LDWV is INTEGER +*> The leading dimension of W just as declared in the +*> calling subroutine. NW .LE. LDV +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LWORK) +*> On exit, WORK(1) is set to an estimate of the optimal value +*> of LWORK for the given values of N, NW, KTOP and KBOT. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the work array WORK. LWORK = 2*NW +*> suffices, but greater efficiency may result from larger +*> values of LWORK. +*> +*> If LWORK = -1, then a workspace query is assumed; CLAQR2 +*> only estimates the optimal workspace size for the given +*> values of N, NW, KTOP and KBOT. The estimate is returned +*> in WORK(1). No error message related to LWORK is issued +*> by XERBLA. Neither H nor Z are accessed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +*> +* ===================================================================== + SUBROUTINE CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT, + $ NV, WV, LDWV, WORK, LWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, + $ LDZ, LWORK, N, ND, NH, NS, NV, NW + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + COMPLEX H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ), + $ WORK( * ), WV( LDWV, * ), Z( LDZ, * ) +* .. +* +* ================================================================ +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0e0, 0.0e0 ), + $ ONE = ( 1.0e0, 0.0e0 ) ) + REAL RZERO, RONE + PARAMETER ( RZERO = 0.0e0, RONE = 1.0e0 ) +* .. +* .. Local Scalars .. + COMPLEX BETA, CDUM, S, TAU + REAL FOO, SAFMAX, SAFMIN, SMLNUM, ULP + INTEGER I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN, + $ KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWKOPT +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEHRD, CGEMM, CLACPY, CLAHQR, CLARF, + $ CLARFG, CLASET, CTREXC, CUNMHR, SLABAD +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, INT, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* ==== Estimate optimal workspace. ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + IF( JW.LE.2 ) THEN + LWKOPT = 1 + ELSE +* +* ==== Workspace query call to CGEHRD ==== +* + CALL CGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO ) + LWK1 = INT( WORK( 1 ) ) +* +* ==== Workspace query call to CUNMHR ==== +* + CALL CUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV, + $ WORK, -1, INFO ) + LWK2 = INT( WORK( 1 ) ) +* +* ==== Optimal workspace ==== +* + LWKOPT = JW + MAX( LWK1, LWK2 ) + END IF +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = CMPLX( LWKOPT, 0 ) + RETURN + END IF +* +* ==== Nothing to do ... +* ... for an empty active block ... ==== + NS = 0 + ND = 0 + WORK( 1 ) = ONE + IF( KTOP.GT.KBOT ) + $ RETURN +* ... nor for an empty deflation window. ==== + IF( NW.LT.1 ) + $ RETURN +* +* ==== Machine constants ==== +* + SAFMIN = SLAMCH( 'SAFE MINIMUM' ) + SAFMAX = RONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + ULP = SLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( REAL( N ) / ULP ) +* +* ==== Setup deflation window ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + KWTOP = KBOT - JW + 1 + IF( KWTOP.EQ.KTOP ) THEN + S = ZERO + ELSE + S = H( KWTOP, KWTOP-1 ) + END IF +* + IF( KBOT.EQ.KWTOP ) THEN +* +* ==== 1-by-1 deflation window: not much to do ==== +* + SH( KWTOP ) = H( KWTOP, KWTOP ) + NS = 1 + ND = 0 + IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP, + $ KWTOP ) ) ) ) THEN + NS = 0 + ND = 1 + IF( KWTOP.GT.KTOP ) + $ H( KWTOP, KWTOP-1 ) = ZERO + END IF + WORK( 1 ) = ONE + RETURN + END IF +* +* ==== Convert to spike-triangular form. (In case of a +* . rare QR failure, this routine continues to do +* . aggressive early deflation using that part of +* . the deflation window that converged using INFQR +* . here and there to keep track.) ==== +* + CALL CLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) + CALL CCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) +* + CALL CLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) + CALL CLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1, + $ JW, V, LDV, INFQR ) +* +* ==== Deflation detection loop ==== +* + NS = JW + ILST = INFQR + 1 + DO 10 KNT = INFQR + 1, JW +* +* ==== Small spike tip deflation test ==== +* + FOO = CABS1( T( NS, NS ) ) + IF( FOO.EQ.RZERO ) + $ FOO = CABS1( S ) + IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) + $ THEN +* +* ==== One more converged eigenvalue ==== +* + NS = NS - 1 + ELSE +* +* ==== One undeflatable eigenvalue. Move it up out of the +* . way. (CTREXC can not fail in this case.) ==== +* + IFST = NS + CALL CTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO ) + ILST = ILST + 1 + END IF + 10 CONTINUE +* +* ==== Return to Hessenberg form ==== +* + IF( NS.EQ.0 ) + $ S = ZERO +* + IF( NS.LT.JW ) THEN +* +* ==== sorting the diagonal of T improves accuracy for +* . graded matrices. ==== +* + DO 30 I = INFQR + 1, NS + IFST = I + DO 20 J = I + 1, NS + IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) ) + $ IFST = J + 20 CONTINUE + ILST = I + IF( IFST.NE.ILST ) + $ CALL CTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO ) + 30 CONTINUE + END IF +* +* ==== Restore shift/eigenvalue array from T ==== +* + DO 40 I = INFQR + 1, JW + SH( KWTOP+I-1 ) = T( I, I ) + 40 CONTINUE +* +* + IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN + IF( NS.GT.1 .AND. S.NE.ZERO ) THEN +* +* ==== Reflect spike back into lower triangle ==== +* + CALL CCOPY( NS, V, LDV, WORK, 1 ) + DO 50 I = 1, NS + WORK( I ) = CONJG( WORK( I ) ) + 50 CONTINUE + BETA = WORK( 1 ) + CALL CLARFG( NS, BETA, WORK( 2 ), 1, TAU ) + WORK( 1 ) = ONE +* + CALL CLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) +* + CALL CLARF( 'L', NS, JW, WORK, 1, CONJG( TAU ), T, LDT, + $ WORK( JW+1 ) ) + CALL CLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL CLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, + $ WORK( JW+1 ) ) +* + CALL CGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), + $ LWORK-JW, INFO ) + END IF +* +* ==== Copy updated reduced window into place ==== +* + IF( KWTOP.GT.1 ) + $ H( KWTOP, KWTOP-1 ) = S*CONJG( V( 1, 1 ) ) + CALL CLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH ) + CALL CCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ), + $ LDH+1 ) +* +* ==== Accumulate orthogonal matrix in order update +* . H and Z, if requested. ==== +* + IF( NS.GT.1 .AND. S.NE.ZERO ) + $ CALL CUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV, + $ WORK( JW+1 ), LWORK-JW, INFO ) +* +* ==== Update vertical slab in H ==== +* + IF( WANTT ) THEN + LTOP = 1 + ELSE + LTOP = KTOP + END IF + DO 60 KROW = LTOP, KWTOP - 1, NV + KLN = MIN( NV, KWTOP-KROW ) + CALL CGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), + $ LDH, V, LDV, ZERO, WV, LDWV ) + CALL CLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) + 60 CONTINUE +* +* ==== Update horizontal slab in H ==== +* + IF( WANTT ) THEN + DO 70 KCOL = KBOT + 1, N, NH + KLN = MIN( NH, N-KCOL+1 ) + CALL CGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV, + $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT ) + CALL CLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ), + $ LDH ) + 70 CONTINUE + END IF +* +* ==== Update vertical slab in Z ==== +* + IF( WANTZ ) THEN + DO 80 KROW = ILOZ, IHIZ, NV + KLN = MIN( NV, IHIZ-KROW+1 ) + CALL CGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), + $ LDZ, V, LDV, ZERO, WV, LDWV ) + CALL CLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), + $ LDZ ) + 80 CONTINUE + END IF + END IF +* +* ==== Return the number of deflations ... ==== +* + ND = JW - NS +* +* ==== ... and the number of shifts. (Subtracting +* . INFQR from the spike length takes care +* . of the case of a rare QR failure while +* . calculating eigenvalues of the deflation +* . window.) ==== +* + NS = NS - INFQR +* +* ==== Return optimal workspace. ==== +* + WORK( 1 ) = CMPLX( LWKOPT, 0 ) +* +* ==== End of CLAQR2 ==== +* + END diff --git a/dspl/liblapack/SRC/claqr3.f b/dspl/liblapack/SRC/claqr3.f new file mode 100644 index 0000000..660a583 --- /dev/null +++ b/dspl/liblapack/SRC/claqr3.f @@ -0,0 +1,577 @@ +*> \brief \b CLAQR3 performs the unitary similarity transformation of a Hessenberg matrix to detect and deflate fully converged eigenvalues from a trailing principal submatrix (aggressive early deflation). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAQR3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, +* IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT, +* NV, WV, LDWV, WORK, LWORK ) +* +* .. Scalar Arguments .. +* INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, +* $ LDZ, LWORK, N, ND, NH, NS, NV, NW +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* COMPLEX H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ), +* $ WORK( * ), WV( LDWV, * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Aggressive early deflation: +*> +*> CLAQR3 accepts as input an upper Hessenberg matrix +*> H and performs an unitary similarity transformation +*> designed to detect and deflate fully converged eigenvalues from +*> a trailing principal submatrix. On output H has been over- +*> written by a new Hessenberg matrix that is a perturbation of +*> an unitary similarity transformation of H. It is to be +*> hoped that the final version of H has many zero subdiagonal +*> entries. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> If .TRUE., then the Hessenberg matrix H is fully updated +*> so that the triangular Schur factor may be +*> computed (in cooperation with the calling subroutine). +*> If .FALSE., then only enough of H is updated to preserve +*> the eigenvalues. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> If .TRUE., then the unitary matrix Z is updated so +*> so that the unitary Schur factor may be computed +*> (in cooperation with the calling subroutine). +*> If .FALSE., then Z is not referenced. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H and (if WANTZ is .TRUE.) the +*> order of the unitary matrix Z. +*> \endverbatim +*> +*> \param[in] KTOP +*> \verbatim +*> KTOP is INTEGER +*> It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. +*> KBOT and KTOP together determine an isolated block +*> along the diagonal of the Hessenberg matrix. +*> \endverbatim +*> +*> \param[in] KBOT +*> \verbatim +*> KBOT is INTEGER +*> It is assumed without a check that either +*> KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together +*> determine an isolated block along the diagonal of the +*> Hessenberg matrix. +*> \endverbatim +*> +*> \param[in] NW +*> \verbatim +*> NW is INTEGER +*> Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is COMPLEX array, dimension (LDH,N) +*> On input the initial N-by-N section of H stores the +*> Hessenberg matrix undergoing aggressive early deflation. +*> On output H has been transformed by a unitary +*> similarity transformation, perturbed, and the returned +*> to Hessenberg form that (it is to be hoped) has some +*> zero subdiagonal entries. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> Leading dimension of H just as declared in the calling +*> subroutine. N .LE. LDH +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ,N) +*> IF WANTZ is .TRUE., then on output, the unitary +*> similarity transformation mentioned above has been +*> accumulated into Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right. +*> If WANTZ is .FALSE., then Z is unreferenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of Z just as declared in the +*> calling subroutine. 1 .LE. LDZ. +*> \endverbatim +*> +*> \param[out] NS +*> \verbatim +*> NS is INTEGER +*> The number of unconverged (ie approximate) eigenvalues +*> returned in SR and SI that may be used as shifts by the +*> calling subroutine. +*> \endverbatim +*> +*> \param[out] ND +*> \verbatim +*> ND is INTEGER +*> The number of converged eigenvalues uncovered by this +*> subroutine. +*> \endverbatim +*> +*> \param[out] SH +*> \verbatim +*> SH is COMPLEX array, dimension (KBOT) +*> On output, approximate eigenvalues that may +*> be used for shifts are stored in SH(KBOT-ND-NS+1) +*> through SR(KBOT-ND). Converged eigenvalues are +*> stored in SH(KBOT-ND+1) through SH(KBOT). +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX array, dimension (LDV,NW) +*> An NW-by-NW work array. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of V just as declared in the +*> calling subroutine. NW .LE. LDV +*> \endverbatim +*> +*> \param[in] NH +*> \verbatim +*> NH is INTEGER +*> The number of columns of T. NH.GE.NW. +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,NW) +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of T just as declared in the +*> calling subroutine. NW .LE. LDT +*> \endverbatim +*> +*> \param[in] NV +*> \verbatim +*> NV is INTEGER +*> The number of rows of work array WV available for +*> workspace. NV.GE.NW. +*> \endverbatim +*> +*> \param[out] WV +*> \verbatim +*> WV is COMPLEX array, dimension (LDWV,NW) +*> \endverbatim +*> +*> \param[in] LDWV +*> \verbatim +*> LDWV is INTEGER +*> The leading dimension of W just as declared in the +*> calling subroutine. NW .LE. LDV +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LWORK) +*> On exit, WORK(1) is set to an estimate of the optimal value +*> of LWORK for the given values of N, NW, KTOP and KBOT. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the work array WORK. LWORK = 2*NW +*> suffices, but greater efficiency may result from larger +*> values of LWORK. +*> +*> If LWORK = -1, then a workspace query is assumed; CLAQR3 +*> only estimates the optimal workspace size for the given +*> values of N, NW, KTOP and KBOT. The estimate is returned +*> in WORK(1). No error message related to LWORK is issued +*> by XERBLA. Neither H nor Z are accessed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +*> +* ===================================================================== + SUBROUTINE CLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT, + $ NV, WV, LDWV, WORK, LWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, + $ LDZ, LWORK, N, ND, NH, NS, NV, NW + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + COMPLEX H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ), + $ WORK( * ), WV( LDWV, * ), Z( LDZ, * ) +* .. +* +* ================================================================ +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0e0, 0.0e0 ), + $ ONE = ( 1.0e0, 0.0e0 ) ) + REAL RZERO, RONE + PARAMETER ( RZERO = 0.0e0, RONE = 1.0e0 ) +* .. +* .. Local Scalars .. + COMPLEX BETA, CDUM, S, TAU + REAL FOO, SAFMAX, SAFMIN, SMLNUM, ULP + INTEGER I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN, + $ KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3, + $ LWKOPT, NMIN +* .. +* .. External Functions .. + REAL SLAMCH + INTEGER ILAENV + EXTERNAL SLAMCH, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEHRD, CGEMM, CLACPY, CLAHQR, CLAQR4, + $ CLARF, CLARFG, CLASET, CTREXC, CUNMHR, SLABAD +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, INT, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* ==== Estimate optimal workspace. ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + IF( JW.LE.2 ) THEN + LWKOPT = 1 + ELSE +* +* ==== Workspace query call to CGEHRD ==== +* + CALL CGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO ) + LWK1 = INT( WORK( 1 ) ) +* +* ==== Workspace query call to CUNMHR ==== +* + CALL CUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV, + $ WORK, -1, INFO ) + LWK2 = INT( WORK( 1 ) ) +* +* ==== Workspace query call to CLAQR4 ==== +* + CALL CLAQR4( .true., .true., JW, 1, JW, T, LDT, SH, 1, JW, V, + $ LDV, WORK, -1, INFQR ) + LWK3 = INT( WORK( 1 ) ) +* +* ==== Optimal workspace ==== +* + LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 ) + END IF +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = CMPLX( LWKOPT, 0 ) + RETURN + END IF +* +* ==== Nothing to do ... +* ... for an empty active block ... ==== + NS = 0 + ND = 0 + WORK( 1 ) = ONE + IF( KTOP.GT.KBOT ) + $ RETURN +* ... nor for an empty deflation window. ==== + IF( NW.LT.1 ) + $ RETURN +* +* ==== Machine constants ==== +* + SAFMIN = SLAMCH( 'SAFE MINIMUM' ) + SAFMAX = RONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + ULP = SLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( REAL( N ) / ULP ) +* +* ==== Setup deflation window ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + KWTOP = KBOT - JW + 1 + IF( KWTOP.EQ.KTOP ) THEN + S = ZERO + ELSE + S = H( KWTOP, KWTOP-1 ) + END IF +* + IF( KBOT.EQ.KWTOP ) THEN +* +* ==== 1-by-1 deflation window: not much to do ==== +* + SH( KWTOP ) = H( KWTOP, KWTOP ) + NS = 1 + ND = 0 + IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP, + $ KWTOP ) ) ) ) THEN + NS = 0 + ND = 1 + IF( KWTOP.GT.KTOP ) + $ H( KWTOP, KWTOP-1 ) = ZERO + END IF + WORK( 1 ) = ONE + RETURN + END IF +* +* ==== Convert to spike-triangular form. (In case of a +* . rare QR failure, this routine continues to do +* . aggressive early deflation using that part of +* . the deflation window that converged using INFQR +* . here and there to keep track.) ==== +* + CALL CLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) + CALL CCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) +* + CALL CLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) + NMIN = ILAENV( 12, 'CLAQR3', 'SV', JW, 1, JW, LWORK ) + IF( JW.GT.NMIN ) THEN + CALL CLAQR4( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1, + $ JW, V, LDV, WORK, LWORK, INFQR ) + ELSE + CALL CLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1, + $ JW, V, LDV, INFQR ) + END IF +* +* ==== Deflation detection loop ==== +* + NS = JW + ILST = INFQR + 1 + DO 10 KNT = INFQR + 1, JW +* +* ==== Small spike tip deflation test ==== +* + FOO = CABS1( T( NS, NS ) ) + IF( FOO.EQ.RZERO ) + $ FOO = CABS1( S ) + IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) + $ THEN +* +* ==== One more converged eigenvalue ==== +* + NS = NS - 1 + ELSE +* +* ==== One undeflatable eigenvalue. Move it up out of the +* . way. (CTREXC can not fail in this case.) ==== +* + IFST = NS + CALL CTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO ) + ILST = ILST + 1 + END IF + 10 CONTINUE +* +* ==== Return to Hessenberg form ==== +* + IF( NS.EQ.0 ) + $ S = ZERO +* + IF( NS.LT.JW ) THEN +* +* ==== sorting the diagonal of T improves accuracy for +* . graded matrices. ==== +* + DO 30 I = INFQR + 1, NS + IFST = I + DO 20 J = I + 1, NS + IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) ) + $ IFST = J + 20 CONTINUE + ILST = I + IF( IFST.NE.ILST ) + $ CALL CTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO ) + 30 CONTINUE + END IF +* +* ==== Restore shift/eigenvalue array from T ==== +* + DO 40 I = INFQR + 1, JW + SH( KWTOP+I-1 ) = T( I, I ) + 40 CONTINUE +* +* + IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN + IF( NS.GT.1 .AND. S.NE.ZERO ) THEN +* +* ==== Reflect spike back into lower triangle ==== +* + CALL CCOPY( NS, V, LDV, WORK, 1 ) + DO 50 I = 1, NS + WORK( I ) = CONJG( WORK( I ) ) + 50 CONTINUE + BETA = WORK( 1 ) + CALL CLARFG( NS, BETA, WORK( 2 ), 1, TAU ) + WORK( 1 ) = ONE +* + CALL CLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) +* + CALL CLARF( 'L', NS, JW, WORK, 1, CONJG( TAU ), T, LDT, + $ WORK( JW+1 ) ) + CALL CLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL CLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, + $ WORK( JW+1 ) ) +* + CALL CGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), + $ LWORK-JW, INFO ) + END IF +* +* ==== Copy updated reduced window into place ==== +* + IF( KWTOP.GT.1 ) + $ H( KWTOP, KWTOP-1 ) = S*CONJG( V( 1, 1 ) ) + CALL CLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH ) + CALL CCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ), + $ LDH+1 ) +* +* ==== Accumulate orthogonal matrix in order update +* . H and Z, if requested. ==== +* + IF( NS.GT.1 .AND. S.NE.ZERO ) + $ CALL CUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV, + $ WORK( JW+1 ), LWORK-JW, INFO ) +* +* ==== Update vertical slab in H ==== +* + IF( WANTT ) THEN + LTOP = 1 + ELSE + LTOP = KTOP + END IF + DO 60 KROW = LTOP, KWTOP - 1, NV + KLN = MIN( NV, KWTOP-KROW ) + CALL CGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), + $ LDH, V, LDV, ZERO, WV, LDWV ) + CALL CLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) + 60 CONTINUE +* +* ==== Update horizontal slab in H ==== +* + IF( WANTT ) THEN + DO 70 KCOL = KBOT + 1, N, NH + KLN = MIN( NH, N-KCOL+1 ) + CALL CGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV, + $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT ) + CALL CLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ), + $ LDH ) + 70 CONTINUE + END IF +* +* ==== Update vertical slab in Z ==== +* + IF( WANTZ ) THEN + DO 80 KROW = ILOZ, IHIZ, NV + KLN = MIN( NV, IHIZ-KROW+1 ) + CALL CGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), + $ LDZ, V, LDV, ZERO, WV, LDWV ) + CALL CLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), + $ LDZ ) + 80 CONTINUE + END IF + END IF +* +* ==== Return the number of deflations ... ==== +* + ND = JW - NS +* +* ==== ... and the number of shifts. (Subtracting +* . INFQR from the spike length takes care +* . of the case of a rare QR failure while +* . calculating eigenvalues of the deflation +* . window.) ==== +* + NS = NS - INFQR +* +* ==== Return optimal workspace. ==== +* + WORK( 1 ) = CMPLX( LWKOPT, 0 ) +* +* ==== End of CLAQR3 ==== +* + END diff --git a/dspl/liblapack/SRC/claqr4.f b/dspl/liblapack/SRC/claqr4.f new file mode 100644 index 0000000..647fa67 --- /dev/null +++ b/dspl/liblapack/SRC/claqr4.f @@ -0,0 +1,705 @@ +*> \brief \b CLAQR4 computes the eigenvalues of a Hessenberg matrix, and optionally the matrices from the Schur decomposition. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAQR4 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAQR4( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, +* IHIZ, Z, LDZ, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* COMPLEX H( LDH, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAQR4 implements one level of recursion for CLAQR0. +*> It is a complete implementation of the small bulge multi-shift +*> QR algorithm. It may be called by CLAQR0 and, for large enough +*> deflation window size, it may be called by CLAQR3. This +*> subroutine is identical to CLAQR0 except that it calls CLAQR2 +*> instead of CLAQR3. +*> +*> CLAQR4 computes the eigenvalues of a Hessenberg matrix H +*> and, optionally, the matrices T and Z from the Schur decomposition +*> H = Z T Z**H, where T is an upper triangular matrix (the +*> Schur form), and Z is the unitary matrix of Schur vectors. +*> +*> Optionally Z may be postmultiplied into an input unitary +*> matrix Q so that this routine can give the Schur factorization +*> of a matrix A which has been reduced to the Hessenberg form H +*> by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> = .TRUE. : the full Schur form T is required; +*> = .FALSE.: only eigenvalues are required. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> = .TRUE. : the matrix of Schur vectors Z is required; +*> = .FALSE.: Schur vectors are not required. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N .GE. 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> It is assumed that H is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, +*> H(ILO,ILO-1) is zero. ILO and IHI are normally set by a +*> previous call to CGEBAL, and then passed to CGEHRD when the +*> matrix output by CGEBAL is reduced to Hessenberg form. +*> Otherwise, ILO and IHI should be set to 1 and N, +*> respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. +*> If N = 0, then ILO = 1 and IHI = 0. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is COMPLEX array, dimension (LDH,N) +*> On entry, the upper Hessenberg matrix H. +*> On exit, if INFO = 0 and WANTT is .TRUE., then H +*> contains the upper triangular matrix T from the Schur +*> decomposition (the Schur form). If INFO = 0 and WANT is +*> .FALSE., then the contents of H are unspecified on exit. +*> (The output value of H when INFO.GT.0 is given under the +*> description of INFO below.) +*> +*> This subroutine may explicitly set H(i,j) = 0 for i.GT.j and +*> j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH .GE. max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX array, dimension (N) +*> The computed eigenvalues of H(ILO:IHI,ILO:IHI) are stored +*> in W(ILO:IHI). If WANTT is .TRUE., then the eigenvalues are +*> stored in the same order as on the diagonal of the Schur +*> form returned in H, with W(i) = H(i,i). +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. +*> 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ,IHI) +*> If WANTZ is .FALSE., then Z is not referenced. +*> If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is +*> replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the +*> orthogonal Schur factor of H(ILO:IHI,ILO:IHI). +*> (The output value of Z when INFO.GT.0 is given under +*> the description of INFO below.) +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. if WANTZ is .TRUE. +*> then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension LWORK +*> On exit, if LWORK = -1, WORK(1) returns an estimate of +*> the optimal value for LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK .GE. max(1,N) +*> is sufficient, but LWORK typically as large as 6*N may +*> be required for optimal performance. A workspace query +*> to determine the optimal workspace size is recommended. +*> +*> If LWORK = -1, then CLAQR4 does a workspace query. +*> In this case, CLAQR4 checks the input parameters and +*> estimates the optimal workspace size for the given +*> values of N, ILO and IHI. The estimate is returned +*> in WORK(1). No error message related to LWORK is +*> issued by XERBLA. Neither H nor Z are accessed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> .GT. 0: if INFO = i, CLAQR4 failed to compute all of +*> the eigenvalues. Elements 1:ilo-1 and i+1:n of WR +*> and WI contain those eigenvalues which have been +*> successfully computed. (Failures are rare.) +*> +*> If INFO .GT. 0 and WANT is .FALSE., then on exit, +*> the remaining unconverged eigenvalues are the eigen- +*> values of the upper Hessenberg matrix rows and +*> columns ILO through INFO of the final, output +*> value of H. +*> +*> If INFO .GT. 0 and WANTT is .TRUE., then on exit +*> +*> (*) (initial value of H)*U = U*(final value of H) +*> +*> where U is a unitary matrix. The final +*> value of H is upper Hessenberg and triangular in +*> rows and columns INFO+1 through IHI. +*> +*> If INFO .GT. 0 and WANTZ is .TRUE., then on exit +*> +*> (final value of Z(ILO:IHI,ILOZ:IHIZ) +*> = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U +*> +*> where U is the unitary matrix in (*) (regard- +*> less of the value of WANTT.) +*> +*> If INFO .GT. 0 and WANTZ is .FALSE., then Z is not +*> accessed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +* +*> \par References: +* ================ +*> +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages +*> 929--947, 2002. +*> \n +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part II: Aggressive Early Deflation, SIAM Journal +*> of Matrix Analysis, volume 23, pages 948--973, 2002. +*> +* ===================================================================== + SUBROUTINE CLAQR4( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, + $ IHIZ, Z, LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + COMPLEX H( LDH, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +* ================================================================ +* +* .. Parameters .. +* +* ==== Matrices of order NTINY or smaller must be processed by +* . CLAHQR because of insufficient subdiagonal scratch space. +* . (This is a hard limit.) ==== + INTEGER NTINY + PARAMETER ( NTINY = 11 ) +* +* ==== Exceptional deflation windows: try to cure rare +* . slow convergence by varying the size of the +* . deflation window after KEXNW iterations. ==== + INTEGER KEXNW + PARAMETER ( KEXNW = 5 ) +* +* ==== Exceptional shifts: try to cure rare slow convergence +* . with ad-hoc exceptional shifts every KEXSH iterations. +* . ==== + INTEGER KEXSH + PARAMETER ( KEXSH = 6 ) +* +* ==== The constant WILK1 is used to form the exceptional +* . shifts. ==== + REAL WILK1 + PARAMETER ( WILK1 = 0.75e0 ) + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0e0, 0.0e0 ), + $ ONE = ( 1.0e0, 0.0e0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0e0 ) +* .. +* .. Local Scalars .. + COMPLEX AA, BB, CC, CDUM, DD, DET, RTDISC, SWAP, TR2 + REAL S + INTEGER I, INF, IT, ITMAX, K, KACC22, KBOT, KDU, KS, + $ KT, KTOP, KU, KV, KWH, KWTOP, KWV, LD, LS, + $ LWKOPT, NDEC, NDFL, NH, NHO, NIBBLE, NMIN, NS, + $ NSMAX, NSR, NVE, NW, NWMAX, NWR, NWUPBD + LOGICAL SORTED + CHARACTER JBCMPZ*2 +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Local Arrays .. + COMPLEX ZDUM( 1, 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CLACPY, CLAHQR, CLAQR2, CLAQR5 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, INT, MAX, MIN, MOD, REAL, + $ SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. + INFO = 0 +* +* ==== Quick return for N = 0: nothing to do. ==== +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = ONE + RETURN + END IF +* + IF( N.LE.NTINY ) THEN +* +* ==== Tiny matrices must use CLAHQR. ==== +* + LWKOPT = 1 + IF( LWORK.NE.-1 ) + $ CALL CLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, + $ IHIZ, Z, LDZ, INFO ) + ELSE +* +* ==== Use small bulge multi-shift QR with aggressive early +* . deflation on larger-than-tiny matrices. ==== +* +* ==== Hope for the best. ==== +* + INFO = 0 +* +* ==== Set up job flags for ILAENV. ==== +* + IF( WANTT ) THEN + JBCMPZ( 1: 1 ) = 'S' + ELSE + JBCMPZ( 1: 1 ) = 'E' + END IF + IF( WANTZ ) THEN + JBCMPZ( 2: 2 ) = 'V' + ELSE + JBCMPZ( 2: 2 ) = 'N' + END IF +* +* ==== NWR = recommended deflation window size. At this +* . point, N .GT. NTINY = 11, so there is enough +* . subdiagonal workspace for NWR.GE.2 as required. +* . (In fact, there is enough subdiagonal space for +* . NWR.GE.3.) ==== +* + NWR = ILAENV( 13, 'CLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NWR = MAX( 2, NWR ) + NWR = MIN( IHI-ILO+1, ( N-1 ) / 3, NWR ) +* +* ==== NSR = recommended number of simultaneous shifts. +* . At this point N .GT. NTINY = 11, so there is at +* . enough subdiagonal workspace for NSR to be even +* . and greater than or equal to two as required. ==== +* + NSR = ILAENV( 15, 'CLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NSR = MIN( NSR, ( N+6 ) / 9, IHI-ILO ) + NSR = MAX( 2, NSR-MOD( NSR, 2 ) ) +* +* ==== Estimate optimal workspace ==== +* +* ==== Workspace query call to CLAQR2 ==== +* + CALL CLAQR2( WANTT, WANTZ, N, ILO, IHI, NWR+1, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, W, H, LDH, N, H, LDH, N, H, + $ LDH, WORK, -1 ) +* +* ==== Optimal workspace = MAX(CLAQR5, CLAQR2) ==== +* + LWKOPT = MAX( 3*NSR / 2, INT( WORK( 1 ) ) ) +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = CMPLX( LWKOPT, 0 ) + RETURN + END IF +* +* ==== CLAHQR/CLAQR0 crossover point ==== +* + NMIN = ILAENV( 12, 'CLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NMIN = MAX( NTINY, NMIN ) +* +* ==== Nibble crossover point ==== +* + NIBBLE = ILAENV( 14, 'CLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NIBBLE = MAX( 0, NIBBLE ) +* +* ==== Accumulate reflections during ttswp? Use block +* . 2-by-2 structure during matrix-matrix multiply? ==== +* + KACC22 = ILAENV( 16, 'CLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + KACC22 = MAX( 0, KACC22 ) + KACC22 = MIN( 2, KACC22 ) +* +* ==== NWMAX = the largest possible deflation window for +* . which there is sufficient workspace. ==== +* + NWMAX = MIN( ( N-1 ) / 3, LWORK / 2 ) + NW = NWMAX +* +* ==== NSMAX = the Largest number of simultaneous shifts +* . for which there is sufficient workspace. ==== +* + NSMAX = MIN( ( N+6 ) / 9, 2*LWORK / 3 ) + NSMAX = NSMAX - MOD( NSMAX, 2 ) +* +* ==== NDFL: an iteration count restarted at deflation. ==== +* + NDFL = 1 +* +* ==== ITMAX = iteration limit ==== +* + ITMAX = MAX( 30, 2*KEXSH )*MAX( 10, ( IHI-ILO+1 ) ) +* +* ==== Last row and column in the active block ==== +* + KBOT = IHI +* +* ==== Main Loop ==== +* + DO 70 IT = 1, ITMAX +* +* ==== Done when KBOT falls below ILO ==== +* + IF( KBOT.LT.ILO ) + $ GO TO 80 +* +* ==== Locate active block ==== +* + DO 10 K = KBOT, ILO + 1, -1 + IF( H( K, K-1 ).EQ.ZERO ) + $ GO TO 20 + 10 CONTINUE + K = ILO + 20 CONTINUE + KTOP = K +* +* ==== Select deflation window size: +* . Typical Case: +* . If possible and advisable, nibble the entire +* . active block. If not, use size MIN(NWR,NWMAX) +* . or MIN(NWR+1,NWMAX) depending upon which has +* . the smaller corresponding subdiagonal entry +* . (a heuristic). +* . +* . Exceptional Case: +* . If there have been no deflations in KEXNW or +* . more iterations, then vary the deflation window +* . size. At first, because, larger windows are, +* . in general, more powerful than smaller ones, +* . rapidly increase the window to the maximum possible. +* . Then, gradually reduce the window size. ==== +* + NH = KBOT - KTOP + 1 + NWUPBD = MIN( NH, NWMAX ) + IF( NDFL.LT.KEXNW ) THEN + NW = MIN( NWUPBD, NWR ) + ELSE + NW = MIN( NWUPBD, 2*NW ) + END IF + IF( NW.LT.NWMAX ) THEN + IF( NW.GE.NH-1 ) THEN + NW = NH + ELSE + KWTOP = KBOT - NW + 1 + IF( CABS1( H( KWTOP, KWTOP-1 ) ).GT. + $ CABS1( H( KWTOP-1, KWTOP-2 ) ) )NW = NW + 1 + END IF + END IF + IF( NDFL.LT.KEXNW ) THEN + NDEC = -1 + ELSE IF( NDEC.GE.0 .OR. NW.GE.NWUPBD ) THEN + NDEC = NDEC + 1 + IF( NW-NDEC.LT.2 ) + $ NDEC = 0 + NW = NW - NDEC + END IF +* +* ==== Aggressive early deflation: +* . split workspace under the subdiagonal into +* . - an nw-by-nw work array V in the lower +* . left-hand-corner, +* . - an NW-by-at-least-NW-but-more-is-better +* . (NW-by-NHO) horizontal work array along +* . the bottom edge, +* . - an at-least-NW-but-more-is-better (NHV-by-NW) +* . vertical work array along the left-hand-edge. +* . ==== +* + KV = N - NW + 1 + KT = NW + 1 + NHO = ( N-NW-1 ) - KT + 1 + KWV = NW + 2 + NVE = ( N-NW ) - KWV + 1 +* +* ==== Aggressive early deflation ==== +* + CALL CLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, W, H( KV, 1 ), LDH, NHO, + $ H( KV, KT ), LDH, NVE, H( KWV, 1 ), LDH, WORK, + $ LWORK ) +* +* ==== Adjust KBOT accounting for new deflations. ==== +* + KBOT = KBOT - LD +* +* ==== KS points to the shifts. ==== +* + KS = KBOT - LS + 1 +* +* ==== Skip an expensive QR sweep if there is a (partly +* . heuristic) reason to expect that many eigenvalues +* . will deflate without it. Here, the QR sweep is +* . skipped if many eigenvalues have just been deflated +* . or if the remaining active block is small. +* + IF( ( LD.EQ.0 ) .OR. ( ( 100*LD.LE.NW*NIBBLE ) .AND. ( KBOT- + $ KTOP+1.GT.MIN( NMIN, NWMAX ) ) ) ) THEN +* +* ==== NS = nominal number of simultaneous shifts. +* . This may be lowered (slightly) if CLAQR2 +* . did not provide that many shifts. ==== +* + NS = MIN( NSMAX, NSR, MAX( 2, KBOT-KTOP ) ) + NS = NS - MOD( NS, 2 ) +* +* ==== If there have been no deflations +* . in a multiple of KEXSH iterations, +* . then try exceptional shifts. +* . Otherwise use shifts provided by +* . CLAQR2 above or from the eigenvalues +* . of a trailing principal submatrix. ==== +* + IF( MOD( NDFL, KEXSH ).EQ.0 ) THEN + KS = KBOT - NS + 1 + DO 30 I = KBOT, KS + 1, -2 + W( I ) = H( I, I ) + WILK1*CABS1( H( I, I-1 ) ) + W( I-1 ) = W( I ) + 30 CONTINUE + ELSE +* +* ==== Got NS/2 or fewer shifts? Use CLAHQR +* . on a trailing principal submatrix to +* . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, +* . there is enough space below the subdiagonal +* . to fit an NS-by-NS scratch array.) ==== +* + IF( KBOT-KS+1.LE.NS / 2 ) THEN + KS = KBOT - NS + 1 + KT = N - NS + 1 + CALL CLACPY( 'A', NS, NS, H( KS, KS ), LDH, + $ H( KT, 1 ), LDH ) + CALL CLAHQR( .false., .false., NS, 1, NS, + $ H( KT, 1 ), LDH, W( KS ), 1, 1, ZDUM, + $ 1, INF ) + KS = KS + INF +* +* ==== In case of a rare QR failure use +* . eigenvalues of the trailing 2-by-2 +* . principal submatrix. Scale to avoid +* . overflows, underflows and subnormals. +* . (The scale factor S can not be zero, +* . because H(KBOT,KBOT-1) is nonzero.) ==== +* + IF( KS.GE.KBOT ) THEN + S = CABS1( H( KBOT-1, KBOT-1 ) ) + + $ CABS1( H( KBOT, KBOT-1 ) ) + + $ CABS1( H( KBOT-1, KBOT ) ) + + $ CABS1( H( KBOT, KBOT ) ) + AA = H( KBOT-1, KBOT-1 ) / S + CC = H( KBOT, KBOT-1 ) / S + BB = H( KBOT-1, KBOT ) / S + DD = H( KBOT, KBOT ) / S + TR2 = ( AA+DD ) / TWO + DET = ( AA-TR2 )*( DD-TR2 ) - BB*CC + RTDISC = SQRT( -DET ) + W( KBOT-1 ) = ( TR2+RTDISC )*S + W( KBOT ) = ( TR2-RTDISC )*S +* + KS = KBOT - 1 + END IF + END IF +* + IF( KBOT-KS+1.GT.NS ) THEN +* +* ==== Sort the shifts (Helps a little) ==== +* + SORTED = .false. + DO 50 K = KBOT, KS + 1, -1 + IF( SORTED ) + $ GO TO 60 + SORTED = .true. + DO 40 I = KS, K - 1 + IF( CABS1( W( I ) ).LT.CABS1( W( I+1 ) ) ) + $ THEN + SORTED = .false. + SWAP = W( I ) + W( I ) = W( I+1 ) + W( I+1 ) = SWAP + END IF + 40 CONTINUE + 50 CONTINUE + 60 CONTINUE + END IF + END IF +* +* ==== If there are only two shifts, then use +* . only one. ==== +* + IF( KBOT-KS+1.EQ.2 ) THEN + IF( CABS1( W( KBOT )-H( KBOT, KBOT ) ).LT. + $ CABS1( W( KBOT-1 )-H( KBOT, KBOT ) ) ) THEN + W( KBOT-1 ) = W( KBOT ) + ELSE + W( KBOT ) = W( KBOT-1 ) + END IF + END IF +* +* ==== Use up to NS of the the smallest magnatiude +* . shifts. If there aren't NS shifts available, +* . then use them all, possibly dropping one to +* . make the number of shifts even. ==== +* + NS = MIN( NS, KBOT-KS+1 ) + NS = NS - MOD( NS, 2 ) + KS = KBOT - NS + 1 +* +* ==== Small-bulge multi-shift QR sweep: +* . split workspace under the subdiagonal into +* . - a KDU-by-KDU work array U in the lower +* . left-hand-corner, +* . - a KDU-by-at-least-KDU-but-more-is-better +* . (KDU-by-NHo) horizontal work array WH along +* . the bottom edge, +* . - and an at-least-KDU-but-more-is-better-by-KDU +* . (NVE-by-KDU) vertical work WV arrow along +* . the left-hand-edge. ==== +* + KDU = 3*NS - 3 + KU = N - KDU + 1 + KWH = KDU + 1 + NHO = ( N-KDU+1-4 ) - ( KDU+1 ) + 1 + KWV = KDU + 4 + NVE = N - KDU - KWV + 1 +* +* ==== Small-bulge multi-shift QR sweep ==== +* + CALL CLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NS, + $ W( KS ), H, LDH, ILOZ, IHIZ, Z, LDZ, WORK, + $ 3, H( KU, 1 ), LDH, NVE, H( KWV, 1 ), LDH, + $ NHO, H( KU, KWH ), LDH ) + END IF +* +* ==== Note progress (or the lack of it). ==== +* + IF( LD.GT.0 ) THEN + NDFL = 1 + ELSE + NDFL = NDFL + 1 + END IF +* +* ==== End of main loop ==== + 70 CONTINUE +* +* ==== Iteration limit exceeded. Set INFO to show where +* . the problem occurred and exit. ==== +* + INFO = KBOT + 80 CONTINUE + END IF +* +* ==== Return the optimal value of LWORK. ==== +* + WORK( 1 ) = CMPLX( LWKOPT, 0 ) +* +* ==== End of CLAQR4 ==== +* + END diff --git a/dspl/liblapack/SRC/claqr5.f b/dspl/liblapack/SRC/claqr5.f new file mode 100644 index 0000000..4c89789 --- /dev/null +++ b/dspl/liblapack/SRC/claqr5.f @@ -0,0 +1,907 @@ +*> \brief \b CLAQR5 performs a single small-bulge multi-shift QR sweep. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAQR5 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, +* H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, +* WV, LDWV, NH, WH, LDWH ) +* +* .. Scalar Arguments .. +* INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV, +* $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* COMPLEX H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ), +* $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAQR5 called by CLAQR0 performs a +*> single small-bulge multi-shift QR sweep. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> WANTT = .true. if the triangular Schur factor +*> is being computed. WANTT is set to .false. otherwise. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> WANTZ = .true. if the unitary Schur factor is being +*> computed. WANTZ is set to .false. otherwise. +*> \endverbatim +*> +*> \param[in] KACC22 +*> \verbatim +*> KACC22 is INTEGER with value 0, 1, or 2. +*> Specifies the computation mode of far-from-diagonal +*> orthogonal updates. +*> = 0: CLAQR5 does not accumulate reflections and does not +*> use matrix-matrix multiply to update far-from-diagonal +*> matrix entries. +*> = 1: CLAQR5 accumulates reflections and uses matrix-matrix +*> multiply to update the far-from-diagonal matrix entries. +*> = 2: CLAQR5 accumulates reflections, uses matrix-matrix +*> multiply to update the far-from-diagonal matrix entries, +*> and takes advantage of 2-by-2 block structure during +*> matrix multiplies. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> N is the order of the Hessenberg matrix H upon which this +*> subroutine operates. +*> \endverbatim +*> +*> \param[in] KTOP +*> \verbatim +*> KTOP is INTEGER +*> \endverbatim +*> +*> \param[in] KBOT +*> \verbatim +*> KBOT is INTEGER +*> These are the first and last rows and columns of an +*> isolated diagonal block upon which the QR sweep is to be +*> applied. It is assumed without a check that +*> either KTOP = 1 or H(KTOP,KTOP-1) = 0 +*> and +*> either KBOT = N or H(KBOT+1,KBOT) = 0. +*> \endverbatim +*> +*> \param[in] NSHFTS +*> \verbatim +*> NSHFTS is INTEGER +*> NSHFTS gives the number of simultaneous shifts. NSHFTS +*> must be positive and even. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is COMPLEX array, dimension (NSHFTS) +*> S contains the shifts of origin that define the multi- +*> shift QR sweep. On output S may be reordered. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is COMPLEX array, dimension (LDH,N) +*> On input H contains a Hessenberg matrix. On output a +*> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied +*> to the isolated diagonal block in rows and columns KTOP +*> through KBOT. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> LDH is the leading dimension of H just as declared in the +*> calling procedure. LDH.GE.MAX(1,N). +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ,IHIZ) +*> If WANTZ = .TRUE., then the QR Sweep unitary +*> similarity transformation is accumulated into +*> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right. +*> If WANTZ = .FALSE., then Z is unreferenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> LDA is the leading dimension of Z just as declared in +*> the calling procedure. LDZ.GE.N. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX array, dimension (LDV,NSHFTS/2) +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> LDV is the leading dimension of V as declared in the +*> calling procedure. LDV.GE.3. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX array, dimension (LDU,3*NSHFTS-3) +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> LDU is the leading dimension of U just as declared in the +*> in the calling subroutine. LDU.GE.3*NSHFTS-3. +*> \endverbatim +*> +*> \param[in] NH +*> \verbatim +*> NH is INTEGER +*> NH is the number of columns in array WH available for +*> workspace. NH.GE.1. +*> \endverbatim +*> +*> \param[out] WH +*> \verbatim +*> WH is COMPLEX array, dimension (LDWH,NH) +*> \endverbatim +*> +*> \param[in] LDWH +*> \verbatim +*> LDWH is INTEGER +*> Leading dimension of WH just as declared in the +*> calling procedure. LDWH.GE.3*NSHFTS-3. +*> \endverbatim +*> +*> \param[in] NV +*> \verbatim +*> NV is INTEGER +*> NV is the number of rows in WV agailable for workspace. +*> NV.GE.1. +*> \endverbatim +*> +*> \param[out] WV +*> \verbatim +*> WV is COMPLEX array, dimension (LDWV,3*NSHFTS-3) +*> \endverbatim +*> +*> \param[in] LDWV +*> \verbatim +*> LDWV is INTEGER +*> LDWV is the leading dimension of WV as declared in the +*> in the calling subroutine. LDWV.GE.NV. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +* +*> \par References: +* ================ +*> +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages +*> 929--947, 2002. +*> +* ===================================================================== + SUBROUTINE CLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, + $ H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, + $ WV, LDWV, NH, WH, LDWH ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV, + $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + COMPLEX H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ), + $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * ) +* .. +* +* ================================================================ +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0e0, 0.0e0 ), + $ ONE = ( 1.0e0, 0.0e0 ) ) + REAL RZERO, RONE + PARAMETER ( RZERO = 0.0e0, RONE = 1.0e0 ) +* .. +* .. Local Scalars .. + COMPLEX ALPHA, BETA, CDUM, REFSUM + REAL H11, H12, H21, H22, SAFMAX, SAFMIN, SCL, + $ SMLNUM, TST1, TST2, ULP + INTEGER I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN, + $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS, + $ M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL, + $ NS, NU + LOGICAL ACCUM, BLK22, BMP22 +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. +* + INTRINSIC ABS, AIMAG, CONJG, MAX, MIN, MOD, REAL +* .. +* .. Local Arrays .. + COMPLEX VT( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CLACPY, CLAQR1, CLARFG, CLASET, CTRMM, + $ SLABAD +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* ==== If there are no shifts, then there is nothing to do. ==== +* + IF( NSHFTS.LT.2 ) + $ RETURN +* +* ==== If the active block is empty or 1-by-1, then there +* . is nothing to do. ==== +* + IF( KTOP.GE.KBOT ) + $ RETURN +* +* ==== NSHFTS is supposed to be even, but if it is odd, +* . then simply reduce it by one. ==== +* + NS = NSHFTS - MOD( NSHFTS, 2 ) +* +* ==== Machine constants for deflation ==== +* + SAFMIN = SLAMCH( 'SAFE MINIMUM' ) + SAFMAX = RONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + ULP = SLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( REAL( N ) / ULP ) +* +* ==== Use accumulated reflections to update far-from-diagonal +* . entries ? ==== +* + ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 ) +* +* ==== If so, exploit the 2-by-2 block structure? ==== +* + BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 ) +* +* ==== clear trash ==== +* + IF( KTOP+2.LE.KBOT ) + $ H( KTOP+2, KTOP ) = ZERO +* +* ==== NBMPS = number of 2-shift bulges in the chain ==== +* + NBMPS = NS / 2 +* +* ==== KDU = width of slab ==== +* + KDU = 6*NBMPS - 3 +* +* ==== Create and chase chains of NBMPS bulges ==== +* + DO 210 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2 + NDCOL = INCOL + KDU + IF( ACCUM ) + $ CALL CLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU ) +* +* ==== Near-the-diagonal bulge chase. The following loop +* . performs the near-the-diagonal part of a small bulge +* . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal +* . chunk extends from column INCOL to column NDCOL +* . (including both column INCOL and column NDCOL). The +* . following loop chases a 3*NBMPS column long chain of +* . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL +* . may be less than KTOP and and NDCOL may be greater than +* . KBOT indicating phantom columns from which to chase +* . bulges before they are actually introduced or to which +* . to chase bulges beyond column KBOT.) ==== +* + DO 140 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 ) +* +* ==== Bulges number MTOP to MBOT are active double implicit +* . shift bulges. There may or may not also be small +* . 2-by-2 bulge, if there is room. The inactive bulges +* . (if any) must wait until the active bulges have moved +* . down the diagonal to make room. The phantom matrix +* . paradigm described above helps keep track. ==== +* + MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 ) + MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 ) + M22 = MBOT + 1 + BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ. + $ ( KBOT-2 ) +* +* ==== Generate reflections to chase the chain right +* . one column. (The minimum value of K is KTOP-1.) ==== +* + DO 10 M = MTOP, MBOT + K = KRCOL + 3*( M-1 ) + IF( K.EQ.KTOP-1 ) THEN + CALL CLAQR1( 3, H( KTOP, KTOP ), LDH, S( 2*M-1 ), + $ S( 2*M ), V( 1, M ) ) + ALPHA = V( 1, M ) + CALL CLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) ) + ELSE + BETA = H( K+1, K ) + V( 2, M ) = H( K+2, K ) + V( 3, M ) = H( K+3, K ) + CALL CLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) ) +* +* ==== A Bulge may collapse because of vigilant +* . deflation or destructive underflow. In the +* . underflow case, try the two-small-subdiagonals +* . trick to try to reinflate the bulge. ==== +* + IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE. + $ ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN +* +* ==== Typical case: not collapsed (yet). ==== +* + H( K+1, K ) = BETA + H( K+2, K ) = ZERO + H( K+3, K ) = ZERO + ELSE +* +* ==== Atypical case: collapsed. Attempt to +* . reintroduce ignoring H(K+1,K) and H(K+2,K). +* . If the fill resulting from the new +* . reflector is too large, then abandon it. +* . Otherwise, use the new one. ==== +* + CALL CLAQR1( 3, H( K+1, K+1 ), LDH, S( 2*M-1 ), + $ S( 2*M ), VT ) + ALPHA = VT( 1 ) + CALL CLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) ) + REFSUM = CONJG( VT( 1 ) )* + $ ( H( K+1, K )+CONJG( VT( 2 ) )* + $ H( K+2, K ) ) +* + IF( CABS1( H( K+2, K )-REFSUM*VT( 2 ) )+ + $ CABS1( REFSUM*VT( 3 ) ).GT.ULP* + $ ( CABS1( H( K, K ) )+CABS1( H( K+1, + $ K+1 ) )+CABS1( H( K+2, K+2 ) ) ) ) THEN +* +* ==== Starting a new bulge here would +* . create non-negligible fill. Use +* . the old one with trepidation. ==== +* + H( K+1, K ) = BETA + H( K+2, K ) = ZERO + H( K+3, K ) = ZERO + ELSE +* +* ==== Stating a new bulge here would +* . create only negligible fill. +* . Replace the old reflector with +* . the new one. ==== +* + H( K+1, K ) = H( K+1, K ) - REFSUM + H( K+2, K ) = ZERO + H( K+3, K ) = ZERO + V( 1, M ) = VT( 1 ) + V( 2, M ) = VT( 2 ) + V( 3, M ) = VT( 3 ) + END IF + END IF + END IF + 10 CONTINUE +* +* ==== Generate a 2-by-2 reflection, if needed. ==== +* + K = KRCOL + 3*( M22-1 ) + IF( BMP22 ) THEN + IF( K.EQ.KTOP-1 ) THEN + CALL CLAQR1( 2, H( K+1, K+1 ), LDH, S( 2*M22-1 ), + $ S( 2*M22 ), V( 1, M22 ) ) + BETA = V( 1, M22 ) + CALL CLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) + ELSE + BETA = H( K+1, K ) + V( 2, M22 ) = H( K+2, K ) + CALL CLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) + H( K+1, K ) = BETA + H( K+2, K ) = ZERO + END IF + END IF +* +* ==== Multiply H by reflections from the left ==== +* + IF( ACCUM ) THEN + JBOT = MIN( NDCOL, KBOT ) + ELSE IF( WANTT ) THEN + JBOT = N + ELSE + JBOT = KBOT + END IF + DO 30 J = MAX( KTOP, KRCOL ), JBOT + MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 ) + DO 20 M = MTOP, MEND + K = KRCOL + 3*( M-1 ) + REFSUM = CONJG( V( 1, M ) )* + $ ( H( K+1, J )+CONJG( V( 2, M ) )*H( K+2, J )+ + $ CONJG( V( 3, M ) )*H( K+3, J ) ) + H( K+1, J ) = H( K+1, J ) - REFSUM + H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M ) + H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M ) + 20 CONTINUE + 30 CONTINUE + IF( BMP22 ) THEN + K = KRCOL + 3*( M22-1 ) + DO 40 J = MAX( K+1, KTOP ), JBOT + REFSUM = CONJG( V( 1, M22 ) )* + $ ( H( K+1, J )+CONJG( V( 2, M22 ) )* + $ H( K+2, J ) ) + H( K+1, J ) = H( K+1, J ) - REFSUM + H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 ) + 40 CONTINUE + END IF +* +* ==== Multiply H by reflections from the right. +* . Delay filling in the last row until the +* . vigilant deflation check is complete. ==== +* + IF( ACCUM ) THEN + JTOP = MAX( KTOP, INCOL ) + ELSE IF( WANTT ) THEN + JTOP = 1 + ELSE + JTOP = KTOP + END IF + DO 80 M = MTOP, MBOT + IF( V( 1, M ).NE.ZERO ) THEN + K = KRCOL + 3*( M-1 ) + DO 50 J = JTOP, MIN( KBOT, K+3 ) + REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )* + $ H( J, K+2 )+V( 3, M )*H( J, K+3 ) ) + H( J, K+1 ) = H( J, K+1 ) - REFSUM + H( J, K+2 ) = H( J, K+2 ) - + $ REFSUM*CONJG( V( 2, M ) ) + H( J, K+3 ) = H( J, K+3 ) - + $ REFSUM*CONJG( V( 3, M ) ) + 50 CONTINUE +* + IF( ACCUM ) THEN +* +* ==== Accumulate U. (If necessary, update Z later +* . with with an efficient matrix-matrix +* . multiply.) ==== +* + KMS = K - INCOL + DO 60 J = MAX( 1, KTOP-INCOL ), KDU + REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )* + $ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) ) + U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM + U( J, KMS+2 ) = U( J, KMS+2 ) - + $ REFSUM*CONJG( V( 2, M ) ) + U( J, KMS+3 ) = U( J, KMS+3 ) - + $ REFSUM*CONJG( V( 3, M ) ) + 60 CONTINUE + ELSE IF( WANTZ ) THEN +* +* ==== U is not accumulated, so update Z +* . now by multiplying by reflections +* . from the right. ==== +* + DO 70 J = ILOZ, IHIZ + REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )* + $ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) ) + Z( J, K+1 ) = Z( J, K+1 ) - REFSUM + Z( J, K+2 ) = Z( J, K+2 ) - + $ REFSUM*CONJG( V( 2, M ) ) + Z( J, K+3 ) = Z( J, K+3 ) - + $ REFSUM*CONJG( V( 3, M ) ) + 70 CONTINUE + END IF + END IF + 80 CONTINUE +* +* ==== Special case: 2-by-2 reflection (if needed) ==== +* + K = KRCOL + 3*( M22-1 ) + IF( BMP22 ) THEN + IF ( V( 1, M22 ).NE.ZERO ) THEN + DO 90 J = JTOP, MIN( KBOT, K+3 ) + REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )* + $ H( J, K+2 ) ) + H( J, K+1 ) = H( J, K+1 ) - REFSUM + H( J, K+2 ) = H( J, K+2 ) - + $ REFSUM*CONJG( V( 2, M22 ) ) + 90 CONTINUE +* + IF( ACCUM ) THEN + KMS = K - INCOL + DO 100 J = MAX( 1, KTOP-INCOL ), KDU + REFSUM = V( 1, M22 )*( U( J, KMS+1 )+ + $ V( 2, M22 )*U( J, KMS+2 ) ) + U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM + U( J, KMS+2 ) = U( J, KMS+2 ) - + $ REFSUM*CONJG( V( 2, M22 ) ) + 100 CONTINUE + ELSE IF( WANTZ ) THEN + DO 110 J = ILOZ, IHIZ + REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )* + $ Z( J, K+2 ) ) + Z( J, K+1 ) = Z( J, K+1 ) - REFSUM + Z( J, K+2 ) = Z( J, K+2 ) - + $ REFSUM*CONJG( V( 2, M22 ) ) + 110 CONTINUE + END IF + END IF + END IF +* +* ==== Vigilant deflation check ==== +* + MSTART = MTOP + IF( KRCOL+3*( MSTART-1 ).LT.KTOP ) + $ MSTART = MSTART + 1 + MEND = MBOT + IF( BMP22 ) + $ MEND = MEND + 1 + IF( KRCOL.EQ.KBOT-2 ) + $ MEND = MEND + 1 + DO 120 M = MSTART, MEND + K = MIN( KBOT-1, KRCOL+3*( M-1 ) ) +* +* ==== The following convergence test requires that +* . the tradition small-compared-to-nearby-diagonals +* . criterion and the Ahues & Tisseur (LAWN 122, 1997) +* . criteria both be satisfied. The latter improves +* . accuracy in some examples. Falling back on an +* . alternate convergence criterion when TST1 or TST2 +* . is zero (as done here) is traditional but probably +* . unnecessary. ==== +* + IF( H( K+1, K ).NE.ZERO ) THEN + TST1 = CABS1( H( K, K ) ) + CABS1( H( K+1, K+1 ) ) + IF( TST1.EQ.RZERO ) THEN + IF( K.GE.KTOP+1 ) + $ TST1 = TST1 + CABS1( H( K, K-1 ) ) + IF( K.GE.KTOP+2 ) + $ TST1 = TST1 + CABS1( H( K, K-2 ) ) + IF( K.GE.KTOP+3 ) + $ TST1 = TST1 + CABS1( H( K, K-3 ) ) + IF( K.LE.KBOT-2 ) + $ TST1 = TST1 + CABS1( H( K+2, K+1 ) ) + IF( K.LE.KBOT-3 ) + $ TST1 = TST1 + CABS1( H( K+3, K+1 ) ) + IF( K.LE.KBOT-4 ) + $ TST1 = TST1 + CABS1( H( K+4, K+1 ) ) + END IF + IF( CABS1( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) ) + $ THEN + H12 = MAX( CABS1( H( K+1, K ) ), + $ CABS1( H( K, K+1 ) ) ) + H21 = MIN( CABS1( H( K+1, K ) ), + $ CABS1( H( K, K+1 ) ) ) + H11 = MAX( CABS1( H( K+1, K+1 ) ), + $ CABS1( H( K, K )-H( K+1, K+1 ) ) ) + H22 = MIN( CABS1( H( K+1, K+1 ) ), + $ CABS1( H( K, K )-H( K+1, K+1 ) ) ) + SCL = H11 + H12 + TST2 = H22*( H11 / SCL ) +* + IF( TST2.EQ.RZERO .OR. H21*( H12 / SCL ).LE. + $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO + END IF + END IF + 120 CONTINUE +* +* ==== Fill in the last row of each bulge. ==== +* + MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 ) + DO 130 M = MTOP, MEND + K = KRCOL + 3*( M-1 ) + REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 ) + H( K+4, K+1 ) = -REFSUM + H( K+4, K+2 ) = -REFSUM*CONJG( V( 2, M ) ) + H( K+4, K+3 ) = H( K+4, K+3 ) - REFSUM*CONJG( V( 3, M ) ) + 130 CONTINUE +* +* ==== End of near-the-diagonal bulge chase. ==== +* + 140 CONTINUE +* +* ==== Use U (if accumulated) to update far-from-diagonal +* . entries in H. If required, use U to update Z as +* . well. ==== +* + IF( ACCUM ) THEN + IF( WANTT ) THEN + JTOP = 1 + JBOT = N + ELSE + JTOP = KTOP + JBOT = KBOT + END IF + IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR. + $ ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN +* +* ==== Updates not exploiting the 2-by-2 block +* . structure of U. K1 and NU keep track of +* . the location and size of U in the special +* . cases of introducing bulges and chasing +* . bulges off the bottom. In these special +* . cases and in case the number of shifts +* . is NS = 2, there is no 2-by-2 block +* . structure to exploit. ==== +* + K1 = MAX( 1, KTOP-INCOL ) + NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1 +* +* ==== Horizontal Multiply ==== +* + DO 150 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH + JLEN = MIN( NH, JBOT-JCOL+1 ) + CALL CGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ), + $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH, + $ LDWH ) + CALL CLACPY( 'ALL', NU, JLEN, WH, LDWH, + $ H( INCOL+K1, JCOL ), LDH ) + 150 CONTINUE +* +* ==== Vertical multiply ==== +* + DO 160 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV + JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW ) + CALL CGEMM( 'N', 'N', JLEN, NU, NU, ONE, + $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ), + $ LDU, ZERO, WV, LDWV ) + CALL CLACPY( 'ALL', JLEN, NU, WV, LDWV, + $ H( JROW, INCOL+K1 ), LDH ) + 160 CONTINUE +* +* ==== Z multiply (also vertical) ==== +* + IF( WANTZ ) THEN + DO 170 JROW = ILOZ, IHIZ, NV + JLEN = MIN( NV, IHIZ-JROW+1 ) + CALL CGEMM( 'N', 'N', JLEN, NU, NU, ONE, + $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ), + $ LDU, ZERO, WV, LDWV ) + CALL CLACPY( 'ALL', JLEN, NU, WV, LDWV, + $ Z( JROW, INCOL+K1 ), LDZ ) + 170 CONTINUE + END IF + ELSE +* +* ==== Updates exploiting U's 2-by-2 block structure. +* . (I2, I4, J2, J4 are the last rows and columns +* . of the blocks.) ==== +* + I2 = ( KDU+1 ) / 2 + I4 = KDU + J2 = I4 - I2 + J4 = KDU +* +* ==== KZS and KNZ deal with the band of zeros +* . along the diagonal of one of the triangular +* . blocks. ==== +* + KZS = ( J4-J2 ) - ( NS+1 ) + KNZ = NS + 1 +* +* ==== Horizontal multiply ==== +* + DO 180 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH + JLEN = MIN( NH, JBOT-JCOL+1 ) +* +* ==== Copy bottom of H to top+KZS of scratch ==== +* (The first KZS rows get multiplied by zero.) ==== +* + CALL CLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ), + $ LDH, WH( KZS+1, 1 ), LDWH ) +* +* ==== Multiply by U21**H ==== +* + CALL CLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH ) + CALL CTRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE, + $ U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ), + $ LDWH ) +* +* ==== Multiply top of H by U11**H ==== +* + CALL CGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU, + $ H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH ) +* +* ==== Copy top of H to bottom of WH ==== +* + CALL CLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH, + $ WH( I2+1, 1 ), LDWH ) +* +* ==== Multiply by U21**H ==== +* + CALL CTRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE, + $ U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH ) +* +* ==== Multiply by U22 ==== +* + CALL CGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE, + $ U( J2+1, I2+1 ), LDU, + $ H( INCOL+1+J2, JCOL ), LDH, ONE, + $ WH( I2+1, 1 ), LDWH ) +* +* ==== Copy it back ==== +* + CALL CLACPY( 'ALL', KDU, JLEN, WH, LDWH, + $ H( INCOL+1, JCOL ), LDH ) + 180 CONTINUE +* +* ==== Vertical multiply ==== +* + DO 190 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV + JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW ) +* +* ==== Copy right of H to scratch (the first KZS +* . columns get multiplied by zero) ==== +* + CALL CLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ), + $ LDH, WV( 1, 1+KZS ), LDWV ) +* +* ==== Multiply by U21 ==== +* + CALL CLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV ) + CALL CTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE, + $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ), + $ LDWV ) +* +* ==== Multiply by U11 ==== +* + CALL CGEMM( 'N', 'N', JLEN, I2, J2, ONE, + $ H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV, + $ LDWV ) +* +* ==== Copy left of H to right of scratch ==== +* + CALL CLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH, + $ WV( 1, 1+I2 ), LDWV ) +* +* ==== Multiply by U21 ==== +* + CALL CTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE, + $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV ) +* +* ==== Multiply by U22 ==== +* + CALL CGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE, + $ H( JROW, INCOL+1+J2 ), LDH, + $ U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ), + $ LDWV ) +* +* ==== Copy it back ==== +* + CALL CLACPY( 'ALL', JLEN, KDU, WV, LDWV, + $ H( JROW, INCOL+1 ), LDH ) + 190 CONTINUE +* +* ==== Multiply Z (also vertical) ==== +* + IF( WANTZ ) THEN + DO 200 JROW = ILOZ, IHIZ, NV + JLEN = MIN( NV, IHIZ-JROW+1 ) +* +* ==== Copy right of Z to left of scratch (first +* . KZS columns get multiplied by zero) ==== +* + CALL CLACPY( 'ALL', JLEN, KNZ, + $ Z( JROW, INCOL+1+J2 ), LDZ, + $ WV( 1, 1+KZS ), LDWV ) +* +* ==== Multiply by U12 ==== +* + CALL CLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, + $ LDWV ) + CALL CTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE, + $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ), + $ LDWV ) +* +* ==== Multiply by U11 ==== +* + CALL CGEMM( 'N', 'N', JLEN, I2, J2, ONE, + $ Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE, + $ WV, LDWV ) +* +* ==== Copy left of Z to right of scratch ==== +* + CALL CLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ), + $ LDZ, WV( 1, 1+I2 ), LDWV ) +* +* ==== Multiply by U21 ==== +* + CALL CTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE, + $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), + $ LDWV ) +* +* ==== Multiply by U22 ==== +* + CALL CGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE, + $ Z( JROW, INCOL+1+J2 ), LDZ, + $ U( J2+1, I2+1 ), LDU, ONE, + $ WV( 1, 1+I2 ), LDWV ) +* +* ==== Copy the result back to Z ==== +* + CALL CLACPY( 'ALL', JLEN, KDU, WV, LDWV, + $ Z( JROW, INCOL+1 ), LDZ ) + 200 CONTINUE + END IF + END IF + END IF + 210 CONTINUE +* +* ==== End of CLAQR5 ==== +* + END diff --git a/dspl/liblapack/SRC/claqsb.f b/dspl/liblapack/SRC/claqsb.f new file mode 100644 index 0000000..004645a --- /dev/null +++ b/dspl/liblapack/SRC/claqsb.f @@ -0,0 +1,228 @@ +*> \brief \b CLAQSB scales a symmetric/Hermitian band matrix, using scaling factors computed by spbequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAQSB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, UPLO +* INTEGER KD, LDAB, N +* REAL AMAX, SCOND +* .. +* .. Array Arguments .. +* REAL S( * ) +* COMPLEX AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAQSB equilibrates a symmetric band matrix A using the scaling +*> factors in the vector S. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of super-diagonals of the matrix A if UPLO = 'U', +*> or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**H *U or A = L*L**H of the band +*> matrix A, in the same storage format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A. +*> \endverbatim +*> +*> \param[in] SCOND +*> \verbatim +*> SCOND is REAL +*> Ratio of the smallest S(i) to the largest S(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies whether or not equilibration was done. +*> = 'N': No equilibration. +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if scaling should be done +*> based on the ratio of the scaling factors. If SCOND < THRESH, +*> scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if scaling should +*> be done based on the absolute size of the largest matrix element. +*> If AMAX > LARGE or AMAX < SMALL, scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER KD, LDAB, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL S( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored in band format. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = MAX( 1, J-KD ), J + AB( KD+1+I-J, J ) = CJ*S( I )*AB( KD+1+I-J, J ) + 10 CONTINUE + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, MIN( N, J+KD ) + AB( 1+I-J, J ) = CJ*S( I )*AB( 1+I-J, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of CLAQSB +* + END diff --git a/dspl/liblapack/SRC/claqsp.f b/dspl/liblapack/SRC/claqsp.f new file mode 100644 index 0000000..2a962d4 --- /dev/null +++ b/dspl/liblapack/SRC/claqsp.f @@ -0,0 +1,214 @@ +*> \brief \b CLAQSP scales a symmetric/Hermitian matrix in packed storage, using scaling factors computed by sppequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAQSP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, UPLO +* INTEGER N +* REAL AMAX, SCOND +* .. +* .. Array Arguments .. +* REAL S( * ) +* COMPLEX AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAQSP equilibrates a symmetric matrix A using the scaling factors +*> in the vector S. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the equilibrated matrix: diag(S) * A * diag(S), in +*> the same storage format as A. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A. +*> \endverbatim +*> +*> \param[in] SCOND +*> \verbatim +*> SCOND is REAL +*> Ratio of the smallest S(i) to the largest S(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies whether or not equilibration was done. +*> = 'N': No equilibration. +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if scaling should be done +*> based on the ratio of the scaling factors. If SCOND < THRESH, +*> scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if scaling should +*> be done based on the absolute size of the largest matrix element. +*> If AMAX > LARGE or AMAX < SMALL, scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL S( * ) + COMPLEX AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JC + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + JC = 1 + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J + AP( JC+I-1 ) = CJ*S( I )*AP( JC+I-1 ) + 10 CONTINUE + JC = JC + J + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + JC = 1 + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, N + AP( JC+I-J ) = CJ*S( I )*AP( JC+I-J ) + 30 CONTINUE + JC = JC + N - J + 1 + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of CLAQSP +* + END diff --git a/dspl/liblapack/SRC/claqsy.f b/dspl/liblapack/SRC/claqsy.f new file mode 100644 index 0000000..6d5f7da --- /dev/null +++ b/dspl/liblapack/SRC/claqsy.f @@ -0,0 +1,218 @@ +*> \brief \b CLAQSY scales a symmetric/Hermitian matrix, using scaling factors computed by spoequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAQSY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, UPLO +* INTEGER LDA, N +* REAL AMAX, SCOND +* .. +* .. Array Arguments .. +* REAL S( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAQSY equilibrates a symmetric matrix A using the scaling factors +*> in the vector S. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if EQUED = 'Y', the equilibrated matrix: +*> diag(S) * A * diag(S). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(N,1). +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A. +*> \endverbatim +*> +*> \param[in] SCOND +*> \verbatim +*> SCOND is REAL +*> Ratio of the smallest S(i) to the largest S(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies whether or not equilibration was done. +*> = 'N': No equilibration. +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if scaling should be done +*> based on the ratio of the scaling factors. If SCOND < THRESH, +*> scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if scaling should +*> be done based on the absolute size of the largest matrix element. +*> If AMAX > LARGE or AMAX < SMALL, scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYauxiliary +* +* ===================================================================== + SUBROUTINE CLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER LDA, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL S( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J + A( I, J ) = CJ*S( I )*A( I, J ) + 10 CONTINUE + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, N + A( I, J ) = CJ*S( I )*A( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of CLAQSY +* + END diff --git a/dspl/liblapack/SRC/clar1v.f b/dspl/liblapack/SRC/clar1v.f new file mode 100644 index 0000000..972c82c --- /dev/null +++ b/dspl/liblapack/SRC/clar1v.f @@ -0,0 +1,488 @@ +*> \brief \b CLAR1V computes the (scaled) r-th column of the inverse of the submatrix in rows b1 through bn of the tridiagonal matrix LDLT - λI. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAR1V + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAR1V( N, B1, BN, LAMBDA, D, L, LD, LLD, +* PIVMIN, GAPTOL, Z, WANTNC, NEGCNT, ZTZ, MINGMA, +* R, ISUPPZ, NRMINV, RESID, RQCORR, WORK ) +* +* .. Scalar Arguments .. +* LOGICAL WANTNC +* INTEGER B1, BN, N, NEGCNT, R +* REAL GAPTOL, LAMBDA, MINGMA, NRMINV, PIVMIN, RESID, +* $ RQCORR, ZTZ +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ) +* REAL D( * ), L( * ), LD( * ), LLD( * ), +* $ WORK( * ) +* COMPLEX Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAR1V computes the (scaled) r-th column of the inverse of +*> the sumbmatrix in rows B1 through BN of the tridiagonal matrix +*> L D L**T - sigma I. When sigma is close to an eigenvalue, the +*> computed vector is an accurate eigenvector. Usually, r corresponds +*> to the index where the eigenvector is largest in magnitude. +*> The following steps accomplish this computation : +*> (a) Stationary qd transform, L D L**T - sigma I = L(+) D(+) L(+)**T, +*> (b) Progressive qd transform, L D L**T - sigma I = U(-) D(-) U(-)**T, +*> (c) Computation of the diagonal elements of the inverse of +*> L D L**T - sigma I by combining the above transforms, and choosing +*> r as the index where the diagonal of the inverse is (one of the) +*> largest in magnitude. +*> (d) Computation of the (scaled) r-th column of the inverse using the +*> twisted factorization obtained by combining the top part of the +*> the stationary and the bottom part of the progressive transform. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix L D L**T. +*> \endverbatim +*> +*> \param[in] B1 +*> \verbatim +*> B1 is INTEGER +*> First index of the submatrix of L D L**T. +*> \endverbatim +*> +*> \param[in] BN +*> \verbatim +*> BN is INTEGER +*> Last index of the submatrix of L D L**T. +*> \endverbatim +*> +*> \param[in] LAMBDA +*> \verbatim +*> LAMBDA is REAL +*> The shift. In order to compute an accurate eigenvector, +*> LAMBDA should be a good approximation to an eigenvalue +*> of L D L**T. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is REAL array, dimension (N-1) +*> The (n-1) subdiagonal elements of the unit bidiagonal matrix +*> L, in elements 1 to N-1. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of the diagonal matrix D. +*> \endverbatim +*> +*> \param[in] LD +*> \verbatim +*> LD is REAL array, dimension (N-1) +*> The n-1 elements L(i)*D(i). +*> \endverbatim +*> +*> \param[in] LLD +*> \verbatim +*> LLD is REAL array, dimension (N-1) +*> The n-1 elements L(i)*L(i)*D(i). +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is REAL +*> The minimum pivot in the Sturm sequence. +*> \endverbatim +*> +*> \param[in] GAPTOL +*> \verbatim +*> GAPTOL is REAL +*> Tolerance that indicates when eigenvector entries are negligible +*> w.r.t. their contribution to the residual. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (N) +*> On input, all entries of Z must be set to 0. +*> On output, Z contains the (scaled) r-th column of the +*> inverse. The scaling is such that Z(R) equals 1. +*> \endverbatim +*> +*> \param[in] WANTNC +*> \verbatim +*> WANTNC is LOGICAL +*> Specifies whether NEGCNT has to be computed. +*> \endverbatim +*> +*> \param[out] NEGCNT +*> \verbatim +*> NEGCNT is INTEGER +*> If WANTNC is .TRUE. then NEGCNT = the number of pivots < pivmin +*> in the matrix factorization L D L**T, and NEGCNT = -1 otherwise. +*> \endverbatim +*> +*> \param[out] ZTZ +*> \verbatim +*> ZTZ is REAL +*> The square of the 2-norm of Z. +*> \endverbatim +*> +*> \param[out] MINGMA +*> \verbatim +*> MINGMA is REAL +*> The reciprocal of the largest (in magnitude) diagonal +*> element of the inverse of L D L**T - sigma I. +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is INTEGER +*> The twist index for the twisted factorization used to +*> compute Z. +*> On input, 0 <= R <= N. If R is input as 0, R is set to +*> the index where (L D L**T - sigma I)^{-1} is largest +*> in magnitude. If 1 <= R <= N, R is unchanged. +*> On output, R contains the twist index used to compute Z. +*> Ideally, R designates the position of the maximum entry in the +*> eigenvector. +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension (2) +*> The support of the vector in Z, i.e., the vector Z is +*> nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). +*> \endverbatim +*> +*> \param[out] NRMINV +*> \verbatim +*> NRMINV is REAL +*> NRMINV = 1/SQRT( ZTZ ) +*> \endverbatim +*> +*> \param[out] RESID +*> \verbatim +*> RESID is REAL +*> The residual of the FP vector. +*> RESID = ABS( MINGMA )/SQRT( ZTZ ) +*> \endverbatim +*> +*> \param[out] RQCORR +*> \verbatim +*> RQCORR is REAL +*> The Rayleigh Quotient correction to LAMBDA. +*> RQCORR = MINGMA*TMP +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (4*N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE CLAR1V( N, B1, BN, LAMBDA, D, L, LD, LLD, + $ PIVMIN, GAPTOL, Z, WANTNC, NEGCNT, ZTZ, MINGMA, + $ R, ISUPPZ, NRMINV, RESID, RQCORR, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL WANTNC + INTEGER B1, BN, N, NEGCNT, R + REAL GAPTOL, LAMBDA, MINGMA, NRMINV, PIVMIN, RESID, + $ RQCORR, ZTZ +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ) + REAL D( * ), L( * ), LD( * ), LLD( * ), + $ WORK( * ) + COMPLEX Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E0, 0.0E0 ) ) + +* .. +* .. Local Scalars .. + LOGICAL SAWNAN1, SAWNAN2 + INTEGER I, INDLPL, INDP, INDS, INDUMN, NEG1, NEG2, R1, + $ R2 + REAL DMINUS, DPLUS, EPS, S, TMP +* .. +* .. External Functions .. + LOGICAL SISNAN + REAL SLAMCH + EXTERNAL SISNAN, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL +* .. +* .. Executable Statements .. +* + EPS = SLAMCH( 'Precision' ) + + + IF( R.EQ.0 ) THEN + R1 = B1 + R2 = BN + ELSE + R1 = R + R2 = R + END IF + +* Storage for LPLUS + INDLPL = 0 +* Storage for UMINUS + INDUMN = N + INDS = 2*N + 1 + INDP = 3*N + 1 + + IF( B1.EQ.1 ) THEN + WORK( INDS ) = ZERO + ELSE + WORK( INDS+B1-1 ) = LLD( B1-1 ) + END IF + +* +* Compute the stationary transform (using the differential form) +* until the index R2. +* + SAWNAN1 = .FALSE. + NEG1 = 0 + S = WORK( INDS+B1-1 ) - LAMBDA + DO 50 I = B1, R1 - 1 + DPLUS = D( I ) + S + WORK( INDLPL+I ) = LD( I ) / DPLUS + IF(DPLUS.LT.ZERO) NEG1 = NEG1 + 1 + WORK( INDS+I ) = S*WORK( INDLPL+I )*L( I ) + S = WORK( INDS+I ) - LAMBDA + 50 CONTINUE + SAWNAN1 = SISNAN( S ) + IF( SAWNAN1 ) GOTO 60 + DO 51 I = R1, R2 - 1 + DPLUS = D( I ) + S + WORK( INDLPL+I ) = LD( I ) / DPLUS + WORK( INDS+I ) = S*WORK( INDLPL+I )*L( I ) + S = WORK( INDS+I ) - LAMBDA + 51 CONTINUE + SAWNAN1 = SISNAN( S ) +* + 60 CONTINUE + IF( SAWNAN1 ) THEN +* Runs a slower version of the above loop if a NaN is detected + NEG1 = 0 + S = WORK( INDS+B1-1 ) - LAMBDA + DO 70 I = B1, R1 - 1 + DPLUS = D( I ) + S + IF(ABS(DPLUS).LT.PIVMIN) DPLUS = -PIVMIN + WORK( INDLPL+I ) = LD( I ) / DPLUS + IF(DPLUS.LT.ZERO) NEG1 = NEG1 + 1 + WORK( INDS+I ) = S*WORK( INDLPL+I )*L( I ) + IF( WORK( INDLPL+I ).EQ.ZERO ) + $ WORK( INDS+I ) = LLD( I ) + S = WORK( INDS+I ) - LAMBDA + 70 CONTINUE + DO 71 I = R1, R2 - 1 + DPLUS = D( I ) + S + IF(ABS(DPLUS).LT.PIVMIN) DPLUS = -PIVMIN + WORK( INDLPL+I ) = LD( I ) / DPLUS + WORK( INDS+I ) = S*WORK( INDLPL+I )*L( I ) + IF( WORK( INDLPL+I ).EQ.ZERO ) + $ WORK( INDS+I ) = LLD( I ) + S = WORK( INDS+I ) - LAMBDA + 71 CONTINUE + END IF +* +* Compute the progressive transform (using the differential form) +* until the index R1 +* + SAWNAN2 = .FALSE. + NEG2 = 0 + WORK( INDP+BN-1 ) = D( BN ) - LAMBDA + DO 80 I = BN - 1, R1, -1 + DMINUS = LLD( I ) + WORK( INDP+I ) + TMP = D( I ) / DMINUS + IF(DMINUS.LT.ZERO) NEG2 = NEG2 + 1 + WORK( INDUMN+I ) = L( I )*TMP + WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - LAMBDA + 80 CONTINUE + TMP = WORK( INDP+R1-1 ) + SAWNAN2 = SISNAN( TMP ) + + IF( SAWNAN2 ) THEN +* Runs a slower version of the above loop if a NaN is detected + NEG2 = 0 + DO 100 I = BN-1, R1, -1 + DMINUS = LLD( I ) + WORK( INDP+I ) + IF(ABS(DMINUS).LT.PIVMIN) DMINUS = -PIVMIN + TMP = D( I ) / DMINUS + IF(DMINUS.LT.ZERO) NEG2 = NEG2 + 1 + WORK( INDUMN+I ) = L( I )*TMP + WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - LAMBDA + IF( TMP.EQ.ZERO ) + $ WORK( INDP+I-1 ) = D( I ) - LAMBDA + 100 CONTINUE + END IF +* +* Find the index (from R1 to R2) of the largest (in magnitude) +* diagonal element of the inverse +* + MINGMA = WORK( INDS+R1-1 ) + WORK( INDP+R1-1 ) + IF( MINGMA.LT.ZERO ) NEG1 = NEG1 + 1 + IF( WANTNC ) THEN + NEGCNT = NEG1 + NEG2 + ELSE + NEGCNT = -1 + ENDIF + IF( ABS(MINGMA).EQ.ZERO ) + $ MINGMA = EPS*WORK( INDS+R1-1 ) + R = R1 + DO 110 I = R1, R2 - 1 + TMP = WORK( INDS+I ) + WORK( INDP+I ) + IF( TMP.EQ.ZERO ) + $ TMP = EPS*WORK( INDS+I ) + IF( ABS( TMP ).LE.ABS( MINGMA ) ) THEN + MINGMA = TMP + R = I + 1 + END IF + 110 CONTINUE +* +* Compute the FP vector: solve N^T v = e_r +* + ISUPPZ( 1 ) = B1 + ISUPPZ( 2 ) = BN + Z( R ) = CONE + ZTZ = ONE +* +* Compute the FP vector upwards from R +* + IF( .NOT.SAWNAN1 .AND. .NOT.SAWNAN2 ) THEN + DO 210 I = R-1, B1, -1 + Z( I ) = -( WORK( INDLPL+I )*Z( I+1 ) ) + IF( (ABS(Z(I))+ABS(Z(I+1)))* ABS(LD(I)).LT.GAPTOL ) + $ THEN + Z( I ) = ZERO + ISUPPZ( 1 ) = I + 1 + GOTO 220 + ENDIF + ZTZ = ZTZ + REAL( Z( I )*Z( I ) ) + 210 CONTINUE + 220 CONTINUE + ELSE +* Run slower loop if NaN occurred. + DO 230 I = R - 1, B1, -1 + IF( Z( I+1 ).EQ.ZERO ) THEN + Z( I ) = -( LD( I+1 ) / LD( I ) )*Z( I+2 ) + ELSE + Z( I ) = -( WORK( INDLPL+I )*Z( I+1 ) ) + END IF + IF( (ABS(Z(I))+ABS(Z(I+1)))* ABS(LD(I)).LT.GAPTOL ) + $ THEN + Z( I ) = ZERO + ISUPPZ( 1 ) = I + 1 + GO TO 240 + END IF + ZTZ = ZTZ + REAL( Z( I )*Z( I ) ) + 230 CONTINUE + 240 CONTINUE + ENDIF + +* Compute the FP vector downwards from R in blocks of size BLKSIZ + IF( .NOT.SAWNAN1 .AND. .NOT.SAWNAN2 ) THEN + DO 250 I = R, BN-1 + Z( I+1 ) = -( WORK( INDUMN+I )*Z( I ) ) + IF( (ABS(Z(I))+ABS(Z(I+1)))* ABS(LD(I)).LT.GAPTOL ) + $ THEN + Z( I+1 ) = ZERO + ISUPPZ( 2 ) = I + GO TO 260 + END IF + ZTZ = ZTZ + REAL( Z( I+1 )*Z( I+1 ) ) + 250 CONTINUE + 260 CONTINUE + ELSE +* Run slower loop if NaN occurred. + DO 270 I = R, BN - 1 + IF( Z( I ).EQ.ZERO ) THEN + Z( I+1 ) = -( LD( I-1 ) / LD( I ) )*Z( I-1 ) + ELSE + Z( I+1 ) = -( WORK( INDUMN+I )*Z( I ) ) + END IF + IF( (ABS(Z(I))+ABS(Z(I+1)))* ABS(LD(I)).LT.GAPTOL ) + $ THEN + Z( I+1 ) = ZERO + ISUPPZ( 2 ) = I + GO TO 280 + END IF + ZTZ = ZTZ + REAL( Z( I+1 )*Z( I+1 ) ) + 270 CONTINUE + 280 CONTINUE + END IF +* +* Compute quantities for convergence test +* + TMP = ONE / ZTZ + NRMINV = SQRT( TMP ) + RESID = ABS( MINGMA )*NRMINV + RQCORR = MINGMA*TMP +* +* + RETURN +* +* End of CLAR1V +* + END diff --git a/dspl/liblapack/SRC/clar2v.f b/dspl/liblapack/SRC/clar2v.f new file mode 100644 index 0000000..8dd1716 --- /dev/null +++ b/dspl/liblapack/SRC/clar2v.f @@ -0,0 +1,169 @@ +*> \brief \b CLAR2V applies a vector of plane rotations with real cosines and complex sines from both sides to a sequence of 2-by-2 symmetric/Hermitian matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAR2V + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAR2V( N, X, Y, Z, INCX, C, S, INCC ) +* +* .. Scalar Arguments .. +* INTEGER INCC, INCX, N +* .. +* .. Array Arguments .. +* REAL C( * ) +* COMPLEX S( * ), X( * ), Y( * ), Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAR2V applies a vector of complex plane rotations with real cosines +*> from both sides to a sequence of 2-by-2 complex Hermitian matrices, +*> defined by the elements of the vectors x, y and z. For i = 1,2,...,n +*> +*> ( x(i) z(i) ) := +*> ( conjg(z(i)) y(i) ) +*> +*> ( c(i) conjg(s(i)) ) ( x(i) z(i) ) ( c(i) -conjg(s(i)) ) +*> ( -s(i) c(i) ) ( conjg(z(i)) y(i) ) ( s(i) c(i) ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of plane rotations to be applied. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (1+(N-1)*INCX) +*> The vector x; the elements of x are assumed to be real. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX array, dimension (1+(N-1)*INCX) +*> The vector y; the elements of y are assumed to be real. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (1+(N-1)*INCX) +*> The vector z. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X, Y and Z. INCX > 0. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (1+(N-1)*INCC) +*> The cosines of the plane rotations. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is COMPLEX array, dimension (1+(N-1)*INCC) +*> The sines of the plane rotations. +*> \endverbatim +*> +*> \param[in] INCC +*> \verbatim +*> INCC is INTEGER +*> The increment between elements of C and S. INCC > 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLAR2V( N, X, Y, Z, INCX, C, S, INCC ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, N +* .. +* .. Array Arguments .. + REAL C( * ) + COMPLEX S( * ), X( * ), Y( * ), Z( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IC, IX + REAL CI, SII, SIR, T1I, T1R, T5, T6, XI, YI, ZII, + $ ZIR + COMPLEX SI, T2, T3, T4, ZI +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, CONJG, REAL +* .. +* .. Executable Statements .. +* + IX = 1 + IC = 1 + DO 10 I = 1, N + XI = REAL( X( IX ) ) + YI = REAL( Y( IX ) ) + ZI = Z( IX ) + ZIR = REAL( ZI ) + ZII = AIMAG( ZI ) + CI = C( IC ) + SI = S( IC ) + SIR = REAL( SI ) + SII = AIMAG( SI ) + T1R = SIR*ZIR - SII*ZII + T1I = SIR*ZII + SII*ZIR + T2 = CI*ZI + T3 = T2 - CONJG( SI )*XI + T4 = CONJG( T2 ) + SI*YI + T5 = CI*XI + T1R + T6 = CI*YI - T1R + X( IX ) = CI*T5 + ( SIR*REAL( T4 )+SII*AIMAG( T4 ) ) + Y( IX ) = CI*T6 - ( SIR*REAL( T3 )-SII*AIMAG( T3 ) ) + Z( IX ) = CI*T3 + CONJG( SI )*CMPLX( T6, T1I ) + IX = IX + INCX + IC = IC + INCC + 10 CONTINUE + RETURN +* +* End of CLAR2V +* + END diff --git a/dspl/liblapack/SRC/clarcm.f b/dspl/liblapack/SRC/clarcm.f new file mode 100644 index 0000000..b22af2d --- /dev/null +++ b/dspl/liblapack/SRC/clarcm.f @@ -0,0 +1,185 @@ +*> \brief \b CLARCM copies all or part of a real two-dimensional array to a complex array. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARCM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARCM( M, N, A, LDA, B, LDB, C, LDC, RWORK ) +* +* .. Scalar Arguments .. +* INTEGER LDA, LDB, LDC, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), RWORK( * ) +* COMPLEX B( LDB, * ), C( LDC, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARCM performs a very simple matrix-matrix multiplication: +*> C := A * B, +*> where A is M by M and real; B is M by N and complex; +*> C is M by N and complex. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A and of the matrix C. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns and rows of the matrix B and +*> the number of columns of the matrix C. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA, M) +*> On entry, A contains the M by M matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >=max(1,M). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB, N) +*> On entry, B contains the M by N matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >=max(1,M). +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC, N) +*> On exit, C contains the M by N matrix C. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >=max(1,M). +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*M*N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLARCM( M, N, A, LDA, B, LDB, C, LDC, RWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER LDA, LDB, LDC, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), RWORK( * ) + COMPLEX B( LDB, * ), C( LDC, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, REAL +* .. +* .. External Subroutines .. + EXTERNAL SGEMM +* .. +* .. Executable Statements .. +* +* Quick return if possible. +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN +* + DO 20 J = 1, N + DO 10 I = 1, M + RWORK( ( J-1 )*M+I ) = REAL( B( I, J ) ) + 10 CONTINUE + 20 CONTINUE +* + L = M*N + 1 + CALL SGEMM( 'N', 'N', M, N, M, ONE, A, LDA, RWORK, M, ZERO, + $ RWORK( L ), M ) + DO 40 J = 1, N + DO 30 I = 1, M + C( I, J ) = RWORK( L+( J-1 )*M+I-1 ) + 30 CONTINUE + 40 CONTINUE +* + DO 60 J = 1, N + DO 50 I = 1, M + RWORK( ( J-1 )*M+I ) = AIMAG( B( I, J ) ) + 50 CONTINUE + 60 CONTINUE + CALL SGEMM( 'N', 'N', M, N, M, ONE, A, LDA, RWORK, M, ZERO, + $ RWORK( L ), M ) + DO 80 J = 1, N + DO 70 I = 1, M + C( I, J ) = CMPLX( REAL( C( I, J ) ), + $ RWORK( L+( J-1 )*M+I-1 ) ) + 70 CONTINUE + 80 CONTINUE +* + RETURN +* +* End of CLARCM +* + END diff --git a/dspl/liblapack/SRC/clarf.f b/dspl/liblapack/SRC/clarf.f new file mode 100644 index 0000000..72140d5 --- /dev/null +++ b/dspl/liblapack/SRC/clarf.f @@ -0,0 +1,232 @@ +*> \brief \b CLARF applies an elementary reflector to a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* COMPLEX TAU +* .. +* .. Array Arguments .. +* COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARF applies a complex elementary reflector H to a complex M-by-N +*> matrix C, from either the left or the right. H is represented in the +*> form +*> +*> H = I - tau * v * v**H +*> +*> where tau is a complex scalar and v is a complex vector. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> +*> To apply H**H (the conjugate transpose of H), supply conjg(tau) instead +*> tau. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of H. V is not used if +*> TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + COMPLEX TAU +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL APPLYLEFT + INTEGER I, LASTV, LASTC +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CGERC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILACLR, ILACLC + EXTERNAL LSAME, ILACLR, ILACLC +* .. +* .. Executable Statements .. +* + APPLYLEFT = LSAME( SIDE, 'L' ) + LASTV = 0 + LASTC = 0 + IF( TAU.NE.ZERO ) THEN +! Set up variables for scanning V. LASTV begins pointing to the end +! of V. + IF( APPLYLEFT ) THEN + LASTV = M + ELSE + LASTV = N + END IF + IF( INCV.GT.0 ) THEN + I = 1 + (LASTV-1) * INCV + ELSE + I = 1 + END IF +! Look for the last non-zero row in V. + DO WHILE( LASTV.GT.0 .AND. V( I ).EQ.ZERO ) + LASTV = LASTV - 1 + I = I - INCV + END DO + IF( APPLYLEFT ) THEN +! Scan for the last non-zero column in C(1:lastv,:). + LASTC = ILACLC(LASTV, N, C, LDC) + ELSE +! Scan for the last non-zero row in C(:,1:lastv). + LASTC = ILACLR(M, LASTV, C, LDC) + END IF + END IF +! Note that lastc.eq.0 renders the BLAS operations null; no special +! case is needed at this level. + IF( APPLYLEFT ) THEN +* +* Form H * C +* + IF( LASTV.GT.0 ) THEN +* +* w(1:lastc,1) := C(1:lastv,1:lastc)**H * v(1:lastv,1) +* + CALL CGEMV( 'Conjugate transpose', LASTV, LASTC, ONE, + $ C, LDC, V, INCV, ZERO, WORK, 1 ) +* +* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)**H +* + CALL CGERC( LASTV, LASTC, -TAU, V, INCV, WORK, 1, C, LDC ) + END IF + ELSE +* +* Form C * H +* + IF( LASTV.GT.0 ) THEN +* +* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) +* + CALL CGEMV( 'No transpose', LASTC, LASTV, ONE, C, LDC, + $ V, INCV, ZERO, WORK, 1 ) +* +* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)**H +* + CALL CGERC( LASTC, LASTV, -TAU, WORK, 1, V, INCV, C, LDC ) + END IF + END IF + RETURN +* +* End of CLARF +* + END diff --git a/dspl/liblapack/SRC/clarfb.f b/dspl/liblapack/SRC/clarfb.f new file mode 100644 index 0000000..8fdd5c8 --- /dev/null +++ b/dspl/liblapack/SRC/clarfb.f @@ -0,0 +1,731 @@ +*> \brief \b CLARFB applies a block reflector or its conjugate-transpose to a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARFB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, +* T, LDT, C, LDC, WORK, LDWORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, SIDE, STOREV, TRANS +* INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX C( LDC, * ), T( LDT, * ), V( LDV, * ), +* $ WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARFB applies a complex block reflector H or its transpose H**H to a +*> complex M-by-N matrix C, from either the left or the right. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply H or H**H from the Left +*> = 'R': apply H or H**H from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply H (No transpose) +*> = 'C': apply H**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Indicates how H is formed from a product of elementary +*> reflectors +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Indicates how the vectors which define the elementary +*> reflectors are stored: +*> = 'C': Columnwise +*> = 'R': Rowwise +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the matrix T (= the number of elementary +*> reflectors whose product defines the block reflector). +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,M) if STOREV = 'R' and SIDE = 'L' +*> (LDV,N) if STOREV = 'R' and SIDE = 'R' +*> The matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); +*> if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); +*> if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,K) +*> The triangular K-by-K matrix T in the representation of the +*> block reflector. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by H*C or H**H*C or C*H or C*H**H. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LDWORK,K) +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. +*> If SIDE = 'L', LDWORK >= max(1,N); +*> if SIDE = 'R', LDWORK >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2013 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored; the corresponding +*> array elements are modified but restored on exit. The rest of the +*> array is not used. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, + $ T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2013 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMM, CLACGV, CTRMM +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( STOREV, 'C' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 ) (first K rows) +* ( V2 ) +* where V1 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C where C = ( C1 ) +* ( C2 ) +* +* W := C**H * V = (C1**H * V1 + C2**H * V2) (stored in WORK) +* +* W := C1**H +* + DO 10 J = 1, K + CALL CCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL CLACGV( N, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W := W * V1 +* + CALL CTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N, + $ K, ONE, V, LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C2**H *V2 +* + CALL CGEMM( 'Conjugate transpose', 'No transpose', N, + $ K, M-K, ONE, C( K+1, 1 ), LDC, + $ V( K+1, 1 ), LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T**H or W * T +* + CALL CTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W**H +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2 * W**H +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ M-K, N, K, -ONE, V( K+1, 1 ), LDV, WORK, + $ LDWORK, ONE, C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1**H +* + CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', N, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W**H +* + DO 30 J = 1, K + DO 20 I = 1, N + C( J, I ) = C( J, I ) - CONJG( WORK( I, J ) ) + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H where C = ( C1 C2 ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C1 +* + DO 40 J = 1, K + CALL CCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W := W * V1 +* + CALL CTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M, + $ K, ONE, V, LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C2 * V2 +* + CALL CGEMM( 'No transpose', 'No transpose', M, K, N-K, + $ ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**H +* + CALL CTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V**H +* + IF( N.GT.K ) THEN +* +* C2 := C2 - W * V2**H +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', M, + $ N-K, K, -ONE, WORK, LDWORK, V( K+1, 1 ), + $ LDV, ONE, C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1**H +* + CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', M, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 60 J = 1, K + DO 50 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + ELSE +* +* Let V = ( V1 ) +* ( V2 ) (last K rows) +* where V2 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C where C = ( C1 ) +* ( C2 ) +* +* W := C**H * V = (C1**H * V1 + C2**H * V2) (stored in WORK) +* +* W := C2**H +* + DO 70 J = 1, K + CALL CCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL CLACGV( N, WORK( 1, J ), 1 ) + 70 CONTINUE +* +* W := W * V2 +* + CALL CTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N, + $ K, ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1**H * V1 +* + CALL CGEMM( 'Conjugate transpose', 'No transpose', N, + $ K, M-K, ONE, C, LDC, V, LDV, ONE, WORK, + $ LDWORK ) + END IF +* +* W := W * T**H or W * T +* + CALL CTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W**H +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1 * W**H +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ M-K, N, K, -ONE, V, LDV, WORK, LDWORK, + $ ONE, C, LDC ) + END IF +* +* W := W * V2**H +* + CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', N, K, ONE, V( M-K+1, 1 ), LDV, WORK, + $ LDWORK ) +* +* C2 := C2 - W**H +* + DO 90 J = 1, K + DO 80 I = 1, N + C( M-K+J, I ) = C( M-K+J, I ) - + $ CONJG( WORK( I, J ) ) + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H where C = ( C1 C2 ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C2 +* + DO 100 J = 1, K + CALL CCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 100 CONTINUE +* +* W := W * V2 +* + CALL CTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M, + $ K, ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1 +* + CALL CGEMM( 'No transpose', 'No transpose', M, K, N-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**H +* + CALL CTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V**H +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1**H +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', M, + $ N-K, K, -ONE, WORK, LDWORK, V, LDV, ONE, + $ C, LDC ) + END IF +* +* W := W * V2**H +* + CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', M, K, ONE, V( N-K+1, 1 ), LDV, WORK, + $ LDWORK ) +* +* C2 := C2 - W +* + DO 120 J = 1, K + DO 110 I = 1, M + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* + ELSE IF( LSAME( STOREV, 'R' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 V2 ) (V1: first K columns) +* where V1 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C where C = ( C1 ) +* ( C2 ) +* +* W := C**H * V**H = (C1**H * V1**H + C2**H * V2**H) (stored in WORK) +* +* W := C1**H +* + DO 130 J = 1, K + CALL CCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL CLACGV( N, WORK( 1, J ), 1 ) + 130 CONTINUE +* +* W := W * V1**H +* + CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', N, K, ONE, V, LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C2**H * V2**H +* + CALL CGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', N, K, M-K, ONE, + $ C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, ONE, + $ WORK, LDWORK ) + END IF +* +* W := W * T**H or W * T +* + CALL CTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V**H * W**H +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2**H * W**H +* + CALL CGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', M-K, N, K, -ONE, + $ V( 1, K+1 ), LDV, WORK, LDWORK, ONE, + $ C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL CTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N, + $ K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W**H +* + DO 150 J = 1, K + DO 140 I = 1, N + C( J, I ) = C( J, I ) - CONJG( WORK( I, J ) ) + 140 CONTINUE + 150 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H where C = ( C1 C2 ) +* +* W := C * V**H = (C1*V1**H + C2*V2**H) (stored in WORK) +* +* W := C1 +* + DO 160 J = 1, K + CALL CCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 160 CONTINUE +* +* W := W * V1**H +* + CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', M, K, ONE, V, LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C2 * V2**H +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', M, + $ K, N-K, ONE, C( 1, K+1 ), LDC, + $ V( 1, K+1 ), LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**H +* + CALL CTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C2 := C2 - W * V2 +* + CALL CGEMM( 'No transpose', 'No transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, ONE, + $ C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL CTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M, + $ K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 180 J = 1, K + DO 170 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 170 CONTINUE + 180 CONTINUE +* + END IF +* + ELSE +* +* Let V = ( V1 V2 ) (V2: last K columns) +* where V2 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C where C = ( C1 ) +* ( C2 ) +* +* W := C**H * V**H = (C1**H * V1**H + C2**H * V2**H) (stored in WORK) +* +* W := C2**H +* + DO 190 J = 1, K + CALL CCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL CLACGV( N, WORK( 1, J ), 1 ) + 190 CONTINUE +* +* W := W * V2**H +* + CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', N, K, ONE, V( 1, M-K+1 ), LDV, WORK, + $ LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1**H * V1**H +* + CALL CGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', N, K, M-K, ONE, C, + $ LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T**H or W * T +* + CALL CTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V**H * W**H +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1**H * W**H +* + CALL CGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', M-K, N, K, -ONE, V, + $ LDV, WORK, LDWORK, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL CTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N, + $ K, ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK ) +* +* C2 := C2 - W**H +* + DO 210 J = 1, K + DO 200 I = 1, N + C( M-K+J, I ) = C( M-K+J, I ) - + $ CONJG( WORK( I, J ) ) + 200 CONTINUE + 210 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H where C = ( C1 C2 ) +* +* W := C * V**H = (C1*V1**H + C2*V2**H) (stored in WORK) +* +* W := C2 +* + DO 220 J = 1, K + CALL CCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 220 CONTINUE +* +* W := W * V2**H +* + CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', M, K, ONE, V( 1, N-K+1 ), LDV, WORK, + $ LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1**H +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', M, + $ K, N-K, ONE, C, LDC, V, LDV, ONE, WORK, + $ LDWORK ) + END IF +* +* W := W * T or W * T**H +* + CALL CTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1 +* + CALL CGEMM( 'No transpose', 'No transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL CTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M, + $ K, ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 240 J = 1, K + DO 230 I = 1, M + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 230 CONTINUE + 240 CONTINUE +* + END IF +* + END IF + END IF +* + RETURN +* +* End of CLARFB +* + END diff --git a/dspl/liblapack/SRC/clarfg.f b/dspl/liblapack/SRC/clarfg.f new file mode 100644 index 0000000..d54c227 --- /dev/null +++ b/dspl/liblapack/SRC/clarfg.f @@ -0,0 +1,203 @@ +*> \brief \b CLARFG generates an elementary reflector (Householder matrix). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARFG + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARFG( N, ALPHA, X, INCX, TAU ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* COMPLEX ALPHA, TAU +* .. +* .. Array Arguments .. +* COMPLEX X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARFG generates a complex elementary reflector H of order n, such +*> that +*> +*> H**H * ( alpha ) = ( beta ), H**H * H = I. +*> ( x ) ( 0 ) +*> +*> where alpha and beta are scalars, with beta real, and x is an +*> (n-1)-element complex vector. H is represented in the form +*> +*> H = I - tau * ( 1 ) * ( 1 v**H ) , +*> ( v ) +*> +*> where tau is a complex scalar and v is a complex (n-1)-element +*> vector. Note that H is not hermitian. +*> +*> If the elements of x are all zero and alpha is real, then tau = 0 +*> and H is taken to be the unit matrix. +*> +*> Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the elementary reflector. +*> \endverbatim +*> +*> \param[in,out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX +*> On entry, the value alpha. +*> On exit, it is overwritten with the value beta. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension +*> (1+(N-2)*abs(INCX)) +*> On entry, the vector x. +*> On exit, it is overwritten with the vector v. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX +*> The value tau. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLARFG( N, ALPHA, X, INCX, TAU ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INCX, N + COMPLEX ALPHA, TAU +* .. +* .. Array Arguments .. + COMPLEX X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER J, KNT + REAL ALPHI, ALPHR, BETA, RSAFMN, SAFMIN, XNORM +* .. +* .. External Functions .. + REAL SCNRM2, SLAMCH, SLAPY3 + COMPLEX CLADIV + EXTERNAL SCNRM2, SLAMCH, SLAPY3, CLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, REAL, SIGN +* .. +* .. External Subroutines .. + EXTERNAL CSCAL, CSSCAL +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + TAU = ZERO + RETURN + END IF +* + XNORM = SCNRM2( N-1, X, INCX ) + ALPHR = REAL( ALPHA ) + ALPHI = AIMAG( ALPHA ) +* + IF( XNORM.EQ.ZERO .AND. ALPHI.EQ.ZERO ) THEN +* +* H = I +* + TAU = ZERO + ELSE +* +* general case +* + BETA = -SIGN( SLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) + SAFMIN = SLAMCH( 'S' ) / SLAMCH( 'E' ) + RSAFMN = ONE / SAFMIN +* + KNT = 0 + IF( ABS( BETA ).LT.SAFMIN ) THEN +* +* XNORM, BETA may be inaccurate; scale X and recompute them +* + 10 CONTINUE + KNT = KNT + 1 + CALL CSSCAL( N-1, RSAFMN, X, INCX ) + BETA = BETA*RSAFMN + ALPHI = ALPHI*RSAFMN + ALPHR = ALPHR*RSAFMN + IF( (ABS( BETA ).LT.SAFMIN) .AND. (KNT .LT. 20) ) + $ GO TO 10 +* +* New BETA is at most 1, at least SAFMIN +* + XNORM = SCNRM2( N-1, X, INCX ) + ALPHA = CMPLX( ALPHR, ALPHI ) + BETA = -SIGN( SLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) + END IF + TAU = CMPLX( ( BETA-ALPHR ) / BETA, -ALPHI / BETA ) + ALPHA = CLADIV( CMPLX( ONE ), ALPHA-BETA ) + CALL CSCAL( N-1, ALPHA, X, INCX ) +* +* If ALPHA is subnormal, it may lose relative accuracy +* + DO 20 J = 1, KNT + BETA = BETA*SAFMIN + 20 CONTINUE + ALPHA = BETA + END IF +* + RETURN +* +* End of CLARFG +* + END diff --git a/dspl/liblapack/SRC/clarfgp.f b/dspl/liblapack/SRC/clarfgp.f new file mode 100644 index 0000000..19e48cc --- /dev/null +++ b/dspl/liblapack/SRC/clarfgp.f @@ -0,0 +1,272 @@ +*> \brief \b CLARFGP generates an elementary reflector (Householder matrix) with non-negative beta. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARFGP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARFGP( N, ALPHA, X, INCX, TAU ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* COMPLEX ALPHA, TAU +* .. +* .. Array Arguments .. +* COMPLEX X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARFGP generates a complex elementary reflector H of order n, such +*> that +*> +*> H**H * ( alpha ) = ( beta ), H**H * H = I. +*> ( x ) ( 0 ) +*> +*> where alpha and beta are scalars, beta is real and non-negative, and +*> x is an (n-1)-element complex vector. H is represented in the form +*> +*> H = I - tau * ( 1 ) * ( 1 v**H ) , +*> ( v ) +*> +*> where tau is a complex scalar and v is a complex (n-1)-element +*> vector. Note that H is not hermitian. +*> +*> If the elements of x are all zero and alpha is real, then tau = 0 +*> and H is taken to be the unit matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the elementary reflector. +*> \endverbatim +*> +*> \param[in,out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX +*> On entry, the value alpha. +*> On exit, it is overwritten with the value beta. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension +*> (1+(N-2)*abs(INCX)) +*> On entry, the vector x. +*> On exit, it is overwritten with the vector v. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX +*> The value tau. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLARFGP( N, ALPHA, X, INCX, TAU ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INCX, N + COMPLEX ALPHA, TAU +* .. +* .. Array Arguments .. + COMPLEX X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL TWO, ONE, ZERO + PARAMETER ( TWO = 2.0E+0, ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER J, KNT + REAL ALPHI, ALPHR, BETA, BIGNUM, SMLNUM, XNORM + COMPLEX SAVEALPHA +* .. +* .. External Functions .. + REAL SCNRM2, SLAMCH, SLAPY3, SLAPY2 + COMPLEX CLADIV + EXTERNAL SCNRM2, SLAMCH, SLAPY3, SLAPY2, CLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, REAL, SIGN +* .. +* .. External Subroutines .. + EXTERNAL CSCAL, CSSCAL +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + TAU = ZERO + RETURN + END IF +* + XNORM = SCNRM2( N-1, X, INCX ) + ALPHR = REAL( ALPHA ) + ALPHI = AIMAG( ALPHA ) +* + IF( XNORM.EQ.ZERO ) THEN +* +* H = [1-alpha/abs(alpha) 0; 0 I], sign chosen so ALPHA >= 0. +* + IF( ALPHI.EQ.ZERO ) THEN + IF( ALPHR.GE.ZERO ) THEN +* When TAU.eq.ZERO, the vector is special-cased to be +* all zeros in the application routines. We do not need +* to clear it. + TAU = ZERO + ELSE +* However, the application routines rely on explicit +* zero checks when TAU.ne.ZERO, and we must clear X. + TAU = TWO + DO J = 1, N-1 + X( 1 + (J-1)*INCX ) = ZERO + END DO + ALPHA = -ALPHA + END IF + ELSE +* Only "reflecting" the diagonal entry to be real and non-negative. + XNORM = SLAPY2( ALPHR, ALPHI ) + TAU = CMPLX( ONE - ALPHR / XNORM, -ALPHI / XNORM ) + DO J = 1, N-1 + X( 1 + (J-1)*INCX ) = ZERO + END DO + ALPHA = XNORM + END IF + ELSE +* +* general case +* + BETA = SIGN( SLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'E' ) + BIGNUM = ONE / SMLNUM +* + KNT = 0 + IF( ABS( BETA ).LT.SMLNUM ) THEN +* +* XNORM, BETA may be inaccurate; scale X and recompute them +* + 10 CONTINUE + KNT = KNT + 1 + CALL CSSCAL( N-1, BIGNUM, X, INCX ) + BETA = BETA*BIGNUM + ALPHI = ALPHI*BIGNUM + ALPHR = ALPHR*BIGNUM + IF( (ABS( BETA ).LT.SMLNUM) .AND. (KNT .LT. 20) ) + $ GO TO 10 +* +* New BETA is at most 1, at least SMLNUM +* + XNORM = SCNRM2( N-1, X, INCX ) + ALPHA = CMPLX( ALPHR, ALPHI ) + BETA = SIGN( SLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) + END IF + SAVEALPHA = ALPHA + ALPHA = ALPHA + BETA + IF( BETA.LT.ZERO ) THEN + BETA = -BETA + TAU = -ALPHA / BETA + ELSE + ALPHR = ALPHI * (ALPHI/REAL( ALPHA )) + ALPHR = ALPHR + XNORM * (XNORM/REAL( ALPHA )) + TAU = CMPLX( ALPHR/BETA, -ALPHI/BETA ) + ALPHA = CMPLX( -ALPHR, ALPHI ) + END IF + ALPHA = CLADIV( CMPLX( ONE ), ALPHA ) +* + IF ( ABS(TAU).LE.SMLNUM ) THEN +* +* In the case where the computed TAU ends up being a denormalized number, +* it loses relative accuracy. This is a BIG problem. Solution: flush TAU +* to ZERO (or TWO or whatever makes a nonnegative real number for BETA). +* +* (Bug report provided by Pat Quillen from MathWorks on Jul 29, 2009.) +* (Thanks Pat. Thanks MathWorks.) +* + ALPHR = REAL( SAVEALPHA ) + ALPHI = AIMAG( SAVEALPHA ) + IF( ALPHI.EQ.ZERO ) THEN + IF( ALPHR.GE.ZERO ) THEN + TAU = ZERO + ELSE + TAU = TWO + DO J = 1, N-1 + X( 1 + (J-1)*INCX ) = ZERO + END DO + BETA = -SAVEALPHA + END IF + ELSE + XNORM = SLAPY2( ALPHR, ALPHI ) + TAU = CMPLX( ONE - ALPHR / XNORM, -ALPHI / XNORM ) + DO J = 1, N-1 + X( 1 + (J-1)*INCX ) = ZERO + END DO + BETA = XNORM + END IF +* + ELSE +* +* This is the general case. +* + CALL CSCAL( N-1, ALPHA, X, INCX ) +* + END IF +* +* If BETA is subnormal, it may lose relative accuracy +* + DO 20 J = 1, KNT + BETA = BETA*SMLNUM + 20 CONTINUE + ALPHA = BETA + END IF +* + RETURN +* +* End of CLARFGP +* + END diff --git a/dspl/liblapack/SRC/clarft.f b/dspl/liblapack/SRC/clarft.f new file mode 100644 index 0000000..296467a --- /dev/null +++ b/dspl/liblapack/SRC/clarft.f @@ -0,0 +1,328 @@ +*> \brief \b CLARFT forms the triangular factor T of a block reflector H = I - vtvH +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARFT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARFT forms the triangular factor T of a complex block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**H +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**H * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CGEMV, CTRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( PREVLASTV, I ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * CONJG( V( I , J ) ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) +* + CALL CGEMV( 'Conjugate transpose', J-I, I-1, + $ -TAU( I ), V( I+1, 1 ), LDV, + $ V( I+1, I ), 1, + $ ONE, T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H +* + CALL CGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, + $ ONE, T( 1, I ), LDT ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) +* + CALL CGEMV( 'Conjugate transpose', N-K+I-J, K-I, + $ -TAU( I ), V( J, I+1 ), LDV, V( J, I ), + $ 1, ONE, T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H +* + CALL CGEMM( 'N', 'C', K-I, 1, N-K+I-J, -TAU( I ), + $ V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), LDT ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL CTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of CLARFT +* + END diff --git a/dspl/liblapack/SRC/clarfx.f b/dspl/liblapack/SRC/clarfx.f new file mode 100644 index 0000000..1111c80 --- /dev/null +++ b/dspl/liblapack/SRC/clarfx.f @@ -0,0 +1,699 @@ +*> \brief \b CLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the reflector has order ≤ 10. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARFX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARFX( SIDE, M, N, V, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER LDC, M, N +* COMPLEX TAU +* .. +* .. Array Arguments .. +* COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARFX applies a complex elementary reflector H to a complex m by n +*> matrix C, from either the left or the right. H is represented in the +*> form +*> +*> H = I - tau * v * v**H +*> +*> where tau is a complex scalar and v is a complex vector. +*> +*> If tau = 0, then H is taken to be the unit matrix +*> +*> This version uses inline code if H has order < 11. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension (M) if SIDE = 'L' +*> or (N) if SIDE = 'R' +*> The vector v in the representation of H. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> WORK is not referenced if H has order < 11. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLARFX( SIDE, M, N, V, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER LDC, M, N + COMPLEX TAU +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER J + COMPLEX SUM, T1, T10, T2, T3, T4, T5, T6, T7, T8, T9, + $ V1, V10, V2, V3, V4, V5, V6, V7, V8, V9 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLARF +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* + IF( TAU.EQ.ZERO ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C, where H has order m. +* + GO TO ( 10, 30, 50, 70, 90, 110, 130, 150, + $ 170, 190 )M +* +* Code for general M +* + CALL CLARF( SIDE, M, N, V, 1, TAU, C, LDC, WORK ) + GO TO 410 + 10 CONTINUE +* +* Special code for 1 x 1 Householder +* + T1 = ONE - TAU*V( 1 )*CONJG( V( 1 ) ) + DO 20 J = 1, N + C( 1, J ) = T1*C( 1, J ) + 20 CONTINUE + GO TO 410 + 30 CONTINUE +* +* Special code for 2 x 2 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + DO 40 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + 40 CONTINUE + GO TO 410 + 50 CONTINUE +* +* Special code for 3 x 3 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + DO 60 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + 60 CONTINUE + GO TO 410 + 70 CONTINUE +* +* Special code for 4 x 4 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + DO 80 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + 80 CONTINUE + GO TO 410 + 90 CONTINUE +* +* Special code for 5 x 5 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + V5 = CONJG( V( 5 ) ) + T5 = TAU*CONJG( V5 ) + DO 100 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + 100 CONTINUE + GO TO 410 + 110 CONTINUE +* +* Special code for 6 x 6 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + V5 = CONJG( V( 5 ) ) + T5 = TAU*CONJG( V5 ) + V6 = CONJG( V( 6 ) ) + T6 = TAU*CONJG( V6 ) + DO 120 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + 120 CONTINUE + GO TO 410 + 130 CONTINUE +* +* Special code for 7 x 7 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + V5 = CONJG( V( 5 ) ) + T5 = TAU*CONJG( V5 ) + V6 = CONJG( V( 6 ) ) + T6 = TAU*CONJG( V6 ) + V7 = CONJG( V( 7 ) ) + T7 = TAU*CONJG( V7 ) + DO 140 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + 140 CONTINUE + GO TO 410 + 150 CONTINUE +* +* Special code for 8 x 8 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + V5 = CONJG( V( 5 ) ) + T5 = TAU*CONJG( V5 ) + V6 = CONJG( V( 6 ) ) + T6 = TAU*CONJG( V6 ) + V7 = CONJG( V( 7 ) ) + T7 = TAU*CONJG( V7 ) + V8 = CONJG( V( 8 ) ) + T8 = TAU*CONJG( V8 ) + DO 160 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + 160 CONTINUE + GO TO 410 + 170 CONTINUE +* +* Special code for 9 x 9 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + V5 = CONJG( V( 5 ) ) + T5 = TAU*CONJG( V5 ) + V6 = CONJG( V( 6 ) ) + T6 = TAU*CONJG( V6 ) + V7 = CONJG( V( 7 ) ) + T7 = TAU*CONJG( V7 ) + V8 = CONJG( V( 8 ) ) + T8 = TAU*CONJG( V8 ) + V9 = CONJG( V( 9 ) ) + T9 = TAU*CONJG( V9 ) + DO 180 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + C( 9, J ) = C( 9, J ) - SUM*T9 + 180 CONTINUE + GO TO 410 + 190 CONTINUE +* +* Special code for 10 x 10 Householder +* + V1 = CONJG( V( 1 ) ) + T1 = TAU*CONJG( V1 ) + V2 = CONJG( V( 2 ) ) + T2 = TAU*CONJG( V2 ) + V3 = CONJG( V( 3 ) ) + T3 = TAU*CONJG( V3 ) + V4 = CONJG( V( 4 ) ) + T4 = TAU*CONJG( V4 ) + V5 = CONJG( V( 5 ) ) + T5 = TAU*CONJG( V5 ) + V6 = CONJG( V( 6 ) ) + T6 = TAU*CONJG( V6 ) + V7 = CONJG( V( 7 ) ) + T7 = TAU*CONJG( V7 ) + V8 = CONJG( V( 8 ) ) + T8 = TAU*CONJG( V8 ) + V9 = CONJG( V( 9 ) ) + T9 = TAU*CONJG( V9 ) + V10 = CONJG( V( 10 ) ) + T10 = TAU*CONJG( V10 ) + DO 200 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) + + $ V10*C( 10, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + C( 9, J ) = C( 9, J ) - SUM*T9 + C( 10, J ) = C( 10, J ) - SUM*T10 + 200 CONTINUE + GO TO 410 + ELSE +* +* Form C * H, where H has order n. +* + GO TO ( 210, 230, 250, 270, 290, 310, 330, 350, + $ 370, 390 )N +* +* Code for general N +* + CALL CLARF( SIDE, M, N, V, 1, TAU, C, LDC, WORK ) + GO TO 410 + 210 CONTINUE +* +* Special code for 1 x 1 Householder +* + T1 = ONE - TAU*V( 1 )*CONJG( V( 1 ) ) + DO 220 J = 1, M + C( J, 1 ) = T1*C( J, 1 ) + 220 CONTINUE + GO TO 410 + 230 CONTINUE +* +* Special code for 2 x 2 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + DO 240 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + 240 CONTINUE + GO TO 410 + 250 CONTINUE +* +* Special code for 3 x 3 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + DO 260 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + 260 CONTINUE + GO TO 410 + 270 CONTINUE +* +* Special code for 4 x 4 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + DO 280 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + 280 CONTINUE + GO TO 410 + 290 CONTINUE +* +* Special code for 5 x 5 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*CONJG( V5 ) + DO 300 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + 300 CONTINUE + GO TO 410 + 310 CONTINUE +* +* Special code for 6 x 6 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*CONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*CONJG( V6 ) + DO 320 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + 320 CONTINUE + GO TO 410 + 330 CONTINUE +* +* Special code for 7 x 7 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*CONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*CONJG( V6 ) + V7 = V( 7 ) + T7 = TAU*CONJG( V7 ) + DO 340 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + 340 CONTINUE + GO TO 410 + 350 CONTINUE +* +* Special code for 8 x 8 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*CONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*CONJG( V6 ) + V7 = V( 7 ) + T7 = TAU*CONJG( V7 ) + V8 = V( 8 ) + T8 = TAU*CONJG( V8 ) + DO 360 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + 360 CONTINUE + GO TO 410 + 370 CONTINUE +* +* Special code for 9 x 9 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*CONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*CONJG( V6 ) + V7 = V( 7 ) + T7 = TAU*CONJG( V7 ) + V8 = V( 8 ) + T8 = TAU*CONJG( V8 ) + V9 = V( 9 ) + T9 = TAU*CONJG( V9 ) + DO 380 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + C( J, 9 ) = C( J, 9 ) - SUM*T9 + 380 CONTINUE + GO TO 410 + 390 CONTINUE +* +* Special code for 10 x 10 Householder +* + V1 = V( 1 ) + T1 = TAU*CONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*CONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*CONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*CONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*CONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*CONJG( V6 ) + V7 = V( 7 ) + T7 = TAU*CONJG( V7 ) + V8 = V( 8 ) + T8 = TAU*CONJG( V8 ) + V9 = V( 9 ) + T9 = TAU*CONJG( V9 ) + V10 = V( 10 ) + T10 = TAU*CONJG( V10 ) + DO 400 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) + + $ V10*C( J, 10 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + C( J, 9 ) = C( J, 9 ) - SUM*T9 + C( J, 10 ) = C( J, 10 ) - SUM*T10 + 400 CONTINUE + GO TO 410 + END IF + 410 RETURN +* +* End of CLARFX +* + END diff --git a/dspl/liblapack/SRC/clarfy.f b/dspl/liblapack/SRC/clarfy.f new file mode 100644 index 0000000..a574385 --- /dev/null +++ b/dspl/liblapack/SRC/clarfy.f @@ -0,0 +1,163 @@ +*> \brief \b CLARFY +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE CLARFY( UPLO, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INCV, LDC, N +* COMPLEX TAU +* .. +* .. Array Arguments .. +* COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARFY applies an elementary reflector, or Householder matrix, H, +*> to an n x n Hermitian matrix C, from both the left and the right. +*> +*> H is represented in the form +*> +*> H = I - tau * v * v' +*> +*> where tau is a scalar and v is a vector. +*> +*> If tau is zero, then H is taken to be the unit matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix C is stored. +*> = 'U': Upper triangle +*> = 'L': Lower triangle +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension +*> (1 + (N-1)*abs(INCV)) +*> The vector v as described above. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between successive elements of v. INCV must +*> not be zero. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX +*> The value tau as described above. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC, N) +*> On entry, the matrix C. +*> On exit, C is overwritten by H * C * H'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max( 1, N ). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex_eig +* +* ===================================================================== + SUBROUTINE CLARFY( UPLO, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCV, LDC, N + COMPLEX TAU +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO, HALF + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ), + $ HALF = ( 0.5E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CHEMV, CHER2 +* .. +* .. External Functions .. + COMPLEX CDOTC + EXTERNAL CDOTC +* .. +* .. Executable Statements .. +* + IF( TAU.EQ.ZERO ) + $ RETURN +* +* Form w:= C * v +* + CALL CHEMV( UPLO, N, ONE, C, LDC, V, INCV, ZERO, WORK, 1 ) +* + ALPHA = -HALF*TAU*CDOTC( N, WORK, 1, V, INCV ) + CALL CAXPY( N, ALPHA, V, INCV, WORK, 1 ) +* +* C := C - v * w' - w * v' +* + CALL CHER2( UPLO, N, -TAU, V, INCV, WORK, 1, C, LDC ) +* + RETURN +* +* End of CLARFY +* + END diff --git a/dspl/liblapack/SRC/clargv.f b/dspl/liblapack/SRC/clargv.f new file mode 100644 index 0000000..ba53cae --- /dev/null +++ b/dspl/liblapack/SRC/clargv.f @@ -0,0 +1,299 @@ +*> \brief \b CLARGV generates a vector of plane rotations with real cosines and complex sines. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARGV( N, X, INCX, Y, INCY, C, INCC ) +* +* .. Scalar Arguments .. +* INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. +* REAL C( * ) +* COMPLEX X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARGV generates a vector of complex plane rotations with real +*> cosines, determined by elements of the complex vectors x and y. +*> For i = 1,2,...,n +*> +*> ( c(i) s(i) ) ( x(i) ) = ( r(i) ) +*> ( -conjg(s(i)) c(i) ) ( y(i) ) = ( 0 ) +*> +*> where c(i)**2 + ABS(s(i))**2 = 1 +*> +*> The following conventions are used (these are the same as in CLARTG, +*> but differ from the BLAS1 routine CROTG): +*> If y(i)=0, then c(i)=1 and s(i)=0. +*> If x(i)=0, then c(i)=0 and s(i) is chosen so that r(i) is real. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of plane rotations to be generated. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (1+(N-1)*INCX) +*> On entry, the vector x. +*> On exit, x(i) is overwritten by r(i), for i = 1,...,n. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX array, dimension (1+(N-1)*INCY) +*> On entry, the vector y. +*> On exit, the sines of the plane rotations. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> The increment between elements of Y. INCY > 0. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is REAL array, dimension (1+(N-1)*INCC) +*> The cosines of the plane rotations. +*> \endverbatim +*> +*> \param[in] INCC +*> \verbatim +*> INCC is INTEGER +*> The increment between elements of C. INCC > 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> 6-6-96 - Modified with a new algorithm by W. Kahan and J. Demmel +*> +*> This version has a few statements commented out for thread safety +*> (machine parameters are computed on each entry). 10 feb 03, SJH. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLARGV( N, X, INCX, Y, INCY, C, INCC ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. + REAL C( * ) + COMPLEX X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL TWO, ONE, ZERO + PARAMETER ( TWO = 2.0E+0, ONE = 1.0E+0, ZERO = 0.0E+0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. +* LOGICAL FIRST + INTEGER COUNT, I, IC, IX, IY, J + REAL CS, D, DI, DR, EPS, F2, F2S, G2, G2S, SAFMIN, + $ SAFMN2, SAFMX2, SCALE + COMPLEX F, FF, FS, G, GS, R, SN +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2 + EXTERNAL SLAMCH, SLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, INT, LOG, MAX, REAL, + $ SQRT +* .. +* .. Statement Functions .. + REAL ABS1, ABSSQ +* .. +* .. Save statement .. +* SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 +* .. +* .. Data statements .. +* DATA FIRST / .TRUE. / +* .. +* .. Statement Function definitions .. + ABS1( FF ) = MAX( ABS( REAL( FF ) ), ABS( AIMAG( FF ) ) ) + ABSSQ( FF ) = REAL( FF )**2 + AIMAG( FF )**2 +* .. +* .. Executable Statements .. +* +* IF( FIRST ) THEN +* FIRST = .FALSE. + SAFMIN = SLAMCH( 'S' ) + EPS = SLAMCH( 'E' ) + SAFMN2 = SLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) / + $ LOG( SLAMCH( 'B' ) ) / TWO ) + SAFMX2 = ONE / SAFMN2 +* END IF + IX = 1 + IY = 1 + IC = 1 + DO 60 I = 1, N + F = X( IX ) + G = Y( IY ) +* +* Use identical algorithm as in CLARTG +* + SCALE = MAX( ABS1( F ), ABS1( G ) ) + FS = F + GS = G + COUNT = 0 + IF( SCALE.GE.SAFMX2 ) THEN + 10 CONTINUE + COUNT = COUNT + 1 + FS = FS*SAFMN2 + GS = GS*SAFMN2 + SCALE = SCALE*SAFMN2 + IF( SCALE.GE.SAFMX2 ) + $ GO TO 10 + ELSE IF( SCALE.LE.SAFMN2 ) THEN + IF( G.EQ.CZERO ) THEN + CS = ONE + SN = CZERO + R = F + GO TO 50 + END IF + 20 CONTINUE + COUNT = COUNT - 1 + FS = FS*SAFMX2 + GS = GS*SAFMX2 + SCALE = SCALE*SAFMX2 + IF( SCALE.LE.SAFMN2 ) + $ GO TO 20 + END IF + F2 = ABSSQ( FS ) + G2 = ABSSQ( GS ) + IF( F2.LE.MAX( G2, ONE )*SAFMIN ) THEN +* +* This is a rare case: F is very small. +* + IF( F.EQ.CZERO ) THEN + CS = ZERO + R = SLAPY2( REAL( G ), AIMAG( G ) ) +* Do complex/real division explicitly with two real +* divisions + D = SLAPY2( REAL( GS ), AIMAG( GS ) ) + SN = CMPLX( REAL( GS ) / D, -AIMAG( GS ) / D ) + GO TO 50 + END IF + F2S = SLAPY2( REAL( FS ), AIMAG( FS ) ) +* G2 and G2S are accurate +* G2 is at least SAFMIN, and G2S is at least SAFMN2 + G2S = SQRT( G2 ) +* Error in CS from underflow in F2S is at most +* UNFL / SAFMN2 .lt. sqrt(UNFL*EPS) .lt. EPS +* If MAX(G2,ONE)=G2, then F2 .lt. G2*SAFMIN, +* and so CS .lt. sqrt(SAFMIN) +* If MAX(G2,ONE)=ONE, then F2 .lt. SAFMIN +* and so CS .lt. sqrt(SAFMIN)/SAFMN2 = sqrt(EPS) +* Therefore, CS = F2S/G2S / sqrt( 1 + (F2S/G2S)**2 ) = F2S/G2S + CS = F2S / G2S +* Make sure abs(FF) = 1 +* Do complex/real division explicitly with 2 real divisions + IF( ABS1( F ).GT.ONE ) THEN + D = SLAPY2( REAL( F ), AIMAG( F ) ) + FF = CMPLX( REAL( F ) / D, AIMAG( F ) / D ) + ELSE + DR = SAFMX2*REAL( F ) + DI = SAFMX2*AIMAG( F ) + D = SLAPY2( DR, DI ) + FF = CMPLX( DR / D, DI / D ) + END IF + SN = FF*CMPLX( REAL( GS ) / G2S, -AIMAG( GS ) / G2S ) + R = CS*F + SN*G + ELSE +* +* This is the most common case. +* Neither F2 nor F2/G2 are less than SAFMIN +* F2S cannot overflow, and it is accurate +* + F2S = SQRT( ONE+G2 / F2 ) +* Do the F2S(real)*FS(complex) multiply with two real +* multiplies + R = CMPLX( F2S*REAL( FS ), F2S*AIMAG( FS ) ) + CS = ONE / F2S + D = F2 + G2 +* Do complex/real division explicitly with two real divisions + SN = CMPLX( REAL( R ) / D, AIMAG( R ) / D ) + SN = SN*CONJG( GS ) + IF( COUNT.NE.0 ) THEN + IF( COUNT.GT.0 ) THEN + DO 30 J = 1, COUNT + R = R*SAFMX2 + 30 CONTINUE + ELSE + DO 40 J = 1, -COUNT + R = R*SAFMN2 + 40 CONTINUE + END IF + END IF + END IF + 50 CONTINUE + C( IC ) = CS + Y( IY ) = SN + X( IX ) = R + IC = IC + INCC + IY = IY + INCY + IX = IX + INCX + 60 CONTINUE + RETURN +* +* End of CLARGV +* + END diff --git a/dspl/liblapack/SRC/clarnv.f b/dspl/liblapack/SRC/clarnv.f new file mode 100644 index 0000000..9b26f4f --- /dev/null +++ b/dspl/liblapack/SRC/clarnv.f @@ -0,0 +1,193 @@ +*> \brief \b CLARNV returns a vector of random numbers from a uniform or normal distribution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARNV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARNV( IDIST, ISEED, N, X ) +* +* .. Scalar Arguments .. +* INTEGER IDIST, N +* .. +* .. Array Arguments .. +* INTEGER ISEED( 4 ) +* COMPLEX X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARNV returns a vector of n random complex numbers from a uniform or +*> normal distribution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] IDIST +*> \verbatim +*> IDIST is INTEGER +*> Specifies the distribution of the random numbers: +*> = 1: real and imaginary parts each uniform (0,1) +*> = 2: real and imaginary parts each uniform (-1,1) +*> = 3: real and imaginary parts each normal (0,1) +*> = 4: uniformly distributed on the disc abs(z) < 1 +*> = 5: uniformly distributed on the circle abs(z) = 1 +*> \endverbatim +*> +*> \param[in,out] ISEED +*> \verbatim +*> ISEED is INTEGER array, dimension (4) +*> On entry, the seed of the random number generator; the array +*> elements must be between 0 and 4095, and ISEED(4) must be +*> odd. +*> On exit, the seed is updated. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of random numbers to be generated. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (N) +*> The generated random numbers. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> This routine calls the auxiliary routine SLARUV to generate random +*> real numbers from a uniform (0,1) distribution, in batches of up to +*> 128 using vectorisable code. The Box-Muller method is used to +*> transform numbers from a uniform to a normal distribution. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLARNV( IDIST, ISEED, N, X ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IDIST, N +* .. +* .. Array Arguments .. + INTEGER ISEED( 4 ) + COMPLEX X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) + INTEGER LV + PARAMETER ( LV = 128 ) + REAL TWOPI + PARAMETER ( TWOPI = 6.2831853071795864769252867663E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IL, IV +* .. +* .. Local Arrays .. + REAL U( LV ) +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX, EXP, LOG, MIN, SQRT +* .. +* .. External Subroutines .. + EXTERNAL SLARUV +* .. +* .. Executable Statements .. +* + DO 60 IV = 1, N, LV / 2 + IL = MIN( LV / 2, N-IV+1 ) +* +* Call SLARUV to generate 2*IL real numbers from a uniform (0,1) +* distribution (2*IL <= LV) +* + CALL SLARUV( ISEED, 2*IL, U ) +* + IF( IDIST.EQ.1 ) THEN +* +* Copy generated numbers +* + DO 10 I = 1, IL + X( IV+I-1 ) = CMPLX( U( 2*I-1 ), U( 2*I ) ) + 10 CONTINUE + ELSE IF( IDIST.EQ.2 ) THEN +* +* Convert generated numbers to uniform (-1,1) distribution +* + DO 20 I = 1, IL + X( IV+I-1 ) = CMPLX( TWO*U( 2*I-1 )-ONE, + $ TWO*U( 2*I )-ONE ) + 20 CONTINUE + ELSE IF( IDIST.EQ.3 ) THEN +* +* Convert generated numbers to normal (0,1) distribution +* + DO 30 I = 1, IL + X( IV+I-1 ) = SQRT( -TWO*LOG( U( 2*I-1 ) ) )* + $ EXP( CMPLX( ZERO, TWOPI*U( 2*I ) ) ) + 30 CONTINUE + ELSE IF( IDIST.EQ.4 ) THEN +* +* Convert generated numbers to complex numbers uniformly +* distributed on the unit disk +* + DO 40 I = 1, IL + X( IV+I-1 ) = SQRT( U( 2*I-1 ) )* + $ EXP( CMPLX( ZERO, TWOPI*U( 2*I ) ) ) + 40 CONTINUE + ELSE IF( IDIST.EQ.5 ) THEN +* +* Convert generated numbers to complex numbers uniformly +* distributed on the unit circle +* + DO 50 I = 1, IL + X( IV+I-1 ) = EXP( CMPLX( ZERO, TWOPI*U( 2*I ) ) ) + 50 CONTINUE + END IF + 60 CONTINUE + RETURN +* +* End of CLARNV +* + END diff --git a/dspl/liblapack/SRC/clarrv.f b/dspl/liblapack/SRC/clarrv.f new file mode 100644 index 0000000..72fe1f9 --- /dev/null +++ b/dspl/liblapack/SRC/clarrv.f @@ -0,0 +1,1060 @@ +*> \brief \b CLARRV computes the eigenvectors of the tridiagonal matrix T = L D LT given L, D and the eigenvalues of L D LT. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARRV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARRV( N, VL, VU, D, L, PIVMIN, +* ISPLIT, M, DOL, DOU, MINRGP, +* RTOL1, RTOL2, W, WERR, WGAP, +* IBLOCK, INDEXW, GERS, Z, LDZ, ISUPPZ, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER DOL, DOU, INFO, LDZ, M, N +* REAL MINRGP, PIVMIN, RTOL1, RTOL2, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IBLOCK( * ), INDEXW( * ), ISPLIT( * ), +* $ ISUPPZ( * ), IWORK( * ) +* REAL D( * ), GERS( * ), L( * ), W( * ), WERR( * ), +* $ WGAP( * ), WORK( * ) +* COMPLEX Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARRV computes the eigenvectors of the tridiagonal matrix +*> T = L D L**T given L, D and APPROXIMATIONS to the eigenvalues of L D L**T. +*> The input eigenvalues should have been computed by SLARRE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> Lower bound of the interval that contains the desired +*> eigenvalues. VL < VU. Needed to compute gaps on the left or right +*> end of the extremal eigenvalues in the desired RANGE. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> Upper bound of the interval that contains the desired +*> eigenvalues. VL < VU. Needed to compute gaps on the left or right +*> end of the extremal eigenvalues in the desired RANGE. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the N diagonal elements of the diagonal matrix D. +*> On exit, D may be overwritten. +*> \endverbatim +*> +*> \param[in,out] L +*> \verbatim +*> L is REAL array, dimension (N) +*> On entry, the (N-1) subdiagonal elements of the unit +*> bidiagonal matrix L are in elements 1 to N-1 of L +*> (if the matrix is not split.) At the end of each block +*> is stored the corresponding shift as given by SLARRE. +*> On exit, L is overwritten. +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is REAL +*> The minimum pivot allowed in the Sturm sequence. +*> \endverbatim +*> +*> \param[in] ISPLIT +*> \verbatim +*> ISPLIT is INTEGER array, dimension (N) +*> The splitting points, at which T breaks up into blocks. +*> The first block consists of rows/columns 1 to +*> ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 +*> through ISPLIT( 2 ), etc. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The total number of input eigenvalues. 0 <= M <= N. +*> \endverbatim +*> +*> \param[in] DOL +*> \verbatim +*> DOL is INTEGER +*> \endverbatim +*> +*> \param[in] DOU +*> \verbatim +*> DOU is INTEGER +*> If the user wants to compute only selected eigenvectors from all +*> the eigenvalues supplied, he can specify an index range DOL:DOU. +*> Or else the setting DOL=1, DOU=M should be applied. +*> Note that DOL and DOU refer to the order in which the eigenvalues +*> are stored in W. +*> If the user wants to compute only selected eigenpairs, then +*> the columns DOL-1 to DOU+1 of the eigenvector space Z contain the +*> computed eigenvectors. All other columns of Z are set to zero. +*> \endverbatim +*> +*> \param[in] MINRGP +*> \verbatim +*> MINRGP is REAL +*> \endverbatim +*> +*> \param[in] RTOL1 +*> \verbatim +*> RTOL1 is REAL +*> \endverbatim +*> +*> \param[in] RTOL2 +*> \verbatim +*> RTOL2 is REAL +*> Parameters for bisection. +*> An interval [LEFT,RIGHT] has converged if +*> RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) +*> \endverbatim +*> +*> \param[in,out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements of W contain the APPROXIMATE eigenvalues for +*> which eigenvectors are to be computed. The eigenvalues +*> should be grouped by split-off block and ordered from +*> smallest to largest within the block ( The output array +*> W from SLARRE is expected here ). Furthermore, they are with +*> respect to the shift of the corresponding root representation +*> for their block. On exit, W holds the eigenvalues of the +*> UNshifted matrix. +*> \endverbatim +*> +*> \param[in,out] WERR +*> \verbatim +*> WERR is REAL array, dimension (N) +*> The first M elements contain the semiwidth of the uncertainty +*> interval of the corresponding eigenvalue in W +*> \endverbatim +*> +*> \param[in,out] WGAP +*> \verbatim +*> WGAP is REAL array, dimension (N) +*> The separation from the right neighbor eigenvalue in W. +*> \endverbatim +*> +*> \param[in] IBLOCK +*> \verbatim +*> IBLOCK is INTEGER array, dimension (N) +*> The indices of the blocks (submatrices) associated with the +*> corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue +*> W(i) belongs to the first block from the top, =2 if W(i) +*> belongs to the second block, etc. +*> \endverbatim +*> +*> \param[in] INDEXW +*> \verbatim +*> INDEXW is INTEGER array, dimension (N) +*> The indices of the eigenvalues within each block (submatrix); +*> for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the +*> i-th eigenvalue W(i) is the 10-th eigenvalue in the second block. +*> \endverbatim +*> +*> \param[in] GERS +*> \verbatim +*> GERS is REAL array, dimension (2*N) +*> The N Gerschgorin intervals (the i-th Gerschgorin interval +*> is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should +*> be computed from the original UNshifted matrix. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, max(1,M) ) +*> If INFO = 0, the first M columns of Z contain the +*> orthonormal eigenvectors of the matrix T +*> corresponding to the input eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The I-th eigenvector +*> is nonzero only in elements ISUPPZ( 2*I-1 ) through +*> ISUPPZ( 2*I ). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (12*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> > 0: A problem occurred in CLARRV. +*> < 0: One of the called subroutines signaled an internal problem. +*> Needs inspection of the corresponding parameter IINFO +*> for further information. +*> +*> =-1: Problem in SLARRB when refining a child's eigenvalues. +*> =-2: Problem in SLARRF when computing the RRR of a child. +*> When a child is inside a tight cluster, it can be difficult +*> to find an RRR. A partial remedy from the user's point of +*> view is to make the parameter MINRGP smaller and recompile. +*> However, as the orthogonality of the computed vectors is +*> proportional to 1/MINRGP, the user should be aware that +*> he might be trading in precision when he decreases MINRGP. +*> =-3: Problem in SLARRB when refining a single eigenvalue +*> after the Rayleigh correction was rejected. +*> = 5: The Rayleigh Quotient Iteration failed to converge to +*> full accuracy in MAXITR steps. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE CLARRV( N, VL, VU, D, L, PIVMIN, + $ ISPLIT, M, DOL, DOU, MINRGP, + $ RTOL1, RTOL2, W, WERR, WGAP, + $ IBLOCK, INDEXW, GERS, Z, LDZ, ISUPPZ, + $ WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER DOL, DOU, INFO, LDZ, M, N + REAL MINRGP, PIVMIN, RTOL1, RTOL2, VL, VU +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), INDEXW( * ), ISPLIT( * ), + $ ISUPPZ( * ), IWORK( * ) + REAL D( * ), GERS( * ), L( * ), W( * ), WERR( * ), + $ WGAP( * ), WORK( * ) + COMPLEX Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXITR + PARAMETER ( MAXITR = 10 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ) ) + REAL ZERO, ONE, TWO, THREE, FOUR, HALF + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, + $ TWO = 2.0E0, THREE = 3.0E0, + $ FOUR = 4.0E0, HALF = 0.5E0) +* .. +* .. Local Scalars .. + LOGICAL ESKIP, NEEDBS, STP2II, TRYRQC, USEDBS, USEDRQ + INTEGER DONE, I, IBEGIN, IDONE, IEND, II, IINDC1, + $ IINDC2, IINDR, IINDWK, IINFO, IM, IN, INDEIG, + $ INDLD, INDLLD, INDWRK, ISUPMN, ISUPMX, ITER, + $ ITMP1, J, JBLK, K, MINIWSIZE, MINWSIZE, NCLUS, + $ NDEPTH, NEGCNT, NEWCLS, NEWFST, NEWFTT, NEWLST, + $ NEWSIZ, OFFSET, OLDCLS, OLDFST, OLDIEN, OLDLST, + $ OLDNCL, P, PARITY, Q, WBEGIN, WEND, WINDEX, + $ WINDMN, WINDPL, ZFROM, ZTO, ZUSEDL, ZUSEDU, + $ ZUSEDW + INTEGER INDIN1, INDIN2 + REAL BSTRES, BSTW, EPS, FUDGE, GAP, GAPTOL, GL, GU, + $ LAMBDA, LEFT, LGAP, MINGMA, NRMINV, RESID, + $ RGAP, RIGHT, RQCORR, RQTOL, SAVGAP, SGNDEF, + $ SIGMA, SPDIAM, SSIGMA, TAU, TMP, TOL, ZTZ +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLAR1V, CLASET, CSSCAL, SCOPY, SLARRB, + $ SLARRF +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, MAX, MIN + INTRINSIC CMPLX +* .. +* .. Executable Statements .. +* .. + + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* +* The first N entries of WORK are reserved for the eigenvalues + INDLD = N+1 + INDLLD= 2*N+1 + INDIN1 = 3*N + 1 + INDIN2 = 4*N + 1 + INDWRK = 5*N + 1 + MINWSIZE = 12 * N + + DO 5 I= 1,MINWSIZE + WORK( I ) = ZERO + 5 CONTINUE + +* IWORK(IINDR+1:IINDR+N) hold the twist indices R for the +* factorization used to compute the FP vector + IINDR = 0 +* IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current +* layer and the one above. + IINDC1 = N + IINDC2 = 2*N + IINDWK = 3*N + 1 + + MINIWSIZE = 7 * N + DO 10 I= 1,MINIWSIZE + IWORK( I ) = 0 + 10 CONTINUE + + ZUSEDL = 1 + IF(DOL.GT.1) THEN +* Set lower bound for use of Z + ZUSEDL = DOL-1 + ENDIF + ZUSEDU = M + IF(DOU.LT.M) THEN +* Set lower bound for use of Z + ZUSEDU = DOU+1 + ENDIF +* The width of the part of Z that is used + ZUSEDW = ZUSEDU - ZUSEDL + 1 + + + CALL CLASET( 'Full', N, ZUSEDW, CZERO, CZERO, + $ Z(1,ZUSEDL), LDZ ) + + EPS = SLAMCH( 'Precision' ) + RQTOL = TWO * EPS +* +* Set expert flags for standard code. + TRYRQC = .TRUE. + + IF((DOL.EQ.1).AND.(DOU.EQ.M)) THEN + ELSE +* Only selected eigenpairs are computed. Since the other evalues +* are not refined by RQ iteration, bisection has to compute to full +* accuracy. + RTOL1 = FOUR * EPS + RTOL2 = FOUR * EPS + ENDIF + +* The entries WBEGIN:WEND in W, WERR, WGAP correspond to the +* desired eigenvalues. The support of the nonzero eigenvector +* entries is contained in the interval IBEGIN:IEND. +* Remark that if k eigenpairs are desired, then the eigenvectors +* are stored in k contiguous columns of Z. + +* DONE is the number of eigenvectors already computed + DONE = 0 + IBEGIN = 1 + WBEGIN = 1 + DO 170 JBLK = 1, IBLOCK( M ) + IEND = ISPLIT( JBLK ) + SIGMA = L( IEND ) +* Find the eigenvectors of the submatrix indexed IBEGIN +* through IEND. + WEND = WBEGIN - 1 + 15 CONTINUE + IF( WEND.LT.M ) THEN + IF( IBLOCK( WEND+1 ).EQ.JBLK ) THEN + WEND = WEND + 1 + GO TO 15 + END IF + END IF + IF( WEND.LT.WBEGIN ) THEN + IBEGIN = IEND + 1 + GO TO 170 + ELSEIF( (WEND.LT.DOL).OR.(WBEGIN.GT.DOU) ) THEN + IBEGIN = IEND + 1 + WBEGIN = WEND + 1 + GO TO 170 + END IF + +* Find local spectral diameter of the block + GL = GERS( 2*IBEGIN-1 ) + GU = GERS( 2*IBEGIN ) + DO 20 I = IBEGIN+1 , IEND + GL = MIN( GERS( 2*I-1 ), GL ) + GU = MAX( GERS( 2*I ), GU ) + 20 CONTINUE + SPDIAM = GU - GL + +* OLDIEN is the last index of the previous block + OLDIEN = IBEGIN - 1 +* Calculate the size of the current block + IN = IEND - IBEGIN + 1 +* The number of eigenvalues in the current block + IM = WEND - WBEGIN + 1 + +* This is for a 1x1 block + IF( IBEGIN.EQ.IEND ) THEN + DONE = DONE+1 + Z( IBEGIN, WBEGIN ) = CMPLX( ONE, ZERO ) + ISUPPZ( 2*WBEGIN-1 ) = IBEGIN + ISUPPZ( 2*WBEGIN ) = IBEGIN + W( WBEGIN ) = W( WBEGIN ) + SIGMA + WORK( WBEGIN ) = W( WBEGIN ) + IBEGIN = IEND + 1 + WBEGIN = WBEGIN + 1 + GO TO 170 + END IF + +* The desired (shifted) eigenvalues are stored in W(WBEGIN:WEND) +* Note that these can be approximations, in this case, the corresp. +* entries of WERR give the size of the uncertainty interval. +* The eigenvalue approximations will be refined when necessary as +* high relative accuracy is required for the computation of the +* corresponding eigenvectors. + CALL SCOPY( IM, W( WBEGIN ), 1, + $ WORK( WBEGIN ), 1 ) + +* We store in W the eigenvalue approximations w.r.t. the original +* matrix T. + DO 30 I=1,IM + W(WBEGIN+I-1) = W(WBEGIN+I-1)+SIGMA + 30 CONTINUE + + +* NDEPTH is the current depth of the representation tree + NDEPTH = 0 +* PARITY is either 1 or 0 + PARITY = 1 +* NCLUS is the number of clusters for the next level of the +* representation tree, we start with NCLUS = 1 for the root + NCLUS = 1 + IWORK( IINDC1+1 ) = 1 + IWORK( IINDC1+2 ) = IM + +* IDONE is the number of eigenvectors already computed in the current +* block + IDONE = 0 +* loop while( IDONE.LT.IM ) +* generate the representation tree for the current block and +* compute the eigenvectors + 40 CONTINUE + IF( IDONE.LT.IM ) THEN +* This is a crude protection against infinitely deep trees + IF( NDEPTH.GT.M ) THEN + INFO = -2 + RETURN + ENDIF +* breadth first processing of the current level of the representation +* tree: OLDNCL = number of clusters on current level + OLDNCL = NCLUS +* reset NCLUS to count the number of child clusters + NCLUS = 0 +* + PARITY = 1 - PARITY + IF( PARITY.EQ.0 ) THEN + OLDCLS = IINDC1 + NEWCLS = IINDC2 + ELSE + OLDCLS = IINDC2 + NEWCLS = IINDC1 + END IF +* Process the clusters on the current level + DO 150 I = 1, OLDNCL + J = OLDCLS + 2*I +* OLDFST, OLDLST = first, last index of current cluster. +* cluster indices start with 1 and are relative +* to WBEGIN when accessing W, WGAP, WERR, Z + OLDFST = IWORK( J-1 ) + OLDLST = IWORK( J ) + IF( NDEPTH.GT.0 ) THEN +* Retrieve relatively robust representation (RRR) of cluster +* that has been computed at the previous level +* The RRR is stored in Z and overwritten once the eigenvectors +* have been computed or when the cluster is refined + + IF((DOL.EQ.1).AND.(DOU.EQ.M)) THEN +* Get representation from location of the leftmost evalue +* of the cluster + J = WBEGIN + OLDFST - 1 + ELSE + IF(WBEGIN+OLDFST-1.LT.DOL) THEN +* Get representation from the left end of Z array + J = DOL - 1 + ELSEIF(WBEGIN+OLDFST-1.GT.DOU) THEN +* Get representation from the right end of Z array + J = DOU + ELSE + J = WBEGIN + OLDFST - 1 + ENDIF + ENDIF + DO 45 K = 1, IN - 1 + D( IBEGIN+K-1 ) = REAL( Z( IBEGIN+K-1, + $ J ) ) + L( IBEGIN+K-1 ) = REAL( Z( IBEGIN+K-1, + $ J+1 ) ) + 45 CONTINUE + D( IEND ) = REAL( Z( IEND, J ) ) + SIGMA = REAL( Z( IEND, J+1 ) ) + +* Set the corresponding entries in Z to zero + CALL CLASET( 'Full', IN, 2, CZERO, CZERO, + $ Z( IBEGIN, J), LDZ ) + END IF + +* Compute DL and DLL of current RRR + DO 50 J = IBEGIN, IEND-1 + TMP = D( J )*L( J ) + WORK( INDLD-1+J ) = TMP + WORK( INDLLD-1+J ) = TMP*L( J ) + 50 CONTINUE + + IF( NDEPTH.GT.0 ) THEN +* P and Q are index of the first and last eigenvalue to compute +* within the current block + P = INDEXW( WBEGIN-1+OLDFST ) + Q = INDEXW( WBEGIN-1+OLDLST ) +* Offset for the arrays WORK, WGAP and WERR, i.e., the P-OFFSET +* through the Q-OFFSET elements of these arrays are to be used. +* OFFSET = P-OLDFST + OFFSET = INDEXW( WBEGIN ) - 1 +* perform limited bisection (if necessary) to get approximate +* eigenvalues to the precision needed. + CALL SLARRB( IN, D( IBEGIN ), + $ WORK(INDLLD+IBEGIN-1), + $ P, Q, RTOL1, RTOL2, OFFSET, + $ WORK(WBEGIN),WGAP(WBEGIN),WERR(WBEGIN), + $ WORK( INDWRK ), IWORK( IINDWK ), + $ PIVMIN, SPDIAM, IN, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = -1 + RETURN + ENDIF +* We also recompute the extremal gaps. W holds all eigenvalues +* of the unshifted matrix and must be used for computation +* of WGAP, the entries of WORK might stem from RRRs with +* different shifts. The gaps from WBEGIN-1+OLDFST to +* WBEGIN-1+OLDLST are correctly computed in SLARRB. +* However, we only allow the gaps to become greater since +* this is what should happen when we decrease WERR + IF( OLDFST.GT.1) THEN + WGAP( WBEGIN+OLDFST-2 ) = + $ MAX(WGAP(WBEGIN+OLDFST-2), + $ W(WBEGIN+OLDFST-1)-WERR(WBEGIN+OLDFST-1) + $ - W(WBEGIN+OLDFST-2)-WERR(WBEGIN+OLDFST-2) ) + ENDIF + IF( WBEGIN + OLDLST -1 .LT. WEND ) THEN + WGAP( WBEGIN+OLDLST-1 ) = + $ MAX(WGAP(WBEGIN+OLDLST-1), + $ W(WBEGIN+OLDLST)-WERR(WBEGIN+OLDLST) + $ - W(WBEGIN+OLDLST-1)-WERR(WBEGIN+OLDLST-1) ) + ENDIF +* Each time the eigenvalues in WORK get refined, we store +* the newly found approximation with all shifts applied in W + DO 53 J=OLDFST,OLDLST + W(WBEGIN+J-1) = WORK(WBEGIN+J-1)+SIGMA + 53 CONTINUE + END IF + +* Process the current node. + NEWFST = OLDFST + DO 140 J = OLDFST, OLDLST + IF( J.EQ.OLDLST ) THEN +* we are at the right end of the cluster, this is also the +* boundary of the child cluster + NEWLST = J + ELSE IF ( WGAP( WBEGIN + J -1).GE. + $ MINRGP* ABS( WORK(WBEGIN + J -1) ) ) THEN +* the right relative gap is big enough, the child cluster +* (NEWFST,..,NEWLST) is well separated from the following + NEWLST = J + ELSE +* inside a child cluster, the relative gap is not +* big enough. + GOTO 140 + END IF + +* Compute size of child cluster found + NEWSIZ = NEWLST - NEWFST + 1 + +* NEWFTT is the place in Z where the new RRR or the computed +* eigenvector is to be stored + IF((DOL.EQ.1).AND.(DOU.EQ.M)) THEN +* Store representation at location of the leftmost evalue +* of the cluster + NEWFTT = WBEGIN + NEWFST - 1 + ELSE + IF(WBEGIN+NEWFST-1.LT.DOL) THEN +* Store representation at the left end of Z array + NEWFTT = DOL - 1 + ELSEIF(WBEGIN+NEWFST-1.GT.DOU) THEN +* Store representation at the right end of Z array + NEWFTT = DOU + ELSE + NEWFTT = WBEGIN + NEWFST - 1 + ENDIF + ENDIF + + IF( NEWSIZ.GT.1) THEN +* +* Current child is not a singleton but a cluster. +* Compute and store new representation of child. +* +* +* Compute left and right cluster gap. +* +* LGAP and RGAP are not computed from WORK because +* the eigenvalue approximations may stem from RRRs +* different shifts. However, W hold all eigenvalues +* of the unshifted matrix. Still, the entries in WGAP +* have to be computed from WORK since the entries +* in W might be of the same order so that gaps are not +* exhibited correctly for very close eigenvalues. + IF( NEWFST.EQ.1 ) THEN + LGAP = MAX( ZERO, + $ W(WBEGIN)-WERR(WBEGIN) - VL ) + ELSE + LGAP = WGAP( WBEGIN+NEWFST-2 ) + ENDIF + RGAP = WGAP( WBEGIN+NEWLST-1 ) +* +* Compute left- and rightmost eigenvalue of child +* to high precision in order to shift as close +* as possible and obtain as large relative gaps +* as possible +* + DO 55 K =1,2 + IF(K.EQ.1) THEN + P = INDEXW( WBEGIN-1+NEWFST ) + ELSE + P = INDEXW( WBEGIN-1+NEWLST ) + ENDIF + OFFSET = INDEXW( WBEGIN ) - 1 + CALL SLARRB( IN, D(IBEGIN), + $ WORK( INDLLD+IBEGIN-1 ),P,P, + $ RQTOL, RQTOL, OFFSET, + $ WORK(WBEGIN),WGAP(WBEGIN), + $ WERR(WBEGIN),WORK( INDWRK ), + $ IWORK( IINDWK ), PIVMIN, SPDIAM, + $ IN, IINFO ) + 55 CONTINUE +* + IF((WBEGIN+NEWLST-1.LT.DOL).OR. + $ (WBEGIN+NEWFST-1.GT.DOU)) THEN +* if the cluster contains no desired eigenvalues +* skip the computation of that branch of the rep. tree +* +* We could skip before the refinement of the extremal +* eigenvalues of the child, but then the representation +* tree could be different from the one when nothing is +* skipped. For this reason we skip at this place. + IDONE = IDONE + NEWLST - NEWFST + 1 + GOTO 139 + ENDIF +* +* Compute RRR of child cluster. +* Note that the new RRR is stored in Z +* +* SLARRF needs LWORK = 2*N + CALL SLARRF( IN, D( IBEGIN ), L( IBEGIN ), + $ WORK(INDLD+IBEGIN-1), + $ NEWFST, NEWLST, WORK(WBEGIN), + $ WGAP(WBEGIN), WERR(WBEGIN), + $ SPDIAM, LGAP, RGAP, PIVMIN, TAU, + $ WORK( INDIN1 ), WORK( INDIN2 ), + $ WORK( INDWRK ), IINFO ) +* In the complex case, SLARRF cannot write +* the new RRR directly into Z and needs an intermediate +* workspace + DO 56 K = 1, IN-1 + Z( IBEGIN+K-1, NEWFTT ) = + $ CMPLX( WORK( INDIN1+K-1 ), ZERO ) + Z( IBEGIN+K-1, NEWFTT+1 ) = + $ CMPLX( WORK( INDIN2+K-1 ), ZERO ) + 56 CONTINUE + Z( IEND, NEWFTT ) = + $ CMPLX( WORK( INDIN1+IN-1 ), ZERO ) + IF( IINFO.EQ.0 ) THEN +* a new RRR for the cluster was found by SLARRF +* update shift and store it + SSIGMA = SIGMA + TAU + Z( IEND, NEWFTT+1 ) = CMPLX( SSIGMA, ZERO ) +* WORK() are the midpoints and WERR() the semi-width +* Note that the entries in W are unchanged. + DO 116 K = NEWFST, NEWLST + FUDGE = + $ THREE*EPS*ABS(WORK(WBEGIN+K-1)) + WORK( WBEGIN + K - 1 ) = + $ WORK( WBEGIN + K - 1) - TAU + FUDGE = FUDGE + + $ FOUR*EPS*ABS(WORK(WBEGIN+K-1)) +* Fudge errors + WERR( WBEGIN + K - 1 ) = + $ WERR( WBEGIN + K - 1 ) + FUDGE +* Gaps are not fudged. Provided that WERR is small +* when eigenvalues are close, a zero gap indicates +* that a new representation is needed for resolving +* the cluster. A fudge could lead to a wrong decision +* of judging eigenvalues 'separated' which in +* reality are not. This could have a negative impact +* on the orthogonality of the computed eigenvectors. + 116 CONTINUE + + NCLUS = NCLUS + 1 + K = NEWCLS + 2*NCLUS + IWORK( K-1 ) = NEWFST + IWORK( K ) = NEWLST + ELSE + INFO = -2 + RETURN + ENDIF + ELSE +* +* Compute eigenvector of singleton +* + ITER = 0 +* + TOL = FOUR * LOG(REAL(IN)) * EPS +* + K = NEWFST + WINDEX = WBEGIN + K - 1 + WINDMN = MAX(WINDEX - 1,1) + WINDPL = MIN(WINDEX + 1,M) + LAMBDA = WORK( WINDEX ) + DONE = DONE + 1 +* Check if eigenvector computation is to be skipped + IF((WINDEX.LT.DOL).OR. + $ (WINDEX.GT.DOU)) THEN + ESKIP = .TRUE. + GOTO 125 + ELSE + ESKIP = .FALSE. + ENDIF + LEFT = WORK( WINDEX ) - WERR( WINDEX ) + RIGHT = WORK( WINDEX ) + WERR( WINDEX ) + INDEIG = INDEXW( WINDEX ) +* Note that since we compute the eigenpairs for a child, +* all eigenvalue approximations are w.r.t the same shift. +* In this case, the entries in WORK should be used for +* computing the gaps since they exhibit even very small +* differences in the eigenvalues, as opposed to the +* entries in W which might "look" the same. + + IF( K .EQ. 1) THEN +* In the case RANGE='I' and with not much initial +* accuracy in LAMBDA and VL, the formula +* LGAP = MAX( ZERO, (SIGMA - VL) + LAMBDA ) +* can lead to an overestimation of the left gap and +* thus to inadequately early RQI 'convergence'. +* Prevent this by forcing a small left gap. + LGAP = EPS*MAX(ABS(LEFT),ABS(RIGHT)) + ELSE + LGAP = WGAP(WINDMN) + ENDIF + IF( K .EQ. IM) THEN +* In the case RANGE='I' and with not much initial +* accuracy in LAMBDA and VU, the formula +* can lead to an overestimation of the right gap and +* thus to inadequately early RQI 'convergence'. +* Prevent this by forcing a small right gap. + RGAP = EPS*MAX(ABS(LEFT),ABS(RIGHT)) + ELSE + RGAP = WGAP(WINDEX) + ENDIF + GAP = MIN( LGAP, RGAP ) + IF(( K .EQ. 1).OR.(K .EQ. IM)) THEN +* The eigenvector support can become wrong +* because significant entries could be cut off due to a +* large GAPTOL parameter in LAR1V. Prevent this. + GAPTOL = ZERO + ELSE + GAPTOL = GAP * EPS + ENDIF + ISUPMN = IN + ISUPMX = 1 +* Update WGAP so that it holds the minimum gap +* to the left or the right. This is crucial in the +* case where bisection is used to ensure that the +* eigenvalue is refined up to the required precision. +* The correct value is restored afterwards. + SAVGAP = WGAP(WINDEX) + WGAP(WINDEX) = GAP +* We want to use the Rayleigh Quotient Correction +* as often as possible since it converges quadratically +* when we are close enough to the desired eigenvalue. +* However, the Rayleigh Quotient can have the wrong sign +* and lead us away from the desired eigenvalue. In this +* case, the best we can do is to use bisection. + USEDBS = .FALSE. + USEDRQ = .FALSE. +* Bisection is initially turned off unless it is forced + NEEDBS = .NOT.TRYRQC + 120 CONTINUE +* Check if bisection should be used to refine eigenvalue + IF(NEEDBS) THEN +* Take the bisection as new iterate + USEDBS = .TRUE. + ITMP1 = IWORK( IINDR+WINDEX ) + OFFSET = INDEXW( WBEGIN ) - 1 + CALL SLARRB( IN, D(IBEGIN), + $ WORK(INDLLD+IBEGIN-1),INDEIG,INDEIG, + $ ZERO, TWO*EPS, OFFSET, + $ WORK(WBEGIN),WGAP(WBEGIN), + $ WERR(WBEGIN),WORK( INDWRK ), + $ IWORK( IINDWK ), PIVMIN, SPDIAM, + $ ITMP1, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = -3 + RETURN + ENDIF + LAMBDA = WORK( WINDEX ) +* Reset twist index from inaccurate LAMBDA to +* force computation of true MINGMA + IWORK( IINDR+WINDEX ) = 0 + ENDIF +* Given LAMBDA, compute the eigenvector. + CALL CLAR1V( IN, 1, IN, LAMBDA, D( IBEGIN ), + $ L( IBEGIN ), WORK(INDLD+IBEGIN-1), + $ WORK(INDLLD+IBEGIN-1), + $ PIVMIN, GAPTOL, Z( IBEGIN, WINDEX ), + $ .NOT.USEDBS, NEGCNT, ZTZ, MINGMA, + $ IWORK( IINDR+WINDEX ), ISUPPZ( 2*WINDEX-1 ), + $ NRMINV, RESID, RQCORR, WORK( INDWRK ) ) + IF(ITER .EQ. 0) THEN + BSTRES = RESID + BSTW = LAMBDA + ELSEIF(RESID.LT.BSTRES) THEN + BSTRES = RESID + BSTW = LAMBDA + ENDIF + ISUPMN = MIN(ISUPMN,ISUPPZ( 2*WINDEX-1 )) + ISUPMX = MAX(ISUPMX,ISUPPZ( 2*WINDEX )) + ITER = ITER + 1 + +* sin alpha <= |resid|/gap +* Note that both the residual and the gap are +* proportional to the matrix, so ||T|| doesn't play +* a role in the quotient + +* +* Convergence test for Rayleigh-Quotient iteration +* (omitted when Bisection has been used) +* + IF( RESID.GT.TOL*GAP .AND. ABS( RQCORR ).GT. + $ RQTOL*ABS( LAMBDA ) .AND. .NOT. USEDBS) + $ THEN +* We need to check that the RQCORR update doesn't +* move the eigenvalue away from the desired one and +* towards a neighbor. -> protection with bisection + IF(INDEIG.LE.NEGCNT) THEN +* The wanted eigenvalue lies to the left + SGNDEF = -ONE + ELSE +* The wanted eigenvalue lies to the right + SGNDEF = ONE + ENDIF +* We only use the RQCORR if it improves the +* the iterate reasonably. + IF( ( RQCORR*SGNDEF.GE.ZERO ) + $ .AND.( LAMBDA + RQCORR.LE. RIGHT) + $ .AND.( LAMBDA + RQCORR.GE. LEFT) + $ ) THEN + USEDRQ = .TRUE. +* Store new midpoint of bisection interval in WORK + IF(SGNDEF.EQ.ONE) THEN +* The current LAMBDA is on the left of the true +* eigenvalue + LEFT = LAMBDA +* We prefer to assume that the error estimate +* is correct. We could make the interval not +* as a bracket but to be modified if the RQCORR +* chooses to. In this case, the RIGHT side should +* be modified as follows: +* RIGHT = MAX(RIGHT, LAMBDA + RQCORR) + ELSE +* The current LAMBDA is on the right of the true +* eigenvalue + RIGHT = LAMBDA +* See comment about assuming the error estimate is +* correct above. +* LEFT = MIN(LEFT, LAMBDA + RQCORR) + ENDIF + WORK( WINDEX ) = + $ HALF * (RIGHT + LEFT) +* Take RQCORR since it has the correct sign and +* improves the iterate reasonably + LAMBDA = LAMBDA + RQCORR +* Update width of error interval + WERR( WINDEX ) = + $ HALF * (RIGHT-LEFT) + ELSE + NEEDBS = .TRUE. + ENDIF + IF(RIGHT-LEFT.LT.RQTOL*ABS(LAMBDA)) THEN +* The eigenvalue is computed to bisection accuracy +* compute eigenvector and stop + USEDBS = .TRUE. + GOTO 120 + ELSEIF( ITER.LT.MAXITR ) THEN + GOTO 120 + ELSEIF( ITER.EQ.MAXITR ) THEN + NEEDBS = .TRUE. + GOTO 120 + ELSE + INFO = 5 + RETURN + END IF + ELSE + STP2II = .FALSE. + IF(USEDRQ .AND. USEDBS .AND. + $ BSTRES.LE.RESID) THEN + LAMBDA = BSTW + STP2II = .TRUE. + ENDIF + IF (STP2II) THEN +* improve error angle by second step + CALL CLAR1V( IN, 1, IN, LAMBDA, + $ D( IBEGIN ), L( IBEGIN ), + $ WORK(INDLD+IBEGIN-1), + $ WORK(INDLLD+IBEGIN-1), + $ PIVMIN, GAPTOL, Z( IBEGIN, WINDEX ), + $ .NOT.USEDBS, NEGCNT, ZTZ, MINGMA, + $ IWORK( IINDR+WINDEX ), + $ ISUPPZ( 2*WINDEX-1 ), + $ NRMINV, RESID, RQCORR, WORK( INDWRK ) ) + ENDIF + WORK( WINDEX ) = LAMBDA + END IF +* +* Compute FP-vector support w.r.t. whole matrix +* + ISUPPZ( 2*WINDEX-1 ) = ISUPPZ( 2*WINDEX-1 )+OLDIEN + ISUPPZ( 2*WINDEX ) = ISUPPZ( 2*WINDEX )+OLDIEN + ZFROM = ISUPPZ( 2*WINDEX-1 ) + ZTO = ISUPPZ( 2*WINDEX ) + ISUPMN = ISUPMN + OLDIEN + ISUPMX = ISUPMX + OLDIEN +* Ensure vector is ok if support in the RQI has changed + IF(ISUPMN.LT.ZFROM) THEN + DO 122 II = ISUPMN,ZFROM-1 + Z( II, WINDEX ) = ZERO + 122 CONTINUE + ENDIF + IF(ISUPMX.GT.ZTO) THEN + DO 123 II = ZTO+1,ISUPMX + Z( II, WINDEX ) = ZERO + 123 CONTINUE + ENDIF + CALL CSSCAL( ZTO-ZFROM+1, NRMINV, + $ Z( ZFROM, WINDEX ), 1 ) + 125 CONTINUE +* Update W + W( WINDEX ) = LAMBDA+SIGMA +* Recompute the gaps on the left and right +* But only allow them to become larger and not +* smaller (which can only happen through "bad" +* cancellation and doesn't reflect the theory +* where the initial gaps are underestimated due +* to WERR being too crude.) + IF(.NOT.ESKIP) THEN + IF( K.GT.1) THEN + WGAP( WINDMN ) = MAX( WGAP(WINDMN), + $ W(WINDEX)-WERR(WINDEX) + $ - W(WINDMN)-WERR(WINDMN) ) + ENDIF + IF( WINDEX.LT.WEND ) THEN + WGAP( WINDEX ) = MAX( SAVGAP, + $ W( WINDPL )-WERR( WINDPL ) + $ - W( WINDEX )-WERR( WINDEX) ) + ENDIF + ENDIF + IDONE = IDONE + 1 + ENDIF +* here ends the code for the current child +* + 139 CONTINUE +* Proceed to any remaining child nodes + NEWFST = J + 1 + 140 CONTINUE + 150 CONTINUE + NDEPTH = NDEPTH + 1 + GO TO 40 + END IF + IBEGIN = IEND + 1 + WBEGIN = WEND + 1 + 170 CONTINUE +* + + RETURN +* +* End of CLARRV +* + END diff --git a/dspl/liblapack/SRC/clarscl2.f b/dspl/liblapack/SRC/clarscl2.f new file mode 100644 index 0000000..e7c1c21 --- /dev/null +++ b/dspl/liblapack/SRC/clarscl2.f @@ -0,0 +1,122 @@ +*> \brief \b CLARSCL2 performs reciprocal diagonal scaling on a vector. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARSCL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARSCL2 ( M, N, D, X, LDX ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDX +* .. +* .. Array Arguments .. +* COMPLEX X( LDX, * ) +* REAL D( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARSCL2 performs a reciprocal diagonal scaling on an vector: +*> x <-- inv(D) * x +*> where the REAL diagonal matrix D is stored as a vector. +*> +*> Eventually to be replaced by BLAS_cge_diag_scale in the new BLAS +*> standard. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of D and X. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of X. N >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, length M +*> Diagonal matrix D, stored as a vector of length M. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,N) +*> On entry, the vector X to be scaled by D. +*> On exit, the scaled vector. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the vector X. LDX >= M. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CLARSCL2 ( M, N, D, X, LDX ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER M, N, LDX +* .. +* .. Array Arguments .. + COMPLEX X( LDX, * ) + REAL D( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. Executable Statements .. +* + DO J = 1, N + DO I = 1, M + X( I, J ) = X( I, J ) / D( I ) + END DO + END DO + + RETURN + END + diff --git a/dspl/liblapack/SRC/clartg.f b/dspl/liblapack/SRC/clartg.f new file mode 100644 index 0000000..da9a1cd --- /dev/null +++ b/dspl/liblapack/SRC/clartg.f @@ -0,0 +1,250 @@ +*> \brief \b CLARTG generates a plane rotation with real cosine and complex sine. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARTG + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARTG( F, G, CS, SN, R ) +* +* .. Scalar Arguments .. +* REAL CS +* COMPLEX F, G, R, SN +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARTG generates a plane rotation so that +*> +*> [ CS SN ] [ F ] [ R ] +*> [ __ ] . [ ] = [ ] where CS**2 + |SN|**2 = 1. +*> [ -SN CS ] [ G ] [ 0 ] +*> +*> This is a faster version of the BLAS1 routine CROTG, except for +*> the following differences: +*> F and G are unchanged on return. +*> If G=0, then CS=1 and SN=0. +*> If F=0, then CS=0 and SN is chosen so that R is real. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] F +*> \verbatim +*> F is COMPLEX +*> The first component of vector to be rotated. +*> \endverbatim +*> +*> \param[in] G +*> \verbatim +*> G is COMPLEX +*> The second component of vector to be rotated. +*> \endverbatim +*> +*> \param[out] CS +*> \verbatim +*> CS is REAL +*> The cosine of the rotation. +*> \endverbatim +*> +*> \param[out] SN +*> \verbatim +*> SN is COMPLEX +*> The sine of the rotation. +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is COMPLEX +*> The nonzero component of the rotated vector. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> 3-5-96 - Modified with a new algorithm by W. Kahan and J. Demmel +*> +*> This version has a few statements commented out for thread safety +*> (machine parameters are computed on each entry). 10 feb 03, SJH. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLARTG( F, G, CS, SN, R ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + REAL CS + COMPLEX F, G, R, SN +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL TWO, ONE, ZERO + PARAMETER ( TWO = 2.0E+0, ONE = 1.0E+0, ZERO = 0.0E+0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. +* LOGICAL FIRST + INTEGER COUNT, I + REAL D, DI, DR, EPS, F2, F2S, G2, G2S, SAFMIN, + $ SAFMN2, SAFMX2, SCALE + COMPLEX FF, FS, GS +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2 + LOGICAL SISNAN + EXTERNAL SLAMCH, SLAPY2, SISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, INT, LOG, MAX, REAL, + $ SQRT +* .. +* .. Statement Functions .. + REAL ABS1, ABSSQ +* .. +* .. Statement Function definitions .. + ABS1( FF ) = MAX( ABS( REAL( FF ) ), ABS( AIMAG( FF ) ) ) + ABSSQ( FF ) = REAL( FF )**2 + AIMAG( FF )**2 +* .. +* .. Executable Statements .. +* + SAFMIN = SLAMCH( 'S' ) + EPS = SLAMCH( 'E' ) + SAFMN2 = SLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) / + $ LOG( SLAMCH( 'B' ) ) / TWO ) + SAFMX2 = ONE / SAFMN2 + SCALE = MAX( ABS1( F ), ABS1( G ) ) + FS = F + GS = G + COUNT = 0 + IF( SCALE.GE.SAFMX2 ) THEN + 10 CONTINUE + COUNT = COUNT + 1 + FS = FS*SAFMN2 + GS = GS*SAFMN2 + SCALE = SCALE*SAFMN2 + IF( SCALE.GE.SAFMX2 ) + $ GO TO 10 + ELSE IF( SCALE.LE.SAFMN2 ) THEN + IF( G.EQ.CZERO.OR.SISNAN( ABS( G ) ) ) THEN + CS = ONE + SN = CZERO + R = F + RETURN + END IF + 20 CONTINUE + COUNT = COUNT - 1 + FS = FS*SAFMX2 + GS = GS*SAFMX2 + SCALE = SCALE*SAFMX2 + IF( SCALE.LE.SAFMN2 ) + $ GO TO 20 + END IF + F2 = ABSSQ( FS ) + G2 = ABSSQ( GS ) + IF( F2.LE.MAX( G2, ONE )*SAFMIN ) THEN +* +* This is a rare case: F is very small. +* + IF( F.EQ.CZERO ) THEN + CS = ZERO + R = SLAPY2( REAL( G ), AIMAG( G ) ) +* Do complex/real division explicitly with two real divisions + D = SLAPY2( REAL( GS ), AIMAG( GS ) ) + SN = CMPLX( REAL( GS ) / D, -AIMAG( GS ) / D ) + RETURN + END IF + F2S = SLAPY2( REAL( FS ), AIMAG( FS ) ) +* G2 and G2S are accurate +* G2 is at least SAFMIN, and G2S is at least SAFMN2 + G2S = SQRT( G2 ) +* Error in CS from underflow in F2S is at most +* UNFL / SAFMN2 .lt. sqrt(UNFL*EPS) .lt. EPS +* If MAX(G2,ONE)=G2, then F2 .lt. G2*SAFMIN, +* and so CS .lt. sqrt(SAFMIN) +* If MAX(G2,ONE)=ONE, then F2 .lt. SAFMIN +* and so CS .lt. sqrt(SAFMIN)/SAFMN2 = sqrt(EPS) +* Therefore, CS = F2S/G2S / sqrt( 1 + (F2S/G2S)**2 ) = F2S/G2S + CS = F2S / G2S +* Make sure abs(FF) = 1 +* Do complex/real division explicitly with 2 real divisions + IF( ABS1( F ).GT.ONE ) THEN + D = SLAPY2( REAL( F ), AIMAG( F ) ) + FF = CMPLX( REAL( F ) / D, AIMAG( F ) / D ) + ELSE + DR = SAFMX2*REAL( F ) + DI = SAFMX2*AIMAG( F ) + D = SLAPY2( DR, DI ) + FF = CMPLX( DR / D, DI / D ) + END IF + SN = FF*CMPLX( REAL( GS ) / G2S, -AIMAG( GS ) / G2S ) + R = CS*F + SN*G + ELSE +* +* This is the most common case. +* Neither F2 nor F2/G2 are less than SAFMIN +* F2S cannot overflow, and it is accurate +* + F2S = SQRT( ONE+G2 / F2 ) +* Do the F2S(real)*FS(complex) multiply with two real multiplies + R = CMPLX( F2S*REAL( FS ), F2S*AIMAG( FS ) ) + CS = ONE / F2S + D = F2 + G2 +* Do complex/real division explicitly with two real divisions + SN = CMPLX( REAL( R ) / D, AIMAG( R ) / D ) + SN = SN*CONJG( GS ) + IF( COUNT.NE.0 ) THEN + IF( COUNT.GT.0 ) THEN + DO 30 I = 1, COUNT + R = R*SAFMX2 + 30 CONTINUE + ELSE + DO 40 I = 1, -COUNT + R = R*SAFMN2 + 40 CONTINUE + END IF + END IF + END IF + RETURN +* +* End of CLARTG +* + END diff --git a/dspl/liblapack/SRC/clartv.f b/dspl/liblapack/SRC/clartv.f new file mode 100644 index 0000000..c366c7d --- /dev/null +++ b/dspl/liblapack/SRC/clartv.f @@ -0,0 +1,150 @@ +*> \brief \b CLARTV applies a vector of plane rotations with real cosines and complex sines to the elements of a pair of vectors. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARTV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARTV( N, X, INCX, Y, INCY, C, S, INCC ) +* +* .. Scalar Arguments .. +* INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. +* REAL C( * ) +* COMPLEX S( * ), X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARTV applies a vector of complex plane rotations with real cosines +*> to elements of the complex vectors x and y. For i = 1,2,...,n +*> +*> ( x(i) ) := ( c(i) s(i) ) ( x(i) ) +*> ( y(i) ) ( -conjg(s(i)) c(i) ) ( y(i) ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of plane rotations to be applied. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (1+(N-1)*INCX) +*> The vector x. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX array, dimension (1+(N-1)*INCY) +*> The vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> The increment between elements of Y. INCY > 0. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (1+(N-1)*INCC) +*> The cosines of the plane rotations. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is COMPLEX array, dimension (1+(N-1)*INCC) +*> The sines of the plane rotations. +*> \endverbatim +*> +*> \param[in] INCC +*> \verbatim +*> INCC is INTEGER +*> The increment between elements of C and S. INCC > 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLARTV( N, X, INCX, Y, INCY, C, S, INCC ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. + REAL C( * ) + COMPLEX S( * ), X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IC, IX, IY + COMPLEX XI, YI +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* + IX = 1 + IY = 1 + IC = 1 + DO 10 I = 1, N + XI = X( IX ) + YI = Y( IY ) + X( IX ) = C( IC )*XI + S( IC )*YI + Y( IY ) = C( IC )*YI - CONJG( S( IC ) )*XI + IX = IX + INCX + IY = IY + INCY + IC = IC + INCC + 10 CONTINUE + RETURN +* +* End of CLARTV +* + END diff --git a/dspl/liblapack/SRC/clarz.f b/dspl/liblapack/SRC/clarz.f new file mode 100644 index 0000000..b7fde54 --- /dev/null +++ b/dspl/liblapack/SRC/clarz.f @@ -0,0 +1,241 @@ +*> \brief \b CLARZ applies an elementary reflector (as returned by stzrzf) to a general matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARZ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARZ( SIDE, M, N, L, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, L, LDC, M, N +* COMPLEX TAU +* .. +* .. Array Arguments .. +* COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARZ applies a complex elementary reflector H to a complex +*> M-by-N matrix C, from either the left or the right. H is represented +*> in the form +*> +*> H = I - tau * v * v**H +*> +*> where tau is a complex scalar and v is a complex vector. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> +*> To apply H**H (the conjugate transpose of H), supply conjg(tau) instead +*> tau. +*> +*> H is a product of k elementary reflectors as returned by CTZRZF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of entries of the vector V containing +*> the meaningful part of the Householder vectors. +*> If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension (1+(L-1)*abs(INCV)) +*> The vector v in the representation of H as returned by +*> CTZRZF. V is not used if TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLARZ( SIDE, M, N, L, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, L, LDC, M, N + COMPLEX TAU +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGEMV, CGERC, CGERU, CLACGV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C +* + IF( TAU.NE.ZERO ) THEN +* +* w( 1:n ) = conjg( C( 1, 1:n ) ) +* + CALL CCOPY( N, C, LDC, WORK, 1 ) + CALL CLACGV( N, WORK, 1 ) +* +* w( 1:n ) = conjg( w( 1:n ) + C( m-l+1:m, 1:n )**H * v( 1:l ) ) +* + CALL CGEMV( 'Conjugate transpose', L, N, ONE, C( M-L+1, 1 ), + $ LDC, V, INCV, ONE, WORK, 1 ) + CALL CLACGV( N, WORK, 1 ) +* +* C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n ) +* + CALL CAXPY( N, -TAU, WORK, 1, C, LDC ) +* +* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... +* tau * v( 1:l ) * w( 1:n )**H +* + CALL CGERU( L, N, -TAU, V, INCV, WORK, 1, C( M-L+1, 1 ), + $ LDC ) + END IF +* + ELSE +* +* Form C * H +* + IF( TAU.NE.ZERO ) THEN +* +* w( 1:m ) = C( 1:m, 1 ) +* + CALL CCOPY( M, C, 1, WORK, 1 ) +* +* w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n, 1:n ) * v( 1:l ) +* + CALL CGEMV( 'No transpose', M, L, ONE, C( 1, N-L+1 ), LDC, + $ V, INCV, ONE, WORK, 1 ) +* +* C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m ) +* + CALL CAXPY( M, -TAU, WORK, 1, C, 1 ) +* +* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... +* tau * w( 1:m ) * v( 1:l )**H +* + CALL CGERC( M, L, -TAU, WORK, 1, V, INCV, C( 1, N-L+1 ), + $ LDC ) +* + END IF +* + END IF +* + RETURN +* +* End of CLARZ +* + END diff --git a/dspl/liblapack/SRC/clarzb.f b/dspl/liblapack/SRC/clarzb.f new file mode 100644 index 0000000..ffa8842 --- /dev/null +++ b/dspl/liblapack/SRC/clarzb.f @@ -0,0 +1,337 @@ +*> \brief \b CLARZB applies a block reflector or its conjugate-transpose to a general matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARZB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, +* LDV, T, LDT, C, LDC, WORK, LDWORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, SIDE, STOREV, TRANS +* INTEGER K, L, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX C( LDC, * ), T( LDT, * ), V( LDV, * ), +* $ WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARZB applies a complex block reflector H or its transpose H**H +*> to a complex distributed M-by-N C from the left or the right. +*> +*> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply H or H**H from the Left +*> = 'R': apply H or H**H from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply H (No transpose) +*> = 'C': apply H**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Indicates how H is formed from a product of elementary +*> reflectors +*> = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Indicates how the vectors which define the elementary +*> reflectors are stored: +*> = 'C': Columnwise (not supported yet) +*> = 'R': Rowwise +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the matrix T (= the number of elementary +*> reflectors whose product defines the block reflector). +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of columns of the matrix V containing the +*> meaningful part of the Householder reflectors. +*> If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension (LDV,NV). +*> If STOREV = 'C', NV = K; if STOREV = 'R', NV = L. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= L; if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,K) +*> The triangular K-by-K matrix T in the representation of the +*> block reflector. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by H*C or H**H*C or C*H or C*H**H. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LDWORK,K) +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. +*> If SIDE = 'L', LDWORK >= max(1,N); +*> if SIDE = 'R', LDWORK >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, + $ LDV, T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, L, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, INFO, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMM, CLACGV, CTRMM, XERBLA +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* +* Check for currently supported options +* + INFO = 0 + IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLARZB', -INFO ) + RETURN + END IF +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C +* +* W( 1:n, 1:k ) = C( 1:k, 1:n )**H +* + DO 10 J = 1, K + CALL CCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W( 1:n, 1:k ) = W( 1:n, 1:k ) + ... +* C( m-l+1:m, 1:n )**H * V( 1:k, 1:l )**T +* + IF( L.GT.0 ) + $ CALL CGEMM( 'Transpose', 'Conjugate transpose', N, K, L, + $ ONE, C( M-L+1, 1 ), LDC, V, LDV, ONE, WORK, + $ LDWORK ) +* +* W( 1:n, 1:k ) = W( 1:n, 1:k ) * T**T or W( 1:m, 1:k ) * T +* + CALL CTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, ONE, T, + $ LDT, WORK, LDWORK ) +* +* C( 1:k, 1:n ) = C( 1:k, 1:n ) - W( 1:n, 1:k )**H +* + DO 30 J = 1, N + DO 20 I = 1, K + C( I, J ) = C( I, J ) - WORK( J, I ) + 20 CONTINUE + 30 CONTINUE +* +* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... +* V( 1:k, 1:l )**H * W( 1:n, 1:k )**H +* + IF( L.GT.0 ) + $ CALL CGEMM( 'Transpose', 'Transpose', L, N, K, -ONE, V, LDV, + $ WORK, LDWORK, ONE, C( M-L+1, 1 ), LDC ) +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H +* +* W( 1:m, 1:k ) = C( 1:m, 1:k ) +* + DO 40 J = 1, K + CALL CCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W( 1:m, 1:k ) = W( 1:m, 1:k ) + ... +* C( 1:m, n-l+1:n ) * V( 1:k, 1:l )**H +* + IF( L.GT.0 ) + $ CALL CGEMM( 'No transpose', 'Transpose', M, K, L, ONE, + $ C( 1, N-L+1 ), LDC, V, LDV, ONE, WORK, LDWORK ) +* +* W( 1:m, 1:k ) = W( 1:m, 1:k ) * conjg( T ) or +* W( 1:m, 1:k ) * T**H +* + DO 50 J = 1, K + CALL CLACGV( K-J+1, T( J, J ), 1 ) + 50 CONTINUE + CALL CTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, ONE, T, + $ LDT, WORK, LDWORK ) + DO 60 J = 1, K + CALL CLACGV( K-J+1, T( J, J ), 1 ) + 60 CONTINUE +* +* C( 1:m, 1:k ) = C( 1:m, 1:k ) - W( 1:m, 1:k ) +* + DO 80 J = 1, K + DO 70 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 70 CONTINUE + 80 CONTINUE +* +* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... +* W( 1:m, 1:k ) * conjg( V( 1:k, 1:l ) ) +* + DO 90 J = 1, L + CALL CLACGV( K, V( 1, J ), 1 ) + 90 CONTINUE + IF( L.GT.0 ) + $ CALL CGEMM( 'No transpose', 'No transpose', M, L, K, -ONE, + $ WORK, LDWORK, V, LDV, ONE, C( 1, N-L+1 ), LDC ) + DO 100 J = 1, L + CALL CLACGV( K, V( 1, J ), 1 ) + 100 CONTINUE +* + END IF +* + RETURN +* +* End of CLARZB +* + END diff --git a/dspl/liblapack/SRC/clarzt.f b/dspl/liblapack/SRC/clarzt.f new file mode 100644 index 0000000..6bea5d4 --- /dev/null +++ b/dspl/liblapack/SRC/clarzt.f @@ -0,0 +1,266 @@ +*> \brief \b CLARZT forms the triangular factor T of a block reflector H = I - vtvH. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLARZT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLARZT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLARZT forms the triangular factor T of a complex block reflector +*> H of order > n, which is defined as a product of k elementary +*> reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**H +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**H * T * V +*> +*> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise (not supported yet) +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is COMPLEX array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored; the corresponding +*> array elements are modified but restored on exit. The rest of the +*> array is not used. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> ______V_____ +*> ( v1 v2 v3 ) / \ +*> ( v1 v2 v3 ) ( v1 v1 v1 v1 v1 . . . . 1 ) +*> V = ( v1 v2 v3 ) ( v2 v2 v2 v2 v2 . . . 1 ) +*> ( v1 v2 v3 ) ( v3 v3 v3 v3 v3 . . 1 ) +*> ( v1 v2 v3 ) +*> . . . +*> . . . +*> 1 . . +*> 1 . +*> 1 +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> ______V_____ +*> 1 / \ +*> . 1 ( 1 . . . . v1 v1 v1 v1 v1 ) +*> . . 1 ( . 1 . . . v2 v2 v2 v2 v2 ) +*> . . . ( . . 1 . . v3 v3 v3 v3 v3 ) +*> . . . +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> V = ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLARZT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + COMPLEX T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CLACGV, CTRMV, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Check for currently supported options +* + INFO = 0 + IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLARZT', -INFO ) + RETURN + END IF +* + DO 20 I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 10 J = I, K + T( J, I ) = ZERO + 10 CONTINUE + ELSE +* +* general case +* + IF( I.LT.K ) THEN +* +* T(i+1:k,i) = - tau(i) * V(i+1:k,1:n) * V(i,1:n)**H +* + CALL CLACGV( N, V( I, 1 ), LDV ) + CALL CGEMV( 'No transpose', K-I, N, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO, + $ T( I+1, I ), 1 ) + CALL CLACGV( N, V( I, 1 ), LDV ) +* +* T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL CTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + END IF + T( I, I ) = TAU( I ) + END IF + 20 CONTINUE + RETURN +* +* End of CLARZT +* + END diff --git a/dspl/liblapack/SRC/clascl.f b/dspl/liblapack/SRC/clascl.f new file mode 100644 index 0000000..b760bab --- /dev/null +++ b/dspl/liblapack/SRC/clascl.f @@ -0,0 +1,368 @@ +*> \brief \b CLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLASCL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TYPE +* INTEGER INFO, KL, KU, LDA, M, N +* REAL CFROM, CTO +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLASCL multiplies the M by N complex matrix A by the real scalar +*> CTO/CFROM. This is done without over/underflow as long as the final +*> result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that +*> A may be full, upper triangular, lower triangular, upper Hessenberg, +*> or banded. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TYPE +*> \verbatim +*> TYPE is CHARACTER*1 +*> TYPE indices the storage type of the input matrix. +*> = 'G': A is a full matrix. +*> = 'L': A is a lower triangular matrix. +*> = 'U': A is an upper triangular matrix. +*> = 'H': A is an upper Hessenberg matrix. +*> = 'B': A is a symmetric band matrix with lower bandwidth KL +*> and upper bandwidth KU and with the only the lower +*> half stored. +*> = 'Q': A is a symmetric band matrix with lower bandwidth KL +*> and upper bandwidth KU and with the only the upper +*> half stored. +*> = 'Z': A is a band matrix with lower bandwidth KL and upper +*> bandwidth KU. See CGBTRF for storage details. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The lower bandwidth of A. Referenced only if TYPE = 'B', +*> 'Q' or 'Z'. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The upper bandwidth of A. Referenced only if TYPE = 'B', +*> 'Q' or 'Z'. +*> \endverbatim +*> +*> \param[in] CFROM +*> \verbatim +*> CFROM is REAL +*> \endverbatim +*> +*> \param[in] CTO +*> \verbatim +*> CTO is REAL +*> +*> The matrix A is multiplied by CTO/CFROM. A(I,J) is computed +*> without over/underflow if the final result CTO*A(I,J)/CFROM +*> can be represented without over/underflow. CFROM must be +*> nonzero. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The matrix to be multiplied by CTO/CFROM. See TYPE for the +*> storage type. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If TYPE = 'G', 'L', 'U', 'H', LDA >= max(1,M); +*> TYPE = 'B', LDA >= KL+1; +*> TYPE = 'Q', LDA >= KU+1; +*> TYPE = 'Z', LDA >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> 0 - successful exit +*> <0 - if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER TYPE + INTEGER INFO, KL, KU, LDA, M, N + REAL CFROM, CTO +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER I, ITYPE, J, K1, K2, K3, K4 + REAL BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + REAL SLAMCH + EXTERNAL LSAME, SLAMCH, SISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 +* + IF( LSAME( TYPE, 'G' ) ) THEN + ITYPE = 0 + ELSE IF( LSAME( TYPE, 'L' ) ) THEN + ITYPE = 1 + ELSE IF( LSAME( TYPE, 'U' ) ) THEN + ITYPE = 2 + ELSE IF( LSAME( TYPE, 'H' ) ) THEN + ITYPE = 3 + ELSE IF( LSAME( TYPE, 'B' ) ) THEN + ITYPE = 4 + ELSE IF( LSAME( TYPE, 'Q' ) ) THEN + ITYPE = 5 + ELSE IF( LSAME( TYPE, 'Z' ) ) THEN + ITYPE = 6 + ELSE + ITYPE = -1 + END IF +* + IF( ITYPE.EQ.-1 ) THEN + INFO = -1 + ELSE IF( CFROM.EQ.ZERO .OR. SISNAN(CFROM) ) THEN + INFO = -4 + ELSE IF( SISNAN(CTO) ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -6 + ELSE IF( N.LT.0 .OR. ( ITYPE.EQ.4 .AND. N.NE.M ) .OR. + $ ( ITYPE.EQ.5 .AND. N.NE.M ) ) THEN + INFO = -7 + ELSE IF( ITYPE.LE.3 .AND. LDA.LT.MAX( 1, M ) ) THEN + INFO = -9 + ELSE IF( ITYPE.GE.4 ) THEN + IF( KL.LT.0 .OR. KL.GT.MAX( M-1, 0 ) ) THEN + INFO = -2 + ELSE IF( KU.LT.0 .OR. KU.GT.MAX( N-1, 0 ) .OR. + $ ( ( ITYPE.EQ.4 .OR. ITYPE.EQ.5 ) .AND. KL.NE.KU ) ) + $ THEN + INFO = -3 + ELSE IF( ( ITYPE.EQ.4 .AND. LDA.LT.KL+1 ) .OR. + $ ( ITYPE.EQ.5 .AND. LDA.LT.KU+1 ) .OR. + $ ( ITYPE.EQ.6 .AND. LDA.LT.2*KL+KU+1 ) ) THEN + INFO = -9 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLASCL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* + CFROMC = CFROM + CTOC = CTO +* + 10 CONTINUE + CFROM1 = CFROMC*SMLNUM + IF( CFROM1.EQ.CFROMC ) THEN +! CFROMC is an inf. Multiply by a correctly signed zero for +! finite CTOC, or a NaN if CTOC is infinite. + MUL = CTOC / CFROMC + DONE = .TRUE. + CTO1 = CTOC + ELSE + CTO1 = CTOC / BIGNUM + IF( CTO1.EQ.CTOC ) THEN +! CTOC is either 0 or an inf. In both cases, CTOC itself +! serves as the correct multiplication factor. + MUL = CTOC + DONE = .TRUE. + CFROMC = ONE + ELSE IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN + MUL = SMLNUM + DONE = .FALSE. + CFROMC = CFROM1 + ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN + MUL = BIGNUM + DONE = .FALSE. + CTOC = CTO1 + ELSE + MUL = CTOC / CFROMC + DONE = .TRUE. + END IF + END IF +* + IF( ITYPE.EQ.0 ) THEN +* +* Full matrix +* + DO 30 J = 1, N + DO 20 I = 1, M + A( I, J ) = A( I, J )*MUL + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( ITYPE.EQ.1 ) THEN +* +* Lower triangular matrix +* + DO 50 J = 1, N + DO 40 I = J, M + A( I, J ) = A( I, J )*MUL + 40 CONTINUE + 50 CONTINUE +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Upper triangular matrix +* + DO 70 J = 1, N + DO 60 I = 1, MIN( J, M ) + A( I, J ) = A( I, J )*MUL + 60 CONTINUE + 70 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* Upper Hessenberg matrix +* + DO 90 J = 1, N + DO 80 I = 1, MIN( J+1, M ) + A( I, J ) = A( I, J )*MUL + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Lower half of a symmetric band matrix +* + K3 = KL + 1 + K4 = N + 1 + DO 110 J = 1, N + DO 100 I = 1, MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 100 CONTINUE + 110 CONTINUE +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Upper half of a symmetric band matrix +* + K1 = KU + 2 + K3 = KU + 1 + DO 130 J = 1, N + DO 120 I = MAX( K1-J, 1 ), K3 + A( I, J ) = A( I, J )*MUL + 120 CONTINUE + 130 CONTINUE +* + ELSE IF( ITYPE.EQ.6 ) THEN +* +* Band matrix +* + K1 = KL + KU + 2 + K2 = KL + 1 + K3 = 2*KL + KU + 1 + K4 = KL + KU + 1 + M + DO 150 J = 1, N + DO 140 I = MAX( K1-J, K2 ), MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 140 CONTINUE + 150 CONTINUE +* + END IF +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of CLASCL +* + END diff --git a/dspl/liblapack/SRC/clascl2.f b/dspl/liblapack/SRC/clascl2.f new file mode 100644 index 0000000..d3a5b91 --- /dev/null +++ b/dspl/liblapack/SRC/clascl2.f @@ -0,0 +1,122 @@ +*> \brief \b CLASCL2 performs diagonal scaling on a vector. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLASCL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLASCL2 ( M, N, D, X, LDX ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDX +* .. +* .. Array Arguments .. +* REAL D( * ) +* COMPLEX X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLASCL2 performs a diagonal scaling on a vector: +*> x <-- D * x +*> where the diagonal REAL matrix D is stored as a vector. +*> +*> Eventually to be replaced by BLAS_cge_diag_scale in the new BLAS +*> standard. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of D and X. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of X. N >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, length M +*> Diagonal matrix D, stored as a vector of length M. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,N) +*> On entry, the vector X to be scaled by D. +*> On exit, the scaled vector. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the vector X. LDX >= M. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CLASCL2 ( M, N, D, X, LDX ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER M, N, LDX +* .. +* .. Array Arguments .. + REAL D( * ) + COMPLEX X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. Executable Statements .. +* + DO J = 1, N + DO I = 1, M + X( I, J ) = X( I, J ) * D( I ) + END DO + END DO + + RETURN + END + diff --git a/dspl/liblapack/SRC/claset.f b/dspl/liblapack/SRC/claset.f new file mode 100644 index 0000000..fe49fd5 --- /dev/null +++ b/dspl/liblapack/SRC/claset.f @@ -0,0 +1,184 @@ +*> \brief \b CLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLASET + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLASET( UPLO, M, N, ALPHA, BETA, A, LDA ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER LDA, M, N +* COMPLEX ALPHA, BETA +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLASET initializes a 2-D array A to BETA on the diagonal and +*> ALPHA on the offdiagonals. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies the part of the matrix A to be set. +*> = 'U': Upper triangular part is set. The lower triangle +*> is unchanged. +*> = 'L': Lower triangular part is set. The upper triangle +*> is unchanged. +*> Otherwise: All of the matrix A is set. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the number of rows of A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of A. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX +*> All the offdiagonal array elements are set to ALPHA. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is COMPLEX +*> All the diagonal array elements are set to BETA. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j; +*> A(i,i) = BETA , 1 <= i <= min(m,n) +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLASET( UPLO, M, N, ALPHA, BETA, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, M, N + COMPLEX ALPHA, BETA +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Set the diagonal to BETA and the strictly upper triangular +* part of the array to ALPHA. +* + DO 20 J = 2, N + DO 10 I = 1, MIN( J-1, M ) + A( I, J ) = ALPHA + 10 CONTINUE + 20 CONTINUE + DO 30 I = 1, MIN( N, M ) + A( I, I ) = BETA + 30 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN +* +* Set the diagonal to BETA and the strictly lower triangular +* part of the array to ALPHA. +* + DO 50 J = 1, MIN( M, N ) + DO 40 I = J + 1, M + A( I, J ) = ALPHA + 40 CONTINUE + 50 CONTINUE + DO 60 I = 1, MIN( N, M ) + A( I, I ) = BETA + 60 CONTINUE +* + ELSE +* +* Set the array to BETA on the diagonal and ALPHA on the +* offdiagonal. +* + DO 80 J = 1, N + DO 70 I = 1, M + A( I, J ) = ALPHA + 70 CONTINUE + 80 CONTINUE + DO 90 I = 1, MIN( M, N ) + A( I, I ) = BETA + 90 CONTINUE + END IF +* + RETURN +* +* End of CLASET +* + END diff --git a/dspl/liblapack/SRC/clasr.f b/dspl/liblapack/SRC/clasr.f new file mode 100644 index 0000000..8c6216b --- /dev/null +++ b/dspl/liblapack/SRC/clasr.f @@ -0,0 +1,439 @@ +*> \brief \b CLASR applies a sequence of plane rotations to a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLASR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, PIVOT, SIDE +* INTEGER LDA, M, N +* .. +* .. Array Arguments .. +* REAL C( * ), S( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLASR applies a sequence of real plane rotations to a complex matrix +*> A, from either the left or the right. +*> +*> When SIDE = 'L', the transformation takes the form +*> +*> A := P*A +*> +*> and when SIDE = 'R', the transformation takes the form +*> +*> A := A*P**T +*> +*> where P is an orthogonal matrix consisting of a sequence of z plane +*> rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', +*> and P**T is the transpose of P. +*> +*> When DIRECT = 'F' (Forward sequence), then +*> +*> P = P(z-1) * ... * P(2) * P(1) +*> +*> and when DIRECT = 'B' (Backward sequence), then +*> +*> P = P(1) * P(2) * ... * P(z-1) +*> +*> where P(k) is a plane rotation matrix defined by the 2-by-2 rotation +*> +*> R(k) = ( c(k) s(k) ) +*> = ( -s(k) c(k) ). +*> +*> When PIVOT = 'V' (Variable pivot), the rotation is performed +*> for the plane (k,k+1), i.e., P(k) has the form +*> +*> P(k) = ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( c(k) s(k) ) +*> ( -s(k) c(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> +*> where R(k) appears as a rank-2 modification to the identity matrix in +*> rows and columns k and k+1. +*> +*> When PIVOT = 'T' (Top pivot), the rotation is performed for the +*> plane (1,k+1), so P(k) has the form +*> +*> P(k) = ( c(k) s(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( -s(k) c(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> +*> where R(k) appears in rows and columns 1 and k+1. +*> +*> Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is +*> performed for the plane (k,z), giving P(k) the form +*> +*> P(k) = ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( c(k) s(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( -s(k) c(k) ) +*> +*> where R(k) appears in rows and columns k and z. The rotations are +*> performed without ever forming P(k) explicitly. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> Specifies whether the plane rotation matrix P is applied to +*> A on the left or the right. +*> = 'L': Left, compute A := P*A +*> = 'R': Right, compute A:= A*P**T +*> \endverbatim +*> +*> \param[in] PIVOT +*> \verbatim +*> PIVOT is CHARACTER*1 +*> Specifies the plane for which P(k) is a plane rotation +*> matrix. +*> = 'V': Variable pivot, the plane (k,k+1) +*> = 'T': Top pivot, the plane (1,k+1) +*> = 'B': Bottom pivot, the plane (k,z) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies whether P is a forward or backward sequence of +*> plane rotations. +*> = 'F': Forward, P = P(z-1)*...*P(2)*P(1) +*> = 'B': Backward, P = P(1)*P(2)*...*P(z-1) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. If m <= 1, an immediate +*> return is effected. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. If n <= 1, an +*> immediate return is effected. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> The cosines c(k) of the plane rotations. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is REAL array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> The sines s(k) of the plane rotations. The 2-by-2 plane +*> rotation part of the matrix P(k), R(k), has the form +*> R(k) = ( c(k) s(k) ) +*> ( -s(k) c(k) ). +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The M-by-N matrix A. On exit, A is overwritten by P*A if +*> SIDE = 'R' or by A*P**T if SIDE = 'L'. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, PIVOT, SIDE + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + REAL C( * ), S( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J + REAL CTEMP, STEMP + COMPLEX TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.( LSAME( SIDE, 'L' ) .OR. LSAME( SIDE, 'R' ) ) ) THEN + INFO = 1 + ELSE IF( .NOT.( LSAME( PIVOT, 'V' ) .OR. LSAME( PIVOT, + $ 'T' ) .OR. LSAME( PIVOT, 'B' ) ) ) THEN + INFO = 2 + ELSE IF( .NOT.( LSAME( DIRECT, 'F' ) .OR. LSAME( DIRECT, 'B' ) ) ) + $ THEN + INFO = 3 + ELSE IF( M.LT.0 ) THEN + INFO = 4 + ELSE IF( N.LT.0 ) THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = 9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLASR ', INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form P * A +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 20 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 10 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 40 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 30 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 30 CONTINUE + END IF + 40 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 60 J = 2, M + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 50 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 80 J = M, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 70 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 70 CONTINUE + END IF + 80 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 100 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 90 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 120 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 110 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 110 CONTINUE + END IF + 120 CONTINUE + END IF + END IF + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form A * P**T +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 140 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 130 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 130 CONTINUE + END IF + 140 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 160 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 150 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 150 CONTINUE + END IF + 160 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 180 J = 2, N + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 170 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 170 CONTINUE + END IF + 180 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 200 J = N, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 190 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 190 CONTINUE + END IF + 200 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 220 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 210 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 210 CONTINUE + END IF + 220 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 240 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 230 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 230 CONTINUE + END IF + 240 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of CLASR +* + END diff --git a/dspl/liblapack/SRC/classq.f b/dspl/liblapack/SRC/classq.f new file mode 100644 index 0000000..2839859 --- /dev/null +++ b/dspl/liblapack/SRC/classq.f @@ -0,0 +1,168 @@ +*> \brief \b CLASSQ updates a sum of squares represented in scaled form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLASSQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLASSQ( N, X, INCX, SCALE, SUMSQ ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* REAL SCALE, SUMSQ +* .. +* .. Array Arguments .. +* COMPLEX X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLASSQ returns the values scl and ssq such that +*> +*> ( scl**2 )*ssq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, +*> +*> where x( i ) = abs( X( 1 + ( i - 1 )*INCX ) ). The value of sumsq is +*> assumed to be at least unity and the value of ssq will then satisfy +*> +*> 1.0 .le. ssq .le. ( sumsq + 2*n ). +*> +*> scale is assumed to be non-negative and scl returns the value +*> +*> scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ), +*> i +*> +*> scale and sumsq must be supplied in SCALE and SUMSQ respectively. +*> SCALE and SUMSQ are overwritten by scl and ssq respectively. +*> +*> The routine makes only one pass through the vector X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of elements to be used from the vector X. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX array, dimension (N) +*> The vector x as described above. +*> x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive values of the vector X. +*> INCX > 0. +*> \endverbatim +*> +*> \param[in,out] SCALE +*> \verbatim +*> SCALE is REAL +*> On entry, the value scale in the equation above. +*> On exit, SCALE is overwritten with the value scl . +*> \endverbatim +*> +*> \param[in,out] SUMSQ +*> \verbatim +*> SUMSQ is REAL +*> On entry, the value sumsq in the equation above. +*> On exit, SUMSQ is overwritten with the value ssq . +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLASSQ( N, X, INCX, SCALE, SUMSQ ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCX, N + REAL SCALE, SUMSQ +* .. +* .. Array Arguments .. + COMPLEX X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER IX + REAL TEMP1 +* .. +* .. External Functions .. + LOGICAL SISNAN + EXTERNAL SISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, REAL +* .. +* .. Executable Statements .. +* + IF( N.GT.0 ) THEN + DO 10 IX = 1, 1 + ( N-1 )*INCX, INCX + TEMP1 = ABS( REAL( X( IX ) ) ) + IF( TEMP1.GT.ZERO.OR.SISNAN( TEMP1 ) ) THEN + IF( SCALE.LT.TEMP1 ) THEN + SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2 + SCALE = TEMP1 + ELSE + SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 + END IF + END IF + TEMP1 = ABS( AIMAG( X( IX ) ) ) + IF( TEMP1.GT.ZERO.OR.SISNAN( TEMP1 ) ) THEN + IF( SCALE.LT.TEMP1 .OR. SISNAN( TEMP1 ) ) THEN + SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2 + SCALE = TEMP1 + ELSE + SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 + END IF + END IF + 10 CONTINUE + END IF +* + RETURN +* +* End of CLASSQ +* + END diff --git a/dspl/liblapack/SRC/claswlq.f b/dspl/liblapack/SRC/claswlq.f new file mode 100644 index 0000000..5fa2276 --- /dev/null +++ b/dspl/liblapack/SRC/claswlq.f @@ -0,0 +1,262 @@ +* +* Definition: +* =========== +* +* SUBROUTINE CLASWLQ( M, N, MB, NB, A, LDA, T, LDT, WORK, +* LWORK, INFO) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, MB, NB, LDT, LWORK +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLASWLQ computes a blocked Short-Wide LQ factorization of a +*> M-by-N matrix A, where N >= M: +*> A = L * Q +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= M >= 0. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The row block size to be used in the blocked QR. +*> M >= MB >= 1 +*> \endverbatim +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The column block size to be used in the blocked QR. +*> NB > M. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and below the diagonal +*> of the array contain the N-by-N lower triangular matrix L; +*> the elements above the diagonal represent Q by the rows +*> of blocked V (see Further Details). +*> +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, +*> dimension (LDT, N * Number_of_row_blocks) +*> where Number_of_row_blocks = CEIL((N-M)/(NB-M)) +*> The blocked upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. +*> See Further Details below. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX array, dimension (MAX(1,LWORK)) +*> +*> \endverbatim +*> \param[in] LWORK +*> \verbatim +*> The dimension of the array WORK. LWORK >= MB*M. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> +*> \endverbatim +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> Short-Wide LQ (SWLQ) performs LQ by a sequence of orthogonal transformations, +*> representing Q as a product of other orthogonal matrices +*> Q = Q(1) * Q(2) * . . . * Q(k) +*> where each Q(i) zeros out upper diagonal entries of a block of NB rows of A: +*> Q(1) zeros out the upper diagonal entries of rows 1:NB of A +*> Q(2) zeros out the bottom MB-N rows of rows [1:M,NB+1:2*NB-M] of A +*> Q(3) zeros out the bottom MB-N rows of rows [1:M,2*NB-M+1:3*NB-2*M] of A +*> . . . +*> +*> Q(1) is computed by GELQT, which represents Q(1) by Householder vectors +*> stored under the diagonal of rows 1:MB of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,1:N). +*> For more information see Further Details in GELQT. +*> +*> Q(i) for i>1 is computed by TPLQT, which represents Q(i) by Householder vectors +*> stored in columns [(i-1)*(NB-M)+M+1:i*(NB-M)+M] of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,(i-1)*M+1:i*M). +*> The last Q(k) may use fewer rows. +*> For more information see Further Details in TPQRT. +*> +*> For more details of the overall algorithm, see the description of +*> Sequential TSQR in Section 2.2 of [1]. +*> +*> [1] “Communication-Optimal Parallel and Sequential QR and LU Factorizations,” +*> J. Demmel, L. Grigori, M. Hoemmen, J. Langou, +*> SIAM J. Sci. Comput, vol. 34, no. 1, 2012 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLASWLQ( M, N, MB, NB, A, LDA, T, LDT, WORK, LWORK, + $ INFO) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, MB, NB, LWORK, LDT +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), WORK( * ), T( LDT, *) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, II, KK, CTR +* .. +* .. EXTERNAL FUNCTIONS .. + LOGICAL LSAME + EXTERNAL LSAME +* .. EXTERNAL SUBROUTINES .. + EXTERNAL CGELQT, CTPLQT, XERBLA +* .. INTRINSIC FUNCTIONS .. + INTRINSIC MAX, MIN, MOD +* .. +* .. EXTERNAL FUNCTIONS .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. EXECUTABLE STATEMENTS .. +* +* TEST THE INPUT ARGUMENTS +* + INFO = 0 +* + LQUERY = ( LWORK.EQ.-1 ) +* + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.LT.M ) THEN + INFO = -2 + ELSE IF( MB.LT.1 .OR. ( MB.GT.M .AND. M.GT.0 )) THEN + INFO = -3 + ELSE IF( NB.LE.M ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDT.LT.MB ) THEN + INFO = -8 + ELSE IF( ( LWORK.LT.M*MB) .AND. (.NOT.LQUERY) ) THEN + INFO = -10 + END IF + IF( INFO.EQ.0) THEN + WORK(1) = MB*M + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLASWLQ', -INFO ) + RETURN + ELSE IF (LQUERY) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN(M,N).EQ.0 ) THEN + RETURN + END IF +* +* The LQ Decomposition +* + IF((M.GE.N).OR.(NB.LE.M).OR.(NB.GE.N)) THEN + CALL CGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO) + RETURN + END IF +* + KK = MOD((N-M),(NB-M)) + II=N-KK+1 +* +* Compute the LQ factorization of the first block A(1:M,1:NB) +* + CALL CGELQT( M, NB, MB, A(1,1), LDA, T, LDT, WORK, INFO) + CTR = 1 +* + DO I = NB+1, II-NB+M , (NB-M) +* +* Compute the QR factorization of the current block A(1:M,I:I+NB-M) +* + CALL CTPLQT( M, NB-M, 0, MB, A(1,1), LDA, A( 1, I ), + $ LDA, T(1,CTR*M+1), + $ LDT, WORK, INFO ) + CTR = CTR + 1 + END DO +* +* Compute the QR factorization of the last block A(1:M,II:N) +* + IF (II.LE.N) THEN + CALL CTPLQT( M, KK, 0, MB, A(1,1), LDA, A( 1, II ), + $ LDA, T(1,CTR*M+1), LDT, + $ WORK, INFO ) + END IF +* + WORK( 1 ) = M * MB + RETURN +* +* End of CLASWLQ +* + END diff --git a/dspl/liblapack/SRC/claswp.f b/dspl/liblapack/SRC/claswp.f new file mode 100644 index 0000000..9f8a2dc --- /dev/null +++ b/dspl/liblapack/SRC/claswp.f @@ -0,0 +1,193 @@ +*> \brief \b CLASWP performs a series of row interchanges on a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLASWP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLASWP( N, A, LDA, K1, K2, IPIV, INCX ) +* +* .. Scalar Arguments .. +* INTEGER INCX, K1, K2, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLASWP performs a series of row interchanges on the matrix A. +*> One row interchange is initiated for each of rows K1 through K2 of A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the matrix of column dimension N to which the row +*> interchanges will be applied. +*> On exit, the permuted matrix. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> \endverbatim +*> +*> \param[in] K1 +*> \verbatim +*> K1 is INTEGER +*> The first element of IPIV for which a row interchange will +*> be done. +*> \endverbatim +*> +*> \param[in] K2 +*> \verbatim +*> K2 is INTEGER +*> (K2-K1+1) is the number of elements of IPIV for which a row +*> interchange will be done. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (K1+(K2-K1)*abs(INCX)) +*> The vector of pivot indices. Only the elements in positions +*> K1 through K1+(K2-K1)*abs(INCX) of IPIV are accessed. +*> IPIV(K1+(K-K1)*abs(INCX)) = L implies rows K and L are to be +*> interchanged. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive values of IPIV. If INCX +*> is negative, the pivots are applied in reverse order. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Modified by +*> R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLASWP( N, A, LDA, K1, K2, IPIV, INCX ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INCX, K1, K2, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, I1, I2, INC, IP, IX, IX0, J, K, N32 + COMPLEX TEMP +* .. +* .. Executable Statements .. +* +* Interchange row I with row IPIV(K1+(I-K1)*abs(INCX)) for each of rows +* K1 through K2. +* + IF( INCX.GT.0 ) THEN + IX0 = K1 + I1 = K1 + I2 = K2 + INC = 1 + ELSE IF( INCX.LT.0 ) THEN + IX0 = K1 + ( K1-K2 )*INCX + I1 = K2 + I2 = K1 + INC = -1 + ELSE + RETURN + END IF +* + N32 = ( N / 32 )*32 + IF( N32.NE.0 ) THEN + DO 30 J = 1, N32, 32 + IX = IX0 + DO 20 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 10 K = J, J + 31 + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 10 CONTINUE + END IF + IX = IX + INCX + 20 CONTINUE + 30 CONTINUE + END IF + IF( N32.NE.N ) THEN + N32 = N32 + 1 + IX = IX0 + DO 50 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 40 K = N32, N + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 40 CONTINUE + END IF + IX = IX + INCX + 50 CONTINUE + END IF +* + RETURN +* +* End of CLASWP +* + END diff --git a/dspl/liblapack/SRC/clasyf.f b/dspl/liblapack/SRC/clasyf.f new file mode 100644 index 0000000..ade36f9 --- /dev/null +++ b/dspl/liblapack/SRC/clasyf.f @@ -0,0 +1,832 @@ +*> \brief \b CLASYF computes a partial factorization of a complex symmetric matrix using the Bunch-Kaufman diagonal pivoting method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLASYF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLASYF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLASYF computes a partial factorization of a complex symmetric matrix +*> A using the Bunch-Kaufman diagonal pivoting method. The partial +*> factorization has the form: +*> +*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +*> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) +*> +*> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L' +*> ( L21 I ) ( 0 A22 ) ( 0 I ) +*> +*> where the order of D is at most NB. The actual order is returned in +*> the argument KB, and is either NB or NB-1, or N if N <= NB. +*> Note that U**T denotes the transpose of U. +*> +*> CLASYF is an auxiliary routine called by CSYTRF. It uses blocked code +*> (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or +*> A22 (if UPLO = 'L'). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The maximum number of columns of the matrix A that should be +*> factored. NB should be at least 2 to allow for 2-by-2 pivot +*> blocks. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns of A that were actually factored. +*> KB is either NB-1 or NB, or N if N <= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, A contains details of the partial factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> Only the last KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k-1) < 0, then rows and columns +*> k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> Only the first KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k+1) < 0, then rows and columns +*> k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1) +*> is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX array, dimension (LDW,NB) +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup complexSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CLASYF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER IMAX, J, JB, JJ, JMAX, JP, K, KK, KKW, KP, + $ KSTEP, KW + REAL ABSAKK, ALPHA, COLMAX, ROWMAX + COMPLEX D11, D21, D22, R1, T, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + EXTERNAL LSAME, ICAMAX +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMM, CGEMV, CSCAL, CSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, MIN, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( REAL( Z ) ) + ABS( AIMAG( Z ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* +* KW is the column of W which corresponds to column K of A +* + K = N + 10 CONTINUE + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* +* Copy column K of A to column KW of W and update it +* + CALL CCOPY( K, A( 1, K ), 1, W( 1, KW ), 1 ) + IF( K.LT.N ) + $ CALL CGEMV( 'No transpose', K, N-K, -CONE, A( 1, K+1 ), LDA, + $ W( K, KW+1 ), LDW, CONE, W( 1, KW ), 1 ) +* + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( W( K, KW ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = ICAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = CABS1( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* Copy column IMAX to column KW-1 of W and update it +* + CALL CCOPY( IMAX, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 ) + CALL CCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) + IF( K.LT.N ) + $ CALL CGEMV( 'No transpose', K, N-K, -CONE, + $ A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW, + $ CONE, W( 1, KW-1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + ICAMAX( K-IMAX, W( IMAX+1, KW-1 ), 1 ) + ROWMAX = CABS1( W( JMAX, KW-1 ) ) + IF( IMAX.GT.1 ) THEN + JMAX = ICAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, KW-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( W( IMAX, KW-1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW of W +* + CALL CCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K - KSTEP + 1 +* +* KKW is the column of W which corresponds to column KK of A +* + KKW = NB + KK - N +* +* Interchange rows and columns KP and KK. +* Updated column KP is already stored in column KKW of W. +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP of submatrix A +* at step K. No need to copy element into column K +* (or K and K-1 for 2-by-2 pivot) of A, since these columns +* will be later overwritten. +* + A( KP, KP ) = A( KK, KK ) + CALL CCOPY( KK-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + IF( KP.GT.1 ) + $ CALL CCOPY( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last K+1 to N columns of A +* (columns K (or K and K-1 for 2-by-2 pivot) of A will be +* later overwritten). Interchange rows KK and KP +* in last KKW to NB columns of W. +* + IF( K.LT.N ) + $ CALL CSWAP( N-K, A( KK, K+1 ), LDA, A( KP, K+1 ), + $ LDA ) + CALL CSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column kw of W now holds +* +* W(kw) = U(k)*D(k), +* +* where U(k) is the k-th column of U +* +* Store subdiag. elements of column U(k) +* and 1-by-1 block D(k) in column k of A. +* NOTE: Diagonal element U(k,k) is a UNIT element +* and not stored. +* A(k,k) := D(k,k) = W(k,kw) +* A(1:k-1,k) := U(1:k-1,k) = W(1:k-1,kw)/D(k,k) +* + CALL CCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + R1 = CONE / A( K, K ) + CALL CSCAL( K-1, R1, A( 1, K ), 1 ) +* + ELSE +* +* 2-by-2 pivot block D(k): columns kw and kw-1 of W now hold +* +* ( W(kw-1) W(kw) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Store U(1:k-2,k-1) and U(1:k-2,k) and 2-by-2 +* block D(k-1:k,k-1:k) in columns k-1 and k of A. +* NOTE: 2-by-2 diagonal block U(k-1:k,k-1:k) is a UNIT +* block and not stored. +* A(k-1:k,k-1:k) := D(k-1:k,k-1:k) = W(k-1:k,kw-1:kw) +* A(1:k-2,k-1:k) := U(1:k-2,k:k-1:k) = +* = W(1:k-2,kw-1:kw) * ( D(k-1:k,k-1:k)**(-1) ) +* + IF( K.GT.2 ) THEN +* +* Compose the columns of the inverse of 2-by-2 pivot +* block D in the following way to reduce the number +* of FLOPS when we myltiply panel ( W(kw-1) W(kw) ) by +* this inverse +* +* D**(-1) = ( d11 d21 )**(-1) = +* ( d21 d22 ) +* +* = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) = +* ( (-d21 ) ( d11 ) ) +* +* = 1/d21 * 1/((d11/d21)*(d22/d21)-1) * +* +* * ( ( d22/d21 ) ( -1 ) ) = +* ( ( -1 ) ( d11/d21 ) ) +* +* = 1/d21 * 1/(D22*D11-1) * ( ( D11 ) ( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = 1/d21 * T * ( ( D11 ) ( -1 ) ) +* ( ( -1 ) ( D22 ) ) +* +* = D21 * ( ( D11 ) ( -1 ) ) +* ( ( -1 ) ( D22 ) ) +* + D21 = W( K-1, KW ) + D11 = W( K, KW ) / D21 + D22 = W( K-1, KW-1 ) / D21 + T = CONE / ( D11*D22-CONE ) +* +* Update elements in columns A(k-1) and A(k) as +* dot products of rows of ( W(kw-1) W(kw) ) and columns +* of D**(-1) +* + D21 = T / D21 + DO 20 J = 1, K - 2 + A( J, K-1 ) = D21*( D11*W( J, KW-1 )-W( J, KW ) ) + A( J, K ) = D21*( D22*W( J, KW )-W( J, KW-1 ) ) + 20 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = W( K-1, KW ) + A( K, K ) = W( K, KW ) +* + END IF +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12**T = A11 - U12*W**T +* +* computing blocks of NB columns at a time +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + CALL CGEMV( 'No transpose', JJ-J+1, N-K, -CONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, CONE, + $ A( J, JJ ), 1 ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + CALL CGEMM( 'No transpose', 'Transpose', J-1, JB, N-K, + $ -CONE, A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, + $ CONE, A( 1, J ), LDA ) + 50 CONTINUE +* +* Put U12 in standard form by partially undoing the interchanges +* in columns k+1:n looping backwards from k+1 to n +* + J = K + 1 + 60 CONTINUE +* +* Undo the interchanges (if any) of rows JJ and JP at each +* step J +* +* (Here, J is a diagonal index) + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP +* (Here, J is a diagonal index) + J = J + 1 + END IF +* (NOTE: Here, J is used to determine row length. Length N-J+1 +* of the rows to swap back doesn't include diagonal element) + J = J + 1 + IF( JP.NE.JJ .AND. J.LE.N ) + $ CALL CSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA ) + IF( J.LT.N ) + $ GO TO 60 +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* +* Copy column K of A to column K of W and update it +* + CALL CCOPY( N-K+1, A( K, K ), 1, W( K, K ), 1 ) + CALL CGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), LDA, + $ W( K, 1 ), LDW, CONE, W( K, K ), 1 ) +* + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( W( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + ICAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = CABS1( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* Copy column IMAX to column K+1 of W and update it +* + CALL CCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1 ) + CALL CCOPY( N-IMAX+1, A( IMAX, IMAX ), 1, W( IMAX, K+1 ), + $ 1 ) + CALL CGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), + $ LDA, W( IMAX, 1 ), LDW, CONE, W( K, K+1 ), + $ 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + ICAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = CABS1( W( JMAX, K+1 ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + ICAMAX( N-IMAX, W( IMAX+1, K+1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, K+1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( W( IMAX, K+1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K of W +* + CALL CCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K + KSTEP - 1 +* +* Interchange rows and columns KP and KK. +* Updated column KP is already stored in column KK of W. +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP of submatrix A +* at step K. No need to copy element into column K +* (or K and K+1 for 2-by-2 pivot) of A, since these columns +* will be later overwritten. +* + A( KP, KP ) = A( KK, KK ) + CALL CCOPY( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + IF( KP.LT.N ) + $ CALL CCOPY( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) +* +* Interchange rows KK and KP in first K-1 columns of A +* (columns K (or K and K+1 for 2-by-2 pivot) of A will be +* later overwritten). Interchange rows KK and KP +* in first KK columns of W. +* + IF( K.GT.1 ) + $ CALL CSWAP( K-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL CSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k), +* +* where L(k) is the k-th column of L +* +* Store subdiag. elements of column L(k) +* and 1-by-1 block D(k) in column k of A. +* (NOTE: Diagonal element L(k,k) is a UNIT element +* and not stored) +* A(k,k) := D(k,k) = W(k,k) +* A(k+1:N,k) := L(k+1:N,k) = W(k+1:N,k)/D(k,k) +* + CALL CCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN + R1 = CONE / A( K, K ) + CALL CSCAL( N-K, R1, A( K+1, K ), 1 ) + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* Store L(k+2:N,k) and L(k+2:N,k+1) and 2-by-2 +* block D(k:k+1,k:k+1) in columns k and k+1 of A. +* (NOTE: 2-by-2 diagonal block L(k:k+1,k:k+1) is a UNIT +* block and not stored) +* A(k:k+1,k:k+1) := D(k:k+1,k:k+1) = W(k:k+1,k:k+1) +* A(k+2:N,k:k+1) := L(k+2:N,k:k+1) = +* = W(k+2:N,k:k+1) * ( D(k:k+1,k:k+1)**(-1) ) +* + IF( K.LT.N-1 ) THEN +* +* Compose the columns of the inverse of 2-by-2 pivot +* block D in the following way to reduce the number +* of FLOPS when we myltiply panel ( W(k) W(k+1) ) by +* this inverse +* +* D**(-1) = ( d11 d21 )**(-1) = +* ( d21 d22 ) +* +* = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) = +* ( (-d21 ) ( d11 ) ) +* +* = 1/d21 * 1/((d11/d21)*(d22/d21)-1) * +* +* * ( ( d22/d21 ) ( -1 ) ) = +* ( ( -1 ) ( d11/d21 ) ) +* +* = 1/d21 * 1/(D22*D11-1) * ( ( D11 ) ( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = 1/d21 * T * ( ( D11 ) ( -1 ) ) +* ( ( -1 ) ( D22 ) ) +* +* = D21 * ( ( D11 ) ( -1 ) ) +* ( ( -1 ) ( D22 ) ) +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / D21 + T = CONE / ( D11*D22-CONE ) + D21 = T / D21 +* +* Update elements in columns A(k) and A(k+1) as +* dot products of rows of ( W(k) W(k+1) ) and columns +* of D**(-1) +* + DO 80 J = K + 2, N + A( J, K ) = D21*( D11*W( J, K )-W( J, K+1 ) ) + A( J, K+1 ) = D21*( D22*W( J, K+1 )-W( J, K ) ) + 80 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = W( K+1, K ) + A( K+1, K+1 ) = W( K+1, K+1 ) +* + END IF +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21**T = A22 - L21*W**T +* +* computing blocks of NB columns at a time +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + CALL CGEMV( 'No transpose', J+JB-JJ, K-1, -CONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, CONE, + $ A( JJ, JJ ), 1 ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL CGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -CONE, A( J+JB, 1 ), LDA, W( J, 1 ), + $ LDW, CONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Put L21 in standard form by partially undoing the interchanges +* of rows in columns 1:k-1 looping backwards from k-1 to 1 +* + J = K - 1 + 120 CONTINUE +* +* Undo the interchanges (if any) of rows JJ and JP at each +* step J +* +* (Here, J is a diagonal index) + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP +* (Here, J is a diagonal index) + J = J - 1 + END IF +* (NOTE: Here, J is used to determine row length. Length J +* of the rows to swap back doesn't include diagonal element) + J = J - 1 + IF( JP.NE.JJ .AND. J.GE.1 ) + $ CALL CSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA ) + IF( J.GT.1 ) + $ GO TO 120 +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF + RETURN +* +* End of CLASYF +* + END diff --git a/dspl/liblapack/SRC/clasyf_aa.f b/dspl/liblapack/SRC/clasyf_aa.f new file mode 100644 index 0000000..1bc96ee --- /dev/null +++ b/dspl/liblapack/SRC/clasyf_aa.f @@ -0,0 +1,493 @@ +*> \brief \b CLASYF_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLASYF_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLASYF_AA( UPLO, J1, M, NB, A, LDA, IPIV, +* H, LDH, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER J1, M, NB, LDA, LDH +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), H( LDH, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLATRF_AA factorizes a panel of a complex symmetric matrix A using +*> the Aasen's algorithm. The panel consists of a set of NB rows of A +*> when UPLO is U, or a set of NB columns when UPLO is L. +*> +*> In order to factorize the panel, the Aasen's algorithm requires the +*> last row, or column, of the previous panel. The first row, or column, +*> of A is set to be the first row, or column, of an identity matrix, +*> which is used to factorize the first panel. +*> +*> The resulting J-th row of U, or J-th column of L, is stored in the +*> (J-1)-th row, or column, of A (without the unit diagonals), while +*> the diagonal and subdiagonal of A are overwritten by those of T. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] J1 +*> \verbatim +*> J1 is INTEGER +*> The location of the first row, or column, of the panel +*> within the submatrix of A, passed to this routine, e.g., +*> when called by CSYTRF_AA, for the first panel, J1 is 1, +*> while for the remaining panels, J1 is 2. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The dimension of the submatrix. M >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The dimension of the panel to be facotorized. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,M) for +*> the first panel, while dimension (LDA,M+1) for the +*> remaining panels. +*> +*> On entry, A contains the last row, or column, of +*> the previous panel, and the trailing submatrix of A +*> to be factorized, except for the first panel, only +*> the panel is passed. +*> +*> On exit, the leading panel is factorized. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (M) +*> Details of the row and column interchanges, +*> the row and column k were interchanged with the row and +*> column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is REAL workspace, dimension (LDH,NB). +*> +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the workspace H. LDH >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL workspace, dimension (M). +*> \endverbatim +*> +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CLASYF_AA( UPLO, J1, M, NB, A, LDA, IPIV, + $ H, LDH, WORK ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER M, NB, J1, LDA, LDH +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), H( LDH, * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* +* .. Local Scalars .. + INTEGER J, K, K1, I1, I2, MJ + COMPLEX PIV, ALPHA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX, ILAENV + EXTERNAL LSAME, ILAENV, ICAMAX +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CGEMV, CSCAL, CCOPY, CSWAP, CLASET, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + J = 1 +* +* K1 is the first column of the panel to be factorized +* i.e., K1 is 2 for the first block column, and 1 for the rest of the blocks +* + K1 = (2-J1)+1 +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* ..................................................... +* Factorize A as U**T*D*U using the upper triangle of A +* ..................................................... +* + 10 CONTINUE + IF ( J.GT.MIN(M, NB) ) + $ GO TO 20 +* +* K is the column to be factorized +* when being called from CSYTRF_AA, +* > for the first block column, J1 is 1, hence J1+J-1 is J, +* > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, +* + K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF +* +* H(J:M, J) := A(J, J:M) - H(J:M, 1:(J-1)) * L(J1:(J-1), J), +* where H(J:M, J) has been initialized to be A(J, J:M) +* + IF( K.GT.2 ) THEN +* +* K is the column to be factorized +* > for the first block column, K is J, skipping the first two +* columns +* > for the rest of the columns, K is J+1, skipping only the +* first column +* + CALL CGEMV( 'No transpose', MJ, J-K1, + $ -ONE, H( J, K1 ), LDH, + $ A( 1, J ), 1, + $ ONE, H( J, J ), 1 ) + END IF +* +* Copy H(i:M, i) into WORK +* + CALL CCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) +* + IF( J.GT.K1 ) THEN +* +* Compute WORK := WORK - L(J-1, J:M) * T(J-1,J), +* where A(J-1, J) stores T(J-1, J) and A(J-2, J:M) stores U(J-1, J:M) +* + ALPHA = -A( K-1, J ) + CALL CAXPY( MJ, ALPHA, A( K-2, J ), LDA, WORK( 1 ), 1 ) + END IF +* +* Set A(J, J) = T(J, J) +* + A( K, J ) = WORK( 1 ) +* + IF( J.LT.M ) THEN +* +* Compute WORK(2:M) = T(J, J) L(J, (J+1):M) +* where A(J, J) stores T(J, J) and A(J-1, (J+1):M) stores U(J, (J+1):M) +* + IF( K.GT.1 ) THEN + ALPHA = -A( K, J ) + CALL CAXPY( M-J, ALPHA, A( K-1, J+1 ), LDA, + $ WORK( 2 ), 1 ) + ENDIF +* +* Find max(|WORK(2:M)|) +* + I2 = ICAMAX( M-J, WORK( 2 ), 1 ) + 1 + PIV = WORK( I2 ) +* +* Apply symmetric pivot +* + IF( (I2.NE.2) .AND. (PIV.NE.0) ) THEN +* +* Swap WORK(I1) and WORK(I2) +* + I1 = 2 + WORK( I2 ) = WORK( I1 ) + WORK( I1 ) = PIV +* +* Swap A(I1, I1+1:M) with A(I1+1:M, I2) +* + I1 = I1+J-1 + I2 = I2+J-1 + CALL CSWAP( I2-I1-1, A( J1+I1-1, I1+1 ), LDA, + $ A( J1+I1, I2 ), 1 ) +* +* Swap A(I1, I2+1:M) with A(I2, I2+1:M) +* + CALL CSWAP( M-I2, A( J1+I1-1, I2+1 ), LDA, + $ A( J1+I2-1, I2+1 ), LDA ) +* +* Swap A(I1, I1) with A(I2,I2) +* + PIV = A( I1+J1-1, I1 ) + A( J1+I1-1, I1 ) = A( J1+I2-1, I2 ) + A( J1+I2-1, I2 ) = PIV +* +* Swap H(I1, 1:J1) with H(I2, 1:J1) +* + CALL CSWAP( I1-1, H( I1, 1 ), LDH, H( I2, 1 ), LDH ) + IPIV( I1 ) = I2 +* + IF( I1.GT.(K1-1) ) THEN +* +* Swap L(1:I1-1, I1) with L(1:I1-1, I2), +* skipping the first column +* + CALL CSWAP( I1-K1+1, A( 1, I1 ), 1, + $ A( 1, I2 ), 1 ) + END IF + ELSE + IPIV( J+1 ) = J+1 + ENDIF +* +* Set A(J, J+1) = T(J, J+1) +* + A( K, J+1 ) = WORK( 2 ) +* + IF( J.LT.NB ) THEN +* +* Copy A(J+1:M, J+1) into H(J:M, J), +* + CALL CCOPY( M-J, A( K+1, J+1 ), LDA, + $ H( J+1, J+1 ), 1 ) + END IF +* +* Compute L(J+2, J+1) = WORK( 3:M ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:M, J) = L(J+2:M, J+1) +* + IF( A( K, J+1 ).NE.ZERO ) THEN + ALPHA = ONE / A( K, J+1 ) + CALL CCOPY( M-J-1, WORK( 3 ), 1, A( K, J+2 ), LDA ) + CALL CSCAL( M-J-1, ALPHA, A( K, J+2 ), LDA ) + ELSE + CALL CLASET( 'Full', 1, M-J-1, ZERO, ZERO, + $ A( K, J+2 ), LDA) + END IF + END IF + J = J + 1 + GO TO 10 + 20 CONTINUE +* + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* + 30 CONTINUE + IF( J.GT.MIN( M, NB ) ) + $ GO TO 40 +* +* K is the column to be factorized +* when being called from CSYTRF_AA, +* > for the first block column, J1 is 1, hence J1+J-1 is J, +* > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, +* + K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF +* +* H(J:M, J) := A(J:M, J) - H(J:M, 1:(J-1)) * L(J, J1:(J-1))^T, +* where H(J:M, J) has been initialized to be A(J:M, J) +* + IF( K.GT.2 ) THEN +* +* K is the column to be factorized +* > for the first block column, K is J, skipping the first two +* columns +* > for the rest of the columns, K is J+1, skipping only the +* first column +* + CALL CGEMV( 'No transpose', MJ, J-K1, + $ -ONE, H( J, K1 ), LDH, + $ A( J, 1 ), LDA, + $ ONE, H( J, J ), 1 ) + END IF +* +* Copy H(J:M, J) into WORK +* + CALL CCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) +* + IF( J.GT.K1 ) THEN +* +* Compute WORK := WORK - L(J:M, J-1) * T(J-1,J), +* where A(J-1, J) = T(J-1, J) and A(J, J-2) = L(J, J-1) +* + ALPHA = -A( J, K-1 ) + CALL CAXPY( MJ, ALPHA, A( J, K-2 ), 1, WORK( 1 ), 1 ) + END IF +* +* Set A(J, J) = T(J, J) +* + A( J, K ) = WORK( 1 ) +* + IF( J.LT.M ) THEN +* +* Compute WORK(2:M) = T(J, J) L((J+1):M, J) +* where A(J, J) = T(J, J) and A((J+1):M, J-1) = L((J+1):M, J) +* + IF( K.GT.1 ) THEN + ALPHA = -A( J, K ) + CALL CAXPY( M-J, ALPHA, A( J+1, K-1 ), 1, + $ WORK( 2 ), 1 ) + ENDIF +* +* Find max(|WORK(2:M)|) +* + I2 = ICAMAX( M-J, WORK( 2 ), 1 ) + 1 + PIV = WORK( I2 ) +* +* Apply symmetric pivot +* + IF( (I2.NE.2) .AND. (PIV.NE.0) ) THEN +* +* Swap WORK(I1) and WORK(I2) +* + I1 = 2 + WORK( I2 ) = WORK( I1 ) + WORK( I1 ) = PIV +* +* Swap A(I1+1:M, I1) with A(I2, I1+1:M) +* + I1 = I1+J-1 + I2 = I2+J-1 + CALL CSWAP( I2-I1-1, A( I1+1, J1+I1-1 ), 1, + $ A( I2, J1+I1 ), LDA ) +* +* Swap A(I2+1:M, I1) with A(I2+1:M, I2) +* + CALL CSWAP( M-I2, A( I2+1, J1+I1-1 ), 1, + $ A( I2+1, J1+I2-1 ), 1 ) +* +* Swap A(I1, I1) with A(I2, I2) +* + PIV = A( I1, J1+I1-1 ) + A( I1, J1+I1-1 ) = A( I2, J1+I2-1 ) + A( I2, J1+I2-1 ) = PIV +* +* Swap H(I1, I1:J1) with H(I2, I2:J1) +* + CALL CSWAP( I1-1, H( I1, 1 ), LDH, H( I2, 1 ), LDH ) + IPIV( I1 ) = I2 +* + IF( I1.GT.(K1-1) ) THEN +* +* Swap L(1:I1-1, I1) with L(1:I1-1, I2), +* skipping the first column +* + CALL CSWAP( I1-K1+1, A( I1, 1 ), LDA, + $ A( I2, 1 ), LDA ) + END IF + ELSE + IPIV( J+1 ) = J+1 + ENDIF +* +* Set A(J+1, J) = T(J+1, J) +* + A( J+1, K ) = WORK( 2 ) +* + IF( J.LT.NB ) THEN +* +* Copy A(J+1:M, J+1) into H(J+1:M, J), +* + CALL CCOPY( M-J, A( J+1, K+1 ), 1, + $ H( J+1, J+1 ), 1 ) + END IF +* +* Compute L(J+2, J+1) = WORK( 3:M ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:M, J) = L(J+2:M, J+1) +* + IF( A( J+1, K ).NE.ZERO ) THEN + ALPHA = ONE / A( J+1, K ) + CALL CCOPY( M-J-1, WORK( 3 ), 1, A( J+2, K ), 1 ) + CALL CSCAL( M-J-1, ALPHA, A( J+2, K ), 1 ) + ELSE + CALL CLASET( 'Full', M-J-1, 1, ZERO, ZERO, + $ A( J+2, K ), LDA ) + END IF + END IF + J = J + 1 + GO TO 30 + 40 CONTINUE + END IF + RETURN +* +* End of CLASYF_AA +* + END diff --git a/dspl/liblapack/SRC/clasyf_rk.f b/dspl/liblapack/SRC/clasyf_rk.f new file mode 100644 index 0000000..0700c5c --- /dev/null +++ b/dspl/liblapack/SRC/clasyf_rk.f @@ -0,0 +1,974 @@ +*> \brief \b CLASYF_RK computes a partial factorization of a complex symmetric indefinite matrix using bounded Bunch-Kaufman (rook) diagonal pivoting method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLASYF_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLASYF_RK( UPLO, N, NB, KB, A, LDA, E, IPIV, W, LDW, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), E( * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> CLASYF_RK computes a partial factorization of a complex symmetric +*> matrix A using the bounded Bunch-Kaufman (rook) diagonal +*> pivoting method. The partial factorization has the form: +*> +*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +*> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) +*> +*> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L', +*> ( L21 I ) ( 0 A22 ) ( 0 I ) +*> +*> where the order of D is at most NB. The actual order is returned in +*> the argument KB, and is either NB or NB-1, or N if N <= NB. +*> +*> CLASYF_RK is an auxiliary routine called by CSYTRF_RK. It uses +*> blocked code (calling Level 3 BLAS) to update the submatrix +*> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The maximum number of columns of the matrix A that should be +*> factored. NB should be at least 2 to allow for 2-by-2 pivot +*> blocks. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns of A that were actually factored. +*> KB is either NB-1 or NB, or N if N <= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the symmetric matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, contains: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is COMPLEX array, dimension (N) +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> IPIV describes the permutation matrix P in the factorization +*> of matrix A as follows. The absolute value of IPIV(k) +*> represents the index of row and column that were +*> interchanged with the k-th row and column. The value of UPLO +*> describes the order in which the interchanges were applied. +*> Also, the sign of IPIV represents the block structure of +*> the symmetric block diagonal matrix D with 1-by-1 or 2-by-2 +*> diagonal blocks which correspond to 1 or 2 interchanges +*> at each factorization step. +*> +*> If UPLO = 'U', +*> ( in factorization order, k decreases from N to 1 ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the submatrix A(1:N,N-KB+1:N); +*> If IPIV(k) = k, no interchange occurred. +*> +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k-1) < 0 means: +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,N-KB+1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k-1) != k-1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the submatrix A(1:N,N-KB+1:N). +*> If -IPIV(k-1) = k-1, no interchange occurred. +*> +*> c) In both cases a) and b) is always ABS( IPIV(k) ) <= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> +*> If UPLO = 'L', +*> ( in factorization order, k increases from 1 to N ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the submatrix A(1:N,1:KB). +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k+1) < 0 means: +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the submatrix A(1:N,1:KB). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k+1) != k+1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the submatrix A(1:N,1:KB). +*> If -IPIV(k+1) = k+1, no interchange occurred. +*> +*> c) In both cases a) and b) is always ABS( IPIV(k) ) >= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX array, dimension (LDW,NB) +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CLASYF_RK( UPLO, N, NB, KB, A, LDA, E, IPIV, W, LDW, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), E( * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) + COMPLEX CONE, CZERO + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), + $ CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER IMAX, ITEMP, J, JB, JJ, JMAX, K, KK, KW, KKW, + $ KP, KSTEP, P, II + REAL ABSAKK, ALPHA, COLMAX, ROWMAX, SFMIN, STEMP + COMPLEX D11, D12, D21, D22, R1, T, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMM, CGEMV, CSCAL, CSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, MIN, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( REAL( Z ) ) + ABS( AIMAG( Z ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = SLAMCH( 'S' ) +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 +* +* Initilize the first entry of array E, where superdiagonal +* elements of D are stored +* + E( 1 ) = CZERO +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* + K = N + 10 CONTINUE +* +* KW is the column of W which corresponds to column K of A +* + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column KW of W and update it +* + CALL CCOPY( K, A( 1, K ), 1, W( 1, KW ), 1 ) + IF( K.LT.N ) + $ CALL CGEMV( 'No transpose', K, N-K, -CONE, A( 1, K+1 ), + $ LDA, W( K, KW+1 ), LDW, CONE, W( 1, KW ), 1 ) +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( W( K, KW ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = ICAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = CABS1( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + CALL CCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) +* +* Set E( K ) to zero +* + IF( K.GT.1 ) + $ E( K ) = CZERO +* + ELSE +* +* ============================================================ +* +* Test for interchange +* +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 12 CONTINUE +* +* Begin pivot search loop body +* +* +* Copy column IMAX to column KW-1 of W and update it +* + CALL CCOPY( IMAX, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 ) + CALL CCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) +* + IF( K.LT.N ) + $ CALL CGEMV( 'No transpose', K, N-K, -CONE, + $ A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW, + $ CONE, W( 1, KW-1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + ICAMAX( K-IMAX, W( IMAX+1, KW-1 ), + $ 1 ) + ROWMAX = CABS1( W( JMAX, KW-1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = ICAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + STEMP = CABS1( W( ITEMP, KW-1 ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for +* CABS1( W( IMAX, KW-1 ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.(CABS1( W( IMAX, KW-1 ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW of W +* + CALL CCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K-1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL CCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 12 +* + END IF +* +* ============================================================ +* + KK = K - KSTEP + 1 +* +* KKW is the column of W which corresponds to column KK of A +* + KKW = NB + KK - N +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column K to column P +* + CALL CCOPY( K-P, A( P+1, K ), 1, A( P, P+1 ), LDA ) + CALL CCOPY( P, A( 1, K ), 1, A( 1, P ), 1 ) +* +* Interchange rows K and P in last N-K+1 columns of A +* and last N-K+2 columns of W +* + CALL CSWAP( N-K+1, A( K, K ), LDA, A( P, K ), LDA ) + CALL CSWAP( N-KK+1, W( K, KKW ), LDW, W( P, KKW ), LDW ) + END IF +* +* Updated column KP is already stored in column KKW of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL CCOPY( K-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + CALL CCOPY( KP, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last N-KK+1 columns +* of A and W +* + CALL CSWAP( N-KK+1, A( KK, KK ), LDA, A( KP, KK ), LDA ) + CALL CSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column KW of W now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Store U(k) in column k of A +* + CALL CCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + IF( K.GT.1 ) THEN + IF( CABS1( A( K, K ) ).GE.SFMIN ) THEN + R1 = CONE / A( K, K ) + CALL CSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE IF( A( K, K ).NE.CZERO ) THEN + DO 14 II = 1, K - 1 + A( II, K ) = A( II, K ) / A( K, K ) + 14 CONTINUE + END IF +* +* Store the superdiagonal element of D in array E +* + E( K ) = CZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns KW and KW-1 of W now +* hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* + IF( K.GT.2 ) THEN +* +* Store U(k) and U(k-1) in columns k and k-1 of A +* + D12 = W( K-1, KW ) + D11 = W( K, KW ) / D12 + D22 = W( K-1, KW-1 ) / D12 + T = CONE / ( D11*D22-CONE ) + DO 20 J = 1, K - 2 + A( J, K-1 ) = T*( (D11*W( J, KW-1 )-W( J, KW ) ) / + $ D12 ) + A( J, K ) = T*( ( D22*W( J, KW )-W( J, KW-1 ) ) / + $ D12 ) + 20 CONTINUE + END IF +* +* Copy diagonal elements of D(K) to A, +* copy superdiagonal element of D(K) to E(K) and +* ZERO out superdiagonal entry of A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = CZERO + A( K, K ) = W( K, KW ) + E( K ) = W( K-1, KW ) + E( K-1 ) = CZERO +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12**T = A11 - U12*W**T +* +* computing blocks of NB columns at a time +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + CALL CGEMV( 'No transpose', JJ-J+1, N-K, -CONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, CONE, + $ A( J, JJ ), 1 ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + IF( J.GE.2 ) + $ CALL CGEMM( 'No transpose', 'Transpose', J-1, JB, + $ N-K, -CONE, A( 1, K+1 ), LDA, W( J, KW+1 ), + $ LDW, CONE, A( 1, J ), LDA ) + 50 CONTINUE +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 +* +* Initilize the unused last entry of the subdiagonal array E. +* + E( N ) = CZERO +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column K of W and update it +* + CALL CCOPY( N-K+1, A( K, K ), 1, W( K, K ), 1 ) + IF( K.GT.1 ) + $ CALL CGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), + $ LDA, W( K, 1 ), LDW, CONE, W( K, K ), 1 ) +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( W( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + ICAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = CABS1( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + CALL CCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) +* +* Set E( K ) to zero +* + IF( K.LT.N ) + $ E( K ) = CZERO +* + ELSE +* +* ============================================================ +* +* Test for interchange +* +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 72 CONTINUE +* +* Begin pivot search loop body +* +* +* Copy column IMAX to column K+1 of W and update it +* + CALL CCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1) + CALL CCOPY( N-IMAX+1, A( IMAX, IMAX ), 1, + $ W( IMAX, K+1 ), 1 ) + IF( K.GT.1 ) + $ CALL CGEMV( 'No transpose', N-K+1, K-1, -CONE, + $ A( K, 1 ), LDA, W( IMAX, 1 ), LDW, + $ CONE, W( K, K+1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + ICAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = CABS1( W( JMAX, K+1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + ICAMAX( N-IMAX, W( IMAX+1, K+1 ), 1) + STEMP = CABS1( W( ITEMP, K+1 ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for +* CABS1( W( IMAX, K+1 ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( CABS1( W( IMAX, K+1 ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K of W +* + CALL CCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL CCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 72 +* + END IF +* +* ============================================================ +* + KK = K + KSTEP - 1 +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column K to column P +* + CALL CCOPY( P-K, A( K, K ), 1, A( P, K ), LDA ) + CALL CCOPY( N-P+1, A( P, K ), 1, A( P, P ), 1 ) +* +* Interchange rows K and P in first K columns of A +* and first K+1 columns of W +* + CALL CSWAP( K, A( K, 1 ), LDA, A( P, 1 ), LDA ) + CALL CSWAP( KK, W( K, 1 ), LDW, W( P, 1 ), LDW ) + END IF +* +* Updated column KP is already stored in column KK of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL CCOPY( KP-K-1, A( K+1, KK ), 1, A( KP, K+1 ), LDA ) + CALL CCOPY( N-KP+1, A( KP, KK ), 1, A( KP, KP ), 1 ) +* +* Interchange rows KK and KP in first KK columns of A and W +* + CALL CSWAP( KK, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL CSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* +* Store L(k) in column k of A +* + CALL CCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN + IF( CABS1( A( K, K ) ).GE.SFMIN ) THEN + R1 = CONE / A( K, K ) + CALL CSCAL( N-K, R1, A( K+1, K ), 1 ) + ELSE IF( A( K, K ).NE.CZERO ) THEN + DO 74 II = K + 1, N + A( II, K ) = A( II, K ) / A( K, K ) + 74 CONTINUE + END IF +* +* Store the subdiagonal element of D in array E +* + E( K ) = CZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Store L(k) and L(k+1) in columns k and k+1 of A +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / D21 + T = CONE / ( D11*D22-CONE ) + DO 80 J = K + 2, N + A( J, K ) = T*( ( D11*W( J, K )-W( J, K+1 ) ) / + $ D21 ) + A( J, K+1 ) = T*( ( D22*W( J, K+1 )-W( J, K ) ) / + $ D21 ) + 80 CONTINUE + END IF +* +* Copy diagonal elements of D(K) to A, +* copy subdiagonal element of D(K) to E(K) and +* ZERO out subdiagonal entry of A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = CZERO + A( K+1, K+1 ) = W( K+1, K+1 ) + E( K ) = W( K+1, K ) + E( K+1 ) = CZERO +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21**T = A22 - L21*W**T +* +* computing blocks of NB columns at a time +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + CALL CGEMV( 'No transpose', J+JB-JJ, K-1, -CONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, CONE, + $ A( JJ, JJ ), 1 ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL CGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -CONE, A( J+JB, 1 ), LDA, W( J, 1 ), + $ LDW, CONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF +* + RETURN +* +* End of CLASYF_RK +* + END diff --git a/dspl/liblapack/SRC/clasyf_rook.f b/dspl/liblapack/SRC/clasyf_rook.f new file mode 100644 index 0000000..b83f541 --- /dev/null +++ b/dspl/liblapack/SRC/clasyf_rook.f @@ -0,0 +1,900 @@ +*> \brief \b CLASYF_ROOK computes a partial factorization of a complex symmetric matrix using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLASYF_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLASYF_ROOK( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLASYF_ROOK computes a partial factorization of a complex symmetric +*> matrix A using the bounded Bunch-Kaufman ("rook") diagonal +*> pivoting method. The partial factorization has the form: +*> +*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +*> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) +*> +*> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L' +*> ( L21 I ) ( 0 A22 ) ( 0 I ) +*> +*> where the order of D is at most NB. The actual order is returned in +*> the argument KB, and is either NB or NB-1, or N if N <= NB. +*> +*> CLASYF_ROOK is an auxiliary routine called by CSYTRF_ROOK. It uses +*> blocked code (calling Level 3 BLAS) to update the submatrix +*> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The maximum number of columns of the matrix A that should be +*> factored. NB should be at least 2 to allow for 2-by-2 pivot +*> blocks. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns of A that were actually factored. +*> KB is either NB-1 or NB, or N if N <= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, A contains details of the partial factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> Only the last KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> Only the first KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX array, dimension (LDW,NB) +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup complexSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CLASYF_ROOK( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, + $ INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) + COMPLEX CONE, CZERO + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), + $ CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER IMAX, ITEMP, J, JB, JJ, JMAX, JP1, JP2, K, KK, + $ KW, KKW, KP, KSTEP, P, II + REAL ABSAKK, ALPHA, COLMAX, ROWMAX, STEMP, SFMIN + COMPLEX D11, D12, D21, D22, R1, T, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMM, CGEMV, CSCAL, CSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT, AIMAG, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( REAL( Z ) ) + ABS( AIMAG( Z ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = SLAMCH( 'S' ) +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* + K = N + 10 CONTINUE +* +* KW is the column of W which corresponds to column K of A +* + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column KW of W and update it +* + CALL CCOPY( K, A( 1, K ), 1, W( 1, KW ), 1 ) + IF( K.LT.N ) + $ CALL CGEMV( 'No transpose', K, N-K, -CONE, A( 1, K+1 ), + $ LDA, W( K, KW+1 ), LDW, CONE, W( 1, KW ), 1 ) +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( W( K, KW ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = ICAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = CABS1( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + CALL CCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + ELSE +* +* ============================================================ +* +* Test for interchange +* +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 12 CONTINUE +* +* Begin pivot search loop body +* +* +* Copy column IMAX to column KW-1 of W and update it +* + CALL CCOPY( IMAX, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 ) + CALL CCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) +* + IF( K.LT.N ) + $ CALL CGEMV( 'No transpose', K, N-K, -CONE, + $ A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW, + $ CONE, W( 1, KW-1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + ICAMAX( K-IMAX, W( IMAX+1, KW-1 ), + $ 1 ) + ROWMAX = CABS1( W( JMAX, KW-1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = ICAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + STEMP = CABS1( W( ITEMP, KW-1 ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for +* CABS1( W( IMAX, KW-1 ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.(CABS1( W( IMAX, KW-1 ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW of W +* + CALL CCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K-1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL CCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 12 +* + END IF +* +* ============================================================ +* + KK = K - KSTEP + 1 +* +* KKW is the column of W which corresponds to column KK of A +* + KKW = NB + KK - N +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column K to column P +* + CALL CCOPY( K-P, A( P+1, K ), 1, A( P, P+1 ), LDA ) + CALL CCOPY( P, A( 1, K ), 1, A( 1, P ), 1 ) +* +* Interchange rows K and P in last N-K+1 columns of A +* and last N-K+2 columns of W +* + CALL CSWAP( N-K+1, A( K, K ), LDA, A( P, K ), LDA ) + CALL CSWAP( N-KK+1, W( K, KKW ), LDW, W( P, KKW ), LDW ) + END IF +* +* Updated column KP is already stored in column KKW of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL CCOPY( K-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + CALL CCOPY( KP, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last N-KK+1 columns +* of A and W +* + CALL CSWAP( N-KK+1, A( KK, KK ), LDA, A( KP, KK ), LDA ) + CALL CSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column KW of W now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Store U(k) in column k of A +* + CALL CCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + IF( K.GT.1 ) THEN + IF( CABS1( A( K, K ) ).GE.SFMIN ) THEN + R1 = CONE / A( K, K ) + CALL CSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE IF( A( K, K ).NE.CZERO ) THEN + DO 14 II = 1, K - 1 + A( II, K ) = A( II, K ) / A( K, K ) + 14 CONTINUE + END IF + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns KW and KW-1 of W now +* hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* + IF( K.GT.2 ) THEN +* +* Store U(k) and U(k-1) in columns k and k-1 of A +* + D12 = W( K-1, KW ) + D11 = W( K, KW ) / D12 + D22 = W( K-1, KW-1 ) / D12 + T = CONE / ( D11*D22-CONE ) + DO 20 J = 1, K - 2 + A( J, K-1 ) = T*( (D11*W( J, KW-1 )-W( J, KW ) ) / + $ D12 ) + A( J, K ) = T*( ( D22*W( J, KW )-W( J, KW-1 ) ) / + $ D12 ) + 20 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = W( K-1, KW ) + A( K, K ) = W( K, KW ) + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12**T = A11 - U12*W**T +* +* computing blocks of NB columns at a time +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + CALL CGEMV( 'No transpose', JJ-J+1, N-K, -CONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, CONE, + $ A( J, JJ ), 1 ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + IF( J.GE.2 ) + $ CALL CGEMM( 'No transpose', 'Transpose', J-1, JB, + $ N-K, -CONE, A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, + $ CONE, A( 1, J ), LDA ) + 50 CONTINUE +* +* Put U12 in standard form by partially undoing the interchanges +* in columns k+1:n +* + J = K + 1 + 60 CONTINUE +* + KSTEP = 1 + JP1 = 1 + JJ = J + JP2 = IPIV( J ) + IF( JP2.LT.0 ) THEN + JP2 = -JP2 + J = J + 1 + JP1 = -IPIV( J ) + KSTEP = 2 + END IF +* + J = J + 1 + IF( JP2.NE.JJ .AND. J.LE.N ) + $ CALL CSWAP( N-J+1, A( JP2, J ), LDA, A( JJ, J ), LDA ) + JJ = J - 1 + IF( JP1.NE.JJ .AND. KSTEP.EQ.2 ) + $ CALL CSWAP( N-J+1, A( JP1, J ), LDA, A( JJ, J ), LDA ) + IF( J.LE.N ) + $ GO TO 60 +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column K of W and update it +* + CALL CCOPY( N-K+1, A( K, K ), 1, W( K, K ), 1 ) + IF( K.GT.1 ) + $ CALL CGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), + $ LDA, W( K, 1 ), LDW, CONE, W( K, K ), 1 ) +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( W( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + ICAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = CABS1( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + CALL CCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + ELSE +* +* ============================================================ +* +* Test for interchange +* +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 72 CONTINUE +* +* Begin pivot search loop body +* +* +* Copy column IMAX to column K+1 of W and update it +* + CALL CCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1) + CALL CCOPY( N-IMAX+1, A( IMAX, IMAX ), 1, + $ W( IMAX, K+1 ), 1 ) + IF( K.GT.1 ) + $ CALL CGEMV( 'No transpose', N-K+1, K-1, -CONE, + $ A( K, 1 ), LDA, W( IMAX, 1 ), LDW, + $ CONE, W( K, K+1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + ICAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = CABS1( W( JMAX, K+1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + ICAMAX( N-IMAX, W( IMAX+1, K+1 ), 1) + STEMP = CABS1( W( ITEMP, K+1 ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for +* CABS1( W( IMAX, K+1 ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( CABS1( W( IMAX, K+1 ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K of W +* + CALL CCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL CCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 72 +* + END IF +* +* ============================================================ +* + KK = K + KSTEP - 1 +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column K to column P +* + CALL CCOPY( P-K, A( K, K ), 1, A( P, K ), LDA ) + CALL CCOPY( N-P+1, A( P, K ), 1, A( P, P ), 1 ) +* +* Interchange rows K and P in first K columns of A +* and first K+1 columns of W +* + CALL CSWAP( K, A( K, 1 ), LDA, A( P, 1 ), LDA ) + CALL CSWAP( KK, W( K, 1 ), LDW, W( P, 1 ), LDW ) + END IF +* +* Updated column KP is already stored in column KK of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL CCOPY( KP-K-1, A( K+1, KK ), 1, A( KP, K+1 ), LDA ) + CALL CCOPY( N-KP+1, A( KP, KK ), 1, A( KP, KP ), 1 ) +* +* Interchange rows KK and KP in first KK columns of A and W +* + CALL CSWAP( KK, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL CSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* +* Store L(k) in column k of A +* + CALL CCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN + IF( CABS1( A( K, K ) ).GE.SFMIN ) THEN + R1 = CONE / A( K, K ) + CALL CSCAL( N-K, R1, A( K+1, K ), 1 ) + ELSE IF( A( K, K ).NE.CZERO ) THEN + DO 74 II = K + 1, N + A( II, K ) = A( II, K ) / A( K, K ) + 74 CONTINUE + END IF + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Store L(k) and L(k+1) in columns k and k+1 of A +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / D21 + T = CONE / ( D11*D22-CONE ) + DO 80 J = K + 2, N + A( J, K ) = T*( ( D11*W( J, K )-W( J, K+1 ) ) / + $ D21 ) + A( J, K+1 ) = T*( ( D22*W( J, K+1 )-W( J, K ) ) / + $ D21 ) + 80 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = W( K+1, K ) + A( K+1, K+1 ) = W( K+1, K+1 ) + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21**T = A22 - L21*W**T +* +* computing blocks of NB columns at a time +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + CALL CGEMV( 'No transpose', J+JB-JJ, K-1, -CONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, CONE, + $ A( JJ, JJ ), 1 ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL CGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -CONE, A( J+JB, 1 ), LDA, W( J, 1 ), LDW, + $ CONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Put L21 in standard form by partially undoing the interchanges +* in columns 1:k-1 +* + J = K - 1 + 120 CONTINUE +* + KSTEP = 1 + JP1 = 1 + JJ = J + JP2 = IPIV( J ) + IF( JP2.LT.0 ) THEN + JP2 = -JP2 + J = J - 1 + JP1 = -IPIV( J ) + KSTEP = 2 + END IF +* + J = J - 1 + IF( JP2.NE.JJ .AND. J.GE.1 ) + $ CALL CSWAP( J, A( JP2, 1 ), LDA, A( JJ, 1 ), LDA ) + JJ = J + 1 + IF( JP1.NE.JJ .AND. KSTEP.EQ.2 ) + $ CALL CSWAP( J, A( JP1, 1 ), LDA, A( JJ, 1 ), LDA ) + IF( J.GE.1 ) + $ GO TO 120 +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF + RETURN +* +* End of CLASYF_ROOK +* + END diff --git a/dspl/liblapack/SRC/clatbs.f b/dspl/liblapack/SRC/clatbs.f new file mode 100644 index 0000000..3bb7bff --- /dev/null +++ b/dspl/liblapack/SRC/clatbs.f @@ -0,0 +1,998 @@ +*> \brief \b CLATBS solves a triangular banded system of equations. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLATBS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLATBS( UPLO, TRANS, DIAG, NORMIN, N, KD, AB, LDAB, X, +* SCALE, CNORM, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORMIN, TRANS, UPLO +* INTEGER INFO, KD, LDAB, N +* REAL SCALE +* .. +* .. Array Arguments .. +* REAL CNORM( * ) +* COMPLEX AB( LDAB, * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLATBS solves one of the triangular systems +*> +*> A * x = s*b, A**T * x = s*b, or A**H * x = s*b, +*> +*> with scaling to prevent overflow, where A is an upper or lower +*> triangular band matrix. Here A**T denotes the transpose of A, x and b +*> are n-element vectors, and s is a scaling factor, usually less than +*> or equal to 1, chosen so that the components of x will be less than +*> the overflow threshold. If the unscaled problem will not cause +*> overflow, the Level 2 BLAS routine CTBSV is called. If the matrix A +*> is singular (A(j,j) = 0 for some j), then s is set to 0 and a +*> non-trivial solution to A*x = 0 is returned. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the operation applied to A. +*> = 'N': Solve A * x = s*b (No transpose) +*> = 'T': Solve A**T * x = s*b (Transpose) +*> = 'C': Solve A**H * x = s*b (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] NORMIN +*> \verbatim +*> NORMIN is CHARACTER*1 +*> Specifies whether CNORM has been set or not. +*> = 'Y': CNORM contains the column norms on entry +*> = 'N': CNORM is not set on entry. On exit, the norms will +*> be computed and stored in CNORM. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of subdiagonals or superdiagonals in the +*> triangular matrix A. KD >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> The upper or lower triangular band matrix A, stored in the +*> first KD+1 rows of the array. The j-th column of A is stored +*> in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (N) +*> On entry, the right hand side b of the triangular system. +*> On exit, X is overwritten by the solution vector x. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL +*> The scaling factor s for the triangular system +*> A * x = s*b, A**T * x = s*b, or A**H * x = s*b. +*> If SCALE = 0, the matrix A is singular or badly scaled, and +*> the vector x is an exact or approximate solution to A*x = 0. +*> \endverbatim +*> +*> \param[in,out] CNORM +*> \verbatim +*> CNORM is REAL array, dimension (N) +*> +*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +*> contains the norm of the off-diagonal part of the j-th column +*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal +*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +*> must be greater than or equal to the 1-norm. +*> +*> If NORMIN = 'N', CNORM is an output argument and CNORM(j) +*> returns the 1-norm of the offdiagonal part of the j-th column +*> of A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> A rough bound on x is computed; if that is less than overflow, CTBSV +*> is called, otherwise, specific code is used which checks for possible +*> overflow or divide-by-zero at every operation. +*> +*> A columnwise scheme is used for solving A*x = b. The basic algorithm +*> if A is lower triangular is +*> +*> x[1:n] := b[1:n] +*> for j = 1, ..., n +*> x(j) := x(j) / A(j,j) +*> x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +*> end +*> +*> Define bounds on the components of x after j iterations of the loop: +*> M(j) = bound on x[1:j] +*> G(j) = bound on x[j+1:n] +*> Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +*> +*> Then for iteration j+1 we have +*> M(j+1) <= G(j) / | A(j+1,j+1) | +*> G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +*> <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +*> +*> where CNORM(j+1) is greater than or equal to the infinity-norm of +*> column j+1 of A, not counting the diagonal. Hence +*> +*> G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +*> 1<=i<=j +*> and +*> +*> |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +*> 1<=i< j +*> +*> Since |x(j)| <= M(j), we use the Level 2 BLAS routine CTBSV if the +*> reciprocal of the largest M(j), j=1,..,n, is larger than +*> max(underflow, 1/overflow). +*> +*> The bound on x(j) is also used to determine when a step in the +*> columnwise method can be performed without fear of overflow. If +*> the computed bound is greater than a large constant, x is scaled to +*> prevent overflow, but if the bound overflows, x is set to 0, x(j) to +*> 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +*> +*> Similarly, a row-wise scheme is used to solve A**T *x = b or +*> A**H *x = b. The basic algorithm for A upper triangular is +*> +*> for j = 1, ..., n +*> x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +*> end +*> +*> We simultaneously compute two bounds +*> G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +*> M(j) = bound on x(i), 1<=i<=j +*> +*> The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +*> add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +*> Then the bound on x(j) is +*> +*> M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +*> +*> <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +*> 1<=i<=j +*> +*> and we can safely call CTBSV if 1/M(n) and 1/G(n) are both greater +*> than max(underflow, 1/overflow). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLATBS( UPLO, TRANS, DIAG, NORMIN, N, KD, AB, LDAB, X, + $ SCALE, CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, KD, LDAB, N + REAL SCALE +* .. +* .. Array Arguments .. + REAL CNORM( * ) + COMPLEX AB( LDAB, * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0, + $ TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST, JLEN, MAIND + REAL BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL, + $ XBND, XJ, XMAX + COMPLEX CSUMJ, TJJS, USCAL, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX, ISAMAX + REAL SCASUM, SLAMCH + COMPLEX CDOTC, CDOTU, CLADIV + EXTERNAL LSAME, ICAMAX, ISAMAX, SCASUM, SLAMCH, CDOTC, + $ CDOTU, CLADIV +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CSSCAL, CTBSV, SLABAD, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1, CABS2 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) + CABS2( ZDUM ) = ABS( REAL( ZDUM ) / 2. ) + + $ ABS( AIMAG( ZDUM ) / 2. ) +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( KD.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLATBS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM / SLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + DO 10 J = 1, N + JLEN = MIN( KD, J-1 ) + CNORM( J ) = SCASUM( JLEN, AB( KD+1-JLEN, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) THEN + CNORM( J ) = SCASUM( JLEN, AB( 2, J ), 1 ) + ELSE + CNORM( J ) = ZERO + END IF + 20 CONTINUE + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM/2. +* + IMAX = ISAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM*HALF ) THEN + TSCAL = ONE + ELSE + TSCAL = HALF / ( SMLNUM*TMAX ) + CALL SSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine CTBSV can be used. +* + XMAX = ZERO + DO 30 J = 1, N + XMAX = MAX( XMAX, CABS2( X( J ) ) ) + 30 CONTINUE + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + MAIND = KD + 1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + MAIND = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 60 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* + TJJS = AB( MAIND, J ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = G(j-1) / abs(A(j,j)) +* + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF +* + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 40 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 50 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 50 CONTINUE + END IF + 60 CONTINUE +* + ELSE +* +* Compute the growth in A**T * x = b or A**H * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + MAIND = KD + 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + MAIND = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 90 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* + TJJS = AB( MAIND, J ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF + 70 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 80 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 80 CONTINUE + END IF + 90 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL CTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM*HALF ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = ( BIGNUM*HALF ) / XMAX + CALL CSSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + ELSE + XMAX = XMAX*TWO + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 110 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 105 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 100 I = 1, N + X( I ) = ZERO + 100 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 105 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL CSSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - +* x(j)* A(max(1,j-kd):j-1,j) +* + JLEN = MIN( KD, J-1 ) + CALL CAXPY( JLEN, -X( J )*TSCAL, + $ AB( KD+1-JLEN, J ), 1, X( J-JLEN ), 1 ) + I = ICAMAX( J-1, X, 1 ) + XMAX = CABS1( X( I ) ) + END IF + ELSE IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) - +* x(j) * A(j+1:min(j+kd,n),j) +* + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) + $ CALL CAXPY( JLEN, -X( J )*TSCAL, AB( 2, J ), 1, + $ X( J+1 ), 1 ) + I = J + ICAMAX( N-J, X( J+1 ), 1 ) + XMAX = CABS1( X( I ) ) + END IF + 110 CONTINUE +* + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Solve A**T * x = b +* + DO 150 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = CLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.CMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call CDOTU to perform the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + CSUMJ = CDOTU( JLEN, AB( KD+1-JLEN, J ), 1, + $ X( J-JLEN ), 1 ) + ELSE + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.1 ) + $ CSUMJ = CDOTU( JLEN, AB( 2, J ), 1, X( J+1 ), + $ 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + DO 120 I = 1, JLEN + CSUMJ = CSUMJ + ( AB( KD+I-JLEN, J )*USCAL )* + $ X( J-JLEN-1+I ) + 120 CONTINUE + ELSE + JLEN = MIN( KD, N-J ) + DO 130 I = 1, JLEN + CSUMJ = CSUMJ + ( AB( I+1, J )*USCAL )*X( J+I ) + 130 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 145 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**T *x = 0. +* + DO 140 I = 1, N + X( I ) = ZERO + 140 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 145 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + 150 CONTINUE +* + ELSE +* +* Solve A**H * x = b +* + DO 190 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = CONJG( AB( MAIND, J ) )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = CLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.CMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call CDOTC to perform the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + CSUMJ = CDOTC( JLEN, AB( KD+1-JLEN, J ), 1, + $ X( J-JLEN ), 1 ) + ELSE + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.1 ) + $ CSUMJ = CDOTC( JLEN, AB( 2, J ), 1, X( J+1 ), + $ 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + DO 160 I = 1, JLEN + CSUMJ = CSUMJ + ( CONJG( AB( KD+I-JLEN, J ) )* + $ USCAL )*X( J-JLEN-1+I ) + 160 CONTINUE + ELSE + JLEN = MIN( KD, N-J ) + DO 170 I = 1, JLEN + CSUMJ = CSUMJ + ( CONJG( AB( I+1, J ) )*USCAL )* + $ X( J+I ) + 170 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = CONJG( AB( MAIND, J ) )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 185 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**H *x = 0. +* + DO 180 I = 1, N + X( I ) = ZERO + 180 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 185 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + 190 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL SSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of CLATBS +* + END diff --git a/dspl/liblapack/SRC/clatdf.f b/dspl/liblapack/SRC/clatdf.f new file mode 100644 index 0000000..357f664 --- /dev/null +++ b/dspl/liblapack/SRC/clatdf.f @@ -0,0 +1,323 @@ +*> \brief \b CLATDF uses the LU factorization of the n-by-n matrix computed by sgetc2 and computes a contribution to the reciprocal Dif-estimate. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLATDF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLATDF( IJOB, N, Z, LDZ, RHS, RDSUM, RDSCAL, IPIV, +* JPIV ) +* +* .. Scalar Arguments .. +* INTEGER IJOB, LDZ, N +* REAL RDSCAL, RDSUM +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), JPIV( * ) +* COMPLEX RHS( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLATDF computes the contribution to the reciprocal Dif-estimate +*> by solving for x in Z * x = b, where b is chosen such that the norm +*> of x is as large as possible. It is assumed that LU decomposition +*> of Z has been computed by CGETC2. On entry RHS = f holds the +*> contribution from earlier solved sub-systems, and on return RHS = x. +*> +*> The factorization of Z returned by CGETC2 has the form +*> Z = P * L * U * Q, where P and Q are permutation matrices. L is lower +*> triangular with unit diagonal elements and U is upper triangular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] IJOB +*> \verbatim +*> IJOB is INTEGER +*> IJOB = 2: First compute an approximative null-vector e +*> of Z using CGECON, e is normalized and solve for +*> Zx = +-e - f with the sign giving the greater value of +*> 2-norm(x). About 5 times as expensive as Default. +*> IJOB .ne. 2: Local look ahead strategy where +*> all entries of the r.h.s. b is chosen as either +1 or +*> -1. Default. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Z. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, N) +*> On entry, the LU part of the factorization of the n-by-n +*> matrix Z computed by CGETC2: Z = P * L * U * Q +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDA >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] RHS +*> \verbatim +*> RHS is COMPLEX array, dimension (N). +*> On entry, RHS contains contributions from other subsystems. +*> On exit, RHS contains the solution of the subsystem with +*> entries according to the value of IJOB (see above). +*> \endverbatim +*> +*> \param[in,out] RDSUM +*> \verbatim +*> RDSUM is REAL +*> On entry, the sum of squares of computed contributions to +*> the Dif-estimate under computation by CTGSYL, where the +*> scaling factor RDSCAL (see below) has been factored out. +*> On exit, the corresponding sum of squares updated with the +*> contributions from the current sub-system. +*> If TRANS = 'T' RDSUM is not touched. +*> NOTE: RDSUM only makes sense when CTGSY2 is called by CTGSYL. +*> \endverbatim +*> +*> \param[in,out] RDSCAL +*> \verbatim +*> RDSCAL is REAL +*> On entry, scaling factor used to prevent overflow in RDSUM. +*> On exit, RDSCAL is updated w.r.t. the current contributions +*> in RDSUM. +*> If TRANS = 'T', RDSCAL is not touched. +*> NOTE: RDSCAL only makes sense when CTGSY2 is called by +*> CTGSYL. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N). +*> The pivot indices; for 1 <= i <= N, row i of the +*> matrix has been interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] JPIV +*> \verbatim +*> JPIV is INTEGER array, dimension (N). +*> The pivot indices; for 1 <= j <= N, column j of the +*> matrix has been interchanged with column JPIV(j). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> This routine is a further developed implementation of algorithm +*> BSOLVE in [1] using complete pivoting in the LU factorization. +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> [1] Bo Kagstrom and Lars Westin, +*> Generalized Schur Methods with Condition Estimators for +*> Solving the Generalized Sylvester Equation, IEEE Transactions +*> on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751. +*> +*> [2] Peter Poromaa, +*> On Efficient and Robust Estimators for the Separation +*> between two Regular Matrix Pairs with Applications in +*> Condition Estimation. Report UMINF-95.05, Department of +*> Computing Science, Umea University, S-901 87 Umea, Sweden, +*> 1995. +* +* ===================================================================== + SUBROUTINE CLATDF( IJOB, N, Z, LDZ, RHS, RDSUM, RDSCAL, IPIV, + $ JPIV ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER IJOB, LDZ, N + REAL RDSCAL, RDSUM +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + COMPLEX RHS( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXDIM + PARAMETER ( MAXDIM = 2 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J, K + REAL RTEMP, SCALE, SMINU, SPLUS + COMPLEX BM, BP, PMONE, TEMP +* .. +* .. Local Arrays .. + REAL RWORK( MAXDIM ) + COMPLEX WORK( 4*MAXDIM ), XM( MAXDIM ), XP( MAXDIM ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CGECON, CGESC2, CLASSQ, CLASWP, + $ CSCAL +* .. +* .. External Functions .. + REAL SCASUM + COMPLEX CDOTC + EXTERNAL SCASUM, CDOTC +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, SQRT +* .. +* .. Executable Statements .. +* + IF( IJOB.NE.2 ) THEN +* +* Apply permutations IPIV to RHS +* + CALL CLASWP( 1, RHS, LDZ, 1, N-1, IPIV, 1 ) +* +* Solve for L-part choosing RHS either to +1 or -1. +* + PMONE = -CONE + DO 10 J = 1, N - 1 + BP = RHS( J ) + CONE + BM = RHS( J ) - CONE + SPLUS = ONE +* +* Lockahead for L- part RHS(1:N-1) = +-1 +* SPLUS and SMIN computed more efficiently than in BSOLVE[1]. +* + SPLUS = SPLUS + REAL( CDOTC( N-J, Z( J+1, J ), 1, Z( J+1, + $ J ), 1 ) ) + SMINU = REAL( CDOTC( N-J, Z( J+1, J ), 1, RHS( J+1 ), 1 ) ) + SPLUS = SPLUS*REAL( RHS( J ) ) + IF( SPLUS.GT.SMINU ) THEN + RHS( J ) = BP + ELSE IF( SMINU.GT.SPLUS ) THEN + RHS( J ) = BM + ELSE +* +* In this case the updating sums are equal and we can +* choose RHS(J) +1 or -1. The first time this happens we +* choose -1, thereafter +1. This is a simple way to get +* good estimates of matrices like Byers well-known example +* (see [1]). (Not done in BSOLVE.) +* + RHS( J ) = RHS( J ) + PMONE + PMONE = CONE + END IF +* +* Compute the remaining r.h.s. +* + TEMP = -RHS( J ) + CALL CAXPY( N-J, TEMP, Z( J+1, J ), 1, RHS( J+1 ), 1 ) + 10 CONTINUE +* +* Solve for U- part, lockahead for RHS(N) = +-1. This is not done +* In BSOLVE and will hopefully give us a better estimate because +* any ill-conditioning of the original matrix is transfered to U +* and not to L. U(N, N) is an approximation to sigma_min(LU). +* + CALL CCOPY( N-1, RHS, 1, WORK, 1 ) + WORK( N ) = RHS( N ) + CONE + RHS( N ) = RHS( N ) - CONE + SPLUS = ZERO + SMINU = ZERO + DO 30 I = N, 1, -1 + TEMP = CONE / Z( I, I ) + WORK( I ) = WORK( I )*TEMP + RHS( I ) = RHS( I )*TEMP + DO 20 K = I + 1, N + WORK( I ) = WORK( I ) - WORK( K )*( Z( I, K )*TEMP ) + RHS( I ) = RHS( I ) - RHS( K )*( Z( I, K )*TEMP ) + 20 CONTINUE + SPLUS = SPLUS + ABS( WORK( I ) ) + SMINU = SMINU + ABS( RHS( I ) ) + 30 CONTINUE + IF( SPLUS.GT.SMINU ) + $ CALL CCOPY( N, WORK, 1, RHS, 1 ) +* +* Apply the permutations JPIV to the computed solution (RHS) +* + CALL CLASWP( 1, RHS, LDZ, 1, N-1, JPIV, -1 ) +* +* Compute the sum of squares +* + CALL CLASSQ( N, RHS, 1, RDSCAL, RDSUM ) + RETURN + END IF +* +* ENTRY IJOB = 2 +* +* Compute approximate nullvector XM of Z +* + CALL CGECON( 'I', N, Z, LDZ, ONE, RTEMP, WORK, RWORK, INFO ) + CALL CCOPY( N, WORK( N+1 ), 1, XM, 1 ) +* +* Compute RHS +* + CALL CLASWP( 1, XM, LDZ, 1, N-1, IPIV, -1 ) + TEMP = CONE / SQRT( CDOTC( N, XM, 1, XM, 1 ) ) + CALL CSCAL( N, TEMP, XM, 1 ) + CALL CCOPY( N, XM, 1, XP, 1 ) + CALL CAXPY( N, CONE, RHS, 1, XP, 1 ) + CALL CAXPY( N, -CONE, XM, 1, RHS, 1 ) + CALL CGESC2( N, Z, LDZ, RHS, IPIV, JPIV, SCALE ) + CALL CGESC2( N, Z, LDZ, XP, IPIV, JPIV, SCALE ) + IF( SCASUM( N, XP, 1 ).GT.SCASUM( N, RHS, 1 ) ) + $ CALL CCOPY( N, XP, 1, RHS, 1 ) +* +* Compute the sum of squares +* + CALL CLASSQ( N, RHS, 1, RDSCAL, RDSUM ) + RETURN +* +* End of CLATDF +* + END diff --git a/dspl/liblapack/SRC/clatps.f b/dspl/liblapack/SRC/clatps.f new file mode 100644 index 0000000..402b33e --- /dev/null +++ b/dspl/liblapack/SRC/clatps.f @@ -0,0 +1,978 @@ +*> \brief \b CLATPS solves a triangular system of equations with the matrix held in packed storage. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLATPS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLATPS( UPLO, TRANS, DIAG, NORMIN, N, AP, X, SCALE, +* CNORM, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORMIN, TRANS, UPLO +* INTEGER INFO, N +* REAL SCALE +* .. +* .. Array Arguments .. +* REAL CNORM( * ) +* COMPLEX AP( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLATPS solves one of the triangular systems +*> +*> A * x = s*b, A**T * x = s*b, or A**H * x = s*b, +*> +*> with scaling to prevent overflow, where A is an upper or lower +*> triangular matrix stored in packed form. Here A**T denotes the +*> transpose of A, A**H denotes the conjugate transpose of A, x and b +*> are n-element vectors, and s is a scaling factor, usually less than +*> or equal to 1, chosen so that the components of x will be less than +*> the overflow threshold. If the unscaled problem will not cause +*> overflow, the Level 2 BLAS routine CTPSV is called. If the matrix A +*> is singular (A(j,j) = 0 for some j), then s is set to 0 and a +*> non-trivial solution to A*x = 0 is returned. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the operation applied to A. +*> = 'N': Solve A * x = s*b (No transpose) +*> = 'T': Solve A**T * x = s*b (Transpose) +*> = 'C': Solve A**H * x = s*b (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] NORMIN +*> \verbatim +*> NORMIN is CHARACTER*1 +*> Specifies whether CNORM has been set or not. +*> = 'Y': CNORM contains the column norms on entry +*> = 'N': CNORM is not set on entry. On exit, the norms will +*> be computed and stored in CNORM. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> The upper or lower triangular matrix A, packed columnwise in +*> a linear array. The j-th column of A is stored in the array +*> AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (N) +*> On entry, the right hand side b of the triangular system. +*> On exit, X is overwritten by the solution vector x. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL +*> The scaling factor s for the triangular system +*> A * x = s*b, A**T * x = s*b, or A**H * x = s*b. +*> If SCALE = 0, the matrix A is singular or badly scaled, and +*> the vector x is an exact or approximate solution to A*x = 0. +*> \endverbatim +*> +*> \param[in,out] CNORM +*> \verbatim +*> CNORM is REAL array, dimension (N) +*> +*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +*> contains the norm of the off-diagonal part of the j-th column +*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal +*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +*> must be greater than or equal to the 1-norm. +*> +*> If NORMIN = 'N', CNORM is an output argument and CNORM(j) +*> returns the 1-norm of the offdiagonal part of the j-th column +*> of A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> A rough bound on x is computed; if that is less than overflow, CTPSV +*> is called, otherwise, specific code is used which checks for possible +*> overflow or divide-by-zero at every operation. +*> +*> A columnwise scheme is used for solving A*x = b. The basic algorithm +*> if A is lower triangular is +*> +*> x[1:n] := b[1:n] +*> for j = 1, ..., n +*> x(j) := x(j) / A(j,j) +*> x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +*> end +*> +*> Define bounds on the components of x after j iterations of the loop: +*> M(j) = bound on x[1:j] +*> G(j) = bound on x[j+1:n] +*> Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +*> +*> Then for iteration j+1 we have +*> M(j+1) <= G(j) / | A(j+1,j+1) | +*> G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +*> <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +*> +*> where CNORM(j+1) is greater than or equal to the infinity-norm of +*> column j+1 of A, not counting the diagonal. Hence +*> +*> G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +*> 1<=i<=j +*> and +*> +*> |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +*> 1<=i< j +*> +*> Since |x(j)| <= M(j), we use the Level 2 BLAS routine CTPSV if the +*> reciprocal of the largest M(j), j=1,..,n, is larger than +*> max(underflow, 1/overflow). +*> +*> The bound on x(j) is also used to determine when a step in the +*> columnwise method can be performed without fear of overflow. If +*> the computed bound is greater than a large constant, x is scaled to +*> prevent overflow, but if the bound overflows, x is set to 0, x(j) to +*> 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +*> +*> Similarly, a row-wise scheme is used to solve A**T *x = b or +*> A**H *x = b. The basic algorithm for A upper triangular is +*> +*> for j = 1, ..., n +*> x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +*> end +*> +*> We simultaneously compute two bounds +*> G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +*> M(j) = bound on x(i), 1<=i<=j +*> +*> The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +*> add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +*> Then the bound on x(j) is +*> +*> M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +*> +*> <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +*> 1<=i<=j +*> +*> and we can safely call CTPSV if 1/M(n) and 1/G(n) are both greater +*> than max(underflow, 1/overflow). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLATPS( UPLO, TRANS, DIAG, NORMIN, N, AP, X, SCALE, + $ CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, N + REAL SCALE +* .. +* .. Array Arguments .. + REAL CNORM( * ) + COMPLEX AP( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0, + $ TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, IP, J, JFIRST, JINC, JLAST, JLEN + REAL BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL, + $ XBND, XJ, XMAX + COMPLEX CSUMJ, TJJS, USCAL, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX, ISAMAX + REAL SCASUM, SLAMCH + COMPLEX CDOTC, CDOTU, CLADIV + EXTERNAL LSAME, ICAMAX, ISAMAX, SCASUM, SLAMCH, CDOTC, + $ CDOTU, CLADIV +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CSSCAL, CTPSV, SLABAD, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1, CABS2 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) + CABS2( ZDUM ) = ABS( REAL( ZDUM ) / 2. ) + + $ ABS( AIMAG( ZDUM ) / 2. ) +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLATPS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM / SLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + IP = 1 + DO 10 J = 1, N + CNORM( J ) = SCASUM( J-1, AP( IP ), 1 ) + IP = IP + J + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + IP = 1 + DO 20 J = 1, N - 1 + CNORM( J ) = SCASUM( N-J, AP( IP+1 ), 1 ) + IP = IP + N - J + 1 + 20 CONTINUE + CNORM( N ) = ZERO + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM/2. +* + IMAX = ISAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM*HALF ) THEN + TSCAL = ONE + ELSE + TSCAL = HALF / ( SMLNUM*TMAX ) + CALL SSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine CTPSV can be used. +* + XMAX = ZERO + DO 30 J = 1, N + XMAX = MAX( XMAX, CABS2( X( J ) ) ) + 30 CONTINUE + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 60 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = N + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* + TJJS = AP( IP ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = G(j-1) / abs(A(j,j)) +* + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF +* + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + IP = IP + JINC*JLEN + JLEN = JLEN - 1 + 40 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 50 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 50 CONTINUE + END IF + 60 CONTINUE +* + ELSE +* +* Compute the growth in A**T * x = b or A**H * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 90 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = 1 + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* + TJJS = AP( IP ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF + JLEN = JLEN + 1 + IP = IP + JINC*JLEN + 70 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 80 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 80 CONTINUE + END IF + 90 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL CTPSV( UPLO, TRANS, DIAG, N, AP, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM*HALF ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = ( BIGNUM*HALF ) / XMAX + CALL CSSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + ELSE + XMAX = XMAX*TWO + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + IP = JFIRST*( JFIRST+1 ) / 2 + DO 110 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 105 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 100 I = 1, N + X( I ) = ZERO + 100 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 105 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL CSSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +* + CALL CAXPY( J-1, -X( J )*TSCAL, AP( IP-J+1 ), 1, X, + $ 1 ) + I = ICAMAX( J-1, X, 1 ) + XMAX = CABS1( X( I ) ) + END IF + IP = IP - J + ELSE + IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +* + CALL CAXPY( N-J, -X( J )*TSCAL, AP( IP+1 ), 1, + $ X( J+1 ), 1 ) + I = J + ICAMAX( N-J, X( J+1 ), 1 ) + XMAX = CABS1( X( I ) ) + END IF + IP = IP + N - J + 1 + END IF + 110 CONTINUE +* + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Solve A**T * x = b +* + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = 1 + DO 150 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = CLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.CMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call CDOTU to perform the dot product. +* + IF( UPPER ) THEN + CSUMJ = CDOTU( J-1, AP( IP-J+1 ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + CSUMJ = CDOTU( N-J, AP( IP+1 ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 120 I = 1, J - 1 + CSUMJ = CSUMJ + ( AP( IP-J+I )*USCAL )*X( I ) + 120 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 130 I = 1, N - J + CSUMJ = CSUMJ + ( AP( IP+I )*USCAL )*X( J+I ) + 130 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 145 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**T *x = 0. +* + DO 140 I = 1, N + X( I ) = ZERO + 140 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 145 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + JLEN = JLEN + 1 + IP = IP + JINC*JLEN + 150 CONTINUE +* + ELSE +* +* Solve A**H * x = b +* + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = 1 + DO 190 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = CONJG( AP( IP ) )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = CLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.CMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call CDOTC to perform the dot product. +* + IF( UPPER ) THEN + CSUMJ = CDOTC( J-1, AP( IP-J+1 ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + CSUMJ = CDOTC( N-J, AP( IP+1 ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 160 I = 1, J - 1 + CSUMJ = CSUMJ + ( CONJG( AP( IP-J+I ) )*USCAL )* + $ X( I ) + 160 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 170 I = 1, N - J + CSUMJ = CSUMJ + ( CONJG( AP( IP+I ) )*USCAL )* + $ X( J+I ) + 170 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = CONJG( AP( IP ) )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 185 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**H *x = 0. +* + DO 180 I = 1, N + X( I ) = ZERO + 180 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 185 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + JLEN = JLEN + 1 + IP = IP + JINC*JLEN + 190 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL SSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of CLATPS +* + END diff --git a/dspl/liblapack/SRC/clatrd.f b/dspl/liblapack/SRC/clatrd.f new file mode 100644 index 0000000..1ad84c1 --- /dev/null +++ b/dspl/liblapack/SRC/clatrd.f @@ -0,0 +1,358 @@ +*> \brief \b CLATRD reduces the first nb rows and columns of a symmetric/Hermitian matrix A to real tridiagonal form by an unitary similarity transformation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLATRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* REAL E( * ) +* COMPLEX A( LDA, * ), TAU( * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLATRD reduces NB rows and columns of a complex Hermitian matrix A to +*> Hermitian tridiagonal form by a unitary similarity +*> transformation Q**H * A * Q, and returns the matrices V and W which are +*> needed to apply the transformation to the unreduced part of A. +*> +*> If UPLO = 'U', CLATRD reduces the last NB rows and columns of a +*> matrix, of which the upper triangle is supplied; +*> if UPLO = 'L', CLATRD reduces the first NB rows and columns of a +*> matrix, of which the lower triangle is supplied. +*> +*> This is an auxiliary routine called by CHETRD. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of rows and columns to be reduced. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit: +*> if UPLO = 'U', the last NB columns have been reduced to +*> tridiagonal form, with the diagonal elements overwriting +*> the diagonal elements of A; the elements above the diagonal +*> with the array TAU, represent the unitary matrix Q as a +*> product of elementary reflectors; +*> if UPLO = 'L', the first NB columns have been reduced to +*> tridiagonal form, with the diagonal elements overwriting +*> the diagonal elements of A; the elements below the diagonal +*> with the array TAU, represent the unitary matrix Q as a +*> product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal +*> elements of the last NB columns of the reduced matrix; +*> if UPLO = 'L', E(1:nb) contains the subdiagonal elements of +*> the first NB columns of the reduced matrix. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (N-1) +*> The scalar factors of the elementary reflectors, stored in +*> TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. +*> See Further Details. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX array, dimension (LDW,NB) +*> The n-by-nb matrix W required to update the unreduced part +*> of A. +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n) H(n-1) . . . H(n-nb+1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), +*> and tau in TAU(i-1). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(nb). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), +*> and tau in TAU(i). +*> +*> The elements of the vectors v together form the n-by-nb matrix V +*> which is needed, with W, to apply the transformation to the unreduced +*> part of the matrix, using a Hermitian rank-2k update of the form: +*> A := A - V*W**H - W*V**H. +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5 and nb = 2: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( a a a v4 v5 ) ( d ) +*> ( a a v4 v5 ) ( 1 d ) +*> ( a 1 v5 ) ( v1 1 a ) +*> ( d 1 ) ( v1 v2 a a ) +*> ( d ) ( v1 v2 a a a ) +*> +*> where d denotes a diagonal element of the reduced matrix, a denotes +*> an element of the original matrix that is unchanged, and vi denotes +*> an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDW, N, NB +* .. +* .. Array Arguments .. + REAL E( * ) + COMPLEX A( LDA, * ), TAU( * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE, HALF + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ), + $ HALF = ( 0.5E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, IW + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CGEMV, CHEMV, CLACGV, CLARFG, CSCAL +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, REAL +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Reduce last NB columns of upper triangle +* + DO 10 I = N, N - NB + 1, -1 + IW = I - N + NB + IF( I.LT.N ) THEN +* +* Update A(1:i,i) +* + A( I, I ) = REAL( A( I, I ) ) + CALL CLACGV( N-I, W( I, IW+1 ), LDW ) + CALL CGEMV( 'No transpose', I, N-I, -ONE, A( 1, I+1 ), + $ LDA, W( I, IW+1 ), LDW, ONE, A( 1, I ), 1 ) + CALL CLACGV( N-I, W( I, IW+1 ), LDW ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + CALL CGEMV( 'No transpose', I, N-I, -ONE, W( 1, IW+1 ), + $ LDW, A( I, I+1 ), LDA, ONE, A( 1, I ), 1 ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + A( I, I ) = REAL( A( I, I ) ) + END IF + IF( I.GT.1 ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(1:i-2,i) +* + ALPHA = A( I-1, I ) + CALL CLARFG( I-1, ALPHA, A( 1, I ), 1, TAU( I-1 ) ) + E( I-1 ) = ALPHA + A( I-1, I ) = ONE +* +* Compute W(1:i-1,i) +* + CALL CHEMV( 'Upper', I-1, ONE, A, LDA, A( 1, I ), 1, + $ ZERO, W( 1, IW ), 1 ) + IF( I.LT.N ) THEN + CALL CGEMV( 'Conjugate transpose', I-1, N-I, ONE, + $ W( 1, IW+1 ), LDW, A( 1, I ), 1, ZERO, + $ W( I+1, IW ), 1 ) + CALL CGEMV( 'No transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + CALL CGEMV( 'Conjugate transpose', I-1, N-I, ONE, + $ A( 1, I+1 ), LDA, A( 1, I ), 1, ZERO, + $ W( I+1, IW ), 1 ) + CALL CGEMV( 'No transpose', I-1, N-I, -ONE, + $ W( 1, IW+1 ), LDW, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + END IF + CALL CSCAL( I-1, TAU( I-1 ), W( 1, IW ), 1 ) + ALPHA = -HALF*TAU( I-1 )*CDOTC( I-1, W( 1, IW ), 1, + $ A( 1, I ), 1 ) + CALL CAXPY( I-1, ALPHA, A( 1, I ), 1, W( 1, IW ), 1 ) + END IF +* + 10 CONTINUE + ELSE +* +* Reduce first NB columns of lower triangle +* + DO 20 I = 1, NB +* +* Update A(i:n,i) +* + A( I, I ) = REAL( A( I, I ) ) + CALL CLACGV( I-1, W( I, 1 ), LDW ) + CALL CGEMV( 'No transpose', N-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, W( I, 1 ), LDW, ONE, A( I, I ), 1 ) + CALL CLACGV( I-1, W( I, 1 ), LDW ) + CALL CLACGV( I-1, A( I, 1 ), LDA ) + CALL CGEMV( 'No transpose', N-I+1, I-1, -ONE, W( I, 1 ), + $ LDW, A( I, 1 ), LDA, ONE, A( I, I ), 1 ) + CALL CLACGV( I-1, A( I, 1 ), LDA ) + A( I, I ) = REAL( A( I, I ) ) + IF( I.LT.N ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:n,i) +* + ALPHA = A( I+1, I ) + CALL CLARFG( N-I, ALPHA, A( MIN( I+2, N ), I ), 1, + $ TAU( I ) ) + E( I ) = ALPHA + A( I+1, I ) = ONE +* +* Compute W(i+1:n,i) +* + CALL CHEMV( 'Lower', N-I, ONE, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, W( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', N-I, I-1, ONE, + $ W( I+1, 1 ), LDW, A( I+1, I ), 1, ZERO, + $ W( 1, I ), 1 ) + CALL CGEMV( 'No transpose', N-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL CGEMV( 'Conjugate transpose', N-I, I-1, ONE, + $ A( I+1, 1 ), LDA, A( I+1, I ), 1, ZERO, + $ W( 1, I ), 1 ) + CALL CGEMV( 'No transpose', N-I, I-1, -ONE, W( I+1, 1 ), + $ LDW, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL CSCAL( N-I, TAU( I ), W( I+1, I ), 1 ) + ALPHA = -HALF*TAU( I )*CDOTC( N-I, W( I+1, I ), 1, + $ A( I+1, I ), 1 ) + CALL CAXPY( N-I, ALPHA, A( I+1, I ), 1, W( I+1, I ), 1 ) + END IF +* + 20 CONTINUE + END IF +* + RETURN +* +* End of CLATRD +* + END diff --git a/dspl/liblapack/SRC/clatrs.f b/dspl/liblapack/SRC/clatrs.f new file mode 100644 index 0000000..8fc539b --- /dev/null +++ b/dspl/liblapack/SRC/clatrs.f @@ -0,0 +1,966 @@ +*> \brief \b CLATRS solves a triangular system of equations with the scale factor set to prevent overflow. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLATRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE, +* CNORM, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORMIN, TRANS, UPLO +* INTEGER INFO, LDA, N +* REAL SCALE +* .. +* .. Array Arguments .. +* REAL CNORM( * ) +* COMPLEX A( LDA, * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLATRS solves one of the triangular systems +*> +*> A * x = s*b, A**T * x = s*b, or A**H * x = s*b, +*> +*> with scaling to prevent overflow. Here A is an upper or lower +*> triangular matrix, A**T denotes the transpose of A, A**H denotes the +*> conjugate transpose of A, x and b are n-element vectors, and s is a +*> scaling factor, usually less than or equal to 1, chosen so that the +*> components of x will be less than the overflow threshold. If the +*> unscaled problem will not cause overflow, the Level 2 BLAS routine +*> CTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j), +*> then s is set to 0 and a non-trivial solution to A*x = 0 is returned. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the operation applied to A. +*> = 'N': Solve A * x = s*b (No transpose) +*> = 'T': Solve A**T * x = s*b (Transpose) +*> = 'C': Solve A**H * x = s*b (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] NORMIN +*> \verbatim +*> NORMIN is CHARACTER*1 +*> Specifies whether CNORM has been set or not. +*> = 'Y': CNORM contains the column norms on entry +*> = 'N': CNORM is not set on entry. On exit, the norms will +*> be computed and stored in CNORM. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The triangular matrix A. If UPLO = 'U', the leading n by n +*> upper triangular part of the array A contains the upper +*> triangular matrix, and the strictly lower triangular part of +*> A is not referenced. If UPLO = 'L', the leading n by n lower +*> triangular part of the array A contains the lower triangular +*> matrix, and the strictly upper triangular part of A is not +*> referenced. If DIAG = 'U', the diagonal elements of A are +*> also not referenced and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max (1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (N) +*> On entry, the right hand side b of the triangular system. +*> On exit, X is overwritten by the solution vector x. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL +*> The scaling factor s for the triangular system +*> A * x = s*b, A**T * x = s*b, or A**H * x = s*b. +*> If SCALE = 0, the matrix A is singular or badly scaled, and +*> the vector x is an exact or approximate solution to A*x = 0. +*> \endverbatim +*> +*> \param[in,out] CNORM +*> \verbatim +*> CNORM is REAL array, dimension (N) +*> +*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +*> contains the norm of the off-diagonal part of the j-th column +*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal +*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +*> must be greater than or equal to the 1-norm. +*> +*> If NORMIN = 'N', CNORM is an output argument and CNORM(j) +*> returns the 1-norm of the offdiagonal part of the j-th column +*> of A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> A rough bound on x is computed; if that is less than overflow, CTRSV +*> is called, otherwise, specific code is used which checks for possible +*> overflow or divide-by-zero at every operation. +*> +*> A columnwise scheme is used for solving A*x = b. The basic algorithm +*> if A is lower triangular is +*> +*> x[1:n] := b[1:n] +*> for j = 1, ..., n +*> x(j) := x(j) / A(j,j) +*> x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +*> end +*> +*> Define bounds on the components of x after j iterations of the loop: +*> M(j) = bound on x[1:j] +*> G(j) = bound on x[j+1:n] +*> Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +*> +*> Then for iteration j+1 we have +*> M(j+1) <= G(j) / | A(j+1,j+1) | +*> G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +*> <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +*> +*> where CNORM(j+1) is greater than or equal to the infinity-norm of +*> column j+1 of A, not counting the diagonal. Hence +*> +*> G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +*> 1<=i<=j +*> and +*> +*> |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +*> 1<=i< j +*> +*> Since |x(j)| <= M(j), we use the Level 2 BLAS routine CTRSV if the +*> reciprocal of the largest M(j), j=1,..,n, is larger than +*> max(underflow, 1/overflow). +*> +*> The bound on x(j) is also used to determine when a step in the +*> columnwise method can be performed without fear of overflow. If +*> the computed bound is greater than a large constant, x is scaled to +*> prevent overflow, but if the bound overflows, x is set to 0, x(j) to +*> 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +*> +*> Similarly, a row-wise scheme is used to solve A**T *x = b or +*> A**H *x = b. The basic algorithm for A upper triangular is +*> +*> for j = 1, ..., n +*> x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +*> end +*> +*> We simultaneously compute two bounds +*> G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +*> M(j) = bound on x(i), 1<=i<=j +*> +*> The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +*> add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +*> Then the bound on x(j) is +*> +*> M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +*> +*> <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +*> 1<=i<=j +*> +*> and we can safely call CTRSV if 1/M(n) and 1/G(n) are both greater +*> than max(underflow, 1/overflow). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE, + $ CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, LDA, N + REAL SCALE +* .. +* .. Array Arguments .. + REAL CNORM( * ) + COMPLEX A( LDA, * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0, + $ TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST + REAL BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL, + $ XBND, XJ, XMAX + COMPLEX CSUMJ, TJJS, USCAL, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX, ISAMAX + REAL SCASUM, SLAMCH + COMPLEX CDOTC, CDOTU, CLADIV + EXTERNAL LSAME, ICAMAX, ISAMAX, SCASUM, SLAMCH, CDOTC, + $ CDOTU, CLADIV +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CSSCAL, CTRSV, SLABAD, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL CABS1, CABS2 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) + CABS2( ZDUM ) = ABS( REAL( ZDUM ) / 2. ) + + $ ABS( AIMAG( ZDUM ) / 2. ) +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLATRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM / SLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + DO 10 J = 1, N + CNORM( J ) = SCASUM( J-1, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N - 1 + CNORM( J ) = SCASUM( N-J, A( J+1, J ), 1 ) + 20 CONTINUE + CNORM( N ) = ZERO + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM/2. +* + IMAX = ISAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM*HALF ) THEN + TSCAL = ONE + ELSE + TSCAL = HALF / ( SMLNUM*TMAX ) + CALL SSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine CTRSV can be used. +* + XMAX = ZERO + DO 30 J = 1, N + XMAX = MAX( XMAX, CABS2( X( J ) ) ) + 30 CONTINUE + XBND = XMAX +* + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 60 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* + TJJS = A( J, J ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = G(j-1) / abs(A(j,j)) +* + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF +* + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 40 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 50 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 50 CONTINUE + END IF + 60 CONTINUE +* + ELSE +* +* Compute the growth in A**T * x = b or A**H * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 90 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* + TJJS = A( J, J ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF + 70 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 80 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 80 CONTINUE + END IF + 90 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL CTRSV( UPLO, TRANS, DIAG, N, A, LDA, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM*HALF ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = ( BIGNUM*HALF ) / XMAX + CALL CSSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + ELSE + XMAX = XMAX*TWO + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 110 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 105 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 100 I = 1, N + X( I ) = ZERO + 100 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 105 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL CSSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +* + CALL CAXPY( J-1, -X( J )*TSCAL, A( 1, J ), 1, X, + $ 1 ) + I = ICAMAX( J-1, X, 1 ) + XMAX = CABS1( X( I ) ) + END IF + ELSE + IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +* + CALL CAXPY( N-J, -X( J )*TSCAL, A( J+1, J ), 1, + $ X( J+1 ), 1 ) + I = J + ICAMAX( N-J, X( J+1 ), 1 ) + XMAX = CABS1( X( I ) ) + END IF + END IF + 110 CONTINUE +* + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Solve A**T * x = b +* + DO 150 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = CLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.CMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call CDOTU to perform the dot product. +* + IF( UPPER ) THEN + CSUMJ = CDOTU( J-1, A( 1, J ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + CSUMJ = CDOTU( N-J, A( J+1, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 120 I = 1, J - 1 + CSUMJ = CSUMJ + ( A( I, J )*USCAL )*X( I ) + 120 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 130 I = J + 1, N + CSUMJ = CSUMJ + ( A( I, J )*USCAL )*X( I ) + 130 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 145 + END IF +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**T *x = 0. +* + DO 140 I = 1, N + X( I ) = ZERO + 140 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 145 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + 150 CONTINUE +* + ELSE +* +* Solve A**H * x = b +* + DO 190 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = CONJG( A( J, J ) )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = CLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.CMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call CDOTC to perform the dot product. +* + IF( UPPER ) THEN + CSUMJ = CDOTC( J-1, A( 1, J ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + CSUMJ = CDOTC( N-J, A( J+1, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 160 I = 1, J - 1 + CSUMJ = CSUMJ + ( CONJG( A( I, J ) )*USCAL )* + $ X( I ) + 160 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 170 I = J + 1, N + CSUMJ = CSUMJ + ( CONJG( A( I, J ) )*USCAL )* + $ X( I ) + 170 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = CONJG( A( J, J ) )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 185 + END IF +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL CSSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = CLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**H *x = 0. +* + DO 180 I = 1, N + X( I ) = ZERO + 180 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 185 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + 190 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL SSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of CLATRS +* + END diff --git a/dspl/liblapack/SRC/clatrz.f b/dspl/liblapack/SRC/clatrz.f new file mode 100644 index 0000000..0328e31 --- /dev/null +++ b/dspl/liblapack/SRC/clatrz.f @@ -0,0 +1,206 @@ +*> \brief \b CLATRZ factors an upper trapezoidal matrix by means of unitary transformations. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLATRZ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLATRZ( M, N, L, A, LDA, TAU, WORK ) +* +* .. Scalar Arguments .. +* INTEGER L, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLATRZ factors the M-by-(M+L) complex upper trapezoidal matrix +*> [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R 0 ) * Z by means +*> of unitary transformations, where Z is an (M+L)-by-(M+L) unitary +*> matrix and, R and A1 are M-by-M upper triangular matrices. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of columns of the matrix A containing the +*> meaningful part of the Householder vectors. N-M >= L >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the leading M-by-N upper trapezoidal part of the +*> array A must contain the matrix to be factorized. +*> On exit, the leading M-by-M upper triangular part of A +*> contains the upper triangular matrix R, and elements N-L+1 to +*> N of the first M rows of A, with the array TAU, represent the +*> unitary matrix Z as a product of M elementary reflectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (M) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (M) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The factorization is obtained by Householder's method. The kth +*> transformation matrix, Z( k ), which is used to introduce zeros into +*> the ( m - k + 1 )th row of A, is given in the form +*> +*> Z( k ) = ( I 0 ), +*> ( 0 T( k ) ) +*> +*> where +*> +*> T( k ) = I - tau*u( k )*u( k )**H, u( k ) = ( 1 ), +*> ( 0 ) +*> ( z( k ) ) +*> +*> tau is a scalar and z( k ) is an l element vector. tau and z( k ) +*> are chosen to annihilate the elements of the kth row of A2. +*> +*> The scalar tau is returned in the kth element of TAU and the vector +*> u( k ) in the kth row of A2, such that the elements of z( k ) are +*> in a( k, l + 1 ), ..., a( k, n ). The elements of R are returned in +*> the upper triangular part of A1. +*> +*> Z is given by +*> +*> Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLATRZ( M, N, L, A, LDA, TAU, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER L, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARFG, CLARZ +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.EQ.0 ) THEN + RETURN + ELSE IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + RETURN + END IF +* + DO 20 I = M, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* [ A(i,i) A(i,n-l+1:n) ] +* + CALL CLACGV( L, A( I, N-L+1 ), LDA ) + ALPHA = CONJG( A( I, I ) ) + CALL CLARFG( L+1, ALPHA, A( I, N-L+1 ), LDA, TAU( I ) ) + TAU( I ) = CONJG( TAU( I ) ) +* +* Apply H(i) to A(1:i-1,i:n) from the right +* + CALL CLARZ( 'Right', I-1, N-I+1, L, A( I, N-L+1 ), LDA, + $ CONJG( TAU( I ) ), A( 1, I ), LDA, WORK ) + A( I, I ) = CONJG( ALPHA ) +* + 20 CONTINUE +* + RETURN +* +* End of CLATRZ +* + END diff --git a/dspl/liblapack/SRC/clatsqr.f b/dspl/liblapack/SRC/clatsqr.f new file mode 100644 index 0000000..dab5774 --- /dev/null +++ b/dspl/liblapack/SRC/clatsqr.f @@ -0,0 +1,255 @@ +* +* Definition: +* =========== +* +* SUBROUTINE CLATSQR( M, N, MB, NB, A, LDA, T, LDT, WORK, +* LWORK, INFO) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, MB, NB, LDT, LWORK +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLATSQR computes a blocked Tall-Skinny QR factorization of +*> an M-by-N matrix A, where M >= N: +*> A = Q * R . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The row block size to be used in the blocked QR. +*> MB > N. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The column block size to be used in the blocked QR. +*> N >= NB >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal +*> of the array contain the N-by-N upper triangular matrix R; +*> the elements below the diagonal represent Q by the columns +*> of blocked V (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, +*> dimension (LDT, N * Number_of_row_blocks) +*> where Number_of_row_blocks = CEIL((M-N)/(MB-N)) +*> The blocked upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. +*> See Further Details below. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The dimension of the array WORK. LWORK >= NB*N. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> Tall-Skinny QR (TSQR) performs QR by a sequence of orthogonal transformations, +*> representing Q as a product of other orthogonal matrices +*> Q = Q(1) * Q(2) * . . . * Q(k) +*> where each Q(i) zeros out subdiagonal entries of a block of MB rows of A: +*> Q(1) zeros out the subdiagonal entries of rows 1:MB of A +*> Q(2) zeros out the bottom MB-N rows of rows [1:N,MB+1:2*MB-N] of A +*> Q(3) zeros out the bottom MB-N rows of rows [1:N,2*MB-N+1:3*MB-2*N] of A +*> . . . +*> +*> Q(1) is computed by GEQRT, which represents Q(1) by Householder vectors +*> stored under the diagonal of rows 1:MB of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,1:N). +*> For more information see Further Details in GEQRT. +*> +*> Q(i) for i>1 is computed by TPQRT, which represents Q(i) by Householder vectors +*> stored in rows [(i-1)*(MB-N)+N+1:i*(MB-N)+N] of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,(i-1)*N+1:i*N). +*> The last Q(k) may use fewer rows. +*> For more information see Further Details in TPQRT. +*> +*> For more details of the overall algorithm, see the description of +*> Sequential TSQR in Section 2.2 of [1]. +*> +*> [1] “Communication-Optimal Parallel and Sequential QR and LU Factorizations,” +*> J. Demmel, L. Grigori, M. Hoemmen, J. Langou, +*> SIAM J. Sci. Comput, vol. 34, no. 1, 2012 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CLATSQR( M, N, MB, NB, A, LDA, T, LDT, WORK, + $ LWORK, INFO) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, MB, NB, LDT, LWORK +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), WORK( * ), T(LDT, *) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, II, KK, CTR +* .. +* .. EXTERNAL FUNCTIONS .. + LOGICAL LSAME + EXTERNAL LSAME +* .. EXTERNAL SUBROUTINES .. + EXTERNAL CGEQRT, CTPQRT, XERBLA +* .. INTRINSIC FUNCTIONS .. + INTRINSIC MAX, MIN, MOD +* .. +* .. EXECUTABLE STATEMENTS .. +* +* TEST THE INPUT ARGUMENTS +* + INFO = 0 +* + LQUERY = ( LWORK.EQ.-1 ) +* + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. M.LT.N ) THEN + INFO = -2 + ELSE IF( MB.LE.N ) THEN + INFO = -3 + ELSE IF( NB.LT.1 .OR. ( NB.GT.N .AND. N.GT.0 )) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDT.LT.NB ) THEN + INFO = -8 + ELSE IF( LWORK.LT.(N*NB) .AND. (.NOT.LQUERY) ) THEN + INFO = -10 + END IF + IF( INFO.EQ.0) THEN + WORK(1) = NB*N + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLATSQR', -INFO ) + RETURN + ELSE IF (LQUERY) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN(M,N).EQ.0 ) THEN + RETURN + END IF +* +* The QR Decomposition +* + IF ((MB.LE.N).OR.(MB.GE.M)) THEN + CALL CGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO) + RETURN + END IF + KK = MOD((M-N),(MB-N)) + II=M-KK+1 +* +* Compute the QR factorization of the first block A(1:MB,1:N) +* + CALL CGEQRT( MB, N, NB, A(1,1), LDA, T, LDT, WORK, INFO ) + CTR = 1 +* + DO I = MB+1, II-MB+N , (MB-N) +* +* Compute the QR factorization of the current block A(I:I+MB-N,1:N) +* + CALL CTPQRT( MB-N, N, 0, NB, A(1,1), LDA, A( I, 1 ), LDA, + $ T(1,CTR * N + 1), + $ LDT, WORK, INFO ) + CTR = CTR + 1 + END DO +* +* Compute the QR factorization of the last block A(II:M,1:N) +* + IF (II.LE.M) THEN + CALL CTPQRT( KK, N, 0, NB, A(1,1), LDA, A( II, 1 ), LDA, + $ T(1, CTR * N + 1), LDT, + $ WORK, INFO ) + END IF +* + work( 1 ) = N*NB + RETURN +* +* End of CLATSQR +* + END diff --git a/dspl/liblapack/SRC/clauu2.f b/dspl/liblapack/SRC/clauu2.f new file mode 100644 index 0000000..930662d --- /dev/null +++ b/dspl/liblapack/SRC/clauu2.f @@ -0,0 +1,206 @@ +*> \brief \b CLAUU2 computes the product UUH or LHL, where U and L are upper or lower triangular matrices (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAUU2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAUU2( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAUU2 computes the product U * U**H or L**H * L, where the triangular +*> factor U or L is stored in the upper or lower triangular part of +*> the array A. +*> +*> If UPLO = 'U' or 'u' then the upper triangle of the result is stored, +*> overwriting the factor U in A. +*> If UPLO = 'L' or 'l' then the lower triangle of the result is stored, +*> overwriting the factor L in A. +*> +*> This is the unblocked form of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the triangular factor stored in the array A +*> is upper or lower triangular: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the triangular factor U or L. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the triangular factor U or L. +*> On exit, if UPLO = 'U', the upper triangle of A is +*> overwritten with the upper triangle of the product U * U**H; +*> if UPLO = 'L', the lower triangle of A is overwritten with +*> the lower triangle of the product L**H * L. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLAUU2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + REAL AII +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CLACGV, CSSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLAUU2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the product U * U**H. +* + DO 10 I = 1, N + AII = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = AII*AII + REAL( CDOTC( N-I, A( I, I+1 ), LDA, + $ A( I, I+1 ), LDA ) ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + CALL CGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, CMPLX( AII ), + $ A( 1, I ), 1 ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + ELSE + CALL CSSCAL( I, AII, A( 1, I ), 1 ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the product L**H * L. +* + DO 20 I = 1, N + AII = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = AII*AII + REAL( CDOTC( N-I, A( I+1, I ), 1, + $ A( I+1, I ), 1 ) ) + CALL CLACGV( I-1, A( I, 1 ), LDA ) + CALL CGEMV( 'Conjugate transpose', N-I, I-1, ONE, + $ A( I+1, 1 ), LDA, A( I+1, I ), 1, + $ CMPLX( AII ), A( I, 1 ), LDA ) + CALL CLACGV( I-1, A( I, 1 ), LDA ) + ELSE + CALL CSSCAL( I, AII, A( I, 1 ), LDA ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of CLAUU2 +* + END diff --git a/dspl/liblapack/SRC/clauum.f b/dspl/liblapack/SRC/clauum.f new file mode 100644 index 0000000..eb9f894 --- /dev/null +++ b/dspl/liblapack/SRC/clauum.f @@ -0,0 +1,223 @@ +*> \brief \b CLAUUM computes the product UUH or LHL, where U and L are upper or lower triangular matrices (blocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CLAUUM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CLAUUM( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CLAUUM computes the product U * U**H or L**H * L, where the triangular +*> factor U or L is stored in the upper or lower triangular part of +*> the array A. +*> +*> If UPLO = 'U' or 'u' then the upper triangle of the result is stored, +*> overwriting the factor U in A. +*> If UPLO = 'L' or 'l' then the lower triangle of the result is stored, +*> overwriting the factor L in A. +*> +*> This is the blocked form of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the triangular factor stored in the array A +*> is upper or lower triangular: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the triangular factor U or L. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the triangular factor U or L. +*> On exit, if UPLO = 'U', the upper triangle of A is +*> overwritten with the upper triangle of the product U * U**H; +*> if UPLO = 'L', the lower triangle of A is overwritten with +*> the lower triangle of the product L**H * L. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CLAUUM( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CHERK, CLAUU2, CTRMM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CLAUUM', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'CLAUUM', UPLO, N, -1, -1, -1 ) +* + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL CLAUU2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute the product U * U**H. +* + DO 10 I = 1, N, NB + IB = MIN( NB, N-I+1 ) + CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Non-unit', I-1, IB, CONE, A( I, I ), LDA, + $ A( 1, I ), LDA ) + CALL CLAUU2( 'Upper', IB, A( I, I ), LDA, INFO ) + IF( I+IB.LE.N ) THEN + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ I-1, IB, N-I-IB+1, CONE, A( 1, I+IB ), + $ LDA, A( I, I+IB ), LDA, CONE, A( 1, I ), + $ LDA ) + CALL CHERK( 'Upper', 'No transpose', IB, N-I-IB+1, + $ ONE, A( I, I+IB ), LDA, ONE, A( I, I ), + $ LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute the product L**H * L. +* + DO 20 I = 1, N, NB + IB = MIN( NB, N-I+1 ) + CALL CTRMM( 'Left', 'Lower', 'Conjugate transpose', + $ 'Non-unit', IB, I-1, CONE, A( I, I ), LDA, + $ A( I, 1 ), LDA ) + CALL CLAUU2( 'Lower', IB, A( I, I ), LDA, INFO ) + IF( I+IB.LE.N ) THEN + CALL CGEMM( 'Conjugate transpose', 'No transpose', IB, + $ I-1, N-I-IB+1, CONE, A( I+IB, I ), LDA, + $ A( I+IB, 1 ), LDA, CONE, A( I, 1 ), LDA ) + CALL CHERK( 'Lower', 'Conjugate transpose', IB, + $ N-I-IB+1, ONE, A( I+IB, I ), LDA, ONE, + $ A( I, I ), LDA ) + END IF + 20 CONTINUE + END IF + END IF +* + RETURN +* +* End of CLAUUM +* + END diff --git a/dspl/liblapack/SRC/cpbcon.f b/dspl/liblapack/SRC/cpbcon.f new file mode 100644 index 0000000..174f840 --- /dev/null +++ b/dspl/liblapack/SRC/cpbcon.f @@ -0,0 +1,277 @@ +*> \brief \b CPBCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPBCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPBCON( UPLO, N, KD, AB, LDAB, ANORM, RCOND, WORK, +* RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* REAL RWORK( * ) +* COMPLEX AB( LDAB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPBCON estimates the reciprocal of the condition number (in the +*> 1-norm) of a complex Hermitian positive definite band matrix using +*> the Cholesky factorization A = U**H*U or A = L*L**H computed by +*> CPBTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular factor stored in AB; +*> = 'L': Lower triangular factor stored in AB. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H*U or A = L*L**H of the band matrix A, stored in the +*> first KD+1 rows of the array. The j-th column of U or L is +*> stored in the j-th column of the array AB as follows: +*> if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The 1-norm (or infinity-norm) of the Hermitian band matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CPBCON( UPLO, N, KD, AB, LDAB, ANORM, RCOND, WORK, + $ RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX AB( LDAB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + REAL AINVNM, SCALE, SCALEL, SCALEU, SMLNUM + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CLATBS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U**H). +* + CALL CLATBS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, KD, AB, LDAB, WORK, SCALEL, RWORK, + $ INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL CLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEU, RWORK, INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL CLATBS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L**H). +* + CALL CLATBS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, KD, AB, LDAB, WORK, SCALEU, RWORK, + $ INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE +* + RETURN +* +* End of CPBCON +* + END diff --git a/dspl/liblapack/SRC/cpbequ.f b/dspl/liblapack/SRC/cpbequ.f new file mode 100644 index 0000000..922c44b --- /dev/null +++ b/dspl/liblapack/SRC/cpbequ.f @@ -0,0 +1,244 @@ +*> \brief \b CPBEQU +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPBEQU + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, N +* REAL AMAX, SCOND +* .. +* .. Array Arguments .. +* REAL S( * ) +* COMPLEX AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPBEQU computes row and column scalings intended to equilibrate a +*> Hermitian positive definite band matrix A and reduce its condition +*> number (with respect to the two-norm). S contains the scale factors, +*> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with +*> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This +*> choice of S puts the condition number of B within a factor N of the +*> smallest possible condition number over all possible diagonal +*> scalings. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular of A is stored; +*> = 'L': Lower triangular of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> The upper or lower triangle of the Hermitian band matrix A, +*> stored in the first KD+1 rows of the array. The j-th column +*> of A is stored in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array A. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is REAL +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL S( * ) + COMPLEX AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J + REAL SMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* + IF( UPPER ) THEN + J = KD + 1 + ELSE + J = 1 + END IF +* +* Initialize SMIN and AMAX. +* + S( 1 ) = REAL( AB( J, 1 ) ) + SMIN = S( 1 ) + AMAX = S( 1 ) +* +* Find the minimum and maximum diagonal elements. +* + DO 10 I = 2, N + S( I ) = REAL( AB( J, I ) ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 20 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 20 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 30 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 30 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of CPBEQU +* + END diff --git a/dspl/liblapack/SRC/cpbrfs.f b/dspl/liblapack/SRC/cpbrfs.f new file mode 100644 index 0000000..7bf13e9 --- /dev/null +++ b/dspl/liblapack/SRC/cpbrfs.f @@ -0,0 +1,448 @@ +*> \brief \b CPBRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPBRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, +* LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* REAL BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPBRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is Hermitian positive definite +*> and banded, and provides error bounds and backward error estimates +*> for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> The upper or lower triangle of the Hermitian band matrix A, +*> stored in the first KD+1 rows of the array. The j-th column +*> of A is stored in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is COMPLEX array, dimension (LDAFB,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H*U or A = L*L**H of the band matrix A as computed by +*> CPBTRF, in the same storage format as A (see AB). +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= KD+1. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by CPBTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, + $ LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, L, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CHBMV, CLACN2, CPBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, MIN, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDAFB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = MIN( N+1, 2*KD+2 ) + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL CCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL CHBMV( UPLO, N, KD, -ONE, AB, LDAB, X( 1, J ), 1, ONE, + $ WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + L = KD + 1 - K + DO 40 I = MAX( 1, K-KD ), K - 1 + RWORK( I ) = RWORK( I ) + CABS1( AB( L+I, K ) )*XK + S = S + CABS1( AB( L+I, K ) )*CABS1( X( I, J ) ) + 40 CONTINUE + RWORK( K ) = RWORK( K ) + ABS( REAL( AB( KD+1, K ) ) )* + $ XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + ABS( REAL( AB( 1, K ) ) )*XK + L = 1 - K + DO 60 I = K + 1, MIN( N, K+KD ) + RWORK( I ) = RWORK( I ) + CABS1( AB( L+I, K ) )*XK + S = S + CABS1( AB( L+I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK, N, INFO ) + CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**H). +* + CALL CPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL CPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of CPBRFS +* + END diff --git a/dspl/liblapack/SRC/cpbstf.f b/dspl/liblapack/SRC/cpbstf.f new file mode 100644 index 0000000..f344bf8 --- /dev/null +++ b/dspl/liblapack/SRC/cpbstf.f @@ -0,0 +1,332 @@ +*> \brief \b CPBSTF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPBSTF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPBSTF( UPLO, N, KD, AB, LDAB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. +* COMPLEX AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPBSTF computes a split Cholesky factorization of a complex +*> Hermitian positive definite band matrix A. +*> +*> This routine is designed to be used in conjunction with CHBGST. +*> +*> The factorization has the form A = S**H*S where S is a band matrix +*> of the same bandwidth as A and the following structure: +*> +*> S = ( U ) +*> ( M L ) +*> +*> where U is upper triangular of order m = (n+kd)/2, and L is lower +*> triangular of order n-m. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first kd+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, if INFO = 0, the factor S from the split Cholesky +*> factorization A = S**H*S. See Further Details. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the factorization could not be completed, +*> because the updated element a(i,i) was negative; the +*> matrix A is not positive definite. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> N = 7, KD = 2: +*> +*> S = ( s11 s12 s13 ) +*> ( s22 s23 s24 ) +*> ( s33 s34 ) +*> ( s44 ) +*> ( s53 s54 s55 ) +*> ( s64 s65 s66 ) +*> ( s75 s76 s77 ) +*> +*> If UPLO = 'U', the array AB holds: +*> +*> on entry: on exit: +*> +*> * * a13 a24 a35 a46 a57 * * s13 s24 s53**H s64**H s75**H +*> * a12 a23 a34 a45 a56 a67 * s12 s23 s34 s54**H s65**H s76**H +*> a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 +*> +*> If UPLO = 'L', the array AB holds: +*> +*> on entry: on exit: +*> +*> a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 +*> a21 a32 a43 a54 a65 a76 * s12**H s23**H s34**H s54 s65 s76 * +*> a31 a42 a53 a64 a64 * * s13**H s24**H s53 s64 s75 * * +*> +*> Array elements marked * are not used by the routine; s12**H denotes +*> conjg(s12); the diagonal elements of S are real. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CPBSTF( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + COMPLEX AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, KLD, KM, M + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHER, CLACGV, CSSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBSTF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + KLD = MAX( 1, LDAB-1 ) +* +* Set the splitting point m. +* + M = ( N+KD ) / 2 +* + IF( UPPER ) THEN +* +* Factorize A(m+1:n,m+1:n) as L**H*L, and update A(1:m,1:m). +* + DO 10 J = N, M + 1, -1 +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = REAL( AB( KD+1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( KD+1, J ) = AJJ + GO TO 50 + END IF + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ + KM = MIN( J-1, KD ) +* +* Compute elements j-km:j-1 of the j-th column and update the +* the leading submatrix within the band. +* + CALL CSSCAL( KM, ONE / AJJ, AB( KD+1-KM, J ), 1 ) + CALL CHER( 'Upper', KM, -ONE, AB( KD+1-KM, J ), 1, + $ AB( KD+1, J-KM ), KLD ) + 10 CONTINUE +* +* Factorize the updated submatrix A(1:m,1:m) as U**H*U. +* + DO 20 J = 1, M +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = REAL( AB( KD+1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( KD+1, J ) = AJJ + GO TO 50 + END IF + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ + KM = MIN( KD, M-J ) +* +* Compute elements j+1:j+km of the j-th row and update the +* trailing submatrix within the band. +* + IF( KM.GT.0 ) THEN + CALL CSSCAL( KM, ONE / AJJ, AB( KD, J+1 ), KLD ) + CALL CLACGV( KM, AB( KD, J+1 ), KLD ) + CALL CHER( 'Upper', KM, -ONE, AB( KD, J+1 ), KLD, + $ AB( KD+1, J+1 ), KLD ) + CALL CLACGV( KM, AB( KD, J+1 ), KLD ) + END IF + 20 CONTINUE + ELSE +* +* Factorize A(m+1:n,m+1:n) as L**H*L, and update A(1:m,1:m). +* + DO 30 J = N, M + 1, -1 +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = REAL( AB( 1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( 1, J ) = AJJ + GO TO 50 + END IF + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ + KM = MIN( J-1, KD ) +* +* Compute elements j-km:j-1 of the j-th row and update the +* trailing submatrix within the band. +* + CALL CSSCAL( KM, ONE / AJJ, AB( KM+1, J-KM ), KLD ) + CALL CLACGV( KM, AB( KM+1, J-KM ), KLD ) + CALL CHER( 'Lower', KM, -ONE, AB( KM+1, J-KM ), KLD, + $ AB( 1, J-KM ), KLD ) + CALL CLACGV( KM, AB( KM+1, J-KM ), KLD ) + 30 CONTINUE +* +* Factorize the updated submatrix A(1:m,1:m) as U**H*U. +* + DO 40 J = 1, M +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = REAL( AB( 1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( 1, J ) = AJJ + GO TO 50 + END IF + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ + KM = MIN( KD, M-J ) +* +* Compute elements j+1:j+km of the j-th column and update the +* trailing submatrix within the band. +* + IF( KM.GT.0 ) THEN + CALL CSSCAL( KM, ONE / AJJ, AB( 2, J ), 1 ) + CALL CHER( 'Lower', KM, -ONE, AB( 2, J ), 1, + $ AB( 1, J+1 ), KLD ) + END IF + 40 CONTINUE + END IF + RETURN +* + 50 CONTINUE + INFO = J + RETURN +* +* End of CPBSTF +* + END diff --git a/dspl/liblapack/SRC/cpbsv.f b/dspl/liblapack/SRC/cpbsv.f new file mode 100644 index 0000000..b1a1fab --- /dev/null +++ b/dspl/liblapack/SRC/cpbsv.f @@ -0,0 +1,229 @@ +*> \brief CPBSV computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPBSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPBSV( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX AB( LDAB, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPBSV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian positive definite band matrix and X +*> and B are N-by-NRHS matrices. +*> +*> The Cholesky decomposition is used to factor A as +*> A = U**H * U, if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular band matrix, and L is a lower +*> triangular band matrix, with the same number of superdiagonals or +*> subdiagonals as A. The factored form of A is then used to solve the +*> system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(N,j+KD). +*> See below for further details. +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**H*U or A = L*L**H of the band +*> matrix A, in the same storage format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i of A is not +*> positive definite, so the factorization could not be +*> completed, and the solution has not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> N = 6, KD = 2, and UPLO = 'U': +*> +*> On entry: On exit: +*> +*> * * a13 a24 a35 a46 * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> +*> Similarly, if UPLO = 'L' the format of A is as follows: +*> +*> On entry: On exit: +*> +*> a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +*> a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +*> a31 a42 a53 a64 * * l31 l42 l53 l64 * * +*> +*> Array elements marked * are not used by the routine. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CPBSV( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX AB( LDAB, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CPBTRF, CPBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U**H*U or A = L*L**H. +* + CALL CPBTRF( UPLO, N, KD, AB, LDAB, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* + END IF + RETURN +* +* End of CPBSV +* + END diff --git a/dspl/liblapack/SRC/cpbsvx.f b/dspl/liblapack/SRC/cpbsvx.f new file mode 100644 index 0000000..fde0564 --- /dev/null +++ b/dspl/liblapack/SRC/cpbsvx.f @@ -0,0 +1,543 @@ +*> \brief CPBSVX computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPBSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPBSVX( FACT, UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, +* EQUED, S, B, LDB, X, LDX, RCOND, FERR, BERR, +* WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS +* REAL RCOND +* .. +* .. Array Arguments .. +* REAL BERR( * ), FERR( * ), RWORK( * ), S( * ) +* COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPBSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to +*> compute the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian positive definite band matrix and X +*> and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +*> factor the matrix A (after equilibration if FACT = 'E') as +*> A = U**H * U, if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular band matrix, and L is a lower +*> triangular band matrix. +*> +*> 3. If the leading i-by-i principal minor is not positive definite, +*> then the routine returns with INFO = i. Otherwise, the factored +*> form of A is used to estimate the condition number of the matrix +*> A. If the reciprocal of the condition number is less than machine +*> precision, INFO = N+1 is returned as a warning, but the routine +*> still goes on to solve for X and compute error bounds as +*> described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(S) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AFB contains the factored form of A. +*> If EQUED = 'Y', the matrix A has been equilibrated +*> with scaling factors given by S. AB and AFB will not +*> be modified. +*> = 'N': The matrix A will be copied to AFB and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AFB and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array, except +*> if FACT = 'F' and EQUED = 'Y', then A must contain the +*> equilibrated matrix diag(S)*A*diag(S). The j-th column of A +*> is stored in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(N,j+KD). +*> See below for further details. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array A. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] AFB +*> \verbatim +*> AFB is COMPLEX array, dimension (LDAFB,N) +*> If FACT = 'F', then AFB is an input argument and on entry +*> contains the triangular factor U or L from the Cholesky +*> factorization A = U**H*U or A = L*L**H of the band matrix +*> A, in the same storage format as A (see AB). If EQUED = 'Y', +*> then AFB is the factored form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AFB is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**H*U or A = L*L**H. +*> +*> If FACT = 'E', then AFB is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**H*U or A = L*L**H of the equilibrated +*> matrix A (see the description of A for the form of the +*> equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A; not accessed if EQUED = 'N'. S is +*> an input argument if FACT = 'F'; otherwise, S is an output +*> argument. If FACT = 'F' and EQUED = 'Y', each element of S +*> must be positive. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +*> B is overwritten by diag(S) * B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +*> the original system of equations. Note that if EQUED = 'Y', +*> A and B are modified on exit, and the solution to the +*> equilibrated system is inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The estimate of the reciprocal condition number of the matrix +*> A after equilibration (if done). If RCOND is less than the +*> machine precision (in particular, if RCOND = 0), the matrix +*> is singular to working precision. This condition is +*> indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: the leading minor of order i of A is +*> not positive definite, so the factorization +*> could not be completed, and the solution has not +*> been computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexOTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> N = 6, KD = 2, and UPLO = 'U': +*> +*> Two-dimensional storage of the Hermitian matrix A: +*> +*> a11 a12 a13 +*> a22 a23 a24 +*> a33 a34 a35 +*> a44 a45 a46 +*> a55 a56 +*> (aij=conjg(aji)) a66 +*> +*> Band storage of the upper triangle of A: +*> +*> * * a13 a24 a35 a46 +*> * a12 a23 a34 a45 a56 +*> a11 a22 a33 a44 a55 a66 +*> +*> Similarly, if UPLO = 'L' the format of A is as follows: +*> +*> a11 a22 a33 a44 a55 a66 +*> a21 a32 a43 a54 a65 * +*> a31 a42 a53 a64 * * +*> +*> Array elements marked * are not used by the routine. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CPBSVX( FACT, UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, + $ EQUED, S, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + REAL BERR( * ), FERR( * ), RWORK( * ), S( * ) + COMPLEX AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU, UPPER + INTEGER I, INFEQU, J, J1, J2 + REAL AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHB, SLAMCH + EXTERNAL LSAME, CLANHB, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLACPY, CLAQHB, CPBCON, CPBEQU, CPBRFS, + $ CPBTRF, CPBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + UPPER = LSAME( UPLO, 'U' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( LDAFB.LT.KD+1 ) THEN + INFO = -9 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL CPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL CLAQHB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U**H *U or A = L*L**H. +* + IF( UPPER ) THEN + DO 40 J = 1, N + J1 = MAX( J-KD, 1 ) + CALL CCOPY( J-J1+1, AB( KD+1-J+J1, J ), 1, + $ AFB( KD+1-J+J1, J ), 1 ) + 40 CONTINUE + ELSE + DO 50 J = 1, N + J2 = MIN( J+KD, N ) + CALL CCOPY( J2-J+1, AB( 1, J ), 1, AFB( 1, J ), 1 ) + 50 CONTINUE + END IF +* + CALL CPBTRF( UPLO, N, KD, AFB, LDAFB, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = CLANHB( '1', UPLO, N, KD, AB, LDAB, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL CPBCON( UPLO, N, KD, AFB, LDAFB, ANORM, RCOND, WORK, RWORK, + $ INFO ) +* +* Compute the solution matrix X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CPBTRS( UPLO, N, KD, NRHS, AFB, LDAFB, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL CPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 70 J = 1, NRHS + DO 60 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 60 CONTINUE + 70 CONTINUE + DO 80 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 80 CONTINUE + END IF +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of CPBSVX +* + END diff --git a/dspl/liblapack/SRC/cpbtf2.f b/dspl/liblapack/SRC/cpbtf2.f new file mode 100644 index 0000000..f871dd7 --- /dev/null +++ b/dspl/liblapack/SRC/cpbtf2.f @@ -0,0 +1,269 @@ +*> \brief \b CPBTF2 computes the Cholesky factorization of a symmetric/Hermitian positive definite band matrix (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPBTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPBTF2( UPLO, N, KD, AB, LDAB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. +* COMPLEX AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPBTF2 computes the Cholesky factorization of a complex Hermitian +*> positive definite band matrix A. +*> +*> The factorization has the form +*> A = U**H * U , if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix, U**H is the conjugate transpose +*> of U, and L is lower triangular. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of super-diagonals of the matrix A if UPLO = 'U', +*> or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**H *U or A = L*L**H of the band +*> matrix A, in the same storage format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, the leading minor of order k is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> N = 6, KD = 2, and UPLO = 'U': +*> +*> On entry: On exit: +*> +*> * * a13 a24 a35 a46 * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> +*> Similarly, if UPLO = 'L' the format of A is as follows: +*> +*> On entry: On exit: +*> +*> a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +*> a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +*> a31 a42 a53 a64 * * l31 l42 l53 l64 * * +*> +*> Array elements marked * are not used by the routine. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CPBTF2( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + COMPLEX AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, KLD, KN + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CHER, CLACGV, CSSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + KLD = MAX( 1, LDAB-1 ) +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U**H * U. +* + DO 10 J = 1, N +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = REAL( AB( KD+1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( KD+1, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ +* +* Compute elements J+1:J+KN of row J and update the +* trailing submatrix within the band. +* + KN = MIN( KD, N-J ) + IF( KN.GT.0 ) THEN + CALL CSSCAL( KN, ONE / AJJ, AB( KD, J+1 ), KLD ) + CALL CLACGV( KN, AB( KD, J+1 ), KLD ) + CALL CHER( 'Upper', KN, -ONE, AB( KD, J+1 ), KLD, + $ AB( KD+1, J+1 ), KLD ) + CALL CLACGV( KN, AB( KD, J+1 ), KLD ) + END IF + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L**H. +* + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = REAL( AB( 1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( 1, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ +* +* Compute elements J+1:J+KN of column J and update the +* trailing submatrix within the band. +* + KN = MIN( KD, N-J ) + IF( KN.GT.0 ) THEN + CALL CSSCAL( KN, ONE / AJJ, AB( 2, J ), 1 ) + CALL CHER( 'Lower', KN, -ONE, AB( 2, J ), 1, + $ AB( 1, J+1 ), KLD ) + END IF + 20 CONTINUE + END IF + RETURN +* + 30 CONTINUE + INFO = J + RETURN +* +* End of CPBTF2 +* + END diff --git a/dspl/liblapack/SRC/cpbtrf.f b/dspl/liblapack/SRC/cpbtrf.f new file mode 100644 index 0000000..8854cbe --- /dev/null +++ b/dspl/liblapack/SRC/cpbtrf.f @@ -0,0 +1,442 @@ +*> \brief \b CPBTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPBTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPBTRF( UPLO, N, KD, AB, LDAB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. +* COMPLEX AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPBTRF computes the Cholesky factorization of a complex Hermitian +*> positive definite band matrix A. +*> +*> The factorization has the form +*> A = U**H * U, if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**H*U or A = L*L**H of the band +*> matrix A, in the same storage format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> N = 6, KD = 2, and UPLO = 'U': +*> +*> On entry: On exit: +*> +*> * * a13 a24 a35 a46 * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> +*> Similarly, if UPLO = 'L' the format of A is as follows: +*> +*> On entry: On exit: +*> +*> a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +*> a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +*> a31 a42 a53 a64 * * l31 l42 l53 l64 * * +*> +*> Array elements marked * are not used by the routine. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989 +* +* ===================================================================== + SUBROUTINE CPBTRF( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + COMPLEX AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) + INTEGER NBMAX, LDWORK + PARAMETER ( NBMAX = 32, LDWORK = NBMAX+1 ) +* .. +* .. Local Scalars .. + INTEGER I, I2, I3, IB, II, J, JJ, NB +* .. +* .. Local Arrays .. + COMPLEX WORK( LDWORK, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CHERK, CPBTF2, CPOTF2, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( UPLO, 'U' ) ) .AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment +* + NB = ILAENV( 1, 'CPBTRF', UPLO, N, KD, -1, -1 ) +* +* The block size must not exceed the semi-bandwidth KD, and must not +* exceed the limit set by the size of the local array WORK. +* + NB = MIN( NB, NBMAX ) +* + IF( NB.LE.1 .OR. NB.GT.KD ) THEN +* +* Use unblocked code +* + CALL CPBTF2( UPLO, N, KD, AB, LDAB, INFO ) + ELSE +* +* Use blocked code +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Compute the Cholesky factorization of a Hermitian band +* matrix, given the upper triangle of the matrix in band +* storage. +* +* Zero the upper triangle of the work array. +* + DO 20 J = 1, NB + DO 10 I = 1, J - 1 + WORK( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Process the band matrix one diagonal block at a time. +* + DO 70 I = 1, N, NB + IB = MIN( NB, N-I+1 ) +* +* Factorize the diagonal block +* + CALL CPOTF2( UPLO, IB, AB( KD+1, I ), LDAB-1, II ) + IF( II.NE.0 ) THEN + INFO = I + II - 1 + GO TO 150 + END IF + IF( I+IB.LE.N ) THEN +* +* Update the relevant part of the trailing submatrix. +* If A11 denotes the diagonal block which has just been +* factorized, then we need to update the remaining +* blocks in the diagram: +* +* A11 A12 A13 +* A22 A23 +* A33 +* +* The numbers of rows and columns in the partitioning +* are IB, I2, I3 respectively. The blocks A12, A22 and +* A23 are empty if IB = KD. The upper triangle of A13 +* lies outside the band. +* + I2 = MIN( KD-IB, N-I-IB+1 ) + I3 = MIN( IB, N-I-KD+1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A12 +* + CALL CTRSM( 'Left', 'Upper', 'Conjugate transpose', + $ 'Non-unit', IB, I2, CONE, + $ AB( KD+1, I ), LDAB-1, + $ AB( KD+1-IB, I+IB ), LDAB-1 ) +* +* Update A22 +* + CALL CHERK( 'Upper', 'Conjugate transpose', I2, IB, + $ -ONE, AB( KD+1-IB, I+IB ), LDAB-1, ONE, + $ AB( KD+1, I+IB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Copy the lower triangle of A13 into the work array. +* + DO 40 JJ = 1, I3 + DO 30 II = JJ, IB + WORK( II, JJ ) = AB( II-JJ+1, JJ+I+KD-1 ) + 30 CONTINUE + 40 CONTINUE +* +* Update A13 (in the work array). +* + CALL CTRSM( 'Left', 'Upper', 'Conjugate transpose', + $ 'Non-unit', IB, I3, CONE, + $ AB( KD+1, I ), LDAB-1, WORK, LDWORK ) +* +* Update A23 +* + IF( I2.GT.0 ) + $ CALL CGEMM( 'Conjugate transpose', + $ 'No transpose', I2, I3, IB, -CONE, + $ AB( KD+1-IB, I+IB ), LDAB-1, WORK, + $ LDWORK, CONE, AB( 1+IB, I+KD ), + $ LDAB-1 ) +* +* Update A33 +* + CALL CHERK( 'Upper', 'Conjugate transpose', I3, IB, + $ -ONE, WORK, LDWORK, ONE, + $ AB( KD+1, I+KD ), LDAB-1 ) +* +* Copy the lower triangle of A13 back into place. +* + DO 60 JJ = 1, I3 + DO 50 II = JJ, IB + AB( II-JJ+1, JJ+I+KD-1 ) = WORK( II, JJ ) + 50 CONTINUE + 60 CONTINUE + END IF + END IF + 70 CONTINUE + ELSE +* +* Compute the Cholesky factorization of a Hermitian band +* matrix, given the lower triangle of the matrix in band +* storage. +* +* Zero the lower triangle of the work array. +* + DO 90 J = 1, NB + DO 80 I = J + 1, NB + WORK( I, J ) = ZERO + 80 CONTINUE + 90 CONTINUE +* +* Process the band matrix one diagonal block at a time. +* + DO 140 I = 1, N, NB + IB = MIN( NB, N-I+1 ) +* +* Factorize the diagonal block +* + CALL CPOTF2( UPLO, IB, AB( 1, I ), LDAB-1, II ) + IF( II.NE.0 ) THEN + INFO = I + II - 1 + GO TO 150 + END IF + IF( I+IB.LE.N ) THEN +* +* Update the relevant part of the trailing submatrix. +* If A11 denotes the diagonal block which has just been +* factorized, then we need to update the remaining +* blocks in the diagram: +* +* A11 +* A21 A22 +* A31 A32 A33 +* +* The numbers of rows and columns in the partitioning +* are IB, I2, I3 respectively. The blocks A21, A22 and +* A32 are empty if IB = KD. The lower triangle of A31 +* lies outside the band. +* + I2 = MIN( KD-IB, N-I-IB+1 ) + I3 = MIN( IB, N-I-KD+1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A21 +* + CALL CTRSM( 'Right', 'Lower', + $ 'Conjugate transpose', 'Non-unit', I2, + $ IB, CONE, AB( 1, I ), LDAB-1, + $ AB( 1+IB, I ), LDAB-1 ) +* +* Update A22 +* + CALL CHERK( 'Lower', 'No transpose', I2, IB, -ONE, + $ AB( 1+IB, I ), LDAB-1, ONE, + $ AB( 1, I+IB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Copy the upper triangle of A31 into the work array. +* + DO 110 JJ = 1, IB + DO 100 II = 1, MIN( JJ, I3 ) + WORK( II, JJ ) = AB( KD+1-JJ+II, JJ+I-1 ) + 100 CONTINUE + 110 CONTINUE +* +* Update A31 (in the work array). +* + CALL CTRSM( 'Right', 'Lower', + $ 'Conjugate transpose', 'Non-unit', I3, + $ IB, CONE, AB( 1, I ), LDAB-1, WORK, + $ LDWORK ) +* +* Update A32 +* + IF( I2.GT.0 ) + $ CALL CGEMM( 'No transpose', + $ 'Conjugate transpose', I3, I2, IB, + $ -CONE, WORK, LDWORK, AB( 1+IB, I ), + $ LDAB-1, CONE, AB( 1+KD-IB, I+IB ), + $ LDAB-1 ) +* +* Update A33 +* + CALL CHERK( 'Lower', 'No transpose', I3, IB, -ONE, + $ WORK, LDWORK, ONE, AB( 1, I+KD ), + $ LDAB-1 ) +* +* Copy the upper triangle of A31 back into place. +* + DO 130 JJ = 1, IB + DO 120 II = 1, MIN( JJ, I3 ) + AB( KD+1-JJ+II, JJ+I-1 ) = WORK( II, JJ ) + 120 CONTINUE + 130 CONTINUE + END IF + END IF + 140 CONTINUE + END IF + END IF + RETURN +* + 150 CONTINUE + RETURN +* +* End of CPBTRF +* + END diff --git a/dspl/liblapack/SRC/cpbtrs.f b/dspl/liblapack/SRC/cpbtrs.f new file mode 100644 index 0000000..1fef333 --- /dev/null +++ b/dspl/liblapack/SRC/cpbtrs.f @@ -0,0 +1,220 @@ +*> \brief \b CPBTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPBTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX AB( LDAB, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPBTRS solves a system of linear equations A*X = B with a Hermitian +*> positive definite band matrix A using the Cholesky factorization +*> A = U**H*U or A = L*L**H computed by CPBTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular factor stored in AB; +*> = 'L': Lower triangular factor stored in AB. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H*U or A = L*L**H of the band matrix A, stored in the +*> first KD+1 rows of the array. The j-th column of U or L is +*> stored in the j-th column of the array AB as follows: +*> if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX AB( LDAB, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CTBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U**H *U. +* + DO 10 J = 1, NRHS +* +* Solve U**H *X = B, overwriting B with X. +* + CALL CTBSV( 'Upper', 'Conjugate transpose', 'Non-unit', N, + $ KD, AB, LDAB, B( 1, J ), 1 ) +* +* Solve U*X = B, overwriting B with X. +* + CALL CTBSV( 'Upper', 'No transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Solve A*X = B where A = L*L**H. +* + DO 20 J = 1, NRHS +* +* Solve L*X = B, overwriting B with X. +* + CALL CTBSV( 'Lower', 'No transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) +* +* Solve L**H *X = B, overwriting B with X. +* + CALL CTBSV( 'Lower', 'Conjugate transpose', 'Non-unit', N, + $ KD, AB, LDAB, B( 1, J ), 1 ) + 20 CONTINUE + END IF +* + RETURN +* +* End of CPBTRS +* + END diff --git a/dspl/liblapack/SRC/cpftrf.f b/dspl/liblapack/SRC/cpftrf.f new file mode 100644 index 0000000..6d0b527 --- /dev/null +++ b/dspl/liblapack/SRC/cpftrf.f @@ -0,0 +1,471 @@ +*> \brief \b CPFTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPFTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPFTRF( TRANSR, UPLO, N, A, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER N, INFO +* .. +* .. Array Arguments .. +* COMPLEX A( 0: * ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPFTRF computes the Cholesky factorization of a complex Hermitian +*> positive definite matrix A. +*> +*> The factorization has the form +*> A = U**H * U, if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> +*> This is the block version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal TRANSR of RFP A is stored; +*> = 'C': The Conjugate-transpose TRANSR of RFP A is stored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of RFP A is stored; +*> = 'L': Lower triangle of RFP A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension ( N*(N+1)/2 ); +*> On entry, the Hermitian matrix A in RFP format. RFP format is +*> described by TRANSR, UPLO, and N as follows: If TRANSR = 'N' +*> then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is +*> (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'C' then RFP is +*> the Conjugate-transpose of RFP A as defined when +*> TRANSR = 'N'. The contents of RFP A are defined by UPLO as +*> follows: If UPLO = 'U' the RFP A contains the nt elements of +*> upper packed A. If UPLO = 'L' the RFP A contains the elements +*> of lower packed A. The LDA of RFP A is (N+1)/2 when TRANSR = +*> 'C'. When TRANSR is 'N' the LDA is N+1 when N is even and N +*> is odd. See the Note below for more details. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization RFP A = U**H*U or RFP A = L*L**H. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> +*> Further Notes on RFP Format: +*> ============================ +*> +*> We first consider Standard Packed Format when N is even. +*> We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> conjugate-transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> conjugate-transpose of the last three columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- -- +*> 03 04 05 33 43 53 +*> -- -- +*> 13 14 15 00 44 54 +*> -- +*> 23 24 25 10 11 55 +*> +*> 33 34 35 20 21 22 +*> -- +*> 00 44 45 30 31 32 +*> -- -- +*> 01 11 55 40 41 42 +*> -- -- -- +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- -- +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> We next consider Standard Packed Format when N is odd. +*> We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> conjugate-transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> conjugate-transpose of the last two columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- +*> 02 03 04 00 33 43 +*> -- +*> 12 13 14 10 11 44 +*> +*> 22 23 24 20 21 22 +*> -- +*> 00 33 34 30 31 32 +*> -- -- +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- +*> 02 12 22 00 01 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 11 33 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CPFTRF( TRANSR, UPLO, N, A, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER N, INFO +* .. +* .. Array Arguments .. + COMPLEX A( 0: * ) +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + COMPLEX CONE + PARAMETER ( ONE = 1.0E+0, CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CHERK, CPOTRF, CTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPFTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* If N is odd, set NISODD = .TRUE. +* If N is even, set K = N/2 and NISODD = .FALSE. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + ELSE + NISODD = .TRUE. + END IF +* +* Set N1 and N2 depending on LOWER +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* start execution: there are eight cases +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) +* T1 -> a(0), T2 -> a(n), S -> a(n1) +* + CALL CPOTRF( 'L', N1, A( 0 ), N, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL CTRSM( 'R', 'L', 'C', 'N', N2, N1, CONE, A( 0 ), N, + $ A( N1 ), N ) + CALL CHERK( 'U', 'N', N2, N1, -ONE, A( N1 ), N, ONE, + $ A( N ), N ) + CALL CPOTRF( 'U', N2, A( N ), N, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) +* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) +* T1 -> a(n2), T2 -> a(n1), S -> a(0) +* + CALL CPOTRF( 'L', N1, A( N2 ), N, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL CTRSM( 'L', 'L', 'N', 'N', N1, N2, CONE, A( N2 ), N, + $ A( 0 ), N ) + CALL CHERK( 'U', 'C', N2, N1, -ONE, A( 0 ), N, ONE, + $ A( N1 ), N ) + CALL CPOTRF( 'U', N2, A( N1 ), N, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is odd +* T1 -> A(0,0) , T2 -> A(1,0) , S -> A(0,n1) +* T1 -> a(0+0) , T2 -> a(1+0) , S -> a(0+n1*n1); lda=n1 +* + CALL CPOTRF( 'U', N1, A( 0 ), N1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL CTRSM( 'L', 'U', 'C', 'N', N1, N2, CONE, A( 0 ), N1, + $ A( N1*N1 ), N1 ) + CALL CHERK( 'L', 'C', N2, N1, -ONE, A( N1*N1 ), N1, ONE, + $ A( 1 ), N1 ) + CALL CPOTRF( 'L', N2, A( 1 ), N1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is odd +* T1 -> A(0,n1+1), T2 -> A(0,n1), S -> A(0,0) +* T1 -> a(n2*n2), T2 -> a(n1*n2), S -> a(0); lda = n2 +* + CALL CPOTRF( 'U', N1, A( N2*N2 ), N2, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL CTRSM( 'R', 'U', 'N', 'N', N2, N1, CONE, A( N2*N2 ), + $ N2, A( 0 ), N2 ) + CALL CHERK( 'L', 'N', N2, N1, -ONE, A( 0 ), N2, ONE, + $ A( N1*N2 ), N2 ) + CALL CPOTRF( 'L', N2, A( N1*N2 ), N2, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1) +* + CALL CPOTRF( 'L', K, A( 1 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL CTRSM( 'R', 'L', 'C', 'N', K, K, CONE, A( 1 ), N+1, + $ A( K+1 ), N+1 ) + CALL CHERK( 'U', 'N', K, K, -ONE, A( K+1 ), N+1, ONE, + $ A( 0 ), N+1 ) + CALL CPOTRF( 'U', K, A( 0 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0) +* + CALL CPOTRF( 'L', K, A( K+1 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL CTRSM( 'L', 'L', 'N', 'N', K, K, CONE, A( K+1 ), + $ N+1, A( 0 ), N+1 ) + CALL CHERK( 'U', 'C', K, K, -ONE, A( 0 ), N+1, ONE, + $ A( K ), N+1 ) + CALL CPOTRF( 'U', K, A( K ), N+1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K +* + END IF +* + ELSE +* +* N is even and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) +* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k +* + CALL CPOTRF( 'U', K, A( 0+K ), K, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL CTRSM( 'L', 'U', 'C', 'N', K, K, CONE, A( K ), N1, + $ A( K*( K+1 ) ), K ) + CALL CHERK( 'L', 'C', K, K, -ONE, A( K*( K+1 ) ), K, ONE, + $ A( 0 ), K ) + CALL CPOTRF( 'L', K, A( 0 ), K, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0) +* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k +* + CALL CPOTRF( 'U', K, A( K*( K+1 ) ), K, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL CTRSM( 'R', 'U', 'N', 'N', K, K, CONE, + $ A( K*( K+1 ) ), K, A( 0 ), K ) + CALL CHERK( 'L', 'N', K, K, -ONE, A( 0 ), K, ONE, + $ A( K*K ), K ) + CALL CPOTRF( 'L', K, A( K*K ), K, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of CPFTRF +* + END diff --git a/dspl/liblapack/SRC/cpftri.f b/dspl/liblapack/SRC/cpftri.f new file mode 100644 index 0000000..e2b5690 --- /dev/null +++ b/dspl/liblapack/SRC/cpftri.f @@ -0,0 +1,445 @@ +*> \brief \b CPFTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPFTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPFTRI( TRANSR, UPLO, N, A, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, N +* .. Array Arguments .. +* COMPLEX A( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPFTRI computes the inverse of a complex Hermitian positive definite +*> matrix A using the Cholesky factorization A = U**H*U or A = L*L**H +*> computed by CPFTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal TRANSR of RFP A is stored; +*> = 'C': The Conjugate-transpose TRANSR of RFP A is stored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension ( N*(N+1)/2 ); +*> On entry, the Hermitian matrix A in RFP format. RFP format is +*> described by TRANSR, UPLO, and N as follows: If TRANSR = 'N' +*> then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is +*> (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'C' then RFP is +*> the Conjugate-transpose of RFP A as defined when +*> TRANSR = 'N'. The contents of RFP A are defined by UPLO as +*> follows: If UPLO = 'U' the RFP A contains the nt elements of +*> upper packed A. If UPLO = 'L' the RFP A contains the elements +*> of lower packed A. The LDA of RFP A is (N+1)/2 when TRANSR = +*> 'C'. When TRANSR is 'N' the LDA is N+1 when N is even and N +*> is odd. See the Note below for more details. +*> +*> On exit, the Hermitian inverse of the original matrix, in the +*> same storage format. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the (i,i) element of the factor U or L is +*> zero, and the inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Standard Packed Format when N is even. +*> We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> conjugate-transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> conjugate-transpose of the last three columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- -- +*> 03 04 05 33 43 53 +*> -- -- +*> 13 14 15 00 44 54 +*> -- +*> 23 24 25 10 11 55 +*> +*> 33 34 35 20 21 22 +*> -- +*> 00 44 45 30 31 32 +*> -- -- +*> 01 11 55 40 41 42 +*> -- -- -- +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- -- +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We next consider Standard Packed Format when N is odd. +*> We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> conjugate-transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> conjugate-transpose of the last two columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- +*> 02 03 04 00 33 43 +*> -- +*> 12 13 14 10 11 44 +*> +*> 22 23 24 20 21 22 +*> -- +*> 00 33 34 30 31 32 +*> -- -- +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- +*> 02 12 22 00 01 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 11 33 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CPFTRI( TRANSR, UPLO, N, A, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, N +* .. Array Arguments .. + COMPLEX A( 0: * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + COMPLEX CONE + PARAMETER ( ONE = 1.0E+0, CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CTFTRI, CLAUUM, CTRMM, CHERK +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPFTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL CTFTRI( TRANSR, UPLO, 'N', N, A, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* +* If N is odd, set NISODD = .TRUE. +* If N is even, set K = N/2 and NISODD = .FALSE. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + ELSE + NISODD = .TRUE. + END IF +* +* Set N1 and N2 depending on LOWER +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* Start execution of triangular matrix multiply: inv(U)*inv(U)^C or +* inv(L)^C*inv(L). There are eight cases. +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:N1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(N1,0) +* T1 -> a(0), T2 -> a(n), S -> a(N1) +* + CALL CLAUUM( 'L', N1, A( 0 ), N, INFO ) + CALL CHERK( 'L', 'C', N1, N2, ONE, A( N1 ), N, ONE, + $ A( 0 ), N ) + CALL CTRMM( 'L', 'U', 'N', 'N', N2, N1, CONE, A( N ), N, + $ A( N1 ), N ) + CALL CLAUUM( 'U', N2, A( N ), N, INFO ) +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:N2-1) +* T1 -> a(N1+1,0), T2 -> a(N1,0), S -> a(0,0) +* T1 -> a(N2), T2 -> a(N1), S -> a(0) +* + CALL CLAUUM( 'L', N1, A( N2 ), N, INFO ) + CALL CHERK( 'L', 'N', N1, N2, ONE, A( 0 ), N, ONE, + $ A( N2 ), N ) + CALL CTRMM( 'R', 'U', 'C', 'N', N1, N2, CONE, A( N1 ), N, + $ A( 0 ), N ) + CALL CLAUUM( 'U', N2, A( N1 ), N, INFO ) +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE, and N is odd +* T1 -> a(0), T2 -> a(1), S -> a(0+N1*N1) +* + CALL CLAUUM( 'U', N1, A( 0 ), N1, INFO ) + CALL CHERK( 'U', 'N', N1, N2, ONE, A( N1*N1 ), N1, ONE, + $ A( 0 ), N1 ) + CALL CTRMM( 'R', 'L', 'N', 'N', N1, N2, CONE, A( 1 ), N1, + $ A( N1*N1 ), N1 ) + CALL CLAUUM( 'L', N2, A( 1 ), N1, INFO ) +* + ELSE +* +* SRPA for UPPER, TRANSPOSE, and N is odd +* T1 -> a(0+N2*N2), T2 -> a(0+N1*N2), S -> a(0) +* + CALL CLAUUM( 'U', N1, A( N2*N2 ), N2, INFO ) + CALL CHERK( 'U', 'C', N1, N2, ONE, A( 0 ), N2, ONE, + $ A( N2*N2 ), N2 ) + CALL CTRMM( 'L', 'L', 'C', 'N', N2, N1, CONE, A( N1*N2 ), + $ N2, A( 0 ), N2 ) + CALL CLAUUM( 'L', N2, A( N1*N2 ), N2, INFO ) +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1) +* + CALL CLAUUM( 'L', K, A( 1 ), N+1, INFO ) + CALL CHERK( 'L', 'C', K, K, ONE, A( K+1 ), N+1, ONE, + $ A( 1 ), N+1 ) + CALL CTRMM( 'L', 'U', 'N', 'N', K, K, CONE, A( 0 ), N+1, + $ A( K+1 ), N+1 ) + CALL CLAUUM( 'U', K, A( 0 ), N+1, INFO ) +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0) +* + CALL CLAUUM( 'L', K, A( K+1 ), N+1, INFO ) + CALL CHERK( 'L', 'N', K, K, ONE, A( 0 ), N+1, ONE, + $ A( K+1 ), N+1 ) + CALL CTRMM( 'R', 'U', 'C', 'N', K, K, CONE, A( K ), N+1, + $ A( 0 ), N+1 ) + CALL CLAUUM( 'U', K, A( K ), N+1, INFO ) +* + END IF +* + ELSE +* +* N is even and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE, and N is even (see paper) +* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1), +* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k +* + CALL CLAUUM( 'U', K, A( K ), K, INFO ) + CALL CHERK( 'U', 'N', K, K, ONE, A( K*( K+1 ) ), K, ONE, + $ A( K ), K ) + CALL CTRMM( 'R', 'L', 'N', 'N', K, K, CONE, A( 0 ), K, + $ A( K*( K+1 ) ), K ) + CALL CLAUUM( 'L', K, A( 0 ), K, INFO ) +* + ELSE +* +* SRPA for UPPER, TRANSPOSE, and N is even (see paper) +* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0), +* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k +* + CALL CLAUUM( 'U', K, A( K*( K+1 ) ), K, INFO ) + CALL CHERK( 'U', 'C', K, K, ONE, A( 0 ), K, ONE, + $ A( K*( K+1 ) ), K ) + CALL CTRMM( 'L', 'L', 'C', 'N', K, K, CONE, A( K*K ), K, + $ A( 0 ), K ) + CALL CLAUUM( 'L', K, A( K*K ), K, INFO ) +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of CPFTRI +* + END diff --git a/dspl/liblapack/SRC/cpftrs.f b/dspl/liblapack/SRC/cpftrs.f new file mode 100644 index 0000000..739ae27 --- /dev/null +++ b/dspl/liblapack/SRC/cpftrs.f @@ -0,0 +1,301 @@ +*> \brief \b CPFTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPFTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPFTRS( TRANSR, UPLO, N, NRHS, A, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX A( 0: * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPFTRS solves a system of linear equations A*X = B with a Hermitian +*> positive definite matrix A using the Cholesky factorization +*> A = U**H*U or A = L*L**H computed by CPFTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal TRANSR of RFP A is stored; +*> = 'C': The Conjugate-transpose TRANSR of RFP A is stored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of RFP A is stored; +*> = 'L': Lower triangle of RFP A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension ( N*(N+1)/2 ); +*> The triangular factor U or L from the Cholesky factorization +*> of RFP A = U**H*U or RFP A = L*L**H, as computed by CPFTRF. +*> See note below for more details about RFP A. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Standard Packed Format when N is even. +*> We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> conjugate-transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> conjugate-transpose of the last three columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- -- +*> 03 04 05 33 43 53 +*> -- -- +*> 13 14 15 00 44 54 +*> -- +*> 23 24 25 10 11 55 +*> +*> 33 34 35 20 21 22 +*> -- +*> 00 44 45 30 31 32 +*> -- -- +*> 01 11 55 40 41 42 +*> -- -- -- +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- -- +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We next consider Standard Packed Format when N is odd. +*> We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> conjugate-transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> conjugate-transpose of the last two columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- +*> 02 03 04 00 33 43 +*> -- +*> 12 13 14 10 11 44 +*> +*> 22 23 24 20 21 22 +*> -- +*> 00 33 34 30 31 32 +*> -- -- +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- +*> 02 12 22 00 01 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 11 33 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CPFTRS( TRANSR, UPLO, N, NRHS, A, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX A( 0: * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, NORMALTRANSR +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CTFSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPFTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* start execution: there are two triangular solves +* + IF( LOWER ) THEN + CALL CTFSM( TRANSR, 'L', UPLO, 'N', 'N', N, NRHS, CONE, A, B, + $ LDB ) + CALL CTFSM( TRANSR, 'L', UPLO, 'C', 'N', N, NRHS, CONE, A, B, + $ LDB ) + ELSE + CALL CTFSM( TRANSR, 'L', UPLO, 'C', 'N', N, NRHS, CONE, A, B, + $ LDB ) + CALL CTFSM( TRANSR, 'L', UPLO, 'N', 'N', N, NRHS, CONE, A, B, + $ LDB ) + END IF +* + RETURN +* +* End of CPFTRS +* + END diff --git a/dspl/liblapack/SRC/cpocon.f b/dspl/liblapack/SRC/cpocon.f new file mode 100644 index 0000000..fbaba80 --- /dev/null +++ b/dspl/liblapack/SRC/cpocon.f @@ -0,0 +1,260 @@ +*> \brief \b CPOCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPOCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPOCON( UPLO, N, A, LDA, ANORM, RCOND, WORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* REAL RWORK( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPOCON estimates the reciprocal of the condition number (in the +*> 1-norm) of a complex Hermitian positive definite matrix using the +*> Cholesky factorization A = U**H*U or A = L*L**H computed by CPOTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H*U or A = L*L**H, as computed by CPOTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The 1-norm (or infinity-norm) of the Hermitian matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexPOcomputational +* +* ===================================================================== + SUBROUTINE CPOCON( UPLO, N, A, LDA, ANORM, RCOND, WORK, RWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + REAL AINVNM, SCALE, SCALEL, SCALEU, SMLNUM + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CLATRS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of inv(A). +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U**H). +* + CALL CLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, A, LDA, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL CLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SCALEU, RWORK, INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL CLATRS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L**H). +* + CALL CLATRS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, A, LDA, WORK, SCALEU, RWORK, INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of CPOCON +* + END diff --git a/dspl/liblapack/SRC/cpoequ.f b/dspl/liblapack/SRC/cpoequ.f new file mode 100644 index 0000000..1463d3c --- /dev/null +++ b/dspl/liblapack/SRC/cpoequ.f @@ -0,0 +1,207 @@ +*> \brief \b CPOEQU +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPOEQU + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPOEQU( N, A, LDA, S, SCOND, AMAX, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, N +* REAL AMAX, SCOND +* .. +* .. Array Arguments .. +* REAL S( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPOEQU computes row and column scalings intended to equilibrate a +*> Hermitian positive definite matrix A and reduce its condition number +*> (with respect to the two-norm). S contains the scale factors, +*> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with +*> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This +*> choice of S puts the condition number of B within a factor N of the +*> smallest possible condition number over all possible diagonal +*> scalings. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The N-by-N Hermitian positive definite matrix whose scaling +*> factors are to be computed. Only the diagonal elements of A +*> are referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is REAL +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexPOcomputational +* +* ===================================================================== + SUBROUTINE CPOEQU( N, A, LDA, S, SCOND, AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL S( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL SMIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* +* Find the minimum and maximum diagonal elements. +* + S( 1 ) = REAL( A( 1, 1 ) ) + SMIN = S( 1 ) + AMAX = S( 1 ) + DO 10 I = 2, N + S( I ) = REAL( A( I, I ) ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 20 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 20 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 30 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 30 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of CPOEQU +* + END diff --git a/dspl/liblapack/SRC/cpoequb.f b/dspl/liblapack/SRC/cpoequb.f new file mode 100644 index 0000000..68641bf --- /dev/null +++ b/dspl/liblapack/SRC/cpoequb.f @@ -0,0 +1,223 @@ +*> \brief \b CPOEQUB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPOEQUB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPOEQUB( N, A, LDA, S, SCOND, AMAX, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, N +* REAL AMAX, SCOND +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ) +* REAL S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPOEQUB computes row and column scalings intended to equilibrate a +*> Hermitian positive definite matrix A and reduce its condition number +*> (with respect to the two-norm). S contains the scale factors, +*> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with +*> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This +*> choice of S puts the condition number of B within a factor N of the +*> smallest possible condition number over all possible diagonal +*> scalings. +*> +*> This routine differs from CPOEQU by restricting the scaling factors +*> to a power of the radix. Barring over- and underflow, scaling by +*> these factors introduces no additional rounding errors. However, the +*> scaled diagonal entries are no longer approximately 1 but lie +*> between sqrt(radix) and 1/sqrt(radix). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The N-by-N Hermitian positive definite matrix whose scaling +*> factors are to be computed. Only the diagonal elements of A +*> are referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is REAL +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexPOcomputational +* +* ===================================================================== + SUBROUTINE CPOEQUB( N, A, LDA, S, SCOND, AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) + REAL S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL SMIN, BASE, TMP +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT, LOG, INT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* +* Positive definite only performs 1 pass of equilibration. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOEQUB', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF + + BASE = SLAMCH( 'B' ) + TMP = -0.5 / LOG ( BASE ) +* +* Find the minimum and maximum diagonal elements. +* + S( 1 ) = A( 1, 1 ) + SMIN = S( 1 ) + AMAX = S( 1 ) + DO 10 I = 2, N + S( I ) = A( I, I ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 20 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 20 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 30 I = 1, N + S( I ) = BASE ** INT( TMP * LOG( S( I ) ) ) + 30 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)). +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF +* + RETURN +* +* End of CPOEQUB +* + END diff --git a/dspl/liblapack/SRC/cporfs.f b/dspl/liblapack/SRC/cporfs.f new file mode 100644 index 0000000..bd4054c --- /dev/null +++ b/dspl/liblapack/SRC/cporfs.f @@ -0,0 +1,436 @@ +*> \brief \b CPORFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPORFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, +* LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* REAL BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPORFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is Hermitian positive definite, +*> and provides error bounds and backward error estimates for the +*> solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The Hermitian matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H*U or A = L*L**H, as computed by CPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by CPOTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexPOcomputational +* +* ===================================================================== + SUBROUTINE CPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ==================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CHEMV, CLACN2, CPOTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPORFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL CCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL CHEMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 40 CONTINUE + RWORK( K ) = RWORK( K ) + ABS( REAL( A( K, K ) ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + ABS( REAL( A( K, K ) ) )*XK + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CPOTRS( UPLO, N, 1, AF, LDAF, WORK, N, INFO ) + CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**H). +* + CALL CPOTRS( UPLO, N, 1, AF, LDAF, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL CPOTRS( UPLO, N, 1, AF, LDAF, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of CPORFS +* + END diff --git a/dspl/liblapack/SRC/cporfsx.f b/dspl/liblapack/SRC/cporfsx.f new file mode 100644 index 0000000..872bad3 --- /dev/null +++ b/dspl/liblapack/SRC/cporfsx.f @@ -0,0 +1,693 @@ +*> \brief \b CPORFSX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPORFSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPORFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, S, B, +* LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, +* WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, EQUED +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* REAL RCOND +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX, * ), WORK( * ) +* REAL RWORK( * ), S( * ), PARAMS(*), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPORFSX improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric positive +*> definite, and provides error bounds and backward error estimates +*> for the solution. In addition to normwise error bound, the code +*> provides maximum componentwise error bound if possible. See +*> comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the +*> error bounds. +*> +*> The original system of linear equations may have been equilibrated +*> before calling this routine, as described by arguments EQUED and S +*> below. In this case, the solution and error bounds returned are +*> for the original unequilibrated system. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done to A +*> before calling this routine. This is needed to compute +*> the solution and error bounds correctly. +*> = 'N': No equilibration +*> = 'Y': Both row and column equilibration, i.e., A has been +*> replaced by diag(S) * A * diag(S). +*> The right hand side B has been changed accordingly. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by SPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The row scale factors for A. If EQUED = 'Y', A is multiplied on +*> the left and right by diag(S). S is an input argument if FACT = +*> 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED +*> = 'Y', each element of S must be positive. If S is output, each +*> element of S is a power of the radix. If S is input, each element +*> of S should be a power of the radix to ensure a reliable solution +*> and error estimates. Scaling by powers of the radix does not cause +*> rounding errors unless the result underflows or overflows. +*> Rounding errors during scaling lead to refining with a matrix that +*> is not equivalent to the input matrix, producing error estimates +*> that may not be reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by SGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is REAL array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexPOcomputational +* +* ===================================================================== + SUBROUTINE CPORFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, S, B, + $ LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO, EQUED + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + REAL RCOND +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX, * ), WORK( * ) + REAL RWORK( * ), S( * ), PARAMS(*), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL ITREF_DEFAULT, ITHRESH_DEFAULT, + $ COMPONENTWISE_DEFAULT + REAL RTHRESH_DEFAULT, DZTHRESH_DEFAULT + PARAMETER ( ITREF_DEFAULT = 1.0 ) + PARAMETER ( ITHRESH_DEFAULT = 10.0 ) + PARAMETER ( COMPONENTWISE_DEFAULT = 1.0 ) + PARAMETER ( RTHRESH_DEFAULT = 0.5 ) + PARAMETER ( DZTHRESH_DEFAULT = 0.25 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. Local Scalars .. + CHARACTER(1) NORM + LOGICAL RCEQU + INTEGER J, PREC_TYPE, REF_TYPE + INTEGER N_NORMS + REAL ANORM, RCOND_TMP + REAL ILLRCOND_THRESH, ERR_LBND, CWISE_WRONG + LOGICAL IGNORE_CWISE + INTEGER ITHRESH + REAL RTHRESH, UNSTABLE_THRESH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CPOCON, CLA_PORFSX_EXTENDED +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT, TRANSFER +* .. +* .. External Functions .. + EXTERNAL LSAME, ILAPREC + EXTERNAL SLAMCH, CLANHE, CLA_PORCOND_X, CLA_PORCOND_C + REAL SLAMCH, CLANHE, CLA_PORCOND_X, CLA_PORCOND_C + LOGICAL LSAME + INTEGER ILAPREC +* .. +* .. Executable Statements .. +* +* Check the input parameters. +* + INFO = 0 + REF_TYPE = INT( ITREF_DEFAULT ) + IF ( NPARAMS .GE. LA_LINRX_ITREF_I ) THEN + IF ( PARAMS( LA_LINRX_ITREF_I ) .LT. 0.0 ) THEN + PARAMS( LA_LINRX_ITREF_I ) = ITREF_DEFAULT + ELSE + REF_TYPE = PARAMS( LA_LINRX_ITREF_I ) + END IF + END IF +* +* Set default parameters. +* + ILLRCOND_THRESH = REAL( N ) * SLAMCH( 'Epsilon' ) + ITHRESH = INT( ITHRESH_DEFAULT ) + RTHRESH = RTHRESH_DEFAULT + UNSTABLE_THRESH = DZTHRESH_DEFAULT + IGNORE_CWISE = COMPONENTWISE_DEFAULT .EQ. 0.0 +* + IF ( NPARAMS.GE.LA_LINRX_ITHRESH_I ) THEN + IF ( PARAMS(LA_LINRX_ITHRESH_I ).LT.0.0 ) THEN + PARAMS( LA_LINRX_ITHRESH_I ) = ITHRESH + ELSE + ITHRESH = INT( PARAMS( LA_LINRX_ITHRESH_I ) ) + END IF + END IF + IF ( NPARAMS.GE.LA_LINRX_CWISE_I ) THEN + IF ( PARAMS(LA_LINRX_CWISE_I ).LT.0.0 ) THEN + IF ( IGNORE_CWISE ) THEN + PARAMS( LA_LINRX_CWISE_I ) = 0.0 + ELSE + PARAMS( LA_LINRX_CWISE_I ) = 1.0 + END IF + ELSE + IGNORE_CWISE = PARAMS( LA_LINRX_CWISE_I ) .EQ. 0.0 + END IF + END IF + IF ( REF_TYPE .EQ. 0 .OR. N_ERR_BNDS .EQ. 0 ) THEN + N_NORMS = 0 + ELSE IF ( IGNORE_CWISE ) THEN + N_NORMS = 1 + ELSE + N_NORMS = 2 + END IF +* + RCEQU = LSAME( EQUED, 'Y' ) +* +* Test input parameters. +* + IF (.NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RCEQU .AND. .NOT.LSAME( EQUED, 'N' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPORFSX', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + RCOND = 1.0 + DO J = 1, NRHS + BERR( J ) = 0.0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 0.0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 0.0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 1.0 + END IF + END DO + RETURN + END IF +* +* Default to failure. +* + RCOND = 0.0 + DO J = 1, NRHS + BERR( J ) = 1.0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 0.0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 0.0 + END IF + END DO +* +* Compute the norm of A and the reciprocal of the condition +* number of A. +* + NORM = 'I' + ANORM = CLANHE( NORM, UPLO, N, A, LDA, RWORK ) + CALL CPOCON( UPLO, N, AF, LDAF, ANORM, RCOND, WORK, RWORK, + $ INFO ) +* +* Perform refinement on each right-hand side +* + IF ( REF_TYPE .NE. 0 ) THEN + + PREC_TYPE = ILAPREC( 'D' ) + + CALL CLA_PORFSX_EXTENDED( PREC_TYPE, UPLO, N, + $ NRHS, A, LDA, AF, LDAF, RCEQU, S, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, ERR_BNDS_COMP, + $ WORK, RWORK, WORK(N+1), + $ TRANSFER (RWORK(1:2*N), (/ (ZERO, ZERO) /), N), RCOND, + $ ITHRESH, RTHRESH, UNSTABLE_THRESH, IGNORE_CWISE, + $ INFO ) + END IF + + ERR_LBND = MAX( 10.0, SQRT( REAL( N ) ) ) * SLAMCH( 'Epsilon' ) + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 1 ) THEN +* +* Compute scaled normwise condition number cond(A*C). +* + IF ( RCEQU ) THEN + RCOND_TMP = CLA_PORCOND_C( UPLO, N, A, LDA, AF, LDAF, + $ S, .TRUE., INFO, WORK, RWORK ) + ELSE + RCOND_TMP = CLA_PORCOND_C( UPLO, N, A, LDA, AF, LDAF, + $ S, .FALSE., INFO, WORK, RWORK ) + END IF + DO J = 1, NRHS +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .GT. 1.0 ) + $ ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 0.0 + IF ( INFO .LE. N ) INFO = N + J + ELSE IF ( ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .LT. ERR_LBND ) + $ THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF + + IF (N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 2) THEN +* +* Compute componentwise condition number cond(A*diag(Y(:,J))) for +* each right-hand side using the current solution as an estimate of +* the true solution. If the componentwise error estimate is too +* large, then the solution is a lousy estimate of truth and the +* estimated RCOND may be too optimistic. To avoid misleading users, +* the inverse condition number is set to 0.0 when the estimated +* cwise error is at least CWISE_WRONG. +* + CWISE_WRONG = SQRT( SLAMCH( 'Epsilon' ) ) + DO J = 1, NRHS + IF (ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .LT. CWISE_WRONG ) + $ THEN + RCOND_TMP = CLA_PORCOND_X( UPLO, N, A, LDA, AF, LDAF, + $ X(1,J), INFO, WORK, RWORK ) + ELSE + RCOND_TMP = 0.0 + END IF +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .GT. 1.0 ) + $ ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 +* +* Threshold the error (see LAWN). +* + IF (RCOND_TMP .LT. ILLRCOND_THRESH) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 0.0 + IF ( PARAMS( LA_LINRX_CWISE_I ) .EQ. 1.0 + $ .AND. INFO.LT.N + J ) INFO = N + J + ELSE IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) + $ .LT. ERR_LBND ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF +* + RETURN +* +* End of CPORFSX +* + END diff --git a/dspl/liblapack/SRC/cposv.f b/dspl/liblapack/SRC/cposv.f new file mode 100644 index 0000000..ecd61ec --- /dev/null +++ b/dspl/liblapack/SRC/cposv.f @@ -0,0 +1,193 @@ +*> \brief CPOSV computes the solution to system of linear equations A * X = B for PO matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPOSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPOSV( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPOSV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian positive definite matrix and X and B +*> are N-by-NRHS matrices. +*> +*> The Cholesky decomposition is used to factor A as +*> A = U**H* U, if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is a lower triangular +*> matrix. The factored form of A is then used to solve the system of +*> equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**H*U or A = L*L**H. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i of A is not +*> positive definite, so the factorization could not be +*> completed, and the solution has not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexPOsolve +* +* ===================================================================== + SUBROUTINE CPOSV( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CPOTRF, CPOTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U**H*U or A = L*L**H. +* + CALL CPOTRF( UPLO, N, A, LDA, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* + END IF + RETURN +* +* End of CPOSV +* + END diff --git a/dspl/liblapack/SRC/cposvx.f b/dspl/liblapack/SRC/cposvx.f new file mode 100644 index 0000000..aec8db2 --- /dev/null +++ b/dspl/liblapack/SRC/cposvx.f @@ -0,0 +1,492 @@ +*> \brief CPOSVX computes the solution to system of linear equations A * X = B for PO matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPOSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPOSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED, +* S, B, LDB, X, LDX, RCOND, FERR, BERR, WORK, +* RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* REAL RCOND +* .. +* .. Array Arguments .. +* REAL BERR( * ), FERR( * ), RWORK( * ), S( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPOSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to +*> compute the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian positive definite matrix and X and B +*> are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +*> factor the matrix A (after equilibration if FACT = 'E') as +*> A = U**H* U, if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is a lower triangular +*> matrix. +*> +*> 3. If the leading i-by-i principal minor is not positive definite, +*> then the routine returns with INFO = i. Otherwise, the factored +*> form of A is used to estimate the condition number of the matrix +*> A. If the reciprocal of the condition number is less than machine +*> precision, INFO = N+1 is returned as a warning, but the routine +*> still goes on to solve for X and compute error bounds as +*> described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(S) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF contains the factored form of A. +*> If EQUED = 'Y', the matrix A has been equilibrated +*> with scaling factors given by S. A and AF will not +*> be modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A, except if FACT = 'F' and +*> EQUED = 'Y', then A must contain the equilibrated matrix +*> diag(S)*A*diag(S). If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. A is not modified if +*> FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the triangular factor U or L from the Cholesky +*> factorization A = U**H*U or A = L*L**H, in the same storage +*> format as A. If EQUED .ne. 'N', then AF is the factored form +*> of the equilibrated matrix diag(S)*A*diag(S). +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**H*U or A = L*L**H of the original +*> matrix A. +*> +*> If FACT = 'E', then AF is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**H*U or A = L*L**H of the equilibrated +*> matrix A (see the description of A for the form of the +*> equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A; not accessed if EQUED = 'N'. S is +*> an input argument if FACT = 'F'; otherwise, S is an output +*> argument. If FACT = 'F' and EQUED = 'Y', each element of S +*> must be positive. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS righthand side matrix B. +*> On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +*> B is overwritten by diag(S) * B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +*> the original system of equations. Note that if EQUED = 'Y', +*> A and B are modified on exit, and the solution to the +*> equilibrated system is inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The estimate of the reciprocal condition number of the matrix +*> A after equilibration (if done). If RCOND is less than the +*> machine precision (in particular, if RCOND = 0), the matrix +*> is singular to working precision. This condition is +*> indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: the leading minor of order i of A is +*> not positive definite, so the factorization +*> could not be completed, and the solution has not +*> been computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexPOsolve +* +* ===================================================================== + SUBROUTINE CPOSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED, + $ S, B, LDB, X, LDX, RCOND, FERR, BERR, WORK, + $ RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + REAL BERR( * ), FERR( * ), RWORK( * ), S( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER I, INFEQU, J + REAL AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHE, SLAMCH + EXTERNAL LSAME, CLANHE, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACPY, CLAQHE, CPOCON, CPOEQU, CPORFS, CPOTRF, + $ CPOTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -9 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -10 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL CPOEQU( N, A, LDA, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL CLAQHE( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U**H *U or A = L*L**H. +* + CALL CLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL CPOTRF( UPLO, N, AF, LDAF, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = CLANHE( '1', UPLO, N, A, LDA, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL CPOCON( UPLO, N, AF, LDAF, ANORM, RCOND, WORK, RWORK, INFO ) +* +* Compute the solution matrix X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CPOTRS( UPLO, N, NRHS, AF, LDAF, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL CPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, LDX, + $ FERR, BERR, WORK, RWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 50 J = 1, NRHS + DO 40 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 40 CONTINUE + 50 CONTINUE + DO 60 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 60 CONTINUE + END IF +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of CPOSVX +* + END diff --git a/dspl/liblapack/SRC/cposvxx.f b/dspl/liblapack/SRC/cposvxx.f new file mode 100644 index 0000000..64d1b67 --- /dev/null +++ b/dspl/liblapack/SRC/cposvxx.f @@ -0,0 +1,683 @@ +*> \brief CPOSVXX computes the solution to system of linear equations A * X = B for PO matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPOSVXX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPOSVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED, +* S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, +* N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, +* NPARAMS, PARAMS, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* REAL RCOND, RPVGRW +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* REAL S( * ), PARAMS( * ), BERR( * ), RWORK( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPOSVXX uses the Cholesky factorization A = U**T*U or A = L*L**T +*> to compute the solution to a complex system of linear equations +*> A * X = B, where A is an N-by-N symmetric positive definite matrix +*> and X and B are N-by-NRHS matrices. +*> +*> If requested, both normwise and maximum componentwise error bounds +*> are returned. CPOSVXX will return a solution with a tiny +*> guaranteed error (O(eps) where eps is the working machine +*> precision) unless the matrix is very ill-conditioned, in which +*> case a warning is returned. Relevant condition numbers also are +*> calculated and returned. +*> +*> CPOSVXX accepts user-provided factorizations and equilibration +*> factors; see the definitions of the FACT and EQUED options. +*> Solving with refinement and using a factorization from a previous +*> CPOSVXX call will also produce a solution with either O(eps) +*> errors or warnings, but we cannot make that claim for general +*> user-provided factorizations and equilibration factors if they +*> differ from what CPOSVXX would itself produce. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> +*> diag(S)*A*diag(S) *inv(diag(S))*X = diag(S)*B +*> +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +*> factor the matrix A (after equilibration if FACT = 'E') as +*> A = U**T* U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is a lower triangular +*> matrix. +*> +*> 3. If the leading i-by-i principal minor is not positive definite, +*> then the routine returns with INFO = i. Otherwise, the factored +*> form of A is used to estimate the condition number of the matrix +*> A (see argument RCOND). If the reciprocal of the condition number +*> is less than machine precision, the routine still goes on to solve +*> for X and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), +*> the routine will use iterative refinement to try to get a small +*> error and error bounds. Refinement calculates the residual to at +*> least twice the working precision. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(S) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF contains the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by S. +*> A and AF are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the symmetric matrix A, except if FACT = 'F' and EQUED = +*> 'Y', then A must contain the equilibrated matrix +*> diag(S)*A*diag(S). If UPLO = 'U', the leading N-by-N upper +*> triangular part of A contains the upper triangular part of the +*> matrix A, and the strictly lower triangular part of A is not +*> referenced. If UPLO = 'L', the leading N-by-N lower triangular +*> part of A contains the lower triangular part of the matrix A, and +*> the strictly upper triangular part of A is not referenced. A is +*> not modified if FACT = 'F' or 'N', or if FACT = 'E' and EQUED = +*> 'N' on exit. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T, in the same storage +*> format as A. If EQUED .ne. 'N', then AF is the factored +*> form of the equilibrated matrix diag(S)*A*diag(S). +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T of the original +*> matrix A. +*> +*> If FACT = 'E', then AF is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T of the equilibrated +*> matrix A (see the description of A for the form of the +*> equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Both row and column equilibration, i.e., A has been +*> replaced by diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The row scale factors for A. If EQUED = 'Y', A is multiplied on +*> the left and right by diag(S). S is an input argument if FACT = +*> 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED +*> = 'Y', each element of S must be positive. If S is output, each +*> element of S is a power of the radix. If S is input, each element +*> of S should be a power of the radix to ensure a reliable solution +*> and error estimates. Scaling by powers of the radix does not cause +*> rounding errors unless the result underflows or overflows. +*> Rounding errors during scaling lead to refining with a matrix that +*> is not equivalent to the input matrix, producing error estimates +*> that may not be reliable. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if EQUED = 'Y', B is overwritten by diag(S)*B; +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X to the original +*> system of equations. Note that A and B are modified on exit if +*> EQUED .ne. 'N', and the solution to the equilibrated system is +*> inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] RPVGRW +*> \verbatim +*> RPVGRW is REAL +*> Reciprocal pivot growth. On exit, this contains the reciprocal +*> pivot growth factor norm(A)/norm(U). The "max absolute element" +*> norm is used. If this is much less than 1, then the stability of +*> the LU factorization of the (equilibrated) matrix A could be poor. +*> This also means that the solution X, estimated condition numbers, +*> and error bounds could be unreliable. If factorization fails with +*> 0 for the leading INFO columns of A. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is REAL array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexPOsolve +* +* ===================================================================== + SUBROUTINE CPOSVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED, + $ S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, + $ N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, + $ NPARAMS, PARAMS, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + REAL RCOND, RPVGRW +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) + REAL S( * ), PARAMS( * ), BERR( * ), RWORK( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER INFEQU, J + REAL AMAX, BIGNUM, SMIN, SMAX, SCOND, SMLNUM +* .. +* .. External Functions .. + EXTERNAL LSAME, SLAMCH, CLA_PORPVGRW + LOGICAL LSAME + REAL SLAMCH, CLA_PORPVGRW +* .. +* .. External Subroutines .. + EXTERNAL CPOEQUB, CPOTRF, CPOTRS, CLACPY, + $ CLAQHE, XERBLA, CLASCL2, CPORFSX +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + ENDIF +* +* Default is failure. If an input parameter is wrong or +* factorization fails, make everything look horrible. Only the +* pivot growth is set here, the rest is initialized in CPORFSX. +* + RPVGRW = ZERO +* +* Test the input parameters. PARAMS is not tested until CPORFSX. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT. + $ LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. + $ .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -9 + ELSE + IF ( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -10 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOSVXX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL CPOEQUB( N, A, LDA, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL CLAQHE( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) CALL CLASCL2( N, NRHS, S, B, LDB ) +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization of A. +* + CALL CLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL CPOTRF( UPLO, N, AF, LDAF, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Pivot in column INFO is exactly 0 +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + RPVGRW = CLA_PORPVGRW( UPLO, N, A, LDA, AF, LDAF, RWORK ) + RETURN + END IF + END IF +* +* Compute the reciprocal pivot growth factor RPVGRW. +* + RPVGRW = CLA_PORPVGRW( UPLO, N, A, LDA, AF, LDAF, RWORK ) +* +* Compute the solution matrix X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CPOTRS( UPLO, N, NRHS, AF, LDAF, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL CPORFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, + $ S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, RWORK, INFO ) + +* +* Scale solutions. +* + IF ( RCEQU ) THEN + CALL CLASCL2( N, NRHS, S, X, LDX ) + END IF +* + RETURN +* +* End of CPOSVXX +* + END diff --git a/dspl/liblapack/SRC/cpotf2.f b/dspl/liblapack/SRC/cpotf2.f new file mode 100644 index 0000000..16b917a --- /dev/null +++ b/dspl/liblapack/SRC/cpotf2.f @@ -0,0 +1,237 @@ +*> \brief \b CPOTF2 computes the Cholesky factorization of a symmetric/Hermitian positive definite matrix (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPOTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPOTF2( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPOTF2 computes the Cholesky factorization of a complex Hermitian +*> positive definite matrix A. +*> +*> The factorization has the form +*> A = U**H * U , if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**H *U or A = L*L**H. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, the leading minor of order k is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexPOcomputational +* +* ===================================================================== + SUBROUTINE CPOTF2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CLACGV, CSSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U**H *U. +* + DO 10 J = 1, N +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = REAL( A( J, J ) ) - CDOTC( J-1, A( 1, J ), 1, + $ A( 1, J ), 1 ) + IF( AJJ.LE.ZERO.OR.SISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of row J. +* + IF( J.LT.N ) THEN + CALL CLACGV( J-1, A( 1, J ), 1 ) + CALL CGEMV( 'Transpose', J-1, N-J, -CONE, A( 1, J+1 ), + $ LDA, A( 1, J ), 1, CONE, A( J, J+1 ), LDA ) + CALL CLACGV( J-1, A( 1, J ), 1 ) + CALL CSSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L**H. +* + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = REAL( A( J, J ) ) - CDOTC( J-1, A( J, 1 ), LDA, + $ A( J, 1 ), LDA ) + IF( AJJ.LE.ZERO.OR.SISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of column J. +* + IF( J.LT.N ) THEN + CALL CLACGV( J-1, A( J, 1 ), LDA ) + CALL CGEMV( 'No transpose', N-J, J-1, -CONE, A( J+1, 1 ), + $ LDA, A( J, 1 ), LDA, CONE, A( J+1, J ), 1 ) + CALL CLACGV( J-1, A( J, 1 ), LDA ) + CALL CSSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF + GO TO 40 +* + 30 CONTINUE + INFO = J +* + 40 CONTINUE + RETURN +* +* End of CPOTF2 +* + END diff --git a/dspl/liblapack/SRC/cpotrf.f b/dspl/liblapack/SRC/cpotrf.f new file mode 100644 index 0000000..1d981bf --- /dev/null +++ b/dspl/liblapack/SRC/cpotrf.f @@ -0,0 +1,249 @@ +*> \brief \b CPOTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPOTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPOTRF( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPOTRF computes the Cholesky factorization of a complex Hermitian +*> positive definite matrix A. +*> +*> The factorization has the form +*> A = U**H * U, if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> +*> This is the block version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**H*U or A = L*L**H. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexPOcomputational +* +* ===================================================================== + SUBROUTINE CPOTRF( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + COMPLEX CONE + PARAMETER ( ONE = 1.0E+0, CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CHERK, CPOTRF2, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'CPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + CALL CPOTRF2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code. +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U**H *U. +* + DO 10 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + CALL CHERK( 'Upper', 'Conjugate transpose', JB, J-1, + $ -ONE, A( 1, J ), LDA, ONE, A( J, J ), LDA ) + CALL CPOTRF2( 'Upper', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( J+JB.LE.N ) THEN +* +* Compute the current block row. +* + CALL CGEMM( 'Conjugate transpose', 'No transpose', JB, + $ N-J-JB+1, J-1, -CONE, A( 1, J ), LDA, + $ A( 1, J+JB ), LDA, CONE, A( J, J+JB ), + $ LDA ) + CALL CTRSM( 'Left', 'Upper', 'Conjugate transpose', + $ 'Non-unit', JB, N-J-JB+1, CONE, A( J, J ), + $ LDA, A( J, J+JB ), LDA ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization A = L*L**H. +* + DO 20 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + CALL CHERK( 'Lower', 'No transpose', JB, J-1, -ONE, + $ A( J, 1 ), LDA, ONE, A( J, J ), LDA ) + CALL CPOTRF2( 'Lower', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( J+JB.LE.N ) THEN +* +* Compute the current block column. +* + CALL CGEMM( 'No transpose', 'Conjugate transpose', + $ N-J-JB+1, JB, J-1, -CONE, A( J+JB, 1 ), + $ LDA, A( J, 1 ), LDA, CONE, A( J+JB, J ), + $ LDA ) + CALL CTRSM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Non-unit', N-J-JB+1, JB, CONE, A( J, J ), + $ LDA, A( J+JB, J ), LDA ) + END IF + 20 CONTINUE + END IF + END IF + GO TO 40 +* + 30 CONTINUE + INFO = INFO + J - 1 +* + 40 CONTINUE + RETURN +* +* End of CPOTRF +* + END diff --git a/dspl/liblapack/SRC/cpotrf2.f b/dspl/liblapack/SRC/cpotrf2.f new file mode 100644 index 0000000..789843c --- /dev/null +++ b/dspl/liblapack/SRC/cpotrf2.f @@ -0,0 +1,246 @@ +*> \brief \b CPOTRF2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE CPOTRF2( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPOTRF2 computes the Cholesky factorization of a real symmetric +*> positive definite matrix A using the recursive algorithm. +*> +*> The factorization has the form +*> A = U**H * U, if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> +*> This is the recursive version of the algorithm. It divides +*> the matrix into four submatrices: +*> +*> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 +*> A = [ -----|----- ] with n1 = n/2 +*> [ A21 | A22 ] n2 = n-n1 +*> +*> The subroutine calls itself to factor A11. Update and scale A21 +*> or A12, update A22 then calls itself to factor A22. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**H*U or A = L*L**H. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexPOcomputational +* +* ===================================================================== + RECURSIVE SUBROUTINE CPOTRF2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = (1.0E+0, 0.0E+0) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER N1, N2, IINFO + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CHERK, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOTRF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* N=1 case +* + IF( N.EQ.1 ) THEN +* +* Test for non-positive-definiteness +* + AJJ = REAL( A( 1, 1 ) ) + IF( AJJ.LE.ZERO.OR.SISNAN( AJJ ) ) THEN + INFO = 1 + RETURN + END IF +* +* Factor +* + A( 1, 1 ) = SQRT( AJJ ) +* +* Use recursive code +* + ELSE + N1 = N/2 + N2 = N-N1 +* +* Factor A11 +* + CALL CPOTRF2( UPLO, N1, A( 1, 1 ), LDA, IINFO ) + IF ( IINFO.NE.0 ) THEN + INFO = IINFO + RETURN + END IF +* +* Compute the Cholesky factorization A = U**H*U +* + IF( UPPER ) THEN +* +* Update and scale A12 +* + CALL CTRSM( 'L', 'U', 'C', 'N', N1, N2, CONE, + $ A( 1, 1 ), LDA, A( 1, N1+1 ), LDA ) +* +* Update and factor A22 +* + CALL CHERK( UPLO, 'C', N2, N1, -ONE, A( 1, N1+1 ), LDA, + $ ONE, A( N1+1, N1+1 ), LDA ) +* + CALL CPOTRF2( UPLO, N2, A( N1+1, N1+1 ), LDA, IINFO ) +* + IF ( IINFO.NE.0 ) THEN + INFO = IINFO + N1 + RETURN + END IF +* +* Compute the Cholesky factorization A = L*L**H +* + ELSE +* +* Update and scale A21 +* + CALL CTRSM( 'R', 'L', 'C', 'N', N2, N1, CONE, + $ A( 1, 1 ), LDA, A( N1+1, 1 ), LDA ) +* +* Update and factor A22 +* + CALL CHERK( UPLO, 'N', N2, N1, -ONE, A( N1+1, 1 ), LDA, + $ ONE, A( N1+1, N1+1 ), LDA ) +* + CALL CPOTRF2( UPLO, N2, A( N1+1, N1+1 ), LDA, IINFO ) +* + IF ( IINFO.NE.0 ) THEN + INFO = IINFO + N1 + RETURN + END IF +* + END IF + END IF + RETURN +* +* End of CPOTRF2 +* + END diff --git a/dspl/liblapack/SRC/cpotri.f b/dspl/liblapack/SRC/cpotri.f new file mode 100644 index 0000000..9b54895 --- /dev/null +++ b/dspl/liblapack/SRC/cpotri.f @@ -0,0 +1,159 @@ +*> \brief \b CPOTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPOTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPOTRI( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPOTRI computes the inverse of a complex Hermitian positive definite +*> matrix A using the Cholesky factorization A = U**H*U or A = L*L**H +*> computed by CPOTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the triangular factor U or L from the Cholesky +*> factorization A = U**H*U or A = L*L**H, as computed by +*> CPOTRF. +*> On exit, the upper or lower triangle of the (Hermitian) +*> inverse of A, overwriting the input factor U or L. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the (i,i) element of the factor U or L is +*> zero, and the inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexPOcomputational +* +* ===================================================================== + SUBROUTINE CPOTRI( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLAUUM, CTRTRI, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL CTRTRI( UPLO, 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* +* Form inv(U) * inv(U)**H or inv(L)**H * inv(L). +* + CALL CLAUUM( UPLO, N, A, LDA, INFO ) +* + RETURN +* +* End of CPOTRI +* + END diff --git a/dspl/liblapack/SRC/cpotrs.f b/dspl/liblapack/SRC/cpotrs.f new file mode 100644 index 0000000..18c3d0d --- /dev/null +++ b/dspl/liblapack/SRC/cpotrs.f @@ -0,0 +1,204 @@ +*> \brief \b CPOTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPOTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPOTRS solves a system of linear equations A*X = B with a Hermitian +*> positive definite matrix A using the Cholesky factorization +*> A = U**H*U or A = L*L**H computed by CPOTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H*U or A = L*L**H, as computed by CPOTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexPOcomputational +* +* ===================================================================== + SUBROUTINE CPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPOTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U**H *U. +* +* Solve U**H *X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Upper', 'Conjugate transpose', 'Non-unit', + $ N, NRHS, ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A*X = B where A = L*L**H. +* +* Solve L*X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) +* +* Solve L**H *X = B, overwriting B with X. +* + CALL CTRSM( 'Left', 'Lower', 'Conjugate transpose', 'Non-unit', + $ N, NRHS, ONE, A, LDA, B, LDB ) + END IF +* + RETURN +* +* End of CPOTRS +* + END diff --git a/dspl/liblapack/SRC/cppcon.f b/dspl/liblapack/SRC/cppcon.f new file mode 100644 index 0000000..48b0a33 --- /dev/null +++ b/dspl/liblapack/SRC/cppcon.f @@ -0,0 +1,255 @@ +*> \brief \b CPPCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPPCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPPCON( UPLO, N, AP, ANORM, RCOND, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* REAL RWORK( * ) +* COMPLEX AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPPCON estimates the reciprocal of the condition number (in the +*> 1-norm) of a complex Hermitian positive definite packed matrix using +*> the Cholesky factorization A = U**H*U or A = L*L**H computed by +*> CPPTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H*U or A = L*L**H, packed columnwise in a linear +*> array. The j-th column of U or L is stored in the array AP +*> as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The 1-norm (or infinity-norm) of the Hermitian matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CPPCON( UPLO, N, AP, ANORM, RCOND, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + REAL AINVNM, SCALE, SCALEL, SCALEU, SMLNUM + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CLATPS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U**H). +* + CALL CLATPS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, AP, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL CLATPS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEU, RWORK, INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL CLATPS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L**H). +* + CALL CLATPS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, AP, WORK, SCALEU, RWORK, INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of CPPCON +* + END diff --git a/dspl/liblapack/SRC/cppequ.f b/dspl/liblapack/SRC/cppequ.f new file mode 100644 index 0000000..2ceeb3f --- /dev/null +++ b/dspl/liblapack/SRC/cppequ.f @@ -0,0 +1,240 @@ +*> \brief \b CPPEQU +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPPEQU + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* REAL AMAX, SCOND +* .. +* .. Array Arguments .. +* REAL S( * ) +* COMPLEX AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPPEQU computes row and column scalings intended to equilibrate a +*> Hermitian positive definite matrix A in packed storage and reduce +*> its condition number (with respect to the two-norm). S contains the +*> scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix +*> B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal. +*> This choice of S puts the condition number of B within a factor N of +*> the smallest possible condition number over all possible diagonal +*> scalings. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the Hermitian matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is REAL +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL S( * ) + COMPLEX AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, JJ + REAL SMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPPEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* +* Initialize SMIN and AMAX. +* + S( 1 ) = REAL( AP( 1 ) ) + SMIN = S( 1 ) + AMAX = S( 1 ) +* + IF( UPPER ) THEN +* +* UPLO = 'U': Upper triangle of A is stored. +* Find the minimum and maximum diagonal elements. +* + JJ = 1 + DO 10 I = 2, N + JJ = JJ + I + S( I ) = REAL( AP( JJ ) ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + ELSE +* +* UPLO = 'L': Lower triangle of A is stored. +* Find the minimum and maximum diagonal elements. +* + JJ = 1 + DO 20 I = 2, N + JJ = JJ + N - I + 2 + S( I ) = REAL( AP( JJ ) ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 20 CONTINUE + END IF +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 30 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 30 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 40 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 40 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of CPPEQU +* + END diff --git a/dspl/liblapack/SRC/cpprfs.f b/dspl/liblapack/SRC/cpprfs.f new file mode 100644 index 0000000..5206b95 --- /dev/null +++ b/dspl/liblapack/SRC/cpprfs.f @@ -0,0 +1,428 @@ +*> \brief \b CPPRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPPRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, +* BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* REAL BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX AFP( * ), AP( * ), B( LDB, * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPPRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is Hermitian positive definite +*> and packed, and provides error bounds and backward error estimates +*> for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the Hermitian matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in] AFP +*> \verbatim +*> AFP is COMPLEX array, dimension (N*(N+1)/2) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H*U or A = L*L**H, as computed by SPPTRF/CPPTRF, +*> packed columnwise in a linear array in the same format as A +*> (see AP). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by CPPTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, + $ BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ==================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IK, J, K, KASE, KK, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CHPMV, CLACN2, CPPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL CCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL CHPMV( UPLO, N, -CONE, AP, X( 1, J ), 1, CONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + KK = 1 + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + IK = KK + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 40 CONTINUE + RWORK( K ) = RWORK( K ) + ABS( REAL( AP( KK+K-1 ) ) )* + $ XK + S + KK = KK + K + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + ABS( REAL( AP( KK ) ) )*XK + IK = KK + 1 + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + KK = KK + ( N-K+1 ) + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CPPTRS( UPLO, N, 1, AFP, WORK, N, INFO ) + CALL CAXPY( N, CONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**H). +* + CALL CPPTRS( UPLO, N, 1, AFP, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL CPPTRS( UPLO, N, 1, AFP, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of CPPRFS +* + END diff --git a/dspl/liblapack/SRC/cppsv.f b/dspl/liblapack/SRC/cppsv.f new file mode 100644 index 0000000..084ef46 --- /dev/null +++ b/dspl/liblapack/SRC/cppsv.f @@ -0,0 +1,205 @@ +*> \brief CPPSV computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPPSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPPSV( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPPSV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian positive definite matrix stored in +*> packed format and X and B are N-by-NRHS matrices. +*> +*> The Cholesky decomposition is used to factor A as +*> A = U**H * U, if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is a lower triangular +*> matrix. The factored form of A is then used to solve the system of +*> equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**H*U or A = L*L**H, in the same storage +*> format as A. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i of A is not +*> positive definite, so the factorization could not be +*> completed, and the solution has not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the Hermitian matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = conjg(aji)) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CPPSV( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CPPTRF, CPPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPPSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U**H *U or A = L*L**H. +* + CALL CPPTRF( UPLO, N, AP, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO ) +* + END IF + RETURN +* +* End of CPPSV +* + END diff --git a/dspl/liblapack/SRC/cppsvx.f b/dspl/liblapack/SRC/cppsvx.f new file mode 100644 index 0000000..5ae669c --- /dev/null +++ b/dspl/liblapack/SRC/cppsvx.f @@ -0,0 +1,493 @@ +*> \brief CPPSVX computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPPSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPPSVX( FACT, UPLO, N, NRHS, AP, AFP, EQUED, S, B, LDB, +* X, LDX, RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* REAL RCOND +* .. +* .. Array Arguments .. +* REAL BERR( * ), FERR( * ), RWORK( * ), S( * ) +* COMPLEX AFP( * ), AP( * ), B( LDB, * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPPSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to +*> compute the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian positive definite matrix stored in +*> packed format and X and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +*> factor the matrix A (after equilibration if FACT = 'E') as +*> A = U**H * U , if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix, L is a lower triangular +*> matrix, and **H indicates conjugate transpose. +*> +*> 3. If the leading i-by-i principal minor is not positive definite, +*> then the routine returns with INFO = i. Otherwise, the factored +*> form of A is used to estimate the condition number of the matrix +*> A. If the reciprocal of the condition number is less than machine +*> precision, INFO = N+1 is returned as a warning, but the routine +*> still goes on to solve for X and compute error bounds as +*> described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(S) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AFP contains the factored form of A. +*> If EQUED = 'Y', the matrix A has been equilibrated +*> with scaling factors given by S. AP and AFP will not +*> be modified. +*> = 'N': The matrix A will be copied to AFP and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AFP and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array, except if FACT = 'F' +*> and EQUED = 'Y', then A must contain the equilibrated matrix +*> diag(S)*A*diag(S). The j-th column of A is stored in the +*> array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. A is not modified if +*> FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in,out] AFP +*> \verbatim +*> AFP is COMPLEX array, dimension (N*(N+1)/2) +*> If FACT = 'F', then AFP is an input argument and on entry +*> contains the triangular factor U or L from the Cholesky +*> factorization A = U**H*U or A = L*L**H, in the same storage +*> format as A. If EQUED .ne. 'N', then AFP is the factored +*> form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AFP is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**H * U or A = L * L**H of the original +*> matrix A. +*> +*> If FACT = 'E', then AFP is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**H*U or A = L*L**H of the equilibrated +*> matrix A (see the description of AP for the form of the +*> equilibrated matrix). +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A; not accessed if EQUED = 'N'. S is +*> an input argument if FACT = 'F'; otherwise, S is an output +*> argument. If FACT = 'F' and EQUED = 'Y', each element of S +*> must be positive. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +*> B is overwritten by diag(S) * B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +*> the original system of equations. Note that if EQUED = 'Y', +*> A and B are modified on exit, and the solution to the +*> equilibrated system is inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The estimate of the reciprocal condition number of the matrix +*> A after equilibration (if done). If RCOND is less than the +*> machine precision (in particular, if RCOND = 0), the matrix +*> is singular to working precision. This condition is +*> indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: the leading minor of order i of A is +*> not positive definite, so the factorization +*> could not be completed, and the solution has not +*> been computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexOTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the Hermitian matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = conjg(aji)) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CPPSVX( FACT, UPLO, N, NRHS, AP, AFP, EQUED, S, B, LDB, + $ X, LDX, RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + REAL BERR( * ), FERR( * ), RWORK( * ), S( * ) + COMPLEX AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER I, INFEQU, J + REAL AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHP, SLAMCH + EXTERNAL LSAME, CLANHP, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLACPY, CLAQHP, CPPCON, CPPEQU, CPPRFS, + $ CPPTRF, CPPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -7 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -8 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPPSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL CPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL CLAQHP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U**H * U or A = L * L**H. +* + CALL CCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 ) + CALL CPPTRF( UPLO, N, AFP, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = CLANHP( 'I', UPLO, N, AP, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL CPPCON( UPLO, N, AFP, ANORM, RCOND, WORK, RWORK, INFO ) +* +* Compute the solution matrix X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CPPTRS( UPLO, N, NRHS, AFP, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL CPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, BERR, + $ WORK, RWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 50 J = 1, NRHS + DO 40 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 40 CONTINUE + 50 CONTINUE + DO 60 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 60 CONTINUE + END IF +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of CPPSVX +* + END diff --git a/dspl/liblapack/SRC/cpptrf.f b/dspl/liblapack/SRC/cpptrf.f new file mode 100644 index 0000000..bbca265 --- /dev/null +++ b/dspl/liblapack/SRC/cpptrf.f @@ -0,0 +1,241 @@ +*> \brief \b CPPTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPPTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPPTRF( UPLO, N, AP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* COMPLEX AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPPTRF computes the Cholesky factorization of a complex Hermitian +*> positive definite matrix A stored in packed format. +*> +*> The factorization has the form +*> A = U**H * U, if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**H*U or A = L*L**H, in the same +*> storage format as A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the Hermitian matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = conjg(aji)) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CPPTRF( UPLO, N, AP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + COMPLEX AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JC, JJ + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CHPR, CSSCAL, CTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPPTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U**H * U. +* + JJ = 0 + DO 10 J = 1, N + JC = JJ + 1 + JJ = JJ + J +* +* Compute elements 1:J-1 of column J. +* + IF( J.GT.1 ) + $ CALL CTPSV( 'Upper', 'Conjugate transpose', 'Non-unit', + $ J-1, AP, AP( JC ), 1 ) +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = REAL( AP( JJ ) ) - CDOTC( J-1, AP( JC ), 1, AP( JC ), + $ 1 ) + IF( AJJ.LE.ZERO ) THEN + AP( JJ ) = AJJ + GO TO 30 + END IF + AP( JJ ) = SQRT( AJJ ) + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L * L**H. +* + JJ = 1 + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = REAL( AP( JJ ) ) + IF( AJJ.LE.ZERO ) THEN + AP( JJ ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + AP( JJ ) = AJJ +* +* Compute elements J+1:N of column J and update the trailing +* submatrix. +* + IF( J.LT.N ) THEN + CALL CSSCAL( N-J, ONE / AJJ, AP( JJ+1 ), 1 ) + CALL CHPR( 'Lower', N-J, -ONE, AP( JJ+1 ), 1, + $ AP( JJ+N-J+1 ) ) + JJ = JJ + N - J + 1 + END IF + 20 CONTINUE + END IF + GO TO 40 +* + 30 CONTINUE + INFO = J +* + 40 CONTINUE + RETURN +* +* End of CPPTRF +* + END diff --git a/dspl/liblapack/SRC/cpptri.f b/dspl/liblapack/SRC/cpptri.f new file mode 100644 index 0000000..1340cfb --- /dev/null +++ b/dspl/liblapack/SRC/cpptri.f @@ -0,0 +1,190 @@ +*> \brief \b CPPTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPPTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPPTRI( UPLO, N, AP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* COMPLEX AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPPTRI computes the inverse of a complex Hermitian positive definite +*> matrix A using the Cholesky factorization A = U**H*U or A = L*L**H +*> computed by CPPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular factor is stored in AP; +*> = 'L': Lower triangular factor is stored in AP. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the triangular factor U or L from the Cholesky +*> factorization A = U**H*U or A = L*L**H, packed columnwise as +*> a linear array. The j-th column of U or L is stored in the +*> array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +*> +*> On exit, the upper or lower triangle of the (Hermitian) +*> inverse of A, overwriting the input factor U or L. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the (i,i) element of the factor U or L is +*> zero, and the inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CPPTRI( UPLO, N, AP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + COMPLEX AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JC, JJ, JJN + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTC + EXTERNAL LSAME, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CHPR, CSSCAL, CTPMV, CTPTRI, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPPTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL CTPTRI( UPLO, 'Non-unit', N, AP, INFO ) + IF( INFO.GT.0 ) + $ RETURN + IF( UPPER ) THEN +* +* Compute the product inv(U) * inv(U)**H. +* + JJ = 0 + DO 10 J = 1, N + JC = JJ + 1 + JJ = JJ + J + IF( J.GT.1 ) + $ CALL CHPR( 'Upper', J-1, ONE, AP( JC ), 1, AP ) + AJJ = AP( JJ ) + CALL CSSCAL( J, AJJ, AP( JC ), 1 ) + 10 CONTINUE +* + ELSE +* +* Compute the product inv(L)**H * inv(L). +* + JJ = 1 + DO 20 J = 1, N + JJN = JJ + N - J + 1 + AP( JJ ) = REAL( CDOTC( N-J+1, AP( JJ ), 1, AP( JJ ), 1 ) ) + IF( J.LT.N ) + $ CALL CTPMV( 'Lower', 'Conjugate transpose', 'Non-unit', + $ N-J, AP( JJN ), AP( JJ+1 ), 1 ) + JJ = JJN + 20 CONTINUE + END IF +* + RETURN +* +* End of CPPTRI +* + END diff --git a/dspl/liblapack/SRC/cpptrs.f b/dspl/liblapack/SRC/cpptrs.f new file mode 100644 index 0000000..7d9b6de --- /dev/null +++ b/dspl/liblapack/SRC/cpptrs.f @@ -0,0 +1,203 @@ +*> \brief \b CPPTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPPTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPPTRS solves a system of linear equations A*X = B with a Hermitian +*> positive definite matrix A in packed storage using the Cholesky +*> factorization A = U**H*U or A = L*L**H computed by CPPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H*U or A = L*L**H, packed columnwise in a linear +*> array. The j-th column of U or L is stored in the array AP +*> as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U**H * U. +* + DO 10 I = 1, NRHS +* +* Solve U**H *X = B, overwriting B with X. +* + CALL CTPSV( 'Upper', 'Conjugate transpose', 'Non-unit', N, + $ AP, B( 1, I ), 1 ) +* +* Solve U*X = B, overwriting B with X. +* + CALL CTPSV( 'Upper', 'No transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) + 10 CONTINUE + ELSE +* +* Solve A*X = B where A = L * L**H. +* + DO 20 I = 1, NRHS +* +* Solve L*Y = B, overwriting B with X. +* + CALL CTPSV( 'Lower', 'No transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) +* +* Solve L**H *X = Y, overwriting B with X. +* + CALL CTPSV( 'Lower', 'Conjugate transpose', 'Non-unit', N, + $ AP, B( 1, I ), 1 ) + 20 CONTINUE + END IF +* + RETURN +* +* End of CPPTRS +* + END diff --git a/dspl/liblapack/SRC/cpstf2.f b/dspl/liblapack/SRC/cpstf2.f new file mode 100644 index 0000000..f43cb30 --- /dev/null +++ b/dspl/liblapack/SRC/cpstf2.f @@ -0,0 +1,406 @@ +*> \brief \b CPSTF2 computes the Cholesky factorization with complete pivoting of complex Hermitian positive semidefinite matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPSTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPSTF2( UPLO, N, A, LDA, PIV, RANK, TOL, WORK, INFO ) +* +* .. Scalar Arguments .. +* REAL TOL +* INTEGER INFO, LDA, N, RANK +* CHARACTER UPLO +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ) +* REAL WORK( 2*N ) +* INTEGER PIV( N ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPSTF2 computes the Cholesky factorization with complete +*> pivoting of a complex Hermitian positive semidefinite matrix A. +*> +*> The factorization has the form +*> P**T * A * P = U**H * U , if UPLO = 'U', +*> P**T * A * P = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular, and +*> P is stored as vector PIV. +*> +*> This algorithm does not attempt to check that A is positive +*> semidefinite. This version of the algorithm calls level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization as above. +*> \endverbatim +*> +*> \param[out] PIV +*> \verbatim +*> PIV is INTEGER array, dimension (N) +*> PIV is such that the nonzero entries are P( PIV(K), K ) = 1. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The rank of A given by the number of steps the algorithm +*> completed. +*> \endverbatim +*> +*> \param[in] TOL +*> \verbatim +*> TOL is REAL +*> User defined tolerance. If TOL < 0, then N*U*MAX( A( K,K ) ) +*> will be used. The algorithm terminates at the (K-1)st step +*> if the pivot <= TOL. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> Work space. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> < 0: If INFO = -K, the K-th argument had an illegal value, +*> = 0: algorithm completed successfully, and +*> > 0: the matrix A is either rank deficient with computed rank +*> as returned in RANK, or is not positive semidefinite. See +*> Section 7 of LAPACK Working Note #161 for further +*> information. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CPSTF2( UPLO, N, A, LDA, PIV, RANK, TOL, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + REAL TOL + INTEGER INFO, LDA, N, RANK + CHARACTER UPLO +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) + REAL WORK( 2*N ) + INTEGER PIV( N ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + COMPLEX CTEMP + REAL AJJ, SSTOP, STEMP + INTEGER I, ITEMP, J, PVT + LOGICAL UPPER +* .. +* .. External Functions .. + REAL SLAMCH + LOGICAL LSAME, SISNAN + EXTERNAL SLAMCH, LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CLACGV, CSSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPSTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize PIV +* + DO 100 I = 1, N + PIV( I ) = I + 100 CONTINUE +* +* Compute stopping value +* + DO 110 I = 1, N + WORK( I ) = REAL( A( I, I ) ) + 110 CONTINUE + PVT = MAXLOC( WORK( 1:N ), 1 ) + AJJ = REAL ( A( PVT, PVT ) ) + IF( AJJ.LE.ZERO.OR.SISNAN( AJJ ) ) THEN + RANK = 0 + INFO = 1 + GO TO 200 + END IF +* +* Compute stopping value if not supplied +* + IF( TOL.LT.ZERO ) THEN + SSTOP = N * SLAMCH( 'Epsilon' ) * AJJ + ELSE + SSTOP = TOL + END IF +* +* Set first half of WORK to zero, holds dot products +* + DO 120 I = 1, N + WORK( I ) = 0 + 120 CONTINUE +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization P**T * A * P = U**H * U +* + DO 150 J = 1, N +* +* Find pivot, test for exit, else swap rows and columns +* Update dot products, compute possible pivots which are +* stored in the second half of WORK +* + DO 130 I = J, N +* + IF( J.GT.1 ) THEN + WORK( I ) = WORK( I ) + + $ REAL( CONJG( A( J-1, I ) )* + $ A( J-1, I ) ) + END IF + WORK( N+I ) = REAL( A( I, I ) ) - WORK( I ) +* + 130 CONTINUE +* + IF( J.GT.1 ) THEN + ITEMP = MAXLOC( WORK( (N+J):(2*N) ), 1 ) + PVT = ITEMP + J - 1 + AJJ = WORK( N+PVT ) + IF( AJJ.LE.SSTOP.OR.SISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 190 + END IF + END IF +* + IF( J.NE.PVT ) THEN +* +* Pivot OK, so can now swap pivot rows and columns +* + A( PVT, PVT ) = A( J, J ) + CALL CSWAP( J-1, A( 1, J ), 1, A( 1, PVT ), 1 ) + IF( PVT.LT.N ) + $ CALL CSWAP( N-PVT, A( J, PVT+1 ), LDA, + $ A( PVT, PVT+1 ), LDA ) + DO 140 I = J + 1, PVT - 1 + CTEMP = CONJG( A( J, I ) ) + A( J, I ) = CONJG( A( I, PVT ) ) + A( I, PVT ) = CTEMP + 140 CONTINUE + A( J, PVT ) = CONJG( A( J, PVT ) ) +* +* Swap dot products and PIV +* + STEMP = WORK( J ) + WORK( J ) = WORK( PVT ) + WORK( PVT ) = STEMP + ITEMP = PIV( PVT ) + PIV( PVT ) = PIV( J ) + PIV( J ) = ITEMP + END IF +* + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of row J +* + IF( J.LT.N ) THEN + CALL CLACGV( J-1, A( 1, J ), 1 ) + CALL CGEMV( 'Trans', J-1, N-J, -CONE, A( 1, J+1 ), LDA, + $ A( 1, J ), 1, CONE, A( J, J+1 ), LDA ) + CALL CLACGV( J-1, A( 1, J ), 1 ) + CALL CSSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA ) + END IF +* + 150 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization P**T * A * P = L * L**H +* + DO 180 J = 1, N +* +* Find pivot, test for exit, else swap rows and columns +* Update dot products, compute possible pivots which are +* stored in the second half of WORK +* + DO 160 I = J, N +* + IF( J.GT.1 ) THEN + WORK( I ) = WORK( I ) + + $ REAL( CONJG( A( I, J-1 ) )* + $ A( I, J-1 ) ) + END IF + WORK( N+I ) = REAL( A( I, I ) ) - WORK( I ) +* + 160 CONTINUE +* + IF( J.GT.1 ) THEN + ITEMP = MAXLOC( WORK( (N+J):(2*N) ), 1 ) + PVT = ITEMP + J - 1 + AJJ = WORK( N+PVT ) + IF( AJJ.LE.SSTOP.OR.SISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 190 + END IF + END IF +* + IF( J.NE.PVT ) THEN +* +* Pivot OK, so can now swap pivot rows and columns +* + A( PVT, PVT ) = A( J, J ) + CALL CSWAP( J-1, A( J, 1 ), LDA, A( PVT, 1 ), LDA ) + IF( PVT.LT.N ) + $ CALL CSWAP( N-PVT, A( PVT+1, J ), 1, A( PVT+1, PVT ), + $ 1 ) + DO 170 I = J + 1, PVT - 1 + CTEMP = CONJG( A( I, J ) ) + A( I, J ) = CONJG( A( PVT, I ) ) + A( PVT, I ) = CTEMP + 170 CONTINUE + A( PVT, J ) = CONJG( A( PVT, J ) ) +* +* Swap dot products and PIV +* + STEMP = WORK( J ) + WORK( J ) = WORK( PVT ) + WORK( PVT ) = STEMP + ITEMP = PIV( PVT ) + PIV( PVT ) = PIV( J ) + PIV( J ) = ITEMP + END IF +* + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of column J +* + IF( J.LT.N ) THEN + CALL CLACGV( J-1, A( J, 1 ), LDA ) + CALL CGEMV( 'No Trans', N-J, J-1, -CONE, A( J+1, 1 ), + $ LDA, A( J, 1 ), LDA, CONE, A( J+1, J ), 1 ) + CALL CLACGV( J-1, A( J, 1 ), LDA ) + CALL CSSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 ) + END IF +* + 180 CONTINUE +* + END IF +* +* Ran to completion, A has full rank +* + RANK = N +* + GO TO 200 + 190 CONTINUE +* +* Rank is number of steps completed. Set INFO = 1 to signal +* that the factorization cannot be used to solve a system. +* + RANK = J - 1 + INFO = 1 +* + 200 CONTINUE + RETURN +* +* End of CPSTF2 +* + END diff --git a/dspl/liblapack/SRC/cpstrf.f b/dspl/liblapack/SRC/cpstrf.f new file mode 100644 index 0000000..4b6cc41 --- /dev/null +++ b/dspl/liblapack/SRC/cpstrf.f @@ -0,0 +1,463 @@ +*> \brief \b CPSTRF computes the Cholesky factorization with complete pivoting of complex Hermitian positive semidefinite matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPSTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPSTRF( UPLO, N, A, LDA, PIV, RANK, TOL, WORK, INFO ) +* +* .. Scalar Arguments .. +* REAL TOL +* INTEGER INFO, LDA, N, RANK +* CHARACTER UPLO +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ) +* REAL WORK( 2*N ) +* INTEGER PIV( N ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPSTRF computes the Cholesky factorization with complete +*> pivoting of a complex Hermitian positive semidefinite matrix A. +*> +*> The factorization has the form +*> P**T * A * P = U**H * U , if UPLO = 'U', +*> P**T * A * P = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular, and +*> P is stored as vector PIV. +*> +*> This algorithm does not attempt to check that A is positive +*> semidefinite. This version of the algorithm calls level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization as above. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] PIV +*> \verbatim +*> PIV is INTEGER array, dimension (N) +*> PIV is such that the nonzero entries are P( PIV(K), K ) = 1. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The rank of A given by the number of steps the algorithm +*> completed. +*> \endverbatim +*> +*> \param[in] TOL +*> \verbatim +*> TOL is REAL +*> User defined tolerance. If TOL < 0, then N*U*MAX( A(K,K) ) +*> will be used. The algorithm terminates at the (K-1)st step +*> if the pivot <= TOL. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> Work space. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> < 0: If INFO = -K, the K-th argument had an illegal value, +*> = 0: algorithm completed successfully, and +*> > 0: the matrix A is either rank deficient with computed rank +*> as returned in RANK, or is not positive semidefinite. See +*> Section 7 of LAPACK Working Note #161 for further +*> information. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CPSTRF( UPLO, N, A, LDA, PIV, RANK, TOL, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + REAL TOL + INTEGER INFO, LDA, N, RANK + CHARACTER UPLO +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) + REAL WORK( 2*N ) + INTEGER PIV( N ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + COMPLEX CTEMP + REAL AJJ, SSTOP, STEMP + INTEGER I, ITEMP, J, JB, K, NB, PVT + LOGICAL UPPER +* .. +* .. External Functions .. + REAL SLAMCH + INTEGER ILAENV + LOGICAL LSAME, SISNAN + EXTERNAL SLAMCH, ILAENV, LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CHERK, CLACGV, CPSTF2, CSSCAL, CSWAP, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MIN, REAL, SQRT, MAXLOC +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPSTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get block size +* + NB = ILAENV( 1, 'CPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL CPSTF2( UPLO, N, A( 1, 1 ), LDA, PIV, RANK, TOL, WORK, + $ INFO ) + GO TO 230 +* + ELSE +* +* Initialize PIV +* + DO 100 I = 1, N + PIV( I ) = I + 100 CONTINUE +* +* Compute stopping value +* + DO 110 I = 1, N + WORK( I ) = REAL( A( I, I ) ) + 110 CONTINUE + PVT = MAXLOC( WORK( 1:N ), 1 ) + AJJ = REAL( A( PVT, PVT ) ) + IF( AJJ.LE.ZERO.OR.SISNAN( AJJ ) ) THEN + RANK = 0 + INFO = 1 + GO TO 230 + END IF +* +* Compute stopping value if not supplied +* + IF( TOL.LT.ZERO ) THEN + SSTOP = N * SLAMCH( 'Epsilon' ) * AJJ + ELSE + SSTOP = TOL + END IF +* +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization P**T * A * P = U**H * U +* + DO 160 K = 1, N, NB +* +* Account for last block not being NB wide +* + JB = MIN( NB, N-K+1 ) +* +* Set relevant part of first half of WORK to zero, +* holds dot products +* + DO 120 I = K, N + WORK( I ) = 0 + 120 CONTINUE +* + DO 150 J = K, K + JB - 1 +* +* Find pivot, test for exit, else swap rows and columns +* Update dot products, compute possible pivots which are +* stored in the second half of WORK +* + DO 130 I = J, N +* + IF( J.GT.K ) THEN + WORK( I ) = WORK( I ) + + $ REAL( CONJG( A( J-1, I ) )* + $ A( J-1, I ) ) + END IF + WORK( N+I ) = REAL( A( I, I ) ) - WORK( I ) +* + 130 CONTINUE +* + IF( J.GT.1 ) THEN + ITEMP = MAXLOC( WORK( (N+J):(2*N) ), 1 ) + PVT = ITEMP + J - 1 + AJJ = WORK( N+PVT ) + IF( AJJ.LE.SSTOP.OR.SISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 220 + END IF + END IF +* + IF( J.NE.PVT ) THEN +* +* Pivot OK, so can now swap pivot rows and columns +* + A( PVT, PVT ) = A( J, J ) + CALL CSWAP( J-1, A( 1, J ), 1, A( 1, PVT ), 1 ) + IF( PVT.LT.N ) + $ CALL CSWAP( N-PVT, A( J, PVT+1 ), LDA, + $ A( PVT, PVT+1 ), LDA ) + DO 140 I = J + 1, PVT - 1 + CTEMP = CONJG( A( J, I ) ) + A( J, I ) = CONJG( A( I, PVT ) ) + A( I, PVT ) = CTEMP + 140 CONTINUE + A( J, PVT ) = CONJG( A( J, PVT ) ) +* +* Swap dot products and PIV +* + STEMP = WORK( J ) + WORK( J ) = WORK( PVT ) + WORK( PVT ) = STEMP + ITEMP = PIV( PVT ) + PIV( PVT ) = PIV( J ) + PIV( J ) = ITEMP + END IF +* + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of row J. +* + IF( J.LT.N ) THEN + CALL CLACGV( J-1, A( 1, J ), 1 ) + CALL CGEMV( 'Trans', J-K, N-J, -CONE, A( K, J+1 ), + $ LDA, A( K, J ), 1, CONE, A( J, J+1 ), + $ LDA ) + CALL CLACGV( J-1, A( 1, J ), 1 ) + CALL CSSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA ) + END IF +* + 150 CONTINUE +* +* Update trailing matrix, J already incremented +* + IF( K+JB.LE.N ) THEN + CALL CHERK( 'Upper', 'Conj Trans', N-J+1, JB, -ONE, + $ A( K, J ), LDA, ONE, A( J, J ), LDA ) + END IF +* + 160 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization P**T * A * P = L * L**H +* + DO 210 K = 1, N, NB +* +* Account for last block not being NB wide +* + JB = MIN( NB, N-K+1 ) +* +* Set relevant part of first half of WORK to zero, +* holds dot products +* + DO 170 I = K, N + WORK( I ) = 0 + 170 CONTINUE +* + DO 200 J = K, K + JB - 1 +* +* Find pivot, test for exit, else swap rows and columns +* Update dot products, compute possible pivots which are +* stored in the second half of WORK +* + DO 180 I = J, N +* + IF( J.GT.K ) THEN + WORK( I ) = WORK( I ) + + $ REAL( CONJG( A( I, J-1 ) )* + $ A( I, J-1 ) ) + END IF + WORK( N+I ) = REAL( A( I, I ) ) - WORK( I ) +* + 180 CONTINUE +* + IF( J.GT.1 ) THEN + ITEMP = MAXLOC( WORK( (N+J):(2*N) ), 1 ) + PVT = ITEMP + J - 1 + AJJ = WORK( N+PVT ) + IF( AJJ.LE.SSTOP.OR.SISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 220 + END IF + END IF +* + IF( J.NE.PVT ) THEN +* +* Pivot OK, so can now swap pivot rows and columns +* + A( PVT, PVT ) = A( J, J ) + CALL CSWAP( J-1, A( J, 1 ), LDA, A( PVT, 1 ), LDA ) + IF( PVT.LT.N ) + $ CALL CSWAP( N-PVT, A( PVT+1, J ), 1, + $ A( PVT+1, PVT ), 1 ) + DO 190 I = J + 1, PVT - 1 + CTEMP = CONJG( A( I, J ) ) + A( I, J ) = CONJG( A( PVT, I ) ) + A( PVT, I ) = CTEMP + 190 CONTINUE + A( PVT, J ) = CONJG( A( PVT, J ) ) +* +* Swap dot products and PIV +* + STEMP = WORK( J ) + WORK( J ) = WORK( PVT ) + WORK( PVT ) = STEMP + ITEMP = PIV( PVT ) + PIV( PVT ) = PIV( J ) + PIV( J ) = ITEMP + END IF +* + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of column J. +* + IF( J.LT.N ) THEN + CALL CLACGV( J-1, A( J, 1 ), LDA ) + CALL CGEMV( 'No Trans', N-J, J-K, -CONE, + $ A( J+1, K ), LDA, A( J, K ), LDA, CONE, + $ A( J+1, J ), 1 ) + CALL CLACGV( J-1, A( J, 1 ), LDA ) + CALL CSSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 ) + END IF +* + 200 CONTINUE +* +* Update trailing matrix, J already incremented +* + IF( K+JB.LE.N ) THEN + CALL CHERK( 'Lower', 'No Trans', N-J+1, JB, -ONE, + $ A( J, K ), LDA, ONE, A( J, J ), LDA ) + END IF +* + 210 CONTINUE +* + END IF + END IF +* +* Ran to completion, A has full rank +* + RANK = N +* + GO TO 230 + 220 CONTINUE +* +* Rank is the number of steps completed. Set INFO = 1 to signal +* that the factorization cannot be used to solve a system. +* + RANK = J - 1 + INFO = 1 +* + 230 CONTINUE + RETURN +* +* End of CPSTRF +* + END diff --git a/dspl/liblapack/SRC/cptcon.f b/dspl/liblapack/SRC/cptcon.f new file mode 100644 index 0000000..8956d0b --- /dev/null +++ b/dspl/liblapack/SRC/cptcon.f @@ -0,0 +1,223 @@ +*> \brief \b CPTCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPTCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPTCON( N, D, E, ANORM, RCOND, RWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* REAL D( * ), RWORK( * ) +* COMPLEX E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPTCON computes the reciprocal of the condition number (in the +*> 1-norm) of a complex Hermitian positive definite tridiagonal matrix +*> using the factorization A = L*D*L**H or A = U**H*D*U computed by +*> CPTTRF. +*> +*> Norm(inv(A)) is computed by a direct method, and the reciprocal of +*> the condition number is computed as +*> RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of the diagonal matrix D from the +*> factorization of A, as computed by CPTTRF. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX array, dimension (N-1) +*> The (n-1) off-diagonal elements of the unit bidiagonal factor +*> U or L from the factorization of A, as computed by CPTTRF. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is the +*> 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexPTcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The method used is described in Nicholas J. Higham, "Efficient +*> Algorithms for Computing the Condition Number of a Tridiagonal +*> Matrix", SIAM J. Sci. Stat. Comput., Vol. 7, No. 1, January 1986. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CPTCON( N, D, E, ANORM, RCOND, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + REAL D( * ), RWORK( * ) + COMPLEX E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IX + REAL AINVNM +* .. +* .. External Functions .. + INTEGER ISAMAX + EXTERNAL ISAMAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPTCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* +* Check that D(1:N) is positive. +* + DO 10 I = 1, N + IF( D( I ).LE.ZERO ) + $ RETURN + 10 CONTINUE +* +* Solve M(A) * x = e, where M(A) = (m(i,j)) is given by +* +* m(i,j) = abs(A(i,j)), i = j, +* m(i,j) = -abs(A(i,j)), i .ne. j, +* +* and e = [ 1, 1, ..., 1 ]**T. Note M(A) = M(L)*D*M(L)**H. +* +* Solve M(L) * x = e. +* + RWORK( 1 ) = ONE + DO 20 I = 2, N + RWORK( I ) = ONE + RWORK( I-1 )*ABS( E( I-1 ) ) + 20 CONTINUE +* +* Solve D * M(L)**H * x = b. +* + RWORK( N ) = RWORK( N ) / D( N ) + DO 30 I = N - 1, 1, -1 + RWORK( I ) = RWORK( I ) / D( I ) + RWORK( I+1 )*ABS( E( I ) ) + 30 CONTINUE +* +* Compute AINVNM = max(x(i)), 1<=i<=n. +* + IX = ISAMAX( N, RWORK, 1 ) + AINVNM = ABS( RWORK( IX ) ) +* +* Compute the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of CPTCON +* + END diff --git a/dspl/liblapack/SRC/cpteqr.f b/dspl/liblapack/SRC/cpteqr.f new file mode 100644 index 0000000..07d0779 --- /dev/null +++ b/dspl/liblapack/SRC/cpteqr.f @@ -0,0 +1,263 @@ +*> \brief \b CPTEQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPTEQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPZ +* INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ), WORK( * ) +* COMPLEX Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPTEQR computes all eigenvalues and, optionally, eigenvectors of a +*> symmetric positive definite tridiagonal matrix by first factoring the +*> matrix using SPTTRF and then calling CBDSQR to compute the singular +*> values of the bidiagonal factor. +*> +*> This routine computes the eigenvalues of the positive definite +*> tridiagonal matrix to high relative accuracy. This means that if the +*> eigenvalues range over many orders of magnitude in size, then the +*> small eigenvalues and corresponding eigenvectors will be computed +*> more accurately than, for example, with the standard QR method. +*> +*> The eigenvectors of a full or band positive definite Hermitian matrix +*> can also be found if CHETRD, CHPTRD, or CHBTRD has been used to +*> reduce this matrix to tridiagonal form. (The reduction to +*> tridiagonal form, however, may preclude the possibility of obtaining +*> high relative accuracy in the small eigenvalues of the original +*> matrix, if these eigenvalues range over many orders of magnitude.) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only. +*> = 'V': Compute eigenvectors of original Hermitian +*> matrix also. Array Z contains the unitary matrix +*> used to reduce the original matrix to tridiagonal +*> form. +*> = 'I': Compute eigenvectors of tridiagonal matrix also. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix. +*> On normal exit, D contains the eigenvalues, in descending +*> order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', the unitary matrix used in the +*> reduction to tridiagonal form. +*> On exit, if COMPZ = 'V', the orthonormal eigenvectors of the +*> original Hermitian matrix; +*> if COMPZ = 'I', the orthonormal eigenvectors of the +*> tridiagonal matrix. +*> If INFO > 0 on exit, Z contains the eigenvectors associated +*> with only the stored eigenvalues. +*> If COMPZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> COMPZ = 'V' or 'I', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, and i is: +*> <= N the Cholesky factorization of the matrix could +*> not be performed because the i-th principal minor +*> was not positive definite. +*> > N the SVD algorithm failed to converge; +*> if INFO = N+i, i off-diagonal elements of the +*> bidiagonal factor did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexPTcomputational +* +* ===================================================================== + SUBROUTINE CPTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), WORK( * ) + COMPLEX Z( LDZ, * ) +* .. +* +* ==================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CBDSQR, CLASET, SPTTRF, XERBLA +* .. +* .. Local Arrays .. + COMPLEX C( 1, 1 ), VT( 1, 1 ) +* .. +* .. Local Scalars .. + INTEGER I, ICOMPZ, NRU +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPTEQR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ICOMPZ.GT.0 ) + $ Z( 1, 1 ) = CONE + RETURN + END IF + IF( ICOMPZ.EQ.2 ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, Z, LDZ ) +* +* Call SPTTRF to factor the matrix. +* + CALL SPTTRF( N, D, E, INFO ) + IF( INFO.NE.0 ) + $ RETURN + DO 10 I = 1, N + D( I ) = SQRT( D( I ) ) + 10 CONTINUE + DO 20 I = 1, N - 1 + E( I ) = E( I )*D( I ) + 20 CONTINUE +* +* Call CBDSQR to compute the singular values/vectors of the +* bidiagonal factor. +* + IF( ICOMPZ.GT.0 ) THEN + NRU = N + ELSE + NRU = 0 + END IF + CALL CBDSQR( 'Lower', N, 0, NRU, 0, D, E, VT, 1, Z, LDZ, C, 1, + $ WORK, INFO ) +* +* Square the singular values. +* + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + D( I ) = D( I )*D( I ) + 30 CONTINUE + ELSE + INFO = N + INFO + END IF +* + RETURN +* +* End of CPTEQR +* + END diff --git a/dspl/liblapack/SRC/cptrfs.f b/dspl/liblapack/SRC/cptrfs.f new file mode 100644 index 0000000..a01ef90 --- /dev/null +++ b/dspl/liblapack/SRC/cptrfs.f @@ -0,0 +1,468 @@ +*> \brief \b CPTRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPTRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPTRFS( UPLO, N, NRHS, D, E, DF, EF, B, LDB, X, LDX, +* FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* REAL BERR( * ), D( * ), DF( * ), FERR( * ), +* $ RWORK( * ) +* COMPLEX B( LDB, * ), E( * ), EF( * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPTRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is Hermitian positive definite +*> and tridiagonal, and provides error bounds and backward error +*> estimates for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the superdiagonal or the subdiagonal of the +*> tridiagonal matrix A is stored and the form of the +*> factorization: +*> = 'U': E is the superdiagonal of A, and A = U**H*D*U; +*> = 'L': E is the subdiagonal of A, and A = L*D*L**H. +*> (The two forms are equivalent if A is real.) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n real diagonal elements of the tridiagonal matrix A. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX array, dimension (N-1) +*> The (n-1) off-diagonal elements of the tridiagonal matrix A +*> (see UPLO). +*> \endverbatim +*> +*> \param[in] DF +*> \verbatim +*> DF is REAL array, dimension (N) +*> The n diagonal elements of the diagonal matrix D from +*> the factorization computed by CPTTRF. +*> \endverbatim +*> +*> \param[in] EF +*> \verbatim +*> EF is COMPLEX array, dimension (N-1) +*> The (n-1) off-diagonal elements of the unit bidiagonal +*> factor U or L from the factorization computed by CPTTRF +*> (see UPLO). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by CPTTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexPTcomputational +* +* ===================================================================== + SUBROUTINE CPTRFS( UPLO, N, NRHS, D, E, DF, EF, B, LDB, X, LDX, + $ FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + REAL BERR( * ), D( * ), DF( * ), FERR( * ), + $ RWORK( * ) + COMPLEX B( LDB, * ), E( * ), EF( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IX, J, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN + COMPLEX BI, CX, DX, EX, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CPTTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPTRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = 4 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 100 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X. Also compute +* abs(A)*abs(x) + abs(b) for use in the backward error bound. +* + IF( UPPER ) THEN + IF( N.EQ.1 ) THEN + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + WORK( 1 ) = BI - DX + RWORK( 1 ) = CABS1( BI ) + CABS1( DX ) + ELSE + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + EX = E( 1 )*X( 2, J ) + WORK( 1 ) = BI - DX - EX + RWORK( 1 ) = CABS1( BI ) + CABS1( DX ) + + $ CABS1( E( 1 ) )*CABS1( X( 2, J ) ) + DO 30 I = 2, N - 1 + BI = B( I, J ) + CX = CONJG( E( I-1 ) )*X( I-1, J ) + DX = D( I )*X( I, J ) + EX = E( I )*X( I+1, J ) + WORK( I ) = BI - CX - DX - EX + RWORK( I ) = CABS1( BI ) + + $ CABS1( E( I-1 ) )*CABS1( X( I-1, J ) ) + + $ CABS1( DX ) + CABS1( E( I ) )* + $ CABS1( X( I+1, J ) ) + 30 CONTINUE + BI = B( N, J ) + CX = CONJG( E( N-1 ) )*X( N-1, J ) + DX = D( N )*X( N, J ) + WORK( N ) = BI - CX - DX + RWORK( N ) = CABS1( BI ) + CABS1( E( N-1 ) )* + $ CABS1( X( N-1, J ) ) + CABS1( DX ) + END IF + ELSE + IF( N.EQ.1 ) THEN + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + WORK( 1 ) = BI - DX + RWORK( 1 ) = CABS1( BI ) + CABS1( DX ) + ELSE + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + EX = CONJG( E( 1 ) )*X( 2, J ) + WORK( 1 ) = BI - DX - EX + RWORK( 1 ) = CABS1( BI ) + CABS1( DX ) + + $ CABS1( E( 1 ) )*CABS1( X( 2, J ) ) + DO 40 I = 2, N - 1 + BI = B( I, J ) + CX = E( I-1 )*X( I-1, J ) + DX = D( I )*X( I, J ) + EX = CONJG( E( I ) )*X( I+1, J ) + WORK( I ) = BI - CX - DX - EX + RWORK( I ) = CABS1( BI ) + + $ CABS1( E( I-1 ) )*CABS1( X( I-1, J ) ) + + $ CABS1( DX ) + CABS1( E( I ) )* + $ CABS1( X( I+1, J ) ) + 40 CONTINUE + BI = B( N, J ) + CX = E( N-1 )*X( N-1, J ) + DX = D( N )*X( N, J ) + WORK( N ) = BI - CX - DX + RWORK( N ) = CABS1( BI ) + CABS1( E( N-1 ) )* + $ CABS1( X( N-1, J ) ) + CABS1( DX ) + END IF + END IF +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + S = ZERO + DO 50 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 50 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CPTTRS( UPLO, N, 1, DF, EF, WORK, N, INFO ) + CALL CAXPY( N, CMPLX( ONE ), WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* + DO 60 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 60 CONTINUE + IX = ISAMAX( N, RWORK, 1 ) + FERR( J ) = RWORK( IX ) +* +* Estimate the norm of inv(A). +* +* Solve M(A) * x = e, where M(A) = (m(i,j)) is given by +* +* m(i,j) = abs(A(i,j)), i = j, +* m(i,j) = -abs(A(i,j)), i .ne. j, +* +* and e = [ 1, 1, ..., 1 ]**T. Note M(A) = M(L)*D*M(L)**H. +* +* Solve M(L) * x = e. +* + RWORK( 1 ) = ONE + DO 70 I = 2, N + RWORK( I ) = ONE + RWORK( I-1 )*ABS( EF( I-1 ) ) + 70 CONTINUE +* +* Solve D * M(L)**H * x = b. +* + RWORK( N ) = RWORK( N ) / DF( N ) + DO 80 I = N - 1, 1, -1 + RWORK( I ) = RWORK( I ) / DF( I ) + + $ RWORK( I+1 )*ABS( EF( I ) ) + 80 CONTINUE +* +* Compute norm(inv(A)) = max(x(i)), 1<=i<=n. +* + IX = ISAMAX( N, RWORK, 1 ) + FERR( J ) = FERR( J )*ABS( RWORK( IX ) ) +* +* Normalize error. +* + LSTRES = ZERO + DO 90 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 90 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 100 CONTINUE +* + RETURN +* +* End of CPTRFS +* + END diff --git a/dspl/liblapack/SRC/cptsv.f b/dspl/liblapack/SRC/cptsv.f new file mode 100644 index 0000000..7c6f1ed --- /dev/null +++ b/dspl/liblapack/SRC/cptsv.f @@ -0,0 +1,169 @@ +*> \brief CPTSV computes the solution to system of linear equations A * X = B for PT matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPTSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPTSV( N, NRHS, D, E, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* REAL D( * ) +* COMPLEX B( LDB, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPTSV computes the solution to a complex system of linear equations +*> A*X = B, where A is an N-by-N Hermitian positive definite tridiagonal +*> matrix, and X and B are N-by-NRHS matrices. +*> +*> A is factored as A = L*D*L**H, and the factored form of A is then +*> used to solve the system of equations. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix +*> A. On exit, the n diagonal elements of the diagonal matrix +*> D from the factorization A = L*D*L**H. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is COMPLEX array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix A. On exit, the (n-1) subdiagonal elements of the +*> unit bidiagonal factor L from the L*D*L**H factorization of +*> A. E can also be regarded as the superdiagonal of the unit +*> bidiagonal factor U from the U**H*D*U factorization of A. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the solution has not been +*> computed. The factorization has not been completed +*> unless i = N. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexPTsolve +* +* ===================================================================== + SUBROUTINE CPTSV( N, NRHS, D, E, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL D( * ) + COMPLEX B( LDB, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL CPTTRF, CPTTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPTSV ', -INFO ) + RETURN + END IF +* +* Compute the L*D*L**H (or U**H*D*U) factorization of A. +* + CALL CPTTRF( N, D, E, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CPTTRS( 'Lower', N, NRHS, D, E, B, LDB, INFO ) + END IF + RETURN +* +* End of CPTSV +* + END diff --git a/dspl/liblapack/SRC/cptsvx.f b/dspl/liblapack/SRC/cptsvx.f new file mode 100644 index 0000000..0482f7f --- /dev/null +++ b/dspl/liblapack/SRC/cptsvx.f @@ -0,0 +1,343 @@ +*> \brief CPTSVX computes the solution to system of linear equations A * X = B for PT matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPTSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPTSVX( FACT, N, NRHS, D, E, DF, EF, B, LDB, X, LDX, +* RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER FACT +* INTEGER INFO, LDB, LDX, N, NRHS +* REAL RCOND +* .. +* .. Array Arguments .. +* REAL BERR( * ), D( * ), DF( * ), FERR( * ), +* $ RWORK( * ) +* COMPLEX B( LDB, * ), E( * ), EF( * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPTSVX uses the factorization A = L*D*L**H to compute the solution +*> to a complex system of linear equations A*X = B, where A is an +*> N-by-N Hermitian positive definite tridiagonal matrix and X and B +*> are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'N', the matrix A is factored as A = L*D*L**H, where L +*> is a unit lower bidiagonal matrix and D is diagonal. The +*> factorization can also be regarded as having the form +*> A = U**H*D*U. +*> +*> 2. If the leading i-by-i principal minor is not positive definite, +*> then the routine returns with INFO = i. Otherwise, the factored +*> form of A is used to estimate the condition number of the matrix +*> A. If the reciprocal of the condition number is less than machine +*> precision, INFO = N+1 is returned as a warning, but the routine +*> still goes on to solve for X and compute error bounds as +*> described below. +*> +*> 3. The system of equations is solved for X using the factored form +*> of A. +*> +*> 4. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix +*> A is supplied on entry. +*> = 'F': On entry, DF and EF contain the factored form of A. +*> D, E, DF, and EF will not be modified. +*> = 'N': The matrix A will be copied to DF and EF and +*> factored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of the tridiagonal matrix A. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX array, dimension (N-1) +*> The (n-1) subdiagonal elements of the tridiagonal matrix A. +*> \endverbatim +*> +*> \param[in,out] DF +*> \verbatim +*> DF is REAL array, dimension (N) +*> If FACT = 'F', then DF is an input argument and on entry +*> contains the n diagonal elements of the diagonal matrix D +*> from the L*D*L**H factorization of A. +*> If FACT = 'N', then DF is an output argument and on exit +*> contains the n diagonal elements of the diagonal matrix D +*> from the L*D*L**H factorization of A. +*> \endverbatim +*> +*> \param[in,out] EF +*> \verbatim +*> EF is COMPLEX array, dimension (N-1) +*> If FACT = 'F', then EF is an input argument and on entry +*> contains the (n-1) subdiagonal elements of the unit +*> bidiagonal factor L from the L*D*L**H factorization of A. +*> If FACT = 'N', then EF is an output argument and on exit +*> contains the (n-1) subdiagonal elements of the unit +*> bidiagonal factor L from the L*D*L**H factorization of A. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal condition number of the matrix A. If RCOND +*> is less than the machine precision (in particular, if +*> RCOND = 0), the matrix is singular to working precision. +*> This condition is indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in any +*> element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: the leading minor of order i of A is +*> not positive definite, so the factorization +*> could not be completed, and the solution has not +*> been computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexPTsolve +* +* ===================================================================== + SUBROUTINE CPTSVX( FACT, N, NRHS, D, E, DF, EF, B, LDB, X, LDX, + $ RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER FACT + INTEGER INFO, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + REAL BERR( * ), D( * ), DF( * ), FERR( * ), + $ RWORK( * ) + COMPLEX B( LDB, * ), E( * ), EF( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT + REAL ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANHT, SLAMCH + EXTERNAL LSAME, CLANHT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLACPY, CPTCON, CPTRFS, CPTTRF, CPTTRS, + $ SCOPY, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPTSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the L*D*L**H (or U**H*D*U) factorization of A. +* + CALL SCOPY( N, D, 1, DF, 1 ) + IF( N.GT.1 ) + $ CALL CCOPY( N-1, E, 1, EF, 1 ) + CALL CPTTRF( N, DF, EF, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = CLANHT( '1', N, D, E ) +* +* Compute the reciprocal of the condition number of A. +* + CALL CPTCON( N, DF, EF, ANORM, RCOND, RWORK, INFO ) +* +* Compute the solution vectors X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CPTTRS( 'Lower', N, NRHS, DF, EF, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL CPTRFS( 'Lower', N, NRHS, D, E, DF, EF, B, LDB, X, LDX, FERR, + $ BERR, WORK, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of CPTSVX +* + END diff --git a/dspl/liblapack/SRC/cpttrf.f b/dspl/liblapack/SRC/cpttrf.f new file mode 100644 index 0000000..2f74714 --- /dev/null +++ b/dspl/liblapack/SRC/cpttrf.f @@ -0,0 +1,228 @@ +*> \brief \b CPTTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPTTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPTTRF( N, D, E, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* REAL D( * ) +* COMPLEX E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPTTRF computes the L*D*L**H factorization of a complex Hermitian +*> positive definite tridiagonal matrix A. The factorization may also +*> be regarded as having the form A = U**H *D*U. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix +*> A. On exit, the n diagonal elements of the diagonal matrix +*> D from the L*D*L**H factorization of A. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is COMPLEX array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix A. On exit, the (n-1) subdiagonal elements of the +*> unit bidiagonal factor L from the L*D*L**H factorization of A. +*> E can also be regarded as the superdiagonal of the unit +*> bidiagonal factor U from the U**H *D*U factorization of A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, the leading minor of order k is not +*> positive definite; if k < N, the factorization could not +*> be completed, while if k = N, the factorization was +*> completed, but D(N) <= 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexPTcomputational +* +* ===================================================================== + SUBROUTINE CPTTRF( N, D, E, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL D( * ) + COMPLEX E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, I4 + REAL EII, EIR, F, G +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC AIMAG, CMPLX, MOD, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'CPTTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the L*D*L**H (or U**H *D*U) factorization of A. +* + I4 = MOD( N-1, 4 ) + DO 10 I = 1, I4 + IF( D( I ).LE.ZERO ) THEN + INFO = I + GO TO 20 + END IF + EIR = REAL( E( I ) ) + EII = AIMAG( E( I ) ) + F = EIR / D( I ) + G = EII / D( I ) + E( I ) = CMPLX( F, G ) + D( I+1 ) = D( I+1 ) - F*EIR - G*EII + 10 CONTINUE +* + DO 110 I = I4+1, N - 4, 4 +* +* Drop out of the loop if d(i) <= 0: the matrix is not positive +* definite. +* + IF( D( I ).LE.ZERO ) THEN + INFO = I + GO TO 20 + END IF +* +* Solve for e(i) and d(i+1). +* + EIR = REAL( E( I ) ) + EII = AIMAG( E( I ) ) + F = EIR / D( I ) + G = EII / D( I ) + E( I ) = CMPLX( F, G ) + D( I+1 ) = D( I+1 ) - F*EIR - G*EII +* + IF( D( I+1 ).LE.ZERO ) THEN + INFO = I+1 + GO TO 20 + END IF +* +* Solve for e(i+1) and d(i+2). +* + EIR = REAL( E( I+1 ) ) + EII = AIMAG( E( I+1 ) ) + F = EIR / D( I+1 ) + G = EII / D( I+1 ) + E( I+1 ) = CMPLX( F, G ) + D( I+2 ) = D( I+2 ) - F*EIR - G*EII +* + IF( D( I+2 ).LE.ZERO ) THEN + INFO = I+2 + GO TO 20 + END IF +* +* Solve for e(i+2) and d(i+3). +* + EIR = REAL( E( I+2 ) ) + EII = AIMAG( E( I+2 ) ) + F = EIR / D( I+2 ) + G = EII / D( I+2 ) + E( I+2 ) = CMPLX( F, G ) + D( I+3 ) = D( I+3 ) - F*EIR - G*EII +* + IF( D( I+3 ).LE.ZERO ) THEN + INFO = I+3 + GO TO 20 + END IF +* +* Solve for e(i+3) and d(i+4). +* + EIR = REAL( E( I+3 ) ) + EII = AIMAG( E( I+3 ) ) + F = EIR / D( I+3 ) + G = EII / D( I+3 ) + E( I+3 ) = CMPLX( F, G ) + D( I+4 ) = D( I+4 ) - F*EIR - G*EII + 110 CONTINUE +* +* Check d(n) for positive definiteness. +* + IF( D( N ).LE.ZERO ) + $ INFO = N +* + 20 CONTINUE + RETURN +* +* End of CPTTRF +* + END diff --git a/dspl/liblapack/SRC/cpttrs.f b/dspl/liblapack/SRC/cpttrs.f new file mode 100644 index 0000000..8edf726 --- /dev/null +++ b/dspl/liblapack/SRC/cpttrs.f @@ -0,0 +1,208 @@ +*> \brief \b CPTTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPTTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPTTRS( UPLO, N, NRHS, D, E, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* REAL D( * ) +* COMPLEX B( LDB, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPTTRS solves a tridiagonal system of the form +*> A * X = B +*> using the factorization A = U**H*D*U or A = L*D*L**H computed by CPTTRF. +*> D is a diagonal matrix specified in the vector D, U (or L) is a unit +*> bidiagonal matrix whose superdiagonal (subdiagonal) is specified in +*> the vector E, and X and B are N by NRHS matrices. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies the form of the factorization and whether the +*> vector E is the superdiagonal of the upper bidiagonal factor +*> U or the subdiagonal of the lower bidiagonal factor L. +*> = 'U': A = U**H*D*U, E is the superdiagonal of U +*> = 'L': A = L*D*L**H, E is the subdiagonal of L +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the tridiagonal matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of the diagonal matrix D from the +*> factorization A = U**H*D*U or A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX array, dimension (N-1) +*> If UPLO = 'U', the (n-1) superdiagonal elements of the unit +*> bidiagonal factor U from the factorization A = U**H*D*U. +*> If UPLO = 'L', the (n-1) subdiagonal elements of the unit +*> bidiagonal factor L from the factorization A = L*D*L**H. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side vectors B for the system of +*> linear equations. +*> On exit, the solution vectors, X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexPTcomputational +* +* ===================================================================== + SUBROUTINE CPTTRS( UPLO, N, NRHS, D, E, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL D( * ) + COMPLEX B( LDB, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER IUPLO, J, JB, NB +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CPTTS2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + UPPER = ( UPLO.EQ.'U' .OR. UPLO.EQ.'u' ) + IF( .NOT.UPPER .AND. .NOT.( UPLO.EQ.'L' .OR. UPLO.EQ.'l' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CPTTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Determine the number of right-hand sides to solve at a time. +* + IF( NRHS.EQ.1 ) THEN + NB = 1 + ELSE + NB = MAX( 1, ILAENV( 1, 'CPTTRS', UPLO, N, NRHS, -1, -1 ) ) + END IF +* +* Decode UPLO +* + IF( UPPER ) THEN + IUPLO = 1 + ELSE + IUPLO = 0 + END IF +* + IF( NB.GE.NRHS ) THEN + CALL CPTTS2( IUPLO, N, NRHS, D, E, B, LDB ) + ELSE + DO 10 J = 1, NRHS, NB + JB = MIN( NRHS-J+1, NB ) + CALL CPTTS2( IUPLO, N, JB, D, E, B( 1, J ), LDB ) + 10 CONTINUE + END IF +* + RETURN +* +* End of CPTTRS +* + END diff --git a/dspl/liblapack/SRC/cptts2.f b/dspl/liblapack/SRC/cptts2.f new file mode 100644 index 0000000..c2b2b04 --- /dev/null +++ b/dspl/liblapack/SRC/cptts2.f @@ -0,0 +1,245 @@ +*> \brief \b CPTTS2 solves a tridiagonal system of the form AX=B using the L D LH factorization computed by spttrf. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CPTTS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CPTTS2( IUPLO, N, NRHS, D, E, B, LDB ) +* +* .. Scalar Arguments .. +* INTEGER IUPLO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* REAL D( * ) +* COMPLEX B( LDB, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CPTTS2 solves a tridiagonal system of the form +*> A * X = B +*> using the factorization A = U**H*D*U or A = L*D*L**H computed by CPTTRF. +*> D is a diagonal matrix specified in the vector D, U (or L) is a unit +*> bidiagonal matrix whose superdiagonal (subdiagonal) is specified in +*> the vector E, and X and B are N by NRHS matrices. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] IUPLO +*> \verbatim +*> IUPLO is INTEGER +*> Specifies the form of the factorization and whether the +*> vector E is the superdiagonal of the upper bidiagonal factor +*> U or the subdiagonal of the lower bidiagonal factor L. +*> = 1: A = U**H *D*U, E is the superdiagonal of U +*> = 0: A = L*D*L**H, E is the subdiagonal of L +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the tridiagonal matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of the diagonal matrix D from the +*> factorization A = U**H *D*U or A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX array, dimension (N-1) +*> If IUPLO = 1, the (n-1) superdiagonal elements of the unit +*> bidiagonal factor U from the factorization A = U**H*D*U. +*> If IUPLO = 0, the (n-1) subdiagonal elements of the unit +*> bidiagonal factor L from the factorization A = L*D*L**H. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side vectors B for the system of +*> linear equations. +*> On exit, the solution vectors, X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexPTcomputational +* +* ===================================================================== + SUBROUTINE CPTTS2( IUPLO, N, NRHS, D, E, B, LDB ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER IUPLO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL D( * ) + COMPLEX B( LDB, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + IF( N.EQ.1 ) + $ CALL CSSCAL( NRHS, 1. / D( 1 ), B, LDB ) + RETURN + END IF +* + IF( IUPLO.EQ.1 ) THEN +* +* Solve A * X = B using the factorization A = U**H *D*U, +* overwriting each right hand side vector with its solution. +* + IF( NRHS.LE.2 ) THEN + J = 1 + 5 CONTINUE +* +* Solve U**H * x = b. +* + DO 10 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*CONJG( E( I-1 ) ) + 10 CONTINUE +* +* Solve D * U * x = b. +* + DO 20 I = 1, N + B( I, J ) = B( I, J ) / D( I ) + 20 CONTINUE + DO 30 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) - B( I+1, J )*E( I ) + 30 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 5 + END IF + ELSE + DO 60 J = 1, NRHS +* +* Solve U**H * x = b. +* + DO 40 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*CONJG( E( I-1 ) ) + 40 CONTINUE +* +* Solve D * U * x = b. +* + B( N, J ) = B( N, J ) / D( N ) + DO 50 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) / D( I ) - B( I+1, J )*E( I ) + 50 CONTINUE + 60 CONTINUE + END IF + ELSE +* +* Solve A * X = B using the factorization A = L*D*L**H, +* overwriting each right hand side vector with its solution. +* + IF( NRHS.LE.2 ) THEN + J = 1 + 65 CONTINUE +* +* Solve L * x = b. +* + DO 70 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 ) + 70 CONTINUE +* +* Solve D * L**H * x = b. +* + DO 80 I = 1, N + B( I, J ) = B( I, J ) / D( I ) + 80 CONTINUE + DO 90 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) - B( I+1, J )*CONJG( E( I ) ) + 90 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 65 + END IF + ELSE + DO 120 J = 1, NRHS +* +* Solve L * x = b. +* + DO 100 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 ) + 100 CONTINUE +* +* Solve D * L**H * x = b. +* + B( N, J ) = B( N, J ) / D( N ) + DO 110 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) / D( I ) - + $ B( I+1, J )*CONJG( E( I ) ) + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of CPTTS2 +* + END diff --git a/dspl/liblapack/SRC/crot.f b/dspl/liblapack/SRC/crot.f new file mode 100644 index 0000000..849b9be --- /dev/null +++ b/dspl/liblapack/SRC/crot.f @@ -0,0 +1,162 @@ +*> \brief \b CROT applies a plane rotation with real cosine and complex sine to a pair of complex vectors. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CROT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CROT( N, CX, INCX, CY, INCY, C, S ) +* +* .. Scalar Arguments .. +* INTEGER INCX, INCY, N +* REAL C +* COMPLEX S +* .. +* .. Array Arguments .. +* COMPLEX CX( * ), CY( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CROT applies a plane rotation, where the cos (C) is real and the +*> sin (S) is complex, and the vectors CX and CY are complex. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of elements in the vectors CX and CY. +*> \endverbatim +*> +*> \param[in,out] CX +*> \verbatim +*> CX is COMPLEX array, dimension (N) +*> On input, the vector X. +*> On output, CX is overwritten with C*X + S*Y. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive values of CY. INCX <> 0. +*> \endverbatim +*> +*> \param[in,out] CY +*> \verbatim +*> CY is COMPLEX array, dimension (N) +*> On input, the vector Y. +*> On output, CY is overwritten with -CONJG(S)*X + C*Y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> The increment between successive values of CY. INCX <> 0. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is COMPLEX +*> C and S define a rotation +*> [ C S ] +*> [ -conjg(S) C ] +*> where C*C + S*CONJG(S) = 1.0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CROT( N, CX, INCX, CY, INCY, C, S ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCX, INCY, N + REAL C + COMPLEX S +* .. +* .. Array Arguments .. + COMPLEX CX( * ), CY( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IX, IY + COMPLEX STEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) + $ RETURN + IF( INCX.EQ.1 .AND. INCY.EQ.1 ) + $ GO TO 20 +* +* Code for unequal increments or equal increments not equal to 1 +* + IX = 1 + IY = 1 + IF( INCX.LT.0 ) + $ IX = ( -N+1 )*INCX + 1 + IF( INCY.LT.0 ) + $ IY = ( -N+1 )*INCY + 1 + DO 10 I = 1, N + STEMP = C*CX( IX ) + S*CY( IY ) + CY( IY ) = C*CY( IY ) - CONJG( S )*CX( IX ) + CX( IX ) = STEMP + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* Code for both increments equal to 1 +* + 20 CONTINUE + DO 30 I = 1, N + STEMP = C*CX( I ) + S*CY( I ) + CY( I ) = C*CY( I ) - CONJG( S )*CX( I ) + CX( I ) = STEMP + 30 CONTINUE + RETURN + END diff --git a/dspl/liblapack/SRC/cspcon.f b/dspl/liblapack/SRC/cspcon.f new file mode 100644 index 0000000..f8bcc77 --- /dev/null +++ b/dspl/liblapack/SRC/cspcon.f @@ -0,0 +1,231 @@ +*> \brief \b CSPCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSPCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSPCON estimates the reciprocal of the condition number (in the +*> 1-norm) of a complex symmetric packed matrix A using the +*> factorization A = U*D*U**T or A = L*D*L**T computed by CSPTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CSPTRF, stored as a +*> packed triangular matrix. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSPTRF. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CSPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IP, KASE + REAL AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CSPTRS, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + IP = N*( N+1 ) / 2 + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP - I + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + IP = 1 + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP + N - I + 1 + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**T) or inv(U*D*U**T). +* + CALL CSPTRS( UPLO, N, 1, AP, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of CSPCON +* + END diff --git a/dspl/liblapack/SRC/cspmv.f b/dspl/liblapack/SRC/cspmv.f new file mode 100644 index 0000000..80f2cef --- /dev/null +++ b/dspl/liblapack/SRC/cspmv.f @@ -0,0 +1,340 @@ +*> \brief \b CSPMV computes a matrix-vector product for complex vectors using a complex symmetric packed matrix +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSPMV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSPMV( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INCX, INCY, N +* COMPLEX ALPHA, BETA +* .. +* .. Array Arguments .. +* COMPLEX AP( * ), X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSPMV performs the matrix-vector operation +*> +*> y := alpha*A*x + beta*y, +*> +*> where alpha and beta are scalars, x and y are n element vectors and +*> A is an n by n symmetric matrix, supplied in packed form. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the matrix A is supplied in the packed +*> array AP as follows: +*> +*> UPLO = 'U' or 'u' The upper triangular part of A is +*> supplied in AP. +*> +*> UPLO = 'L' or 'l' The lower triangular part of A is +*> supplied in AP. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension at least +*> ( ( N*( N + 1 ) )/2 ). +*> Before entry, with UPLO = 'U' or 'u', the array AP must +*> contain the upper triangular part of the symmetric matrix +*> packed sequentially, column by column, so that AP( 1 ) +*> contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +*> and a( 2, 2 ) respectively, and so on. +*> Before entry, with UPLO = 'L' or 'l', the array AP must +*> contain the lower triangular part of the symmetric matrix +*> packed sequentially, column by column, so that AP( 1 ) +*> contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +*> and a( 3, 1 ) respectively, and so on. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX array, dimension at least +*> ( 1 + ( N - 1 )*abs( INCX ) ). +*> Before entry, the incremented array X must contain the N- +*> element vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is COMPLEX +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX array, dimension at least +*> ( 1 + ( N - 1 )*abs( INCY ) ). +*> Before entry, the incremented array Y must contain the n +*> element vector y. On exit, Y is overwritten by the updated +*> vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> Unchanged on exit. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CSPMV( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCX, INCY, N + COMPLEX ALPHA, BETA +* .. +* .. Array Arguments .. + COMPLEX AP( * ), X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY + COMPLEX TEMP1, TEMP2 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = 1 + ELSE IF( N.LT.0 ) THEN + INFO = 2 + ELSE IF( INCX.EQ.0 ) THEN + INFO = 6 + ELSE IF( INCY.EQ.0 ) THEN + INFO = 9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSPMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ) .OR. ( ( ALPHA.EQ.ZERO ) .AND. ( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 ) THEN + KX = 1 + ELSE + KX = 1 - ( N-1 )*INCX + END IF + IF( INCY.GT.0 ) THEN + KY = 1 + ELSE + KY = 1 - ( N-1 )*INCY + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE ) THEN + IF( INCY.EQ.1 ) THEN + IF( BETA.EQ.ZERO ) THEN + DO 10 I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20 I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO ) THEN + DO 30 I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40 I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + KK = 1 + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Form y when AP contains the upper triangle. +* + IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN + DO 60 J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + K = KK + DO 50 I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( I ) + K = K + 1 + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*AP( KK+J-1 ) + ALPHA*TEMP2 + KK = KK + J + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80 J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70 K = KK, KK + J - 2 + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*AP( KK+J-1 ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + J + 80 CONTINUE + END IF + ELSE +* +* Form y when AP contains the lower triangle. +* + IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN + DO 100 J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*AP( KK ) + K = KK + 1 + DO 90 I = J + 1, N + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( I ) + K = K + 1 + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + KK = KK + ( N-J+1 ) + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120 J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*AP( KK ) + IX = JX + IY = JY + DO 110 K = KK + 1, KK + N - J + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + ( N-J+1 ) + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of CSPMV +* + END diff --git a/dspl/liblapack/SRC/cspr.f b/dspl/liblapack/SRC/cspr.f new file mode 100644 index 0000000..2ea80c1 --- /dev/null +++ b/dspl/liblapack/SRC/cspr.f @@ -0,0 +1,280 @@ +*> \brief \b CSPR performs the symmetrical rank-1 update of a complex symmetric packed matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSPR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSPR( UPLO, N, ALPHA, X, INCX, AP ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INCX, N +* COMPLEX ALPHA +* .. +* .. Array Arguments .. +* COMPLEX AP( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSPR performs the symmetric rank 1 operation +*> +*> A := alpha*x*x**H + A, +*> +*> where alpha is a complex scalar, x is an n element vector and A is an +*> n by n symmetric matrix, supplied in packed form. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the matrix A is supplied in the packed +*> array AP as follows: +*> +*> UPLO = 'U' or 'u' The upper triangular part of A is +*> supplied in AP. +*> +*> UPLO = 'L' or 'l' The lower triangular part of A is +*> supplied in AP. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX array, dimension at least +*> ( 1 + ( N - 1 )*abs( INCX ) ). +*> Before entry, the incremented array X must contain the N- +*> element vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension at least +*> ( ( N*( N + 1 ) )/2 ). +*> Before entry, with UPLO = 'U' or 'u', the array AP must +*> contain the upper triangular part of the symmetric matrix +*> packed sequentially, column by column, so that AP( 1 ) +*> contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +*> and a( 2, 2 ) respectively, and so on. On exit, the array +*> AP is overwritten by the upper triangular part of the +*> updated matrix. +*> Before entry, with UPLO = 'L' or 'l', the array AP must +*> contain the lower triangular part of the symmetric matrix +*> packed sequentially, column by column, so that AP( 1 ) +*> contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +*> and a( 3, 1 ) respectively, and so on. On exit, the array +*> AP is overwritten by the lower triangular part of the +*> updated matrix. +*> Note that the imaginary parts of the diagonal elements need +*> not be set, they are assumed to be zero, and on exit they +*> are set to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CSPR( UPLO, N, ALPHA, X, INCX, AP ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCX, N + COMPLEX ALPHA +* .. +* .. Array Arguments .. + COMPLEX AP( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, IX, J, JX, K, KK, KX + COMPLEX TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = 1 + ELSE IF( N.LT.0 ) THEN + INFO = 2 + ELSE IF( INCX.EQ.0 ) THEN + INFO = 5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSPR ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ) .OR. ( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 ) THEN + KX = 1 - ( N-1 )*INCX + ELSE IF( INCX.NE.1 ) THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* + KK = 1 + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Form A when upper triangle is stored in AP. +* + IF( INCX.EQ.1 ) THEN + DO 20 J = 1, N + IF( X( J ).NE.ZERO ) THEN + TEMP = ALPHA*X( J ) + K = KK + DO 10 I = 1, J - 1 + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 10 CONTINUE + AP( KK+J-1 ) = AP( KK+J-1 ) + X( J )*TEMP + ELSE + AP( KK+J-1 ) = AP( KK+J-1 ) + END IF + KK = KK + J + 20 CONTINUE + ELSE + JX = KX + DO 40 J = 1, N + IF( X( JX ).NE.ZERO ) THEN + TEMP = ALPHA*X( JX ) + IX = KX + DO 30 K = KK, KK + J - 2 + AP( K ) = AP( K ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + AP( KK+J-1 ) = AP( KK+J-1 ) + X( JX )*TEMP + ELSE + AP( KK+J-1 ) = AP( KK+J-1 ) + END IF + JX = JX + INCX + KK = KK + J + 40 CONTINUE + END IF + ELSE +* +* Form A when lower triangle is stored in AP. +* + IF( INCX.EQ.1 ) THEN + DO 60 J = 1, N + IF( X( J ).NE.ZERO ) THEN + TEMP = ALPHA*X( J ) + AP( KK ) = AP( KK ) + TEMP*X( J ) + K = KK + 1 + DO 50 I = J + 1, N + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 50 CONTINUE + ELSE + AP( KK ) = AP( KK ) + END IF + KK = KK + N - J + 1 + 60 CONTINUE + ELSE + JX = KX + DO 80 J = 1, N + IF( X( JX ).NE.ZERO ) THEN + TEMP = ALPHA*X( JX ) + AP( KK ) = AP( KK ) + TEMP*X( JX ) + IX = JX + DO 70 K = KK + 1, KK + N - J + IX = IX + INCX + AP( K ) = AP( K ) + X( IX )*TEMP + 70 CONTINUE + ELSE + AP( KK ) = AP( KK ) + END IF + JX = JX + INCX + KK = KK + N - J + 1 + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of CSPR +* + END diff --git a/dspl/liblapack/SRC/csprfs.f b/dspl/liblapack/SRC/csprfs.f new file mode 100644 index 0000000..4ab1220 --- /dev/null +++ b/dspl/liblapack/SRC/csprfs.f @@ -0,0 +1,437 @@ +*> \brief \b CSPRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSPRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, +* FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX AFP( * ), AP( * ), B( LDB, * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSPRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric indefinite +*> and packed, and provides error bounds and backward error estimates +*> for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the symmetric matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in] AFP +*> \verbatim +*> AFP is COMPLEX array, dimension (N*(N+1)/2) +*> The factored form of the matrix A. AFP contains the block +*> diagonal matrix D and the multipliers used to obtain the +*> factor U or L from the factorization A = U*D*U**T or +*> A = L*D*L**T as computed by CSPTRF, stored as a packed +*> triangular matrix. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSPTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by CSPTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, + $ FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IK, J, K, KASE, KK, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CLACN2, CSPMV, CSPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL CCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL CSPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + KK = 1 + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + IK = KK + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 40 CONTINUE + RWORK( K ) = RWORK( K ) + CABS1( AP( KK+K-1 ) )*XK + S + KK = KK + K + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + CABS1( AP( KK ) )*XK + IK = KK + 1 + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + KK = KK + ( N-K+1 ) + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CSPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**T). +* + CALL CSPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL CSPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of CSPRFS +* + END diff --git a/dspl/liblapack/SRC/cspsv.f b/dspl/liblapack/SRC/cspsv.f new file mode 100644 index 0000000..4bf0781 --- /dev/null +++ b/dspl/liblapack/SRC/cspsv.f @@ -0,0 +1,224 @@ +*> \brief CSPSV computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSPSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSPSV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix stored in packed format and X +*> and B are N-by-NRHS matrices. +*> +*> The diagonal pivoting method is used to factor A as +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, D is symmetric and block diagonal with 1-by-1 +*> and 2-by-2 diagonal blocks. The factored form of A is then used to +*> solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**T or A = L*D*L**T as computed by CSPTRF, stored as +*> a packed triangular matrix in the same storage format as A. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, as +*> determined by CSPTRF. If IPIV(k) > 0, then rows and columns +*> k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +*> diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +*> then rows and columns k-1 and -IPIV(k) were interchanged and +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +*> IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +*> -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +*> diagonal block. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the symmetric matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = aji) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CSPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSPTRF, CSPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSPSV ', -INFO ) + RETURN + END IF +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL CSPTRF( UPLO, N, AP, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* + END IF + RETURN +* +* End of CSPSV +* + END diff --git a/dspl/liblapack/SRC/cspsvx.f b/dspl/liblapack/SRC/cspsvx.f new file mode 100644 index 0000000..821d82a --- /dev/null +++ b/dspl/liblapack/SRC/cspsvx.f @@ -0,0 +1,387 @@ +*> \brief CSPSVX computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSPSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, +* LDX, RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER FACT, UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX AFP( * ), AP( * ), B( LDB, * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSPSVX uses the diagonal pivoting factorization A = U*D*U**T or +*> A = L*D*L**T to compute the solution to a complex system of linear +*> equations A * X = B, where A is an N-by-N symmetric matrix stored +*> in packed format and X and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'N', the diagonal pivoting method is used to factor A as +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> 2. If some D(i,i)=0, so that D is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 3. The system of equations is solved for X using the factored form +*> of A. +*> +*> 4. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of A has been +*> supplied on entry. +*> = 'F': On entry, AFP and IPIV contain the factored form +*> of A. AP, AFP and IPIV will not be modified. +*> = 'N': The matrix A will be copied to AFP and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the symmetric matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> \endverbatim +*> +*> \param[in,out] AFP +*> \verbatim +*> AFP is COMPLEX array, dimension (N*(N+1)/2) +*> If FACT = 'F', then AFP is an input argument and on entry +*> contains the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**T or A = L*D*L**T as computed by CSPTRF, stored as +*> a packed triangular matrix in the same storage format as A. +*> +*> If FACT = 'N', then AFP is an output argument and on exit +*> contains the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**T or A = L*D*L**T as computed by CSPTRF, stored as +*> a packed triangular matrix in the same storage format as A. +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains details of the interchanges and the block structure +*> of D, as determined by CSPTRF. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains details of the interchanges and the block structure +*> of D, as determined by CSPTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The estimate of the reciprocal condition number of the matrix +*> A. If RCOND is less than the machine precision (in +*> particular, if RCOND = 0), the matrix is singular to working +*> precision. This condition is indicated by a return code of +*> INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: D(i,i) is exactly zero. The factorization +*> has been completed but the factor D is exactly +*> singular, so the solution and error bounds could +*> not be computed. RCOND = 0 is returned. +*> = N+1: D is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexOTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the symmetric matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = aji) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CSPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, + $ LDX, RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT + REAL ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANSP, SLAMCH + EXTERNAL LSAME, CLANSP, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLACPY, CSPCON, CSPRFS, CSPTRF, CSPTRS, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSPSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL CCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 ) + CALL CSPTRF( UPLO, N, AFP, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = CLANSP( 'I', UPLO, N, AP, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL CSPCON( UPLO, N, AFP, IPIV, ANORM, RCOND, WORK, INFO ) +* +* Compute the solution vectors X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CSPTRS( UPLO, N, NRHS, AFP, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL CSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, FERR, + $ BERR, WORK, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of CSPSVX +* + END diff --git a/dspl/liblapack/SRC/csptrf.f b/dspl/liblapack/SRC/csptrf.f new file mode 100644 index 0000000..3499cb4 --- /dev/null +++ b/dspl/liblapack/SRC/csptrf.f @@ -0,0 +1,619 @@ +*> \brief \b CSPTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSPTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSPTRF( UPLO, N, AP, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSPTRF computes the factorization of a complex symmetric matrix A +*> stored in packed format using the Bunch-Kaufman diagonal pivoting +*> method: +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L, stored as a packed triangular +*> matrix overwriting A (see below for further details). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> 5-96 - Based on modifications by J. Lewis, Boeing Computer Services +*> Company +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CSPTRF( UPLO, N, AP, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KC, KK, KNC, KP, KPC, + $ KSTEP, KX, NPP + REAL ABSAKK, ALPHA, COLMAX, ROWMAX + COMPLEX D11, D12, D21, D22, R1, T, WK, WKM1, WKP1, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + EXTERNAL LSAME, ICAMAX +* .. +* .. External Subroutines .. + EXTERNAL CSCAL, CSPR, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSPTRF', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + KC = ( N-1 )*N / 2 + 1 + 10 CONTINUE + KNC = KC +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( AP( KC+K-1 ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = ICAMAX( K-1, AP( KC ), 1 ) + COLMAX = CABS1( AP( KC+IMAX-1 ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* + ROWMAX = ZERO + JMAX = IMAX + KX = IMAX*( IMAX+1 ) / 2 + IMAX + DO 20 J = IMAX + 1, K + IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = CABS1( AP( KX ) ) + JMAX = J + END IF + KX = KX + J + 20 CONTINUE + KPC = ( IMAX-1 )*IMAX / 2 + 1 + IF( IMAX.GT.1 ) THEN + JMAX = ICAMAX( IMAX-1, AP( KPC ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( AP( KPC+IMAX-1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL CSWAP( KP-1, AP( KNC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 30 J = KP + 1, KK - 1 + KX = KX + J - 1 + T = AP( KNC+J-1 ) + AP( KNC+J-1 ) = AP( KX ) + AP( KX ) = T + 30 CONTINUE + T = AP( KNC+KK-1 ) + AP( KNC+KK-1 ) = AP( KPC+KP-1 ) + AP( KPC+KP-1 ) = T + IF( KSTEP.EQ.2 ) THEN + T = AP( KC+K-2 ) + AP( KC+K-2 ) = AP( KC+KP-1 ) + AP( KC+KP-1 ) = T + END IF + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)**T = A - W(k)*1/D(k)*W(k)**T +* + R1 = CONE / AP( KC+K-1 ) + CALL CSPR( UPLO, K-1, -R1, AP( KC ), 1, AP ) +* +* Store U(k) in column k +* + CALL CSCAL( K-1, R1, AP( KC ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )**T +* + IF( K.GT.2 ) THEN +* + D12 = AP( K-1+( K-1 )*K / 2 ) + D22 = AP( K-1+( K-2 )*( K-1 ) / 2 ) / D12 + D11 = AP( K+( K-1 )*K / 2 ) / D12 + T = CONE / ( D11*D22-CONE ) + D12 = T / D12 +* + DO 50 J = K - 2, 1, -1 + WKM1 = D12*( D11*AP( J+( K-2 )*( K-1 ) / 2 )- + $ AP( J+( K-1 )*K / 2 ) ) + WK = D12*( D22*AP( J+( K-1 )*K / 2 )- + $ AP( J+( K-2 )*( K-1 ) / 2 ) ) + DO 40 I = J, 1, -1 + AP( I+( J-1 )*J / 2 ) = AP( I+( J-1 )*J / 2 ) - + $ AP( I+( K-1 )*K / 2 )*WK - + $ AP( I+( K-2 )*( K-1 ) / 2 )*WKM1 + 40 CONTINUE + AP( J+( K-1 )*K / 2 ) = WK + AP( J+( K-2 )*( K-1 ) / 2 ) = WKM1 + 50 CONTINUE +* + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + KC = KNC - K + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + KC = 1 + NPP = N*( N+1 ) / 2 + 60 CONTINUE + KNC = KC +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( AP( KC ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + ICAMAX( N-K, AP( KC+1 ), 1 ) + COLMAX = CABS1( AP( KC+IMAX-K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + ROWMAX = ZERO + KX = KC + IMAX - K + DO 70 J = K, IMAX - 1 + IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = CABS1( AP( KX ) ) + JMAX = J + END IF + KX = KX + N - J + 70 CONTINUE + KPC = NPP - ( N-IMAX+1 )*( N-IMAX+2 ) / 2 + 1 + IF( IMAX.LT.N ) THEN + JMAX = IMAX + ICAMAX( N-IMAX, AP( KPC+1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( AP( KPC ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC + N - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, AP( KNC+KP-KK+1 ), 1, AP( KPC+1 ), + $ 1 ) + KX = KNC + KP - KK + DO 80 J = KK + 1, KP - 1 + KX = KX + N - J + 1 + T = AP( KNC+J-KK ) + AP( KNC+J-KK ) = AP( KX ) + AP( KX ) = T + 80 CONTINUE + T = AP( KNC ) + AP( KNC ) = AP( KPC ) + AP( KPC ) = T + IF( KSTEP.EQ.2 ) THEN + T = AP( KC+1 ) + AP( KC+1 ) = AP( KC+KP-K ) + AP( KC+KP-K ) = T + END IF + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)**T = A - W(k)*(1/D(k))*W(k)**T +* + R1 = CONE / AP( KC ) + CALL CSPR( UPLO, N-K, -R1, AP( KC+1 ), 1, + $ AP( KC+N-K+1 ) ) +* +* Store L(k) in column K +* + CALL CSCAL( N-K, R1, AP( KC+1 ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k): columns K and K+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )**T +* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )**T +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D21 = AP( K+1+( K-1 )*( 2*N-K ) / 2 ) + D11 = AP( K+1+K*( 2*N-K-1 ) / 2 ) / D21 + D22 = AP( K+( K-1 )*( 2*N-K ) / 2 ) / D21 + T = CONE / ( D11*D22-CONE ) + D21 = T / D21 +* + DO 100 J = K + 2, N + WK = D21*( D11*AP( J+( K-1 )*( 2*N-K ) / 2 )- + $ AP( J+K*( 2*N-K-1 ) / 2 ) ) + WKP1 = D21*( D22*AP( J+K*( 2*N-K-1 ) / 2 )- + $ AP( J+( K-1 )*( 2*N-K ) / 2 ) ) + DO 90 I = J, N + AP( I+( J-1 )*( 2*N-J ) / 2 ) = AP( I+( J-1 )* + $ ( 2*N-J ) / 2 ) - AP( I+( K-1 )*( 2*N-K ) / + $ 2 )*WK - AP( I+K*( 2*N-K-1 ) / 2 )*WKP1 + 90 CONTINUE + AP( J+( K-1 )*( 2*N-K ) / 2 ) = WK + AP( J+K*( 2*N-K-1 ) / 2 ) = WKP1 + 100 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + KC = KNC + N - K + 2 + GO TO 60 +* + END IF +* + 110 CONTINUE + RETURN +* +* End of CSPTRF +* + END diff --git a/dspl/liblapack/SRC/csptri.f b/dspl/liblapack/SRC/csptri.f new file mode 100644 index 0000000..d8fed87 --- /dev/null +++ b/dspl/liblapack/SRC/csptri.f @@ -0,0 +1,404 @@ +*> \brief \b CSPTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSPTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSPTRI( UPLO, N, AP, IPIV, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSPTRI computes the inverse of a complex symmetric indefinite matrix +*> A in packed storage using the factorization A = U*D*U**T or +*> A = L*D*L**T computed by CSPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by CSPTRF, +*> stored as a packed triangular matrix. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix, stored as a packed triangular matrix. The j-th column +*> of inv(A) is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j; +*> if UPLO = 'L', +*> AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSPTRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CSPTRI( UPLO, N, AP, IPIV, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KCNEXT, KP, KPC, KSTEP, KX, NPP + COMPLEX AK, AKKP1, AKP1, D, T, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTU + EXTERNAL LSAME, CDOTU +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CSPMV, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSPTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + KP = N*( N+1 ) / 2 + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP - INFO + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + KP = 1 + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP + N - INFO + 1 + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + KCNEXT = KC + K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC+K-1 ) = ONE / AP( KC+K-1 ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL CCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL CSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ), + $ 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ CDOTU( K-1, WORK, 1, AP( KC ), 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = AP( KCNEXT+K-1 ) + AK = AP( KC+K-1 ) / T + AKP1 = AP( KCNEXT+K ) / T + AKKP1 = AP( KCNEXT+K-1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KC+K-1 ) = AKP1 / D + AP( KCNEXT+K ) = AK / D + AP( KCNEXT+K-1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL CCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL CSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ), + $ 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ CDOTU( K-1, WORK, 1, AP( KC ), 1 ) + AP( KCNEXT+K-1 ) = AP( KCNEXT+K-1 ) - + $ CDOTU( K-1, AP( KC ), 1, AP( KCNEXT ), + $ 1 ) + CALL CCOPY( K-1, AP( KCNEXT ), 1, WORK, 1 ) + CALL CSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, + $ AP( KCNEXT ), 1 ) + AP( KCNEXT+K ) = AP( KCNEXT+K ) - + $ CDOTU( K-1, WORK, 1, AP( KCNEXT ), 1 ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT + K + 1 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + KPC = ( KP-1 )*KP / 2 + 1 + CALL CSWAP( KP-1, AP( KC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 40 J = KP + 1, K - 1 + KX = KX + J - 1 + TEMP = AP( KC+J-1 ) + AP( KC+J-1 ) = AP( KX ) + AP( KX ) = TEMP + 40 CONTINUE + TEMP = AP( KC+K-1 ) + AP( KC+K-1 ) = AP( KPC+KP-1 ) + AP( KPC+KP-1 ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC+K+K-1 ) + AP( KC+K+K-1 ) = AP( KC+K+KP-1 ) + AP( KC+K+KP-1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + KC = KCNEXT + GO TO 30 + 50 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + NPP = N*( N+1 ) / 2 + K = N + KC = NPP + 60 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 80 +* + KCNEXT = KC - ( N-K+2 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC ) = ONE / AP( KC ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL CCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL CSPMV( UPLO, N-K, -ONE, AP( KC+N-K+1 ), WORK, 1, + $ ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - CDOTU( N-K, WORK, 1, AP( KC+1 ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = AP( KCNEXT+1 ) + AK = AP( KCNEXT ) / T + AKP1 = AP( KC ) / T + AKKP1 = AP( KCNEXT+1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KCNEXT ) = AKP1 / D + AP( KC ) = AK / D + AP( KCNEXT+1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL CCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL CSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1, + $ ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - CDOTU( N-K, WORK, 1, AP( KC+1 ), + $ 1 ) + AP( KCNEXT+1 ) = AP( KCNEXT+1 ) - + $ CDOTU( N-K, AP( KC+1 ), 1, + $ AP( KCNEXT+2 ), 1 ) + CALL CCOPY( N-K, AP( KCNEXT+2 ), 1, WORK, 1 ) + CALL CSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1, + $ ZERO, AP( KCNEXT+2 ), 1 ) + AP( KCNEXT ) = AP( KCNEXT ) - + $ CDOTU( N-K, WORK, 1, AP( KCNEXT+2 ), 1 ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT - ( N-K+3 ) + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + KPC = NPP - ( N-KP+1 )*( N-KP+2 ) / 2 + 1 + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, AP( KC+KP-K+1 ), 1, AP( KPC+1 ), 1 ) + KX = KC + KP - K + DO 70 J = K + 1, KP - 1 + KX = KX + N - J + 1 + TEMP = AP( KC+J-K ) + AP( KC+J-K ) = AP( KX ) + AP( KX ) = TEMP + 70 CONTINUE + TEMP = AP( KC ) + AP( KC ) = AP( KPC ) + AP( KPC ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC-N+K-1 ) + AP( KC-N+K-1 ) = AP( KC-N+KP-1 ) + AP( KC-N+KP-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + KC = KCNEXT + GO TO 60 + 80 CONTINUE + END IF +* + RETURN +* +* End of CSPTRI +* + END diff --git a/dspl/liblapack/SRC/csptrs.f b/dspl/liblapack/SRC/csptrs.f new file mode 100644 index 0000000..b13c651 --- /dev/null +++ b/dspl/liblapack/SRC/csptrs.f @@ -0,0 +1,450 @@ +*> \brief \b CSPTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSPTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSPTRS solves a system of linear equations A*X = B with a complex +*> symmetric matrix A stored in packed format using the factorization +*> A = U*D*U**T or A = L*D*L**T computed by CSPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CSPTRF, stored as a +*> packed triangular matrix. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSPTRF. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KP + COMPLEX AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CGERU, CSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**T. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + KC = KC - K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL CGERU( K-1, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL CSCAL( NRHS, ONE / AP( KC+K-1 ), B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL CSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL CGERU( K-2, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL CGERU( K-2, NRHS, -ONE, AP( KC-( K-1 ) ), 1, + $ B( K-1, 1 ), LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+K-2 ) + AKM1 = AP( KC-1 ) / AKM1K + AK = AP( KC+K-1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / AKM1K + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + KC = KC - K + 1 + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U**T*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U**T(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL CGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ), + $ 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + K + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U**T(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + CALL CGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ), + $ 1, ONE, B( K, 1 ), LDB ) + CALL CGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, + $ AP( KC+K ), 1, ONE, B( K+1, 1 ), LDB ) +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + 2*K + 1 + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L**T. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL CGERU( N-K, NRHS, -ONE, AP( KC+1 ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL CSCAL( NRHS, ONE / AP( KC ), B( K, 1 ), LDB ) + KC = KC + N - K + 1 + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL CSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL CGERU( N-K-1, NRHS, -ONE, AP( KC+2 ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL CGERU( N-K-1, NRHS, -ONE, AP( KC+N-K+2 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+1 ) + AKM1 = AP( KC ) / AKM1K + AK = AP( KC+N-K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / AKM1K + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + KC = KC + 2*( N-K ) + 1 + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L**T*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + KC = KC - ( N-K+1 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L**T(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL CGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L**T(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL CGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB ) + CALL CGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC-( N-K ) ), 1, ONE, B( K-1, 1 ), + $ LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC - ( N-K+2 ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of CSPTRS +* + END diff --git a/dspl/liblapack/SRC/csrscl.f b/dspl/liblapack/SRC/csrscl.f new file mode 100644 index 0000000..e85168e --- /dev/null +++ b/dspl/liblapack/SRC/csrscl.f @@ -0,0 +1,174 @@ +*> \brief \b CSRSCL multiplies a vector by the reciprocal of a real scalar. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSRSCL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSRSCL( N, SA, SX, INCX ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* REAL SA +* .. +* .. Array Arguments .. +* COMPLEX SX( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSRSCL multiplies an n-element complex vector x by the real scalar +*> 1/a. This is done without overflow or underflow as long as +*> the final result x/a does not overflow or underflow. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of components of the vector x. +*> \endverbatim +*> +*> \param[in] SA +*> \verbatim +*> SA is REAL +*> The scalar a which is used to divide each component of x. +*> SA must be >= 0, or the subroutine will divide by zero. +*> \endverbatim +*> +*> \param[in,out] SX +*> \verbatim +*> SX is COMPLEX array, dimension +*> (1+(N-1)*abs(INCX)) +*> The n-element vector x. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive values of the vector SX. +*> > 0: SX(1) = X(1) and SX(1+(i-1)*INCX) = x(i), 1< i<= n +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CSRSCL( N, SA, SX, INCX ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCX, N + REAL SA +* .. +* .. Array Arguments .. + COMPLEX SX( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + REAL BIGNUM, CDEN, CDEN1, CNUM, CNUM1, MUL, SMLNUM +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL, SLABAD +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Initialize the denominator to SA and the numerator to 1. +* + CDEN = SA + CNUM = ONE +* + 10 CONTINUE + CDEN1 = CDEN*SMLNUM + CNUM1 = CNUM / BIGNUM + IF( ABS( CDEN1 ).GT.ABS( CNUM ) .AND. CNUM.NE.ZERO ) THEN +* +* Pre-multiply X by SMLNUM if CDEN is large compared to CNUM. +* + MUL = SMLNUM + DONE = .FALSE. + CDEN = CDEN1 + ELSE IF( ABS( CNUM1 ).GT.ABS( CDEN ) ) THEN +* +* Pre-multiply X by BIGNUM if CDEN is small compared to CNUM. +* + MUL = BIGNUM + DONE = .FALSE. + CNUM = CNUM1 + ELSE +* +* Multiply X by CNUM / CDEN and return. +* + MUL = CNUM / CDEN + DONE = .TRUE. + END IF +* +* Scale the vector X by MUL +* + CALL CSSCAL( N, MUL, SX, INCX ) +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of CSRSCL +* + END diff --git a/dspl/liblapack/SRC/cstedc.f b/dspl/liblapack/SRC/cstedc.f new file mode 100644 index 0000000..4a00fba --- /dev/null +++ b/dspl/liblapack/SRC/cstedc.f @@ -0,0 +1,486 @@ +*> \brief \b CSTEDC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSTEDC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, RWORK, +* LRWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPZ +* INTEGER INFO, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL D( * ), E( * ), RWORK( * ) +* COMPLEX WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSTEDC computes all eigenvalues and, optionally, eigenvectors of a +*> symmetric tridiagonal matrix using the divide and conquer method. +*> The eigenvectors of a full or band complex Hermitian matrix can also +*> be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this +*> matrix to tridiagonal form. +*> +*> This code makes very mild assumptions about floating point +*> arithmetic. It will work on machines with a guard digit in +*> add/subtract, or on those binary machines without guard digits +*> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. +*> It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. See SLAED3 for details. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only. +*> = 'I': Compute eigenvectors of tridiagonal matrix also. +*> = 'V': Compute eigenvectors of original Hermitian matrix +*> also. On entry, Z contains the unitary matrix used +*> to reduce the original matrix to tridiagonal form. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the diagonal elements of the tridiagonal matrix. +*> On exit, if INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> On entry, the subdiagonal elements of the tridiagonal matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ,N) +*> On entry, if COMPZ = 'V', then Z contains the unitary +*> matrix used in the reduction to tridiagonal form. +*> On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +*> orthonormal eigenvectors of the original Hermitian matrix, +*> and if COMPZ = 'I', Z contains the orthonormal eigenvectors +*> of the symmetric tridiagonal matrix. +*> If COMPZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1. +*> If eigenvectors are desired, then LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1. +*> If COMPZ = 'V' and N > 1, LWORK must be at least N*N. +*> Note that for COMPZ = 'V', then if N is less than or +*> equal to the minimum divide size, usually 25, then LWORK need +*> only be 1. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (MAX(1,LRWORK)) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of the array RWORK. +*> If COMPZ = 'N' or N <= 1, LRWORK must be at least 1. +*> If COMPZ = 'V' and N > 1, LRWORK must be at least +*> 1 + 3*N + 2*N*lg N + 4*N**2 , +*> where lg( N ) = smallest integer k such +*> that 2**k >= N. +*> If COMPZ = 'I' and N > 1, LRWORK must be at least +*> 1 + 4*N + 2*N**2 . +*> Note that for COMPZ = 'I' or 'V', then if N is less than or +*> equal to the minimum divide size, usually 25, then LRWORK +*> need only be max(1,2*(N-1)). +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If COMPZ = 'N' or N <= 1, LIWORK must be at least 1. +*> If COMPZ = 'V' or N > 1, LIWORK must be at least +*> 6 + 6*N + 5*N*lg N. +*> If COMPZ = 'I' or N > 1, LIWORK must be at least +*> 3 + 5*N . +*> Note that for COMPZ = 'I' or 'V', then if N is less than or +*> equal to the minimum divide size, usually 25, then LIWORK +*> need only be 1. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: The algorithm failed to compute an eigenvalue while +*> working on the submatrix lying in rows and columns +*> INFO/(N+1) through mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +* +* ===================================================================== + SUBROUTINE CSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, RWORK, + $ LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL D( * ), E( * ), RWORK( * ) + COMPLEX WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER FINISH, I, ICOMPZ, II, J, K, LGN, LIWMIN, LL, + $ LRWMIN, LWMIN, M, SMLSIZ, START + REAL EPS, ORGNRM, P, TINY +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANST + EXTERNAL ILAENV, LSAME, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CLACPY, CLACRM, CLAED0, CSTEQR, CSWAP, + $ SLASCL, SLASET, SSTEDC, SSTEQR, SSTERF +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, MOD, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. + $ ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, N ) ) ) THEN + INFO = -6 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + SMLSIZ = ILAENV( 9, 'CSTEDC', ' ', 0, 0, 0, 0 ) + IF( N.LE.1 .OR. ICOMPZ.EQ.0 ) THEN + LWMIN = 1 + LIWMIN = 1 + LRWMIN = 1 + ELSE IF( N.LE.SMLSIZ ) THEN + LWMIN = 1 + LIWMIN = 1 + LRWMIN = 2*( N - 1 ) + ELSE IF( ICOMPZ.EQ.1 ) THEN + LGN = INT( LOG( REAL( N ) ) / LOG( TWO ) ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + LWMIN = N*N + LRWMIN = 1 + 3*N + 2*N*LGN + 4*N**2 + LIWMIN = 6 + 6*N + 5*N*LGN + ELSE IF( ICOMPZ.EQ.2 ) THEN + LWMIN = 1 + LRWMIN = 1 + 4*N + 2*N**2 + LIWMIN = 3 + 5*N + END IF + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSTEDC', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( N.EQ.1 ) THEN + IF( ICOMPZ.NE.0 ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* If the following conditional clause is removed, then the routine +* will use the Divide and Conquer routine to compute only the +* eigenvalues, which requires (3N + 3N**2) real workspace and +* (2 + 5N + 2N lg(N)) integer workspace. +* Since on many architectures SSTERF is much faster than any other +* algorithm for finding eigenvalues only, it is used here +* as the default. If the conditional clause is removed, then +* information on the size of workspace needs to be changed. +* +* If COMPZ = 'N', use SSTERF to compute the eigenvalues. +* + IF( ICOMPZ.EQ.0 ) THEN + CALL SSTERF( N, D, E, INFO ) + GO TO 70 + END IF +* +* If N is smaller than the minimum divide size (SMLSIZ+1), then +* solve the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN +* + CALL CSTEQR( COMPZ, N, D, E, Z, LDZ, RWORK, INFO ) +* + ELSE +* +* If COMPZ = 'I', we simply call SSTEDC instead. +* + IF( ICOMPZ.EQ.2 ) THEN + CALL SLASET( 'Full', N, N, ZERO, ONE, RWORK, N ) + LL = N*N + 1 + CALL SSTEDC( 'I', N, D, E, RWORK, N, + $ RWORK( LL ), LRWORK-LL+1, IWORK, LIWORK, INFO ) + DO 20 J = 1, N + DO 10 I = 1, N + Z( I, J ) = RWORK( ( J-1 )*N+I ) + 10 CONTINUE + 20 CONTINUE + GO TO 70 + END IF +* +* From now on, only option left to be handled is COMPZ = 'V', +* i.e. ICOMPZ = 1. +* +* Scale. +* + ORGNRM = SLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) + $ GO TO 70 +* + EPS = SLAMCH( 'Epsilon' ) +* + START = 1 +* +* while ( START <= N ) +* + 30 CONTINUE + IF( START.LE.N ) THEN +* +* Let FINISH be the position of the next subdiagonal entry +* such that E( FINISH ) <= TINY or FINISH = N if no such +* subdiagonal exists. The matrix identified by the elements +* between START and FINISH constitutes an independent +* sub-problem. +* + FINISH = START + 40 CONTINUE + IF( FINISH.LT.N ) THEN + TINY = EPS*SQRT( ABS( D( FINISH ) ) )* + $ SQRT( ABS( D( FINISH+1 ) ) ) + IF( ABS( E( FINISH ) ).GT.TINY ) THEN + FINISH = FINISH + 1 + GO TO 40 + END IF + END IF +* +* (Sub) Problem determined. Compute its size and solve it. +* + M = FINISH - START + 1 + IF( M.GT.SMLSIZ ) THEN +* +* Scale. +* + ORGNRM = SLANST( 'M', M, D( START ), E( START ) ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, M, 1, D( START ), M, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, M-1, 1, E( START ), + $ M-1, INFO ) +* + CALL CLAED0( N, M, D( START ), E( START ), Z( 1, START ), + $ LDZ, WORK, N, RWORK, IWORK, INFO ) + IF( INFO.GT.0 ) THEN + INFO = ( INFO / ( M+1 )+START-1 )*( N+1 ) + + $ MOD( INFO, ( M+1 ) ) + START - 1 + GO TO 70 + END IF +* +* Scale back. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, M, 1, D( START ), M, + $ INFO ) +* + ELSE + CALL SSTEQR( 'I', M, D( START ), E( START ), RWORK, M, + $ RWORK( M*M+1 ), INFO ) + CALL CLACRM( N, M, Z( 1, START ), LDZ, RWORK, M, WORK, N, + $ RWORK( M*M+1 ) ) + CALL CLACPY( 'A', N, M, WORK, N, Z( 1, START ), LDZ ) + IF( INFO.GT.0 ) THEN + INFO = START*( N+1 ) + FINISH + GO TO 70 + END IF + END IF +* + START = FINISH + 1 + GO TO 30 + END IF +* +* endwhile +* +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 60 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 50 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 50 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 60 CONTINUE + END IF +* + 70 CONTINUE + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of CSTEDC +* + END diff --git a/dspl/liblapack/SRC/cstegr.f b/dspl/liblapack/SRC/cstegr.f new file mode 100644 index 0000000..98b82c1 --- /dev/null +++ b/dspl/liblapack/SRC/cstegr.f @@ -0,0 +1,302 @@ +*> \brief \b CSTEGR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSTEGR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSTEGR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, +* ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, +* LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE +* INTEGER IL, INFO, IU, LDZ, LIWORK, LWORK, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* REAL D( * ), E( * ), W( * ), WORK( * ) +* COMPLEX Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSTEGR computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has +*> a well defined set of pairwise different real eigenvalues, the corresponding +*> real eigenvectors are pairwise orthogonal. +*> +*> The spectrum may be computed either completely or partially by specifying +*> either an interval (VL,VU] or a range of indices IL:IU for the desired +*> eigenvalues. +*> +*> CSTEGR is a compatibility wrapper around the improved CSTEMR routine. +*> See SSTEMR for further details. +*> +*> One important change is that the ABSTOL parameter no longer provides any +*> benefit and hence is no longer used. +*> +*> Note : CSTEGR and CSTEMR work only on machines which follow +*> IEEE-754 floating-point standard in their handling of infinities and +*> NaNs. Normal execution may create these exceptiona values and hence +*> may abort due to a floating point exception in environments which +*> do not conform to the IEEE-754 standard. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the N diagonal elements of the tridiagonal matrix +*> T. On exit, D is overwritten. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N) +*> On entry, the (N-1) subdiagonal elements of the tridiagonal +*> matrix T in elements 1 to N-1 of E. E(N) need not be set on +*> input, but is used internally as workspace. +*> On exit, E is overwritten. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> Unused. Was the absolute error tolerance for the +*> eigenvalues/eigenvectors in previous versions. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, max(1,M) ) +*> If JOBZ = 'V', and if INFO = 0, then the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix T +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> Supplying N columns is always safe. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', then LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th computed eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). This is relevant in the case when the matrix +*> is split. ISUPPZ is only accessed when JOBZ is 'V' and N > 0. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal +*> (and minimal) LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,18*N) +*> if JOBZ = 'V', and LWORK >= max(1,12*N) if JOBZ = 'N'. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (LIWORK) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N) +*> if the eigenvectors are desired, and LIWORK >= max(1,8*N) +*> if only the eigenvalues are to be computed. +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> On exit, INFO +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = 1X, internal error in SLARRE, +*> if INFO = 2X, internal error in CLARRV. +*> Here, the digit X = ABS( IINFO ) < 10, where IINFO is +*> the nonzero error code returned by SLARRE or +*> CLARRV, respectively. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Inderjit Dhillon, IBM Almaden, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, LBNL/NERSC, USA \n +* +* ===================================================================== + SUBROUTINE CSTEGR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE + INTEGER IL, INFO, IU, LDZ, LIWORK, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + REAL D( * ), E( * ), W( * ), WORK( * ) + COMPLEX Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL TRYRAC +* .. +* .. External Subroutines .. + EXTERNAL CSTEMR +* .. +* .. Executable Statements .. + INFO = 0 + TRYRAC = .FALSE. + + CALL CSTEMR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, + $ M, W, Z, LDZ, N, ISUPPZ, TRYRAC, WORK, LWORK, + $ IWORK, LIWORK, INFO ) +* +* End of CSTEGR +* + END diff --git a/dspl/liblapack/SRC/cstein.f b/dspl/liblapack/SRC/cstein.f new file mode 100644 index 0000000..4f6687d --- /dev/null +++ b/dspl/liblapack/SRC/cstein.f @@ -0,0 +1,469 @@ +*> \brief \b CSTEIN +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSTEIN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, Z, LDZ, WORK, +* IWORK, IFAIL, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDZ, M, N +* .. +* .. Array Arguments .. +* INTEGER IBLOCK( * ), IFAIL( * ), ISPLIT( * ), +* $ IWORK( * ) +* REAL D( * ), E( * ), W( * ), WORK( * ) +* COMPLEX Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSTEIN computes the eigenvectors of a real symmetric tridiagonal +*> matrix T corresponding to specified eigenvalues, using inverse +*> iteration. +*> +*> The maximum number of iterations allowed for each eigenvector is +*> specified by an internal parameter MAXITS (currently set to 5). +*> +*> Although the eigenvectors are real, they are stored in a complex +*> array, which may be passed to CUNMTR or CUPMTR for back +*> transformation to the eigenvectors of a complex Hermitian matrix +*> which was reduced to tridiagonal form. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The (n-1) subdiagonal elements of the tridiagonal matrix +*> T, stored in elements 1 to N-1. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of eigenvectors to be found. 0 <= M <= N. +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements of W contain the eigenvalues for +*> which eigenvectors are to be computed. The eigenvalues +*> should be grouped by split-off block and ordered from +*> smallest to largest within the block. ( The output array +*> W from SSTEBZ with ORDER = 'B' is expected here. ) +*> \endverbatim +*> +*> \param[in] IBLOCK +*> \verbatim +*> IBLOCK is INTEGER array, dimension (N) +*> The submatrix indices associated with the corresponding +*> eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to +*> the first submatrix from the top, =2 if W(i) belongs to +*> the second submatrix, etc. ( The output array IBLOCK +*> from SSTEBZ is expected here. ) +*> \endverbatim +*> +*> \param[in] ISPLIT +*> \verbatim +*> ISPLIT is INTEGER array, dimension (N) +*> The splitting points, at which T breaks up into submatrices. +*> The first submatrix consists of rows/columns 1 to +*> ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 +*> through ISPLIT( 2 ), etc. +*> ( The output array ISPLIT from SSTEBZ is expected here. ) +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, M) +*> The computed eigenvectors. The eigenvector associated +*> with the eigenvalue W(i) is stored in the i-th column of +*> Z. Any vector which fails to converge is set to its current +*> iterate after MAXITS iterations. +*> The imaginary parts of the eigenvectors are set to zero. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (M) +*> On normal exit, all elements of IFAIL are zero. +*> If one or more eigenvectors fail to converge after +*> MAXITS iterations, then their indices are stored in +*> array IFAIL. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge +*> in MAXITS iterations. Their indices are stored in +*> array IFAIL. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> MAXITS INTEGER, default = 5 +*> The maximum number of iterations performed. +*> +*> EXTRA INTEGER, default = 2 +*> The number of iterations performed after norm growth +*> criterion is satisfied, should be at least 1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, Z, LDZ, WORK, + $ IWORK, IFAIL, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDZ, M, N +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), IFAIL( * ), ISPLIT( * ), + $ IWORK( * ) + REAL D( * ), E( * ), W( * ), WORK( * ) + COMPLEX Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) + REAL ZERO, ONE, TEN, ODM3, ODM1 + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TEN = 1.0E+1, + $ ODM3 = 1.0E-3, ODM1 = 1.0E-1 ) + INTEGER MAXITS, EXTRA + PARAMETER ( MAXITS = 5, EXTRA = 2 ) +* .. +* .. Local Scalars .. + INTEGER B1, BLKSIZ, BN, GPIND, I, IINFO, INDRV1, + $ INDRV2, INDRV3, INDRV4, INDRV5, ITS, J, J1, + $ JBLK, JMAX, JR, NBLK, NRMCHK + REAL CTR, EPS, EPS1, NRM, ONENRM, ORTOL, PERTOL, + $ SCL, SEP, STPCRT, TOL, XJ, XJM +* .. +* .. Local Arrays .. + INTEGER ISEED( 4 ) +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH, SNRM2 + EXTERNAL ISAMAX, SLAMCH, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLAGTF, SLAGTS, SLARNV, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CMPLX, MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + DO 10 I = 1, M + IFAIL( I ) = 0 + 10 CONTINUE +* + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 .OR. M.GT.N ) THEN + INFO = -4 + ELSE IF( LDZ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + DO 20 J = 2, M + IF( IBLOCK( J ).LT.IBLOCK( J-1 ) ) THEN + INFO = -6 + GO TO 30 + END IF + IF( IBLOCK( J ).EQ.IBLOCK( J-1 ) .AND. W( J ).LT.W( J-1 ) ) + $ THEN + INFO = -5 + GO TO 30 + END IF + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSTEIN', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + EPS = SLAMCH( 'Precision' ) +* +* Initialize seed for random number generator SLARNV. +* + DO 40 I = 1, 4 + ISEED( I ) = 1 + 40 CONTINUE +* +* Initialize pointers. +* + INDRV1 = 0 + INDRV2 = INDRV1 + N + INDRV3 = INDRV2 + N + INDRV4 = INDRV3 + N + INDRV5 = INDRV4 + N +* +* Compute eigenvectors of matrix blocks. +* + J1 = 1 + DO 180 NBLK = 1, IBLOCK( M ) +* +* Find starting and ending indices of block nblk. +* + IF( NBLK.EQ.1 ) THEN + B1 = 1 + ELSE + B1 = ISPLIT( NBLK-1 ) + 1 + END IF + BN = ISPLIT( NBLK ) + BLKSIZ = BN - B1 + 1 + IF( BLKSIZ.EQ.1 ) + $ GO TO 60 + GPIND = J1 +* +* Compute reorthogonalization criterion and stopping criterion. +* + ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) ) + ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) ) + DO 50 I = B1 + 1, BN - 1 + ONENRM = MAX( ONENRM, ABS( D( I ) )+ABS( E( I-1 ) )+ + $ ABS( E( I ) ) ) + 50 CONTINUE + ORTOL = ODM3*ONENRM +* + STPCRT = SQRT( ODM1 / BLKSIZ ) +* +* Loop through eigenvalues of block nblk. +* + 60 CONTINUE + JBLK = 0 + DO 170 J = J1, M + IF( IBLOCK( J ).NE.NBLK ) THEN + J1 = J + GO TO 180 + END IF + JBLK = JBLK + 1 + XJ = W( J ) +* +* Skip all the work if the block size is one. +* + IF( BLKSIZ.EQ.1 ) THEN + WORK( INDRV1+1 ) = ONE + GO TO 140 + END IF +* +* If eigenvalues j and j-1 are too close, add a relatively +* small perturbation. +* + IF( JBLK.GT.1 ) THEN + EPS1 = ABS( EPS*XJ ) + PERTOL = TEN*EPS1 + SEP = XJ - XJM + IF( SEP.LT.PERTOL ) + $ XJ = XJM + PERTOL + END IF +* + ITS = 0 + NRMCHK = 0 +* +* Get random starting vector. +* + CALL SLARNV( 2, ISEED, BLKSIZ, WORK( INDRV1+1 ) ) +* +* Copy the matrix T so it won't be destroyed in factorization. +* + CALL SCOPY( BLKSIZ, D( B1 ), 1, WORK( INDRV4+1 ), 1 ) + CALL SCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV2+2 ), 1 ) + CALL SCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV3+1 ), 1 ) +* +* Compute LU factors with partial pivoting ( PT = LU ) +* + TOL = ZERO + CALL SLAGTF( BLKSIZ, WORK( INDRV4+1 ), XJ, WORK( INDRV2+2 ), + $ WORK( INDRV3+1 ), TOL, WORK( INDRV5+1 ), IWORK, + $ IINFO ) +* +* Update iteration count. +* + 70 CONTINUE + ITS = ITS + 1 + IF( ITS.GT.MAXITS ) + $ GO TO 120 +* +* Normalize and scale the righthand side vector Pb. +* + JMAX = ISAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) + SCL = BLKSIZ*ONENRM*MAX( EPS, + $ ABS( WORK( INDRV4+BLKSIZ ) ) ) / + $ ABS( WORK( INDRV1+JMAX ) ) + CALL SSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 ) +* +* Solve the system LU = Pb. +* + CALL SLAGTS( -1, BLKSIZ, WORK( INDRV4+1 ), WORK( INDRV2+2 ), + $ WORK( INDRV3+1 ), WORK( INDRV5+1 ), IWORK, + $ WORK( INDRV1+1 ), TOL, IINFO ) +* +* Reorthogonalize by modified Gram-Schmidt if eigenvalues are +* close enough. +* + IF( JBLK.EQ.1 ) + $ GO TO 110 + IF( ABS( XJ-XJM ).GT.ORTOL ) + $ GPIND = J + IF( GPIND.NE.J ) THEN + DO 100 I = GPIND, J - 1 + CTR = ZERO + DO 80 JR = 1, BLKSIZ + CTR = CTR + WORK( INDRV1+JR )* + $ REAL( Z( B1-1+JR, I ) ) + 80 CONTINUE + DO 90 JR = 1, BLKSIZ + WORK( INDRV1+JR ) = WORK( INDRV1+JR ) - + $ CTR*REAL( Z( B1-1+JR, I ) ) + 90 CONTINUE + 100 CONTINUE + END IF +* +* Check the infinity norm of the iterate. +* + 110 CONTINUE + JMAX = ISAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) + NRM = ABS( WORK( INDRV1+JMAX ) ) +* +* Continue for additional iterations after norm reaches +* stopping criterion. +* + IF( NRM.LT.STPCRT ) + $ GO TO 70 + NRMCHK = NRMCHK + 1 + IF( NRMCHK.LT.EXTRA+1 ) + $ GO TO 70 +* + GO TO 130 +* +* If stopping criterion was not satisfied, update info and +* store eigenvector number in array ifail. +* + 120 CONTINUE + INFO = INFO + 1 + IFAIL( INFO ) = J +* +* Accept iterate as jth eigenvector. +* + 130 CONTINUE + SCL = ONE / SNRM2( BLKSIZ, WORK( INDRV1+1 ), 1 ) + JMAX = ISAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) + IF( WORK( INDRV1+JMAX ).LT.ZERO ) + $ SCL = -SCL + CALL SSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 ) + 140 CONTINUE + DO 150 I = 1, N + Z( I, J ) = CZERO + 150 CONTINUE + DO 160 I = 1, BLKSIZ + Z( B1+I-1, J ) = CMPLX( WORK( INDRV1+I ), ZERO ) + 160 CONTINUE +* +* Save the shift to check eigenvalue spacing at next +* iteration. +* + XJM = XJ +* + 170 CONTINUE + 180 CONTINUE +* + RETURN +* +* End of CSTEIN +* + END diff --git a/dspl/liblapack/SRC/cstemr.f b/dspl/liblapack/SRC/cstemr.f new file mode 100644 index 0000000..22ac842 --- /dev/null +++ b/dspl/liblapack/SRC/cstemr.f @@ -0,0 +1,791 @@ +*> \brief \b CSTEMR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSTEMR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSTEMR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, +* M, W, Z, LDZ, NZC, ISUPPZ, TRYRAC, WORK, LWORK, +* IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE +* LOGICAL TRYRAC +* INTEGER IL, INFO, IU, LDZ, NZC, LIWORK, LWORK, M, N +* REAL VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* REAL D( * ), E( * ), W( * ), WORK( * ) +* COMPLEX Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSTEMR computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has +*> a well defined set of pairwise different real eigenvalues, the corresponding +*> real eigenvectors are pairwise orthogonal. +*> +*> The spectrum may be computed either completely or partially by specifying +*> either an interval (VL,VU] or a range of indices IL:IU for the desired +*> eigenvalues. +*> +*> Depending on the number of desired eigenvalues, these are computed either +*> by bisection or the dqds algorithm. Numerically orthogonal eigenvectors are +*> computed by the use of various suitable L D L^T factorizations near clusters +*> of close eigenvalues (referred to as RRRs, Relatively Robust +*> Representations). An informal sketch of the algorithm follows. +*> +*> For each unreduced block (submatrix) of T, +*> (a) Compute T - sigma I = L D L^T, so that L and D +*> define all the wanted eigenvalues to high relative accuracy. +*> This means that small relative changes in the entries of D and L +*> cause only small relative changes in the eigenvalues and +*> eigenvectors. The standard (unfactored) representation of the +*> tridiagonal matrix T does not have this property in general. +*> (b) Compute the eigenvalues to suitable accuracy. +*> If the eigenvectors are desired, the algorithm attains full +*> accuracy of the computed eigenvalues only right before +*> the corresponding vectors have to be computed, see steps c) and d). +*> (c) For each cluster of close eigenvalues, select a new +*> shift close to the cluster, find a new factorization, and refine +*> the shifted eigenvalues to suitable accuracy. +*> (d) For each eigenvalue with a large enough relative separation compute +*> the corresponding eigenvector by forming a rank revealing twisted +*> factorization. Go back to (c) for any clusters that remain. +*> +*> For more details, see: +*> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations +*> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," +*> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. +*> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and +*> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, +*> 2004. Also LAPACK Working Note 154. +*> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric +*> tridiagonal eigenvalue/eigenvector problem", +*> Computer Science Division Technical Report No. UCB/CSD-97-971, +*> UC Berkeley, May 1997. +*> +*> Further Details +*> 1.CSTEMR works only on machines which follow IEEE-754 +*> floating-point standard in their handling of infinities and NaNs. +*> This permits the use of efficient inner loops avoiding a check for +*> zero divisors. +*> +*> 2. LAPACK routines can be used to reduce a complex Hermitean matrix to +*> real symmetric tridiagonal form. +*> +*> (Any complex Hermitean tridiagonal matrix has real values on its diagonal +*> and potentially complex numbers on its off-diagonals. By applying a +*> similarity transform with an appropriate diagonal matrix +*> diag(1,e^{i \phy_1}, ... , e^{i \phy_{n-1}}), the complex Hermitean +*> matrix can be transformed into a real symmetric matrix and complex +*> arithmetic can be entirely avoided.) +*> +*> While the eigenvectors of the real symmetric tridiagonal matrix are real, +*> the eigenvectors of original complex Hermitean matrix have complex entries +*> in general. +*> Since LAPACK drivers overwrite the matrix data with the eigenvectors, +*> CSTEMR accepts complex workspace to facilitate interoperability +*> with CUNMTR or CUPMTR. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the N diagonal elements of the tridiagonal matrix +*> T. On exit, D is overwritten. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N) +*> On entry, the (N-1) subdiagonal elements of the tridiagonal +*> matrix T in elements 1 to N-1 of E. E(N) need not be set on +*> input, but is used internally as workspace. +*> On exit, E is overwritten. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, max(1,M) ) +*> If JOBZ = 'V', and if INFO = 0, then the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix T +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and can be computed with a workspace +*> query by setting NZC = -1, see below. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', then LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[in] NZC +*> \verbatim +*> NZC is INTEGER +*> The number of eigenvectors to be held in the array Z. +*> If RANGE = 'A', then NZC >= max(1,N). +*> If RANGE = 'V', then NZC >= the number of eigenvalues in (VL,VU]. +*> If RANGE = 'I', then NZC >= IU-IL+1. +*> If NZC = -1, then a workspace query is assumed; the +*> routine calculates the number of columns of the array Z that +*> are needed to hold the eigenvectors. +*> This value is returned as the first entry of the Z array, and +*> no error message related to NZC is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th computed eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). This is relevant in the case when the matrix +*> is split. ISUPPZ is only accessed when JOBZ is 'V' and N > 0. +*> \endverbatim +*> +*> \param[in,out] TRYRAC +*> \verbatim +*> TRYRAC is LOGICAL +*> If TRYRAC.EQ..TRUE., indicates that the code should check whether +*> the tridiagonal matrix defines its eigenvalues to high relative +*> accuracy. If so, the code uses relative-accuracy preserving +*> algorithms that might be (a bit) slower depending on the matrix. +*> If the matrix does not define its eigenvalues to high relative +*> accuracy, the code can uses possibly faster algorithms. +*> If TRYRAC.EQ..FALSE., the code is not required to guarantee +*> relatively accurate eigenvalues and can use the fastest possible +*> techniques. +*> On exit, a .TRUE. TRYRAC will be set to .FALSE. if the matrix +*> does not define its eigenvalues to high relative accuracy. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal +*> (and minimal) LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,18*N) +*> if JOBZ = 'V', and LWORK >= max(1,12*N) if JOBZ = 'N'. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (LIWORK) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N) +*> if the eigenvectors are desired, and LIWORK >= max(1,8*N) +*> if only the eigenvalues are to be computed. +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> On exit, INFO +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = 1X, internal error in SLARRE, +*> if INFO = 2X, internal error in CLARRV. +*> Here, the digit X = ABS( IINFO ) < 10, where IINFO is +*> the nonzero error code returned by SLARRE or +*> CLARRV, respectively. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE CSTEMR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, + $ M, W, Z, LDZ, NZC, ISUPPZ, TRYRAC, WORK, LWORK, + $ IWORK, LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE + LOGICAL TRYRAC + INTEGER IL, INFO, IU, LDZ, NZC, LIWORK, LWORK, M, N + REAL VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + REAL D( * ), E( * ), W( * ), WORK( * ) + COMPLEX Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, FOUR, MINRGP + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, + $ FOUR = 4.0E0, + $ MINRGP = 3.0E-3 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ, ZQUERY + INTEGER I, IBEGIN, IEND, IFIRST, IIL, IINDBL, IINDW, + $ IINDWK, IINFO, IINSPL, IIU, ILAST, IN, INDD, + $ INDE2, INDERR, INDGP, INDGRS, INDWRK, ITMP, + $ ITMP2, J, JBLK, JJ, LIWMIN, LWMIN, NSPLIT, + $ NZCMIN, OFFSET, WBEGIN, WEND + REAL BIGNUM, CS, EPS, PIVMIN, R1, R2, RMAX, RMIN, + $ RTOL1, RTOL2, SAFMIN, SCALE, SMLNUM, SN, + $ THRESH, TMP, TNRM, WL, WU +* .. +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANST + EXTERNAL LSAME, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL CLARRV, CSWAP, SCOPY, SLAE2, SLAEV2, SLARRC, + $ SLARRE, SLARRJ, SLARRR, SLASRT, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT + + +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ).OR.( LIWORK.EQ.-1 ) ) + ZQUERY = ( NZC.EQ.-1 ) + +* SSTEMR needs WORK of size 6*N, IWORK of size 3*N. +* In addition, SLARRE needs WORK of size 6*N, IWORK of size 5*N. +* Furthermore, CLARRV needs WORK of size 12*N, IWORK of size 7*N. + IF( WANTZ ) THEN + LWMIN = 18*N + LIWMIN = 10*N + ELSE +* need less workspace if only the eigenvalues are wanted + LWMIN = 12*N + LIWMIN = 8*N + ENDIF + + WL = ZERO + WU = ZERO + IIL = 0 + IIU = 0 + NSPLIT = 0 + + IF( VALEIG ) THEN +* We do not reference VL, VU in the cases RANGE = 'I','A' +* The interval (WL, WU] contains all the wanted eigenvalues. +* It is either given by the user or computed in SLARRE. + WL = VL + WU = VU + ELSEIF( INDEIG ) THEN +* We do not reference IL, IU in the cases RANGE = 'V','A' + IIL = IL + IIU = IU + ENDIF +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( VALEIG .AND. N.GT.0 .AND. WU.LE.WL ) THEN + INFO = -7 + ELSE IF( INDEIG .AND. ( IIL.LT.1 .OR. IIL.GT.N ) ) THEN + INFO = -8 + ELSE IF( INDEIG .AND. ( IIU.LT.IIL .OR. IIU.GT.N ) ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -17 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( WANTZ .AND. ALLEIG ) THEN + NZCMIN = N + ELSE IF( WANTZ .AND. VALEIG ) THEN + CALL SLARRC( 'T', N, VL, VU, D, E, SAFMIN, + $ NZCMIN, ITMP, ITMP2, INFO ) + ELSE IF( WANTZ .AND. INDEIG ) THEN + NZCMIN = IIU-IIL+1 + ELSE +* WANTZ .EQ. FALSE. + NZCMIN = 0 + ENDIF + IF( ZQUERY .AND. INFO.EQ.0 ) THEN + Z( 1,1 ) = NZCMIN + ELSE IF( NZC.LT.NZCMIN .AND. .NOT.ZQUERY ) THEN + INFO = -14 + END IF + END IF + + IF( INFO.NE.0 ) THEN +* + CALL XERBLA( 'CSTEMR', -INFO ) +* + RETURN + ELSE IF( LQUERY .OR. ZQUERY ) THEN + RETURN + END IF +* +* Handle N = 0, 1, and 2 cases immediately +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = D( 1 ) + ELSE + IF( WL.LT.D( 1 ) .AND. WU.GE.D( 1 ) ) THEN + M = 1 + W( 1 ) = D( 1 ) + END IF + END IF + IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN + Z( 1, 1 ) = ONE + ISUPPZ(1) = 1 + ISUPPZ(2) = 1 + END IF + RETURN + END IF +* + IF( N.EQ.2 ) THEN + IF( .NOT.WANTZ ) THEN + CALL SLAE2( D(1), E(1), D(2), R1, R2 ) + ELSE IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN + CALL SLAEV2( D(1), E(1), D(2), R1, R2, CS, SN ) + END IF + IF( ALLEIG.OR. + $ (VALEIG.AND.(R2.GT.WL).AND. + $ (R2.LE.WU)).OR. + $ (INDEIG.AND.(IIL.EQ.1)) ) THEN + M = M+1 + W( M ) = R2 + IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN + Z( 1, M ) = -SN + Z( 2, M ) = CS +* Note: At most one of SN and CS can be zero. + IF (SN.NE.ZERO) THEN + IF (CS.NE.ZERO) THEN + ISUPPZ(2*M-1) = 1 + ISUPPZ(2*M) = 2 + ELSE + ISUPPZ(2*M-1) = 1 + ISUPPZ(2*M) = 1 + END IF + ELSE + ISUPPZ(2*M-1) = 2 + ISUPPZ(2*M) = 2 + END IF + ENDIF + ENDIF + IF( ALLEIG.OR. + $ (VALEIG.AND.(R1.GT.WL).AND. + $ (R1.LE.WU)).OR. + $ (INDEIG.AND.(IIU.EQ.2)) ) THEN + M = M+1 + W( M ) = R1 + IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN + Z( 1, M ) = CS + Z( 2, M ) = SN +* Note: At most one of SN and CS can be zero. + IF (SN.NE.ZERO) THEN + IF (CS.NE.ZERO) THEN + ISUPPZ(2*M-1) = 1 + ISUPPZ(2*M) = 2 + ELSE + ISUPPZ(2*M-1) = 1 + ISUPPZ(2*M) = 1 + END IF + ELSE + ISUPPZ(2*M-1) = 2 + ISUPPZ(2*M) = 2 + END IF + ENDIF + ENDIF + ELSE + +* Continue with general N + + INDGRS = 1 + INDERR = 2*N + 1 + INDGP = 3*N + 1 + INDD = 4*N + 1 + INDE2 = 5*N + 1 + INDWRK = 6*N + 1 +* + IINSPL = 1 + IINDBL = N + 1 + IINDW = 2*N + 1 + IINDWK = 3*N + 1 +* +* Scale matrix to allowable range, if necessary. +* The allowable range is related to the PIVMIN parameter; see the +* comments in SLARRD. The preference for scaling small values +* up is heuristic; we expect users' matrices not to be close to the +* RMAX threshold. +* + SCALE = ONE + TNRM = SLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + SCALE = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + SCALE = RMAX / TNRM + END IF + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( N, SCALE, D, 1 ) + CALL SSCAL( N-1, SCALE, E, 1 ) + TNRM = TNRM*SCALE + IF( VALEIG ) THEN +* If eigenvalues in interval have to be found, +* scale (WL, WU] accordingly + WL = WL*SCALE + WU = WU*SCALE + ENDIF + END IF +* +* Compute the desired eigenvalues of the tridiagonal after splitting +* into smaller subblocks if the corresponding off-diagonal elements +* are small +* THRESH is the splitting parameter for SLARRE +* A negative THRESH forces the old splitting criterion based on the +* size of the off-diagonal. A positive THRESH switches to splitting +* which preserves relative accuracy. +* + IF( TRYRAC ) THEN +* Test whether the matrix warrants the more expensive relative approach. + CALL SLARRR( N, D, E, IINFO ) + ELSE +* The user does not care about relative accurately eigenvalues + IINFO = -1 + ENDIF +* Set the splitting criterion + IF (IINFO.EQ.0) THEN + THRESH = EPS + ELSE + THRESH = -EPS +* relative accuracy is desired but T does not guarantee it + TRYRAC = .FALSE. + ENDIF +* + IF( TRYRAC ) THEN +* Copy original diagonal, needed to guarantee relative accuracy + CALL SCOPY(N,D,1,WORK(INDD),1) + ENDIF +* Store the squares of the offdiagonal values of T + DO 5 J = 1, N-1 + WORK( INDE2+J-1 ) = E(J)**2 + 5 CONTINUE + +* Set the tolerance parameters for bisection + IF( .NOT.WANTZ ) THEN +* SLARRE computes the eigenvalues to full precision. + RTOL1 = FOUR * EPS + RTOL2 = FOUR * EPS + ELSE +* SLARRE computes the eigenvalues to less than full precision. +* CLARRV will refine the eigenvalue approximations, and we only +* need less accurate initial bisection in SLARRE. +* Note: these settings do only affect the subset case and SLARRE + RTOL1 = MAX( SQRT(EPS)*5.0E-2, FOUR * EPS ) + RTOL2 = MAX( SQRT(EPS)*5.0E-3, FOUR * EPS ) + ENDIF + CALL SLARRE( RANGE, N, WL, WU, IIL, IIU, D, E, + $ WORK(INDE2), RTOL1, RTOL2, THRESH, NSPLIT, + $ IWORK( IINSPL ), M, W, WORK( INDERR ), + $ WORK( INDGP ), IWORK( IINDBL ), + $ IWORK( IINDW ), WORK( INDGRS ), PIVMIN, + $ WORK( INDWRK ), IWORK( IINDWK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 10 + ABS( IINFO ) + RETURN + END IF +* Note that if RANGE .NE. 'V', SLARRE computes bounds on the desired +* part of the spectrum. All desired eigenvalues are contained in +* (WL,WU] + + + IF( WANTZ ) THEN +* +* Compute the desired eigenvectors corresponding to the computed +* eigenvalues +* + CALL CLARRV( N, WL, WU, D, E, + $ PIVMIN, IWORK( IINSPL ), M, + $ 1, M, MINRGP, RTOL1, RTOL2, + $ W, WORK( INDERR ), WORK( INDGP ), IWORK( IINDBL ), + $ IWORK( IINDW ), WORK( INDGRS ), Z, LDZ, + $ ISUPPZ, WORK( INDWRK ), IWORK( IINDWK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 20 + ABS( IINFO ) + RETURN + END IF + ELSE +* SLARRE computes eigenvalues of the (shifted) root representation +* CLARRV returns the eigenvalues of the unshifted matrix. +* However, if the eigenvectors are not desired by the user, we need +* to apply the corresponding shifts from SLARRE to obtain the +* eigenvalues of the original matrix. + DO 20 J = 1, M + ITMP = IWORK( IINDBL+J-1 ) + W( J ) = W( J ) + E( IWORK( IINSPL+ITMP-1 ) ) + 20 CONTINUE + END IF +* + + IF ( TRYRAC ) THEN +* Refine computed eigenvalues so that they are relatively accurate +* with respect to the original matrix T. + IBEGIN = 1 + WBEGIN = 1 + DO 39 JBLK = 1, IWORK( IINDBL+M-1 ) + IEND = IWORK( IINSPL+JBLK-1 ) + IN = IEND - IBEGIN + 1 + WEND = WBEGIN - 1 +* check if any eigenvalues have to be refined in this block + 36 CONTINUE + IF( WEND.LT.M ) THEN + IF( IWORK( IINDBL+WEND ).EQ.JBLK ) THEN + WEND = WEND + 1 + GO TO 36 + END IF + END IF + IF( WEND.LT.WBEGIN ) THEN + IBEGIN = IEND + 1 + GO TO 39 + END IF + + OFFSET = IWORK(IINDW+WBEGIN-1)-1 + IFIRST = IWORK(IINDW+WBEGIN-1) + ILAST = IWORK(IINDW+WEND-1) + RTOL2 = FOUR * EPS + CALL SLARRJ( IN, + $ WORK(INDD+IBEGIN-1), WORK(INDE2+IBEGIN-1), + $ IFIRST, ILAST, RTOL2, OFFSET, W(WBEGIN), + $ WORK( INDERR+WBEGIN-1 ), + $ WORK( INDWRK ), IWORK( IINDWK ), PIVMIN, + $ TNRM, IINFO ) + IBEGIN = IEND + 1 + WBEGIN = WEND + 1 + 39 CONTINUE + ENDIF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( M, ONE / SCALE, W, 1 ) + END IF + END IF +* +* If eigenvalues are not in increasing order, then sort them, +* possibly along with eigenvectors. +* + IF( NSPLIT.GT.1 .OR. N.EQ.2 ) THEN + IF( .NOT. WANTZ ) THEN + CALL SLASRT( 'I', M, W, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 3 + RETURN + END IF + ELSE + DO 60 J = 1, M - 1 + I = 0 + TMP = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP ) THEN + I = JJ + TMP = W( JJ ) + END IF + 50 CONTINUE + IF( I.NE.0 ) THEN + W( I ) = W( J ) + W( J ) = TMP + IF( WANTZ ) THEN + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + ITMP = ISUPPZ( 2*I-1 ) + ISUPPZ( 2*I-1 ) = ISUPPZ( 2*J-1 ) + ISUPPZ( 2*J-1 ) = ITMP + ITMP = ISUPPZ( 2*I ) + ISUPPZ( 2*I ) = ISUPPZ( 2*J ) + ISUPPZ( 2*J ) = ITMP + END IF + END IF + 60 CONTINUE + END IF + ENDIF +* +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of CSTEMR +* + END diff --git a/dspl/liblapack/SRC/csteqr.f b/dspl/liblapack/SRC/csteqr.f new file mode 100644 index 0000000..fa95db6 --- /dev/null +++ b/dspl/liblapack/SRC/csteqr.f @@ -0,0 +1,576 @@ +*> \brief \b CSTEQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSTEQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPZ +* INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ), WORK( * ) +* COMPLEX Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSTEQR computes all eigenvalues and, optionally, eigenvectors of a +*> symmetric tridiagonal matrix using the implicit QL or QR method. +*> The eigenvectors of a full or band complex Hermitian matrix can also +*> be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this +*> matrix to tridiagonal form. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only. +*> = 'V': Compute eigenvalues and eigenvectors of the original +*> Hermitian matrix. On entry, Z must contain the +*> unitary matrix used to reduce the original matrix +*> to tridiagonal form. +*> = 'I': Compute eigenvalues and eigenvectors of the +*> tridiagonal matrix. Z is initialized to the identity +*> matrix. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the diagonal elements of the tridiagonal matrix. +*> On exit, if INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', then Z contains the unitary +*> matrix used in the reduction to tridiagonal form. +*> On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +*> orthonormal eigenvectors of the original Hermitian matrix, +*> and if COMPZ = 'I', Z contains the orthonormal eigenvectors +*> of the symmetric tridiagonal matrix. +*> If COMPZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> eigenvectors are desired, then LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (max(1,2*N-2)) +*> If COMPZ = 'N', then WORK is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: the algorithm has failed to find all the eigenvalues in +*> a total of 30*N iterations; if INFO = i, then i +*> elements of E have not converged to zero; on exit, D +*> and E contain the elements of a symmetric tridiagonal +*> matrix which is unitarily similar to the original +*> matrix. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), WORK( * ) + COMPLEX Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, THREE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ THREE = 3.0E0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ), + $ CONE = ( 1.0E0, 0.0E0 ) ) + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) +* .. +* .. Local Scalars .. + INTEGER I, ICOMPZ, II, ISCALE, J, JTOT, K, L, L1, LEND, + $ LENDM1, LENDP1, LENDSV, LM1, LSV, M, MM, MM1, + $ NM1, NMAXIT + REAL ANORM, B, C, EPS, EPS2, F, G, P, R, RT1, RT2, + $ S, SAFMAX, SAFMIN, SSFMAX, SSFMIN, TST +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANST, SLAPY2 + EXTERNAL LSAME, SLAMCH, SLANST, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL CLASET, CLASR, CSWAP, SLAE2, SLAEV2, SLARTG, + $ SLASCL, SLASRT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSTEQR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ICOMPZ.EQ.2 ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Determine the unit roundoff and over/underflow thresholds. +* + EPS = SLAMCH( 'E' ) + EPS2 = EPS**2 + SAFMIN = SLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + SSFMAX = SQRT( SAFMAX ) / THREE + SSFMIN = SQRT( SAFMIN ) / EPS2 +* +* Compute the eigenvalues and eigenvectors of the tridiagonal +* matrix. +* + IF( ICOMPZ.EQ.2 ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, Z, LDZ ) +* + NMAXIT = N*MAXIT + JTOT = 0 +* +* Determine where the matrix splits and choose QL or QR iteration +* for each block, according to whether top or bottom diagonal +* element is smaller. +* + L1 = 1 + NM1 = N - 1 +* + 10 CONTINUE + IF( L1.GT.N ) + $ GO TO 160 + IF( L1.GT.1 ) + $ E( L1-1 ) = ZERO + IF( L1.LE.NM1 ) THEN + DO 20 M = L1, NM1 + TST = ABS( E( M ) ) + IF( TST.EQ.ZERO ) + $ GO TO 30 + IF( TST.LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+ + $ 1 ) ) ) )*EPS ) THEN + E( M ) = ZERO + GO TO 30 + END IF + 20 CONTINUE + END IF + M = N +* + 30 CONTINUE + L = L1 + LSV = L + LEND = M + LENDSV = LEND + L1 = M + 1 + IF( LEND.EQ.L ) + $ GO TO 10 +* +* Scale submatrix in rows and columns L to LEND +* + ANORM = SLANST( 'I', LEND-L+1, D( L ), E( L ) ) + ISCALE = 0 + IF( ANORM.EQ.ZERO ) + $ GO TO 10 + IF( ANORM.GT.SSFMAX ) THEN + ISCALE = 1 + CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, + $ INFO ) + ELSE IF( ANORM.LT.SSFMIN ) THEN + ISCALE = 2 + CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, + $ INFO ) + END IF +* +* Choose between QL and QR iteration +* + IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN + LEND = LSV + L = LENDSV + END IF +* + IF( LEND.GT.L ) THEN +* +* QL Iteration +* +* Look for small subdiagonal element. +* + 40 CONTINUE + IF( L.NE.LEND ) THEN + LENDM1 = LEND - 1 + DO 50 M = L, LENDM1 + TST = ABS( E( M ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M+1 ) )+ + $ SAFMIN )GO TO 60 + 50 CONTINUE + END IF +* + M = LEND +* + 60 CONTINUE + IF( M.LT.LEND ) + $ E( M ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 80 +* +* If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 +* to compute its eigensystem. +* + IF( M.EQ.L+1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL SLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S ) + WORK( L ) = C + WORK( N-1+L ) = S + CALL CLASR( 'R', 'V', 'B', N, 2, WORK( L ), + $ WORK( N-1+L ), Z( 1, L ), LDZ ) + ELSE + CALL SLAE2( D( L ), E( L ), D( L+1 ), RT1, RT2 ) + END IF + D( L ) = RT1 + D( L+1 ) = RT2 + E( L ) = ZERO + L = L + 2 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L+1 )-P ) / ( TWO*E( L ) ) + R = SLAPY2( G, ONE ) + G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + MM1 = M - 1 + DO 70 I = MM1, L, -1 + F = S*E( I ) + B = C*E( I ) + CALL SLARTG( G, F, C, S, R ) + IF( I.NE.M-1 ) + $ E( I+1 ) = R + G = D( I+1 ) - P + R = ( D( I )-G )*S + TWO*C*B + P = S*R + D( I+1 ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = -S + END IF +* + 70 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = M - L + 1 + CALL CLASR( 'R', 'V', 'B', N, MM, WORK( L ), WORK( N-1+L ), + $ Z( 1, L ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( L ) = G + GO TO 40 +* +* Eigenvalue found. +* + 80 CONTINUE + D( L ) = P +* + L = L + 1 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 +* + ELSE +* +* QR Iteration +* +* Look for small superdiagonal element. +* + 90 CONTINUE + IF( L.NE.LEND ) THEN + LENDP1 = LEND + 1 + DO 100 M = L, LENDP1, -1 + TST = ABS( E( M-1 ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M-1 ) )+ + $ SAFMIN )GO TO 110 + 100 CONTINUE + END IF +* + M = LEND +* + 110 CONTINUE + IF( M.GT.LEND ) + $ E( M-1 ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 130 +* +* If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 +* to compute its eigensystem. +* + IF( M.EQ.L-1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL SLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S ) + WORK( M ) = C + WORK( N-1+M ) = S + CALL CLASR( 'R', 'V', 'F', N, 2, WORK( M ), + $ WORK( N-1+M ), Z( 1, L-1 ), LDZ ) + ELSE + CALL SLAE2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2 ) + END IF + D( L-1 ) = RT1 + D( L ) = RT2 + E( L-1 ) = ZERO + L = L - 2 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) ) + R = SLAPY2( G, ONE ) + G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + LM1 = L - 1 + DO 120 I = M, LM1 + F = S*E( I ) + B = C*E( I ) + CALL SLARTG( G, F, C, S, R ) + IF( I.NE.M ) + $ E( I-1 ) = R + G = D( I ) - P + R = ( D( I+1 )-G )*S + TWO*C*B + P = S*R + D( I ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = S + END IF +* + 120 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = L - M + 1 + CALL CLASR( 'R', 'V', 'F', N, MM, WORK( M ), WORK( N-1+M ), + $ Z( 1, M ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( LM1 ) = G + GO TO 90 +* +* Eigenvalue found. +* + 130 CONTINUE + D( L ) = P +* + L = L - 1 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 +* + END IF +* +* Undo scaling if necessary +* + 140 CONTINUE + IF( ISCALE.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + ELSE IF( ISCALE.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + END IF +* +* Check for no convergence to an eigenvalue after a total +* of N*MAXIT iterations. +* + IF( JTOT.EQ.NMAXIT ) THEN + DO 150 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 150 CONTINUE + RETURN + END IF + GO TO 10 +* +* Order eigenvalues and eigenvectors. +* + 160 CONTINUE + IF( ICOMPZ.EQ.0 ) THEN +* +* Use Quick Sort +* + CALL SLASRT( 'I', N, D, INFO ) +* + ELSE +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 180 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 170 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 170 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL CSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 180 CONTINUE + END IF + RETURN +* +* End of CSTEQR +* + END diff --git a/dspl/liblapack/SRC/csycon.f b/dspl/liblapack/SRC/csycon.f new file mode 100644 index 0000000..dbd6957 --- /dev/null +++ b/dspl/liblapack/SRC/csycon.f @@ -0,0 +1,239 @@ +*> \brief \b CSYCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYCON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYCON estimates the reciprocal of the condition number (in the +*> 1-norm) of a complex symmetric matrix A using the factorization +*> A = U*D*U**T or A = L*D*L**T computed by CSYTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSYTRF. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CSYCON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + REAL AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**T) or inv(U*D*U**T). +* + CALL CSYTRS( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of CSYCON +* + END diff --git a/dspl/liblapack/SRC/csycon_3.f b/dspl/liblapack/SRC/csycon_3.f new file mode 100644 index 0000000..47d52dd --- /dev/null +++ b/dspl/liblapack/SRC/csycon_3.f @@ -0,0 +1,287 @@ +*> \brief \b CSYCON_3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYCON_3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYCON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* COMPLEX A( LDA, * ), E ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> CSYCON_3 estimates the reciprocal of the condition number (in the +*> 1-norm) of a complex symmetric matrix A using the factorization +*> computed by CSYTRF_RK or CSYTRF_BK: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> This routine uses BLAS3 solver CSYTRS_3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix: +*> = 'U': Upper triangular, form is A = P*U*D*(U**T)*(P**T); +*> = 'L': Lower triangular, form is A = P*L*D*(L**T)*(P**T). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> Diagonal of the block diagonal matrix D and factors U or L +*> as computed by CSYTRF_RK and CSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSYTRF_RK or CSYTRF_BK. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexSYcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> June 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CSYCON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + REAL AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CSYTRS_3, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYCON_3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.CZERO ) + $ RETURN + END DO + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.CZERO ) + $ RETURN + END DO + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**T) or inv(U*D*U**T). +* + CALL CSYTRS_3( UPLO, N, 1, A, LDA, E, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of CSYCON_3 +* + END diff --git a/dspl/liblapack/SRC/csycon_rook.f b/dspl/liblapack/SRC/csycon_rook.f new file mode 100644 index 0000000..85320ba --- /dev/null +++ b/dspl/liblapack/SRC/csycon_rook.f @@ -0,0 +1,255 @@ +*> \brief CSYCON_ROOK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYCON_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYCON_ROOK( UPLO, N, A, LDA, IPIV, ANORM, RCOND, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYCON_ROOK estimates the reciprocal of the condition number (in the +*> 1-norm) of a complex symmetric matrix A using the factorization +*> A = U*D*U**T or A = L*D*L**T computed by CSYTRF_ROOK. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CSYTRF_ROOK. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSYTRF_ROOK. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexSYcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> April 2012, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CSYCON_ROOK( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + COMPLEX CZERO + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + REAL AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CSYTRS_ROOK, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYCON_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.CZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.CZERO ) + $ RETURN + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**T) or inv(U*D*U**T). +* + CALL CSYTRS_ROOK( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of CSYCON_ROOK +* + END diff --git a/dspl/liblapack/SRC/csyconv.f b/dspl/liblapack/SRC/csyconv.f new file mode 100644 index 0000000..0937f82 --- /dev/null +++ b/dspl/liblapack/SRC/csyconv.f @@ -0,0 +1,366 @@ +*> \brief \b CSYCONV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYCONV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYCONV( UPLO, WAY, N, A, LDA, IPIV, E, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, WAY +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYCONV convert A given by TRF into L and D and vice-versa. +*> Get Non-diag elements of D (returned in workspace) and +*> apply or reverse permutation done in TRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] WAY +*> \verbatim +*> WAY is CHARACTER*1 +*> = 'C': Convert +*> = 'R': Revert +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSYTRF. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is COMPLEX array, dimension (N) +*> E stores the supdiagonal/subdiagonal of the symmetric 1-by-1 +*> or 2-by-2 block diagonal matrix D in LDLT. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CSYCONV( UPLO, WAY, N, A, LDA, IPIV, E, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO, WAY + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = (0.0E+0,0.0E+0) ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Local Scalars .. + LOGICAL UPPER, CONVERT + INTEGER I, IP, J + COMPLEX TEMP +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + CONVERT = LSAME( WAY, 'C' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.CONVERT .AND. .NOT.LSAME( WAY, 'R' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYCONV', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* A is UPPER +* +* Convert A (A is upper) +* +* Convert VALUE +* + IF ( CONVERT ) THEN + I=N + E(1)=ZERO + DO WHILE ( I .GT. 1 ) + IF( IPIV(I) .LT. 0 ) THEN + E(I)=A(I-1,I) + E(I-1)=ZERO + A(I-1,I)=ZERO + I=I-1 + ELSE + E(I)=ZERO + ENDIF + I=I-1 + END DO +* +* Convert PERMUTATIONS +* + I=N + DO WHILE ( I .GE. 1 ) + IF( IPIV(I) .GT. 0) THEN + IP=IPIV(I) + IF( I .LT. N) THEN + DO 12 J= I+1,N + TEMP=A(IP,J) + A(IP,J)=A(I,J) + A(I,J)=TEMP + 12 CONTINUE + ENDIF + ELSE + IP=-IPIV(I) + IF( I .LT. N) THEN + DO 13 J= I+1,N + TEMP=A(IP,J) + A(IP,J)=A(I-1,J) + A(I-1,J)=TEMP + 13 CONTINUE + ENDIF + I=I-1 + ENDIF + I=I-1 + END DO + + ELSE +* +* Revert A (A is upper) +* +* +* Revert PERMUTATIONS +* + I=1 + DO WHILE ( I .LE. N ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF( I .LT. N) THEN + DO J= I+1,N + TEMP=A(IP,J) + A(IP,J)=A(I,J) + A(I,J)=TEMP + END DO + ENDIF + ELSE + IP=-IPIV(I) + I=I+1 + IF( I .LT. N) THEN + DO J= I+1,N + TEMP=A(IP,J) + A(IP,J)=A(I-1,J) + A(I-1,J)=TEMP + END DO + ENDIF + ENDIF + I=I+1 + END DO +* +* Revert VALUE +* + I=N + DO WHILE ( I .GT. 1 ) + IF( IPIV(I) .LT. 0 ) THEN + A(I-1,I)=E(I) + I=I-1 + ENDIF + I=I-1 + END DO + END IF + ELSE +* +* A is LOWER +* + IF ( CONVERT ) THEN +* +* Convert A (A is lower) +* +* +* Convert VALUE +* + I=1 + E(N)=ZERO + DO WHILE ( I .LE. N ) + IF( I.LT.N .AND. IPIV(I) .LT. 0 ) THEN + E(I)=A(I+1,I) + E(I+1)=ZERO + A(I+1,I)=ZERO + I=I+1 + ELSE + E(I)=ZERO + ENDIF + I=I+1 + END DO +* +* Convert PERMUTATIONS +* + I=1 + DO WHILE ( I .LE. N ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF (I .GT. 1) THEN + DO 22 J= 1,I-1 + TEMP=A(IP,J) + A(IP,J)=A(I,J) + A(I,J)=TEMP + 22 CONTINUE + ENDIF + ELSE + IP=-IPIV(I) + IF (I .GT. 1) THEN + DO 23 J= 1,I-1 + TEMP=A(IP,J) + A(IP,J)=A(I+1,J) + A(I+1,J)=TEMP + 23 CONTINUE + ENDIF + I=I+1 + ENDIF + I=I+1 + END DO + ELSE +* +* Revert A (A is lower) +* +* +* Revert PERMUTATIONS +* + I=N + DO WHILE ( I .GE. 1 ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF (I .GT. 1) THEN + DO J= 1,I-1 + TEMP=A(I,J) + A(I,J)=A(IP,J) + A(IP,J)=TEMP + END DO + ENDIF + ELSE + IP=-IPIV(I) + I=I-1 + IF (I .GT. 1) THEN + DO J= 1,I-1 + TEMP=A(I+1,J) + A(I+1,J)=A(IP,J) + A(IP,J)=TEMP + END DO + ENDIF + ENDIF + I=I-1 + END DO +* +* Revert VALUE +* + I=1 + DO WHILE ( I .LE. N-1 ) + IF( IPIV(I) .LT. 0 ) THEN + A(I+1,I)=E(I) + I=I+1 + ENDIF + I=I+1 + END DO + END IF + END IF + + RETURN +* +* End of CSYCONV +* + END diff --git a/dspl/liblapack/SRC/csyconvf.f b/dspl/liblapack/SRC/csyconvf.f new file mode 100644 index 0000000..77ecf46 --- /dev/null +++ b/dspl/liblapack/SRC/csyconvf.f @@ -0,0 +1,562 @@ +*> \brief \b CSYCONVF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYCONVF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYCONVF( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, WAY +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> If parameter WAY = 'C': +*> CSYCONVF converts the factorization output format used in +*> CSYTRF provided on entry in parameter A into the factorization +*> output format used in CSYTRF_RK (or CSYTRF_BK) that is stored +*> on exit in parameters A and E. It also coverts in place details of +*> the intechanges stored in IPIV from the format used in CSYTRF into +*> the format used in CSYTRF_RK (or CSYTRF_BK). +*> +*> If parameter WAY = 'R': +*> CSYCONVF performs the conversion in reverse direction, i.e. +*> converts the factorization output format used in CSYTRF_RK +*> (or CSYTRF_BK) provided on entry in parameters A and E into +*> the factorization output format used in CSYTRF that is stored +*> on exit in parameter A. It also coverts in place details of +*> the intechanges stored in IPIV from the format used in CSYTRF_RK +*> (or CSYTRF_BK) into the format used in CSYTRF. +*> +*> CSYCONVF can also convert in Hermitian matrix case, i.e. between +*> formats used in CHETRF and CHETRF_RK (or CHETRF_BK). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix A. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] WAY +*> \verbatim +*> WAY is CHARACTER*1 +*> = 'C': Convert +*> = 'R': Revert +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> +*> 1) If WAY ='C': +*> +*> On entry, contains factorization details in format used in +*> CSYTRF: +*> a) all elements of the symmetric block diagonal +*> matrix D on the diagonal of A and on superdiagonal +*> (or subdiagonal) of A, and +*> b) If UPLO = 'U': multipliers used to obtain factor U +*> in the superdiagonal part of A. +*> If UPLO = 'L': multipliers used to obtain factor L +*> in the superdiagonal part of A. +*> +*> On exit, contains factorization details in format used in +*> CSYTRF_RK or CSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> 2) If WAY = 'R': +*> +*> On entry, contains factorization details in format used in +*> CSYTRF_RK or CSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> On exit, contains factorization details in format used in +*> CSYTRF: +*> a) all elements of the symmetric block diagonal +*> matrix D on the diagonal of A and on superdiagonal +*> (or subdiagonal) of A, and +*> b) If UPLO = 'U': multipliers used to obtain factor U +*> in the superdiagonal part of A. +*> If UPLO = 'L': multipliers used to obtain factor L +*> in the superdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is COMPLEX array, dimension (N) +*> +*> 1) If WAY ='C': +*> +*> On entry, just a workspace. +*> +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> 2) If WAY = 'R': +*> +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> On exit, is not changed +*> \endverbatim +*. +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> +*> 1) If WAY ='C': +*> On entry, details of the interchanges and the block +*> structure of D in the format used in CSYTRF. +*> On exit, details of the interchanges and the block +*> structure of D in the format used in CSYTRF_RK +*> ( or CSYTRF_BK). +*> +*> 1) If WAY ='R': +*> On entry, details of the interchanges and the block +*> structure of D in the format used in CSYTRF_RK +*> ( or CSYTRF_BK). +*> On exit, details of the interchanges and the block +*> structure of D in the format used in CSYTRF. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* ===================================================================== + SUBROUTINE CSYCONVF( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO, WAY + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* +* .. External Subroutines .. + EXTERNAL CSWAP, XERBLA +* .. Local Scalars .. + LOGICAL UPPER, CONVERT + INTEGER I, IP +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + CONVERT = LSAME( WAY, 'C' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.CONVERT .AND. .NOT.LSAME( WAY, 'R' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYCONVF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Begin A is UPPER +* + IF ( CONVERT ) THEN +* +* Convert A (A is upper) +* +* +* Convert VALUE +* +* Assign superdiagonal entries of D to array E and zero out +* corresponding entries in input storage A +* + I = N + E( 1 ) = ZERO + DO WHILE ( I.GT.1 ) + IF( IPIV( I ).LT.0 ) THEN + E( I ) = A( I-1, I ) + E( I-1 ) = ZERO + A( I-1, I ) = ZERO + I = I - 1 + ELSE + E( I ) = ZERO + END IF + I = I - 1 + END DO +* +* Convert PERMUTATIONS and IPIV +* +* Apply permutaions to submatrices of upper part of A +* in factorization order where i decreases from N to 1 +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(1:i,N-i:N) +* + IP = IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.I ) THEN + CALL CSWAP( N-I, A( I, I+1 ), LDA, + $ A( IP, I+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i-1 and IPIV(i) in A(1:i,N-i:N) +* + IP = -IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.(I-1) ) THEN + CALL CSWAP( N-I, A( I-1, I+1 ), LDA, + $ A( IP, I+1 ), LDA ) + END IF + END IF +* +* Convert IPIV +* There is no interchnge of rows i and and IPIV(i), +* so this should be reflected in IPIV format for +* *SYTRF_RK ( or *SYTRF_BK) +* + IPIV( I ) = I +* + I = I - 1 +* + END IF + I = I - 1 + END DO +* + ELSE +* +* Revert A (A is upper) +* +* +* Revert PERMUTATIONS and IPIV +* +* Apply permutaions to submatrices of upper part of A +* in reverse factorization order where i increases from 1 to N +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(1:i,N-i:N) +* + IP = IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.I ) THEN + CALL CSWAP( N-I, A( IP, I+1 ), LDA, + $ A( I, I+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i-1 and IPIV(i) in A(1:i,N-i:N) +* + I = I + 1 + IP = -IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.(I-1) ) THEN + CALL CSWAP( N-I, A( IP, I+1 ), LDA, + $ A( I-1, I+1 ), LDA ) + END IF + END IF +* +* Convert IPIV +* There is one interchange of rows i-1 and IPIV(i-1), +* so this should be recorded in two consecutive entries +* in IPIV format for *SYTRF +* + IPIV( I ) = IPIV( I-1 ) +* + END IF + I = I + 1 + END DO +* +* Revert VALUE +* Assign superdiagonal entries of D from array E to +* superdiagonal entries of A. +* + I = N + DO WHILE ( I.GT.1 ) + IF( IPIV( I ).LT.0 ) THEN + A( I-1, I ) = E( I ) + I = I - 1 + END IF + I = I - 1 + END DO +* +* End A is UPPER +* + END IF +* + ELSE +* +* Begin A is LOWER +* + IF ( CONVERT ) THEN +* +* Convert A (A is lower) +* +* +* Convert VALUE +* Assign subdiagonal entries of D to array E and zero out +* corresponding entries in input storage A +* + I = 1 + E( N ) = ZERO + DO WHILE ( I.LE.N ) + IF( I.LT.N .AND. IPIV(I).LT.0 ) THEN + E( I ) = A( I+1, I ) + E( I+1 ) = ZERO + A( I+1, I ) = ZERO + I = I + 1 + ELSE + E( I ) = ZERO + END IF + I = I + 1 + END DO +* +* Convert PERMUTATIONS and IPIV +* +* Apply permutaions to submatrices of lower part of A +* in factorization order where k increases from 1 to N +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(i:N,1:i-1) +* + IP = IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.I ) THEN + CALL CSWAP( I-1, A( I, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i+1 and IPIV(i) in A(i:N,1:i-1) +* + IP = -IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.(I+1) ) THEN + CALL CSWAP( I-1, A( I+1, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + END IF +* +* Convert IPIV +* There is no interchnge of rows i and and IPIV(i), +* so this should be reflected in IPIV format for +* *SYTRF_RK ( or *SYTRF_BK) +* + IPIV( I ) = I +* + I = I + 1 +* + END IF + I = I + 1 + END DO +* + ELSE +* +* Revert A (A is lower) +* +* +* Revert PERMUTATIONS and IPIV +* +* Apply permutaions to submatrices of lower part of A +* in reverse factorization order where i decreases from N to 1 +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(i:N,1:i-1) +* + IP = IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.I ) THEN + CALL CSWAP( I-1, A( IP, 1 ), LDA, + $ A( I, 1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i+1 and IPIV(i) in A(i:N,1:i-1) +* + I = I - 1 + IP = -IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.(I+1) ) THEN + CALL CSWAP( I-1, A( IP, 1 ), LDA, + $ A( I+1, 1 ), LDA ) + END IF + END IF +* +* Convert IPIV +* There is one interchange of rows i+1 and IPIV(i+1), +* so this should be recorded in consecutive entries +* in IPIV format for *SYTRF +* + IPIV( I ) = IPIV( I+1 ) +* + END IF + I = I - 1 + END DO +* +* Revert VALUE +* Assign subdiagonal entries of D from array E to +* subgiagonal entries of A. +* + I = 1 + DO WHILE ( I.LE.N-1 ) + IF( IPIV( I ).LT.0 ) THEN + A( I + 1, I ) = E( I ) + I = I + 1 + END IF + I = I + 1 + END DO +* + END IF +* +* End A is LOWER +* + END IF + + RETURN +* +* End of CSYCONVF +* + END diff --git a/dspl/liblapack/SRC/csyconvf_rook.f b/dspl/liblapack/SRC/csyconvf_rook.f new file mode 100644 index 0000000..1146a97 --- /dev/null +++ b/dspl/liblapack/SRC/csyconvf_rook.f @@ -0,0 +1,547 @@ +*> \brief \b CSYCONVF_ROOK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYCONVF_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYCONVF_ROOK( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, WAY +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> If parameter WAY = 'C': +*> CSYCONVF_ROOK converts the factorization output format used in +*> CSYTRF_ROOK provided on entry in parameter A into the factorization +*> output format used in CSYTRF_RK (or CSYTRF_BK) that is stored +*> on exit in parameters A and E. IPIV format for CSYTRF_ROOK and +*> CSYTRF_RK (or CSYTRF_BK) is the same and is not converted. +*> +*> If parameter WAY = 'R': +*> CSYCONVF_ROOK performs the conversion in reverse direction, i.e. +*> converts the factorization output format used in CSYTRF_RK +*> (or CSYTRF_BK) provided on entry in parameters A and E into +*> the factorization output format used in CSYTRF_ROOK that is stored +*> on exit in parameter A. IPIV format for CSYTRF_ROOK and +*> CSYTRF_RK (or CSYTRF_BK) is the same and is not converted. +*> +*> CSYCONVF_ROOK can also convert in Hermitian matrix case, i.e. between +*> formats used in CHETRF_ROOK and CHETRF_RK (or CHETRF_BK). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix A. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] WAY +*> \verbatim +*> WAY is CHARACTER*1 +*> = 'C': Convert +*> = 'R': Revert +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> +*> 1) If WAY ='C': +*> +*> On entry, contains factorization details in format used in +*> CSYTRF_ROOK: +*> a) all elements of the symmetric block diagonal +*> matrix D on the diagonal of A and on superdiagonal +*> (or subdiagonal) of A, and +*> b) If UPLO = 'U': multipliers used to obtain factor U +*> in the superdiagonal part of A. +*> If UPLO = 'L': multipliers used to obtain factor L +*> in the superdiagonal part of A. +*> +*> On exit, contains factorization details in format used in +*> CSYTRF_RK or CSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> 2) If WAY = 'R': +*> +*> On entry, contains factorization details in format used in +*> CSYTRF_RK or CSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> On exit, contains factorization details in format used in +*> CSYTRF_ROOK: +*> a) all elements of the symmetric block diagonal +*> matrix D on the diagonal of A and on superdiagonal +*> (or subdiagonal) of A, and +*> b) If UPLO = 'U': multipliers used to obtain factor U +*> in the superdiagonal part of A. +*> If UPLO = 'L': multipliers used to obtain factor L +*> in the superdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is COMPLEX array, dimension (N) +*> +*> 1) If WAY ='C': +*> +*> On entry, just a workspace. +*> +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> 2) If WAY = 'R': +*> +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> On exit, is not changed +*> \endverbatim +*. +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On entry, details of the interchanges and the block +*> structure of D as determined: +*> 1) by CSYTRF_ROOK, if WAY ='C'; +*> 2) by CSYTRF_RK (or CSYTRF_BK), if WAY ='R'. +*> The IPIV format is the same for all these routines. +*> +*> On exit, is not changed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* ===================================================================== + SUBROUTINE CSYCONVF_ROOK( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO, WAY + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* +* .. External Subroutines .. + EXTERNAL CSWAP, XERBLA +* .. Local Scalars .. + LOGICAL UPPER, CONVERT + INTEGER I, IP, IP2 +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + CONVERT = LSAME( WAY, 'C' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.CONVERT .AND. .NOT.LSAME( WAY, 'R' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYCONVF_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Begin A is UPPER +* + IF ( CONVERT ) THEN +* +* Convert A (A is upper) +* +* +* Convert VALUE +* +* Assign superdiagonal entries of D to array E and zero out +* corresponding entries in input storage A +* + I = N + E( 1 ) = ZERO + DO WHILE ( I.GT.1 ) + IF( IPIV( I ).LT.0 ) THEN + E( I ) = A( I-1, I ) + E( I-1 ) = ZERO + A( I-1, I ) = ZERO + I = I - 1 + ELSE + E( I ) = ZERO + END IF + I = I - 1 + END DO +* +* Convert PERMUTATIONS +* +* Apply permutaions to submatrices of upper part of A +* in factorization order where i decreases from N to 1 +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(1:i,N-i:N) +* + IP = IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.I ) THEN + CALL CSWAP( N-I, A( I, I+1 ), LDA, + $ A( IP, I+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i and IPIV(i) and i-1 and IPIV(i-1) +* in A(1:i,N-i:N) +* + IP = -IPIV( I ) + IP2 = -IPIV( I-1 ) + IF( I.LT.N ) THEN + IF( IP.NE.I ) THEN + CALL CSWAP( N-I, A( I, I+1 ), LDA, + $ A( IP, I+1 ), LDA ) + END IF + IF( IP2.NE.(I-1) ) THEN + CALL CSWAP( N-I, A( I-1, I+1 ), LDA, + $ A( IP2, I+1 ), LDA ) + END IF + END IF + I = I - 1 +* + END IF + I = I - 1 + END DO +* + ELSE +* +* Revert A (A is upper) +* +* +* Revert PERMUTATIONS +* +* Apply permutaions to submatrices of upper part of A +* in reverse factorization order where i increases from 1 to N +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(1:i,N-i:N) +* + IP = IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.I ) THEN + CALL CSWAP( N-I, A( IP, I+1 ), LDA, + $ A( I, I+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i-1 and IPIV(i-1) and i and IPIV(i) +* in A(1:i,N-i:N) +* + I = I + 1 + IP = -IPIV( I ) + IP2 = -IPIV( I-1 ) + IF( I.LT.N ) THEN + IF( IP2.NE.(I-1) ) THEN + CALL CSWAP( N-I, A( IP2, I+1 ), LDA, + $ A( I-1, I+1 ), LDA ) + END IF + IF( IP.NE.I ) THEN + CALL CSWAP( N-I, A( IP, I+1 ), LDA, + $ A( I, I+1 ), LDA ) + END IF + END IF +* + END IF + I = I + 1 + END DO +* +* Revert VALUE +* Assign superdiagonal entries of D from array E to +* superdiagonal entries of A. +* + I = N + DO WHILE ( I.GT.1 ) + IF( IPIV( I ).LT.0 ) THEN + A( I-1, I ) = E( I ) + I = I - 1 + END IF + I = I - 1 + END DO +* +* End A is UPPER +* + END IF +* + ELSE +* +* Begin A is LOWER +* + IF ( CONVERT ) THEN +* +* Convert A (A is lower) +* +* +* Convert VALUE +* Assign subdiagonal entries of D to array E and zero out +* corresponding entries in input storage A +* + I = 1 + E( N ) = ZERO + DO WHILE ( I.LE.N ) + IF( I.LT.N .AND. IPIV(I).LT.0 ) THEN + E( I ) = A( I+1, I ) + E( I+1 ) = ZERO + A( I+1, I ) = ZERO + I = I + 1 + ELSE + E( I ) = ZERO + END IF + I = I + 1 + END DO +* +* Convert PERMUTATIONS +* +* Apply permutaions to submatrices of lower part of A +* in factorization order where i increases from 1 to N +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(i:N,1:i-1) +* + IP = IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.I ) THEN + CALL CSWAP( I-1, A( I, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i and IPIV(i) and i+1 and IPIV(i+1) +* in A(i:N,1:i-1) +* + IP = -IPIV( I ) + IP2 = -IPIV( I+1 ) + IF ( I.GT.1 ) THEN + IF( IP.NE.I ) THEN + CALL CSWAP( I-1, A( I, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + IF( IP2.NE.(I+1) ) THEN + CALL CSWAP( I-1, A( I+1, 1 ), LDA, + $ A( IP2, 1 ), LDA ) + END IF + END IF + I = I + 1 +* + END IF + I = I + 1 + END DO +* + ELSE +* +* Revert A (A is lower) +* +* +* Revert PERMUTATIONS +* +* Apply permutaions to submatrices of lower part of A +* in reverse factorization order where i decreases from N to 1 +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(i:N,1:i-1) +* + IP = IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.I ) THEN + CALL CSWAP( I-1, A( IP, 1 ), LDA, + $ A( I, 1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i+1 and IPIV(i+1) and i and IPIV(i) +* in A(i:N,1:i-1) +* + I = I - 1 + IP = -IPIV( I ) + IP2 = -IPIV( I+1 ) + IF ( I.GT.1 ) THEN + IF( IP2.NE.(I+1) ) THEN + CALL CSWAP( I-1, A( IP2, 1 ), LDA, + $ A( I+1, 1 ), LDA ) + END IF + IF( IP.NE.I ) THEN + CALL CSWAP( I-1, A( IP, 1 ), LDA, + $ A( I, 1 ), LDA ) + END IF + END IF +* + END IF + I = I - 1 + END DO +* +* Revert VALUE +* Assign subdiagonal entries of D from array E to +* subgiagonal entries of A. +* + I = 1 + DO WHILE ( I.LE.N-1 ) + IF( IPIV( I ).LT.0 ) THEN + A( I + 1, I ) = E( I ) + I = I + 1 + END IF + I = I + 1 + END DO +* + END IF +* +* End A is LOWER +* + END IF + + RETURN +* +* End of CSYCONVF_ROOK +* + END diff --git a/dspl/liblapack/SRC/csyequb.f b/dspl/liblapack/SRC/csyequb.f new file mode 100644 index 0000000..b1c5c6f --- /dev/null +++ b/dspl/liblapack/SRC/csyequb.f @@ -0,0 +1,343 @@ +*> \brief \b CSYEQUB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYEQUB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, N +* REAL AMAX, SCOND +* CHARACTER UPLO +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), WORK( * ) +* REAL S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYEQUB computes row and column scalings intended to equilibrate a +*> symmetric matrix A (with respect to the Euclidean norm) and reduce +*> its condition number. The scale factors S are computed by the BIN +*> algorithm (see references) so that the scaled matrix B with elements +*> B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of +*> the smallest possible condition number over all possible diagonal +*> scalings. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The N-by-N symmetric matrix whose scaling factors are to be +*> computed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is REAL +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is REAL +*> Largest absolute value of any matrix element. If AMAX is +*> very close to overflow or very close to underflow, the +*> matrix should be scaled. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +*> \par References: +* ================ +*> +*> Livne, O.E. and Golub, G.H., "Scaling by Binormalization", \n +*> Numerical Algorithms, vol. 35, no. 1, pp. 97-120, January 2004. \n +*> DOI 10.1023/B:NUMA.0000016606.32820.69 \n +*> Tech report version: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.3.1679 +*> +* ===================================================================== + SUBROUTINE CSYEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N + REAL AMAX, SCOND + CHARACTER UPLO +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), WORK( * ) + REAL S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0 ) + INTEGER MAX_ITER + PARAMETER ( MAX_ITER = 100 ) +* .. +* .. Local Scalars .. + INTEGER I, J, ITER + REAL AVG, STD, TOL, C0, C1, C2, T, U, SI, D, BASE, + $ SMIN, SMAX, SMLNUM, BIGNUM, SCALE, SUMSQ + LOGICAL UP + COMPLEX ZDUM +* .. +* .. External Functions .. + REAL SLAMCH + LOGICAL LSAME + EXTERNAL LSAME, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLASSQ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, INT, LOG, MAX, MIN, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT. ( LSAME( UPLO, 'U' ) .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -1 + ELSE IF ( N .LT. 0 ) THEN + INFO = -2 + ELSE IF ( LDA .LT. MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF ( INFO .NE. 0 ) THEN + CALL XERBLA( 'CSYEQUB', -INFO ) + RETURN + END IF + + UP = LSAME( UPLO, 'U' ) + AMAX = ZERO +* +* Quick return if possible. +* + IF ( N .EQ. 0 ) THEN + SCOND = ONE + RETURN + END IF + + DO I = 1, N + S( I ) = ZERO + END DO + + AMAX = ZERO + IF ( UP ) THEN + DO J = 1, N + DO I = 1, J-1 + S( I ) = MAX( S( I ), CABS1( A( I, J ) ) ) + S( J ) = MAX( S( J ), CABS1( A( I, J ) ) ) + AMAX = MAX( AMAX, CABS1( A( I, J ) ) ) + END DO + S( J ) = MAX( S( J ), CABS1( A( J, J ) ) ) + AMAX = MAX( AMAX, CABS1( A( J, J ) ) ) + END DO + ELSE + DO J = 1, N + S( J ) = MAX( S( J ), CABS1( A( J, J ) ) ) + AMAX = MAX( AMAX, CABS1( A( J, J ) ) ) + DO I = J+1, N + S( I ) = MAX( S( I ), CABS1( A( I, J ) ) ) + S( J ) = MAX( S( J ), CABS1( A( I, J ) ) ) + AMAX = MAX( AMAX, CABS1( A( I, J ) ) ) + END DO + END DO + END IF + DO J = 1, N + S( J ) = 1.0 / S( J ) + END DO + + TOL = ONE / SQRT( 2.0E0 * N ) + + DO ITER = 1, MAX_ITER + SCALE = 0.0E0 + SUMSQ = 0.0E0 +* beta = |A|s + DO I = 1, N + WORK( I ) = ZERO + END DO + IF ( UP ) THEN + DO J = 1, N + DO I = 1, J-1 + WORK( I ) = WORK( I ) + CABS1( A( I, J ) ) * S( J ) + WORK( J ) = WORK( J ) + CABS1( A( I, J ) ) * S( I ) + END DO + WORK( J ) = WORK( J ) + CABS1( A( J, J ) ) * S( J ) + END DO + ELSE + DO J = 1, N + WORK( J ) = WORK( J ) + CABS1( A( J, J ) ) * S( J ) + DO I = J+1, N + WORK( I ) = WORK( I ) + CABS1( A( I, J ) ) * S( J ) + WORK( J ) = WORK( J ) + CABS1( A( I, J ) ) * S( I ) + END DO + END DO + END IF + +* avg = s^T beta / n + AVG = 0.0E0 + DO I = 1, N + AVG = AVG + S( I )*WORK( I ) + END DO + AVG = AVG / N + + STD = 0.0E0 + DO I = N+1, 2*N + WORK( I ) = S( I-N ) * WORK( I-N ) - AVG + END DO + CALL CLASSQ( N, WORK( N+1 ), 1, SCALE, SUMSQ ) + STD = SCALE * SQRT( SUMSQ / N ) + + IF ( STD .LT. TOL * AVG ) GOTO 999 + + DO I = 1, N + T = CABS1( A( I, I ) ) + SI = S( I ) + C2 = ( N-1 ) * T + C1 = ( N-2 ) * ( WORK( I ) - T*SI ) + C0 = -(T*SI)*SI + 2*WORK( I )*SI - N*AVG + D = C1*C1 - 4*C0*C2 + + IF ( D .LE. 0 ) THEN + INFO = -1 + RETURN + END IF + SI = -2*C0 / ( C1 + SQRT( D ) ) + + D = SI - S( I ) + U = ZERO + IF ( UP ) THEN + DO J = 1, I + T = CABS1( A( J, I ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + DO J = I+1,N + T = CABS1( A( I, J ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + ELSE + DO J = 1, I + T = CABS1( A( I, J ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + DO J = I+1,N + T = CABS1( A( J, I ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + END IF + + AVG = AVG + ( U + WORK( I ) ) * D / N + S( I ) = SI + END DO + END DO + + 999 CONTINUE + + SMLNUM = SLAMCH( 'SAFEMIN' ) + BIGNUM = ONE / SMLNUM + SMIN = BIGNUM + SMAX = ZERO + T = ONE / SQRT( AVG ) + BASE = SLAMCH( 'B' ) + U = ONE / LOG( BASE ) + DO I = 1, N + S( I ) = BASE ** INT( U * LOG( S( I ) * T ) ) + SMIN = MIN( SMIN, S( I ) ) + SMAX = MAX( SMAX, S( I ) ) + END DO + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) +* + END diff --git a/dspl/liblapack/SRC/csymv.f b/dspl/liblapack/SRC/csymv.f new file mode 100644 index 0000000..5ae8c21 --- /dev/null +++ b/dspl/liblapack/SRC/csymv.f @@ -0,0 +1,343 @@ +*> \brief \b CSYMV computes a matrix-vector product for a complex symmetric matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYMV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INCX, INCY, LDA, N +* COMPLEX ALPHA, BETA +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYMV performs the matrix-vector operation +*> +*> y := alpha*A*x + beta*y, +*> +*> where alpha and beta are scalars, x and y are n element vectors and +*> A is an n by n symmetric matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array A is to be referenced as +*> follows: +*> +*> UPLO = 'U' or 'u' Only the upper triangular part of A +*> is to be referenced. +*> +*> UPLO = 'L' or 'l' Only the lower triangular part of A +*> is to be referenced. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension ( LDA, N ) +*> Before entry, with UPLO = 'U' or 'u', the leading n by n +*> upper triangular part of the array A must contain the upper +*> triangular part of the symmetric matrix and the strictly +*> lower triangular part of A is not referenced. +*> Before entry, with UPLO = 'L' or 'l', the leading n by n +*> lower triangular part of the array A must contain the lower +*> triangular part of the symmetric matrix and the strictly +*> upper triangular part of A is not referenced. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, N ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX array, dimension at least +*> ( 1 + ( N - 1 )*abs( INCX ) ). +*> Before entry, the incremented array X must contain the N- +*> element vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is COMPLEX +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX array, dimension at least +*> ( 1 + ( N - 1 )*abs( INCY ) ). +*> Before entry, the incremented array Y must contain the n +*> element vector y. On exit, Y is overwritten by the updated +*> vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> Unchanged on exit. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYauxiliary +* +* ===================================================================== + SUBROUTINE CSYMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCX, INCY, LDA, N + COMPLEX ALPHA, BETA +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY + COMPLEX TEMP1, TEMP2 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = 1 + ELSE IF( N.LT.0 ) THEN + INFO = 2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = 5 + ELSE IF( INCX.EQ.0 ) THEN + INFO = 7 + ELSE IF( INCY.EQ.0 ) THEN + INFO = 10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ) .OR. ( ( ALPHA.EQ.ZERO ) .AND. ( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 ) THEN + KX = 1 + ELSE + KX = 1 - ( N-1 )*INCX + END IF + IF( INCY.GT.0 ) THEN + KY = 1 + ELSE + KY = 1 - ( N-1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE ) THEN + IF( INCY.EQ.1 ) THEN + IF( BETA.EQ.ZERO ) THEN + DO 10 I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20 I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO ) THEN + DO 30 I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40 I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Form y when A is stored in upper triangle. +* + IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN + DO 60 J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + DO 50 I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( I ) + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*A( J, J ) + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80 J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70 I = 1, J - 1 + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y when A is stored in lower triangle. +* + IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN + DO 100 J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*A( J, J ) + DO 90 I = J + 1, N + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( I ) + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120 J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + IX = JX + IY = JY + DO 110 I = J + 1, N + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of CSYMV +* + END diff --git a/dspl/liblapack/SRC/csyr.f b/dspl/liblapack/SRC/csyr.f new file mode 100644 index 0000000..9f435cd --- /dev/null +++ b/dspl/liblapack/SRC/csyr.f @@ -0,0 +1,268 @@ +*> \brief \b CSYR performs the symmetric rank-1 update of a complex symmetric matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYR( UPLO, N, ALPHA, X, INCX, A, LDA ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INCX, LDA, N +* COMPLEX ALPHA +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYR performs the symmetric rank 1 operation +*> +*> A := alpha*x*x**H + A, +*> +*> where alpha is a complex scalar, x is an n element vector and A is an +*> n by n symmetric matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array A is to be referenced as +*> follows: +*> +*> UPLO = 'U' or 'u' Only the upper triangular part of A +*> is to be referenced. +*> +*> UPLO = 'L' or 'l' Only the lower triangular part of A +*> is to be referenced. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX array, dimension at least +*> ( 1 + ( N - 1 )*abs( INCX ) ). +*> Before entry, the incremented array X must contain the N- +*> element vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension ( LDA, N ) +*> Before entry, with UPLO = 'U' or 'u', the leading n by n +*> upper triangular part of the array A must contain the upper +*> triangular part of the symmetric matrix and the strictly +*> lower triangular part of A is not referenced. On exit, the +*> upper triangular part of the array A is overwritten by the +*> upper triangular part of the updated matrix. +*> Before entry, with UPLO = 'L' or 'l', the leading n by n +*> lower triangular part of the array A must contain the lower +*> triangular part of the symmetric matrix and the strictly +*> upper triangular part of A is not referenced. On exit, the +*> lower triangular part of the array A is overwritten by the +*> lower triangular part of the updated matrix. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, N ). +*> Unchanged on exit. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYauxiliary +* +* ===================================================================== + SUBROUTINE CSYR( UPLO, N, ALPHA, X, INCX, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCX, LDA, N + COMPLEX ALPHA +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, IX, J, JX, KX + COMPLEX TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = 1 + ELSE IF( N.LT.0 ) THEN + INFO = 2 + ELSE IF( INCX.EQ.0 ) THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = 7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYR ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ) .OR. ( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 ) THEN + KX = 1 - ( N-1 )*INCX + ELSE IF( INCX.NE.1 ) THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Form A when A is stored in upper triangle. +* + IF( INCX.EQ.1 ) THEN + DO 20 J = 1, N + IF( X( J ).NE.ZERO ) THEN + TEMP = ALPHA*X( J ) + DO 10 I = 1, J + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40 J = 1, N + IF( X( JX ).NE.ZERO ) THEN + TEMP = ALPHA*X( JX ) + IX = KX + DO 30 I = 1, J + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in lower triangle. +* + IF( INCX.EQ.1 ) THEN + DO 60 J = 1, N + IF( X( J ).NE.ZERO ) THEN + TEMP = ALPHA*X( J ) + DO 50 I = J, N + A( I, J ) = A( I, J ) + X( I )*TEMP + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80 J = 1, N + IF( X( JX ).NE.ZERO ) THEN + TEMP = ALPHA*X( JX ) + IX = JX + DO 70 I = J, N + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of CSYR +* + END diff --git a/dspl/liblapack/SRC/csyrfs.f b/dspl/liblapack/SRC/csyrfs.f new file mode 100644 index 0000000..b00fde8 --- /dev/null +++ b/dspl/liblapack/SRC/csyrfs.f @@ -0,0 +1,446 @@ +*> \brief \b CSYRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, +* X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric indefinite, and +*> provides error bounds and backward error estimates for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The factored form of the matrix A. AF contains the block +*> diagonal matrix D and the multipliers used to obtain the +*> factor U or L from the factorization A = U*D*U**T or +*> A = L*D*L**T as computed by CSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSYTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by CSYTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, + $ X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CLACN2, CSYMV, CSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL CCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL CSYMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 40 CONTINUE + RWORK( K ) = RWORK( K ) + CABS1( A( K, K ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + CABS1( A( K, K ) )*XK + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL CSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**T). +* + CALL CSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL CSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of CSYRFS +* + END diff --git a/dspl/liblapack/SRC/csyrfsx.f b/dspl/liblapack/SRC/csyrfsx.f new file mode 100644 index 0000000..7323ba8 --- /dev/null +++ b/dspl/liblapack/SRC/csyrfsx.f @@ -0,0 +1,703 @@ +*> \brief \b CSYRFSX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYRFSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYRFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, +* S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, +* WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, EQUED +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX, * ), WORK( * ) +* REAL S( * ), PARAMS( * ), BERR( * ), RWORK( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYRFSX improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric indefinite, and +*> provides error bounds and backward error estimates for the +*> solution. In addition to normwise error bound, the code provides +*> maximum componentwise error bound if possible. See comments for +*> ERR_BNDS_NORM and ERR_BNDS_COMP for details of the error bounds. +*> +*> The original system of linear equations may have been equilibrated +*> before calling this routine, as described by arguments EQUED and S +*> below. In this case, the solution and error bounds returned are +*> for the original unequilibrated system. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done to A +*> before calling this routine. This is needed to compute +*> the solution and error bounds correctly. +*> = 'N': No equilibration +*> = 'Y': Both row and column equilibration, i.e., A has been +*> replaced by diag(S) * A * diag(S). +*> The right hand side B has been changed accordingly. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular +*> part of the matrix A, and the strictly lower triangular +*> part of A is not referenced. If UPLO = 'L', the leading +*> N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> The factored form of the matrix A. AF contains the block +*> diagonal matrix D and the multipliers used to obtain the +*> factor U or L from the factorization A = U*D*U**T or A = +*> L*D*L**T as computed by SSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSYTRF. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A. If EQUED = 'Y', A is multiplied on +*> the left and right by diag(S). S is an input argument if FACT = +*> 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED +*> = 'Y', each element of S must be positive. If S is output, each +*> element of S is a power of the radix. If S is input, each element +*> of S should be a power of the radix to ensure a reliable solution +*> and error estimates. Scaling by powers of the radix does not cause +*> rounding errors unless the result underflows or overflows. +*> Rounding errors during scaling lead to refining with a matrix that +*> is not equivalent to the input matrix, producing error estimates +*> that may not be reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by SGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is REAL array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CSYRFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO, EQUED + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX, * ), WORK( * ) + REAL S( * ), PARAMS( * ), BERR( * ), RWORK( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL ITREF_DEFAULT, ITHRESH_DEFAULT, + $ COMPONENTWISE_DEFAULT + REAL RTHRESH_DEFAULT, DZTHRESH_DEFAULT + PARAMETER ( ITREF_DEFAULT = 1.0 ) + PARAMETER ( ITHRESH_DEFAULT = 10.0 ) + PARAMETER ( COMPONENTWISE_DEFAULT = 1.0 ) + PARAMETER ( RTHRESH_DEFAULT = 0.5 ) + PARAMETER ( DZTHRESH_DEFAULT = 0.25 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. Local Scalars .. + CHARACTER(1) NORM + LOGICAL RCEQU + INTEGER J, PREC_TYPE, REF_TYPE + INTEGER N_NORMS + REAL ANORM, RCOND_TMP + REAL ILLRCOND_THRESH, ERR_LBND, CWISE_WRONG + LOGICAL IGNORE_CWISE + INTEGER ITHRESH + REAL RTHRESH, UNSTABLE_THRESH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CSYCON, CLA_SYRFSX_EXTENDED +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT, TRANSFER +* .. +* .. External Functions .. + EXTERNAL LSAME, ILAPREC + EXTERNAL SLAMCH, CLANSY, CLA_SYRCOND_X, CLA_SYRCOND_C + REAL SLAMCH, CLANSY, CLA_SYRCOND_X, CLA_SYRCOND_C + LOGICAL LSAME + INTEGER ILAPREC +* .. +* .. Executable Statements .. +* +* Check the input parameters. +* + INFO = 0 + REF_TYPE = INT( ITREF_DEFAULT ) + IF ( NPARAMS .GE. LA_LINRX_ITREF_I ) THEN + IF ( PARAMS( LA_LINRX_ITREF_I ) .LT. 0.0 ) THEN + PARAMS( LA_LINRX_ITREF_I ) = ITREF_DEFAULT + ELSE + REF_TYPE = PARAMS( LA_LINRX_ITREF_I ) + END IF + END IF +* +* Set default parameters. +* + ILLRCOND_THRESH = REAL( N ) * SLAMCH( 'Epsilon' ) + ITHRESH = INT( ITHRESH_DEFAULT ) + RTHRESH = RTHRESH_DEFAULT + UNSTABLE_THRESH = DZTHRESH_DEFAULT + IGNORE_CWISE = COMPONENTWISE_DEFAULT .EQ. 0.0 +* + IF ( NPARAMS.GE.LA_LINRX_ITHRESH_I ) THEN + IF ( PARAMS( LA_LINRX_ITHRESH_I ).LT.0.0 ) THEN + PARAMS( LA_LINRX_ITHRESH_I ) = ITHRESH + ELSE + ITHRESH = INT( PARAMS( LA_LINRX_ITHRESH_I ) ) + END IF + END IF + IF ( NPARAMS.GE.LA_LINRX_CWISE_I ) THEN + IF ( PARAMS( LA_LINRX_CWISE_I ).LT.0.0 ) THEN + IF ( IGNORE_CWISE ) THEN + PARAMS( LA_LINRX_CWISE_I ) = 0.0 + ELSE + PARAMS( LA_LINRX_CWISE_I ) = 1.0 + END IF + ELSE + IGNORE_CWISE = PARAMS( LA_LINRX_CWISE_I ) .EQ. 0.0 + END IF + END IF + IF ( REF_TYPE .EQ. 0 .OR. N_ERR_BNDS .EQ. 0 ) THEN + N_NORMS = 0 + ELSE IF ( IGNORE_CWISE ) THEN + N_NORMS = 1 + ELSE + N_NORMS = 2 + END IF +* + RCEQU = LSAME( EQUED, 'Y' ) +* +* Test input parameters. +* + IF ( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RCEQU .AND. .NOT.LSAME( EQUED, 'N' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYRFSX', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + RCOND = 1.0 + DO J = 1, NRHS + BERR( J ) = 0.0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 0.0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 0.0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 1.0 + END IF + END DO + RETURN + END IF +* +* Default to failure. +* + RCOND = 0.0 + DO J = 1, NRHS + BERR( J ) = 1.0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 0.0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 0.0 + END IF + END DO +* +* Compute the norm of A and the reciprocal of the condition +* number of A. +* + NORM = 'I' + ANORM = CLANSY( NORM, UPLO, N, A, LDA, RWORK ) + CALL CSYCON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, + $ INFO ) +* +* Perform refinement on each right-hand side +* + IF ( REF_TYPE .NE. 0 ) THEN + + PREC_TYPE = ILAPREC( 'D' ) + + CALL CLA_SYRFSX_EXTENDED( PREC_TYPE, UPLO, N, + $ NRHS, A, LDA, AF, LDAF, IPIV, RCEQU, S, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, ERR_BNDS_COMP, + $ WORK, RWORK, WORK(N+1), + $ TRANSFER (RWORK(1:2*N), (/ (ZERO, ZERO) /), N), RCOND, + $ ITHRESH, RTHRESH, UNSTABLE_THRESH, IGNORE_CWISE, + $ INFO ) + END IF + + ERR_LBND = MAX( 10.0, SQRT( REAL( N ) ) ) * SLAMCH( 'Epsilon' ) + IF (N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 1) THEN +* +* Compute scaled normwise condition number cond(A*C). +* + IF ( RCEQU ) THEN + RCOND_TMP = CLA_SYRCOND_C( UPLO, N, A, LDA, AF, LDAF, IPIV, + $ S, .TRUE., INFO, WORK, RWORK ) + ELSE + RCOND_TMP = CLA_SYRCOND_C( UPLO, N, A, LDA, AF, LDAF, IPIV, + $ S, .FALSE., INFO, WORK, RWORK ) + END IF + DO J = 1, NRHS +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .GT. 1.0 ) + $ ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 0.0 + IF ( INFO .LE. N ) INFO = N + J + ELSE IF ( ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .LT. ERR_LBND ) + $ THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + END DO + END IF + + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 2 ) THEN +* +* Compute componentwise condition number cond(A*diag(Y(:,J))) for +* each right-hand side using the current solution as an estimate of +* the true solution. If the componentwise error estimate is too +* large, then the solution is a lousy estimate of truth and the +* estimated RCOND may be too optimistic. To avoid misleading users, +* the inverse condition number is set to 0.0 when the estimated +* cwise error is at least CWISE_WRONG. +* + CWISE_WRONG = SQRT( SLAMCH( 'Epsilon' ) ) + DO J = 1, NRHS + IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .LT. CWISE_WRONG ) + $ THEN + RCOND_TMP = CLA_SYRCOND_X( UPLO, N, A, LDA, AF, LDAF, + $ IPIV, X(1,J), INFO, WORK, RWORK ) + ELSE + RCOND_TMP = 0.0 + END IF +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .GT. 1.0 ) + $ ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 + +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 0.0 + IF ( .NOT. IGNORE_CWISE + $ .AND. INFO.LT.N + J ) INFO = N + J + ELSE IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) + $ .LT. ERR_LBND ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF +* + RETURN +* +* End of CSYRFSX +* + END diff --git a/dspl/liblapack/SRC/csysv.f b/dspl/liblapack/SRC/csysv.f new file mode 100644 index 0000000..827ac09 --- /dev/null +++ b/dspl/liblapack/SRC/csysv.f @@ -0,0 +1,270 @@ +*> \brief CSYSV computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYSV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> The diagonal pivoting method is used to factor A as +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then +*> used to solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the block diagonal matrix D and the +*> multipliers used to obtain the factor U or L from the +*> factorization A = U*D*U**T or A = L*D*L**T as computed by +*> CSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, as +*> determined by CSYTRF. If IPIV(k) > 0, then rows and columns +*> k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +*> diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +*> then rows and columns k-1 and -IPIV(k) were interchanged and +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +*> IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +*> -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +*> diagonal block. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= 1, and for best performance +*> LWORK >= max(1,N*NB), where NB is the optimal blocksize for +*> CSYTRF. +*> for LWORK < N, TRS will be done with Level BLAS 2 +*> for LWORK >= N, TRS will be done with Level BLAS 3 +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYsolve +* +* ===================================================================== + SUBROUTINE CSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CSYTRF, CSYTRS, CSYTRS2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + CALL CSYTRF( UPLO, N, A, LDA, IPIV, WORK, -1, INFO ) + LWKOPT = WORK(1) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYSV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL CSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + IF ( LWORK.LT.N ) THEN +* +* Solve with TRS ( Use Level BLAS 2) +* + CALL CSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* + ELSE +* +* Solve with TRS2 ( Use Level BLAS 3) +* + CALL CSYTRS2( UPLO,N,NRHS,A,LDA,IPIV,B,LDB,WORK,INFO ) +* + END IF +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CSYSV +* + END diff --git a/dspl/liblapack/SRC/csysv_aa.f b/dspl/liblapack/SRC/csysv_aa.f new file mode 100644 index 0000000..9cd669d --- /dev/null +++ b/dspl/liblapack/SRC/csysv_aa.f @@ -0,0 +1,254 @@ +*> \brief CSYSV_AA computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYSV_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYSV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYSV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Aasen's algorithm is used to factor A as +*> A = U * T * U**T, if UPLO = 'U', or +*> A = L * T * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is symmetric tridiagonal. The factored +*> form of A is then used to solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the tridiagonal matrix T and the +*> multipliers used to obtain the factor U or L from the +*> factorization A = U*T*U**T or A = L*T*L**T as computed by +*> CSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= MAX(2*N, 3*N-2), and for +*> the best performance, LWORK >= max(1,N*NB), where NB is +*> the optimal blocksize for CSYTRF_AA. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYsolve +* +* ===================================================================== + SUBROUTINE CSYSV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT, LWKOPT_SYTRF, LWKOPT_SYTRS +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CSYTRF_AA, CSYTRS_AA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX(2*N, 3*N-2) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + CALL CSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, -1, INFO ) + LWKOPT_SYTRF = INT( WORK(1) ) + CALL CSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ -1, INFO ) + LWKOPT_SYTRS = INT( WORK(1) ) + LWKOPT = MAX( LWKOPT_SYTRF, LWKOPT_SYTRS ) + WORK( 1 ) = LWKOPT + IF( LWORK.LT.LWKOPT .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYSV_AA ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*T*U**T or A = L*T*L**T. +* + CALL CSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CSYSV_AA +* + END diff --git a/dspl/liblapack/SRC/csysv_aa_2stage.f b/dspl/liblapack/SRC/csysv_aa_2stage.f new file mode 100644 index 0000000..cba57fc --- /dev/null +++ b/dspl/liblapack/SRC/csysv_aa_2stage.f @@ -0,0 +1,276 @@ +*> \brief CSYSV_AA_2STAGE computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYSV_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYSV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, +* IPIV, IPIV2, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX A( LDA, * ), TB( * ), B( LDB, *), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYSV_AA_2STAGE computes the solution to a complex system of +*> linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Aasen's 2-stage algorithm is used to factor A as +*> A = U * T * U**H, if UPLO = 'U', or +*> A = L * T * L**H, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is symmetric and band. The matrix T is +*> then LU-factored with partial pivoting. The factored form of A +*> is then used to solve the system of equations A * X = B. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CSYSV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LDB, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX A( LDA, * ), B( LDB, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSYTRF_AA_2STAGE, + $ CSYTRS_AA_2STAGE, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + CALL CSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, + $ IPIV2, WORK, -1, INFO ) + LWKOPT = INT( WORK(1) ) + IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYSV_AA_2STAGE', -INFO ) + RETURN + ELSE IF( WQUERY .OR. TQUERY ) THEN + RETURN + END IF +* +* +* Compute the factorization A = U*T*U**H or A = L*T*L**H. +* + CALL CSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, IPIV2, + $ WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL CSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, + $ IPIV2, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* +* End of CSYSV_AA_2STAGE +* + END diff --git a/dspl/liblapack/SRC/csysv_rk.f b/dspl/liblapack/SRC/csysv_rk.f new file mode 100644 index 0000000..f2b2175 --- /dev/null +++ b/dspl/liblapack/SRC/csysv_rk.f @@ -0,0 +1,316 @@ +*> \brief CSYSV_RK computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYSV_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYSV_RK( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), E( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> CSYSV_RK computes the solution to a complex system of linear +*> equations A * X = B, where A is an N-by-N symmetric matrix +*> and X and B are N-by-NRHS matrices. +*> +*> The bounded Bunch-Kaufman (rook) diagonal pivoting method is used +*> to factor A as +*> A = P*U*D*(U**T)*(P**T), if UPLO = 'U', or +*> A = P*L*D*(L**T)*(P**T), if UPLO = 'L', +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> CSYTRF_RK is called to compute the factorization of a complex +*> symmetric matrix. The factored form of A is then used to solve +*> the system of equations A * X = B by calling BLAS3 routine CSYTRS_3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the symmetric matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, if INFO = 0, diagonal of the block diagonal +*> matrix D and factors U or L as computed by CSYTRF_RK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> For more info see the description of CSYTRF_RK routine. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is COMPLEX array, dimension (N) +*> On exit, contains the output computed by the factorization +*> routine CSYTRF_RK, i.e. the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> +*> For more info see the description of CSYTRF_RK routine. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, +*> as determined by CSYTRF_RK. +*> +*> For more info see the description of CSYTRF_RK routine. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension ( MAX(1,LWORK) ). +*> Work array used in the factorization stage. +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= 1. For best performance +*> of factorization stage LWORK >= max(1,N*NB), where NB is +*> the optimal blocksize for CSYTRF_RK. +*> +*> If LWORK = -1, then a workspace query is assumed; +*> the routine only calculates the optimal size of the WORK +*> array for factorization stage, returns this value as +*> the first entry of the WORK array, and no error message +*> related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CSYSV_RK( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CSYTRF_RK, CSYTRS_3 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + CALL CSYTRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, -1, INFO ) + LWKOPT = WORK(1) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYSV_RK ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL CSYTRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, INFO ) +* + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B with BLAS3 solver, overwriting B with X. +* + CALL CSYTRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CSYSV_RK +* + END diff --git a/dspl/liblapack/SRC/csysv_rook.f b/dspl/liblapack/SRC/csysv_rook.f new file mode 100644 index 0000000..e0ab726 --- /dev/null +++ b/dspl/liblapack/SRC/csysv_rook.f @@ -0,0 +1,293 @@ +*> \brief CSYSV_ROOK computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYSV_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYSV_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYSV_ROOK computes the solution to a complex system of linear +*> equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> The diagonal pivoting method is used to factor A as +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> CSYTRF_ROOK is called to compute the factorization of a complex +*> symmetric matrix A using the bounded Bunch-Kaufman ("rook") diagonal +*> pivoting method. +*> +*> The factored form of A is then used to solve the system +*> of equations A * X = B by calling CSYTRS_ROOK. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the block diagonal matrix D and the +*> multipliers used to obtain the factor U or L from the +*> factorization A = U*D*U**T or A = L*D*L**T as computed by +*> CSYTRF_ROOK. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, +*> as determined by CSYTRF_ROOK. +*> +*> If UPLO = 'U': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= 1, and for best performance +*> LWORK >= max(1,N*NB), where NB is the optimal blocksize for +*> CSYTRF_ROOK. +*> +*> TRS will be done with Level 2 BLAS +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexSYsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> April 2012, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CSYSV_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CSYTRF_ROOK, CSYTRS_ROOK +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + CALL CSYTRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, -1, INFO ) + LWKOPT = WORK(1) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYSV_ROOK ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL CSYTRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* +* Solve with TRS_ROOK ( Use Level 2 BLAS) +* + CALL CSYTRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CSYSV_ROOK +* + END diff --git a/dspl/liblapack/SRC/csysvx.f b/dspl/liblapack/SRC/csysvx.f new file mode 100644 index 0000000..46f65f6 --- /dev/null +++ b/dspl/liblapack/SRC/csysvx.f @@ -0,0 +1,417 @@ +*> \brief CSYSVX computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, +* LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, +* RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER FACT, UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYSVX uses the diagonal pivoting factorization to compute the +*> solution to a complex system of linear equations A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'N', the diagonal pivoting method is used to factor A. +*> The form of the factorization is +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> 2. If some D(i,i)=0, so that D is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 3. The system of equations is solved for X using the factored form +*> of A. +*> +*> 4. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of A has been +*> supplied on entry. +*> = 'F': On entry, AF and IPIV contain the factored form +*> of A. A, AF and IPIV will not be modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**T or A = L*D*L**T as computed by CSYTRF. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**T or A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains details of the interchanges and the block structure +*> of D, as determined by CSYTRF. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains details of the interchanges and the block structure +*> of D, as determined by CSYTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The estimate of the reciprocal condition number of the matrix +*> A. If RCOND is less than the machine precision (in +*> particular, if RCOND = 0), the matrix is singular to working +*> precision. This condition is indicated by a return code of +*> INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= max(1,2*N), and for best +*> performance, when FACT = 'N', LWORK >= max(1,2*N,N*NB), where +*> NB is the optimal blocksize for CSYTRF. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: D(i,i) is exactly zero. The factorization +*> has been completed but the factor D is exactly +*> singular, so the solution and error bounds could +*> not be computed. RCOND = 0 is returned. +*> = N+1: D is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexSYsolve +* +* ===================================================================== + SUBROUTINE CSYSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, + $ LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, + $ RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOFACT + INTEGER LWKOPT, NB + REAL ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL CLANSY, SLAMCH + EXTERNAL ILAENV, LSAME, CLANSY, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACPY, CSYCON, CSYRFS, CSYTRF, CSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKOPT = MAX( 1, 2*N ) + IF( NOFACT ) THEN + NB = ILAENV( 1, 'CSYTRF', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( LWKOPT, N*NB ) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYSVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL CLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL CSYTRF( UPLO, N, AF, LDAF, IPIV, WORK, LWORK, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = CLANSY( 'I', UPLO, N, A, LDA, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL CSYCON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, INFO ) +* +* Compute the solution vectors X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CSYTRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL CSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CSYSVX +* + END diff --git a/dspl/liblapack/SRC/csysvxx.f b/dspl/liblapack/SRC/csysvxx.f new file mode 100644 index 0000000..2fd2c87 --- /dev/null +++ b/dspl/liblapack/SRC/csysvxx.f @@ -0,0 +1,701 @@ +*> \brief CSYSVXX computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYSVXX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYSVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, +* EQUED, S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, +* N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, +* NPARAMS, PARAMS, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* REAL RCOND, RPVGRW +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX, * ), WORK( * ) +* REAL S( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYSVXX uses the diagonal pivoting factorization to compute the +*> solution to a complex system of linear equations A * X = B, where +*> A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> If requested, both normwise and maximum componentwise error bounds +*> are returned. CSYSVXX will return a solution with a tiny +*> guaranteed error (O(eps) where eps is the working machine +*> precision) unless the matrix is very ill-conditioned, in which +*> case a warning is returned. Relevant condition numbers also are +*> calculated and returned. +*> +*> CSYSVXX accepts user-provided factorizations and equilibration +*> factors; see the definitions of the FACT and EQUED options. +*> Solving with refinement and using a factorization from a previous +*> CSYSVXX call will also produce a solution with either O(eps) +*> errors or warnings, but we cannot make that claim for general +*> user-provided factorizations and equilibration factors if they +*> differ from what CSYSVXX would itself produce. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> +*> diag(S)*A*diag(S) *inv(diag(S))*X = diag(S)*B +*> +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor +*> the matrix A (after equilibration if FACT = 'E') as +*> +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> 3. If some D(i,i)=0, so that D is exactly singular, then the +*> routine returns with INFO = i. Otherwise, the factored form of A +*> is used to estimate the condition number of the matrix A (see +*> argument RCOND). If the reciprocal of the condition number is +*> less than machine precision, the routine still goes on to solve +*> for X and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), +*> the routine will use iterative refinement to try to get a small +*> error and error bounds. Refinement calculates the residual to at +*> least twice the working precision. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(R) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF and IPIV contain the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by S. +*> A, AF, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular +*> part of the matrix A, and the strictly lower triangular +*> part of A is not referenced. If UPLO = 'L', the leading +*> N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is COMPLEX array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L from the factorization A = +*> U*D*U**T or A = L*D*L**T as computed by SSYTRF. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L from the factorization A = +*> U*D*U**T or A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains details of the interchanges and the block +*> structure of D, as determined by SSYTRF. If IPIV(k) > 0, +*> then rows and columns k and IPIV(k) were interchanged and +*> D(k,k) is a 1-by-1 diagonal block. If UPLO = 'U' and +*> IPIV(k) = IPIV(k-1) < 0, then rows and columns k-1 and +*> -IPIV(k) were interchanged and D(k-1:k,k-1:k) is a 2-by-2 +*> diagonal block. If UPLO = 'L' and IPIV(k) = IPIV(k+1) < 0, +*> then rows and columns k+1 and -IPIV(k) were interchanged +*> and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains details of the interchanges and the block +*> structure of D, as determined by SSYTRF. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Both row and column equilibration, i.e., A has been +*> replaced by diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A. If EQUED = 'Y', A is multiplied on +*> the left and right by diag(S). S is an input argument if FACT = +*> 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED +*> = 'Y', each element of S must be positive. If S is output, each +*> element of S is a power of the radix. If S is input, each element +*> of S should be a power of the radix to ensure a reliable solution +*> and error estimates. Scaling by powers of the radix does not cause +*> rounding errors unless the result underflows or overflows. +*> Rounding errors during scaling lead to refining with a matrix that +*> is not equivalent to the input matrix, producing error estimates +*> that may not be reliable. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if EQUED = 'Y', B is overwritten by diag(S)*B; +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X to the original +*> system of equations. Note that A and B are modified on exit if +*> EQUED .ne. 'N', and the solution to the equilibrated system is +*> inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] RPVGRW +*> \verbatim +*> RPVGRW is REAL +*> Reciprocal pivot growth. On exit, this contains the reciprocal +*> pivot growth factor norm(A)/norm(U). The "max absolute element" +*> norm is used. If this is much less than 1, then the stability of +*> the LU factorization of the (equilibrated) matrix A could be poor. +*> This also means that the solution X, estimated condition numbers, +*> and error bounds could be unreliable. If factorization fails with +*> 0 for the leading INFO columns of A. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is REAL array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexSYsolve +* +* ===================================================================== + SUBROUTINE CSYSVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ EQUED, S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, + $ N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, + $ NPARAMS, PARAMS, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + REAL RCOND, RPVGRW +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX, * ), WORK( * ) + REAL S( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER INFEQU, J + REAL AMAX, BIGNUM, SMIN, SMAX, SCOND, SMLNUM +* .. +* .. External Functions .. + EXTERNAL LSAME, SLAMCH, CLA_SYRPVGRW + LOGICAL LSAME + REAL SLAMCH, CLA_SYRPVGRW +* .. +* .. External Subroutines .. + EXTERNAL CSYEQUB, CSYTRF, CSYTRS, CLACPY, + $ CLAQSY, XERBLA, CLASCL2, CSYRFSX +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + ENDIF +* +* Default is failure. If an input parameter is wrong or +* factorization fails, make everything look horrible. Only the +* pivot growth is set here, the rest is initialized in CSYRFSX. +* + RPVGRW = ZERO +* +* Test the input parameters. PARAMS is not tested until CSYRFSX. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT. + $ LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME(UPLO, 'U') .AND. + $ .NOT.LSAME(UPLO, 'L') ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF ( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYSVXX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL CSYEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL CLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + + END IF +* +* Scale the right hand-side. +* + IF( RCEQU ) CALL CLASCL2( N, NRHS, S, B, LDB ) +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LDL^T or UDU^T factorization of A. +* + CALL CLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL CSYTRF( UPLO, N, AF, LDAF, IPIV, WORK, 5*MAX(1,N), INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Pivot in column INFO is exactly 0 +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + IF ( N.GT.0 ) + $ RPVGRW = CLA_SYRPVGRW( UPLO, N, INFO, A, LDA, AF, + $ LDAF, IPIV, RWORK ) + RETURN + END IF + END IF +* +* Compute the reciprocal pivot growth factor RPVGRW. +* + IF ( N.GT.0 ) + $ RPVGRW = CLA_SYRPVGRW( UPLO, N, INFO, A, LDA, AF, LDAF, + $ IPIV, RWORK ) +* +* Compute the solution matrix X. +* + CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL CSYTRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL CSYRFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, RWORK, INFO ) +* +* Scale solutions. +* + IF ( RCEQU ) THEN + CALL CLASCL2 (N, NRHS, S, X, LDX ) + END IF +* + RETURN +* +* End of CSYSVXX +* + END diff --git a/dspl/liblapack/SRC/csyswapr.f b/dspl/liblapack/SRC/csyswapr.f new file mode 100644 index 0000000..b8291da --- /dev/null +++ b/dspl/liblapack/SRC/csyswapr.f @@ -0,0 +1,193 @@ +*> \brief \b CSYSWAPR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYSWAPR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYSWAPR( UPLO, N, A, LDA, I1, I2) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER I1, I2, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, N ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYSWAPR applies an elementary permutation on the rows and the columns of +*> a symmetric matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the NB diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by CSYTRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] I1 +*> \verbatim +*> I1 is INTEGER +*> Index of the first row to swap +*> \endverbatim +*> +*> \param[in] I2 +*> \verbatim +*> I2 is INTEGER +*> Index of the second row to swap +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYauxiliary +* +* ===================================================================== + SUBROUTINE CSYSWAPR( UPLO, N, A, LDA, I1, I2) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER I1, I2, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, N ) +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + COMPLEX TMP +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSWAP +* .. +* .. Executable Statements .. +* + UPPER = LSAME( UPLO, 'U' ) + IF (UPPER) THEN +* +* UPPER +* first swap +* - swap column I1 and I2 from I1 to I1-1 + CALL CSWAP( I1-1, A(1,I1), 1, A(1,I2), 1 ) +* +* second swap : +* - swap A(I1,I1) and A(I2,I2) +* - swap row I1 from I1+1 to I2-1 with col I2 from I1+1 to I2-1 + TMP=A(I1,I1) + A(I1,I1)=A(I2,I2) + A(I2,I2)=TMP +* + DO I=1,I2-I1-1 + TMP=A(I1,I1+I) + A(I1,I1+I)=A(I1+I,I2) + A(I1+I,I2)=TMP + END DO +* +* third swap +* - swap row I1 and I2 from I2+1 to N + DO I=I2+1,N + TMP=A(I1,I) + A(I1,I)=A(I2,I) + A(I2,I)=TMP + END DO +* + ELSE +* +* LOWER +* first swap +* - swap row I1 and I2 from I1 to I1-1 + CALL CSWAP ( I1-1, A(I1,1), LDA, A(I2,1), LDA ) +* +* second swap : +* - swap A(I1,I1) and A(I2,I2) +* - swap col I1 from I1+1 to I2-1 with row I2 from I1+1 to I2-1 + TMP=A(I1,I1) + A(I1,I1)=A(I2,I2) + A(I2,I2)=TMP +* + DO I=1,I2-I1-1 + TMP=A(I1+I,I1) + A(I1+I,I1)=A(I2,I1+I) + A(I2,I1+I)=TMP + END DO +* +* third swap +* - swap col I1 and I2 from I2+1 to N + DO I=I2+1,N + TMP=A(I,I1) + A(I,I1)=A(I,I2) + A(I,I2)=TMP + END DO +* + ENDIF + END SUBROUTINE CSYSWAPR + diff --git a/dspl/liblapack/SRC/csytf2.f b/dspl/liblapack/SRC/csytf2.f new file mode 100644 index 0000000..370e57d --- /dev/null +++ b/dspl/liblapack/SRC/csytf2.f @@ -0,0 +1,611 @@ +*> \brief \b CSYTF2 computes the factorization of a real symmetric indefinite matrix, using the diagonal pivoting method (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTF2( UPLO, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYTF2 computes the factorization of a complex symmetric matrix A +*> using the Bunch-Kaufman diagonal pivoting method: +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, U**T is the transpose of U, and D is symmetric and +*> block diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k-1) < 0, then rows and columns +*> k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k+1) < 0, then rows and columns +*> k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1) +*> is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> 09-29-06 - patch from +*> Bobby Cheng, MathWorks +*> +*> Replace l.209 and l.377 +*> IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +*> by +*> IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) .OR. SISNAN(ABSAKK) ) THEN +*> +*> 1-96 - Based on modifications by J. Lewis, Boeing Computer Services +*> Company +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CSYTF2( UPLO, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KK, KP, KSTEP + REAL ABSAKK, ALPHA, COLMAX, ROWMAX + COMPLEX D11, D12, D21, D22, R1, T, WK, WKM1, WKP1, Z +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + INTEGER ICAMAX + EXTERNAL LSAME, ICAMAX, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL CSCAL, CSWAP, CSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL, SQRT +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( REAL( Z ) ) + ABS( AIMAG( Z ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTF2', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 70 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = ICAMAX( K-1, A( 1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO .OR. SISNAN(ABSAKK) ) THEN +* +* Column K is zero or underflow, or contains a NaN: +* set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + ICAMAX( K-IMAX, A( IMAX, IMAX+1 ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + IF( IMAX.GT.1 ) THEN + JMAX = ICAMAX( IMAX-1, A( 1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL CSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) + CALL CSWAP( KK-KP-1, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)**T = A - W(k)*1/D(k)*W(k)**T +* + R1 = CONE / A( K, K ) + CALL CSYR( UPLO, K-1, -R1, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL CSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )**T +* + IF( K.GT.2 ) THEN +* + D12 = A( K-1, K ) + D22 = A( K-1, K-1 ) / D12 + D11 = A( K, K ) / D12 + T = CONE / ( D11*D22-CONE ) + D12 = T / D12 +* + DO 30 J = K - 2, 1, -1 + WKM1 = D12*( D11*A( J, K-1 )-A( J, K ) ) + WK = D12*( D22*A( J, K )-A( J, K-1 ) ) + DO 20 I = J, 1, -1 + A( I, J ) = A( I, J ) - A( I, K )*WK - + $ A( I, K-1 )*WKM1 + 20 CONTINUE + A( J, K ) = WK + A( J, K-1 ) = WKM1 + 30 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 70 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + ICAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO .OR. SISNAN(ABSAKK) ) THEN +* +* Column K is zero or underflow, or contains a NaN: +* set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + ICAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + ICAMAX( N-IMAX, A( IMAX+1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) + CALL CSWAP( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)**T = A - W(k)*(1/D(k))*W(k)**T +* + R1 = CONE / A( K, K ) + CALL CSYR( UPLO, N-K, -R1, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column K +* + CALL CSCAL( N-K, R1, A( K+1, K ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k) +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )**T +* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )**T +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D21 = A( K+1, K ) + D11 = A( K+1, K+1 ) / D21 + D22 = A( K, K ) / D21 + T = CONE / ( D11*D22-CONE ) + D21 = T / D21 +* + DO 60 J = K + 2, N + WK = D21*( D11*A( J, K )-A( J, K+1 ) ) + WKP1 = D21*( D22*A( J, K+1 )-A( J, K ) ) + DO 50 I = J, N + A( I, J ) = A( I, J ) - A( I, K )*WK - + $ A( I, K+1 )*WKP1 + 50 CONTINUE + A( J, K ) = WK + A( J, K+1 ) = WKP1 + 60 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 40 +* + END IF +* + 70 CONTINUE + RETURN +* +* End of CSYTF2 +* + END diff --git a/dspl/liblapack/SRC/csytf2_rk.f b/dspl/liblapack/SRC/csytf2_rk.f new file mode 100644 index 0000000..3b5e53a --- /dev/null +++ b/dspl/liblapack/SRC/csytf2_rk.f @@ -0,0 +1,952 @@ +*> \brief \b CSYTF2_RK computes the factorization of a complex symmetric indefinite matrix using the bounded Bunch-Kaufman (rook) diagonal pivoting method (BLAS2 unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTF2_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTF2_RK( UPLO, N, A, LDA, E, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), E ( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> CSYTF2_RK computes the factorization of a complex symmetric matrix A +*> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> For more information see Further Details section. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the symmetric matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, contains: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is COMPLEX array, dimension (N) +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> IPIV describes the permutation matrix P in the factorization +*> of matrix A as follows. The absolute value of IPIV(k) +*> represents the index of row and column that were +*> interchanged with the k-th row and column. The value of UPLO +*> describes the order in which the interchanges were applied. +*> Also, the sign of IPIV represents the block structure of +*> the symmetric block diagonal matrix D with 1-by-1 or 2-by-2 +*> diagonal blocks which correspond to 1 or 2 interchanges +*> at each factorization step. For more info see Further +*> Details section. +*> +*> If UPLO = 'U', +*> ( in factorization order, k decreases from N to 1 ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N); +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k-1) < 0 means: +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k-1) != k-1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k-1) = k-1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) <= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> +*> If UPLO = 'L', +*> ( in factorization order, k increases from 1 to N ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N). +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k+1) < 0 means: +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k+1) != k+1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k+1) = k+1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) >= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> TODO: put further details +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> 01-01-96 - Based on modifications by +*> J. Lewis, Boeing Computer Services Company +*> A. Petitet, Computer Science Dept., +*> Univ. of Tenn., Knoxville abd , USA +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CSYTF2_RK( UPLO, N, A, LDA, E, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) + COMPLEX CONE, CZERO + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), + $ CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER, DONE + INTEGER I, IMAX, J, JMAX, ITEMP, K, KK, KP, KSTEP, + $ P, II + REAL ABSAKK, ALPHA, COLMAX, ROWMAX, STEMP, SFMIN + COMPLEX D11, D12, D21, D22, T, WK, WKM1, WKP1, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CSCAL, CSWAP, CSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT, AIMAG, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( REAL( Z ) ) + ABS( AIMAG( Z ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTF2_RK', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = SLAMCH( 'S' ) +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* Initilize the first entry of array E, where superdiagonal +* elements of D are stored +* + E( 1 ) = CZERO +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 34 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = ICAMAX( K-1, A( 1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K +* +* Set E( K ) to zero +* + IF( K.GT.1 ) + $ E( K ) = CZERO +* + ELSE +* +* Test for interchange +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABSAKK.GE.ALPHA*COLMAX +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, +* use 1-by-1 pivot block +* + KP = K + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 12 CONTINUE +* +* Begin pivot search loop body +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + ICAMAX( K-IMAX, A( IMAX, IMAX+1 ), + $ LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = ICAMAX( IMAX-1, A( 1, IMAX ), 1 ) + STEMP = CABS1( A( ITEMP, IMAX ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX +* + IF( .NOT.( CABS1( A( IMAX, IMAX ) ).LT.ALPHA*ROWMAX )) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX .EQ. COLMAX, +* used to handle NaN and Inf +* + ELSE IF( ( P.EQ.JMAX ).OR.( ROWMAX.LE.COLMAX ) ) THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot NOT found, set variables and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 12 +* + END IF +* +* Swap TWO rows and TWO columns +* +* First swap +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Interchange rows and column K and P in the leading +* submatrix A(1:k,1:k) if we have a 2-by-2 pivot +* + IF( P.GT.1 ) + $ CALL CSWAP( P-1, A( 1, K ), 1, A( 1, P ), 1 ) + IF( P.LT.(K-1) ) + $ CALL CSWAP( K-P-1, A( P+1, K ), 1, A( P, P+1 ), + $ LDA ) + T = A( K, K ) + A( K, K ) = A( P, P ) + A( P, P ) = T +* +* Convert upper triangle of A into U form by applying +* the interchanges in columns k+1:N. +* + IF( K.LT.N ) + $ CALL CSWAP( N-K, A( K, K+1 ), LDA, A( P, K+1 ), LDA ) +* + END IF +* +* Second swap +* + KK = K - KSTEP + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + IF( KP.GT.1 ) + $ CALL CSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) + IF( ( KK.GT.1 ) .AND. ( KP.LT.(KK-1) ) ) + $ CALL CSWAP( KK-KP-1, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF +* +* Convert upper triangle of A into U form by applying +* the interchanges in columns k+1:N. +* + IF( K.LT.N ) + $ CALL CSWAP( N-K, A( KK, K+1 ), LDA, A( KP, K+1 ), + $ LDA ) +* + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* + IF( K.GT.1 ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) and +* store U(k) in column k +* + IF( CABS1( A( K, K ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*1/D(k)*W(k)**T +* + D11 = CONE / A( K, K ) + CALL CSYR( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL CSCAL( K-1, D11, A( 1, K ), 1 ) + ELSE +* +* Store L(k) in column K +* + D11 = A( K, K ) + DO 16 II = 1, K - 1 + A( II, K ) = A( II, K ) / D11 + 16 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL CSYR( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) + END IF +* +* Store the superdiagonal element of D in array E +* + E( K ) = CZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T +* = A - ( ( A(k-1)A(k) )*inv(D(k)) ) * ( A(k-1)A(k) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.GT.2 ) THEN +* + D12 = A( K-1, K ) + D22 = A( K-1, K-1 ) / D12 + D11 = A( K, K ) / D12 + T = CONE / ( D11*D22-CONE ) +* + DO 30 J = K - 2, 1, -1 +* + WKM1 = T*( D11*A( J, K-1 )-A( J, K ) ) + WK = T*( D22*A( J, K )-A( J, K-1 ) ) +* + DO 20 I = J, 1, -1 + A( I, J ) = A( I, J ) - (A( I, K ) / D12 )*WK - + $ ( A( I, K-1 ) / D12 )*WKM1 + 20 CONTINUE +* +* Store U(k) and U(k-1) in cols k and k-1 for row J +* + A( J, K ) = WK / D12 + A( J, K-1 ) = WKM1 / D12 +* + 30 CONTINUE +* + END IF +* +* Copy superdiagonal elements of D(K) to E(K) and +* ZERO out superdiagonal entry of A +* + E( K ) = A( K-1, K ) + E( K-1 ) = CZERO + A( K-1, K ) = CZERO +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 34 CONTINUE +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* Initilize the unused last entry of the subdiagonal array E. +* + E( N ) = CZERO +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 64 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + ICAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( ( MAX( ABSAKK, COLMAX ).EQ.ZERO ) ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K +* +* Set E( K ) to zero +* + IF( K.LT.N ) + $ E( K ) = CZERO +* + ELSE +* +* Test for interchange +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABSAKK.GE.ALPHA*COLMAX +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 42 CONTINUE +* +* Begin pivot search loop body +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + ICAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + ICAMAX( N-IMAX, A( IMAX+1, IMAX ), + $ 1 ) + STEMP = CABS1( A( ITEMP, IMAX ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX +* + IF( .NOT.( CABS1( A( IMAX, IMAX ) ).LT.ALPHA*ROWMAX )) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX .EQ. COLMAX, +* used to handle NaN and Inf +* + ELSE IF( ( P.EQ.JMAX ).OR.( ROWMAX.LE.COLMAX ) ) THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot NOT found, set variables and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 42 +* + END IF +* +* Swap TWO rows and TWO columns +* +* First swap +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Interchange rows and column K and P in the trailing +* submatrix A(k:n,k:n) if we have a 2-by-2 pivot +* + IF( P.LT.N ) + $ CALL CSWAP( N-P, A( P+1, K ), 1, A( P+1, P ), 1 ) + IF( P.GT.(K+1) ) + $ CALL CSWAP( P-K-1, A( K+1, K ), 1, A( P, K+1 ), LDA ) + T = A( K, K ) + A( K, K ) = A( P, P ) + A( P, P ) = T +* +* Convert lower triangle of A into L form by applying +* the interchanges in columns 1:k-1. +* + IF ( K.GT.1 ) + $ CALL CSWAP( K-1, A( K, 1 ), LDA, A( P, 1 ), LDA ) +* + END IF +* +* Second swap +* + KK = K + KSTEP - 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) + IF( ( KK.LT.N ) .AND. ( KP.GT.(KK+1) ) ) + $ CALL CSWAP( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF +* +* Convert lower triangle of A into L form by applying +* the interchanges in columns 1:k-1. +* + IF ( K.GT.1 ) + $ CALL CSWAP( K-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) +* + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) and +* store L(k) in column k +* + IF( CABS1( A( K, K ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* + D11 = CONE / A( K, K ) + CALL CSYR( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column k +* + CALL CSCAL( N-K, D11, A( K+1, K ), 1 ) + ELSE +* +* Store L(k) in column k +* + D11 = A( K, K ) + DO 46 II = K + 1, N + A( II, K ) = A( II, K ) / D11 + 46 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL CSYR( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) + END IF +* +* Store the subdiagonal element of D in array E +* + E( K ) = CZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) ) * D(k) * ( L(k) L(k+1) )**T +* = A - ( ( A(k)A(k+1) )*inv(D(k) ) * ( A(k)A(k+1) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.LT.N-1 ) THEN +* + D21 = A( K+1, K ) + D11 = A( K+1, K+1 ) / D21 + D22 = A( K, K ) / D21 + T = CONE / ( D11*D22-CONE ) +* + DO 60 J = K + 2, N +* +* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J +* + WK = T*( D11*A( J, K )-A( J, K+1 ) ) + WKP1 = T*( D22*A( J, K+1 )-A( J, K ) ) +* +* Perform a rank-2 update of A(k+2:n,k+2:n) +* + DO 50 I = J, N + A( I, J ) = A( I, J ) - ( A( I, K ) / D21 )*WK - + $ ( A( I, K+1 ) / D21 )*WKP1 + 50 CONTINUE +* +* Store L(k) and L(k+1) in cols k and k+1 for row J +* + A( J, K ) = WK / D21 + A( J, K+1 ) = WKP1 / D21 +* + 60 CONTINUE +* + END IF +* +* Copy subdiagonal elements of D(K) to E(K) and +* ZERO out subdiagonal entry of A +* + E( K ) = A( K+1, K ) + E( K+1 ) = CZERO + A( K+1, K ) = CZERO +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 40 +* + 64 CONTINUE +* + END IF +* + RETURN +* +* End of CSYTF2_RK +* + END diff --git a/dspl/liblapack/SRC/csytf2_rook.f b/dspl/liblapack/SRC/csytf2_rook.f new file mode 100644 index 0000000..2a80375 --- /dev/null +++ b/dspl/liblapack/SRC/csytf2_rook.f @@ -0,0 +1,821 @@ +*> \brief \b CSYTF2_ROOK computes the factorization of a complex symmetric indefinite matrix using the bounded Bunch-Kaufman ("rook") diagonal pivoting method (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTF2_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTF2_ROOK( UPLO, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYTF2_ROOK computes the factorization of a complex symmetric matrix A +*> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method: +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, U**T is the transpose of U, and D is symmetric and +*> block diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup complexSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> 01-01-96 - Based on modifications by +*> J. Lewis, Boeing Computer Services Company +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville abd , USA +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CSYTF2_ROOK( UPLO, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER, DONE + INTEGER I, IMAX, J, JMAX, ITEMP, K, KK, KP, KSTEP, + $ P, II + REAL ABSAKK, ALPHA, COLMAX, ROWMAX, STEMP, SFMIN + COMPLEX D11, D12, D21, D22, T, WK, WKM1, WKP1, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SLAMCH + EXTERNAL LSAME, ICAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CSCAL, CSWAP, CSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT, AIMAG, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( REAL( Z ) ) + ABS( AIMAG( Z ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTF2_ROOK', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = SLAMCH( 'S' ) +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 70 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = ICAMAX( K-1, A( 1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE +* +* Test for interchange +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABSAKK.GE.ALPHA*COLMAX +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, +* use 1-by-1 pivot block +* + KP = K + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 12 CONTINUE +* +* Begin pivot search loop body +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + ICAMAX( K-IMAX, A( IMAX, IMAX+1 ), + $ LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = ICAMAX( IMAX-1, A( 1, IMAX ), 1 ) + STEMP = CABS1( A( ITEMP, IMAX ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for (used to handle NaN and Inf) +* CABS1( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX +* + IF( .NOT.( CABS1(A( IMAX, IMAX )).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX .EQ. COLMAX, +* used to handle NaN and Inf +* + ELSE IF( ( P.EQ.JMAX ).OR.( ROWMAX.LE.COLMAX ) ) THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot NOT found, set variables and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 12 +* + END IF +* +* Swap TWO rows and TWO columns +* +* First swap +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Interchange rows and column K and P in the leading +* submatrix A(1:k,1:k) if we have a 2-by-2 pivot +* + IF( P.GT.1 ) + $ CALL CSWAP( P-1, A( 1, K ), 1, A( 1, P ), 1 ) + IF( P.LT.(K-1) ) + $ CALL CSWAP( K-P-1, A( P+1, K ), 1, A( P, P+1 ), + $ LDA ) + T = A( K, K ) + A( K, K ) = A( P, P ) + A( P, P ) = T + END IF +* +* Second swap +* + KK = K - KSTEP + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + IF( KP.GT.1 ) + $ CALL CSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) + IF( ( KK.GT.1 ) .AND. ( KP.LT.(KK-1) ) ) + $ CALL CSWAP( KK-KP-1, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* + IF( K.GT.1 ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) and +* store U(k) in column k +* + IF( CABS1( A( K, K ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*1/D(k)*W(k)**T +* + D11 = CONE / A( K, K ) + CALL CSYR( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL CSCAL( K-1, D11, A( 1, K ), 1 ) + ELSE +* +* Store L(k) in column K +* + D11 = A( K, K ) + DO 16 II = 1, K - 1 + A( II, K ) = A( II, K ) / D11 + 16 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL CSYR( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T +* = A - ( ( A(k-1)A(k) )*inv(D(k)) ) * ( A(k-1)A(k) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.GT.2 ) THEN +* + D12 = A( K-1, K ) + D22 = A( K-1, K-1 ) / D12 + D11 = A( K, K ) / D12 + T = CONE / ( D11*D22-CONE ) +* + DO 30 J = K - 2, 1, -1 +* + WKM1 = T*( D11*A( J, K-1 )-A( J, K ) ) + WK = T*( D22*A( J, K )-A( J, K-1 ) ) +* + DO 20 I = J, 1, -1 + A( I, J ) = A( I, J ) - (A( I, K ) / D12 )*WK - + $ ( A( I, K-1 ) / D12 )*WKM1 + 20 CONTINUE +* +* Store U(k) and U(k-1) in cols k and k-1 for row J +* + A( J, K ) = WK / D12 + A( J, K-1 ) = WKM1 / D12 +* + 30 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 70 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + ICAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( ( MAX( ABSAKK, COLMAX ).EQ.ZERO ) ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE +* +* Test for interchange +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABSAKK.GE.ALPHA*COLMAX +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 42 CONTINUE +* +* Begin pivot search loop body +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + ICAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + ICAMAX( N-IMAX, A( IMAX+1, IMAX ), + $ 1 ) + STEMP = CABS1( A( ITEMP, IMAX ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for (used to handle NaN and Inf) +* CABS1( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX +* + IF( .NOT.( CABS1(A( IMAX, IMAX )).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX .EQ. COLMAX, +* used to handle NaN and Inf +* + ELSE IF( ( P.EQ.JMAX ).OR.( ROWMAX.LE.COLMAX ) ) THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot NOT found, set variables and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 42 +* + END IF +* +* Swap TWO rows and TWO columns +* +* First swap +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Interchange rows and column K and P in the trailing +* submatrix A(k:n,k:n) if we have a 2-by-2 pivot +* + IF( P.LT.N ) + $ CALL CSWAP( N-P, A( P+1, K ), 1, A( P+1, P ), 1 ) + IF( P.GT.(K+1) ) + $ CALL CSWAP( P-K-1, A( K+1, K ), 1, A( P, K+1 ), LDA ) + T = A( K, K ) + A( K, K ) = A( P, P ) + A( P, P ) = T + END IF +* +* Second swap +* + KK = K + KSTEP - 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) + IF( ( KK.LT.N ) .AND. ( KP.GT.(KK+1) ) ) + $ CALL CSWAP( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) and +* store L(k) in column k +* + IF( CABS1( A( K, K ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* + D11 = CONE / A( K, K ) + CALL CSYR( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column k +* + CALL CSCAL( N-K, D11, A( K+1, K ), 1 ) + ELSE +* +* Store L(k) in column k +* + D11 = A( K, K ) + DO 46 II = K + 1, N + A( II, K ) = A( II, K ) / D11 + 46 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL CSYR( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) ) * D(k) * ( L(k) L(k+1) )**T +* = A - ( ( A(k)A(k+1) )*inv(D(k) ) * ( A(k)A(k+1) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.LT.N-1 ) THEN +* + D21 = A( K+1, K ) + D11 = A( K+1, K+1 ) / D21 + D22 = A( K, K ) / D21 + T = CONE / ( D11*D22-CONE ) +* + DO 60 J = K + 2, N +* +* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J +* + WK = T*( D11*A( J, K )-A( J, K+1 ) ) + WKP1 = T*( D22*A( J, K+1 )-A( J, K ) ) +* +* Perform a rank-2 update of A(k+2:n,k+2:n) +* + DO 50 I = J, N + A( I, J ) = A( I, J ) - ( A( I, K ) / D21 )*WK - + $ ( A( I, K+1 ) / D21 )*WKP1 + 50 CONTINUE +* +* Store L(k) and L(k+1) in cols k and k+1 for row J +* + A( J, K ) = WK / D21 + A( J, K+1 ) = WKP1 / D21 +* + 60 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 40 +* + END IF +* + 70 CONTINUE +* + RETURN +* +* End of CSYTF2_ROOK +* + END diff --git a/dspl/liblapack/SRC/csytrf.f b/dspl/liblapack/SRC/csytrf.f new file mode 100644 index 0000000..c389725 --- /dev/null +++ b/dspl/liblapack/SRC/csytrf.f @@ -0,0 +1,362 @@ +*> \brief \b CSYTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYTRF computes the factorization of a complex symmetric matrix A +*> using the Bunch-Kaufman diagonal pivoting method. The form of the +*> factorization is +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=1. For best performance +*> LWORK >= N*NB, where NB is the block size returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLASYF, CSYTF2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'CSYTRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'CSYTRF', UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by CLASYF; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 40 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL CLASYF( UPLO, K, NB, KB, A, LDA, IPIV, WORK, N, IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL CSYTF2( UPLO, K, A, LDA, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by CLASYF; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL CLASYF( UPLO, N-K+1, NB, KB, A( K, K ), LDA, IPIV( K ), + $ WORK, N, IINFO ) + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL CSYTF2( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), IINFO ) + KB = N - K + 1 + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO 30 J = K, K + KB - 1 + IF( IPIV( J ).GT.0 ) THEN + IPIV( J ) = IPIV( J ) + K - 1 + ELSE + IPIV( J ) = IPIV( J ) - K + 1 + END IF + 30 CONTINUE +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* + END IF +* + 40 CONTINUE + WORK( 1 ) = LWKOPT + RETURN +* +* End of CSYTRF +* + END diff --git a/dspl/liblapack/SRC/csytrf_aa.f b/dspl/liblapack/SRC/csytrf_aa.f new file mode 100644 index 0000000..2f185b0 --- /dev/null +++ b/dspl/liblapack/SRC/csytrf_aa.f @@ -0,0 +1,467 @@ +*> \brief \b CSYTRF_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRF_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYTRF_AA computes the factorization of a complex symmetric matrix A +*> using the Aasen's algorithm. The form of the factorization is +*> +*> A = U*T*U**T or A = L*T*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is a complex symmetric tridiagonal matrix. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the tridiagonal matrix is stored in the diagonals +*> and the subdiagonals of A just below (or above) the diagonals, +*> and L is stored below (or above) the subdiaonals, when UPLO +*> is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= MAX(1,2*N). For optimum performance +*> LWORK >= N*(1+NB), where NB is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER J, LWKOPT + INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB + COMPLEX ALPHA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLASYF_AA, CGEMM, CGEMV, CSCAL, CSWAP, CCOPY, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Determine the block size +* + NB = ILAENV( 1, 'CSYTRF_AA', UPLO, N, -1, -1, -1 ) +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKOPT = (NB+1)*N + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRF_AA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return +* + IF ( N.EQ.0 ) THEN + RETURN + ENDIF + IPIV( 1 ) = 1 + IF ( N.EQ.1 ) THEN + RETURN + END IF +* +* Adjust block size based on the workspace size +* + IF( LWORK.LT.((1+NB)*N) ) THEN + NB = ( LWORK-N ) / N + END IF +* + IF( UPPER ) THEN +* +* ..................................................... +* Factorize A as L*D*L**T using the upper triangle of A +* ..................................................... +* +* Copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N)) +* + CALL CCOPY( N, A( 1, 1 ), LDA, WORK( 1 ), 1 ) +* +* J is the main loop index, increasing from 1 to N in steps of +* JB, where JB is the number of columns factorized by CLASYF; +* JB is either NB, or N-J+1 for the last block +* + J = 0 + 10 CONTINUE + IF( J.GE.N ) + $ GO TO 20 +* +* each step of the main loop +* J is the last column of the previous panel +* J1 is the first column of the current panel +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 for the first panel, and +* K1=0 for the rest +* + J1 = J + 1 + JB = MIN( N-J1+1, NB ) + K1 = MAX(1, J)-J +* +* Panel factorization +* + CALL CLASYF_AA( UPLO, 2-K1, N-J, JB, + $ A( MAX(1, J), J+1 ), LDA, + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) +* +* Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) +* + DO J2 = J+2, MIN(N, J+JB+1) + IPIV( J2 ) = IPIV( J2 ) + J + IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN + CALL CSWAP( J1-K1-2, A( 1, J2 ), 1, + $ A( 1, IPIV(J2) ), 1 ) + END IF + END DO + J = J + JB +* +* Trailing submatrix update, where +* the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and +* WORK stores the current block of the auxiriarly matrix H +* + IF( J.LT.N ) THEN +* +* If first panel and JB=1 (NB=1), then nothing to do +* + IF( J1.GT.1 .OR. JB.GT.1 ) THEN +* +* Merge rank-1 update with BLAS-3 update +* + ALPHA = A( J, J+1 ) + A( J, J+1 ) = ONE + CALL CCOPY( N-J, A( J-1, J+1 ), LDA, + $ WORK( (J+1-J1+1)+JB*N ), 1 ) + CALL CSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 ) +* +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 and K2= 0 for the first panel, +* while K1=0 and K2=1 for the rest +* + IF( J1.GT.1 ) THEN +* +* Not first panel +* + K2 = 1 + ELSE +* +* First panel +* + K2 = 0 +* +* First update skips the first column +* + JB = JB - 1 + END IF +* + DO J2 = J+1, N, NB + NJ = MIN( NB, N-J2+1 ) +* +* Update (J2, J2) diagonal block with CGEMV +* + J3 = J2 + DO MJ = NJ-1, 1, -1 + CALL CGEMV( 'No transpose', MJ, JB+1, + $ -ONE, WORK( J3-J1+1+K1*N ), N, + $ A( J1-K2, J3 ), 1, + $ ONE, A( J3, J3 ), LDA ) + J3 = J3 + 1 + END DO +* +* Update off-diagonal block of J2-th block row with CGEMM +* + CALL CGEMM( 'Transpose', 'Transpose', + $ NJ, N-J3+1, JB+1, + $ -ONE, A( J1-K2, J2 ), LDA, + $ WORK( J3-J1+1+K1*N ), N, + $ ONE, A( J2, J3 ), LDA ) + END DO +* +* Recover T( J, J+1 ) +* + A( J, J+1 ) = ALPHA + END IF +* +* WORK(J+1, 1) stores H(J+1, 1) +* + CALL CCOPY( N-J, A( J+1, J+1 ), LDA, WORK( 1 ), 1 ) + END IF + GO TO 10 + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* +* copy first column A(1:N, 1) into H(1:N, 1) +* (stored in WORK(1:N)) +* + CALL CCOPY( N, A( 1, 1 ), 1, WORK( 1 ), 1 ) +* +* J is the main loop index, increasing from 1 to N in steps of +* JB, where JB is the number of columns factorized by CLASYF; +* JB is either NB, or N-J+1 for the last block +* + J = 0 + 11 CONTINUE + IF( J.GE.N ) + $ GO TO 20 +* +* each step of the main loop +* J is the last column of the previous panel +* J1 is the first column of the current panel +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 for the first panel, and +* K1=0 for the rest +* + J1 = J+1 + JB = MIN( N-J1+1, NB ) + K1 = MAX(1, J)-J +* +* Panel factorization +* + CALL CLASYF_AA( UPLO, 2-K1, N-J, JB, + $ A( J+1, MAX(1, J) ), LDA, + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) +* +* Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) +* + DO J2 = J+2, MIN(N, J+JB+1) + IPIV( J2 ) = IPIV( J2 ) + J + IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN + CALL CSWAP( J1-K1-2, A( J2, 1 ), LDA, + $ A( IPIV(J2), 1 ), LDA ) + END IF + END DO + J = J + JB +* +* Trailing submatrix update, where +* A(J2+1, J1-1) stores L(J2+1, J1) and +* WORK(J2+1, 1) stores H(J2+1, 1) +* + IF( J.LT.N ) THEN +* +* if first panel and JB=1 (NB=1), then nothing to do +* + IF( J1.GT.1 .OR. JB.GT.1 ) THEN +* +* Merge rank-1 update with BLAS-3 update +* + ALPHA = A( J+1, J ) + A( J+1, J ) = ONE + CALL CCOPY( N-J, A( J+1, J-1 ), 1, + $ WORK( (J+1-J1+1)+JB*N ), 1 ) + CALL CSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 ) +* +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 and K2= 0 for the first panel, +* while K1=0 and K2=1 for the rest +* + IF( J1.GT.1 ) THEN +* +* Not first panel +* + K2 = 1 + ELSE +* +* First panel +* + K2 = 0 +* +* First update skips the first column +* + JB = JB - 1 + END IF +* + DO J2 = J+1, N, NB + NJ = MIN( NB, N-J2+1 ) +* +* Update (J2, J2) diagonal block with CGEMV +* + J3 = J2 + DO MJ = NJ-1, 1, -1 + CALL CGEMV( 'No transpose', MJ, JB+1, + $ -ONE, WORK( J3-J1+1+K1*N ), N, + $ A( J3, J1-K2 ), LDA, + $ ONE, A( J3, J3 ), 1 ) + J3 = J3 + 1 + END DO +* +* Update off-diagonal block in J2-th block column with CGEMM +* + CALL CGEMM( 'No transpose', 'Transpose', + $ N-J3+1, NJ, JB+1, + $ -ONE, WORK( J3-J1+1+K1*N ), N, + $ A( J2, J1-K2 ), LDA, + $ ONE, A( J3, J2 ), LDA ) + END DO +* +* Recover T( J+1, J ) +* + A( J+1, J ) = ALPHA + END IF +* +* WORK(J+1, 1) stores H(J+1, 1) +* + CALL CCOPY( N-J, A( J+1, J+1 ), 1, WORK( 1 ), 1 ) + END IF + GO TO 11 + END IF +* + 20 CONTINUE + RETURN +* +* End of CSYTRF_AA +* + END diff --git a/dspl/liblapack/SRC/csytrf_aa_2stage.f b/dspl/liblapack/SRC/csytrf_aa_2stage.f new file mode 100644 index 0000000..0a6bfbe --- /dev/null +++ b/dspl/liblapack/SRC/csytrf_aa_2stage.f @@ -0,0 +1,668 @@ +*> \brief \b CSYTRF_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRF_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, +* IPIV2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX A( LDA, * ), TB( * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYTRF_AA_2STAGE computes the factorization of a complex symmetric matrix A +*> using the Aasen's algorithm. The form of the factorization is +*> +*> A = U*T*U**T or A = L*T*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is a complex symmetric band matrix with the +*> bandwidth of NB (NB is internally selected and stored in TB( 1 ), and T is +*> LU factorized with partial pivoting). +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, + $ IPIV2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX A( LDA, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER I, J, K, I1, I2, TD + INTEGER LDTB, NB, KB, JB, NT, IINFO + COMPLEX PIV +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGBTRF, CGEMM, CGETRF, CLACPY, + $ CLASET, CTRSM, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF ( LTB .LT. 4*N .AND. .NOT.TQUERY ) THEN + INFO = -6 + ELSE IF ( LWORK .LT. N .AND. .NOT.WQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRF_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Answer the query +* + NB = ILAENV( 1, 'CSYTRF_AA_2STAGE', UPLO, N, -1, -1, -1 ) + IF( INFO.EQ.0 ) THEN + IF( TQUERY ) THEN + TB( 1 ) = (3*NB+1)*N + END IF + IF( WQUERY ) THEN + WORK( 1 ) = N*NB + END IF + END IF + IF( TQUERY .OR. WQUERY ) THEN + RETURN + END IF +* +* Quick return +* + IF ( N.EQ.0 ) THEN + RETURN + ENDIF +* +* Determine the number of the block size +* + LDTB = LTB/N + IF( LDTB .LT. 3*NB+1 ) THEN + NB = (LDTB-1)/3 + END IF + IF( LWORK .LT. NB*N ) THEN + NB = LWORK/N + END IF +* +* Determine the number of the block columns +* + NT = (N+NB-1)/NB + TD = 2*NB + KB = MIN(NB, N) +* +* Initialize vectors/matrices +* + DO J = 1, KB + IPIV( J ) = J + END DO +* +* Save NB +* + TB( 1 ) = NB +* + IF( UPPER ) THEN +* +* ..................................................... +* Factorize A as L*D*L**T using the upper triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*U(I,J) + T(I+1,I)*U(I+1,J) + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ CONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( (I-1)*NB+1, J*NB+1 ), LDA, + $ CZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J) + IF( I .EQ. J-1) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ CONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (I-2)*NB+1, J*NB+1 ), LDA, + $ CZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL CLACPY( 'Upper', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = U(1:J,J)'*H(1:J) + CALL CGEMM( 'Transpose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -CONE, A( 1, J*NB+1 ), LDA, + $ WORK( NB+1 ), N, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += U(J,J)'*T(J,J-1)*U(J-1,J) + CALL CGEMM( 'Transpose', 'NoTranspose', + $ KB, NB, KB, + $ CONE, A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ CZERO, WORK( 1 ), N ) + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB, + $ -CONE, WORK( 1 ), N, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + DO K = I+1, KB + TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + $ = TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + END DO + END DO + IF( J.GT.0 ) THEN +c CALL CHEGST( 1, 'Upper', KB, +c $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, +c $ A( (J-1)*NB+1, J*NB+1 ), LDA, IINFO ) + CALL CTRSM( 'L', 'U', 'T', 'N', KB, KB, CONE, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + CALL CTRSM( 'R', 'U', 'N', 'N', KB, KB, CONE, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, KB, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ CZERO, WORK( J*NB+1 ), N ) + ELSE + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB+KB, + $ CONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ CZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL CGEMM( 'Transpose', 'NoTranspose', + $ NB, N-(J+1)*NB, J*NB, + $ -CONE, WORK( NB+1 ), N, + $ A( 1, (J+1)*NB+1 ), LDA, + $ CONE, A( J*NB+1, (J+1)*NB+1 ), LDA ) + END IF +* +* Copy panel to workspace to call CGETRF +* + DO K = 1, NB + CALL CCOPY( N-(J+1)*NB, + $ A( J*NB+K, (J+1)*NB+1 ), LDA, + $ WORK( 1+(K-1)*N ), 1 ) + END DO +* +* Factorize panel +* + CALL CGETRF( N-(J+1)*NB, NB, + $ WORK, N, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Copy panel back +* + DO K = 1, NB + CALL CCOPY( N-(J+1)*NB, + $ WORK( 1+(K-1)*N ), 1, + $ A( J*NB+K, (J+1)*NB+1 ), LDA ) + END DO +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL CLASET( 'Full', KB, NB, CZERO, CZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 ) + CALL CLACPY( 'Upper', KB, NB, + $ WORK, N, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL CTRSM( 'R', 'U', 'N', 'U', KB, NB, CONE, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J,J+1) into T(J+1, J), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) + $ = TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) + END DO + END DO + CALL CLASET( 'Lower', KB, NB, CZERO, CONE, + $ A( J*NB+1, (J+1)*NB+1), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL CSWAP( K-1, A( (J+1)*NB+1, I1 ), 1, + $ A( (J+1)*NB+1, I2 ), 1 ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL CSWAP( I2-I1-1, A( I1, I1+1 ), LDA, + $ A( I1+1, I2 ), 1 ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL CSWAP( N-I2, A( I1, I2+1 ), LDA, + $ A( I2, I2+1 ), LDA ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL CSWAP( J*NB, A( 1, I1 ), 1, + $ A( 1, I2 ), 1 ) + END IF + ENDIF + END DO + END IF + END DO + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*L(J,I)' + T(I+1,I)'*L(J,I+1)' + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL CGEMM( 'NoTranspose', 'Transpose', + $ NB, KB, JB, + $ CONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (I-1)*NB+1 ), LDA, + $ CZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)' + IF( I .EQ. (J-1) ) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL CGEMM( 'NoTranspose', 'Transpose', + $ NB, KB, JB, + $ CONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (I-2)*NB+1 ), LDA, + $ CZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL CLACPY( 'Lower', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = L(J,1:J)*H(1:J) + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -CONE, A( J*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += L(J,J)*T(J,J-1)*L(J,J-1)' + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ KB, NB, KB, + $ CONE, A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ CZERO, WORK( 1 ), N ) + CALL CGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, NB, + $ -CONE, WORK( 1 ), N, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + DO K = I+1, KB + TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + $ = TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + END DO + END DO + IF( J.GT.0 ) THEN +c CALL CHEGST( 1, 'Lower', KB, +c $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, +c $ A( J*NB+1, (J-1)*NB+1 ), LDA, IINFO ) + CALL CTRSM( 'L', 'L', 'N', 'N', KB, KB, CONE, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + CALL CTRSM( 'R', 'L', 'T', 'N', KB, KB, CONE, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Symmetrize T(J,J) +* + DO I = 1, KB + DO K = I+1, KB + TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + $ = TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + END DO + END DO +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL CGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, KB, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ CZERO, WORK( J*NB+1 ), N ) + ELSE + CALL CGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, NB+KB, + $ CONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ CZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL CGEMM( 'NoTranspose', 'NoTranspose', + $ N-(J+1)*NB, NB, J*NB, + $ -CONE, A( (J+1)*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ CONE, A( (J+1)*NB+1, J*NB+1 ), LDA ) + END IF +* +* Factorize panel +* + CALL CGETRF( N-(J+1)*NB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL CLASET( 'Full', KB, NB, CZERO, CZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 ) + CALL CLACPY( 'Upper', KB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL CTRSM( 'R', 'L', 'T', 'U', KB, NB, CONE, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J+1,J) into T(J, J+1), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) = + $ TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) + END DO + END DO + CALL CLASET( 'Upper', KB, NB, CZERO, CONE, + $ A( (J+1)*NB+1, J*NB+1 ), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL CSWAP( K-1, A( I1, (J+1)*NB+1 ), LDA, + $ A( I2, (J+1)*NB+1 ), LDA ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL CSWAP( I2-I1-1, A( I1+1, I1 ), 1, + $ A( I2, I1+1 ), LDA ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL CSWAP( N-I2, A( I2+1, I1 ), 1, + $ A( I2+1, I2 ), 1 ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL CSWAP( J*NB, A( I1, 1 ), LDA, + $ A( I2, 1 ), LDA ) + END IF + ENDIF + END DO +* +* Apply pivots to previous columns of L +* +c CALL CLASWP( J*NB, A( 1, 1 ), LDA, +c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) + END IF + END DO + END IF +* +* Factor the band matrix + CALL CGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) +* +* End of CSYTRF_AA_2STAGE +* + END diff --git a/dspl/liblapack/SRC/csytrf_rk.f b/dspl/liblapack/SRC/csytrf_rk.f new file mode 100644 index 0000000..f7e3528 --- /dev/null +++ b/dspl/liblapack/SRC/csytrf_rk.f @@ -0,0 +1,498 @@ +*> \brief \b CSYTRF_RK computes the factorization of a complex symmetric indefinite matrix using the bounded Bunch-Kaufman (rook) diagonal pivoting method (BLAS3 blocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRF_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), E ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> CSYTRF_RK computes the factorization of a complex symmetric matrix A +*> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> For more information see Further Details section. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the symmetric matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, contains: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is COMPLEX array, dimension (N) +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> IPIV describes the permutation matrix P in the factorization +*> of matrix A as follows. The absolute value of IPIV(k) +*> represents the index of row and column that were +*> interchanged with the k-th row and column. The value of UPLO +*> describes the order in which the interchanges were applied. +*> Also, the sign of IPIV represents the block structure of +*> the symmetric block diagonal matrix D with 1-by-1 or 2-by-2 +*> diagonal blocks which correspond to 1 or 2 interchanges +*> at each factorization step. For more info see Further +*> Details section. +*> +*> If UPLO = 'U', +*> ( in factorization order, k decreases from N to 1 ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N); +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k-1) < 0 means: +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k-1) != k-1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k-1) = k-1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) <= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> +*> If UPLO = 'L', +*> ( in factorization order, k increases from 1 to N ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N). +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k+1) < 0 means: +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k+1) != k+1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k+1) = k+1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) >= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension ( MAX(1,LWORK) ). +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=1. For best performance +*> LWORK >= N*NB, where NB is the block size returned +*> by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; +*> the routine only calculates the optimal size of the WORK +*> array, returns this value as the first entry of the WORK +*> array, and no error message related to LWORK is issued +*> by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> TODO: put correct description +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CSYTRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, IP, IWS, K, KB, LDWORK, LWKOPT, + $ NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLASYF_RK, CSYTF2_RK, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'CSYTRF_RK', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRF_RK', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'CSYTRF_RK', + $ UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by CLASYF_RK; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 15 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL CLASYF_RK( UPLO, K, NB, KB, A, LDA, E, + $ IPIV, WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL CSYTF2_RK( UPLO, K, A, LDA, E, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* No need to adjust IPIV +* +* +* Apply permutations to the leading panel 1:k-1 +* +* Read IPIV from the last block factored, i.e. +* indices k-kb+1:k and apply row permutations to the +* last k+1 colunms k+1:N after that block +* (We can do the simple loop over IPIV with decrement -1, +* since the ABS value of IPIV( I ) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + IF( K.LT.N ) THEN + DO I = K, ( K - KB + 1 ), -1 + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + CALL CSWAP( N-K, A( I, K+1 ), LDA, + $ A( IP, K+1 ), LDA ) + END IF + END DO + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* +* This label is the exit from main loop over K decreasing +* from N to 1 in steps of KB +* + 15 CONTINUE +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by CLASYF_RK; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 35 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL CLASYF_RK( UPLO, N-K+1, NB, KB, A( K, K ), LDA, E( K ), + $ IPIV( K ), WORK, LDWORK, IINFO ) + + + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL CSYTF2_RK( UPLO, N-K+1, A( K, K ), LDA, E( K ), + $ IPIV( K ), IINFO ) + KB = N - K + 1 +* + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO I = K, K + KB - 1 + IF( IPIV( I ).GT.0 ) THEN + IPIV( I ) = IPIV( I ) + K - 1 + ELSE + IPIV( I ) = IPIV( I ) - K + 1 + END IF + END DO +* +* Apply permutations to the leading panel 1:k-1 +* +* Read IPIV from the last block factored, i.e. +* indices k:k+kb-1 and apply row permutations to the +* first k-1 colunms 1:k-1 before that block +* (We can do the simple loop over IPIV with increment 1, +* since the ABS value of IPIV( I ) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + IF( K.GT.1 ) THEN + DO I = K, ( K + KB - 1 ), 1 + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + CALL CSWAP( K-1, A( I, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + END DO + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* +* This label is the exit from main loop over K increasing +* from 1 to N in steps of KB +* + 35 CONTINUE +* +* End Lower +* + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of CSYTRF_RK +* + END diff --git a/dspl/liblapack/SRC/csytrf_rook.f b/dspl/liblapack/SRC/csytrf_rook.f new file mode 100644 index 0000000..c6a8ae5 --- /dev/null +++ b/dspl/liblapack/SRC/csytrf_rook.f @@ -0,0 +1,393 @@ +*> \brief \b CSYTRF_ROOK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRF_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYTRF_ROOK computes the factorization of a complex symmetric matrix A +*> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. +*> The form of the factorization is +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)). +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=1. For best performance +*> LWORK >= N*NB, where NB is the block size returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> June 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CSYTRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLASYF_ROOK, CSYTF2_ROOK, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'CSYTRF_ROOK', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, N*NB ) + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRF_ROOK', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'CSYTRF_ROOK', + $ UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by CLASYF_ROOK; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 40 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL CLASYF_ROOK( UPLO, K, NB, KB, A, LDA, + $ IPIV, WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL CSYTF2_ROOK( UPLO, K, A, LDA, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* No need to adjust IPIV +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by CLASYF_ROOK; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL CLASYF_ROOK( UPLO, N-K+1, NB, KB, A( K, K ), LDA, + $ IPIV( K ), WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL CSYTF2_ROOK( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), + $ IINFO ) + KB = N - K + 1 + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO 30 J = K, K + KB - 1 + IF( IPIV( J ).GT.0 ) THEN + IPIV( J ) = IPIV( J ) + K - 1 + ELSE + IPIV( J ) = IPIV( J ) - K + 1 + END IF + 30 CONTINUE +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* + END IF +* + 40 CONTINUE + WORK( 1 ) = LWKOPT + RETURN +* +* End of CSYTRF_ROOK +* + END diff --git a/dspl/liblapack/SRC/csytri.f b/dspl/liblapack/SRC/csytri.f new file mode 100644 index 0000000..8f15e36 --- /dev/null +++ b/dspl/liblapack/SRC/csytri.f @@ -0,0 +1,383 @@ +*> \brief \b CSYTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYTRI computes the inverse of a complex symmetric indefinite matrix +*> A using the factorization A = U*D*U**T or A = L*D*L**T computed by +*> CSYTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by CSYTRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSYTRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KP, KSTEP + COMPLEX AK, AKKP1, AKP1, D, T, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTU + EXTERNAL LSAME, CDOTU +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CSWAP, CSYMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL CCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL CSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - CDOTU( K-1, WORK, 1, A( 1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = A( K, K+1 ) + AK = A( K, K ) / T + AKP1 = A( K+1, K+1 ) / T + AKKP1 = A( K, K+1 ) / T + D = T*( AK*AKP1-ONE ) + A( K, K ) = AKP1 / D + A( K+1, K+1 ) = AK / D + A( K, K+1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL CCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL CSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - CDOTU( K-1, WORK, 1, A( 1, K ), + $ 1 ) + A( K, K+1 ) = A( K, K+1 ) - + $ CDOTU( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 ) + CALL CCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 ) + CALL CSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K+1 ), 1 ) + A( K+1, K+1 ) = A( K+1, K+1 ) - + $ CDOTU( K-1, WORK, 1, A( 1, K+1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + CALL CSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + CALL CSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K+1 ) + A( K, K+1 ) = A( KP, K+1 ) + A( KP, K+1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + GO TO 30 + 40 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 50 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 60 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL CCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL CSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - CDOTU( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = A( K, K-1 ) + AK = A( K-1, K-1 ) / T + AKP1 = A( K, K ) / T + AKKP1 = A( K, K-1 ) / T + D = T*( AK*AKP1-ONE ) + A( K-1, K-1 ) = AKP1 / D + A( K, K ) = AK / D + A( K, K-1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL CCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL CSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - CDOTU( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + A( K, K-1 ) = A( K, K-1 ) - + $ CDOTU( N-K, A( K+1, K ), 1, A( K+1, K-1 ), + $ 1 ) + CALL CCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 ) + CALL CSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K-1 ), 1 ) + A( K-1, K-1 ) = A( K-1, K-1 ) - + $ CDOTU( N-K, WORK, 1, A( K+1, K-1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + CALL CSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K-1 ) + A( K, K-1 ) = A( KP, K-1 ) + A( KP, K-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + GO TO 50 + 60 CONTINUE + END IF +* + RETURN +* +* End of CSYTRI +* + END diff --git a/dspl/liblapack/SRC/csytri2.f b/dspl/liblapack/SRC/csytri2.f new file mode 100644 index 0000000..4c6baaa --- /dev/null +++ b/dspl/liblapack/SRC/csytri2.f @@ -0,0 +1,205 @@ +*> \brief \b CSYTRI2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRI2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYTRI2 computes the inverse of a COMPLEX symmetric indefinite matrix +*> A using the factorization A = U*D*U**T or A = L*D*L**T computed by +*> CSYTRF. CSYTRI2 sets the LEADING DIMENSION of the workspace +*> before calling CSYTRI2X that actually computes the inverse. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the NB diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by CSYTRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the NB structure of D +*> as determined by CSYTRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N+NB+1)*(NB+3) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> WORK is size >= (N+NB+1)*(NB+3) +*> If LDWORK = -1, then a workspace query is assumed; the routine +*> calculates: +*> - the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, +*> - and no error message related to LDWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CSYTRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, LQUERY + INTEGER MINSIZE, NBMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CSYTRI, CSYTRI2X, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* Get blocksize + NBMAX = ILAENV( 1, 'CSYTRF', UPLO, N, -1, -1, -1 ) + IF ( NBMAX .GE. N ) THEN + MINSIZE = N + ELSE + MINSIZE = (N+NBMAX+1)*(NBMAX+3) + END IF +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF (LWORK .LT. MINSIZE .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* +* Quick return if possible +* +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRI2', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK(1)=MINSIZE + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN + + IF( NBMAX .GE. N ) THEN + CALL CSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) + ELSE + CALL CSYTRI2X( UPLO, N, A, LDA, IPIV, WORK, NBMAX, INFO ) + END IF + RETURN +* +* End of CSYTRI2 +* + END diff --git a/dspl/liblapack/SRC/csytri2x.f b/dspl/liblapack/SRC/csytri2x.f new file mode 100644 index 0000000..151f063 --- /dev/null +++ b/dspl/liblapack/SRC/csytri2x.f @@ -0,0 +1,588 @@ +*> \brief \b CSYTRI2X +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRI2X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRI2X( UPLO, N, A, LDA, IPIV, WORK, NB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), WORK( N+NB+1,* ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYTRI2X computes the inverse of a real symmetric indefinite matrix +*> A using the factorization A = U*D*U**T or A = L*D*L**T computed by +*> CSYTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the NNB diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by CSYTRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the NNB structure of D +*> as determined by CSYTRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N+NB+1,NB+3) +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> Block size +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CSYTRI2X( UPLO, N, A, LDA, IPIV, WORK, NB, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( N+NB+1,* ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IINFO, IP, K, CUT, NNB + INTEGER COUNT + INTEGER J, U11, INVD + + COMPLEX AK, AKKP1, AKP1, D, T + COMPLEX U01_I_J, U01_IP1_J + COMPLEX U11_I_J, U11_IP1_J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSYCONV, XERBLA, CTRTRI + EXTERNAL CGEMM, CTRMM, CSYSWAPR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF +* +* Quick return if possible +* +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRI2X', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN +* +* Convert A +* Workspace got Non-diag elements of D +* + CALL CSYCONV( UPLO, 'C', N, A, LDA, IPIV, WORK, IINFO ) +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + END DO + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + END DO + END IF + INFO = 0 +* +* Splitting Workspace +* U01 is a block (N,NB+1) +* The first element of U01 is in WORK(1,1) +* U11 is a block (NB+1,NB+1) +* The first element of U11 is in WORK(N+1,1) + U11 = N +* INVD is a block (N,2) +* The first element of INVD is in WORK(1,INVD) + INVD = NB+2 + + IF( UPPER ) THEN +* +* invA = P * inv(U**T)*inv(D)*inv(U)*P**T. +* + CALL CTRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D)*inv(U) +* + K=1 + DO WHILE ( K .LE. N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK(K,INVD) = ONE / A( K, K ) + WORK(K,INVD+1) = 0 + K=K+1 + ELSE +* 2 x 2 diagonal NNB + T = WORK(K+1,1) + AK = A( K, K ) / T + AKP1 = A( K+1, K+1 ) / T + AKKP1 = WORK(K+1,1) / T + D = T*( AK*AKP1-ONE ) + WORK(K,INVD) = AKP1 / D + WORK(K+1,INVD+1) = AK / D + WORK(K,INVD+1) = -AKKP1 / D + WORK(K+1,INVD) = -AKKP1 / D + K=K+2 + END IF + END DO +* +* inv(U**T) = (inv(U))**T +* +* inv(U**T)*inv(D)*inv(U) +* + CUT=N + DO WHILE (CUT .GT. 0) + NNB=NB + IF (CUT .LE. NNB) THEN + NNB=CUT + ELSE + COUNT = 0 +* count negative elements, + DO I=CUT+1-NNB,CUT + IF (IPIV(I) .LT. 0) COUNT=COUNT+1 + END DO +* need a even number for a clear cut + IF (MOD(COUNT,2) .EQ. 1) NNB=NNB+1 + END IF + + CUT=CUT-NNB +* +* U01 Block +* + DO I=1,CUT + DO J=1,NNB + WORK(I,J)=A(I,CUT+J) + END DO + END DO +* +* U11 Block +* + DO I=1,NNB + WORK(U11+I,I)=ONE + DO J=1,I-1 + WORK(U11+I,J)=ZERO + END DO + DO J=I+1,NNB + WORK(U11+I,J)=A(CUT+I,CUT+J) + END DO + END DO +* +* invD*U01 +* + I=1 + DO WHILE (I .LE. CUT) + IF (IPIV(I) > 0) THEN + DO J=1,NNB + WORK(I,J)=WORK(I,INVD)*WORK(I,J) + END DO + I=I+1 + ELSE + DO J=1,NNB + U01_I_J = WORK(I,J) + U01_IP1_J = WORK(I+1,J) + WORK(I,J)=WORK(I,INVD)*U01_I_J+ + $ WORK(I,INVD+1)*U01_IP1_J + WORK(I+1,J)=WORK(I+1,INVD)*U01_I_J+ + $ WORK(I+1,INVD+1)*U01_IP1_J + END DO + I=I+2 + END IF + END DO +* +* invD1*U11 +* + I=1 + DO WHILE (I .LE. NNB) + IF (IPIV(CUT+I) > 0) THEN + DO J=I,NNB + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + END DO + I=I+1 + ELSE + DO J=I,NNB + U11_I_J = WORK(U11+I,J) + U11_IP1_J = WORK(U11+I+1,J) + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + + $ WORK(CUT+I,INVD+1)*WORK(U11+I+1,J) + WORK(U11+I+1,J)=WORK(CUT+I+1,INVD)*U11_I_J+ + $ WORK(CUT+I+1,INVD+1)*U11_IP1_J + END DO + I=I+2 + END IF + END DO +* +* U11**T*invD1*U11->U11 +* + CALL CTRMM('L','U','T','U',NNB, NNB, + $ ONE,A(CUT+1,CUT+1),LDA,WORK(U11+1,1),N+NB+1) +* + DO I=1,NNB + DO J=I,NNB + A(CUT+I,CUT+J)=WORK(U11+I,J) + END DO + END DO +* +* U01**T*invD*U01->A(CUT+I,CUT+J) +* + CALL CGEMM('T','N',NNB,NNB,CUT,ONE,A(1,CUT+1),LDA, + $ WORK,N+NB+1, ZERO, WORK(U11+1,1), N+NB+1) +* +* U11 = U11**T*invD1*U11 + U01**T*invD*U01 +* + DO I=1,NNB + DO J=I,NNB + A(CUT+I,CUT+J)=A(CUT+I,CUT+J)+WORK(U11+I,J) + END DO + END DO +* +* U01 = U00**T*invD0*U01 +* + CALL CTRMM('L',UPLO,'T','U',CUT, NNB, + $ ONE,A,LDA,WORK,N+NB+1) + +* +* Update U01 +* + DO I=1,CUT + DO J=1,NNB + A(I,CUT+J)=WORK(I,J) + END DO + END DO +* +* Next Block +* + END DO +* +* Apply PERMUTATIONS P and P**T: P * inv(U**T)*inv(D)*inv(U) *P**T +* + I=1 + DO WHILE ( I .LE. N ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF (I .LT. IP) CALL CSYSWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL CSYSWAPR( UPLO, N, A, LDA, IP ,I ) + ELSE + IP=-IPIV(I) + I=I+1 + IF ( (I-1) .LT. IP) + $ CALL CSYSWAPR( UPLO, N, A, LDA, I-1 ,IP ) + IF ( (I-1) .GT. IP) + $ CALL CSYSWAPR( UPLO, N, A, LDA, IP ,I-1 ) + ENDIF + I=I+1 + END DO + ELSE +* +* LOWER... +* +* invA = P * inv(U**T)*inv(D)*inv(U)*P**T. +* + CALL CTRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D)*inv(U) +* + K=N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK(K,INVD) = ONE / A( K, K ) + WORK(K,INVD+1) = 0 + K=K-1 + ELSE +* 2 x 2 diagonal NNB + T = WORK(K-1,1) + AK = A( K-1, K-1 ) / T + AKP1 = A( K, K ) / T + AKKP1 = WORK(K-1,1) / T + D = T*( AK*AKP1-ONE ) + WORK(K-1,INVD) = AKP1 / D + WORK(K,INVD) = AK / D + WORK(K,INVD+1) = -AKKP1 / D + WORK(K-1,INVD+1) = -AKKP1 / D + K=K-2 + END IF + END DO +* +* inv(U**T) = (inv(U))**T +* +* inv(U**T)*inv(D)*inv(U) +* + CUT=0 + DO WHILE (CUT .LT. N) + NNB=NB + IF (CUT + NNB .GE. N) THEN + NNB=N-CUT + ELSE + COUNT = 0 +* count negative elements, + DO I=CUT+1,CUT+NNB + IF (IPIV(I) .LT. 0) COUNT=COUNT+1 + END DO +* need a even number for a clear cut + IF (MOD(COUNT,2) .EQ. 1) NNB=NNB+1 + END IF +* L21 Block + DO I=1,N-CUT-NNB + DO J=1,NNB + WORK(I,J)=A(CUT+NNB+I,CUT+J) + END DO + END DO +* L11 Block + DO I=1,NNB + WORK(U11+I,I)=ONE + DO J=I+1,NNB + WORK(U11+I,J)=ZERO + END DO + DO J=1,I-1 + WORK(U11+I,J)=A(CUT+I,CUT+J) + END DO + END DO +* +* invD*L21 +* + I=N-CUT-NNB + DO WHILE (I .GE. 1) + IF (IPIV(CUT+NNB+I) > 0) THEN + DO J=1,NNB + WORK(I,J)=WORK(CUT+NNB+I,INVD)*WORK(I,J) + END DO + I=I-1 + ELSE + DO J=1,NNB + U01_I_J = WORK(I,J) + U01_IP1_J = WORK(I-1,J) + WORK(I,J)=WORK(CUT+NNB+I,INVD)*U01_I_J+ + $ WORK(CUT+NNB+I,INVD+1)*U01_IP1_J + WORK(I-1,J)=WORK(CUT+NNB+I-1,INVD+1)*U01_I_J+ + $ WORK(CUT+NNB+I-1,INVD)*U01_IP1_J + END DO + I=I-2 + END IF + END DO +* +* invD1*L11 +* + I=NNB + DO WHILE (I .GE. 1) + IF (IPIV(CUT+I) > 0) THEN + DO J=1,NNB + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + END DO + I=I-1 + ELSE + DO J=1,NNB + U11_I_J = WORK(U11+I,J) + U11_IP1_J = WORK(U11+I-1,J) + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + + $ WORK(CUT+I,INVD+1)*U11_IP1_J + WORK(U11+I-1,J)=WORK(CUT+I-1,INVD+1)*U11_I_J+ + $ WORK(CUT+I-1,INVD)*U11_IP1_J + END DO + I=I-2 + END IF + END DO +* +* L11**T*invD1*L11->L11 +* + CALL CTRMM('L',UPLO,'T','U',NNB, NNB, + $ ONE,A(CUT+1,CUT+1),LDA,WORK(U11+1,1),N+NB+1) +* + DO I=1,NNB + DO J=1,I + A(CUT+I,CUT+J)=WORK(U11+I,J) + END DO + END DO +* + IF ( (CUT+NNB) .LT. N ) THEN +* +* L21**T*invD2*L21->A(CUT+I,CUT+J) +* + CALL CGEMM('T','N',NNB,NNB,N-NNB-CUT,ONE,A(CUT+NNB+1,CUT+1) + $ ,LDA,WORK,N+NB+1, ZERO, WORK(U11+1,1), N+NB+1) + +* +* L11 = L11**T*invD1*L11 + U01**T*invD*U01 +* + DO I=1,NNB + DO J=1,I + A(CUT+I,CUT+J)=A(CUT+I,CUT+J)+WORK(U11+I,J) + END DO + END DO +* +* L01 = L22**T*invD2*L21 +* + CALL CTRMM('L',UPLO,'T','U', N-NNB-CUT, NNB, + $ ONE,A(CUT+NNB+1,CUT+NNB+1),LDA,WORK,N+NB+1) + +* Update L21 + DO I=1,N-CUT-NNB + DO J=1,NNB + A(CUT+NNB+I,CUT+J)=WORK(I,J) + END DO + END DO + ELSE +* +* L11 = L11**T*invD1*L11 +* + DO I=1,NNB + DO J=1,I + A(CUT+I,CUT+J)=WORK(U11+I,J) + END DO + END DO + END IF +* +* Next Block +* + CUT=CUT+NNB + END DO +* +* Apply PERMUTATIONS P and P**T: P * inv(U**T)*inv(D)*inv(U) *P**T +* + I=N + DO WHILE ( I .GE. 1 ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF (I .LT. IP) CALL CSYSWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL CSYSWAPR( UPLO, N, A, LDA, IP ,I ) + ELSE + IP=-IPIV(I) + IF ( I .LT. IP) CALL CSYSWAPR( UPLO, N, A, LDA, I ,IP ) + IF ( I .GT. IP) CALL CSYSWAPR( UPLO, N, A, LDA, IP ,I ) + I=I-1 + ENDIF + I=I-1 + END DO + END IF +* + RETURN +* +* End of CSYTRI2X +* + END + diff --git a/dspl/liblapack/SRC/csytri_3.f b/dspl/liblapack/SRC/csytri_3.f new file mode 100644 index 0000000..d618c26 --- /dev/null +++ b/dspl/liblapack/SRC/csytri_3.f @@ -0,0 +1,248 @@ +*> \brief \b CSYTRI_3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRI_3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRI_3( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), E( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> CSYTRI_3 computes the inverse of a complex symmetric indefinite +*> matrix A using the factorization computed by CSYTRF_RK or CSYTRF_BK: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> CSYTRI_3 sets the leading dimension of the workspace before calling +*> CSYTRI_3X that actually computes the inverse. This is the blocked +*> version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix. +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, diagonal of the block diagonal matrix D and +*> factors U or L as computed by CSYTRF_RK and CSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> On exit, if INFO = 0, the symmetric inverse of the original +*> matrix. +*> If UPLO = 'U': the upper triangular part of the inverse +*> is formed and the part of A below the diagonal is not +*> referenced; +*> If UPLO = 'L': the lower triangular part of the inverse +*> is formed and the part of A above the diagonal is not +*> referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSYTRF_RK or CSYTRF_BK. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N+NB+1)*(NB+3). +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= (N+NB+1)*(NB+3). +*> +*> If LDWORK = -1, then a workspace query is assumed; +*> the routine only calculates the optimal size of the optimal +*> size of the WORK array, returns this value as the first +*> entry of the WORK array, and no error message related to +*> LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> November 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CSYTRI_3( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, LQUERY + INTEGER LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CSYTRI_3X, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* Determine the block size +* + NB = MAX( 1, ILAENV( 1, 'CSYTRI_3', UPLO, N, -1, -1, -1 ) ) + LWKOPT = ( N+NB+1 ) * ( NB+3 ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF ( LWORK .LT. LWKOPT .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRI_3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK( 1 ) = LWKOPT + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + CALL CSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CSYTRI_3 +* + END diff --git a/dspl/liblapack/SRC/csytri_3x.f b/dspl/liblapack/SRC/csytri_3x.f new file mode 100644 index 0000000..5a81ee3 --- /dev/null +++ b/dspl/liblapack/SRC/csytri_3x.f @@ -0,0 +1,647 @@ +*> \brief \b CSYTRI_3X +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRI_3X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), E( * ), WORK( N+NB+1, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> CSYTRI_3X computes the inverse of a complex symmetric indefinite +*> matrix A using the factorization computed by CSYTRF_RK or CSYTRF_BK: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix. +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, diagonal of the block diagonal matrix D and +*> factors U or L as computed by CSYTRF_RK and CSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> On exit, if INFO = 0, the symmetric inverse of the original +*> matrix. +*> If UPLO = 'U': the upper triangular part of the inverse +*> is formed and the part of A below the diagonal is not +*> referenced; +*> If UPLO = 'L': the lower triangular part of the inverse +*> is formed and the part of A above the diagonal is not +*> referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSYTRF_RK or CSYTRF_BK. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N+NB+1,NB+3). +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> Block size. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexSYcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> June 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), E( * ), WORK( N+NB+1, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE, CZERO + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), + $ CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11 + COMPLEX AK, AKKP1, AKP1, D, T, U01_I_J, U01_IP1_J, + $ U11_I_J, U11_IP1_J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CSYSWAPR, CTRTRI, CTRMM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF +* +* Quick return if possible +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRI_3X', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN +* +* Workspace got Non-diag elements of D +* + DO K = 1, N + WORK( K, 1 ) = E( K ) + END DO +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO ) + $ RETURN + END DO + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO ) + $ RETURN + END DO + END IF +* + INFO = 0 +* +* Splitting Workspace +* U01 is a block ( N, NB+1 ) +* The first element of U01 is in WORK( 1, 1 ) +* U11 is a block ( NB+1, NB+1 ) +* The first element of U11 is in WORK( N+1, 1 ) +* + U11 = N +* +* INVD is a block ( N, 2 ) +* The first element of INVD is in WORK( 1, INVD ) +* + INVD = NB + 2 + + IF( UPPER ) THEN +* +* Begin Upper +* +* invA = P * inv(U**T) * inv(D) * inv(U) * P**T. +* + CALL CTRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D) * inv(U) +* + K = 1 + DO WHILE( K.LE.N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK( K, INVD ) = CONE / A( K, K ) + WORK( K, INVD+1 ) = CZERO + ELSE +* 2 x 2 diagonal NNB + T = WORK( K+1, 1 ) + AK = A( K, K ) / T + AKP1 = A( K+1, K+1 ) / T + AKKP1 = WORK( K+1, 1 ) / T + D = T*( AK*AKP1-CONE ) + WORK( K, INVD ) = AKP1 / D + WORK( K+1, INVD+1 ) = AK / D + WORK( K, INVD+1 ) = -AKKP1 / D + WORK( K+1, INVD ) = WORK( K, INVD+1 ) + K = K + 1 + END IF + K = K + 1 + END DO +* +* inv(U**T) = (inv(U))**T +* +* inv(U**T) * inv(D) * inv(U) +* + CUT = N + DO WHILE( CUT.GT.0 ) + NNB = NB + IF( CUT.LE.NNB ) THEN + NNB = CUT + ELSE + ICOUNT = 0 +* count negative elements, + DO I = CUT+1-NNB, CUT + IF( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1 + END DO +* need a even number for a clear cut + IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1 + END IF + + CUT = CUT - NNB +* +* U01 Block +* + DO I = 1, CUT + DO J = 1, NNB + WORK( I, J ) = A( I, CUT+J ) + END DO + END DO +* +* U11 Block +* + DO I = 1, NNB + WORK( U11+I, I ) = CONE + DO J = 1, I-1 + WORK( U11+I, J ) = CZERO + END DO + DO J = I+1, NNB + WORK( U11+I, J ) = A( CUT+I, CUT+J ) + END DO + END DO +* +* invD * U01 +* + I = 1 + DO WHILE( I.LE.CUT ) + IF( IPIV( I ).GT.0 ) THEN + DO J = 1, NNB + WORK( I, J ) = WORK( I, INVD ) * WORK( I, J ) + END DO + ELSE + DO J = 1, NNB + U01_I_J = WORK( I, J ) + U01_IP1_J = WORK( I+1, J ) + WORK( I, J ) = WORK( I, INVD ) * U01_I_J + $ + WORK( I, INVD+1 ) * U01_IP1_J + WORK( I+1, J ) = WORK( I+1, INVD ) * U01_I_J + $ + WORK( I+1, INVD+1 ) * U01_IP1_J + END DO + I = I + 1 + END IF + I = I + 1 + END DO +* +* invD1 * U11 +* + I = 1 + DO WHILE ( I.LE.NNB ) + IF( IPIV( CUT+I ).GT.0 ) THEN + DO J = I, NNB + WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) + END DO + ELSE + DO J = I, NNB + U11_I_J = WORK(U11+I,J) + U11_IP1_J = WORK(U11+I+1,J) + WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) + $ + WORK(CUT+I,INVD+1) * WORK(U11+I+1,J) + WORK( U11+I+1, J ) = WORK(CUT+I+1,INVD) * U11_I_J + $ + WORK(CUT+I+1,INVD+1) * U11_IP1_J + END DO + I = I + 1 + END IF + I = I + 1 + END DO +* +* U11**T * invD1 * U11 -> U11 +* + CALL CTRMM( 'L', 'U', 'T', 'U', NNB, NNB, + $ CONE, A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ), + $ N+NB+1 ) +* + DO I = 1, NNB + DO J = I, NNB + A( CUT+I, CUT+J ) = WORK( U11+I, J ) + END DO + END DO +* +* U01**T * invD * U01 -> A( CUT+I, CUT+J ) +* + CALL CGEMM( 'T', 'N', NNB, NNB, CUT, CONE, A( 1, CUT+1 ), + $ LDA, WORK, N+NB+1, CZERO, WORK(U11+1,1), + $ N+NB+1 ) + +* +* U11 = U11**T * invD1 * U11 + U01**T * invD * U01 +* + DO I = 1, NNB + DO J = I, NNB + A( CUT+I, CUT+J ) = A( CUT+I, CUT+J ) + WORK(U11+I,J) + END DO + END DO +* +* U01 = U00**T * invD0 * U01 +* + CALL CTRMM( 'L', UPLO, 'T', 'U', CUT, NNB, + $ CONE, A, LDA, WORK, N+NB+1 ) + +* +* Update U01 +* + DO I = 1, CUT + DO J = 1, NNB + A( I, CUT+J ) = WORK( I, J ) + END DO + END DO +* +* Next Block +* + END DO +* +* Apply PERMUTATIONS P and P**T: +* P * inv(U**T) * inv(D) * inv(U) * P**T. +* Interchange rows and columns I and IPIV(I) in reverse order +* from the formation order of IPIV vector for Upper case. +* +* ( We can use a loop over IPIV with increment 1, +* since the ABS value of IPIV(I) represents the row (column) +* index of the interchange with row (column) i in both 1x1 +* and 2x2 pivot cases, i.e. we don't need separate code branches +* for 1x1 and 2x2 pivot cases ) +* + DO I = 1, N + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + IF (I .LT. IP) CALL CSYSWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL CSYSWAPR( UPLO, N, A, LDA, IP ,I ) + END IF + END DO +* + ELSE +* +* Begin Lower +* +* inv A = P * inv(L**T) * inv(D) * inv(L) * P**T. +* + CALL CTRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D) * inv(L) +* + K = N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK( K, INVD ) = CONE / A( K, K ) + WORK( K, INVD+1 ) = CZERO + ELSE +* 2 x 2 diagonal NNB + T = WORK( K-1, 1 ) + AK = A( K-1, K-1 ) / T + AKP1 = A( K, K ) / T + AKKP1 = WORK( K-1, 1 ) / T + D = T*( AK*AKP1-CONE ) + WORK( K-1, INVD ) = AKP1 / D + WORK( K, INVD ) = AK / D + WORK( K, INVD+1 ) = -AKKP1 / D + WORK( K-1, INVD+1 ) = WORK( K, INVD+1 ) + K = K - 1 + END IF + K = K - 1 + END DO +* +* inv(L**T) = (inv(L))**T +* +* inv(L**T) * inv(D) * inv(L) +* + CUT = 0 + DO WHILE( CUT.LT.N ) + NNB = NB + IF( (CUT + NNB).GT.N ) THEN + NNB = N - CUT + ELSE + ICOUNT = 0 +* count negative elements, + DO I = CUT + 1, CUT+NNB + IF ( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1 + END DO +* need a even number for a clear cut + IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1 + END IF +* +* L21 Block +* + DO I = 1, N-CUT-NNB + DO J = 1, NNB + WORK( I, J ) = A( CUT+NNB+I, CUT+J ) + END DO + END DO +* +* L11 Block +* + DO I = 1, NNB + WORK( U11+I, I) = CONE + DO J = I+1, NNB + WORK( U11+I, J ) = CZERO + END DO + DO J = 1, I-1 + WORK( U11+I, J ) = A( CUT+I, CUT+J ) + END DO + END DO +* +* invD*L21 +* + I = N-CUT-NNB + DO WHILE( I.GE.1 ) + IF( IPIV( CUT+NNB+I ).GT.0 ) THEN + DO J = 1, NNB + WORK( I, J ) = WORK( CUT+NNB+I, INVD) * WORK( I, J) + END DO + ELSE + DO J = 1, NNB + U01_I_J = WORK(I,J) + U01_IP1_J = WORK(I-1,J) + WORK(I,J)=WORK(CUT+NNB+I,INVD)*U01_I_J+ + $ WORK(CUT+NNB+I,INVD+1)*U01_IP1_J + WORK(I-1,J)=WORK(CUT+NNB+I-1,INVD+1)*U01_I_J+ + $ WORK(CUT+NNB+I-1,INVD)*U01_IP1_J + END DO + I = I - 1 + END IF + I = I - 1 + END DO +* +* invD1*L11 +* + I = NNB + DO WHILE( I.GE.1 ) + IF( IPIV( CUT+I ).GT.0 ) THEN + DO J = 1, NNB + WORK( U11+I, J ) = WORK( CUT+I, INVD)*WORK(U11+I,J) + END DO + + ELSE + DO J = 1, NNB + U11_I_J = WORK( U11+I, J ) + U11_IP1_J = WORK( U11+I-1, J ) + WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) + $ + WORK(CUT+I,INVD+1) * U11_IP1_J + WORK( U11+I-1, J ) = WORK(CUT+I-1,INVD+1) * U11_I_J + $ + WORK(CUT+I-1,INVD) * U11_IP1_J + END DO + I = I - 1 + END IF + I = I - 1 + END DO +* +* L11**T * invD1 * L11 -> L11 +* + CALL CTRMM( 'L', UPLO, 'T', 'U', NNB, NNB, CONE, + $ A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ), + $ N+NB+1 ) + +* + DO I = 1, NNB + DO J = 1, I + A( CUT+I, CUT+J ) = WORK( U11+I, J ) + END DO + END DO +* + IF( (CUT+NNB).LT.N ) THEN +* +* L21**T * invD2*L21 -> A( CUT+I, CUT+J ) +* + CALL CGEMM( 'T', 'N', NNB, NNB, N-NNB-CUT, CONE, + $ A( CUT+NNB+1, CUT+1 ), LDA, WORK, N+NB+1, + $ CZERO, WORK( U11+1, 1 ), N+NB+1 ) + +* +* L11 = L11**T * invD1 * L11 + U01**T * invD * U01 +* + DO I = 1, NNB + DO J = 1, I + A( CUT+I, CUT+J ) = A( CUT+I, CUT+J )+WORK(U11+I,J) + END DO + END DO +* +* L01 = L22**T * invD2 * L21 +* + CALL CTRMM( 'L', UPLO, 'T', 'U', N-NNB-CUT, NNB, CONE, + $ A( CUT+NNB+1, CUT+NNB+1 ), LDA, WORK, + $ N+NB+1 ) +* +* Update L21 +* + DO I = 1, N-CUT-NNB + DO J = 1, NNB + A( CUT+NNB+I, CUT+J ) = WORK( I, J ) + END DO + END DO +* + ELSE +* +* L11 = L11**T * invD1 * L11 +* + DO I = 1, NNB + DO J = 1, I + A( CUT+I, CUT+J ) = WORK( U11+I, J ) + END DO + END DO + END IF +* +* Next Block +* + CUT = CUT + NNB +* + END DO +* +* Apply PERMUTATIONS P and P**T: +* P * inv(L**T) * inv(D) * inv(L) * P**T. +* Interchange rows and columns I and IPIV(I) in reverse order +* from the formation order of IPIV vector for Lower case. +* +* ( We can use a loop over IPIV with increment -1, +* since the ABS value of IPIV(I) represents the row (column) +* index of the interchange with row (column) i in both 1x1 +* and 2x2 pivot cases, i.e. we don't need separate code branches +* for 1x1 and 2x2 pivot cases ) +* + DO I = N, 1, -1 + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + IF (I .LT. IP) CALL CSYSWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL CSYSWAPR( UPLO, N, A, LDA, IP ,I ) + END IF + END DO +* + END IF +* + RETURN +* +* End of CSYTRI_3X +* + END + diff --git a/dspl/liblapack/SRC/csytri_rook.f b/dspl/liblapack/SRC/csytri_rook.f new file mode 100644 index 0000000..c375e81 --- /dev/null +++ b/dspl/liblapack/SRC/csytri_rook.f @@ -0,0 +1,451 @@ +*> \brief \b CSYTRI_ROOK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRI_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRI_ROOK( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYTRI_ROOK computes the inverse of a complex symmetric +*> matrix A using the factorization A = U*D*U**T or A = L*D*L**T +*> computed by CSYTRF_ROOK. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by CSYTRF_ROOK. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSYTRF_ROOK. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CSYTRI_ROOK( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE, CZERO + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), + $ CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KP, KSTEP + COMPLEX AK, AKKP1, AKP1, D, T, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX CDOTU + EXTERNAL LSAME, CDOTU +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CSWAP, CSYMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRI_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO ) + $ RETURN + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = CONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL CCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL CSYMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, CZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - CDOTU( K-1, WORK, 1, A( 1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = A( K, K+1 ) + AK = A( K, K ) / T + AKP1 = A( K+1, K+1 ) / T + AKKP1 = A( K, K+1 ) / T + D = T*( AK*AKP1-CONE ) + A( K, K ) = AKP1 / D + A( K+1, K+1 ) = AK / D + A( K, K+1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL CCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL CSYMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, CZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - CDOTU( K-1, WORK, 1, A( 1, K ), + $ 1 ) + A( K, K+1 ) = A( K, K+1 ) - + $ CDOTU( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 ) + CALL CCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 ) + CALL CSYMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, CZERO, + $ A( 1, K+1 ), 1 ) + A( K+1, K+1 ) = A( K+1, K+1 ) - + $ CDOTU( K-1, WORK, 1, A( 1, K+1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* Interchange rows and columns K and IPIV(K) in the leading +* submatrix A(1:k+1,1:k+1) +* + KP = IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.GT.1 ) + $ CALL CSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + CALL CSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + ELSE +* +* Interchange rows and columns K and K+1 with -IPIV(K) and +* -IPIV(K+1)in the leading submatrix A(1:k+1,1:k+1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.GT.1 ) + $ CALL CSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + CALL CSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA ) +* + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + TEMP = A( K, K+1 ) + A( K, K+1 ) = A( KP, K+1 ) + A( KP, K+1 ) = TEMP + END IF +* + K = K + 1 + KP = -IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.GT.1 ) + $ CALL CSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + CALL CSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + END IF +* + K = K + 1 + GO TO 30 + 40 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 50 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 60 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = CONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL CCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL CSYMV( UPLO, N-K,-CONE, A( K+1, K+1 ), LDA, WORK, 1, + $ CZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - CDOTU( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = A( K, K-1 ) + AK = A( K-1, K-1 ) / T + AKP1 = A( K, K ) / T + AKKP1 = A( K, K-1 ) / T + D = T*( AK*AKP1-CONE ) + A( K-1, K-1 ) = AKP1 / D + A( K, K ) = AK / D + A( K, K-1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL CCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL CSYMV( UPLO, N-K,-CONE, A( K+1, K+1 ), LDA, WORK, 1, + $ CZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - CDOTU( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + A( K, K-1 ) = A( K, K-1 ) - + $ CDOTU( N-K, A( K+1, K ), 1, A( K+1, K-1 ), + $ 1 ) + CALL CCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 ) + CALL CSYMV( UPLO, N-K,-CONE, A( K+1, K+1 ), LDA, WORK, 1, + $ CZERO, A( K+1, K-1 ), 1 ) + A( K-1, K-1 ) = A( K-1, K-1 ) - + $ CDOTU( N-K, WORK, 1, A( K+1, K-1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* Interchange rows and columns K and IPIV(K) in the trailing +* submatrix A(k-1:n,k-1:n) +* + KP = IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + CALL CSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + ELSE +* +* Interchange rows and columns K and K-1 with -IPIV(K) and +* -IPIV(K-1) in the trailing submatrix A(k-1:n,k-1:n) +* + KP = -IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + CALL CSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA ) +* + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + TEMP = A( K, K-1 ) + A( K, K-1 ) = A( KP, K-1 ) + A( KP, K-1 ) = TEMP + END IF +* + K = K - 1 + KP = -IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.LT.N ) + $ CALL CSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + CALL CSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + END IF +* + K = K - 1 + GO TO 50 + 60 CONTINUE + END IF +* + RETURN +* +* End of CSYTRI_ROOK +* + END diff --git a/dspl/liblapack/SRC/csytrs.f b/dspl/liblapack/SRC/csytrs.f new file mode 100644 index 0000000..3ab72b8 --- /dev/null +++ b/dspl/liblapack/SRC/csytrs.f @@ -0,0 +1,445 @@ +*> \brief \b CSYTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYTRS solves a system of linear equations A*X = B with a complex +*> symmetric matrix A using the factorization A = U*D*U**T or +*> A = L*D*L**T computed by CSYTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSYTRF. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP + COMPLEX AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CGERU, CSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**T. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL CGERU( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL CSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL CSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL CGERU( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL CGERU( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ), + $ LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K-1, K ) + AKM1 = A( K-1, K-1 ) / AKM1K + AK = A( K, K ) / AKM1K + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / AKM1K + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U**T *X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U**T(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL CGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ), + $ 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U**T(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + CALL CGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ), + $ 1, ONE, B( K, 1 ), LDB ) + CALL CGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, + $ A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB ) +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L**T. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL CGERU( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL CSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL CSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL CGERU( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL CGERU( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K+1, K ) + AKM1 = A( K, K ) / AKM1K + AK = A( K+1, K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / AKM1K + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L**T *X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L**T(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL CGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L**T(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL CGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL CGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K-1 ), 1, ONE, B( K-1, 1 ), + $ LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of CSYTRS +* + END diff --git a/dspl/liblapack/SRC/csytrs2.f b/dspl/liblapack/SRC/csytrs2.f new file mode 100644 index 0000000..1002b54 --- /dev/null +++ b/dspl/liblapack/SRC/csytrs2.f @@ -0,0 +1,361 @@ +*> \brief \b CSYTRS2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRS2( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYTRS2 solves a system of linear equations A*X = B with a COMPLEX +*> symmetric matrix A using the factorization A = U*D*U**T or +*> A = L*D*L**T computed by CSYTRF and converted by CSYCONV. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CSYTRF. +*> Note that A is input / output. This might be counter-intuitive, +*> and one may think that A is input only. A is input / output. This +*> is because, at the start of the subroutine, we permute A in a +*> "better" form and then we permute A back to its original form at +*> the end. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSYTRF. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CSYTRS2( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = (1.0E+0,0.0E+0) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IINFO, J, K, KP + COMPLEX AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSCAL, CSYCONV, CSWAP, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRS2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Convert A +* + CALL CSYCONV( UPLO, 'C', N, A, LDA, IPIV, WORK, IINFO ) +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**T. +* +* P**T * B + K=N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K-1 and -IPIV(K). + KP = -IPIV( K ) + IF( KP.EQ.-IPIV( K-1 ) ) + $ CALL CSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-2 + END IF + END DO +* +* Compute (U \P**T * B) -> B [ (U \P**T * B) ] +* + CALL CTRSM('L','U','N','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* Compute D \ B -> B [ D \ (U \P**T * B) ] +* + I=N + DO WHILE ( I .GE. 1 ) + IF( IPIV(I) .GT. 0 ) THEN + CALL CSCAL( NRHS, ONE / A( I, I ), B( I, 1 ), LDB ) + ELSEIF ( I .GT. 1) THEN + IF ( IPIV(I-1) .EQ. IPIV(I) ) THEN + AKM1K = WORK(I) + AKM1 = A( I-1, I-1 ) / AKM1K + AK = A( I, I ) / AKM1K + DENOM = AKM1*AK - ONE + DO 15 J = 1, NRHS + BKM1 = B( I-1, J ) / AKM1K + BK = B( I, J ) / AKM1K + B( I-1, J ) = ( AK*BKM1-BK ) / DENOM + B( I, J ) = ( AKM1*BK-BKM1 ) / DENOM + 15 CONTINUE + I = I - 1 + ENDIF + ENDIF + I = I - 1 + END DO +* +* Compute (U**T \ B) -> B [ U**T \ (D \ (U \P**T * B) ) ] +* + CALL CTRSM('L','U','T','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* P * B [ P * (U**T \ (D \ (U \P**T * B) )) ] +* + K=1 + DO WHILE ( K .LE. N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K-1 and -IPIV(K). + KP = -IPIV( K ) + IF( K .LT. N .AND. KP.EQ.-IPIV( K+1 ) ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+2 + ENDIF + END DO +* + ELSE +* +* Solve A*X = B, where A = L*D*L**T. +* +* P**T * B + K=1 + DO WHILE ( K .LE. N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K and -IPIV(K+1). + KP = -IPIV( K+1 ) + IF( KP.EQ.-IPIV( K ) ) + $ CALL CSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+2 + ENDIF + END DO +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL CTRSM('L','L','N','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* Compute D \ B -> B [ D \ (L \P**T * B) ] +* + I=1 + DO WHILE ( I .LE. N ) + IF( IPIV(I) .GT. 0 ) THEN + CALL CSCAL( NRHS, ONE / A( I, I ), B( I, 1 ), LDB ) + ELSE + AKM1K = WORK(I) + AKM1 = A( I, I ) / AKM1K + AK = A( I+1, I+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 25 J = 1, NRHS + BKM1 = B( I, J ) / AKM1K + BK = B( I+1, J ) / AKM1K + B( I, J ) = ( AK*BKM1-BK ) / DENOM + B( I+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 25 CONTINUE + I = I + 1 + ENDIF + I = I + 1 + END DO +* +* Compute (L**T \ B) -> B [ L**T \ (D \ (L \P**T * B) ) ] +* + CALL CTRSM('L','L','T','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* P * B [ P * (L**T \ (D \ (L \P**T * B) )) ] +* + K=N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K-1 and -IPIV(K). + KP = -IPIV( K ) + IF( K.GT.1 .AND. KP.EQ.-IPIV( K-1 ) ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-2 + ENDIF + END DO +* + END IF +* +* Revert A +* + CALL CSYCONV( UPLO, 'R', N, A, LDA, IPIV, WORK, IINFO ) +* + RETURN +* +* End of CSYTRS2 +* + END diff --git a/dspl/liblapack/SRC/csytrs_3.f b/dspl/liblapack/SRC/csytrs_3.f new file mode 100644 index 0000000..8008c5d --- /dev/null +++ b/dspl/liblapack/SRC/csytrs_3.f @@ -0,0 +1,371 @@ +*> \brief \b CSYTRS_3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRS_3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> CSYTRS_3 solves a system of linear equations A * X = B with a complex +*> symmetric matrix A using the factorization computed +*> by CSYTRF_RK or CSYTRF_BK: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This algorithm is using Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix: +*> = 'U': Upper triangular, form is A = P*U*D*(U**T)*(P**T); +*> = 'L': Lower triangular, form is A = P*L*D*(L**T)*(P**T). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> Diagonal of the block diagonal matrix D and factors U or L +*> as computed by CSYTRF_RK and CSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSYTRF_RK or CSYTRF_BK. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> June 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CSYTRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0,0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J, K, KP + COMPLEX AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSCAL, CSWAP, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRS_3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Begin Upper +* +* Solve A*X = B, where A = U*D*U**T. +* +* P**T * B +* +* Interchange rows K and IPIV(K) of matrix B in the same order +* that the formation order of IPIV(I) vector for Upper case. +* +* (We can do the simple loop over IPIV with decrement -1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = N, 1, -1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* +* Compute (U \P**T * B) -> B [ (U \P**T * B) ] +* + CALL CTRSM( 'L', 'U', 'N', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* Compute D \ B -> B [ D \ (U \P**T * B) ] +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN + CALL CSCAL( NRHS, ONE / A( I, I ), B( I, 1 ), LDB ) + ELSE IF ( I.GT.1 ) THEN + AKM1K = E( I ) + AKM1 = A( I-1, I-1 ) / AKM1K + AK = A( I, I ) / AKM1K + DENOM = AKM1*AK - ONE + DO J = 1, NRHS + BKM1 = B( I-1, J ) / AKM1K + BK = B( I, J ) / AKM1K + B( I-1, J ) = ( AK*BKM1-BK ) / DENOM + B( I, J ) = ( AKM1*BK-BKM1 ) / DENOM + END DO + I = I - 1 + END IF + I = I - 1 + END DO +* +* Compute (U**T \ B) -> B [ U**T \ (D \ (U \P**T * B) ) ] +* + CALL CTRSM( 'L', 'U', 'T', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* P * B [ P * (U**T \ (D \ (U \P**T * B) )) ] +* +* Interchange rows K and IPIV(K) of matrix B in reverse order +* from the formation order of IPIV(I) vector for Upper case. +* +* (We can do the simple loop over IPIV with increment 1, +* since the ABS value of IPIV( I ) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = 1, N, 1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* + ELSE +* +* Begin Lower +* +* Solve A*X = B, where A = L*D*L**T. +* +* P**T * B +* Interchange rows K and IPIV(K) of matrix B in the same order +* that the formation order of IPIV(I) vector for Lower case. +* +* (We can do the simple loop over IPIV with increment 1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = 1, N, 1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL CTRSM( 'L', 'L', 'N', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* Compute D \ B -> B [ D \ (L \P**T * B) ] +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN + CALL CSCAL( NRHS, ONE / A( I, I ), B( I, 1 ), LDB ) + ELSE IF( I.LT.N ) THEN + AKM1K = E( I ) + AKM1 = A( I, I ) / AKM1K + AK = A( I+1, I+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO J = 1, NRHS + BKM1 = B( I, J ) / AKM1K + BK = B( I+1, J ) / AKM1K + B( I, J ) = ( AK*BKM1-BK ) / DENOM + B( I+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + END DO + I = I + 1 + END IF + I = I + 1 + END DO +* +* Compute (L**T \ B) -> B [ L**T \ (D \ (L \P**T * B) ) ] +* + CALL CTRSM('L', 'L', 'T', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* P * B [ P * (L**T \ (D \ (L \P**T * B) )) ] +* +* Interchange rows K and IPIV(K) of matrix B in reverse order +* from the formation order of IPIV(I) vector for Lower case. +* +* (We can do the simple loop over IPIV with decrement -1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = N, 1, -1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* +* END Lower +* + END IF +* + RETURN +* +* End of CSYTRS_3 +* + END diff --git a/dspl/liblapack/SRC/csytrs_aa.f b/dspl/liblapack/SRC/csytrs_aa.f new file mode 100644 index 0000000..7cf9504 --- /dev/null +++ b/dspl/liblapack/SRC/csytrs_aa.f @@ -0,0 +1,285 @@ +*> \brief \b CSYTRS_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRS_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYTRS_AA solves a system of linear equations A*X = B with a complex +*> symmetric matrix A using the factorization A = U*T*U**T or +*> A = L*T*L**T computed by CSYTRF_AA. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*T*U**T; +*> = 'L': Lower triangular, form is A = L*T*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> Details of factors computed by CSYTRF_AA. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges as computed by CSYTRF_AA. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is DOUBLE array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER, LWORK >= MAX(1,3*N-2). +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* + COMPLEX ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER K, KP, LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACPY, CGTSV, CSWAP, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, 3*N-2 ) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRS_AA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + LWKOPT = (3*N-2) + WORK( 1 ) = LWKOPT + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*T*U**T. +* +* Pivot, P**T * B +* + DO K = 1, N + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* +* Compute (U \P**T * B) -> B [ (U \P**T * B) ] +* + CALL CTRSM('L', 'U', 'T', 'U', N-1, NRHS, ONE, A( 1, 2 ), LDA, + $ B( 2, 1 ), LDB) +* +* Compute T \ B -> B [ T \ (U \P**T * B) ] +* + CALL CLACPY( 'F', 1, N, A( 1, 1 ), LDA+1, WORK( N ), 1) + IF( N.GT.1 ) THEN + CALL CLACPY( 'F', 1, N-1, A( 1, 2 ), LDA+1, WORK( 1 ), 1 ) + CALL CLACPY( 'F', 1, N-1, A( 1, 2 ), LDA+1, WORK( 2*N ), 1 ) + END IF + CALL CGTSV( N, NRHS, WORK( 1 ), WORK( N ), WORK( 2*N ), B, LDB, + $ INFO ) +* +* Compute (U**T \ B) -> B [ U**T \ (T \ (U \P**T * B) ) ] +* + CALL CTRSM( 'L', 'U', 'N', 'U', N-1, NRHS, ONE, A( 1, 2 ), LDA, + $ B( 2, 1 ), LDB) +* +* Pivot, P * B [ P * (U**T \ (T \ (U \P**T * B) )) ] +* + DO K = N, 1, -1 + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* + ELSE +* +* Solve A*X = B, where A = L*T*L**T. +* +* Pivot, P**T * B +* + DO K = 1, N + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL CTRSM( 'L', 'L', 'N', 'U', N-1, NRHS, ONE, A( 2, 1 ), LDA, + $ B( 2, 1 ), LDB) +* +* Compute T \ B -> B [ T \ (L \P**T * B) ] +* + CALL CLACPY( 'F', 1, N, A(1, 1), LDA+1, WORK(N), 1) + IF( N.GT.1 ) THEN + CALL CLACPY( 'F', 1, N-1, A( 2, 1 ), LDA+1, WORK( 1 ), 1 ) + CALL CLACPY( 'F', 1, N-1, A( 2, 1 ), LDA+1, WORK( 2*N ), 1 ) + END IF + CALL CGTSV( N, NRHS, WORK( 1 ), WORK(N), WORK( 2*N ), B, LDB, + $ INFO) +* +* Compute (L**T \ B) -> B [ L**T \ (T \ (L \P**T * B) ) ] +* + CALL CTRSM( 'L', 'L', 'T', 'U', N-1, NRHS, ONE, A( 2, 1 ), LDA, + $ B( 2, 1 ), LDB) +* +* Pivot, P * B [ P * (L**T \ (T \ (L \P**T * B) )) ] +* + DO K = N, 1, -1 + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* + END IF +* + RETURN +* +* End of CSYTRS_AA +* + END diff --git a/dspl/liblapack/SRC/csytrs_aa_2stage.f b/dspl/liblapack/SRC/csytrs_aa_2stage.f new file mode 100644 index 0000000..03bccda --- /dev/null +++ b/dspl/liblapack/SRC/csytrs_aa_2stage.f @@ -0,0 +1,281 @@ +*> \brief \b CSYTRS_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRS_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, +* IPIV2, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYTRS_AA_2STAGE solves a system of linear equations A*X = B with a complex +*> symmetric matrix A using the factorization A = U*T*U**T or +*> A = L*T*L**T computed by CSYTRF_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*T*U**T; +*> = 'L': Lower triangular, form is A = L*T*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> Details of factors computed by CSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX array, dimension (LTB) +*> Details of factors computed by CSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> CSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] IPIV2 +*> \verbatim +*> IPIV2 is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> CSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER LDTB, NB + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGBTRS, CLASWP, CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LTB.LT.( 4*N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRS_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Read NB and compute LDTB +* + NB = INT( TB( 1 ) ) + LDTB = LTB/N +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*T*U**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL CLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (U**T \P**T * B) -> B [ (U**T \P**T * B) ] +* + CALL CTRSM( 'L', 'U', 'T', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (U**T \P**T * B) ] +* + CALL CGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (U \ B) -> B [ U \ (T \ (U**T \P**T * B) ) ] +* + CALL CTRSM( 'L', 'U', 'N', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (U \ (T \ (U**T \P**T * B) )) ] +* + CALL CLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF +* + ELSE +* +* Solve A*X = B, where A = L*T*L**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL CLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL CTRSM( 'L', 'L', 'N', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (L \P**T * B) ] +* + CALL CGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (L**T \ B) -> B [ L**T \ (T \ (L \P**T * B) ) ] +* + CALL CTRSM( 'L', 'L', 'T', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (L**T \ (T \ (L \P**T * B) )) ] +* + CALL CLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF + END IF +* + RETURN +* +* End of CSYTRS_AA_2STAGE +* + END diff --git a/dspl/liblapack/SRC/csytrs_rook.f b/dspl/liblapack/SRC/csytrs_rook.f new file mode 100644 index 0000000..3cfe45a --- /dev/null +++ b/dspl/liblapack/SRC/csytrs_rook.f @@ -0,0 +1,484 @@ +*> \brief \b CSYTRS_ROOK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CSYTRS_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CSYTRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CSYTRS_ROOK solves a system of linear equations A*X = B with +*> a complex symmetric matrix A using the factorization A = U*D*U**T or +*> A = L*D*L**T computed by CSYTRF_ROOK. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CSYTRF_ROOK. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CSYTRF_ROOK. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE CSYTRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP + COMPLEX AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CGERU, CSCAL, CSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CSYTRS_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**T. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL CGERU( K-1, NRHS, -CONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL CSCAL( NRHS, CONE / A( K, K ), B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K and -IPIV(K) THEN K-1 and -IPIV(K-1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K-1 ) + IF( KP.NE.K-1 ) + $ CALL CSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.GT.2 ) THEN + CALL CGERU( K-2, NRHS,-CONE, A( 1, K ), 1, B( K, 1 ), + $ LDB, B( 1, 1 ), LDB ) + CALL CGERU( K-2, NRHS,-CONE, A( 1, K-1 ), 1, B( K-1, 1 ), + $ LDB, B( 1, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K-1, K ) + AKM1 = A( K-1, K-1 ) / AKM1K + AK = A( K, K ) / AKM1K + DENOM = AKM1*AK - CONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / AKM1K + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U**T *X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U**T(K)), where U(K) is the transformation +* stored in column K of A. +* + IF( K.GT.1 ) + $ CALL CGEMV( 'Transpose', K-1, NRHS, -CONE, B, + $ LDB, A( 1, K ), 1, CONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U**T(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.GT.1 ) THEN + CALL CGEMV( 'Transpose', K-1, NRHS, -CONE, B, + $ LDB, A( 1, K ), 1, CONE, B( K, 1 ), LDB ) + CALL CGEMV( 'Transpose', K-1, NRHS, -CONE, B, + $ LDB, A( 1, K+1 ), 1, CONE, B( K+1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K) THEN K+1 and -IPIV(K+1). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K+1 ) + IF( KP.NE.K+1 ) + $ CALL CSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L**T. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL CGERU( N-K, NRHS, -CONE, A( K+1, K ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL CSCAL( NRHS, CONE / A( K, K ), B( K, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K and -IPIV(K) THEN K+1 and -IPIV(K+1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K+1 ) + IF( KP.NE.K+1 ) + $ CALL CSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL CGERU( N-K-1, NRHS,-CONE, A( K+2, K ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL CGERU( N-K-1, NRHS,-CONE, A( K+2, K+1 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K+1, K ) + AKM1 = A( K, K ) / AKM1K + AK = A( K+1, K+1 ) / AKM1K + DENOM = AKM1*AK - CONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / AKM1K + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L**T *X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L**T(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL CGEMV( 'Transpose', N-K, NRHS, -CONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, CONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L**T(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL CGEMV( 'Transpose', N-K, NRHS, -CONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, CONE, B( K, 1 ), LDB ) + CALL CGEMV( 'Transpose', N-K, NRHS, -CONE, B( K+1, 1 ), + $ LDB, A( K+1, K-1 ), 1, CONE, B( K-1, 1 ), + $ LDB ) + END IF +* +* Interchange rows K and -IPIV(K) THEN K-1 and -IPIV(K-1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K-1 ) + IF( KP.NE.K-1 ) + $ CALL CSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of CSYTRS_ROOK +* + END diff --git a/dspl/liblapack/SRC/ctbcon.f b/dspl/liblapack/SRC/ctbcon.f new file mode 100644 index 0000000..a11a55a --- /dev/null +++ b/dspl/liblapack/SRC/ctbcon.f @@ -0,0 +1,291 @@ +*> \brief \b CTBCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTBCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTBCON( NORM, UPLO, DIAG, N, KD, AB, LDAB, RCOND, WORK, +* RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER INFO, KD, LDAB, N +* REAL RCOND +* .. +* .. Array Arguments .. +* REAL RWORK( * ) +* COMPLEX AB( LDAB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTBCON estimates the reciprocal of the condition number of a +*> triangular band matrix A, in either the 1-norm or the infinity-norm. +*> +*> The norm of A is computed and an estimate is obtained for +*> norm(inv(A)), then the reciprocal of the condition number is +*> computed as +*> RCOND = 1 / ( norm(A) * norm(inv(A)) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals or subdiagonals of the +*> triangular band matrix A. KD >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> The upper or lower triangular band matrix A, stored in the +*> first kd+1 rows of the array. The j-th column of A is stored +*> in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> If DIAG = 'U', the diagonal elements of A are not referenced +*> and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(norm(A) * norm(inv(A))). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CTBCON( NORM, UPLO, DIAG, N, KD, AB, LDAB, RCOND, WORK, + $ RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, KD, LDAB, N + REAL RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX AB( LDAB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + REAL AINVNM, ANORM, SCALE, SMLNUM, XNORM + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL CLANTB, SLAMCH + EXTERNAL LSAME, ICAMAX, CLANTB, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CLATBS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = SLAMCH( 'Safe minimum' )*REAL( MAX( N, 1 ) ) +* +* Compute the 1-norm of the triangular matrix A or A**H. +* + ANORM = CLANTB( NORM, UPLO, DIAG, N, KD, AB, LDAB, RWORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the 1-norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL CLATBS( UPLO, 'No transpose', DIAG, NORMIN, N, KD, + $ AB, LDAB, WORK, SCALE, RWORK, INFO ) + ELSE +* +* Multiply by inv(A**H). +* + CALL CLATBS( UPLO, 'Conjugate transpose', DIAG, NORMIN, + $ N, KD, AB, LDAB, WORK, SCALE, RWORK, INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + XNORM = CABS1( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of CTBCON +* + END diff --git a/dspl/liblapack/SRC/ctbrfs.f b/dspl/liblapack/SRC/ctbrfs.f new file mode 100644 index 0000000..c6cb9f9 --- /dev/null +++ b/dspl/liblapack/SRC/ctbrfs.f @@ -0,0 +1,497 @@ +*> \brief \b CTBRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTBRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTBRFS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, +* LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, KD, LDAB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* REAL BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX AB( LDAB, * ), B( LDB, * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTBRFS provides error bounds and backward error estimates for the +*> solution to a system of linear equations with a triangular band +*> coefficient matrix. +*> +*> The solution matrix X must be computed by CTBTRS or some other +*> means before entering this routine. CTBRFS does not do iterative +*> refinement because doing so cannot improve the backward error. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals or subdiagonals of the +*> triangular band matrix A. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> The upper or lower triangular band matrix A, stored in the +*> first kd+1 rows of the array. The j-th column of A is stored +*> in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> If DIAG = 'U', the diagonal elements of A are not referenced +*> and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> The solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CTBRFS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, + $ LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, KD, LDAB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX AB( LDAB, * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANSN, TRANST + INTEGER I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CLACN2, CTBMV, CTBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, MIN, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = KD + 2 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL CCOPY( N, X( 1, J ), 1, WORK, 1 ) + CALL CTBMV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, WORK, 1 ) + CALL CAXPY( N, -ONE, B( 1, J ), 1, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = CABS1( X( K, J ) ) + DO 30 I = MAX( 1, K-KD ), K + RWORK( I ) = RWORK( I ) + + $ CABS1( AB( KD+1+I-K, K ) )*XK + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = CABS1( X( K, J ) ) + DO 50 I = MAX( 1, K-KD ), K - 1 + RWORK( I ) = RWORK( I ) + + $ CABS1( AB( KD+1+I-K, K ) )*XK + 50 CONTINUE + RWORK( K ) = RWORK( K ) + XK + 60 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = CABS1( X( K, J ) ) + DO 70 I = K, MIN( N, K+KD ) + RWORK( I ) = RWORK( I ) + + $ CABS1( AB( 1+I-K, K ) )*XK + 70 CONTINUE + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = CABS1( X( K, J ) ) + DO 90 I = K + 1, MIN( N, K+KD ) + RWORK( I ) = RWORK( I ) + + $ CABS1( AB( 1+I-K, K ) )*XK + 90 CONTINUE + RWORK( K ) = RWORK( K ) + XK + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A**H)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = MAX( 1, K-KD ), K + S = S + CABS1( AB( KD+1+I-K, K ) )* + $ CABS1( X( I, J ) ) + 110 CONTINUE + RWORK( K ) = RWORK( K ) + S + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = CABS1( X( K, J ) ) + DO 130 I = MAX( 1, K-KD ), K - 1 + S = S + CABS1( AB( KD+1+I-K, K ) )* + $ CABS1( X( I, J ) ) + 130 CONTINUE + RWORK( K ) = RWORK( K ) + S + 140 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, MIN( N, K+KD ) + S = S + CABS1( AB( 1+I-K, K ) )* + $ CABS1( X( I, J ) ) + 150 CONTINUE + RWORK( K ) = RWORK( K ) + S + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = CABS1( X( K, J ) ) + DO 170 I = K + 1, MIN( N, K+KD ) + S = S + CABS1( AB( 1+I-K, K ) )* + $ CABS1( X( I, J ) ) + 170 CONTINUE + RWORK( K ) = RWORK( K ) + S + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL CTBSV( UPLO, TRANST, DIAG, N, KD, AB, LDAB, WORK, + $ 1 ) + DO 220 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 230 CONTINUE + CALL CTBSV( UPLO, TRANSN, DIAG, N, KD, AB, LDAB, WORK, + $ 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of CTBRFS +* + END diff --git a/dspl/liblapack/SRC/ctbtrs.f b/dspl/liblapack/SRC/ctbtrs.f new file mode 100644 index 0000000..3ca70c6 --- /dev/null +++ b/dspl/liblapack/SRC/ctbtrs.f @@ -0,0 +1,244 @@ +*> \brief \b CTBTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTBTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTBTRS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, +* LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX AB( LDAB, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTBTRS solves a triangular system of the form +*> +*> A * X = B, A**T * X = B, or A**H * X = B, +*> +*> where A is a triangular band matrix of order N, and B is an +*> N-by-NRHS matrix. A check is made to verify that A is nonsingular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals or subdiagonals of the +*> triangular band matrix A. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX array, dimension (LDAB,N) +*> The upper or lower triangular band matrix A, stored in the +*> first kd+1 rows of AB. The j-th column of A is stored +*> in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> If DIAG = 'U', the diagonal elements of A are not referenced +*> and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, if INFO = 0, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of A is zero, +*> indicating that the matrix is singular and the +*> solutions X have not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CTBTRS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, + $ LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX AB( LDAB, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CTBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOUNIT = LSAME( DIAG, 'N' ) + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + DO 10 INFO = 1, N + IF( AB( KD+1, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE + DO 20 INFO = 1, N + IF( AB( 1, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + END IF + INFO = 0 +* +* Solve A * X = B, A**T * X = B, or A**H * X = B. +* + DO 30 J = 1, NRHS + CALL CTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, B( 1, J ), 1 ) + 30 CONTINUE +* + RETURN +* +* End of CTBTRS +* + END diff --git a/dspl/liblapack/SRC/ctfsm.f b/dspl/liblapack/SRC/ctfsm.f new file mode 100644 index 0000000..b4b26dd --- /dev/null +++ b/dspl/liblapack/SRC/ctfsm.f @@ -0,0 +1,1026 @@ +*> \brief \b CTFSM solves a matrix equation (one operand is a triangular matrix in RFP format). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTFSM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTFSM( TRANSR, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, +* B, LDB ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, DIAG, SIDE, TRANS, UPLO +* INTEGER LDB, M, N +* COMPLEX ALPHA +* .. +* .. Array Arguments .. +* COMPLEX A( 0: * ), B( 0: LDB-1, 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Level 3 BLAS like routine for A in RFP Format. +*> +*> CTFSM solves the matrix equation +*> +*> op( A )*X = alpha*B or X*op( A ) = alpha*B +*> +*> where alpha is a scalar, X and B are m by n matrices, A is a unit, or +*> non-unit, upper or lower triangular matrix and op( A ) is one of +*> +*> op( A ) = A or op( A ) = A**H. +*> +*> A is in Rectangular Full Packed (RFP) Format. +*> +*> The matrix X is overwritten on B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal Form of RFP A is stored; +*> = 'C': The Conjugate-transpose Form of RFP A is stored. +*> \endverbatim +*> +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> On entry, SIDE specifies whether op( A ) appears on the left +*> or right of X as follows: +*> +*> SIDE = 'L' or 'l' op( A )*X = alpha*B. +*> +*> SIDE = 'R' or 'r' X*op( A ) = alpha*B. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the RFP matrix A came from +*> an upper or lower triangular matrix as follows: +*> UPLO = 'U' or 'u' RFP A came from an upper triangular matrix +*> UPLO = 'L' or 'l' RFP A came from a lower triangular matrix +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> On entry, TRANS specifies the form of op( A ) to be used +*> in the matrix multiplication as follows: +*> +*> TRANS = 'N' or 'n' op( A ) = A. +*> +*> TRANS = 'C' or 'c' op( A ) = conjg( A' ). +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> On entry, DIAG specifies whether or not RFP A is unit +*> triangular as follows: +*> +*> DIAG = 'U' or 'u' A is assumed to be unit triangular. +*> +*> DIAG = 'N' or 'n' A is not assumed to be unit +*> triangular. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the number of rows of B. M must be at +*> least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of B. N must be +*> at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX +*> On entry, ALPHA specifies the scalar alpha. When alpha is +*> zero then A is not referenced and B need not be set before +*> entry. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (N*(N+1)/2) +*> NT = N*(N+1)/2. On entry, the matrix A in RFP Format. +*> RFP Format is described by TRANSR, UPLO and N as follows: +*> If TRANSR='N' then RFP A is (0:N,0:K-1) when N is even; +*> K=N/2. RFP A is (0:N-1,0:K) when N is odd; K=N/2. If +*> TRANSR = 'C' then RFP is the Conjugate-transpose of RFP A as +*> defined when TRANSR = 'N'. The contents of RFP A are defined +*> by UPLO as follows: If UPLO = 'U' the RFP A contains the NT +*> elements of upper packed A either in normal or +*> conjugate-transpose Format. If UPLO = 'L' the RFP A contains +*> the NT elements of lower packed A either in normal or +*> conjugate-transpose Format. The LDA of RFP A is (N+1)/2 when +*> TRANSR = 'C'. When TRANSR is 'N' the LDA is N+1 when N is +*> even and is N when is odd. +*> See the Note below for more details. Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> Before entry, the leading m by n part of the array B must +*> contain the right-hand side matrix B, and on exit is +*> overwritten by the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> On entry, LDB specifies the first dimension of B as declared +*> in the calling (sub) program. LDB must be at least +*> max( 1, m ). +*> Unchanged on exit. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Standard Packed Format when N is even. +*> We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> conjugate-transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> conjugate-transpose of the last three columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- -- +*> 03 04 05 33 43 53 +*> -- -- +*> 13 14 15 00 44 54 +*> -- +*> 23 24 25 10 11 55 +*> +*> 33 34 35 20 21 22 +*> -- +*> 00 44 45 30 31 32 +*> -- -- +*> 01 11 55 40 41 42 +*> -- -- -- +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- -- +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We next consider Standard Packed Format when N is odd. +*> We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> conjugate-transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> conjugate-transpose of the last two columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- +*> 02 03 04 00 33 43 +*> -- +*> 12 13 14 10 11 44 +*> +*> 22 23 24 20 21 22 +*> -- +*> 00 33 34 30 31 32 +*> -- -- +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- +*> 02 12 22 00 01 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 11 33 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTFSM( TRANSR, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, + $ B, LDB ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, DIAG, SIDE, TRANS, UPLO + INTEGER LDB, M, N + COMPLEX ALPHA +* .. +* .. Array Arguments .. + COMPLEX A( 0: * ), B( 0: LDB-1, 0: * ) +* .. +* +* ===================================================================== +* .. +* .. Parameters .. + COMPLEX CONE, CZERO + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ), + $ CZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LSIDE, MISODD, NISODD, NORMALTRANSR, + $ NOTRANS + INTEGER M1, M2, N1, N2, K, INFO, I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CGEMM, CTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LSIDE = LSAME( SIDE, 'L' ) + LOWER = LSAME( UPLO, 'L' ) + NOTRANS = LSAME( TRANS, 'N' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSIDE .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -2 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.NOTRANS .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -4 + ELSE IF( .NOT.LSAME( DIAG, 'N' ) .AND. .NOT.LSAME( DIAG, 'U' ) ) + $ THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -6 + ELSE IF( N.LT.0 ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTFSM ', -INFO ) + RETURN + END IF +* +* Quick return when ( (N.EQ.0).OR.(M.EQ.0) ) +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN +* +* Quick return when ALPHA.EQ.(0E+0,0E+0) +* + IF( ALPHA.EQ.CZERO ) THEN + DO 20 J = 0, N - 1 + DO 10 I = 0, M - 1 + B( I, J ) = CZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* + IF( LSIDE ) THEN +* +* SIDE = 'L' +* +* A is M-by-M. +* If M is odd, set NISODD = .TRUE., and M1 and M2. +* If M is even, NISODD = .FALSE., and M. +* + IF( MOD( M, 2 ).EQ.0 ) THEN + MISODD = .FALSE. + K = M / 2 + ELSE + MISODD = .TRUE. + IF( LOWER ) THEN + M2 = M / 2 + M1 = M - M2 + ELSE + M1 = M / 2 + M2 = M - M1 + END IF + END IF +* + IF( MISODD ) THEN +* +* SIDE = 'L' and N is odd +* + IF( NORMALTRANSR ) THEN +* +* SIDE = 'L', N is odd, and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'L', and +* TRANS = 'N' +* + IF( M.EQ.1 ) THEN + CALL CTRSM( 'L', 'L', 'N', DIAG, M1, N, ALPHA, + $ A, M, B, LDB ) + ELSE + CALL CTRSM( 'L', 'L', 'N', DIAG, M1, N, ALPHA, + $ A( 0 ), M, B, LDB ) + CALL CGEMM( 'N', 'N', M2, N, M1, -CONE, A( M1 ), + $ M, B, LDB, ALPHA, B( M1, 0 ), LDB ) + CALL CTRSM( 'L', 'U', 'C', DIAG, M2, N, CONE, + $ A( M ), M, B( M1, 0 ), LDB ) + END IF +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'L', and +* TRANS = 'C' +* + IF( M.EQ.1 ) THEN + CALL CTRSM( 'L', 'L', 'C', DIAG, M1, N, ALPHA, + $ A( 0 ), M, B, LDB ) + ELSE + CALL CTRSM( 'L', 'U', 'N', DIAG, M2, N, ALPHA, + $ A( M ), M, B( M1, 0 ), LDB ) + CALL CGEMM( 'C', 'N', M1, N, M2, -CONE, A( M1 ), + $ M, B( M1, 0 ), LDB, ALPHA, B, LDB ) + CALL CTRSM( 'L', 'L', 'C', DIAG, M1, N, CONE, + $ A( 0 ), M, B, LDB ) + END IF +* + END IF +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'N', and UPLO = 'U' +* + IF( .NOT.NOTRANS ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'U', and +* TRANS = 'N' +* + CALL CTRSM( 'L', 'L', 'N', DIAG, M1, N, ALPHA, + $ A( M2 ), M, B, LDB ) + CALL CGEMM( 'C', 'N', M2, N, M1, -CONE, A( 0 ), M, + $ B, LDB, ALPHA, B( M1, 0 ), LDB ) + CALL CTRSM( 'L', 'U', 'C', DIAG, M2, N, CONE, + $ A( M1 ), M, B( M1, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'U', and +* TRANS = 'C' +* + CALL CTRSM( 'L', 'U', 'N', DIAG, M2, N, ALPHA, + $ A( M1 ), M, B( M1, 0 ), LDB ) + CALL CGEMM( 'N', 'N', M1, N, M2, -CONE, A( 0 ), M, + $ B( M1, 0 ), LDB, ALPHA, B, LDB ) + CALL CTRSM( 'L', 'L', 'C', DIAG, M1, N, CONE, + $ A( M2 ), M, B, LDB ) +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'L', N is odd, and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'C', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'C', UPLO = 'L', and +* TRANS = 'N' +* + IF( M.EQ.1 ) THEN + CALL CTRSM( 'L', 'U', 'C', DIAG, M1, N, ALPHA, + $ A( 0 ), M1, B, LDB ) + ELSE + CALL CTRSM( 'L', 'U', 'C', DIAG, M1, N, ALPHA, + $ A( 0 ), M1, B, LDB ) + CALL CGEMM( 'C', 'N', M2, N, M1, -CONE, + $ A( M1*M1 ), M1, B, LDB, ALPHA, + $ B( M1, 0 ), LDB ) + CALL CTRSM( 'L', 'L', 'N', DIAG, M2, N, CONE, + $ A( 1 ), M1, B( M1, 0 ), LDB ) + END IF +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'C', UPLO = 'L', and +* TRANS = 'C' +* + IF( M.EQ.1 ) THEN + CALL CTRSM( 'L', 'U', 'N', DIAG, M1, N, ALPHA, + $ A( 0 ), M1, B, LDB ) + ELSE + CALL CTRSM( 'L', 'L', 'C', DIAG, M2, N, ALPHA, + $ A( 1 ), M1, B( M1, 0 ), LDB ) + CALL CGEMM( 'N', 'N', M1, N, M2, -CONE, + $ A( M1*M1 ), M1, B( M1, 0 ), LDB, + $ ALPHA, B, LDB ) + CALL CTRSM( 'L', 'U', 'N', DIAG, M1, N, CONE, + $ A( 0 ), M1, B, LDB ) + END IF +* + END IF +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'C', and UPLO = 'U' +* + IF( .NOT.NOTRANS ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'C', UPLO = 'U', and +* TRANS = 'N' +* + CALL CTRSM( 'L', 'U', 'C', DIAG, M1, N, ALPHA, + $ A( M2*M2 ), M2, B, LDB ) + CALL CGEMM( 'N', 'N', M2, N, M1, -CONE, A( 0 ), M2, + $ B, LDB, ALPHA, B( M1, 0 ), LDB ) + CALL CTRSM( 'L', 'L', 'N', DIAG, M2, N, CONE, + $ A( M1*M2 ), M2, B( M1, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'C', UPLO = 'U', and +* TRANS = 'C' +* + CALL CTRSM( 'L', 'L', 'C', DIAG, M2, N, ALPHA, + $ A( M1*M2 ), M2, B( M1, 0 ), LDB ) + CALL CGEMM( 'C', 'N', M1, N, M2, -CONE, A( 0 ), M2, + $ B( M1, 0 ), LDB, ALPHA, B, LDB ) + CALL CTRSM( 'L', 'U', 'N', DIAG, M1, N, CONE, + $ A( M2*M2 ), M2, B, LDB ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'L' and N is even +* + IF( NORMALTRANSR ) THEN +* +* SIDE = 'L', N is even, and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SIDE ='L', N is even, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'L', +* and TRANS = 'N' +* + CALL CTRSM( 'L', 'L', 'N', DIAG, K, N, ALPHA, + $ A( 1 ), M+1, B, LDB ) + CALL CGEMM( 'N', 'N', K, N, K, -CONE, A( K+1 ), + $ M+1, B, LDB, ALPHA, B( K, 0 ), LDB ) + CALL CTRSM( 'L', 'U', 'C', DIAG, K, N, CONE, + $ A( 0 ), M+1, B( K, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'L', +* and TRANS = 'C' +* + CALL CTRSM( 'L', 'U', 'N', DIAG, K, N, ALPHA, + $ A( 0 ), M+1, B( K, 0 ), LDB ) + CALL CGEMM( 'C', 'N', K, N, K, -CONE, A( K+1 ), + $ M+1, B( K, 0 ), LDB, ALPHA, B, LDB ) + CALL CTRSM( 'L', 'L', 'C', DIAG, K, N, CONE, + $ A( 1 ), M+1, B, LDB ) +* + END IF +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'N', and UPLO = 'U' +* + IF( .NOT.NOTRANS ) THEN +* +* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'U', +* and TRANS = 'N' +* + CALL CTRSM( 'L', 'L', 'N', DIAG, K, N, ALPHA, + $ A( K+1 ), M+1, B, LDB ) + CALL CGEMM( 'C', 'N', K, N, K, -CONE, A( 0 ), M+1, + $ B, LDB, ALPHA, B( K, 0 ), LDB ) + CALL CTRSM( 'L', 'U', 'C', DIAG, K, N, CONE, + $ A( K ), M+1, B( K, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'U', +* and TRANS = 'C' + CALL CTRSM( 'L', 'U', 'N', DIAG, K, N, ALPHA, + $ A( K ), M+1, B( K, 0 ), LDB ) + CALL CGEMM( 'N', 'N', K, N, K, -CONE, A( 0 ), M+1, + $ B( K, 0 ), LDB, ALPHA, B, LDB ) + CALL CTRSM( 'L', 'L', 'C', DIAG, K, N, CONE, + $ A( K+1 ), M+1, B, LDB ) +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'L', N is even, and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SIDE ='L', N is even, TRANSR = 'C', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='L', N is even, TRANSR = 'C', UPLO = 'L', +* and TRANS = 'N' +* + CALL CTRSM( 'L', 'U', 'C', DIAG, K, N, ALPHA, + $ A( K ), K, B, LDB ) + CALL CGEMM( 'C', 'N', K, N, K, -CONE, + $ A( K*( K+1 ) ), K, B, LDB, ALPHA, + $ B( K, 0 ), LDB ) + CALL CTRSM( 'L', 'L', 'N', DIAG, K, N, CONE, + $ A( 0 ), K, B( K, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'C', UPLO = 'L', +* and TRANS = 'C' +* + CALL CTRSM( 'L', 'L', 'C', DIAG, K, N, ALPHA, + $ A( 0 ), K, B( K, 0 ), LDB ) + CALL CGEMM( 'N', 'N', K, N, K, -CONE, + $ A( K*( K+1 ) ), K, B( K, 0 ), LDB, + $ ALPHA, B, LDB ) + CALL CTRSM( 'L', 'U', 'N', DIAG, K, N, CONE, + $ A( K ), K, B, LDB ) +* + END IF +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'C', and UPLO = 'U' +* + IF( .NOT.NOTRANS ) THEN +* +* SIDE ='L', N is even, TRANSR = 'C', UPLO = 'U', +* and TRANS = 'N' +* + CALL CTRSM( 'L', 'U', 'C', DIAG, K, N, ALPHA, + $ A( K*( K+1 ) ), K, B, LDB ) + CALL CGEMM( 'N', 'N', K, N, K, -CONE, A( 0 ), K, B, + $ LDB, ALPHA, B( K, 0 ), LDB ) + CALL CTRSM( 'L', 'L', 'N', DIAG, K, N, CONE, + $ A( K*K ), K, B( K, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'C', UPLO = 'U', +* and TRANS = 'C' +* + CALL CTRSM( 'L', 'L', 'C', DIAG, K, N, ALPHA, + $ A( K*K ), K, B( K, 0 ), LDB ) + CALL CGEMM( 'C', 'N', K, N, K, -CONE, A( 0 ), K, + $ B( K, 0 ), LDB, ALPHA, B, LDB ) + CALL CTRSM( 'L', 'U', 'N', DIAG, K, N, CONE, + $ A( K*( K+1 ) ), K, B, LDB ) +* + END IF +* + END IF +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'R' +* +* A is N-by-N. +* If N is odd, set NISODD = .TRUE., and N1 and N2. +* If N is even, NISODD = .FALSE., and K. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + NISODD = .FALSE. + K = N / 2 + ELSE + NISODD = .TRUE. + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF + END IF +* + IF( NISODD ) THEN +* +* SIDE = 'R' and N is odd +* + IF( NORMALTRANSR ) THEN +* +* SIDE = 'R', N is odd, and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'L', and +* TRANS = 'N' +* + CALL CTRSM( 'R', 'U', 'C', DIAG, M, N2, ALPHA, + $ A( N ), N, B( 0, N1 ), LDB ) + CALL CGEMM( 'N', 'N', M, N1, N2, -CONE, B( 0, N1 ), + $ LDB, A( N1 ), N, ALPHA, B( 0, 0 ), + $ LDB ) + CALL CTRSM( 'R', 'L', 'N', DIAG, M, N1, CONE, + $ A( 0 ), N, B( 0, 0 ), LDB ) +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'L', and +* TRANS = 'C' +* + CALL CTRSM( 'R', 'L', 'C', DIAG, M, N1, ALPHA, + $ A( 0 ), N, B( 0, 0 ), LDB ) + CALL CGEMM( 'N', 'C', M, N2, N1, -CONE, B( 0, 0 ), + $ LDB, A( N1 ), N, ALPHA, B( 0, N1 ), + $ LDB ) + CALL CTRSM( 'R', 'U', 'N', DIAG, M, N2, CONE, + $ A( N ), N, B( 0, N1 ), LDB ) +* + END IF +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'N', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'U', and +* TRANS = 'N' +* + CALL CTRSM( 'R', 'L', 'C', DIAG, M, N1, ALPHA, + $ A( N2 ), N, B( 0, 0 ), LDB ) + CALL CGEMM( 'N', 'N', M, N2, N1, -CONE, B( 0, 0 ), + $ LDB, A( 0 ), N, ALPHA, B( 0, N1 ), + $ LDB ) + CALL CTRSM( 'R', 'U', 'N', DIAG, M, N2, CONE, + $ A( N1 ), N, B( 0, N1 ), LDB ) +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'U', and +* TRANS = 'C' +* + CALL CTRSM( 'R', 'U', 'C', DIAG, M, N2, ALPHA, + $ A( N1 ), N, B( 0, N1 ), LDB ) + CALL CGEMM( 'N', 'C', M, N1, N2, -CONE, B( 0, N1 ), + $ LDB, A( 0 ), N, ALPHA, B( 0, 0 ), LDB ) + CALL CTRSM( 'R', 'L', 'N', DIAG, M, N1, CONE, + $ A( N2 ), N, B( 0, 0 ), LDB ) +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'R', N is odd, and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'C', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'C', UPLO = 'L', and +* TRANS = 'N' +* + CALL CTRSM( 'R', 'L', 'N', DIAG, M, N2, ALPHA, + $ A( 1 ), N1, B( 0, N1 ), LDB ) + CALL CGEMM( 'N', 'C', M, N1, N2, -CONE, B( 0, N1 ), + $ LDB, A( N1*N1 ), N1, ALPHA, B( 0, 0 ), + $ LDB ) + CALL CTRSM( 'R', 'U', 'C', DIAG, M, N1, CONE, + $ A( 0 ), N1, B( 0, 0 ), LDB ) +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'C', UPLO = 'L', and +* TRANS = 'C' +* + CALL CTRSM( 'R', 'U', 'N', DIAG, M, N1, ALPHA, + $ A( 0 ), N1, B( 0, 0 ), LDB ) + CALL CGEMM( 'N', 'N', M, N2, N1, -CONE, B( 0, 0 ), + $ LDB, A( N1*N1 ), N1, ALPHA, B( 0, N1 ), + $ LDB ) + CALL CTRSM( 'R', 'L', 'C', DIAG, M, N2, CONE, + $ A( 1 ), N1, B( 0, N1 ), LDB ) +* + END IF +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'C', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'C', UPLO = 'U', and +* TRANS = 'N' +* + CALL CTRSM( 'R', 'U', 'N', DIAG, M, N1, ALPHA, + $ A( N2*N2 ), N2, B( 0, 0 ), LDB ) + CALL CGEMM( 'N', 'C', M, N2, N1, -CONE, B( 0, 0 ), + $ LDB, A( 0 ), N2, ALPHA, B( 0, N1 ), + $ LDB ) + CALL CTRSM( 'R', 'L', 'C', DIAG, M, N2, CONE, + $ A( N1*N2 ), N2, B( 0, N1 ), LDB ) +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'C', UPLO = 'U', and +* TRANS = 'C' +* + CALL CTRSM( 'R', 'L', 'N', DIAG, M, N2, ALPHA, + $ A( N1*N2 ), N2, B( 0, N1 ), LDB ) + CALL CGEMM( 'N', 'N', M, N1, N2, -CONE, B( 0, N1 ), + $ LDB, A( 0 ), N2, ALPHA, B( 0, 0 ), + $ LDB ) + CALL CTRSM( 'R', 'U', 'C', DIAG, M, N1, CONE, + $ A( N2*N2 ), N2, B( 0, 0 ), LDB ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'R' and N is even +* + IF( NORMALTRANSR ) THEN +* +* SIDE = 'R', N is even, and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SIDE ='R', N is even, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'L', +* and TRANS = 'N' +* + CALL CTRSM( 'R', 'U', 'C', DIAG, M, K, ALPHA, + $ A( 0 ), N+1, B( 0, K ), LDB ) + CALL CGEMM( 'N', 'N', M, K, K, -CONE, B( 0, K ), + $ LDB, A( K+1 ), N+1, ALPHA, B( 0, 0 ), + $ LDB ) + CALL CTRSM( 'R', 'L', 'N', DIAG, M, K, CONE, + $ A( 1 ), N+1, B( 0, 0 ), LDB ) +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'L', +* and TRANS = 'C' +* + CALL CTRSM( 'R', 'L', 'C', DIAG, M, K, ALPHA, + $ A( 1 ), N+1, B( 0, 0 ), LDB ) + CALL CGEMM( 'N', 'C', M, K, K, -CONE, B( 0, 0 ), + $ LDB, A( K+1 ), N+1, ALPHA, B( 0, K ), + $ LDB ) + CALL CTRSM( 'R', 'U', 'N', DIAG, M, K, CONE, + $ A( 0 ), N+1, B( 0, K ), LDB ) +* + END IF +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'N', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'U', +* and TRANS = 'N' +* + CALL CTRSM( 'R', 'L', 'C', DIAG, M, K, ALPHA, + $ A( K+1 ), N+1, B( 0, 0 ), LDB ) + CALL CGEMM( 'N', 'N', M, K, K, -CONE, B( 0, 0 ), + $ LDB, A( 0 ), N+1, ALPHA, B( 0, K ), + $ LDB ) + CALL CTRSM( 'R', 'U', 'N', DIAG, M, K, CONE, + $ A( K ), N+1, B( 0, K ), LDB ) +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'U', +* and TRANS = 'C' +* + CALL CTRSM( 'R', 'U', 'C', DIAG, M, K, ALPHA, + $ A( K ), N+1, B( 0, K ), LDB ) + CALL CGEMM( 'N', 'C', M, K, K, -CONE, B( 0, K ), + $ LDB, A( 0 ), N+1, ALPHA, B( 0, 0 ), + $ LDB ) + CALL CTRSM( 'R', 'L', 'N', DIAG, M, K, CONE, + $ A( K+1 ), N+1, B( 0, 0 ), LDB ) +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'R', N is even, and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SIDE ='R', N is even, TRANSR = 'C', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is even, TRANSR = 'C', UPLO = 'L', +* and TRANS = 'N' +* + CALL CTRSM( 'R', 'L', 'N', DIAG, M, K, ALPHA, + $ A( 0 ), K, B( 0, K ), LDB ) + CALL CGEMM( 'N', 'C', M, K, K, -CONE, B( 0, K ), + $ LDB, A( ( K+1 )*K ), K, ALPHA, + $ B( 0, 0 ), LDB ) + CALL CTRSM( 'R', 'U', 'C', DIAG, M, K, CONE, + $ A( K ), K, B( 0, 0 ), LDB ) +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'C', UPLO = 'L', +* and TRANS = 'C' +* + CALL CTRSM( 'R', 'U', 'N', DIAG, M, K, ALPHA, + $ A( K ), K, B( 0, 0 ), LDB ) + CALL CGEMM( 'N', 'N', M, K, K, -CONE, B( 0, 0 ), + $ LDB, A( ( K+1 )*K ), K, ALPHA, + $ B( 0, K ), LDB ) + CALL CTRSM( 'R', 'L', 'C', DIAG, M, K, CONE, + $ A( 0 ), K, B( 0, K ), LDB ) +* + END IF +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'C', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is even, TRANSR = 'C', UPLO = 'U', +* and TRANS = 'N' +* + CALL CTRSM( 'R', 'U', 'N', DIAG, M, K, ALPHA, + $ A( ( K+1 )*K ), K, B( 0, 0 ), LDB ) + CALL CGEMM( 'N', 'C', M, K, K, -CONE, B( 0, 0 ), + $ LDB, A( 0 ), K, ALPHA, B( 0, K ), LDB ) + CALL CTRSM( 'R', 'L', 'C', DIAG, M, K, CONE, + $ A( K*K ), K, B( 0, K ), LDB ) +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'C', UPLO = 'U', +* and TRANS = 'C' +* + CALL CTRSM( 'R', 'L', 'N', DIAG, M, K, ALPHA, + $ A( K*K ), K, B( 0, K ), LDB ) + CALL CGEMM( 'N', 'N', M, K, K, -CONE, B( 0, K ), + $ LDB, A( 0 ), K, ALPHA, B( 0, 0 ), LDB ) + CALL CTRSM( 'R', 'U', 'C', DIAG, M, K, CONE, + $ A( ( K+1 )*K ), K, B( 0, 0 ), LDB ) +* + END IF +* + END IF +* + END IF +* + END IF + END IF +* + RETURN +* +* End of CTFSM +* + END diff --git a/dspl/liblapack/SRC/ctftri.f b/dspl/liblapack/SRC/ctftri.f new file mode 100644 index 0000000..2e51a3d --- /dev/null +++ b/dspl/liblapack/SRC/ctftri.f @@ -0,0 +1,492 @@ +*> \brief \b CTFTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTFTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTFTRI( TRANSR, UPLO, DIAG, N, A, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO, DIAG +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* COMPLEX A( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTFTRI computes the inverse of a triangular matrix A stored in RFP +*> format. +*> +*> This is a Level 3 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal TRANSR of RFP A is stored; +*> = 'C': The Conjugate-transpose TRANSR of RFP A is stored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension ( N*(N+1)/2 ); +*> On entry, the triangular matrix A in RFP format. RFP format +*> is described by TRANSR, UPLO, and N as follows: If TRANSR = +*> 'N' then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is +*> (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'C' then RFP is +*> the Conjugate-transpose of RFP A as defined when +*> TRANSR = 'N'. The contents of RFP A are defined by UPLO as +*> follows: If UPLO = 'U' the RFP A contains the nt elements of +*> upper packed A; If UPLO = 'L' the RFP A contains the nt +*> elements of lower packed A. The LDA of RFP A is (N+1)/2 when +*> TRANSR = 'C'. When TRANSR is 'N' the LDA is N+1 when N is +*> even and N is odd. See the Note below for more details. +*> +*> On exit, the (triangular) inverse of the original matrix, in +*> the same storage format. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, A(i,i) is exactly zero. The triangular +*> matrix is singular and its inverse can not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Standard Packed Format when N is even. +*> We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> conjugate-transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> conjugate-transpose of the last three columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- -- +*> 03 04 05 33 43 53 +*> -- -- +*> 13 14 15 00 44 54 +*> -- +*> 23 24 25 10 11 55 +*> +*> 33 34 35 20 21 22 +*> -- +*> 00 44 45 30 31 32 +*> -- -- +*> 01 11 55 40 41 42 +*> -- -- -- +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- -- +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We next consider Standard Packed Format when N is odd. +*> We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> conjugate-transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> conjugate-transpose of the last two columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- +*> 02 03 04 00 33 43 +*> -- +*> 12 13 14 10 11 44 +*> +*> 22 23 24 20 21 22 +*> -- +*> 00 33 34 30 31 32 +*> -- -- +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- +*> 02 12 22 00 01 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 11 33 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTFTRI( TRANSR, UPLO, DIAG, N, A, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO, DIAG + INTEGER INFO, N +* .. +* .. Array Arguments .. + COMPLEX A( 0: * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CONE + PARAMETER ( CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CTRMM, CTRTRI +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( .NOT.LSAME( DIAG, 'N' ) .AND. .NOT.LSAME( DIAG, 'U' ) ) + $ THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTFTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* If N is odd, set NISODD = .TRUE. +* If N is even, set K = N/2 and NISODD = .FALSE. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + ELSE + NISODD = .TRUE. + END IF +* +* Set N1 and N2 depending on LOWER +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* +* start execution: there are eight cases +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) +* T1 -> a(0), T2 -> a(n), S -> a(n1) +* + CALL CTRTRI( 'L', DIAG, N1, A( 0 ), N, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL CTRMM( 'R', 'L', 'N', DIAG, N2, N1, -CONE, A( 0 ), + $ N, A( N1 ), N ) + CALL CTRTRI( 'U', DIAG, N2, A( N ), N, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 + IF( INFO.GT.0 ) + $ RETURN + CALL CTRMM( 'L', 'U', 'C', DIAG, N2, N1, CONE, A( N ), N, + $ A( N1 ), N ) +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) +* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) +* T1 -> a(n2), T2 -> a(n1), S -> a(0) +* + CALL CTRTRI( 'L', DIAG, N1, A( N2 ), N, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL CTRMM( 'L', 'L', 'C', DIAG, N1, N2, -CONE, A( N2 ), + $ N, A( 0 ), N ) + CALL CTRTRI( 'U', DIAG, N2, A( N1 ), N, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 + IF( INFO.GT.0 ) + $ RETURN + CALL CTRMM( 'R', 'U', 'N', DIAG, N1, N2, CONE, A( N1 ), + $ N, A( 0 ), N ) +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is odd +* T1 -> a(0), T2 -> a(1), S -> a(0+n1*n1) +* + CALL CTRTRI( 'U', DIAG, N1, A( 0 ), N1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL CTRMM( 'L', 'U', 'N', DIAG, N1, N2, -CONE, A( 0 ), + $ N1, A( N1*N1 ), N1 ) + CALL CTRTRI( 'L', DIAG, N2, A( 1 ), N1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 + IF( INFO.GT.0 ) + $ RETURN + CALL CTRMM( 'R', 'L', 'C', DIAG, N1, N2, CONE, A( 1 ), + $ N1, A( N1*N1 ), N1 ) +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is odd +* T1 -> a(0+n2*n2), T2 -> a(0+n1*n2), S -> a(0) +* + CALL CTRTRI( 'U', DIAG, N1, A( N2*N2 ), N2, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL CTRMM( 'R', 'U', 'C', DIAG, N2, N1, -CONE, + $ A( N2*N2 ), N2, A( 0 ), N2 ) + CALL CTRTRI( 'L', DIAG, N2, A( N1*N2 ), N2, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 + IF( INFO.GT.0 ) + $ RETURN + CALL CTRMM( 'L', 'L', 'N', DIAG, N2, N1, CONE, + $ A( N1*N2 ), N2, A( 0 ), N2 ) + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1) +* + CALL CTRTRI( 'L', DIAG, K, A( 1 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL CTRMM( 'R', 'L', 'N', DIAG, K, K, -CONE, A( 1 ), + $ N+1, A( K+1 ), N+1 ) + CALL CTRTRI( 'U', DIAG, K, A( 0 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K + IF( INFO.GT.0 ) + $ RETURN + CALL CTRMM( 'L', 'U', 'C', DIAG, K, K, CONE, A( 0 ), N+1, + $ A( K+1 ), N+1 ) +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0) +* + CALL CTRTRI( 'L', DIAG, K, A( K+1 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL CTRMM( 'L', 'L', 'C', DIAG, K, K, -CONE, A( K+1 ), + $ N+1, A( 0 ), N+1 ) + CALL CTRTRI( 'U', DIAG, K, A( K ), N+1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K + IF( INFO.GT.0 ) + $ RETURN + CALL CTRMM( 'R', 'U', 'N', DIAG, K, K, CONE, A( K ), N+1, + $ A( 0 ), N+1 ) + END IF + ELSE +* +* N is even and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) +* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k +* + CALL CTRTRI( 'U', DIAG, K, A( K ), K, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL CTRMM( 'L', 'U', 'N', DIAG, K, K, -CONE, A( K ), K, + $ A( K*( K+1 ) ), K ) + CALL CTRTRI( 'L', DIAG, K, A( 0 ), K, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K + IF( INFO.GT.0 ) + $ RETURN + CALL CTRMM( 'R', 'L', 'C', DIAG, K, K, CONE, A( 0 ), K, + $ A( K*( K+1 ) ), K ) + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0) +* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k +* + CALL CTRTRI( 'U', DIAG, K, A( K*( K+1 ) ), K, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL CTRMM( 'R', 'U', 'C', DIAG, K, K, -CONE, + $ A( K*( K+1 ) ), K, A( 0 ), K ) + CALL CTRTRI( 'L', DIAG, K, A( K*K ), K, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K + IF( INFO.GT.0 ) + $ RETURN + CALL CTRMM( 'L', 'L', 'N', DIAG, K, K, CONE, A( K*K ), K, + $ A( 0 ), K ) + END IF + END IF + END IF +* + RETURN +* +* End of CTFTRI +* + END diff --git a/dspl/liblapack/SRC/ctfttp.f b/dspl/liblapack/SRC/ctfttp.f new file mode 100644 index 0000000..6cd47e1 --- /dev/null +++ b/dspl/liblapack/SRC/ctfttp.f @@ -0,0 +1,543 @@ +*> \brief \b CTFTTP copies a triangular matrix from the rectangular full packed format (TF) to the standard packed format (TP). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTFTTP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTFTTP( TRANSR, UPLO, N, ARF, AP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* COMPLEX AP( 0: * ), ARF( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTFTTP copies a triangular matrix A from rectangular full packed +*> format (TF) to standard packed format (TP). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': ARF is in Normal format; +*> = 'C': ARF is in Conjugate-transpose format; +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] ARF +*> \verbatim +*> ARF is COMPLEX array, dimension ( N*(N+1)/2 ), +*> On entry, the upper or lower triangular matrix A stored in +*> RFP format. For a further discussion see Notes below. +*> \endverbatim +*> +*> \param[out] AP +*> \verbatim +*> AP is COMPLEX array, dimension ( N*(N+1)/2 ), +*> On exit, the upper or lower triangular matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Standard Packed Format when N is even. +*> We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> conjugate-transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> conjugate-transpose of the last three columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- -- +*> 03 04 05 33 43 53 +*> -- -- +*> 13 14 15 00 44 54 +*> -- +*> 23 24 25 10 11 55 +*> +*> 33 34 35 20 21 22 +*> -- +*> 00 44 45 30 31 32 +*> -- -- +*> 01 11 55 40 41 42 +*> -- -- -- +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- -- +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We next consider Standard Packed Format when N is odd. +*> We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> conjugate-transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> conjugate-transpose of the last two columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- +*> 02 03 04 00 33 43 +*> -- +*> 12 13 14 10 11 44 +*> +*> 22 23 24 20 21 22 +*> -- +*> 00 33 34 30 31 32 +*> -- -- +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- +*> 02 12 22 00 01 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 11 33 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTFTTP( TRANSR, UPLO, N, ARF, AP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + COMPLEX AP( 0: * ), ARF( 0: * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K, NT + INTEGER I, J, IJ + INTEGER IJP, JP, LDA, JS +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Intrinsic Functions .. +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTFTTP', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( NORMALTRANSR ) THEN + AP( 0 ) = ARF( 0 ) + ELSE + AP( 0 ) = CONJG( ARF( 0 ) ) + END IF + RETURN + END IF +* +* Size of array ARF(0:NT-1) +* + NT = N*( N+1 ) / 2 +* +* Set N1 and N2 depending on LOWER +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* If N is odd, set NISODD = .TRUE. +* If N is even, set K = N/2 and NISODD = .FALSE. +* +* set lda of ARF^C; ARF^C is (0:(N+1)/2-1,0:N-noe) +* where noe = 0 if n is even, noe = 1 if n is odd +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + LDA = N + 1 + ELSE + NISODD = .TRUE. + LDA = N + END IF +* +* ARF^C has lda rows and n+1-noe cols +* + IF( .NOT.NORMALTRANSR ) + $ LDA = ( N+1 ) / 2 +* +* start execution: there are eight cases +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) +* T1 -> a(0), T2 -> a(n), S -> a(n1); lda = n +* + IJP = 0 + JP = 0 + DO J = 0, N2 + DO I = J, N - 1 + IJ = I + JP + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JP = JP + LDA + END DO + DO I = 0, N2 - 1 + DO J = 1 + I, N2 + IJ = I + J*LDA + AP( IJP ) = CONJG( ARF( IJ ) ) + IJP = IJP + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) +* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) +* T1 -> a(n2), T2 -> a(n1), S -> a(0) +* + IJP = 0 + DO J = 0, N1 - 1 + IJ = N2 + J + DO I = 0, J + AP( IJP ) = CONJG( ARF( IJ ) ) + IJP = IJP + 1 + IJ = IJ + LDA + END DO + END DO + JS = 0 + DO J = N1, N - 1 + IJ = JS + DO IJ = JS, JS + J + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is odd +* T1 -> A(0,0) , T2 -> A(1,0) , S -> A(0,n1) +* T1 -> a(0+0) , T2 -> a(1+0) , S -> a(0+n1*n1); lda=n1 +* + IJP = 0 + DO I = 0, N2 + DO IJ = I*( LDA+1 ), N*LDA - 1, LDA + AP( IJP ) = CONJG( ARF( IJ ) ) + IJP = IJP + 1 + END DO + END DO + JS = 1 + DO J = 0, N2 - 1 + DO IJ = JS, JS + N2 - J - 1 + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + 1 + END DO +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is odd +* T1 -> A(0,n1+1), T2 -> A(0,n1), S -> A(0,0) +* T1 -> a(n2*n2), T2 -> a(n1*n2), S -> a(0); lda = n2 +* + IJP = 0 + JS = N2*LDA + DO J = 0, N1 - 1 + DO IJ = JS, JS + J + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO + DO I = 0, N1 + DO IJ = I, I + ( N1+I )*LDA, LDA + AP( IJP ) = CONJG( ARF( IJ ) ) + IJP = IJP + 1 + END DO + END DO +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1) +* + IJP = 0 + JP = 0 + DO J = 0, K - 1 + DO I = J, N - 1 + IJ = 1 + I + JP + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JP = JP + LDA + END DO + DO I = 0, K - 1 + DO J = I, K - 1 + IJ = I + J*LDA + AP( IJP ) = CONJG( ARF( IJ ) ) + IJP = IJP + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0) +* + IJP = 0 + DO J = 0, K - 1 + IJ = K + 1 + J + DO I = 0, J + AP( IJP ) = CONJG( ARF( IJ ) ) + IJP = IJP + 1 + IJ = IJ + LDA + END DO + END DO + JS = 0 + DO J = K, N - 1 + IJ = JS + DO IJ = JS, JS + J + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO +* + END IF +* + ELSE +* +* N is even and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) +* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k +* + IJP = 0 + DO I = 0, K - 1 + DO IJ = I + ( I+1 )*LDA, ( N+1 )*LDA - 1, LDA + AP( IJP ) = CONJG( ARF( IJ ) ) + IJP = IJP + 1 + END DO + END DO + JS = 0 + DO J = 0, K - 1 + DO IJ = JS, JS + K - J - 1 + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + 1 + END DO +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0) +* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k +* + IJP = 0 + JS = ( K+1 )*LDA + DO J = 0, K - 1 + DO IJ = JS, JS + J + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO + DO I = 0, K - 1 + DO IJ = I, I + ( K+I )*LDA, LDA + AP( IJP ) = CONJG( ARF( IJ ) ) + IJP = IJP + 1 + END DO + END DO +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of CTFTTP +* + END diff --git a/dspl/liblapack/SRC/ctfttr.f b/dspl/liblapack/SRC/ctfttr.f new file mode 100644 index 0000000..3720b69 --- /dev/null +++ b/dspl/liblapack/SRC/ctfttr.f @@ -0,0 +1,538 @@ +*> \brief \b CTFTTR copies a triangular matrix from the rectangular full packed format (TF) to the standard full format (TR). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTFTTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTFTTR( TRANSR, UPLO, N, ARF, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. +* COMPLEX A( 0: LDA-1, 0: * ), ARF( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTFTTR copies a triangular matrix A from rectangular full packed +*> format (TF) to standard full format (TR). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': ARF is in Normal format; +*> = 'C': ARF is in Conjugate-transpose format; +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] ARF +*> \verbatim +*> ARF is COMPLEX array, dimension ( N*(N+1)/2 ), +*> On entry, the upper or lower triangular matrix A stored in +*> RFP format. For a further discussion see Notes below. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is COMPLEX array, dimension ( LDA, N ) +*> On exit, the triangular matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of the array A contains +*> the upper triangular matrix, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of the array A contains +*> the lower triangular matrix, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Standard Packed Format when N is even. +*> We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> conjugate-transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> conjugate-transpose of the last three columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- -- +*> 03 04 05 33 43 53 +*> -- -- +*> 13 14 15 00 44 54 +*> -- +*> 23 24 25 10 11 55 +*> +*> 33 34 35 20 21 22 +*> -- +*> 00 44 45 30 31 32 +*> -- -- +*> 01 11 55 40 41 42 +*> -- -- -- +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- -- +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We next consider Standard Packed Format when N is odd. +*> We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> conjugate-transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> conjugate-transpose of the last two columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- +*> 02 03 04 00 33 43 +*> -- +*> 12 13 14 10 11 44 +*> +*> 22 23 24 20 21 22 +*> -- +*> 00 33 34 30 31 32 +*> -- -- +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- +*> 02 12 22 00 01 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 11 33 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTFTTR( TRANSR, UPLO, N, ARF, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. + COMPLEX A( 0: LDA-1, 0: * ), ARF( 0: * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K, NT, NX2, NP1X2 + INTEGER I, J, L, IJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTFTTR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + IF( N.EQ.1 ) THEN + IF( NORMALTRANSR ) THEN + A( 0, 0 ) = ARF( 0 ) + ELSE + A( 0, 0 ) = CONJG( ARF( 0 ) ) + END IF + END IF + RETURN + END IF +* +* Size of array ARF(1:2,0:nt-1) +* + NT = N*( N+1 ) / 2 +* +* set N1 and N2 depending on LOWER: for N even N1=N2=K +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* If N is odd, set NISODD = .TRUE., LDA=N+1 and A is (N+1)--by--K2. +* If N is even, set K = N/2 and NISODD = .FALSE., LDA=N and A is +* N--by--(N+1)/2. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + IF( .NOT.LOWER ) + $ NP1X2 = N + N + 2 + ELSE + NISODD = .TRUE. + IF( .NOT.LOWER ) + $ NX2 = N + N + END IF +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) +* T1 -> a(0), T2 -> a(n), S -> a(n1); lda=n +* + IJ = 0 + DO J = 0, N2 + DO I = N1, N2 + J + A( N2+J, I ) = CONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + DO I = J, N - 1 + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) +* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) +* T1 -> a(n2), T2 -> a(n1), S -> a(0); lda=n +* + IJ = NT - N + DO J = N - 1, N1, -1 + DO I = 0, J + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO L = J - N1, N1 - 1 + A( J-N1, L ) = CONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + IJ = IJ - NX2 + END DO +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is odd +* T1 -> A(0,0) , T2 -> A(1,0) , S -> A(0,n1) +* T1 -> A(0+0) , T2 -> A(1+0) , S -> A(0+n1*n1); lda=n1 +* + IJ = 0 + DO J = 0, N2 - 1 + DO I = 0, J + A( J, I ) = CONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + DO I = N1 + J, N - 1 + A( I, N1+J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO + DO J = N2, N - 1 + DO I = 0, N1 - 1 + A( J, I ) = CONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is odd +* T1 -> A(0,n1+1), T2 -> A(0,n1), S -> A(0,0) +* T1 -> A(n2*n2), T2 -> A(n1*n2), S -> A(0); lda = n2 +* + IJ = 0 + DO J = 0, N1 + DO I = N1, N - 1 + A( J, I ) = CONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + END DO + DO J = 0, N1 - 1 + DO I = 0, J + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO L = N2 + J, N - 1 + A( N2+J, L ) = CONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + END DO +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1); lda=n+1 +* + IJ = 0 + DO J = 0, K - 1 + DO I = K, K + J + A( K+J, I ) = CONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + DO I = J, N - 1 + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0); lda=n+1 +* + IJ = NT - N - 1 + DO J = N - 1, K, -1 + DO I = 0, J + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO L = J - K, K - 1 + A( J-K, L ) = CONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + IJ = IJ - NP1X2 + END DO +* + END IF +* + ELSE +* +* N is even and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is even (see paper, A=B) +* T1 -> A(0,1) , T2 -> A(0,0) , S -> A(0,k+1) : +* T1 -> A(0+k) , T2 -> A(0+0) , S -> A(0+k*(k+1)); lda=k +* + IJ = 0 + J = K + DO I = K, N - 1 + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO J = 0, K - 2 + DO I = 0, J + A( J, I ) = CONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + DO I = K + 1 + J, N - 1 + A( I, K+1+J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO + DO J = K - 1, N - 1 + DO I = 0, K - 1 + A( J, I ) = CONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is even (see paper, A=B) +* T1 -> A(0,k+1) , T2 -> A(0,k) , S -> A(0,0) +* T1 -> A(0+k*(k+1)) , T2 -> A(0+k*k) , S -> A(0+0)); lda=k +* + IJ = 0 + DO J = 0, K + DO I = K, N - 1 + A( J, I ) = CONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + END DO + DO J = 0, K - 2 + DO I = 0, J + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO L = K + 1 + J, N - 1 + A( K+1+J, L ) = CONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + END DO +* +* Note that here J = K-1 +* + DO I = 0, J + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of CTFTTR +* + END diff --git a/dspl/liblapack/SRC/ctgevc.f b/dspl/liblapack/SRC/ctgevc.f new file mode 100644 index 0000000..342d562 --- /dev/null +++ b/dspl/liblapack/SRC/ctgevc.f @@ -0,0 +1,737 @@ +*> \brief \b CTGEVC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTGEVC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTGEVC( SIDE, HOWMNY, SELECT, N, S, LDS, P, LDP, VL, +* LDVL, VR, LDVR, MM, M, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER HOWMNY, SIDE +* INTEGER INFO, LDP, LDS, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* REAL RWORK( * ) +* COMPLEX P( LDP, * ), S( LDS, * ), VL( LDVL, * ), +* $ VR( LDVR, * ), WORK( * ) +* .. +* +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTGEVC computes some or all of the right and/or left eigenvectors of +*> a pair of complex matrices (S,P), where S and P are upper triangular. +*> Matrix pairs of this type are produced by the generalized Schur +*> factorization of a complex matrix pair (A,B): +*> +*> A = Q*S*Z**H, B = Q*P*Z**H +*> +*> as computed by CGGHRD + CHGEQZ. +*> +*> The right eigenvector x and the left eigenvector y of (S,P) +*> corresponding to an eigenvalue w are defined by: +*> +*> S*x = w*P*x, (y**H)*S = w*(y**H)*P, +*> +*> where y**H denotes the conjugate tranpose of y. +*> The eigenvalues are not input to this routine, but are computed +*> directly from the diagonal elements of S and P. +*> +*> This routine returns the matrices X and/or Y of right and left +*> eigenvectors of (S,P), or the products Z*X and/or Q*Y, +*> where Z and Q are input matrices. +*> If Q and Z are the unitary factors from the generalized Schur +*> factorization of a matrix pair (A,B), then Z*X and Q*Y +*> are the matrices of right and left eigenvectors of (A,B). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': compute right eigenvectors only; +*> = 'L': compute left eigenvectors only; +*> = 'B': compute both right and left eigenvectors. +*> \endverbatim +*> +*> \param[in] HOWMNY +*> \verbatim +*> HOWMNY is CHARACTER*1 +*> = 'A': compute all right and/or left eigenvectors; +*> = 'B': compute all right and/or left eigenvectors, +*> backtransformed by the matrices in VR and/or VL; +*> = 'S': compute selected right and/or left eigenvectors, +*> specified by the logical array SELECT. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> If HOWMNY='S', SELECT specifies the eigenvectors to be +*> computed. The eigenvector corresponding to the j-th +*> eigenvalue is computed if SELECT(j) = .TRUE.. +*> Not referenced if HOWMNY = 'A' or 'B'. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices S and P. N >= 0. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is COMPLEX array, dimension (LDS,N) +*> The upper triangular matrix S from a generalized Schur +*> factorization, as computed by CHGEQZ. +*> \endverbatim +*> +*> \param[in] LDS +*> \verbatim +*> LDS is INTEGER +*> The leading dimension of array S. LDS >= max(1,N). +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is COMPLEX array, dimension (LDP,N) +*> The upper triangular matrix P from a generalized Schur +*> factorization, as computed by CHGEQZ. P must have real +*> diagonal elements. +*> \endverbatim +*> +*> \param[in] LDP +*> \verbatim +*> LDP is INTEGER +*> The leading dimension of array P. LDP >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is COMPLEX array, dimension (LDVL,MM) +*> On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +*> contain an N-by-N matrix Q (usually the unitary matrix Q +*> of left Schur vectors returned by CHGEQZ). +*> On exit, if SIDE = 'L' or 'B', VL contains: +*> if HOWMNY = 'A', the matrix Y of left eigenvectors of (S,P); +*> if HOWMNY = 'B', the matrix Q*Y; +*> if HOWMNY = 'S', the left eigenvectors of (S,P) specified by +*> SELECT, stored consecutively in the columns of +*> VL, in the same order as their eigenvalues. +*> Not referenced if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of array VL. LDVL >= 1, and if +*> SIDE = 'L' or 'l' or 'B' or 'b', LDVL >= N. +*> \endverbatim +*> +*> \param[in,out] VR +*> \verbatim +*> VR is COMPLEX array, dimension (LDVR,MM) +*> On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +*> contain an N-by-N matrix Q (usually the unitary matrix Z +*> of right Schur vectors returned by CHGEQZ). +*> On exit, if SIDE = 'R' or 'B', VR contains: +*> if HOWMNY = 'A', the matrix X of right eigenvectors of (S,P); +*> if HOWMNY = 'B', the matrix Z*X; +*> if HOWMNY = 'S', the right eigenvectors of (S,P) specified by +*> SELECT, stored consecutively in the columns of +*> VR, in the same order as their eigenvalues. +*> Not referenced if SIDE = 'L'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. LDVR >= 1, and if +*> SIDE = 'R' or 'B', LDVR >= N. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of columns in the arrays VL and/or VR. MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of columns in the arrays VL and/or VR actually +*> used to store the eigenvectors. If HOWMNY = 'A' or 'B', M +*> is set to N. Each selected eigenvector occupies one column. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexGEcomputational +* +* ===================================================================== + SUBROUTINE CTGEVC( SIDE, HOWMNY, SELECT, N, S, LDS, P, LDP, VL, + $ LDVL, VR, LDVR, MM, M, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDP, LDS, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + REAL RWORK( * ) + COMPLEX P( LDP, * ), S( LDS, * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL COMPL, COMPR, ILALL, ILBACK, ILBBAD, ILCOMP, + $ LSA, LSB + INTEGER I, IBEG, IEIG, IEND, IHWMNY, IM, ISIDE, ISRC, + $ J, JE, JR + REAL ACOEFA, ACOEFF, ANORM, ASCALE, BCOEFA, BIG, + $ BIGNUM, BNORM, BSCALE, DMIN, SAFMIN, SBETA, + $ SCALE, SMALL, TEMP, ULP, XMAX + COMPLEX BCOEFF, CA, CB, D, SALPHA, SUM, SUMA, SUMB, X +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + COMPLEX CLADIV + EXTERNAL LSAME, SLAMCH, CLADIV +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, SLABAD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL +* .. +* .. Statement Functions .. + REAL ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( REAL( X ) ) + ABS( AIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode and Test the input parameters +* + IF( LSAME( HOWMNY, 'A' ) ) THEN + IHWMNY = 1 + ILALL = .TRUE. + ILBACK = .FALSE. + ELSE IF( LSAME( HOWMNY, 'S' ) ) THEN + IHWMNY = 2 + ILALL = .FALSE. + ILBACK = .FALSE. + ELSE IF( LSAME( HOWMNY, 'B' ) ) THEN + IHWMNY = 3 + ILALL = .TRUE. + ILBACK = .TRUE. + ELSE + IHWMNY = -1 + END IF +* + IF( LSAME( SIDE, 'R' ) ) THEN + ISIDE = 1 + COMPL = .FALSE. + COMPR = .TRUE. + ELSE IF( LSAME( SIDE, 'L' ) ) THEN + ISIDE = 2 + COMPL = .TRUE. + COMPR = .FALSE. + ELSE IF( LSAME( SIDE, 'B' ) ) THEN + ISIDE = 3 + COMPL = .TRUE. + COMPR = .TRUE. + ELSE + ISIDE = -1 + END IF +* + INFO = 0 + IF( ISIDE.LT.0 ) THEN + INFO = -1 + ELSE IF( IHWMNY.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDS.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDP.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTGEVC', -INFO ) + RETURN + END IF +* +* Count the number of eigenvectors +* + IF( .NOT.ILALL ) THEN + IM = 0 + DO 10 J = 1, N + IF( SELECT( J ) ) + $ IM = IM + 1 + 10 CONTINUE + ELSE + IM = N + END IF +* +* Check diagonal of B +* + ILBBAD = .FALSE. + DO 20 J = 1, N + IF( AIMAG( P( J, J ) ).NE.ZERO ) + $ ILBBAD = .TRUE. + 20 CONTINUE +* + IF( ILBBAD ) THEN + INFO = -7 + ELSE IF( COMPL .AND. LDVL.LT.N .OR. LDVL.LT.1 ) THEN + INFO = -10 + ELSE IF( COMPR .AND. LDVR.LT.N .OR. LDVR.LT.1 ) THEN + INFO = -12 + ELSE IF( MM.LT.IM ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTGEVC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = IM + IF( N.EQ.0 ) + $ RETURN +* +* Machine Constants +* + SAFMIN = SLAMCH( 'Safe minimum' ) + BIG = ONE / SAFMIN + CALL SLABAD( SAFMIN, BIG ) + ULP = SLAMCH( 'Epsilon' )*SLAMCH( 'Base' ) + SMALL = SAFMIN*N / ULP + BIG = ONE / SMALL + BIGNUM = ONE / ( SAFMIN*N ) +* +* Compute the 1-norm of each column of the strictly upper triangular +* part of A and B to check for possible overflow in the triangular +* solver. +* + ANORM = ABS1( S( 1, 1 ) ) + BNORM = ABS1( P( 1, 1 ) ) + RWORK( 1 ) = ZERO + RWORK( N+1 ) = ZERO + DO 40 J = 2, N + RWORK( J ) = ZERO + RWORK( N+J ) = ZERO + DO 30 I = 1, J - 1 + RWORK( J ) = RWORK( J ) + ABS1( S( I, J ) ) + RWORK( N+J ) = RWORK( N+J ) + ABS1( P( I, J ) ) + 30 CONTINUE + ANORM = MAX( ANORM, RWORK( J )+ABS1( S( J, J ) ) ) + BNORM = MAX( BNORM, RWORK( N+J )+ABS1( P( J, J ) ) ) + 40 CONTINUE +* + ASCALE = ONE / MAX( ANORM, SAFMIN ) + BSCALE = ONE / MAX( BNORM, SAFMIN ) +* +* Left eigenvectors +* + IF( COMPL ) THEN + IEIG = 0 +* +* Main loop over eigenvalues +* + DO 140 JE = 1, N + IF( ILALL ) THEN + ILCOMP = .TRUE. + ELSE + ILCOMP = SELECT( JE ) + END IF + IF( ILCOMP ) THEN + IEIG = IEIG + 1 +* + IF( ABS1( S( JE, JE ) ).LE.SAFMIN .AND. + $ ABS( REAL( P( JE, JE ) ) ).LE.SAFMIN ) THEN +* +* Singular matrix pencil -- return unit eigenvector +* + DO 50 JR = 1, N + VL( JR, IEIG ) = CZERO + 50 CONTINUE + VL( IEIG, IEIG ) = CONE + GO TO 140 + END IF +* +* Non-singular eigenvalue: +* Compute coefficients a and b in +* H +* y ( a A - b B ) = 0 +* + TEMP = ONE / MAX( ABS1( S( JE, JE ) )*ASCALE, + $ ABS( REAL( P( JE, JE ) ) )*BSCALE, SAFMIN ) + SALPHA = ( TEMP*S( JE, JE ) )*ASCALE + SBETA = ( TEMP*REAL( P( JE, JE ) ) )*BSCALE + ACOEFF = SBETA*ASCALE + BCOEFF = SALPHA*BSCALE +* +* Scale to avoid underflow +* + LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEFF ).LT.SMALL + LSB = ABS1( SALPHA ).GE.SAFMIN .AND. ABS1( BCOEFF ).LT. + $ SMALL +* + SCALE = ONE + IF( LSA ) + $ SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG ) + IF( LSB ) + $ SCALE = MAX( SCALE, ( SMALL / ABS1( SALPHA ) )* + $ MIN( BNORM, BIG ) ) + IF( LSA .OR. LSB ) THEN + SCALE = MIN( SCALE, ONE / + $ ( SAFMIN*MAX( ONE, ABS( ACOEFF ), + $ ABS1( BCOEFF ) ) ) ) + IF( LSA ) THEN + ACOEFF = ASCALE*( SCALE*SBETA ) + ELSE + ACOEFF = SCALE*ACOEFF + END IF + IF( LSB ) THEN + BCOEFF = BSCALE*( SCALE*SALPHA ) + ELSE + BCOEFF = SCALE*BCOEFF + END IF + END IF +* + ACOEFA = ABS( ACOEFF ) + BCOEFA = ABS1( BCOEFF ) + XMAX = ONE + DO 60 JR = 1, N + WORK( JR ) = CZERO + 60 CONTINUE + WORK( JE ) = CONE + DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN ) +* +* H +* Triangular solve of (a A - b B) y = 0 +* +* H +* (rowwise in (a A - b B) , or columnwise in a A - b B) +* + DO 100 J = JE + 1, N +* +* Compute +* j-1 +* SUM = sum conjg( a*S(k,j) - b*P(k,j) )*x(k) +* k=je +* (Scale if necessary) +* + TEMP = ONE / XMAX + IF( ACOEFA*RWORK( J )+BCOEFA*RWORK( N+J ).GT.BIGNUM* + $ TEMP ) THEN + DO 70 JR = JE, J - 1 + WORK( JR ) = TEMP*WORK( JR ) + 70 CONTINUE + XMAX = ONE + END IF + SUMA = CZERO + SUMB = CZERO +* + DO 80 JR = JE, J - 1 + SUMA = SUMA + CONJG( S( JR, J ) )*WORK( JR ) + SUMB = SUMB + CONJG( P( JR, J ) )*WORK( JR ) + 80 CONTINUE + SUM = ACOEFF*SUMA - CONJG( BCOEFF )*SUMB +* +* Form x(j) = - SUM / conjg( a*S(j,j) - b*P(j,j) ) +* +* with scaling and perturbation of the denominator +* + D = CONJG( ACOEFF*S( J, J )-BCOEFF*P( J, J ) ) + IF( ABS1( D ).LE.DMIN ) + $ D = CMPLX( DMIN ) +* + IF( ABS1( D ).LT.ONE ) THEN + IF( ABS1( SUM ).GE.BIGNUM*ABS1( D ) ) THEN + TEMP = ONE / ABS1( SUM ) + DO 90 JR = JE, J - 1 + WORK( JR ) = TEMP*WORK( JR ) + 90 CONTINUE + XMAX = TEMP*XMAX + SUM = TEMP*SUM + END IF + END IF + WORK( J ) = CLADIV( -SUM, D ) + XMAX = MAX( XMAX, ABS1( WORK( J ) ) ) + 100 CONTINUE +* +* Back transform eigenvector if HOWMNY='B'. +* + IF( ILBACK ) THEN + CALL CGEMV( 'N', N, N+1-JE, CONE, VL( 1, JE ), LDVL, + $ WORK( JE ), 1, CZERO, WORK( N+1 ), 1 ) + ISRC = 2 + IBEG = 1 + ELSE + ISRC = 1 + IBEG = JE + END IF +* +* Copy and scale eigenvector into column of VL +* + XMAX = ZERO + DO 110 JR = IBEG, N + XMAX = MAX( XMAX, ABS1( WORK( ( ISRC-1 )*N+JR ) ) ) + 110 CONTINUE +* + IF( XMAX.GT.SAFMIN ) THEN + TEMP = ONE / XMAX + DO 120 JR = IBEG, N + VL( JR, IEIG ) = TEMP*WORK( ( ISRC-1 )*N+JR ) + 120 CONTINUE + ELSE + IBEG = N + 1 + END IF +* + DO 130 JR = 1, IBEG - 1 + VL( JR, IEIG ) = CZERO + 130 CONTINUE +* + END IF + 140 CONTINUE + END IF +* +* Right eigenvectors +* + IF( COMPR ) THEN + IEIG = IM + 1 +* +* Main loop over eigenvalues +* + DO 250 JE = N, 1, -1 + IF( ILALL ) THEN + ILCOMP = .TRUE. + ELSE + ILCOMP = SELECT( JE ) + END IF + IF( ILCOMP ) THEN + IEIG = IEIG - 1 +* + IF( ABS1( S( JE, JE ) ).LE.SAFMIN .AND. + $ ABS( REAL( P( JE, JE ) ) ).LE.SAFMIN ) THEN +* +* Singular matrix pencil -- return unit eigenvector +* + DO 150 JR = 1, N + VR( JR, IEIG ) = CZERO + 150 CONTINUE + VR( IEIG, IEIG ) = CONE + GO TO 250 + END IF +* +* Non-singular eigenvalue: +* Compute coefficients a and b in +* +* ( a A - b B ) x = 0 +* + TEMP = ONE / MAX( ABS1( S( JE, JE ) )*ASCALE, + $ ABS( REAL( P( JE, JE ) ) )*BSCALE, SAFMIN ) + SALPHA = ( TEMP*S( JE, JE ) )*ASCALE + SBETA = ( TEMP*REAL( P( JE, JE ) ) )*BSCALE + ACOEFF = SBETA*ASCALE + BCOEFF = SALPHA*BSCALE +* +* Scale to avoid underflow +* + LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEFF ).LT.SMALL + LSB = ABS1( SALPHA ).GE.SAFMIN .AND. ABS1( BCOEFF ).LT. + $ SMALL +* + SCALE = ONE + IF( LSA ) + $ SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG ) + IF( LSB ) + $ SCALE = MAX( SCALE, ( SMALL / ABS1( SALPHA ) )* + $ MIN( BNORM, BIG ) ) + IF( LSA .OR. LSB ) THEN + SCALE = MIN( SCALE, ONE / + $ ( SAFMIN*MAX( ONE, ABS( ACOEFF ), + $ ABS1( BCOEFF ) ) ) ) + IF( LSA ) THEN + ACOEFF = ASCALE*( SCALE*SBETA ) + ELSE + ACOEFF = SCALE*ACOEFF + END IF + IF( LSB ) THEN + BCOEFF = BSCALE*( SCALE*SALPHA ) + ELSE + BCOEFF = SCALE*BCOEFF + END IF + END IF +* + ACOEFA = ABS( ACOEFF ) + BCOEFA = ABS1( BCOEFF ) + XMAX = ONE + DO 160 JR = 1, N + WORK( JR ) = CZERO + 160 CONTINUE + WORK( JE ) = CONE + DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN ) +* +* Triangular solve of (a A - b B) x = 0 (columnwise) +* +* WORK(1:j-1) contains sums w, +* WORK(j+1:JE) contains x +* + DO 170 JR = 1, JE - 1 + WORK( JR ) = ACOEFF*S( JR, JE ) - BCOEFF*P( JR, JE ) + 170 CONTINUE + WORK( JE ) = CONE +* + DO 210 J = JE - 1, 1, -1 +* +* Form x(j) := - w(j) / d +* with scaling and perturbation of the denominator +* + D = ACOEFF*S( J, J ) - BCOEFF*P( J, J ) + IF( ABS1( D ).LE.DMIN ) + $ D = CMPLX( DMIN ) +* + IF( ABS1( D ).LT.ONE ) THEN + IF( ABS1( WORK( J ) ).GE.BIGNUM*ABS1( D ) ) THEN + TEMP = ONE / ABS1( WORK( J ) ) + DO 180 JR = 1, JE + WORK( JR ) = TEMP*WORK( JR ) + 180 CONTINUE + END IF + END IF +* + WORK( J ) = CLADIV( -WORK( J ), D ) +* + IF( J.GT.1 ) THEN +* +* w = w + x(j)*(a S(*,j) - b P(*,j) ) with scaling +* + IF( ABS1( WORK( J ) ).GT.ONE ) THEN + TEMP = ONE / ABS1( WORK( J ) ) + IF( ACOEFA*RWORK( J )+BCOEFA*RWORK( N+J ).GE. + $ BIGNUM*TEMP ) THEN + DO 190 JR = 1, JE + WORK( JR ) = TEMP*WORK( JR ) + 190 CONTINUE + END IF + END IF +* + CA = ACOEFF*WORK( J ) + CB = BCOEFF*WORK( J ) + DO 200 JR = 1, J - 1 + WORK( JR ) = WORK( JR ) + CA*S( JR, J ) - + $ CB*P( JR, J ) + 200 CONTINUE + END IF + 210 CONTINUE +* +* Back transform eigenvector if HOWMNY='B'. +* + IF( ILBACK ) THEN + CALL CGEMV( 'N', N, JE, CONE, VR, LDVR, WORK, 1, + $ CZERO, WORK( N+1 ), 1 ) + ISRC = 2 + IEND = N + ELSE + ISRC = 1 + IEND = JE + END IF +* +* Copy and scale eigenvector into column of VR +* + XMAX = ZERO + DO 220 JR = 1, IEND + XMAX = MAX( XMAX, ABS1( WORK( ( ISRC-1 )*N+JR ) ) ) + 220 CONTINUE +* + IF( XMAX.GT.SAFMIN ) THEN + TEMP = ONE / XMAX + DO 230 JR = 1, IEND + VR( JR, IEIG ) = TEMP*WORK( ( ISRC-1 )*N+JR ) + 230 CONTINUE + ELSE + IEND = 0 + END IF +* + DO 240 JR = IEND + 1, N + VR( JR, IEIG ) = CZERO + 240 CONTINUE +* + END IF + 250 CONTINUE + END IF +* + RETURN +* +* End of CTGEVC +* + END diff --git a/dspl/liblapack/SRC/ctgex2.f b/dspl/liblapack/SRC/ctgex2.f new file mode 100644 index 0000000..7ac1784 --- /dev/null +++ b/dspl/liblapack/SRC/ctgex2.f @@ -0,0 +1,363 @@ +*> \brief \b CTGEX2 swaps adjacent diagonal blocks in an upper (quasi) triangular matrix pair by an unitary equivalence transformation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTGEX2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, +* LDZ, J1, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL WANTQ, WANTZ +* INTEGER INFO, J1, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTGEX2 swaps adjacent diagonal 1 by 1 blocks (A11,B11) and (A22,B22) +*> in an upper triangular matrix pair (A, B) by an unitary equivalence +*> transformation. +*> +*> (A, B) must be in generalized Schur canonical form, that is, A and +*> B are both upper triangular. +*> +*> Optionally, the matrices Q and Z of generalized Schur vectors are +*> updated. +*> +*> Q(in) * A(in) * Z(in)**H = Q(out) * A(out) * Z(out)**H +*> Q(in) * B(in) * Z(in)**H = Q(out) * B(out) * Z(out)**H +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTQ +*> \verbatim +*> WANTQ is LOGICAL +*> .TRUE. : update the left transformation matrix Q; +*> .FALSE.: do not update Q. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> .TRUE. : update the right transformation matrix Z; +*> .FALSE.: do not update Z. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the matrix A in the pair (A, B). +*> On exit, the updated matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> On entry, the matrix B in the pair (A, B). +*> On exit, the updated matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ,N) +*> If WANTQ = .TRUE, on entry, the unitary matrix Q. On exit, +*> the updated matrix Q. +*> Not referenced if WANTQ = .FALSE.. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= 1; +*> If WANTQ = .TRUE., LDQ >= N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ,N) +*> If WANTZ = .TRUE, on entry, the unitary matrix Z. On exit, +*> the updated matrix Z. +*> Not referenced if WANTZ = .FALSE.. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1; +*> If WANTZ = .TRUE., LDZ >= N. +*> \endverbatim +*> +*> \param[in] J1 +*> \verbatim +*> J1 is INTEGER +*> The index to the first block (A11, B11). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> =0: Successful exit. +*> =1: The transformed matrix pair (A, B) would be too far +*> from generalized Schur form; the problem is ill- +*> conditioned. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexGEauxiliary +* +*> \par Further Details: +* ===================== +*> +*> In the current code both weak and strong stability tests are +*> performed. The user can omit the strong stability test by changing +*> the internal logical parameter WANDS to .FALSE.. See ref. [2] for +*> details. +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +*> Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +*> M.S. Moonen et al (eds), Linear Algebra for Large Scale and +*> Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +*> \n +*> [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +*> Eigenvalues of a Regular Matrix Pair (A, B) and Condition +*> Estimation: Theory, Algorithms and Software, Report UMINF-94.04, +*> Department of Computing Science, Umea University, S-901 87 Umea, +*> Sweden, 1994. Also as LAPACK Working Note 87. To appear in +*> Numerical Algorithms, 1996. +*> +* ===================================================================== + SUBROUTINE CTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, J1, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER INFO, J1, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) + REAL TWENTY + PARAMETER ( TWENTY = 2.0E+1 ) + INTEGER LDST + PARAMETER ( LDST = 2 ) + LOGICAL WANDS + PARAMETER ( WANDS = .TRUE. ) +* .. +* .. Local Scalars .. + LOGICAL STRONG, WEAK + INTEGER I, M + REAL CQ, CZ, EPS, SA, SB, SCALE, SMLNUM, SS, SUM, + $ THRESH, WS + COMPLEX CDUM, F, G, SQ, SZ +* .. +* .. Local Arrays .. + COMPLEX S( LDST, LDST ), T( LDST, LDST ), WORK( 8 ) +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACPY, CLARTG, CLASSQ, CROT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + M = LDST + WEAK = .FALSE. + STRONG = .FALSE. +* +* Make a local copy of selected block in (A, B) +* + CALL CLACPY( 'Full', M, M, A( J1, J1 ), LDA, S, LDST ) + CALL CLACPY( 'Full', M, M, B( J1, J1 ), LDB, T, LDST ) +* +* Compute the threshold for testing the acceptance of swapping. +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + SCALE = REAL( CZERO ) + SUM = REAL( CONE ) + CALL CLACPY( 'Full', M, M, S, LDST, WORK, M ) + CALL CLACPY( 'Full', M, M, T, LDST, WORK( M*M+1 ), M ) + CALL CLASSQ( 2*M*M, WORK, 1, SCALE, SUM ) + SA = SCALE*SQRT( SUM ) +* +* THRES has been changed from +* THRESH = MAX( TEN*EPS*SA, SMLNUM ) +* to +* THRESH = MAX( TWENTY*EPS*SA, SMLNUM ) +* on 04/01/10. +* "Bug" reported by Ondra Kamenik, confirmed by Julie Langou, fixed by +* Jim Demmel and Guillaume Revy. See forum post 1783. +* + THRESH = MAX( TWENTY*EPS*SA, SMLNUM ) +* +* Compute unitary QL and RQ that swap 1-by-1 and 1-by-1 blocks +* using Givens rotations and perform the swap tentatively. +* + F = S( 2, 2 )*T( 1, 1 ) - T( 2, 2 )*S( 1, 1 ) + G = S( 2, 2 )*T( 1, 2 ) - T( 2, 2 )*S( 1, 2 ) + SA = ABS( S( 2, 2 ) ) + SB = ABS( T( 2, 2 ) ) + CALL CLARTG( G, F, CZ, SZ, CDUM ) + SZ = -SZ + CALL CROT( 2, S( 1, 1 ), 1, S( 1, 2 ), 1, CZ, CONJG( SZ ) ) + CALL CROT( 2, T( 1, 1 ), 1, T( 1, 2 ), 1, CZ, CONJG( SZ ) ) + IF( SA.GE.SB ) THEN + CALL CLARTG( S( 1, 1 ), S( 2, 1 ), CQ, SQ, CDUM ) + ELSE + CALL CLARTG( T( 1, 1 ), T( 2, 1 ), CQ, SQ, CDUM ) + END IF + CALL CROT( 2, S( 1, 1 ), LDST, S( 2, 1 ), LDST, CQ, SQ ) + CALL CROT( 2, T( 1, 1 ), LDST, T( 2, 1 ), LDST, CQ, SQ ) +* +* Weak stability test: |S21| + |T21| <= O(EPS F-norm((S, T))) +* + WS = ABS( S( 2, 1 ) ) + ABS( T( 2, 1 ) ) + WEAK = WS.LE.THRESH + IF( .NOT.WEAK ) + $ GO TO 20 +* + IF( WANDS ) THEN +* +* Strong stability test: +* F-norm((A-QL**H*S*QR, B-QL**H*T*QR)) <= O(EPS*F-norm((A, B))) +* + CALL CLACPY( 'Full', M, M, S, LDST, WORK, M ) + CALL CLACPY( 'Full', M, M, T, LDST, WORK( M*M+1 ), M ) + CALL CROT( 2, WORK, 1, WORK( 3 ), 1, CZ, -CONJG( SZ ) ) + CALL CROT( 2, WORK( 5 ), 1, WORK( 7 ), 1, CZ, -CONJG( SZ ) ) + CALL CROT( 2, WORK, 2, WORK( 2 ), 2, CQ, -SQ ) + CALL CROT( 2, WORK( 5 ), 2, WORK( 6 ), 2, CQ, -SQ ) + DO 10 I = 1, 2 + WORK( I ) = WORK( I ) - A( J1+I-1, J1 ) + WORK( I+2 ) = WORK( I+2 ) - A( J1+I-1, J1+1 ) + WORK( I+4 ) = WORK( I+4 ) - B( J1+I-1, J1 ) + WORK( I+6 ) = WORK( I+6 ) - B( J1+I-1, J1+1 ) + 10 CONTINUE + SCALE = REAL( CZERO ) + SUM = REAL( CONE ) + CALL CLASSQ( 2*M*M, WORK, 1, SCALE, SUM ) + SS = SCALE*SQRT( SUM ) + STRONG = SS.LE.THRESH + IF( .NOT.STRONG ) + $ GO TO 20 + END IF +* +* If the swap is accepted ("weakly" and "strongly"), apply the +* equivalence transformations to the original matrix pair (A,B) +* + CALL CROT( J1+1, A( 1, J1 ), 1, A( 1, J1+1 ), 1, CZ, CONJG( SZ ) ) + CALL CROT( J1+1, B( 1, J1 ), 1, B( 1, J1+1 ), 1, CZ, CONJG( SZ ) ) + CALL CROT( N-J1+1, A( J1, J1 ), LDA, A( J1+1, J1 ), LDA, CQ, SQ ) + CALL CROT( N-J1+1, B( J1, J1 ), LDB, B( J1+1, J1 ), LDB, CQ, SQ ) +* +* Set N1 by N2 (2,1) blocks to 0 +* + A( J1+1, J1 ) = CZERO + B( J1+1, J1 ) = CZERO +* +* Accumulate transformations into Q and Z if requested. +* + IF( WANTZ ) + $ CALL CROT( N, Z( 1, J1 ), 1, Z( 1, J1+1 ), 1, CZ, CONJG( SZ ) ) + IF( WANTQ ) + $ CALL CROT( N, Q( 1, J1 ), 1, Q( 1, J1+1 ), 1, CQ, CONJG( SQ ) ) +* +* Exit with INFO = 0 if swap was successfully performed. +* + RETURN +* +* Exit with INFO = 1 if swap was rejected. +* + 20 CONTINUE + INFO = 1 + RETURN +* +* End of CTGEX2 +* + END diff --git a/dspl/liblapack/SRC/ctgexc.f b/dspl/liblapack/SRC/ctgexc.f new file mode 100644 index 0000000..ba1c281 --- /dev/null +++ b/dspl/liblapack/SRC/ctgexc.f @@ -0,0 +1,300 @@ +*> \brief \b CTGEXC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTGEXC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, +* LDZ, IFST, ILST, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL WANTQ, WANTZ +* INTEGER IFST, ILST, INFO, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTGEXC reorders the generalized Schur decomposition of a complex +*> matrix pair (A,B), using an unitary equivalence transformation +*> (A, B) := Q * (A, B) * Z**H, so that the diagonal block of (A, B) with +*> row index IFST is moved to row ILST. +*> +*> (A, B) must be in generalized Schur canonical form, that is, A and +*> B are both upper triangular. +*> +*> Optionally, the matrices Q and Z of generalized Schur vectors are +*> updated. +*> +*> Q(in) * A(in) * Z(in)**H = Q(out) * A(out) * Z(out)**H +*> Q(in) * B(in) * Z(in)**H = Q(out) * B(out) * Z(out)**H +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTQ +*> \verbatim +*> WANTQ is LOGICAL +*> .TRUE. : update the left transformation matrix Q; +*> .FALSE.: do not update Q. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> .TRUE. : update the right transformation matrix Z; +*> .FALSE.: do not update Z. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the upper triangular matrix A in the pair (A, B). +*> On exit, the updated matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> On entry, the upper triangular matrix B in the pair (A, B). +*> On exit, the updated matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ,N) +*> On entry, if WANTQ = .TRUE., the unitary matrix Q. +*> On exit, the updated matrix Q. +*> If WANTQ = .FALSE., Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= 1; +*> If WANTQ = .TRUE., LDQ >= N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ,N) +*> On entry, if WANTZ = .TRUE., the unitary matrix Z. +*> On exit, the updated matrix Z. +*> If WANTZ = .FALSE., Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1; +*> If WANTZ = .TRUE., LDZ >= N. +*> \endverbatim +*> +*> \param[in] IFST +*> \verbatim +*> IFST is INTEGER +*> \endverbatim +*> +*> \param[in,out] ILST +*> \verbatim +*> ILST is INTEGER +*> Specify the reordering of the diagonal blocks of (A, B). +*> The block with row index IFST is moved to row ILST, by a +*> sequence of swapping between adjacent blocks. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> =0: Successful exit. +*> <0: if INFO = -i, the i-th argument had an illegal value. +*> =1: The transformed matrix pair (A, B) would be too far +*> from generalized Schur form; the problem is ill- +*> conditioned. (A, B) may have been partially reordered, +*> and ILST points to the first row of the current +*> position of the block being moved. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexGEcomputational +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +*> Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +*> M.S. Moonen et al (eds), Linear Algebra for Large Scale and +*> Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +*> \n +*> [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +*> Eigenvalues of a Regular Matrix Pair (A, B) and Condition +*> Estimation: Theory, Algorithms and Software, Report +*> UMINF - 94.04, Department of Computing Science, Umea University, +*> S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87. +*> To appear in Numerical Algorithms, 1996. +*> \n +*> [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +*> for Solving the Generalized Sylvester Equation and Estimating the +*> Separation between Regular Matrix Pairs, Report UMINF - 93.23, +*> Department of Computing Science, Umea University, S-901 87 Umea, +*> Sweden, December 1993, Revised April 1994, Also as LAPACK working +*> Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1, +*> 1996. +*> +* ===================================================================== + SUBROUTINE CTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, IFST, ILST, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER IFST, ILST, INFO, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER HERE +* .. +* .. External Subroutines .. + EXTERNAL CTGEX2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode and test input arguments. + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. ( LDQ.LT.MAX( 1, N ) ) ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. WANTZ .AND. ( LDZ.LT.MAX( 1, N ) ) ) THEN + INFO = -11 + ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN + INFO = -12 + ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTGEXC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN + IF( IFST.EQ.ILST ) + $ RETURN +* + IF( IFST.LT.ILST ) THEN +* + HERE = IFST +* + 10 CONTINUE +* +* Swap with next one below +* + CALL CTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, LDZ, + $ HERE, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 1 + IF( HERE.LT.ILST ) + $ GO TO 10 + HERE = HERE - 1 + ELSE + HERE = IFST - 1 +* + 20 CONTINUE +* +* Swap with next one above +* + CALL CTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, LDZ, + $ HERE, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 1 + IF( HERE.GE.ILST ) + $ GO TO 20 + HERE = HERE + 1 + END IF + ILST = HERE + RETURN +* +* End of CTGEXC +* + END diff --git a/dspl/liblapack/SRC/ctgsen.f b/dspl/liblapack/SRC/ctgsen.f new file mode 100644 index 0000000..8198d45 --- /dev/null +++ b/dspl/liblapack/SRC/ctgsen.f @@ -0,0 +1,784 @@ +*> \brief \b CTGSEN +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTGSEN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, +* ALPHA, BETA, Q, LDQ, Z, LDZ, M, PL, PR, DIF, +* WORK, LWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL WANTQ, WANTZ +* INTEGER IJOB, INFO, LDA, LDB, LDQ, LDZ, LIWORK, LWORK, +* $ M, N +* REAL PL, PR +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* INTEGER IWORK( * ) +* REAL DIF( * ) +* COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), Q( LDQ, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTGSEN reorders the generalized Schur decomposition of a complex +*> matrix pair (A, B) (in terms of an unitary equivalence trans- +*> formation Q**H * (A, B) * Z), so that a selected cluster of eigenvalues +*> appears in the leading diagonal blocks of the pair (A,B). The leading +*> columns of Q and Z form unitary bases of the corresponding left and +*> right eigenspaces (deflating subspaces). (A, B) must be in +*> generalized Schur canonical form, that is, A and B are both upper +*> triangular. +*> +*> CTGSEN also computes the generalized eigenvalues +*> +*> w(j)= ALPHA(j) / BETA(j) +*> +*> of the reordered matrix pair (A, B). +*> +*> Optionally, the routine computes estimates of reciprocal condition +*> numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11), +*> (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s) +*> between the matrix pairs (A11, B11) and (A22,B22) that correspond to +*> the selected cluster and the eigenvalues outside the cluster, resp., +*> and norms of "projections" onto left and right eigenspaces w.r.t. +*> the selected cluster in the (1,1)-block. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] IJOB +*> \verbatim +*> IJOB is INTEGER +*> Specifies whether condition numbers are required for the +*> cluster of eigenvalues (PL and PR) or the deflating subspaces +*> (Difu and Difl): +*> =0: Only reorder w.r.t. SELECT. No extras. +*> =1: Reciprocal of norms of "projections" onto left and right +*> eigenspaces w.r.t. the selected cluster (PL and PR). +*> =2: Upper bounds on Difu and Difl. F-norm-based estimate +*> (DIF(1:2)). +*> =3: Estimate of Difu and Difl. 1-norm-based estimate +*> (DIF(1:2)). +*> About 5 times as expensive as IJOB = 2. +*> =4: Compute PL, PR and DIF (i.e. 0, 1 and 2 above): Economic +*> version to get it all. +*> =5: Compute PL, PR and DIF (i.e. 0, 1 and 3 above) +*> \endverbatim +*> +*> \param[in] WANTQ +*> \verbatim +*> WANTQ is LOGICAL +*> .TRUE. : update the left transformation matrix Q; +*> .FALSE.: do not update Q. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> .TRUE. : update the right transformation matrix Z; +*> .FALSE.: do not update Z. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> SELECT specifies the eigenvalues in the selected cluster. To +*> select an eigenvalue w(j), SELECT(j) must be set to +*> .TRUE.. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension(LDA,N) +*> On entry, the upper triangular matrix A, in generalized +*> Schur canonical form. +*> On exit, A is overwritten by the reordered matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension(LDB,N) +*> On entry, the upper triangular matrix B, in generalized +*> Schur canonical form. +*> On exit, B is overwritten by the reordered matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is COMPLEX array, dimension (N) +*> +*> The diagonal elements of A and B, respectively, +*> when the pair (A,B) has been reduced to generalized Schur +*> form. ALPHA(i)/BETA(i) i=1,...,N are the generalized +*> eigenvalues. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ,N) +*> On entry, if WANTQ = .TRUE., Q is an N-by-N matrix. +*> On exit, Q has been postmultiplied by the left unitary +*> transformation matrix which reorder (A, B); The leading M +*> columns of Q form orthonormal bases for the specified pair of +*> left eigenspaces (deflating subspaces). +*> If WANTQ = .FALSE., Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= 1. +*> If WANTQ = .TRUE., LDQ >= N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX array, dimension (LDZ,N) +*> On entry, if WANTZ = .TRUE., Z is an N-by-N matrix. +*> On exit, Z has been postmultiplied by the left unitary +*> transformation matrix which reorder (A, B); The leading M +*> columns of Z form orthonormal bases for the specified pair of +*> left eigenspaces (deflating subspaces). +*> If WANTZ = .FALSE., Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1. +*> If WANTZ = .TRUE., LDZ >= N. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The dimension of the specified pair of left and right +*> eigenspaces, (deflating subspaces) 0 <= M <= N. +*> \endverbatim +*> +*> \param[out] PL +*> \verbatim +*> PL is REAL +*> \endverbatim +*> +*> \param[out] PR +*> \verbatim +*> PR is REAL +*> +*> If IJOB = 1, 4 or 5, PL, PR are lower bounds on the +*> reciprocal of the norm of "projections" onto left and right +*> eigenspace with respect to the selected cluster. +*> 0 < PL, PR <= 1. +*> If M = 0 or M = N, PL = PR = 1. +*> If IJOB = 0, 2 or 3 PL, PR are not referenced. +*> \endverbatim +*> +*> \param[out] DIF +*> \verbatim +*> DIF is REAL array, dimension (2). +*> If IJOB >= 2, DIF(1:2) store the estimates of Difu and Difl. +*> If IJOB = 2 or 4, DIF(1:2) are F-norm-based upper bounds on +*> Difu and Difl. If IJOB = 3 or 5, DIF(1:2) are 1-norm-based +*> estimates of Difu and Difl, computed using reversed +*> communication with CLACN2. +*> If M = 0 or N, DIF(1:2) = F-norm([A, B]). +*> If IJOB = 0 or 1, DIF is not referenced. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 1 +*> If IJOB = 1, 2 or 4, LWORK >= 2*M*(N-M) +*> If IJOB = 3 or 5, LWORK >= 4*M*(N-M) +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= 1. +*> If IJOB = 1, 2 or 4, LIWORK >= N+2; +*> If IJOB = 3 or 5, LIWORK >= MAX(N+2, 2*M*(N-M)); +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> =0: Successful exit. +*> <0: If INFO = -i, the i-th argument had an illegal value. +*> =1: Reordering of (A, B) failed because the transformed +*> matrix pair (A, B) would be too far from generalized +*> Schur form; the problem is very ill-conditioned. +*> (A, B) may have been partially reordered. +*> If requested, 0 is returned in DIF(*), PL and PR. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> CTGSEN first collects the selected eigenvalues by computing unitary +*> U and W that move them to the top left corner of (A, B). In other +*> words, the selected eigenvalues are the eigenvalues of (A11, B11) in +*> +*> U**H*(A, B)*W = (A11 A12) (B11 B12) n1 +*> ( 0 A22),( 0 B22) n2 +*> n1 n2 n1 n2 +*> +*> where N = n1+n2 and U**H means the conjugate transpose of U. The first +*> n1 columns of U and W span the specified pair of left and right +*> eigenspaces (deflating subspaces) of (A, B). +*> +*> If (A, B) has been obtained from the generalized real Schur +*> decomposition of a matrix pair (C, D) = Q*(A, B)*Z', then the +*> reordered generalized Schur form of (C, D) is given by +*> +*> (C, D) = (Q*U)*(U**H *(A, B)*W)*(Z*W)**H, +*> +*> and the first n1 columns of Q*U and Z*W span the corresponding +*> deflating subspaces of (C, D) (Q and Z store Q*U and Z*W, resp.). +*> +*> Note that if the selected eigenvalue is sufficiently ill-conditioned, +*> then its value may differ significantly from its value before +*> reordering. +*> +*> The reciprocal condition numbers of the left and right eigenspaces +*> spanned by the first n1 columns of U and W (or Q*U and Z*W) may +*> be returned in DIF(1:2), corresponding to Difu and Difl, resp. +*> +*> The Difu and Difl are defined as: +*> +*> Difu[(A11, B11), (A22, B22)] = sigma-min( Zu ) +*> and +*> Difl[(A11, B11), (A22, B22)] = Difu[(A22, B22), (A11, B11)], +*> +*> where sigma-min(Zu) is the smallest singular value of the +*> (2*n1*n2)-by-(2*n1*n2) matrix +*> +*> Zu = [ kron(In2, A11) -kron(A22**H, In1) ] +*> [ kron(In2, B11) -kron(B22**H, In1) ]. +*> +*> Here, Inx is the identity matrix of size nx and A22**H is the +*> conjuguate transpose of A22. kron(X, Y) is the Kronecker product between +*> the matrices X and Y. +*> +*> When DIF(2) is small, small changes in (A, B) can cause large changes +*> in the deflating subspace. An approximate (asymptotic) bound on the +*> maximum angular error in the computed deflating subspaces is +*> +*> EPS * norm((A, B)) / DIF(2), +*> +*> where EPS is the machine precision. +*> +*> The reciprocal norm of the projectors on the left and right +*> eigenspaces associated with (A11, B11) may be returned in PL and PR. +*> They are computed as follows. First we compute L and R so that +*> P*(A, B)*Q is block diagonal, where +*> +*> P = ( I -L ) n1 Q = ( I R ) n1 +*> ( 0 I ) n2 and ( 0 I ) n2 +*> n1 n2 n1 n2 +*> +*> and (L, R) is the solution to the generalized Sylvester equation +*> +*> A11*R - L*A22 = -A12 +*> B11*R - L*B22 = -B12 +*> +*> Then PL = (F-norm(L)**2+1)**(-1/2) and PR = (F-norm(R)**2+1)**(-1/2). +*> An approximate (asymptotic) bound on the average absolute error of +*> the selected eigenvalues is +*> +*> EPS * norm((A, B)) / PL. +*> +*> There are also global error bounds which valid for perturbations up +*> to a certain restriction: A lower bound (x) on the smallest +*> F-norm(E,F) for which an eigenvalue of (A11, B11) may move and +*> coalesce with an eigenvalue of (A22, B22) under perturbation (E,F), +*> (i.e. (A + E, B + F), is +*> +*> x = min(Difu,Difl)/((1/(PL*PL)+1/(PR*PR))**(1/2)+2*max(1/PL,1/PR)). +*> +*> An approximate bound on x can be computed from DIF(1:2), PL and PR. +*> +*> If y = ( F-norm(E,F) / x) <= 1, the angles between the perturbed +*> (L', R') and unperturbed (L, R) left and right deflating subspaces +*> associated with the selected cluster in the (1,1)-blocks can be +*> bounded as +*> +*> max-angle(L, L') <= arctan( y * PL / (1 - y * (1 - PL * PL)**(1/2)) +*> max-angle(R, R') <= arctan( y * PR / (1 - y * (1 - PR * PR)**(1/2)) +*> +*> See LAPACK User's Guide section 4.11 or the following references +*> for more information. +*> +*> Note that if the default method for computing the Frobenius-norm- +*> based estimate DIF is not wanted (see CLATDF), then the parameter +*> IDIFJB (see below) should be changed from 3 to 4 (routine CLATDF +*> (IJOB = 2 will be used)). See CTGSYL for more details. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +*> Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +*> M.S. Moonen et al (eds), Linear Algebra for Large Scale and +*> Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +*> \n +*> [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +*> Eigenvalues of a Regular Matrix Pair (A, B) and Condition +*> Estimation: Theory, Algorithms and Software, Report +*> UMINF - 94.04, Department of Computing Science, Umea University, +*> S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87. +*> To appear in Numerical Algorithms, 1996. +*> \n +*> [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +*> for Solving the Generalized Sylvester Equation and Estimating the +*> Separation between Regular Matrix Pairs, Report UMINF - 93.23, +*> Department of Computing Science, Umea University, S-901 87 Umea, +*> Sweden, December 1993, Revised April 1994, Also as LAPACK working +*> Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1, +*> 1996. +*> +* ===================================================================== + SUBROUTINE CTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, + $ ALPHA, BETA, Q, LDQ, Z, LDZ, M, PL, PR, DIF, + $ WORK, LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER IJOB, INFO, LDA, LDB, LDQ, LDZ, LIWORK, LWORK, + $ M, N + REAL PL, PR +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + REAL DIF( * ) + COMPLEX A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), Q( LDQ, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IDIFJB + PARAMETER ( IDIFJB = 3 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SWAP, WANTD, WANTD1, WANTD2, WANTP + INTEGER I, IERR, IJB, K, KASE, KS, LIWMIN, LWMIN, MN2, + $ N1, N2 + REAL DSCALE, DSUM, RDSCAL, SAFMIN + COMPLEX TEMP1, TEMP2 +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + REAL SLAMCH + EXTERNAL CLACN2, CLACPY, CLASSQ, CSCAL, CTGEXC, CTGSYL, + $ SLAMCH, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CMPLX, CONJG, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + IF( IJOB.LT.0 .OR. IJOB.GT.5 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -13 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTGSEN', -INFO ) + RETURN + END IF +* + IERR = 0 +* + WANTP = IJOB.EQ.1 .OR. IJOB.GE.4 + WANTD1 = IJOB.EQ.2 .OR. IJOB.EQ.4 + WANTD2 = IJOB.EQ.3 .OR. IJOB.EQ.5 + WANTD = WANTD1 .OR. WANTD2 +* +* Set M to the dimension of the specified pair of deflating +* subspaces. +* + M = 0 + IF( .NOT.LQUERY .OR. IJOB.NE.0 ) THEN + DO 10 K = 1, N + ALPHA( K ) = A( K, K ) + BETA( K ) = B( K, K ) + IF( K.LT.N ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + 10 CONTINUE + END IF +* + IF( IJOB.EQ.1 .OR. IJOB.EQ.2 .OR. IJOB.EQ.4 ) THEN + LWMIN = MAX( 1, 2*M*(N-M) ) + LIWMIN = MAX( 1, N+2 ) + ELSE IF( IJOB.EQ.3 .OR. IJOB.EQ.5 ) THEN + LWMIN = MAX( 1, 4*M*(N-M) ) + LIWMIN = MAX( 1, 2*M*(N-M), N+2 ) + ELSE + LWMIN = 1 + LIWMIN = 1 + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -21 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -23 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTGSEN', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.N .OR. M.EQ.0 ) THEN + IF( WANTP ) THEN + PL = ONE + PR = ONE + END IF + IF( WANTD ) THEN + DSCALE = ZERO + DSUM = ONE + DO 20 I = 1, N + CALL CLASSQ( N, A( 1, I ), 1, DSCALE, DSUM ) + CALL CLASSQ( N, B( 1, I ), 1, DSCALE, DSUM ) + 20 CONTINUE + DIF( 1 ) = DSCALE*SQRT( DSUM ) + DIF( 2 ) = DIF( 1 ) + END IF + GO TO 70 + END IF +* +* Get machine constant +* + SAFMIN = SLAMCH( 'S' ) +* +* Collect the selected blocks at the top-left corner of (A, B). +* + KS = 0 + DO 30 K = 1, N + SWAP = SELECT( K ) + IF( SWAP ) THEN + KS = KS + 1 +* +* Swap the K-th block to position KS. Compute unitary Q +* and Z that will swap adjacent diagonal blocks in (A, B). +* + IF( K.NE.KS ) + $ CALL CTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, K, KS, IERR ) +* + IF( IERR.GT.0 ) THEN +* +* Swap is rejected: exit. +* + INFO = 1 + IF( WANTP ) THEN + PL = ZERO + PR = ZERO + END IF + IF( WANTD ) THEN + DIF( 1 ) = ZERO + DIF( 2 ) = ZERO + END IF + GO TO 70 + END IF + END IF + 30 CONTINUE + IF( WANTP ) THEN +* +* Solve generalized Sylvester equation for R and L: +* A11 * R - L * A22 = A12 +* B11 * R - L * B22 = B12 +* + N1 = M + N2 = N - M + I = N1 + 1 + CALL CLACPY( 'Full', N1, N2, A( 1, I ), LDA, WORK, N1 ) + CALL CLACPY( 'Full', N1, N2, B( 1, I ), LDB, WORK( N1*N2+1 ), + $ N1 ) + IJB = 0 + CALL CTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, + $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), N1, + $ DSCALE, DIF( 1 ), WORK( N1*N2*2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) +* +* Estimate the reciprocal of norms of "projections" onto +* left and right eigenspaces +* + RDSCAL = ZERO + DSUM = ONE + CALL CLASSQ( N1*N2, WORK, 1, RDSCAL, DSUM ) + PL = RDSCAL*SQRT( DSUM ) + IF( PL.EQ.ZERO ) THEN + PL = ONE + ELSE + PL = DSCALE / ( SQRT( DSCALE*DSCALE / PL+PL )*SQRT( PL ) ) + END IF + RDSCAL = ZERO + DSUM = ONE + CALL CLASSQ( N1*N2, WORK( N1*N2+1 ), 1, RDSCAL, DSUM ) + PR = RDSCAL*SQRT( DSUM ) + IF( PR.EQ.ZERO ) THEN + PR = ONE + ELSE + PR = DSCALE / ( SQRT( DSCALE*DSCALE / PR+PR )*SQRT( PR ) ) + END IF + END IF + IF( WANTD ) THEN +* +* Compute estimates Difu and Difl. +* + IF( WANTD1 ) THEN + N1 = M + N2 = N - M + I = N1 + 1 + IJB = IDIFJB +* +* Frobenius norm-based Difu estimate. +* + CALL CTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, + $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), + $ N1, DSCALE, DIF( 1 ), WORK( N1*N2*2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) +* +* Frobenius norm-based Difl estimate. +* + CALL CTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, WORK, + $ N2, B( I, I ), LDB, B, LDB, WORK( N1*N2+1 ), + $ N2, DSCALE, DIF( 2 ), WORK( N1*N2*2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) + ELSE +* +* Compute 1-norm-based estimates of Difu and Difl using +* reversed communication with CLACN2. In each step a +* generalized Sylvester equation or a transposed variant +* is solved. +* + KASE = 0 + N1 = M + N2 = N - M + I = N1 + 1 + IJB = 0 + MN2 = 2*N1*N2 +* +* 1-norm-based estimate of Difu. +* + 40 CONTINUE + CALL CLACN2( MN2, WORK( MN2+1 ), WORK, DIF( 1 ), KASE, + $ ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve generalized Sylvester equation +* + CALL CTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, + $ WORK, N1, B, LDB, B( I, I ), LDB, + $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), + $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + ELSE +* +* Solve the transposed variant. +* + CALL CTGSYL( 'C', IJB, N1, N2, A, LDA, A( I, I ), LDA, + $ WORK, N1, B, LDB, B( I, I ), LDB, + $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), + $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + END IF + GO TO 40 + END IF + DIF( 1 ) = DSCALE / DIF( 1 ) +* +* 1-norm-based estimate of Difl. +* + 50 CONTINUE + CALL CLACN2( MN2, WORK( MN2+1 ), WORK, DIF( 2 ), KASE, + $ ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve generalized Sylvester equation +* + CALL CTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, + $ WORK, N2, B( I, I ), LDB, B, LDB, + $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), + $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + ELSE +* +* Solve the transposed variant. +* + CALL CTGSYL( 'C', IJB, N2, N1, A( I, I ), LDA, A, LDA, + $ WORK, N2, B, LDB, B( I, I ), LDB, + $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), + $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + END IF + GO TO 50 + END IF + DIF( 2 ) = DSCALE / DIF( 2 ) + END IF + END IF +* +* If B(K,K) is complex, make it real and positive (normalization +* of the generalized Schur form) and Store the generalized +* eigenvalues of reordered pair (A, B) +* + DO 60 K = 1, N + DSCALE = ABS( B( K, K ) ) + IF( DSCALE.GT.SAFMIN ) THEN + TEMP1 = CONJG( B( K, K ) / DSCALE ) + TEMP2 = B( K, K ) / DSCALE + B( K, K ) = DSCALE + CALL CSCAL( N-K, TEMP1, B( K, K+1 ), LDB ) + CALL CSCAL( N-K+1, TEMP1, A( K, K ), LDA ) + IF( WANTQ ) + $ CALL CSCAL( N, TEMP2, Q( 1, K ), 1 ) + ELSE + B( K, K ) = CMPLX( ZERO, ZERO ) + END IF +* + ALPHA( K ) = A( K, K ) + BETA( K ) = B( K, K ) +* + 60 CONTINUE +* + 70 CONTINUE +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of CTGSEN +* + END diff --git a/dspl/liblapack/SRC/ctgsja.f b/dspl/liblapack/SRC/ctgsja.f new file mode 100644 index 0000000..38a6106 --- /dev/null +++ b/dspl/liblapack/SRC/ctgsja.f @@ -0,0 +1,665 @@ +*> \brief \b CTGSJA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTGSJA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, +* LDB, TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, +* Q, LDQ, WORK, NCYCLE, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBQ, JOBU, JOBV +* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, +* $ NCYCLE, P +* REAL TOLA, TOLB +* .. +* .. Array Arguments .. +* REAL ALPHA( * ), BETA( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTGSJA computes the generalized singular value decomposition (GSVD) +*> of two complex upper triangular (or trapezoidal) matrices A and B. +*> +*> On entry, it is assumed that matrices A and B have the following +*> forms, which may be obtained by the preprocessing subroutine CGGSVP +*> from a general M-by-N matrix A and P-by-N matrix B: +*> +*> N-K-L K L +*> A = K ( 0 A12 A13 ) if M-K-L >= 0; +*> L ( 0 0 A23 ) +*> M-K-L ( 0 0 0 ) +*> +*> N-K-L K L +*> A = K ( 0 A12 A13 ) if M-K-L < 0; +*> M-K ( 0 0 A23 ) +*> +*> N-K-L K L +*> B = L ( 0 0 B13 ) +*> P-L ( 0 0 0 ) +*> +*> where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular +*> upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, +*> otherwise A23 is (M-K)-by-L upper trapezoidal. +*> +*> On exit, +*> +*> U**H *A*Q = D1*( 0 R ), V**H *B*Q = D2*( 0 R ), +*> +*> where U, V and Q are unitary matrices. +*> R is a nonsingular upper triangular matrix, and D1 +*> and D2 are ``diagonal'' matrices, which are of the following +*> structures: +*> +*> If M-K-L >= 0, +*> +*> K L +*> D1 = K ( I 0 ) +*> L ( 0 C ) +*> M-K-L ( 0 0 ) +*> +*> K L +*> D2 = L ( 0 S ) +*> P-L ( 0 0 ) +*> +*> N-K-L K L +*> ( 0 R ) = K ( 0 R11 R12 ) K +*> L ( 0 0 R22 ) L +*> +*> where +*> +*> C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), +*> S = diag( BETA(K+1), ... , BETA(K+L) ), +*> C**2 + S**2 = I. +*> +*> R is stored in A(1:K+L,N-K-L+1:N) on exit. +*> +*> If M-K-L < 0, +*> +*> K M-K K+L-M +*> D1 = K ( I 0 0 ) +*> M-K ( 0 C 0 ) +*> +*> K M-K K+L-M +*> D2 = M-K ( 0 S 0 ) +*> K+L-M ( 0 0 I ) +*> P-L ( 0 0 0 ) +*> +*> N-K-L K M-K K+L-M +*> ( 0 R ) = K ( 0 R11 R12 R13 ) +*> M-K ( 0 0 R22 R23 ) +*> K+L-M ( 0 0 0 R33 ) +*> +*> where +*> C = diag( ALPHA(K+1), ... , ALPHA(M) ), +*> S = diag( BETA(K+1), ... , BETA(M) ), +*> C**2 + S**2 = I. +*> +*> R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored +*> ( 0 R22 R23 ) +*> in B(M-K+1:L,N+M-K-L+1:N) on exit. +*> +*> The computation of the unitary transformation matrices U, V or Q +*> is optional. These matrices may either be formed explicitly, or they +*> may be postmultiplied into input matrices U1, V1, or Q1. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> = 'U': U must contain a unitary matrix U1 on entry, and +*> the product U1*U is returned; +*> = 'I': U is initialized to the unit matrix, and the +*> unitary matrix U is returned; +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> = 'V': V must contain a unitary matrix V1 on entry, and +*> the product V1*V is returned; +*> = 'I': V is initialized to the unit matrix, and the +*> unitary matrix V is returned; +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBQ +*> \verbatim +*> JOBQ is CHARACTER*1 +*> = 'Q': Q must contain a unitary matrix Q1 on entry, and +*> the product Q1*Q is returned; +*> = 'I': Q is initialized to the unit matrix, and the +*> unitary matrix Q is returned; +*> = 'N': Q is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> +*> K and L specify the subblocks in the input matrices A and B: +*> A23 = A(K+1:MIN(K+L,M),N-L+1:N) and B13 = B(1:L,,N-L+1:N) +*> of A and B, whose GSVD is going to be computed by CTGSJA. +*> See Further Details. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A(N-K+1:N,1:MIN(K+L,M) ) contains the triangular +*> matrix R or part of R. See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, if necessary, B(M-K+1:L,N+M-K-L+1:N) contains +*> a part of R. See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[in] TOLA +*> \verbatim +*> TOLA is REAL +*> \endverbatim +*> +*> \param[in] TOLB +*> \verbatim +*> TOLB is REAL +*> +*> TOLA and TOLB are the convergence criteria for the Jacobi- +*> Kogbetliantz iteration procedure. Generally, they are the +*> same as used in the preprocessing step, say +*> TOLA = MAX(M,N)*norm(A)*MACHEPS, +*> TOLB = MAX(P,N)*norm(B)*MACHEPS. +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is REAL array, dimension (N) +*> +*> On exit, ALPHA and BETA contain the generalized singular +*> value pairs of A and B; +*> ALPHA(1:K) = 1, +*> BETA(1:K) = 0, +*> and if M-K-L >= 0, +*> ALPHA(K+1:K+L) = diag(C), +*> BETA(K+1:K+L) = diag(S), +*> or if M-K-L < 0, +*> ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0 +*> BETA(K+1:M) = S, BETA(M+1:K+L) = 1. +*> Furthermore, if K+L < N, +*> ALPHA(K+L+1:N) = 0 +*> BETA(K+L+1:N) = 0. +*> \endverbatim +*> +*> \param[in,out] U +*> \verbatim +*> U is COMPLEX array, dimension (LDU,M) +*> On entry, if JOBU = 'U', U must contain a matrix U1 (usually +*> the unitary matrix returned by CGGSVP). +*> On exit, +*> if JOBU = 'I', U contains the unitary matrix U; +*> if JOBU = 'U', U contains the product U1*U. +*> If JOBU = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,M) if +*> JOBU = 'U'; LDU >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is COMPLEX array, dimension (LDV,P) +*> On entry, if JOBV = 'V', V must contain a matrix V1 (usually +*> the unitary matrix returned by CGGSVP). +*> On exit, +*> if JOBV = 'I', V contains the unitary matrix V; +*> if JOBV = 'V', V contains the product V1*V. +*> If JOBV = 'N', V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,P) if +*> JOBV = 'V'; LDV >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ,N) +*> On entry, if JOBQ = 'Q', Q must contain a matrix Q1 (usually +*> the unitary matrix returned by CGGSVP). +*> On exit, +*> if JOBQ = 'I', Q contains the unitary matrix Q; +*> if JOBQ = 'Q', Q contains the product Q1*Q. +*> If JOBQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N) if +*> JOBQ = 'Q'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] NCYCLE +*> \verbatim +*> NCYCLE is INTEGER +*> The number of cycles required for convergence. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1: the procedure does not converge after MAXIT cycles. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> MAXIT INTEGER +*> MAXIT specifies the total loops that the iterative procedure +*> may take. If after MAXIT cycles, the routine fails to +*> converge, we return INFO = 1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> CTGSJA essentially uses a variant of Kogbetliantz algorithm to reduce +*> min(L,M-K)-by-L triangular (or trapezoidal) matrix A23 and L-by-L +*> matrix B13 to the form: +*> +*> U1**H *A13*Q1 = C1*R1; V1**H *B13*Q1 = S1*R1, +*> +*> where U1, V1 and Q1 are unitary matrix. +*> C1 and S1 are diagonal matrices satisfying +*> +*> C1**2 + S1**2 = I, +*> +*> and R1 is an L-by-L nonsingular upper triangular matrix. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, + $ LDB, TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, + $ Q, LDQ, WORK, NCYCLE, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, + $ NCYCLE, P + REAL TOLA, TOLB +* .. +* .. Array Arguments .. + REAL ALPHA( * ), BETA( * ) + COMPLEX A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 40 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. +* + LOGICAL INITQ, INITU, INITV, UPPER, WANTQ, WANTU, WANTV + INTEGER I, J, KCYCLE + REAL A1, A3, B1, B3, CSQ, CSU, CSV, ERROR, GAMMA, + $ RWK, SSMIN + COMPLEX A2, B2, SNQ, SNU, SNV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CLAGS2, CLAPLL, CLASET, CROT, CSSCAL, + $ SLARTG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + INITU = LSAME( JOBU, 'I' ) + WANTU = INITU .OR. LSAME( JOBU, 'U' ) +* + INITV = LSAME( JOBV, 'I' ) + WANTV = INITV .OR. LSAME( JOBV, 'V' ) +* + INITQ = LSAME( JOBQ, 'I' ) + WANTQ = INITQ .OR. LSAME( JOBQ, 'Q' ) +* + INFO = 0 + IF( .NOT.( INITU .OR. WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( INITV .OR. WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( INITQ .OR. WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( P.LT.0 ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -12 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -18 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -20 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -22 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTGSJA', -INFO ) + RETURN + END IF +* +* Initialize U, V and Q, if necessary +* + IF( INITU ) + $ CALL CLASET( 'Full', M, M, CZERO, CONE, U, LDU ) + IF( INITV ) + $ CALL CLASET( 'Full', P, P, CZERO, CONE, V, LDV ) + IF( INITQ ) + $ CALL CLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) +* +* Loop until convergence +* + UPPER = .FALSE. + DO 40 KCYCLE = 1, MAXIT +* + UPPER = .NOT.UPPER +* + DO 20 I = 1, L - 1 + DO 10 J = I + 1, L +* + A1 = ZERO + A2 = CZERO + A3 = ZERO + IF( K+I.LE.M ) + $ A1 = REAL( A( K+I, N-L+I ) ) + IF( K+J.LE.M ) + $ A3 = REAL( A( K+J, N-L+J ) ) +* + B1 = REAL( B( I, N-L+I ) ) + B3 = REAL( B( J, N-L+J ) ) +* + IF( UPPER ) THEN + IF( K+I.LE.M ) + $ A2 = A( K+I, N-L+J ) + B2 = B( I, N-L+J ) + ELSE + IF( K+J.LE.M ) + $ A2 = A( K+J, N-L+I ) + B2 = B( J, N-L+I ) + END IF +* + CALL CLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, + $ CSV, SNV, CSQ, SNQ ) +* +* Update (K+I)-th and (K+J)-th rows of matrix A: U**H *A +* + IF( K+J.LE.M ) + $ CALL CROT( L, A( K+J, N-L+1 ), LDA, A( K+I, N-L+1 ), + $ LDA, CSU, CONJG( SNU ) ) +* +* Update I-th and J-th rows of matrix B: V**H *B +* + CALL CROT( L, B( J, N-L+1 ), LDB, B( I, N-L+1 ), LDB, + $ CSV, CONJG( SNV ) ) +* +* Update (N-L+I)-th and (N-L+J)-th columns of matrices +* A and B: A*Q and B*Q +* + CALL CROT( MIN( K+L, M ), A( 1, N-L+J ), 1, + $ A( 1, N-L+I ), 1, CSQ, SNQ ) +* + CALL CROT( L, B( 1, N-L+J ), 1, B( 1, N-L+I ), 1, CSQ, + $ SNQ ) +* + IF( UPPER ) THEN + IF( K+I.LE.M ) + $ A( K+I, N-L+J ) = CZERO + B( I, N-L+J ) = CZERO + ELSE + IF( K+J.LE.M ) + $ A( K+J, N-L+I ) = CZERO + B( J, N-L+I ) = CZERO + END IF +* +* Ensure that the diagonal elements of A and B are real. +* + IF( K+I.LE.M ) + $ A( K+I, N-L+I ) = REAL( A( K+I, N-L+I ) ) + IF( K+J.LE.M ) + $ A( K+J, N-L+J ) = REAL( A( K+J, N-L+J ) ) + B( I, N-L+I ) = REAL( B( I, N-L+I ) ) + B( J, N-L+J ) = REAL( B( J, N-L+J ) ) +* +* Update unitary matrices U, V, Q, if desired. +* + IF( WANTU .AND. K+J.LE.M ) + $ CALL CROT( M, U( 1, K+J ), 1, U( 1, K+I ), 1, CSU, + $ SNU ) +* + IF( WANTV ) + $ CALL CROT( P, V( 1, J ), 1, V( 1, I ), 1, CSV, SNV ) +* + IF( WANTQ ) + $ CALL CROT( N, Q( 1, N-L+J ), 1, Q( 1, N-L+I ), 1, CSQ, + $ SNQ ) +* + 10 CONTINUE + 20 CONTINUE +* + IF( .NOT.UPPER ) THEN +* +* The matrices A13 and B13 were lower triangular at the start +* of the cycle, and are now upper triangular. +* +* Convergence test: test the parallelism of the corresponding +* rows of A and B. +* + ERROR = ZERO + DO 30 I = 1, MIN( L, M-K ) + CALL CCOPY( L-I+1, A( K+I, N-L+I ), LDA, WORK, 1 ) + CALL CCOPY( L-I+1, B( I, N-L+I ), LDB, WORK( L+1 ), 1 ) + CALL CLAPLL( L-I+1, WORK, 1, WORK( L+1 ), 1, SSMIN ) + ERROR = MAX( ERROR, SSMIN ) + 30 CONTINUE +* + IF( ABS( ERROR ).LE.MIN( TOLA, TOLB ) ) + $ GO TO 50 + END IF +* +* End of cycle loop +* + 40 CONTINUE +* +* The algorithm has not converged after MAXIT cycles. +* + INFO = 1 + GO TO 100 +* + 50 CONTINUE +* +* If ERROR <= MIN(TOLA,TOLB), then the algorithm has converged. +* Compute the generalized singular value pairs (ALPHA, BETA), and +* set the triangular matrix R to array A. +* + DO 60 I = 1, K + ALPHA( I ) = ONE + BETA( I ) = ZERO + 60 CONTINUE +* + DO 70 I = 1, MIN( L, M-K ) +* + A1 = REAL( A( K+I, N-L+I ) ) + B1 = REAL( B( I, N-L+I ) ) +* + IF( A1.NE.ZERO ) THEN + GAMMA = B1 / A1 +* + IF( GAMMA.LT.ZERO ) THEN + CALL CSSCAL( L-I+1, -ONE, B( I, N-L+I ), LDB ) + IF( WANTV ) + $ CALL CSSCAL( P, -ONE, V( 1, I ), 1 ) + END IF +* + CALL SLARTG( ABS( GAMMA ), ONE, BETA( K+I ), ALPHA( K+I ), + $ RWK ) +* + IF( ALPHA( K+I ).GE.BETA( K+I ) ) THEN + CALL CSSCAL( L-I+1, ONE / ALPHA( K+I ), A( K+I, N-L+I ), + $ LDA ) + ELSE + CALL CSSCAL( L-I+1, ONE / BETA( K+I ), B( I, N-L+I ), + $ LDB ) + CALL CCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ), + $ LDA ) + END IF +* + ELSE + ALPHA( K+I ) = ZERO + BETA( K+I ) = ONE + CALL CCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ), + $ LDA ) + END IF + 70 CONTINUE +* +* Post-assignment +* + DO 80 I = M + 1, K + L + ALPHA( I ) = ZERO + BETA( I ) = ONE + 80 CONTINUE +* + IF( K+L.LT.N ) THEN + DO 90 I = K + L + 1, N + ALPHA( I ) = ZERO + BETA( I ) = ZERO + 90 CONTINUE + END IF +* + 100 CONTINUE + NCYCLE = KCYCLE +* + RETURN +* +* End of CTGSJA +* + END diff --git a/dspl/liblapack/SRC/ctgsna.f b/dspl/liblapack/SRC/ctgsna.f new file mode 100644 index 0000000..6081d19 --- /dev/null +++ b/dspl/liblapack/SRC/ctgsna.f @@ -0,0 +1,519 @@ +*> \brief \b CTGSNA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTGSNA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTGSNA( JOB, HOWMNY, SELECT, N, A, LDA, B, LDB, VL, +* LDVL, VR, LDVR, S, DIF, MM, M, WORK, LWORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER HOWMNY, JOB +* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* INTEGER IWORK( * ) +* REAL DIF( * ), S( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), VL( LDVL, * ), +* $ VR( LDVR, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTGSNA estimates reciprocal condition numbers for specified +*> eigenvalues and/or eigenvectors of a matrix pair (A, B). +*> +*> (A, B) must be in generalized Schur canonical form, that is, A and +*> B are both upper triangular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies whether condition numbers are required for +*> eigenvalues (S) or eigenvectors (DIF): +*> = 'E': for eigenvalues only (S); +*> = 'V': for eigenvectors only (DIF); +*> = 'B': for both eigenvalues and eigenvectors (S and DIF). +*> \endverbatim +*> +*> \param[in] HOWMNY +*> \verbatim +*> HOWMNY is CHARACTER*1 +*> = 'A': compute condition numbers for all eigenpairs; +*> = 'S': compute condition numbers for selected eigenpairs +*> specified by the array SELECT. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> If HOWMNY = 'S', SELECT specifies the eigenpairs for which +*> condition numbers are required. To select condition numbers +*> for the corresponding j-th eigenvalue and/or eigenvector, +*> SELECT(j) must be set to .TRUE.. +*> If HOWMNY = 'A', SELECT is not referenced. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the square matrix pair (A, B). N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The upper triangular matrix A in the pair (A,B). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> The upper triangular matrix B in the pair (A, B). +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is COMPLEX array, dimension (LDVL,M) +*> IF JOB = 'E' or 'B', VL must contain left eigenvectors of +*> (A, B), corresponding to the eigenpairs specified by HOWMNY +*> and SELECT. The eigenvectors must be stored in consecutive +*> columns of VL, as returned by CTGEVC. +*> If JOB = 'V', VL is not referenced. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. LDVL >= 1; and +*> If JOB = 'E' or 'B', LDVL >= N. +*> \endverbatim +*> +*> \param[in] VR +*> \verbatim +*> VR is COMPLEX array, dimension (LDVR,M) +*> IF JOB = 'E' or 'B', VR must contain right eigenvectors of +*> (A, B), corresponding to the eigenpairs specified by HOWMNY +*> and SELECT. The eigenvectors must be stored in consecutive +*> columns of VR, as returned by CTGEVC. +*> If JOB = 'V', VR is not referenced. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. LDVR >= 1; +*> If JOB = 'E' or 'B', LDVR >= N. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (MM) +*> If JOB = 'E' or 'B', the reciprocal condition numbers of the +*> selected eigenvalues, stored in consecutive elements of the +*> array. +*> If JOB = 'V', S is not referenced. +*> \endverbatim +*> +*> \param[out] DIF +*> \verbatim +*> DIF is REAL array, dimension (MM) +*> If JOB = 'V' or 'B', the estimated reciprocal condition +*> numbers of the selected eigenvectors, stored in consecutive +*> elements of the array. +*> If the eigenvalues cannot be reordered to compute DIF(j), +*> DIF(j) is set to 0; this can only occur when the true value +*> would be very small anyway. +*> For each eigenvalue/vector specified by SELECT, DIF stores +*> a Frobenius norm-based estimate of Difl. +*> If JOB = 'E', DIF is not referenced. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of elements in the arrays S and DIF. MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of elements of the arrays S and DIF used to store +*> the specified condition numbers; for each selected eigenvalue +*> one element is used. If HOWMNY = 'A', M is set to N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> If JOB = 'V' or 'B', LWORK >= max(1,2*N*N). +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N+2) +*> If JOB = 'E', IWORK is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The reciprocal of the condition number of the i-th generalized +*> eigenvalue w = (a, b) is defined as +*> +*> S(I) = (|v**HAu|**2 + |v**HBu|**2)**(1/2) / (norm(u)*norm(v)) +*> +*> where u and v are the right and left eigenvectors of (A, B) +*> corresponding to w; |z| denotes the absolute value of the complex +*> number, and norm(u) denotes the 2-norm of the vector u. The pair +*> (a, b) corresponds to an eigenvalue w = a/b (= v**HAu/v**HBu) of the +*> matrix pair (A, B). If both a and b equal zero, then (A,B) is +*> singular and S(I) = -1 is returned. +*> +*> An approximate error bound on the chordal distance between the i-th +*> computed generalized eigenvalue w and the corresponding exact +*> eigenvalue lambda is +*> +*> chord(w, lambda) <= EPS * norm(A, B) / S(I), +*> +*> where EPS is the machine precision. +*> +*> The reciprocal of the condition number of the right eigenvector u +*> and left eigenvector v corresponding to the generalized eigenvalue w +*> is defined as follows. Suppose +*> +*> (A, B) = ( a * ) ( b * ) 1 +*> ( 0 A22 ),( 0 B22 ) n-1 +*> 1 n-1 1 n-1 +*> +*> Then the reciprocal condition number DIF(I) is +*> +*> Difl[(a, b), (A22, B22)] = sigma-min( Zl ) +*> +*> where sigma-min(Zl) denotes the smallest singular value of +*> +*> Zl = [ kron(a, In-1) -kron(1, A22) ] +*> [ kron(b, In-1) -kron(1, B22) ]. +*> +*> Here In-1 is the identity matrix of size n-1 and X**H is the conjugate +*> transpose of X. kron(X, Y) is the Kronecker product between the +*> matrices X and Y. +*> +*> We approximate the smallest singular value of Zl with an upper +*> bound. This is done by CLATDF. +*> +*> An approximate error bound for a computed eigenvector VL(i) or +*> VR(i) is given by +*> +*> EPS * norm(A, B) / DIF(i). +*> +*> See ref. [2-3] for more details and further references. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +*> Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +*> M.S. Moonen et al (eds), Linear Algebra for Large Scale and +*> Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +*> +*> [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +*> Eigenvalues of a Regular Matrix Pair (A, B) and Condition +*> Estimation: Theory, Algorithms and Software, Report +*> UMINF - 94.04, Department of Computing Science, Umea University, +*> S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87. +*> To appear in Numerical Algorithms, 1996. +*> +*> [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +*> for Solving the Generalized Sylvester Equation and Estimating the +*> Separation between Regular Matrix Pairs, Report UMINF - 93.23, +*> Department of Computing Science, Umea University, S-901 87 Umea, +*> Sweden, December 1993, Revised April 1994, Also as LAPACK Working +*> Note 75. +*> To appear in ACM Trans. on Math. Software, Vol 22, No 1, 1996. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTGSNA( JOB, HOWMNY, SELECT, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, S, DIF, MM, M, WORK, LWORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, JOB + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + REAL DIF( * ), S( * ) + COMPLEX A( LDA, * ), B( LDB, * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + INTEGER IDIFJB + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, IDIFJB = 3 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SOMCON, WANTBH, WANTDF, WANTS + INTEGER I, IERR, IFST, ILST, K, KS, LWMIN, N1, N2 + REAL BIGNUM, COND, EPS, LNRM, RNRM, SCALE, SMLNUM + COMPLEX YHAX, YHBX +* .. +* .. Local Arrays .. + COMPLEX DUMMY( 1 ), DUMMY1( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SCNRM2, SLAMCH, SLAPY2 + COMPLEX CDOTC + EXTERNAL LSAME, SCNRM2, SLAMCH, SLAPY2, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CLACPY, CTGEXC, CTGSYL, SLABAD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CMPLX, MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTDF = LSAME( JOB, 'V' ) .OR. WANTBH +* + SOMCON = LSAME( HOWMNY, 'S' ) +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.WANTS .AND. .NOT.WANTDF ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( WANTS .AND. LDVL.LT.N ) THEN + INFO = -10 + ELSE IF( WANTS .AND. LDVR.LT.N ) THEN + INFO = -12 + ELSE +* +* Set M to the number of eigenpairs for which condition numbers +* are required, and test MM. +* + IF( SOMCON ) THEN + M = 0 + DO 10 K = 1, N + IF( SELECT( K ) ) + $ M = M + 1 + 10 CONTINUE + ELSE + M = N + END IF +* + IF( N.EQ.0 ) THEN + LWMIN = 1 + ELSE IF( LSAME( JOB, 'V' ) .OR. LSAME( JOB, 'B' ) ) THEN + LWMIN = 2*N*N + ELSE + LWMIN = N + END IF + WORK( 1 ) = LWMIN +* + IF( MM.LT.M ) THEN + INFO = -15 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTGSNA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + KS = 0 + DO 20 K = 1, N +* +* Determine whether condition numbers are required for the k-th +* eigenpair. +* + IF( SOMCON ) THEN + IF( .NOT.SELECT( K ) ) + $ GO TO 20 + END IF +* + KS = KS + 1 +* + IF( WANTS ) THEN +* +* Compute the reciprocal condition number of the k-th +* eigenvalue. +* + RNRM = SCNRM2( N, VR( 1, KS ), 1 ) + LNRM = SCNRM2( N, VL( 1, KS ), 1 ) + CALL CGEMV( 'N', N, N, CMPLX( ONE, ZERO ), A, LDA, + $ VR( 1, KS ), 1, CMPLX( ZERO, ZERO ), WORK, 1 ) + YHAX = CDOTC( N, WORK, 1, VL( 1, KS ), 1 ) + CALL CGEMV( 'N', N, N, CMPLX( ONE, ZERO ), B, LDB, + $ VR( 1, KS ), 1, CMPLX( ZERO, ZERO ), WORK, 1 ) + YHBX = CDOTC( N, WORK, 1, VL( 1, KS ), 1 ) + COND = SLAPY2( ABS( YHAX ), ABS( YHBX ) ) + IF( COND.EQ.ZERO ) THEN + S( KS ) = -ONE + ELSE + S( KS ) = COND / ( RNRM*LNRM ) + END IF + END IF +* + IF( WANTDF ) THEN + IF( N.EQ.1 ) THEN + DIF( KS ) = SLAPY2( ABS( A( 1, 1 ) ), ABS( B( 1, 1 ) ) ) + ELSE +* +* Estimate the reciprocal condition number of the k-th +* eigenvectors. +* +* Copy the matrix (A, B) to the array WORK and move the +* (k,k)th pair to the (1,1) position. +* + CALL CLACPY( 'Full', N, N, A, LDA, WORK, N ) + CALL CLACPY( 'Full', N, N, B, LDB, WORK( N*N+1 ), N ) + IFST = K + ILST = 1 +* + CALL CTGEXC( .FALSE., .FALSE., N, WORK, N, WORK( N*N+1 ), + $ N, DUMMY, 1, DUMMY1, 1, IFST, ILST, IERR ) +* + IF( IERR.GT.0 ) THEN +* +* Ill-conditioned problem - swap rejected. +* + DIF( KS ) = ZERO + ELSE +* +* Reordering successful, solve generalized Sylvester +* equation for R and L, +* A22 * R - L * A11 = A12 +* B22 * R - L * B11 = B12, +* and compute estimate of Difl[(A11,B11), (A22, B22)]. +* + N1 = 1 + N2 = N - N1 + I = N*N + 1 + CALL CTGSYL( 'N', IDIFJB, N2, N1, WORK( N*N1+N1+1 ), + $ N, WORK, N, WORK( N1+1 ), N, + $ WORK( N*N1+N1+I ), N, WORK( I ), N, + $ WORK( N1+I ), N, SCALE, DIF( KS ), DUMMY, + $ 1, IWORK, IERR ) + END IF + END IF + END IF +* + 20 CONTINUE + WORK( 1 ) = LWMIN + RETURN +* +* End of CTGSNA +* + END diff --git a/dspl/liblapack/SRC/ctgsy2.f b/dspl/liblapack/SRC/ctgsy2.f new file mode 100644 index 0000000..66a8980 --- /dev/null +++ b/dspl/liblapack/SRC/ctgsy2.f @@ -0,0 +1,472 @@ +*> \brief \b CTGSY2 solves the generalized Sylvester equation (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTGSY2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTGSY2( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, +* LDD, E, LDE, F, LDF, SCALE, RDSUM, RDSCAL, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, M, N +* REAL RDSCAL, RDSUM, SCALE +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ), +* $ D( LDD, * ), E( LDE, * ), F( LDF, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTGSY2 solves the generalized Sylvester equation +*> +*> A * R - L * B = scale * C (1) +*> D * R - L * E = scale * F +*> +*> using Level 1 and 2 BLAS, where R and L are unknown M-by-N matrices, +*> (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M, +*> N-by-N and M-by-N, respectively. A, B, D and E are upper triangular +*> (i.e., (A,D) and (B,E) in generalized Schur form). +*> +*> The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output +*> scaling factor chosen to avoid overflow. +*> +*> In matrix notation solving equation (1) corresponds to solve +*> Zx = scale * b, where Z is defined as +*> +*> Z = [ kron(In, A) -kron(B**H, Im) ] (2) +*> [ kron(In, D) -kron(E**H, Im) ], +*> +*> Ik is the identity matrix of size k and X**H is the transpose of X. +*> kron(X, Y) is the Kronecker product between the matrices X and Y. +*> +*> If TRANS = 'C', y in the conjugate transposed system Z**H*y = scale*b +*> is solved for, which is equivalent to solve for R and L in +*> +*> A**H * R + D**H * L = scale * C (3) +*> R * B**H + L * E**H = scale * -F +*> +*> This case is used to compute an estimate of Dif[(A, D), (B, E)] = +*> = sigma_min(Z) using reverse communicaton with CLACON. +*> +*> CTGSY2 also (IJOB >= 1) contributes to the computation in CTGSYL +*> of an upper bound on the separation between to matrix pairs. Then +*> the input (A, D), (B, E) are sub-pencils of two matrix pairs in +*> CTGSYL. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N', solve the generalized Sylvester equation (1). +*> = 'T': solve the 'transposed' system (3). +*> \endverbatim +*> +*> \param[in] IJOB +*> \verbatim +*> IJOB is INTEGER +*> Specifies what kind of functionality to be performed. +*> =0: solve (1) only. +*> =1: A contribution from this subsystem to a Frobenius +*> norm-based estimate of the separation between two matrix +*> pairs is computed. (look ahead strategy is used). +*> =2: A contribution from this subsystem to a Frobenius +*> norm-based estimate of the separation between two matrix +*> pairs is computed. (SGECON on sub-systems is used.) +*> Not referenced if TRANS = 'T'. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the order of A and D, and the row +*> dimension of C, F, R and L. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of B and E, and the column +*> dimension of C, F, R and L. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, M) +*> On entry, A contains an upper triangular matrix. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the matrix A. LDA >= max(1, M). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB, N) +*> On entry, B contains an upper triangular matrix. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the matrix B. LDB >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC, N) +*> On entry, C contains the right-hand-side of the first matrix +*> equation in (1). +*> On exit, if IJOB = 0, C has been overwritten by the solution +*> R. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the matrix C. LDC >= max(1, M). +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is COMPLEX array, dimension (LDD, M) +*> On entry, D contains an upper triangular matrix. +*> \endverbatim +*> +*> \param[in] LDD +*> \verbatim +*> LDD is INTEGER +*> The leading dimension of the matrix D. LDD >= max(1, M). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX array, dimension (LDE, N) +*> On entry, E contains an upper triangular matrix. +*> \endverbatim +*> +*> \param[in] LDE +*> \verbatim +*> LDE is INTEGER +*> The leading dimension of the matrix E. LDE >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] F +*> \verbatim +*> F is COMPLEX array, dimension (LDF, N) +*> On entry, F contains the right-hand-side of the second matrix +*> equation in (1). +*> On exit, if IJOB = 0, F has been overwritten by the solution +*> L. +*> \endverbatim +*> +*> \param[in] LDF +*> \verbatim +*> LDF is INTEGER +*> The leading dimension of the matrix F. LDF >= max(1, M). +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL +*> On exit, 0 <= SCALE <= 1. If 0 < SCALE < 1, the solutions +*> R and L (C and F on entry) will hold the solutions to a +*> slightly perturbed system but the input matrices A, B, D and +*> E have not been changed. If SCALE = 0, R and L will hold the +*> solutions to the homogeneous system with C = F = 0. +*> Normally, SCALE = 1. +*> \endverbatim +*> +*> \param[in,out] RDSUM +*> \verbatim +*> RDSUM is REAL +*> On entry, the sum of squares of computed contributions to +*> the Dif-estimate under computation by CTGSYL, where the +*> scaling factor RDSCAL (see below) has been factored out. +*> On exit, the corresponding sum of squares updated with the +*> contributions from the current sub-system. +*> If TRANS = 'T' RDSUM is not touched. +*> NOTE: RDSUM only makes sense when CTGSY2 is called by +*> CTGSYL. +*> \endverbatim +*> +*> \param[in,out] RDSCAL +*> \verbatim +*> RDSCAL is REAL +*> On entry, scaling factor used to prevent overflow in RDSUM. +*> On exit, RDSCAL is updated w.r.t. the current contributions +*> in RDSUM. +*> If TRANS = 'T', RDSCAL is not touched. +*> NOTE: RDSCAL only makes sense when CTGSY2 is called by +*> CTGSYL. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> On exit, if INFO is set to +*> =0: Successful exit +*> <0: If INFO = -i, input argument number i is illegal. +*> >0: The matrix pairs (A, D) and (B, E) have common or very +*> close eigenvalues. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYauxiliary +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== + SUBROUTINE CTGSY2( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, RDSUM, RDSCAL, + $ INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, M, N + REAL RDSCAL, RDSUM, SCALE +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ D( LDD, * ), E( LDE, * ), F( LDF, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + INTEGER LDZ + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, LDZ = 2 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER I, IERR, J, K + REAL SCALOC + COMPLEX ALPHA +* .. +* .. Local Arrays .. + INTEGER IPIV( LDZ ), JPIV( LDZ ) + COMPLEX RHS( LDZ ), Z( LDZ, LDZ ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CGESC2, CGETC2, CSCAL, CLATDF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX, CONJG, MAX +* .. +* .. Executable Statements .. +* +* Decode and test input parameters +* + INFO = 0 + IERR = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( NOTRAN ) THEN + IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.2 ) ) THEN + INFO = -2 + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( M.LE.0 ) THEN + INFO = -3 + ELSE IF( N.LE.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDD.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDE.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDF.LT.MAX( 1, M ) ) THEN + INFO = -16 + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTGSY2', -INFO ) + RETURN + END IF +* + IF( NOTRAN ) THEN +* +* Solve (I, J) - system +* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) +* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) +* for I = M, M - 1, ..., 1; J = 1, 2, ..., N +* + SCALE = ONE + SCALOC = ONE + DO 30 J = 1, N + DO 20 I = M, 1, -1 +* +* Build 2 by 2 system +* + Z( 1, 1 ) = A( I, I ) + Z( 2, 1 ) = D( I, I ) + Z( 1, 2 ) = -B( J, J ) + Z( 2, 2 ) = -E( J, J ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( I, J ) + RHS( 2 ) = F( I, J ) +* +* Solve Z * x = RHS +* + CALL CGETC2( LDZ, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR + IF( IJOB.EQ.0 ) THEN + CALL CGESC2( LDZ, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 10 K = 1, N + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), C( 1, K ), + $ 1 ) + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), F( 1, K ), + $ 1 ) + 10 CONTINUE + SCALE = SCALE*SCALOC + END IF + ELSE + CALL CLATDF( IJOB, LDZ, Z, LDZ, RHS, RDSUM, RDSCAL, + $ IPIV, JPIV ) + END IF +* +* Unpack solution vector(s) +* + C( I, J ) = RHS( 1 ) + F( I, J ) = RHS( 2 ) +* +* Substitute R(I, J) and L(I, J) into remaining equation. +* + IF( I.GT.1 ) THEN + ALPHA = -RHS( 1 ) + CALL CAXPY( I-1, ALPHA, A( 1, I ), 1, C( 1, J ), 1 ) + CALL CAXPY( I-1, ALPHA, D( 1, I ), 1, F( 1, J ), 1 ) + END IF + IF( J.LT.N ) THEN + CALL CAXPY( N-J, RHS( 2 ), B( J, J+1 ), LDB, + $ C( I, J+1 ), LDC ) + CALL CAXPY( N-J, RHS( 2 ), E( J, J+1 ), LDE, + $ F( I, J+1 ), LDF ) + END IF +* + 20 CONTINUE + 30 CONTINUE + ELSE +* +* Solve transposed (I, J) - system: +* A(I, I)**H * R(I, J) + D(I, I)**H * L(J, J) = C(I, J) +* R(I, I) * B(J, J) + L(I, J) * E(J, J) = -F(I, J) +* for I = 1, 2, ..., M, J = N, N - 1, ..., 1 +* + SCALE = ONE + SCALOC = ONE + DO 80 I = 1, M + DO 70 J = N, 1, -1 +* +* Build 2 by 2 system Z**H +* + Z( 1, 1 ) = CONJG( A( I, I ) ) + Z( 2, 1 ) = -CONJG( B( J, J ) ) + Z( 1, 2 ) = CONJG( D( I, I ) ) + Z( 2, 2 ) = -CONJG( E( J, J ) ) +* +* +* Set up right hand side(s) +* + RHS( 1 ) = C( I, J ) + RHS( 2 ) = F( I, J ) +* +* Solve Z**H * x = RHS +* + CALL CGETC2( LDZ, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR + CALL CGESC2( LDZ, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 40 K = 1, N + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), C( 1, K ), + $ 1 ) + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), F( 1, K ), + $ 1 ) + 40 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Unpack solution vector(s) +* + C( I, J ) = RHS( 1 ) + F( I, J ) = RHS( 2 ) +* +* Substitute R(I, J) and L(I, J) into remaining equation. +* + DO 50 K = 1, J - 1 + F( I, K ) = F( I, K ) + RHS( 1 )*CONJG( B( K, J ) ) + + $ RHS( 2 )*CONJG( E( K, J ) ) + 50 CONTINUE + DO 60 K = I + 1, M + C( K, J ) = C( K, J ) - CONJG( A( I, K ) )*RHS( 1 ) - + $ CONJG( D( I, K ) )*RHS( 2 ) + 60 CONTINUE +* + 70 CONTINUE + 80 CONTINUE + END IF + RETURN +* +* End of CTGSY2 +* + END diff --git a/dspl/liblapack/SRC/ctgsyl.f b/dspl/liblapack/SRC/ctgsyl.f new file mode 100644 index 0000000..602e809 --- /dev/null +++ b/dspl/liblapack/SRC/ctgsyl.f @@ -0,0 +1,695 @@ +*> \brief \b CTGSYL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTGSYL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTGSYL( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, +* LDD, E, LDE, F, LDF, SCALE, DIF, WORK, LWORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, +* $ LWORK, M, N +* REAL DIF, SCALE +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ), +* $ D( LDD, * ), E( LDE, * ), F( LDF, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTGSYL solves the generalized Sylvester equation: +*> +*> A * R - L * B = scale * C (1) +*> D * R - L * E = scale * F +*> +*> where R and L are unknown m-by-n matrices, (A, D), (B, E) and +*> (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n, +*> respectively, with complex entries. A, B, D and E are upper +*> triangular (i.e., (A,D) and (B,E) in generalized Schur form). +*> +*> The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 +*> is an output scaling factor chosen to avoid overflow. +*> +*> In matrix notation (1) is equivalent to solve Zx = scale*b, where Z +*> is defined as +*> +*> Z = [ kron(In, A) -kron(B**H, Im) ] (2) +*> [ kron(In, D) -kron(E**H, Im) ], +*> +*> Here Ix is the identity matrix of size x and X**H is the conjugate +*> transpose of X. Kron(X, Y) is the Kronecker product between the +*> matrices X and Y. +*> +*> If TRANS = 'C', y in the conjugate transposed system Z**H *y = scale*b +*> is solved for, which is equivalent to solve for R and L in +*> +*> A**H * R + D**H * L = scale * C (3) +*> R * B**H + L * E**H = scale * -F +*> +*> This case (TRANS = 'C') is used to compute an one-norm-based estimate +*> of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D) +*> and (B,E), using CLACON. +*> +*> If IJOB >= 1, CTGSYL computes a Frobenius norm-based estimate of +*> Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the +*> reciprocal of the smallest singular value of Z. +*> +*> This is a level-3 BLAS algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': solve the generalized sylvester equation (1). +*> = 'C': solve the "conjugate transposed" system (3). +*> \endverbatim +*> +*> \param[in] IJOB +*> \verbatim +*> IJOB is INTEGER +*> Specifies what kind of functionality to be performed. +*> =0: solve (1) only. +*> =1: The functionality of 0 and 3. +*> =2: The functionality of 0 and 4. +*> =3: Only an estimate of Dif[(A,D), (B,E)] is computed. +*> (look ahead strategy is used). +*> =4: Only an estimate of Dif[(A,D), (B,E)] is computed. +*> (CGECON on sub-systems is used). +*> Not referenced if TRANS = 'C'. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The order of the matrices A and D, and the row dimension of +*> the matrices C, F, R and L. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices B and E, and the column dimension +*> of the matrices C, F, R and L. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA, M) +*> The upper triangular matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1, M). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB, N) +*> The upper triangular matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC, N) +*> On entry, C contains the right-hand-side of the first matrix +*> equation in (1) or (3). +*> On exit, if IJOB = 0, 1 or 2, C has been overwritten by +*> the solution R. If IJOB = 3 or 4 and TRANS = 'N', C holds R, +*> the solution achieved during the computation of the +*> Dif-estimate. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1, M). +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is COMPLEX array, dimension (LDD, M) +*> The upper triangular matrix D. +*> \endverbatim +*> +*> \param[in] LDD +*> \verbatim +*> LDD is INTEGER +*> The leading dimension of the array D. LDD >= max(1, M). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX array, dimension (LDE, N) +*> The upper triangular matrix E. +*> \endverbatim +*> +*> \param[in] LDE +*> \verbatim +*> LDE is INTEGER +*> The leading dimension of the array E. LDE >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] F +*> \verbatim +*> F is COMPLEX array, dimension (LDF, N) +*> On entry, F contains the right-hand-side of the second matrix +*> equation in (1) or (3). +*> On exit, if IJOB = 0, 1 or 2, F has been overwritten by +*> the solution L. If IJOB = 3 or 4 and TRANS = 'N', F holds L, +*> the solution achieved during the computation of the +*> Dif-estimate. +*> \endverbatim +*> +*> \param[in] LDF +*> \verbatim +*> LDF is INTEGER +*> The leading dimension of the array F. LDF >= max(1, M). +*> \endverbatim +*> +*> \param[out] DIF +*> \verbatim +*> DIF is REAL +*> On exit DIF is the reciprocal of a lower bound of the +*> reciprocal of the Dif-function, i.e. DIF is an upper bound of +*> Dif[(A,D), (B,E)] = sigma-min(Z), where Z as in (2). +*> IF IJOB = 0 or TRANS = 'C', DIF is not referenced. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL +*> On exit SCALE is the scaling factor in (1) or (3). +*> If 0 < SCALE < 1, C and F hold the solutions R and L, resp., +*> to a slightly perturbed system but the input matrices A, B, +*> D and E have not been changed. If SCALE = 0, R and L will +*> hold the solutions to the homogenious system with C = F = 0. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK > = 1. +*> If IJOB = 1 or 2 and TRANS = 'N', LWORK >= max(1,2*M*N). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (M+N+2) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> =0: successful exit +*> <0: If INFO = -i, the i-th argument had an illegal value. +*> >0: (A, D) and (B, E) have common or very close +*> eigenvalues. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYcomputational +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> [1] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +*> for Solving the Generalized Sylvester Equation and Estimating the +*> Separation between Regular Matrix Pairs, Report UMINF - 93.23, +*> Department of Computing Science, Umea University, S-901 87 Umea, +*> Sweden, December 1993, Revised April 1994, Also as LAPACK Working +*> Note 75. To appear in ACM Trans. on Math. Software, Vol 22, +*> No 1, 1996. +*> \n +*> [2] B. Kagstrom, A Perturbation Analysis of the Generalized Sylvester +*> Equation (AR - LB, DR - LE ) = (C, F), SIAM J. Matrix Anal. +*> Appl., 15(4):1045-1060, 1994. +*> \n +*> [3] B. Kagstrom and L. Westin, Generalized Schur Methods with +*> Condition Estimators for Solving the Generalized Sylvester +*> Equation, IEEE Transactions on Automatic Control, Vol. 34, No. 7, +*> July 1989, pp 745-751. +*> +* ===================================================================== + SUBROUTINE CTGSYL( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, DIF, WORK, LWORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, + $ LWORK, M, N + REAL DIF, SCALE +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ D( LDD, * ), E( LDE, * ), F( LDF, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* Replaced various illegal calls to CCOPY by calls to CLASET. +* Sven Hammarling, 1/5/02. +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO + PARAMETER ( CZERO = (0.0E+0, 0.0E+0) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOTRAN + INTEGER I, IE, IFUNC, IROUND, IS, ISOLVE, J, JE, JS, K, + $ LINFO, LWMIN, MB, NB, P, PQ, Q + REAL DSCALE, DSUM, SCALE2, SCALOC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CLACPY, CLASET, CSCAL, CTGSY2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX, MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test input parameters +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( NOTRAN ) THEN + IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.4 ) ) THEN + INFO = -2 + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( M.LE.0 ) THEN + INFO = -3 + ELSE IF( N.LE.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDD.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDE.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDF.LT.MAX( 1, M ) ) THEN + INFO = -16 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + IF( NOTRAN ) THEN + IF( IJOB.EQ.1 .OR. IJOB.EQ.2 ) THEN + LWMIN = MAX( 1, 2*M*N ) + ELSE + LWMIN = 1 + END IF + ELSE + LWMIN = 1 + END IF + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTGSYL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + SCALE = 1 + IF( NOTRAN ) THEN + IF( IJOB.NE.0 ) THEN + DIF = 0 + END IF + END IF + RETURN + END IF +* +* Determine optimal block sizes MB and NB +* + MB = ILAENV( 2, 'CTGSYL', TRANS, M, N, -1, -1 ) + NB = ILAENV( 5, 'CTGSYL', TRANS, M, N, -1, -1 ) +* + ISOLVE = 1 + IFUNC = 0 + IF( NOTRAN ) THEN + IF( IJOB.GE.3 ) THEN + IFUNC = IJOB - 2 + CALL CLASET( 'F', M, N, CZERO, CZERO, C, LDC ) + CALL CLASET( 'F', M, N, CZERO, CZERO, F, LDF ) + ELSE IF( IJOB.GE.1 .AND. NOTRAN ) THEN + ISOLVE = 2 + END IF + END IF +* + IF( ( MB.LE.1 .AND. NB.LE.1 ) .OR. ( MB.GE.M .AND. NB.GE.N ) ) + $ THEN +* +* Use unblocked Level 2 solver +* + DO 30 IROUND = 1, ISOLVE +* + SCALE = ONE + DSCALE = ZERO + DSUM = ONE + PQ = M*N + CALL CTGSY2( TRANS, IFUNC, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, DSUM, DSCALE, + $ INFO ) + IF( DSCALE.NE.ZERO ) THEN + IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN + DIF = SQRT( REAL( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) ) + ELSE + DIF = SQRT( REAL( PQ ) ) / ( DSCALE*SQRT( DSUM ) ) + END IF + END IF + IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN + IF( NOTRAN ) THEN + IFUNC = IJOB + END IF + SCALE2 = SCALE + CALL CLACPY( 'F', M, N, C, LDC, WORK, M ) + CALL CLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M ) + CALL CLASET( 'F', M, N, CZERO, CZERO, C, LDC ) + CALL CLASET( 'F', M, N, CZERO, CZERO, F, LDF ) + ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN + CALL CLACPY( 'F', M, N, WORK, M, C, LDC ) + CALL CLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF ) + SCALE = SCALE2 + END IF + 30 CONTINUE +* + RETURN +* + END IF +* +* Determine block structure of A +* + P = 0 + I = 1 + 40 CONTINUE + IF( I.GT.M ) + $ GO TO 50 + P = P + 1 + IWORK( P ) = I + I = I + MB + IF( I.GE.M ) + $ GO TO 50 + GO TO 40 + 50 CONTINUE + IWORK( P+1 ) = M + 1 + IF( IWORK( P ).EQ.IWORK( P+1 ) ) + $ P = P - 1 +* +* Determine block structure of B +* + Q = P + 1 + J = 1 + 60 CONTINUE + IF( J.GT.N ) + $ GO TO 70 +* + Q = Q + 1 + IWORK( Q ) = J + J = J + NB + IF( J.GE.N ) + $ GO TO 70 + GO TO 60 +* + 70 CONTINUE + IWORK( Q+1 ) = N + 1 + IF( IWORK( Q ).EQ.IWORK( Q+1 ) ) + $ Q = Q - 1 +* + IF( NOTRAN ) THEN + DO 150 IROUND = 1, ISOLVE +* +* Solve (I, J) - subsystem +* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) +* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) +* for I = P, P - 1, ..., 1; J = 1, 2, ..., Q +* + PQ = 0 + SCALE = ONE + DSCALE = ZERO + DSUM = ONE + DO 130 J = P + 2, Q + JS = IWORK( J ) + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + DO 120 I = P, 1, -1 + IS = IWORK( I ) + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + CALL CTGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA, + $ B( JS, JS ), LDB, C( IS, JS ), LDC, + $ D( IS, IS ), LDD, E( JS, JS ), LDE, + $ F( IS, JS ), LDF, SCALOC, DSUM, DSCALE, + $ LINFO ) + IF( LINFO.GT.0 ) + $ INFO = LINFO + PQ = PQ + MB*NB + IF( SCALOC.NE.ONE ) THEN + DO 80 K = 1, JS - 1 + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), C( 1, K ), + $ 1 ) + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), F( 1, K ), + $ 1 ) + 80 CONTINUE + DO 90 K = JS, JE + CALL CSCAL( IS-1, CMPLX( SCALOC, ZERO ), + $ C( 1, K ), 1 ) + CALL CSCAL( IS-1, CMPLX( SCALOC, ZERO ), + $ F( 1, K ), 1 ) + 90 CONTINUE + DO 100 K = JS, JE + CALL CSCAL( M-IE, CMPLX( SCALOC, ZERO ), + $ C( IE+1, K ), 1 ) + CALL CSCAL( M-IE, CMPLX( SCALOC, ZERO ), + $ F( IE+1, K ), 1 ) + 100 CONTINUE + DO 110 K = JE + 1, N + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), C( 1, K ), + $ 1 ) + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), F( 1, K ), + $ 1 ) + 110 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Substitute R(I,J) and L(I,J) into remaining equation. +* + IF( I.GT.1 ) THEN + CALL CGEMM( 'N', 'N', IS-1, NB, MB, + $ CMPLX( -ONE, ZERO ), A( 1, IS ), LDA, + $ C( IS, JS ), LDC, CMPLX( ONE, ZERO ), + $ C( 1, JS ), LDC ) + CALL CGEMM( 'N', 'N', IS-1, NB, MB, + $ CMPLX( -ONE, ZERO ), D( 1, IS ), LDD, + $ C( IS, JS ), LDC, CMPLX( ONE, ZERO ), + $ F( 1, JS ), LDF ) + END IF + IF( J.LT.Q ) THEN + CALL CGEMM( 'N', 'N', MB, N-JE, NB, + $ CMPLX( ONE, ZERO ), F( IS, JS ), LDF, + $ B( JS, JE+1 ), LDB, CMPLX( ONE, ZERO ), + $ C( IS, JE+1 ), LDC ) + CALL CGEMM( 'N', 'N', MB, N-JE, NB, + $ CMPLX( ONE, ZERO ), F( IS, JS ), LDF, + $ E( JS, JE+1 ), LDE, CMPLX( ONE, ZERO ), + $ F( IS, JE+1 ), LDF ) + END IF + 120 CONTINUE + 130 CONTINUE + IF( DSCALE.NE.ZERO ) THEN + IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN + DIF = SQRT( REAL( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) ) + ELSE + DIF = SQRT( REAL( PQ ) ) / ( DSCALE*SQRT( DSUM ) ) + END IF + END IF + IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN + IF( NOTRAN ) THEN + IFUNC = IJOB + END IF + SCALE2 = SCALE + CALL CLACPY( 'F', M, N, C, LDC, WORK, M ) + CALL CLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M ) + CALL CLASET( 'F', M, N, CZERO, CZERO, C, LDC ) + CALL CLASET( 'F', M, N, CZERO, CZERO, F, LDF ) + ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN + CALL CLACPY( 'F', M, N, WORK, M, C, LDC ) + CALL CLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF ) + SCALE = SCALE2 + END IF + 150 CONTINUE + ELSE +* +* Solve transposed (I, J)-subsystem +* A(I, I)**H * R(I, J) + D(I, I)**H * L(I, J) = C(I, J) +* R(I, J) * B(J, J) + L(I, J) * E(J, J) = -F(I, J) +* for I = 1,2,..., P; J = Q, Q-1,..., 1 +* + SCALE = ONE + DO 210 I = 1, P + IS = IWORK( I ) + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + DO 200 J = Q, P + 2, -1 + JS = IWORK( J ) + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + CALL CTGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA, + $ B( JS, JS ), LDB, C( IS, JS ), LDC, + $ D( IS, IS ), LDD, E( JS, JS ), LDE, + $ F( IS, JS ), LDF, SCALOC, DSUM, DSCALE, + $ LINFO ) + IF( LINFO.GT.0 ) + $ INFO = LINFO + IF( SCALOC.NE.ONE ) THEN + DO 160 K = 1, JS - 1 + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), C( 1, K ), + $ 1 ) + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), F( 1, K ), + $ 1 ) + 160 CONTINUE + DO 170 K = JS, JE + CALL CSCAL( IS-1, CMPLX( SCALOC, ZERO ), C( 1, K ), + $ 1 ) + CALL CSCAL( IS-1, CMPLX( SCALOC, ZERO ), F( 1, K ), + $ 1 ) + 170 CONTINUE + DO 180 K = JS, JE + CALL CSCAL( M-IE, CMPLX( SCALOC, ZERO ), + $ C( IE+1, K ), 1 ) + CALL CSCAL( M-IE, CMPLX( SCALOC, ZERO ), + $ F( IE+1, K ), 1 ) + 180 CONTINUE + DO 190 K = JE + 1, N + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), C( 1, K ), + $ 1 ) + CALL CSCAL( M, CMPLX( SCALOC, ZERO ), F( 1, K ), + $ 1 ) + 190 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Substitute R(I,J) and L(I,J) into remaining equation. +* + IF( J.GT.P+2 ) THEN + CALL CGEMM( 'N', 'C', MB, JS-1, NB, + $ CMPLX( ONE, ZERO ), C( IS, JS ), LDC, + $ B( 1, JS ), LDB, CMPLX( ONE, ZERO ), + $ F( IS, 1 ), LDF ) + CALL CGEMM( 'N', 'C', MB, JS-1, NB, + $ CMPLX( ONE, ZERO ), F( IS, JS ), LDF, + $ E( 1, JS ), LDE, CMPLX( ONE, ZERO ), + $ F( IS, 1 ), LDF ) + END IF + IF( I.LT.P ) THEN + CALL CGEMM( 'C', 'N', M-IE, NB, MB, + $ CMPLX( -ONE, ZERO ), A( IS, IE+1 ), LDA, + $ C( IS, JS ), LDC, CMPLX( ONE, ZERO ), + $ C( IE+1, JS ), LDC ) + CALL CGEMM( 'C', 'N', M-IE, NB, MB, + $ CMPLX( -ONE, ZERO ), D( IS, IE+1 ), LDD, + $ F( IS, JS ), LDF, CMPLX( ONE, ZERO ), + $ C( IE+1, JS ), LDC ) + END IF + 200 CONTINUE + 210 CONTINUE + END IF +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of CTGSYL +* + END diff --git a/dspl/liblapack/SRC/ctpcon.f b/dspl/liblapack/SRC/ctpcon.f new file mode 100644 index 0000000..6b439fb --- /dev/null +++ b/dspl/liblapack/SRC/ctpcon.f @@ -0,0 +1,274 @@ +*> \brief \b CTPCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTPCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTPCON( NORM, UPLO, DIAG, N, AP, RCOND, WORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER INFO, N +* REAL RCOND +* .. +* .. Array Arguments .. +* REAL RWORK( * ) +* COMPLEX AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTPCON estimates the reciprocal of the condition number of a packed +*> triangular matrix A, in either the 1-norm or the infinity-norm. +*> +*> The norm of A is computed and an estimate is obtained for +*> norm(inv(A)), then the reciprocal of the condition number is +*> computed as +*> RCOND = 1 / ( norm(A) * norm(inv(A)) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> The upper or lower triangular matrix A, packed columnwise in +*> a linear array. The j-th column of A is stored in the array +*> AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> If DIAG = 'U', the diagonal elements of A are not referenced +*> and are assumed to be 1. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(norm(A) * norm(inv(A))). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CTPCON( NORM, UPLO, DIAG, N, AP, RCOND, WORK, RWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, N + REAL RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + REAL AINVNM, ANORM, SCALE, SMLNUM, XNORM + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL CLANTP, SLAMCH + EXTERNAL LSAME, ICAMAX, CLANTP, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CLATPS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = SLAMCH( 'Safe minimum' )*REAL( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = CLANTP( NORM, UPLO, DIAG, N, AP, RWORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL CLATPS( UPLO, 'No transpose', DIAG, NORMIN, N, AP, + $ WORK, SCALE, RWORK, INFO ) + ELSE +* +* Multiply by inv(A**H). +* + CALL CLATPS( UPLO, 'Conjugate transpose', DIAG, NORMIN, + $ N, AP, WORK, SCALE, RWORK, INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + XNORM = CABS1( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of CTPCON +* + END diff --git a/dspl/liblapack/SRC/ctplqt.f b/dspl/liblapack/SRC/ctplqt.f new file mode 100644 index 0000000..cb4d419 --- /dev/null +++ b/dspl/liblapack/SRC/ctplqt.f @@ -0,0 +1,253 @@ +* Definition: +* =========== +* +* SUBROUTINE CTPLQT( M, N, L, MB, A, LDA, B, LDB, T, LDT, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LDT, N, M, L, MB +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTPLQT computes a blocked LQ factorization of a complex +*> "triangular-pentagonal" matrix C, which is composed of a +*> triangular block A and pentagonal block B, using the compact +*> WY representation for Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B, and the order of the +*> triangular matrix A. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of rows of the lower trapezoidal part of B. +*> MIN(M,N) >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The block size to be used in the blocked QR. M >= MB >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,M) +*> On entry, the lower triangular M-by-M matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the lower triangular matrix L. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> On entry, the pentagonal M-by-N matrix B. The first N-L columns +*> are rectangular, and the last L columns are lower trapezoidal. +*> On exit, B contains the pentagonal matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,N) +*> The lower triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See Further Details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MB*M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The input matrix C is a M-by-(M+N) matrix +*> +*> C = [ A ] [ B ] +*> +*> +*> where A is an lower triangular M-by-M matrix, and B is M-by-N pentagonal +*> matrix consisting of a M-by-(N-L) rectangular matrix B1 on left of a M-by-L +*> upper trapezoidal matrix B2: +*> [ B ] = [ B1 ] [ B2 ] +*> [ B1 ] <- M-by-(N-L) rectangular +*> [ B2 ] <- M-by-L lower trapezoidal. +*> +*> The lower trapezoidal matrix B2 consists of the first L columns of a +*> M-by-M lower triangular matrix, where 0 <= L <= MIN(M,N). If L=0, +*> B is rectangular M-by-N; if M=L=N, B is lower triangular. +*> +*> The matrix W stores the elementary reflectors H(i) in the i-th row +*> above the diagonal (of A) in the M-by-(M+N) input matrix C +*> [ C ] = [ A ] [ B ] +*> [ A ] <- lower triangular M-by-M +*> [ B ] <- M-by-N pentagonal +*> +*> so that W can be represented as +*> [ W ] = [ I ] [ V ] +*> [ I ] <- identity, M-by-M +*> [ V ] <- M-by-N, same form as B. +*> +*> Thus, all of information needed for W is contained on exit in B, which +*> we call V above. Note that V has the same form as B; that is, +*> [ V ] = [ V1 ] [ V2 ] +*> [ V1 ] <- M-by-(N-L) rectangular +*> [ V2 ] <- M-by-L lower trapezoidal. +*> +*> The rows of V represent the vectors which define the H(i)'s. +*> +*> The number of blocks is B = ceiling(M/MB), where each +*> block is of order MB except for the last block, which is of order +*> IB = M - (M-1)*MB. For each of the B blocks, a upper triangular block +*> reflector factor is computed: T1, T2, ..., TB. The MB-by-MB (and IB-by-IB +*> for the last block) T's are stored in the MB-by-N matrix T as +*> +*> T = [T1 T2 ... TB]. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTPLQT( M, N, L, MB, A, LDA, B, LDB, T, LDT, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LDT, N, M, L, MB +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + INTEGER I, IB, LB, NB, IINFO +* .. +* .. External Subroutines .. + EXTERNAL CTPLQT2, CTPRFB, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( L.LT.0 .OR. (L.GT.MIN(M,N) .AND. MIN(M,N).GE.0)) THEN + INFO = -3 + ELSE IF( MB.LT.1 .OR. (MB.GT.M .AND. M.GT.0)) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -8 + ELSE IF( LDT.LT.MB ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTPLQT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) RETURN +* + DO I = 1, M, MB +* +* Compute the QR factorization of the current block +* + IB = MIN( M-I+1, MB ) + NB = MIN( N-L+I+IB-1, N ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = NB-N+L-I+1 + END IF +* + CALL CTPLQT2( IB, NB, LB, A(I,I), LDA, B( I, 1 ), LDB, + $ T(1, I ), LDT, IINFO ) +* +* Update by applying H**T to B(I+IB:M,:) from the right +* + IF( I+IB.LE.M ) THEN + CALL CTPRFB( 'R', 'N', 'F', 'R', M-I-IB+1, NB, IB, LB, + $ B( I, 1 ), LDB, T( 1, I ), LDT, + $ A( I+IB, I ), LDA, B( I+IB, 1 ), LDB, + $ WORK, M-I-IB+1) + END IF + END DO + RETURN +* +* End of CTPLQT +* + END diff --git a/dspl/liblapack/SRC/ctplqt2.f b/dspl/liblapack/SRC/ctplqt2.f new file mode 100644 index 0000000..b16d614 --- /dev/null +++ b/dspl/liblapack/SRC/ctplqt2.f @@ -0,0 +1,316 @@ +* Definition: +* =========== +* +* SUBROUTINE CTPLQT2( M, N, L, A, LDA, B, LDB, T, LDT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LDT, N, M, L +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTPLQT2 computes a LQ a factorization of a complex "triangular-pentagonal" +*> matrix C, which is composed of a triangular block A and pentagonal block B, +*> using the compact WY representation for Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The total number of rows of the matrix B. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B, and the order of +*> the triangular matrix A. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of rows of the lower trapezoidal part of B. +*> MIN(M,N) >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,M) +*> On entry, the lower triangular M-by-M matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the lower triangular matrix L. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> On entry, the pentagonal M-by-N matrix B. The first N-L columns +*> are rectangular, and the last L columns are lower trapezoidal. +*> On exit, B contains the pentagonal matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,M) +*> The N-by-N upper triangular factor T of the block reflector. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The input matrix C is a M-by-(M+N) matrix +*> +*> C = [ A ][ B ] +*> +*> +*> where A is an lower triangular M-by-M matrix, and B is M-by-N pentagonal +*> matrix consisting of a M-by-(N-L) rectangular matrix B1 left of a M-by-L +*> upper trapezoidal matrix B2: +*> +*> B = [ B1 ][ B2 ] +*> [ B1 ] <- M-by-(N-L) rectangular +*> [ B2 ] <- M-by-L lower trapezoidal. +*> +*> The lower trapezoidal matrix B2 consists of the first L columns of a +*> N-by-N lower triangular matrix, where 0 <= L <= MIN(M,N). If L=0, +*> B is rectangular M-by-N; if M=L=N, B is lower triangular. +*> +*> The matrix W stores the elementary reflectors H(i) in the i-th row +*> above the diagonal (of A) in the M-by-(M+N) input matrix C +*> +*> C = [ A ][ B ] +*> [ A ] <- lower triangular M-by-M +*> [ B ] <- M-by-N pentagonal +*> +*> so that W can be represented as +*> +*> W = [ I ][ V ] +*> [ I ] <- identity, M-by-M +*> [ V ] <- M-by-N, same form as B. +*> +*> Thus, all of information needed for W is contained on exit in B, which +*> we call V above. Note that V has the same form as B; that is, +*> +*> W = [ V1 ][ V2 ] +*> [ V1 ] <- M-by-(N-L) rectangular +*> [ V2 ] <- M-by-L lower trapezoidal. +*> +*> The rows of V represent the vectors which define the H(i)'s. +*> The (M+N)-by-(M+N) block reflector H is then given by +*> +*> H = I - W**T * T * W +*> +*> where W^H is the conjugate transpose of W and T is the upper triangular +*> factor of the block reflector. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTPLQT2( M, N, L, A, LDA, B, LDB, T, LDT, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LDT, N, M, L +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER( ZERO = ( 0.0E+0, 0.0E+0 ),ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, P, MP, NP + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLARFG, CGEMV, CGERC, CTRMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( L.LT.0 .OR. L.GT.MIN(M,N) ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDT.LT.MAX( 1, M ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTPLQT2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) RETURN +* + DO I = 1, M +* +* Generate elementary reflector H(I) to annihilate B(I,:) +* + P = N-L+MIN( L, I ) + CALL CLARFG( P+1, A( I, I ), B( I, 1 ), LDB, T( 1, I ) ) + T(1,I)=CONJG(T(1,I)) + IF( I.LT.M ) THEN + DO J = 1, P + B( I, J ) = CONJG(B(I,J)) + END DO +* +* W(M-I:1) := C(I+1:M,I:N) * C(I,I:N) [use W = T(M,:)] +* + DO J = 1, M-I + T( M, J ) = (A( I+J, I )) + END DO + CALL CGEMV( 'N', M-I, P, ONE, B( I+1, 1 ), LDB, + $ B( I, 1 ), LDB, ONE, T( M, 1 ), LDT ) +* +* C(I+1:M,I:N) = C(I+1:M,I:N) + alpha * C(I,I:N)*W(M-1:1)^H +* + ALPHA = -(T( 1, I )) + DO J = 1, M-I + A( I+J, I ) = A( I+J, I ) + ALPHA*(T( M, J )) + END DO + CALL CGERC( M-I, P, (ALPHA), T( M, 1 ), LDT, + $ B( I, 1 ), LDB, B( I+1, 1 ), LDB ) + DO J = 1, P + B( I, J ) = CONJG(B(I,J)) + END DO + END IF + END DO +* + DO I = 2, M +* +* T(I,1:I-1) := C(I:I-1,1:N)**H * (alpha * C(I,I:N)) +* + ALPHA = -(T( 1, I )) + DO J = 1, I-1 + T( I, J ) = ZERO + END DO + P = MIN( I-1, L ) + NP = MIN( N-L+1, N ) + MP = MIN( P+1, M ) + DO J = 1, N-L+P + B(I,J)=CONJG(B(I,J)) + END DO +* +* Triangular part of B2 +* + DO J = 1, P + T( I, J ) = (ALPHA*B( I, N-L+J )) + END DO + CALL CTRMV( 'L', 'N', 'N', P, B( 1, NP ), LDB, + $ T( I, 1 ), LDT ) +* +* Rectangular part of B2 +* + CALL CGEMV( 'N', I-1-P, L, ALPHA, B( MP, NP ), LDB, + $ B( I, NP ), LDB, ZERO, T( I,MP ), LDT ) +* +* B1 + +* + CALL CGEMV( 'N', I-1, N-L, ALPHA, B, LDB, B( I, 1 ), LDB, + $ ONE, T( I, 1 ), LDT ) +* + +* +* T(1:I-1,I) := T(1:I-1,1:I-1) * T(I,1:I-1) +* + DO J = 1, I-1 + T(I,J)=CONJG(T(I,J)) + END DO + CALL CTRMV( 'L', 'C', 'N', I-1, T, LDT, T( I, 1 ), LDT ) + DO J = 1, I-1 + T(I,J)=CONJG(T(I,J)) + END DO + DO J = 1, N-L+P + B(I,J)=CONJG(B(I,J)) + END DO +* +* T(I,I) = tau(I) +* + T( I, I ) = T( 1, I ) + T( 1, I ) = ZERO + END DO + DO I=1,M + DO J= I+1,M + T(I,J)=(T(J,I)) + T(J,I)=ZERO + END DO + END DO + +* +* End of CTPLQT2 +* + END diff --git a/dspl/liblapack/SRC/ctpmlqt.f b/dspl/liblapack/SRC/ctpmlqt.f new file mode 100644 index 0000000..cb5f033 --- /dev/null +++ b/dspl/liblapack/SRC/ctpmlqt.f @@ -0,0 +1,349 @@ +* Definition: +* =========== +* +* SUBROUTINE CTPMLQT( SIDE, TRANS, M, N, K, L, MB, V, LDV, T, LDT, +* A, LDA, B, LDB, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDV, LDA, LDB, M, N, L, MB, LDT +* .. +* .. Array Arguments .. +* COMPLEX V( LDV, * ), A( LDA, * ), B( LDB, * ), +* $ T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTPMLQT applies a complex orthogonal matrix Q obtained from a +*> "triangular-pentagonal" complex block reflector H to a general +*> complex matrix C, which consists of two blocks A and B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The order of the trapezoidal part of V. +*> K >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The block size used for the storage of T. K >= MB >= 1. +*> This must be the same value of MB used to generate T +*> in DTPLQT. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension (LDA,K) +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DTPLQT in B. See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If SIDE = 'L', LDV >= max(1,M); +*> if SIDE = 'R', LDV >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,K) +*> The upper triangular factors of the block reflectors +*> as returned by DTPLQT, stored as a MB-by-K matrix. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension +*> (LDA,N) if SIDE = 'L' or +*> (LDA,K) if SIDE = 'R' +*> On entry, the K-by-N or M-by-K matrix A. +*> On exit, A is overwritten by the corresponding block of +*> Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDC >= max(1,K); +*> If SIDE = 'R', LDC >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> On entry, the M-by-N matrix B. +*> On exit, B is overwritten by the corresponding block of +*> Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. +*> LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array. The dimension of WORK is +*> N*MB if SIDE = 'L', or M*MB if SIDE = 'R'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The columns of the pentagonal matrix V contain the elementary reflectors +*> H(1), H(2), ..., H(K); V is composed of a rectangular block V1 and a +*> trapezoidal block V2: +*> +*> V = [V1] [V2]. +*> +*> +*> The size of the trapezoidal block V2 is determined by the parameter L, +*> where 0 <= L <= K; V2 is lower trapezoidal, consisting of the first L +*> rows of a K-by-K upper triangular matrix. If L=K, V2 is lower triangular; +*> if L=0, there is no trapezoidal block, hence V = V1 is rectangular. +*> +*> If SIDE = 'L': C = [A] where A is K-by-N, B is M-by-N and V is K-by-M. +*> [B] +*> +*> If SIDE = 'R': C = [A B] where A is M-by-K, B is M-by-N and V is K-by-N. +*> +*> The real orthogonal matrix Q is formed from V and T. +*> +*> If TRANS='N' and SIDE='L', C is on exit replaced with Q * C. +*> +*> If TRANS='C' and SIDE='L', C is on exit replaced with Q**H * C. +*> +*> If TRANS='N' and SIDE='R', C is on exit replaced with C * Q. +*> +*> If TRANS='C' and SIDE='R', C is on exit replaced with C * Q**H. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTPMLQT( SIDE, TRANS, M, N, K, L, MB, V, LDV, T, LDT, + $ A, LDA, B, LDB, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDV, LDA, LDB, M, N, L, MB, LDT +* .. +* .. Array Arguments .. + COMPLEX V( LDV, * ), A( LDA, * ), B( LDB, * ), + $ T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN + INTEGER I, IB, NB, LB, KF, LDAQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CTPRFB +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* .. Test the input arguments .. +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + TRAN = LSAME( TRANS, 'C' ) + NOTRAN = LSAME( TRANS, 'N' ) +* + IF ( LEFT ) THEN + LDAQ = MAX( 1, K ) + ELSE IF ( RIGHT ) THEN + LDAQ = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. L.GT.K ) THEN + INFO = -6 + ELSE IF( MB.LT.1 .OR. (MB.GT.K .AND. K.GT.0) ) THEN + INFO = -7 + ELSE IF( LDV.LT.K ) THEN + INFO = -9 + ELSE IF( LDT.LT.MB ) THEN + INFO = -11 + ELSE IF( LDA.LT.LDAQ ) THEN + INFO = -13 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -15 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTPMLQT', -INFO ) + RETURN + END IF +* +* .. Quick return if possible .. +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) RETURN +* + IF( LEFT .AND. NOTRAN ) THEN +* + DO I = 1, K, MB + IB = MIN( MB, K-I+1 ) + NB = MIN( M-L+I+IB-1, M ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = 0 + END IF + CALL CTPRFB( 'L', 'C', 'F', 'R', NB, N, IB, LB, + $ V( I, 1 ), LDV, T( 1, I ), LDT, + $ A( I, 1 ), LDA, B, LDB, WORK, IB ) + END DO +* + ELSE IF( RIGHT .AND. TRAN ) THEN +* + DO I = 1, K, MB + IB = MIN( MB, K-I+1 ) + NB = MIN( N-L+I+IB-1, N ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = NB-N+L-I+1 + END IF + CALL CTPRFB( 'R', 'N', 'F', 'R', M, NB, IB, LB, + $ V( I, 1 ), LDV, T( 1, I ), LDT, + $ A( 1, I ), LDA, B, LDB, WORK, M ) + END DO +* + ELSE IF( LEFT .AND. TRAN ) THEN +* + KF = ((K-1)/MB)*MB+1 + DO I = KF, 1, -MB + IB = MIN( MB, K-I+1 ) + NB = MIN( M-L+I+IB-1, M ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = 0 + END IF + CALL CTPRFB( 'L', 'N', 'F', 'R', NB, N, IB, LB, + $ V( I, 1 ), LDV, T( 1, I ), LDT, + $ A( I, 1 ), LDA, B, LDB, WORK, IB ) + END DO +* + ELSE IF( RIGHT .AND. NOTRAN ) THEN +* + KF = ((K-1)/MB)*MB+1 + DO I = KF, 1, -MB + IB = MIN( MB, K-I+1 ) + NB = MIN( N-L+I+IB-1, N ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = NB-N+L-I+1 + END IF + CALL CTPRFB( 'R', 'C', 'F', 'R', M, NB, IB, LB, + $ V( I, 1 ), LDV, T( 1, I ), LDT, + $ A( 1, I ), LDA, B, LDB, WORK, M ) + END DO +* + END IF +* + RETURN +* +* End of CTPMLQT +* + END diff --git a/dspl/liblapack/SRC/ctpmqrt.f b/dspl/liblapack/SRC/ctpmqrt.f new file mode 100644 index 0000000..fd3d1b1 --- /dev/null +++ b/dspl/liblapack/SRC/ctpmqrt.f @@ -0,0 +1,368 @@ +*> \brief \b CTPMQRT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTPMQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTPMQRT( SIDE, TRANS, M, N, K, L, NB, V, LDV, T, LDT, +* A, LDA, B, LDB, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDV, LDA, LDB, M, N, L, NB, LDT +* .. +* .. Array Arguments .. +* COMPLEX V( LDV, * ), A( LDA, * ), B( LDB, * ), T( LDT, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTPMQRT applies a complex orthogonal matrix Q obtained from a +*> "triangular-pentagonal" complex block reflector H to a general +*> complex matrix C, which consists of two blocks A and B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The order of the trapezoidal part of V. +*> K >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The block size used for the storage of T. K >= NB >= 1. +*> This must be the same value of NB used to generate T +*> in CTPQRT. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> CTPQRT in B. See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If SIDE = 'L', LDV >= max(1,M); +*> if SIDE = 'R', LDV >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,K) +*> The upper triangular factors of the block reflectors +*> as returned by CTPQRT, stored as a NB-by-K matrix. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension +*> (LDA,N) if SIDE = 'L' or +*> (LDA,K) if SIDE = 'R' +*> On entry, the K-by-N or M-by-K matrix A. +*> On exit, A is overwritten by the corresponding block of +*> Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDC >= max(1,K); +*> If SIDE = 'R', LDC >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> On entry, the M-by-N matrix B. +*> On exit, B is overwritten by the corresponding block of +*> Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. +*> LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array. The dimension of WORK is +*> N*NB if SIDE = 'L', or M*NB if SIDE = 'R'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The columns of the pentagonal matrix V contain the elementary reflectors +*> H(1), H(2), ..., H(K); V is composed of a rectangular block V1 and a +*> trapezoidal block V2: +*> +*> V = [V1] +*> [V2]. +*> +*> The size of the trapezoidal block V2 is determined by the parameter L, +*> where 0 <= L <= K; V2 is upper trapezoidal, consisting of the first L +*> rows of a K-by-K upper triangular matrix. If L=K, V2 is upper triangular; +*> if L=0, there is no trapezoidal block, hence V = V1 is rectangular. +*> +*> If SIDE = 'L': C = [A] where A is K-by-N, B is M-by-N and V is M-by-K. +*> [B] +*> +*> If SIDE = 'R': C = [A B] where A is M-by-K, B is M-by-N and V is N-by-K. +*> +*> The complex orthogonal matrix Q is formed from V and T. +*> +*> If TRANS='N' and SIDE='L', C is on exit replaced with Q * C. +*> +*> If TRANS='C' and SIDE='L', C is on exit replaced with Q**H * C. +*> +*> If TRANS='N' and SIDE='R', C is on exit replaced with C * Q. +*> +*> If TRANS='C' and SIDE='R', C is on exit replaced with C * Q**H. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTPMQRT( SIDE, TRANS, M, N, K, L, NB, V, LDV, T, LDT, + $ A, LDA, B, LDB, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDV, LDA, LDB, M, N, L, NB, LDT +* .. +* .. Array Arguments .. + COMPLEX V( LDV, * ), A( LDA, * ), B( LDB, * ), T( LDT, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN + INTEGER I, IB, MB, LB, KF, LDAQ, LDVQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CTPRFB, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* .. Test the input arguments .. +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + TRAN = LSAME( TRANS, 'C' ) + NOTRAN = LSAME( TRANS, 'N' ) +* + IF ( LEFT ) THEN + LDVQ = MAX( 1, M ) + LDAQ = MAX( 1, K ) + ELSE IF ( RIGHT ) THEN + LDVQ = MAX( 1, N ) + LDAQ = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. L.GT.K ) THEN + INFO = -6 + ELSE IF( NB.LT.1 .OR. (NB.GT.K .AND. K.GT.0) ) THEN + INFO = -7 + ELSE IF( LDV.LT.LDVQ ) THEN + INFO = -9 + ELSE IF( LDT.LT.NB ) THEN + INFO = -11 + ELSE IF( LDA.LT.LDAQ ) THEN + INFO = -13 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -15 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTPMQRT', -INFO ) + RETURN + END IF +* +* .. Quick return if possible .. +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) RETURN +* + IF( LEFT .AND. TRAN ) THEN +* + DO I = 1, K, NB + IB = MIN( NB, K-I+1 ) + MB = MIN( M-L+I+IB-1, M ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = MB-M+L-I+1 + END IF + CALL CTPRFB( 'L', 'C', 'F', 'C', MB, N, IB, LB, + $ V( 1, I ), LDV, T( 1, I ), LDT, + $ A( I, 1 ), LDA, B, LDB, WORK, IB ) + END DO +* + ELSE IF( RIGHT .AND. NOTRAN ) THEN +* + DO I = 1, K, NB + IB = MIN( NB, K-I+1 ) + MB = MIN( N-L+I+IB-1, N ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = MB-N+L-I+1 + END IF + CALL CTPRFB( 'R', 'N', 'F', 'C', M, MB, IB, LB, + $ V( 1, I ), LDV, T( 1, I ), LDT, + $ A( 1, I ), LDA, B, LDB, WORK, M ) + END DO +* + ELSE IF( LEFT .AND. NOTRAN ) THEN +* + KF = ((K-1)/NB)*NB+1 + DO I = KF, 1, -NB + IB = MIN( NB, K-I+1 ) + MB = MIN( M-L+I+IB-1, M ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = MB-M+L-I+1 + END IF + CALL CTPRFB( 'L', 'N', 'F', 'C', MB, N, IB, LB, + $ V( 1, I ), LDV, T( 1, I ), LDT, + $ A( I, 1 ), LDA, B, LDB, WORK, IB ) + END DO +* + ELSE IF( RIGHT .AND. TRAN ) THEN +* + KF = ((K-1)/NB)*NB+1 + DO I = KF, 1, -NB + IB = MIN( NB, K-I+1 ) + MB = MIN( N-L+I+IB-1, N ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = MB-N+L-I+1 + END IF + CALL CTPRFB( 'R', 'C', 'F', 'C', M, MB, IB, LB, + $ V( 1, I ), LDV, T( 1, I ), LDT, + $ A( 1, I ), LDA, B, LDB, WORK, M ) + END DO +* + END IF +* + RETURN +* +* End of CTPMQRT +* + END diff --git a/dspl/liblapack/SRC/ctpqrt.f b/dspl/liblapack/SRC/ctpqrt.f new file mode 100644 index 0000000..96b3856 --- /dev/null +++ b/dspl/liblapack/SRC/ctpqrt.f @@ -0,0 +1,270 @@ +*> \brief \b CTPQRT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTPQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTPQRT( M, N, L, NB, A, LDA, B, LDB, T, LDT, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LDT, N, M, L, NB +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTPQRT computes a blocked QR factorization of a complex +*> "triangular-pentagonal" matrix C, which is composed of a +*> triangular block A and pentagonal block B, using the compact +*> WY representation for Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B, and the order of the +*> triangular matrix A. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of rows of the upper trapezoidal part of B. +*> MIN(M,N) >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The block size to be used in the blocked QR. N >= NB >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the upper triangular N-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the upper triangular matrix R. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> On entry, the pentagonal M-by-N matrix B. The first M-L rows +*> are rectangular, and the last L rows are upper trapezoidal. +*> On exit, B contains the pentagonal matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,N) +*> The upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See Further Details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (NB*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The input matrix C is a (N+M)-by-N matrix +*> +*> C = [ A ] +*> [ B ] +*> +*> where A is an upper triangular N-by-N matrix, and B is M-by-N pentagonal +*> matrix consisting of a (M-L)-by-N rectangular matrix B1 on top of a L-by-N +*> upper trapezoidal matrix B2: +*> +*> B = [ B1 ] <- (M-L)-by-N rectangular +*> [ B2 ] <- L-by-N upper trapezoidal. +*> +*> The upper trapezoidal matrix B2 consists of the first L rows of a +*> N-by-N upper triangular matrix, where 0 <= L <= MIN(M,N). If L=0, +*> B is rectangular M-by-N; if M=L=N, B is upper triangular. +*> +*> The matrix W stores the elementary reflectors H(i) in the i-th column +*> below the diagonal (of A) in the (N+M)-by-N input matrix C +*> +*> C = [ A ] <- upper triangular N-by-N +*> [ B ] <- M-by-N pentagonal +*> +*> so that W can be represented as +*> +*> W = [ I ] <- identity, N-by-N +*> [ V ] <- M-by-N, same form as B. +*> +*> Thus, all of information needed for W is contained on exit in B, which +*> we call V above. Note that V has the same form as B; that is, +*> +*> V = [ V1 ] <- (M-L)-by-N rectangular +*> [ V2 ] <- L-by-N upper trapezoidal. +*> +*> The columns of V represent the vectors which define the H(i)'s. +*> +*> The number of blocks is B = ceiling(N/NB), where each +*> block is of order NB except for the last block, which is of order +*> IB = N - (B-1)*NB. For each of the B blocks, a upper triangular block +*> reflector factor is computed: T1, T2, ..., TB. The NB-by-NB (and IB-by-IB +*> for the last block) T's are stored in the NB-by-N matrix T as +*> +*> T = [T1 T2 ... TB]. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTPQRT( M, N, L, NB, A, LDA, B, LDB, T, LDT, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LDT, N, M, L, NB +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + INTEGER I, IB, LB, MB, IINFO +* .. +* .. External Subroutines .. + EXTERNAL CTPQRT2, CTPRFB, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( L.LT.0 .OR. (L.GT.MIN(M,N) .AND. MIN(M,N).GE.0)) THEN + INFO = -3 + ELSE IF( NB.LT.1 .OR. (NB.GT.N .AND. N.GT.0)) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -8 + ELSE IF( LDT.LT.NB ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTPQRT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) RETURN +* + DO I = 1, N, NB +* +* Compute the QR factorization of the current block +* + IB = MIN( N-I+1, NB ) + MB = MIN( M-L+I+IB-1, M ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = MB-M+L-I+1 + END IF +* + CALL CTPQRT2( MB, IB, LB, A(I,I), LDA, B( 1, I ), LDB, + $ T(1, I ), LDT, IINFO ) +* +* Update by applying H**H to B(:,I+IB:N) from the left +* + IF( I+IB.LE.N ) THEN + CALL CTPRFB( 'L', 'C', 'F', 'C', MB, N-I-IB+1, IB, LB, + $ B( 1, I ), LDB, T( 1, I ), LDT, + $ A( I, I+IB ), LDA, B( 1, I+IB ), LDB, + $ WORK, IB ) + END IF + END DO + RETURN +* +* End of CTPQRT +* + END diff --git a/dspl/liblapack/SRC/ctpqrt2.f b/dspl/liblapack/SRC/ctpqrt2.f new file mode 100644 index 0000000..4cebe76 --- /dev/null +++ b/dspl/liblapack/SRC/ctpqrt2.f @@ -0,0 +1,302 @@ +*> \brief \b CTPQRT2 computes a QR factorization of a real or complex "triangular-pentagonal" matrix, which is composed of a triangular block and a pentagonal block, using the compact WY representation for Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTPQRT2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTPQRT2( M, N, L, A, LDA, B, LDB, T, LDT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LDT, N, M, L +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTPQRT2 computes a QR factorization of a complex "triangular-pentagonal" +*> matrix C, which is composed of a triangular block A and pentagonal block B, +*> using the compact WY representation for Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The total number of rows of the matrix B. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B, and the order of +*> the triangular matrix A. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of rows of the upper trapezoidal part of B. +*> MIN(M,N) >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the upper triangular N-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the upper triangular matrix R. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> On entry, the pentagonal M-by-N matrix B. The first M-L rows +*> are rectangular, and the last L rows are upper trapezoidal. +*> On exit, B contains the pentagonal matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,N) +*> The N-by-N upper triangular factor T of the block reflector. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The input matrix C is a (N+M)-by-N matrix +*> +*> C = [ A ] +*> [ B ] +*> +*> where A is an upper triangular N-by-N matrix, and B is M-by-N pentagonal +*> matrix consisting of a (M-L)-by-N rectangular matrix B1 on top of a L-by-N +*> upper trapezoidal matrix B2: +*> +*> B = [ B1 ] <- (M-L)-by-N rectangular +*> [ B2 ] <- L-by-N upper trapezoidal. +*> +*> The upper trapezoidal matrix B2 consists of the first L rows of a +*> N-by-N upper triangular matrix, where 0 <= L <= MIN(M,N). If L=0, +*> B is rectangular M-by-N; if M=L=N, B is upper triangular. +*> +*> The matrix W stores the elementary reflectors H(i) in the i-th column +*> below the diagonal (of A) in the (N+M)-by-N input matrix C +*> +*> C = [ A ] <- upper triangular N-by-N +*> [ B ] <- M-by-N pentagonal +*> +*> so that W can be represented as +*> +*> W = [ I ] <- identity, N-by-N +*> [ V ] <- M-by-N, same form as B. +*> +*> Thus, all of information needed for W is contained on exit in B, which +*> we call V above. Note that V has the same form as B; that is, +*> +*> V = [ V1 ] <- (M-L)-by-N rectangular +*> [ V2 ] <- L-by-N upper trapezoidal. +*> +*> The columns of V represent the vectors which define the H(i)'s. +*> The (M+N)-by-(M+N) block reflector H is then given by +*> +*> H = I - W * T * W**H +*> +*> where W**H is the conjugate transpose of W and T is the upper triangular +*> factor of the block reflector. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTPQRT2( M, N, L, A, LDA, B, LDB, T, LDT, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LDT, N, M, L +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER( ONE = (1.0,0.0), ZERO = (0.0,0.0) ) +* .. +* .. Local Scalars .. + INTEGER I, J, P, MP, NP + COMPLEX ALPHA +* .. +* .. External Subroutines .. + EXTERNAL CLARFG, CGEMV, CGERC, CTRMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( L.LT.0 .OR. L.GT.MIN(M,N) ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTPQRT2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) RETURN +* + DO I = 1, N +* +* Generate elementary reflector H(I) to annihilate B(:,I) +* + P = M-L+MIN( L, I ) + CALL CLARFG( P+1, A( I, I ), B( 1, I ), 1, T( I, 1 ) ) + IF( I.LT.N ) THEN +* +* W(1:N-I) := C(I:M,I+1:N)**H * C(I:M,I) [use W = T(:,N)] +* + DO J = 1, N-I + T( J, N ) = CONJG(A( I, I+J )) + END DO + CALL CGEMV( 'C', P, N-I, ONE, B( 1, I+1 ), LDB, + $ B( 1, I ), 1, ONE, T( 1, N ), 1 ) +* +* C(I:M,I+1:N) = C(I:m,I+1:N) + alpha*C(I:M,I)*W(1:N-1)**H +* + ALPHA = -CONJG(T( I, 1 )) + DO J = 1, N-I + A( I, I+J ) = A( I, I+J ) + ALPHA*CONJG(T( J, N )) + END DO + CALL CGERC( P, N-I, ALPHA, B( 1, I ), 1, + $ T( 1, N ), 1, B( 1, I+1 ), LDB ) + END IF + END DO +* + DO I = 2, N +* +* T(1:I-1,I) := C(I:M,1:I-1)**H * (alpha * C(I:M,I)) +* + ALPHA = -T( I, 1 ) + + DO J = 1, I-1 + T( J, I ) = ZERO + END DO + P = MIN( I-1, L ) + MP = MIN( M-L+1, M ) + NP = MIN( P+1, N ) +* +* Triangular part of B2 +* + DO J = 1, P + T( J, I ) = ALPHA*B( M-L+J, I ) + END DO + CALL CTRMV( 'U', 'C', 'N', P, B( MP, 1 ), LDB, + $ T( 1, I ), 1 ) +* +* Rectangular part of B2 +* + CALL CGEMV( 'C', L, I-1-P, ALPHA, B( MP, NP ), LDB, + $ B( MP, I ), 1, ZERO, T( NP, I ), 1 ) +* +* B1 +* + CALL CGEMV( 'C', M-L, I-1, ALPHA, B, LDB, B( 1, I ), 1, + $ ONE, T( 1, I ), 1 ) +* +* T(1:I-1,I) := T(1:I-1,1:I-1) * T(1:I-1,I) +* + CALL CTRMV( 'U', 'N', 'N', I-1, T, LDT, T( 1, I ), 1 ) +* +* T(I,I) = tau(I) +* + T( I, I ) = T( I, 1 ) + T( I, 1 ) = ZERO + END DO + +* +* End of CTPQRT2 +* + END diff --git a/dspl/liblapack/SRC/ctprfb.f b/dspl/liblapack/SRC/ctprfb.f new file mode 100644 index 0000000..1538deb --- /dev/null +++ b/dspl/liblapack/SRC/ctprfb.f @@ -0,0 +1,814 @@ +*> \brief \b CTPRFB applies a real or complex "triangular-pentagonal" blocked reflector to a real or complex matrix, which is composed of two blocks. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTPRFB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTPRFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, +* V, LDV, T, LDT, A, LDA, B, LDB, WORK, LDWORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, SIDE, STOREV, TRANS +* INTEGER K, L, LDA, LDB, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), T( LDT, * ), +* $ V( LDV, * ), WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTPRFB applies a complex "triangular-pentagonal" block reflector H or its +*> conjugate transpose H**H to a complex matrix C, which is composed of two +*> blocks A and B, either from the left or right. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply H or H**H from the Left +*> = 'R': apply H or H**H from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply H (No transpose) +*> = 'C': apply H**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Indicates how H is formed from a product of elementary +*> reflectors +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Indicates how the vectors which define the elementary +*> reflectors are stored: +*> = 'C': Columns +*> = 'R': Rows +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the matrix T, i.e. the number of elementary +*> reflectors whose product defines the block reflector. +*> K >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The order of the trapezoidal part of V. +*> K >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,M) if STOREV = 'R' and SIDE = 'L' +*> (LDV,N) if STOREV = 'R' and SIDE = 'R' +*> The pentagonal matrix V, which contains the elementary reflectors +*> H(1), H(2), ..., H(K). See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); +*> if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); +*> if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,K) +*> The triangular K-by-K matrix T in the representation of the +*> block reflector. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. +*> LDT >= K. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension +*> (LDA,N) if SIDE = 'L' or (LDA,K) if SIDE = 'R' +*> On entry, the K-by-N or M-by-K matrix A. +*> On exit, A is overwritten by the corresponding block of +*> H*C or H**H*C or C*H or C*H**H. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDC >= max(1,K); +*> If SIDE = 'R', LDC >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> On entry, the M-by-N matrix B. +*> On exit, B is overwritten by the corresponding block of +*> H*C or H**H*C or C*H or C*H**H. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. +*> LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension +*> (LDWORK,N) if SIDE = 'L', +*> (LDWORK,K) if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. +*> If SIDE = 'L', LDWORK >= K; +*> if SIDE = 'R', LDWORK >= M. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix C is a composite matrix formed from blocks A and B. +*> The block B is of size M-by-N; if SIDE = 'R', A is of size M-by-K, +*> and if SIDE = 'L', A is of size K-by-N. +*> +*> If SIDE = 'R' and DIRECT = 'F', C = [A B]. +*> +*> If SIDE = 'L' and DIRECT = 'F', C = [A] +*> [B]. +*> +*> If SIDE = 'R' and DIRECT = 'B', C = [B A]. +*> +*> If SIDE = 'L' and DIRECT = 'B', C = [B] +*> [A]. +*> +*> The pentagonal matrix V is composed of a rectangular block V1 and a +*> trapezoidal block V2. The size of the trapezoidal block is determined by +*> the parameter L, where 0<=L<=K. If L=K, the V2 block of V is triangular; +*> if L=0, there is no trapezoidal block, thus V = V1 is rectangular. +*> +*> If DIRECT = 'F' and STOREV = 'C': V = [V1] +*> [V2] +*> - V2 is upper trapezoidal (first L rows of K-by-K upper triangular) +*> +*> If DIRECT = 'F' and STOREV = 'R': V = [V1 V2] +*> +*> - V2 is lower trapezoidal (first L columns of K-by-K lower triangular) +*> +*> If DIRECT = 'B' and STOREV = 'C': V = [V2] +*> [V1] +*> - V2 is lower trapezoidal (last L rows of K-by-K lower triangular) +*> +*> If DIRECT = 'B' and STOREV = 'R': V = [V2 V1] +*> +*> - V2 is upper trapezoidal (last L columns of K-by-K upper triangular) +*> +*> If STOREV = 'C' and SIDE = 'L', V is M-by-K with V2 L-by-K. +*> +*> If STOREV = 'C' and SIDE = 'R', V is N-by-K with V2 L-by-K. +*> +*> If STOREV = 'R' and SIDE = 'L', V is K-by-M with V2 K-by-L. +*> +*> If STOREV = 'R' and SIDE = 'R', V is K-by-N with V2 K-by-L. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTPRFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, + $ V, LDV, T, LDT, A, LDA, B, LDB, WORK, LDWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, L, LDA, LDB, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), T( LDT, * ), + $ V( LDV, * ), WORK( LDWORK, * ) +* .. +* +* ========================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = (1.0,0.0), ZERO = (0.0,0.0) ) +* .. +* .. Local Scalars .. + INTEGER I, J, MP, NP, KP + LOGICAL LEFT, FORWARD, COLUMN, RIGHT, BACKWARD, ROW +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CTRMM +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 .OR. L.LT.0 ) RETURN +* + IF( LSAME( STOREV, 'C' ) ) THEN + COLUMN = .TRUE. + ROW = .FALSE. + ELSE IF ( LSAME( STOREV, 'R' ) ) THEN + COLUMN = .FALSE. + ROW = .TRUE. + ELSE + COLUMN = .FALSE. + ROW = .FALSE. + END IF +* + IF( LSAME( SIDE, 'L' ) ) THEN + LEFT = .TRUE. + RIGHT = .FALSE. + ELSE IF( LSAME( SIDE, 'R' ) ) THEN + LEFT = .FALSE. + RIGHT = .TRUE. + ELSE + LEFT = .FALSE. + RIGHT = .FALSE. + END IF +* + IF( LSAME( DIRECT, 'F' ) ) THEN + FORWARD = .TRUE. + BACKWARD = .FALSE. + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + FORWARD = .FALSE. + BACKWARD = .TRUE. + ELSE + FORWARD = .FALSE. + BACKWARD = .FALSE. + END IF +* +* --------------------------------------------------------------------------- +* + IF( COLUMN .AND. FORWARD .AND. LEFT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ I ] (K-by-K) +* [ V ] (M-by-K) +* +* Form H C or H**H C where C = [ A ] (K-by-N) +* [ B ] (M-by-N) +* +* H = I - W T W**H or H**H = I - W T**H W**H +* +* A = A - T (A + V**H B) or A = A - T**H (A + V**H B) +* B = B - V T (A + V**H B) or B = B - V T**H (A + V**H B) +* +* --------------------------------------------------------------------------- +* + MP = MIN( M-L+1, M ) + KP = MIN( L+1, K ) +* + DO J = 1, N + DO I = 1, L + WORK( I, J ) = B( M-L+I, J ) + END DO + END DO + CALL CTRMM( 'L', 'U', 'C', 'N', L, N, ONE, V( MP, 1 ), LDV, + $ WORK, LDWORK ) + CALL CGEMM( 'C', 'N', L, N, M-L, ONE, V, LDV, B, LDB, + $ ONE, WORK, LDWORK ) + CALL CGEMM( 'C', 'N', K-L, N, M, ONE, V( 1, KP ), LDV, + $ B, LDB, ZERO, WORK( KP, 1 ), LDWORK ) +* + DO J = 1, N + DO I = 1, K + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL CTRMM( 'L', 'U', TRANS, 'N', K, N, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL CGEMM( 'N', 'N', M-L, N, K, -ONE, V, LDV, WORK, LDWORK, + $ ONE, B, LDB ) + CALL CGEMM( 'N', 'N', L, N, K-L, -ONE, V( MP, KP ), LDV, + $ WORK( KP, 1 ), LDWORK, ONE, B( MP, 1 ), LDB ) + CALL CTRMM( 'L', 'U', 'N', 'N', L, N, ONE, V( MP, 1 ), LDV, + $ WORK, LDWORK ) + DO J = 1, N + DO I = 1, L + B( M-L+I, J ) = B( M-L+I, J ) - WORK( I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( COLUMN .AND. FORWARD .AND. RIGHT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ I ] (K-by-K) +* [ V ] (N-by-K) +* +* Form C H or C H**H where C = [ A B ] (A is M-by-K, B is M-by-N) +* +* H = I - W T W**H or H**H = I - W T**H W**H +* +* A = A - (A + B V) T or A = A - (A + B V) T**H +* B = B - (A + B V) T V**H or B = B - (A + B V) T**H V**H +* +* --------------------------------------------------------------------------- +* + NP = MIN( N-L+1, N ) + KP = MIN( L+1, K ) +* + DO J = 1, L + DO I = 1, M + WORK( I, J ) = B( I, N-L+J ) + END DO + END DO + CALL CTRMM( 'R', 'U', 'N', 'N', M, L, ONE, V( NP, 1 ), LDV, + $ WORK, LDWORK ) + CALL CGEMM( 'N', 'N', M, L, N-L, ONE, B, LDB, + $ V, LDV, ONE, WORK, LDWORK ) + CALL CGEMM( 'N', 'N', M, K-L, N, ONE, B, LDB, + $ V( 1, KP ), LDV, ZERO, WORK( 1, KP ), LDWORK ) +* + DO J = 1, K + DO I = 1, M + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL CTRMM( 'R', 'U', TRANS, 'N', M, K, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL CGEMM( 'N', 'C', M, N-L, K, -ONE, WORK, LDWORK, + $ V, LDV, ONE, B, LDB ) + CALL CGEMM( 'N', 'C', M, L, K-L, -ONE, WORK( 1, KP ), LDWORK, + $ V( NP, KP ), LDV, ONE, B( 1, NP ), LDB ) + CALL CTRMM( 'R', 'U', 'C', 'N', M, L, ONE, V( NP, 1 ), LDV, + $ WORK, LDWORK ) + DO J = 1, L + DO I = 1, M + B( I, N-L+J ) = B( I, N-L+J ) - WORK( I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( COLUMN .AND. BACKWARD .AND. LEFT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ V ] (M-by-K) +* [ I ] (K-by-K) +* +* Form H C or H**H C where C = [ B ] (M-by-N) +* [ A ] (K-by-N) +* +* H = I - W T W**H or H**H = I - W T**H W**H +* +* A = A - T (A + V**H B) or A = A - T**H (A + V**H B) +* B = B - V T (A + V**H B) or B = B - V T**H (A + V**H B) +* +* --------------------------------------------------------------------------- +* + MP = MIN( L+1, M ) + KP = MIN( K-L+1, K ) +* + DO J = 1, N + DO I = 1, L + WORK( K-L+I, J ) = B( I, J ) + END DO + END DO +* + CALL CTRMM( 'L', 'L', 'C', 'N', L, N, ONE, V( 1, KP ), LDV, + $ WORK( KP, 1 ), LDWORK ) + CALL CGEMM( 'C', 'N', L, N, M-L, ONE, V( MP, KP ), LDV, + $ B( MP, 1 ), LDB, ONE, WORK( KP, 1 ), LDWORK ) + CALL CGEMM( 'C', 'N', K-L, N, M, ONE, V, LDV, + $ B, LDB, ZERO, WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL CTRMM( 'L', 'L', TRANS, 'N', K, N, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL CGEMM( 'N', 'N', M-L, N, K, -ONE, V( MP, 1 ), LDV, + $ WORK, LDWORK, ONE, B( MP, 1 ), LDB ) + CALL CGEMM( 'N', 'N', L, N, K-L, -ONE, V, LDV, + $ WORK, LDWORK, ONE, B, LDB ) + CALL CTRMM( 'L', 'L', 'N', 'N', L, N, ONE, V( 1, KP ), LDV, + $ WORK( KP, 1 ), LDWORK ) + DO J = 1, N + DO I = 1, L + B( I, J ) = B( I, J ) - WORK( K-L+I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( COLUMN .AND. BACKWARD .AND. RIGHT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ V ] (N-by-K) +* [ I ] (K-by-K) +* +* Form C H or C H**H where C = [ B A ] (B is M-by-N, A is M-by-K) +* +* H = I - W T W**H or H**H = I - W T**H W**H +* +* A = A - (A + B V) T or A = A - (A + B V) T**H +* B = B - (A + B V) T V**H or B = B - (A + B V) T**H V**H +* +* --------------------------------------------------------------------------- +* + NP = MIN( L+1, N ) + KP = MIN( K-L+1, K ) +* + DO J = 1, L + DO I = 1, M + WORK( I, K-L+J ) = B( I, J ) + END DO + END DO + CALL CTRMM( 'R', 'L', 'N', 'N', M, L, ONE, V( 1, KP ), LDV, + $ WORK( 1, KP ), LDWORK ) + CALL CGEMM( 'N', 'N', M, L, N-L, ONE, B( 1, NP ), LDB, + $ V( NP, KP ), LDV, ONE, WORK( 1, KP ), LDWORK ) + CALL CGEMM( 'N', 'N', M, K-L, N, ONE, B, LDB, + $ V, LDV, ZERO, WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL CTRMM( 'R', 'L', TRANS, 'N', M, K, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL CGEMM( 'N', 'C', M, N-L, K, -ONE, WORK, LDWORK, + $ V( NP, 1 ), LDV, ONE, B( 1, NP ), LDB ) + CALL CGEMM( 'N', 'C', M, L, K-L, -ONE, WORK, LDWORK, + $ V, LDV, ONE, B, LDB ) + CALL CTRMM( 'R', 'L', 'C', 'N', M, L, ONE, V( 1, KP ), LDV, + $ WORK( 1, KP ), LDWORK ) + DO J = 1, L + DO I = 1, M + B( I, J ) = B( I, J ) - WORK( I, K-L+J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( ROW .AND. FORWARD .AND. LEFT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ I V ] ( I is K-by-K, V is K-by-M ) +* +* Form H C or H**H C where C = [ A ] (K-by-N) +* [ B ] (M-by-N) +* +* H = I - W**H T W or H**H = I - W**H T**H W +* +* A = A - T (A + V B) or A = A - T**H (A + V B) +* B = B - V**H T (A + V B) or B = B - V**H T**H (A + V B) +* +* --------------------------------------------------------------------------- +* + MP = MIN( M-L+1, M ) + KP = MIN( L+1, K ) +* + DO J = 1, N + DO I = 1, L + WORK( I, J ) = B( M-L+I, J ) + END DO + END DO + CALL CTRMM( 'L', 'L', 'N', 'N', L, N, ONE, V( 1, MP ), LDV, + $ WORK, LDB ) + CALL CGEMM( 'N', 'N', L, N, M-L, ONE, V, LDV,B, LDB, + $ ONE, WORK, LDWORK ) + CALL CGEMM( 'N', 'N', K-L, N, M, ONE, V( KP, 1 ), LDV, + $ B, LDB, ZERO, WORK( KP, 1 ), LDWORK ) +* + DO J = 1, N + DO I = 1, K + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL CTRMM( 'L', 'U', TRANS, 'N', K, N, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL CGEMM( 'C', 'N', M-L, N, K, -ONE, V, LDV, WORK, LDWORK, + $ ONE, B, LDB ) + CALL CGEMM( 'C', 'N', L, N, K-L, -ONE, V( KP, MP ), LDV, + $ WORK( KP, 1 ), LDWORK, ONE, B( MP, 1 ), LDB ) + CALL CTRMM( 'L', 'L', 'C', 'N', L, N, ONE, V( 1, MP ), LDV, + $ WORK, LDWORK ) + DO J = 1, N + DO I = 1, L + B( M-L+I, J ) = B( M-L+I, J ) - WORK( I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( ROW .AND. FORWARD .AND. RIGHT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ I V ] ( I is K-by-K, V is K-by-N ) +* +* Form C H or C H**H where C = [ A B ] (A is M-by-K, B is M-by-N) +* +* H = I - W**H T W or H**H = I - W**H T**H W +* +* A = A - (A + B V**H) T or A = A - (A + B V**H) T**H +* B = B - (A + B V**H) T V or B = B - (A + B V**H) T**H V +* +* --------------------------------------------------------------------------- +* + NP = MIN( N-L+1, N ) + KP = MIN( L+1, K ) +* + DO J = 1, L + DO I = 1, M + WORK( I, J ) = B( I, N-L+J ) + END DO + END DO + CALL CTRMM( 'R', 'L', 'C', 'N', M, L, ONE, V( 1, NP ), LDV, + $ WORK, LDWORK ) + CALL CGEMM( 'N', 'C', M, L, N-L, ONE, B, LDB, V, LDV, + $ ONE, WORK, LDWORK ) + CALL CGEMM( 'N', 'C', M, K-L, N, ONE, B, LDB, + $ V( KP, 1 ), LDV, ZERO, WORK( 1, KP ), LDWORK ) +* + DO J = 1, K + DO I = 1, M + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL CTRMM( 'R', 'U', TRANS, 'N', M, K, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL CGEMM( 'N', 'N', M, N-L, K, -ONE, WORK, LDWORK, + $ V, LDV, ONE, B, LDB ) + CALL CGEMM( 'N', 'N', M, L, K-L, -ONE, WORK( 1, KP ), LDWORK, + $ V( KP, NP ), LDV, ONE, B( 1, NP ), LDB ) + CALL CTRMM( 'R', 'L', 'N', 'N', M, L, ONE, V( 1, NP ), LDV, + $ WORK, LDWORK ) + DO J = 1, L + DO I = 1, M + B( I, N-L+J ) = B( I, N-L+J ) - WORK( I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( ROW .AND. BACKWARD .AND. LEFT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ V I ] ( I is K-by-K, V is K-by-M ) +* +* Form H C or H**H C where C = [ B ] (M-by-N) +* [ A ] (K-by-N) +* +* H = I - W**H T W or H**H = I - W**H T**H W +* +* A = A - T (A + V B) or A = A - T**H (A + V B) +* B = B - V**H T (A + V B) or B = B - V**H T**H (A + V B) +* +* --------------------------------------------------------------------------- +* + MP = MIN( L+1, M ) + KP = MIN( K-L+1, K ) +* + DO J = 1, N + DO I = 1, L + WORK( K-L+I, J ) = B( I, J ) + END DO + END DO + CALL CTRMM( 'L', 'U', 'N', 'N', L, N, ONE, V( KP, 1 ), LDV, + $ WORK( KP, 1 ), LDWORK ) + CALL CGEMM( 'N', 'N', L, N, M-L, ONE, V( KP, MP ), LDV, + $ B( MP, 1 ), LDB, ONE, WORK( KP, 1 ), LDWORK ) + CALL CGEMM( 'N', 'N', K-L, N, M, ONE, V, LDV, B, LDB, + $ ZERO, WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL CTRMM( 'L', 'L ', TRANS, 'N', K, N, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL CGEMM( 'C', 'N', M-L, N, K, -ONE, V( 1, MP ), LDV, + $ WORK, LDWORK, ONE, B( MP, 1 ), LDB ) + CALL CGEMM( 'C', 'N', L, N, K-L, -ONE, V, LDV, + $ WORK, LDWORK, ONE, B, LDB ) + CALL CTRMM( 'L', 'U', 'C', 'N', L, N, ONE, V( KP, 1 ), LDV, + $ WORK( KP, 1 ), LDWORK ) + DO J = 1, N + DO I = 1, L + B( I, J ) = B( I, J ) - WORK( K-L+I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( ROW .AND. BACKWARD .AND. RIGHT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ V I ] ( I is K-by-K, V is K-by-N ) +* +* Form C H or C H**H where C = [ B A ] (A is M-by-K, B is M-by-N) +* +* H = I - W**H T W or H**H = I - W**H T**H W +* +* A = A - (A + B V**H) T or A = A - (A + B V**H) T**H +* B = B - (A + B V**H) T V or B = B - (A + B V**H) T**H V +* +* --------------------------------------------------------------------------- +* + NP = MIN( L+1, N ) + KP = MIN( K-L+1, K ) +* + DO J = 1, L + DO I = 1, M + WORK( I, K-L+J ) = B( I, J ) + END DO + END DO + CALL CTRMM( 'R', 'U', 'C', 'N', M, L, ONE, V( KP, 1 ), LDV, + $ WORK( 1, KP ), LDWORK ) + CALL CGEMM( 'N', 'C', M, L, N-L, ONE, B( 1, NP ), LDB, + $ V( KP, NP ), LDV, ONE, WORK( 1, KP ), LDWORK ) + CALL CGEMM( 'N', 'C', M, K-L, N, ONE, B, LDB, V, LDV, + $ ZERO, WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL CTRMM( 'R', 'L', TRANS, 'N', M, K, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL CGEMM( 'N', 'N', M, N-L, K, -ONE, WORK, LDWORK, + $ V( 1, NP ), LDV, ONE, B( 1, NP ), LDB ) + CALL CGEMM( 'N', 'N', M, L, K-L , -ONE, WORK, LDWORK, + $ V, LDV, ONE, B, LDB ) + CALL CTRMM( 'R', 'U', 'N', 'N', M, L, ONE, V( KP, 1 ), LDV, + $ WORK( 1, KP ), LDWORK ) + DO J = 1, L + DO I = 1, M + B( I, J ) = B( I, J ) - WORK( I, K-L+J ) + END DO + END DO +* + END IF +* + RETURN +* +* End of CTPRFB +* + END diff --git a/dspl/liblapack/SRC/ctprfs.f b/dspl/liblapack/SRC/ctprfs.f new file mode 100644 index 0000000..b66bd7c --- /dev/null +++ b/dspl/liblapack/SRC/ctprfs.f @@ -0,0 +1,484 @@ +*> \brief \b CTPRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTPRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTPRFS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, X, LDX, +* FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* REAL BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX AP( * ), B( LDB, * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTPRFS provides error bounds and backward error estimates for the +*> solution to a system of linear equations with a triangular packed +*> coefficient matrix. +*> +*> The solution matrix X must be computed by CTPTRS or some other +*> means before entering this routine. CTPRFS does not do iterative +*> refinement because doing so cannot improve the backward error. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> The upper or lower triangular matrix A, packed columnwise in +*> a linear array. The j-th column of A is stored in the array +*> AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> If DIAG = 'U', the diagonal elements of A are not referenced +*> and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> The solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CTPRFS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, X, LDX, + $ FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX AP( * ), B( LDB, * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANSN, TRANST + INTEGER I, J, K, KASE, KC, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CLACN2, CTPMV, CTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL CCOPY( N, X( 1, J ), 1, WORK, 1 ) + CALL CTPMV( UPLO, TRANS, DIAG, N, AP, WORK, 1 ) + CALL CAXPY( N, -ONE, B( 1, J ), 1, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + KC = 1 + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = CABS1( X( K, J ) ) + DO 30 I = 1, K + RWORK( I ) = RWORK( I ) + + $ CABS1( AP( KC+I-1 ) )*XK + 30 CONTINUE + KC = KC + K + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = CABS1( X( K, J ) ) + DO 50 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + + $ CABS1( AP( KC+I-1 ) )*XK + 50 CONTINUE + RWORK( K ) = RWORK( K ) + XK + KC = KC + K + 60 CONTINUE + END IF + ELSE + KC = 1 + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = CABS1( X( K, J ) ) + DO 70 I = K, N + RWORK( I ) = RWORK( I ) + + $ CABS1( AP( KC+I-K ) )*XK + 70 CONTINUE + KC = KC + N - K + 1 + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = CABS1( X( K, J ) ) + DO 90 I = K + 1, N + RWORK( I ) = RWORK( I ) + + $ CABS1( AP( KC+I-K ) )*XK + 90 CONTINUE + RWORK( K ) = RWORK( K ) + XK + KC = KC + N - K + 1 + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A**H)*abs(X) + abs(B). +* + IF( UPPER ) THEN + KC = 1 + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = 1, K + S = S + CABS1( AP( KC+I-1 ) )*CABS1( X( I, J ) ) + 110 CONTINUE + RWORK( K ) = RWORK( K ) + S + KC = KC + K + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = CABS1( X( K, J ) ) + DO 130 I = 1, K - 1 + S = S + CABS1( AP( KC+I-1 ) )*CABS1( X( I, J ) ) + 130 CONTINUE + RWORK( K ) = RWORK( K ) + S + KC = KC + K + 140 CONTINUE + END IF + ELSE + KC = 1 + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, N + S = S + CABS1( AP( KC+I-K ) )*CABS1( X( I, J ) ) + 150 CONTINUE + RWORK( K ) = RWORK( K ) + S + KC = KC + N - K + 1 + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = CABS1( X( K, J ) ) + DO 170 I = K + 1, N + S = S + CABS1( AP( KC+I-K ) )*CABS1( X( I, J ) ) + 170 CONTINUE + RWORK( K ) = RWORK( K ) + S + KC = KC + N - K + 1 + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL CTPSV( UPLO, TRANST, DIAG, N, AP, WORK, 1 ) + DO 220 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 230 CONTINUE + CALL CTPSV( UPLO, TRANSN, DIAG, N, AP, WORK, 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of CTPRFS +* + END diff --git a/dspl/liblapack/SRC/ctptri.f b/dspl/liblapack/SRC/ctptri.f new file mode 100644 index 0000000..b6566ae --- /dev/null +++ b/dspl/liblapack/SRC/ctptri.f @@ -0,0 +1,242 @@ +*> \brief \b CTPTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTPTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTPTRI( UPLO, DIAG, N, AP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* COMPLEX AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTPTRI computes the inverse of a complex upper or lower triangular +*> matrix A stored in packed format. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangular matrix A, stored +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*((2*n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> On exit, the (triangular) inverse of the original matrix, in +*> the same packed storage format. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, A(i,i) is exactly zero. The triangular +*> matrix is singular and its inverse can not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> A triangular matrix A can be transferred to packed storage using one +*> of the following program segments: +*> +*> UPLO = 'U': UPLO = 'L': +*> +*> JC = 1 JC = 1 +*> DO 2 J = 1, N DO 2 J = 1, N +*> DO 1 I = 1, J DO 1 I = J, N +*> AP(JC+I-1) = A(I,J) AP(JC+I-J) = A(I,J) +*> 1 CONTINUE 1 CONTINUE +*> JC = JC + J JC = JC + N - J + 1 +*> 2 CONTINUE 2 CONTINUE +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTPTRI( UPLO, DIAG, N, AP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + COMPLEX AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JC, JCLAST, JJ + COMPLEX AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSCAL, CTPMV, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTPTRI', -INFO ) + RETURN + END IF +* +* Check for singularity if non-unit. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + JJ = 0 + DO 10 INFO = 1, N + JJ = JJ + INFO + IF( AP( JJ ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE + JJ = 1 + DO 20 INFO = 1, N + IF( AP( JJ ).EQ.ZERO ) + $ RETURN + JJ = JJ + N - INFO + 1 + 20 CONTINUE + END IF + INFO = 0 + END IF +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix. +* + JC = 1 + DO 30 J = 1, N + IF( NOUNIT ) THEN + AP( JC+J-1 ) = ONE / AP( JC+J-1 ) + AJJ = -AP( JC+J-1 ) + ELSE + AJJ = -ONE + END IF +* +* Compute elements 1:j-1 of j-th column. +* + CALL CTPMV( 'Upper', 'No transpose', DIAG, J-1, AP, + $ AP( JC ), 1 ) + CALL CSCAL( J-1, AJJ, AP( JC ), 1 ) + JC = JC + J + 30 CONTINUE +* + ELSE +* +* Compute inverse of lower triangular matrix. +* + JC = N*( N+1 ) / 2 + DO 40 J = N, 1, -1 + IF( NOUNIT ) THEN + AP( JC ) = ONE / AP( JC ) + AJJ = -AP( JC ) + ELSE + AJJ = -ONE + END IF + IF( J.LT.N ) THEN +* +* Compute elements j+1:n of j-th column. +* + CALL CTPMV( 'Lower', 'No transpose', DIAG, N-J, + $ AP( JCLAST ), AP( JC+1 ), 1 ) + CALL CSCAL( N-J, AJJ, AP( JC+1 ), 1 ) + END IF + JCLAST = JC + JC = JC - N + J - 2 + 40 CONTINUE + END IF +* + RETURN +* +* End of CTPTRI +* + END diff --git a/dspl/liblapack/SRC/ctptrs.f b/dspl/liblapack/SRC/ctptrs.f new file mode 100644 index 0000000..8a75e2f --- /dev/null +++ b/dspl/liblapack/SRC/ctptrs.f @@ -0,0 +1,228 @@ +*> \brief \b CTPTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTPTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTPTRS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTPTRS solves a triangular system of the form +*> +*> A * X = B, A**T * X = B, or A**H * X = B, +*> +*> where A is a triangular matrix of order N stored in packed format, +*> and B is an N-by-NRHS matrix. A check is made to verify that A is +*> nonsingular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> The upper or lower triangular matrix A, packed columnwise in +*> a linear array. The j-th column of A is stored in the array +*> AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, if INFO = 0, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of A is zero, +*> indicating that the matrix is singular and the +*> solutions X have not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CTPTRS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JC +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + JC = 1 + DO 10 INFO = 1, N + IF( AP( JC+INFO-1 ).EQ.ZERO ) + $ RETURN + JC = JC + INFO + 10 CONTINUE + ELSE + JC = 1 + DO 20 INFO = 1, N + IF( AP( JC ).EQ.ZERO ) + $ RETURN + JC = JC + N - INFO + 1 + 20 CONTINUE + END IF + END IF + INFO = 0 +* +* Solve A * x = b, A**T * x = b, or A**H * x = b. +* + DO 30 J = 1, NRHS + CALL CTPSV( UPLO, TRANS, DIAG, N, AP, B( 1, J ), 1 ) + 30 CONTINUE +* + RETURN +* +* End of CTPTRS +* + END diff --git a/dspl/liblapack/SRC/ctpttf.f b/dspl/liblapack/SRC/ctpttf.f new file mode 100644 index 0000000..91e6fa7 --- /dev/null +++ b/dspl/liblapack/SRC/ctpttf.f @@ -0,0 +1,539 @@ +*> \brief \b CTPTTF copies a triangular matrix from the standard packed format (TP) to the rectangular full packed format (TF). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTPTTF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTPTTF( TRANSR, UPLO, N, AP, ARF, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* COMPLEX AP( 0: * ), ARF( 0: * ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTPTTF copies a triangular matrix A from standard packed format (TP) +*> to rectangular full packed format (TF). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': ARF in Normal format is wanted; +*> = 'C': ARF in Conjugate-transpose format is wanted. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension ( N*(N+1)/2 ), +*> On entry, the upper or lower triangular matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] ARF +*> \verbatim +*> ARF is COMPLEX array, dimension ( N*(N+1)/2 ), +*> On exit, the upper or lower triangular matrix A stored in +*> RFP format. For a further discussion see Notes below. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Standard Packed Format when N is even. +*> We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> conjugate-transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> conjugate-transpose of the last three columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- -- +*> 03 04 05 33 43 53 +*> -- -- +*> 13 14 15 00 44 54 +*> -- +*> 23 24 25 10 11 55 +*> +*> 33 34 35 20 21 22 +*> -- +*> 00 44 45 30 31 32 +*> -- -- +*> 01 11 55 40 41 42 +*> -- -- -- +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- -- +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We next consider Standard Packed Format when N is odd. +*> We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> conjugate-transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> conjugate-transpose of the last two columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- +*> 02 03 04 00 33 43 +*> -- +*> 12 13 14 10 11 44 +*> +*> 22 23 24 20 21 22 +*> -- +*> 00 33 34 30 31 32 +*> -- -- +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- +*> 02 12 22 00 01 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 11 33 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTPTTF( TRANSR, UPLO, N, AP, ARF, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + COMPLEX AP( 0: * ), ARF( 0: * ) +* +* ===================================================================== +* +* .. Parameters .. +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K, NT + INTEGER I, J, IJ + INTEGER IJP, JP, LDA, JS +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTPTTF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( NORMALTRANSR ) THEN + ARF( 0 ) = AP( 0 ) + ELSE + ARF( 0 ) = CONJG( AP( 0 ) ) + END IF + RETURN + END IF +* +* Size of array ARF(0:NT-1) +* + NT = N*( N+1 ) / 2 +* +* Set N1 and N2 depending on LOWER +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* If N is odd, set NISODD = .TRUE. +* If N is even, set K = N/2 and NISODD = .FALSE. +* +* set lda of ARF^C; ARF^C is (0:(N+1)/2-1,0:N-noe) +* where noe = 0 if n is even, noe = 1 if n is odd +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + LDA = N + 1 + ELSE + NISODD = .TRUE. + LDA = N + END IF +* +* ARF^C has lda rows and n+1-noe cols +* + IF( .NOT.NORMALTRANSR ) + $ LDA = ( N+1 ) / 2 +* +* start execution: there are eight cases +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) +* T1 -> a(0), T2 -> a(n), S -> a(n1); lda = n +* + IJP = 0 + JP = 0 + DO J = 0, N2 + DO I = J, N - 1 + IJ = I + JP + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JP = JP + LDA + END DO + DO I = 0, N2 - 1 + DO J = 1 + I, N2 + IJ = I + J*LDA + ARF( IJ ) = CONJG( AP( IJP ) ) + IJP = IJP + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) +* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) +* T1 -> a(n2), T2 -> a(n1), S -> a(0) +* + IJP = 0 + DO J = 0, N1 - 1 + IJ = N2 + J + DO I = 0, J + ARF( IJ ) = CONJG( AP( IJP ) ) + IJP = IJP + 1 + IJ = IJ + LDA + END DO + END DO + JS = 0 + DO J = N1, N - 1 + IJ = JS + DO IJ = JS, JS + J + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is odd +* T1 -> A(0,0) , T2 -> A(1,0) , S -> A(0,n1) +* T1 -> a(0+0) , T2 -> a(1+0) , S -> a(0+n1*n1); lda=n1 +* + IJP = 0 + DO I = 0, N2 + DO IJ = I*( LDA+1 ), N*LDA - 1, LDA + ARF( IJ ) = CONJG( AP( IJP ) ) + IJP = IJP + 1 + END DO + END DO + JS = 1 + DO J = 0, N2 - 1 + DO IJ = JS, JS + N2 - J - 1 + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + 1 + END DO +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is odd +* T1 -> A(0,n1+1), T2 -> A(0,n1), S -> A(0,0) +* T1 -> a(n2*n2), T2 -> a(n1*n2), S -> a(0); lda = n2 +* + IJP = 0 + JS = N2*LDA + DO J = 0, N1 - 1 + DO IJ = JS, JS + J + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO + DO I = 0, N1 + DO IJ = I, I + ( N1+I )*LDA, LDA + ARF( IJ ) = CONJG( AP( IJP ) ) + IJP = IJP + 1 + END DO + END DO +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1) +* + IJP = 0 + JP = 0 + DO J = 0, K - 1 + DO I = J, N - 1 + IJ = 1 + I + JP + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JP = JP + LDA + END DO + DO I = 0, K - 1 + DO J = I, K - 1 + IJ = I + J*LDA + ARF( IJ ) = CONJG( AP( IJP ) ) + IJP = IJP + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0) +* + IJP = 0 + DO J = 0, K - 1 + IJ = K + 1 + J + DO I = 0, J + ARF( IJ ) = CONJG( AP( IJP ) ) + IJP = IJP + 1 + IJ = IJ + LDA + END DO + END DO + JS = 0 + DO J = K, N - 1 + IJ = JS + DO IJ = JS, JS + J + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO +* + END IF +* + ELSE +* +* N is even and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) +* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k +* + IJP = 0 + DO I = 0, K - 1 + DO IJ = I + ( I+1 )*LDA, ( N+1 )*LDA - 1, LDA + ARF( IJ ) = CONJG( AP( IJP ) ) + IJP = IJP + 1 + END DO + END DO + JS = 0 + DO J = 0, K - 1 + DO IJ = JS, JS + K - J - 1 + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + 1 + END DO +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0) +* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k +* + IJP = 0 + JS = ( K+1 )*LDA + DO J = 0, K - 1 + DO IJ = JS, JS + J + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO + DO I = 0, K - 1 + DO IJ = I, I + ( K+I )*LDA, LDA + ARF( IJ ) = CONJG( AP( IJP ) ) + IJP = IJP + 1 + END DO + END DO +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of CTPTTF +* + END diff --git a/dspl/liblapack/SRC/ctpttr.f b/dspl/liblapack/SRC/ctpttr.f new file mode 100644 index 0000000..a0548a3 --- /dev/null +++ b/dspl/liblapack/SRC/ctpttr.f @@ -0,0 +1,176 @@ +*> \brief \b CTPTTR copies a triangular matrix from the standard packed format (TP) to the standard full format (TR). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTPTTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTPTTR( UPLO, N, AP, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTPTTR copies a triangular matrix A from standard packed format (TP) +*> to standard full format (TR). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular. +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension ( N*(N+1)/2 ), +*> On entry, the upper or lower triangular matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is COMPLEX array, dimension ( LDA, N ) +*> On exit, the triangular matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CTPTTR( UPLO, N, AP, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* .. +* .. Local Scalars .. + LOGICAL LOWER + INTEGER I, J, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTPTTR', -INFO ) + RETURN + END IF +* + IF( LOWER ) THEN + K = 0 + DO J = 1, N + DO I = J, N + K = K + 1 + A( I, J ) = AP( K ) + END DO + END DO + ELSE + K = 0 + DO J = 1, N + DO I = 1, J + K = K + 1 + A( I, J ) = AP( K ) + END DO + END DO + END IF +* +* + RETURN +* +* End of CTPTTR +* + END diff --git a/dspl/liblapack/SRC/ctrcon.f b/dspl/liblapack/SRC/ctrcon.f new file mode 100644 index 0000000..81ba45d --- /dev/null +++ b/dspl/liblapack/SRC/ctrcon.f @@ -0,0 +1,283 @@ +*> \brief \b CTRCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTRCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK, +* RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER INFO, LDA, N +* REAL RCOND +* .. +* .. Array Arguments .. +* REAL RWORK( * ) +* COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTRCON estimates the reciprocal of the condition number of a +*> triangular matrix A, in either the 1-norm or the infinity-norm. +*> +*> The norm of A is computed and an estimate is obtained for +*> norm(inv(A)), then the reciprocal of the condition number is +*> computed as +*> RCOND = 1 / ( norm(A) * norm(inv(A)) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The triangular matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of the array A contains the upper +*> triangular matrix, and the strictly lower triangular part of +*> A is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of the array A contains the lower triangular +*> matrix, and the strictly upper triangular part of A is not +*> referenced. If DIAG = 'U', the diagonal elements of A are +*> also not referenced and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(norm(A) * norm(inv(A))). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CTRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK, + $ RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, LDA, N + REAL RCOND +* .. +* .. Array Arguments .. + REAL RWORK( * ) + COMPLEX A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + REAL AINVNM, ANORM, SCALE, SMLNUM, XNORM + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL CLANTR, SLAMCH + EXTERNAL LSAME, ICAMAX, CLANTR, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CLATRS, CSRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = SLAMCH( 'Safe minimum' )*REAL( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = CLANTR( NORM, UPLO, DIAG, N, N, A, LDA, RWORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL CLATRS( UPLO, 'No transpose', DIAG, NORMIN, N, A, + $ LDA, WORK, SCALE, RWORK, INFO ) + ELSE +* +* Multiply by inv(A**H). +* + CALL CLATRS( UPLO, 'Conjugate transpose', DIAG, NORMIN, + $ N, A, LDA, WORK, SCALE, RWORK, INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = ICAMAX( N, WORK, 1 ) + XNORM = CABS1( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of CTRCON +* + END diff --git a/dspl/liblapack/SRC/ctrevc.f b/dspl/liblapack/SRC/ctrevc.f new file mode 100644 index 0000000..2b95034 --- /dev/null +++ b/dspl/liblapack/SRC/ctrevc.f @@ -0,0 +1,486 @@ +*> \brief \b CTREVC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTREVC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, +* LDVR, MM, M, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER HOWMNY, SIDE +* INTEGER INFO, LDT, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* REAL RWORK( * ) +* COMPLEX T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTREVC computes some or all of the right and/or left eigenvectors of +*> a complex upper triangular matrix T. +*> Matrices of this type are produced by the Schur factorization of +*> a complex general matrix: A = Q*T*Q**H, as computed by CHSEQR. +*> +*> The right eigenvector x and the left eigenvector y of T corresponding +*> to an eigenvalue w are defined by: +*> +*> T*x = w*x, (y**H)*T = w*(y**H) +*> +*> where y**H denotes the conjugate transpose of the vector y. +*> The eigenvalues are not input to this routine, but are read directly +*> from the diagonal of T. +*> +*> This routine returns the matrices X and/or Y of right and left +*> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an +*> input matrix. If Q is the unitary factor that reduces a matrix A to +*> Schur form T, then Q*X and Q*Y are the matrices of right and left +*> eigenvectors of A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': compute right eigenvectors only; +*> = 'L': compute left eigenvectors only; +*> = 'B': compute both right and left eigenvectors. +*> \endverbatim +*> +*> \param[in] HOWMNY +*> \verbatim +*> HOWMNY is CHARACTER*1 +*> = 'A': compute all right and/or left eigenvectors; +*> = 'B': compute all right and/or left eigenvectors, +*> backtransformed using the matrices supplied in +*> VR and/or VL; +*> = 'S': compute selected right and/or left eigenvectors, +*> as indicated by the logical array SELECT. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> If HOWMNY = 'S', SELECT specifies the eigenvectors to be +*> computed. +*> The eigenvector corresponding to the j-th eigenvalue is +*> computed if SELECT(j) = .TRUE.. +*> Not referenced if HOWMNY = 'A' or 'B'. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in,out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,N) +*> The upper triangular matrix T. T is modified, but restored +*> on exit. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is COMPLEX array, dimension (LDVL,MM) +*> On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +*> contain an N-by-N matrix Q (usually the unitary matrix Q of +*> Schur vectors returned by CHSEQR). +*> On exit, if SIDE = 'L' or 'B', VL contains: +*> if HOWMNY = 'A', the matrix Y of left eigenvectors of T; +*> if HOWMNY = 'B', the matrix Q*Y; +*> if HOWMNY = 'S', the left eigenvectors of T specified by +*> SELECT, stored consecutively in the columns +*> of VL, in the same order as their +*> eigenvalues. +*> Not referenced if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. LDVL >= 1, and if +*> SIDE = 'L' or 'B', LDVL >= N. +*> \endverbatim +*> +*> \param[in,out] VR +*> \verbatim +*> VR is COMPLEX array, dimension (LDVR,MM) +*> On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +*> contain an N-by-N matrix Q (usually the unitary matrix Q of +*> Schur vectors returned by CHSEQR). +*> On exit, if SIDE = 'R' or 'B', VR contains: +*> if HOWMNY = 'A', the matrix X of right eigenvectors of T; +*> if HOWMNY = 'B', the matrix Q*X; +*> if HOWMNY = 'S', the right eigenvectors of T specified by +*> SELECT, stored consecutively in the columns +*> of VR, in the same order as their +*> eigenvalues. +*> Not referenced if SIDE = 'L'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. LDVR >= 1, and if +*> SIDE = 'R' or 'B'; LDVR >= N. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of columns in the arrays VL and/or VR. MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of columns in the arrays VL and/or VR actually +*> used to store the eigenvectors. If HOWMNY = 'A' or 'B', M +*> is set to N. Each selected eigenvector occupies one +*> column. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The algorithm used in this program is basically backward (forward) +*> substitution, with scaling to make the the code robust against +*> possible overflow. +*> +*> Each eigenvector is normalized so that the element of largest +*> magnitude has magnitude 1; here the magnitude of a complex number +*> (x,y) is taken to be |x| + |y|. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, + $ LDVR, MM, M, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDT, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + REAL RWORK( * ) + COMPLEX T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CMZERO, CMONE + PARAMETER ( CMZERO = ( 0.0E+0, 0.0E+0 ), + $ CMONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLV, BOTHV, LEFTV, OVER, RIGHTV, SOMEV + INTEGER I, II, IS, J, K, KI + REAL OVFL, REMAX, SCALE, SMIN, SMLNUM, ULP, UNFL + COMPLEX CDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SCASUM, SLAMCH + EXTERNAL LSAME, ICAMAX, SCASUM, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL CCOPY, CGEMV, CLATRS, CSSCAL, SLABAD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + ALLV = LSAME( HOWMNY, 'A' ) + OVER = LSAME( HOWMNY, 'B' ) + SOMEV = LSAME( HOWMNY, 'S' ) +* +* Set M to the number of columns required to store the selected +* eigenvectors. +* + IF( SOMEV ) THEN + M = 0 + DO 10 J = 1, N + IF( SELECT( J ) ) + $ M = M + 1 + 10 CONTINUE + ELSE + M = N + END IF +* + INFO = 0 + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE IF( MM.LT.M ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTREVC', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Set the constants to control overflow. +* + UNFL = SLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL SLABAD( UNFL, OVFL ) + ULP = SLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) +* +* Store the diagonal elements of T in working array WORK. +* + DO 20 I = 1, N + WORK( I+N ) = T( I, I ) + 20 CONTINUE +* +* Compute 1-norm of each column of strictly upper triangular +* part of T to control overflow in triangular solver. +* + RWORK( 1 ) = ZERO + DO 30 J = 2, N + RWORK( J ) = SCASUM( J-1, T( 1, J ), 1 ) + 30 CONTINUE +* + IF( RIGHTV ) THEN +* +* Compute right eigenvectors. +* + IS = M + DO 80 KI = N, 1, -1 +* + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 80 + END IF + SMIN = MAX( ULP*( CABS1( T( KI, KI ) ) ), SMLNUM ) +* + WORK( 1 ) = CMONE +* +* Form right-hand side. +* + DO 40 K = 1, KI - 1 + WORK( K ) = -T( K, KI ) + 40 CONTINUE +* +* Solve the triangular system: +* (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. +* + DO 50 K = 1, KI - 1 + T( K, K ) = T( K, K ) - T( KI, KI ) + IF( CABS1( T( K, K ) ).LT.SMIN ) + $ T( K, K ) = SMIN + 50 CONTINUE +* + IF( KI.GT.1 ) THEN + CALL CLATRS( 'Upper', 'No transpose', 'Non-unit', 'Y', + $ KI-1, T, LDT, WORK( 1 ), SCALE, RWORK, + $ INFO ) + WORK( KI ) = SCALE + END IF +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN + CALL CCOPY( KI, WORK( 1 ), 1, VR( 1, IS ), 1 ) +* + II = ICAMAX( KI, VR( 1, IS ), 1 ) + REMAX = ONE / CABS1( VR( II, IS ) ) + CALL CSSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 60 K = KI + 1, N + VR( K, IS ) = CMZERO + 60 CONTINUE + ELSE + IF( KI.GT.1 ) + $ CALL CGEMV( 'N', N, KI-1, CMONE, VR, LDVR, WORK( 1 ), + $ 1, CMPLX( SCALE ), VR( 1, KI ), 1 ) +* + II = ICAMAX( N, VR( 1, KI ), 1 ) + REMAX = ONE / CABS1( VR( II, KI ) ) + CALL CSSCAL( N, REMAX, VR( 1, KI ), 1 ) + END IF +* +* Set back the original diagonal elements of T. +* + DO 70 K = 1, KI - 1 + T( K, K ) = WORK( K+N ) + 70 CONTINUE +* + IS = IS - 1 + 80 CONTINUE + END IF +* + IF( LEFTV ) THEN +* +* Compute left eigenvectors. +* + IS = 1 + DO 130 KI = 1, N +* + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 130 + END IF + SMIN = MAX( ULP*( CABS1( T( KI, KI ) ) ), SMLNUM ) +* + WORK( N ) = CMONE +* +* Form right-hand side. +* + DO 90 K = KI + 1, N + WORK( K ) = -CONJG( T( KI, K ) ) + 90 CONTINUE +* +* Solve the triangular system: +* (T(KI+1:N,KI+1:N) - T(KI,KI))**H*X = SCALE*WORK. +* + DO 100 K = KI + 1, N + T( K, K ) = T( K, K ) - T( KI, KI ) + IF( CABS1( T( K, K ) ).LT.SMIN ) + $ T( K, K ) = SMIN + 100 CONTINUE +* + IF( KI.LT.N ) THEN + CALL CLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ 'Y', N-KI, T( KI+1, KI+1 ), LDT, + $ WORK( KI+1 ), SCALE, RWORK, INFO ) + WORK( KI ) = SCALE + END IF +* +* Copy the vector x or Q*x to VL and normalize. +* + IF( .NOT.OVER ) THEN + CALL CCOPY( N-KI+1, WORK( KI ), 1, VL( KI, IS ), 1 ) +* + II = ICAMAX( N-KI+1, VL( KI, IS ), 1 ) + KI - 1 + REMAX = ONE / CABS1( VL( II, IS ) ) + CALL CSSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) +* + DO 110 K = 1, KI - 1 + VL( K, IS ) = CMZERO + 110 CONTINUE + ELSE + IF( KI.LT.N ) + $ CALL CGEMV( 'N', N, N-KI, CMONE, VL( 1, KI+1 ), LDVL, + $ WORK( KI+1 ), 1, CMPLX( SCALE ), + $ VL( 1, KI ), 1 ) +* + II = ICAMAX( N, VL( 1, KI ), 1 ) + REMAX = ONE / CABS1( VL( II, KI ) ) + CALL CSSCAL( N, REMAX, VL( 1, KI ), 1 ) + END IF +* +* Set back the original diagonal elements of T. +* + DO 120 K = KI + 1, N + T( K, K ) = WORK( K+N ) + 120 CONTINUE +* + IS = IS + 1 + 130 CONTINUE + END IF +* + RETURN +* +* End of CTREVC +* + END diff --git a/dspl/liblapack/SRC/ctrevc3.f b/dspl/liblapack/SRC/ctrevc3.f new file mode 100644 index 0000000..c06b404 --- /dev/null +++ b/dspl/liblapack/SRC/ctrevc3.f @@ -0,0 +1,631 @@ +*> \brief \b CTREVC3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTREVC3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTREVC3( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, +* LDVR, MM, M, WORK, LWORK, RWORK, LRWORK, INFO) +* +* .. Scalar Arguments .. +* CHARACTER HOWMNY, SIDE +* INTEGER INFO, LDT, LDVL, LDVR, LWORK, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* REAL RWORK( * ) +* COMPLEX T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTREVC3 computes some or all of the right and/or left eigenvectors of +*> a complex upper triangular matrix T. +*> Matrices of this type are produced by the Schur factorization of +*> a complex general matrix: A = Q*T*Q**H, as computed by CHSEQR. +*> +*> The right eigenvector x and the left eigenvector y of T corresponding +*> to an eigenvalue w are defined by: +*> +*> T*x = w*x, (y**H)*T = w*(y**H) +*> +*> where y**H denotes the conjugate transpose of the vector y. +*> The eigenvalues are not input to this routine, but are read directly +*> from the diagonal of T. +*> +*> This routine returns the matrices X and/or Y of right and left +*> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an +*> input matrix. If Q is the unitary factor that reduces a matrix A to +*> Schur form T, then Q*X and Q*Y are the matrices of right and left +*> eigenvectors of A. +*> +*> This uses a Level 3 BLAS version of the back transformation. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': compute right eigenvectors only; +*> = 'L': compute left eigenvectors only; +*> = 'B': compute both right and left eigenvectors. +*> \endverbatim +*> +*> \param[in] HOWMNY +*> \verbatim +*> HOWMNY is CHARACTER*1 +*> = 'A': compute all right and/or left eigenvectors; +*> = 'B': compute all right and/or left eigenvectors, +*> backtransformed using the matrices supplied in +*> VR and/or VL; +*> = 'S': compute selected right and/or left eigenvectors, +*> as indicated by the logical array SELECT. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> If HOWMNY = 'S', SELECT specifies the eigenvectors to be +*> computed. +*> The eigenvector corresponding to the j-th eigenvalue is +*> computed if SELECT(j) = .TRUE.. +*> Not referenced if HOWMNY = 'A' or 'B'. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in,out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,N) +*> The upper triangular matrix T. T is modified, but restored +*> on exit. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is COMPLEX array, dimension (LDVL,MM) +*> On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +*> contain an N-by-N matrix Q (usually the unitary matrix Q of +*> Schur vectors returned by CHSEQR). +*> On exit, if SIDE = 'L' or 'B', VL contains: +*> if HOWMNY = 'A', the matrix Y of left eigenvectors of T; +*> if HOWMNY = 'B', the matrix Q*Y; +*> if HOWMNY = 'S', the left eigenvectors of T specified by +*> SELECT, stored consecutively in the columns +*> of VL, in the same order as their +*> eigenvalues. +*> Not referenced if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. +*> LDVL >= 1, and if SIDE = 'L' or 'B', LDVL >= N. +*> \endverbatim +*> +*> \param[in,out] VR +*> \verbatim +*> VR is COMPLEX array, dimension (LDVR,MM) +*> On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +*> contain an N-by-N matrix Q (usually the unitary matrix Q of +*> Schur vectors returned by CHSEQR). +*> On exit, if SIDE = 'R' or 'B', VR contains: +*> if HOWMNY = 'A', the matrix X of right eigenvectors of T; +*> if HOWMNY = 'B', the matrix Q*X; +*> if HOWMNY = 'S', the right eigenvectors of T specified by +*> SELECT, stored consecutively in the columns +*> of VR, in the same order as their +*> eigenvalues. +*> Not referenced if SIDE = 'L'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. +*> LDVR >= 1, and if SIDE = 'R' or 'B', LDVR >= N. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of columns in the arrays VL and/or VR. MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of columns in the arrays VL and/or VR actually +*> used to store the eigenvectors. +*> If HOWMNY = 'A' or 'B', M is set to N. +*> Each selected eigenvector occupies one column. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of array WORK. LWORK >= max(1,2*N). +*> For optimum performance, LWORK >= N + 2*N*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (LRWORK) +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of array RWORK. LRWORK >= max(1,N). +*> +*> If LRWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the RWORK array, returns +*> this value as the first entry of the RWORK array, and no error +*> message related to LRWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +* @generated from ztrevc3.f, fortran z -> c, Tue Apr 19 01:47:44 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The algorithm used in this program is basically backward (forward) +*> substitution, with scaling to make the the code robust against +*> possible overflow. +*> +*> Each eigenvector is normalized so that the element of largest +*> magnitude has magnitude 1; here the magnitude of a complex number +*> (x,y) is taken to be |x| + |y|. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTREVC3( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, + $ LDVR, MM, M, WORK, LWORK, RWORK, LRWORK, INFO) + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDT, LDVL, LDVR, LWORK, LRWORK, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + REAL RWORK( * ) + COMPLEX T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) + INTEGER NBMIN, NBMAX + PARAMETER ( NBMIN = 8, NBMAX = 128 ) +* .. +* .. Local Scalars .. + LOGICAL ALLV, BOTHV, LEFTV, LQUERY, OVER, RIGHTV, SOMEV + INTEGER I, II, IS, J, K, KI, IV, MAXWRK, NB + REAL OVFL, REMAX, SCALE, SMIN, SMLNUM, ULP, UNFL + COMPLEX CDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV, ICAMAX + REAL SLAMCH, SCASUM + EXTERNAL LSAME, ILAENV, ICAMAX, SLAMCH, SCASUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CCOPY, CLASET, CSSCAL, CGEMM, CGEMV, + $ CLATRS, CLACPY, SLABAD +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, CMPLX, CONJG, AIMAG, MAX +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + ALLV = LSAME( HOWMNY, 'A' ) + OVER = LSAME( HOWMNY, 'B' ) + SOMEV = LSAME( HOWMNY, 'S' ) +* +* Set M to the number of columns required to store the selected +* eigenvectors. +* + IF( SOMEV ) THEN + M = 0 + DO 10 J = 1, N + IF( SELECT( J ) ) + $ M = M + 1 + 10 CONTINUE + ELSE + M = N + END IF +* + INFO = 0 + NB = ILAENV( 1, 'CTREVC', SIDE // HOWMNY, N, -1, -1, -1 ) + MAXWRK = N + 2*N*NB + WORK(1) = MAXWRK + RWORK(1) = N + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE IF( MM.LT.M ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -14 + ELSE IF ( LRWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTREVC3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Use blocked version of back-transformation if sufficient workspace. +* Zero-out the workspace to avoid potential NaN propagation. +* + IF( OVER .AND. LWORK .GE. N + 2*N*NBMIN ) THEN + NB = (LWORK - N) / (2*N) + NB = MIN( NB, NBMAX ) + CALL CLASET( 'F', N, 1+2*NB, CZERO, CZERO, WORK, N ) + ELSE + NB = 1 + END IF +* +* Set the constants to control overflow. +* + UNFL = SLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL SLABAD( UNFL, OVFL ) + ULP = SLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) +* +* Store the diagonal elements of T in working array WORK. +* + DO 20 I = 1, N + WORK( I ) = T( I, I ) + 20 CONTINUE +* +* Compute 1-norm of each column of strictly upper triangular +* part of T to control overflow in triangular solver. +* + RWORK( 1 ) = ZERO + DO 30 J = 2, N + RWORK( J ) = SCASUM( J-1, T( 1, J ), 1 ) + 30 CONTINUE +* + IF( RIGHTV ) THEN +* +* ============================================================ +* Compute right eigenvectors. +* +* IV is index of column in current block. +* Non-blocked version always uses IV=NB=1; +* blocked version starts with IV=NB, goes down to 1. +* (Note the "0-th" column is used to store the original diagonal.) + IV = NB + IS = M + DO 80 KI = N, 1, -1 + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 80 + END IF + SMIN = MAX( ULP*( CABS1( T( KI, KI ) ) ), SMLNUM ) +* +* -------------------------------------------------------- +* Complex right eigenvector +* + WORK( KI + IV*N ) = CONE +* +* Form right-hand side. +* + DO 40 K = 1, KI - 1 + WORK( K + IV*N ) = -T( K, KI ) + 40 CONTINUE +* +* Solve upper triangular system: +* [ T(1:KI-1,1:KI-1) - T(KI,KI) ]*X = SCALE*WORK. +* + DO 50 K = 1, KI - 1 + T( K, K ) = T( K, K ) - T( KI, KI ) + IF( CABS1( T( K, K ) ).LT.SMIN ) + $ T( K, K ) = SMIN + 50 CONTINUE +* + IF( KI.GT.1 ) THEN + CALL CLATRS( 'Upper', 'No transpose', 'Non-unit', 'Y', + $ KI-1, T, LDT, WORK( 1 + IV*N ), SCALE, + $ RWORK, INFO ) + WORK( KI + IV*N ) = SCALE + END IF +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN +* ------------------------------ +* no back-transform: copy x to VR and normalize. + CALL CCOPY( KI, WORK( 1 + IV*N ), 1, VR( 1, IS ), 1 ) +* + II = ICAMAX( KI, VR( 1, IS ), 1 ) + REMAX = ONE / CABS1( VR( II, IS ) ) + CALL CSSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 60 K = KI + 1, N + VR( K, IS ) = CZERO + 60 CONTINUE +* + ELSE IF( NB.EQ.1 ) THEN +* ------------------------------ +* version 1: back-transform each vector with GEMV, Q*x. + IF( KI.GT.1 ) + $ CALL CGEMV( 'N', N, KI-1, CONE, VR, LDVR, + $ WORK( 1 + IV*N ), 1, CMPLX( SCALE ), + $ VR( 1, KI ), 1 ) +* + II = ICAMAX( N, VR( 1, KI ), 1 ) + REMAX = ONE / CABS1( VR( II, KI ) ) + CALL CSSCAL( N, REMAX, VR( 1, KI ), 1 ) +* + ELSE +* ------------------------------ +* version 2: back-transform block of vectors with GEMM +* zero out below vector + DO K = KI + 1, N + WORK( K + IV*N ) = CZERO + END DO +* +* Columns IV:NB of work are valid vectors. +* When the number of vectors stored reaches NB, +* or if this was last vector, do the GEMM + IF( (IV.EQ.1) .OR. (KI.EQ.1) ) THEN + CALL CGEMM( 'N', 'N', N, NB-IV+1, KI+NB-IV, CONE, + $ VR, LDVR, + $ WORK( 1 + (IV)*N ), N, + $ CZERO, + $ WORK( 1 + (NB+IV)*N ), N ) +* normalize vectors + DO K = IV, NB + II = ICAMAX( N, WORK( 1 + (NB+K)*N ), 1 ) + REMAX = ONE / CABS1( WORK( II + (NB+K)*N ) ) + CALL CSSCAL( N, REMAX, WORK( 1 + (NB+K)*N ), 1 ) + END DO + CALL CLACPY( 'F', N, NB-IV+1, + $ WORK( 1 + (NB+IV)*N ), N, + $ VR( 1, KI ), LDVR ) + IV = NB + ELSE + IV = IV - 1 + END IF + END IF +* +* Restore the original diagonal elements of T. +* + DO 70 K = 1, KI - 1 + T( K, K ) = WORK( K ) + 70 CONTINUE +* + IS = IS - 1 + 80 CONTINUE + END IF +* + IF( LEFTV ) THEN +* +* ============================================================ +* Compute left eigenvectors. +* +* IV is index of column in current block. +* Non-blocked version always uses IV=1; +* blocked version starts with IV=1, goes up to NB. +* (Note the "0-th" column is used to store the original diagonal.) + IV = 1 + IS = 1 + DO 130 KI = 1, N +* + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 130 + END IF + SMIN = MAX( ULP*( CABS1( T( KI, KI ) ) ), SMLNUM ) +* +* -------------------------------------------------------- +* Complex left eigenvector +* + WORK( KI + IV*N ) = CONE +* +* Form right-hand side. +* + DO 90 K = KI + 1, N + WORK( K + IV*N ) = -CONJG( T( KI, K ) ) + 90 CONTINUE +* +* Solve conjugate-transposed triangular system: +* [ T(KI+1:N,KI+1:N) - T(KI,KI) ]**H * X = SCALE*WORK. +* + DO 100 K = KI + 1, N + T( K, K ) = T( K, K ) - T( KI, KI ) + IF( CABS1( T( K, K ) ).LT.SMIN ) + $ T( K, K ) = SMIN + 100 CONTINUE +* + IF( KI.LT.N ) THEN + CALL CLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ 'Y', N-KI, T( KI+1, KI+1 ), LDT, + $ WORK( KI+1 + IV*N ), SCALE, RWORK, INFO ) + WORK( KI + IV*N ) = SCALE + END IF +* +* Copy the vector x or Q*x to VL and normalize. +* + IF( .NOT.OVER ) THEN +* ------------------------------ +* no back-transform: copy x to VL and normalize. + CALL CCOPY( N-KI+1, WORK( KI + IV*N ), 1, VL(KI,IS), 1 ) +* + II = ICAMAX( N-KI+1, VL( KI, IS ), 1 ) + KI - 1 + REMAX = ONE / CABS1( VL( II, IS ) ) + CALL CSSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) +* + DO 110 K = 1, KI - 1 + VL( K, IS ) = CZERO + 110 CONTINUE +* + ELSE IF( NB.EQ.1 ) THEN +* ------------------------------ +* version 1: back-transform each vector with GEMV, Q*x. + IF( KI.LT.N ) + $ CALL CGEMV( 'N', N, N-KI, CONE, VL( 1, KI+1 ), LDVL, + $ WORK( KI+1 + IV*N ), 1, CMPLX( SCALE ), + $ VL( 1, KI ), 1 ) +* + II = ICAMAX( N, VL( 1, KI ), 1 ) + REMAX = ONE / CABS1( VL( II, KI ) ) + CALL CSSCAL( N, REMAX, VL( 1, KI ), 1 ) +* + ELSE +* ------------------------------ +* version 2: back-transform block of vectors with GEMM +* zero out above vector +* could go from KI-NV+1 to KI-1 + DO K = 1, KI - 1 + WORK( K + IV*N ) = CZERO + END DO +* +* Columns 1:IV of work are valid vectors. +* When the number of vectors stored reaches NB, +* or if this was last vector, do the GEMM + IF( (IV.EQ.NB) .OR. (KI.EQ.N) ) THEN + CALL CGEMM( 'N', 'N', N, IV, N-KI+IV, CONE, + $ VL( 1, KI-IV+1 ), LDVL, + $ WORK( KI-IV+1 + (1)*N ), N, + $ CZERO, + $ WORK( 1 + (NB+1)*N ), N ) +* normalize vectors + DO K = 1, IV + II = ICAMAX( N, WORK( 1 + (NB+K)*N ), 1 ) + REMAX = ONE / CABS1( WORK( II + (NB+K)*N ) ) + CALL CSSCAL( N, REMAX, WORK( 1 + (NB+K)*N ), 1 ) + END DO + CALL CLACPY( 'F', N, IV, + $ WORK( 1 + (NB+1)*N ), N, + $ VL( 1, KI-IV+1 ), LDVL ) + IV = 1 + ELSE + IV = IV + 1 + END IF + END IF +* +* Restore the original diagonal elements of T. +* + DO 120 K = KI + 1, N + T( K, K ) = WORK( K ) + 120 CONTINUE +* + IS = IS + 1 + 130 CONTINUE + END IF +* + RETURN +* +* End of CTREVC3 +* + END diff --git a/dspl/liblapack/SRC/ctrexc.f b/dspl/liblapack/SRC/ctrexc.f new file mode 100644 index 0000000..cefc32c --- /dev/null +++ b/dspl/liblapack/SRC/ctrexc.f @@ -0,0 +1,240 @@ +*> \brief \b CTREXC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTREXC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTREXC( COMPQ, N, T, LDT, Q, LDQ, IFST, ILST, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ +* INTEGER IFST, ILST, INFO, LDQ, LDT, N +* .. +* .. Array Arguments .. +* COMPLEX Q( LDQ, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTREXC reorders the Schur factorization of a complex matrix +*> A = Q*T*Q**H, so that the diagonal element of T with row index IFST +*> is moved to row ILST. +*> +*> The Schur form T is reordered by a unitary similarity transformation +*> Z**H*T*Z, and optionally the matrix Q of Schur vectors is updated by +*> postmultplying it with Z. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> = 'V': update the matrix Q of Schur vectors; +*> = 'N': do not update Q. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> If N == 0 arguments ILST and IFST may be any value. +*> \endverbatim +*> +*> \param[in,out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,N) +*> On entry, the upper triangular matrix T. +*> On exit, the reordered upper triangular matrix. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ,N) +*> On entry, if COMPQ = 'V', the matrix Q of Schur vectors. +*> On exit, if COMPQ = 'V', Q has been postmultiplied by the +*> unitary transformation matrix Z which reorders T. +*> If COMPQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= 1, and if +*> COMPQ = 'V', LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] IFST +*> \verbatim +*> IFST is INTEGER +*> \endverbatim +*> +*> \param[in] ILST +*> \verbatim +*> ILST is INTEGER +*> +*> Specify the reordering of the diagonal elements of T: +*> The element with row index IFST is moved to row ILST by a +*> sequence of transpositions between adjacent elements. +*> 1 <= IFST <= N; 1 <= ILST <= N. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CTREXC( COMPQ, N, T, LDT, Q, LDQ, IFST, ILST, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPQ + INTEGER IFST, ILST, INFO, LDQ, LDT, N +* .. +* .. Array Arguments .. + COMPLEX Q( LDQ, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL WANTQ + INTEGER K, M1, M2, M3 + REAL CS + COMPLEX SN, T11, T22, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLARTG, CROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + INFO = 0 + WANTQ = LSAME( COMPQ, 'V' ) + IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.MAX( 1, N ) ) ) THEN + INFO = -6 + ELSE IF(( IFST.LT.1 .OR. IFST.GT.N ).AND.( N.GT.0 )) THEN + INFO = -7 + ELSE IF(( ILST.LT.1 .OR. ILST.GT.N ).AND.( N.GT.0 )) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTREXC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 .OR. IFST.EQ.ILST ) + $ RETURN +* + IF( IFST.LT.ILST ) THEN +* +* Move the IFST-th diagonal element forward down the diagonal. +* + M1 = 0 + M2 = -1 + M3 = 1 + ELSE +* +* Move the IFST-th diagonal element backward up the diagonal. +* + M1 = -1 + M2 = 0 + M3 = -1 + END IF +* + DO 10 K = IFST + M1, ILST + M2, M3 +* +* Interchange the k-th and (k+1)-th diagonal elements. +* + T11 = T( K, K ) + T22 = T( K+1, K+1 ) +* +* Determine the transformation to perform the interchange. +* + CALL CLARTG( T( K, K+1 ), T22-T11, CS, SN, TEMP ) +* +* Apply transformation to the matrix T. +* + IF( K+2.LE.N ) + $ CALL CROT( N-K-1, T( K, K+2 ), LDT, T( K+1, K+2 ), LDT, CS, + $ SN ) + CALL CROT( K-1, T( 1, K ), 1, T( 1, K+1 ), 1, CS, CONJG( SN ) ) +* + T( K, K ) = T22 + T( K+1, K+1 ) = T11 +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL CROT( N, Q( 1, K ), 1, Q( 1, K+1 ), 1, CS, + $ CONJG( SN ) ) + END IF +* + 10 CONTINUE +* + RETURN +* +* End of CTREXC +* + END diff --git a/dspl/liblapack/SRC/ctrrfs.f b/dspl/liblapack/SRC/ctrrfs.f new file mode 100644 index 0000000..840f9b5 --- /dev/null +++ b/dspl/liblapack/SRC/ctrrfs.f @@ -0,0 +1,479 @@ +*> \brief \b CTRRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTRRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, X, +* LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, LDA, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* REAL BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTRRFS provides error bounds and backward error estimates for the +*> solution to a system of linear equations with a triangular +*> coefficient matrix. +*> +*> The solution matrix X must be computed by CTRTRS or some other +*> means before entering this routine. CTRRFS does not do iterative +*> refinement because doing so cannot improve the backward error. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The triangular matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of the array A contains the upper +*> triangular matrix, and the strictly lower triangular part of +*> A is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of the array A contains the lower triangular +*> matrix, and the strictly upper triangular part of A is not +*> referenced. If DIAG = 'U', the diagonal elements of A are +*> also not referenced and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX array, dimension (LDX,NRHS) +*> The solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDA, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + REAL BERR( * ), FERR( * ), RWORK( * ) + COMPLEX A( LDA, * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANSN, TRANST + INTEGER I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CCOPY, CLACN2, CTRMV, CTRSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL CCOPY( N, X( 1, J ), 1, WORK, 1 ) + CALL CTRMV( UPLO, TRANS, DIAG, N, A, LDA, WORK, 1 ) + CALL CAXPY( N, -ONE, B( 1, J ), 1, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = CABS1( X( K, J ) ) + DO 30 I = 1, K + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = CABS1( X( K, J ) ) + DO 50 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + 50 CONTINUE + RWORK( K ) = RWORK( K ) + XK + 60 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = CABS1( X( K, J ) ) + DO 70 I = K, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + 70 CONTINUE + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = CABS1( X( K, J ) ) + DO 90 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + 90 CONTINUE + RWORK( K ) = RWORK( K ) + XK + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A**H)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = 1, K + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 110 CONTINUE + RWORK( K ) = RWORK( K ) + S + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = CABS1( X( K, J ) ) + DO 130 I = 1, K - 1 + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 130 CONTINUE + RWORK( K ) = RWORK( K ) + S + 140 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, N + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 150 CONTINUE + RWORK( K ) = RWORK( K ) + S + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = CABS1( X( K, J ) ) + DO 170 I = K + 1, N + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 170 CONTINUE + RWORK( K ) = RWORK( K ) + S + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use CLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL CLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL CTRSV( UPLO, TRANST, DIAG, N, A, LDA, WORK, 1 ) + DO 220 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 230 CONTINUE + CALL CTRSV( UPLO, TRANSN, DIAG, N, A, LDA, WORK, 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of CTRRFS +* + END diff --git a/dspl/liblapack/SRC/ctrsen.f b/dspl/liblapack/SRC/ctrsen.f new file mode 100644 index 0000000..aaba3c5 --- /dev/null +++ b/dspl/liblapack/SRC/ctrsen.f @@ -0,0 +1,456 @@ +*> \brief \b CTRSEN +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTRSEN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTRSEN( JOB, COMPQ, SELECT, N, T, LDT, Q, LDQ, W, M, S, +* SEP, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ, JOB +* INTEGER INFO, LDQ, LDT, LWORK, M, N +* REAL S, SEP +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* COMPLEX Q( LDQ, * ), T( LDT, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTRSEN reorders the Schur factorization of a complex matrix +*> A = Q*T*Q**H, so that a selected cluster of eigenvalues appears in +*> the leading positions on the diagonal of the upper triangular matrix +*> T, and the leading columns of Q form an orthonormal basis of the +*> corresponding right invariant subspace. +*> +*> Optionally the routine computes the reciprocal condition numbers of +*> the cluster of eigenvalues and/or the invariant subspace. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies whether condition numbers are required for the +*> cluster of eigenvalues (S) or the invariant subspace (SEP): +*> = 'N': none; +*> = 'E': for eigenvalues only (S); +*> = 'V': for invariant subspace only (SEP); +*> = 'B': for both eigenvalues and invariant subspace (S and +*> SEP). +*> \endverbatim +*> +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> = 'V': update the matrix Q of Schur vectors; +*> = 'N': do not update Q. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> SELECT specifies the eigenvalues in the selected cluster. To +*> select the j-th eigenvalue, SELECT(j) must be set to .TRUE.. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in,out] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,N) +*> On entry, the upper triangular matrix T. +*> On exit, T is overwritten by the reordered matrix T, with the +*> selected eigenvalues as the leading diagonal elements. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ,N) +*> On entry, if COMPQ = 'V', the matrix Q of Schur vectors. +*> On exit, if COMPQ = 'V', Q has been postmultiplied by the +*> unitary transformation matrix which reorders T; the leading M +*> columns of Q form an orthonormal basis for the specified +*> invariant subspace. +*> If COMPQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= 1; and if COMPQ = 'V', LDQ >= N. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX array, dimension (N) +*> The reordered eigenvalues of T, in the same order as they +*> appear on the diagonal of T. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The dimension of the specified invariant subspace. +*> 0 <= M <= N. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL +*> If JOB = 'E' or 'B', S is a lower bound on the reciprocal +*> condition number for the selected cluster of eigenvalues. +*> S cannot underestimate the true reciprocal condition number +*> by more than a factor of sqrt(N). If M = 0 or N, S = 1. +*> If JOB = 'N' or 'V', S is not referenced. +*> \endverbatim +*> +*> \param[out] SEP +*> \verbatim +*> SEP is REAL +*> If JOB = 'V' or 'B', SEP is the estimated reciprocal +*> condition number of the specified invariant subspace. If +*> M = 0 or N, SEP = norm(T). +*> If JOB = 'N' or 'E', SEP is not referenced. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If JOB = 'N', LWORK >= 1; +*> if JOB = 'E', LWORK = max(1,M*(N-M)); +*> if JOB = 'V' or 'B', LWORK >= max(1,2*M*(N-M)). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> CTRSEN first collects the selected eigenvalues by computing a unitary +*> transformation Z to move them to the top left corner of T. In other +*> words, the selected eigenvalues are the eigenvalues of T11 in: +*> +*> Z**H * T * Z = ( T11 T12 ) n1 +*> ( 0 T22 ) n2 +*> n1 n2 +*> +*> where N = n1+n2. The first +*> n1 columns of Z span the specified invariant subspace of T. +*> +*> If T has been obtained from the Schur factorization of a matrix +*> A = Q*T*Q**H, then the reordered Schur factorization of A is given by +*> A = (Q*Z)*(Z**H*T*Z)*(Q*Z)**H, and the first n1 columns of Q*Z span the +*> corresponding invariant subspace of A. +*> +*> The reciprocal condition number of the average of the eigenvalues of +*> T11 may be returned in S. S lies between 0 (very badly conditioned) +*> and 1 (very well conditioned). It is computed as follows. First we +*> compute R so that +*> +*> P = ( I R ) n1 +*> ( 0 0 ) n2 +*> n1 n2 +*> +*> is the projector on the invariant subspace associated with T11. +*> R is the solution of the Sylvester equation: +*> +*> T11*R - R*T22 = T12. +*> +*> Let F-norm(M) denote the Frobenius-norm of M and 2-norm(M) denote +*> the two-norm of M. Then S is computed as the lower bound +*> +*> (1 + F-norm(R)**2)**(-1/2) +*> +*> on the reciprocal of 2-norm(P), the true reciprocal condition number. +*> S cannot underestimate 1 / 2-norm(P) by more than a factor of +*> sqrt(N). +*> +*> An approximate error bound for the computed average of the +*> eigenvalues of T11 is +*> +*> EPS * norm(T) / S +*> +*> where EPS is the machine precision. +*> +*> The reciprocal condition number of the right invariant subspace +*> spanned by the first n1 columns of Z (or of Q*Z) is returned in SEP. +*> SEP is defined as the separation of T11 and T22: +*> +*> sep( T11, T22 ) = sigma-min( C ) +*> +*> where sigma-min(C) is the smallest singular value of the +*> n1*n2-by-n1*n2 matrix +*> +*> C = kprod( I(n2), T11 ) - kprod( transpose(T22), I(n1) ) +*> +*> I(m) is an m by m identity matrix, and kprod denotes the Kronecker +*> product. We estimate sigma-min(C) by the reciprocal of an estimate of +*> the 1-norm of inverse(C). The true reciprocal 1-norm of inverse(C) +*> cannot differ from sigma-min(C) by more than a factor of sqrt(n1*n2). +*> +*> When SEP is small, small changes in T can cause large changes in +*> the invariant subspace. An approximate bound on the maximum angular +*> error in the computed right invariant subspace is +*> +*> EPS * norm(T) / SEP +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTRSEN( JOB, COMPQ, SELECT, N, T, LDT, Q, LDQ, W, M, S, + $ SEP, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, JOB + INTEGER INFO, LDQ, LDT, LWORK, M, N + REAL S, SEP +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + COMPLEX Q( LDQ, * ), T( LDT, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTBH, WANTQ, WANTS, WANTSP + INTEGER IERR, K, KASE, KS, LWMIN, N1, N2, NN + REAL EST, RNORM, SCALE +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) + REAL RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANGE + EXTERNAL LSAME, CLANGE +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CLACPY, CTREXC, CTRSYL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH + WANTQ = LSAME( COMPQ, 'V' ) +* +* Set M to the number of selected eigenvalues. +* + M = 0 + DO 10 K = 1, N + IF( SELECT( K ) ) + $ M = M + 1 + 10 CONTINUE +* + N1 = M + N2 = N - M + NN = N1*N2 +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( WANTSP ) THEN + LWMIN = MAX( 1, 2*NN ) + ELSE IF( LSAME( JOB, 'N' ) ) THEN + LWMIN = 1 + ELSE IF( LSAME( JOB, 'E' ) ) THEN + LWMIN = MAX( 1, NN ) + END IF +* + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.WANTS .AND. .NOT.WANTSP ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRSEN', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.N .OR. M.EQ.0 ) THEN + IF( WANTS ) + $ S = ONE + IF( WANTSP ) + $ SEP = CLANGE( '1', N, N, T, LDT, RWORK ) + GO TO 40 + END IF +* +* Collect the selected eigenvalues at the top left corner of T. +* + KS = 0 + DO 20 K = 1, N + IF( SELECT( K ) ) THEN + KS = KS + 1 +* +* Swap the K-th eigenvalue to position KS. +* + IF( K.NE.KS ) + $ CALL CTREXC( COMPQ, N, T, LDT, Q, LDQ, K, KS, IERR ) + END IF + 20 CONTINUE +* + IF( WANTS ) THEN +* +* Solve the Sylvester equation for R: +* +* T11*R - R*T22 = scale*T12 +* + CALL CLACPY( 'F', N1, N2, T( 1, N1+1 ), LDT, WORK, N1 ) + CALL CTRSYL( 'N', 'N', -1, N1, N2, T, LDT, T( N1+1, N1+1 ), + $ LDT, WORK, N1, SCALE, IERR ) +* +* Estimate the reciprocal of the condition number of the cluster +* of eigenvalues. +* + RNORM = CLANGE( 'F', N1, N2, WORK, N1, RWORK ) + IF( RNORM.EQ.ZERO ) THEN + S = ONE + ELSE + S = SCALE / ( SQRT( SCALE*SCALE / RNORM+RNORM )* + $ SQRT( RNORM ) ) + END IF + END IF +* + IF( WANTSP ) THEN +* +* Estimate sep(T11,T22). +* + EST = ZERO + KASE = 0 + 30 CONTINUE + CALL CLACN2( NN, WORK( NN+1 ), WORK, EST, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve T11*R - R*T22 = scale*X. +* + CALL CTRSYL( 'N', 'N', -1, N1, N2, T, LDT, + $ T( N1+1, N1+1 ), LDT, WORK, N1, SCALE, + $ IERR ) + ELSE +* +* Solve T11**H*R - R*T22**H = scale*X. +* + CALL CTRSYL( 'C', 'C', -1, N1, N2, T, LDT, + $ T( N1+1, N1+1 ), LDT, WORK, N1, SCALE, + $ IERR ) + END IF + GO TO 30 + END IF +* + SEP = SCALE / EST + END IF +* + 40 CONTINUE +* +* Copy reordered eigenvalues to W. +* + DO 50 K = 1, N + W( K ) = T( K, K ) + 50 CONTINUE +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of CTRSEN +* + END diff --git a/dspl/liblapack/SRC/ctrsna.f b/dspl/liblapack/SRC/ctrsna.f new file mode 100644 index 0000000..e08185b --- /dev/null +++ b/dspl/liblapack/SRC/ctrsna.f @@ -0,0 +1,464 @@ +*> \brief \b CTRSNA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTRSNA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTRSNA( JOB, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, +* LDVR, S, SEP, MM, M, WORK, LDWORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER HOWMNY, JOB +* INTEGER INFO, LDT, LDVL, LDVR, LDWORK, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* REAL RWORK( * ), S( * ), SEP( * ) +* COMPLEX T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTRSNA estimates reciprocal condition numbers for specified +*> eigenvalues and/or right eigenvectors of a complex upper triangular +*> matrix T (or of any matrix Q*T*Q**H with Q unitary). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies whether condition numbers are required for +*> eigenvalues (S) or eigenvectors (SEP): +*> = 'E': for eigenvalues only (S); +*> = 'V': for eigenvectors only (SEP); +*> = 'B': for both eigenvalues and eigenvectors (S and SEP). +*> \endverbatim +*> +*> \param[in] HOWMNY +*> \verbatim +*> HOWMNY is CHARACTER*1 +*> = 'A': compute condition numbers for all eigenpairs; +*> = 'S': compute condition numbers for selected eigenpairs +*> specified by the array SELECT. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> If HOWMNY = 'S', SELECT specifies the eigenpairs for which +*> condition numbers are required. To select condition numbers +*> for the j-th eigenpair, SELECT(j) must be set to .TRUE.. +*> If HOWMNY = 'A', SELECT is not referenced. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX array, dimension (LDT,N) +*> The upper triangular matrix T. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is COMPLEX array, dimension (LDVL,M) +*> If JOB = 'E' or 'B', VL must contain left eigenvectors of T +*> (or of any Q*T*Q**H with Q unitary), corresponding to the +*> eigenpairs specified by HOWMNY and SELECT. The eigenvectors +*> must be stored in consecutive columns of VL, as returned by +*> CHSEIN or CTREVC. +*> If JOB = 'V', VL is not referenced. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. +*> LDVL >= 1; and if JOB = 'E' or 'B', LDVL >= N. +*> \endverbatim +*> +*> \param[in] VR +*> \verbatim +*> VR is COMPLEX array, dimension (LDVR,M) +*> If JOB = 'E' or 'B', VR must contain right eigenvectors of T +*> (or of any Q*T*Q**H with Q unitary), corresponding to the +*> eigenpairs specified by HOWMNY and SELECT. The eigenvectors +*> must be stored in consecutive columns of VR, as returned by +*> CHSEIN or CTREVC. +*> If JOB = 'V', VR is not referenced. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. +*> LDVR >= 1; and if JOB = 'E' or 'B', LDVR >= N. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (MM) +*> If JOB = 'E' or 'B', the reciprocal condition numbers of the +*> selected eigenvalues, stored in consecutive elements of the +*> array. Thus S(j), SEP(j), and the j-th columns of VL and VR +*> all correspond to the same eigenpair (but not in general the +*> j-th eigenpair, unless all eigenpairs are selected). +*> If JOB = 'V', S is not referenced. +*> \endverbatim +*> +*> \param[out] SEP +*> \verbatim +*> SEP is REAL array, dimension (MM) +*> If JOB = 'V' or 'B', the estimated reciprocal condition +*> numbers of the selected eigenvectors, stored in consecutive +*> elements of the array. +*> If JOB = 'E', SEP is not referenced. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of elements in the arrays S (if JOB = 'E' or 'B') +*> and/or SEP (if JOB = 'V' or 'B'). MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of elements of the arrays S and/or SEP actually +*> used to store the estimated condition numbers. +*> If HOWMNY = 'A', M is set to N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LDWORK,N+6) +*> If JOB = 'E', WORK is not referenced. +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. +*> LDWORK >= 1; and if JOB = 'V' or 'B', LDWORK >= N. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (N) +*> If JOB = 'E', RWORK is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The reciprocal of the condition number of an eigenvalue lambda is +*> defined as +*> +*> S(lambda) = |v**H*u| / (norm(u)*norm(v)) +*> +*> where u and v are the right and left eigenvectors of T corresponding +*> to lambda; v**H denotes the conjugate transpose of v, and norm(u) +*> denotes the Euclidean norm. These reciprocal condition numbers always +*> lie between zero (very badly conditioned) and one (very well +*> conditioned). If n = 1, S(lambda) is defined to be 1. +*> +*> An approximate error bound for a computed eigenvalue W(i) is given by +*> +*> EPS * norm(T) / S(i) +*> +*> where EPS is the machine precision. +*> +*> The reciprocal of the condition number of the right eigenvector u +*> corresponding to lambda is defined as follows. Suppose +*> +*> T = ( lambda c ) +*> ( 0 T22 ) +*> +*> Then the reciprocal condition number is +*> +*> SEP( lambda, T22 ) = sigma-min( T22 - lambda*I ) +*> +*> where sigma-min denotes the smallest singular value. We approximate +*> the smallest singular value by the reciprocal of an estimate of the +*> one-norm of the inverse of T22 - lambda*I. If n = 1, SEP(1) is +*> defined to be abs(T(1,1)). +*> +*> An approximate error bound for a computed right eigenvector VR(i) +*> is given by +*> +*> EPS * norm(T) / SEP(i) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTRSNA( JOB, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, + $ LDVR, S, SEP, MM, M, WORK, LDWORK, RWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, JOB + INTEGER INFO, LDT, LDVL, LDVR, LDWORK, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + REAL RWORK( * ), S( * ), SEP( * ) + COMPLEX T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0+0 ) +* .. +* .. Local Scalars .. + LOGICAL SOMCON, WANTBH, WANTS, WANTSP + CHARACTER NORMIN + INTEGER I, IERR, IX, J, K, KASE, KS + REAL BIGNUM, EPS, EST, LNRM, RNRM, SCALE, SMLNUM, + $ XNORM + COMPLEX CDUM, PROD +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) + COMPLEX DUMMY( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ICAMAX + REAL SCNRM2, SLAMCH + COMPLEX CDOTC + EXTERNAL LSAME, ICAMAX, SCNRM2, SLAMCH, CDOTC +* .. +* .. External Subroutines .. + EXTERNAL CLACN2, CLACPY, CLATRS, CSRSCL, CTREXC, SLABAD, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, MAX, REAL +* .. +* .. Statement Functions .. + REAL CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH +* + SOMCON = LSAME( HOWMNY, 'S' ) +* +* Set M to the number of eigenpairs for which condition numbers are +* to be computed. +* + IF( SOMCON ) THEN + M = 0 + DO 10 J = 1, N + IF( SELECT( J ) ) + $ M = M + 1 + 10 CONTINUE + ELSE + M = N + END IF +* + INFO = 0 + IF( .NOT.WANTS .AND. .NOT.WANTSP ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( WANTS .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( WANTS .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE IF( MM.LT.M ) THEN + INFO = -13 + ELSE IF( LDWORK.LT.1 .OR. ( WANTSP .AND. LDWORK.LT.N ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRSNA', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( SOMCON ) THEN + IF( .NOT.SELECT( 1 ) ) + $ RETURN + END IF + IF( WANTS ) + $ S( 1 ) = ONE + IF( WANTSP ) + $ SEP( 1 ) = ABS( T( 1, 1 ) ) + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* + KS = 1 + DO 50 K = 1, N +* + IF( SOMCON ) THEN + IF( .NOT.SELECT( K ) ) + $ GO TO 50 + END IF +* + IF( WANTS ) THEN +* +* Compute the reciprocal condition number of the k-th +* eigenvalue. +* + PROD = CDOTC( N, VR( 1, KS ), 1, VL( 1, KS ), 1 ) + RNRM = SCNRM2( N, VR( 1, KS ), 1 ) + LNRM = SCNRM2( N, VL( 1, KS ), 1 ) + S( KS ) = ABS( PROD ) / ( RNRM*LNRM ) +* + END IF +* + IF( WANTSP ) THEN +* +* Estimate the reciprocal condition number of the k-th +* eigenvector. +* +* Copy the matrix T to the array WORK and swap the k-th +* diagonal element to the (1,1) position. +* + CALL CLACPY( 'Full', N, N, T, LDT, WORK, LDWORK ) + CALL CTREXC( 'No Q', N, WORK, LDWORK, DUMMY, 1, K, 1, IERR ) +* +* Form C = T22 - lambda*I in WORK(2:N,2:N). +* + DO 20 I = 2, N + WORK( I, I ) = WORK( I, I ) - WORK( 1, 1 ) + 20 CONTINUE +* +* Estimate a lower bound for the 1-norm of inv(C**H). The 1st +* and (N+1)th columns of WORK are used to store work vectors. +* + SEP( KS ) = ZERO + EST = ZERO + KASE = 0 + NORMIN = 'N' + 30 CONTINUE + CALL CLACN2( N-1, WORK( 1, N+1 ), WORK, EST, KASE, ISAVE ) +* + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve C**H*x = scale*b +* + CALL CLATRS( 'Upper', 'Conjugate transpose', + $ 'Nonunit', NORMIN, N-1, WORK( 2, 2 ), + $ LDWORK, WORK, SCALE, RWORK, IERR ) + ELSE +* +* Solve C*x = scale*b +* + CALL CLATRS( 'Upper', 'No transpose', 'Nonunit', + $ NORMIN, N-1, WORK( 2, 2 ), LDWORK, WORK, + $ SCALE, RWORK, IERR ) + END IF + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN +* +* Multiply by 1/SCALE if doing so will not cause +* overflow. +* + IX = ICAMAX( N-1, WORK, 1 ) + XNORM = CABS1( WORK( IX, 1 ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 40 + CALL CSRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 30 + END IF +* + SEP( KS ) = ONE / MAX( EST, SMLNUM ) + END IF +* + 40 CONTINUE + KS = KS + 1 + 50 CONTINUE + RETURN +* +* End of CTRSNA +* + END diff --git a/dspl/liblapack/SRC/ctrsyl.f b/dspl/liblapack/SRC/ctrsyl.f new file mode 100644 index 0000000..2561804 --- /dev/null +++ b/dspl/liblapack/SRC/ctrsyl.f @@ -0,0 +1,454 @@ +*> \brief \b CTRSYL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTRSYL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, +* LDC, SCALE, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANA, TRANB +* INTEGER INFO, ISGN, LDA, LDB, LDC, M, N +* REAL SCALE +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTRSYL solves the complex Sylvester matrix equation: +*> +*> op(A)*X + X*op(B) = scale*C or +*> op(A)*X - X*op(B) = scale*C, +*> +*> where op(A) = A or A**H, and A and B are both upper triangular. A is +*> M-by-M and B is N-by-N; the right hand side C and the solution X are +*> M-by-N; and scale is an output scale factor, set <= 1 to avoid +*> overflow in X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANA +*> \verbatim +*> TRANA is CHARACTER*1 +*> Specifies the option op(A): +*> = 'N': op(A) = A (No transpose) +*> = 'C': op(A) = A**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] TRANB +*> \verbatim +*> TRANB is CHARACTER*1 +*> Specifies the option op(B): +*> = 'N': op(B) = B (No transpose) +*> = 'C': op(B) = B**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] ISGN +*> \verbatim +*> ISGN is INTEGER +*> Specifies the sign in the equation: +*> = +1: solve op(A)*X + X*op(B) = scale*C +*> = -1: solve op(A)*X - X*op(B) = scale*C +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The order of the matrix A, and the number of rows in the +*> matrices X and C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix B, and the number of columns in the +*> matrices X and C. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,M) +*> The upper triangular matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,N) +*> The upper triangular matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N right hand side matrix C. +*> On exit, C is overwritten by the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M) +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL +*> The scale factor, scale, set <= 1 to avoid overflow in X. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> = 1: A and B have common or very close eigenvalues; perturbed +*> values were used to solve the equation (but the matrices +*> A and B are unchanged). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexSYcomputational +* +* ===================================================================== + SUBROUTINE CTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, + $ LDC, SCALE, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANA, TRANB + INTEGER INFO, ISGN, LDA, LDB, LDC, M, N + REAL SCALE +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRNA, NOTRNB + INTEGER J, K, L + REAL BIGNUM, DA11, DB, EPS, SCALOC, SGN, SMIN, + $ SMLNUM + COMPLEX A11, SUML, SUMR, VEC, X11 +* .. +* .. Local Arrays .. + REAL DUM( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL CLANGE, SLAMCH + COMPLEX CDOTC, CDOTU, CLADIV + EXTERNAL LSAME, CLANGE, SLAMCH, CDOTC, CDOTU, CLADIV +* .. +* .. External Subroutines .. + EXTERNAL CSSCAL, SLABAD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Decode and Test input parameters +* + NOTRNA = LSAME( TRANA, 'N' ) + NOTRNB = LSAME( TRANB, 'N' ) +* + INFO = 0 + IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'C' ) ) THEN + INFO = -2 + ELSE IF( ISGN.NE.1 .AND. ISGN.NE.-1 ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRSYL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + SCALE = ONE + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM*REAL( M*N ) / EPS + BIGNUM = ONE / SMLNUM + SMIN = MAX( SMLNUM, EPS*CLANGE( 'M', M, M, A, LDA, DUM ), + $ EPS*CLANGE( 'M', N, N, B, LDB, DUM ) ) + SGN = ISGN +* + IF( NOTRNA .AND. NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-left corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* M L-1 +* R(K,L) = SUM [A(K,I)*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)]. +* I=K+1 J=1 +* + DO 30 L = 1, N + DO 20 K = M, 1, -1 +* + SUML = CDOTU( M-K, A( K, MIN( K+1, M ) ), LDA, + $ C( MIN( K+1, M ), L ), 1 ) + SUMR = CDOTU( L-1, C( K, 1 ), LDC, B( 1, L ), 1 ) + VEC = C( K, L ) - ( SUML+SGN*SUMR ) +* + SCALOC = ONE + A11 = A( K, K ) + SGN*B( L, L ) + DA11 = ABS( REAL( A11 ) ) + ABS( AIMAG( A11 ) ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( REAL( VEC ) ) + ABS( AIMAG( VEC ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X11 = CLADIV( VEC*CMPLX( SCALOC ), A11 ) +* + IF( SCALOC.NE.ONE ) THEN + DO 10 J = 1, N + CALL CSSCAL( M, SCALOC, C( 1, J ), 1 ) + 10 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K, L ) = X11 +* + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( .NOT.NOTRNA .AND. NOTRNB ) THEN +* +* Solve A**H *X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* upper-left corner column by column by +* +* A**H(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* K-1 L-1 +* R(K,L) = SUM [A**H(I,K)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)] +* I=1 J=1 +* + DO 60 L = 1, N + DO 50 K = 1, M +* + SUML = CDOTC( K-1, A( 1, K ), 1, C( 1, L ), 1 ) + SUMR = CDOTU( L-1, C( K, 1 ), LDC, B( 1, L ), 1 ) + VEC = C( K, L ) - ( SUML+SGN*SUMR ) +* + SCALOC = ONE + A11 = CONJG( A( K, K ) ) + SGN*B( L, L ) + DA11 = ABS( REAL( A11 ) ) + ABS( AIMAG( A11 ) ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( REAL( VEC ) ) + ABS( AIMAG( VEC ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF +* + X11 = CLADIV( VEC*CMPLX( SCALOC ), A11 ) +* + IF( SCALOC.NE.ONE ) THEN + DO 40 J = 1, N + CALL CSSCAL( M, SCALOC, C( 1, J ), 1 ) + 40 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K, L ) = X11 +* + 50 CONTINUE + 60 CONTINUE +* + ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A**H*X + ISGN*X*B**H = C. +* +* The (K,L)th block of X is determined starting from +* upper-right corner column by column by +* +* A**H(K,K)*X(K,L) + ISGN*X(K,L)*B**H(L,L) = C(K,L) - R(K,L) +* +* Where +* K-1 +* R(K,L) = SUM [A**H(I,K)*X(I,L)] + +* I=1 +* N +* ISGN*SUM [X(K,J)*B**H(L,J)]. +* J=L+1 +* + DO 90 L = N, 1, -1 + DO 80 K = 1, M +* + SUML = CDOTC( K-1, A( 1, K ), 1, C( 1, L ), 1 ) + SUMR = CDOTC( N-L, C( K, MIN( L+1, N ) ), LDC, + $ B( L, MIN( L+1, N ) ), LDB ) + VEC = C( K, L ) - ( SUML+SGN*CONJG( SUMR ) ) +* + SCALOC = ONE + A11 = CONJG( A( K, K )+SGN*B( L, L ) ) + DA11 = ABS( REAL( A11 ) ) + ABS( AIMAG( A11 ) ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( REAL( VEC ) ) + ABS( AIMAG( VEC ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF +* + X11 = CLADIV( VEC*CMPLX( SCALOC ), A11 ) +* + IF( SCALOC.NE.ONE ) THEN + DO 70 J = 1, N + CALL CSSCAL( M, SCALOC, C( 1, J ), 1 ) + 70 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K, L ) = X11 +* + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B**H = C. +* +* The (K,L)th block of X is determined starting from +* bottom-left corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B**H(L,L) = C(K,L) - R(K,L) +* +* Where +* M N +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B**H(L,J)] +* I=K+1 J=L+1 +* + DO 120 L = N, 1, -1 + DO 110 K = M, 1, -1 +* + SUML = CDOTU( M-K, A( K, MIN( K+1, M ) ), LDA, + $ C( MIN( K+1, M ), L ), 1 ) + SUMR = CDOTC( N-L, C( K, MIN( L+1, N ) ), LDC, + $ B( L, MIN( L+1, N ) ), LDB ) + VEC = C( K, L ) - ( SUML+SGN*CONJG( SUMR ) ) +* + SCALOC = ONE + A11 = A( K, K ) + SGN*CONJG( B( L, L ) ) + DA11 = ABS( REAL( A11 ) ) + ABS( AIMAG( A11 ) ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( REAL( VEC ) ) + ABS( AIMAG( VEC ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF +* + X11 = CLADIV( VEC*CMPLX( SCALOC ), A11 ) +* + IF( SCALOC.NE.ONE ) THEN + DO 100 J = 1, N + CALL CSSCAL( M, SCALOC, C( 1, J ), 1 ) + 100 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K, L ) = X11 +* + 110 CONTINUE + 120 CONTINUE +* + END IF +* + RETURN +* +* End of CTRSYL +* + END diff --git a/dspl/liblapack/SRC/ctrti2.f b/dspl/liblapack/SRC/ctrti2.f new file mode 100644 index 0000000..55d8d1f --- /dev/null +++ b/dspl/liblapack/SRC/ctrti2.f @@ -0,0 +1,212 @@ +*> \brief \b CTRTI2 computes the inverse of a triangular matrix (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTRTI2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTRTI2( UPLO, DIAG, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTRTI2 computes the inverse of a complex upper or lower triangular +*> matrix. +*> +*> This is the Level 2 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the triangular matrix A. If UPLO = 'U', the +*> leading n by n upper triangular part of the array A contains +*> the upper triangular matrix, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of the array A contains +*> the lower triangular matrix, and the strictly upper +*> triangular part of A is not referenced. If DIAG = 'U', the +*> diagonal elements of A are also not referenced and are +*> assumed to be 1. +*> +*> On exit, the (triangular) inverse of the original matrix, in +*> the same storage format. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CTRTI2( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J + COMPLEX AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CSCAL, CTRMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRTI2', -INFO ) + RETURN + END IF +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix. +* + DO 10 J = 1, N + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF +* +* Compute elements 1:j-1 of j-th column. +* + CALL CTRMV( 'Upper', 'No transpose', DIAG, J-1, A, LDA, + $ A( 1, J ), 1 ) + CALL CSCAL( J-1, AJJ, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix. +* + DO 20 J = N, 1, -1 + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF + IF( J.LT.N ) THEN +* +* Compute elements j+1:n of j-th column. +* + CALL CTRMV( 'Lower', 'No transpose', DIAG, N-J, + $ A( J+1, J+1 ), LDA, A( J+1, J ), 1 ) + CALL CSCAL( N-J, AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of CTRTI2 +* + END diff --git a/dspl/liblapack/SRC/ctrtri.f b/dspl/liblapack/SRC/ctrtri.f new file mode 100644 index 0000000..4c284d1 --- /dev/null +++ b/dspl/liblapack/SRC/ctrtri.f @@ -0,0 +1,243 @@ +*> \brief \b CTRTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTRTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTRTRI( UPLO, DIAG, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTRTRI computes the inverse of a complex upper or lower triangular +*> matrix A. +*> +*> This is the Level 3 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the triangular matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of the array A contains +*> the upper triangular matrix, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of the array A contains +*> the lower triangular matrix, and the strictly upper +*> triangular part of A is not referenced. If DIAG = 'U', the +*> diagonal elements of A are also not referenced and are +*> assumed to be 1. +*> On exit, the (triangular) inverse of the original matrix, in +*> the same storage format. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, A(i,i) is exactly zero. The triangular +*> matrix is singular and its inverse can not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CTRTRI( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JB, NB, NN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CTRMM, CTRSM, CTRTI2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity if non-unit. +* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + INFO = 0 + END IF +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'CTRTRI', UPLO // DIAG, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL CTRTI2( UPLO, DIAG, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix +* + DO 20 J = 1, N, NB + JB = MIN( NB, N-J+1 ) +* +* Compute rows 1:j-1 of current block column +* + CALL CTRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1, + $ JB, ONE, A, LDA, A( 1, J ), LDA ) + CALL CTRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1, + $ JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA ) +* +* Compute inverse of current diagonal block +* + CALL CTRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO ) + 20 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 30 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) + IF( J+JB.LE.N ) THEN +* +* Compute rows j+jb:n of current block column +* + CALL CTRMM( 'Left', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA, + $ A( J+JB, J ), LDA ) + CALL CTRSM( 'Right', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, -ONE, A( J, J ), LDA, + $ A( J+JB, J ), LDA ) + END IF +* +* Compute inverse of current diagonal block +* + CALL CTRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO ) + 30 CONTINUE + END IF + END IF +* + RETURN +* +* End of CTRTRI +* + END diff --git a/dspl/liblapack/SRC/ctrtrs.f b/dspl/liblapack/SRC/ctrtrs.f new file mode 100644 index 0000000..4019ced --- /dev/null +++ b/dspl/liblapack/SRC/ctrtrs.f @@ -0,0 +1,227 @@ +*> \brief \b CTRTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTRTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTRTRS solves a triangular system of the form +*> +*> A * X = B, A**T * X = B, or A**H * X = B, +*> +*> where A is a triangular matrix of order N, and B is an N-by-NRHS +*> matrix. A check is made to verify that A is nonsingular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The triangular matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of the array A contains the upper +*> triangular matrix, and the strictly lower triangular part of +*> A is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of the array A contains the lower triangular +*> matrix, and the strictly upper triangular part of A is not +*> referenced. If DIAG = 'U', the diagonal elements of A are +*> also not referenced and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, if INFO = 0, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of A is zero, +*> indicating that the matrix is singular and the solutions +*> X have not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + END IF + INFO = 0 +* +* Solve A * x = b, A**T * x = b, or A**H * x = b. +* + CALL CTRSM( 'Left', UPLO, TRANS, DIAG, N, NRHS, ONE, A, LDA, B, + $ LDB ) +* + RETURN +* +* End of CTRTRS +* + END diff --git a/dspl/liblapack/SRC/ctrttf.f b/dspl/liblapack/SRC/ctrttf.f new file mode 100644 index 0000000..c762b0c --- /dev/null +++ b/dspl/liblapack/SRC/ctrttf.f @@ -0,0 +1,537 @@ +*> \brief \b CTRTTF copies a triangular matrix from the standard full format (TR) to the rectangular full packed format (TF). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTRTTF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTRTTF( TRANSR, UPLO, N, A, LDA, ARF, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. +* COMPLEX A( 0: LDA-1, 0: * ), ARF( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTRTTF copies a triangular matrix A from standard full format (TR) +*> to rectangular full packed format (TF) . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': ARF in Normal mode is wanted; +*> = 'C': ARF in Conjugate Transpose mode is wanted; +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension ( LDA, N ) +*> On entry, the triangular matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of the array A contains +*> the upper triangular matrix, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of the array A contains +*> the lower triangular matrix, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the matrix A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] ARF +*> \verbatim +*> ARF is COMPLEX array, dimension ( N*(N+1)/2 ), +*> On exit, the upper or lower triangular matrix A stored in +*> RFP format. For a further discussion see Notes below. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Standard Packed Format when N is even. +*> We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> conjugate-transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> conjugate-transpose of the last three columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- -- +*> 03 04 05 33 43 53 +*> -- -- +*> 13 14 15 00 44 54 +*> -- +*> 23 24 25 10 11 55 +*> +*> 33 34 35 20 21 22 +*> -- +*> 00 44 45 30 31 32 +*> -- -- +*> 01 11 55 40 41 42 +*> -- -- -- +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- -- +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We next consider Standard Packed Format when N is odd. +*> We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> conjugate-transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> conjugate-transpose of the last two columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- +*> 02 03 04 00 33 43 +*> -- +*> 12 13 14 10 11 44 +*> +*> 22 23 24 20 21 22 +*> -- +*> 00 33 34 30 31 32 +*> -- -- +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- +*> 02 12 22 00 01 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 11 33 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTRTTF( TRANSR, UPLO, N, A, LDA, ARF, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. + COMPLEX A( 0: LDA-1, 0: * ), ARF( 0: * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER I, IJ, J, K, L, N1, N2, NT, NX2, NP1X2 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX, MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRTTF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + IF( N.EQ.1 ) THEN + IF( NORMALTRANSR ) THEN + ARF( 0 ) = A( 0, 0 ) + ELSE + ARF( 0 ) = CONJG( A( 0, 0 ) ) + END IF + END IF + RETURN + END IF +* +* Size of array ARF(1:2,0:nt-1) +* + NT = N*( N+1 ) / 2 +* +* set N1 and N2 depending on LOWER: for N even N1=N2=K +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* If N is odd, set NISODD = .TRUE., LDA=N+1 and A is (N+1)--by--K2. +* If N is even, set K = N/2 and NISODD = .FALSE., LDA=N and A is +* N--by--(N+1)/2. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + IF( .NOT.LOWER ) + $ NP1X2 = N + N + 2 + ELSE + NISODD = .TRUE. + IF( .NOT.LOWER ) + $ NX2 = N + N + END IF +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) +* T1 -> a(0), T2 -> a(n), S -> a(n1); lda=n +* + IJ = 0 + DO J = 0, N2 + DO I = N1, N2 + J + ARF( IJ ) = CONJG( A( N2+J, I ) ) + IJ = IJ + 1 + END DO + DO I = J, N - 1 + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) +* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) +* T1 -> a(n2), T2 -> a(n1), S -> a(0); lda=n +* + IJ = NT - N + DO J = N - 1, N1, -1 + DO I = 0, J + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + DO L = J - N1, N1 - 1 + ARF( IJ ) = CONJG( A( J-N1, L ) ) + IJ = IJ + 1 + END DO + IJ = IJ - NX2 + END DO +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is odd +* T1 -> A(0,0) , T2 -> A(1,0) , S -> A(0,n1) +* T1 -> A(0+0) , T2 -> A(1+0) , S -> A(0+n1*n1); lda=n1 +* + IJ = 0 + DO J = 0, N2 - 1 + DO I = 0, J + ARF( IJ ) = CONJG( A( J, I ) ) + IJ = IJ + 1 + END DO + DO I = N1 + J, N - 1 + ARF( IJ ) = A( I, N1+J ) + IJ = IJ + 1 + END DO + END DO + DO J = N2, N - 1 + DO I = 0, N1 - 1 + ARF( IJ ) = CONJG( A( J, I ) ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is odd +* T1 -> A(0,n1+1), T2 -> A(0,n1), S -> A(0,0) +* T1 -> A(n2*n2), T2 -> A(n1*n2), S -> A(0); lda=n2 +* + IJ = 0 + DO J = 0, N1 + DO I = N1, N - 1 + ARF( IJ ) = CONJG( A( J, I ) ) + IJ = IJ + 1 + END DO + END DO + DO J = 0, N1 - 1 + DO I = 0, J + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + DO L = N2 + J, N - 1 + ARF( IJ ) = CONJG( A( N2+J, L ) ) + IJ = IJ + 1 + END DO + END DO +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1); lda=n+1 +* + IJ = 0 + DO J = 0, K - 1 + DO I = K, K + J + ARF( IJ ) = CONJG( A( K+J, I ) ) + IJ = IJ + 1 + END DO + DO I = J, N - 1 + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0); lda=n+1 +* + IJ = NT - N - 1 + DO J = N - 1, K, -1 + DO I = 0, J + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + DO L = J - K, K - 1 + ARF( IJ ) = CONJG( A( J-K, L ) ) + IJ = IJ + 1 + END DO + IJ = IJ - NP1X2 + END DO +* + END IF +* + ELSE +* +* N is even and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is even (see paper, A=B) +* T1 -> A(0,1) , T2 -> A(0,0) , S -> A(0,k+1) : +* T1 -> A(0+k) , T2 -> A(0+0) , S -> A(0+k*(k+1)); lda=k +* + IJ = 0 + J = K + DO I = K, N - 1 + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + DO J = 0, K - 2 + DO I = 0, J + ARF( IJ ) = CONJG( A( J, I ) ) + IJ = IJ + 1 + END DO + DO I = K + 1 + J, N - 1 + ARF( IJ ) = A( I, K+1+J ) + IJ = IJ + 1 + END DO + END DO + DO J = K - 1, N - 1 + DO I = 0, K - 1 + ARF( IJ ) = CONJG( A( J, I ) ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is even (see paper, A=B) +* T1 -> A(0,k+1) , T2 -> A(0,k) , S -> A(0,0) +* T1 -> A(0+k*(k+1)) , T2 -> A(0+k*k) , S -> A(0+0)); lda=k +* + IJ = 0 + DO J = 0, K + DO I = K, N - 1 + ARF( IJ ) = CONJG( A( J, I ) ) + IJ = IJ + 1 + END DO + END DO + DO J = 0, K - 2 + DO I = 0, J + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + DO L = K + 1 + J, N - 1 + ARF( IJ ) = CONJG( A( K+1+J, L ) ) + IJ = IJ + 1 + END DO + END DO +* +* Note that here J = K-1 +* + DO I = 0, J + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of CTRTTF +* + END diff --git a/dspl/liblapack/SRC/ctrttp.f b/dspl/liblapack/SRC/ctrttp.f new file mode 100644 index 0000000..0fa3e1a --- /dev/null +++ b/dspl/liblapack/SRC/ctrttp.f @@ -0,0 +1,176 @@ +*> \brief \b CTRTTP copies a triangular matrix from the standard full format (TR) to the standard packed format (TP). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTRTTP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTRTTP( UPLO, N, A, LDA, AP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTRTTP copies a triangular matrix A from full format (TR) to standard +*> packed format (TP). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices AP and A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the triangular matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] AP +*> \verbatim +*> AP is COMPLEX array, dimension ( N*(N+1)/2 ), +*> On exit, the upper or lower triangular matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CTRTTP( UPLO, N, A, LDA, AP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* .. +* .. Local Scalars .. + LOGICAL LOWER + INTEGER I, J, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTRTTP', -INFO ) + RETURN + END IF +* + IF( LOWER ) THEN + K = 0 + DO J = 1, N + DO I = J, N + K = K + 1 + AP( K ) = A( I, J ) + END DO + END DO + ELSE + K = 0 + DO J = 1, N + DO I = 1, J + K = K + 1 + AP( K ) = A( I, J ) + END DO + END DO + END IF +* +* + RETURN +* +* End of CTRTTP +* + END diff --git a/dspl/liblapack/SRC/ctzrzf.f b/dspl/liblapack/SRC/ctzrzf.f new file mode 100644 index 0000000..f3f5fdf --- /dev/null +++ b/dspl/liblapack/SRC/ctzrzf.f @@ -0,0 +1,313 @@ +*> \brief \b CTZRZF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CTZRZF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CTZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CTZRZF reduces the M-by-N ( M<=N ) complex upper trapezoidal matrix A +*> to upper triangular form by means of unitary transformations. +*> +*> The upper trapezoidal matrix A is factored as +*> +*> A = ( R 0 ) * Z, +*> +*> where Z is an N-by-N unitary matrix and R is an M-by-M upper +*> triangular matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= M. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the leading M-by-N upper trapezoidal part of the +*> array A must contain the matrix to be factorized. +*> On exit, the leading M-by-M upper triangular part of A +*> contains the upper triangular matrix R, and elements M+1 to +*> N of the first M rows of A, with the array TAU, represent the +*> unitary matrix Z as a product of M elementary reflectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (M) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The N-by-N matrix Z can be computed by +*> +*> Z = Z(1)*Z(2)* ... *Z(M) +*> +*> where each N-by-N Z(k) is given by +*> +*> Z(k) = I - tau(k)*v(k)*v(k)**H +*> +*> with v(k) is the kth row vector of the M-by-N matrix +*> +*> V = ( I A(:,M+1:N) ) +*> +*> I is the M-by-M identity matrix, A(:,M+1:N) +*> is the output stored in A on exit from DTZRZF, +*> and tau(k) is the kth element of the array TAU. +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CTZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IWS, KI, KK, LDWORK, LWKMIN, LWKOPT, + $ M1, MU, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CLARZB, CLARZT, CLATRZ +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( M.EQ.0 .OR. M.EQ.N ) THEN + LWKOPT = 1 + LWKMIN = 1 + ELSE +* +* Determine the block size. +* + NB = ILAENV( 1, 'CGERQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + LWKMIN = MAX( 1, M ) + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CTZRZF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 ) THEN + RETURN + ELSE IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = M + IF( NB.GT.1 .AND. NB.LT.M ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CGERQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.M ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CGERQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.M .AND. NX.LT.M ) THEN +* +* Use blocked code initially. +* The last kk rows are handled by the block method. +* + M1 = MIN( M+1, N ) + KI = ( ( M-NX-1 ) / NB )*NB + KK = MIN( M, KI+NB ) +* + DO 20 I = M - KK + KI + 1, M - KK + 1, -NB + IB = MIN( M-I+1, NB ) +* +* Compute the TZ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL CLATRZ( IB, N-I+1, N-M, A( I, I ), LDA, TAU( I ), + $ WORK ) + IF( I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL CLARZT( 'Backward', 'Rowwise', N-M, IB, A( I, M1 ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:i-1,i:n) from the right +* + CALL CLARZB( 'Right', 'No transpose', 'Backward', + $ 'Rowwise', I-1, N-I+1, IB, N-M, A( I, M1 ), + $ LDA, WORK, LDWORK, A( 1, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 20 CONTINUE + MU = I + NB - 1 + ELSE + MU = M + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 ) + $ CALL CLATRZ( MU, N, N-M, A, LDA, TAU, WORK ) +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CTZRZF +* + END diff --git a/dspl/liblapack/SRC/cunbdb.f b/dspl/liblapack/SRC/cunbdb.f new file mode 100644 index 0000000..9006242 --- /dev/null +++ b/dspl/liblapack/SRC/cunbdb.f @@ -0,0 +1,687 @@ +*> \brief \b CUNBDB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNBDB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, +* X21, LDX21, X22, LDX22, THETA, PHI, TAUP1, +* TAUP2, TAUQ1, TAUQ2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIGNS, TRANS +* INTEGER INFO, LDX11, LDX12, LDX21, LDX22, LWORK, M, P, +* $ Q +* .. +* .. Array Arguments .. +* REAL PHI( * ), THETA( * ) +* COMPLEX TAUP1( * ), TAUP2( * ), TAUQ1( * ), TAUQ2( * ), +* $ WORK( * ), X11( LDX11, * ), X12( LDX12, * ), +* $ X21( LDX21, * ), X22( LDX22, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNBDB simultaneously bidiagonalizes the blocks of an M-by-M +*> partitioned unitary matrix X: +*> +*> [ B11 | B12 0 0 ] +*> [ X11 | X12 ] [ P1 | ] [ 0 | 0 -I 0 ] [ Q1 | ]**H +*> X = [-----------] = [---------] [----------------] [---------] . +*> [ X21 | X22 ] [ | P2 ] [ B21 | B22 0 0 ] [ | Q2 ] +*> [ 0 | 0 0 I ] +*> +*> X11 is P-by-Q. Q must be no larger than P, M-P, or M-Q. (If this is +*> not the case, then X must be transposed and/or permuted. This can be +*> done in constant time using the TRANS and SIGNS options. See CUNCSD +*> for details.) +*> +*> The unitary matrices P1, P2, Q1, and Q2 are P-by-P, (M-P)-by- +*> (M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. They are +*> represented implicitly by Householder vectors. +*> +*> B11, B12, B21, and B22 are Q-by-Q bidiagonal matrices represented +*> implicitly by angles THETA, PHI. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER +*> = 'T': X, U1, U2, V1T, and V2T are stored in row-major +*> order; +*> otherwise: X, U1, U2, V1T, and V2T are stored in column- +*> major order. +*> \endverbatim +*> +*> \param[in] SIGNS +*> \verbatim +*> SIGNS is CHARACTER +*> = 'O': The lower-left block is made nonpositive (the +*> "other" convention); +*> otherwise: The upper-right block is made nonpositive (the +*> "default" convention). +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows and columns in X. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11 and X12. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= +*> MIN(P,M-P,M-Q). +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is COMPLEX array, dimension (LDX11,Q) +*> On entry, the top-left block of the unitary matrix to be +*> reduced. On exit, the form depends on TRANS: +*> If TRANS = 'N', then +*> the columns of tril(X11) specify reflectors for P1, +*> the rows of triu(X11,1) specify reflectors for Q1; +*> else TRANS = 'T', and +*> the rows of triu(X11) specify reflectors for P1, +*> the columns of tril(X11,-1) specify reflectors for Q1. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. If TRANS = 'N', then LDX11 >= +*> P; else LDX11 >= Q. +*> \endverbatim +*> +*> \param[in,out] X12 +*> \verbatim +*> X12 is COMPLEX array, dimension (LDX12,M-Q) +*> On entry, the top-right block of the unitary matrix to +*> be reduced. On exit, the form depends on TRANS: +*> If TRANS = 'N', then +*> the rows of triu(X12) specify the first P reflectors for +*> Q2; +*> else TRANS = 'T', and +*> the columns of tril(X12) specify the first P reflectors +*> for Q2. +*> \endverbatim +*> +*> \param[in] LDX12 +*> \verbatim +*> LDX12 is INTEGER +*> The leading dimension of X12. If TRANS = 'N', then LDX12 >= +*> P; else LDX11 >= M-Q. +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is COMPLEX array, dimension (LDX21,Q) +*> On entry, the bottom-left block of the unitary matrix to +*> be reduced. On exit, the form depends on TRANS: +*> If TRANS = 'N', then +*> the columns of tril(X21) specify reflectors for P2; +*> else TRANS = 'T', and +*> the rows of triu(X21) specify reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. If TRANS = 'N', then LDX21 >= +*> M-P; else LDX21 >= Q. +*> \endverbatim +*> +*> \param[in,out] X22 +*> \verbatim +*> X22 is COMPLEX array, dimension (LDX22,M-Q) +*> On entry, the bottom-right block of the unitary matrix to +*> be reduced. On exit, the form depends on TRANS: +*> If TRANS = 'N', then +*> the rows of triu(X22(Q+1:M-P,P+1:M-Q)) specify the last +*> M-P-Q reflectors for Q2, +*> else TRANS = 'T', and +*> the columns of tril(X22(P+1:M-Q,Q+1:M-P)) specify the last +*> M-P-Q reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX22 +*> \verbatim +*> LDX22 is INTEGER +*> The leading dimension of X22. If TRANS = 'N', then LDX22 >= +*> M-P; else LDX22 >= M-Q. +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is REAL array, dimension (Q) +*> The entries of the bidiagonal blocks B11, B12, B21, B22 can +*> be computed from the angles THETA and PHI. See Further +*> Details. +*> \endverbatim +*> +*> \param[out] PHI +*> \verbatim +*> PHI is REAL array, dimension (Q-1) +*> The entries of the bidiagonal blocks B11, B12, B21, B22 can +*> be computed from the angles THETA and PHI. See Further +*> Details. +*> \endverbatim +*> +*> \param[out] TAUP1 +*> \verbatim +*> TAUP1 is COMPLEX array, dimension (P) +*> The scalar factors of the elementary reflectors that define +*> P1. +*> \endverbatim +*> +*> \param[out] TAUP2 +*> \verbatim +*> TAUP2 is COMPLEX array, dimension (M-P) +*> The scalar factors of the elementary reflectors that define +*> P2. +*> \endverbatim +*> +*> \param[out] TAUQ1 +*> \verbatim +*> TAUQ1 is COMPLEX array, dimension (Q) +*> The scalar factors of the elementary reflectors that define +*> Q1. +*> \endverbatim +*> +*> \param[out] TAUQ2 +*> \verbatim +*> TAUQ2 is COMPLEX array, dimension (M-Q) +*> The scalar factors of the elementary reflectors that define +*> Q2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= M-Q. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The bidiagonal blocks B11, B12, B21, and B22 are represented +*> implicitly by angles THETA(1), ..., THETA(Q) and PHI(1), ..., +*> PHI(Q-1). B11 and B21 are upper bidiagonal, while B21 and B22 are +*> lower bidiagonal. Every entry in each bidiagonal band is a product +*> of a sine or cosine of a THETA with a sine or cosine of a PHI. See +*> [1] or CUNCSD for details. +*> +*> P1, P2, Q1, and Q2 are represented as products of elementary +*> reflectors. See CUNCSD for details on generating P1, P2, Q1, and Q2 +*> using CUNGQR and CUNGLQ. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +*> +* ===================================================================== + SUBROUTINE CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, + $ X21, LDX21, X22, LDX22, THETA, PHI, TAUP1, + $ TAUP2, TAUQ1, TAUQ2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIGNS, TRANS + INTEGER INFO, LDX11, LDX12, LDX21, LDX22, LWORK, M, P, + $ Q +* .. +* .. Array Arguments .. + REAL PHI( * ), THETA( * ) + COMPLEX TAUP1( * ), TAUP2( * ), TAUQ1( * ), TAUQ2( * ), + $ WORK( * ), X11( LDX11, * ), X12( LDX12, * ), + $ X21( LDX21, * ), X22( LDX22, * ) +* .. +* +* ==================================================================== +* +* .. Parameters .. + REAL REALONE + PARAMETER ( REALONE = 1.0E0 ) + COMPLEX ONE + PARAMETER ( ONE = (1.0E0,0.0E0) ) +* .. +* .. Local Scalars .. + LOGICAL COLMAJOR, LQUERY + INTEGER I, LWORKMIN, LWORKOPT + REAL Z1, Z2, Z3, Z4 +* .. +* .. External Subroutines .. + EXTERNAL CAXPY, CLARF, CLARFGP, CSCAL, XERBLA + EXTERNAL CLACGV +* +* .. +* .. External Functions .. + REAL SCNRM2 + LOGICAL LSAME + EXTERNAL SCNRM2, LSAME +* .. +* .. Intrinsic Functions + INTRINSIC ATAN2, COS, MAX, MIN, SIN + INTRINSIC CMPLX, CONJG +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + COLMAJOR = .NOT. LSAME( TRANS, 'T' ) + IF( .NOT. LSAME( SIGNS, 'O' ) ) THEN + Z1 = REALONE + Z2 = REALONE + Z3 = REALONE + Z4 = REALONE + ELSE + Z1 = REALONE + Z2 = -REALONE + Z3 = REALONE + Z4 = -REALONE + END IF + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -3 + ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN + INFO = -4 + ELSE IF( Q .LT. 0 .OR. Q .GT. P .OR. Q .GT. M-P .OR. + $ Q .GT. M-Q ) THEN + INFO = -5 + ELSE IF( COLMAJOR .AND. LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -7 + ELSE IF( .NOT.COLMAJOR .AND. LDX11 .LT. MAX( 1, Q ) ) THEN + INFO = -7 + ELSE IF( COLMAJOR .AND. LDX12 .LT. MAX( 1, P ) ) THEN + INFO = -9 + ELSE IF( .NOT.COLMAJOR .AND. LDX12 .LT. MAX( 1, M-Q ) ) THEN + INFO = -9 + ELSE IF( COLMAJOR .AND. LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -11 + ELSE IF( .NOT.COLMAJOR .AND. LDX21 .LT. MAX( 1, Q ) ) THEN + INFO = -11 + ELSE IF( COLMAJOR .AND. LDX22 .LT. MAX( 1, M-P ) ) THEN + INFO = -13 + ELSE IF( .NOT.COLMAJOR .AND. LDX22 .LT. MAX( 1, M-Q ) ) THEN + INFO = -13 + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + LWORKOPT = M - Q + LWORKMIN = M - Q + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT. LQUERY ) THEN + INFO = -21 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'xORBDB', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Handle column-major and row-major separately +* + IF( COLMAJOR ) THEN +* +* Reduce columns 1, ..., Q of X11, X12, X21, and X22 +* + DO I = 1, Q +* + IF( I .EQ. 1 ) THEN + CALL CSCAL( P-I+1, CMPLX( Z1, 0.0E0 ), X11(I,I), 1 ) + ELSE + CALL CSCAL( P-I+1, CMPLX( Z1*COS(PHI(I-1)), 0.0E0 ), + $ X11(I,I), 1 ) + CALL CAXPY( P-I+1, CMPLX( -Z1*Z3*Z4*SIN(PHI(I-1)), + $ 0.0E0 ), X12(I,I-1), 1, X11(I,I), 1 ) + END IF + IF( I .EQ. 1 ) THEN + CALL CSCAL( M-P-I+1, CMPLX( Z2, 0.0E0 ), X21(I,I), 1 ) + ELSE + CALL CSCAL( M-P-I+1, CMPLX( Z2*COS(PHI(I-1)), 0.0E0 ), + $ X21(I,I), 1 ) + CALL CAXPY( M-P-I+1, CMPLX( -Z2*Z3*Z4*SIN(PHI(I-1)), + $ 0.0E0 ), X22(I,I-1), 1, X21(I,I), 1 ) + END IF +* + THETA(I) = ATAN2( SCNRM2( M-P-I+1, X21(I,I), 1 ), + $ SCNRM2( P-I+1, X11(I,I), 1 ) ) +* + IF( P .GT. I ) THEN + CALL CLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) + ELSE IF ( P .EQ. I ) THEN + CALL CLARFGP( P-I+1, X11(I,I), X11(I,I), 1, TAUP1(I) ) + END IF + X11(I,I) = ONE + IF ( M-P .GT. I ) THEN + CALL CLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, + $ TAUP2(I) ) + ELSE IF ( M-P .EQ. I ) THEN + CALL CLARFGP( M-P-I+1, X21(I,I), X21(I,I), 1, + $ TAUP2(I) ) + END IF + X21(I,I) = ONE +* + IF ( Q .GT. I ) THEN + CALL CLARF( 'L', P-I+1, Q-I, X11(I,I), 1, + $ CONJG(TAUP1(I)), X11(I,I+1), LDX11, WORK ) + CALL CLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, + $ CONJG(TAUP2(I)), X21(I,I+1), LDX21, WORK ) + END IF + IF ( M-Q+1 .GT. I ) THEN + CALL CLARF( 'L', P-I+1, M-Q-I+1, X11(I,I), 1, + $ CONJG(TAUP1(I)), X12(I,I), LDX12, WORK ) + CALL CLARF( 'L', M-P-I+1, M-Q-I+1, X21(I,I), 1, + $ CONJG(TAUP2(I)), X22(I,I), LDX22, WORK ) + END IF +* + IF( I .LT. Q ) THEN + CALL CSCAL( Q-I, CMPLX( -Z1*Z3*SIN(THETA(I)), 0.0E0 ), + $ X11(I,I+1), LDX11 ) + CALL CAXPY( Q-I, CMPLX( Z2*Z3*COS(THETA(I)), 0.0E0 ), + $ X21(I,I+1), LDX21, X11(I,I+1), LDX11 ) + END IF + CALL CSCAL( M-Q-I+1, CMPLX( -Z1*Z4*SIN(THETA(I)), 0.0E0 ), + $ X12(I,I), LDX12 ) + CALL CAXPY( M-Q-I+1, CMPLX( Z2*Z4*COS(THETA(I)), 0.0E0 ), + $ X22(I,I), LDX22, X12(I,I), LDX12 ) +* + IF( I .LT. Q ) + $ PHI(I) = ATAN2( SCNRM2( Q-I, X11(I,I+1), LDX11 ), + $ SCNRM2( M-Q-I+1, X12(I,I), LDX12 ) ) +* + IF( I .LT. Q ) THEN + CALL CLACGV( Q-I, X11(I,I+1), LDX11 ) + IF ( I .EQ. Q-1 ) THEN + CALL CLARFGP( Q-I, X11(I,I+1), X11(I,I+1), LDX11, + $ TAUQ1(I) ) + ELSE + CALL CLARFGP( Q-I, X11(I,I+1), X11(I,I+2), LDX11, + $ TAUQ1(I) ) + END IF + X11(I,I+1) = ONE + END IF + IF ( M-Q+1 .GT. I ) THEN + CALL CLACGV( M-Q-I+1, X12(I,I), LDX12 ) + IF ( M-Q .EQ. I ) THEN + CALL CLARFGP( M-Q-I+1, X12(I,I), X12(I,I), LDX12, + $ TAUQ2(I) ) + ELSE + CALL CLARFGP( M-Q-I+1, X12(I,I), X12(I,I+1), LDX12, + $ TAUQ2(I) ) + END IF + END IF + X12(I,I) = ONE +* + IF( I .LT. Q ) THEN + CALL CLARF( 'R', P-I, Q-I, X11(I,I+1), LDX11, TAUQ1(I), + $ X11(I+1,I+1), LDX11, WORK ) + CALL CLARF( 'R', M-P-I, Q-I, X11(I,I+1), LDX11, TAUQ1(I), + $ X21(I+1,I+1), LDX21, WORK ) + END IF + IF ( P .GT. I ) THEN + CALL CLARF( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, TAUQ2(I), + $ X12(I+1,I), LDX12, WORK ) + END IF + IF ( M-P .GT. I ) THEN + CALL CLARF( 'R', M-P-I, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), X22(I+1,I), LDX22, WORK ) + END IF +* + IF( I .LT. Q ) + $ CALL CLACGV( Q-I, X11(I,I+1), LDX11 ) + CALL CLACGV( M-Q-I+1, X12(I,I), LDX12 ) +* + END DO +* +* Reduce columns Q + 1, ..., P of X12, X22 +* + DO I = Q + 1, P +* + CALL CSCAL( M-Q-I+1, CMPLX( -Z1*Z4, 0.0E0 ), X12(I,I), + $ LDX12 ) + CALL CLACGV( M-Q-I+1, X12(I,I), LDX12 ) + IF ( I .GE. M-Q ) THEN + CALL CLARFGP( M-Q-I+1, X12(I,I), X12(I,I), LDX12, + $ TAUQ2(I) ) + ELSE + CALL CLARFGP( M-Q-I+1, X12(I,I), X12(I,I+1), LDX12, + $ TAUQ2(I) ) + END IF + X12(I,I) = ONE +* + IF ( P .GT. I ) THEN + CALL CLARF( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, TAUQ2(I), + $ X12(I+1,I), LDX12, WORK ) + END IF + IF( M-P-Q .GE. 1 ) + $ CALL CLARF( 'R', M-P-Q, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), X22(Q+1,I), LDX22, WORK ) +* + CALL CLACGV( M-Q-I+1, X12(I,I), LDX12 ) +* + END DO +* +* Reduce columns P + 1, ..., M - Q of X12, X22 +* + DO I = 1, M - P - Q +* + CALL CSCAL( M-P-Q-I+1, CMPLX( Z2*Z4, 0.0E0 ), + $ X22(Q+I,P+I), LDX22 ) + CALL CLACGV( M-P-Q-I+1, X22(Q+I,P+I), LDX22 ) + CALL CLARFGP( M-P-Q-I+1, X22(Q+I,P+I), X22(Q+I,P+I+1), + $ LDX22, TAUQ2(P+I) ) + X22(Q+I,P+I) = ONE + CALL CLARF( 'R', M-P-Q-I, M-P-Q-I+1, X22(Q+I,P+I), LDX22, + $ TAUQ2(P+I), X22(Q+I+1,P+I), LDX22, WORK ) +* + CALL CLACGV( M-P-Q-I+1, X22(Q+I,P+I), LDX22 ) +* + END DO +* + ELSE +* +* Reduce columns 1, ..., Q of X11, X12, X21, X22 +* + DO I = 1, Q +* + IF( I .EQ. 1 ) THEN + CALL CSCAL( P-I+1, CMPLX( Z1, 0.0E0 ), X11(I,I), + $ LDX11 ) + ELSE + CALL CSCAL( P-I+1, CMPLX( Z1*COS(PHI(I-1)), 0.0E0 ), + $ X11(I,I), LDX11 ) + CALL CAXPY( P-I+1, CMPLX( -Z1*Z3*Z4*SIN(PHI(I-1)), + $ 0.0E0 ), X12(I-1,I), LDX12, X11(I,I), LDX11 ) + END IF + IF( I .EQ. 1 ) THEN + CALL CSCAL( M-P-I+1, CMPLX( Z2, 0.0E0 ), X21(I,I), + $ LDX21 ) + ELSE + CALL CSCAL( M-P-I+1, CMPLX( Z2*COS(PHI(I-1)), 0.0E0 ), + $ X21(I,I), LDX21 ) + CALL CAXPY( M-P-I+1, CMPLX( -Z2*Z3*Z4*SIN(PHI(I-1)), + $ 0.0E0 ), X22(I-1,I), LDX22, X21(I,I), LDX21 ) + END IF +* + THETA(I) = ATAN2( SCNRM2( M-P-I+1, X21(I,I), LDX21 ), + $ SCNRM2( P-I+1, X11(I,I), LDX11 ) ) +* + CALL CLACGV( P-I+1, X11(I,I), LDX11 ) + CALL CLACGV( M-P-I+1, X21(I,I), LDX21 ) +* + CALL CLARFGP( P-I+1, X11(I,I), X11(I,I+1), LDX11, TAUP1(I) ) + X11(I,I) = ONE + IF ( I .EQ. M-P ) THEN + CALL CLARFGP( M-P-I+1, X21(I,I), X21(I,I), LDX21, + $ TAUP2(I) ) + ELSE + CALL CLARFGP( M-P-I+1, X21(I,I), X21(I,I+1), LDX21, + $ TAUP2(I) ) + END IF + X21(I,I) = ONE +* + CALL CLARF( 'R', Q-I, P-I+1, X11(I,I), LDX11, TAUP1(I), + $ X11(I+1,I), LDX11, WORK ) + CALL CLARF( 'R', M-Q-I+1, P-I+1, X11(I,I), LDX11, TAUP1(I), + $ X12(I,I), LDX12, WORK ) + CALL CLARF( 'R', Q-I, M-P-I+1, X21(I,I), LDX21, TAUP2(I), + $ X21(I+1,I), LDX21, WORK ) + CALL CLARF( 'R', M-Q-I+1, M-P-I+1, X21(I,I), LDX21, + $ TAUP2(I), X22(I,I), LDX22, WORK ) +* + CALL CLACGV( P-I+1, X11(I,I), LDX11 ) + CALL CLACGV( M-P-I+1, X21(I,I), LDX21 ) +* + IF( I .LT. Q ) THEN + CALL CSCAL( Q-I, CMPLX( -Z1*Z3*SIN(THETA(I)), 0.0E0 ), + $ X11(I+1,I), 1 ) + CALL CAXPY( Q-I, CMPLX( Z2*Z3*COS(THETA(I)), 0.0E0 ), + $ X21(I+1,I), 1, X11(I+1,I), 1 ) + END IF + CALL CSCAL( M-Q-I+1, CMPLX( -Z1*Z4*SIN(THETA(I)), 0.0E0 ), + $ X12(I,I), 1 ) + CALL CAXPY( M-Q-I+1, CMPLX( Z2*Z4*COS(THETA(I)), 0.0E0 ), + $ X22(I,I), 1, X12(I,I), 1 ) +* + IF( I .LT. Q ) + $ PHI(I) = ATAN2( SCNRM2( Q-I, X11(I+1,I), 1 ), + $ SCNRM2( M-Q-I+1, X12(I,I), 1 ) ) +* + IF( I .LT. Q ) THEN + CALL CLARFGP( Q-I, X11(I+1,I), X11(I+2,I), 1, TAUQ1(I) ) + X11(I+1,I) = ONE + END IF + CALL CLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, TAUQ2(I) ) + X12(I,I) = ONE +* + IF( I .LT. Q ) THEN + CALL CLARF( 'L', Q-I, P-I, X11(I+1,I), 1, + $ CONJG(TAUQ1(I)), X11(I+1,I+1), LDX11, WORK ) + CALL CLARF( 'L', Q-I, M-P-I, X11(I+1,I), 1, + $ CONJG(TAUQ1(I)), X21(I+1,I+1), LDX21, WORK ) + END IF + CALL CLARF( 'L', M-Q-I+1, P-I, X12(I,I), 1, CONJG(TAUQ2(I)), + $ X12(I,I+1), LDX12, WORK ) + + IF ( M-P .GT. I ) THEN + CALL CLARF( 'L', M-Q-I+1, M-P-I, X12(I,I), 1, + $ CONJG(TAUQ2(I)), X22(I,I+1), LDX22, WORK ) + END IF + END DO +* +* Reduce columns Q + 1, ..., P of X12, X22 +* + DO I = Q + 1, P +* + CALL CSCAL( M-Q-I+1, CMPLX( -Z1*Z4, 0.0E0 ), X12(I,I), 1 ) + CALL CLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, TAUQ2(I) ) + X12(I,I) = ONE +* + IF ( P .GT. I ) THEN + CALL CLARF( 'L', M-Q-I+1, P-I, X12(I,I), 1, + $ CONJG(TAUQ2(I)), X12(I,I+1), LDX12, WORK ) + END IF + IF( M-P-Q .GE. 1 ) + $ CALL CLARF( 'L', M-Q-I+1, M-P-Q, X12(I,I), 1, + $ CONJG(TAUQ2(I)), X22(I,Q+1), LDX22, WORK ) +* + END DO +* +* Reduce columns P + 1, ..., M - Q of X12, X22 +* + DO I = 1, M - P - Q +* + CALL CSCAL( M-P-Q-I+1, CMPLX( Z2*Z4, 0.0E0 ), + $ X22(P+I,Q+I), 1 ) + CALL CLARFGP( M-P-Q-I+1, X22(P+I,Q+I), X22(P+I+1,Q+I), 1, + $ TAUQ2(P+I) ) + X22(P+I,Q+I) = ONE + IF ( M-P-Q .NE. I ) THEN + CALL CLARF( 'L', M-P-Q-I+1, M-P-Q-I, X22(P+I,Q+I), 1, + $ CONJG(TAUQ2(P+I)), X22(P+I,Q+I+1), LDX22, + $ WORK ) + END IF + END DO +* + END IF +* + RETURN +* +* End of CUNBDB +* + END + diff --git a/dspl/liblapack/SRC/cunbdb1.f b/dspl/liblapack/SRC/cunbdb1.f new file mode 100644 index 0000000..bfd2147 --- /dev/null +++ b/dspl/liblapack/SRC/cunbdb1.f @@ -0,0 +1,326 @@ +*> \brief \b CUNBDB1 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNBDB1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, +* TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. +* REAL PHI(*), THETA(*) +* COMPLEX TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), +* $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> CUNBDB1 simultaneously bidiagonalizes the blocks of a tall and skinny +*> matrix X with orthonomal columns: +*> +*> [ B11 ] +*> [ X11 ] [ P1 | ] [ 0 ] +*> [-----] = [---------] [-----] Q1**T . +*> [ X21 ] [ | P2 ] [ B21 ] +*> [ 0 ] +*> +*> X11 is P-by-Q, and X21 is (M-P)-by-Q. Q must be no larger than P, +*> M-P, or M-Q. Routines CUNBDB2, CUNBDB3, and CUNBDB4 handle cases in +*> which Q is not the minimum dimension. +*> +*> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), +*> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by +*> Householder vectors. +*> +*> B11 and B12 are Q-by-Q bidiagonal matrices represented implicitly by +*> angles THETA, PHI. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows X11 plus the number of rows in X21. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= +*> MIN(P,M-P,M-Q). +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is COMPLEX array, dimension (LDX11,Q) +*> On entry, the top block of the matrix X to be reduced. On +*> exit, the columns of tril(X11) specify reflectors for P1 and +*> the rows of triu(X11,1) specify reflectors for Q1. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= P. +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is COMPLEX array, dimension (LDX21,Q) +*> On entry, the bottom block of the matrix X to be reduced. On +*> exit, the columns of tril(X21) specify reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. LDX21 >= M-P. +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is REAL array, dimension (Q) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] PHI +*> \verbatim +*> PHI is REAL array, dimension (Q-1) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP1 +*> \verbatim +*> TAUP1 is COMPLEX array, dimension (P) +*> The scalar factors of the elementary reflectors that define +*> P1. +*> \endverbatim +*> +*> \param[out] TAUP2 +*> \verbatim +*> TAUP2 is COMPLEX array, dimension (M-P) +*> The scalar factors of the elementary reflectors that define +*> P2. +*> \endverbatim +*> +*> \param[out] TAUQ1 +*> \verbatim +*> TAUQ1 is COMPLEX array, dimension (Q) +*> The scalar factors of the elementary reflectors that define +*> Q1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= M-Q. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== + +*> \verbatim +*> +*> The upper-bidiagonal blocks B11, B21 are represented implicitly by +*> angles THETA(1), ..., THETA(Q) and PHI(1), ..., PHI(Q-1). Every entry +*> in each bidiagonal band is a product of a sine or cosine of a THETA +*> with a sine or cosine of a PHI. See [1] or CUNCSD for details. +*> +*> P1, P2, and Q1 are represented as products of elementary reflectors. +*> See CUNCSD2BY1 for details on generating P1, P2, and Q1 using CUNGQR +*> and CUNGLQ. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +*> +* ===================================================================== + SUBROUTINE CUNBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. + REAL PHI(*), THETA(*) + COMPLEX TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), + $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* ==================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = (1.0E0,0.0E0) ) +* .. +* .. Local Scalars .. + REAL C, S + INTEGER CHILDINFO, I, ILARF, IORBDB5, LLARF, LORBDB5, + $ LWORKMIN, LWORKOPT + LOGICAL LQUERY +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CLARFGP, CUNBDB5, CSROT, XERBLA + EXTERNAL CLACGV +* .. +* .. External Functions .. + REAL SCNRM2 + EXTERNAL SCNRM2 +* .. +* .. Intrinsic Function .. + INTRINSIC ATAN2, COS, MAX, SIN, SQRT +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -1 + ELSE IF( P .LT. Q .OR. M-P .LT. Q ) THEN + INFO = -2 + ELSE IF( Q .LT. 0 .OR. M-Q .LT. Q ) THEN + INFO = -3 + ELSE IF( LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -5 + ELSE IF( LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + ILARF = 2 + LLARF = MAX( P-1, M-P-1, Q-1 ) + IORBDB5 = 2 + LORBDB5 = Q-2 + LWORKOPT = MAX( ILARF+LLARF-1, IORBDB5+LORBDB5-1 ) + LWORKMIN = LWORKOPT + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'CUNBDB1', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Reduce columns 1, ..., Q of X11 and X21 +* + DO I = 1, Q +* + CALL CLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) + CALL CLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, TAUP2(I) ) + THETA(I) = ATAN2( REAL( X21(I,I) ), REAL( X11(I,I) ) ) + C = COS( THETA(I) ) + S = SIN( THETA(I) ) + X11(I,I) = ONE + X21(I,I) = ONE + CALL CLARF( 'L', P-I+1, Q-I, X11(I,I), 1, CONJG(TAUP1(I)), + $ X11(I,I+1), LDX11, WORK(ILARF) ) + CALL CLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, CONJG(TAUP2(I)), + $ X21(I,I+1), LDX21, WORK(ILARF) ) +* + IF( I .LT. Q ) THEN + CALL CSROT( Q-I, X11(I,I+1), LDX11, X21(I,I+1), LDX21, C, + $ S ) + CALL CLACGV( Q-I, X21(I,I+1), LDX21 ) + CALL CLARFGP( Q-I, X21(I,I+1), X21(I,I+2), LDX21, TAUQ1(I) ) + S = REAL( X21(I,I+1) ) + X21(I,I+1) = ONE + CALL CLARF( 'R', P-I, Q-I, X21(I,I+1), LDX21, TAUQ1(I), + $ X11(I+1,I+1), LDX11, WORK(ILARF) ) + CALL CLARF( 'R', M-P-I, Q-I, X21(I,I+1), LDX21, TAUQ1(I), + $ X21(I+1,I+1), LDX21, WORK(ILARF) ) + CALL CLACGV( Q-I, X21(I,I+1), LDX21 ) + C = SQRT( SCNRM2( P-I, X11(I+1,I+1), 1 )**2 + $ + SCNRM2( M-P-I, X21(I+1,I+1), 1 )**2 ) + PHI(I) = ATAN2( S, C ) + CALL CUNBDB5( P-I, M-P-I, Q-I-1, X11(I+1,I+1), 1, + $ X21(I+1,I+1), 1, X11(I+1,I+2), LDX11, + $ X21(I+1,I+2), LDX21, WORK(IORBDB5), LORBDB5, + $ CHILDINFO ) + END IF +* + END DO +* + RETURN +* +* End of CUNBDB1 +* + END + diff --git a/dspl/liblapack/SRC/cunbdb2.f b/dspl/liblapack/SRC/cunbdb2.f new file mode 100644 index 0000000..ae73699 --- /dev/null +++ b/dspl/liblapack/SRC/cunbdb2.f @@ -0,0 +1,338 @@ +*> \brief \b CUNBDB2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNBDB2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, +* TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. +* REAL PHI(*), THETA(*) +* COMPLEX TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), +* $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> CUNBDB2 simultaneously bidiagonalizes the blocks of a tall and skinny +*> matrix X with orthonomal columns: +*> +*> [ B11 ] +*> [ X11 ] [ P1 | ] [ 0 ] +*> [-----] = [---------] [-----] Q1**T . +*> [ X21 ] [ | P2 ] [ B21 ] +*> [ 0 ] +*> +*> X11 is P-by-Q, and X21 is (M-P)-by-Q. P must be no larger than M-P, +*> Q, or M-Q. Routines CUNBDB1, CUNBDB3, and CUNBDB4 handle cases in +*> which P is not the minimum dimension. +*> +*> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), +*> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by +*> Householder vectors. +*> +*> B11 and B12 are P-by-P bidiagonal matrices represented implicitly by +*> angles THETA, PHI. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows X11 plus the number of rows in X21. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11. 0 <= P <= min(M-P,Q,M-Q). +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= M. +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is COMPLEX array, dimension (LDX11,Q) +*> On entry, the top block of the matrix X to be reduced. On +*> exit, the columns of tril(X11) specify reflectors for P1 and +*> the rows of triu(X11,1) specify reflectors for Q1. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= P. +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is COMPLEX array, dimension (LDX21,Q) +*> On entry, the bottom block of the matrix X to be reduced. On +*> exit, the columns of tril(X21) specify reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. LDX21 >= M-P. +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is REAL array, dimension (Q) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] PHI +*> \verbatim +*> PHI is REAL array, dimension (Q-1) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP1 +*> \verbatim +*> TAUP1 is COMPLEX array, dimension (P) +*> The scalar factors of the elementary reflectors that define +*> P1. +*> \endverbatim +*> +*> \param[out] TAUP2 +*> \verbatim +*> TAUP2 is COMPLEX array, dimension (M-P) +*> The scalar factors of the elementary reflectors that define +*> P2. +*> \endverbatim +*> +*> \param[out] TAUQ1 +*> \verbatim +*> TAUQ1 is COMPLEX array, dimension (Q) +*> The scalar factors of the elementary reflectors that define +*> Q1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= M-Q. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +*> +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The upper-bidiagonal blocks B11, B21 are represented implicitly by +*> angles THETA(1), ..., THETA(Q) and PHI(1), ..., PHI(Q-1). Every entry +*> in each bidiagonal band is a product of a sine or cosine of a THETA +*> with a sine or cosine of a PHI. See [1] or CUNCSD for details. +*> +*> P1, P2, and Q1 are represented as products of elementary reflectors. +*> See CUNCSD2BY1 for details on generating P1, P2, and Q1 using CUNGQR +*> and CUNGLQ. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +*> +* ===================================================================== + SUBROUTINE CUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. + REAL PHI(*), THETA(*) + COMPLEX TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), + $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* ==================================================================== +* +* .. Parameters .. + COMPLEX NEGONE, ONE + PARAMETER ( NEGONE = (-1.0E0,0.0E0), + $ ONE = (1.0E0,0.0E0) ) +* .. +* .. Local Scalars .. + REAL C, S + INTEGER CHILDINFO, I, ILARF, IORBDB5, LLARF, LORBDB5, + $ LWORKMIN, LWORKOPT + LOGICAL LQUERY +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CLARFGP, CUNBDB5, CSROT, CSCAL, CLACGV, + $ XERBLA +* .. +* .. External Functions .. + REAL SCNRM2 + EXTERNAL SCNRM2 +* .. +* .. Intrinsic Function .. + INTRINSIC ATAN2, COS, MAX, SIN, SQRT +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -1 + ELSE IF( P .LT. 0 .OR. P .GT. M-P ) THEN + INFO = -2 + ELSE IF( Q .LT. 0 .OR. Q .LT. P .OR. M-Q .LT. P ) THEN + INFO = -3 + ELSE IF( LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -5 + ELSE IF( LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + ILARF = 2 + LLARF = MAX( P-1, M-P, Q-1 ) + IORBDB5 = 2 + LORBDB5 = Q-1 + LWORKOPT = MAX( ILARF+LLARF-1, IORBDB5+LORBDB5-1 ) + LWORKMIN = LWORKOPT + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'CUNBDB2', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Reduce rows 1, ..., P of X11 and X21 +* + DO I = 1, P +* + IF( I .GT. 1 ) THEN + CALL CSROT( Q-I+1, X11(I,I), LDX11, X21(I-1,I), LDX21, C, + $ S ) + END IF + CALL CLACGV( Q-I+1, X11(I,I), LDX11 ) + CALL CLARFGP( Q-I+1, X11(I,I), X11(I,I+1), LDX11, TAUQ1(I) ) + C = REAL( X11(I,I) ) + X11(I,I) = ONE + CALL CLARF( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL CLARF( 'R', M-P-I+1, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X21(I,I), LDX21, WORK(ILARF) ) + CALL CLACGV( Q-I+1, X11(I,I), LDX11 ) + S = SQRT( SCNRM2( P-I, X11(I+1,I), 1 )**2 + $ + SCNRM2( M-P-I+1, X21(I,I), 1 )**2 ) + THETA(I) = ATAN2( S, C ) +* + CALL CUNBDB5( P-I, M-P-I+1, Q-I, X11(I+1,I), 1, X21(I,I), 1, + $ X11(I+1,I+1), LDX11, X21(I,I+1), LDX21, + $ WORK(IORBDB5), LORBDB5, CHILDINFO ) + CALL CSCAL( P-I, NEGONE, X11(I+1,I), 1 ) + CALL CLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, TAUP2(I) ) + IF( I .LT. P ) THEN + CALL CLARFGP( P-I, X11(I+1,I), X11(I+2,I), 1, TAUP1(I) ) + PHI(I) = ATAN2( REAL( X11(I+1,I) ), REAL( X21(I,I) ) ) + C = COS( PHI(I) ) + S = SIN( PHI(I) ) + X11(I+1,I) = ONE + CALL CLARF( 'L', P-I, Q-I, X11(I+1,I), 1, CONJG(TAUP1(I)), + $ X11(I+1,I+1), LDX11, WORK(ILARF) ) + END IF + X21(I,I) = ONE + CALL CLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, CONJG(TAUP2(I)), + $ X21(I,I+1), LDX21, WORK(ILARF) ) +* + END DO +* +* Reduce the bottom-right portion of X21 to the identity matrix +* + DO I = P + 1, Q + CALL CLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, TAUP2(I) ) + X21(I,I) = ONE + CALL CLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, CONJG(TAUP2(I)), + $ X21(I,I+1), LDX21, WORK(ILARF) ) + END DO +* + RETURN +* +* End of CUNBDB2 +* + END + diff --git a/dspl/liblapack/SRC/cunbdb3.f b/dspl/liblapack/SRC/cunbdb3.f new file mode 100644 index 0000000..c0abde1 --- /dev/null +++ b/dspl/liblapack/SRC/cunbdb3.f @@ -0,0 +1,336 @@ +*> \brief \b CUNBDB3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNBDB3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, +* TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. +* REAL PHI(*), THETA(*) +* COMPLEX TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), +* $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> CUNBDB3 simultaneously bidiagonalizes the blocks of a tall and skinny +*> matrix X with orthonomal columns: +*> +*> [ B11 ] +*> [ X11 ] [ P1 | ] [ 0 ] +*> [-----] = [---------] [-----] Q1**T . +*> [ X21 ] [ | P2 ] [ B21 ] +*> [ 0 ] +*> +*> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-P must be no larger than P, +*> Q, or M-Q. Routines CUNBDB1, CUNBDB2, and CUNBDB4 handle cases in +*> which M-P is not the minimum dimension. +*> +*> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), +*> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by +*> Householder vectors. +*> +*> B11 and B12 are (M-P)-by-(M-P) bidiagonal matrices represented +*> implicitly by angles THETA, PHI. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows X11 plus the number of rows in X21. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11. 0 <= P <= M. M-P <= min(P,Q,M-Q). +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= M. +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is COMPLEX array, dimension (LDX11,Q) +*> On entry, the top block of the matrix X to be reduced. On +*> exit, the columns of tril(X11) specify reflectors for P1 and +*> the rows of triu(X11,1) specify reflectors for Q1. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= P. +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is COMPLEX array, dimension (LDX21,Q) +*> On entry, the bottom block of the matrix X to be reduced. On +*> exit, the columns of tril(X21) specify reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. LDX21 >= M-P. +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is REAL array, dimension (Q) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] PHI +*> \verbatim +*> PHI is REAL array, dimension (Q-1) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP1 +*> \verbatim +*> TAUP1 is COMPLEX array, dimension (P) +*> The scalar factors of the elementary reflectors that define +*> P1. +*> \endverbatim +*> +*> \param[out] TAUP2 +*> \verbatim +*> TAUP2 is COMPLEX array, dimension (M-P) +*> The scalar factors of the elementary reflectors that define +*> P2. +*> \endverbatim +*> +*> \param[out] TAUQ1 +*> \verbatim +*> TAUQ1 is COMPLEX array, dimension (Q) +*> The scalar factors of the elementary reflectors that define +*> Q1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= M-Q. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +*> +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== + +*> \verbatim +*> +*> The upper-bidiagonal blocks B11, B21 are represented implicitly by +*> angles THETA(1), ..., THETA(Q) and PHI(1), ..., PHI(Q-1). Every entry +*> in each bidiagonal band is a product of a sine or cosine of a THETA +*> with a sine or cosine of a PHI. See [1] or CUNCSD for details. +*> +*> P1, P2, and Q1 are represented as products of elementary reflectors. +*> See CUNCSD2BY1 for details on generating P1, P2, and Q1 using CUNGQR +*> and CUNGLQ. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +*> +* ===================================================================== + SUBROUTINE CUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. + REAL PHI(*), THETA(*) + COMPLEX TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), + $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* ==================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = (1.0E0,0.0E0) ) +* .. +* .. Local Scalars .. + REAL C, S + INTEGER CHILDINFO, I, ILARF, IORBDB5, LLARF, LORBDB5, + $ LWORKMIN, LWORKOPT + LOGICAL LQUERY +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CLARFGP, CUNBDB5, CSROT, CLACGV, XERBLA +* .. +* .. External Functions .. + REAL SCNRM2 + EXTERNAL SCNRM2 +* .. +* .. Intrinsic Function .. + INTRINSIC ATAN2, COS, MAX, SIN, SQRT +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -1 + ELSE IF( 2*P .LT. M .OR. P .GT. M ) THEN + INFO = -2 + ELSE IF( Q .LT. M-P .OR. M-Q .LT. M-P ) THEN + INFO = -3 + ELSE IF( LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -5 + ELSE IF( LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + ILARF = 2 + LLARF = MAX( P, M-P-1, Q-1 ) + IORBDB5 = 2 + LORBDB5 = Q-1 + LWORKOPT = MAX( ILARF+LLARF-1, IORBDB5+LORBDB5-1 ) + LWORKMIN = LWORKOPT + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'CUNBDB3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Reduce rows 1, ..., M-P of X11 and X21 +* + DO I = 1, M-P +* + IF( I .GT. 1 ) THEN + CALL CSROT( Q-I+1, X11(I-1,I), LDX11, X21(I,I), LDX11, C, + $ S ) + END IF +* + CALL CLACGV( Q-I+1, X21(I,I), LDX21 ) + CALL CLARFGP( Q-I+1, X21(I,I), X21(I,I+1), LDX21, TAUQ1(I) ) + S = REAL( X21(I,I) ) + X21(I,I) = ONE + CALL CLARF( 'R', P-I+1, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X11(I,I), LDX11, WORK(ILARF) ) + CALL CLARF( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X21(I+1,I), LDX21, WORK(ILARF) ) + CALL CLACGV( Q-I+1, X21(I,I), LDX21 ) + C = SQRT( SCNRM2( P-I+1, X11(I,I), 1 )**2 + $ + SCNRM2( M-P-I, X21(I+1,I), 1 )**2 ) + THETA(I) = ATAN2( S, C ) +* + CALL CUNBDB5( P-I+1, M-P-I, Q-I, X11(I,I), 1, X21(I+1,I), 1, + $ X11(I,I+1), LDX11, X21(I+1,I+1), LDX21, + $ WORK(IORBDB5), LORBDB5, CHILDINFO ) + CALL CLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) + IF( I .LT. M-P ) THEN + CALL CLARFGP( M-P-I, X21(I+1,I), X21(I+2,I), 1, TAUP2(I) ) + PHI(I) = ATAN2( REAL( X21(I+1,I) ), REAL( X11(I,I) ) ) + C = COS( PHI(I) ) + S = SIN( PHI(I) ) + X21(I+1,I) = ONE + CALL CLARF( 'L', M-P-I, Q-I, X21(I+1,I), 1, CONJG(TAUP2(I)), + $ X21(I+1,I+1), LDX21, WORK(ILARF) ) + END IF + X11(I,I) = ONE + CALL CLARF( 'L', P-I+1, Q-I, X11(I,I), 1, CONJG(TAUP1(I)), + $ X11(I,I+1), LDX11, WORK(ILARF) ) +* + END DO +* +* Reduce the bottom-right portion of X11 to the identity matrix +* + DO I = M-P + 1, Q + CALL CLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) + X11(I,I) = ONE + CALL CLARF( 'L', P-I+1, Q-I, X11(I,I), 1, CONJG(TAUP1(I)), + $ X11(I,I+1), LDX11, WORK(ILARF) ) + END DO +* + RETURN +* +* End of CUNBDB3 +* + END + diff --git a/dspl/liblapack/SRC/cunbdb4.f b/dspl/liblapack/SRC/cunbdb4.f new file mode 100644 index 0000000..803b851 --- /dev/null +++ b/dspl/liblapack/SRC/cunbdb4.f @@ -0,0 +1,385 @@ +*> \brief \b CUNBDB4 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNBDB4 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, +* TAUP1, TAUP2, TAUQ1, PHANTOM, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. +* REAL PHI(*), THETA(*) +* COMPLEX PHANTOM(*), TAUP1(*), TAUP2(*), TAUQ1(*), +* $ WORK(*), X11(LDX11,*), X21(LDX21,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> CUNBDB4 simultaneously bidiagonalizes the blocks of a tall and skinny +*> matrix X with orthonomal columns: +*> +*> [ B11 ] +*> [ X11 ] [ P1 | ] [ 0 ] +*> [-----] = [---------] [-----] Q1**T . +*> [ X21 ] [ | P2 ] [ B21 ] +*> [ 0 ] +*> +*> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-Q must be no larger than P, +*> M-P, or Q. Routines CUNBDB1, CUNBDB2, and CUNBDB3 handle cases in +*> which M-Q is not the minimum dimension. +*> +*> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), +*> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by +*> Householder vectors. +*> +*> B11 and B12 are (M-Q)-by-(M-Q) bidiagonal matrices represented +*> implicitly by angles THETA, PHI. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows X11 plus the number of rows in X21. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= M and +*> M-Q <= min(P,M-P,Q). +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is COMPLEX array, dimension (LDX11,Q) +*> On entry, the top block of the matrix X to be reduced. On +*> exit, the columns of tril(X11) specify reflectors for P1 and +*> the rows of triu(X11,1) specify reflectors for Q1. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= P. +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is COMPLEX array, dimension (LDX21,Q) +*> On entry, the bottom block of the matrix X to be reduced. On +*> exit, the columns of tril(X21) specify reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. LDX21 >= M-P. +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is REAL array, dimension (Q) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] PHI +*> \verbatim +*> PHI is REAL array, dimension (Q-1) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP1 +*> \verbatim +*> TAUP1 is COMPLEX array, dimension (P) +*> The scalar factors of the elementary reflectors that define +*> P1. +*> \endverbatim +*> +*> \param[out] TAUP2 +*> \verbatim +*> TAUP2 is COMPLEX array, dimension (M-P) +*> The scalar factors of the elementary reflectors that define +*> P2. +*> \endverbatim +*> +*> \param[out] TAUQ1 +*> \verbatim +*> TAUQ1 is COMPLEX array, dimension (Q) +*> The scalar factors of the elementary reflectors that define +*> Q1. +*> \endverbatim +*> +*> \param[out] PHANTOM +*> \verbatim +*> PHANTOM is COMPLEX array, dimension (M) +*> The routine computes an M-by-1 column vector Y that is +*> orthogonal to the columns of [ X11; X21 ]. PHANTOM(1:P) and +*> PHANTOM(P+1:M) contain Householder vectors for Y(1:P) and +*> Y(P+1:M), respectively. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= M-Q. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup complexOTHERcomputational +* +*> \par Further Details: +* ===================== + +*> \verbatim +*> +*> The upper-bidiagonal blocks B11, B21 are represented implicitly by +*> angles THETA(1), ..., THETA(Q) and PHI(1), ..., PHI(Q-1). Every entry +*> in each bidiagonal band is a product of a sine or cosine of a THETA +*> with a sine or cosine of a PHI. See [1] or CUNCSD for details. +*> +*> P1, P2, and Q1 are represented as products of elementary reflectors. +*> See CUNCSD2BY1 for details on generating P1, P2, and Q1 using CUNGQR +*> and CUNGLQ. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +*> +* ===================================================================== + SUBROUTINE CUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + $ TAUP1, TAUP2, TAUQ1, PHANTOM, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. + REAL PHI(*), THETA(*) + COMPLEX PHANTOM(*), TAUP1(*), TAUP2(*), TAUQ1(*), + $ WORK(*), X11(LDX11,*), X21(LDX21,*) +* .. +* +* ==================================================================== +* +* .. Parameters .. + COMPLEX NEGONE, ONE, ZERO + PARAMETER ( NEGONE = (-1.0E0,0.0E0), ONE = (1.0E0,0.0E0), + $ ZERO = (0.0E0,0.0E0) ) +* .. +* .. Local Scalars .. + REAL C, S + INTEGER CHILDINFO, I, ILARF, IORBDB5, J, LLARF, + $ LORBDB5, LWORKMIN, LWORKOPT + LOGICAL LQUERY +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CLARFGP, CUNBDB5, CSROT, CSCAL, CLACGV, + $ XERBLA +* .. +* .. External Functions .. + REAL SCNRM2 + EXTERNAL SCNRM2 +* .. +* .. Intrinsic Function .. + INTRINSIC ATAN2, COS, MAX, SIN, SQRT +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -1 + ELSE IF( P .LT. M-Q .OR. M-P .LT. M-Q ) THEN + INFO = -2 + ELSE IF( Q .LT. M-Q .OR. Q .GT. M ) THEN + INFO = -3 + ELSE IF( LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -5 + ELSE IF( LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + ILARF = 2 + LLARF = MAX( Q-1, P-1, M-P-1 ) + IORBDB5 = 2 + LORBDB5 = Q + LWORKOPT = ILARF + LLARF - 1 + LWORKOPT = MAX( LWORKOPT, IORBDB5 + LORBDB5 - 1 ) + LWORKMIN = LWORKOPT + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'CUNBDB4', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Reduce columns 1, ..., M-Q of X11 and X21 +* + DO I = 1, M-Q +* + IF( I .EQ. 1 ) THEN + DO J = 1, M + PHANTOM(J) = ZERO + END DO + CALL CUNBDB5( P, M-P, Q, PHANTOM(1), 1, PHANTOM(P+1), 1, + $ X11, LDX11, X21, LDX21, WORK(IORBDB5), + $ LORBDB5, CHILDINFO ) + CALL CSCAL( P, NEGONE, PHANTOM(1), 1 ) + CALL CLARFGP( P, PHANTOM(1), PHANTOM(2), 1, TAUP1(1) ) + CALL CLARFGP( M-P, PHANTOM(P+1), PHANTOM(P+2), 1, TAUP2(1) ) + THETA(I) = ATAN2( REAL( PHANTOM(1) ), REAL( PHANTOM(P+1) ) ) + C = COS( THETA(I) ) + S = SIN( THETA(I) ) + PHANTOM(1) = ONE + PHANTOM(P+1) = ONE + CALL CLARF( 'L', P, Q, PHANTOM(1), 1, CONJG(TAUP1(1)), X11, + $ LDX11, WORK(ILARF) ) + CALL CLARF( 'L', M-P, Q, PHANTOM(P+1), 1, CONJG(TAUP2(1)), + $ X21, LDX21, WORK(ILARF) ) + ELSE + CALL CUNBDB5( P-I+1, M-P-I+1, Q-I+1, X11(I,I-1), 1, + $ X21(I,I-1), 1, X11(I,I), LDX11, X21(I,I), + $ LDX21, WORK(IORBDB5), LORBDB5, CHILDINFO ) + CALL CSCAL( P-I+1, NEGONE, X11(I,I-1), 1 ) + CALL CLARFGP( P-I+1, X11(I,I-1), X11(I+1,I-1), 1, TAUP1(I) ) + CALL CLARFGP( M-P-I+1, X21(I,I-1), X21(I+1,I-1), 1, + $ TAUP2(I) ) + THETA(I) = ATAN2( REAL( X11(I,I-1) ), REAL( X21(I,I-1) ) ) + C = COS( THETA(I) ) + S = SIN( THETA(I) ) + X11(I,I-1) = ONE + X21(I,I-1) = ONE + CALL CLARF( 'L', P-I+1, Q-I+1, X11(I,I-1), 1, + $ CONJG(TAUP1(I)), X11(I,I), LDX11, WORK(ILARF) ) + CALL CLARF( 'L', M-P-I+1, Q-I+1, X21(I,I-1), 1, + $ CONJG(TAUP2(I)), X21(I,I), LDX21, WORK(ILARF) ) + END IF +* + CALL CSROT( Q-I+1, X11(I,I), LDX11, X21(I,I), LDX21, S, -C ) + CALL CLACGV( Q-I+1, X21(I,I), LDX21 ) + CALL CLARFGP( Q-I+1, X21(I,I), X21(I,I+1), LDX21, TAUQ1(I) ) + C = REAL( X21(I,I) ) + X21(I,I) = ONE + CALL CLARF( 'R', P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL CLARF( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X21(I+1,I), LDX21, WORK(ILARF) ) + CALL CLACGV( Q-I+1, X21(I,I), LDX21 ) + IF( I .LT. M-Q ) THEN + S = SQRT( SCNRM2( P-I, X11(I+1,I), 1 )**2 + $ + SCNRM2( M-P-I, X21(I+1,I), 1 )**2 ) + PHI(I) = ATAN2( S, C ) + END IF +* + END DO +* +* Reduce the bottom-right portion of X11 to [ I 0 ] +* + DO I = M - Q + 1, P + CALL CLACGV( Q-I+1, X11(I,I), LDX11 ) + CALL CLARFGP( Q-I+1, X11(I,I), X11(I,I+1), LDX11, TAUQ1(I) ) + X11(I,I) = ONE + CALL CLARF( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL CLARF( 'R', Q-P, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X21(M-Q+1,I), LDX21, WORK(ILARF) ) + CALL CLACGV( Q-I+1, X11(I,I), LDX11 ) + END DO +* +* Reduce the bottom-right portion of X21 to [ 0 I ] +* + DO I = P + 1, Q + CALL CLACGV( Q-I+1, X21(M-Q+I-P,I), LDX21 ) + CALL CLARFGP( Q-I+1, X21(M-Q+I-P,I), X21(M-Q+I-P,I+1), LDX21, + $ TAUQ1(I) ) + X21(M-Q+I-P,I) = ONE + CALL CLARF( 'R', Q-I, Q-I+1, X21(M-Q+I-P,I), LDX21, TAUQ1(I), + $ X21(M-Q+I-P+1,I), LDX21, WORK(ILARF) ) + CALL CLACGV( Q-I+1, X21(M-Q+I-P,I), LDX21 ) + END DO +* + RETURN +* +* End of CUNBDB4 +* + END + diff --git a/dspl/liblapack/SRC/cunbdb5.f b/dspl/liblapack/SRC/cunbdb5.f new file mode 100644 index 0000000..9599745 --- /dev/null +++ b/dspl/liblapack/SRC/cunbdb5.f @@ -0,0 +1,274 @@ +*> \brief \b CUNBDB5 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNBDB5 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, +* LDQ2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INCX1, INCX2, INFO, LDQ1, LDQ2, LWORK, M1, M2, +* $ N +* .. +* .. Array Arguments .. +* COMPLEX Q1(LDQ1,*), Q2(LDQ2,*), WORK(*), X1(*), X2(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> CUNBDB5 orthogonalizes the column vector +*> X = [ X1 ] +*> [ X2 ] +*> with respect to the columns of +*> Q = [ Q1 ] . +*> [ Q2 ] +*> The columns of Q must be orthonormal. +*> +*> If the projection is zero according to Kahan's "twice is enough" +*> criterion, then some other vector from the orthogonal complement +*> is returned. This vector is chosen in an arbitrary but deterministic +*> way. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M1 +*> \verbatim +*> M1 is INTEGER +*> The dimension of X1 and the number of rows in Q1. 0 <= M1. +*> \endverbatim +*> +*> \param[in] M2 +*> \verbatim +*> M2 is INTEGER +*> The dimension of X2 and the number of rows in Q2. 0 <= M2. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in Q1 and Q2. 0 <= N. +*> \endverbatim +*> +*> \param[in,out] X1 +*> \verbatim +*> X1 is COMPLEX array, dimension (M1) +*> On entry, the top part of the vector to be orthogonalized. +*> On exit, the top part of the projected vector. +*> \endverbatim +*> +*> \param[in] INCX1 +*> \verbatim +*> INCX1 is INTEGER +*> Increment for entries of X1. +*> \endverbatim +*> +*> \param[in,out] X2 +*> \verbatim +*> X2 is COMPLEX array, dimension (M2) +*> On entry, the bottom part of the vector to be +*> orthogonalized. On exit, the bottom part of the projected +*> vector. +*> \endverbatim +*> +*> \param[in] INCX2 +*> \verbatim +*> INCX2 is INTEGER +*> Increment for entries of X2. +*> \endverbatim +*> +*> \param[in] Q1 +*> \verbatim +*> Q1 is COMPLEX array, dimension (LDQ1, N) +*> The top part of the orthonormal basis matrix. +*> \endverbatim +*> +*> \param[in] LDQ1 +*> \verbatim +*> LDQ1 is INTEGER +*> The leading dimension of Q1. LDQ1 >= M1. +*> \endverbatim +*> +*> \param[in] Q2 +*> \verbatim +*> Q2 is COMPLEX array, dimension (LDQ2, N) +*> The bottom part of the orthonormal basis matrix. +*> \endverbatim +*> +*> \param[in] LDQ2 +*> \verbatim +*> LDQ2 is INTEGER +*> The leading dimension of Q2. LDQ2 >= M2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= N. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INCX1, INCX2, INFO, LDQ1, LDQ2, LWORK, M1, M2, + $ N +* .. +* .. Array Arguments .. + COMPLEX Q1(LDQ1,*), Q2(LDQ2,*), WORK(*), X1(*), X2(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = (1.0E0,0.0E0), ZERO = (0.0E0,0.0E0) ) +* .. +* .. Local Scalars .. + INTEGER CHILDINFO, I, J +* .. +* .. External Subroutines .. + EXTERNAL CUNBDB6, XERBLA +* .. +* .. External Functions .. + REAL SCNRM2 + EXTERNAL SCNRM2 +* .. +* .. Intrinsic Function .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + IF( M1 .LT. 0 ) THEN + INFO = -1 + ELSE IF( M2 .LT. 0 ) THEN + INFO = -2 + ELSE IF( N .LT. 0 ) THEN + INFO = -3 + ELSE IF( INCX1 .LT. 1 ) THEN + INFO = -5 + ELSE IF( INCX2 .LT. 1 ) THEN + INFO = -7 + ELSE IF( LDQ1 .LT. MAX( 1, M1 ) ) THEN + INFO = -9 + ELSE IF( LDQ2 .LT. MAX( 1, M2 ) ) THEN + INFO = -11 + ELSE IF( LWORK .LT. N ) THEN + INFO = -13 + END IF +* + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'CUNBDB5', -INFO ) + RETURN + END IF +* +* Project X onto the orthogonal complement of Q +* + CALL CUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, LDQ2, + $ WORK, LWORK, CHILDINFO ) +* +* If the projection is nonzero, then return +* + IF( SCNRM2(M1,X1,INCX1) .NE. ZERO + $ .OR. SCNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + RETURN + END IF +* +* Project each standard basis vector e_1,...,e_M1 in turn, stopping +* when a nonzero projection is found +* + DO I = 1, M1 + DO J = 1, M1 + X1(J) = ZERO + END DO + X1(I) = ONE + DO J = 1, M2 + X2(J) = ZERO + END DO + CALL CUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, CHILDINFO ) + IF( SCNRM2(M1,X1,INCX1) .NE. ZERO + $ .OR. SCNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + RETURN + END IF + END DO +* +* Project each standard basis vector e_(M1+1),...,e_(M1+M2) in turn, +* stopping when a nonzero projection is found +* + DO I = 1, M2 + DO J = 1, M1 + X1(J) = ZERO + END DO + DO J = 1, M2 + X2(J) = ZERO + END DO + X2(I) = ONE + CALL CUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, CHILDINFO ) + IF( SCNRM2(M1,X1,INCX1) .NE. ZERO + $ .OR. SCNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + RETURN + END IF + END DO +* + RETURN +* +* End of CUNBDB5 +* + END + diff --git a/dspl/liblapack/SRC/cunbdb6.f b/dspl/liblapack/SRC/cunbdb6.f new file mode 100644 index 0000000..05a1389 --- /dev/null +++ b/dspl/liblapack/SRC/cunbdb6.f @@ -0,0 +1,313 @@ +*> \brief \b CUNBDB6 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNBDB6 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, +* LDQ2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INCX1, INCX2, INFO, LDQ1, LDQ2, LWORK, M1, M2, +* $ N +* .. +* .. Array Arguments .. +* COMPLEX Q1(LDQ1,*), Q2(LDQ2,*), WORK(*), X1(*), X2(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> CUNBDB6 orthogonalizes the column vector +*> X = [ X1 ] +*> [ X2 ] +*> with respect to the columns of +*> Q = [ Q1 ] . +*> [ Q2 ] +*> The columns of Q must be orthonormal. +*> +*> If the projection is zero according to Kahan's "twice is enough" +*> criterion, then the zero vector is returned. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M1 +*> \verbatim +*> M1 is INTEGER +*> The dimension of X1 and the number of rows in Q1. 0 <= M1. +*> \endverbatim +*> +*> \param[in] M2 +*> \verbatim +*> M2 is INTEGER +*> The dimension of X2 and the number of rows in Q2. 0 <= M2. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in Q1 and Q2. 0 <= N. +*> \endverbatim +*> +*> \param[in,out] X1 +*> \verbatim +*> X1 is COMPLEX array, dimension (M1) +*> On entry, the top part of the vector to be orthogonalized. +*> On exit, the top part of the projected vector. +*> \endverbatim +*> +*> \param[in] INCX1 +*> \verbatim +*> INCX1 is INTEGER +*> Increment for entries of X1. +*> \endverbatim +*> +*> \param[in,out] X2 +*> \verbatim +*> X2 is COMPLEX array, dimension (M2) +*> On entry, the bottom part of the vector to be +*> orthogonalized. On exit, the bottom part of the projected +*> vector. +*> \endverbatim +*> +*> \param[in] INCX2 +*> \verbatim +*> INCX2 is INTEGER +*> Increment for entries of X2. +*> \endverbatim +*> +*> \param[in] Q1 +*> \verbatim +*> Q1 is COMPLEX array, dimension (LDQ1, N) +*> The top part of the orthonormal basis matrix. +*> \endverbatim +*> +*> \param[in] LDQ1 +*> \verbatim +*> LDQ1 is INTEGER +*> The leading dimension of Q1. LDQ1 >= M1. +*> \endverbatim +*> +*> \param[in] Q2 +*> \verbatim +*> Q2 is COMPLEX array, dimension (LDQ2, N) +*> The bottom part of the orthonormal basis matrix. +*> \endverbatim +*> +*> \param[in] LDQ2 +*> \verbatim +*> LDQ2 is INTEGER +*> The leading dimension of Q2. LDQ2 >= M2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= N. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INCX1, INCX2, INFO, LDQ1, LDQ2, LWORK, M1, M2, + $ N +* .. +* .. Array Arguments .. + COMPLEX Q1(LDQ1,*), Q2(LDQ2,*), WORK(*), X1(*), X2(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ALPHASQ, REALONE, REALZERO + PARAMETER ( ALPHASQ = 0.01E0, REALONE = 1.0E0, + $ REALZERO = 0.0E0 ) + COMPLEX NEGONE, ONE, ZERO + PARAMETER ( NEGONE = (-1.0E0,0.0E0), ONE = (1.0E0,0.0E0), + $ ZERO = (0.0E0,0.0E0) ) +* .. +* .. Local Scalars .. + INTEGER I + REAL NORMSQ1, NORMSQ2, SCL1, SCL2, SSQ1, SSQ2 +* .. +* .. External Subroutines .. + EXTERNAL CGEMV, CLASSQ, XERBLA +* .. +* .. Intrinsic Function .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + IF( M1 .LT. 0 ) THEN + INFO = -1 + ELSE IF( M2 .LT. 0 ) THEN + INFO = -2 + ELSE IF( N .LT. 0 ) THEN + INFO = -3 + ELSE IF( INCX1 .LT. 1 ) THEN + INFO = -5 + ELSE IF( INCX2 .LT. 1 ) THEN + INFO = -7 + ELSE IF( LDQ1 .LT. MAX( 1, M1 ) ) THEN + INFO = -9 + ELSE IF( LDQ2 .LT. MAX( 1, M2 ) ) THEN + INFO = -11 + ELSE IF( LWORK .LT. N ) THEN + INFO = -13 + END IF +* + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'CUNBDB6', -INFO ) + RETURN + END IF +* +* First, project X onto the orthogonal complement of Q's column +* space +* + SCL1 = REALZERO + SSQ1 = REALONE + CALL CLASSQ( M1, X1, INCX1, SCL1, SSQ1 ) + SCL2 = REALZERO + SSQ2 = REALONE + CALL CLASSQ( M2, X2, INCX2, SCL2, SSQ2 ) + NORMSQ1 = SCL1**2*SSQ1 + SCL2**2*SSQ2 +* + IF( M1 .EQ. 0 ) THEN + DO I = 1, N + WORK(I) = ZERO + END DO + ELSE + CALL CGEMV( 'C', M1, N, ONE, Q1, LDQ1, X1, INCX1, ZERO, WORK, + $ 1 ) + END IF +* + CALL CGEMV( 'C', M2, N, ONE, Q2, LDQ2, X2, INCX2, ONE, WORK, 1 ) +* + CALL CGEMV( 'N', M1, N, NEGONE, Q1, LDQ1, WORK, 1, ONE, X1, + $ INCX1 ) + CALL CGEMV( 'N', M2, N, NEGONE, Q2, LDQ2, WORK, 1, ONE, X2, + $ INCX2 ) +* + SCL1 = REALZERO + SSQ1 = REALONE + CALL CLASSQ( M1, X1, INCX1, SCL1, SSQ1 ) + SCL2 = REALZERO + SSQ2 = REALONE + CALL CLASSQ( M2, X2, INCX2, SCL2, SSQ2 ) + NORMSQ2 = SCL1**2*SSQ1 + SCL2**2*SSQ2 +* +* If projection is sufficiently large in norm, then stop. +* If projection is zero, then stop. +* Otherwise, project again. +* + IF( NORMSQ2 .GE. ALPHASQ*NORMSQ1 ) THEN + RETURN + END IF +* + IF( NORMSQ2 .EQ. ZERO ) THEN + RETURN + END IF +* + NORMSQ1 = NORMSQ2 +* + DO I = 1, N + WORK(I) = ZERO + END DO +* + IF( M1 .EQ. 0 ) THEN + DO I = 1, N + WORK(I) = ZERO + END DO + ELSE + CALL CGEMV( 'C', M1, N, ONE, Q1, LDQ1, X1, INCX1, ZERO, WORK, + $ 1 ) + END IF +* + CALL CGEMV( 'C', M2, N, ONE, Q2, LDQ2, X2, INCX2, ONE, WORK, 1 ) +* + CALL CGEMV( 'N', M1, N, NEGONE, Q1, LDQ1, WORK, 1, ONE, X1, + $ INCX1 ) + CALL CGEMV( 'N', M2, N, NEGONE, Q2, LDQ2, WORK, 1, ONE, X2, + $ INCX2 ) +* + SCL1 = REALZERO + SSQ1 = REALONE + CALL CLASSQ( M1, X1, INCX1, SCL1, SSQ1 ) + SCL2 = REALZERO + SSQ2 = REALONE + CALL CLASSQ( M1, X1, INCX1, SCL1, SSQ1 ) + NORMSQ2 = SCL1**2*SSQ1 + SCL2**2*SSQ2 +* +* If second projection is sufficiently large in norm, then do +* nothing more. Alternatively, if it shrunk significantly, then +* truncate it to zero. +* + IF( NORMSQ2 .LT. ALPHASQ*NORMSQ1 ) THEN + DO I = 1, M1 + X1(I) = ZERO + END DO + DO I = 1, M2 + X2(I) = ZERO + END DO + END IF +* + RETURN +* +* End of CUNBDB6 +* + END + diff --git a/dspl/liblapack/SRC/cuncsd.f b/dspl/liblapack/SRC/cuncsd.f new file mode 100644 index 0000000..f305291 --- /dev/null +++ b/dspl/liblapack/SRC/cuncsd.f @@ -0,0 +1,658 @@ +*> \brief \b CUNCSD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNCSD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE CUNCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, +* SIGNS, M, P, Q, X11, LDX11, X12, +* LDX12, X21, LDX21, X22, LDX22, THETA, +* U1, LDU1, U2, LDU2, V1T, LDV1T, V2T, +* LDV2T, WORK, LWORK, RWORK, LRWORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, SIGNS, TRANS +* INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LDX11, LDX12, +* $ LDX21, LDX22, LRWORK, LWORK, M, P, Q +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL THETA( * ) +* REAL RWORK( * ) +* COMPLEX U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ), +* $ V2T( LDV2T, * ), WORK( * ), X11( LDX11, * ), +* $ X12( LDX12, * ), X21( LDX21, * ), X22( LDX22, +* $ * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNCSD computes the CS decomposition of an M-by-M partitioned +*> unitary matrix X: +*> +*> [ I 0 0 | 0 0 0 ] +*> [ 0 C 0 | 0 -S 0 ] +*> [ X11 | X12 ] [ U1 | ] [ 0 0 0 | 0 0 -I ] [ V1 | ]**H +*> X = [-----------] = [---------] [---------------------] [---------] . +*> [ X21 | X22 ] [ | U2 ] [ 0 0 0 | I 0 0 ] [ | V2 ] +*> [ 0 S 0 | 0 C 0 ] +*> [ 0 0 I | 0 0 0 ] +*> +*> X11 is P-by-Q. The unitary matrices U1, U2, V1, and V2 are P-by-P, +*> (M-P)-by-(M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. C and S are +*> R-by-R nonnegative diagonal matrices satisfying C^2 + S^2 = I, in +*> which R = MIN(P,M-P,Q,M-Q). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU1 +*> \verbatim +*> JOBU1 is CHARACTER +*> = 'Y': U1 is computed; +*> otherwise: U1 is not computed. +*> \endverbatim +*> +*> \param[in] JOBU2 +*> \verbatim +*> JOBU2 is CHARACTER +*> = 'Y': U2 is computed; +*> otherwise: U2 is not computed. +*> \endverbatim +*> +*> \param[in] JOBV1T +*> \verbatim +*> JOBV1T is CHARACTER +*> = 'Y': V1T is computed; +*> otherwise: V1T is not computed. +*> \endverbatim +*> +*> \param[in] JOBV2T +*> \verbatim +*> JOBV2T is CHARACTER +*> = 'Y': V2T is computed; +*> otherwise: V2T is not computed. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER +*> = 'T': X, U1, U2, V1T, and V2T are stored in row-major +*> order; +*> otherwise: X, U1, U2, V1T, and V2T are stored in column- +*> major order. +*> \endverbatim +*> +*> \param[in] SIGNS +*> \verbatim +*> SIGNS is CHARACTER +*> = 'O': The lower-left block is made nonpositive (the +*> "other" convention); +*> otherwise: The upper-right block is made nonpositive (the +*> "default" convention). +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows and columns in X. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11 and X12. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= M. +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is COMPLEX array, dimension (LDX11,Q) +*> On entry, part of the unitary matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= MAX(1,P). +*> \endverbatim +*> +*> \param[in,out] X12 +*> \verbatim +*> X12 is COMPLEX array, dimension (LDX12,M-Q) +*> On entry, part of the unitary matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX12 +*> \verbatim +*> LDX12 is INTEGER +*> The leading dimension of X12. LDX12 >= MAX(1,P). +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is COMPLEX array, dimension (LDX21,Q) +*> On entry, part of the unitary matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X11. LDX21 >= MAX(1,M-P). +*> \endverbatim +*> +*> \param[in,out] X22 +*> \verbatim +*> X22 is COMPLEX array, dimension (LDX22,M-Q) +*> On entry, part of the unitary matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX22 +*> \verbatim +*> LDX22 is INTEGER +*> The leading dimension of X11. LDX22 >= MAX(1,M-P). +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is REAL array, dimension (R), in which R = +*> MIN(P,M-P,Q,M-Q). +*> C = DIAG( COS(THETA(1)), ... , COS(THETA(R)) ) and +*> S = DIAG( SIN(THETA(1)), ... , SIN(THETA(R)) ). +*> \endverbatim +*> +*> \param[out] U1 +*> \verbatim +*> U1 is COMPLEX array, dimension (LDU1,P) +*> If JOBU1 = 'Y', U1 contains the P-by-P unitary matrix U1. +*> \endverbatim +*> +*> \param[in] LDU1 +*> \verbatim +*> LDU1 is INTEGER +*> The leading dimension of U1. If JOBU1 = 'Y', LDU1 >= +*> MAX(1,P). +*> \endverbatim +*> +*> \param[out] U2 +*> \verbatim +*> U2 is COMPLEX array, dimension (LDU2,M-P) +*> If JOBU2 = 'Y', U2 contains the (M-P)-by-(M-P) unitary +*> matrix U2. +*> \endverbatim +*> +*> \param[in] LDU2 +*> \verbatim +*> LDU2 is INTEGER +*> The leading dimension of U2. If JOBU2 = 'Y', LDU2 >= +*> MAX(1,M-P). +*> \endverbatim +*> +*> \param[out] V1T +*> \verbatim +*> V1T is COMPLEX array, dimension (LDV1T,Q) +*> If JOBV1T = 'Y', V1T contains the Q-by-Q matrix unitary +*> matrix V1**H. +*> \endverbatim +*> +*> \param[in] LDV1T +*> \verbatim +*> LDV1T is INTEGER +*> The leading dimension of V1T. If JOBV1T = 'Y', LDV1T >= +*> MAX(1,Q). +*> \endverbatim +*> +*> \param[out] V2T +*> \verbatim +*> V2T is COMPLEX array, dimension (LDV2T,M-Q) +*> If JOBV2T = 'Y', V2T contains the (M-Q)-by-(M-Q) unitary +*> matrix V2**H. +*> \endverbatim +*> +*> \param[in] LDV2T +*> \verbatim +*> LDV2T is INTEGER +*> The leading dimension of V2T. If JOBV2T = 'Y', LDV2T >= +*> MAX(1,M-Q). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the work array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension MAX(1,LRWORK) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> If INFO > 0 on exit, RWORK(2:R) contains the values PHI(1), +*> ..., PHI(R-1) that, together with THETA(1), ..., THETA(R), +*> define the matrix in intermediate bidiagonal-block form +*> remaining after nonconvergence. INFO specifies the number +*> of nonzero PHI's. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of the array RWORK. +*> +*> If LRWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the RWORK array, returns +*> this value as the first entry of the work array, and no error +*> message related to LRWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (M-MIN(P,M-P,Q,M-Q)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: CBBCSD did not converge. See the description of RWORK +*> above for details. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + RECURSIVE SUBROUTINE CUNCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, + $ SIGNS, M, P, Q, X11, LDX11, X12, + $ LDX12, X21, LDX21, X22, LDX22, THETA, + $ U1, LDU1, U2, LDU2, V1T, LDV1T, V2T, + $ LDV2T, WORK, LWORK, RWORK, LRWORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, SIGNS, TRANS + INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LDX11, LDX12, + $ LDX21, LDX22, LRWORK, LWORK, M, P, Q +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL THETA( * ) + REAL RWORK( * ) + COMPLEX U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ), + $ V2T( LDV2T, * ), WORK( * ), X11( LDX11, * ), + $ X12( LDX12, * ), X21( LDX21, * ), X22( LDX22, + $ * ) +* .. +* +* =================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = (1.0E0,0.0E0), + $ ZERO = (0.0E0,0.0E0) ) +* .. +* .. Local Scalars .. + CHARACTER TRANST, SIGNST + INTEGER CHILDINFO, I, IB11D, IB11E, IB12D, IB12E, + $ IB21D, IB21E, IB22D, IB22E, IBBCSD, IORBDB, + $ IORGLQ, IORGQR, IPHI, ITAUP1, ITAUP2, ITAUQ1, + $ ITAUQ2, J, LBBCSDWORK, LBBCSDWORKMIN, + $ LBBCSDWORKOPT, LORBDBWORK, LORBDBWORKMIN, + $ LORBDBWORKOPT, LORGLQWORK, LORGLQWORKMIN, + $ LORGLQWORKOPT, LORGQRWORK, LORGQRWORKMIN, + $ LORGQRWORKOPT, LWORKMIN, LWORKOPT, P1, Q1 + LOGICAL COLMAJOR, DEFAULTSIGNS, LQUERY, WANTU1, WANTU2, + $ WANTV1T, WANTV2T + INTEGER LRWORKMIN, LRWORKOPT + LOGICAL LRQUERY +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, CBBCSD, CLACPY, CLAPMR, CLAPMT, + $ CUNBDB, CUNGLQ, CUNGQR +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + WANTU1 = LSAME( JOBU1, 'Y' ) + WANTU2 = LSAME( JOBU2, 'Y' ) + WANTV1T = LSAME( JOBV1T, 'Y' ) + WANTV2T = LSAME( JOBV2T, 'Y' ) + COLMAJOR = .NOT. LSAME( TRANS, 'T' ) + DEFAULTSIGNS = .NOT. LSAME( SIGNS, 'O' ) + LQUERY = LWORK .EQ. -1 + LRQUERY = LRWORK .EQ. -1 + IF( M .LT. 0 ) THEN + INFO = -7 + ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN + INFO = -8 + ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN + INFO = -9 + ELSE IF ( COLMAJOR .AND. LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -11 + ELSE IF (.NOT. COLMAJOR .AND. LDX11 .LT. MAX( 1, Q ) ) THEN + INFO = -11 + ELSE IF (COLMAJOR .AND. LDX12 .LT. MAX( 1, P ) ) THEN + INFO = -13 + ELSE IF (.NOT. COLMAJOR .AND. LDX12 .LT. MAX( 1, M-Q ) ) THEN + INFO = -13 + ELSE IF (COLMAJOR .AND. LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -15 + ELSE IF (.NOT. COLMAJOR .AND. LDX21 .LT. MAX( 1, Q ) ) THEN + INFO = -15 + ELSE IF (COLMAJOR .AND. LDX22 .LT. MAX( 1, M-P ) ) THEN + INFO = -17 + ELSE IF (.NOT. COLMAJOR .AND. LDX22 .LT. MAX( 1, M-Q ) ) THEN + INFO = -17 + ELSE IF( WANTU1 .AND. LDU1 .LT. P ) THEN + INFO = -20 + ELSE IF( WANTU2 .AND. LDU2 .LT. M-P ) THEN + INFO = -22 + ELSE IF( WANTV1T .AND. LDV1T .LT. Q ) THEN + INFO = -24 + ELSE IF( WANTV2T .AND. LDV2T .LT. M-Q ) THEN + INFO = -26 + END IF +* +* Work with transpose if convenient +* + IF( INFO .EQ. 0 .AND. MIN( P, M-P ) .LT. MIN( Q, M-Q ) ) THEN + IF( COLMAJOR ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF + IF( DEFAULTSIGNS ) THEN + SIGNST = 'O' + ELSE + SIGNST = 'D' + END IF + CALL CUNCSD( JOBV1T, JOBV2T, JOBU1, JOBU2, TRANST, SIGNST, M, + $ Q, P, X11, LDX11, X21, LDX21, X12, LDX12, X22, + $ LDX22, THETA, V1T, LDV1T, V2T, LDV2T, U1, LDU1, + $ U2, LDU2, WORK, LWORK, RWORK, LRWORK, IWORK, + $ INFO ) + RETURN + END IF +* +* Work with permutation [ 0 I; I 0 ] * X * [ 0 I; I 0 ] if +* convenient +* + IF( INFO .EQ. 0 .AND. M-Q .LT. Q ) THEN + IF( DEFAULTSIGNS ) THEN + SIGNST = 'O' + ELSE + SIGNST = 'D' + END IF + CALL CUNCSD( JOBU2, JOBU1, JOBV2T, JOBV1T, TRANS, SIGNST, M, + $ M-P, M-Q, X22, LDX22, X21, LDX21, X12, LDX12, X11, + $ LDX11, THETA, U2, LDU2, U1, LDU1, V2T, LDV2T, V1T, + $ LDV1T, WORK, LWORK, RWORK, LRWORK, IWORK, INFO ) + RETURN + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN +* +* Real workspace +* + IPHI = 2 + IB11D = IPHI + MAX( 1, Q - 1 ) + IB11E = IB11D + MAX( 1, Q ) + IB12D = IB11E + MAX( 1, Q - 1 ) + IB12E = IB12D + MAX( 1, Q ) + IB21D = IB12E + MAX( 1, Q - 1 ) + IB21E = IB21D + MAX( 1, Q ) + IB22D = IB21E + MAX( 1, Q - 1 ) + IB22E = IB22D + MAX( 1, Q ) + IBBCSD = IB22E + MAX( 1, Q - 1 ) + CALL CBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, + $ THETA, THETA, U1, LDU1, U2, LDU2, V1T, LDV1T, + $ V2T, LDV2T, THETA, THETA, THETA, THETA, THETA, + $ THETA, THETA, THETA, RWORK, -1, CHILDINFO ) + LBBCSDWORKOPT = INT( RWORK(1) ) + LBBCSDWORKMIN = LBBCSDWORKOPT + LRWORKOPT = IBBCSD + LBBCSDWORKOPT - 1 + LRWORKMIN = IBBCSD + LBBCSDWORKMIN - 1 + RWORK(1) = LRWORKOPT +* +* Complex workspace +* + ITAUP1 = 2 + ITAUP2 = ITAUP1 + MAX( 1, P ) + ITAUQ1 = ITAUP2 + MAX( 1, M - P ) + ITAUQ2 = ITAUQ1 + MAX( 1, Q ) + IORGQR = ITAUQ2 + MAX( 1, M - Q ) + CALL CUNGQR( M-Q, M-Q, M-Q, U1, MAX(1,M-Q), U1, WORK, -1, + $ CHILDINFO ) + LORGQRWORKOPT = INT( WORK(1) ) + LORGQRWORKMIN = MAX( 1, M - Q ) + IORGLQ = ITAUQ2 + MAX( 1, M - Q ) + CALL CUNGLQ( M-Q, M-Q, M-Q, U1, MAX(1,M-Q), U1, WORK, -1, + $ CHILDINFO ) + LORGLQWORKOPT = INT( WORK(1) ) + LORGLQWORKMIN = MAX( 1, M - Q ) + IORBDB = ITAUQ2 + MAX( 1, M - Q ) + CALL CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, + $ X21, LDX21, X22, LDX22, THETA, THETA, U1, U2, + $ V1T, V2T, WORK, -1, CHILDINFO ) + LORBDBWORKOPT = INT( WORK(1) ) + LORBDBWORKMIN = LORBDBWORKOPT + LWORKOPT = MAX( IORGQR + LORGQRWORKOPT, IORGLQ + LORGLQWORKOPT, + $ IORBDB + LORBDBWORKOPT ) - 1 + LWORKMIN = MAX( IORGQR + LORGQRWORKMIN, IORGLQ + LORGLQWORKMIN, + $ IORBDB + LORBDBWORKMIN ) - 1 + WORK(1) = MAX(LWORKOPT,LWORKMIN) +* + IF( LWORK .LT. LWORKMIN + $ .AND. .NOT. ( LQUERY .OR. LRQUERY ) ) THEN + INFO = -22 + ELSE IF( LRWORK .LT. LRWORKMIN + $ .AND. .NOT. ( LQUERY .OR. LRQUERY ) ) THEN + INFO = -24 + ELSE + LORGQRWORK = LWORK - IORGQR + 1 + LORGLQWORK = LWORK - IORGLQ + 1 + LORBDBWORK = LWORK - IORBDB + 1 + LBBCSDWORK = LRWORK - IBBCSD + 1 + END IF + END IF +* +* Abort if any illegal arguments +* + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'CUNCSD', -INFO ) + RETURN + ELSE IF( LQUERY .OR. LRQUERY ) THEN + RETURN + END IF +* +* Transform to bidiagonal block form +* + CALL CUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, X21, + $ LDX21, X22, LDX22, THETA, RWORK(IPHI), WORK(ITAUP1), + $ WORK(ITAUP2), WORK(ITAUQ1), WORK(ITAUQ2), + $ WORK(IORBDB), LORBDBWORK, CHILDINFO ) +* +* Accumulate Householder reflectors +* + IF( COLMAJOR ) THEN + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL CLACPY( 'L', P, Q, X11, LDX11, U1, LDU1 ) + CALL CUNGQR( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGQR), + $ LORGQRWORK, INFO) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL CLACPY( 'L', M-P, Q, X21, LDX21, U2, LDU2 ) + CALL CUNGQR( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2), + $ WORK(IORGQR), LORGQRWORK, INFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL CLACPY( 'U', Q-1, Q-1, X11(1,2), LDX11, V1T(2,2), + $ LDV1T ) + V1T(1, 1) = ONE + DO J = 2, Q + V1T(1,J) = ZERO + V1T(J,1) = ZERO + END DO + CALL CUNGLQ( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1), + $ WORK(IORGLQ), LORGLQWORK, INFO ) + END IF + IF( WANTV2T .AND. M-Q .GT. 0 ) THEN + CALL CLACPY( 'U', P, M-Q, X12, LDX12, V2T, LDV2T ) + IF( M-P .GT. Q ) THEN + CALL CLACPY( 'U', M-P-Q, M-P-Q, X22(Q+1,P+1), LDX22, + $ V2T(P+1,P+1), LDV2T ) + END IF + IF( M .GT. Q ) THEN + CALL CUNGLQ( M-Q, M-Q, M-Q, V2T, LDV2T, WORK(ITAUQ2), + $ WORK(IORGLQ), LORGLQWORK, INFO ) + END IF + END IF + ELSE + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL CLACPY( 'U', Q, P, X11, LDX11, U1, LDU1 ) + CALL CUNGLQ( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGLQ), + $ LORGLQWORK, INFO) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL CLACPY( 'U', Q, M-P, X21, LDX21, U2, LDU2 ) + CALL CUNGLQ( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2), + $ WORK(IORGLQ), LORGLQWORK, INFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL CLACPY( 'L', Q-1, Q-1, X11(2,1), LDX11, V1T(2,2), + $ LDV1T ) + V1T(1, 1) = ONE + DO J = 2, Q + V1T(1,J) = ZERO + V1T(J,1) = ZERO + END DO + CALL CUNGQR( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1), + $ WORK(IORGQR), LORGQRWORK, INFO ) + END IF + IF( WANTV2T .AND. M-Q .GT. 0 ) THEN + P1 = MIN( P+1, M ) + Q1 = MIN( Q+1, M ) + CALL CLACPY( 'L', M-Q, P, X12, LDX12, V2T, LDV2T ) + IF ( M .GT. P+Q ) THEN + CALL CLACPY( 'L', M-P-Q, M-P-Q, X22(P1,Q1), LDX22, + $ V2T(P+1,P+1), LDV2T ) + END IF + CALL CUNGQR( M-Q, M-Q, M-Q, V2T, LDV2T, WORK(ITAUQ2), + $ WORK(IORGQR), LORGQRWORK, INFO ) + END IF + END IF +* +* Compute the CSD of the matrix in bidiagonal-block form +* + CALL CBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, THETA, + $ RWORK(IPHI), U1, LDU1, U2, LDU2, V1T, LDV1T, V2T, + $ LDV2T, RWORK(IB11D), RWORK(IB11E), RWORK(IB12D), + $ RWORK(IB12E), RWORK(IB21D), RWORK(IB21E), + $ RWORK(IB22D), RWORK(IB22E), RWORK(IBBCSD), + $ LBBCSDWORK, INFO ) +* +* Permute rows and columns to place identity submatrices in top- +* left corner of (1,1)-block and/or bottom-right corner of (1,2)- +* block and/or bottom-right corner of (2,1)-block and/or top-left +* corner of (2,2)-block +* + IF( Q .GT. 0 .AND. WANTU2 ) THEN + DO I = 1, Q + IWORK(I) = M - P - Q + I + END DO + DO I = Q + 1, M - P + IWORK(I) = I - Q + END DO + IF( COLMAJOR ) THEN + CALL CLAPMT( .FALSE., M-P, M-P, U2, LDU2, IWORK ) + ELSE + CALL CLAPMR( .FALSE., M-P, M-P, U2, LDU2, IWORK ) + END IF + END IF + IF( M .GT. 0 .AND. WANTV2T ) THEN + DO I = 1, P + IWORK(I) = M - P - Q + I + END DO + DO I = P + 1, M - Q + IWORK(I) = I - P + END DO + IF( .NOT. COLMAJOR ) THEN + CALL CLAPMT( .FALSE., M-Q, M-Q, V2T, LDV2T, IWORK ) + ELSE + CALL CLAPMR( .FALSE., M-Q, M-Q, V2T, LDV2T, IWORK ) + END IF + END IF +* + RETURN +* +* End CUNCSD +* + END + diff --git a/dspl/liblapack/SRC/cuncsd2by1.f b/dspl/liblapack/SRC/cuncsd2by1.f new file mode 100644 index 0000000..1d49885 --- /dev/null +++ b/dspl/liblapack/SRC/cuncsd2by1.f @@ -0,0 +1,774 @@ +*> \brief \b CUNCSD2BY1 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNCSD2BY1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNCSD2BY1( JOBU1, JOBU2, JOBV1T, M, P, Q, X11, LDX11, +* X21, LDX21, THETA, U1, LDU1, U2, LDU2, V1T, +* LDV1T, WORK, LWORK, RWORK, LRWORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBU1, JOBU2, JOBV1T +* INTEGER INFO, LDU1, LDU2, LDV1T, LWORK, LDX11, LDX21, +* $ M, P, Q +* INTEGER LRWORK, LRWORKMIN, LRWORKOPT +* .. +* .. Array Arguments .. +* REAL RWORK(*) +* REAL THETA(*) +* COMPLEX U1(LDU1,*), U2(LDU2,*), V1T(LDV1T,*), WORK(*), +* $ X11(LDX11,*), X21(LDX21,*) +* INTEGER IWORK(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> CUNCSD2BY1 computes the CS decomposition of an M-by-Q matrix X with +*> orthonormal columns that has been partitioned into a 2-by-1 block +*> structure: +*> +*> [ I1 0 0 ] +*> [ 0 C 0 ] +*> [ X11 ] [ U1 | ] [ 0 0 0 ] +*> X = [-----] = [---------] [----------] V1**T . +*> [ X21 ] [ | U2 ] [ 0 0 0 ] +*> [ 0 S 0 ] +*> [ 0 0 I2] +*> +*> X11 is P-by-Q. The unitary matrices U1, U2, and V1 are P-by-P, +*> (M-P)-by-(M-P), and Q-by-Q, respectively. C and S are R-by-R +*> nonnegative diagonal matrices satisfying C^2 + S^2 = I, in which +*> R = MIN(P,M-P,Q,M-Q). I1 is a K1-by-K1 identity matrix and I2 is a +*> K2-by-K2 identity matrix, where K1 = MAX(Q+P-M,0), K2 = MAX(Q-P,0). +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU1 +*> \verbatim +*> JOBU1 is CHARACTER +*> = 'Y': U1 is computed; +*> otherwise: U1 is not computed. +*> \endverbatim +*> +*> \param[in] JOBU2 +*> \verbatim +*> JOBU2 is CHARACTER +*> = 'Y': U2 is computed; +*> otherwise: U2 is not computed. +*> \endverbatim +*> +*> \param[in] JOBV1T +*> \verbatim +*> JOBV1T is CHARACTER +*> = 'Y': V1T is computed; +*> otherwise: V1T is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows in X. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= M. +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is COMPLEX array, dimension (LDX11,Q) +*> On entry, part of the unitary matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= MAX(1,P). +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is COMPLEX array, dimension (LDX21,Q) +*> On entry, part of the unitary matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. LDX21 >= MAX(1,M-P). +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is REAL array, dimension (R), in which R = +*> MIN(P,M-P,Q,M-Q). +*> C = DIAG( COS(THETA(1)), ... , COS(THETA(R)) ) and +*> S = DIAG( SIN(THETA(1)), ... , SIN(THETA(R)) ). +*> \endverbatim +*> +*> \param[out] U1 +*> \verbatim +*> U1 is COMPLEX array, dimension (P) +*> If JOBU1 = 'Y', U1 contains the P-by-P unitary matrix U1. +*> \endverbatim +*> +*> \param[in] LDU1 +*> \verbatim +*> LDU1 is INTEGER +*> The leading dimension of U1. If JOBU1 = 'Y', LDU1 >= +*> MAX(1,P). +*> \endverbatim +*> +*> \param[out] U2 +*> \verbatim +*> U2 is COMPLEX array, dimension (M-P) +*> If JOBU2 = 'Y', U2 contains the (M-P)-by-(M-P) unitary +*> matrix U2. +*> \endverbatim +*> +*> \param[in] LDU2 +*> \verbatim +*> LDU2 is INTEGER +*> The leading dimension of U2. If JOBU2 = 'Y', LDU2 >= +*> MAX(1,M-P). +*> \endverbatim +*> +*> \param[out] V1T +*> \verbatim +*> V1T is COMPLEX array, dimension (Q) +*> If JOBV1T = 'Y', V1T contains the Q-by-Q matrix unitary +*> matrix V1**T. +*> \endverbatim +*> +*> \param[in] LDV1T +*> \verbatim +*> LDV1T is INTEGER +*> The leading dimension of V1T. If JOBV1T = 'Y', LDV1T >= +*> MAX(1,Q). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the work array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is REAL array, dimension (MAX(1,LRWORK)) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> If INFO > 0 on exit, RWORK(2:R) contains the values PHI(1), +*> ..., PHI(R-1) that, together with THETA(1), ..., THETA(R), +*> define the matrix in intermediate bidiagonal-block form +*> remaining after nonconvergence. INFO specifies the number +*> of nonzero PHI's. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of the array RWORK. +*> +*> If LRWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the RWORK array, returns +*> this value as the first entry of the work array, and no error +*> message related to LRWORK is issued by XERBLA. +*> \endverbatim +* +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (M-MIN(P,M-P,Q,M-Q)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: CBBCSD did not converge. See the description of WORK +*> above for details. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNCSD2BY1( JOBU1, JOBU2, JOBV1T, M, P, Q, X11, LDX11, + $ X21, LDX21, THETA, U1, LDU1, U2, LDU2, V1T, + $ LDV1T, WORK, LWORK, RWORK, LRWORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBU1, JOBU2, JOBV1T + INTEGER INFO, LDU1, LDU2, LDV1T, LWORK, LDX11, LDX21, + $ M, P, Q + INTEGER LRWORK, LRWORKMIN, LRWORKOPT +* .. +* .. Array Arguments .. + REAL RWORK(*) + REAL THETA(*) + COMPLEX U1(LDU1,*), U2(LDU2,*), V1T(LDV1T,*), WORK(*), + $ X11(LDX11,*), X21(LDX21,*) + INTEGER IWORK(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = (1.0E0,0.0E0), ZERO = (0.0E0,0.0E0) ) +* .. +* .. Local Scalars .. + INTEGER CHILDINFO, I, IB11D, IB11E, IB12D, IB12E, + $ IB21D, IB21E, IB22D, IB22E, IBBCSD, IORBDB, + $ IORGLQ, IORGQR, IPHI, ITAUP1, ITAUP2, ITAUQ1, + $ J, LBBCSD, LORBDB, LORGLQ, LORGLQMIN, + $ LORGLQOPT, LORGQR, LORGQRMIN, LORGQROPT, + $ LWORKMIN, LWORKOPT, R + LOGICAL LQUERY, WANTU1, WANTU2, WANTV1T +* .. +* .. Local Arrays .. + REAL DUM( 1 ) + COMPLEX CDUM( 1, 1 ) +* .. +* .. External Subroutines .. + EXTERNAL CBBCSD, CCOPY, CLACPY, CLAPMR, CLAPMT, CUNBDB1, + $ CUNBDB2, CUNBDB3, CUNBDB4, CUNGLQ, CUNGQR, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Function .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + WANTU1 = LSAME( JOBU1, 'Y' ) + WANTU2 = LSAME( JOBU2, 'Y' ) + WANTV1T = LSAME( JOBV1T, 'Y' ) + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -4 + ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN + INFO = -5 + ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN + INFO = -6 + ELSE IF( LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -8 + ELSE IF( LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -10 + ELSE IF( WANTU1 .AND. LDU1 .LT. MAX( 1, P ) ) THEN + INFO = -13 + ELSE IF( WANTU2 .AND. LDU2 .LT. MAX( 1, M - P ) ) THEN + INFO = -15 + ELSE IF( WANTV1T .AND. LDV1T .LT. MAX( 1, Q ) ) THEN + INFO = -17 + END IF +* + R = MIN( P, M-P, Q, M-Q ) +* +* Compute workspace +* +* WORK layout: +* |-----------------------------------------| +* | LWORKOPT (1) | +* |-----------------------------------------| +* | TAUP1 (MAX(1,P)) | +* | TAUP2 (MAX(1,M-P)) | +* | TAUQ1 (MAX(1,Q)) | +* |-----------------------------------------| +* | CUNBDB WORK | CUNGQR WORK | CUNGLQ WORK | +* | | | | +* | | | | +* | | | | +* | | | | +* |-----------------------------------------| +* RWORK layout: +* |------------------| +* | LRWORKOPT (1) | +* |------------------| +* | PHI (MAX(1,R-1)) | +* |------------------| +* | B11D (R) | +* | B11E (R-1) | +* | B12D (R) | +* | B12E (R-1) | +* | B21D (R) | +* | B21E (R-1) | +* | B22D (R) | +* | B22E (R-1) | +* | CBBCSD RWORK | +* |------------------| +* + IF( INFO .EQ. 0 ) THEN + IPHI = 2 + IB11D = IPHI + MAX( 1, R-1 ) + IB11E = IB11D + MAX( 1, R ) + IB12D = IB11E + MAX( 1, R - 1 ) + IB12E = IB12D + MAX( 1, R ) + IB21D = IB12E + MAX( 1, R - 1 ) + IB21E = IB21D + MAX( 1, R ) + IB22D = IB21E + MAX( 1, R - 1 ) + IB22E = IB22D + MAX( 1, R ) + IBBCSD = IB22E + MAX( 1, R - 1 ) + ITAUP1 = 2 + ITAUP2 = ITAUP1 + MAX( 1, P ) + ITAUQ1 = ITAUP2 + MAX( 1, M-P ) + IORBDB = ITAUQ1 + MAX( 1, Q ) + IORGQR = ITAUQ1 + MAX( 1, Q ) + IORGLQ = ITAUQ1 + MAX( 1, Q ) + LORGQRMIN = 1 + LORGQROPT = 1 + LORGLQMIN = 1 + LORGLQOPT = 1 + IF( R .EQ. Q ) THEN + CALL CUNBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ DUM, CDUM, CDUM, CDUM, WORK, -1, + $ CHILDINFO ) + LORBDB = INT( WORK(1) ) + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL CUNGQR( P, P, Q, U1, LDU1, CDUM, WORK(1), -1, + $ CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + ENDIF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL CUNGQR( M-P, M-P, Q, U2, LDU2, CDUM, WORK(1), -1, + $ CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, M-P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL CUNGLQ( Q-1, Q-1, Q-1, V1T, LDV1T, + $ CDUM, WORK(1), -1, CHILDINFO ) + LORGLQMIN = MAX( LORGLQMIN, Q-1 ) + LORGLQOPT = MAX( LORGLQOPT, INT( WORK(1) ) ) + END IF + CALL CBBCSD( JOBU1, JOBU2, JOBV1T, 'N', 'N', M, P, Q, THETA, + $ DUM(1), U1, LDU1, U2, LDU2, V1T, LDV1T, CDUM, + $ 1, DUM, DUM, DUM, DUM, DUM, DUM, DUM, DUM, + $ RWORK(1), -1, CHILDINFO ) + LBBCSD = INT( RWORK(1) ) + ELSE IF( R .EQ. P ) THEN + CALL CUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, DUM, + $ CDUM, CDUM, CDUM, WORK(1), -1, CHILDINFO ) + LORBDB = INT( WORK(1) ) + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL CUNGQR( P-1, P-1, P-1, U1(2,2), LDU1, CDUM, WORK(1), + $ -1, CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, P-1 ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL CUNGQR( M-P, M-P, Q, U2, LDU2, CDUM, WORK(1), -1, + $ CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, M-P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL CUNGLQ( Q, Q, R, V1T, LDV1T, CDUM, WORK(1), -1, + $ CHILDINFO ) + LORGLQMIN = MAX( LORGLQMIN, Q ) + LORGLQOPT = MAX( LORGLQOPT, INT( WORK(1) ) ) + END IF + CALL CBBCSD( JOBV1T, 'N', JOBU1, JOBU2, 'T', M, Q, P, THETA, + $ DUM, V1T, LDV1T, CDUM, 1, U1, LDU1, U2, LDU2, + $ DUM, DUM, DUM, DUM, DUM, DUM, DUM, DUM, + $ RWORK(1), -1, CHILDINFO ) + LBBCSD = INT( RWORK(1) ) + ELSE IF( R .EQ. M-P ) THEN + CALL CUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, DUM, + $ CDUM, CDUM, CDUM, WORK(1), -1, CHILDINFO ) + LORBDB = INT( WORK(1) ) + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL CUNGQR( P, P, Q, U1, LDU1, CDUM, WORK(1), -1, + $ CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL CUNGQR( M-P-1, M-P-1, M-P-1, U2(2,2), LDU2, CDUM, + $ WORK(1), -1, CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, M-P-1 ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL CUNGLQ( Q, Q, R, V1T, LDV1T, CDUM, WORK(1), -1, + $ CHILDINFO ) + LORGLQMIN = MAX( LORGLQMIN, Q ) + LORGLQOPT = MAX( LORGLQOPT, INT( WORK(1) ) ) + END IF + CALL CBBCSD( 'N', JOBV1T, JOBU2, JOBU1, 'T', M, M-Q, M-P, + $ THETA, DUM, CDUM, 1, V1T, LDV1T, U2, LDU2, U1, + $ LDU1, DUM, DUM, DUM, DUM, DUM, DUM, DUM, DUM, + $ RWORK(1), -1, CHILDINFO ) + LBBCSD = INT( RWORK(1) ) + ELSE + CALL CUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, DUM, + $ CDUM, CDUM, CDUM, CDUM, WORK(1), -1, CHILDINFO + $ ) + LORBDB = M + INT( WORK(1) ) + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL CUNGQR( P, P, M-Q, U1, LDU1, CDUM, WORK(1), -1, + $ CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL CUNGQR( M-P, M-P, M-Q, U2, LDU2, CDUM, WORK(1), -1, + $ CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, M-P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL CUNGLQ( Q, Q, Q, V1T, LDV1T, CDUM, WORK(1), -1, + $ CHILDINFO ) + LORGLQMIN = MAX( LORGLQMIN, Q ) + LORGLQOPT = MAX( LORGLQOPT, INT( WORK(1) ) ) + END IF + CALL CBBCSD( JOBU2, JOBU1, 'N', JOBV1T, 'N', M, M-P, M-Q, + $ THETA, DUM, U2, LDU2, U1, LDU1, CDUM, 1, V1T, + $ LDV1T, DUM, DUM, DUM, DUM, DUM, DUM, DUM, DUM, + $ RWORK(1), -1, CHILDINFO ) + LBBCSD = INT( RWORK(1) ) + END IF + LRWORKMIN = IBBCSD+LBBCSD-1 + LRWORKOPT = LRWORKMIN + RWORK(1) = LRWORKOPT + LWORKMIN = MAX( IORBDB+LORBDB-1, + $ IORGQR+LORGQRMIN-1, + $ IORGLQ+LORGLQMIN-1 ) + LWORKOPT = MAX( IORBDB+LORBDB-1, + $ IORGQR+LORGQROPT-1, + $ IORGLQ+LORGLQOPT-1 ) + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'CUNCSD2BY1', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF + LORGQR = LWORK-IORGQR+1 + LORGLQ = LWORK-IORGLQ+1 +* +* Handle four cases separately: R = Q, R = P, R = M-P, and R = M-Q, +* in which R = MIN(P,M-P,Q,M-Q) +* + IF( R .EQ. Q ) THEN +* +* Case 1: R = Q +* +* Simultaneously bidiagonalize X11 and X21 +* + CALL CUNBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ RWORK(IPHI), WORK(ITAUP1), WORK(ITAUP2), + $ WORK(ITAUQ1), WORK(IORBDB), LORBDB, CHILDINFO ) +* +* Accumulate Householder reflectors +* + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL CLACPY( 'L', P, Q, X11, LDX11, U1, LDU1 ) + CALL CUNGQR( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGQR), + $ LORGQR, CHILDINFO ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL CLACPY( 'L', M-P, Q, X21, LDX21, U2, LDU2 ) + CALL CUNGQR( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2), + $ WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + V1T(1,1) = ONE + DO J = 2, Q + V1T(1,J) = ZERO + V1T(J,1) = ZERO + END DO + CALL CLACPY( 'U', Q-1, Q-1, X21(1,2), LDX21, V1T(2,2), + $ LDV1T ) + CALL CUNGLQ( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1), + $ WORK(IORGLQ), LORGLQ, CHILDINFO ) + END IF +* +* Simultaneously diagonalize X11 and X21. +* + CALL CBBCSD( JOBU1, JOBU2, JOBV1T, 'N', 'N', M, P, Q, THETA, + $ RWORK(IPHI), U1, LDU1, U2, LDU2, V1T, LDV1T, CDUM, + $ 1, RWORK(IB11D), RWORK(IB11E), RWORK(IB12D), + $ RWORK(IB12E), RWORK(IB21D), RWORK(IB21E), + $ RWORK(IB22D), RWORK(IB22E), RWORK(IBBCSD), LBBCSD, + $ CHILDINFO ) +* +* Permute rows and columns to place zero submatrices in +* preferred positions +* + IF( Q .GT. 0 .AND. WANTU2 ) THEN + DO I = 1, Q + IWORK(I) = M - P - Q + I + END DO + DO I = Q + 1, M - P + IWORK(I) = I - Q + END DO + CALL CLAPMT( .FALSE., M-P, M-P, U2, LDU2, IWORK ) + END IF + ELSE IF( R .EQ. P ) THEN +* +* Case 2: R = P +* +* Simultaneously bidiagonalize X11 and X21 +* + CALL CUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ RWORK(IPHI), WORK(ITAUP1), WORK(ITAUP2), + $ WORK(ITAUQ1), WORK(IORBDB), LORBDB, CHILDINFO ) +* +* Accumulate Householder reflectors +* + IF( WANTU1 .AND. P .GT. 0 ) THEN + U1(1,1) = ONE + DO J = 2, P + U1(1,J) = ZERO + U1(J,1) = ZERO + END DO + CALL CLACPY( 'L', P-1, P-1, X11(2,1), LDX11, U1(2,2), LDU1 ) + CALL CUNGQR( P-1, P-1, P-1, U1(2,2), LDU1, WORK(ITAUP1), + $ WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL CLACPY( 'L', M-P, Q, X21, LDX21, U2, LDU2 ) + CALL CUNGQR( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2), + $ WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL CLACPY( 'U', P, Q, X11, LDX11, V1T, LDV1T ) + CALL CUNGLQ( Q, Q, R, V1T, LDV1T, WORK(ITAUQ1), + $ WORK(IORGLQ), LORGLQ, CHILDINFO ) + END IF +* +* Simultaneously diagonalize X11 and X21. +* + CALL CBBCSD( JOBV1T, 'N', JOBU1, JOBU2, 'T', M, Q, P, THETA, + $ RWORK(IPHI), V1T, LDV1T, CDUM, 1, U1, LDU1, U2, + $ LDU2, RWORK(IB11D), RWORK(IB11E), RWORK(IB12D), + $ RWORK(IB12E), RWORK(IB21D), RWORK(IB21E), + $ RWORK(IB22D), RWORK(IB22E), RWORK(IBBCSD), LBBCSD, + $ CHILDINFO ) +* +* Permute rows and columns to place identity submatrices in +* preferred positions +* + IF( Q .GT. 0 .AND. WANTU2 ) THEN + DO I = 1, Q + IWORK(I) = M - P - Q + I + END DO + DO I = Q + 1, M - P + IWORK(I) = I - Q + END DO + CALL CLAPMT( .FALSE., M-P, M-P, U2, LDU2, IWORK ) + END IF + ELSE IF( R .EQ. M-P ) THEN +* +* Case 3: R = M-P +* +* Simultaneously bidiagonalize X11 and X21 +* + CALL CUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ RWORK(IPHI), WORK(ITAUP1), WORK(ITAUP2), + $ WORK(ITAUQ1), WORK(IORBDB), LORBDB, CHILDINFO ) +* +* Accumulate Householder reflectors +* + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL CLACPY( 'L', P, Q, X11, LDX11, U1, LDU1 ) + CALL CUNGQR( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGQR), + $ LORGQR, CHILDINFO ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + U2(1,1) = ONE + DO J = 2, M-P + U2(1,J) = ZERO + U2(J,1) = ZERO + END DO + CALL CLACPY( 'L', M-P-1, M-P-1, X21(2,1), LDX21, U2(2,2), + $ LDU2 ) + CALL CUNGQR( M-P-1, M-P-1, M-P-1, U2(2,2), LDU2, + $ WORK(ITAUP2), WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL CLACPY( 'U', M-P, Q, X21, LDX21, V1T, LDV1T ) + CALL CUNGLQ( Q, Q, R, V1T, LDV1T, WORK(ITAUQ1), + $ WORK(IORGLQ), LORGLQ, CHILDINFO ) + END IF +* +* Simultaneously diagonalize X11 and X21. +* + CALL CBBCSD( 'N', JOBV1T, JOBU2, JOBU1, 'T', M, M-Q, M-P, + $ THETA, RWORK(IPHI), CDUM, 1, V1T, LDV1T, U2, LDU2, + $ U1, LDU1, RWORK(IB11D), RWORK(IB11E), + $ RWORK(IB12D), RWORK(IB12E), RWORK(IB21D), + $ RWORK(IB21E), RWORK(IB22D), RWORK(IB22E), + $ RWORK(IBBCSD), LBBCSD, CHILDINFO ) +* +* Permute rows and columns to place identity submatrices in +* preferred positions +* + IF( Q .GT. R ) THEN + DO I = 1, R + IWORK(I) = Q - R + I + END DO + DO I = R + 1, Q + IWORK(I) = I - R + END DO + IF( WANTU1 ) THEN + CALL CLAPMT( .FALSE., P, Q, U1, LDU1, IWORK ) + END IF + IF( WANTV1T ) THEN + CALL CLAPMR( .FALSE., Q, Q, V1T, LDV1T, IWORK ) + END IF + END IF + ELSE +* +* Case 4: R = M-Q +* +* Simultaneously bidiagonalize X11 and X21 +* + CALL CUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ RWORK(IPHI), WORK(ITAUP1), WORK(ITAUP2), + $ WORK(ITAUQ1), WORK(IORBDB), WORK(IORBDB+M), + $ LORBDB-M, CHILDINFO ) +* +* Accumulate Householder reflectors +* + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL CCOPY( P, WORK(IORBDB), 1, U1, 1 ) + DO J = 2, P + U1(1,J) = ZERO + END DO + CALL CLACPY( 'L', P-1, M-Q-1, X11(2,1), LDX11, U1(2,2), + $ LDU1 ) + CALL CUNGQR( P, P, M-Q, U1, LDU1, WORK(ITAUP1), + $ WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL CCOPY( M-P, WORK(IORBDB+P), 1, U2, 1 ) + DO J = 2, M-P + U2(1,J) = ZERO + END DO + CALL CLACPY( 'L', M-P-1, M-Q-1, X21(2,1), LDX21, U2(2,2), + $ LDU2 ) + CALL CUNGQR( M-P, M-P, M-Q, U2, LDU2, WORK(ITAUP2), + $ WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL CLACPY( 'U', M-Q, Q, X21, LDX21, V1T, LDV1T ) + CALL CLACPY( 'U', P-(M-Q), Q-(M-Q), X11(M-Q+1,M-Q+1), LDX11, + $ V1T(M-Q+1,M-Q+1), LDV1T ) + CALL CLACPY( 'U', -P+Q, Q-P, X21(M-Q+1,P+1), LDX21, + $ V1T(P+1,P+1), LDV1T ) + CALL CUNGLQ( Q, Q, Q, V1T, LDV1T, WORK(ITAUQ1), + $ WORK(IORGLQ), LORGLQ, CHILDINFO ) + END IF +* +* Simultaneously diagonalize X11 and X21. +* + CALL CBBCSD( JOBU2, JOBU1, 'N', JOBV1T, 'N', M, M-P, M-Q, + $ THETA, RWORK(IPHI), U2, LDU2, U1, LDU1, CDUM, 1, + $ V1T, LDV1T, RWORK(IB11D), RWORK(IB11E), + $ RWORK(IB12D), RWORK(IB12E), RWORK(IB21D), + $ RWORK(IB21E), RWORK(IB22D), RWORK(IB22E), + $ RWORK(IBBCSD), LBBCSD, CHILDINFO ) +* +* Permute rows and columns to place identity submatrices in +* preferred positions +* + IF( P .GT. R ) THEN + DO I = 1, R + IWORK(I) = P - R + I + END DO + DO I = R + 1, P + IWORK(I) = I - R + END DO + IF( WANTU1 ) THEN + CALL CLAPMT( .FALSE., P, P, U1, LDU1, IWORK ) + END IF + IF( WANTV1T ) THEN + CALL CLAPMR( .FALSE., P, Q, V1T, LDV1T, IWORK ) + END IF + END IF + END IF +* + RETURN +* +* End of CUNCSD2BY1 +* + END + diff --git a/dspl/liblapack/SRC/cung2l.f b/dspl/liblapack/SRC/cung2l.f new file mode 100644 index 0000000..88d285e --- /dev/null +++ b/dspl/liblapack/SRC/cung2l.f @@ -0,0 +1,199 @@ +*> \brief \b CUNG2L generates all or part of the unitary matrix Q from a QL factorization determined by cgeqlf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNG2L + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNG2L( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNG2L generates an m by n complex matrix Q with orthonormal columns, +*> which is defined as the last n columns of a product of k elementary +*> reflectors of order m +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by CGEQLF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the (n-k+i)-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by CGEQLF in the last k columns of its array +*> argument A. +*> On exit, the m-by-n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CGEQLF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNG2L( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, II, J, L +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNG2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns 1:n-k to columns of the unit matrix +* + DO 20 J = 1, N - K + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( M-N+J, J ) = ONE + 20 CONTINUE +* + DO 40 I = 1, K + II = N - K + I +* +* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left +* + A( M-N+II, II ) = ONE + CALL CLARF( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), A, + $ LDA, WORK ) + CALL CSCAL( M-N+II-1, -TAU( I ), A( 1, II ), 1 ) + A( M-N+II, II ) = ONE - TAU( I ) +* +* Set A(m-k+i+1:m,n-k+i) to zero +* + DO 30 L = M - N + II + 1, M + A( L, II ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of CUNG2L +* + END diff --git a/dspl/liblapack/SRC/cung2r.f b/dspl/liblapack/SRC/cung2r.f new file mode 100644 index 0000000..eea37d1 --- /dev/null +++ b/dspl/liblapack/SRC/cung2r.f @@ -0,0 +1,201 @@ +*> \brief \b CUNG2R +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNG2R + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNG2R( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNG2R generates an m by n complex matrix Q with orthonormal columns, +*> which is defined as the first n columns of a product of k elementary +*> reflectors of order m +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by CGEQRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the i-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by CGEQRF in the first k columns of its array +*> argument A. +*> On exit, the m by n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CGEQRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNG2R( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL CLARF, CSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNG2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns k+1:n to columns of the unit matrix +* + DO 20 J = K + 1, N + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( J, J ) = ONE + 20 CONTINUE +* + DO 40 I = K, 1, -1 +* +* Apply H(i) to A(i:m,i:n) from the left +* + IF( I.LT.N ) THEN + A( I, I ) = ONE + CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) + END IF + IF( I.LT.M ) + $ CALL CSCAL( M-I, -TAU( I ), A( I+1, I ), 1 ) + A( I, I ) = ONE - TAU( I ) +* +* Set A(1:i-1,i) to zero +* + DO 30 L = 1, I - 1 + A( L, I ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of CUNG2R +* + END diff --git a/dspl/liblapack/SRC/cungbr.f b/dspl/liblapack/SRC/cungbr.f new file mode 100644 index 0000000..df25799 --- /dev/null +++ b/dspl/liblapack/SRC/cungbr.f @@ -0,0 +1,338 @@ +*> \brief \b CUNGBR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNGBR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER VECT +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNGBR generates one of the complex unitary matrices Q or P**H +*> determined by CGEBRD when reducing a complex matrix A to bidiagonal +*> form: A = Q * B * P**H. Q and P**H are defined as products of +*> elementary reflectors H(i) or G(i) respectively. +*> +*> If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q +*> is of order M: +*> if m >= k, Q = H(1) H(2) . . . H(k) and CUNGBR returns the first n +*> columns of Q, where m >= n >= k; +*> if m < k, Q = H(1) H(2) . . . H(m-1) and CUNGBR returns Q as an +*> M-by-M matrix. +*> +*> If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H +*> is of order N: +*> if k < n, P**H = G(k) . . . G(2) G(1) and CUNGBR returns the first m +*> rows of P**H, where n >= m >= k; +*> if k >= n, P**H = G(n-1) . . . G(2) G(1) and CUNGBR returns P**H as +*> an N-by-N matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> Specifies whether the matrix Q or the matrix P**H is +*> required, as defined in the transformation applied by CGEBRD: +*> = 'Q': generate Q; +*> = 'P': generate P**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q or P**H to be returned. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q or P**H to be returned. +*> N >= 0. +*> If VECT = 'Q', M >= N >= min(M,K); +*> if VECT = 'P', N >= M >= min(N,K). +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> If VECT = 'Q', the number of columns in the original M-by-K +*> matrix reduced by CGEBRD. +*> If VECT = 'P', the number of rows in the original K-by-N +*> matrix reduced by CGEBRD. +*> K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the vectors which define the elementary reflectors, +*> as returned by CGEBRD. +*> On exit, the M-by-N matrix Q or P**H. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= M. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension +*> (min(M,K)) if VECT = 'Q' +*> (min(N,K)) if VECT = 'P' +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i) or G(i), which determines Q or P**H, as +*> returned by CGEBRD in its array argument TAUQ or TAUP. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,min(M,N)). +*> For optimum performance LWORK >= min(M,N)*NB, where NB +*> is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complexGBcomputational +* +* ===================================================================== + SUBROUTINE CUNGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER VECT + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTQ + INTEGER I, IINFO, J, LWKOPT, MN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CUNGLQ, CUNGQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTQ = LSAME( VECT, 'Q' ) + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 .OR. ( WANTQ .AND. ( N.GT.M .OR. N.LT.MIN( M, + $ K ) ) ) .OR. ( .NOT.WANTQ .AND. ( M.GT.N .OR. M.LT. + $ MIN( N, K ) ) ) ) THEN + INFO = -3 + ELSE IF( K.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LWORK.LT.MAX( 1, MN ) .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = 1 + IF( WANTQ ) THEN + IF( M.GE.K ) THEN + CALL CUNGQR( M, N, K, A, LDA, TAU, WORK, -1, IINFO ) + ELSE + IF( M.GT.1 ) THEN + CALL CUNGQR( M-1, M-1, M-1, A( 2, 2 ), LDA, TAU, WORK, + $ -1, IINFO ) + END IF + END IF + ELSE + IF( K.LT.N ) THEN + CALL CUNGLQ( M, N, K, A, LDA, TAU, WORK, -1, IINFO ) + ELSE + IF( N.GT.1 ) THEN + CALL CUNGLQ( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ -1, IINFO ) + END IF + END IF + END IF + LWKOPT = WORK( 1 ) + LWKOPT = MAX (LWKOPT, MN) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK( 1 ) = LWKOPT + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( WANTQ ) THEN +* +* Form Q, determined by a call to CGEBRD to reduce an m-by-k +* matrix +* + IF( M.GE.K ) THEN +* +* If m >= k, assume m >= n >= k +* + CALL CUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If m < k, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q +* to those of the unit matrix +* + DO 20 J = M, 2, -1 + A( 1, J ) = ZERO + DO 10 I = J + 1, M + A( I, J ) = A( I, J-1 ) + 10 CONTINUE + 20 CONTINUE + A( 1, 1 ) = ONE + DO 30 I = 2, M + A( I, 1 ) = ZERO + 30 CONTINUE + IF( M.GT.1 ) THEN +* +* Form Q(2:m,2:m) +* + CALL CUNGQR( M-1, M-1, M-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + ELSE +* +* Form P**H, determined by a call to CGEBRD to reduce a k-by-n +* matrix +* + IF( K.LT.N ) THEN +* +* If k < n, assume k <= m <= n +* + CALL CUNGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If k >= n, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* row downward, and set the first row and column of P**H to +* those of the unit matrix +* + A( 1, 1 ) = ONE + DO 40 I = 2, N + A( I, 1 ) = ZERO + 40 CONTINUE + DO 60 J = 2, N + DO 50 I = J - 1, 2, -1 + A( I, J ) = A( I-1, J ) + 50 CONTINUE + A( 1, J ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Form P**H(2:n,2:n) +* + CALL CUNGLQ( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNGBR +* + END diff --git a/dspl/liblapack/SRC/cunghr.f b/dspl/liblapack/SRC/cunghr.f new file mode 100644 index 0000000..40c71d4 --- /dev/null +++ b/dspl/liblapack/SRC/cunghr.f @@ -0,0 +1,241 @@ +*> \brief \b CUNGHR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNGHR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNGHR( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNGHR generates a complex unitary matrix Q which is defined as the +*> product of IHI-ILO elementary reflectors of order N, as returned by +*> CGEHRD: +*> +*> Q = H(ilo) H(ilo+1) . . . H(ihi-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix Q. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> ILO and IHI must have the same values as in the previous call +*> of CGEHRD. Q is equal to the unit matrix except in the +*> submatrix Q(ilo+1:ihi,ilo+1:ihi). +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the vectors which define the elementary reflectors, +*> as returned by CGEHRD. +*> On exit, the N-by-N unitary matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (N-1) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CGEHRD. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= IHI-ILO. +*> For optimum performance LWORK >= (IHI-ILO)*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNGHR( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LWKOPT, NB, NH +* .. +* .. External Subroutines .. + EXTERNAL CUNGQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NH = IHI - ILO + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, NH ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'CUNGQR', ' ', NH, NH, NH, -1 ) + LWKOPT = MAX( 1, NH )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGHR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first ilo and the last n-ihi +* rows and columns to those of the unit matrix +* + DO 40 J = IHI, ILO + 1, -1 + DO 10 I = 1, J - 1 + A( I, J ) = ZERO + 10 CONTINUE + DO 20 I = J + 1, IHI + A( I, J ) = A( I, J-1 ) + 20 CONTINUE + DO 30 I = IHI + 1, N + A( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + DO 60 J = 1, ILO + DO 50 I = 1, N + A( I, J ) = ZERO + 50 CONTINUE + A( J, J ) = ONE + 60 CONTINUE + DO 80 J = IHI + 1, N + DO 70 I = 1, N + A( I, J ) = ZERO + 70 CONTINUE + A( J, J ) = ONE + 80 CONTINUE +* + IF( NH.GT.0 ) THEN +* +* Generate Q(ilo+1:ihi,ilo+1:ihi) +* + CALL CUNGQR( NH, NH, NH, A( ILO+1, ILO+1 ), LDA, TAU( ILO ), + $ WORK, LWORK, IINFO ) + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNGHR +* + END diff --git a/dspl/liblapack/SRC/cungl2.f b/dspl/liblapack/SRC/cungl2.f new file mode 100644 index 0000000..837354d --- /dev/null +++ b/dspl/liblapack/SRC/cungl2.f @@ -0,0 +1,207 @@ +*> \brief \b CUNGL2 generates all or part of the unitary matrix Q from an LQ factorization determined by cgelqf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNGL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNGL2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNGL2 generates an m-by-n complex matrix Q with orthonormal rows, +*> which is defined as the first m rows of a product of k elementary +*> reflectors of order n +*> +*> Q = H(k)**H . . . H(2)**H H(1)**H +*> +*> as returned by CGELQF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. M >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the i-th row must contain the vector which defines +*> the elementary reflector H(i), for i = 1,2,...,k, as returned +*> by CGELQF in the first k rows of its array argument A. +*> On exit, the m by n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CGELQF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNGL2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARF, CSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGL2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) + $ RETURN +* + IF( K.LT.M ) THEN +* +* Initialise rows k+1:m to rows of the unit matrix +* + DO 20 J = 1, N + DO 10 L = K + 1, M + A( L, J ) = ZERO + 10 CONTINUE + IF( J.GT.K .AND. J.LE.M ) + $ A( J, J ) = ONE + 20 CONTINUE + END IF +* + DO 40 I = K, 1, -1 +* +* Apply H(i)**H to A(i:m,i:n) from the right +* + IF( I.LT.N ) THEN + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + IF( I.LT.M ) THEN + A( I, I ) = ONE + CALL CLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ CONJG( TAU( I ) ), A( I+1, I ), LDA, WORK ) + END IF + CALL CSCAL( N-I, -TAU( I ), A( I, I+1 ), LDA ) + CALL CLACGV( N-I, A( I, I+1 ), LDA ) + END IF + A( I, I ) = ONE - CONJG( TAU( I ) ) +* +* Set A(i,1:i-1,i) to zero +* + DO 30 L = 1, I - 1 + A( I, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of CUNGL2 +* + END diff --git a/dspl/liblapack/SRC/cunglq.f b/dspl/liblapack/SRC/cunglq.f new file mode 100644 index 0000000..10c2dee --- /dev/null +++ b/dspl/liblapack/SRC/cunglq.f @@ -0,0 +1,289 @@ +*> \brief \b CUNGLQ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNGLQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNGLQ generates an M-by-N complex matrix Q with orthonormal rows, +*> which is defined as the first M rows of a product of K elementary +*> reflectors of order N +*> +*> Q = H(k)**H . . . H(2)**H H(1)**H +*> +*> as returned by CGELQF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. M >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the i-th row must contain the vector which defines +*> the elementary reflector H(i), for i = 1,2,...,k, as returned +*> by CGELQF in the first k rows of its array argument A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CGELQF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit; +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNGL2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'CUNGLQ', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, M )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CUNGLQ', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNGLQ', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk rows are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(kk+1:m,1:kk) to zero. +* + DO 20 J = 1, KK + DO 10 I = KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.M ) + $ CALL CUNGL2( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL CLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**H to A(i+ib:m,i:n) from the right +* + CALL CLARFB( 'Right', 'Conjugate transpose', 'Forward', + $ 'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ), + $ LDA, WORK, LDWORK, A( I+IB, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H**H to columns i:n of current block +* + CALL CUNGL2( IB, N-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set columns 1:i-1 of current block to zero +* + DO 40 J = 1, I - 1 + DO 30 L = I, I + IB - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of CUNGLQ +* + END diff --git a/dspl/liblapack/SRC/cungql.f b/dspl/liblapack/SRC/cungql.f new file mode 100644 index 0000000..4a13826 --- /dev/null +++ b/dspl/liblapack/SRC/cungql.f @@ -0,0 +1,296 @@ +*> \brief \b CUNGQL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNGQL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNGQL generates an M-by-N complex matrix Q with orthonormal columns, +*> which is defined as the last N columns of a product of K elementary +*> reflectors of order M +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by CGEQLF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the (n-k+i)-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by CGEQLF in the last k columns of its array +*> argument A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CGEQLF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KK, L, LDWORK, LWKOPT, + $ NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNG2L, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'CUNGQL', ' ', M, N, K, -1 ) + LWKOPT = N*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CUNGQL', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNGQL', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the first block. +* The last kk columns are handled by the block method. +* + KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB ) +* +* Set A(m-kk+1:m,1:n-kk) to zero. +* + DO 20 J = 1, N - KK + DO 10 I = M - KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the first or only block. +* + CALL CUNG2L( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = K - KK + 1, K, NB + IB = MIN( NB, K-I+1 ) + IF( N-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL CLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left +* + CALL CLARFB( 'Left', 'No transpose', 'Backward', + $ 'Columnwise', M-K+I+IB-1, N-K+I-1, IB, + $ A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows 1:m-k+i+ib-1 of current block +* + CALL CUNG2L( M-K+I+IB-1, IB, IB, A( 1, N-K+I ), LDA, + $ TAU( I ), WORK, IINFO ) +* +* Set rows m-k+i+ib:m of current block to zero +* + DO 40 J = N - K + I, N - K + I + IB - 1 + DO 30 L = M - K + I + IB, M + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of CUNGQL +* + END diff --git a/dspl/liblapack/SRC/cungqr.f b/dspl/liblapack/SRC/cungqr.f new file mode 100644 index 0000000..f8d0642 --- /dev/null +++ b/dspl/liblapack/SRC/cungqr.f @@ -0,0 +1,290 @@ +*> \brief \b CUNGQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNGQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNGQR generates an M-by-N complex matrix Q with orthonormal columns, +*> which is defined as the first N columns of a product of K elementary +*> reflectors of order M +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by CGEQRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the i-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by CGEQRF in the first k columns of its array +*> argument A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CGEQRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNG2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'CUNGQR', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, N )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CUNGQR', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNGQR', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk columns are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(1:kk,kk+1:n) to zero. +* + DO 20 J = KK + 1, N + DO 10 I = 1, KK + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.N ) + $ CALL CUNG2R( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL CLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i:m,i+ib:n) from the left +* + CALL CLARFB( 'Left', 'No transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows i:m of current block +* + CALL CUNG2R( M-I+1, IB, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set rows 1:i-1 of current block to zero +* + DO 40 J = I, I + IB - 1 + DO 30 L = 1, I - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of CUNGQR +* + END diff --git a/dspl/liblapack/SRC/cungr2.f b/dspl/liblapack/SRC/cungr2.f new file mode 100644 index 0000000..defc585 --- /dev/null +++ b/dspl/liblapack/SRC/cungr2.f @@ -0,0 +1,205 @@ +*> \brief \b CUNGR2 generates all or part of the unitary matrix Q from an RQ factorization determined by cgerqf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNGR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNGR2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNGR2 generates an m by n complex matrix Q with orthonormal rows, +*> which is defined as the last m rows of a product of k elementary +*> reflectors of order n +*> +*> Q = H(1)**H H(2)**H . . . H(k)**H +*> +*> as returned by CGERQF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. M >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the (m-k+i)-th row must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by CGERQF in the last k rows of its array argument +*> A. +*> On exit, the m-by-n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CGERQF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNGR2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE, ZERO + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ), + $ ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, II, J, L +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARF, CSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGR2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) + $ RETURN +* + IF( K.LT.M ) THEN +* +* Initialise rows 1:m-k to rows of the unit matrix +* + DO 20 J = 1, N + DO 10 L = 1, M - K + A( L, J ) = ZERO + 10 CONTINUE + IF( J.GT.N-M .AND. J.LE.N-K ) + $ A( M-N+J, J ) = ONE + 20 CONTINUE + END IF +* + DO 40 I = 1, K + II = M - K + I +* +* Apply H(i)**H to A(1:m-k+i,1:n-k+i) from the right +* + CALL CLACGV( N-M+II-1, A( II, 1 ), LDA ) + A( II, N-M+II ) = ONE + CALL CLARF( 'Right', II-1, N-M+II, A( II, 1 ), LDA, + $ CONJG( TAU( I ) ), A, LDA, WORK ) + CALL CSCAL( N-M+II-1, -TAU( I ), A( II, 1 ), LDA ) + CALL CLACGV( N-M+II-1, A( II, 1 ), LDA ) + A( II, N-M+II ) = ONE - CONJG( TAU( I ) ) +* +* Set A(m-k+i,n-k+i+1:n) to zero +* + DO 30 L = N - M + II + 1, N + A( II, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of CUNGR2 +* + END diff --git a/dspl/liblapack/SRC/cungrq.f b/dspl/liblapack/SRC/cungrq.f new file mode 100644 index 0000000..5083744 --- /dev/null +++ b/dspl/liblapack/SRC/cungrq.f @@ -0,0 +1,297 @@ +*> \brief \b CUNGRQ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNGRQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNGRQ generates an M-by-N complex matrix Q with orthonormal rows, +*> which is defined as the last M rows of a product of K elementary +*> reflectors of order N +*> +*> Q = H(1)**H H(2)**H . . . H(k)**H +*> +*> as returned by CGERQF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. M >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the (m-k+i)-th row must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by CGERQF in the last k rows of its array argument +*> A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CGERQF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, II, IINFO, IWS, J, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNGR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( M.LE.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'CUNGRQ', ' ', M, N, K, -1 ) + LWKOPT = M*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGRQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'CUNGRQ', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNGRQ', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the first block. +* The last kk rows are handled by the block method. +* + KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB ) +* +* Set A(1:m-kk,n-kk+1:n) to zero. +* + DO 20 J = N - KK + 1, N + DO 10 I = 1, M - KK + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the first or only block. +* + CALL CUNGR2( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = K - KK + 1, K, NB + IB = MIN( NB, K-I+1 ) + II = M - K + I + IF( II.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL CLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, + $ A( II, 1 ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right +* + CALL CLARFB( 'Right', 'Conjugate transpose', 'Backward', + $ 'Rowwise', II-1, N-K+I+IB-1, IB, A( II, 1 ), + $ LDA, WORK, LDWORK, A, LDA, WORK( IB+1 ), + $ LDWORK ) + END IF +* +* Apply H**H to columns 1:n-k+i+ib-1 of current block +* + CALL CUNGR2( IB, N-K+I+IB-1, IB, A( II, 1 ), LDA, TAU( I ), + $ WORK, IINFO ) +* +* Set columns n-k+i+ib:n of current block to zero +* + DO 40 L = N - K + I + IB, N + DO 30 J = II, II + IB - 1 + A( J, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of CUNGRQ +* + END diff --git a/dspl/liblapack/SRC/cungtr.f b/dspl/liblapack/SRC/cungtr.f new file mode 100644 index 0000000..2749bc3 --- /dev/null +++ b/dspl/liblapack/SRC/cungtr.f @@ -0,0 +1,256 @@ +*> \brief \b CUNGTR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNGTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNGTR generates a complex unitary matrix Q which is defined as the +*> product of n-1 elementary reflectors of order N, as returned by +*> CHETRD: +*> +*> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), +*> +*> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A contains elementary reflectors +*> from CHETRD; +*> = 'L': Lower triangle of A contains elementary reflectors +*> from CHETRD. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix Q. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the vectors which define the elementary reflectors, +*> as returned by CHETRD. +*> On exit, the N-by-N unitary matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= N. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (N-1) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CHETRD. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= N-1. +*> For optimum performance LWORK >= (N-1)*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, J, LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CUNGQL, CUNGQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + IF ( UPPER ) THEN + NB = ILAENV( 1, 'CUNGQL', ' ', N-1, N-1, N-1, -1 ) + ELSE + NB = ILAENV( 1, 'CUNGQR', ' ', N-1, N-1, N-1, -1 ) + END IF + LWKOPT = MAX( 1, N-1 )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNGTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to CHETRD with UPLO = 'U' +* +* Shift the vectors which define the elementary reflectors one +* column to the left, and set the last row and column of Q to +* those of the unit matrix +* + DO 20 J = 1, N - 1 + DO 10 I = 1, J - 1 + A( I, J ) = A( I, J+1 ) + 10 CONTINUE + A( N, J ) = ZERO + 20 CONTINUE + DO 30 I = 1, N - 1 + A( I, N ) = ZERO + 30 CONTINUE + A( N, N ) = ONE +* +* Generate Q(1:n-1,1:n-1) +* + CALL CUNGQL( N-1, N-1, N-1, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* Q was determined by a call to CHETRD with UPLO = 'L'. +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q to +* those of the unit matrix +* + DO 50 J = N, 2, -1 + A( 1, J ) = ZERO + DO 40 I = J + 1, N + A( I, J ) = A( I, J-1 ) + 40 CONTINUE + 50 CONTINUE + A( 1, 1 ) = ONE + DO 60 I = 2, N + A( I, 1 ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Generate Q(2:n,2:n) +* + CALL CUNGQR( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNGTR +* + END diff --git a/dspl/liblapack/SRC/cunm22.f b/dspl/liblapack/SRC/cunm22.f new file mode 100644 index 0000000..3c6b4c1 --- /dev/null +++ b/dspl/liblapack/SRC/cunm22.f @@ -0,0 +1,440 @@ +*> \brief \b CUNM22 multiplies a general matrix by a banded unitary matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNM22 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNM22( SIDE, TRANS, M, N, N1, N2, Q, LDQ, C, LDC, +* $ WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER M, N, N1, N2, LDQ, LDC, LWORK, INFO +* .. +* .. Array Arguments .. +* COMPLEX Q( LDQ, * ), C( LDC, * ), WORK( * ) +* .. +* +*> \par Purpose +* ============ +*> +*> \verbatim +*> +*> CUNM22 overwrites the general complex M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> +*> where Q is a complex unitary matrix of order NQ, with NQ = M if +*> SIDE = 'L' and NQ = N if SIDE = 'R'. +*> The unitary matrix Q processes a 2-by-2 block structure +*> +*> [ Q11 Q12 ] +*> Q = [ ] +*> [ Q21 Q22 ], +*> +*> where Q12 is an N1-by-N1 lower triangular matrix and Q21 is an +*> N2-by-N2 upper triangular matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose); +*> = 'C': apply Q**H (Conjugate transpose). +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] N1 +*> \param[in] N2 +*> \verbatim +*> N1 is INTEGER +*> N2 is INTEGER +*> The dimension of Q12 and Q21, respectively. N1, N2 >= 0. +*> The following requirement must be satisfied: +*> N1 + N2 = M if SIDE = 'L' and N1 + N2 = N if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is COMPLEX array, dimension +*> (LDQ,M) if SIDE = 'L' +*> (LDQ,N) if SIDE = 'R' +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= max(1,M) if SIDE = 'L'; LDQ >= max(1,N) if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For optimum performance LWORK >= M*N. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date January 2015 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNM22( SIDE, TRANS, M, N, N1, N2, Q, LDQ, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2015 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER M, N, N1, N2, LDQ, LDC, LWORK, INFO +* .. +* .. Array Arguments .. + COMPLEX Q( LDQ, * ), C( LDC, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, LDWORK, LEN, LWKOPT, NB, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CGEMM, CLACPY, CTRMM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CMPLX, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q; +* NW is the minimum dimension of WORK. +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + NW = NQ + IF( N1.EQ.0 .OR. N2.EQ.0 ) NW = 1 + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) + $ THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( N1.LT.0 .OR. N1+N2.NE.NQ ) THEN + INFO = -5 + ELSE IF( N2.LT.0 ) THEN + INFO = -6 + ELSE IF( LDQ.LT.MAX( 1, NQ ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKOPT = M*N + WORK( 1 ) = CMPLX( LWKOPT ) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNM22', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Degenerate cases (N1 = 0 or N2 = 0) are handled using CTRMM. +* + IF( N1.EQ.0 ) THEN + CALL CTRMM( SIDE, 'Upper', TRANS, 'Non-Unit', M, N, ONE, + $ Q, LDQ, C, LDC ) + WORK( 1 ) = ONE + RETURN + ELSE IF( N2.EQ.0 ) THEN + CALL CTRMM( SIDE, 'Lower', TRANS, 'Non-Unit', M, N, ONE, + $ Q, LDQ, C, LDC ) + WORK( 1 ) = ONE + RETURN + END IF +* +* Compute the largest chunk size available from the workspace. +* + NB = MAX( 1, MIN( LWORK, LWKOPT ) / NQ ) +* + IF( LEFT ) THEN + IF( NOTRAN ) THEN + DO I = 1, N, NB + LEN = MIN( NB, N-I+1 ) + LDWORK = M +* +* Multiply bottom part of C by Q12. +* + CALL CLACPY( 'All', N1, LEN, C( N2+1, I ), LDC, WORK, + $ LDWORK ) + CALL CTRMM( 'Left', 'Lower', 'No Transpose', 'Non-Unit', + $ N1, LEN, ONE, Q( 1, N2+1 ), LDQ, WORK, + $ LDWORK ) +* +* Multiply top part of C by Q11. +* + CALL CGEMM( 'No Transpose', 'No Transpose', N1, LEN, N2, + $ ONE, Q, LDQ, C( 1, I ), LDC, ONE, WORK, + $ LDWORK ) +* +* Multiply top part of C by Q21. +* + CALL CLACPY( 'All', N2, LEN, C( 1, I ), LDC, + $ WORK( N1+1 ), LDWORK ) + CALL CTRMM( 'Left', 'Upper', 'No Transpose', 'Non-Unit', + $ N2, LEN, ONE, Q( N1+1, 1 ), LDQ, + $ WORK( N1+1 ), LDWORK ) +* +* Multiply bottom part of C by Q22. +* + CALL CGEMM( 'No Transpose', 'No Transpose', N2, LEN, N1, + $ ONE, Q( N1+1, N2+1 ), LDQ, C( N2+1, I ), LDC, + $ ONE, WORK( N1+1 ), LDWORK ) +* +* Copy everything back. +* + CALL CLACPY( 'All', M, LEN, WORK, LDWORK, C( 1, I ), + $ LDC ) + END DO + ELSE + DO I = 1, N, NB + LEN = MIN( NB, N-I+1 ) + LDWORK = M +* +* Multiply bottom part of C by Q21**H. +* + CALL CLACPY( 'All', N2, LEN, C( N1+1, I ), LDC, WORK, + $ LDWORK ) + CALL CTRMM( 'Left', 'Upper', 'Conjugate', 'Non-Unit', + $ N2, LEN, ONE, Q( N1+1, 1 ), LDQ, WORK, + $ LDWORK ) +* +* Multiply top part of C by Q11**H. +* + CALL CGEMM( 'Conjugate', 'No Transpose', N2, LEN, N1, + $ ONE, Q, LDQ, C( 1, I ), LDC, ONE, WORK, + $ LDWORK ) +* +* Multiply top part of C by Q12**H. +* + CALL CLACPY( 'All', N1, LEN, C( 1, I ), LDC, + $ WORK( N2+1 ), LDWORK ) + CALL CTRMM( 'Left', 'Lower', 'Conjugate', 'Non-Unit', + $ N1, LEN, ONE, Q( 1, N2+1 ), LDQ, + $ WORK( N2+1 ), LDWORK ) +* +* Multiply bottom part of C by Q22**H. +* + CALL CGEMM( 'Conjugate', 'No Transpose', N1, LEN, N2, + $ ONE, Q( N1+1, N2+1 ), LDQ, C( N1+1, I ), LDC, + $ ONE, WORK( N2+1 ), LDWORK ) +* +* Copy everything back. +* + CALL CLACPY( 'All', M, LEN, WORK, LDWORK, C( 1, I ), + $ LDC ) + END DO + END IF + ELSE + IF( NOTRAN ) THEN + DO I = 1, M, NB + LEN = MIN( NB, M-I+1 ) + LDWORK = LEN +* +* Multiply right part of C by Q21. +* + CALL CLACPY( 'All', LEN, N2, C( I, N1+1 ), LDC, WORK, + $ LDWORK ) + CALL CTRMM( 'Right', 'Upper', 'No Transpose', 'Non-Unit', + $ LEN, N2, ONE, Q( N1+1, 1 ), LDQ, WORK, + $ LDWORK ) +* +* Multiply left part of C by Q11. +* + CALL CGEMM( 'No Transpose', 'No Transpose', LEN, N2, N1, + $ ONE, C( I, 1 ), LDC, Q, LDQ, ONE, WORK, + $ LDWORK ) +* +* Multiply left part of C by Q12. +* + CALL CLACPY( 'All', LEN, N1, C( I, 1 ), LDC, + $ WORK( 1 + N2*LDWORK ), LDWORK ) + CALL CTRMM( 'Right', 'Lower', 'No Transpose', 'Non-Unit', + $ LEN, N1, ONE, Q( 1, N2+1 ), LDQ, + $ WORK( 1 + N2*LDWORK ), LDWORK ) +* +* Multiply right part of C by Q22. +* + CALL CGEMM( 'No Transpose', 'No Transpose', LEN, N1, N2, + $ ONE, C( I, N1+1 ), LDC, Q( N1+1, N2+1 ), LDQ, + $ ONE, WORK( 1 + N2*LDWORK ), LDWORK ) +* +* Copy everything back. +* + CALL CLACPY( 'All', LEN, N, WORK, LDWORK, C( I, 1 ), + $ LDC ) + END DO + ELSE + DO I = 1, M, NB + LEN = MIN( NB, M-I+1 ) + LDWORK = LEN +* +* Multiply right part of C by Q12**H. +* + CALL CLACPY( 'All', LEN, N1, C( I, N2+1 ), LDC, WORK, + $ LDWORK ) + CALL CTRMM( 'Right', 'Lower', 'Conjugate', 'Non-Unit', + $ LEN, N1, ONE, Q( 1, N2+1 ), LDQ, WORK, + $ LDWORK ) +* +* Multiply left part of C by Q11**H. +* + CALL CGEMM( 'No Transpose', 'Conjugate', LEN, N1, N2, + $ ONE, C( I, 1 ), LDC, Q, LDQ, ONE, WORK, + $ LDWORK ) +* +* Multiply left part of C by Q21**H. +* + CALL CLACPY( 'All', LEN, N2, C( I, 1 ), LDC, + $ WORK( 1 + N1*LDWORK ), LDWORK ) + CALL CTRMM( 'Right', 'Upper', 'Conjugate', 'Non-Unit', + $ LEN, N2, ONE, Q( N1+1, 1 ), LDQ, + $ WORK( 1 + N1*LDWORK ), LDWORK ) +* +* Multiply right part of C by Q22**H. +* + CALL CGEMM( 'No Transpose', 'Conjugate', LEN, N2, N1, + $ ONE, C( I, N2+1 ), LDC, Q( N1+1, N2+1 ), LDQ, + $ ONE, WORK( 1 + N1*LDWORK ), LDWORK ) +* +* Copy everything back. +* + CALL CLACPY( 'All', LEN, N, WORK, LDWORK, C( I, 1 ), + $ LDC ) + END DO + END IF + END IF +* + WORK( 1 ) = CMPLX( LWKOPT ) + RETURN +* +* End of CUNM22 +* + END diff --git a/dspl/liblapack/SRC/cunm2l.f b/dspl/liblapack/SRC/cunm2l.f new file mode 100644 index 0000000..75dd5f9 --- /dev/null +++ b/dspl/liblapack/SRC/cunm2l.f @@ -0,0 +1,281 @@ +*> \brief \b CUNM2L multiplies a general matrix by the unitary matrix from a QL factorization determined by cgeqlf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNM2L + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNM2L overwrites the general complex m-by-n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**H* C if SIDE = 'L' and TRANS = 'C', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**H if SIDE = 'R' and TRANS = 'C', +*> +*> where Q is a complex unitary matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by CGEQLF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left +*> = 'R': apply Q or Q**H from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'C': apply Q**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> CGEQLF in the last k columns of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CGEQLF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the m-by-n matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, MI, NI, NQ + COMPLEX AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNM2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)**H is applied to C(1:m-k+i,1:n) +* + MI = M - K + I + ELSE +* +* H(i) or H(i)**H is applied to C(1:m,1:n-k+i) +* + NI = N - K + I + END IF +* +* Apply H(i) or H(i)**H +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = CONJG( TAU( I ) ) + END IF + AII = A( NQ-K+I, I ) + A( NQ-K+I, I ) = ONE + CALL CLARF( SIDE, MI, NI, A( 1, I ), 1, TAUI, C, LDC, WORK ) + A( NQ-K+I, I ) = AII + 10 CONTINUE + RETURN +* +* End of CUNM2L +* + END diff --git a/dspl/liblapack/SRC/cunm2r.f b/dspl/liblapack/SRC/cunm2r.f new file mode 100644 index 0000000..569276f --- /dev/null +++ b/dspl/liblapack/SRC/cunm2r.f @@ -0,0 +1,286 @@ +*> \brief \b CUNM2R multiplies a general matrix by the unitary matrix from a QR factorization determined by cgeqrf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNM2R + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNM2R overwrites the general complex m-by-n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**H* C if SIDE = 'L' and TRANS = 'C', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**H if SIDE = 'R' and TRANS = 'C', +*> +*> where Q is a complex unitary matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by CGEQRF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left +*> = 'R': apply Q or Q**H from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'C': apply Q**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> CGEQRF in the first k columns of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CGEQRF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the m-by-n matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + COMPLEX AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNM2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)**H is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) or H(i)**H is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) or H(i)**H +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = CONJG( TAU( I ) ) + END IF + AII = A( I, I ) + A( I, I ) = ONE + CALL CLARF( SIDE, MI, NI, A( I, I ), 1, TAUI, C( IC, JC ), LDC, + $ WORK ) + A( I, I ) = AII + 10 CONTINUE + RETURN +* +* End of CUNM2R +* + END diff --git a/dspl/liblapack/SRC/cunmbr.f b/dspl/liblapack/SRC/cunmbr.f new file mode 100644 index 0000000..4136fcd --- /dev/null +++ b/dspl/liblapack/SRC/cunmbr.f @@ -0,0 +1,381 @@ +*> \brief \b CUNMBR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNMBR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, +* LDC, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS, VECT +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> If VECT = 'Q', CUNMBR overwrites the general complex M-by-N matrix C +*> with +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> +*> If VECT = 'P', CUNMBR overwrites the general complex M-by-N matrix C +*> with +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': P * C C * P +*> TRANS = 'C': P**H * C C * P**H +*> +*> Here Q and P**H are the unitary matrices determined by CGEBRD when +*> reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q +*> and P**H are defined as products of elementary reflectors H(i) and +*> G(i) respectively. +*> +*> Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the +*> order of the unitary matrix Q or P**H that is applied. +*> +*> If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: +*> if nq >= k, Q = H(1) H(2) . . . H(k); +*> if nq < k, Q = H(1) H(2) . . . H(nq-1). +*> +*> If VECT = 'P', A is assumed to have been a K-by-NQ matrix: +*> if k < nq, P = G(1) G(2) . . . G(k); +*> if k >= nq, P = G(1) G(2) . . . G(nq-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'Q': apply Q or Q**H; +*> = 'P': apply P or P**H. +*> \endverbatim +*> +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q, Q**H, P or P**H from the Left; +*> = 'R': apply Q, Q**H, P or P**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q or P; +*> = 'C': Conjugate transpose, apply Q**H or P**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> If VECT = 'Q', the number of columns in the original +*> matrix reduced by CGEBRD. +*> If VECT = 'P', the number of rows in the original +*> matrix reduced by CGEBRD. +*> K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension +*> (LDA,min(nq,K)) if VECT = 'Q' +*> (LDA,nq) if VECT = 'P' +*> The vectors which define the elementary reflectors H(i) and +*> G(i), whose products determine the matrices Q and P, as +*> returned by CGEBRD. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If VECT = 'Q', LDA >= max(1,nq); +*> if VECT = 'P', LDA >= max(1,min(nq,K)). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (min(nq,K)) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i) or G(i) which determines Q or P, as returned +*> by CGEBRD in the array argument TAUQ or TAUP. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q +*> or P*C or P**H*C or C*P or C*P**H. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M); +*> if N = 0 or M = 0, LWORK >= 1. +*> For optimum performance LWORK >= max(1,N*NB) if SIDE = 'L', +*> and LWORK >= max(1,M*NB) if SIDE = 'R', where NB is the +*> optimal blocksize. (NB = 0 if M = 0 or N = 0.) +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, + $ LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, VECT + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL APPLYQ, LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CUNMLQ, CUNMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + APPLYQ = LSAME( VECT, 'Q' ) + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q or P and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + NW = 0 + END IF + IF( .NOT.APPLYQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( K.LT.0 ) THEN + INFO = -6 + ELSE IF( ( APPLYQ .AND. LDA.LT.MAX( 1, NQ ) ) .OR. + $ ( .NOT.APPLYQ .AND. LDA.LT.MAX( 1, MIN( NQ, K ) ) ) ) + $ THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( NW.GT.0 ) THEN + IF( APPLYQ ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + ELSE + IF( LEFT ) THEN + NB = ILAENV( 1, 'CUNMLQ', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'CUNMLQ', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + END IF + LWKOPT = MAX( 1, NW*NB ) + ELSE + LWKOPT = 1 + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + IF( APPLYQ ) THEN +* +* Apply Q +* + IF( NQ.GE.K ) THEN +* +* Q was determined by a call to CGEBRD with nq >= k +* + CALL CUNMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* Q was determined by a call to CGEBRD with nq < k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL CUNMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, + $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + ELSE +* +* Apply P +* + IF( NOTRAN ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF + IF( NQ.GT.K ) THEN +* +* P was determined by a call to CGEBRD with nq > k +* + CALL CUNMLQ( SIDE, TRANST, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* P was determined by a call to CGEBRD with nq <= k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL CUNMLQ( SIDE, TRANST, MI, NI, NQ-1, A( 1, 2 ), LDA, + $ TAU, C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNMBR +* + END diff --git a/dspl/liblapack/SRC/cunmhr.f b/dspl/liblapack/SRC/cunmhr.f new file mode 100644 index 0000000..aa65886 --- /dev/null +++ b/dspl/liblapack/SRC/cunmhr.f @@ -0,0 +1,296 @@ +*> \brief \b CUNMHR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNMHR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNMHR( SIDE, TRANS, M, N, ILO, IHI, A, LDA, TAU, C, +* LDC, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER IHI, ILO, INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNMHR overwrites the general complex M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> +*> where Q is a complex unitary matrix of order nq, with nq = m if +*> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +*> IHI-ILO elementary reflectors, as returned by CGEHRD: +*> +*> Q = H(ilo) H(ilo+1) . . . H(ihi-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'C': apply Q**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> ILO and IHI must have the same values as in the previous call +*> of CGEHRD. Q is equal to the unit matrix except in the +*> submatrix Q(ilo+1:ihi,ilo+1:ihi). +*> If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and +*> ILO = 1 and IHI = 0, if M = 0; +*> if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and +*> ILO = 1 and IHI = 0, if N = 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension +*> (LDA,M) if SIDE = 'L' +*> (LDA,N) if SIDE = 'R' +*> The vectors which define the elementary reflectors, as +*> returned by CGEHRD. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CGEHRD. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For optimum performance LWORK >= N*NB if SIDE = 'L', and +*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal +*> blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNMHR( SIDE, TRANS, M, N, ILO, IHI, A, LDA, TAU, C, + $ LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER IHI, ILO, INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NH, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CUNMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NH = IHI - ILO + LEFT = LSAME( SIDE, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) + $ THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, NQ ) ) THEN + INFO = -5 + ELSE IF( IHI.LT.MIN( ILO, NQ ) .OR. IHI.GT.NQ ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, NH, N, NH, -1 ) + ELSE + NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, M, NH, NH, -1 ) + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMHR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. NH.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( LEFT ) THEN + MI = NH + NI = N + I1 = ILO + 1 + I2 = 1 + ELSE + MI = M + NI = NH + I1 = 1 + I2 = ILO + 1 + END IF +* + CALL CUNMQR( SIDE, TRANS, MI, NI, NH, A( ILO+1, ILO ), LDA, + $ TAU( ILO ), C( I1, I2 ), LDC, WORK, LWORK, IINFO ) +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNMHR +* + END diff --git a/dspl/liblapack/SRC/cunml2.f b/dspl/liblapack/SRC/cunml2.f new file mode 100644 index 0000000..b55b0c2 --- /dev/null +++ b/dspl/liblapack/SRC/cunml2.f @@ -0,0 +1,290 @@ +*> \brief \b CUNML2 multiplies a general matrix by the unitary matrix from a LQ factorization determined by cgelqf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNML2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNML2 overwrites the general complex m-by-n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**H* C if SIDE = 'L' and TRANS = 'C', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**H if SIDE = 'R' and TRANS = 'C', +*> +*> where Q is a complex unitary matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(k)**H . . . H(2)**H H(1)**H +*> +*> as returned by CGELQF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left +*> = 'R': apply Q or Q**H from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'C': apply Q**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> CGELQF in the first k rows of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CGELQF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the m-by-n matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + COMPLEX AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNML2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)**H is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) or H(i)**H is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) or H(i)**H +* + IF( NOTRAN ) THEN + TAUI = CONJG( TAU( I ) ) + ELSE + TAUI = TAU( I ) + END IF + IF( I.LT.NQ ) + $ CALL CLACGV( NQ-I, A( I, I+1 ), LDA ) + AII = A( I, I ) + A( I, I ) = ONE + CALL CLARF( SIDE, MI, NI, A( I, I ), LDA, TAUI, C( IC, JC ), + $ LDC, WORK ) + A( I, I ) = AII + IF( I.LT.NQ ) + $ CALL CLACGV( NQ-I, A( I, I+1 ), LDA ) + 10 CONTINUE + RETURN +* +* End of CUNML2 +* + END diff --git a/dspl/liblapack/SRC/cunmlq.f b/dspl/liblapack/SRC/cunmlq.f new file mode 100644 index 0000000..8cf4442 --- /dev/null +++ b/dspl/liblapack/SRC/cunmlq.f @@ -0,0 +1,354 @@ +*> \brief \b CUNMLQ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNMLQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNMLQ overwrites the general complex M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> +*> where Q is a complex unitary matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(k)**H . . . H(2)**H H(1)**H +*> +*> as returned by CGELQF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Conjugate transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> CGELQF in the first k rows of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CGELQF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNML2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = MIN( NBMAX, ILAENV( 1, 'CUNMLQ', SIDE // TRANS, M, N, + $ K, -1 ) ) + LWKOPT = MAX( 1, NW )*NB + TSIZE + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + RETURN + END IF +* +* Determine the block size +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL CUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IWT = 1 + NW*NB + IF( ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL CLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK( IWT ), LDT ) + IF( LEFT ) THEN +* +* H or H**H is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H**H is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H**H +* + CALL CLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB, + $ A( I, I ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNMLQ +* + END diff --git a/dspl/liblapack/SRC/cunmql.f b/dspl/liblapack/SRC/cunmql.f new file mode 100644 index 0000000..e727960 --- /dev/null +++ b/dspl/liblapack/SRC/cunmql.f @@ -0,0 +1,343 @@ +*> \brief \b CUNMQL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNMQL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNMQL overwrites the general complex M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> +*> where Q is a complex unitary matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by CGEQLF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> CGEQLF in the last k columns of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CGEQLF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT, + $ MI, NB, NBMIN, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNM2L, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = MAX( 1, N ) + ELSE + NQ = N + NW = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = MIN( NBMAX, ILAENV( 1, 'CUNMQL', SIDE // TRANS, M, N, + $ K, -1 ) ) + LWKOPT = NW*NB + TSIZE + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* +* Determine the block size +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IF( LWORK.LT.(NW*NB+TSIZE) ) THEN + NB = (LWORK-TSIZE) / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNMQL', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL CUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IWT = 1 + NW*NB + IF( ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL CLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB, + $ A( 1, I ), LDA, TAU( I ), WORK( IWT ), LDT ) + IF( LEFT ) THEN +* +* H or H**H is applied to C(1:m-k+i+ib-1,1:n) +* + MI = M - K + I + IB - 1 + ELSE +* +* H or H**H is applied to C(1:m,1:n-k+i+ib-1) +* + NI = N - K + I + IB - 1 + END IF +* +* Apply H or H**H +* + CALL CLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI, + $ IB, A( 1, I ), LDA, WORK( IWT ), LDT, C, LDC, + $ WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNMQL +* + END diff --git a/dspl/liblapack/SRC/cunmqr.f b/dspl/liblapack/SRC/cunmqr.f new file mode 100644 index 0000000..41fee71 --- /dev/null +++ b/dspl/liblapack/SRC/cunmqr.f @@ -0,0 +1,342 @@ +*> \brief \b CUNMQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNMQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNMQR overwrites the general complex M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> +*> where Q is a complex unitary matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by CGEQRF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Conjugate transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> CGEQRF in the first k columns of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CGEQRF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNM2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + NB = MIN( NBMAX, ILAENV( 1, 'CUNMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + TSIZE + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL CUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IWT = 1 + NW*NB + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL CLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK( IWT ), LDT ) + IF( LEFT ) THEN +* +* H or H**H is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H**H is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H**H +* + CALL CLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI, + $ IB, A( I, I ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNMQR +* + END diff --git a/dspl/liblapack/SRC/cunmr2.f b/dspl/liblapack/SRC/cunmr2.f new file mode 100644 index 0000000..40b6583 --- /dev/null +++ b/dspl/liblapack/SRC/cunmr2.f @@ -0,0 +1,283 @@ +*> \brief \b CUNMR2 multiplies a general matrix by the unitary matrix from a RQ factorization determined by cgerqf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNMR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNMR2 overwrites the general complex m-by-n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**H* C if SIDE = 'L' and TRANS = 'C', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**H if SIDE = 'R' and TRANS = 'C', +*> +*> where Q is a complex unitary matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1)**H H(2)**H . . . H(k)**H +*> +*> as returned by CGERQF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left +*> = 'R': apply Q or Q**H from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'C': apply Q**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> CGERQF in the last k rows of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CGERQF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the m-by-n matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, MI, NI, NQ + COMPLEX AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLACGV, CLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMR2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)**H is applied to C(1:m-k+i,1:n) +* + MI = M - K + I + ELSE +* +* H(i) or H(i)**H is applied to C(1:m,1:n-k+i) +* + NI = N - K + I + END IF +* +* Apply H(i) or H(i)**H +* + IF( NOTRAN ) THEN + TAUI = CONJG( TAU( I ) ) + ELSE + TAUI = TAU( I ) + END IF + CALL CLACGV( NQ-K+I-1, A( I, 1 ), LDA ) + AII = A( I, NQ-K+I ) + A( I, NQ-K+I ) = ONE + CALL CLARF( SIDE, MI, NI, A( I, 1 ), LDA, TAUI, C, LDC, WORK ) + A( I, NQ-K+I ) = AII + CALL CLACGV( NQ-K+I-1, A( I, 1 ), LDA ) + 10 CONTINUE + RETURN +* +* End of CUNMR2 +* + END diff --git a/dspl/liblapack/SRC/cunmr3.f b/dspl/liblapack/SRC/cunmr3.f new file mode 100644 index 0000000..0a8bad8 --- /dev/null +++ b/dspl/liblapack/SRC/cunmr3.f @@ -0,0 +1,305 @@ +*> \brief \b CUNMR3 multiplies a general matrix by the unitary matrix from a RZ factorization determined by ctzrzf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNMR3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, L, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNMR3 overwrites the general complex m by n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**H* C if SIDE = 'L' and TRANS = 'C', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**H if SIDE = 'R' and TRANS = 'C', +*> +*> where Q is a complex unitary matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by CTZRZF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left +*> = 'R': apply Q or Q**H from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'C': apply Q**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of columns of the matrix A containing +*> the meaningful part of the Householder reflectors. +*> If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> CTZRZF in the last k rows of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CTZRZF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the m-by-n matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CUNMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, L, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JA, JC, MI, NI, NQ + COMPLEX TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLARZ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR. + $ ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMR3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JA = M - L + 1 + JC = 1 + ELSE + MI = M + JA = N - L + 1 + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)**H is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) or H(i)**H is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) or H(i)**H +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = CONJG( TAU( I ) ) + END IF + CALL CLARZ( SIDE, MI, NI, L, A( I, JA ), LDA, TAUI, + $ C( IC, JC ), LDC, WORK ) +* + 10 CONTINUE +* + RETURN +* +* End of CUNMR3 +* + END diff --git a/dspl/liblapack/SRC/cunmrq.f b/dspl/liblapack/SRC/cunmrq.f new file mode 100644 index 0000000..3513215 --- /dev/null +++ b/dspl/liblapack/SRC/cunmrq.f @@ -0,0 +1,348 @@ +*> \brief \b CUNMRQ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNMRQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNMRQ overwrites the general complex M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> +*> where Q is a complex unitary matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1)**H H(2)**H . . . H(k)**H +*> +*> as returned by CGERQF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> CGERQF in the last k rows of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CGERQF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT, + $ MI, NB, NBMIN, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLARFB, CLARFT, CUNMR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = MAX( 1, N ) + ELSE + NQ = N + NW = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = MIN( NBMAX, ILAENV( 1, 'CUNMRQ', SIDE // TRANS, M, N, + $ K, -1 ) ) + LWKOPT = NW*NB + TSIZE + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMRQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL CUNMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IWT = 1 + NW*NB + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + IF( NOTRAN ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL CLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB, + $ A( I, 1 ), LDA, TAU( I ), WORK( IWT ), LDT ) + IF( LEFT ) THEN +* +* H or H**H is applied to C(1:m-k+i+ib-1,1:n) +* + MI = M - K + I + IB - 1 + ELSE +* +* H or H**H is applied to C(1:m,1:n-k+i+ib-1) +* + NI = N - K + I + IB - 1 + END IF +* +* Apply H or H**H +* + CALL CLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, + $ IB, A( I, 1 ), LDA, WORK( IWT ), LDT, C, LDC, + $ WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNMRQ +* + END diff --git a/dspl/liblapack/SRC/cunmrz.f b/dspl/liblapack/SRC/cunmrz.f new file mode 100644 index 0000000..ccf9cd5 --- /dev/null +++ b/dspl/liblapack/SRC/cunmrz.f @@ -0,0 +1,383 @@ +*> \brief \b CUNMRZ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNMRZ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNMRZ( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, L, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNMRZ overwrites the general complex M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> +*> where Q is a complex unitary matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by CTZRZF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Conjugate transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of columns of the matrix A containing +*> the meaningful part of the Householder reflectors. +*> If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> CTZRZF in the last k rows of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CTZRZF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE CUNMRZ( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, L, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JA, JC, + $ LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL CLARZB, CLARZT, CUNMR3, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = MAX( 1, N ) + ELSE + NQ = N + NW = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR. + $ ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = MIN( NBMAX, ILAENV( 1, 'CUNMRQ', SIDE // TRANS, M, N, + $ K, -1 ) ) + LWKOPT = NW*NB + TSIZE + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMRZ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* +* Determine the block size. +* + NB = MIN( NBMAX, ILAENV( 1, 'CUNMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'CUNMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL CUNMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, IINFO ) + ELSE +* +* Use blocked code +* + IWT = 1 + NW*NB + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + JA = M - L + 1 + ELSE + MI = M + IC = 1 + JA = N - L + 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL CLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA, + $ TAU( I ), WORK( IWT ), LDT ) +* + IF( LEFT ) THEN +* +* H or H**H is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H**H is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H**H +* + CALL CLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, + $ IB, L, A( I, JA ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) + 10 CONTINUE +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of CUNMRZ +* + END diff --git a/dspl/liblapack/SRC/cunmtr.f b/dspl/liblapack/SRC/cunmtr.f new file mode 100644 index 0000000..81d2ca6 --- /dev/null +++ b/dspl/liblapack/SRC/cunmtr.f @@ -0,0 +1,312 @@ +*> \brief \b CUNMTR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUNMTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUNMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS, UPLO +* INTEGER INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUNMTR overwrites the general complex M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> +*> where Q is a complex unitary matrix of order nq, with nq = m if +*> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +*> nq-1 elementary reflectors, as returned by CHETRD: +*> +*> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); +*> +*> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A contains elementary reflectors +*> from CHETRD; +*> = 'L': Lower triangle of A contains elementary reflectors +*> from CHETRD. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Conjugate transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension +*> (LDA,M) if SIDE = 'L' +*> (LDA,N) if SIDE = 'R' +*> The vectors which define the elementary reflectors, as +*> returned by CHETRD. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CHETRD. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For optimum performance LWORK >= N*NB if SIDE = 'L', and +*> LWORK >=M*NB if SIDE = 'R', where NB is the optimal +*> blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUNMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, UPLO + INTEGER INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, UPPER + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL CUNMQL, CUNMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) + $ THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( UPPER ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'CUNMQL', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'CUNMQL', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + ELSE + IF( LEFT ) THEN + NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUNMTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + ELSE + MI = M + NI = N - 1 + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to CHETRD with UPLO = 'U' +* + CALL CUNMQL( SIDE, TRANS, MI, NI, NQ-1, A( 1, 2 ), LDA, TAU, C, + $ LDC, WORK, LWORK, IINFO ) + ELSE +* +* Q was determined by a call to CHETRD with UPLO = 'L' +* + IF( LEFT ) THEN + I1 = 2 + I2 = 1 + ELSE + I1 = 1 + I2 = 2 + END IF + CALL CUNMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, + $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of CUNMTR +* + END diff --git a/dspl/liblapack/SRC/cupgtr.f b/dspl/liblapack/SRC/cupgtr.f new file mode 100644 index 0000000..4c75d3e --- /dev/null +++ b/dspl/liblapack/SRC/cupgtr.f @@ -0,0 +1,233 @@ +*> \brief \b CUPGTR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUPGTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUPGTR( UPLO, N, AP, TAU, Q, LDQ, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDQ, N +* .. +* .. Array Arguments .. +* COMPLEX AP( * ), Q( LDQ, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUPGTR generates a complex unitary matrix Q which is defined as the +*> product of n-1 elementary reflectors H(i) of order n, as returned by +*> CHPTRD using packed storage: +*> +*> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), +*> +*> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular packed storage used in previous +*> call to CHPTRD; +*> = 'L': Lower triangular packed storage used in previous +*> call to CHPTRD. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix Q. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension (N*(N+1)/2) +*> The vectors which define the elementary reflectors, as +*> returned by CHPTRD. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (N-1) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CHPTRD. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is COMPLEX array, dimension (LDQ,N) +*> The N-by-N unitary matrix Q. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension (N-1) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUPGTR( UPLO, N, AP, TAU, Q, LDQ, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDQ, N +* .. +* .. Array Arguments .. + COMPLEX AP( * ), Q( LDQ, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX CZERO, CONE + PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), + $ CONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IINFO, IJ, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CUNG2L, CUNG2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUPGTR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Q was determined by a call to CHPTRD with UPLO = 'U' +* +* Unpack the vectors which define the elementary reflectors and +* set the last row and column of Q equal to those of the unit +* matrix +* + IJ = 2 + DO 20 J = 1, N - 1 + DO 10 I = 1, J - 1 + Q( I, J ) = AP( IJ ) + IJ = IJ + 1 + 10 CONTINUE + IJ = IJ + 2 + Q( N, J ) = CZERO + 20 CONTINUE + DO 30 I = 1, N - 1 + Q( I, N ) = CZERO + 30 CONTINUE + Q( N, N ) = CONE +* +* Generate Q(1:n-1,1:n-1) +* + CALL CUNG2L( N-1, N-1, N-1, Q, LDQ, TAU, WORK, IINFO ) +* + ELSE +* +* Q was determined by a call to CHPTRD with UPLO = 'L'. +* +* Unpack the vectors which define the elementary reflectors and +* set the first row and column of Q equal to those of the unit +* matrix +* + Q( 1, 1 ) = CONE + DO 40 I = 2, N + Q( I, 1 ) = CZERO + 40 CONTINUE + IJ = 3 + DO 60 J = 2, N + Q( 1, J ) = CZERO + DO 50 I = J + 1, N + Q( I, J ) = AP( IJ ) + IJ = IJ + 1 + 50 CONTINUE + IJ = IJ + 2 + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Generate Q(2:n,2:n) +* + CALL CUNG2R( N-1, N-1, N-1, Q( 2, 2 ), LDQ, TAU, WORK, + $ IINFO ) + END IF + END IF + RETURN +* +* End of CUPGTR +* + END diff --git a/dspl/liblapack/SRC/cupmtr.f b/dspl/liblapack/SRC/cupmtr.f new file mode 100644 index 0000000..eb6d219 --- /dev/null +++ b/dspl/liblapack/SRC/cupmtr.f @@ -0,0 +1,349 @@ +*> \brief \b CUPMTR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CUPMTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CUPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS, UPLO +* INTEGER INFO, LDC, M, N +* .. +* .. Array Arguments .. +* COMPLEX AP( * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CUPMTR overwrites the general complex M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> +*> where Q is a complex unitary matrix of order nq, with nq = m if +*> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +*> nq-1 elementary reflectors, as returned by CHPTRD using packed +*> storage: +*> +*> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); +*> +*> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular packed storage used in previous +*> call to CHPTRD; +*> = 'L': Lower triangular packed storage used in previous +*> call to CHPTRD. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Conjugate transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX array, dimension +*> (M*(M+1)/2) if SIDE = 'L' +*> (N*(N+1)/2) if SIDE = 'R' +*> The vectors which define the elementary reflectors, as +*> returned by CHPTRD. AP is modified by the routine but +*> restored on exit. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX array, dimension (M-1) if SIDE = 'L' +*> or (N-1) if SIDE = 'R' +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by CHPTRD. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX array, dimension +*> (N) if SIDE = 'L' +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE CUPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, UPLO + INTEGER INFO, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX AP( * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL FORWRD, LEFT, NOTRAN, UPPER + INTEGER I, I1, I2, I3, IC, II, JC, MI, NI, NQ + COMPLEX AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL CLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + UPPER = LSAME( UPLO, 'U' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'CUPMTR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Q was determined by a call to CHPTRD with UPLO = 'U' +* + FORWRD = ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) +* + IF( FORWRD ) THEN + I1 = 1 + I2 = NQ - 1 + I3 = 1 + II = 2 + ELSE + I1 = NQ - 1 + I2 = 1 + I3 = -1 + II = NQ*( NQ+1 ) / 2 - 1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)**H is applied to C(1:i,1:n) +* + MI = I + ELSE +* +* H(i) or H(i)**H is applied to C(1:m,1:i) +* + NI = I + END IF +* +* Apply H(i) or H(i)**H +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = CONJG( TAU( I ) ) + END IF + AII = AP( II ) + AP( II ) = ONE + CALL CLARF( SIDE, MI, NI, AP( II-I+1 ), 1, TAUI, C, LDC, + $ WORK ) + AP( II ) = AII +* + IF( FORWRD ) THEN + II = II + I + 2 + ELSE + II = II - I - 1 + END IF + 10 CONTINUE + ELSE +* +* Q was determined by a call to CHPTRD with UPLO = 'L'. +* + FORWRD = ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) +* + IF( FORWRD ) THEN + I1 = 1 + I2 = NQ - 1 + I3 = 1 + II = 2 + ELSE + I1 = NQ - 1 + I2 = 1 + I3 = -1 + II = NQ*( NQ+1 ) / 2 - 1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 20 I = I1, I2, I3 + AII = AP( II ) + AP( II ) = ONE + IF( LEFT ) THEN +* +* H(i) or H(i)**H is applied to C(i+1:m,1:n) +* + MI = M - I + IC = I + 1 + ELSE +* +* H(i) or H(i)**H is applied to C(1:m,i+1:n) +* + NI = N - I + JC = I + 1 + END IF +* +* Apply H(i) or H(i)**H +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = CONJG( TAU( I ) ) + END IF + CALL CLARF( SIDE, MI, NI, AP( II ), 1, TAUI, C( IC, JC ), + $ LDC, WORK ) + AP( II ) = AII +* + IF( FORWRD ) THEN + II = II + NQ - I + 1 + ELSE + II = II - NQ + I - 2 + END IF + 20 CONTINUE + END IF + RETURN +* +* End of CUPMTR +* + END diff --git a/dspl/liblapack/SRC/dbbcsd.f b/dspl/liblapack/SRC/dbbcsd.f new file mode 100644 index 0000000..f1df567 --- /dev/null +++ b/dspl/liblapack/SRC/dbbcsd.f @@ -0,0 +1,1080 @@ +*> \brief \b DBBCSD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DBBCSD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, +* THETA, PHI, U1, LDU1, U2, LDU2, V1T, LDV1T, +* V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E, +* B22D, B22E, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS +* INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LWORK, M, P, Q +* .. +* .. Array Arguments .. +* DOUBLE PRECISION B11D( * ), B11E( * ), B12D( * ), B12E( * ), +* $ B21D( * ), B21E( * ), B22D( * ), B22E( * ), +* $ PHI( * ), THETA( * ), WORK( * ) +* DOUBLE PRECISION U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ), +* $ V2T( LDV2T, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DBBCSD computes the CS decomposition of an orthogonal matrix in +*> bidiagonal-block form, +*> +*> +*> [ B11 | B12 0 0 ] +*> [ 0 | 0 -I 0 ] +*> X = [----------------] +*> [ B21 | B22 0 0 ] +*> [ 0 | 0 0 I ] +*> +*> [ C | -S 0 0 ] +*> [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**T +*> = [---------] [---------------] [---------] . +*> [ | U2 ] [ S | C 0 0 ] [ | V2 ] +*> [ 0 | 0 0 I ] +*> +*> X is M-by-M, its top-left block is P-by-Q, and Q must be no larger +*> than P, M-P, or M-Q. (If Q is not the smallest index, then X must be +*> transposed and/or permuted. This can be done in constant time using +*> the TRANS and SIGNS options. See DORCSD for details.) +*> +*> The bidiagonal matrices B11, B12, B21, and B22 are represented +*> implicitly by angles THETA(1:Q) and PHI(1:Q-1). +*> +*> The orthogonal matrices U1, U2, V1T, and V2T are input/output. +*> The input matrices are pre- or post-multiplied by the appropriate +*> singular vector matrices. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU1 +*> \verbatim +*> JOBU1 is CHARACTER +*> = 'Y': U1 is updated; +*> otherwise: U1 is not updated. +*> \endverbatim +*> +*> \param[in] JOBU2 +*> \verbatim +*> JOBU2 is CHARACTER +*> = 'Y': U2 is updated; +*> otherwise: U2 is not updated. +*> \endverbatim +*> +*> \param[in] JOBV1T +*> \verbatim +*> JOBV1T is CHARACTER +*> = 'Y': V1T is updated; +*> otherwise: V1T is not updated. +*> \endverbatim +*> +*> \param[in] JOBV2T +*> \verbatim +*> JOBV2T is CHARACTER +*> = 'Y': V2T is updated; +*> otherwise: V2T is not updated. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER +*> = 'T': X, U1, U2, V1T, and V2T are stored in row-major +*> order; +*> otherwise: X, U1, U2, V1T, and V2T are stored in column- +*> major order. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows and columns in X, the orthogonal matrix in +*> bidiagonal-block form. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in the top-left block of X. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in the top-left block of X. +*> 0 <= Q <= MIN(P,M-P,M-Q). +*> \endverbatim +*> +*> \param[in,out] THETA +*> \verbatim +*> THETA is DOUBLE PRECISION array, dimension (Q) +*> On entry, the angles THETA(1),...,THETA(Q) that, along with +*> PHI(1), ...,PHI(Q-1), define the matrix in bidiagonal-block +*> form. On exit, the angles whose cosines and sines define the +*> diagonal blocks in the CS decomposition. +*> \endverbatim +*> +*> \param[in,out] PHI +*> \verbatim +*> PHI is DOUBLE PRECISION array, dimension (Q-1) +*> The angles PHI(1),...,PHI(Q-1) that, along with THETA(1),..., +*> THETA(Q), define the matrix in bidiagonal-block form. +*> \endverbatim +*> +*> \param[in,out] U1 +*> \verbatim +*> U1 is DOUBLE PRECISION array, dimension (LDU1,P) +*> On entry, a P-by-P matrix. On exit, U1 is postmultiplied +*> by the left singular vector matrix common to [ B11 ; 0 ] and +*> [ B12 0 0 ; 0 -I 0 0 ]. +*> \endverbatim +*> +*> \param[in] LDU1 +*> \verbatim +*> LDU1 is INTEGER +*> The leading dimension of the array U1, LDU1 >= MAX(1,P). +*> \endverbatim +*> +*> \param[in,out] U2 +*> \verbatim +*> U2 is DOUBLE PRECISION array, dimension (LDU2,M-P) +*> On entry, an (M-P)-by-(M-P) matrix. On exit, U2 is +*> postmultiplied by the left singular vector matrix common to +*> [ B21 ; 0 ] and [ B22 0 0 ; 0 0 I ]. +*> \endverbatim +*> +*> \param[in] LDU2 +*> \verbatim +*> LDU2 is INTEGER +*> The leading dimension of the array U2, LDU2 >= MAX(1,M-P). +*> \endverbatim +*> +*> \param[in,out] V1T +*> \verbatim +*> V1T is DOUBLE PRECISION array, dimension (LDV1T,Q) +*> On entry, a Q-by-Q matrix. On exit, V1T is premultiplied +*> by the transpose of the right singular vector +*> matrix common to [ B11 ; 0 ] and [ B21 ; 0 ]. +*> \endverbatim +*> +*> \param[in] LDV1T +*> \verbatim +*> LDV1T is INTEGER +*> The leading dimension of the array V1T, LDV1T >= MAX(1,Q). +*> \endverbatim +*> +*> \param[in,out] V2T +*> \verbatim +*> V2T is DOUBLE PRECISION array, dimension (LDV2T,M-Q) +*> On entry, an (M-Q)-by-(M-Q) matrix. On exit, V2T is +*> premultiplied by the transpose of the right +*> singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and +*> [ B22 0 0 ; 0 0 I ]. +*> \endverbatim +*> +*> \param[in] LDV2T +*> \verbatim +*> LDV2T is INTEGER +*> The leading dimension of the array V2T, LDV2T >= MAX(1,M-Q). +*> \endverbatim +*> +*> \param[out] B11D +*> \verbatim +*> B11D is DOUBLE PRECISION array, dimension (Q) +*> When DBBCSD converges, B11D contains the cosines of THETA(1), +*> ..., THETA(Q). If DBBCSD fails to converge, then B11D +*> contains the diagonal of the partially reduced top-left +*> block. +*> \endverbatim +*> +*> \param[out] B11E +*> \verbatim +*> B11E is DOUBLE PRECISION array, dimension (Q-1) +*> When DBBCSD converges, B11E contains zeros. If DBBCSD fails +*> to converge, then B11E contains the superdiagonal of the +*> partially reduced top-left block. +*> \endverbatim +*> +*> \param[out] B12D +*> \verbatim +*> B12D is DOUBLE PRECISION array, dimension (Q) +*> When DBBCSD converges, B12D contains the negative sines of +*> THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then +*> B12D contains the diagonal of the partially reduced top-right +*> block. +*> \endverbatim +*> +*> \param[out] B12E +*> \verbatim +*> B12E is DOUBLE PRECISION array, dimension (Q-1) +*> When DBBCSD converges, B12E contains zeros. If DBBCSD fails +*> to converge, then B12E contains the subdiagonal of the +*> partially reduced top-right block. +*> \endverbatim +*> +*> \param[out] B21D +*> \verbatim +*> B21D is DOUBLE PRECISION array, dimension (Q) +*> When DBBCSD converges, B21D contains the negative sines of +*> THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then +*> B21D contains the diagonal of the partially reduced bottom-left +*> block. +*> \endverbatim +*> +*> \param[out] B21E +*> \verbatim +*> B21E is DOUBLE PRECISION array, dimension (Q-1) +*> When DBBCSD converges, B21E contains zeros. If DBBCSD fails +*> to converge, then B21E contains the subdiagonal of the +*> partially reduced bottom-left block. +*> \endverbatim +*> +*> \param[out] B22D +*> \verbatim +*> B22D is DOUBLE PRECISION array, dimension (Q) +*> When DBBCSD converges, B22D contains the negative sines of +*> THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then +*> B22D contains the diagonal of the partially reduced bottom-right +*> block. +*> \endverbatim +*> +*> \param[out] B22E +*> \verbatim +*> B22E is DOUBLE PRECISION array, dimension (Q-1) +*> When DBBCSD converges, B22E contains zeros. If DBBCSD fails +*> to converge, then B22E contains the subdiagonal of the +*> partially reduced bottom-right block. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= MAX(1,8*Q). +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the work array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if DBBCSD did not converge, INFO specifies the number +*> of nonzero entries in PHI, and B11D, B11E, etc., +*> contain the partially reduced matrix. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> TOLMUL DOUBLE PRECISION, default = MAX(10,MIN(100,EPS**(-1/8))) +*> TOLMUL controls the convergence criterion of the QR loop. +*> Angles THETA(i), PHI(i) are rounded to 0 or PI/2 when they +*> are within TOLMUL*EPS of either bound. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, + $ THETA, PHI, U1, LDU1, U2, LDU2, V1T, LDV1T, + $ V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E, + $ B22D, B22E, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS + INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LWORK, M, P, Q +* .. +* .. Array Arguments .. + DOUBLE PRECISION B11D( * ), B11E( * ), B12D( * ), B12E( * ), + $ B21D( * ), B21E( * ), B22D( * ), B22E( * ), + $ PHI( * ), THETA( * ), WORK( * ) + DOUBLE PRECISION U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ), + $ V2T( LDV2T, * ) +* .. +* +* =================================================================== +* +* .. Parameters .. + INTEGER MAXITR + PARAMETER ( MAXITR = 6 ) + DOUBLE PRECISION HUNDRED, MEIGHTH, ONE, PIOVER2, TEN, ZERO + PARAMETER ( HUNDRED = 100.0D0, MEIGHTH = -0.125D0, + $ ONE = 1.0D0, PIOVER2 = 1.57079632679489662D0, + $ TEN = 10.0D0, ZERO = 0.0D0 ) + DOUBLE PRECISION NEGONE + PARAMETER ( NEGONE = -1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL COLMAJOR, LQUERY, RESTART11, RESTART12, + $ RESTART21, RESTART22, WANTU1, WANTU2, WANTV1T, + $ WANTV2T + INTEGER I, IMIN, IMAX, ITER, IU1CS, IU1SN, IU2CS, + $ IU2SN, IV1TCS, IV1TSN, IV2TCS, IV2TSN, J, + $ LWORKMIN, LWORKOPT, MAXIT, MINI + DOUBLE PRECISION B11BULGE, B12BULGE, B21BULGE, B22BULGE, DUMMY, + $ EPS, MU, NU, R, SIGMA11, SIGMA21, + $ TEMP, THETAMAX, THETAMIN, THRESH, TOL, TOLMUL, + $ UNFL, X1, X2, Y1, Y2 +* +* .. External Subroutines .. + EXTERNAL DLASR, DSCAL, DSWAP, DLARTGP, DLARTGS, DLAS2, + $ XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + LOGICAL LSAME + EXTERNAL LSAME, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, ATAN2, COS, MAX, MIN, SIN, SQRT +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + LQUERY = LWORK .EQ. -1 + WANTU1 = LSAME( JOBU1, 'Y' ) + WANTU2 = LSAME( JOBU2, 'Y' ) + WANTV1T = LSAME( JOBV1T, 'Y' ) + WANTV2T = LSAME( JOBV2T, 'Y' ) + COLMAJOR = .NOT. LSAME( TRANS, 'T' ) +* + IF( M .LT. 0 ) THEN + INFO = -6 + ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN + INFO = -7 + ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN + INFO = -8 + ELSE IF( Q .GT. P .OR. Q .GT. M-P .OR. Q .GT. M-Q ) THEN + INFO = -8 + ELSE IF( WANTU1 .AND. LDU1 .LT. P ) THEN + INFO = -12 + ELSE IF( WANTU2 .AND. LDU2 .LT. M-P ) THEN + INFO = -14 + ELSE IF( WANTV1T .AND. LDV1T .LT. Q ) THEN + INFO = -16 + ELSE IF( WANTV2T .AND. LDV2T .LT. M-Q ) THEN + INFO = -18 + END IF +* +* Quick return if Q = 0 +* + IF( INFO .EQ. 0 .AND. Q .EQ. 0 ) THEN + LWORKMIN = 1 + WORK(1) = LWORKMIN + RETURN + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + IU1CS = 1 + IU1SN = IU1CS + Q + IU2CS = IU1SN + Q + IU2SN = IU2CS + Q + IV1TCS = IU2SN + Q + IV1TSN = IV1TCS + Q + IV2TCS = IV1TSN + Q + IV2TSN = IV2TCS + Q + LWORKOPT = IV2TSN + Q - 1 + LWORKMIN = LWORKOPT + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT. LQUERY ) THEN + INFO = -28 + END IF + END IF +* + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'DBBCSD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'Epsilon' ) + UNFL = DLAMCH( 'Safe minimum' ) + TOLMUL = MAX( TEN, MIN( HUNDRED, EPS**MEIGHTH ) ) + TOL = TOLMUL*EPS + THRESH = MAX( TOL, MAXITR*Q*Q*UNFL ) +* +* Test for negligible sines or cosines +* + DO I = 1, Q + IF( THETA(I) .LT. THRESH ) THEN + THETA(I) = ZERO + ELSE IF( THETA(I) .GT. PIOVER2-THRESH ) THEN + THETA(I) = PIOVER2 + END IF + END DO + DO I = 1, Q-1 + IF( PHI(I) .LT. THRESH ) THEN + PHI(I) = ZERO + ELSE IF( PHI(I) .GT. PIOVER2-THRESH ) THEN + PHI(I) = PIOVER2 + END IF + END DO +* +* Initial deflation +* + IMAX = Q + DO WHILE( IMAX .GT. 1 ) + IF( PHI(IMAX-1) .NE. ZERO ) THEN + EXIT + END IF + IMAX = IMAX - 1 + END DO + IMIN = IMAX - 1 + IF ( IMIN .GT. 1 ) THEN + DO WHILE( PHI(IMIN-1) .NE. ZERO ) + IMIN = IMIN - 1 + IF ( IMIN .LE. 1 ) EXIT + END DO + END IF +* +* Initialize iteration counter +* + MAXIT = MAXITR*Q*Q + ITER = 0 +* +* Begin main iteration loop +* + DO WHILE( IMAX .GT. 1 ) +* +* Compute the matrix entries +* + B11D(IMIN) = COS( THETA(IMIN) ) + B21D(IMIN) = -SIN( THETA(IMIN) ) + DO I = IMIN, IMAX - 1 + B11E(I) = -SIN( THETA(I) ) * SIN( PHI(I) ) + B11D(I+1) = COS( THETA(I+1) ) * COS( PHI(I) ) + B12D(I) = SIN( THETA(I) ) * COS( PHI(I) ) + B12E(I) = COS( THETA(I+1) ) * SIN( PHI(I) ) + B21E(I) = -COS( THETA(I) ) * SIN( PHI(I) ) + B21D(I+1) = -SIN( THETA(I+1) ) * COS( PHI(I) ) + B22D(I) = COS( THETA(I) ) * COS( PHI(I) ) + B22E(I) = -SIN( THETA(I+1) ) * SIN( PHI(I) ) + END DO + B12D(IMAX) = SIN( THETA(IMAX) ) + B22D(IMAX) = COS( THETA(IMAX) ) +* +* Abort if not converging; otherwise, increment ITER +* + IF( ITER .GT. MAXIT ) THEN + INFO = 0 + DO I = 1, Q + IF( PHI(I) .NE. ZERO ) + $ INFO = INFO + 1 + END DO + RETURN + END IF +* + ITER = ITER + IMAX - IMIN +* +* Compute shifts +* + THETAMAX = THETA(IMIN) + THETAMIN = THETA(IMIN) + DO I = IMIN+1, IMAX + IF( THETA(I) > THETAMAX ) + $ THETAMAX = THETA(I) + IF( THETA(I) < THETAMIN ) + $ THETAMIN = THETA(I) + END DO +* + IF( THETAMAX .GT. PIOVER2 - THRESH ) THEN +* +* Zero on diagonals of B11 and B22; induce deflation with a +* zero shift +* + MU = ZERO + NU = ONE +* + ELSE IF( THETAMIN .LT. THRESH ) THEN +* +* Zero on diagonals of B12 and B22; induce deflation with a +* zero shift +* + MU = ONE + NU = ZERO +* + ELSE +* +* Compute shifts for B11 and B21 and use the lesser +* + CALL DLAS2( B11D(IMAX-1), B11E(IMAX-1), B11D(IMAX), SIGMA11, + $ DUMMY ) + CALL DLAS2( B21D(IMAX-1), B21E(IMAX-1), B21D(IMAX), SIGMA21, + $ DUMMY ) +* + IF( SIGMA11 .LE. SIGMA21 ) THEN + MU = SIGMA11 + NU = SQRT( ONE - MU**2 ) + IF( MU .LT. THRESH ) THEN + MU = ZERO + NU = ONE + END IF + ELSE + NU = SIGMA21 + MU = SQRT( 1.0 - NU**2 ) + IF( NU .LT. THRESH ) THEN + MU = ONE + NU = ZERO + END IF + END IF + END IF +* +* Rotate to produce bulges in B11 and B21 +* + IF( MU .LE. NU ) THEN + CALL DLARTGS( B11D(IMIN), B11E(IMIN), MU, + $ WORK(IV1TCS+IMIN-1), WORK(IV1TSN+IMIN-1) ) + ELSE + CALL DLARTGS( B21D(IMIN), B21E(IMIN), NU, + $ WORK(IV1TCS+IMIN-1), WORK(IV1TSN+IMIN-1) ) + END IF +* + TEMP = WORK(IV1TCS+IMIN-1)*B11D(IMIN) + + $ WORK(IV1TSN+IMIN-1)*B11E(IMIN) + B11E(IMIN) = WORK(IV1TCS+IMIN-1)*B11E(IMIN) - + $ WORK(IV1TSN+IMIN-1)*B11D(IMIN) + B11D(IMIN) = TEMP + B11BULGE = WORK(IV1TSN+IMIN-1)*B11D(IMIN+1) + B11D(IMIN+1) = WORK(IV1TCS+IMIN-1)*B11D(IMIN+1) + TEMP = WORK(IV1TCS+IMIN-1)*B21D(IMIN) + + $ WORK(IV1TSN+IMIN-1)*B21E(IMIN) + B21E(IMIN) = WORK(IV1TCS+IMIN-1)*B21E(IMIN) - + $ WORK(IV1TSN+IMIN-1)*B21D(IMIN) + B21D(IMIN) = TEMP + B21BULGE = WORK(IV1TSN+IMIN-1)*B21D(IMIN+1) + B21D(IMIN+1) = WORK(IV1TCS+IMIN-1)*B21D(IMIN+1) +* +* Compute THETA(IMIN) +* + THETA( IMIN ) = ATAN2( SQRT( B21D(IMIN)**2+B21BULGE**2 ), + $ SQRT( B11D(IMIN)**2+B11BULGE**2 ) ) +* +* Chase the bulges in B11(IMIN+1,IMIN) and B21(IMIN+1,IMIN) +* + IF( B11D(IMIN)**2+B11BULGE**2 .GT. THRESH**2 ) THEN + CALL DLARTGP( B11BULGE, B11D(IMIN), WORK(IU1SN+IMIN-1), + $ WORK(IU1CS+IMIN-1), R ) + ELSE IF( MU .LE. NU ) THEN + CALL DLARTGS( B11E( IMIN ), B11D( IMIN + 1 ), MU, + $ WORK(IU1CS+IMIN-1), WORK(IU1SN+IMIN-1) ) + ELSE + CALL DLARTGS( B12D( IMIN ), B12E( IMIN ), NU, + $ WORK(IU1CS+IMIN-1), WORK(IU1SN+IMIN-1) ) + END IF + IF( B21D(IMIN)**2+B21BULGE**2 .GT. THRESH**2 ) THEN + CALL DLARTGP( B21BULGE, B21D(IMIN), WORK(IU2SN+IMIN-1), + $ WORK(IU2CS+IMIN-1), R ) + ELSE IF( NU .LT. MU ) THEN + CALL DLARTGS( B21E( IMIN ), B21D( IMIN + 1 ), NU, + $ WORK(IU2CS+IMIN-1), WORK(IU2SN+IMIN-1) ) + ELSE + CALL DLARTGS( B22D(IMIN), B22E(IMIN), MU, + $ WORK(IU2CS+IMIN-1), WORK(IU2SN+IMIN-1) ) + END IF + WORK(IU2CS+IMIN-1) = -WORK(IU2CS+IMIN-1) + WORK(IU2SN+IMIN-1) = -WORK(IU2SN+IMIN-1) +* + TEMP = WORK(IU1CS+IMIN-1)*B11E(IMIN) + + $ WORK(IU1SN+IMIN-1)*B11D(IMIN+1) + B11D(IMIN+1) = WORK(IU1CS+IMIN-1)*B11D(IMIN+1) - + $ WORK(IU1SN+IMIN-1)*B11E(IMIN) + B11E(IMIN) = TEMP + IF( IMAX .GT. IMIN+1 ) THEN + B11BULGE = WORK(IU1SN+IMIN-1)*B11E(IMIN+1) + B11E(IMIN+1) = WORK(IU1CS+IMIN-1)*B11E(IMIN+1) + END IF + TEMP = WORK(IU1CS+IMIN-1)*B12D(IMIN) + + $ WORK(IU1SN+IMIN-1)*B12E(IMIN) + B12E(IMIN) = WORK(IU1CS+IMIN-1)*B12E(IMIN) - + $ WORK(IU1SN+IMIN-1)*B12D(IMIN) + B12D(IMIN) = TEMP + B12BULGE = WORK(IU1SN+IMIN-1)*B12D(IMIN+1) + B12D(IMIN+1) = WORK(IU1CS+IMIN-1)*B12D(IMIN+1) + TEMP = WORK(IU2CS+IMIN-1)*B21E(IMIN) + + $ WORK(IU2SN+IMIN-1)*B21D(IMIN+1) + B21D(IMIN+1) = WORK(IU2CS+IMIN-1)*B21D(IMIN+1) - + $ WORK(IU2SN+IMIN-1)*B21E(IMIN) + B21E(IMIN) = TEMP + IF( IMAX .GT. IMIN+1 ) THEN + B21BULGE = WORK(IU2SN+IMIN-1)*B21E(IMIN+1) + B21E(IMIN+1) = WORK(IU2CS+IMIN-1)*B21E(IMIN+1) + END IF + TEMP = WORK(IU2CS+IMIN-1)*B22D(IMIN) + + $ WORK(IU2SN+IMIN-1)*B22E(IMIN) + B22E(IMIN) = WORK(IU2CS+IMIN-1)*B22E(IMIN) - + $ WORK(IU2SN+IMIN-1)*B22D(IMIN) + B22D(IMIN) = TEMP + B22BULGE = WORK(IU2SN+IMIN-1)*B22D(IMIN+1) + B22D(IMIN+1) = WORK(IU2CS+IMIN-1)*B22D(IMIN+1) +* +* Inner loop: chase bulges from B11(IMIN,IMIN+2), +* B12(IMIN,IMIN+1), B21(IMIN,IMIN+2), and B22(IMIN,IMIN+1) to +* bottom-right +* + DO I = IMIN+1, IMAX-1 +* +* Compute PHI(I-1) +* + X1 = SIN(THETA(I-1))*B11E(I-1) + COS(THETA(I-1))*B21E(I-1) + X2 = SIN(THETA(I-1))*B11BULGE + COS(THETA(I-1))*B21BULGE + Y1 = SIN(THETA(I-1))*B12D(I-1) + COS(THETA(I-1))*B22D(I-1) + Y2 = SIN(THETA(I-1))*B12BULGE + COS(THETA(I-1))*B22BULGE +* + PHI(I-1) = ATAN2( SQRT(X1**2+X2**2), SQRT(Y1**2+Y2**2) ) +* +* Determine if there are bulges to chase or if a new direct +* summand has been reached +* + RESTART11 = B11E(I-1)**2 + B11BULGE**2 .LE. THRESH**2 + RESTART21 = B21E(I-1)**2 + B21BULGE**2 .LE. THRESH**2 + RESTART12 = B12D(I-1)**2 + B12BULGE**2 .LE. THRESH**2 + RESTART22 = B22D(I-1)**2 + B22BULGE**2 .LE. THRESH**2 +* +* If possible, chase bulges from B11(I-1,I+1), B12(I-1,I), +* B21(I-1,I+1), and B22(I-1,I). If necessary, restart bulge- +* chasing by applying the original shift again. +* + IF( .NOT. RESTART11 .AND. .NOT. RESTART21 ) THEN + CALL DLARTGP( X2, X1, WORK(IV1TSN+I-1), WORK(IV1TCS+I-1), + $ R ) + ELSE IF( .NOT. RESTART11 .AND. RESTART21 ) THEN + CALL DLARTGP( B11BULGE, B11E(I-1), WORK(IV1TSN+I-1), + $ WORK(IV1TCS+I-1), R ) + ELSE IF( RESTART11 .AND. .NOT. RESTART21 ) THEN + CALL DLARTGP( B21BULGE, B21E(I-1), WORK(IV1TSN+I-1), + $ WORK(IV1TCS+I-1), R ) + ELSE IF( MU .LE. NU ) THEN + CALL DLARTGS( B11D(I), B11E(I), MU, WORK(IV1TCS+I-1), + $ WORK(IV1TSN+I-1) ) + ELSE + CALL DLARTGS( B21D(I), B21E(I), NU, WORK(IV1TCS+I-1), + $ WORK(IV1TSN+I-1) ) + END IF + WORK(IV1TCS+I-1) = -WORK(IV1TCS+I-1) + WORK(IV1TSN+I-1) = -WORK(IV1TSN+I-1) + IF( .NOT. RESTART12 .AND. .NOT. RESTART22 ) THEN + CALL DLARTGP( Y2, Y1, WORK(IV2TSN+I-1-1), + $ WORK(IV2TCS+I-1-1), R ) + ELSE IF( .NOT. RESTART12 .AND. RESTART22 ) THEN + CALL DLARTGP( B12BULGE, B12D(I-1), WORK(IV2TSN+I-1-1), + $ WORK(IV2TCS+I-1-1), R ) + ELSE IF( RESTART12 .AND. .NOT. RESTART22 ) THEN + CALL DLARTGP( B22BULGE, B22D(I-1), WORK(IV2TSN+I-1-1), + $ WORK(IV2TCS+I-1-1), R ) + ELSE IF( NU .LT. MU ) THEN + CALL DLARTGS( B12E(I-1), B12D(I), NU, WORK(IV2TCS+I-1-1), + $ WORK(IV2TSN+I-1-1) ) + ELSE + CALL DLARTGS( B22E(I-1), B22D(I), MU, WORK(IV2TCS+I-1-1), + $ WORK(IV2TSN+I-1-1) ) + END IF +* + TEMP = WORK(IV1TCS+I-1)*B11D(I) + WORK(IV1TSN+I-1)*B11E(I) + B11E(I) = WORK(IV1TCS+I-1)*B11E(I) - + $ WORK(IV1TSN+I-1)*B11D(I) + B11D(I) = TEMP + B11BULGE = WORK(IV1TSN+I-1)*B11D(I+1) + B11D(I+1) = WORK(IV1TCS+I-1)*B11D(I+1) + TEMP = WORK(IV1TCS+I-1)*B21D(I) + WORK(IV1TSN+I-1)*B21E(I) + B21E(I) = WORK(IV1TCS+I-1)*B21E(I) - + $ WORK(IV1TSN+I-1)*B21D(I) + B21D(I) = TEMP + B21BULGE = WORK(IV1TSN+I-1)*B21D(I+1) + B21D(I+1) = WORK(IV1TCS+I-1)*B21D(I+1) + TEMP = WORK(IV2TCS+I-1-1)*B12E(I-1) + + $ WORK(IV2TSN+I-1-1)*B12D(I) + B12D(I) = WORK(IV2TCS+I-1-1)*B12D(I) - + $ WORK(IV2TSN+I-1-1)*B12E(I-1) + B12E(I-1) = TEMP + B12BULGE = WORK(IV2TSN+I-1-1)*B12E(I) + B12E(I) = WORK(IV2TCS+I-1-1)*B12E(I) + TEMP = WORK(IV2TCS+I-1-1)*B22E(I-1) + + $ WORK(IV2TSN+I-1-1)*B22D(I) + B22D(I) = WORK(IV2TCS+I-1-1)*B22D(I) - + $ WORK(IV2TSN+I-1-1)*B22E(I-1) + B22E(I-1) = TEMP + B22BULGE = WORK(IV2TSN+I-1-1)*B22E(I) + B22E(I) = WORK(IV2TCS+I-1-1)*B22E(I) +* +* Compute THETA(I) +* + X1 = COS(PHI(I-1))*B11D(I) + SIN(PHI(I-1))*B12E(I-1) + X2 = COS(PHI(I-1))*B11BULGE + SIN(PHI(I-1))*B12BULGE + Y1 = COS(PHI(I-1))*B21D(I) + SIN(PHI(I-1))*B22E(I-1) + Y2 = COS(PHI(I-1))*B21BULGE + SIN(PHI(I-1))*B22BULGE +* + THETA(I) = ATAN2( SQRT(Y1**2+Y2**2), SQRT(X1**2+X2**2) ) +* +* Determine if there are bulges to chase or if a new direct +* summand has been reached +* + RESTART11 = B11D(I)**2 + B11BULGE**2 .LE. THRESH**2 + RESTART12 = B12E(I-1)**2 + B12BULGE**2 .LE. THRESH**2 + RESTART21 = B21D(I)**2 + B21BULGE**2 .LE. THRESH**2 + RESTART22 = B22E(I-1)**2 + B22BULGE**2 .LE. THRESH**2 +* +* If possible, chase bulges from B11(I+1,I), B12(I+1,I-1), +* B21(I+1,I), and B22(I+1,I-1). If necessary, restart bulge- +* chasing by applying the original shift again. +* + IF( .NOT. RESTART11 .AND. .NOT. RESTART12 ) THEN + CALL DLARTGP( X2, X1, WORK(IU1SN+I-1), WORK(IU1CS+I-1), + $ R ) + ELSE IF( .NOT. RESTART11 .AND. RESTART12 ) THEN + CALL DLARTGP( B11BULGE, B11D(I), WORK(IU1SN+I-1), + $ WORK(IU1CS+I-1), R ) + ELSE IF( RESTART11 .AND. .NOT. RESTART12 ) THEN + CALL DLARTGP( B12BULGE, B12E(I-1), WORK(IU1SN+I-1), + $ WORK(IU1CS+I-1), R ) + ELSE IF( MU .LE. NU ) THEN + CALL DLARTGS( B11E(I), B11D(I+1), MU, WORK(IU1CS+I-1), + $ WORK(IU1SN+I-1) ) + ELSE + CALL DLARTGS( B12D(I), B12E(I), NU, WORK(IU1CS+I-1), + $ WORK(IU1SN+I-1) ) + END IF + IF( .NOT. RESTART21 .AND. .NOT. RESTART22 ) THEN + CALL DLARTGP( Y2, Y1, WORK(IU2SN+I-1), WORK(IU2CS+I-1), + $ R ) + ELSE IF( .NOT. RESTART21 .AND. RESTART22 ) THEN + CALL DLARTGP( B21BULGE, B21D(I), WORK(IU2SN+I-1), + $ WORK(IU2CS+I-1), R ) + ELSE IF( RESTART21 .AND. .NOT. RESTART22 ) THEN + CALL DLARTGP( B22BULGE, B22E(I-1), WORK(IU2SN+I-1), + $ WORK(IU2CS+I-1), R ) + ELSE IF( NU .LT. MU ) THEN + CALL DLARTGS( B21E(I), B21E(I+1), NU, WORK(IU2CS+I-1), + $ WORK(IU2SN+I-1) ) + ELSE + CALL DLARTGS( B22D(I), B22E(I), MU, WORK(IU2CS+I-1), + $ WORK(IU2SN+I-1) ) + END IF + WORK(IU2CS+I-1) = -WORK(IU2CS+I-1) + WORK(IU2SN+I-1) = -WORK(IU2SN+I-1) +* + TEMP = WORK(IU1CS+I-1)*B11E(I) + WORK(IU1SN+I-1)*B11D(I+1) + B11D(I+1) = WORK(IU1CS+I-1)*B11D(I+1) - + $ WORK(IU1SN+I-1)*B11E(I) + B11E(I) = TEMP + IF( I .LT. IMAX - 1 ) THEN + B11BULGE = WORK(IU1SN+I-1)*B11E(I+1) + B11E(I+1) = WORK(IU1CS+I-1)*B11E(I+1) + END IF + TEMP = WORK(IU2CS+I-1)*B21E(I) + WORK(IU2SN+I-1)*B21D(I+1) + B21D(I+1) = WORK(IU2CS+I-1)*B21D(I+1) - + $ WORK(IU2SN+I-1)*B21E(I) + B21E(I) = TEMP + IF( I .LT. IMAX - 1 ) THEN + B21BULGE = WORK(IU2SN+I-1)*B21E(I+1) + B21E(I+1) = WORK(IU2CS+I-1)*B21E(I+1) + END IF + TEMP = WORK(IU1CS+I-1)*B12D(I) + WORK(IU1SN+I-1)*B12E(I) + B12E(I) = WORK(IU1CS+I-1)*B12E(I) - WORK(IU1SN+I-1)*B12D(I) + B12D(I) = TEMP + B12BULGE = WORK(IU1SN+I-1)*B12D(I+1) + B12D(I+1) = WORK(IU1CS+I-1)*B12D(I+1) + TEMP = WORK(IU2CS+I-1)*B22D(I) + WORK(IU2SN+I-1)*B22E(I) + B22E(I) = WORK(IU2CS+I-1)*B22E(I) - WORK(IU2SN+I-1)*B22D(I) + B22D(I) = TEMP + B22BULGE = WORK(IU2SN+I-1)*B22D(I+1) + B22D(I+1) = WORK(IU2CS+I-1)*B22D(I+1) +* + END DO +* +* Compute PHI(IMAX-1) +* + X1 = SIN(THETA(IMAX-1))*B11E(IMAX-1) + + $ COS(THETA(IMAX-1))*B21E(IMAX-1) + Y1 = SIN(THETA(IMAX-1))*B12D(IMAX-1) + + $ COS(THETA(IMAX-1))*B22D(IMAX-1) + Y2 = SIN(THETA(IMAX-1))*B12BULGE + COS(THETA(IMAX-1))*B22BULGE +* + PHI(IMAX-1) = ATAN2( ABS(X1), SQRT(Y1**2+Y2**2) ) +* +* Chase bulges from B12(IMAX-1,IMAX) and B22(IMAX-1,IMAX) +* + RESTART12 = B12D(IMAX-1)**2 + B12BULGE**2 .LE. THRESH**2 + RESTART22 = B22D(IMAX-1)**2 + B22BULGE**2 .LE. THRESH**2 +* + IF( .NOT. RESTART12 .AND. .NOT. RESTART22 ) THEN + CALL DLARTGP( Y2, Y1, WORK(IV2TSN+IMAX-1-1), + $ WORK(IV2TCS+IMAX-1-1), R ) + ELSE IF( .NOT. RESTART12 .AND. RESTART22 ) THEN + CALL DLARTGP( B12BULGE, B12D(IMAX-1), WORK(IV2TSN+IMAX-1-1), + $ WORK(IV2TCS+IMAX-1-1), R ) + ELSE IF( RESTART12 .AND. .NOT. RESTART22 ) THEN + CALL DLARTGP( B22BULGE, B22D(IMAX-1), WORK(IV2TSN+IMAX-1-1), + $ WORK(IV2TCS+IMAX-1-1), R ) + ELSE IF( NU .LT. MU ) THEN + CALL DLARTGS( B12E(IMAX-1), B12D(IMAX), NU, + $ WORK(IV2TCS+IMAX-1-1), WORK(IV2TSN+IMAX-1-1) ) + ELSE + CALL DLARTGS( B22E(IMAX-1), B22D(IMAX), MU, + $ WORK(IV2TCS+IMAX-1-1), WORK(IV2TSN+IMAX-1-1) ) + END IF +* + TEMP = WORK(IV2TCS+IMAX-1-1)*B12E(IMAX-1) + + $ WORK(IV2TSN+IMAX-1-1)*B12D(IMAX) + B12D(IMAX) = WORK(IV2TCS+IMAX-1-1)*B12D(IMAX) - + $ WORK(IV2TSN+IMAX-1-1)*B12E(IMAX-1) + B12E(IMAX-1) = TEMP + TEMP = WORK(IV2TCS+IMAX-1-1)*B22E(IMAX-1) + + $ WORK(IV2TSN+IMAX-1-1)*B22D(IMAX) + B22D(IMAX) = WORK(IV2TCS+IMAX-1-1)*B22D(IMAX) - + $ WORK(IV2TSN+IMAX-1-1)*B22E(IMAX-1) + B22E(IMAX-1) = TEMP +* +* Update singular vectors +* + IF( WANTU1 ) THEN + IF( COLMAJOR ) THEN + CALL DLASR( 'R', 'V', 'F', P, IMAX-IMIN+1, + $ WORK(IU1CS+IMIN-1), WORK(IU1SN+IMIN-1), + $ U1(1,IMIN), LDU1 ) + ELSE + CALL DLASR( 'L', 'V', 'F', IMAX-IMIN+1, P, + $ WORK(IU1CS+IMIN-1), WORK(IU1SN+IMIN-1), + $ U1(IMIN,1), LDU1 ) + END IF + END IF + IF( WANTU2 ) THEN + IF( COLMAJOR ) THEN + CALL DLASR( 'R', 'V', 'F', M-P, IMAX-IMIN+1, + $ WORK(IU2CS+IMIN-1), WORK(IU2SN+IMIN-1), + $ U2(1,IMIN), LDU2 ) + ELSE + CALL DLASR( 'L', 'V', 'F', IMAX-IMIN+1, M-P, + $ WORK(IU2CS+IMIN-1), WORK(IU2SN+IMIN-1), + $ U2(IMIN,1), LDU2 ) + END IF + END IF + IF( WANTV1T ) THEN + IF( COLMAJOR ) THEN + CALL DLASR( 'L', 'V', 'F', IMAX-IMIN+1, Q, + $ WORK(IV1TCS+IMIN-1), WORK(IV1TSN+IMIN-1), + $ V1T(IMIN,1), LDV1T ) + ELSE + CALL DLASR( 'R', 'V', 'F', Q, IMAX-IMIN+1, + $ WORK(IV1TCS+IMIN-1), WORK(IV1TSN+IMIN-1), + $ V1T(1,IMIN), LDV1T ) + END IF + END IF + IF( WANTV2T ) THEN + IF( COLMAJOR ) THEN + CALL DLASR( 'L', 'V', 'F', IMAX-IMIN+1, M-Q, + $ WORK(IV2TCS+IMIN-1), WORK(IV2TSN+IMIN-1), + $ V2T(IMIN,1), LDV2T ) + ELSE + CALL DLASR( 'R', 'V', 'F', M-Q, IMAX-IMIN+1, + $ WORK(IV2TCS+IMIN-1), WORK(IV2TSN+IMIN-1), + $ V2T(1,IMIN), LDV2T ) + END IF + END IF +* +* Fix signs on B11(IMAX-1,IMAX) and B21(IMAX-1,IMAX) +* + IF( B11E(IMAX-1)+B21E(IMAX-1) .GT. 0 ) THEN + B11D(IMAX) = -B11D(IMAX) + B21D(IMAX) = -B21D(IMAX) + IF( WANTV1T ) THEN + IF( COLMAJOR ) THEN + CALL DSCAL( Q, NEGONE, V1T(IMAX,1), LDV1T ) + ELSE + CALL DSCAL( Q, NEGONE, V1T(1,IMAX), 1 ) + END IF + END IF + END IF +* +* Compute THETA(IMAX) +* + X1 = COS(PHI(IMAX-1))*B11D(IMAX) + + $ SIN(PHI(IMAX-1))*B12E(IMAX-1) + Y1 = COS(PHI(IMAX-1))*B21D(IMAX) + + $ SIN(PHI(IMAX-1))*B22E(IMAX-1) +* + THETA(IMAX) = ATAN2( ABS(Y1), ABS(X1) ) +* +* Fix signs on B11(IMAX,IMAX), B12(IMAX,IMAX-1), B21(IMAX,IMAX), +* and B22(IMAX,IMAX-1) +* + IF( B11D(IMAX)+B12E(IMAX-1) .LT. 0 ) THEN + B12D(IMAX) = -B12D(IMAX) + IF( WANTU1 ) THEN + IF( COLMAJOR ) THEN + CALL DSCAL( P, NEGONE, U1(1,IMAX), 1 ) + ELSE + CALL DSCAL( P, NEGONE, U1(IMAX,1), LDU1 ) + END IF + END IF + END IF + IF( B21D(IMAX)+B22E(IMAX-1) .GT. 0 ) THEN + B22D(IMAX) = -B22D(IMAX) + IF( WANTU2 ) THEN + IF( COLMAJOR ) THEN + CALL DSCAL( M-P, NEGONE, U2(1,IMAX), 1 ) + ELSE + CALL DSCAL( M-P, NEGONE, U2(IMAX,1), LDU2 ) + END IF + END IF + END IF +* +* Fix signs on B12(IMAX,IMAX) and B22(IMAX,IMAX) +* + IF( B12D(IMAX)+B22D(IMAX) .LT. 0 ) THEN + IF( WANTV2T ) THEN + IF( COLMAJOR ) THEN + CALL DSCAL( M-Q, NEGONE, V2T(IMAX,1), LDV2T ) + ELSE + CALL DSCAL( M-Q, NEGONE, V2T(1,IMAX), 1 ) + END IF + END IF + END IF +* +* Test for negligible sines or cosines +* + DO I = IMIN, IMAX + IF( THETA(I) .LT. THRESH ) THEN + THETA(I) = ZERO + ELSE IF( THETA(I) .GT. PIOVER2-THRESH ) THEN + THETA(I) = PIOVER2 + END IF + END DO + DO I = IMIN, IMAX-1 + IF( PHI(I) .LT. THRESH ) THEN + PHI(I) = ZERO + ELSE IF( PHI(I) .GT. PIOVER2-THRESH ) THEN + PHI(I) = PIOVER2 + END IF + END DO +* +* Deflate +* + IF (IMAX .GT. 1) THEN + DO WHILE( PHI(IMAX-1) .EQ. ZERO ) + IMAX = IMAX - 1 + IF (IMAX .LE. 1) EXIT + END DO + END IF + IF( IMIN .GT. IMAX - 1 ) + $ IMIN = IMAX - 1 + IF (IMIN .GT. 1) THEN + DO WHILE (PHI(IMIN-1) .NE. ZERO) + IMIN = IMIN - 1 + IF (IMIN .LE. 1) EXIT + END DO + END IF +* +* Repeat main iteration loop +* + END DO +* +* Postprocessing: order THETA from least to greatest +* + DO I = 1, Q +* + MINI = I + THETAMIN = THETA(I) + DO J = I+1, Q + IF( THETA(J) .LT. THETAMIN ) THEN + MINI = J + THETAMIN = THETA(J) + END IF + END DO +* + IF( MINI .NE. I ) THEN + THETA(MINI) = THETA(I) + THETA(I) = THETAMIN + IF( COLMAJOR ) THEN + IF( WANTU1 ) + $ CALL DSWAP( P, U1(1,I), 1, U1(1,MINI), 1 ) + IF( WANTU2 ) + $ CALL DSWAP( M-P, U2(1,I), 1, U2(1,MINI), 1 ) + IF( WANTV1T ) + $ CALL DSWAP( Q, V1T(I,1), LDV1T, V1T(MINI,1), LDV1T ) + IF( WANTV2T ) + $ CALL DSWAP( M-Q, V2T(I,1), LDV2T, V2T(MINI,1), + $ LDV2T ) + ELSE + IF( WANTU1 ) + $ CALL DSWAP( P, U1(I,1), LDU1, U1(MINI,1), LDU1 ) + IF( WANTU2 ) + $ CALL DSWAP( M-P, U2(I,1), LDU2, U2(MINI,1), LDU2 ) + IF( WANTV1T ) + $ CALL DSWAP( Q, V1T(1,I), 1, V1T(1,MINI), 1 ) + IF( WANTV2T ) + $ CALL DSWAP( M-Q, V2T(1,I), 1, V2T(1,MINI), 1 ) + END IF + END IF +* + END DO +* + RETURN +* +* End of DBBCSD +* + END + diff --git a/dspl/liblapack/SRC/dbdsdc.f b/dspl/liblapack/SRC/dbdsdc.f new file mode 100644 index 0000000..4c8b95f --- /dev/null +++ b/dspl/liblapack/SRC/dbdsdc.f @@ -0,0 +1,524 @@ +*> \brief \b DBDSDC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DBDSDC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DBDSDC( UPLO, COMPQ, N, D, E, U, LDU, VT, LDVT, Q, IQ, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ, UPLO +* INTEGER INFO, LDU, LDVT, N +* .. +* .. Array Arguments .. +* INTEGER IQ( * ), IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), Q( * ), U( LDU, * ), +* $ VT( LDVT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DBDSDC computes the singular value decomposition (SVD) of a real +*> N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, +*> using a divide and conquer method, where S is a diagonal matrix +*> with non-negative diagonal elements (the singular values of B), and +*> U and VT are orthogonal matrices of left and right singular vectors, +*> respectively. DBDSDC can be used to compute all singular values, +*> and optionally, singular vectors or singular vectors in compact form. +*> +*> This code makes very mild assumptions about floating point +*> arithmetic. It will work on machines with a guard digit in +*> add/subtract, or on those binary machines without guard digits +*> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. +*> It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. See DLASD3 for details. +*> +*> The code currently calls DLASDQ if singular values only are desired. +*> However, it can be slightly modified to compute singular values +*> using the divide and conquer method. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': B is upper bidiagonal. +*> = 'L': B is lower bidiagonal. +*> \endverbatim +*> +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> Specifies whether singular vectors are to be computed +*> as follows: +*> = 'N': Compute singular values only; +*> = 'P': Compute singular values and compute singular +*> vectors in compact form; +*> = 'I': Compute singular values and singular vectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the n diagonal elements of the bidiagonal matrix B. +*> On exit, if INFO=0, the singular values of B. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> On entry, the elements of E contain the offdiagonal +*> elements of the bidiagonal matrix whose SVD is desired. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension (LDU,N) +*> If COMPQ = 'I', then: +*> On exit, if INFO = 0, U contains the left singular vectors +*> of the bidiagonal matrix. +*> For other values of COMPQ, U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= 1. +*> If singular vectors are desired, then LDU >= max( 1, N ). +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is DOUBLE PRECISION array, dimension (LDVT,N) +*> If COMPQ = 'I', then: +*> On exit, if INFO = 0, VT**T contains the right singular +*> vectors of the bidiagonal matrix. +*> For other values of COMPQ, VT is not referenced. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= 1. +*> If singular vectors are desired, then LDVT >= max( 1, N ). +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ) +*> If COMPQ = 'P', then: +*> On exit, if INFO = 0, Q and IQ contain the left +*> and right singular vectors in a compact form, +*> requiring O(N log N) space instead of 2*N**2. +*> In particular, Q contains all the DOUBLE PRECISION data in +*> LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) +*> words of memory, where SMLSIZ is returned by ILAENV and +*> is equal to the maximum size of the subproblems at the +*> bottom of the computation tree (usually about 25). +*> For other values of COMPQ, Q is not referenced. +*> \endverbatim +*> +*> \param[out] IQ +*> \verbatim +*> IQ is INTEGER array, dimension (LDIQ) +*> If COMPQ = 'P', then: +*> On exit, if INFO = 0, Q and IQ contain the left +*> and right singular vectors in a compact form, +*> requiring O(N log N) space instead of 2*N**2. +*> In particular, IQ contains all INTEGER data in +*> LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) +*> words of memory, where SMLSIZ is returned by ILAENV and +*> is equal to the maximum size of the subproblems at the +*> bottom of the computation tree (usually about 25). +*> For other values of COMPQ, IQ is not referenced. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> If COMPQ = 'N' then LWORK >= (4 * N). +*> If COMPQ = 'P' then LWORK >= (6 * N). +*> If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (8*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: The algorithm failed to compute a singular value. +*> The update process of divide and conquer failed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DBDSDC( UPLO, COMPQ, N, D, E, U, LDU, VT, LDVT, Q, IQ, + $ WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, UPLO + INTEGER INFO, LDU, LDVT, N +* .. +* .. Array Arguments .. + INTEGER IQ( * ), IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), Q( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* ===================================================================== +* Changed dimension statement in comment describing E from (N) to +* (N-1). Sven, 17 Feb 05. +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER DIFL, DIFR, GIVCOL, GIVNUM, GIVPTR, I, IC, + $ ICOMPQ, IERR, II, IS, IU, IUPLO, IVT, J, K, KK, + $ MLVL, NM1, NSIZE, PERM, POLES, QSTART, SMLSIZ, + $ SMLSZP, SQRE, START, WSTART, Z + DOUBLE PRECISION CS, EPS, ORGNRM, P, R, SN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, ILAENV, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLARTG, DLASCL, DLASD0, DLASDA, DLASDQ, + $ DLASET, DLASR, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, LOG, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IUPLO = 0 + IF( LSAME( UPLO, 'U' ) ) + $ IUPLO = 1 + IF( LSAME( UPLO, 'L' ) ) + $ IUPLO = 2 + IF( LSAME( COMPQ, 'N' ) ) THEN + ICOMPQ = 0 + ELSE IF( LSAME( COMPQ, 'P' ) ) THEN + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ICOMPQ = 2 + ELSE + ICOMPQ = -1 + END IF + IF( IUPLO.EQ.0 ) THEN + INFO = -1 + ELSE IF( ICOMPQ.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ( LDU.LT.1 ) .OR. ( ( ICOMPQ.EQ.2 ) .AND. ( LDU.LT. + $ N ) ) ) THEN + INFO = -7 + ELSE IF( ( LDVT.LT.1 ) .OR. ( ( ICOMPQ.EQ.2 ) .AND. ( LDVT.LT. + $ N ) ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DBDSDC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + SMLSIZ = ILAENV( 9, 'DBDSDC', ' ', 0, 0, 0, 0 ) + IF( N.EQ.1 ) THEN + IF( ICOMPQ.EQ.1 ) THEN + Q( 1 ) = SIGN( ONE, D( 1 ) ) + Q( 1+SMLSIZ*N ) = ONE + ELSE IF( ICOMPQ.EQ.2 ) THEN + U( 1, 1 ) = SIGN( ONE, D( 1 ) ) + VT( 1, 1 ) = ONE + END IF + D( 1 ) = ABS( D( 1 ) ) + RETURN + END IF + NM1 = N - 1 +* +* If matrix lower bidiagonal, rotate to be upper bidiagonal +* by applying Givens rotations on the left +* + WSTART = 1 + QSTART = 3 + IF( ICOMPQ.EQ.1 ) THEN + CALL DCOPY( N, D, 1, Q( 1 ), 1 ) + CALL DCOPY( N-1, E, 1, Q( N+1 ), 1 ) + END IF + IF( IUPLO.EQ.2 ) THEN + QSTART = 5 + IF( ICOMPQ .EQ. 2 ) WSTART = 2*N - 1 + DO 10 I = 1, N - 1 + CALL DLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( ICOMPQ.EQ.1 ) THEN + Q( I+2*N ) = CS + Q( I+3*N ) = SN + ELSE IF( ICOMPQ.EQ.2 ) THEN + WORK( I ) = CS + WORK( NM1+I ) = -SN + END IF + 10 CONTINUE + END IF +* +* If ICOMPQ = 0, use DLASDQ to compute the singular values. +* + IF( ICOMPQ.EQ.0 ) THEN +* Ignore WSTART, instead using WORK( 1 ), since the two vectors +* for CS and -SN above are added only if ICOMPQ == 2, +* and adding them exceeds documented WORK size of 4*n. + CALL DLASDQ( 'U', 0, N, 0, 0, 0, D, E, VT, LDVT, U, LDU, U, + $ LDU, WORK( 1 ), INFO ) + GO TO 40 + END IF +* +* If N is smaller than the minimum divide size SMLSIZ, then solve +* the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN + IF( ICOMPQ.EQ.2 ) THEN + CALL DLASET( 'A', N, N, ZERO, ONE, U, LDU ) + CALL DLASET( 'A', N, N, ZERO, ONE, VT, LDVT ) + CALL DLASDQ( 'U', 0, N, N, N, 0, D, E, VT, LDVT, U, LDU, U, + $ LDU, WORK( WSTART ), INFO ) + ELSE IF( ICOMPQ.EQ.1 ) THEN + IU = 1 + IVT = IU + N + CALL DLASET( 'A', N, N, ZERO, ONE, Q( IU+( QSTART-1 )*N ), + $ N ) + CALL DLASET( 'A', N, N, ZERO, ONE, Q( IVT+( QSTART-1 )*N ), + $ N ) + CALL DLASDQ( 'U', 0, N, N, N, 0, D, E, + $ Q( IVT+( QSTART-1 )*N ), N, + $ Q( IU+( QSTART-1 )*N ), N, + $ Q( IU+( QSTART-1 )*N ), N, WORK( WSTART ), + $ INFO ) + END IF + GO TO 40 + END IF +* + IF( ICOMPQ.EQ.2 ) THEN + CALL DLASET( 'A', N, N, ZERO, ONE, U, LDU ) + CALL DLASET( 'A', N, N, ZERO, ONE, VT, LDVT ) + END IF +* +* Scale. +* + ORGNRM = DLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) + $ RETURN + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, IERR ) + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, IERR ) +* + EPS = (0.9D+0)*DLAMCH( 'Epsilon' ) +* + MLVL = INT( LOG( DBLE( N ) / DBLE( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1 + SMLSZP = SMLSIZ + 1 +* + IF( ICOMPQ.EQ.1 ) THEN + IU = 1 + IVT = 1 + SMLSIZ + DIFL = IVT + SMLSZP + DIFR = DIFL + MLVL + Z = DIFR + MLVL*2 + IC = Z + MLVL + IS = IC + 1 + POLES = IS + 1 + GIVNUM = POLES + 2*MLVL +* + K = 1 + GIVPTR = 2 + PERM = 3 + GIVCOL = PERM + MLVL + END IF +* + DO 20 I = 1, N + IF( ABS( D( I ) ).LT.EPS ) THEN + D( I ) = SIGN( EPS, D( I ) ) + END IF + 20 CONTINUE +* + START = 1 + SQRE = 0 +* + DO 30 I = 1, NM1 + IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN +* +* Subproblem found. First determine its size and then +* apply divide and conquer on it. +* + IF( I.LT.NM1 ) THEN +* +* A subproblem with E(I) small for I < NM1. +* + NSIZE = I - START + 1 + ELSE IF( ABS( E( I ) ).GE.EPS ) THEN +* +* A subproblem with E(NM1) not too small but I = NM1. +* + NSIZE = N - START + 1 + ELSE +* +* A subproblem with E(NM1) small. This implies an +* 1-by-1 subproblem at D(N). Solve this 1-by-1 problem +* first. +* + NSIZE = I - START + 1 + IF( ICOMPQ.EQ.2 ) THEN + U( N, N ) = SIGN( ONE, D( N ) ) + VT( N, N ) = ONE + ELSE IF( ICOMPQ.EQ.1 ) THEN + Q( N+( QSTART-1 )*N ) = SIGN( ONE, D( N ) ) + Q( N+( SMLSIZ+QSTART-1 )*N ) = ONE + END IF + D( N ) = ABS( D( N ) ) + END IF + IF( ICOMPQ.EQ.2 ) THEN + CALL DLASD0( NSIZE, SQRE, D( START ), E( START ), + $ U( START, START ), LDU, VT( START, START ), + $ LDVT, SMLSIZ, IWORK, WORK( WSTART ), INFO ) + ELSE + CALL DLASDA( ICOMPQ, SMLSIZ, NSIZE, SQRE, D( START ), + $ E( START ), Q( START+( IU+QSTART-2 )*N ), N, + $ Q( START+( IVT+QSTART-2 )*N ), + $ IQ( START+K*N ), Q( START+( DIFL+QSTART-2 )* + $ N ), Q( START+( DIFR+QSTART-2 )*N ), + $ Q( START+( Z+QSTART-2 )*N ), + $ Q( START+( POLES+QSTART-2 )*N ), + $ IQ( START+GIVPTR*N ), IQ( START+GIVCOL*N ), + $ N, IQ( START+PERM*N ), + $ Q( START+( GIVNUM+QSTART-2 )*N ), + $ Q( START+( IC+QSTART-2 )*N ), + $ Q( START+( IS+QSTART-2 )*N ), + $ WORK( WSTART ), IWORK, INFO ) + END IF + IF( INFO.NE.0 ) THEN + RETURN + END IF + START = I + 1 + END IF + 30 CONTINUE +* +* Unscale +* + CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, IERR ) + 40 CONTINUE +* +* Use Selection Sort to minimize swaps of singular vectors +* + DO 60 II = 2, N + I = II - 1 + KK = I + P = D( I ) + DO 50 J = II, N + IF( D( J ).GT.P ) THEN + KK = J + P = D( J ) + END IF + 50 CONTINUE + IF( KK.NE.I ) THEN + D( KK ) = D( I ) + D( I ) = P + IF( ICOMPQ.EQ.1 ) THEN + IQ( I ) = KK + ELSE IF( ICOMPQ.EQ.2 ) THEN + CALL DSWAP( N, U( 1, I ), 1, U( 1, KK ), 1 ) + CALL DSWAP( N, VT( I, 1 ), LDVT, VT( KK, 1 ), LDVT ) + END IF + ELSE IF( ICOMPQ.EQ.1 ) THEN + IQ( I ) = I + END IF + 60 CONTINUE +* +* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO +* + IF( ICOMPQ.EQ.1 ) THEN + IF( IUPLO.EQ.1 ) THEN + IQ( N ) = 1 + ELSE + IQ( N ) = 0 + END IF + END IF +* +* If B is lower bidiagonal, update U by those Givens rotations +* which rotated B to be upper bidiagonal +* + IF( ( IUPLO.EQ.2 ) .AND. ( ICOMPQ.EQ.2 ) ) + $ CALL DLASR( 'L', 'V', 'B', N, N, WORK( 1 ), WORK( N ), U, LDU ) +* + RETURN +* +* End of DBDSDC +* + END diff --git a/dspl/liblapack/SRC/dbdsqr.f b/dspl/liblapack/SRC/dbdsqr.f new file mode 100644 index 0000000..93db95e --- /dev/null +++ b/dspl/liblapack/SRC/dbdsqr.f @@ -0,0 +1,867 @@ +*> \brief \b DBDSQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DBDSQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, +* LDU, C, LDC, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( LDC, * ), D( * ), E( * ), U( LDU, * ), +* $ VT( LDVT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DBDSQR computes the singular values and, optionally, the right and/or +*> left singular vectors from the singular value decomposition (SVD) of +*> a real N-by-N (upper or lower) bidiagonal matrix B using the implicit +*> zero-shift QR algorithm. The SVD of B has the form +*> +*> B = Q * S * P**T +*> +*> where S is the diagonal matrix of singular values, Q is an orthogonal +*> matrix of left singular vectors, and P is an orthogonal matrix of +*> right singular vectors. If left singular vectors are requested, this +*> subroutine actually returns U*Q instead of Q, and, if right singular +*> vectors are requested, this subroutine returns P**T*VT instead of +*> P**T, for given real input matrices U and VT. When U and VT are the +*> orthogonal matrices that reduce a general matrix A to bidiagonal +*> form: A = U*B*VT, as computed by DGEBRD, then +*> +*> A = (U*Q) * S * (P**T*VT) +*> +*> is the SVD of A. Optionally, the subroutine may also compute Q**T*C +*> for a given real input matrix C. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices With +*> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, +*> LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, +*> no. 5, pp. 873-912, Sept 1990) and +*> "Accurate singular values and differential qd algorithms," by +*> B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics +*> Department, University of California at Berkeley, July 1992 +*> for a detailed description of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': B is upper bidiagonal; +*> = 'L': B is lower bidiagonal. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix B. N >= 0. +*> \endverbatim +*> +*> \param[in] NCVT +*> \verbatim +*> NCVT is INTEGER +*> The number of columns of the matrix VT. NCVT >= 0. +*> \endverbatim +*> +*> \param[in] NRU +*> \verbatim +*> NRU is INTEGER +*> The number of rows of the matrix U. NRU >= 0. +*> \endverbatim +*> +*> \param[in] NCC +*> \verbatim +*> NCC is INTEGER +*> The number of columns of the matrix C. NCC >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the n diagonal elements of the bidiagonal matrix B. +*> On exit, if INFO=0, the singular values of B in decreasing +*> order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> On entry, the N-1 offdiagonal elements of the bidiagonal +*> matrix B. +*> On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E +*> will contain the diagonal and superdiagonal elements of a +*> bidiagonal matrix orthogonally equivalent to the one given +*> as input. +*> \endverbatim +*> +*> \param[in,out] VT +*> \verbatim +*> VT is DOUBLE PRECISION array, dimension (LDVT, NCVT) +*> On entry, an N-by-NCVT matrix VT. +*> On exit, VT is overwritten by P**T * VT. +*> Not referenced if NCVT = 0. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. +*> LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. +*> \endverbatim +*> +*> \param[in,out] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension (LDU, N) +*> On entry, an NRU-by-N matrix U. +*> On exit, U is overwritten by U * Q. +*> Not referenced if NRU = 0. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,NRU). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC, NCC) +*> On entry, an N-by-NCC matrix C. +*> On exit, C is overwritten by Q**T * C. +*> Not referenced if NCC = 0. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. +*> LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0: +*> if NCVT = NRU = NCC = 0, +*> = 1, a split was marked by a positive value in E +*> = 2, current block of Z not diagonalized after 30*N +*> iterations (in inner while loop) +*> = 3, termination criterion of outer while loop not met +*> (program created more than N unreduced blocks) +*> else NCVT = NRU = NCC = 0, +*> the algorithm did not converge; D and E contain the +*> elements of a bidiagonal matrix which is orthogonally +*> similar to the input matrix B; if INFO = i, i +*> elements of E have not converged to zero. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8))) +*> TOLMUL controls the convergence criterion of the QR loop. +*> If it is positive, TOLMUL*EPS is the desired relative +*> precision in the computed singular values. +*> If it is negative, abs(TOLMUL*EPS*sigma_max) is the +*> desired absolute accuracy in the computed singular +*> values (corresponds to relative accuracy +*> abs(TOLMUL*EPS) in the largest singular value. +*> abs(TOLMUL) should be between 1 and 1/EPS, and preferably +*> between 10 (for fast convergence) and .1/EPS +*> (for there to be some accuracy in the results). +*> Default is to lose at either one eighth or 2 of the +*> available decimal digits in each computed singular value +*> (whichever is smaller). +*> +*> MAXITR INTEGER, default = 6 +*> MAXITR controls the maximum number of passes of the +*> algorithm through its inner loop. The algorithms stops +*> (and so fails to converge) if the number of passes +*> through the inner loop exceeds MAXITR*N**2. +*> +*> \endverbatim +* +*> \par Note: +* =========== +*> +*> \verbatim +*> Bug report from Cezary Dendek. +*> On March 23rd 2017, the INTEGER variable MAXIT = MAXITR*N**2 is +*> removed since it can overflow pretty easily (for N larger or equal +*> than 18,919). We instead use MAXITDIVN = MAXITR*N. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, + $ LDU, C, LDC, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), D( * ), E( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION NEGONE + PARAMETER ( NEGONE = -1.0D0 ) + DOUBLE PRECISION HNDRTH + PARAMETER ( HNDRTH = 0.01D0 ) + DOUBLE PRECISION TEN + PARAMETER ( TEN = 10.0D0 ) + DOUBLE PRECISION HNDRD + PARAMETER ( HNDRD = 100.0D0 ) + DOUBLE PRECISION MEIGTH + PARAMETER ( MEIGTH = -0.125D0 ) + INTEGER MAXITR + PARAMETER ( MAXITR = 6 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, ROTATE + INTEGER I, IDIR, ISUB, ITER, ITERDIVN, J, LL, LLL, M, + $ MAXITDIVN, NM1, NM12, NM13, OLDLL, OLDM + DOUBLE PRECISION ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU, + $ OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL, + $ SINR, SLL, SMAX, SMIN, SMINL, SMINOA, + $ SN, THRESH, TOL, TOLMUL, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLARTG, DLAS2, DLASQ1, DLASR, DLASV2, DROT, + $ DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NCVT.LT.0 ) THEN + INFO = -3 + ELSE IF( NRU.LT.0 ) THEN + INFO = -4 + ELSE IF( NCC.LT.0 ) THEN + INFO = -5 + ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR. + $ ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN + INFO = -9 + ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN + INFO = -11 + ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR. + $ ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DBDSQR', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN + IF( N.EQ.1 ) + $ GO TO 160 +* +* ROTATE is true if any singular vectors desired, false otherwise +* + ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 ) +* +* If no singular vectors desired, use qd algorithm +* + IF( .NOT.ROTATE ) THEN + CALL DLASQ1( N, D, E, WORK, INFO ) +* +* If INFO equals 2, dqds didn't finish, try to finish +* + IF( INFO .NE. 2 ) RETURN + INFO = 0 + END IF +* + NM1 = N - 1 + NM12 = NM1 + NM1 + NM13 = NM12 + NM1 + IDIR = 0 +* +* Get machine constants +* + EPS = DLAMCH( 'Epsilon' ) + UNFL = DLAMCH( 'Safe minimum' ) +* +* If matrix lower bidiagonal, rotate to be upper bidiagonal +* by applying Givens rotations on the left +* + IF( LOWER ) THEN + DO 10 I = 1, N - 1 + CALL DLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + WORK( I ) = CS + WORK( NM1+I ) = SN + 10 CONTINUE +* +* Update singular vectors if desired +* + IF( NRU.GT.0 ) + $ CALL DLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ), WORK( N ), U, + $ LDU ) + IF( NCC.GT.0 ) + $ CALL DLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ), WORK( N ), C, + $ LDC ) + END IF +* +* Compute singular values to relative accuracy TOL +* (By setting TOL to be negative, algorithm will compute +* singular values to absolute accuracy ABS(TOL)*norm(input matrix)) +* + TOLMUL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) ) + TOL = TOLMUL*EPS +* +* Compute approximate maximum, minimum singular values +* + SMAX = ZERO + DO 20 I = 1, N + SMAX = MAX( SMAX, ABS( D( I ) ) ) + 20 CONTINUE + DO 30 I = 1, N - 1 + SMAX = MAX( SMAX, ABS( E( I ) ) ) + 30 CONTINUE + SMINL = ZERO + IF( TOL.GE.ZERO ) THEN +* +* Relative accuracy desired +* + SMINOA = ABS( D( 1 ) ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + MU = SMINOA + DO 40 I = 2, N + MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) ) + SMINOA = MIN( SMINOA, MU ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE + SMINOA = SMINOA / SQRT( DBLE( N ) ) + THRESH = MAX( TOL*SMINOA, MAXITR*(N*(N*UNFL)) ) + ELSE +* +* Absolute accuracy desired +* + THRESH = MAX( ABS( TOL )*SMAX, MAXITR*(N*(N*UNFL)) ) + END IF +* +* Prepare for main iteration loop for the singular values +* (MAXIT is the maximum number of passes through the inner +* loop permitted before nonconvergence signalled.) +* + MAXITDIVN = MAXITR*N + ITERDIVN = 0 + ITER = -1 + OLDLL = -1 + OLDM = -1 +* +* M points to last element of unconverged part of matrix +* + M = N +* +* Begin main iteration loop +* + 60 CONTINUE +* +* Check for convergence or exceeding iteration count +* + IF( M.LE.1 ) + $ GO TO 160 +* + IF( ITER.GE.N ) THEN + ITER = ITER - N + ITERDIVN = ITERDIVN + 1 + IF( ITERDIVN.GE.MAXITDIVN ) + $ GO TO 200 + END IF +* +* Find diagonal block of matrix to work on +* + IF( TOL.LT.ZERO .AND. ABS( D( M ) ).LE.THRESH ) + $ D( M ) = ZERO + SMAX = ABS( D( M ) ) + SMIN = SMAX + DO 70 LLL = 1, M - 1 + LL = M - LLL + ABSS = ABS( D( LL ) ) + ABSE = ABS( E( LL ) ) + IF( TOL.LT.ZERO .AND. ABSS.LE.THRESH ) + $ D( LL ) = ZERO + IF( ABSE.LE.THRESH ) + $ GO TO 80 + SMIN = MIN( SMIN, ABSS ) + SMAX = MAX( SMAX, ABSS, ABSE ) + 70 CONTINUE + LL = 0 + GO TO 90 + 80 CONTINUE + E( LL ) = ZERO +* +* Matrix splits since E(LL) = 0 +* + IF( LL.EQ.M-1 ) THEN +* +* Convergence of bottom singular value, return to top of loop +* + M = M - 1 + GO TO 60 + END IF + 90 CONTINUE + LL = LL + 1 +* +* E(LL) through E(M-1) are nonzero, E(LL-1) is zero +* + IF( LL.EQ.M-1 ) THEN +* +* 2 by 2 block, handle separately +* + CALL DLASV2( D( M-1 ), E( M-1 ), D( M ), SIGMN, SIGMX, SINR, + $ COSR, SINL, COSL ) + D( M-1 ) = SIGMX + E( M-1 ) = ZERO + D( M ) = SIGMN +* +* Compute singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL DROT( NCVT, VT( M-1, 1 ), LDVT, VT( M, 1 ), LDVT, COSR, + $ SINR ) + IF( NRU.GT.0 ) + $ CALL DROT( NRU, U( 1, M-1 ), 1, U( 1, M ), 1, COSL, SINL ) + IF( NCC.GT.0 ) + $ CALL DROT( NCC, C( M-1, 1 ), LDC, C( M, 1 ), LDC, COSL, + $ SINL ) + M = M - 2 + GO TO 60 + END IF +* +* If working on new submatrix, choose shift direction +* (from larger end diagonal element towards smaller) +* + IF( LL.GT.OLDM .OR. M.LT.OLDLL ) THEN + IF( ABS( D( LL ) ).GE.ABS( D( M ) ) ) THEN +* +* Chase bulge from top (big end) to bottom (small end) +* + IDIR = 1 + ELSE +* +* Chase bulge from bottom (big end) to top (small end) +* + IDIR = 2 + END IF + END IF +* +* Apply convergence tests +* + IF( IDIR.EQ.1 ) THEN +* +* Run convergence test in forward direction +* First apply standard test to bottom of matrix +* + IF( ABS( E( M-1 ) ).LE.ABS( TOL )*ABS( D( M ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( M-1 ) ).LE.THRESH ) ) THEN + E( M-1 ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion forward +* + MU = ABS( D( LL ) ) + SMINL = MU + DO 100 LLL = LL, M - 1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + MU = ABS( D( LLL+1 ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 100 CONTINUE + END IF +* + ELSE +* +* Run convergence test in backward direction +* First apply standard test to top of matrix +* + IF( ABS( E( LL ) ).LE.ABS( TOL )*ABS( D( LL ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( LL ) ).LE.THRESH ) ) THEN + E( LL ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion backward +* + MU = ABS( D( M ) ) + SMINL = MU + DO 110 LLL = M - 1, LL, -1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + MU = ABS( D( LLL ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 110 CONTINUE + END IF + END IF + OLDLL = LL + OLDM = M +* +* Compute shift. First, test if shifting would ruin relative +* accuracy, and if so set the shift to zero. +* + IF( TOL.GE.ZERO .AND. N*TOL*( SMINL / SMAX ).LE. + $ MAX( EPS, HNDRTH*TOL ) ) THEN +* +* Use a zero shift to avoid loss of relative accuracy +* + SHIFT = ZERO + ELSE +* +* Compute the shift from 2-by-2 block at end of matrix +* + IF( IDIR.EQ.1 ) THEN + SLL = ABS( D( LL ) ) + CALL DLAS2( D( M-1 ), E( M-1 ), D( M ), SHIFT, R ) + ELSE + SLL = ABS( D( M ) ) + CALL DLAS2( D( LL ), E( LL ), D( LL+1 ), SHIFT, R ) + END IF +* +* Test if shift negligible, and if so set to zero +* + IF( SLL.GT.ZERO ) THEN + IF( ( SHIFT / SLL )**2.LT.EPS ) + $ SHIFT = ZERO + END IF + END IF +* +* Increment iteration count +* + ITER = ITER + M - LL +* +* If SHIFT = 0, do simplified QR iteration +* + IF( SHIFT.EQ.ZERO ) THEN + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 120 I = LL, M - 1 + CALL DLARTG( D( I )*CS, E( I ), CS, SN, R ) + IF( I.GT.LL ) + $ E( I-1 ) = OLDSN*R + CALL DLARTG( OLDCS*R, D( I+1 )*SN, OLDCS, OLDSN, D( I ) ) + WORK( I-LL+1 ) = CS + WORK( I-LL+1+NM1 ) = SN + WORK( I-LL+1+NM12 ) = OLDCS + WORK( I-LL+1+NM13 ) = OLDSN + 120 CONTINUE + H = D( M )*CS + D( M ) = H*OLDCS + E( M-1 ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ), + $ WORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL DLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ), + $ WORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ), + $ WORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 130 I = M, LL + 1, -1 + CALL DLARTG( D( I )*CS, E( I-1 ), CS, SN, R ) + IF( I.LT.M ) + $ E( I ) = OLDSN*R + CALL DLARTG( OLDCS*R, D( I-1 )*SN, OLDCS, OLDSN, D( I ) ) + WORK( I-LL ) = CS + WORK( I-LL+NM1 ) = -SN + WORK( I-LL+NM12 ) = OLDCS + WORK( I-LL+NM13 ) = -OLDSN + 130 CONTINUE + H = D( LL )*CS + D( LL ) = H*OLDCS + E( LL ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ), + $ WORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL DLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ), + $ WORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ), + $ WORK( N ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO + END IF + ELSE +* +* Use nonzero shift +* + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( LL ) )-SHIFT )* + $ ( SIGN( ONE, D( LL ) )+SHIFT / D( LL ) ) + G = E( LL ) + DO 140 I = LL, M - 1 + CALL DLARTG( F, G, COSR, SINR, R ) + IF( I.GT.LL ) + $ E( I-1 ) = R + F = COSR*D( I ) + SINR*E( I ) + E( I ) = COSR*E( I ) - SINR*D( I ) + G = SINR*D( I+1 ) + D( I+1 ) = COSR*D( I+1 ) + CALL DLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I ) + SINL*D( I+1 ) + D( I+1 ) = COSL*D( I+1 ) - SINL*E( I ) + IF( I.LT.M-1 ) THEN + G = SINL*E( I+1 ) + E( I+1 ) = COSL*E( I+1 ) + END IF + WORK( I-LL+1 ) = COSR + WORK( I-LL+1+NM1 ) = SINR + WORK( I-LL+1+NM12 ) = COSL + WORK( I-LL+1+NM13 ) = SINL + 140 CONTINUE + E( M-1 ) = F +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ), + $ WORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL DLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ), + $ WORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL DLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ), + $ WORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( M ) )-SHIFT )*( SIGN( ONE, D( M ) )+SHIFT / + $ D( M ) ) + G = E( M-1 ) + DO 150 I = M, LL + 1, -1 + CALL DLARTG( F, G, COSR, SINR, R ) + IF( I.LT.M ) + $ E( I ) = R + F = COSR*D( I ) + SINR*E( I-1 ) + E( I-1 ) = COSR*E( I-1 ) - SINR*D( I ) + G = SINR*D( I-1 ) + D( I-1 ) = COSR*D( I-1 ) + CALL DLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I-1 ) + SINL*D( I-1 ) + D( I-1 ) = COSL*D( I-1 ) - SINL*E( I-1 ) + IF( I.GT.LL+1 ) THEN + G = SINL*E( I-2 ) + E( I-2 ) = COSL*E( I-2 ) + END IF + WORK( I-LL ) = COSR + WORK( I-LL+NM1 ) = -SINR + WORK( I-LL+NM12 ) = COSL + WORK( I-LL+NM13 ) = -SINL + 150 CONTINUE + E( LL ) = F +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO +* +* Update singular vectors if desired +* + IF( NCVT.GT.0 ) + $ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ), + $ WORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL DLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ), + $ WORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL DLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ), + $ WORK( N ), C( LL, 1 ), LDC ) + END IF + END IF +* +* QR iteration finished, go back and check convergence +* + GO TO 60 +* +* All singular values converged, so make them positive +* + 160 CONTINUE + DO 170 I = 1, N + IF( D( I ).LT.ZERO ) THEN + D( I ) = -D( I ) +* +* Change sign of singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL DSCAL( NCVT, NEGONE, VT( I, 1 ), LDVT ) + END IF + 170 CONTINUE +* +* Sort the singular values into decreasing order (insertion sort on +* singular values, but only one transposition per singular vector) +* + DO 190 I = 1, N - 1 +* +* Scan for smallest D(I) +* + ISUB = 1 + SMIN = D( 1 ) + DO 180 J = 2, N + 1 - I + IF( D( J ).LE.SMIN ) THEN + ISUB = J + SMIN = D( J ) + END IF + 180 CONTINUE + IF( ISUB.NE.N+1-I ) THEN +* +* Swap singular values and vectors +* + D( ISUB ) = D( N+1-I ) + D( N+1-I ) = SMIN + IF( NCVT.GT.0 ) + $ CALL DSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( N+1-I, 1 ), + $ LDVT ) + IF( NRU.GT.0 ) + $ CALL DSWAP( NRU, U( 1, ISUB ), 1, U( 1, N+1-I ), 1 ) + IF( NCC.GT.0 ) + $ CALL DSWAP( NCC, C( ISUB, 1 ), LDC, C( N+1-I, 1 ), LDC ) + END IF + 190 CONTINUE + GO TO 220 +* +* Maximum number of iterations exceeded, failure to converge +* + 200 CONTINUE + INFO = 0 + DO 210 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 210 CONTINUE + 220 CONTINUE + RETURN +* +* End of DBDSQR +* + END diff --git a/dspl/liblapack/SRC/dbdsvdx.f b/dspl/liblapack/SRC/dbdsvdx.f new file mode 100644 index 0000000..96fdb3d --- /dev/null +++ b/dspl/liblapack/SRC/dbdsvdx.f @@ -0,0 +1,792 @@ +*> \brief \b DBDSVDX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DBDSVDX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DBDSVDX( UPLO, JOBZ, RANGE, N, D, E, VL, VU, IL, IU, +* $ NS, S, Z, LDZ, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDZ, N, NS +* DOUBLE PRECISION VL, VU +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), S( * ), WORK( * ), +* Z( LDZ, * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DBDSVDX computes the singular value decomposition (SVD) of a real +*> N-by-N (upper or lower) bidiagonal matrix B, B = U * S * VT, +*> where S is a diagonal matrix with non-negative diagonal elements +*> (the singular values of B), and U and VT are orthogonal matrices +*> of left and right singular vectors, respectively. +*> +*> Given an upper bidiagonal B with diagonal D = [ d_1 d_2 ... d_N ] +*> and superdiagonal E = [ e_1 e_2 ... e_N-1 ], DBDSVDX computes the +*> singular value decompositon of B through the eigenvalues and +*> eigenvectors of the N*2-by-N*2 tridiagonal matrix +*> +*> | 0 d_1 | +*> | d_1 0 e_1 | +*> TGK = | e_1 0 d_2 | +*> | d_2 . . | +*> | . . . | +*> +*> If (s,u,v) is a singular triplet of B with ||u|| = ||v|| = 1, then +*> (+/-s,q), ||q|| = 1, are eigenpairs of TGK, with q = P * ( u' +/-v' ) / +*> sqrt(2) = ( v_1 u_1 v_2 u_2 ... v_n u_n ) / sqrt(2), and +*> P = [ e_{n+1} e_{1} e_{n+2} e_{2} ... ]. +*> +*> Given a TGK matrix, one can either a) compute -s,-v and change signs +*> so that the singular values (and corresponding vectors) are already in +*> descending order (as in DGESVD/DGESDD) or b) compute s,v and reorder +*> the values (and corresponding vectors). DBDSVDX implements a) by +*> calling DSTEVX (bisection plus inverse iteration, to be replaced +*> with a version of the Multiple Relative Robust Representation +*> algorithm. (See P. Willems and B. Lang, A framework for the MR^3 +*> algorithm: theory and implementation, SIAM J. Sci. Comput., +*> 35:740-766, 2013.) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': B is upper bidiagonal; +*> = 'L': B is lower bidiagonal. +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute singular values only; +*> = 'V': Compute singular values and singular vectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all singular values will be found. +*> = 'V': all singular values in the half-open interval [VL,VU) +*> will be found. +*> = 'I': the IL-th through IU-th singular values will be found. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the bidiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the bidiagonal matrix B. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (max(1,N-1)) +*> The (n-1) superdiagonal elements of the bidiagonal matrix +*> B in elements 1 to N-1. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for singular values. VU > VL. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for singular values. VU > VL. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest singular value to be returned. +*> 1 <= IL <= IU <= min(M,N), if min(M,N) > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest singular value to be returned. +*> 1 <= IL <= IU <= min(M,N), if min(M,N) > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[out] NS +*> \verbatim +*> NS is INTEGER +*> The total number of singular values found. 0 <= NS <= N. +*> If RANGE = 'A', NS = N, and if RANGE = 'I', NS = IU-IL+1. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The first NS elements contain the selected singular values in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (2*N,K) ) +*> If JOBZ = 'V', then if INFO = 0 the first NS columns of Z +*> contain the singular vectors of the matrix B corresponding to +*> the selected singular values, with U in rows 1 to N and V +*> in rows N+1 to N*2, i.e. +*> Z = [ U ] +*> [ V ] +*> If JOBZ = 'N', then Z is not referenced. +*> Note: The user must ensure that at least K = NS+1 columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of +*> NS is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(2,N*2). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (14*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (12*N) +*> If JOBZ = 'V', then if INFO = 0, the first NS elements of +*> IWORK are zero. If INFO > 0, then IWORK contains the indices +*> of the eigenvectors that failed to converge in DSTEVX. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge +*> in DSTEVX. The indices of the eigenvectors +*> (as returned by DSTEVX) are stored in the +*> array IWORK. +*> if INFO = N*2 + 1, an internal error occurred. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHEReigen +* +* ===================================================================== + SUBROUTINE DBDSVDX( UPLO, JOBZ, RANGE, N, D, E, VL, VU, IL, IU, + $ NS, S, Z, LDZ, WORK, IWORK, INFO) +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDZ, N, NS + DOUBLE PRECISION VL, VU +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), S( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TEN, HNDRD, MEIGTH + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TEN = 10.0D0, + $ HNDRD = 100.0D0, MEIGTH = -0.1250D0 ) + DOUBLE PRECISION FUDGE + PARAMETER ( FUDGE = 2.0D0 ) +* .. +* .. Local Scalars .. + CHARACTER RNGVX + LOGICAL ALLSV, INDSV, LOWER, SPLIT, SVEQ0, VALSV, WANTZ + INTEGER I, ICOLZ, IDBEG, IDEND, IDTGK, IDPTR, IEPTR, + $ IETGK, IIFAIL, IIWORK, ILTGK, IROWU, IROWV, + $ IROWZ, ISBEG, ISPLT, ITEMP, IUTGK, J, K, + $ NTGK, NRU, NRV, NSL + DOUBLE PRECISION ABSTOL, EPS, EMIN, MU, NRMU, NRMV, ORTOL, SMAX, + $ SMIN, SQRT2, THRESH, TOL, ULP, + $ VLTGK, VUTGK, ZJTJI +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DDOT, DLAMCH, DNRM2 + EXTERNAL IDAMAX, LSAME, DAXPY, DDOT, DLAMCH, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DSTEVX, DCOPY, DLASET, DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + ALLSV = LSAME( RANGE, 'A' ) + VALSV = LSAME( RANGE, 'V' ) + INDSV = LSAME( RANGE, 'I' ) + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLSV .OR. VALSV .OR. INDSV ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( N.GT.0 ) THEN + IF( VALSV ) THEN + IF( VL.LT.ZERO ) THEN + INFO = -7 + ELSE IF( VU.LE.VL ) THEN + INFO = -8 + END IF + ELSE IF( INDSV ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N*2 ) ) INFO = -14 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DBDSVDX', -INFO ) + RETURN + END IF +* +* Quick return if possible (N.LE.1) +* + NS = 0 + IF( N.EQ.0 ) RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLSV .OR. INDSV ) THEN + NS = 1 + S( 1 ) = ABS( D( 1 ) ) + ELSE + IF( VL.LT.ABS( D( 1 ) ) .AND. VU.GE.ABS( D( 1 ) ) ) THEN + NS = 1 + S( 1 ) = ABS( D( 1 ) ) + END IF + END IF + IF( WANTZ ) THEN + Z( 1, 1 ) = SIGN( ONE, D( 1 ) ) + Z( 2, 1 ) = ONE + END IF + RETURN + END IF +* + ABSTOL = 2*DLAMCH( 'Safe Minimum' ) + ULP = DLAMCH( 'Precision' ) + EPS = DLAMCH( 'Epsilon' ) + SQRT2 = SQRT( 2.0D0 ) + ORTOL = SQRT( ULP ) +* +* Criterion for splitting is taken from DBDSQR when singular +* values are computed to relative accuracy TOL. (See J. Demmel and +* W. Kahan, Accurate singular values of bidiagonal matrices, SIAM +* J. Sci. and Stat. Comput., 11:873–912, 1990.) +* + TOL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) )*EPS +* +* Compute approximate maximum, minimum singular values. +* + I = IDAMAX( N, D, 1 ) + SMAX = ABS( D( I ) ) + I = IDAMAX( N-1, E, 1 ) + SMAX = MAX( SMAX, ABS( E( I ) ) ) +* +* Compute threshold for neglecting D's and E's. +* + SMIN = ABS( D( 1 ) ) + IF( SMIN.NE.ZERO ) THEN + MU = SMIN + DO I = 2, N + MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) ) + SMIN = MIN( SMIN, MU ) + IF( SMIN.EQ.ZERO ) EXIT + END DO + END IF + SMIN = SMIN / SQRT( DBLE( N ) ) + THRESH = TOL*SMIN +* +* Check for zeros in D and E (splits), i.e. submatrices. +* + DO I = 1, N-1 + IF( ABS( D( I ) ).LE.THRESH ) D( I ) = ZERO + IF( ABS( E( I ) ).LE.THRESH ) E( I ) = ZERO + END DO + IF( ABS( D( N ) ).LE.THRESH ) D( N ) = ZERO +* +* Pointers for arrays used by DSTEVX. +* + IDTGK = 1 + IETGK = IDTGK + N*2 + ITEMP = IETGK + N*2 + IIFAIL = 1 + IIWORK = IIFAIL + N*2 +* +* Set RNGVX, which corresponds to RANGE for DSTEVX in TGK mode. +* VL,VU or IL,IU are redefined to conform to implementation a) +* described in the leading comments. +* + ILTGK = 0 + IUTGK = 0 + VLTGK = ZERO + VUTGK = ZERO +* + IF( ALLSV ) THEN +* +* All singular values will be found. We aim at -s (see +* leading comments) with RNGVX = 'I'. IL and IU are set +* later (as ILTGK and IUTGK) according to the dimension +* of the active submatrix. +* + RNGVX = 'I' + IF( WANTZ ) CALL DLASET( 'F', N*2, N+1, ZERO, ZERO, Z, LDZ ) + ELSE IF( VALSV ) THEN +* +* Find singular values in a half-open interval. We aim +* at -s (see leading comments) and we swap VL and VU +* (as VUTGK and VLTGK), changing their signs. +* + RNGVX = 'V' + VLTGK = -VU + VUTGK = -VL + WORK( IDTGK:IDTGK+2*N-1 ) = ZERO + CALL DCOPY( N, D, 1, WORK( IETGK ), 2 ) + CALL DCOPY( N-1, E, 1, WORK( IETGK+1 ), 2 ) + CALL DSTEVX( 'N', 'V', N*2, WORK( IDTGK ), WORK( IETGK ), + $ VLTGK, VUTGK, ILTGK, ILTGK, ABSTOL, NS, S, + $ Z, LDZ, WORK( ITEMP ), IWORK( IIWORK ), + $ IWORK( IIFAIL ), INFO ) + IF( NS.EQ.0 ) THEN + RETURN + ELSE + IF( WANTZ ) CALL DLASET( 'F', N*2, NS, ZERO, ZERO, Z, LDZ ) + END IF + ELSE IF( INDSV ) THEN +* +* Find the IL-th through the IU-th singular values. We aim +* at -s (see leading comments) and indices are mapped into +* values, therefore mimicking DSTEBZ, where +* +* GL = GL - FUDGE*TNORM*ULP*N - FUDGE*TWO*PIVMIN +* GU = GU + FUDGE*TNORM*ULP*N + FUDGE*PIVMIN +* + ILTGK = IL + IUTGK = IU + RNGVX = 'V' + WORK( IDTGK:IDTGK+2*N-1 ) = ZERO + CALL DCOPY( N, D, 1, WORK( IETGK ), 2 ) + CALL DCOPY( N-1, E, 1, WORK( IETGK+1 ), 2 ) + CALL DSTEVX( 'N', 'I', N*2, WORK( IDTGK ), WORK( IETGK ), + $ VLTGK, VLTGK, ILTGK, ILTGK, ABSTOL, NS, S, + $ Z, LDZ, WORK( ITEMP ), IWORK( IIWORK ), + $ IWORK( IIFAIL ), INFO ) + VLTGK = S( 1 ) - FUDGE*SMAX*ULP*N + WORK( IDTGK:IDTGK+2*N-1 ) = ZERO + CALL DCOPY( N, D, 1, WORK( IETGK ), 2 ) + CALL DCOPY( N-1, E, 1, WORK( IETGK+1 ), 2 ) + CALL DSTEVX( 'N', 'I', N*2, WORK( IDTGK ), WORK( IETGK ), + $ VUTGK, VUTGK, IUTGK, IUTGK, ABSTOL, NS, S, + $ Z, LDZ, WORK( ITEMP ), IWORK( IIWORK ), + $ IWORK( IIFAIL ), INFO ) + VUTGK = S( 1 ) + FUDGE*SMAX*ULP*N + VUTGK = MIN( VUTGK, ZERO ) +* +* If VLTGK=VUTGK, DSTEVX returns an error message, +* so if needed we change VUTGK slightly. +* + IF( VLTGK.EQ.VUTGK ) VLTGK = VLTGK - TOL +* + IF( WANTZ ) CALL DLASET( 'F', N*2, IU-IL+1, ZERO, ZERO, Z, LDZ) + END IF +* +* Initialize variables and pointers for S, Z, and WORK. +* +* NRU, NRV: number of rows in U and V for the active submatrix +* IDBEG, ISBEG: offsets for the entries of D and S +* IROWZ, ICOLZ: offsets for the rows and columns of Z +* IROWU, IROWV: offsets for the rows of U and V +* + NS = 0 + NRU = 0 + NRV = 0 + IDBEG = 1 + ISBEG = 1 + IROWZ = 1 + ICOLZ = 1 + IROWU = 2 + IROWV = 1 + SPLIT = .FALSE. + SVEQ0 = .FALSE. +* +* Form the tridiagonal TGK matrix. +* + S( 1:N ) = ZERO + WORK( IETGK+2*N-1 ) = ZERO + WORK( IDTGK:IDTGK+2*N-1 ) = ZERO + CALL DCOPY( N, D, 1, WORK( IETGK ), 2 ) + CALL DCOPY( N-1, E, 1, WORK( IETGK+1 ), 2 ) +* +* +* Check for splits in two levels, outer level +* in E and inner level in D. +* + DO IEPTR = 2, N*2, 2 + IF( WORK( IETGK+IEPTR-1 ).EQ.ZERO ) THEN +* +* Split in E (this piece of B is square) or bottom +* of the (input bidiagonal) matrix. +* + ISPLT = IDBEG + IDEND = IEPTR - 1 + DO IDPTR = IDBEG, IDEND, 2 + IF( WORK( IETGK+IDPTR-1 ).EQ.ZERO ) THEN +* +* Split in D (rectangular submatrix). Set the number +* of rows in U and V (NRU and NRV) accordingly. +* + IF( IDPTR.EQ.IDBEG ) THEN +* +* D=0 at the top. +* + SVEQ0 = .TRUE. + IF( IDBEG.EQ.IDEND) THEN + NRU = 1 + NRV = 1 + END IF + ELSE IF( IDPTR.EQ.IDEND ) THEN +* +* D=0 at the bottom. +* + SVEQ0 = .TRUE. + NRU = (IDEND-ISPLT)/2 + 1 + NRV = NRU + IF( ISPLT.NE.IDBEG ) THEN + NRU = NRU + 1 + END IF + ELSE + IF( ISPLT.EQ.IDBEG ) THEN +* +* Split: top rectangular submatrix. +* + NRU = (IDPTR-IDBEG)/2 + NRV = NRU + 1 + ELSE +* +* Split: middle square submatrix. +* + NRU = (IDPTR-ISPLT)/2 + 1 + NRV = NRU + END IF + END IF + ELSE IF( IDPTR.EQ.IDEND ) THEN +* +* Last entry of D in the active submatrix. +* + IF( ISPLT.EQ.IDBEG ) THEN +* +* No split (trivial case). +* + NRU = (IDEND-IDBEG)/2 + 1 + NRV = NRU + ELSE +* +* Split: bottom rectangular submatrix. +* + NRV = (IDEND-ISPLT)/2 + 1 + NRU = NRV + 1 + END IF + END IF +* + NTGK = NRU + NRV +* + IF( NTGK.GT.0 ) THEN +* +* Compute eigenvalues/vectors of the active +* submatrix according to RANGE: +* if RANGE='A' (ALLSV) then RNGVX = 'I' +* if RANGE='V' (VALSV) then RNGVX = 'V' +* if RANGE='I' (INDSV) then RNGVX = 'V' +* + ILTGK = 1 + IUTGK = NTGK / 2 + IF( ALLSV .OR. VUTGK.EQ.ZERO ) THEN + IF( SVEQ0 .OR. + $ SMIN.LT.EPS .OR. + $ MOD(NTGK,2).GT.0 ) THEN +* Special case: eigenvalue equal to zero or very +* small, additional eigenvector is needed. + IUTGK = IUTGK + 1 + END IF + END IF +* +* Workspace needed by DSTEVX: +* WORK( ITEMP: ): 2*5*NTGK +* IWORK( 1: ): 2*6*NTGK +* + CALL DSTEVX( JOBZ, RNGVX, NTGK, WORK( IDTGK+ISPLT-1 ), + $ WORK( IETGK+ISPLT-1 ), VLTGK, VUTGK, + $ ILTGK, IUTGK, ABSTOL, NSL, S( ISBEG ), + $ Z( IROWZ,ICOLZ ), LDZ, WORK( ITEMP ), + $ IWORK( IIWORK ), IWORK( IIFAIL ), + $ INFO ) + IF( INFO.NE.0 ) THEN +* Exit with the error code from DSTEVX. + RETURN + END IF + EMIN = ABS( MAXVAL( S( ISBEG:ISBEG+NSL-1 ) ) ) +* + IF( NSL.GT.0 .AND. WANTZ ) THEN +* +* Normalize u=Z([2,4,...],:) and v=Z([1,3,...],:), +* changing the sign of v as discussed in the leading +* comments. The norms of u and v may be (slightly) +* different from 1/sqrt(2) if the corresponding +* eigenvalues are very small or too close. We check +* those norms and, if needed, reorthogonalize the +* vectors. +* + IF( NSL.GT.1 .AND. + $ VUTGK.EQ.ZERO .AND. + $ MOD(NTGK,2).EQ.0 .AND. + $ EMIN.EQ.0 .AND. .NOT.SPLIT ) THEN +* +* D=0 at the top or bottom of the active submatrix: +* one eigenvalue is equal to zero; concatenate the +* eigenvectors corresponding to the two smallest +* eigenvalues. +* + Z( IROWZ:IROWZ+NTGK-1,ICOLZ+NSL-2 ) = + $ Z( IROWZ:IROWZ+NTGK-1,ICOLZ+NSL-2 ) + + $ Z( IROWZ:IROWZ+NTGK-1,ICOLZ+NSL-1 ) + Z( IROWZ:IROWZ+NTGK-1,ICOLZ+NSL-1 ) = + $ ZERO +* IF( IUTGK*2.GT.NTGK ) THEN +* Eigenvalue equal to zero or very small. +* NSL = NSL - 1 +* END IF + END IF +* + DO I = 0, MIN( NSL-1, NRU-1 ) + NRMU = DNRM2( NRU, Z( IROWU, ICOLZ+I ), 2 ) + IF( NRMU.EQ.ZERO ) THEN + INFO = N*2 + 1 + RETURN + END IF + CALL DSCAL( NRU, ONE/NRMU, + $ Z( IROWU,ICOLZ+I ), 2 ) + IF( NRMU.NE.ONE .AND. + $ ABS( NRMU-ORTOL )*SQRT2.GT.ONE ) + $ THEN + DO J = 0, I-1 + ZJTJI = -DDOT( NRU, Z( IROWU, ICOLZ+J ), + $ 2, Z( IROWU, ICOLZ+I ), 2 ) + CALL DAXPY( NRU, ZJTJI, + $ Z( IROWU, ICOLZ+J ), 2, + $ Z( IROWU, ICOLZ+I ), 2 ) + END DO + NRMU = DNRM2( NRU, Z( IROWU, ICOLZ+I ), 2 ) + CALL DSCAL( NRU, ONE/NRMU, + $ Z( IROWU,ICOLZ+I ), 2 ) + END IF + END DO + DO I = 0, MIN( NSL-1, NRV-1 ) + NRMV = DNRM2( NRV, Z( IROWV, ICOLZ+I ), 2 ) + IF( NRMV.EQ.ZERO ) THEN + INFO = N*2 + 1 + RETURN + END IF + CALL DSCAL( NRV, -ONE/NRMV, + $ Z( IROWV,ICOLZ+I ), 2 ) + IF( NRMV.NE.ONE .AND. + $ ABS( NRMV-ORTOL )*SQRT2.GT.ONE ) + $ THEN + DO J = 0, I-1 + ZJTJI = -DDOT( NRV, Z( IROWV, ICOLZ+J ), + $ 2, Z( IROWV, ICOLZ+I ), 2 ) + CALL DAXPY( NRU, ZJTJI, + $ Z( IROWV, ICOLZ+J ), 2, + $ Z( IROWV, ICOLZ+I ), 2 ) + END DO + NRMV = DNRM2( NRV, Z( IROWV, ICOLZ+I ), 2 ) + CALL DSCAL( NRV, ONE/NRMV, + $ Z( IROWV,ICOLZ+I ), 2 ) + END IF + END DO + IF( VUTGK.EQ.ZERO .AND. + $ IDPTR.LT.IDEND .AND. + $ MOD(NTGK,2).GT.0 ) THEN +* +* D=0 in the middle of the active submatrix (one +* eigenvalue is equal to zero): save the corresponding +* eigenvector for later use (when bottom of the +* active submatrix is reached). +* + SPLIT = .TRUE. + Z( IROWZ:IROWZ+NTGK-1,N+1 ) = + $ Z( IROWZ:IROWZ+NTGK-1,NS+NSL ) + Z( IROWZ:IROWZ+NTGK-1,NS+NSL ) = + $ ZERO + END IF + END IF !** WANTZ **! +* + NSL = MIN( NSL, NRU ) + SVEQ0 = .FALSE. +* +* Absolute values of the eigenvalues of TGK. +* + DO I = 0, NSL-1 + S( ISBEG+I ) = ABS( S( ISBEG+I ) ) + END DO +* +* Update pointers for TGK, S and Z. +* + ISBEG = ISBEG + NSL + IROWZ = IROWZ + NTGK + ICOLZ = ICOLZ + NSL + IROWU = IROWZ + IROWV = IROWZ + 1 + ISPLT = IDPTR + 1 + NS = NS + NSL + NRU = 0 + NRV = 0 + END IF !** NTGK.GT.0 **! + IF( IROWZ.LT.N*2 .AND. WANTZ ) THEN + Z( 1:IROWZ-1, ICOLZ ) = ZERO + END IF + END DO !** IDPTR loop **! + IF( SPLIT .AND. WANTZ ) THEN +* +* Bring back eigenvector corresponding +* to eigenvalue equal to zero. +* + Z( IDBEG:IDEND-NTGK+1,ISBEG-1 ) = + $ Z( IDBEG:IDEND-NTGK+1,ISBEG-1 ) + + $ Z( IDBEG:IDEND-NTGK+1,N+1 ) + Z( IDBEG:IDEND-NTGK+1,N+1 ) = 0 + END IF + IROWV = IROWV - 1 + IROWU = IROWU + 1 + IDBEG = IEPTR + 1 + SVEQ0 = .FALSE. + SPLIT = .FALSE. + END IF !** Check for split in E **! + END DO !** IEPTR loop **! +* +* Sort the singular values into decreasing order (insertion sort on +* singular values, but only one transposition per singular vector) +* + DO I = 1, NS-1 + K = 1 + SMIN = S( 1 ) + DO J = 2, NS + 1 - I + IF( S( J ).LE.SMIN ) THEN + K = J + SMIN = S( J ) + END IF + END DO + IF( K.NE.NS+1-I ) THEN + S( K ) = S( NS+1-I ) + S( NS+1-I ) = SMIN + IF( WANTZ ) CALL DSWAP( N*2, Z( 1,K ), 1, Z( 1,NS+1-I ), 1 ) + END IF + END DO +* +* If RANGE=I, check for singular values/vectors to be discarded. +* + IF( INDSV ) THEN + K = IU - IL + 1 + IF( K.LT.NS ) THEN + S( K+1:NS ) = ZERO + IF( WANTZ ) Z( 1:N*2,K+1:NS ) = ZERO + NS = K + END IF + END IF +* +* Reorder Z: U = Z( 1:N,1:NS ), V = Z( N+1:N*2,1:NS ). +* If B is a lower diagonal, swap U and V. +* + IF( WANTZ ) THEN + DO I = 1, NS + CALL DCOPY( N*2, Z( 1,I ), 1, WORK, 1 ) + IF( LOWER ) THEN + CALL DCOPY( N, WORK( 2 ), 2, Z( N+1,I ), 1 ) + CALL DCOPY( N, WORK( 1 ), 2, Z( 1 ,I ), 1 ) + ELSE + CALL DCOPY( N, WORK( 2 ), 2, Z( 1 ,I ), 1 ) + CALL DCOPY( N, WORK( 1 ), 2, Z( N+1,I ), 1 ) + END IF + END DO + END IF +* + RETURN +* +* End of DBDSVDX +* + END diff --git a/dspl/liblapack/SRC/ddisna.f b/dspl/liblapack/SRC/ddisna.f new file mode 100644 index 0000000..61345c6 --- /dev/null +++ b/dspl/liblapack/SRC/ddisna.f @@ -0,0 +1,245 @@ +*> \brief \b DDISNA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DDISNA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DDISNA( JOB, M, N, D, SEP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOB +* INTEGER INFO, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), SEP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DDISNA computes the reciprocal condition numbers for the eigenvectors +*> of a real symmetric or complex Hermitian matrix or for the left or +*> right singular vectors of a general m-by-n matrix. The reciprocal +*> condition number is the 'gap' between the corresponding eigenvalue or +*> singular value and the nearest other one. +*> +*> The bound on the error, measured by angle in radians, in the I-th +*> computed vector is given by +*> +*> DLAMCH( 'E' ) * ( ANORM / SEP( I ) ) +*> +*> where ANORM = 2-norm(A) = max( abs( D(j) ) ). SEP(I) is not allowed +*> to be smaller than DLAMCH( 'E' )*ANORM in order to limit the size of +*> the error bound. +*> +*> DDISNA may also be used to compute error bounds for eigenvectors of +*> the generalized symmetric definite eigenproblem. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies for which problem the reciprocal condition numbers +*> should be computed: +*> = 'E': the eigenvectors of a symmetric/Hermitian matrix; +*> = 'L': the left singular vectors of a general matrix; +*> = 'R': the right singular vectors of a general matrix. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> If JOB = 'L' or 'R', the number of columns of the matrix, +*> in which case N >= 0. Ignored if JOB = 'E'. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (M) if JOB = 'E' +*> dimension (min(M,N)) if JOB = 'L' or 'R' +*> The eigenvalues (if JOB = 'E') or singular values (if JOB = +*> 'L' or 'R') of the matrix, in either increasing or decreasing +*> order. If singular values, they must be non-negative. +*> \endverbatim +*> +*> \param[out] SEP +*> \verbatim +*> SEP is DOUBLE PRECISION array, dimension (M) if JOB = 'E' +*> dimension (min(M,N)) if JOB = 'L' or 'R' +*> The reciprocal condition numbers of the vectors. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DDISNA( JOB, M, N, D, SEP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER INFO, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), SEP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL DECR, EIGEN, INCR, LEFT, RIGHT, SING + INTEGER I, K + DOUBLE PRECISION ANORM, EPS, NEWGAP, OLDGAP, SAFMIN, THRESH +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + EIGEN = LSAME( JOB, 'E' ) + LEFT = LSAME( JOB, 'L' ) + RIGHT = LSAME( JOB, 'R' ) + SING = LEFT .OR. RIGHT + IF( EIGEN ) THEN + K = M + ELSE IF( SING ) THEN + K = MIN( M, N ) + END IF + IF( .NOT.EIGEN .AND. .NOT.SING ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( K.LT.0 ) THEN + INFO = -3 + ELSE + INCR = .TRUE. + DECR = .TRUE. + DO 10 I = 1, K - 1 + IF( INCR ) + $ INCR = INCR .AND. D( I ).LE.D( I+1 ) + IF( DECR ) + $ DECR = DECR .AND. D( I ).GE.D( I+1 ) + 10 CONTINUE + IF( SING .AND. K.GT.0 ) THEN + IF( INCR ) + $ INCR = INCR .AND. ZERO.LE.D( 1 ) + IF( DECR ) + $ DECR = DECR .AND. D( K ).GE.ZERO + END IF + IF( .NOT.( INCR .OR. DECR ) ) + $ INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DDISNA', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) + $ RETURN +* +* Compute reciprocal condition numbers +* + IF( K.EQ.1 ) THEN + SEP( 1 ) = DLAMCH( 'O' ) + ELSE + OLDGAP = ABS( D( 2 )-D( 1 ) ) + SEP( 1 ) = OLDGAP + DO 20 I = 2, K - 1 + NEWGAP = ABS( D( I+1 )-D( I ) ) + SEP( I ) = MIN( OLDGAP, NEWGAP ) + OLDGAP = NEWGAP + 20 CONTINUE + SEP( K ) = OLDGAP + END IF + IF( SING ) THEN + IF( ( LEFT .AND. M.GT.N ) .OR. ( RIGHT .AND. M.LT.N ) ) THEN + IF( INCR ) + $ SEP( 1 ) = MIN( SEP( 1 ), D( 1 ) ) + IF( DECR ) + $ SEP( K ) = MIN( SEP( K ), D( K ) ) + END IF + END IF +* +* Ensure that reciprocal condition numbers are not less than +* threshold, in order to limit the size of the error bound +* + EPS = DLAMCH( 'E' ) + SAFMIN = DLAMCH( 'S' ) + ANORM = MAX( ABS( D( 1 ) ), ABS( D( K ) ) ) + IF( ANORM.EQ.ZERO ) THEN + THRESH = EPS + ELSE + THRESH = MAX( EPS*ANORM, SAFMIN ) + END IF + DO 30 I = 1, K + SEP( I ) = MAX( SEP( I ), THRESH ) + 30 CONTINUE +* + RETURN +* +* End of DDISNA +* + END diff --git a/dspl/liblapack/SRC/dgbbrd.f b/dspl/liblapack/SRC/dgbbrd.f new file mode 100644 index 0000000..350a982 --- /dev/null +++ b/dspl/liblapack/SRC/dgbbrd.f @@ -0,0 +1,547 @@ +*> \brief \b DGBBRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGBBRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGBBRD( VECT, M, N, NCC, KL, KU, AB, LDAB, D, E, Q, +* LDQ, PT, LDPT, C, LDC, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER VECT +* INTEGER INFO, KL, KU, LDAB, LDC, LDPT, LDQ, M, N, NCC +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), C( LDC, * ), D( * ), E( * ), +* $ PT( LDPT, * ), Q( LDQ, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGBBRD reduces a real general m-by-n band matrix A to upper +*> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. +*> +*> The routine computes B, and optionally forms Q or P**T, or computes +*> Q**T*C for a given matrix C. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> Specifies whether or not the matrices Q and P**T are to be +*> formed. +*> = 'N': do not form Q or P**T; +*> = 'Q': form Q only; +*> = 'P': form P**T only; +*> = 'B': form both. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NCC +*> \verbatim +*> NCC is INTEGER +*> The number of columns of the matrix C. NCC >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals of the matrix A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals of the matrix A. KU >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the m-by-n band matrix A, stored in rows 1 to +*> KL+KU+1. The j-th column of A is stored in the j-th column of +*> the array AB as follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). +*> On exit, A is overwritten by values generated during the +*> reduction. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array A. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (min(M,N)) +*> The diagonal elements of the bidiagonal matrix B. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (min(M,N)-1) +*> The superdiagonal elements of the bidiagonal matrix B. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,M) +*> If VECT = 'Q' or 'B', the m-by-m orthogonal matrix Q. +*> If VECT = 'N' or 'P', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] PT +*> \verbatim +*> PT is DOUBLE PRECISION array, dimension (LDPT,N) +*> If VECT = 'P' or 'B', the n-by-n orthogonal matrix P'. +*> If VECT = 'N' or 'Q', the array PT is not referenced. +*> \endverbatim +*> +*> \param[in] LDPT +*> \verbatim +*> LDPT is INTEGER +*> The leading dimension of the array PT. +*> LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,NCC) +*> On entry, an m-by-ncc matrix C. +*> On exit, C is overwritten by Q**T*C. +*> C is not referenced if NCC = 0. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. +*> LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*max(M,N)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGBcomputational +* +* ===================================================================== + SUBROUTINE DGBBRD( VECT, M, N, NCC, KL, KU, AB, LDAB, D, E, Q, + $ LDQ, PT, LDPT, C, LDC, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER VECT + INTEGER INFO, KL, KU, LDAB, LDC, LDPT, LDQ, M, N, NCC +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), C( LDC, * ), D( * ), E( * ), + $ PT( LDPT, * ), Q( LDQ, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTB, WANTC, WANTPT, WANTQ + INTEGER I, INCA, J, J1, J2, KB, KB1, KK, KLM, KLU1, + $ KUN, L, MINMN, ML, ML0, MN, MU, MU0, NR, NRT + DOUBLE PRECISION RA, RB, RC, RS +* .. +* .. External Subroutines .. + EXTERNAL DLARGV, DLARTG, DLARTV, DLASET, DROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTB = LSAME( VECT, 'B' ) + WANTQ = LSAME( VECT, 'Q' ) .OR. WANTB + WANTPT = LSAME( VECT, 'P' ) .OR. WANTB + WANTC = NCC.GT.0 + KLU1 = KL + KU + 1 + INFO = 0 + IF( .NOT.WANTQ .AND. .NOT.WANTPT .AND. .NOT.LSAME( VECT, 'N' ) ) + $ THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NCC.LT.0 ) THEN + INFO = -4 + ELSE IF( KL.LT.0 ) THEN + INFO = -5 + ELSE IF( KU.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KLU1 ) THEN + INFO = -8 + ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. LDQ.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDPT.LT.1 .OR. WANTPT .AND. LDPT.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDC.LT.1 .OR. WANTC .AND. LDC.LT.MAX( 1, M ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBBRD', -INFO ) + RETURN + END IF +* +* Initialize Q and P**T to the unit matrix, if needed +* + IF( WANTQ ) + $ CALL DLASET( 'Full', M, M, ZERO, ONE, Q, LDQ ) + IF( WANTPT ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, PT, LDPT ) +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + MINMN = MIN( M, N ) +* + IF( KL+KU.GT.1 ) THEN +* +* Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce +* first to lower bidiagonal form and then transform to upper +* bidiagonal +* + IF( KU.GT.0 ) THEN + ML0 = 1 + MU0 = 2 + ELSE + ML0 = 2 + MU0 = 1 + END IF +* +* Wherever possible, plane rotations are generated and applied in +* vector operations of length NR over the index set J1:J2:KLU1. +* +* The sines of the plane rotations are stored in WORK(1:max(m,n)) +* and the cosines in WORK(max(m,n)+1:2*max(m,n)). +* + MN = MAX( M, N ) + KLM = MIN( M-1, KL ) + KUN = MIN( N-1, KU ) + KB = KLM + KUN + KB1 = KB + 1 + INCA = KB1*LDAB + NR = 0 + J1 = KLM + 2 + J2 = 1 - KUN +* + DO 90 I = 1, MINMN +* +* Reduce i-th column and i-th row of matrix to bidiagonal form +* + ML = KLM + 1 + MU = KUN + 1 + DO 80 KK = 1, KB + J1 = J1 + KB + J2 = J2 + KB +* +* generate plane rotations to annihilate nonzero elements +* which have been created below the band +* + IF( NR.GT.0 ) + $ CALL DLARGV( NR, AB( KLU1, J1-KLM-1 ), INCA, + $ WORK( J1 ), KB1, WORK( MN+J1 ), KB1 ) +* +* apply plane rotations from the left +* + DO 10 L = 1, KB + IF( J2-KLM+L-1.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( KLU1-L, J1-KLM+L-1 ), INCA, + $ AB( KLU1-L+1, J1-KLM+L-1 ), INCA, + $ WORK( MN+J1 ), WORK( J1 ), KB1 ) + 10 CONTINUE +* + IF( ML.GT.ML0 ) THEN + IF( ML.LE.M-I+1 ) THEN +* +* generate plane rotation to annihilate a(i+ml-1,i) +* within the band, and apply rotation from the left +* + CALL DLARTG( AB( KU+ML-1, I ), AB( KU+ML, I ), + $ WORK( MN+I+ML-1 ), WORK( I+ML-1 ), + $ RA ) + AB( KU+ML-1, I ) = RA + IF( I.LT.N ) + $ CALL DROT( MIN( KU+ML-2, N-I ), + $ AB( KU+ML-2, I+1 ), LDAB-1, + $ AB( KU+ML-1, I+1 ), LDAB-1, + $ WORK( MN+I+ML-1 ), WORK( I+ML-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KB1 + END IF +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + DO 20 J = J1, J2, KB1 + CALL DROT( M, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ WORK( MN+J ), WORK( J ) ) + 20 CONTINUE + END IF +* + IF( WANTC ) THEN +* +* apply plane rotations to C +* + DO 30 J = J1, J2, KB1 + CALL DROT( NCC, C( J-1, 1 ), LDC, C( J, 1 ), LDC, + $ WORK( MN+J ), WORK( J ) ) + 30 CONTINUE + END IF +* + IF( J2+KUN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KB1 + END IF +* + DO 40 J = J1, J2, KB1 +* +* create nonzero element a(j-1,j+ku) above the band +* and store it in WORK(n+1:2*n) +* + WORK( J+KUN ) = WORK( J )*AB( 1, J+KUN ) + AB( 1, J+KUN ) = WORK( MN+J )*AB( 1, J+KUN ) + 40 CONTINUE +* +* generate plane rotations to annihilate nonzero elements +* which have been generated above the band +* + IF( NR.GT.0 ) + $ CALL DLARGV( NR, AB( 1, J1+KUN-1 ), INCA, + $ WORK( J1+KUN ), KB1, WORK( MN+J1+KUN ), + $ KB1 ) +* +* apply plane rotations from the right +* + DO 50 L = 1, KB + IF( J2+L-1.GT.M ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( L+1, J1+KUN-1 ), INCA, + $ AB( L, J1+KUN ), INCA, + $ WORK( MN+J1+KUN ), WORK( J1+KUN ), + $ KB1 ) + 50 CONTINUE +* + IF( ML.EQ.ML0 .AND. MU.GT.MU0 ) THEN + IF( MU.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i,i+mu-1) +* within the band, and apply rotation from the right +* + CALL DLARTG( AB( KU-MU+3, I+MU-2 ), + $ AB( KU-MU+2, I+MU-1 ), + $ WORK( MN+I+MU-1 ), WORK( I+MU-1 ), + $ RA ) + AB( KU-MU+3, I+MU-2 ) = RA + CALL DROT( MIN( KL+MU-2, M-I ), + $ AB( KU-MU+4, I+MU-2 ), 1, + $ AB( KU-MU+3, I+MU-1 ), 1, + $ WORK( MN+I+MU-1 ), WORK( I+MU-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KB1 + END IF +* + IF( WANTPT ) THEN +* +* accumulate product of plane rotations in P**T +* + DO 60 J = J1, J2, KB1 + CALL DROT( N, PT( J+KUN-1, 1 ), LDPT, + $ PT( J+KUN, 1 ), LDPT, WORK( MN+J+KUN ), + $ WORK( J+KUN ) ) + 60 CONTINUE + END IF +* + IF( J2+KB.GT.M ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KB1 + END IF +* + DO 70 J = J1, J2, KB1 +* +* create nonzero element a(j+kl+ku,j+ku-1) below the +* band and store it in WORK(1:n) +* + WORK( J+KB ) = WORK( J+KUN )*AB( KLU1, J+KUN ) + AB( KLU1, J+KUN ) = WORK( MN+J+KUN )*AB( KLU1, J+KUN ) + 70 CONTINUE +* + IF( ML.GT.ML0 ) THEN + ML = ML - 1 + ELSE + MU = MU - 1 + END IF + 80 CONTINUE + 90 CONTINUE + END IF +* + IF( KU.EQ.0 .AND. KL.GT.0 ) THEN +* +* A has been reduced to lower bidiagonal form +* +* Transform lower bidiagonal form to upper bidiagonal by applying +* plane rotations from the left, storing diagonal elements in D +* and off-diagonal elements in E +* + DO 100 I = 1, MIN( M-1, N ) + CALL DLARTG( AB( 1, I ), AB( 2, I ), RC, RS, RA ) + D( I ) = RA + IF( I.LT.N ) THEN + E( I ) = RS*AB( 1, I+1 ) + AB( 1, I+1 ) = RC*AB( 1, I+1 ) + END IF + IF( WANTQ ) + $ CALL DROT( M, Q( 1, I ), 1, Q( 1, I+1 ), 1, RC, RS ) + IF( WANTC ) + $ CALL DROT( NCC, C( I, 1 ), LDC, C( I+1, 1 ), LDC, RC, + $ RS ) + 100 CONTINUE + IF( M.LE.N ) + $ D( M ) = AB( 1, M ) + ELSE IF( KU.GT.0 ) THEN +* +* A has been reduced to upper bidiagonal form +* + IF( M.LT.N ) THEN +* +* Annihilate a(m,m+1) by applying plane rotations from the +* right, storing diagonal elements in D and off-diagonal +* elements in E +* + RB = AB( KU, M+1 ) + DO 110 I = M, 1, -1 + CALL DLARTG( AB( KU+1, I ), RB, RC, RS, RA ) + D( I ) = RA + IF( I.GT.1 ) THEN + RB = -RS*AB( KU, I ) + E( I-1 ) = RC*AB( KU, I ) + END IF + IF( WANTPT ) + $ CALL DROT( N, PT( I, 1 ), LDPT, PT( M+1, 1 ), LDPT, + $ RC, RS ) + 110 CONTINUE + ELSE +* +* Copy off-diagonal elements to E and diagonal elements to D +* + DO 120 I = 1, MINMN - 1 + E( I ) = AB( KU, I+1 ) + 120 CONTINUE + DO 130 I = 1, MINMN + D( I ) = AB( KU+1, I ) + 130 CONTINUE + END IF + ELSE +* +* A is diagonal. Set elements of E to zero and copy diagonal +* elements to D. +* + DO 140 I = 1, MINMN - 1 + E( I ) = ZERO + 140 CONTINUE + DO 150 I = 1, MINMN + D( I ) = AB( 1, I ) + 150 CONTINUE + END IF + RETURN +* +* End of DGBBRD +* + END diff --git a/dspl/liblapack/SRC/dgbcon.f b/dspl/liblapack/SRC/dgbcon.f new file mode 100644 index 0000000..26f14fd --- /dev/null +++ b/dspl/liblapack/SRC/dgbcon.f @@ -0,0 +1,311 @@ +*> \brief \b DGBCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGBCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER INFO, KL, KU, LDAB, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION AB( LDAB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGBCON estimates the reciprocal of the condition number of a real +*> general band matrix A, in either the 1-norm or the infinity-norm, +*> using the LU factorization computed by DGBTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as +*> RCOND = 1 / ( norm(A) * norm(inv(A)) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by DGBTRF. U is stored as an upper triangular band +*> matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +*> the multipliers used during the factorization are stored in +*> rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= N, row i of the matrix was +*> interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> If NORM = '1' or 'O', the 1-norm of the original matrix A. +*> If NORM = 'I', the infinity-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(norm(A) * norm(inv(A))). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGBcomputational +* +* ===================================================================== + SUBROUTINE DGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND, + $ WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, KL, KU, LDAB, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LNOTI, ONENRM + CHARACTER NORMIN + INTEGER IX, J, JP, KASE, KASE1, KD, LM + DOUBLE PRECISION AINVNM, SCALE, SMLNUM, T +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DDOT, DLAMCH + EXTERNAL LSAME, IDAMAX, DDOT, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DLACN2, DLATBS, DRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN + INFO = -6 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KD = KL + KU + 1 + LNOTI = KL.GT.0 + KASE = 0 + 10 CONTINUE + CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + IF( LNOTI ) THEN + DO 20 J = 1, N - 1 + LM = MIN( KL, N-J ) + JP = IPIV( J ) + T = WORK( JP ) + IF( JP.NE.J ) THEN + WORK( JP ) = WORK( J ) + WORK( J ) = T + END IF + CALL DAXPY( LM, -T, AB( KD+1, J ), 1, WORK( J+1 ), 1 ) + 20 CONTINUE + END IF +* +* Multiply by inv(U). +* + CALL DLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ KL+KU, AB, LDAB, WORK, SCALE, WORK( 2*N+1 ), + $ INFO ) + ELSE +* +* Multiply by inv(U**T). +* + CALL DLATBS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, + $ KL+KU, AB, LDAB, WORK, SCALE, WORK( 2*N+1 ), + $ INFO ) +* +* Multiply by inv(L**T). +* + IF( LNOTI ) THEN + DO 30 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + WORK( J ) = WORK( J ) - DDOT( LM, AB( KD+1, J ), 1, + $ WORK( J+1 ), 1 ) + JP = IPIV( J ) + IF( JP.NE.J ) THEN + T = WORK( JP ) + WORK( JP ) = WORK( J ) + WORK( J ) = T + END IF + 30 CONTINUE + END IF + END IF +* +* Divide X by 1/SCALE if doing so will not cause overflow. +* + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = IDAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 40 + CALL DRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 40 CONTINUE + RETURN +* +* End of DGBCON +* + END diff --git a/dspl/liblapack/SRC/dgbequ.f b/dspl/liblapack/SRC/dgbequ.f new file mode 100644 index 0000000..486c88d --- /dev/null +++ b/dspl/liblapack/SRC/dgbequ.f @@ -0,0 +1,324 @@ +*> \brief \b DGBEQU +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGBEQU + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGBEQU( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, +* AMAX, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, KL, KU, LDAB, M, N +* DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), C( * ), R( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGBEQU computes row and column scalings intended to equilibrate an +*> M-by-N band matrix A and reduce its condition number. R returns the +*> row scale factors and C the column scale factors, chosen to try to +*> make the largest element in each row and column of the matrix B with +*> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. +*> +*> R(i) and C(j) are restricted to be between SMLNUM = smallest safe +*> number and BIGNUM = largest safe number. Use of these scaling +*> factors is not guaranteed to reduce the condition number of A but +*> works well in practice. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> The band matrix A, stored in rows 1 to KL+KU+1. The j-th +*> column of A is stored in the j-th column of the array AB as +*> follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (M) +*> If INFO = 0, or INFO > M, R contains the row scale factors +*> for A. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, C contains the column scale factors for A. +*> \endverbatim +*> +*> \param[out] ROWCND +*> \verbatim +*> ROWCND is DOUBLE PRECISION +*> If INFO = 0 or INFO > M, ROWCND contains the ratio of the +*> smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +*> AMAX is neither too large nor too small, it is not worth +*> scaling by R. +*> \endverbatim +*> +*> \param[out] COLCND +*> \verbatim +*> COLCND is DOUBLE PRECISION +*> If INFO = 0, COLCND contains the ratio of the smallest +*> C(i) to the largest C(i). If COLCND >= 0.1, it is not +*> worth scaling by C. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= M: the i-th row of A is exactly zero +*> > M: the (i-M)-th column of A is exactly zero +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGBcomputational +* +* ===================================================================== + SUBROUTINE DGBEQU( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N + DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), C( * ), R( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, KD + DOUBLE PRECISION BIGNUM, RCMAX, RCMIN, SMLNUM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + KD = KU + 1 + DO 30 J = 1, N + DO 20 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + R( I ) = MAX( R( I ), ABS( AB( KD+I-J, J ) ) ) + 20 CONTINUE + 30 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)) +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + KD = KU + 1 + DO 90 J = 1, N + DO 80 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + C( J ) = MAX( C( J ), ABS( AB( KD+I-J, J ) )*R( I ) ) + 80 CONTINUE + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)) +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of DGBEQU +* + END diff --git a/dspl/liblapack/SRC/dgbequb.f b/dspl/liblapack/SRC/dgbequb.f new file mode 100644 index 0000000..f7543aa --- /dev/null +++ b/dspl/liblapack/SRC/dgbequb.f @@ -0,0 +1,340 @@ +*> \brief \b DGBEQUB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGBEQUB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGBEQUB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, +* AMAX, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, KL, KU, LDAB, M, N +* DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), C( * ), R( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGBEQUB computes row and column scalings intended to equilibrate an +*> M-by-N matrix A and reduce its condition number. R returns the row +*> scale factors and C the column scale factors, chosen to try to make +*> the largest element in each row and column of the matrix B with +*> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most +*> the radix. +*> +*> R(i) and C(j) are restricted to be a power of the radix between +*> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use +*> of these scaling factors is not guaranteed to reduce the condition +*> number of A but works well in practice. +*> +*> This routine differs from DGEEQU by restricting the scaling factors +*> to a power of the radix. Barring over- and underflow, scaling by +*> these factors introduces no additional rounding errors. However, the +*> scaled entries' magnitudes are no longer approximately 1 but lie +*> between sqrt(radix) and 1/sqrt(radix). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array A. LDAB >= max(1,M). +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (M) +*> If INFO = 0 or INFO > M, R contains the row scale factors +*> for A. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, C contains the column scale factors for A. +*> \endverbatim +*> +*> \param[out] ROWCND +*> \verbatim +*> ROWCND is DOUBLE PRECISION +*> If INFO = 0 or INFO > M, ROWCND contains the ratio of the +*> smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +*> AMAX is neither too large nor too small, it is not worth +*> scaling by R. +*> \endverbatim +*> +*> \param[out] COLCND +*> \verbatim +*> COLCND is DOUBLE PRECISION +*> If INFO = 0, COLCND contains the ratio of the smallest +*> C(i) to the largest C(i). If COLCND >= 0.1, it is not +*> worth scaling by C. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= M: the i-th row of A is exactly zero +*> > M: the (i-M)-th column of A is exactly zero +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGBcomputational +* +* ===================================================================== + SUBROUTINE DGBEQUB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N + DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), C( * ), R( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, KD + DOUBLE PRECISION BIGNUM, RCMAX, RCMIN, SMLNUM, RADIX, LOGRDX +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, LOG +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBEQUB', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. Assume SMLNUM is a power of the radix. +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + RADIX = DLAMCH( 'B' ) + LOGRDX = LOG(RADIX) +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + KD = KU + 1 + DO 30 J = 1, N + DO 20 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + R( I ) = MAX( R( I ), ABS( AB( KD+I-J, J ) ) ) + 20 CONTINUE + 30 CONTINUE + DO I = 1, M + IF( R( I ).GT.ZERO ) THEN + R( I ) = RADIX**INT( LOG( R( I ) ) / LOGRDX ) + END IF + END DO +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)). +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors. +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + DO 90 J = 1, N + DO 80 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + C( J ) = MAX( C( J ), ABS( AB( KD+I-J, J ) )*R( I ) ) + 80 CONTINUE + IF( C( J ).GT.ZERO ) THEN + C( J ) = RADIX**INT( LOG( C( J ) ) / LOGRDX ) + END IF + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)). +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of DGBEQUB +* + END diff --git a/dspl/liblapack/SRC/dgbrfs.f b/dspl/liblapack/SRC/dgbrfs.f new file mode 100644 index 0000000..179ddfe --- /dev/null +++ b/dspl/liblapack/SRC/dgbrfs.f @@ -0,0 +1,464 @@ +*> \brief \b DGBRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGBRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, +* IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGBRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is banded, and provides +*> error bounds and backward error estimates for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> The original band matrix A, stored in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is DOUBLE PRECISION array, dimension (LDAFB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by DGBTRF. U is stored as an upper triangular band +*> matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +*> the multipliers used during the factorization are stored in +*> rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL*KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from DGBTRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by DGBTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGBcomputational +* +* ===================================================================== + SUBROUTINE DGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, + $ IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANST + INTEGER COUNT, I, J, K, KASE, KK, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGBMV, DGBTRS, DLACN2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -7 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -9 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = MIN( KL+KU+2, N+1 ) + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DGBMV( TRANS, N, N, KL, KU, -ONE, AB, LDAB, X( 1, J ), 1, + $ ONE, WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(op(A))*abs(X) + abs(B). +* + IF( NOTRAN ) THEN + DO 50 K = 1, N + KK = KU + 1 - K + XK = ABS( X( K, J ) ) + DO 40 I = MAX( 1, K-KU ), MIN( N, K+KL ) + WORK( I ) = WORK( I ) + ABS( AB( KK+I, K ) )*XK + 40 CONTINUE + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + KK = KU + 1 - K + DO 60 I = MAX( 1, K-KU ), MIN( N, K+KL ) + S = S + ABS( AB( KK+I, K ) )*ABS( X( I, J ) ) + 60 CONTINUE + WORK( K ) = WORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL DGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK( N+1 ), N, INFO ) + CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL DLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**T). +* + CALL DGBTRS( TRANST, N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK( N+1 ), N, INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( N+I )*WORK( I ) + 110 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( N+I )*WORK( I ) + 120 CONTINUE + CALL DGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK( N+1 ), N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of DGBRFS +* + END diff --git a/dspl/liblapack/SRC/dgbrfsx.f b/dspl/liblapack/SRC/dgbrfsx.f new file mode 100644 index 0000000..fb52d64 --- /dev/null +++ b/dspl/liblapack/SRC/dgbrfsx.f @@ -0,0 +1,765 @@ +*> \brief \b DGBRFSX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGBRFSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGBRFSX( TRANS, EQUED, N, KL, KU, NRHS, AB, LDAB, AFB, +* LDAFB, IPIV, R, C, B, LDB, X, LDX, RCOND, +* BERR, N_ERR_BNDS, ERR_BNDS_NORM, +* ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS, EQUED +* INTEGER INFO, LDAB, LDAFB, LDB, LDX, N, KL, KU, NRHS, +* $ NPARAMS, N_ERR_BNDS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ X( LDX , * ),WORK( * ) +* DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGBRFSX improves the computed solution to a system of linear +*> equations and provides error bounds and backward error estimates +*> for the solution. In addition to normwise error bound, the code +*> provides maximum componentwise error bound if possible. See +*> comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the +*> error bounds. +*> +*> The original system of linear equations may have been equilibrated +*> before calling this routine, as described by arguments EQUED, R +*> and C below. In this case, the solution and error bounds returned +*> are for the original unequilibrated system. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done to A +*> before calling this routine. This is needed to compute +*> the solution and error bounds correctly. +*> = 'N': No equilibration +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> The right hand side B has been changed accordingly. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> The original band matrix A, stored in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is DOUBLE PRECISION array, dimension (LDAFB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by DGBTRF. U is stored as an upper triangular band +*> matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +*> the multipliers used during the factorization are stored in +*> rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL*KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from DGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. R is an input argument if FACT = 'F'; +*> otherwise, R is an output argument. If FACT = 'F' and +*> EQUED = 'R' or 'B', each element of R must be positive. +*> If R is output, each element of R is a power of the radix. +*> If R is input, each element of R should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. C is an input argument if FACT = 'F'; +*> otherwise, C is an output argument. If FACT = 'F' and +*> EQUED = 'C' or 'B', each element of C must be positive. +*> If C is output, each element of C is a power of the radix. +*> If C is input, each element of C should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by DGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is DOUBLE PRECISION array, dimension (NPARAMS) +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0D+0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleGBcomputational +* +* ===================================================================== + SUBROUTINE DGBRFSX( TRANS, EQUED, N, KL, KU, NRHS, AB, LDAB, AFB, + $ LDAFB, IPIV, R, C, B, LDB, X, LDX, RCOND, + $ BERR, N_ERR_BNDS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER TRANS, EQUED + INTEGER INFO, LDAB, LDAFB, LDB, LDX, N, KL, KU, NRHS, + $ NPARAMS, N_ERR_BNDS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ X( LDX , * ),WORK( * ) + DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION ITREF_DEFAULT, ITHRESH_DEFAULT + DOUBLE PRECISION COMPONENTWISE_DEFAULT, RTHRESH_DEFAULT + DOUBLE PRECISION DZTHRESH_DEFAULT + PARAMETER ( ITREF_DEFAULT = 1.0D+0 ) + PARAMETER ( ITHRESH_DEFAULT = 10.0D+0 ) + PARAMETER ( COMPONENTWISE_DEFAULT = 1.0D+0 ) + PARAMETER ( RTHRESH_DEFAULT = 0.5D+0 ) + PARAMETER ( DZTHRESH_DEFAULT = 0.25D+0 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. Local Scalars .. + CHARACTER(1) NORM + LOGICAL ROWEQU, COLEQU, NOTRAN + INTEGER J, TRANS_TYPE, PREC_TYPE, REF_TYPE + INTEGER N_NORMS + DOUBLE PRECISION ANORM, RCOND_TMP + DOUBLE PRECISION ILLRCOND_THRESH, ERR_LBND, CWISE_WRONG + LOGICAL IGNORE_CWISE + INTEGER ITHRESH + DOUBLE PRECISION RTHRESH, UNSTABLE_THRESH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DGBCON + EXTERNAL DLA_GBRFSX_EXTENDED +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. External Functions .. + EXTERNAL LSAME, ILATRANS, ILAPREC + EXTERNAL DLAMCH, DLANGB, DLA_GBRCOND + DOUBLE PRECISION DLAMCH, DLANGB, DLA_GBRCOND + LOGICAL LSAME + INTEGER ILATRANS, ILAPREC +* .. +* .. Executable Statements .. +* +* Check the input parameters. +* + INFO = 0 + TRANS_TYPE = ILATRANS( TRANS ) + REF_TYPE = INT( ITREF_DEFAULT ) + IF ( NPARAMS .GE. LA_LINRX_ITREF_I ) THEN + IF ( PARAMS( LA_LINRX_ITREF_I ) .LT. 0.0D+0 ) THEN + PARAMS( LA_LINRX_ITREF_I ) = ITREF_DEFAULT + ELSE + REF_TYPE = PARAMS( LA_LINRX_ITREF_I ) + END IF + END IF +* +* Set default parameters. +* + ILLRCOND_THRESH = DBLE( N ) * DLAMCH( 'Epsilon' ) + ITHRESH = INT( ITHRESH_DEFAULT ) + RTHRESH = RTHRESH_DEFAULT + UNSTABLE_THRESH = DZTHRESH_DEFAULT + IGNORE_CWISE = COMPONENTWISE_DEFAULT .EQ. 0.0D+0 +* + IF ( NPARAMS.GE.LA_LINRX_ITHRESH_I ) THEN + IF ( PARAMS( LA_LINRX_ITHRESH_I ).LT.0.0D+0 ) THEN + PARAMS( LA_LINRX_ITHRESH_I ) = ITHRESH + ELSE + ITHRESH = INT( PARAMS( LA_LINRX_ITHRESH_I ) ) + END IF + END IF + IF ( NPARAMS.GE.LA_LINRX_CWISE_I ) THEN + IF ( PARAMS( LA_LINRX_CWISE_I ).LT.0.0D+0 ) THEN + IF ( IGNORE_CWISE ) THEN + PARAMS( LA_LINRX_CWISE_I ) = 0.0D+0 + ELSE + PARAMS( LA_LINRX_CWISE_I ) = 1.0D+0 + END IF + ELSE + IGNORE_CWISE = PARAMS( LA_LINRX_CWISE_I ) .EQ. 0.0D+0 + END IF + END IF + IF ( REF_TYPE .EQ. 0 .OR. N_ERR_BNDS .EQ. 0 ) THEN + N_NORMS = 0 + ELSE IF ( IGNORE_CWISE ) THEN + N_NORMS = 1 + ELSE + N_NORMS = 2 + END IF +* + NOTRAN = LSAME( TRANS, 'N' ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) +* +* Test input parameters. +* + IF( TRANS_TYPE.EQ.-1 ) THEN + INFO = -1 + ELSE IF( .NOT.ROWEQU .AND. .NOT.COLEQU .AND. + $ .NOT.LSAME( EQUED, 'N' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KL.LT.0 ) THEN + INFO = -4 + ELSE IF( KU.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -8 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBRFSX', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + RCOND = 1.0D+0 + DO J = 1, NRHS + BERR( J ) = 0.0D+0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 0.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 0.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 1.0D+0 + END IF + END DO + RETURN + END IF +* +* Default to failure. +* + RCOND = 0.0D+0 + DO J = 1, NRHS + BERR( J ) = 1.0D+0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 0.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 0.0D+0 + END IF + END DO +* +* Compute the norm of A and the reciprocal of the condition +* number of A. +* + IF( NOTRAN ) THEN + NORM = 'I' + ELSE + NORM = '1' + END IF + ANORM = DLANGB( NORM, N, KL, KU, AB, LDAB, WORK ) + CALL DGBCON( NORM, N, KL, KU, AFB, LDAFB, IPIV, ANORM, RCOND, + $ WORK, IWORK, INFO ) +* +* Perform refinement on each right-hand side +* + IF ( REF_TYPE .NE. 0 .AND. INFO .EQ. 0 ) THEN + + PREC_TYPE = ILAPREC( 'E' ) + + IF ( NOTRAN ) THEN + CALL DLA_GBRFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, KL, KU, + $ NRHS, AB, LDAB, AFB, LDAFB, IPIV, COLEQU, C, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, WORK( N+1 ), WORK( 1 ), WORK( 2*N+1 ), + $ WORK( 1 ), RCOND, ITHRESH, RTHRESH, UNSTABLE_THRESH, + $ IGNORE_CWISE, INFO ) + ELSE + CALL DLA_GBRFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, KL, KU, + $ NRHS, AB, LDAB, AFB, LDAFB, IPIV, ROWEQU, R, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, WORK( N+1 ), WORK( 1 ), WORK( 2*N+1 ), + $ WORK( 1 ), RCOND, ITHRESH, RTHRESH, UNSTABLE_THRESH, + $ IGNORE_CWISE, INFO ) + END IF + END IF + + ERR_LBND = MAX( 10.0D+0, SQRT( DBLE( N ) ) ) * DLAMCH( 'Epsilon' ) + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 1 ) THEN +* +* Compute scaled normwise condition number cond(A*C). +* + IF ( COLEQU .AND. NOTRAN ) THEN + RCOND_TMP = DLA_GBRCOND( TRANS, N, KL, KU, AB, LDAB, AFB, + $ LDAFB, IPIV, -1, C, INFO, WORK, IWORK ) + ELSE IF ( ROWEQU .AND. .NOT. NOTRAN ) THEN + RCOND_TMP = DLA_GBRCOND( TRANS, N, KL, KU, AB, LDAB, AFB, + $ LDAFB, IPIV, -1, R, INFO, WORK, IWORK ) + ELSE + RCOND_TMP = DLA_GBRCOND( TRANS, N, KL, KU, AB, LDAB, AFB, + $ LDAFB, IPIV, 0, R, INFO, WORK, IWORK ) + END IF + DO J = 1, NRHS +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .GT. 1.0D+0 ) + $ ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 0.0D+0 + IF ( INFO .LE. N ) INFO = N + J + ELSE IF ( ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .LT. ERR_LBND ) + $ THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF + + IF (N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 2) THEN +* +* Compute componentwise condition number cond(A*diag(Y(:,J))) for +* each right-hand side using the current solution as an estimate of +* the true solution. If the componentwise error estimate is too +* large, then the solution is a lousy estimate of truth and the +* estimated RCOND may be too optimistic. To avoid misleading users, +* the inverse condition number is set to 0.0 when the estimated +* cwise error is at least CWISE_WRONG. +* + CWISE_WRONG = SQRT( DLAMCH( 'Epsilon' ) ) + DO J = 1, NRHS + IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .LT. CWISE_WRONG ) + $ THEN + RCOND_TMP = DLA_GBRCOND( TRANS, N, KL, KU, AB, LDAB, AFB, + $ LDAFB, IPIV, 1, X( 1, J ), INFO, WORK, IWORK ) + ELSE + RCOND_TMP = 0.0D+0 + END IF +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .GT. 1.0D+0 ) + $ ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 0.0D+0 + IF ( PARAMS( LA_LINRX_CWISE_I ) .EQ. 1.0D+0 + $ .AND. INFO.LT.N + J ) INFO = N + J + ELSE IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) + $ .LT. ERR_LBND ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF +* + RETURN +* +* End of DGBRFSX +* + END diff --git a/dspl/liblapack/SRC/dgbsv.f b/dspl/liblapack/SRC/dgbsv.f new file mode 100644 index 0000000..b14fcaa --- /dev/null +++ b/dspl/liblapack/SRC/dgbsv.f @@ -0,0 +1,223 @@ +*> \brief DGBSV computes the solution to system of linear equations A * X = B for GB matrices (simple driver) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGBSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGBSV computes the solution to a real system of linear equations +*> A * X = B, where A is a band matrix of order N with KL subdiagonals +*> and KU superdiagonals, and X and B are N-by-NRHS matrices. +*> +*> The LU decomposition with partial pivoting and row interchanges is +*> used to factor A as A = L * U, where L is a product of permutation +*> and unit lower triangular matrices with KL subdiagonals, and U is +*> upper triangular with KL+KU superdiagonals. The factored form of A +*> is then used to solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows KL+1 to +*> 2*KL+KU+1; rows 1 to KL of the array need not be set. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KL+KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+KL) +*> On exit, details of the factorization: U is stored as an +*> upper triangular band matrix with KL+KU superdiagonals in +*> rows 1 to KL+KU+1, and the multipliers used during the +*> factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices that define the permutation matrix P; +*> row i of the matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and the solution has not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGBsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> M = N = 6, KL = 2, KU = 1: +*> +*> On entry: On exit: +*> +*> * * * + + + * * * u14 u25 u36 +*> * * + + + + * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +*> a31 a42 a53 a64 * * m31 m42 m53 m64 * * +*> +*> Array elements marked * are not used by the routine; elements marked +*> + need not be set on entry, but are required by the routine to store +*> elements of U because of fill-in resulting from the row interchanges. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL DGBTRF, DGBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( KL.LT.0 ) THEN + INFO = -2 + ELSE IF( KU.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBSV ', -INFO ) + RETURN + END IF +* +* Compute the LU factorization of the band matrix A. +* + CALL DGBTRF( N, N, KL, KU, AB, LDAB, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL DGBTRS( 'No transpose', N, KL, KU, NRHS, AB, LDAB, IPIV, + $ B, LDB, INFO ) + END IF + RETURN +* +* End of DGBSV +* + END diff --git a/dspl/liblapack/SRC/dgbsvx.f b/dspl/liblapack/SRC/dgbsvx.f new file mode 100644 index 0000000..da4bf91 --- /dev/null +++ b/dspl/liblapack/SRC/dgbsvx.f @@ -0,0 +1,642 @@ +*> \brief DGBSVX computes the solution to system of linear equations A * X = B for GB matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGBSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGBSVX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, +* LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, +* RCOND, FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, TRANS +* INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ BERR( * ), C( * ), FERR( * ), R( * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGBSVX uses the LU factorization to compute the solution to a real +*> system of linear equations A * X = B, A**T * X = B, or A**H * X = B, +*> where A is a band matrix of order N with KL subdiagonals and KU +*> superdiagonals, and X and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed by this subroutine: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +*> TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +*> TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +*> or diag(C)*B (if TRANS = 'T' or 'C'). +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the +*> matrix A (after equilibration if FACT = 'E') as +*> A = L * U, +*> where L is a product of permutation and unit lower triangular +*> matrices with KL subdiagonals, and U is upper triangular with +*> KL+KU superdiagonals. +*> +*> 3. If some U(i,i)=0, so that U is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +*> that it solves the original system before equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AFB and IPIV contain the factored form of +*> A. If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by R and C. +*> AB, AFB, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AFB and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AFB and factored. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations. +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> +*> If FACT = 'F' and EQUED is not 'N', then A must have been +*> equilibrated by the scaling factors in R and/or C. AB is not +*> modified if FACT = 'F' or 'N', or if FACT = 'E' and +*> EQUED = 'N' on exit. +*> +*> On exit, if EQUED .ne. 'N', A is scaled as follows: +*> EQUED = 'R': A := diag(R) * A +*> EQUED = 'C': A := A * diag(C) +*> EQUED = 'B': A := diag(R) * A * diag(C). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in,out] AFB +*> \verbatim +*> AFB is DOUBLE PRECISION array, dimension (LDAFB,N) +*> If FACT = 'F', then AFB is an input argument and on entry +*> contains details of the LU factorization of the band matrix +*> A, as computed by DGBTRF. U is stored as an upper triangular +*> band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +*> and the multipliers used during the factorization are stored +*> in rows KL+KU+2 to 2*KL+KU+1. If EQUED .ne. 'N', then AFB is +*> the factored form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AFB is an output argument and on exit +*> returns details of the LU factorization of A. +*> +*> If FACT = 'E', then AFB is an output argument and on exit +*> returns details of the LU factorization of the equilibrated +*> matrix A (see the description of AB for the form of the +*> equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains the pivot indices from the factorization A = L*U +*> as computed by DGBTRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = L*U +*> of the equilibrated matrix A. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. R is an input argument if FACT = 'F'; +*> otherwise, R is an output argument. If FACT = 'F' and +*> EQUED = 'R' or 'B', each element of R must be positive. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. C is an input argument if FACT = 'F'; +*> otherwise, C is an output argument. If FACT = 'F' and +*> EQUED = 'C' or 'B', each element of C must be positive. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +*> diag(R)*B; +*> if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +*> overwritten by diag(C)*B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X +*> to the original system of equations. Note that A and B are +*> modified on exit if EQUED .ne. 'N', and the solution to the +*> equilibrated system is inv(diag(C))*X if TRANS = 'N' and +*> EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' +*> and EQUED = 'R' or 'B'. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The estimate of the reciprocal condition number of the matrix +*> A after equilibration (if done). If RCOND is less than the +*> machine precision (in particular, if RCOND = 0), the matrix +*> is singular to working precision. This condition is +*> indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> On exit, WORK(1) contains the reciprocal pivot growth +*> factor norm(A)/norm(U). The "max absolute element" norm is +*> used. If WORK(1) is much less than 1, then the stability +*> of the LU factorization of the (equilibrated) matrix A +*> could be poor. This also means that the solution X, condition +*> estimator RCOND, and forward error bound FERR could be +*> unreliable. If factorization fails with 0 WORK(1) contains the reciprocal pivot growth factor for the +*> leading INFO columns of A. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, so the solution and error bounds +*> could not be computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleGBsolve +* +* ===================================================================== + SUBROUTINE DGBSVX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, + $ LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, + $ RCOND, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ BERR( * ), C( * ), FERR( * ), R( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + CHARACTER NORM + INTEGER I, INFEQU, J, J1, J2 + DOUBLE PRECISION AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, RPVGRW, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANGB, DLANTB + EXTERNAL LSAME, DLAMCH, DLANGB, DLANTB +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGBCON, DGBEQU, DGBRFS, DGBTRF, DGBTRS, + $ DLACPY, DLAQGB, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KL.LT.0 ) THEN + INFO = -4 + ELSE IF( KU.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -8 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -10 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -12 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -13 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -14 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -16 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -18 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL DGBEQU( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL DLAQGB( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = R( I )*B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( COLEQU ) THEN + DO 60 J = 1, NRHS + DO 50 I = 1, N + B( I, J ) = C( I )*B( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of the band matrix A. +* + DO 70 J = 1, N + J1 = MAX( J-KU, 1 ) + J2 = MIN( J+KL, N ) + CALL DCOPY( J2-J1+1, AB( KU+1-J+J1, J ), 1, + $ AFB( KL+KU+1-J+J1, J ), 1 ) + 70 CONTINUE +* + CALL DGBTRF( N, N, KL, KU, AFB, LDAFB, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + ANORM = ZERO + DO 90 J = 1, INFO + DO 80 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) + ANORM = MAX( ANORM, ABS( AB( I, J ) ) ) + 80 CONTINUE + 90 CONTINUE + RPVGRW = DLANTB( 'M', 'U', 'N', INFO, MIN( INFO-1, KL+KU ), + $ AFB( MAX( 1, KL+KU+2-INFO ), 1 ), LDAFB, + $ WORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = ANORM / RPVGRW + END IF + WORK( 1 ) = RPVGRW + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A and the +* reciprocal pivot growth factor RPVGRW. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = DLANGB( NORM, N, KL, KU, AB, LDAB, WORK ) + RPVGRW = DLANTB( 'M', 'U', 'N', N, KL+KU, AFB, LDAFB, WORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = DLANGB( 'M', N, KL, KU, AB, LDAB, WORK ) / RPVGRW + END IF +* +* Compute the reciprocal of the condition number of A. +* + CALL DGBCON( NORM, N, KL, KU, AFB, LDAFB, IPIV, ANORM, RCOND, + $ WORK, IWORK, INFO ) +* +* Compute the solution matrix X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DGBTRS( TRANS, N, KL, KU, NRHS, AFB, LDAFB, IPIV, X, LDX, + $ INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL DGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, IPIV, + $ B, LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( NOTRAN ) THEN + IF( COLEQU ) THEN + DO 110 J = 1, NRHS + DO 100 I = 1, N + X( I, J ) = C( I )*X( I, J ) + 100 CONTINUE + 110 CONTINUE + DO 120 J = 1, NRHS + FERR( J ) = FERR( J ) / COLCND + 120 CONTINUE + END IF + ELSE IF( ROWEQU ) THEN + DO 140 J = 1, NRHS + DO 130 I = 1, N + X( I, J ) = R( I )*X( I, J ) + 130 CONTINUE + 140 CONTINUE + DO 150 J = 1, NRHS + FERR( J ) = FERR( J ) / ROWCND + 150 CONTINUE + END IF +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + WORK( 1 ) = RPVGRW + RETURN +* +* End of DGBSVX +* + END diff --git a/dspl/liblapack/SRC/dgbsvxx.f b/dspl/liblapack/SRC/dgbsvxx.f new file mode 100644 index 0000000..819d20c --- /dev/null +++ b/dspl/liblapack/SRC/dgbsvxx.f @@ -0,0 +1,799 @@ +*> \brief DGBSVXX computes the solution to system of linear equations A * X = B for GB matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGBSVXX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGBSVXX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, +* LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, +* RCOND, RPVGRW, BERR, N_ERR_BNDS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, TRANS +* INTEGER INFO, LDAB, LDAFB, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS, KL, KU +* DOUBLE PRECISION RCOND, RPVGRW +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ X( LDX , * ),WORK( * ) +* DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGBSVXX uses the LU factorization to compute the solution to a +*> double precision system of linear equations A * X = B, where A is an +*> N-by-N matrix and X and B are N-by-NRHS matrices. +*> +*> If requested, both normwise and maximum componentwise error bounds +*> are returned. DGBSVXX will return a solution with a tiny +*> guaranteed error (O(eps) where eps is the working machine +*> precision) unless the matrix is very ill-conditioned, in which +*> case a warning is returned. Relevant condition numbers also are +*> calculated and returned. +*> +*> DGBSVXX accepts user-provided factorizations and equilibration +*> factors; see the definitions of the FACT and EQUED options. +*> Solving with refinement and using a factorization from a previous +*> DGBSVXX call will also produce a solution with either O(eps) +*> errors or warnings, but we cannot make that claim for general +*> user-provided factorizations and equilibration factors if they +*> differ from what DGBSVXX would itself produce. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', double precision scaling factors are computed to equilibrate +*> the system: +*> +*> TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +*> TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +*> TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +*> +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +*> or diag(C)*B (if TRANS = 'T' or 'C'). +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor +*> the matrix A (after equilibration if FACT = 'E') as +*> +*> A = P * L * U, +*> +*> where P is a permutation matrix, L is a unit lower triangular +*> matrix, and U is upper triangular. +*> +*> 3. If some U(i,i)=0, so that U is exactly singular, then the +*> routine returns with INFO = i. Otherwise, the factored form of A +*> is used to estimate the condition number of the matrix A (see +*> argument RCOND). If the reciprocal of the condition number is less +*> than machine precision, the routine still goes on to solve for X +*> and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), +*> the routine will use iterative refinement to try to get a small +*> error and error bounds. Refinement calculates the residual to at +*> least twice the working precision. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +*> that it solves the original system before equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF and IPIV contain the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by R and C. +*> A, AF, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate Transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> +*> If FACT = 'F' and EQUED is not 'N', then AB must have been +*> equilibrated by the scaling factors in R and/or C. AB is not +*> modified if FACT = 'F' or 'N', or if FACT = 'E' and +*> EQUED = 'N' on exit. +*> +*> On exit, if EQUED .ne. 'N', A is scaled as follows: +*> EQUED = 'R': A := diag(R) * A +*> EQUED = 'C': A := A * diag(C) +*> EQUED = 'B': A := diag(R) * A * diag(C). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in,out] AFB +*> \verbatim +*> AFB is DOUBLE PRECISION array, dimension (LDAFB,N) +*> If FACT = 'F', then AFB is an input argument and on entry +*> contains details of the LU factorization of the band matrix +*> A, as computed by DGBTRF. U is stored as an upper triangular +*> band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +*> and the multipliers used during the factorization are stored +*> in rows KL+KU+2 to 2*KL+KU+1. If EQUED .ne. 'N', then AFB is +*> the factored form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the equilibrated matrix A (see the description of A for +*> the form of the equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains the pivot indices from the factorization A = P*L*U +*> as computed by DGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the equilibrated matrix A. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. R is an input argument if FACT = 'F'; +*> otherwise, R is an output argument. If FACT = 'F' and +*> EQUED = 'R' or 'B', each element of R must be positive. +*> If R is output, each element of R is a power of the radix. +*> If R is input, each element of R should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. C is an input argument if FACT = 'F'; +*> otherwise, C is an output argument. If FACT = 'F' and +*> EQUED = 'C' or 'B', each element of C must be positive. +*> If C is output, each element of C is a power of the radix. +*> If C is input, each element of C should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +*> diag(R)*B; +*> if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +*> overwritten by diag(C)*B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X to the original +*> system of equations. Note that A and B are modified on exit +*> if EQUED .ne. 'N', and the solution to the equilibrated system is +*> inv(diag(C))*X if TRANS = 'N' and EQUED = 'C' or 'B', or +*> inv(diag(R))*X if TRANS = 'T' or 'C' and EQUED = 'R' or 'B'. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] RPVGRW +*> \verbatim +*> RPVGRW is DOUBLE PRECISION +*> Reciprocal pivot growth. On exit, this contains the reciprocal +*> pivot growth factor norm(A)/norm(U). The "max absolute element" +*> norm is used. If this is much less than 1, then the stability of +*> the LU factorization of the (equilibrated) matrix A could be poor. +*> This also means that the solution X, estimated condition numbers, +*> and error bounds could be unreliable. If factorization fails with +*> 0 for the leading INFO columns of A. In DGESVX, this quantity is +*> returned in WORK(1). +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is DOUBLE PRECISION array, dimension (NPARAMS) +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0D+0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the extra-precise refinement algorithm. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleGBsolve +* +* ===================================================================== + SUBROUTINE DGBSVXX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, + $ LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, + $ RCOND, RPVGRW, BERR, N_ERR_BNDS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, LDAB, LDAFB, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS, KL, KU + DOUBLE PRECISION RCOND, RPVGRW +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ X( LDX , * ),WORK( * ) + DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + INTEGER INFEQU, I, J + DOUBLE PRECISION AMAX, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, SMLNUM +* .. +* .. External Functions .. + EXTERNAL LSAME, DLAMCH, DLA_GBRPVGRW + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLA_GBRPVGRW +* .. +* .. External Subroutines .. + EXTERNAL DGBEQUB, DGBTRF, DGBTRS, DLACPY, DLAQGB, + $ XERBLA, DLASCL2, DGBRFSX +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF +* +* Default is failure. If an input parameter is wrong or +* factorization fails, make everything look horrible. Only the +* pivot growth is set here, the rest is initialized in DGBRFSX. +* + RPVGRW = ZERO +* +* Test the input parameters. PARAMS is not tested until DGBRFSX. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT. + $ LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KL.LT.0 ) THEN + INFO = -4 + ELSE IF( KU.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -8 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -10 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -12 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -13 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -14 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -15 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBSVXX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL DGBEQUB( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL DLAQGB( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF +* +* If the scaling factors are not applied, set them to 1.0. +* + IF ( .NOT.ROWEQU ) THEN + DO J = 1, N + R( J ) = 1.0D+0 + END DO + END IF + IF ( .NOT.COLEQU ) THEN + DO J = 1, N + C( J ) = 1.0D+0 + END DO + END IF + END IF +* +* Scale the right hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) CALL DLASCL2(N, NRHS, R, B, LDB) + ELSE + IF( COLEQU ) CALL DLASCL2(N, NRHS, C, B, LDB) + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of A. +* + DO 40, J = 1, N + DO 30, I = KL+1, 2*KL+KU+1 + AFB( I, J ) = AB( I-KL, J ) + 30 CONTINUE + 40 CONTINUE + CALL DGBTRF( N, N, KL, KU, AFB, LDAFB, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Pivot in column INFO is exactly 0 +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + RPVGRW = DLA_GBRPVGRW( N, KL, KU, INFO, AB, LDAB, AFB, + $ LDAFB ) + RETURN + END IF + END IF +* +* Compute the reciprocal pivot growth factor RPVGRW. +* + RPVGRW = DLA_GBRPVGRW( N, KL, KU, N, AB, LDAB, AFB, LDAFB ) +* +* Compute the solution matrix X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DGBTRS( TRANS, N, KL, KU, NRHS, AFB, LDAFB, IPIV, X, LDX, + $ INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL DGBRFSX( TRANS, EQUED, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, + $ IPIV, R, C, B, LDB, X, LDX, RCOND, BERR, + $ N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, IWORK, INFO ) +* +* Scale solutions. +* + IF ( COLEQU .AND. NOTRAN ) THEN + CALL DLASCL2 ( N, NRHS, C, X, LDX ) + ELSE IF ( ROWEQU .AND. .NOT.NOTRAN ) THEN + CALL DLASCL2 ( N, NRHS, R, X, LDX ) + END IF +* + RETURN +* +* End of DGBSVXX +* + END diff --git a/dspl/liblapack/SRC/dgbtf2.f b/dspl/liblapack/SRC/dgbtf2.f new file mode 100644 index 0000000..eae7d27 --- /dev/null +++ b/dspl/liblapack/SRC/dgbtf2.f @@ -0,0 +1,277 @@ +*> \brief \b DGBTF2 computes the LU factorization of a general band matrix using the unblocked version of the algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGBTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGBTF2 computes an LU factorization of a real m-by-n band matrix A +*> using partial pivoting with row interchanges. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows KL+1 to +*> 2*KL+KU+1; rows 1 to KL of the array need not be set. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +*> +*> On exit, details of the factorization: U is stored as an +*> upper triangular band matrix with KL+KU superdiagonals in +*> rows 1 to KL+KU+1, and the multipliers used during the +*> factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = +i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGBcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> M = N = 6, KL = 2, KU = 1: +*> +*> On entry: On exit: +*> +*> * * * + + + * * * u14 u25 u36 +*> * * + + + + * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +*> a31 a42 a53 a64 * * m31 m42 m53 m64 * * +*> +*> Array elements marked * are not used by the routine; elements marked +*> + need not be set on entry, but are required by the routine to store +*> elements of U, because of fill-in resulting from the row +*> interchanges. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JP, JU, KM, KV +* .. +* .. External Functions .. + INTEGER IDAMAX + EXTERNAL IDAMAX +* .. +* .. External Subroutines .. + EXTERNAL DGER, DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* KV is the number of superdiagonals in the factor U, allowing for +* fill-in. +* + KV = KU + KL +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KV+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Gaussian elimination with partial pivoting +* +* Set fill-in elements in columns KU+2 to KV to zero. +* + DO 20 J = KU + 2, MIN( KV, N ) + DO 10 I = KV - J + 2, KL + AB( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* JU is the index of the last column affected by the current stage +* of the factorization. +* + JU = 1 +* + DO 40 J = 1, MIN( M, N ) +* +* Set fill-in elements in column J+KV to zero. +* + IF( J+KV.LE.N ) THEN + DO 30 I = 1, KL + AB( I, J+KV ) = ZERO + 30 CONTINUE + END IF +* +* Find pivot and test for singularity. KM is the number of +* subdiagonal elements in the current column. +* + KM = MIN( KL, M-J ) + JP = IDAMAX( KM+1, AB( KV+1, J ), 1 ) + IPIV( J ) = JP + J - 1 + IF( AB( KV+JP, J ).NE.ZERO ) THEN + JU = MAX( JU, MIN( J+KU+JP-1, N ) ) +* +* Apply interchange to columns J to JU. +* + IF( JP.NE.1 ) + $ CALL DSWAP( JU-J+1, AB( KV+JP, J ), LDAB-1, + $ AB( KV+1, J ), LDAB-1 ) +* + IF( KM.GT.0 ) THEN +* +* Compute multipliers. +* + CALL DSCAL( KM, ONE / AB( KV+1, J ), AB( KV+2, J ), 1 ) +* +* Update trailing submatrix within the band. +* + IF( JU.GT.J ) + $ CALL DGER( KM, JU-J, -ONE, AB( KV+2, J ), 1, + $ AB( KV, J+1 ), LDAB-1, AB( KV+1, J+1 ), + $ LDAB-1 ) + END IF + ELSE +* +* If pivot is zero, set INFO to the index of the pivot +* unless a zero pivot has already been found. +* + IF( INFO.EQ.0 ) + $ INFO = J + END IF + 40 CONTINUE + RETURN +* +* End of DGBTF2 +* + END diff --git a/dspl/liblapack/SRC/dgbtrf.f b/dspl/liblapack/SRC/dgbtrf.f new file mode 100644 index 0000000..86fad80 --- /dev/null +++ b/dspl/liblapack/SRC/dgbtrf.f @@ -0,0 +1,516 @@ +*> \brief \b DGBTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGBTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGBTRF computes an LU factorization of a real m-by-n band matrix A +*> using partial pivoting with row interchanges. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows KL+1 to +*> 2*KL+KU+1; rows 1 to KL of the array need not be set. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +*> +*> On exit, details of the factorization: U is stored as an +*> upper triangular band matrix with KL+KU superdiagonals in +*> rows 1 to KL+KU+1, and the multipliers used during the +*> factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = +i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGBcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> M = N = 6, KL = 2, KU = 1: +*> +*> On entry: On exit: +*> +*> * * * + + + * * * u14 u25 u36 +*> * * + + + + * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +*> a31 a42 a53 a64 * * m31 m42 m53 m64 * * +*> +*> Array elements marked * are not used by the routine; elements marked +*> + need not be set on entry, but are required by the routine to store +*> elements of U because of fill-in resulting from the row interchanges. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) + INTEGER NBMAX, LDWORK + PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 ) +* .. +* .. Local Scalars .. + INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP, + $ JU, K2, KM, KV, NB, NW + DOUBLE PRECISION TEMP +* .. +* .. Local Arrays .. + DOUBLE PRECISION WORK13( LDWORK, NBMAX ), + $ WORK31( LDWORK, NBMAX ) +* .. +* .. External Functions .. + INTEGER IDAMAX, ILAENV + EXTERNAL IDAMAX, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGBTF2, DGEMM, DGER, DLASWP, DSCAL, + $ DSWAP, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* KV is the number of superdiagonals in the factor U, allowing for +* fill-in +* + KV = KU + KL +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KV+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment +* + NB = ILAENV( 1, 'DGBTRF', ' ', M, N, KL, KU ) +* +* The block size must not exceed the limit set by the size of the +* local arrays WORK13 and WORK31. +* + NB = MIN( NB, NBMAX ) +* + IF( NB.LE.1 .OR. NB.GT.KL ) THEN +* +* Use unblocked code +* + CALL DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) + ELSE +* +* Use blocked code +* +* Zero the superdiagonal elements of the work array WORK13 +* + DO 20 J = 1, NB + DO 10 I = 1, J - 1 + WORK13( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Zero the subdiagonal elements of the work array WORK31 +* + DO 40 J = 1, NB + DO 30 I = J + 1, NB + WORK31( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Gaussian elimination with partial pivoting +* +* Set fill-in elements in columns KU+2 to KV to zero +* + DO 60 J = KU + 2, MIN( KV, N ) + DO 50 I = KV - J + 2, KL + AB( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE +* +* JU is the index of the last column affected by the current +* stage of the factorization +* + JU = 1 +* + DO 180 J = 1, MIN( M, N ), NB + JB = MIN( NB, MIN( M, N )-J+1 ) +* +* The active part of the matrix is partitioned +* +* A11 A12 A13 +* A21 A22 A23 +* A31 A32 A33 +* +* Here A11, A21 and A31 denote the current block of JB columns +* which is about to be factorized. The number of rows in the +* partitioning are JB, I2, I3 respectively, and the numbers +* of columns are JB, J2, J3. The superdiagonal elements of A13 +* and the subdiagonal elements of A31 lie outside the band. +* + I2 = MIN( KL-JB, M-J-JB+1 ) + I3 = MIN( JB, M-J-KL+1 ) +* +* J2 and J3 are computed after JU has been updated. +* +* Factorize the current block of JB columns +* + DO 80 JJ = J, J + JB - 1 +* +* Set fill-in elements in column JJ+KV to zero +* + IF( JJ+KV.LE.N ) THEN + DO 70 I = 1, KL + AB( I, JJ+KV ) = ZERO + 70 CONTINUE + END IF +* +* Find pivot and test for singularity. KM is the number of +* subdiagonal elements in the current column. +* + KM = MIN( KL, M-JJ ) + JP = IDAMAX( KM+1, AB( KV+1, JJ ), 1 ) + IPIV( JJ ) = JP + JJ - J + IF( AB( KV+JP, JJ ).NE.ZERO ) THEN + JU = MAX( JU, MIN( JJ+KU+JP-1, N ) ) + IF( JP.NE.1 ) THEN +* +* Apply interchange to columns J to J+JB-1 +* + IF( JP+JJ-1.LT.J+KL ) THEN +* + CALL DSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1, + $ AB( KV+JP+JJ-J, J ), LDAB-1 ) + ELSE +* +* The interchange affects columns J to JJ-1 of A31 +* which are stored in the work array WORK31 +* + CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) + CALL DSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1, + $ AB( KV+JP, JJ ), LDAB-1 ) + END IF + END IF +* +* Compute multipliers +* + CALL DSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ), + $ 1 ) +* +* Update trailing submatrix within the band and within +* the current block. JM is the index of the last column +* which needs to be updated. +* + JM = MIN( JU, J+JB-1 ) + IF( JM.GT.JJ ) + $ CALL DGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1, + $ AB( KV, JJ+1 ), LDAB-1, + $ AB( KV+1, JJ+1 ), LDAB-1 ) + ELSE +* +* If pivot is zero, set INFO to the index of the pivot +* unless a zero pivot has already been found. +* + IF( INFO.EQ.0 ) + $ INFO = JJ + END IF +* +* Copy current column of A31 into the work array WORK31 +* + NW = MIN( JJ-J+1, I3 ) + IF( NW.GT.0 ) + $ CALL DCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1, + $ WORK31( 1, JJ-J+1 ), 1 ) + 80 CONTINUE + IF( J+JB.LE.N ) THEN +* +* Apply the row interchanges to the other blocks. +* + J2 = MIN( JU-J+1, KV ) - JB + J3 = MAX( 0, JU-J-KV+1 ) +* +* Use DLASWP to apply the row interchanges to A12, A22, and +* A32. +* + CALL DLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB, + $ IPIV( J ), 1 ) +* +* Adjust the pivot indices. +* + DO 90 I = J, J + JB - 1 + IPIV( I ) = IPIV( I ) + J - 1 + 90 CONTINUE +* +* Apply the row interchanges to A13, A23, and A33 +* columnwise. +* + K2 = J - 1 + JB + J2 + DO 110 I = 1, J3 + JJ = K2 + I + DO 100 II = J + I - 1, J + JB - 1 + IP = IPIV( II ) + IF( IP.NE.II ) THEN + TEMP = AB( KV+1+II-JJ, JJ ) + AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ ) + AB( KV+1+IP-JJ, JJ ) = TEMP + END IF + 100 CONTINUE + 110 CONTINUE +* +* Update the relevant part of the trailing submatrix +* + IF( J2.GT.0 ) THEN +* +* Update A12 +* + CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, J2, ONE, AB( KV+1, J ), LDAB-1, + $ AB( KV+1-JB, J+JB ), LDAB-1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A22 +* + CALL DGEMM( 'No transpose', 'No transpose', I2, J2, + $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, + $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, + $ AB( KV+1, J+JB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Update A32 +* + CALL DGEMM( 'No transpose', 'No transpose', I3, J2, + $ JB, -ONE, WORK31, LDWORK, + $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, + $ AB( KV+KL+1-JB, J+JB ), LDAB-1 ) + END IF + END IF +* + IF( J3.GT.0 ) THEN +* +* Copy the lower triangle of A13 into the work array +* WORK13 +* + DO 130 JJ = 1, J3 + DO 120 II = JJ, JB + WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 ) + 120 CONTINUE + 130 CONTINUE +* +* Update A13 in the work array +* + CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, J3, ONE, AB( KV+1, J ), LDAB-1, + $ WORK13, LDWORK ) +* + IF( I2.GT.0 ) THEN +* +* Update A23 +* + CALL DGEMM( 'No transpose', 'No transpose', I2, J3, + $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, + $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ), + $ LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Update A33 +* + CALL DGEMM( 'No transpose', 'No transpose', I3, J3, + $ JB, -ONE, WORK31, LDWORK, WORK13, + $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 ) + END IF +* +* Copy the lower triangle of A13 back into place +* + DO 150 JJ = 1, J3 + DO 140 II = JJ, JB + AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ ) + 140 CONTINUE + 150 CONTINUE + END IF + ELSE +* +* Adjust the pivot indices. +* + DO 160 I = J, J + JB - 1 + IPIV( I ) = IPIV( I ) + J - 1 + 160 CONTINUE + END IF +* +* Partially undo the interchanges in the current block to +* restore the upper triangular form of A31 and copy the upper +* triangle of A31 back into place +* + DO 170 JJ = J + JB - 1, J, -1 + JP = IPIV( JJ ) - JJ + 1 + IF( JP.NE.1 ) THEN +* +* Apply interchange to columns J to JJ-1 +* + IF( JP+JJ-1.LT.J+KL ) THEN +* +* The interchange does not affect A31 +* + CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ AB( KV+JP+JJ-J, J ), LDAB-1 ) + ELSE +* +* The interchange does affect A31 +* + CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) + END IF + END IF +* +* Copy the current column of A31 back into place +* + NW = MIN( I3, JJ-J+1 ) + IF( NW.GT.0 ) + $ CALL DCOPY( NW, WORK31( 1, JJ-J+1 ), 1, + $ AB( KV+KL+1-JJ+J, JJ ), 1 ) + 170 CONTINUE + 180 CONTINUE + END IF +* + RETURN +* +* End of DGBTRF +* + END diff --git a/dspl/liblapack/SRC/dgbtrs.f b/dspl/liblapack/SRC/dgbtrs.f new file mode 100644 index 0000000..0837349 --- /dev/null +++ b/dspl/liblapack/SRC/dgbtrs.f @@ -0,0 +1,269 @@ +*> \brief \b DGBTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGBTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGBTRS solves a system of linear equations +*> A * X = B or A**T * X = B +*> with a general band matrix A using the LU factorization computed +*> by DGBTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations. +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T* X = B (Transpose) +*> = 'C': A**T* X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by DGBTRF. U is stored as an upper triangular band +*> matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +*> the multipliers used during the factorization are stored in +*> rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= N, row i of the matrix was +*> interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGBcomputational +* +* ===================================================================== + SUBROUTINE DGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LNOTI, NOTRAN + INTEGER I, J, KD, L, LM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DGER, DSWAP, DTBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.( 2*KL+KU+1 ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + KD = KU + KL + 1 + LNOTI = KL.GT.0 +* + IF( NOTRAN ) THEN +* +* Solve A*X = B. +* +* Solve L*X = B, overwriting B with X. +* +* L is represented as a product of permutations and unit lower +* triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1), +* where each transformation L(i) is a rank-one modification of +* the identity matrix. +* + IF( LNOTI ) THEN + DO 10 J = 1, N - 1 + LM = MIN( KL, N-J ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL DSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + CALL DGER( LM, NRHS, -ONE, AB( KD+1, J ), 1, B( J, 1 ), + $ LDB, B( J+1, 1 ), LDB ) + 10 CONTINUE + END IF +* + DO 20 I = 1, NRHS +* +* Solve U*X = B, overwriting B with X. +* + CALL DTBSV( 'Upper', 'No transpose', 'Non-unit', N, KL+KU, + $ AB, LDAB, B( 1, I ), 1 ) + 20 CONTINUE +* + ELSE +* +* Solve A**T*X = B. +* + DO 30 I = 1, NRHS +* +* Solve U**T*X = B, overwriting B with X. +* + CALL DTBSV( 'Upper', 'Transpose', 'Non-unit', N, KL+KU, AB, + $ LDAB, B( 1, I ), 1 ) + 30 CONTINUE +* +* Solve L**T*X = B, overwriting B with X. +* + IF( LNOTI ) THEN + DO 40 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + CALL DGEMV( 'Transpose', LM, NRHS, -ONE, B( J+1, 1 ), + $ LDB, AB( KD+1, J ), 1, ONE, B( J, 1 ), LDB ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL DSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + 40 CONTINUE + END IF + END IF + RETURN +* +* End of DGBTRS +* + END diff --git a/dspl/liblapack/SRC/dgebak.f b/dspl/liblapack/SRC/dgebak.f new file mode 100644 index 0000000..45a86ee --- /dev/null +++ b/dspl/liblapack/SRC/dgebak.f @@ -0,0 +1,268 @@ +*> \brief \b DGEBAK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEBAK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOB, SIDE +* INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION SCALE( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEBAK forms the right or left eigenvectors of a real general matrix +*> by backward transformation on the computed eigenvectors of the +*> balanced matrix output by DGEBAL. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies the type of backward transformation required: +*> = 'N', do nothing, return immediately; +*> = 'P', do backward transformation for permutation only; +*> = 'S', do backward transformation for scaling only; +*> = 'B', do backward transformations for both permutation and +*> scaling. +*> JOB must be the same as the argument JOB supplied to DGEBAL. +*> \endverbatim +*> +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': V contains right eigenvectors; +*> = 'L': V contains left eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows of the matrix V. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> The integers ILO and IHI determined by DGEBAL. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION array, dimension (N) +*> Details of the permutation and scaling factors, as returned +*> by DGEBAL. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of columns of the matrix V. M >= 0. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (LDV,M) +*> On entry, the matrix of right or left eigenvectors to be +*> transformed, as returned by DHSEIN or DTREVC. +*> On exit, V is overwritten by the transformed eigenvectors. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + SUBROUTINE DGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOB, SIDE + INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION SCALE( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFTV, RIGHTV + INTEGER I, II, K + DOUBLE PRECISION S +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and Test the input parameters +* + RIGHTV = LSAME( SIDE, 'R' ) + LEFTV = LSAME( SIDE, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -7 + ELSE IF( LDV.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEBAK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( M.EQ.0 ) + $ RETURN + IF( LSAME( JOB, 'N' ) ) + $ RETURN +* + IF( ILO.EQ.IHI ) + $ GO TO 30 +* +* Backward balance +* + IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN +* + IF( RIGHTV ) THEN + DO 10 I = ILO, IHI + S = SCALE( I ) + CALL DSCAL( M, S, V( I, 1 ), LDV ) + 10 CONTINUE + END IF +* + IF( LEFTV ) THEN + DO 20 I = ILO, IHI + S = ONE / SCALE( I ) + CALL DSCAL( M, S, V( I, 1 ), LDV ) + 20 CONTINUE + END IF +* + END IF +* +* Backward permutation +* +* For I = ILO-1 step -1 until 1, +* IHI+1 step 1 until N do -- +* + 30 CONTINUE + IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN + IF( RIGHTV ) THEN + DO 40 II = 1, N + I = II + IF( I.GE.ILO .AND. I.LE.IHI ) + $ GO TO 40 + IF( I.LT.ILO ) + $ I = ILO - II + K = SCALE( I ) + IF( K.EQ.I ) + $ GO TO 40 + CALL DSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 40 CONTINUE + END IF +* + IF( LEFTV ) THEN + DO 50 II = 1, N + I = II + IF( I.GE.ILO .AND. I.LE.IHI ) + $ GO TO 50 + IF( I.LT.ILO ) + $ I = ILO - II + K = SCALE( I ) + IF( K.EQ.I ) + $ GO TO 50 + CALL DSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 50 CONTINUE + END IF + END IF +* + RETURN +* +* End of DGEBAK +* + END diff --git a/dspl/liblapack/SRC/dgebal.f b/dspl/liblapack/SRC/dgebal.f new file mode 100644 index 0000000..95876ac --- /dev/null +++ b/dspl/liblapack/SRC/dgebal.f @@ -0,0 +1,398 @@ +*> \brief \b DGEBAL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEBAL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOB +* INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), SCALE( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEBAL balances a general real matrix A. This involves, first, +*> permuting A by a similarity transformation to isolate eigenvalues +*> in the first 1 to ILO-1 and last IHI+1 to N elements on the +*> diagonal; and second, applying a diagonal similarity transformation +*> to rows and columns ILO to IHI to make the rows and columns as +*> close in norm as possible. Both steps are optional. +*> +*> Balancing may reduce the 1-norm of the matrix, and improve the +*> accuracy of the computed eigenvalues and/or eigenvectors. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies the operations to be performed on A: +*> = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 +*> for i = 1,...,N; +*> = 'P': permute only; +*> = 'S': scale only; +*> = 'B': both permute and scale. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the input matrix A. +*> On exit, A is overwritten by the balanced matrix. +*> If JOB = 'N', A is not referenced. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> \param[out] IHI +*> \verbatim +*> IHI is INTEGER +*> ILO and IHI are set to integers such that on exit +*> A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. +*> If JOB = 'N' or 'S', ILO = 1 and IHI = N. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION array, dimension (N) +*> Details of the permutations and scaling factors applied to +*> A. If P(j) is the index of the row and column interchanged +*> with row and column j and D(j) is the scaling factor +*> applied to row and column j, then +*> SCALE(j) = P(j) for j = 1,...,ILO-1 +*> = D(j) for j = ILO,...,IHI +*> = P(j) for j = IHI+1,...,N. +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The permutations consist of row and column interchanges which put +*> the matrix in the form +*> +*> ( T1 X Y ) +*> P A P = ( 0 B Z ) +*> ( 0 0 T2 ) +*> +*> where T1 and T2 are upper triangular matrices whose eigenvalues lie +*> along the diagonal. The column indices ILO and IHI mark the starting +*> and ending columns of the submatrix B. Balancing consists of applying +*> a diagonal similarity transformation inv(D) * B * D to make the +*> 1-norms of each row of B and its corresponding column nearly equal. +*> The output matrix is +*> +*> ( T1 X*D Y ) +*> ( 0 inv(D)*B*D inv(D)*Z ). +*> ( 0 0 T2 ) +*> +*> Information about the permutations P and the diagonal matrix D is +*> returned in the vector SCALE. +*> +*> This subroutine is based on the EISPACK routine BALANC. +*> +*> Modified by Tzu-Yi Chen, Computer Science Division, University of +*> California at Berkeley, USA +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), SCALE( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION SCLFAC + PARAMETER ( SCLFAC = 2.0D+0 ) + DOUBLE PRECISION FACTOR + PARAMETER ( FACTOR = 0.95D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOCONV + INTEGER I, ICA, IEXC, IRA, J, K, L, M + DOUBLE PRECISION C, CA, F, G, R, RA, S, SFMAX1, SFMAX2, SFMIN1, + $ SFMIN2 +* .. +* .. External Functions .. + LOGICAL DISNAN, LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DNRM2 + EXTERNAL DISNAN, LSAME, IDAMAX, DLAMCH, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* Test the input parameters +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEBAL', -INFO ) + RETURN + END IF +* + K = 1 + L = N +* + IF( N.EQ.0 ) + $ GO TO 210 +* + IF( LSAME( JOB, 'N' ) ) THEN + DO 10 I = 1, N + SCALE( I ) = ONE + 10 CONTINUE + GO TO 210 + END IF +* + IF( LSAME( JOB, 'S' ) ) + $ GO TO 120 +* +* Permutation to isolate eigenvalues if possible +* + GO TO 50 +* +* Row and column exchange. +* + 20 CONTINUE + SCALE( M ) = J + IF( J.EQ.M ) + $ GO TO 30 +* + CALL DSWAP( L, A( 1, J ), 1, A( 1, M ), 1 ) + CALL DSWAP( N-K+1, A( J, K ), LDA, A( M, K ), LDA ) +* + 30 CONTINUE + GO TO ( 40, 80 )IEXC +* +* Search for rows isolating an eigenvalue and push them down. +* + 40 CONTINUE + IF( L.EQ.1 ) + $ GO TO 210 + L = L - 1 +* + 50 CONTINUE + DO 70 J = L, 1, -1 +* + DO 60 I = 1, L + IF( I.EQ.J ) + $ GO TO 60 + IF( A( J, I ).NE.ZERO ) + $ GO TO 70 + 60 CONTINUE +* + M = L + IEXC = 1 + GO TO 20 + 70 CONTINUE +* + GO TO 90 +* +* Search for columns isolating an eigenvalue and push them left. +* + 80 CONTINUE + K = K + 1 +* + 90 CONTINUE + DO 110 J = K, L +* + DO 100 I = K, L + IF( I.EQ.J ) + $ GO TO 100 + IF( A( I, J ).NE.ZERO ) + $ GO TO 110 + 100 CONTINUE +* + M = K + IEXC = 2 + GO TO 20 + 110 CONTINUE +* + 120 CONTINUE + DO 130 I = K, L + SCALE( I ) = ONE + 130 CONTINUE +* + IF( LSAME( JOB, 'P' ) ) + $ GO TO 210 +* +* Balance the submatrix in rows K to L. +* +* Iterative loop for norm reduction +* + SFMIN1 = DLAMCH( 'S' ) / DLAMCH( 'P' ) + SFMAX1 = ONE / SFMIN1 + SFMIN2 = SFMIN1*SCLFAC + SFMAX2 = ONE / SFMIN2 +* + 140 CONTINUE + NOCONV = .FALSE. +* + DO 200 I = K, L +* + C = DNRM2( L-K+1, A( K, I ), 1 ) + R = DNRM2( L-K+1, A( I, K ), LDA ) + ICA = IDAMAX( L, A( 1, I ), 1 ) + CA = ABS( A( ICA, I ) ) + IRA = IDAMAX( N-K+1, A( I, K ), LDA ) + RA = ABS( A( I, IRA+K-1 ) ) +* +* Guard against zero C or R due to underflow. +* + IF( C.EQ.ZERO .OR. R.EQ.ZERO ) + $ GO TO 200 + G = R / SCLFAC + F = ONE + S = C + R + 160 CONTINUE + IF( C.GE.G .OR. MAX( F, C, CA ).GE.SFMAX2 .OR. + $ MIN( R, G, RA ).LE.SFMIN2 )GO TO 170 + IF( DISNAN( C+F+CA+R+G+RA ) ) THEN +* +* Exit if NaN to avoid infinite loop +* + INFO = -3 + CALL XERBLA( 'DGEBAL', -INFO ) + RETURN + END IF + F = F*SCLFAC + C = C*SCLFAC + CA = CA*SCLFAC + R = R / SCLFAC + G = G / SCLFAC + RA = RA / SCLFAC + GO TO 160 +* + 170 CONTINUE + G = C / SCLFAC + 180 CONTINUE + IF( G.LT.R .OR. MAX( R, RA ).GE.SFMAX2 .OR. + $ MIN( F, C, G, CA ).LE.SFMIN2 )GO TO 190 + F = F / SCLFAC + C = C / SCLFAC + G = G / SCLFAC + CA = CA / SCLFAC + R = R*SCLFAC + RA = RA*SCLFAC + GO TO 180 +* +* Now balance. +* + 190 CONTINUE + IF( ( C+R ).GE.FACTOR*S ) + $ GO TO 200 + IF( F.LT.ONE .AND. SCALE( I ).LT.ONE ) THEN + IF( F*SCALE( I ).LE.SFMIN1 ) + $ GO TO 200 + END IF + IF( F.GT.ONE .AND. SCALE( I ).GT.ONE ) THEN + IF( SCALE( I ).GE.SFMAX1 / F ) + $ GO TO 200 + END IF + G = ONE / F + SCALE( I ) = SCALE( I )*F + NOCONV = .TRUE. +* + CALL DSCAL( N-K+1, G, A( I, K ), LDA ) + CALL DSCAL( L, F, A( 1, I ), 1 ) +* + 200 CONTINUE +* + IF( NOCONV ) + $ GO TO 140 +* + 210 CONTINUE + ILO = K + IHI = L +* + RETURN +* +* End of DGEBAL +* + END diff --git a/dspl/liblapack/SRC/dgebd2.f b/dspl/liblapack/SRC/dgebd2.f new file mode 100644 index 0000000..2bec4e2 --- /dev/null +++ b/dspl/liblapack/SRC/dgebd2.f @@ -0,0 +1,320 @@ +*> \brief \b DGEBD2 reduces a general matrix to bidiagonal form using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEBD2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), +* $ TAUQ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEBD2 reduces a real general m by n matrix A to upper or lower +*> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. +*> +*> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows in the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the m by n general matrix to be reduced. +*> On exit, +*> if m >= n, the diagonal and the first superdiagonal are +*> overwritten with the upper bidiagonal matrix B; the +*> elements below the diagonal, with the array TAUQ, represent +*> the orthogonal matrix Q as a product of elementary +*> reflectors, and the elements above the first superdiagonal, +*> with the array TAUP, represent the orthogonal matrix P as +*> a product of elementary reflectors; +*> if m < n, the diagonal and the first subdiagonal are +*> overwritten with the lower bidiagonal matrix B; the +*> elements below the first subdiagonal, with the array TAUQ, +*> represent the orthogonal matrix Q as a product of +*> elementary reflectors, and the elements above the diagonal, +*> with the array TAUP, represent the orthogonal matrix P as +*> a product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (min(M,N)) +*> The diagonal elements of the bidiagonal matrix B: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (min(M,N)-1) +*> The off-diagonal elements of the bidiagonal matrix B: +*> if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +*> if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +*> \endverbatim +*> +*> \param[out] TAUQ +*> \verbatim +*> TAUQ is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix Q. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP +*> \verbatim +*> TAUP is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix P. See Further Details. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (max(M,N)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrices Q and P are represented as products of elementary +*> reflectors: +*> +*> If m >= n, +*> +*> Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T +*> +*> where tauq and taup are real scalars, and v and u are real vectors; +*> v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); +*> u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); +*> tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> If m < n, +*> +*> Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T +*> +*> where tauq and taup are real scalars, and v and u are real vectors; +*> v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); +*> u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); +*> tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> The contents of A on exit are illustrated by the following examples: +*> +*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +*> +*> ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +*> ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +*> ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +*> ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +*> ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +*> ( v1 v2 v3 v4 v5 ) +*> +*> where d and e denote diagonal and off-diagonal elements of B, vi +*> denotes an element of the vector defining H(i), and ui an element of +*> the vector defining G(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), + $ TAUQ( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'DGEBD2', -INFO ) + RETURN + END IF +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, N +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = A( I, I ) + A( I, I ) = ONE +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + IF( I.LT.N ) + $ CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAUQ( I ), + $ A( I, I+1 ), LDA, WORK ) + A( I, I ) = D( I ) +* + IF( I.LT.N ) THEN +* +* Generate elementary reflector G(i) to annihilate +* A(i,i+2:n) +* + CALL DLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ), + $ LDA, TAUP( I ) ) + E( I ) = A( I, I+1 ) + A( I, I+1 ) = ONE +* +* Apply G(i) to A(i+1:m,i+1:n) from the right +* + CALL DLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA, + $ TAUP( I ), A( I+1, I+1 ), LDA, WORK ) + A( I, I+1 ) = E( I ) + ELSE + TAUP( I ) = ZERO + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, M +* +* Generate elementary reflector G(i) to annihilate A(i,i+1:n) +* + CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = A( I, I ) + A( I, I ) = ONE +* +* Apply G(i) to A(i+1:m,i:n) from the right +* + IF( I.LT.M ) + $ CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ TAUP( I ), A( I+1, I ), LDA, WORK ) + A( I, I ) = D( I ) +* + IF( I.LT.M ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:m,i) +* + CALL DLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = A( I+1, I ) + A( I+1, I ) = ONE +* +* Apply H(i) to A(i+1:m,i+1:n) from the left +* + CALL DLARF( 'Left', M-I, N-I, A( I+1, I ), 1, TAUQ( I ), + $ A( I+1, I+1 ), LDA, WORK ) + A( I+1, I ) = E( I ) + ELSE + TAUQ( I ) = ZERO + END IF + 20 CONTINUE + END IF + RETURN +* +* End of DGEBD2 +* + END diff --git a/dspl/liblapack/SRC/dgebrd.f b/dspl/liblapack/SRC/dgebrd.f new file mode 100644 index 0000000..56a7abe --- /dev/null +++ b/dspl/liblapack/SRC/dgebrd.f @@ -0,0 +1,352 @@ +*> \brief \b DGEBRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEBRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), +* $ TAUQ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEBRD reduces a general real M-by-N matrix A to upper or lower +*> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. +*> +*> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows in the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N general matrix to be reduced. +*> On exit, +*> if m >= n, the diagonal and the first superdiagonal are +*> overwritten with the upper bidiagonal matrix B; the +*> elements below the diagonal, with the array TAUQ, represent +*> the orthogonal matrix Q as a product of elementary +*> reflectors, and the elements above the first superdiagonal, +*> with the array TAUP, represent the orthogonal matrix P as +*> a product of elementary reflectors; +*> if m < n, the diagonal and the first subdiagonal are +*> overwritten with the lower bidiagonal matrix B; the +*> elements below the first subdiagonal, with the array TAUQ, +*> represent the orthogonal matrix Q as a product of +*> elementary reflectors, and the elements above the diagonal, +*> with the array TAUP, represent the orthogonal matrix P as +*> a product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (min(M,N)) +*> The diagonal elements of the bidiagonal matrix B: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (min(M,N)-1) +*> The off-diagonal elements of the bidiagonal matrix B: +*> if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +*> if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +*> \endverbatim +*> +*> \param[out] TAUQ +*> \verbatim +*> TAUQ is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix Q. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP +*> \verbatim +*> TAUP is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix P. See Further Details. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,M,N). +*> For optimum performance LWORK >= (M+N)*NB, where NB +*> is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrices Q and P are represented as products of elementary +*> reflectors: +*> +*> If m >= n, +*> +*> Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T +*> +*> where tauq and taup are real scalars, and v and u are real vectors; +*> v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); +*> u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); +*> tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> If m < n, +*> +*> Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T +*> +*> where tauq and taup are real scalars, and v and u are real vectors; +*> v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); +*> u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); +*> tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> The contents of A on exit are illustrated by the following examples: +*> +*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +*> +*> ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +*> ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +*> ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +*> ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +*> ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +*> ( v1 v2 v3 v4 v5 ) +*> +*> where d and e denote diagonal and off-diagonal elements of B, vi +*> denotes an element of the vector defining H(i), and ui an element of +*> the vector defining G(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), + $ TAUQ( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB, + $ NBMIN, NX, WS +* .. +* .. External Subroutines .. + EXTERNAL DGEBD2, DGEMM, DLABRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB = MAX( 1, ILAENV( 1, 'DGEBRD', ' ', M, N, -1, -1 ) ) + LWKOPT = ( M+N )*NB + WORK( 1 ) = DBLE( LWKOPT ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M, N ) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'DGEBRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + MINMN = MIN( M, N ) + IF( MINMN.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + WS = MAX( M, N ) + LDWRKX = M + LDWRKY = N +* + IF( NB.GT.1 .AND. NB.LT.MINMN ) THEN +* +* Set the crossover point NX. +* + NX = MAX( NB, ILAENV( 3, 'DGEBRD', ' ', M, N, -1, -1 ) ) +* +* Determine when to switch from blocked to unblocked code. +* + IF( NX.LT.MINMN ) THEN + WS = ( M+N )*NB + IF( LWORK.LT.WS ) THEN +* +* Not enough work space for the optimal NB, consider using +* a smaller block size. +* + NBMIN = ILAENV( 2, 'DGEBRD', ' ', M, N, -1, -1 ) + IF( LWORK.GE.( M+N )*NBMIN ) THEN + NB = LWORK / ( M+N ) + ELSE + NB = 1 + NX = MINMN + END IF + END IF + END IF + ELSE + NX = MINMN + END IF +* + DO 30 I = 1, MINMN - NX, NB +* +* Reduce rows and columns i:i+nb-1 to bidiagonal form and return +* the matrices X and Y which are needed to update the unreduced +* part of the matrix +* + CALL DLABRD( M-I+1, N-I+1, NB, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, LDWRKX, + $ WORK( LDWRKX*NB+1 ), LDWRKY ) +* +* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update +* of the form A := A - V*Y**T - X*U**T +* + CALL DGEMM( 'No transpose', 'Transpose', M-I-NB+1, N-I-NB+1, + $ NB, -ONE, A( I+NB, I ), LDA, + $ WORK( LDWRKX*NB+NB+1 ), LDWRKY, ONE, + $ A( I+NB, I+NB ), LDA ) + CALL DGEMM( 'No transpose', 'No transpose', M-I-NB+1, N-I-NB+1, + $ NB, -ONE, WORK( NB+1 ), LDWRKX, A( I, I+NB ), LDA, + $ ONE, A( I+NB, I+NB ), LDA ) +* +* Copy diagonal and off-diagonal elements of B back into A +* + IF( M.GE.N ) THEN + DO 10 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J, J+1 ) = E( J ) + 10 CONTINUE + ELSE + DO 20 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J+1, J ) = E( J ) + 20 CONTINUE + END IF + 30 CONTINUE +* +* Use unblocked code to reduce the remainder of the matrix +* + CALL DGEBD2( M-I+1, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, IINFO ) + WORK( 1 ) = WS + RETURN +* +* End of DGEBRD +* + END diff --git a/dspl/liblapack/SRC/dgecon.f b/dspl/liblapack/SRC/dgecon.f new file mode 100644 index 0000000..be20bbc --- /dev/null +++ b/dspl/liblapack/SRC/dgecon.f @@ -0,0 +1,261 @@ +*> \brief \b DGECON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGECON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER INFO, LDA, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGECON estimates the reciprocal of the condition number of a general +*> real matrix A, in either the 1-norm or the infinity-norm, using +*> the LU factorization computed by DGETRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as +*> RCOND = 1 / ( norm(A) * norm(inv(A)) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The factors L and U from the factorization A = P*L*U +*> as computed by DGETRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> If NORM = '1' or 'O', the 1-norm of the original matrix A. +*> If NORM = 'I', the infinity-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(norm(A) * norm(inv(A))). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + DOUBLE PRECISION AINVNM, SCALE, SL, SMLNUM, SU +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IDAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLACN2, DLATRS, DRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGECON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + CALL DLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A, + $ LDA, WORK, SL, WORK( 2*N+1 ), INFO ) +* +* Multiply by inv(U). +* + CALL DLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SU, WORK( 3*N+1 ), INFO ) + ELSE +* +* Multiply by inv(U**T). +* + CALL DLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, A, + $ LDA, WORK, SU, WORK( 3*N+1 ), INFO ) +* +* Multiply by inv(L**T). +* + CALL DLATRS( 'Lower', 'Transpose', 'Unit', NORMIN, N, A, + $ LDA, WORK, SL, WORK( 2*N+1 ), INFO ) + END IF +* +* Divide X by 1/(SL*SU) if doing so will not cause overflow. +* + SCALE = SL*SU + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = IDAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL DRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of DGECON +* + END diff --git a/dspl/liblapack/SRC/dgeequ.f b/dspl/liblapack/SRC/dgeequ.f new file mode 100644 index 0000000..2d9475c --- /dev/null +++ b/dspl/liblapack/SRC/dgeequ.f @@ -0,0 +1,304 @@ +*> \brief \b DGEEQU +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEEQU + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( * ), R( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEEQU computes row and column scalings intended to equilibrate an +*> M-by-N matrix A and reduce its condition number. R returns the row +*> scale factors and C the column scale factors, chosen to try to make +*> the largest element in each row and column of the matrix B with +*> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. +*> +*> R(i) and C(j) are restricted to be between SMLNUM = smallest safe +*> number and BIGNUM = largest safe number. Use of these scaling +*> factors is not guaranteed to reduce the condition number of A but +*> works well in practice. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The M-by-N matrix whose equilibration factors are +*> to be computed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (M) +*> If INFO = 0 or INFO > M, R contains the row scale factors +*> for A. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, C contains the column scale factors for A. +*> \endverbatim +*> +*> \param[out] ROWCND +*> \verbatim +*> ROWCND is DOUBLE PRECISION +*> If INFO = 0 or INFO > M, ROWCND contains the ratio of the +*> smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +*> AMAX is neither too large nor too small, it is not worth +*> scaling by R. +*> \endverbatim +*> +*> \param[out] COLCND +*> \verbatim +*> COLCND is DOUBLE PRECISION +*> If INFO = 0, COLCND contains the ratio of the smallest +*> C(i) to the largest C(i). If COLCND >= 0.1, it is not +*> worth scaling by C. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= M: the i-th row of A is exactly zero +*> > M: the (i-M)-th column of A is exactly zero +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + SUBROUTINE DGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N + DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( * ), R( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION BIGNUM, RCMAX, RCMIN, SMLNUM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + DO 30 J = 1, N + DO 20 I = 1, M + R( I ) = MAX( R( I ), ABS( A( I, J ) ) ) + 20 CONTINUE + 30 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)) +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + DO 90 J = 1, N + DO 80 I = 1, M + C( J ) = MAX( C( J ), ABS( A( I, J ) )*R( I ) ) + 80 CONTINUE + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)) +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of DGEEQU +* + END diff --git a/dspl/liblapack/SRC/dgeequb.f b/dspl/liblapack/SRC/dgeequb.f new file mode 100644 index 0000000..0404274 --- /dev/null +++ b/dspl/liblapack/SRC/dgeequb.f @@ -0,0 +1,321 @@ +*> \brief \b DGEEQUB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEEQUB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEEQUB( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( * ), R( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEEQUB computes row and column scalings intended to equilibrate an +*> M-by-N matrix A and reduce its condition number. R returns the row +*> scale factors and C the column scale factors, chosen to try to make +*> the largest element in each row and column of the matrix B with +*> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most +*> the radix. +*> +*> R(i) and C(j) are restricted to be a power of the radix between +*> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use +*> of these scaling factors is not guaranteed to reduce the condition +*> number of A but works well in practice. +*> +*> This routine differs from DGEEQU by restricting the scaling factors +*> to a power of the radix. Barring over- and underflow, scaling by +*> these factors introduces no additional rounding errors. However, the +*> scaled entries' magnitudes are no longer approximately 1 but lie +*> between sqrt(radix) and 1/sqrt(radix). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The M-by-N matrix whose equilibration factors are +*> to be computed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (M) +*> If INFO = 0 or INFO > M, R contains the row scale factors +*> for A. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, C contains the column scale factors for A. +*> \endverbatim +*> +*> \param[out] ROWCND +*> \verbatim +*> ROWCND is DOUBLE PRECISION +*> If INFO = 0 or INFO > M, ROWCND contains the ratio of the +*> smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +*> AMAX is neither too large nor too small, it is not worth +*> scaling by R. +*> \endverbatim +*> +*> \param[out] COLCND +*> \verbatim +*> COLCND is DOUBLE PRECISION +*> If INFO = 0, COLCND contains the ratio of the smallest +*> C(i) to the largest C(i). If COLCND >= 0.1, it is not +*> worth scaling by C. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= M: the i-th row of A is exactly zero +*> > M: the (i-M)-th column of A is exactly zero +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + SUBROUTINE DGEEQUB( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N + DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( * ), R( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION BIGNUM, RCMAX, RCMIN, SMLNUM, RADIX, LOGRDX +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, LOG +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEEQUB', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. Assume SMLNUM is a power of the radix. +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + RADIX = DLAMCH( 'B' ) + LOGRDX = LOG( RADIX ) +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + DO 30 J = 1, N + DO 20 I = 1, M + R( I ) = MAX( R( I ), ABS( A( I, J ) ) ) + 20 CONTINUE + 30 CONTINUE + DO I = 1, M + IF( R( I ).GT.ZERO ) THEN + R( I ) = RADIX**INT( LOG( R( I ) ) / LOGRDX ) + END IF + END DO +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)). +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + DO 90 J = 1, N + DO 80 I = 1, M + C( J ) = MAX( C( J ), ABS( A( I, J ) )*R( I ) ) + 80 CONTINUE + IF( C( J ).GT.ZERO ) THEN + C( J ) = RADIX**INT( LOG( C( J ) ) / LOGRDX ) + END IF + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)). +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of DGEEQUB +* + END diff --git a/dspl/liblapack/SRC/dgees.f b/dspl/liblapack/SRC/dgees.f new file mode 100644 index 0000000..c2723f6 --- /dev/null +++ b/dspl/liblapack/SRC/dgees.f @@ -0,0 +1,535 @@ +*> \brief DGEES computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEES + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, WR, WI, +* VS, LDVS, WORK, LWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVS, SORT +* INTEGER INFO, LDA, LDVS, LWORK, N, SDIM +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* DOUBLE PRECISION A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), +* $ WR( * ) +* .. +* .. Function Arguments .. +* LOGICAL SELECT +* EXTERNAL SELECT +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEES computes for an N-by-N real nonsymmetric matrix A, the +*> eigenvalues, the real Schur form T, and, optionally, the matrix of +*> Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). +*> +*> Optionally, it also orders the eigenvalues on the diagonal of the +*> real Schur form so that selected eigenvalues are at the top left. +*> The leading columns of Z then form an orthonormal basis for the +*> invariant subspace corresponding to the selected eigenvalues. +*> +*> A matrix is in real Schur form if it is upper quasi-triangular with +*> 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in the +*> form +*> [ a b ] +*> [ c a ] +*> +*> where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVS +*> \verbatim +*> JOBVS is CHARACTER*1 +*> = 'N': Schur vectors are not computed; +*> = 'V': Schur vectors are computed. +*> \endverbatim +*> +*> \param[in] SORT +*> \verbatim +*> SORT is CHARACTER*1 +*> Specifies whether or not to order the eigenvalues on the +*> diagonal of the Schur form. +*> = 'N': Eigenvalues are not ordered; +*> = 'S': Eigenvalues are ordered (see SELECT). +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is a LOGICAL FUNCTION of two DOUBLE PRECISION arguments +*> SELECT must be declared EXTERNAL in the calling subroutine. +*> If SORT = 'S', SELECT is used to select eigenvalues to sort +*> to the top left of the Schur form. +*> If SORT = 'N', SELECT is not referenced. +*> An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if +*> SELECT(WR(j),WI(j)) is true; i.e., if either one of a complex +*> conjugate pair of eigenvalues is selected, then both complex +*> eigenvalues are selected. +*> Note that a selected complex eigenvalue may no longer +*> satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since +*> ordering may change the value of complex eigenvalues +*> (especially if the eigenvalue is ill-conditioned); in this +*> case INFO is set to N+2 (see INFO below). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> On exit, A has been overwritten by its real Schur form T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] SDIM +*> \verbatim +*> SDIM is INTEGER +*> If SORT = 'N', SDIM = 0. +*> If SORT = 'S', SDIM = number of eigenvalues (after sorting) +*> for which SELECT is true. (Complex conjugate +*> pairs for which SELECT is true for either +*> eigenvalue count as 2.) +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] WI +*> \verbatim +*> WI is DOUBLE PRECISION array, dimension (N) +*> WR and WI contain the real and imaginary parts, +*> respectively, of the computed eigenvalues in the same order +*> that they appear on the diagonal of the output Schur form T. +*> Complex conjugate pairs of eigenvalues will appear +*> consecutively with the eigenvalue having the positive +*> imaginary part first. +*> \endverbatim +*> +*> \param[out] VS +*> \verbatim +*> VS is DOUBLE PRECISION array, dimension (LDVS,N) +*> If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur +*> vectors. +*> If JOBVS = 'N', VS is not referenced. +*> \endverbatim +*> +*> \param[in] LDVS +*> \verbatim +*> LDVS is INTEGER +*> The leading dimension of the array VS. LDVS >= 1; if +*> JOBVS = 'V', LDVS >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) contains the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,3*N). +*> For good performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> Not referenced if SORT = 'N'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, and i is +*> <= N: the QR algorithm failed to compute all the +*> eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI +*> contain those eigenvalues which have converged; if +*> JOBVS = 'V', VS contains the matrix which reduces A +*> to its partially converged Schur form. +*> = N+1: the eigenvalues could not be reordered because some +*> eigenvalues were too close to separate (the problem +*> is very ill-conditioned); +*> = N+2: after reordering, roundoff changed values of some +*> complex eigenvalues so that leading eigenvalues in +*> the Schur form no longer satisfy SELECT=.TRUE. This +*> could also be caused by underflow due to scaling. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEeigen +* +* ===================================================================== + SUBROUTINE DGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, WR, WI, + $ VS, LDVS, WORK, LWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVS, SORT + INTEGER INFO, LDA, LDVS, LWORK, N, SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + DOUBLE PRECISION A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), + $ WR( * ) +* .. +* .. Function Arguments .. + LOGICAL SELECT + EXTERNAL SELECT +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, LASTSL, LQUERY, LST2SL, SCALEA, WANTST, + $ WANTVS + INTEGER HSWORK, I, I1, I2, IBAL, ICOND, IERR, IEVAL, + $ IHI, ILO, INXT, IP, ITAU, IWRK, MAXWRK, MINWRK + DOUBLE PRECISION ANRM, BIGNUM, CSCALE, EPS, S, SEP, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEBAK, DGEBAL, DGEHRD, DHSEQR, DLACPY, + $ DLABAD, DLASCL, DORGHR, DSWAP, DTRSEN, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVS = LSAME( JOBVS, 'V' ) + WANTST = LSAME( SORT, 'S' ) + IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by DHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + ELSE + MAXWRK = 2*N + N*ILAENV( 1, 'DGEHRD', ' ', N, 1, N, 0 ) + MINWRK = 3*N +* + CALL DHSEQR( 'S', JOBVS, N, 1, N, A, LDA, WR, WI, VS, LDVS, + $ WORK, -1, IEVAL ) + HSWORK = WORK( 1 ) +* + IF( .NOT.WANTVS ) THEN + MAXWRK = MAX( MAXWRK, N + HSWORK ) + ELSE + MAXWRK = MAX( MAXWRK, 2*N + ( N - 1 )*ILAENV( 1, + $ 'DORGHR', ' ', N, 1, N, -1 ) ) + MAXWRK = MAX( MAXWRK, N + HSWORK ) + END IF + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEES ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL DLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (Workspace: need N) +* + IBAL = 1 + CALL DGEBAL( 'P', N, A, LDA, ILO, IHI, WORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (Workspace: need 3*N, prefer 2*N+N*NB) +* + ITAU = N + IBAL + IWRK = N + ITAU + CALL DGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVS ) THEN +* +* Copy Householder vectors to VS +* + CALL DLACPY( 'L', N, N, A, LDA, VS, LDVS ) +* +* Generate orthogonal matrix in VS +* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* + CALL DORGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) + END IF +* + SDIM = 0 +* +* Perform QR iteration, accumulating Schur vectors in VS if desired +* (Workspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL DHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, WR, WI, VS, LDVS, + $ WORK( IWRK ), LWORK-IWRK+1, IEVAL ) + IF( IEVAL.GT.0 ) + $ INFO = IEVAL +* +* Sort eigenvalues if desired +* + IF( WANTST .AND. INFO.EQ.0 ) THEN + IF( SCALEA ) THEN + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WR, N, IERR ) + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WI, N, IERR ) + END IF + DO 10 I = 1, N + BWORK( I ) = SELECT( WR( I ), WI( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues and transform Schur vectors +* (Workspace: none needed) +* + CALL DTRSEN( 'N', JOBVS, BWORK, N, A, LDA, VS, LDVS, WR, WI, + $ SDIM, S, SEP, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1, + $ ICOND ) + IF( ICOND.GT.0 ) + $ INFO = N + ICOND + END IF +* + IF( WANTVS ) THEN +* +* Undo balancing +* (Workspace: need N) +* + CALL DGEBAK( 'P', 'R', N, ILO, IHI, WORK( IBAL ), N, VS, LDVS, + $ IERR ) + END IF +* + IF( SCALEA ) THEN +* +* Undo scaling for the Schur form of A +* + CALL DLASCL( 'H', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR ) + CALL DCOPY( N, A, LDA+1, WR, 1 ) + IF( CSCALE.EQ.SMLNUM ) THEN +* +* If scaling back towards underflow, adjust WI if an +* offdiagonal element of a 2-by-2 block in the Schur form +* underflows. +* + IF( IEVAL.GT.0 ) THEN + I1 = IEVAL + 1 + I2 = IHI - 1 + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, + $ MAX( ILO-1, 1 ), IERR ) + ELSE IF( WANTST ) THEN + I1 = 1 + I2 = N - 1 + ELSE + I1 = ILO + I2 = IHI - 1 + END IF + INXT = I1 - 1 + DO 20 I = I1, I2 + IF( I.LT.INXT ) + $ GO TO 20 + IF( WI( I ).EQ.ZERO ) THEN + INXT = I + 1 + ELSE + IF( A( I+1, I ).EQ.ZERO ) THEN + WI( I ) = ZERO + WI( I+1 ) = ZERO + ELSE IF( A( I+1, I ).NE.ZERO .AND. A( I, I+1 ).EQ. + $ ZERO ) THEN + WI( I ) = ZERO + WI( I+1 ) = ZERO + IF( I.GT.1 ) + $ CALL DSWAP( I-1, A( 1, I ), 1, A( 1, I+1 ), 1 ) + IF( N.GT.I+1 ) + $ CALL DSWAP( N-I-1, A( I, I+2 ), LDA, + $ A( I+1, I+2 ), LDA ) + IF( WANTVS ) THEN + CALL DSWAP( N, VS( 1, I ), 1, VS( 1, I+1 ), 1 ) + END IF + A( I, I+1 ) = A( I+1, I ) + A( I+1, I ) = ZERO + END IF + INXT = I + 2 + END IF + 20 CONTINUE + END IF +* +* Undo scaling for the imaginary part of the eigenvalues +* + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N-IEVAL, 1, + $ WI( IEVAL+1 ), MAX( N-IEVAL, 1 ), IERR ) + END IF +* + IF( WANTST .AND. INFO.EQ.0 ) THEN +* +* Check if reordering successful +* + LASTSL = .TRUE. + LST2SL = .TRUE. + SDIM = 0 + IP = 0 + DO 30 I = 1, N + CURSL = SELECT( WR( I ), WI( I ) ) + IF( WI( I ).EQ.ZERO ) THEN + IF( CURSL ) + $ SDIM = SDIM + 1 + IP = 0 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + ELSE + IF( IP.EQ.1 ) THEN +* +* Last eigenvalue of conjugate pair +* + CURSL = CURSL .OR. LASTSL + LASTSL = CURSL + IF( CURSL ) + $ SDIM = SDIM + 2 + IP = -1 + IF( CURSL .AND. .NOT.LST2SL ) + $ INFO = N + 2 + ELSE +* +* First eigenvalue of conjugate pair +* + IP = 1 + END IF + END IF + LST2SL = LASTSL + LASTSL = CURSL + 30 CONTINUE + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of DGEES +* + END diff --git a/dspl/liblapack/SRC/dgeesx.f b/dspl/liblapack/SRC/dgeesx.f new file mode 100644 index 0000000..26042a5 --- /dev/null +++ b/dspl/liblapack/SRC/dgeesx.f @@ -0,0 +1,649 @@ +*> \brief DGEESX computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEESX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, +* WR, WI, VS, LDVS, RCONDE, RCONDV, WORK, LWORK, +* IWORK, LIWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVS, SENSE, SORT +* INTEGER INFO, LDA, LDVS, LIWORK, LWORK, N, SDIM +* DOUBLE PRECISION RCONDE, RCONDV +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), +* $ WR( * ) +* .. +* .. Function Arguments .. +* LOGICAL SELECT +* EXTERNAL SELECT +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEESX computes for an N-by-N real nonsymmetric matrix A, the +*> eigenvalues, the real Schur form T, and, optionally, the matrix of +*> Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). +*> +*> Optionally, it also orders the eigenvalues on the diagonal of the +*> real Schur form so that selected eigenvalues are at the top left; +*> computes a reciprocal condition number for the average of the +*> selected eigenvalues (RCONDE); and computes a reciprocal condition +*> number for the right invariant subspace corresponding to the +*> selected eigenvalues (RCONDV). The leading columns of Z form an +*> orthonormal basis for this invariant subspace. +*> +*> For further explanation of the reciprocal condition numbers RCONDE +*> and RCONDV, see Section 4.10 of the LAPACK Users' Guide (where +*> these quantities are called s and sep respectively). +*> +*> A real matrix is in real Schur form if it is upper quasi-triangular +*> with 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in +*> the form +*> [ a b ] +*> [ c a ] +*> +*> where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVS +*> \verbatim +*> JOBVS is CHARACTER*1 +*> = 'N': Schur vectors are not computed; +*> = 'V': Schur vectors are computed. +*> \endverbatim +*> +*> \param[in] SORT +*> \verbatim +*> SORT is CHARACTER*1 +*> Specifies whether or not to order the eigenvalues on the +*> diagonal of the Schur form. +*> = 'N': Eigenvalues are not ordered; +*> = 'S': Eigenvalues are ordered (see SELECT). +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is a LOGICAL FUNCTION of two DOUBLE PRECISION arguments +*> SELECT must be declared EXTERNAL in the calling subroutine. +*> If SORT = 'S', SELECT is used to select eigenvalues to sort +*> to the top left of the Schur form. +*> If SORT = 'N', SELECT is not referenced. +*> An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if +*> SELECT(WR(j),WI(j)) is true; i.e., if either one of a +*> complex conjugate pair of eigenvalues is selected, then both +*> are. Note that a selected complex eigenvalue may no longer +*> satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since +*> ordering may change the value of complex eigenvalues +*> (especially if the eigenvalue is ill-conditioned); in this +*> case INFO may be set to N+3 (see INFO below). +*> \endverbatim +*> +*> \param[in] SENSE +*> \verbatim +*> SENSE is CHARACTER*1 +*> Determines which reciprocal condition numbers are computed. +*> = 'N': None are computed; +*> = 'E': Computed for average of selected eigenvalues only; +*> = 'V': Computed for selected right invariant subspace only; +*> = 'B': Computed for both. +*> If SENSE = 'E', 'V' or 'B', SORT must equal 'S'. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the N-by-N matrix A. +*> On exit, A is overwritten by its real Schur form T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] SDIM +*> \verbatim +*> SDIM is INTEGER +*> If SORT = 'N', SDIM = 0. +*> If SORT = 'S', SDIM = number of eigenvalues (after sorting) +*> for which SELECT is true. (Complex conjugate +*> pairs for which SELECT is true for either +*> eigenvalue count as 2.) +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] WI +*> \verbatim +*> WI is DOUBLE PRECISION array, dimension (N) +*> WR and WI contain the real and imaginary parts, respectively, +*> of the computed eigenvalues, in the same order that they +*> appear on the diagonal of the output Schur form T. Complex +*> conjugate pairs of eigenvalues appear consecutively with the +*> eigenvalue having the positive imaginary part first. +*> \endverbatim +*> +*> \param[out] VS +*> \verbatim +*> VS is DOUBLE PRECISION array, dimension (LDVS,N) +*> If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur +*> vectors. +*> If JOBVS = 'N', VS is not referenced. +*> \endverbatim +*> +*> \param[in] LDVS +*> \verbatim +*> LDVS is INTEGER +*> The leading dimension of the array VS. LDVS >= 1, and if +*> JOBVS = 'V', LDVS >= N. +*> \endverbatim +*> +*> \param[out] RCONDE +*> \verbatim +*> RCONDE is DOUBLE PRECISION +*> If SENSE = 'E' or 'B', RCONDE contains the reciprocal +*> condition number for the average of the selected eigenvalues. +*> Not referenced if SENSE = 'N' or 'V'. +*> \endverbatim +*> +*> \param[out] RCONDV +*> \verbatim +*> RCONDV is DOUBLE PRECISION +*> If SENSE = 'V' or 'B', RCONDV contains the reciprocal +*> condition number for the selected right invariant subspace. +*> Not referenced if SENSE = 'N' or 'E'. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,3*N). +*> Also, if SENSE = 'E' or 'V' or 'B', +*> LWORK >= N+2*SDIM*(N-SDIM), where SDIM is the number of +*> selected eigenvalues computed by this routine. Note that +*> N+2*SDIM*(N-SDIM) <= N+N*N/2. Note also that an error is only +*> returned if LWORK < max(1,3*N), but if SENSE = 'E' or 'V' or +*> 'B' this may not be large enough. +*> For good performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates upper bounds on the optimal sizes of the +*> arrays WORK and IWORK, returns these values as the first +*> entries of the WORK and IWORK arrays, and no error messages +*> related to LWORK or LIWORK are issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> LIWORK >= 1; if SENSE = 'V' or 'B', LIWORK >= SDIM*(N-SDIM). +*> Note that SDIM*(N-SDIM) <= N*N/4. Note also that an error is +*> only returned if LIWORK < 1, but if SENSE = 'V' or 'B' this +*> may not be large enough. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates upper bounds on the optimal sizes of +*> the arrays WORK and IWORK, returns these values as the first +*> entries of the WORK and IWORK arrays, and no error messages +*> related to LWORK or LIWORK are issued by XERBLA. +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> Not referenced if SORT = 'N'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, and i is +*> <= N: the QR algorithm failed to compute all the +*> eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI +*> contain those eigenvalues which have converged; if +*> JOBVS = 'V', VS contains the transformation which +*> reduces A to its partially converged Schur form. +*> = N+1: the eigenvalues could not be reordered because some +*> eigenvalues were too close to separate (the problem +*> is very ill-conditioned); +*> = N+2: after reordering, roundoff changed values of some +*> complex eigenvalues so that leading eigenvalues in +*> the Schur form no longer satisfy SELECT=.TRUE. This +*> could also be caused by underflow due to scaling. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleGEeigen +* +* ===================================================================== + SUBROUTINE DGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, + $ WR, WI, VS, LDVS, RCONDE, RCONDV, WORK, LWORK, + $ IWORK, LIWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVS, SENSE, SORT + INTEGER INFO, LDA, LDVS, LIWORK, LWORK, N, SDIM + DOUBLE PRECISION RCONDE, RCONDV +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), + $ WR( * ) +* .. +* .. Function Arguments .. + LOGICAL SELECT + EXTERNAL SELECT +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, LASTSL, LQUERY, LST2SL, SCALEA, WANTSB, + $ WANTSE, WANTSN, WANTST, WANTSV, WANTVS + INTEGER HSWORK, I, I1, I2, IBAL, ICOND, IERR, IEVAL, + $ IHI, ILO, INXT, IP, ITAU, IWRK, LIWRK, LWRK, + $ MAXWRK, MINWRK + DOUBLE PRECISION ANRM, BIGNUM, CSCALE, EPS, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEBAK, DGEBAL, DGEHRD, DHSEQR, DLACPY, + $ DLASCL, DORGHR, DSWAP, DTRSEN, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLABAD, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTVS = LSAME( JOBVS, 'V' ) + WANTST = LSAME( SORT, 'S' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR. + $ ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN + INFO = -12 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "RWorkspace:" describe the +* minimal amount of real workspace needed at that point in the +* code, as well as the preferred amount for good performance. +* IWorkspace refers to integer workspace. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by DHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case. +* If SENSE = 'E', 'V' or 'B', then the amount of workspace needed +* depends on SDIM, which is computed by the routine DTRSEN later +* in the code.) +* + IF( INFO.EQ.0 ) THEN + LIWRK = 1 + IF( N.EQ.0 ) THEN + MINWRK = 1 + LWRK = 1 + ELSE + MAXWRK = 2*N + N*ILAENV( 1, 'DGEHRD', ' ', N, 1, N, 0 ) + MINWRK = 3*N +* + CALL DHSEQR( 'S', JOBVS, N, 1, N, A, LDA, WR, WI, VS, LDVS, + $ WORK, -1, IEVAL ) + HSWORK = WORK( 1 ) +* + IF( .NOT.WANTVS ) THEN + MAXWRK = MAX( MAXWRK, N + HSWORK ) + ELSE + MAXWRK = MAX( MAXWRK, 2*N + ( N - 1 )*ILAENV( 1, + $ 'DORGHR', ' ', N, 1, N, -1 ) ) + MAXWRK = MAX( MAXWRK, N + HSWORK ) + END IF + LWRK = MAXWRK + IF( .NOT.WANTSN ) + $ LWRK = MAX( LWRK, N + ( N*N )/2 ) + IF( WANTSV .OR. WANTSB ) + $ LIWRK = ( N*N )/4 + END IF + IWORK( 1 ) = LIWRK + WORK( 1 ) = LWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -16 + ELSE IF( LIWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEESX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL DLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (RWorkspace: need N) +* + IBAL = 1 + CALL DGEBAL( 'P', N, A, LDA, ILO, IHI, WORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (RWorkspace: need 3*N, prefer 2*N+N*NB) +* + ITAU = N + IBAL + IWRK = N + ITAU + CALL DGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVS ) THEN +* +* Copy Householder vectors to VS +* + CALL DLACPY( 'L', N, N, A, LDA, VS, LDVS ) +* +* Generate orthogonal matrix in VS +* (RWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* + CALL DORGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) + END IF +* + SDIM = 0 +* +* Perform QR iteration, accumulating Schur vectors in VS if desired +* (RWorkspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL DHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, WR, WI, VS, LDVS, + $ WORK( IWRK ), LWORK-IWRK+1, IEVAL ) + IF( IEVAL.GT.0 ) + $ INFO = IEVAL +* +* Sort eigenvalues if desired +* + IF( WANTST .AND. INFO.EQ.0 ) THEN + IF( SCALEA ) THEN + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WR, N, IERR ) + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WI, N, IERR ) + END IF + DO 10 I = 1, N + BWORK( I ) = SELECT( WR( I ), WI( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues, transform Schur vectors, and compute +* reciprocal condition numbers +* (RWorkspace: if SENSE is not 'N', need N+2*SDIM*(N-SDIM) +* otherwise, need N ) +* (IWorkspace: if SENSE is 'V' or 'B', need SDIM*(N-SDIM) +* otherwise, need 0 ) +* + CALL DTRSEN( SENSE, JOBVS, BWORK, N, A, LDA, VS, LDVS, WR, WI, + $ SDIM, RCONDE, RCONDV, WORK( IWRK ), LWORK-IWRK+1, + $ IWORK, LIWORK, ICOND ) + IF( .NOT.WANTSN ) + $ MAXWRK = MAX( MAXWRK, N+2*SDIM*( N-SDIM ) ) + IF( ICOND.EQ.-15 ) THEN +* +* Not enough real workspace +* + INFO = -16 + ELSE IF( ICOND.EQ.-17 ) THEN +* +* Not enough integer workspace +* + INFO = -18 + ELSE IF( ICOND.GT.0 ) THEN +* +* DTRSEN failed to reorder or to restore standard Schur form +* + INFO = ICOND + N + END IF + END IF +* + IF( WANTVS ) THEN +* +* Undo balancing +* (RWorkspace: need N) +* + CALL DGEBAK( 'P', 'R', N, ILO, IHI, WORK( IBAL ), N, VS, LDVS, + $ IERR ) + END IF +* + IF( SCALEA ) THEN +* +* Undo scaling for the Schur form of A +* + CALL DLASCL( 'H', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR ) + CALL DCOPY( N, A, LDA+1, WR, 1 ) + IF( ( WANTSV .OR. WANTSB ) .AND. INFO.EQ.0 ) THEN + DUM( 1 ) = RCONDV + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR ) + RCONDV = DUM( 1 ) + END IF + IF( CSCALE.EQ.SMLNUM ) THEN +* +* If scaling back towards underflow, adjust WI if an +* offdiagonal element of a 2-by-2 block in the Schur form +* underflows. +* + IF( IEVAL.GT.0 ) THEN + I1 = IEVAL + 1 + I2 = IHI - 1 + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, N, + $ IERR ) + ELSE IF( WANTST ) THEN + I1 = 1 + I2 = N - 1 + ELSE + I1 = ILO + I2 = IHI - 1 + END IF + INXT = I1 - 1 + DO 20 I = I1, I2 + IF( I.LT.INXT ) + $ GO TO 20 + IF( WI( I ).EQ.ZERO ) THEN + INXT = I + 1 + ELSE + IF( A( I+1, I ).EQ.ZERO ) THEN + WI( I ) = ZERO + WI( I+1 ) = ZERO + ELSE IF( A( I+1, I ).NE.ZERO .AND. A( I, I+1 ).EQ. + $ ZERO ) THEN + WI( I ) = ZERO + WI( I+1 ) = ZERO + IF( I.GT.1 ) + $ CALL DSWAP( I-1, A( 1, I ), 1, A( 1, I+1 ), 1 ) + IF( N.GT.I+1 ) + $ CALL DSWAP( N-I-1, A( I, I+2 ), LDA, + $ A( I+1, I+2 ), LDA ) + CALL DSWAP( N, VS( 1, I ), 1, VS( 1, I+1 ), 1 ) + A( I, I+1 ) = A( I+1, I ) + A( I+1, I ) = ZERO + END IF + INXT = I + 2 + END IF + 20 CONTINUE + END IF + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N-IEVAL, 1, + $ WI( IEVAL+1 ), MAX( N-IEVAL, 1 ), IERR ) + END IF +* + IF( WANTST .AND. INFO.EQ.0 ) THEN +* +* Check if reordering successful +* + LASTSL = .TRUE. + LST2SL = .TRUE. + SDIM = 0 + IP = 0 + DO 30 I = 1, N + CURSL = SELECT( WR( I ), WI( I ) ) + IF( WI( I ).EQ.ZERO ) THEN + IF( CURSL ) + $ SDIM = SDIM + 1 + IP = 0 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + ELSE + IF( IP.EQ.1 ) THEN +* +* Last eigenvalue of conjugate pair +* + CURSL = CURSL .OR. LASTSL + LASTSL = CURSL + IF( CURSL ) + $ SDIM = SDIM + 2 + IP = -1 + IF( CURSL .AND. .NOT.LST2SL ) + $ INFO = N + 2 + ELSE +* +* First eigenvalue of conjugate pair +* + IP = 1 + END IF + END IF + LST2SL = LASTSL + LASTSL = CURSL + 30 CONTINUE + END IF +* + WORK( 1 ) = MAXWRK + IF( WANTSV .OR. WANTSB ) THEN + IWORK( 1 ) = MAX( 1, SDIM*( N-SDIM ) ) + ELSE + IWORK( 1 ) = 1 + END IF +* + RETURN +* +* End of DGEESX +* + END diff --git a/dspl/liblapack/SRC/dgeev.f b/dspl/liblapack/SRC/dgeev.f new file mode 100644 index 0000000..2dc1588 --- /dev/null +++ b/dspl/liblapack/SRC/dgeev.f @@ -0,0 +1,529 @@ +*> \brief DGEEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR, +* LDVR, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVL, JOBVR +* INTEGER INFO, LDA, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WI( * ), WORK( * ), WR( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEEV computes for an N-by-N real nonsymmetric matrix A, the +*> eigenvalues and, optionally, the left and/or right eigenvectors. +*> +*> The right eigenvector v(j) of A satisfies +*> A * v(j) = lambda(j) * v(j) +*> where lambda(j) is its eigenvalue. +*> The left eigenvector u(j) of A satisfies +*> u(j)**H * A = lambda(j) * u(j)**H +*> where u(j)**H denotes the conjugate-transpose of u(j). +*> +*> The computed eigenvectors are normalized to have Euclidean norm +*> equal to 1 and largest component real. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': left eigenvectors of A are not computed; +*> = 'V': left eigenvectors of A are computed. +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': right eigenvectors of A are not computed; +*> = 'V': right eigenvectors of A are computed. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> On exit, A has been overwritten. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] WI +*> \verbatim +*> WI is DOUBLE PRECISION array, dimension (N) +*> WR and WI contain the real and imaginary parts, +*> respectively, of the computed eigenvalues. Complex +*> conjugate pairs of eigenvalues appear consecutively +*> with the eigenvalue having the positive imaginary part +*> first. +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is DOUBLE PRECISION array, dimension (LDVL,N) +*> If JOBVL = 'V', the left eigenvectors u(j) are stored one +*> after another in the columns of VL, in the same order +*> as their eigenvalues. +*> If JOBVL = 'N', VL is not referenced. +*> If the j-th eigenvalue is real, then u(j) = VL(:,j), +*> the j-th column of VL. +*> If the j-th and (j+1)-st eigenvalues form a complex +*> conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and +*> u(j+1) = VL(:,j) - i*VL(:,j+1). +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. LDVL >= 1; if +*> JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is DOUBLE PRECISION array, dimension (LDVR,N) +*> If JOBVR = 'V', the right eigenvectors v(j) are stored one +*> after another in the columns of VR, in the same order +*> as their eigenvalues. +*> If JOBVR = 'N', VR is not referenced. +*> If the j-th eigenvalue is real, then v(j) = VR(:,j), +*> the j-th column of VR. +*> If the j-th and (j+1)-st eigenvalues form a complex +*> conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and +*> v(j+1) = VR(:,j) - i*VR(:,j+1). +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. LDVR >= 1; if +*> JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,3*N), and +*> if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good +*> performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the QR algorithm failed to compute all the +*> eigenvalues, and no eigenvectors have been computed; +*> elements i+1:N of WR and WI contain eigenvalues which +*> have converged. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +* @precisions fortran d -> s +* +*> \ingroup doubleGEeigen +* +* ===================================================================== + SUBROUTINE DGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR, + $ LDVR, WORK, LWORK, INFO ) + implicit none +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WI( * ), WORK( * ), WR( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTVL, WANTVR + CHARACTER SIDE + INTEGER HSWORK, I, IBAL, IERR, IHI, ILO, ITAU, IWRK, K, + $ LWORK_TREVC, MAXWRK, MINWRK, NOUT + DOUBLE PRECISION ANRM, BIGNUM, CS, CSCALE, EPS, R, SCL, SMLNUM, + $ SN +* .. +* .. Local Arrays .. + LOGICAL SELECT( 1 ) + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DGEBAK, DGEBAL, DGEHRD, DHSEQR, DLABAD, DLACPY, + $ DLARTG, DLASCL, DORGHR, DROT, DSCAL, DTREVC3, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX, ILAENV + DOUBLE PRECISION DLAMCH, DLANGE, DLAPY2, DNRM2 + EXTERNAL LSAME, IDAMAX, ILAENV, DLAMCH, DLANGE, DLAPY2, + $ DNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVL = LSAME( JOBVL, 'V' ) + WANTVR = LSAME( JOBVR, 'V' ) + IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN + INFO = -9 + ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by DHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + ELSE + MAXWRK = 2*N + N*ILAENV( 1, 'DGEHRD', ' ', N, 1, N, 0 ) + IF( WANTVL ) THEN + MINWRK = 4*N + MAXWRK = MAX( MAXWRK, 2*N + ( N - 1 )*ILAENV( 1, + $ 'DORGHR', ' ', N, 1, N, -1 ) ) + CALL DHSEQR( 'S', 'V', N, 1, N, A, LDA, WR, WI, VL, LDVL, + $ WORK, -1, INFO ) + HSWORK = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, N + 1, N + HSWORK ) + CALL DTREVC3( 'L', 'B', SELECT, N, A, LDA, + $ VL, LDVL, VR, LDVR, N, NOUT, + $ WORK, -1, IERR ) + LWORK_TREVC = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, N + LWORK_TREVC ) + MAXWRK = MAX( MAXWRK, 4*N ) + ELSE IF( WANTVR ) THEN + MINWRK = 4*N + MAXWRK = MAX( MAXWRK, 2*N + ( N - 1 )*ILAENV( 1, + $ 'DORGHR', ' ', N, 1, N, -1 ) ) + CALL DHSEQR( 'S', 'V', N, 1, N, A, LDA, WR, WI, VR, LDVR, + $ WORK, -1, INFO ) + HSWORK = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, N + 1, N + HSWORK ) + CALL DTREVC3( 'R', 'B', SELECT, N, A, LDA, + $ VL, LDVL, VR, LDVR, N, NOUT, + $ WORK, -1, IERR ) + LWORK_TREVC = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, N + LWORK_TREVC ) + MAXWRK = MAX( MAXWRK, 4*N ) + ELSE + MINWRK = 3*N + CALL DHSEQR( 'E', 'N', N, 1, N, A, LDA, WR, WI, VR, LDVR, + $ WORK, -1, INFO ) + HSWORK = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, N + 1, N + HSWORK ) + END IF + MAXWRK = MAX( MAXWRK, MINWRK ) + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL DLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Balance the matrix +* (Workspace: need N) +* + IBAL = 1 + CALL DGEBAL( 'B', N, A, LDA, ILO, IHI, WORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (Workspace: need 3*N, prefer 2*N+N*NB) +* + ITAU = IBAL + N + IWRK = ITAU + N + CALL DGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVL ) THEN +* +* Want left eigenvectors +* Copy Householder vectors to VL +* + SIDE = 'L' + CALL DLACPY( 'L', N, N, A, LDA, VL, LDVL ) +* +* Generate orthogonal matrix in VL +* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* + CALL DORGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VL +* (Workspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL DHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VL, LDVL, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + IF( WANTVR ) THEN +* +* Want left and right eigenvectors +* Copy Schur vectors to VR +* + SIDE = 'B' + CALL DLACPY( 'F', N, N, VL, LDVL, VR, LDVR ) + END IF +* + ELSE IF( WANTVR ) THEN +* +* Want right eigenvectors +* Copy Householder vectors to VR +* + SIDE = 'R' + CALL DLACPY( 'L', N, N, A, LDA, VR, LDVR ) +* +* Generate orthogonal matrix in VR +* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* + CALL DORGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VR +* (Workspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL DHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + ELSE +* +* Compute eigenvalues only +* (Workspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL DHSEQR( 'E', 'N', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) + END IF +* +* If INFO .NE. 0 from DHSEQR, then quit +* + IF( INFO.NE.0 ) + $ GO TO 50 +* + IF( WANTVL .OR. WANTVR ) THEN +* +* Compute left and/or right eigenvectors +* (Workspace: need 4*N, prefer N + N + 2*N*NB) +* + CALL DTREVC3( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ N, NOUT, WORK( IWRK ), LWORK-IWRK+1, IERR ) + END IF +* + IF( WANTVL ) THEN +* +* Undo balancing of left eigenvectors +* (Workspace: need N) +* + CALL DGEBAK( 'B', 'L', N, ILO, IHI, WORK( IBAL ), N, VL, LDVL, + $ IERR ) +* +* Normalize left eigenvectors and make largest component real +* + DO 20 I = 1, N + IF( WI( I ).EQ.ZERO ) THEN + SCL = ONE / DNRM2( N, VL( 1, I ), 1 ) + CALL DSCAL( N, SCL, VL( 1, I ), 1 ) + ELSE IF( WI( I ).GT.ZERO ) THEN + SCL = ONE / DLAPY2( DNRM2( N, VL( 1, I ), 1 ), + $ DNRM2( N, VL( 1, I+1 ), 1 ) ) + CALL DSCAL( N, SCL, VL( 1, I ), 1 ) + CALL DSCAL( N, SCL, VL( 1, I+1 ), 1 ) + DO 10 K = 1, N + WORK( IWRK+K-1 ) = VL( K, I )**2 + VL( K, I+1 )**2 + 10 CONTINUE + K = IDAMAX( N, WORK( IWRK ), 1 ) + CALL DLARTG( VL( K, I ), VL( K, I+1 ), CS, SN, R ) + CALL DROT( N, VL( 1, I ), 1, VL( 1, I+1 ), 1, CS, SN ) + VL( K, I+1 ) = ZERO + END IF + 20 CONTINUE + END IF +* + IF( WANTVR ) THEN +* +* Undo balancing of right eigenvectors +* (Workspace: need N) +* + CALL DGEBAK( 'B', 'R', N, ILO, IHI, WORK( IBAL ), N, VR, LDVR, + $ IERR ) +* +* Normalize right eigenvectors and make largest component real +* + DO 40 I = 1, N + IF( WI( I ).EQ.ZERO ) THEN + SCL = ONE / DNRM2( N, VR( 1, I ), 1 ) + CALL DSCAL( N, SCL, VR( 1, I ), 1 ) + ELSE IF( WI( I ).GT.ZERO ) THEN + SCL = ONE / DLAPY2( DNRM2( N, VR( 1, I ), 1 ), + $ DNRM2( N, VR( 1, I+1 ), 1 ) ) + CALL DSCAL( N, SCL, VR( 1, I ), 1 ) + CALL DSCAL( N, SCL, VR( 1, I+1 ), 1 ) + DO 30 K = 1, N + WORK( IWRK+K-1 ) = VR( K, I )**2 + VR( K, I+1 )**2 + 30 CONTINUE + K = IDAMAX( N, WORK( IWRK ), 1 ) + CALL DLARTG( VR( K, I ), VR( K, I+1 ), CS, SN, R ) + CALL DROT( N, VR( 1, I ), 1, VR( 1, I+1 ), 1, CS, SN ) + VR( K, I+1 ) = ZERO + END IF + 40 CONTINUE + END IF +* +* Undo scaling if necessary +* + 50 CONTINUE + IF( SCALEA ) THEN + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WR( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WI( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + IF( INFO.GT.0 ) THEN + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WR, N, + $ IERR ) + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, N, + $ IERR ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of DGEEV +* + END diff --git a/dspl/liblapack/SRC/dgeevx.f b/dspl/liblapack/SRC/dgeevx.f new file mode 100644 index 0000000..edf6a43 --- /dev/null +++ b/dspl/liblapack/SRC/dgeevx.f @@ -0,0 +1,694 @@ +*> \brief DGEEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, WR, WI, +* VL, LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, +* RCONDE, RCONDV, WORK, LWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER BALANC, JOBVL, JOBVR, SENSE +* INTEGER IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N +* DOUBLE PRECISION ABNRM +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), RCONDE( * ), RCONDV( * ), +* $ SCALE( * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WI( * ), WORK( * ), WR( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEEVX computes for an N-by-N real nonsymmetric matrix A, the +*> eigenvalues and, optionally, the left and/or right eigenvectors. +*> +*> Optionally also, it computes a balancing transformation to improve +*> the conditioning of the eigenvalues and eigenvectors (ILO, IHI, +*> SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues +*> (RCONDE), and reciprocal condition numbers for the right +*> eigenvectors (RCONDV). +*> +*> The right eigenvector v(j) of A satisfies +*> A * v(j) = lambda(j) * v(j) +*> where lambda(j) is its eigenvalue. +*> The left eigenvector u(j) of A satisfies +*> u(j)**H * A = lambda(j) * u(j)**H +*> where u(j)**H denotes the conjugate-transpose of u(j). +*> +*> The computed eigenvectors are normalized to have Euclidean norm +*> equal to 1 and largest component real. +*> +*> Balancing a matrix means permuting the rows and columns to make it +*> more nearly upper triangular, and applying a diagonal similarity +*> transformation D * A * D**(-1), where D is a diagonal matrix, to +*> make its rows and columns closer in norm and the condition numbers +*> of its eigenvalues and eigenvectors smaller. The computed +*> reciprocal condition numbers correspond to the balanced matrix. +*> Permuting rows and columns will not change the condition numbers +*> (in exact arithmetic) but diagonal scaling will. For further +*> explanation of balancing, see section 4.10.2 of the LAPACK +*> Users' Guide. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] BALANC +*> \verbatim +*> BALANC is CHARACTER*1 +*> Indicates how the input matrix should be diagonally scaled +*> and/or permuted to improve the conditioning of its +*> eigenvalues. +*> = 'N': Do not diagonally scale or permute; +*> = 'P': Perform permutations to make the matrix more nearly +*> upper triangular. Do not diagonally scale; +*> = 'S': Diagonally scale the matrix, i.e. replace A by +*> D*A*D**(-1), where D is a diagonal matrix chosen +*> to make the rows and columns of A more equal in +*> norm. Do not permute; +*> = 'B': Both diagonally scale and permute A. +*> +*> Computed reciprocal condition numbers will be for the matrix +*> after balancing and/or permuting. Permuting does not change +*> condition numbers (in exact arithmetic), but balancing does. +*> \endverbatim +*> +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': left eigenvectors of A are not computed; +*> = 'V': left eigenvectors of A are computed. +*> If SENSE = 'E' or 'B', JOBVL must = 'V'. +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': right eigenvectors of A are not computed; +*> = 'V': right eigenvectors of A are computed. +*> If SENSE = 'E' or 'B', JOBVR must = 'V'. +*> \endverbatim +*> +*> \param[in] SENSE +*> \verbatim +*> SENSE is CHARACTER*1 +*> Determines which reciprocal condition numbers are computed. +*> = 'N': None are computed; +*> = 'E': Computed for eigenvalues only; +*> = 'V': Computed for right eigenvectors only; +*> = 'B': Computed for eigenvalues and right eigenvectors. +*> +*> If SENSE = 'E' or 'B', both left and right eigenvectors +*> must also be computed (JOBVL = 'V' and JOBVR = 'V'). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> On exit, A has been overwritten. If JOBVL = 'V' or +*> JOBVR = 'V', A contains the real Schur form of the balanced +*> version of the input matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] WI +*> \verbatim +*> WI is DOUBLE PRECISION array, dimension (N) +*> WR and WI contain the real and imaginary parts, +*> respectively, of the computed eigenvalues. Complex +*> conjugate pairs of eigenvalues will appear consecutively +*> with the eigenvalue having the positive imaginary part +*> first. +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is DOUBLE PRECISION array, dimension (LDVL,N) +*> If JOBVL = 'V', the left eigenvectors u(j) are stored one +*> after another in the columns of VL, in the same order +*> as their eigenvalues. +*> If JOBVL = 'N', VL is not referenced. +*> If the j-th eigenvalue is real, then u(j) = VL(:,j), +*> the j-th column of VL. +*> If the j-th and (j+1)-st eigenvalues form a complex +*> conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and +*> u(j+1) = VL(:,j) - i*VL(:,j+1). +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. LDVL >= 1; if +*> JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is DOUBLE PRECISION array, dimension (LDVR,N) +*> If JOBVR = 'V', the right eigenvectors v(j) are stored one +*> after another in the columns of VR, in the same order +*> as their eigenvalues. +*> If JOBVR = 'N', VR is not referenced. +*> If the j-th eigenvalue is real, then v(j) = VR(:,j), +*> the j-th column of VR. +*> If the j-th and (j+1)-st eigenvalues form a complex +*> conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and +*> v(j+1) = VR(:,j) - i*VR(:,j+1). +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. LDVR >= 1, and if +*> JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[out] IHI +*> \verbatim +*> IHI is INTEGER +*> ILO and IHI are integer values determined when A was +*> balanced. The balanced A(i,j) = 0 if I > J and +*> J = 1,...,ILO-1 or I = IHI+1,...,N. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION array, dimension (N) +*> Details of the permutations and scaling factors applied +*> when balancing A. If P(j) is the index of the row and column +*> interchanged with row and column j, and D(j) is the scaling +*> factor applied to row and column j, then +*> SCALE(J) = P(J), for J = 1,...,ILO-1 +*> = D(J), for J = ILO,...,IHI +*> = P(J) for J = IHI+1,...,N. +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] ABNRM +*> \verbatim +*> ABNRM is DOUBLE PRECISION +*> The one-norm of the balanced matrix (the maximum +*> of the sum of absolute values of elements of any column). +*> \endverbatim +*> +*> \param[out] RCONDE +*> \verbatim +*> RCONDE is DOUBLE PRECISION array, dimension (N) +*> RCONDE(j) is the reciprocal condition number of the j-th +*> eigenvalue. +*> \endverbatim +*> +*> \param[out] RCONDV +*> \verbatim +*> RCONDV is DOUBLE PRECISION array, dimension (N) +*> RCONDV(j) is the reciprocal condition number of the j-th +*> right eigenvector. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. If SENSE = 'N' or 'E', +*> LWORK >= max(1,2*N), and if JOBVL = 'V' or JOBVR = 'V', +*> LWORK >= 3*N. If SENSE = 'V' or 'B', LWORK >= N*(N+6). +*> For good performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (2*N-2) +*> If SENSE = 'N' or 'E', not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the QR algorithm failed to compute all the +*> eigenvalues, and no eigenvectors or condition numbers +*> have been computed; elements 1:ILO-1 and i+1:N of WR +*> and WI contain eigenvalues which have converged. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +* @precisions fortran d -> s +* +*> \ingroup doubleGEeigen +* +* ===================================================================== + SUBROUTINE DGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, WR, WI, + $ VL, LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, + $ RCONDE, RCONDV, WORK, LWORK, IWORK, INFO ) + implicit none +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER BALANC, JOBVL, JOBVR, SENSE + INTEGER IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N + DOUBLE PRECISION ABNRM +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), RCONDE( * ), RCONDV( * ), + $ SCALE( * ), VL( LDVL, * ), VR( LDVR, * ), + $ WI( * ), WORK( * ), WR( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTVL, WANTVR, WNTSNB, WNTSNE, + $ WNTSNN, WNTSNV + CHARACTER JOB, SIDE + INTEGER HSWORK, I, ICOND, IERR, ITAU, IWRK, K, + $ LWORK_TREVC, MAXWRK, MINWRK, NOUT + DOUBLE PRECISION ANRM, BIGNUM, CS, CSCALE, EPS, R, SCL, SMLNUM, + $ SN +* .. +* .. Local Arrays .. + LOGICAL SELECT( 1 ) + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DGEBAK, DGEBAL, DGEHRD, DHSEQR, DLABAD, DLACPY, + $ DLARTG, DLASCL, DORGHR, DROT, DSCAL, DTREVC3, + $ DTRSNA, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX, ILAENV + DOUBLE PRECISION DLAMCH, DLANGE, DLAPY2, DNRM2 + EXTERNAL LSAME, IDAMAX, ILAENV, DLAMCH, DLANGE, DLAPY2, + $ DNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVL = LSAME( JOBVL, 'V' ) + WANTVR = LSAME( JOBVR, 'V' ) + WNTSNN = LSAME( SENSE, 'N' ) + WNTSNE = LSAME( SENSE, 'E' ) + WNTSNV = LSAME( SENSE, 'V' ) + WNTSNB = LSAME( SENSE, 'B' ) + IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC, 'S' ) + $ .OR. LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) ) + $ THEN + INFO = -1 + ELSE IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( .NOT.( WNTSNN .OR. WNTSNE .OR. WNTSNB .OR. WNTSNV ) .OR. + $ ( ( WNTSNE .OR. WNTSNB ) .AND. .NOT.( WANTVL .AND. + $ WANTVR ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN + INFO = -13 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by DHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + ELSE + MAXWRK = N + N*ILAENV( 1, 'DGEHRD', ' ', N, 1, N, 0 ) +* + IF( WANTVL ) THEN + CALL DTREVC3( 'L', 'B', SELECT, N, A, LDA, + $ VL, LDVL, VR, LDVR, + $ N, NOUT, WORK, -1, IERR ) + LWORK_TREVC = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, N + LWORK_TREVC ) + CALL DHSEQR( 'S', 'V', N, 1, N, A, LDA, WR, WI, VL, LDVL, + $ WORK, -1, INFO ) + ELSE IF( WANTVR ) THEN + CALL DTREVC3( 'R', 'B', SELECT, N, A, LDA, + $ VL, LDVL, VR, LDVR, + $ N, NOUT, WORK, -1, IERR ) + LWORK_TREVC = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, N + LWORK_TREVC ) + CALL DHSEQR( 'S', 'V', N, 1, N, A, LDA, WR, WI, VR, LDVR, + $ WORK, -1, INFO ) + ELSE + IF( WNTSNN ) THEN + CALL DHSEQR( 'E', 'N', N, 1, N, A, LDA, WR, WI, VR, + $ LDVR, WORK, -1, INFO ) + ELSE + CALL DHSEQR( 'S', 'N', N, 1, N, A, LDA, WR, WI, VR, + $ LDVR, WORK, -1, INFO ) + END IF + END IF + HSWORK = INT( WORK(1) ) +* + IF( ( .NOT.WANTVL ) .AND. ( .NOT.WANTVR ) ) THEN + MINWRK = 2*N + IF( .NOT.WNTSNN ) + $ MINWRK = MAX( MINWRK, N*N+6*N ) + MAXWRK = MAX( MAXWRK, HSWORK ) + IF( .NOT.WNTSNN ) + $ MAXWRK = MAX( MAXWRK, N*N + 6*N ) + ELSE + MINWRK = 3*N + IF( ( .NOT.WNTSNN ) .AND. ( .NOT.WNTSNE ) ) + $ MINWRK = MAX( MINWRK, N*N + 6*N ) + MAXWRK = MAX( MAXWRK, HSWORK ) + MAXWRK = MAX( MAXWRK, N + ( N - 1 )*ILAENV( 1, 'DORGHR', + $ ' ', N, 1, N, -1 ) ) + IF( ( .NOT.WNTSNN ) .AND. ( .NOT.WNTSNE ) ) + $ MAXWRK = MAX( MAXWRK, N*N + 6*N ) + MAXWRK = MAX( MAXWRK, 3*N ) + END IF + MAXWRK = MAX( MAXWRK, MINWRK ) + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -21 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ICOND = 0 + ANRM = DLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL DLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Balance the matrix and compute ABNRM +* + CALL DGEBAL( BALANC, N, A, LDA, ILO, IHI, SCALE, IERR ) + ABNRM = DLANGE( '1', N, N, A, LDA, DUM ) + IF( SCALEA ) THEN + DUM( 1 ) = ABNRM + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR ) + ABNRM = DUM( 1 ) + END IF +* +* Reduce to upper Hessenberg form +* (Workspace: need 2*N, prefer N+N*NB) +* + ITAU = 1 + IWRK = ITAU + N + CALL DGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVL ) THEN +* +* Want left eigenvectors +* Copy Householder vectors to VL +* + SIDE = 'L' + CALL DLACPY( 'L', N, N, A, LDA, VL, LDVL ) +* +* Generate orthogonal matrix in VL +* (Workspace: need 2*N-1, prefer N+(N-1)*NB) +* + CALL DORGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VL +* (Workspace: need 1, prefer HSWORK (see comments) ) +* + IWRK = ITAU + CALL DHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VL, LDVL, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + IF( WANTVR ) THEN +* +* Want left and right eigenvectors +* Copy Schur vectors to VR +* + SIDE = 'B' + CALL DLACPY( 'F', N, N, VL, LDVL, VR, LDVR ) + END IF +* + ELSE IF( WANTVR ) THEN +* +* Want right eigenvectors +* Copy Householder vectors to VR +* + SIDE = 'R' + CALL DLACPY( 'L', N, N, A, LDA, VR, LDVR ) +* +* Generate orthogonal matrix in VR +* (Workspace: need 2*N-1, prefer N+(N-1)*NB) +* + CALL DORGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VR +* (Workspace: need 1, prefer HSWORK (see comments) ) +* + IWRK = ITAU + CALL DHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + ELSE +* +* Compute eigenvalues only +* If condition numbers desired, compute Schur form +* + IF( WNTSNN ) THEN + JOB = 'E' + ELSE + JOB = 'S' + END IF +* +* (Workspace: need 1, prefer HSWORK (see comments) ) +* + IWRK = ITAU + CALL DHSEQR( JOB, 'N', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) + END IF +* +* If INFO .NE. 0 from DHSEQR, then quit +* + IF( INFO.NE.0 ) + $ GO TO 50 +* + IF( WANTVL .OR. WANTVR ) THEN +* +* Compute left and/or right eigenvectors +* (Workspace: need 3*N, prefer N + 2*N*NB) +* + CALL DTREVC3( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ N, NOUT, WORK( IWRK ), LWORK-IWRK+1, IERR ) + END IF +* +* Compute condition numbers if desired +* (Workspace: need N*N+6*N unless SENSE = 'E') +* + IF( .NOT.WNTSNN ) THEN + CALL DTRSNA( SENSE, 'A', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ RCONDE, RCONDV, N, NOUT, WORK( IWRK ), N, IWORK, + $ ICOND ) + END IF +* + IF( WANTVL ) THEN +* +* Undo balancing of left eigenvectors +* + CALL DGEBAK( BALANC, 'L', N, ILO, IHI, SCALE, N, VL, LDVL, + $ IERR ) +* +* Normalize left eigenvectors and make largest component real +* + DO 20 I = 1, N + IF( WI( I ).EQ.ZERO ) THEN + SCL = ONE / DNRM2( N, VL( 1, I ), 1 ) + CALL DSCAL( N, SCL, VL( 1, I ), 1 ) + ELSE IF( WI( I ).GT.ZERO ) THEN + SCL = ONE / DLAPY2( DNRM2( N, VL( 1, I ), 1 ), + $ DNRM2( N, VL( 1, I+1 ), 1 ) ) + CALL DSCAL( N, SCL, VL( 1, I ), 1 ) + CALL DSCAL( N, SCL, VL( 1, I+1 ), 1 ) + DO 10 K = 1, N + WORK( K ) = VL( K, I )**2 + VL( K, I+1 )**2 + 10 CONTINUE + K = IDAMAX( N, WORK, 1 ) + CALL DLARTG( VL( K, I ), VL( K, I+1 ), CS, SN, R ) + CALL DROT( N, VL( 1, I ), 1, VL( 1, I+1 ), 1, CS, SN ) + VL( K, I+1 ) = ZERO + END IF + 20 CONTINUE + END IF +* + IF( WANTVR ) THEN +* +* Undo balancing of right eigenvectors +* + CALL DGEBAK( BALANC, 'R', N, ILO, IHI, SCALE, N, VR, LDVR, + $ IERR ) +* +* Normalize right eigenvectors and make largest component real +* + DO 40 I = 1, N + IF( WI( I ).EQ.ZERO ) THEN + SCL = ONE / DNRM2( N, VR( 1, I ), 1 ) + CALL DSCAL( N, SCL, VR( 1, I ), 1 ) + ELSE IF( WI( I ).GT.ZERO ) THEN + SCL = ONE / DLAPY2( DNRM2( N, VR( 1, I ), 1 ), + $ DNRM2( N, VR( 1, I+1 ), 1 ) ) + CALL DSCAL( N, SCL, VR( 1, I ), 1 ) + CALL DSCAL( N, SCL, VR( 1, I+1 ), 1 ) + DO 30 K = 1, N + WORK( K ) = VR( K, I )**2 + VR( K, I+1 )**2 + 30 CONTINUE + K = IDAMAX( N, WORK, 1 ) + CALL DLARTG( VR( K, I ), VR( K, I+1 ), CS, SN, R ) + CALL DROT( N, VR( 1, I ), 1, VR( 1, I+1 ), 1, CS, SN ) + VR( K, I+1 ) = ZERO + END IF + 40 CONTINUE + END IF +* +* Undo scaling if necessary +* + 50 CONTINUE + IF( SCALEA ) THEN + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WR( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WI( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + IF( INFO.EQ.0 ) THEN + IF( ( WNTSNV .OR. WNTSNB ) .AND. ICOND.EQ.0 ) + $ CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, RCONDV, N, + $ IERR ) + ELSE + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WR, N, + $ IERR ) + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, N, + $ IERR ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of DGEEVX +* + END diff --git a/dspl/liblapack/SRC/dgehd2.f b/dspl/liblapack/SRC/dgehd2.f new file mode 100644 index 0000000..4521b66 --- /dev/null +++ b/dspl/liblapack/SRC/dgehd2.f @@ -0,0 +1,225 @@ +*> \brief \b DGEHD2 reduces a general square matrix to upper Hessenberg form using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEHD2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEHD2 reduces a real general matrix A to upper Hessenberg form H by +*> an orthogonal similarity transformation: Q**T * A * Q = H . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> It is assumed that A is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +*> set by a previous call to DGEBAL; otherwise they should be +*> set to 1 and N respectively. See Further Details. +*> 1 <= ILO <= IHI <= max(1,N). +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the n by n general matrix to be reduced. +*> On exit, the upper triangle and the first subdiagonal of A +*> are overwritten with the upper Hessenberg matrix H, and the +*> elements below the first subdiagonal, with the array TAU, +*> represent the orthogonal matrix Q as a product of elementary +*> reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of (ihi-ilo) elementary +*> reflectors +*> +*> Q = H(ilo) H(ilo+1) . . . H(ihi-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +*> exit in A(i+2:ihi,i), and tau in TAU(i). +*> +*> The contents of A are illustrated by the following example, with +*> n = 7, ilo = 2 and ihi = 6: +*> +*> on entry, on exit, +*> +*> ( a a a a a a a ) ( a a h h h h a ) +*> ( a a a a a a ) ( a h h h h a ) +*> ( a a a a a a ) ( h h h h h h ) +*> ( a a a a a a ) ( v2 h h h h h ) +*> ( a a a a a a ) ( v2 v3 h h h h ) +*> ( a a a a a a ) ( v2 v3 v4 h h h ) +*> ( a ) ( a ) +*> +*> where a denotes an element of the original matrix A, h denotes a +*> modified element of the upper Hessenberg matrix H, and vi denotes an +*> element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION AII +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEHD2', -INFO ) + RETURN + END IF +* + DO 10 I = ILO, IHI - 1 +* +* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) +* + CALL DLARFG( IHI-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, + $ TAU( I ) ) + AII = A( I+1, I ) + A( I+1, I ) = ONE +* +* Apply H(i) to A(1:ihi,i+1:ihi) from the right +* + CALL DLARF( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ), + $ A( 1, I+1 ), LDA, WORK ) +* +* Apply H(i) to A(i+1:ihi,i+1:n) from the left +* + CALL DLARF( 'Left', IHI-I, N-I, A( I+1, I ), 1, TAU( I ), + $ A( I+1, I+1 ), LDA, WORK ) +* + A( I+1, I ) = AII + 10 CONTINUE +* + RETURN +* +* End of DGEHD2 +* + END diff --git a/dspl/liblapack/SRC/dgehrd.f b/dspl/liblapack/SRC/dgehrd.f new file mode 100644 index 0000000..23fd872 --- /dev/null +++ b/dspl/liblapack/SRC/dgehrd.f @@ -0,0 +1,356 @@ +*> \brief \b DGEHRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEHRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEHRD reduces a real general matrix A to upper Hessenberg form H by +*> an orthogonal similarity transformation: Q**T * A * Q = H . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> It is assumed that A is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +*> set by a previous call to DGEBAL; otherwise they should be +*> set to 1 and N respectively. See Further Details. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the N-by-N general matrix to be reduced. +*> On exit, the upper triangle and the first subdiagonal of A +*> are overwritten with the upper Hessenberg matrix H, and the +*> elements below the first subdiagonal, with the array TAU, +*> represent the orthogonal matrix Q as a product of elementary +*> reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to +*> zero. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,N). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of (ihi-ilo) elementary +*> reflectors +*> +*> Q = H(ilo) H(ilo+1) . . . H(ihi-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +*> exit in A(i+2:ihi,i), and tau in TAU(i). +*> +*> The contents of A are illustrated by the following example, with +*> n = 7, ilo = 2 and ihi = 6: +*> +*> on entry, on exit, +*> +*> ( a a a a a a a ) ( a a h h h h a ) +*> ( a a a a a a ) ( a h h h h a ) +*> ( a a a a a a ) ( h h h h h h ) +*> ( a a a a a a ) ( v2 h h h h h ) +*> ( a a a a a a ) ( v2 v3 h h h h ) +*> ( a a a a a a ) ( v2 v3 v4 h h h ) +*> ( a ) ( a ) +*> +*> where a denotes an element of the original matrix A, h denotes a +*> modified element of the upper Hessenberg matrix H, and vi denotes an +*> element of the vector defining H(i). +*> +*> This file is a slight modification of LAPACK-3.0's DGEHRD +*> subroutine incorporating improvements proposed by Quintana-Orti and +*> Van de Geijn (2006). (See DLAHR2.) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, + $ ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWT, J, LDWORK, LWKOPT, NB, + $ NBMIN, NH, NX + DOUBLE PRECISION EI +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DGEHD2, DGEMM, DLAHR2, DLARFB, DTRMM, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + NB = MIN( NBMAX, ILAENV( 1, 'DGEHRD', ' ', N, ILO, IHI, -1 ) ) + LWKOPT = N*NB + TSIZE + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEHRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero +* + DO 10 I = 1, ILO - 1 + TAU( I ) = ZERO + 10 CONTINUE + DO 20 I = MAX( 1, IHI ), N - 1 + TAU( I ) = ZERO + 20 CONTINUE +* +* Quick return if possible +* + NH = IHI - ILO + 1 + IF( NH.LE.1 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine the block size +* + NB = MIN( NBMAX, ILAENV( 1, 'DGEHRD', ' ', N, ILO, IHI, -1 ) ) + NBMIN = 2 + IF( NB.GT.1 .AND. NB.LT.NH ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code) +* + NX = MAX( NB, ILAENV( 3, 'DGEHRD', ' ', N, ILO, IHI, -1 ) ) + IF( NX.LT.NH ) THEN +* +* Determine if workspace is large enough for blocked code +* + IF( LWORK.LT.N*NB+TSIZE ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code +* + NBMIN = MAX( 2, ILAENV( 2, 'DGEHRD', ' ', N, ILO, IHI, + $ -1 ) ) + IF( LWORK.GE.(N*NBMIN + TSIZE) ) THEN + NB = (LWORK-TSIZE) / N + ELSE + NB = 1 + END IF + END IF + END IF + END IF + LDWORK = N +* + IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN +* +* Use unblocked code below +* + I = ILO +* + ELSE +* +* Use blocked code +* + IWT = 1 + N*NB + DO 40 I = ILO, IHI - 1 - NX, NB + IB = MIN( NB, IHI-I ) +* +* Reduce columns i:i+ib-1 to Hessenberg form, returning the +* matrices V and T of the block reflector H = I - V*T*V**T +* which performs the reduction, and also the matrix Y = A*V*T +* + CALL DLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), + $ WORK( IWT ), LDT, WORK, LDWORK ) +* +* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the +* right, computing A := A - Y * V**T. V(i+ib,ib-1) must be set +* to 1 +* + EI = A( I+IB, I+IB-1 ) + A( I+IB, I+IB-1 ) = ONE + CALL DGEMM( 'No transpose', 'Transpose', + $ IHI, IHI-I-IB+1, + $ IB, -ONE, WORK, LDWORK, A( I+IB, I ), LDA, ONE, + $ A( 1, I+IB ), LDA ) + A( I+IB, I+IB-1 ) = EI +* +* Apply the block reflector H to A(1:i,i+1:i+ib-1) from the +* right +* + CALL DTRMM( 'Right', 'Lower', 'Transpose', + $ 'Unit', I, IB-1, + $ ONE, A( I+1, I ), LDA, WORK, LDWORK ) + DO 30 J = 0, IB-2 + CALL DAXPY( I, -ONE, WORK( LDWORK*J+1 ), 1, + $ A( 1, I+J+1 ), 1 ) + 30 CONTINUE +* +* Apply the block reflector H to A(i+1:ihi,i+ib:n) from the +* left +* + CALL DLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', + $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, + $ WORK( IWT ), LDT, A( I+1, I+IB ), LDA, + $ WORK, LDWORK ) + 40 CONTINUE + END IF +* +* Use unblocked code to reduce the rest of the matrix +* + CALL DGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO ) + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DGEHRD +* + END diff --git a/dspl/liblapack/SRC/dgejsv.f b/dspl/liblapack/SRC/dgejsv.f new file mode 100644 index 0000000..25ed248 --- /dev/null +++ b/dspl/liblapack/SRC/dgejsv.f @@ -0,0 +1,1783 @@ +*> \brief \b DGEJSV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEJSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, +* M, N, A, LDA, SVA, U, LDU, V, LDV, +* WORK, LWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* IMPLICIT NONE +* INTEGER INFO, LDA, LDU, LDV, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), SVA( N ), U( LDU, * ), V( LDV, * ), +* $ WORK( LWORK ) +* INTEGER IWORK( * ) +* CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEJSV computes the singular value decomposition (SVD) of a real M-by-N +*> matrix [A], where M >= N. The SVD of [A] is written as +*> +*> [A] = [U] * [SIGMA] * [V]^t, +*> +*> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N +*> diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and +*> [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are +*> the singular values of [A]. The columns of [U] and [V] are the left and +*> the right singular vectors of [A], respectively. The matrices [U] and [V] +*> are computed and stored in the arrays U and V, respectively. The diagonal +*> of [SIGMA] is computed and stored in the array SVA. +*> DGEJSV can sometimes compute tiny singular values and their singular vectors much +*> more accurately than other SVD routines, see below under Further Details. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBA +*> \verbatim +*> JOBA is CHARACTER*1 +*> Specifies the level of accuracy: +*> = 'C': This option works well (high relative accuracy) if A = B * D, +*> with well-conditioned B and arbitrary diagonal matrix D. +*> The accuracy cannot be spoiled by COLUMN scaling. The +*> accuracy of the computed output depends on the condition of +*> B, and the procedure aims at the best theoretical accuracy. +*> The relative error max_{i=1:N}|d sigma_i| / sigma_i is +*> bounded by f(M,N)*epsilon* cond(B), independent of D. +*> The input matrix is preprocessed with the QRF with column +*> pivoting. This initial preprocessing and preconditioning by +*> a rank revealing QR factorization is common for all values of +*> JOBA. Additional actions are specified as follows: +*> = 'E': Computation as with 'C' with an additional estimate of the +*> condition number of B. It provides a realistic error bound. +*> = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings +*> D1, D2, and well-conditioned matrix C, this option gives +*> higher accuracy than the 'C' option. If the structure of the +*> input matrix is not known, and relative accuracy is +*> desirable, then this option is advisable. The input matrix A +*> is preprocessed with QR factorization with FULL (row and +*> column) pivoting. +*> = 'G' Computation as with 'F' with an additional estimate of the +*> condition number of B, where A=D*B. If A has heavily weighted +*> rows, then using this condition number gives too pessimistic +*> error bound. +*> = 'A': Small singular values are the noise and the matrix is treated +*> as numerically rank deficient. The error in the computed +*> singular values is bounded by f(m,n)*epsilon*||A||. +*> The computed SVD A = U * S * V^t restores A up to +*> f(m,n)*epsilon*||A||. +*> This gives the procedure the licence to discard (set to zero) +*> all singular values below N*epsilon*||A||. +*> = 'R': Similar as in 'A'. Rank revealing property of the initial +*> QR factorization is used do reveal (using triangular factor) +*> a gap sigma_{r+1} < epsilon * sigma_r in which case the +*> numerical RANK is declared to be r. The SVD is computed with +*> absolute error bounds, but more accurately than with 'A'. +*> \endverbatim +*> +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> Specifies whether to compute the columns of U: +*> = 'U': N columns of U are returned in the array U. +*> = 'F': full set of M left sing. vectors is returned in the array U. +*> = 'W': U may be used as workspace of length M*N. See the description +*> of U. +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> Specifies whether to compute the matrix V: +*> = 'V': N columns of V are returned in the array V; Jacobi rotations +*> are not explicitly accumulated. +*> = 'J': N columns of V are returned in the array V, but they are +*> computed as the product of Jacobi rotations. This option is +*> allowed only if JOBU .NE. 'N', i.e. in computing the full SVD. +*> = 'W': V may be used as workspace of length N*N. See the description +*> of V. +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBR +*> \verbatim +*> JOBR is CHARACTER*1 +*> Specifies the RANGE for the singular values. Issues the licence to +*> set to zero small positive singular values if they are outside +*> specified range. If A .NE. 0 is scaled so that the largest singular +*> value of c*A is around DSQRT(BIG), BIG=SLAMCH('O'), then JOBR issues +*> the licence to kill columns of A whose norm in c*A is less than +*> DSQRT(SFMIN) (for JOBR.EQ.'R'), or less than SMALL=SFMIN/EPSLN, +*> where SFMIN=SLAMCH('S'), EPSLN=SLAMCH('E'). +*> = 'N': Do not kill small columns of c*A. This option assumes that +*> BLAS and QR factorizations and triangular solvers are +*> implemented to work in that range. If the condition of A +*> is greater than BIG, use DGESVJ. +*> = 'R': RESTRICTED range for sigma(c*A) is [DSQRT(SFMIN), DSQRT(BIG)] +*> (roughly, as described above). This option is recommended. +*> ~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*> For computing the singular values in the FULL range [SFMIN,BIG] +*> use DGESVJ. +*> \endverbatim +*> +*> \param[in] JOBT +*> \verbatim +*> JOBT is CHARACTER*1 +*> If the matrix is square then the procedure may determine to use +*> transposed A if A^t seems to be better with respect to convergence. +*> If the matrix is not square, JOBT is ignored. This is subject to +*> changes in the future. +*> The decision is based on two values of entropy over the adjoint +*> orbit of A^t * A. See the descriptions of WORK(6) and WORK(7). +*> = 'T': transpose if entropy test indicates possibly faster +*> convergence of Jacobi process if A^t is taken as input. If A is +*> replaced with A^t, then the row pivoting is included automatically. +*> = 'N': do not speculate. +*> This option can be used to compute only the singular values, or the +*> full SVD (U, SIGMA and V). For only one set of singular vectors +*> (U or V), the caller should provide both U and V, as one of the +*> matrices is used as workspace if the matrix A is transposed. +*> The implementer can easily remove this constraint and make the +*> code more complicated. See the descriptions of U and V. +*> \endverbatim +*> +*> \param[in] JOBP +*> \verbatim +*> JOBP is CHARACTER*1 +*> Issues the licence to introduce structured perturbations to drown +*> denormalized numbers. This licence should be active if the +*> denormals are poorly implemented, causing slow computation, +*> especially in cases of fast convergence (!). For details see [1,2]. +*> For the sake of simplicity, this perturbations are included only +*> when the full SVD or only the singular values are requested. The +*> implementer/user can easily add the perturbation for the cases of +*> computing one set of singular vectors. +*> = 'P': introduce perturbation +*> = 'N': do not perturb +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. M >= N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] SVA +*> \verbatim +*> SVA is DOUBLE PRECISION array, dimension (N) +*> On exit, +*> - For WORK(1)/WORK(2) = ONE: The singular values of A. During the +*> computation SVA contains Euclidean column norms of the +*> iterated matrices in the array A. +*> - For WORK(1) .NE. WORK(2): The singular values of A are +*> (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if +*> sigma_max(A) overflows or if small singular values have been +*> saved from underflow by scaling the input matrix A. +*> - If JOBR='R' then some of the singular values may be returned +*> as exact zeros obtained by "set to zero" because they are +*> below the numerical rank threshold or are denormalized numbers. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension ( LDU, N ) +*> If JOBU = 'U', then U contains on exit the M-by-N matrix of +*> the left singular vectors. +*> If JOBU = 'F', then U contains on exit the M-by-M matrix of +*> the left singular vectors, including an ONB +*> of the orthogonal complement of the Range(A). +*> If JOBU = 'W' .AND. (JOBV.EQ.'V' .AND. JOBT.EQ.'T' .AND. M.EQ.N), +*> then U is used as workspace if the procedure +*> replaces A with A^t. In that case, [V] is computed +*> in U as left singular vectors of A^t and then +*> copied back to the V array. This 'W' option is just +*> a reminder to the caller that in this case U is +*> reserved as workspace of length N*N. +*> If JOBU = 'N' U is not referenced, unless JOBT='T'. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U, LDU >= 1. +*> IF JOBU = 'U' or 'F' or 'W', then LDU >= M. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension ( LDV, N ) +*> If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of +*> the right singular vectors; +*> If JOBV = 'W', AND (JOBU.EQ.'U' AND JOBT.EQ.'T' AND M.EQ.N), +*> then V is used as workspace if the pprocedure +*> replaces A with A^t. In that case, [U] is computed +*> in V as right singular vectors of A^t and then +*> copied back to the U array. This 'W' option is just +*> a reminder to the caller that in this case V is +*> reserved as workspace of length N*N. +*> If JOBV = 'N' V is not referenced, unless JOBT='T'. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V, LDV >= 1. +*> If JOBV = 'V' or 'J' or 'W', then LDV >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> On exit, if N.GT.0 .AND. M.GT.0 (else not referenced), +*> WORK(1) = SCALE = WORK(2) / WORK(1) is the scaling factor such +*> that SCALE*SVA(1:N) are the computed singular values +*> of A. (See the description of SVA().) +*> WORK(2) = See the description of WORK(1). +*> WORK(3) = SCONDA is an estimate for the condition number of +*> column equilibrated A. (If JOBA .EQ. 'E' or 'G') +*> SCONDA is an estimate of DSQRT(||(R^t * R)^(-1)||_1). +*> It is computed using DPOCON. It holds +*> N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA +*> where R is the triangular factor from the QRF of A. +*> However, if R is truncated and the numerical rank is +*> determined to be strictly smaller than N, SCONDA is +*> returned as -1, thus indicating that the smallest +*> singular values might be lost. +*> +*> If full SVD is needed, the following two condition numbers are +*> useful for the analysis of the algorithm. They are provied for +*> a developer/implementer who is familiar with the details of +*> the method. +*> +*> WORK(4) = an estimate of the scaled condition number of the +*> triangular factor in the first QR factorization. +*> WORK(5) = an estimate of the scaled condition number of the +*> triangular factor in the second QR factorization. +*> The following two parameters are computed if JOBT .EQ. 'T'. +*> They are provided for a developer/implementer who is familiar +*> with the details of the method. +*> +*> WORK(6) = the entropy of A^t*A :: this is the Shannon entropy +*> of diag(A^t*A) / Trace(A^t*A) taken as point in the +*> probability simplex. +*> WORK(7) = the entropy of A*A^t. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> Length of WORK to confirm proper allocation of work space. +*> LWORK depends on the job: +*> +*> If only SIGMA is needed ( JOBU.EQ.'N', JOBV.EQ.'N' ) and +*> -> .. no scaled condition estimate required (JOBE.EQ.'N'): +*> LWORK >= max(2*M+N,4*N+1,7). This is the minimal requirement. +*> ->> For optimal performance (blocked code) the optimal value +*> is LWORK >= max(2*M+N,3*N+(N+1)*NB,7). Here NB is the optimal +*> block size for DGEQP3 and DGEQRF. +*> In general, optimal LWORK is computed as +*> LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), 7). +*> -> .. an estimate of the scaled condition number of A is +*> required (JOBA='E', 'G'). In this case, LWORK is the maximum +*> of the above and N*N+4*N, i.e. LWORK >= max(2*M+N,N*N+4*N,7). +*> ->> For optimal performance (blocked code) the optimal value +*> is LWORK >= max(2*M+N,3*N+(N+1)*NB, N*N+4*N, 7). +*> In general, the optimal length LWORK is computed as +*> LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), +*> N+N*N+LWORK(DPOCON),7). +*> +*> If SIGMA and the right singular vectors are needed (JOBV.EQ.'V'), +*> -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7). +*> -> For optimal performance, LWORK >= max(2*M+N,3*N+(N+1)*NB,7), +*> where NB is the optimal block size for DGEQP3, DGEQRF, DGELQF, +*> DORMLQ. In general, the optimal length LWORK is computed as +*> LWORK >= max(2*M+N,N+LWORK(DGEQP3), N+LWORK(DPOCON), +*> N+LWORK(DGELQF), 2*N+LWORK(DGEQRF), N+LWORK(DORMLQ)). +*> +*> If SIGMA and the left singular vectors are needed +*> -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7). +*> -> For optimal performance: +*> if JOBU.EQ.'U' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,7), +*> if JOBU.EQ.'F' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,N+M*NB,7), +*> where NB is the optimal block size for DGEQP3, DGEQRF, DORMQR. +*> In general, the optimal length LWORK is computed as +*> LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DPOCON), +*> 2*N+LWORK(DGEQRF), N+LWORK(DORMQR)). +*> Here LWORK(DORMQR) equals N*NB (for JOBU.EQ.'U') or +*> M*NB (for JOBU.EQ.'F'). +*> +*> If the full SVD is needed: (JOBU.EQ.'U' or JOBU.EQ.'F') and +*> -> if JOBV.EQ.'V' +*> the minimal requirement is LWORK >= max(2*M+N,6*N+2*N*N). +*> -> if JOBV.EQ.'J' the minimal requirement is +*> LWORK >= max(2*M+N, 4*N+N*N,2*N+N*N+6). +*> -> For optimal performance, LWORK should be additionally +*> larger than N+M*NB, where NB is the optimal block size +*> for DORMQR. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (M+3*N). +*> On exit, +*> IWORK(1) = the numerical rank determined after the initial +*> QR factorization with pivoting. See the descriptions +*> of JOBA and JOBR. +*> IWORK(2) = the number of the computed nonzero singular values +*> IWORK(3) = if nonzero, a warning message: +*> If IWORK(3).EQ.1 then some of the column norms of A +*> were denormalized floats. The requested high accuracy +*> is not warranted by the data. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> < 0 : if INFO = -i, then the i-th argument had an illegal value. +*> = 0 : successful exit; +*> > 0 : DGEJSV did not converge in the maximal allowed number +*> of sweeps. The computed values may be inaccurate. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleGEsing +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> DGEJSV implements a preconditioned Jacobi SVD algorithm. It uses DGEQP3, +*> DGEQRF, and DGELQF as preprocessors and preconditioners. Optionally, an +*> additional row pivoting can be used as a preprocessor, which in some +*> cases results in much higher accuracy. An example is matrix A with the +*> structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned +*> diagonal matrices and C is well-conditioned matrix. In that case, complete +*> pivoting in the first QR factorizations provides accuracy dependent on the +*> condition number of C, and independent of D1, D2. Such higher accuracy is +*> not completely understood theoretically, but it works well in practice. +*> Further, if A can be written as A = B*D, with well-conditioned B and some +*> diagonal D, then the high accuracy is guaranteed, both theoretically and +*> in software, independent of D. For more details see [1], [2]. +*> The computational range for the singular values can be the full range +*> ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS +*> & LAPACK routines called by DGEJSV are implemented to work in that range. +*> If that is not the case, then the restriction for safe computation with +*> the singular values in the range of normalized IEEE numbers is that the +*> spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not +*> overflow. This code (DGEJSV) is best used in this restricted range, +*> meaning that singular values of magnitude below ||A||_2 / DLAMCH('O') are +*> returned as zeros. See JOBR for details on this. +*> Further, this implementation is somewhat slower than the one described +*> in [1,2] due to replacement of some non-LAPACK components, and because +*> the choice of some tuning parameters in the iterative part (DGESVJ) is +*> left to the implementer on a particular machine. +*> The rank revealing QR factorization (in this code: DGEQP3) should be +*> implemented as in [3]. We have a new version of DGEQP3 under development +*> that is more robust than the current one in LAPACK, with a cleaner cut in +*> rank deficient cases. It will be available in the SIGMA library [4]. +*> If M is much larger than N, it is obvious that the initial QRF with +*> column pivoting can be preprocessed by the QRF without pivoting. That +*> well known trick is not used in DGEJSV because in some cases heavy row +*> weighting can be treated with complete pivoting. The overhead in cases +*> M much larger than N is then only due to pivoting, but the benefits in +*> terms of accuracy have prevailed. The implementer/user can incorporate +*> this extra QRF step easily. The implementer can also improve data movement +*> (matrix transpose, matrix copy, matrix transposed copy) - this +*> implementation of DGEJSV uses only the simplest, naive data movement. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. +*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. +*> LAPACK Working note 169. +*> [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. +*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. +*> LAPACK Working note 170. +*> [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR +*> factorization software - a case study. +*> ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28. +*> LAPACK Working note 176. +*> [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, +*> QSVD, (H,K)-SVD computations. +*> Department of Mathematics, University of Zagreb, 2008. +*> \endverbatim +* +*> \par Bugs, examples and comments: +* ================================= +*> +*> Please report all bugs and send interesting examples and/or comments to +*> drmac@math.hr. Thank you. +*> +* ===================================================================== + SUBROUTINE DGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, + $ M, N, A, LDA, SVA, U, LDU, V, LDV, + $ WORK, LWORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + IMPLICIT NONE + INTEGER INFO, LDA, LDU, LDV, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), SVA( N ), U( LDU, * ), V( LDV, * ), + $ WORK( LWORK ) + INTEGER IWORK( * ) + CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV +* .. +* +* =========================================================================== +* +* .. Local Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION AAPP, AAQQ, AATMAX, AATMIN, BIG, BIG1, COND_OK, + $ CONDR1, CONDR2, ENTRA, ENTRAT, EPSLN, MAXPRJ, SCALEM, + $ SCONDA, SFMIN, SMALL, TEMP1, USCAL1, USCAL2, XSC + INTEGER IERR, N1, NR, NUMRANK, p, q, WARNING + LOGICAL ALMORT, DEFR, ERREST, GOSCAL, JRACC, KILL, LSVEC, + $ L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN, + $ NOSCAL, ROWPIV, RSVEC, TRANSP +* .. +* .. Intrinsic Functions .. + INTRINSIC DABS, DLOG, MAX, MIN, DBLE, IDNINT, DSIGN, DSQRT +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DNRM2 + INTEGER IDAMAX + LOGICAL LSAME + EXTERNAL IDAMAX, LSAME, DLAMCH, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGELQF, DGEQP3, DGEQRF, DLACPY, DLASCL, + $ DLASET, DLASSQ, DLASWP, DORGQR, DORMLQ, + $ DORMQR, DPOCON, DSCAL, DSWAP, DTRSM, XERBLA +* + EXTERNAL DGESVJ +* .. +* +* Test the input arguments +* + LSVEC = LSAME( JOBU, 'U' ) .OR. LSAME( JOBU, 'F' ) + JRACC = LSAME( JOBV, 'J' ) + RSVEC = LSAME( JOBV, 'V' ) .OR. JRACC + ROWPIV = LSAME( JOBA, 'F' ) .OR. LSAME( JOBA, 'G' ) + L2RANK = LSAME( JOBA, 'R' ) + L2ABER = LSAME( JOBA, 'A' ) + ERREST = LSAME( JOBA, 'E' ) .OR. LSAME( JOBA, 'G' ) + L2TRAN = LSAME( JOBT, 'T' ) + L2KILL = LSAME( JOBR, 'R' ) + DEFR = LSAME( JOBR, 'N' ) + L2PERT = LSAME( JOBP, 'P' ) +* + IF ( .NOT.(ROWPIV .OR. L2RANK .OR. L2ABER .OR. + $ ERREST .OR. LSAME( JOBA, 'C' ) )) THEN + INFO = - 1 + ELSE IF ( .NOT.( LSVEC .OR. LSAME( JOBU, 'N' ) .OR. + $ LSAME( JOBU, 'W' )) ) THEN + INFO = - 2 + ELSE IF ( .NOT.( RSVEC .OR. LSAME( JOBV, 'N' ) .OR. + $ LSAME( JOBV, 'W' )) .OR. ( JRACC .AND. (.NOT.LSVEC) ) ) THEN + INFO = - 3 + ELSE IF ( .NOT. ( L2KILL .OR. DEFR ) ) THEN + INFO = - 4 + ELSE IF ( .NOT. ( L2TRAN .OR. LSAME( JOBT, 'N' ) ) ) THEN + INFO = - 5 + ELSE IF ( .NOT. ( L2PERT .OR. LSAME( JOBP, 'N' ) ) ) THEN + INFO = - 6 + ELSE IF ( M .LT. 0 ) THEN + INFO = - 7 + ELSE IF ( ( N .LT. 0 ) .OR. ( N .GT. M ) ) THEN + INFO = - 8 + ELSE IF ( LDA .LT. M ) THEN + INFO = - 10 + ELSE IF ( LSVEC .AND. ( LDU .LT. M ) ) THEN + INFO = - 13 + ELSE IF ( RSVEC .AND. ( LDV .LT. N ) ) THEN + INFO = - 15 + ELSE IF ( (.NOT.(LSVEC .OR. RSVEC .OR. ERREST).AND. + & (LWORK .LT. MAX(7,4*N+1,2*M+N))) .OR. + & (.NOT.(LSVEC .OR. RSVEC) .AND. ERREST .AND. + & (LWORK .LT. MAX(7,4*N+N*N,2*M+N))) .OR. + & (LSVEC .AND. (.NOT.RSVEC) .AND. (LWORK .LT. MAX(7,2*M+N,4*N+1))) + & .OR. + & (RSVEC .AND. (.NOT.LSVEC) .AND. (LWORK .LT. MAX(7,2*M+N,4*N+1))) + & .OR. + & (LSVEC .AND. RSVEC .AND. (.NOT.JRACC) .AND. + & (LWORK.LT.MAX(2*M+N,6*N+2*N*N))) + & .OR. (LSVEC .AND. RSVEC .AND. JRACC .AND. + & LWORK.LT.MAX(2*M+N,4*N+N*N,2*N+N*N+6))) + & THEN + INFO = - 17 + ELSE +* #:) + INFO = 0 + END IF +* + IF ( INFO .NE. 0 ) THEN +* #:( + CALL XERBLA( 'DGEJSV', - INFO ) + RETURN + END IF +* +* Quick return for void matrix (Y3K safe) +* #:) + IF ( ( M .EQ. 0 ) .OR. ( N .EQ. 0 ) ) THEN + IWORK(1:3) = 0 + WORK(1:7) = 0 + RETURN + ENDIF +* +* Determine whether the matrix U should be M x N or M x M +* + IF ( LSVEC ) THEN + N1 = N + IF ( LSAME( JOBU, 'F' ) ) N1 = M + END IF +* +* Set numerical parameters +* +*! NOTE: Make sure DLAMCH() does not fail on the target architecture. +* + EPSLN = DLAMCH('Epsilon') + SFMIN = DLAMCH('SafeMinimum') + SMALL = SFMIN / EPSLN + BIG = DLAMCH('O') +* BIG = ONE / SFMIN +* +* Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N +* +*(!) If necessary, scale SVA() to protect the largest norm from +* overflow. It is possible that this scaling pushes the smallest +* column norm left from the underflow threshold (extreme case). +* + SCALEM = ONE / DSQRT(DBLE(M)*DBLE(N)) + NOSCAL = .TRUE. + GOSCAL = .TRUE. + DO 1874 p = 1, N + AAPP = ZERO + AAQQ = ONE + CALL DLASSQ( M, A(1,p), 1, AAPP, AAQQ ) + IF ( AAPP .GT. BIG ) THEN + INFO = - 9 + CALL XERBLA( 'DGEJSV', -INFO ) + RETURN + END IF + AAQQ = DSQRT(AAQQ) + IF ( ( AAPP .LT. (BIG / AAQQ) ) .AND. NOSCAL ) THEN + SVA(p) = AAPP * AAQQ + ELSE + NOSCAL = .FALSE. + SVA(p) = AAPP * ( AAQQ * SCALEM ) + IF ( GOSCAL ) THEN + GOSCAL = .FALSE. + CALL DSCAL( p-1, SCALEM, SVA, 1 ) + END IF + END IF + 1874 CONTINUE +* + IF ( NOSCAL ) SCALEM = ONE +* + AAPP = ZERO + AAQQ = BIG + DO 4781 p = 1, N + AAPP = MAX( AAPP, SVA(p) ) + IF ( SVA(p) .NE. ZERO ) AAQQ = MIN( AAQQ, SVA(p) ) + 4781 CONTINUE +* +* Quick return for zero M x N matrix +* #:) + IF ( AAPP .EQ. ZERO ) THEN + IF ( LSVEC ) CALL DLASET( 'G', M, N1, ZERO, ONE, U, LDU ) + IF ( RSVEC ) CALL DLASET( 'G', N, N, ZERO, ONE, V, LDV ) + WORK(1) = ONE + WORK(2) = ONE + IF ( ERREST ) WORK(3) = ONE + IF ( LSVEC .AND. RSVEC ) THEN + WORK(4) = ONE + WORK(5) = ONE + END IF + IF ( L2TRAN ) THEN + WORK(6) = ZERO + WORK(7) = ZERO + END IF + IWORK(1) = 0 + IWORK(2) = 0 + IWORK(3) = 0 + RETURN + END IF +* +* Issue warning if denormalized column norms detected. Override the +* high relative accuracy request. Issue licence to kill columns +* (set them to zero) whose norm is less than sigma_max / BIG (roughly). +* #:( + WARNING = 0 + IF ( AAQQ .LE. SFMIN ) THEN + L2RANK = .TRUE. + L2KILL = .TRUE. + WARNING = 1 + END IF +* +* Quick return for one-column matrix +* #:) + IF ( N .EQ. 1 ) THEN +* + IF ( LSVEC ) THEN + CALL DLASCL( 'G',0,0,SVA(1),SCALEM, M,1,A(1,1),LDA,IERR ) + CALL DLACPY( 'A', M, 1, A, LDA, U, LDU ) +* computing all M left singular vectors of the M x 1 matrix + IF ( N1 .NE. N ) THEN + CALL DGEQRF( M, N, U,LDU, WORK, WORK(N+1),LWORK-N,IERR ) + CALL DORGQR( M,N1,1, U,LDU,WORK,WORK(N+1),LWORK-N,IERR ) + CALL DCOPY( M, A(1,1), 1, U(1,1), 1 ) + END IF + END IF + IF ( RSVEC ) THEN + V(1,1) = ONE + END IF + IF ( SVA(1) .LT. (BIG*SCALEM) ) THEN + SVA(1) = SVA(1) / SCALEM + SCALEM = ONE + END IF + WORK(1) = ONE / SCALEM + WORK(2) = ONE + IF ( SVA(1) .NE. ZERO ) THEN + IWORK(1) = 1 + IF ( ( SVA(1) / SCALEM) .GE. SFMIN ) THEN + IWORK(2) = 1 + ELSE + IWORK(2) = 0 + END IF + ELSE + IWORK(1) = 0 + IWORK(2) = 0 + END IF + IWORK(3) = 0 + IF ( ERREST ) WORK(3) = ONE + IF ( LSVEC .AND. RSVEC ) THEN + WORK(4) = ONE + WORK(5) = ONE + END IF + IF ( L2TRAN ) THEN + WORK(6) = ZERO + WORK(7) = ZERO + END IF + RETURN +* + END IF +* + TRANSP = .FALSE. + L2TRAN = L2TRAN .AND. ( M .EQ. N ) +* + AATMAX = -ONE + AATMIN = BIG + IF ( ROWPIV .OR. L2TRAN ) THEN +* +* Compute the row norms, needed to determine row pivoting sequence +* (in the case of heavily row weighted A, row pivoting is strongly +* advised) and to collect information needed to compare the +* structures of A * A^t and A^t * A (in the case L2TRAN.EQ..TRUE.). +* + IF ( L2TRAN ) THEN + DO 1950 p = 1, M + XSC = ZERO + TEMP1 = ONE + CALL DLASSQ( N, A(p,1), LDA, XSC, TEMP1 ) +* DLASSQ gets both the ell_2 and the ell_infinity norm +* in one pass through the vector + WORK(M+N+p) = XSC * SCALEM + WORK(N+p) = XSC * (SCALEM*DSQRT(TEMP1)) + AATMAX = MAX( AATMAX, WORK(N+p) ) + IF (WORK(N+p) .NE. ZERO) AATMIN = MIN(AATMIN,WORK(N+p)) + 1950 CONTINUE + ELSE + DO 1904 p = 1, M + WORK(M+N+p) = SCALEM*DABS( A(p,IDAMAX(N,A(p,1),LDA)) ) + AATMAX = MAX( AATMAX, WORK(M+N+p) ) + AATMIN = MIN( AATMIN, WORK(M+N+p) ) + 1904 CONTINUE + END IF +* + END IF +* +* For square matrix A try to determine whether A^t would be better +* input for the preconditioned Jacobi SVD, with faster convergence. +* The decision is based on an O(N) function of the vector of column +* and row norms of A, based on the Shannon entropy. This should give +* the right choice in most cases when the difference actually matters. +* It may fail and pick the slower converging side. +* + ENTRA = ZERO + ENTRAT = ZERO + IF ( L2TRAN ) THEN +* + XSC = ZERO + TEMP1 = ONE + CALL DLASSQ( N, SVA, 1, XSC, TEMP1 ) + TEMP1 = ONE / TEMP1 +* + ENTRA = ZERO + DO 1113 p = 1, N + BIG1 = ( ( SVA(p) / XSC )**2 ) * TEMP1 + IF ( BIG1 .NE. ZERO ) ENTRA = ENTRA + BIG1 * DLOG(BIG1) + 1113 CONTINUE + ENTRA = - ENTRA / DLOG(DBLE(N)) +* +* Now, SVA().^2/Trace(A^t * A) is a point in the probability simplex. +* It is derived from the diagonal of A^t * A. Do the same with the +* diagonal of A * A^t, compute the entropy of the corresponding +* probability distribution. Note that A * A^t and A^t * A have the +* same trace. +* + ENTRAT = ZERO + DO 1114 p = N+1, N+M + BIG1 = ( ( WORK(p) / XSC )**2 ) * TEMP1 + IF ( BIG1 .NE. ZERO ) ENTRAT = ENTRAT + BIG1 * DLOG(BIG1) + 1114 CONTINUE + ENTRAT = - ENTRAT / DLOG(DBLE(M)) +* +* Analyze the entropies and decide A or A^t. Smaller entropy +* usually means better input for the algorithm. +* + TRANSP = ( ENTRAT .LT. ENTRA ) +* +* If A^t is better than A, transpose A. +* + IF ( TRANSP ) THEN +* In an optimal implementation, this trivial transpose +* should be replaced with faster transpose. + DO 1115 p = 1, N - 1 + DO 1116 q = p + 1, N + TEMP1 = A(q,p) + A(q,p) = A(p,q) + A(p,q) = TEMP1 + 1116 CONTINUE + 1115 CONTINUE + DO 1117 p = 1, N + WORK(M+N+p) = SVA(p) + SVA(p) = WORK(N+p) + 1117 CONTINUE + TEMP1 = AAPP + AAPP = AATMAX + AATMAX = TEMP1 + TEMP1 = AAQQ + AAQQ = AATMIN + AATMIN = TEMP1 + KILL = LSVEC + LSVEC = RSVEC + RSVEC = KILL + IF ( LSVEC ) N1 = N +* + ROWPIV = .TRUE. + END IF +* + END IF +* END IF L2TRAN +* +* Scale the matrix so that its maximal singular value remains less +* than DSQRT(BIG) -- the matrix is scaled so that its maximal column +* has Euclidean norm equal to DSQRT(BIG/N). The only reason to keep +* DSQRT(BIG) instead of BIG is the fact that DGEJSV uses LAPACK and +* BLAS routines that, in some implementations, are not capable of +* working in the full interval [SFMIN,BIG] and that they may provoke +* overflows in the intermediate results. If the singular values spread +* from SFMIN to BIG, then DGESVJ will compute them. So, in that case, +* one should use DGESVJ instead of DGEJSV. +* + BIG1 = DSQRT( BIG ) + TEMP1 = DSQRT( BIG / DBLE(N) ) +* + CALL DLASCL( 'G', 0, 0, AAPP, TEMP1, N, 1, SVA, N, IERR ) + IF ( AAQQ .GT. (AAPP * SFMIN) ) THEN + AAQQ = ( AAQQ / AAPP ) * TEMP1 + ELSE + AAQQ = ( AAQQ * TEMP1 ) / AAPP + END IF + TEMP1 = TEMP1 * SCALEM + CALL DLASCL( 'G', 0, 0, AAPP, TEMP1, M, N, A, LDA, IERR ) +* +* To undo scaling at the end of this procedure, multiply the +* computed singular values with USCAL2 / USCAL1. +* + USCAL1 = TEMP1 + USCAL2 = AAPP +* + IF ( L2KILL ) THEN +* L2KILL enforces computation of nonzero singular values in +* the restricted range of condition number of the initial A, +* sigma_max(A) / sigma_min(A) approx. DSQRT(BIG)/DSQRT(SFMIN). + XSC = DSQRT( SFMIN ) + ELSE + XSC = SMALL +* +* Now, if the condition number of A is too big, +* sigma_max(A) / sigma_min(A) .GT. DSQRT(BIG/N) * EPSLN / SFMIN, +* as a precaution measure, the full SVD is computed using DGESVJ +* with accumulated Jacobi rotations. This provides numerically +* more robust computation, at the cost of slightly increased run +* time. Depending on the concrete implementation of BLAS and LAPACK +* (i.e. how they behave in presence of extreme ill-conditioning) the +* implementor may decide to remove this switch. + IF ( ( AAQQ.LT.DSQRT(SFMIN) ) .AND. LSVEC .AND. RSVEC ) THEN + JRACC = .TRUE. + END IF +* + END IF + IF ( AAQQ .LT. XSC ) THEN + DO 700 p = 1, N + IF ( SVA(p) .LT. XSC ) THEN + CALL DLASET( 'A', M, 1, ZERO, ZERO, A(1,p), LDA ) + SVA(p) = ZERO + END IF + 700 CONTINUE + END IF +* +* Preconditioning using QR factorization with pivoting +* + IF ( ROWPIV ) THEN +* Optional row permutation (Bjoerck row pivoting): +* A result by Cox and Higham shows that the Bjoerck's +* row pivoting combined with standard column pivoting +* has similar effect as Powell-Reid complete pivoting. +* The ell-infinity norms of A are made nonincreasing. + DO 1952 p = 1, M - 1 + q = IDAMAX( M-p+1, WORK(M+N+p), 1 ) + p - 1 + IWORK(2*N+p) = q + IF ( p .NE. q ) THEN + TEMP1 = WORK(M+N+p) + WORK(M+N+p) = WORK(M+N+q) + WORK(M+N+q) = TEMP1 + END IF + 1952 CONTINUE + CALL DLASWP( N, A, LDA, 1, M-1, IWORK(2*N+1), 1 ) + END IF +* +* End of the preparation phase (scaling, optional sorting and +* transposing, optional flushing of small columns). +* +* Preconditioning +* +* If the full SVD is needed, the right singular vectors are computed +* from a matrix equation, and for that we need theoretical analysis +* of the Businger-Golub pivoting. So we use DGEQP3 as the first RR QRF. +* In all other cases the first RR QRF can be chosen by other criteria +* (eg speed by replacing global with restricted window pivoting, such +* as in SGEQPX from TOMS # 782). Good results will be obtained using +* SGEQPX with properly (!) chosen numerical parameters. +* Any improvement of DGEQP3 improves overal performance of DGEJSV. +* +* A * P1 = Q1 * [ R1^t 0]^t: + DO 1963 p = 1, N +* .. all columns are free columns + IWORK(p) = 0 + 1963 CONTINUE + CALL DGEQP3( M,N,A,LDA, IWORK,WORK, WORK(N+1),LWORK-N, IERR ) +* +* The upper triangular matrix R1 from the first QRF is inspected for +* rank deficiency and possibilities for deflation, or possible +* ill-conditioning. Depending on the user specified flag L2RANK, +* the procedure explores possibilities to reduce the numerical +* rank by inspecting the computed upper triangular factor. If +* L2RANK or L2ABER are up, then DGEJSV will compute the SVD of +* A + dA, where ||dA|| <= f(M,N)*EPSLN. +* + NR = 1 + IF ( L2ABER ) THEN +* Standard absolute error bound suffices. All sigma_i with +* sigma_i < N*EPSLN*||A|| are flushed to zero. This is an +* agressive enforcement of lower numerical rank by introducing a +* backward error of the order of N*EPSLN*||A||. + TEMP1 = DSQRT(DBLE(N))*EPSLN + DO 3001 p = 2, N + IF ( DABS(A(p,p)) .GE. (TEMP1*DABS(A(1,1))) ) THEN + NR = NR + 1 + ELSE + GO TO 3002 + END IF + 3001 CONTINUE + 3002 CONTINUE + ELSE IF ( L2RANK ) THEN +* .. similarly as above, only slightly more gentle (less agressive). +* Sudden drop on the diagonal of R1 is used as the criterion for +* close-to-rank-deficient. + TEMP1 = DSQRT(SFMIN) + DO 3401 p = 2, N + IF ( ( DABS(A(p,p)) .LT. (EPSLN*DABS(A(p-1,p-1))) ) .OR. + $ ( DABS(A(p,p)) .LT. SMALL ) .OR. + $ ( L2KILL .AND. (DABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3402 + NR = NR + 1 + 3401 CONTINUE + 3402 CONTINUE +* + ELSE +* The goal is high relative accuracy. However, if the matrix +* has high scaled condition number the relative accuracy is in +* general not feasible. Later on, a condition number estimator +* will be deployed to estimate the scaled condition number. +* Here we just remove the underflowed part of the triangular +* factor. This prevents the situation in which the code is +* working hard to get the accuracy not warranted by the data. + TEMP1 = DSQRT(SFMIN) + DO 3301 p = 2, N + IF ( ( DABS(A(p,p)) .LT. SMALL ) .OR. + $ ( L2KILL .AND. (DABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3302 + NR = NR + 1 + 3301 CONTINUE + 3302 CONTINUE +* + END IF +* + ALMORT = .FALSE. + IF ( NR .EQ. N ) THEN + MAXPRJ = ONE + DO 3051 p = 2, N + TEMP1 = DABS(A(p,p)) / SVA(IWORK(p)) + MAXPRJ = MIN( MAXPRJ, TEMP1 ) + 3051 CONTINUE + IF ( MAXPRJ**2 .GE. ONE - DBLE(N)*EPSLN ) ALMORT = .TRUE. + END IF +* +* + SCONDA = - ONE + CONDR1 = - ONE + CONDR2 = - ONE +* + IF ( ERREST ) THEN + IF ( N .EQ. NR ) THEN + IF ( RSVEC ) THEN +* .. V is available as workspace + CALL DLACPY( 'U', N, N, A, LDA, V, LDV ) + DO 3053 p = 1, N + TEMP1 = SVA(IWORK(p)) + CALL DSCAL( p, ONE/TEMP1, V(1,p), 1 ) + 3053 CONTINUE + CALL DPOCON( 'U', N, V, LDV, ONE, TEMP1, + $ WORK(N+1), IWORK(2*N+M+1), IERR ) + ELSE IF ( LSVEC ) THEN +* .. U is available as workspace + CALL DLACPY( 'U', N, N, A, LDA, U, LDU ) + DO 3054 p = 1, N + TEMP1 = SVA(IWORK(p)) + CALL DSCAL( p, ONE/TEMP1, U(1,p), 1 ) + 3054 CONTINUE + CALL DPOCON( 'U', N, U, LDU, ONE, TEMP1, + $ WORK(N+1), IWORK(2*N+M+1), IERR ) + ELSE + CALL DLACPY( 'U', N, N, A, LDA, WORK(N+1), N ) + DO 3052 p = 1, N + TEMP1 = SVA(IWORK(p)) + CALL DSCAL( p, ONE/TEMP1, WORK(N+(p-1)*N+1), 1 ) + 3052 CONTINUE +* .. the columns of R are scaled to have unit Euclidean lengths. + CALL DPOCON( 'U', N, WORK(N+1), N, ONE, TEMP1, + $ WORK(N+N*N+1), IWORK(2*N+M+1), IERR ) + END IF + SCONDA = ONE / DSQRT(TEMP1) +* SCONDA is an estimate of DSQRT(||(R^t * R)^(-1)||_1). +* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA + ELSE + SCONDA = - ONE + END IF + END IF +* + L2PERT = L2PERT .AND. ( DABS( A(1,1)/A(NR,NR) ) .GT. DSQRT(BIG1) ) +* If there is no violent scaling, artificial perturbation is not needed. +* +* Phase 3: +* + IF ( .NOT. ( RSVEC .OR. LSVEC ) ) THEN +* +* Singular Values only +* +* .. transpose A(1:NR,1:N) + DO 1946 p = 1, MIN( N-1, NR ) + CALL DCOPY( N-p, A(p,p+1), LDA, A(p+1,p), 1 ) + 1946 CONTINUE +* +* The following two DO-loops introduce small relative perturbation +* into the strict upper triangle of the lower triangular matrix. +* Small entries below the main diagonal are also changed. +* This modification is useful if the computing environment does not +* provide/allow FLUSH TO ZERO underflow, for it prevents many +* annoying denormalized numbers in case of strongly scaled matrices. +* The perturbation is structured so that it does not introduce any +* new perturbation of the singular values, and it does not destroy +* the job done by the preconditioner. +* The licence for this perturbation is in the variable L2PERT, which +* should be .FALSE. if FLUSH TO ZERO underflow is active. +* + IF ( .NOT. ALMORT ) THEN +* + IF ( L2PERT ) THEN +* XSC = DSQRT(SMALL) + XSC = EPSLN / DBLE(N) + DO 4947 q = 1, NR + TEMP1 = XSC*DABS(A(q,q)) + DO 4949 p = 1, N + IF ( ( (p.GT.q) .AND. (DABS(A(p,q)).LE.TEMP1) ) + $ .OR. ( p .LT. q ) ) + $ A(p,q) = DSIGN( TEMP1, A(p,q) ) + 4949 CONTINUE + 4947 CONTINUE + ELSE + CALL DLASET( 'U', NR-1,NR-1, ZERO,ZERO, A(1,2),LDA ) + END IF +* +* .. second preconditioning using the QR factorization +* + CALL DGEQRF( N,NR, A,LDA, WORK, WORK(N+1),LWORK-N, IERR ) +* +* .. and transpose upper to lower triangular + DO 1948 p = 1, NR - 1 + CALL DCOPY( NR-p, A(p,p+1), LDA, A(p+1,p), 1 ) + 1948 CONTINUE +* + END IF +* +* Row-cyclic Jacobi SVD algorithm with column pivoting +* +* .. again some perturbation (a "background noise") is added +* to drown denormals + IF ( L2PERT ) THEN +* XSC = DSQRT(SMALL) + XSC = EPSLN / DBLE(N) + DO 1947 q = 1, NR + TEMP1 = XSC*DABS(A(q,q)) + DO 1949 p = 1, NR + IF ( ( (p.GT.q) .AND. (DABS(A(p,q)).LE.TEMP1) ) + $ .OR. ( p .LT. q ) ) + $ A(p,q) = DSIGN( TEMP1, A(p,q) ) + 1949 CONTINUE + 1947 CONTINUE + ELSE + CALL DLASET( 'U', NR-1, NR-1, ZERO, ZERO, A(1,2), LDA ) + END IF +* +* .. and one-sided Jacobi rotations are started on a lower +* triangular matrix (plus perturbation which is ignored in +* the part which destroys triangular form (confusing?!)) +* + CALL DGESVJ( 'L', 'NoU', 'NoV', NR, NR, A, LDA, SVA, + $ N, V, LDV, WORK, LWORK, INFO ) +* + SCALEM = WORK(1) + NUMRANK = IDNINT(WORK(2)) +* +* + ELSE IF ( RSVEC .AND. ( .NOT. LSVEC ) ) THEN +* +* -> Singular Values and Right Singular Vectors <- +* + IF ( ALMORT ) THEN +* +* .. in this case NR equals N + DO 1998 p = 1, NR + CALL DCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) + 1998 CONTINUE + CALL DLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV ) +* + CALL DGESVJ( 'L','U','N', N, NR, V,LDV, SVA, NR, A,LDA, + $ WORK, LWORK, INFO ) + SCALEM = WORK(1) + NUMRANK = IDNINT(WORK(2)) + + ELSE +* +* .. two more QR factorizations ( one QRF is not enough, two require +* accumulated product of Jacobi rotations, three are perfect ) +* + CALL DLASET( 'Lower', NR-1, NR-1, ZERO, ZERO, A(2,1), LDA ) + CALL DGELQF( NR, N, A, LDA, WORK, WORK(N+1), LWORK-N, IERR) + CALL DLACPY( 'Lower', NR, NR, A, LDA, V, LDV ) + CALL DLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV ) + CALL DGEQRF( NR, NR, V, LDV, WORK(N+1), WORK(2*N+1), + $ LWORK-2*N, IERR ) + DO 8998 p = 1, NR + CALL DCOPY( NR-p+1, V(p,p), LDV, V(p,p), 1 ) + 8998 CONTINUE + CALL DLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV ) +* + CALL DGESVJ( 'Lower', 'U','N', NR, NR, V,LDV, SVA, NR, U, + $ LDU, WORK(N+1), LWORK, INFO ) + SCALEM = WORK(N+1) + NUMRANK = IDNINT(WORK(N+2)) + IF ( NR .LT. N ) THEN + CALL DLASET( 'A',N-NR, NR, ZERO,ZERO, V(NR+1,1), LDV ) + CALL DLASET( 'A',NR, N-NR, ZERO,ZERO, V(1,NR+1), LDV ) + CALL DLASET( 'A',N-NR,N-NR,ZERO,ONE, V(NR+1,NR+1), LDV ) + END IF +* + CALL DORMLQ( 'Left', 'Transpose', N, N, NR, A, LDA, WORK, + $ V, LDV, WORK(N+1), LWORK-N, IERR ) +* + END IF +* + DO 8991 p = 1, N + CALL DCOPY( N, V(p,1), LDV, A(IWORK(p),1), LDA ) + 8991 CONTINUE + CALL DLACPY( 'All', N, N, A, LDA, V, LDV ) +* + IF ( TRANSP ) THEN + CALL DLACPY( 'All', N, N, V, LDV, U, LDU ) + END IF +* + ELSE IF ( LSVEC .AND. ( .NOT. RSVEC ) ) THEN +* +* .. Singular Values and Left Singular Vectors .. +* +* .. second preconditioning step to avoid need to accumulate +* Jacobi rotations in the Jacobi iterations. + DO 1965 p = 1, NR + CALL DCOPY( N-p+1, A(p,p), LDA, U(p,p), 1 ) + 1965 CONTINUE + CALL DLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, U(1,2), LDU ) +* + CALL DGEQRF( N, NR, U, LDU, WORK(N+1), WORK(2*N+1), + $ LWORK-2*N, IERR ) +* + DO 1967 p = 1, NR - 1 + CALL DCOPY( NR-p, U(p,p+1), LDU, U(p+1,p), 1 ) + 1967 CONTINUE + CALL DLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, U(1,2), LDU ) +* + CALL DGESVJ( 'Lower', 'U', 'N', NR,NR, U, LDU, SVA, NR, A, + $ LDA, WORK(N+1), LWORK-N, INFO ) + SCALEM = WORK(N+1) + NUMRANK = IDNINT(WORK(N+2)) +* + IF ( NR .LT. M ) THEN + CALL DLASET( 'A', M-NR, NR,ZERO, ZERO, U(NR+1,1), LDU ) + IF ( NR .LT. N1 ) THEN + CALL DLASET( 'A',NR, N1-NR, ZERO, ZERO, U(1,NR+1), LDU ) + CALL DLASET( 'A',M-NR,N1-NR,ZERO,ONE,U(NR+1,NR+1), LDU ) + END IF + END IF +* + CALL DORMQR( 'Left', 'No Tr', M, N1, N, A, LDA, WORK, U, + $ LDU, WORK(N+1), LWORK-N, IERR ) +* + IF ( ROWPIV ) + $ CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 ) +* + DO 1974 p = 1, N1 + XSC = ONE / DNRM2( M, U(1,p), 1 ) + CALL DSCAL( M, XSC, U(1,p), 1 ) + 1974 CONTINUE +* + IF ( TRANSP ) THEN + CALL DLACPY( 'All', N, N, U, LDU, V, LDV ) + END IF +* + ELSE +* +* .. Full SVD .. +* + IF ( .NOT. JRACC ) THEN +* + IF ( .NOT. ALMORT ) THEN +* +* Second Preconditioning Step (QRF [with pivoting]) +* Note that the composition of TRANSPOSE, QRF and TRANSPOSE is +* equivalent to an LQF CALL. Since in many libraries the QRF +* seems to be better optimized than the LQF, we do explicit +* transpose and use the QRF. This is subject to changes in an +* optimized implementation of DGEJSV. +* + DO 1968 p = 1, NR + CALL DCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) + 1968 CONTINUE +* +* .. the following two loops perturb small entries to avoid +* denormals in the second QR factorization, where they are +* as good as zeros. This is done to avoid painfully slow +* computation with denormals. The relative size of the perturbation +* is a parameter that can be changed by the implementer. +* This perturbation device will be obsolete on machines with +* properly implemented arithmetic. +* To switch it off, set L2PERT=.FALSE. To remove it from the +* code, remove the action under L2PERT=.TRUE., leave the ELSE part. +* The following two loops should be blocked and fused with the +* transposed copy above. +* + IF ( L2PERT ) THEN + XSC = DSQRT(SMALL) + DO 2969 q = 1, NR + TEMP1 = XSC*DABS( V(q,q) ) + DO 2968 p = 1, N + IF ( ( p .GT. q ) .AND. ( DABS(V(p,q)) .LE. TEMP1 ) + $ .OR. ( p .LT. q ) ) + $ V(p,q) = DSIGN( TEMP1, V(p,q) ) + IF ( p .LT. q ) V(p,q) = - V(p,q) + 2968 CONTINUE + 2969 CONTINUE + ELSE + CALL DLASET( 'U', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV ) + END IF +* +* Estimate the row scaled condition number of R1 +* (If R1 is rectangular, N > NR, then the condition number +* of the leading NR x NR submatrix is estimated.) +* + CALL DLACPY( 'L', NR, NR, V, LDV, WORK(2*N+1), NR ) + DO 3950 p = 1, NR + TEMP1 = DNRM2(NR-p+1,WORK(2*N+(p-1)*NR+p),1) + CALL DSCAL(NR-p+1,ONE/TEMP1,WORK(2*N+(p-1)*NR+p),1) + 3950 CONTINUE + CALL DPOCON('Lower',NR,WORK(2*N+1),NR,ONE,TEMP1, + $ WORK(2*N+NR*NR+1),IWORK(M+2*N+1),IERR) + CONDR1 = ONE / DSQRT(TEMP1) +* .. here need a second oppinion on the condition number +* .. then assume worst case scenario +* R1 is OK for inverse <=> CONDR1 .LT. DBLE(N) +* more conservative <=> CONDR1 .LT. DSQRT(DBLE(N)) +* + COND_OK = DSQRT(DBLE(NR)) +*[TP] COND_OK is a tuning parameter. + + IF ( CONDR1 .LT. COND_OK ) THEN +* .. the second QRF without pivoting. Note: in an optimized +* implementation, this QRF should be implemented as the QRF +* of a lower triangular matrix. +* R1^t = Q2 * R2 + CALL DGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1), + $ LWORK-2*N, IERR ) +* + IF ( L2PERT ) THEN + XSC = DSQRT(SMALL)/EPSLN + DO 3959 p = 2, NR + DO 3958 q = 1, p - 1 + TEMP1 = XSC * MIN(DABS(V(p,p)),DABS(V(q,q))) + IF ( DABS(V(q,p)) .LE. TEMP1 ) + $ V(q,p) = DSIGN( TEMP1, V(q,p) ) + 3958 CONTINUE + 3959 CONTINUE + END IF +* + IF ( NR .NE. N ) + $ CALL DLACPY( 'A', N, NR, V, LDV, WORK(2*N+1), N ) +* .. save ... +* +* .. this transposed copy should be better than naive + DO 1969 p = 1, NR - 1 + CALL DCOPY( NR-p, V(p,p+1), LDV, V(p+1,p), 1 ) + 1969 CONTINUE +* + CONDR2 = CONDR1 +* + ELSE +* +* .. ill-conditioned case: second QRF with pivoting +* Note that windowed pivoting would be equaly good +* numerically, and more run-time efficient. So, in +* an optimal implementation, the next call to DGEQP3 +* should be replaced with eg. CALL SGEQPX (ACM TOMS #782) +* with properly (carefully) chosen parameters. +* +* R1^t * P2 = Q2 * R2 + DO 3003 p = 1, NR + IWORK(N+p) = 0 + 3003 CONTINUE + CALL DGEQP3( N, NR, V, LDV, IWORK(N+1), WORK(N+1), + $ WORK(2*N+1), LWORK-2*N, IERR ) +** CALL DGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1), +** $ LWORK-2*N, IERR ) + IF ( L2PERT ) THEN + XSC = DSQRT(SMALL) + DO 3969 p = 2, NR + DO 3968 q = 1, p - 1 + TEMP1 = XSC * MIN(DABS(V(p,p)),DABS(V(q,q))) + IF ( DABS(V(q,p)) .LE. TEMP1 ) + $ V(q,p) = DSIGN( TEMP1, V(q,p) ) + 3968 CONTINUE + 3969 CONTINUE + END IF +* + CALL DLACPY( 'A', N, NR, V, LDV, WORK(2*N+1), N ) +* + IF ( L2PERT ) THEN + XSC = DSQRT(SMALL) + DO 8970 p = 2, NR + DO 8971 q = 1, p - 1 + TEMP1 = XSC * MIN(DABS(V(p,p)),DABS(V(q,q))) + V(p,q) = - DSIGN( TEMP1, V(q,p) ) + 8971 CONTINUE + 8970 CONTINUE + ELSE + CALL DLASET( 'L',NR-1,NR-1,ZERO,ZERO,V(2,1),LDV ) + END IF +* Now, compute R2 = L3 * Q3, the LQ factorization. + CALL DGELQF( NR, NR, V, LDV, WORK(2*N+N*NR+1), + $ WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, IERR ) +* .. and estimate the condition number + CALL DLACPY( 'L',NR,NR,V,LDV,WORK(2*N+N*NR+NR+1),NR ) + DO 4950 p = 1, NR + TEMP1 = DNRM2( p, WORK(2*N+N*NR+NR+p), NR ) + CALL DSCAL( p, ONE/TEMP1, WORK(2*N+N*NR+NR+p), NR ) + 4950 CONTINUE + CALL DPOCON( 'L',NR,WORK(2*N+N*NR+NR+1),NR,ONE,TEMP1, + $ WORK(2*N+N*NR+NR+NR*NR+1),IWORK(M+2*N+1),IERR ) + CONDR2 = ONE / DSQRT(TEMP1) +* + IF ( CONDR2 .GE. COND_OK ) THEN +* .. save the Householder vectors used for Q3 +* (this overwrittes the copy of R2, as it will not be +* needed in this branch, but it does not overwritte the +* Huseholder vectors of Q2.). + CALL DLACPY( 'U', NR, NR, V, LDV, WORK(2*N+1), N ) +* .. and the rest of the information on Q3 is in +* WORK(2*N+N*NR+1:2*N+N*NR+N) + END IF +* + END IF +* + IF ( L2PERT ) THEN + XSC = DSQRT(SMALL) + DO 4968 q = 2, NR + TEMP1 = XSC * V(q,q) + DO 4969 p = 1, q - 1 +* V(p,q) = - DSIGN( TEMP1, V(q,p) ) + V(p,q) = - DSIGN( TEMP1, V(p,q) ) + 4969 CONTINUE + 4968 CONTINUE + ELSE + CALL DLASET( 'U', NR-1,NR-1, ZERO,ZERO, V(1,2), LDV ) + END IF +* +* Second preconditioning finished; continue with Jacobi SVD +* The input matrix is lower trinagular. +* +* Recover the right singular vectors as solution of a well +* conditioned triangular matrix equation. +* + IF ( CONDR1 .LT. COND_OK ) THEN +* + CALL DGESVJ( 'L','U','N',NR,NR,V,LDV,SVA,NR,U, + $ LDU,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,INFO ) + SCALEM = WORK(2*N+N*NR+NR+1) + NUMRANK = IDNINT(WORK(2*N+N*NR+NR+2)) + DO 3970 p = 1, NR + CALL DCOPY( NR, V(1,p), 1, U(1,p), 1 ) + CALL DSCAL( NR, SVA(p), V(1,p), 1 ) + 3970 CONTINUE + +* .. pick the right matrix equation and solve it +* + IF ( NR .EQ. N ) THEN +* :)) .. best case, R1 is inverted. The solution of this matrix +* equation is Q2*V2 = the product of the Jacobi rotations +* used in DGESVJ, premultiplied with the orthogonal matrix +* from the second QR factorization. + CALL DTRSM( 'L','U','N','N', NR,NR,ONE, A,LDA, V,LDV ) + ELSE +* .. R1 is well conditioned, but non-square. Transpose(R2) +* is inverted to get the product of the Jacobi rotations +* used in DGESVJ. The Q-factor from the second QR +* factorization is then built in explicitly. + CALL DTRSM('L','U','T','N',NR,NR,ONE,WORK(2*N+1), + $ N,V,LDV) + IF ( NR .LT. N ) THEN + CALL DLASET('A',N-NR,NR,ZERO,ZERO,V(NR+1,1),LDV) + CALL DLASET('A',NR,N-NR,ZERO,ZERO,V(1,NR+1),LDV) + CALL DLASET('A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV) + END IF + CALL DORMQR('L','N',N,N,NR,WORK(2*N+1),N,WORK(N+1), + $ V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR) + END IF +* + ELSE IF ( CONDR2 .LT. COND_OK ) THEN +* +* :) .. the input matrix A is very likely a relative of +* the Kahan matrix :) +* The matrix R2 is inverted. The solution of the matrix equation +* is Q3^T*V3 = the product of the Jacobi rotations (appplied to +* the lower triangular L3 from the LQ factorization of +* R2=L3*Q3), pre-multiplied with the transposed Q3. + CALL DGESVJ( 'L', 'U', 'N', NR, NR, V, LDV, SVA, NR, U, + $ LDU, WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, INFO ) + SCALEM = WORK(2*N+N*NR+NR+1) + NUMRANK = IDNINT(WORK(2*N+N*NR+NR+2)) + DO 3870 p = 1, NR + CALL DCOPY( NR, V(1,p), 1, U(1,p), 1 ) + CALL DSCAL( NR, SVA(p), U(1,p), 1 ) + 3870 CONTINUE + CALL DTRSM('L','U','N','N',NR,NR,ONE,WORK(2*N+1),N,U,LDU) +* .. apply the permutation from the second QR factorization + DO 873 q = 1, NR + DO 872 p = 1, NR + WORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q) + 872 CONTINUE + DO 874 p = 1, NR + U(p,q) = WORK(2*N+N*NR+NR+p) + 874 CONTINUE + 873 CONTINUE + IF ( NR .LT. N ) THEN + CALL DLASET( 'A',N-NR,NR,ZERO,ZERO,V(NR+1,1),LDV ) + CALL DLASET( 'A',NR,N-NR,ZERO,ZERO,V(1,NR+1),LDV ) + CALL DLASET( 'A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV ) + END IF + CALL DORMQR( 'L','N',N,N,NR,WORK(2*N+1),N,WORK(N+1), + $ V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) + ELSE +* Last line of defense. +* #:( This is a rather pathological case: no scaled condition +* improvement after two pivoted QR factorizations. Other +* possibility is that the rank revealing QR factorization +* or the condition estimator has failed, or the COND_OK +* is set very close to ONE (which is unnecessary). Normally, +* this branch should never be executed, but in rare cases of +* failure of the RRQR or condition estimator, the last line of +* defense ensures that DGEJSV completes the task. +* Compute the full SVD of L3 using DGESVJ with explicit +* accumulation of Jacobi rotations. + CALL DGESVJ( 'L', 'U', 'V', NR, NR, V, LDV, SVA, NR, U, + $ LDU, WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, INFO ) + SCALEM = WORK(2*N+N*NR+NR+1) + NUMRANK = IDNINT(WORK(2*N+N*NR+NR+2)) + IF ( NR .LT. N ) THEN + CALL DLASET( 'A',N-NR,NR,ZERO,ZERO,V(NR+1,1),LDV ) + CALL DLASET( 'A',NR,N-NR,ZERO,ZERO,V(1,NR+1),LDV ) + CALL DLASET( 'A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV ) + END IF + CALL DORMQR( 'L','N',N,N,NR,WORK(2*N+1),N,WORK(N+1), + $ V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) +* + CALL DORMLQ( 'L', 'T', NR, NR, NR, WORK(2*N+1), N, + $ WORK(2*N+N*NR+1), U, LDU, WORK(2*N+N*NR+NR+1), + $ LWORK-2*N-N*NR-NR, IERR ) + DO 773 q = 1, NR + DO 772 p = 1, NR + WORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q) + 772 CONTINUE + DO 774 p = 1, NR + U(p,q) = WORK(2*N+N*NR+NR+p) + 774 CONTINUE + 773 CONTINUE +* + END IF +* +* Permute the rows of V using the (column) permutation from the +* first QRF. Also, scale the columns to make them unit in +* Euclidean norm. This applies to all cases. +* + TEMP1 = DSQRT(DBLE(N)) * EPSLN + DO 1972 q = 1, N + DO 972 p = 1, N + WORK(2*N+N*NR+NR+IWORK(p)) = V(p,q) + 972 CONTINUE + DO 973 p = 1, N + V(p,q) = WORK(2*N+N*NR+NR+p) + 973 CONTINUE + XSC = ONE / DNRM2( N, V(1,q), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL DSCAL( N, XSC, V(1,q), 1 ) + 1972 CONTINUE +* At this moment, V contains the right singular vectors of A. +* Next, assemble the left singular vector matrix U (M x N). + IF ( NR .LT. M ) THEN + CALL DLASET( 'A', M-NR, NR, ZERO, ZERO, U(NR+1,1), LDU ) + IF ( NR .LT. N1 ) THEN + CALL DLASET('A',NR,N1-NR,ZERO,ZERO,U(1,NR+1),LDU) + CALL DLASET('A',M-NR,N1-NR,ZERO,ONE,U(NR+1,NR+1),LDU) + END IF + END IF +* +* The Q matrix from the first QRF is built into the left singular +* matrix U. This applies to all cases. +* + CALL DORMQR( 'Left', 'No_Tr', M, N1, N, A, LDA, WORK, U, + $ LDU, WORK(N+1), LWORK-N, IERR ) + +* The columns of U are normalized. The cost is O(M*N) flops. + TEMP1 = DSQRT(DBLE(M)) * EPSLN + DO 1973 p = 1, NR + XSC = ONE / DNRM2( M, U(1,p), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL DSCAL( M, XSC, U(1,p), 1 ) + 1973 CONTINUE +* +* If the initial QRF is computed with row pivoting, the left +* singular vectors must be adjusted. +* + IF ( ROWPIV ) + $ CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 ) +* + ELSE +* +* .. the initial matrix A has almost orthogonal columns and +* the second QRF is not needed +* + CALL DLACPY( 'Upper', N, N, A, LDA, WORK(N+1), N ) + IF ( L2PERT ) THEN + XSC = DSQRT(SMALL) + DO 5970 p = 2, N + TEMP1 = XSC * WORK( N + (p-1)*N + p ) + DO 5971 q = 1, p - 1 + WORK(N+(q-1)*N+p)=-DSIGN(TEMP1,WORK(N+(p-1)*N+q)) + 5971 CONTINUE + 5970 CONTINUE + ELSE + CALL DLASET( 'Lower',N-1,N-1,ZERO,ZERO,WORK(N+2),N ) + END IF +* + CALL DGESVJ( 'Upper', 'U', 'N', N, N, WORK(N+1), N, SVA, + $ N, U, LDU, WORK(N+N*N+1), LWORK-N-N*N, INFO ) +* + SCALEM = WORK(N+N*N+1) + NUMRANK = IDNINT(WORK(N+N*N+2)) + DO 6970 p = 1, N + CALL DCOPY( N, WORK(N+(p-1)*N+1), 1, U(1,p), 1 ) + CALL DSCAL( N, SVA(p), WORK(N+(p-1)*N+1), 1 ) + 6970 CONTINUE +* + CALL DTRSM( 'Left', 'Upper', 'NoTrans', 'No UD', N, N, + $ ONE, A, LDA, WORK(N+1), N ) + DO 6972 p = 1, N + CALL DCOPY( N, WORK(N+p), N, V(IWORK(p),1), LDV ) + 6972 CONTINUE + TEMP1 = DSQRT(DBLE(N))*EPSLN + DO 6971 p = 1, N + XSC = ONE / DNRM2( N, V(1,p), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL DSCAL( N, XSC, V(1,p), 1 ) + 6971 CONTINUE +* +* Assemble the left singular vector matrix U (M x N). +* + IF ( N .LT. M ) THEN + CALL DLASET( 'A', M-N, N, ZERO, ZERO, U(N+1,1), LDU ) + IF ( N .LT. N1 ) THEN + CALL DLASET( 'A',N, N1-N, ZERO, ZERO, U(1,N+1),LDU ) + CALL DLASET( 'A',M-N,N1-N, ZERO, ONE,U(N+1,N+1),LDU ) + END IF + END IF + CALL DORMQR( 'Left', 'No Tr', M, N1, N, A, LDA, WORK, U, + $ LDU, WORK(N+1), LWORK-N, IERR ) + TEMP1 = DSQRT(DBLE(M))*EPSLN + DO 6973 p = 1, N1 + XSC = ONE / DNRM2( M, U(1,p), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL DSCAL( M, XSC, U(1,p), 1 ) + 6973 CONTINUE +* + IF ( ROWPIV ) + $ CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 ) +* + END IF +* +* end of the >> almost orthogonal case << in the full SVD +* + ELSE +* +* This branch deploys a preconditioned Jacobi SVD with explicitly +* accumulated rotations. It is included as optional, mainly for +* experimental purposes. It does perfom well, and can also be used. +* In this implementation, this branch will be automatically activated +* if the condition number sigma_max(A) / sigma_min(A) is predicted +* to be greater than the overflow threshold. This is because the +* a posteriori computation of the singular vectors assumes robust +* implementation of BLAS and some LAPACK procedures, capable of working +* in presence of extreme values. Since that is not always the case, ... +* + DO 7968 p = 1, NR + CALL DCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) + 7968 CONTINUE +* + IF ( L2PERT ) THEN + XSC = DSQRT(SMALL/EPSLN) + DO 5969 q = 1, NR + TEMP1 = XSC*DABS( V(q,q) ) + DO 5968 p = 1, N + IF ( ( p .GT. q ) .AND. ( DABS(V(p,q)) .LE. TEMP1 ) + $ .OR. ( p .LT. q ) ) + $ V(p,q) = DSIGN( TEMP1, V(p,q) ) + IF ( p .LT. q ) V(p,q) = - V(p,q) + 5968 CONTINUE + 5969 CONTINUE + ELSE + CALL DLASET( 'U', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV ) + END IF + + CALL DGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1), + $ LWORK-2*N, IERR ) + CALL DLACPY( 'L', N, NR, V, LDV, WORK(2*N+1), N ) +* + DO 7969 p = 1, NR + CALL DCOPY( NR-p+1, V(p,p), LDV, U(p,p), 1 ) + 7969 CONTINUE + + IF ( L2PERT ) THEN + XSC = DSQRT(SMALL/EPSLN) + DO 9970 q = 2, NR + DO 9971 p = 1, q - 1 + TEMP1 = XSC * MIN(DABS(U(p,p)),DABS(U(q,q))) + U(p,q) = - DSIGN( TEMP1, U(q,p) ) + 9971 CONTINUE + 9970 CONTINUE + ELSE + CALL DLASET('U', NR-1, NR-1, ZERO, ZERO, U(1,2), LDU ) + END IF + + CALL DGESVJ( 'G', 'U', 'V', NR, NR, U, LDU, SVA, + $ N, V, LDV, WORK(2*N+N*NR+1), LWORK-2*N-N*NR, INFO ) + SCALEM = WORK(2*N+N*NR+1) + NUMRANK = IDNINT(WORK(2*N+N*NR+2)) + + IF ( NR .LT. N ) THEN + CALL DLASET( 'A',N-NR,NR,ZERO,ZERO,V(NR+1,1),LDV ) + CALL DLASET( 'A',NR,N-NR,ZERO,ZERO,V(1,NR+1),LDV ) + CALL DLASET( 'A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV ) + END IF + + CALL DORMQR( 'L','N',N,N,NR,WORK(2*N+1),N,WORK(N+1), + $ V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) +* +* Permute the rows of V using the (column) permutation from the +* first QRF. Also, scale the columns to make them unit in +* Euclidean norm. This applies to all cases. +* + TEMP1 = DSQRT(DBLE(N)) * EPSLN + DO 7972 q = 1, N + DO 8972 p = 1, N + WORK(2*N+N*NR+NR+IWORK(p)) = V(p,q) + 8972 CONTINUE + DO 8973 p = 1, N + V(p,q) = WORK(2*N+N*NR+NR+p) + 8973 CONTINUE + XSC = ONE / DNRM2( N, V(1,q), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL DSCAL( N, XSC, V(1,q), 1 ) + 7972 CONTINUE +* +* At this moment, V contains the right singular vectors of A. +* Next, assemble the left singular vector matrix U (M x N). +* + IF ( NR .LT. M ) THEN + CALL DLASET( 'A', M-NR, NR, ZERO, ZERO, U(NR+1,1), LDU ) + IF ( NR .LT. N1 ) THEN + CALL DLASET( 'A',NR, N1-NR, ZERO, ZERO, U(1,NR+1),LDU ) + CALL DLASET( 'A',M-NR,N1-NR, ZERO, ONE,U(NR+1,NR+1),LDU ) + END IF + END IF +* + CALL DORMQR( 'Left', 'No Tr', M, N1, N, A, LDA, WORK, U, + $ LDU, WORK(N+1), LWORK-N, IERR ) +* + IF ( ROWPIV ) + $ CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 ) +* +* + END IF + IF ( TRANSP ) THEN +* .. swap U and V because the procedure worked on A^t + DO 6974 p = 1, N + CALL DSWAP( N, U(1,p), 1, V(1,p), 1 ) + 6974 CONTINUE + END IF +* + END IF +* end of the full SVD +* +* Undo scaling, if necessary (and possible) +* + IF ( USCAL2 .LE. (BIG/SVA(1))*USCAL1 ) THEN + CALL DLASCL( 'G', 0, 0, USCAL1, USCAL2, NR, 1, SVA, N, IERR ) + USCAL1 = ONE + USCAL2 = ONE + END IF +* + IF ( NR .LT. N ) THEN + DO 3004 p = NR+1, N + SVA(p) = ZERO + 3004 CONTINUE + END IF +* + WORK(1) = USCAL2 * SCALEM + WORK(2) = USCAL1 + IF ( ERREST ) WORK(3) = SCONDA + IF ( LSVEC .AND. RSVEC ) THEN + WORK(4) = CONDR1 + WORK(5) = CONDR2 + END IF + IF ( L2TRAN ) THEN + WORK(6) = ENTRA + WORK(7) = ENTRAT + END IF +* + IWORK(1) = NR + IWORK(2) = NUMRANK + IWORK(3) = WARNING +* + RETURN +* .. +* .. END OF DGEJSV +* .. + END +* diff --git a/dspl/liblapack/SRC/dgelq.f b/dspl/liblapack/SRC/dgelq.f new file mode 100644 index 0000000..ece6450 --- /dev/null +++ b/dspl/liblapack/SRC/dgelq.f @@ -0,0 +1,306 @@ +* +* Definition: +* =========== +* +* SUBROUTINE DGELQ( M, N, A, LDA, T, TSIZE, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, TSIZE, LWORK +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), T( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> DGELQ computes a LQ factorization of an M-by-N matrix A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the M-by-min(M,N) lower trapezoidal matrix L +*> (L is lower triangular if M <= N); +*> the elements above the diagonal are used to store part of the +*> data structure to represent Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (MAX(5,TSIZE)) +*> On exit, if INFO = 0, T(1) returns optimal (or either minimal +*> or optimal, if query is assumed) TSIZE. See TSIZE for details. +*> Remaining T contains part of the data structure used to represent Q. +*> If one wants to apply or construct Q, then one needs to keep T +*> (in addition to A) and pass it to further subroutines. +*> \endverbatim +*> +*> \param[in] TSIZE +*> \verbatim +*> TSIZE is INTEGER +*> If TSIZE >= 5, the dimension of the array T. +*> If TSIZE = -1 or -2, then a workspace query is assumed. The routine +*> only calculates the sizes of the T and WORK arrays, returns these +*> values as the first entries of the T and WORK arrays, and no error +*> message related to T or WORK is issued by XERBLA. +*> If TSIZE = -1, the routine calculates optimal size of T for the +*> optimum performance and returns this value in T(1). +*> If TSIZE = -2, the routine calculates minimal size of T and +*> returns this value in T(1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) contains optimal (or either minimal +*> or optimal, if query was assumed) LWORK. +*> See LWORK for details. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If LWORK = -1 or -2, then a workspace query is assumed. The routine +*> only calculates the sizes of the T and WORK arrays, returns these +*> values as the first entries of the T and WORK arrays, and no error +*> message related to T or WORK is issued by XERBLA. +*> If LWORK = -1, the routine calculates optimal size of WORK for the +*> optimal performance and returns this value in WORK(1). +*> If LWORK = -2, the routine calculates minimal size of WORK and +*> returns this value in WORK(1). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details +* ==================== +*> +*> \verbatim +*> +*> The goal of the interface is to give maximum freedom to the developers for +*> creating any LQ factorization algorithm they wish. The triangular +*> (trapezoidal) L has to be stored in the lower part of A. The lower part of A +*> and the array T can be used to store any relevant information for applying or +*> constructing the Q factor. The WORK array can safely be discarded after exit. +*> +*> Caution: One should not expect the sizes of T and WORK to be the same from one +*> LAPACK implementation to the other, or even from one execution to the other. +*> A workspace query (for T and WORK) is needed at each execution. However, +*> for a given execution, the size of T and WORK are fixed and will not change +*> from one query to the next. +*> +*> \endverbatim +*> +*> \par Further Details particular to this LAPACK implementation: +* ============================================================== +*> +*> \verbatim +*> +*> These details are particular for this LAPACK implementation. Users should not +*> take them for granted. These details may change in the future, and are unlikely not +*> true for another LAPACK implementation. These details are relevant if one wants +*> to try to understand the code. They are not part of the interface. +*> +*> In this version, +*> +*> T(2): row block size (MB) +*> T(3): column block size (NB) +*> T(6:TSIZE): data structure needed for Q, computed by +*> DLASWLQ or DGELQT +*> +*> Depending on the matrix dimensions M and N, and row and column +*> block sizes MB and NB returned by ILAENV, DGELQ will use either +*> DLASWLQ (if the matrix is short-and-wide) or DGELQT to compute +*> the LQ factorization. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGELQ( M, N, A, LDA, T, TSIZE, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, TSIZE, LWORK +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), T( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LQUERY, LMINWS, MINT, MINW + INTEGER MB, NB, MINTSZ, NBLCKS +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGELQT, DLASWLQ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, MOD +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 +* + LQUERY = ( TSIZE.EQ.-1 .OR. TSIZE.EQ.-2 .OR. + $ LWORK.EQ.-1 .OR. LWORK.EQ.-2 ) +* + MINT = .FALSE. + MINW = .FALSE. + IF( TSIZE.EQ.-2 .OR. LWORK.EQ.-2 ) THEN + IF( TSIZE.NE.-1 ) MINT = .TRUE. + IF( LWORK.NE.-1 ) MINW = .TRUE. + END IF +* +* Determine the block size +* + IF( MIN( M, N ).GT.0 ) THEN + MB = ILAENV( 1, 'DGELQ ', ' ', M, N, 1, -1 ) + NB = ILAENV( 1, 'DGELQ ', ' ', M, N, 2, -1 ) + ELSE + MB = 1 + NB = N + END IF + IF( MB.GT.MIN( M, N ) .OR. MB.LT.1 ) MB = 1 + IF( NB.GT.N .OR. NB.LE.M ) NB = N + MINTSZ = M + 5 + IF ( NB.GT.M .AND. N.GT.M ) THEN + IF( MOD( N - M, NB - M ).EQ.0 ) THEN + NBLCKS = ( N - M ) / ( NB - M ) + ELSE + NBLCKS = ( N - M ) / ( NB - M ) + 1 + END IF + ELSE + NBLCKS = 1 + END IF +* +* Determine if the workspace size satisfies minimal size +* + LMINWS = .FALSE. + IF( ( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) .OR. LWORK.LT.MB*M ) + $ .AND. ( LWORK.GE.M ) .AND. ( TSIZE.GE.MINTSZ ) + $ .AND. ( .NOT.LQUERY ) ) THEN + IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) ) THEN + LMINWS = .TRUE. + MB = 1 + NB = N + END IF + IF( LWORK.LT.MB*M ) THEN + LMINWS = .TRUE. + MB = 1 + END IF + END IF +* + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) + $ .AND. ( .NOT.LQUERY ) .AND. ( .NOT.LMINWS ) ) THEN + INFO = -6 + ELSE IF( ( LWORK.LT.MAX( 1, M*MB ) ) .AND .( .NOT.LQUERY ) + $ .AND. ( .NOT.LMINWS ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( MINT ) THEN + T( 1 ) = MINTSZ + ELSE + T( 1 ) = MB*M*NBLCKS + 5 + END IF + T( 2 ) = MB + T( 3 ) = NB + IF( MINW ) THEN + WORK( 1 ) = MAX( 1, N ) + ELSE + WORK( 1 ) = MAX( 1, MB*M ) + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N ).EQ.0 ) THEN + RETURN + END IF +* +* The LQ Decomposition +* + IF( ( N.LE.M ) .OR. ( NB.LE.M ) .OR. ( NB.GE.N ) ) THEN + CALL DGELQT( M, N, MB, A, LDA, T( 6 ), MB, WORK, INFO ) + ELSE + CALL DLASWLQ( M, N, MB, NB, A, LDA, T( 6 ), MB, WORK, + $ LWORK, INFO ) + END IF +* + WORK( 1 ) = MAX( 1, MB*M ) +* + RETURN +* +* End of DGELQ +* + END diff --git a/dspl/liblapack/SRC/dgelq2.f b/dspl/liblapack/SRC/dgelq2.f new file mode 100644 index 0000000..04aa57f --- /dev/null +++ b/dspl/liblapack/SRC/dgelq2.f @@ -0,0 +1,192 @@ +*> \brief \b DGELQ2 computes the LQ factorization of a general rectangular matrix using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGELQ2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGELQ2 computes an LQ factorization of a real m by n matrix A: +*> A = L * Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the m by min(m,n) lower trapezoidal matrix L (L is +*> lower triangular if m <= n); the elements above the diagonal, +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), +*> and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + DOUBLE PRECISION AII +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELQ2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i,i+1:n) +* + CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, + $ TAU( I ) ) + IF( I.LT.M ) THEN +* +* Apply H(i) to A(i+1:m,i:n) from the right +* + AII = A( I, I ) + A( I, I ) = ONE + CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ), + $ A( I+1, I ), LDA, WORK ) + A( I, I ) = AII + END IF + 10 CONTINUE + RETURN +* +* End of DGELQ2 +* + END diff --git a/dspl/liblapack/SRC/dgelqf.f b/dspl/liblapack/SRC/dgelqf.f new file mode 100644 index 0000000..834c471 --- /dev/null +++ b/dspl/liblapack/SRC/dgelqf.f @@ -0,0 +1,269 @@ +*> \brief \b DGELQF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGELQF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGELQF computes an LQ factorization of a real M-by-N matrix A: +*> A = L * Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the m-by-min(m,n) lower trapezoidal matrix L (L is +*> lower triangular if m <= n); the elements above the diagonal, +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), +*> and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DGELQ2, DLARFB, DLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DGELQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DGELQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the LQ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL DGELQ2( IB, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i+ib:m,i:n) from the right +* + CALL DLARFB( 'Right', 'No transpose', 'Forward', + $ 'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ), + $ LDA, WORK, LDWORK, A( I+IB, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL DGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of DGELQF +* + END diff --git a/dspl/liblapack/SRC/dgelqt.f b/dspl/liblapack/SRC/dgelqt.f new file mode 100644 index 0000000..2124f3d --- /dev/null +++ b/dspl/liblapack/SRC/dgelqt.f @@ -0,0 +1,210 @@ +*> \brief \b DGELQT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDT, M, N, MB +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGELQT computes a blocked LQ factorization of a real M-by-N matrix A +*> using the compact WY representation of Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The block size to be used in the blocked QR. MIN(M,N) >= MB >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the M-by-MIN(M,N) lower trapezoidal matrix L (L is +*> lower triangular if M <= N); the elements above the diagonal +*> are the rows of V. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,MIN(M,N)) +*> The upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See below +*> for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MB*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix V stores the elementary reflectors H(i) in the i-th row +*> above the diagonal. For example, if M=5 and N=3, the matrix V is +*> +*> V = ( 1 v1 v1 v1 v1 ) +*> ( 1 v2 v2 v2 ) +*> ( 1 v3 v3 ) +*> +*> +*> where the vi's represent the vectors which define H(i), which are returned +*> in the matrix A. The 1's along the diagonal of V are not stored in A. +*> Let K=MIN(M,N). The number of blocks is B = ceiling(K/MB), where each +*> block is of order MB except for the last block, which is of order +*> IB = K - (B-1)*MB. For each of the B blocks, a upper triangular block +*> reflector factor is computed: T1, T2, ..., TB. The MB-by-MB (and IB-by-IB +*> for the last block) T's are stored in the MB-by-K matrix T as +*> +*> T = (T1 T2 ... TB). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDT, M, N, MB +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + INTEGER I, IB, IINFO, K +* .. +* .. External Subroutines .. + EXTERNAL DGEQRT2, DGELQT3, DGEQRT3, DLARFB, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( MB.LT.1 .OR. ( MB.GT.MIN(M,N) .AND. MIN(M,N).GT.0 ) )THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDT.LT.MB ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELQT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) RETURN +* +* Blocked loop of length K +* + DO I = 1, K, MB + IB = MIN( K-I+1, MB ) +* +* Compute the LQ factorization of the current block A(I:M,I:I+IB-1) +* + CALL DGELQT3( IB, N-I+1, A(I,I), LDA, T(1,I), LDT, IINFO ) + IF( I+IB.LE.M ) THEN +* +* Update by applying H**T to A(I:M,I+IB:N) from the right +* + CALL DLARFB( 'R', 'N', 'F', 'R', M-I-IB+1, N-I+1, IB, + $ A( I, I ), LDA, T( 1, I ), LDT, + $ A( I+IB, I ), LDA, WORK , M-I-IB+1 ) + END IF + END DO + RETURN +* +* End of DGELQT +* + END diff --git a/dspl/liblapack/SRC/dgelqt3.f b/dspl/liblapack/SRC/dgelqt3.f new file mode 100644 index 0000000..f19a91c --- /dev/null +++ b/dspl/liblapack/SRC/dgelqt3.f @@ -0,0 +1,259 @@ +*> \brief \b DGELQT3 recursively computes a LQ factorization of a general real or complex matrix using the compact WY representation of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEQRT3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE DGELQT3( M, N, A, LDA, T, LDT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, LDT +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGELQT3 recursively computes a LQ factorization of a real M-by-N +*> matrix A, using the compact WY representation of Q. +*> +*> Based on the algorithm of Elmroth and Gustavson, +*> IBM J. Res. Develop. Vol 44 No. 4 July 2000. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M =< N. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the real M-by-N matrix A. On exit, the elements on and +*> below the diagonal contain the N-by-N lower triangular matrix L; the +*> elements above the diagonal are the rows of V. See below for +*> further details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,N) +*> The N-by-N upper triangular factor of the block reflector. +*> The elements on and above the diagonal contain the block +*> reflector T; the elements below the diagonal are not used. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix V stores the elementary reflectors H(i) in the i-th row +*> above the diagonal. For example, if M=5 and N=3, the matrix V is +*> +*> V = ( 1 v1 v1 v1 v1 ) +*> ( 1 v2 v2 v2 ) +*> ( 1 v3 v3 v3 ) +*> +*> +*> where the vi's represent the vectors which define H(i), which are returned +*> in the matrix A. The 1's along the diagonal of V are not stored in A. The +*> block reflector H is then given by +*> +*> H = I - V * T * V**T +*> +*> where V**T is the transpose of V. +*> +*> For details of the algorithm, see Elmroth and Gustavson (cited above). +*> \endverbatim +*> +* ===================================================================== + RECURSIVE SUBROUTINE DGELQT3( M, N, A, LDA, T, LDT, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, LDT +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+00 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, J, J1, M1, M2, IINFO +* .. +* .. External Subroutines .. + EXTERNAL DLARFG, DTRMM, DGEMM, XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( M .LT. 0 ) THEN + INFO = -1 + ELSE IF( N .LT. M ) THEN + INFO = -2 + ELSE IF( LDA .LT. MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LDT .LT. MAX( 1, M ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELQT3', -INFO ) + RETURN + END IF +* + IF( M.EQ.1 ) THEN +* +* Compute Householder transform when N=1 +* + CALL DLARFG( N, A, A( 1, MIN( 2, N ) ), LDA, T ) +* + ELSE +* +* Otherwise, split A into blocks... +* + M1 = M/2 + M2 = M-M1 + I1 = MIN( M1+1, M ) + J1 = MIN( M+1, N ) +* +* Compute A(1:M1,1:N) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1^H +* + CALL DGELQT3( M1, N, A, LDA, T, LDT, IINFO ) +* +* Compute A(J1:M,1:N) = Q1^H A(J1:M,1:N) [workspace: T(1:N1,J1:N)] +* + DO I=1,M2 + DO J=1,M1 + T( I+M1, J ) = A( I+M1, J ) + END DO + END DO + CALL DTRMM( 'R', 'U', 'T', 'U', M2, M1, ONE, + & A, LDA, T( I1, 1 ), LDT ) +* + CALL DGEMM( 'N', 'T', M2, M1, N-M1, ONE, A( I1, I1 ), LDA, + & A( 1, I1 ), LDA, ONE, T( I1, 1 ), LDT) +* + CALL DTRMM( 'R', 'U', 'N', 'N', M2, M1, ONE, + & T, LDT, T( I1, 1 ), LDT ) +* + CALL DGEMM( 'N', 'N', M2, N-M1, M1, -ONE, T( I1, 1 ), LDT, + & A( 1, I1 ), LDA, ONE, A( I1, I1 ), LDA ) +* + CALL DTRMM( 'R', 'U', 'N', 'U', M2, M1 , ONE, + & A, LDA, T( I1, 1 ), LDT ) +* + DO I=1,M2 + DO J=1,M1 + A( I+M1, J ) = A( I+M1, J ) - T( I+M1, J ) + T( I+M1, J )=0 + END DO + END DO +* +* Compute A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2^H +* + CALL DGELQT3( M2, N-M1, A( I1, I1 ), LDA, + & T( I1, I1 ), LDT, IINFO ) +* +* Compute T3 = T(J1:N1,1:N) = -T1 Y1^H Y2 T2 +* + DO I=1,M2 + DO J=1,M1 + T( J, I+M1 ) = (A( J, I+M1 )) + END DO + END DO +* + CALL DTRMM( 'R', 'U', 'T', 'U', M1, M2, ONE, + & A( I1, I1 ), LDA, T( 1, I1 ), LDT ) +* + CALL DGEMM( 'N', 'T', M1, M2, N-M, ONE, A( 1, J1 ), LDA, + & A( I1, J1 ), LDA, ONE, T( 1, I1 ), LDT ) +* + CALL DTRMM( 'L', 'U', 'N', 'N', M1, M2, -ONE, T, LDT, + & T( 1, I1 ), LDT ) +* + CALL DTRMM( 'R', 'U', 'N', 'N', M1, M2, ONE, + & T( I1, I1 ), LDT, T( 1, I1 ), LDT ) +* +* +* +* Y = (Y1,Y2); L = [ L1 0 ]; T = [T1 T3] +* [ A(1:N1,J1:N) L2 ] [ 0 T2] +* + END IF +* + RETURN +* +* End of DGELQT3 +* + END diff --git a/dspl/liblapack/SRC/dgels.f b/dspl/liblapack/SRC/dgels.f new file mode 100644 index 0000000..33e6d51 --- /dev/null +++ b/dspl/liblapack/SRC/dgels.f @@ -0,0 +1,504 @@ +*> \brief DGELS solves overdetermined or underdetermined systems for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGELS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGELS solves overdetermined or underdetermined real linear systems +*> involving an M-by-N matrix A, or its transpose, using a QR or LQ +*> factorization of A. It is assumed that A has full rank. +*> +*> The following options are provided: +*> +*> 1. If TRANS = 'N' and m >= n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A*X ||. +*> +*> 2. If TRANS = 'N' and m < n: find the minimum norm solution of +*> an underdetermined system A * X = B. +*> +*> 3. If TRANS = 'T' and m >= n: find the minimum norm solution of +*> an underdetermined system A**T * X = B. +*> +*> 4. If TRANS = 'T' and m < n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A**T * X ||. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': the linear system involves A; +*> = 'T': the linear system involves A**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of the matrices B and X. NRHS >=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if M >= N, A is overwritten by details of its QR +*> factorization as returned by DGEQRF; +*> if M < N, A is overwritten by details of its LQ +*> factorization as returned by DGELQF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the matrix B of right hand side vectors, stored +*> columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +*> if TRANS = 'T'. +*> On exit, if INFO = 0, B is overwritten by the solution +*> vectors, stored columnwise: +*> if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +*> squares solution vectors; the residual sum of squares for the +*> solution in each column is given by the sum of squares of +*> elements N+1 to M in that column; +*> if TRANS = 'N' and m < n, rows 1 to N of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'T' and m >= n, rows 1 to M of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'T' and m < n, rows 1 to M of B contain the +*> least squares solution vectors; the residual sum of squares +*> for the solution in each column is given by the sum of +*> squares of elements M+1 to N in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= MAX(1,M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= max( 1, MN + max( MN, NRHS ) ). +*> For optimal performance, +*> LWORK >= max( 1, MN + max( MN, NRHS )*NB ). +*> where MN = min(M,N) and NB is the optimum block size. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of the +*> triangular factor of A is zero, so that A does not have +*> full rank; the least squares solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEsolve +* +* ===================================================================== + SUBROUTINE DGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TPSD + INTEGER BROW, I, IASCL, IBSCL, J, MN, NB, SCLLEN, WSIZE + DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLABAD, DLAMCH, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DGELQF, DGEQRF, DLASCL, DLASET, DORMLQ, DORMQR, + $ DTRTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'T' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND. .NOT.LQUERY ) + $ THEN + INFO = -10 + END IF +* +* Figure out optimal block size +* + IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN +* + TPSD = .TRUE. + IF( LSAME( TRANS, 'N' ) ) + $ TPSD = .FALSE. +* + IF( M.GE.N ) THEN + NB = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + IF( TPSD ) THEN + NB = MAX( NB, ILAENV( 1, 'DORMQR', 'LN', M, NRHS, N, + $ -1 ) ) + ELSE + NB = MAX( NB, ILAENV( 1, 'DORMQR', 'LT', M, NRHS, N, + $ -1 ) ) + END IF + ELSE + NB = ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) + IF( TPSD ) THEN + NB = MAX( NB, ILAENV( 1, 'DORMLQ', 'LT', N, NRHS, M, + $ -1 ) ) + ELSE + NB = MAX( NB, ILAENV( 1, 'DORMLQ', 'LN', N, NRHS, M, + $ -1 ) ) + END IF + END IF +* + WSIZE = MAX( 1, MN+MAX( MN, NRHS )*NB ) + WORK( 1 ) = DBLE( WSIZE ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELS ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + GO TO 50 + END IF +* + BROW = M + IF( TPSD ) + $ BROW = N + BNRM = DLANGE( 'M', BROW, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF( M.GE.N ) THEN +* +* compute QR factorization of A +* + CALL DGEQRF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least N, optimally N*NB +* + IF( .NOT.TPSD ) THEN +* +* Least-Squares Problem min || A * X - B || +* +* B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) +* + CALL DORMQR( 'Left', 'Transpose', M, NRHS, N, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* +* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL DTRTRS( 'Upper', 'No transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = N +* + ELSE +* +* Underdetermined system of equations A**T * X = B +* +* B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) +* + CALL DTRTRS( 'Upper', 'Transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* B(N+1:M,1:NRHS) = ZERO +* + DO 20 J = 1, NRHS + DO 10 I = N + 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) +* + CALL DORMQR( 'Left', 'No transpose', M, NRHS, N, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* + SCLLEN = M +* + END IF +* + ELSE +* +* Compute LQ factorization of A +* + CALL DGELQF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least M, optimally M*NB. +* + IF( .NOT.TPSD ) THEN +* +* underdetermined system of equations A * X = B +* +* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL DTRTRS( 'Lower', 'No transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* B(M+1:N,1:NRHS) = 0 +* + DO 40 J = 1, NRHS + DO 30 I = M + 1, N + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS) +* + CALL DORMLQ( 'Left', 'Transpose', N, NRHS, M, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* + SCLLEN = N +* + ELSE +* +* overdetermined system min || A**T * X - B || +* +* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) +* + CALL DORMLQ( 'Left', 'No transpose', N, NRHS, M, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* +* B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) +* + CALL DTRTRS( 'Lower', 'Transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + 50 CONTINUE + WORK( 1 ) = DBLE( WSIZE ) +* + RETURN +* +* End of DGELS +* + END diff --git a/dspl/liblapack/SRC/dgelsd.f b/dspl/liblapack/SRC/dgelsd.f new file mode 100644 index 0000000..f2cfd63 --- /dev/null +++ b/dspl/liblapack/SRC/dgelsd.f @@ -0,0 +1,629 @@ +*> \brief DGELSD computes the minimum-norm solution to a linear least squares problem for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGELSD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, +* WORK, LWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGELSD computes the minimum-norm solution to a real linear least +*> squares problem: +*> minimize 2-norm(| b - A*x |) +*> using the singular value decomposition (SVD) of A. A is an M-by-N +*> matrix which may be rank-deficient. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> +*> The problem is solved in three steps: +*> (1) Reduce the coefficient matrix A to bidiagonal form with +*> Householder transformations, reducing the original problem +*> into a "bidiagonal least squares problem" (BLS) +*> (2) Solve the BLS using a divide and conquer approach. +*> (3) Apply back all the Householder transformations to solve +*> the original least squares problem. +*> +*> The effective rank of A is determined by treating as zero those +*> singular values which are less than RCOND times the largest singular +*> value. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A has been destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the M-by-NRHS right hand side matrix B. +*> On exit, B is overwritten by the N-by-NRHS solution +*> matrix X. If m >= n and RANK = n, the residual +*> sum-of-squares for the solution in the i-th column is given +*> by the sum of squares of elements n+1:m in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,max(M,N)). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (min(M,N)) +*> The singular values of A in decreasing order. +*> The condition number of A in the 2-norm = S(1)/S(min(m,n)). +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> RCOND is used to determine the effective rank of A. +*> Singular values S(i) <= RCOND*S(1) are treated as zero. +*> If RCOND < 0, machine precision is used instead. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The effective rank of A, i.e., the number of singular values +*> which are greater than RCOND*S(1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK must be at least 1. +*> The exact minimum amount of workspace needed depends on M, +*> N and NRHS. As long as LWORK is at least +*> 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, +*> if M is greater than or equal to N or +*> 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, +*> if M is less than N, the code will execute correctly. +*> SMLSIZ is returned by ILAENV and is equal to the maximum +*> size of the subproblems at the bottom of the computation +*> tree (usually about 25), and +*> NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> LIWORK >= max(1, 3 * MINMN * NLVL + 11 * MINMN), +*> where MINMN = MIN( M,N ). +*> On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: the algorithm for computing the SVD failed to converge; +*> if INFO = i, i off-diagonal elements of an intermediate +*> bidiagonal form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleGEsolve +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Ren-Cang Li, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +* +* ===================================================================== + SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, + $ WORK, LWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER IASCL, IBSCL, IE, IL, ITAU, ITAUP, ITAUQ, + $ LDWORK, LIWORK, MAXMN, MAXWRK, MINMN, MINWRK, + $ MM, MNTHR, NLVL, NWORK, SMLSIZ, WLALSD + DOUBLE PRECISION ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM +* .. +* .. External Subroutines .. + EXTERNAL DGEBRD, DGELQF, DGEQRF, DLABAD, DLACPY, DLALSD, + $ DLASCL, DLASET, DORMBR, DORMLQ, DORMQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, INT, LOG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + MNTHR = ILAENV( 6, 'DGELSD', ' ', M, N, NRHS, -1 ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN + INFO = -7 + END IF +* + SMLSIZ = ILAENV( 9, 'DGELSD', ' ', 0, 0, 0, 0 ) +* +* Compute workspace. +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + MINWRK = 1 + LIWORK = 1 + MINMN = MAX( 1, MINMN ) + NLVL = MAX( INT( LOG( DBLE( MINMN ) / DBLE( SMLSIZ+1 ) ) / + $ LOG( TWO ) ) + 1, 0 ) +* + IF( INFO.EQ.0 ) THEN + MAXWRK = 0 + LIWORK = 3*MINMN*NLVL + 11*MINMN + MM = M + IF( M.GE.N .AND. M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns. +* + MM = N + MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'DGEQRF', ' ', M, N, + $ -1, -1 ) ) + MAXWRK = MAX( MAXWRK, N+NRHS* + $ ILAENV( 1, 'DORMQR', 'LT', M, NRHS, N, -1 ) ) + END IF + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined. +* + MAXWRK = MAX( MAXWRK, 3*N+( MM+N )* + $ ILAENV( 1, 'DGEBRD', ' ', MM, N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*N+NRHS* + $ ILAENV( 1, 'DORMBR', 'QLT', MM, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*N+( N-1 )* + $ ILAENV( 1, 'DORMBR', 'PLN', N, NRHS, N, -1 ) ) + WLALSD = 9*N+2*N*SMLSIZ+8*N*NLVL+N*NRHS+(SMLSIZ+1)**2 + MAXWRK = MAX( MAXWRK, 3*N+WLALSD ) + MINWRK = MAX( 3*N+MM, 3*N+NRHS, 3*N+WLALSD ) + END IF + IF( N.GT.M ) THEN + WLALSD = 9*M+2*M*SMLSIZ+8*M*NLVL+M*NRHS+(SMLSIZ+1)**2 + IF( N.GE.MNTHR ) THEN +* +* Path 2a - underdetermined, with many more columns +* than rows. +* + MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) + MAXWRK = MAX( MAXWRK, M*M+4*M+2*M* + $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M+4*M+NRHS* + $ ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M+4*M+( M-1 )* + $ ILAENV( 1, 'DORMBR', 'PLN', M, NRHS, M, -1 ) ) + IF( NRHS.GT.1 ) THEN + MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS ) + ELSE + MAXWRK = MAX( MAXWRK, M*M+2*M ) + END IF + MAXWRK = MAX( MAXWRK, M+NRHS* + $ ILAENV( 1, 'DORMLQ', 'LT', N, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M+4*M+WLALSD ) +! XXX: Ensure the Path 2a case below is triggered. The workspace +! calculation should use queries for all routines eventually. + MAXWRK = MAX( MAXWRK, + $ 4*M+M*M+MAX( M, 2*M-4, NRHS, N-3*M ) ) + ELSE +* +* Path 2 - remaining underdetermined cases. +* + MAXWRK = 3*M + ( N+M )*ILAENV( 1, 'DGEBRD', ' ', M, N, + $ -1, -1 ) + MAXWRK = MAX( MAXWRK, 3*M+NRHS* + $ ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*M+M* + $ ILAENV( 1, 'DORMBR', 'PLN', N, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*M+WLALSD ) + END IF + MINWRK = MAX( 3*M+NRHS, 3*M+M, 3*M+WLALSD ) + END IF + MINWRK = MIN( MINWRK, MAXWRK ) + WORK( 1 ) = MAXWRK + IWORK( 1 ) = LIWORK + + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELSD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + GO TO 10 + END IF +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters. +* + EPS = DLAMCH( 'P' ) + SFMIN = DLAMCH( 'S' ) + SMLNUM = SFMIN / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A if max entry outside range [SMLNUM,BIGNUM]. +* + ANRM = DLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM. +* + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM. +* + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 ) + RANK = 0 + GO TO 10 + END IF +* +* Scale B if max entry outside range [SMLNUM,BIGNUM]. +* + BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM. +* + CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM. +* + CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* If M < N make sure certain entries of B are zero. +* + IF( M.LT.N ) + $ CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB ) +* +* Overdetermined case. +* + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined. +* + MM = M + IF( M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns. +* + MM = N + ITAU = 1 + NWORK = ITAU + N +* +* Compute A=Q*R. +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Multiply B by transpose(Q). +* (Workspace: need N+NRHS, prefer N+NRHS*NB) +* + CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Zero out below R. +* + IF( N.GT.1 ) THEN + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + END IF + END IF +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A. +* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) +* + CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of R. +* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) +* + CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL DLALSD( 'U', SMLSIZ, N, NRHS, S, WORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of R. +* + CALL DORMBR( 'P', 'L', 'N', N, NRHS, N, A, LDA, WORK( ITAUP ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+ + $ MAX( M, 2*M-4, NRHS, N-3*M, WLALSD ) ) THEN +* +* Path 2a - underdetermined, with many more columns than rows +* and sufficient workspace for an efficient algorithm. +* + LDWORK = M + IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ), + $ M*LDA+M+M*NRHS, 4*M+M*LDA+WLALSD ) )LDWORK = LDA + ITAU = 1 + NWORK = M + 1 +* +* Compute A=L*Q. +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) + IL = NWORK +* +* Copy L to WORK(IL), zeroing out above its diagonal. +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ), + $ LDWORK ) + IE = IL + LDWORK*M + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL). +* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of L. +* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +* + CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUQ ), B, LDB, WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL DLALSD( 'U', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of L. +* + CALL DORMBR( 'P', 'L', 'N', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUP ), B, LDB, WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Zero out below first M rows of B. +* + CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB ) + NWORK = ITAU + M +* +* Multiply transpose(Q) by B. +* (Workspace: need M+NRHS, prefer M+NRHS*NB) +* + CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + ELSE +* +* Path 2 - remaining underdetermined cases. +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize A. +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors. +* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) +* + CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL DLALSD( 'L', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of A. +* + CALL DORMBR( 'P', 'L', 'N', N, NRHS, M, A, LDA, WORK( ITAUP ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + END IF +* +* Undo scaling. +* + IF( IASCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 10 CONTINUE + WORK( 1 ) = MAXWRK + IWORK( 1 ) = LIWORK + RETURN +* +* End of DGELSD +* + END diff --git a/dspl/liblapack/SRC/dgelss.f b/dspl/liblapack/SRC/dgelss.f new file mode 100644 index 0000000..674a7ba --- /dev/null +++ b/dspl/liblapack/SRC/dgelss.f @@ -0,0 +1,747 @@ +*> \brief DGELSS solves overdetermined or underdetermined systems for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGELSS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGELSS computes the minimum norm solution to a real linear least +*> squares problem: +*> +*> Minimize 2-norm(| b - A*x |). +*> +*> using the singular value decomposition (SVD) of A. A is an M-by-N +*> matrix which may be rank-deficient. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix +*> X. +*> +*> The effective rank of A is determined by treating as zero those +*> singular values which are less than RCOND times the largest singular +*> value. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the first min(m,n) rows of A are overwritten with +*> its right singular vectors, stored rowwise. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the M-by-NRHS right hand side matrix B. +*> On exit, B is overwritten by the N-by-NRHS solution +*> matrix X. If m >= n and RANK = n, the residual +*> sum-of-squares for the solution in the i-th column is given +*> by the sum of squares of elements n+1:m in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,max(M,N)). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (min(M,N)) +*> The singular values of A in decreasing order. +*> The condition number of A in the 2-norm = S(1)/S(min(m,n)). +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> RCOND is used to determine the effective rank of A. +*> Singular values S(i) <= RCOND*S(1) are treated as zero. +*> If RCOND < 0, machine precision is used instead. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The effective rank of A, i.e., the number of singular values +*> which are greater than RCOND*S(1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 1, and also: +*> LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS ) +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: the algorithm for computing the SVD failed to converge; +*> if INFO = i, i off-diagonal elements of an intermediate +*> bidiagonal form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEsolve +* +* ===================================================================== + SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, + $ WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER BDSPAC, BL, CHUNK, I, IASCL, IBSCL, IE, IL, + $ ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN, + $ MAXWRK, MINMN, MINWRK, MM, MNTHR + INTEGER LWORK_DGEQRF, LWORK_DORMQR, LWORK_DGEBRD, + $ LWORK_DORMBR, LWORK_DORGBR, LWORK_DORMLQ, + $ LWORK_DGELQF + DOUBLE PRECISION ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DBDSQR, DCOPY, DGEBRD, DGELQF, DGEMM, DGEMV, + $ DGEQRF, DLABAD, DLACPY, DLASCL, DLASET, DORGBR, + $ DORMBR, DORMLQ, DORMQR, DRSCL, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + IF( MINMN.GT.0 ) THEN + MM = M + MNTHR = ILAENV( 6, 'DGELSS', ' ', M, N, NRHS, -1 ) + IF( M.GE.N .AND. M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than +* columns +* +* Compute space needed for DGEQRF + CALL DGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, INFO ) + LWORK_DGEQRF=DUM(1) +* Compute space needed for DORMQR + CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, DUM(1), B, + $ LDB, DUM(1), -1, INFO ) + LWORK_DORMQR=DUM(1) + MM = N + MAXWRK = MAX( MAXWRK, N + LWORK_DGEQRF ) + MAXWRK = MAX( MAXWRK, N + LWORK_DORMQR ) + END IF + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined +* +* Compute workspace needed for DBDSQR +* + BDSPAC = MAX( 1, 5*N ) +* Compute space needed for DGEBRD + CALL DGEBRD( MM, N, A, LDA, S, DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, INFO ) + LWORK_DGEBRD=DUM(1) +* Compute space needed for DORMBR + CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, DUM(1), + $ B, LDB, DUM(1), -1, INFO ) + LWORK_DORMBR=DUM(1) +* Compute space needed for DORGBR + CALL DORGBR( 'P', N, N, N, A, LDA, DUM(1), + $ DUM(1), -1, INFO ) + LWORK_DORGBR=DUM(1) +* Compute total workspace needed + MAXWRK = MAX( MAXWRK, 3*N + LWORK_DGEBRD ) + MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORMBR ) + MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MAXWRK = MAX( MAXWRK, N*NRHS ) + MINWRK = MAX( 3*N + MM, 3*N + NRHS, BDSPAC ) + MAXWRK = MAX( MINWRK, MAXWRK ) + END IF + IF( N.GT.M ) THEN +* +* Compute workspace needed for DBDSQR +* + BDSPAC = MAX( 1, 5*M ) + MINWRK = MAX( 3*M+NRHS, 3*M+N, BDSPAC ) + IF( N.GE.MNTHR ) THEN +* +* Path 2a - underdetermined, with many more columns +* than rows +* +* Compute space needed for DGELQF + CALL DGELQF( M, N, A, LDA, DUM(1), DUM(1), + $ -1, INFO ) + LWORK_DGELQF=DUM(1) +* Compute space needed for DGEBRD + CALL DGEBRD( M, M, A, LDA, S, DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, INFO ) + LWORK_DGEBRD=DUM(1) +* Compute space needed for DORMBR + CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, + $ DUM(1), B, LDB, DUM(1), -1, INFO ) + LWORK_DORMBR=DUM(1) +* Compute space needed for DORGBR + CALL DORGBR( 'P', M, M, M, A, LDA, DUM(1), + $ DUM(1), -1, INFO ) + LWORK_DORGBR=DUM(1) +* Compute space needed for DORMLQ + CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, DUM(1), + $ B, LDB, DUM(1), -1, INFO ) + LWORK_DORMLQ=DUM(1) +* Compute total workspace needed + MAXWRK = M + LWORK_DGELQF + MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DGEBRD ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DORMBR ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_DORGBR ) + MAXWRK = MAX( MAXWRK, M*M + M + BDSPAC ) + IF( NRHS.GT.1 ) THEN + MAXWRK = MAX( MAXWRK, M*M + M + M*NRHS ) + ELSE + MAXWRK = MAX( MAXWRK, M*M + 2*M ) + END IF + MAXWRK = MAX( MAXWRK, M + LWORK_DORMLQ ) + ELSE +* +* Path 2 - underdetermined +* +* Compute space needed for DGEBRD + CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, INFO ) + LWORK_DGEBRD=DUM(1) +* Compute space needed for DORMBR + CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, A, LDA, + $ DUM(1), B, LDB, DUM(1), -1, INFO ) + LWORK_DORMBR=DUM(1) +* Compute space needed for DORGBR + CALL DORGBR( 'P', M, N, M, A, LDA, DUM(1), + $ DUM(1), -1, INFO ) + LWORK_DORGBR=DUM(1) + MAXWRK = 3*M + LWORK_DGEBRD + MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORMBR ) + MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MAXWRK = MAX( MAXWRK, N*NRHS ) + END IF + END IF + MAXWRK = MAX( MINWRK, MAXWRK ) + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) + $ INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELSS', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + EPS = DLAMCH( 'P' ) + SFMIN = DLAMCH( 'S' ) + SMLNUM = SFMIN / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, MINMN ) + RANK = 0 + GO TO 70 + END IF +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Overdetermined case +* + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined +* + MM = M + IF( M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns +* + MM = N + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Multiply B by transpose(Q) +* (Workspace: need N+NRHS, prefer N+NRHS*NB) +* + CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Zero out below R +* + IF( N.GT.1 ) + $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + END IF +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) +* + CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of R +* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) +* + CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors of R in A +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration +* multiply B by transpose of left singular vectors +* compute right singular vectors in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, N, 0, NRHS, S, WORK( IE ), A, LDA, DUM, + $ 1, B, LDB, WORK( IWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 10 I = 1, N + IF( S( I ).GT.THR ) THEN + CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) + END IF + 10 CONTINUE +* +* Multiply B by right singular vectors +* (Workspace: need N, prefer N*NRHS) +* + IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN + CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, A, LDA, B, LDB, ZERO, + $ WORK, LDB ) + CALL DLACPY( 'G', N, NRHS, WORK, LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = LWORK / N + DO 20 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL DGEMM( 'T', 'N', N, BL, N, ONE, A, LDA, B( 1, I ), + $ LDB, ZERO, WORK, N ) + CALL DLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB ) + 20 CONTINUE + ELSE + CALL DGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 ) + CALL DCOPY( N, WORK, 1, B, 1 ) + END IF +* + ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+ + $ MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN +* +* Path 2a - underdetermined, with many more columns than rows +* and sufficient workspace for an efficient algorithm +* + LDWORK = M + IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ), + $ M*LDA+M+M*NRHS ) )LDWORK = LDA + ITAU = 1 + IWORK = M + 1 +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) + IL = IWORK +* +* Copy L to WORK(IL), zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ), + $ LDWORK ) + IE = IL + LDWORK*M + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of L +* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +* + CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUQ ), B, LDB, WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors of R in WORK(IL) +* (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, +* computing right singular vectors of L in WORK(IL) and +* multiplying B by transpose of left singular vectors +* (Workspace: need M*M+M+BDSPAC) +* + CALL DBDSQR( 'U', M, M, 0, NRHS, S, WORK( IE ), WORK( IL ), + $ LDWORK, A, LDA, B, LDB, WORK( IWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 30 I = 1, M + IF( S( I ).GT.THR ) THEN + CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) + END IF + 30 CONTINUE + IWORK = IE +* +* Multiply B by right singular vectors of L in WORK(IL) +* (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS) +* + IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN + CALL DGEMM( 'T', 'N', M, NRHS, M, ONE, WORK( IL ), LDWORK, + $ B, LDB, ZERO, WORK( IWORK ), LDB ) + CALL DLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = ( LWORK-IWORK+1 ) / M + DO 40 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL DGEMM( 'T', 'N', M, BL, M, ONE, WORK( IL ), LDWORK, + $ B( 1, I ), LDB, ZERO, WORK( IWORK ), M ) + CALL DLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ), + $ LDB ) + 40 CONTINUE + ELSE + CALL DGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ), + $ 1, ZERO, WORK( IWORK ), 1 ) + CALL DCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 ) + END IF +* +* Zero out below first M rows of B +* + CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB ) + IWORK = ITAU + M +* +* Multiply transpose(Q) by B +* (Workspace: need M+NRHS, prefer M+NRHS*NB) +* + CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* + ELSE +* +* Path 2 - remaining underdetermined cases +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors +* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) +* + CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors in A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, +* computing right singular vectors of A in A and +* multiplying B by transpose of left singular vectors +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'L', M, N, 0, NRHS, S, WORK( IE ), A, LDA, DUM, + $ 1, B, LDB, WORK( IWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 50 I = 1, M + IF( S( I ).GT.THR ) THEN + CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) + END IF + 50 CONTINUE +* +* Multiply B by right singular vectors of A +* (Workspace: need N, prefer N*NRHS) +* + IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN + CALL DGEMM( 'T', 'N', N, NRHS, M, ONE, A, LDA, B, LDB, ZERO, + $ WORK, LDB ) + CALL DLACPY( 'F', N, NRHS, WORK, LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = LWORK / N + DO 60 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL DGEMM( 'T', 'N', N, BL, M, ONE, A, LDA, B( 1, I ), + $ LDB, ZERO, WORK, N ) + CALL DLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB ) + 60 CONTINUE + ELSE + CALL DGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 ) + CALL DCOPY( N, WORK, 1, B, 1 ) + END IF + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 70 CONTINUE + WORK( 1 ) = MAXWRK + RETURN +* +* End of DGELSS +* + END diff --git a/dspl/liblapack/SRC/dgelsy.f b/dspl/liblapack/SRC/dgelsy.f new file mode 100644 index 0000000..1ca238d --- /dev/null +++ b/dspl/liblapack/SRC/dgelsy.f @@ -0,0 +1,479 @@ +*> \brief DGELSY solves overdetermined or underdetermined systems for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGELSY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGELSY computes the minimum-norm solution to a real linear least +*> squares problem: +*> minimize || A * X - B || +*> using a complete orthogonal factorization of A. A is an M-by-N +*> matrix which may be rank-deficient. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> +*> The routine first computes a QR factorization with column pivoting: +*> A * P = Q * [ R11 R12 ] +*> [ 0 R22 ] +*> with R11 defined as the largest leading submatrix whose estimated +*> condition number is less than 1/RCOND. The order of R11, RANK, +*> is the effective rank of A. +*> +*> Then, R22 is considered to be negligible, and R12 is annihilated +*> by orthogonal transformations from the right, arriving at the +*> complete orthogonal factorization: +*> A * P = Q * [ T11 0 ] * Z +*> [ 0 0 ] +*> The minimum-norm solution is then +*> X = P * Z**T [ inv(T11)*Q1**T*B ] +*> [ 0 ] +*> where Q1 consists of the first RANK columns of Q. +*> +*> This routine is basically identical to the original xGELSX except +*> three differences: +*> o The call to the subroutine xGEQPF has been substituted by the +*> the call to the subroutine xGEQP3. This subroutine is a Blas-3 +*> version of the QR factorization with column pivoting. +*> o Matrix B (the right hand side) is updated with Blas-3. +*> o The permutation of matrix B (the right hand side) is faster and +*> more simple. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A has been overwritten by details of its +*> complete orthogonal factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the M-by-NRHS right hand side matrix B. +*> On exit, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M,N). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +*> to the front of AP, otherwise column i is a free column. +*> On exit, if JPVT(i) = k, then the i-th column of AP +*> was the k-th column of A. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> RCOND is used to determine the effective rank of A, which +*> is defined as the order of the largest leading triangular +*> submatrix R11 in the QR factorization with pivoting of A, +*> whose estimated condition number < 1/RCOND. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The effective rank of A, i.e., the order of the submatrix +*> R11. This is the same as the order of the submatrix T11 +*> in the complete orthogonal factorization of A. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> The unblocked strategy requires that: +*> LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ), +*> where MN = min( M, N ). +*> The block algorithm requires that: +*> LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ), +*> where NB is an upper bound on the blocksize returned +*> by ILAENV for the routines DGEQP3, DTZRZF, STZRQF, DORMQR, +*> and DORMRZ. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: If INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEsolve +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA \n +*> E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain \n +*> G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain \n +*> +* ===================================================================== + SUBROUTINE DGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, + $ WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IMAX, IMIN + PARAMETER ( IMAX = 1, IMIN = 2 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IASCL, IBSCL, ISMAX, ISMIN, J, LWKMIN, + $ LWKOPT, MN, NB, NB1, NB2, NB3, NB4 + DOUBLE PRECISION ANRM, BIGNUM, BNRM, C1, C2, S1, S2, SMAX, + $ SMAXPR, SMIN, SMINPR, SMLNUM, WSIZE +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL ILAENV, DLAMCH, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEQP3, DLABAD, DLAIC1, DLASCL, DLASET, + $ DORMQR, DORMRZ, DTRSM, DTZRZF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + MN = MIN( M, N ) + ISMIN = MN + 1 + ISMAX = 2*MN + 1 +* +* Test the input arguments. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -7 + END IF +* +* Figure out optimal block size +* + IF( INFO.EQ.0 ) THEN + IF( MN.EQ.0 .OR. NRHS.EQ.0 ) THEN + LWKMIN = 1 + LWKOPT = 1 + ELSE + NB1 = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'DGERQF', ' ', M, N, -1, -1 ) + NB3 = ILAENV( 1, 'DORMQR', ' ', M, N, NRHS, -1 ) + NB4 = ILAENV( 1, 'DORMRQ', ' ', M, N, NRHS, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKMIN = MN + MAX( 2*MN, N + 1, MN + NRHS ) + LWKOPT = MAX( LWKMIN, + $ MN + 2*N + NB*( N + 1 ), 2*MN + NB*NRHS ) + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGELSY', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MN.EQ.0 .OR. NRHS.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max entries outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + RANK = 0 + GO TO 70 + END IF +* + BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Compute QR factorization with column pivoting of A: +* A * P = Q * R +* + CALL DGEQP3( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), + $ LWORK-MN, INFO ) + WSIZE = MN + WORK( MN+1 ) +* +* workspace: MN+2*N+NB*(N+1). +* Details of Householder rotations stored in WORK(1:MN). +* +* Determine RANK using incremental condition estimation +* + WORK( ISMIN ) = ONE + WORK( ISMAX ) = ONE + SMAX = ABS( A( 1, 1 ) ) + SMIN = SMAX + IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN + RANK = 0 + CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + GO TO 70 + ELSE + RANK = 1 + END IF +* + 10 CONTINUE + IF( RANK.LT.MN ) THEN + I = RANK + 1 + CALL DLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ), + $ A( I, I ), SMINPR, S1, C1 ) + CALL DLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ), + $ A( I, I ), SMAXPR, S2, C2 ) +* + IF( SMAXPR*RCOND.LE.SMINPR ) THEN + DO 20 I = 1, RANK + WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 ) + WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 ) + 20 CONTINUE + WORK( ISMIN+RANK ) = C1 + WORK( ISMAX+RANK ) = C2 + SMIN = SMINPR + SMAX = SMAXPR + RANK = RANK + 1 + GO TO 10 + END IF + END IF +* +* workspace: 3*MN. +* +* Logically partition R = [ R11 R12 ] +* [ 0 R22 ] +* where R11 = R(1:RANK,1:RANK) +* +* [R11,R12] = [ T11, 0 ] * Y +* + IF( RANK.LT.N ) + $ CALL DTZRZF( RANK, N, A, LDA, WORK( MN+1 ), WORK( 2*MN+1 ), + $ LWORK-2*MN, INFO ) +* +* workspace: 2*MN. +* Details of Householder rotations stored in WORK(MN+1:2*MN) +* +* B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) +* + CALL DORMQR( 'Left', 'Transpose', M, NRHS, MN, A, LDA, WORK( 1 ), + $ B, LDB, WORK( 2*MN+1 ), LWORK-2*MN, INFO ) + WSIZE = MAX( WSIZE, 2*MN+WORK( 2*MN+1 ) ) +* +* workspace: 2*MN+NB*NRHS. +* +* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) +* + CALL DTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK, + $ NRHS, ONE, A, LDA, B, LDB ) +* + DO 40 J = 1, NRHS + DO 30 I = RANK + 1, N + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Y**T * B(1:N,1:NRHS) +* + IF( RANK.LT.N ) THEN + CALL DORMRZ( 'Left', 'Transpose', N, NRHS, RANK, N-RANK, A, + $ LDA, WORK( MN+1 ), B, LDB, WORK( 2*MN+1 ), + $ LWORK-2*MN, INFO ) + END IF +* +* workspace: 2*MN+NRHS. +* +* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) +* + DO 60 J = 1, NRHS + DO 50 I = 1, N + WORK( JPVT( I ) ) = B( I, J ) + 50 CONTINUE + CALL DCOPY( N, WORK( 1 ), 1, B( 1, J ), 1 ) + 60 CONTINUE +* +* workspace: N. +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 70 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DGELSY +* + END diff --git a/dspl/liblapack/SRC/dgemlq.f b/dspl/liblapack/SRC/dgemlq.f new file mode 100644 index 0000000..bb6b286 --- /dev/null +++ b/dspl/liblapack/SRC/dgemlq.f @@ -0,0 +1,284 @@ +* +* Definition: +* =========== +* +* SUBROUTINE DGEMLQ( SIDE, TRANS, M, N, K, A, LDA, T, +* $ TSIZE, C, LDC, WORK, LWORK, INFO ) +* +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, LDA, M, N, K, LDT, TSIZE, LWORK, LDC +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), T( * ), C(LDC, * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEMLQ overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> where Q is a real orthogonal matrix defined as the product +*> of blocked elementary reflectors computed by short wide LQ +*> factorization (DGELQ) +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >=0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> Part of the data structure to represent Q as returned by DGELQ. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (MAX(5,TSIZE)). +*> Part of the data structure to represent Q as returned by DGELQ. +*> \endverbatim +*> +*> \param[in] TSIZE +*> \verbatim +*> TSIZE is INTEGER +*> The dimension of the array T. TSIZE >= 5. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If LWORK = -1, then a workspace query is assumed. The routine +*> only calculates the size of the WORK array, returns this +*> value as WORK(1), and no error message related to WORK +*> is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details +* ==================== +*> +*> \verbatim +*> +*> These details are particular for this LAPACK implementation. Users should not +*> take them for granted. These details may change in the future, and are unlikely not +*> true for another LAPACK implementation. These details are relevant if one wants +*> to try to understand the code. They are not part of the interface. +*> +*> In this version, +*> +*> T(2): row block size (MB) +*> T(3): column block size (NB) +*> T(6:TSIZE): data structure needed for Q, computed by +*> DLASWLQ or DGELQT +*> +*> Depending on the matrix dimensions M and N, and row and column +*> block sizes MB and NB returned by ILAENV, DGELQ will use either +*> DLASWLQ (if the matrix is wide-and-short) or DGELQT to compute +*> the LQ factorization. +*> This version of DGEMLQ will use either DLAMSWLQ or DGEMLQT to +*> multiply matrix Q by another matrix. +*> Further Details in DLAMSWLQ or DGEMLQT. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEMLQ( SIDE, TRANS, M, N, K, A, LDA, T, TSIZE, + $ C, LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, LDA, M, N, K, TSIZE, LWORK, LDC +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), T( * ), C( LDC, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN, LQUERY + INTEGER MB, NB, LW, NBLCKS, MN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLAMSWLQ, DGEMLQT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN, MOD +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + LQUERY = LWORK.EQ.-1 + NOTRAN = LSAME( TRANS, 'N' ) + TRAN = LSAME( TRANS, 'T' ) + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) +* + MB = INT( T( 2 ) ) + NB = INT( T( 3 ) ) + IF( LEFT ) THEN + LW = N * MB + MN = M + ELSE + LW = M * MB + MN = N + END IF +* + IF( ( NB.GT.K ) .AND. ( MN.GT.K ) ) THEN + IF( MOD( MN - K, NB - K ) .EQ. 0 ) THEN + NBLCKS = ( MN - K ) / ( NB - K ) + ELSE + NBLCKS = ( MN - K ) / ( NB - K ) + 1 + END IF + ELSE + NBLCKS = 1 + END IF +* + INFO = 0 + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.MN ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( TSIZE.LT.5 ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( ( LWORK.LT.MAX( 1, LW ) ) .AND. ( .NOT.LQUERY ) ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LW + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEMLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, K ).EQ.0 ) THEN + RETURN + END IF +* + IF( ( LEFT .AND. M.LE.K ) .OR. ( RIGHT .AND. N.LE.K ) + $ .OR. ( NB.LE.K ) .OR. ( NB.GE.MAX( M, N, K ) ) ) THEN + CALL DGEMLQT( SIDE, TRANS, M, N, K, MB, A, LDA, + $ T( 6 ), MB, C, LDC, WORK, INFO ) + ELSE + CALL DLAMSWLQ( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T( 6 ), + $ MB, C, LDC, WORK, LWORK, INFO ) + END IF +* + WORK( 1 ) = LW +* + RETURN +* +* End of DGEMLQ +* + END diff --git a/dspl/liblapack/SRC/dgemlqt.f b/dspl/liblapack/SRC/dgemlqt.f new file mode 100644 index 0000000..8cc59b8 --- /dev/null +++ b/dspl/liblapack/SRC/dgemlqt.f @@ -0,0 +1,289 @@ +*> \brief \b DGEMLQT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEMLQT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEMLQT( SIDE, TRANS, M, N, K, MB, V, LDV, T, LDT, +* C, LDC, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDV, LDC, M, N, MB, LDT +* .. +* .. Array Arguments .. +* DOUBLE PRECISION V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEMLQT overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q C C Q +*> TRANS = 'T': Q**T C C Q**T +*> +*> where Q is a real orthogonal matrix defined as the product of K +*> elementary reflectors: +*> +*> Q = H(1) H(2) . . . H(K) = I - V T V**T +*> +*> generated using the compact WY representation as returned by DGELQT. +*> +*> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The block size used for the storage of T. K >= MB >= 1. +*> This must be the same value of MB used to generate T +*> in DGELQT. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (LDV,M) if SIDE = 'L', +*> (LDV,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DGELQT in the first K rows of its array argument A. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,K). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,K) +*> The upper triangular factors of the block reflectors +*> as returned by DGELQT, stored as a MB-by-K matrix. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q C, Q**T C, C Q**T or C Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array. The dimension of +*> WORK is N*MB if SIDE = 'L', or M*MB if SIDE = 'R'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + SUBROUTINE DGEMLQT( SIDE, TRANS, M, N, K, MB, V, LDV, T, LDT, + $ C, LDC, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDV, LDC, M, N, MB, LDT +* .. +* .. Array Arguments .. + DOUBLE PRECISION V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN + INTEGER I, IB, LDWORK, KF +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DLARFB +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* .. Test the input arguments .. +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + TRAN = LSAME( TRANS, 'T' ) + NOTRAN = LSAME( TRANS, 'N' ) +* + IF( LEFT ) THEN + LDWORK = MAX( 1, N ) + ELSE IF ( RIGHT ) THEN + LDWORK = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0) THEN + INFO = -5 + ELSE IF( MB.LT.1 .OR. (MB.GT.K .AND. K.GT.0)) THEN + INFO = -6 + ELSE IF( LDV.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDT.LT.MB ) THEN + INFO = -10 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEMLQT', -INFO ) + RETURN + END IF +* +* .. Quick return if possible .. +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) RETURN +* + IF( LEFT .AND. NOTRAN ) THEN +* + DO I = 1, K, MB + IB = MIN( MB, K-I+1 ) + CALL DLARFB( 'L', 'T', 'F', 'R', M-I+1, N, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( I, 1 ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( RIGHT .AND. TRAN ) THEN +* + DO I = 1, K, MB + IB = MIN( MB, K-I+1 ) + CALL DLARFB( 'R', 'N', 'F', 'R', M, N-I+1, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( 1, I ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( LEFT .AND. TRAN ) THEN +* + KF = ((K-1)/MB)*MB+1 + DO I = KF, 1, -MB + IB = MIN( MB, K-I+1 ) + CALL DLARFB( 'L', 'N', 'F', 'R', M-I+1, N, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( I, 1 ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( RIGHT .AND. NOTRAN ) THEN +* + KF = ((K-1)/MB)*MB+1 + DO I = KF, 1, -MB + IB = MIN( MB, K-I+1 ) + CALL DLARFB( 'R', 'T', 'F', 'R', M, N-I+1, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( 1, I ), LDC, WORK, LDWORK ) + END DO +* + END IF +* + RETURN +* +* End of DGEMLQT +* + END diff --git a/dspl/liblapack/SRC/dgemqr.f b/dspl/liblapack/SRC/dgemqr.f new file mode 100644 index 0000000..8509b13 --- /dev/null +++ b/dspl/liblapack/SRC/dgemqr.f @@ -0,0 +1,285 @@ +* +* Definition: +* =========== +* +* SUBROUTINE DGEMQR( SIDE, TRANS, M, N, K, A, LDA, T, +* $ TSIZE, C, LDC, WORK, LWORK, INFO ) +* +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, LDA, M, N, K, LDT, TSIZE, LWORK, LDC +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), T( * ), C( LDC, * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEMQR overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix defined as the product +*> of blocked elementary reflectors computed by tall skinny +*> QR factorization (DGEQR) +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >=0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,K) +*> Part of the data structure to represent Q as returned by DGEQR. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (MAX(5,TSIZE)). +*> Part of the data structure to represent Q as returned by DGEQR. +*> \endverbatim +*> +*> \param[in] TSIZE +*> \verbatim +*> TSIZE is INTEGER +*> The dimension of the array T. TSIZE >= 5. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If LWORK = -1, then a workspace query is assumed. The routine +*> only calculates the size of the WORK array, returns this +*> value as WORK(1), and no error message related to WORK +*> is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details +* ==================== +*> +*> \verbatim +*> +*> These details are particular for this LAPACK implementation. Users should not +*> take them for granted. These details may change in the future, and are unlikely not +*> true for another LAPACK implementation. These details are relevant if one wants +*> to try to understand the code. They are not part of the interface. +*> +*> In this version, +*> +*> T(2): row block size (MB) +*> T(3): column block size (NB) +*> T(6:TSIZE): data structure needed for Q, computed by +*> DLATSQR or DGEQRT +*> +*> Depending on the matrix dimensions M and N, and row and column +*> block sizes MB and NB returned by ILAENV, DGEQR will use either +*> DLATSQR (if the matrix is tall-and-skinny) or DGEQRT to compute +*> the QR factorization. +*> This version of DGEMQR will use either DLAMTSQR or DGEMQRT to +*> multiply matrix Q by another matrix. +*> Further Details in DLATMSQR or DGEMQRT. +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEMQR( SIDE, TRANS, M, N, K, A, LDA, T, TSIZE, + $ C, LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, LDA, M, N, K, TSIZE, LWORK, LDC +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), T( * ), C( LDC, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN, LQUERY + INTEGER MB, NB, LW, NBLCKS, MN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGEMQRT, DLAMTSQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN, MOD +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + LQUERY = LWORK.EQ.-1 + NOTRAN = LSAME( TRANS, 'N' ) + TRAN = LSAME( TRANS, 'T' ) + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) +* + MB = INT( T( 2 ) ) + NB = INT( T( 3 ) ) + IF( LEFT ) THEN + LW = N * NB + MN = M + ELSE + LW = MB * NB + MN = N + END IF +* + IF( ( MB.GT.K ) .AND. ( MN.GT.K ) ) THEN + IF( MOD( MN - K, MB - K ).EQ.0 ) THEN + NBLCKS = ( MN - K ) / ( MB - K ) + ELSE + NBLCKS = ( MN - K ) / ( MB - K ) + 1 + END IF + ELSE + NBLCKS = 1 + END IF +* + INFO = 0 + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.MN ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, MN ) ) THEN + INFO = -7 + ELSE IF( TSIZE.LT.5 ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( ( LWORK.LT.MAX( 1, LW ) ) .AND. ( .NOT.LQUERY ) ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LW + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEMQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, K ).EQ.0 ) THEN + RETURN + END IF +* + IF( ( LEFT .AND. M.LE.K ) .OR. ( RIGHT .AND. N.LE.K ) + $ .OR. ( MB.LE.K ) .OR. ( MB.GE.MAX( M, N, K ) ) ) THEN + CALL DGEMQRT( SIDE, TRANS, M, N, K, NB, A, LDA, T( 6 ), + $ NB, C, LDC, WORK, INFO ) + ELSE + CALL DLAMTSQR( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T( 6 ), + $ NB, C, LDC, WORK, LWORK, INFO ) + END IF +* + WORK( 1 ) = LW +* + RETURN +* +* End of DGEMQR +* + END diff --git a/dspl/liblapack/SRC/dgemqrt.f b/dspl/liblapack/SRC/dgemqrt.f new file mode 100644 index 0000000..12cf929 --- /dev/null +++ b/dspl/liblapack/SRC/dgemqrt.f @@ -0,0 +1,291 @@ +*> \brief \b DGEMQRT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEMQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEMQRT( SIDE, TRANS, M, N, K, NB, V, LDV, T, LDT, +* C, LDC, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDV, LDC, M, N, NB, LDT +* .. +* .. Array Arguments .. +* DOUBLE PRECISION V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEMQRT overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q C C Q +*> TRANS = 'T': Q**T C C Q**T +*> +*> where Q is a real orthogonal matrix defined as the product of K +*> elementary reflectors: +*> +*> Q = H(1) H(2) . . . H(K) = I - V T V**T +*> +*> generated using the compact WY representation as returned by DGEQRT. +*> +*> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The block size used for the storage of T. K >= NB >= 1. +*> This must be the same value of NB used to generate T +*> in CGEQRT. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (LDV,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> CGEQRT in the first K columns of its array argument A. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,K) +*> The upper triangular factors of the block reflectors +*> as returned by CGEQRT, stored as a NB-by-N matrix. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q C, Q**T C, C Q**T or C Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array. The dimension of +*> WORK is N*NB if SIDE = 'L', or M*NB if SIDE = 'R'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + SUBROUTINE DGEMQRT( SIDE, TRANS, M, N, K, NB, V, LDV, T, LDT, + $ C, LDC, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDV, LDC, M, N, NB, LDT +* .. +* .. Array Arguments .. + DOUBLE PRECISION V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN + INTEGER I, IB, LDWORK, KF, Q +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DLARFB +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* .. Test the input arguments .. +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + TRAN = LSAME( TRANS, 'T' ) + NOTRAN = LSAME( TRANS, 'N' ) +* + IF( LEFT ) THEN + LDWORK = MAX( 1, N ) + Q = M + ELSE IF ( RIGHT ) THEN + LDWORK = MAX( 1, M ) + Q = N + END IF + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.Q ) THEN + INFO = -5 + ELSE IF( NB.LT.1 .OR. (NB.GT.K .AND. K.GT.0)) THEN + INFO = -6 + ELSE IF( LDV.LT.MAX( 1, Q ) ) THEN + INFO = -8 + ELSE IF( LDT.LT.NB ) THEN + INFO = -10 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEMQRT', -INFO ) + RETURN + END IF +* +* .. Quick return if possible .. +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) RETURN +* + IF( LEFT .AND. TRAN ) THEN +* + DO I = 1, K, NB + IB = MIN( NB, K-I+1 ) + CALL DLARFB( 'L', 'T', 'F', 'C', M-I+1, N, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( I, 1 ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( RIGHT .AND. NOTRAN ) THEN +* + DO I = 1, K, NB + IB = MIN( NB, K-I+1 ) + CALL DLARFB( 'R', 'N', 'F', 'C', M, N-I+1, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( 1, I ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( LEFT .AND. NOTRAN ) THEN +* + KF = ((K-1)/NB)*NB+1 + DO I = KF, 1, -NB + IB = MIN( NB, K-I+1 ) + CALL DLARFB( 'L', 'N', 'F', 'C', M-I+1, N, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( I, 1 ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( RIGHT .AND. TRAN ) THEN +* + KF = ((K-1)/NB)*NB+1 + DO I = KF, 1, -NB + IB = MIN( NB, K-I+1 ) + CALL DLARFB( 'R', 'T', 'F', 'C', M, N-I+1, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( 1, I ), LDC, WORK, LDWORK ) + END DO +* + END IF +* + RETURN +* +* End of DGEMQRT +* + END diff --git a/dspl/liblapack/SRC/dgeql2.f b/dspl/liblapack/SRC/dgeql2.f new file mode 100644 index 0000000..539ef29 --- /dev/null +++ b/dspl/liblapack/SRC/dgeql2.f @@ -0,0 +1,193 @@ +*> \brief \b DGEQL2 computes the QL factorization of a general rectangular matrix using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEQL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEQL2( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEQL2 computes a QL factorization of a real m by n matrix A: +*> A = Q * L. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, if m >= n, the lower triangle of the subarray +*> A(m-n+1:m,1:n) contains the n by n lower triangular matrix L; +*> if m <= n, the elements on and below the (n-m)-th +*> superdiagonal contain the m by n lower trapezoidal matrix L; +*> the remaining elements, with the array TAU, represent the +*> orthogonal matrix Q as a product of elementary reflectors +*> (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in +*> A(1:m-k+i-1,n-k+i), and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEQL2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + DOUBLE PRECISION AII +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQL2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = K, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* A(1:m-k+i-1,n-k+i) +* + CALL DLARFG( M-K+I, A( M-K+I, N-K+I ), A( 1, N-K+I ), 1, + $ TAU( I ) ) +* +* Apply H(i) to A(1:m-k+i,1:n-k+i-1) from the left +* + AII = A( M-K+I, N-K+I ) + A( M-K+I, N-K+I ) = ONE + CALL DLARF( 'Left', M-K+I, N-K+I-1, A( 1, N-K+I ), 1, TAU( I ), + $ A, LDA, WORK ) + A( M-K+I, N-K+I ) = AII + 10 CONTINUE + RETURN +* +* End of DGEQL2 +* + END diff --git a/dspl/liblapack/SRC/dgeqlf.f b/dspl/liblapack/SRC/dgeqlf.f new file mode 100644 index 0000000..e8c3f8e --- /dev/null +++ b/dspl/liblapack/SRC/dgeqlf.f @@ -0,0 +1,287 @@ +*> \brief \b DGEQLF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEQLF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEQLF computes a QL factorization of a real M-by-N matrix A: +*> A = Q * L. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if m >= n, the lower triangle of the subarray +*> A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L; +*> if m <= n, the elements on and below the (n-m)-th +*> superdiagonal contain the M-by-N lower trapezoidal matrix L; +*> the remaining elements, with the array TAU, represent the +*> orthogonal matrix Q as a product of elementary reflectors +*> (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in +*> A(1:m-k+i-1,n-k+i), and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT, + $ MU, NB, NBMIN, NU, NX +* .. +* .. External Subroutines .. + EXTERNAL DGEQL2, DLARFB, DLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF +* + IF( INFO.EQ.0 ) THEN + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'DGEQLF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQLF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DGEQLF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DGEQLF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially. +* The last kk columns are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* + DO 10 I = K - KK + KI + 1, K - KK + 1, -NB + IB = MIN( K-I+1, NB ) +* +* Compute the QL factorization of the current block +* A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1) +* + CALL DGEQL2( M-K+I+IB-1, IB, A( 1, N-K+I ), LDA, TAU( I ), + $ WORK, IINFO ) + IF( N-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL DLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**T to A(1:m-k+i+ib-1,1:n-k+i-1) from the left +* + CALL DLARFB( 'Left', 'Transpose', 'Backward', + $ 'Columnwise', M-K+I+IB-1, N-K+I-1, IB, + $ A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + MU = M - K + I + NB - 1 + NU = N - K + I + NB - 1 + ELSE + MU = M + NU = N + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 .AND. NU.GT.0 ) + $ CALL DGEQL2( MU, NU, A, LDA, TAU, WORK, IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of DGEQLF +* + END diff --git a/dspl/liblapack/SRC/dgeqp3.f b/dspl/liblapack/SRC/dgeqp3.f new file mode 100644 index 0000000..2b9faf6 --- /dev/null +++ b/dspl/liblapack/SRC/dgeqp3.f @@ -0,0 +1,361 @@ +*> \brief \b DGEQP3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEQP3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEQP3 computes a QR factorization with column pivoting of a +*> matrix A: A*P = Q*R using Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the upper triangle of the array contains the +*> min(M,N)-by-N upper trapezoidal matrix R; the elements below +*> the diagonal, together with the array TAU, represent the +*> orthogonal matrix Q as a product of min(M,N) elementary +*> reflectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> On entry, if JPVT(J).ne.0, the J-th column of A is permuted +*> to the front of A*P (a leading column); if JPVT(J)=0, +*> the J-th column of A is a free column. +*> On exit, if JPVT(J)=K, then the J-th column of A*P was the +*> the K-th column of A. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO=0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 3*N+1. +*> For optimal performance LWORK >= 2*N+( N+1 )*NB, where NB +*> is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real/complex vector +*> with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in +*> A(i+1:m,i), and tau in TAU(i). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +*> X. Sun, Computer Science Dept., Duke University, USA +*> +* ===================================================================== + SUBROUTINE DGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER INB, INBMIN, IXOVER + PARAMETER ( INB = 1, INBMIN = 2, IXOVER = 3 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER FJB, IWS, J, JB, LWKOPT, MINMN, MINWS, NA, NB, + $ NBMIN, NFXD, NX, SM, SMINMN, SN, TOPBMN +* .. +* .. External Subroutines .. + EXTERNAL DGEQRF, DLAQP2, DLAQPS, DORMQR, DSWAP, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DNRM2 + EXTERNAL ILAENV, DNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test input arguments +* ==================== +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF +* + IF( INFO.EQ.0 ) THEN + MINMN = MIN( M, N ) + IF( MINMN.EQ.0 ) THEN + IWS = 1 + LWKOPT = 1 + ELSE + IWS = 3*N + 1 + NB = ILAENV( INB, 'DGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = 2*N + ( N + 1 )*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( ( LWORK.LT.IWS ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQP3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Move initial columns up front. +* + NFXD = 1 + DO 10 J = 1, N + IF( JPVT( J ).NE.0 ) THEN + IF( J.NE.NFXD ) THEN + CALL DSWAP( M, A( 1, J ), 1, A( 1, NFXD ), 1 ) + JPVT( J ) = JPVT( NFXD ) + JPVT( NFXD ) = J + ELSE + JPVT( J ) = J + END IF + NFXD = NFXD + 1 + ELSE + JPVT( J ) = J + END IF + 10 CONTINUE + NFXD = NFXD - 1 +* +* Factorize fixed columns +* ======================= +* +* Compute the QR factorization of fixed columns and update +* remaining columns. +* + IF( NFXD.GT.0 ) THEN + NA = MIN( M, NFXD ) +*CC CALL DGEQR2( M, NA, A, LDA, TAU, WORK, INFO ) + CALL DGEQRF( M, NA, A, LDA, TAU, WORK, LWORK, INFO ) + IWS = MAX( IWS, INT( WORK( 1 ) ) ) + IF( NA.LT.N ) THEN +*CC CALL DORM2R( 'Left', 'Transpose', M, N-NA, NA, A, LDA, +*CC $ TAU, A( 1, NA+1 ), LDA, WORK, INFO ) + CALL DORMQR( 'Left', 'Transpose', M, N-NA, NA, A, LDA, TAU, + $ A( 1, NA+1 ), LDA, WORK, LWORK, INFO ) + IWS = MAX( IWS, INT( WORK( 1 ) ) ) + END IF + END IF +* +* Factorize free columns +* ====================== +* + IF( NFXD.LT.MINMN ) THEN +* + SM = M - NFXD + SN = N - NFXD + SMINMN = MINMN - NFXD +* +* Determine the block size. +* + NB = ILAENV( INB, 'DGEQRF', ' ', SM, SN, -1, -1 ) + NBMIN = 2 + NX = 0 +* + IF( ( NB.GT.1 ) .AND. ( NB.LT.SMINMN ) ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( IXOVER, 'DGEQRF', ' ', SM, SN, -1, + $ -1 ) ) +* +* + IF( NX.LT.SMINMN ) THEN +* +* Determine if workspace is large enough for blocked code. +* + MINWS = 2*SN + ( SN+1 )*NB + IWS = MAX( IWS, MINWS ) + IF( LWORK.LT.MINWS ) THEN +* +* Not enough workspace to use optimal NB: Reduce NB and +* determine the minimum value of NB. +* + NB = ( LWORK-2*SN ) / ( SN+1 ) + NBMIN = MAX( 2, ILAENV( INBMIN, 'DGEQRF', ' ', SM, SN, + $ -1, -1 ) ) +* +* + END IF + END IF + END IF +* +* Initialize partial column norms. The first N elements of work +* store the exact column norms. +* + DO 20 J = NFXD + 1, N + WORK( J ) = DNRM2( SM, A( NFXD+1, J ), 1 ) + WORK( N+J ) = WORK( J ) + 20 CONTINUE +* + IF( ( NB.GE.NBMIN ) .AND. ( NB.LT.SMINMN ) .AND. + $ ( NX.LT.SMINMN ) ) THEN +* +* Use blocked code initially. +* + J = NFXD + 1 +* +* Compute factorization: while loop. +* +* + TOPBMN = MINMN - NX + 30 CONTINUE + IF( J.LE.TOPBMN ) THEN + JB = MIN( NB, TOPBMN-J+1 ) +* +* Factorize JB columns among columns J:N. +* + CALL DLAQPS( M, N-J+1, J-1, JB, FJB, A( 1, J ), LDA, + $ JPVT( J ), TAU( J ), WORK( J ), WORK( N+J ), + $ WORK( 2*N+1 ), WORK( 2*N+JB+1 ), N-J+1 ) +* + J = J + FJB + GO TO 30 + END IF + ELSE + J = NFXD + 1 + END IF +* +* Use unblocked code to factor the last or only block. +* +* + IF( J.LE.MINMN ) + $ CALL DLAQP2( M, N-J+1, J-1, A( 1, J ), LDA, JPVT( J ), + $ TAU( J ), WORK( J ), WORK( N+J ), + $ WORK( 2*N+1 ) ) +* + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of DGEQP3 +* + END diff --git a/dspl/liblapack/SRC/dgeqr.f b/dspl/liblapack/SRC/dgeqr.f new file mode 100644 index 0000000..d0a1a18 --- /dev/null +++ b/dspl/liblapack/SRC/dgeqr.f @@ -0,0 +1,307 @@ +* +* Definition: +* =========== +* +* SUBROUTINE DGEQR( M, N, A, LDA, T, TSIZE, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, TSIZE, LWORK +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), T( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> DGEQR computes a QR factorization of an M-by-N matrix A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix R +*> (R is upper triangular if M >= N); +*> the elements below the diagonal are used to store part of the +*> data structure to represent Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (MAX(5,TSIZE)) +*> On exit, if INFO = 0, T(1) returns optimal (or either minimal +*> or optimal, if query is assumed) TSIZE. See TSIZE for details. +*> Remaining T contains part of the data structure used to represent Q. +*> If one wants to apply or construct Q, then one needs to keep T +*> (in addition to A) and pass it to further subroutines. +*> \endverbatim +*> +*> \param[in] TSIZE +*> \verbatim +*> TSIZE is INTEGER +*> If TSIZE >= 5, the dimension of the array T. +*> If TSIZE = -1 or -2, then a workspace query is assumed. The routine +*> only calculates the sizes of the T and WORK arrays, returns these +*> values as the first entries of the T and WORK arrays, and no error +*> message related to T or WORK is issued by XERBLA. +*> If TSIZE = -1, the routine calculates optimal size of T for the +*> optimum performance and returns this value in T(1). +*> If TSIZE = -2, the routine calculates minimal size of T and +*> returns this value in T(1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) contains optimal (or either minimal +*> or optimal, if query was assumed) LWORK. +*> See LWORK for details. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If LWORK = -1 or -2, then a workspace query is assumed. The routine +*> only calculates the sizes of the T and WORK arrays, returns these +*> values as the first entries of the T and WORK arrays, and no error +*> message related to T or WORK is issued by XERBLA. +*> If LWORK = -1, the routine calculates optimal size of WORK for the +*> optimal performance and returns this value in WORK(1). +*> If LWORK = -2, the routine calculates minimal size of WORK and +*> returns this value in WORK(1). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details +* ==================== +*> +*> \verbatim +*> +*> The goal of the interface is to give maximum freedom to the developers for +*> creating any QR factorization algorithm they wish. The triangular +*> (trapezoidal) R has to be stored in the upper part of A. The lower part of A +*> and the array T can be used to store any relevant information for applying or +*> constructing the Q factor. The WORK array can safely be discarded after exit. +*> +*> Caution: One should not expect the sizes of T and WORK to be the same from one +*> LAPACK implementation to the other, or even from one execution to the other. +*> A workspace query (for T and WORK) is needed at each execution. However, +*> for a given execution, the size of T and WORK are fixed and will not change +*> from one query to the next. +*> +*> \endverbatim +*> +*> \par Further Details particular to this LAPACK implementation: +* ============================================================== +*> +*> \verbatim +*> +*> These details are particular for this LAPACK implementation. Users should not +*> take them for granted. These details may change in the future, and are unlikely not +*> true for another LAPACK implementation. These details are relevant if one wants +*> to try to understand the code. They are not part of the interface. +*> +*> In this version, +*> +*> T(2): row block size (MB) +*> T(3): column block size (NB) +*> T(6:TSIZE): data structure needed for Q, computed by +*> DLATSQR or DGEQRT +*> +*> Depending on the matrix dimensions M and N, and row and column +*> block sizes MB and NB returned by ILAENV, DGEQR will use either +*> DLATSQR (if the matrix is tall-and-skinny) or DGEQRT to compute +*> the QR factorization. +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEQR( M, N, A, LDA, T, TSIZE, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, TSIZE, LWORK +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), T( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LQUERY, LMINWS, MINT, MINW + INTEGER MB, NB, MINTSZ, NBLCKS +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLATSQR, DGEQRT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, MOD +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 +* + LQUERY = ( TSIZE.EQ.-1 .OR. TSIZE.EQ.-2 .OR. + $ LWORK.EQ.-1 .OR. LWORK.EQ.-2 ) +* + MINT = .FALSE. + MINW = .FALSE. + IF( TSIZE.EQ.-2 .OR. LWORK.EQ.-2 ) THEN + IF( TSIZE.NE.-1 ) MINT = .TRUE. + IF( LWORK.NE.-1 ) MINW = .TRUE. + END IF +* +* Determine the block size +* + IF( MIN( M, N ).GT.0 ) THEN + MB = ILAENV( 1, 'DGEQR ', ' ', M, N, 1, -1 ) + NB = ILAENV( 1, 'DGEQR ', ' ', M, N, 2, -1 ) + ELSE + MB = M + NB = 1 + END IF + IF( MB.GT.M .OR. MB.LE.N ) MB = M + IF( NB.GT.MIN( M, N ) .OR. NB.LT.1 ) NB = 1 + MINTSZ = N + 5 + IF( MB.GT.N .AND. M.GT.N ) THEN + IF( MOD( M - N, MB - N ).EQ.0 ) THEN + NBLCKS = ( M - N ) / ( MB - N ) + ELSE + NBLCKS = ( M - N ) / ( MB - N ) + 1 + END IF + ELSE + NBLCKS = 1 + END IF +* +* Determine if the workspace size satisfies minimal size +* + LMINWS = .FALSE. + IF( ( TSIZE.LT.MAX( 1, NB*N*NBLCKS + 5 ) .OR. LWORK.LT.NB*N ) + $ .AND. ( LWORK.GE.N ) .AND. ( TSIZE.GE.MINTSZ ) + $ .AND. ( .NOT.LQUERY ) ) THEN + IF( TSIZE.LT.MAX( 1, NB*N*NBLCKS + 5 ) ) THEN + LMINWS = .TRUE. + NB = 1 + MB = M + END IF + IF( LWORK.LT.NB*N ) THEN + LMINWS = .TRUE. + NB = 1 + END IF + END IF +* + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( TSIZE.LT.MAX( 1, NB*N*NBLCKS + 5 ) + $ .AND. ( .NOT.LQUERY ) .AND. ( .NOT.LMINWS ) ) THEN + INFO = -6 + ELSE IF( ( LWORK.LT.MAX( 1, N*NB ) ) .AND. ( .NOT.LQUERY ) + $ .AND. ( .NOT.LMINWS ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( MINT ) THEN + T( 1 ) = MINTSZ + ELSE + T( 1 ) = NB*N*NBLCKS + 5 + END IF + T( 2 ) = MB + T( 3 ) = NB + IF( MINW ) THEN + WORK( 1 ) = MAX( 1, N ) + ELSE + WORK( 1 ) = MAX( 1, NB*N ) + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N ).EQ.0 ) THEN + RETURN + END IF +* +* The QR Decomposition +* + IF( ( M.LE.N ) .OR. ( MB.LE.N ) .OR. ( MB.GE.M ) ) THEN + CALL DGEQRT( M, N, NB, A, LDA, T( 6 ), NB, WORK, INFO ) + ELSE + CALL DLATSQR( M, N, MB, NB, A, LDA, T( 6 ), NB, WORK, + $ LWORK, INFO ) + END IF +* + WORK( 1 ) = MAX( 1, NB*N ) +* + RETURN +* +* End of DGEQR +* + END diff --git a/dspl/liblapack/SRC/dgeqr2.f b/dspl/liblapack/SRC/dgeqr2.f new file mode 100644 index 0000000..c1e91e9 --- /dev/null +++ b/dspl/liblapack/SRC/dgeqr2.f @@ -0,0 +1,192 @@ +*> \brief \b DGEQR2 computes the QR factorization of a general rectangular matrix using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEQR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEQR2 computes a QR factorization of a real m by n matrix A: +*> A = Q * R. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(m,n) by n upper trapezoidal matrix R (R is +*> upper triangular if m >= n); the elements below the diagonal, +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +*> and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + DOUBLE PRECISION AII +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQR2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAU( I ) ) + IF( I.LT.N ) THEN +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + AII = A( I, I ) + A( I, I ) = ONE + CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) + A( I, I ) = AII + END IF + 10 CONTINUE + RETURN +* +* End of DGEQR2 +* + END diff --git a/dspl/liblapack/SRC/dgeqr2p.f b/dspl/liblapack/SRC/dgeqr2p.f new file mode 100644 index 0000000..921f799 --- /dev/null +++ b/dspl/liblapack/SRC/dgeqr2p.f @@ -0,0 +1,195 @@ +*> \brief \b DGEQR2P computes the QR factorization of a general rectangular matrix with non-negative diagonal elements using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEQR2P + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEQR2P computes a QR factorization of a real m by n matrix A: +*> A = Q * R. The diagonal entries of R are nonnegative. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(m,n) by n upper trapezoidal matrix R (R is +*> upper triangular if m >= n). The diagonal entries of R are +*> nonnegative; the elements below the diagonal, +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +*> and tau in TAU(i). +*> +*> See Lapack Working Note 203 for details +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + DOUBLE PRECISION AII +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFGP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQR2P', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL DLARFGP( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAU( I ) ) + IF( I.LT.N ) THEN +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + AII = A( I, I ) + A( I, I ) = ONE + CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) + A( I, I ) = AII + END IF + 10 CONTINUE + RETURN +* +* End of DGEQR2P +* + END diff --git a/dspl/liblapack/SRC/dgeqrf.f b/dspl/liblapack/SRC/dgeqrf.f new file mode 100644 index 0000000..83d7d8d --- /dev/null +++ b/dspl/liblapack/SRC/dgeqrf.f @@ -0,0 +1,270 @@ +*> \brief \b DGEQRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEQRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEQRF computes a QR factorization of a real M-by-N matrix A: +*> A = Q * R. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix R (R is +*> upper triangular if m >= n); the elements below the diagonal, +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of min(m,n) elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +*> and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DGEQR2, DLARFB, DLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DGEQRF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DGEQRF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the QR factorization of the current block +* A(i:m,i:i+ib-1) +* + CALL DGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**T to A(i:m,i+ib:n) from the left +* + CALL DLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL DGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of DGEQRF +* + END diff --git a/dspl/liblapack/SRC/dgeqrfp.f b/dspl/liblapack/SRC/dgeqrfp.f new file mode 100644 index 0000000..d182f98 --- /dev/null +++ b/dspl/liblapack/SRC/dgeqrfp.f @@ -0,0 +1,273 @@ +*> \brief \b DGEQRFP +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEQRFP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEQRFP computes a QR factorization of a real M-by-N matrix A: +*> A = Q * R. The diagonal entries of R are nonnegative. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix R (R is +*> upper triangular if m >= n). The diagonal entries of R +*> are nonnegative; the elements below the diagonal, +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of min(m,n) elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +*> and tau in TAU(i). +*> +*> See Lapack Working Note 203 for details +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DGEQR2P, DLARFB, DLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQRFP', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DGEQRF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DGEQRF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the QR factorization of the current block +* A(i:m,i:i+ib-1) +* + CALL DGEQR2P( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**T to A(i:m,i+ib:n) from the left +* + CALL DLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL DGEQR2P( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of DGEQRFP +* + END diff --git a/dspl/liblapack/SRC/dgeqrt.f b/dspl/liblapack/SRC/dgeqrt.f new file mode 100644 index 0000000..c7d9320 --- /dev/null +++ b/dspl/liblapack/SRC/dgeqrt.f @@ -0,0 +1,218 @@ +*> \brief \b DGEQRT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDT, M, N, NB +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEQRT computes a blocked QR factorization of a real M-by-N matrix A +*> using the compact WY representation of Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The block size to be used in the blocked QR. MIN(M,N) >= NB >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix R (R is +*> upper triangular if M >= N); the elements below the diagonal +*> are the columns of V. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,MIN(M,N)) +*> The upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See below +*> for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (NB*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix V stores the elementary reflectors H(i) in the i-th column +*> below the diagonal. For example, if M=5 and N=3, the matrix V is +*> +*> V = ( 1 ) +*> ( v1 1 ) +*> ( v1 v2 1 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> where the vi's represent the vectors which define H(i), which are returned +*> in the matrix A. The 1's along the diagonal of V are not stored in A. +*> +*> Let K=MIN(M,N). The number of blocks is B = ceiling(K/NB), where each +*> block is of order NB except for the last block, which is of order +*> IB = K - (B-1)*NB. For each of the B blocks, a upper triangular block +*> reflector factor is computed: T1, T2, ..., TB. The NB-by-NB (and IB-by-IB +*> for the last block) T's are stored in the NB-by-K matrix T as +*> +*> T = (T1 T2 ... TB). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDT, M, N, NB +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + INTEGER I, IB, IINFO, K + LOGICAL USE_RECURSIVE_QR + PARAMETER( USE_RECURSIVE_QR=.TRUE. ) +* .. +* .. External Subroutines .. + EXTERNAL DGEQRT2, DGEQRT3, DLARFB, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NB.LT.1 .OR. ( NB.GT.MIN(M,N) .AND. MIN(M,N).GT.0 ) )THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDT.LT.NB ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQRT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) RETURN +* +* Blocked loop of length K +* + DO I = 1, K, NB + IB = MIN( K-I+1, NB ) +* +* Compute the QR factorization of the current block A(I:M,I:I+IB-1) +* + IF( USE_RECURSIVE_QR ) THEN + CALL DGEQRT3( M-I+1, IB, A(I,I), LDA, T(1,I), LDT, IINFO ) + ELSE + CALL DGEQRT2( M-I+1, IB, A(I,I), LDA, T(1,I), LDT, IINFO ) + END IF + IF( I+IB.LE.N ) THEN +* +* Update by applying H**T to A(I:M,I+IB:N) from the left +* + CALL DLARFB( 'L', 'T', 'F', 'C', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, T( 1, I ), LDT, + $ A( I, I+IB ), LDA, WORK , N-I-IB+1 ) + END IF + END DO + RETURN +* +* End of DGEQRT +* + END diff --git a/dspl/liblapack/SRC/dgeqrt2.f b/dspl/liblapack/SRC/dgeqrt2.f new file mode 100644 index 0000000..138dd4d --- /dev/null +++ b/dspl/liblapack/SRC/dgeqrt2.f @@ -0,0 +1,227 @@ +*> \brief \b DGEQRT2 computes a QR factorization of a general real or complex matrix using the compact WY representation of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEQRT2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGEQRT2( M, N, A, LDA, T, LDT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDT, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEQRT2 computes a QR factorization of a real M-by-N matrix A, +*> using the compact WY representation of Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= N. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the real M-by-N matrix A. On exit, the elements on and +*> above the diagonal contain the N-by-N upper triangular matrix R; the +*> elements below the diagonal are the columns of V. See below for +*> further details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,N) +*> The N-by-N upper triangular factor of the block reflector. +*> The elements on and above the diagonal contain the block +*> reflector T; the elements below the diagonal are not used. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix V stores the elementary reflectors H(i) in the i-th column +*> below the diagonal. For example, if M=5 and N=3, the matrix V is +*> +*> V = ( 1 ) +*> ( v1 1 ) +*> ( v1 v2 1 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> where the vi's represent the vectors which define H(i), which are returned +*> in the matrix A. The 1's along the diagonal of V are not stored in A. The +*> block reflector H is then given by +*> +*> H = I - V * T * V**T +*> +*> where V**T is the transpose of V. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGEQRT2( M, N, A, LDA, T, LDT, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDT, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER( ONE = 1.0D+00, ZERO = 0.0D+00 ) +* .. +* .. Local Scalars .. + INTEGER I, K + DOUBLE PRECISION AII, ALPHA +* .. +* .. External Subroutines .. + EXTERNAL DLARFG, DGEMV, DGER, DTRMV, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQRT2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO I = 1, K +* +* Generate elem. refl. H(i) to annihilate A(i+1:m,i), tau(I) -> T(I,1) +* + CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ T( I, 1 ) ) + IF( I.LT.N ) THEN +* +* Apply H(i) to A(I:M,I+1:N) from the left +* + AII = A( I, I ) + A( I, I ) = ONE +* +* W(1:N-I) := A(I:M,I+1:N)^H * A(I:M,I) [W = T(:,N)] +* + CALL DGEMV( 'T',M-I+1, N-I, ONE, A( I, I+1 ), LDA, + $ A( I, I ), 1, ZERO, T( 1, N ), 1 ) +* +* A(I:M,I+1:N) = A(I:m,I+1:N) + alpha*A(I:M,I)*W(1:N-1)^H +* + ALPHA = -(T( I, 1 )) + CALL DGER( M-I+1, N-I, ALPHA, A( I, I ), 1, + $ T( 1, N ), 1, A( I, I+1 ), LDA ) + A( I, I ) = AII + END IF + END DO +* + DO I = 2, N + AII = A( I, I ) + A( I, I ) = ONE +* +* T(1:I-1,I) := alpha * A(I:M,1:I-1)**T * A(I:M,I) +* + ALPHA = -T( I, 1 ) + CALL DGEMV( 'T', M-I+1, I-1, ALPHA, A( I, 1 ), LDA, + $ A( I, I ), 1, ZERO, T( 1, I ), 1 ) + A( I, I ) = AII +* +* T(1:I-1,I) := T(1:I-1,1:I-1) * T(1:I-1,I) +* + CALL DTRMV( 'U', 'N', 'N', I-1, T, LDT, T( 1, I ), 1 ) +* +* T(I,I) = tau(I) +* + T( I, I ) = T( I, 1 ) + T( I, 1) = ZERO + END DO + +* +* End of DGEQRT2 +* + END diff --git a/dspl/liblapack/SRC/dgeqrt3.f b/dspl/liblapack/SRC/dgeqrt3.f new file mode 100644 index 0000000..efec078 --- /dev/null +++ b/dspl/liblapack/SRC/dgeqrt3.f @@ -0,0 +1,257 @@ +*> \brief \b DGEQRT3 recursively computes a QR factorization of a general real or complex matrix using the compact WY representation of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEQRT3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE DGEQRT3( M, N, A, LDA, T, LDT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, LDT +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEQRT3 recursively computes a QR factorization of a real M-by-N +*> matrix A, using the compact WY representation of Q. +*> +*> Based on the algorithm of Elmroth and Gustavson, +*> IBM J. Res. Develop. Vol 44 No. 4 July 2000. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= N. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the real M-by-N matrix A. On exit, the elements on and +*> above the diagonal contain the N-by-N upper triangular matrix R; the +*> elements below the diagonal are the columns of V. See below for +*> further details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,N) +*> The N-by-N upper triangular factor of the block reflector. +*> The elements on and above the diagonal contain the block +*> reflector T; the elements below the diagonal are not used. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix V stores the elementary reflectors H(i) in the i-th column +*> below the diagonal. For example, if M=5 and N=3, the matrix V is +*> +*> V = ( 1 ) +*> ( v1 1 ) +*> ( v1 v2 1 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> where the vi's represent the vectors which define H(i), which are returned +*> in the matrix A. The 1's along the diagonal of V are not stored in A. The +*> block reflector H is then given by +*> +*> H = I - V * T * V**T +*> +*> where V**T is the transpose of V. +*> +*> For details of the algorithm, see Elmroth and Gustavson (cited above). +*> \endverbatim +*> +* ===================================================================== + RECURSIVE SUBROUTINE DGEQRT3( M, N, A, LDA, T, LDT, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, LDT +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+00 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, J, J1, N1, N2, IINFO +* .. +* .. External Subroutines .. + EXTERNAL DLARFG, DTRMM, DGEMM, XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N .LT. 0 ) THEN + INFO = -2 + ELSE IF( M .LT. N ) THEN + INFO = -1 + ELSE IF( LDA .LT. MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LDT .LT. MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGEQRT3', -INFO ) + RETURN + END IF +* + IF( N.EQ.1 ) THEN +* +* Compute Householder transform when N=1 +* + CALL DLARFG( M, A(1,1), A( MIN( 2, M ), 1 ), 1, T(1,1) ) +* + ELSE +* +* Otherwise, split A into blocks... +* + N1 = N/2 + N2 = N-N1 + J1 = MIN( N1+1, N ) + I1 = MIN( N+1, M ) +* +* Compute A(1:M,1:N1) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1^H +* + CALL DGEQRT3( M, N1, A, LDA, T, LDT, IINFO ) +* +* Compute A(1:M,J1:N) = Q1^H A(1:M,J1:N) [workspace: T(1:N1,J1:N)] +* + DO J=1,N2 + DO I=1,N1 + T( I, J+N1 ) = A( I, J+N1 ) + END DO + END DO + CALL DTRMM( 'L', 'L', 'T', 'U', N1, N2, ONE, + & A, LDA, T( 1, J1 ), LDT ) +* + CALL DGEMM( 'T', 'N', N1, N2, M-N1, ONE, A( J1, 1 ), LDA, + & A( J1, J1 ), LDA, ONE, T( 1, J1 ), LDT) +* + CALL DTRMM( 'L', 'U', 'T', 'N', N1, N2, ONE, + & T, LDT, T( 1, J1 ), LDT ) +* + CALL DGEMM( 'N', 'N', M-N1, N2, N1, -ONE, A( J1, 1 ), LDA, + & T( 1, J1 ), LDT, ONE, A( J1, J1 ), LDA ) +* + CALL DTRMM( 'L', 'L', 'N', 'U', N1, N2, ONE, + & A, LDA, T( 1, J1 ), LDT ) +* + DO J=1,N2 + DO I=1,N1 + A( I, J+N1 ) = A( I, J+N1 ) - T( I, J+N1 ) + END DO + END DO +* +* Compute A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2^H +* + CALL DGEQRT3( M-N1, N2, A( J1, J1 ), LDA, + & T( J1, J1 ), LDT, IINFO ) +* +* Compute T3 = T(1:N1,J1:N) = -T1 Y1^H Y2 T2 +* + DO I=1,N1 + DO J=1,N2 + T( I, J+N1 ) = (A( J+N1, I )) + END DO + END DO +* + CALL DTRMM( 'R', 'L', 'N', 'U', N1, N2, ONE, + & A( J1, J1 ), LDA, T( 1, J1 ), LDT ) +* + CALL DGEMM( 'T', 'N', N1, N2, M-N, ONE, A( I1, 1 ), LDA, + & A( I1, J1 ), LDA, ONE, T( 1, J1 ), LDT ) +* + CALL DTRMM( 'L', 'U', 'N', 'N', N1, N2, -ONE, T, LDT, + & T( 1, J1 ), LDT ) +* + CALL DTRMM( 'R', 'U', 'N', 'N', N1, N2, ONE, + & T( J1, J1 ), LDT, T( 1, J1 ), LDT ) +* +* Y = (Y1,Y2); R = [ R1 A(1:N1,J1:N) ]; T = [T1 T3] +* [ 0 R2 ] [ 0 T2] +* + END IF +* + RETURN +* +* End of DGEQRT3 +* + END diff --git a/dspl/liblapack/SRC/dgerfs.f b/dspl/liblapack/SRC/dgerfs.f new file mode 100644 index 0000000..a6f14e2 --- /dev/null +++ b/dspl/liblapack/SRC/dgerfs.f @@ -0,0 +1,438 @@ +*> \brief \b DGERFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGERFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, +* X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGERFS improves the computed solution to a system of linear +*> equations and provides error bounds and backward error estimates for +*> the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The original N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> The factors L and U from the factorization A = P*L*U +*> as computed by DGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from DGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by DGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + SUBROUTINE DGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, + $ X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANST + INTEGER COUNT, I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGEMV, DGETRS, DLACN2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGERFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DGEMV( TRANS, N, N, -ONE, A, LDA, X( 1, J ), 1, ONE, + $ WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(op(A))*abs(X) + abs(B). +* + IF( NOTRAN ) THEN + DO 50 K = 1, N + XK = ABS( X( K, J ) ) + DO 40 I = 1, N + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + 40 CONTINUE + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + DO 60 I = 1, N + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 60 CONTINUE + WORK( K ) = WORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL DGETRS( TRANS, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N, + $ INFO ) + CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL DLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**T). +* + CALL DGETRS( TRANST, N, 1, AF, LDAF, IPIV, WORK( N+1 ), + $ N, INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 110 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 120 CONTINUE + CALL DGETRS( TRANS, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N, + $ INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of DGERFS +* + END diff --git a/dspl/liblapack/SRC/dgerfsx.f b/dspl/liblapack/SRC/dgerfsx.f new file mode 100644 index 0000000..aafca8d --- /dev/null +++ b/dspl/liblapack/SRC/dgerfsx.f @@ -0,0 +1,731 @@ +*> \brief \b DGERFSX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGERFSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGERFSX( TRANS, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, +* R, C, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS, EQUED +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX , * ), WORK( * ) +* DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGERFSX improves the computed solution to a system of linear +*> equations and provides error bounds and backward error estimates +*> for the solution. In addition to normwise error bound, the code +*> provides maximum componentwise error bound if possible. See +*> comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the +*> error bounds. +*> +*> The original system of linear equations may have been equilibrated +*> before calling this routine, as described by arguments EQUED, R +*> and C below. In this case, the solution and error bounds returned +*> are for the original unequilibrated system. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done to A +*> before calling this routine. This is needed to compute +*> the solution and error bounds correctly. +*> = 'N': No equilibration +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> The right hand side B has been changed accordingly. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The original N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> The factors L and U from the factorization A = P*L*U +*> as computed by DGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from DGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. +*> If R is accessed, each element of R should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. +*> If C is accessed, each element of C should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by DGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is DOUBLE PRECISION array, dimension (NPARAMS) +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0D+0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + SUBROUTINE DGERFSX( TRANS, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ R, C, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS, EQUED + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX , * ), WORK( * ) + DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION ITREF_DEFAULT, ITHRESH_DEFAULT + DOUBLE PRECISION COMPONENTWISE_DEFAULT, RTHRESH_DEFAULT + DOUBLE PRECISION DZTHRESH_DEFAULT + PARAMETER ( ITREF_DEFAULT = 1.0D+0 ) + PARAMETER ( ITHRESH_DEFAULT = 10.0D+0 ) + PARAMETER ( COMPONENTWISE_DEFAULT = 1.0D+0 ) + PARAMETER ( RTHRESH_DEFAULT = 0.5D+0 ) + PARAMETER ( DZTHRESH_DEFAULT = 0.25D+0 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. Local Scalars .. + CHARACTER(1) NORM + LOGICAL ROWEQU, COLEQU, NOTRAN + INTEGER J, TRANS_TYPE, PREC_TYPE, REF_TYPE + INTEGER N_NORMS + DOUBLE PRECISION ANORM, RCOND_TMP + DOUBLE PRECISION ILLRCOND_THRESH, ERR_LBND, CWISE_WRONG + LOGICAL IGNORE_CWISE + INTEGER ITHRESH + DOUBLE PRECISION RTHRESH, UNSTABLE_THRESH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DGECON, DLA_GERFSX_EXTENDED +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. External Functions .. + EXTERNAL LSAME, ILATRANS, ILAPREC + EXTERNAL DLAMCH, DLANGE, DLA_GERCOND + DOUBLE PRECISION DLAMCH, DLANGE, DLA_GERCOND + LOGICAL LSAME + INTEGER ILATRANS, ILAPREC +* .. +* .. Executable Statements .. +* +* Check the input parameters. +* + INFO = 0 + TRANS_TYPE = ILATRANS( TRANS ) + REF_TYPE = INT( ITREF_DEFAULT ) + IF ( NPARAMS .GE. LA_LINRX_ITREF_I ) THEN + IF ( PARAMS( LA_LINRX_ITREF_I ) .LT. 0.0D+0 ) THEN + PARAMS( LA_LINRX_ITREF_I ) = ITREF_DEFAULT + ELSE + REF_TYPE = PARAMS( LA_LINRX_ITREF_I ) + END IF + END IF +* +* Set default parameters. +* + ILLRCOND_THRESH = DBLE( N ) * DLAMCH( 'Epsilon' ) + ITHRESH = INT( ITHRESH_DEFAULT ) + RTHRESH = RTHRESH_DEFAULT + UNSTABLE_THRESH = DZTHRESH_DEFAULT + IGNORE_CWISE = COMPONENTWISE_DEFAULT .EQ. 0.0D+0 +* + IF ( NPARAMS.GE.LA_LINRX_ITHRESH_I ) THEN + IF ( PARAMS( LA_LINRX_ITHRESH_I ).LT.0.0D+0 ) THEN + PARAMS( LA_LINRX_ITHRESH_I ) = ITHRESH + ELSE + ITHRESH = INT( PARAMS( LA_LINRX_ITHRESH_I ) ) + END IF + END IF + IF ( NPARAMS.GE.LA_LINRX_CWISE_I ) THEN + IF ( PARAMS( LA_LINRX_CWISE_I ).LT.0.0D+0 ) THEN + IF ( IGNORE_CWISE ) THEN + PARAMS( LA_LINRX_CWISE_I ) = 0.0D+0 + ELSE + PARAMS( LA_LINRX_CWISE_I ) = 1.0D+0 + END IF + ELSE + IGNORE_CWISE = PARAMS( LA_LINRX_CWISE_I ) .EQ. 0.0D+0 + END IF + END IF + IF ( REF_TYPE .EQ. 0 .OR. N_ERR_BNDS .EQ. 0 ) THEN + N_NORMS = 0 + ELSE IF ( IGNORE_CWISE ) THEN + N_NORMS = 1 + ELSE + N_NORMS = 2 + END IF +* + NOTRAN = LSAME( TRANS, 'N' ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) +* +* Test input parameters. +* + IF( TRANS_TYPE.EQ.-1 ) THEN + INFO = -1 + ELSE IF( .NOT.ROWEQU .AND. .NOT.COLEQU .AND. + $ .NOT.LSAME( EQUED, 'N' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGERFSX', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + RCOND = 1.0D+0 + DO J = 1, NRHS + BERR( J ) = 0.0D+0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I) = 0.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 0.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 1.0D+0 + END IF + END DO + RETURN + END IF +* +* Default to failure. +* + RCOND = 0.0D+0 + DO J = 1, NRHS + BERR( J ) = 1.0D+0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 0.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 0.0D+0 + END IF + END DO +* +* Compute the norm of A and the reciprocal of the condition +* number of A. +* + IF( NOTRAN ) THEN + NORM = 'I' + ELSE + NORM = '1' + END IF + ANORM = DLANGE( NORM, N, N, A, LDA, WORK ) + CALL DGECON( NORM, N, AF, LDAF, ANORM, RCOND, WORK, IWORK, INFO ) +* +* Perform refinement on each right-hand side +* + IF ( REF_TYPE .NE. 0 ) THEN + + PREC_TYPE = ILAPREC( 'E' ) + + IF ( NOTRAN ) THEN + CALL DLA_GERFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, + $ NRHS, A, LDA, AF, LDAF, IPIV, COLEQU, C, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, WORK(N+1), WORK(1), WORK(2*N+1), + $ WORK(1), RCOND, ITHRESH, RTHRESH, UNSTABLE_THRESH, + $ IGNORE_CWISE, INFO ) + ELSE + CALL DLA_GERFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, + $ NRHS, A, LDA, AF, LDAF, IPIV, ROWEQU, R, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, WORK(N+1), WORK(1), WORK(2*N+1), + $ WORK(1), RCOND, ITHRESH, RTHRESH, UNSTABLE_THRESH, + $ IGNORE_CWISE, INFO ) + END IF + END IF + + ERR_LBND = MAX( 10.0D+0, SQRT( DBLE( N ) ) ) * DLAMCH( 'Epsilon' ) + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 1 ) THEN +* +* Compute scaled normwise condition number cond(A*C). +* + IF ( COLEQU .AND. NOTRAN ) THEN + RCOND_TMP = DLA_GERCOND( TRANS, N, A, LDA, AF, LDAF, IPIV, + $ -1, C, INFO, WORK, IWORK ) + ELSE IF ( ROWEQU .AND. .NOT. NOTRAN ) THEN + RCOND_TMP = DLA_GERCOND( TRANS, N, A, LDA, AF, LDAF, IPIV, + $ -1, R, INFO, WORK, IWORK ) + ELSE + RCOND_TMP = DLA_GERCOND( TRANS, N, A, LDA, AF, LDAF, IPIV, + $ 0, R, INFO, WORK, IWORK ) + END IF + DO J = 1, NRHS +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .GT. 1.0D+0 ) + $ ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 0.0D+0 + IF ( INFO .LE. N ) INFO = N + J + ELSE IF ( ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .LT. ERR_LBND ) + $ THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + END DO + END IF + + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 2 ) THEN +* +* Compute componentwise condition number cond(A*diag(Y(:,J))) for +* each right-hand side using the current solution as an estimate of +* the true solution. If the componentwise error estimate is too +* large, then the solution is a lousy estimate of truth and the +* estimated RCOND may be too optimistic. To avoid misleading users, +* the inverse condition number is set to 0.0 when the estimated +* cwise error is at least CWISE_WRONG. +* + CWISE_WRONG = SQRT( DLAMCH( 'Epsilon' ) ) + DO J = 1, NRHS + IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .LT. CWISE_WRONG ) + $ THEN + RCOND_TMP = DLA_GERCOND( TRANS, N, A, LDA, AF, LDAF, + $ IPIV, 1, X(1,J), INFO, WORK, IWORK ) + ELSE + RCOND_TMP = 0.0D+0 + END IF +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .GT. 1.0D+0 ) + $ ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 0.0D+0 + IF ( PARAMS( LA_LINRX_CWISE_I ) .EQ. 1.0D+0 + $ .AND. INFO.LT.N + J ) INFO = N + J + ELSE IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) + $ .LT. ERR_LBND ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + END DO + END IF +* + RETURN +* +* End of DGERFSX +* + END diff --git a/dspl/liblapack/SRC/dgerq2.f b/dspl/liblapack/SRC/dgerq2.f new file mode 100644 index 0000000..b1713c1 --- /dev/null +++ b/dspl/liblapack/SRC/dgerq2.f @@ -0,0 +1,193 @@ +*> \brief \b DGERQ2 computes the RQ factorization of a general rectangular matrix using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGERQ2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGERQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGERQ2 computes an RQ factorization of a real m by n matrix A: +*> A = R * Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, if m <= n, the upper triangle of the subarray +*> A(1:m,n-m+1:n) contains the m by m upper triangular matrix R; +*> if m >= n, the elements on and above the (m-n)-th subdiagonal +*> contain the m by n upper trapezoidal matrix R; the remaining +*> elements, with the array TAU, represent the orthogonal matrix +*> Q as a product of elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in +*> A(m-k+i,1:n-k+i-1), and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGERQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + DOUBLE PRECISION AII +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGERQ2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = K, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* A(m-k+i,1:n-k+i-1) +* + CALL DLARFG( N-K+I, A( M-K+I, N-K+I ), A( M-K+I, 1 ), LDA, + $ TAU( I ) ) +* +* Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right +* + AII = A( M-K+I, N-K+I ) + A( M-K+I, N-K+I ) = ONE + CALL DLARF( 'Right', M-K+I-1, N-K+I, A( M-K+I, 1 ), LDA, + $ TAU( I ), A, LDA, WORK ) + A( M-K+I, N-K+I ) = AII + 10 CONTINUE + RETURN +* +* End of DGERQ2 +* + END diff --git a/dspl/liblapack/SRC/dgerqf.f b/dspl/liblapack/SRC/dgerqf.f new file mode 100644 index 0000000..20f2668 --- /dev/null +++ b/dspl/liblapack/SRC/dgerqf.f @@ -0,0 +1,287 @@ +*> \brief \b DGERQF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGERQF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGERQF computes an RQ factorization of a real M-by-N matrix A: +*> A = R * Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if m <= n, the upper triangle of the subarray +*> A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R; +*> if m >= n, the elements on and above the (m-n)-th subdiagonal +*> contain the M-by-N upper trapezoidal matrix R; +*> the remaining elements, with the array TAU, represent the +*> orthogonal matrix Q as a product of min(m,n) elementary +*> reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in +*> A(m-k+i,1:n-k+i-1), and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT, + $ MU, NB, NBMIN, NU, NX +* .. +* .. External Subroutines .. + EXTERNAL DGERQ2, DLARFB, DLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF +* + IF( INFO.EQ.0 ) THEN + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'DGERQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGERQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DGERQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DGERQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially. +* The last kk rows are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* + DO 10 I = K - KK + KI + 1, K - KK + 1, -NB + IB = MIN( K-I+1, NB ) +* +* Compute the RQ factorization of the current block +* A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1) +* + CALL DGERQ2( IB, N-K+I+IB-1, A( M-K+I, 1 ), LDA, TAU( I ), + $ WORK, IINFO ) + IF( M-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL DLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, + $ A( M-K+I, 1 ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right +* + CALL DLARFB( 'Right', 'No transpose', 'Backward', + $ 'Rowwise', M-K+I-1, N-K+I+IB-1, IB, + $ A( M-K+I, 1 ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + MU = M - K + I + NB - 1 + NU = N - K + I + NB - 1 + ELSE + MU = M + NU = N + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 .AND. NU.GT.0 ) + $ CALL DGERQ2( MU, NU, A, LDA, TAU, WORK, IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of DGERQF +* + END diff --git a/dspl/liblapack/SRC/dgesc2.f b/dspl/liblapack/SRC/dgesc2.f new file mode 100644 index 0000000..2f01a76 --- /dev/null +++ b/dspl/liblapack/SRC/dgesc2.f @@ -0,0 +1,201 @@ +*> \brief \b DGESC2 solves a system of linear equations using the LU factorization with complete pivoting computed by sgetc2. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGESC2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) +* +* .. Scalar Arguments .. +* INTEGER LDA, N +* DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), JPIV( * ) +* DOUBLE PRECISION A( LDA, * ), RHS( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGESC2 solves a system of linear equations +*> +*> A * X = scale* RHS +*> +*> with a general N-by-N matrix A using the LU factorization with +*> complete pivoting computed by DGETC2. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the LU part of the factorization of the n-by-n +*> matrix A computed by DGETC2: A = P * L * U * Q +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] RHS +*> \verbatim +*> RHS is DOUBLE PRECISION array, dimension (N). +*> On entry, the right hand side vector b. +*> On exit, the solution vector X. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N). +*> The pivot indices; for 1 <= i <= N, row i of the +*> matrix has been interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] JPIV +*> \verbatim +*> JPIV is INTEGER array, dimension (N). +*> The pivot indices; for 1 <= j <= N, column j of the +*> matrix has been interchanged with column JPIV(j). +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> On exit, SCALE contains the scale factor. SCALE is chosen +*> 0 <= SCALE <= 1 to prevent owerflow in the solution. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleGEauxiliary +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== + SUBROUTINE DGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER LDA, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + DOUBLE PRECISION A( LDA, * ), RHS( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, TWO + PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION BIGNUM, EPS, SMLNUM, TEMP +* .. +* .. External Subroutines .. + EXTERNAL DLASWP, DSCAL, DLABAD +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL IDAMAX, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Set constant to control owerflow +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Apply permutations IPIV to RHS +* + CALL DLASWP( 1, RHS, LDA, 1, N-1, IPIV, 1 ) +* +* Solve for L part +* + DO 20 I = 1, N - 1 + DO 10 J = I + 1, N + RHS( J ) = RHS( J ) - A( J, I )*RHS( I ) + 10 CONTINUE + 20 CONTINUE +* +* Solve for U part +* + SCALE = ONE +* +* Check for scaling +* + I = IDAMAX( N, RHS, 1 ) + IF( TWO*SMLNUM*ABS( RHS( I ) ).GT.ABS( A( N, N ) ) ) THEN + TEMP = ( ONE / TWO ) / ABS( RHS( I ) ) + CALL DSCAL( N, TEMP, RHS( 1 ), 1 ) + SCALE = SCALE*TEMP + END IF +* + DO 40 I = N, 1, -1 + TEMP = ONE / A( I, I ) + RHS( I ) = RHS( I )*TEMP + DO 30 J = I + 1, N + RHS( I ) = RHS( I ) - RHS( J )*( A( I, J )*TEMP ) + 30 CONTINUE + 40 CONTINUE +* +* Apply permutations JPIV to the solution (RHS) +* + CALL DLASWP( 1, RHS, LDA, 1, N-1, JPIV, -1 ) + RETURN +* +* End of DGESC2 +* + END diff --git a/dspl/liblapack/SRC/dgesdd.f b/dspl/liblapack/SRC/dgesdd.f new file mode 100644 index 0000000..926607f --- /dev/null +++ b/dspl/liblapack/SRC/dgesdd.f @@ -0,0 +1,1548 @@ +*> \brief \b DGESDD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGESDD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, +* WORK, LWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ +* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ), +* $ VT( LDVT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGESDD computes the singular value decomposition (SVD) of a real +*> M-by-N matrix A, optionally computing the left and right singular +*> vectors. If singular vectors are desired, it uses a +*> divide-and-conquer algorithm. +*> +*> The SVD is written +*> +*> A = U * SIGMA * transpose(V) +*> +*> where SIGMA is an M-by-N matrix which is zero except for its +*> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and +*> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA +*> are the singular values of A; they are real and non-negative, and +*> are returned in descending order. The first min(m,n) columns of +*> U and V are the left and right singular vectors of A. +*> +*> Note that the routine returns VT = V**T, not V. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> Specifies options for computing all or part of the matrix U: +*> = 'A': all M columns of U and all N rows of V**T are +*> returned in the arrays U and VT; +*> = 'S': the first min(M,N) columns of U and the first +*> min(M,N) rows of V**T are returned in the arrays U +*> and VT; +*> = 'O': If M >= N, the first N columns of U are overwritten +*> on the array A and all rows of V**T are returned in +*> the array VT; +*> otherwise, all columns of U are returned in the +*> array U and the first M rows of V**T are overwritten +*> in the array A; +*> = 'N': no columns of U or rows of V**T are computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if JOBZ = 'O', A is overwritten with the first N columns +*> of U (the left singular vectors, stored +*> columnwise) if M >= N; +*> A is overwritten with the first M rows +*> of V**T (the right singular vectors, stored +*> rowwise) otherwise. +*> if JOBZ .ne. 'O', the contents of A are destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (min(M,N)) +*> The singular values of A, sorted so that S(i) >= S(i+1). +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension (LDU,UCOL) +*> UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; +*> UCOL = min(M,N) if JOBZ = 'S'. +*> If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M +*> orthogonal matrix U; +*> if JOBZ = 'S', U contains the first min(M,N) columns of U +*> (the left singular vectors, stored columnwise); +*> if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= 1; if +*> JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is DOUBLE PRECISION array, dimension (LDVT,N) +*> If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the +*> N-by-N orthogonal matrix V**T; +*> if JOBZ = 'S', VT contains the first min(M,N) rows of +*> V**T (the right singular vectors, stored rowwise); +*> if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= 1; +*> if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; +*> if JOBZ = 'S', LDVT >= min(M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK; +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 1. +*> If LWORK = -1, a workspace query is assumed. The optimal +*> size for the WORK array is calculated and stored in WORK(1), +*> and no other work except argument checking is performed. +*> +*> Let mx = max(M,N) and mn = min(M,N). +*> If JOBZ = 'N', LWORK >= 3*mn + max( mx, 7*mn ). +*> If JOBZ = 'O', LWORK >= 3*mn + max( mx, 5*mn*mn + 4*mn ). +*> If JOBZ = 'S', LWORK >= 4*mn*mn + 7*mn. +*> If JOBZ = 'A', LWORK >= 4*mn*mn + 6*mn + mx. +*> These are not tight minimums in all cases; see comments inside code. +*> For good performance, LWORK should generally be larger; +*> a query is recommended. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (8*min(M,N)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: DBDSDC did not converge, updating process failed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleGEsing +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, + $ WORK, LWORK, IWORK, INFO ) + implicit none +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ + INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS + INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IL, + $ IR, ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT, + $ LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK, + $ MNTHR, NWORK, WRKBL + INTEGER LWORK_DGEBRD_MN, LWORK_DGEBRD_MM, + $ LWORK_DGEBRD_NN, LWORK_DGELQF_MN, + $ LWORK_DGEQRF_MN, + $ LWORK_DORGBR_P_MM, LWORK_DORGBR_Q_NN, + $ LWORK_DORGLQ_MN, LWORK_DORGLQ_NN, + $ LWORK_DORGQR_MM, LWORK_DORGQR_MN, + $ LWORK_DORMBR_PRT_MM, LWORK_DORMBR_QLN_MM, + $ LWORK_DORMBR_PRT_MN, LWORK_DORMBR_QLN_MN, + $ LWORK_DORMBR_PRT_NN, LWORK_DORMBR_QLN_NN + DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DBDSDC, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY, + $ DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL DLAMCH, DLANGE, LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + WNTQA = LSAME( JOBZ, 'A' ) + WNTQS = LSAME( JOBZ, 'S' ) + WNTQAS = WNTQA .OR. WNTQS + WNTQO = LSAME( JOBZ, 'O' ) + WNTQN = LSAME( JOBZ, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.( WNTQA .OR. WNTQS .OR. WNTQO .OR. WNTQN ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDU.LT.1 .OR. ( WNTQAS .AND. LDU.LT.M ) .OR. + $ ( WNTQO .AND. M.LT.N .AND. LDU.LT.M ) ) THEN + INFO = -8 + ELSE IF( LDVT.LT.1 .OR. ( WNTQA .AND. LDVT.LT.N ) .OR. + $ ( WNTQS .AND. LDVT.LT.MINMN ) .OR. + $ ( WNTQO .AND. M.GE.N .AND. LDVT.LT.N ) ) THEN + INFO = -10 + END IF +* +* Compute workspace +* Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace allocated at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + BDSPAC = 0 + MNTHR = INT( MINMN*11.0D0 / 6.0D0 ) + IF( M.GE.N .AND. MINMN.GT.0 ) THEN +* +* Compute space needed for DBDSDC +* + IF( WNTQN ) THEN +* dbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6) +* keep 7*N for backwards compatability. + BDSPAC = 7*N + ELSE + BDSPAC = 3*N*N + 4*N + END IF +* +* Compute space preferred for each routine + CALL DGEBRD( M, N, DUM(1), M, DUM(1), DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, IERR ) + LWORK_DGEBRD_MN = INT( DUM(1) ) +* + CALL DGEBRD( N, N, DUM(1), N, DUM(1), DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, IERR ) + LWORK_DGEBRD_NN = INT( DUM(1) ) +* + CALL DGEQRF( M, N, DUM(1), M, DUM(1), DUM(1), -1, IERR ) + LWORK_DGEQRF_MN = INT( DUM(1) ) +* + CALL DORGBR( 'Q', N, N, N, DUM(1), N, DUM(1), DUM(1), -1, + $ IERR ) + LWORK_DORGBR_Q_NN = INT( DUM(1) ) +* + CALL DORGQR( M, M, N, DUM(1), M, DUM(1), DUM(1), -1, IERR ) + LWORK_DORGQR_MM = INT( DUM(1) ) +* + CALL DORGQR( M, N, N, DUM(1), M, DUM(1), DUM(1), -1, IERR ) + LWORK_DORGQR_MN = INT( DUM(1) ) +* + CALL DORMBR( 'P', 'R', 'T', N, N, N, DUM(1), N, + $ DUM(1), DUM(1), N, DUM(1), -1, IERR ) + LWORK_DORMBR_PRT_NN = INT( DUM(1) ) +* + CALL DORMBR( 'Q', 'L', 'N', N, N, N, DUM(1), N, + $ DUM(1), DUM(1), N, DUM(1), -1, IERR ) + LWORK_DORMBR_QLN_NN = INT( DUM(1) ) +* + CALL DORMBR( 'Q', 'L', 'N', M, N, N, DUM(1), M, + $ DUM(1), DUM(1), M, DUM(1), -1, IERR ) + LWORK_DORMBR_QLN_MN = INT( DUM(1) ) +* + CALL DORMBR( 'Q', 'L', 'N', M, M, N, DUM(1), M, + $ DUM(1), DUM(1), M, DUM(1), -1, IERR ) + LWORK_DORMBR_QLN_MM = INT( DUM(1) ) +* + IF( M.GE.MNTHR ) THEN + IF( WNTQN ) THEN +* +* Path 1 (M >> N, JOBZ='N') +* + WRKBL = N + LWORK_DGEQRF_MN + WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD_NN ) + MAXWRK = MAX( WRKBL, BDSPAC + N ) + MINWRK = BDSPAC + N + ELSE IF( WNTQO ) THEN +* +* Path 2 (M >> N, JOBZ='O') +* + WRKBL = N + LWORK_DGEQRF_MN + WRKBL = MAX( WRKBL, N + LWORK_DORGQR_MN ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD_NN ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_QLN_NN ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_PRT_NN ) + WRKBL = MAX( WRKBL, 3*N + BDSPAC ) + MAXWRK = WRKBL + 2*N*N + MINWRK = BDSPAC + 2*N*N + 3*N + ELSE IF( WNTQS ) THEN +* +* Path 3 (M >> N, JOBZ='S') +* + WRKBL = N + LWORK_DGEQRF_MN + WRKBL = MAX( WRKBL, N + LWORK_DORGQR_MN ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD_NN ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_QLN_NN ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_PRT_NN ) + WRKBL = MAX( WRKBL, 3*N + BDSPAC ) + MAXWRK = WRKBL + N*N + MINWRK = BDSPAC + N*N + 3*N + ELSE IF( WNTQA ) THEN +* +* Path 4 (M >> N, JOBZ='A') +* + WRKBL = N + LWORK_DGEQRF_MN + WRKBL = MAX( WRKBL, N + LWORK_DORGQR_MM ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD_NN ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_QLN_NN ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_PRT_NN ) + WRKBL = MAX( WRKBL, 3*N + BDSPAC ) + MAXWRK = WRKBL + N*N + MINWRK = N*N + MAX( 3*N + BDSPAC, N + M ) + END IF + ELSE +* +* Path 5 (M >= N, but not much larger) +* + WRKBL = 3*N + LWORK_DGEBRD_MN + IF( WNTQN ) THEN +* Path 5n (M >= N, jobz='N') + MAXWRK = MAX( WRKBL, 3*N + BDSPAC ) + MINWRK = 3*N + MAX( M, BDSPAC ) + ELSE IF( WNTQO ) THEN +* Path 5o (M >= N, jobz='O') + WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_PRT_NN ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_QLN_MN ) + WRKBL = MAX( WRKBL, 3*N + BDSPAC ) + MAXWRK = WRKBL + M*N + MINWRK = 3*N + MAX( M, N*N + BDSPAC ) + ELSE IF( WNTQS ) THEN +* Path 5s (M >= N, jobz='S') + WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_QLN_MN ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_PRT_NN ) + MAXWRK = MAX( WRKBL, 3*N + BDSPAC ) + MINWRK = 3*N + MAX( M, BDSPAC ) + ELSE IF( WNTQA ) THEN +* Path 5a (M >= N, jobz='A') + WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_QLN_MM ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORMBR_PRT_NN ) + MAXWRK = MAX( WRKBL, 3*N + BDSPAC ) + MINWRK = 3*N + MAX( M, BDSPAC ) + END IF + END IF + ELSE IF( MINMN.GT.0 ) THEN +* +* Compute space needed for DBDSDC +* + IF( WNTQN ) THEN +* dbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6) +* keep 7*N for backwards compatability. + BDSPAC = 7*M + ELSE + BDSPAC = 3*M*M + 4*M + END IF +* +* Compute space preferred for each routine + CALL DGEBRD( M, N, DUM(1), M, DUM(1), DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, IERR ) + LWORK_DGEBRD_MN = INT( DUM(1) ) +* + CALL DGEBRD( M, M, A, M, S, DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, IERR ) + LWORK_DGEBRD_MM = INT( DUM(1) ) +* + CALL DGELQF( M, N, A, M, DUM(1), DUM(1), -1, IERR ) + LWORK_DGELQF_MN = INT( DUM(1) ) +* + CALL DORGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR ) + LWORK_DORGLQ_NN = INT( DUM(1) ) +* + CALL DORGLQ( M, N, M, A, M, DUM(1), DUM(1), -1, IERR ) + LWORK_DORGLQ_MN = INT( DUM(1) ) +* + CALL DORGBR( 'P', M, M, M, A, N, DUM(1), DUM(1), -1, IERR ) + LWORK_DORGBR_P_MM = INT( DUM(1) ) +* + CALL DORMBR( 'P', 'R', 'T', M, M, M, DUM(1), M, + $ DUM(1), DUM(1), M, DUM(1), -1, IERR ) + LWORK_DORMBR_PRT_MM = INT( DUM(1) ) +* + CALL DORMBR( 'P', 'R', 'T', M, N, M, DUM(1), M, + $ DUM(1), DUM(1), M, DUM(1), -1, IERR ) + LWORK_DORMBR_PRT_MN = INT( DUM(1) ) +* + CALL DORMBR( 'P', 'R', 'T', N, N, M, DUM(1), N, + $ DUM(1), DUM(1), N, DUM(1), -1, IERR ) + LWORK_DORMBR_PRT_NN = INT( DUM(1) ) +* + CALL DORMBR( 'Q', 'L', 'N', M, M, M, DUM(1), M, + $ DUM(1), DUM(1), M, DUM(1), -1, IERR ) + LWORK_DORMBR_QLN_MM = INT( DUM(1) ) +* + IF( N.GE.MNTHR ) THEN + IF( WNTQN ) THEN +* +* Path 1t (N >> M, JOBZ='N') +* + WRKBL = M + LWORK_DGELQF_MN + WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD_MM ) + MAXWRK = MAX( WRKBL, BDSPAC + M ) + MINWRK = BDSPAC + M + ELSE IF( WNTQO ) THEN +* +* Path 2t (N >> M, JOBZ='O') +* + WRKBL = M + LWORK_DGELQF_MN + WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_MN ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD_MM ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_QLN_MM ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_PRT_MM ) + WRKBL = MAX( WRKBL, 3*M + BDSPAC ) + MAXWRK = WRKBL + 2*M*M + MINWRK = BDSPAC + 2*M*M + 3*M + ELSE IF( WNTQS ) THEN +* +* Path 3t (N >> M, JOBZ='S') +* + WRKBL = M + LWORK_DGELQF_MN + WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_MN ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD_MM ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_QLN_MM ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_PRT_MM ) + WRKBL = MAX( WRKBL, 3*M + BDSPAC ) + MAXWRK = WRKBL + M*M + MINWRK = BDSPAC + M*M + 3*M + ELSE IF( WNTQA ) THEN +* +* Path 4t (N >> M, JOBZ='A') +* + WRKBL = M + LWORK_DGELQF_MN + WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_NN ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD_MM ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_QLN_MM ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_PRT_MM ) + WRKBL = MAX( WRKBL, 3*M + BDSPAC ) + MAXWRK = WRKBL + M*M + MINWRK = M*M + MAX( 3*M + BDSPAC, M + N ) + END IF + ELSE +* +* Path 5t (N > M, but not much larger) +* + WRKBL = 3*M + LWORK_DGEBRD_MN + IF( WNTQN ) THEN +* Path 5tn (N > M, jobz='N') + MAXWRK = MAX( WRKBL, 3*M + BDSPAC ) + MINWRK = 3*M + MAX( N, BDSPAC ) + ELSE IF( WNTQO ) THEN +* Path 5to (N > M, jobz='O') + WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_QLN_MM ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_PRT_MN ) + WRKBL = MAX( WRKBL, 3*M + BDSPAC ) + MAXWRK = WRKBL + M*N + MINWRK = 3*M + MAX( N, M*M + BDSPAC ) + ELSE IF( WNTQS ) THEN +* Path 5ts (N > M, jobz='S') + WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_QLN_MM ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_PRT_MN ) + MAXWRK = MAX( WRKBL, 3*M + BDSPAC ) + MINWRK = 3*M + MAX( N, BDSPAC ) + ELSE IF( WNTQA ) THEN +* Path 5ta (N > M, jobz='A') + WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_QLN_MM ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORMBR_PRT_NN ) + MAXWRK = MAX( WRKBL, 3*M + BDSPAC ) + MINWRK = 3*M + MAX( N, BDSPAC ) + END IF + END IF + END IF + + MAXWRK = MAX( MAXWRK, MINWRK ) + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGESDD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce using the QR +* decomposition (if sufficient workspace available) +* + IF( M.GE.MNTHR ) THEN +* + IF( WNTQN ) THEN +* +* Path 1 (M >> N, JOBZ='N') +* No singular vectors to be computed +* + ITAU = 1 + NWORK = ITAU + N +* +* Compute A=Q*R +* Workspace: need N [tau] + N [work] +* Workspace: prefer N [tau] + N*NB [work] +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Zero out below R +* + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A +* Workspace: need 3*N [e, tauq, taup] + N [work] +* Workspace: prefer 3*N [e, tauq, taup] + 2*N*NB [work] +* + CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + NWORK = IE + N +* +* Perform bidiagonal SVD, computing singular values only +* Workspace: need N [e] + BDSPAC +* + CALL DBDSDC( 'U', 'N', N, S, WORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, WORK( NWORK ), IWORK, INFO ) +* + ELSE IF( WNTQO ) THEN +* +* Path 2 (M >> N, JOBZ = 'O') +* N left singular vectors to be overwritten on A and +* N right singular vectors to be computed in VT +* + IR = 1 +* +* WORK(IR) is LDWRKR by N +* + IF( LWORK .GE. LDA*N + N*N + 3*N + BDSPAC ) THEN + LDWRKR = LDA + ELSE + LDWRKR = ( LWORK - N*N - 3*N - BDSPAC ) / N + END IF + ITAU = IR + LDWRKR*N + NWORK = ITAU + N +* +* Compute A=Q*R +* Workspace: need N*N [R] + N [tau] + N [work] +* Workspace: prefer N*N [R] + N [tau] + N*NB [work] +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL DLASET( 'L', N - 1, N - 1, ZERO, ZERO, WORK(IR+1), + $ LDWRKR ) +* +* Generate Q in A +* Workspace: need N*N [R] + N [tau] + N [work] +* Workspace: prefer N*N [R] + N [tau] + N*NB [work] +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work] +* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work] +* + CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* WORK(IU) is N by N +* + IU = NWORK + NWORK = IU + N*N +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in WORK(IU) and computing right +* singular vectors of bidiagonal matrix in VT +* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + BDSPAC +* + CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), N, + $ VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite WORK(IU) by left singular vectors of R +* and VT by right singular vectors of R +* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N [work] +* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N*NB [work] +* + CALL DORMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IU ), N, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + CALL DORMBR( 'P', 'R', 'T', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in WORK(IR) and copying to A +* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] +* Workspace: prefer M*N [R] + 3*N [e, tauq, taup] + N*N [U] +* + DO 10 I = 1, M, LDWRKR + CHUNK = MIN( M - I + 1, LDWRKR ) + CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), + $ LDA, WORK( IU ), N, ZERO, WORK( IR ), + $ LDWRKR ) + CALL DLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR, + $ A( I, 1 ), LDA ) + 10 CONTINUE +* + ELSE IF( WNTQS ) THEN +* +* Path 3 (M >> N, JOBZ='S') +* N left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IR = 1 +* +* WORK(IR) is N by N +* + LDWRKR = N + ITAU = IR + LDWRKR*N + NWORK = ITAU + N +* +* Compute A=Q*R +* Workspace: need N*N [R] + N [tau] + N [work] +* Workspace: prefer N*N [R] + N [tau] + N*NB [work] +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL DLASET( 'L', N - 1, N - 1, ZERO, ZERO, WORK(IR+1), + $ LDWRKR ) +* +* Generate Q in A +* Workspace: need N*N [R] + N [tau] + N [work] +* Workspace: prefer N*N [R] + N [tau] + N*NB [work] +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work] +* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work] +* + CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagoal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* Workspace: need N*N [R] + 3*N [e, tauq, taup] + BDSPAC +* + CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite U by left singular vectors of R and VT +* by right singular vectors of R +* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work] +* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*NB [work] +* + CALL DORMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* + CALL DORMBR( 'P', 'R', 'T', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in U +* Workspace: need N*N [R] +* + CALL DLACPY( 'F', N, N, U, LDU, WORK( IR ), LDWRKR ) + CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA, WORK( IR ), + $ LDWRKR, ZERO, U, LDU ) +* + ELSE IF( WNTQA ) THEN +* +* Path 4 (M >> N, JOBZ='A') +* M left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IU = 1 +* +* WORK(IU) is N by N +* + LDWRKU = N + ITAU = IU + LDWRKU*N + NWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* Workspace: need N*N [U] + N [tau] + N [work] +* Workspace: prefer N*N [U] + N [tau] + N*NB [work] +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* Workspace: need N*N [U] + N [tau] + M [work] +* Workspace: prefer N*N [U] + N [tau] + M*NB [work] + CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) +* +* Produce R in A, zeroing out other entries +* + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A +* Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work] +* Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + 2*N*NB [work] +* + CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in WORK(IU) and computing right +* singular vectors of bidiagonal matrix in VT +* Workspace: need N*N [U] + 3*N [e, tauq, taup] + BDSPAC +* + CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), N, + $ VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite WORK(IU) by left singular vectors of R and VT +* by right singular vectors of R +* Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work] +* Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + N*NB [work] +* + CALL DORMBR( 'Q', 'L', 'N', N, N, N, A, LDA, + $ WORK( ITAUQ ), WORK( IU ), LDWRKU, + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) + CALL DORMBR( 'P', 'R', 'T', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* Workspace: need N*N [U] +* + CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU, WORK( IU ), + $ LDWRKU, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL DLACPY( 'F', M, N, A, LDA, U, LDU ) +* + END IF +* + ELSE +* +* M .LT. MNTHR +* +* Path 5 (M >= N, but not much larger) +* Reduce to bidiagonal form without QR decomposition +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize A +* Workspace: need 3*N [e, tauq, taup] + M [work] +* Workspace: prefer 3*N [e, tauq, taup] + (M+N)*NB [work] +* + CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IF( WNTQN ) THEN +* +* Path 5n (M >= N, JOBZ='N') +* Perform bidiagonal SVD, only computing singular values +* Workspace: need 3*N [e, tauq, taup] + BDSPAC +* + CALL DBDSDC( 'U', 'N', N, S, WORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, WORK( NWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN +* Path 5o (M >= N, JOBZ='O') + IU = NWORK + IF( LWORK .GE. M*N + 3*N + BDSPAC ) THEN +* +* WORK( IU ) is M by N +* + LDWRKU = M + NWORK = IU + LDWRKU*N + CALL DLASET( 'F', M, N, ZERO, ZERO, WORK( IU ), + $ LDWRKU ) +* IR is unused; silence compile warnings + IR = -1 + ELSE +* +* WORK( IU ) is N by N +* + LDWRKU = N + NWORK = IU + LDWRKU*N +* +* WORK(IR) is LDWRKR by N +* + IR = NWORK + LDWRKR = ( LWORK - N*N - 3*N ) / N + END IF + NWORK = IU + LDWRKU*N +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in WORK(IU) and computing right +* singular vectors of bidiagonal matrix in VT +* Workspace: need 3*N [e, tauq, taup] + N*N [U] + BDSPAC +* + CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), + $ LDWRKU, VT, LDVT, DUM, IDUM, WORK( NWORK ), + $ IWORK, INFO ) +* +* Overwrite VT by right singular vectors of A +* Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work] +* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work] +* + CALL DORMBR( 'P', 'R', 'T', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* + IF( LWORK .GE. M*N + 3*N + BDSPAC ) THEN +* +* Path 5o-fast +* Overwrite WORK(IU) by left singular vectors of A +* Workspace: need 3*N [e, tauq, taup] + M*N [U] + N [work] +* Workspace: prefer 3*N [e, tauq, taup] + M*N [U] + N*NB [work] +* + CALL DORMBR( 'Q', 'L', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), WORK( IU ), LDWRKU, + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) +* +* Copy left singular vectors of A from WORK(IU) to A +* + CALL DLACPY( 'F', M, N, WORK( IU ), LDWRKU, A, LDA ) + ELSE +* +* Path 5o-slow +* Generate Q in A +* Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work] +* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work] +* + CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) +* +* Multiply Q in A by left singular vectors of +* bidiagonal matrix in WORK(IU), storing result in +* WORK(IR) and copying to A +* Workspace: need 3*N [e, tauq, taup] + N*N [U] + NB*N [R] +* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + M*N [R] +* + DO 20 I = 1, M, LDWRKR + CHUNK = MIN( M - I + 1, LDWRKR ) + CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), + $ LDA, WORK( IU ), LDWRKU, ZERO, + $ WORK( IR ), LDWRKR ) + CALL DLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR, + $ A( I, 1 ), LDA ) + 20 CONTINUE + END IF +* + ELSE IF( WNTQS ) THEN +* +* Path 5s (M >= N, JOBZ='S') +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* Workspace: need 3*N [e, tauq, taup] + BDSPAC +* + CALL DLASET( 'F', M, N, ZERO, ZERO, U, LDU ) + CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite U by left singular vectors of A and VT +* by right singular vectors of A +* Workspace: need 3*N [e, tauq, taup] + N [work] +* Workspace: prefer 3*N [e, tauq, taup] + N*NB [work] +* + CALL DORMBR( 'Q', 'L', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + CALL DORMBR( 'P', 'R', 'T', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + ELSE IF( WNTQA ) THEN +* +* Path 5a (M >= N, JOBZ='A') +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* Workspace: need 3*N [e, tauq, taup] + BDSPAC +* + CALL DLASET( 'F', M, M, ZERO, ZERO, U, LDU ) + CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Set the right corner of U to identity matrix +* + IF( M.GT.N ) THEN + CALL DLASET( 'F', M - N, M - N, ZERO, ONE, U(N+1,N+1), + $ LDU ) + END IF +* +* Overwrite U by left singular vectors of A and VT +* by right singular vectors of A +* Workspace: need 3*N [e, tauq, taup] + M [work] +* Workspace: prefer 3*N [e, tauq, taup] + M*NB [work] +* + CALL DORMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + CALL DORMBR( 'P', 'R', 'T', N, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + END IF +* + END IF +* + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce using the LQ decomposition (if +* sufficient workspace available) +* + IF( N.GE.MNTHR ) THEN +* + IF( WNTQN ) THEN +* +* Path 1t (N >> M, JOBZ='N') +* No singular vectors to be computed +* + ITAU = 1 + NWORK = ITAU + M +* +* Compute A=L*Q +* Workspace: need M [tau] + M [work] +* Workspace: prefer M [tau] + M*NB [work] +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Zero out above L +* + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA ) + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in A +* Workspace: need 3*M [e, tauq, taup] + M [work] +* Workspace: prefer 3*M [e, tauq, taup] + 2*M*NB [work] +* + CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + NWORK = IE + M +* +* Perform bidiagonal SVD, computing singular values only +* Workspace: need M [e] + BDSPAC +* + CALL DBDSDC( 'U', 'N', M, S, WORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, WORK( NWORK ), IWORK, INFO ) +* + ELSE IF( WNTQO ) THEN +* +* Path 2t (N >> M, JOBZ='O') +* M right singular vectors to be overwritten on A and +* M left singular vectors to be computed in U +* + IVT = 1 +* +* WORK(IVT) is M by M +* WORK(IL) is M by M; it is later resized to M by chunk for gemm +* + IL = IVT + M*M + IF( LWORK .GE. M*N + M*M + 3*M + BDSPAC ) THEN + LDWRKL = M + CHUNK = N + ELSE + LDWRKL = M + CHUNK = ( LWORK - M*M ) / M + END IF + ITAU = IL + LDWRKL*M + NWORK = ITAU + M +* +* Compute A=L*Q +* Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work] +* Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work] +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Copy L to WORK(IL), zeroing about above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL ) + CALL DLASET( 'U', M - 1, M - 1, ZERO, ZERO, + $ WORK( IL + LDWRKL ), LDWRKL ) +* +* Generate Q in A +* Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work] +* Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work] +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work] +* Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work] +* + CALL DGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U, and computing right singular +* vectors of bidiagonal matrix in WORK(IVT) +* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + BDSPAC +* + CALL DBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU, + $ WORK( IVT ), M, DUM, IDUM, WORK( NWORK ), + $ IWORK, INFO ) +* +* Overwrite U by left singular vectors of L and WORK(IVT) +* by right singular vectors of L +* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work] +* Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M*NB [work] +* + CALL DORMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + CALL DORMBR( 'P', 'R', 'T', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUP ), WORK( IVT ), M, + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) +* +* Multiply right singular vectors of L in WORK(IVT) by Q +* in A, storing result in WORK(IL) and copying to A +* Workspace: need M*M [VT] + M*M [L] +* Workspace: prefer M*M [VT] + M*N [L] +* At this point, L is resized as M by chunk. +* + DO 30 I = 1, N, CHUNK + BLK = MIN( N - I + 1, CHUNK ) + CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IVT ), M, + $ A( 1, I ), LDA, ZERO, WORK( IL ), LDWRKL ) + CALL DLACPY( 'F', M, BLK, WORK( IL ), LDWRKL, + $ A( 1, I ), LDA ) + 30 CONTINUE +* + ELSE IF( WNTQS ) THEN +* +* Path 3t (N >> M, JOBZ='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IL = 1 +* +* WORK(IL) is M by M +* + LDWRKL = M + ITAU = IL + LDWRKL*M + NWORK = ITAU + M +* +* Compute A=L*Q +* Workspace: need M*M [L] + M [tau] + M [work] +* Workspace: prefer M*M [L] + M [tau] + M*NB [work] +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Copy L to WORK(IL), zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL ) + CALL DLASET( 'U', M - 1, M - 1, ZERO, ZERO, + $ WORK( IL + LDWRKL ), LDWRKL ) +* +* Generate Q in A +* Workspace: need M*M [L] + M [tau] + M [work] +* Workspace: prefer M*M [L] + M [tau] + M*NB [work] +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU). +* Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work] +* Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work] +* + CALL DGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* Workspace: need M*M [L] + 3*M [e, tauq, taup] + BDSPAC +* + CALL DBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite U by left singular vectors of L and VT +* by right singular vectors of L +* Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work] +* Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + M*NB [work] +* + CALL DORMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + CALL DORMBR( 'P', 'R', 'T', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Multiply right singular vectors of L in WORK(IL) by +* Q in A, storing result in VT +* Workspace: need M*M [L] +* + CALL DLACPY( 'F', M, M, VT, LDVT, WORK( IL ), LDWRKL ) + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IL ), LDWRKL, + $ A, LDA, ZERO, VT, LDVT ) +* + ELSE IF( WNTQA ) THEN +* +* Path 4t (N >> M, JOBZ='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IVT = 1 +* +* WORK(IVT) is M by M +* + LDWKVT = M + ITAU = IVT + LDWKVT*M + NWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* Workspace: need M*M [VT] + M [tau] + M [work] +* Workspace: prefer M*M [VT] + M [tau] + M*NB [work] +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* Workspace: need M*M [VT] + M [tau] + N [work] +* Workspace: prefer M*M [VT] + M [tau] + N*NB [work] +* + CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) +* +* Produce L in A, zeroing out other entries +* + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in A +* Workspace: need M*M [VT] + 3*M [e, tauq, taup] + M [work] +* Workspace: prefer M*M [VT] + 3*M [e, tauq, taup] + 2*M*NB [work] +* + CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in WORK(IVT) +* Workspace: need M*M [VT] + 3*M [e, tauq, taup] + BDSPAC +* + CALL DBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU, + $ WORK( IVT ), LDWKVT, DUM, IDUM, + $ WORK( NWORK ), IWORK, INFO ) +* +* Overwrite U by left singular vectors of L and WORK(IVT) +* by right singular vectors of L +* Workspace: need M*M [VT] + 3*M [e, tauq, taup]+ M [work] +* Workspace: prefer M*M [VT] + 3*M [e, tauq, taup]+ M*NB [work] +* + CALL DORMBR( 'Q', 'L', 'N', M, M, M, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + CALL DORMBR( 'P', 'R', 'T', M, M, M, A, LDA, + $ WORK( ITAUP ), WORK( IVT ), LDWKVT, + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) +* +* Multiply right singular vectors of L in WORK(IVT) by +* Q in VT, storing result in A +* Workspace: need M*M [VT] +* + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IVT ), LDWKVT, + $ VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + END IF +* + ELSE +* +* N .LT. MNTHR +* +* Path 5t (N > M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize A +* Workspace: need 3*M [e, tauq, taup] + N [work] +* Workspace: prefer 3*M [e, tauq, taup] + (M+N)*NB [work] +* + CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IF( WNTQN ) THEN +* +* Path 5tn (N > M, JOBZ='N') +* Perform bidiagonal SVD, only computing singular values +* Workspace: need 3*M [e, tauq, taup] + BDSPAC +* + CALL DBDSDC( 'L', 'N', M, S, WORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, WORK( NWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN +* Path 5to (N > M, JOBZ='O') + LDWKVT = M + IVT = NWORK + IF( LWORK .GE. M*N + 3*M + BDSPAC ) THEN +* +* WORK( IVT ) is M by N +* + CALL DLASET( 'F', M, N, ZERO, ZERO, WORK( IVT ), + $ LDWKVT ) + NWORK = IVT + LDWKVT*N +* IL is unused; silence compile warnings + IL = -1 + ELSE +* +* WORK( IVT ) is M by M +* + NWORK = IVT + LDWKVT*M + IL = NWORK +* +* WORK(IL) is M by CHUNK +* + CHUNK = ( LWORK - M*M - 3*M ) / M + END IF +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in WORK(IVT) +* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + BDSPAC +* + CALL DBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, + $ WORK( IVT ), LDWKVT, DUM, IDUM, + $ WORK( NWORK ), IWORK, INFO ) +* +* Overwrite U by left singular vectors of A +* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work] +* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work] +* + CALL DORMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* + IF( LWORK .GE. M*N + 3*M + BDSPAC ) THEN +* +* Path 5to-fast +* Overwrite WORK(IVT) by left singular vectors of A +* Workspace: need 3*M [e, tauq, taup] + M*N [VT] + M [work] +* Workspace: prefer 3*M [e, tauq, taup] + M*N [VT] + M*NB [work] +* + CALL DORMBR( 'P', 'R', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), WORK( IVT ), LDWKVT, + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) +* +* Copy right singular vectors of A from WORK(IVT) to A +* + CALL DLACPY( 'F', M, N, WORK( IVT ), LDWKVT, A, LDA ) + ELSE +* +* Path 5to-slow +* Generate P**T in A +* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work] +* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work] +* + CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) +* +* Multiply Q in A by right singular vectors of +* bidiagonal matrix in WORK(IVT), storing result in +* WORK(IL) and copying to A +* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M*NB [L] +* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*N [L] +* + DO 40 I = 1, N, CHUNK + BLK = MIN( N - I + 1, CHUNK ) + CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IVT ), + $ LDWKVT, A( 1, I ), LDA, ZERO, + $ WORK( IL ), M ) + CALL DLACPY( 'F', M, BLK, WORK( IL ), M, A( 1, I ), + $ LDA ) + 40 CONTINUE + END IF + ELSE IF( WNTQS ) THEN +* +* Path 5ts (N > M, JOBZ='S') +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* Workspace: need 3*M [e, tauq, taup] + BDSPAC +* + CALL DLASET( 'F', M, N, ZERO, ZERO, VT, LDVT ) + CALL DBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite U by left singular vectors of A and VT +* by right singular vectors of A +* Workspace: need 3*M [e, tauq, taup] + M [work] +* Workspace: prefer 3*M [e, tauq, taup] + M*NB [work] +* + CALL DORMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + CALL DORMBR( 'P', 'R', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + ELSE IF( WNTQA ) THEN +* +* Path 5ta (N > M, JOBZ='A') +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* Workspace: need 3*M [e, tauq, taup] + BDSPAC +* + CALL DLASET( 'F', N, N, ZERO, ZERO, VT, LDVT ) + CALL DBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Set the right corner of VT to identity matrix +* + IF( N.GT.M ) THEN + CALL DLASET( 'F', N-M, N-M, ZERO, ONE, VT(M+1,M+1), + $ LDVT ) + END IF +* +* Overwrite U by left singular vectors of A and VT +* by right singular vectors of A +* Workspace: need 3*M [e, tauq, taup] + N [work] +* Workspace: prefer 3*M [e, tauq, taup] + N*NB [work] +* + CALL DORMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + CALL DORMBR( 'P', 'R', 'T', N, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + END IF +* + END IF +* + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( ANRM.LT.SMLNUM ) + $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of DGESDD +* + END diff --git a/dspl/liblapack/SRC/dgesv.f b/dspl/liblapack/SRC/dgesv.f new file mode 100644 index 0000000..23999e1 --- /dev/null +++ b/dspl/liblapack/SRC/dgesv.f @@ -0,0 +1,179 @@ +*> \brief DGESV computes the solution to system of linear equations A * X = B for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGESV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGESV computes the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +*> +*> The LU decomposition with partial pivoting and row interchanges is +*> used to factor A as +*> A = P * L * U, +*> where P is a permutation matrix, L is unit lower triangular, and U is +*> upper triangular. The factored form of A is then used to solve the +*> system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the N-by-N coefficient matrix A. +*> On exit, the factors L and U from the factorization +*> A = P*L*U; the unit diagonal elements of L are not stored. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices that define the permutation matrix P; +*> row i of the matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS matrix of right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEsolve +* +* ===================================================================== + SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL DGETRF, DGETRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGESV ', -INFO ) + RETURN + END IF +* +* Compute the LU factorization of A. +* + CALL DGETRF( N, N, A, LDA, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL DGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB, + $ INFO ) + END IF + RETURN +* +* End of DGESV +* + END diff --git a/dspl/liblapack/SRC/dgesvd.f b/dspl/liblapack/SRC/dgesvd.f new file mode 100644 index 0000000..ddf0bd5 --- /dev/null +++ b/dspl/liblapack/SRC/dgesvd.f @@ -0,0 +1,3504 @@ +*> \brief DGESVD computes the singular value decomposition (SVD) for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGESVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBU, JOBVT +* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ), +* $ VT( LDVT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGESVD computes the singular value decomposition (SVD) of a real +*> M-by-N matrix A, optionally computing the left and/or right singular +*> vectors. The SVD is written +*> +*> A = U * SIGMA * transpose(V) +*> +*> where SIGMA is an M-by-N matrix which is zero except for its +*> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and +*> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA +*> are the singular values of A; they are real and non-negative, and +*> are returned in descending order. The first min(m,n) columns of +*> U and V are the left and right singular vectors of A. +*> +*> Note that the routine returns V**T, not V. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> Specifies options for computing all or part of the matrix U: +*> = 'A': all M columns of U are returned in array U: +*> = 'S': the first min(m,n) columns of U (the left singular +*> vectors) are returned in the array U; +*> = 'O': the first min(m,n) columns of U (the left singular +*> vectors) are overwritten on the array A; +*> = 'N': no columns of U (no left singular vectors) are +*> computed. +*> \endverbatim +*> +*> \param[in] JOBVT +*> \verbatim +*> JOBVT is CHARACTER*1 +*> Specifies options for computing all or part of the matrix +*> V**T: +*> = 'A': all N rows of V**T are returned in the array VT; +*> = 'S': the first min(m,n) rows of V**T (the right singular +*> vectors) are returned in the array VT; +*> = 'O': the first min(m,n) rows of V**T (the right singular +*> vectors) are overwritten on the array A; +*> = 'N': no rows of V**T (no right singular vectors) are +*> computed. +*> +*> JOBVT and JOBU cannot both be 'O'. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if JOBU = 'O', A is overwritten with the first min(m,n) +*> columns of U (the left singular vectors, +*> stored columnwise); +*> if JOBVT = 'O', A is overwritten with the first min(m,n) +*> rows of V**T (the right singular vectors, +*> stored rowwise); +*> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A +*> are destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (min(M,N)) +*> The singular values of A, sorted so that S(i) >= S(i+1). +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension (LDU,UCOL) +*> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. +*> If JOBU = 'A', U contains the M-by-M orthogonal matrix U; +*> if JOBU = 'S', U contains the first min(m,n) columns of U +*> (the left singular vectors, stored columnwise); +*> if JOBU = 'N' or 'O', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= 1; if +*> JOBU = 'S' or 'A', LDU >= M. +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is DOUBLE PRECISION array, dimension (LDVT,N) +*> If JOBVT = 'A', VT contains the N-by-N orthogonal matrix +*> V**T; +*> if JOBVT = 'S', VT contains the first min(m,n) rows of +*> V**T (the right singular vectors, stored rowwise); +*> if JOBVT = 'N' or 'O', VT is not referenced. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= 1; if +*> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK; +*> if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged +*> superdiagonal elements of an upper bidiagonal matrix B +*> whose diagonal is in S (not necessarily sorted). B +*> satisfies A = U * B * VT, so it has the same singular values +*> as A, and singular vectors related by U and VT. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code): +*> - PATH 1 (M much larger than N, JOBU='N') +*> - PATH 1t (N much larger than M, JOBVT='N') +*> LWORK >= MAX(1,3*MIN(M,N) + MAX(M,N),5*MIN(M,N)) for the other paths +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if DBDSQR did not converge, INFO specifies how many +*> superdiagonals of an intermediate bidiagonal form B +*> did not converge to zero. See the description of WORK +*> above for details. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleGEsing +* +* ===================================================================== + SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, + $ VT, LDVT, WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER JOBU, JOBVT + INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS, + $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS + INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL, + $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU, + $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU, + $ NRVT, WRKBL + INTEGER LWORK_DGEQRF, LWORK_DORGQR_N, LWORK_DORGQR_M, + $ LWORK_DGEBRD, LWORK_DORGBR_P, LWORK_DORGBR_Q, + $ LWORK_DGELQF, LWORK_DORGLQ_N, LWORK_DORGLQ_M + DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DBDSQR, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY, + $ DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + WNTUA = LSAME( JOBU, 'A' ) + WNTUS = LSAME( JOBU, 'S' ) + WNTUAS = WNTUA .OR. WNTUS + WNTUO = LSAME( JOBU, 'O' ) + WNTUN = LSAME( JOBU, 'N' ) + WNTVA = LSAME( JOBVT, 'A' ) + WNTVS = LSAME( JOBVT, 'S' ) + WNTVAS = WNTVA .OR. WNTVS + WNTVO = LSAME( JOBVT, 'O' ) + WNTVN = LSAME( JOBVT, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR. + $ ( WNTVO .AND. WNTUO ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN + INFO = -9 + ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR. + $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + IF( M.GE.N .AND. MINMN.GT.0 ) THEN +* +* Compute space needed for DBDSQR +* + MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 ) + BDSPAC = 5*N +* Compute space needed for DGEQRF + CALL DGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) + LWORK_DGEQRF = INT( DUM(1) ) +* Compute space needed for DORGQR + CALL DORGQR( M, N, N, A, LDA, DUM(1), DUM(1), -1, IERR ) + LWORK_DORGQR_N = INT( DUM(1) ) + CALL DORGQR( M, M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) + LWORK_DORGQR_M = INT( DUM(1) ) +* Compute space needed for DGEBRD + CALL DGEBRD( N, N, A, LDA, S, DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, IERR ) + LWORK_DGEBRD = INT( DUM(1) ) +* Compute space needed for DORGBR P + CALL DORGBR( 'P', N, N, N, A, LDA, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_DORGBR_P = INT( DUM(1) ) +* Compute space needed for DORGBR Q + CALL DORGBR( 'Q', N, N, N, A, LDA, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_DORGBR_Q = INT( DUM(1) ) +* + IF( M.GE.MNTHR ) THEN + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* + MAXWRK = N + LWORK_DGEQRF + MAXWRK = MAX( MAXWRK, 3*N + LWORK_DGEBRD ) + IF( WNTVO .OR. WNTVAS ) + $ MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR_P ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 4*N, BDSPAC ) + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* + WRKBL = N + LWORK_DGEQRF + WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( N*N + WRKBL, N*N + M*N + N ) + MINWRK = MAX( 3*N + M, BDSPAC ) + ELSE IF( WNTUO .AND. WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or +* 'A') +* + WRKBL = N + LWORK_DGEQRF + WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( N*N + WRKBL, N*N + M*N + N ) + MINWRK = MAX( 3*N + M, BDSPAC ) + ELSE IF( WNTUS .AND. WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* + WRKBL = N + LWORK_DGEQRF + WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N + M, BDSPAC ) + ELSE IF( WNTUS .AND. WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* + WRKBL = N + LWORK_DGEQRF + WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*N*N + WRKBL + MINWRK = MAX( 3*N + M, BDSPAC ) + ELSE IF( WNTUS .AND. WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or +* 'A') +* + WRKBL = N + LWORK_DGEQRF + WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N + M, BDSPAC ) + ELSE IF( WNTUA .AND. WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* + WRKBL = N + LWORK_DGEQRF + WRKBL = MAX( WRKBL, N + LWORK_DORGQR_M ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N + M, BDSPAC ) + ELSE IF( WNTUA .AND. WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* + WRKBL = N + LWORK_DGEQRF + WRKBL = MAX( WRKBL, N + LWORK_DORGQR_M ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*N*N + WRKBL + MINWRK = MAX( 3*N + M, BDSPAC ) + ELSE IF( WNTUA .AND. WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or +* 'A') +* + WRKBL = N + LWORK_DGEQRF + WRKBL = MAX( WRKBL, N + LWORK_DORGQR_M ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N + M, BDSPAC ) + END IF + ELSE +* +* Path 10 (M at least N, but not much larger) +* + CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, IERR ) + LWORK_DGEBRD = INT( DUM(1) ) + MAXWRK = 3*N + LWORK_DGEBRD + IF( WNTUS .OR. WNTUO ) THEN + CALL DORGBR( 'Q', M, N, N, A, LDA, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_DORGBR_Q = INT( DUM(1) ) + MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR_Q ) + END IF + IF( WNTUA ) THEN + CALL DORGBR( 'Q', M, M, N, A, LDA, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_DORGBR_Q = INT( DUM(1) ) + MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR_Q ) + END IF + IF( .NOT.WNTVN ) THEN + MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR_P ) + END IF + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 3*N + M, BDSPAC ) + END IF + ELSE IF( MINMN.GT.0 ) THEN +* +* Compute space needed for DBDSQR +* + MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 ) + BDSPAC = 5*M +* Compute space needed for DGELQF + CALL DGELQF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) + LWORK_DGELQF = INT( DUM(1) ) +* Compute space needed for DORGLQ + CALL DORGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR ) + LWORK_DORGLQ_N = INT( DUM(1) ) + CALL DORGLQ( M, N, M, A, LDA, DUM(1), DUM(1), -1, IERR ) + LWORK_DORGLQ_M = INT( DUM(1) ) +* Compute space needed for DGEBRD + CALL DGEBRD( M, M, A, LDA, S, DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, IERR ) + LWORK_DGEBRD = INT( DUM(1) ) +* Compute space needed for DORGBR P + CALL DORGBR( 'P', M, M, M, A, N, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_DORGBR_P = INT( DUM(1) ) +* Compute space needed for DORGBR Q + CALL DORGBR( 'Q', M, M, M, A, N, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_DORGBR_Q = INT( DUM(1) ) + IF( N.GE.MNTHR ) THEN + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* + MAXWRK = M + LWORK_DGELQF + MAXWRK = MAX( MAXWRK, 3*M + LWORK_DGEBRD ) + IF( WNTUO .OR. WNTUAS ) + $ MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR_Q ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 4*M, BDSPAC ) + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* + WRKBL = M + LWORK_DGELQF + WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( M*M + WRKBL, M*M + M*N + M ) + MINWRK = MAX( 3*M + N, BDSPAC ) + ELSE IF( WNTVO .AND. WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', +* JOBVT='O') +* + WRKBL = M + LWORK_DGELQF + WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( M*M + WRKBL, M*M + M*N + M ) + MINWRK = MAX( 3*M + N, BDSPAC ) + ELSE IF( WNTVS .AND. WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* + WRKBL = M + LWORK_DGELQF + WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M + N, BDSPAC ) + ELSE IF( WNTVS .AND. WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* + WRKBL = M + LWORK_DGELQF + WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*M*M + WRKBL + MINWRK = MAX( 3*M + N, BDSPAC ) + ELSE IF( WNTVS .AND. WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* + WRKBL = M + LWORK_DGELQF + WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M + N, BDSPAC ) + ELSE IF( WNTVA .AND. WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* + WRKBL = M + LWORK_DGELQF + WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_N ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M + N, BDSPAC ) + ELSE IF( WNTVA .AND. WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* + WRKBL = M + LWORK_DGELQF + WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_N ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*M*M + WRKBL + MINWRK = MAX( 3*M + N, BDSPAC ) + ELSE IF( WNTVA .AND. WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* + WRKBL = M + LWORK_DGELQF + WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_N ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P ) + WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M + N, BDSPAC ) + END IF + ELSE +* +* Path 10t(N greater than M, but not much larger) +* + CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, IERR ) + LWORK_DGEBRD = INT( DUM(1) ) + MAXWRK = 3*M + LWORK_DGEBRD + IF( WNTVS .OR. WNTVO ) THEN +* Compute space needed for DORGBR P + CALL DORGBR( 'P', M, N, M, A, N, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_DORGBR_P = INT( DUM(1) ) + MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR_P ) + END IF + IF( WNTVA ) THEN + CALL DORGBR( 'P', N, N, M, A, N, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_DORGBR_P = INT( DUM(1) ) + MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR_P ) + END IF + IF( .NOT.WNTUN ) THEN + MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR_Q ) + END IF + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 3*M + N, BDSPAC ) + END IF + END IF + MAXWRK = MAX( MAXWRK, MINWRK ) + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGESVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce using the QR +* decomposition (if sufficient workspace available) +* + IF( M.GE.MNTHR ) THEN +* + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* No left singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N + N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out below R +* + IF( N .GT. 1 ) THEN + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), + $ LDA ) + END IF + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N + 2*N*NB) +* + CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + NCVT = 0 + IF( WNTVO .OR. WNTVAS ) THEN +* +* If right singular vectors desired, generate P'. +* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + NCVT = N + END IF + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A if desired +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA, + $ DUM, 1, DUM, 1, WORK( IWORK ), INFO ) +* +* If right singular vectors desired in VT, copy them there +* + IF( WNTVAS ) + $ CALL DLACPY( 'F', N, N, A, LDA, VT, LDVT ) +* + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* N left singular vectors to be overwritten on A and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N + N ) + LDA*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N + N ) + N*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N, WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N-N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR) and zero out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ), + $ LDWRKR ) +* +* Generate Q in A +* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) +* + CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R +* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (Workspace: need N*N + BDSPAC) +* + CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1, + $ WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + N +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (Workspace: need N*N + 2*N, prefer N*N + M*N + N) +* + DO 10 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, ZERO, + $ WORK( IU ), LDWRKU ) + CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 10 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB) +* + CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing A +* (Workspace: need 4*N, prefer 3*N + N*NB) +* + CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1, + $ A, LDA, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO .AND. WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') +* N left singular vectors to be overwritten on A and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N + N ) + LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N + N ) + N*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N and WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N-N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ VT( 2, 1 ), LDVT ) +* +* Generate Q in A +* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT, copying result to WORK(IR) +* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) +* + CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (Workspace: need N*N + 4*N-1, prefer N*N + 3*N + (N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) and computing right +* singular vectors of R in VT +* (Workspace: need N*N + BDSPAC) +* + CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT, + $ WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + N +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (Workspace: need N*N + 2*N, prefer N*N + M*N + N) +* + DO 20 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, ZERO, + $ WORK( IU ), LDWRKU ) + CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 20 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N + N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ VT( 2, 1 ), LDVT ) +* +* Generate Q in A +* (Workspace: need 2*N, prefer N + N*NB) +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (Workspace: need 4*N, prefer 3*N + 2*N*NB) +* + CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in A by left vectors bidiagonalizing R +* (Workspace: need 3*N + M, prefer 3*N + M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT, + $ A, LDA, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUS ) THEN +* + IF( WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* N left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in A +* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) +* + CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (Workspace: need N*N + BDSPAC) +* + CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, + $ 1, WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in U +* (Workspace: need N*N) +* + CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA, + $ WORK( IR ), LDWRKR, ZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N + N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N, prefer N + N*NB) +* + CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + IF( N .GT. 1 ) THEN + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ A( 2, 1 ), LDA ) + END IF +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N + 2*N*NB) +* + CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (Workspace: need 3*N + M, prefer 3*N + M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, + $ 1, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* N left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA + N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB) +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*N*N + 4*N, +* prefer 2*N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*N*N + 4*N-1, +* prefer 2*N*N+3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (Workspace: need 2*N*N + BDSPAC) +* + CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (Workspace: need N*N) +* + CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA, + $ WORK( IU ), LDWRKU, ZERO, U, LDU ) +* +* Copy right singular vectors of R to A +* (Workspace: need N*N) +* + CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N + N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N, prefer N + N*NB) +* + CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + IF( N .GT. 1 ) THEN + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ A( 2, 1 ), LDA ) + END IF +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N + 2*N*NB) +* + CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (Workspace: need 3*N + M, prefer 3*N + M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in A +* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A, + $ LDA, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' +* or 'A') +* N left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) +* + CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) +* + CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need N*N + 4*N-1, +* prefer N*N+3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (Workspace: need N*N + BDSPAC) +* + CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (Workspace: need N*N) +* + CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA, + $ WORK( IU ), LDWRKU, ZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N + N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N, prefer N + N*NB) +* + CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ VT( 2, 1 ), LDVT ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (Workspace: need 4*N, prefer 3*N + 2*N*NB) +* + CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (Workspace: need 3*N + M, prefer 3*N + M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTUA ) THEN +* + IF( WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* M left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in U +* (Workspace: need N*N + N + M, prefer N*N + N + M*NB) +* + CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) +* + CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (Workspace: need N*N + BDSPAC) +* + CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, + $ 1, WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IR), storing result in A +* (Workspace: need N*N) +* + CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU, + $ WORK( IR ), LDWRKR, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL DLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N + N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N + M, prefer N + M*NB) +* + CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + IF( N .GT. 1 ) THEN + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ A( 2, 1 ), LDA ) + END IF +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N + 2*N*NB) +* + CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (Workspace: need 3*N + M, prefer 3*N + M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, + $ 1, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* M left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA + N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N*N + N + M, prefer 2*N*N + N + M*NB) +* + CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*N*N + 4*N, +* prefer 2*N*N+3*N+2*N*NB) +* + CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*N*N + 4*N-1, +* prefer 2*N*N+3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (Workspace: need 2*N*N + BDSPAC) +* + CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (Workspace: need N*N) +* + CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU, + $ WORK( IU ), LDWRKU, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL DLACPY( 'F', M, N, A, LDA, U, LDU ) +* +* Copy right singular vectors of R from WORK(IR) to A +* + CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N + N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N + M, prefer N + M*NB) +* + CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + IF( N .GT. 1 ) THEN + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ A( 2, 1 ), LDA ) + END IF +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N + 2*N*NB) +* + CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (Workspace: need 3*N + M, prefer 3*N + M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in A +* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A, + $ LDA, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' +* or 'A') +* M left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N*N + N + M, prefer N*N + N + M*NB) +* + CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB) +* + CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB) +* + CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need N*N + 4*N-1, +* prefer N*N+3*N+(N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (Workspace: need N*N + BDSPAC) +* + CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (Workspace: need N*N) +* + CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU, + $ WORK( IU ), LDWRKU, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL DLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N + N*NB) +* + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N + M, prefer N + M*NB) +* + CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R from A to VT, zeroing out below it +* + CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, + $ VT( 2, 1 ), LDVT ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (Workspace: need 4*N, prefer 3*N + 2*N*NB) +* + CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (Workspace: need 3*N + M, prefer 3*N + M*NB) +* + CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* M .LT. MNTHR +* +* Path 10 (M at least N, but not much larger) +* Reduce to bidiagonal form without QR decomposition +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB) +* + CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (Workspace: need 3*N + NCU, prefer 3*N + NCU*NB) +* + CALL DLACPY( 'L', M, N, A, LDA, U, LDU ) + IF( WNTUS ) + $ NCU = N + IF( WNTUA ) + $ NCU = M + CALL DORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) +* + CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (Workspace: need 4*N, prefer 3*N + N*NB) +* + CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB) +* + CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IWORK = IE + N + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA, + $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO ) + END IF +* + END IF +* + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce using the LQ decomposition (if +* sufficient workspace available) +* + IF( N.GE.MNTHR ) THEN +* + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* No right singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M + M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out above L +* + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA ) + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M + 2*M*NB) +* + CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUO .OR. WNTUAS ) THEN +* +* If left singular vectors desired, generate Q +* (Workspace: need 4*M, prefer 3*M + M*NB) +* + CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IWORK = IE + M + NRU = 0 + IF( WNTUO .OR. WNTUAS ) + $ NRU = M +* +* Perform bidiagonal QR iteration, computing left singular +* vectors of A in A if desired +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A, + $ LDA, DUM, 1, WORK( IWORK ), INFO ) +* +* If left singular vectors desired in U, copy them there +* + IF( WNTUAS ) + $ CALL DLACPY( 'F', M, M, A, LDA, U, LDU ) +* + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* M right singular vectors to be overwritten on A and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N + M ) + LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N + M ) + M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M-M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR) and zero out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) +* + CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L +* (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M + BDSPAC) +* + CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + M +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (Workspace: need M*M + 2*M, prefer M*M + M*N + M) +* + DO 30 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, ZERO, + $ WORK( IU ), LDWRKU ) + CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 30 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB) +* + CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing A +* (Workspace: need 4*M, prefer 3*M + M*NB) +* + CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA, + $ DUM, 1, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVO .AND. WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') +* M right singular vectors to be overwritten on A and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N + M ) + LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N + M ) + M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M-M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing about above it +* + CALL DLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U, copying result to WORK(IR) +* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) +* + CALL DGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR ) +* +* Generate right vectors bidiagonalizing L in WORK(IR) +* (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB) +* + CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U, and computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M + BDSPAC) +* + CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, U, LDU, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + M +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (Workspace: need M*M + 2*M, prefer M*M + M*N + M)) +* + DO 40 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, ZERO, + $ WORK( IU ), LDWRKU ) + CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 40 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M + M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (Workspace: need 2*M, prefer M + M*NB) +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (Workspace: need 4*M, prefer 3*M + 2*M*NB) +* + CALL DGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in A +* (Workspace: need 3*M + N, prefer 3*M + N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU, + $ WORK( ITAUP ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (Workspace: need 4*M, prefer 3*M + M*NB) +* + CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA, + $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVS ) THEN +* + IF( WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* M right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) +* + CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L in +* WORK(IR) +* (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M + BDSPAC) +* + CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in A, storing result in VT +* (Workspace: need M*M) +* + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ), + $ LDWRKR, A, LDA, ZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M + M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy result to VT +* + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M, prefer M + M*NB) +* + CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M + 2*M*NB) +* + CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (Workspace: need 3*M + N, prefer 3*M + N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT, + $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA + M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out below it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB) +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*M*M + 4*M, +* prefer 2*M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*M*M + 4*M-1, +* prefer 2*M*M+3*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB) +* + CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (Workspace: need 2*M*M + BDSPAC) +* + CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (Workspace: need M*M) +* + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, A, LDA, ZERO, VT, LDVT ) +* +* Copy left singular vectors of L to A +* (Workspace: need M*M) +* + CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M + M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M, prefer M + M*NB) +* + CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M + 2*M*NB) +* + CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (Workspace: need 3*M + N, prefer 3*M + N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors of L in A +* (Workspace: need 4*M, prefer 3*M + M*NB) +* + CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, compute left +* singular vectors of A in A and compute right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is LDA by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) +* + CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) +* + CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need M*M + 4*M-1, +* prefer M*M+3*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB) +* + CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (Workspace: need M*M + BDSPAC) +* + CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (Workspace: need M*M) +* + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, A, LDA, ZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M + M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M, prefer M + M*NB) +* + CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (Workspace: need 4*M, prefer 3*M + 2*M*NB) +* + CALL DGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (Workspace: need 3*M + N, prefer 3*M + N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need 4*M, prefer 3*M + M*NB) +* + CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTVA ) THEN +* + IF( WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* N right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( N + M, 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in VT +* (Workspace: need M*M + M + N, prefer M*M + M + N*NB) +* + CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) +* + CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (Workspace: need M*M + 4*M-1, +* prefer M*M+3*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M + BDSPAC) +* + CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in VT, storing result in A +* (Workspace: need M*M) +* + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ), + $ LDWRKR, VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M + M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M + N, prefer M + N*NB) +* + CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M + 2*M*NB) +* + CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (Workspace: need 3*M + N, prefer 3*M + N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT, + $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+MAX( N + M, 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA + M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M*M + M + N, prefer 2*M*M + M + N*NB) +* + CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*M*M + 4*M, +* prefer 2*M*M+3*M+2*M*NB) +* + CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*M*M + 4*M-1, +* prefer 2*M*M+3*M+(M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB) +* + CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (Workspace: need 2*M*M + BDSPAC) +* + CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (Workspace: need M*M) +* + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* +* Copy left singular vectors of A from WORK(IR) to A +* + CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M + M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M + N, prefer M + N*NB) +* + CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M + 2*M*NB) +* + CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (Workspace: need 3*M + N, prefer 3*M + N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in A +* (Workspace: need 4*M, prefer 3*M + M*NB) +* + CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( N + M, 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by M +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is M by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M*M + M + N, prefer M*M + M + N*NB) +* + CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB) +* + CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB) +* + CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB) +* + CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (Workspace: need M*M + BDSPAC) +* + CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (Workspace: need M*M) +* + CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M + M*NB) +* + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M + N, prefer M + N*NB) +* + CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL DLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (Workspace: need 4*M, prefer 3*M + 2*M*NB) +* + CALL DGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (Workspace: need 3*M + N, prefer 3*M + N*NB) +* + CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need 4*M, prefer 3*M + M*NB) +* + CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* N .LT. MNTHR +* +* Path 10t(N greater than M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB) +* + CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB) +* + CALL DLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL DORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (Workspace: need 3*M + NRVT, prefer 3*M + NRVT*NB) +* + CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT ) + IF( WNTVA ) + $ NRVT = N + IF( WNTVS ) + $ NRVT = M + CALL DORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB) +* + CALL DORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (Workspace: need 4*M, prefer 3*M + M*NB) +* + CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IWORK = IE + M + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA, + $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO ) + END IF +* + END IF +* + END IF +* +* If DBDSQR failed to converge, copy unconverged superdiagonals +* to WORK( 2:MINMN ) +* + IF( INFO.NE.0 ) THEN + IF( IE.GT.2 ) THEN + DO 50 I = 1, MINMN - 1 + WORK( I+1 ) = WORK( I+IE-1 ) + 50 CONTINUE + END IF + IF( IE.LT.2 ) THEN + DO 60 I = MINMN - 1, 1, -1 + WORK( I+1 ) = WORK( I+IE-1 ) + 60 CONTINUE + END IF + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM ) + $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ), + $ MINMN, IERR ) + IF( ANRM.LT.SMLNUM ) + $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM ) + $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ), + $ MINMN, IERR ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of DGESVD +* + END diff --git a/dspl/liblapack/SRC/dgesvdx.f b/dspl/liblapack/SRC/dgesvdx.f new file mode 100644 index 0000000..3937c13 --- /dev/null +++ b/dspl/liblapack/SRC/dgesvdx.f @@ -0,0 +1,834 @@ +*> \brief DGESVDX computes the singular value decomposition (SVD) for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGESVDX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGESVDX( JOBU, JOBVT, RANGE, M, N, A, LDA, VL, VU, +* $ IL, IU, NS, S, U, LDU, VT, LDVT, WORK, +* $ LWORK, IWORK, INFO ) +* +* +* .. Scalar Arguments .. +* CHARACTER JOBU, JOBVT, RANGE +* INTEGER IL, INFO, IU, LDA, LDU, LDVT, LWORK, M, N, NS +* DOUBLE PRECISION VL, VU +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ), +* $ VT( LDVT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGESVDX computes the singular value decomposition (SVD) of a real +*> M-by-N matrix A, optionally computing the left and/or right singular +*> vectors. The SVD is written +*> +*> A = U * SIGMA * transpose(V) +*> +*> where SIGMA is an M-by-N matrix which is zero except for its +*> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and +*> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA +*> are the singular values of A; they are real and non-negative, and +*> are returned in descending order. The first min(m,n) columns of +*> U and V are the left and right singular vectors of A. +*> +*> DGESVDX uses an eigenvalue problem for obtaining the SVD, which +*> allows for the computation of a subset of singular values and +*> vectors. See DBDSVDX for details. +*> +*> Note that the routine returns V**T, not V. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> Specifies options for computing all or part of the matrix U: +*> = 'V': the first min(m,n) columns of U (the left singular +*> vectors) or as specified by RANGE are returned in +*> the array U; +*> = 'N': no columns of U (no left singular vectors) are +*> computed. +*> \endverbatim +*> +*> \param[in] JOBVT +*> \verbatim +*> JOBVT is CHARACTER*1 +*> Specifies options for computing all or part of the matrix +*> V**T: +*> = 'V': the first min(m,n) rows of V**T (the right singular +*> vectors) or as specified by RANGE are returned in +*> the array VT; +*> = 'N': no rows of V**T (no right singular vectors) are +*> computed. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all singular values will be found. +*> = 'V': all singular values in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th singular values will be found. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the contents of A are destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for singular values. VU > VL. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for singular values. VU > VL. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest singular value to be returned. +*> 1 <= IL <= IU <= min(M,N), if min(M,N) > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest singular value to be returned. +*> 1 <= IL <= IU <= min(M,N), if min(M,N) > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[out] NS +*> \verbatim +*> NS is INTEGER +*> The total number of singular values found, +*> 0 <= NS <= min(M,N). +*> If RANGE = 'A', NS = min(M,N); if RANGE = 'I', NS = IU-IL+1. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (min(M,N)) +*> The singular values of A, sorted so that S(i) >= S(i+1). +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension (LDU,UCOL) +*> If JOBU = 'V', U contains columns of U (the left singular +*> vectors, stored columnwise) as specified by RANGE; if +*> JOBU = 'N', U is not referenced. +*> Note: The user must ensure that UCOL >= NS; if RANGE = 'V', +*> the exact value of NS is not known in advance and an upper +*> bound must be used. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= 1; if +*> JOBU = 'V', LDU >= M. +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is DOUBLE PRECISION array, dimension (LDVT,N) +*> If JOBVT = 'V', VT contains the rows of V**T (the right singular +*> vectors, stored rowwise) as specified by RANGE; if JOBVT = 'N', +*> VT is not referenced. +*> Note: The user must ensure that LDVT >= NS; if RANGE = 'V', +*> the exact value of NS is not known in advance and an upper +*> bound must be used. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= 1; if +*> JOBVT = 'V', LDVT >= NS (see above). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK; +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= MAX(1,MIN(M,N)*(MIN(M,N)+4)) for the paths (see +*> comments inside the code): +*> - PATH 1 (M much larger than N) +*> - PATH 1t (N much larger than M) +*> LWORK >= MAX(1,MIN(M,N)*2+MAX(M,N)) for the other paths. +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (12*MIN(M,N)) +*> If INFO = 0, the first NS elements of IWORK are zero. If INFO > 0, +*> then IWORK contains the indices of the eigenvectors that failed +*> to converge in DBDSVDX/DSTEVX. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge +*> in DBDSVDX/DSTEVX. +*> if INFO = N*2 + 1, an internal error occurred in +*> DBDSVDX +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleGEsing +* +* ===================================================================== + SUBROUTINE DGESVDX( JOBU, JOBVT, RANGE, M, N, A, LDA, VL, VU, + $ IL, IU, NS, S, U, LDU, VT, LDVT, WORK, + $ LWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBU, JOBVT, RANGE + INTEGER IL, INFO, IU, LDA, LDU, LDVT, LWORK, M, N, NS + DOUBLE PRECISION VL, VU +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + CHARACTER JOBZ, RNGTGK + LOGICAL ALLS, INDS, LQUERY, VALS, WANTU, WANTVT + INTEGER I, ID, IE, IERR, ILQF, ILTGK, IQRF, ISCL, + $ ITAU, ITAUP, ITAUQ, ITEMP, ITGKZ, IUTGK, + $ J, MAXWRK, MINMN, MINWRK, MNTHR + DOUBLE PRECISION ABSTOL, ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DBDSVDX, DGEBRD, DGELQF, DGEQRF, DLACPY, + $ DLASCL, DLASET, DORMBR, DORMLQ, DORMQR, + $ DCOPY, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + NS = 0 + INFO = 0 + ABSTOL = 2*DLAMCH('S') + LQUERY = ( LWORK.EQ.-1 ) + MINMN = MIN( M, N ) + + WANTU = LSAME( JOBU, 'V' ) + WANTVT = LSAME( JOBVT, 'V' ) + IF( WANTU .OR. WANTVT ) THEN + JOBZ = 'V' + ELSE + JOBZ = 'N' + END IF + ALLS = LSAME( RANGE, 'A' ) + VALS = LSAME( RANGE, 'V' ) + INDS = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.LSAME( JOBU, 'V' ) .AND. + $ .NOT.LSAME( JOBU, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( JOBVT, 'V' ) .AND. + $ .NOT.LSAME( JOBVT, 'N' ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLS .OR. VALS .OR. INDS ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( M.GT.LDA ) THEN + INFO = -7 + ELSE IF( MINMN.GT.0 ) THEN + IF( VALS ) THEN + IF( VL.LT.ZERO ) THEN + INFO = -8 + ELSE IF( VU.LE.VL ) THEN + INFO = -9 + END IF + ELSE IF( INDS ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, MINMN ) ) THEN + INFO = -10 + ELSE IF( IU.LT.MIN( MINMN, IL ) .OR. IU.GT.MINMN ) THEN + INFO = -11 + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( WANTU .AND. LDU.LT.M ) THEN + INFO = -15 + ELSE IF( WANTVT ) THEN + IF( INDS ) THEN + IF( LDVT.LT.IU-IL+1 ) THEN + INFO = -17 + END IF + ELSE IF( LDVT.LT.MINMN ) THEN + INFO = -17 + END IF + END IF + END IF + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + IF( MINMN.GT.0 ) THEN + IF( M.GE.N ) THEN + MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 ) + IF( M.GE.MNTHR ) THEN +* +* Path 1 (M much larger than N) +* + MAXWRK = N + + $ N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + MAXWRK = MAX( MAXWRK, N*(N+5) + 2*N* + $ ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) ) + IF (WANTU) THEN + MAXWRK = MAX(MAXWRK,N*(N*3+6)+N* + $ ILAENV( 1, 'DORMQR', ' ', N, N, -1, -1 ) ) + END IF + IF (WANTVT) THEN + MAXWRK = MAX(MAXWRK,N*(N*3+6)+N* + $ ILAENV( 1, 'DORMLQ', ' ', N, N, -1, -1 ) ) + END IF + MINWRK = N*(N*3+20) + ELSE +* +* Path 2 (M at least N, but not much larger) +* + MAXWRK = 4*N + ( M+N )* + $ ILAENV( 1, 'DGEBRD', ' ', M, N, -1, -1 ) + IF (WANTU) THEN + MAXWRK = MAX(MAXWRK,N*(N*2+5)+N* + $ ILAENV( 1, 'DORMQR', ' ', N, N, -1, -1 ) ) + END IF + IF (WANTVT) THEN + MAXWRK = MAX(MAXWRK,N*(N*2+5)+N* + $ ILAENV( 1, 'DORMLQ', ' ', N, N, -1, -1 ) ) + END IF + MINWRK = MAX(N*(N*2+19),4*N+M) + END IF + ELSE + MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 ) + IF( N.GE.MNTHR ) THEN +* +* Path 1t (N much larger than M) +* + MAXWRK = M + + $ M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 ) + MAXWRK = MAX( MAXWRK, M*(M+5) + 2*M* + $ ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) ) + IF (WANTU) THEN + MAXWRK = MAX(MAXWRK,M*(M*3+6)+M* + $ ILAENV( 1, 'DORMQR', ' ', M, M, -1, -1 ) ) + END IF + IF (WANTVT) THEN + MAXWRK = MAX(MAXWRK,M*(M*3+6)+M* + $ ILAENV( 1, 'DORMLQ', ' ', M, M, -1, -1 ) ) + END IF + MINWRK = M*(M*3+20) + ELSE +* +* Path 2t (N at least M, but not much larger) +* + MAXWRK = 4*M + ( M+N )* + $ ILAENV( 1, 'DGEBRD', ' ', M, N, -1, -1 ) + IF (WANTU) THEN + MAXWRK = MAX(MAXWRK,M*(M*2+5)+M* + $ ILAENV( 1, 'DORMQR', ' ', M, M, -1, -1 ) ) + END IF + IF (WANTVT) THEN + MAXWRK = MAX(MAXWRK,M*(M*2+5)+M* + $ ILAENV( 1, 'DORMLQ', ' ', M, M, -1, -1 ) ) + END IF + MINWRK = MAX(M*(M*2+19),4*M+N) + END IF + END IF + END IF + MAXWRK = MAX( MAXWRK, MINWRK ) + WORK( 1 ) = DBLE( MAXWRK ) +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGESVDX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* +* Set singular values indices accord to RANGE. +* + IF( ALLS ) THEN + RNGTGK = 'I' + ILTGK = 1 + IUTGK = MIN( M, N ) + ELSE IF( INDS ) THEN + RNGTGK = 'I' + ILTGK = IL + IUTGK = IU + ELSE + RNGTGK = 'V' + ILTGK = 0 + IUTGK = 0 + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce A using the QR +* decomposition. +* + IF( M.GE.MNTHR ) THEN +* +* Path 1 (M much larger than N): +* A = Q * R = Q * ( QB * B * PB**T ) +* = Q * ( QB * ( UB * S * VB**T ) * PB**T ) +* U = Q * QB * UB; V**T = VB**T * PB**T +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + ITAU = 1 + ITEMP = ITAU + N + CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) +* +* Copy R into WORK and bidiagonalize it: +* (Workspace: need N*N+5*N, prefer N*N+4*N+2*N*NB) +* + IQRF = ITEMP + ID = IQRF + N*N + IE = ID + N + ITAUQ = IE + N + ITAUP = ITAUQ + N + ITEMP = ITAUP + N + CALL DLACPY( 'U', N, N, A, LDA, WORK( IQRF ), N ) + CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IQRF+1 ), N ) + CALL DGEBRD( N, N, WORK( IQRF ), N, WORK( ID ), WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) +* +* Solve eigenvalue problem TGK*Z=Z*S. +* (Workspace: need 14*N + 2*N*(N+1)) +* + ITGKZ = ITEMP + ITEMP = ITGKZ + N*(N*2+1) + CALL DBDSVDX( 'U', JOBZ, RNGTGK, N, WORK( ID ), WORK( IE ), + $ VL, VU, ILTGK, IUTGK, NS, S, WORK( ITGKZ ), + $ N*2, WORK( ITEMP ), IWORK, INFO) +* +* If needed, compute left singular vectors. +* + IF( WANTU ) THEN + J = ITGKZ + DO I = 1, NS + CALL DCOPY( N, WORK( J ), 1, U( 1,I ), 1 ) + J = J + N*2 + END DO + CALL DLASET( 'A', M-N, NS, ZERO, ZERO, U( N+1,1 ), LDU ) +* +* Call DORMBR to compute QB*UB. +* (Workspace in WORK( ITEMP ): need N, prefer N*NB) +* + CALL DORMBR( 'Q', 'L', 'N', N, NS, N, WORK( IQRF ), N, + $ WORK( ITAUQ ), U, LDU, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) +* +* Call DORMQR to compute Q*(QB*UB). +* (Workspace in WORK( ITEMP ): need N, prefer N*NB) +* + CALL DORMQR( 'L', 'N', M, NS, N, A, LDA, + $ WORK( ITAU ), U, LDU, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF +* +* If needed, compute right singular vectors. +* + IF( WANTVT) THEN + J = ITGKZ + N + DO I = 1, NS + CALL DCOPY( N, WORK( J ), 1, VT( I,1 ), LDVT ) + J = J + N*2 + END DO +* +* Call DORMBR to compute VB**T * PB**T +* (Workspace in WORK( ITEMP ): need N, prefer N*NB) +* + CALL DORMBR( 'P', 'R', 'T', NS, N, N, WORK( IQRF ), N, + $ WORK( ITAUP ), VT, LDVT, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF + ELSE +* +* Path 2 (M at least N, but not much larger) +* Reduce A to bidiagonal form without QR decomposition +* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T +* U = QB * UB; V**T = VB**T * PB**T +* +* Bidiagonalize A +* (Workspace: need 4*N+M, prefer 4*N+(M+N)*NB) +* + ID = 1 + IE = ID + N + ITAUQ = IE + N + ITAUP = ITAUQ + N + ITEMP = ITAUP + N + CALL DGEBRD( M, N, A, LDA, WORK( ID ), WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) +* +* Solve eigenvalue problem TGK*Z=Z*S. +* (Workspace: need 14*N + 2*N*(N+1)) +* + ITGKZ = ITEMP + ITEMP = ITGKZ + N*(N*2+1) + CALL DBDSVDX( 'U', JOBZ, RNGTGK, N, WORK( ID ), WORK( IE ), + $ VL, VU, ILTGK, IUTGK, NS, S, WORK( ITGKZ ), + $ N*2, WORK( ITEMP ), IWORK, INFO) +* +* If needed, compute left singular vectors. +* + IF( WANTU ) THEN + J = ITGKZ + DO I = 1, NS + CALL DCOPY( N, WORK( J ), 1, U( 1,I ), 1 ) + J = J + N*2 + END DO + CALL DLASET( 'A', M-N, NS, ZERO, ZERO, U( N+1,1 ), LDU ) +* +* Call DORMBR to compute QB*UB. +* (Workspace in WORK( ITEMP ): need N, prefer N*NB) +* + CALL DORMBR( 'Q', 'L', 'N', M, NS, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( ITEMP ), + $ LWORK-ITEMP+1, IERR ) + END IF +* +* If needed, compute right singular vectors. +* + IF( WANTVT) THEN + J = ITGKZ + N + DO I = 1, NS + CALL DCOPY( N, WORK( J ), 1, VT( I,1 ), LDVT ) + J = J + N*2 + END DO +* +* Call DORMBR to compute VB**T * PB**T +* (Workspace in WORK( ITEMP ): need N, prefer N*NB) +* + CALL DORMBR( 'P', 'R', 'T', NS, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( ITEMP ), + $ LWORK-ITEMP+1, IERR ) + END IF + END IF + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce A using the LQ decomposition. +* + IF( N.GE.MNTHR ) THEN +* +* Path 1t (N much larger than M): +* A = L * Q = ( QB * B * PB**T ) * Q +* = ( QB * ( UB * S * VB**T ) * PB**T ) * Q +* U = QB * UB ; V**T = VB**T * PB**T * Q +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + ITAU = 1 + ITEMP = ITAU + M + CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + +* Copy L into WORK and bidiagonalize it: +* (Workspace in WORK( ITEMP ): need M*M+5*N, prefer M*M+4*M+2*M*NB) +* + ILQF = ITEMP + ID = ILQF + M*M + IE = ID + M + ITAUQ = IE + M + ITAUP = ITAUQ + M + ITEMP = ITAUP + M + CALL DLACPY( 'L', M, M, A, LDA, WORK( ILQF ), M ) + CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( ILQF+M ), M ) + CALL DGEBRD( M, M, WORK( ILQF ), M, WORK( ID ), WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) +* +* Solve eigenvalue problem TGK*Z=Z*S. +* (Workspace: need 2*M*M+14*M) +* + ITGKZ = ITEMP + ITEMP = ITGKZ + M*(M*2+1) + CALL DBDSVDX( 'U', JOBZ, RNGTGK, M, WORK( ID ), WORK( IE ), + $ VL, VU, ILTGK, IUTGK, NS, S, WORK( ITGKZ ), + $ M*2, WORK( ITEMP ), IWORK, INFO) +* +* If needed, compute left singular vectors. +* + IF( WANTU ) THEN + J = ITGKZ + DO I = 1, NS + CALL DCOPY( M, WORK( J ), 1, U( 1,I ), 1 ) + J = J + M*2 + END DO +* +* Call DORMBR to compute QB*UB. +* (Workspace in WORK( ITEMP ): need M, prefer M*NB) +* + CALL DORMBR( 'Q', 'L', 'N', M, NS, M, WORK( ILQF ), M, + $ WORK( ITAUQ ), U, LDU, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF +* +* If needed, compute right singular vectors. +* + IF( WANTVT) THEN + J = ITGKZ + M + DO I = 1, NS + CALL DCOPY( M, WORK( J ), 1, VT( I,1 ), LDVT ) + J = J + M*2 + END DO + CALL DLASET( 'A', NS, N-M, ZERO, ZERO, VT( 1,M+1 ), LDVT) +* +* Call DORMBR to compute (VB**T)*(PB**T) +* (Workspace in WORK( ITEMP ): need M, prefer M*NB) +* + CALL DORMBR( 'P', 'R', 'T', NS, M, M, WORK( ILQF ), M, + $ WORK( ITAUP ), VT, LDVT, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) +* +* Call DORMLQ to compute ((VB**T)*(PB**T))*Q. +* (Workspace in WORK( ITEMP ): need M, prefer M*NB) +* + CALL DORMLQ( 'R', 'N', NS, N, M, A, LDA, + $ WORK( ITAU ), VT, LDVT, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF + ELSE +* +* Path 2t (N greater than M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T +* U = QB * UB; V**T = VB**T * PB**T +* +* Bidiagonalize A +* (Workspace: need 4*M+N, prefer 4*M+(M+N)*NB) +* + ID = 1 + IE = ID + M + ITAUQ = IE + M + ITAUP = ITAUQ + M + ITEMP = ITAUP + M + CALL DGEBRD( M, N, A, LDA, WORK( ID ), WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) +* +* Solve eigenvalue problem TGK*Z=Z*S. +* (Workspace: need 2*M*M+14*M) +* + ITGKZ = ITEMP + ITEMP = ITGKZ + M*(M*2+1) + CALL DBDSVDX( 'L', JOBZ, RNGTGK, M, WORK( ID ), WORK( IE ), + $ VL, VU, ILTGK, IUTGK, NS, S, WORK( ITGKZ ), + $ M*2, WORK( ITEMP ), IWORK, INFO) +* +* If needed, compute left singular vectors. +* + IF( WANTU ) THEN + J = ITGKZ + DO I = 1, NS + CALL DCOPY( M, WORK( J ), 1, U( 1,I ), 1 ) + J = J + M*2 + END DO +* +* Call DORMBR to compute QB*UB. +* (Workspace in WORK( ITEMP ): need M, prefer M*NB) +* + CALL DORMBR( 'Q', 'L', 'N', M, NS, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF +* +* If needed, compute right singular vectors. +* + IF( WANTVT) THEN + J = ITGKZ + M + DO I = 1, NS + CALL DCOPY( M, WORK( J ), 1, VT( I,1 ), LDVT ) + J = J + M*2 + END DO + CALL DLASET( 'A', NS, N-M, ZERO, ZERO, VT( 1,M+1 ), LDVT) +* +* Call DORMBR to compute VB**T * PB**T +* (Workspace in WORK( ITEMP ): need M, prefer M*NB) +* + CALL DORMBR( 'P', 'R', 'T', NS, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF + END IF + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, + $ S, MINMN, INFO ) + IF( ANRM.LT.SMLNUM ) + $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, + $ S, MINMN, INFO ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = DBLE( MAXWRK ) +* + RETURN +* +* End of DGESVDX +* + END diff --git a/dspl/liblapack/SRC/dgesvj.f b/dspl/liblapack/SRC/dgesvj.f new file mode 100644 index 0000000..2cbc5ce --- /dev/null +++ b/dspl/liblapack/SRC/dgesvj.f @@ -0,0 +1,1615 @@ +*> \brief \b DGESVJ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGESVJ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, +* LDV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDV, LWORK, M, MV, N +* CHARACTER*1 JOBA, JOBU, JOBV +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), SVA( N ), V( LDV, * ), +* $ WORK( LWORK ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGESVJ computes the singular value decomposition (SVD) of a real +*> M-by-N matrix A, where M >= N. The SVD of A is written as +*> [++] [xx] [x0] [xx] +*> A = U * SIGMA * V^t, [++] = [xx] * [ox] * [xx] +*> [++] [xx] +*> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal +*> matrix, and V is an N-by-N orthogonal matrix. The diagonal elements +*> of SIGMA are the singular values of A. The columns of U and V are the +*> left and the right singular vectors of A, respectively. +*> DGESVJ can sometimes compute tiny singular values and their singular vectors much +*> more accurately than other SVD routines, see below under Further Details. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBA +*> \verbatim +*> JOBA is CHARACTER*1 +*> Specifies the structure of A. +*> = 'L': The input matrix A is lower triangular; +*> = 'U': The input matrix A is upper triangular; +*> = 'G': The input matrix A is general M-by-N matrix, M >= N. +*> \endverbatim +*> +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> Specifies whether to compute the left singular vectors +*> (columns of U): +*> = 'U': The left singular vectors corresponding to the nonzero +*> singular values are computed and returned in the leading +*> columns of A. See more details in the description of A. +*> The default numerical orthogonality threshold is set to +*> approximately TOL=CTOL*EPS, CTOL=DSQRT(M), EPS=DLAMCH('E'). +*> = 'C': Analogous to JOBU='U', except that user can control the +*> level of numerical orthogonality of the computed left +*> singular vectors. TOL can be set to TOL = CTOL*EPS, where +*> CTOL is given on input in the array WORK. +*> No CTOL smaller than ONE is allowed. CTOL greater +*> than 1 / EPS is meaningless. The option 'C' +*> can be used if M*EPS is satisfactory orthogonality +*> of the computed left singular vectors, so CTOL=M could +*> save few sweeps of Jacobi rotations. +*> See the descriptions of A and WORK(1). +*> = 'N': The matrix U is not computed. However, see the +*> description of A. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> Specifies whether to compute the right singular vectors, that +*> is, the matrix V: +*> = 'V' : the matrix V is computed and returned in the array V +*> = 'A' : the Jacobi rotations are applied to the MV-by-N +*> array V. In other words, the right singular vector +*> matrix V is not computed explicitly, instead it is +*> applied to an MV-by-N matrix initially stored in the +*> first MV rows of V. +*> = 'N' : the matrix V is not computed and the array V is not +*> referenced +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. 1/DLAMCH('E') > M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. +*> M >= N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit : +*> If JOBU .EQ. 'U' .OR. JOBU .EQ. 'C' : +*> If INFO .EQ. 0 : +*> RANKA orthonormal columns of U are returned in the +*> leading RANKA columns of the array A. Here RANKA <= N +*> is the number of computed singular values of A that are +*> above the underflow threshold DLAMCH('S'). The singular +*> vectors corresponding to underflowed or zero singular +*> values are not computed. The value of RANKA is returned +*> in the array WORK as RANKA=NINT(WORK(2)). Also see the +*> descriptions of SVA and WORK. The computed columns of U +*> are mutually numerically orthogonal up to approximately +*> TOL=DSQRT(M)*EPS (default); or TOL=CTOL*EPS (JOBU.EQ.'C'), +*> see the description of JOBU. +*> If INFO .GT. 0 : +*> the procedure DGESVJ did not converge in the given number +*> of iterations (sweeps). In that case, the computed +*> columns of U may not be orthogonal up to TOL. The output +*> U (stored in A), SIGMA (given by the computed singular +*> values in SVA(1:N)) and V is still a decomposition of the +*> input matrix A in the sense that the residual +*> ||A-SCALE*U*SIGMA*V^T||_2 / ||A||_2 is small. +*> +*> If JOBU .EQ. 'N' : +*> If INFO .EQ. 0 : +*> Note that the left singular vectors are 'for free' in the +*> one-sided Jacobi SVD algorithm. However, if only the +*> singular values are needed, the level of numerical +*> orthogonality of U is not an issue and iterations are +*> stopped when the columns of the iterated matrix are +*> numerically orthogonal up to approximately M*EPS. Thus, +*> on exit, A contains the columns of U scaled with the +*> corresponding singular values. +*> If INFO .GT. 0 : +*> the procedure DGESVJ did not converge in the given number +*> of iterations (sweeps). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] SVA +*> \verbatim +*> SVA is DOUBLE PRECISION array, dimension (N) +*> On exit : +*> If INFO .EQ. 0 : +*> depending on the value SCALE = WORK(1), we have: +*> If SCALE .EQ. ONE : +*> SVA(1:N) contains the computed singular values of A. +*> During the computation SVA contains the Euclidean column +*> norms of the iterated matrices in the array A. +*> If SCALE .NE. ONE : +*> The singular values of A are SCALE*SVA(1:N), and this +*> factored representation is due to the fact that some of the +*> singular values of A might underflow or overflow. +*> If INFO .GT. 0 : +*> the procedure DGESVJ did not converge in the given number of +*> iterations (sweeps) and SCALE*SVA(1:N) may not be accurate. +*> \endverbatim +*> +*> \param[in] MV +*> \verbatim +*> MV is INTEGER +*> If JOBV .EQ. 'A', then the product of Jacobi rotations in DGESVJ +*> is applied to the first MV rows of V. See the description of JOBV. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (LDV,N) +*> If JOBV = 'V', then V contains on exit the N-by-N matrix of +*> the right singular vectors; +*> If JOBV = 'A', then V contains the product of the computed right +*> singular vector matrix and the initial matrix in +*> the array V. +*> If JOBV = 'N', then V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V, LDV .GE. 1. +*> If JOBV .EQ. 'V', then LDV .GE. max(1,N). +*> If JOBV .EQ. 'A', then LDV .GE. max(1,MV) . +*> \endverbatim +*> +*> \param[in,out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> On entry : +*> If JOBU .EQ. 'C' : +*> WORK(1) = CTOL, where CTOL defines the threshold for convergence. +*> The process stops if all columns of A are mutually +*> orthogonal up to CTOL*EPS, EPS=DLAMCH('E'). +*> It is required that CTOL >= ONE, i.e. it is not +*> allowed to force the routine to obtain orthogonality +*> below EPS. +*> On exit : +*> WORK(1) = SCALE is the scaling factor such that SCALE*SVA(1:N) +*> are the computed singular values of A. +*> (See description of SVA().) +*> WORK(2) = NINT(WORK(2)) is the number of the computed nonzero +*> singular values. +*> WORK(3) = NINT(WORK(3)) is the number of the computed singular +*> values that are larger than the underflow threshold. +*> WORK(4) = NINT(WORK(4)) is the number of sweeps of Jacobi +*> rotations needed for numerical convergence. +*> WORK(5) = max_{i.NE.j} |COS(A(:,i),A(:,j))| in the last sweep. +*> This is useful information in cases when DGESVJ did +*> not converge, as it can be used to estimate whether +*> the output is stil useful and for post festum analysis. +*> WORK(6) = the largest absolute value over all sines of the +*> Jacobi rotation angles in the last sweep. It can be +*> useful for a post festum analysis. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> length of WORK, WORK >= MAX(6,M+N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0 : successful exit. +*> < 0 : if INFO = -i, then the i-th argument had an illegal value +*> > 0 : DGESVJ did not converge in the maximal allowed number (30) +*> of sweeps. The output may still be useful. See the +*> description of WORK. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The orthogonal N-by-N matrix V is obtained as a product of Jacobi plane +*> rotations. The rotations are implemented as fast scaled rotations of +*> Anda and Park [1]. In the case of underflow of the Jacobi angle, a +*> modified Jacobi transformation of Drmac [4] is used. Pivot strategy uses +*> column interchanges of de Rijk [2]. The relative accuracy of the computed +*> singular values and the accuracy of the computed singular vectors (in +*> angle metric) is as guaranteed by the theory of Demmel and Veselic [3]. +*> The condition number that determines the accuracy in the full rank case +*> is essentially min_{D=diag} kappa(A*D), where kappa(.) is the +*> spectral condition number. The best performance of this Jacobi SVD +*> procedure is achieved if used in an accelerated version of Drmac and +*> Veselic [5,6], and it is the kernel routine in the SIGMA library [7]. +*> Some tunning parameters (marked with [TP]) are available for the +*> implementer. +*> The computational range for the nonzero singular values is the machine +*> number interval ( UNDERFLOW , OVERFLOW ). In extreme cases, even +*> denormalized singular values can be computed with the corresponding +*> gradual loss of accurate digits. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> ============ +*> +*> Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) +*> \endverbatim +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] A. A. Anda and H. Park: Fast plane rotations with dynamic scaling. +*> SIAM J. matrix Anal. Appl., Vol. 15 (1994), pp. 162-174. +*> [2] P. P. M. De Rijk: A one-sided Jacobi algorithm for computing the +*> singular value decomposition on a vector computer. +*> SIAM J. Sci. Stat. Comp., Vol. 10 (1998), pp. 359-371. +*> [3] J. Demmel and K. Veselic: Jacobi method is more accurate than QR. +*> [4] Z. Drmac: Implementation of Jacobi rotations for accurate singular +*> value computation in floating point arithmetic. +*> SIAM J. Sci. Comp., Vol. 18 (1997), pp. 1200-1222. +*> [5] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. +*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. +*> LAPACK Working note 169. +*> [6] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. +*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. +*> LAPACK Working note 170. +*> [7] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, +*> QSVD, (H,K)-SVD computations. +*> Department of Mathematics, University of Zagreb, 2008. +*> \endverbatim +* +*> \par Bugs, examples and comments: +* ================================= +*> +*> \verbatim +*> =========================== +*> Please report all bugs and send interesting test examples and comments to +*> drmac@math.hr. Thank you. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, + $ LDV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDV, LWORK, M, MV, N + CHARACTER*1 JOBA, JOBU, JOBV +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), SVA( N ), V( LDV, * ), + $ WORK( LWORK ) +* .. +* +* ===================================================================== +* +* .. Local Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0) + INTEGER NSWEEP + PARAMETER ( NSWEEP = 30 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION AAPP, AAPP0, AAPQ, AAQQ, APOAQ, AQOAP, BIG, + $ BIGTHETA, CS, CTOL, EPSLN, LARGE, MXAAPQ, + $ MXSINJ, ROOTBIG, ROOTEPS, ROOTSFMIN, ROOTTOL, + $ SKL, SFMIN, SMALL, SN, T, TEMP1, THETA, + $ THSIGN, TOL + INTEGER BLSKIP, EMPTSW, i, ibr, IERR, igl, IJBLSK, ir1, + $ ISWROT, jbc, jgl, KBL, LKAHEAD, MVL, N2, N34, + $ N4, NBL, NOTROT, p, PSKIPPED, q, ROWSKIP, + $ SWBAND + LOGICAL APPLV, GOSCALE, LOWER, LSVEC, NOSCALE, ROTOK, + $ RSVEC, UCTOL, UPPER +* .. +* .. Local Arrays .. + DOUBLE PRECISION FASTR( 5 ) +* .. +* .. Intrinsic Functions .. + INTRINSIC DABS, MAX, MIN, DBLE, DSIGN, DSQRT +* .. +* .. External Functions .. +* .. +* from BLAS + DOUBLE PRECISION DDOT, DNRM2 + EXTERNAL DDOT, DNRM2 + INTEGER IDAMAX + EXTERNAL IDAMAX +* from LAPACK + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. +* .. +* from BLAS + EXTERNAL DAXPY, DCOPY, DROTM, DSCAL, DSWAP +* from LAPACK + EXTERNAL DLASCL, DLASET, DLASSQ, XERBLA +* + EXTERNAL DGSVJ0, DGSVJ1 +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + LSVEC = LSAME( JOBU, 'U' ) + UCTOL = LSAME( JOBU, 'C' ) + RSVEC = LSAME( JOBV, 'V' ) + APPLV = LSAME( JOBV, 'A' ) + UPPER = LSAME( JOBA, 'U' ) + LOWER = LSAME( JOBA, 'L' ) +* + IF( .NOT.( UPPER .OR. LOWER .OR. LSAME( JOBA, 'G' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LSVEC .OR. UCTOL .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( RSVEC .OR. APPLV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( ( N.LT.0 ) .OR. ( N.GT.M ) ) THEN + INFO = -5 + ELSE IF( LDA.LT.M ) THEN + INFO = -7 + ELSE IF( MV.LT.0 ) THEN + INFO = -9 + ELSE IF( ( RSVEC .AND. ( LDV.LT.N ) ) .OR. + $ ( APPLV .AND. ( LDV.LT.MV ) ) ) THEN + INFO = -11 + ELSE IF( UCTOL .AND. ( WORK( 1 ).LE.ONE ) ) THEN + INFO = -12 + ELSE IF( LWORK.LT.MAX( M+N, 6 ) ) THEN + INFO = -13 + ELSE + INFO = 0 + END IF +* +* #:( + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGESVJ', -INFO ) + RETURN + END IF +* +* #:) Quick return for void matrix +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) )RETURN +* +* Set numerical parameters +* The stopping criterion for Jacobi rotations is +* +* max_{i<>j}|A(:,i)^T * A(:,j)|/(||A(:,i)||*||A(:,j)||) < CTOL*EPS +* +* where EPS is the round-off and CTOL is defined as follows: +* + IF( UCTOL ) THEN +* ... user controlled + CTOL = WORK( 1 ) + ELSE +* ... default + IF( LSVEC .OR. RSVEC .OR. APPLV ) THEN + CTOL = DSQRT( DBLE( M ) ) + ELSE + CTOL = DBLE( M ) + END IF + END IF +* ... and the machine dependent parameters are +*[!] (Make sure that DLAMCH() works properly on the target machine.) +* + EPSLN = DLAMCH( 'Epsilon' ) + ROOTEPS = DSQRT( EPSLN ) + SFMIN = DLAMCH( 'SafeMinimum' ) + ROOTSFMIN = DSQRT( SFMIN ) + SMALL = SFMIN / EPSLN + BIG = DLAMCH( 'Overflow' ) +* BIG = ONE / SFMIN + ROOTBIG = ONE / ROOTSFMIN + LARGE = BIG / DSQRT( DBLE( M*N ) ) + BIGTHETA = ONE / ROOTEPS +* + TOL = CTOL*EPSLN + ROOTTOL = DSQRT( TOL ) +* + IF( DBLE( M )*EPSLN.GE.ONE ) THEN + INFO = -4 + CALL XERBLA( 'DGESVJ', -INFO ) + RETURN + END IF +* +* Initialize the right singular vector matrix. +* + IF( RSVEC ) THEN + MVL = N + CALL DLASET( 'A', MVL, N, ZERO, ONE, V, LDV ) + ELSE IF( APPLV ) THEN + MVL = MV + END IF + RSVEC = RSVEC .OR. APPLV +* +* Initialize SVA( 1:N ) = ( ||A e_i||_2, i = 1:N ) +*(!) If necessary, scale A to protect the largest singular value +* from overflow. It is possible that saving the largest singular +* value destroys the information about the small ones. +* This initial scaling is almost minimal in the sense that the +* goal is to make sure that no column norm overflows, and that +* DSQRT(N)*max_i SVA(i) does not overflow. If INFinite entries +* in A are detected, the procedure returns with INFO=-6. +* + SKL= ONE / DSQRT( DBLE( M )*DBLE( N ) ) + NOSCALE = .TRUE. + GOSCALE = .TRUE. +* + IF( LOWER ) THEN +* the input matrix is M-by-N lower triangular (trapezoidal) + DO 1874 p = 1, N + AAPP = ZERO + AAQQ = ONE + CALL DLASSQ( M-p+1, A( p, p ), 1, AAPP, AAQQ ) + IF( AAPP.GT.BIG ) THEN + INFO = -6 + CALL XERBLA( 'DGESVJ', -INFO ) + RETURN + END IF + AAQQ = DSQRT( AAQQ ) + IF( ( AAPP.LT.( BIG / AAQQ ) ) .AND. NOSCALE ) THEN + SVA( p ) = AAPP*AAQQ + ELSE + NOSCALE = .FALSE. + SVA( p ) = AAPP*( AAQQ*SKL) + IF( GOSCALE ) THEN + GOSCALE = .FALSE. + DO 1873 q = 1, p - 1 + SVA( q ) = SVA( q )*SKL + 1873 CONTINUE + END IF + END IF + 1874 CONTINUE + ELSE IF( UPPER ) THEN +* the input matrix is M-by-N upper triangular (trapezoidal) + DO 2874 p = 1, N + AAPP = ZERO + AAQQ = ONE + CALL DLASSQ( p, A( 1, p ), 1, AAPP, AAQQ ) + IF( AAPP.GT.BIG ) THEN + INFO = -6 + CALL XERBLA( 'DGESVJ', -INFO ) + RETURN + END IF + AAQQ = DSQRT( AAQQ ) + IF( ( AAPP.LT.( BIG / AAQQ ) ) .AND. NOSCALE ) THEN + SVA( p ) = AAPP*AAQQ + ELSE + NOSCALE = .FALSE. + SVA( p ) = AAPP*( AAQQ*SKL) + IF( GOSCALE ) THEN + GOSCALE = .FALSE. + DO 2873 q = 1, p - 1 + SVA( q ) = SVA( q )*SKL + 2873 CONTINUE + END IF + END IF + 2874 CONTINUE + ELSE +* the input matrix is M-by-N general dense + DO 3874 p = 1, N + AAPP = ZERO + AAQQ = ONE + CALL DLASSQ( M, A( 1, p ), 1, AAPP, AAQQ ) + IF( AAPP.GT.BIG ) THEN + INFO = -6 + CALL XERBLA( 'DGESVJ', -INFO ) + RETURN + END IF + AAQQ = DSQRT( AAQQ ) + IF( ( AAPP.LT.( BIG / AAQQ ) ) .AND. NOSCALE ) THEN + SVA( p ) = AAPP*AAQQ + ELSE + NOSCALE = .FALSE. + SVA( p ) = AAPP*( AAQQ*SKL) + IF( GOSCALE ) THEN + GOSCALE = .FALSE. + DO 3873 q = 1, p - 1 + SVA( q ) = SVA( q )*SKL + 3873 CONTINUE + END IF + END IF + 3874 CONTINUE + END IF +* + IF( NOSCALE )SKL= ONE +* +* Move the smaller part of the spectrum from the underflow threshold +*(!) Start by determining the position of the nonzero entries of the +* array SVA() relative to ( SFMIN, BIG ). +* + AAPP = ZERO + AAQQ = BIG + DO 4781 p = 1, N + IF( SVA( p ).NE.ZERO )AAQQ = MIN( AAQQ, SVA( p ) ) + AAPP = MAX( AAPP, SVA( p ) ) + 4781 CONTINUE +* +* #:) Quick return for zero matrix +* + IF( AAPP.EQ.ZERO ) THEN + IF( LSVEC )CALL DLASET( 'G', M, N, ZERO, ONE, A, LDA ) + WORK( 1 ) = ONE + WORK( 2 ) = ZERO + WORK( 3 ) = ZERO + WORK( 4 ) = ZERO + WORK( 5 ) = ZERO + WORK( 6 ) = ZERO + RETURN + END IF +* +* #:) Quick return for one-column matrix +* + IF( N.EQ.1 ) THEN + IF( LSVEC )CALL DLASCL( 'G', 0, 0, SVA( 1 ), SKL, M, 1, + $ A( 1, 1 ), LDA, IERR ) + WORK( 1 ) = ONE / SKL + IF( SVA( 1 ).GE.SFMIN ) THEN + WORK( 2 ) = ONE + ELSE + WORK( 2 ) = ZERO + END IF + WORK( 3 ) = ZERO + WORK( 4 ) = ZERO + WORK( 5 ) = ZERO + WORK( 6 ) = ZERO + RETURN + END IF +* +* Protect small singular values from underflow, and try to +* avoid underflows/overflows in computing Jacobi rotations. +* + SN = DSQRT( SFMIN / EPSLN ) + TEMP1 = DSQRT( BIG / DBLE( N ) ) + IF( ( AAPP.LE.SN ) .OR. ( AAQQ.GE.TEMP1 ) .OR. + $ ( ( SN.LE.AAQQ ) .AND. ( AAPP.LE.TEMP1 ) ) ) THEN + TEMP1 = MIN( BIG, TEMP1 / AAPP ) +* AAQQ = AAQQ*TEMP1 +* AAPP = AAPP*TEMP1 + ELSE IF( ( AAQQ.LE.SN ) .AND. ( AAPP.LE.TEMP1 ) ) THEN + TEMP1 = MIN( SN / AAQQ, BIG / ( AAPP*DSQRT( DBLE( N ) ) ) ) +* AAQQ = AAQQ*TEMP1 +* AAPP = AAPP*TEMP1 + ELSE IF( ( AAQQ.GE.SN ) .AND. ( AAPP.GE.TEMP1 ) ) THEN + TEMP1 = MAX( SN / AAQQ, TEMP1 / AAPP ) +* AAQQ = AAQQ*TEMP1 +* AAPP = AAPP*TEMP1 + ELSE IF( ( AAQQ.LE.SN ) .AND. ( AAPP.GE.TEMP1 ) ) THEN + TEMP1 = MIN( SN / AAQQ, BIG / ( DSQRT( DBLE( N ) )*AAPP ) ) +* AAQQ = AAQQ*TEMP1 +* AAPP = AAPP*TEMP1 + ELSE + TEMP1 = ONE + END IF +* +* Scale, if necessary +* + IF( TEMP1.NE.ONE ) THEN + CALL DLASCL( 'G', 0, 0, ONE, TEMP1, N, 1, SVA, N, IERR ) + END IF + SKL= TEMP1*SKL + IF( SKL.NE.ONE ) THEN + CALL DLASCL( JOBA, 0, 0, ONE, SKL, M, N, A, LDA, IERR ) + SKL= ONE / SKL + END IF +* +* Row-cyclic Jacobi SVD algorithm with column pivoting +* + EMPTSW = ( N*( N-1 ) ) / 2 + NOTROT = 0 + FASTR( 1 ) = ZERO +* +* A is represented in factored form A = A * diag(WORK), where diag(WORK) +* is initialized to identity. WORK is updated during fast scaled +* rotations. +* + DO 1868 q = 1, N + WORK( q ) = ONE + 1868 CONTINUE +* +* + SWBAND = 3 +*[TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective +* if DGESVJ is used as a computational routine in the preconditioned +* Jacobi SVD algorithm DGESVJ. For sweeps i=1:SWBAND the procedure +* works on pivots inside a band-like region around the diagonal. +* The boundaries are determined dynamically, based on the number of +* pivots above a threshold. +* + KBL = MIN( 8, N ) +*[TP] KBL is a tuning parameter that defines the tile size in the +* tiling of the p-q loops of pivot pairs. In general, an optimal +* value of KBL depends on the matrix dimensions and on the +* parameters of the computer's memory. +* + NBL = N / KBL + IF( ( NBL*KBL ).NE.N )NBL = NBL + 1 +* + BLSKIP = KBL**2 +*[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. +* + ROWSKIP = MIN( 5, KBL ) +*[TP] ROWSKIP is a tuning parameter. +* + LKAHEAD = 1 +*[TP] LKAHEAD is a tuning parameter. +* +* Quasi block transformations, using the lower (upper) triangular +* structure of the input matrix. The quasi-block-cycling usually +* invokes cubic convergence. Big part of this cycle is done inside +* canonical subspaces of dimensions less than M. +* + IF( ( LOWER .OR. UPPER ) .AND. ( N.GT.MAX( 64, 4*KBL ) ) ) THEN +*[TP] The number of partition levels and the actual partition are +* tuning parameters. + N4 = N / 4 + N2 = N / 2 + N34 = 3*N4 + IF( APPLV ) THEN + q = 0 + ELSE + q = 1 + END IF +* + IF( LOWER ) THEN +* +* This works very well on lower triangular matrices, in particular +* in the framework of the preconditioned Jacobi SVD (xGEJSV). +* The idea is simple: +* [+ 0 0 0] Note that Jacobi transformations of [0 0] +* [+ + 0 0] [0 0] +* [+ + x 0] actually work on [x 0] [x 0] +* [+ + x x] [x x]. [x x] +* + CALL DGSVJ0( JOBV, M-N34, N-N34, A( N34+1, N34+1 ), LDA, + $ WORK( N34+1 ), SVA( N34+1 ), MVL, + $ V( N34*q+1, N34+1 ), LDV, EPSLN, SFMIN, TOL, + $ 2, WORK( N+1 ), LWORK-N, IERR ) +* + CALL DGSVJ0( JOBV, M-N2, N34-N2, A( N2+1, N2+1 ), LDA, + $ WORK( N2+1 ), SVA( N2+1 ), MVL, + $ V( N2*q+1, N2+1 ), LDV, EPSLN, SFMIN, TOL, 2, + $ WORK( N+1 ), LWORK-N, IERR ) +* + CALL DGSVJ1( JOBV, M-N2, N-N2, N4, A( N2+1, N2+1 ), LDA, + $ WORK( N2+1 ), SVA( N2+1 ), MVL, + $ V( N2*q+1, N2+1 ), LDV, EPSLN, SFMIN, TOL, 1, + $ WORK( N+1 ), LWORK-N, IERR ) +* + CALL DGSVJ0( JOBV, M-N4, N2-N4, A( N4+1, N4+1 ), LDA, + $ WORK( N4+1 ), SVA( N4+1 ), MVL, + $ V( N4*q+1, N4+1 ), LDV, EPSLN, SFMIN, TOL, 1, + $ WORK( N+1 ), LWORK-N, IERR ) +* + CALL DGSVJ0( JOBV, M, N4, A, LDA, WORK, SVA, MVL, V, LDV, + $ EPSLN, SFMIN, TOL, 1, WORK( N+1 ), LWORK-N, + $ IERR ) +* + CALL DGSVJ1( JOBV, M, N2, N4, A, LDA, WORK, SVA, MVL, V, + $ LDV, EPSLN, SFMIN, TOL, 1, WORK( N+1 ), + $ LWORK-N, IERR ) +* +* + ELSE IF( UPPER ) THEN +* +* + CALL DGSVJ0( JOBV, N4, N4, A, LDA, WORK, SVA, MVL, V, LDV, + $ EPSLN, SFMIN, TOL, 2, WORK( N+1 ), LWORK-N, + $ IERR ) +* + CALL DGSVJ0( JOBV, N2, N4, A( 1, N4+1 ), LDA, WORK( N4+1 ), + $ SVA( N4+1 ), MVL, V( N4*q+1, N4+1 ), LDV, + $ EPSLN, SFMIN, TOL, 1, WORK( N+1 ), LWORK-N, + $ IERR ) +* + CALL DGSVJ1( JOBV, N2, N2, N4, A, LDA, WORK, SVA, MVL, V, + $ LDV, EPSLN, SFMIN, TOL, 1, WORK( N+1 ), + $ LWORK-N, IERR ) +* + CALL DGSVJ0( JOBV, N2+N4, N4, A( 1, N2+1 ), LDA, + $ WORK( N2+1 ), SVA( N2+1 ), MVL, + $ V( N2*q+1, N2+1 ), LDV, EPSLN, SFMIN, TOL, 1, + $ WORK( N+1 ), LWORK-N, IERR ) + + END IF +* + END IF +* +* .. Row-cyclic pivot strategy with de Rijk's pivoting .. +* + DO 1993 i = 1, NSWEEP +* +* .. go go go ... +* + MXAAPQ = ZERO + MXSINJ = ZERO + ISWROT = 0 +* + NOTROT = 0 + PSKIPPED = 0 +* +* Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs +* 1 <= p < q <= N. This is the first step toward a blocked implementation +* of the rotations. New implementation, based on block transformations, +* is under development. +* + DO 2000 ibr = 1, NBL +* + igl = ( ibr-1 )*KBL + 1 +* + DO 1002 ir1 = 0, MIN( LKAHEAD, NBL-ibr ) +* + igl = igl + ir1*KBL +* + DO 2001 p = igl, MIN( igl+KBL-1, N-1 ) +* +* .. de Rijk's pivoting +* + q = IDAMAX( N-p+1, SVA( p ), 1 ) + p - 1 + IF( p.NE.q ) THEN + CALL DSWAP( M, A( 1, p ), 1, A( 1, q ), 1 ) + IF( RSVEC )CALL DSWAP( MVL, V( 1, p ), 1, + $ V( 1, q ), 1 ) + TEMP1 = SVA( p ) + SVA( p ) = SVA( q ) + SVA( q ) = TEMP1 + TEMP1 = WORK( p ) + WORK( p ) = WORK( q ) + WORK( q ) = TEMP1 + END IF +* + IF( ir1.EQ.0 ) THEN +* +* Column norms are periodically updated by explicit +* norm computation. +* Caveat: +* Unfortunately, some BLAS implementations compute DNRM2(M,A(1,p),1) +* as DSQRT(DDOT(M,A(1,p),1,A(1,p),1)), which may cause the result to +* overflow for ||A(:,p)||_2 > DSQRT(overflow_threshold), and to +* underflow for ||A(:,p)||_2 < DSQRT(underflow_threshold). +* Hence, DNRM2 cannot be trusted, not even in the case when +* the true norm is far from the under(over)flow boundaries. +* If properly implemented DNRM2 is available, the IF-THEN-ELSE +* below should read "AAPP = DNRM2( M, A(1,p), 1 ) * WORK(p)". +* + IF( ( SVA( p ).LT.ROOTBIG ) .AND. + $ ( SVA( p ).GT.ROOTSFMIN ) ) THEN + SVA( p ) = DNRM2( M, A( 1, p ), 1 )*WORK( p ) + ELSE + TEMP1 = ZERO + AAPP = ONE + CALL DLASSQ( M, A( 1, p ), 1, TEMP1, AAPP ) + SVA( p ) = TEMP1*DSQRT( AAPP )*WORK( p ) + END IF + AAPP = SVA( p ) + ELSE + AAPP = SVA( p ) + END IF +* + IF( AAPP.GT.ZERO ) THEN +* + PSKIPPED = 0 +* + DO 2002 q = p + 1, MIN( igl+KBL-1, N ) +* + AAQQ = SVA( q ) +* + IF( AAQQ.GT.ZERO ) THEN +* + AAPP0 = AAPP + IF( AAQQ.GE.ONE ) THEN + ROTOK = ( SMALL*AAPP ).LE.AAQQ + IF( AAPP.LT.( BIG / AAQQ ) ) THEN + AAPQ = ( DDOT( M, A( 1, p ), 1, A( 1, + $ q ), 1 )*WORK( p )*WORK( q ) / + $ AAQQ ) / AAPP + ELSE + CALL DCOPY( M, A( 1, p ), 1, + $ WORK( N+1 ), 1 ) + CALL DLASCL( 'G', 0, 0, AAPP, + $ WORK( p ), M, 1, + $ WORK( N+1 ), LDA, IERR ) + AAPQ = DDOT( M, WORK( N+1 ), 1, + $ A( 1, q ), 1 )*WORK( q ) / AAQQ + END IF + ELSE + ROTOK = AAPP.LE.( AAQQ / SMALL ) + IF( AAPP.GT.( SMALL / AAQQ ) ) THEN + AAPQ = ( DDOT( M, A( 1, p ), 1, A( 1, + $ q ), 1 )*WORK( p )*WORK( q ) / + $ AAQQ ) / AAPP + ELSE + CALL DCOPY( M, A( 1, q ), 1, + $ WORK( N+1 ), 1 ) + CALL DLASCL( 'G', 0, 0, AAQQ, + $ WORK( q ), M, 1, + $ WORK( N+1 ), LDA, IERR ) + AAPQ = DDOT( M, WORK( N+1 ), 1, + $ A( 1, p ), 1 )*WORK( p ) / AAPP + END IF + END IF +* + MXAAPQ = MAX( MXAAPQ, DABS( AAPQ ) ) +* +* TO rotate or NOT to rotate, THAT is the question ... +* + IF( DABS( AAPQ ).GT.TOL ) THEN +* +* .. rotate +*[RTD] ROTATED = ROTATED + ONE +* + IF( ir1.EQ.0 ) THEN + NOTROT = 0 + PSKIPPED = 0 + ISWROT = ISWROT + 1 + END IF +* + IF( ROTOK ) THEN +* + AQOAP = AAQQ / AAPP + APOAQ = AAPP / AAQQ + THETA = -HALF*DABS(AQOAP-APOAQ)/AAPQ +* + IF( DABS( THETA ).GT.BIGTHETA ) THEN +* + T = HALF / THETA + FASTR( 3 ) = T*WORK( p ) / WORK( q ) + FASTR( 4 ) = -T*WORK( q ) / + $ WORK( p ) + CALL DROTM( M, A( 1, p ), 1, + $ A( 1, q ), 1, FASTR ) + IF( RSVEC )CALL DROTM( MVL, + $ V( 1, p ), 1, + $ V( 1, q ), 1, + $ FASTR ) + SVA( q ) = AAQQ*DSQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ ) ) + AAPP = AAPP*DSQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ ) ) + MXSINJ = MAX( MXSINJ, DABS( T ) ) +* + ELSE +* +* .. choose correct signum for THETA and rotate +* + THSIGN = -DSIGN( ONE, AAPQ ) + T = ONE / ( THETA+THSIGN* + $ DSQRT( ONE+THETA*THETA ) ) + CS = DSQRT( ONE / ( ONE+T*T ) ) + SN = T*CS +* + MXSINJ = MAX( MXSINJ, DABS( SN ) ) + SVA( q ) = AAQQ*DSQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ ) ) + AAPP = AAPP*DSQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ ) ) +* + APOAQ = WORK( p ) / WORK( q ) + AQOAP = WORK( q ) / WORK( p ) + IF( WORK( p ).GE.ONE ) THEN + IF( WORK( q ).GE.ONE ) THEN + FASTR( 3 ) = T*APOAQ + FASTR( 4 ) = -T*AQOAP + WORK( p ) = WORK( p )*CS + WORK( q ) = WORK( q )*CS + CALL DROTM( M, A( 1, p ), 1, + $ A( 1, q ), 1, + $ FASTR ) + IF( RSVEC )CALL DROTM( MVL, + $ V( 1, p ), 1, V( 1, q ), + $ 1, FASTR ) + ELSE + CALL DAXPY( M, -T*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + CALL DAXPY( M, CS*SN*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + WORK( p ) = WORK( p )*CS + WORK( q ) = WORK( q ) / CS + IF( RSVEC ) THEN + CALL DAXPY( MVL, -T*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + CALL DAXPY( MVL, + $ CS*SN*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + END IF + END IF + ELSE + IF( WORK( q ).GE.ONE ) THEN + CALL DAXPY( M, T*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + CALL DAXPY( M, -CS*SN*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + WORK( p ) = WORK( p ) / CS + WORK( q ) = WORK( q )*CS + IF( RSVEC ) THEN + CALL DAXPY( MVL, T*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + CALL DAXPY( MVL, + $ -CS*SN*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + END IF + ELSE + IF( WORK( p ).GE.WORK( q ) ) + $ THEN + CALL DAXPY( M, -T*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + CALL DAXPY( M, CS*SN*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + WORK( p ) = WORK( p )*CS + WORK( q ) = WORK( q ) / CS + IF( RSVEC ) THEN + CALL DAXPY( MVL, + $ -T*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + CALL DAXPY( MVL, + $ CS*SN*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + END IF + ELSE + CALL DAXPY( M, T*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + CALL DAXPY( M, + $ -CS*SN*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + WORK( p ) = WORK( p ) / CS + WORK( q ) = WORK( q )*CS + IF( RSVEC ) THEN + CALL DAXPY( MVL, + $ T*APOAQ, V( 1, p ), + $ 1, V( 1, q ), 1 ) + CALL DAXPY( MVL, + $ -CS*SN*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + END IF + END IF + END IF + END IF + END IF +* + ELSE +* .. have to use modified Gram-Schmidt like transformation + CALL DCOPY( M, A( 1, p ), 1, + $ WORK( N+1 ), 1 ) + CALL DLASCL( 'G', 0, 0, AAPP, ONE, M, + $ 1, WORK( N+1 ), LDA, + $ IERR ) + CALL DLASCL( 'G', 0, 0, AAQQ, ONE, M, + $ 1, A( 1, q ), LDA, IERR ) + TEMP1 = -AAPQ*WORK( p ) / WORK( q ) + CALL DAXPY( M, TEMP1, WORK( N+1 ), 1, + $ A( 1, q ), 1 ) + CALL DLASCL( 'G', 0, 0, ONE, AAQQ, M, + $ 1, A( 1, q ), LDA, IERR ) + SVA( q ) = AAQQ*DSQRT( MAX( ZERO, + $ ONE-AAPQ*AAPQ ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + END IF +* END IF ROTOK THEN ... ELSE +* +* In the case of cancellation in updating SVA(q), SVA(p) +* recompute SVA(q), SVA(p). +* + IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS ) + $ THEN + IF( ( AAQQ.LT.ROOTBIG ) .AND. + $ ( AAQQ.GT.ROOTSFMIN ) ) THEN + SVA( q ) = DNRM2( M, A( 1, q ), 1 )* + $ WORK( q ) + ELSE + T = ZERO + AAQQ = ONE + CALL DLASSQ( M, A( 1, q ), 1, T, + $ AAQQ ) + SVA( q ) = T*DSQRT( AAQQ )*WORK( q ) + END IF + END IF + IF( ( AAPP / AAPP0 ).LE.ROOTEPS ) THEN + IF( ( AAPP.LT.ROOTBIG ) .AND. + $ ( AAPP.GT.ROOTSFMIN ) ) THEN + AAPP = DNRM2( M, A( 1, p ), 1 )* + $ WORK( p ) + ELSE + T = ZERO + AAPP = ONE + CALL DLASSQ( M, A( 1, p ), 1, T, + $ AAPP ) + AAPP = T*DSQRT( AAPP )*WORK( p ) + END IF + SVA( p ) = AAPP + END IF +* + ELSE +* A(:,p) and A(:,q) already numerically orthogonal + IF( ir1.EQ.0 )NOTROT = NOTROT + 1 +*[RTD] SKIPPED = SKIPPED + 1 + PSKIPPED = PSKIPPED + 1 + END IF + ELSE +* A(:,q) is zero column + IF( ir1.EQ.0 )NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + END IF +* + IF( ( i.LE.SWBAND ) .AND. + $ ( PSKIPPED.GT.ROWSKIP ) ) THEN + IF( ir1.EQ.0 )AAPP = -AAPP + NOTROT = 0 + GO TO 2103 + END IF +* + 2002 CONTINUE +* END q-LOOP +* + 2103 CONTINUE +* bailed out of q-loop +* + SVA( p ) = AAPP +* + ELSE + SVA( p ) = AAPP + IF( ( ir1.EQ.0 ) .AND. ( AAPP.EQ.ZERO ) ) + $ NOTROT = NOTROT + MIN( igl+KBL-1, N ) - p + END IF +* + 2001 CONTINUE +* end of the p-loop +* end of doing the block ( ibr, ibr ) + 1002 CONTINUE +* end of ir1-loop +* +* ... go to the off diagonal blocks +* + igl = ( ibr-1 )*KBL + 1 +* + DO 2010 jbc = ibr + 1, NBL +* + jgl = ( jbc-1 )*KBL + 1 +* +* doing the block at ( ibr, jbc ) +* + IJBLSK = 0 + DO 2100 p = igl, MIN( igl+KBL-1, N ) +* + AAPP = SVA( p ) + IF( AAPP.GT.ZERO ) THEN +* + PSKIPPED = 0 +* + DO 2200 q = jgl, MIN( jgl+KBL-1, N ) +* + AAQQ = SVA( q ) + IF( AAQQ.GT.ZERO ) THEN + AAPP0 = AAPP +* +* .. M x 2 Jacobi SVD .. +* +* Safe Gram matrix computation +* + IF( AAQQ.GE.ONE ) THEN + IF( AAPP.GE.AAQQ ) THEN + ROTOK = ( SMALL*AAPP ).LE.AAQQ + ELSE + ROTOK = ( SMALL*AAQQ ).LE.AAPP + END IF + IF( AAPP.LT.( BIG / AAQQ ) ) THEN + AAPQ = ( DDOT( M, A( 1, p ), 1, A( 1, + $ q ), 1 )*WORK( p )*WORK( q ) / + $ AAQQ ) / AAPP + ELSE + CALL DCOPY( M, A( 1, p ), 1, + $ WORK( N+1 ), 1 ) + CALL DLASCL( 'G', 0, 0, AAPP, + $ WORK( p ), M, 1, + $ WORK( N+1 ), LDA, IERR ) + AAPQ = DDOT( M, WORK( N+1 ), 1, + $ A( 1, q ), 1 )*WORK( q ) / AAQQ + END IF + ELSE + IF( AAPP.GE.AAQQ ) THEN + ROTOK = AAPP.LE.( AAQQ / SMALL ) + ELSE + ROTOK = AAQQ.LE.( AAPP / SMALL ) + END IF + IF( AAPP.GT.( SMALL / AAQQ ) ) THEN + AAPQ = ( DDOT( M, A( 1, p ), 1, A( 1, + $ q ), 1 )*WORK( p )*WORK( q ) / + $ AAQQ ) / AAPP + ELSE + CALL DCOPY( M, A( 1, q ), 1, + $ WORK( N+1 ), 1 ) + CALL DLASCL( 'G', 0, 0, AAQQ, + $ WORK( q ), M, 1, + $ WORK( N+1 ), LDA, IERR ) + AAPQ = DDOT( M, WORK( N+1 ), 1, + $ A( 1, p ), 1 )*WORK( p ) / AAPP + END IF + END IF +* + MXAAPQ = MAX( MXAAPQ, DABS( AAPQ ) ) +* +* TO rotate or NOT to rotate, THAT is the question ... +* + IF( DABS( AAPQ ).GT.TOL ) THEN + NOTROT = 0 +*[RTD] ROTATED = ROTATED + 1 + PSKIPPED = 0 + ISWROT = ISWROT + 1 +* + IF( ROTOK ) THEN +* + AQOAP = AAQQ / AAPP + APOAQ = AAPP / AAQQ + THETA = -HALF*DABS(AQOAP-APOAQ)/AAPQ + IF( AAQQ.GT.AAPP0 )THETA = -THETA +* + IF( DABS( THETA ).GT.BIGTHETA ) THEN + T = HALF / THETA + FASTR( 3 ) = T*WORK( p ) / WORK( q ) + FASTR( 4 ) = -T*WORK( q ) / + $ WORK( p ) + CALL DROTM( M, A( 1, p ), 1, + $ A( 1, q ), 1, FASTR ) + IF( RSVEC )CALL DROTM( MVL, + $ V( 1, p ), 1, + $ V( 1, q ), 1, + $ FASTR ) + SVA( q ) = AAQQ*DSQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ ) ) + AAPP = AAPP*DSQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ ) ) + MXSINJ = MAX( MXSINJ, DABS( T ) ) + ELSE +* +* .. choose correct signum for THETA and rotate +* + THSIGN = -DSIGN( ONE, AAPQ ) + IF( AAQQ.GT.AAPP0 )THSIGN = -THSIGN + T = ONE / ( THETA+THSIGN* + $ DSQRT( ONE+THETA*THETA ) ) + CS = DSQRT( ONE / ( ONE+T*T ) ) + SN = T*CS + MXSINJ = MAX( MXSINJ, DABS( SN ) ) + SVA( q ) = AAQQ*DSQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ ) ) + AAPP = AAPP*DSQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ ) ) +* + APOAQ = WORK( p ) / WORK( q ) + AQOAP = WORK( q ) / WORK( p ) + IF( WORK( p ).GE.ONE ) THEN +* + IF( WORK( q ).GE.ONE ) THEN + FASTR( 3 ) = T*APOAQ + FASTR( 4 ) = -T*AQOAP + WORK( p ) = WORK( p )*CS + WORK( q ) = WORK( q )*CS + CALL DROTM( M, A( 1, p ), 1, + $ A( 1, q ), 1, + $ FASTR ) + IF( RSVEC )CALL DROTM( MVL, + $ V( 1, p ), 1, V( 1, q ), + $ 1, FASTR ) + ELSE + CALL DAXPY( M, -T*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + CALL DAXPY( M, CS*SN*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + IF( RSVEC ) THEN + CALL DAXPY( MVL, -T*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + CALL DAXPY( MVL, + $ CS*SN*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + END IF + WORK( p ) = WORK( p )*CS + WORK( q ) = WORK( q ) / CS + END IF + ELSE + IF( WORK( q ).GE.ONE ) THEN + CALL DAXPY( M, T*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + CALL DAXPY( M, -CS*SN*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + IF( RSVEC ) THEN + CALL DAXPY( MVL, T*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + CALL DAXPY( MVL, + $ -CS*SN*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + END IF + WORK( p ) = WORK( p ) / CS + WORK( q ) = WORK( q )*CS + ELSE + IF( WORK( p ).GE.WORK( q ) ) + $ THEN + CALL DAXPY( M, -T*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + CALL DAXPY( M, CS*SN*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + WORK( p ) = WORK( p )*CS + WORK( q ) = WORK( q ) / CS + IF( RSVEC ) THEN + CALL DAXPY( MVL, + $ -T*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + CALL DAXPY( MVL, + $ CS*SN*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + END IF + ELSE + CALL DAXPY( M, T*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + CALL DAXPY( M, + $ -CS*SN*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + WORK( p ) = WORK( p ) / CS + WORK( q ) = WORK( q )*CS + IF( RSVEC ) THEN + CALL DAXPY( MVL, + $ T*APOAQ, V( 1, p ), + $ 1, V( 1, q ), 1 ) + CALL DAXPY( MVL, + $ -CS*SN*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + END IF + END IF + END IF + END IF + END IF +* + ELSE + IF( AAPP.GT.AAQQ ) THEN + CALL DCOPY( M, A( 1, p ), 1, + $ WORK( N+1 ), 1 ) + CALL DLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, WORK( N+1 ), LDA, + $ IERR ) + CALL DLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + TEMP1 = -AAPQ*WORK( p ) / WORK( q ) + CALL DAXPY( M, TEMP1, WORK( N+1 ), + $ 1, A( 1, q ), 1 ) + CALL DLASCL( 'G', 0, 0, ONE, AAQQ, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + SVA( q ) = AAQQ*DSQRT( MAX( ZERO, + $ ONE-AAPQ*AAPQ ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + ELSE + CALL DCOPY( M, A( 1, q ), 1, + $ WORK( N+1 ), 1 ) + CALL DLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, WORK( N+1 ), LDA, + $ IERR ) + CALL DLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + TEMP1 = -AAPQ*WORK( q ) / WORK( p ) + CALL DAXPY( M, TEMP1, WORK( N+1 ), + $ 1, A( 1, p ), 1 ) + CALL DLASCL( 'G', 0, 0, ONE, AAPP, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + SVA( p ) = AAPP*DSQRT( MAX( ZERO, + $ ONE-AAPQ*AAPQ ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + END IF + END IF +* END IF ROTOK THEN ... ELSE +* +* In the case of cancellation in updating SVA(q) +* .. recompute SVA(q) + IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS ) + $ THEN + IF( ( AAQQ.LT.ROOTBIG ) .AND. + $ ( AAQQ.GT.ROOTSFMIN ) ) THEN + SVA( q ) = DNRM2( M, A( 1, q ), 1 )* + $ WORK( q ) + ELSE + T = ZERO + AAQQ = ONE + CALL DLASSQ( M, A( 1, q ), 1, T, + $ AAQQ ) + SVA( q ) = T*DSQRT( AAQQ )*WORK( q ) + END IF + END IF + IF( ( AAPP / AAPP0 )**2.LE.ROOTEPS ) THEN + IF( ( AAPP.LT.ROOTBIG ) .AND. + $ ( AAPP.GT.ROOTSFMIN ) ) THEN + AAPP = DNRM2( M, A( 1, p ), 1 )* + $ WORK( p ) + ELSE + T = ZERO + AAPP = ONE + CALL DLASSQ( M, A( 1, p ), 1, T, + $ AAPP ) + AAPP = T*DSQRT( AAPP )*WORK( p ) + END IF + SVA( p ) = AAPP + END IF +* end of OK rotation + ELSE + NOTROT = NOTROT + 1 +*[RTD] SKIPPED = SKIPPED + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF + ELSE + NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF +* + IF( ( i.LE.SWBAND ) .AND. ( IJBLSK.GE.BLSKIP ) ) + $ THEN + SVA( p ) = AAPP + NOTROT = 0 + GO TO 2011 + END IF + IF( ( i.LE.SWBAND ) .AND. + $ ( PSKIPPED.GT.ROWSKIP ) ) THEN + AAPP = -AAPP + NOTROT = 0 + GO TO 2203 + END IF +* + 2200 CONTINUE +* end of the q-loop + 2203 CONTINUE +* + SVA( p ) = AAPP +* + ELSE +* + IF( AAPP.EQ.ZERO )NOTROT = NOTROT + + $ MIN( jgl+KBL-1, N ) - jgl + 1 + IF( AAPP.LT.ZERO )NOTROT = 0 +* + END IF +* + 2100 CONTINUE +* end of the p-loop + 2010 CONTINUE +* end of the jbc-loop + 2011 CONTINUE +*2011 bailed out of the jbc-loop + DO 2012 p = igl, MIN( igl+KBL-1, N ) + SVA( p ) = DABS( SVA( p ) ) + 2012 CONTINUE +*** + 2000 CONTINUE +*2000 :: end of the ibr-loop +* +* .. update SVA(N) + IF( ( SVA( N ).LT.ROOTBIG ) .AND. ( SVA( N ).GT.ROOTSFMIN ) ) + $ THEN + SVA( N ) = DNRM2( M, A( 1, N ), 1 )*WORK( N ) + ELSE + T = ZERO + AAPP = ONE + CALL DLASSQ( M, A( 1, N ), 1, T, AAPP ) + SVA( N ) = T*DSQRT( AAPP )*WORK( N ) + END IF +* +* Additional steering devices +* + IF( ( i.LT.SWBAND ) .AND. ( ( MXAAPQ.LE.ROOTTOL ) .OR. + $ ( ISWROT.LE.N ) ) )SWBAND = i +* + IF( ( i.GT.SWBAND+1 ) .AND. ( MXAAPQ.LT.DSQRT( DBLE( N ) )* + $ TOL ) .AND. ( DBLE( N )*MXAAPQ*MXSINJ.LT.TOL ) ) THEN + GO TO 1994 + END IF +* + IF( NOTROT.GE.EMPTSW )GO TO 1994 +* + 1993 CONTINUE +* end i=1:NSWEEP loop +* +* #:( Reaching this point means that the procedure has not converged. + INFO = NSWEEP - 1 + GO TO 1995 +* + 1994 CONTINUE +* #:) Reaching this point means numerical convergence after the i-th +* sweep. +* + INFO = 0 +* #:) INFO = 0 confirms successful iterations. + 1995 CONTINUE +* +* Sort the singular values and find how many are above +* the underflow threshold. +* + N2 = 0 + N4 = 0 + DO 5991 p = 1, N - 1 + q = IDAMAX( N-p+1, SVA( p ), 1 ) + p - 1 + IF( p.NE.q ) THEN + TEMP1 = SVA( p ) + SVA( p ) = SVA( q ) + SVA( q ) = TEMP1 + TEMP1 = WORK( p ) + WORK( p ) = WORK( q ) + WORK( q ) = TEMP1 + CALL DSWAP( M, A( 1, p ), 1, A( 1, q ), 1 ) + IF( RSVEC )CALL DSWAP( MVL, V( 1, p ), 1, V( 1, q ), 1 ) + END IF + IF( SVA( p ).NE.ZERO ) THEN + N4 = N4 + 1 + IF( SVA( p )*SKL.GT.SFMIN )N2 = N2 + 1 + END IF + 5991 CONTINUE + IF( SVA( N ).NE.ZERO ) THEN + N4 = N4 + 1 + IF( SVA( N )*SKL.GT.SFMIN )N2 = N2 + 1 + END IF +* +* Normalize the left singular vectors. +* + IF( LSVEC .OR. UCTOL ) THEN + DO 1998 p = 1, N2 + CALL DSCAL( M, WORK( p ) / SVA( p ), A( 1, p ), 1 ) + 1998 CONTINUE + END IF +* +* Scale the product of Jacobi rotations (assemble the fast rotations). +* + IF( RSVEC ) THEN + IF( APPLV ) THEN + DO 2398 p = 1, N + CALL DSCAL( MVL, WORK( p ), V( 1, p ), 1 ) + 2398 CONTINUE + ELSE + DO 2399 p = 1, N + TEMP1 = ONE / DNRM2( MVL, V( 1, p ), 1 ) + CALL DSCAL( MVL, TEMP1, V( 1, p ), 1 ) + 2399 CONTINUE + END IF + END IF +* +* Undo scaling, if necessary (and possible). + IF( ( ( SKL.GT.ONE ) .AND. ( SVA( 1 ).LT.( BIG / SKL) ) ) + $ .OR. ( ( SKL.LT.ONE ) .AND. ( SVA( MAX( N2, 1 ) ) .GT. + $ ( SFMIN / SKL) ) ) ) THEN + DO 2400 p = 1, N + SVA( P ) = SKL*SVA( P ) + 2400 CONTINUE + SKL= ONE + END IF +* + WORK( 1 ) = SKL +* The singular values of A are SKL*SVA(1:N). If SKL.NE.ONE +* then some of the singular values may overflow or underflow and +* the spectrum is given in this factored representation. +* + WORK( 2 ) = DBLE( N4 ) +* N4 is the number of computed nonzero singular values of A. +* + WORK( 3 ) = DBLE( N2 ) +* N2 is the number of singular values of A greater than SFMIN. +* If N2 \brief DGESVX computes the solution to system of linear equations A * X = B for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGESVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, +* EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, TRANS +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ BERR( * ), C( * ), FERR( * ), R( * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGESVX uses the LU factorization to compute the solution to a real +*> system of linear equations +*> A * X = B, +*> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +*> TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +*> TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +*> or diag(C)*B (if TRANS = 'T' or 'C'). +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the +*> matrix A (after equilibration if FACT = 'E') as +*> A = P * L * U, +*> where P is a permutation matrix, L is a unit lower triangular +*> matrix, and U is upper triangular. +*> +*> 3. If some U(i,i)=0, so that U is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +*> that it solves the original system before equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF and IPIV contain the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by R and C. +*> A, AF, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is +*> not 'N', then A must have been equilibrated by the scaling +*> factors in R and/or C. A is not modified if FACT = 'F' or +*> 'N', or if FACT = 'E' and EQUED = 'N' on exit. +*> +*> On exit, if EQUED .ne. 'N', A is scaled as follows: +*> EQUED = 'R': A := diag(R) * A +*> EQUED = 'C': A := A * diag(C) +*> EQUED = 'B': A := diag(R) * A * diag(C). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the factors L and U from the factorization +*> A = P*L*U as computed by DGETRF. If EQUED .ne. 'N', then +*> AF is the factored form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the equilibrated matrix A (see the description of A for +*> the form of the equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains the pivot indices from the factorization A = P*L*U +*> as computed by DGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the equilibrated matrix A. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. R is an input argument if FACT = 'F'; +*> otherwise, R is an output argument. If FACT = 'F' and +*> EQUED = 'R' or 'B', each element of R must be positive. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. C is an input argument if FACT = 'F'; +*> otherwise, C is an output argument. If FACT = 'F' and +*> EQUED = 'C' or 'B', each element of C must be positive. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +*> diag(R)*B; +*> if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +*> overwritten by diag(C)*B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X +*> to the original system of equations. Note that A and B are +*> modified on exit if EQUED .ne. 'N', and the solution to the +*> equilibrated system is inv(diag(C))*X if TRANS = 'N' and +*> EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' +*> and EQUED = 'R' or 'B'. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The estimate of the reciprocal condition number of the matrix +*> A after equilibration (if done). If RCOND is less than the +*> machine precision (in particular, if RCOND = 0), the matrix +*> is singular to working precision. This condition is +*> indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> On exit, WORK(1) contains the reciprocal pivot growth +*> factor norm(A)/norm(U). The "max absolute element" norm is +*> used. If WORK(1) is much less than 1, then the stability +*> of the LU factorization of the (equilibrated) matrix A +*> could be poor. This also means that the solution X, condition +*> estimator RCOND, and forward error bound FERR could be +*> unreliable. If factorization fails with 0 WORK(1) contains the reciprocal pivot growth factor for the +*> leading INFO columns of A. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: U(i,i) is exactly zero. The factorization has +*> been completed, but the factor U is exactly +*> singular, so the solution and error bounds +*> could not be computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleGEsolve +* +* ===================================================================== + SUBROUTINE DGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), C( * ), FERR( * ), R( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + CHARACTER NORM + INTEGER I, INFEQU, J + DOUBLE PRECISION AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, RPVGRW, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANGE, DLANTR + EXTERNAL LSAME, DLAMCH, DLANGE, DLANTR +* .. +* .. External Subroutines .. + EXTERNAL DGECON, DGEEQU, DGERFS, DGETRF, DGETRS, DLACPY, + $ DLAQGE, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -12 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGESVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL DGEEQU( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL DLAQGE( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = R( I )*B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( COLEQU ) THEN + DO 60 J = 1, NRHS + DO 50 I = 1, N + B( I, J ) = C( I )*B( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of A. +* + CALL DLACPY( 'Full', N, N, A, LDA, AF, LDAF ) + CALL DGETRF( N, N, AF, LDAF, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + RPVGRW = DLANTR( 'M', 'U', 'N', INFO, INFO, AF, LDAF, + $ WORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = DLANGE( 'M', N, INFO, A, LDA, WORK ) / RPVGRW + END IF + WORK( 1 ) = RPVGRW + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A and the +* reciprocal pivot growth factor RPVGRW. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = DLANGE( NORM, N, N, A, LDA, WORK ) + RPVGRW = DLANTR( 'M', 'U', 'N', N, N, AF, LDAF, WORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = DLANGE( 'M', N, N, A, LDA, WORK ) / RPVGRW + END IF +* +* Compute the reciprocal of the condition number of A. +* + CALL DGECON( NORM, N, AF, LDAF, ANORM, RCOND, WORK, IWORK, INFO ) +* +* Compute the solution matrix X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DGETRS( TRANS, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL DGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X, + $ LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( NOTRAN ) THEN + IF( COLEQU ) THEN + DO 80 J = 1, NRHS + DO 70 I = 1, N + X( I, J ) = C( I )*X( I, J ) + 70 CONTINUE + 80 CONTINUE + DO 90 J = 1, NRHS + FERR( J ) = FERR( J ) / COLCND + 90 CONTINUE + END IF + ELSE IF( ROWEQU ) THEN + DO 110 J = 1, NRHS + DO 100 I = 1, N + X( I, J ) = R( I )*X( I, J ) + 100 CONTINUE + 110 CONTINUE + DO 120 J = 1, NRHS + FERR( J ) = FERR( J ) / ROWCND + 120 CONTINUE + END IF +* + WORK( 1 ) = RPVGRW +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 + RETURN +* +* End of DGESVX +* + END diff --git a/dspl/liblapack/SRC/dgesvxx.f b/dspl/liblapack/SRC/dgesvxx.f new file mode 100644 index 0000000..afcd05d --- /dev/null +++ b/dspl/liblapack/SRC/dgesvxx.f @@ -0,0 +1,769 @@ +*> \brief DGESVXX computes the solution to system of linear equations A * X = B for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGESVXX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGESVXX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, +* EQUED, R, C, B, LDB, X, LDX, RCOND, RPVGRW, +* BERR, N_ERR_BNDS, ERR_BNDS_NORM, +* ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, TRANS +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* DOUBLE PRECISION RCOND, RPVGRW +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX , * ),WORK( * ) +* DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGESVXX uses the LU factorization to compute the solution to a +*> double precision system of linear equations A * X = B, where A is an +*> N-by-N matrix and X and B are N-by-NRHS matrices. +*> +*> If requested, both normwise and maximum componentwise error bounds +*> are returned. DGESVXX will return a solution with a tiny +*> guaranteed error (O(eps) where eps is the working machine +*> precision) unless the matrix is very ill-conditioned, in which +*> case a warning is returned. Relevant condition numbers also are +*> calculated and returned. +*> +*> DGESVXX accepts user-provided factorizations and equilibration +*> factors; see the definitions of the FACT and EQUED options. +*> Solving with refinement and using a factorization from a previous +*> DGESVXX call will also produce a solution with either O(eps) +*> errors or warnings, but we cannot make that claim for general +*> user-provided factorizations and equilibration factors if they +*> differ from what DGESVXX would itself produce. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', double precision scaling factors are computed to equilibrate +*> the system: +*> +*> TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +*> TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +*> TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +*> +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +*> or diag(C)*B (if TRANS = 'T' or 'C'). +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor +*> the matrix A (after equilibration if FACT = 'E') as +*> +*> A = P * L * U, +*> +*> where P is a permutation matrix, L is a unit lower triangular +*> matrix, and U is upper triangular. +*> +*> 3. If some U(i,i)=0, so that U is exactly singular, then the +*> routine returns with INFO = i. Otherwise, the factored form of A +*> is used to estimate the condition number of the matrix A (see +*> argument RCOND). If the reciprocal of the condition number is less +*> than machine precision, the routine still goes on to solve for X +*> and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), +*> the routine will use iterative refinement to try to get a small +*> error and error bounds. Refinement calculates the residual to at +*> least twice the working precision. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +*> that it solves the original system before equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF and IPIV contain the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by R and C. +*> A, AF, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate Transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is +*> not 'N', then A must have been equilibrated by the scaling +*> factors in R and/or C. A is not modified if FACT = 'F' or +*> 'N', or if FACT = 'E' and EQUED = 'N' on exit. +*> +*> On exit, if EQUED .ne. 'N', A is scaled as follows: +*> EQUED = 'R': A := diag(R) * A +*> EQUED = 'C': A := A * diag(C) +*> EQUED = 'B': A := diag(R) * A * diag(C). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the factors L and U from the factorization +*> A = P*L*U as computed by DGETRF. If EQUED .ne. 'N', then +*> AF is the factored form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the equilibrated matrix A (see the description of A for +*> the form of the equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains the pivot indices from the factorization A = P*L*U +*> as computed by DGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the equilibrated matrix A. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. R is an input argument if FACT = 'F'; +*> otherwise, R is an output argument. If FACT = 'F' and +*> EQUED = 'R' or 'B', each element of R must be positive. +*> If R is output, each element of R is a power of the radix. +*> If R is input, each element of R should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. C is an input argument if FACT = 'F'; +*> otherwise, C is an output argument. If FACT = 'F' and +*> EQUED = 'C' or 'B', each element of C must be positive. +*> If C is output, each element of C is a power of the radix. +*> If C is input, each element of C should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +*> diag(R)*B; +*> if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +*> overwritten by diag(C)*B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X to the original +*> system of equations. Note that A and B are modified on exit +*> if EQUED .ne. 'N', and the solution to the equilibrated system is +*> inv(diag(C))*X if TRANS = 'N' and EQUED = 'C' or 'B', or +*> inv(diag(R))*X if TRANS = 'T' or 'C' and EQUED = 'R' or 'B'. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] RPVGRW +*> \verbatim +*> RPVGRW is DOUBLE PRECISION +*> Reciprocal pivot growth. On exit, this contains the reciprocal +*> pivot growth factor norm(A)/norm(U). The "max absolute element" +*> norm is used. If this is much less than 1, then the stability of +*> the LU factorization of the (equilibrated) matrix A could be poor. +*> This also means that the solution X, estimated condition numbers, +*> and error bounds could be unreliable. If factorization fails with +*> 0 for the leading INFO columns of A. In DGESVX, this quantity is +*> returned in WORK(1). +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is DOUBLE PRECISION array, dimension (NPARAMS) +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0D+0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the extra-precise refinement algorithm. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleGEsolve +* +* ===================================================================== + SUBROUTINE DGESVXX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ EQUED, R, C, B, LDB, X, LDX, RCOND, RPVGRW, + $ BERR, N_ERR_BNDS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, IWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + DOUBLE PRECISION RCOND, RPVGRW +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX , * ),WORK( * ) + DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + INTEGER INFEQU, J + DOUBLE PRECISION AMAX, BIGNUM, COLCND, RCMAX, RCMIN, ROWCND, + $ SMLNUM +* .. +* .. External Functions .. + EXTERNAL LSAME, DLAMCH, DLA_GERPVGRW + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLA_GERPVGRW +* .. +* .. External Subroutines .. + EXTERNAL DGEEQUB, DGETRF, DGETRS, DLACPY, DLAQGE, + $ XERBLA, DLASCL2, DGERFSX +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF +* +* Default is failure. If an input parameter is wrong or +* factorization fails, make everything look horrible. Only the +* pivot growth is set here, the rest is initialized in DGERFSX. +* + RPVGRW = ZERO +* +* Test the input parameters. PARAMS is not tested until DGERFSX. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT. + $ LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -12 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGESVXX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL DGEEQUB( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL DLAQGE( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF +* +* If the scaling factors are not applied, set them to 1.0. +* + IF ( .NOT.ROWEQU ) THEN + DO J = 1, N + R( J ) = 1.0D+0 + END DO + END IF + IF ( .NOT.COLEQU ) THEN + DO J = 1, N + C( J ) = 1.0D+0 + END DO + END IF + END IF +* +* Scale the right-hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) CALL DLASCL2( N, NRHS, R, B, LDB ) + ELSE + IF( COLEQU ) CALL DLASCL2( N, NRHS, C, B, LDB ) + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of A. +* + CALL DLACPY( 'Full', N, N, A, LDA, AF, LDAF ) + CALL DGETRF( N, N, AF, LDAF, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Pivot in column INFO is exactly 0 +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + RPVGRW = DLA_GERPVGRW( N, INFO, A, LDA, AF, LDAF ) + RETURN + END IF + END IF +* +* Compute the reciprocal pivot growth factor RPVGRW. +* + RPVGRW = DLA_GERPVGRW( N, N, A, LDA, AF, LDAF ) +* +* Compute the solution matrix X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DGETRS( TRANS, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL DGERFSX( TRANS, EQUED, N, NRHS, A, LDA, AF, LDAF, + $ IPIV, R, C, B, LDB, X, LDX, RCOND, BERR, + $ N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, IWORK, INFO ) +* +* Scale solutions. +* + IF ( COLEQU .AND. NOTRAN ) THEN + CALL DLASCL2 ( N, NRHS, C, X, LDX ) + ELSE IF ( ROWEQU .AND. .NOT.NOTRAN ) THEN + CALL DLASCL2 ( N, NRHS, R, X, LDX ) + END IF +* + RETURN +* +* End of DGESVXX + + END diff --git a/dspl/liblapack/SRC/dgetc2.f b/dspl/liblapack/SRC/dgetc2.f new file mode 100644 index 0000000..0896a70 --- /dev/null +++ b/dspl/liblapack/SRC/dgetc2.f @@ -0,0 +1,234 @@ +*> \brief \b DGETC2 computes the LU factorization with complete pivoting of the general n-by-n matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGETC2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGETC2( N, A, LDA, IPIV, JPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), JPIV( * ) +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGETC2 computes an LU factorization with complete pivoting of the +*> n-by-n matrix A. The factorization has the form A = P * L * U * Q, +*> where P and Q are permutation matrices, L is lower triangular with +*> unit diagonal elements and U is upper triangular. +*> +*> This is the Level 2 BLAS algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the n-by-n matrix A to be factored. +*> On exit, the factors L and U from the factorization +*> A = P*L*U*Q; the unit diagonal elements of L are not stored. +*> If U(k, k) appears to be less than SMIN, U(k, k) is given the +*> value of SMIN, i.e., giving a nonsingular perturbed system. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension(N). +*> The pivot indices; for 1 <= i <= N, row i of the +*> matrix has been interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] JPIV +*> \verbatim +*> JPIV is INTEGER array, dimension(N). +*> The pivot indices; for 1 <= j <= N, column j of the +*> matrix has been interchanged with column JPIV(j). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = k, U(k, k) is likely to produce owerflow if +*> we try to solve for x in Ax = b. So U is perturbed to +*> avoid the overflow. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleGEauxiliary +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== + SUBROUTINE DGETC2( N, A, LDA, IPIV, JPIV, INFO ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IP, IPV, J, JP, JPV + DOUBLE PRECISION BIGNUM, EPS, SMIN, SMLNUM, XMAX +* .. +* .. External Subroutines .. + EXTERNAL DGER, DSWAP, DLABAD +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Handle the case N=1 by itself +* + IF( N.EQ.1 ) THEN + IPIV( 1 ) = 1 + JPIV( 1 ) = 1 + IF( ABS( A( 1, 1 ) ).LT.SMLNUM ) THEN + INFO = 1 + A( 1, 1 ) = SMLNUM + END IF + RETURN + END IF +* +* Factorize A using complete pivoting. +* Set pivots less than SMIN to SMIN. +* + DO 40 I = 1, N - 1 +* +* Find max element in matrix A +* + XMAX = ZERO + DO 20 IP = I, N + DO 10 JP = I, N + IF( ABS( A( IP, JP ) ).GE.XMAX ) THEN + XMAX = ABS( A( IP, JP ) ) + IPV = IP + JPV = JP + END IF + 10 CONTINUE + 20 CONTINUE + IF( I.EQ.1 ) + $ SMIN = MAX( EPS*XMAX, SMLNUM ) +* +* Swap rows +* + IF( IPV.NE.I ) + $ CALL DSWAP( N, A( IPV, 1 ), LDA, A( I, 1 ), LDA ) + IPIV( I ) = IPV +* +* Swap columns +* + IF( JPV.NE.I ) + $ CALL DSWAP( N, A( 1, JPV ), 1, A( 1, I ), 1 ) + JPIV( I ) = JPV +* +* Check for singularity +* + IF( ABS( A( I, I ) ).LT.SMIN ) THEN + INFO = I + A( I, I ) = SMIN + END IF + DO 30 J = I + 1, N + A( J, I ) = A( J, I ) / A( I, I ) + 30 CONTINUE + CALL DGER( N-I, N-I, -ONE, A( I+1, I ), 1, A( I, I+1 ), LDA, + $ A( I+1, I+1 ), LDA ) + 40 CONTINUE +* + IF( ABS( A( N, N ) ).LT.SMIN ) THEN + INFO = N + A( N, N ) = SMIN + END IF +* +* Set last pivots to N +* + IPIV( N ) = N + JPIV( N ) = N +* + RETURN +* +* End of DGETC2 +* + END diff --git a/dspl/liblapack/SRC/dgetf2.f b/dspl/liblapack/SRC/dgetf2.f new file mode 100644 index 0000000..5458a5f --- /dev/null +++ b/dspl/liblapack/SRC/dgetf2.f @@ -0,0 +1,213 @@ +*> \brief \b DGETF2 computes the LU factorization of a general m-by-n matrix using partial pivoting with row interchanges (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGETF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGETF2 computes an LU factorization of a general m-by-n matrix A +*> using partial pivoting with row interchanges. +*> +*> The factorization has the form +*> A = P * L * U +*> where P is a permutation matrix, L is lower triangular with unit +*> diagonal elements (lower trapezoidal if m > n), and U is upper +*> triangular (upper trapezoidal if m < n). +*> +*> This is the right-looking Level 2 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the m by n matrix to be factored. +*> On exit, the factors L and U from the factorization +*> A = P*L*U; the unit diagonal elements of L are not stored. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, U(k,k) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION SFMIN + INTEGER I, J, JP +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + INTEGER IDAMAX + EXTERNAL DLAMCH, IDAMAX +* .. +* .. External Subroutines .. + EXTERNAL DGER, DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGETF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Compute machine safe minimum +* + SFMIN = DLAMCH('S') +* + DO 10 J = 1, MIN( M, N ) +* +* Find pivot and test for singularity. +* + JP = J - 1 + IDAMAX( M-J+1, A( J, J ), 1 ) + IPIV( J ) = JP + IF( A( JP, J ).NE.ZERO ) THEN +* +* Apply the interchange to columns 1:N. +* + IF( JP.NE.J ) + $ CALL DSWAP( N, A( J, 1 ), LDA, A( JP, 1 ), LDA ) +* +* Compute elements J+1:M of J-th column. +* + IF( J.LT.M ) THEN + IF( ABS(A( J, J )) .GE. SFMIN ) THEN + CALL DSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 ) + ELSE + DO 20 I = 1, M-J + A( J+I, J ) = A( J+I, J ) / A( J, J ) + 20 CONTINUE + END IF + END IF +* + ELSE IF( INFO.EQ.0 ) THEN +* + INFO = J + END IF +* + IF( J.LT.MIN( M, N ) ) THEN +* +* Update trailing submatrix. +* + CALL DGER( M-J, N-J, -ONE, A( J+1, J ), 1, A( J, J+1 ), LDA, + $ A( J+1, J+1 ), LDA ) + END IF + 10 CONTINUE + RETURN +* +* End of DGETF2 +* + END diff --git a/dspl/liblapack/SRC/dgetrf.f b/dspl/liblapack/SRC/dgetrf.f new file mode 100644 index 0000000..9a340b6 --- /dev/null +++ b/dspl/liblapack/SRC/dgetrf.f @@ -0,0 +1,225 @@ +*> \brief \b DGETRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGETRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGETRF computes an LU factorization of a general M-by-N matrix A +*> using partial pivoting with row interchanges. +*> +*> The factorization has the form +*> A = P * L * U +*> where P is a permutation matrix, L is lower triangular with unit +*> diagonal elements (lower trapezoidal if m > n), and U is upper +*> triangular (upper trapezoidal if m < n). +*> +*> This is the right-looking Level 3 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix to be factored. +*> On exit, the factors L and U from the factorization +*> A = P*L*U; the unit diagonal elements of L are not stored. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO, J, JB, NB +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DGETRF2, DLASWP, DTRSM, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'DGETRF', ' ', M, N, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN +* +* Use unblocked code. +* + CALL DGETRF2( M, N, A, LDA, IPIV, INFO ) + ELSE +* +* Use blocked code. +* + DO 20 J = 1, MIN( M, N ), NB + JB = MIN( MIN( M, N )-J+1, NB ) +* +* Factor diagonal and subdiagonal blocks and test for exact +* singularity. +* + CALL DGETRF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO ) +* +* Adjust INFO and the pivot indices. +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + J - 1 + DO 10 I = J, MIN( M, J+JB-1 ) + IPIV( I ) = J - 1 + IPIV( I ) + 10 CONTINUE +* +* Apply interchanges to columns 1:J-1. +* + CALL DLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 ) +* + IF( J+JB.LE.N ) THEN +* +* Apply interchanges to columns J+JB:N. +* + CALL DLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1, + $ IPIV, 1 ) +* +* Compute block row of U. +* + CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB, + $ N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ), + $ LDA ) + IF( J+JB.LE.M ) THEN +* +* Update trailing submatrix. +* + CALL DGEMM( 'No transpose', 'No transpose', M-J-JB+1, + $ N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA, + $ A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ), + $ LDA ) + END IF + END IF + 20 CONTINUE + END IF + RETURN +* +* End of DGETRF +* + END diff --git a/dspl/liblapack/SRC/dgetrf2.f b/dspl/liblapack/SRC/dgetrf2.f new file mode 100644 index 0000000..77948d2 --- /dev/null +++ b/dspl/liblapack/SRC/dgetrf2.f @@ -0,0 +1,272 @@ +*> \brief \b DGETRF2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE DGETRF2( M, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGETRF2 computes an LU factorization of a general M-by-N matrix A +*> using partial pivoting with row interchanges. +*> +*> The factorization has the form +*> A = P * L * U +*> where P is a permutation matrix, L is lower triangular with unit +*> diagonal elements (lower trapezoidal if m > n), and U is upper +*> triangular (upper trapezoidal if m < n). +*> +*> This is the recursive version of the algorithm. It divides +*> the matrix into four submatrices: +*> +*> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 +*> A = [ -----|----- ] with n1 = min(m,n)/2 +*> [ A21 | A22 ] n2 = n-n1 +*> +*> [ A11 ] +*> The subroutine calls itself to factor [ --- ], +*> [ A12 ] +*> [ A12 ] +*> do the swaps on [ --- ], solve A12, update A22, +*> [ A22 ] +*> +*> then calls itself to factor A22 and do the swaps on A21. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix to be factored. +*> On exit, the factors L and U from the factorization +*> A = P*L*U; the unit diagonal elements of L are not stored. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + RECURSIVE SUBROUTINE DGETRF2( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION SFMIN, TEMP + INTEGER I, IINFO, N1, N2 +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + INTEGER IDAMAX + EXTERNAL DLAMCH, IDAMAX +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DSCAL, DLASWP, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGETRF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN + + IF ( M.EQ.1 ) THEN +* +* Use unblocked code for one row case +* Just need to handle IPIV and INFO +* + IPIV( 1 ) = 1 + IF ( A(1,1).EQ.ZERO ) + $ INFO = 1 +* + ELSE IF( N.EQ.1 ) THEN +* +* Use unblocked code for one column case +* +* +* Compute machine safe minimum +* + SFMIN = DLAMCH('S') +* +* Find pivot and test for singularity +* + I = IDAMAX( M, A( 1, 1 ), 1 ) + IPIV( 1 ) = I + IF( A( I, 1 ).NE.ZERO ) THEN +* +* Apply the interchange +* + IF( I.NE.1 ) THEN + TEMP = A( 1, 1 ) + A( 1, 1 ) = A( I, 1 ) + A( I, 1 ) = TEMP + END IF +* +* Compute elements 2:M of the column +* + IF( ABS(A( 1, 1 )) .GE. SFMIN ) THEN + CALL DSCAL( M-1, ONE / A( 1, 1 ), A( 2, 1 ), 1 ) + ELSE + DO 10 I = 1, M-1 + A( 1+I, 1 ) = A( 1+I, 1 ) / A( 1, 1 ) + 10 CONTINUE + END IF +* + ELSE + INFO = 1 + END IF +* + ELSE +* +* Use recursive code +* + N1 = MIN( M, N ) / 2 + N2 = N-N1 +* +* [ A11 ] +* Factor [ --- ] +* [ A21 ] +* + CALL DGETRF2( M, N1, A, LDA, IPIV, IINFO ) + + IF ( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* [ A12 ] +* Apply interchanges to [ --- ] +* [ A22 ] +* + CALL DLASWP( N2, A( 1, N1+1 ), LDA, 1, N1, IPIV, 1 ) +* +* Solve A12 +* + CALL DTRSM( 'L', 'L', 'N', 'U', N1, N2, ONE, A, LDA, + $ A( 1, N1+1 ), LDA ) +* +* Update A22 +* + CALL DGEMM( 'N', 'N', M-N1, N2, N1, -ONE, A( N1+1, 1 ), LDA, + $ A( 1, N1+1 ), LDA, ONE, A( N1+1, N1+1 ), LDA ) +* +* Factor A22 +* + CALL DGETRF2( M-N1, N2, A( N1+1, N1+1 ), LDA, IPIV( N1+1 ), + $ IINFO ) +* +* Adjust INFO and the pivot indices +* + IF ( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + N1 + DO 20 I = N1+1, MIN( M, N ) + IPIV( I ) = IPIV( I ) + N1 + 20 CONTINUE +* +* Apply interchanges to A21 +* + CALL DLASWP( N1, A( 1, 1 ), LDA, N1+1, MIN( M, N), IPIV, 1 ) +* + END IF + RETURN +* +* End of DGETRF2 +* + END diff --git a/dspl/liblapack/SRC/dgetri.f b/dspl/liblapack/SRC/dgetri.f new file mode 100644 index 0000000..9d8cf2a --- /dev/null +++ b/dspl/liblapack/SRC/dgetri.f @@ -0,0 +1,261 @@ +*> \brief \b DGETRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGETRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGETRI computes the inverse of a matrix using the LU factorization +*> computed by DGETRF. +*> +*> This method inverts U and then computes inv(A) by solving the system +*> inv(A)*L = inv(U) for inv(A). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the factors L and U from the factorization +*> A = P*L*U as computed by DGETRF. +*> On exit, if INFO = 0, the inverse of the original matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from DGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO=0, then WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimal performance LWORK >= N*NB, where NB is +*> the optimal blocksize returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero; the matrix is +*> singular and its inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB, + $ NBMIN, NN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DGEMV, DSWAP, DTRSM, DTRTRI, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NB = ILAENV( 1, 'DGETRI', ' ', N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGETRI', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form inv(U). If INFO > 0 from DTRTRI, then U is singular, +* and the inverse is not computed. +* + CALL DTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = MAX( LDWORK*NB, 1 ) + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DGETRI', ' ', N, -1, -1, -1 ) ) + END IF + ELSE + IWS = N + END IF +* +* Solve the equation inv(A)*L = inv(U) for inv(A). +* + IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + DO 20 J = N, 1, -1 +* +* Copy current column of L to WORK and replace with zeros. +* + DO 10 I = J + 1, N + WORK( I ) = A( I, J ) + A( I, J ) = ZERO + 10 CONTINUE +* +* Compute current column of inv(A). +* + IF( J.LT.N ) + $ CALL DGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ), + $ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 ) + 20 CONTINUE + ELSE +* +* Use blocked code. +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 50 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) +* +* Copy current block column of L to WORK and replace with +* zeros. +* + DO 40 JJ = J, J + JB - 1 + DO 30 I = JJ + 1, N + WORK( I+( JJ-J )*LDWORK ) = A( I, JJ ) + A( I, JJ ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Compute current block column of inv(A). +* + IF( J+JB.LE.N ) + $ CALL DGEMM( 'No transpose', 'No transpose', N, JB, + $ N-J-JB+1, -ONE, A( 1, J+JB ), LDA, + $ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA ) + CALL DTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, + $ ONE, WORK( J ), LDWORK, A( 1, J ), LDA ) + 50 CONTINUE + END IF +* +* Apply column interchanges. +* + DO 60 J = N - 1, 1, -1 + JP = IPIV( J ) + IF( JP.NE.J ) + $ CALL DSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 ) + 60 CONTINUE +* + WORK( 1 ) = IWS + RETURN +* +* End of DGETRI +* + END diff --git a/dspl/liblapack/SRC/dgetrs.f b/dspl/liblapack/SRC/dgetrs.f new file mode 100644 index 0000000..7ac7277 --- /dev/null +++ b/dspl/liblapack/SRC/dgetrs.f @@ -0,0 +1,225 @@ +*> \brief \b DGETRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGETRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGETRS solves a system of linear equations +*> A * X = B or A**T * X = B +*> with a general N-by-N matrix A using the LU factorization computed +*> by DGETRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T* X = B (Transpose) +*> = 'C': A**T* X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The factors L and U from the factorization A = P*L*U +*> as computed by DGETRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from DGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLASWP, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGETRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( NOTRAN ) THEN +* +* Solve A * X = B. +* +* Apply row interchanges to the right hand sides. +* + CALL DLASWP( NRHS, B, LDB, 1, N, IPIV, 1 ) +* +* Solve L*X = B, overwriting B with X. +* + CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL DTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A**T * X = B. +* +* Solve U**T *X = B, overwriting B with X. +* + CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve L**T *X = B, overwriting B with X. +* + CALL DTRSM( 'Left', 'Lower', 'Transpose', 'Unit', N, NRHS, ONE, + $ A, LDA, B, LDB ) +* +* Apply row interchanges to the solution vectors. +* + CALL DLASWP( NRHS, B, LDB, 1, N, IPIV, -1 ) + END IF +* + RETURN +* +* End of DGETRS +* + END diff --git a/dspl/liblapack/SRC/dgetsls.f b/dspl/liblapack/SRC/dgetsls.f new file mode 100644 index 0000000..3b44a40 --- /dev/null +++ b/dspl/liblapack/SRC/dgetsls.f @@ -0,0 +1,494 @@ +* Definition: +* =========== +* +* SUBROUTINE DGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, +* $ WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGETSLS solves overdetermined or underdetermined real linear systems +*> involving an M-by-N matrix A, using a tall skinny QR or short wide LQ +*> factorization of A. It is assumed that A has full rank. +*> +*> +*> +*> The following options are provided: +*> +*> 1. If TRANS = 'N' and m >= n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A*X ||. +*> +*> 2. If TRANS = 'N' and m < n: find the minimum norm solution of +*> an underdetermined system A * X = B. +*> +*> 3. If TRANS = 'T' and m >= n: find the minimum norm solution of +*> an undetermined system A**T * X = B. +*> +*> 4. If TRANS = 'T' and m < n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A**T * X ||. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': the linear system involves A; +*> = 'T': the linear system involves A**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of the matrices B and X. NRHS >=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> A is overwritten by details of its QR or LQ +*> factorization as returned by DGEQR or DGELQ. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the matrix B of right hand side vectors, stored +*> columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +*> if TRANS = 'T'. +*> On exit, if INFO = 0, B is overwritten by the solution +*> vectors, stored columnwise: +*> if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +*> squares solution vectors. +*> if TRANS = 'N' and m < n, rows 1 to N of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'T' and m >= n, rows 1 to M of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'T' and m < n, rows 1 to M of B contain the +*> least squares solution vectors. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= MAX(1,M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) contains optimal (or either minimal +*> or optimal, if query was assumed) LWORK. +*> See LWORK for details. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If LWORK = -1 or -2, then a workspace query is assumed. +*> If LWORK = -1, the routine calculates optimal size of WORK for the +*> optimal performance and returns this value in WORK(1). +*> If LWORK = -2, the routine calculates minimal size of WORK and +*> returns this value in WORK(1). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of the +*> triangular factor of A is zero, so that A does not have +*> full rank; the least squares solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleGEsolve +* +* ===================================================================== + SUBROUTINE DGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, + $ WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TRAN + INTEGER I, IASCL, IBSCL, J, MINMN, MAXMN, BROW, + $ SCLLEN, MNK, TSZO, TSZM, LWO, LWM, LW1, LW2, + $ WSIZEO, WSIZEM, INFO2 + DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMLNUM, TQ( 5 ), WORKQ( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLABAD, DLAMCH, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DGEQR, DGEMQR, DLASCL, DLASET, + $ DTRTRS, XERBLA, DGELQ, DGEMLQ +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, INT +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + MNK = MAX( MINMN, NRHS ) + TRAN = LSAME( TRANS, 'T' ) +* + LQUERY = ( LWORK.EQ.-1 .OR. LWORK.EQ.-2 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. + $ LSAME( TRANS, 'T' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size and minimum LWORK +* + IF( M.GE.N ) THEN + CALL DGEQR( M, N, A, LDA, TQ, -1, WORKQ, -1, INFO2 ) + TSZO = INT( TQ( 1 ) ) + LWO = INT( WORKQ( 1 ) ) + CALL DGEMQR( 'L', TRANS, M, NRHS, N, A, LDA, TQ, + $ TSZO, B, LDB, WORKQ, -1, INFO2 ) + LWO = MAX( LWO, INT( WORKQ( 1 ) ) ) + CALL DGEQR( M, N, A, LDA, TQ, -2, WORKQ, -2, INFO2 ) + TSZM = INT( TQ( 1 ) ) + LWM = INT( WORKQ( 1 ) ) + CALL DGEMQR( 'L', TRANS, M, NRHS, N, A, LDA, TQ, + $ TSZM, B, LDB, WORKQ, -1, INFO2 ) + LWM = MAX( LWM, INT( WORKQ( 1 ) ) ) + WSIZEO = TSZO + LWO + WSIZEM = TSZM + LWM + ELSE + CALL DGELQ( M, N, A, LDA, TQ, -1, WORKQ, -1, INFO2 ) + TSZO = INT( TQ( 1 ) ) + LWO = INT( WORKQ( 1 ) ) + CALL DGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ, + $ TSZO, B, LDB, WORKQ, -1, INFO2 ) + LWO = MAX( LWO, INT( WORKQ( 1 ) ) ) + CALL DGELQ( M, N, A, LDA, TQ, -2, WORKQ, -2, INFO2 ) + TSZM = INT( TQ( 1 ) ) + LWM = INT( WORKQ( 1 ) ) + CALL DGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ, + $ TSZO, B, LDB, WORKQ, -1, INFO2 ) + LWM = MAX( LWM, INT( WORKQ( 1 ) ) ) + WSIZEO = TSZO + LWO + WSIZEM = TSZM + LWM + END IF +* + IF( ( LWORK.LT.WSIZEM ).AND.( .NOT.LQUERY ) ) THEN + INFO = -10 + END IF +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGETSLS', -INFO ) + WORK( 1 ) = DBLE( WSIZEO ) + RETURN + END IF + IF( LQUERY ) THEN + IF( LWORK.EQ.-1 ) WORK( 1 ) = REAL( WSIZEO ) + IF( LWORK.EQ.-2 ) WORK( 1 ) = REAL( WSIZEM ) + RETURN + END IF + IF( LWORK.LT.WSIZEO ) THEN + LW1 = TSZM + LW2 = LWM + ELSE + LW1 = TSZO + LW2 = LWO + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL DLASET( 'FULL', MAX( M, N ), NRHS, ZERO, ZERO, + $ B, LDB ) + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL DLASET( 'F', MAXMN, NRHS, ZERO, ZERO, B, LDB ) + GO TO 50 + END IF +* + BROW = M + IF ( TRAN ) THEN + BROW = N + END IF + BNRM = DLANGE( 'M', BROW, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF ( M.GE.N ) THEN +* +* compute QR factorization of A +* + CALL DGEQR( M, N, A, LDA, WORK( LW2+1 ), LW1, + $ WORK( 1 ), LW2, INFO ) + IF ( .NOT.TRAN ) THEN +* +* Least-Squares Problem min || A * X - B || +* +* B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) +* + CALL DGEMQR( 'L' , 'T', M, NRHS, N, A, LDA, + $ WORK( LW2+1 ), LW1, B, LDB, WORK( 1 ), LW2, + $ INFO ) +* +* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL DTRTRS( 'U', 'N', 'N', N, NRHS, + $ A, LDA, B, LDB, INFO ) + IF( INFO.GT.0 ) THEN + RETURN + END IF + SCLLEN = N + ELSE +* +* Overdetermined system of equations A**T * X = B +* +* B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) +* + CALL DTRTRS( 'U', 'T', 'N', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* B(N+1:M,1:NRHS) = ZERO +* + DO 20 J = 1, NRHS + DO 10 I = N + 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) +* + CALL DGEMQR( 'L', 'N', M, NRHS, N, A, LDA, + $ WORK( LW2+1 ), LW1, B, LDB, WORK( 1 ), LW2, + $ INFO ) +* + SCLLEN = M +* + END IF +* + ELSE +* +* Compute LQ factorization of A +* + CALL DGELQ( M, N, A, LDA, WORK( LW2+1 ), LW1, + $ WORK( 1 ), LW2, INFO ) +* +* workspace at least M, optimally M*NB. +* + IF( .NOT.TRAN ) THEN +* +* underdetermined system of equations A * X = B +* +* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL DTRTRS( 'L', 'N', 'N', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* B(M+1:N,1:NRHS) = 0 +* + DO 40 J = 1, NRHS + DO 30 I = M + 1, N + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS) +* + CALL DGEMLQ( 'L', 'T', N, NRHS, M, A, LDA, + $ WORK( LW2+1 ), LW1, B, LDB, WORK( 1 ), LW2, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* + SCLLEN = N +* + ELSE +* +* overdetermined system min || A**T * X - B || +* +* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) +* + CALL DGEMLQ( 'L', 'N', N, NRHS, M, A, LDA, + $ WORK( LW2+1 ), LW1, B, LDB, WORK( 1 ), LW2, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* +* B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) +* + CALL DTRTRS( 'Lower', 'Transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + 50 CONTINUE + WORK( 1 ) = DBLE( TSZO + LWO ) + RETURN +* +* End of DGETSLS +* + END diff --git a/dspl/liblapack/SRC/dggbak.f b/dspl/liblapack/SRC/dggbak.f new file mode 100644 index 0000000..cd5c260 --- /dev/null +++ b/dspl/liblapack/SRC/dggbak.f @@ -0,0 +1,306 @@ +*> \brief \b DGGBAK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGGBAK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V, +* LDV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOB, SIDE +* INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION LSCALE( * ), RSCALE( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGGBAK forms the right or left eigenvectors of a real generalized +*> eigenvalue problem A*x = lambda*B*x, by backward transformation on +*> the computed eigenvectors of the balanced pair of matrices output by +*> DGGBAL. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies the type of backward transformation required: +*> = 'N': do nothing, return immediately; +*> = 'P': do backward transformation for permutation only; +*> = 'S': do backward transformation for scaling only; +*> = 'B': do backward transformations for both permutation and +*> scaling. +*> JOB must be the same as the argument JOB supplied to DGGBAL. +*> \endverbatim +*> +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': V contains right eigenvectors; +*> = 'L': V contains left eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows of the matrix V. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> The integers ILO and IHI determined by DGGBAL. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in] LSCALE +*> \verbatim +*> LSCALE is DOUBLE PRECISION array, dimension (N) +*> Details of the permutations and/or scaling factors applied +*> to the left side of A and B, as returned by DGGBAL. +*> \endverbatim +*> +*> \param[in] RSCALE +*> \verbatim +*> RSCALE is DOUBLE PRECISION array, dimension (N) +*> Details of the permutations and/or scaling factors applied +*> to the right side of A and B, as returned by DGGBAL. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of columns of the matrix V. M >= 0. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (LDV,M) +*> On entry, the matrix of right or left eigenvectors to be +*> transformed, as returned by DTGEVC. +*> On exit, V is overwritten by the transformed eigenvectors. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the matrix V. LDV >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGBcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> See R.C. Ward, Balancing the generalized eigenvalue problem, +*> SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V, + $ LDV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOB, SIDE + INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION LSCALE( * ), RSCALE( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFTV, RIGHTV + INTEGER I, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, INT +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + RIGHTV = LSAME( SIDE, 'R' ) + LEFTV = LSAME( SIDE, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( N.EQ.0 .AND. IHI.EQ.0 .AND. ILO.NE.1 ) THEN + INFO = -4 + ELSE IF( N.GT.0 .AND. ( IHI.LT.ILO .OR. IHI.GT.MAX( 1, N ) ) ) + $ THEN + INFO = -5 + ELSE IF( N.EQ.0 .AND. ILO.EQ.1 .AND. IHI.NE.0 ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -8 + ELSE IF( LDV.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGBAK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( M.EQ.0 ) + $ RETURN + IF( LSAME( JOB, 'N' ) ) + $ RETURN +* + IF( ILO.EQ.IHI ) + $ GO TO 30 +* +* Backward balance +* + IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN +* +* Backward transformation on right eigenvectors +* + IF( RIGHTV ) THEN + DO 10 I = ILO, IHI + CALL DSCAL( M, RSCALE( I ), V( I, 1 ), LDV ) + 10 CONTINUE + END IF +* +* Backward transformation on left eigenvectors +* + IF( LEFTV ) THEN + DO 20 I = ILO, IHI + CALL DSCAL( M, LSCALE( I ), V( I, 1 ), LDV ) + 20 CONTINUE + END IF + END IF +* +* Backward permutation +* + 30 CONTINUE + IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN +* +* Backward permutation on right eigenvectors +* + IF( RIGHTV ) THEN + IF( ILO.EQ.1 ) + $ GO TO 50 +* + DO 40 I = ILO - 1, 1, -1 + K = INT(RSCALE( I )) + IF( K.EQ.I ) + $ GO TO 40 + CALL DSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 40 CONTINUE +* + 50 CONTINUE + IF( IHI.EQ.N ) + $ GO TO 70 + DO 60 I = IHI + 1, N + K = INT(RSCALE( I )) + IF( K.EQ.I ) + $ GO TO 60 + CALL DSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 60 CONTINUE + END IF +* +* Backward permutation on left eigenvectors +* + 70 CONTINUE + IF( LEFTV ) THEN + IF( ILO.EQ.1 ) + $ GO TO 90 + DO 80 I = ILO - 1, 1, -1 + K = INT(LSCALE( I )) + IF( K.EQ.I ) + $ GO TO 80 + CALL DSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 80 CONTINUE +* + 90 CONTINUE + IF( IHI.EQ.N ) + $ GO TO 110 + DO 100 I = IHI + 1, N + K = INT(LSCALE( I )) + IF( K.EQ.I ) + $ GO TO 100 + CALL DSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 100 CONTINUE + END IF + END IF +* + 110 CONTINUE +* + RETURN +* +* End of DGGBAK +* + END diff --git a/dspl/liblapack/SRC/dggbal.f b/dspl/liblapack/SRC/dggbal.f new file mode 100644 index 0000000..5f36aa0 --- /dev/null +++ b/dspl/liblapack/SRC/dggbal.f @@ -0,0 +1,559 @@ +*> \brief \b DGGBAL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGGBAL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE, +* RSCALE, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOB +* INTEGER IHI, ILO, INFO, LDA, LDB, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), LSCALE( * ), +* $ RSCALE( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGGBAL balances a pair of general real matrices (A,B). This +*> involves, first, permuting A and B by similarity transformations to +*> isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N +*> elements on the diagonal; and second, applying a diagonal similarity +*> transformation to rows and columns ILO to IHI to make the rows +*> and columns as close in norm as possible. Both steps are optional. +*> +*> Balancing may reduce the 1-norm of the matrices, and improve the +*> accuracy of the computed eigenvalues and/or eigenvectors in the +*> generalized eigenvalue problem A*x = lambda*B*x. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies the operations to be performed on A and B: +*> = 'N': none: simply set ILO = 1, IHI = N, LSCALE(I) = 1.0 +*> and RSCALE(I) = 1.0 for i = 1,...,N. +*> = 'P': permute only; +*> = 'S': scale only; +*> = 'B': both permute and scale. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the input matrix A. +*> On exit, A is overwritten by the balanced matrix. +*> If JOB = 'N', A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> On entry, the input matrix B. +*> On exit, B is overwritten by the balanced matrix. +*> If JOB = 'N', B is not referenced. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[out] IHI +*> \verbatim +*> IHI is INTEGER +*> ILO and IHI are set to integers such that on exit +*> A(i,j) = 0 and B(i,j) = 0 if i > j and +*> j = 1,...,ILO-1 or i = IHI+1,...,N. +*> If JOB = 'N' or 'S', ILO = 1 and IHI = N. +*> \endverbatim +*> +*> \param[out] LSCALE +*> \verbatim +*> LSCALE is DOUBLE PRECISION array, dimension (N) +*> Details of the permutations and scaling factors applied +*> to the left side of A and B. If P(j) is the index of the +*> row interchanged with row j, and D(j) +*> is the scaling factor applied to row j, then +*> LSCALE(j) = P(j) for J = 1,...,ILO-1 +*> = D(j) for J = ILO,...,IHI +*> = P(j) for J = IHI+1,...,N. +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] RSCALE +*> \verbatim +*> RSCALE is DOUBLE PRECISION array, dimension (N) +*> Details of the permutations and scaling factors applied +*> to the right side of A and B. If P(j) is the index of the +*> column interchanged with column j, and D(j) +*> is the scaling factor applied to column j, then +*> LSCALE(j) = P(j) for J = 1,...,ILO-1 +*> = D(j) for J = ILO,...,IHI +*> = P(j) for J = IHI+1,...,N. +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (lwork) +*> lwork must be at least max(1,6*N) when JOB = 'S' or 'B', and +*> at least 1 when JOB = 'N' or 'P'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGBcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> See R.C. WARD, Balancing the generalized eigenvalue problem, +*> SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE, + $ RSCALE, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER IHI, ILO, INFO, LDA, LDB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), LSCALE( * ), + $ RSCALE( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION THREE, SCLFAC + PARAMETER ( THREE = 3.0D+0, SCLFAC = 1.0D+1 ) +* .. +* .. Local Scalars .. + INTEGER I, ICAB, IFLOW, IP1, IR, IRAB, IT, J, JC, JP1, + $ K, KOUNT, L, LCAB, LM1, LRAB, LSFMAX, LSFMIN, + $ M, NR, NRP2 + DOUBLE PRECISION ALPHA, BASL, BETA, CAB, CMAX, COEF, COEF2, + $ COEF5, COR, EW, EWC, GAMMA, PGAMMA, RAB, SFMAX, + $ SFMIN, SUM, T, TA, TB, TC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DDOT, DLAMCH + EXTERNAL LSAME, IDAMAX, DDOT, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, LOG10, MAX, MIN, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGBAL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + ILO = 1 + IHI = N + RETURN + END IF +* + IF( N.EQ.1 ) THEN + ILO = 1 + IHI = N + LSCALE( 1 ) = ONE + RSCALE( 1 ) = ONE + RETURN + END IF +* + IF( LSAME( JOB, 'N' ) ) THEN + ILO = 1 + IHI = N + DO 10 I = 1, N + LSCALE( I ) = ONE + RSCALE( I ) = ONE + 10 CONTINUE + RETURN + END IF +* + K = 1 + L = N + IF( LSAME( JOB, 'S' ) ) + $ GO TO 190 +* + GO TO 30 +* +* Permute the matrices A and B to isolate the eigenvalues. +* +* Find row with one nonzero in columns 1 through L +* + 20 CONTINUE + L = LM1 + IF( L.NE.1 ) + $ GO TO 30 +* + RSCALE( 1 ) = ONE + LSCALE( 1 ) = ONE + GO TO 190 +* + 30 CONTINUE + LM1 = L - 1 + DO 80 I = L, 1, -1 + DO 40 J = 1, LM1 + JP1 = J + 1 + IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO ) + $ GO TO 50 + 40 CONTINUE + J = L + GO TO 70 +* + 50 CONTINUE + DO 60 J = JP1, L + IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO ) + $ GO TO 80 + 60 CONTINUE + J = JP1 - 1 +* + 70 CONTINUE + M = L + IFLOW = 1 + GO TO 160 + 80 CONTINUE + GO TO 100 +* +* Find column with one nonzero in rows K through N +* + 90 CONTINUE + K = K + 1 +* + 100 CONTINUE + DO 150 J = K, L + DO 110 I = K, LM1 + IP1 = I + 1 + IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO ) + $ GO TO 120 + 110 CONTINUE + I = L + GO TO 140 + 120 CONTINUE + DO 130 I = IP1, L + IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO ) + $ GO TO 150 + 130 CONTINUE + I = IP1 - 1 + 140 CONTINUE + M = K + IFLOW = 2 + GO TO 160 + 150 CONTINUE + GO TO 190 +* +* Permute rows M and I +* + 160 CONTINUE + LSCALE( M ) = I + IF( I.EQ.M ) + $ GO TO 170 + CALL DSWAP( N-K+1, A( I, K ), LDA, A( M, K ), LDA ) + CALL DSWAP( N-K+1, B( I, K ), LDB, B( M, K ), LDB ) +* +* Permute columns M and J +* + 170 CONTINUE + RSCALE( M ) = J + IF( J.EQ.M ) + $ GO TO 180 + CALL DSWAP( L, A( 1, J ), 1, A( 1, M ), 1 ) + CALL DSWAP( L, B( 1, J ), 1, B( 1, M ), 1 ) +* + 180 CONTINUE + GO TO ( 20, 90 )IFLOW +* + 190 CONTINUE + ILO = K + IHI = L +* + IF( LSAME( JOB, 'P' ) ) THEN + DO 195 I = ILO, IHI + LSCALE( I ) = ONE + RSCALE( I ) = ONE + 195 CONTINUE + RETURN + END IF +* + IF( ILO.EQ.IHI ) + $ RETURN +* +* Balance the submatrix in rows ILO to IHI. +* + NR = IHI - ILO + 1 + DO 200 I = ILO, IHI + RSCALE( I ) = ZERO + LSCALE( I ) = ZERO +* + WORK( I ) = ZERO + WORK( I+N ) = ZERO + WORK( I+2*N ) = ZERO + WORK( I+3*N ) = ZERO + WORK( I+4*N ) = ZERO + WORK( I+5*N ) = ZERO + 200 CONTINUE +* +* Compute right side vector in resulting linear equations +* + BASL = LOG10( SCLFAC ) + DO 240 I = ILO, IHI + DO 230 J = ILO, IHI + TB = B( I, J ) + TA = A( I, J ) + IF( TA.EQ.ZERO ) + $ GO TO 210 + TA = LOG10( ABS( TA ) ) / BASL + 210 CONTINUE + IF( TB.EQ.ZERO ) + $ GO TO 220 + TB = LOG10( ABS( TB ) ) / BASL + 220 CONTINUE + WORK( I+4*N ) = WORK( I+4*N ) - TA - TB + WORK( J+5*N ) = WORK( J+5*N ) - TA - TB + 230 CONTINUE + 240 CONTINUE +* + COEF = ONE / DBLE( 2*NR ) + COEF2 = COEF*COEF + COEF5 = HALF*COEF2 + NRP2 = NR + 2 + BETA = ZERO + IT = 1 +* +* Start generalized conjugate gradient iteration +* + 250 CONTINUE +* + GAMMA = DDOT( NR, WORK( ILO+4*N ), 1, WORK( ILO+4*N ), 1 ) + + $ DDOT( NR, WORK( ILO+5*N ), 1, WORK( ILO+5*N ), 1 ) +* + EW = ZERO + EWC = ZERO + DO 260 I = ILO, IHI + EW = EW + WORK( I+4*N ) + EWC = EWC + WORK( I+5*N ) + 260 CONTINUE +* + GAMMA = COEF*GAMMA - COEF2*( EW**2+EWC**2 ) - COEF5*( EW-EWC )**2 + IF( GAMMA.EQ.ZERO ) + $ GO TO 350 + IF( IT.NE.1 ) + $ BETA = GAMMA / PGAMMA + T = COEF5*( EWC-THREE*EW ) + TC = COEF5*( EW-THREE*EWC ) +* + CALL DSCAL( NR, BETA, WORK( ILO ), 1 ) + CALL DSCAL( NR, BETA, WORK( ILO+N ), 1 ) +* + CALL DAXPY( NR, COEF, WORK( ILO+4*N ), 1, WORK( ILO+N ), 1 ) + CALL DAXPY( NR, COEF, WORK( ILO+5*N ), 1, WORK( ILO ), 1 ) +* + DO 270 I = ILO, IHI + WORK( I ) = WORK( I ) + TC + WORK( I+N ) = WORK( I+N ) + T + 270 CONTINUE +* +* Apply matrix to vector +* + DO 300 I = ILO, IHI + KOUNT = 0 + SUM = ZERO + DO 290 J = ILO, IHI + IF( A( I, J ).EQ.ZERO ) + $ GO TO 280 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( J ) + 280 CONTINUE + IF( B( I, J ).EQ.ZERO ) + $ GO TO 290 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( J ) + 290 CONTINUE + WORK( I+2*N ) = DBLE( KOUNT )*WORK( I+N ) + SUM + 300 CONTINUE +* + DO 330 J = ILO, IHI + KOUNT = 0 + SUM = ZERO + DO 320 I = ILO, IHI + IF( A( I, J ).EQ.ZERO ) + $ GO TO 310 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( I+N ) + 310 CONTINUE + IF( B( I, J ).EQ.ZERO ) + $ GO TO 320 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( I+N ) + 320 CONTINUE + WORK( J+3*N ) = DBLE( KOUNT )*WORK( J ) + SUM + 330 CONTINUE +* + SUM = DDOT( NR, WORK( ILO+N ), 1, WORK( ILO+2*N ), 1 ) + + $ DDOT( NR, WORK( ILO ), 1, WORK( ILO+3*N ), 1 ) + ALPHA = GAMMA / SUM +* +* Determine correction to current iteration +* + CMAX = ZERO + DO 340 I = ILO, IHI + COR = ALPHA*WORK( I+N ) + IF( ABS( COR ).GT.CMAX ) + $ CMAX = ABS( COR ) + LSCALE( I ) = LSCALE( I ) + COR + COR = ALPHA*WORK( I ) + IF( ABS( COR ).GT.CMAX ) + $ CMAX = ABS( COR ) + RSCALE( I ) = RSCALE( I ) + COR + 340 CONTINUE + IF( CMAX.LT.HALF ) + $ GO TO 350 +* + CALL DAXPY( NR, -ALPHA, WORK( ILO+2*N ), 1, WORK( ILO+4*N ), 1 ) + CALL DAXPY( NR, -ALPHA, WORK( ILO+3*N ), 1, WORK( ILO+5*N ), 1 ) +* + PGAMMA = GAMMA + IT = IT + 1 + IF( IT.LE.NRP2 ) + $ GO TO 250 +* +* End generalized conjugate gradient iteration +* + 350 CONTINUE + SFMIN = DLAMCH( 'S' ) + SFMAX = ONE / SFMIN + LSFMIN = INT( LOG10( SFMIN ) / BASL+ONE ) + LSFMAX = INT( LOG10( SFMAX ) / BASL ) + DO 360 I = ILO, IHI + IRAB = IDAMAX( N-ILO+1, A( I, ILO ), LDA ) + RAB = ABS( A( I, IRAB+ILO-1 ) ) + IRAB = IDAMAX( N-ILO+1, B( I, ILO ), LDB ) + RAB = MAX( RAB, ABS( B( I, IRAB+ILO-1 ) ) ) + LRAB = INT( LOG10( RAB+SFMIN ) / BASL+ONE ) + IR = INT(LSCALE( I ) + SIGN( HALF, LSCALE( I ) )) + IR = MIN( MAX( IR, LSFMIN ), LSFMAX, LSFMAX-LRAB ) + LSCALE( I ) = SCLFAC**IR + ICAB = IDAMAX( IHI, A( 1, I ), 1 ) + CAB = ABS( A( ICAB, I ) ) + ICAB = IDAMAX( IHI, B( 1, I ), 1 ) + CAB = MAX( CAB, ABS( B( ICAB, I ) ) ) + LCAB = INT( LOG10( CAB+SFMIN ) / BASL+ONE ) + JC = INT(RSCALE( I ) + SIGN( HALF, RSCALE( I ) )) + JC = MIN( MAX( JC, LSFMIN ), LSFMAX, LSFMAX-LCAB ) + RSCALE( I ) = SCLFAC**JC + 360 CONTINUE +* +* Row scaling of matrices A and B +* + DO 370 I = ILO, IHI + CALL DSCAL( N-ILO+1, LSCALE( I ), A( I, ILO ), LDA ) + CALL DSCAL( N-ILO+1, LSCALE( I ), B( I, ILO ), LDB ) + 370 CONTINUE +* +* Column scaling of matrices A and B +* + DO 380 J = ILO, IHI + CALL DSCAL( IHI, RSCALE( J ), A( 1, J ), 1 ) + CALL DSCAL( IHI, RSCALE( J ), B( 1, J ), 1 ) + 380 CONTINUE +* + RETURN +* +* End of DGGBAL +* + END diff --git a/dspl/liblapack/SRC/dgges.f b/dspl/liblapack/SRC/dgges.f new file mode 100644 index 0000000..097ea77 --- /dev/null +++ b/dspl/liblapack/SRC/dgges.f @@ -0,0 +1,682 @@ +*> \brief DGGES computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGGES + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGGES( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, LDB, +* SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, +* LDVSR, WORK, LWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVSL, JOBVSR, SORT +* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), +* $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), +* $ VSR( LDVSR, * ), WORK( * ) +* .. +* .. Function Arguments .. +* LOGICAL SELCTG +* EXTERNAL SELCTG +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGGES computes for a pair of N-by-N real nonsymmetric matrices (A,B), +*> the generalized eigenvalues, the generalized real Schur form (S,T), +*> optionally, the left and/or right matrices of Schur vectors (VSL and +*> VSR). This gives the generalized Schur factorization +*> +*> (A,B) = ( (VSL)*S*(VSR)**T, (VSL)*T*(VSR)**T ) +*> +*> Optionally, it also orders the eigenvalues so that a selected cluster +*> of eigenvalues appears in the leading diagonal blocks of the upper +*> quasi-triangular matrix S and the upper triangular matrix T.The +*> leading columns of VSL and VSR then form an orthonormal basis for the +*> corresponding left and right eigenspaces (deflating subspaces). +*> +*> (If only the generalized eigenvalues are needed, use the driver +*> DGGEV instead, which is faster.) +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w +*> or a ratio alpha/beta = w, such that A - w*B is singular. It is +*> usually represented as the pair (alpha,beta), as there is a +*> reasonable interpretation for beta=0 or both being zero. +*> +*> A pair of matrices (S,T) is in generalized real Schur form if T is +*> upper triangular with non-negative diagonal and S is block upper +*> triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond +*> to real generalized eigenvalues, while 2-by-2 blocks of S will be +*> "standardized" by making the corresponding elements of T have the +*> form: +*> [ a 0 ] +*> [ 0 b ] +*> +*> and the pair of corresponding 2-by-2 blocks in S and T will have a +*> complex conjugate pair of generalized eigenvalues. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVSL +*> \verbatim +*> JOBVSL is CHARACTER*1 +*> = 'N': do not compute the left Schur vectors; +*> = 'V': compute the left Schur vectors. +*> \endverbatim +*> +*> \param[in] JOBVSR +*> \verbatim +*> JOBVSR is CHARACTER*1 +*> = 'N': do not compute the right Schur vectors; +*> = 'V': compute the right Schur vectors. +*> \endverbatim +*> +*> \param[in] SORT +*> \verbatim +*> SORT is CHARACTER*1 +*> Specifies whether or not to order the eigenvalues on the +*> diagonal of the generalized Schur form. +*> = 'N': Eigenvalues are not ordered; +*> = 'S': Eigenvalues are ordered (see SELCTG); +*> \endverbatim +*> +*> \param[in] SELCTG +*> \verbatim +*> SELCTG is a LOGICAL FUNCTION of three DOUBLE PRECISION arguments +*> SELCTG must be declared EXTERNAL in the calling subroutine. +*> If SORT = 'N', SELCTG is not referenced. +*> If SORT = 'S', SELCTG is used to select eigenvalues to sort +*> to the top left of the Schur form. +*> An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if +*> SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either +*> one of a complex conjugate pair of eigenvalues is selected, +*> then both complex eigenvalues are selected. +*> +*> Note that in the ill-conditioned case, a selected complex +*> eigenvalue may no longer satisfy SELCTG(ALPHAR(j),ALPHAI(j), +*> BETA(j)) = .TRUE. after ordering. INFO is to be set to N+2 +*> in this case. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VSL, and VSR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the first of the pair of matrices. +*> On exit, A has been overwritten by its generalized Schur +*> form S. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, N) +*> On entry, the second of the pair of matrices. +*> On exit, B has been overwritten by its generalized Schur +*> form T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] SDIM +*> \verbatim +*> SDIM is INTEGER +*> If SORT = 'N', SDIM = 0. +*> If SORT = 'S', SDIM = number of eigenvalues (after sorting) +*> for which SELCTG is true. (Complex conjugate pairs for which +*> SELCTG is true for either eigenvalue count as 2.) +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION array, dimension (N) +*> On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +*> be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i, +*> and BETA(j),j=1,...,N are the diagonals of the complex Schur +*> form (S,T) that would result if the 2-by-2 diagonal blocks of +*> the real Schur form of (A,B) were further reduced to +*> triangular form using 2-by-2 complex unitary transformations. +*> If ALPHAI(j) is zero, then the j-th eigenvalue is real; if +*> positive, then the j-th and (j+1)-st eigenvalues are a +*> complex conjugate pair, with ALPHAI(j+1) negative. +*> +*> Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +*> may easily over- or underflow, and BETA(j) may even be zero. +*> Thus, the user should avoid naively computing the ratio. +*> However, ALPHAR and ALPHAI will be always less than and +*> usually comparable with norm(A) in magnitude, and BETA always +*> less than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VSL +*> \verbatim +*> VSL is DOUBLE PRECISION array, dimension (LDVSL,N) +*> If JOBVSL = 'V', VSL will contain the left Schur vectors. +*> Not referenced if JOBVSL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSL +*> \verbatim +*> LDVSL is INTEGER +*> The leading dimension of the matrix VSL. LDVSL >=1, and +*> if JOBVSL = 'V', LDVSL >= N. +*> \endverbatim +*> +*> \param[out] VSR +*> \verbatim +*> VSR is DOUBLE PRECISION array, dimension (LDVSR,N) +*> If JOBVSR = 'V', VSR will contain the right Schur vectors. +*> Not referenced if JOBVSR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSR +*> \verbatim +*> LDVSR is INTEGER +*> The leading dimension of the matrix VSR. LDVSR >= 1, and +*> if JOBVSR = 'V', LDVSR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N = 0, LWORK >= 1, else LWORK >= 8*N+16. +*> For good performance , LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> Not referenced if SORT = 'N'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1,...,N: +*> The QZ iteration failed. (A,B) are not in Schur +*> form, but ALPHAR(j), ALPHAI(j), and BETA(j) should +*> be correct for j=INFO+1,...,N. +*> > N: =N+1: other than QZ iteration failed in DHGEQZ. +*> =N+2: after reordering, roundoff changed values of +*> some complex eigenvalues so that leading +*> eigenvalues in the Generalized Schur form no +*> longer satisfy SELCTG=.TRUE. This could also +*> be caused due to scaling. +*> =N+3: reordering failed in DTGSEN. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEeigen +* +* ===================================================================== + SUBROUTINE DGGES( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, LDB, + $ SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, + $ LDVSR, WORK, LWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR, SORT + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), + $ VSR( LDVSR, * ), WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELCTG + EXTERNAL SELCTG +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL, + $ LQUERY, LST2SL, WANTST + INTEGER I, ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, + $ ILO, IP, IRIGHT, IROWS, ITAU, IWRK, MAXWRK, + $ MINWRK + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PVSL, + $ PVSR, SAFMAX, SAFMIN, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + DOUBLE PRECISION DIF( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL DGEQRF, DGGBAK, DGGBAL, DGGHRD, DHGEQZ, DLABAD, + $ DLACPY, DLASCL, DLASET, DORGQR, DORMQR, DTGSEN, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* + WANTST = LSAME( SORT, 'S' ) +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -15 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -17 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + IF( N.GT.0 )THEN + MINWRK = MAX( 8*N, 6*N + 16 ) + MAXWRK = MINWRK - N + + $ N*ILAENV( 1, 'DGEQRF', ' ', N, 1, N, 0 ) + MAXWRK = MAX( MAXWRK, MINWRK - N + + $ N*ILAENV( 1, 'DORMQR', ' ', N, 1, N, -1 ) ) + IF( ILVSL ) THEN + MAXWRK = MAX( MAXWRK, MINWRK - N + + $ N*ILAENV( 1, 'DORGQR', ' ', N, 1, N, -1 ) ) + END IF + ELSE + MINWRK = 1 + MAXWRK = 1 + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) + $ INFO = -19 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGES ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SAFMIN = DLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + CALL DLABAD( SAFMIN, SAFMAX ) + SMLNUM = SQRT( SAFMIN ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL DLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = DLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL DLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (Workspace: need 6*N + 2*N space for storing balancing factors) +* + ILEFT = 1 + IRIGHT = N + 1 + IWRK = IRIGHT + N + CALL DGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = IWRK + IWRK = ITAU + IROWS + CALL DGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* (Workspace: need N, prefer N*NB) +* + CALL DORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VSL +* (Workspace: need N, prefer N*NB) +* + IF( ILVSL ) THEN + CALL DLASET( 'Full', N, N, ZERO, ONE, VSL, LDVSL ) + IF( IROWS.GT.1 ) THEN + CALL DLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + END IF + CALL DORGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VSR +* + IF( ILVSR ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + CALL DGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IERR ) +* +* Perform QZ algorithm, computing Schur vectors if desired +* (Workspace: need N) +* + IWRK = ITAU + CALL DHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 50 + END IF +* +* Sort eigenvalues ALPHA/BETA if desired +* (Workspace: need 4*N+16 ) +* + SDIM = 0 + IF( WANTST ) THEN +* +* Undo scaling on eigenvalues before SELCTGing +* + IF( ILASCL ) THEN + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, + $ IERR ) + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, + $ IERR ) + END IF + IF( ILBSCL ) + $ CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* +* Select eigenvalues +* + DO 10 I = 1, N + BWORK( I ) = SELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) ) + 10 CONTINUE +* + CALL DTGSEN( 0, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, ALPHAR, + $ ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PVSL, + $ PVSR, DIF, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1, + $ IERR ) + IF( IERR.EQ.1 ) + $ INFO = N + 3 +* + END IF +* +* Apply back-permutation to VSL and VSR +* (Workspace: none needed) +* + IF( ILVSL ) + $ CALL DGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSL, LDVSL, IERR ) +* + IF( ILVSR ) + $ CALL DGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSR, LDVSR, IERR ) +* +* Check if unscaling would cause over/underflow, if so, rescale +* (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of +* B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I) +* + IF( ILASCL ) THEN + DO 20 I = 1, N + IF( ALPHAI( I ).NE.ZERO ) THEN + IF( ( ALPHAR( I ) / SAFMAX ).GT.( ANRMTO / ANRM ) .OR. + $ ( SAFMIN / ALPHAR( I ) ).GT.( ANRM / ANRMTO ) ) THEN + WORK( 1 ) = ABS( A( I, I ) / ALPHAR( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + ELSE IF( ( ALPHAI( I ) / SAFMAX ).GT. + $ ( ANRMTO / ANRM ) .OR. + $ ( SAFMIN / ALPHAI( I ) ).GT.( ANRM / ANRMTO ) ) + $ THEN + WORK( 1 ) = ABS( A( I, I+1 ) / ALPHAI( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + END IF + END IF + 20 CONTINUE + END IF +* + IF( ILBSCL ) THEN + DO 30 I = 1, N + IF( ALPHAI( I ).NE.ZERO ) THEN + IF( ( BETA( I ) / SAFMAX ).GT.( BNRMTO / BNRM ) .OR. + $ ( SAFMIN / BETA( I ) ).GT.( BNRM / BNRMTO ) ) THEN + WORK( 1 ) = ABS( B( I, I ) / BETA( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + END IF + END IF + 30 CONTINUE + END IF +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL DLASCL( 'H', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR ) + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR ) + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL DLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR ) + CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + IF( WANTST ) THEN +* +* Check if reordering is correct +* + LASTSL = .TRUE. + LST2SL = .TRUE. + SDIM = 0 + IP = 0 + DO 40 I = 1, N + CURSL = SELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) ) + IF( ALPHAI( I ).EQ.ZERO ) THEN + IF( CURSL ) + $ SDIM = SDIM + 1 + IP = 0 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + ELSE + IF( IP.EQ.1 ) THEN +* +* Last eigenvalue of conjugate pair +* + CURSL = CURSL .OR. LASTSL + LASTSL = CURSL + IF( CURSL ) + $ SDIM = SDIM + 2 + IP = -1 + IF( CURSL .AND. .NOT.LST2SL ) + $ INFO = N + 2 + ELSE +* +* First eigenvalue of conjugate pair +* + IP = 1 + END IF + END IF + LST2SL = LASTSL + LASTSL = CURSL + 40 CONTINUE +* + END IF +* + 50 CONTINUE +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of DGGES +* + END diff --git a/dspl/liblapack/SRC/dgges3.f b/dspl/liblapack/SRC/dgges3.f new file mode 100644 index 0000000..41d2ea0 --- /dev/null +++ b/dspl/liblapack/SRC/dgges3.f @@ -0,0 +1,674 @@ +*> \brief DGGES3 computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors for GE matrices (blocked algorithm) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGGES3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGGES3( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, LDB, +* SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, +* LDVSR, WORK, LWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVSL, JOBVSR, SORT +* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), +* $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), +* $ VSR( LDVSR, * ), WORK( * ) +* .. +* .. Function Arguments .. +* LOGICAL SELCTG +* EXTERNAL SELCTG +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGGES3 computes for a pair of N-by-N real nonsymmetric matrices (A,B), +*> the generalized eigenvalues, the generalized real Schur form (S,T), +*> optionally, the left and/or right matrices of Schur vectors (VSL and +*> VSR). This gives the generalized Schur factorization +*> +*> (A,B) = ( (VSL)*S*(VSR)**T, (VSL)*T*(VSR)**T ) +*> +*> Optionally, it also orders the eigenvalues so that a selected cluster +*> of eigenvalues appears in the leading diagonal blocks of the upper +*> quasi-triangular matrix S and the upper triangular matrix T.The +*> leading columns of VSL and VSR then form an orthonormal basis for the +*> corresponding left and right eigenspaces (deflating subspaces). +*> +*> (If only the generalized eigenvalues are needed, use the driver +*> DGGEV instead, which is faster.) +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w +*> or a ratio alpha/beta = w, such that A - w*B is singular. It is +*> usually represented as the pair (alpha,beta), as there is a +*> reasonable interpretation for beta=0 or both being zero. +*> +*> A pair of matrices (S,T) is in generalized real Schur form if T is +*> upper triangular with non-negative diagonal and S is block upper +*> triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond +*> to real generalized eigenvalues, while 2-by-2 blocks of S will be +*> "standardized" by making the corresponding elements of T have the +*> form: +*> [ a 0 ] +*> [ 0 b ] +*> +*> and the pair of corresponding 2-by-2 blocks in S and T will have a +*> complex conjugate pair of generalized eigenvalues. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVSL +*> \verbatim +*> JOBVSL is CHARACTER*1 +*> = 'N': do not compute the left Schur vectors; +*> = 'V': compute the left Schur vectors. +*> \endverbatim +*> +*> \param[in] JOBVSR +*> \verbatim +*> JOBVSR is CHARACTER*1 +*> = 'N': do not compute the right Schur vectors; +*> = 'V': compute the right Schur vectors. +*> \endverbatim +*> +*> \param[in] SORT +*> \verbatim +*> SORT is CHARACTER*1 +*> Specifies whether or not to order the eigenvalues on the +*> diagonal of the generalized Schur form. +*> = 'N': Eigenvalues are not ordered; +*> = 'S': Eigenvalues are ordered (see SELCTG); +*> \endverbatim +*> +*> \param[in] SELCTG +*> \verbatim +*> SELCTG is a LOGICAL FUNCTION of three DOUBLE PRECISION arguments +*> SELCTG must be declared EXTERNAL in the calling subroutine. +*> If SORT = 'N', SELCTG is not referenced. +*> If SORT = 'S', SELCTG is used to select eigenvalues to sort +*> to the top left of the Schur form. +*> An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if +*> SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either +*> one of a complex conjugate pair of eigenvalues is selected, +*> then both complex eigenvalues are selected. +*> +*> Note that in the ill-conditioned case, a selected complex +*> eigenvalue may no longer satisfy SELCTG(ALPHAR(j),ALPHAI(j), +*> BETA(j)) = .TRUE. after ordering. INFO is to be set to N+2 +*> in this case. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VSL, and VSR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the first of the pair of matrices. +*> On exit, A has been overwritten by its generalized Schur +*> form S. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, N) +*> On entry, the second of the pair of matrices. +*> On exit, B has been overwritten by its generalized Schur +*> form T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] SDIM +*> \verbatim +*> SDIM is INTEGER +*> If SORT = 'N', SDIM = 0. +*> If SORT = 'S', SDIM = number of eigenvalues (after sorting) +*> for which SELCTG is true. (Complex conjugate pairs for which +*> SELCTG is true for either eigenvalue count as 2.) +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION array, dimension (N) +*> On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +*> be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i, +*> and BETA(j),j=1,...,N are the diagonals of the complex Schur +*> form (S,T) that would result if the 2-by-2 diagonal blocks of +*> the real Schur form of (A,B) were further reduced to +*> triangular form using 2-by-2 complex unitary transformations. +*> If ALPHAI(j) is zero, then the j-th eigenvalue is real; if +*> positive, then the j-th and (j+1)-st eigenvalues are a +*> complex conjugate pair, with ALPHAI(j+1) negative. +*> +*> Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +*> may easily over- or underflow, and BETA(j) may even be zero. +*> Thus, the user should avoid naively computing the ratio. +*> However, ALPHAR and ALPHAI will be always less than and +*> usually comparable with norm(A) in magnitude, and BETA always +*> less than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VSL +*> \verbatim +*> VSL is DOUBLE PRECISION array, dimension (LDVSL,N) +*> If JOBVSL = 'V', VSL will contain the left Schur vectors. +*> Not referenced if JOBVSL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSL +*> \verbatim +*> LDVSL is INTEGER +*> The leading dimension of the matrix VSL. LDVSL >=1, and +*> if JOBVSL = 'V', LDVSL >= N. +*> \endverbatim +*> +*> \param[out] VSR +*> \verbatim +*> VSR is DOUBLE PRECISION array, dimension (LDVSR,N) +*> If JOBVSR = 'V', VSR will contain the right Schur vectors. +*> Not referenced if JOBVSR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSR +*> \verbatim +*> LDVSR is INTEGER +*> The leading dimension of the matrix VSR. LDVSR >= 1, and +*> if JOBVSR = 'V', LDVSR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> Not referenced if SORT = 'N'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1,...,N: +*> The QZ iteration failed. (A,B) are not in Schur +*> form, but ALPHAR(j), ALPHAI(j), and BETA(j) should +*> be correct for j=INFO+1,...,N. +*> > N: =N+1: other than QZ iteration failed in DHGEQZ. +*> =N+2: after reordering, roundoff changed values of +*> some complex eigenvalues so that leading +*> eigenvalues in the Generalized Schur form no +*> longer satisfy SELCTG=.TRUE. This could also +*> be caused due to scaling. +*> =N+3: reordering failed in DTGSEN. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date January 2015 +* +*> \ingroup doubleGEeigen +* +* ===================================================================== + SUBROUTINE DGGES3( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, + $ LDB, SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, + $ VSR, LDVSR, WORK, LWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.6.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2015 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR, SORT + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), + $ VSR( LDVSR, * ), WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELCTG + EXTERNAL SELCTG +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL, + $ LQUERY, LST2SL, WANTST + INTEGER I, ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, + $ ILO, IP, IRIGHT, IROWS, ITAU, IWRK, LWKOPT + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PVSL, + $ PVSR, SAFMAX, SAFMIN, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + DOUBLE PRECISION DIF( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL DGEQRF, DGGBAK, DGGBAL, DGGHD3, DHGEQZ, DLABAD, + $ DLACPY, DLASCL, DLASET, DORGQR, DORMQR, DTGSEN, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* + WANTST = LSAME( SORT, 'S' ) +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -15 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -17 + ELSE IF( LWORK.LT.6*N+16 .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF +* +* Compute workspace +* + IF( INFO.EQ.0 ) THEN + CALL DGEQRF( N, N, B, LDB, WORK, WORK, -1, IERR ) + LWKOPT = MAX( 6*N+16, 3*N+INT( WORK ( 1 ) ) ) + CALL DORMQR( 'L', 'T', N, N, N, B, LDB, WORK, A, LDA, WORK, + $ -1, IERR ) + LWKOPT = MAX( LWKOPT, 3*N+INT( WORK ( 1 ) ) ) + IF( ILVSL ) THEN + CALL DORGQR( N, N, N, VSL, LDVSL, WORK, WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, 3*N+INT( WORK ( 1 ) ) ) + END IF + CALL DGGHD3( JOBVSL, JOBVSR, N, 1, N, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, 3*N+INT( WORK ( 1 ) ) ) + CALL DHGEQZ( 'S', JOBVSL, JOBVSR, N, 1, N, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, 2*N+INT( WORK ( 1 ) ) ) + IF( WANTST ) THEN + CALL DTGSEN( 0, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ SDIM, PVSL, PVSR, DIF, WORK, -1, IDUM, 1, + $ IERR ) + LWKOPT = MAX( LWKOPT, 2*N+INT( WORK ( 1 ) ) ) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGES3 ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SAFMIN = DLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + CALL DLABAD( SAFMIN, SAFMAX ) + SMLNUM = SQRT( SAFMIN ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL DLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = DLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL DLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrix to make it more nearly triangular +* + ILEFT = 1 + IRIGHT = N + 1 + IWRK = IRIGHT + N + CALL DGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = IWRK + IWRK = ITAU + IROWS + CALL DGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* + CALL DORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VSL +* + IF( ILVSL ) THEN + CALL DLASET( 'Full', N, N, ZERO, ONE, VSL, LDVSL ) + IF( IROWS.GT.1 ) THEN + CALL DLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + END IF + CALL DORGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VSR +* + IF( ILVSR ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* + CALL DGGHD3( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, WORK( IWRK ), LWORK+1-IWRK, + $ IERR ) +* +* Perform QZ algorithm, computing Schur vectors if desired +* + IWRK = ITAU + CALL DHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 50 + END IF +* +* Sort eigenvalues ALPHA/BETA if desired +* + SDIM = 0 + IF( WANTST ) THEN +* +* Undo scaling on eigenvalues before SELCTGing +* + IF( ILASCL ) THEN + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, + $ IERR ) + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, + $ IERR ) + END IF + IF( ILBSCL ) + $ CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* +* Select eigenvalues +* + DO 10 I = 1, N + BWORK( I ) = SELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) ) + 10 CONTINUE +* + CALL DTGSEN( 0, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, ALPHAR, + $ ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PVSL, + $ PVSR, DIF, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1, + $ IERR ) + IF( IERR.EQ.1 ) + $ INFO = N + 3 +* + END IF +* +* Apply back-permutation to VSL and VSR +* + IF( ILVSL ) + $ CALL DGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSL, LDVSL, IERR ) +* + IF( ILVSR ) + $ CALL DGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSR, LDVSR, IERR ) +* +* Check if unscaling would cause over/underflow, if so, rescale +* (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of +* B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I) +* + IF( ILASCL ) THEN + DO 20 I = 1, N + IF( ALPHAI( I ).NE.ZERO ) THEN + IF( ( ALPHAR( I ) / SAFMAX ).GT.( ANRMTO / ANRM ) .OR. + $ ( SAFMIN / ALPHAR( I ) ).GT.( ANRM / ANRMTO ) ) THEN + WORK( 1 ) = ABS( A( I, I ) / ALPHAR( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + ELSE IF( ( ALPHAI( I ) / SAFMAX ).GT. + $ ( ANRMTO / ANRM ) .OR. + $ ( SAFMIN / ALPHAI( I ) ).GT.( ANRM / ANRMTO ) ) + $ THEN + WORK( 1 ) = ABS( A( I, I+1 ) / ALPHAI( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + END IF + END IF + 20 CONTINUE + END IF +* + IF( ILBSCL ) THEN + DO 30 I = 1, N + IF( ALPHAI( I ).NE.ZERO ) THEN + IF( ( BETA( I ) / SAFMAX ).GT.( BNRMTO / BNRM ) .OR. + $ ( SAFMIN / BETA( I ) ).GT.( BNRM / BNRMTO ) ) THEN + WORK( 1 ) = ABS( B( I, I ) / BETA( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + END IF + END IF + 30 CONTINUE + END IF +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL DLASCL( 'H', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR ) + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR ) + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL DLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR ) + CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + IF( WANTST ) THEN +* +* Check if reordering is correct +* + LASTSL = .TRUE. + LST2SL = .TRUE. + SDIM = 0 + IP = 0 + DO 40 I = 1, N + CURSL = SELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) ) + IF( ALPHAI( I ).EQ.ZERO ) THEN + IF( CURSL ) + $ SDIM = SDIM + 1 + IP = 0 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + ELSE + IF( IP.EQ.1 ) THEN +* +* Last eigenvalue of conjugate pair +* + CURSL = CURSL .OR. LASTSL + LASTSL = CURSL + IF( CURSL ) + $ SDIM = SDIM + 2 + IP = -1 + IF( CURSL .AND. .NOT.LST2SL ) + $ INFO = N + 2 + ELSE +* +* First eigenvalue of conjugate pair +* + IP = 1 + END IF + END IF + LST2SL = LASTSL + LASTSL = CURSL + 40 CONTINUE +* + END IF +* + 50 CONTINUE +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DGGES3 +* + END diff --git a/dspl/liblapack/SRC/dggesx.f b/dspl/liblapack/SRC/dggesx.f new file mode 100644 index 0000000..47022fb --- /dev/null +++ b/dspl/liblapack/SRC/dggesx.f @@ -0,0 +1,820 @@ +*> \brief DGGESX computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGGESX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGGESX( JOBVSL, JOBVSR, SORT, SELCTG, SENSE, N, A, LDA, +* B, LDB, SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, +* VSR, LDVSR, RCONDE, RCONDV, WORK, LWORK, IWORK, +* LIWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVSL, JOBVSR, SENSE, SORT +* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LIWORK, LWORK, N, +* $ SDIM +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), +* $ B( LDB, * ), BETA( * ), RCONDE( 2 ), +* $ RCONDV( 2 ), VSL( LDVSL, * ), VSR( LDVSR, * ), +* $ WORK( * ) +* .. +* .. Function Arguments .. +* LOGICAL SELCTG +* EXTERNAL SELCTG +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGGESX computes for a pair of N-by-N real nonsymmetric matrices +*> (A,B), the generalized eigenvalues, the real Schur form (S,T), and, +*> optionally, the left and/or right matrices of Schur vectors (VSL and +*> VSR). This gives the generalized Schur factorization +*> +*> (A,B) = ( (VSL) S (VSR)**T, (VSL) T (VSR)**T ) +*> +*> Optionally, it also orders the eigenvalues so that a selected cluster +*> of eigenvalues appears in the leading diagonal blocks of the upper +*> quasi-triangular matrix S and the upper triangular matrix T; computes +*> a reciprocal condition number for the average of the selected +*> eigenvalues (RCONDE); and computes a reciprocal condition number for +*> the right and left deflating subspaces corresponding to the selected +*> eigenvalues (RCONDV). The leading columns of VSL and VSR then form +*> an orthonormal basis for the corresponding left and right eigenspaces +*> (deflating subspaces). +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w +*> or a ratio alpha/beta = w, such that A - w*B is singular. It is +*> usually represented as the pair (alpha,beta), as there is a +*> reasonable interpretation for beta=0 or for both being zero. +*> +*> A pair of matrices (S,T) is in generalized real Schur form if T is +*> upper triangular with non-negative diagonal and S is block upper +*> triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond +*> to real generalized eigenvalues, while 2-by-2 blocks of S will be +*> "standardized" by making the corresponding elements of T have the +*> form: +*> [ a 0 ] +*> [ 0 b ] +*> +*> and the pair of corresponding 2-by-2 blocks in S and T will have a +*> complex conjugate pair of generalized eigenvalues. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVSL +*> \verbatim +*> JOBVSL is CHARACTER*1 +*> = 'N': do not compute the left Schur vectors; +*> = 'V': compute the left Schur vectors. +*> \endverbatim +*> +*> \param[in] JOBVSR +*> \verbatim +*> JOBVSR is CHARACTER*1 +*> = 'N': do not compute the right Schur vectors; +*> = 'V': compute the right Schur vectors. +*> \endverbatim +*> +*> \param[in] SORT +*> \verbatim +*> SORT is CHARACTER*1 +*> Specifies whether or not to order the eigenvalues on the +*> diagonal of the generalized Schur form. +*> = 'N': Eigenvalues are not ordered; +*> = 'S': Eigenvalues are ordered (see SELCTG). +*> \endverbatim +*> +*> \param[in] SELCTG +*> \verbatim +*> SELCTG is a LOGICAL FUNCTION of three DOUBLE PRECISION arguments +*> SELCTG must be declared EXTERNAL in the calling subroutine. +*> If SORT = 'N', SELCTG is not referenced. +*> If SORT = 'S', SELCTG is used to select eigenvalues to sort +*> to the top left of the Schur form. +*> An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if +*> SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either +*> one of a complex conjugate pair of eigenvalues is selected, +*> then both complex eigenvalues are selected. +*> Note that a selected complex eigenvalue may no longer satisfy +*> SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) = .TRUE. after ordering, +*> since ordering may change the value of complex eigenvalues +*> (especially if the eigenvalue is ill-conditioned), in this +*> case INFO is set to N+3. +*> \endverbatim +*> +*> \param[in] SENSE +*> \verbatim +*> SENSE is CHARACTER*1 +*> Determines which reciprocal condition numbers are computed. +*> = 'N' : None are computed; +*> = 'E' : Computed for average of selected eigenvalues only; +*> = 'V' : Computed for selected deflating subspaces only; +*> = 'B' : Computed for both. +*> If SENSE = 'E', 'V', or 'B', SORT must equal 'S'. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VSL, and VSR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the first of the pair of matrices. +*> On exit, A has been overwritten by its generalized Schur +*> form S. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, N) +*> On entry, the second of the pair of matrices. +*> On exit, B has been overwritten by its generalized Schur +*> form T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] SDIM +*> \verbatim +*> SDIM is INTEGER +*> If SORT = 'N', SDIM = 0. +*> If SORT = 'S', SDIM = number of eigenvalues (after sorting) +*> for which SELCTG is true. (Complex conjugate pairs for which +*> SELCTG is true for either eigenvalue count as 2.) +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION array, dimension (N) +*> On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +*> be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i +*> and BETA(j),j=1,...,N are the diagonals of the complex Schur +*> form (S,T) that would result if the 2-by-2 diagonal blocks of +*> the real Schur form of (A,B) were further reduced to +*> triangular form using 2-by-2 complex unitary transformations. +*> If ALPHAI(j) is zero, then the j-th eigenvalue is real; if +*> positive, then the j-th and (j+1)-st eigenvalues are a +*> complex conjugate pair, with ALPHAI(j+1) negative. +*> +*> Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +*> may easily over- or underflow, and BETA(j) may even be zero. +*> Thus, the user should avoid naively computing the ratio. +*> However, ALPHAR and ALPHAI will be always less than and +*> usually comparable with norm(A) in magnitude, and BETA always +*> less than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VSL +*> \verbatim +*> VSL is DOUBLE PRECISION array, dimension (LDVSL,N) +*> If JOBVSL = 'V', VSL will contain the left Schur vectors. +*> Not referenced if JOBVSL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSL +*> \verbatim +*> LDVSL is INTEGER +*> The leading dimension of the matrix VSL. LDVSL >=1, and +*> if JOBVSL = 'V', LDVSL >= N. +*> \endverbatim +*> +*> \param[out] VSR +*> \verbatim +*> VSR is DOUBLE PRECISION array, dimension (LDVSR,N) +*> If JOBVSR = 'V', VSR will contain the right Schur vectors. +*> Not referenced if JOBVSR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSR +*> \verbatim +*> LDVSR is INTEGER +*> The leading dimension of the matrix VSR. LDVSR >= 1, and +*> if JOBVSR = 'V', LDVSR >= N. +*> \endverbatim +*> +*> \param[out] RCONDE +*> \verbatim +*> RCONDE is DOUBLE PRECISION array, dimension ( 2 ) +*> If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the +*> reciprocal condition numbers for the average of the selected +*> eigenvalues. +*> Not referenced if SENSE = 'N' or 'V'. +*> \endverbatim +*> +*> \param[out] RCONDV +*> \verbatim +*> RCONDV is DOUBLE PRECISION array, dimension ( 2 ) +*> If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the +*> reciprocal condition numbers for the selected deflating +*> subspaces. +*> Not referenced if SENSE = 'N' or 'E'. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N = 0, LWORK >= 1, else if SENSE = 'E', 'V', or 'B', +*> LWORK >= max( 8*N, 6*N+16, 2*SDIM*(N-SDIM) ), else +*> LWORK >= max( 8*N, 6*N+16 ). +*> Note that 2*SDIM*(N-SDIM) <= N*N/2. +*> Note also that an error is only returned if +*> LWORK < max( 8*N, 6*N+16), but if SENSE = 'E' or 'V' or 'B' +*> this may not be large enough. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the bound on the optimal size of the WORK +*> array and the minimum size of the IWORK array, returns these +*> values as the first entries of the WORK and IWORK arrays, and +*> no error message related to LWORK or LIWORK is issued by +*> XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If SENSE = 'N' or N = 0, LIWORK >= 1, otherwise +*> LIWORK >= N+6. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the bound on the optimal size of the +*> WORK array and the minimum size of the IWORK array, returns +*> these values as the first entries of the WORK and IWORK +*> arrays, and no error message related to LWORK or LIWORK is +*> issued by XERBLA. +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> Not referenced if SORT = 'N'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1,...,N: +*> The QZ iteration failed. (A,B) are not in Schur +*> form, but ALPHAR(j), ALPHAI(j), and BETA(j) should +*> be correct for j=INFO+1,...,N. +*> > N: =N+1: other than QZ iteration failed in DHGEQZ +*> =N+2: after reordering, roundoff changed values of +*> some complex eigenvalues so that leading +*> eigenvalues in the Generalized Schur form no +*> longer satisfy SELCTG=.TRUE. This could also +*> be caused due to scaling. +*> =N+3: reordering failed in DTGSEN. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleGEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> An approximate (asymptotic) bound on the average absolute error of +*> the selected eigenvalues is +*> +*> EPS * norm((A, B)) / RCONDE( 1 ). +*> +*> An approximate (asymptotic) bound on the maximum angular error in +*> the computed deflating subspaces is +*> +*> EPS * norm((A, B)) / RCONDV( 2 ). +*> +*> See LAPACK User's Guide, section 4.11 for more information. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGGESX( JOBVSL, JOBVSR, SORT, SELCTG, SENSE, N, A, LDA, + $ B, LDB, SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, + $ VSR, LDVSR, RCONDE, RCONDV, WORK, LWORK, IWORK, + $ LIWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR, SENSE, SORT + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LIWORK, LWORK, N, + $ SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), RCONDE( 2 ), + $ RCONDV( 2 ), VSL( LDVSL, * ), VSR( LDVSR, * ), + $ WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELCTG + EXTERNAL SELCTG +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL, + $ LQUERY, LST2SL, WANTSB, WANTSE, WANTSN, WANTST, + $ WANTSV + INTEGER I, ICOLS, IERR, IHI, IJOB, IJOBVL, IJOBVR, + $ ILEFT, ILO, IP, IRIGHT, IROWS, ITAU, IWRK, + $ LIWMIN, LWRK, MAXWRK, MINWRK + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PL, + $ PR, SAFMAX, SAFMIN, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION DIF( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL DGEQRF, DGGBAK, DGGBAL, DGGHRD, DHGEQZ, DLABAD, + $ DLACPY, DLASCL, DLASET, DORGQR, DORMQR, DTGSEN, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* + WANTST = LSAME( SORT, 'S' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) + IF( WANTSN ) THEN + IJOB = 0 + ELSE IF( WANTSE ) THEN + IJOB = 1 + ELSE IF( WANTSV ) THEN + IJOB = 2 + ELSE IF( WANTSB ) THEN + IJOB = 4 + END IF +* +* Test the input arguments +* + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR. + $ ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -16 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -18 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + IF( N.GT.0) THEN + MINWRK = MAX( 8*N, 6*N + 16 ) + MAXWRK = MINWRK - N + + $ N*ILAENV( 1, 'DGEQRF', ' ', N, 1, N, 0 ) + MAXWRK = MAX( MAXWRK, MINWRK - N + + $ N*ILAENV( 1, 'DORMQR', ' ', N, 1, N, -1 ) ) + IF( ILVSL ) THEN + MAXWRK = MAX( MAXWRK, MINWRK - N + + $ N*ILAENV( 1, 'DORGQR', ' ', N, 1, N, -1 ) ) + END IF + LWRK = MAXWRK + IF( IJOB.GE.1 ) + $ LWRK = MAX( LWRK, N*N/2 ) + ELSE + MINWRK = 1 + MAXWRK = 1 + LWRK = 1 + END IF + WORK( 1 ) = LWRK + IF( WANTSN .OR. N.EQ.0 ) THEN + LIWMIN = 1 + ELSE + LIWMIN = N + 6 + END IF + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -22 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -24 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGESX', -INFO ) + RETURN + ELSE IF (LQUERY) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SAFMIN = DLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + CALL DLABAD( SAFMIN, SAFMAX ) + SMLNUM = SQRT( SAFMIN ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL DLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = DLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL DLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (Workspace: need 6*N + 2*N for permutation parameters) +* + ILEFT = 1 + IRIGHT = N + 1 + IWRK = IRIGHT + N + CALL DGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = IWRK + IWRK = ITAU + IROWS + CALL DGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* (Workspace: need N, prefer N*NB) +* + CALL DORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VSL +* (Workspace: need N, prefer N*NB) +* + IF( ILVSL ) THEN + CALL DLASET( 'Full', N, N, ZERO, ONE, VSL, LDVSL ) + IF( IROWS.GT.1 ) THEN + CALL DLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + END IF + CALL DORGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VSR +* + IF( ILVSR ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + CALL DGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IERR ) +* + SDIM = 0 +* +* Perform QZ algorithm, computing Schur vectors if desired +* (Workspace: need N) +* + IWRK = ITAU + CALL DHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 60 + END IF +* +* Sort eigenvalues ALPHA/BETA and compute the reciprocal of +* condition number(s) +* (Workspace: If IJOB >= 1, need MAX( 8*(N+1), 2*SDIM*(N-SDIM) ) +* otherwise, need 8*(N+1) ) +* + IF( WANTST ) THEN +* +* Undo scaling on eigenvalues before SELCTGing +* + IF( ILASCL ) THEN + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, + $ IERR ) + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, + $ IERR ) + END IF + IF( ILBSCL ) + $ CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* +* Select eigenvalues +* + DO 10 I = 1, N + BWORK( I ) = SELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues, transform Generalized Schur vectors, and +* compute reciprocal condition numbers +* + CALL DTGSEN( IJOB, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ SDIM, PL, PR, DIF, WORK( IWRK ), LWORK-IWRK+1, + $ IWORK, LIWORK, IERR ) +* + IF( IJOB.GE.1 ) + $ MAXWRK = MAX( MAXWRK, 2*SDIM*( N-SDIM ) ) + IF( IERR.EQ.-22 ) THEN +* +* not enough real workspace +* + INFO = -22 + ELSE + IF( IJOB.EQ.1 .OR. IJOB.EQ.4 ) THEN + RCONDE( 1 ) = PL + RCONDE( 2 ) = PR + END IF + IF( IJOB.EQ.2 .OR. IJOB.EQ.4 ) THEN + RCONDV( 1 ) = DIF( 1 ) + RCONDV( 2 ) = DIF( 2 ) + END IF + IF( IERR.EQ.1 ) + $ INFO = N + 3 + END IF +* + END IF +* +* Apply permutation to VSL and VSR +* (Workspace: none needed) +* + IF( ILVSL ) + $ CALL DGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSL, LDVSL, IERR ) +* + IF( ILVSR ) + $ CALL DGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSR, LDVSR, IERR ) +* +* Check if unscaling would cause over/underflow, if so, rescale +* (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of +* B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I) +* + IF( ILASCL ) THEN + DO 20 I = 1, N + IF( ALPHAI( I ).NE.ZERO ) THEN + IF( ( ALPHAR( I ) / SAFMAX ).GT.( ANRMTO / ANRM ) .OR. + $ ( SAFMIN / ALPHAR( I ) ).GT.( ANRM / ANRMTO ) ) THEN + WORK( 1 ) = ABS( A( I, I ) / ALPHAR( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + ELSE IF( ( ALPHAI( I ) / SAFMAX ).GT. + $ ( ANRMTO / ANRM ) .OR. + $ ( SAFMIN / ALPHAI( I ) ).GT.( ANRM / ANRMTO ) ) + $ THEN + WORK( 1 ) = ABS( A( I, I+1 ) / ALPHAI( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + END IF + END IF + 20 CONTINUE + END IF +* + IF( ILBSCL ) THEN + DO 30 I = 1, N + IF( ALPHAI( I ).NE.ZERO ) THEN + IF( ( BETA( I ) / SAFMAX ).GT.( BNRMTO / BNRM ) .OR. + $ ( SAFMIN / BETA( I ) ).GT.( BNRM / BNRMTO ) ) THEN + WORK( 1 ) = ABS( B( I, I ) / BETA( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + END IF + END IF + 30 CONTINUE + END IF +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL DLASCL( 'H', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR ) + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR ) + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL DLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR ) + CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + IF( WANTST ) THEN +* +* Check if reordering is correct +* + LASTSL = .TRUE. + LST2SL = .TRUE. + SDIM = 0 + IP = 0 + DO 50 I = 1, N + CURSL = SELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) ) + IF( ALPHAI( I ).EQ.ZERO ) THEN + IF( CURSL ) + $ SDIM = SDIM + 1 + IP = 0 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + ELSE + IF( IP.EQ.1 ) THEN +* +* Last eigenvalue of conjugate pair +* + CURSL = CURSL .OR. LASTSL + LASTSL = CURSL + IF( CURSL ) + $ SDIM = SDIM + 2 + IP = -1 + IF( CURSL .AND. .NOT.LST2SL ) + $ INFO = N + 2 + ELSE +* +* First eigenvalue of conjugate pair +* + IP = 1 + END IF + END IF + LST2SL = LASTSL + LASTSL = CURSL + 50 CONTINUE +* + END IF +* + 60 CONTINUE +* + WORK( 1 ) = MAXWRK + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DGGESX +* + END diff --git a/dspl/liblapack/SRC/dggev.f b/dspl/liblapack/SRC/dggev.f new file mode 100644 index 0000000..fa86828 --- /dev/null +++ b/dspl/liblapack/SRC/dggev.f @@ -0,0 +1,592 @@ +*> \brief DGGEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGGEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI, +* BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVL, JOBVR +* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), +* $ B( LDB, * ), BETA( * ), VL( LDVL, * ), +* $ VR( LDVR, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGGEV computes for a pair of N-by-N real nonsymmetric matrices (A,B) +*> the generalized eigenvalues, and optionally, the left and/or right +*> generalized eigenvectors. +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar +*> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is +*> singular. It is usually represented as the pair (alpha,beta), as +*> there is a reasonable interpretation for beta=0, and even for both +*> being zero. +*> +*> The right eigenvector v(j) corresponding to the eigenvalue lambda(j) +*> of (A,B) satisfies +*> +*> A * v(j) = lambda(j) * B * v(j). +*> +*> The left eigenvector u(j) corresponding to the eigenvalue lambda(j) +*> of (A,B) satisfies +*> +*> u(j)**H * A = lambda(j) * u(j)**H * B . +*> +*> where u(j)**H is the conjugate-transpose of u(j). +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': do not compute the left generalized eigenvectors; +*> = 'V': compute the left generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': do not compute the right generalized eigenvectors; +*> = 'V': compute the right generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VL, and VR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the matrix A in the pair (A,B). +*> On exit, A has been overwritten. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, N) +*> On entry, the matrix B in the pair (A,B). +*> On exit, B has been overwritten. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION array, dimension (N) +*> On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +*> be the generalized eigenvalues. If ALPHAI(j) is zero, then +*> the j-th eigenvalue is real; if positive, then the j-th and +*> (j+1)-st eigenvalues are a complex conjugate pair, with +*> ALPHAI(j+1) negative. +*> +*> Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +*> may easily over- or underflow, and BETA(j) may even be zero. +*> Thus, the user should avoid naively computing the ratio +*> alpha/beta. However, ALPHAR and ALPHAI will be always less +*> than and usually comparable with norm(A) in magnitude, and +*> BETA always less than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is DOUBLE PRECISION array, dimension (LDVL,N) +*> If JOBVL = 'V', the left eigenvectors u(j) are stored one +*> after another in the columns of VL, in the same order as +*> their eigenvalues. If the j-th eigenvalue is real, then +*> u(j) = VL(:,j), the j-th column of VL. If the j-th and +*> (j+1)-th eigenvalues form a complex conjugate pair, then +*> u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). +*> Each eigenvector is scaled so the largest component has +*> abs(real part)+abs(imag. part)=1. +*> Not referenced if JOBVL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the matrix VL. LDVL >= 1, and +*> if JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is DOUBLE PRECISION array, dimension (LDVR,N) +*> If JOBVR = 'V', the right eigenvectors v(j) are stored one +*> after another in the columns of VR, in the same order as +*> their eigenvalues. If the j-th eigenvalue is real, then +*> v(j) = VR(:,j), the j-th column of VR. If the j-th and +*> (j+1)-th eigenvalues form a complex conjugate pair, then +*> v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). +*> Each eigenvector is scaled so the largest component has +*> abs(real part)+abs(imag. part)=1. +*> Not referenced if JOBVR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the matrix VR. LDVR >= 1, and +*> if JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,8*N). +*> For good performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1,...,N: +*> The QZ iteration failed. No eigenvectors have been +*> calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) +*> should be correct for j=INFO+1,...,N. +*> > N: =N+1: other than QZ iteration failed in DHGEQZ. +*> =N+2: error return from DTGEVC. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleGEeigen +* +* ===================================================================== + SUBROUTINE DGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI, + $ BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY + CHARACTER CHTEMP + INTEGER ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, ILO, + $ IN, IRIGHT, IROWS, ITAU, IWRK, JC, JR, MAXWRK, + $ MINWRK + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SMLNUM, TEMP +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DGEQRF, DGGBAK, DGGBAL, DGGHRD, DHGEQZ, DLABAD, + $ DLACPY,DLASCL, DLASET, DORGQR, DORMQR, DTGEVC, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -12 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -14 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. The workspace is +* computed assuming ILO = 1 and IHI = N, the worst case.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = MAX( 1, 8*N ) + MAXWRK = MAX( 1, N*( 7 + + $ ILAENV( 1, 'DGEQRF', ' ', N, 1, N, 0 ) ) ) + MAXWRK = MAX( MAXWRK, N*( 7 + + $ ILAENV( 1, 'DORMQR', ' ', N, 1, N, 0 ) ) ) + IF( ILVL ) THEN + MAXWRK = MAX( MAXWRK, N*( 7 + + $ ILAENV( 1, 'DORGQR', ' ', N, 1, N, -1 ) ) ) + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) + $ INFO = -16 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL DLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = DLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL DLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrices A, B to isolate eigenvalues if possible +* (Workspace: need 6*N) +* + ILEFT = 1 + IRIGHT = N + 1 + IWRK = IRIGHT + N + CALL DGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + IF( ILV ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = IWRK + IWRK = ITAU + IROWS + CALL DGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* (Workspace: need N, prefer N*NB) +* + CALL DORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VL +* (Workspace: need N, prefer N*NB) +* + IF( ILVL ) THEN + CALL DLASET( 'Full', N, N, ZERO, ONE, VL, LDVL ) + IF( IROWS.GT.1 ) THEN + CALL DLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + END IF + CALL DORGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VR +* + IF( ILVR ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + IF( ILV ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL DGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IERR ) + ELSE + CALL DGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR ) + END IF +* +* Perform QZ algorithm (Compute eigenvalues, and optionally, the +* Schur forms and Schur vectors) +* (Workspace: need N) +* + IWRK = ITAU + IF( ILV ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF + CALL DHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 110 + END IF +* +* Compute Eigenvectors +* (Workspace: need 6*N) +* + IF( ILV ) THEN + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF + CALL DTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, N, IN, WORK( IWRK ), IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 110 + END IF +* +* Undo balancing on VL and VR and normalization +* (Workspace: none needed) +* + IF( ILVL ) THEN + CALL DGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VL, LDVL, IERR ) + DO 50 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 50 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 10 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) ) ) + 10 CONTINUE + ELSE + DO 20 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) )+ + $ ABS( VL( JR, JC+1 ) ) ) + 20 CONTINUE + END IF + IF( TEMP.LT.SMLNUM ) + $ GO TO 50 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 30 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 30 CONTINUE + ELSE + DO 40 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + VL( JR, JC+1 ) = VL( JR, JC+1 )*TEMP + 40 CONTINUE + END IF + 50 CONTINUE + END IF + IF( ILVR ) THEN + CALL DGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VR, LDVR, IERR ) + DO 100 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 100 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 60 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) ) ) + 60 CONTINUE + ELSE + DO 70 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) )+ + $ ABS( VR( JR, JC+1 ) ) ) + 70 CONTINUE + END IF + IF( TEMP.LT.SMLNUM ) + $ GO TO 100 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 80 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 80 CONTINUE + ELSE + DO 90 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + VR( JR, JC+1 ) = VR( JR, JC+1 )*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + END IF +* +* End of eigenvector calculation +* + END IF +* +* Undo scaling if necessary +* + 110 CONTINUE +* + IF( ILASCL ) THEN + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR ) + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of DGGEV +* + END diff --git a/dspl/liblapack/SRC/dggev3.f b/dspl/liblapack/SRC/dggev3.f new file mode 100644 index 0000000..43a853d --- /dev/null +++ b/dspl/liblapack/SRC/dggev3.f @@ -0,0 +1,594 @@ +*> \brief DGGEV3 computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices (blocked algorithm) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGGEV3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, +* $ ALPHAI, BETA, VL, LDVL, VR, LDVR, WORK, LWORK, +* $ INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVL, JOBVR +* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), +* $ B( LDB, * ), BETA( * ), VL( LDVL, * ), +* $ VR( LDVR, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGGEV3 computes for a pair of N-by-N real nonsymmetric matrices (A,B) +*> the generalized eigenvalues, and optionally, the left and/or right +*> generalized eigenvectors. +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar +*> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is +*> singular. It is usually represented as the pair (alpha,beta), as +*> there is a reasonable interpretation for beta=0, and even for both +*> being zero. +*> +*> The right eigenvector v(j) corresponding to the eigenvalue lambda(j) +*> of (A,B) satisfies +*> +*> A * v(j) = lambda(j) * B * v(j). +*> +*> The left eigenvector u(j) corresponding to the eigenvalue lambda(j) +*> of (A,B) satisfies +*> +*> u(j)**H * A = lambda(j) * u(j)**H * B . +*> +*> where u(j)**H is the conjugate-transpose of u(j). +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': do not compute the left generalized eigenvectors; +*> = 'V': compute the left generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': do not compute the right generalized eigenvectors; +*> = 'V': compute the right generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VL, and VR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the matrix A in the pair (A,B). +*> On exit, A has been overwritten. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, N) +*> On entry, the matrix B in the pair (A,B). +*> On exit, B has been overwritten. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION array, dimension (N) +*> On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +*> be the generalized eigenvalues. If ALPHAI(j) is zero, then +*> the j-th eigenvalue is real; if positive, then the j-th and +*> (j+1)-st eigenvalues are a complex conjugate pair, with +*> ALPHAI(j+1) negative. +*> +*> Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +*> may easily over- or underflow, and BETA(j) may even be zero. +*> Thus, the user should avoid naively computing the ratio +*> alpha/beta. However, ALPHAR and ALPHAI will be always less +*> than and usually comparable with norm(A) in magnitude, and +*> BETA always less than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is DOUBLE PRECISION array, dimension (LDVL,N) +*> If JOBVL = 'V', the left eigenvectors u(j) are stored one +*> after another in the columns of VL, in the same order as +*> their eigenvalues. If the j-th eigenvalue is real, then +*> u(j) = VL(:,j), the j-th column of VL. If the j-th and +*> (j+1)-th eigenvalues form a complex conjugate pair, then +*> u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). +*> Each eigenvector is scaled so the largest component has +*> abs(real part)+abs(imag. part)=1. +*> Not referenced if JOBVL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the matrix VL. LDVL >= 1, and +*> if JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is DOUBLE PRECISION array, dimension (LDVR,N) +*> If JOBVR = 'V', the right eigenvectors v(j) are stored one +*> after another in the columns of VR, in the same order as +*> their eigenvalues. If the j-th eigenvalue is real, then +*> v(j) = VR(:,j), the j-th column of VR. If the j-th and +*> (j+1)-th eigenvalues form a complex conjugate pair, then +*> v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). +*> Each eigenvector is scaled so the largest component has +*> abs(real part)+abs(imag. part)=1. +*> Not referenced if JOBVR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the matrix VR. LDVR >= 1, and +*> if JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1,...,N: +*> The QZ iteration failed. No eigenvectors have been +*> calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) +*> should be correct for j=INFO+1,...,N. +*> > N: =N+1: other than QZ iteration failed in DHGEQZ. +*> =N+2: error return from DTGEVC. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date January 2015 +* +*> \ingroup doubleGEeigen +* +* ===================================================================== + SUBROUTINE DGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, + $ ALPHAI, BETA, VL, LDVL, VR, LDVR, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.6.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2015 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY + CHARACTER CHTEMP + INTEGER ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, ILO, + $ IN, IRIGHT, IROWS, ITAU, IWRK, JC, JR, LWKOPT + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SMLNUM, TEMP +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DGEQRF, DGGBAK, DGGBAL, DGGHD3, DHGEQZ, DLABAD, + $ DLACPY, DLASCL, DLASET, DORGQR, DORMQR, DTGEVC, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -12 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -14 + ELSE IF( LWORK.LT.MAX( 1, 8*N ) .AND. .NOT.LQUERY ) THEN + INFO = -16 + END IF +* +* Compute workspace +* + IF( INFO.EQ.0 ) THEN + CALL DGEQRF( N, N, B, LDB, WORK, WORK, -1, IERR ) + LWKOPT = MAX(1, 8*N, 3*N+INT( WORK( 1 ) ) ) + CALL DORMQR( 'L', 'T', N, N, N, B, LDB, WORK, A, LDA, WORK, -1, + $ IERR ) + LWKOPT = MAX( LWKOPT, 3*N+INT( WORK ( 1 ) ) ) + IF( ILVL ) THEN + CALL DORGQR( N, N, N, VL, LDVL, WORK, WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, 3*N+INT( WORK ( 1 ) ) ) + END IF + IF( ILV ) THEN + CALL DGGHD3( JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, 3*N+INT( WORK ( 1 ) ) ) + CALL DHGEQZ( 'S', JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, + $ WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, 2*N+INT( WORK ( 1 ) ) ) + ELSE + CALL DGGHD3( 'N', 'N', N, 1, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, 3*N+INT( WORK ( 1 ) ) ) + CALL DHGEQZ( 'E', JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, + $ WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, 2*N+INT( WORK ( 1 ) ) ) + END IF + + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGEV3 ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL DLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = DLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL DLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrices A, B to isolate eigenvalues if possible +* + ILEFT = 1 + IRIGHT = N + 1 + IWRK = IRIGHT + N + CALL DGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* + IROWS = IHI + 1 - ILO + IF( ILV ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = IWRK + IWRK = ITAU + IROWS + CALL DGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* + CALL DORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VL +* + IF( ILVL ) THEN + CALL DLASET( 'Full', N, N, ZERO, ONE, VL, LDVL ) + IF( IROWS.GT.1 ) THEN + CALL DLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + END IF + CALL DORGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VR +* + IF( ILVR ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* + IF( ILV ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL DGGHD3( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, WORK( IWRK ), LWORK+1-IWRK, IERR ) + ELSE + CALL DGGHD3( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Perform QZ algorithm (Compute eigenvalues, and optionally, the +* Schur forms and Schur vectors) +* + IWRK = ITAU + IF( ILV ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF + CALL DHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 110 + END IF +* +* Compute Eigenvectors +* + IF( ILV ) THEN + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF + CALL DTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, N, IN, WORK( IWRK ), IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 110 + END IF +* +* Undo balancing on VL and VR and normalization +* + IF( ILVL ) THEN + CALL DGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VL, LDVL, IERR ) + DO 50 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 50 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 10 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) ) ) + 10 CONTINUE + ELSE + DO 20 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) )+ + $ ABS( VL( JR, JC+1 ) ) ) + 20 CONTINUE + END IF + IF( TEMP.LT.SMLNUM ) + $ GO TO 50 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 30 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 30 CONTINUE + ELSE + DO 40 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + VL( JR, JC+1 ) = VL( JR, JC+1 )*TEMP + 40 CONTINUE + END IF + 50 CONTINUE + END IF + IF( ILVR ) THEN + CALL DGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VR, LDVR, IERR ) + DO 100 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 100 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 60 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) ) ) + 60 CONTINUE + ELSE + DO 70 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) )+ + $ ABS( VR( JR, JC+1 ) ) ) + 70 CONTINUE + END IF + IF( TEMP.LT.SMLNUM ) + $ GO TO 100 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 80 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 80 CONTINUE + ELSE + DO 90 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + VR( JR, JC+1 ) = VR( JR, JC+1 )*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + END IF +* +* End of eigenvector calculation +* + END IF +* +* Undo scaling if necessary +* + 110 CONTINUE +* + IF( ILASCL ) THEN + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR ) + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of DGGEV3 +* + END diff --git a/dspl/liblapack/SRC/dggevx.f b/dspl/liblapack/SRC/dggevx.f new file mode 100644 index 0000000..1f6962d --- /dev/null +++ b/dspl/liblapack/SRC/dggevx.f @@ -0,0 +1,868 @@ +*> \brief DGGEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGGEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGGEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, B, LDB, +* ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, ILO, +* IHI, LSCALE, RSCALE, ABNRM, BBNRM, RCONDE, +* RCONDV, WORK, LWORK, IWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER BALANC, JOBVL, JOBVR, SENSE +* INTEGER IHI, ILO, INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* DOUBLE PRECISION ABNRM, BBNRM +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), +* $ B( LDB, * ), BETA( * ), LSCALE( * ), +* $ RCONDE( * ), RCONDV( * ), RSCALE( * ), +* $ VL( LDVL, * ), VR( LDVR, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGGEVX computes for a pair of N-by-N real nonsymmetric matrices (A,B) +*> the generalized eigenvalues, and optionally, the left and/or right +*> generalized eigenvectors. +*> +*> Optionally also, it computes a balancing transformation to improve +*> the conditioning of the eigenvalues and eigenvectors (ILO, IHI, +*> LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for +*> the eigenvalues (RCONDE), and reciprocal condition numbers for the +*> right eigenvectors (RCONDV). +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar +*> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is +*> singular. It is usually represented as the pair (alpha,beta), as +*> there is a reasonable interpretation for beta=0, and even for both +*> being zero. +*> +*> The right eigenvector v(j) corresponding to the eigenvalue lambda(j) +*> of (A,B) satisfies +*> +*> A * v(j) = lambda(j) * B * v(j) . +*> +*> The left eigenvector u(j) corresponding to the eigenvalue lambda(j) +*> of (A,B) satisfies +*> +*> u(j)**H * A = lambda(j) * u(j)**H * B. +*> +*> where u(j)**H is the conjugate-transpose of u(j). +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] BALANC +*> \verbatim +*> BALANC is CHARACTER*1 +*> Specifies the balance option to be performed. +*> = 'N': do not diagonally scale or permute; +*> = 'P': permute only; +*> = 'S': scale only; +*> = 'B': both permute and scale. +*> Computed reciprocal condition numbers will be for the +*> matrices after permuting and/or balancing. Permuting does +*> not change condition numbers (in exact arithmetic), but +*> balancing does. +*> \endverbatim +*> +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': do not compute the left generalized eigenvectors; +*> = 'V': compute the left generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': do not compute the right generalized eigenvectors; +*> = 'V': compute the right generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] SENSE +*> \verbatim +*> SENSE is CHARACTER*1 +*> Determines which reciprocal condition numbers are computed. +*> = 'N': none are computed; +*> = 'E': computed for eigenvalues only; +*> = 'V': computed for eigenvectors only; +*> = 'B': computed for eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VL, and VR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the matrix A in the pair (A,B). +*> On exit, A has been overwritten. If JOBVL='V' or JOBVR='V' +*> or both, then A contains the first part of the real Schur +*> form of the "balanced" versions of the input A and B. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, N) +*> On entry, the matrix B in the pair (A,B). +*> On exit, B has been overwritten. If JOBVL='V' or JOBVR='V' +*> or both, then B contains the second part of the real Schur +*> form of the "balanced" versions of the input A and B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION array, dimension (N) +*> On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +*> be the generalized eigenvalues. If ALPHAI(j) is zero, then +*> the j-th eigenvalue is real; if positive, then the j-th and +*> (j+1)-st eigenvalues are a complex conjugate pair, with +*> ALPHAI(j+1) negative. +*> +*> Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +*> may easily over- or underflow, and BETA(j) may even be zero. +*> Thus, the user should avoid naively computing the ratio +*> ALPHA/BETA. However, ALPHAR and ALPHAI will be always less +*> than and usually comparable with norm(A) in magnitude, and +*> BETA always less than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is DOUBLE PRECISION array, dimension (LDVL,N) +*> If JOBVL = 'V', the left eigenvectors u(j) are stored one +*> after another in the columns of VL, in the same order as +*> their eigenvalues. If the j-th eigenvalue is real, then +*> u(j) = VL(:,j), the j-th column of VL. If the j-th and +*> (j+1)-th eigenvalues form a complex conjugate pair, then +*> u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). +*> Each eigenvector will be scaled so the largest component have +*> abs(real part) + abs(imag. part) = 1. +*> Not referenced if JOBVL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the matrix VL. LDVL >= 1, and +*> if JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is DOUBLE PRECISION array, dimension (LDVR,N) +*> If JOBVR = 'V', the right eigenvectors v(j) are stored one +*> after another in the columns of VR, in the same order as +*> their eigenvalues. If the j-th eigenvalue is real, then +*> v(j) = VR(:,j), the j-th column of VR. If the j-th and +*> (j+1)-th eigenvalues form a complex conjugate pair, then +*> v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). +*> Each eigenvector will be scaled so the largest component have +*> abs(real part) + abs(imag. part) = 1. +*> Not referenced if JOBVR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the matrix VR. LDVR >= 1, and +*> if JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[out] IHI +*> \verbatim +*> IHI is INTEGER +*> ILO and IHI are integer values such that on exit +*> A(i,j) = 0 and B(i,j) = 0 if i > j and +*> j = 1,...,ILO-1 or i = IHI+1,...,N. +*> If BALANC = 'N' or 'S', ILO = 1 and IHI = N. +*> \endverbatim +*> +*> \param[out] LSCALE +*> \verbatim +*> LSCALE is DOUBLE PRECISION array, dimension (N) +*> Details of the permutations and scaling factors applied +*> to the left side of A and B. If PL(j) is the index of the +*> row interchanged with row j, and DL(j) is the scaling +*> factor applied to row j, then +*> LSCALE(j) = PL(j) for j = 1,...,ILO-1 +*> = DL(j) for j = ILO,...,IHI +*> = PL(j) for j = IHI+1,...,N. +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] RSCALE +*> \verbatim +*> RSCALE is DOUBLE PRECISION array, dimension (N) +*> Details of the permutations and scaling factors applied +*> to the right side of A and B. If PR(j) is the index of the +*> column interchanged with column j, and DR(j) is the scaling +*> factor applied to column j, then +*> RSCALE(j) = PR(j) for j = 1,...,ILO-1 +*> = DR(j) for j = ILO,...,IHI +*> = PR(j) for j = IHI+1,...,N +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] ABNRM +*> \verbatim +*> ABNRM is DOUBLE PRECISION +*> The one-norm of the balanced matrix A. +*> \endverbatim +*> +*> \param[out] BBNRM +*> \verbatim +*> BBNRM is DOUBLE PRECISION +*> The one-norm of the balanced matrix B. +*> \endverbatim +*> +*> \param[out] RCONDE +*> \verbatim +*> RCONDE is DOUBLE PRECISION array, dimension (N) +*> If SENSE = 'E' or 'B', the reciprocal condition numbers of +*> the eigenvalues, stored in consecutive elements of the array. +*> For a complex conjugate pair of eigenvalues two consecutive +*> elements of RCONDE are set to the same value. Thus RCONDE(j), +*> RCONDV(j), and the j-th columns of VL and VR all correspond +*> to the j-th eigenpair. +*> If SENSE = 'N or 'V', RCONDE is not referenced. +*> \endverbatim +*> +*> \param[out] RCONDV +*> \verbatim +*> RCONDV is DOUBLE PRECISION array, dimension (N) +*> If SENSE = 'V' or 'B', the estimated reciprocal condition +*> numbers of the eigenvectors, stored in consecutive elements +*> of the array. For a complex eigenvector two consecutive +*> elements of RCONDV are set to the same value. If the +*> eigenvalues cannot be reordered to compute RCONDV(j), +*> RCONDV(j) is set to 0; this can only occur when the true +*> value would be very small anyway. +*> If SENSE = 'N' or 'E', RCONDV is not referenced. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,2*N). +*> If BALANC = 'S' or 'B', or JOBVL = 'V', or JOBVR = 'V', +*> LWORK >= max(1,6*N). +*> If SENSE = 'E' or 'B', LWORK >= max(1,10*N). +*> If SENSE = 'V' or 'B', LWORK >= 2*N*N+8*N+16. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N+6) +*> If SENSE = 'E', IWORK is not referenced. +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> If SENSE = 'N', BWORK is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1,...,N: +*> The QZ iteration failed. No eigenvectors have been +*> calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) +*> should be correct for j=INFO+1,...,N. +*> > N: =N+1: other than QZ iteration failed in DHGEQZ. +*> =N+2: error return from DTGEVC. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleGEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Balancing a matrix pair (A,B) includes, first, permuting rows and +*> columns to isolate eigenvalues, second, applying diagonal similarity +*> transformation to the rows and columns to make the rows and columns +*> as close in norm as possible. The computed reciprocal condition +*> numbers correspond to the balanced matrix. Permuting rows and columns +*> will not change the condition numbers (in exact arithmetic) but +*> diagonal scaling will. For further explanation of balancing, see +*> section 4.11.1.2 of LAPACK Users' Guide. +*> +*> An approximate error bound on the chordal distance between the i-th +*> computed generalized eigenvalue w and the corresponding exact +*> eigenvalue lambda is +*> +*> chord(w, lambda) <= EPS * norm(ABNRM, BBNRM) / RCONDE(I) +*> +*> An approximate error bound for the angle between the i-th computed +*> eigenvector VL(i) or VR(i) is given by +*> +*> EPS * norm(ABNRM, BBNRM) / DIF(i). +*> +*> For further explanation of the reciprocal condition numbers RCONDE +*> and RCONDV, see section 4.11 of LAPACK User's Guide. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGGEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, ILO, + $ IHI, LSCALE, RSCALE, ABNRM, BBNRM, RCONDE, + $ RCONDV, WORK, LWORK, IWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER BALANC, JOBVL, JOBVR, SENSE + INTEGER IHI, ILO, INFO, LDA, LDB, LDVL, LDVR, LWORK, N + DOUBLE PRECISION ABNRM, BBNRM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), LSCALE( * ), + $ RCONDE( * ), RCONDV( * ), RSCALE( * ), + $ VL( LDVL, * ), VR( LDVR, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY, NOSCL, + $ PAIR, WANTSB, WANTSE, WANTSN, WANTSV + CHARACTER CHTEMP + INTEGER I, ICOLS, IERR, IJOBVL, IJOBVR, IN, IROWS, + $ ITAU, IWRK, IWRK1, J, JC, JR, M, MAXWRK, + $ MINWRK, MM + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SMLNUM, TEMP +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DGEQRF, DGGBAK, DGGBAL, DGGHRD, DHGEQZ, DLABAD, + $ DLACPY, DLASCL, DLASET, DORGQR, DORMQR, DTGEVC, + $ DTGSNA, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* + NOSCL = LSAME( BALANC, 'N' ) .OR. LSAME( BALANC, 'P' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC, + $ 'S' ) .OR. LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) ) + $ THEN + INFO = -1 + ELSE IF( IJOBVL.LE.0 ) THEN + INFO = -2 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -3 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSB .OR. WANTSV ) ) + $ THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -14 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -16 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. The workspace is +* computed assuming ILO = 1 and IHI = N, the worst case.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + ELSE + IF( NOSCL .AND. .NOT.ILV ) THEN + MINWRK = 2*N + ELSE + MINWRK = 6*N + END IF + IF( WANTSE .OR. WANTSB ) THEN + MINWRK = 10*N + END IF + IF( WANTSV .OR. WANTSB ) THEN + MINWRK = MAX( MINWRK, 2*N*( N + 4 ) + 16 ) + END IF + MAXWRK = MINWRK + MAXWRK = MAX( MAXWRK, + $ N + N*ILAENV( 1, 'DGEQRF', ' ', N, 1, N, 0 ) ) + MAXWRK = MAX( MAXWRK, + $ N + N*ILAENV( 1, 'DORMQR', ' ', N, 1, N, 0 ) ) + IF( ILVL ) THEN + MAXWRK = MAX( MAXWRK, N + + $ N*ILAENV( 1, 'DORGQR', ' ', N, 1, N, 0 ) ) + END IF + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -26 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = DLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL DLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = DLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL DLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute and/or balance the matrix pair (A,B) +* (Workspace: need 6*N if BALANC = 'S' or 'B', 1 otherwise) +* + CALL DGGBAL( BALANC, N, A, LDA, B, LDB, ILO, IHI, LSCALE, RSCALE, + $ WORK, IERR ) +* +* Compute ABNRM and BBNRM +* + ABNRM = DLANGE( '1', N, N, A, LDA, WORK( 1 ) ) + IF( ILASCL ) THEN + WORK( 1 ) = ABNRM + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, 1, 1, WORK( 1 ), 1, + $ IERR ) + ABNRM = WORK( 1 ) + END IF +* + BBNRM = DLANGE( '1', N, N, B, LDB, WORK( 1 ) ) + IF( ILBSCL ) THEN + WORK( 1 ) = BBNRM + CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, 1, 1, WORK( 1 ), 1, + $ IERR ) + BBNRM = WORK( 1 ) + END IF +* +* Reduce B to triangular form (QR decomposition of B) +* (Workspace: need N, prefer N*NB ) +* + IROWS = IHI + 1 - ILO + IF( ILV .OR. .NOT.WANTSN ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = 1 + IWRK = ITAU + IROWS + CALL DGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to A +* (Workspace: need N, prefer N*NB) +* + CALL DORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VL and/or VR +* (Workspace: need N, prefer N*NB) +* + IF( ILVL ) THEN + CALL DLASET( 'Full', N, N, ZERO, ONE, VL, LDVL ) + IF( IROWS.GT.1 ) THEN + CALL DLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + END IF + CALL DORGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* + IF( ILVR ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + IF( ILV .OR. .NOT.WANTSN ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL DGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IERR ) + ELSE + CALL DGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR ) + END IF +* +* Perform QZ algorithm (Compute eigenvalues, and optionally, the +* Schur forms and Schur vectors) +* (Workspace: need N) +* + IF( ILV .OR. .NOT.WANTSN ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF +* + CALL DHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, WORK, + $ LWORK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 130 + END IF +* +* Compute Eigenvectors and estimate condition numbers if desired +* (Workspace: DTGEVC: need 6*N +* DTGSNA: need 2*N*(N+2)+16 if SENSE = 'V' or 'B', +* need N otherwise ) +* + IF( ILV .OR. .NOT.WANTSN ) THEN + IF( ILV ) THEN + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL DTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, N, IN, WORK, IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 130 + END IF + END IF +* + IF( .NOT.WANTSN ) THEN +* +* compute eigenvectors (DTGEVC) and estimate condition +* numbers (DTGSNA). Note that the definition of the condition +* number is not invariant under transformation (u,v) to +* (Q*u, Z*v), where (u,v) are eigenvectors of the generalized +* Schur form (S,T), Q and Z are orthogonal matrices. In order +* to avoid using extra 2*N*N workspace, we have to recalculate +* eigenvectors and estimate one condition numbers at a time. +* + PAIR = .FALSE. + DO 20 I = 1, N +* + IF( PAIR ) THEN + PAIR = .FALSE. + GO TO 20 + END IF + MM = 1 + IF( I.LT.N ) THEN + IF( A( I+1, I ).NE.ZERO ) THEN + PAIR = .TRUE. + MM = 2 + END IF + END IF +* + DO 10 J = 1, N + BWORK( J ) = .FALSE. + 10 CONTINUE + IF( MM.EQ.1 ) THEN + BWORK( I ) = .TRUE. + ELSE IF( MM.EQ.2 ) THEN + BWORK( I ) = .TRUE. + BWORK( I+1 ) = .TRUE. + END IF +* + IWRK = MM*N + 1 + IWRK1 = IWRK + MM*N +* +* Compute a pair of left and right eigenvectors. +* (compute workspace: need up to 4*N + 6*N) +* + IF( WANTSE .OR. WANTSB ) THEN + CALL DTGEVC( 'B', 'S', BWORK, N, A, LDA, B, LDB, + $ WORK( 1 ), N, WORK( IWRK ), N, MM, M, + $ WORK( IWRK1 ), IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 130 + END IF + END IF +* + CALL DTGSNA( SENSE, 'S', BWORK, N, A, LDA, B, LDB, + $ WORK( 1 ), N, WORK( IWRK ), N, RCONDE( I ), + $ RCONDV( I ), MM, M, WORK( IWRK1 ), + $ LWORK-IWRK1+1, IWORK, IERR ) +* + 20 CONTINUE + END IF + END IF +* +* Undo balancing on VL and VR and normalization +* (Workspace: none needed) +* + IF( ILVL ) THEN + CALL DGGBAK( BALANC, 'L', N, ILO, IHI, LSCALE, RSCALE, N, VL, + $ LDVL, IERR ) +* + DO 70 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 70 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 30 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) ) ) + 30 CONTINUE + ELSE + DO 40 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) )+ + $ ABS( VL( JR, JC+1 ) ) ) + 40 CONTINUE + END IF + IF( TEMP.LT.SMLNUM ) + $ GO TO 70 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 50 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 50 CONTINUE + ELSE + DO 60 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + VL( JR, JC+1 ) = VL( JR, JC+1 )*TEMP + 60 CONTINUE + END IF + 70 CONTINUE + END IF + IF( ILVR ) THEN + CALL DGGBAK( BALANC, 'R', N, ILO, IHI, LSCALE, RSCALE, N, VR, + $ LDVR, IERR ) + DO 120 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 120 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 80 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) ) ) + 80 CONTINUE + ELSE + DO 90 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) )+ + $ ABS( VR( JR, JC+1 ) ) ) + 90 CONTINUE + END IF + IF( TEMP.LT.SMLNUM ) + $ GO TO 120 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 100 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 100 CONTINUE + ELSE + DO 110 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + VR( JR, JC+1 ) = VR( JR, JC+1 )*TEMP + 110 CONTINUE + END IF + 120 CONTINUE + END IF +* +* Undo scaling if necessary +* + 130 CONTINUE +* + IF( ILASCL ) THEN + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR ) + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of DGGEVX +* + END diff --git a/dspl/liblapack/SRC/dggglm.f b/dspl/liblapack/SRC/dggglm.f new file mode 100644 index 0000000..2e92912 --- /dev/null +++ b/dspl/liblapack/SRC/dggglm.f @@ -0,0 +1,348 @@ +*> \brief \b DGGGLM +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGGGLM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), D( * ), WORK( * ), +* $ X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGGGLM solves a general Gauss-Markov linear model (GLM) problem: +*> +*> minimize || y ||_2 subject to d = A*x + B*y +*> x +*> +*> where A is an N-by-M matrix, B is an N-by-P matrix, and d is a +*> given N-vector. It is assumed that M <= N <= M+P, and +*> +*> rank(A) = M and rank( A B ) = N. +*> +*> Under these assumptions, the constrained equation is always +*> consistent, and there is a unique solution x and a minimal 2-norm +*> solution y, which is obtained using a generalized QR factorization +*> of the matrices (A, B) given by +*> +*> A = Q*(R), B = Q*T*Z. +*> (0) +*> +*> In particular, if matrix B is square nonsingular, then the problem +*> GLM is equivalent to the following weighted linear least squares +*> problem +*> +*> minimize || inv(B)*(d-A*x) ||_2 +*> x +*> +*> where inv(B) denotes the inverse of B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of columns of the matrix A. 0 <= M <= N. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of columns of the matrix B. P >= N-M. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,M) +*> On entry, the N-by-M matrix A. +*> On exit, the upper triangular part of the array A contains +*> the M-by-M upper triangular matrix R. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,P) +*> On entry, the N-by-P matrix B. +*> On exit, if N <= P, the upper triangle of the subarray +*> B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; +*> if N > P, the elements on and above the (N-P)th subdiagonal +*> contain the N-by-P upper trapezoidal matrix T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, D is the left hand side of the GLM equation. +*> On exit, D is destroyed. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (M) +*> \endverbatim +*> +*> \param[out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, dimension (P) +*> +*> On exit, X and Y are the solutions of the GLM problem. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N+M+P). +*> For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB, +*> where NB is an upper bound for the optimal blocksizes for +*> DGEQRF, SGERQF, DORMQR and SORMRQ. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1: the upper triangular factor R associated with A in the +*> generalized QR factorization of the pair (A, B) is +*> singular, so that rank(A) < M; the least squares +*> solution could not be computed. +*> = 2: the bottom (N-M) by (N-M) part of the upper trapezoidal +*> factor T associated with B in the generalized QR +*> factorization of the pair (A, B) is singular, so that +*> rank( A B ) < N; the least squares solution could not +*> be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHEReigen +* +* ===================================================================== + SUBROUTINE DGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), D( * ), WORK( * ), + $ X( * ), Y( * ) +* .. +* +* =================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, LOPT, LWKMIN, LWKOPT, NB, NB1, NB2, NB3, + $ NB4, NP +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMV, DGGQRF, DORMQR, DORMRQ, DTRTRS, + $ XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NP = MIN( N, P ) + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 .OR. M.GT.N ) THEN + INFO = -2 + ELSE IF( P.LT.0 .OR. P.LT.N-M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF +* +* Calculate workspace +* + IF( INFO.EQ.0) THEN + IF( N.EQ.0 ) THEN + LWKMIN = 1 + LWKOPT = 1 + ELSE + NB1 = ILAENV( 1, 'DGEQRF', ' ', N, M, -1, -1 ) + NB2 = ILAENV( 1, 'DGERQF', ' ', N, M, -1, -1 ) + NB3 = ILAENV( 1, 'DORMQR', ' ', N, M, P, -1 ) + NB4 = ILAENV( 1, 'DORMRQ', ' ', N, M, P, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKMIN = M + N + P + LWKOPT = M + NP + MAX( N, P )*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGGLM', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the GQR factorization of matrices A and B: +* +* Q**T*A = ( R11 ) M, Q**T*B*Z**T = ( T11 T12 ) M +* ( 0 ) N-M ( 0 T22 ) N-M +* M M+P-N N-M +* +* where R11 and T22 are upper triangular, and Q and Z are +* orthogonal. +* + CALL DGGQRF( N, M, P, A, LDA, WORK, B, LDB, WORK( M+1 ), + $ WORK( M+NP+1 ), LWORK-M-NP, INFO ) + LOPT = WORK( M+NP+1 ) +* +* Update left-hand-side vector d = Q**T*d = ( d1 ) M +* ( d2 ) N-M +* + CALL DORMQR( 'Left', 'Transpose', N, 1, M, A, LDA, WORK, D, + $ MAX( 1, N ), WORK( M+NP+1 ), LWORK-M-NP, INFO ) + LOPT = MAX( LOPT, INT( WORK( M+NP+1 ) ) ) +* +* Solve T22*y2 = d2 for y2 +* + IF( N.GT.M ) THEN + CALL DTRTRS( 'Upper', 'No transpose', 'Non unit', N-M, 1, + $ B( M+1, M+P-N+1 ), LDB, D( M+1 ), N-M, INFO ) +* + IF( INFO.GT.0 ) THEN + INFO = 1 + RETURN + END IF +* + CALL DCOPY( N-M, D( M+1 ), 1, Y( M+P-N+1 ), 1 ) + END IF +* +* Set y1 = 0 +* + DO 10 I = 1, M + P - N + Y( I ) = ZERO + 10 CONTINUE +* +* Update d1 = d1 - T12*y2 +* + CALL DGEMV( 'No transpose', M, N-M, -ONE, B( 1, M+P-N+1 ), LDB, + $ Y( M+P-N+1 ), 1, ONE, D, 1 ) +* +* Solve triangular system: R11*x = d1 +* + IF( M.GT.0 ) THEN + CALL DTRTRS( 'Upper', 'No Transpose', 'Non unit', M, 1, A, LDA, + $ D, M, INFO ) +* + IF( INFO.GT.0 ) THEN + INFO = 2 + RETURN + END IF +* +* Copy D to X +* + CALL DCOPY( M, D, 1, X, 1 ) + END IF +* +* Backward transformation y = Z**T *y +* + CALL DORMRQ( 'Left', 'Transpose', P, 1, NP, + $ B( MAX( 1, N-P+1 ), 1 ), LDB, WORK( M+1 ), Y, + $ MAX( 1, P ), WORK( M+NP+1 ), LWORK-M-NP, INFO ) + WORK( 1 ) = M + NP + MAX( LOPT, INT( WORK( M+NP+1 ) ) ) +* + RETURN +* +* End of DGGGLM +* + END diff --git a/dspl/liblapack/SRC/dgghd3.f b/dspl/liblapack/SRC/dgghd3.f new file mode 100644 index 0000000..283d914 --- /dev/null +++ b/dspl/liblapack/SRC/dgghd3.f @@ -0,0 +1,897 @@ +*> \brief \b DGGHD3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGGHD3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, +* LDQ, Z, LDZ, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ, COMPZ +* INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ Z( LDZ, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGGHD3 reduces a pair of real matrices (A,B) to generalized upper +*> Hessenberg form using orthogonal transformations, where A is a +*> general matrix and B is upper triangular. The form of the +*> generalized eigenvalue problem is +*> A*x = lambda*B*x, +*> and B is typically made upper triangular by computing its QR +*> factorization and moving the orthogonal matrix Q to the left side +*> of the equation. +*> +*> This subroutine simultaneously reduces A to a Hessenberg matrix H: +*> Q**T*A*Z = H +*> and transforms B to another upper triangular matrix T: +*> Q**T*B*Z = T +*> in order to reduce the problem to its standard form +*> H*y = lambda*T*y +*> where y = Z**T*x. +*> +*> The orthogonal matrices Q and Z are determined as products of Givens +*> rotations. They may either be formed explicitly, or they may be +*> postmultiplied into input matrices Q1 and Z1, so that +*> +*> Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T +*> +*> Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T +*> +*> If Q1 is the orthogonal matrix from the QR factorization of B in the +*> original equation A*x = lambda*B*x, then DGGHD3 reduces the original +*> problem to generalized Hessenberg form. +*> +*> This is a blocked variant of DGGHRD, using matrix-matrix +*> multiplications for parts of the computation to enhance performance. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> = 'N': do not compute Q; +*> = 'I': Q is initialized to the unit matrix, and the +*> orthogonal matrix Q is returned; +*> = 'V': Q must contain an orthogonal matrix Q1 on entry, +*> and the product Q1*Q is returned. +*> \endverbatim +*> +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': do not compute Z; +*> = 'I': Z is initialized to the unit matrix, and the +*> orthogonal matrix Z is returned; +*> = 'V': Z must contain an orthogonal matrix Z1 on entry, +*> and the product Z1*Z is returned. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> ILO and IHI mark the rows and columns of A which are to be +*> reduced. It is assumed that A is already upper triangular +*> in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are +*> normally set by a previous call to DGGBAL; otherwise they +*> should be set to 1 and N respectively. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the N-by-N general matrix to be reduced. +*> On exit, the upper triangle and the first subdiagonal of A +*> are overwritten with the upper Hessenberg matrix H, and the +*> rest is set to zero. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, N) +*> On entry, the N-by-N upper triangular matrix B. +*> On exit, the upper triangular matrix T = Q**T B Z. The +*> elements below the diagonal are set to zero. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ, N) +*> On entry, if COMPQ = 'V', the orthogonal matrix Q1, +*> typically from the QR factorization of B. +*> On exit, if COMPQ='I', the orthogonal matrix Q, and if +*> COMPQ = 'V', the product Q1*Q. +*> Not referenced if COMPQ='N'. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', the orthogonal matrix Z1. +*> On exit, if COMPZ='I', the orthogonal matrix Z, and if +*> COMPZ = 'V', the product Z1*Z. +*> Not referenced if COMPZ='N'. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. +*> LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1. +*> For optimum performance LWORK >= 6*N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date January 2015 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> This routine reduces A to Hessenberg form and maintains B in +*> using a blocked variant of Moler and Stewart's original algorithm, +*> as described by Kagstrom, Kressner, Quintana-Orti, and Quintana-Orti +*> (BIT 2008). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, + $ LDQ, Z, LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2015 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ + INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL BLK22, INITQ, INITZ, LQUERY, WANTQ, WANTZ + CHARACTER*1 COMPQ2, COMPZ2 + INTEGER COLA, I, IERR, J, J0, JCOL, JJ, JROW, K, + $ KACC22, LEN, LWKOPT, N2NB, NB, NBLST, NBMIN, + $ NH, NNB, NX, PPW, PPWO, PW, TOP, TOPQ + DOUBLE PRECISION C, C1, C2, S, S1, S2, TEMP, TEMP1, TEMP2, TEMP3 +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGGHRD, DLARTG, DLASET, DORM22, DROT, DGEMM, + $ DGEMV, DTRMV, DLACPY, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + INFO = 0 + NB = ILAENV( 1, 'DGGHD3', ' ', N, ILO, IHI, -1 ) + LWKOPT = MAX( 6*N*NB, 1 ) + WORK( 1 ) = DBLE( LWKOPT ) + INITQ = LSAME( COMPQ, 'I' ) + WANTQ = INITQ .OR. LSAME( COMPQ, 'V' ) + INITZ = LSAME( COMPZ, 'I' ) + WANTZ = INITZ .OR. LSAME( COMPZ, 'V' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPZ, 'N' ) .AND. .NOT.WANTZ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( ( WANTQ .AND. LDQ.LT.N ) .OR. LDQ.LT.1 ) THEN + INFO = -11 + ELSE IF( ( WANTZ .AND. LDZ.LT.N ) .OR. LDZ.LT.1 ) THEN + INFO = -13 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGHD3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Initialize Q and Z if desired. +* + IF( INITQ ) + $ CALL DLASET( 'All', N, N, ZERO, ONE, Q, LDQ ) + IF( INITZ ) + $ CALL DLASET( 'All', N, N, ZERO, ONE, Z, LDZ ) +* +* Zero out lower triangle of B. +* + IF( N.GT.1 ) + $ CALL DLASET( 'Lower', N-1, N-1, ZERO, ZERO, B(2, 1), LDB ) +* +* Quick return if possible +* + NH = IHI - ILO + 1 + IF( NH.LE.1 ) THEN + WORK( 1 ) = ONE + RETURN + END IF +* +* Determine the blocksize. +* + NBMIN = ILAENV( 2, 'DGGHD3', ' ', N, ILO, IHI, -1 ) + IF( NB.GT.1 .AND. NB.LT.NH ) THEN +* +* Determine when to use unblocked instead of blocked code. +* + NX = MAX( NB, ILAENV( 3, 'DGGHD3', ' ', N, ILO, IHI, -1 ) ) + IF( NX.LT.NH ) THEN +* +* Determine if workspace is large enough for blocked code. +* + IF( LWORK.LT.LWKOPT ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code. +* + NBMIN = MAX( 2, ILAENV( 2, 'DGGHD3', ' ', N, ILO, IHI, + $ -1 ) ) + IF( LWORK.GE.6*N*NBMIN ) THEN + NB = LWORK / ( 6*N ) + ELSE + NB = 1 + END IF + END IF + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN +* +* Use unblocked code below +* + JCOL = ILO +* + ELSE +* +* Use blocked code +* + KACC22 = ILAENV( 16, 'DGGHD3', ' ', N, ILO, IHI, -1 ) + BLK22 = KACC22.EQ.2 + DO JCOL = ILO, IHI-2, NB + NNB = MIN( NB, IHI-JCOL-1 ) +* +* Initialize small orthogonal factors that will hold the +* accumulated Givens rotations in workspace. +* N2NB denotes the number of 2*NNB-by-2*NNB factors +* NBLST denotes the (possibly smaller) order of the last +* factor. +* + N2NB = ( IHI-JCOL-1 ) / NNB - 1 + NBLST = IHI - JCOL - N2NB*NNB + CALL DLASET( 'All', NBLST, NBLST, ZERO, ONE, WORK, NBLST ) + PW = NBLST * NBLST + 1 + DO I = 1, N2NB + CALL DLASET( 'All', 2*NNB, 2*NNB, ZERO, ONE, + $ WORK( PW ), 2*NNB ) + PW = PW + 4*NNB*NNB + END DO +* +* Reduce columns JCOL:JCOL+NNB-1 of A to Hessenberg form. +* + DO J = JCOL, JCOL+NNB-1 +* +* Reduce Jth column of A. Store cosines and sines in Jth +* column of A and B, respectively. +* + DO I = IHI, J+2, -1 + TEMP = A( I-1, J ) + CALL DLARTG( TEMP, A( I, J ), C, S, A( I-1, J ) ) + A( I, J ) = C + B( I, J ) = S + END DO +* +* Accumulate Givens rotations into workspace array. +* + PPW = ( NBLST + 1 )*( NBLST - 2 ) - J + JCOL + 1 + LEN = 2 + J - JCOL + JROW = J + N2NB*NNB + 2 + DO I = IHI, JROW, -1 + C = A( I, J ) + S = B( I, J ) + DO JJ = PPW, PPW+LEN-1 + TEMP = WORK( JJ + NBLST ) + WORK( JJ + NBLST ) = C*TEMP - S*WORK( JJ ) + WORK( JJ ) = S*TEMP + C*WORK( JJ ) + END DO + LEN = LEN + 1 + PPW = PPW - NBLST - 1 + END DO +* + PPWO = NBLST*NBLST + ( NNB+J-JCOL-1 )*2*NNB + NNB + J0 = JROW - NNB + DO JROW = J0, J+2, -NNB + PPW = PPWO + LEN = 2 + J - JCOL + DO I = JROW+NNB-1, JROW, -1 + C = A( I, J ) + S = B( I, J ) + DO JJ = PPW, PPW+LEN-1 + TEMP = WORK( JJ + 2*NNB ) + WORK( JJ + 2*NNB ) = C*TEMP - S*WORK( JJ ) + WORK( JJ ) = S*TEMP + C*WORK( JJ ) + END DO + LEN = LEN + 1 + PPW = PPW - 2*NNB - 1 + END DO + PPWO = PPWO + 4*NNB*NNB + END DO +* +* TOP denotes the number of top rows in A and B that will +* not be updated during the next steps. +* + IF( JCOL.LE.2 ) THEN + TOP = 0 + ELSE + TOP = JCOL + END IF +* +* Propagate transformations through B and replace stored +* left sines/cosines by right sines/cosines. +* + DO JJ = N, J+1, -1 +* +* Update JJth column of B. +* + DO I = MIN( JJ+1, IHI ), J+2, -1 + C = A( I, J ) + S = B( I, J ) + TEMP = B( I, JJ ) + B( I, JJ ) = C*TEMP - S*B( I-1, JJ ) + B( I-1, JJ ) = S*TEMP + C*B( I-1, JJ ) + END DO +* +* Annihilate B( JJ+1, JJ ). +* + IF( JJ.LT.IHI ) THEN + TEMP = B( JJ+1, JJ+1 ) + CALL DLARTG( TEMP, B( JJ+1, JJ ), C, S, + $ B( JJ+1, JJ+1 ) ) + B( JJ+1, JJ ) = ZERO + CALL DROT( JJ-TOP, B( TOP+1, JJ+1 ), 1, + $ B( TOP+1, JJ ), 1, C, S ) + A( JJ+1, J ) = C + B( JJ+1, J ) = -S + END IF + END DO +* +* Update A by transformations from right. +* Explicit loop unrolling provides better performance +* compared to DLASR. +* CALL DLASR( 'Right', 'Variable', 'Backward', IHI-TOP, +* $ IHI-J, A( J+2, J ), B( J+2, J ), +* $ A( TOP+1, J+1 ), LDA ) +* + JJ = MOD( IHI-J-1, 3 ) + DO I = IHI-J-3, JJ+1, -3 + C = A( J+1+I, J ) + S = -B( J+1+I, J ) + C1 = A( J+2+I, J ) + S1 = -B( J+2+I, J ) + C2 = A( J+3+I, J ) + S2 = -B( J+3+I, J ) +* + DO K = TOP+1, IHI + TEMP = A( K, J+I ) + TEMP1 = A( K, J+I+1 ) + TEMP2 = A( K, J+I+2 ) + TEMP3 = A( K, J+I+3 ) + A( K, J+I+3 ) = C2*TEMP3 + S2*TEMP2 + TEMP2 = -S2*TEMP3 + C2*TEMP2 + A( K, J+I+2 ) = C1*TEMP2 + S1*TEMP1 + TEMP1 = -S1*TEMP2 + C1*TEMP1 + A( K, J+I+1 ) = C*TEMP1 + S*TEMP + A( K, J+I ) = -S*TEMP1 + C*TEMP + END DO + END DO +* + IF( JJ.GT.0 ) THEN + DO I = JJ, 1, -1 + CALL DROT( IHI-TOP, A( TOP+1, J+I+1 ), 1, + $ A( TOP+1, J+I ), 1, A( J+1+I, J ), + $ -B( J+1+I, J ) ) + END DO + END IF +* +* Update (J+1)th column of A by transformations from left. +* + IF ( J .LT. JCOL + NNB - 1 ) THEN + LEN = 1 + J - JCOL +* +* Multiply with the trailing accumulated orthogonal +* matrix, which takes the form +* +* [ U11 U12 ] +* U = [ ], +* [ U21 U22 ] +* +* where U21 is a LEN-by-LEN matrix and U12 is lower +* triangular. +* + JROW = IHI - NBLST + 1 + CALL DGEMV( 'Transpose', NBLST, LEN, ONE, WORK, + $ NBLST, A( JROW, J+1 ), 1, ZERO, + $ WORK( PW ), 1 ) + PPW = PW + LEN + DO I = JROW, JROW+NBLST-LEN-1 + WORK( PPW ) = A( I, J+1 ) + PPW = PPW + 1 + END DO + CALL DTRMV( 'Lower', 'Transpose', 'Non-unit', + $ NBLST-LEN, WORK( LEN*NBLST + 1 ), NBLST, + $ WORK( PW+LEN ), 1 ) + CALL DGEMV( 'Transpose', LEN, NBLST-LEN, ONE, + $ WORK( (LEN+1)*NBLST - LEN + 1 ), NBLST, + $ A( JROW+NBLST-LEN, J+1 ), 1, ONE, + $ WORK( PW+LEN ), 1 ) + PPW = PW + DO I = JROW, JROW+NBLST-1 + A( I, J+1 ) = WORK( PPW ) + PPW = PPW + 1 + END DO +* +* Multiply with the other accumulated orthogonal +* matrices, which take the form +* +* [ U11 U12 0 ] +* [ ] +* U = [ U21 U22 0 ], +* [ ] +* [ 0 0 I ] +* +* where I denotes the (NNB-LEN)-by-(NNB-LEN) identity +* matrix, U21 is a LEN-by-LEN upper triangular matrix +* and U12 is an NNB-by-NNB lower triangular matrix. +* + PPWO = 1 + NBLST*NBLST + J0 = JROW - NNB + DO JROW = J0, JCOL+1, -NNB + PPW = PW + LEN + DO I = JROW, JROW+NNB-1 + WORK( PPW ) = A( I, J+1 ) + PPW = PPW + 1 + END DO + PPW = PW + DO I = JROW+NNB, JROW+NNB+LEN-1 + WORK( PPW ) = A( I, J+1 ) + PPW = PPW + 1 + END DO + CALL DTRMV( 'Upper', 'Transpose', 'Non-unit', LEN, + $ WORK( PPWO + NNB ), 2*NNB, WORK( PW ), + $ 1 ) + CALL DTRMV( 'Lower', 'Transpose', 'Non-unit', NNB, + $ WORK( PPWO + 2*LEN*NNB ), + $ 2*NNB, WORK( PW + LEN ), 1 ) + CALL DGEMV( 'Transpose', NNB, LEN, ONE, + $ WORK( PPWO ), 2*NNB, A( JROW, J+1 ), 1, + $ ONE, WORK( PW ), 1 ) + CALL DGEMV( 'Transpose', LEN, NNB, ONE, + $ WORK( PPWO + 2*LEN*NNB + NNB ), 2*NNB, + $ A( JROW+NNB, J+1 ), 1, ONE, + $ WORK( PW+LEN ), 1 ) + PPW = PW + DO I = JROW, JROW+LEN+NNB-1 + A( I, J+1 ) = WORK( PPW ) + PPW = PPW + 1 + END DO + PPWO = PPWO + 4*NNB*NNB + END DO + END IF + END DO +* +* Apply accumulated orthogonal matrices to A. +* + COLA = N - JCOL - NNB + 1 + J = IHI - NBLST + 1 + CALL DGEMM( 'Transpose', 'No Transpose', NBLST, + $ COLA, NBLST, ONE, WORK, NBLST, + $ A( J, JCOL+NNB ), LDA, ZERO, WORK( PW ), + $ NBLST ) + CALL DLACPY( 'All', NBLST, COLA, WORK( PW ), NBLST, + $ A( J, JCOL+NNB ), LDA ) + PPWO = NBLST*NBLST + 1 + J0 = J - NNB + DO J = J0, JCOL+1, -NNB + IF ( BLK22 ) THEN +* +* Exploit the structure of +* +* [ U11 U12 ] +* U = [ ] +* [ U21 U22 ], +* +* where all blocks are NNB-by-NNB, U21 is upper +* triangular and U12 is lower triangular. +* + CALL DORM22( 'Left', 'Transpose', 2*NNB, COLA, NNB, + $ NNB, WORK( PPWO ), 2*NNB, + $ A( J, JCOL+NNB ), LDA, WORK( PW ), + $ LWORK-PW+1, IERR ) + ELSE +* +* Ignore the structure of U. +* + CALL DGEMM( 'Transpose', 'No Transpose', 2*NNB, + $ COLA, 2*NNB, ONE, WORK( PPWO ), 2*NNB, + $ A( J, JCOL+NNB ), LDA, ZERO, WORK( PW ), + $ 2*NNB ) + CALL DLACPY( 'All', 2*NNB, COLA, WORK( PW ), 2*NNB, + $ A( J, JCOL+NNB ), LDA ) + END IF + PPWO = PPWO + 4*NNB*NNB + END DO +* +* Apply accumulated orthogonal matrices to Q. +* + IF( WANTQ ) THEN + J = IHI - NBLST + 1 + IF ( INITQ ) THEN + TOPQ = MAX( 2, J - JCOL + 1 ) + NH = IHI - TOPQ + 1 + ELSE + TOPQ = 1 + NH = N + END IF + CALL DGEMM( 'No Transpose', 'No Transpose', NH, + $ NBLST, NBLST, ONE, Q( TOPQ, J ), LDQ, + $ WORK, NBLST, ZERO, WORK( PW ), NH ) + CALL DLACPY( 'All', NH, NBLST, WORK( PW ), NH, + $ Q( TOPQ, J ), LDQ ) + PPWO = NBLST*NBLST + 1 + J0 = J - NNB + DO J = J0, JCOL+1, -NNB + IF ( INITQ ) THEN + TOPQ = MAX( 2, J - JCOL + 1 ) + NH = IHI - TOPQ + 1 + END IF + IF ( BLK22 ) THEN +* +* Exploit the structure of U. +* + CALL DORM22( 'Right', 'No Transpose', NH, 2*NNB, + $ NNB, NNB, WORK( PPWO ), 2*NNB, + $ Q( TOPQ, J ), LDQ, WORK( PW ), + $ LWORK-PW+1, IERR ) + ELSE +* +* Ignore the structure of U. +* + CALL DGEMM( 'No Transpose', 'No Transpose', NH, + $ 2*NNB, 2*NNB, ONE, Q( TOPQ, J ), LDQ, + $ WORK( PPWO ), 2*NNB, ZERO, WORK( PW ), + $ NH ) + CALL DLACPY( 'All', NH, 2*NNB, WORK( PW ), NH, + $ Q( TOPQ, J ), LDQ ) + END IF + PPWO = PPWO + 4*NNB*NNB + END DO + END IF +* +* Accumulate right Givens rotations if required. +* + IF ( WANTZ .OR. TOP.GT.0 ) THEN +* +* Initialize small orthogonal factors that will hold the +* accumulated Givens rotations in workspace. +* + CALL DLASET( 'All', NBLST, NBLST, ZERO, ONE, WORK, + $ NBLST ) + PW = NBLST * NBLST + 1 + DO I = 1, N2NB + CALL DLASET( 'All', 2*NNB, 2*NNB, ZERO, ONE, + $ WORK( PW ), 2*NNB ) + PW = PW + 4*NNB*NNB + END DO +* +* Accumulate Givens rotations into workspace array. +* + DO J = JCOL, JCOL+NNB-1 + PPW = ( NBLST + 1 )*( NBLST - 2 ) - J + JCOL + 1 + LEN = 2 + J - JCOL + JROW = J + N2NB*NNB + 2 + DO I = IHI, JROW, -1 + C = A( I, J ) + A( I, J ) = ZERO + S = B( I, J ) + B( I, J ) = ZERO + DO JJ = PPW, PPW+LEN-1 + TEMP = WORK( JJ + NBLST ) + WORK( JJ + NBLST ) = C*TEMP - S*WORK( JJ ) + WORK( JJ ) = S*TEMP + C*WORK( JJ ) + END DO + LEN = LEN + 1 + PPW = PPW - NBLST - 1 + END DO +* + PPWO = NBLST*NBLST + ( NNB+J-JCOL-1 )*2*NNB + NNB + J0 = JROW - NNB + DO JROW = J0, J+2, -NNB + PPW = PPWO + LEN = 2 + J - JCOL + DO I = JROW+NNB-1, JROW, -1 + C = A( I, J ) + A( I, J ) = ZERO + S = B( I, J ) + B( I, J ) = ZERO + DO JJ = PPW, PPW+LEN-1 + TEMP = WORK( JJ + 2*NNB ) + WORK( JJ + 2*NNB ) = C*TEMP - S*WORK( JJ ) + WORK( JJ ) = S*TEMP + C*WORK( JJ ) + END DO + LEN = LEN + 1 + PPW = PPW - 2*NNB - 1 + END DO + PPWO = PPWO + 4*NNB*NNB + END DO + END DO + ELSE +* + CALL DLASET( 'Lower', IHI - JCOL - 1, NNB, ZERO, ZERO, + $ A( JCOL + 2, JCOL ), LDA ) + CALL DLASET( 'Lower', IHI - JCOL - 1, NNB, ZERO, ZERO, + $ B( JCOL + 2, JCOL ), LDB ) + END IF +* +* Apply accumulated orthogonal matrices to A and B. +* + IF ( TOP.GT.0 ) THEN + J = IHI - NBLST + 1 + CALL DGEMM( 'No Transpose', 'No Transpose', TOP, + $ NBLST, NBLST, ONE, A( 1, J ), LDA, + $ WORK, NBLST, ZERO, WORK( PW ), TOP ) + CALL DLACPY( 'All', TOP, NBLST, WORK( PW ), TOP, + $ A( 1, J ), LDA ) + PPWO = NBLST*NBLST + 1 + J0 = J - NNB + DO J = J0, JCOL+1, -NNB + IF ( BLK22 ) THEN +* +* Exploit the structure of U. +* + CALL DORM22( 'Right', 'No Transpose', TOP, 2*NNB, + $ NNB, NNB, WORK( PPWO ), 2*NNB, + $ A( 1, J ), LDA, WORK( PW ), + $ LWORK-PW+1, IERR ) + ELSE +* +* Ignore the structure of U. +* + CALL DGEMM( 'No Transpose', 'No Transpose', TOP, + $ 2*NNB, 2*NNB, ONE, A( 1, J ), LDA, + $ WORK( PPWO ), 2*NNB, ZERO, + $ WORK( PW ), TOP ) + CALL DLACPY( 'All', TOP, 2*NNB, WORK( PW ), TOP, + $ A( 1, J ), LDA ) + END IF + PPWO = PPWO + 4*NNB*NNB + END DO +* + J = IHI - NBLST + 1 + CALL DGEMM( 'No Transpose', 'No Transpose', TOP, + $ NBLST, NBLST, ONE, B( 1, J ), LDB, + $ WORK, NBLST, ZERO, WORK( PW ), TOP ) + CALL DLACPY( 'All', TOP, NBLST, WORK( PW ), TOP, + $ B( 1, J ), LDB ) + PPWO = NBLST*NBLST + 1 + J0 = J - NNB + DO J = J0, JCOL+1, -NNB + IF ( BLK22 ) THEN +* +* Exploit the structure of U. +* + CALL DORM22( 'Right', 'No Transpose', TOP, 2*NNB, + $ NNB, NNB, WORK( PPWO ), 2*NNB, + $ B( 1, J ), LDB, WORK( PW ), + $ LWORK-PW+1, IERR ) + ELSE +* +* Ignore the structure of U. +* + CALL DGEMM( 'No Transpose', 'No Transpose', TOP, + $ 2*NNB, 2*NNB, ONE, B( 1, J ), LDB, + $ WORK( PPWO ), 2*NNB, ZERO, + $ WORK( PW ), TOP ) + CALL DLACPY( 'All', TOP, 2*NNB, WORK( PW ), TOP, + $ B( 1, J ), LDB ) + END IF + PPWO = PPWO + 4*NNB*NNB + END DO + END IF +* +* Apply accumulated orthogonal matrices to Z. +* + IF( WANTZ ) THEN + J = IHI - NBLST + 1 + IF ( INITQ ) THEN + TOPQ = MAX( 2, J - JCOL + 1 ) + NH = IHI - TOPQ + 1 + ELSE + TOPQ = 1 + NH = N + END IF + CALL DGEMM( 'No Transpose', 'No Transpose', NH, + $ NBLST, NBLST, ONE, Z( TOPQ, J ), LDZ, + $ WORK, NBLST, ZERO, WORK( PW ), NH ) + CALL DLACPY( 'All', NH, NBLST, WORK( PW ), NH, + $ Z( TOPQ, J ), LDZ ) + PPWO = NBLST*NBLST + 1 + J0 = J - NNB + DO J = J0, JCOL+1, -NNB + IF ( INITQ ) THEN + TOPQ = MAX( 2, J - JCOL + 1 ) + NH = IHI - TOPQ + 1 + END IF + IF ( BLK22 ) THEN +* +* Exploit the structure of U. +* + CALL DORM22( 'Right', 'No Transpose', NH, 2*NNB, + $ NNB, NNB, WORK( PPWO ), 2*NNB, + $ Z( TOPQ, J ), LDZ, WORK( PW ), + $ LWORK-PW+1, IERR ) + ELSE +* +* Ignore the structure of U. +* + CALL DGEMM( 'No Transpose', 'No Transpose', NH, + $ 2*NNB, 2*NNB, ONE, Z( TOPQ, J ), LDZ, + $ WORK( PPWO ), 2*NNB, ZERO, WORK( PW ), + $ NH ) + CALL DLACPY( 'All', NH, 2*NNB, WORK( PW ), NH, + $ Z( TOPQ, J ), LDZ ) + END IF + PPWO = PPWO + 4*NNB*NNB + END DO + END IF + END DO + END IF +* +* Use unblocked code to reduce the rest of the matrix +* Avoid re-initialization of modified Q and Z. +* + COMPQ2 = COMPQ + COMPZ2 = COMPZ + IF ( JCOL.NE.ILO ) THEN + IF ( WANTQ ) + $ COMPQ2 = 'V' + IF ( WANTZ ) + $ COMPZ2 = 'V' + END IF +* + IF ( JCOL.LT.IHI ) + $ CALL DGGHRD( COMPQ2, COMPZ2, N, JCOL, IHI, A, LDA, B, LDB, Q, + $ LDQ, Z, LDZ, IERR ) + WORK( 1 ) = DBLE( LWKOPT ) +* + RETURN +* +* End of DGGHD3 +* + END diff --git a/dspl/liblapack/SRC/dgghrd.f b/dspl/liblapack/SRC/dgghrd.f new file mode 100644 index 0000000..3a74899 --- /dev/null +++ b/dspl/liblapack/SRC/dgghrd.f @@ -0,0 +1,361 @@ +*> \brief \b DGGHRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGGHRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, +* LDQ, Z, LDZ, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ, COMPZ +* INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGGHRD reduces a pair of real matrices (A,B) to generalized upper +*> Hessenberg form using orthogonal transformations, where A is a +*> general matrix and B is upper triangular. The form of the +*> generalized eigenvalue problem is +*> A*x = lambda*B*x, +*> and B is typically made upper triangular by computing its QR +*> factorization and moving the orthogonal matrix Q to the left side +*> of the equation. +*> +*> This subroutine simultaneously reduces A to a Hessenberg matrix H: +*> Q**T*A*Z = H +*> and transforms B to another upper triangular matrix T: +*> Q**T*B*Z = T +*> in order to reduce the problem to its standard form +*> H*y = lambda*T*y +*> where y = Z**T*x. +*> +*> The orthogonal matrices Q and Z are determined as products of Givens +*> rotations. They may either be formed explicitly, or they may be +*> postmultiplied into input matrices Q1 and Z1, so that +*> +*> Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T +*> +*> Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T +*> +*> If Q1 is the orthogonal matrix from the QR factorization of B in the +*> original equation A*x = lambda*B*x, then DGGHRD reduces the original +*> problem to generalized Hessenberg form. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> = 'N': do not compute Q; +*> = 'I': Q is initialized to the unit matrix, and the +*> orthogonal matrix Q is returned; +*> = 'V': Q must contain an orthogonal matrix Q1 on entry, +*> and the product Q1*Q is returned. +*> \endverbatim +*> +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': do not compute Z; +*> = 'I': Z is initialized to the unit matrix, and the +*> orthogonal matrix Z is returned; +*> = 'V': Z must contain an orthogonal matrix Z1 on entry, +*> and the product Z1*Z is returned. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> ILO and IHI mark the rows and columns of A which are to be +*> reduced. It is assumed that A is already upper triangular +*> in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are +*> normally set by a previous call to DGGBAL; otherwise they +*> should be set to 1 and N respectively. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the N-by-N general matrix to be reduced. +*> On exit, the upper triangle and the first subdiagonal of A +*> are overwritten with the upper Hessenberg matrix H, and the +*> rest is set to zero. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, N) +*> On entry, the N-by-N upper triangular matrix B. +*> On exit, the upper triangular matrix T = Q**T B Z. The +*> elements below the diagonal are set to zero. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ, N) +*> On entry, if COMPQ = 'V', the orthogonal matrix Q1, +*> typically from the QR factorization of B. +*> On exit, if COMPQ='I', the orthogonal matrix Q, and if +*> COMPQ = 'V', the product Q1*Q. +*> Not referenced if COMPQ='N'. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', the orthogonal matrix Z1. +*> On exit, if COMPZ='I', the orthogonal matrix Z, and if +*> COMPZ = 'V', the product Z1*Z. +*> Not referenced if COMPZ='N'. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. +*> LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> This routine reduces A to Hessenberg and B to triangular form by +*> an unblocked reduction, as described in _Matrix_Computations_, +*> by Golub and Van Loan (Johns Hopkins Press.) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, + $ LDQ, Z, LDZ, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ + INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ILQ, ILZ + INTEGER ICOMPQ, ICOMPZ, JCOL, JROW + DOUBLE PRECISION C, S, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLARTG, DLASET, DROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode COMPQ +* + IF( LSAME( COMPQ, 'N' ) ) THEN + ILQ = .FALSE. + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'V' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 2 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 3 + ELSE + ICOMPQ = 0 + END IF +* +* Decode COMPZ +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ILZ = .FALSE. + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 2 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 3 + ELSE + ICOMPZ = 0 + END IF +* +* Test the input parameters. +* + INFO = 0 + IF( ICOMPQ.LE.0 ) THEN + INFO = -1 + ELSE IF( ICOMPZ.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( ( ILQ .AND. LDQ.LT.N ) .OR. LDQ.LT.1 ) THEN + INFO = -11 + ELSE IF( ( ILZ .AND. LDZ.LT.N ) .OR. LDZ.LT.1 ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGHRD', -INFO ) + RETURN + END IF +* +* Initialize Q and Z if desired. +* + IF( ICOMPQ.EQ.3 ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) + IF( ICOMPZ.EQ.3 ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* +* Zero out lower triangle of B +* + DO 20 JCOL = 1, N - 1 + DO 10 JROW = JCOL + 1, N + B( JROW, JCOL ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Reduce A and B +* + DO 40 JCOL = ILO, IHI - 2 +* + DO 30 JROW = IHI, JCOL + 2, -1 +* +* Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL) +* + TEMP = A( JROW-1, JCOL ) + CALL DLARTG( TEMP, A( JROW, JCOL ), C, S, + $ A( JROW-1, JCOL ) ) + A( JROW, JCOL ) = ZERO + CALL DROT( N-JCOL, A( JROW-1, JCOL+1 ), LDA, + $ A( JROW, JCOL+1 ), LDA, C, S ) + CALL DROT( N+2-JROW, B( JROW-1, JROW-1 ), LDB, + $ B( JROW, JROW-1 ), LDB, C, S ) + IF( ILQ ) + $ CALL DROT( N, Q( 1, JROW-1 ), 1, Q( 1, JROW ), 1, C, S ) +* +* Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1) +* + TEMP = B( JROW, JROW ) + CALL DLARTG( TEMP, B( JROW, JROW-1 ), C, S, + $ B( JROW, JROW ) ) + B( JROW, JROW-1 ) = ZERO + CALL DROT( IHI, A( 1, JROW ), 1, A( 1, JROW-1 ), 1, C, S ) + CALL DROT( JROW-1, B( 1, JROW ), 1, B( 1, JROW-1 ), 1, C, + $ S ) + IF( ILZ ) + $ CALL DROT( N, Z( 1, JROW ), 1, Z( 1, JROW-1 ), 1, C, S ) + 30 CONTINUE + 40 CONTINUE +* + RETURN +* +* End of DGGHRD +* + END diff --git a/dspl/liblapack/SRC/dgglse.f b/dspl/liblapack/SRC/dgglse.f new file mode 100644 index 0000000..5d5cac2 --- /dev/null +++ b/dspl/liblapack/SRC/dgglse.f @@ -0,0 +1,354 @@ +*> \brief DGGLSE solves overdetermined or underdetermined systems for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGGLSE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( * ), D( * ), +* $ WORK( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGGLSE solves the linear equality-constrained least squares (LSE) +*> problem: +*> +*> minimize || c - A*x ||_2 subject to B*x = d +*> +*> where A is an M-by-N matrix, B is a P-by-N matrix, c is a given +*> M-vector, and d is a given P-vector. It is assumed that +*> P <= N <= M+P, and +*> +*> rank(B) = P and rank( (A) ) = N. +*> ( (B) ) +*> +*> These conditions ensure that the LSE problem has a unique solution, +*> which is obtained using a generalized RQ factorization of the +*> matrices (B, A) given by +*> +*> B = (0 R)*Q, A = Z*T*Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. 0 <= P <= N <= M+P. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, the upper triangle of the subarray B(1:P,N-P+1:N) +*> contains the P-by-P upper triangular matrix R. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (M) +*> On entry, C contains the right hand side vector for the +*> least squares part of the LSE problem. +*> On exit, the residual sum of squares for the solution +*> is given by the sum of squares of elements N-P+1 to M of +*> vector C. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (P) +*> On entry, D contains the right hand side vector for the +*> constrained equation. +*> On exit, D is destroyed. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (N) +*> On exit, X is the solution of the LSE problem. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M+N+P). +*> For optimum performance LWORK >= P+min(M,N)+max(M,N)*NB, +*> where NB is an upper bound for the optimal blocksizes for +*> DGEQRF, SGERQF, DORMQR and SORMRQ. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1: the upper triangular factor R associated with B in the +*> generalized RQ factorization of the pair (B, A) is +*> singular, so that rank(B) < P; the least squares +*> solution could not be computed. +*> = 2: the (N-P) by (N-P) part of the upper trapezoidal factor +*> T associated with A in the generalized RQ factorization +*> of the pair (B, A) is singular, so that +*> rank( (A) ) < N; the least squares solution could not +*> ( (B) ) +*> be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERsolve +* +* ===================================================================== + SUBROUTINE DGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( * ), D( * ), + $ WORK( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKMIN, LWKOPT, MN, NB, NB1, NB2, NB3, + $ NB4, NR +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGEMV, DGGRQF, DORMQR, DORMRQ, + $ DTRMV, DTRTRS, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( P.LT.0 .OR. P.GT.N .OR. P.LT.N-M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -7 + END IF +* +* Calculate workspace +* + IF( INFO.EQ.0) THEN + IF( N.EQ.0 ) THEN + LWKMIN = 1 + LWKOPT = 1 + ELSE + NB1 = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'DGERQF', ' ', M, N, -1, -1 ) + NB3 = ILAENV( 1, 'DORMQR', ' ', M, N, P, -1 ) + NB4 = ILAENV( 1, 'DORMRQ', ' ', M, N, P, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKMIN = M + N + P + LWKOPT = P + MN + MAX( M, N )*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGLSE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the GRQ factorization of matrices B and A: +* +* B*Q**T = ( 0 T12 ) P Z**T*A*Q**T = ( R11 R12 ) N-P +* N-P P ( 0 R22 ) M+P-N +* N-P P +* +* where T12 and R11 are upper triangular, and Q and Z are +* orthogonal. +* + CALL DGGRQF( P, M, N, B, LDB, WORK, A, LDA, WORK( P+1 ), + $ WORK( P+MN+1 ), LWORK-P-MN, INFO ) + LOPT = WORK( P+MN+1 ) +* +* Update c = Z**T *c = ( c1 ) N-P +* ( c2 ) M+P-N +* + CALL DORMQR( 'Left', 'Transpose', M, 1, MN, A, LDA, WORK( P+1 ), + $ C, MAX( 1, M ), WORK( P+MN+1 ), LWORK-P-MN, INFO ) + LOPT = MAX( LOPT, INT( WORK( P+MN+1 ) ) ) +* +* Solve T12*x2 = d for x2 +* + IF( P.GT.0 ) THEN + CALL DTRTRS( 'Upper', 'No transpose', 'Non-unit', P, 1, + $ B( 1, N-P+1 ), LDB, D, P, INFO ) +* + IF( INFO.GT.0 ) THEN + INFO = 1 + RETURN + END IF +* +* Put the solution in X +* + CALL DCOPY( P, D, 1, X( N-P+1 ), 1 ) +* +* Update c1 +* + CALL DGEMV( 'No transpose', N-P, P, -ONE, A( 1, N-P+1 ), LDA, + $ D, 1, ONE, C, 1 ) + END IF +* +* Solve R11*x1 = c1 for x1 +* + IF( N.GT.P ) THEN + CALL DTRTRS( 'Upper', 'No transpose', 'Non-unit', N-P, 1, + $ A, LDA, C, N-P, INFO ) +* + IF( INFO.GT.0 ) THEN + INFO = 2 + RETURN + END IF +* +* Put the solutions in X +* + CALL DCOPY( N-P, C, 1, X, 1 ) + END IF +* +* Compute the residual vector: +* + IF( M.LT.N ) THEN + NR = M + P - N + IF( NR.GT.0 ) + $ CALL DGEMV( 'No transpose', NR, N-M, -ONE, A( N-P+1, M+1 ), + $ LDA, D( NR+1 ), 1, ONE, C( N-P+1 ), 1 ) + ELSE + NR = P + END IF + IF( NR.GT.0 ) THEN + CALL DTRMV( 'Upper', 'No transpose', 'Non unit', NR, + $ A( N-P+1, N-P+1 ), LDA, D, 1 ) + CALL DAXPY( NR, -ONE, D, 1, C( N-P+1 ), 1 ) + END IF +* +* Backward transformation x = Q**T*x +* + CALL DORMRQ( 'Left', 'Transpose', N, 1, P, B, LDB, WORK( 1 ), X, + $ N, WORK( P+MN+1 ), LWORK-P-MN, INFO ) + WORK( 1 ) = P + MN + MAX( LOPT, INT( WORK( P+MN+1 ) ) ) +* + RETURN +* +* End of DGGLSE +* + END diff --git a/dspl/liblapack/SRC/dggqrf.f b/dspl/liblapack/SRC/dggqrf.f new file mode 100644 index 0000000..3ce21d8 --- /dev/null +++ b/dspl/liblapack/SRC/dggqrf.f @@ -0,0 +1,299 @@ +*> \brief \b DGGQRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGGQRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGGQRF computes a generalized QR factorization of an N-by-M matrix A +*> and an N-by-P matrix B: +*> +*> A = Q*R, B = Q*T*Z, +*> +*> where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal +*> matrix, and R and T assume one of the forms: +*> +*> if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, +*> ( 0 ) N-M N M-N +*> M +*> +*> where R11 is upper triangular, and +*> +*> if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, +*> P-N N ( T21 ) P +*> P +*> +*> where T12 or T21 is upper triangular. +*> +*> In particular, if B is square and nonsingular, the GQR factorization +*> of A and B implicitly gives the QR factorization of inv(B)*A: +*> +*> inv(B)*A = Z**T*(inv(T)*R) +*> +*> where inv(B) denotes the inverse of the matrix B, and Z**T denotes the +*> transpose of the matrix Z. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of columns of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of columns of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,M) +*> On entry, the N-by-M matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(N,M)-by-M upper trapezoidal matrix R (R is +*> upper triangular if N >= M); the elements below the diagonal, +*> with the array TAUA, represent the orthogonal matrix Q as a +*> product of min(N,M) elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAUA +*> \verbatim +*> TAUA is DOUBLE PRECISION array, dimension (min(N,M)) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix Q (see Further Details). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,P) +*> On entry, the N-by-P matrix B. +*> On exit, if N <= P, the upper triangle of the subarray +*> B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; +*> if N > P, the elements on and above the (N-P)-th subdiagonal +*> contain the N-by-P upper trapezoidal matrix T; the remaining +*> elements, with the array TAUB, represent the orthogonal +*> matrix Z as a product of elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAUB +*> \verbatim +*> TAUB is DOUBLE PRECISION array, dimension (min(N,P)) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix Z (see Further Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N,M,P). +*> For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3), +*> where NB1 is the optimal blocksize for the QR factorization +*> of an N-by-M matrix, NB2 is the optimal blocksize for the +*> RQ factorization of an N-by-P matrix, and NB3 is the optimal +*> blocksize for a call of DORMQR. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(n,m). +*> +*> Each H(i) has the form +*> +*> H(i) = I - taua * v * v**T +*> +*> where taua is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), +*> and taua in TAUA(i). +*> To form Q explicitly, use LAPACK subroutine DORGQR. +*> To use Q to update another matrix, use LAPACK subroutine DORMQR. +*> +*> The matrix Z is represented as a product of elementary reflectors +*> +*> Z = H(1) H(2) . . . H(k), where k = min(n,p). +*> +*> Each H(i) has the form +*> +*> H(i) = I - taub * v * v**T +*> +*> where taub is a real scalar, and v is a real vector with +*> v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in +*> B(n-k+i,1:p-k+i-1), and taub in TAUB(i). +*> To form Z explicitly, use LAPACK subroutine DORGRQ. +*> To use Z to update another matrix, use LAPACK subroutine DORMRQ. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKOPT, NB, NB1, NB2, NB3 +* .. +* .. External Subroutines .. + EXTERNAL DGEQRF, DGERQF, DORMQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB1 = ILAENV( 1, 'DGEQRF', ' ', N, M, -1, -1 ) + NB2 = ILAENV( 1, 'DGERQF', ' ', N, P, -1, -1 ) + NB3 = ILAENV( 1, 'DORMQR', ' ', N, M, P, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LWKOPT = MAX( N, M, P )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( P.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, N, M, P ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* QR factorization of N-by-M matrix A: A = Q*R +* + CALL DGEQRF( N, M, A, LDA, TAUA, WORK, LWORK, INFO ) + LOPT = WORK( 1 ) +* +* Update B := Q**T*B. +* + CALL DORMQR( 'Left', 'Transpose', N, P, MIN( N, M ), A, LDA, TAUA, + $ B, LDB, WORK, LWORK, INFO ) + LOPT = MAX( LOPT, INT( WORK( 1 ) ) ) +* +* RQ factorization of N-by-P matrix B: B = T*Z. +* + CALL DGERQF( N, P, B, LDB, TAUB, WORK, LWORK, INFO ) + WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) ) +* + RETURN +* +* End of DGGQRF +* + END diff --git a/dspl/liblapack/SRC/dggrqf.f b/dspl/liblapack/SRC/dggrqf.f new file mode 100644 index 0000000..9c377cc --- /dev/null +++ b/dspl/liblapack/SRC/dggrqf.f @@ -0,0 +1,299 @@ +*> \brief \b DGGRQF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGGRQF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGGRQF computes a generalized RQ factorization of an M-by-N matrix A +*> and a P-by-N matrix B: +*> +*> A = R*Q, B = Z*T*Q, +*> +*> where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal +*> matrix, and R and T assume one of the forms: +*> +*> if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, +*> N-M M ( R21 ) N +*> N +*> +*> where R12 or R21 is upper triangular, and +*> +*> if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, +*> ( 0 ) P-N P N-P +*> N +*> +*> where T11 is upper triangular. +*> +*> In particular, if B is square and nonsingular, the GRQ factorization +*> of A and B implicitly gives the RQ factorization of A*inv(B): +*> +*> A*inv(B) = (R*inv(T))*Z**T +*> +*> where inv(B) denotes the inverse of the matrix B, and Z**T denotes the +*> transpose of the matrix Z. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, if M <= N, the upper triangle of the subarray +*> A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R; +*> if M > N, the elements on and above the (M-N)-th subdiagonal +*> contain the M-by-N upper trapezoidal matrix R; the remaining +*> elements, with the array TAUA, represent the orthogonal +*> matrix Q as a product of elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAUA +*> \verbatim +*> TAUA is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix Q (see Further Details). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(P,N)-by-N upper trapezoidal matrix T (T is +*> upper triangular if P >= N); the elements below the diagonal, +*> with the array TAUB, represent the orthogonal matrix Z as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[out] TAUB +*> \verbatim +*> TAUB is DOUBLE PRECISION array, dimension (min(P,N)) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix Z (see Further Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N,M,P). +*> For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3), +*> where NB1 is the optimal blocksize for the RQ factorization +*> of an M-by-N matrix, NB2 is the optimal blocksize for the +*> QR factorization of a P-by-N matrix, and NB3 is the optimal +*> blocksize for a call of DORMRQ. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INF0= -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - taua * v * v**T +*> +*> where taua is a real scalar, and v is a real vector with +*> v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in +*> A(m-k+i,1:n-k+i-1), and taua in TAUA(i). +*> To form Q explicitly, use LAPACK subroutine DORGRQ. +*> To use Q to update another matrix, use LAPACK subroutine DORMRQ. +*> +*> The matrix Z is represented as a product of elementary reflectors +*> +*> Z = H(1) H(2) . . . H(k), where k = min(p,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - taub * v * v**T +*> +*> where taub is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i), +*> and taub in TAUB(i). +*> To form Z explicitly, use LAPACK subroutine DORGQR. +*> To use Z to update another matrix, use LAPACK subroutine DORMQR. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKOPT, NB, NB1, NB2, NB3 +* .. +* .. External Subroutines .. + EXTERNAL DGEQRF, DGERQF, DORMRQ, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB1 = ILAENV( 1, 'DGERQF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'DGEQRF', ' ', P, N, -1, -1 ) + NB3 = ILAENV( 1, 'DORMRQ', ' ', M, N, P, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LWKOPT = MAX( N, M, P )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( P.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, M, P, N ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGRQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* RQ factorization of M-by-N matrix A: A = R*Q +* + CALL DGERQF( M, N, A, LDA, TAUA, WORK, LWORK, INFO ) + LOPT = WORK( 1 ) +* +* Update B := B*Q**T +* + CALL DORMRQ( 'Right', 'Transpose', P, N, MIN( M, N ), + $ A( MAX( 1, M-N+1 ), 1 ), LDA, TAUA, B, LDB, WORK, + $ LWORK, INFO ) + LOPT = MAX( LOPT, INT( WORK( 1 ) ) ) +* +* QR factorization of P-by-N matrix B: B = Z*T +* + CALL DGEQRF( P, N, B, LDB, TAUB, WORK, LWORK, INFO ) + WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) ) +* + RETURN +* +* End of DGGRQF +* + END diff --git a/dspl/liblapack/SRC/dggsvd3.f b/dspl/liblapack/SRC/dggsvd3.f new file mode 100644 index 0000000..f882139 --- /dev/null +++ b/dspl/liblapack/SRC/dggsvd3.f @@ -0,0 +1,503 @@ +*> \brief DGGSVD3 computes the singular value decomposition (SVD) for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGGSVD3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGGSVD3( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, +* LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, +* LWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBQ, JOBU, JOBV +* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, LWORK +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), Q( LDQ, * ), U( LDU, * ), +* $ V( LDV, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGGSVD3 computes the generalized singular value decomposition (GSVD) +*> of an M-by-N real matrix A and P-by-N real matrix B: +*> +*> U**T*A*Q = D1*( 0 R ), V**T*B*Q = D2*( 0 R ) +*> +*> where U, V and Q are orthogonal matrices. +*> Let K+L = the effective numerical rank of the matrix (A**T,B**T)**T, +*> then R is a K+L-by-K+L nonsingular upper triangular matrix, D1 and +*> D2 are M-by-(K+L) and P-by-(K+L) "diagonal" matrices and of the +*> following structures, respectively: +*> +*> If M-K-L >= 0, +*> +*> K L +*> D1 = K ( I 0 ) +*> L ( 0 C ) +*> M-K-L ( 0 0 ) +*> +*> K L +*> D2 = L ( 0 S ) +*> P-L ( 0 0 ) +*> +*> N-K-L K L +*> ( 0 R ) = K ( 0 R11 R12 ) +*> L ( 0 0 R22 ) +*> +*> where +*> +*> C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), +*> S = diag( BETA(K+1), ... , BETA(K+L) ), +*> C**2 + S**2 = I. +*> +*> R is stored in A(1:K+L,N-K-L+1:N) on exit. +*> +*> If M-K-L < 0, +*> +*> K M-K K+L-M +*> D1 = K ( I 0 0 ) +*> M-K ( 0 C 0 ) +*> +*> K M-K K+L-M +*> D2 = M-K ( 0 S 0 ) +*> K+L-M ( 0 0 I ) +*> P-L ( 0 0 0 ) +*> +*> N-K-L K M-K K+L-M +*> ( 0 R ) = K ( 0 R11 R12 R13 ) +*> M-K ( 0 0 R22 R23 ) +*> K+L-M ( 0 0 0 R33 ) +*> +*> where +*> +*> C = diag( ALPHA(K+1), ... , ALPHA(M) ), +*> S = diag( BETA(K+1), ... , BETA(M) ), +*> C**2 + S**2 = I. +*> +*> (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored +*> ( 0 R22 R23 ) +*> in B(M-K+1:L,N+M-K-L+1:N) on exit. +*> +*> The routine computes C, S, R, and optionally the orthogonal +*> transformation matrices U, V and Q. +*> +*> In particular, if B is an N-by-N nonsingular matrix, then the GSVD of +*> A and B implicitly gives the SVD of A*inv(B): +*> A*inv(B) = U*(D1*inv(D2))*V**T. +*> If ( A**T,B**T)**T has orthonormal columns, then the GSVD of A and B is +*> also equal to the CS decomposition of A and B. Furthermore, the GSVD +*> can be used to derive the solution of the eigenvalue problem: +*> A**T*A x = lambda* B**T*B x. +*> In some literature, the GSVD of A and B is presented in the form +*> U**T*A*X = ( 0 D1 ), V**T*B*X = ( 0 D2 ) +*> where U and V are orthogonal and X is nonsingular, D1 and D2 are +*> ``diagonal''. The former GSVD form can be converted to the latter +*> form by taking the nonsingular matrix X as +*> +*> X = Q*( I 0 ) +*> ( 0 inv(R) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> = 'U': Orthogonal matrix U is computed; +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> = 'V': Orthogonal matrix V is computed; +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBQ +*> \verbatim +*> JOBQ is CHARACTER*1 +*> = 'Q': Orthogonal matrix Q is computed; +*> = 'N': Q is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> \endverbatim +*> +*> \param[out] L +*> \verbatim +*> L is INTEGER +*> +*> On exit, K and L specify the dimension of the subblocks +*> described in Purpose. +*> K + L = effective numerical rank of (A**T,B**T)**T. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A contains the triangular matrix R, or part of R. +*> See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, B contains the triangular matrix R if M-K-L < 0. +*> See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION array, dimension (N) +*> +*> On exit, ALPHA and BETA contain the generalized singular +*> value pairs of A and B; +*> ALPHA(1:K) = 1, +*> BETA(1:K) = 0, +*> and if M-K-L >= 0, +*> ALPHA(K+1:K+L) = C, +*> BETA(K+1:K+L) = S, +*> or if M-K-L < 0, +*> ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0 +*> BETA(K+1:M) =S, BETA(M+1:K+L) =1 +*> and +*> ALPHA(K+L+1:N) = 0 +*> BETA(K+L+1:N) = 0 +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension (LDU,M) +*> If JOBU = 'U', U contains the M-by-M orthogonal matrix U. +*> If JOBU = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,M) if +*> JOBU = 'U'; LDU >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (LDV,P) +*> If JOBV = 'V', V contains the P-by-P orthogonal matrix V. +*> If JOBV = 'N', V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,P) if +*> JOBV = 'V'; LDV >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> If JOBQ = 'Q', Q contains the N-by-N orthogonal matrix Q. +*> If JOBQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N) if +*> JOBQ = 'Q'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> On exit, IWORK stores the sorting information. More +*> precisely, the following loop will sort ALPHA +*> for I = K+1, min(M,K+L) +*> swap ALPHA(I) and ALPHA(IWORK(I)) +*> endfor +*> such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, the Jacobi-type procedure failed to +*> converge. For further details, see subroutine DTGSJA. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> TOLA DOUBLE PRECISION +*> TOLB DOUBLE PRECISION +*> TOLA and TOLB are the thresholds to determine the effective +*> rank of (A**T,B**T)**T. Generally, they are set to +*> TOLA = MAX(M,N)*norm(A)*MACHEPS, +*> TOLB = MAX(P,N)*norm(B)*MACHEPS. +*> The size of TOLA and TOLB may affect the size of backward +*> errors of the decomposition. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date August 2015 +* +*> \ingroup doubleGEsing +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* +*> \par Further Details: +* ===================== +*> +*> DGGSVD3 replaces the deprecated subroutine DGGSVD. +*> +* ===================================================================== + SUBROUTINE DGGSVD3( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, + $ LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, + $ WORK, LWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* August 2015 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, + $ LWORK +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), Q( LDQ, * ), U( LDU, * ), + $ V( LDV, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL WANTQ, WANTU, WANTV, LQUERY + INTEGER I, IBND, ISUB, J, NCYCLE, LWKOPT + DOUBLE PRECISION ANORM, BNORM, SMAX, TEMP, TOLA, TOLB, ULP, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL LSAME, DLAMCH, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGGSVP3, DTGSJA, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) + LQUERY = ( LWORK.EQ.-1 ) + LWKOPT = 1 +* +* Test the input arguments +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( P.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -12 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -24 + END IF +* +* Compute workspace +* + IF( INFO.EQ.0 ) THEN + CALL DGGSVP3( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, TOLA, + $ TOLB, K, L, U, LDU, V, LDV, Q, LDQ, IWORK, WORK, + $ WORK, -1, INFO ) + LWKOPT = N + INT( WORK( 1 ) ) + LWKOPT = MAX( 2*N, LWKOPT ) + LWKOPT = MAX( 1, LWKOPT ) + WORK( 1 ) = DBLE( LWKOPT ) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGSVD3', -INFO ) + RETURN + END IF + IF( LQUERY ) THEN + RETURN + ENDIF +* +* Compute the Frobenius norm of matrices A and B +* + ANORM = DLANGE( '1', M, N, A, LDA, WORK ) + BNORM = DLANGE( '1', P, N, B, LDB, WORK ) +* +* Get machine precision and set up threshold for determining +* the effective numerical rank of the matrices A and B. +* + ULP = DLAMCH( 'Precision' ) + UNFL = DLAMCH( 'Safe Minimum' ) + TOLA = MAX( M, N )*MAX( ANORM, UNFL )*ULP + TOLB = MAX( P, N )*MAX( BNORM, UNFL )*ULP +* +* Preprocessing +* + CALL DGGSVP3( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, TOLA, + $ TOLB, K, L, U, LDU, V, LDV, Q, LDQ, IWORK, WORK, + $ WORK( N+1 ), LWORK-N, INFO ) +* +* Compute the GSVD of two upper "triangular" matrices +* + CALL DTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, LDB, + $ TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, + $ WORK, NCYCLE, INFO ) +* +* Sort the singular values and store the pivot indices in IWORK +* Copy ALPHA to WORK, then sort ALPHA in WORK +* + CALL DCOPY( N, ALPHA, 1, WORK, 1 ) + IBND = MIN( L, M-K ) + DO 20 I = 1, IBND +* +* Scan for largest ALPHA(K+I) +* + ISUB = I + SMAX = WORK( K+I ) + DO 10 J = I + 1, IBND + TEMP = WORK( K+J ) + IF( TEMP.GT.SMAX ) THEN + ISUB = J + SMAX = TEMP + END IF + 10 CONTINUE + IF( ISUB.NE.I ) THEN + WORK( K+ISUB ) = WORK( K+I ) + WORK( K+I ) = SMAX + IWORK( K+I ) = K + ISUB + ELSE + IWORK( K+I ) = K + I + END IF + 20 CONTINUE +* + WORK( 1 ) = DBLE( LWKOPT ) + RETURN +* +* End of DGGSVD3 +* + END diff --git a/dspl/liblapack/SRC/dggsvp3.f b/dspl/liblapack/SRC/dggsvp3.f new file mode 100644 index 0000000..0ff113b --- /dev/null +++ b/dspl/liblapack/SRC/dggsvp3.f @@ -0,0 +1,571 @@ +*> \brief \b DGGSVP3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGGSVP3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGGSVP3( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, +* TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, +* IWORK, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBQ, JOBU, JOBV +* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, LWORK +* DOUBLE PRECISION TOLA, TOLB +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGGSVP3 computes orthogonal matrices U, V and Q such that +*> +*> N-K-L K L +*> U**T*A*Q = K ( 0 A12 A13 ) if M-K-L >= 0; +*> L ( 0 0 A23 ) +*> M-K-L ( 0 0 0 ) +*> +*> N-K-L K L +*> = K ( 0 A12 A13 ) if M-K-L < 0; +*> M-K ( 0 0 A23 ) +*> +*> N-K-L K L +*> V**T*B*Q = L ( 0 0 B13 ) +*> P-L ( 0 0 0 ) +*> +*> where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular +*> upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, +*> otherwise A23 is (M-K)-by-L upper trapezoidal. K+L = the effective +*> numerical rank of the (M+P)-by-N matrix (A**T,B**T)**T. +*> +*> This decomposition is the preprocessing step for computing the +*> Generalized Singular Value Decomposition (GSVD), see subroutine +*> DGGSVD3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> = 'U': Orthogonal matrix U is computed; +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> = 'V': Orthogonal matrix V is computed; +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBQ +*> \verbatim +*> JOBQ is CHARACTER*1 +*> = 'Q': Orthogonal matrix Q is computed; +*> = 'N': Q is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A contains the triangular (or trapezoidal) matrix +*> described in the Purpose section. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, B contains the triangular matrix described in +*> the Purpose section. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[in] TOLA +*> \verbatim +*> TOLA is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] TOLB +*> \verbatim +*> TOLB is DOUBLE PRECISION +*> +*> TOLA and TOLB are the thresholds to determine the effective +*> numerical rank of matrix B and a subblock of A. Generally, +*> they are set to +*> TOLA = MAX(M,N)*norm(A)*MACHEPS, +*> TOLB = MAX(P,N)*norm(B)*MACHEPS. +*> The size of TOLA and TOLB may affect the size of backward +*> errors of the decomposition. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> \endverbatim +*> +*> \param[out] L +*> \verbatim +*> L is INTEGER +*> +*> On exit, K and L specify the dimension of the subblocks +*> described in Purpose section. +*> K + L = effective numerical rank of (A**T,B**T)**T. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension (LDU,M) +*> If JOBU = 'U', U contains the orthogonal matrix U. +*> If JOBU = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,M) if +*> JOBU = 'U'; LDU >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (LDV,P) +*> If JOBV = 'V', V contains the orthogonal matrix V. +*> If JOBV = 'N', V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,P) if +*> JOBV = 'V'; LDV >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> If JOBQ = 'Q', Q contains the orthogonal matrix Q. +*> If JOBQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N) if +*> JOBQ = 'Q'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date August 2015 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The subroutine uses LAPACK subroutine DGEQP3 for the QR factorization +*> with column pivoting to detect the effective numerical rank of the +*> a matrix. It may be replaced by a better rank determination strategy. +*> +*> DGGSVP3 replaces the deprecated subroutine DGGSVP. +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DGGSVP3( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, + $ TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, + $ IWORK, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* August 2015 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, + $ LWORK + DOUBLE PRECISION TOLA, TOLB +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL FORWRD, WANTQ, WANTU, WANTV, LQUERY + INTEGER I, J, LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGEQP3, DGEQR2, DGERQ2, DLACPY, DLAPMT, + $ DLASET, DORG2R, DORM2R, DORMR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) + FORWRD = .TRUE. + LQUERY = ( LWORK.EQ.-1 ) + LWKOPT = 1 +* +* Test the input arguments +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( P.LT.0 ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -10 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -24 + END IF +* +* Compute workspace +* + IF( INFO.EQ.0 ) THEN + CALL DGEQP3( P, N, B, LDB, IWORK, TAU, WORK, -1, INFO ) + LWKOPT = INT( WORK ( 1 ) ) + IF( WANTV ) THEN + LWKOPT = MAX( LWKOPT, P ) + END IF + LWKOPT = MAX( LWKOPT, MIN( N, P ) ) + LWKOPT = MAX( LWKOPT, M ) + IF( WANTQ ) THEN + LWKOPT = MAX( LWKOPT, N ) + END IF + CALL DGEQP3( M, N, A, LDA, IWORK, TAU, WORK, -1, INFO ) + LWKOPT = MAX( LWKOPT, INT( WORK ( 1 ) ) ) + LWKOPT = MAX( 1, LWKOPT ) + WORK( 1 ) = DBLE( LWKOPT ) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGGSVP3', -INFO ) + RETURN + END IF + IF( LQUERY ) THEN + RETURN + ENDIF +* +* QR with column pivoting of B: B*P = V*( S11 S12 ) +* ( 0 0 ) +* + DO 10 I = 1, N + IWORK( I ) = 0 + 10 CONTINUE + CALL DGEQP3( P, N, B, LDB, IWORK, TAU, WORK, LWORK, INFO ) +* +* Update A := A*P +* + CALL DLAPMT( FORWRD, M, N, A, LDA, IWORK ) +* +* Determine the effective rank of matrix B. +* + L = 0 + DO 20 I = 1, MIN( P, N ) + IF( ABS( B( I, I ) ).GT.TOLB ) + $ L = L + 1 + 20 CONTINUE +* + IF( WANTV ) THEN +* +* Copy the details of V, and form V. +* + CALL DLASET( 'Full', P, P, ZERO, ZERO, V, LDV ) + IF( P.GT.1 ) + $ CALL DLACPY( 'Lower', P-1, N, B( 2, 1 ), LDB, V( 2, 1 ), + $ LDV ) + CALL DORG2R( P, P, MIN( P, N ), V, LDV, TAU, WORK, INFO ) + END IF +* +* Clean up B +* + DO 40 J = 1, L - 1 + DO 30 I = J + 1, L + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + IF( P.GT.L ) + $ CALL DLASET( 'Full', P-L, N, ZERO, ZERO, B( L+1, 1 ), LDB ) +* + IF( WANTQ ) THEN +* +* Set Q = I and Update Q := Q*P +* + CALL DLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) + CALL DLAPMT( FORWRD, N, N, Q, LDQ, IWORK ) + END IF +* + IF( P.GE.L .AND. N.NE.L ) THEN +* +* RQ factorization of (S11 S12): ( S11 S12 ) = ( 0 S12 )*Z +* + CALL DGERQ2( L, N, B, LDB, TAU, WORK, INFO ) +* +* Update A := A*Z**T +* + CALL DORMR2( 'Right', 'Transpose', M, N, L, B, LDB, TAU, A, + $ LDA, WORK, INFO ) +* + IF( WANTQ ) THEN +* +* Update Q := Q*Z**T +* + CALL DORMR2( 'Right', 'Transpose', N, N, L, B, LDB, TAU, Q, + $ LDQ, WORK, INFO ) + END IF +* +* Clean up B +* + CALL DLASET( 'Full', L, N-L, ZERO, ZERO, B, LDB ) + DO 60 J = N - L + 1, N + DO 50 I = J - N + L + 1, L + B( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE +* + END IF +* +* Let N-L L +* A = ( A11 A12 ) M, +* +* then the following does the complete QR decomposition of A11: +* +* A11 = U*( 0 T12 )*P1**T +* ( 0 0 ) +* + DO 70 I = 1, N - L + IWORK( I ) = 0 + 70 CONTINUE + CALL DGEQP3( M, N-L, A, LDA, IWORK, TAU, WORK, LWORK, INFO ) +* +* Determine the effective rank of A11 +* + K = 0 + DO 80 I = 1, MIN( M, N-L ) + IF( ABS( A( I, I ) ).GT.TOLA ) + $ K = K + 1 + 80 CONTINUE +* +* Update A12 := U**T*A12, where A12 = A( 1:M, N-L+1:N ) +* + CALL DORM2R( 'Left', 'Transpose', M, L, MIN( M, N-L ), A, LDA, + $ TAU, A( 1, N-L+1 ), LDA, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Copy the details of U, and form U +* + CALL DLASET( 'Full', M, M, ZERO, ZERO, U, LDU ) + IF( M.GT.1 ) + $ CALL DLACPY( 'Lower', M-1, N-L, A( 2, 1 ), LDA, U( 2, 1 ), + $ LDU ) + CALL DORG2R( M, M, MIN( M, N-L ), U, LDU, TAU, WORK, INFO ) + END IF +* + IF( WANTQ ) THEN +* +* Update Q( 1:N, 1:N-L ) = Q( 1:N, 1:N-L )*P1 +* + CALL DLAPMT( FORWRD, N, N-L, Q, LDQ, IWORK ) + END IF +* +* Clean up A: set the strictly lower triangular part of +* A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. +* + DO 100 J = 1, K - 1 + DO 90 I = J + 1, K + A( I, J ) = ZERO + 90 CONTINUE + 100 CONTINUE + IF( M.GT.K ) + $ CALL DLASET( 'Full', M-K, N-L, ZERO, ZERO, A( K+1, 1 ), LDA ) +* + IF( N-L.GT.K ) THEN +* +* RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 +* + CALL DGERQ2( K, N-L, A, LDA, TAU, WORK, INFO ) +* + IF( WANTQ ) THEN +* +* Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1**T +* + CALL DORMR2( 'Right', 'Transpose', N, N-L, K, A, LDA, TAU, + $ Q, LDQ, WORK, INFO ) + END IF +* +* Clean up A +* + CALL DLASET( 'Full', K, N-L-K, ZERO, ZERO, A, LDA ) + DO 120 J = N - L - K + 1, N - L + DO 110 I = J - N + L + K + 1, K + A( I, J ) = ZERO + 110 CONTINUE + 120 CONTINUE +* + END IF +* + IF( M.GT.K ) THEN +* +* QR factorization of A( K+1:M,N-L+1:N ) +* + CALL DGEQR2( M-K, L, A( K+1, N-L+1 ), LDA, TAU, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Update U(:,K+1:M) := U(:,K+1:M)*U1 +* + CALL DORM2R( 'Right', 'No transpose', M, M-K, MIN( M-K, L ), + $ A( K+1, N-L+1 ), LDA, TAU, U( 1, K+1 ), LDU, + $ WORK, INFO ) + END IF +* +* Clean up +* + DO 140 J = N - L + 1, N + DO 130 I = J - N + K + L + 1, M + A( I, J ) = ZERO + 130 CONTINUE + 140 CONTINUE +* + END IF +* + WORK( 1 ) = DBLE( LWKOPT ) + RETURN +* +* End of DGGSVP3 +* + END diff --git a/dspl/liblapack/SRC/dgsvj0.f b/dspl/liblapack/SRC/dgsvj0.f new file mode 100644 index 0000000..4fd38d3 --- /dev/null +++ b/dspl/liblapack/SRC/dgsvj0.f @@ -0,0 +1,1079 @@ +*> \brief \b DGSVJ0 pre-processor for the routine dgesvj. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGSVJ0 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS, +* SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP +* DOUBLE PRECISION EPS, SFMIN, TOL +* CHARACTER*1 JOBV +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), SVA( N ), D( N ), V( LDV, * ), +* $ WORK( LWORK ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGSVJ0 is called from DGESVJ as a pre-processor and that is its main +*> purpose. It applies Jacobi rotations in the same way as DGESVJ does, but +*> it does not check convergence (stopping criterion). Few tuning +*> parameters (marked by [TP]) are available for the implementer. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> Specifies whether the output from this procedure is used +*> to compute the matrix V: +*> = 'V': the product of the Jacobi rotations is accumulated +*> by postmulyiplying the N-by-N array V. +*> (See the description of V.) +*> = 'A': the product of the Jacobi rotations is accumulated +*> by postmulyiplying the MV-by-N array V. +*> (See the descriptions of MV and V.) +*> = 'N': the Jacobi rotations are not accumulated. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. +*> M >= N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, M-by-N matrix A, such that A*diag(D) represents +*> the input matrix. +*> On exit, +*> A_onexit * D_onexit represents the input matrix A*diag(D) +*> post-multiplied by a sequence of Jacobi rotations, where the +*> rotation threshold and the total number of sweeps are given in +*> TOL and NSWEEP, respectively. +*> (See the descriptions of D, TOL and NSWEEP.) +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The array D accumulates the scaling factors from the fast scaled +*> Jacobi rotations. +*> On entry, A*diag(D) represents the input matrix. +*> On exit, A_onexit*diag(D_onexit) represents the input matrix +*> post-multiplied by a sequence of Jacobi rotations, where the +*> rotation threshold and the total number of sweeps are given in +*> TOL and NSWEEP, respectively. +*> (See the descriptions of A, TOL and NSWEEP.) +*> \endverbatim +*> +*> \param[in,out] SVA +*> \verbatim +*> SVA is DOUBLE PRECISION array, dimension (N) +*> On entry, SVA contains the Euclidean norms of the columns of +*> the matrix A*diag(D). +*> On exit, SVA contains the Euclidean norms of the columns of +*> the matrix onexit*diag(D_onexit). +*> \endverbatim +*> +*> \param[in] MV +*> \verbatim +*> MV is INTEGER +*> If JOBV .EQ. 'A', then MV rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV = 'N', then MV is not referenced. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (LDV,N) +*> If JOBV .EQ. 'V' then N rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV .EQ. 'A' then MV rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV = 'N', then V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V, LDV >= 1. +*> If JOBV = 'V', LDV .GE. N. +*> If JOBV = 'A', LDV .GE. MV. +*> \endverbatim +*> +*> \param[in] EPS +*> \verbatim +*> EPS is DOUBLE PRECISION +*> EPS = DLAMCH('Epsilon') +*> \endverbatim +*> +*> \param[in] SFMIN +*> \verbatim +*> SFMIN is DOUBLE PRECISION +*> SFMIN = DLAMCH('Safe Minimum') +*> \endverbatim +*> +*> \param[in] TOL +*> \verbatim +*> TOL is DOUBLE PRECISION +*> TOL is the threshold for Jacobi rotations. For a pair +*> A(:,p), A(:,q) of pivot columns, the Jacobi rotation is +*> applied only if DABS(COS(angle(A(:,p),A(:,q)))) .GT. TOL. +*> \endverbatim +*> +*> \param[in] NSWEEP +*> \verbatim +*> NSWEEP is INTEGER +*> NSWEEP is the number of sweeps of Jacobi rotations to be +*> performed. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> LWORK is the dimension of WORK. LWORK .GE. M. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0 : successful exit. +*> < 0 : if INFO = -i, then the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> DGSVJ0 is used just to enable DGESVJ to call a simplified version of +*> itself to work on a submatrix of the original matrix. +*> +*> \par Contributors: +* ================== +*> +*> Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) +*> +*> \par Bugs, Examples and Comments: +* ================================= +*> +*> Please report all bugs and send interesting test examples and comments to +*> drmac@math.hr. Thank you. +* +* ===================================================================== + SUBROUTINE DGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS, + $ SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP + DOUBLE PRECISION EPS, SFMIN, TOL + CHARACTER*1 JOBV +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), SVA( N ), D( N ), V( LDV, * ), + $ WORK( LWORK ) +* .. +* +* ===================================================================== +* +* .. Local Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0) +* .. +* .. Local Scalars .. + DOUBLE PRECISION AAPP, AAPP0, AAPQ, AAQQ, APOAQ, AQOAP, BIG, + $ BIGTHETA, CS, MXAAPQ, MXSINJ, ROOTBIG, ROOTEPS, + $ ROOTSFMIN, ROOTTOL, SMALL, SN, T, TEMP1, THETA, + $ THSIGN + INTEGER BLSKIP, EMPTSW, i, ibr, IERR, igl, IJBLSK, ir1, + $ ISWROT, jbc, jgl, KBL, LKAHEAD, MVL, NBL, + $ NOTROT, p, PSKIPPED, q, ROWSKIP, SWBAND + LOGICAL APPLV, ROTOK, RSVEC +* .. +* .. Local Arrays .. + DOUBLE PRECISION FASTR( 5 ) +* .. +* .. Intrinsic Functions .. + INTRINSIC DABS, MAX, DBLE, MIN, DSIGN, DSQRT +* .. +* .. External Functions .. + DOUBLE PRECISION DDOT, DNRM2 + INTEGER IDAMAX + LOGICAL LSAME + EXTERNAL IDAMAX, LSAME, DDOT, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLASCL, DLASSQ, DROTM, DSWAP, + $ XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + APPLV = LSAME( JOBV, 'A' ) + RSVEC = LSAME( JOBV, 'V' ) + IF( .NOT.( RSVEC .OR. APPLV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( ( N.LT.0 ) .OR. ( N.GT.M ) ) THEN + INFO = -3 + ELSE IF( LDA.LT.M ) THEN + INFO = -5 + ELSE IF( ( RSVEC.OR.APPLV ) .AND. ( MV.LT.0 ) ) THEN + INFO = -8 + ELSE IF( ( RSVEC.AND.( LDV.LT.N ) ).OR. + $ ( APPLV.AND.( LDV.LT.MV ) ) ) THEN + INFO = -10 + ELSE IF( TOL.LE.EPS ) THEN + INFO = -13 + ELSE IF( NSWEEP.LT.0 ) THEN + INFO = -14 + ELSE IF( LWORK.LT.M ) THEN + INFO = -16 + ELSE + INFO = 0 + END IF +* +* #:( + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGSVJ0', -INFO ) + RETURN + END IF +* + IF( RSVEC ) THEN + MVL = N + ELSE IF( APPLV ) THEN + MVL = MV + END IF + RSVEC = RSVEC .OR. APPLV + + ROOTEPS = DSQRT( EPS ) + ROOTSFMIN = DSQRT( SFMIN ) + SMALL = SFMIN / EPS + BIG = ONE / SFMIN + ROOTBIG = ONE / ROOTSFMIN + BIGTHETA = ONE / ROOTEPS + ROOTTOL = DSQRT( TOL ) +* +* -#- Row-cyclic Jacobi SVD algorithm with column pivoting -#- +* + EMPTSW = ( N*( N-1 ) ) / 2 + NOTROT = 0 + FASTR( 1 ) = ZERO +* +* -#- Row-cyclic pivot strategy with de Rijk's pivoting -#- +* + + SWBAND = 0 +*[TP] SWBAND is a tuning parameter. It is meaningful and effective +* if SGESVJ is used as a computational routine in the preconditioned +* Jacobi SVD algorithm SGESVJ. For sweeps i=1:SWBAND the procedure +* ...... + + KBL = MIN( 8, N ) +*[TP] KBL is a tuning parameter that defines the tile size in the +* tiling of the p-q loops of pivot pairs. In general, an optimal +* value of KBL depends on the matrix dimensions and on the +* parameters of the computer's memory. +* + NBL = N / KBL + IF( ( NBL*KBL ).NE.N )NBL = NBL + 1 + + BLSKIP = ( KBL**2 ) + 1 +*[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. + + ROWSKIP = MIN( 5, KBL ) +*[TP] ROWSKIP is a tuning parameter. + + LKAHEAD = 1 +*[TP] LKAHEAD is a tuning parameter. + SWBAND = 0 + PSKIPPED = 0 +* + DO 1993 i = 1, NSWEEP +* .. go go go ... +* + MXAAPQ = ZERO + MXSINJ = ZERO + ISWROT = 0 +* + NOTROT = 0 + PSKIPPED = 0 +* + DO 2000 ibr = 1, NBL + + igl = ( ibr-1 )*KBL + 1 +* + DO 1002 ir1 = 0, MIN( LKAHEAD, NBL-ibr ) +* + igl = igl + ir1*KBL +* + DO 2001 p = igl, MIN( igl+KBL-1, N-1 ) + +* .. de Rijk's pivoting + q = IDAMAX( N-p+1, SVA( p ), 1 ) + p - 1 + IF( p.NE.q ) THEN + CALL DSWAP( M, A( 1, p ), 1, A( 1, q ), 1 ) + IF( RSVEC )CALL DSWAP( MVL, V( 1, p ), 1, + $ V( 1, q ), 1 ) + TEMP1 = SVA( p ) + SVA( p ) = SVA( q ) + SVA( q ) = TEMP1 + TEMP1 = D( p ) + D( p ) = D( q ) + D( q ) = TEMP1 + END IF +* + IF( ir1.EQ.0 ) THEN +* +* Column norms are periodically updated by explicit +* norm computation. +* Caveat: +* Some BLAS implementations compute DNRM2(M,A(1,p),1) +* as DSQRT(DDOT(M,A(1,p),1,A(1,p),1)), which may result in +* overflow for ||A(:,p)||_2 > DSQRT(overflow_threshold), and +* undeflow for ||A(:,p)||_2 < DSQRT(underflow_threshold). +* Hence, DNRM2 cannot be trusted, not even in the case when +* the true norm is far from the under(over)flow boundaries. +* If properly implemented DNRM2 is available, the IF-THEN-ELSE +* below should read "AAPP = DNRM2( M, A(1,p), 1 ) * D(p)". +* + IF( ( SVA( p ).LT.ROOTBIG ) .AND. + $ ( SVA( p ).GT.ROOTSFMIN ) ) THEN + SVA( p ) = DNRM2( M, A( 1, p ), 1 )*D( p ) + ELSE + TEMP1 = ZERO + AAPP = ONE + CALL DLASSQ( M, A( 1, p ), 1, TEMP1, AAPP ) + SVA( p ) = TEMP1*DSQRT( AAPP )*D( p ) + END IF + AAPP = SVA( p ) + ELSE + AAPP = SVA( p ) + END IF + +* + IF( AAPP.GT.ZERO ) THEN +* + PSKIPPED = 0 +* + DO 2002 q = p + 1, MIN( igl+KBL-1, N ) +* + AAQQ = SVA( q ) + + IF( AAQQ.GT.ZERO ) THEN +* + AAPP0 = AAPP + IF( AAQQ.GE.ONE ) THEN + ROTOK = ( SMALL*AAPP ).LE.AAQQ + IF( AAPP.LT.( BIG / AAQQ ) ) THEN + AAPQ = ( DDOT( M, A( 1, p ), 1, A( 1, + $ q ), 1 )*D( p )*D( q ) / AAQQ ) + $ / AAPP + ELSE + CALL DCOPY( M, A( 1, p ), 1, WORK, 1 ) + CALL DLASCL( 'G', 0, 0, AAPP, D( p ), + $ M, 1, WORK, LDA, IERR ) + AAPQ = DDOT( M, WORK, 1, A( 1, q ), + $ 1 )*D( q ) / AAQQ + END IF + ELSE + ROTOK = AAPP.LE.( AAQQ / SMALL ) + IF( AAPP.GT.( SMALL / AAQQ ) ) THEN + AAPQ = ( DDOT( M, A( 1, p ), 1, A( 1, + $ q ), 1 )*D( p )*D( q ) / AAQQ ) + $ / AAPP + ELSE + CALL DCOPY( M, A( 1, q ), 1, WORK, 1 ) + CALL DLASCL( 'G', 0, 0, AAQQ, D( q ), + $ M, 1, WORK, LDA, IERR ) + AAPQ = DDOT( M, WORK, 1, A( 1, p ), + $ 1 )*D( p ) / AAPP + END IF + END IF +* + MXAAPQ = MAX( MXAAPQ, DABS( AAPQ ) ) +* +* TO rotate or NOT to rotate, THAT is the question ... +* + IF( DABS( AAPQ ).GT.TOL ) THEN +* +* .. rotate +* ROTATED = ROTATED + ONE +* + IF( ir1.EQ.0 ) THEN + NOTROT = 0 + PSKIPPED = 0 + ISWROT = ISWROT + 1 + END IF +* + IF( ROTOK ) THEN +* + AQOAP = AAQQ / AAPP + APOAQ = AAPP / AAQQ + THETA = -HALF*DABS( AQOAP-APOAQ )/AAPQ +* + IF( DABS( THETA ).GT.BIGTHETA ) THEN +* + T = HALF / THETA + FASTR( 3 ) = T*D( p ) / D( q ) + FASTR( 4 ) = -T*D( q ) / D( p ) + CALL DROTM( M, A( 1, p ), 1, + $ A( 1, q ), 1, FASTR ) + IF( RSVEC )CALL DROTM( MVL, + $ V( 1, p ), 1, + $ V( 1, q ), 1, + $ FASTR ) + SVA( q ) = AAQQ*DSQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ ) ) + AAPP = AAPP*DSQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ ) ) + MXSINJ = MAX( MXSINJ, DABS( T ) ) +* + ELSE +* +* .. choose correct signum for THETA and rotate +* + THSIGN = -DSIGN( ONE, AAPQ ) + T = ONE / ( THETA+THSIGN* + $ DSQRT( ONE+THETA*THETA ) ) + CS = DSQRT( ONE / ( ONE+T*T ) ) + SN = T*CS +* + MXSINJ = MAX( MXSINJ, DABS( SN ) ) + SVA( q ) = AAQQ*DSQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ ) ) + AAPP = AAPP*DSQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ ) ) +* + APOAQ = D( p ) / D( q ) + AQOAP = D( q ) / D( p ) + IF( D( p ).GE.ONE ) THEN + IF( D( q ).GE.ONE ) THEN + FASTR( 3 ) = T*APOAQ + FASTR( 4 ) = -T*AQOAP + D( p ) = D( p )*CS + D( q ) = D( q )*CS + CALL DROTM( M, A( 1, p ), 1, + $ A( 1, q ), 1, + $ FASTR ) + IF( RSVEC )CALL DROTM( MVL, + $ V( 1, p ), 1, V( 1, q ), + $ 1, FASTR ) + ELSE + CALL DAXPY( M, -T*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + CALL DAXPY( M, CS*SN*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + D( p ) = D( p )*CS + D( q ) = D( q ) / CS + IF( RSVEC ) THEN + CALL DAXPY( MVL, -T*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + CALL DAXPY( MVL, + $ CS*SN*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + END IF + END IF + ELSE + IF( D( q ).GE.ONE ) THEN + CALL DAXPY( M, T*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + CALL DAXPY( M, -CS*SN*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + D( p ) = D( p ) / CS + D( q ) = D( q )*CS + IF( RSVEC ) THEN + CALL DAXPY( MVL, T*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + CALL DAXPY( MVL, + $ -CS*SN*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + END IF + ELSE + IF( D( p ).GE.D( q ) ) THEN + CALL DAXPY( M, -T*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + CALL DAXPY( M, CS*SN*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + D( p ) = D( p )*CS + D( q ) = D( q ) / CS + IF( RSVEC ) THEN + CALL DAXPY( MVL, + $ -T*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + CALL DAXPY( MVL, + $ CS*SN*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + END IF + ELSE + CALL DAXPY( M, T*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + CALL DAXPY( M, + $ -CS*SN*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + D( p ) = D( p ) / CS + D( q ) = D( q )*CS + IF( RSVEC ) THEN + CALL DAXPY( MVL, + $ T*APOAQ, V( 1, p ), + $ 1, V( 1, q ), 1 ) + CALL DAXPY( MVL, + $ -CS*SN*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + END IF + END IF + END IF + END IF + END IF +* + ELSE +* .. have to use modified Gram-Schmidt like transformation + CALL DCOPY( M, A( 1, p ), 1, WORK, 1 ) + CALL DLASCL( 'G', 0, 0, AAPP, ONE, M, + $ 1, WORK, LDA, IERR ) + CALL DLASCL( 'G', 0, 0, AAQQ, ONE, M, + $ 1, A( 1, q ), LDA, IERR ) + TEMP1 = -AAPQ*D( p ) / D( q ) + CALL DAXPY( M, TEMP1, WORK, 1, + $ A( 1, q ), 1 ) + CALL DLASCL( 'G', 0, 0, ONE, AAQQ, M, + $ 1, A( 1, q ), LDA, IERR ) + SVA( q ) = AAQQ*DSQRT( MAX( ZERO, + $ ONE-AAPQ*AAPQ ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + END IF +* END IF ROTOK THEN ... ELSE +* +* In the case of cancellation in updating SVA(q), SVA(p) +* recompute SVA(q), SVA(p). + IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS ) + $ THEN + IF( ( AAQQ.LT.ROOTBIG ) .AND. + $ ( AAQQ.GT.ROOTSFMIN ) ) THEN + SVA( q ) = DNRM2( M, A( 1, q ), 1 )* + $ D( q ) + ELSE + T = ZERO + AAQQ = ONE + CALL DLASSQ( M, A( 1, q ), 1, T, + $ AAQQ ) + SVA( q ) = T*DSQRT( AAQQ )*D( q ) + END IF + END IF + IF( ( AAPP / AAPP0 ).LE.ROOTEPS ) THEN + IF( ( AAPP.LT.ROOTBIG ) .AND. + $ ( AAPP.GT.ROOTSFMIN ) ) THEN + AAPP = DNRM2( M, A( 1, p ), 1 )* + $ D( p ) + ELSE + T = ZERO + AAPP = ONE + CALL DLASSQ( M, A( 1, p ), 1, T, + $ AAPP ) + AAPP = T*DSQRT( AAPP )*D( p ) + END IF + SVA( p ) = AAPP + END IF +* + ELSE +* A(:,p) and A(:,q) already numerically orthogonal + IF( ir1.EQ.0 )NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + END IF + ELSE +* A(:,q) is zero column + IF( ir1.EQ.0 )NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + END IF +* + IF( ( i.LE.SWBAND ) .AND. + $ ( PSKIPPED.GT.ROWSKIP ) ) THEN + IF( ir1.EQ.0 )AAPP = -AAPP + NOTROT = 0 + GO TO 2103 + END IF +* + 2002 CONTINUE +* END q-LOOP +* + 2103 CONTINUE +* bailed out of q-loop + + SVA( p ) = AAPP + + ELSE + SVA( p ) = AAPP + IF( ( ir1.EQ.0 ) .AND. ( AAPP.EQ.ZERO ) ) + $ NOTROT = NOTROT + MIN( igl+KBL-1, N ) - p + END IF +* + 2001 CONTINUE +* end of the p-loop +* end of doing the block ( ibr, ibr ) + 1002 CONTINUE +* end of ir1-loop +* +*........................................................ +* ... go to the off diagonal blocks +* + igl = ( ibr-1 )*KBL + 1 +* + DO 2010 jbc = ibr + 1, NBL +* + jgl = ( jbc-1 )*KBL + 1 +* +* doing the block at ( ibr, jbc ) +* + IJBLSK = 0 + DO 2100 p = igl, MIN( igl+KBL-1, N ) +* + AAPP = SVA( p ) +* + IF( AAPP.GT.ZERO ) THEN +* + PSKIPPED = 0 +* + DO 2200 q = jgl, MIN( jgl+KBL-1, N ) +* + AAQQ = SVA( q ) +* + IF( AAQQ.GT.ZERO ) THEN + AAPP0 = AAPP +* +* -#- M x 2 Jacobi SVD -#- +* +* -#- Safe Gram matrix computation -#- +* + IF( AAQQ.GE.ONE ) THEN + IF( AAPP.GE.AAQQ ) THEN + ROTOK = ( SMALL*AAPP ).LE.AAQQ + ELSE + ROTOK = ( SMALL*AAQQ ).LE.AAPP + END IF + IF( AAPP.LT.( BIG / AAQQ ) ) THEN + AAPQ = ( DDOT( M, A( 1, p ), 1, A( 1, + $ q ), 1 )*D( p )*D( q ) / AAQQ ) + $ / AAPP + ELSE + CALL DCOPY( M, A( 1, p ), 1, WORK, 1 ) + CALL DLASCL( 'G', 0, 0, AAPP, D( p ), + $ M, 1, WORK, LDA, IERR ) + AAPQ = DDOT( M, WORK, 1, A( 1, q ), + $ 1 )*D( q ) / AAQQ + END IF + ELSE + IF( AAPP.GE.AAQQ ) THEN + ROTOK = AAPP.LE.( AAQQ / SMALL ) + ELSE + ROTOK = AAQQ.LE.( AAPP / SMALL ) + END IF + IF( AAPP.GT.( SMALL / AAQQ ) ) THEN + AAPQ = ( DDOT( M, A( 1, p ), 1, A( 1, + $ q ), 1 )*D( p )*D( q ) / AAQQ ) + $ / AAPP + ELSE + CALL DCOPY( M, A( 1, q ), 1, WORK, 1 ) + CALL DLASCL( 'G', 0, 0, AAQQ, D( q ), + $ M, 1, WORK, LDA, IERR ) + AAPQ = DDOT( M, WORK, 1, A( 1, p ), + $ 1 )*D( p ) / AAPP + END IF + END IF +* + MXAAPQ = MAX( MXAAPQ, DABS( AAPQ ) ) +* +* TO rotate or NOT to rotate, THAT is the question ... +* + IF( DABS( AAPQ ).GT.TOL ) THEN + NOTROT = 0 +* ROTATED = ROTATED + 1 + PSKIPPED = 0 + ISWROT = ISWROT + 1 +* + IF( ROTOK ) THEN +* + AQOAP = AAQQ / AAPP + APOAQ = AAPP / AAQQ + THETA = -HALF*DABS( AQOAP-APOAQ )/AAPQ + IF( AAQQ.GT.AAPP0 )THETA = -THETA +* + IF( DABS( THETA ).GT.BIGTHETA ) THEN + T = HALF / THETA + FASTR( 3 ) = T*D( p ) / D( q ) + FASTR( 4 ) = -T*D( q ) / D( p ) + CALL DROTM( M, A( 1, p ), 1, + $ A( 1, q ), 1, FASTR ) + IF( RSVEC )CALL DROTM( MVL, + $ V( 1, p ), 1, + $ V( 1, q ), 1, + $ FASTR ) + SVA( q ) = AAQQ*DSQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ ) ) + AAPP = AAPP*DSQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ ) ) + MXSINJ = MAX( MXSINJ, DABS( T ) ) + ELSE +* +* .. choose correct signum for THETA and rotate +* + THSIGN = -DSIGN( ONE, AAPQ ) + IF( AAQQ.GT.AAPP0 )THSIGN = -THSIGN + T = ONE / ( THETA+THSIGN* + $ DSQRT( ONE+THETA*THETA ) ) + CS = DSQRT( ONE / ( ONE+T*T ) ) + SN = T*CS + MXSINJ = MAX( MXSINJ, DABS( SN ) ) + SVA( q ) = AAQQ*DSQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ ) ) + AAPP = AAPP*DSQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ ) ) +* + APOAQ = D( p ) / D( q ) + AQOAP = D( q ) / D( p ) + IF( D( p ).GE.ONE ) THEN +* + IF( D( q ).GE.ONE ) THEN + FASTR( 3 ) = T*APOAQ + FASTR( 4 ) = -T*AQOAP + D( p ) = D( p )*CS + D( q ) = D( q )*CS + CALL DROTM( M, A( 1, p ), 1, + $ A( 1, q ), 1, + $ FASTR ) + IF( RSVEC )CALL DROTM( MVL, + $ V( 1, p ), 1, V( 1, q ), + $ 1, FASTR ) + ELSE + CALL DAXPY( M, -T*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + CALL DAXPY( M, CS*SN*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + IF( RSVEC ) THEN + CALL DAXPY( MVL, -T*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + CALL DAXPY( MVL, + $ CS*SN*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + END IF + D( p ) = D( p )*CS + D( q ) = D( q ) / CS + END IF + ELSE + IF( D( q ).GE.ONE ) THEN + CALL DAXPY( M, T*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + CALL DAXPY( M, -CS*SN*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + IF( RSVEC ) THEN + CALL DAXPY( MVL, T*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + CALL DAXPY( MVL, + $ -CS*SN*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + END IF + D( p ) = D( p ) / CS + D( q ) = D( q )*CS + ELSE + IF( D( p ).GE.D( q ) ) THEN + CALL DAXPY( M, -T*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + CALL DAXPY( M, CS*SN*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + D( p ) = D( p )*CS + D( q ) = D( q ) / CS + IF( RSVEC ) THEN + CALL DAXPY( MVL, + $ -T*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + CALL DAXPY( MVL, + $ CS*SN*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + END IF + ELSE + CALL DAXPY( M, T*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + CALL DAXPY( M, + $ -CS*SN*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + D( p ) = D( p ) / CS + D( q ) = D( q )*CS + IF( RSVEC ) THEN + CALL DAXPY( MVL, + $ T*APOAQ, V( 1, p ), + $ 1, V( 1, q ), 1 ) + CALL DAXPY( MVL, + $ -CS*SN*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + END IF + END IF + END IF + END IF + END IF +* + ELSE + IF( AAPP.GT.AAQQ ) THEN + CALL DCOPY( M, A( 1, p ), 1, WORK, + $ 1 ) + CALL DLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, WORK, LDA, IERR ) + CALL DLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + TEMP1 = -AAPQ*D( p ) / D( q ) + CALL DAXPY( M, TEMP1, WORK, 1, + $ A( 1, q ), 1 ) + CALL DLASCL( 'G', 0, 0, ONE, AAQQ, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + SVA( q ) = AAQQ*DSQRT( MAX( ZERO, + $ ONE-AAPQ*AAPQ ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + ELSE + CALL DCOPY( M, A( 1, q ), 1, WORK, + $ 1 ) + CALL DLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, WORK, LDA, IERR ) + CALL DLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + TEMP1 = -AAPQ*D( q ) / D( p ) + CALL DAXPY( M, TEMP1, WORK, 1, + $ A( 1, p ), 1 ) + CALL DLASCL( 'G', 0, 0, ONE, AAPP, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + SVA( p ) = AAPP*DSQRT( MAX( ZERO, + $ ONE-AAPQ*AAPQ ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + END IF + END IF +* END IF ROTOK THEN ... ELSE +* +* In the case of cancellation in updating SVA(q) +* .. recompute SVA(q) + IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS ) + $ THEN + IF( ( AAQQ.LT.ROOTBIG ) .AND. + $ ( AAQQ.GT.ROOTSFMIN ) ) THEN + SVA( q ) = DNRM2( M, A( 1, q ), 1 )* + $ D( q ) + ELSE + T = ZERO + AAQQ = ONE + CALL DLASSQ( M, A( 1, q ), 1, T, + $ AAQQ ) + SVA( q ) = T*DSQRT( AAQQ )*D( q ) + END IF + END IF + IF( ( AAPP / AAPP0 )**2.LE.ROOTEPS ) THEN + IF( ( AAPP.LT.ROOTBIG ) .AND. + $ ( AAPP.GT.ROOTSFMIN ) ) THEN + AAPP = DNRM2( M, A( 1, p ), 1 )* + $ D( p ) + ELSE + T = ZERO + AAPP = ONE + CALL DLASSQ( M, A( 1, p ), 1, T, + $ AAPP ) + AAPP = T*DSQRT( AAPP )*D( p ) + END IF + SVA( p ) = AAPP + END IF +* end of OK rotation + ELSE + NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF + ELSE + NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF +* + IF( ( i.LE.SWBAND ) .AND. ( IJBLSK.GE.BLSKIP ) ) + $ THEN + SVA( p ) = AAPP + NOTROT = 0 + GO TO 2011 + END IF + IF( ( i.LE.SWBAND ) .AND. + $ ( PSKIPPED.GT.ROWSKIP ) ) THEN + AAPP = -AAPP + NOTROT = 0 + GO TO 2203 + END IF +* + 2200 CONTINUE +* end of the q-loop + 2203 CONTINUE +* + SVA( p ) = AAPP +* + ELSE + IF( AAPP.EQ.ZERO )NOTROT = NOTROT + + $ MIN( jgl+KBL-1, N ) - jgl + 1 + IF( AAPP.LT.ZERO )NOTROT = 0 + END IF + + 2100 CONTINUE +* end of the p-loop + 2010 CONTINUE +* end of the jbc-loop + 2011 CONTINUE +*2011 bailed out of the jbc-loop + DO 2012 p = igl, MIN( igl+KBL-1, N ) + SVA( p ) = DABS( SVA( p ) ) + 2012 CONTINUE +* + 2000 CONTINUE +*2000 :: end of the ibr-loop +* +* .. update SVA(N) + IF( ( SVA( N ).LT.ROOTBIG ) .AND. ( SVA( N ).GT.ROOTSFMIN ) ) + $ THEN + SVA( N ) = DNRM2( M, A( 1, N ), 1 )*D( N ) + ELSE + T = ZERO + AAPP = ONE + CALL DLASSQ( M, A( 1, N ), 1, T, AAPP ) + SVA( N ) = T*DSQRT( AAPP )*D( N ) + END IF +* +* Additional steering devices +* + IF( ( i.LT.SWBAND ) .AND. ( ( MXAAPQ.LE.ROOTTOL ) .OR. + $ ( ISWROT.LE.N ) ) )SWBAND = i +* + IF( ( i.GT.SWBAND+1 ) .AND. ( MXAAPQ.LT.DBLE( N )*TOL ) .AND. + $ ( DBLE( N )*MXAAPQ*MXSINJ.LT.TOL ) ) THEN + GO TO 1994 + END IF +* + IF( NOTROT.GE.EMPTSW )GO TO 1994 + + 1993 CONTINUE +* end i=1:NSWEEP loop +* #:) Reaching this point means that the procedure has comleted the given +* number of iterations. + INFO = NSWEEP - 1 + GO TO 1995 + 1994 CONTINUE +* #:) Reaching this point means that during the i-th sweep all pivots were +* below the given tolerance, causing early exit. +* + INFO = 0 +* #:) INFO = 0 confirms successful iterations. + 1995 CONTINUE +* +* Sort the vector D. + DO 5991 p = 1, N - 1 + q = IDAMAX( N-p+1, SVA( p ), 1 ) + p - 1 + IF( p.NE.q ) THEN + TEMP1 = SVA( p ) + SVA( p ) = SVA( q ) + SVA( q ) = TEMP1 + TEMP1 = D( p ) + D( p ) = D( q ) + D( q ) = TEMP1 + CALL DSWAP( M, A( 1, p ), 1, A( 1, q ), 1 ) + IF( RSVEC )CALL DSWAP( MVL, V( 1, p ), 1, V( 1, q ), 1 ) + END IF + 5991 CONTINUE +* + RETURN +* .. +* .. END OF DGSVJ0 +* .. + END diff --git a/dspl/liblapack/SRC/dgsvj1.f b/dspl/liblapack/SRC/dgsvj1.f new file mode 100644 index 0000000..376682c --- /dev/null +++ b/dspl/liblapack/SRC/dgsvj1.f @@ -0,0 +1,784 @@ +*> \brief \b DGSVJ1 pre-processor for the routine dgesvj, applies Jacobi rotations targeting only particular pivots. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGSVJ1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV, +* EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION EPS, SFMIN, TOL +* INTEGER INFO, LDA, LDV, LWORK, M, MV, N, N1, NSWEEP +* CHARACTER*1 JOBV +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), D( N ), SVA( N ), V( LDV, * ), +* $ WORK( LWORK ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGSVJ1 is called from DGESVJ as a pre-processor and that is its main +*> purpose. It applies Jacobi rotations in the same way as DGESVJ does, but +*> it targets only particular pivots and it does not check convergence +*> (stopping criterion). Few tunning parameters (marked by [TP]) are +*> available for the implementer. +*> +*> Further Details +*> ~~~~~~~~~~~~~~~ +*> DGSVJ1 applies few sweeps of Jacobi rotations in the column space of +*> the input M-by-N matrix A. The pivot pairs are taken from the (1,2) +*> off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The +*> block-entries (tiles) of the (1,2) off-diagonal block are marked by the +*> [x]'s in the following scheme: +*> +*> | * * * [x] [x] [x]| +*> | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. +*> | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. +*> |[x] [x] [x] * * * | +*> |[x] [x] [x] * * * | +*> |[x] [x] [x] * * * | +*> +*> In terms of the columns of A, the first N1 columns are rotated 'against' +*> the remaining N-N1 columns, trying to increase the angle between the +*> corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is +*> tiled using quadratic tiles of side KBL. Here, KBL is a tunning parmeter. +*> The number of sweeps is given in NSWEEP and the orthogonality threshold +*> is given in TOL. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> Specifies whether the output from this procedure is used +*> to compute the matrix V: +*> = 'V': the product of the Jacobi rotations is accumulated +*> by postmulyiplying the N-by-N array V. +*> (See the description of V.) +*> = 'A': the product of the Jacobi rotations is accumulated +*> by postmulyiplying the MV-by-N array V. +*> (See the descriptions of MV and V.) +*> = 'N': the Jacobi rotations are not accumulated. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. +*> M >= N >= 0. +*> \endverbatim +*> +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> N1 specifies the 2 x 2 block partition, the first N1 columns are +*> rotated 'against' the remaining N-N1 columns of A. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, M-by-N matrix A, such that A*diag(D) represents +*> the input matrix. +*> On exit, +*> A_onexit * D_onexit represents the input matrix A*diag(D) +*> post-multiplied by a sequence of Jacobi rotations, where the +*> rotation threshold and the total number of sweeps are given in +*> TOL and NSWEEP, respectively. +*> (See the descriptions of N1, D, TOL and NSWEEP.) +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The array D accumulates the scaling factors from the fast scaled +*> Jacobi rotations. +*> On entry, A*diag(D) represents the input matrix. +*> On exit, A_onexit*diag(D_onexit) represents the input matrix +*> post-multiplied by a sequence of Jacobi rotations, where the +*> rotation threshold and the total number of sweeps are given in +*> TOL and NSWEEP, respectively. +*> (See the descriptions of N1, A, TOL and NSWEEP.) +*> \endverbatim +*> +*> \param[in,out] SVA +*> \verbatim +*> SVA is DOUBLE PRECISION array, dimension (N) +*> On entry, SVA contains the Euclidean norms of the columns of +*> the matrix A*diag(D). +*> On exit, SVA contains the Euclidean norms of the columns of +*> the matrix onexit*diag(D_onexit). +*> \endverbatim +*> +*> \param[in] MV +*> \verbatim +*> MV is INTEGER +*> If JOBV .EQ. 'A', then MV rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV = 'N', then MV is not referenced. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (LDV,N) +*> If JOBV .EQ. 'V' then N rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV .EQ. 'A' then MV rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV = 'N', then V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V, LDV >= 1. +*> If JOBV = 'V', LDV .GE. N. +*> If JOBV = 'A', LDV .GE. MV. +*> \endverbatim +*> +*> \param[in] EPS +*> \verbatim +*> EPS is DOUBLE PRECISION +*> EPS = DLAMCH('Epsilon') +*> \endverbatim +*> +*> \param[in] SFMIN +*> \verbatim +*> SFMIN is DOUBLE PRECISION +*> SFMIN = DLAMCH('Safe Minimum') +*> \endverbatim +*> +*> \param[in] TOL +*> \verbatim +*> TOL is DOUBLE PRECISION +*> TOL is the threshold for Jacobi rotations. For a pair +*> A(:,p), A(:,q) of pivot columns, the Jacobi rotation is +*> applied only if DABS(COS(angle(A(:,p),A(:,q)))) .GT. TOL. +*> \endverbatim +*> +*> \param[in] NSWEEP +*> \verbatim +*> NSWEEP is INTEGER +*> NSWEEP is the number of sweeps of Jacobi rotations to be +*> performed. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> LWORK is the dimension of WORK. LWORK .GE. M. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0 : successful exit. +*> < 0 : if INFO = -i, then the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) +* +* ===================================================================== + SUBROUTINE DGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV, + $ EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + DOUBLE PRECISION EPS, SFMIN, TOL + INTEGER INFO, LDA, LDV, LWORK, M, MV, N, N1, NSWEEP + CHARACTER*1 JOBV +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), D( N ), SVA( N ), V( LDV, * ), + $ WORK( LWORK ) +* .. +* +* ===================================================================== +* +* .. Local Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION AAPP, AAPP0, AAPQ, AAQQ, APOAQ, AQOAP, BIG, + $ BIGTHETA, CS, LARGE, MXAAPQ, MXSINJ, ROOTBIG, + $ ROOTEPS, ROOTSFMIN, ROOTTOL, SMALL, SN, T, + $ TEMP1, THETA, THSIGN + INTEGER BLSKIP, EMPTSW, i, ibr, igl, IERR, IJBLSK, + $ ISWROT, jbc, jgl, KBL, MVL, NOTROT, nblc, nblr, + $ p, PSKIPPED, q, ROWSKIP, SWBAND + LOGICAL APPLV, ROTOK, RSVEC +* .. +* .. Local Arrays .. + DOUBLE PRECISION FASTR( 5 ) +* .. +* .. Intrinsic Functions .. + INTRINSIC DABS, MAX, DBLE, MIN, DSIGN, DSQRT +* .. +* .. External Functions .. + DOUBLE PRECISION DDOT, DNRM2 + INTEGER IDAMAX + LOGICAL LSAME + EXTERNAL IDAMAX, LSAME, DDOT, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLASCL, DLASSQ, DROTM, DSWAP, + $ XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + APPLV = LSAME( JOBV, 'A' ) + RSVEC = LSAME( JOBV, 'V' ) + IF( .NOT.( RSVEC .OR. APPLV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( ( N.LT.0 ) .OR. ( N.GT.M ) ) THEN + INFO = -3 + ELSE IF( N1.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.M ) THEN + INFO = -6 + ELSE IF( ( RSVEC.OR.APPLV ) .AND. ( MV.LT.0 ) ) THEN + INFO = -9 + ELSE IF( ( RSVEC.AND.( LDV.LT.N ) ).OR. + $ ( APPLV.AND.( LDV.LT.MV ) ) ) THEN + INFO = -11 + ELSE IF( TOL.LE.EPS ) THEN + INFO = -14 + ELSE IF( NSWEEP.LT.0 ) THEN + INFO = -15 + ELSE IF( LWORK.LT.M ) THEN + INFO = -17 + ELSE + INFO = 0 + END IF +* +* #:( + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGSVJ1', -INFO ) + RETURN + END IF +* + IF( RSVEC ) THEN + MVL = N + ELSE IF( APPLV ) THEN + MVL = MV + END IF + RSVEC = RSVEC .OR. APPLV + + ROOTEPS = DSQRT( EPS ) + ROOTSFMIN = DSQRT( SFMIN ) + SMALL = SFMIN / EPS + BIG = ONE / SFMIN + ROOTBIG = ONE / ROOTSFMIN + LARGE = BIG / DSQRT( DBLE( M*N ) ) + BIGTHETA = ONE / ROOTEPS + ROOTTOL = DSQRT( TOL ) +* +* .. Initialize the right singular vector matrix .. +* +* RSVEC = LSAME( JOBV, 'Y' ) +* + EMPTSW = N1*( N-N1 ) + NOTROT = 0 + FASTR( 1 ) = ZERO +* +* .. Row-cyclic pivot strategy with de Rijk's pivoting .. +* + KBL = MIN( 8, N ) + NBLR = N1 / KBL + IF( ( NBLR*KBL ).NE.N1 )NBLR = NBLR + 1 + +* .. the tiling is nblr-by-nblc [tiles] + + NBLC = ( N-N1 ) / KBL + IF( ( NBLC*KBL ).NE.( N-N1 ) )NBLC = NBLC + 1 + BLSKIP = ( KBL**2 ) + 1 +*[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. + + ROWSKIP = MIN( 5, KBL ) +*[TP] ROWSKIP is a tuning parameter. + SWBAND = 0 +*[TP] SWBAND is a tuning parameter. It is meaningful and effective +* if SGESVJ is used as a computational routine in the preconditioned +* Jacobi SVD algorithm SGESVJ. +* +* +* | * * * [x] [x] [x]| +* | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. +* | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. +* |[x] [x] [x] * * * | +* |[x] [x] [x] * * * | +* |[x] [x] [x] * * * | +* +* + DO 1993 i = 1, NSWEEP +* .. go go go ... +* + MXAAPQ = ZERO + MXSINJ = ZERO + ISWROT = 0 +* + NOTROT = 0 + PSKIPPED = 0 +* + DO 2000 ibr = 1, NBLR + + igl = ( ibr-1 )*KBL + 1 +* +* +*........................................................ +* ... go to the off diagonal blocks + + igl = ( ibr-1 )*KBL + 1 + + DO 2010 jbc = 1, NBLC + + jgl = N1 + ( jbc-1 )*KBL + 1 + +* doing the block at ( ibr, jbc ) + + IJBLSK = 0 + DO 2100 p = igl, MIN( igl+KBL-1, N1 ) + + AAPP = SVA( p ) + + IF( AAPP.GT.ZERO ) THEN + + PSKIPPED = 0 + + DO 2200 q = jgl, MIN( jgl+KBL-1, N ) +* + AAQQ = SVA( q ) + + IF( AAQQ.GT.ZERO ) THEN + AAPP0 = AAPP +* +* .. M x 2 Jacobi SVD .. +* +* .. Safe Gram matrix computation .. +* + IF( AAQQ.GE.ONE ) THEN + IF( AAPP.GE.AAQQ ) THEN + ROTOK = ( SMALL*AAPP ).LE.AAQQ + ELSE + ROTOK = ( SMALL*AAQQ ).LE.AAPP + END IF + IF( AAPP.LT.( BIG / AAQQ ) ) THEN + AAPQ = ( DDOT( M, A( 1, p ), 1, A( 1, + $ q ), 1 )*D( p )*D( q ) / AAQQ ) + $ / AAPP + ELSE + CALL DCOPY( M, A( 1, p ), 1, WORK, 1 ) + CALL DLASCL( 'G', 0, 0, AAPP, D( p ), + $ M, 1, WORK, LDA, IERR ) + AAPQ = DDOT( M, WORK, 1, A( 1, q ), + $ 1 )*D( q ) / AAQQ + END IF + ELSE + IF( AAPP.GE.AAQQ ) THEN + ROTOK = AAPP.LE.( AAQQ / SMALL ) + ELSE + ROTOK = AAQQ.LE.( AAPP / SMALL ) + END IF + IF( AAPP.GT.( SMALL / AAQQ ) ) THEN + AAPQ = ( DDOT( M, A( 1, p ), 1, A( 1, + $ q ), 1 )*D( p )*D( q ) / AAQQ ) + $ / AAPP + ELSE + CALL DCOPY( M, A( 1, q ), 1, WORK, 1 ) + CALL DLASCL( 'G', 0, 0, AAQQ, D( q ), + $ M, 1, WORK, LDA, IERR ) + AAPQ = DDOT( M, WORK, 1, A( 1, p ), + $ 1 )*D( p ) / AAPP + END IF + END IF + + MXAAPQ = MAX( MXAAPQ, DABS( AAPQ ) ) + +* TO rotate or NOT to rotate, THAT is the question ... +* + IF( DABS( AAPQ ).GT.TOL ) THEN + NOTROT = 0 +* ROTATED = ROTATED + 1 + PSKIPPED = 0 + ISWROT = ISWROT + 1 +* + IF( ROTOK ) THEN +* + AQOAP = AAQQ / AAPP + APOAQ = AAPP / AAQQ + THETA = -HALF*DABS(AQOAP-APOAQ) / AAPQ + IF( AAQQ.GT.AAPP0 )THETA = -THETA + + IF( DABS( THETA ).GT.BIGTHETA ) THEN + T = HALF / THETA + FASTR( 3 ) = T*D( p ) / D( q ) + FASTR( 4 ) = -T*D( q ) / D( p ) + CALL DROTM( M, A( 1, p ), 1, + $ A( 1, q ), 1, FASTR ) + IF( RSVEC )CALL DROTM( MVL, + $ V( 1, p ), 1, + $ V( 1, q ), 1, + $ FASTR ) + SVA( q ) = AAQQ*DSQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ ) ) + AAPP = AAPP*DSQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ ) ) + MXSINJ = MAX( MXSINJ, DABS( T ) ) + ELSE +* +* .. choose correct signum for THETA and rotate +* + THSIGN = -DSIGN( ONE, AAPQ ) + IF( AAQQ.GT.AAPP0 )THSIGN = -THSIGN + T = ONE / ( THETA+THSIGN* + $ DSQRT( ONE+THETA*THETA ) ) + CS = DSQRT( ONE / ( ONE+T*T ) ) + SN = T*CS + MXSINJ = MAX( MXSINJ, DABS( SN ) ) + SVA( q ) = AAQQ*DSQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ ) ) + AAPP = AAPP*DSQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ ) ) + + APOAQ = D( p ) / D( q ) + AQOAP = D( q ) / D( p ) + IF( D( p ).GE.ONE ) THEN +* + IF( D( q ).GE.ONE ) THEN + FASTR( 3 ) = T*APOAQ + FASTR( 4 ) = -T*AQOAP + D( p ) = D( p )*CS + D( q ) = D( q )*CS + CALL DROTM( M, A( 1, p ), 1, + $ A( 1, q ), 1, + $ FASTR ) + IF( RSVEC )CALL DROTM( MVL, + $ V( 1, p ), 1, V( 1, q ), + $ 1, FASTR ) + ELSE + CALL DAXPY( M, -T*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + CALL DAXPY( M, CS*SN*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + IF( RSVEC ) THEN + CALL DAXPY( MVL, -T*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + CALL DAXPY( MVL, + $ CS*SN*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + END IF + D( p ) = D( p )*CS + D( q ) = D( q ) / CS + END IF + ELSE + IF( D( q ).GE.ONE ) THEN + CALL DAXPY( M, T*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + CALL DAXPY( M, -CS*SN*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + IF( RSVEC ) THEN + CALL DAXPY( MVL, T*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + CALL DAXPY( MVL, + $ -CS*SN*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + END IF + D( p ) = D( p ) / CS + D( q ) = D( q )*CS + ELSE + IF( D( p ).GE.D( q ) ) THEN + CALL DAXPY( M, -T*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + CALL DAXPY( M, CS*SN*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + D( p ) = D( p )*CS + D( q ) = D( q ) / CS + IF( RSVEC ) THEN + CALL DAXPY( MVL, + $ -T*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + CALL DAXPY( MVL, + $ CS*SN*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + END IF + ELSE + CALL DAXPY( M, T*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + CALL DAXPY( M, + $ -CS*SN*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + D( p ) = D( p ) / CS + D( q ) = D( q )*CS + IF( RSVEC ) THEN + CALL DAXPY( MVL, + $ T*APOAQ, V( 1, p ), + $ 1, V( 1, q ), 1 ) + CALL DAXPY( MVL, + $ -CS*SN*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + END IF + END IF + END IF + END IF + END IF + + ELSE + IF( AAPP.GT.AAQQ ) THEN + CALL DCOPY( M, A( 1, p ), 1, WORK, + $ 1 ) + CALL DLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, WORK, LDA, IERR ) + CALL DLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + TEMP1 = -AAPQ*D( p ) / D( q ) + CALL DAXPY( M, TEMP1, WORK, 1, + $ A( 1, q ), 1 ) + CALL DLASCL( 'G', 0, 0, ONE, AAQQ, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + SVA( q ) = AAQQ*DSQRT( MAX( ZERO, + $ ONE-AAPQ*AAPQ ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + ELSE + CALL DCOPY( M, A( 1, q ), 1, WORK, + $ 1 ) + CALL DLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, WORK, LDA, IERR ) + CALL DLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + TEMP1 = -AAPQ*D( q ) / D( p ) + CALL DAXPY( M, TEMP1, WORK, 1, + $ A( 1, p ), 1 ) + CALL DLASCL( 'G', 0, 0, ONE, AAPP, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + SVA( p ) = AAPP*DSQRT( MAX( ZERO, + $ ONE-AAPQ*AAPQ ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + END IF + END IF +* END IF ROTOK THEN ... ELSE +* +* In the case of cancellation in updating SVA(q) +* .. recompute SVA(q) + IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS ) + $ THEN + IF( ( AAQQ.LT.ROOTBIG ) .AND. + $ ( AAQQ.GT.ROOTSFMIN ) ) THEN + SVA( q ) = DNRM2( M, A( 1, q ), 1 )* + $ D( q ) + ELSE + T = ZERO + AAQQ = ONE + CALL DLASSQ( M, A( 1, q ), 1, T, + $ AAQQ ) + SVA( q ) = T*DSQRT( AAQQ )*D( q ) + END IF + END IF + IF( ( AAPP / AAPP0 )**2.LE.ROOTEPS ) THEN + IF( ( AAPP.LT.ROOTBIG ) .AND. + $ ( AAPP.GT.ROOTSFMIN ) ) THEN + AAPP = DNRM2( M, A( 1, p ), 1 )* + $ D( p ) + ELSE + T = ZERO + AAPP = ONE + CALL DLASSQ( M, A( 1, p ), 1, T, + $ AAPP ) + AAPP = T*DSQRT( AAPP )*D( p ) + END IF + SVA( p ) = AAPP + END IF +* end of OK rotation + ELSE + NOTROT = NOTROT + 1 +* SKIPPED = SKIPPED + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF + ELSE + NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF + +* IF ( NOTROT .GE. EMPTSW ) GO TO 2011 + IF( ( i.LE.SWBAND ) .AND. ( IJBLSK.GE.BLSKIP ) ) + $ THEN + SVA( p ) = AAPP + NOTROT = 0 + GO TO 2011 + END IF + IF( ( i.LE.SWBAND ) .AND. + $ ( PSKIPPED.GT.ROWSKIP ) ) THEN + AAPP = -AAPP + NOTROT = 0 + GO TO 2203 + END IF + +* + 2200 CONTINUE +* end of the q-loop + 2203 CONTINUE + + SVA( p ) = AAPP +* + ELSE + IF( AAPP.EQ.ZERO )NOTROT = NOTROT + + $ MIN( jgl+KBL-1, N ) - jgl + 1 + IF( AAPP.LT.ZERO )NOTROT = 0 +*** IF ( NOTROT .GE. EMPTSW ) GO TO 2011 + END IF + + 2100 CONTINUE +* end of the p-loop + 2010 CONTINUE +* end of the jbc-loop + 2011 CONTINUE +*2011 bailed out of the jbc-loop + DO 2012 p = igl, MIN( igl+KBL-1, N ) + SVA( p ) = DABS( SVA( p ) ) + 2012 CONTINUE +*** IF ( NOTROT .GE. EMPTSW ) GO TO 1994 + 2000 CONTINUE +*2000 :: end of the ibr-loop +* +* .. update SVA(N) + IF( ( SVA( N ).LT.ROOTBIG ) .AND. ( SVA( N ).GT.ROOTSFMIN ) ) + $ THEN + SVA( N ) = DNRM2( M, A( 1, N ), 1 )*D( N ) + ELSE + T = ZERO + AAPP = ONE + CALL DLASSQ( M, A( 1, N ), 1, T, AAPP ) + SVA( N ) = T*DSQRT( AAPP )*D( N ) + END IF +* +* Additional steering devices +* + IF( ( i.LT.SWBAND ) .AND. ( ( MXAAPQ.LE.ROOTTOL ) .OR. + $ ( ISWROT.LE.N ) ) )SWBAND = i + + IF( ( i.GT.SWBAND+1 ) .AND. ( MXAAPQ.LT.DBLE( N )*TOL ) .AND. + $ ( DBLE( N )*MXAAPQ*MXSINJ.LT.TOL ) ) THEN + GO TO 1994 + END IF + +* + IF( NOTROT.GE.EMPTSW )GO TO 1994 + + 1993 CONTINUE +* end i=1:NSWEEP loop +* #:) Reaching this point means that the procedure has completed the given +* number of sweeps. + INFO = NSWEEP - 1 + GO TO 1995 + 1994 CONTINUE +* #:) Reaching this point means that during the i-th sweep all pivots were +* below the given threshold, causing early exit. + + INFO = 0 +* #:) INFO = 0 confirms successful iterations. + 1995 CONTINUE +* +* Sort the vector D +* + DO 5991 p = 1, N - 1 + q = IDAMAX( N-p+1, SVA( p ), 1 ) + p - 1 + IF( p.NE.q ) THEN + TEMP1 = SVA( p ) + SVA( p ) = SVA( q ) + SVA( q ) = TEMP1 + TEMP1 = D( p ) + D( p ) = D( q ) + D( q ) = TEMP1 + CALL DSWAP( M, A( 1, p ), 1, A( 1, q ), 1 ) + IF( RSVEC )CALL DSWAP( MVL, V( 1, p ), 1, V( 1, q ), 1 ) + END IF + 5991 CONTINUE +* + RETURN +* .. +* .. END OF DGSVJ1 +* .. + END diff --git a/dspl/liblapack/SRC/dgtcon.f b/dspl/liblapack/SRC/dgtcon.f new file mode 100644 index 0000000..4271823 --- /dev/null +++ b/dspl/liblapack/SRC/dgtcon.f @@ -0,0 +1,255 @@ +*> \brief \b DGTCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGTCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER INFO, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION D( * ), DL( * ), DU( * ), DU2( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGTCON estimates the reciprocal of the condition number of a real +*> tridiagonal matrix A using the LU factorization as computed by +*> DGTTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) multipliers that define the matrix L from the +*> LU factorization of A as computed by DGTTRF. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the upper triangular matrix U from +*> the LU factorization of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) elements of the first superdiagonal of U. +*> \endverbatim +*> +*> \param[in] DU2 +*> \verbatim +*> DU2 is DOUBLE PRECISION array, dimension (N-2) +*> The (n-2) elements of the second superdiagonal of U. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= n, row i of the matrix was +*> interchanged with row IPIV(i). IPIV(i) will always be either +*> i or i+1; IPIV(i) = i indicates a row interchange was not +*> required. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> If NORM = '1' or 'O', the 1-norm of the original matrix A. +*> If NORM = 'I', the infinity-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGTcomputational +* +* ===================================================================== + SUBROUTINE DGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND, + $ WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION D( * ), DL( * ), DU( * ), DU2( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + INTEGER I, KASE, KASE1 + DOUBLE PRECISION AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGTTRS, DLACN2, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGTCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* +* Check that D(1:N) is non-zero. +* + DO 10 I = 1, N + IF( D( I ).EQ.ZERO ) + $ RETURN + 10 CONTINUE +* + AINVNM = ZERO + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 20 CONTINUE + CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(U)*inv(L). +* + CALL DGTTRS( 'No transpose', N, 1, DL, D, DU, DU2, IPIV, + $ WORK, N, INFO ) + ELSE +* +* Multiply by inv(L**T)*inv(U**T). +* + CALL DGTTRS( 'Transpose', N, 1, DL, D, DU, DU2, IPIV, WORK, + $ N, INFO ) + END IF + GO TO 20 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of DGTCON +* + END diff --git a/dspl/liblapack/SRC/dgtrfs.f b/dspl/liblapack/SRC/dgtrfs.f new file mode 100644 index 0000000..7488935 --- /dev/null +++ b/dspl/liblapack/SRC/dgtrfs.f @@ -0,0 +1,474 @@ +*> \brief \b DGTRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGTRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, +* IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION B( LDB, * ), BERR( * ), D( * ), DF( * ), +* $ DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ), +* $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGTRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is tridiagonal, and provides +*> error bounds and backward error estimates for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) subdiagonal elements of A. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) superdiagonal elements of A. +*> \endverbatim +*> +*> \param[in] DLF +*> \verbatim +*> DLF is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) multipliers that define the matrix L from the +*> LU factorization of A as computed by DGTTRF. +*> \endverbatim +*> +*> \param[in] DF +*> \verbatim +*> DF is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the upper triangular matrix U from +*> the LU factorization of A. +*> \endverbatim +*> +*> \param[in] DUF +*> \verbatim +*> DUF is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) elements of the first superdiagonal of U. +*> \endverbatim +*> +*> \param[in] DU2 +*> \verbatim +*> DU2 is DOUBLE PRECISION array, dimension (N-2) +*> The (n-2) elements of the second superdiagonal of U. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= n, row i of the matrix was +*> interchanged with row IPIV(i). IPIV(i) will always be either +*> i or i+1; IPIV(i) = i indicates a row interchange was not +*> required. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by DGTTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGTcomputational +* +* ===================================================================== + SUBROUTINE DGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, + $ IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION B( LDB, * ), BERR( * ), D( * ), DF( * ), + $ DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANSN, TRANST + INTEGER COUNT, I, J, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGTTRS, DLACN2, DLAGTM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGTRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'T' + ELSE + TRANSN = 'T' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = 4 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 110 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DLAGTM( TRANS, N, 1, -ONE, DL, D, DU, X( 1, J ), LDX, ONE, + $ WORK( N+1 ), N ) +* +* Compute abs(op(A))*abs(x) + abs(b) for use in the backward +* error bound. +* + IF( NOTRAN ) THEN + IF( N.EQ.1 ) THEN + WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) ) + ELSE + WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) ) + + $ ABS( DU( 1 )*X( 2, J ) ) + DO 30 I = 2, N - 1 + WORK( I ) = ABS( B( I, J ) ) + + $ ABS( DL( I-1 )*X( I-1, J ) ) + + $ ABS( D( I )*X( I, J ) ) + + $ ABS( DU( I )*X( I+1, J ) ) + 30 CONTINUE + WORK( N ) = ABS( B( N, J ) ) + + $ ABS( DL( N-1 )*X( N-1, J ) ) + + $ ABS( D( N )*X( N, J ) ) + END IF + ELSE + IF( N.EQ.1 ) THEN + WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) ) + ELSE + WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) ) + + $ ABS( DL( 1 )*X( 2, J ) ) + DO 40 I = 2, N - 1 + WORK( I ) = ABS( B( I, J ) ) + + $ ABS( DU( I-1 )*X( I-1, J ) ) + + $ ABS( D( I )*X( I, J ) ) + + $ ABS( DL( I )*X( I+1, J ) ) + 40 CONTINUE + WORK( N ) = ABS( B( N, J ) ) + + $ ABS( DU( N-1 )*X( N-1, J ) ) + + $ ABS( D( N )*X( N, J ) ) + END IF + END IF +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + S = ZERO + DO 50 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 50 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL DGTTRS( TRANS, N, 1, DLF, DF, DUF, DU2, IPIV, + $ WORK( N+1 ), N, INFO ) + CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 60 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 60 CONTINUE +* + KASE = 0 + 70 CONTINUE + CALL DLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**T). +* + CALL DGTTRS( TRANST, N, 1, DLF, DF, DUF, DU2, IPIV, + $ WORK( N+1 ), N, INFO ) + DO 80 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 80 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 90 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 90 CONTINUE + CALL DGTTRS( TRANSN, N, 1, DLF, DF, DUF, DU2, IPIV, + $ WORK( N+1 ), N, INFO ) + END IF + GO TO 70 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 100 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 100 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 110 CONTINUE +* + RETURN +* +* End of DGTRFS +* + END diff --git a/dspl/liblapack/SRC/dgtsv.f b/dspl/liblapack/SRC/dgtsv.f new file mode 100644 index 0000000..b683eaa --- /dev/null +++ b/dspl/liblapack/SRC/dgtsv.f @@ -0,0 +1,333 @@ +*> \brief DGTSV computes the solution to system of linear equations A * X = B for GT matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGTSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION B( LDB, * ), D( * ), DL( * ), DU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGTSV solves the equation +*> +*> A*X = B, +*> +*> where A is an n by n tridiagonal matrix, by Gaussian elimination with +*> partial pivoting. +*> +*> Note that the equation A**T*X = B may be solved by interchanging the +*> order of the arguments DU and DL. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] DL +*> \verbatim +*> DL is DOUBLE PRECISION array, dimension (N-1) +*> On entry, DL must contain the (n-1) sub-diagonal elements of +*> A. +*> +*> On exit, DL is overwritten by the (n-2) elements of the +*> second super-diagonal of the upper triangular matrix U from +*> the LU factorization of A, in DL(1), ..., DL(n-2). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, D must contain the diagonal elements of A. +*> +*> On exit, D is overwritten by the n diagonal elements of U. +*> \endverbatim +*> +*> \param[in,out] DU +*> \verbatim +*> DU is DOUBLE PRECISION array, dimension (N-1) +*> On entry, DU must contain the (n-1) super-diagonal elements +*> of A. +*> +*> On exit, DU is overwritten by the (n-1) elements of the first +*> super-diagonal of U. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N by NRHS matrix of right hand side matrix B. +*> On exit, if INFO = 0, the N by NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero, and the solution +*> has not been computed. The factorization has not been +*> completed unless i = N. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGTsolve +* +* ===================================================================== + SUBROUTINE DGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( LDB, * ), D( * ), DL( * ), DU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION FACT, TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGTSV ', -INFO ) + RETURN + END IF +* + IF( N.EQ.0 ) + $ RETURN +* + IF( NRHS.EQ.1 ) THEN + DO 10 I = 1, N - 2 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN +* +* No row interchange required +* + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + B( I+1, 1 ) = B( I+1, 1 ) - FACT*B( I, 1 ) + ELSE + INFO = I + RETURN + END IF + DL( I ) = ZERO + ELSE +* +* Interchange rows I and I+1 +* + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + TEMP = D( I+1 ) + D( I+1 ) = DU( I ) - FACT*TEMP + DL( I ) = DU( I+1 ) + DU( I+1 ) = -FACT*DL( I ) + DU( I ) = TEMP + TEMP = B( I, 1 ) + B( I, 1 ) = B( I+1, 1 ) + B( I+1, 1 ) = TEMP - FACT*B( I+1, 1 ) + END IF + 10 CONTINUE + IF( N.GT.1 ) THEN + I = N - 1 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + B( I+1, 1 ) = B( I+1, 1 ) - FACT*B( I, 1 ) + ELSE + INFO = I + RETURN + END IF + ELSE + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + TEMP = D( I+1 ) + D( I+1 ) = DU( I ) - FACT*TEMP + DU( I ) = TEMP + TEMP = B( I, 1 ) + B( I, 1 ) = B( I+1, 1 ) + B( I+1, 1 ) = TEMP - FACT*B( I+1, 1 ) + END IF + END IF + IF( D( N ).EQ.ZERO ) THEN + INFO = N + RETURN + END IF + ELSE + DO 40 I = 1, N - 2 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN +* +* No row interchange required +* + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + DO 20 J = 1, NRHS + B( I+1, J ) = B( I+1, J ) - FACT*B( I, J ) + 20 CONTINUE + ELSE + INFO = I + RETURN + END IF + DL( I ) = ZERO + ELSE +* +* Interchange rows I and I+1 +* + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + TEMP = D( I+1 ) + D( I+1 ) = DU( I ) - FACT*TEMP + DL( I ) = DU( I+1 ) + DU( I+1 ) = -FACT*DL( I ) + DU( I ) = TEMP + DO 30 J = 1, NRHS + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - FACT*B( I+1, J ) + 30 CONTINUE + END IF + 40 CONTINUE + IF( N.GT.1 ) THEN + I = N - 1 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + DO 50 J = 1, NRHS + B( I+1, J ) = B( I+1, J ) - FACT*B( I, J ) + 50 CONTINUE + ELSE + INFO = I + RETURN + END IF + ELSE + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + TEMP = D( I+1 ) + D( I+1 ) = DU( I ) - FACT*TEMP + DU( I ) = TEMP + DO 60 J = 1, NRHS + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - FACT*B( I+1, J ) + 60 CONTINUE + END IF + END IF + IF( D( N ).EQ.ZERO ) THEN + INFO = N + RETURN + END IF + END IF +* +* Back solve with the matrix U from the factorization. +* + IF( NRHS.LE.2 ) THEN + J = 1 + 70 CONTINUE + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / D( N-1 ) + DO 80 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DL( I )* + $ B( I+2, J ) ) / D( I ) + 80 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 70 + END IF + ELSE + DO 100 J = 1, NRHS + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 90 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DL( I )* + $ B( I+2, J ) ) / D( I ) + 90 CONTINUE + 100 CONTINUE + END IF +* + RETURN +* +* End of DGTSV +* + END diff --git a/dspl/liblapack/SRC/dgtsvx.f b/dspl/liblapack/SRC/dgtsvx.f new file mode 100644 index 0000000..92bc9ea --- /dev/null +++ b/dspl/liblapack/SRC/dgtsvx.f @@ -0,0 +1,414 @@ +*> \brief DGTSVX computes the solution to system of linear equations A * X = B for GT matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGTSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, +* DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER FACT, TRANS +* INTEGER INFO, LDB, LDX, N, NRHS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION B( LDB, * ), BERR( * ), D( * ), DF( * ), +* $ DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ), +* $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGTSVX uses the LU factorization to compute the solution to a real +*> system of linear equations A * X = B or A**T * X = B, +*> where A is a tridiagonal matrix of order N and X and B are N-by-NRHS +*> matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'N', the LU decomposition is used to factor the matrix A +*> as A = L * U, where L is a product of permutation and unit lower +*> bidiagonal matrices and U is upper triangular with nonzeros in +*> only the main diagonal and first two superdiagonals. +*> +*> 2. If some U(i,i)=0, so that U is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 3. The system of equations is solved for X using the factored form +*> of A. +*> +*> 4. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of A has been +*> supplied on entry. +*> = 'F': DLF, DF, DUF, DU2, and IPIV contain the factored +*> form of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV +*> will not be modified. +*> = 'N': The matrix will be copied to DLF, DF, and DUF +*> and factored. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) subdiagonal elements of A. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) superdiagonal elements of A. +*> \endverbatim +*> +*> \param[in,out] DLF +*> \verbatim +*> DLF is DOUBLE PRECISION array, dimension (N-1) +*> If FACT = 'F', then DLF is an input argument and on entry +*> contains the (n-1) multipliers that define the matrix L from +*> the LU factorization of A as computed by DGTTRF. +*> +*> If FACT = 'N', then DLF is an output argument and on exit +*> contains the (n-1) multipliers that define the matrix L from +*> the LU factorization of A. +*> \endverbatim +*> +*> \param[in,out] DF +*> \verbatim +*> DF is DOUBLE PRECISION array, dimension (N) +*> If FACT = 'F', then DF is an input argument and on entry +*> contains the n diagonal elements of the upper triangular +*> matrix U from the LU factorization of A. +*> +*> If FACT = 'N', then DF is an output argument and on exit +*> contains the n diagonal elements of the upper triangular +*> matrix U from the LU factorization of A. +*> \endverbatim +*> +*> \param[in,out] DUF +*> \verbatim +*> DUF is DOUBLE PRECISION array, dimension (N-1) +*> If FACT = 'F', then DUF is an input argument and on entry +*> contains the (n-1) elements of the first superdiagonal of U. +*> +*> If FACT = 'N', then DUF is an output argument and on exit +*> contains the (n-1) elements of the first superdiagonal of U. +*> \endverbatim +*> +*> \param[in,out] DU2 +*> \verbatim +*> DU2 is DOUBLE PRECISION array, dimension (N-2) +*> If FACT = 'F', then DU2 is an input argument and on entry +*> contains the (n-2) elements of the second superdiagonal of +*> U. +*> +*> If FACT = 'N', then DU2 is an output argument and on exit +*> contains the (n-2) elements of the second superdiagonal of +*> U. +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains the pivot indices from the LU factorization of A as +*> computed by DGTTRF. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains the pivot indices from the LU factorization of A; +*> row i of the matrix was interchanged with row IPIV(i). +*> IPIV(i) will always be either i or i+1; IPIV(i) = i indicates +*> a row interchange was not required. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The estimate of the reciprocal condition number of the matrix +*> A. If RCOND is less than the machine precision (in +*> particular, if RCOND = 0), the matrix is singular to working +*> precision. This condition is indicated by a return code of +*> INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: U(i,i) is exactly zero. The factorization +*> has not been completed unless i = N, but the +*> factor U is exactly singular, so the solution +*> and error bounds could not be computed. +*> RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGTsolve +* +* ===================================================================== + SUBROUTINE DGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, + $ DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER FACT, TRANS + INTEGER INFO, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION B( LDB, * ), BERR( * ), D( * ), DF( * ), + $ DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT, NOTRAN + CHARACTER NORM + DOUBLE PRECISION ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANGT + EXTERNAL LSAME, DLAMCH, DLANGT +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGTCON, DGTRFS, DGTTRF, DGTTRS, DLACPY, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGTSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the LU factorization of A. +* + CALL DCOPY( N, D, 1, DF, 1 ) + IF( N.GT.1 ) THEN + CALL DCOPY( N-1, DL, 1, DLF, 1 ) + CALL DCOPY( N-1, DU, 1, DUF, 1 ) + END IF + CALL DGTTRF( N, DLF, DF, DUF, DU2, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = DLANGT( NORM, N, DL, D, DU ) +* +* Compute the reciprocal of the condition number of A. +* + CALL DGTCON( NORM, N, DLF, DF, DUF, DU2, IPIV, ANORM, RCOND, WORK, + $ IWORK, INFO ) +* +* Compute the solution vectors X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DGTTRS( TRANS, N, NRHS, DLF, DF, DUF, DU2, IPIV, X, LDX, + $ INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL DGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, IPIV, + $ B, LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of DGTSVX +* + END diff --git a/dspl/liblapack/SRC/dgttrf.f b/dspl/liblapack/SRC/dgttrf.f new file mode 100644 index 0000000..3c9808f --- /dev/null +++ b/dspl/liblapack/SRC/dgttrf.f @@ -0,0 +1,237 @@ +*> \brief \b DGTTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGTTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGTTRF computes an LU factorization of a real tridiagonal matrix A +*> using elimination with partial pivoting and row interchanges. +*> +*> The factorization has the form +*> A = L * U +*> where L is a product of permutation and unit lower bidiagonal +*> matrices and U is upper triangular with nonzeros in only the main +*> diagonal and first two superdiagonals. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in,out] DL +*> \verbatim +*> DL is DOUBLE PRECISION array, dimension (N-1) +*> On entry, DL must contain the (n-1) sub-diagonal elements of +*> A. +*> +*> On exit, DL is overwritten by the (n-1) multipliers that +*> define the matrix L from the LU factorization of A. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, D must contain the diagonal elements of A. +*> +*> On exit, D is overwritten by the n diagonal elements of the +*> upper triangular matrix U from the LU factorization of A. +*> \endverbatim +*> +*> \param[in,out] DU +*> \verbatim +*> DU is DOUBLE PRECISION array, dimension (N-1) +*> On entry, DU must contain the (n-1) super-diagonal elements +*> of A. +*> +*> On exit, DU is overwritten by the (n-1) elements of the first +*> super-diagonal of U. +*> \endverbatim +*> +*> \param[out] DU2 +*> \verbatim +*> DU2 is DOUBLE PRECISION array, dimension (N-2) +*> On exit, DU2 is overwritten by the (n-2) elements of the +*> second super-diagonal of U. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= n, row i of the matrix was +*> interchanged with row IPIV(i). IPIV(i) will always be either +*> i or i+1; IPIV(i) = i indicates a row interchange was not +*> required. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, U(k,k) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGTcomputational +* +* ===================================================================== + SUBROUTINE DGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION FACT, TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'DGTTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize IPIV(i) = i and DU2(I) = 0 +* + DO 10 I = 1, N + IPIV( I ) = I + 10 CONTINUE + DO 20 I = 1, N - 2 + DU2( I ) = ZERO + 20 CONTINUE +* + DO 30 I = 1, N - 2 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN +* +* No row interchange required, eliminate DL(I) +* + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + DL( I ) = FACT + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + END IF + ELSE +* +* Interchange rows I and I+1, eliminate DL(I) +* + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + DL( I ) = FACT + TEMP = DU( I ) + DU( I ) = D( I+1 ) + D( I+1 ) = TEMP - FACT*D( I+1 ) + DU2( I ) = DU( I+1 ) + DU( I+1 ) = -FACT*DU( I+1 ) + IPIV( I ) = I + 1 + END IF + 30 CONTINUE + IF( N.GT.1 ) THEN + I = N - 1 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + DL( I ) = FACT + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + END IF + ELSE + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + DL( I ) = FACT + TEMP = DU( I ) + DU( I ) = D( I+1 ) + D( I+1 ) = TEMP - FACT*D( I+1 ) + IPIV( I ) = I + 1 + END IF + END IF +* +* Check for a zero on the diagonal of U. +* + DO 40 I = 1, N + IF( D( I ).EQ.ZERO ) THEN + INFO = I + GO TO 50 + END IF + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of DGTTRF +* + END diff --git a/dspl/liblapack/SRC/dgttrs.f b/dspl/liblapack/SRC/dgttrs.f new file mode 100644 index 0000000..3bbeb1d --- /dev/null +++ b/dspl/liblapack/SRC/dgttrs.f @@ -0,0 +1,223 @@ +*> \brief \b DGTTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGTTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGTTRS solves one of the systems of equations +*> A*X = B or A**T*X = B, +*> with a tridiagonal matrix A using the LU factorization computed +*> by DGTTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations. +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T* X = B (Transpose) +*> = 'C': A**T* X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) multipliers that define the matrix L from the +*> LU factorization of A. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the upper triangular matrix U from +*> the LU factorization of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) elements of the first super-diagonal of U. +*> \endverbatim +*> +*> \param[in] DU2 +*> \verbatim +*> DU2 is DOUBLE PRECISION array, dimension (N-2) +*> The (n-2) elements of the second super-diagonal of U. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= n, row i of the matrix was +*> interchanged with row IPIV(i). IPIV(i) will always be either +*> i or i+1; IPIV(i) = i indicates a row interchange was not +*> required. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the matrix of right hand side vectors B. +*> On exit, B is overwritten by the solution vectors X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGTcomputational +* +* ===================================================================== + SUBROUTINE DGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER ITRANS, J, JB, NB +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DGTTS2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOTRAN = ( TRANS.EQ.'N' .OR. TRANS.EQ.'n' ) + IF( .NOT.NOTRAN .AND. .NOT.( TRANS.EQ.'T' .OR. TRANS.EQ. + $ 't' ) .AND. .NOT.( TRANS.EQ.'C' .OR. TRANS.EQ.'c' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGTTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Decode TRANS +* + IF( NOTRAN ) THEN + ITRANS = 0 + ELSE + ITRANS = 1 + END IF +* +* Determine the number of right-hand sides to solve at a time. +* + IF( NRHS.EQ.1 ) THEN + NB = 1 + ELSE + NB = MAX( 1, ILAENV( 1, 'DGTTRS', TRANS, N, NRHS, -1, -1 ) ) + END IF +* + IF( NB.GE.NRHS ) THEN + CALL DGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) + ELSE + DO 10 J = 1, NRHS, NB + JB = MIN( NRHS-J+1, NB ) + CALL DGTTS2( ITRANS, N, JB, DL, D, DU, DU2, IPIV, B( 1, J ), + $ LDB ) + 10 CONTINUE + END IF +* +* End of DGTTRS +* + END diff --git a/dspl/liblapack/SRC/dgtts2.f b/dspl/liblapack/SRC/dgtts2.f new file mode 100644 index 0000000..39e7b00 --- /dev/null +++ b/dspl/liblapack/SRC/dgtts2.f @@ -0,0 +1,274 @@ +*> \brief \b DGTTS2 solves a system of linear equations with a tridiagonal matrix using the LU factorization computed by sgttrf. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGTTS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) +* +* .. Scalar Arguments .. +* INTEGER ITRANS, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGTTS2 solves one of the systems of equations +*> A*X = B or A**T*X = B, +*> with a tridiagonal matrix A using the LU factorization computed +*> by DGTTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITRANS +*> \verbatim +*> ITRANS is INTEGER +*> Specifies the form of the system of equations. +*> = 0: A * X = B (No transpose) +*> = 1: A**T* X = B (Transpose) +*> = 2: A**T* X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) multipliers that define the matrix L from the +*> LU factorization of A. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the upper triangular matrix U from +*> the LU factorization of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) elements of the first super-diagonal of U. +*> \endverbatim +*> +*> \param[in] DU2 +*> \verbatim +*> DU2 is DOUBLE PRECISION array, dimension (N-2) +*> The (n-2) elements of the second super-diagonal of U. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= n, row i of the matrix was +*> interchanged with row IPIV(i). IPIV(i) will always be either +*> i or i+1; IPIV(i) = i indicates a row interchange was not +*> required. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the matrix of right hand side vectors B. +*> On exit, B is overwritten by the solution vectors X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGTcomputational +* +* ===================================================================== + SUBROUTINE DGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER ITRANS, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IP, J + DOUBLE PRECISION TEMP +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( ITRANS.EQ.0 ) THEN +* +* Solve A*X = B using the LU factorization of A, +* overwriting each right hand side vector with its solution. +* + IF( NRHS.LE.1 ) THEN + J = 1 + 10 CONTINUE +* +* Solve L*x = b. +* + DO 20 I = 1, N - 1 + IP = IPIV( I ) + TEMP = B( I+1-IP+I, J ) - DL( I )*B( IP, J ) + B( I, J ) = B( IP, J ) + B( I+1, J ) = TEMP + 20 CONTINUE +* +* Solve U*x = b. +* + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 30 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )* + $ B( I+2, J ) ) / D( I ) + 30 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 10 + END IF + ELSE + DO 60 J = 1, NRHS +* +* Solve L*x = b. +* + DO 40 I = 1, N - 1 + IF( IPIV( I ).EQ.I ) THEN + B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J ) + ELSE + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - DL( I )*B( I, J ) + END IF + 40 CONTINUE +* +* Solve U*x = b. +* + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 50 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )* + $ B( I+2, J ) ) / D( I ) + 50 CONTINUE + 60 CONTINUE + END IF + ELSE +* +* Solve A**T * X = B. +* + IF( NRHS.LE.1 ) THEN +* +* Solve U**T*x = b. +* + J = 1 + 70 CONTINUE + B( 1, J ) = B( 1, J ) / D( 1 ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) + DO 80 I = 3, N + B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )-DU2( I-2 )* + $ B( I-2, J ) ) / D( I ) + 80 CONTINUE +* +* Solve L**T*x = b. +* + DO 90 I = N - 1, 1, -1 + IP = IPIV( I ) + TEMP = B( I, J ) - DL( I )*B( I+1, J ) + B( I, J ) = B( IP, J ) + B( IP, J ) = TEMP + 90 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 70 + END IF +* + ELSE + DO 120 J = 1, NRHS +* +* Solve U**T*x = b. +* + B( 1, J ) = B( 1, J ) / D( 1 ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) + DO 100 I = 3, N + B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )- + $ DU2( I-2 )*B( I-2, J ) ) / D( I ) + 100 CONTINUE + DO 110 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - DL( I )*B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - DL( I )*TEMP + B( I, J ) = TEMP + END IF + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* +* End of DGTTS2 +* + END diff --git a/dspl/liblapack/SRC/dhgeqz.f b/dspl/liblapack/SRC/dhgeqz.f new file mode 100644 index 0000000..99557f2 --- /dev/null +++ b/dspl/liblapack/SRC/dhgeqz.f @@ -0,0 +1,1367 @@ +*> \brief \b DHGEQZ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DHGEQZ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, +* ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ, COMPZ, JOB +* INTEGER IHI, ILO, INFO, LDH, LDQ, LDT, LDZ, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION ALPHAI( * ), ALPHAR( * ), BETA( * ), +* $ H( LDH, * ), Q( LDQ, * ), T( LDT, * ), +* $ WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DHGEQZ computes the eigenvalues of a real matrix pair (H,T), +*> where H is an upper Hessenberg matrix and T is upper triangular, +*> using the double-shift QZ method. +*> Matrix pairs of this type are produced by the reduction to +*> generalized upper Hessenberg form of a real matrix pair (A,B): +*> +*> A = Q1*H*Z1**T, B = Q1*T*Z1**T, +*> +*> as computed by DGGHRD. +*> +*> If JOB='S', then the Hessenberg-triangular pair (H,T) is +*> also reduced to generalized Schur form, +*> +*> H = Q*S*Z**T, T = Q*P*Z**T, +*> +*> where Q and Z are orthogonal matrices, P is an upper triangular +*> matrix, and S is a quasi-triangular matrix with 1-by-1 and 2-by-2 +*> diagonal blocks. +*> +*> The 1-by-1 blocks correspond to real eigenvalues of the matrix pair +*> (H,T) and the 2-by-2 blocks correspond to complex conjugate pairs of +*> eigenvalues. +*> +*> Additionally, the 2-by-2 upper triangular diagonal blocks of P +*> corresponding to 2-by-2 blocks of S are reduced to positive diagonal +*> form, i.e., if S(j+1,j) is non-zero, then P(j+1,j) = P(j,j+1) = 0, +*> P(j,j) > 0, and P(j+1,j+1) > 0. +*> +*> Optionally, the orthogonal matrix Q from the generalized Schur +*> factorization may be postmultiplied into an input matrix Q1, and the +*> orthogonal matrix Z may be postmultiplied into an input matrix Z1. +*> If Q1 and Z1 are the orthogonal matrices from DGGHRD that reduced +*> the matrix pair (A,B) to generalized upper Hessenberg form, then the +*> output matrices Q1*Q and Z1*Z are the orthogonal factors from the +*> generalized Schur factorization of (A,B): +*> +*> A = (Q1*Q)*S*(Z1*Z)**T, B = (Q1*Q)*P*(Z1*Z)**T. +*> +*> To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, +*> of (A,B)) are computed as a pair of values (alpha,beta), where alpha is +*> complex and beta real. +*> If beta is nonzero, lambda = alpha / beta is an eigenvalue of the +*> generalized nonsymmetric eigenvalue problem (GNEP) +*> A*x = lambda*B*x +*> and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the +*> alternate form of the GNEP +*> mu*A*y = B*y. +*> Real eigenvalues can be read directly from the generalized Schur +*> form: +*> alpha = S(i,i), beta = P(i,i). +*> +*> Ref: C.B. Moler & G.W. Stewart, "An Algorithm for Generalized Matrix +*> Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), +*> pp. 241--256. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> = 'E': Compute eigenvalues only; +*> = 'S': Compute eigenvalues and the Schur form. +*> \endverbatim +*> +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> = 'N': Left Schur vectors (Q) are not computed; +*> = 'I': Q is initialized to the unit matrix and the matrix Q +*> of left Schur vectors of (H,T) is returned; +*> = 'V': Q must contain an orthogonal matrix Q1 on entry and +*> the product Q1*Q is returned. +*> \endverbatim +*> +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': Right Schur vectors (Z) are not computed; +*> = 'I': Z is initialized to the unit matrix and the matrix Z +*> of right Schur vectors of (H,T) is returned; +*> = 'V': Z must contain an orthogonal matrix Z1 on entry and +*> the product Z1*Z is returned. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices H, T, Q, and Z. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> ILO and IHI mark the rows and columns of H which are in +*> Hessenberg form. It is assumed that A is already upper +*> triangular in rows and columns 1:ILO-1 and IHI+1:N. +*> If N > 0, 1 <= ILO <= IHI <= N; if N = 0, ILO=1 and IHI=0. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is DOUBLE PRECISION array, dimension (LDH, N) +*> On entry, the N-by-N upper Hessenberg matrix H. +*> On exit, if JOB = 'S', H contains the upper quasi-triangular +*> matrix S from the generalized Schur factorization. +*> If JOB = 'E', the diagonal blocks of H match those of S, but +*> the rest of H is unspecified. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH >= max( 1, N ). +*> \endverbatim +*> +*> \param[in,out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT, N) +*> On entry, the N-by-N upper triangular matrix T. +*> On exit, if JOB = 'S', T contains the upper triangular +*> matrix P from the generalized Schur factorization; +*> 2-by-2 diagonal blocks of P corresponding to 2-by-2 blocks of S +*> are reduced to positive diagonal form, i.e., if H(j+1,j) is +*> non-zero, then T(j+1,j) = T(j,j+1) = 0, T(j,j) > 0, and +*> T(j+1,j+1) > 0. +*> If JOB = 'E', the diagonal blocks of T match those of P, but +*> the rest of T is unspecified. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max( 1, N ). +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is DOUBLE PRECISION array, dimension (N) +*> The real parts of each scalar alpha defining an eigenvalue +*> of GNEP. +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is DOUBLE PRECISION array, dimension (N) +*> The imaginary parts of each scalar alpha defining an +*> eigenvalue of GNEP. +*> If ALPHAI(j) is zero, then the j-th eigenvalue is real; if +*> positive, then the j-th and (j+1)-st eigenvalues are a +*> complex conjugate pair, with ALPHAI(j+1) = -ALPHAI(j). +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION array, dimension (N) +*> The scalars beta that define the eigenvalues of GNEP. +*> Together, the quantities alpha = (ALPHAR(j),ALPHAI(j)) and +*> beta = BETA(j) represent the j-th eigenvalue of the matrix +*> pair (A,B), in one of the forms lambda = alpha/beta or +*> mu = beta/alpha. Since either lambda or mu may overflow, +*> they should not, in general, be computed. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ, N) +*> On entry, if COMPQ = 'V', the orthogonal matrix Q1 used in +*> the reduction of (A,B) to generalized Hessenberg form. +*> On exit, if COMPQ = 'I', the orthogonal matrix of left Schur +*> vectors of (H,T), and if COMPQ = 'V', the orthogonal matrix +*> of left Schur vectors of (A,B). +*> Not referenced if COMPQ = 'N'. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= 1. +*> If COMPQ='V' or 'I', then LDQ >= N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', the orthogonal matrix Z1 used in +*> the reduction of (A,B) to generalized Hessenberg form. +*> On exit, if COMPZ = 'I', the orthogonal matrix of +*> right Schur vectors of (H,T), and if COMPZ = 'V', the +*> orthogonal matrix of right Schur vectors of (A,B). +*> Not referenced if COMPZ = 'N'. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1. +*> If COMPZ='V' or 'I', then LDZ >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO >= 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> = 1,...,N: the QZ iteration did not converge. (H,T) is not +*> in Schur form, but ALPHAR(i), ALPHAI(i), and +*> BETA(i), i=INFO+1,...,N should be correct. +*> = N+1,...,2*N: the shift calculation failed. (H,T) is not +*> in Schur form, but ALPHAR(i), ALPHAI(i), and +*> BETA(i), i=INFO-N+1,...,N should be correct. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Iteration counters: +*> +*> JITER -- counts iterations. +*> IITER -- counts iterations run since ILAST was last +*> changed. This is therefore reset only when a 1-by-1 or +*> 2-by-2 block deflates off the bottom. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, + $ ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, WORK, + $ LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ, JOB + INTEGER IHI, ILO, INFO, LDH, LDQ, LDT, LDZ, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION ALPHAI( * ), ALPHAR( * ), BETA( * ), + $ H( LDH, * ), Q( LDQ, * ), T( LDT, * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* $ SAFETY = 1.0E+0 ) + DOUBLE PRECISION HALF, ZERO, ONE, SAFETY + PARAMETER ( HALF = 0.5D+0, ZERO = 0.0D+0, ONE = 1.0D+0, + $ SAFETY = 1.0D+2 ) +* .. +* .. Local Scalars .. + LOGICAL ILAZR2, ILAZRO, ILPIVT, ILQ, ILSCHR, ILZ, + $ LQUERY + INTEGER ICOMPQ, ICOMPZ, IFIRST, IFRSTM, IITER, ILAST, + $ ILASTM, IN, ISCHUR, ISTART, J, JC, JCH, JITER, + $ JR, MAXIT + DOUBLE PRECISION A11, A12, A1I, A1R, A21, A22, A2I, A2R, AD11, + $ AD11L, AD12, AD12L, AD21, AD21L, AD22, AD22L, + $ AD32L, AN, ANORM, ASCALE, ATOL, B11, B1A, B1I, + $ B1R, B22, B2A, B2I, B2R, BN, BNORM, BSCALE, + $ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL, + $ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX, + $ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1, + $ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L, + $ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR, + $ WR2 +* .. +* .. Local Arrays .. + DOUBLE PRECISION V( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANHS, DLAPY2, DLAPY3 + EXTERNAL LSAME, DLAMCH, DLANHS, DLAPY2, DLAPY3 +* .. +* .. External Subroutines .. + EXTERNAL DLAG2, DLARFG, DLARTG, DLASET, DLASV2, DROT, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Decode JOB, COMPQ, COMPZ +* + IF( LSAME( JOB, 'E' ) ) THEN + ILSCHR = .FALSE. + ISCHUR = 1 + ELSE IF( LSAME( JOB, 'S' ) ) THEN + ILSCHR = .TRUE. + ISCHUR = 2 + ELSE + ISCHUR = 0 + END IF +* + IF( LSAME( COMPQ, 'N' ) ) THEN + ILQ = .FALSE. + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'V' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 2 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 3 + ELSE + ICOMPQ = 0 + END IF +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ILZ = .FALSE. + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 2 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 3 + ELSE + ICOMPZ = 0 + END IF +* +* Check Argument Values +* + INFO = 0 + WORK( 1 ) = MAX( 1, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( ISCHUR.EQ.0 ) THEN + INFO = -1 + ELSE IF( ICOMPQ.EQ.0 ) THEN + INFO = -2 + ELSE IF( ICOMPZ.EQ.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( ILO.LT.1 ) THEN + INFO = -5 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -6 + ELSE IF( LDH.LT.N ) THEN + INFO = -8 + ELSE IF( LDT.LT.N ) THEN + INFO = -10 + ELSE IF( LDQ.LT.1 .OR. ( ILQ .AND. LDQ.LT.N ) ) THEN + INFO = -15 + ELSE IF( LDZ.LT.1 .OR. ( ILZ .AND. LDZ.LT.N ) ) THEN + INFO = -17 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DHGEQZ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = DBLE( 1 ) + RETURN + END IF +* +* Initialize Q and Z +* + IF( ICOMPQ.EQ.3 ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) + IF( ICOMPZ.EQ.3 ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* +* Machine Constants +* + IN = IHI + 1 - ILO + SAFMIN = DLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + ULP = DLAMCH( 'E' )*DLAMCH( 'B' ) + ANORM = DLANHS( 'F', IN, H( ILO, ILO ), LDH, WORK ) + BNORM = DLANHS( 'F', IN, T( ILO, ILO ), LDT, WORK ) + ATOL = MAX( SAFMIN, ULP*ANORM ) + BTOL = MAX( SAFMIN, ULP*BNORM ) + ASCALE = ONE / MAX( SAFMIN, ANORM ) + BSCALE = ONE / MAX( SAFMIN, BNORM ) +* +* Set Eigenvalues IHI+1:N +* + DO 30 J = IHI + 1, N + IF( T( J, J ).LT.ZERO ) THEN + IF( ILSCHR ) THEN + DO 10 JR = 1, J + H( JR, J ) = -H( JR, J ) + T( JR, J ) = -T( JR, J ) + 10 CONTINUE + ELSE + H( J, J ) = -H( J, J ) + T( J, J ) = -T( J, J ) + END IF + IF( ILZ ) THEN + DO 20 JR = 1, N + Z( JR, J ) = -Z( JR, J ) + 20 CONTINUE + END IF + END IF + ALPHAR( J ) = H( J, J ) + ALPHAI( J ) = ZERO + BETA( J ) = T( J, J ) + 30 CONTINUE +* +* If IHI < ILO, skip QZ steps +* + IF( IHI.LT.ILO ) + $ GO TO 380 +* +* MAIN QZ ITERATION LOOP +* +* Initialize dynamic indices +* +* Eigenvalues ILAST+1:N have been found. +* Column operations modify rows IFRSTM:whatever. +* Row operations modify columns whatever:ILASTM. +* +* If only eigenvalues are being computed, then +* IFRSTM is the row of the last splitting row above row ILAST; +* this is always at least ILO. +* IITER counts iterations since the last eigenvalue was found, +* to tell when to use an extraordinary shift. +* MAXIT is the maximum number of QZ sweeps allowed. +* + ILAST = IHI + IF( ILSCHR ) THEN + IFRSTM = 1 + ILASTM = N + ELSE + IFRSTM = ILO + ILASTM = IHI + END IF + IITER = 0 + ESHIFT = ZERO + MAXIT = 30*( IHI-ILO+1 ) +* + DO 360 JITER = 1, MAXIT +* +* Split the matrix if possible. +* +* Two tests: +* 1: H(j,j-1)=0 or j=ILO +* 2: T(j,j)=0 +* + IF( ILAST.EQ.ILO ) THEN +* +* Special case: j=ILAST +* + GO TO 80 + ELSE + IF( ABS( H( ILAST, ILAST-1 ) ).LE.ATOL ) THEN + H( ILAST, ILAST-1 ) = ZERO + GO TO 80 + END IF + END IF +* + IF( ABS( T( ILAST, ILAST ) ).LE.BTOL ) THEN + T( ILAST, ILAST ) = ZERO + GO TO 70 + END IF +* +* General case: j unfl ) +* __ +* (sA - wB) ( CZ -SZ ) +* ( SZ CZ ) +* + C11R = S1*A11 - WR*B11 + C11I = -WI*B11 + C12 = S1*A12 + C21 = S1*A21 + C22R = S1*A22 - WR*B22 + C22I = -WI*B22 +* + IF( ABS( C11R )+ABS( C11I )+ABS( C12 ).GT.ABS( C21 )+ + $ ABS( C22R )+ABS( C22I ) ) THEN + T1 = DLAPY3( C12, C11R, C11I ) + CZ = C12 / T1 + SZR = -C11R / T1 + SZI = -C11I / T1 + ELSE + CZ = DLAPY2( C22R, C22I ) + IF( CZ.LE.SAFMIN ) THEN + CZ = ZERO + SZR = ONE + SZI = ZERO + ELSE + TEMPR = C22R / CZ + TEMPI = C22I / CZ + T1 = DLAPY2( CZ, C21 ) + CZ = CZ / T1 + SZR = -C21*TEMPR / T1 + SZI = C21*TEMPI / T1 + END IF + END IF +* +* Compute Givens rotation on left +* +* ( CQ SQ ) +* ( __ ) A or B +* ( -SQ CQ ) +* + AN = ABS( A11 ) + ABS( A12 ) + ABS( A21 ) + ABS( A22 ) + BN = ABS( B11 ) + ABS( B22 ) + WABS = ABS( WR ) + ABS( WI ) + IF( S1*AN.GT.WABS*BN ) THEN + CQ = CZ*B11 + SQR = SZR*B22 + SQI = -SZI*B22 + ELSE + A1R = CZ*A11 + SZR*A12 + A1I = SZI*A12 + A2R = CZ*A21 + SZR*A22 + A2I = SZI*A22 + CQ = DLAPY2( A1R, A1I ) + IF( CQ.LE.SAFMIN ) THEN + CQ = ZERO + SQR = ONE + SQI = ZERO + ELSE + TEMPR = A1R / CQ + TEMPI = A1I / CQ + SQR = TEMPR*A2R + TEMPI*A2I + SQI = TEMPI*A2R - TEMPR*A2I + END IF + END IF + T1 = DLAPY3( CQ, SQR, SQI ) + CQ = CQ / T1 + SQR = SQR / T1 + SQI = SQI / T1 +* +* Compute diagonal elements of QBZ +* + TEMPR = SQR*SZR - SQI*SZI + TEMPI = SQR*SZI + SQI*SZR + B1R = CQ*CZ*B11 + TEMPR*B22 + B1I = TEMPI*B22 + B1A = DLAPY2( B1R, B1I ) + B2R = CQ*CZ*B22 + TEMPR*B11 + B2I = -TEMPI*B11 + B2A = DLAPY2( B2R, B2I ) +* +* Normalize so beta > 0, and Im( alpha1 ) > 0 +* + BETA( ILAST-1 ) = B1A + BETA( ILAST ) = B2A + ALPHAR( ILAST-1 ) = ( WR*B1A )*S1INV + ALPHAI( ILAST-1 ) = ( WI*B1A )*S1INV + ALPHAR( ILAST ) = ( WR*B2A )*S1INV + ALPHAI( ILAST ) = -( WI*B2A )*S1INV +* +* Step 3: Go to next block -- exit if finished. +* + ILAST = IFIRST - 1 + IF( ILAST.LT.ILO ) + $ GO TO 380 +* +* Reset counters +* + IITER = 0 + ESHIFT = ZERO + IF( .NOT.ILSCHR ) THEN + ILASTM = ILAST + IF( IFRSTM.GT.ILAST ) + $ IFRSTM = ILO + END IF + GO TO 350 + ELSE +* +* Usual case: 3x3 or larger block, using Francis implicit +* double-shift +* +* 2 +* Eigenvalue equation is w - c w + d = 0, +* +* -1 2 -1 +* so compute 1st column of (A B ) - c A B + d +* using the formula in QZIT (from EISPACK) +* +* We assume that the block is at least 3x3 +* + AD11 = ( ASCALE*H( ILAST-1, ILAST-1 ) ) / + $ ( BSCALE*T( ILAST-1, ILAST-1 ) ) + AD21 = ( ASCALE*H( ILAST, ILAST-1 ) ) / + $ ( BSCALE*T( ILAST-1, ILAST-1 ) ) + AD12 = ( ASCALE*H( ILAST-1, ILAST ) ) / + $ ( BSCALE*T( ILAST, ILAST ) ) + AD22 = ( ASCALE*H( ILAST, ILAST ) ) / + $ ( BSCALE*T( ILAST, ILAST ) ) + U12 = T( ILAST-1, ILAST ) / T( ILAST, ILAST ) + AD11L = ( ASCALE*H( IFIRST, IFIRST ) ) / + $ ( BSCALE*T( IFIRST, IFIRST ) ) + AD21L = ( ASCALE*H( IFIRST+1, IFIRST ) ) / + $ ( BSCALE*T( IFIRST, IFIRST ) ) + AD12L = ( ASCALE*H( IFIRST, IFIRST+1 ) ) / + $ ( BSCALE*T( IFIRST+1, IFIRST+1 ) ) + AD22L = ( ASCALE*H( IFIRST+1, IFIRST+1 ) ) / + $ ( BSCALE*T( IFIRST+1, IFIRST+1 ) ) + AD32L = ( ASCALE*H( IFIRST+2, IFIRST+1 ) ) / + $ ( BSCALE*T( IFIRST+1, IFIRST+1 ) ) + U12L = T( IFIRST, IFIRST+1 ) / T( IFIRST+1, IFIRST+1 ) +* + V( 1 ) = ( AD11-AD11L )*( AD22-AD11L ) - AD12*AD21 + + $ AD21*U12*AD11L + ( AD12L-AD11L*U12L )*AD21L + V( 2 ) = ( ( AD22L-AD11L )-AD21L*U12L-( AD11-AD11L )- + $ ( AD22-AD11L )+AD21*U12 )*AD21L + V( 3 ) = AD32L*AD21L +* + ISTART = IFIRST +* + CALL DLARFG( 3, V( 1 ), V( 2 ), 1, TAU ) + V( 1 ) = ONE +* +* Sweep +* + DO 290 J = ISTART, ILAST - 2 +* +* All but last elements: use 3x3 Householder transforms. +* +* Zero (j-1)st column of A +* + IF( J.GT.ISTART ) THEN + V( 1 ) = H( J, J-1 ) + V( 2 ) = H( J+1, J-1 ) + V( 3 ) = H( J+2, J-1 ) +* + CALL DLARFG( 3, H( J, J-1 ), V( 2 ), 1, TAU ) + V( 1 ) = ONE + H( J+1, J-1 ) = ZERO + H( J+2, J-1 ) = ZERO + END IF +* + DO 230 JC = J, ILASTM + TEMP = TAU*( H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )* + $ H( J+2, JC ) ) + H( J, JC ) = H( J, JC ) - TEMP + H( J+1, JC ) = H( J+1, JC ) - TEMP*V( 2 ) + H( J+2, JC ) = H( J+2, JC ) - TEMP*V( 3 ) + TEMP2 = TAU*( T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )* + $ T( J+2, JC ) ) + T( J, JC ) = T( J, JC ) - TEMP2 + T( J+1, JC ) = T( J+1, JC ) - TEMP2*V( 2 ) + T( J+2, JC ) = T( J+2, JC ) - TEMP2*V( 3 ) + 230 CONTINUE + IF( ILQ ) THEN + DO 240 JR = 1, N + TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )* + $ Q( JR, J+2 ) ) + Q( JR, J ) = Q( JR, J ) - TEMP + Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 ) + Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 ) + 240 CONTINUE + END IF +* +* Zero j-th column of B (see DLAGBC for details) +* +* Swap rows to pivot +* + ILPIVT = .FALSE. + TEMP = MAX( ABS( T( J+1, J+1 ) ), ABS( T( J+1, J+2 ) ) ) + TEMP2 = MAX( ABS( T( J+2, J+1 ) ), ABS( T( J+2, J+2 ) ) ) + IF( MAX( TEMP, TEMP2 ).LT.SAFMIN ) THEN + SCALE = ZERO + U1 = ONE + U2 = ZERO + GO TO 250 + ELSE IF( TEMP.GE.TEMP2 ) THEN + W11 = T( J+1, J+1 ) + W21 = T( J+2, J+1 ) + W12 = T( J+1, J+2 ) + W22 = T( J+2, J+2 ) + U1 = T( J+1, J ) + U2 = T( J+2, J ) + ELSE + W21 = T( J+1, J+1 ) + W11 = T( J+2, J+1 ) + W22 = T( J+1, J+2 ) + W12 = T( J+2, J+2 ) + U2 = T( J+1, J ) + U1 = T( J+2, J ) + END IF +* +* Swap columns if nec. +* + IF( ABS( W12 ).GT.ABS( W11 ) ) THEN + ILPIVT = .TRUE. + TEMP = W12 + TEMP2 = W22 + W12 = W11 + W22 = W21 + W11 = TEMP + W21 = TEMP2 + END IF +* +* LU-factor +* + TEMP = W21 / W11 + U2 = U2 - TEMP*U1 + W22 = W22 - TEMP*W12 + W21 = ZERO +* +* Compute SCALE +* + SCALE = ONE + IF( ABS( W22 ).LT.SAFMIN ) THEN + SCALE = ZERO + U2 = ONE + U1 = -W12 / W11 + GO TO 250 + END IF + IF( ABS( W22 ).LT.ABS( U2 ) ) + $ SCALE = ABS( W22 / U2 ) + IF( ABS( W11 ).LT.ABS( U1 ) ) + $ SCALE = MIN( SCALE, ABS( W11 / U1 ) ) +* +* Solve +* + U2 = ( SCALE*U2 ) / W22 + U1 = ( SCALE*U1-W12*U2 ) / W11 +* + 250 CONTINUE + IF( ILPIVT ) THEN + TEMP = U2 + U2 = U1 + U1 = TEMP + END IF +* +* Compute Householder Vector +* + T1 = SQRT( SCALE**2+U1**2+U2**2 ) + TAU = ONE + SCALE / T1 + VS = -ONE / ( SCALE+T1 ) + V( 1 ) = ONE + V( 2 ) = VS*U1 + V( 3 ) = VS*U2 +* +* Apply transformations from the right. +* + DO 260 JR = IFRSTM, MIN( J+3, ILAST ) + TEMP = TAU*( H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )* + $ H( JR, J+2 ) ) + H( JR, J ) = H( JR, J ) - TEMP + H( JR, J+1 ) = H( JR, J+1 ) - TEMP*V( 2 ) + H( JR, J+2 ) = H( JR, J+2 ) - TEMP*V( 3 ) + 260 CONTINUE + DO 270 JR = IFRSTM, J + 2 + TEMP = TAU*( T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )* + $ T( JR, J+2 ) ) + T( JR, J ) = T( JR, J ) - TEMP + T( JR, J+1 ) = T( JR, J+1 ) - TEMP*V( 2 ) + T( JR, J+2 ) = T( JR, J+2 ) - TEMP*V( 3 ) + 270 CONTINUE + IF( ILZ ) THEN + DO 280 JR = 1, N + TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )* + $ Z( JR, J+2 ) ) + Z( JR, J ) = Z( JR, J ) - TEMP + Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 ) + Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 ) + 280 CONTINUE + END IF + T( J+1, J ) = ZERO + T( J+2, J ) = ZERO + 290 CONTINUE +* +* Last elements: Use Givens rotations +* +* Rotations from the left +* + J = ILAST - 1 + TEMP = H( J, J-1 ) + CALL DLARTG( TEMP, H( J+1, J-1 ), C, S, H( J, J-1 ) ) + H( J+1, J-1 ) = ZERO +* + DO 300 JC = J, ILASTM + TEMP = C*H( J, JC ) + S*H( J+1, JC ) + H( J+1, JC ) = -S*H( J, JC ) + C*H( J+1, JC ) + H( J, JC ) = TEMP + TEMP2 = C*T( J, JC ) + S*T( J+1, JC ) + T( J+1, JC ) = -S*T( J, JC ) + C*T( J+1, JC ) + T( J, JC ) = TEMP2 + 300 CONTINUE + IF( ILQ ) THEN + DO 310 JR = 1, N + TEMP = C*Q( JR, J ) + S*Q( JR, J+1 ) + Q( JR, J+1 ) = -S*Q( JR, J ) + C*Q( JR, J+1 ) + Q( JR, J ) = TEMP + 310 CONTINUE + END IF +* +* Rotations from the right. +* + TEMP = T( J+1, J+1 ) + CALL DLARTG( TEMP, T( J+1, J ), C, S, T( J+1, J+1 ) ) + T( J+1, J ) = ZERO +* + DO 320 JR = IFRSTM, ILAST + TEMP = C*H( JR, J+1 ) + S*H( JR, J ) + H( JR, J ) = -S*H( JR, J+1 ) + C*H( JR, J ) + H( JR, J+1 ) = TEMP + 320 CONTINUE + DO 330 JR = IFRSTM, ILAST - 1 + TEMP = C*T( JR, J+1 ) + S*T( JR, J ) + T( JR, J ) = -S*T( JR, J+1 ) + C*T( JR, J ) + T( JR, J+1 ) = TEMP + 330 CONTINUE + IF( ILZ ) THEN + DO 340 JR = 1, N + TEMP = C*Z( JR, J+1 ) + S*Z( JR, J ) + Z( JR, J ) = -S*Z( JR, J+1 ) + C*Z( JR, J ) + Z( JR, J+1 ) = TEMP + 340 CONTINUE + END IF +* +* End of Double-Shift code +* + END IF +* + GO TO 350 +* +* End of iteration loop +* + 350 CONTINUE + 360 CONTINUE +* +* Drop-through = non-convergence +* + INFO = ILAST + GO TO 420 +* +* Successful completion of all QZ steps +* + 380 CONTINUE +* +* Set Eigenvalues 1:ILO-1 +* + DO 410 J = 1, ILO - 1 + IF( T( J, J ).LT.ZERO ) THEN + IF( ILSCHR ) THEN + DO 390 JR = 1, J + H( JR, J ) = -H( JR, J ) + T( JR, J ) = -T( JR, J ) + 390 CONTINUE + ELSE + H( J, J ) = -H( J, J ) + T( J, J ) = -T( J, J ) + END IF + IF( ILZ ) THEN + DO 400 JR = 1, N + Z( JR, J ) = -Z( JR, J ) + 400 CONTINUE + END IF + END IF + ALPHAR( J ) = H( J, J ) + ALPHAI( J ) = ZERO + BETA( J ) = T( J, J ) + 410 CONTINUE +* +* Normal Termination +* + INFO = 0 +* +* Exit (other than argument error) -- return optimal workspace size +* + 420 CONTINUE + WORK( 1 ) = DBLE( N ) + RETURN +* +* End of DHGEQZ +* + END diff --git a/dspl/liblapack/SRC/dhsein.f b/dspl/liblapack/SRC/dhsein.f new file mode 100644 index 0000000..e71cdc8 --- /dev/null +++ b/dspl/liblapack/SRC/dhsein.f @@ -0,0 +1,530 @@ +*> \brief \b DHSEIN +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DHSEIN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DHSEIN( SIDE, EIGSRC, INITV, SELECT, N, H, LDH, WR, WI, +* VL, LDVL, VR, LDVR, MM, M, WORK, IFAILL, +* IFAILR, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EIGSRC, INITV, SIDE +* INTEGER INFO, LDH, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* INTEGER IFAILL( * ), IFAILR( * ) +* DOUBLE PRECISION H( LDH, * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WI( * ), WORK( * ), WR( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DHSEIN uses inverse iteration to find specified right and/or left +*> eigenvectors of a real upper Hessenberg matrix H. +*> +*> The right eigenvector x and the left eigenvector y of the matrix H +*> corresponding to an eigenvalue w are defined by: +*> +*> H * x = w * x, y**h * H = w * y**h +*> +*> where y**h denotes the conjugate transpose of the vector y. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': compute right eigenvectors only; +*> = 'L': compute left eigenvectors only; +*> = 'B': compute both right and left eigenvectors. +*> \endverbatim +*> +*> \param[in] EIGSRC +*> \verbatim +*> EIGSRC is CHARACTER*1 +*> Specifies the source of eigenvalues supplied in (WR,WI): +*> = 'Q': the eigenvalues were found using DHSEQR; thus, if +*> H has zero subdiagonal elements, and so is +*> block-triangular, then the j-th eigenvalue can be +*> assumed to be an eigenvalue of the block containing +*> the j-th row/column. This property allows DHSEIN to +*> perform inverse iteration on just one diagonal block. +*> = 'N': no assumptions are made on the correspondence +*> between eigenvalues and diagonal blocks. In this +*> case, DHSEIN must always perform inverse iteration +*> using the whole matrix H. +*> \endverbatim +*> +*> \param[in] INITV +*> \verbatim +*> INITV is CHARACTER*1 +*> = 'N': no initial vectors are supplied; +*> = 'U': user-supplied initial vectors are stored in the arrays +*> VL and/or VR. +*> \endverbatim +*> +*> \param[in,out] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> Specifies the eigenvectors to be computed. To select the +*> real eigenvector corresponding to a real eigenvalue WR(j), +*> SELECT(j) must be set to .TRUE.. To select the complex +*> eigenvector corresponding to a complex eigenvalue +*> (WR(j),WI(j)), with complex conjugate (WR(j+1),WI(j+1)), +*> either SELECT(j) or SELECT(j+1) or both must be set to +*> .TRUE.; then on exit SELECT(j) is .TRUE. and SELECT(j+1) is +*> .FALSE.. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N >= 0. +*> \endverbatim +*> +*> \param[in] H +*> \verbatim +*> H is DOUBLE PRECISION array, dimension (LDH,N) +*> The upper Hessenberg matrix H. +*> If a NaN is detected in H, the routine will return with INFO=-6. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] WR +*> \verbatim +*> WR is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[in] WI +*> \verbatim +*> WI is DOUBLE PRECISION array, dimension (N) +*> +*> On entry, the real and imaginary parts of the eigenvalues of +*> H; a complex conjugate pair of eigenvalues must be stored in +*> consecutive elements of WR and WI. +*> On exit, WR may have been altered since close eigenvalues +*> are perturbed slightly in searching for independent +*> eigenvectors. +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is DOUBLE PRECISION array, dimension (LDVL,MM) +*> On entry, if INITV = 'U' and SIDE = 'L' or 'B', VL must +*> contain starting vectors for the inverse iteration for the +*> left eigenvectors; the starting vector for each eigenvector +*> must be in the same column(s) in which the eigenvector will +*> be stored. +*> On exit, if SIDE = 'L' or 'B', the left eigenvectors +*> specified by SELECT will be stored consecutively in the +*> columns of VL, in the same order as their eigenvalues. A +*> complex eigenvector corresponding to a complex eigenvalue is +*> stored in two consecutive columns, the first holding the real +*> part and the second the imaginary part. +*> If SIDE = 'R', VL is not referenced. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. +*> LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] VR +*> \verbatim +*> VR is DOUBLE PRECISION array, dimension (LDVR,MM) +*> On entry, if INITV = 'U' and SIDE = 'R' or 'B', VR must +*> contain starting vectors for the inverse iteration for the +*> right eigenvectors; the starting vector for each eigenvector +*> must be in the same column(s) in which the eigenvector will +*> be stored. +*> On exit, if SIDE = 'R' or 'B', the right eigenvectors +*> specified by SELECT will be stored consecutively in the +*> columns of VR, in the same order as their eigenvalues. A +*> complex eigenvector corresponding to a complex eigenvalue is +*> stored in two consecutive columns, the first holding the real +*> part and the second the imaginary part. +*> If SIDE = 'L', VR is not referenced. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. +*> LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of columns in the arrays VL and/or VR. MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of columns in the arrays VL and/or VR required to +*> store the eigenvectors; each selected real eigenvector +*> occupies one column and each selected complex eigenvector +*> occupies two columns. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension ((N+2)*N) +*> \endverbatim +*> +*> \param[out] IFAILL +*> \verbatim +*> IFAILL is INTEGER array, dimension (MM) +*> If SIDE = 'L' or 'B', IFAILL(i) = j > 0 if the left +*> eigenvector in the i-th column of VL (corresponding to the +*> eigenvalue w(j)) failed to converge; IFAILL(i) = 0 if the +*> eigenvector converged satisfactorily. If the i-th and (i+1)th +*> columns of VL hold a complex eigenvector, then IFAILL(i) and +*> IFAILL(i+1) are set to the same value. +*> If SIDE = 'R', IFAILL is not referenced. +*> \endverbatim +*> +*> \param[out] IFAILR +*> \verbatim +*> IFAILR is INTEGER array, dimension (MM) +*> If SIDE = 'R' or 'B', IFAILR(i) = j > 0 if the right +*> eigenvector in the i-th column of VR (corresponding to the +*> eigenvalue w(j)) failed to converge; IFAILR(i) = 0 if the +*> eigenvector converged satisfactorily. If the i-th and (i+1)th +*> columns of VR hold a complex eigenvector, then IFAILR(i) and +*> IFAILR(i+1) are set to the same value. +*> If SIDE = 'L', IFAILR is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, i is the number of eigenvectors which +*> failed to converge; see IFAILL and IFAILR for further +*> details. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Each eigenvector is normalized so that the element of largest +*> magnitude has magnitude 1; here the magnitude of a complex number +*> (x,y) is taken to be |x|+|y|. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DHSEIN( SIDE, EIGSRC, INITV, SELECT, N, H, LDH, WR, WI, + $ VL, LDVL, VR, LDVR, MM, M, WORK, IFAILL, + $ IFAILR, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EIGSRC, INITV, SIDE + INTEGER INFO, LDH, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IFAILL( * ), IFAILR( * ) + DOUBLE PRECISION H( LDH, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WI( * ), WORK( * ), WR( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL BOTHV, FROMQR, LEFTV, NOINIT, PAIR, RIGHTV + INTEGER I, IINFO, K, KL, KLN, KR, KSI, KSR, LDWORK + DOUBLE PRECISION BIGNUM, EPS3, HNORM, SMLNUM, ULP, UNFL, WKI, + $ WKR +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + DOUBLE PRECISION DLAMCH, DLANHS + EXTERNAL LSAME, DLAMCH, DLANHS, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL DLAEIN, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + FROMQR = LSAME( EIGSRC, 'Q' ) +* + NOINIT = LSAME( INITV, 'N' ) +* +* Set M to the number of columns required to store the selected +* eigenvectors, and standardize the array SELECT. +* + M = 0 + PAIR = .FALSE. + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + SELECT( K ) = .FALSE. + ELSE + IF( WI( K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) THEN + SELECT( K ) = .TRUE. + M = M + 2 + END IF + END IF + END IF + 10 CONTINUE +* + INFO = 0 + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.FROMQR .AND. .NOT.LSAME( EIGSRC, 'N' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOINIT .AND. .NOT.LSAME( INITV, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDH.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -13 + ELSE IF( MM.LT.M ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DHSEIN', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Set machine-dependent constants. +* + UNFL = DLAMCH( 'Safe minimum' ) + ULP = DLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) + BIGNUM = ( ONE-ULP ) / SMLNUM +* + LDWORK = N + 1 +* + KL = 1 + KLN = 0 + IF( FROMQR ) THEN + KR = 0 + ELSE + KR = N + END IF + KSR = 1 +* + DO 120 K = 1, N + IF( SELECT( K ) ) THEN +* +* Compute eigenvector(s) corresponding to W(K). +* + IF( FROMQR ) THEN +* +* If affiliation of eigenvalues is known, check whether +* the matrix splits. +* +* Determine KL and KR such that 1 <= KL <= K <= KR <= N +* and H(KL,KL-1) and H(KR+1,KR) are zero (or KL = 1 or +* KR = N). +* +* Then inverse iteration can be performed with the +* submatrix H(KL:N,KL:N) for a left eigenvector, and with +* the submatrix H(1:KR,1:KR) for a right eigenvector. +* + DO 20 I = K, KL + 1, -1 + IF( H( I, I-1 ).EQ.ZERO ) + $ GO TO 30 + 20 CONTINUE + 30 CONTINUE + KL = I + IF( K.GT.KR ) THEN + DO 40 I = K, N - 1 + IF( H( I+1, I ).EQ.ZERO ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE + KR = I + END IF + END IF +* + IF( KL.NE.KLN ) THEN + KLN = KL +* +* Compute infinity-norm of submatrix H(KL:KR,KL:KR) if it +* has not ben computed before. +* + HNORM = DLANHS( 'I', KR-KL+1, H( KL, KL ), LDH, WORK ) + IF( DISNAN( HNORM ) ) THEN + INFO = -6 + RETURN + ELSE IF( HNORM.GT.ZERO ) THEN + EPS3 = HNORM*ULP + ELSE + EPS3 = SMLNUM + END IF + END IF +* +* Perturb eigenvalue if it is close to any previous +* selected eigenvalues affiliated to the submatrix +* H(KL:KR,KL:KR). Close roots are modified by EPS3. +* + WKR = WR( K ) + WKI = WI( K ) + 60 CONTINUE + DO 70 I = K - 1, KL, -1 + IF( SELECT( I ) .AND. ABS( WR( I )-WKR )+ + $ ABS( WI( I )-WKI ).LT.EPS3 ) THEN + WKR = WKR + EPS3 + GO TO 60 + END IF + 70 CONTINUE + WR( K ) = WKR +* + PAIR = WKI.NE.ZERO + IF( PAIR ) THEN + KSI = KSR + 1 + ELSE + KSI = KSR + END IF + IF( LEFTV ) THEN +* +* Compute left eigenvector. +* + CALL DLAEIN( .FALSE., NOINIT, N-KL+1, H( KL, KL ), LDH, + $ WKR, WKI, VL( KL, KSR ), VL( KL, KSI ), + $ WORK, LDWORK, WORK( N*N+N+1 ), EPS3, SMLNUM, + $ BIGNUM, IINFO ) + IF( IINFO.GT.0 ) THEN + IF( PAIR ) THEN + INFO = INFO + 2 + ELSE + INFO = INFO + 1 + END IF + IFAILL( KSR ) = K + IFAILL( KSI ) = K + ELSE + IFAILL( KSR ) = 0 + IFAILL( KSI ) = 0 + END IF + DO 80 I = 1, KL - 1 + VL( I, KSR ) = ZERO + 80 CONTINUE + IF( PAIR ) THEN + DO 90 I = 1, KL - 1 + VL( I, KSI ) = ZERO + 90 CONTINUE + END IF + END IF + IF( RIGHTV ) THEN +* +* Compute right eigenvector. +* + CALL DLAEIN( .TRUE., NOINIT, KR, H, LDH, WKR, WKI, + $ VR( 1, KSR ), VR( 1, KSI ), WORK, LDWORK, + $ WORK( N*N+N+1 ), EPS3, SMLNUM, BIGNUM, + $ IINFO ) + IF( IINFO.GT.0 ) THEN + IF( PAIR ) THEN + INFO = INFO + 2 + ELSE + INFO = INFO + 1 + END IF + IFAILR( KSR ) = K + IFAILR( KSI ) = K + ELSE + IFAILR( KSR ) = 0 + IFAILR( KSI ) = 0 + END IF + DO 100 I = KR + 1, N + VR( I, KSR ) = ZERO + 100 CONTINUE + IF( PAIR ) THEN + DO 110 I = KR + 1, N + VR( I, KSI ) = ZERO + 110 CONTINUE + END IF + END IF +* + IF( PAIR ) THEN + KSR = KSR + 2 + ELSE + KSR = KSR + 1 + END IF + END IF + 120 CONTINUE +* + RETURN +* +* End of DHSEIN +* + END diff --git a/dspl/liblapack/SRC/dhseqr.f b/dspl/liblapack/SRC/dhseqr.f new file mode 100644 index 0000000..4444b95 --- /dev/null +++ b/dspl/liblapack/SRC/dhseqr.f @@ -0,0 +1,516 @@ +*> \brief \b DHSEQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DHSEQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, WR, WI, Z, +* LDZ, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, ILO, INFO, LDH, LDZ, LWORK, N +* CHARACTER COMPZ, JOB +* .. +* .. Array Arguments .. +* DOUBLE PRECISION H( LDH, * ), WI( * ), WORK( * ), WR( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DHSEQR computes the eigenvalues of a Hessenberg matrix H +*> and, optionally, the matrices T and Z from the Schur decomposition +*> H = Z T Z**T, where T is an upper quasi-triangular matrix (the +*> Schur form), and Z is the orthogonal matrix of Schur vectors. +*> +*> Optionally Z may be postmultiplied into an input orthogonal +*> matrix Q so that this routine can give the Schur factorization +*> of a matrix A which has been reduced to the Hessenberg form H +*> by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> = 'E': compute eigenvalues only; +*> = 'S': compute eigenvalues and the Schur form T. +*> \endverbatim +*> +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': no Schur vectors are computed; +*> = 'I': Z is initialized to the unit matrix and the matrix Z +*> of Schur vectors of H is returned; +*> = 'V': Z must contain an orthogonal matrix Q on entry, and +*> the product Q*Z is returned. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N .GE. 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> It is assumed that H is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +*> set by a previous call to DGEBAL, and then passed to ZGEHRD +*> when the matrix output by DGEBAL is reduced to Hessenberg +*> form. Otherwise ILO and IHI should be set to 1 and N +*> respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. +*> If N = 0, then ILO = 1 and IHI = 0. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is DOUBLE PRECISION array, dimension (LDH,N) +*> On entry, the upper Hessenberg matrix H. +*> On exit, if INFO = 0 and JOB = 'S', then H contains the +*> upper quasi-triangular matrix T from the Schur decomposition +*> (the Schur form); 2-by-2 diagonal blocks (corresponding to +*> complex conjugate pairs of eigenvalues) are returned in +*> standard form, with H(i,i) = H(i+1,i+1) and +*> H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and JOB = 'E', the +*> contents of H are unspecified on exit. (The output value of +*> H when INFO.GT.0 is given under the description of INFO +*> below.) +*> +*> Unlike earlier versions of DHSEQR, this subroutine may +*> explicitly H(i,j) = 0 for i.GT.j and j = 1, 2, ... ILO-1 +*> or j = IHI+1, IHI+2, ... N. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH .GE. max(1,N). +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] WI +*> \verbatim +*> WI is DOUBLE PRECISION array, dimension (N) +*> +*> The real and imaginary parts, respectively, of the computed +*> eigenvalues. If two eigenvalues are computed as a complex +*> conjugate pair, they are stored in consecutive elements of +*> WR and WI, say the i-th and (i+1)th, with WI(i) .GT. 0 and +*> WI(i+1) .LT. 0. If JOB = 'S', the eigenvalues are stored in +*> the same order as on the diagonal of the Schur form returned +*> in H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 +*> diagonal block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and +*> WI(i+1) = -WI(i). +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ,N) +*> If COMPZ = 'N', Z is not referenced. +*> If COMPZ = 'I', on entry Z need not be set and on exit, +*> if INFO = 0, Z contains the orthogonal matrix Z of the Schur +*> vectors of H. If COMPZ = 'V', on entry Z must contain an +*> N-by-N matrix Q, which is assumed to be equal to the unit +*> matrix except for the submatrix Z(ILO:IHI,ILO:IHI). On exit, +*> if INFO = 0, Z contains Q*Z. +*> Normally Q is the orthogonal matrix generated by DORGHR +*> after the call to DGEHRD which formed the Hessenberg matrix +*> H. (The output value of Z when INFO.GT.0 is given under +*> the description of INFO below.) +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. if COMPZ = 'I' or +*> COMPZ = 'V', then LDZ.GE.MAX(1,N). Otherwize, LDZ.GE.1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns an estimate of +*> the optimal value for LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK .GE. max(1,N) +*> is sufficient and delivers very good and sometimes +*> optimal performance. However, LWORK as large as 11*N +*> may be required for optimal performance. A workspace +*> query is recommended to determine the optimal workspace +*> size. +*> +*> If LWORK = -1, then DHSEQR does a workspace query. +*> In this case, DHSEQR checks the input parameters and +*> estimates the optimal workspace size for the given +*> values of N, ILO and IHI. The estimate is returned +*> in WORK(1). No error message related to LWORK is +*> issued by XERBLA. Neither H nor Z are accessed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> .LT. 0: if INFO = -i, the i-th argument had an illegal +*> value +*> .GT. 0: if INFO = i, DHSEQR failed to compute all of +*> the eigenvalues. Elements 1:ilo-1 and i+1:n of WR +*> and WI contain those eigenvalues which have been +*> successfully computed. (Failures are rare.) +*> +*> If INFO .GT. 0 and JOB = 'E', then on exit, the +*> remaining unconverged eigenvalues are the eigen- +*> values of the upper Hessenberg matrix rows and +*> columns ILO through INFO of the final, output +*> value of H. +*> +*> If INFO .GT. 0 and JOB = 'S', then on exit +*> +*> (*) (initial value of H)*U = U*(final value of H) +*> +*> where U is an orthogonal matrix. The final +*> value of H is upper Hessenberg and quasi-triangular +*> in rows and columns INFO+1 through IHI. +*> +*> If INFO .GT. 0 and COMPZ = 'V', then on exit +*> +*> (final value of Z) = (initial value of Z)*U +*> +*> where U is the orthogonal matrix in (*) (regard- +*> less of the value of JOB.) +*> +*> If INFO .GT. 0 and COMPZ = 'I', then on exit +*> (final value of Z) = U +*> where U is the orthogonal matrix in (*) (regard- +*> less of the value of JOB.) +*> +*> If INFO .GT. 0 and COMPZ = 'N', then Z is not +*> accessed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Default values supplied by +*> ILAENV(ISPEC,'DHSEQR',JOB(:1)//COMPZ(:1),N,ILO,IHI,LWORK). +*> It is suggested that these defaults be adjusted in order +*> to attain best performance in each particular +*> computational environment. +*> +*> ISPEC=12: The DLAHQR vs DLAQR0 crossover point. +*> Default: 75. (Must be at least 11.) +*> +*> ISPEC=13: Recommended deflation window size. +*> This depends on ILO, IHI and NS. NS is the +*> number of simultaneous shifts returned +*> by ILAENV(ISPEC=15). (See ISPEC=15 below.) +*> The default for (IHI-ILO+1).LE.500 is NS. +*> The default for (IHI-ILO+1).GT.500 is 3*NS/2. +*> +*> ISPEC=14: Nibble crossover point. (See IPARMQ for +*> details.) Default: 14% of deflation window +*> size. +*> +*> ISPEC=15: Number of simultaneous shifts in a multishift +*> QR iteration. +*> +*> If IHI-ILO+1 is ... +*> +*> greater than ...but less ... the +*> or equal to ... than default is +*> +*> 1 30 NS = 2(+) +*> 30 60 NS = 4(+) +*> 60 150 NS = 10(+) +*> 150 590 NS = ** +*> 590 3000 NS = 64 +*> 3000 6000 NS = 128 +*> 6000 infinity NS = 256 +*> +*> (+) By default some or all matrices of this order +*> are passed to the implicit double shift routine +*> DLAHQR and this parameter is ignored. See +*> ISPEC=12 above and comments in IPARMQ for +*> details. +*> +*> (**) The asterisks (**) indicate an ad-hoc +*> function of N increasing from 10 to 64. +*> +*> ISPEC=16: Select structured matrix multiply. +*> If the number of simultaneous shifts (specified +*> by ISPEC=15) is less than 14, then the default +*> for ISPEC=16 is 0. Otherwise the default for +*> ISPEC=16 is 2. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages +*> 929--947, 2002. +*> \n +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part II: Aggressive Early Deflation, SIAM Journal +*> of Matrix Analysis, volume 23, pages 948--973, 2002. +* +* ===================================================================== + SUBROUTINE DHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, WR, WI, Z, + $ LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDH, LDZ, LWORK, N + CHARACTER COMPZ, JOB +* .. +* .. Array Arguments .. + DOUBLE PRECISION H( LDH, * ), WI( * ), WORK( * ), WR( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* +* ==== Matrices of order NTINY or smaller must be processed by +* . DLAHQR because of insufficient subdiagonal scratch space. +* . (This is a hard limit.) ==== + INTEGER NTINY + PARAMETER ( NTINY = 11 ) +* +* ==== NL allocates some local workspace to help small matrices +* . through a rare DLAHQR failure. NL .GT. NTINY = 11 is +* . required and NL .LE. NMIN = ILAENV(ISPEC=12,...) is recom- +* . mended. (The default value of NMIN is 75.) Using NL = 49 +* . allows up to six simultaneous shifts and a 16-by-16 +* . deflation window. ==== + INTEGER NL + PARAMETER ( NL = 49 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 ) +* .. +* .. Local Arrays .. + DOUBLE PRECISION HL( NL, NL ), WORKL( NL ) +* .. +* .. Local Scalars .. + INTEGER I, KBOT, NMIN + LOGICAL INITZ, LQUERY, WANTT, WANTZ +* .. +* .. External Functions .. + INTEGER ILAENV + LOGICAL LSAME + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLACPY, DLAHQR, DLAQR0, DLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* ==== Decode and check the input parameters. ==== +* + WANTT = LSAME( JOB, 'S' ) + INITZ = LSAME( COMPZ, 'I' ) + WANTZ = INITZ .OR. LSAME( COMPZ, 'V' ) + WORK( 1 ) = DBLE( MAX( 1, N ) ) + LQUERY = LWORK.EQ.-1 +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'E' ) .AND. .NOT.WANTT ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPZ, 'N' ) .AND. .NOT.WANTZ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -5 + ELSE IF( LDH.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.MAX( 1, N ) ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.NE.0 ) THEN +* +* ==== Quick return in case of invalid argument. ==== +* + CALL XERBLA( 'DHSEQR', -INFO ) + RETURN +* + ELSE IF( N.EQ.0 ) THEN +* +* ==== Quick return in case N = 0; nothing to do. ==== +* + RETURN +* + ELSE IF( LQUERY ) THEN +* +* ==== Quick return in case of a workspace query ==== +* + CALL DLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, ILO, + $ IHI, Z, LDZ, WORK, LWORK, INFO ) +* ==== Ensure reported workspace size is backward-compatible with +* . previous LAPACK versions. ==== + WORK( 1 ) = MAX( DBLE( MAX( 1, N ) ), WORK( 1 ) ) + RETURN +* + ELSE +* +* ==== copy eigenvalues isolated by DGEBAL ==== +* + DO 10 I = 1, ILO - 1 + WR( I ) = H( I, I ) + WI( I ) = ZERO + 10 CONTINUE + DO 20 I = IHI + 1, N + WR( I ) = H( I, I ) + WI( I ) = ZERO + 20 CONTINUE +* +* ==== Initialize Z, if requested ==== +* + IF( INITZ ) + $ CALL DLASET( 'A', N, N, ZERO, ONE, Z, LDZ ) +* +* ==== Quick return if possible ==== +* + IF( ILO.EQ.IHI ) THEN + WR( ILO ) = H( ILO, ILO ) + WI( ILO ) = ZERO + RETURN + END IF +* +* ==== DLAHQR/DLAQR0 crossover point ==== +* + NMIN = ILAENV( 12, 'DHSEQR', JOB( : 1 ) // COMPZ( : 1 ), N, + $ ILO, IHI, LWORK ) + NMIN = MAX( NTINY, NMIN ) +* +* ==== DLAQR0 for big matrices; DLAHQR for small ones ==== +* + IF( N.GT.NMIN ) THEN + CALL DLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, ILO, + $ IHI, Z, LDZ, WORK, LWORK, INFO ) + ELSE +* +* ==== Small matrix ==== +* + CALL DLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, ILO, + $ IHI, Z, LDZ, INFO ) +* + IF( INFO.GT.0 ) THEN +* +* ==== A rare DLAHQR failure! DLAQR0 sometimes succeeds +* . when DLAHQR fails. ==== +* + KBOT = INFO +* + IF( N.GE.NL ) THEN +* +* ==== Larger matrices have enough subdiagonal scratch +* . space to call DLAQR0 directly. ==== +* + CALL DLAQR0( WANTT, WANTZ, N, ILO, KBOT, H, LDH, WR, + $ WI, ILO, IHI, Z, LDZ, WORK, LWORK, INFO ) +* + ELSE +* +* ==== Tiny matrices don't have enough subdiagonal +* . scratch space to benefit from DLAQR0. Hence, +* . tiny matrices must be copied into a larger +* . array before calling DLAQR0. ==== +* + CALL DLACPY( 'A', N, N, H, LDH, HL, NL ) + HL( N+1, N ) = ZERO + CALL DLASET( 'A', NL, NL-N, ZERO, ZERO, HL( 1, N+1 ), + $ NL ) + CALL DLAQR0( WANTT, WANTZ, NL, ILO, KBOT, HL, NL, WR, + $ WI, ILO, IHI, Z, LDZ, WORKL, NL, INFO ) + IF( WANTT .OR. INFO.NE.0 ) + $ CALL DLACPY( 'A', N, N, HL, NL, H, LDH ) + END IF + END IF + END IF +* +* ==== Clear out the trash, if necessary. ==== +* + IF( ( WANTT .OR. INFO.NE.0 ) .AND. N.GT.2 ) + $ CALL DLASET( 'L', N-2, N-2, ZERO, ZERO, H( 3, 1 ), LDH ) +* +* ==== Ensure reported workspace size is backward-compatible with +* . previous LAPACK versions. ==== +* + WORK( 1 ) = MAX( DBLE( MAX( 1, N ) ), WORK( 1 ) ) + END IF +* +* ==== End of DHSEQR ==== +* + END diff --git a/dspl/liblapack/SRC/disnan.f b/dspl/liblapack/SRC/disnan.f new file mode 100644 index 0000000..a565ed3 --- /dev/null +++ b/dspl/liblapack/SRC/disnan.f @@ -0,0 +1,80 @@ +*> \brief \b DISNAN tests input for NaN. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DISNAN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* LOGICAL FUNCTION DISNAN( DIN ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION, INTENT(IN) :: DIN +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DISNAN returns .TRUE. if its argument is NaN, and .FALSE. +*> otherwise. To be replaced by the Fortran 2003 intrinsic in the +*> future. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIN +*> \verbatim +*> DIN is DOUBLE PRECISION +*> Input to test for NaN. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + LOGICAL FUNCTION DISNAN( DIN ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + DOUBLE PRECISION, INTENT(IN) :: DIN +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL DLAISNAN + EXTERNAL DLAISNAN +* .. +* .. Executable Statements .. + DISNAN = DLAISNAN(DIN,DIN) + RETURN + END diff --git a/dspl/liblapack/SRC/dla_gbamv.f b/dspl/liblapack/SRC/dla_gbamv.f new file mode 100644 index 0000000..350284c --- /dev/null +++ b/dspl/liblapack/SRC/dla_gbamv.f @@ -0,0 +1,411 @@ +*> \brief \b DLA_GBAMV performs a matrix-vector operation to calculate error bounds. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLA_GBAMV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLA_GBAMV( TRANS, M, N, KL, KU, ALPHA, AB, LDAB, X, +* INCX, BETA, Y, INCY ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION ALPHA, BETA +* INTEGER INCX, INCY, LDAB, M, N, KL, KU, TRANS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLA_GBAMV performs one of the matrix-vector operations +*> +*> y := alpha*abs(A)*abs(x) + beta*abs(y), +*> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), +*> +*> where alpha and beta are scalars, x and y are vectors and A is an +*> m by n matrix. +*> +*> This function is primarily used in calculating error bounds. +*> To protect against underflow during evaluation, components in +*> the resulting vector are perturbed away from zero by (N+1) +*> times the underflow threshold. To prevent unnecessarily large +*> errors for block-structure embedded in general matrices, +*> "symbolically" zero components are not perturbed. A zero +*> entry is considered "symbolic" if all multiplications involved +*> in computing that entry have at least one zero multiplicand. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is INTEGER +*> On entry, TRANS specifies the operation to be performed as +*> follows: +*> +*> BLAS_NO_TRANS y := alpha*abs(A)*abs(x) + beta*abs(y) +*> BLAS_TRANS y := alpha*abs(A**T)*abs(x) + beta*abs(y) +*> BLAS_CONJ_TRANS y := alpha*abs(A**T)*abs(x) + beta*abs(y) +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the number of rows of the matrix A. +*> M must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension ( LDAB, n ) +*> Before entry, the leading m by n part of the array AB must +*> contain the matrix of coefficients. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> On entry, LDA specifies the first dimension of AB as declared +*> in the calling (sub) program. LDAB must be at least +*> max( 1, m ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension +*> ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +*> and at least +*> ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +*> Before entry, the incremented array X must contain the +*> vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, dimension +*> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +*> and at least +*> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +*> Before entry with BETA non-zero, the incremented array Y +*> must contain the vector y. On exit, Y is overwritten by the +*> updated vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> Unchanged on exit. +*> +*> Level 2 Blas routine. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleGBcomputational +* +* ===================================================================== + SUBROUTINE DLA_GBAMV( TRANS, M, N, KL, KU, ALPHA, AB, LDAB, X, + $ INCX, BETA, Y, INCY ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER INCX, INCY, LDAB, M, N, KL, KU, TRANS +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL SYMB_ZERO + DOUBLE PRECISION TEMP, SAFE1 + INTEGER I, INFO, IY, J, JX, KX, KY, LENX, LENY, KD, KE +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DLAMCH + DOUBLE PRECISION DLAMCH +* .. +* .. External Functions .. + EXTERNAL ILATRANS + INTEGER ILATRANS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, ABS, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.( ( TRANS.EQ.ILATRANS( 'N' ) ) + $ .OR. ( TRANS.EQ.ILATRANS( 'T' ) ) + $ .OR. ( TRANS.EQ.ILATRANS( 'C' ) ) ) ) THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( KL.LT.0 .OR. KL.GT.M-1 ) THEN + INFO = 4 + ELSE IF( KU.LT.0 .OR. KU.GT.N-1 ) THEN + INFO = 5 + ELSE IF( LDAB.LT.KL+KU+1 )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DLA_GBAMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Set SAFE1 essentially to be the underflow threshold times the +* number of additions in each row. +* + SAFE1 = DLAMCH( 'Safe minimum' ) + SAFE1 = (N+1)*SAFE1 +* +* Form y := alpha*abs(A)*abs(x) + beta*abs(y). +* +* The O(M*N) SYMB_ZERO tests could be replaced by O(N) queries to +* the inexact flag. Still doesn't help change the iteration order +* to per-column. +* + KD = KU + 1 + KE = KL + 1 + IY = KY + IF ( INCX.EQ.1 ) THEN + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + DO I = 1, LENY + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + DO J = MAX( I-KL, 1 ), MIN( I+KU, LENX ) + TEMP = ABS( AB( KD+I-J, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + IY = IY + INCY + END DO + ELSE + DO I = 1, LENY + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + DO J = MAX( I-KL, 1 ), MIN( I+KU, LENX ) + TEMP = ABS( AB( KE-I+J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + IY = IY + INCY + END DO + END IF + ELSE + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + DO I = 1, LENY + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + JX = KX + DO J = MAX( I-KL, 1 ), MIN( I+KU, LENX ) + TEMP = ABS( AB( KD+I-J, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( JX ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, LENY + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + JX = KX + DO J = MAX( I-KL, 1 ), MIN( I+KU, LENX ) + TEMP = ABS( AB( KE-I+J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( JX ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + + END IF +* + RETURN +* +* End of DLA_GBAMV +* + END diff --git a/dspl/liblapack/SRC/dla_gbrcond.f b/dspl/liblapack/SRC/dla_gbrcond.f new file mode 100644 index 0000000..e9713c9 --- /dev/null +++ b/dspl/liblapack/SRC/dla_gbrcond.f @@ -0,0 +1,353 @@ +*> \brief \b DLA_GBRCOND estimates the Skeel condition number for a general banded matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLA_GBRCOND + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLA_GBRCOND( TRANS, N, KL, KU, AB, LDAB, +* AFB, LDAFB, IPIV, CMODE, C, +* INFO, WORK, IWORK ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER N, LDAB, LDAFB, INFO, KL, KU, CMODE +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ), IPIV( * ) +* DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), WORK( * ), +* $ C( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLA_GBRCOND Estimates the Skeel condition number of op(A) * op2(C) +*> where op2 is determined by CMODE as follows +*> CMODE = 1 op2(C) = C +*> CMODE = 0 op2(C) = I +*> CMODE = -1 op2(C) = inv(C) +*> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) +*> is computed by computing scaling factors R such that +*> diag(R)*A*op2(C) is row equilibrated and computing the standard +*> infinity-norm condition number. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate Transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is DOUBLE PRECISION array, dimension (LDAFB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by DGBTRF. U is stored as an upper triangular +*> band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +*> and the multipliers used during the factorization are stored +*> in rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from the factorization A = P*L*U +*> as computed by DGBTRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> \endverbatim +*> +*> \param[in] CMODE +*> \verbatim +*> CMODE is INTEGER +*> Determines op2(C) in the formula op(A) * op2(C) as follows: +*> CMODE = 1 op2(C) = C +*> CMODE = 0 op2(C) = I +*> CMODE = -1 op2(C) = inv(C) +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The vector C in the formula op(A) * op2(C). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (5*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGBcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLA_GBRCOND( TRANS, N, KL, KU, AB, LDAB, + $ AFB, LDAFB, IPIV, CMODE, C, + $ INFO, WORK, IWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER N, LDAB, LDAFB, INFO, KL, KU, CMODE +* .. +* .. Array Arguments .. + INTEGER IWORK( * ), IPIV( * ) + DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), WORK( * ), + $ C( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRANS + INTEGER KASE, I, J, KD, KE + DOUBLE PRECISION AINVNM, TMP +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLACN2, DGBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + DLA_GBRCOND = 0.0D+0 +* + INFO = 0 + NOTRANS = LSAME( TRANS, 'N' ) + IF ( .NOT. NOTRANS .AND. .NOT. LSAME(TRANS, 'T') + $ .AND. .NOT. LSAME(TRANS, 'C') ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 .OR. KL.GT.N-1 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 .OR. KU.GT.N-1 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -6 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLA_GBRCOND', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) THEN + DLA_GBRCOND = 1.0D+0 + RETURN + END IF +* +* Compute the equilibration matrix R such that +* inv(R)*A*C has unit 1-norm. +* + KD = KU + 1 + KE = KL + 1 + IF ( NOTRANS ) THEN + DO I = 1, N + TMP = 0.0D+0 + IF ( CMODE .EQ. 1 ) THEN + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + ABS( AB( KD+I-J, J ) * C( J ) ) + END DO + ELSE IF ( CMODE .EQ. 0 ) THEN + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + ABS( AB( KD+I-J, J ) ) + END DO + ELSE + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + ABS( AB( KD+I-J, J ) / C( J ) ) + END DO + END IF + WORK( 2*N+I ) = TMP + END DO + ELSE + DO I = 1, N + TMP = 0.0D+0 + IF ( CMODE .EQ. 1 ) THEN + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + ABS( AB( KE-I+J, I ) * C( J ) ) + END DO + ELSE IF ( CMODE .EQ. 0 ) THEN + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + ABS( AB( KE-I+J, I ) ) + END DO + ELSE + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + ABS( AB( KE-I+J, I ) / C( J ) ) + END DO + END IF + WORK( 2*N+I ) = TMP + END DO + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0D+0 + + KASE = 0 + 10 CONTINUE + CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * WORK( 2*N+I ) + END DO + + IF ( NOTRANS ) THEN + CALL DGBTRS( 'No transpose', N, KL, KU, 1, AFB, LDAFB, + $ IPIV, WORK, N, INFO ) + ELSE + CALL DGBTRS( 'Transpose', N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK, N, INFO ) + END IF +* +* Multiply by inv(C). +* + IF ( CMODE .EQ. 1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) / C( I ) + END DO + ELSE IF ( CMODE .EQ. -1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + ELSE +* +* Multiply by inv(C**T). +* + IF ( CMODE .EQ. 1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) / C( I ) + END DO + ELSE IF ( CMODE .EQ. -1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + + IF ( NOTRANS ) THEN + CALL DGBTRS( 'Transpose', N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK, N, INFO ) + ELSE + CALL DGBTRS( 'No transpose', N, KL, KU, 1, AFB, LDAFB, + $ IPIV, WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * WORK( 2*N+I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0D+0 ) + $ DLA_GBRCOND = ( 1.0D+0 / AINVNM ) +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/dla_gbrfsx_extended.f b/dspl/liblapack/SRC/dla_gbrfsx_extended.f new file mode 100644 index 0000000..12b2a32 --- /dev/null +++ b/dspl/liblapack/SRC/dla_gbrfsx_extended.f @@ -0,0 +1,707 @@ +*> \brief \b DLA_GBRFSX_EXTENDED improves the computed solution to a system of linear equations for general banded matrices by performing extra-precise iterative refinement and provides error bounds and backward error estimates for the solution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLA_GBRFSX_EXTENDED + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLA_GBRFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, KL, KU, +* NRHS, AB, LDAB, AFB, LDAFB, IPIV, +* COLEQU, C, B, LDB, Y, LDY, +* BERR_OUT, N_NORMS, ERR_BNDS_NORM, +* ERR_BNDS_COMP, RES, AYB, DY, +* Y_TAIL, RCOND, ITHRESH, RTHRESH, +* DZ_UB, IGNORE_CWISE, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDAB, LDAFB, LDB, LDY, N, KL, KU, NRHS, +* $ PREC_TYPE, TRANS_TYPE, N_NORMS, ITHRESH +* LOGICAL COLEQU, IGNORE_CWISE +* DOUBLE PRECISION RTHRESH, DZ_UB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ Y( LDY, * ), RES(*), DY(*), Y_TAIL(*) +* DOUBLE PRECISION C( * ), AYB(*), RCOND, BERR_OUT(*), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> DLA_GBRFSX_EXTENDED improves the computed solution to a system of +*> linear equations by performing extra-precise iterative refinement +*> and provides error bounds and backward error estimates for the solution. +*> This subroutine is called by DGBRFSX to perform iterative refinement. +*> In addition to normwise error bound, the code provides maximum +*> componentwise error bound if possible. See comments for ERR_BNDS_NORM +*> and ERR_BNDS_COMP for details of the error bounds. Note that this +*> subroutine is only resonsible for setting the second fields of +*> ERR_BNDS_NORM and ERR_BNDS_COMP. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] PREC_TYPE +*> \verbatim +*> PREC_TYPE is INTEGER +*> Specifies the intermediate precision to be used in refinement. +*> The value is defined by ILAPREC(P) where P is a CHARACTER and +*> P = 'S': Single +*> = 'D': Double +*> = 'I': Indigenous +*> = 'X', 'E': Extra +*> \endverbatim +*> +*> \param[in] TRANS_TYPE +*> \verbatim +*> TRANS_TYPE is INTEGER +*> Specifies the transposition operation on A. +*> The value is defined by ILATRANS(T) where T is a CHARACTER and +*> T = 'N': No transpose +*> = 'T': Transpose +*> = 'C': Conjugate transpose +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0 +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand-sides, i.e., the number of columns of the +*> matrix B. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the N-by-N matrix AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDBA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is DOUBLE PRECISION array, dimension (LDAFB,N) +*> The factors L and U from the factorization +*> A = P*L*U as computed by DGBTRF. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AF. LDAFB >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from the factorization A = P*L*U +*> as computed by DGBTRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> \endverbatim +*> +*> \param[in] COLEQU +*> \verbatim +*> COLEQU is LOGICAL +*> If .TRUE. then column equilibration was done to A before calling +*> this routine. This is needed to compute the solution and error +*> bounds correctly. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If COLEQU = .FALSE., C +*> is not accessed. If C is input, each element of C should be a power +*> of the radix to ensure a reliable solution and error estimates. +*> Scaling by powers of the radix does not cause rounding errors unless +*> the result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The right-hand-side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, dimension (LDY,NRHS) +*> On entry, the solution matrix X, as computed by DGBTRS. +*> On exit, the improved solution matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +*> +*> \param[out] BERR_OUT +*> \verbatim +*> BERR_OUT is DOUBLE PRECISION array, dimension (NRHS) +*> On exit, BERR_OUT(j) contains the componentwise relative backward +*> error for right-hand-side j from the formula +*> max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. This is computed by DLA_LIN_BERR. +*> \endverbatim +*> +*> \param[in] N_NORMS +*> \verbatim +*> N_NORMS is INTEGER +*> Determines which error bounds to return (see ERR_BNDS_NORM +*> and ERR_BNDS_COMP). +*> If N_NORMS >= 1 return normwise error bounds. +*> If N_NORMS >= 2 return componentwise error bounds. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is DOUBLE PRECISION array, dimension (N) +*> Workspace to hold the intermediate residual. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is DOUBLE PRECISION array, dimension (N) +*> Workspace. This can be the same workspace passed for Y_TAIL. +*> \endverbatim +*> +*> \param[in] DY +*> \verbatim +*> DY is DOUBLE PRECISION array, dimension (N) +*> Workspace to hold the intermediate solution. +*> \endverbatim +*> +*> \param[in] Y_TAIL +*> \verbatim +*> Y_TAIL is DOUBLE PRECISION array, dimension (N) +*> Workspace to hold the trailing bits of the intermediate solution. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[in] ITHRESH +*> \verbatim +*> ITHRESH is INTEGER +*> The maximum number of residual computations allowed for +*> refinement. The default is 10. For 'aggressive' set to 100 to +*> permit convergence using approximate factorizations or +*> factorizations other than LU. If the factorization uses a +*> technique other than Gaussian elimination, the guarantees in +*> ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. +*> \endverbatim +*> +*> \param[in] RTHRESH +*> \verbatim +*> RTHRESH is DOUBLE PRECISION +*> Determines when to stop refinement if the error estimate stops +*> decreasing. Refinement will stop when the next solution no longer +*> satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is +*> the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The +*> default value is 0.5. For 'aggressive' set to 0.9 to permit +*> convergence on extremely ill-conditioned matrices. See LAWN 165 +*> for more details. +*> \endverbatim +*> +*> \param[in] DZ_UB +*> \verbatim +*> DZ_UB is DOUBLE PRECISION +*> Determines when to start considering componentwise convergence. +*> Componentwise convergence is only considered after each component +*> of the solution Y is stable, which we definte as the relative +*> change in each component being less than DZ_UB. The default value +*> is 0.25, requiring the first bit to be stable. See LAWN 165 for +*> more details. +*> \endverbatim +*> +*> \param[in] IGNORE_CWISE +*> \verbatim +*> IGNORE_CWISE is LOGICAL +*> If .TRUE. then ignore componentwise convergence. Default value +*> is .FALSE.. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> < 0: if INFO = -i, the ith argument to DGBTRS had an illegal +*> value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleGBcomputational +* +* ===================================================================== + SUBROUTINE DLA_GBRFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, KL, KU, + $ NRHS, AB, LDAB, AFB, LDAFB, IPIV, + $ COLEQU, C, B, LDB, Y, LDY, + $ BERR_OUT, N_NORMS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, RES, AYB, DY, + $ Y_TAIL, RCOND, ITHRESH, RTHRESH, + $ DZ_UB, IGNORE_CWISE, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDAB, LDAFB, LDB, LDY, N, KL, KU, NRHS, + $ PREC_TYPE, TRANS_TYPE, N_NORMS, ITHRESH + LOGICAL COLEQU, IGNORE_CWISE + DOUBLE PRECISION RTHRESH, DZ_UB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ Y( LDY, * ), RES(*), DY(*), Y_TAIL(*) + DOUBLE PRECISION C( * ), AYB(*), RCOND, BERR_OUT(*), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + CHARACTER TRANS + INTEGER CNT, I, J, M, X_STATE, Z_STATE, Y_PREC_STATE + DOUBLE PRECISION YK, DYK, YMIN, NORMY, NORMX, NORMDX, DXRAT, + $ DZRAT, PREVNORMDX, PREV_DZ_Z, DXRATMAX, + $ DZRATMAX, DX_X, DZ_Z, FINAL_DX_X, FINAL_DZ_Z, + $ EPS, HUGEVAL, INCR_THRESH + LOGICAL INCR_PREC +* .. +* .. Parameters .. + INTEGER UNSTABLE_STATE, WORKING_STATE, CONV_STATE, + $ NOPROG_STATE, BASE_RESIDUAL, EXTRA_RESIDUAL, + $ EXTRA_Y + PARAMETER ( UNSTABLE_STATE = 0, WORKING_STATE = 1, + $ CONV_STATE = 2, NOPROG_STATE = 3 ) + PARAMETER ( BASE_RESIDUAL = 0, EXTRA_RESIDUAL = 1, + $ EXTRA_Y = 2 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGBTRS, DGBMV, BLAS_DGBMV_X, + $ BLAS_DGBMV2_X, DLA_GBAMV, DLA_WWADDW, DLAMCH, + $ CHLA_TRANSTYPE, DLA_LIN_BERR + DOUBLE PRECISION DLAMCH + CHARACTER CHLA_TRANSTYPE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + IF (INFO.NE.0) RETURN + TRANS = CHLA_TRANSTYPE(TRANS_TYPE) + EPS = DLAMCH( 'Epsilon' ) + HUGEVAL = DLAMCH( 'Overflow' ) +* Force HUGEVAL to Inf + HUGEVAL = HUGEVAL * HUGEVAL +* Using HUGEVAL may lead to spurious underflows. + INCR_THRESH = DBLE( N ) * EPS + M = KL+KU+1 + + DO J = 1, NRHS + Y_PREC_STATE = EXTRA_RESIDUAL + IF ( Y_PREC_STATE .EQ. EXTRA_Y ) THEN + DO I = 1, N + Y_TAIL( I ) = 0.0D+0 + END DO + END IF + + DXRAT = 0.0D+0 + DXRATMAX = 0.0D+0 + DZRAT = 0.0D+0 + DZRATMAX = 0.0D+0 + FINAL_DX_X = HUGEVAL + FINAL_DZ_Z = HUGEVAL + PREVNORMDX = HUGEVAL + PREV_DZ_Z = HUGEVAL + DZ_Z = HUGEVAL + DX_X = HUGEVAL + + X_STATE = WORKING_STATE + Z_STATE = UNSTABLE_STATE + INCR_PREC = .FALSE. + + DO CNT = 1, ITHRESH +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL DCOPY( N, B( 1, J ), 1, RES, 1 ) + IF ( Y_PREC_STATE .EQ. BASE_RESIDUAL ) THEN + CALL DGBMV( TRANS, M, N, KL, KU, -1.0D+0, AB, LDAB, + $ Y( 1, J ), 1, 1.0D+0, RES, 1 ) + ELSE IF ( Y_PREC_STATE .EQ. EXTRA_RESIDUAL ) THEN + CALL BLAS_DGBMV_X( TRANS_TYPE, N, N, KL, KU, + $ -1.0D+0, AB, LDAB, Y( 1, J ), 1, 1.0D+0, RES, 1, + $ PREC_TYPE ) + ELSE + CALL BLAS_DGBMV2_X( TRANS_TYPE, N, N, KL, KU, -1.0D+0, + $ AB, LDAB, Y( 1, J ), Y_TAIL, 1, 1.0D+0, RES, 1, + $ PREC_TYPE ) + END IF + +! XXX: RES is no longer needed. + CALL DCOPY( N, RES, 1, DY, 1 ) + CALL DGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV, DY, N, + $ INFO ) +* +* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. +* + NORMX = 0.0D+0 + NORMY = 0.0D+0 + NORMDX = 0.0D+0 + DZ_Z = 0.0D+0 + YMIN = HUGEVAL + + DO I = 1, N + YK = ABS( Y( I, J ) ) + DYK = ABS( DY( I ) ) + + IF ( YK .NE. 0.0D+0 ) THEN + DZ_Z = MAX( DZ_Z, DYK / YK ) + ELSE IF ( DYK .NE. 0.0D+0 ) THEN + DZ_Z = HUGEVAL + END IF + + YMIN = MIN( YMIN, YK ) + + NORMY = MAX( NORMY, YK ) + + IF ( COLEQU ) THEN + NORMX = MAX( NORMX, YK * C( I ) ) + NORMDX = MAX( NORMDX, DYK * C( I ) ) + ELSE + NORMX = NORMY + NORMDX = MAX( NORMDX, DYK ) + END IF + END DO + + IF ( NORMX .NE. 0.0D+0 ) THEN + DX_X = NORMDX / NORMX + ELSE IF ( NORMDX .EQ. 0.0D+0 ) THEN + DX_X = 0.0D+0 + ELSE + DX_X = HUGEVAL + END IF + + DXRAT = NORMDX / PREVNORMDX + DZRAT = DZ_Z / PREV_DZ_Z +* +* Check termination criteria. +* + IF ( .NOT.IGNORE_CWISE + $ .AND. YMIN*RCOND .LT. INCR_THRESH*NORMY + $ .AND. Y_PREC_STATE .LT. EXTRA_Y ) + $ INCR_PREC = .TRUE. + + IF ( X_STATE .EQ. NOPROG_STATE .AND. DXRAT .LE. RTHRESH ) + $ X_STATE = WORKING_STATE + IF ( X_STATE .EQ. WORKING_STATE ) THEN + IF ( DX_X .LE. EPS ) THEN + X_STATE = CONV_STATE + ELSE IF ( DXRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + X_STATE = NOPROG_STATE + END IF + ELSE + IF ( DXRAT .GT. DXRATMAX ) DXRATMAX = DXRAT + END IF + IF ( X_STATE .GT. WORKING_STATE ) FINAL_DX_X = DX_X + END IF + + IF ( Z_STATE .EQ. UNSTABLE_STATE .AND. DZ_Z .LE. DZ_UB ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. NOPROG_STATE .AND. DZRAT .LE. RTHRESH ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. WORKING_STATE ) THEN + IF ( DZ_Z .LE. EPS ) THEN + Z_STATE = CONV_STATE + ELSE IF ( DZ_Z .GT. DZ_UB ) THEN + Z_STATE = UNSTABLE_STATE + DZRATMAX = 0.0D+0 + FINAL_DZ_Z = HUGEVAL + ELSE IF ( DZRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + Z_STATE = NOPROG_STATE + END IF + ELSE + IF ( DZRAT .GT. DZRATMAX ) DZRATMAX = DZRAT + END IF + IF ( Z_STATE .GT. WORKING_STATE ) FINAL_DZ_Z = DZ_Z + END IF +* +* Exit if both normwise and componentwise stopped working, +* but if componentwise is unstable, let it go at least two +* iterations. +* + IF ( X_STATE.NE.WORKING_STATE ) THEN + IF ( IGNORE_CWISE ) GOTO 666 + IF ( Z_STATE.EQ.NOPROG_STATE .OR. Z_STATE.EQ.CONV_STATE ) + $ GOTO 666 + IF ( Z_STATE.EQ.UNSTABLE_STATE .AND. CNT.GT.1 ) GOTO 666 + END IF + + IF ( INCR_PREC ) THEN + INCR_PREC = .FALSE. + Y_PREC_STATE = Y_PREC_STATE + 1 + DO I = 1, N + Y_TAIL( I ) = 0.0D+0 + END DO + END IF + + PREVNORMDX = NORMDX + PREV_DZ_Z = DZ_Z +* +* Update soluton. +* + IF (Y_PREC_STATE .LT. EXTRA_Y) THEN + CALL DAXPY( N, 1.0D+0, DY, 1, Y(1,J), 1 ) + ELSE + CALL DLA_WWADDW( N, Y(1,J), Y_TAIL, DY ) + END IF + + END DO +* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. + 666 CONTINUE +* +* Set final_* when cnt hits ithresh. +* + IF ( X_STATE .EQ. WORKING_STATE ) FINAL_DX_X = DX_X + IF ( Z_STATE .EQ. WORKING_STATE ) FINAL_DZ_Z = DZ_Z +* +* Compute error bounds. +* + IF ( N_NORMS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = + $ FINAL_DX_X / (1 - DXRATMAX) + END IF + IF (N_NORMS .GE. 2) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = + $ FINAL_DZ_Z / (1 - DZRATMAX) + END IF +* +* Compute componentwise relative backward error from formula +* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL DCOPY( N, B( 1, J ), 1, RES, 1 ) + CALL DGBMV(TRANS, N, N, KL, KU, -1.0D+0, AB, LDAB, Y(1,J), + $ 1, 1.0D+0, RES, 1 ) + + DO I = 1, N + AYB( I ) = ABS( B( I, J ) ) + END DO +* +* Compute abs(op(A_s))*abs(Y) + abs(B_s). +* + CALL DLA_GBAMV( TRANS_TYPE, N, N, KL, KU, 1.0D+0, + $ AB, LDAB, Y(1, J), 1, 1.0D+0, AYB, 1 ) + + CALL DLA_LIN_BERR( N, N, 1, RES, AYB, BERR_OUT( J ) ) +* +* End of loop for each RHS +* + END DO +* + RETURN + END diff --git a/dspl/liblapack/SRC/dla_gbrpvgrw.f b/dspl/liblapack/SRC/dla_gbrpvgrw.f new file mode 100644 index 0000000..3d566c2 --- /dev/null +++ b/dspl/liblapack/SRC/dla_gbrpvgrw.f @@ -0,0 +1,160 @@ +*> \brief \b DLA_GBRPVGRW computes the reciprocal pivot growth factor norm(A)/norm(U) for a general banded matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLA_GBRPVGRW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLA_GBRPVGRW( N, KL, KU, NCOLS, AB, +* LDAB, AFB, LDAFB ) +* +* .. Scalar Arguments .. +* INTEGER N, KL, KU, NCOLS, LDAB, LDAFB +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLA_GBRPVGRW computes the reciprocal pivot growth factor +*> norm(A)/norm(U). The "max absolute element" norm is used. If this is +*> much less than 1, the stability of the LU factorization of the +*> (equilibrated) matrix A could be poor. This also means that the +*> solution X, estimated condition numbers, and error bounds could be +*> unreliable. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NCOLS +*> \verbatim +*> NCOLS is INTEGER +*> The number of columns of the matrix A. NCOLS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is DOUBLE PRECISION array, dimension (LDAFB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by DGBTRF. U is stored as an upper triangular +*> band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +*> and the multipliers used during the factorization are stored +*> in rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGBcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLA_GBRPVGRW( N, KL, KU, NCOLS, AB, + $ LDAB, AFB, LDAFB ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER N, KL, KU, NCOLS, LDAB, LDAFB +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J, KD + DOUBLE PRECISION AMAX, UMAX, RPVGRW +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + RPVGRW = 1.0D+0 + + KD = KU + 1 + DO J = 1, NCOLS + AMAX = 0.0D+0 + UMAX = 0.0D+0 + DO I = MAX( J-KU, 1 ), MIN( J+KL, N ) + AMAX = MAX( ABS( AB( KD+I-J, J)), AMAX ) + END DO + DO I = MAX( J-KU, 1 ), J + UMAX = MAX( ABS( AFB( KD+I-J, J ) ), UMAX ) + END DO + IF ( UMAX /= 0.0D+0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + DLA_GBRPVGRW = RPVGRW + END diff --git a/dspl/liblapack/SRC/dla_geamv.f b/dspl/liblapack/SRC/dla_geamv.f new file mode 100644 index 0000000..0a83e4b --- /dev/null +++ b/dspl/liblapack/SRC/dla_geamv.f @@ -0,0 +1,396 @@ +*> \brief \b DLA_GEAMV computes a matrix-vector product using a general matrix to calculate error bounds. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLA_GEAMV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLA_GEAMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, +* Y, INCY ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION ALPHA, BETA +* INTEGER INCX, INCY, LDA, M, N, TRANS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLA_GEAMV performs one of the matrix-vector operations +*> +*> y := alpha*abs(A)*abs(x) + beta*abs(y), +*> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), +*> +*> where alpha and beta are scalars, x and y are vectors and A is an +*> m by n matrix. +*> +*> This function is primarily used in calculating error bounds. +*> To protect against underflow during evaluation, components in +*> the resulting vector are perturbed away from zero by (N+1) +*> times the underflow threshold. To prevent unnecessarily large +*> errors for block-structure embedded in general matrices, +*> "symbolically" zero components are not perturbed. A zero +*> entry is considered "symbolic" if all multiplications involved +*> in computing that entry have at least one zero multiplicand. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is INTEGER +*> On entry, TRANS specifies the operation to be performed as +*> follows: +*> +*> BLAS_NO_TRANS y := alpha*abs(A)*abs(x) + beta*abs(y) +*> BLAS_TRANS y := alpha*abs(A**T)*abs(x) + beta*abs(y) +*> BLAS_CONJ_TRANS y := alpha*abs(A**T)*abs(x) + beta*abs(y) +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the number of rows of the matrix A. +*> M must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension ( LDA, n ) +*> Before entry, the leading m by n part of the array A must +*> contain the matrix of coefficients. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, m ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension +*> ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +*> and at least +*> ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +*> Before entry, the incremented array X must contain the +*> vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, +*> dimension at least +*> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +*> and at least +*> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +*> Before entry with BETA non-zero, the incremented array Y +*> must contain the vector y. On exit, Y is overwritten by the +*> updated vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> Unchanged on exit. +*> +*> Level 2 Blas routine. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + SUBROUTINE DLA_GEAMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, + $ Y, INCY ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER INCX, INCY, LDA, M, N, TRANS +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL SYMB_ZERO + DOUBLE PRECISION TEMP, SAFE1 + INTEGER I, INFO, IY, J, JX, KX, KY, LENX, LENY +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DLAMCH + DOUBLE PRECISION DLAMCH +* .. +* .. External Functions .. + EXTERNAL ILATRANS + INTEGER ILATRANS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, ABS, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.( ( TRANS.EQ.ILATRANS( 'N' ) ) + $ .OR. ( TRANS.EQ.ILATRANS( 'T' ) ) + $ .OR. ( TRANS.EQ.ILATRANS( 'C' )) ) ) THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DLA_GEAMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Set SAFE1 essentially to be the underflow threshold times the +* number of additions in each row. +* + SAFE1 = DLAMCH( 'Safe minimum' ) + SAFE1 = (N+1)*SAFE1 +* +* Form y := alpha*abs(A)*abs(x) + beta*abs(y). +* +* The O(M*N) SYMB_ZERO tests could be replaced by O(N) queries to +* the inexact flag. Still doesn't help change the iteration order +* to per-column. +* + IY = KY + IF ( INCX.EQ.1 ) THEN + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + DO I = 1, LENY + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, LENX + TEMP = ABS( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, LENY + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, LENX + TEMP = ABS( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + ELSE + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + DO I = 1, LENY + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + JX = KX + DO J = 1, LENX + TEMP = ABS( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( JX ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF (.NOT.SYMB_ZERO) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, LENY + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + JX = KX + DO J = 1, LENX + TEMP = ABS( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( JX ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF (.NOT.SYMB_ZERO) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + + END IF +* + RETURN +* +* End of DLA_GEAMV +* + END diff --git a/dspl/liblapack/SRC/dla_gercond.f b/dspl/liblapack/SRC/dla_gercond.f new file mode 100644 index 0000000..aa93ca5 --- /dev/null +++ b/dspl/liblapack/SRC/dla_gercond.f @@ -0,0 +1,329 @@ +*> \brief \b DLA_GERCOND estimates the Skeel condition number for a general matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLA_GERCOND + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLA_GERCOND ( TRANS, N, A, LDA, AF, +* LDAF, IPIV, CMODE, C, +* INFO, WORK, IWORK ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER N, LDA, LDAF, INFO, CMODE +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), WORK( * ), +* $ C( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLA_GERCOND estimates the Skeel condition number of op(A) * op2(C) +*> where op2 is determined by CMODE as follows +*> CMODE = 1 op2(C) = C +*> CMODE = 0 op2(C) = I +*> CMODE = -1 op2(C) = inv(C) +*> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) +*> is computed by computing scaling factors R such that +*> diag(R)*A*op2(C) is row equilibrated and computing the standard +*> infinity-norm condition number. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate Transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> The factors L and U from the factorization +*> A = P*L*U as computed by DGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from the factorization A = P*L*U +*> as computed by DGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> \endverbatim +*> +*> \param[in] CMODE +*> \verbatim +*> CMODE is INTEGER +*> Determines op2(C) in the formula op(A) * op2(C) as follows: +*> CMODE = 1 op2(C) = C +*> CMODE = 0 op2(C) = I +*> CMODE = -1 op2(C) = inv(C) +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The vector C in the formula op(A) * op2(C). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLA_GERCOND ( TRANS, N, A, LDA, AF, + $ LDAF, IPIV, CMODE, C, + $ INFO, WORK, IWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER N, LDA, LDAF, INFO, CMODE +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), WORK( * ), + $ C( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRANS + INTEGER KASE, I, J + DOUBLE PRECISION AINVNM, TMP +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLACN2, DGETRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + DLA_GERCOND = 0.0D+0 +* + INFO = 0 + NOTRANS = LSAME( TRANS, 'N' ) + IF ( .NOT. NOTRANS .AND. .NOT. LSAME(TRANS, 'T') + $ .AND. .NOT. LSAME(TRANS, 'C') ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLA_GERCOND', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) THEN + DLA_GERCOND = 1.0D+0 + RETURN + END IF +* +* Compute the equilibration matrix R such that +* inv(R)*A*C has unit 1-norm. +* + IF (NOTRANS) THEN + DO I = 1, N + TMP = 0.0D+0 + IF ( CMODE .EQ. 1 ) THEN + DO J = 1, N + TMP = TMP + ABS( A( I, J ) * C( J ) ) + END DO + ELSE IF ( CMODE .EQ. 0 ) THEN + DO J = 1, N + TMP = TMP + ABS( A( I, J ) ) + END DO + ELSE + DO J = 1, N + TMP = TMP + ABS( A( I, J ) / C( J ) ) + END DO + END IF + WORK( 2*N+I ) = TMP + END DO + ELSE + DO I = 1, N + TMP = 0.0D+0 + IF ( CMODE .EQ. 1 ) THEN + DO J = 1, N + TMP = TMP + ABS( A( J, I ) * C( J ) ) + END DO + ELSE IF ( CMODE .EQ. 0 ) THEN + DO J = 1, N + TMP = TMP + ABS( A( J, I ) ) + END DO + ELSE + DO J = 1, N + TMP = TMP + ABS( A( J, I ) / C( J ) ) + END DO + END IF + WORK( 2*N+I ) = TMP + END DO + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0D+0 + + KASE = 0 + 10 CONTINUE + CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK(I) = WORK(I) * WORK(2*N+I) + END DO + + IF (NOTRANS) THEN + CALL DGETRS( 'No transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL DGETRS( 'Transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + END IF +* +* Multiply by inv(C). +* + IF ( CMODE .EQ. 1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) / C( I ) + END DO + ELSE IF ( CMODE .EQ. -1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + ELSE +* +* Multiply by inv(C**T). +* + IF ( CMODE .EQ. 1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) / C( I ) + END DO + ELSE IF ( CMODE .EQ. -1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + + IF (NOTRANS) THEN + CALL DGETRS( 'Transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL DGETRS( 'No transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * WORK( 2*N+I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0D+0 ) + $ DLA_GERCOND = ( 1.0D+0 / AINVNM ) +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/dla_gerfsx_extended.f b/dspl/liblapack/SRC/dla_gerfsx_extended.f new file mode 100644 index 0000000..082f810 --- /dev/null +++ b/dspl/liblapack/SRC/dla_gerfsx_extended.f @@ -0,0 +1,685 @@ +*> \brief \b DLA_GERFSX_EXTENDED improves the computed solution to a system of linear equations for general matrices by performing extra-precise iterative refinement and provides error bounds and backward error estimates for the solution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLA_GERFSX_EXTENDED + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLA_GERFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, NRHS, A, +* LDA, AF, LDAF, IPIV, COLEQU, C, B, +* LDB, Y, LDY, BERR_OUT, N_NORMS, +* ERRS_N, ERRS_C, RES, AYB, DY, +* Y_TAIL, RCOND, ITHRESH, RTHRESH, +* DZ_UB, IGNORE_CWISE, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, +* $ TRANS_TYPE, N_NORMS, ITHRESH +* LOGICAL COLEQU, IGNORE_CWISE +* DOUBLE PRECISION RTHRESH, DZ_UB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) +* DOUBLE PRECISION C( * ), AYB( * ), RCOND, BERR_OUT( * ), +* $ ERRS_N( NRHS, * ), ERRS_C( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> DLA_GERFSX_EXTENDED improves the computed solution to a system of +*> linear equations by performing extra-precise iterative refinement +*> and provides error bounds and backward error estimates for the solution. +*> This subroutine is called by DGERFSX to perform iterative refinement. +*> In addition to normwise error bound, the code provides maximum +*> componentwise error bound if possible. See comments for ERRS_N +*> and ERRS_C for details of the error bounds. Note that this +*> subroutine is only resonsible for setting the second fields of +*> ERRS_N and ERRS_C. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] PREC_TYPE +*> \verbatim +*> PREC_TYPE is INTEGER +*> Specifies the intermediate precision to be used in refinement. +*> The value is defined by ILAPREC(P) where P is a CHARACTER and +*> P = 'S': Single +*> = 'D': Double +*> = 'I': Indigenous +*> = 'X', 'E': Extra +*> \endverbatim +*> +*> \param[in] TRANS_TYPE +*> \verbatim +*> TRANS_TYPE is INTEGER +*> Specifies the transposition operation on A. +*> The value is defined by ILATRANS(T) where T is a CHARACTER and +*> T = 'N': No transpose +*> = 'T': Transpose +*> = 'C': Conjugate transpose +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand-sides, i.e., the number of columns of the +*> matrix B. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> The factors L and U from the factorization +*> A = P*L*U as computed by DGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from the factorization A = P*L*U +*> as computed by DGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> \endverbatim +*> +*> \param[in] COLEQU +*> \verbatim +*> COLEQU is LOGICAL +*> If .TRUE. then column equilibration was done to A before calling +*> this routine. This is needed to compute the solution and error +*> bounds correctly. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If COLEQU = .FALSE., C +*> is not accessed. If C is input, each element of C should be a power +*> of the radix to ensure a reliable solution and error estimates. +*> Scaling by powers of the radix does not cause rounding errors unless +*> the result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The right-hand-side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, dimension (LDY,NRHS) +*> On entry, the solution matrix X, as computed by DGETRS. +*> On exit, the improved solution matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +*> +*> \param[out] BERR_OUT +*> \verbatim +*> BERR_OUT is DOUBLE PRECISION array, dimension (NRHS) +*> On exit, BERR_OUT(j) contains the componentwise relative backward +*> error for right-hand-side j from the formula +*> max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. This is computed by DLA_LIN_BERR. +*> \endverbatim +*> +*> \param[in] N_NORMS +*> \verbatim +*> N_NORMS is INTEGER +*> Determines which error bounds to return (see ERRS_N +*> and ERRS_C). +*> If N_NORMS >= 1 return normwise error bounds. +*> If N_NORMS >= 2 return componentwise error bounds. +*> \endverbatim +*> +*> \param[in,out] ERRS_N +*> \verbatim +*> ERRS_N is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERRS_N(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERRS_N(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in,out] ERRS_C +*> \verbatim +*> ERRS_C is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERRS_C is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERRS_C(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERRS_C(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is DOUBLE PRECISION array, dimension (N) +*> Workspace to hold the intermediate residual. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is DOUBLE PRECISION array, dimension (N) +*> Workspace. This can be the same workspace passed for Y_TAIL. +*> \endverbatim +*> +*> \param[in] DY +*> \verbatim +*> DY is DOUBLE PRECISION array, dimension (N) +*> Workspace to hold the intermediate solution. +*> \endverbatim +*> +*> \param[in] Y_TAIL +*> \verbatim +*> Y_TAIL is DOUBLE PRECISION array, dimension (N) +*> Workspace to hold the trailing bits of the intermediate solution. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[in] ITHRESH +*> \verbatim +*> ITHRESH is INTEGER +*> The maximum number of residual computations allowed for +*> refinement. The default is 10. For 'aggressive' set to 100 to +*> permit convergence using approximate factorizations or +*> factorizations other than LU. If the factorization uses a +*> technique other than Gaussian elimination, the guarantees in +*> ERRS_N and ERRS_C may no longer be trustworthy. +*> \endverbatim +*> +*> \param[in] RTHRESH +*> \verbatim +*> RTHRESH is DOUBLE PRECISION +*> Determines when to stop refinement if the error estimate stops +*> decreasing. Refinement will stop when the next solution no longer +*> satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is +*> the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The +*> default value is 0.5. For 'aggressive' set to 0.9 to permit +*> convergence on extremely ill-conditioned matrices. See LAWN 165 +*> for more details. +*> \endverbatim +*> +*> \param[in] DZ_UB +*> \verbatim +*> DZ_UB is DOUBLE PRECISION +*> Determines when to start considering componentwise convergence. +*> Componentwise convergence is only considered after each component +*> of the solution Y is stable, which we definte as the relative +*> change in each component being less than DZ_UB. The default value +*> is 0.25, requiring the first bit to be stable. See LAWN 165 for +*> more details. +*> \endverbatim +*> +*> \param[in] IGNORE_CWISE +*> \verbatim +*> IGNORE_CWISE is LOGICAL +*> If .TRUE. then ignore componentwise convergence. Default value +*> is .FALSE.. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> < 0: if INFO = -i, the ith argument to DGETRS had an illegal +*> value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + SUBROUTINE DLA_GERFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, NRHS, A, + $ LDA, AF, LDAF, IPIV, COLEQU, C, B, + $ LDB, Y, LDY, BERR_OUT, N_NORMS, + $ ERRS_N, ERRS_C, RES, AYB, DY, + $ Y_TAIL, RCOND, ITHRESH, RTHRESH, + $ DZ_UB, IGNORE_CWISE, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, + $ TRANS_TYPE, N_NORMS, ITHRESH + LOGICAL COLEQU, IGNORE_CWISE + DOUBLE PRECISION RTHRESH, DZ_UB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) + DOUBLE PRECISION C( * ), AYB( * ), RCOND, BERR_OUT( * ), + $ ERRS_N( NRHS, * ), ERRS_C( NRHS, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + CHARACTER TRANS + INTEGER CNT, I, J, X_STATE, Z_STATE, Y_PREC_STATE + DOUBLE PRECISION YK, DYK, YMIN, NORMY, NORMX, NORMDX, DXRAT, + $ DZRAT, PREVNORMDX, PREV_DZ_Z, DXRATMAX, + $ DZRATMAX, DX_X, DZ_Z, FINAL_DX_X, FINAL_DZ_Z, + $ EPS, HUGEVAL, INCR_THRESH + LOGICAL INCR_PREC +* .. +* .. Parameters .. + INTEGER UNSTABLE_STATE, WORKING_STATE, CONV_STATE, + $ NOPROG_STATE, BASE_RESIDUAL, EXTRA_RESIDUAL, + $ EXTRA_Y + PARAMETER ( UNSTABLE_STATE = 0, WORKING_STATE = 1, + $ CONV_STATE = 2, NOPROG_STATE = 3 ) + PARAMETER ( BASE_RESIDUAL = 0, EXTRA_RESIDUAL = 1, + $ EXTRA_Y = 2 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGETRS, DGEMV, BLAS_DGEMV_X, + $ BLAS_DGEMV2_X, DLA_GEAMV, DLA_WWADDW, DLAMCH, + $ CHLA_TRANSTYPE, DLA_LIN_BERR + DOUBLE PRECISION DLAMCH + CHARACTER CHLA_TRANSTYPE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + IF ( INFO.NE.0 ) RETURN + TRANS = CHLA_TRANSTYPE(TRANS_TYPE) + EPS = DLAMCH( 'Epsilon' ) + HUGEVAL = DLAMCH( 'Overflow' ) +* Force HUGEVAL to Inf + HUGEVAL = HUGEVAL * HUGEVAL +* Using HUGEVAL may lead to spurious underflows. + INCR_THRESH = DBLE( N ) * EPS +* + DO J = 1, NRHS + Y_PREC_STATE = EXTRA_RESIDUAL + IF ( Y_PREC_STATE .EQ. EXTRA_Y ) THEN + DO I = 1, N + Y_TAIL( I ) = 0.0D+0 + END DO + END IF + + DXRAT = 0.0D+0 + DXRATMAX = 0.0D+0 + DZRAT = 0.0D+0 + DZRATMAX = 0.0D+0 + FINAL_DX_X = HUGEVAL + FINAL_DZ_Z = HUGEVAL + PREVNORMDX = HUGEVAL + PREV_DZ_Z = HUGEVAL + DZ_Z = HUGEVAL + DX_X = HUGEVAL + + X_STATE = WORKING_STATE + Z_STATE = UNSTABLE_STATE + INCR_PREC = .FALSE. + + DO CNT = 1, ITHRESH +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL DCOPY( N, B( 1, J ), 1, RES, 1 ) + IF ( Y_PREC_STATE .EQ. BASE_RESIDUAL ) THEN + CALL DGEMV( TRANS, N, N, -1.0D+0, A, LDA, Y( 1, J ), 1, + $ 1.0D+0, RES, 1 ) + ELSE IF ( Y_PREC_STATE .EQ. EXTRA_RESIDUAL ) THEN + CALL BLAS_DGEMV_X( TRANS_TYPE, N, N, -1.0D+0, A, LDA, + $ Y( 1, J ), 1, 1.0D+0, RES, 1, PREC_TYPE ) + ELSE + CALL BLAS_DGEMV2_X( TRANS_TYPE, N, N, -1.0D+0, A, LDA, + $ Y( 1, J ), Y_TAIL, 1, 1.0D+0, RES, 1, PREC_TYPE ) + END IF + +! XXX: RES is no longer needed. + CALL DCOPY( N, RES, 1, DY, 1 ) + CALL DGETRS( TRANS, N, 1, AF, LDAF, IPIV, DY, N, INFO ) +* +* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. +* + NORMX = 0.0D+0 + NORMY = 0.0D+0 + NORMDX = 0.0D+0 + DZ_Z = 0.0D+0 + YMIN = HUGEVAL +* + DO I = 1, N + YK = ABS( Y( I, J ) ) + DYK = ABS( DY( I ) ) + + IF ( YK .NE. 0.0D+0 ) THEN + DZ_Z = MAX( DZ_Z, DYK / YK ) + ELSE IF ( DYK .NE. 0.0D+0 ) THEN + DZ_Z = HUGEVAL + END IF + + YMIN = MIN( YMIN, YK ) + + NORMY = MAX( NORMY, YK ) + + IF ( COLEQU ) THEN + NORMX = MAX( NORMX, YK * C( I ) ) + NORMDX = MAX( NORMDX, DYK * C( I ) ) + ELSE + NORMX = NORMY + NORMDX = MAX( NORMDX, DYK ) + END IF + END DO + + IF ( NORMX .NE. 0.0D+0 ) THEN + DX_X = NORMDX / NORMX + ELSE IF ( NORMDX .EQ. 0.0D+0 ) THEN + DX_X = 0.0D+0 + ELSE + DX_X = HUGEVAL + END IF + + DXRAT = NORMDX / PREVNORMDX + DZRAT = DZ_Z / PREV_DZ_Z +* +* Check termination criteria +* + IF (.NOT.IGNORE_CWISE + $ .AND. YMIN*RCOND .LT. INCR_THRESH*NORMY + $ .AND. Y_PREC_STATE .LT. EXTRA_Y) + $ INCR_PREC = .TRUE. + + IF ( X_STATE .EQ. NOPROG_STATE .AND. DXRAT .LE. RTHRESH ) + $ X_STATE = WORKING_STATE + IF ( X_STATE .EQ. WORKING_STATE ) THEN + IF ( DX_X .LE. EPS ) THEN + X_STATE = CONV_STATE + ELSE IF ( DXRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + X_STATE = NOPROG_STATE + END IF + ELSE + IF ( DXRAT .GT. DXRATMAX ) DXRATMAX = DXRAT + END IF + IF ( X_STATE .GT. WORKING_STATE ) FINAL_DX_X = DX_X + END IF + + IF ( Z_STATE .EQ. UNSTABLE_STATE .AND. DZ_Z .LE. DZ_UB ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. NOPROG_STATE .AND. DZRAT .LE. RTHRESH ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. WORKING_STATE ) THEN + IF ( DZ_Z .LE. EPS ) THEN + Z_STATE = CONV_STATE + ELSE IF ( DZ_Z .GT. DZ_UB ) THEN + Z_STATE = UNSTABLE_STATE + DZRATMAX = 0.0D+0 + FINAL_DZ_Z = HUGEVAL + ELSE IF ( DZRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + Z_STATE = NOPROG_STATE + END IF + ELSE + IF ( DZRAT .GT. DZRATMAX ) DZRATMAX = DZRAT + END IF + IF ( Z_STATE .GT. WORKING_STATE ) FINAL_DZ_Z = DZ_Z + END IF +* +* Exit if both normwise and componentwise stopped working, +* but if componentwise is unstable, let it go at least two +* iterations. +* + IF ( X_STATE.NE.WORKING_STATE ) THEN + IF ( IGNORE_CWISE) GOTO 666 + IF ( Z_STATE.EQ.NOPROG_STATE .OR. Z_STATE.EQ.CONV_STATE ) + $ GOTO 666 + IF ( Z_STATE.EQ.UNSTABLE_STATE .AND. CNT.GT.1 ) GOTO 666 + END IF + + IF ( INCR_PREC ) THEN + INCR_PREC = .FALSE. + Y_PREC_STATE = Y_PREC_STATE + 1 + DO I = 1, N + Y_TAIL( I ) = 0.0D+0 + END DO + END IF + + PREVNORMDX = NORMDX + PREV_DZ_Z = DZ_Z +* +* Update soluton. +* + IF ( Y_PREC_STATE .LT. EXTRA_Y ) THEN + CALL DAXPY( N, 1.0D+0, DY, 1, Y( 1, J ), 1 ) + ELSE + CALL DLA_WWADDW( N, Y( 1, J ), Y_TAIL, DY ) + END IF + + END DO +* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. + 666 CONTINUE +* +* Set final_* when cnt hits ithresh. +* + IF ( X_STATE .EQ. WORKING_STATE ) FINAL_DX_X = DX_X + IF ( Z_STATE .EQ. WORKING_STATE ) FINAL_DZ_Z = DZ_Z +* +* Compute error bounds +* + IF (N_NORMS .GE. 1) THEN + ERRS_N( J, LA_LINRX_ERR_I ) = FINAL_DX_X / (1 - DXRATMAX) + END IF + IF ( N_NORMS .GE. 2 ) THEN + ERRS_C( J, LA_LINRX_ERR_I ) = FINAL_DZ_Z / (1 - DZRATMAX) + END IF +* +* Compute componentwise relative backward error from formula +* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL DCOPY( N, B( 1, J ), 1, RES, 1 ) + CALL DGEMV( TRANS, N, N, -1.0D+0, A, LDA, Y(1,J), 1, 1.0D+0, + $ RES, 1 ) + + DO I = 1, N + AYB( I ) = ABS( B( I, J ) ) + END DO +* +* Compute abs(op(A_s))*abs(Y) + abs(B_s). +* + CALL DLA_GEAMV ( TRANS_TYPE, N, N, 1.0D+0, + $ A, LDA, Y(1, J), 1, 1.0D+0, AYB, 1 ) + + CALL DLA_LIN_BERR ( N, N, 1, RES, AYB, BERR_OUT( J ) ) +* +* End of loop for each RHS. +* + END DO +* + RETURN + END diff --git a/dspl/liblapack/SRC/dla_gerpvgrw.f b/dspl/liblapack/SRC/dla_gerpvgrw.f new file mode 100644 index 0000000..88cc7be --- /dev/null +++ b/dspl/liblapack/SRC/dla_gerpvgrw.f @@ -0,0 +1,142 @@ +*> \brief \b DLA_GERPVGRW +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLA_GERPVGRW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLA_GERPVGRW( N, NCOLS, A, LDA, AF, +* LDAF ) +* +* .. Scalar Arguments .. +* INTEGER N, NCOLS, LDA, LDAF +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> DLA_GERPVGRW computes the reciprocal pivot growth factor +*> norm(A)/norm(U). The "max absolute element" norm is used. If this is +*> much less than 1, the stability of the LU factorization of the +*> (equilibrated) matrix A could be poor. This also means that the +*> solution X, estimated condition numbers, and error bounds could be +*> unreliable. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NCOLS +*> \verbatim +*> NCOLS is INTEGER +*> The number of columns of the matrix A. NCOLS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> The factors L and U from the factorization +*> A = P*L*U as computed by DGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLA_GERPVGRW( N, NCOLS, A, LDA, AF, + $ LDAF ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER N, NCOLS, LDA, LDAF +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION AMAX, UMAX, RPVGRW +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + RPVGRW = 1.0D+0 + + DO J = 1, NCOLS + AMAX = 0.0D+0 + UMAX = 0.0D+0 + DO I = 1, N + AMAX = MAX( ABS( A( I, J ) ), AMAX ) + END DO + DO I = 1, J + UMAX = MAX( ABS( AF( I, J ) ), UMAX ) + END DO + IF ( UMAX /= 0.0D+0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + DLA_GERPVGRW = RPVGRW + END diff --git a/dspl/liblapack/SRC/dla_lin_berr.f b/dspl/liblapack/SRC/dla_lin_berr.f new file mode 100644 index 0000000..0fec698 --- /dev/null +++ b/dspl/liblapack/SRC/dla_lin_berr.f @@ -0,0 +1,153 @@ +*> \brief \b DLA_LIN_BERR computes a component-wise relative backward error. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLA_LIN_BERR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLA_LIN_BERR ( N, NZ, NRHS, RES, AYB, BERR ) +* +* .. Scalar Arguments .. +* INTEGER N, NZ, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AYB( N, NRHS ), BERR( NRHS ) +* DOUBLE PRECISION RES( N, NRHS ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLA_LIN_BERR computes component-wise relative backward error from +*> the formula +*> max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the component-wise absolute value of the matrix +*> or vector Z. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NZ +*> \verbatim +*> NZ is INTEGER +*> We add (NZ+1)*SLAMCH( 'Safe minimum' ) to R(i) in the numerator to +*> guard against spuriously zero residuals. Default value is N. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices AYB, RES, and BERR. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is DOUBLE PRECISION array, dimension (N,NRHS) +*> The residual matrix, i.e., the matrix R in the relative backward +*> error formula above. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is DOUBLE PRECISION array, dimension (N, NRHS) +*> The denominator in the relative backward error formula above, i.e., +*> the matrix abs(op(A_s))*abs(Y) + abs(B_s). The matrices A, Y, and B +*> are from iterative refinement (see dla_gerfsx_extended.f). +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The component-wise relative backward error from the formula above. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DLA_LIN_BERR ( N, NZ, NRHS, RES, AYB, BERR ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER N, NZ, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION AYB( N, NRHS ), BERR( NRHS ) + DOUBLE PRECISION RES( N, NRHS ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + DOUBLE PRECISION TMP + INTEGER I, J +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + EXTERNAL DLAMCH + DOUBLE PRECISION DLAMCH + DOUBLE PRECISION SAFE1 +* .. +* .. Executable Statements .. +* +* Adding SAFE1 to the numerator guards against spuriously zero +* residuals. A similar safeguard is in the SLA_yyAMV routine used +* to compute AYB. +* + SAFE1 = DLAMCH( 'Safe minimum' ) + SAFE1 = (NZ+1)*SAFE1 + + DO J = 1, NRHS + BERR(J) = 0.0D+0 + DO I = 1, N + IF (AYB(I,J) .NE. 0.0D+0) THEN + TMP = (SAFE1+ABS(RES(I,J)))/AYB(I,J) + BERR(J) = MAX( BERR(J), TMP ) + END IF +* +* If AYB is exactly 0.0 (and if computed by SLA_yyAMV), then we know +* the true residual also must be exactly 0.0. +* + END DO + END DO + END diff --git a/dspl/liblapack/SRC/dla_porcond.f b/dspl/liblapack/SRC/dla_porcond.f new file mode 100644 index 0000000..498e707 --- /dev/null +++ b/dspl/liblapack/SRC/dla_porcond.f @@ -0,0 +1,328 @@ +*> \brief \b DLA_PORCOND estimates the Skeel condition number for a symmetric positive-definite matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLA_PORCOND + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLA_PORCOND( UPLO, N, A, LDA, AF, LDAF, +* CMODE, C, INFO, WORK, +* IWORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LDAF, INFO, CMODE +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), WORK( * ), +* $ C( * ) +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLA_PORCOND Estimates the Skeel condition number of op(A) * op2(C) +*> where op2 is determined by CMODE as follows +*> CMODE = 1 op2(C) = C +*> CMODE = 0 op2(C) = I +*> CMODE = -1 op2(C) = inv(C) +*> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) +*> is computed by computing scaling factors R such that +*> diag(R)*A*op2(C) is row equilibrated and computing the standard +*> infinity-norm condition number. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by DPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] CMODE +*> \verbatim +*> CMODE is INTEGER +*> Determines op2(C) in the formula op(A) * op2(C) as follows: +*> CMODE = 1 op2(C) = C +*> CMODE = 0 op2(C) = I +*> CMODE = -1 op2(C) = inv(C) +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The vector C in the formula op(A) * op2(C). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doublePOcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLA_PORCOND( UPLO, N, A, LDA, AF, LDAF, + $ CMODE, C, INFO, WORK, + $ IWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LDAF, INFO, CMODE + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), WORK( * ), + $ C( * ) +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER KASE, I, J + DOUBLE PRECISION AINVNM, TMP + LOGICAL UP +* .. +* .. Array Arguments .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLACN2, DPOTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + DLA_PORCOND = 0.0D+0 +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLA_PORCOND', -INFO ) + RETURN + END IF + + IF( N.EQ.0 ) THEN + DLA_PORCOND = 1.0D+0 + RETURN + END IF + UP = .FALSE. + IF ( LSAME( UPLO, 'U' ) ) UP = .TRUE. +* +* Compute the equilibration matrix R such that +* inv(R)*A*C has unit 1-norm. +* + IF ( UP ) THEN + DO I = 1, N + TMP = 0.0D+0 + IF ( CMODE .EQ. 1 ) THEN + DO J = 1, I + TMP = TMP + ABS( A( J, I ) * C( J ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( I, J ) * C( J ) ) + END DO + ELSE IF ( CMODE .EQ. 0 ) THEN + DO J = 1, I + TMP = TMP + ABS( A( J, I ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( I, J ) ) + END DO + ELSE + DO J = 1, I + TMP = TMP + ABS( A( J ,I ) / C( J ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( I, J ) / C( J ) ) + END DO + END IF + WORK( 2*N+I ) = TMP + END DO + ELSE + DO I = 1, N + TMP = 0.0D+0 + IF ( CMODE .EQ. 1 ) THEN + DO J = 1, I + TMP = TMP + ABS( A( I, J ) * C( J ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( J, I ) * C( J ) ) + END DO + ELSE IF ( CMODE .EQ. 0 ) THEN + DO J = 1, I + TMP = TMP + ABS( A( I, J ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( J, I ) ) + END DO + ELSE + DO J = 1, I + TMP = TMP + ABS( A( I, J ) / C( J ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( J, I ) / C( J ) ) + END DO + END IF + WORK( 2*N+I ) = TMP + END DO + ENDIF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0D+0 + + KASE = 0 + 10 CONTINUE + CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * WORK( 2*N+I ) + END DO + + IF (UP) THEN + CALL DPOTRS( 'Upper', N, 1, AF, LDAF, WORK, N, INFO ) + ELSE + CALL DPOTRS( 'Lower', N, 1, AF, LDAF, WORK, N, INFO ) + ENDIF +* +* Multiply by inv(C). +* + IF ( CMODE .EQ. 1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) / C( I ) + END DO + ELSE IF ( CMODE .EQ. -1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + ELSE +* +* Multiply by inv(C**T). +* + IF ( CMODE .EQ. 1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) / C( I ) + END DO + ELSE IF ( CMODE .EQ. -1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + + IF ( UP ) THEN + CALL DPOTRS( 'Upper', N, 1, AF, LDAF, WORK, N, INFO ) + ELSE + CALL DPOTRS( 'Lower', N, 1, AF, LDAF, WORK, N, INFO ) + ENDIF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * WORK( 2*N+I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0D+0 ) + $ DLA_PORCOND = ( 1.0D+0 / AINVNM ) +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/dla_porfsx_extended.f b/dspl/liblapack/SRC/dla_porfsx_extended.f new file mode 100644 index 0000000..8c0d6be --- /dev/null +++ b/dspl/liblapack/SRC/dla_porfsx_extended.f @@ -0,0 +1,679 @@ +*> \brief \b DLA_PORFSX_EXTENDED improves the computed solution to a system of linear equations for symmetric or Hermitian positive-definite matrices by performing extra-precise iterative refinement and provides error bounds and backward error estimates for the solution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLA_PORFSX_EXTENDED + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLA_PORFSX_EXTENDED( PREC_TYPE, UPLO, N, NRHS, A, LDA, +* AF, LDAF, COLEQU, C, B, LDB, Y, +* LDY, BERR_OUT, N_NORMS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, RES, +* AYB, DY, Y_TAIL, RCOND, ITHRESH, +* RTHRESH, DZ_UB, IGNORE_CWISE, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, +* $ N_NORMS, ITHRESH +* CHARACTER UPLO +* LOGICAL COLEQU, IGNORE_CWISE +* DOUBLE PRECISION RTHRESH, DZ_UB +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) +* DOUBLE PRECISION C( * ), AYB(*), RCOND, BERR_OUT( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLA_PORFSX_EXTENDED improves the computed solution to a system of +*> linear equations by performing extra-precise iterative refinement +*> and provides error bounds and backward error estimates for the solution. +*> This subroutine is called by DPORFSX to perform iterative refinement. +*> In addition to normwise error bound, the code provides maximum +*> componentwise error bound if possible. See comments for ERR_BNDS_NORM +*> and ERR_BNDS_COMP for details of the error bounds. Note that this +*> subroutine is only resonsible for setting the second fields of +*> ERR_BNDS_NORM and ERR_BNDS_COMP. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] PREC_TYPE +*> \verbatim +*> PREC_TYPE is INTEGER +*> Specifies the intermediate precision to be used in refinement. +*> The value is defined by ILAPREC(P) where P is a CHARACTER and +*> P = 'S': Single +*> = 'D': Double +*> = 'I': Indigenous +*> = 'X', 'E': Extra +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand-sides, i.e., the number of columns of the +*> matrix B. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by DPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] COLEQU +*> \verbatim +*> COLEQU is LOGICAL +*> If .TRUE. then column equilibration was done to A before calling +*> this routine. This is needed to compute the solution and error +*> bounds correctly. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If COLEQU = .FALSE., C +*> is not accessed. If C is input, each element of C should be a power +*> of the radix to ensure a reliable solution and error estimates. +*> Scaling by powers of the radix does not cause rounding errors unless +*> the result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The right-hand-side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, dimension (LDY,NRHS) +*> On entry, the solution matrix X, as computed by DPOTRS. +*> On exit, the improved solution matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +*> +*> \param[out] BERR_OUT +*> \verbatim +*> BERR_OUT is DOUBLE PRECISION array, dimension (NRHS) +*> On exit, BERR_OUT(j) contains the componentwise relative backward +*> error for right-hand-side j from the formula +*> max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. This is computed by DLA_LIN_BERR. +*> \endverbatim +*> +*> \param[in] N_NORMS +*> \verbatim +*> N_NORMS is INTEGER +*> Determines which error bounds to return (see ERR_BNDS_NORM +*> and ERR_BNDS_COMP). +*> If N_NORMS >= 1 return normwise error bounds. +*> If N_NORMS >= 2 return componentwise error bounds. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is DOUBLE PRECISION array, dimension (N) +*> Workspace to hold the intermediate residual. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is DOUBLE PRECISION array, dimension (N) +*> Workspace. This can be the same workspace passed for Y_TAIL. +*> \endverbatim +*> +*> \param[in] DY +*> \verbatim +*> DY is DOUBLE PRECISION array, dimension (N) +*> Workspace to hold the intermediate solution. +*> \endverbatim +*> +*> \param[in] Y_TAIL +*> \verbatim +*> Y_TAIL is DOUBLE PRECISION array, dimension (N) +*> Workspace to hold the trailing bits of the intermediate solution. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[in] ITHRESH +*> \verbatim +*> ITHRESH is INTEGER +*> The maximum number of residual computations allowed for +*> refinement. The default is 10. For 'aggressive' set to 100 to +*> permit convergence using approximate factorizations or +*> factorizations other than LU. If the factorization uses a +*> technique other than Gaussian elimination, the guarantees in +*> ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. +*> \endverbatim +*> +*> \param[in] RTHRESH +*> \verbatim +*> RTHRESH is DOUBLE PRECISION +*> Determines when to stop refinement if the error estimate stops +*> decreasing. Refinement will stop when the next solution no longer +*> satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is +*> the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The +*> default value is 0.5. For 'aggressive' set to 0.9 to permit +*> convergence on extremely ill-conditioned matrices. See LAWN 165 +*> for more details. +*> \endverbatim +*> +*> \param[in] DZ_UB +*> \verbatim +*> DZ_UB is DOUBLE PRECISION +*> Determines when to start considering componentwise convergence. +*> Componentwise convergence is only considered after each component +*> of the solution Y is stable, which we definte as the relative +*> change in each component being less than DZ_UB. The default value +*> is 0.25, requiring the first bit to be stable. See LAWN 165 for +*> more details. +*> \endverbatim +*> +*> \param[in] IGNORE_CWISE +*> \verbatim +*> IGNORE_CWISE is LOGICAL +*> If .TRUE. then ignore componentwise convergence. Default value +*> is .FALSE.. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> < 0: if INFO = -i, the ith argument to DPOTRS had an illegal +*> value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doublePOcomputational +* +* ===================================================================== + SUBROUTINE DLA_PORFSX_EXTENDED( PREC_TYPE, UPLO, N, NRHS, A, LDA, + $ AF, LDAF, COLEQU, C, B, LDB, Y, + $ LDY, BERR_OUT, N_NORMS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, RES, + $ AYB, DY, Y_TAIL, RCOND, ITHRESH, + $ RTHRESH, DZ_UB, IGNORE_CWISE, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, + $ N_NORMS, ITHRESH + CHARACTER UPLO + LOGICAL COLEQU, IGNORE_CWISE + DOUBLE PRECISION RTHRESH, DZ_UB +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) + DOUBLE PRECISION C( * ), AYB(*), RCOND, BERR_OUT( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER UPLO2, CNT, I, J, X_STATE, Z_STATE + DOUBLE PRECISION YK, DYK, YMIN, NORMY, NORMX, NORMDX, DXRAT, + $ DZRAT, PREVNORMDX, PREV_DZ_Z, DXRATMAX, + $ DZRATMAX, DX_X, DZ_Z, FINAL_DX_X, FINAL_DZ_Z, + $ EPS, HUGEVAL, INCR_THRESH + LOGICAL INCR_PREC +* .. +* .. Parameters .. + INTEGER UNSTABLE_STATE, WORKING_STATE, CONV_STATE, + $ NOPROG_STATE, Y_PREC_STATE, BASE_RESIDUAL, + $ EXTRA_RESIDUAL, EXTRA_Y + PARAMETER ( UNSTABLE_STATE = 0, WORKING_STATE = 1, + $ CONV_STATE = 2, NOPROG_STATE = 3 ) + PARAMETER ( BASE_RESIDUAL = 0, EXTRA_RESIDUAL = 1, + $ EXTRA_Y = 2 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL ILAUPLO + INTEGER ILAUPLO +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DPOTRS, DSYMV, BLAS_DSYMV_X, + $ BLAS_DSYMV2_X, DLA_SYAMV, DLA_WWADDW, + $ DLA_LIN_BERR + DOUBLE PRECISION DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + IF (INFO.NE.0) RETURN + EPS = DLAMCH( 'Epsilon' ) + HUGEVAL = DLAMCH( 'Overflow' ) +* Force HUGEVAL to Inf + HUGEVAL = HUGEVAL * HUGEVAL +* Using HUGEVAL may lead to spurious underflows. + INCR_THRESH = DBLE( N ) * EPS + + IF ( LSAME ( UPLO, 'L' ) ) THEN + UPLO2 = ILAUPLO( 'L' ) + ELSE + UPLO2 = ILAUPLO( 'U' ) + ENDIF + + DO J = 1, NRHS + Y_PREC_STATE = EXTRA_RESIDUAL + IF ( Y_PREC_STATE .EQ. EXTRA_Y ) THEN + DO I = 1, N + Y_TAIL( I ) = 0.0D+0 + END DO + END IF + + DXRAT = 0.0D+0 + DXRATMAX = 0.0D+0 + DZRAT = 0.0D+0 + DZRATMAX = 0.0D+0 + FINAL_DX_X = HUGEVAL + FINAL_DZ_Z = HUGEVAL + PREVNORMDX = HUGEVAL + PREV_DZ_Z = HUGEVAL + DZ_Z = HUGEVAL + DX_X = HUGEVAL + + X_STATE = WORKING_STATE + Z_STATE = UNSTABLE_STATE + INCR_PREC = .FALSE. + + DO CNT = 1, ITHRESH +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL DCOPY( N, B( 1, J ), 1, RES, 1 ) + IF ( Y_PREC_STATE .EQ. BASE_RESIDUAL ) THEN + CALL DSYMV( UPLO, N, -1.0D+0, A, LDA, Y(1,J), 1, + $ 1.0D+0, RES, 1 ) + ELSE IF ( Y_PREC_STATE .EQ. EXTRA_RESIDUAL ) THEN + CALL BLAS_DSYMV_X( UPLO2, N, -1.0D+0, A, LDA, + $ Y( 1, J ), 1, 1.0D+0, RES, 1, PREC_TYPE ) + ELSE + CALL BLAS_DSYMV2_X(UPLO2, N, -1.0D+0, A, LDA, + $ Y(1, J), Y_TAIL, 1, 1.0D+0, RES, 1, PREC_TYPE) + END IF + +! XXX: RES is no longer needed. + CALL DCOPY( N, RES, 1, DY, 1 ) + CALL DPOTRS( UPLO, N, 1, AF, LDAF, DY, N, INFO ) +* +* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. +* + NORMX = 0.0D+0 + NORMY = 0.0D+0 + NORMDX = 0.0D+0 + DZ_Z = 0.0D+0 + YMIN = HUGEVAL + + DO I = 1, N + YK = ABS( Y( I, J ) ) + DYK = ABS( DY( I ) ) + + IF ( YK .NE. 0.0D+0 ) THEN + DZ_Z = MAX( DZ_Z, DYK / YK ) + ELSE IF ( DYK .NE. 0.0D+0 ) THEN + DZ_Z = HUGEVAL + END IF + + YMIN = MIN( YMIN, YK ) + + NORMY = MAX( NORMY, YK ) + + IF ( COLEQU ) THEN + NORMX = MAX( NORMX, YK * C( I ) ) + NORMDX = MAX( NORMDX, DYK * C( I ) ) + ELSE + NORMX = NORMY + NORMDX = MAX( NORMDX, DYK ) + END IF + END DO + + IF ( NORMX .NE. 0.0D+0 ) THEN + DX_X = NORMDX / NORMX + ELSE IF ( NORMDX .EQ. 0.0D+0 ) THEN + DX_X = 0.0D+0 + ELSE + DX_X = HUGEVAL + END IF + + DXRAT = NORMDX / PREVNORMDX + DZRAT = DZ_Z / PREV_DZ_Z +* +* Check termination criteria. +* + IF ( YMIN*RCOND .LT. INCR_THRESH*NORMY + $ .AND. Y_PREC_STATE .LT. EXTRA_Y ) + $ INCR_PREC = .TRUE. + + IF ( X_STATE .EQ. NOPROG_STATE .AND. DXRAT .LE. RTHRESH ) + $ X_STATE = WORKING_STATE + IF ( X_STATE .EQ. WORKING_STATE ) THEN + IF ( DX_X .LE. EPS ) THEN + X_STATE = CONV_STATE + ELSE IF ( DXRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + X_STATE = NOPROG_STATE + END IF + ELSE + IF ( DXRAT .GT. DXRATMAX ) DXRATMAX = DXRAT + END IF + IF ( X_STATE .GT. WORKING_STATE ) FINAL_DX_X = DX_X + END IF + + IF ( Z_STATE .EQ. UNSTABLE_STATE .AND. DZ_Z .LE. DZ_UB ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. NOPROG_STATE .AND. DZRAT .LE. RTHRESH ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. WORKING_STATE ) THEN + IF ( DZ_Z .LE. EPS ) THEN + Z_STATE = CONV_STATE + ELSE IF ( DZ_Z .GT. DZ_UB ) THEN + Z_STATE = UNSTABLE_STATE + DZRATMAX = 0.0D+0 + FINAL_DZ_Z = HUGEVAL + ELSE IF ( DZRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + Z_STATE = NOPROG_STATE + END IF + ELSE + IF ( DZRAT .GT. DZRATMAX ) DZRATMAX = DZRAT + END IF + IF ( Z_STATE .GT. WORKING_STATE ) FINAL_DZ_Z = DZ_Z + END IF + + IF ( X_STATE.NE.WORKING_STATE.AND. + $ ( IGNORE_CWISE.OR.Z_STATE.NE.WORKING_STATE ) ) + $ GOTO 666 + + IF ( INCR_PREC ) THEN + INCR_PREC = .FALSE. + Y_PREC_STATE = Y_PREC_STATE + 1 + DO I = 1, N + Y_TAIL( I ) = 0.0D+0 + END DO + END IF + + PREVNORMDX = NORMDX + PREV_DZ_Z = DZ_Z +* +* Update soluton. +* + IF (Y_PREC_STATE .LT. EXTRA_Y) THEN + CALL DAXPY( N, 1.0D+0, DY, 1, Y(1,J), 1 ) + ELSE + CALL DLA_WWADDW( N, Y( 1, J ), Y_TAIL, DY ) + END IF + + END DO +* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. + 666 CONTINUE +* +* Set final_* when cnt hits ithresh. +* + IF ( X_STATE .EQ. WORKING_STATE ) FINAL_DX_X = DX_X + IF ( Z_STATE .EQ. WORKING_STATE ) FINAL_DZ_Z = DZ_Z +* +* Compute error bounds. +* + IF ( N_NORMS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = + $ FINAL_DX_X / (1 - DXRATMAX) + END IF + IF ( N_NORMS .GE. 2 ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = + $ FINAL_DZ_Z / (1 - DZRATMAX) + END IF +* +* Compute componentwise relative backward error from formula +* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL DCOPY( N, B( 1, J ), 1, RES, 1 ) + CALL DSYMV( UPLO, N, -1.0D+0, A, LDA, Y(1,J), 1, 1.0D+0, RES, + $ 1 ) + + DO I = 1, N + AYB( I ) = ABS( B( I, J ) ) + END DO +* +* Compute abs(op(A_s))*abs(Y) + abs(B_s). +* + CALL DLA_SYAMV( UPLO2, N, 1.0D+0, + $ A, LDA, Y(1, J), 1, 1.0D+0, AYB, 1 ) + + CALL DLA_LIN_BERR( N, N, 1, RES, AYB, BERR_OUT( J ) ) +* +* End of loop for each RHS. +* + END DO +* + RETURN + END diff --git a/dspl/liblapack/SRC/dla_porpvgrw.f b/dspl/liblapack/SRC/dla_porpvgrw.f new file mode 100644 index 0000000..4fe1a19 --- /dev/null +++ b/dspl/liblapack/SRC/dla_porpvgrw.f @@ -0,0 +1,210 @@ +*> \brief \b DLA_PORPVGRW computes the reciprocal pivot growth factor norm(A)/norm(U) for a symmetric or Hermitian positive-definite matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLA_PORPVGRW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLA_PORPVGRW( UPLO, NCOLS, A, LDA, AF, +* LDAF, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER*1 UPLO +* INTEGER NCOLS, LDA, LDAF +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> DLA_PORPVGRW computes the reciprocal pivot growth factor +*> norm(A)/norm(U). The "max absolute element" norm is used. If this is +*> much less than 1, the stability of the LU factorization of the +*> (equilibrated) matrix A could be poor. This also means that the +*> solution X, estimated condition numbers, and error bounds could be +*> unreliable. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] NCOLS +*> \verbatim +*> NCOLS is INTEGER +*> The number of columns of the matrix A. NCOLS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by DPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doublePOcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLA_PORPVGRW( UPLO, NCOLS, A, LDA, AF, + $ LDAF, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER*1 UPLO + INTEGER NCOLS, LDA, LDAF +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION AMAX, UMAX, RPVGRW + LOGICAL UPPER +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Functions .. + EXTERNAL LSAME + LOGICAL LSAME +* .. +* .. Executable Statements .. +* + UPPER = LSAME( 'Upper', UPLO ) +* +* DPOTRF will have factored only the NCOLSxNCOLS leading minor, so +* we restrict the growth search to that minor and use only the first +* 2*NCOLS workspace entries. +* + RPVGRW = 1.0D+0 + DO I = 1, 2*NCOLS + WORK( I ) = 0.0D+0 + END DO +* +* Find the max magnitude entry of each column. +* + IF ( UPPER ) THEN + DO J = 1, NCOLS + DO I = 1, J + WORK( NCOLS+J ) = + $ MAX( ABS( A( I, J ) ), WORK( NCOLS+J ) ) + END DO + END DO + ELSE + DO J = 1, NCOLS + DO I = J, NCOLS + WORK( NCOLS+J ) = + $ MAX( ABS( A( I, J ) ), WORK( NCOLS+J ) ) + END DO + END DO + END IF +* +* Now find the max magnitude entry of each column of the factor in +* AF. No pivoting, so no permutations. +* + IF ( LSAME( 'Upper', UPLO ) ) THEN + DO J = 1, NCOLS + DO I = 1, J + WORK( J ) = MAX( ABS( AF( I, J ) ), WORK( J ) ) + END DO + END DO + ELSE + DO J = 1, NCOLS + DO I = J, NCOLS + WORK( J ) = MAX( ABS( AF( I, J ) ), WORK( J ) ) + END DO + END DO + END IF +* +* Compute the *inverse* of the max element growth factor. Dividing +* by zero would imply the largest entry of the factor's column is +* zero. Than can happen when either the column of A is zero or +* massive pivots made the factor underflow to zero. Neither counts +* as growth in itself, so simply ignore terms with zero +* denominators. +* + IF ( LSAME( 'Upper', UPLO ) ) THEN + DO I = 1, NCOLS + UMAX = WORK( I ) + AMAX = WORK( NCOLS+I ) + IF ( UMAX /= 0.0D+0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + ELSE + DO I = 1, NCOLS + UMAX = WORK( I ) + AMAX = WORK( NCOLS+I ) + IF ( UMAX /= 0.0D+0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + END IF + + DLA_PORPVGRW = RPVGRW + END diff --git a/dspl/liblapack/SRC/dla_syamv.f b/dspl/liblapack/SRC/dla_syamv.f new file mode 100644 index 0000000..29566a6 --- /dev/null +++ b/dspl/liblapack/SRC/dla_syamv.f @@ -0,0 +1,417 @@ +*> \brief \b DLA_SYAMV computes a matrix-vector product using a symmetric indefinite matrix to calculate error bounds. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLA_SYAMV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLA_SYAMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, +* INCY ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION ALPHA, BETA +* INTEGER INCX, INCY, LDA, N, UPLO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLA_SYAMV performs the matrix-vector operation +*> +*> y := alpha*abs(A)*abs(x) + beta*abs(y), +*> +*> where alpha and beta are scalars, x and y are vectors and A is an +*> n by n symmetric matrix. +*> +*> This function is primarily used in calculating error bounds. +*> To protect against underflow during evaluation, components in +*> the resulting vector are perturbed away from zero by (N+1) +*> times the underflow threshold. To prevent unnecessarily large +*> errors for block-structure embedded in general matrices, +*> "symbolically" zero components are not perturbed. A zero +*> entry is considered "symbolic" if all multiplications involved +*> in computing that entry have at least one zero multiplicand. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is INTEGER +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array A is to be referenced as +*> follows: +*> +*> UPLO = BLAS_UPPER Only the upper triangular part of A +*> is to be referenced. +*> +*> UPLO = BLAS_LOWER Only the lower triangular part of A +*> is to be referenced. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION . +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension ( LDA, n ). +*> Before entry, the leading m by n part of the array A must +*> contain the matrix of coefficients. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, n ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension +*> ( 1 + ( n - 1 )*abs( INCX ) ) +*> Before entry, the incremented array X must contain the +*> vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION . +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, dimension +*> ( 1 + ( n - 1 )*abs( INCY ) ) +*> Before entry with BETA non-zero, the incremented array Y +*> must contain the vector y. On exit, Y is overwritten by the +*> updated vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> Unchanged on exit. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 2 Blas routine. +*> +*> -- Written on 22-October-1986. +*> Jack Dongarra, Argonne National Lab. +*> Jeremy Du Croz, Nag Central Office. +*> Sven Hammarling, Nag Central Office. +*> Richard Hanson, Sandia National Labs. +*> -- Modified for the absolute-value product, April 2006 +*> Jason Riedy, UC Berkeley +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLA_SYAMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, + $ INCY ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER INCX, INCY, LDA, N, UPLO +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL SYMB_ZERO + DOUBLE PRECISION TEMP, SAFE1 + INTEGER I, INFO, IY, J, JX, KX, KY +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DLAMCH + DOUBLE PRECISION DLAMCH +* .. +* .. External Functions .. + EXTERNAL ILAUPLO + INTEGER ILAUPLO +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, ABS, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( UPLO.NE.ILAUPLO( 'U' ) .AND. + $ UPLO.NE.ILAUPLO( 'L' ) ) THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 5 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + ELSE IF( INCY.EQ.0 )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSYMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Set SAFE1 essentially to be the underflow threshold times the +* number of additions in each row. +* + SAFE1 = DLAMCH( 'Safe minimum' ) + SAFE1 = (N+1)*SAFE1 +* +* Form y := alpha*abs(A)*abs(x) + beta*abs(y). +* +* The O(N^2) SYMB_ZERO tests could be replaced by O(N) queries to +* the inexact flag. Still doesn't help change the iteration order +* to per-column. +* + IY = KY + IF ( INCX.EQ.1 ) THEN + IF ( UPLO .EQ. ILAUPLO( 'U' ) ) THEN + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = ABS( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( J ) )*TEMP + END DO + DO J = I+1, N + TEMP = ABS( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = ABS( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( J ) )*TEMP + END DO + DO J = I+1, N + TEMP = ABS( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + ELSE + IF ( UPLO .EQ. ILAUPLO( 'U' ) ) THEN + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + JX = KX + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = ABS( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( JX ) )*TEMP + JX = JX + INCX + END DO + DO J = I+1, N + TEMP = ABS( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + JX = KX + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = ABS( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( JX ) )*TEMP + JX = JX + INCX + END DO + DO J = I+1, N + TEMP = ABS( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + + END IF +* + RETURN +* +* End of DLA_SYAMV +* + END diff --git a/dspl/liblapack/SRC/dla_syrcond.f b/dspl/liblapack/SRC/dla_syrcond.f new file mode 100644 index 0000000..91d5571 --- /dev/null +++ b/dspl/liblapack/SRC/dla_syrcond.f @@ -0,0 +1,341 @@ +*> \brief \b DLA_SYRCOND estimates the Skeel condition number for a symmetric indefinite matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLA_SYRCOND + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLA_SYRCOND( UPLO, N, A, LDA, AF, LDAF, +* IPIV, CMODE, C, INFO, WORK, +* IWORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LDAF, INFO, CMODE +* .. +* .. Array Arguments +* INTEGER IWORK( * ), IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), WORK( * ), C( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLA_SYRCOND estimates the Skeel condition number of op(A) * op2(C) +*> where op2 is determined by CMODE as follows +*> CMODE = 1 op2(C) = C +*> CMODE = 0 op2(C) = I +*> CMODE = -1 op2(C) = inv(C) +*> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) +*> is computed by computing scaling factors R such that +*> diag(R)*A*op2(C) is row equilibrated and computing the standard +*> infinity-norm condition number. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by DSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSYTRF. +*> \endverbatim +*> +*> \param[in] CMODE +*> \verbatim +*> CMODE is INTEGER +*> Determines op2(C) in the formula op(A) * op2(C) as follows: +*> CMODE = 1 op2(C) = C +*> CMODE = 0 op2(C) = I +*> CMODE = -1 op2(C) = inv(C) +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The vector C in the formula op(A) * op2(C). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLA_SYRCOND( UPLO, N, A, LDA, AF, LDAF, + $ IPIV, CMODE, C, INFO, WORK, + $ IWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LDAF, INFO, CMODE +* .. +* .. Array Arguments + INTEGER IWORK( * ), IPIV( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), WORK( * ), C( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + CHARACTER NORMIN + INTEGER KASE, I, J + DOUBLE PRECISION AINVNM, SMLNUM, TMP + LOGICAL UP +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLACN2, XERBLA, DSYTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + DLA_SYRCOND = 0.0D+0 +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLA_SYRCOND', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) THEN + DLA_SYRCOND = 1.0D+0 + RETURN + END IF + UP = .FALSE. + IF ( LSAME( UPLO, 'U' ) ) UP = .TRUE. +* +* Compute the equilibration matrix R such that +* inv(R)*A*C has unit 1-norm. +* + IF ( UP ) THEN + DO I = 1, N + TMP = 0.0D+0 + IF ( CMODE .EQ. 1 ) THEN + DO J = 1, I + TMP = TMP + ABS( A( J, I ) * C( J ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( I, J ) * C( J ) ) + END DO + ELSE IF ( CMODE .EQ. 0 ) THEN + DO J = 1, I + TMP = TMP + ABS( A( J, I ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( I, J ) ) + END DO + ELSE + DO J = 1, I + TMP = TMP + ABS( A( J, I ) / C( J ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( I, J ) / C( J ) ) + END DO + END IF + WORK( 2*N+I ) = TMP + END DO + ELSE + DO I = 1, N + TMP = 0.0D+0 + IF ( CMODE .EQ. 1 ) THEN + DO J = 1, I + TMP = TMP + ABS( A( I, J ) * C( J ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( J, I ) * C( J ) ) + END DO + ELSE IF ( CMODE .EQ. 0 ) THEN + DO J = 1, I + TMP = TMP + ABS( A( I, J ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( J, I ) ) + END DO + ELSE + DO J = 1, I + TMP = TMP + ABS( A( I, J) / C( J ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( J, I) / C( J ) ) + END DO + END IF + WORK( 2*N+I ) = TMP + END DO + ENDIF +* +* Estimate the norm of inv(op(A)). +* + SMLNUM = DLAMCH( 'Safe minimum' ) + AINVNM = 0.0D+0 + NORMIN = 'N' + + KASE = 0 + 10 CONTINUE + CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * WORK( 2*N+I ) + END DO + + IF ( UP ) THEN + CALL DSYTRS( 'U', N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + ELSE + CALL DSYTRS( 'L', N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + ENDIF +* +* Multiply by inv(C). +* + IF ( CMODE .EQ. 1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) / C( I ) + END DO + ELSE IF ( CMODE .EQ. -1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + ELSE +* +* Multiply by inv(C**T). +* + IF ( CMODE .EQ. 1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) / C( I ) + END DO + ELSE IF ( CMODE .EQ. -1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + + IF ( UP ) THEN + CALL DSYTRS( 'U', N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + ELSE + CALL DSYTRS( 'L', N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + ENDIF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * WORK( 2*N+I ) + END DO + END IF +* + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0D+0 ) + $ DLA_SYRCOND = ( 1.0D+0 / AINVNM ) +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/dla_syrfsx_extended.f b/dspl/liblapack/SRC/dla_syrfsx_extended.f new file mode 100644 index 0000000..f54d151 --- /dev/null +++ b/dspl/liblapack/SRC/dla_syrfsx_extended.f @@ -0,0 +1,708 @@ +*> \brief \b DLA_SYRFSX_EXTENDED improves the computed solution to a system of linear equations for symmetric indefinite matrices by performing extra-precise iterative refinement and provides error bounds and backward error estimates for the solution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLA_SYRFSX_EXTENDED + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLA_SYRFSX_EXTENDED( PREC_TYPE, UPLO, N, NRHS, A, LDA, +* AF, LDAF, IPIV, COLEQU, C, B, LDB, +* Y, LDY, BERR_OUT, N_NORMS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, RES, +* AYB, DY, Y_TAIL, RCOND, ITHRESH, +* RTHRESH, DZ_UB, IGNORE_CWISE, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, +* $ N_NORMS, ITHRESH +* CHARACTER UPLO +* LOGICAL COLEQU, IGNORE_CWISE +* DOUBLE PRECISION RTHRESH, DZ_UB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) +* DOUBLE PRECISION C( * ), AYB( * ), RCOND, BERR_OUT( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> DLA_SYRFSX_EXTENDED improves the computed solution to a system of +*> linear equations by performing extra-precise iterative refinement +*> and provides error bounds and backward error estimates for the solution. +*> This subroutine is called by DSYRFSX to perform iterative refinement. +*> In addition to normwise error bound, the code provides maximum +*> componentwise error bound if possible. See comments for ERR_BNDS_NORM +*> and ERR_BNDS_COMP for details of the error bounds. Note that this +*> subroutine is only resonsible for setting the second fields of +*> ERR_BNDS_NORM and ERR_BNDS_COMP. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] PREC_TYPE +*> \verbatim +*> PREC_TYPE is INTEGER +*> Specifies the intermediate precision to be used in refinement. +*> The value is defined by ILAPREC(P) where P is a CHARACTER and +*> P = 'S': Single +*> = 'D': Double +*> = 'I': Indigenous +*> = 'X', 'E': Extra +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand-sides, i.e., the number of columns of the +*> matrix B. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by DSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSYTRF. +*> \endverbatim +*> +*> \param[in] COLEQU +*> \verbatim +*> COLEQU is LOGICAL +*> If .TRUE. then column equilibration was done to A before calling +*> this routine. This is needed to compute the solution and error +*> bounds correctly. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If COLEQU = .FALSE., C +*> is not accessed. If C is input, each element of C should be a power +*> of the radix to ensure a reliable solution and error estimates. +*> Scaling by powers of the radix does not cause rounding errors unless +*> the result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The right-hand-side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, dimension (LDY,NRHS) +*> On entry, the solution matrix X, as computed by DSYTRS. +*> On exit, the improved solution matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +*> +*> \param[out] BERR_OUT +*> \verbatim +*> BERR_OUT is DOUBLE PRECISION array, dimension (NRHS) +*> On exit, BERR_OUT(j) contains the componentwise relative backward +*> error for right-hand-side j from the formula +*> max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. This is computed by DLA_LIN_BERR. +*> \endverbatim +*> +*> \param[in] N_NORMS +*> \verbatim +*> N_NORMS is INTEGER +*> Determines which error bounds to return (see ERR_BNDS_NORM +*> and ERR_BNDS_COMP). +*> If N_NORMS >= 1 return normwise error bounds. +*> If N_NORMS >= 2 return componentwise error bounds. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is DOUBLE PRECISION array, dimension (N) +*> Workspace to hold the intermediate residual. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is DOUBLE PRECISION array, dimension (N) +*> Workspace. This can be the same workspace passed for Y_TAIL. +*> \endverbatim +*> +*> \param[in] DY +*> \verbatim +*> DY is DOUBLE PRECISION array, dimension (N) +*> Workspace to hold the intermediate solution. +*> \endverbatim +*> +*> \param[in] Y_TAIL +*> \verbatim +*> Y_TAIL is DOUBLE PRECISION array, dimension (N) +*> Workspace to hold the trailing bits of the intermediate solution. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[in] ITHRESH +*> \verbatim +*> ITHRESH is INTEGER +*> The maximum number of residual computations allowed for +*> refinement. The default is 10. For 'aggressive' set to 100 to +*> permit convergence using approximate factorizations or +*> factorizations other than LU. If the factorization uses a +*> technique other than Gaussian elimination, the guarantees in +*> ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. +*> \endverbatim +*> +*> \param[in] RTHRESH +*> \verbatim +*> RTHRESH is DOUBLE PRECISION +*> Determines when to stop refinement if the error estimate stops +*> decreasing. Refinement will stop when the next solution no longer +*> satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is +*> the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The +*> default value is 0.5. For 'aggressive' set to 0.9 to permit +*> convergence on extremely ill-conditioned matrices. See LAWN 165 +*> for more details. +*> \endverbatim +*> +*> \param[in] DZ_UB +*> \verbatim +*> DZ_UB is DOUBLE PRECISION +*> Determines when to start considering componentwise convergence. +*> Componentwise convergence is only considered after each component +*> of the solution Y is stable, which we definte as the relative +*> change in each component being less than DZ_UB. The default value +*> is 0.25, requiring the first bit to be stable. See LAWN 165 for +*> more details. +*> \endverbatim +*> +*> \param[in] IGNORE_CWISE +*> \verbatim +*> IGNORE_CWISE is LOGICAL +*> If .TRUE. then ignore componentwise convergence. Default value +*> is .FALSE.. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> < 0: if INFO = -i, the ith argument to DLA_SYRFSX_EXTENDED had an illegal +*> value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DLA_SYRFSX_EXTENDED( PREC_TYPE, UPLO, N, NRHS, A, LDA, + $ AF, LDAF, IPIV, COLEQU, C, B, LDB, + $ Y, LDY, BERR_OUT, N_NORMS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, RES, + $ AYB, DY, Y_TAIL, RCOND, ITHRESH, + $ RTHRESH, DZ_UB, IGNORE_CWISE, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, + $ N_NORMS, ITHRESH + CHARACTER UPLO + LOGICAL COLEQU, IGNORE_CWISE + DOUBLE PRECISION RTHRESH, DZ_UB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) + DOUBLE PRECISION C( * ), AYB( * ), RCOND, BERR_OUT( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER UPLO2, CNT, I, J, X_STATE, Z_STATE + DOUBLE PRECISION YK, DYK, YMIN, NORMY, NORMX, NORMDX, DXRAT, + $ DZRAT, PREVNORMDX, PREV_DZ_Z, DXRATMAX, + $ DZRATMAX, DX_X, DZ_Z, FINAL_DX_X, FINAL_DZ_Z, + $ EPS, HUGEVAL, INCR_THRESH + LOGICAL INCR_PREC, UPPER +* .. +* .. Parameters .. + INTEGER UNSTABLE_STATE, WORKING_STATE, CONV_STATE, + $ NOPROG_STATE, Y_PREC_STATE, BASE_RESIDUAL, + $ EXTRA_RESIDUAL, EXTRA_Y + PARAMETER ( UNSTABLE_STATE = 0, WORKING_STATE = 1, + $ CONV_STATE = 2, NOPROG_STATE = 3 ) + PARAMETER ( BASE_RESIDUAL = 0, EXTRA_RESIDUAL = 1, + $ EXTRA_Y = 2 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL ILAUPLO + INTEGER ILAUPLO +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DSYTRS, DSYMV, BLAS_DSYMV_X, + $ BLAS_DSYMV2_X, DLA_SYAMV, DLA_WWADDW, + $ DLA_LIN_BERR + DOUBLE PRECISION DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDY.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLA_SYRFSX_EXTENDED', -INFO ) + RETURN + END IF + EPS = DLAMCH( 'Epsilon' ) + HUGEVAL = DLAMCH( 'Overflow' ) +* Force HUGEVAL to Inf + HUGEVAL = HUGEVAL * HUGEVAL +* Using HUGEVAL may lead to spurious underflows. + INCR_THRESH = DBLE( N )*EPS + + IF ( LSAME ( UPLO, 'L' ) ) THEN + UPLO2 = ILAUPLO( 'L' ) + ELSE + UPLO2 = ILAUPLO( 'U' ) + ENDIF + + DO J = 1, NRHS + Y_PREC_STATE = EXTRA_RESIDUAL + IF ( Y_PREC_STATE .EQ. EXTRA_Y ) THEN + DO I = 1, N + Y_TAIL( I ) = 0.0D+0 + END DO + END IF + + DXRAT = 0.0D+0 + DXRATMAX = 0.0D+0 + DZRAT = 0.0D+0 + DZRATMAX = 0.0D+0 + FINAL_DX_X = HUGEVAL + FINAL_DZ_Z = HUGEVAL + PREVNORMDX = HUGEVAL + PREV_DZ_Z = HUGEVAL + DZ_Z = HUGEVAL + DX_X = HUGEVAL + + X_STATE = WORKING_STATE + Z_STATE = UNSTABLE_STATE + INCR_PREC = .FALSE. + + DO CNT = 1, ITHRESH +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL DCOPY( N, B( 1, J ), 1, RES, 1 ) + IF (Y_PREC_STATE .EQ. BASE_RESIDUAL) THEN + CALL DSYMV( UPLO, N, -1.0D+0, A, LDA, Y(1,J), 1, + $ 1.0D+0, RES, 1 ) + ELSE IF (Y_PREC_STATE .EQ. EXTRA_RESIDUAL) THEN + CALL BLAS_DSYMV_X( UPLO2, N, -1.0D+0, A, LDA, + $ Y( 1, J ), 1, 1.0D+0, RES, 1, PREC_TYPE ) + ELSE + CALL BLAS_DSYMV2_X(UPLO2, N, -1.0D+0, A, LDA, + $ Y(1, J), Y_TAIL, 1, 1.0D+0, RES, 1, PREC_TYPE) + END IF + +! XXX: RES is no longer needed. + CALL DCOPY( N, RES, 1, DY, 1 ) + CALL DSYTRS( UPLO, N, 1, AF, LDAF, IPIV, DY, N, INFO ) +* +* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. +* + NORMX = 0.0D+0 + NORMY = 0.0D+0 + NORMDX = 0.0D+0 + DZ_Z = 0.0D+0 + YMIN = HUGEVAL + + DO I = 1, N + YK = ABS( Y( I, J ) ) + DYK = ABS( DY( I ) ) + + IF ( YK .NE. 0.0D+0 ) THEN + DZ_Z = MAX( DZ_Z, DYK / YK ) + ELSE IF ( DYK .NE. 0.0D+0 ) THEN + DZ_Z = HUGEVAL + END IF + + YMIN = MIN( YMIN, YK ) + + NORMY = MAX( NORMY, YK ) + + IF ( COLEQU ) THEN + NORMX = MAX( NORMX, YK * C( I ) ) + NORMDX = MAX( NORMDX, DYK * C( I ) ) + ELSE + NORMX = NORMY + NORMDX = MAX(NORMDX, DYK) + END IF + END DO + + IF ( NORMX .NE. 0.0D+0 ) THEN + DX_X = NORMDX / NORMX + ELSE IF ( NORMDX .EQ. 0.0D+0 ) THEN + DX_X = 0.0D+0 + ELSE + DX_X = HUGEVAL + END IF + + DXRAT = NORMDX / PREVNORMDX + DZRAT = DZ_Z / PREV_DZ_Z +* +* Check termination criteria. +* + IF ( YMIN*RCOND .LT. INCR_THRESH*NORMY + $ .AND. Y_PREC_STATE .LT. EXTRA_Y ) + $ INCR_PREC = .TRUE. + + IF ( X_STATE .EQ. NOPROG_STATE .AND. DXRAT .LE. RTHRESH ) + $ X_STATE = WORKING_STATE + IF ( X_STATE .EQ. WORKING_STATE ) THEN + IF ( DX_X .LE. EPS ) THEN + X_STATE = CONV_STATE + ELSE IF ( DXRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + X_STATE = NOPROG_STATE + END IF + ELSE + IF ( DXRAT .GT. DXRATMAX ) DXRATMAX = DXRAT + END IF + IF ( X_STATE .GT. WORKING_STATE ) FINAL_DX_X = DX_X + END IF + + IF ( Z_STATE .EQ. UNSTABLE_STATE .AND. DZ_Z .LE. DZ_UB ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. NOPROG_STATE .AND. DZRAT .LE. RTHRESH ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. WORKING_STATE ) THEN + IF ( DZ_Z .LE. EPS ) THEN + Z_STATE = CONV_STATE + ELSE IF ( DZ_Z .GT. DZ_UB ) THEN + Z_STATE = UNSTABLE_STATE + DZRATMAX = 0.0D+0 + FINAL_DZ_Z = HUGEVAL + ELSE IF ( DZRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + Z_STATE = NOPROG_STATE + END IF + ELSE + IF ( DZRAT .GT. DZRATMAX ) DZRATMAX = DZRAT + END IF + IF ( Z_STATE .GT. WORKING_STATE ) FINAL_DZ_Z = DZ_Z + END IF + + IF ( X_STATE.NE.WORKING_STATE.AND. + $ ( IGNORE_CWISE.OR.Z_STATE.NE.WORKING_STATE ) ) + $ GOTO 666 + + IF ( INCR_PREC ) THEN + INCR_PREC = .FALSE. + Y_PREC_STATE = Y_PREC_STATE + 1 + DO I = 1, N + Y_TAIL( I ) = 0.0D+0 + END DO + END IF + + PREVNORMDX = NORMDX + PREV_DZ_Z = DZ_Z +* +* Update soluton. +* + IF (Y_PREC_STATE .LT. EXTRA_Y) THEN + CALL DAXPY( N, 1.0D+0, DY, 1, Y(1,J), 1 ) + ELSE + CALL DLA_WWADDW( N, Y(1,J), Y_TAIL, DY ) + END IF + + END DO +* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. + 666 CONTINUE +* +* Set final_* when cnt hits ithresh. +* + IF ( X_STATE .EQ. WORKING_STATE ) FINAL_DX_X = DX_X + IF ( Z_STATE .EQ. WORKING_STATE ) FINAL_DZ_Z = DZ_Z +* +* Compute error bounds. +* + IF ( N_NORMS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = + $ FINAL_DX_X / (1 - DXRATMAX) + END IF + IF ( N_NORMS .GE. 2 ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = + $ FINAL_DZ_Z / (1 - DZRATMAX) + END IF +* +* Compute componentwise relative backward error from formula +* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). + CALL DCOPY( N, B( 1, J ), 1, RES, 1 ) + CALL DSYMV( UPLO, N, -1.0D+0, A, LDA, Y(1,J), 1, 1.0D+0, RES, + $ 1 ) + + DO I = 1, N + AYB( I ) = ABS( B( I, J ) ) + END DO +* +* Compute abs(op(A_s))*abs(Y) + abs(B_s). +* + CALL DLA_SYAMV( UPLO2, N, 1.0D+0, + $ A, LDA, Y(1, J), 1, 1.0D+0, AYB, 1 ) + + CALL DLA_LIN_BERR( N, N, 1, RES, AYB, BERR_OUT( J ) ) +* +* End of loop for each RHS. +* + END DO +* + RETURN + END diff --git a/dspl/liblapack/SRC/dla_syrpvgrw.f b/dspl/liblapack/SRC/dla_syrpvgrw.f new file mode 100644 index 0000000..c2e5cb0 --- /dev/null +++ b/dspl/liblapack/SRC/dla_syrpvgrw.f @@ -0,0 +1,320 @@ +*> \brief \b DLA_SYRPVGRW computes the reciprocal pivot growth factor norm(A)/norm(U) for a symmetric indefinite matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLA_SYRPVGRW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLA_SYRPVGRW( UPLO, N, INFO, A, LDA, AF, +* LDAF, IPIV, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER*1 UPLO +* INTEGER N, INFO, LDA, LDAF +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> DLA_SYRPVGRW computes the reciprocal pivot growth factor +*> norm(A)/norm(U). The "max absolute element" norm is used. If this is +*> much less than 1, the stability of the LU factorization of the +*> (equilibrated) matrix A could be poor. This also means that the +*> solution X, estimated condition numbers, and error bounds could be +*> unreliable. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] INFO +*> \verbatim +*> INFO is INTEGER +*> The value of INFO returned from DSYTRF, .i.e., the pivot in +*> column INFO is exactly 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by DSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSYTRF. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLA_SYRPVGRW( UPLO, N, INFO, A, LDA, AF, + $ LDAF, IPIV, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER*1 UPLO + INTEGER N, INFO, LDA, LDAF +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER NCOLS, I, J, K, KP + DOUBLE PRECISION AMAX, UMAX, RPVGRW, TMP + LOGICAL UPPER +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Functions .. + EXTERNAL LSAME + LOGICAL LSAME +* .. +* .. Executable Statements .. +* + UPPER = LSAME( 'Upper', UPLO ) + IF ( INFO.EQ.0 ) THEN + IF ( UPPER ) THEN + NCOLS = 1 + ELSE + NCOLS = N + END IF + ELSE + NCOLS = INFO + END IF + + RPVGRW = 1.0D+0 + DO I = 1, 2*N + WORK( I ) = 0.0D+0 + END DO +* +* Find the max magnitude entry of each column of A. Compute the max +* for all N columns so we can apply the pivot permutation while +* looping below. Assume a full factorization is the common case. +* + IF ( UPPER ) THEN + DO J = 1, N + DO I = 1, J + WORK( N+I ) = MAX( ABS( A( I, J ) ), WORK( N+I ) ) + WORK( N+J ) = MAX( ABS( A( I, J ) ), WORK( N+J ) ) + END DO + END DO + ELSE + DO J = 1, N + DO I = J, N + WORK( N+I ) = MAX( ABS( A( I, J ) ), WORK( N+I ) ) + WORK( N+J ) = MAX( ABS( A( I, J ) ), WORK( N+J ) ) + END DO + END DO + END IF +* +* Now find the max magnitude entry of each column of U or L. Also +* permute the magnitudes of A above so they're in the same order as +* the factor. +* +* The iteration orders and permutations were copied from dsytrs. +* Calls to SSWAP would be severe overkill. +* + IF ( UPPER ) THEN + K = N + DO WHILE ( K .LT. NCOLS .AND. K.GT.0 ) + IF ( IPIV( K ).GT.0 ) THEN +! 1x1 pivot + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + DO I = 1, K + WORK( K ) = MAX( ABS( AF( I, K ) ), WORK( K ) ) + END DO + K = K - 1 + ELSE +! 2x2 pivot + KP = -IPIV( K ) + TMP = WORK( N+K-1 ) + WORK( N+K-1 ) = WORK( N+KP ) + WORK( N+KP ) = TMP + DO I = 1, K-1 + WORK( K ) = MAX( ABS( AF( I, K ) ), WORK( K ) ) + WORK( K-1 ) = MAX( ABS( AF( I, K-1 ) ), WORK( K-1 ) ) + END DO + WORK( K ) = MAX( ABS( AF( K, K ) ), WORK( K ) ) + K = K - 2 + END IF + END DO + K = NCOLS + DO WHILE ( K .LE. N ) + IF ( IPIV( K ).GT.0 ) THEN + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + K = K + 1 + ELSE + KP = -IPIV( K ) + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + K = K + 2 + END IF + END DO + ELSE + K = 1 + DO WHILE ( K .LE. NCOLS ) + IF ( IPIV( K ).GT.0 ) THEN +! 1x1 pivot + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + DO I = K, N + WORK( K ) = MAX( ABS( AF( I, K ) ), WORK( K ) ) + END DO + K = K + 1 + ELSE +! 2x2 pivot + KP = -IPIV( K ) + TMP = WORK( N+K+1 ) + WORK( N+K+1 ) = WORK( N+KP ) + WORK( N+KP ) = TMP + DO I = K+1, N + WORK( K ) = MAX( ABS( AF( I, K ) ), WORK( K ) ) + WORK( K+1 ) = MAX( ABS( AF(I, K+1 ) ), WORK( K+1 ) ) + END DO + WORK( K ) = MAX( ABS( AF( K, K ) ), WORK( K ) ) + K = K + 2 + END IF + END DO + K = NCOLS + DO WHILE ( K .GE. 1 ) + IF ( IPIV( K ).GT.0 ) THEN + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + K = K - 1 + ELSE + KP = -IPIV( K ) + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + K = K - 2 + ENDIF + END DO + END IF +* +* Compute the *inverse* of the max element growth factor. Dividing +* by zero would imply the largest entry of the factor's column is +* zero. Than can happen when either the column of A is zero or +* massive pivots made the factor underflow to zero. Neither counts +* as growth in itself, so simply ignore terms with zero +* denominators. +* + IF ( UPPER ) THEN + DO I = NCOLS, N + UMAX = WORK( I ) + AMAX = WORK( N+I ) + IF ( UMAX /= 0.0D+0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + ELSE + DO I = 1, NCOLS + UMAX = WORK( I ) + AMAX = WORK( N+I ) + IF ( UMAX /= 0.0D+0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + END IF + + DLA_SYRPVGRW = RPVGRW + END diff --git a/dspl/liblapack/SRC/dla_wwaddw.f b/dspl/liblapack/SRC/dla_wwaddw.f new file mode 100644 index 0000000..99a86c5 --- /dev/null +++ b/dspl/liblapack/SRC/dla_wwaddw.f @@ -0,0 +1,111 @@ +*> \brief \b DLA_WWADDW adds a vector into a doubled-single vector. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLA_WWADDW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLA_WWADDW( N, X, Y, W ) +* +* .. Scalar Arguments .. +* INTEGER N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION X( * ), Y( * ), W( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLA_WWADDW adds a vector W into a doubled-single vector (X, Y). +*> +*> This works for all extant IBM's hex and binary floating point +*> arithmetics, but not for decimal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of vectors X, Y, and W. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (N) +*> The first part of the doubled-single accumulation vector. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, dimension (N) +*> The second part of the doubled-single accumulation vector. +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The vector to be added. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DLA_WWADDW( N, X, Y, W ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER N +* .. +* .. Array Arguments .. + DOUBLE PRECISION X( * ), Y( * ), W( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + DOUBLE PRECISION S + INTEGER I +* .. +* .. Executable Statements .. +* + DO 10 I = 1, N + S = X(I) + W(I) + S = (S + S) - S + Y(I) = ((X(I) - S) + W(I)) + Y(I) + X(I) = S + 10 CONTINUE + RETURN + END diff --git a/dspl/liblapack/SRC/dlabad.f b/dspl/liblapack/SRC/dlabad.f new file mode 100644 index 0000000..01b8158 --- /dev/null +++ b/dspl/liblapack/SRC/dlabad.f @@ -0,0 +1,105 @@ +*> \brief \b DLABAD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLABAD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLABAD( SMALL, LARGE ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION LARGE, SMALL +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLABAD takes as input the values computed by DLAMCH for underflow and +*> overflow, and returns the square root of each of these values if the +*> log of LARGE is sufficiently large. This subroutine is intended to +*> identify machines with a large exponent range, such as the Crays, and +*> redefine the underflow and overflow limits to be the square roots of +*> the values computed by DLAMCH. This subroutine is needed because +*> DLAMCH does not compensate for poor arithmetic in the upper half of +*> the exponent range, as is found on a Cray. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in,out] SMALL +*> \verbatim +*> SMALL is DOUBLE PRECISION +*> On entry, the underflow threshold as computed by DLAMCH. +*> On exit, if LOG10(LARGE) is sufficiently large, the square +*> root of SMALL, otherwise unchanged. +*> \endverbatim +*> +*> \param[in,out] LARGE +*> \verbatim +*> LARGE is DOUBLE PRECISION +*> On entry, the overflow threshold as computed by DLAMCH. +*> On exit, if LOG10(LARGE) is sufficiently large, the square +*> root of LARGE, otherwise unchanged. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLABAD( SMALL, LARGE ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + DOUBLE PRECISION LARGE, SMALL +* .. +* +* ===================================================================== +* +* .. Intrinsic Functions .. + INTRINSIC LOG10, SQRT +* .. +* .. Executable Statements .. +* +* If it looks like we're on a Cray, take the square root of +* SMALL and LARGE to avoid overflow and underflow problems. +* + IF( LOG10( LARGE ).GT.2000.D0 ) THEN + SMALL = SQRT( SMALL ) + LARGE = SQRT( LARGE ) + END IF +* + RETURN +* +* End of DLABAD +* + END diff --git a/dspl/liblapack/SRC/dlabrd.f b/dspl/liblapack/SRC/dlabrd.f new file mode 100644 index 0000000..b5e734d --- /dev/null +++ b/dspl/liblapack/SRC/dlabrd.f @@ -0,0 +1,381 @@ +*> \brief \b DLABRD reduces the first nb rows and columns of a general matrix to a bidiagonal form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLABRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, +* LDY ) +* +* .. Scalar Arguments .. +* INTEGER LDA, LDX, LDY, M, N, NB +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), +* $ TAUQ( * ), X( LDX, * ), Y( LDY, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLABRD reduces the first NB rows and columns of a real general +*> m by n matrix A to upper or lower bidiagonal form by an orthogonal +*> transformation Q**T * A * P, and returns the matrices X and Y which +*> are needed to apply the transformation to the unreduced part of A. +*> +*> If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower +*> bidiagonal form. +*> +*> This is an auxiliary routine called by DGEBRD +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows in the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in the matrix A. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of leading rows and columns of A to be reduced. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the m by n general matrix to be reduced. +*> On exit, the first NB rows and columns of the matrix are +*> overwritten; the rest of the array is unchanged. +*> If m >= n, elements on and below the diagonal in the first NB +*> columns, with the array TAUQ, represent the orthogonal +*> matrix Q as a product of elementary reflectors; and +*> elements above the diagonal in the first NB rows, with the +*> array TAUP, represent the orthogonal matrix P as a product +*> of elementary reflectors. +*> If m < n, elements below the diagonal in the first NB +*> columns, with the array TAUQ, represent the orthogonal +*> matrix Q as a product of elementary reflectors, and +*> elements on and above the diagonal in the first NB rows, +*> with the array TAUP, represent the orthogonal matrix P as +*> a product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (NB) +*> The diagonal elements of the first NB rows and columns of +*> the reduced matrix. D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (NB) +*> The off-diagonal elements of the first NB rows and columns of +*> the reduced matrix. +*> \endverbatim +*> +*> \param[out] TAUQ +*> \verbatim +*> TAUQ is DOUBLE PRECISION array, dimension (NB) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix Q. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP +*> \verbatim +*> TAUP is DOUBLE PRECISION array, dimension (NB) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix P. See Further Details. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NB) +*> The m-by-nb matrix X required to update the unreduced part +*> of A. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,M). +*> \endverbatim +*> +*> \param[out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, dimension (LDY,NB) +*> The n-by-nb matrix Y required to update the unreduced part +*> of A. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrices Q and P are represented as products of elementary +*> reflectors: +*> +*> Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T +*> +*> where tauq and taup are real scalars, and v and u are real vectors. +*> +*> If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in +*> A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in +*> A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in +*> A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in +*> A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> The elements of the vectors v and u together form the m-by-nb matrix +*> V and the nb-by-n matrix U**T which are needed, with X and Y, to apply +*> the transformation to the unreduced part of the matrix, using a block +*> update of the form: A := A - V*Y**T - X*U**T. +*> +*> The contents of A on exit are illustrated by the following examples +*> with nb = 2: +*> +*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +*> +*> ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) +*> ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) +*> ( v1 v2 a a a ) ( v1 1 a a a a ) +*> ( v1 v2 a a a ) ( v1 v2 a a a a ) +*> ( v1 v2 a a a ) ( v1 v2 a a a a ) +*> ( v1 v2 a a a ) +*> +*> where a denotes an element of the original matrix which is unchanged, +*> vi denotes an element of the vector defining H(i), and ui an element +*> of the vector defining G(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, + $ LDY ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER LDA, LDX, LDY, M, N, NB +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAUP( * ), + $ TAUQ( * ), X( LDX, * ), Y( LDY, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DLARFG, DSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, NB +* +* Update A(i:m,i) +* + CALL DGEMV( 'No transpose', M-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I, I ), 1 ) + CALL DGEMV( 'No transpose', M-I+1, I-1, -ONE, X( I, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+1:m,i) +* + CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL DGEMV( 'Transpose', M-I+1, N-I, ONE, A( I, I+1 ), + $ LDA, A( I, I ), 1, ZERO, Y( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', M-I+1, I-1, ONE, A( I, 1 ), LDA, + $ A( I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL DGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', M-I+1, I-1, ONE, X( I, 1 ), LDX, + $ A( I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL DGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ), + $ LDA, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL DSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) +* +* Update A(i,i+1:n) +* + CALL DGEMV( 'No transpose', N-I, I, -ONE, Y( I+1, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I+1 ), LDA ) + CALL DGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ), + $ LDA, X( I, 1 ), LDX, ONE, A( I, I+1 ), LDA ) +* +* Generate reflection P(i) to annihilate A(i,i+2:n) +* + CALL DLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ), + $ LDA, TAUP( I ) ) + E( I ) = A( I, I+1 ) + A( I, I+1 ) = ONE +* +* Compute X(i+1:m,i) +* + CALL DGEMV( 'No transpose', M-I, N-I, ONE, A( I+1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', N-I, I, ONE, Y( I+1, 1 ), LDY, + $ A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 ) + CALL DGEMV( 'No transpose', M-I, I, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL DGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 ) + CALL DGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL DSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, NB +* +* Update A(i,i:n) +* + CALL DGEMV( 'No transpose', N-I+1, I-1, -ONE, Y( I, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I ), LDA ) + CALL DGEMV( 'Transpose', I-1, N-I+1, -ONE, A( 1, I ), LDA, + $ X( I, 1 ), LDX, ONE, A( I, I ), LDA ) +* +* Generate reflection P(i) to annihilate A(i,i+1:n) +* + CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = A( I, I ) + IF( I.LT.M ) THEN + A( I, I ) = ONE +* +* Compute X(i+1:m,i) +* + CALL DGEMV( 'No transpose', M-I, N-I+1, ONE, A( I+1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', N-I+1, I-1, ONE, Y( I, 1 ), LDY, + $ A( I, I ), LDA, ZERO, X( 1, I ), 1 ) + CALL DGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL DGEMV( 'No transpose', I-1, N-I+1, ONE, A( 1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( 1, I ), 1 ) + CALL DGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL DSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) +* +* Update A(i+1:m,i) +* + CALL DGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I+1, I ), 1 ) + CALL DGEMV( 'No transpose', M-I, I, -ONE, X( I+1, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I+1, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+2:m,i) +* + CALL DLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = A( I+1, I ) + A( I+1, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL DGEMV( 'Transpose', M-I, N-I, ONE, A( I+1, I+1 ), + $ LDA, A( I+1, I ), 1, ZERO, Y( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', M-I, I-1, ONE, A( I+1, 1 ), LDA, + $ A( I+1, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL DGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', M-I, I, ONE, X( I+1, 1 ), LDX, + $ A( I+1, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL DGEMV( 'Transpose', I, N-I, -ONE, A( 1, I+1 ), LDA, + $ Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL DSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) + END IF + 20 CONTINUE + END IF + RETURN +* +* End of DLABRD +* + END diff --git a/dspl/liblapack/SRC/dlacn2.f b/dspl/liblapack/SRC/dlacn2.f new file mode 100644 index 0000000..9528540 --- /dev/null +++ b/dspl/liblapack/SRC/dlacn2.f @@ -0,0 +1,294 @@ +*> \brief \b DLACN2 estimates the 1-norm of a square matrix, using reverse communication for evaluating matrix-vector products. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLACN2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLACN2( N, V, X, ISGN, EST, KASE, ISAVE ) +* +* .. Scalar Arguments .. +* INTEGER KASE, N +* DOUBLE PRECISION EST +* .. +* .. Array Arguments .. +* INTEGER ISGN( * ), ISAVE( 3 ) +* DOUBLE PRECISION V( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLACN2 estimates the 1-norm of a square, real matrix A. +*> Reverse communication is used for evaluating matrix-vector products. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 1. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (N) +*> On the final return, V = A*W, where EST = norm(V)/norm(W) +*> (W is not returned). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (N) +*> On an intermediate return, X should be overwritten by +*> A * X, if KASE=1, +*> A**T * X, if KASE=2, +*> and DLACN2 must be re-called with all the other parameters +*> unchanged. +*> \endverbatim +*> +*> \param[out] ISGN +*> \verbatim +*> ISGN is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[in,out] EST +*> \verbatim +*> EST is DOUBLE PRECISION +*> On entry with KASE = 1 or 2 and ISAVE(1) = 3, EST should be +*> unchanged from the previous call to DLACN2. +*> On exit, EST is an estimate (a lower bound) for norm(A). +*> \endverbatim +*> +*> \param[in,out] KASE +*> \verbatim +*> KASE is INTEGER +*> On the initial call to DLACN2, KASE should be 0. +*> On an intermediate return, KASE will be 1 or 2, indicating +*> whether X should be overwritten by A * X or A**T * X. +*> On the final return from DLACN2, KASE will again be 0. +*> \endverbatim +*> +*> \param[in,out] ISAVE +*> \verbatim +*> ISAVE is INTEGER array, dimension (3) +*> ISAVE is used to save variables between calls to DLACN2 +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Originally named SONEST, dated March 16, 1988. +*> +*> This is a thread safe version of DLACON, which uses the array ISAVE +*> in place of a SAVE statement, as follows: +*> +*> DLACON DLACN2 +*> JUMP ISAVE(1) +*> J ISAVE(2) +*> ITER ISAVE(3) +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Nick Higham, University of Manchester +* +*> \par References: +* ================ +*> +*> N.J. Higham, "FORTRAN codes for estimating the one-norm of +*> a real or complex matrix, with applications to condition estimation", +*> ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. +*> +* ===================================================================== + SUBROUTINE DLACN2( N, V, X, ISGN, EST, KASE, ISAVE ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER KASE, N + DOUBLE PRECISION EST +* .. +* .. Array Arguments .. + INTEGER ISGN( * ), ISAVE( 3 ) + DOUBLE PRECISION V( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, JLAST + DOUBLE PRECISION ALTSGN, ESTOLD, TEMP +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DASUM + EXTERNAL IDAMAX, DASUM +* .. +* .. External Subroutines .. + EXTERNAL DCOPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, NINT, SIGN +* .. +* .. Executable Statements .. +* + IF( KASE.EQ.0 ) THEN + DO 10 I = 1, N + X( I ) = ONE / DBLE( N ) + 10 CONTINUE + KASE = 1 + ISAVE( 1 ) = 1 + RETURN + END IF +* + GO TO ( 20, 40, 70, 110, 140 )ISAVE( 1 ) +* +* ................ ENTRY (ISAVE( 1 ) = 1) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. +* + 20 CONTINUE + IF( N.EQ.1 ) THEN + V( 1 ) = X( 1 ) + EST = ABS( V( 1 ) ) +* ... QUIT + GO TO 150 + END IF + EST = DASUM( N, X, 1 ) +* + DO 30 I = 1, N + X( I ) = SIGN( ONE, X( I ) ) + ISGN( I ) = NINT( X( I ) ) + 30 CONTINUE + KASE = 2 + ISAVE( 1 ) = 2 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 2) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. +* + 40 CONTINUE + ISAVE( 2 ) = IDAMAX( N, X, 1 ) + ISAVE( 3 ) = 2 +* +* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. +* + 50 CONTINUE + DO 60 I = 1, N + X( I ) = ZERO + 60 CONTINUE + X( ISAVE( 2 ) ) = ONE + KASE = 1 + ISAVE( 1 ) = 3 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 3) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 70 CONTINUE + CALL DCOPY( N, X, 1, V, 1 ) + ESTOLD = EST + EST = DASUM( N, V, 1 ) + DO 80 I = 1, N + IF( NINT( SIGN( ONE, X( I ) ) ).NE.ISGN( I ) ) + $ GO TO 90 + 80 CONTINUE +* REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. + GO TO 120 +* + 90 CONTINUE +* TEST FOR CYCLING. + IF( EST.LE.ESTOLD ) + $ GO TO 120 +* + DO 100 I = 1, N + X( I ) = SIGN( ONE, X( I ) ) + ISGN( I ) = NINT( X( I ) ) + 100 CONTINUE + KASE = 2 + ISAVE( 1 ) = 4 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 4) +* X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. +* + 110 CONTINUE + JLAST = ISAVE( 2 ) + ISAVE( 2 ) = IDAMAX( N, X, 1 ) + IF( ( X( JLAST ).NE.ABS( X( ISAVE( 2 ) ) ) ) .AND. + $ ( ISAVE( 3 ).LT.ITMAX ) ) THEN + ISAVE( 3 ) = ISAVE( 3 ) + 1 + GO TO 50 + END IF +* +* ITERATION COMPLETE. FINAL STAGE. +* + 120 CONTINUE + ALTSGN = ONE + DO 130 I = 1, N + X( I ) = ALTSGN*( ONE+DBLE( I-1 ) / DBLE( N-1 ) ) + ALTSGN = -ALTSGN + 130 CONTINUE + KASE = 1 + ISAVE( 1 ) = 5 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 5) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 140 CONTINUE + TEMP = TWO*( DASUM( N, X, 1 ) / DBLE( 3*N ) ) + IF( TEMP.GT.EST ) THEN + CALL DCOPY( N, X, 1, V, 1 ) + EST = TEMP + END IF +* + 150 CONTINUE + KASE = 0 + RETURN +* +* End of DLACN2 +* + END diff --git a/dspl/liblapack/SRC/dlacon.f b/dspl/liblapack/SRC/dlacon.f new file mode 100644 index 0000000..0077f7c --- /dev/null +++ b/dspl/liblapack/SRC/dlacon.f @@ -0,0 +1,275 @@ +*> \brief \b DLACON estimates the 1-norm of a square matrix, using reverse communication for evaluating matrix-vector products. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLACON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLACON( N, V, X, ISGN, EST, KASE ) +* +* .. Scalar Arguments .. +* INTEGER KASE, N +* DOUBLE PRECISION EST +* .. +* .. Array Arguments .. +* INTEGER ISGN( * ) +* DOUBLE PRECISION V( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLACON estimates the 1-norm of a square, real matrix A. +*> Reverse communication is used for evaluating matrix-vector products. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 1. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (N) +*> On the final return, V = A*W, where EST = norm(V)/norm(W) +*> (W is not returned). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (N) +*> On an intermediate return, X should be overwritten by +*> A * X, if KASE=1, +*> A**T * X, if KASE=2, +*> and DLACON must be re-called with all the other parameters +*> unchanged. +*> \endverbatim +*> +*> \param[out] ISGN +*> \verbatim +*> ISGN is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[in,out] EST +*> \verbatim +*> EST is DOUBLE PRECISION +*> On entry with KASE = 1 or 2 and JUMP = 3, EST should be +*> unchanged from the previous call to DLACON. +*> On exit, EST is an estimate (a lower bound) for norm(A). +*> \endverbatim +*> +*> \param[in,out] KASE +*> \verbatim +*> KASE is INTEGER +*> On the initial call to DLACON, KASE should be 0. +*> On an intermediate return, KASE will be 1 or 2, indicating +*> whether X should be overwritten by A * X or A**T * X. +*> On the final return from DLACON, KASE will again be 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Nick Higham, University of Manchester. \n +*> Originally named SONEST, dated March 16, 1988. +* +*> \par References: +* ================ +*> +*> N.J. Higham, "FORTRAN codes for estimating the one-norm of +*> a real or complex matrix, with applications to condition estimation", +*> ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. +*> +* ===================================================================== + SUBROUTINE DLACON( N, V, X, ISGN, EST, KASE ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER KASE, N + DOUBLE PRECISION EST +* .. +* .. Array Arguments .. + INTEGER ISGN( * ) + DOUBLE PRECISION V( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, ITER, J, JLAST, JUMP + DOUBLE PRECISION ALTSGN, ESTOLD, TEMP +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DASUM + EXTERNAL IDAMAX, DASUM +* .. +* .. External Subroutines .. + EXTERNAL DCOPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, NINT, SIGN +* .. +* .. Save statement .. + SAVE +* .. +* .. Executable Statements .. +* + IF( KASE.EQ.0 ) THEN + DO 10 I = 1, N + X( I ) = ONE / DBLE( N ) + 10 CONTINUE + KASE = 1 + JUMP = 1 + RETURN + END IF +* + GO TO ( 20, 40, 70, 110, 140 )JUMP +* +* ................ ENTRY (JUMP = 1) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. +* + 20 CONTINUE + IF( N.EQ.1 ) THEN + V( 1 ) = X( 1 ) + EST = ABS( V( 1 ) ) +* ... QUIT + GO TO 150 + END IF + EST = DASUM( N, X, 1 ) +* + DO 30 I = 1, N + X( I ) = SIGN( ONE, X( I ) ) + ISGN( I ) = NINT( X( I ) ) + 30 CONTINUE + KASE = 2 + JUMP = 2 + RETURN +* +* ................ ENTRY (JUMP = 2) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. +* + 40 CONTINUE + J = IDAMAX( N, X, 1 ) + ITER = 2 +* +* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. +* + 50 CONTINUE + DO 60 I = 1, N + X( I ) = ZERO + 60 CONTINUE + X( J ) = ONE + KASE = 1 + JUMP = 3 + RETURN +* +* ................ ENTRY (JUMP = 3) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 70 CONTINUE + CALL DCOPY( N, X, 1, V, 1 ) + ESTOLD = EST + EST = DASUM( N, V, 1 ) + DO 80 I = 1, N + IF( NINT( SIGN( ONE, X( I ) ) ).NE.ISGN( I ) ) + $ GO TO 90 + 80 CONTINUE +* REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. + GO TO 120 +* + 90 CONTINUE +* TEST FOR CYCLING. + IF( EST.LE.ESTOLD ) + $ GO TO 120 +* + DO 100 I = 1, N + X( I ) = SIGN( ONE, X( I ) ) + ISGN( I ) = NINT( X( I ) ) + 100 CONTINUE + KASE = 2 + JUMP = 4 + RETURN +* +* ................ ENTRY (JUMP = 4) +* X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. +* + 110 CONTINUE + JLAST = J + J = IDAMAX( N, X, 1 ) + IF( ( X( JLAST ).NE.ABS( X( J ) ) ) .AND. ( ITER.LT.ITMAX ) ) THEN + ITER = ITER + 1 + GO TO 50 + END IF +* +* ITERATION COMPLETE. FINAL STAGE. +* + 120 CONTINUE + ALTSGN = ONE + DO 130 I = 1, N + X( I ) = ALTSGN*( ONE+DBLE( I-1 ) / DBLE( N-1 ) ) + ALTSGN = -ALTSGN + 130 CONTINUE + KASE = 1 + JUMP = 5 + RETURN +* +* ................ ENTRY (JUMP = 5) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 140 CONTINUE + TEMP = TWO*( DASUM( N, X, 1 ) / DBLE( 3*N ) ) + IF( TEMP.GT.EST ) THEN + CALL DCOPY( N, X, 1, V, 1 ) + EST = TEMP + END IF +* + 150 CONTINUE + KASE = 0 + RETURN +* +* End of DLACON +* + END diff --git a/dspl/liblapack/SRC/dlacpy.f b/dspl/liblapack/SRC/dlacpy.f new file mode 100644 index 0000000..d1c3967 --- /dev/null +++ b/dspl/liblapack/SRC/dlacpy.f @@ -0,0 +1,156 @@ +*> \brief \b DLACPY copies all or part of one two-dimensional array to another. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLACPY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLACPY copies all or part of a two-dimensional matrix A to another +*> matrix B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies the part of the matrix A to be copied to B. +*> = 'U': Upper triangular part +*> = 'L': Lower triangular part +*> Otherwise: All of the matrix A +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The m by n matrix A. If UPLO = 'U', only the upper triangle +*> or trapezoid is accessed; if UPLO = 'L', only the lower +*> triangle or trapezoid is accessed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> On exit, B = A in the locations specified by UPLO. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( J, M ) + B( I, J ) = A( I, J ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( LSAME( UPLO, 'L' ) ) THEN + DO 40 J = 1, N + DO 30 I = J, M + B( I, J ) = A( I, J ) + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 J = 1, N + DO 50 I = 1, M + B( I, J ) = A( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF + RETURN +* +* End of DLACPY +* + END diff --git a/dspl/liblapack/SRC/dladiv.f b/dspl/liblapack/SRC/dladiv.f new file mode 100644 index 0000000..dd8110a --- /dev/null +++ b/dspl/liblapack/SRC/dladiv.f @@ -0,0 +1,256 @@ +*> \brief \b DLADIV performs complex division in real arithmetic, avoiding unnecessary overflow. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLADIV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLADIV( A, B, C, D, P, Q ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION A, B, C, D, P, Q +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLADIV performs complex division in real arithmetic +*> +*> a + i*b +*> p + i*q = --------- +*> c + i*d +*> +*> The algorithm is due to Michael Baudin and Robert L. Smith +*> and can be found in the paper +*> "A Robust Complex Division in Scilab" +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION +*> The scalars a, b, c, and d in the above expression. +*> \endverbatim +*> +*> \param[out] P +*> \verbatim +*> P is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is DOUBLE PRECISION +*> The scalars p and q in the above expression. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date January 2013 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLADIV( A, B, C, D, P, Q ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2013 +* +* .. Scalar Arguments .. + DOUBLE PRECISION A, B, C, D, P, Q +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION BS + PARAMETER ( BS = 2.0D0 ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = 0.5D0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D0 ) +* +* .. Local Scalars .. + DOUBLE PRECISION AA, BB, CC, DD, AB, CD, S, OV, UN, BE, EPS +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLADIV1 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + AA = A + BB = B + CC = C + DD = D + AB = MAX( ABS(A), ABS(B) ) + CD = MAX( ABS(C), ABS(D) ) + S = 1.0D0 + + OV = DLAMCH( 'Overflow threshold' ) + UN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Epsilon' ) + BE = BS / (EPS*EPS) + + IF( AB >= HALF*OV ) THEN + AA = HALF * AA + BB = HALF * BB + S = TWO * S + END IF + IF( CD >= HALF*OV ) THEN + CC = HALF * CC + DD = HALF * DD + S = HALF * S + END IF + IF( AB <= UN*BS/EPS ) THEN + AA = AA * BE + BB = BB * BE + S = S / BE + END IF + IF( CD <= UN*BS/EPS ) THEN + CC = CC * BE + DD = DD * BE + S = S * BE + END IF + IF( ABS( D ).LE.ABS( C ) ) THEN + CALL DLADIV1(AA, BB, CC, DD, P, Q) + ELSE + CALL DLADIV1(BB, AA, DD, CC, P, Q) + Q = -Q + END IF + P = P * S + Q = Q * S +* + RETURN +* +* End of DLADIV +* + END + +*> \ingroup doubleOTHERauxiliary + + + SUBROUTINE DLADIV1( A, B, C, D, P, Q ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2013 +* +* .. Scalar Arguments .. + DOUBLE PRECISION A, B, C, D, P, Q +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) +* +* .. Local Scalars .. + DOUBLE PRECISION R, T +* .. +* .. External Functions .. + DOUBLE PRECISION DLADIV2 + EXTERNAL DLADIV2 +* .. +* .. Executable Statements .. +* + R = D / C + T = ONE / (C + D * R) + P = DLADIV2(A, B, C, D, R, T) + A = -A + Q = DLADIV2(B, A, C, D, R, T) +* + RETURN +* +* End of DLADIV1 +* + END + +*> \ingroup doubleOTHERauxiliary + + DOUBLE PRECISION FUNCTION DLADIV2( A, B, C, D, R, T ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2013 +* +* .. Scalar Arguments .. + DOUBLE PRECISION A, B, C, D, R, T +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) +* +* .. Local Scalars .. + DOUBLE PRECISION BR +* .. +* .. Executable Statements .. +* + IF( R.NE.ZERO ) THEN + BR = B * R + IF( BR.NE.ZERO ) THEN + DLADIV2 = (A + BR) * T + ELSE + DLADIV2 = A * T + (B * T) * R + END IF + ELSE + DLADIV2 = (A + D * (B / C)) * T + END IF +* + RETURN +* +* End of DLADIV12 +* + END diff --git a/dspl/liblapack/SRC/dlae2.f b/dspl/liblapack/SRC/dlae2.f new file mode 100644 index 0000000..ed77ff6 --- /dev/null +++ b/dspl/liblapack/SRC/dlae2.f @@ -0,0 +1,185 @@ +*> \brief \b DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAE2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAE2( A, B, C, RT1, RT2 ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION A, B, C, RT1, RT2 +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix +*> [ A B ] +*> [ B C ]. +*> On return, RT1 is the eigenvalue of larger absolute value, and RT2 +*> is the eigenvalue of smaller absolute value. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION +*> The (1,1) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION +*> The (1,2) and (2,1) elements of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION +*> The (2,2) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[out] RT1 +*> \verbatim +*> RT1 is DOUBLE PRECISION +*> The eigenvalue of larger absolute value. +*> \endverbatim +*> +*> \param[out] RT2 +*> \verbatim +*> RT2 is DOUBLE PRECISION +*> The eigenvalue of smaller absolute value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> RT1 is accurate to a few ulps barring over/underflow. +*> +*> RT2 may be inaccurate if there is massive cancellation in the +*> determinant A*C-B*B; higher precision or correctly rounded or +*> correctly truncated arithmetic would be needed to compute RT2 +*> accurately in all cases. +*> +*> Overflow is possible only if RT1 is within a factor of 5 of overflow. +*> Underflow is harmless if the input data is 0 or exceeds +*> underflow_threshold / macheps. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLAE2( A, B, C, RT1, RT2 ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + DOUBLE PRECISION A, B, C, RT1, RT2 +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D0 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = 0.5D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION AB, ACMN, ACMX, ADF, DF, RT, SM, TB +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* +* Compute the eigenvalues +* + SM = A + C + DF = A - C + ADF = ABS( DF ) + TB = B + B + AB = ABS( TB ) + IF( ABS( A ).GT.ABS( C ) ) THEN + ACMX = A + ACMN = C + ELSE + ACMX = C + ACMN = A + END IF + IF( ADF.GT.AB ) THEN + RT = ADF*SQRT( ONE+( AB / ADF )**2 ) + ELSE IF( ADF.LT.AB ) THEN + RT = AB*SQRT( ONE+( ADF / AB )**2 ) + ELSE +* +* Includes case AB=ADF=0 +* + RT = AB*SQRT( TWO ) + END IF + IF( SM.LT.ZERO ) THEN + RT1 = HALF*( SM-RT ) +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE IF( SM.GT.ZERO ) THEN + RT1 = HALF*( SM+RT ) +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE +* +* Includes case RT1 = RT2 = 0 +* + RT1 = HALF*RT + RT2 = -HALF*RT + END IF + RETURN +* +* End of DLAE2 +* + END diff --git a/dspl/liblapack/SRC/dlaebz.f b/dspl/liblapack/SRC/dlaebz.f new file mode 100644 index 0000000..f36a82c --- /dev/null +++ b/dspl/liblapack/SRC/dlaebz.f @@ -0,0 +1,649 @@ +*> \brief \b DLAEBZ computes the number of eigenvalues of a real symmetric tridiagonal matrix which are less than or equal to a given value, and performs other tasks required by the routine sstebz. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAEBZ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAEBZ( IJOB, NITMAX, N, MMAX, MINP, NBMIN, ABSTOL, +* RELTOL, PIVMIN, D, E, E2, NVAL, AB, C, MOUT, +* NAB, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IJOB, INFO, MINP, MMAX, MOUT, N, NBMIN, NITMAX +* DOUBLE PRECISION ABSTOL, PIVMIN, RELTOL +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ), NAB( MMAX, * ), NVAL( * ) +* DOUBLE PRECISION AB( MMAX, * ), C( * ), D( * ), E( * ), E2( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAEBZ contains the iteration loops which compute and use the +*> function N(w), which is the count of eigenvalues of a symmetric +*> tridiagonal matrix T less than or equal to its argument w. It +*> performs a choice of two types of loops: +*> +*> IJOB=1, followed by +*> IJOB=2: It takes as input a list of intervals and returns a list of +*> sufficiently small intervals whose union contains the same +*> eigenvalues as the union of the original intervals. +*> The input intervals are (AB(j,1),AB(j,2)], j=1,...,MINP. +*> The output interval (AB(j,1),AB(j,2)] will contain +*> eigenvalues NAB(j,1)+1,...,NAB(j,2), where 1 <= j <= MOUT. +*> +*> IJOB=3: It performs a binary search in each input interval +*> (AB(j,1),AB(j,2)] for a point w(j) such that +*> N(w(j))=NVAL(j), and uses C(j) as the starting point of +*> the search. If such a w(j) is found, then on output +*> AB(j,1)=AB(j,2)=w. If no such w(j) is found, then on output +*> (AB(j,1),AB(j,2)] will be a small interval containing the +*> point where N(w) jumps through NVAL(j), unless that point +*> lies outside the initial interval. +*> +*> Note that the intervals are in all cases half-open intervals, +*> i.e., of the form (a,b] , which includes b but not a . +*> +*> To avoid underflow, the matrix should be scaled so that its largest +*> element is no greater than overflow**(1/2) * underflow**(1/4) +*> in absolute value. To assure the most accurate computation +*> of small eigenvalues, the matrix should be scaled to be +*> not much smaller than that, either. +*> +*> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal +*> Matrix", Report CS41, Computer Science Dept., Stanford +*> University, July 21, 1966 +*> +*> Note: the arguments are, in general, *not* checked for unreasonable +*> values. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] IJOB +*> \verbatim +*> IJOB is INTEGER +*> Specifies what is to be done: +*> = 1: Compute NAB for the initial intervals. +*> = 2: Perform bisection iteration to find eigenvalues of T. +*> = 3: Perform bisection iteration to invert N(w), i.e., +*> to find a point which has a specified number of +*> eigenvalues of T to its left. +*> Other values will cause DLAEBZ to return with INFO=-1. +*> \endverbatim +*> +*> \param[in] NITMAX +*> \verbatim +*> NITMAX is INTEGER +*> The maximum number of "levels" of bisection to be +*> performed, i.e., an interval of width W will not be made +*> smaller than 2^(-NITMAX) * W. If not all intervals +*> have converged after NITMAX iterations, then INFO is set +*> to the number of non-converged intervals. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension n of the tridiagonal matrix T. It must be at +*> least 1. +*> \endverbatim +*> +*> \param[in] MMAX +*> \verbatim +*> MMAX is INTEGER +*> The maximum number of intervals. If more than MMAX intervals +*> are generated, then DLAEBZ will quit with INFO=MMAX+1. +*> \endverbatim +*> +*> \param[in] MINP +*> \verbatim +*> MINP is INTEGER +*> The initial number of intervals. It may not be greater than +*> MMAX. +*> \endverbatim +*> +*> \param[in] NBMIN +*> \verbatim +*> NBMIN is INTEGER +*> The smallest number of intervals that should be processed +*> using a vector loop. If zero, then only the scalar loop +*> will be used. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The minimum (absolute) width of an interval. When an +*> interval is narrower than ABSTOL, or than RELTOL times the +*> larger (in magnitude) endpoint, then it is considered to be +*> sufficiently small, i.e., converged. This must be at least +*> zero. +*> \endverbatim +*> +*> \param[in] RELTOL +*> \verbatim +*> RELTOL is DOUBLE PRECISION +*> The minimum relative width of an interval. When an interval +*> is narrower than ABSTOL, or than RELTOL times the larger (in +*> magnitude) endpoint, then it is considered to be +*> sufficiently small, i.e., converged. Note: this should +*> always be at least radix*machine epsilon. +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is DOUBLE PRECISION +*> The minimum absolute value of a "pivot" in the Sturm +*> sequence loop. +*> This must be at least max |e(j)**2|*safe_min and at +*> least safe_min, where safe_min is at least +*> the smallest number that can divide one without overflow. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> The offdiagonal elements of the tridiagonal matrix T in +*> positions 1 through N-1. E(N) is arbitrary. +*> \endverbatim +*> +*> \param[in] E2 +*> \verbatim +*> E2 is DOUBLE PRECISION array, dimension (N) +*> The squares of the offdiagonal elements of the tridiagonal +*> matrix T. E2(N) is ignored. +*> \endverbatim +*> +*> \param[in,out] NVAL +*> \verbatim +*> NVAL is INTEGER array, dimension (MINP) +*> If IJOB=1 or 2, not referenced. +*> If IJOB=3, the desired values of N(w). The elements of NVAL +*> will be reordered to correspond with the intervals in AB. +*> Thus, NVAL(j) on output will not, in general be the same as +*> NVAL(j) on input, but it will correspond with the interval +*> (AB(j,1),AB(j,2)] on output. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (MMAX,2) +*> The endpoints of the intervals. AB(j,1) is a(j), the left +*> endpoint of the j-th interval, and AB(j,2) is b(j), the +*> right endpoint of the j-th interval. The input intervals +*> will, in general, be modified, split, and reordered by the +*> calculation. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (MMAX) +*> If IJOB=1, ignored. +*> If IJOB=2, workspace. +*> If IJOB=3, then on input C(j) should be initialized to the +*> first search point in the binary search. +*> \endverbatim +*> +*> \param[out] MOUT +*> \verbatim +*> MOUT is INTEGER +*> If IJOB=1, the number of eigenvalues in the intervals. +*> If IJOB=2 or 3, the number of intervals output. +*> If IJOB=3, MOUT will equal MINP. +*> \endverbatim +*> +*> \param[in,out] NAB +*> \verbatim +*> NAB is INTEGER array, dimension (MMAX,2) +*> If IJOB=1, then on output NAB(i,j) will be set to N(AB(i,j)). +*> If IJOB=2, then on input, NAB(i,j) should be set. It must +*> satisfy the condition: +*> N(AB(i,1)) <= NAB(i,1) <= NAB(i,2) <= N(AB(i,2)), +*> which means that in interval i only eigenvalues +*> NAB(i,1)+1,...,NAB(i,2) will be considered. Usually, +*> NAB(i,j)=N(AB(i,j)), from a previous call to DLAEBZ with +*> IJOB=1. +*> On output, NAB(i,j) will contain +*> max(na(k),min(nb(k),N(AB(i,j)))), where k is the index of +*> the input interval that the output interval +*> (AB(j,1),AB(j,2)] came from, and na(k) and nb(k) are the +*> the input values of NAB(k,1) and NAB(k,2). +*> If IJOB=3, then on output, NAB(i,j) contains N(AB(i,j)), +*> unless N(w) > NVAL(i) for all search points w , in which +*> case NAB(i,1) will not be modified, i.e., the output +*> value will be the same as the input value (modulo +*> reorderings -- see NVAL and AB), or unless N(w) < NVAL(i) +*> for all search points w , in which case NAB(i,2) will +*> not be modified. Normally, NAB should be set to some +*> distinctive value(s) before DLAEBZ is called. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MMAX) +*> Workspace. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MMAX) +*> Workspace. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: All intervals converged. +*> = 1--MMAX: The last INFO intervals did not converge. +*> = MMAX+1: More than MMAX intervals were generated. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> This routine is intended to be called only by other LAPACK +*> routines, thus the interface is less user-friendly. It is intended +*> for two purposes: +*> +*> (a) finding eigenvalues. In this case, DLAEBZ should have one or +*> more initial intervals set up in AB, and DLAEBZ should be called +*> with IJOB=1. This sets up NAB, and also counts the eigenvalues. +*> Intervals with no eigenvalues would usually be thrown out at +*> this point. Also, if not all the eigenvalues in an interval i +*> are desired, NAB(i,1) can be increased or NAB(i,2) decreased. +*> For example, set NAB(i,1)=NAB(i,2)-1 to get the largest +*> eigenvalue. DLAEBZ is then called with IJOB=2 and MMAX +*> no smaller than the value of MOUT returned by the call with +*> IJOB=1. After this (IJOB=2) call, eigenvalues NAB(i,1)+1 +*> through NAB(i,2) are approximately AB(i,1) (or AB(i,2)) to the +*> tolerance specified by ABSTOL and RELTOL. +*> +*> (b) finding an interval (a',b'] containing eigenvalues w(f),...,w(l). +*> In this case, start with a Gershgorin interval (a,b). Set up +*> AB to contain 2 search intervals, both initially (a,b). One +*> NVAL element should contain f-1 and the other should contain l +*> , while C should contain a and b, resp. NAB(i,1) should be -1 +*> and NAB(i,2) should be N+1, to flag an error if the desired +*> interval does not lie in (a,b). DLAEBZ is then called with +*> IJOB=3. On exit, if w(f-1) < w(f), then one of the intervals -- +*> j -- will have AB(j,1)=AB(j,2) and NAB(j,1)=NAB(j,2)=f-1, while +*> if, to the specified tolerance, w(f-k)=...=w(f+r), k > 0 and r +*> >= 0, then the interval will have N(AB(j,1))=NAB(j,1)=f-k and +*> N(AB(j,2))=NAB(j,2)=f+r. The cases w(l) < w(l+1) and +*> w(l-r)=...=w(l+k) are handled similarly. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLAEBZ( IJOB, NITMAX, N, MMAX, MINP, NBMIN, ABSTOL, + $ RELTOL, PIVMIN, D, E, E2, NVAL, AB, C, MOUT, + $ NAB, WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IJOB, INFO, MINP, MMAX, MOUT, N, NBMIN, NITMAX + DOUBLE PRECISION ABSTOL, PIVMIN, RELTOL +* .. +* .. Array Arguments .. + INTEGER IWORK( * ), NAB( MMAX, * ), NVAL( * ) + DOUBLE PRECISION AB( MMAX, * ), C( * ), D( * ), E( * ), E2( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, TWO, HALF + PARAMETER ( ZERO = 0.0D0, TWO = 2.0D0, + $ HALF = 1.0D0 / TWO ) +* .. +* .. Local Scalars .. + INTEGER ITMP1, ITMP2, J, JI, JIT, JP, KF, KFNEW, KL, + $ KLNEW + DOUBLE PRECISION TMP1, TMP2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Check for Errors +* + INFO = 0 + IF( IJOB.LT.1 .OR. IJOB.GT.3 ) THEN + INFO = -1 + RETURN + END IF +* +* Initialize NAB +* + IF( IJOB.EQ.1 ) THEN +* +* Compute the number of eigenvalues in the initial intervals. +* + MOUT = 0 + DO 30 JI = 1, MINP + DO 20 JP = 1, 2 + TMP1 = D( 1 ) - AB( JI, JP ) + IF( ABS( TMP1 ).LT.PIVMIN ) + $ TMP1 = -PIVMIN + NAB( JI, JP ) = 0 + IF( TMP1.LE.ZERO ) + $ NAB( JI, JP ) = 1 +* + DO 10 J = 2, N + TMP1 = D( J ) - E2( J-1 ) / TMP1 - AB( JI, JP ) + IF( ABS( TMP1 ).LT.PIVMIN ) + $ TMP1 = -PIVMIN + IF( TMP1.LE.ZERO ) + $ NAB( JI, JP ) = NAB( JI, JP ) + 1 + 10 CONTINUE + 20 CONTINUE + MOUT = MOUT + NAB( JI, 2 ) - NAB( JI, 1 ) + 30 CONTINUE + RETURN + END IF +* +* Initialize for loop +* +* KF and KL have the following meaning: +* Intervals 1,...,KF-1 have converged. +* Intervals KF,...,KL still need to be refined. +* + KF = 1 + KL = MINP +* +* If IJOB=2, initialize C. +* If IJOB=3, use the user-supplied starting point. +* + IF( IJOB.EQ.2 ) THEN + DO 40 JI = 1, MINP + C( JI ) = HALF*( AB( JI, 1 )+AB( JI, 2 ) ) + 40 CONTINUE + END IF +* +* Iteration loop +* + DO 130 JIT = 1, NITMAX +* +* Loop over intervals +* + IF( KL-KF+1.GE.NBMIN .AND. NBMIN.GT.0 ) THEN +* +* Begin of Parallel Version of the loop +* + DO 60 JI = KF, KL +* +* Compute N(c), the number of eigenvalues less than c +* + WORK( JI ) = D( 1 ) - C( JI ) + IWORK( JI ) = 0 + IF( WORK( JI ).LE.PIVMIN ) THEN + IWORK( JI ) = 1 + WORK( JI ) = MIN( WORK( JI ), -PIVMIN ) + END IF +* + DO 50 J = 2, N + WORK( JI ) = D( J ) - E2( J-1 ) / WORK( JI ) - C( JI ) + IF( WORK( JI ).LE.PIVMIN ) THEN + IWORK( JI ) = IWORK( JI ) + 1 + WORK( JI ) = MIN( WORK( JI ), -PIVMIN ) + END IF + 50 CONTINUE + 60 CONTINUE +* + IF( IJOB.LE.2 ) THEN +* +* IJOB=2: Choose all intervals containing eigenvalues. +* + KLNEW = KL + DO 70 JI = KF, KL +* +* Insure that N(w) is monotone +* + IWORK( JI ) = MIN( NAB( JI, 2 ), + $ MAX( NAB( JI, 1 ), IWORK( JI ) ) ) +* +* Update the Queue -- add intervals if both halves +* contain eigenvalues. +* + IF( IWORK( JI ).EQ.NAB( JI, 2 ) ) THEN +* +* No eigenvalue in the upper interval: +* just use the lower interval. +* + AB( JI, 2 ) = C( JI ) +* + ELSE IF( IWORK( JI ).EQ.NAB( JI, 1 ) ) THEN +* +* No eigenvalue in the lower interval: +* just use the upper interval. +* + AB( JI, 1 ) = C( JI ) + ELSE + KLNEW = KLNEW + 1 + IF( KLNEW.LE.MMAX ) THEN +* +* Eigenvalue in both intervals -- add upper to +* queue. +* + AB( KLNEW, 2 ) = AB( JI, 2 ) + NAB( KLNEW, 2 ) = NAB( JI, 2 ) + AB( KLNEW, 1 ) = C( JI ) + NAB( KLNEW, 1 ) = IWORK( JI ) + AB( JI, 2 ) = C( JI ) + NAB( JI, 2 ) = IWORK( JI ) + ELSE + INFO = MMAX + 1 + END IF + END IF + 70 CONTINUE + IF( INFO.NE.0 ) + $ RETURN + KL = KLNEW + ELSE +* +* IJOB=3: Binary search. Keep only the interval containing +* w s.t. N(w) = NVAL +* + DO 80 JI = KF, KL + IF( IWORK( JI ).LE.NVAL( JI ) ) THEN + AB( JI, 1 ) = C( JI ) + NAB( JI, 1 ) = IWORK( JI ) + END IF + IF( IWORK( JI ).GE.NVAL( JI ) ) THEN + AB( JI, 2 ) = C( JI ) + NAB( JI, 2 ) = IWORK( JI ) + END IF + 80 CONTINUE + END IF +* + ELSE +* +* End of Parallel Version of the loop +* +* Begin of Serial Version of the loop +* + KLNEW = KL + DO 100 JI = KF, KL +* +* Compute N(w), the number of eigenvalues less than w +* + TMP1 = C( JI ) + TMP2 = D( 1 ) - TMP1 + ITMP1 = 0 + IF( TMP2.LE.PIVMIN ) THEN + ITMP1 = 1 + TMP2 = MIN( TMP2, -PIVMIN ) + END IF +* + DO 90 J = 2, N + TMP2 = D( J ) - E2( J-1 ) / TMP2 - TMP1 + IF( TMP2.LE.PIVMIN ) THEN + ITMP1 = ITMP1 + 1 + TMP2 = MIN( TMP2, -PIVMIN ) + END IF + 90 CONTINUE +* + IF( IJOB.LE.2 ) THEN +* +* IJOB=2: Choose all intervals containing eigenvalues. +* +* Insure that N(w) is monotone +* + ITMP1 = MIN( NAB( JI, 2 ), + $ MAX( NAB( JI, 1 ), ITMP1 ) ) +* +* Update the Queue -- add intervals if both halves +* contain eigenvalues. +* + IF( ITMP1.EQ.NAB( JI, 2 ) ) THEN +* +* No eigenvalue in the upper interval: +* just use the lower interval. +* + AB( JI, 2 ) = TMP1 +* + ELSE IF( ITMP1.EQ.NAB( JI, 1 ) ) THEN +* +* No eigenvalue in the lower interval: +* just use the upper interval. +* + AB( JI, 1 ) = TMP1 + ELSE IF( KLNEW.LT.MMAX ) THEN +* +* Eigenvalue in both intervals -- add upper to queue. +* + KLNEW = KLNEW + 1 + AB( KLNEW, 2 ) = AB( JI, 2 ) + NAB( KLNEW, 2 ) = NAB( JI, 2 ) + AB( KLNEW, 1 ) = TMP1 + NAB( KLNEW, 1 ) = ITMP1 + AB( JI, 2 ) = TMP1 + NAB( JI, 2 ) = ITMP1 + ELSE + INFO = MMAX + 1 + RETURN + END IF + ELSE +* +* IJOB=3: Binary search. Keep only the interval +* containing w s.t. N(w) = NVAL +* + IF( ITMP1.LE.NVAL( JI ) ) THEN + AB( JI, 1 ) = TMP1 + NAB( JI, 1 ) = ITMP1 + END IF + IF( ITMP1.GE.NVAL( JI ) ) THEN + AB( JI, 2 ) = TMP1 + NAB( JI, 2 ) = ITMP1 + END IF + END IF + 100 CONTINUE + KL = KLNEW +* + END IF +* +* Check for convergence +* + KFNEW = KF + DO 110 JI = KF, KL + TMP1 = ABS( AB( JI, 2 )-AB( JI, 1 ) ) + TMP2 = MAX( ABS( AB( JI, 2 ) ), ABS( AB( JI, 1 ) ) ) + IF( TMP1.LT.MAX( ABSTOL, PIVMIN, RELTOL*TMP2 ) .OR. + $ NAB( JI, 1 ).GE.NAB( JI, 2 ) ) THEN +* +* Converged -- Swap with position KFNEW, +* then increment KFNEW +* + IF( JI.GT.KFNEW ) THEN + TMP1 = AB( JI, 1 ) + TMP2 = AB( JI, 2 ) + ITMP1 = NAB( JI, 1 ) + ITMP2 = NAB( JI, 2 ) + AB( JI, 1 ) = AB( KFNEW, 1 ) + AB( JI, 2 ) = AB( KFNEW, 2 ) + NAB( JI, 1 ) = NAB( KFNEW, 1 ) + NAB( JI, 2 ) = NAB( KFNEW, 2 ) + AB( KFNEW, 1 ) = TMP1 + AB( KFNEW, 2 ) = TMP2 + NAB( KFNEW, 1 ) = ITMP1 + NAB( KFNEW, 2 ) = ITMP2 + IF( IJOB.EQ.3 ) THEN + ITMP1 = NVAL( JI ) + NVAL( JI ) = NVAL( KFNEW ) + NVAL( KFNEW ) = ITMP1 + END IF + END IF + KFNEW = KFNEW + 1 + END IF + 110 CONTINUE + KF = KFNEW +* +* Choose Midpoints +* + DO 120 JI = KF, KL + C( JI ) = HALF*( AB( JI, 1 )+AB( JI, 2 ) ) + 120 CONTINUE +* +* If no more intervals to refine, quit. +* + IF( KF.GT.KL ) + $ GO TO 140 + 130 CONTINUE +* +* Converged +* + 140 CONTINUE + INFO = MAX( KL+1-KF, 0 ) + MOUT = KL +* + RETURN +* +* End of DLAEBZ +* + END diff --git a/dspl/liblapack/SRC/dlaed0.f b/dspl/liblapack/SRC/dlaed0.f new file mode 100644 index 0000000..4e92da9 --- /dev/null +++ b/dspl/liblapack/SRC/dlaed0.f @@ -0,0 +1,434 @@ +*> \brief \b DLAED0 used by sstedc. Computes all eigenvalues and corresponding eigenvectors of an unreduced symmetric tridiagonal matrix using the divide and conquer method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAED0 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER ICOMPQ, INFO, LDQ, LDQS, N, QSIZ +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), Q( LDQ, * ), QSTORE( LDQS, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAED0 computes all eigenvalues and corresponding eigenvectors of a +*> symmetric tridiagonal matrix using the divide and conquer method. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> = 0: Compute eigenvalues only. +*> = 1: Compute eigenvectors of original dense symmetric matrix +*> also. On entry, Q contains the orthogonal matrix used +*> to reduce the original matrix to tridiagonal form. +*> = 2: Compute eigenvalues and eigenvectors of tridiagonal +*> matrix. +*> \endverbatim +*> +*> \param[in] QSIZ +*> \verbatim +*> QSIZ is INTEGER +*> The dimension of the orthogonal matrix used to reduce +*> the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the main diagonal of the tridiagonal matrix. +*> On exit, its eigenvalues. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ, N) +*> On entry, Q must contain an N-by-N orthogonal matrix. +*> If ICOMPQ = 0 Q is not referenced. +*> If ICOMPQ = 1 On entry, Q is a subset of the columns of the +*> orthogonal matrix used to reduce the full +*> matrix to tridiagonal form corresponding to +*> the subset of the full matrix which is being +*> decomposed at this time. +*> If ICOMPQ = 2 On entry, Q will be the identity matrix. +*> On exit, Q contains the eigenvectors of the +*> tridiagonal matrix. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. If eigenvectors are +*> desired, then LDQ >= max(1,N). In any case, LDQ >= 1. +*> \endverbatim +*> +*> \param[out] QSTORE +*> \verbatim +*> QSTORE is DOUBLE PRECISION array, dimension (LDQS, N) +*> Referenced only when ICOMPQ = 1. Used to store parts of +*> the eigenvector matrix when the updating matrix multiplies +*> take place. +*> \endverbatim +*> +*> \param[in] LDQS +*> \verbatim +*> LDQS is INTEGER +*> The leading dimension of the array QSTORE. If ICOMPQ = 1, +*> then LDQS >= max(1,N). In any case, LDQS >= 1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, +*> If ICOMPQ = 0 or 1, the dimension of WORK must be at least +*> 1 + 3*N + 2*N*lg N + 3*N**2 +*> ( lg( N ) = smallest integer k +*> such that 2^k >= N ) +*> If ICOMPQ = 2, the dimension of WORK must be at least +*> 4*N + N**2. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, +*> If ICOMPQ = 0 or 1, the dimension of IWORK must be at least +*> 6 + 6*N + 5*N*lg N. +*> ( lg( N ) = smallest integer k +*> such that 2^k >= N ) +*> If ICOMPQ = 2, the dimension of IWORK must be at least +*> 3 + 5*N. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: The algorithm failed to compute an eigenvalue while +*> working on the submatrix lying in rows and columns +*> INFO/(N+1) through mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +* +* ===================================================================== + SUBROUTINE DLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS, + $ WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, LDQ, LDQS, N, QSIZ +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), Q( LDQ, * ), QSTORE( LDQS, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.D0, ONE = 1.D0, TWO = 2.D0 ) +* .. +* .. Local Scalars .. + INTEGER CURLVL, CURPRB, CURR, I, IGIVCL, IGIVNM, + $ IGIVPT, INDXQ, IPERM, IPRMPT, IQ, IQPTR, IWREM, + $ J, K, LGN, MATSIZ, MSD2, SMLSIZ, SMM1, SPM1, + $ SPM2, SUBMAT, SUBPBS, TLVLS + DOUBLE PRECISION TEMP +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DLACPY, DLAED1, DLAED7, DSTEQR, + $ XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, LOG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.2 ) THEN + INFO = -1 + ELSE IF( ( ICOMPQ.EQ.1 ) .AND. ( QSIZ.LT.MAX( 0, N ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDQS.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED0', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + SMLSIZ = ILAENV( 9, 'DLAED0', ' ', 0, 0, 0, 0 ) +* +* Determine the size and placement of the submatrices, and save in +* the leading elements of IWORK. +* + IWORK( 1 ) = N + SUBPBS = 1 + TLVLS = 0 + 10 CONTINUE + IF( IWORK( SUBPBS ).GT.SMLSIZ ) THEN + DO 20 J = SUBPBS, 1, -1 + IWORK( 2*J ) = ( IWORK( J )+1 ) / 2 + IWORK( 2*J-1 ) = IWORK( J ) / 2 + 20 CONTINUE + TLVLS = TLVLS + 1 + SUBPBS = 2*SUBPBS + GO TO 10 + END IF + DO 30 J = 2, SUBPBS + IWORK( J ) = IWORK( J ) + IWORK( J-1 ) + 30 CONTINUE +* +* Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 +* using rank-1 modifications (cuts). +* + SPM1 = SUBPBS - 1 + DO 40 I = 1, SPM1 + SUBMAT = IWORK( I ) + 1 + SMM1 = SUBMAT - 1 + D( SMM1 ) = D( SMM1 ) - ABS( E( SMM1 ) ) + D( SUBMAT ) = D( SUBMAT ) - ABS( E( SMM1 ) ) + 40 CONTINUE +* + INDXQ = 4*N + 3 + IF( ICOMPQ.NE.2 ) THEN +* +* Set up workspaces for eigenvalues only/accumulate new vectors +* routine +* + TEMP = LOG( DBLE( N ) ) / LOG( TWO ) + LGN = INT( TEMP ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IPRMPT = INDXQ + N + 1 + IPERM = IPRMPT + N*LGN + IQPTR = IPERM + N*LGN + IGIVPT = IQPTR + N + 2 + IGIVCL = IGIVPT + N*LGN +* + IGIVNM = 1 + IQ = IGIVNM + 2*N*LGN + IWREM = IQ + N**2 + 1 +* +* Initialize pointers +* + DO 50 I = 0, SUBPBS + IWORK( IPRMPT+I ) = 1 + IWORK( IGIVPT+I ) = 1 + 50 CONTINUE + IWORK( IQPTR ) = 1 + END IF +* +* Solve each submatrix eigenproblem at the bottom of the divide and +* conquer tree. +* + CURR = 0 + DO 70 I = 0, SPM1 + IF( I.EQ.0 ) THEN + SUBMAT = 1 + MATSIZ = IWORK( 1 ) + ELSE + SUBMAT = IWORK( I ) + 1 + MATSIZ = IWORK( I+1 ) - IWORK( I ) + END IF + IF( ICOMPQ.EQ.2 ) THEN + CALL DSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ), + $ Q( SUBMAT, SUBMAT ), LDQ, WORK, INFO ) + IF( INFO.NE.0 ) + $ GO TO 130 + ELSE + CALL DSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ), + $ WORK( IQ-1+IWORK( IQPTR+CURR ) ), MATSIZ, WORK, + $ INFO ) + IF( INFO.NE.0 ) + $ GO TO 130 + IF( ICOMPQ.EQ.1 ) THEN + CALL DGEMM( 'N', 'N', QSIZ, MATSIZ, MATSIZ, ONE, + $ Q( 1, SUBMAT ), LDQ, WORK( IQ-1+IWORK( IQPTR+ + $ CURR ) ), MATSIZ, ZERO, QSTORE( 1, SUBMAT ), + $ LDQS ) + END IF + IWORK( IQPTR+CURR+1 ) = IWORK( IQPTR+CURR ) + MATSIZ**2 + CURR = CURR + 1 + END IF + K = 1 + DO 60 J = SUBMAT, IWORK( I+1 ) + IWORK( INDXQ+J ) = K + K = K + 1 + 60 CONTINUE + 70 CONTINUE +* +* Successively merge eigensystems of adjacent submatrices +* into eigensystem for the corresponding larger matrix. +* +* while ( SUBPBS > 1 ) +* + CURLVL = 1 + 80 CONTINUE + IF( SUBPBS.GT.1 ) THEN + SPM2 = SUBPBS - 2 + DO 90 I = 0, SPM2, 2 + IF( I.EQ.0 ) THEN + SUBMAT = 1 + MATSIZ = IWORK( 2 ) + MSD2 = IWORK( 1 ) + CURPRB = 0 + ELSE + SUBMAT = IWORK( I ) + 1 + MATSIZ = IWORK( I+2 ) - IWORK( I ) + MSD2 = MATSIZ / 2 + CURPRB = CURPRB + 1 + END IF +* +* Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) +* into an eigensystem of size MATSIZ. +* DLAED1 is used only for the full eigensystem of a tridiagonal +* matrix. +* DLAED7 handles the cases in which eigenvalues only or eigenvalues +* and eigenvectors of a full symmetric matrix (which was reduced to +* tridiagonal form) are desired. +* + IF( ICOMPQ.EQ.2 ) THEN + CALL DLAED1( MATSIZ, D( SUBMAT ), Q( SUBMAT, SUBMAT ), + $ LDQ, IWORK( INDXQ+SUBMAT ), + $ E( SUBMAT+MSD2-1 ), MSD2, WORK, + $ IWORK( SUBPBS+1 ), INFO ) + ELSE + CALL DLAED7( ICOMPQ, MATSIZ, QSIZ, TLVLS, CURLVL, CURPRB, + $ D( SUBMAT ), QSTORE( 1, SUBMAT ), LDQS, + $ IWORK( INDXQ+SUBMAT ), E( SUBMAT+MSD2-1 ), + $ MSD2, WORK( IQ ), IWORK( IQPTR ), + $ IWORK( IPRMPT ), IWORK( IPERM ), + $ IWORK( IGIVPT ), IWORK( IGIVCL ), + $ WORK( IGIVNM ), WORK( IWREM ), + $ IWORK( SUBPBS+1 ), INFO ) + END IF + IF( INFO.NE.0 ) + $ GO TO 130 + IWORK( I / 2+1 ) = IWORK( I+2 ) + 90 CONTINUE + SUBPBS = SUBPBS / 2 + CURLVL = CURLVL + 1 + GO TO 80 + END IF +* +* end while +* +* Re-merge the eigenvalues/vectors which were deflated at the final +* merge step. +* + IF( ICOMPQ.EQ.1 ) THEN + DO 100 I = 1, N + J = IWORK( INDXQ+I ) + WORK( I ) = D( J ) + CALL DCOPY( QSIZ, QSTORE( 1, J ), 1, Q( 1, I ), 1 ) + 100 CONTINUE + CALL DCOPY( N, WORK, 1, D, 1 ) + ELSE IF( ICOMPQ.EQ.2 ) THEN + DO 110 I = 1, N + J = IWORK( INDXQ+I ) + WORK( I ) = D( J ) + CALL DCOPY( N, Q( 1, J ), 1, WORK( N*I+1 ), 1 ) + 110 CONTINUE + CALL DCOPY( N, WORK, 1, D, 1 ) + CALL DLACPY( 'A', N, N, WORK( N+1 ), N, Q, LDQ ) + ELSE + DO 120 I = 1, N + J = IWORK( INDXQ+I ) + WORK( I ) = D( J ) + 120 CONTINUE + CALL DCOPY( N, WORK, 1, D, 1 ) + END IF + GO TO 140 +* + 130 CONTINUE + INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1 +* + 140 CONTINUE + RETURN +* +* End of DLAED0 +* + END diff --git a/dspl/liblapack/SRC/dlaed1.f b/dspl/liblapack/SRC/dlaed1.f new file mode 100644 index 0000000..30e71fa --- /dev/null +++ b/dspl/liblapack/SRC/dlaed1.f @@ -0,0 +1,274 @@ +*> \brief \b DLAED1 used by sstedc. Computes the updated eigensystem of a diagonal matrix after modification by a rank-one symmetric matrix. Used when the original matrix is tridiagonal. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAED1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER CUTPNT, INFO, LDQ, N +* DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. +* INTEGER INDXQ( * ), IWORK( * ) +* DOUBLE PRECISION D( * ), Q( LDQ, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAED1 computes the updated eigensystem of a diagonal +*> matrix after modification by a rank-one symmetric matrix. This +*> routine is used only for the eigenproblem which requires all +*> eigenvalues and eigenvectors of a tridiagonal matrix. DLAED7 handles +*> the case in which eigenvalues only or eigenvalues and eigenvectors +*> of a full symmetric matrix (which was reduced to tridiagonal form) +*> are desired. +*> +*> T = Q(in) ( D(in) + RHO * Z*Z**T ) Q**T(in) = Q(out) * D(out) * Q**T(out) +*> +*> where Z = Q**T*u, u is a vector of length N with ones in the +*> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. +*> +*> The eigenvectors of the original matrix are stored in Q, and the +*> eigenvalues are in D. The algorithm consists of three stages: +*> +*> The first stage consists of deflating the size of the problem +*> when there are multiple eigenvalues or if there is a zero in +*> the Z vector. For each such occurrence the dimension of the +*> secular equation problem is reduced by one. This stage is +*> performed by the routine DLAED2. +*> +*> The second stage consists of calculating the updated +*> eigenvalues. This is done by finding the roots of the secular +*> equation via the routine DLAED4 (as called by DLAED3). +*> This routine also calculates the eigenvectors of the current +*> problem. +*> +*> The final stage consists of computing the updated eigenvectors +*> directly using the updated eigenvalues. The eigenvectors for +*> the current problem are multiplied with the eigenvectors from +*> the overall problem. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the eigenvalues of the rank-1-perturbed matrix. +*> On exit, the eigenvalues of the repaired matrix. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> On entry, the eigenvectors of the rank-1-perturbed matrix. +*> On exit, the eigenvectors of the repaired tridiagonal matrix. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] INDXQ +*> \verbatim +*> INDXQ is INTEGER array, dimension (N) +*> On entry, the permutation which separately sorts the two +*> subproblems in D into ascending order. +*> On exit, the permutation which will reintegrate the +*> subproblems back into sorted order, +*> i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> The subdiagonal entry used to create the rank-1 modification. +*> \endverbatim +*> +*> \param[in] CUTPNT +*> \verbatim +*> CUTPNT is INTEGER +*> The location of the last eigenvalue in the leading sub-matrix. +*> min(1,N) <= CUTPNT <= N/2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N + N**2) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, an eigenvalue did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA \n +*> Modified by Francoise Tisseur, University of Tennessee +*> +* ===================================================================== + SUBROUTINE DLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER CUTPNT, INFO, LDQ, N + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + INTEGER INDXQ( * ), IWORK( * ) + DOUBLE PRECISION D( * ), Q( LDQ, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER COLTYP, I, IDLMDA, INDX, INDXC, INDXP, IQ2, IS, + $ IW, IZ, K, N1, N2, ZPP1 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAED2, DLAED3, DLAMRG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( MIN( 1, N / 2 ).GT.CUTPNT .OR. ( N / 2 ).LT.CUTPNT ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED1', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* The following values are integer pointers which indicate +* the portion of the workspace +* used by a particular array in DLAED2 and DLAED3. +* + IZ = 1 + IDLMDA = IZ + N + IW = IDLMDA + N + IQ2 = IW + N +* + INDX = 1 + INDXC = INDX + N + COLTYP = INDXC + N + INDXP = COLTYP + N +* +* +* Form the z-vector which consists of the last row of Q_1 and the +* first row of Q_2. +* + CALL DCOPY( CUTPNT, Q( CUTPNT, 1 ), LDQ, WORK( IZ ), 1 ) + ZPP1 = CUTPNT + 1 + CALL DCOPY( N-CUTPNT, Q( ZPP1, ZPP1 ), LDQ, WORK( IZ+CUTPNT ), 1 ) +* +* Deflate eigenvalues. +* + CALL DLAED2( K, N, CUTPNT, D, Q, LDQ, INDXQ, RHO, WORK( IZ ), + $ WORK( IDLMDA ), WORK( IW ), WORK( IQ2 ), + $ IWORK( INDX ), IWORK( INDXC ), IWORK( INDXP ), + $ IWORK( COLTYP ), INFO ) +* + IF( INFO.NE.0 ) + $ GO TO 20 +* +* Solve Secular Equation. +* + IF( K.NE.0 ) THEN + IS = ( IWORK( COLTYP )+IWORK( COLTYP+1 ) )*CUTPNT + + $ ( IWORK( COLTYP+1 )+IWORK( COLTYP+2 ) )*( N-CUTPNT ) + IQ2 + CALL DLAED3( K, N, CUTPNT, D, Q, LDQ, RHO, WORK( IDLMDA ), + $ WORK( IQ2 ), IWORK( INDXC ), IWORK( COLTYP ), + $ WORK( IW ), WORK( IS ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 20 +* +* Prepare the INDXQ sorting permutation. +* + N1 = K + N2 = N - K + CALL DLAMRG( N1, N2, D, 1, -1, INDXQ ) + ELSE + DO 10 I = 1, N + INDXQ( I ) = I + 10 CONTINUE + END IF +* + 20 CONTINUE + RETURN +* +* End of DLAED1 +* + END diff --git a/dspl/liblapack/SRC/dlaed2.f b/dspl/liblapack/SRC/dlaed2.f new file mode 100644 index 0000000..fbcc87a --- /dev/null +++ b/dspl/liblapack/SRC/dlaed2.f @@ -0,0 +1,539 @@ +*> \brief \b DLAED2 used by sstedc. Merges eigenvalues and deflates secular equation. Used when the original matrix is tridiagonal. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAED2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W, +* Q2, INDX, INDXC, INDXP, COLTYP, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDQ, N, N1 +* DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. +* INTEGER COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ), +* $ INDXQ( * ) +* DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ), +* $ W( * ), Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAED2 merges the two sets of eigenvalues together into a single +*> sorted set. Then it tries to deflate the size of the problem. +*> There are two ways in which deflation can occur: when two or more +*> eigenvalues are close together or if there is a tiny entry in the +*> Z vector. For each such occurrence the order of the related secular +*> equation problem is reduced by one. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> The number of non-deflated eigenvalues, and the order of the +*> related secular equation. 0 <= K <=N. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> The location of the last eigenvalue in the leading sub-matrix. +*> min(1,N) <= N1 <= N/2. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, D contains the eigenvalues of the two submatrices to +*> be combined. +*> On exit, D contains the trailing (N-K) updated eigenvalues +*> (those which were deflated) sorted into increasing order. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ, N) +*> On entry, Q contains the eigenvectors of two submatrices in +*> the two square blocks with corners at (1,1), (N1,N1) +*> and (N1+1, N1+1), (N,N). +*> On exit, Q contains the trailing (N-K) updated eigenvectors +*> (those which were deflated) in its last N-K columns. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] INDXQ +*> \verbatim +*> INDXQ is INTEGER array, dimension (N) +*> The permutation which separately sorts the two sub-problems +*> in D into ascending order. Note that elements in the second +*> half of this permutation must first have N1 added to their +*> values. Destroyed on exit. +*> \endverbatim +*> +*> \param[in,out] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> On entry, the off-diagonal element associated with the rank-1 +*> cut which originally split the two submatrices which are now +*> being recombined. +*> On exit, RHO has been modified to the value required by +*> DLAED3. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (N) +*> On entry, Z contains the updating vector (the last +*> row of the first sub-eigenvector matrix and the first row of +*> the second sub-eigenvector matrix). +*> On exit, the contents of Z have been destroyed by the updating +*> process. +*> \endverbatim +*> +*> \param[out] DLAMDA +*> \verbatim +*> DLAMDA is DOUBLE PRECISION array, dimension (N) +*> A copy of the first K eigenvalues which will be used by +*> DLAED3 to form the secular equation. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first k values of the final deflation-altered z-vector +*> which will be passed to DLAED3. +*> \endverbatim +*> +*> \param[out] Q2 +*> \verbatim +*> Q2 is DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2) +*> A copy of the first K eigenvectors which will be used by +*> DLAED3 in a matrix multiply (DGEMM) to solve for the new +*> eigenvectors. +*> \endverbatim +*> +*> \param[out] INDX +*> \verbatim +*> INDX is INTEGER array, dimension (N) +*> The permutation used to sort the contents of DLAMDA into +*> ascending order. +*> \endverbatim +*> +*> \param[out] INDXC +*> \verbatim +*> INDXC is INTEGER array, dimension (N) +*> The permutation used to arrange the columns of the deflated +*> Q matrix into three groups: the first group contains non-zero +*> elements only at and above N1, the second contains +*> non-zero elements only below N1, and the third is dense. +*> \endverbatim +*> +*> \param[out] INDXP +*> \verbatim +*> INDXP is INTEGER array, dimension (N) +*> The permutation used to place deflated values of D at the end +*> of the array. INDXP(1:K) points to the nondeflated D-values +*> and INDXP(K+1:N) points to the deflated eigenvalues. +*> \endverbatim +*> +*> \param[out] COLTYP +*> \verbatim +*> COLTYP is INTEGER array, dimension (N) +*> During execution, a label which will indicate which of the +*> following types a column in the Q2 matrix is: +*> 1 : non-zero in the upper half only; +*> 2 : dense; +*> 3 : non-zero in the lower half only; +*> 4 : deflated. +*> On exit, COLTYP(i) is the number of columns of type i, +*> for i=1 to 4 only. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA \n +*> Modified by Francoise Tisseur, University of Tennessee +*> +* ===================================================================== + SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W, + $ Q2, INDX, INDXC, INDXP, COLTYP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDQ, N, N1 + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + INTEGER COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ), + $ INDXQ( * ) + DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ), + $ W( * ), Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION MONE, ZERO, ONE, TWO, EIGHT + PARAMETER ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0, + $ TWO = 2.0D0, EIGHT = 8.0D0 ) +* .. +* .. Local Arrays .. + INTEGER CTOT( 4 ), PSM( 4 ) +* .. +* .. Local Scalars .. + INTEGER CT, I, IMAX, IQ1, IQ2, J, JMAX, JS, K2, N1P1, + $ N2, NJ, PJ + DOUBLE PRECISION C, EPS, S, T, TAU, TOL +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL IDAMAX, DLAMCH, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLACPY, DLAMRG, DROT, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( MIN( 1, ( N / 2 ) ).GT.N1 .OR. ( N / 2 ).LT.N1 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + N2 = N - N1 + N1P1 = N1 + 1 +* + IF( RHO.LT.ZERO ) THEN + CALL DSCAL( N2, MONE, Z( N1P1 ), 1 ) + END IF +* +* Normalize z so that norm(z) = 1. Since z is the concatenation of +* two normalized vectors, norm2(z) = sqrt(2). +* + T = ONE / SQRT( TWO ) + CALL DSCAL( N, T, Z, 1 ) +* +* RHO = ABS( norm(z)**2 * RHO ) +* + RHO = ABS( TWO*RHO ) +* +* Sort the eigenvalues into increasing order +* + DO 10 I = N1P1, N + INDXQ( I ) = INDXQ( I ) + N1 + 10 CONTINUE +* +* re-integrate the deflated parts from the last pass +* + DO 20 I = 1, N + DLAMDA( I ) = D( INDXQ( I ) ) + 20 CONTINUE + CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDXC ) + DO 30 I = 1, N + INDX( I ) = INDXQ( INDXC( I ) ) + 30 CONTINUE +* +* Calculate the allowable deflation tolerance +* + IMAX = IDAMAX( N, Z, 1 ) + JMAX = IDAMAX( N, D, 1 ) + EPS = DLAMCH( 'Epsilon' ) + TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) ) +* +* If the rank-1 modifier is small enough, no more needs to be done +* except to reorganize Q so that its columns correspond with the +* elements in D. +* + IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN + K = 0 + IQ2 = 1 + DO 40 J = 1, N + I = INDX( J ) + CALL DCOPY( N, Q( 1, I ), 1, Q2( IQ2 ), 1 ) + DLAMDA( J ) = D( I ) + IQ2 = IQ2 + N + 40 CONTINUE + CALL DLACPY( 'A', N, N, Q2, N, Q, LDQ ) + CALL DCOPY( N, DLAMDA, 1, D, 1 ) + GO TO 190 + END IF +* +* If there are multiple eigenvalues then the problem deflates. Here +* the number of equal eigenvalues are found. As each equal +* eigenvalue is found, an elementary reflector is computed to rotate +* the corresponding eigensubspace so that the corresponding +* components of Z are zero in this new basis. +* + DO 50 I = 1, N1 + COLTYP( I ) = 1 + 50 CONTINUE + DO 60 I = N1P1, N + COLTYP( I ) = 3 + 60 CONTINUE +* +* + K = 0 + K2 = N + 1 + DO 70 J = 1, N + NJ = INDX( J ) + IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + COLTYP( NJ ) = 4 + INDXP( K2 ) = NJ + IF( J.EQ.N ) + $ GO TO 100 + ELSE + PJ = NJ + GO TO 80 + END IF + 70 CONTINUE + 80 CONTINUE + J = J + 1 + NJ = INDX( J ) + IF( J.GT.N ) + $ GO TO 100 + IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + COLTYP( NJ ) = 4 + INDXP( K2 ) = NJ + ELSE +* +* Check if eigenvalues are close enough to allow deflation. +* + S = Z( PJ ) + C = Z( NJ ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = DLAPY2( C, S ) + T = D( NJ ) - D( PJ ) + C = C / TAU + S = -S / TAU + IF( ABS( T*C*S ).LE.TOL ) THEN +* +* Deflation is possible. +* + Z( NJ ) = TAU + Z( PJ ) = ZERO + IF( COLTYP( NJ ).NE.COLTYP( PJ ) ) + $ COLTYP( NJ ) = 2 + COLTYP( PJ ) = 4 + CALL DROT( N, Q( 1, PJ ), 1, Q( 1, NJ ), 1, C, S ) + T = D( PJ )*C**2 + D( NJ )*S**2 + D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2 + D( PJ ) = T + K2 = K2 - 1 + I = 1 + 90 CONTINUE + IF( K2+I.LE.N ) THEN + IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN + INDXP( K2+I-1 ) = INDXP( K2+I ) + INDXP( K2+I ) = PJ + I = I + 1 + GO TO 90 + ELSE + INDXP( K2+I-1 ) = PJ + END IF + ELSE + INDXP( K2+I-1 ) = PJ + END IF + PJ = NJ + ELSE + K = K + 1 + DLAMDA( K ) = D( PJ ) + W( K ) = Z( PJ ) + INDXP( K ) = PJ + PJ = NJ + END IF + END IF + GO TO 80 + 100 CONTINUE +* +* Record the last eigenvalue. +* + K = K + 1 + DLAMDA( K ) = D( PJ ) + W( K ) = Z( PJ ) + INDXP( K ) = PJ +* +* Count up the total number of the various types of columns, then +* form a permutation which positions the four column types into +* four uniform groups (although one or more of these groups may be +* empty). +* + DO 110 J = 1, 4 + CTOT( J ) = 0 + 110 CONTINUE + DO 120 J = 1, N + CT = COLTYP( J ) + CTOT( CT ) = CTOT( CT ) + 1 + 120 CONTINUE +* +* PSM(*) = Position in SubMatrix (of types 1 through 4) +* + PSM( 1 ) = 1 + PSM( 2 ) = 1 + CTOT( 1 ) + PSM( 3 ) = PSM( 2 ) + CTOT( 2 ) + PSM( 4 ) = PSM( 3 ) + CTOT( 3 ) + K = N - CTOT( 4 ) +* +* Fill out the INDXC array so that the permutation which it induces +* will place all type-1 columns first, all type-2 columns next, +* then all type-3's, and finally all type-4's. +* + DO 130 J = 1, N + JS = INDXP( J ) + CT = COLTYP( JS ) + INDX( PSM( CT ) ) = JS + INDXC( PSM( CT ) ) = J + PSM( CT ) = PSM( CT ) + 1 + 130 CONTINUE +* +* Sort the eigenvalues and corresponding eigenvectors into DLAMDA +* and Q2 respectively. The eigenvalues/vectors which were not +* deflated go into the first K slots of DLAMDA and Q2 respectively, +* while those which were deflated go into the last N - K slots. +* + I = 1 + IQ1 = 1 + IQ2 = 1 + ( CTOT( 1 )+CTOT( 2 ) )*N1 + DO 140 J = 1, CTOT( 1 ) + JS = INDX( I ) + CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 ) + Z( I ) = D( JS ) + I = I + 1 + IQ1 = IQ1 + N1 + 140 CONTINUE +* + DO 150 J = 1, CTOT( 2 ) + JS = INDX( I ) + CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 ) + CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 ) + Z( I ) = D( JS ) + I = I + 1 + IQ1 = IQ1 + N1 + IQ2 = IQ2 + N2 + 150 CONTINUE +* + DO 160 J = 1, CTOT( 3 ) + JS = INDX( I ) + CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 ) + Z( I ) = D( JS ) + I = I + 1 + IQ2 = IQ2 + N2 + 160 CONTINUE +* + IQ1 = IQ2 + DO 170 J = 1, CTOT( 4 ) + JS = INDX( I ) + CALL DCOPY( N, Q( 1, JS ), 1, Q2( IQ2 ), 1 ) + IQ2 = IQ2 + N + Z( I ) = D( JS ) + I = I + 1 + 170 CONTINUE +* +* The deflated eigenvalues and their corresponding vectors go back +* into the last N - K slots of D and Q respectively. +* + IF( K.LT.N ) THEN + CALL DLACPY( 'A', N, CTOT( 4 ), Q2( IQ1 ), N, + $ Q( 1, K+1 ), LDQ ) + CALL DCOPY( N-K, Z( K+1 ), 1, D( K+1 ), 1 ) + END IF +* +* Copy CTOT into COLTYP for referencing in DLAED3. +* + DO 180 J = 1, 4 + COLTYP( J ) = CTOT( J ) + 180 CONTINUE +* + 190 CONTINUE + RETURN +* +* End of DLAED2 +* + END diff --git a/dspl/liblapack/SRC/dlaed3.f b/dspl/liblapack/SRC/dlaed3.f new file mode 100644 index 0000000..d200fc0 --- /dev/null +++ b/dspl/liblapack/SRC/dlaed3.f @@ -0,0 +1,353 @@ +*> \brief \b DLAED3 used by sstedc. Finds the roots of the secular equation and updates the eigenvectors. Used when the original matrix is tridiagonal. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAED3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX, +* CTOT, W, S, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDQ, N, N1 +* DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. +* INTEGER CTOT( * ), INDX( * ) +* DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ), +* $ S( * ), W( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAED3 finds the roots of the secular equation, as defined by the +*> values in D, W, and RHO, between 1 and K. It makes the +*> appropriate calls to DLAED4 and then updates the eigenvectors by +*> multiplying the matrix of eigenvectors of the pair of eigensystems +*> being combined by the matrix of eigenvectors of the K-by-K system +*> which is solved here. +*> +*> This code makes very mild assumptions about floating point +*> arithmetic. It will work on machines with a guard digit in +*> add/subtract, or on those binary machines without guard digits +*> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. +*> It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of terms in the rational function to be solved by +*> DLAED4. K >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns in the Q matrix. +*> N >= K (deflation may result in N>K). +*> \endverbatim +*> +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> The location of the last eigenvalue in the leading submatrix. +*> min(1,N) <= N1 <= N/2. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> D(I) contains the updated eigenvalues for +*> 1 <= I <= K. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> Initially the first K columns are used as workspace. +*> On output the columns 1 to K contain +*> the updated eigenvectors. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> The value of the parameter in the rank one update equation. +*> RHO >= 0 required. +*> \endverbatim +*> +*> \param[in,out] DLAMDA +*> \verbatim +*> DLAMDA is DOUBLE PRECISION array, dimension (K) +*> The first K elements of this array contain the old roots +*> of the deflated updating problem. These are the poles +*> of the secular equation. May be changed on output by +*> having lowest order bit set to zero on Cray X-MP, Cray Y-MP, +*> Cray-2, or Cray C-90, as described above. +*> \endverbatim +*> +*> \param[in] Q2 +*> \verbatim +*> Q2 is DOUBLE PRECISION array, dimension (LDQ2*N) +*> The first K columns of this matrix contain the non-deflated +*> eigenvectors for the split problem. +*> \endverbatim +*> +*> \param[in] INDX +*> \verbatim +*> INDX is INTEGER array, dimension (N) +*> The permutation used to arrange the columns of the deflated +*> Q matrix into three groups (see DLAED2). +*> The rows of the eigenvectors found by DLAED4 must be likewise +*> permuted before the matrix multiply can take place. +*> \endverbatim +*> +*> \param[in] CTOT +*> \verbatim +*> CTOT is INTEGER array, dimension (4) +*> A count of the total number of the various types of columns +*> in Q, as described in INDX. The fourth column type is any +*> column which has been deflated. +*> \endverbatim +*> +*> \param[in,out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (K) +*> The first K elements of this array contain the components +*> of the deflation-adjusted updating vector. Destroyed on +*> output. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N1 + 1)*K +*> Will contain the eigenvectors of the repaired matrix which +*> will be multiplied by the previously accumulated eigenvectors +*> to update the system. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, an eigenvalue did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA \n +*> Modified by Francoise Tisseur, University of Tennessee +*> +* ===================================================================== + SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX, + $ CTOT, W, S, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDQ, N, N1 + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + INTEGER CTOT( * ), INDX( * ) + DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ), + $ S( * ), W( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, II, IQ2, J, N12, N2, N23 + DOUBLE PRECISION TEMP +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3, DNRM2 + EXTERNAL DLAMC3, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DLACPY, DLAED4, DLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( K.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.K ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) + $ RETURN +* +* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), +* which on any of these machines zeros out the bottommost +* bit of DLAMDA(I) if it is 1; this makes the subsequent +* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DLAMDA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DLAMDA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 10 I = 1, K + DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I ) + 10 CONTINUE +* + DO 20 J = 1, K + CALL DLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO ) +* +* If the zero finder fails, the computation is terminated. +* + IF( INFO.NE.0 ) + $ GO TO 120 + 20 CONTINUE +* + IF( K.EQ.1 ) + $ GO TO 110 + IF( K.EQ.2 ) THEN + DO 30 J = 1, K + W( 1 ) = Q( 1, J ) + W( 2 ) = Q( 2, J ) + II = INDX( 1 ) + Q( 1, J ) = W( II ) + II = INDX( 2 ) + Q( 2, J ) = W( II ) + 30 CONTINUE + GO TO 110 + END IF +* +* Compute updated W. +* + CALL DCOPY( K, W, 1, S, 1 ) +* +* Initialize W(I) = Q(I,I) +* + CALL DCOPY( K, Q, LDQ+1, W, 1 ) + DO 60 J = 1, K + DO 40 I = 1, J - 1 + W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) ) + 40 CONTINUE + DO 50 I = J + 1, K + W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) ) + 50 CONTINUE + 60 CONTINUE + DO 70 I = 1, K + W( I ) = SIGN( SQRT( -W( I ) ), S( I ) ) + 70 CONTINUE +* +* Compute eigenvectors of the modified rank-1 modification. +* + DO 100 J = 1, K + DO 80 I = 1, K + S( I ) = W( I ) / Q( I, J ) + 80 CONTINUE + TEMP = DNRM2( K, S, 1 ) + DO 90 I = 1, K + II = INDX( I ) + Q( I, J ) = S( II ) / TEMP + 90 CONTINUE + 100 CONTINUE +* +* Compute the updated eigenvectors. +* + 110 CONTINUE +* + N2 = N - N1 + N12 = CTOT( 1 ) + CTOT( 2 ) + N23 = CTOT( 2 ) + CTOT( 3 ) +* + CALL DLACPY( 'A', N23, K, Q( CTOT( 1 )+1, 1 ), LDQ, S, N23 ) + IQ2 = N1*N12 + 1 + IF( N23.NE.0 ) THEN + CALL DGEMM( 'N', 'N', N2, K, N23, ONE, Q2( IQ2 ), N2, S, N23, + $ ZERO, Q( N1+1, 1 ), LDQ ) + ELSE + CALL DLASET( 'A', N2, K, ZERO, ZERO, Q( N1+1, 1 ), LDQ ) + END IF +* + CALL DLACPY( 'A', N12, K, Q, LDQ, S, N12 ) + IF( N12.NE.0 ) THEN + CALL DGEMM( 'N', 'N', N1, K, N12, ONE, Q2, N1, S, N12, ZERO, Q, + $ LDQ ) + ELSE + CALL DLASET( 'A', N1, K, ZERO, ZERO, Q( 1, 1 ), LDQ ) + END IF +* +* + 120 CONTINUE + RETURN +* +* End of DLAED3 +* + END diff --git a/dspl/liblapack/SRC/dlaed4.f b/dspl/liblapack/SRC/dlaed4.f new file mode 100644 index 0000000..e7dc839 --- /dev/null +++ b/dspl/liblapack/SRC/dlaed4.f @@ -0,0 +1,917 @@ +*> \brief \b DLAED4 used by sstedc. Finds a single root of the secular equation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAED4 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO ) +* +* .. Scalar Arguments .. +* INTEGER I, INFO, N +* DOUBLE PRECISION DLAM, RHO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), DELTA( * ), Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This subroutine computes the I-th updated eigenvalue of a symmetric +*> rank-one modification to a diagonal matrix whose elements are +*> given in the array d, and that +*> +*> D(i) < D(j) for i < j +*> +*> and that RHO > 0. This is arranged by the calling routine, and is +*> no loss in generality. The rank-one modified system is thus +*> +*> diag( D ) + RHO * Z * Z_transpose. +*> +*> where we assume the Euclidean norm of Z is 1. +*> +*> The method consists of approximating the rational functions in the +*> secular equation by simpler interpolating rational functions. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of all arrays. +*> \endverbatim +*> +*> \param[in] I +*> \verbatim +*> I is INTEGER +*> The index of the eigenvalue to be computed. 1 <= I <= N. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The original eigenvalues. It is assumed that they are in +*> order, D(I) < D(J) for I < J. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (N) +*> The components of the updating vector. +*> \endverbatim +*> +*> \param[out] DELTA +*> \verbatim +*> DELTA is DOUBLE PRECISION array, dimension (N) +*> If N .GT. 2, DELTA contains (D(j) - lambda_I) in its j-th +*> component. If N = 1, then DELTA(1) = 1. If N = 2, see DLAED5 +*> for detail. The vector DELTA contains the information necessary +*> to construct the eigenvectors by DLAED3 and DLAED9. +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> The scalar in the symmetric updating formula. +*> \endverbatim +*> +*> \param[out] DLAM +*> \verbatim +*> DLAM is DOUBLE PRECISION +*> The computed lambda_I, the I-th updated eigenvalue. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = 1, the updating process failed. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> Logical variable ORGATI (origin-at-i?) is used for distinguishing +*> whether D(i) or D(i+1) is treated as the origin. +*> +*> ORGATI = .true. origin at i +*> ORGATI = .false. origin at i+1 +*> +*> Logical variable SWTCH3 (switch-for-3-poles?) is for noting +*> if we are working with THREE poles! +*> +*> MAXIT is the maximum number of iterations allowed for each +*> eigenvalue. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Ren-Cang Li, Computer Science Division, University of California +*> at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER I, INFO, N + DOUBLE PRECISION DLAM, RHO +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), DELTA( * ), Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) + DOUBLE PRECISION ZERO, ONE, TWO, THREE, FOUR, EIGHT, TEN + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ THREE = 3.0D0, FOUR = 4.0D0, EIGHT = 8.0D0, + $ TEN = 10.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL ORGATI, SWTCH, SWTCH3 + INTEGER II, IIM1, IIP1, IP1, ITER, J, NITER + DOUBLE PRECISION A, B, C, DEL, DLTLB, DLTUB, DPHI, DPSI, DW, + $ EPS, ERRETM, ETA, MIDPT, PHI, PREW, PSI, + $ RHOINV, TAU, TEMP, TEMP1, W +* .. +* .. Local Arrays .. + DOUBLE PRECISION ZZ( 3 ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLAED5, DLAED6 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Since this routine is called in an inner loop, we do no argument +* checking. +* +* Quick return for N=1 and 2. +* + INFO = 0 + IF( N.EQ.1 ) THEN +* +* Presumably, I=1 upon entry +* + DLAM = D( 1 ) + RHO*Z( 1 )*Z( 1 ) + DELTA( 1 ) = ONE + RETURN + END IF + IF( N.EQ.2 ) THEN + CALL DLAED5( I, D, Z, DELTA, RHO, DLAM ) + RETURN + END IF +* +* Compute machine epsilon +* + EPS = DLAMCH( 'Epsilon' ) + RHOINV = ONE / RHO +* +* The case I = N +* + IF( I.EQ.N ) THEN +* +* Initialize some basic variables +* + II = N - 1 + NITER = 1 +* +* Calculate initial guess +* + MIDPT = RHO / TWO +* +* If ||Z||_2 is not one, then TEMP should be set to +* RHO * ||Z||_2^2 / TWO +* + DO 10 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - MIDPT + 10 CONTINUE +* + PSI = ZERO + DO 20 J = 1, N - 2 + PSI = PSI + Z( J )*Z( J ) / DELTA( J ) + 20 CONTINUE +* + C = RHOINV + PSI + W = C + Z( II )*Z( II ) / DELTA( II ) + + $ Z( N )*Z( N ) / DELTA( N ) +* + IF( W.LE.ZERO ) THEN + TEMP = Z( N-1 )*Z( N-1 ) / ( D( N )-D( N-1 )+RHO ) + + $ Z( N )*Z( N ) / RHO + IF( C.LE.TEMP ) THEN + TAU = RHO + ELSE + DEL = D( N ) - D( N-1 ) + A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N ) + B = Z( N )*Z( N )*DEL + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A ) + ELSE + TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C ) + END IF + END IF +* +* It can be proved that +* D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO +* + DLTLB = MIDPT + DLTUB = RHO + ELSE + DEL = D( N ) - D( N-1 ) + A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N ) + B = Z( N )*Z( N )*DEL + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A ) + ELSE + TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C ) + END IF +* +* It can be proved that +* D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 +* + DLTLB = ZERO + DLTUB = MIDPT + END IF +* + DO 30 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - TAU + 30 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 40 J = 1, II + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 40 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / DELTA( N ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + DLAM = D( I ) + TAU + GO TO 250 + END IF +* + IF( W.LE.ZERO ) THEN + DLTLB = MAX( DLTLB, TAU ) + ELSE + DLTUB = MIN( DLTUB, TAU ) + END IF +* +* Calculate the new step +* + NITER = NITER + 1 + C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI + A = ( DELTA( N-1 )+DELTA( N ) )*W - + $ DELTA( N-1 )*DELTA( N )*( DPSI+DPHI ) + B = DELTA( N-1 )*DELTA( N )*W + IF( C.LT.ZERO ) + $ C = ABS( C ) + IF( C.EQ.ZERO ) THEN +* ETA = B/A +* ETA = RHO - TAU + ETA = DLTUB - TAU + ELSE IF( A.GE.ZERO ) THEN + ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GT.ZERO ) + $ ETA = -W / ( DPSI+DPHI ) + TEMP = TAU + ETA + IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( DLTUB-TAU ) / TWO + ELSE + ETA = ( DLTLB-TAU ) / TWO + END IF + END IF + DO 50 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + 50 CONTINUE +* + TAU = TAU + ETA +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 60 J = 1, II + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 60 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / DELTA( N ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI +* +* Main loop to update the values of the array DELTA +* + ITER = NITER + 1 +* + DO 90 NITER = ITER, MAXIT +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + DLAM = D( I ) + TAU + GO TO 250 + END IF +* + IF( W.LE.ZERO ) THEN + DLTLB = MAX( DLTLB, TAU ) + ELSE + DLTUB = MIN( DLTUB, TAU ) + END IF +* +* Calculate the new step +* + C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI + A = ( DELTA( N-1 )+DELTA( N ) )*W - + $ DELTA( N-1 )*DELTA( N )*( DPSI+DPHI ) + B = DELTA( N-1 )*DELTA( N )*W + IF( A.GE.ZERO ) THEN + ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GT.ZERO ) + $ ETA = -W / ( DPSI+DPHI ) + TEMP = TAU + ETA + IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( DLTUB-TAU ) / TWO + ELSE + ETA = ( DLTLB-TAU ) / TWO + END IF + END IF + DO 70 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + 70 CONTINUE +* + TAU = TAU + ETA +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 80 J = 1, II + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 80 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / DELTA( N ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI + 90 CONTINUE +* +* Return with INFO = 1, NITER = MAXIT and not converged +* + INFO = 1 + DLAM = D( I ) + TAU + GO TO 250 +* +* End for the case I = N +* + ELSE +* +* The case for I < N +* + NITER = 1 + IP1 = I + 1 +* +* Calculate initial guess +* + DEL = D( IP1 ) - D( I ) + MIDPT = DEL / TWO + DO 100 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - MIDPT + 100 CONTINUE +* + PSI = ZERO + DO 110 J = 1, I - 1 + PSI = PSI + Z( J )*Z( J ) / DELTA( J ) + 110 CONTINUE +* + PHI = ZERO + DO 120 J = N, I + 2, -1 + PHI = PHI + Z( J )*Z( J ) / DELTA( J ) + 120 CONTINUE + C = RHOINV + PSI + PHI + W = C + Z( I )*Z( I ) / DELTA( I ) + + $ Z( IP1 )*Z( IP1 ) / DELTA( IP1 ) +* + IF( W.GT.ZERO ) THEN +* +* d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 +* +* We choose d(i) as origin. +* + ORGATI = .TRUE. + A = C*DEL + Z( I )*Z( I ) + Z( IP1 )*Z( IP1 ) + B = Z( I )*Z( I )*DEL + IF( A.GT.ZERO ) THEN + TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + ELSE + TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + END IF + DLTLB = ZERO + DLTUB = MIDPT + ELSE +* +* (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) +* +* We choose d(i+1) as origin. +* + ORGATI = .FALSE. + A = C*DEL - Z( I )*Z( I ) - Z( IP1 )*Z( IP1 ) + B = Z( IP1 )*Z( IP1 )*DEL + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( A-SQRT( ABS( A*A+FOUR*B*C ) ) ) + ELSE + TAU = -( A+SQRT( ABS( A*A+FOUR*B*C ) ) ) / ( TWO*C ) + END IF + DLTLB = -MIDPT + DLTUB = ZERO + END IF +* + IF( ORGATI ) THEN + DO 130 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - TAU + 130 CONTINUE + ELSE + DO 140 J = 1, N + DELTA( J ) = ( D( J )-D( IP1 ) ) - TAU + 140 CONTINUE + END IF + IF( ORGATI ) THEN + II = I + ELSE + II = I + 1 + END IF + IIM1 = II - 1 + IIP1 = II + 1 +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 150 J = 1, IIM1 + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 150 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 160 J = N, IIP1, -1 + TEMP = Z( J ) / DELTA( J ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 160 CONTINUE +* + W = RHOINV + PHI + PSI +* +* W is the value of the secular function with +* its ii-th element removed. +* + SWTCH3 = .FALSE. + IF( ORGATI ) THEN + IF( W.LT.ZERO ) + $ SWTCH3 = .TRUE. + ELSE + IF( W.GT.ZERO ) + $ SWTCH3 = .TRUE. + END IF + IF( II.EQ.1 .OR. II.EQ.N ) + $ SWTCH3 = .FALSE. +* + TEMP = Z( II ) / DELTA( II ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = W + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU )*DW +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + IF( ORGATI ) THEN + DLAM = D( I ) + TAU + ELSE + DLAM = D( IP1 ) + TAU + END IF + GO TO 250 + END IF +* + IF( W.LE.ZERO ) THEN + DLTLB = MAX( DLTLB, TAU ) + ELSE + DLTUB = MIN( DLTUB, TAU ) + END IF +* +* Calculate the new step +* + NITER = NITER + 1 + IF( .NOT.SWTCH3 ) THEN + IF( ORGATI ) THEN + C = W - DELTA( IP1 )*DW - ( D( I )-D( IP1 ) )* + $ ( Z( I ) / DELTA( I ) )**2 + ELSE + C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )* + $ ( Z( IP1 ) / DELTA( IP1 ) )**2 + END IF + A = ( DELTA( I )+DELTA( IP1 ) )*W - + $ DELTA( I )*DELTA( IP1 )*DW + B = DELTA( I )*DELTA( IP1 )*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DELTA( IP1 )*DELTA( IP1 )* + $ ( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + DELTA( I )*DELTA( I )* + $ ( DPSI+DPHI ) + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + ELSE +* +* Interpolation using THREE most relevant poles +* + TEMP = RHOINV + PSI + PHI + IF( ORGATI ) THEN + TEMP1 = Z( IIM1 ) / DELTA( IIM1 ) + TEMP1 = TEMP1*TEMP1 + C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) - + $ ( D( IIM1 )-D( IIP1 ) )*TEMP1 + ZZ( 1 ) = Z( IIM1 )*Z( IIM1 ) + ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )* + $ ( ( DPSI-TEMP1 )+DPHI ) + ELSE + TEMP1 = Z( IIP1 ) / DELTA( IIP1 ) + TEMP1 = TEMP1*TEMP1 + C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) - + $ ( D( IIP1 )-D( IIM1 ) )*TEMP1 + ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )* + $ ( DPSI+( DPHI-TEMP1 ) ) + ZZ( 3 ) = Z( IIP1 )*Z( IIP1 ) + END IF + ZZ( 2 ) = Z( II )*Z( II ) + CALL DLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA, + $ INFO ) + IF( INFO.NE.0 ) + $ GO TO 250 + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GE.ZERO ) + $ ETA = -W / DW + TEMP = TAU + ETA + IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( DLTUB-TAU ) / TWO + ELSE + ETA = ( DLTLB-TAU ) / TWO + END IF + END IF +* + PREW = W +* + DO 180 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + 180 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 190 J = 1, IIM1 + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 190 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 200 J = N, IIP1, -1 + TEMP = Z( J ) / DELTA( J ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 200 CONTINUE +* + TEMP = Z( II ) / DELTA( II ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = RHOINV + PHI + PSI + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU+ETA )*DW +* + SWTCH = .FALSE. + IF( ORGATI ) THEN + IF( -W.GT.ABS( PREW ) / TEN ) + $ SWTCH = .TRUE. + ELSE + IF( W.GT.ABS( PREW ) / TEN ) + $ SWTCH = .TRUE. + END IF +* + TAU = TAU + ETA +* +* Main loop to update the values of the array DELTA +* + ITER = NITER + 1 +* + DO 240 NITER = ITER, MAXIT +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + IF( ORGATI ) THEN + DLAM = D( I ) + TAU + ELSE + DLAM = D( IP1 ) + TAU + END IF + GO TO 250 + END IF +* + IF( W.LE.ZERO ) THEN + DLTLB = MAX( DLTLB, TAU ) + ELSE + DLTUB = MIN( DLTUB, TAU ) + END IF +* +* Calculate the new step +* + IF( .NOT.SWTCH3 ) THEN + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + C = W - DELTA( IP1 )*DW - + $ ( D( I )-D( IP1 ) )*( Z( I ) / DELTA( I ) )**2 + ELSE + C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )* + $ ( Z( IP1 ) / DELTA( IP1 ) )**2 + END IF + ELSE + TEMP = Z( II ) / DELTA( II ) + IF( ORGATI ) THEN + DPSI = DPSI + TEMP*TEMP + ELSE + DPHI = DPHI + TEMP*TEMP + END IF + C = W - DELTA( I )*DPSI - DELTA( IP1 )*DPHI + END IF + A = ( DELTA( I )+DELTA( IP1 ) )*W - + $ DELTA( I )*DELTA( IP1 )*DW + B = DELTA( I )*DELTA( IP1 )*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DELTA( IP1 )* + $ DELTA( IP1 )*( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + + $ DELTA( I )*DELTA( I )*( DPSI+DPHI ) + END IF + ELSE + A = DELTA( I )*DELTA( I )*DPSI + + $ DELTA( IP1 )*DELTA( IP1 )*DPHI + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + ELSE +* +* Interpolation using THREE most relevant poles +* + TEMP = RHOINV + PSI + PHI + IF( SWTCH ) THEN + C = TEMP - DELTA( IIM1 )*DPSI - DELTA( IIP1 )*DPHI + ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*DPSI + ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*DPHI + ELSE + IF( ORGATI ) THEN + TEMP1 = Z( IIM1 ) / DELTA( IIM1 ) + TEMP1 = TEMP1*TEMP1 + C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) - + $ ( D( IIM1 )-D( IIP1 ) )*TEMP1 + ZZ( 1 ) = Z( IIM1 )*Z( IIM1 ) + ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )* + $ ( ( DPSI-TEMP1 )+DPHI ) + ELSE + TEMP1 = Z( IIP1 ) / DELTA( IIP1 ) + TEMP1 = TEMP1*TEMP1 + C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) - + $ ( D( IIP1 )-D( IIM1 ) )*TEMP1 + ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )* + $ ( DPSI+( DPHI-TEMP1 ) ) + ZZ( 3 ) = Z( IIP1 )*Z( IIP1 ) + END IF + END IF + CALL DLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA, + $ INFO ) + IF( INFO.NE.0 ) + $ GO TO 250 + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GE.ZERO ) + $ ETA = -W / DW + TEMP = TAU + ETA + IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( DLTUB-TAU ) / TWO + ELSE + ETA = ( DLTLB-TAU ) / TWO + END IF + END IF +* + DO 210 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + 210 CONTINUE +* + TAU = TAU + ETA + PREW = W +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 220 J = 1, IIM1 + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 220 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 230 J = N, IIP1, -1 + TEMP = Z( J ) / DELTA( J ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 230 CONTINUE +* + TEMP = Z( II ) / DELTA( II ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = RHOINV + PHI + PSI + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU )*DW + IF( W*PREW.GT.ZERO .AND. ABS( W ).GT.ABS( PREW ) / TEN ) + $ SWTCH = .NOT.SWTCH +* + 240 CONTINUE +* +* Return with INFO = 1, NITER = MAXIT and not converged +* + INFO = 1 + IF( ORGATI ) THEN + DLAM = D( I ) + TAU + ELSE + DLAM = D( IP1 ) + TAU + END IF +* + END IF +* + 250 CONTINUE +* + RETURN +* +* End of DLAED4 +* + END diff --git a/dspl/liblapack/SRC/dlaed5.f b/dspl/liblapack/SRC/dlaed5.f new file mode 100644 index 0000000..3ea9e40 --- /dev/null +++ b/dspl/liblapack/SRC/dlaed5.f @@ -0,0 +1,189 @@ +*> \brief \b DLAED5 used by sstedc. Solves the 2-by-2 secular equation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAED5 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAED5( I, D, Z, DELTA, RHO, DLAM ) +* +* .. Scalar Arguments .. +* INTEGER I +* DOUBLE PRECISION DLAM, RHO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( 2 ), DELTA( 2 ), Z( 2 ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This subroutine computes the I-th eigenvalue of a symmetric rank-one +*> modification of a 2-by-2 diagonal matrix +*> +*> diag( D ) + RHO * Z * transpose(Z) . +*> +*> The diagonal elements in the array D are assumed to satisfy +*> +*> D(i) < D(j) for i < j . +*> +*> We also assume RHO > 0 and that the Euclidean norm of the vector +*> Z is one. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] I +*> \verbatim +*> I is INTEGER +*> The index of the eigenvalue to be computed. I = 1 or I = 2. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (2) +*> The original eigenvalues. We assume D(1) < D(2). +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (2) +*> The components of the updating vector. +*> \endverbatim +*> +*> \param[out] DELTA +*> \verbatim +*> DELTA is DOUBLE PRECISION array, dimension (2) +*> The vector DELTA contains the information necessary +*> to construct the eigenvectors. +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> The scalar in the symmetric updating formula. +*> \endverbatim +*> +*> \param[out] DLAM +*> \verbatim +*> DLAM is DOUBLE PRECISION +*> The computed lambda_I, the I-th updated eigenvalue. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Ren-Cang Li, Computer Science Division, University of California +*> at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DLAED5( I, D, Z, DELTA, RHO, DLAM ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER I + DOUBLE PRECISION DLAM, RHO +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( 2 ), DELTA( 2 ), Z( 2 ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, FOUR + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ FOUR = 4.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION B, C, DEL, TAU, TEMP, W +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + DEL = D( 2 ) - D( 1 ) + IF( I.EQ.1 ) THEN + W = ONE + TWO*RHO*( Z( 2 )*Z( 2 )-Z( 1 )*Z( 1 ) ) / DEL + IF( W.GT.ZERO ) THEN + B = DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 1 )*Z( 1 )*DEL +* +* B > ZERO, always +* + TAU = TWO*C / ( B+SQRT( ABS( B*B-FOUR*C ) ) ) + DLAM = D( 1 ) + TAU + DELTA( 1 ) = -Z( 1 ) / TAU + DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) + ELSE + B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 2 )*Z( 2 )*DEL + IF( B.GT.ZERO ) THEN + TAU = -TWO*C / ( B+SQRT( B*B+FOUR*C ) ) + ELSE + TAU = ( B-SQRT( B*B+FOUR*C ) ) / TWO + END IF + DLAM = D( 2 ) + TAU + DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) + DELTA( 2 ) = -Z( 2 ) / TAU + END IF + TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) + DELTA( 1 ) = DELTA( 1 ) / TEMP + DELTA( 2 ) = DELTA( 2 ) / TEMP + ELSE +* +* Now I=2 +* + B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 2 )*Z( 2 )*DEL + IF( B.GT.ZERO ) THEN + TAU = ( B+SQRT( B*B+FOUR*C ) ) / TWO + ELSE + TAU = TWO*C / ( -B+SQRT( B*B+FOUR*C ) ) + END IF + DLAM = D( 2 ) + TAU + DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) + DELTA( 2 ) = -Z( 2 ) / TAU + TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) + DELTA( 1 ) = DELTA( 1 ) / TEMP + DELTA( 2 ) = DELTA( 2 ) / TEMP + END IF + RETURN +* +* End OF DLAED5 +* + END diff --git a/dspl/liblapack/SRC/dlaed6.f b/dspl/liblapack/SRC/dlaed6.f new file mode 100644 index 0000000..daa8db3 --- /dev/null +++ b/dspl/liblapack/SRC/dlaed6.f @@ -0,0 +1,410 @@ +*> \brief \b DLAED6 used by sstedc. Computes one Newton step in solution of the secular equation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAED6 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL ORGATI +* INTEGER INFO, KNITER +* DOUBLE PRECISION FINIT, RHO, TAU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( 3 ), Z( 3 ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAED6 computes the positive or negative root (closest to the origin) +*> of +*> z(1) z(2) z(3) +*> f(x) = rho + --------- + ---------- + --------- +*> d(1)-x d(2)-x d(3)-x +*> +*> It is assumed that +*> +*> if ORGATI = .true. the root is between d(2) and d(3); +*> otherwise it is between d(1) and d(2) +*> +*> This routine will be called by DLAED4 when necessary. In most cases, +*> the root sought is the smallest in magnitude, though it might not be +*> in some extremely rare situations. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] KNITER +*> \verbatim +*> KNITER is INTEGER +*> Refer to DLAED4 for its significance. +*> \endverbatim +*> +*> \param[in] ORGATI +*> \verbatim +*> ORGATI is LOGICAL +*> If ORGATI is true, the needed root is between d(2) and +*> d(3); otherwise it is between d(1) and d(2). See +*> DLAED4 for further details. +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> Refer to the equation f(x) above. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (3) +*> D satisfies d(1) < d(2) < d(3). +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (3) +*> Each of the elements in z must be positive. +*> \endverbatim +*> +*> \param[in] FINIT +*> \verbatim +*> FINIT is DOUBLE PRECISION +*> The value of f at 0. It is more accurate than the one +*> evaluated inside this routine (if someone wants to do +*> so). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> The root of the equation f(x). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = 1, failure to converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> 10/02/03: This version has a few statements commented out for thread +*> safety (machine parameters are computed on each entry). SJH. +*> +*> 05/10/06: Modified from a new version of Ren-Cang Li, use +*> Gragg-Thornton-Warner cubic convergent scheme for better stability. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Ren-Cang Li, Computer Science Division, University of California +*> at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL ORGATI + INTEGER INFO, KNITER + DOUBLE PRECISION FINIT, RHO, TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( 3 ), Z( 3 ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 40 ) + DOUBLE PRECISION ZERO, ONE, TWO, THREE, FOUR, EIGHT + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ THREE = 3.0D0, FOUR = 4.0D0, EIGHT = 8.0D0 ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Local Arrays .. + DOUBLE PRECISION DSCALE( 3 ), ZSCALE( 3 ) +* .. +* .. Local Scalars .. + LOGICAL SCALE + INTEGER I, ITER, NITER + DOUBLE PRECISION A, B, BASE, C, DDF, DF, EPS, ERRETM, ETA, F, + $ FC, SCLFAC, SCLINV, SMALL1, SMALL2, SMINV1, + $ SMINV2, TEMP, TEMP1, TEMP2, TEMP3, TEMP4, + $ LBD, UBD +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* + IF( ORGATI ) THEN + LBD = D(2) + UBD = D(3) + ELSE + LBD = D(1) + UBD = D(2) + END IF + IF( FINIT .LT. ZERO )THEN + LBD = ZERO + ELSE + UBD = ZERO + END IF +* + NITER = 1 + TAU = ZERO + IF( KNITER.EQ.2 ) THEN + IF( ORGATI ) THEN + TEMP = ( D( 3 )-D( 2 ) ) / TWO + C = RHO + Z( 1 ) / ( ( D( 1 )-D( 2 ) )-TEMP ) + A = C*( D( 2 )+D( 3 ) ) + Z( 2 ) + Z( 3 ) + B = C*D( 2 )*D( 3 ) + Z( 2 )*D( 3 ) + Z( 3 )*D( 2 ) + ELSE + TEMP = ( D( 1 )-D( 2 ) ) / TWO + C = RHO + Z( 3 ) / ( ( D( 3 )-D( 2 ) )-TEMP ) + A = C*( D( 1 )+D( 2 ) ) + Z( 1 ) + Z( 2 ) + B = C*D( 1 )*D( 2 ) + Z( 1 )*D( 2 ) + Z( 2 )*D( 1 ) + END IF + TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) ) + A = A / TEMP + B = B / TEMP + C = C / TEMP + IF( C.EQ.ZERO ) THEN + TAU = B / A + ELSE IF( A.LE.ZERO ) THEN + TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + IF( TAU .LT. LBD .OR. TAU .GT. UBD ) + $ TAU = ( LBD+UBD )/TWO + IF( D(1).EQ.TAU .OR. D(2).EQ.TAU .OR. D(3).EQ.TAU ) THEN + TAU = ZERO + ELSE + TEMP = FINIT + TAU*Z(1)/( D(1)*( D( 1 )-TAU ) ) + + $ TAU*Z(2)/( D(2)*( D( 2 )-TAU ) ) + + $ TAU*Z(3)/( D(3)*( D( 3 )-TAU ) ) + IF( TEMP .LE. ZERO )THEN + LBD = TAU + ELSE + UBD = TAU + END IF + IF( ABS( FINIT ).LE.ABS( TEMP ) ) + $ TAU = ZERO + END IF + END IF +* +* get machine parameters for possible scaling to avoid overflow +* +* modified by Sven: parameters SMALL1, SMINV1, SMALL2, +* SMINV2, EPS are not SAVEd anymore between one call to the +* others but recomputed at each call +* + EPS = DLAMCH( 'Epsilon' ) + BASE = DLAMCH( 'Base' ) + SMALL1 = BASE**( INT( LOG( DLAMCH( 'SafMin' ) ) / LOG( BASE ) / + $ THREE ) ) + SMINV1 = ONE / SMALL1 + SMALL2 = SMALL1*SMALL1 + SMINV2 = SMINV1*SMINV1 +* +* Determine if scaling of inputs necessary to avoid overflow +* when computing 1/TEMP**3 +* + IF( ORGATI ) THEN + TEMP = MIN( ABS( D( 2 )-TAU ), ABS( D( 3 )-TAU ) ) + ELSE + TEMP = MIN( ABS( D( 1 )-TAU ), ABS( D( 2 )-TAU ) ) + END IF + SCALE = .FALSE. + IF( TEMP.LE.SMALL1 ) THEN + SCALE = .TRUE. + IF( TEMP.LE.SMALL2 ) THEN +* +* Scale up by power of radix nearest 1/SAFMIN**(2/3) +* + SCLFAC = SMINV2 + SCLINV = SMALL2 + ELSE +* +* Scale up by power of radix nearest 1/SAFMIN**(1/3) +* + SCLFAC = SMINV1 + SCLINV = SMALL1 + END IF +* +* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) +* + DO 10 I = 1, 3 + DSCALE( I ) = D( I )*SCLFAC + ZSCALE( I ) = Z( I )*SCLFAC + 10 CONTINUE + TAU = TAU*SCLFAC + LBD = LBD*SCLFAC + UBD = UBD*SCLFAC + ELSE +* +* Copy D and Z to DSCALE and ZSCALE +* + DO 20 I = 1, 3 + DSCALE( I ) = D( I ) + ZSCALE( I ) = Z( I ) + 20 CONTINUE + END IF +* + FC = ZERO + DF = ZERO + DDF = ZERO + DO 30 I = 1, 3 + TEMP = ONE / ( DSCALE( I )-TAU ) + TEMP1 = ZSCALE( I )*TEMP + TEMP2 = TEMP1*TEMP + TEMP3 = TEMP2*TEMP + FC = FC + TEMP1 / DSCALE( I ) + DF = DF + TEMP2 + DDF = DDF + TEMP3 + 30 CONTINUE + F = FINIT + TAU*FC +* + IF( ABS( F ).LE.ZERO ) + $ GO TO 60 + IF( F .LE. ZERO )THEN + LBD = TAU + ELSE + UBD = TAU + END IF +* +* Iteration begins -- Use Gragg-Thornton-Warner cubic convergent +* scheme +* +* It is not hard to see that +* +* 1) Iterations will go up monotonically +* if FINIT < 0; +* +* 2) Iterations will go down monotonically +* if FINIT > 0. +* + ITER = NITER + 1 +* + DO 50 NITER = ITER, MAXIT +* + IF( ORGATI ) THEN + TEMP1 = DSCALE( 2 ) - TAU + TEMP2 = DSCALE( 3 ) - TAU + ELSE + TEMP1 = DSCALE( 1 ) - TAU + TEMP2 = DSCALE( 2 ) - TAU + END IF + A = ( TEMP1+TEMP2 )*F - TEMP1*TEMP2*DF + B = TEMP1*TEMP2*F + C = F - ( TEMP1+TEMP2 )*DF + TEMP1*TEMP2*DDF + TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) ) + A = A / TEMP + B = B / TEMP + C = C / TEMP + IF( C.EQ.ZERO ) THEN + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + IF( F*ETA.GE.ZERO ) THEN + ETA = -F / DF + END IF +* + TAU = TAU + ETA + IF( TAU .LT. LBD .OR. TAU .GT. UBD ) + $ TAU = ( LBD + UBD )/TWO +* + FC = ZERO + ERRETM = ZERO + DF = ZERO + DDF = ZERO + DO 40 I = 1, 3 + IF ( ( DSCALE( I )-TAU ).NE.ZERO ) THEN + TEMP = ONE / ( DSCALE( I )-TAU ) + TEMP1 = ZSCALE( I )*TEMP + TEMP2 = TEMP1*TEMP + TEMP3 = TEMP2*TEMP + TEMP4 = TEMP1 / DSCALE( I ) + FC = FC + TEMP4 + ERRETM = ERRETM + ABS( TEMP4 ) + DF = DF + TEMP2 + DDF = DDF + TEMP3 + ELSE + GO TO 60 + END IF + 40 CONTINUE + F = FINIT + TAU*FC + ERRETM = EIGHT*( ABS( FINIT )+ABS( TAU )*ERRETM ) + + $ ABS( TAU )*DF + IF( ( ABS( F ).LE.FOUR*EPS*ERRETM ) .OR. + $ ( (UBD-LBD).LE.FOUR*EPS*ABS(TAU) ) ) + $ GO TO 60 + IF( F .LE. ZERO )THEN + LBD = TAU + ELSE + UBD = TAU + END IF + 50 CONTINUE + INFO = 1 + 60 CONTINUE +* +* Undo scaling +* + IF( SCALE ) + $ TAU = TAU*SCLINV + RETURN +* +* End of DLAED6 +* + END diff --git a/dspl/liblapack/SRC/dlaed7.f b/dspl/liblapack/SRC/dlaed7.f new file mode 100644 index 0000000..9c528ad --- /dev/null +++ b/dspl/liblapack/SRC/dlaed7.f @@ -0,0 +1,407 @@ +*> \brief \b DLAED7 used by sstedc. Computes the updated eigensystem of a diagonal matrix after modification by a rank-one symmetric matrix. Used when the original matrix is dense. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAED7 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAED7( ICOMPQ, N, QSIZ, TLVLS, CURLVL, CURPBM, D, Q, +* LDQ, INDXQ, RHO, CUTPNT, QSTORE, QPTR, PRMPTR, +* PERM, GIVPTR, GIVCOL, GIVNUM, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER CURLVL, CURPBM, CUTPNT, ICOMPQ, INFO, LDQ, N, +* $ QSIZ, TLVLS +* DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( 2, * ), GIVPTR( * ), INDXQ( * ), +* $ IWORK( * ), PERM( * ), PRMPTR( * ), QPTR( * ) +* DOUBLE PRECISION D( * ), GIVNUM( 2, * ), Q( LDQ, * ), +* $ QSTORE( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAED7 computes the updated eigensystem of a diagonal +*> matrix after modification by a rank-one symmetric matrix. This +*> routine is used only for the eigenproblem which requires all +*> eigenvalues and optionally eigenvectors of a dense symmetric matrix +*> that has been reduced to tridiagonal form. DLAED1 handles +*> the case in which all eigenvalues and eigenvectors of a symmetric +*> tridiagonal matrix are desired. +*> +*> T = Q(in) ( D(in) + RHO * Z*Z**T ) Q**T(in) = Q(out) * D(out) * Q**T(out) +*> +*> where Z = Q**Tu, u is a vector of length N with ones in the +*> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. +*> +*> The eigenvectors of the original matrix are stored in Q, and the +*> eigenvalues are in D. The algorithm consists of three stages: +*> +*> The first stage consists of deflating the size of the problem +*> when there are multiple eigenvalues or if there is a zero in +*> the Z vector. For each such occurrence the dimension of the +*> secular equation problem is reduced by one. This stage is +*> performed by the routine DLAED8. +*> +*> The second stage consists of calculating the updated +*> eigenvalues. This is done by finding the roots of the secular +*> equation via the routine DLAED4 (as called by DLAED9). +*> This routine also calculates the eigenvectors of the current +*> problem. +*> +*> The final stage consists of computing the updated eigenvectors +*> directly using the updated eigenvalues. The eigenvectors for +*> the current problem are multiplied with the eigenvectors from +*> the overall problem. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> = 0: Compute eigenvalues only. +*> = 1: Compute eigenvectors of original dense symmetric matrix +*> also. On entry, Q contains the orthogonal matrix used +*> to reduce the original matrix to tridiagonal form. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] QSIZ +*> \verbatim +*> QSIZ is INTEGER +*> The dimension of the orthogonal matrix used to reduce +*> the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. +*> \endverbatim +*> +*> \param[in] TLVLS +*> \verbatim +*> TLVLS is INTEGER +*> The total number of merging levels in the overall divide and +*> conquer tree. +*> \endverbatim +*> +*> \param[in] CURLVL +*> \verbatim +*> CURLVL is INTEGER +*> The current level in the overall merge routine, +*> 0 <= CURLVL <= TLVLS. +*> \endverbatim +*> +*> \param[in] CURPBM +*> \verbatim +*> CURPBM is INTEGER +*> The current problem in the current level in the overall +*> merge routine (counting from upper left to lower right). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the eigenvalues of the rank-1-perturbed matrix. +*> On exit, the eigenvalues of the repaired matrix. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ, N) +*> On entry, the eigenvectors of the rank-1-perturbed matrix. +*> On exit, the eigenvectors of the repaired tridiagonal matrix. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[out] INDXQ +*> \verbatim +*> INDXQ is INTEGER array, dimension (N) +*> The permutation which will reintegrate the subproblem just +*> solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) +*> will be in ascending order. +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> The subdiagonal element used to create the rank-1 +*> modification. +*> \endverbatim +*> +*> \param[in] CUTPNT +*> \verbatim +*> CUTPNT is INTEGER +*> Contains the location of the last eigenvalue in the leading +*> sub-matrix. min(1,N) <= CUTPNT <= N. +*> \endverbatim +*> +*> \param[in,out] QSTORE +*> \verbatim +*> QSTORE is DOUBLE PRECISION array, dimension (N**2+1) +*> Stores eigenvectors of submatrices encountered during +*> divide and conquer, packed together. QPTR points to +*> beginning of the submatrices. +*> \endverbatim +*> +*> \param[in,out] QPTR +*> \verbatim +*> QPTR is INTEGER array, dimension (N+2) +*> List of indices pointing to beginning of submatrices stored +*> in QSTORE. The submatrices are numbered starting at the +*> bottom left of the divide and conquer tree, from left to +*> right and bottom to top. +*> \endverbatim +*> +*> \param[in] PRMPTR +*> \verbatim +*> PRMPTR is INTEGER array, dimension (N lg N) +*> Contains a list of pointers which indicate where in PERM a +*> level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) +*> indicates the size of the permutation and also the size of +*> the full, non-deflated problem. +*> \endverbatim +*> +*> \param[in] PERM +*> \verbatim +*> PERM is INTEGER array, dimension (N lg N) +*> Contains the permutations (from deflation and sorting) to be +*> applied to each eigenblock. +*> \endverbatim +*> +*> \param[in] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER array, dimension (N lg N) +*> Contains a list of pointers which indicate where in GIVCOL a +*> level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) +*> indicates the number of Givens rotations. +*> \endverbatim +*> +*> \param[in] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension (2, N lg N) +*> Each pair of numbers indicates a pair of columns to take place +*> in a Givens rotation. +*> \endverbatim +*> +*> \param[in] GIVNUM +*> \verbatim +*> GIVNUM is DOUBLE PRECISION array, dimension (2, N lg N) +*> Each number indicates the S value to be used in the +*> corresponding Givens rotation. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N+2*QSIZ*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, an eigenvalue did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +* +* ===================================================================== + SUBROUTINE DLAED7( ICOMPQ, N, QSIZ, TLVLS, CURLVL, CURPBM, D, Q, + $ LDQ, INDXQ, RHO, CUTPNT, QSTORE, QPTR, PRMPTR, + $ PERM, GIVPTR, GIVCOL, GIVNUM, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER CURLVL, CURPBM, CUTPNT, ICOMPQ, INFO, LDQ, N, + $ QSIZ, TLVLS + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), GIVPTR( * ), INDXQ( * ), + $ IWORK( * ), PERM( * ), PRMPTR( * ), QPTR( * ) + DOUBLE PRECISION D( * ), GIVNUM( 2, * ), Q( LDQ, * ), + $ QSTORE( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER COLTYP, CURR, I, IDLMDA, INDX, INDXC, INDXP, + $ IQ2, IS, IW, IZ, K, LDQ2, N1, N2, PTR +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLAED8, DLAED9, DLAEDA, DLAMRG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ICOMPQ.EQ.1 .AND. QSIZ.LT.N ) THEN + INFO = -3 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( MIN( 1, N ).GT.CUTPNT .OR. N.LT.CUTPNT ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED7', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* The following values are for bookkeeping purposes only. They are +* integer pointers which indicate the portion of the workspace +* used by a particular array in DLAED8 and DLAED9. +* + IF( ICOMPQ.EQ.1 ) THEN + LDQ2 = QSIZ + ELSE + LDQ2 = N + END IF +* + IZ = 1 + IDLMDA = IZ + N + IW = IDLMDA + N + IQ2 = IW + N + IS = IQ2 + N*LDQ2 +* + INDX = 1 + INDXC = INDX + N + COLTYP = INDXC + N + INDXP = COLTYP + N +* +* Form the z-vector which consists of the last row of Q_1 and the +* first row of Q_2. +* + PTR = 1 + 2**TLVLS + DO 10 I = 1, CURLVL - 1 + PTR = PTR + 2**( TLVLS-I ) + 10 CONTINUE + CURR = PTR + CURPBM + CALL DLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR, + $ GIVCOL, GIVNUM, QSTORE, QPTR, WORK( IZ ), + $ WORK( IZ+N ), INFO ) +* +* When solving the final problem, we no longer need the stored data, +* so we will overwrite the data from this level onto the previously +* used storage space. +* + IF( CURLVL.EQ.TLVLS ) THEN + QPTR( CURR ) = 1 + PRMPTR( CURR ) = 1 + GIVPTR( CURR ) = 1 + END IF +* +* Sort and Deflate eigenvalues. +* + CALL DLAED8( ICOMPQ, K, N, QSIZ, D, Q, LDQ, INDXQ, RHO, CUTPNT, + $ WORK( IZ ), WORK( IDLMDA ), WORK( IQ2 ), LDQ2, + $ WORK( IW ), PERM( PRMPTR( CURR ) ), GIVPTR( CURR+1 ), + $ GIVCOL( 1, GIVPTR( CURR ) ), + $ GIVNUM( 1, GIVPTR( CURR ) ), IWORK( INDXP ), + $ IWORK( INDX ), INFO ) + PRMPTR( CURR+1 ) = PRMPTR( CURR ) + N + GIVPTR( CURR+1 ) = GIVPTR( CURR+1 ) + GIVPTR( CURR ) +* +* Solve Secular Equation. +* + IF( K.NE.0 ) THEN + CALL DLAED9( K, 1, K, N, D, WORK( IS ), K, RHO, WORK( IDLMDA ), + $ WORK( IW ), QSTORE( QPTR( CURR ) ), K, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( ICOMPQ.EQ.1 ) THEN + CALL DGEMM( 'N', 'N', QSIZ, K, K, ONE, WORK( IQ2 ), LDQ2, + $ QSTORE( QPTR( CURR ) ), K, ZERO, Q, LDQ ) + END IF + QPTR( CURR+1 ) = QPTR( CURR ) + K**2 +* +* Prepare the INDXQ sorting permutation. +* + N1 = K + N2 = N - K + CALL DLAMRG( N1, N2, D, 1, -1, INDXQ ) + ELSE + QPTR( CURR+1 ) = QPTR( CURR ) + DO 20 I = 1, N + INDXQ( I ) = I + 20 CONTINUE + END IF +* + 30 CONTINUE + RETURN +* +* End of DLAED7 +* + END diff --git a/dspl/liblapack/SRC/dlaed8.f b/dspl/liblapack/SRC/dlaed8.f new file mode 100644 index 0000000..c053347 --- /dev/null +++ b/dspl/liblapack/SRC/dlaed8.f @@ -0,0 +1,524 @@ +*> \brief \b DLAED8 used by sstedc. Merges eigenvalues and deflates secular equation. Used when the original matrix is dense. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAED8 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAED8( ICOMPQ, K, N, QSIZ, D, Q, LDQ, INDXQ, RHO, +* CUTPNT, Z, DLAMDA, Q2, LDQ2, W, PERM, GIVPTR, +* GIVCOL, GIVNUM, INDXP, INDX, INFO ) +* +* .. Scalar Arguments .. +* INTEGER CUTPNT, GIVPTR, ICOMPQ, INFO, K, LDQ, LDQ2, N, +* $ QSIZ +* DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( 2, * ), INDX( * ), INDXP( * ), +* $ INDXQ( * ), PERM( * ) +* DOUBLE PRECISION D( * ), DLAMDA( * ), GIVNUM( 2, * ), +* $ Q( LDQ, * ), Q2( LDQ2, * ), W( * ), Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAED8 merges the two sets of eigenvalues together into a single +*> sorted set. Then it tries to deflate the size of the problem. +*> There are two ways in which deflation can occur: when two or more +*> eigenvalues are close together or if there is a tiny element in the +*> Z vector. For each such occurrence the order of the related secular +*> equation problem is reduced by one. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> = 0: Compute eigenvalues only. +*> = 1: Compute eigenvectors of original dense symmetric matrix +*> also. On entry, Q contains the orthogonal matrix used +*> to reduce the original matrix to tridiagonal form. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> The number of non-deflated eigenvalues, and the order of the +*> related secular equation. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] QSIZ +*> \verbatim +*> QSIZ is INTEGER +*> The dimension of the orthogonal matrix used to reduce +*> the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the eigenvalues of the two submatrices to be +*> combined. On exit, the trailing (N-K) updated eigenvalues +*> (those which were deflated) sorted into increasing order. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> If ICOMPQ = 0, Q is not referenced. Otherwise, +*> on entry, Q contains the eigenvectors of the partially solved +*> system which has been previously updated in matrix +*> multiplies with other partially solved eigensystems. +*> On exit, Q contains the trailing (N-K) updated eigenvectors +*> (those which were deflated) in its last N-K columns. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] INDXQ +*> \verbatim +*> INDXQ is INTEGER array, dimension (N) +*> The permutation which separately sorts the two sub-problems +*> in D into ascending order. Note that elements in the second +*> half of this permutation must first have CUTPNT added to +*> their values in order to be accurate. +*> \endverbatim +*> +*> \param[in,out] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> On entry, the off-diagonal element associated with the rank-1 +*> cut which originally split the two submatrices which are now +*> being recombined. +*> On exit, RHO has been modified to the value required by +*> DLAED3. +*> \endverbatim +*> +*> \param[in] CUTPNT +*> \verbatim +*> CUTPNT is INTEGER +*> The location of the last eigenvalue in the leading +*> sub-matrix. min(1,N) <= CUTPNT <= N. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (N) +*> On entry, Z contains the updating vector (the last row of +*> the first sub-eigenvector matrix and the first row of the +*> second sub-eigenvector matrix). +*> On exit, the contents of Z are destroyed by the updating +*> process. +*> \endverbatim +*> +*> \param[out] DLAMDA +*> \verbatim +*> DLAMDA is DOUBLE PRECISION array, dimension (N) +*> A copy of the first K eigenvalues which will be used by +*> DLAED3 to form the secular equation. +*> \endverbatim +*> +*> \param[out] Q2 +*> \verbatim +*> Q2 is DOUBLE PRECISION array, dimension (LDQ2,N) +*> If ICOMPQ = 0, Q2 is not referenced. Otherwise, +*> a copy of the first K eigenvectors which will be used by +*> DLAED7 in a matrix multiply (DGEMM) to update the new +*> eigenvectors. +*> \endverbatim +*> +*> \param[in] LDQ2 +*> \verbatim +*> LDQ2 is INTEGER +*> The leading dimension of the array Q2. LDQ2 >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first k values of the final deflation-altered z-vector and +*> will be passed to DLAED3. +*> \endverbatim +*> +*> \param[out] PERM +*> \verbatim +*> PERM is INTEGER array, dimension (N) +*> The permutations (from deflation and sorting) to be applied +*> to each eigenblock. +*> \endverbatim +*> +*> \param[out] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER +*> The number of Givens rotations which took place in this +*> subproblem. +*> \endverbatim +*> +*> \param[out] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension (2, N) +*> Each pair of numbers indicates a pair of columns to take place +*> in a Givens rotation. +*> \endverbatim +*> +*> \param[out] GIVNUM +*> \verbatim +*> GIVNUM is DOUBLE PRECISION array, dimension (2, N) +*> Each number indicates the S value to be used in the +*> corresponding Givens rotation. +*> \endverbatim +*> +*> \param[out] INDXP +*> \verbatim +*> INDXP is INTEGER array, dimension (N) +*> The permutation used to place deflated values of D at the end +*> of the array. INDXP(1:K) points to the nondeflated D-values +*> and INDXP(K+1:N) points to the deflated eigenvalues. +*> \endverbatim +*> +*> \param[out] INDX +*> \verbatim +*> INDX is INTEGER array, dimension (N) +*> The permutation used to sort the contents of D into ascending +*> order. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +* +* ===================================================================== + SUBROUTINE DLAED8( ICOMPQ, K, N, QSIZ, D, Q, LDQ, INDXQ, RHO, + $ CUTPNT, Z, DLAMDA, Q2, LDQ2, W, PERM, GIVPTR, + $ GIVCOL, GIVNUM, INDXP, INDX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER CUTPNT, GIVPTR, ICOMPQ, INFO, K, LDQ, LDQ2, N, + $ QSIZ + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), INDX( * ), INDXP( * ), + $ INDXQ( * ), PERM( * ) + DOUBLE PRECISION D( * ), DLAMDA( * ), GIVNUM( 2, * ), + $ Q( LDQ, * ), Q2( LDQ2, * ), W( * ), Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION MONE, ZERO, ONE, TWO, EIGHT + PARAMETER ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0, + $ TWO = 2.0D0, EIGHT = 8.0D0 ) +* .. +* .. Local Scalars .. +* + INTEGER I, IMAX, J, JLAM, JMAX, JP, K2, N1, N1P1, N2 + DOUBLE PRECISION C, EPS, S, T, TAU, TOL +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL IDAMAX, DLAMCH, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLACPY, DLAMRG, DROT, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ICOMPQ.EQ.1 .AND. QSIZ.LT.N ) THEN + INFO = -4 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( CUTPNT.LT.MIN( 1, N ) .OR. CUTPNT.GT.N ) THEN + INFO = -10 + ELSE IF( LDQ2.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED8', -INFO ) + RETURN + END IF +* +* Need to initialize GIVPTR to O here in case of quick exit +* to prevent an unspecified code behavior (usually sigfault) +* when IWORK array on entry to *stedc is not zeroed +* (or at least some IWORK entries which used in *laed7 for GIVPTR). +* + GIVPTR = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + N1 = CUTPNT + N2 = N - N1 + N1P1 = N1 + 1 +* + IF( RHO.LT.ZERO ) THEN + CALL DSCAL( N2, MONE, Z( N1P1 ), 1 ) + END IF +* +* Normalize z so that norm(z) = 1 +* + T = ONE / SQRT( TWO ) + DO 10 J = 1, N + INDX( J ) = J + 10 CONTINUE + CALL DSCAL( N, T, Z, 1 ) + RHO = ABS( TWO*RHO ) +* +* Sort the eigenvalues into increasing order +* + DO 20 I = CUTPNT + 1, N + INDXQ( I ) = INDXQ( I ) + CUTPNT + 20 CONTINUE + DO 30 I = 1, N + DLAMDA( I ) = D( INDXQ( I ) ) + W( I ) = Z( INDXQ( I ) ) + 30 CONTINUE + I = 1 + J = CUTPNT + 1 + CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDX ) + DO 40 I = 1, N + D( I ) = DLAMDA( INDX( I ) ) + Z( I ) = W( INDX( I ) ) + 40 CONTINUE +* +* Calculate the allowable deflation tolerence +* + IMAX = IDAMAX( N, Z, 1 ) + JMAX = IDAMAX( N, D, 1 ) + EPS = DLAMCH( 'Epsilon' ) + TOL = EIGHT*EPS*ABS( D( JMAX ) ) +* +* If the rank-1 modifier is small enough, no more needs to be done +* except to reorganize Q so that its columns correspond with the +* elements in D. +* + IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN + K = 0 + IF( ICOMPQ.EQ.0 ) THEN + DO 50 J = 1, N + PERM( J ) = INDXQ( INDX( J ) ) + 50 CONTINUE + ELSE + DO 60 J = 1, N + PERM( J ) = INDXQ( INDX( J ) ) + CALL DCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 ) + 60 CONTINUE + CALL DLACPY( 'A', QSIZ, N, Q2( 1, 1 ), LDQ2, Q( 1, 1 ), + $ LDQ ) + END IF + RETURN + END IF +* +* If there are multiple eigenvalues then the problem deflates. Here +* the number of equal eigenvalues are found. As each equal +* eigenvalue is found, an elementary reflector is computed to rotate +* the corresponding eigensubspace so that the corresponding +* components of Z are zero in this new basis. +* + K = 0 + K2 = N + 1 + DO 70 J = 1, N + IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + INDXP( K2 ) = J + IF( J.EQ.N ) + $ GO TO 110 + ELSE + JLAM = J + GO TO 80 + END IF + 70 CONTINUE + 80 CONTINUE + J = J + 1 + IF( J.GT.N ) + $ GO TO 100 + IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + INDXP( K2 ) = J + ELSE +* +* Check if eigenvalues are close enough to allow deflation. +* + S = Z( JLAM ) + C = Z( J ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = DLAPY2( C, S ) + T = D( J ) - D( JLAM ) + C = C / TAU + S = -S / TAU + IF( ABS( T*C*S ).LE.TOL ) THEN +* +* Deflation is possible. +* + Z( J ) = TAU + Z( JLAM ) = ZERO +* +* Record the appropriate Givens rotation +* + GIVPTR = GIVPTR + 1 + GIVCOL( 1, GIVPTR ) = INDXQ( INDX( JLAM ) ) + GIVCOL( 2, GIVPTR ) = INDXQ( INDX( J ) ) + GIVNUM( 1, GIVPTR ) = C + GIVNUM( 2, GIVPTR ) = S + IF( ICOMPQ.EQ.1 ) THEN + CALL DROT( QSIZ, Q( 1, INDXQ( INDX( JLAM ) ) ), 1, + $ Q( 1, INDXQ( INDX( J ) ) ), 1, C, S ) + END IF + T = D( JLAM )*C*C + D( J )*S*S + D( J ) = D( JLAM )*S*S + D( J )*C*C + D( JLAM ) = T + K2 = K2 - 1 + I = 1 + 90 CONTINUE + IF( K2+I.LE.N ) THEN + IF( D( JLAM ).LT.D( INDXP( K2+I ) ) ) THEN + INDXP( K2+I-1 ) = INDXP( K2+I ) + INDXP( K2+I ) = JLAM + I = I + 1 + GO TO 90 + ELSE + INDXP( K2+I-1 ) = JLAM + END IF + ELSE + INDXP( K2+I-1 ) = JLAM + END IF + JLAM = J + ELSE + K = K + 1 + W( K ) = Z( JLAM ) + DLAMDA( K ) = D( JLAM ) + INDXP( K ) = JLAM + JLAM = J + END IF + END IF + GO TO 80 + 100 CONTINUE +* +* Record the last eigenvalue. +* + K = K + 1 + W( K ) = Z( JLAM ) + DLAMDA( K ) = D( JLAM ) + INDXP( K ) = JLAM +* + 110 CONTINUE +* +* Sort the eigenvalues and corresponding eigenvectors into DLAMDA +* and Q2 respectively. The eigenvalues/vectors which were not +* deflated go into the first K slots of DLAMDA and Q2 respectively, +* while those which were deflated go into the last N - K slots. +* + IF( ICOMPQ.EQ.0 ) THEN + DO 120 J = 1, N + JP = INDXP( J ) + DLAMDA( J ) = D( JP ) + PERM( J ) = INDXQ( INDX( JP ) ) + 120 CONTINUE + ELSE + DO 130 J = 1, N + JP = INDXP( J ) + DLAMDA( J ) = D( JP ) + PERM( J ) = INDXQ( INDX( JP ) ) + CALL DCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 ) + 130 CONTINUE + END IF +* +* The deflated eigenvalues and their corresponding vectors go back +* into the last N - K slots of D and Q respectively. +* + IF( K.LT.N ) THEN + IF( ICOMPQ.EQ.0 ) THEN + CALL DCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 ) + ELSE + CALL DCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 ) + CALL DLACPY( 'A', QSIZ, N-K, Q2( 1, K+1 ), LDQ2, + $ Q( 1, K+1 ), LDQ ) + END IF + END IF +* + RETURN +* +* End of DLAED8 +* + END diff --git a/dspl/liblapack/SRC/dlaed9.f b/dspl/liblapack/SRC/dlaed9.f new file mode 100644 index 0000000..d3be225 --- /dev/null +++ b/dspl/liblapack/SRC/dlaed9.f @@ -0,0 +1,294 @@ +*> \brief \b DLAED9 used by sstedc. Finds the roots of the secular equation and updates the eigenvectors. Used when the original matrix is dense. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAED9 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAED9( K, KSTART, KSTOP, N, D, Q, LDQ, RHO, DLAMDA, W, +* S, LDS, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, KSTART, KSTOP, LDQ, LDS, N +* DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), S( LDS, * ), +* $ W( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAED9 finds the roots of the secular equation, as defined by the +*> values in D, Z, and RHO, between KSTART and KSTOP. It makes the +*> appropriate calls to DLAED4 and then stores the new matrix of +*> eigenvectors for use in calculating the next level of Z vectors. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of terms in the rational function to be solved by +*> DLAED4. K >= 0. +*> \endverbatim +*> +*> \param[in] KSTART +*> \verbatim +*> KSTART is INTEGER +*> \endverbatim +*> +*> \param[in] KSTOP +*> \verbatim +*> KSTOP is INTEGER +*> The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP +*> are to be computed. 1 <= KSTART <= KSTOP <= K. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns in the Q matrix. +*> N >= K (delation may result in N > K). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> D(I) contains the updated eigenvalues +*> for KSTART <= I <= KSTOP. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max( 1, N ). +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> The value of the parameter in the rank one update equation. +*> RHO >= 0 required. +*> \endverbatim +*> +*> \param[in] DLAMDA +*> \verbatim +*> DLAMDA is DOUBLE PRECISION array, dimension (K) +*> The first K elements of this array contain the old roots +*> of the deflated updating problem. These are the poles +*> of the secular equation. +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (K) +*> The first K elements of this array contain the components +*> of the deflation-adjusted updating vector. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (LDS, K) +*> Will contain the eigenvectors of the repaired matrix which +*> will be stored for subsequent Z vector calculation and +*> multiplied by the previously accumulated eigenvectors +*> to update the system. +*> \endverbatim +*> +*> \param[in] LDS +*> \verbatim +*> LDS is INTEGER +*> The leading dimension of S. LDS >= max( 1, K ). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, an eigenvalue did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +* +* ===================================================================== + SUBROUTINE DLAED9( K, KSTART, KSTOP, N, D, Q, LDQ, RHO, DLAMDA, W, + $ S, LDS, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, KSTART, KSTOP, LDQ, LDS, N + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), S( LDS, * ), + $ W( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION TEMP +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3, DNRM2 + EXTERNAL DLAMC3, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAED4, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( K.LT.0 ) THEN + INFO = -1 + ELSE IF( KSTART.LT.1 .OR. KSTART.GT.MAX( 1, K ) ) THEN + INFO = -2 + ELSE IF( MAX( 1, KSTOP ).LT.KSTART .OR. KSTOP.GT.MAX( 1, K ) ) + $ THEN + INFO = -3 + ELSE IF( N.LT.K ) THEN + INFO = -4 + ELSE IF( LDQ.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDS.LT.MAX( 1, K ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAED9', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) + $ RETURN +* +* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), +* which on any of these machines zeros out the bottommost +* bit of DLAMDA(I) if it is 1; this makes the subsequent +* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DLAMDA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DLAMDA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 10 I = 1, N + DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I ) + 10 CONTINUE +* + DO 20 J = KSTART, KSTOP + CALL DLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO ) +* +* If the zero finder fails, the computation is terminated. +* + IF( INFO.NE.0 ) + $ GO TO 120 + 20 CONTINUE +* + IF( K.EQ.1 .OR. K.EQ.2 ) THEN + DO 40 I = 1, K + DO 30 J = 1, K + S( J, I ) = Q( J, I ) + 30 CONTINUE + 40 CONTINUE + GO TO 120 + END IF +* +* Compute updated W. +* + CALL DCOPY( K, W, 1, S, 1 ) +* +* Initialize W(I) = Q(I,I) +* + CALL DCOPY( K, Q, LDQ+1, W, 1 ) + DO 70 J = 1, K + DO 50 I = 1, J - 1 + W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) ) + 50 CONTINUE + DO 60 I = J + 1, K + W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) ) + 60 CONTINUE + 70 CONTINUE + DO 80 I = 1, K + W( I ) = SIGN( SQRT( -W( I ) ), S( I, 1 ) ) + 80 CONTINUE +* +* Compute eigenvectors of the modified rank-1 modification. +* + DO 110 J = 1, K + DO 90 I = 1, K + Q( I, J ) = W( I ) / Q( I, J ) + 90 CONTINUE + TEMP = DNRM2( K, Q( 1, J ), 1 ) + DO 100 I = 1, K + S( I, J ) = Q( I, J ) / TEMP + 100 CONTINUE + 110 CONTINUE +* + 120 CONTINUE + RETURN +* +* End of DLAED9 +* + END diff --git a/dspl/liblapack/SRC/dlaeda.f b/dspl/liblapack/SRC/dlaeda.f new file mode 100644 index 0000000..4ca08a0 --- /dev/null +++ b/dspl/liblapack/SRC/dlaeda.f @@ -0,0 +1,308 @@ +*> \brief \b DLAEDA used by sstedc. Computes the Z vector determining the rank-one modification of the diagonal matrix. Used when the original matrix is dense. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAEDA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR, +* GIVCOL, GIVNUM, Q, QPTR, Z, ZTEMP, INFO ) +* +* .. Scalar Arguments .. +* INTEGER CURLVL, CURPBM, INFO, N, TLVLS +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( 2, * ), GIVPTR( * ), PERM( * ), +* $ PRMPTR( * ), QPTR( * ) +* DOUBLE PRECISION GIVNUM( 2, * ), Q( * ), Z( * ), ZTEMP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAEDA computes the Z vector corresponding to the merge step in the +*> CURLVLth step of the merge process with TLVLS steps for the CURPBMth +*> problem. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] TLVLS +*> \verbatim +*> TLVLS is INTEGER +*> The total number of merging levels in the overall divide and +*> conquer tree. +*> \endverbatim +*> +*> \param[in] CURLVL +*> \verbatim +*> CURLVL is INTEGER +*> The current level in the overall merge routine, +*> 0 <= curlvl <= tlvls. +*> \endverbatim +*> +*> \param[in] CURPBM +*> \verbatim +*> CURPBM is INTEGER +*> The current problem in the current level in the overall +*> merge routine (counting from upper left to lower right). +*> \endverbatim +*> +*> \param[in] PRMPTR +*> \verbatim +*> PRMPTR is INTEGER array, dimension (N lg N) +*> Contains a list of pointers which indicate where in PERM a +*> level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) +*> indicates the size of the permutation and incidentally the +*> size of the full, non-deflated problem. +*> \endverbatim +*> +*> \param[in] PERM +*> \verbatim +*> PERM is INTEGER array, dimension (N lg N) +*> Contains the permutations (from deflation and sorting) to be +*> applied to each eigenblock. +*> \endverbatim +*> +*> \param[in] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER array, dimension (N lg N) +*> Contains a list of pointers which indicate where in GIVCOL a +*> level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) +*> indicates the number of Givens rotations. +*> \endverbatim +*> +*> \param[in] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension (2, N lg N) +*> Each pair of numbers indicates a pair of columns to take place +*> in a Givens rotation. +*> \endverbatim +*> +*> \param[in] GIVNUM +*> \verbatim +*> GIVNUM is DOUBLE PRECISION array, dimension (2, N lg N) +*> Each number indicates the S value to be used in the +*> corresponding Givens rotation. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (N**2) +*> Contains the square eigenblocks from previous levels, the +*> starting positions for blocks are given by QPTR. +*> \endverbatim +*> +*> \param[in] QPTR +*> \verbatim +*> QPTR is INTEGER array, dimension (N+2) +*> Contains a list of pointers which indicate where in Q an +*> eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates +*> the size of the block. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (N) +*> On output this vector contains the updating vector (the last +*> row of the first sub-eigenvector matrix and the first row of +*> the second sub-eigenvector matrix). +*> \endverbatim +*> +*> \param[out] ZTEMP +*> \verbatim +*> ZTEMP is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +* +* ===================================================================== + SUBROUTINE DLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR, + $ GIVCOL, GIVNUM, Q, QPTR, Z, ZTEMP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER CURLVL, CURPBM, INFO, N, TLVLS +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), GIVPTR( * ), PERM( * ), + $ PRMPTR( * ), QPTR( * ) + DOUBLE PRECISION GIVNUM( 2, * ), Q( * ), Z( * ), ZTEMP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + INTEGER BSIZ1, BSIZ2, CURR, I, K, MID, PSIZ1, PSIZ2, + $ PTR, ZPTR1 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMV, DROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, INT, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -1 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAEDA', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine location of first number in second half. +* + MID = N / 2 + 1 +* +* Gather last/first rows of appropriate eigenblocks into center of Z +* + PTR = 1 +* +* Determine location of lowest level subproblem in the full storage +* scheme +* + CURR = PTR + CURPBM*2**CURLVL + 2**( CURLVL-1 ) - 1 +* +* Determine size of these matrices. We add HALF to the value of +* the SQRT in case the machine underestimates one of these square +* roots. +* + BSIZ1 = INT( HALF+SQRT( DBLE( QPTR( CURR+1 )-QPTR( CURR ) ) ) ) + BSIZ2 = INT( HALF+SQRT( DBLE( QPTR( CURR+2 )-QPTR( CURR+1 ) ) ) ) + DO 10 K = 1, MID - BSIZ1 - 1 + Z( K ) = ZERO + 10 CONTINUE + CALL DCOPY( BSIZ1, Q( QPTR( CURR )+BSIZ1-1 ), BSIZ1, + $ Z( MID-BSIZ1 ), 1 ) + CALL DCOPY( BSIZ2, Q( QPTR( CURR+1 ) ), BSIZ2, Z( MID ), 1 ) + DO 20 K = MID + BSIZ2, N + Z( K ) = ZERO + 20 CONTINUE +* +* Loop through remaining levels 1 -> CURLVL applying the Givens +* rotations and permutation and then multiplying the center matrices +* against the current Z. +* + PTR = 2**TLVLS + 1 + DO 70 K = 1, CURLVL - 1 + CURR = PTR + CURPBM*2**( CURLVL-K ) + 2**( CURLVL-K-1 ) - 1 + PSIZ1 = PRMPTR( CURR+1 ) - PRMPTR( CURR ) + PSIZ2 = PRMPTR( CURR+2 ) - PRMPTR( CURR+1 ) + ZPTR1 = MID - PSIZ1 +* +* Apply Givens at CURR and CURR+1 +* + DO 30 I = GIVPTR( CURR ), GIVPTR( CURR+1 ) - 1 + CALL DROT( 1, Z( ZPTR1+GIVCOL( 1, I )-1 ), 1, + $ Z( ZPTR1+GIVCOL( 2, I )-1 ), 1, GIVNUM( 1, I ), + $ GIVNUM( 2, I ) ) + 30 CONTINUE + DO 40 I = GIVPTR( CURR+1 ), GIVPTR( CURR+2 ) - 1 + CALL DROT( 1, Z( MID-1+GIVCOL( 1, I ) ), 1, + $ Z( MID-1+GIVCOL( 2, I ) ), 1, GIVNUM( 1, I ), + $ GIVNUM( 2, I ) ) + 40 CONTINUE + PSIZ1 = PRMPTR( CURR+1 ) - PRMPTR( CURR ) + PSIZ2 = PRMPTR( CURR+2 ) - PRMPTR( CURR+1 ) + DO 50 I = 0, PSIZ1 - 1 + ZTEMP( I+1 ) = Z( ZPTR1+PERM( PRMPTR( CURR )+I )-1 ) + 50 CONTINUE + DO 60 I = 0, PSIZ2 - 1 + ZTEMP( PSIZ1+I+1 ) = Z( MID+PERM( PRMPTR( CURR+1 )+I )-1 ) + 60 CONTINUE +* +* Multiply Blocks at CURR and CURR+1 +* +* Determine size of these matrices. We add HALF to the value of +* the SQRT in case the machine underestimates one of these +* square roots. +* + BSIZ1 = INT( HALF+SQRT( DBLE( QPTR( CURR+1 )-QPTR( CURR ) ) ) ) + BSIZ2 = INT( HALF+SQRT( DBLE( QPTR( CURR+2 )-QPTR( CURR+ + $ 1 ) ) ) ) + IF( BSIZ1.GT.0 ) THEN + CALL DGEMV( 'T', BSIZ1, BSIZ1, ONE, Q( QPTR( CURR ) ), + $ BSIZ1, ZTEMP( 1 ), 1, ZERO, Z( ZPTR1 ), 1 ) + END IF + CALL DCOPY( PSIZ1-BSIZ1, ZTEMP( BSIZ1+1 ), 1, Z( ZPTR1+BSIZ1 ), + $ 1 ) + IF( BSIZ2.GT.0 ) THEN + CALL DGEMV( 'T', BSIZ2, BSIZ2, ONE, Q( QPTR( CURR+1 ) ), + $ BSIZ2, ZTEMP( PSIZ1+1 ), 1, ZERO, Z( MID ), 1 ) + END IF + CALL DCOPY( PSIZ2-BSIZ2, ZTEMP( PSIZ1+BSIZ2+1 ), 1, + $ Z( MID+BSIZ2 ), 1 ) +* + PTR = PTR + 2**( TLVLS-K ) + 70 CONTINUE +* + RETURN +* +* End of DLAEDA +* + END diff --git a/dspl/liblapack/SRC/dlaein.f b/dspl/liblapack/SRC/dlaein.f new file mode 100644 index 0000000..d35e186 --- /dev/null +++ b/dspl/liblapack/SRC/dlaein.f @@ -0,0 +1,632 @@ +*> \brief \b DLAEIN computes a specified right or left eigenvector of an upper Hessenberg matrix by inverse iteration. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAEIN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAEIN( RIGHTV, NOINIT, N, H, LDH, WR, WI, VR, VI, B, +* LDB, WORK, EPS3, SMLNUM, BIGNUM, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL NOINIT, RIGHTV +* INTEGER INFO, LDB, LDH, N +* DOUBLE PRECISION BIGNUM, EPS3, SMLNUM, WI, WR +* .. +* .. Array Arguments .. +* DOUBLE PRECISION B( LDB, * ), H( LDH, * ), VI( * ), VR( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAEIN uses inverse iteration to find a right or left eigenvector +*> corresponding to the eigenvalue (WR,WI) of a real upper Hessenberg +*> matrix H. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] RIGHTV +*> \verbatim +*> RIGHTV is LOGICAL +*> = .TRUE. : compute right eigenvector; +*> = .FALSE.: compute left eigenvector. +*> \endverbatim +*> +*> \param[in] NOINIT +*> \verbatim +*> NOINIT is LOGICAL +*> = .TRUE. : no initial vector supplied in (VR,VI). +*> = .FALSE.: initial vector supplied in (VR,VI). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N >= 0. +*> \endverbatim +*> +*> \param[in] H +*> \verbatim +*> H is DOUBLE PRECISION array, dimension (LDH,N) +*> The upper Hessenberg matrix H. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH >= max(1,N). +*> \endverbatim +*> +*> \param[in] WR +*> \verbatim +*> WR is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] WI +*> \verbatim +*> WI is DOUBLE PRECISION +*> The real and imaginary parts of the eigenvalue of H whose +*> corresponding right or left eigenvector is to be computed. +*> \endverbatim +*> +*> \param[in,out] VR +*> \verbatim +*> VR is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[in,out] VI +*> \verbatim +*> VI is DOUBLE PRECISION array, dimension (N) +*> On entry, if NOINIT = .FALSE. and WI = 0.0, VR must contain +*> a real starting vector for inverse iteration using the real +*> eigenvalue WR; if NOINIT = .FALSE. and WI.ne.0.0, VR and VI +*> must contain the real and imaginary parts of a complex +*> starting vector for inverse iteration using the complex +*> eigenvalue (WR,WI); otherwise VR and VI need not be set. +*> On exit, if WI = 0.0 (real eigenvalue), VR contains the +*> computed real eigenvector; if WI.ne.0.0 (complex eigenvalue), +*> VR and VI contain the real and imaginary parts of the +*> computed complex eigenvector. The eigenvector is normalized +*> so that the component of largest magnitude has magnitude 1; +*> here the magnitude of a complex number (x,y) is taken to be +*> |x| + |y|. +*> VI is not referenced if WI = 0.0. +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= N+1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[in] EPS3 +*> \verbatim +*> EPS3 is DOUBLE PRECISION +*> A small machine-dependent value which is used to perturb +*> close eigenvalues, and to replace zero pivots. +*> \endverbatim +*> +*> \param[in] SMLNUM +*> \verbatim +*> SMLNUM is DOUBLE PRECISION +*> A machine-dependent value close to the underflow threshold. +*> \endverbatim +*> +*> \param[in] BIGNUM +*> \verbatim +*> BIGNUM is DOUBLE PRECISION +*> A machine-dependent value close to the overflow threshold. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> = 1: inverse iteration did not converge; VR is set to the +*> last iterate, and so is VI if WI.ne.0.0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLAEIN( RIGHTV, NOINIT, N, H, LDH, WR, WI, VR, VI, B, + $ LDB, WORK, EPS3, SMLNUM, BIGNUM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL NOINIT, RIGHTV + INTEGER INFO, LDB, LDH, N + DOUBLE PRECISION BIGNUM, EPS3, SMLNUM, WI, WR +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( LDB, * ), H( LDH, * ), VI( * ), VR( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TENTH + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TENTH = 1.0D-1 ) +* .. +* .. Local Scalars .. + CHARACTER NORMIN, TRANS + INTEGER I, I1, I2, I3, IERR, ITS, J + DOUBLE PRECISION ABSBII, ABSBJJ, EI, EJ, GROWTO, NORM, NRMSML, + $ REC, ROOTN, SCALE, TEMP, VCRIT, VMAX, VNORM, W, + $ W1, X, XI, XR, Y +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DASUM, DLAPY2, DNRM2 + EXTERNAL IDAMAX, DASUM, DLAPY2, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DLADIV, DLATRS, DSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* GROWTO is the threshold used in the acceptance test for an +* eigenvector. +* + ROOTN = SQRT( DBLE( N ) ) + GROWTO = TENTH / ROOTN + NRMSML = MAX( ONE, EPS3*ROOTN )*SMLNUM +* +* Form B = H - (WR,WI)*I (except that the subdiagonal elements and +* the imaginary parts of the diagonal elements are not stored). +* + DO 20 J = 1, N + DO 10 I = 1, J - 1 + B( I, J ) = H( I, J ) + 10 CONTINUE + B( J, J ) = H( J, J ) - WR + 20 CONTINUE +* + IF( WI.EQ.ZERO ) THEN +* +* Real eigenvalue. +* + IF( NOINIT ) THEN +* +* Set initial vector. +* + DO 30 I = 1, N + VR( I ) = EPS3 + 30 CONTINUE + ELSE +* +* Scale supplied initial vector. +* + VNORM = DNRM2( N, VR, 1 ) + CALL DSCAL( N, ( EPS3*ROOTN ) / MAX( VNORM, NRMSML ), VR, + $ 1 ) + END IF +* + IF( RIGHTV ) THEN +* +* LU decomposition with partial pivoting of B, replacing zero +* pivots by EPS3. +* + DO 60 I = 1, N - 1 + EI = H( I+1, I ) + IF( ABS( B( I, I ) ).LT.ABS( EI ) ) THEN +* +* Interchange rows and eliminate. +* + X = B( I, I ) / EI + B( I, I ) = EI + DO 40 J = I + 1, N + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - X*TEMP + B( I, J ) = TEMP + 40 CONTINUE + ELSE +* +* Eliminate without interchange. +* + IF( B( I, I ).EQ.ZERO ) + $ B( I, I ) = EPS3 + X = EI / B( I, I ) + IF( X.NE.ZERO ) THEN + DO 50 J = I + 1, N + B( I+1, J ) = B( I+1, J ) - X*B( I, J ) + 50 CONTINUE + END IF + END IF + 60 CONTINUE + IF( B( N, N ).EQ.ZERO ) + $ B( N, N ) = EPS3 +* + TRANS = 'N' +* + ELSE +* +* UL decomposition with partial pivoting of B, replacing zero +* pivots by EPS3. +* + DO 90 J = N, 2, -1 + EJ = H( J, J-1 ) + IF( ABS( B( J, J ) ).LT.ABS( EJ ) ) THEN +* +* Interchange columns and eliminate. +* + X = B( J, J ) / EJ + B( J, J ) = EJ + DO 70 I = 1, J - 1 + TEMP = B( I, J-1 ) + B( I, J-1 ) = B( I, J ) - X*TEMP + B( I, J ) = TEMP + 70 CONTINUE + ELSE +* +* Eliminate without interchange. +* + IF( B( J, J ).EQ.ZERO ) + $ B( J, J ) = EPS3 + X = EJ / B( J, J ) + IF( X.NE.ZERO ) THEN + DO 80 I = 1, J - 1 + B( I, J-1 ) = B( I, J-1 ) - X*B( I, J ) + 80 CONTINUE + END IF + END IF + 90 CONTINUE + IF( B( 1, 1 ).EQ.ZERO ) + $ B( 1, 1 ) = EPS3 +* + TRANS = 'T' +* + END IF +* + NORMIN = 'N' + DO 110 ITS = 1, N +* +* Solve U*x = scale*v for a right eigenvector +* or U**T*x = scale*v for a left eigenvector, +* overwriting x on v. +* + CALL DLATRS( 'Upper', TRANS, 'Nonunit', NORMIN, N, B, LDB, + $ VR, SCALE, WORK, IERR ) + NORMIN = 'Y' +* +* Test for sufficient growth in the norm of v. +* + VNORM = DASUM( N, VR, 1 ) + IF( VNORM.GE.GROWTO*SCALE ) + $ GO TO 120 +* +* Choose new orthogonal starting vector and try again. +* + TEMP = EPS3 / ( ROOTN+ONE ) + VR( 1 ) = EPS3 + DO 100 I = 2, N + VR( I ) = TEMP + 100 CONTINUE + VR( N-ITS+1 ) = VR( N-ITS+1 ) - EPS3*ROOTN + 110 CONTINUE +* +* Failure to find eigenvector in N iterations. +* + INFO = 1 +* + 120 CONTINUE +* +* Normalize eigenvector. +* + I = IDAMAX( N, VR, 1 ) + CALL DSCAL( N, ONE / ABS( VR( I ) ), VR, 1 ) + ELSE +* +* Complex eigenvalue. +* + IF( NOINIT ) THEN +* +* Set initial vector. +* + DO 130 I = 1, N + VR( I ) = EPS3 + VI( I ) = ZERO + 130 CONTINUE + ELSE +* +* Scale supplied initial vector. +* + NORM = DLAPY2( DNRM2( N, VR, 1 ), DNRM2( N, VI, 1 ) ) + REC = ( EPS3*ROOTN ) / MAX( NORM, NRMSML ) + CALL DSCAL( N, REC, VR, 1 ) + CALL DSCAL( N, REC, VI, 1 ) + END IF +* + IF( RIGHTV ) THEN +* +* LU decomposition with partial pivoting of B, replacing zero +* pivots by EPS3. +* +* The imaginary part of the (i,j)-th element of U is stored in +* B(j+1,i). +* + B( 2, 1 ) = -WI + DO 140 I = 2, N + B( I+1, 1 ) = ZERO + 140 CONTINUE +* + DO 170 I = 1, N - 1 + ABSBII = DLAPY2( B( I, I ), B( I+1, I ) ) + EI = H( I+1, I ) + IF( ABSBII.LT.ABS( EI ) ) THEN +* +* Interchange rows and eliminate. +* + XR = B( I, I ) / EI + XI = B( I+1, I ) / EI + B( I, I ) = EI + B( I+1, I ) = ZERO + DO 150 J = I + 1, N + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - XR*TEMP + B( J+1, I+1 ) = B( J+1, I ) - XI*TEMP + B( I, J ) = TEMP + B( J+1, I ) = ZERO + 150 CONTINUE + B( I+2, I ) = -WI + B( I+1, I+1 ) = B( I+1, I+1 ) - XI*WI + B( I+2, I+1 ) = B( I+2, I+1 ) + XR*WI + ELSE +* +* Eliminate without interchanging rows. +* + IF( ABSBII.EQ.ZERO ) THEN + B( I, I ) = EPS3 + B( I+1, I ) = ZERO + ABSBII = EPS3 + END IF + EI = ( EI / ABSBII ) / ABSBII + XR = B( I, I )*EI + XI = -B( I+1, I )*EI + DO 160 J = I + 1, N + B( I+1, J ) = B( I+1, J ) - XR*B( I, J ) + + $ XI*B( J+1, I ) + B( J+1, I+1 ) = -XR*B( J+1, I ) - XI*B( I, J ) + 160 CONTINUE + B( I+2, I+1 ) = B( I+2, I+1 ) - WI + END IF +* +* Compute 1-norm of offdiagonal elements of i-th row. +* + WORK( I ) = DASUM( N-I, B( I, I+1 ), LDB ) + + $ DASUM( N-I, B( I+2, I ), 1 ) + 170 CONTINUE + IF( B( N, N ).EQ.ZERO .AND. B( N+1, N ).EQ.ZERO ) + $ B( N, N ) = EPS3 + WORK( N ) = ZERO +* + I1 = N + I2 = 1 + I3 = -1 + ELSE +* +* UL decomposition with partial pivoting of conjg(B), +* replacing zero pivots by EPS3. +* +* The imaginary part of the (i,j)-th element of U is stored in +* B(j+1,i). +* + B( N+1, N ) = WI + DO 180 J = 1, N - 1 + B( N+1, J ) = ZERO + 180 CONTINUE +* + DO 210 J = N, 2, -1 + EJ = H( J, J-1 ) + ABSBJJ = DLAPY2( B( J, J ), B( J+1, J ) ) + IF( ABSBJJ.LT.ABS( EJ ) ) THEN +* +* Interchange columns and eliminate +* + XR = B( J, J ) / EJ + XI = B( J+1, J ) / EJ + B( J, J ) = EJ + B( J+1, J ) = ZERO + DO 190 I = 1, J - 1 + TEMP = B( I, J-1 ) + B( I, J-1 ) = B( I, J ) - XR*TEMP + B( J, I ) = B( J+1, I ) - XI*TEMP + B( I, J ) = TEMP + B( J+1, I ) = ZERO + 190 CONTINUE + B( J+1, J-1 ) = WI + B( J-1, J-1 ) = B( J-1, J-1 ) + XI*WI + B( J, J-1 ) = B( J, J-1 ) - XR*WI + ELSE +* +* Eliminate without interchange. +* + IF( ABSBJJ.EQ.ZERO ) THEN + B( J, J ) = EPS3 + B( J+1, J ) = ZERO + ABSBJJ = EPS3 + END IF + EJ = ( EJ / ABSBJJ ) / ABSBJJ + XR = B( J, J )*EJ + XI = -B( J+1, J )*EJ + DO 200 I = 1, J - 1 + B( I, J-1 ) = B( I, J-1 ) - XR*B( I, J ) + + $ XI*B( J+1, I ) + B( J, I ) = -XR*B( J+1, I ) - XI*B( I, J ) + 200 CONTINUE + B( J, J-1 ) = B( J, J-1 ) + WI + END IF +* +* Compute 1-norm of offdiagonal elements of j-th column. +* + WORK( J ) = DASUM( J-1, B( 1, J ), 1 ) + + $ DASUM( J-1, B( J+1, 1 ), LDB ) + 210 CONTINUE + IF( B( 1, 1 ).EQ.ZERO .AND. B( 2, 1 ).EQ.ZERO ) + $ B( 1, 1 ) = EPS3 + WORK( 1 ) = ZERO +* + I1 = 1 + I2 = N + I3 = 1 + END IF +* + DO 270 ITS = 1, N + SCALE = ONE + VMAX = ONE + VCRIT = BIGNUM +* +* Solve U*(xr,xi) = scale*(vr,vi) for a right eigenvector, +* or U**T*(xr,xi) = scale*(vr,vi) for a left eigenvector, +* overwriting (xr,xi) on (vr,vi). +* + DO 250 I = I1, I2, I3 +* + IF( WORK( I ).GT.VCRIT ) THEN + REC = ONE / VMAX + CALL DSCAL( N, REC, VR, 1 ) + CALL DSCAL( N, REC, VI, 1 ) + SCALE = SCALE*REC + VMAX = ONE + VCRIT = BIGNUM + END IF +* + XR = VR( I ) + XI = VI( I ) + IF( RIGHTV ) THEN + DO 220 J = I + 1, N + XR = XR - B( I, J )*VR( J ) + B( J+1, I )*VI( J ) + XI = XI - B( I, J )*VI( J ) - B( J+1, I )*VR( J ) + 220 CONTINUE + ELSE + DO 230 J = 1, I - 1 + XR = XR - B( J, I )*VR( J ) + B( I+1, J )*VI( J ) + XI = XI - B( J, I )*VI( J ) - B( I+1, J )*VR( J ) + 230 CONTINUE + END IF +* + W = ABS( B( I, I ) ) + ABS( B( I+1, I ) ) + IF( W.GT.SMLNUM ) THEN + IF( W.LT.ONE ) THEN + W1 = ABS( XR ) + ABS( XI ) + IF( W1.GT.W*BIGNUM ) THEN + REC = ONE / W1 + CALL DSCAL( N, REC, VR, 1 ) + CALL DSCAL( N, REC, VI, 1 ) + XR = VR( I ) + XI = VI( I ) + SCALE = SCALE*REC + VMAX = VMAX*REC + END IF + END IF +* +* Divide by diagonal element of B. +* + CALL DLADIV( XR, XI, B( I, I ), B( I+1, I ), VR( I ), + $ VI( I ) ) + VMAX = MAX( ABS( VR( I ) )+ABS( VI( I ) ), VMAX ) + VCRIT = BIGNUM / VMAX + ELSE + DO 240 J = 1, N + VR( J ) = ZERO + VI( J ) = ZERO + 240 CONTINUE + VR( I ) = ONE + VI( I ) = ONE + SCALE = ZERO + VMAX = ONE + VCRIT = BIGNUM + END IF + 250 CONTINUE +* +* Test for sufficient growth in the norm of (VR,VI). +* + VNORM = DASUM( N, VR, 1 ) + DASUM( N, VI, 1 ) + IF( VNORM.GE.GROWTO*SCALE ) + $ GO TO 280 +* +* Choose a new orthogonal starting vector and try again. +* + Y = EPS3 / ( ROOTN+ONE ) + VR( 1 ) = EPS3 + VI( 1 ) = ZERO +* + DO 260 I = 2, N + VR( I ) = Y + VI( I ) = ZERO + 260 CONTINUE + VR( N-ITS+1 ) = VR( N-ITS+1 ) - EPS3*ROOTN + 270 CONTINUE +* +* Failure to find eigenvector in N iterations +* + INFO = 1 +* + 280 CONTINUE +* +* Normalize eigenvector. +* + VNORM = ZERO + DO 290 I = 1, N + VNORM = MAX( VNORM, ABS( VR( I ) )+ABS( VI( I ) ) ) + 290 CONTINUE + CALL DSCAL( N, ONE / VNORM, VR, 1 ) + CALL DSCAL( N, ONE / VNORM, VI, 1 ) +* + END IF +* + RETURN +* +* End of DLAEIN +* + END diff --git a/dspl/liblapack/SRC/dlaev2.f b/dspl/liblapack/SRC/dlaev2.f new file mode 100644 index 0000000..4906f1a --- /dev/null +++ b/dspl/liblapack/SRC/dlaev2.f @@ -0,0 +1,238 @@ +*> \brief \b DLAEV2 computes the eigenvalues and eigenvectors of a 2-by-2 symmetric/Hermitian matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAEV2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAEV2( A, B, C, RT1, RT2, CS1, SN1 ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION A, B, C, CS1, RT1, RT2, SN1 +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix +*> [ A B ] +*> [ B C ]. +*> On return, RT1 is the eigenvalue of larger absolute value, RT2 is the +*> eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right +*> eigenvector for RT1, giving the decomposition +*> +*> [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] +*> [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION +*> The (1,1) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION +*> The (1,2) element and the conjugate of the (2,1) element of +*> the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION +*> The (2,2) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[out] RT1 +*> \verbatim +*> RT1 is DOUBLE PRECISION +*> The eigenvalue of larger absolute value. +*> \endverbatim +*> +*> \param[out] RT2 +*> \verbatim +*> RT2 is DOUBLE PRECISION +*> The eigenvalue of smaller absolute value. +*> \endverbatim +*> +*> \param[out] CS1 +*> \verbatim +*> CS1 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] SN1 +*> \verbatim +*> SN1 is DOUBLE PRECISION +*> The vector (CS1, SN1) is a unit right eigenvector for RT1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> RT1 is accurate to a few ulps barring over/underflow. +*> +*> RT2 may be inaccurate if there is massive cancellation in the +*> determinant A*C-B*B; higher precision or correctly rounded or +*> correctly truncated arithmetic would be needed to compute RT2 +*> accurately in all cases. +*> +*> CS1 and SN1 are accurate to a few ulps barring over/underflow. +*> +*> Overflow is possible only if RT1 is within a factor of 5 of overflow. +*> Underflow is harmless if the input data is 0 or exceeds +*> underflow_threshold / macheps. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLAEV2( A, B, C, RT1, RT2, CS1, SN1 ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + DOUBLE PRECISION A, B, C, CS1, RT1, RT2, SN1 +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D0 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = 0.5D0 ) +* .. +* .. Local Scalars .. + INTEGER SGN1, SGN2 + DOUBLE PRECISION AB, ACMN, ACMX, ACS, ADF, CS, CT, DF, RT, SM, + $ TB, TN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* +* Compute the eigenvalues +* + SM = A + C + DF = A - C + ADF = ABS( DF ) + TB = B + B + AB = ABS( TB ) + IF( ABS( A ).GT.ABS( C ) ) THEN + ACMX = A + ACMN = C + ELSE + ACMX = C + ACMN = A + END IF + IF( ADF.GT.AB ) THEN + RT = ADF*SQRT( ONE+( AB / ADF )**2 ) + ELSE IF( ADF.LT.AB ) THEN + RT = AB*SQRT( ONE+( ADF / AB )**2 ) + ELSE +* +* Includes case AB=ADF=0 +* + RT = AB*SQRT( TWO ) + END IF + IF( SM.LT.ZERO ) THEN + RT1 = HALF*( SM-RT ) + SGN1 = -1 +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE IF( SM.GT.ZERO ) THEN + RT1 = HALF*( SM+RT ) + SGN1 = 1 +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE +* +* Includes case RT1 = RT2 = 0 +* + RT1 = HALF*RT + RT2 = -HALF*RT + SGN1 = 1 + END IF +* +* Compute the eigenvector +* + IF( DF.GE.ZERO ) THEN + CS = DF + RT + SGN2 = 1 + ELSE + CS = DF - RT + SGN2 = -1 + END IF + ACS = ABS( CS ) + IF( ACS.GT.AB ) THEN + CT = -TB / CS + SN1 = ONE / SQRT( ONE+CT*CT ) + CS1 = CT*SN1 + ELSE + IF( AB.EQ.ZERO ) THEN + CS1 = ONE + SN1 = ZERO + ELSE + TN = -CS / TB + CS1 = ONE / SQRT( ONE+TN*TN ) + SN1 = TN*CS1 + END IF + END IF + IF( SGN1.EQ.SGN2 ) THEN + TN = CS1 + CS1 = -SN1 + SN1 = TN + END IF + RETURN +* +* End of DLAEV2 +* + END diff --git a/dspl/liblapack/SRC/dlaexc.f b/dspl/liblapack/SRC/dlaexc.f new file mode 100644 index 0000000..fc4f4a7 --- /dev/null +++ b/dspl/liblapack/SRC/dlaexc.f @@ -0,0 +1,436 @@ +*> \brief \b DLAEXC swaps adjacent diagonal blocks of a real upper quasi-triangular matrix in Schur canonical form, by an orthogonal similarity transformation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAEXC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAEXC( WANTQ, N, T, LDT, Q, LDQ, J1, N1, N2, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* LOGICAL WANTQ +* INTEGER INFO, J1, LDQ, LDT, N, N1, N2 +* .. +* .. Array Arguments .. +* DOUBLE PRECISION Q( LDQ, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAEXC swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in +*> an upper quasi-triangular matrix T by an orthogonal similarity +*> transformation. +*> +*> T must be in Schur canonical form, that is, block upper triangular +*> with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block +*> has its diagonal elemnts equal and its off-diagonal elements of +*> opposite sign. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTQ +*> \verbatim +*> WANTQ is LOGICAL +*> = .TRUE. : accumulate the transformation in the matrix Q; +*> = .FALSE.: do not accumulate the transformation. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in,out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,N) +*> On entry, the upper quasi-triangular matrix T, in Schur +*> canonical form. +*> On exit, the updated matrix T, again in Schur canonical form. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> On entry, if WANTQ is .TRUE., the orthogonal matrix Q. +*> On exit, if WANTQ is .TRUE., the updated matrix Q. +*> If WANTQ is .FALSE., Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= 1; and if WANTQ is .TRUE., LDQ >= N. +*> \endverbatim +*> +*> \param[in] J1 +*> \verbatim +*> J1 is INTEGER +*> The index of the first row of the first block T11. +*> \endverbatim +*> +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> The order of the first block T11. N1 = 0, 1 or 2. +*> \endverbatim +*> +*> \param[in] N2 +*> \verbatim +*> N2 is INTEGER +*> The order of the second block T22. N2 = 0, 1 or 2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> = 1: the transformed matrix T would be too far from Schur +*> form; the blocks are not swapped and T and Q are +*> unchanged. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLAEXC( WANTQ, N, T, LDT, Q, LDQ, J1, N1, N2, WORK, + $ INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL WANTQ + INTEGER INFO, J1, LDQ, LDT, N, N1, N2 +* .. +* .. Array Arguments .. + DOUBLE PRECISION Q( LDQ, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION TEN + PARAMETER ( TEN = 1.0D+1 ) + INTEGER LDD, LDX + PARAMETER ( LDD = 4, LDX = 2 ) +* .. +* .. Local Scalars .. + INTEGER IERR, J2, J3, J4, K, ND + DOUBLE PRECISION CS, DNORM, EPS, SCALE, SMLNUM, SN, T11, T22, + $ T33, TAU, TAU1, TAU2, TEMP, THRESH, WI1, WI2, + $ WR1, WR2, XNORM +* .. +* .. Local Arrays .. + DOUBLE PRECISION D( LDD, 4 ), U( 3 ), U1( 3 ), U2( 3 ), + $ X( LDX, 2 ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL DLAMCH, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DLACPY, DLANV2, DLARFG, DLARFX, DLARTG, DLASY2, + $ DROT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 .OR. N1.EQ.0 .OR. N2.EQ.0 ) + $ RETURN + IF( J1+N1.GT.N ) + $ RETURN +* + J2 = J1 + 1 + J3 = J1 + 2 + J4 = J1 + 3 +* + IF( N1.EQ.1 .AND. N2.EQ.1 ) THEN +* +* Swap two 1-by-1 blocks. +* + T11 = T( J1, J1 ) + T22 = T( J2, J2 ) +* +* Determine the transformation to perform the interchange. +* + CALL DLARTG( T( J1, J2 ), T22-T11, CS, SN, TEMP ) +* +* Apply transformation to the matrix T. +* + IF( J3.LE.N ) + $ CALL DROT( N-J1-1, T( J1, J3 ), LDT, T( J2, J3 ), LDT, CS, + $ SN ) + CALL DROT( J1-1, T( 1, J1 ), 1, T( 1, J2 ), 1, CS, SN ) +* + T( J1, J1 ) = T22 + T( J2, J2 ) = T11 +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL DROT( N, Q( 1, J1 ), 1, Q( 1, J2 ), 1, CS, SN ) + END IF +* + ELSE +* +* Swapping involves at least one 2-by-2 block. +* +* Copy the diagonal block of order N1+N2 to the local array D +* and compute its norm. +* + ND = N1 + N2 + CALL DLACPY( 'Full', ND, ND, T( J1, J1 ), LDT, D, LDD ) + DNORM = DLANGE( 'Max', ND, ND, D, LDD, WORK ) +* +* Compute machine-dependent threshold for test for accepting +* swap. +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + THRESH = MAX( TEN*EPS*DNORM, SMLNUM ) +* +* Solve T11*X - X*T22 = scale*T12 for X. +* + CALL DLASY2( .FALSE., .FALSE., -1, N1, N2, D, LDD, + $ D( N1+1, N1+1 ), LDD, D( 1, N1+1 ), LDD, SCALE, X, + $ LDX, XNORM, IERR ) +* +* Swap the adjacent diagonal blocks. +* + K = N1 + N1 + N2 - 3 + GO TO ( 10, 20, 30 )K +* + 10 CONTINUE +* +* N1 = 1, N2 = 2: generate elementary reflector H so that: +* +* ( scale, X11, X12 ) H = ( 0, 0, * ) +* + U( 1 ) = SCALE + U( 2 ) = X( 1, 1 ) + U( 3 ) = X( 1, 2 ) + CALL DLARFG( 3, U( 3 ), U, 1, TAU ) + U( 3 ) = ONE + T11 = T( J1, J1 ) +* +* Perform swap provisionally on diagonal block in D. +* + CALL DLARFX( 'L', 3, 3, U, TAU, D, LDD, WORK ) + CALL DLARFX( 'R', 3, 3, U, TAU, D, LDD, WORK ) +* +* Test whether to reject swap. +* + IF( MAX( ABS( D( 3, 1 ) ), ABS( D( 3, 2 ) ), ABS( D( 3, + $ 3 )-T11 ) ).GT.THRESH )GO TO 50 +* +* Accept swap: apply transformation to the entire matrix T. +* + CALL DLARFX( 'L', 3, N-J1+1, U, TAU, T( J1, J1 ), LDT, WORK ) + CALL DLARFX( 'R', J2, 3, U, TAU, T( 1, J1 ), LDT, WORK ) +* + T( J3, J1 ) = ZERO + T( J3, J2 ) = ZERO + T( J3, J3 ) = T11 +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL DLARFX( 'R', N, 3, U, TAU, Q( 1, J1 ), LDQ, WORK ) + END IF + GO TO 40 +* + 20 CONTINUE +* +* N1 = 2, N2 = 1: generate elementary reflector H so that: +* +* H ( -X11 ) = ( * ) +* ( -X21 ) = ( 0 ) +* ( scale ) = ( 0 ) +* + U( 1 ) = -X( 1, 1 ) + U( 2 ) = -X( 2, 1 ) + U( 3 ) = SCALE + CALL DLARFG( 3, U( 1 ), U( 2 ), 1, TAU ) + U( 1 ) = ONE + T33 = T( J3, J3 ) +* +* Perform swap provisionally on diagonal block in D. +* + CALL DLARFX( 'L', 3, 3, U, TAU, D, LDD, WORK ) + CALL DLARFX( 'R', 3, 3, U, TAU, D, LDD, WORK ) +* +* Test whether to reject swap. +* + IF( MAX( ABS( D( 2, 1 ) ), ABS( D( 3, 1 ) ), ABS( D( 1, + $ 1 )-T33 ) ).GT.THRESH )GO TO 50 +* +* Accept swap: apply transformation to the entire matrix T. +* + CALL DLARFX( 'R', J3, 3, U, TAU, T( 1, J1 ), LDT, WORK ) + CALL DLARFX( 'L', 3, N-J1, U, TAU, T( J1, J2 ), LDT, WORK ) +* + T( J1, J1 ) = T33 + T( J2, J1 ) = ZERO + T( J3, J1 ) = ZERO +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL DLARFX( 'R', N, 3, U, TAU, Q( 1, J1 ), LDQ, WORK ) + END IF + GO TO 40 +* + 30 CONTINUE +* +* N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so +* that: +* +* H(2) H(1) ( -X11 -X12 ) = ( * * ) +* ( -X21 -X22 ) ( 0 * ) +* ( scale 0 ) ( 0 0 ) +* ( 0 scale ) ( 0 0 ) +* + U1( 1 ) = -X( 1, 1 ) + U1( 2 ) = -X( 2, 1 ) + U1( 3 ) = SCALE + CALL DLARFG( 3, U1( 1 ), U1( 2 ), 1, TAU1 ) + U1( 1 ) = ONE +* + TEMP = -TAU1*( X( 1, 2 )+U1( 2 )*X( 2, 2 ) ) + U2( 1 ) = -TEMP*U1( 2 ) - X( 2, 2 ) + U2( 2 ) = -TEMP*U1( 3 ) + U2( 3 ) = SCALE + CALL DLARFG( 3, U2( 1 ), U2( 2 ), 1, TAU2 ) + U2( 1 ) = ONE +* +* Perform swap provisionally on diagonal block in D. +* + CALL DLARFX( 'L', 3, 4, U1, TAU1, D, LDD, WORK ) + CALL DLARFX( 'R', 4, 3, U1, TAU1, D, LDD, WORK ) + CALL DLARFX( 'L', 3, 4, U2, TAU2, D( 2, 1 ), LDD, WORK ) + CALL DLARFX( 'R', 4, 3, U2, TAU2, D( 1, 2 ), LDD, WORK ) +* +* Test whether to reject swap. +* + IF( MAX( ABS( D( 3, 1 ) ), ABS( D( 3, 2 ) ), ABS( D( 4, 1 ) ), + $ ABS( D( 4, 2 ) ) ).GT.THRESH )GO TO 50 +* +* Accept swap: apply transformation to the entire matrix T. +* + CALL DLARFX( 'L', 3, N-J1+1, U1, TAU1, T( J1, J1 ), LDT, WORK ) + CALL DLARFX( 'R', J4, 3, U1, TAU1, T( 1, J1 ), LDT, WORK ) + CALL DLARFX( 'L', 3, N-J1+1, U2, TAU2, T( J2, J1 ), LDT, WORK ) + CALL DLARFX( 'R', J4, 3, U2, TAU2, T( 1, J2 ), LDT, WORK ) +* + T( J3, J1 ) = ZERO + T( J3, J2 ) = ZERO + T( J4, J1 ) = ZERO + T( J4, J2 ) = ZERO +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL DLARFX( 'R', N, 3, U1, TAU1, Q( 1, J1 ), LDQ, WORK ) + CALL DLARFX( 'R', N, 3, U2, TAU2, Q( 1, J2 ), LDQ, WORK ) + END IF +* + 40 CONTINUE +* + IF( N2.EQ.2 ) THEN +* +* Standardize new 2-by-2 block T11 +* + CALL DLANV2( T( J1, J1 ), T( J1, J2 ), T( J2, J1 ), + $ T( J2, J2 ), WR1, WI1, WR2, WI2, CS, SN ) + CALL DROT( N-J1-1, T( J1, J1+2 ), LDT, T( J2, J1+2 ), LDT, + $ CS, SN ) + CALL DROT( J1-1, T( 1, J1 ), 1, T( 1, J2 ), 1, CS, SN ) + IF( WANTQ ) + $ CALL DROT( N, Q( 1, J1 ), 1, Q( 1, J2 ), 1, CS, SN ) + END IF +* + IF( N1.EQ.2 ) THEN +* +* Standardize new 2-by-2 block T22 +* + J3 = J1 + N2 + J4 = J3 + 1 + CALL DLANV2( T( J3, J3 ), T( J3, J4 ), T( J4, J3 ), + $ T( J4, J4 ), WR1, WI1, WR2, WI2, CS, SN ) + IF( J3+2.LE.N ) + $ CALL DROT( N-J3-1, T( J3, J3+2 ), LDT, T( J4, J3+2 ), + $ LDT, CS, SN ) + CALL DROT( J3-1, T( 1, J3 ), 1, T( 1, J4 ), 1, CS, SN ) + IF( WANTQ ) + $ CALL DROT( N, Q( 1, J3 ), 1, Q( 1, J4 ), 1, CS, SN ) + END IF +* + END IF + RETURN +* +* Exit with INFO = 1 if swap was rejected. +* + 50 CONTINUE + INFO = 1 + RETURN +* +* End of DLAEXC +* + END diff --git a/dspl/liblapack/SRC/dlag2.f b/dspl/liblapack/SRC/dlag2.f new file mode 100644 index 0000000..7f123b2 --- /dev/null +++ b/dspl/liblapack/SRC/dlag2.f @@ -0,0 +1,379 @@ +*> \brief \b DLAG2 computes the eigenvalues of a 2-by-2 generalized eigenvalue problem, with scaling as necessary to avoid over-/underflow. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAG2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAG2( A, LDA, B, LDB, SAFMIN, SCALE1, SCALE2, WR1, +* WR2, WI ) +* +* .. Scalar Arguments .. +* INTEGER LDA, LDB +* DOUBLE PRECISION SAFMIN, SCALE1, SCALE2, WI, WR1, WR2 +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAG2 computes the eigenvalues of a 2 x 2 generalized eigenvalue +*> problem A - w B, with scaling as necessary to avoid over-/underflow. +*> +*> The scaling factor "s" results in a modified eigenvalue equation +*> +*> s A - w B +*> +*> where s is a non-negative scaling factor chosen so that w, w B, +*> and s A do not overflow and, if possible, do not underflow, either. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, 2) +*> On entry, the 2 x 2 matrix A. It is assumed that its 1-norm +*> is less than 1/SAFMIN. Entries less than +*> sqrt(SAFMIN)*norm(A) are subject to being treated as zero. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= 2. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, 2) +*> On entry, the 2 x 2 upper triangular matrix B. It is +*> assumed that the one-norm of B is less than 1/SAFMIN. The +*> diagonals should be at least sqrt(SAFMIN) times the largest +*> element of B (in absolute value); if a diagonal is smaller +*> than that, then +/- sqrt(SAFMIN) will be used instead of +*> that diagonal. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= 2. +*> \endverbatim +*> +*> \param[in] SAFMIN +*> \verbatim +*> SAFMIN is DOUBLE PRECISION +*> The smallest positive number s.t. 1/SAFMIN does not +*> overflow. (This should always be DLAMCH('S') -- it is an +*> argument in order to avoid having to call DLAMCH frequently.) +*> \endverbatim +*> +*> \param[out] SCALE1 +*> \verbatim +*> SCALE1 is DOUBLE PRECISION +*> A scaling factor used to avoid over-/underflow in the +*> eigenvalue equation which defines the first eigenvalue. If +*> the eigenvalues are complex, then the eigenvalues are +*> ( WR1 +/- WI i ) / SCALE1 (which may lie outside the +*> exponent range of the machine), SCALE1=SCALE2, and SCALE1 +*> will always be positive. If the eigenvalues are real, then +*> the first (real) eigenvalue is WR1 / SCALE1 , but this may +*> overflow or underflow, and in fact, SCALE1 may be zero or +*> less than the underflow threshold if the exact eigenvalue +*> is sufficiently large. +*> \endverbatim +*> +*> \param[out] SCALE2 +*> \verbatim +*> SCALE2 is DOUBLE PRECISION +*> A scaling factor used to avoid over-/underflow in the +*> eigenvalue equation which defines the second eigenvalue. If +*> the eigenvalues are complex, then SCALE2=SCALE1. If the +*> eigenvalues are real, then the second (real) eigenvalue is +*> WR2 / SCALE2 , but this may overflow or underflow, and in +*> fact, SCALE2 may be zero or less than the underflow +*> threshold if the exact eigenvalue is sufficiently large. +*> \endverbatim +*> +*> \param[out] WR1 +*> \verbatim +*> WR1 is DOUBLE PRECISION +*> If the eigenvalue is real, then WR1 is SCALE1 times the +*> eigenvalue closest to the (2,2) element of A B**(-1). If the +*> eigenvalue is complex, then WR1=WR2 is SCALE1 times the real +*> part of the eigenvalues. +*> \endverbatim +*> +*> \param[out] WR2 +*> \verbatim +*> WR2 is DOUBLE PRECISION +*> If the eigenvalue is real, then WR2 is SCALE2 times the +*> other eigenvalue. If the eigenvalue is complex, then +*> WR1=WR2 is SCALE1 times the real part of the eigenvalues. +*> \endverbatim +*> +*> \param[out] WI +*> \verbatim +*> WI is DOUBLE PRECISION +*> If the eigenvalue is real, then WI is zero. If the +*> eigenvalue is complex, then WI is SCALE1 times the imaginary +*> part of the eigenvalues. WI will always be non-negative. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLAG2( A, LDA, B, LDB, SAFMIN, SCALE1, SCALE2, WR1, + $ WR2, WI ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER LDA, LDB + DOUBLE PRECISION SAFMIN, SCALE1, SCALE2, WI, WR1, WR2 +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = ONE / TWO ) + DOUBLE PRECISION FUZZY1 + PARAMETER ( FUZZY1 = ONE+1.0D-5 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION A11, A12, A21, A22, ABI22, ANORM, AS11, AS12, + $ AS22, ASCALE, B11, B12, B22, BINV11, BINV22, + $ BMIN, BNORM, BSCALE, BSIZE, C1, C2, C3, C4, C5, + $ DIFF, DISCR, PP, QQ, R, RTMAX, RTMIN, S1, S2, + $ SAFMAX, SHIFT, SS, SUM, WABS, WBIG, WDET, + $ WSCALE, WSIZE, WSMALL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SIGN, SQRT +* .. +* .. Executable Statements .. +* + RTMIN = SQRT( SAFMIN ) + RTMAX = ONE / RTMIN + SAFMAX = ONE / SAFMIN +* +* Scale A +* + ANORM = MAX( ABS( A( 1, 1 ) )+ABS( A( 2, 1 ) ), + $ ABS( A( 1, 2 ) )+ABS( A( 2, 2 ) ), SAFMIN ) + ASCALE = ONE / ANORM + A11 = ASCALE*A( 1, 1 ) + A21 = ASCALE*A( 2, 1 ) + A12 = ASCALE*A( 1, 2 ) + A22 = ASCALE*A( 2, 2 ) +* +* Perturb B if necessary to insure non-singularity +* + B11 = B( 1, 1 ) + B12 = B( 1, 2 ) + B22 = B( 2, 2 ) + BMIN = RTMIN*MAX( ABS( B11 ), ABS( B12 ), ABS( B22 ), RTMIN ) + IF( ABS( B11 ).LT.BMIN ) + $ B11 = SIGN( BMIN, B11 ) + IF( ABS( B22 ).LT.BMIN ) + $ B22 = SIGN( BMIN, B22 ) +* +* Scale B +* + BNORM = MAX( ABS( B11 ), ABS( B12 )+ABS( B22 ), SAFMIN ) + BSIZE = MAX( ABS( B11 ), ABS( B22 ) ) + BSCALE = ONE / BSIZE + B11 = B11*BSCALE + B12 = B12*BSCALE + B22 = B22*BSCALE +* +* Compute larger eigenvalue by method described by C. van Loan +* +* ( AS is A shifted by -SHIFT*B ) +* + BINV11 = ONE / B11 + BINV22 = ONE / B22 + S1 = A11*BINV11 + S2 = A22*BINV22 + IF( ABS( S1 ).LE.ABS( S2 ) ) THEN + AS12 = A12 - S1*B12 + AS22 = A22 - S1*B22 + SS = A21*( BINV11*BINV22 ) + ABI22 = AS22*BINV22 - SS*B12 + PP = HALF*ABI22 + SHIFT = S1 + ELSE + AS12 = A12 - S2*B12 + AS11 = A11 - S2*B11 + SS = A21*( BINV11*BINV22 ) + ABI22 = -SS*B12 + PP = HALF*( AS11*BINV11+ABI22 ) + SHIFT = S2 + END IF + QQ = SS*AS12 + IF( ABS( PP*RTMIN ).GE.ONE ) THEN + DISCR = ( RTMIN*PP )**2 + QQ*SAFMIN + R = SQRT( ABS( DISCR ) )*RTMAX + ELSE + IF( PP**2+ABS( QQ ).LE.SAFMIN ) THEN + DISCR = ( RTMAX*PP )**2 + QQ*SAFMAX + R = SQRT( ABS( DISCR ) )*RTMIN + ELSE + DISCR = PP**2 + QQ + R = SQRT( ABS( DISCR ) ) + END IF + END IF +* +* Note: the test of R in the following IF is to cover the case when +* DISCR is small and negative and is flushed to zero during +* the calculation of R. On machines which have a consistent +* flush-to-zero threshold and handle numbers above that +* threshold correctly, it would not be necessary. +* + IF( DISCR.GE.ZERO .OR. R.EQ.ZERO ) THEN + SUM = PP + SIGN( R, PP ) + DIFF = PP - SIGN( R, PP ) + WBIG = SHIFT + SUM +* +* Compute smaller eigenvalue +* + WSMALL = SHIFT + DIFF + IF( HALF*ABS( WBIG ).GT.MAX( ABS( WSMALL ), SAFMIN ) ) THEN + WDET = ( A11*A22-A12*A21 )*( BINV11*BINV22 ) + WSMALL = WDET / WBIG + END IF +* +* Choose (real) eigenvalue closest to 2,2 element of A*B**(-1) +* for WR1. +* + IF( PP.GT.ABI22 ) THEN + WR1 = MIN( WBIG, WSMALL ) + WR2 = MAX( WBIG, WSMALL ) + ELSE + WR1 = MAX( WBIG, WSMALL ) + WR2 = MIN( WBIG, WSMALL ) + END IF + WI = ZERO + ELSE +* +* Complex eigenvalues +* + WR1 = SHIFT + PP + WR2 = WR1 + WI = R + END IF +* +* Further scaling to avoid underflow and overflow in computing +* SCALE1 and overflow in computing w*B. +* +* This scale factor (WSCALE) is bounded from above using C1 and C2, +* and from below using C3 and C4. +* C1 implements the condition s A must never overflow. +* C2 implements the condition w B must never overflow. +* C3, with C2, +* implement the condition that s A - w B must never overflow. +* C4 implements the condition s should not underflow. +* C5 implements the condition max(s,|w|) should be at least 2. +* + C1 = BSIZE*( SAFMIN*MAX( ONE, ASCALE ) ) + C2 = SAFMIN*MAX( ONE, BNORM ) + C3 = BSIZE*SAFMIN + IF( ASCALE.LE.ONE .AND. BSIZE.LE.ONE ) THEN + C4 = MIN( ONE, ( ASCALE / SAFMIN )*BSIZE ) + ELSE + C4 = ONE + END IF + IF( ASCALE.LE.ONE .OR. BSIZE.LE.ONE ) THEN + C5 = MIN( ONE, ASCALE*BSIZE ) + ELSE + C5 = ONE + END IF +* +* Scale first eigenvalue +* + WABS = ABS( WR1 ) + ABS( WI ) + WSIZE = MAX( SAFMIN, C1, FUZZY1*( WABS*C2+C3 ), + $ MIN( C4, HALF*MAX( WABS, C5 ) ) ) + IF( WSIZE.NE.ONE ) THEN + WSCALE = ONE / WSIZE + IF( WSIZE.GT.ONE ) THEN + SCALE1 = ( MAX( ASCALE, BSIZE )*WSCALE )* + $ MIN( ASCALE, BSIZE ) + ELSE + SCALE1 = ( MIN( ASCALE, BSIZE )*WSCALE )* + $ MAX( ASCALE, BSIZE ) + END IF + WR1 = WR1*WSCALE + IF( WI.NE.ZERO ) THEN + WI = WI*WSCALE + WR2 = WR1 + SCALE2 = SCALE1 + END IF + ELSE + SCALE1 = ASCALE*BSIZE + SCALE2 = SCALE1 + END IF +* +* Scale second eigenvalue (if real) +* + IF( WI.EQ.ZERO ) THEN + WSIZE = MAX( SAFMIN, C1, FUZZY1*( ABS( WR2 )*C2+C3 ), + $ MIN( C4, HALF*MAX( ABS( WR2 ), C5 ) ) ) + IF( WSIZE.NE.ONE ) THEN + WSCALE = ONE / WSIZE + IF( WSIZE.GT.ONE ) THEN + SCALE2 = ( MAX( ASCALE, BSIZE )*WSCALE )* + $ MIN( ASCALE, BSIZE ) + ELSE + SCALE2 = ( MIN( ASCALE, BSIZE )*WSCALE )* + $ MAX( ASCALE, BSIZE ) + END IF + WR2 = WR2*WSCALE + ELSE + SCALE2 = ASCALE*BSIZE + END IF + END IF +* +* End of DLAG2 +* + RETURN + END diff --git a/dspl/liblapack/SRC/dlag2s.f b/dspl/liblapack/SRC/dlag2s.f new file mode 100644 index 0000000..09e2ac7 --- /dev/null +++ b/dspl/liblapack/SRC/dlag2s.f @@ -0,0 +1,152 @@ +*> \brief \b DLAG2S converts a double precision matrix to a single precision matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAG2S + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAG2S( M, N, A, LDA, SA, LDSA, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDSA, M, N +* .. +* .. Array Arguments .. +* REAL SA( LDSA, * ) +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAG2S converts a DOUBLE PRECISION matrix, SA, to a SINGLE +*> PRECISION matrix, A. +*> +*> RMAX is the overflow for the SINGLE PRECISION arithmetic +*> DLAG2S checks that all the entries of A are between -RMAX and +*> RMAX. If not the conversion is aborted and a flag is raised. +*> +*> This is an auxiliary routine so there is no argument checking. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of lines of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N coefficient matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] SA +*> \verbatim +*> SA is REAL array, dimension (LDSA,N) +*> On exit, if INFO=0, the M-by-N coefficient matrix SA; if +*> INFO>0, the content of SA is unspecified. +*> \endverbatim +*> +*> \param[in] LDSA +*> \verbatim +*> LDSA is INTEGER +*> The leading dimension of the array SA. LDSA >= max(1,M). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> = 1: an entry of the matrix A is greater than the SINGLE +*> PRECISION overflow threshold, in this case, the content +*> of SA in exit is unspecified. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLAG2S( M, N, A, LDA, SA, LDSA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDSA, M, N +* .. +* .. Array Arguments .. + REAL SA( LDSA, * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION RMAX +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Executable Statements .. +* + RMAX = SLAMCH( 'O' ) + DO 20 J = 1, N + DO 10 I = 1, M + IF( ( A( I, J ).LT.-RMAX ) .OR. ( A( I, J ).GT.RMAX ) ) THEN + INFO = 1 + GO TO 30 + END IF + SA( I, J ) = A( I, J ) + 10 CONTINUE + 20 CONTINUE + INFO = 0 + 30 CONTINUE + RETURN +* +* End of DLAG2S +* + END diff --git a/dspl/liblapack/SRC/dlags2.f b/dspl/liblapack/SRC/dlags2.f new file mode 100644 index 0000000..62a8334 --- /dev/null +++ b/dspl/liblapack/SRC/dlags2.f @@ -0,0 +1,362 @@ +*> \brief \b DLAGS2 computes 2-by-2 orthogonal matrices U, V, and Q, and applies them to matrices A and B such that the rows of the transformed A and B are parallel. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAGS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, CSV, +* SNV, CSQ, SNQ ) +* +* .. Scalar Arguments .. +* LOGICAL UPPER +* DOUBLE PRECISION A1, A2, A3, B1, B2, B3, CSQ, CSU, CSV, SNQ, +* $ SNU, SNV +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAGS2 computes 2-by-2 orthogonal matrices U, V and Q, such +*> that if ( UPPER ) then +*> +*> U**T *A*Q = U**T *( A1 A2 )*Q = ( x 0 ) +*> ( 0 A3 ) ( x x ) +*> and +*> V**T*B*Q = V**T *( B1 B2 )*Q = ( x 0 ) +*> ( 0 B3 ) ( x x ) +*> +*> or if ( .NOT.UPPER ) then +*> +*> U**T *A*Q = U**T *( A1 0 )*Q = ( x x ) +*> ( A2 A3 ) ( 0 x ) +*> and +*> V**T*B*Q = V**T*( B1 0 )*Q = ( x x ) +*> ( B2 B3 ) ( 0 x ) +*> +*> The rows of the transformed A and B are parallel, where +*> +*> U = ( CSU SNU ), V = ( CSV SNV ), Q = ( CSQ SNQ ) +*> ( -SNU CSU ) ( -SNV CSV ) ( -SNQ CSQ ) +*> +*> Z**T denotes the transpose of Z. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPPER +*> \verbatim +*> UPPER is LOGICAL +*> = .TRUE.: the input matrices A and B are upper triangular. +*> = .FALSE.: the input matrices A and B are lower triangular. +*> \endverbatim +*> +*> \param[in] A1 +*> \verbatim +*> A1 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] A2 +*> \verbatim +*> A2 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] A3 +*> \verbatim +*> A3 is DOUBLE PRECISION +*> On entry, A1, A2 and A3 are elements of the input 2-by-2 +*> upper (lower) triangular matrix A. +*> \endverbatim +*> +*> \param[in] B1 +*> \verbatim +*> B1 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] B2 +*> \verbatim +*> B2 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] B3 +*> \verbatim +*> B3 is DOUBLE PRECISION +*> On entry, B1, B2 and B3 are elements of the input 2-by-2 +*> upper (lower) triangular matrix B. +*> \endverbatim +*> +*> \param[out] CSU +*> \verbatim +*> CSU is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] SNU +*> \verbatim +*> SNU is DOUBLE PRECISION +*> The desired orthogonal matrix U. +*> \endverbatim +*> +*> \param[out] CSV +*> \verbatim +*> CSV is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] SNV +*> \verbatim +*> SNV is DOUBLE PRECISION +*> The desired orthogonal matrix V. +*> \endverbatim +*> +*> \param[out] CSQ +*> \verbatim +*> CSQ is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] SNQ +*> \verbatim +*> SNQ is DOUBLE PRECISION +*> The desired orthogonal matrix Q. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, CSV, + $ SNV, CSQ, SNQ ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL UPPER + DOUBLE PRECISION A1, A2, A3, B1, B2, B3, CSQ, CSU, CSV, SNQ, + $ SNU, SNV +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION A, AUA11, AUA12, AUA21, AUA22, AVB11, AVB12, + $ AVB21, AVB22, B, C, CSL, CSR, D, R, S1, S2, + $ SNL, SNR, UA11, UA11R, UA12, UA21, UA22, UA22R, + $ VB11, VB11R, VB12, VB21, VB22, VB22R +* .. +* .. External Subroutines .. + EXTERNAL DLARTG, DLASV2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + IF( UPPER ) THEN +* +* Input matrices A and B are upper triangular matrices +* +* Form matrix C = A*adj(B) = ( a b ) +* ( 0 d ) +* + A = A1*B3 + D = A3*B1 + B = A2*B1 - A1*B2 +* +* The SVD of real 2-by-2 triangular C +* +* ( CSL -SNL )*( A B )*( CSR SNR ) = ( R 0 ) +* ( SNL CSL ) ( 0 D ) ( -SNR CSR ) ( 0 T ) +* + CALL DLASV2( A, B, D, S1, S2, SNR, CSR, SNL, CSL ) +* + IF( ABS( CSL ).GE.ABS( SNL ) .OR. ABS( CSR ).GE.ABS( SNR ) ) + $ THEN +* +* Compute the (1,1) and (1,2) elements of U**T *A and V**T *B, +* and (1,2) element of |U|**T *|A| and |V|**T *|B|. +* + UA11R = CSL*A1 + UA12 = CSL*A2 + SNL*A3 +* + VB11R = CSR*B1 + VB12 = CSR*B2 + SNR*B3 +* + AUA12 = ABS( CSL )*ABS( A2 ) + ABS( SNL )*ABS( A3 ) + AVB12 = ABS( CSR )*ABS( B2 ) + ABS( SNR )*ABS( B3 ) +* +* zero (1,2) elements of U**T *A and V**T *B +* + IF( ( ABS( UA11R )+ABS( UA12 ) ).NE.ZERO ) THEN + IF( AUA12 / ( ABS( UA11R )+ABS( UA12 ) ).LE.AVB12 / + $ ( ABS( VB11R )+ABS( VB12 ) ) ) THEN + CALL DLARTG( -UA11R, UA12, CSQ, SNQ, R ) + ELSE + CALL DLARTG( -VB11R, VB12, CSQ, SNQ, R ) + END IF + ELSE + CALL DLARTG( -VB11R, VB12, CSQ, SNQ, R ) + END IF +* + CSU = CSL + SNU = -SNL + CSV = CSR + SNV = -SNR +* + ELSE +* +* Compute the (2,1) and (2,2) elements of U**T *A and V**T *B, +* and (2,2) element of |U|**T *|A| and |V|**T *|B|. +* + UA21 = -SNL*A1 + UA22 = -SNL*A2 + CSL*A3 +* + VB21 = -SNR*B1 + VB22 = -SNR*B2 + CSR*B3 +* + AUA22 = ABS( SNL )*ABS( A2 ) + ABS( CSL )*ABS( A3 ) + AVB22 = ABS( SNR )*ABS( B2 ) + ABS( CSR )*ABS( B3 ) +* +* zero (2,2) elements of U**T*A and V**T*B, and then swap. +* + IF( ( ABS( UA21 )+ABS( UA22 ) ).NE.ZERO ) THEN + IF( AUA22 / ( ABS( UA21 )+ABS( UA22 ) ).LE.AVB22 / + $ ( ABS( VB21 )+ABS( VB22 ) ) ) THEN + CALL DLARTG( -UA21, UA22, CSQ, SNQ, R ) + ELSE + CALL DLARTG( -VB21, VB22, CSQ, SNQ, R ) + END IF + ELSE + CALL DLARTG( -VB21, VB22, CSQ, SNQ, R ) + END IF +* + CSU = SNL + SNU = CSL + CSV = SNR + SNV = CSR +* + END IF +* + ELSE +* +* Input matrices A and B are lower triangular matrices +* +* Form matrix C = A*adj(B) = ( a 0 ) +* ( c d ) +* + A = A1*B3 + D = A3*B1 + C = A2*B3 - A3*B2 +* +* The SVD of real 2-by-2 triangular C +* +* ( CSL -SNL )*( A 0 )*( CSR SNR ) = ( R 0 ) +* ( SNL CSL ) ( C D ) ( -SNR CSR ) ( 0 T ) +* + CALL DLASV2( A, C, D, S1, S2, SNR, CSR, SNL, CSL ) +* + IF( ABS( CSR ).GE.ABS( SNR ) .OR. ABS( CSL ).GE.ABS( SNL ) ) + $ THEN +* +* Compute the (2,1) and (2,2) elements of U**T *A and V**T *B, +* and (2,1) element of |U|**T *|A| and |V|**T *|B|. +* + UA21 = -SNR*A1 + CSR*A2 + UA22R = CSR*A3 +* + VB21 = -SNL*B1 + CSL*B2 + VB22R = CSL*B3 +* + AUA21 = ABS( SNR )*ABS( A1 ) + ABS( CSR )*ABS( A2 ) + AVB21 = ABS( SNL )*ABS( B1 ) + ABS( CSL )*ABS( B2 ) +* +* zero (2,1) elements of U**T *A and V**T *B. +* + IF( ( ABS( UA21 )+ABS( UA22R ) ).NE.ZERO ) THEN + IF( AUA21 / ( ABS( UA21 )+ABS( UA22R ) ).LE.AVB21 / + $ ( ABS( VB21 )+ABS( VB22R ) ) ) THEN + CALL DLARTG( UA22R, UA21, CSQ, SNQ, R ) + ELSE + CALL DLARTG( VB22R, VB21, CSQ, SNQ, R ) + END IF + ELSE + CALL DLARTG( VB22R, VB21, CSQ, SNQ, R ) + END IF +* + CSU = CSR + SNU = -SNR + CSV = CSL + SNV = -SNL +* + ELSE +* +* Compute the (1,1) and (1,2) elements of U**T *A and V**T *B, +* and (1,1) element of |U|**T *|A| and |V|**T *|B|. +* + UA11 = CSR*A1 + SNR*A2 + UA12 = SNR*A3 +* + VB11 = CSL*B1 + SNL*B2 + VB12 = SNL*B3 +* + AUA11 = ABS( CSR )*ABS( A1 ) + ABS( SNR )*ABS( A2 ) + AVB11 = ABS( CSL )*ABS( B1 ) + ABS( SNL )*ABS( B2 ) +* +* zero (1,1) elements of U**T*A and V**T*B, and then swap. +* + IF( ( ABS( UA11 )+ABS( UA12 ) ).NE.ZERO ) THEN + IF( AUA11 / ( ABS( UA11 )+ABS( UA12 ) ).LE.AVB11 / + $ ( ABS( VB11 )+ABS( VB12 ) ) ) THEN + CALL DLARTG( UA12, UA11, CSQ, SNQ, R ) + ELSE + CALL DLARTG( VB12, VB11, CSQ, SNQ, R ) + END IF + ELSE + CALL DLARTG( VB12, VB11, CSQ, SNQ, R ) + END IF +* + CSU = SNR + SNU = CSR + CSV = SNL + SNV = CSL +* + END IF +* + END IF +* + RETURN +* +* End of DLAGS2 +* + END diff --git a/dspl/liblapack/SRC/dlagtf.f b/dspl/liblapack/SRC/dlagtf.f new file mode 100644 index 0000000..4b257c6 --- /dev/null +++ b/dspl/liblapack/SRC/dlagtf.f @@ -0,0 +1,266 @@ +*> \brief \b DLAGTF computes an LU factorization of a matrix T-λI, where T is a general tridiagonal matrix, and λ a scalar, using partial pivoting with row interchanges. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAGTF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAGTF( N, A, LAMBDA, B, C, TOL, D, IN, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* DOUBLE PRECISION LAMBDA, TOL +* .. +* .. Array Arguments .. +* INTEGER IN( * ) +* DOUBLE PRECISION A( * ), B( * ), C( * ), D( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAGTF factorizes the matrix (T - lambda*I), where T is an n by n +*> tridiagonal matrix and lambda is a scalar, as +*> +*> T - lambda*I = PLU, +*> +*> where P is a permutation matrix, L is a unit lower tridiagonal matrix +*> with at most one non-zero sub-diagonal elements per column and U is +*> an upper triangular matrix with at most two non-zero super-diagonal +*> elements per column. +*> +*> The factorization is obtained by Gaussian elimination with partial +*> pivoting and implicit row scaling. +*> +*> The parameter LAMBDA is included in the routine so that DLAGTF may +*> be used, in conjunction with DLAGTS, to obtain eigenvectors of T by +*> inverse iteration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (N) +*> On entry, A must contain the diagonal elements of T. +*> +*> On exit, A is overwritten by the n diagonal elements of the +*> upper triangular matrix U of the factorization of T. +*> \endverbatim +*> +*> \param[in] LAMBDA +*> \verbatim +*> LAMBDA is DOUBLE PRECISION +*> On entry, the scalar lambda. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (N-1) +*> On entry, B must contain the (n-1) super-diagonal elements of +*> T. +*> +*> On exit, B is overwritten by the (n-1) super-diagonal +*> elements of the matrix U of the factorization of T. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N-1) +*> On entry, C must contain the (n-1) sub-diagonal elements of +*> T. +*> +*> On exit, C is overwritten by the (n-1) sub-diagonal elements +*> of the matrix L of the factorization of T. +*> \endverbatim +*> +*> \param[in] TOL +*> \verbatim +*> TOL is DOUBLE PRECISION +*> On entry, a relative tolerance used to indicate whether or +*> not the matrix (T - lambda*I) is nearly singular. TOL should +*> normally be chose as approximately the largest relative error +*> in the elements of T. For example, if the elements of T are +*> correct to about 4 significant figures, then TOL should be +*> set to about 5*10**(-4). If TOL is supplied as less than eps, +*> where eps is the relative machine precision, then the value +*> eps is used in place of TOL. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N-2) +*> On exit, D is overwritten by the (n-2) second super-diagonal +*> elements of the matrix U of the factorization of T. +*> \endverbatim +*> +*> \param[out] IN +*> \verbatim +*> IN is INTEGER array, dimension (N) +*> On exit, IN contains details of the permutation matrix P. If +*> an interchange occurred at the kth step of the elimination, +*> then IN(k) = 1, otherwise IN(k) = 0. The element IN(n) +*> returns the smallest positive integer j such that +*> +*> abs( u(j,j) ).le. norm( (T - lambda*I)(j) )*TOL, +*> +*> where norm( A(j) ) denotes the sum of the absolute values of +*> the jth row of the matrix A. If no such j exists then IN(n) +*> is returned as zero. If IN(n) is returned as positive, then a +*> diagonal element of U is small, indicating that +*> (T - lambda*I) is singular or nearly singular, +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0 : successful exit +*> .lt. 0: if INFO = -k, the kth argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DLAGTF( N, A, LAMBDA, B, C, TOL, D, IN, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, N + DOUBLE PRECISION LAMBDA, TOL +* .. +* .. Array Arguments .. + INTEGER IN( * ) + DOUBLE PRECISION A( * ), B( * ), C( * ), D( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER K + DOUBLE PRECISION EPS, MULT, PIV1, PIV2, SCALE1, SCALE2, TEMP, TL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'DLAGTF', -INFO ) + RETURN + END IF +* + IF( N.EQ.0 ) + $ RETURN +* + A( 1 ) = A( 1 ) - LAMBDA + IN( N ) = 0 + IF( N.EQ.1 ) THEN + IF( A( 1 ).EQ.ZERO ) + $ IN( 1 ) = 1 + RETURN + END IF +* + EPS = DLAMCH( 'Epsilon' ) +* + TL = MAX( TOL, EPS ) + SCALE1 = ABS( A( 1 ) ) + ABS( B( 1 ) ) + DO 10 K = 1, N - 1 + A( K+1 ) = A( K+1 ) - LAMBDA + SCALE2 = ABS( C( K ) ) + ABS( A( K+1 ) ) + IF( K.LT.( N-1 ) ) + $ SCALE2 = SCALE2 + ABS( B( K+1 ) ) + IF( A( K ).EQ.ZERO ) THEN + PIV1 = ZERO + ELSE + PIV1 = ABS( A( K ) ) / SCALE1 + END IF + IF( C( K ).EQ.ZERO ) THEN + IN( K ) = 0 + PIV2 = ZERO + SCALE1 = SCALE2 + IF( K.LT.( N-1 ) ) + $ D( K ) = ZERO + ELSE + PIV2 = ABS( C( K ) ) / SCALE2 + IF( PIV2.LE.PIV1 ) THEN + IN( K ) = 0 + SCALE1 = SCALE2 + C( K ) = C( K ) / A( K ) + A( K+1 ) = A( K+1 ) - C( K )*B( K ) + IF( K.LT.( N-1 ) ) + $ D( K ) = ZERO + ELSE + IN( K ) = 1 + MULT = A( K ) / C( K ) + A( K ) = C( K ) + TEMP = A( K+1 ) + A( K+1 ) = B( K ) - MULT*TEMP + IF( K.LT.( N-1 ) ) THEN + D( K ) = B( K+1 ) + B( K+1 ) = -MULT*D( K ) + END IF + B( K ) = TEMP + C( K ) = MULT + END IF + END IF + IF( ( MAX( PIV1, PIV2 ).LE.TL ) .AND. ( IN( N ).EQ.0 ) ) + $ IN( N ) = K + 10 CONTINUE + IF( ( ABS( A( N ) ).LE.SCALE1*TL ) .AND. ( IN( N ).EQ.0 ) ) + $ IN( N ) = N +* + RETURN +* +* End of DLAGTF +* + END diff --git a/dspl/liblapack/SRC/dlagtm.f b/dspl/liblapack/SRC/dlagtm.f new file mode 100644 index 0000000..bb330e8 --- /dev/null +++ b/dspl/liblapack/SRC/dlagtm.f @@ -0,0 +1,278 @@ +*> \brief \b DLAGTM performs a matrix-matrix product of the form C = αAB+βC, where A is a tridiagonal matrix, B and C are rectangular matrices, and α and β are scalars, which may be 0, 1, or -1. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAGTM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAGTM( TRANS, N, NRHS, ALPHA, DL, D, DU, X, LDX, BETA, +* B, LDB ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER LDB, LDX, N, NRHS +* DOUBLE PRECISION ALPHA, BETA +* .. +* .. Array Arguments .. +* DOUBLE PRECISION B( LDB, * ), D( * ), DL( * ), DU( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAGTM performs a matrix-vector product of the form +*> +*> B := alpha * A * X + beta * B +*> +*> where A is a tridiagonal matrix of order N, B and X are N by NRHS +*> matrices, and alpha and beta are real scalars, each of which may be +*> 0., 1., or -1. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the operation applied to A. +*> = 'N': No transpose, B := alpha * A * X + beta * B +*> = 'T': Transpose, B := alpha * A'* X + beta * B +*> = 'C': Conjugate transpose = Transpose +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices X and B. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION +*> The scalar alpha. ALPHA must be 0., 1., or -1.; otherwise, +*> it is assumed to be 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) sub-diagonal elements of T. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of T. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) super-diagonal elements of T. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> The N by NRHS matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(N,1). +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION +*> The scalar beta. BETA must be 0., 1., or -1.; otherwise, +*> it is assumed to be 1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N by NRHS matrix B. +*> On exit, B is overwritten by the matrix expression +*> B := alpha * A * X + beta * B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(N,1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLAGTM( TRANS, N, NRHS, ALPHA, DL, D, DU, X, LDX, BETA, + $ B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER LDB, LDX, N, NRHS + DOUBLE PRECISION ALPHA, BETA +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( LDB, * ), D( * ), DL( * ), DU( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) + $ RETURN +* +* Multiply B by BETA if BETA.NE.1. +* + IF( BETA.EQ.ZERO ) THEN + DO 20 J = 1, NRHS + DO 10 I = 1, N + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE IF( BETA.EQ.-ONE ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = -B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF +* + IF( ALPHA.EQ.ONE ) THEN + IF( LSAME( TRANS, 'N' ) ) THEN +* +* Compute B := B + A*X +* + DO 60 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + + $ DU( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) + DL( N-1 )*X( N-1, J ) + + $ D( N )*X( N, J ) + DO 50 I = 2, N - 1 + B( I, J ) = B( I, J ) + DL( I-1 )*X( I-1, J ) + + $ D( I )*X( I, J ) + DU( I )*X( I+1, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE +* +* Compute B := B + A**T*X +* + DO 80 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + + $ DL( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) + DU( N-1 )*X( N-1, J ) + + $ D( N )*X( N, J ) + DO 70 I = 2, N - 1 + B( I, J ) = B( I, J ) + DU( I-1 )*X( I-1, J ) + + $ D( I )*X( I, J ) + DL( I )*X( I+1, J ) + 70 CONTINUE + END IF + 80 CONTINUE + END IF + ELSE IF( ALPHA.EQ.-ONE ) THEN + IF( LSAME( TRANS, 'N' ) ) THEN +* +* Compute B := B - A*X +* + DO 100 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) - + $ DU( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) - DL( N-1 )*X( N-1, J ) - + $ D( N )*X( N, J ) + DO 90 I = 2, N - 1 + B( I, J ) = B( I, J ) - DL( I-1 )*X( I-1, J ) - + $ D( I )*X( I, J ) - DU( I )*X( I+1, J ) + 90 CONTINUE + END IF + 100 CONTINUE + ELSE +* +* Compute B := B - A**T*X +* + DO 120 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) - + $ DL( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) - DU( N-1 )*X( N-1, J ) - + $ D( N )*X( N, J ) + DO 110 I = 2, N - 1 + B( I, J ) = B( I, J ) - DU( I-1 )*X( I-1, J ) - + $ D( I )*X( I, J ) - DL( I )*X( I+1, J ) + 110 CONTINUE + END IF + 120 CONTINUE + END IF + END IF + RETURN +* +* End of DLAGTM +* + END diff --git a/dspl/liblapack/SRC/dlagts.f b/dspl/liblapack/SRC/dlagts.f new file mode 100644 index 0000000..9260758 --- /dev/null +++ b/dspl/liblapack/SRC/dlagts.f @@ -0,0 +1,383 @@ +*> \brief \b DLAGTS solves the system of equations (T-λI)x = y or (T-λI)Tx = y,where T is a general tridiagonal matrix and λ a scalar, using the LU factorization computed by slagtf. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAGTS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAGTS( JOB, N, A, B, C, D, IN, Y, TOL, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, JOB, N +* DOUBLE PRECISION TOL +* .. +* .. Array Arguments .. +* INTEGER IN( * ) +* DOUBLE PRECISION A( * ), B( * ), C( * ), D( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAGTS may be used to solve one of the systems of equations +*> +*> (T - lambda*I)*x = y or (T - lambda*I)**T*x = y, +*> +*> where T is an n by n tridiagonal matrix, for x, following the +*> factorization of (T - lambda*I) as +*> +*> (T - lambda*I) = P*L*U , +*> +*> by routine DLAGTF. The choice of equation to be solved is +*> controlled by the argument JOB, and in each case there is an option +*> to perturb zero or very small diagonal elements of U, this option +*> being intended for use in applications such as inverse iteration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is INTEGER +*> Specifies the job to be performed by DLAGTS as follows: +*> = 1: The equations (T - lambda*I)x = y are to be solved, +*> but diagonal elements of U are not to be perturbed. +*> = -1: The equations (T - lambda*I)x = y are to be solved +*> and, if overflow would otherwise occur, the diagonal +*> elements of U are to be perturbed. See argument TOL +*> below. +*> = 2: The equations (T - lambda*I)**Tx = y are to be solved, +*> but diagonal elements of U are not to be perturbed. +*> = -2: The equations (T - lambda*I)**Tx = y are to be solved +*> and, if overflow would otherwise occur, the diagonal +*> elements of U are to be perturbed. See argument TOL +*> below. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (N) +*> On entry, A must contain the diagonal elements of U as +*> returned from DLAGTF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (N-1) +*> On entry, B must contain the first super-diagonal elements of +*> U as returned from DLAGTF. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N-1) +*> On entry, C must contain the sub-diagonal elements of L as +*> returned from DLAGTF. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N-2) +*> On entry, D must contain the second super-diagonal elements +*> of U as returned from DLAGTF. +*> \endverbatim +*> +*> \param[in] IN +*> \verbatim +*> IN is INTEGER array, dimension (N) +*> On entry, IN must contain details of the matrix P as returned +*> from DLAGTF. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, dimension (N) +*> On entry, the right hand side vector y. +*> On exit, Y is overwritten by the solution vector x. +*> \endverbatim +*> +*> \param[in,out] TOL +*> \verbatim +*> TOL is DOUBLE PRECISION +*> On entry, with JOB .lt. 0, TOL should be the minimum +*> perturbation to be made to very small diagonal elements of U. +*> TOL should normally be chosen as about eps*norm(U), where eps +*> is the relative machine precision, but if TOL is supplied as +*> non-positive, then it is reset to eps*max( abs( u(i,j) ) ). +*> If JOB .gt. 0 then TOL is not referenced. +*> +*> On exit, TOL is changed as described above, only if TOL is +*> non-positive on entry. Otherwise TOL is unchanged. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0 : successful exit +*> .lt. 0: if INFO = -i, the i-th argument had an illegal value +*> .gt. 0: overflow would occur when computing the INFO(th) +*> element of the solution vector x. This can only occur +*> when JOB is supplied as positive and either means +*> that a diagonal element of U is very small, or that +*> the elements of the right-hand side vector y are very +*> large. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLAGTS( JOB, N, A, B, C, D, IN, Y, TOL, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, JOB, N + DOUBLE PRECISION TOL +* .. +* .. Array Arguments .. + INTEGER IN( * ) + DOUBLE PRECISION A( * ), B( * ), C( * ), D( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER K + DOUBLE PRECISION ABSAK, AK, BIGNUM, EPS, PERT, SFMIN, TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( ( ABS( JOB ).GT.2 ) .OR. ( JOB.EQ.0 ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAGTS', -INFO ) + RETURN + END IF +* + IF( N.EQ.0 ) + $ RETURN +* + EPS = DLAMCH( 'Epsilon' ) + SFMIN = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SFMIN +* + IF( JOB.LT.0 ) THEN + IF( TOL.LE.ZERO ) THEN + TOL = ABS( A( 1 ) ) + IF( N.GT.1 ) + $ TOL = MAX( TOL, ABS( A( 2 ) ), ABS( B( 1 ) ) ) + DO 10 K = 3, N + TOL = MAX( TOL, ABS( A( K ) ), ABS( B( K-1 ) ), + $ ABS( D( K-2 ) ) ) + 10 CONTINUE + TOL = TOL*EPS + IF( TOL.EQ.ZERO ) + $ TOL = EPS + END IF + END IF +* + IF( ABS( JOB ).EQ.1 ) THEN + DO 20 K = 2, N + IF( IN( K-1 ).EQ.0 ) THEN + Y( K ) = Y( K ) - C( K-1 )*Y( K-1 ) + ELSE + TEMP = Y( K-1 ) + Y( K-1 ) = Y( K ) + Y( K ) = TEMP - C( K-1 )*Y( K ) + END IF + 20 CONTINUE + IF( JOB.EQ.1 ) THEN + DO 30 K = N, 1, -1 + IF( K.LE.N-2 ) THEN + TEMP = Y( K ) - B( K )*Y( K+1 ) - D( K )*Y( K+2 ) + ELSE IF( K.EQ.N-1 ) THEN + TEMP = Y( K ) - B( K )*Y( K+1 ) + ELSE + TEMP = Y( K ) + END IF + AK = A( K ) + ABSAK = ABS( AK ) + IF( ABSAK.LT.ONE ) THEN + IF( ABSAK.LT.SFMIN ) THEN + IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK ) + $ THEN + INFO = K + RETURN + ELSE + TEMP = TEMP*BIGNUM + AK = AK*BIGNUM + END IF + ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN + INFO = K + RETURN + END IF + END IF + Y( K ) = TEMP / AK + 30 CONTINUE + ELSE + DO 50 K = N, 1, -1 + IF( K.LE.N-2 ) THEN + TEMP = Y( K ) - B( K )*Y( K+1 ) - D( K )*Y( K+2 ) + ELSE IF( K.EQ.N-1 ) THEN + TEMP = Y( K ) - B( K )*Y( K+1 ) + ELSE + TEMP = Y( K ) + END IF + AK = A( K ) + PERT = SIGN( TOL, AK ) + 40 CONTINUE + ABSAK = ABS( AK ) + IF( ABSAK.LT.ONE ) THEN + IF( ABSAK.LT.SFMIN ) THEN + IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK ) + $ THEN + AK = AK + PERT + PERT = 2*PERT + GO TO 40 + ELSE + TEMP = TEMP*BIGNUM + AK = AK*BIGNUM + END IF + ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN + AK = AK + PERT + PERT = 2*PERT + GO TO 40 + END IF + END IF + Y( K ) = TEMP / AK + 50 CONTINUE + END IF + ELSE +* +* Come to here if JOB = 2 or -2 +* + IF( JOB.EQ.2 ) THEN + DO 60 K = 1, N + IF( K.GE.3 ) THEN + TEMP = Y( K ) - B( K-1 )*Y( K-1 ) - D( K-2 )*Y( K-2 ) + ELSE IF( K.EQ.2 ) THEN + TEMP = Y( K ) - B( K-1 )*Y( K-1 ) + ELSE + TEMP = Y( K ) + END IF + AK = A( K ) + ABSAK = ABS( AK ) + IF( ABSAK.LT.ONE ) THEN + IF( ABSAK.LT.SFMIN ) THEN + IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK ) + $ THEN + INFO = K + RETURN + ELSE + TEMP = TEMP*BIGNUM + AK = AK*BIGNUM + END IF + ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN + INFO = K + RETURN + END IF + END IF + Y( K ) = TEMP / AK + 60 CONTINUE + ELSE + DO 80 K = 1, N + IF( K.GE.3 ) THEN + TEMP = Y( K ) - B( K-1 )*Y( K-1 ) - D( K-2 )*Y( K-2 ) + ELSE IF( K.EQ.2 ) THEN + TEMP = Y( K ) - B( K-1 )*Y( K-1 ) + ELSE + TEMP = Y( K ) + END IF + AK = A( K ) + PERT = SIGN( TOL, AK ) + 70 CONTINUE + ABSAK = ABS( AK ) + IF( ABSAK.LT.ONE ) THEN + IF( ABSAK.LT.SFMIN ) THEN + IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK ) + $ THEN + AK = AK + PERT + PERT = 2*PERT + GO TO 70 + ELSE + TEMP = TEMP*BIGNUM + AK = AK*BIGNUM + END IF + ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN + AK = AK + PERT + PERT = 2*PERT + GO TO 70 + END IF + END IF + Y( K ) = TEMP / AK + 80 CONTINUE + END IF +* + DO 90 K = N, 2, -1 + IF( IN( K-1 ).EQ.0 ) THEN + Y( K-1 ) = Y( K-1 ) - C( K-1 )*Y( K ) + ELSE + TEMP = Y( K-1 ) + Y( K-1 ) = Y( K ) + Y( K ) = TEMP - C( K-1 )*Y( K ) + END IF + 90 CONTINUE + END IF +* +* End of DLAGTS +* + END diff --git a/dspl/liblapack/SRC/dlagv2.f b/dspl/liblapack/SRC/dlagv2.f new file mode 100644 index 0000000..16c6082 --- /dev/null +++ b/dspl/liblapack/SRC/dlagv2.f @@ -0,0 +1,374 @@ +*> \brief \b DLAGV2 computes the Generalized Schur factorization of a real 2-by-2 matrix pencil (A,B) where B is upper triangular. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAGV2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAGV2( A, LDA, B, LDB, ALPHAR, ALPHAI, BETA, CSL, SNL, +* CSR, SNR ) +* +* .. Scalar Arguments .. +* INTEGER LDA, LDB +* DOUBLE PRECISION CSL, CSR, SNL, SNR +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), ALPHAI( 2 ), ALPHAR( 2 ), +* $ B( LDB, * ), BETA( 2 ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAGV2 computes the Generalized Schur factorization of a real 2-by-2 +*> matrix pencil (A,B) where B is upper triangular. This routine +*> computes orthogonal (rotation) matrices given by CSL, SNL and CSR, +*> SNR such that +*> +*> 1) if the pencil (A,B) has two real eigenvalues (include 0/0 or 1/0 +*> types), then +*> +*> [ a11 a12 ] := [ CSL SNL ] [ a11 a12 ] [ CSR -SNR ] +*> [ 0 a22 ] [ -SNL CSL ] [ a21 a22 ] [ SNR CSR ] +*> +*> [ b11 b12 ] := [ CSL SNL ] [ b11 b12 ] [ CSR -SNR ] +*> [ 0 b22 ] [ -SNL CSL ] [ 0 b22 ] [ SNR CSR ], +*> +*> 2) if the pencil (A,B) has a pair of complex conjugate eigenvalues, +*> then +*> +*> [ a11 a12 ] := [ CSL SNL ] [ a11 a12 ] [ CSR -SNR ] +*> [ a21 a22 ] [ -SNL CSL ] [ a21 a22 ] [ SNR CSR ] +*> +*> [ b11 0 ] := [ CSL SNL ] [ b11 b12 ] [ CSR -SNR ] +*> [ 0 b22 ] [ -SNL CSL ] [ 0 b22 ] [ SNR CSR ] +*> +*> where b11 >= b22 > 0. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, 2) +*> On entry, the 2 x 2 matrix A. +*> On exit, A is overwritten by the ``A-part'' of the +*> generalized Schur form. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> THe leading dimension of the array A. LDA >= 2. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, 2) +*> On entry, the upper triangular 2 x 2 matrix B. +*> On exit, B is overwritten by the ``B-part'' of the +*> generalized Schur form. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> THe leading dimension of the array B. LDB >= 2. +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is DOUBLE PRECISION array, dimension (2) +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is DOUBLE PRECISION array, dimension (2) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION array, dimension (2) +*> (ALPHAR(k)+i*ALPHAI(k))/BETA(k) are the eigenvalues of the +*> pencil (A,B), k=1,2, i = sqrt(-1). Note that BETA(k) may +*> be zero. +*> \endverbatim +*> +*> \param[out] CSL +*> \verbatim +*> CSL is DOUBLE PRECISION +*> The cosine of the left rotation matrix. +*> \endverbatim +*> +*> \param[out] SNL +*> \verbatim +*> SNL is DOUBLE PRECISION +*> The sine of the left rotation matrix. +*> \endverbatim +*> +*> \param[out] CSR +*> \verbatim +*> CSR is DOUBLE PRECISION +*> The cosine of the right rotation matrix. +*> \endverbatim +*> +*> \param[out] SNR +*> \verbatim +*> SNR is DOUBLE PRECISION +*> The sine of the right rotation matrix. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE DLAGV2( A, LDA, B, LDB, ALPHAR, ALPHAI, BETA, CSL, SNL, + $ CSR, SNR ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER LDA, LDB + DOUBLE PRECISION CSL, CSR, SNL, SNR +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), ALPHAI( 2 ), ALPHAR( 2 ), + $ B( LDB, * ), BETA( 2 ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION ANORM, ASCALE, BNORM, BSCALE, H1, H2, H3, QQ, + $ R, RR, SAFMIN, SCALE1, SCALE2, T, ULP, WI, WR1, + $ WR2 +* .. +* .. External Subroutines .. + EXTERNAL DLAG2, DLARTG, DLASV2, DROT +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL DLAMCH, DLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + SAFMIN = DLAMCH( 'S' ) + ULP = DLAMCH( 'P' ) +* +* Scale A +* + ANORM = MAX( ABS( A( 1, 1 ) )+ABS( A( 2, 1 ) ), + $ ABS( A( 1, 2 ) )+ABS( A( 2, 2 ) ), SAFMIN ) + ASCALE = ONE / ANORM + A( 1, 1 ) = ASCALE*A( 1, 1 ) + A( 1, 2 ) = ASCALE*A( 1, 2 ) + A( 2, 1 ) = ASCALE*A( 2, 1 ) + A( 2, 2 ) = ASCALE*A( 2, 2 ) +* +* Scale B +* + BNORM = MAX( ABS( B( 1, 1 ) ), ABS( B( 1, 2 ) )+ABS( B( 2, 2 ) ), + $ SAFMIN ) + BSCALE = ONE / BNORM + B( 1, 1 ) = BSCALE*B( 1, 1 ) + B( 1, 2 ) = BSCALE*B( 1, 2 ) + B( 2, 2 ) = BSCALE*B( 2, 2 ) +* +* Check if A can be deflated +* + IF( ABS( A( 2, 1 ) ).LE.ULP ) THEN + CSL = ONE + SNL = ZERO + CSR = ONE + SNR = ZERO + A( 2, 1 ) = ZERO + B( 2, 1 ) = ZERO + WI = ZERO +* +* Check if B is singular +* + ELSE IF( ABS( B( 1, 1 ) ).LE.ULP ) THEN + CALL DLARTG( A( 1, 1 ), A( 2, 1 ), CSL, SNL, R ) + CSR = ONE + SNR = ZERO + CALL DROT( 2, A( 1, 1 ), LDA, A( 2, 1 ), LDA, CSL, SNL ) + CALL DROT( 2, B( 1, 1 ), LDB, B( 2, 1 ), LDB, CSL, SNL ) + A( 2, 1 ) = ZERO + B( 1, 1 ) = ZERO + B( 2, 1 ) = ZERO + WI = ZERO +* + ELSE IF( ABS( B( 2, 2 ) ).LE.ULP ) THEN + CALL DLARTG( A( 2, 2 ), A( 2, 1 ), CSR, SNR, T ) + SNR = -SNR + CALL DROT( 2, A( 1, 1 ), 1, A( 1, 2 ), 1, CSR, SNR ) + CALL DROT( 2, B( 1, 1 ), 1, B( 1, 2 ), 1, CSR, SNR ) + CSL = ONE + SNL = ZERO + A( 2, 1 ) = ZERO + B( 2, 1 ) = ZERO + B( 2, 2 ) = ZERO + WI = ZERO +* + ELSE +* +* B is nonsingular, first compute the eigenvalues of (A,B) +* + CALL DLAG2( A, LDA, B, LDB, SAFMIN, SCALE1, SCALE2, WR1, WR2, + $ WI ) +* + IF( WI.EQ.ZERO ) THEN +* +* two real eigenvalues, compute s*A-w*B +* + H1 = SCALE1*A( 1, 1 ) - WR1*B( 1, 1 ) + H2 = SCALE1*A( 1, 2 ) - WR1*B( 1, 2 ) + H3 = SCALE1*A( 2, 2 ) - WR1*B( 2, 2 ) +* + RR = DLAPY2( H1, H2 ) + QQ = DLAPY2( SCALE1*A( 2, 1 ), H3 ) +* + IF( RR.GT.QQ ) THEN +* +* find right rotation matrix to zero 1,1 element of +* (sA - wB) +* + CALL DLARTG( H2, H1, CSR, SNR, T ) +* + ELSE +* +* find right rotation matrix to zero 2,1 element of +* (sA - wB) +* + CALL DLARTG( H3, SCALE1*A( 2, 1 ), CSR, SNR, T ) +* + END IF +* + SNR = -SNR + CALL DROT( 2, A( 1, 1 ), 1, A( 1, 2 ), 1, CSR, SNR ) + CALL DROT( 2, B( 1, 1 ), 1, B( 1, 2 ), 1, CSR, SNR ) +* +* compute inf norms of A and B +* + H1 = MAX( ABS( A( 1, 1 ) )+ABS( A( 1, 2 ) ), + $ ABS( A( 2, 1 ) )+ABS( A( 2, 2 ) ) ) + H2 = MAX( ABS( B( 1, 1 ) )+ABS( B( 1, 2 ) ), + $ ABS( B( 2, 1 ) )+ABS( B( 2, 2 ) ) ) +* + IF( ( SCALE1*H1 ).GE.ABS( WR1 )*H2 ) THEN +* +* find left rotation matrix Q to zero out B(2,1) +* + CALL DLARTG( B( 1, 1 ), B( 2, 1 ), CSL, SNL, R ) +* + ELSE +* +* find left rotation matrix Q to zero out A(2,1) +* + CALL DLARTG( A( 1, 1 ), A( 2, 1 ), CSL, SNL, R ) +* + END IF +* + CALL DROT( 2, A( 1, 1 ), LDA, A( 2, 1 ), LDA, CSL, SNL ) + CALL DROT( 2, B( 1, 1 ), LDB, B( 2, 1 ), LDB, CSL, SNL ) +* + A( 2, 1 ) = ZERO + B( 2, 1 ) = ZERO +* + ELSE +* +* a pair of complex conjugate eigenvalues +* first compute the SVD of the matrix B +* + CALL DLASV2( B( 1, 1 ), B( 1, 2 ), B( 2, 2 ), R, T, SNR, + $ CSR, SNL, CSL ) +* +* Form (A,B) := Q(A,B)Z**T where Q is left rotation matrix and +* Z is right rotation matrix computed from DLASV2 +* + CALL DROT( 2, A( 1, 1 ), LDA, A( 2, 1 ), LDA, CSL, SNL ) + CALL DROT( 2, B( 1, 1 ), LDB, B( 2, 1 ), LDB, CSL, SNL ) + CALL DROT( 2, A( 1, 1 ), 1, A( 1, 2 ), 1, CSR, SNR ) + CALL DROT( 2, B( 1, 1 ), 1, B( 1, 2 ), 1, CSR, SNR ) +* + B( 2, 1 ) = ZERO + B( 1, 2 ) = ZERO +* + END IF +* + END IF +* +* Unscaling +* + A( 1, 1 ) = ANORM*A( 1, 1 ) + A( 2, 1 ) = ANORM*A( 2, 1 ) + A( 1, 2 ) = ANORM*A( 1, 2 ) + A( 2, 2 ) = ANORM*A( 2, 2 ) + B( 1, 1 ) = BNORM*B( 1, 1 ) + B( 2, 1 ) = BNORM*B( 2, 1 ) + B( 1, 2 ) = BNORM*B( 1, 2 ) + B( 2, 2 ) = BNORM*B( 2, 2 ) +* + IF( WI.EQ.ZERO ) THEN + ALPHAR( 1 ) = A( 1, 1 ) + ALPHAR( 2 ) = A( 2, 2 ) + ALPHAI( 1 ) = ZERO + ALPHAI( 2 ) = ZERO + BETA( 1 ) = B( 1, 1 ) + BETA( 2 ) = B( 2, 2 ) + ELSE + ALPHAR( 1 ) = ANORM*WR1 / SCALE1 / BNORM + ALPHAI( 1 ) = ANORM*WI / SCALE1 / BNORM + ALPHAR( 2 ) = ALPHAR( 1 ) + ALPHAI( 2 ) = -ALPHAI( 1 ) + BETA( 1 ) = ONE + BETA( 2 ) = ONE + END IF +* + RETURN +* +* End of DLAGV2 +* + END diff --git a/dspl/liblapack/SRC/dlahqr.f b/dspl/liblapack/SRC/dlahqr.f new file mode 100644 index 0000000..f7365d2 --- /dev/null +++ b/dspl/liblapack/SRC/dlahqr.f @@ -0,0 +1,613 @@ +*> \brief \b DLAHQR computes the eigenvalues and Schur factorization of an upper Hessenberg matrix, using the double-shift/single-shift QR algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAHQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, +* ILOZ, IHIZ, Z, LDZ, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* DOUBLE PRECISION H( LDH, * ), WI( * ), WR( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAHQR is an auxiliary routine called by DHSEQR to update the +*> eigenvalues and Schur decomposition already computed by DHSEQR, by +*> dealing with the Hessenberg submatrix in rows and columns ILO to +*> IHI. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> = .TRUE. : the full Schur form T is required; +*> = .FALSE.: only eigenvalues are required. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> = .TRUE. : the matrix of Schur vectors Z is required; +*> = .FALSE.: Schur vectors are not required. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> It is assumed that H is already upper quasi-triangular in +*> rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless +*> ILO = 1). DLAHQR works primarily with the Hessenberg +*> submatrix in rows and columns ILO to IHI, but applies +*> transformations to all of H if WANTT is .TRUE.. +*> 1 <= ILO <= max(1,IHI); IHI <= N. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is DOUBLE PRECISION array, dimension (LDH,N) +*> On entry, the upper Hessenberg matrix H. +*> On exit, if INFO is zero and if WANTT is .TRUE., H is upper +*> quasi-triangular in rows and columns ILO:IHI, with any +*> 2-by-2 diagonal blocks in standard form. If INFO is zero +*> and WANTT is .FALSE., the contents of H are unspecified on +*> exit. The output state of H if INFO is nonzero is given +*> below under the description of INFO. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH >= max(1,N). +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] WI +*> \verbatim +*> WI is DOUBLE PRECISION array, dimension (N) +*> The real and imaginary parts, respectively, of the computed +*> eigenvalues ILO to IHI are stored in the corresponding +*> elements of WR and WI. If two eigenvalues are computed as a +*> complex conjugate pair, they are stored in consecutive +*> elements of WR and WI, say the i-th and (i+1)th, with +*> WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the +*> eigenvalues are stored in the same order as on the diagonal +*> of the Schur form returned in H, with WR(i) = H(i,i), and, if +*> H(i:i+1,i:i+1) is a 2-by-2 diagonal block, +*> WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. +*> 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ,N) +*> If WANTZ is .TRUE., on entry Z must contain the current +*> matrix Z of transformations accumulated by DHSEQR, and on +*> exit Z has been updated; transformations are applied only to +*> the submatrix Z(ILOZ:IHIZ,ILO:IHI). +*> If WANTZ is .FALSE., Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> .GT. 0: If INFO = i, DLAHQR failed to compute all the +*> eigenvalues ILO to IHI in a total of 30 iterations +*> per eigenvalue; elements i+1:ihi of WR and WI +*> contain those eigenvalues which have been +*> successfully computed. +*> +*> If INFO .GT. 0 and WANTT is .FALSE., then on exit, +*> the remaining unconverged eigenvalues are the +*> eigenvalues of the upper Hessenberg matrix rows +*> and columns ILO thorugh INFO of the final, output +*> value of H. +*> +*> If INFO .GT. 0 and WANTT is .TRUE., then on exit +*> (*) (initial value of H)*U = U*(final value of H) +*> where U is an orthognal matrix. The final +*> value of H is upper Hessenberg and triangular in +*> rows and columns INFO+1 through IHI. +*> +*> If INFO .GT. 0 and WANTZ is .TRUE., then on exit +*> (final value of Z) = (initial value of Z)*U +*> where U is the orthogonal matrix in (*) +*> (regardless of the value of WANTT.) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> 02-96 Based on modifications by +*> David Day, Sandia National Laboratory, USA +*> +*> 12-04 Further modifications by +*> Ralph Byers, University of Kansas, USA +*> This is a modified version of DLAHQR from LAPACK version 3.0. +*> It is (1) more robust against overflow and underflow and +*> (2) adopts the more conservative Ahues & Tisseur stopping +*> criterion (LAWN 122, 1997). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, + $ ILOZ, IHIZ, Z, LDZ, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + DOUBLE PRECISION H( LDH, * ), WI( * ), WR( * ), Z( LDZ, * ) +* .. +* +* ========================================================= +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0, TWO = 2.0d0 ) + DOUBLE PRECISION DAT1, DAT2 + PARAMETER ( DAT1 = 3.0d0 / 4.0d0, DAT2 = -0.4375d0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION AA, AB, BA, BB, CS, DET, H11, H12, H21, H21S, + $ H22, RT1I, RT1R, RT2I, RT2R, RTDISC, S, SAFMAX, + $ SAFMIN, SMLNUM, SN, SUM, T1, T2, T3, TR, TST, + $ ULP, V2, V3 + INTEGER I, I1, I2, ITS, ITMAX, J, K, L, M, NH, NR, NZ +* .. +* .. Local Arrays .. + DOUBLE PRECISION V( 3 ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLABAD, DLANV2, DLARFG, DROT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( ILO.EQ.IHI ) THEN + WR( ILO ) = H( ILO, ILO ) + WI( ILO ) = ZERO + RETURN + END IF +* +* ==== clear out the trash ==== + DO 10 J = ILO, IHI - 3 + H( J+2, J ) = ZERO + H( J+3, J ) = ZERO + 10 CONTINUE + IF( ILO.LE.IHI-2 ) + $ H( IHI, IHI-2 ) = ZERO +* + NH = IHI - ILO + 1 + NZ = IHIZ - ILOZ + 1 +* +* Set machine-dependent constants for the stopping criterion. +* + SAFMIN = DLAMCH( 'SAFE MINIMUM' ) + SAFMAX = ONE / SAFMIN + CALL DLABAD( SAFMIN, SAFMAX ) + ULP = DLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( DBLE( NH ) / ULP ) +* +* I1 and I2 are the indices of the first row and last column of H +* to which transformations must be applied. If eigenvalues only are +* being computed, I1 and I2 are set inside the main loop. +* + IF( WANTT ) THEN + I1 = 1 + I2 = N + END IF +* +* ITMAX is the total number of QR iterations allowed. +* + ITMAX = 30 * MAX( 10, NH ) +* +* The main loop begins here. I is the loop index and decreases from +* IHI to ILO in steps of 1 or 2. Each iteration of the loop works +* with the active submatrix in rows and columns L to I. +* Eigenvalues I+1 to IHI have already converged. Either L = ILO or +* H(L,L-1) is negligible so that the matrix splits. +* + I = IHI + 20 CONTINUE + L = ILO + IF( I.LT.ILO ) + $ GO TO 160 +* +* Perform QR iterations on rows and columns ILO to I until a +* submatrix of order 1 or 2 splits off at the bottom because a +* subdiagonal element has become negligible. +* + DO 140 ITS = 0, ITMAX +* +* Look for a single small subdiagonal element. +* + DO 30 K = I, L + 1, -1 + IF( ABS( H( K, K-1 ) ).LE.SMLNUM ) + $ GO TO 40 + TST = ABS( H( K-1, K-1 ) ) + ABS( H( K, K ) ) + IF( TST.EQ.ZERO ) THEN + IF( K-2.GE.ILO ) + $ TST = TST + ABS( H( K-1, K-2 ) ) + IF( K+1.LE.IHI ) + $ TST = TST + ABS( H( K+1, K ) ) + END IF +* ==== The following is a conservative small subdiagonal +* . deflation criterion due to Ahues & Tisseur (LAWN 122, +* . 1997). It has better mathematical foundation and +* . improves accuracy in some cases. ==== + IF( ABS( H( K, K-1 ) ).LE.ULP*TST ) THEN + AB = MAX( ABS( H( K, K-1 ) ), ABS( H( K-1, K ) ) ) + BA = MIN( ABS( H( K, K-1 ) ), ABS( H( K-1, K ) ) ) + AA = MAX( ABS( H( K, K ) ), + $ ABS( H( K-1, K-1 )-H( K, K ) ) ) + BB = MIN( ABS( H( K, K ) ), + $ ABS( H( K-1, K-1 )-H( K, K ) ) ) + S = AA + AB + IF( BA*( AB / S ).LE.MAX( SMLNUM, + $ ULP*( BB*( AA / S ) ) ) )GO TO 40 + END IF + 30 CONTINUE + 40 CONTINUE + L = K + IF( L.GT.ILO ) THEN +* +* H(L,L-1) is negligible +* + H( L, L-1 ) = ZERO + END IF +* +* Exit from loop if a submatrix of order 1 or 2 has split off. +* + IF( L.GE.I-1 ) + $ GO TO 150 +* +* Now the active submatrix is in rows and columns L to I. If +* eigenvalues only are being computed, only the active submatrix +* need be transformed. +* + IF( .NOT.WANTT ) THEN + I1 = L + I2 = I + END IF +* + IF( ITS.EQ.10 ) THEN +* +* Exceptional shift. +* + S = ABS( H( L+1, L ) ) + ABS( H( L+2, L+1 ) ) + H11 = DAT1*S + H( L, L ) + H12 = DAT2*S + H21 = S + H22 = H11 + ELSE IF( ITS.EQ.20 ) THEN +* +* Exceptional shift. +* + S = ABS( H( I, I-1 ) ) + ABS( H( I-1, I-2 ) ) + H11 = DAT1*S + H( I, I ) + H12 = DAT2*S + H21 = S + H22 = H11 + ELSE +* +* Prepare to use Francis' double shift +* (i.e. 2nd degree generalized Rayleigh quotient) +* + H11 = H( I-1, I-1 ) + H21 = H( I, I-1 ) + H12 = H( I-1, I ) + H22 = H( I, I ) + END IF + S = ABS( H11 ) + ABS( H12 ) + ABS( H21 ) + ABS( H22 ) + IF( S.EQ.ZERO ) THEN + RT1R = ZERO + RT1I = ZERO + RT2R = ZERO + RT2I = ZERO + ELSE + H11 = H11 / S + H21 = H21 / S + H12 = H12 / S + H22 = H22 / S + TR = ( H11+H22 ) / TWO + DET = ( H11-TR )*( H22-TR ) - H12*H21 + RTDISC = SQRT( ABS( DET ) ) + IF( DET.GE.ZERO ) THEN +* +* ==== complex conjugate shifts ==== +* + RT1R = TR*S + RT2R = RT1R + RT1I = RTDISC*S + RT2I = -RT1I + ELSE +* +* ==== real shifts (use only one of them) ==== +* + RT1R = TR + RTDISC + RT2R = TR - RTDISC + IF( ABS( RT1R-H22 ).LE.ABS( RT2R-H22 ) ) THEN + RT1R = RT1R*S + RT2R = RT1R + ELSE + RT2R = RT2R*S + RT1R = RT2R + END IF + RT1I = ZERO + RT2I = ZERO + END IF + END IF +* +* Look for two consecutive small subdiagonal elements. +* + DO 50 M = I - 2, L, -1 +* Determine the effect of starting the double-shift QR +* iteration at row M, and see if this would make H(M,M-1) +* negligible. (The following uses scaling to avoid +* overflows and most underflows.) +* + H21S = H( M+1, M ) + S = ABS( H( M, M )-RT2R ) + ABS( RT2I ) + ABS( H21S ) + H21S = H( M+1, M ) / S + V( 1 ) = H21S*H( M, M+1 ) + ( H( M, M )-RT1R )* + $ ( ( H( M, M )-RT2R ) / S ) - RT1I*( RT2I / S ) + V( 2 ) = H21S*( H( M, M )+H( M+1, M+1 )-RT1R-RT2R ) + V( 3 ) = H21S*H( M+2, M+1 ) + S = ABS( V( 1 ) ) + ABS( V( 2 ) ) + ABS( V( 3 ) ) + V( 1 ) = V( 1 ) / S + V( 2 ) = V( 2 ) / S + V( 3 ) = V( 3 ) / S + IF( M.EQ.L ) + $ GO TO 60 + IF( ABS( H( M, M-1 ) )*( ABS( V( 2 ) )+ABS( V( 3 ) ) ).LE. + $ ULP*ABS( V( 1 ) )*( ABS( H( M-1, M-1 ) )+ABS( H( M, + $ M ) )+ABS( H( M+1, M+1 ) ) ) )GO TO 60 + 50 CONTINUE + 60 CONTINUE +* +* Double-shift QR step +* + DO 130 K = M, I - 1 +* +* The first iteration of this loop determines a reflection G +* from the vector V and applies it from left and right to H, +* thus creating a nonzero bulge below the subdiagonal. +* +* Each subsequent iteration determines a reflection G to +* restore the Hessenberg form in the (K-1)th column, and thus +* chases the bulge one step toward the bottom of the active +* submatrix. NR is the order of G. +* + NR = MIN( 3, I-K+1 ) + IF( K.GT.M ) + $ CALL DCOPY( NR, H( K, K-1 ), 1, V, 1 ) + CALL DLARFG( NR, V( 1 ), V( 2 ), 1, T1 ) + IF( K.GT.M ) THEN + H( K, K-1 ) = V( 1 ) + H( K+1, K-1 ) = ZERO + IF( K.LT.I-1 ) + $ H( K+2, K-1 ) = ZERO + ELSE IF( M.GT.L ) THEN +* ==== Use the following instead of +* . H( K, K-1 ) = -H( K, K-1 ) to +* . avoid a bug when v(2) and v(3) +* . underflow. ==== + H( K, K-1 ) = H( K, K-1 )*( ONE-T1 ) + END IF + V2 = V( 2 ) + T2 = T1*V2 + IF( NR.EQ.3 ) THEN + V3 = V( 3 ) + T3 = T1*V3 +* +* Apply G from the left to transform the rows of the matrix +* in columns K to I2. +* + DO 70 J = K, I2 + SUM = H( K, J ) + V2*H( K+1, J ) + V3*H( K+2, J ) + H( K, J ) = H( K, J ) - SUM*T1 + H( K+1, J ) = H( K+1, J ) - SUM*T2 + H( K+2, J ) = H( K+2, J ) - SUM*T3 + 70 CONTINUE +* +* Apply G from the right to transform the columns of the +* matrix in rows I1 to min(K+3,I). +* + DO 80 J = I1, MIN( K+3, I ) + SUM = H( J, K ) + V2*H( J, K+1 ) + V3*H( J, K+2 ) + H( J, K ) = H( J, K ) - SUM*T1 + H( J, K+1 ) = H( J, K+1 ) - SUM*T2 + H( J, K+2 ) = H( J, K+2 ) - SUM*T3 + 80 CONTINUE +* + IF( WANTZ ) THEN +* +* Accumulate transformations in the matrix Z +* + DO 90 J = ILOZ, IHIZ + SUM = Z( J, K ) + V2*Z( J, K+1 ) + V3*Z( J, K+2 ) + Z( J, K ) = Z( J, K ) - SUM*T1 + Z( J, K+1 ) = Z( J, K+1 ) - SUM*T2 + Z( J, K+2 ) = Z( J, K+2 ) - SUM*T3 + 90 CONTINUE + END IF + ELSE IF( NR.EQ.2 ) THEN +* +* Apply G from the left to transform the rows of the matrix +* in columns K to I2. +* + DO 100 J = K, I2 + SUM = H( K, J ) + V2*H( K+1, J ) + H( K, J ) = H( K, J ) - SUM*T1 + H( K+1, J ) = H( K+1, J ) - SUM*T2 + 100 CONTINUE +* +* Apply G from the right to transform the columns of the +* matrix in rows I1 to min(K+3,I). +* + DO 110 J = I1, I + SUM = H( J, K ) + V2*H( J, K+1 ) + H( J, K ) = H( J, K ) - SUM*T1 + H( J, K+1 ) = H( J, K+1 ) - SUM*T2 + 110 CONTINUE +* + IF( WANTZ ) THEN +* +* Accumulate transformations in the matrix Z +* + DO 120 J = ILOZ, IHIZ + SUM = Z( J, K ) + V2*Z( J, K+1 ) + Z( J, K ) = Z( J, K ) - SUM*T1 + Z( J, K+1 ) = Z( J, K+1 ) - SUM*T2 + 120 CONTINUE + END IF + END IF + 130 CONTINUE +* + 140 CONTINUE +* +* Failure to converge in remaining number of iterations +* + INFO = I + RETURN +* + 150 CONTINUE +* + IF( L.EQ.I ) THEN +* +* H(I,I-1) is negligible: one eigenvalue has converged. +* + WR( I ) = H( I, I ) + WI( I ) = ZERO + ELSE IF( L.EQ.I-1 ) THEN +* +* H(I-1,I-2) is negligible: a pair of eigenvalues have converged. +* +* Transform the 2-by-2 submatrix to standard Schur form, +* and compute and store the eigenvalues. +* + CALL DLANV2( H( I-1, I-1 ), H( I-1, I ), H( I, I-1 ), + $ H( I, I ), WR( I-1 ), WI( I-1 ), WR( I ), WI( I ), + $ CS, SN ) +* + IF( WANTT ) THEN +* +* Apply the transformation to the rest of H. +* + IF( I2.GT.I ) + $ CALL DROT( I2-I, H( I-1, I+1 ), LDH, H( I, I+1 ), LDH, + $ CS, SN ) + CALL DROT( I-I1-1, H( I1, I-1 ), 1, H( I1, I ), 1, CS, SN ) + END IF + IF( WANTZ ) THEN +* +* Apply the transformation to Z. +* + CALL DROT( NZ, Z( ILOZ, I-1 ), 1, Z( ILOZ, I ), 1, CS, SN ) + END IF + END IF +* +* return to start of the main loop with new value of I. +* + I = L - 1 + GO TO 20 +* + 160 CONTINUE + RETURN +* +* End of DLAHQR +* + END diff --git a/dspl/liblapack/SRC/dlahr2.f b/dspl/liblapack/SRC/dlahr2.f new file mode 100644 index 0000000..beb9795 --- /dev/null +++ b/dspl/liblapack/SRC/dlahr2.f @@ -0,0 +1,326 @@ +*> \brief \b DLAHR2 reduces the specified number of first columns of a general rectangular matrix A so that elements below the specified subdiagonal are zero, and returns auxiliary matrices which are needed to apply the transformation to the unreduced part of A. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAHR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAHR2( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* .. Scalar Arguments .. +* INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), T( LDT, NB ), TAU( NB ), +* $ Y( LDY, NB ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAHR2 reduces the first NB columns of A real general n-BY-(n-k+1) +*> matrix A so that elements below the k-th subdiagonal are zero. The +*> reduction is performed by an orthogonal similarity transformation +*> Q**T * A * Q. The routine returns the matrices V and T which determine +*> Q as a block reflector I - V*T*V**T, and also the matrix Y = A * V * T. +*> +*> This is an auxiliary routine called by DGEHRD. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The offset for the reduction. Elements below the k-th +*> subdiagonal in the first NB columns are reduced to zero. +*> K < N. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of columns to be reduced. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N-K+1) +*> On entry, the n-by-(n-k+1) general matrix A. +*> On exit, the elements on and above the k-th subdiagonal in +*> the first NB columns are overwritten with the corresponding +*> elements of the reduced matrix; the elements below the k-th +*> subdiagonal, with the array TAU, represent the matrix Q as a +*> product of elementary reflectors. The other columns of A are +*> unchanged. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (NB) +*> The scalar factors of the elementary reflectors. See Further +*> Details. +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,NB) +*> The upper triangular matrix T. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, dimension (LDY,NB) +*> The n-by-nb matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= N. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of nb elementary reflectors +*> +*> Q = H(1) H(2) . . . H(nb). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in +*> A(i+k+1:n,i), and tau in TAU(i). +*> +*> The elements of the vectors v together form the (n-k+1)-by-nb matrix +*> V which is needed, with T and Y, to apply the transformation to the +*> unreduced part of the matrix, using an update of the form: +*> A := (I - V*T*V**T) * (A - Y*V**T). +*> +*> The contents of A on exit are illustrated by the following example +*> with n = 7, k = 3 and nb = 2: +*> +*> ( a a a a a ) +*> ( a a a a a ) +*> ( a a a a a ) +*> ( h h a a a ) +*> ( v1 h a a a ) +*> ( v1 v2 a a a ) +*> ( v1 v2 a a a ) +*> +*> where a denotes an element of the original matrix A, h denotes a +*> modified element of the upper Hessenberg matrix H, and vi denotes an +*> element of the vector defining H(i). +*> +*> This subroutine is a slight modification of LAPACK-3.0's DLAHRD +*> incorporating improvements proposed by Quintana-Orti and Van de +*> Gejin. Note that the entries of A(1:K,2:NB) differ from those +*> returned by the original LAPACK-3.0's DLAHRD routine. (This +*> subroutine is not backward compatible with LAPACK-3.0's DLAHRD.) +*> \endverbatim +* +*> \par References: +* ================ +*> +*> Gregorio Quintana-Orti and Robert van de Geijn, "Improving the +*> performance of reduction to Hessenberg form," ACM Transactions on +*> Mathematical Software, 32(2):180-194, June 2006. +*> +* ===================================================================== + SUBROUTINE DLAHR2( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), T( LDT, NB ), TAU( NB ), + $ Y( LDY, NB ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, + $ ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION EI +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGEMM, DGEMV, DLACPY, + $ DLARFG, DSCAL, DTRMM, DTRMV +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, NB + IF( I.GT.1 ) THEN +* +* Update A(K+1:N,I) +* +* Update I-th column of A - Y * V**T +* + CALL DGEMV( 'NO TRANSPOSE', N-K, I-1, -ONE, Y(K+1,1), LDY, + $ A( K+I-1, 1 ), LDA, ONE, A( K+1, I ), 1 ) +* +* Apply I - V * T**T * V**T to this column (call it b) from the +* left, using the last column of T as workspace +* +* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) +* ( V2 ) ( b2 ) +* +* where V1 is unit lower triangular +* +* w := V1**T * b1 +* + CALL DCOPY( I-1, A( K+1, I ), 1, T( 1, NB ), 1 ) + CALL DTRMV( 'Lower', 'Transpose', 'UNIT', + $ I-1, A( K+1, 1 ), + $ LDA, T( 1, NB ), 1 ) +* +* w := w + V2**T * b2 +* + CALL DGEMV( 'Transpose', N-K-I+1, I-1, + $ ONE, A( K+I, 1 ), + $ LDA, A( K+I, I ), 1, ONE, T( 1, NB ), 1 ) +* +* w := T**T * w +* + CALL DTRMV( 'Upper', 'Transpose', 'NON-UNIT', + $ I-1, T, LDT, + $ T( 1, NB ), 1 ) +* +* b2 := b2 - V2*w +* + CALL DGEMV( 'NO TRANSPOSE', N-K-I+1, I-1, -ONE, + $ A( K+I, 1 ), + $ LDA, T( 1, NB ), 1, ONE, A( K+I, I ), 1 ) +* +* b1 := b1 - V1*w +* + CALL DTRMV( 'Lower', 'NO TRANSPOSE', + $ 'UNIT', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) + CALL DAXPY( I-1, -ONE, T( 1, NB ), 1, A( K+1, I ), 1 ) +* + A( K+I-1, I-1 ) = EI + END IF +* +* Generate the elementary reflector H(I) to annihilate +* A(K+I+1:N,I) +* + CALL DLARFG( N-K-I+1, A( K+I, I ), A( MIN( K+I+1, N ), I ), 1, + $ TAU( I ) ) + EI = A( K+I, I ) + A( K+I, I ) = ONE +* +* Compute Y(K+1:N,I) +* + CALL DGEMV( 'NO TRANSPOSE', N-K, N-K-I+1, + $ ONE, A( K+1, I+1 ), + $ LDA, A( K+I, I ), 1, ZERO, Y( K+1, I ), 1 ) + CALL DGEMV( 'Transpose', N-K-I+1, I-1, + $ ONE, A( K+I, 1 ), LDA, + $ A( K+I, I ), 1, ZERO, T( 1, I ), 1 ) + CALL DGEMV( 'NO TRANSPOSE', N-K, I-1, -ONE, + $ Y( K+1, 1 ), LDY, + $ T( 1, I ), 1, ONE, Y( K+1, I ), 1 ) + CALL DSCAL( N-K, TAU( I ), Y( K+1, I ), 1 ) +* +* Compute T(1:I,I) +* + CALL DSCAL( I-1, -TAU( I ), T( 1, I ), 1 ) + CALL DTRMV( 'Upper', 'No Transpose', 'NON-UNIT', + $ I-1, T, LDT, + $ T( 1, I ), 1 ) + T( I, I ) = TAU( I ) +* + 10 CONTINUE + A( K+NB, NB ) = EI +* +* Compute Y(1:K,1:NB) +* + CALL DLACPY( 'ALL', K, NB, A( 1, 2 ), LDA, Y, LDY ) + CALL DTRMM( 'RIGHT', 'Lower', 'NO TRANSPOSE', + $ 'UNIT', K, NB, + $ ONE, A( K+1, 1 ), LDA, Y, LDY ) + IF( N.GT.K+NB ) + $ CALL DGEMM( 'NO TRANSPOSE', 'NO TRANSPOSE', K, + $ NB, N-K-NB, ONE, + $ A( 1, 2+NB ), LDA, A( K+1+NB, 1 ), LDA, ONE, Y, + $ LDY ) + CALL DTRMM( 'RIGHT', 'Upper', 'NO TRANSPOSE', + $ 'NON-UNIT', K, NB, + $ ONE, T, LDT, Y, LDY ) +* + RETURN +* +* End of DLAHR2 +* + END diff --git a/dspl/liblapack/SRC/dlaic1.f b/dspl/liblapack/SRC/dlaic1.f new file mode 100644 index 0000000..e9dc083 --- /dev/null +++ b/dspl/liblapack/SRC/dlaic1.f @@ -0,0 +1,367 @@ +*> \brief \b DLAIC1 applies one step of incremental condition estimation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAIC1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C ) +* +* .. Scalar Arguments .. +* INTEGER J, JOB +* DOUBLE PRECISION C, GAMMA, S, SEST, SESTPR +* .. +* .. Array Arguments .. +* DOUBLE PRECISION W( J ), X( J ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAIC1 applies one step of incremental condition estimation in +*> its simplest version: +*> +*> Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j +*> lower triangular matrix L, such that +*> twonorm(L*x) = sest +*> Then DLAIC1 computes sestpr, s, c such that +*> the vector +*> [ s*x ] +*> xhat = [ c ] +*> is an approximate singular vector of +*> [ L 0 ] +*> Lhat = [ w**T gamma ] +*> in the sense that +*> twonorm(Lhat*xhat) = sestpr. +*> +*> Depending on JOB, an estimate for the largest or smallest singular +*> value is computed. +*> +*> Note that [s c]**T and sestpr**2 is an eigenpair of the system +*> +*> diag(sest*sest, 0) + [alpha gamma] * [ alpha ] +*> [ gamma ] +*> +*> where alpha = x**T*w. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is INTEGER +*> = 1: an estimate for the largest singular value is computed. +*> = 2: an estimate for the smallest singular value is computed. +*> \endverbatim +*> +*> \param[in] J +*> \verbatim +*> J is INTEGER +*> Length of X and W +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (J) +*> The j-vector x. +*> \endverbatim +*> +*> \param[in] SEST +*> \verbatim +*> SEST is DOUBLE PRECISION +*> Estimated singular value of j by j matrix L +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (J) +*> The j-vector w. +*> \endverbatim +*> +*> \param[in] GAMMA +*> \verbatim +*> GAMMA is DOUBLE PRECISION +*> The diagonal element gamma. +*> \endverbatim +*> +*> \param[out] SESTPR +*> \verbatim +*> SESTPR is DOUBLE PRECISION +*> Estimated singular value of (j+1) by (j+1) matrix Lhat. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION +*> Sine needed in forming xhat. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is DOUBLE PRECISION +*> Cosine needed in forming xhat. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER J, JOB + DOUBLE PRECISION C, GAMMA, S, SEST, SESTPR +* .. +* .. Array Arguments .. + DOUBLE PRECISION W( J ), X( J ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 ) + DOUBLE PRECISION HALF, FOUR + PARAMETER ( HALF = 0.5D0, FOUR = 4.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION ABSALP, ABSEST, ABSGAM, ALPHA, B, COSINE, EPS, + $ NORMA, S1, S2, SINE, T, TEST, TMP, ZETA1, ZETA2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN, SQRT +* .. +* .. External Functions .. + DOUBLE PRECISION DDOT, DLAMCH + EXTERNAL DDOT, DLAMCH +* .. +* .. Executable Statements .. +* + EPS = DLAMCH( 'Epsilon' ) + ALPHA = DDOT( J, X, 1, W, 1 ) +* + ABSALP = ABS( ALPHA ) + ABSGAM = ABS( GAMMA ) + ABSEST = ABS( SEST ) +* + IF( JOB.EQ.1 ) THEN +* +* Estimating largest singular value +* +* special cases +* + IF( SEST.EQ.ZERO ) THEN + S1 = MAX( ABSGAM, ABSALP ) + IF( S1.EQ.ZERO ) THEN + S = ZERO + C = ONE + SESTPR = ZERO + ELSE + S = ALPHA / S1 + C = GAMMA / S1 + TMP = SQRT( S*S+C*C ) + S = S / TMP + C = C / TMP + SESTPR = S1*TMP + END IF + RETURN + ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN + S = ONE + C = ZERO + TMP = MAX( ABSEST, ABSALP ) + S1 = ABSEST / TMP + S2 = ABSALP / TMP + SESTPR = TMP*SQRT( S1*S1+S2*S2 ) + RETURN + ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN + S1 = ABSGAM + S2 = ABSEST + IF( S1.LE.S2 ) THEN + S = ONE + C = ZERO + SESTPR = S2 + ELSE + S = ZERO + C = ONE + SESTPR = S1 + END IF + RETURN + ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN + S1 = ABSGAM + S2 = ABSALP + IF( S1.LE.S2 ) THEN + TMP = S1 / S2 + S = SQRT( ONE+TMP*TMP ) + SESTPR = S2*S + C = ( GAMMA / S2 ) / S + S = SIGN( ONE, ALPHA ) / S + ELSE + TMP = S2 / S1 + C = SQRT( ONE+TMP*TMP ) + SESTPR = S1*C + S = ( ALPHA / S1 ) / C + C = SIGN( ONE, GAMMA ) / C + END IF + RETURN + ELSE +* +* normal case +* + ZETA1 = ALPHA / ABSEST + ZETA2 = GAMMA / ABSEST +* + B = ( ONE-ZETA1*ZETA1-ZETA2*ZETA2 )*HALF + C = ZETA1*ZETA1 + IF( B.GT.ZERO ) THEN + T = C / ( B+SQRT( B*B+C ) ) + ELSE + T = SQRT( B*B+C ) - B + END IF +* + SINE = -ZETA1 / T + COSINE = -ZETA2 / ( ONE+T ) + TMP = SQRT( SINE*SINE+COSINE*COSINE ) + S = SINE / TMP + C = COSINE / TMP + SESTPR = SQRT( T+ONE )*ABSEST + RETURN + END IF +* + ELSE IF( JOB.EQ.2 ) THEN +* +* Estimating smallest singular value +* +* special cases +* + IF( SEST.EQ.ZERO ) THEN + SESTPR = ZERO + IF( MAX( ABSGAM, ABSALP ).EQ.ZERO ) THEN + SINE = ONE + COSINE = ZERO + ELSE + SINE = -GAMMA + COSINE = ALPHA + END IF + S1 = MAX( ABS( SINE ), ABS( COSINE ) ) + S = SINE / S1 + C = COSINE / S1 + TMP = SQRT( S*S+C*C ) + S = S / TMP + C = C / TMP + RETURN + ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN + S = ZERO + C = ONE + SESTPR = ABSGAM + RETURN + ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN + S1 = ABSGAM + S2 = ABSEST + IF( S1.LE.S2 ) THEN + S = ZERO + C = ONE + SESTPR = S1 + ELSE + S = ONE + C = ZERO + SESTPR = S2 + END IF + RETURN + ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN + S1 = ABSGAM + S2 = ABSALP + IF( S1.LE.S2 ) THEN + TMP = S1 / S2 + C = SQRT( ONE+TMP*TMP ) + SESTPR = ABSEST*( TMP / C ) + S = -( GAMMA / S2 ) / C + C = SIGN( ONE, ALPHA ) / C + ELSE + TMP = S2 / S1 + S = SQRT( ONE+TMP*TMP ) + SESTPR = ABSEST / S + C = ( ALPHA / S1 ) / S + S = -SIGN( ONE, GAMMA ) / S + END IF + RETURN + ELSE +* +* normal case +* + ZETA1 = ALPHA / ABSEST + ZETA2 = GAMMA / ABSEST +* + NORMA = MAX( ONE+ZETA1*ZETA1+ABS( ZETA1*ZETA2 ), + $ ABS( ZETA1*ZETA2 )+ZETA2*ZETA2 ) +* +* See if root is closer to zero or to ONE +* + TEST = ONE + TWO*( ZETA1-ZETA2 )*( ZETA1+ZETA2 ) + IF( TEST.GE.ZERO ) THEN +* +* root is close to zero, compute directly +* + B = ( ZETA1*ZETA1+ZETA2*ZETA2+ONE )*HALF + C = ZETA2*ZETA2 + T = C / ( B+SQRT( ABS( B*B-C ) ) ) + SINE = ZETA1 / ( ONE-T ) + COSINE = -ZETA2 / T + SESTPR = SQRT( T+FOUR*EPS*EPS*NORMA )*ABSEST + ELSE +* +* root is closer to ONE, shift by that amount +* + B = ( ZETA2*ZETA2+ZETA1*ZETA1-ONE )*HALF + C = ZETA1*ZETA1 + IF( B.GE.ZERO ) THEN + T = -C / ( B+SQRT( B*B+C ) ) + ELSE + T = B - SQRT( B*B+C ) + END IF + SINE = -ZETA1 / T + COSINE = -ZETA2 / ( ONE+T ) + SESTPR = SQRT( ONE+T+FOUR*EPS*EPS*NORMA )*ABSEST + END IF + TMP = SQRT( SINE*SINE+COSINE*COSINE ) + S = SINE / TMP + C = COSINE / TMP + RETURN +* + END IF + END IF + RETURN +* +* End of DLAIC1 +* + END diff --git a/dspl/liblapack/SRC/dlaisnan.f b/dspl/liblapack/SRC/dlaisnan.f new file mode 100644 index 0000000..c2e87d8 --- /dev/null +++ b/dspl/liblapack/SRC/dlaisnan.f @@ -0,0 +1,91 @@ +*> \brief \b DLAISNAN tests input for NaN by comparing two arguments for inequality. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAISNAN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* LOGICAL FUNCTION DLAISNAN( DIN1, DIN2 ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION, INTENT(IN) :: DIN1, DIN2 +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is not for general use. It exists solely to avoid +*> over-optimization in DISNAN. +*> +*> DLAISNAN checks for NaNs by comparing its two arguments for +*> inequality. NaN is the only floating-point value where NaN != NaN +*> returns .TRUE. To check for NaNs, pass the same variable as both +*> arguments. +*> +*> A compiler must assume that the two arguments are +*> not the same variable, and the test will not be optimized away. +*> Interprocedural or whole-program optimization may delete this +*> test. The ISNAN functions will be replaced by the correct +*> Fortran 03 intrinsic once the intrinsic is widely available. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIN1 +*> \verbatim +*> DIN1 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] DIN2 +*> \verbatim +*> DIN2 is DOUBLE PRECISION +*> Two numbers to compare for inequality. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + LOGICAL FUNCTION DLAISNAN( DIN1, DIN2 ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + DOUBLE PRECISION, INTENT(IN) :: DIN1, DIN2 +* .. +* +* ===================================================================== +* +* .. Executable Statements .. + DLAISNAN = (DIN1.NE.DIN2) + RETURN + END diff --git a/dspl/liblapack/SRC/dlaln2.f b/dspl/liblapack/SRC/dlaln2.f new file mode 100644 index 0000000..a094b73 --- /dev/null +++ b/dspl/liblapack/SRC/dlaln2.f @@ -0,0 +1,611 @@ +*> \brief \b DLALN2 solves a 1-by-1 or 2-by-2 linear system of equations of the specified form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLALN2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLALN2( LTRANS, NA, NW, SMIN, CA, A, LDA, D1, D2, B, +* LDB, WR, WI, X, LDX, SCALE, XNORM, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL LTRANS +* INTEGER INFO, LDA, LDB, LDX, NA, NW +* DOUBLE PRECISION CA, D1, D2, SCALE, SMIN, WI, WR, XNORM +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLALN2 solves a system of the form (ca A - w D ) X = s B +*> or (ca A**T - w D) X = s B with possible scaling ("s") and +*> perturbation of A. (A**T means A-transpose.) +*> +*> A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA +*> real diagonal matrix, w is a real or complex value, and X and B are +*> NA x 1 matrices -- real if w is real, complex if w is complex. NA +*> may be 1 or 2. +*> +*> If w is complex, X and B are represented as NA x 2 matrices, +*> the first column of each being the real part and the second +*> being the imaginary part. +*> +*> "s" is a scaling factor (.LE. 1), computed by DLALN2, which is +*> so chosen that X can be computed without overflow. X is further +*> scaled if necessary to assure that norm(ca A - w D)*norm(X) is less +*> than overflow. +*> +*> If both singular values of (ca A - w D) are less than SMIN, +*> SMIN*identity will be used instead of (ca A - w D). If only one +*> singular value is less than SMIN, one element of (ca A - w D) will be +*> perturbed enough to make the smallest singular value roughly SMIN. +*> If both singular values are at least SMIN, (ca A - w D) will not be +*> perturbed. In any case, the perturbation will be at most some small +*> multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values +*> are computed by infinity-norm approximations, and thus will only be +*> correct to a factor of 2 or so. +*> +*> Note: all input quantities are assumed to be smaller than overflow +*> by a reasonable factor. (See BIGNUM.) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] LTRANS +*> \verbatim +*> LTRANS is LOGICAL +*> =.TRUE.: A-transpose will be used. +*> =.FALSE.: A will be used (not transposed.) +*> \endverbatim +*> +*> \param[in] NA +*> \verbatim +*> NA is INTEGER +*> The size of the matrix A. It may (only) be 1 or 2. +*> \endverbatim +*> +*> \param[in] NW +*> \verbatim +*> NW is INTEGER +*> 1 if "w" is real, 2 if "w" is complex. It may only be 1 +*> or 2. +*> \endverbatim +*> +*> \param[in] SMIN +*> \verbatim +*> SMIN is DOUBLE PRECISION +*> The desired lower bound on the singular values of A. This +*> should be a safe distance away from underflow or overflow, +*> say, between (underflow/machine precision) and (machine +*> precision * overflow ). (See BIGNUM and ULP.) +*> \endverbatim +*> +*> \param[in] CA +*> \verbatim +*> CA is DOUBLE PRECISION +*> The coefficient c, which A is multiplied by. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,NA) +*> The NA x NA matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. It must be at least NA. +*> \endverbatim +*> +*> \param[in] D1 +*> \verbatim +*> D1 is DOUBLE PRECISION +*> The 1,1 element in the diagonal matrix D. +*> \endverbatim +*> +*> \param[in] D2 +*> \verbatim +*> D2 is DOUBLE PRECISION +*> The 2,2 element in the diagonal matrix D. Not used if NA=1. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NW) +*> The NA x NW matrix B (right-hand side). If NW=2 ("w" is +*> complex), column 1 contains the real part of B and column 2 +*> contains the imaginary part. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. It must be at least NA. +*> \endverbatim +*> +*> \param[in] WR +*> \verbatim +*> WR is DOUBLE PRECISION +*> The real part of the scalar "w". +*> \endverbatim +*> +*> \param[in] WI +*> \verbatim +*> WI is DOUBLE PRECISION +*> The imaginary part of the scalar "w". Not used if NW=1. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NW) +*> The NA x NW matrix X (unknowns), as computed by DLALN2. +*> If NW=2 ("w" is complex), on exit, column 1 will contain +*> the real part of X and column 2 will contain the imaginary +*> part. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of X. It must be at least NA. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> The scale factor that B must be multiplied by to insure +*> that overflow does not occur when computing X. Thus, +*> (ca A - w D) X will be SCALE*B, not B (ignoring +*> perturbations of A.) It will be at most 1. +*> \endverbatim +*> +*> \param[out] XNORM +*> \verbatim +*> XNORM is DOUBLE PRECISION +*> The infinity-norm of X, when X is regarded as an NA x NW +*> real matrix. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> An error flag. It will be set to zero if no error occurs, +*> a negative number if an argument is in error, or a positive +*> number if ca A - w D had to be perturbed. +*> The possible values are: +*> = 0: No error occurred, and (ca A - w D) did not have to be +*> perturbed. +*> = 1: (ca A - w D) had to be perturbed to make its smallest +*> (or only) singular value greater than SMIN. +*> NOTE: In the interests of speed, this routine does not +*> check the inputs for errors. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLALN2( LTRANS, NA, NW, SMIN, CA, A, LDA, D1, D2, B, + $ LDB, WR, WI, X, LDX, SCALE, XNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL LTRANS + INTEGER INFO, LDA, LDB, LDX, NA, NW + DOUBLE PRECISION CA, D1, D2, SCALE, SMIN, WI, WR, XNORM +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D0 ) +* .. +* .. Local Scalars .. + INTEGER ICMAX, J + DOUBLE PRECISION BBND, BI1, BI2, BIGNUM, BNORM, BR1, BR2, CI21, + $ CI22, CMAX, CNORM, CR21, CR22, CSI, CSR, LI21, + $ LR21, SMINI, SMLNUM, TEMP, U22ABS, UI11, UI11R, + $ UI12, UI12S, UI22, UR11, UR11R, UR12, UR12S, + $ UR22, XI1, XI2, XR1, XR2 +* .. +* .. Local Arrays .. + LOGICAL RSWAP( 4 ), ZSWAP( 4 ) + INTEGER IPIVOT( 4, 4 ) + DOUBLE PRECISION CI( 2, 2 ), CIV( 4 ), CR( 2, 2 ), CRV( 4 ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Equivalences .. + EQUIVALENCE ( CI( 1, 1 ), CIV( 1 ) ), + $ ( CR( 1, 1 ), CRV( 1 ) ) +* .. +* .. Data statements .. + DATA ZSWAP / .FALSE., .FALSE., .TRUE., .TRUE. / + DATA RSWAP / .FALSE., .TRUE., .FALSE., .TRUE. / + DATA IPIVOT / 1, 2, 3, 4, 2, 1, 4, 3, 3, 4, 1, 2, 4, + $ 3, 2, 1 / +* .. +* .. Executable Statements .. +* +* Compute BIGNUM +* + SMLNUM = TWO*DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + SMINI = MAX( SMIN, SMLNUM ) +* +* Don't check for input errors +* + INFO = 0 +* +* Standard Initializations +* + SCALE = ONE +* + IF( NA.EQ.1 ) THEN +* +* 1 x 1 (i.e., scalar) system C X = B +* + IF( NW.EQ.1 ) THEN +* +* Real 1x1 system. +* +* C = ca A - w D +* + CSR = CA*A( 1, 1 ) - WR*D1 + CNORM = ABS( CSR ) +* +* If | C | < SMINI, use C = SMINI +* + IF( CNORM.LT.SMINI ) THEN + CSR = SMINI + CNORM = SMINI + INFO = 1 + END IF +* +* Check scaling for X = B / C +* + BNORM = ABS( B( 1, 1 ) ) + IF( CNORM.LT.ONE .AND. BNORM.GT.ONE ) THEN + IF( BNORM.GT.BIGNUM*CNORM ) + $ SCALE = ONE / BNORM + END IF +* +* Compute X +* + X( 1, 1 ) = ( B( 1, 1 )*SCALE ) / CSR + XNORM = ABS( X( 1, 1 ) ) + ELSE +* +* Complex 1x1 system (w is complex) +* +* C = ca A - w D +* + CSR = CA*A( 1, 1 ) - WR*D1 + CSI = -WI*D1 + CNORM = ABS( CSR ) + ABS( CSI ) +* +* If | C | < SMINI, use C = SMINI +* + IF( CNORM.LT.SMINI ) THEN + CSR = SMINI + CSI = ZERO + CNORM = SMINI + INFO = 1 + END IF +* +* Check scaling for X = B / C +* + BNORM = ABS( B( 1, 1 ) ) + ABS( B( 1, 2 ) ) + IF( CNORM.LT.ONE .AND. BNORM.GT.ONE ) THEN + IF( BNORM.GT.BIGNUM*CNORM ) + $ SCALE = ONE / BNORM + END IF +* +* Compute X +* + CALL DLADIV( SCALE*B( 1, 1 ), SCALE*B( 1, 2 ), CSR, CSI, + $ X( 1, 1 ), X( 1, 2 ) ) + XNORM = ABS( X( 1, 1 ) ) + ABS( X( 1, 2 ) ) + END IF +* + ELSE +* +* 2x2 System +* +* Compute the real part of C = ca A - w D (or ca A**T - w D ) +* + CR( 1, 1 ) = CA*A( 1, 1 ) - WR*D1 + CR( 2, 2 ) = CA*A( 2, 2 ) - WR*D2 + IF( LTRANS ) THEN + CR( 1, 2 ) = CA*A( 2, 1 ) + CR( 2, 1 ) = CA*A( 1, 2 ) + ELSE + CR( 2, 1 ) = CA*A( 2, 1 ) + CR( 1, 2 ) = CA*A( 1, 2 ) + END IF +* + IF( NW.EQ.1 ) THEN +* +* Real 2x2 system (w is real) +* +* Find the largest element in C +* + CMAX = ZERO + ICMAX = 0 +* + DO 10 J = 1, 4 + IF( ABS( CRV( J ) ).GT.CMAX ) THEN + CMAX = ABS( CRV( J ) ) + ICMAX = J + END IF + 10 CONTINUE +* +* If norm(C) < SMINI, use SMINI*identity. +* + IF( CMAX.LT.SMINI ) THEN + BNORM = MAX( ABS( B( 1, 1 ) ), ABS( B( 2, 1 ) ) ) + IF( SMINI.LT.ONE .AND. BNORM.GT.ONE ) THEN + IF( BNORM.GT.BIGNUM*SMINI ) + $ SCALE = ONE / BNORM + END IF + TEMP = SCALE / SMINI + X( 1, 1 ) = TEMP*B( 1, 1 ) + X( 2, 1 ) = TEMP*B( 2, 1 ) + XNORM = TEMP*BNORM + INFO = 1 + RETURN + END IF +* +* Gaussian elimination with complete pivoting. +* + UR11 = CRV( ICMAX ) + CR21 = CRV( IPIVOT( 2, ICMAX ) ) + UR12 = CRV( IPIVOT( 3, ICMAX ) ) + CR22 = CRV( IPIVOT( 4, ICMAX ) ) + UR11R = ONE / UR11 + LR21 = UR11R*CR21 + UR22 = CR22 - UR12*LR21 +* +* If smaller pivot < SMINI, use SMINI +* + IF( ABS( UR22 ).LT.SMINI ) THEN + UR22 = SMINI + INFO = 1 + END IF + IF( RSWAP( ICMAX ) ) THEN + BR1 = B( 2, 1 ) + BR2 = B( 1, 1 ) + ELSE + BR1 = B( 1, 1 ) + BR2 = B( 2, 1 ) + END IF + BR2 = BR2 - LR21*BR1 + BBND = MAX( ABS( BR1*( UR22*UR11R ) ), ABS( BR2 ) ) + IF( BBND.GT.ONE .AND. ABS( UR22 ).LT.ONE ) THEN + IF( BBND.GE.BIGNUM*ABS( UR22 ) ) + $ SCALE = ONE / BBND + END IF +* + XR2 = ( BR2*SCALE ) / UR22 + XR1 = ( SCALE*BR1 )*UR11R - XR2*( UR11R*UR12 ) + IF( ZSWAP( ICMAX ) ) THEN + X( 1, 1 ) = XR2 + X( 2, 1 ) = XR1 + ELSE + X( 1, 1 ) = XR1 + X( 2, 1 ) = XR2 + END IF + XNORM = MAX( ABS( XR1 ), ABS( XR2 ) ) +* +* Further scaling if norm(A) norm(X) > overflow +* + IF( XNORM.GT.ONE .AND. CMAX.GT.ONE ) THEN + IF( XNORM.GT.BIGNUM / CMAX ) THEN + TEMP = CMAX / BIGNUM + X( 1, 1 ) = TEMP*X( 1, 1 ) + X( 2, 1 ) = TEMP*X( 2, 1 ) + XNORM = TEMP*XNORM + SCALE = TEMP*SCALE + END IF + END IF + ELSE +* +* Complex 2x2 system (w is complex) +* +* Find the largest element in C +* + CI( 1, 1 ) = -WI*D1 + CI( 2, 1 ) = ZERO + CI( 1, 2 ) = ZERO + CI( 2, 2 ) = -WI*D2 + CMAX = ZERO + ICMAX = 0 +* + DO 20 J = 1, 4 + IF( ABS( CRV( J ) )+ABS( CIV( J ) ).GT.CMAX ) THEN + CMAX = ABS( CRV( J ) ) + ABS( CIV( J ) ) + ICMAX = J + END IF + 20 CONTINUE +* +* If norm(C) < SMINI, use SMINI*identity. +* + IF( CMAX.LT.SMINI ) THEN + BNORM = MAX( ABS( B( 1, 1 ) )+ABS( B( 1, 2 ) ), + $ ABS( B( 2, 1 ) )+ABS( B( 2, 2 ) ) ) + IF( SMINI.LT.ONE .AND. BNORM.GT.ONE ) THEN + IF( BNORM.GT.BIGNUM*SMINI ) + $ SCALE = ONE / BNORM + END IF + TEMP = SCALE / SMINI + X( 1, 1 ) = TEMP*B( 1, 1 ) + X( 2, 1 ) = TEMP*B( 2, 1 ) + X( 1, 2 ) = TEMP*B( 1, 2 ) + X( 2, 2 ) = TEMP*B( 2, 2 ) + XNORM = TEMP*BNORM + INFO = 1 + RETURN + END IF +* +* Gaussian elimination with complete pivoting. +* + UR11 = CRV( ICMAX ) + UI11 = CIV( ICMAX ) + CR21 = CRV( IPIVOT( 2, ICMAX ) ) + CI21 = CIV( IPIVOT( 2, ICMAX ) ) + UR12 = CRV( IPIVOT( 3, ICMAX ) ) + UI12 = CIV( IPIVOT( 3, ICMAX ) ) + CR22 = CRV( IPIVOT( 4, ICMAX ) ) + CI22 = CIV( IPIVOT( 4, ICMAX ) ) + IF( ICMAX.EQ.1 .OR. ICMAX.EQ.4 ) THEN +* +* Code when off-diagonals of pivoted C are real +* + IF( ABS( UR11 ).GT.ABS( UI11 ) ) THEN + TEMP = UI11 / UR11 + UR11R = ONE / ( UR11*( ONE+TEMP**2 ) ) + UI11R = -TEMP*UR11R + ELSE + TEMP = UR11 / UI11 + UI11R = -ONE / ( UI11*( ONE+TEMP**2 ) ) + UR11R = -TEMP*UI11R + END IF + LR21 = CR21*UR11R + LI21 = CR21*UI11R + UR12S = UR12*UR11R + UI12S = UR12*UI11R + UR22 = CR22 - UR12*LR21 + UI22 = CI22 - UR12*LI21 + ELSE +* +* Code when diagonals of pivoted C are real +* + UR11R = ONE / UR11 + UI11R = ZERO + LR21 = CR21*UR11R + LI21 = CI21*UR11R + UR12S = UR12*UR11R + UI12S = UI12*UR11R + UR22 = CR22 - UR12*LR21 + UI12*LI21 + UI22 = -UR12*LI21 - UI12*LR21 + END IF + U22ABS = ABS( UR22 ) + ABS( UI22 ) +* +* If smaller pivot < SMINI, use SMINI +* + IF( U22ABS.LT.SMINI ) THEN + UR22 = SMINI + UI22 = ZERO + INFO = 1 + END IF + IF( RSWAP( ICMAX ) ) THEN + BR2 = B( 1, 1 ) + BR1 = B( 2, 1 ) + BI2 = B( 1, 2 ) + BI1 = B( 2, 2 ) + ELSE + BR1 = B( 1, 1 ) + BR2 = B( 2, 1 ) + BI1 = B( 1, 2 ) + BI2 = B( 2, 2 ) + END IF + BR2 = BR2 - LR21*BR1 + LI21*BI1 + BI2 = BI2 - LI21*BR1 - LR21*BI1 + BBND = MAX( ( ABS( BR1 )+ABS( BI1 ) )* + $ ( U22ABS*( ABS( UR11R )+ABS( UI11R ) ) ), + $ ABS( BR2 )+ABS( BI2 ) ) + IF( BBND.GT.ONE .AND. U22ABS.LT.ONE ) THEN + IF( BBND.GE.BIGNUM*U22ABS ) THEN + SCALE = ONE / BBND + BR1 = SCALE*BR1 + BI1 = SCALE*BI1 + BR2 = SCALE*BR2 + BI2 = SCALE*BI2 + END IF + END IF +* + CALL DLADIV( BR2, BI2, UR22, UI22, XR2, XI2 ) + XR1 = UR11R*BR1 - UI11R*BI1 - UR12S*XR2 + UI12S*XI2 + XI1 = UI11R*BR1 + UR11R*BI1 - UI12S*XR2 - UR12S*XI2 + IF( ZSWAP( ICMAX ) ) THEN + X( 1, 1 ) = XR2 + X( 2, 1 ) = XR1 + X( 1, 2 ) = XI2 + X( 2, 2 ) = XI1 + ELSE + X( 1, 1 ) = XR1 + X( 2, 1 ) = XR2 + X( 1, 2 ) = XI1 + X( 2, 2 ) = XI2 + END IF + XNORM = MAX( ABS( XR1 )+ABS( XI1 ), ABS( XR2 )+ABS( XI2 ) ) +* +* Further scaling if norm(A) norm(X) > overflow +* + IF( XNORM.GT.ONE .AND. CMAX.GT.ONE ) THEN + IF( XNORM.GT.BIGNUM / CMAX ) THEN + TEMP = CMAX / BIGNUM + X( 1, 1 ) = TEMP*X( 1, 1 ) + X( 2, 1 ) = TEMP*X( 2, 1 ) + X( 1, 2 ) = TEMP*X( 1, 2 ) + X( 2, 2 ) = TEMP*X( 2, 2 ) + XNORM = TEMP*XNORM + SCALE = TEMP*SCALE + END IF + END IF + END IF + END IF +* + RETURN +* +* End of DLALN2 +* + END diff --git a/dspl/liblapack/SRC/dlals0.f b/dspl/liblapack/SRC/dlals0.f new file mode 100644 index 0000000..d4cff16 --- /dev/null +++ b/dspl/liblapack/SRC/dlals0.f @@ -0,0 +1,499 @@ +*> \brief \b DLALS0 applies back multiplying factors in solving the least squares problem using divide and conquer SVD approach. Used by sgelsd. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLALS0 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX, +* PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, +* POLES, DIFL, DIFR, Z, K, C, S, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL, +* $ LDGNUM, NL, NR, NRHS, SQRE +* DOUBLE PRECISION C, S +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( LDGCOL, * ), PERM( * ) +* DOUBLE PRECISION B( LDB, * ), BX( LDBX, * ), DIFL( * ), +* $ DIFR( LDGNUM, * ), GIVNUM( LDGNUM, * ), +* $ POLES( LDGNUM, * ), WORK( * ), Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLALS0 applies back the multiplying factors of either the left or the +*> right singular vector matrix of a diagonal matrix appended by a row +*> to the right hand side matrix B in solving the least squares problem +*> using the divide-and-conquer SVD approach. +*> +*> For the left singular vector matrix, three types of orthogonal +*> matrices are involved: +*> +*> (1L) Givens rotations: the number of such rotations is GIVPTR; the +*> pairs of columns/rows they were applied to are stored in GIVCOL; +*> and the C- and S-values of these rotations are stored in GIVNUM. +*> +*> (2L) Permutation. The (NL+1)-st row of B is to be moved to the first +*> row, and for J=2:N, PERM(J)-th row of B is to be moved to the +*> J-th row. +*> +*> (3L) The left singular vector matrix of the remaining matrix. +*> +*> For the right singular vector matrix, four types of orthogonal +*> matrices are involved: +*> +*> (1R) The right singular vector matrix of the remaining matrix. +*> +*> (2R) If SQRE = 1, one extra Givens rotation to generate the right +*> null space. +*> +*> (3R) The inverse transformation of (2L). +*> +*> (4R) The inverse transformation of (1L). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> Specifies whether singular vectors are to be computed in +*> factored form: +*> = 0: Left singular vector matrix. +*> = 1: Right singular vector matrix. +*> \endverbatim +*> +*> \param[in] NL +*> \verbatim +*> NL is INTEGER +*> The row dimension of the upper block. NL >= 1. +*> \endverbatim +*> +*> \param[in] NR +*> \verbatim +*> NR is INTEGER +*> The row dimension of the lower block. NR >= 1. +*> \endverbatim +*> +*> \param[in] SQRE +*> \verbatim +*> SQRE is INTEGER +*> = 0: the lower block is an NR-by-NR square matrix. +*> = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +*> +*> The bidiagonal matrix has row dimension N = NL + NR + 1, +*> and column dimension M = N + SQRE. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of columns of B and BX. NRHS must be at least 1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension ( LDB, NRHS ) +*> On input, B contains the right hand sides of the least +*> squares problem in rows 1 through M. On output, B contains +*> the solution X in rows 1 through N. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB must be at least +*> max(1,MAX( M, N ) ). +*> \endverbatim +*> +*> \param[out] BX +*> \verbatim +*> BX is DOUBLE PRECISION array, dimension ( LDBX, NRHS ) +*> \endverbatim +*> +*> \param[in] LDBX +*> \verbatim +*> LDBX is INTEGER +*> The leading dimension of BX. +*> \endverbatim +*> +*> \param[in] PERM +*> \verbatim +*> PERM is INTEGER array, dimension ( N ) +*> The permutations (from deflation and sorting) applied +*> to the two blocks. +*> \endverbatim +*> +*> \param[in] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER +*> The number of Givens rotations which took place in this +*> subproblem. +*> \endverbatim +*> +*> \param[in] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension ( LDGCOL, 2 ) +*> Each pair of numbers indicates a pair of rows/columns +*> involved in a Givens rotation. +*> \endverbatim +*> +*> \param[in] LDGCOL +*> \verbatim +*> LDGCOL is INTEGER +*> The leading dimension of GIVCOL, must be at least N. +*> \endverbatim +*> +*> \param[in] GIVNUM +*> \verbatim +*> GIVNUM is DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) +*> Each number indicates the C or S value used in the +*> corresponding Givens rotation. +*> \endverbatim +*> +*> \param[in] LDGNUM +*> \verbatim +*> LDGNUM is INTEGER +*> The leading dimension of arrays DIFR, POLES and +*> GIVNUM, must be at least K. +*> \endverbatim +*> +*> \param[in] POLES +*> \verbatim +*> POLES is DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) +*> On entry, POLES(1:K, 1) contains the new singular +*> values obtained from solving the secular equation, and +*> POLES(1:K, 2) is an array containing the poles in the secular +*> equation. +*> \endverbatim +*> +*> \param[in] DIFL +*> \verbatim +*> DIFL is DOUBLE PRECISION array, dimension ( K ). +*> On entry, DIFL(I) is the distance between I-th updated +*> (undeflated) singular value and the I-th (undeflated) old +*> singular value. +*> \endverbatim +*> +*> \param[in] DIFR +*> \verbatim +*> DIFR is DOUBLE PRECISION array, dimension ( LDGNUM, 2 ). +*> On entry, DIFR(I, 1) contains the distances between I-th +*> updated (undeflated) singular value and the I+1-th +*> (undeflated) old singular value. And DIFR(I, 2) is the +*> normalizing factor for the I-th right singular vector. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension ( K ) +*> Contain the components of the deflation-adjusted updating row +*> vector. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> Contains the dimension of the non-deflated matrix, +*> This is the order of the related secular equation. 1 <= K <=N. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION +*> C contains garbage if SQRE =0 and the C-value of a Givens +*> rotation related to the right null space if SQRE = 1. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is DOUBLE PRECISION +*> S contains garbage if SQRE =0 and the S-value of a Givens +*> rotation related to the right null space if SQRE = 1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension ( K ) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Ren-Cang Li, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +* +* ===================================================================== + SUBROUTINE DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX, + $ PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, + $ POLES, DIFL, DIFR, Z, K, C, S, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL, + $ LDGNUM, NL, NR, NRHS, SQRE + DOUBLE PRECISION C, S +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), PERM( * ) + DOUBLE PRECISION B( LDB, * ), BX( LDBX, * ), DIFL( * ), + $ DIFR( LDGNUM, * ), GIVNUM( LDGNUM, * ), + $ POLES( LDGNUM, * ), WORK( * ), Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO, NEGONE + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0, NEGONE = -1.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, M, N, NLP1 + DOUBLE PRECISION DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, TEMP +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMV, DLACPY, DLASCL, DROT, DSCAL, + $ XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3, DNRM2 + EXTERNAL DLAMC3, DNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + N = NL + NR + 1 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( NL.LT.1 ) THEN + INFO = -2 + ELSE IF( NR.LT.1 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -5 + ELSE IF( LDB.LT.N ) THEN + INFO = -7 + ELSE IF( LDBX.LT.N ) THEN + INFO = -9 + ELSE IF( GIVPTR.LT.0 ) THEN + INFO = -11 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -13 + ELSE IF( LDGNUM.LT.N ) THEN + INFO = -15 + ELSE IF( K.LT.1 ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLALS0', -INFO ) + RETURN + END IF +* + M = N + SQRE + NLP1 = NL + 1 +* + IF( ICOMPQ.EQ.0 ) THEN +* +* Apply back orthogonal transformations from the left. +* +* Step (1L): apply back the Givens rotations performed. +* + DO 10 I = 1, GIVPTR + CALL DROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB, + $ B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ), + $ GIVNUM( I, 1 ) ) + 10 CONTINUE +* +* Step (2L): permute rows of B. +* + CALL DCOPY( NRHS, B( NLP1, 1 ), LDB, BX( 1, 1 ), LDBX ) + DO 20 I = 2, N + CALL DCOPY( NRHS, B( PERM( I ), 1 ), LDB, BX( I, 1 ), LDBX ) + 20 CONTINUE +* +* Step (3L): apply the inverse of the left singular vector +* matrix to BX. +* + IF( K.EQ.1 ) THEN + CALL DCOPY( NRHS, BX, LDBX, B, LDB ) + IF( Z( 1 ).LT.ZERO ) THEN + CALL DSCAL( NRHS, NEGONE, B, LDB ) + END IF + ELSE + DO 50 J = 1, K + DIFLJ = DIFL( J ) + DJ = POLES( J, 1 ) + DSIGJ = -POLES( J, 2 ) + IF( J.LT.K ) THEN + DIFRJ = -DIFR( J, 1 ) + DSIGJP = -POLES( J+1, 2 ) + END IF + IF( ( Z( J ).EQ.ZERO ) .OR. ( POLES( J, 2 ).EQ.ZERO ) ) + $ THEN + WORK( J ) = ZERO + ELSE + WORK( J ) = -POLES( J, 2 )*Z( J ) / DIFLJ / + $ ( POLES( J, 2 )+DJ ) + END IF + DO 30 I = 1, J - 1 + IF( ( Z( I ).EQ.ZERO ) .OR. + $ ( POLES( I, 2 ).EQ.ZERO ) ) THEN + WORK( I ) = ZERO + ELSE + WORK( I ) = POLES( I, 2 )*Z( I ) / + $ ( DLAMC3( POLES( I, 2 ), DSIGJ )- + $ DIFLJ ) / ( POLES( I, 2 )+DJ ) + END IF + 30 CONTINUE + DO 40 I = J + 1, K + IF( ( Z( I ).EQ.ZERO ) .OR. + $ ( POLES( I, 2 ).EQ.ZERO ) ) THEN + WORK( I ) = ZERO + ELSE + WORK( I ) = POLES( I, 2 )*Z( I ) / + $ ( DLAMC3( POLES( I, 2 ), DSIGJP )+ + $ DIFRJ ) / ( POLES( I, 2 )+DJ ) + END IF + 40 CONTINUE + WORK( 1 ) = NEGONE + TEMP = DNRM2( K, WORK, 1 ) + CALL DGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO, + $ B( J, 1 ), LDB ) + CALL DLASCL( 'G', 0, 0, TEMP, ONE, 1, NRHS, B( J, 1 ), + $ LDB, INFO ) + 50 CONTINUE + END IF +* +* Move the deflated rows of BX to B also. +* + IF( K.LT.MAX( M, N ) ) + $ CALL DLACPY( 'A', N-K, NRHS, BX( K+1, 1 ), LDBX, + $ B( K+1, 1 ), LDB ) + ELSE +* +* Apply back the right orthogonal transformations. +* +* Step (1R): apply back the new right singular vector matrix +* to B. +* + IF( K.EQ.1 ) THEN + CALL DCOPY( NRHS, B, LDB, BX, LDBX ) + ELSE + DO 80 J = 1, K + DSIGJ = POLES( J, 2 ) + IF( Z( J ).EQ.ZERO ) THEN + WORK( J ) = ZERO + ELSE + WORK( J ) = -Z( J ) / DIFL( J ) / + $ ( DSIGJ+POLES( J, 1 ) ) / DIFR( J, 2 ) + END IF + DO 60 I = 1, J - 1 + IF( Z( J ).EQ.ZERO ) THEN + WORK( I ) = ZERO + ELSE + WORK( I ) = Z( J ) / ( DLAMC3( DSIGJ, -POLES( I+1, + $ 2 ) )-DIFR( I, 1 ) ) / + $ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 ) + END IF + 60 CONTINUE + DO 70 I = J + 1, K + IF( Z( J ).EQ.ZERO ) THEN + WORK( I ) = ZERO + ELSE + WORK( I ) = Z( J ) / ( DLAMC3( DSIGJ, -POLES( I, + $ 2 ) )-DIFL( I ) ) / + $ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 ) + END IF + 70 CONTINUE + CALL DGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO, + $ BX( J, 1 ), LDBX ) + 80 CONTINUE + END IF +* +* Step (2R): if SQRE = 1, apply back the rotation that is +* related to the right null space of the subproblem. +* + IF( SQRE.EQ.1 ) THEN + CALL DCOPY( NRHS, B( M, 1 ), LDB, BX( M, 1 ), LDBX ) + CALL DROT( NRHS, BX( 1, 1 ), LDBX, BX( M, 1 ), LDBX, C, S ) + END IF + IF( K.LT.MAX( M, N ) ) + $ CALL DLACPY( 'A', N-K, NRHS, B( K+1, 1 ), LDB, BX( K+1, 1 ), + $ LDBX ) +* +* Step (3R): permute rows of B. +* + CALL DCOPY( NRHS, BX( 1, 1 ), LDBX, B( NLP1, 1 ), LDB ) + IF( SQRE.EQ.1 ) THEN + CALL DCOPY( NRHS, BX( M, 1 ), LDBX, B( M, 1 ), LDB ) + END IF + DO 90 I = 2, N + CALL DCOPY( NRHS, BX( I, 1 ), LDBX, B( PERM( I ), 1 ), LDB ) + 90 CONTINUE +* +* Step (4R): apply back the Givens rotations performed. +* + DO 100 I = GIVPTR, 1, -1 + CALL DROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB, + $ B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ), + $ -GIVNUM( I, 1 ) ) + 100 CONTINUE + END IF +* + RETURN +* +* End of DLALS0 +* + END diff --git a/dspl/liblapack/SRC/dlalsa.f b/dspl/liblapack/SRC/dlalsa.f new file mode 100644 index 0000000..b643f11 --- /dev/null +++ b/dspl/liblapack/SRC/dlalsa.f @@ -0,0 +1,493 @@ +*> \brief \b DLALSA computes the SVD of the coefficient matrix in compact form. Used by sgelsd. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLALSA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U, +* LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR, +* GIVCOL, LDGCOL, PERM, GIVNUM, C, S, WORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS, +* $ SMLSIZ +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), +* $ K( * ), PERM( LDGCOL, * ) +* DOUBLE PRECISION B( LDB, * ), BX( LDBX, * ), C( * ), +* $ DIFL( LDU, * ), DIFR( LDU, * ), +* $ GIVNUM( LDU, * ), POLES( LDU, * ), S( * ), +* $ U( LDU, * ), VT( LDU, * ), WORK( * ), +* $ Z( LDU, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLALSA is an itermediate step in solving the least squares problem +*> by computing the SVD of the coefficient matrix in compact form (The +*> singular vectors are computed as products of simple orthorgonal +*> matrices.). +*> +*> If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector +*> matrix of an upper bidiagonal matrix to the right hand side; and if +*> ICOMPQ = 1, DLALSA applies the right singular vector matrix to the +*> right hand side. The singular vector matrices were generated in +*> compact form by DLALSA. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> Specifies whether the left or the right singular vector +*> matrix is involved. +*> = 0: Left singular vector matrix +*> = 1: Right singular vector matrix +*> \endverbatim +*> +*> \param[in] SMLSIZ +*> \verbatim +*> SMLSIZ is INTEGER +*> The maximum size of the subproblems at the bottom of the +*> computation tree. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The row and column dimensions of the upper bidiagonal matrix. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of columns of B and BX. NRHS must be at least 1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension ( LDB, NRHS ) +*> On input, B contains the right hand sides of the least +*> squares problem in rows 1 through M. +*> On output, B contains the solution X in rows 1 through N. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B in the calling subprogram. +*> LDB must be at least max(1,MAX( M, N ) ). +*> \endverbatim +*> +*> \param[out] BX +*> \verbatim +*> BX is DOUBLE PRECISION array, dimension ( LDBX, NRHS ) +*> On exit, the result of applying the left or right singular +*> vector matrix to B. +*> \endverbatim +*> +*> \param[in] LDBX +*> \verbatim +*> LDBX is INTEGER +*> The leading dimension of BX. +*> \endverbatim +*> +*> \param[in] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ). +*> On entry, U contains the left singular vector matrices of all +*> subproblems at the bottom level. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER, LDU = > N. +*> The leading dimension of arrays U, VT, DIFL, DIFR, +*> POLES, GIVNUM, and Z. +*> \endverbatim +*> +*> \param[in] VT +*> \verbatim +*> VT is DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ). +*> On entry, VT**T contains the right singular vector matrices of +*> all subproblems at the bottom level. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER array, dimension ( N ). +*> \endverbatim +*> +*> \param[in] DIFL +*> \verbatim +*> DIFL is DOUBLE PRECISION array, dimension ( LDU, NLVL ). +*> where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. +*> \endverbatim +*> +*> \param[in] DIFR +*> \verbatim +*> DIFR is DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). +*> On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record +*> distances between singular values on the I-th level and +*> singular values on the (I -1)-th level, and DIFR(*, 2 * I) +*> record the normalizing factors of the right singular vectors +*> matrices of subproblems on I-th level. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension ( LDU, NLVL ). +*> On entry, Z(1, I) contains the components of the deflation- +*> adjusted updating row vector for subproblems on the I-th +*> level. +*> \endverbatim +*> +*> \param[in] POLES +*> \verbatim +*> POLES is DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). +*> On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old +*> singular values involved in the secular equations on the I-th +*> level. +*> \endverbatim +*> +*> \param[in] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER array, dimension ( N ). +*> On entry, GIVPTR( I ) records the number of Givens +*> rotations performed on the I-th problem on the computation +*> tree. +*> \endverbatim +*> +*> \param[in] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension ( LDGCOL, 2 * NLVL ). +*> On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the +*> locations of Givens rotations performed on the I-th level on +*> the computation tree. +*> \endverbatim +*> +*> \param[in] LDGCOL +*> \verbatim +*> LDGCOL is INTEGER, LDGCOL = > N. +*> The leading dimension of arrays GIVCOL and PERM. +*> \endverbatim +*> +*> \param[in] PERM +*> \verbatim +*> PERM is INTEGER array, dimension ( LDGCOL, NLVL ). +*> On entry, PERM(*, I) records permutations done on the I-th +*> level of the computation tree. +*> \endverbatim +*> +*> \param[in] GIVNUM +*> \verbatim +*> GIVNUM is DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). +*> On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- +*> values of Givens rotations performed on the I-th level on the +*> computation tree. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension ( N ). +*> On entry, if the I-th subproblem is not square, +*> C( I ) contains the C-value of a Givens rotation related to +*> the right null space of the I-th subproblem. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension ( N ). +*> On entry, if the I-th subproblem is not square, +*> S( I ) contains the S-value of a Givens rotation related to +*> the right null space of the I-th subproblem. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Ren-Cang Li, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +* +* ===================================================================== + SUBROUTINE DLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U, + $ LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR, + $ GIVCOL, LDGCOL, PERM, GIVNUM, C, S, WORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS, + $ SMLSIZ +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), + $ K( * ), PERM( LDGCOL, * ) + DOUBLE PRECISION B( LDB, * ), BX( LDBX, * ), C( * ), + $ DIFL( LDU, * ), DIFR( LDU, * ), + $ GIVNUM( LDU, * ), POLES( LDU, * ), S( * ), + $ U( LDU, * ), VT( LDU, * ), WORK( * ), + $ Z( LDU, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, IC, IM1, INODE, J, LF, LL, LVL, LVL2, + $ ND, NDB1, NDIML, NDIMR, NL, NLF, NLP1, NLVL, + $ NR, NRF, NRP1, SQRE +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DLALS0, DLASDT, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( SMLSIZ.LT.3 ) THEN + INFO = -2 + ELSE IF( N.LT.SMLSIZ ) THEN + INFO = -3 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -4 + ELSE IF( LDB.LT.N ) THEN + INFO = -6 + ELSE IF( LDBX.LT.N ) THEN + INFO = -8 + ELSE IF( LDU.LT.N ) THEN + INFO = -10 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -19 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLALSA', -INFO ) + RETURN + END IF +* +* Book-keeping and setting up the computation tree. +* + INODE = 1 + NDIML = INODE + N + NDIMR = NDIML + N +* + CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ), + $ IWORK( NDIMR ), SMLSIZ ) +* +* The following code applies back the left singular vector factors. +* For applying back the right singular vector factors, go to 50. +* + IF( ICOMPQ.EQ.1 ) THEN + GO TO 50 + END IF +* +* The nodes on the bottom level of the tree were solved +* by DLASDQ. The corresponding left and right singular vector +* matrices are in explicit form. First apply back the left +* singular vector matrices. +* + NDB1 = ( ND+1 ) / 2 + DO 10 I = NDB1, ND +* +* IC : center row of each node +* NL : number of rows of left subproblem +* NR : number of rows of right subproblem +* NLF: starting row of the left subproblem +* NRF: starting row of the right subproblem +* + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NR = IWORK( NDIMR+I1 ) + NLF = IC - NL + NRF = IC + 1 + CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, + $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) + CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, + $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) + 10 CONTINUE +* +* Next copy the rows of B that correspond to unchanged rows +* in the bidiagonal matrix to BX. +* + DO 20 I = 1, ND + IC = IWORK( INODE+I-1 ) + CALL DCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX ) + 20 CONTINUE +* +* Finally go through the left singular vector matrices of all +* the other subproblems bottom-up on the tree. +* + J = 2**NLVL + SQRE = 0 +* + DO 40 LVL = NLVL, 1, -1 + LVL2 = 2*LVL - 1 +* +* find the first node LF and last node LL on +* the current level LVL +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 30 I = LF, LL + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + J = J - 1 + CALL DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX, + $ B( NLF, 1 ), LDB, PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ), + $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ), + $ Z( NLF, LVL ), K( J ), C( J ), S( J ), WORK, + $ INFO ) + 30 CONTINUE + 40 CONTINUE + GO TO 90 +* +* ICOMPQ = 1: applying back the right singular vector factors. +* + 50 CONTINUE +* +* First now go through the right singular vector matrices of all +* the tree nodes top-down. +* + J = 0 + DO 70 LVL = 1, NLVL + LVL2 = 2*LVL - 1 +* +* Find the first node LF and last node LL on +* the current level LVL. +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 60 I = LL, LF, -1 + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + IF( I.EQ.LL ) THEN + SQRE = 0 + ELSE + SQRE = 1 + END IF + J = J + 1 + CALL DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB, + $ BX( NLF, 1 ), LDBX, PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ), + $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ), + $ Z( NLF, LVL ), K( J ), C( J ), S( J ), WORK, + $ INFO ) + 60 CONTINUE + 70 CONTINUE +* +* The nodes on the bottom level of the tree were solved +* by DLASDQ. The corresponding right singular vector +* matrices are in explicit form. Apply them back. +* + NDB1 = ( ND+1 ) / 2 + DO 80 I = NDB1, ND + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NR = IWORK( NDIMR+I1 ) + NLP1 = NL + 1 + IF( I.EQ.ND ) THEN + NRP1 = NR + ELSE + NRP1 = NR + 1 + END IF + NLF = IC - NL + NRF = IC + 1 + CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, + $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) + CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, + $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) + 80 CONTINUE +* + 90 CONTINUE +* + RETURN +* +* End of DLALSA +* + END diff --git a/dspl/liblapack/SRC/dlalsd.f b/dspl/liblapack/SRC/dlalsd.f new file mode 100644 index 0000000..510e045 --- /dev/null +++ b/dspl/liblapack/SRC/dlalsd.f @@ -0,0 +1,523 @@ +*> \brief \b DLALSD uses the singular value decomposition of A to solve the least squares problem. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLALSD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND, +* RANK, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS, RANK, SMLSIZ +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION B( LDB, * ), D( * ), E( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLALSD uses the singular value decomposition of A to solve the least +*> squares problem of finding X to minimize the Euclidean norm of each +*> column of A*X-B, where A is N-by-N upper bidiagonal, and X and B +*> are N-by-NRHS. The solution X overwrites B. +*> +*> The singular values of A smaller than RCOND times the largest +*> singular value are treated as zero in solving the least squares +*> problem; in this case a minimum norm solution is returned. +*> The actual singular values are returned in D in ascending order. +*> +*> This code makes very mild assumptions about floating point +*> arithmetic. It will work on machines with a guard digit in +*> add/subtract, or on those binary machines without guard digits +*> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. +*> It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': D and E define an upper bidiagonal matrix. +*> = 'L': D and E define a lower bidiagonal matrix. +*> \endverbatim +*> +*> \param[in] SMLSIZ +*> \verbatim +*> SMLSIZ is INTEGER +*> The maximum size of the subproblems at the bottom of the +*> computation tree. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the bidiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of columns of B. NRHS must be at least 1. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry D contains the main diagonal of the bidiagonal +*> matrix. On exit, if INFO = 0, D contains its singular values. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> Contains the super-diagonal entries of the bidiagonal matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On input, B contains the right hand sides of the least +*> squares problem. On output, B contains the solution X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B in the calling subprogram. +*> LDB must be at least max(1,N). +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The singular values of A less than or equal to RCOND times +*> the largest singular value are treated as zero in solving +*> the least squares problem. If RCOND is negative, +*> machine precision is used instead. +*> For example, if diag(S)*X=B were the least squares problem, +*> where diag(S) is a diagonal matrix of singular values, the +*> solution would be X(i) = B(i) / S(i) if S(i) is greater than +*> RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to +*> RCOND*max(S). +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The number of singular values of A greater than RCOND times +*> the largest singular value. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension at least +*> (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2), +*> where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1). +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension at least +*> (3*N*NLVL + 11*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: The algorithm failed to compute a singular value while +*> working on the submatrix lying in rows and columns +*> INFO/(N+1) through MOD(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Ren-Cang Li, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +* +* ===================================================================== + SUBROUTINE DLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND, + $ RANK, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS, RANK, SMLSIZ + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION B( LDB, * ), D( * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 ) +* .. +* .. Local Scalars .. + INTEGER BX, BXST, C, DIFL, DIFR, GIVCOL, GIVNUM, + $ GIVPTR, I, ICMPQ1, ICMPQ2, IWK, J, K, NLVL, + $ NM1, NSIZE, NSUB, NWORK, PERM, POLES, S, SIZEI, + $ SMLSZP, SQRE, ST, ST1, U, VT, Z + DOUBLE PRECISION CS, EPS, ORGNRM, R, RCND, SN, TOL +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL IDAMAX, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DLACPY, DLALSA, DLARTG, DLASCL, + $ DLASDA, DLASDQ, DLASET, DLASRT, DROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, LOG, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -4 + ELSE IF( ( LDB.LT.1 ) .OR. ( LDB.LT.N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLALSD', -INFO ) + RETURN + END IF +* + EPS = DLAMCH( 'Epsilon' ) +* +* Set up the tolerance. +* + IF( ( RCOND.LE.ZERO ) .OR. ( RCOND.GE.ONE ) ) THEN + RCND = EPS + ELSE + RCND = RCOND + END IF +* + RANK = 0 +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + IF( D( 1 ).EQ.ZERO ) THEN + CALL DLASET( 'A', 1, NRHS, ZERO, ZERO, B, LDB ) + ELSE + RANK = 1 + CALL DLASCL( 'G', 0, 0, D( 1 ), ONE, 1, NRHS, B, LDB, INFO ) + D( 1 ) = ABS( D( 1 ) ) + END IF + RETURN + END IF +* +* Rotate the matrix if it is lower bidiagonal. +* + IF( UPLO.EQ.'L' ) THEN + DO 10 I = 1, N - 1 + CALL DLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( NRHS.EQ.1 ) THEN + CALL DROT( 1, B( I, 1 ), 1, B( I+1, 1 ), 1, CS, SN ) + ELSE + WORK( I*2-1 ) = CS + WORK( I*2 ) = SN + END IF + 10 CONTINUE + IF( NRHS.GT.1 ) THEN + DO 30 I = 1, NRHS + DO 20 J = 1, N - 1 + CS = WORK( J*2-1 ) + SN = WORK( J*2 ) + CALL DROT( 1, B( J, I ), 1, B( J+1, I ), 1, CS, SN ) + 20 CONTINUE + 30 CONTINUE + END IF + END IF +* +* Scale. +* + NM1 = N - 1 + ORGNRM = DLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) THEN + CALL DLASET( 'A', N, NRHS, ZERO, ZERO, B, LDB ) + RETURN + END IF +* + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, INFO ) +* +* If N is smaller than the minimum divide size SMLSIZ, then solve +* the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN + NWORK = 1 + N*N + CALL DLASET( 'A', N, N, ZERO, ONE, WORK, N ) + CALL DLASDQ( 'U', 0, N, N, 0, NRHS, D, E, WORK, N, WORK, N, B, + $ LDB, WORK( NWORK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + TOL = RCND*ABS( D( IDAMAX( N, D, 1 ) ) ) + DO 40 I = 1, N + IF( D( I ).LE.TOL ) THEN + CALL DLASET( 'A', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) + ELSE + CALL DLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, B( I, 1 ), + $ LDB, INFO ) + RANK = RANK + 1 + END IF + 40 CONTINUE + CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO, + $ WORK( NWORK ), N ) + CALL DLACPY( 'A', N, NRHS, WORK( NWORK ), N, B, LDB ) +* +* Unscale. +* + CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) + CALL DLASRT( 'D', N, D, INFO ) + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO ) +* + RETURN + END IF +* +* Book-keeping and setting up some constants. +* + NLVL = INT( LOG( DBLE( N ) / DBLE( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1 +* + SMLSZP = SMLSIZ + 1 +* + U = 1 + VT = 1 + SMLSIZ*N + DIFL = VT + SMLSZP*N + DIFR = DIFL + NLVL*N + Z = DIFR + NLVL*N*2 + C = Z + NLVL*N + S = C + N + POLES = S + N + GIVNUM = POLES + 2*NLVL*N + BX = GIVNUM + 2*NLVL*N + NWORK = BX + N*NRHS +* + SIZEI = 1 + N + K = SIZEI + N + GIVPTR = K + N + PERM = GIVPTR + N + GIVCOL = PERM + NLVL*N + IWK = GIVCOL + NLVL*N*2 +* + ST = 1 + SQRE = 0 + ICMPQ1 = 1 + ICMPQ2 = 0 + NSUB = 0 +* + DO 50 I = 1, N + IF( ABS( D( I ) ).LT.EPS ) THEN + D( I ) = SIGN( EPS, D( I ) ) + END IF + 50 CONTINUE +* + DO 60 I = 1, NM1 + IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN + NSUB = NSUB + 1 + IWORK( NSUB ) = ST +* +* Subproblem found. First determine its size and then +* apply divide and conquer on it. +* + IF( I.LT.NM1 ) THEN +* +* A subproblem with E(I) small for I < NM1. +* + NSIZE = I - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + ELSE IF( ABS( E( I ) ).GE.EPS ) THEN +* +* A subproblem with E(NM1) not too small but I = NM1. +* + NSIZE = N - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + ELSE +* +* A subproblem with E(NM1) small. This implies an +* 1-by-1 subproblem at D(N), which is not solved +* explicitly. +* + NSIZE = I - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + NSUB = NSUB + 1 + IWORK( NSUB ) = N + IWORK( SIZEI+NSUB-1 ) = 1 + CALL DCOPY( NRHS, B( N, 1 ), LDB, WORK( BX+NM1 ), N ) + END IF + ST1 = ST - 1 + IF( NSIZE.EQ.1 ) THEN +* +* This is a 1-by-1 subproblem and is not solved +* explicitly. +* + CALL DCOPY( NRHS, B( ST, 1 ), LDB, WORK( BX+ST1 ), N ) + ELSE IF( NSIZE.LE.SMLSIZ ) THEN +* +* This is a small subproblem and is solved by DLASDQ. +* + CALL DLASET( 'A', NSIZE, NSIZE, ZERO, ONE, + $ WORK( VT+ST1 ), N ) + CALL DLASDQ( 'U', 0, NSIZE, NSIZE, 0, NRHS, D( ST ), + $ E( ST ), WORK( VT+ST1 ), N, WORK( NWORK ), + $ N, B( ST, 1 ), LDB, WORK( NWORK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + CALL DLACPY( 'A', NSIZE, NRHS, B( ST, 1 ), LDB, + $ WORK( BX+ST1 ), N ) + ELSE +* +* A large problem. Solve it using divide and conquer. +* + CALL DLASDA( ICMPQ1, SMLSIZ, NSIZE, SQRE, D( ST ), + $ E( ST ), WORK( U+ST1 ), N, WORK( VT+ST1 ), + $ IWORK( K+ST1 ), WORK( DIFL+ST1 ), + $ WORK( DIFR+ST1 ), WORK( Z+ST1 ), + $ WORK( POLES+ST1 ), IWORK( GIVPTR+ST1 ), + $ IWORK( GIVCOL+ST1 ), N, IWORK( PERM+ST1 ), + $ WORK( GIVNUM+ST1 ), WORK( C+ST1 ), + $ WORK( S+ST1 ), WORK( NWORK ), IWORK( IWK ), + $ INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + BXST = BX + ST1 + CALL DLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, B( ST, 1 ), + $ LDB, WORK( BXST ), N, WORK( U+ST1 ), N, + $ WORK( VT+ST1 ), IWORK( K+ST1 ), + $ WORK( DIFL+ST1 ), WORK( DIFR+ST1 ), + $ WORK( Z+ST1 ), WORK( POLES+ST1 ), + $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N, + $ IWORK( PERM+ST1 ), WORK( GIVNUM+ST1 ), + $ WORK( C+ST1 ), WORK( S+ST1 ), WORK( NWORK ), + $ IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + ST = I + 1 + END IF + 60 CONTINUE +* +* Apply the singular values and treat the tiny ones as zero. +* + TOL = RCND*ABS( D( IDAMAX( N, D, 1 ) ) ) +* + DO 70 I = 1, N +* +* Some of the elements in D can be negative because 1-by-1 +* subproblems were not solved explicitly. +* + IF( ABS( D( I ) ).LE.TOL ) THEN + CALL DLASET( 'A', 1, NRHS, ZERO, ZERO, WORK( BX+I-1 ), N ) + ELSE + RANK = RANK + 1 + CALL DLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, + $ WORK( BX+I-1 ), N, INFO ) + END IF + D( I ) = ABS( D( I ) ) + 70 CONTINUE +* +* Now apply back the right singular vectors. +* + ICMPQ2 = 1 + DO 80 I = 1, NSUB + ST = IWORK( I ) + ST1 = ST - 1 + NSIZE = IWORK( SIZEI+I-1 ) + BXST = BX + ST1 + IF( NSIZE.EQ.1 ) THEN + CALL DCOPY( NRHS, WORK( BXST ), N, B( ST, 1 ), LDB ) + ELSE IF( NSIZE.LE.SMLSIZ ) THEN + CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ WORK( VT+ST1 ), N, WORK( BXST ), N, ZERO, + $ B( ST, 1 ), LDB ) + ELSE + CALL DLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, WORK( BXST ), N, + $ B( ST, 1 ), LDB, WORK( U+ST1 ), N, + $ WORK( VT+ST1 ), IWORK( K+ST1 ), + $ WORK( DIFL+ST1 ), WORK( DIFR+ST1 ), + $ WORK( Z+ST1 ), WORK( POLES+ST1 ), + $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N, + $ IWORK( PERM+ST1 ), WORK( GIVNUM+ST1 ), + $ WORK( C+ST1 ), WORK( S+ST1 ), WORK( NWORK ), + $ IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + 80 CONTINUE +* +* Unscale and sort the singular values. +* + CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) + CALL DLASRT( 'D', N, D, INFO ) + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO ) +* + RETURN +* +* End of DLALSD +* + END diff --git a/dspl/liblapack/SRC/dlamrg.f b/dspl/liblapack/SRC/dlamrg.f new file mode 100644 index 0000000..de19508 --- /dev/null +++ b/dspl/liblapack/SRC/dlamrg.f @@ -0,0 +1,171 @@ +*> \brief \b DLAMRG creates a permutation list to merge the entries of two independently sorted sets into a single set sorted in ascending order. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAMRG + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAMRG( N1, N2, A, DTRD1, DTRD2, INDEX ) +* +* .. Scalar Arguments .. +* INTEGER DTRD1, DTRD2, N1, N2 +* .. +* .. Array Arguments .. +* INTEGER INDEX( * ) +* DOUBLE PRECISION A( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAMRG will create a permutation list which will merge the elements +*> of A (which is composed of two independently sorted sets) into a +*> single set which is sorted in ascending order. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> \endverbatim +*> +*> \param[in] N2 +*> \verbatim +*> N2 is INTEGER +*> These arguments contain the respective lengths of the two +*> sorted lists to be merged. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (N1+N2) +*> The first N1 elements of A contain a list of numbers which +*> are sorted in either ascending or descending order. Likewise +*> for the final N2 elements. +*> \endverbatim +*> +*> \param[in] DTRD1 +*> \verbatim +*> DTRD1 is INTEGER +*> \endverbatim +*> +*> \param[in] DTRD2 +*> \verbatim +*> DTRD2 is INTEGER +*> These are the strides to be taken through the array A. +*> Allowable strides are 1 and -1. They indicate whether a +*> subset of A is sorted in ascending (DTRDx = 1) or descending +*> (DTRDx = -1) order. +*> \endverbatim +*> +*> \param[out] INDEX +*> \verbatim +*> INDEX is INTEGER array, dimension (N1+N2) +*> On exit this array will contain a permutation such that +*> if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be +*> sorted in ascending order. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DLAMRG( N1, N2, A, DTRD1, DTRD2, INDEX ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER DTRD1, DTRD2, N1, N2 +* .. +* .. Array Arguments .. + INTEGER INDEX( * ) + DOUBLE PRECISION A( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IND1, IND2, N1SV, N2SV +* .. +* .. Executable Statements .. +* + N1SV = N1 + N2SV = N2 + IF( DTRD1.GT.0 ) THEN + IND1 = 1 + ELSE + IND1 = N1 + END IF + IF( DTRD2.GT.0 ) THEN + IND2 = 1 + N1 + ELSE + IND2 = N1 + N2 + END IF + I = 1 +* while ( (N1SV > 0) & (N2SV > 0) ) + 10 CONTINUE + IF( N1SV.GT.0 .AND. N2SV.GT.0 ) THEN + IF( A( IND1 ).LE.A( IND2 ) ) THEN + INDEX( I ) = IND1 + I = I + 1 + IND1 = IND1 + DTRD1 + N1SV = N1SV - 1 + ELSE + INDEX( I ) = IND2 + I = I + 1 + IND2 = IND2 + DTRD2 + N2SV = N2SV - 1 + END IF + GO TO 10 + END IF +* end while + IF( N1SV.EQ.0 ) THEN + DO 20 N1SV = 1, N2SV + INDEX( I ) = IND2 + I = I + 1 + IND2 = IND2 + DTRD2 + 20 CONTINUE + ELSE +* N2SV .EQ. 0 + DO 30 N2SV = 1, N1SV + INDEX( I ) = IND1 + I = I + 1 + IND1 = IND1 + DTRD1 + 30 CONTINUE + END IF +* + RETURN +* +* End of DLAMRG +* + END diff --git a/dspl/liblapack/SRC/dlamswlq.f b/dspl/liblapack/SRC/dlamswlq.f new file mode 100644 index 0000000..19e32f8 --- /dev/null +++ b/dspl/liblapack/SRC/dlamswlq.f @@ -0,0 +1,418 @@ +* +* Definition: +* =========== +* +* SUBROUTINE DLAMSWLQ( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, +* $ LDT, C, LDC, WORK, LWORK, INFO ) +* +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC +* .. +* .. Array Arguments .. +* DOUBLE A( LDA, * ), WORK( * ), C(LDC, * ), +* $ T( LDT, * ) +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAMQRTS overwrites the general real M-by-N matrix C with +*> +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> where Q is a real orthogonal matrix defined as the product of blocked +*> elementary reflectors computed by short wide LQ +*> factorization (DLASWLQ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >=0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> M >= K >= 0; +*> +*> \endverbatim +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The row block size to be used in the blocked QR. +*> M >= MB >= 1 +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The column block size to be used in the blocked QR. +*> NB > M. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The block size to be used in the blocked QR. +*> MB > M. +*> +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the blocked +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DLASWLQ in the first k rows of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension +*> ( M * Number of blocks(CEIL(N-K/NB-K)), +*> The blocked upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See below +*> for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,NB) * MB; +*> if SIDE = 'R', LWORK >= max(1,M) * MB. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> Short-Wide LQ (SWLQ) performs LQ by a sequence of orthogonal transformations, +*> representing Q as a product of other orthogonal matrices +*> Q = Q(1) * Q(2) * . . . * Q(k) +*> where each Q(i) zeros out upper diagonal entries of a block of NB rows of A: +*> Q(1) zeros out the upper diagonal entries of rows 1:NB of A +*> Q(2) zeros out the bottom MB-N rows of rows [1:M,NB+1:2*NB-M] of A +*> Q(3) zeros out the bottom MB-N rows of rows [1:M,2*NB-M+1:3*NB-2*M] of A +*> . . . +*> +*> Q(1) is computed by GELQT, which represents Q(1) by Householder vectors +*> stored under the diagonal of rows 1:MB of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,1:N). +*> For more information see Further Details in GELQT. +*> +*> Q(i) for i>1 is computed by TPLQT, which represents Q(i) by Householder vectors +*> stored in columns [(i-1)*(NB-M)+M+1:i*(NB-M)+M] of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,(i-1)*M+1:i*M). +*> The last Q(k) may use fewer rows. +*> For more information see Further Details in TPQRT. +*> +*> For more details of the overall algorithm, see the description of +*> Sequential TSQR in Section 2.2 of [1]. +*> +*> [1] “Communication-Optimal Parallel and Sequential QR and LU Factorizations,” +*> J. Demmel, L. Grigori, M. Hoemmen, J. Langou, +*> SIAM J. Sci. Comput, vol. 34, no. 1, 2012 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLAMSWLQ( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, + $ LDT, C, LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), WORK( * ), C(LDC, * ), + $ T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN, LQUERY + INTEGER I, II, KK, CTR, LW +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL DTPMLQT, DGEMLQT, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + LQUERY = LWORK.LT.0 + NOTRAN = LSAME( TRANS, 'N' ) + TRAN = LSAME( TRANS, 'T' ) + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + IF (LEFT) THEN + LW = N * MB + ELSE + LW = M * MB + END IF +* + INFO = 0 + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -9 + ELSE IF( LDT.LT.MAX( 1, MB) ) THEN + INFO = -11 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -13 + ELSE IF(( LWORK.LT.MAX(1,LW)).AND.(.NOT.LQUERY)) THEN + INFO = -15 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAMSWLQ', -INFO ) + WORK(1) = LW + RETURN + ELSE IF (LQUERY) THEN + WORK(1) = LW + RETURN + END IF +* +* Quick return if possible +* + IF( MIN(M,N,K).EQ.0 ) THEN + RETURN + END IF +* + IF((NB.LE.K).OR.(NB.GE.MAX(M,N,K))) THEN + CALL DGEMLQT( SIDE, TRANS, M, N, K, MB, A, LDA, + $ T, LDT, C, LDC, WORK, INFO) + RETURN + END IF +* + IF(LEFT.AND.TRAN) THEN +* +* Multiply Q to the last block of C +* + KK = MOD((M-K),(NB-K)) + CTR = (M-K)/(NB-K) + IF (KK.GT.0) THEN + II=M-KK+1 + CALL DTPMLQT('L','T',KK , N, K, 0, MB, A(1,II), LDA, + $ T(1,CTR*K+1), LDT, C(1,1), LDC, + $ C(II,1), LDC, WORK, INFO ) + ELSE + II=M+1 + END IF +* + DO I=II-(NB-K),NB+1,-(NB-K) +* +* Multiply Q to the current block of C (1:M,I:I+NB) +* + CTR = CTR - 1 + CALL DTPMLQT('L','T',NB-K , N, K, 0,MB, A(1,I), LDA, + $ T(1, CTR*K+1),LDT, C(1,1), LDC, + $ C(I,1), LDC, WORK, INFO ) + + END DO +* +* Multiply Q to the first block of C (1:M,1:NB) +* + CALL DGEMLQT('L','T',NB , N, K, MB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + ELSE IF (LEFT.AND.NOTRAN) THEN +* +* Multiply Q to the first block of C +* + KK = MOD((M-K),(NB-K)) + II=M-KK+1 + CTR = 1 + CALL DGEMLQT('L','N',NB , N, K, MB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + DO I=NB+1,II-NB+K,(NB-K) +* +* Multiply Q to the current block of C (I:I+NB,1:N) +* + CALL DTPMLQT('L','N',NB-K , N, K, 0,MB, A(1,I), LDA, + $ T(1,CTR*K+1), LDT, C(1,1), LDC, + $ C(I,1), LDC, WORK, INFO ) + CTR = CTR + 1 +* + END DO + IF(II.LE.M) THEN +* +* Multiply Q to the last block of C +* + CALL DTPMLQT('L','N',KK , N, K, 0, MB, A(1,II), LDA, + $ T(1,CTR*K+1), LDT, C(1,1), LDC, + $ C(II,1), LDC, WORK, INFO ) +* + END IF +* + ELSE IF(RIGHT.AND.NOTRAN) THEN +* +* Multiply Q to the last block of C +* + KK = MOD((N-K),(NB-K)) + CTR = (N-K)/(NB-K) + IF (KK.GT.0) THEN + II=N-KK+1 + CALL DTPMLQT('R','N',M , KK, K, 0, MB, A(1, II), LDA, + $ T(1,CTR *K+1), LDT, C(1,1), LDC, + $ C(1,II), LDC, WORK, INFO ) + ELSE + II=N+1 + END IF +* + DO I=II-(NB-K),NB+1,-(NB-K) +* +* Multiply Q to the current block of C (1:M,I:I+MB) +* + CTR = CTR - 1 + CALL DTPMLQT('R','N', M, NB-K, K, 0, MB, A(1, I), LDA, + $ T(1,CTR*K+1), LDT, C(1,1), LDC, + $ C(1,I), LDC, WORK, INFO ) +* + END DO +* +* Multiply Q to the first block of C (1:M,1:MB) +* + CALL DGEMLQT('R','N',M , NB, K, MB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + ELSE IF (RIGHT.AND.TRAN) THEN +* +* Multiply Q to the first block of C +* + KK = MOD((N-K),(NB-K)) + CTR = 1 + II=N-KK+1 + CALL DGEMLQT('R','T',M , NB, K, MB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + DO I=NB+1,II-NB+K,(NB-K) +* +* Multiply Q to the current block of C (1:M,I:I+MB) +* + CALL DTPMLQT('R','T',M , NB-K, K, 0,MB, A(1,I), LDA, + $ T(1,CTR*K+1), LDT, C(1,1), LDC, + $ C(1,I), LDC, WORK, INFO ) + CTR = CTR + 1 +* + END DO + IF(II.LE.N) THEN +* +* Multiply Q to the last block of C +* + CALL DTPMLQT('R','T',M , KK, K, 0,MB, A(1,II), LDA, + $ T(1,CTR*K+1),LDT, C(1,1), LDC, + $ C(1,II), LDC, WORK, INFO ) +* + END IF +* + END IF +* + WORK(1) = LW + RETURN +* +* End of DLAMSWLQ +* + END diff --git a/dspl/liblapack/SRC/dlamtsqr.f b/dspl/liblapack/SRC/dlamtsqr.f new file mode 100644 index 0000000..6af89d2 --- /dev/null +++ b/dspl/liblapack/SRC/dlamtsqr.f @@ -0,0 +1,415 @@ +* +* Definition: +* =========== +* +* SUBROUTINE DLAMTSQR( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, +* $ LDT, C, LDC, WORK, LWORK, INFO ) +* +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC +* .. +* .. Array Arguments .. +* DOUBLE A( LDA, * ), WORK( * ), C(LDC, * ), +* $ T( LDT, * ) +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAMTSQR overwrites the general real M-by-N matrix C with +*> +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> where Q is a real orthogonal matrix defined as the product +*> of blocked elementary reflectors computed by tall skinny +*> QR factorization (DLATSQR) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >=0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> N >= K >= 0; +*> +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The block size to be used in the blocked QR. +*> MB > N. (must be the same as DLATSQR) +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The column block size to be used in the blocked QR. +*> N >= NB >= 1. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> blockedelementary reflector H(i), for i = 1,2,...,k, as +*> returned by DLATSQR in the first k columns of +*> its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension +*> ( N * Number of blocks(CEIL(M-K/MB-K)), +*> The blocked upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See below +*> for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> +*> \endverbatim +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If SIDE = 'L', LWORK >= max(1,N)*NB; +*> if SIDE = 'R', LWORK >= max(1,MB)*NB. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> +*> \endverbatim +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> Tall-Skinny QR (TSQR) performs QR by a sequence of orthogonal transformations, +*> representing Q as a product of other orthogonal matrices +*> Q = Q(1) * Q(2) * . . . * Q(k) +*> where each Q(i) zeros out subdiagonal entries of a block of MB rows of A: +*> Q(1) zeros out the subdiagonal entries of rows 1:MB of A +*> Q(2) zeros out the bottom MB-N rows of rows [1:N,MB+1:2*MB-N] of A +*> Q(3) zeros out the bottom MB-N rows of rows [1:N,2*MB-N+1:3*MB-2*N] of A +*> . . . +*> +*> Q(1) is computed by GEQRT, which represents Q(1) by Householder vectors +*> stored under the diagonal of rows 1:MB of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,1:N). +*> For more information see Further Details in GEQRT. +*> +*> Q(i) for i>1 is computed by TPQRT, which represents Q(i) by Householder vectors +*> stored in rows [(i-1)*(MB-N)+N+1:i*(MB-N)+N] of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,(i-1)*N+1:i*N). +*> The last Q(k) may use fewer rows. +*> For more information see Further Details in TPQRT. +*> +*> For more details of the overall algorithm, see the description of +*> Sequential TSQR in Section 2.2 of [1]. +*> +*> [1] “Communication-Optimal Parallel and Sequential QR and LU Factorizations,” +*> J. Demmel, L. Grigori, M. Hoemmen, J. Langou, +*> SIAM J. Sci. Comput, vol. 34, no. 1, 2012 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLAMTSQR( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, + $ LDT, C, LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), WORK( * ), C(LDC, * ), + $ T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN, LQUERY + INTEGER I, II, KK, LW, CTR +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL DGEMQRT, DTPMQRT, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + LQUERY = LWORK.LT.0 + NOTRAN = LSAME( TRANS, 'N' ) + TRAN = LSAME( TRANS, 'T' ) + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + IF (LEFT) THEN + LW = N * NB + ELSE + LW = MB * NB + END IF +* + INFO = 0 + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -9 + ELSE IF( LDT.LT.MAX( 1, NB) ) THEN + INFO = -11 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -13 + ELSE IF(( LWORK.LT.MAX(1,LW)).AND.(.NOT.LQUERY)) THEN + INFO = -15 + END IF +* +* Determine the block size if it is tall skinny or short and wide +* + IF( INFO.EQ.0) THEN + WORK(1) = LW + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAMTSQR', -INFO ) + RETURN + ELSE IF (LQUERY) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN(M,N,K).EQ.0 ) THEN + RETURN + END IF +* + IF((MB.LE.K).OR.(MB.GE.MAX(M,N,K))) THEN + CALL DGEMQRT( SIDE, TRANS, M, N, K, NB, A, LDA, + $ T, LDT, C, LDC, WORK, INFO) + RETURN + END IF +* + IF(LEFT.AND.NOTRAN) THEN +* +* Multiply Q to the last block of C +* + KK = MOD((M-K),(MB-K)) + CTR = (M-K)/(MB-K) + IF (KK.GT.0) THEN + II=M-KK+1 + CALL DTPMQRT('L','N',KK , N, K, 0, NB, A(II,1), LDA, + $ T(1,CTR*K+1),LDT , C(1,1), LDC, + $ C(II,1), LDC, WORK, INFO ) + ELSE + II=M+1 + END IF +* + DO I=II-(MB-K),MB+1,-(MB-K) +* +* Multiply Q to the current block of C (I:I+MB,1:N) +* + CTR = CTR - 1 + CALL DTPMQRT('L','N',MB-K , N, K, 0,NB, A(I,1), LDA, + $ T(1,CTR*K+1),LDT, C(1,1), LDC, + $ C(I,1), LDC, WORK, INFO ) +* + END DO +* +* Multiply Q to the first block of C (1:MB,1:N) +* + CALL DGEMQRT('L','N',MB , N, K, NB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + ELSE IF (LEFT.AND.TRAN) THEN +* +* Multiply Q to the first block of C +* + KK = MOD((M-K),(MB-K)) + II=M-KK+1 + CTR = 1 + CALL DGEMQRT('L','T',MB , N, K, NB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + DO I=MB+1,II-MB+K,(MB-K) +* +* Multiply Q to the current block of C (I:I+MB,1:N) +* + CALL DTPMQRT('L','T',MB-K , N, K, 0,NB, A(I,1), LDA, + $ T(1,CTR * K + 1),LDT, C(1,1), LDC, + $ C(I,1), LDC, WORK, INFO ) + CTR = CTR + 1 +* + END DO + IF(II.LE.M) THEN +* +* Multiply Q to the last block of C +* + CALL DTPMQRT('L','T',KK , N, K, 0,NB, A(II,1), LDA, + $ T(1,CTR * K + 1), LDT, C(1,1), LDC, + $ C(II,1), LDC, WORK, INFO ) +* + END IF +* + ELSE IF(RIGHT.AND.TRAN) THEN +* +* Multiply Q to the last block of C +* + KK = MOD((N-K),(MB-K)) + CTR = (N-K)/(MB-K) + IF (KK.GT.0) THEN + II=N-KK+1 + CALL DTPMQRT('R','T',M , KK, K, 0, NB, A(II,1), LDA, + $ T(1,CTR*K+1), LDT, C(1,1), LDC, + $ C(1,II), LDC, WORK, INFO ) + ELSE + II=N+1 + END IF +* + DO I=II-(MB-K),MB+1,-(MB-K) +* +* Multiply Q to the current block of C (1:M,I:I+MB) +* + CTR = CTR - 1 + CALL DTPMQRT('R','T',M , MB-K, K, 0,NB, A(I,1), LDA, + $ T(1,CTR*K+1), LDT, C(1,1), LDC, + $ C(1,I), LDC, WORK, INFO ) +* + END DO +* +* Multiply Q to the first block of C (1:M,1:MB) +* + CALL DGEMQRT('R','T',M , MB, K, NB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + ELSE IF (RIGHT.AND.NOTRAN) THEN +* +* Multiply Q to the first block of C +* + KK = MOD((N-K),(MB-K)) + II=N-KK+1 + CTR = 1 + CALL DGEMQRT('R','N', M, MB , K, NB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + DO I=MB+1,II-MB+K,(MB-K) +* +* Multiply Q to the current block of C (1:M,I:I+MB) +* + CALL DTPMQRT('R','N', M, MB-K, K, 0,NB, A(I,1), LDA, + $ T(1, CTR * K + 1),LDT, C(1,1), LDC, + $ C(1,I), LDC, WORK, INFO ) + CTR = CTR + 1 +* + END DO + IF(II.LE.N) THEN +* +* Multiply Q to the last block of C +* + CALL DTPMQRT('R','N', M, KK , K, 0,NB, A(II,1), LDA, + $ T(1, CTR * K + 1),LDT, C(1,1), LDC, + $ C(1,II), LDC, WORK, INFO ) +* + END IF +* + END IF +* + WORK(1) = LW + RETURN +* +* End of DLAMTSQR +* + END diff --git a/dspl/liblapack/SRC/dlaneg.f b/dspl/liblapack/SRC/dlaneg.f new file mode 100644 index 0000000..3d13d31 --- /dev/null +++ b/dspl/liblapack/SRC/dlaneg.f @@ -0,0 +1,227 @@ +*> \brief \b DLANEG computes the Sturm count. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLANEG + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION DLANEG( N, D, LLD, SIGMA, PIVMIN, R ) +* +* .. Scalar Arguments .. +* INTEGER N, R +* DOUBLE PRECISION PIVMIN, SIGMA +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), LLD( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLANEG computes the Sturm count, the number of negative pivots +*> encountered while factoring tridiagonal T - sigma I = L D L^T. +*> This implementation works directly on the factors without forming +*> the tridiagonal matrix T. The Sturm count is also the number of +*> eigenvalues of T less than sigma. +*> +*> This routine is called from DLARRB. +*> +*> The current routine does not use the PIVMIN parameter but rather +*> requires IEEE-754 propagation of Infinities and NaNs. This +*> routine also has no input range restrictions but does require +*> default exception handling such that x/0 produces Inf when x is +*> non-zero, and Inf/Inf produces NaN. For more information, see: +*> +*> Marques, Riedy, and Voemel, "Benefits of IEEE-754 Features in +*> Modern Symmetric Tridiagonal Eigensolvers," SIAM Journal on +*> Scientific Computing, v28, n5, 2006. DOI 10.1137/050641624 +*> (Tech report version in LAWN 172 with the same title.) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The N diagonal elements of the diagonal matrix D. +*> \endverbatim +*> +*> \param[in] LLD +*> \verbatim +*> LLD is DOUBLE PRECISION array, dimension (N-1) +*> The (N-1) elements L(i)*L(i)*D(i). +*> \endverbatim +*> +*> \param[in] SIGMA +*> \verbatim +*> SIGMA is DOUBLE PRECISION +*> Shift amount in T - sigma I = L D L^T. +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is DOUBLE PRECISION +*> The minimum pivot in the Sturm sequence. May be used +*> when zero pivots are encountered on non-IEEE-754 +*> architectures. +*> \endverbatim +*> +*> \param[in] R +*> \verbatim +*> R is INTEGER +*> The twist index for the twisted factorization that is used +*> for the negcount. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA \n +*> Jason Riedy, University of California, Berkeley, USA \n +*> +* ===================================================================== + INTEGER FUNCTION DLANEG( N, D, LLD, SIGMA, PIVMIN, R ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER N, R + DOUBLE PRECISION PIVMIN, SIGMA +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), LLD( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* Some architectures propagate Infinities and NaNs very slowly, so +* the code computes counts in BLKLEN chunks. Then a NaN can +* propagate at most BLKLEN columns before being detected. This is +* not a general tuning parameter; it needs only to be just large +* enough that the overhead is tiny in common cases. + INTEGER BLKLEN + PARAMETER ( BLKLEN = 128 ) +* .. +* .. Local Scalars .. + INTEGER BJ, J, NEG1, NEG2, NEGCNT + DOUBLE PRECISION BSAV, DMINUS, DPLUS, GAMMA, P, T, TMP + LOGICAL SAWNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX +* .. +* .. External Functions .. + LOGICAL DISNAN + EXTERNAL DISNAN +* .. +* .. Executable Statements .. + + NEGCNT = 0 + +* I) upper part: L D L^T - SIGMA I = L+ D+ L+^T + T = -SIGMA + DO 210 BJ = 1, R-1, BLKLEN + NEG1 = 0 + BSAV = T + DO 21 J = BJ, MIN(BJ+BLKLEN-1, R-1) + DPLUS = D( J ) + T + IF( DPLUS.LT.ZERO ) NEG1 = NEG1 + 1 + TMP = T / DPLUS + T = TMP * LLD( J ) - SIGMA + 21 CONTINUE + SAWNAN = DISNAN( T ) +* Run a slower version of the above loop if a NaN is detected. +* A NaN should occur only with a zero pivot after an infinite +* pivot. In that case, substituting 1 for T/DPLUS is the +* correct limit. + IF( SAWNAN ) THEN + NEG1 = 0 + T = BSAV + DO 22 J = BJ, MIN(BJ+BLKLEN-1, R-1) + DPLUS = D( J ) + T + IF( DPLUS.LT.ZERO ) NEG1 = NEG1 + 1 + TMP = T / DPLUS + IF (DISNAN(TMP)) TMP = ONE + T = TMP * LLD(J) - SIGMA + 22 CONTINUE + END IF + NEGCNT = NEGCNT + NEG1 + 210 CONTINUE +* +* II) lower part: L D L^T - SIGMA I = U- D- U-^T + P = D( N ) - SIGMA + DO 230 BJ = N-1, R, -BLKLEN + NEG2 = 0 + BSAV = P + DO 23 J = BJ, MAX(BJ-BLKLEN+1, R), -1 + DMINUS = LLD( J ) + P + IF( DMINUS.LT.ZERO ) NEG2 = NEG2 + 1 + TMP = P / DMINUS + P = TMP * D( J ) - SIGMA + 23 CONTINUE + SAWNAN = DISNAN( P ) +* As above, run a slower version that substitutes 1 for Inf/Inf. +* + IF( SAWNAN ) THEN + NEG2 = 0 + P = BSAV + DO 24 J = BJ, MAX(BJ-BLKLEN+1, R), -1 + DMINUS = LLD( J ) + P + IF( DMINUS.LT.ZERO ) NEG2 = NEG2 + 1 + TMP = P / DMINUS + IF (DISNAN(TMP)) TMP = ONE + P = TMP * D(J) - SIGMA + 24 CONTINUE + END IF + NEGCNT = NEGCNT + NEG2 + 230 CONTINUE +* +* III) Twist index +* T was shifted by SIGMA initially. + GAMMA = (T + SIGMA) + P + IF( GAMMA.LT.ZERO ) NEGCNT = NEGCNT+1 + + DLANEG = NEGCNT + END diff --git a/dspl/liblapack/SRC/dlangb.f b/dspl/liblapack/SRC/dlangb.f new file mode 100644 index 0000000..078573b --- /dev/null +++ b/dspl/liblapack/SRC/dlangb.f @@ -0,0 +1,225 @@ +*> \brief \b DLANGB returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of general band matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLANGB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLANGB( NORM, N, KL, KU, AB, LDAB, +* WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER KL, KU, LDAB, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLANGB returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of an +*> n by n band matrix A, with kl sub-diagonals and ku super-diagonals. +*> \endverbatim +*> +*> \return DLANGB +*> \verbatim +*> +*> DLANGB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in DLANGB as described +*> above. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, DLANGB is +*> set to zero. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of sub-diagonals of the matrix A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of super-diagonals of the matrix A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> The band matrix A, stored in rows 1 to KL+KU+1. The j-th +*> column of A is stored in the j-th column of the array AB as +*> follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGBauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLANGB( NORM, N, KL, KU, AB, LDAB, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER KL, KU, LDAB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, K, L + DOUBLE PRECISION SCALE, SUM, VALUE, TEMP +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) + TEMP = ABS( AB( I, J ) ) + IF( VALUE.LT.TEMP .OR. DISNAN( TEMP ) ) VALUE = TEMP + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 30 CONTINUE + IF( VALUE.LT.SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, N + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + K = KU + 1 - J + DO 60 I = MAX( 1, J-KU ), MIN( N, J+KL ) + WORK( I ) = WORK( I ) + ABS( AB( K+I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, N + TEMP = WORK( I ) + IF( VALUE.LT.TEMP .OR. DISNAN( TEMP ) ) VALUE = TEMP + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + L = MAX( 1, J-KU ) + K = KU + 1 - J + L + CALL DLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + DLANGB = VALUE + RETURN +* +* End of DLANGB +* + END diff --git a/dspl/liblapack/SRC/dlange.f b/dspl/liblapack/SRC/dlange.f new file mode 100644 index 0000000..9dbf45e --- /dev/null +++ b/dspl/liblapack/SRC/dlange.f @@ -0,0 +1,211 @@ +*> \brief \b DLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLANGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLANGE( NORM, M, N, A, LDA, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLANGE returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> real matrix A. +*> \endverbatim +*> +*> \return DLANGE +*> \verbatim +*> +*> DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in DLANGE as described +*> above. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. When M = 0, +*> DLANGE is set to zero. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. When N = 0, +*> DLANGE is set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(M,1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= M when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLANGE( NORM, M, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION SCALE, SUM, VALUE, TEMP +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, M + TEMP = ABS( A( I, J ) ) + IF( VALUE.LT.TEMP .OR. DISNAN( TEMP ) ) VALUE = TEMP + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, M + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + IF( VALUE.LT.SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, M + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, M + TEMP = WORK( I ) + IF( VALUE.LT.TEMP .OR. DISNAN( TEMP ) ) VALUE = TEMP + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL DLASSQ( M, A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + DLANGE = VALUE + RETURN +* +* End of DLANGE +* + END diff --git a/dspl/liblapack/SRC/dlangt.f b/dspl/liblapack/SRC/dlangt.f new file mode 100644 index 0000000..c9576c0 --- /dev/null +++ b/dspl/liblapack/SRC/dlangt.f @@ -0,0 +1,208 @@ +*> \brief \b DLANGT returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of a general tridiagonal matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLANGT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLANGT( NORM, N, DL, D, DU ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), DL( * ), DU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLANGT returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> real tridiagonal matrix A. +*> \endverbatim +*> +*> \return DLANGT +*> \verbatim +*> +*> DLANGT = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in DLANGT as described +*> above. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, DLANGT is +*> set to zero. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) sub-diagonal elements of A. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) super-diagonal elements of A. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLANGT( NORM, N, DL, D, DU ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), DL( * ), DU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION ANORM, SCALE, SUM, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + ANORM = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + ANORM = ABS( D( N ) ) + DO 10 I = 1, N - 1 + IF( ANORM.LT.ABS( DL( I ) ) .OR. DISNAN( ABS( DL( I ) ) ) ) + $ ANORM = ABS(DL(I)) + IF( ANORM.LT.ABS( D( I ) ) .OR. DISNAN( ABS( D( I ) ) ) ) + $ ANORM = ABS(D(I)) + IF( ANORM.LT.ABS( DU( I ) ) .OR. DISNAN (ABS( DU( I ) ) ) ) + $ ANORM = ABS(DU(I)) + 10 CONTINUE + ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN +* +* Find norm1(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = ABS( D( 1 ) )+ABS( DL( 1 ) ) + TEMP = ABS( D( N ) )+ABS( DU( N-1 ) ) + IF( ANORM .LT. TEMP .OR. DISNAN( TEMP ) ) ANORM = TEMP + DO 20 I = 2, N - 1 + TEMP = ABS( D( I ) )+ABS( DL( I ) )+ABS( DU( I-1 ) ) + IF( ANORM .LT. TEMP .OR. DISNAN( TEMP ) ) ANORM = TEMP + 20 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = ABS( D( 1 ) )+ABS( DU( 1 ) ) + TEMP = ABS( D( N ) )+ABS( DL( N-1 ) ) + IF( ANORM .LT. TEMP .OR. DISNAN( TEMP ) ) ANORM = TEMP + DO 30 I = 2, N - 1 + TEMP = ABS( D( I ) )+ABS( DU( I ) )+ABS( DL( I-1 ) ) + IF( ANORM .LT. TEMP .OR. DISNAN( TEMP ) ) ANORM = TEMP + 30 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + CALL DLASSQ( N, D, 1, SCALE, SUM ) + IF( N.GT.1 ) THEN + CALL DLASSQ( N-1, DL, 1, SCALE, SUM ) + CALL DLASSQ( N-1, DU, 1, SCALE, SUM ) + END IF + ANORM = SCALE*SQRT( SUM ) + END IF +* + DLANGT = ANORM + RETURN +* +* End of DLANGT +* + END diff --git a/dspl/liblapack/SRC/dlanhs.f b/dspl/liblapack/SRC/dlanhs.f new file mode 100644 index 0000000..691dbc2 --- /dev/null +++ b/dspl/liblapack/SRC/dlanhs.f @@ -0,0 +1,205 @@ +*> \brief \b DLANHS returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of an upper Hessenberg matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLANHS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLANHS( NORM, N, A, LDA, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLANHS returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> Hessenberg matrix A. +*> \endverbatim +*> +*> \return DLANHS +*> \verbatim +*> +*> DLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in DLANHS as described +*> above. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, DLANHS is +*> set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The n by n upper Hessenberg matrix A; the part of A below the +*> first sub-diagonal is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(N,1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLANHS( NORM, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, MIN( N, J+1 ) + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, MIN( N, J+1 ) + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, N + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, MIN( N, J+1 ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL DLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + DLANHS = VALUE + RETURN +* +* End of DLANHS +* + END diff --git a/dspl/liblapack/SRC/dlansb.f b/dspl/liblapack/SRC/dlansb.f new file mode 100644 index 0000000..4ccf5f2 --- /dev/null +++ b/dspl/liblapack/SRC/dlansb.f @@ -0,0 +1,258 @@ +*> \brief \b DLANSB returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a symmetric band matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLANSB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLANSB( NORM, UPLO, N, K, AB, LDAB, +* WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, UPLO +* INTEGER K, LDAB, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLANSB returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of an +*> n by n symmetric band matrix A, with k super-diagonals. +*> \endverbatim +*> +*> \return DLANSB +*> \verbatim +*> +*> DLANSB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in DLANSB as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> band matrix A is supplied. +*> = 'U': Upper triangular part is supplied +*> = 'L': Lower triangular part is supplied +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, DLANSB is +*> set to zero. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of super-diagonals or sub-diagonals of the +*> band matrix A. K >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> The upper or lower triangle of the symmetric band matrix A, +*> stored in the first K+1 rows of AB. The j-th column of A is +*> stored in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= K+1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLANSB( NORM, UPLO, N, K, AB, LDAB, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER K, LDAB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L + DOUBLE PRECISION ABSA, SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = MAX( K+2-J, 1 ), K + 1 + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = 1, MIN( N+1-J, K+1 ) + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + L = K + 1 - J + DO 50 I = MAX( 1, J-K ), J - 1 + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( AB( K+1, J ) ) + 60 CONTINUE + DO 70 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( AB( 1, J ) ) + L = 1 - J + DO 90 I = J + 1, MIN( N, J+K ) + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( K.GT.0 ) THEN + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL DLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ), + $ 1, SCALE, SUM ) + 110 CONTINUE + L = K + 1 + ELSE + DO 120 J = 1, N - 1 + CALL DLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, + $ SUM ) + 120 CONTINUE + L = 1 + END IF + SUM = 2*SUM + ELSE + L = 1 + END IF + CALL DLASSQ( N, AB( L, 1 ), LDAB, SCALE, SUM ) + VALUE = SCALE*SQRT( SUM ) + END IF +* + DLANSB = VALUE + RETURN +* +* End of DLANSB +* + END diff --git a/dspl/liblapack/SRC/dlansf.f b/dspl/liblapack/SRC/dlansf.f new file mode 100644 index 0000000..d9b6c5b --- /dev/null +++ b/dspl/liblapack/SRC/dlansf.f @@ -0,0 +1,963 @@ +*> \brief \b DLANSF returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a symmetric matrix in RFP format. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLANSF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLANSF( NORM, TRANSR, UPLO, N, A, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, TRANSR, UPLO +* INTEGER N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( 0: * ), WORK( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLANSF returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> real symmetric matrix A in RFP format. +*> \endverbatim +*> +*> \return DLANSF +*> \verbatim +*> +*> DLANSF = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in DLANSF as described +*> above. +*> \endverbatim +*> +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> Specifies whether the RFP format of A is normal or +*> transposed format. +*> = 'N': RFP format is Normal; +*> = 'T': RFP format is Transpose. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the RFP matrix A came from +*> an upper or lower triangular matrix as follows: +*> = 'U': RFP A came from an upper triangular matrix; +*> = 'L': RFP A came from a lower triangular matrix. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, DLANSF is +*> set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension ( N*(N+1)/2 ); +*> On entry, the upper (if UPLO = 'U') or lower (if UPLO = 'L') +*> part of the symmetric matrix A stored in RFP format. See the +*> "Notes" below for more details. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Rectangular Full Packed (RFP) Format when N is +*> even. We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> the transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> the transpose of the last three columns of AP lower. +*> This covers the case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 03 04 05 33 43 53 +*> 13 14 15 00 44 54 +*> 23 24 25 10 11 55 +*> 33 34 35 20 21 22 +*> 00 44 45 30 31 32 +*> 01 11 55 40 41 42 +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We then consider Rectangular Full Packed (RFP) Format when N is +*> odd. We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> the transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> the transpose of the last two columns of AP lower. +*> This covers the case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 02 03 04 00 33 43 +*> 12 13 14 10 11 44 +*> 22 23 24 20 21 22 +*> 00 33 34 30 31 32 +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> 02 12 22 00 01 00 10 20 30 40 50 +*> 03 13 23 33 11 33 11 21 31 41 51 +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLANSF( NORM, TRANSR, UPLO, N, A, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, TRANSR, UPLO + INTEGER N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( 0: * ), WORK( 0: * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, IFM, ILU, NOE, N1, K, L, LDA + DOUBLE PRECISION SCALE, S, VALUE, AA, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + DLANSF = ZERO + RETURN + ELSE IF( N.EQ.1 ) THEN + DLANSF = ABS( A(0) ) + RETURN + END IF +* +* set noe = 1 if n is odd. if n is even set noe=0 +* + NOE = 1 + IF( MOD( N, 2 ).EQ.0 ) + $ NOE = 0 +* +* set ifm = 0 when form='T or 't' and 1 otherwise +* + IFM = 1 + IF( LSAME( TRANSR, 'T' ) ) + $ IFM = 0 +* +* set ilu = 0 when uplo='U or 'u' and 1 otherwise +* + ILU = 1 + IF( LSAME( UPLO, 'U' ) ) + $ ILU = 0 +* +* set lda = (n+1)/2 when ifm = 0 +* set lda = n when ifm = 1 and noe = 1 +* set lda = n+1 when ifm = 1 and noe = 0 +* + IF( IFM.EQ.1 ) THEN + IF( NOE.EQ.1 ) THEN + LDA = N + ELSE +* noe=0 + LDA = N + 1 + END IF + ELSE +* ifm=0 + LDA = ( N+1 ) / 2 + END IF +* + IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + K = ( N+1 ) / 2 + VALUE = ZERO + IF( NOE.EQ.1 ) THEN +* n is odd + IF( IFM.EQ.1 ) THEN +* A is n by k + DO J = 0, K - 1 + DO I = 0, N - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + ELSE +* xpose case; A is k by n + DO J = 0, N - 1 + DO I = 0, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + END IF + ELSE +* n is even + IF( IFM.EQ.1 ) THEN +* A is n+1 by k + DO J = 0, K - 1 + DO I = 0, N + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + ELSE +* xpose case; A is k by n+1 + DO J = 0, N + DO I = 0, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + END IF + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + IF( IFM.EQ.1 ) THEN + K = N / 2 + IF( NOE.EQ.1 ) THEN +* n is odd + IF( ILU.EQ.0 ) THEN + DO I = 0, K - 1 + WORK( I ) = ZERO + END DO + DO J = 0, K + S = ZERO + DO I = 0, K + J - 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(i,j+k) + S = S + AA + WORK( I ) = WORK( I ) + AA + END DO + AA = ABS( A( I+J*LDA ) ) +* -> A(j+k,j+k) + WORK( J+K ) = S + AA + IF( I.EQ.K+K ) + $ GO TO 10 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(j,j) + WORK( J ) = WORK( J ) + AA + S = ZERO + DO L = J + 1, K - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(l,j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + 10 CONTINUE + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + ELSE +* ilu = 1 + K = K + 1 +* k=(n+1)/2 for n odd and ilu=1 + DO I = K, N - 1 + WORK( I ) = ZERO + END DO + DO J = K - 1, 0, -1 + S = ZERO + DO I = 0, J - 2 + AA = ABS( A( I+J*LDA ) ) +* -> A(j+k,i+k) + S = S + AA + WORK( I+K ) = WORK( I+K ) + AA + END DO + IF( J.GT.0 ) THEN + AA = ABS( A( I+J*LDA ) ) +* -> A(j+k,j+k) + S = S + AA + WORK( I+K ) = WORK( I+K ) + S +* i=j + I = I + 1 + END IF + AA = ABS( A( I+J*LDA ) ) +* -> A(j,j) + WORK( J ) = AA + S = ZERO + DO L = J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(l,j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END IF + ELSE +* n is even + IF( ILU.EQ.0 ) THEN + DO I = 0, K - 1 + WORK( I ) = ZERO + END DO + DO J = 0, K - 1 + S = ZERO + DO I = 0, K + J - 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(i,j+k) + S = S + AA + WORK( I ) = WORK( I ) + AA + END DO + AA = ABS( A( I+J*LDA ) ) +* -> A(j+k,j+k) + WORK( J+K ) = S + AA + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(j,j) + WORK( J ) = WORK( J ) + AA + S = ZERO + DO L = J + 1, K - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(l,j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + ELSE +* ilu = 1 + DO I = K, N - 1 + WORK( I ) = ZERO + END DO + DO J = K - 1, 0, -1 + S = ZERO + DO I = 0, J - 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(j+k,i+k) + S = S + AA + WORK( I+K ) = WORK( I+K ) + AA + END DO + AA = ABS( A( I+J*LDA ) ) +* -> A(j+k,j+k) + S = S + AA + WORK( I+K ) = WORK( I+K ) + S +* i=j + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(j,j) + WORK( J ) = AA + S = ZERO + DO L = J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(l,j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END IF + END IF + ELSE +* ifm=0 + K = N / 2 + IF( NOE.EQ.1 ) THEN +* n is odd + IF( ILU.EQ.0 ) THEN + N1 = K +* n/2 + K = K + 1 +* k is the row size and lda + DO I = N1, N - 1 + WORK( I ) = ZERO + END DO + DO J = 0, N1 - 1 + S = ZERO + DO I = 0, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,n1+i) + WORK( I+N1 ) = WORK( I+N1 ) + AA + S = S + AA + END DO + WORK( J ) = S + END DO +* j=n1=k-1 is special + S = ABS( A( 0+J*LDA ) ) +* A(k-1,k-1) + DO I = 1, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(k-1,i+n1) + WORK( I+N1 ) = WORK( I+N1 ) + AA + S = S + AA + END DO + WORK( J ) = WORK( J ) + S + DO J = K, N - 1 + S = ZERO + DO I = 0, J - K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(i,j-k) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO +* i=j-k + AA = ABS( A( I+J*LDA ) ) +* A(j-k,j-k) + S = S + AA + WORK( J-K ) = WORK( J-K ) + S + I = I + 1 + S = ABS( A( I+J*LDA ) ) +* A(j,j) + DO L = J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,l) + WORK( L ) = WORK( L ) + AA + S = S + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + ELSE +* ilu=1 + K = K + 1 +* k=(n+1)/2 for n odd and ilu=1 + DO I = K, N - 1 + WORK( I ) = ZERO + END DO + DO J = 0, K - 2 +* process + S = ZERO + DO I = 0, J - 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO + AA = ABS( A( I+J*LDA ) ) +* i=j so process of A(j,j) + S = S + AA + WORK( J ) = S +* is initialised here + I = I + 1 +* i=j process A(j+k,j+k) + AA = ABS( A( I+J*LDA ) ) + S = AA + DO L = K + J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* A(l,k+j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( K+J ) = WORK( K+J ) + S + END DO +* j=k-1 is special :process col A(k-1,0:k-1) + S = ZERO + DO I = 0, K - 2 + AA = ABS( A( I+J*LDA ) ) +* A(k,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO +* i=k-1 + AA = ABS( A( I+J*LDA ) ) +* A(k-1,k-1) + S = S + AA + WORK( I ) = S +* done with col j=k+1 + DO J = K, N - 1 +* process col j of A = A(j,0:k-1) + S = ZERO + DO I = 0, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END IF + ELSE +* n is even + IF( ILU.EQ.0 ) THEN + DO I = K, N - 1 + WORK( I ) = ZERO + END DO + DO J = 0, K - 1 + S = ZERO + DO I = 0, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,i+k) + WORK( I+K ) = WORK( I+K ) + AA + S = S + AA + END DO + WORK( J ) = S + END DO +* j=k + AA = ABS( A( 0+J*LDA ) ) +* A(k,k) + S = AA + DO I = 1, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(k,k+i) + WORK( I+K ) = WORK( I+K ) + AA + S = S + AA + END DO + WORK( J ) = WORK( J ) + S + DO J = K + 1, N - 1 + S = ZERO + DO I = 0, J - 2 - K + AA = ABS( A( I+J*LDA ) ) +* A(i,j-k-1) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO +* i=j-1-k + AA = ABS( A( I+J*LDA ) ) +* A(j-k-1,j-k-1) + S = S + AA + WORK( J-K-1 ) = WORK( J-K-1 ) + S + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,j) + S = AA + DO L = J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,l) + WORK( L ) = WORK( L ) + AA + S = S + AA + END DO + WORK( J ) = WORK( J ) + S + END DO +* j=n + S = ZERO + DO I = 0, K - 2 + AA = ABS( A( I+J*LDA ) ) +* A(i,k-1) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO +* i=k-1 + AA = ABS( A( I+J*LDA ) ) +* A(k-1,k-1) + S = S + AA + WORK( I ) = WORK( I ) + S + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + ELSE +* ilu=1 + DO I = K, N - 1 + WORK( I ) = ZERO + END DO +* j=0 is special :process col A(k:n-1,k) + S = ABS( A( 0 ) ) +* A(k,k) + DO I = 1, K - 1 + AA = ABS( A( I ) ) +* A(k+i,k) + WORK( I+K ) = WORK( I+K ) + AA + S = S + AA + END DO + WORK( K ) = WORK( K ) + S + DO J = 1, K - 1 +* process + S = ZERO + DO I = 0, J - 2 + AA = ABS( A( I+J*LDA ) ) +* A(j-1,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO + AA = ABS( A( I+J*LDA ) ) +* i=j-1 so process of A(j-1,j-1) + S = S + AA + WORK( J-1 ) = S +* is initialised here + I = I + 1 +* i=j process A(j+k,j+k) + AA = ABS( A( I+J*LDA ) ) + S = AA + DO L = K + J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* A(l,k+j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( K+J ) = WORK( K+J ) + S + END DO +* j=k is special :process col A(k,0:k-1) + S = ZERO + DO I = 0, K - 2 + AA = ABS( A( I+J*LDA ) ) +* A(k,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO +* i=k-1 + AA = ABS( A( I+J*LDA ) ) +* A(k-1,k-1) + S = S + AA + WORK( I ) = S +* done with col j=k+1 + DO J = K + 1, N +* process col j-1 of A = A(j-1,0:k-1) + S = ZERO + DO I = 0, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(j-1,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO + WORK( J-1 ) = WORK( J-1 ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END IF + END IF + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + K = ( N+1 ) / 2 + SCALE = ZERO + S = ONE + IF( NOE.EQ.1 ) THEN +* n is odd + IF( IFM.EQ.1 ) THEN +* A is normal + IF( ILU.EQ.0 ) THEN +* A is upper + DO J = 0, K - 3 + CALL DLASSQ( K-J-2, A( K+J+1+J*LDA ), 1, SCALE, S ) +* L at A(k,0) + END DO + DO J = 0, K - 1 + CALL DLASSQ( K+J-1, A( 0+J*LDA ), 1, SCALE, S ) +* trap U at A(0,0) + END DO + S = S + S +* double s for the off diagonal elements + CALL DLASSQ( K-1, A( K ), LDA+1, SCALE, S ) +* tri L at A(k,0) + CALL DLASSQ( K, A( K-1 ), LDA+1, SCALE, S ) +* tri U at A(k-1,0) + ELSE +* ilu=1 & A is lower + DO J = 0, K - 1 + CALL DLASSQ( N-J-1, A( J+1+J*LDA ), 1, SCALE, S ) +* trap L at A(0,0) + END DO + DO J = 0, K - 2 + CALL DLASSQ( J, A( 0+( 1+J )*LDA ), 1, SCALE, S ) +* U at A(0,1) + END DO + S = S + S +* double s for the off diagonal elements + CALL DLASSQ( K, A( 0 ), LDA+1, SCALE, S ) +* tri L at A(0,0) + CALL DLASSQ( K-1, A( 0+LDA ), LDA+1, SCALE, S ) +* tri U at A(0,1) + END IF + ELSE +* A is xpose + IF( ILU.EQ.0 ) THEN +* A**T is upper + DO J = 1, K - 2 + CALL DLASSQ( J, A( 0+( K+J )*LDA ), 1, SCALE, S ) +* U at A(0,k) + END DO + DO J = 0, K - 2 + CALL DLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) +* k by k-1 rect. at A(0,0) + END DO + DO J = 0, K - 2 + CALL DLASSQ( K-J-1, A( J+1+( J+K-1 )*LDA ), 1, + $ SCALE, S ) +* L at A(0,k-1) + END DO + S = S + S +* double s for the off diagonal elements + CALL DLASSQ( K-1, A( 0+K*LDA ), LDA+1, SCALE, S ) +* tri U at A(0,k) + CALL DLASSQ( K, A( 0+( K-1 )*LDA ), LDA+1, SCALE, S ) +* tri L at A(0,k-1) + ELSE +* A**T is lower + DO J = 1, K - 1 + CALL DLASSQ( J, A( 0+J*LDA ), 1, SCALE, S ) +* U at A(0,0) + END DO + DO J = K, N - 1 + CALL DLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) +* k by k-1 rect. at A(0,k) + END DO + DO J = 0, K - 3 + CALL DLASSQ( K-J-2, A( J+2+J*LDA ), 1, SCALE, S ) +* L at A(1,0) + END DO + S = S + S +* double s for the off diagonal elements + CALL DLASSQ( K, A( 0 ), LDA+1, SCALE, S ) +* tri U at A(0,0) + CALL DLASSQ( K-1, A( 1 ), LDA+1, SCALE, S ) +* tri L at A(1,0) + END IF + END IF + ELSE +* n is even + IF( IFM.EQ.1 ) THEN +* A is normal + IF( ILU.EQ.0 ) THEN +* A is upper + DO J = 0, K - 2 + CALL DLASSQ( K-J-1, A( K+J+2+J*LDA ), 1, SCALE, S ) +* L at A(k+1,0) + END DO + DO J = 0, K - 1 + CALL DLASSQ( K+J, A( 0+J*LDA ), 1, SCALE, S ) +* trap U at A(0,0) + END DO + S = S + S +* double s for the off diagonal elements + CALL DLASSQ( K, A( K+1 ), LDA+1, SCALE, S ) +* tri L at A(k+1,0) + CALL DLASSQ( K, A( K ), LDA+1, SCALE, S ) +* tri U at A(k,0) + ELSE +* ilu=1 & A is lower + DO J = 0, K - 1 + CALL DLASSQ( N-J-1, A( J+2+J*LDA ), 1, SCALE, S ) +* trap L at A(1,0) + END DO + DO J = 1, K - 1 + CALL DLASSQ( J, A( 0+J*LDA ), 1, SCALE, S ) +* U at A(0,0) + END DO + S = S + S +* double s for the off diagonal elements + CALL DLASSQ( K, A( 1 ), LDA+1, SCALE, S ) +* tri L at A(1,0) + CALL DLASSQ( K, A( 0 ), LDA+1, SCALE, S ) +* tri U at A(0,0) + END IF + ELSE +* A is xpose + IF( ILU.EQ.0 ) THEN +* A**T is upper + DO J = 1, K - 1 + CALL DLASSQ( J, A( 0+( K+1+J )*LDA ), 1, SCALE, S ) +* U at A(0,k+1) + END DO + DO J = 0, K - 1 + CALL DLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) +* k by k rect. at A(0,0) + END DO + DO J = 0, K - 2 + CALL DLASSQ( K-J-1, A( J+1+( J+K )*LDA ), 1, SCALE, + $ S ) +* L at A(0,k) + END DO + S = S + S +* double s for the off diagonal elements + CALL DLASSQ( K, A( 0+( K+1 )*LDA ), LDA+1, SCALE, S ) +* tri U at A(0,k+1) + CALL DLASSQ( K, A( 0+K*LDA ), LDA+1, SCALE, S ) +* tri L at A(0,k) + ELSE +* A**T is lower + DO J = 1, K - 1 + CALL DLASSQ( J, A( 0+( J+1 )*LDA ), 1, SCALE, S ) +* U at A(0,1) + END DO + DO J = K + 1, N + CALL DLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) +* k by k rect. at A(0,k+1) + END DO + DO J = 0, K - 2 + CALL DLASSQ( K-J-1, A( J+1+J*LDA ), 1, SCALE, S ) +* L at A(0,0) + END DO + S = S + S +* double s for the off diagonal elements + CALL DLASSQ( K, A( LDA ), LDA+1, SCALE, S ) +* tri L at A(0,1) + CALL DLASSQ( K, A( 0 ), LDA+1, SCALE, S ) +* tri U at A(0,0) + END IF + END IF + END IF + VALUE = SCALE*SQRT( S ) + END IF +* + DLANSF = VALUE + RETURN +* +* End of DLANSF +* + END diff --git a/dspl/liblapack/SRC/dlansp.f b/dspl/liblapack/SRC/dlansp.f new file mode 100644 index 0000000..a1829db --- /dev/null +++ b/dspl/liblapack/SRC/dlansp.f @@ -0,0 +1,261 @@ +*> \brief \b DLANSP returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a symmetric matrix supplied in packed form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLANSP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLANSP( NORM, UPLO, N, AP, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, UPLO +* INTEGER N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLANSP returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> real symmetric matrix A, supplied in packed form. +*> \endverbatim +*> +*> \return DLANSP +*> \verbatim +*> +*> DLANSP = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in DLANSP as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is supplied. +*> = 'U': Upper triangular part of A is supplied +*> = 'L': Lower triangular part of A is supplied +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, DLANSP is +*> set to zero. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the symmetric matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLANSP( NORM, UPLO, N, AP, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, K + DOUBLE PRECISION ABSA, SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + K = 1 + DO 20 J = 1, N + DO 10 I = K, K + J - 1 + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + K = K + J + 20 CONTINUE + ELSE + K = 1 + DO 40 J = 1, N + DO 30 I = K, K + N - J + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + K = K + N - J + 1 + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + K = 1 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 50 CONTINUE + WORK( J ) = SUM + ABS( AP( K ) ) + K = K + 1 + 60 CONTINUE + DO 70 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( AP( K ) ) + K = K + 1 + DO 90 I = J + 1, N + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 90 CONTINUE + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + K = 2 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL DLASSQ( J-1, AP( K ), 1, SCALE, SUM ) + K = K + J + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL DLASSQ( N-J, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 120 CONTINUE + END IF + SUM = 2*SUM + K = 1 + DO 130 I = 1, N + IF( AP( K ).NE.ZERO ) THEN + ABSA = ABS( AP( K ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + IF( LSAME( UPLO, 'U' ) ) THEN + K = K + I + 1 + ELSE + K = K + N - I + 1 + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + DLANSP = VALUE + RETURN +* +* End of DLANSP +* + END diff --git a/dspl/liblapack/SRC/dlanst.f b/dspl/liblapack/SRC/dlanst.f new file mode 100644 index 0000000..e952e2d --- /dev/null +++ b/dspl/liblapack/SRC/dlanst.f @@ -0,0 +1,186 @@ +*> \brief \b DLANST returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a real symmetric tridiagonal matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLANST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLANST( NORM, N, D, E ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLANST returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> real symmetric tridiagonal matrix A. +*> \endverbatim +*> +*> \return DLANST +*> \verbatim +*> +*> DLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in DLANST as described +*> above. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, DLANST is +*> set to zero. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of A. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) sub-diagonal or super-diagonal elements of A. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLANST( NORM, N, D, E ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION ANORM, SCALE, SUM +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + ANORM = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + ANORM = ABS( D( N ) ) + DO 10 I = 1, N - 1 + SUM = ABS( D( I ) ) + IF( ANORM .LT. SUM .OR. DISNAN( SUM ) ) ANORM = SUM + SUM = ABS( E( I ) ) + IF( ANORM .LT. SUM .OR. DISNAN( SUM ) ) ANORM = SUM + 10 CONTINUE + ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' .OR. + $ LSAME( NORM, 'I' ) ) THEN +* +* Find norm1(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = ABS( D( 1 ) )+ABS( E( 1 ) ) + SUM = ABS( E( N-1 ) )+ABS( D( N ) ) + IF( ANORM .LT. SUM .OR. DISNAN( SUM ) ) ANORM = SUM + DO 20 I = 2, N - 1 + SUM = ABS( D( I ) )+ABS( E( I ) )+ABS( E( I-1 ) ) + IF( ANORM .LT. SUM .OR. DISNAN( SUM ) ) ANORM = SUM + 20 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( N.GT.1 ) THEN + CALL DLASSQ( N-1, E, 1, SCALE, SUM ) + SUM = 2*SUM + END IF + CALL DLASSQ( N, D, 1, SCALE, SUM ) + ANORM = SCALE*SQRT( SUM ) + END IF +* + DLANST = ANORM + RETURN +* +* End of DLANST +* + END diff --git a/dspl/liblapack/SRC/dlansy.f b/dspl/liblapack/SRC/dlansy.f new file mode 100644 index 0000000..2372fce --- /dev/null +++ b/dspl/liblapack/SRC/dlansy.f @@ -0,0 +1,241 @@ +*> \brief \b DLANSY returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a real symmetric matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLANSY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLANSY( NORM, UPLO, N, A, LDA, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, UPLO +* INTEGER LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLANSY returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> real symmetric matrix A. +*> \endverbatim +*> +*> \return DLANSY +*> \verbatim +*> +*> DLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in DLANSY as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is to be referenced. +*> = 'U': Upper triangular part of A is referenced +*> = 'L': Lower triangular part of A is referenced +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, DLANSY is +*> set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading n by n +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading n by n lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(N,1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLANSY( NORM, UPLO, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION ABSA, SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, J + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J, N + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( A( J, J ) ) + 60 CONTINUE + DO 70 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( A( J, J ) ) + DO 90 I = J + 1, N + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL DLASSQ( J-1, A( 1, J ), 1, SCALE, SUM ) + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL DLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM ) + 120 CONTINUE + END IF + SUM = 2*SUM + CALL DLASSQ( N, A, LDA+1, SCALE, SUM ) + VALUE = SCALE*SQRT( SUM ) + END IF +* + DLANSY = VALUE + RETURN +* +* End of DLANSY +* + END diff --git a/dspl/liblapack/SRC/dlantb.f b/dspl/liblapack/SRC/dlantb.f new file mode 100644 index 0000000..3d2bfe7 --- /dev/null +++ b/dspl/liblapack/SRC/dlantb.f @@ -0,0 +1,361 @@ +*> \brief \b DLANTB returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a triangular band matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLANTB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLANTB( NORM, UPLO, DIAG, N, K, AB, +* LDAB, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER K, LDAB, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLANTB returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of an +*> n by n triangular band matrix A, with ( k + 1 ) diagonals. +*> \endverbatim +*> +*> \return DLANTB +*> \verbatim +*> +*> DLANTB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in DLANTB as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, DLANTB is +*> set to zero. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of super-diagonals of the matrix A if UPLO = 'U', +*> or the number of sub-diagonals of the matrix A if UPLO = 'L'. +*> K >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> The upper or lower triangular band matrix A, stored in the +*> first k+1 rows of AB. The j-th column of A is stored +*> in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). +*> Note that when DIAG = 'U', the elements of the array AB +*> corresponding to the diagonal elements of the matrix A are +*> not referenced, but are assumed to be one. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= K+1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLANTB( NORM, UPLO, DIAG, N, K, AB, + $ LDAB, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER K, LDAB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J, L + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = MAX( K+2-J, 1 ), K + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = 2, MIN( N+1-J, K+1 ) + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = MAX( K+2-J, 1 ), K + 1 + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = 1, MIN( N+1-J, K+1 ) + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 90 I = MAX( K+2-J, 1 ), K + SUM = SUM + ABS( AB( I, J ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = MAX( K+2-J, 1 ), K + 1 + SUM = SUM + ABS( AB( I, J ) ) + 100 CONTINUE + END IF + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = 2, MIN( N+1-J, K+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = 1, MIN( N+1-J, K+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 130 CONTINUE + END IF + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, N + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + L = K + 1 - J + DO 160 I = MAX( 1, J-K ), J - 1 + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 180 I = 1, N + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + L = K + 1 - J + DO 190 I = MAX( 1, J-K ), J + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 230 J = 1, N + L = 1 - J + DO 220 I = J + 1, MIN( N, J+K ) + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 220 CONTINUE + 230 CONTINUE + ELSE + DO 240 I = 1, N + WORK( I ) = ZERO + 240 CONTINUE + DO 260 J = 1, N + L = 1 - J + DO 250 I = J, MIN( N, J+K ) + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 250 CONTINUE + 260 CONTINUE + END IF + END IF + DO 270 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 270 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + IF( K.GT.0 ) THEN + DO 280 J = 2, N + CALL DLASSQ( MIN( J-1, K ), + $ AB( MAX( K+2-J, 1 ), J ), 1, SCALE, + $ SUM ) + 280 CONTINUE + END IF + ELSE + SCALE = ZERO + SUM = ONE + DO 290 J = 1, N + CALL DLASSQ( MIN( J, K+1 ), AB( MAX( K+2-J, 1 ), J ), + $ 1, SCALE, SUM ) + 290 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + IF( K.GT.0 ) THEN + DO 300 J = 1, N - 1 + CALL DLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, + $ SUM ) + 300 CONTINUE + END IF + ELSE + SCALE = ZERO + SUM = ONE + DO 310 J = 1, N + CALL DLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1, SCALE, + $ SUM ) + 310 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + DLANTB = VALUE + RETURN +* +* End of DLANTB +* + END diff --git a/dspl/liblapack/SRC/dlantp.f b/dspl/liblapack/SRC/dlantp.f new file mode 100644 index 0000000..f84a9e9 --- /dev/null +++ b/dspl/liblapack/SRC/dlantp.f @@ -0,0 +1,355 @@ +*> \brief \b DLANTP returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a triangular matrix supplied in packed form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLANTP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLANTP( NORM, UPLO, DIAG, N, AP, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLANTP returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> triangular matrix A, supplied in packed form. +*> \endverbatim +*> +*> \return DLANTP +*> \verbatim +*> +*> DLANTP = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in DLANTP as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, DLANTP is +*> set to zero. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> The upper or lower triangular matrix A, packed columnwise in +*> a linear array. The j-th column of A is stored in the array +*> AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> Note that when DIAG = 'U', the elements of the array AP +*> corresponding to the diagonal elements of the matrix A are +*> not referenced, but are assumed to be one. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLANTP( NORM, UPLO, DIAG, N, AP, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J, K + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + K = 1 + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = K, K + J - 2 + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + K = K + J + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = K + 1, K + N - J + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + K = K + N - J + 1 + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = K, K + J - 1 + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 50 CONTINUE + K = K + J + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = K, K + N - J + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + K = K + N - J + 1 + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + K = 1 + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 90 I = K, K + J - 2 + SUM = SUM + ABS( AP( I ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = K, K + J - 1 + SUM = SUM + ABS( AP( I ) ) + 100 CONTINUE + END IF + K = K + J + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = K + 1, K + N - J + SUM = SUM + ABS( AP( I ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = K, K + N - J + SUM = SUM + ABS( AP( I ) ) + 130 CONTINUE + END IF + K = K + N - J + 1 + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + K = 1 + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, N + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + DO 160 I = 1, J - 1 + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 160 CONTINUE + K = K + 1 + 170 CONTINUE + ELSE + DO 180 I = 1, N + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + DO 190 I = 1, J + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 230 J = 1, N + K = K + 1 + DO 220 I = J + 1, N + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 220 CONTINUE + 230 CONTINUE + ELSE + DO 240 I = 1, N + WORK( I ) = ZERO + 240 CONTINUE + DO 260 J = 1, N + DO 250 I = J, N + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 250 CONTINUE + 260 CONTINUE + END IF + END IF + VALUE = ZERO + DO 270 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 270 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + K = 2 + DO 280 J = 2, N + CALL DLASSQ( J-1, AP( K ), 1, SCALE, SUM ) + K = K + J + 280 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + K = 1 + DO 290 J = 1, N + CALL DLASSQ( J, AP( K ), 1, SCALE, SUM ) + K = K + J + 290 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + K = 2 + DO 300 J = 1, N - 1 + CALL DLASSQ( N-J, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 300 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + K = 1 + DO 310 J = 1, N + CALL DLASSQ( N-J+1, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 310 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + DLANTP = VALUE + RETURN +* +* End of DLANTP +* + END diff --git a/dspl/liblapack/SRC/dlantr.f b/dspl/liblapack/SRC/dlantr.f new file mode 100644 index 0000000..8585b2f --- /dev/null +++ b/dspl/liblapack/SRC/dlantr.f @@ -0,0 +1,353 @@ +*> \brief \b DLANTR returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a trapezoidal or triangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLANTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLANTR( NORM, UPLO, DIAG, M, N, A, LDA, +* WORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLANTR returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> trapezoidal or triangular matrix A. +*> \endverbatim +*> +*> \return DLANTR +*> \verbatim +*> +*> DLANTR = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in DLANTR as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower trapezoidal. +*> = 'U': Upper trapezoidal +*> = 'L': Lower trapezoidal +*> Note that A is triangular instead of trapezoidal if M = N. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A has unit diagonal. +*> = 'N': Non-unit diagonal +*> = 'U': Unit diagonal +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0, and if +*> UPLO = 'U', M <= N. When M = 0, DLANTR is set to zero. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0, and if +*> UPLO = 'L', N <= M. When N = 0, DLANTR is set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The trapezoidal matrix A (A is triangular if M = N). +*> If UPLO = 'U', the leading m by n upper trapezoidal part of +*> the array A contains the upper trapezoidal matrix, and the +*> strictly lower triangular part of A is not referenced. +*> If UPLO = 'L', the leading m by n lower trapezoidal part of +*> the array A contains the lower trapezoidal matrix, and the +*> strictly upper triangular part of A is not referenced. Note +*> that when DIAG = 'U', the diagonal elements of A are not +*> referenced and are assumed to be one. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(M,1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= M when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLANTR( NORM, UPLO, DIAG, M, N, A, LDA, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( M, J-1 ) + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J + 1, M + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = 1, MIN( M, J ) + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = J, M + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( ( UDIAG ) .AND. ( J.LE.M ) ) THEN + SUM = ONE + DO 90 I = 1, J - 1 + SUM = SUM + ABS( A( I, J ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = 1, MIN( M, J ) + SUM = SUM + ABS( A( I, J ) ) + 100 CONTINUE + END IF + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = J + 1, M + SUM = SUM + ABS( A( I, J ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = J, M + SUM = SUM + ABS( A( I, J ) ) + 130 CONTINUE + END IF + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, M + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + DO 160 I = 1, MIN( M, J-1 ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 180 I = 1, M + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + DO 190 I = 1, MIN( M, J ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 220 I = N + 1, M + WORK( I ) = ZERO + 220 CONTINUE + DO 240 J = 1, N + DO 230 I = J + 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 230 CONTINUE + 240 CONTINUE + ELSE + DO 250 I = 1, M + WORK( I ) = ZERO + 250 CONTINUE + DO 270 J = 1, N + DO 260 I = J, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 260 CONTINUE + 270 CONTINUE + END IF + END IF + VALUE = ZERO + DO 280 I = 1, M + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 280 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = MIN( M, N ) + DO 290 J = 2, N + CALL DLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM ) + 290 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + DO 300 J = 1, N + CALL DLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM ) + 300 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = MIN( M, N ) + DO 310 J = 1, N + CALL DLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE, + $ SUM ) + 310 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + DO 320 J = 1, N + CALL DLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM ) + 320 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + DLANTR = VALUE + RETURN +* +* End of DLANTR +* + END diff --git a/dspl/liblapack/SRC/dlanv2.f b/dspl/liblapack/SRC/dlanv2.f new file mode 100644 index 0000000..91fa14f --- /dev/null +++ b/dspl/liblapack/SRC/dlanv2.f @@ -0,0 +1,289 @@ +*> \brief \b DLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric matrix in standard form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLANV2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLANV2( A, B, C, D, RT1R, RT1I, RT2R, RT2I, CS, SN ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION A, B, C, CS, D, RT1I, RT1R, RT2I, RT2R, SN +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric +*> matrix in standard form: +*> +*> [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] +*> [ C D ] [ SN CS ] [ CC DD ] [-SN CS ] +*> +*> where either +*> 1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or +*> 2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex +*> conjugate eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION +*> On entry, the elements of the input matrix. +*> On exit, they are overwritten by the elements of the +*> standardised Schur form. +*> \endverbatim +*> +*> \param[out] RT1R +*> \verbatim +*> RT1R is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] RT1I +*> \verbatim +*> RT1I is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] RT2R +*> \verbatim +*> RT2R is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] RT2I +*> \verbatim +*> RT2I is DOUBLE PRECISION +*> The real and imaginary parts of the eigenvalues. If the +*> eigenvalues are a complex conjugate pair, RT1I > 0. +*> \endverbatim +*> +*> \param[out] CS +*> \verbatim +*> CS is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] SN +*> \verbatim +*> SN is DOUBLE PRECISION +*> Parameters of the rotation matrix. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Modified by V. Sima, Research Institute for Informatics, Bucharest, +*> Romania, to reduce the risk of cancellation errors, +*> when computing real eigenvalues, and to ensure, if possible, that +*> abs(RT1R) >= abs(RT2R). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLANV2( A, B, C, D, RT1R, RT1I, RT2R, RT2I, CS, SN ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + DOUBLE PRECISION A, B, C, CS, D, RT1I, RT1R, RT2I, RT2R, SN +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION MULTPL + PARAMETER ( MULTPL = 4.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION AA, BB, BCMAX, BCMIS, CC, CS1, DD, EPS, P, SAB, + $ SAC, SCALE, SIGMA, SN1, TAU, TEMP, Z +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL DLAMCH, DLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SIGN, SQRT +* .. +* .. Executable Statements .. +* + EPS = DLAMCH( 'P' ) + IF( C.EQ.ZERO ) THEN + CS = ONE + SN = ZERO + GO TO 10 +* + ELSE IF( B.EQ.ZERO ) THEN +* +* Swap rows and columns +* + CS = ZERO + SN = ONE + TEMP = D + D = A + A = TEMP + B = -C + C = ZERO + GO TO 10 + ELSE IF( ( A-D ).EQ.ZERO .AND. SIGN( ONE, B ).NE.SIGN( ONE, C ) ) + $ THEN + CS = ONE + SN = ZERO + GO TO 10 + ELSE +* + TEMP = A - D + P = HALF*TEMP + BCMAX = MAX( ABS( B ), ABS( C ) ) + BCMIS = MIN( ABS( B ), ABS( C ) )*SIGN( ONE, B )*SIGN( ONE, C ) + SCALE = MAX( ABS( P ), BCMAX ) + Z = ( P / SCALE )*P + ( BCMAX / SCALE )*BCMIS +* +* If Z is of the order of the machine accuracy, postpone the +* decision on the nature of eigenvalues +* + IF( Z.GE.MULTPL*EPS ) THEN +* +* Real eigenvalues. Compute A and D. +* + Z = P + SIGN( SQRT( SCALE )*SQRT( Z ), P ) + A = D + Z + D = D - ( BCMAX / Z )*BCMIS +* +* Compute B and the rotation matrix +* + TAU = DLAPY2( C, Z ) + CS = Z / TAU + SN = C / TAU + B = B - C + C = ZERO + ELSE +* +* Complex eigenvalues, or real (almost) equal eigenvalues. +* Make diagonal elements equal. +* + SIGMA = B + C + TAU = DLAPY2( SIGMA, TEMP ) + CS = SQRT( HALF*( ONE+ABS( SIGMA ) / TAU ) ) + SN = -( P / ( TAU*CS ) )*SIGN( ONE, SIGMA ) +* +* Compute [ AA BB ] = [ A B ] [ CS -SN ] +* [ CC DD ] [ C D ] [ SN CS ] +* + AA = A*CS + B*SN + BB = -A*SN + B*CS + CC = C*CS + D*SN + DD = -C*SN + D*CS +* +* Compute [ A B ] = [ CS SN ] [ AA BB ] +* [ C D ] [-SN CS ] [ CC DD ] +* + A = AA*CS + CC*SN + B = BB*CS + DD*SN + C = -AA*SN + CC*CS + D = -BB*SN + DD*CS +* + TEMP = HALF*( A+D ) + A = TEMP + D = TEMP +* + IF( C.NE.ZERO ) THEN + IF( B.NE.ZERO ) THEN + IF( SIGN( ONE, B ).EQ.SIGN( ONE, C ) ) THEN +* +* Real eigenvalues: reduce to upper triangular form +* + SAB = SQRT( ABS( B ) ) + SAC = SQRT( ABS( C ) ) + P = SIGN( SAB*SAC, C ) + TAU = ONE / SQRT( ABS( B+C ) ) + A = TEMP + P + D = TEMP - P + B = B - C + C = ZERO + CS1 = SAB*TAU + SN1 = SAC*TAU + TEMP = CS*CS1 - SN*SN1 + SN = CS*SN1 + SN*CS1 + CS = TEMP + END IF + ELSE + B = -C + C = ZERO + TEMP = CS + CS = -SN + SN = TEMP + END IF + END IF + END IF +* + END IF +* + 10 CONTINUE +* +* Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I). +* + RT1R = A + RT2R = D + IF( C.EQ.ZERO ) THEN + RT1I = ZERO + RT2I = ZERO + ELSE + RT1I = SQRT( ABS( B ) )*SQRT( ABS( C ) ) + RT2I = -RT1I + END IF + RETURN +* +* End of DLANV2 +* + END diff --git a/dspl/liblapack/SRC/dlapll.f b/dspl/liblapack/SRC/dlapll.f new file mode 100644 index 0000000..e8fb733 --- /dev/null +++ b/dspl/liblapack/SRC/dlapll.f @@ -0,0 +1,165 @@ +*> \brief \b DLAPLL measures the linear dependence of two vectors. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAPLL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAPLL( N, X, INCX, Y, INCY, SSMIN ) +* +* .. Scalar Arguments .. +* INTEGER INCX, INCY, N +* DOUBLE PRECISION SSMIN +* .. +* .. Array Arguments .. +* DOUBLE PRECISION X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Given two column vectors X and Y, let +*> +*> A = ( X Y ). +*> +*> The subroutine first computes the QR factorization of A = Q*R, +*> and then computes the SVD of the 2-by-2 upper triangular matrix R. +*> The smaller singular value of R is returned in SSMIN, which is used +*> as the measurement of the linear dependency of the vectors X and Y. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of the vectors X and Y. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, +*> dimension (1+(N-1)*INCX) +*> On entry, X contains the N-vector X. +*> On exit, X is overwritten. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, +*> dimension (1+(N-1)*INCY) +*> On entry, Y contains the N-vector Y. +*> On exit, Y is overwritten. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> The increment between successive elements of Y. INCY > 0. +*> \endverbatim +*> +*> \param[out] SSMIN +*> \verbatim +*> SSMIN is DOUBLE PRECISION +*> The smallest singular value of the N-by-2 matrix A = ( X Y ). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLAPLL( N, X, INCX, Y, INCY, SSMIN ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCX, INCY, N + DOUBLE PRECISION SSMIN +* .. +* .. Array Arguments .. + DOUBLE PRECISION X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION A11, A12, A22, C, SSMAX, TAU +* .. +* .. External Functions .. + DOUBLE PRECISION DDOT + EXTERNAL DDOT +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DLARFG, DLAS2 +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + SSMIN = ZERO + RETURN + END IF +* +* Compute the QR factorization of the N-by-2 matrix ( X Y ) +* + CALL DLARFG( N, X( 1 ), X( 1+INCX ), INCX, TAU ) + A11 = X( 1 ) + X( 1 ) = ONE +* + C = -TAU*DDOT( N, X, INCX, Y, INCY ) + CALL DAXPY( N, C, X, INCX, Y, INCY ) +* + CALL DLARFG( N-1, Y( 1+INCY ), Y( 1+2*INCY ), INCY, TAU ) +* + A12 = Y( 1 ) + A22 = Y( 1+INCY ) +* +* Compute the SVD of 2-by-2 Upper triangular matrix. +* + CALL DLAS2( A11, A12, A22, SSMIN, SSMAX ) +* + RETURN +* +* End of DLAPLL +* + END diff --git a/dspl/liblapack/SRC/dlapmr.f b/dspl/liblapack/SRC/dlapmr.f new file mode 100644 index 0000000..257eb61 --- /dev/null +++ b/dspl/liblapack/SRC/dlapmr.f @@ -0,0 +1,204 @@ +*> \brief \b DLAPMR rearranges rows of a matrix as specified by a permutation vector. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAPMR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAPMR( FORWRD, M, N, X, LDX, K ) +* +* .. Scalar Arguments .. +* LOGICAL FORWRD +* INTEGER LDX, M, N +* .. +* .. Array Arguments .. +* INTEGER K( * ) +* DOUBLE PRECISION X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAPMR rearranges the rows of the M by N matrix X as specified +*> by the permutation K(1),K(2),...,K(M) of the integers 1,...,M. +*> If FORWRD = .TRUE., forward permutation: +*> +*> X(K(I),*) is moved X(I,*) for I = 1,2,...,M. +*> +*> If FORWRD = .FALSE., backward permutation: +*> +*> X(I,*) is moved to X(K(I),*) for I = 1,2,...,M. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FORWRD +*> \verbatim +*> FORWRD is LOGICAL +*> = .TRUE., forward permutation +*> = .FALSE., backward permutation +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix X. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix X. N >= 0. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,N) +*> On entry, the M by N matrix X. +*> On exit, X contains the permuted matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X, LDX >= MAX(1,M). +*> \endverbatim +*> +*> \param[in,out] K +*> \verbatim +*> K is INTEGER array, dimension (M) +*> On entry, K contains the permutation vector. K is used as +*> internal workspace, but reset to its original value on +*> output. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLAPMR( FORWRD, M, N, X, LDX, K ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL FORWRD + INTEGER LDX, M, N +* .. +* .. Array Arguments .. + INTEGER K( * ) + DOUBLE PRECISION X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IN, J, JJ + DOUBLE PRECISION TEMP +* .. +* .. Executable Statements .. +* + IF( M.LE.1 ) + $ RETURN +* + DO 10 I = 1, M + K( I ) = -K( I ) + 10 CONTINUE +* + IF( FORWRD ) THEN +* +* Forward permutation +* + DO 50 I = 1, M +* + IF( K( I ).GT.0 ) + $ GO TO 40 +* + J = I + K( J ) = -K( J ) + IN = K( J ) +* + 20 CONTINUE + IF( K( IN ).GT.0 ) + $ GO TO 40 +* + DO 30 JJ = 1, N + TEMP = X( J, JJ ) + X( J, JJ ) = X( IN, JJ ) + X( IN, JJ ) = TEMP + 30 CONTINUE +* + K( IN ) = -K( IN ) + J = IN + IN = K( IN ) + GO TO 20 +* + 40 CONTINUE +* + 50 CONTINUE +* + ELSE +* +* Backward permutation +* + DO 90 I = 1, M +* + IF( K( I ).GT.0 ) + $ GO TO 80 +* + K( I ) = -K( I ) + J = K( I ) + 60 CONTINUE + IF( J.EQ.I ) + $ GO TO 80 +* + DO 70 JJ = 1, N + TEMP = X( I, JJ ) + X( I, JJ ) = X( J, JJ ) + X( J, JJ ) = TEMP + 70 CONTINUE +* + K( J ) = -K( J ) + J = K( J ) + GO TO 60 +* + 80 CONTINUE +* + 90 CONTINUE +* + END IF +* + RETURN +* +* End of ZLAPMT +* + END + diff --git a/dspl/liblapack/SRC/dlapmt.f b/dspl/liblapack/SRC/dlapmt.f new file mode 100644 index 0000000..b322e7a --- /dev/null +++ b/dspl/liblapack/SRC/dlapmt.f @@ -0,0 +1,203 @@ +*> \brief \b DLAPMT performs a forward or backward permutation of the columns of a matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAPMT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAPMT( FORWRD, M, N, X, LDX, K ) +* +* .. Scalar Arguments .. +* LOGICAL FORWRD +* INTEGER LDX, M, N +* .. +* .. Array Arguments .. +* INTEGER K( * ) +* DOUBLE PRECISION X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAPMT rearranges the columns of the M by N matrix X as specified +*> by the permutation K(1),K(2),...,K(N) of the integers 1,...,N. +*> If FORWRD = .TRUE., forward permutation: +*> +*> X(*,K(J)) is moved X(*,J) for J = 1,2,...,N. +*> +*> If FORWRD = .FALSE., backward permutation: +*> +*> X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FORWRD +*> \verbatim +*> FORWRD is LOGICAL +*> = .TRUE., forward permutation +*> = .FALSE., backward permutation +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix X. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix X. N >= 0. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,N) +*> On entry, the M by N matrix X. +*> On exit, X contains the permuted matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X, LDX >= MAX(1,M). +*> \endverbatim +*> +*> \param[in,out] K +*> \verbatim +*> K is INTEGER array, dimension (N) +*> On entry, K contains the permutation vector. K is used as +*> internal workspace, but reset to its original value on +*> output. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLAPMT( FORWRD, M, N, X, LDX, K ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL FORWRD + INTEGER LDX, M, N +* .. +* .. Array Arguments .. + INTEGER K( * ) + DOUBLE PRECISION X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, II, IN, J + DOUBLE PRECISION TEMP +* .. +* .. Executable Statements .. +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, N + K( I ) = -K( I ) + 10 CONTINUE +* + IF( FORWRD ) THEN +* +* Forward permutation +* + DO 50 I = 1, N +* + IF( K( I ).GT.0 ) + $ GO TO 40 +* + J = I + K( J ) = -K( J ) + IN = K( J ) +* + 20 CONTINUE + IF( K( IN ).GT.0 ) + $ GO TO 40 +* + DO 30 II = 1, M + TEMP = X( II, J ) + X( II, J ) = X( II, IN ) + X( II, IN ) = TEMP + 30 CONTINUE +* + K( IN ) = -K( IN ) + J = IN + IN = K( IN ) + GO TO 20 +* + 40 CONTINUE +* + 50 CONTINUE +* + ELSE +* +* Backward permutation +* + DO 90 I = 1, N +* + IF( K( I ).GT.0 ) + $ GO TO 80 +* + K( I ) = -K( I ) + J = K( I ) + 60 CONTINUE + IF( J.EQ.I ) + $ GO TO 80 +* + DO 70 II = 1, M + TEMP = X( II, I ) + X( II, I ) = X( II, J ) + X( II, J ) = TEMP + 70 CONTINUE +* + K( J ) = -K( J ) + J = K( J ) + GO TO 60 +* + 80 CONTINUE +* + 90 CONTINUE +* + END IF +* + RETURN +* +* End of DLAPMT +* + END diff --git a/dspl/liblapack/SRC/dlapy2.f b/dspl/liblapack/SRC/dlapy2.f new file mode 100644 index 0000000..bc01829 --- /dev/null +++ b/dspl/liblapack/SRC/dlapy2.f @@ -0,0 +1,116 @@ +*> \brief \b DLAPY2 returns sqrt(x2+y2). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAPY2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLAPY2( X, Y ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION X, Y +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary +*> overflow. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] X +*> \verbatim +*> X is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] Y +*> \verbatim +*> Y is DOUBLE PRECISION +*> X and Y specify the values x and y. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLAPY2( X, Y ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + DOUBLE PRECISION X, Y +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION W, XABS, YABS, Z + LOGICAL X_IS_NAN, Y_IS_NAN +* .. +* .. External Functions .. + LOGICAL DISNAN + EXTERNAL DISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + X_IS_NAN = DISNAN( X ) + Y_IS_NAN = DISNAN( Y ) + IF ( X_IS_NAN ) DLAPY2 = X + IF ( Y_IS_NAN ) DLAPY2 = Y +* + IF ( .NOT.( X_IS_NAN.OR.Y_IS_NAN ) ) THEN + XABS = ABS( X ) + YABS = ABS( Y ) + W = MAX( XABS, YABS ) + Z = MIN( XABS, YABS ) + IF( Z.EQ.ZERO ) THEN + DLAPY2 = W + ELSE + DLAPY2 = W*SQRT( ONE+( Z / W )**2 ) + END IF + END IF + RETURN +* +* End of DLAPY2 +* + END diff --git a/dspl/liblapack/SRC/dlapy3.f b/dspl/liblapack/SRC/dlapy3.f new file mode 100644 index 0000000..3bbba88 --- /dev/null +++ b/dspl/liblapack/SRC/dlapy3.f @@ -0,0 +1,111 @@ +*> \brief \b DLAPY3 returns sqrt(x2+y2+z2). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAPY3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLAPY3( X, Y, Z ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION X, Y, Z +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause +*> unnecessary overflow. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] X +*> \verbatim +*> X is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] Y +*> \verbatim +*> Y is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION +*> X, Y and Z specify the values x, y and z. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLAPY3( X, Y, Z ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + DOUBLE PRECISION X, Y, Z +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION W, XABS, YABS, ZABS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + XABS = ABS( X ) + YABS = ABS( Y ) + ZABS = ABS( Z ) + W = MAX( XABS, YABS, ZABS ) + IF( W.EQ.ZERO ) THEN +* W can be zero for max(0,nan,0) +* adding all three entries together will make sure +* NaN will not disappear. + DLAPY3 = XABS + YABS + ZABS + ELSE + DLAPY3 = W*SQRT( ( XABS / W )**2+( YABS / W )**2+ + $ ( ZABS / W )**2 ) + END IF + RETURN +* +* End of DLAPY3 +* + END diff --git a/dspl/liblapack/SRC/dlaqgb.f b/dspl/liblapack/SRC/dlaqgb.f new file mode 100644 index 0000000..3c9fac0 --- /dev/null +++ b/dspl/liblapack/SRC/dlaqgb.f @@ -0,0 +1,256 @@ +*> \brief \b DLAQGB scales a general band matrix, using row and column scaling factors computed by sgbequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAQGB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAQGB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, +* AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED +* INTEGER KL, KU, LDAB, M, N +* DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), C( * ), R( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAQGB equilibrates a general M by N band matrix A with KL +*> subdiagonals and KU superdiagonals using the row and scaling factors +*> in the vectors R and C. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +*> +*> On exit, the equilibrated matrix, in the same storage format +*> as A. See EQUED for the form of the equilibrated matrix. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDA >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (M) +*> The row scale factors for A. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. +*> \endverbatim +*> +*> \param[in] ROWCND +*> \verbatim +*> ROWCND is DOUBLE PRECISION +*> Ratio of the smallest R(i) to the largest R(i). +*> \endverbatim +*> +*> \param[in] COLCND +*> \verbatim +*> COLCND is DOUBLE PRECISION +*> Ratio of the smallest C(i) to the largest C(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if row or column scaling +*> should be done based on the ratio of the row or column scaling +*> factors. If ROWCND < THRESH, row scaling is done, and if +*> COLCND < THRESH, column scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if row scaling +*> should be done based on the absolute size of the largest matrix +*> element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGBauxiliary +* +* ===================================================================== + SUBROUTINE DLAQGB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED + INTEGER KL, KU, LDAB, M, N + DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), C( * ), R( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) + $ THEN +* +* No row scaling +* + IF( COLCND.GE.THRESH ) THEN +* +* No column scaling +* + EQUED = 'N' + ELSE +* +* Column scaling +* + DO 20 J = 1, N + CJ = C( J ) + DO 10 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = CJ*AB( KU+1+I-J, J ) + 10 CONTINUE + 20 CONTINUE + EQUED = 'C' + END IF + ELSE IF( COLCND.GE.THRESH ) THEN +* +* Row scaling, no column scaling +* + DO 40 J = 1, N + DO 30 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = R( I )*AB( KU+1+I-J, J ) + 30 CONTINUE + 40 CONTINUE + EQUED = 'R' + ELSE +* +* Row and column scaling +* + DO 60 J = 1, N + CJ = C( J ) + DO 50 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = CJ*R( I )*AB( KU+1+I-J, J ) + 50 CONTINUE + 60 CONTINUE + EQUED = 'B' + END IF +* + RETURN +* +* End of DLAQGB +* + END diff --git a/dspl/liblapack/SRC/dlaqge.f b/dspl/liblapack/SRC/dlaqge.f new file mode 100644 index 0000000..a985254 --- /dev/null +++ b/dspl/liblapack/SRC/dlaqge.f @@ -0,0 +1,236 @@ +*> \brief \b DLAQGE scales a general rectangular matrix, using row and column scaling factors computed by sgeequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAQGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAQGE( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, +* EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED +* INTEGER LDA, M, N +* DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( * ), R( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAQGE equilibrates a general M by N matrix A using the row and +*> column scaling factors in the vectors R and C. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M by N matrix A. +*> On exit, the equilibrated matrix. See EQUED for the form of +*> the equilibrated matrix. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(M,1). +*> \endverbatim +*> +*> \param[in] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (M) +*> The row scale factors for A. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. +*> \endverbatim +*> +*> \param[in] ROWCND +*> \verbatim +*> ROWCND is DOUBLE PRECISION +*> Ratio of the smallest R(i) to the largest R(i). +*> \endverbatim +*> +*> \param[in] COLCND +*> \verbatim +*> COLCND is DOUBLE PRECISION +*> Ratio of the smallest C(i) to the largest C(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if row or column scaling +*> should be done based on the ratio of the row or column scaling +*> factors. If ROWCND < THRESH, row scaling is done, and if +*> COLCND < THRESH, column scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if row scaling +*> should be done based on the absolute size of the largest matrix +*> element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEauxiliary +* +* ===================================================================== + SUBROUTINE DLAQGE( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED + INTEGER LDA, M, N + DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( * ), R( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) + $ THEN +* +* No row scaling +* + IF( COLCND.GE.THRESH ) THEN +* +* No column scaling +* + EQUED = 'N' + ELSE +* +* Column scaling +* + DO 20 J = 1, N + CJ = C( J ) + DO 10 I = 1, M + A( I, J ) = CJ*A( I, J ) + 10 CONTINUE + 20 CONTINUE + EQUED = 'C' + END IF + ELSE IF( COLCND.GE.THRESH ) THEN +* +* Row scaling, no column scaling +* + DO 40 J = 1, N + DO 30 I = 1, M + A( I, J ) = R( I )*A( I, J ) + 30 CONTINUE + 40 CONTINUE + EQUED = 'R' + ELSE +* +* Row and column scaling +* + DO 60 J = 1, N + CJ = C( J ) + DO 50 I = 1, M + A( I, J ) = CJ*R( I )*A( I, J ) + 50 CONTINUE + 60 CONTINUE + EQUED = 'B' + END IF +* + RETURN +* +* End of DLAQGE +* + END diff --git a/dspl/liblapack/SRC/dlaqp2.f b/dspl/liblapack/SRC/dlaqp2.f new file mode 100644 index 0000000..b6351e0 --- /dev/null +++ b/dspl/liblapack/SRC/dlaqp2.f @@ -0,0 +1,262 @@ +*> \brief \b DLAQP2 computes a QR factorization with column pivoting of the matrix block. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAQP2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, +* WORK ) +* +* .. Scalar Arguments .. +* INTEGER LDA, M, N, OFFSET +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* DOUBLE PRECISION A( LDA, * ), TAU( * ), VN1( * ), VN2( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAQP2 computes a QR factorization with column pivoting of +*> the block A(OFFSET+1:M,1:N). +*> The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] OFFSET +*> \verbatim +*> OFFSET is INTEGER +*> The number of rows of the matrix A that must be pivoted +*> but no factorized. OFFSET >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the upper triangle of block A(OFFSET+1:M,1:N) is +*> the triangular factor obtained; the elements in block +*> A(OFFSET+1:M,1:N) below the diagonal, together with the +*> array TAU, represent the orthogonal matrix Q as a product of +*> elementary reflectors. Block A(1:OFFSET,1:N) has been +*> accordingly pivoted, but no factorized. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +*> to the front of A*P (a leading column); if JPVT(i) = 0, +*> the i-th column of A is a free column. +*> On exit, if JPVT(i) = k, then the i-th column of A*P +*> was the k-th column of A. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[in,out] VN1 +*> \verbatim +*> VN1 is DOUBLE PRECISION array, dimension (N) +*> The vector with the partial column norms. +*> \endverbatim +*> +*> \param[in,out] VN2 +*> \verbatim +*> VN2 is DOUBLE PRECISION array, dimension (N) +*> The vector with the exact column norms. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +*> X. Sun, Computer Science Dept., Duke University, USA +*> \n +*> Partial column norm updating strategy modified on April 2011 +*> Z. Drmac and Z. Bujanovic, Dept. of Mathematics, +*> University of Zagreb, Croatia. +* +*> \par References: +* ================ +*> +*> LAPACK Working Note 176 +* +*> \htmlonly +*> [PDF] +*> \endhtmlonly +* +* ===================================================================== + SUBROUTINE DLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER LDA, M, N, OFFSET +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION A( LDA, * ), TAU( * ), VN1( * ), VN2( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, ITEMP, J, MN, OFFPI, PVT + DOUBLE PRECISION AII, TEMP, TEMP2, TOL3Z +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFG, DSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DNRM2 + EXTERNAL IDAMAX, DLAMCH, DNRM2 +* .. +* .. Executable Statements .. +* + MN = MIN( M-OFFSET, N ) + TOL3Z = SQRT(DLAMCH('Epsilon')) +* +* Compute factorization. +* + DO 20 I = 1, MN +* + OFFPI = OFFSET + I +* +* Determine ith pivot column and swap if necessary. +* + PVT = ( I-1 ) + IDAMAX( N-I+1, VN1( I ), 1 ) +* + IF( PVT.NE.I ) THEN + CALL DSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( I ) + JPVT( I ) = ITEMP + VN1( PVT ) = VN1( I ) + VN2( PVT ) = VN2( I ) + END IF +* +* Generate elementary reflector H(i). +* + IF( OFFPI.LT.M ) THEN + CALL DLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), 1, + $ TAU( I ) ) + ELSE + CALL DLARFG( 1, A( M, I ), A( M, I ), 1, TAU( I ) ) + END IF +* + IF( I.LT.N ) THEN +* +* Apply H(i)**T to A(offset+i:m,i+1:n) from the left. +* + AII = A( OFFPI, I ) + A( OFFPI, I ) = ONE + CALL DLARF( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1, + $ TAU( I ), A( OFFPI, I+1 ), LDA, WORK( 1 ) ) + A( OFFPI, I ) = AII + END IF +* +* Update partial column norms. +* + DO 10 J = I + 1, N + IF( VN1( J ).NE.ZERO ) THEN +* +* NOTE: The following 4 lines follow from the analysis in +* Lapack Working Note 176. +* + TEMP = ONE - ( ABS( A( OFFPI, J ) ) / VN1( J ) )**2 + TEMP = MAX( TEMP, ZERO ) + TEMP2 = TEMP*( VN1( J ) / VN2( J ) )**2 + IF( TEMP2 .LE. TOL3Z ) THEN + IF( OFFPI.LT.M ) THEN + VN1( J ) = DNRM2( M-OFFPI, A( OFFPI+1, J ), 1 ) + VN2( J ) = VN1( J ) + ELSE + VN1( J ) = ZERO + VN2( J ) = ZERO + END IF + ELSE + VN1( J ) = VN1( J )*SQRT( TEMP ) + END IF + END IF + 10 CONTINUE +* + 20 CONTINUE +* + RETURN +* +* End of DLAQP2 +* + END diff --git a/dspl/liblapack/SRC/dlaqps.f b/dspl/liblapack/SRC/dlaqps.f new file mode 100644 index 0000000..395d8e0 --- /dev/null +++ b/dspl/liblapack/SRC/dlaqps.f @@ -0,0 +1,358 @@ +*> \brief \b DLAQPS computes a step of QR factorization with column pivoting of a real m-by-n matrix A by using BLAS level 3. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAQPS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1, +* VN2, AUXV, F, LDF ) +* +* .. Scalar Arguments .. +* INTEGER KB, LDA, LDF, M, N, NB, OFFSET +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* DOUBLE PRECISION A( LDA, * ), AUXV( * ), F( LDF, * ), TAU( * ), +* $ VN1( * ), VN2( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAQPS computes a step of QR factorization with column pivoting +*> of a real M-by-N matrix A by using Blas-3. It tries to factorize +*> NB columns from A starting from the row OFFSET+1, and updates all +*> of the matrix with Blas-3 xGEMM. +*> +*> In some cases, due to catastrophic cancellations, it cannot +*> factorize NB columns. Hence, the actual number of factorized +*> columns is returned in KB. +*> +*> Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0 +*> \endverbatim +*> +*> \param[in] OFFSET +*> \verbatim +*> OFFSET is INTEGER +*> The number of rows of A that have been factorized in +*> previous steps. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of columns to factorize. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns actually factorized. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, block A(OFFSET+1:M,1:KB) is the triangular +*> factor obtained and block A(1:OFFSET,1:N) has been +*> accordingly pivoted, but no factorized. +*> The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has +*> been updated. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> JPVT(I) = K <==> Column K of the full matrix A has been +*> permuted into position I in AP. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (KB) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[in,out] VN1 +*> \verbatim +*> VN1 is DOUBLE PRECISION array, dimension (N) +*> The vector with the partial column norms. +*> \endverbatim +*> +*> \param[in,out] VN2 +*> \verbatim +*> VN2 is DOUBLE PRECISION array, dimension (N) +*> The vector with the exact column norms. +*> \endverbatim +*> +*> \param[in,out] AUXV +*> \verbatim +*> AUXV is DOUBLE PRECISION array, dimension (NB) +*> Auxiliar vector. +*> \endverbatim +*> +*> \param[in,out] F +*> \verbatim +*> F is DOUBLE PRECISION array, dimension (LDF,NB) +*> Matrix F**T = L*Y**T*A. +*> \endverbatim +*> +*> \param[in] LDF +*> \verbatim +*> LDF is INTEGER +*> The leading dimension of the array F. LDF >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +*> X. Sun, Computer Science Dept., Duke University, USA +*> \n +*> Partial column norm updating strategy modified on April 2011 +*> Z. Drmac and Z. Bujanovic, Dept. of Mathematics, +*> University of Zagreb, Croatia. +* +*> \par References: +* ================ +*> +*> LAPACK Working Note 176 +* +*> \htmlonly +*> [PDF] +*> \endhtmlonly +* +* ===================================================================== + SUBROUTINE DLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1, + $ VN2, AUXV, F, LDF ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER KB, LDA, LDF, M, N, NB, OFFSET +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION A( LDA, * ), AUXV( * ), F( LDF, * ), TAU( * ), + $ VN1( * ), VN2( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER ITEMP, J, K, LASTRK, LSTICC, PVT, RK + DOUBLE PRECISION AKK, TEMP, TEMP2, TOL3Z +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DGEMV, DLARFG, DSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN, NINT, SQRT +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DNRM2 + EXTERNAL IDAMAX, DLAMCH, DNRM2 +* .. +* .. Executable Statements .. +* + LASTRK = MIN( M, N+OFFSET ) + LSTICC = 0 + K = 0 + TOL3Z = SQRT(DLAMCH('Epsilon')) +* +* Beginning of while loop. +* + 10 CONTINUE + IF( ( K.LT.NB ) .AND. ( LSTICC.EQ.0 ) ) THEN + K = K + 1 + RK = OFFSET + K +* +* Determine ith pivot column and swap if necessary +* + PVT = ( K-1 ) + IDAMAX( N-K+1, VN1( K ), 1 ) + IF( PVT.NE.K ) THEN + CALL DSWAP( M, A( 1, PVT ), 1, A( 1, K ), 1 ) + CALL DSWAP( K-1, F( PVT, 1 ), LDF, F( K, 1 ), LDF ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( K ) + JPVT( K ) = ITEMP + VN1( PVT ) = VN1( K ) + VN2( PVT ) = VN2( K ) + END IF +* +* Apply previous Householder reflectors to column K: +* A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)**T. +* + IF( K.GT.1 ) THEN + CALL DGEMV( 'No transpose', M-RK+1, K-1, -ONE, A( RK, 1 ), + $ LDA, F( K, 1 ), LDF, ONE, A( RK, K ), 1 ) + END IF +* +* Generate elementary reflector H(k). +* + IF( RK.LT.M ) THEN + CALL DLARFG( M-RK+1, A( RK, K ), A( RK+1, K ), 1, TAU( K ) ) + ELSE + CALL DLARFG( 1, A( RK, K ), A( RK, K ), 1, TAU( K ) ) + END IF +* + AKK = A( RK, K ) + A( RK, K ) = ONE +* +* Compute Kth column of F: +* +* Compute F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)**T*A(RK:M,K). +* + IF( K.LT.N ) THEN + CALL DGEMV( 'Transpose', M-RK+1, N-K, TAU( K ), + $ A( RK, K+1 ), LDA, A( RK, K ), 1, ZERO, + $ F( K+1, K ), 1 ) + END IF +* +* Padding F(1:K,K) with zeros. +* + DO 20 J = 1, K + F( J, K ) = ZERO + 20 CONTINUE +* +* Incremental updating of F: +* F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)**T +* *A(RK:M,K). +* + IF( K.GT.1 ) THEN + CALL DGEMV( 'Transpose', M-RK+1, K-1, -TAU( K ), A( RK, 1 ), + $ LDA, A( RK, K ), 1, ZERO, AUXV( 1 ), 1 ) +* + CALL DGEMV( 'No transpose', N, K-1, ONE, F( 1, 1 ), LDF, + $ AUXV( 1 ), 1, ONE, F( 1, K ), 1 ) + END IF +* +* Update the current row of A: +* A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)**T. +* + IF( K.LT.N ) THEN + CALL DGEMV( 'No transpose', N-K, K, -ONE, F( K+1, 1 ), LDF, + $ A( RK, 1 ), LDA, ONE, A( RK, K+1 ), LDA ) + END IF +* +* Update partial column norms. +* + IF( RK.LT.LASTRK ) THEN + DO 30 J = K + 1, N + IF( VN1( J ).NE.ZERO ) THEN +* +* NOTE: The following 4 lines follow from the analysis in +* Lapack Working Note 176. +* + TEMP = ABS( A( RK, J ) ) / VN1( J ) + TEMP = MAX( ZERO, ( ONE+TEMP )*( ONE-TEMP ) ) + TEMP2 = TEMP*( VN1( J ) / VN2( J ) )**2 + IF( TEMP2 .LE. TOL3Z ) THEN + VN2( J ) = DBLE( LSTICC ) + LSTICC = J + ELSE + VN1( J ) = VN1( J )*SQRT( TEMP ) + END IF + END IF + 30 CONTINUE + END IF +* + A( RK, K ) = AKK +* +* End of while loop. +* + GO TO 10 + END IF + KB = K + RK = OFFSET + KB +* +* Apply the block reflector to the rest of the matrix: +* A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) - +* A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)**T. +* + IF( KB.LT.MIN( N, M-OFFSET ) ) THEN + CALL DGEMM( 'No transpose', 'Transpose', M-RK, N-KB, KB, -ONE, + $ A( RK+1, 1 ), LDA, F( KB+1, 1 ), LDF, ONE, + $ A( RK+1, KB+1 ), LDA ) + END IF +* +* Recomputation of difficult columns. +* + 40 CONTINUE + IF( LSTICC.GT.0 ) THEN + ITEMP = NINT( VN2( LSTICC ) ) + VN1( LSTICC ) = DNRM2( M-RK, A( RK+1, LSTICC ), 1 ) +* +* NOTE: The computation of VN1( LSTICC ) relies on the fact that +* SNRM2 does not fail on vectors with norm below the value of +* SQRT(DLAMCH('S')) +* + VN2( LSTICC ) = VN1( LSTICC ) + LSTICC = ITEMP + GO TO 40 + END IF +* + RETURN +* +* End of DLAQPS +* + END diff --git a/dspl/liblapack/SRC/dlaqr0.f b/dspl/liblapack/SRC/dlaqr0.f new file mode 100644 index 0000000..247d4ef --- /dev/null +++ b/dspl/liblapack/SRC/dlaqr0.f @@ -0,0 +1,740 @@ +*> \brief \b DLAQR0 computes the eigenvalues of a Hessenberg matrix, and optionally the matrices from the Schur decomposition. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAQR0 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, +* ILOZ, IHIZ, Z, LDZ, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* DOUBLE PRECISION H( LDH, * ), WI( * ), WORK( * ), WR( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAQR0 computes the eigenvalues of a Hessenberg matrix H +*> and, optionally, the matrices T and Z from the Schur decomposition +*> H = Z T Z**T, where T is an upper quasi-triangular matrix (the +*> Schur form), and Z is the orthogonal matrix of Schur vectors. +*> +*> Optionally Z may be postmultiplied into an input orthogonal +*> matrix Q so that this routine can give the Schur factorization +*> of a matrix A which has been reduced to the Hessenberg form H +*> by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> = .TRUE. : the full Schur form T is required; +*> = .FALSE.: only eigenvalues are required. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> = .TRUE. : the matrix of Schur vectors Z is required; +*> = .FALSE.: Schur vectors are not required. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N .GE. 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> It is assumed that H is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, +*> H(ILO,ILO-1) is zero. ILO and IHI are normally set by a +*> previous call to DGEBAL, and then passed to DGEHRD when the +*> matrix output by DGEBAL is reduced to Hessenberg form. +*> Otherwise, ILO and IHI should be set to 1 and N, +*> respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. +*> If N = 0, then ILO = 1 and IHI = 0. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is DOUBLE PRECISION array, dimension (LDH,N) +*> On entry, the upper Hessenberg matrix H. +*> On exit, if INFO = 0 and WANTT is .TRUE., then H contains +*> the upper quasi-triangular matrix T from the Schur +*> decomposition (the Schur form); 2-by-2 diagonal blocks +*> (corresponding to complex conjugate pairs of eigenvalues) +*> are returned in standard form, with H(i,i) = H(i+1,i+1) +*> and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is +*> .FALSE., then the contents of H are unspecified on exit. +*> (The output value of H when INFO.GT.0 is given under the +*> description of INFO below.) +*> +*> This subroutine may explicitly set H(i,j) = 0 for i.GT.j and +*> j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH .GE. max(1,N). +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is DOUBLE PRECISION array, dimension (IHI) +*> \endverbatim +*> +*> \param[out] WI +*> \verbatim +*> WI is DOUBLE PRECISION array, dimension (IHI) +*> The real and imaginary parts, respectively, of the computed +*> eigenvalues of H(ILO:IHI,ILO:IHI) are stored in WR(ILO:IHI) +*> and WI(ILO:IHI). If two eigenvalues are computed as a +*> complex conjugate pair, they are stored in consecutive +*> elements of WR and WI, say the i-th and (i+1)th, with +*> WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then +*> the eigenvalues are stored in the same order as on the +*> diagonal of the Schur form returned in H, with +*> WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal +*> block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and +*> WI(i+1) = -WI(i). +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. +*> 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ,IHI) +*> If WANTZ is .FALSE., then Z is not referenced. +*> If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is +*> replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the +*> orthogonal Schur factor of H(ILO:IHI,ILO:IHI). +*> (The output value of Z when INFO.GT.0 is given under +*> the description of INFO below.) +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. if WANTZ is .TRUE. +*> then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension LWORK +*> On exit, if LWORK = -1, WORK(1) returns an estimate of +*> the optimal value for LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK .GE. max(1,N) +*> is sufficient, but LWORK typically as large as 6*N may +*> be required for optimal performance. A workspace query +*> to determine the optimal workspace size is recommended. +*> +*> If LWORK = -1, then DLAQR0 does a workspace query. +*> In this case, DLAQR0 checks the input parameters and +*> estimates the optimal workspace size for the given +*> values of N, ILO and IHI. The estimate is returned +*> in WORK(1). No error message related to LWORK is +*> issued by XERBLA. Neither H nor Z are accessed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> .GT. 0: if INFO = i, DLAQR0 failed to compute all of +*> the eigenvalues. Elements 1:ilo-1 and i+1:n of WR +*> and WI contain those eigenvalues which have been +*> successfully computed. (Failures are rare.) +*> +*> If INFO .GT. 0 and WANT is .FALSE., then on exit, +*> the remaining unconverged eigenvalues are the eigen- +*> values of the upper Hessenberg matrix rows and +*> columns ILO through INFO of the final, output +*> value of H. +*> +*> If INFO .GT. 0 and WANTT is .TRUE., then on exit +*> +*> (*) (initial value of H)*U = U*(final value of H) +*> +*> where U is an orthogonal matrix. The final +*> value of H is upper Hessenberg and quasi-triangular +*> in rows and columns INFO+1 through IHI. +*> +*> If INFO .GT. 0 and WANTZ is .TRUE., then on exit +*> +*> (final value of Z(ILO:IHI,ILOZ:IHIZ) +*> = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U +*> +*> where U is the orthogonal matrix in (*) (regard- +*> less of the value of WANTT.) +*> +*> If INFO .GT. 0 and WANTZ is .FALSE., then Z is not +*> accessed. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +* +*> \par References: +* ================ +*> +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages +*> 929--947, 2002. +*> \n +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part II: Aggressive Early Deflation, SIAM Journal +*> of Matrix Analysis, volume 23, pages 948--973, 2002. +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, + $ ILOZ, IHIZ, Z, LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + DOUBLE PRECISION H( LDH, * ), WI( * ), WORK( * ), WR( * ), + $ Z( LDZ, * ) +* .. +* +* ================================================================ +* +* .. Parameters .. +* +* ==== Matrices of order NTINY or smaller must be processed by +* . DLAHQR because of insufficient subdiagonal scratch space. +* . (This is a hard limit.) ==== + INTEGER NTINY + PARAMETER ( NTINY = 11 ) +* +* ==== Exceptional deflation windows: try to cure rare +* . slow convergence by varying the size of the +* . deflation window after KEXNW iterations. ==== + INTEGER KEXNW + PARAMETER ( KEXNW = 5 ) +* +* ==== Exceptional shifts: try to cure rare slow convergence +* . with ad-hoc exceptional shifts every KEXSH iterations. +* . ==== + INTEGER KEXSH + PARAMETER ( KEXSH = 6 ) +* +* ==== The constants WILK1 and WILK2 are used to form the +* . exceptional shifts. ==== + DOUBLE PRECISION WILK1, WILK2 + PARAMETER ( WILK1 = 0.75d0, WILK2 = -0.4375d0 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION AA, BB, CC, CS, DD, SN, SS, SWAP + INTEGER I, INF, IT, ITMAX, K, KACC22, KBOT, KDU, KS, + $ KT, KTOP, KU, KV, KWH, KWTOP, KWV, LD, LS, + $ LWKOPT, NDEC, NDFL, NH, NHO, NIBBLE, NMIN, NS, + $ NSMAX, NSR, NVE, NW, NWMAX, NWR, NWUPBD + LOGICAL SORTED + CHARACTER JBCMPZ*2 +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Local Arrays .. + DOUBLE PRECISION ZDUM( 1, 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DLACPY, DLAHQR, DLANV2, DLAQR3, DLAQR4, DLAQR5 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, MAX, MIN, MOD +* .. +* .. Executable Statements .. + INFO = 0 +* +* ==== Quick return for N = 0: nothing to do. ==== +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = ONE + RETURN + END IF +* + IF( N.LE.NTINY ) THEN +* +* ==== Tiny matrices must use DLAHQR. ==== +* + LWKOPT = 1 + IF( LWORK.NE.-1 ) + $ CALL DLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, + $ ILOZ, IHIZ, Z, LDZ, INFO ) + ELSE +* +* ==== Use small bulge multi-shift QR with aggressive early +* . deflation on larger-than-tiny matrices. ==== +* +* ==== Hope for the best. ==== +* + INFO = 0 +* +* ==== Set up job flags for ILAENV. ==== +* + IF( WANTT ) THEN + JBCMPZ( 1: 1 ) = 'S' + ELSE + JBCMPZ( 1: 1 ) = 'E' + END IF + IF( WANTZ ) THEN + JBCMPZ( 2: 2 ) = 'V' + ELSE + JBCMPZ( 2: 2 ) = 'N' + END IF +* +* ==== NWR = recommended deflation window size. At this +* . point, N .GT. NTINY = 11, so there is enough +* . subdiagonal workspace for NWR.GE.2 as required. +* . (In fact, there is enough subdiagonal space for +* . NWR.GE.3.) ==== +* + NWR = ILAENV( 13, 'DLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NWR = MAX( 2, NWR ) + NWR = MIN( IHI-ILO+1, ( N-1 ) / 3, NWR ) +* +* ==== NSR = recommended number of simultaneous shifts. +* . At this point N .GT. NTINY = 11, so there is at +* . enough subdiagonal workspace for NSR to be even +* . and greater than or equal to two as required. ==== +* + NSR = ILAENV( 15, 'DLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NSR = MIN( NSR, ( N+6 ) / 9, IHI-ILO ) + NSR = MAX( 2, NSR-MOD( NSR, 2 ) ) +* +* ==== Estimate optimal workspace ==== +* +* ==== Workspace query call to DLAQR3 ==== +* + CALL DLAQR3( WANTT, WANTZ, N, ILO, IHI, NWR+1, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, WR, WI, H, LDH, N, H, LDH, + $ N, H, LDH, WORK, -1 ) +* +* ==== Optimal workspace = MAX(DLAQR5, DLAQR3) ==== +* + LWKOPT = MAX( 3*NSR / 2, INT( WORK( 1 ) ) ) +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = DBLE( LWKOPT ) + RETURN + END IF +* +* ==== DLAHQR/DLAQR0 crossover point ==== +* + NMIN = ILAENV( 12, 'DLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NMIN = MAX( NTINY, NMIN ) +* +* ==== Nibble crossover point ==== +* + NIBBLE = ILAENV( 14, 'DLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NIBBLE = MAX( 0, NIBBLE ) +* +* ==== Accumulate reflections during ttswp? Use block +* . 2-by-2 structure during matrix-matrix multiply? ==== +* + KACC22 = ILAENV( 16, 'DLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + KACC22 = MAX( 0, KACC22 ) + KACC22 = MIN( 2, KACC22 ) +* +* ==== NWMAX = the largest possible deflation window for +* . which there is sufficient workspace. ==== +* + NWMAX = MIN( ( N-1 ) / 3, LWORK / 2 ) + NW = NWMAX +* +* ==== NSMAX = the Largest number of simultaneous shifts +* . for which there is sufficient workspace. ==== +* + NSMAX = MIN( ( N+6 ) / 9, 2*LWORK / 3 ) + NSMAX = NSMAX - MOD( NSMAX, 2 ) +* +* ==== NDFL: an iteration count restarted at deflation. ==== +* + NDFL = 1 +* +* ==== ITMAX = iteration limit ==== +* + ITMAX = MAX( 30, 2*KEXSH )*MAX( 10, ( IHI-ILO+1 ) ) +* +* ==== Last row and column in the active block ==== +* + KBOT = IHI +* +* ==== Main Loop ==== +* + DO 80 IT = 1, ITMAX +* +* ==== Done when KBOT falls below ILO ==== +* + IF( KBOT.LT.ILO ) + $ GO TO 90 +* +* ==== Locate active block ==== +* + DO 10 K = KBOT, ILO + 1, -1 + IF( H( K, K-1 ).EQ.ZERO ) + $ GO TO 20 + 10 CONTINUE + K = ILO + 20 CONTINUE + KTOP = K +* +* ==== Select deflation window size: +* . Typical Case: +* . If possible and advisable, nibble the entire +* . active block. If not, use size MIN(NWR,NWMAX) +* . or MIN(NWR+1,NWMAX) depending upon which has +* . the smaller corresponding subdiagonal entry +* . (a heuristic). +* . +* . Exceptional Case: +* . If there have been no deflations in KEXNW or +* . more iterations, then vary the deflation window +* . size. At first, because, larger windows are, +* . in general, more powerful than smaller ones, +* . rapidly increase the window to the maximum possible. +* . Then, gradually reduce the window size. ==== +* + NH = KBOT - KTOP + 1 + NWUPBD = MIN( NH, NWMAX ) + IF( NDFL.LT.KEXNW ) THEN + NW = MIN( NWUPBD, NWR ) + ELSE + NW = MIN( NWUPBD, 2*NW ) + END IF + IF( NW.LT.NWMAX ) THEN + IF( NW.GE.NH-1 ) THEN + NW = NH + ELSE + KWTOP = KBOT - NW + 1 + IF( ABS( H( KWTOP, KWTOP-1 ) ).GT. + $ ABS( H( KWTOP-1, KWTOP-2 ) ) )NW = NW + 1 + END IF + END IF + IF( NDFL.LT.KEXNW ) THEN + NDEC = -1 + ELSE IF( NDEC.GE.0 .OR. NW.GE.NWUPBD ) THEN + NDEC = NDEC + 1 + IF( NW-NDEC.LT.2 ) + $ NDEC = 0 + NW = NW - NDEC + END IF +* +* ==== Aggressive early deflation: +* . split workspace under the subdiagonal into +* . - an nw-by-nw work array V in the lower +* . left-hand-corner, +* . - an NW-by-at-least-NW-but-more-is-better +* . (NW-by-NHO) horizontal work array along +* . the bottom edge, +* . - an at-least-NW-but-more-is-better (NHV-by-NW) +* . vertical work array along the left-hand-edge. +* . ==== +* + KV = N - NW + 1 + KT = NW + 1 + NHO = ( N-NW-1 ) - KT + 1 + KWV = NW + 2 + NVE = ( N-NW ) - KWV + 1 +* +* ==== Aggressive early deflation ==== +* + CALL DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, WR, WI, H( KV, 1 ), LDH, + $ NHO, H( KV, KT ), LDH, NVE, H( KWV, 1 ), LDH, + $ WORK, LWORK ) +* +* ==== Adjust KBOT accounting for new deflations. ==== +* + KBOT = KBOT - LD +* +* ==== KS points to the shifts. ==== +* + KS = KBOT - LS + 1 +* +* ==== Skip an expensive QR sweep if there is a (partly +* . heuristic) reason to expect that many eigenvalues +* . will deflate without it. Here, the QR sweep is +* . skipped if many eigenvalues have just been deflated +* . or if the remaining active block is small. +* + IF( ( LD.EQ.0 ) .OR. ( ( 100*LD.LE.NW*NIBBLE ) .AND. ( KBOT- + $ KTOP+1.GT.MIN( NMIN, NWMAX ) ) ) ) THEN +* +* ==== NS = nominal number of simultaneous shifts. +* . This may be lowered (slightly) if DLAQR3 +* . did not provide that many shifts. ==== +* + NS = MIN( NSMAX, NSR, MAX( 2, KBOT-KTOP ) ) + NS = NS - MOD( NS, 2 ) +* +* ==== If there have been no deflations +* . in a multiple of KEXSH iterations, +* . then try exceptional shifts. +* . Otherwise use shifts provided by +* . DLAQR3 above or from the eigenvalues +* . of a trailing principal submatrix. ==== +* + IF( MOD( NDFL, KEXSH ).EQ.0 ) THEN + KS = KBOT - NS + 1 + DO 30 I = KBOT, MAX( KS+1, KTOP+2 ), -2 + SS = ABS( H( I, I-1 ) ) + ABS( H( I-1, I-2 ) ) + AA = WILK1*SS + H( I, I ) + BB = SS + CC = WILK2*SS + DD = AA + CALL DLANV2( AA, BB, CC, DD, WR( I-1 ), WI( I-1 ), + $ WR( I ), WI( I ), CS, SN ) + 30 CONTINUE + IF( KS.EQ.KTOP ) THEN + WR( KS+1 ) = H( KS+1, KS+1 ) + WI( KS+1 ) = ZERO + WR( KS ) = WR( KS+1 ) + WI( KS ) = WI( KS+1 ) + END IF + ELSE +* +* ==== Got NS/2 or fewer shifts? Use DLAQR4 or +* . DLAHQR on a trailing principal submatrix to +* . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, +* . there is enough space below the subdiagonal +* . to fit an NS-by-NS scratch array.) ==== +* + IF( KBOT-KS+1.LE.NS / 2 ) THEN + KS = KBOT - NS + 1 + KT = N - NS + 1 + CALL DLACPY( 'A', NS, NS, H( KS, KS ), LDH, + $ H( KT, 1 ), LDH ) + IF( NS.GT.NMIN ) THEN + CALL DLAQR4( .false., .false., NS, 1, NS, + $ H( KT, 1 ), LDH, WR( KS ), + $ WI( KS ), 1, 1, ZDUM, 1, WORK, + $ LWORK, INF ) + ELSE + CALL DLAHQR( .false., .false., NS, 1, NS, + $ H( KT, 1 ), LDH, WR( KS ), + $ WI( KS ), 1, 1, ZDUM, 1, INF ) + END IF + KS = KS + INF +* +* ==== In case of a rare QR failure use +* . eigenvalues of the trailing 2-by-2 +* . principal submatrix. ==== +* + IF( KS.GE.KBOT ) THEN + AA = H( KBOT-1, KBOT-1 ) + CC = H( KBOT, KBOT-1 ) + BB = H( KBOT-1, KBOT ) + DD = H( KBOT, KBOT ) + CALL DLANV2( AA, BB, CC, DD, WR( KBOT-1 ), + $ WI( KBOT-1 ), WR( KBOT ), + $ WI( KBOT ), CS, SN ) + KS = KBOT - 1 + END IF + END IF +* + IF( KBOT-KS+1.GT.NS ) THEN +* +* ==== Sort the shifts (Helps a little) +* . Bubble sort keeps complex conjugate +* . pairs together. ==== +* + SORTED = .false. + DO 50 K = KBOT, KS + 1, -1 + IF( SORTED ) + $ GO TO 60 + SORTED = .true. + DO 40 I = KS, K - 1 + IF( ABS( WR( I ) )+ABS( WI( I ) ).LT. + $ ABS( WR( I+1 ) )+ABS( WI( I+1 ) ) ) THEN + SORTED = .false. +* + SWAP = WR( I ) + WR( I ) = WR( I+1 ) + WR( I+1 ) = SWAP +* + SWAP = WI( I ) + WI( I ) = WI( I+1 ) + WI( I+1 ) = SWAP + END IF + 40 CONTINUE + 50 CONTINUE + 60 CONTINUE + END IF +* +* ==== Shuffle shifts into pairs of real shifts +* . and pairs of complex conjugate shifts +* . assuming complex conjugate shifts are +* . already adjacent to one another. (Yes, +* . they are.) ==== +* + DO 70 I = KBOT, KS + 2, -2 + IF( WI( I ).NE.-WI( I-1 ) ) THEN +* + SWAP = WR( I ) + WR( I ) = WR( I-1 ) + WR( I-1 ) = WR( I-2 ) + WR( I-2 ) = SWAP +* + SWAP = WI( I ) + WI( I ) = WI( I-1 ) + WI( I-1 ) = WI( I-2 ) + WI( I-2 ) = SWAP + END IF + 70 CONTINUE + END IF +* +* ==== If there are only two shifts and both are +* . real, then use only one. ==== +* + IF( KBOT-KS+1.EQ.2 ) THEN + IF( WI( KBOT ).EQ.ZERO ) THEN + IF( ABS( WR( KBOT )-H( KBOT, KBOT ) ).LT. + $ ABS( WR( KBOT-1 )-H( KBOT, KBOT ) ) ) THEN + WR( KBOT-1 ) = WR( KBOT ) + ELSE + WR( KBOT ) = WR( KBOT-1 ) + END IF + END IF + END IF +* +* ==== Use up to NS of the the smallest magnatiude +* . shifts. If there aren't NS shifts available, +* . then use them all, possibly dropping one to +* . make the number of shifts even. ==== +* + NS = MIN( NS, KBOT-KS+1 ) + NS = NS - MOD( NS, 2 ) + KS = KBOT - NS + 1 +* +* ==== Small-bulge multi-shift QR sweep: +* . split workspace under the subdiagonal into +* . - a KDU-by-KDU work array U in the lower +* . left-hand-corner, +* . - a KDU-by-at-least-KDU-but-more-is-better +* . (KDU-by-NHo) horizontal work array WH along +* . the bottom edge, +* . - and an at-least-KDU-but-more-is-better-by-KDU +* . (NVE-by-KDU) vertical work WV arrow along +* . the left-hand-edge. ==== +* + KDU = 3*NS - 3 + KU = N - KDU + 1 + KWH = KDU + 1 + NHO = ( N-KDU+1-4 ) - ( KDU+1 ) + 1 + KWV = KDU + 4 + NVE = N - KDU - KWV + 1 +* +* ==== Small-bulge multi-shift QR sweep ==== +* + CALL DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NS, + $ WR( KS ), WI( KS ), H, LDH, ILOZ, IHIZ, Z, + $ LDZ, WORK, 3, H( KU, 1 ), LDH, NVE, + $ H( KWV, 1 ), LDH, NHO, H( KU, KWH ), LDH ) + END IF +* +* ==== Note progress (or the lack of it). ==== +* + IF( LD.GT.0 ) THEN + NDFL = 1 + ELSE + NDFL = NDFL + 1 + END IF +* +* ==== End of main loop ==== + 80 CONTINUE +* +* ==== Iteration limit exceeded. Set INFO to show where +* . the problem occurred and exit. ==== +* + INFO = KBOT + 90 CONTINUE + END IF +* +* ==== Return the optimal value of LWORK. ==== +* + WORK( 1 ) = DBLE( LWKOPT ) +* +* ==== End of DLAQR0 ==== +* + END diff --git a/dspl/liblapack/SRC/dlaqr1.f b/dspl/liblapack/SRC/dlaqr1.f new file mode 100644 index 0000000..81a462f --- /dev/null +++ b/dspl/liblapack/SRC/dlaqr1.f @@ -0,0 +1,179 @@ +*> \brief \b DLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and specified shifts. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAQR1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAQR1( N, H, LDH, SR1, SI1, SR2, SI2, V ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION SI1, SI2, SR1, SR2 +* INTEGER LDH, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION H( LDH, * ), V( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Given a 2-by-2 or 3-by-3 matrix H, DLAQR1 sets v to a +*> scalar multiple of the first column of the product +*> +*> (*) K = (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I) +*> +*> scaling to avoid overflows and most underflows. It +*> is assumed that either +*> +*> 1) sr1 = sr2 and si1 = -si2 +*> or +*> 2) si1 = si2 = 0. +*> +*> This is useful for starting double implicit shift bulges +*> in the QR algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> Order of the matrix H. N must be either 2 or 3. +*> \endverbatim +*> +*> \param[in] H +*> \verbatim +*> H is DOUBLE PRECISION array, dimension (LDH,N) +*> The 2-by-2 or 3-by-3 matrix H in (*). +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of H as declared in +*> the calling procedure. LDH.GE.N +*> \endverbatim +*> +*> \param[in] SR1 +*> \verbatim +*> SR1 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] SI1 +*> \verbatim +*> SI1 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] SR2 +*> \verbatim +*> SR2 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] SI2 +*> \verbatim +*> SI2 is DOUBLE PRECISION +*> The shifts in (*). +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (N) +*> A scalar multiple of the first column of the +*> matrix K in (*). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +*> +* ===================================================================== + SUBROUTINE DLAQR1( N, H, LDH, SR1, SI1, SR2, SI2, V ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + DOUBLE PRECISION SI1, SI2, SR1, SR2 + INTEGER LDH, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION H( LDH, * ), V( * ) +* .. +* +* ================================================================ +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0d0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION H21S, H31S, S +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. + IF( N.EQ.2 ) THEN + S = ABS( H( 1, 1 )-SR2 ) + ABS( SI2 ) + ABS( H( 2, 1 ) ) + IF( S.EQ.ZERO ) THEN + V( 1 ) = ZERO + V( 2 ) = ZERO + ELSE + H21S = H( 2, 1 ) / S + V( 1 ) = H21S*H( 1, 2 ) + ( H( 1, 1 )-SR1 )* + $ ( ( H( 1, 1 )-SR2 ) / S ) - SI1*( SI2 / S ) + V( 2 ) = H21S*( H( 1, 1 )+H( 2, 2 )-SR1-SR2 ) + END IF + ELSE + S = ABS( H( 1, 1 )-SR2 ) + ABS( SI2 ) + ABS( H( 2, 1 ) ) + + $ ABS( H( 3, 1 ) ) + IF( S.EQ.ZERO ) THEN + V( 1 ) = ZERO + V( 2 ) = ZERO + V( 3 ) = ZERO + ELSE + H21S = H( 2, 1 ) / S + H31S = H( 3, 1 ) / S + V( 1 ) = ( H( 1, 1 )-SR1 )*( ( H( 1, 1 )-SR2 ) / S ) - + $ SI1*( SI2 / S ) + H( 1, 2 )*H21S + H( 1, 3 )*H31S + V( 2 ) = H21S*( H( 1, 1 )+H( 2, 2 )-SR1-SR2 ) + + $ H( 2, 3 )*H31S + V( 3 ) = H31S*( H( 1, 1 )+H( 3, 3 )-SR1-SR2 ) + + $ H21S*H( 3, 2 ) + END IF + END IF + END diff --git a/dspl/liblapack/SRC/dlaqr2.f b/dspl/liblapack/SRC/dlaqr2.f new file mode 100644 index 0000000..431b3f1 --- /dev/null +++ b/dspl/liblapack/SRC/dlaqr2.f @@ -0,0 +1,684 @@ +*> \brief \b DLAQR2 performs the orthogonal similarity transformation of a Hessenberg matrix to detect and deflate fully converged eigenvalues from a trailing principal submatrix (aggressive early deflation). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAQR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, +* IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T, +* LDT, NV, WV, LDWV, WORK, LWORK ) +* +* .. Scalar Arguments .. +* INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, +* $ LDZ, LWORK, N, ND, NH, NS, NV, NW +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), T( LDT, * ), +* $ V( LDV, * ), WORK( * ), WV( LDWV, * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAQR2 is identical to DLAQR3 except that it avoids +*> recursion by calling DLAHQR instead of DLAQR4. +*> +*> Aggressive early deflation: +*> +*> This subroutine accepts as input an upper Hessenberg matrix +*> H and performs an orthogonal similarity transformation +*> designed to detect and deflate fully converged eigenvalues from +*> a trailing principal submatrix. On output H has been over- +*> written by a new Hessenberg matrix that is a perturbation of +*> an orthogonal similarity transformation of H. It is to be +*> hoped that the final version of H has many zero subdiagonal +*> entries. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> If .TRUE., then the Hessenberg matrix H is fully updated +*> so that the quasi-triangular Schur factor may be +*> computed (in cooperation with the calling subroutine). +*> If .FALSE., then only enough of H is updated to preserve +*> the eigenvalues. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> If .TRUE., then the orthogonal matrix Z is updated so +*> so that the orthogonal Schur factor may be computed +*> (in cooperation with the calling subroutine). +*> If .FALSE., then Z is not referenced. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H and (if WANTZ is .TRUE.) the +*> order of the orthogonal matrix Z. +*> \endverbatim +*> +*> \param[in] KTOP +*> \verbatim +*> KTOP is INTEGER +*> It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. +*> KBOT and KTOP together determine an isolated block +*> along the diagonal of the Hessenberg matrix. +*> \endverbatim +*> +*> \param[in] KBOT +*> \verbatim +*> KBOT is INTEGER +*> It is assumed without a check that either +*> KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together +*> determine an isolated block along the diagonal of the +*> Hessenberg matrix. +*> \endverbatim +*> +*> \param[in] NW +*> \verbatim +*> NW is INTEGER +*> Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is DOUBLE PRECISION array, dimension (LDH,N) +*> On input the initial N-by-N section of H stores the +*> Hessenberg matrix undergoing aggressive early deflation. +*> On output H has been transformed by an orthogonal +*> similarity transformation, perturbed, and the returned +*> to Hessenberg form that (it is to be hoped) has some +*> zero subdiagonal entries. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> Leading dimension of H just as declared in the calling +*> subroutine. N .LE. LDH +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ,N) +*> IF WANTZ is .TRUE., then on output, the orthogonal +*> similarity transformation mentioned above has been +*> accumulated into Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right. +*> If WANTZ is .FALSE., then Z is unreferenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of Z just as declared in the +*> calling subroutine. 1 .LE. LDZ. +*> \endverbatim +*> +*> \param[out] NS +*> \verbatim +*> NS is INTEGER +*> The number of unconverged (ie approximate) eigenvalues +*> returned in SR and SI that may be used as shifts by the +*> calling subroutine. +*> \endverbatim +*> +*> \param[out] ND +*> \verbatim +*> ND is INTEGER +*> The number of converged eigenvalues uncovered by this +*> subroutine. +*> \endverbatim +*> +*> \param[out] SR +*> \verbatim +*> SR is DOUBLE PRECISION array, dimension (KBOT) +*> \endverbatim +*> +*> \param[out] SI +*> \verbatim +*> SI is DOUBLE PRECISION array, dimension (KBOT) +*> On output, the real and imaginary parts of approximate +*> eigenvalues that may be used for shifts are stored in +*> SR(KBOT-ND-NS+1) through SR(KBOT-ND) and +*> SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively. +*> The real and imaginary parts of converged eigenvalues +*> are stored in SR(KBOT-ND+1) through SR(KBOT) and +*> SI(KBOT-ND+1) through SI(KBOT), respectively. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (LDV,NW) +*> An NW-by-NW work array. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of V just as declared in the +*> calling subroutine. NW .LE. LDV +*> \endverbatim +*> +*> \param[in] NH +*> \verbatim +*> NH is INTEGER +*> The number of columns of T. NH.GE.NW. +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,NW) +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of T just as declared in the +*> calling subroutine. NW .LE. LDT +*> \endverbatim +*> +*> \param[in] NV +*> \verbatim +*> NV is INTEGER +*> The number of rows of work array WV available for +*> workspace. NV.GE.NW. +*> \endverbatim +*> +*> \param[out] WV +*> \verbatim +*> WV is DOUBLE PRECISION array, dimension (LDWV,NW) +*> \endverbatim +*> +*> \param[in] LDWV +*> \verbatim +*> LDWV is INTEGER +*> The leading dimension of W just as declared in the +*> calling subroutine. NW .LE. LDV +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> On exit, WORK(1) is set to an estimate of the optimal value +*> of LWORK for the given values of N, NW, KTOP and KBOT. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the work array WORK. LWORK = 2*NW +*> suffices, but greater efficiency may result from larger +*> values of LWORK. +*> +*> If LWORK = -1, then a workspace query is assumed; DLAQR2 +*> only estimates the optimal workspace size for the given +*> values of N, NW, KTOP and KBOT. The estimate is returned +*> in WORK(1). No error message related to LWORK is issued +*> by XERBLA. Neither H nor Z are accessed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +*> +* ===================================================================== + SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T, + $ LDT, NV, WV, LDWV, WORK, LWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, + $ LDZ, LWORK, N, ND, NH, NS, NV, NW + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), T( LDT, * ), + $ V( LDV, * ), WORK( * ), WV( LDWV, * ), + $ Z( LDZ, * ) +* .. +* +* ================================================================ +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S, + $ SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP + INTEGER I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL, + $ KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, + $ LWKOPT + LOGICAL BULGE, SORTED +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEHRD, DGEMM, DLABAD, DLACPY, DLAHQR, + $ DLANV2, DLARF, DLARFG, DLASET, DORMHR, DTREXC +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* ==== Estimate optimal workspace. ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + IF( JW.LE.2 ) THEN + LWKOPT = 1 + ELSE +* +* ==== Workspace query call to DGEHRD ==== +* + CALL DGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO ) + LWK1 = INT( WORK( 1 ) ) +* +* ==== Workspace query call to DORMHR ==== +* + CALL DORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV, + $ WORK, -1, INFO ) + LWK2 = INT( WORK( 1 ) ) +* +* ==== Optimal workspace ==== +* + LWKOPT = JW + MAX( LWK1, LWK2 ) + END IF +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = DBLE( LWKOPT ) + RETURN + END IF +* +* ==== Nothing to do ... +* ... for an empty active block ... ==== + NS = 0 + ND = 0 + WORK( 1 ) = ONE + IF( KTOP.GT.KBOT ) + $ RETURN +* ... nor for an empty deflation window. ==== + IF( NW.LT.1 ) + $ RETURN +* +* ==== Machine constants ==== +* + SAFMIN = DLAMCH( 'SAFE MINIMUM' ) + SAFMAX = ONE / SAFMIN + CALL DLABAD( SAFMIN, SAFMAX ) + ULP = DLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( DBLE( N ) / ULP ) +* +* ==== Setup deflation window ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + KWTOP = KBOT - JW + 1 + IF( KWTOP.EQ.KTOP ) THEN + S = ZERO + ELSE + S = H( KWTOP, KWTOP-1 ) + END IF +* + IF( KBOT.EQ.KWTOP ) THEN +* +* ==== 1-by-1 deflation window: not much to do ==== +* + SR( KWTOP ) = H( KWTOP, KWTOP ) + SI( KWTOP ) = ZERO + NS = 1 + ND = 0 + IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) ) + $ THEN + NS = 0 + ND = 1 + IF( KWTOP.GT.KTOP ) + $ H( KWTOP, KWTOP-1 ) = ZERO + END IF + WORK( 1 ) = ONE + RETURN + END IF +* +* ==== Convert to spike-triangular form. (In case of a +* . rare QR failure, this routine continues to do +* . aggressive early deflation using that part of +* . the deflation window that converged using INFQR +* . here and there to keep track.) ==== +* + CALL DLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) + CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) +* + CALL DLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) + CALL DLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ), + $ SI( KWTOP ), 1, JW, V, LDV, INFQR ) +* +* ==== DTREXC needs a clean margin near the diagonal ==== +* + DO 10 J = 1, JW - 3 + T( J+2, J ) = ZERO + T( J+3, J ) = ZERO + 10 CONTINUE + IF( JW.GT.2 ) + $ T( JW, JW-2 ) = ZERO +* +* ==== Deflation detection loop ==== +* + NS = JW + ILST = INFQR + 1 + 20 CONTINUE + IF( ILST.LE.NS ) THEN + IF( NS.EQ.1 ) THEN + BULGE = .FALSE. + ELSE + BULGE = T( NS, NS-1 ).NE.ZERO + END IF +* +* ==== Small spike tip test for deflation ==== +* + IF( .NOT.BULGE ) THEN +* +* ==== Real eigenvalue ==== +* + FOO = ABS( T( NS, NS ) ) + IF( FOO.EQ.ZERO ) + $ FOO = ABS( S ) + IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN +* +* ==== Deflatable ==== +* + NS = NS - 1 + ELSE +* +* ==== Undeflatable. Move it up out of the way. +* . (DTREXC can not fail in this case.) ==== +* + IFST = NS + CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + $ INFO ) + ILST = ILST + 1 + END IF + ELSE +* +* ==== Complex conjugate pair ==== +* + FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )* + $ SQRT( ABS( T( NS-1, NS ) ) ) + IF( FOO.EQ.ZERO ) + $ FOO = ABS( S ) + IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE. + $ MAX( SMLNUM, ULP*FOO ) ) THEN +* +* ==== Deflatable ==== +* + NS = NS - 2 + ELSE +* +* ==== Undeflatable. Move them up out of the way. +* . Fortunately, DTREXC does the right thing with +* . ILST in case of a rare exchange failure. ==== +* + IFST = NS + CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + $ INFO ) + ILST = ILST + 2 + END IF + END IF +* +* ==== End deflation detection loop ==== +* + GO TO 20 + END IF +* +* ==== Return to Hessenberg form ==== +* + IF( NS.EQ.0 ) + $ S = ZERO +* + IF( NS.LT.JW ) THEN +* +* ==== sorting diagonal blocks of T improves accuracy for +* . graded matrices. Bubble sort deals well with +* . exchange failures. ==== +* + SORTED = .false. + I = NS + 1 + 30 CONTINUE + IF( SORTED ) + $ GO TO 50 + SORTED = .true. +* + KEND = I - 1 + I = INFQR + 1 + IF( I.EQ.NS ) THEN + K = I + 1 + ELSE IF( T( I+1, I ).EQ.ZERO ) THEN + K = I + 1 + ELSE + K = I + 2 + END IF + 40 CONTINUE + IF( K.LE.KEND ) THEN + IF( K.EQ.I+1 ) THEN + EVI = ABS( T( I, I ) ) + ELSE + EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )* + $ SQRT( ABS( T( I, I+1 ) ) ) + END IF +* + IF( K.EQ.KEND ) THEN + EVK = ABS( T( K, K ) ) + ELSE IF( T( K+1, K ).EQ.ZERO ) THEN + EVK = ABS( T( K, K ) ) + ELSE + EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )* + $ SQRT( ABS( T( K, K+1 ) ) ) + END IF +* + IF( EVI.GE.EVK ) THEN + I = K + ELSE + SORTED = .false. + IFST = I + ILST = K + CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + $ INFO ) + IF( INFO.EQ.0 ) THEN + I = ILST + ELSE + I = K + END IF + END IF + IF( I.EQ.KEND ) THEN + K = I + 1 + ELSE IF( T( I+1, I ).EQ.ZERO ) THEN + K = I + 1 + ELSE + K = I + 2 + END IF + GO TO 40 + END IF + GO TO 30 + 50 CONTINUE + END IF +* +* ==== Restore shift/eigenvalue array from T ==== +* + I = JW + 60 CONTINUE + IF( I.GE.INFQR+1 ) THEN + IF( I.EQ.INFQR+1 ) THEN + SR( KWTOP+I-1 ) = T( I, I ) + SI( KWTOP+I-1 ) = ZERO + I = I - 1 + ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN + SR( KWTOP+I-1 ) = T( I, I ) + SI( KWTOP+I-1 ) = ZERO + I = I - 1 + ELSE + AA = T( I-1, I-1 ) + CC = T( I, I-1 ) + BB = T( I-1, I ) + DD = T( I, I ) + CALL DLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ), + $ SI( KWTOP+I-2 ), SR( KWTOP+I-1 ), + $ SI( KWTOP+I-1 ), CS, SN ) + I = I - 2 + END IF + GO TO 60 + END IF +* + IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN + IF( NS.GT.1 .AND. S.NE.ZERO ) THEN +* +* ==== Reflect spike back into lower triangle ==== +* + CALL DCOPY( NS, V, LDV, WORK, 1 ) + BETA = WORK( 1 ) + CALL DLARFG( NS, BETA, WORK( 2 ), 1, TAU ) + WORK( 1 ) = ONE +* + CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) +* + CALL DLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL DLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL DLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, + $ WORK( JW+1 ) ) +* + CALL DGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), + $ LWORK-JW, INFO ) + END IF +* +* ==== Copy updated reduced window into place ==== +* + IF( KWTOP.GT.1 ) + $ H( KWTOP, KWTOP-1 ) = S*V( 1, 1 ) + CALL DLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH ) + CALL DCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ), + $ LDH+1 ) +* +* ==== Accumulate orthogonal matrix in order update +* . H and Z, if requested. ==== +* + IF( NS.GT.1 .AND. S.NE.ZERO ) + $ CALL DORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV, + $ WORK( JW+1 ), LWORK-JW, INFO ) +* +* ==== Update vertical slab in H ==== +* + IF( WANTT ) THEN + LTOP = 1 + ELSE + LTOP = KTOP + END IF + DO 70 KROW = LTOP, KWTOP - 1, NV + KLN = MIN( NV, KWTOP-KROW ) + CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), + $ LDH, V, LDV, ZERO, WV, LDWV ) + CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) + 70 CONTINUE +* +* ==== Update horizontal slab in H ==== +* + IF( WANTT ) THEN + DO 80 KCOL = KBOT + 1, N, NH + KLN = MIN( NH, N-KCOL+1 ) + CALL DGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV, + $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT ) + CALL DLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ), + $ LDH ) + 80 CONTINUE + END IF +* +* ==== Update vertical slab in Z ==== +* + IF( WANTZ ) THEN + DO 90 KROW = ILOZ, IHIZ, NV + KLN = MIN( NV, IHIZ-KROW+1 ) + CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), + $ LDZ, V, LDV, ZERO, WV, LDWV ) + CALL DLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), + $ LDZ ) + 90 CONTINUE + END IF + END IF +* +* ==== Return the number of deflations ... ==== +* + ND = JW - NS +* +* ==== ... and the number of shifts. (Subtracting +* . INFQR from the spike length takes care +* . of the case of a rare QR failure while +* . calculating eigenvalues of the deflation +* . window.) ==== +* + NS = NS - INFQR +* +* ==== Return optimal workspace. ==== +* + WORK( 1 ) = DBLE( LWKOPT ) +* +* ==== End of DLAQR2 ==== +* + END diff --git a/dspl/liblapack/SRC/dlaqr3.f b/dspl/liblapack/SRC/dlaqr3.f new file mode 100644 index 0000000..aa23617 --- /dev/null +++ b/dspl/liblapack/SRC/dlaqr3.f @@ -0,0 +1,695 @@ +*> \brief \b DLAQR3 performs the orthogonal similarity transformation of a Hessenberg matrix to detect and deflate fully converged eigenvalues from a trailing principal submatrix (aggressive early deflation). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAQR3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, +* IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T, +* LDT, NV, WV, LDWV, WORK, LWORK ) +* +* .. Scalar Arguments .. +* INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, +* $ LDZ, LWORK, N, ND, NH, NS, NV, NW +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), T( LDT, * ), +* $ V( LDV, * ), WORK( * ), WV( LDWV, * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Aggressive early deflation: +*> +*> DLAQR3 accepts as input an upper Hessenberg matrix +*> H and performs an orthogonal similarity transformation +*> designed to detect and deflate fully converged eigenvalues from +*> a trailing principal submatrix. On output H has been over- +*> written by a new Hessenberg matrix that is a perturbation of +*> an orthogonal similarity transformation of H. It is to be +*> hoped that the final version of H has many zero subdiagonal +*> entries. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> If .TRUE., then the Hessenberg matrix H is fully updated +*> so that the quasi-triangular Schur factor may be +*> computed (in cooperation with the calling subroutine). +*> If .FALSE., then only enough of H is updated to preserve +*> the eigenvalues. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> If .TRUE., then the orthogonal matrix Z is updated so +*> so that the orthogonal Schur factor may be computed +*> (in cooperation with the calling subroutine). +*> If .FALSE., then Z is not referenced. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H and (if WANTZ is .TRUE.) the +*> order of the orthogonal matrix Z. +*> \endverbatim +*> +*> \param[in] KTOP +*> \verbatim +*> KTOP is INTEGER +*> It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. +*> KBOT and KTOP together determine an isolated block +*> along the diagonal of the Hessenberg matrix. +*> \endverbatim +*> +*> \param[in] KBOT +*> \verbatim +*> KBOT is INTEGER +*> It is assumed without a check that either +*> KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together +*> determine an isolated block along the diagonal of the +*> Hessenberg matrix. +*> \endverbatim +*> +*> \param[in] NW +*> \verbatim +*> NW is INTEGER +*> Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is DOUBLE PRECISION array, dimension (LDH,N) +*> On input the initial N-by-N section of H stores the +*> Hessenberg matrix undergoing aggressive early deflation. +*> On output H has been transformed by an orthogonal +*> similarity transformation, perturbed, and the returned +*> to Hessenberg form that (it is to be hoped) has some +*> zero subdiagonal entries. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> Leading dimension of H just as declared in the calling +*> subroutine. N .LE. LDH +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ,N) +*> IF WANTZ is .TRUE., then on output, the orthogonal +*> similarity transformation mentioned above has been +*> accumulated into Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right. +*> If WANTZ is .FALSE., then Z is unreferenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of Z just as declared in the +*> calling subroutine. 1 .LE. LDZ. +*> \endverbatim +*> +*> \param[out] NS +*> \verbatim +*> NS is INTEGER +*> The number of unconverged (ie approximate) eigenvalues +*> returned in SR and SI that may be used as shifts by the +*> calling subroutine. +*> \endverbatim +*> +*> \param[out] ND +*> \verbatim +*> ND is INTEGER +*> The number of converged eigenvalues uncovered by this +*> subroutine. +*> \endverbatim +*> +*> \param[out] SR +*> \verbatim +*> SR is DOUBLE PRECISION array, dimension (KBOT) +*> \endverbatim +*> +*> \param[out] SI +*> \verbatim +*> SI is DOUBLE PRECISION array, dimension (KBOT) +*> On output, the real and imaginary parts of approximate +*> eigenvalues that may be used for shifts are stored in +*> SR(KBOT-ND-NS+1) through SR(KBOT-ND) and +*> SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively. +*> The real and imaginary parts of converged eigenvalues +*> are stored in SR(KBOT-ND+1) through SR(KBOT) and +*> SI(KBOT-ND+1) through SI(KBOT), respectively. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (LDV,NW) +*> An NW-by-NW work array. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of V just as declared in the +*> calling subroutine. NW .LE. LDV +*> \endverbatim +*> +*> \param[in] NH +*> \verbatim +*> NH is INTEGER +*> The number of columns of T. NH.GE.NW. +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,NW) +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of T just as declared in the +*> calling subroutine. NW .LE. LDT +*> \endverbatim +*> +*> \param[in] NV +*> \verbatim +*> NV is INTEGER +*> The number of rows of work array WV available for +*> workspace. NV.GE.NW. +*> \endverbatim +*> +*> \param[out] WV +*> \verbatim +*> WV is DOUBLE PRECISION array, dimension (LDWV,NW) +*> \endverbatim +*> +*> \param[in] LDWV +*> \verbatim +*> LDWV is INTEGER +*> The leading dimension of W just as declared in the +*> calling subroutine. NW .LE. LDV +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> On exit, WORK(1) is set to an estimate of the optimal value +*> of LWORK for the given values of N, NW, KTOP and KBOT. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the work array WORK. LWORK = 2*NW +*> suffices, but greater efficiency may result from larger +*> values of LWORK. +*> +*> If LWORK = -1, then a workspace query is assumed; DLAQR3 +*> only estimates the optimal workspace size for the given +*> values of N, NW, KTOP and KBOT. The estimate is returned +*> in WORK(1). No error message related to LWORK is issued +*> by XERBLA. Neither H nor Z are accessed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +*> +* ===================================================================== + SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T, + $ LDT, NV, WV, LDWV, WORK, LWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, + $ LDZ, LWORK, N, ND, NH, NS, NV, NW + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), T( LDT, * ), + $ V( LDV, * ), WORK( * ), WV( LDWV, * ), + $ Z( LDZ, * ) +* .. +* +* ================================================================ +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S, + $ SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP + INTEGER I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL, + $ KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3, + $ LWKOPT, NMIN + LOGICAL BULGE, SORTED +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + INTEGER ILAENV + EXTERNAL DLAMCH, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEHRD, DGEMM, DLABAD, DLACPY, DLAHQR, + $ DLANV2, DLAQR4, DLARF, DLARFG, DLASET, DORMHR, + $ DTREXC +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* ==== Estimate optimal workspace. ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + IF( JW.LE.2 ) THEN + LWKOPT = 1 + ELSE +* +* ==== Workspace query call to DGEHRD ==== +* + CALL DGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO ) + LWK1 = INT( WORK( 1 ) ) +* +* ==== Workspace query call to DORMHR ==== +* + CALL DORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV, + $ WORK, -1, INFO ) + LWK2 = INT( WORK( 1 ) ) +* +* ==== Workspace query call to DLAQR4 ==== +* + CALL DLAQR4( .true., .true., JW, 1, JW, T, LDT, SR, SI, 1, JW, + $ V, LDV, WORK, -1, INFQR ) + LWK3 = INT( WORK( 1 ) ) +* +* ==== Optimal workspace ==== +* + LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 ) + END IF +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = DBLE( LWKOPT ) + RETURN + END IF +* +* ==== Nothing to do ... +* ... for an empty active block ... ==== + NS = 0 + ND = 0 + WORK( 1 ) = ONE + IF( KTOP.GT.KBOT ) + $ RETURN +* ... nor for an empty deflation window. ==== + IF( NW.LT.1 ) + $ RETURN +* +* ==== Machine constants ==== +* + SAFMIN = DLAMCH( 'SAFE MINIMUM' ) + SAFMAX = ONE / SAFMIN + CALL DLABAD( SAFMIN, SAFMAX ) + ULP = DLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( DBLE( N ) / ULP ) +* +* ==== Setup deflation window ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + KWTOP = KBOT - JW + 1 + IF( KWTOP.EQ.KTOP ) THEN + S = ZERO + ELSE + S = H( KWTOP, KWTOP-1 ) + END IF +* + IF( KBOT.EQ.KWTOP ) THEN +* +* ==== 1-by-1 deflation window: not much to do ==== +* + SR( KWTOP ) = H( KWTOP, KWTOP ) + SI( KWTOP ) = ZERO + NS = 1 + ND = 0 + IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) ) + $ THEN + NS = 0 + ND = 1 + IF( KWTOP.GT.KTOP ) + $ H( KWTOP, KWTOP-1 ) = ZERO + END IF + WORK( 1 ) = ONE + RETURN + END IF +* +* ==== Convert to spike-triangular form. (In case of a +* . rare QR failure, this routine continues to do +* . aggressive early deflation using that part of +* . the deflation window that converged using INFQR +* . here and there to keep track.) ==== +* + CALL DLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) + CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) +* + CALL DLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) + NMIN = ILAENV( 12, 'DLAQR3', 'SV', JW, 1, JW, LWORK ) + IF( JW.GT.NMIN ) THEN + CALL DLAQR4( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ), + $ SI( KWTOP ), 1, JW, V, LDV, WORK, LWORK, INFQR ) + ELSE + CALL DLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ), + $ SI( KWTOP ), 1, JW, V, LDV, INFQR ) + END IF +* +* ==== DTREXC needs a clean margin near the diagonal ==== +* + DO 10 J = 1, JW - 3 + T( J+2, J ) = ZERO + T( J+3, J ) = ZERO + 10 CONTINUE + IF( JW.GT.2 ) + $ T( JW, JW-2 ) = ZERO +* +* ==== Deflation detection loop ==== +* + NS = JW + ILST = INFQR + 1 + 20 CONTINUE + IF( ILST.LE.NS ) THEN + IF( NS.EQ.1 ) THEN + BULGE = .FALSE. + ELSE + BULGE = T( NS, NS-1 ).NE.ZERO + END IF +* +* ==== Small spike tip test for deflation ==== +* + IF( .NOT. BULGE ) THEN +* +* ==== Real eigenvalue ==== +* + FOO = ABS( T( NS, NS ) ) + IF( FOO.EQ.ZERO ) + $ FOO = ABS( S ) + IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN +* +* ==== Deflatable ==== +* + NS = NS - 1 + ELSE +* +* ==== Undeflatable. Move it up out of the way. +* . (DTREXC can not fail in this case.) ==== +* + IFST = NS + CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + $ INFO ) + ILST = ILST + 1 + END IF + ELSE +* +* ==== Complex conjugate pair ==== +* + FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )* + $ SQRT( ABS( T( NS-1, NS ) ) ) + IF( FOO.EQ.ZERO ) + $ FOO = ABS( S ) + IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE. + $ MAX( SMLNUM, ULP*FOO ) ) THEN +* +* ==== Deflatable ==== +* + NS = NS - 2 + ELSE +* +* ==== Undeflatable. Move them up out of the way. +* . Fortunately, DTREXC does the right thing with +* . ILST in case of a rare exchange failure. ==== +* + IFST = NS + CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + $ INFO ) + ILST = ILST + 2 + END IF + END IF +* +* ==== End deflation detection loop ==== +* + GO TO 20 + END IF +* +* ==== Return to Hessenberg form ==== +* + IF( NS.EQ.0 ) + $ S = ZERO +* + IF( NS.LT.JW ) THEN +* +* ==== sorting diagonal blocks of T improves accuracy for +* . graded matrices. Bubble sort deals well with +* . exchange failures. ==== +* + SORTED = .false. + I = NS + 1 + 30 CONTINUE + IF( SORTED ) + $ GO TO 50 + SORTED = .true. +* + KEND = I - 1 + I = INFQR + 1 + IF( I.EQ.NS ) THEN + K = I + 1 + ELSE IF( T( I+1, I ).EQ.ZERO ) THEN + K = I + 1 + ELSE + K = I + 2 + END IF + 40 CONTINUE + IF( K.LE.KEND ) THEN + IF( K.EQ.I+1 ) THEN + EVI = ABS( T( I, I ) ) + ELSE + EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )* + $ SQRT( ABS( T( I, I+1 ) ) ) + END IF +* + IF( K.EQ.KEND ) THEN + EVK = ABS( T( K, K ) ) + ELSE IF( T( K+1, K ).EQ.ZERO ) THEN + EVK = ABS( T( K, K ) ) + ELSE + EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )* + $ SQRT( ABS( T( K, K+1 ) ) ) + END IF +* + IF( EVI.GE.EVK ) THEN + I = K + ELSE + SORTED = .false. + IFST = I + ILST = K + CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + $ INFO ) + IF( INFO.EQ.0 ) THEN + I = ILST + ELSE + I = K + END IF + END IF + IF( I.EQ.KEND ) THEN + K = I + 1 + ELSE IF( T( I+1, I ).EQ.ZERO ) THEN + K = I + 1 + ELSE + K = I + 2 + END IF + GO TO 40 + END IF + GO TO 30 + 50 CONTINUE + END IF +* +* ==== Restore shift/eigenvalue array from T ==== +* + I = JW + 60 CONTINUE + IF( I.GE.INFQR+1 ) THEN + IF( I.EQ.INFQR+1 ) THEN + SR( KWTOP+I-1 ) = T( I, I ) + SI( KWTOP+I-1 ) = ZERO + I = I - 1 + ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN + SR( KWTOP+I-1 ) = T( I, I ) + SI( KWTOP+I-1 ) = ZERO + I = I - 1 + ELSE + AA = T( I-1, I-1 ) + CC = T( I, I-1 ) + BB = T( I-1, I ) + DD = T( I, I ) + CALL DLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ), + $ SI( KWTOP+I-2 ), SR( KWTOP+I-1 ), + $ SI( KWTOP+I-1 ), CS, SN ) + I = I - 2 + END IF + GO TO 60 + END IF +* + IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN + IF( NS.GT.1 .AND. S.NE.ZERO ) THEN +* +* ==== Reflect spike back into lower triangle ==== +* + CALL DCOPY( NS, V, LDV, WORK, 1 ) + BETA = WORK( 1 ) + CALL DLARFG( NS, BETA, WORK( 2 ), 1, TAU ) + WORK( 1 ) = ONE +* + CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) +* + CALL DLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL DLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL DLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, + $ WORK( JW+1 ) ) +* + CALL DGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), + $ LWORK-JW, INFO ) + END IF +* +* ==== Copy updated reduced window into place ==== +* + IF( KWTOP.GT.1 ) + $ H( KWTOP, KWTOP-1 ) = S*V( 1, 1 ) + CALL DLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH ) + CALL DCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ), + $ LDH+1 ) +* +* ==== Accumulate orthogonal matrix in order update +* . H and Z, if requested. ==== +* + IF( NS.GT.1 .AND. S.NE.ZERO ) + $ CALL DORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV, + $ WORK( JW+1 ), LWORK-JW, INFO ) +* +* ==== Update vertical slab in H ==== +* + IF( WANTT ) THEN + LTOP = 1 + ELSE + LTOP = KTOP + END IF + DO 70 KROW = LTOP, KWTOP - 1, NV + KLN = MIN( NV, KWTOP-KROW ) + CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), + $ LDH, V, LDV, ZERO, WV, LDWV ) + CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) + 70 CONTINUE +* +* ==== Update horizontal slab in H ==== +* + IF( WANTT ) THEN + DO 80 KCOL = KBOT + 1, N, NH + KLN = MIN( NH, N-KCOL+1 ) + CALL DGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV, + $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT ) + CALL DLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ), + $ LDH ) + 80 CONTINUE + END IF +* +* ==== Update vertical slab in Z ==== +* + IF( WANTZ ) THEN + DO 90 KROW = ILOZ, IHIZ, NV + KLN = MIN( NV, IHIZ-KROW+1 ) + CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), + $ LDZ, V, LDV, ZERO, WV, LDWV ) + CALL DLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), + $ LDZ ) + 90 CONTINUE + END IF + END IF +* +* ==== Return the number of deflations ... ==== +* + ND = JW - NS +* +* ==== ... and the number of shifts. (Subtracting +* . INFQR from the spike length takes care +* . of the case of a rare QR failure while +* . calculating eigenvalues of the deflation +* . window.) ==== +* + NS = NS - INFQR +* +* ==== Return optimal workspace. ==== +* + WORK( 1 ) = DBLE( LWKOPT ) +* +* ==== End of DLAQR3 ==== +* + END diff --git a/dspl/liblapack/SRC/dlaqr4.f b/dspl/liblapack/SRC/dlaqr4.f new file mode 100644 index 0000000..89b9b7f --- /dev/null +++ b/dspl/liblapack/SRC/dlaqr4.f @@ -0,0 +1,739 @@ +*> \brief \b DLAQR4 computes the eigenvalues of a Hessenberg matrix, and optionally the matrices from the Schur decomposition. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAQR4 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAQR4( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, +* ILOZ, IHIZ, Z, LDZ, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* DOUBLE PRECISION H( LDH, * ), WI( * ), WORK( * ), WR( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAQR4 implements one level of recursion for DLAQR0. +*> It is a complete implementation of the small bulge multi-shift +*> QR algorithm. It may be called by DLAQR0 and, for large enough +*> deflation window size, it may be called by DLAQR3. This +*> subroutine is identical to DLAQR0 except that it calls DLAQR2 +*> instead of DLAQR3. +*> +*> DLAQR4 computes the eigenvalues of a Hessenberg matrix H +*> and, optionally, the matrices T and Z from the Schur decomposition +*> H = Z T Z**T, where T is an upper quasi-triangular matrix (the +*> Schur form), and Z is the orthogonal matrix of Schur vectors. +*> +*> Optionally Z may be postmultiplied into an input orthogonal +*> matrix Q so that this routine can give the Schur factorization +*> of a matrix A which has been reduced to the Hessenberg form H +*> by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> = .TRUE. : the full Schur form T is required; +*> = .FALSE.: only eigenvalues are required. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> = .TRUE. : the matrix of Schur vectors Z is required; +*> = .FALSE.: Schur vectors are not required. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N .GE. 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> It is assumed that H is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, +*> H(ILO,ILO-1) is zero. ILO and IHI are normally set by a +*> previous call to DGEBAL, and then passed to DGEHRD when the +*> matrix output by DGEBAL is reduced to Hessenberg form. +*> Otherwise, ILO and IHI should be set to 1 and N, +*> respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. +*> If N = 0, then ILO = 1 and IHI = 0. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is DOUBLE PRECISION array, dimension (LDH,N) +*> On entry, the upper Hessenberg matrix H. +*> On exit, if INFO = 0 and WANTT is .TRUE., then H contains +*> the upper quasi-triangular matrix T from the Schur +*> decomposition (the Schur form); 2-by-2 diagonal blocks +*> (corresponding to complex conjugate pairs of eigenvalues) +*> are returned in standard form, with H(i,i) = H(i+1,i+1) +*> and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is +*> .FALSE., then the contents of H are unspecified on exit. +*> (The output value of H when INFO.GT.0 is given under the +*> description of INFO below.) +*> +*> This subroutine may explicitly set H(i,j) = 0 for i.GT.j and +*> j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH .GE. max(1,N). +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is DOUBLE PRECISION array, dimension (IHI) +*> \endverbatim +*> +*> \param[out] WI +*> \verbatim +*> WI is DOUBLE PRECISION array, dimension (IHI) +*> The real and imaginary parts, respectively, of the computed +*> eigenvalues of H(ILO:IHI,ILO:IHI) are stored in WR(ILO:IHI) +*> and WI(ILO:IHI). If two eigenvalues are computed as a +*> complex conjugate pair, they are stored in consecutive +*> elements of WR and WI, say the i-th and (i+1)th, with +*> WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then +*> the eigenvalues are stored in the same order as on the +*> diagonal of the Schur form returned in H, with +*> WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal +*> block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and +*> WI(i+1) = -WI(i). +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. +*> 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ,IHI) +*> If WANTZ is .FALSE., then Z is not referenced. +*> If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is +*> replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the +*> orthogonal Schur factor of H(ILO:IHI,ILO:IHI). +*> (The output value of Z when INFO.GT.0 is given under +*> the description of INFO below.) +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. if WANTZ is .TRUE. +*> then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension LWORK +*> On exit, if LWORK = -1, WORK(1) returns an estimate of +*> the optimal value for LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK .GE. max(1,N) +*> is sufficient, but LWORK typically as large as 6*N may +*> be required for optimal performance. A workspace query +*> to determine the optimal workspace size is recommended. +*> +*> If LWORK = -1, then DLAQR4 does a workspace query. +*> In this case, DLAQR4 checks the input parameters and +*> estimates the optimal workspace size for the given +*> values of N, ILO and IHI. The estimate is returned +*> in WORK(1). No error message related to LWORK is +*> issued by XERBLA. Neither H nor Z are accessed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> .GT. 0: if INFO = i, DLAQR4 failed to compute all of +*> the eigenvalues. Elements 1:ilo-1 and i+1:n of WR +*> and WI contain those eigenvalues which have been +*> successfully computed. (Failures are rare.) +*> +*> If INFO .GT. 0 and WANT is .FALSE., then on exit, +*> the remaining unconverged eigenvalues are the eigen- +*> values of the upper Hessenberg matrix rows and +*> columns ILO through INFO of the final, output +*> value of H. +*> +*> If INFO .GT. 0 and WANTT is .TRUE., then on exit +*> +*> (*) (initial value of H)*U = U*(final value of H) +*> +*> where U is a orthogonal matrix. The final +*> value of H is upper Hessenberg and triangular in +*> rows and columns INFO+1 through IHI. +*> +*> If INFO .GT. 0 and WANTZ is .TRUE., then on exit +*> +*> (final value of Z(ILO:IHI,ILOZ:IHIZ) +*> = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U +*> +*> where U is the orthogonal matrix in (*) (regard- +*> less of the value of WANTT.) +*> +*> If INFO .GT. 0 and WANTZ is .FALSE., then Z is not +*> accessed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +* +*> \par References: +* ================ +*> +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages +*> 929--947, 2002. +*> \n +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part II: Aggressive Early Deflation, SIAM Journal +*> of Matrix Analysis, volume 23, pages 948--973, 2002. +*> +* ===================================================================== + SUBROUTINE DLAQR4( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, + $ ILOZ, IHIZ, Z, LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + DOUBLE PRECISION H( LDH, * ), WI( * ), WORK( * ), WR( * ), + $ Z( LDZ, * ) +* .. +* +* ================================================================ +* .. Parameters .. +* +* ==== Matrices of order NTINY or smaller must be processed by +* . DLAHQR because of insufficient subdiagonal scratch space. +* . (This is a hard limit.) ==== + INTEGER NTINY + PARAMETER ( NTINY = 11 ) +* +* ==== Exceptional deflation windows: try to cure rare +* . slow convergence by varying the size of the +* . deflation window after KEXNW iterations. ==== + INTEGER KEXNW + PARAMETER ( KEXNW = 5 ) +* +* ==== Exceptional shifts: try to cure rare slow convergence +* . with ad-hoc exceptional shifts every KEXSH iterations. +* . ==== + INTEGER KEXSH + PARAMETER ( KEXSH = 6 ) +* +* ==== The constants WILK1 and WILK2 are used to form the +* . exceptional shifts. ==== + DOUBLE PRECISION WILK1, WILK2 + PARAMETER ( WILK1 = 0.75d0, WILK2 = -0.4375d0 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION AA, BB, CC, CS, DD, SN, SS, SWAP + INTEGER I, INF, IT, ITMAX, K, KACC22, KBOT, KDU, KS, + $ KT, KTOP, KU, KV, KWH, KWTOP, KWV, LD, LS, + $ LWKOPT, NDEC, NDFL, NH, NHO, NIBBLE, NMIN, NS, + $ NSMAX, NSR, NVE, NW, NWMAX, NWR, NWUPBD + LOGICAL SORTED + CHARACTER JBCMPZ*2 +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Local Arrays .. + DOUBLE PRECISION ZDUM( 1, 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DLACPY, DLAHQR, DLANV2, DLAQR2, DLAQR5 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, MAX, MIN, MOD +* .. +* .. Executable Statements .. + INFO = 0 +* +* ==== Quick return for N = 0: nothing to do. ==== +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = ONE + RETURN + END IF +* + IF( N.LE.NTINY ) THEN +* +* ==== Tiny matrices must use DLAHQR. ==== +* + LWKOPT = 1 + IF( LWORK.NE.-1 ) + $ CALL DLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, + $ ILOZ, IHIZ, Z, LDZ, INFO ) + ELSE +* +* ==== Use small bulge multi-shift QR with aggressive early +* . deflation on larger-than-tiny matrices. ==== +* +* ==== Hope for the best. ==== +* + INFO = 0 +* +* ==== Set up job flags for ILAENV. ==== +* + IF( WANTT ) THEN + JBCMPZ( 1: 1 ) = 'S' + ELSE + JBCMPZ( 1: 1 ) = 'E' + END IF + IF( WANTZ ) THEN + JBCMPZ( 2: 2 ) = 'V' + ELSE + JBCMPZ( 2: 2 ) = 'N' + END IF +* +* ==== NWR = recommended deflation window size. At this +* . point, N .GT. NTINY = 11, so there is enough +* . subdiagonal workspace for NWR.GE.2 as required. +* . (In fact, there is enough subdiagonal space for +* . NWR.GE.3.) ==== +* + NWR = ILAENV( 13, 'DLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NWR = MAX( 2, NWR ) + NWR = MIN( IHI-ILO+1, ( N-1 ) / 3, NWR ) +* +* ==== NSR = recommended number of simultaneous shifts. +* . At this point N .GT. NTINY = 11, so there is at +* . enough subdiagonal workspace for NSR to be even +* . and greater than or equal to two as required. ==== +* + NSR = ILAENV( 15, 'DLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NSR = MIN( NSR, ( N+6 ) / 9, IHI-ILO ) + NSR = MAX( 2, NSR-MOD( NSR, 2 ) ) +* +* ==== Estimate optimal workspace ==== +* +* ==== Workspace query call to DLAQR2 ==== +* + CALL DLAQR2( WANTT, WANTZ, N, ILO, IHI, NWR+1, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, WR, WI, H, LDH, N, H, LDH, + $ N, H, LDH, WORK, -1 ) +* +* ==== Optimal workspace = MAX(DLAQR5, DLAQR2) ==== +* + LWKOPT = MAX( 3*NSR / 2, INT( WORK( 1 ) ) ) +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = DBLE( LWKOPT ) + RETURN + END IF +* +* ==== DLAHQR/DLAQR0 crossover point ==== +* + NMIN = ILAENV( 12, 'DLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NMIN = MAX( NTINY, NMIN ) +* +* ==== Nibble crossover point ==== +* + NIBBLE = ILAENV( 14, 'DLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NIBBLE = MAX( 0, NIBBLE ) +* +* ==== Accumulate reflections during ttswp? Use block +* . 2-by-2 structure during matrix-matrix multiply? ==== +* + KACC22 = ILAENV( 16, 'DLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + KACC22 = MAX( 0, KACC22 ) + KACC22 = MIN( 2, KACC22 ) +* +* ==== NWMAX = the largest possible deflation window for +* . which there is sufficient workspace. ==== +* + NWMAX = MIN( ( N-1 ) / 3, LWORK / 2 ) + NW = NWMAX +* +* ==== NSMAX = the Largest number of simultaneous shifts +* . for which there is sufficient workspace. ==== +* + NSMAX = MIN( ( N+6 ) / 9, 2*LWORK / 3 ) + NSMAX = NSMAX - MOD( NSMAX, 2 ) +* +* ==== NDFL: an iteration count restarted at deflation. ==== +* + NDFL = 1 +* +* ==== ITMAX = iteration limit ==== +* + ITMAX = MAX( 30, 2*KEXSH )*MAX( 10, ( IHI-ILO+1 ) ) +* +* ==== Last row and column in the active block ==== +* + KBOT = IHI +* +* ==== Main Loop ==== +* + DO 80 IT = 1, ITMAX +* +* ==== Done when KBOT falls below ILO ==== +* + IF( KBOT.LT.ILO ) + $ GO TO 90 +* +* ==== Locate active block ==== +* + DO 10 K = KBOT, ILO + 1, -1 + IF( H( K, K-1 ).EQ.ZERO ) + $ GO TO 20 + 10 CONTINUE + K = ILO + 20 CONTINUE + KTOP = K +* +* ==== Select deflation window size: +* . Typical Case: +* . If possible and advisable, nibble the entire +* . active block. If not, use size MIN(NWR,NWMAX) +* . or MIN(NWR+1,NWMAX) depending upon which has +* . the smaller corresponding subdiagonal entry +* . (a heuristic). +* . +* . Exceptional Case: +* . If there have been no deflations in KEXNW or +* . more iterations, then vary the deflation window +* . size. At first, because, larger windows are, +* . in general, more powerful than smaller ones, +* . rapidly increase the window to the maximum possible. +* . Then, gradually reduce the window size. ==== +* + NH = KBOT - KTOP + 1 + NWUPBD = MIN( NH, NWMAX ) + IF( NDFL.LT.KEXNW ) THEN + NW = MIN( NWUPBD, NWR ) + ELSE + NW = MIN( NWUPBD, 2*NW ) + END IF + IF( NW.LT.NWMAX ) THEN + IF( NW.GE.NH-1 ) THEN + NW = NH + ELSE + KWTOP = KBOT - NW + 1 + IF( ABS( H( KWTOP, KWTOP-1 ) ).GT. + $ ABS( H( KWTOP-1, KWTOP-2 ) ) )NW = NW + 1 + END IF + END IF + IF( NDFL.LT.KEXNW ) THEN + NDEC = -1 + ELSE IF( NDEC.GE.0 .OR. NW.GE.NWUPBD ) THEN + NDEC = NDEC + 1 + IF( NW-NDEC.LT.2 ) + $ NDEC = 0 + NW = NW - NDEC + END IF +* +* ==== Aggressive early deflation: +* . split workspace under the subdiagonal into +* . - an nw-by-nw work array V in the lower +* . left-hand-corner, +* . - an NW-by-at-least-NW-but-more-is-better +* . (NW-by-NHO) horizontal work array along +* . the bottom edge, +* . - an at-least-NW-but-more-is-better (NHV-by-NW) +* . vertical work array along the left-hand-edge. +* . ==== +* + KV = N - NW + 1 + KT = NW + 1 + NHO = ( N-NW-1 ) - KT + 1 + KWV = NW + 2 + NVE = ( N-NW ) - KWV + 1 +* +* ==== Aggressive early deflation ==== +* + CALL DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, WR, WI, H( KV, 1 ), LDH, + $ NHO, H( KV, KT ), LDH, NVE, H( KWV, 1 ), LDH, + $ WORK, LWORK ) +* +* ==== Adjust KBOT accounting for new deflations. ==== +* + KBOT = KBOT - LD +* +* ==== KS points to the shifts. ==== +* + KS = KBOT - LS + 1 +* +* ==== Skip an expensive QR sweep if there is a (partly +* . heuristic) reason to expect that many eigenvalues +* . will deflate without it. Here, the QR sweep is +* . skipped if many eigenvalues have just been deflated +* . or if the remaining active block is small. +* + IF( ( LD.EQ.0 ) .OR. ( ( 100*LD.LE.NW*NIBBLE ) .AND. ( KBOT- + $ KTOP+1.GT.MIN( NMIN, NWMAX ) ) ) ) THEN +* +* ==== NS = nominal number of simultaneous shifts. +* . This may be lowered (slightly) if DLAQR2 +* . did not provide that many shifts. ==== +* + NS = MIN( NSMAX, NSR, MAX( 2, KBOT-KTOP ) ) + NS = NS - MOD( NS, 2 ) +* +* ==== If there have been no deflations +* . in a multiple of KEXSH iterations, +* . then try exceptional shifts. +* . Otherwise use shifts provided by +* . DLAQR2 above or from the eigenvalues +* . of a trailing principal submatrix. ==== +* + IF( MOD( NDFL, KEXSH ).EQ.0 ) THEN + KS = KBOT - NS + 1 + DO 30 I = KBOT, MAX( KS+1, KTOP+2 ), -2 + SS = ABS( H( I, I-1 ) ) + ABS( H( I-1, I-2 ) ) + AA = WILK1*SS + H( I, I ) + BB = SS + CC = WILK2*SS + DD = AA + CALL DLANV2( AA, BB, CC, DD, WR( I-1 ), WI( I-1 ), + $ WR( I ), WI( I ), CS, SN ) + 30 CONTINUE + IF( KS.EQ.KTOP ) THEN + WR( KS+1 ) = H( KS+1, KS+1 ) + WI( KS+1 ) = ZERO + WR( KS ) = WR( KS+1 ) + WI( KS ) = WI( KS+1 ) + END IF + ELSE +* +* ==== Got NS/2 or fewer shifts? Use DLAHQR +* . on a trailing principal submatrix to +* . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, +* . there is enough space below the subdiagonal +* . to fit an NS-by-NS scratch array.) ==== +* + IF( KBOT-KS+1.LE.NS / 2 ) THEN + KS = KBOT - NS + 1 + KT = N - NS + 1 + CALL DLACPY( 'A', NS, NS, H( KS, KS ), LDH, + $ H( KT, 1 ), LDH ) + CALL DLAHQR( .false., .false., NS, 1, NS, + $ H( KT, 1 ), LDH, WR( KS ), WI( KS ), + $ 1, 1, ZDUM, 1, INF ) + KS = KS + INF +* +* ==== In case of a rare QR failure use +* . eigenvalues of the trailing 2-by-2 +* . principal submatrix. ==== +* + IF( KS.GE.KBOT ) THEN + AA = H( KBOT-1, KBOT-1 ) + CC = H( KBOT, KBOT-1 ) + BB = H( KBOT-1, KBOT ) + DD = H( KBOT, KBOT ) + CALL DLANV2( AA, BB, CC, DD, WR( KBOT-1 ), + $ WI( KBOT-1 ), WR( KBOT ), + $ WI( KBOT ), CS, SN ) + KS = KBOT - 1 + END IF + END IF +* + IF( KBOT-KS+1.GT.NS ) THEN +* +* ==== Sort the shifts (Helps a little) +* . Bubble sort keeps complex conjugate +* . pairs together. ==== +* + SORTED = .false. + DO 50 K = KBOT, KS + 1, -1 + IF( SORTED ) + $ GO TO 60 + SORTED = .true. + DO 40 I = KS, K - 1 + IF( ABS( WR( I ) )+ABS( WI( I ) ).LT. + $ ABS( WR( I+1 ) )+ABS( WI( I+1 ) ) ) THEN + SORTED = .false. +* + SWAP = WR( I ) + WR( I ) = WR( I+1 ) + WR( I+1 ) = SWAP +* + SWAP = WI( I ) + WI( I ) = WI( I+1 ) + WI( I+1 ) = SWAP + END IF + 40 CONTINUE + 50 CONTINUE + 60 CONTINUE + END IF +* +* ==== Shuffle shifts into pairs of real shifts +* . and pairs of complex conjugate shifts +* . assuming complex conjugate shifts are +* . already adjacent to one another. (Yes, +* . they are.) ==== +* + DO 70 I = KBOT, KS + 2, -2 + IF( WI( I ).NE.-WI( I-1 ) ) THEN +* + SWAP = WR( I ) + WR( I ) = WR( I-1 ) + WR( I-1 ) = WR( I-2 ) + WR( I-2 ) = SWAP +* + SWAP = WI( I ) + WI( I ) = WI( I-1 ) + WI( I-1 ) = WI( I-2 ) + WI( I-2 ) = SWAP + END IF + 70 CONTINUE + END IF +* +* ==== If there are only two shifts and both are +* . real, then use only one. ==== +* + IF( KBOT-KS+1.EQ.2 ) THEN + IF( WI( KBOT ).EQ.ZERO ) THEN + IF( ABS( WR( KBOT )-H( KBOT, KBOT ) ).LT. + $ ABS( WR( KBOT-1 )-H( KBOT, KBOT ) ) ) THEN + WR( KBOT-1 ) = WR( KBOT ) + ELSE + WR( KBOT ) = WR( KBOT-1 ) + END IF + END IF + END IF +* +* ==== Use up to NS of the the smallest magnatiude +* . shifts. If there aren't NS shifts available, +* . then use them all, possibly dropping one to +* . make the number of shifts even. ==== +* + NS = MIN( NS, KBOT-KS+1 ) + NS = NS - MOD( NS, 2 ) + KS = KBOT - NS + 1 +* +* ==== Small-bulge multi-shift QR sweep: +* . split workspace under the subdiagonal into +* . - a KDU-by-KDU work array U in the lower +* . left-hand-corner, +* . - a KDU-by-at-least-KDU-but-more-is-better +* . (KDU-by-NHo) horizontal work array WH along +* . the bottom edge, +* . - and an at-least-KDU-but-more-is-better-by-KDU +* . (NVE-by-KDU) vertical work WV arrow along +* . the left-hand-edge. ==== +* + KDU = 3*NS - 3 + KU = N - KDU + 1 + KWH = KDU + 1 + NHO = ( N-KDU+1-4 ) - ( KDU+1 ) + 1 + KWV = KDU + 4 + NVE = N - KDU - KWV + 1 +* +* ==== Small-bulge multi-shift QR sweep ==== +* + CALL DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NS, + $ WR( KS ), WI( KS ), H, LDH, ILOZ, IHIZ, Z, + $ LDZ, WORK, 3, H( KU, 1 ), LDH, NVE, + $ H( KWV, 1 ), LDH, NHO, H( KU, KWH ), LDH ) + END IF +* +* ==== Note progress (or the lack of it). ==== +* + IF( LD.GT.0 ) THEN + NDFL = 1 + ELSE + NDFL = NDFL + 1 + END IF +* +* ==== End of main loop ==== + 80 CONTINUE +* +* ==== Iteration limit exceeded. Set INFO to show where +* . the problem occurred and exit. ==== +* + INFO = KBOT + 90 CONTINUE + END IF +* +* ==== Return the optimal value of LWORK. ==== +* + WORK( 1 ) = DBLE( LWKOPT ) +* +* ==== End of DLAQR4 ==== +* + END diff --git a/dspl/liblapack/SRC/dlaqr5.f b/dspl/liblapack/SRC/dlaqr5.f new file mode 100644 index 0000000..5cc4eda --- /dev/null +++ b/dspl/liblapack/SRC/dlaqr5.f @@ -0,0 +1,919 @@ +*> \brief \b DLAQR5 performs a single small-bulge multi-shift QR sweep. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAQR5 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, +* SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, +* LDU, NV, WV, LDWV, NH, WH, LDWH ) +* +* .. Scalar Arguments .. +* INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV, +* $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ), +* $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAQR5, called by DLAQR0, performs a +*> single small-bulge multi-shift QR sweep. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> WANTT = .true. if the quasi-triangular Schur factor +*> is being computed. WANTT is set to .false. otherwise. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> WANTZ = .true. if the orthogonal Schur factor is being +*> computed. WANTZ is set to .false. otherwise. +*> \endverbatim +*> +*> \param[in] KACC22 +*> \verbatim +*> KACC22 is INTEGER with value 0, 1, or 2. +*> Specifies the computation mode of far-from-diagonal +*> orthogonal updates. +*> = 0: DLAQR5 does not accumulate reflections and does not +*> use matrix-matrix multiply to update far-from-diagonal +*> matrix entries. +*> = 1: DLAQR5 accumulates reflections and uses matrix-matrix +*> multiply to update the far-from-diagonal matrix entries. +*> = 2: DLAQR5 accumulates reflections, uses matrix-matrix +*> multiply to update the far-from-diagonal matrix entries, +*> and takes advantage of 2-by-2 block structure during +*> matrix multiplies. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> N is the order of the Hessenberg matrix H upon which this +*> subroutine operates. +*> \endverbatim +*> +*> \param[in] KTOP +*> \verbatim +*> KTOP is INTEGER +*> \endverbatim +*> +*> \param[in] KBOT +*> \verbatim +*> KBOT is INTEGER +*> These are the first and last rows and columns of an +*> isolated diagonal block upon which the QR sweep is to be +*> applied. It is assumed without a check that +*> either KTOP = 1 or H(KTOP,KTOP-1) = 0 +*> and +*> either KBOT = N or H(KBOT+1,KBOT) = 0. +*> \endverbatim +*> +*> \param[in] NSHFTS +*> \verbatim +*> NSHFTS is INTEGER +*> NSHFTS gives the number of simultaneous shifts. NSHFTS +*> must be positive and even. +*> \endverbatim +*> +*> \param[in,out] SR +*> \verbatim +*> SR is DOUBLE PRECISION array, dimension (NSHFTS) +*> \endverbatim +*> +*> \param[in,out] SI +*> \verbatim +*> SI is DOUBLE PRECISION array, dimension (NSHFTS) +*> SR contains the real parts and SI contains the imaginary +*> parts of the NSHFTS shifts of origin that define the +*> multi-shift QR sweep. On output SR and SI may be +*> reordered. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is DOUBLE PRECISION array, dimension (LDH,N) +*> On input H contains a Hessenberg matrix. On output a +*> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied +*> to the isolated diagonal block in rows and columns KTOP +*> through KBOT. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> LDH is the leading dimension of H just as declared in the +*> calling procedure. LDH.GE.MAX(1,N). +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ,IHIZ) +*> If WANTZ = .TRUE., then the QR Sweep orthogonal +*> similarity transformation is accumulated into +*> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right. +*> If WANTZ = .FALSE., then Z is unreferenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> LDA is the leading dimension of Z just as declared in +*> the calling procedure. LDZ.GE.N. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (LDV,NSHFTS/2) +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> LDV is the leading dimension of V as declared in the +*> calling procedure. LDV.GE.3. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension (LDU,3*NSHFTS-3) +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> LDU is the leading dimension of U just as declared in the +*> in the calling subroutine. LDU.GE.3*NSHFTS-3. +*> \endverbatim +*> +*> \param[in] NH +*> \verbatim +*> NH is INTEGER +*> NH is the number of columns in array WH available for +*> workspace. NH.GE.1. +*> \endverbatim +*> +*> \param[out] WH +*> \verbatim +*> WH is DOUBLE PRECISION array, dimension (LDWH,NH) +*> \endverbatim +*> +*> \param[in] LDWH +*> \verbatim +*> LDWH is INTEGER +*> Leading dimension of WH just as declared in the +*> calling procedure. LDWH.GE.3*NSHFTS-3. +*> \endverbatim +*> +*> \param[in] NV +*> \verbatim +*> NV is INTEGER +*> NV is the number of rows in WV agailable for workspace. +*> NV.GE.1. +*> \endverbatim +*> +*> \param[out] WV +*> \verbatim +*> WV is DOUBLE PRECISION array, dimension (LDWV,3*NSHFTS-3) +*> \endverbatim +*> +*> \param[in] LDWV +*> \verbatim +*> LDWV is INTEGER +*> LDWV is the leading dimension of WV as declared in the +*> in the calling subroutine. LDWV.GE.NV. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +* +*> \par References: +* ================ +*> +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages +*> 929--947, 2002. +*> +* ===================================================================== + SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, + $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, + $ LDU, NV, WV, LDWV, NH, WH, LDWH ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV, + $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ), + $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ), + $ Z( LDZ, * ) +* .. +* +* ================================================================ +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION ALPHA, BETA, H11, H12, H21, H22, REFSUM, + $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2, + $ ULP + INTEGER I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN, + $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS, + $ M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL, + $ NS, NU + LOGICAL ACCUM, BLK22, BMP22 +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. +* + INTRINSIC ABS, DBLE, MAX, MIN, MOD +* .. +* .. Local Arrays .. + DOUBLE PRECISION VT( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLABAD, DLACPY, DLAQR1, DLARFG, DLASET, + $ DTRMM +* .. +* .. Executable Statements .. +* +* ==== If there are no shifts, then there is nothing to do. ==== +* + IF( NSHFTS.LT.2 ) + $ RETURN +* +* ==== If the active block is empty or 1-by-1, then there +* . is nothing to do. ==== +* + IF( KTOP.GE.KBOT ) + $ RETURN +* +* ==== Shuffle shifts into pairs of real shifts and pairs +* . of complex conjugate shifts assuming complex +* . conjugate shifts are already adjacent to one +* . another. ==== +* + DO 10 I = 1, NSHFTS - 2, 2 + IF( SI( I ).NE.-SI( I+1 ) ) THEN +* + SWAP = SR( I ) + SR( I ) = SR( I+1 ) + SR( I+1 ) = SR( I+2 ) + SR( I+2 ) = SWAP +* + SWAP = SI( I ) + SI( I ) = SI( I+1 ) + SI( I+1 ) = SI( I+2 ) + SI( I+2 ) = SWAP + END IF + 10 CONTINUE +* +* ==== NSHFTS is supposed to be even, but if it is odd, +* . then simply reduce it by one. The shuffle above +* . ensures that the dropped shift is real and that +* . the remaining shifts are paired. ==== +* + NS = NSHFTS - MOD( NSHFTS, 2 ) +* +* ==== Machine constants for deflation ==== +* + SAFMIN = DLAMCH( 'SAFE MINIMUM' ) + SAFMAX = ONE / SAFMIN + CALL DLABAD( SAFMIN, SAFMAX ) + ULP = DLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( DBLE( N ) / ULP ) +* +* ==== Use accumulated reflections to update far-from-diagonal +* . entries ? ==== +* + ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 ) +* +* ==== If so, exploit the 2-by-2 block structure? ==== +* + BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 ) +* +* ==== clear trash ==== +* + IF( KTOP+2.LE.KBOT ) + $ H( KTOP+2, KTOP ) = ZERO +* +* ==== NBMPS = number of 2-shift bulges in the chain ==== +* + NBMPS = NS / 2 +* +* ==== KDU = width of slab ==== +* + KDU = 6*NBMPS - 3 +* +* ==== Create and chase chains of NBMPS bulges ==== +* + DO 220 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2 + NDCOL = INCOL + KDU + IF( ACCUM ) + $ CALL DLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU ) +* +* ==== Near-the-diagonal bulge chase. The following loop +* . performs the near-the-diagonal part of a small bulge +* . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal +* . chunk extends from column INCOL to column NDCOL +* . (including both column INCOL and column NDCOL). The +* . following loop chases a 3*NBMPS column long chain of +* . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL +* . may be less than KTOP and and NDCOL may be greater than +* . KBOT indicating phantom columns from which to chase +* . bulges before they are actually introduced or to which +* . to chase bulges beyond column KBOT.) ==== +* + DO 150 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 ) +* +* ==== Bulges number MTOP to MBOT are active double implicit +* . shift bulges. There may or may not also be small +* . 2-by-2 bulge, if there is room. The inactive bulges +* . (if any) must wait until the active bulges have moved +* . down the diagonal to make room. The phantom matrix +* . paradigm described above helps keep track. ==== +* + MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 ) + MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 ) + M22 = MBOT + 1 + BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ. + $ ( KBOT-2 ) +* +* ==== Generate reflections to chase the chain right +* . one column. (The minimum value of K is KTOP-1.) ==== +* + DO 20 M = MTOP, MBOT + K = KRCOL + 3*( M-1 ) + IF( K.EQ.KTOP-1 ) THEN + CALL DLAQR1( 3, H( KTOP, KTOP ), LDH, SR( 2*M-1 ), + $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ), + $ V( 1, M ) ) + ALPHA = V( 1, M ) + CALL DLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) ) + ELSE + BETA = H( K+1, K ) + V( 2, M ) = H( K+2, K ) + V( 3, M ) = H( K+3, K ) + CALL DLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) ) +* +* ==== A Bulge may collapse because of vigilant +* . deflation or destructive underflow. In the +* . underflow case, try the two-small-subdiagonals +* . trick to try to reinflate the bulge. ==== +* + IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE. + $ ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN +* +* ==== Typical case: not collapsed (yet). ==== +* + H( K+1, K ) = BETA + H( K+2, K ) = ZERO + H( K+3, K ) = ZERO + ELSE +* +* ==== Atypical case: collapsed. Attempt to +* . reintroduce ignoring H(K+1,K) and H(K+2,K). +* . If the fill resulting from the new +* . reflector is too large, then abandon it. +* . Otherwise, use the new one. ==== +* + CALL DLAQR1( 3, H( K+1, K+1 ), LDH, SR( 2*M-1 ), + $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ), + $ VT ) + ALPHA = VT( 1 ) + CALL DLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) ) + REFSUM = VT( 1 )*( H( K+1, K )+VT( 2 )* + $ H( K+2, K ) ) +* + IF( ABS( H( K+2, K )-REFSUM*VT( 2 ) )+ + $ ABS( REFSUM*VT( 3 ) ).GT.ULP* + $ ( ABS( H( K, K ) )+ABS( H( K+1, + $ K+1 ) )+ABS( H( K+2, K+2 ) ) ) ) THEN +* +* ==== Starting a new bulge here would +* . create non-negligible fill. Use +* . the old one with trepidation. ==== +* + H( K+1, K ) = BETA + H( K+2, K ) = ZERO + H( K+3, K ) = ZERO + ELSE +* +* ==== Stating a new bulge here would +* . create only negligible fill. +* . Replace the old reflector with +* . the new one. ==== +* + H( K+1, K ) = H( K+1, K ) - REFSUM + H( K+2, K ) = ZERO + H( K+3, K ) = ZERO + V( 1, M ) = VT( 1 ) + V( 2, M ) = VT( 2 ) + V( 3, M ) = VT( 3 ) + END IF + END IF + END IF + 20 CONTINUE +* +* ==== Generate a 2-by-2 reflection, if needed. ==== +* + K = KRCOL + 3*( M22-1 ) + IF( BMP22 ) THEN + IF( K.EQ.KTOP-1 ) THEN + CALL DLAQR1( 2, H( K+1, K+1 ), LDH, SR( 2*M22-1 ), + $ SI( 2*M22-1 ), SR( 2*M22 ), SI( 2*M22 ), + $ V( 1, M22 ) ) + BETA = V( 1, M22 ) + CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) + ELSE + BETA = H( K+1, K ) + V( 2, M22 ) = H( K+2, K ) + CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) + H( K+1, K ) = BETA + H( K+2, K ) = ZERO + END IF + END IF +* +* ==== Multiply H by reflections from the left ==== +* + IF( ACCUM ) THEN + JBOT = MIN( NDCOL, KBOT ) + ELSE IF( WANTT ) THEN + JBOT = N + ELSE + JBOT = KBOT + END IF + DO 40 J = MAX( KTOP, KRCOL ), JBOT + MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 ) + DO 30 M = MTOP, MEND + K = KRCOL + 3*( M-1 ) + REFSUM = V( 1, M )*( H( K+1, J )+V( 2, M )* + $ H( K+2, J )+V( 3, M )*H( K+3, J ) ) + H( K+1, J ) = H( K+1, J ) - REFSUM + H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M ) + H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M ) + 30 CONTINUE + 40 CONTINUE + IF( BMP22 ) THEN + K = KRCOL + 3*( M22-1 ) + DO 50 J = MAX( K+1, KTOP ), JBOT + REFSUM = V( 1, M22 )*( H( K+1, J )+V( 2, M22 )* + $ H( K+2, J ) ) + H( K+1, J ) = H( K+1, J ) - REFSUM + H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 ) + 50 CONTINUE + END IF +* +* ==== Multiply H by reflections from the right. +* . Delay filling in the last row until the +* . vigilant deflation check is complete. ==== +* + IF( ACCUM ) THEN + JTOP = MAX( KTOP, INCOL ) + ELSE IF( WANTT ) THEN + JTOP = 1 + ELSE + JTOP = KTOP + END IF + DO 90 M = MTOP, MBOT + IF( V( 1, M ).NE.ZERO ) THEN + K = KRCOL + 3*( M-1 ) + DO 60 J = JTOP, MIN( KBOT, K+3 ) + REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )* + $ H( J, K+2 )+V( 3, M )*H( J, K+3 ) ) + H( J, K+1 ) = H( J, K+1 ) - REFSUM + H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M ) + H( J, K+3 ) = H( J, K+3 ) - REFSUM*V( 3, M ) + 60 CONTINUE +* + IF( ACCUM ) THEN +* +* ==== Accumulate U. (If necessary, update Z later +* . with with an efficient matrix-matrix +* . multiply.) ==== +* + KMS = K - INCOL + DO 70 J = MAX( 1, KTOP-INCOL ), KDU + REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )* + $ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) ) + U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM + U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M ) + U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*V( 3, M ) + 70 CONTINUE + ELSE IF( WANTZ ) THEN +* +* ==== U is not accumulated, so update Z +* . now by multiplying by reflections +* . from the right. ==== +* + DO 80 J = ILOZ, IHIZ + REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )* + $ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) ) + Z( J, K+1 ) = Z( J, K+1 ) - REFSUM + Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M ) + Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*V( 3, M ) + 80 CONTINUE + END IF + END IF + 90 CONTINUE +* +* ==== Special case: 2-by-2 reflection (if needed) ==== +* + K = KRCOL + 3*( M22-1 ) + IF( BMP22 ) THEN + IF ( V( 1, M22 ).NE.ZERO ) THEN + DO 100 J = JTOP, MIN( KBOT, K+3 ) + REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )* + $ H( J, K+2 ) ) + H( J, K+1 ) = H( J, K+1 ) - REFSUM + H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M22 ) + 100 CONTINUE +* + IF( ACCUM ) THEN + KMS = K - INCOL + DO 110 J = MAX( 1, KTOP-INCOL ), KDU + REFSUM = V( 1, M22 )*( U( J, KMS+1 )+ + $ V( 2, M22 )*U( J, KMS+2 ) ) + U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM + U( J, KMS+2 ) = U( J, KMS+2 ) - + $ REFSUM*V( 2, M22 ) + 110 CONTINUE + ELSE IF( WANTZ ) THEN + DO 120 J = ILOZ, IHIZ + REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )* + $ Z( J, K+2 ) ) + Z( J, K+1 ) = Z( J, K+1 ) - REFSUM + Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M22 ) + 120 CONTINUE + END IF + END IF + END IF +* +* ==== Vigilant deflation check ==== +* + MSTART = MTOP + IF( KRCOL+3*( MSTART-1 ).LT.KTOP ) + $ MSTART = MSTART + 1 + MEND = MBOT + IF( BMP22 ) + $ MEND = MEND + 1 + IF( KRCOL.EQ.KBOT-2 ) + $ MEND = MEND + 1 + DO 130 M = MSTART, MEND + K = MIN( KBOT-1, KRCOL+3*( M-1 ) ) +* +* ==== The following convergence test requires that +* . the tradition small-compared-to-nearby-diagonals +* . criterion and the Ahues & Tisseur (LAWN 122, 1997) +* . criteria both be satisfied. The latter improves +* . accuracy in some examples. Falling back on an +* . alternate convergence criterion when TST1 or TST2 +* . is zero (as done here) is traditional but probably +* . unnecessary. ==== +* + IF( H( K+1, K ).NE.ZERO ) THEN + TST1 = ABS( H( K, K ) ) + ABS( H( K+1, K+1 ) ) + IF( TST1.EQ.ZERO ) THEN + IF( K.GE.KTOP+1 ) + $ TST1 = TST1 + ABS( H( K, K-1 ) ) + IF( K.GE.KTOP+2 ) + $ TST1 = TST1 + ABS( H( K, K-2 ) ) + IF( K.GE.KTOP+3 ) + $ TST1 = TST1 + ABS( H( K, K-3 ) ) + IF( K.LE.KBOT-2 ) + $ TST1 = TST1 + ABS( H( K+2, K+1 ) ) + IF( K.LE.KBOT-3 ) + $ TST1 = TST1 + ABS( H( K+3, K+1 ) ) + IF( K.LE.KBOT-4 ) + $ TST1 = TST1 + ABS( H( K+4, K+1 ) ) + END IF + IF( ABS( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) ) + $ THEN + H12 = MAX( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) ) + H21 = MIN( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) ) + H11 = MAX( ABS( H( K+1, K+1 ) ), + $ ABS( H( K, K )-H( K+1, K+1 ) ) ) + H22 = MIN( ABS( H( K+1, K+1 ) ), + $ ABS( H( K, K )-H( K+1, K+1 ) ) ) + SCL = H11 + H12 + TST2 = H22*( H11 / SCL ) +* + IF( TST2.EQ.ZERO .OR. H21*( H12 / SCL ).LE. + $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO + END IF + END IF + 130 CONTINUE +* +* ==== Fill in the last row of each bulge. ==== +* + MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 ) + DO 140 M = MTOP, MEND + K = KRCOL + 3*( M-1 ) + REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 ) + H( K+4, K+1 ) = -REFSUM + H( K+4, K+2 ) = -REFSUM*V( 2, M ) + H( K+4, K+3 ) = H( K+4, K+3 ) - REFSUM*V( 3, M ) + 140 CONTINUE +* +* ==== End of near-the-diagonal bulge chase. ==== +* + 150 CONTINUE +* +* ==== Use U (if accumulated) to update far-from-diagonal +* . entries in H. If required, use U to update Z as +* . well. ==== +* + IF( ACCUM ) THEN + IF( WANTT ) THEN + JTOP = 1 + JBOT = N + ELSE + JTOP = KTOP + JBOT = KBOT + END IF + IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR. + $ ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN +* +* ==== Updates not exploiting the 2-by-2 block +* . structure of U. K1 and NU keep track of +* . the location and size of U in the special +* . cases of introducing bulges and chasing +* . bulges off the bottom. In these special +* . cases and in case the number of shifts +* . is NS = 2, there is no 2-by-2 block +* . structure to exploit. ==== +* + K1 = MAX( 1, KTOP-INCOL ) + NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1 +* +* ==== Horizontal Multiply ==== +* + DO 160 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH + JLEN = MIN( NH, JBOT-JCOL+1 ) + CALL DGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ), + $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH, + $ LDWH ) + CALL DLACPY( 'ALL', NU, JLEN, WH, LDWH, + $ H( INCOL+K1, JCOL ), LDH ) + 160 CONTINUE +* +* ==== Vertical multiply ==== +* + DO 170 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV + JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW ) + CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE, + $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ), + $ LDU, ZERO, WV, LDWV ) + CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV, + $ H( JROW, INCOL+K1 ), LDH ) + 170 CONTINUE +* +* ==== Z multiply (also vertical) ==== +* + IF( WANTZ ) THEN + DO 180 JROW = ILOZ, IHIZ, NV + JLEN = MIN( NV, IHIZ-JROW+1 ) + CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE, + $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ), + $ LDU, ZERO, WV, LDWV ) + CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV, + $ Z( JROW, INCOL+K1 ), LDZ ) + 180 CONTINUE + END IF + ELSE +* +* ==== Updates exploiting U's 2-by-2 block structure. +* . (I2, I4, J2, J4 are the last rows and columns +* . of the blocks.) ==== +* + I2 = ( KDU+1 ) / 2 + I4 = KDU + J2 = I4 - I2 + J4 = KDU +* +* ==== KZS and KNZ deal with the band of zeros +* . along the diagonal of one of the triangular +* . blocks. ==== +* + KZS = ( J4-J2 ) - ( NS+1 ) + KNZ = NS + 1 +* +* ==== Horizontal multiply ==== +* + DO 190 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH + JLEN = MIN( NH, JBOT-JCOL+1 ) +* +* ==== Copy bottom of H to top+KZS of scratch ==== +* (The first KZS rows get multiplied by zero.) ==== +* + CALL DLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ), + $ LDH, WH( KZS+1, 1 ), LDWH ) +* +* ==== Multiply by U21**T ==== +* + CALL DLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH ) + CALL DTRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE, + $ U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ), + $ LDWH ) +* +* ==== Multiply top of H by U11**T ==== +* + CALL DGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU, + $ H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH ) +* +* ==== Copy top of H to bottom of WH ==== +* + CALL DLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH, + $ WH( I2+1, 1 ), LDWH ) +* +* ==== Multiply by U21**T ==== +* + CALL DTRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE, + $ U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH ) +* +* ==== Multiply by U22 ==== +* + CALL DGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE, + $ U( J2+1, I2+1 ), LDU, + $ H( INCOL+1+J2, JCOL ), LDH, ONE, + $ WH( I2+1, 1 ), LDWH ) +* +* ==== Copy it back ==== +* + CALL DLACPY( 'ALL', KDU, JLEN, WH, LDWH, + $ H( INCOL+1, JCOL ), LDH ) + 190 CONTINUE +* +* ==== Vertical multiply ==== +* + DO 200 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV + JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW ) +* +* ==== Copy right of H to scratch (the first KZS +* . columns get multiplied by zero) ==== +* + CALL DLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ), + $ LDH, WV( 1, 1+KZS ), LDWV ) +* +* ==== Multiply by U21 ==== +* + CALL DLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV ) + CALL DTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE, + $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ), + $ LDWV ) +* +* ==== Multiply by U11 ==== +* + CALL DGEMM( 'N', 'N', JLEN, I2, J2, ONE, + $ H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV, + $ LDWV ) +* +* ==== Copy left of H to right of scratch ==== +* + CALL DLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH, + $ WV( 1, 1+I2 ), LDWV ) +* +* ==== Multiply by U21 ==== +* + CALL DTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE, + $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV ) +* +* ==== Multiply by U22 ==== +* + CALL DGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE, + $ H( JROW, INCOL+1+J2 ), LDH, + $ U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ), + $ LDWV ) +* +* ==== Copy it back ==== +* + CALL DLACPY( 'ALL', JLEN, KDU, WV, LDWV, + $ H( JROW, INCOL+1 ), LDH ) + 200 CONTINUE +* +* ==== Multiply Z (also vertical) ==== +* + IF( WANTZ ) THEN + DO 210 JROW = ILOZ, IHIZ, NV + JLEN = MIN( NV, IHIZ-JROW+1 ) +* +* ==== Copy right of Z to left of scratch (first +* . KZS columns get multiplied by zero) ==== +* + CALL DLACPY( 'ALL', JLEN, KNZ, + $ Z( JROW, INCOL+1+J2 ), LDZ, + $ WV( 1, 1+KZS ), LDWV ) +* +* ==== Multiply by U12 ==== +* + CALL DLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, + $ LDWV ) + CALL DTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE, + $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ), + $ LDWV ) +* +* ==== Multiply by U11 ==== +* + CALL DGEMM( 'N', 'N', JLEN, I2, J2, ONE, + $ Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE, + $ WV, LDWV ) +* +* ==== Copy left of Z to right of scratch ==== +* + CALL DLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ), + $ LDZ, WV( 1, 1+I2 ), LDWV ) +* +* ==== Multiply by U21 ==== +* + CALL DTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE, + $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), + $ LDWV ) +* +* ==== Multiply by U22 ==== +* + CALL DGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE, + $ Z( JROW, INCOL+1+J2 ), LDZ, + $ U( J2+1, I2+1 ), LDU, ONE, + $ WV( 1, 1+I2 ), LDWV ) +* +* ==== Copy the result back to Z ==== +* + CALL DLACPY( 'ALL', JLEN, KDU, WV, LDWV, + $ Z( JROW, INCOL+1 ), LDZ ) + 210 CONTINUE + END IF + END IF + END IF + 220 CONTINUE +* +* ==== End of DLAQR5 ==== +* + END diff --git a/dspl/liblapack/SRC/dlaqsb.f b/dspl/liblapack/SRC/dlaqsb.f new file mode 100644 index 0000000..3a6ef70 --- /dev/null +++ b/dspl/liblapack/SRC/dlaqsb.f @@ -0,0 +1,226 @@ +*> \brief \b DLAQSB scales a symmetric/Hermitian band matrix, using scaling factors computed by spbequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAQSB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, UPLO +* INTEGER KD, LDAB, N +* DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAQSB equilibrates a symmetric band matrix A using the scaling +*> factors in the vector S. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of super-diagonals of the matrix A if UPLO = 'U', +*> or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**T*U or A = L*L**T of the band +*> matrix A, in the same storage format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A. +*> \endverbatim +*> +*> \param[in] SCOND +*> \verbatim +*> SCOND is DOUBLE PRECISION +*> Ratio of the smallest S(i) to the largest S(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies whether or not equilibration was done. +*> = 'N': No equilibration. +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if scaling should be done +*> based on the ratio of the scaling factors. If SCOND < THRESH, +*> scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if scaling should +*> be done based on the absolute size of the largest matrix element. +*> If AMAX > LARGE or AMAX < SMALL, scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER KD, LDAB, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored in band format. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = MAX( 1, J-KD ), J + AB( KD+1+I-J, J ) = CJ*S( I )*AB( KD+1+I-J, J ) + 10 CONTINUE + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, MIN( N, J+KD ) + AB( 1+I-J, J ) = CJ*S( I )*AB( 1+I-J, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of DLAQSB +* + END diff --git a/dspl/liblapack/SRC/dlaqsp.f b/dspl/liblapack/SRC/dlaqsp.f new file mode 100644 index 0000000..5f25d1e --- /dev/null +++ b/dspl/liblapack/SRC/dlaqsp.f @@ -0,0 +1,212 @@ +*> \brief \b DLAQSP scales a symmetric/Hermitian matrix in packed storage, using scaling factors computed by sppequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAQSP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, UPLO +* INTEGER N +* DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AP( * ), S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAQSP equilibrates a symmetric matrix A using the scaling factors +*> in the vector S. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the equilibrated matrix: diag(S) * A * diag(S), in +*> the same storage format as A. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A. +*> \endverbatim +*> +*> \param[in] SCOND +*> \verbatim +*> SCOND is DOUBLE PRECISION +*> Ratio of the smallest S(i) to the largest S(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies whether or not equilibration was done. +*> = 'N': No equilibration. +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if scaling should be done +*> based on the ratio of the scaling factors. If SCOND < THRESH, +*> scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if scaling should +*> be done based on the absolute size of the largest matrix element. +*> If AMAX > LARGE or AMAX < SMALL, scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JC + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + JC = 1 + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J + AP( JC+I-1 ) = CJ*S( I )*AP( JC+I-1 ) + 10 CONTINUE + JC = JC + J + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + JC = 1 + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, N + AP( JC+I-J ) = CJ*S( I )*AP( JC+I-J ) + 30 CONTINUE + JC = JC + N - J + 1 + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of DLAQSP +* + END diff --git a/dspl/liblapack/SRC/dlaqsy.f b/dspl/liblapack/SRC/dlaqsy.f new file mode 100644 index 0000000..a3ed114 --- /dev/null +++ b/dspl/liblapack/SRC/dlaqsy.f @@ -0,0 +1,216 @@ +*> \brief \b DLAQSY scales a symmetric/Hermitian matrix, using scaling factors computed by spoequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAQSY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, UPLO +* INTEGER LDA, N +* DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAQSY equilibrates a symmetric matrix A using the scaling factors +*> in the vector S. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if EQUED = 'Y', the equilibrated matrix: +*> diag(S) * A * diag(S). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(N,1). +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A. +*> \endverbatim +*> +*> \param[in] SCOND +*> \verbatim +*> SCOND is DOUBLE PRECISION +*> Ratio of the smallest S(i) to the largest S(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies whether or not equilibration was done. +*> = 'N': No equilibration. +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if scaling should be done +*> based on the ratio of the scaling factors. If SCOND < THRESH, +*> scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if scaling should +*> be done based on the absolute size of the largest matrix element. +*> If AMAX > LARGE or AMAX < SMALL, scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYauxiliary +* +* ===================================================================== + SUBROUTINE DLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER LDA, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J + A( I, J ) = CJ*S( I )*A( I, J ) + 10 CONTINUE + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, N + A( I, J ) = CJ*S( I )*A( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of DLAQSY +* + END diff --git a/dspl/liblapack/SRC/dlaqtr.f b/dspl/liblapack/SRC/dlaqtr.f new file mode 100644 index 0000000..71c441f --- /dev/null +++ b/dspl/liblapack/SRC/dlaqtr.f @@ -0,0 +1,748 @@ +*> \brief \b DLAQTR solves a real quasi-triangular system of equations, or a complex quasi-triangular system of special form, in real arithmetic. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAQTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAQTR( LTRAN, LREAL, N, T, LDT, B, W, SCALE, X, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* LOGICAL LREAL, LTRAN +* INTEGER INFO, LDT, N +* DOUBLE PRECISION SCALE, W +* .. +* .. Array Arguments .. +* DOUBLE PRECISION B( * ), T( LDT, * ), WORK( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAQTR solves the real quasi-triangular system +*> +*> op(T)*p = scale*c, if LREAL = .TRUE. +*> +*> or the complex quasi-triangular systems +*> +*> op(T + iB)*(p+iq) = scale*(c+id), if LREAL = .FALSE. +*> +*> in real arithmetic, where T is upper quasi-triangular. +*> If LREAL = .FALSE., then the first diagonal block of T must be +*> 1 by 1, B is the specially structured matrix +*> +*> B = [ b(1) b(2) ... b(n) ] +*> [ w ] +*> [ w ] +*> [ . ] +*> [ w ] +*> +*> op(A) = A or A**T, A**T denotes the transpose of +*> matrix A. +*> +*> On input, X = [ c ]. On output, X = [ p ]. +*> [ d ] [ q ] +*> +*> This subroutine is designed for the condition number estimation +*> in routine DTRSNA. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] LTRAN +*> \verbatim +*> LTRAN is LOGICAL +*> On entry, LTRAN specifies the option of conjugate transpose: +*> = .FALSE., op(T+i*B) = T+i*B, +*> = .TRUE., op(T+i*B) = (T+i*B)**T. +*> \endverbatim +*> +*> \param[in] LREAL +*> \verbatim +*> LREAL is LOGICAL +*> On entry, LREAL specifies the input matrix structure: +*> = .FALSE., the input is complex +*> = .TRUE., the input is real +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of T+i*B. N >= 0. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,N) +*> On entry, T contains a matrix in Schur canonical form. +*> If LREAL = .FALSE., then the first diagonal block of T mu +*> be 1 by 1. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the matrix T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (N) +*> On entry, B contains the elements to form the matrix +*> B as described above. +*> If LREAL = .TRUE., B is not referenced. +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is DOUBLE PRECISION +*> On entry, W is the diagonal element of the matrix B. +*> If LREAL = .TRUE., W is not referenced. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> On exit, SCALE is the scale factor. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (2*N) +*> On entry, X contains the right hand side of the system. +*> On exit, X is overwritten by the solution. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> On exit, INFO is set to +*> 0: successful exit. +*> 1: the some diagonal 1 by 1 block has been perturbed by +*> a small number SMIN to keep nonsingularity. +*> 2: the some diagonal 2 by 2 block has been perturbed by +*> a small number in DLALN2 to keep nonsingularity. +*> NOTE: In the interests of speed, this routine does not +*> check the inputs for errors. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLAQTR( LTRAN, LREAL, N, T, LDT, B, W, SCALE, X, WORK, + $ INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL LREAL, LTRAN + INTEGER INFO, LDT, N + DOUBLE PRECISION SCALE, W +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( * ), T( LDT, * ), WORK( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER I, IERR, J, J1, J2, JNEXT, K, N1, N2 + DOUBLE PRECISION BIGNUM, EPS, REC, SCALOC, SI, SMIN, SMINW, + $ SMLNUM, SR, TJJ, TMP, XJ, XMAX, XNORM, Z +* .. +* .. Local Arrays .. + DOUBLE PRECISION D( 2, 2 ), V( 2, 2 ) +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DASUM, DDOT, DLAMCH, DLANGE + EXTERNAL IDAMAX, DASUM, DDOT, DLAMCH, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DLADIV, DLALN2, DSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Do not test the input parameters for errors +* + NOTRAN = .NOT.LTRAN + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM +* + XNORM = DLANGE( 'M', N, N, T, LDT, D ) + IF( .NOT.LREAL ) + $ XNORM = MAX( XNORM, ABS( W ), DLANGE( 'M', N, 1, B, N, D ) ) + SMIN = MAX( SMLNUM, EPS*XNORM ) +* +* Compute 1-norm of each column of strictly upper triangular +* part of T to control overflow in triangular solver. +* + WORK( 1 ) = ZERO + DO 10 J = 2, N + WORK( J ) = DASUM( J-1, T( 1, J ), 1 ) + 10 CONTINUE +* + IF( .NOT.LREAL ) THEN + DO 20 I = 2, N + WORK( I ) = WORK( I ) + ABS( B( I ) ) + 20 CONTINUE + END IF +* + N2 = 2*N + N1 = N + IF( .NOT.LREAL ) + $ N1 = N2 + K = IDAMAX( N1, X, 1 ) + XMAX = ABS( X( K ) ) + SCALE = ONE +* + IF( XMAX.GT.BIGNUM ) THEN + SCALE = BIGNUM / XMAX + CALL DSCAL( N1, SCALE, X, 1 ) + XMAX = BIGNUM + END IF +* + IF( LREAL ) THEN +* + IF( NOTRAN ) THEN +* +* Solve T*p = scale*c +* + JNEXT = N + DO 30 J = N, 1, -1 + IF( J.GT.JNEXT ) + $ GO TO 30 + J1 = J + J2 = J + JNEXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNEXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* Meet 1 by 1 diagonal block +* +* Scale to avoid overflow when computing +* x(j) = b(j)/T(j,j) +* + XJ = ABS( X( J1 ) ) + TJJ = ABS( T( J1, J1 ) ) + TMP = T( J1, J1 ) + IF( TJJ.LT.SMIN ) THEN + TMP = SMIN + TJJ = SMIN + INFO = 1 + END IF +* + IF( XJ.EQ.ZERO ) + $ GO TO 30 +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.BIGNUM*TJJ ) THEN + REC = ONE / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J1 ) = X( J1 ) / TMP + XJ = ABS( X( J1 ) ) +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j1 of T. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( WORK( J1 ).GT.( BIGNUM-XMAX )*REC ) THEN + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + END IF + IF( J1.GT.1 ) THEN + CALL DAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 ) + K = IDAMAX( J1-1, X, 1 ) + XMAX = ABS( X( K ) ) + END IF +* + ELSE +* +* Meet 2 by 2 diagonal block +* +* Call 2 by 2 linear system solve, to take +* care of possible overflow by scaling factor. +* + D( 1, 1 ) = X( J1 ) + D( 2, 1 ) = X( J2 ) + CALL DLALN2( .FALSE., 2, 1, SMIN, ONE, T( J1, J1 ), + $ LDT, ONE, ONE, D, 2, ZERO, ZERO, V, 2, + $ SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 2 +* + IF( SCALOC.NE.ONE ) THEN + CALL DSCAL( N, SCALOC, X, 1 ) + SCALE = SCALE*SCALOC + END IF + X( J1 ) = V( 1, 1 ) + X( J2 ) = V( 2, 1 ) +* +* Scale V(1,1) (= X(J1)) and/or V(2,1) (=X(J2)) +* to avoid overflow in updating right-hand side. +* + XJ = MAX( ABS( V( 1, 1 ) ), ABS( V( 2, 1 ) ) ) + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( MAX( WORK( J1 ), WORK( J2 ) ).GT. + $ ( BIGNUM-XMAX )*REC ) THEN + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + END IF +* +* Update right-hand side +* + IF( J1.GT.1 ) THEN + CALL DAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 ) + CALL DAXPY( J1-1, -X( J2 ), T( 1, J2 ), 1, X, 1 ) + K = IDAMAX( J1-1, X, 1 ) + XMAX = ABS( X( K ) ) + END IF +* + END IF +* + 30 CONTINUE +* + ELSE +* +* Solve T**T*p = scale*c +* + JNEXT = 1 + DO 40 J = 1, N + IF( J.LT.JNEXT ) + $ GO TO 40 + J1 = J + J2 = J + JNEXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNEXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1 by 1 diagonal block +* +* Scale if necessary to avoid overflow in forming the +* right-hand side element by inner product. +* + XJ = ABS( X( J1 ) ) + IF( XMAX.GT.ONE ) THEN + REC = ONE / XMAX + IF( WORK( J1 ).GT.( BIGNUM-XJ )*REC ) THEN + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + X( J1 ) = X( J1 ) - DDOT( J1-1, T( 1, J1 ), 1, X, 1 ) +* + XJ = ABS( X( J1 ) ) + TJJ = ABS( T( J1, J1 ) ) + TMP = T( J1, J1 ) + IF( TJJ.LT.SMIN ) THEN + TMP = SMIN + TJJ = SMIN + INFO = 1 + END IF +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.BIGNUM*TJJ ) THEN + REC = ONE / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J1 ) = X( J1 ) / TMP + XMAX = MAX( XMAX, ABS( X( J1 ) ) ) +* + ELSE +* +* 2 by 2 diagonal block +* +* Scale if necessary to avoid overflow in forming the +* right-hand side elements by inner product. +* + XJ = MAX( ABS( X( J1 ) ), ABS( X( J2 ) ) ) + IF( XMAX.GT.ONE ) THEN + REC = ONE / XMAX + IF( MAX( WORK( J2 ), WORK( J1 ) ).GT.( BIGNUM-XJ )* + $ REC ) THEN + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + D( 1, 1 ) = X( J1 ) - DDOT( J1-1, T( 1, J1 ), 1, X, + $ 1 ) + D( 2, 1 ) = X( J2 ) - DDOT( J1-1, T( 1, J2 ), 1, X, + $ 1 ) +* + CALL DLALN2( .TRUE., 2, 1, SMIN, ONE, T( J1, J1 ), + $ LDT, ONE, ONE, D, 2, ZERO, ZERO, V, 2, + $ SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 2 +* + IF( SCALOC.NE.ONE ) THEN + CALL DSCAL( N, SCALOC, X, 1 ) + SCALE = SCALE*SCALOC + END IF + X( J1 ) = V( 1, 1 ) + X( J2 ) = V( 2, 1 ) + XMAX = MAX( ABS( X( J1 ) ), ABS( X( J2 ) ), XMAX ) +* + END IF + 40 CONTINUE + END IF +* + ELSE +* + SMINW = MAX( EPS*ABS( W ), SMIN ) + IF( NOTRAN ) THEN +* +* Solve (T + iB)*(p+iq) = c+id +* + JNEXT = N + DO 70 J = N, 1, -1 + IF( J.GT.JNEXT ) + $ GO TO 70 + J1 = J + J2 = J + JNEXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNEXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1 by 1 diagonal block +* +* Scale if necessary to avoid overflow in division +* + Z = W + IF( J1.EQ.1 ) + $ Z = B( 1 ) + XJ = ABS( X( J1 ) ) + ABS( X( N+J1 ) ) + TJJ = ABS( T( J1, J1 ) ) + ABS( Z ) + TMP = T( J1, J1 ) + IF( TJJ.LT.SMINW ) THEN + TMP = SMINW + TJJ = SMINW + INFO = 1 + END IF +* + IF( XJ.EQ.ZERO ) + $ GO TO 70 +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.BIGNUM*TJJ ) THEN + REC = ONE / XJ + CALL DSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + CALL DLADIV( X( J1 ), X( N+J1 ), TMP, Z, SR, SI ) + X( J1 ) = SR + X( N+J1 ) = SI + XJ = ABS( X( J1 ) ) + ABS( X( N+J1 ) ) +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j1 of T. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( WORK( J1 ).GT.( BIGNUM-XMAX )*REC ) THEN + CALL DSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + END IF + END IF +* + IF( J1.GT.1 ) THEN + CALL DAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 ) + CALL DAXPY( J1-1, -X( N+J1 ), T( 1, J1 ), 1, + $ X( N+1 ), 1 ) +* + X( 1 ) = X( 1 ) + B( J1 )*X( N+J1 ) + X( N+1 ) = X( N+1 ) - B( J1 )*X( J1 ) +* + XMAX = ZERO + DO 50 K = 1, J1 - 1 + XMAX = MAX( XMAX, ABS( X( K ) )+ + $ ABS( X( K+N ) ) ) + 50 CONTINUE + END IF +* + ELSE +* +* Meet 2 by 2 diagonal block +* + D( 1, 1 ) = X( J1 ) + D( 2, 1 ) = X( J2 ) + D( 1, 2 ) = X( N+J1 ) + D( 2, 2 ) = X( N+J2 ) + CALL DLALN2( .FALSE., 2, 2, SMINW, ONE, T( J1, J1 ), + $ LDT, ONE, ONE, D, 2, ZERO, -W, V, 2, + $ SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 2 +* + IF( SCALOC.NE.ONE ) THEN + CALL DSCAL( 2*N, SCALOC, X, 1 ) + SCALE = SCALOC*SCALE + END IF + X( J1 ) = V( 1, 1 ) + X( J2 ) = V( 2, 1 ) + X( N+J1 ) = V( 1, 2 ) + X( N+J2 ) = V( 2, 2 ) +* +* Scale X(J1), .... to avoid overflow in +* updating right hand side. +* + XJ = MAX( ABS( V( 1, 1 ) )+ABS( V( 1, 2 ) ), + $ ABS( V( 2, 1 ) )+ABS( V( 2, 2 ) ) ) + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( MAX( WORK( J1 ), WORK( J2 ) ).GT. + $ ( BIGNUM-XMAX )*REC ) THEN + CALL DSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + END IF + END IF +* +* Update the right-hand side. +* + IF( J1.GT.1 ) THEN + CALL DAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 ) + CALL DAXPY( J1-1, -X( J2 ), T( 1, J2 ), 1, X, 1 ) +* + CALL DAXPY( J1-1, -X( N+J1 ), T( 1, J1 ), 1, + $ X( N+1 ), 1 ) + CALL DAXPY( J1-1, -X( N+J2 ), T( 1, J2 ), 1, + $ X( N+1 ), 1 ) +* + X( 1 ) = X( 1 ) + B( J1 )*X( N+J1 ) + + $ B( J2 )*X( N+J2 ) + X( N+1 ) = X( N+1 ) - B( J1 )*X( J1 ) - + $ B( J2 )*X( J2 ) +* + XMAX = ZERO + DO 60 K = 1, J1 - 1 + XMAX = MAX( ABS( X( K ) )+ABS( X( K+N ) ), + $ XMAX ) + 60 CONTINUE + END IF +* + END IF + 70 CONTINUE +* + ELSE +* +* Solve (T + iB)**T*(p+iq) = c+id +* + JNEXT = 1 + DO 80 J = 1, N + IF( J.LT.JNEXT ) + $ GO TO 80 + J1 = J + J2 = J + JNEXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNEXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1 by 1 diagonal block +* +* Scale if necessary to avoid overflow in forming the +* right-hand side element by inner product. +* + XJ = ABS( X( J1 ) ) + ABS( X( J1+N ) ) + IF( XMAX.GT.ONE ) THEN + REC = ONE / XMAX + IF( WORK( J1 ).GT.( BIGNUM-XJ )*REC ) THEN + CALL DSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + X( J1 ) = X( J1 ) - DDOT( J1-1, T( 1, J1 ), 1, X, 1 ) + X( N+J1 ) = X( N+J1 ) - DDOT( J1-1, T( 1, J1 ), 1, + $ X( N+1 ), 1 ) + IF( J1.GT.1 ) THEN + X( J1 ) = X( J1 ) - B( J1 )*X( N+1 ) + X( N+J1 ) = X( N+J1 ) + B( J1 )*X( 1 ) + END IF + XJ = ABS( X( J1 ) ) + ABS( X( J1+N ) ) +* + Z = W + IF( J1.EQ.1 ) + $ Z = B( 1 ) +* +* Scale if necessary to avoid overflow in +* complex division +* + TJJ = ABS( T( J1, J1 ) ) + ABS( Z ) + TMP = T( J1, J1 ) + IF( TJJ.LT.SMINW ) THEN + TMP = SMINW + TJJ = SMINW + INFO = 1 + END IF +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.BIGNUM*TJJ ) THEN + REC = ONE / XJ + CALL DSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + CALL DLADIV( X( J1 ), X( N+J1 ), TMP, -Z, SR, SI ) + X( J1 ) = SR + X( J1+N ) = SI + XMAX = MAX( ABS( X( J1 ) )+ABS( X( J1+N ) ), XMAX ) +* + ELSE +* +* 2 by 2 diagonal block +* +* Scale if necessary to avoid overflow in forming the +* right-hand side element by inner product. +* + XJ = MAX( ABS( X( J1 ) )+ABS( X( N+J1 ) ), + $ ABS( X( J2 ) )+ABS( X( N+J2 ) ) ) + IF( XMAX.GT.ONE ) THEN + REC = ONE / XMAX + IF( MAX( WORK( J1 ), WORK( J2 ) ).GT. + $ ( BIGNUM-XJ ) / XMAX ) THEN + CALL DSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + D( 1, 1 ) = X( J1 ) - DDOT( J1-1, T( 1, J1 ), 1, X, + $ 1 ) + D( 2, 1 ) = X( J2 ) - DDOT( J1-1, T( 1, J2 ), 1, X, + $ 1 ) + D( 1, 2 ) = X( N+J1 ) - DDOT( J1-1, T( 1, J1 ), 1, + $ X( N+1 ), 1 ) + D( 2, 2 ) = X( N+J2 ) - DDOT( J1-1, T( 1, J2 ), 1, + $ X( N+1 ), 1 ) + D( 1, 1 ) = D( 1, 1 ) - B( J1 )*X( N+1 ) + D( 2, 1 ) = D( 2, 1 ) - B( J2 )*X( N+1 ) + D( 1, 2 ) = D( 1, 2 ) + B( J1 )*X( 1 ) + D( 2, 2 ) = D( 2, 2 ) + B( J2 )*X( 1 ) +* + CALL DLALN2( .TRUE., 2, 2, SMINW, ONE, T( J1, J1 ), + $ LDT, ONE, ONE, D, 2, ZERO, W, V, 2, + $ SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 2 +* + IF( SCALOC.NE.ONE ) THEN + CALL DSCAL( N2, SCALOC, X, 1 ) + SCALE = SCALOC*SCALE + END IF + X( J1 ) = V( 1, 1 ) + X( J2 ) = V( 2, 1 ) + X( N+J1 ) = V( 1, 2 ) + X( N+J2 ) = V( 2, 2 ) + XMAX = MAX( ABS( X( J1 ) )+ABS( X( N+J1 ) ), + $ ABS( X( J2 ) )+ABS( X( N+J2 ) ), XMAX ) +* + END IF +* + 80 CONTINUE +* + END IF +* + END IF +* + RETURN +* +* End of DLAQTR +* + END diff --git a/dspl/liblapack/SRC/dlar1v.f b/dspl/liblapack/SRC/dlar1v.f new file mode 100644 index 0000000..3fa7178 --- /dev/null +++ b/dspl/liblapack/SRC/dlar1v.f @@ -0,0 +1,486 @@ +*> \brief \b DLAR1V computes the (scaled) r-th column of the inverse of the submatrix in rows b1 through bn of the tridiagonal matrix LDLT - λI. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAR1V + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAR1V( N, B1, BN, LAMBDA, D, L, LD, LLD, +* PIVMIN, GAPTOL, Z, WANTNC, NEGCNT, ZTZ, MINGMA, +* R, ISUPPZ, NRMINV, RESID, RQCORR, WORK ) +* +* .. Scalar Arguments .. +* LOGICAL WANTNC +* INTEGER B1, BN, N, NEGCNT, R +* DOUBLE PRECISION GAPTOL, LAMBDA, MINGMA, NRMINV, PIVMIN, RESID, +* $ RQCORR, ZTZ +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ) +* DOUBLE PRECISION D( * ), L( * ), LD( * ), LLD( * ), +* $ WORK( * ) +* DOUBLE PRECISION Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAR1V computes the (scaled) r-th column of the inverse of +*> the sumbmatrix in rows B1 through BN of the tridiagonal matrix +*> L D L**T - sigma I. When sigma is close to an eigenvalue, the +*> computed vector is an accurate eigenvector. Usually, r corresponds +*> to the index where the eigenvector is largest in magnitude. +*> The following steps accomplish this computation : +*> (a) Stationary qd transform, L D L**T - sigma I = L(+) D(+) L(+)**T, +*> (b) Progressive qd transform, L D L**T - sigma I = U(-) D(-) U(-)**T, +*> (c) Computation of the diagonal elements of the inverse of +*> L D L**T - sigma I by combining the above transforms, and choosing +*> r as the index where the diagonal of the inverse is (one of the) +*> largest in magnitude. +*> (d) Computation of the (scaled) r-th column of the inverse using the +*> twisted factorization obtained by combining the top part of the +*> the stationary and the bottom part of the progressive transform. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix L D L**T. +*> \endverbatim +*> +*> \param[in] B1 +*> \verbatim +*> B1 is INTEGER +*> First index of the submatrix of L D L**T. +*> \endverbatim +*> +*> \param[in] BN +*> \verbatim +*> BN is INTEGER +*> Last index of the submatrix of L D L**T. +*> \endverbatim +*> +*> \param[in] LAMBDA +*> \verbatim +*> LAMBDA is DOUBLE PRECISION +*> The shift. In order to compute an accurate eigenvector, +*> LAMBDA should be a good approximation to an eigenvalue +*> of L D L**T. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) subdiagonal elements of the unit bidiagonal matrix +*> L, in elements 1 to N-1. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the diagonal matrix D. +*> \endverbatim +*> +*> \param[in] LD +*> \verbatim +*> LD is DOUBLE PRECISION array, dimension (N-1) +*> The n-1 elements L(i)*D(i). +*> \endverbatim +*> +*> \param[in] LLD +*> \verbatim +*> LLD is DOUBLE PRECISION array, dimension (N-1) +*> The n-1 elements L(i)*L(i)*D(i). +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is DOUBLE PRECISION +*> The minimum pivot in the Sturm sequence. +*> \endverbatim +*> +*> \param[in] GAPTOL +*> \verbatim +*> GAPTOL is DOUBLE PRECISION +*> Tolerance that indicates when eigenvector entries are negligible +*> w.r.t. their contribution to the residual. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (N) +*> On input, all entries of Z must be set to 0. +*> On output, Z contains the (scaled) r-th column of the +*> inverse. The scaling is such that Z(R) equals 1. +*> \endverbatim +*> +*> \param[in] WANTNC +*> \verbatim +*> WANTNC is LOGICAL +*> Specifies whether NEGCNT has to be computed. +*> \endverbatim +*> +*> \param[out] NEGCNT +*> \verbatim +*> NEGCNT is INTEGER +*> If WANTNC is .TRUE. then NEGCNT = the number of pivots < pivmin +*> in the matrix factorization L D L**T, and NEGCNT = -1 otherwise. +*> \endverbatim +*> +*> \param[out] ZTZ +*> \verbatim +*> ZTZ is DOUBLE PRECISION +*> The square of the 2-norm of Z. +*> \endverbatim +*> +*> \param[out] MINGMA +*> \verbatim +*> MINGMA is DOUBLE PRECISION +*> The reciprocal of the largest (in magnitude) diagonal +*> element of the inverse of L D L**T - sigma I. +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is INTEGER +*> The twist index for the twisted factorization used to +*> compute Z. +*> On input, 0 <= R <= N. If R is input as 0, R is set to +*> the index where (L D L**T - sigma I)^{-1} is largest +*> in magnitude. If 1 <= R <= N, R is unchanged. +*> On output, R contains the twist index used to compute Z. +*> Ideally, R designates the position of the maximum entry in the +*> eigenvector. +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension (2) +*> The support of the vector in Z, i.e., the vector Z is +*> nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). +*> \endverbatim +*> +*> \param[out] NRMINV +*> \verbatim +*> NRMINV is DOUBLE PRECISION +*> NRMINV = 1/SQRT( ZTZ ) +*> \endverbatim +*> +*> \param[out] RESID +*> \verbatim +*> RESID is DOUBLE PRECISION +*> The residual of the FP vector. +*> RESID = ABS( MINGMA )/SQRT( ZTZ ) +*> \endverbatim +*> +*> \param[out] RQCORR +*> \verbatim +*> RQCORR is DOUBLE PRECISION +*> The Rayleigh Quotient correction to LAMBDA. +*> RQCORR = MINGMA*TMP +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE DLAR1V( N, B1, BN, LAMBDA, D, L, LD, LLD, + $ PIVMIN, GAPTOL, Z, WANTNC, NEGCNT, ZTZ, MINGMA, + $ R, ISUPPZ, NRMINV, RESID, RQCORR, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL WANTNC + INTEGER B1, BN, N, NEGCNT, R + DOUBLE PRECISION GAPTOL, LAMBDA, MINGMA, NRMINV, PIVMIN, RESID, + $ RQCORR, ZTZ +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ) + DOUBLE PRECISION D( * ), L( * ), LD( * ), LLD( * ), + $ WORK( * ) + DOUBLE PRECISION Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + +* .. +* .. Local Scalars .. + LOGICAL SAWNAN1, SAWNAN2 + INTEGER I, INDLPL, INDP, INDS, INDUMN, NEG1, NEG2, R1, + $ R2 + DOUBLE PRECISION DMINUS, DPLUS, EPS, S, TMP +* .. +* .. External Functions .. + LOGICAL DISNAN + DOUBLE PRECISION DLAMCH + EXTERNAL DISNAN, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + EPS = DLAMCH( 'Precision' ) + + + IF( R.EQ.0 ) THEN + R1 = B1 + R2 = BN + ELSE + R1 = R + R2 = R + END IF + +* Storage for LPLUS + INDLPL = 0 +* Storage for UMINUS + INDUMN = N + INDS = 2*N + 1 + INDP = 3*N + 1 + + IF( B1.EQ.1 ) THEN + WORK( INDS ) = ZERO + ELSE + WORK( INDS+B1-1 ) = LLD( B1-1 ) + END IF + +* +* Compute the stationary transform (using the differential form) +* until the index R2. +* + SAWNAN1 = .FALSE. + NEG1 = 0 + S = WORK( INDS+B1-1 ) - LAMBDA + DO 50 I = B1, R1 - 1 + DPLUS = D( I ) + S + WORK( INDLPL+I ) = LD( I ) / DPLUS + IF(DPLUS.LT.ZERO) NEG1 = NEG1 + 1 + WORK( INDS+I ) = S*WORK( INDLPL+I )*L( I ) + S = WORK( INDS+I ) - LAMBDA + 50 CONTINUE + SAWNAN1 = DISNAN( S ) + IF( SAWNAN1 ) GOTO 60 + DO 51 I = R1, R2 - 1 + DPLUS = D( I ) + S + WORK( INDLPL+I ) = LD( I ) / DPLUS + WORK( INDS+I ) = S*WORK( INDLPL+I )*L( I ) + S = WORK( INDS+I ) - LAMBDA + 51 CONTINUE + SAWNAN1 = DISNAN( S ) +* + 60 CONTINUE + IF( SAWNAN1 ) THEN +* Runs a slower version of the above loop if a NaN is detected + NEG1 = 0 + S = WORK( INDS+B1-1 ) - LAMBDA + DO 70 I = B1, R1 - 1 + DPLUS = D( I ) + S + IF(ABS(DPLUS).LT.PIVMIN) DPLUS = -PIVMIN + WORK( INDLPL+I ) = LD( I ) / DPLUS + IF(DPLUS.LT.ZERO) NEG1 = NEG1 + 1 + WORK( INDS+I ) = S*WORK( INDLPL+I )*L( I ) + IF( WORK( INDLPL+I ).EQ.ZERO ) + $ WORK( INDS+I ) = LLD( I ) + S = WORK( INDS+I ) - LAMBDA + 70 CONTINUE + DO 71 I = R1, R2 - 1 + DPLUS = D( I ) + S + IF(ABS(DPLUS).LT.PIVMIN) DPLUS = -PIVMIN + WORK( INDLPL+I ) = LD( I ) / DPLUS + WORK( INDS+I ) = S*WORK( INDLPL+I )*L( I ) + IF( WORK( INDLPL+I ).EQ.ZERO ) + $ WORK( INDS+I ) = LLD( I ) + S = WORK( INDS+I ) - LAMBDA + 71 CONTINUE + END IF +* +* Compute the progressive transform (using the differential form) +* until the index R1 +* + SAWNAN2 = .FALSE. + NEG2 = 0 + WORK( INDP+BN-1 ) = D( BN ) - LAMBDA + DO 80 I = BN - 1, R1, -1 + DMINUS = LLD( I ) + WORK( INDP+I ) + TMP = D( I ) / DMINUS + IF(DMINUS.LT.ZERO) NEG2 = NEG2 + 1 + WORK( INDUMN+I ) = L( I )*TMP + WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - LAMBDA + 80 CONTINUE + TMP = WORK( INDP+R1-1 ) + SAWNAN2 = DISNAN( TMP ) + + IF( SAWNAN2 ) THEN +* Runs a slower version of the above loop if a NaN is detected + NEG2 = 0 + DO 100 I = BN-1, R1, -1 + DMINUS = LLD( I ) + WORK( INDP+I ) + IF(ABS(DMINUS).LT.PIVMIN) DMINUS = -PIVMIN + TMP = D( I ) / DMINUS + IF(DMINUS.LT.ZERO) NEG2 = NEG2 + 1 + WORK( INDUMN+I ) = L( I )*TMP + WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - LAMBDA + IF( TMP.EQ.ZERO ) + $ WORK( INDP+I-1 ) = D( I ) - LAMBDA + 100 CONTINUE + END IF +* +* Find the index (from R1 to R2) of the largest (in magnitude) +* diagonal element of the inverse +* + MINGMA = WORK( INDS+R1-1 ) + WORK( INDP+R1-1 ) + IF( MINGMA.LT.ZERO ) NEG1 = NEG1 + 1 + IF( WANTNC ) THEN + NEGCNT = NEG1 + NEG2 + ELSE + NEGCNT = -1 + ENDIF + IF( ABS(MINGMA).EQ.ZERO ) + $ MINGMA = EPS*WORK( INDS+R1-1 ) + R = R1 + DO 110 I = R1, R2 - 1 + TMP = WORK( INDS+I ) + WORK( INDP+I ) + IF( TMP.EQ.ZERO ) + $ TMP = EPS*WORK( INDS+I ) + IF( ABS( TMP ).LE.ABS( MINGMA ) ) THEN + MINGMA = TMP + R = I + 1 + END IF + 110 CONTINUE +* +* Compute the FP vector: solve N^T v = e_r +* + ISUPPZ( 1 ) = B1 + ISUPPZ( 2 ) = BN + Z( R ) = ONE + ZTZ = ONE +* +* Compute the FP vector upwards from R +* + IF( .NOT.SAWNAN1 .AND. .NOT.SAWNAN2 ) THEN + DO 210 I = R-1, B1, -1 + Z( I ) = -( WORK( INDLPL+I )*Z( I+1 ) ) + IF( (ABS(Z(I))+ABS(Z(I+1)))* ABS(LD(I)).LT.GAPTOL ) + $ THEN + Z( I ) = ZERO + ISUPPZ( 1 ) = I + 1 + GOTO 220 + ENDIF + ZTZ = ZTZ + Z( I )*Z( I ) + 210 CONTINUE + 220 CONTINUE + ELSE +* Run slower loop if NaN occurred. + DO 230 I = R - 1, B1, -1 + IF( Z( I+1 ).EQ.ZERO ) THEN + Z( I ) = -( LD( I+1 ) / LD( I ) )*Z( I+2 ) + ELSE + Z( I ) = -( WORK( INDLPL+I )*Z( I+1 ) ) + END IF + IF( (ABS(Z(I))+ABS(Z(I+1)))* ABS(LD(I)).LT.GAPTOL ) + $ THEN + Z( I ) = ZERO + ISUPPZ( 1 ) = I + 1 + GO TO 240 + END IF + ZTZ = ZTZ + Z( I )*Z( I ) + 230 CONTINUE + 240 CONTINUE + ENDIF + +* Compute the FP vector downwards from R in blocks of size BLKSIZ + IF( .NOT.SAWNAN1 .AND. .NOT.SAWNAN2 ) THEN + DO 250 I = R, BN-1 + Z( I+1 ) = -( WORK( INDUMN+I )*Z( I ) ) + IF( (ABS(Z(I))+ABS(Z(I+1)))* ABS(LD(I)).LT.GAPTOL ) + $ THEN + Z( I+1 ) = ZERO + ISUPPZ( 2 ) = I + GO TO 260 + END IF + ZTZ = ZTZ + Z( I+1 )*Z( I+1 ) + 250 CONTINUE + 260 CONTINUE + ELSE +* Run slower loop if NaN occurred. + DO 270 I = R, BN - 1 + IF( Z( I ).EQ.ZERO ) THEN + Z( I+1 ) = -( LD( I-1 ) / LD( I ) )*Z( I-1 ) + ELSE + Z( I+1 ) = -( WORK( INDUMN+I )*Z( I ) ) + END IF + IF( (ABS(Z(I))+ABS(Z(I+1)))* ABS(LD(I)).LT.GAPTOL ) + $ THEN + Z( I+1 ) = ZERO + ISUPPZ( 2 ) = I + GO TO 280 + END IF + ZTZ = ZTZ + Z( I+1 )*Z( I+1 ) + 270 CONTINUE + 280 CONTINUE + END IF +* +* Compute quantities for convergence test +* + TMP = ONE / ZTZ + NRMINV = SQRT( TMP ) + RESID = ABS( MINGMA )*NRMINV + RQCORR = MINGMA*TMP +* +* + RETURN +* +* End of DLAR1V +* + END diff --git a/dspl/liblapack/SRC/dlar2v.f b/dspl/liblapack/SRC/dlar2v.f new file mode 100644 index 0000000..32c77bf --- /dev/null +++ b/dspl/liblapack/SRC/dlar2v.f @@ -0,0 +1,157 @@ +*> \brief \b DLAR2V applies a vector of plane rotations with real cosines and real sines from both sides to a sequence of 2-by-2 symmetric/Hermitian matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAR2V + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAR2V( N, X, Y, Z, INCX, C, S, INCC ) +* +* .. Scalar Arguments .. +* INTEGER INCC, INCX, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( * ), S( * ), X( * ), Y( * ), Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAR2V applies a vector of real plane rotations from both sides to +*> a sequence of 2-by-2 real symmetric matrices, defined by the elements +*> of the vectors x, y and z. For i = 1,2,...,n +*> +*> ( x(i) z(i) ) := ( c(i) s(i) ) ( x(i) z(i) ) ( c(i) -s(i) ) +*> ( z(i) y(i) ) ( -s(i) c(i) ) ( z(i) y(i) ) ( s(i) c(i) ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of plane rotations to be applied. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, +*> dimension (1+(N-1)*INCX) +*> The vector x. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, +*> dimension (1+(N-1)*INCX) +*> The vector y. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, +*> dimension (1+(N-1)*INCX) +*> The vector z. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X, Y and Z. INCX > 0. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (1+(N-1)*INCC) +*> The cosines of the plane rotations. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (1+(N-1)*INCC) +*> The sines of the plane rotations. +*> \endverbatim +*> +*> \param[in] INCC +*> \verbatim +*> INCC is INTEGER +*> The increment between elements of C and S. INCC > 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLAR2V( N, X, Y, Z, INCX, C, S, INCC ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ), S( * ), X( * ), Y( * ), Z( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IC, IX + DOUBLE PRECISION CI, SI, T1, T2, T3, T4, T5, T6, XI, YI, ZI +* .. +* .. Executable Statements .. +* + IX = 1 + IC = 1 + DO 10 I = 1, N + XI = X( IX ) + YI = Y( IX ) + ZI = Z( IX ) + CI = C( IC ) + SI = S( IC ) + T1 = SI*ZI + T2 = CI*ZI + T3 = T2 - SI*XI + T4 = T2 + SI*YI + T5 = CI*XI + T1 + T6 = CI*YI - T1 + X( IX ) = CI*T5 + SI*T4 + Y( IX ) = CI*T6 - SI*T3 + Z( IX ) = CI*T4 - SI*T5 + IX = IX + INCX + IC = IC + INCC + 10 CONTINUE +* +* End of DLAR2V +* + RETURN + END diff --git a/dspl/liblapack/SRC/dlarf.f b/dspl/liblapack/SRC/dlarf.f new file mode 100644 index 0000000..e99d0bb --- /dev/null +++ b/dspl/liblapack/SRC/dlarf.f @@ -0,0 +1,227 @@ +*> \brief \b DLARF applies an elementary reflector to a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARF applies a real elementary reflector H to a real m by n matrix +*> C, from either the left or the right. H is represented in the form +*> +*> H = I - tau * v * v**T +*> +*> where tau is a real scalar and v is a real vector. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of H. V is not used if +*> TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL APPLYLEFT + INTEGER I, LASTV, LASTC +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DGER +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILADLR, ILADLC + EXTERNAL LSAME, ILADLR, ILADLC +* .. +* .. Executable Statements .. +* + APPLYLEFT = LSAME( SIDE, 'L' ) + LASTV = 0 + LASTC = 0 + IF( TAU.NE.ZERO ) THEN +! Set up variables for scanning V. LASTV begins pointing to the end +! of V. + IF( APPLYLEFT ) THEN + LASTV = M + ELSE + LASTV = N + END IF + IF( INCV.GT.0 ) THEN + I = 1 + (LASTV-1) * INCV + ELSE + I = 1 + END IF +! Look for the last non-zero row in V. + DO WHILE( LASTV.GT.0 .AND. V( I ).EQ.ZERO ) + LASTV = LASTV - 1 + I = I - INCV + END DO + IF( APPLYLEFT ) THEN +! Scan for the last non-zero column in C(1:lastv,:). + LASTC = ILADLC(LASTV, N, C, LDC) + ELSE +! Scan for the last non-zero row in C(:,1:lastv). + LASTC = ILADLR(M, LASTV, C, LDC) + END IF + END IF +! Note that lastc.eq.0 renders the BLAS operations null; no special +! case is needed at this level. + IF( APPLYLEFT ) THEN +* +* Form H * C +* + IF( LASTV.GT.0 ) THEN +* +* w(1:lastc,1) := C(1:lastv,1:lastc)**T * v(1:lastv,1) +* + CALL DGEMV( 'Transpose', LASTV, LASTC, ONE, C, LDC, V, INCV, + $ ZERO, WORK, 1 ) +* +* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)**T +* + CALL DGER( LASTV, LASTC, -TAU, V, INCV, WORK, 1, C, LDC ) + END IF + ELSE +* +* Form C * H +* + IF( LASTV.GT.0 ) THEN +* +* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) +* + CALL DGEMV( 'No transpose', LASTC, LASTV, ONE, C, LDC, + $ V, INCV, ZERO, WORK, 1 ) +* +* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)**T +* + CALL DGER( LASTC, LASTV, -TAU, WORK, 1, V, INCV, C, LDC ) + END IF + END IF + RETURN +* +* End of DLARF +* + END diff --git a/dspl/liblapack/SRC/dlarfb.f b/dspl/liblapack/SRC/dlarfb.f new file mode 100644 index 0000000..5b2cc2b --- /dev/null +++ b/dspl/liblapack/SRC/dlarfb.f @@ -0,0 +1,710 @@ +*> \brief \b DLARFB applies a block reflector or its transpose to a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARFB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, +* T, LDT, C, LDC, WORK, LDWORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, SIDE, STOREV, TRANS +* INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( LDC, * ), T( LDT, * ), V( LDV, * ), +* $ WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARFB applies a real block reflector H or its transpose H**T to a +*> real m by n matrix C, from either the left or the right. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply H or H**T from the Left +*> = 'R': apply H or H**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply H (No transpose) +*> = 'T': apply H**T (Transpose) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Indicates how H is formed from a product of elementary +*> reflectors +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Indicates how the vectors which define the elementary +*> reflectors are stored: +*> = 'C': Columnwise +*> = 'R': Rowwise +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the matrix T (= the number of elementary +*> reflectors whose product defines the block reflector). +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,M) if STOREV = 'R' and SIDE = 'L' +*> (LDV,N) if STOREV = 'R' and SIDE = 'R' +*> The matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); +*> if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); +*> if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,K) +*> The triangular k by k matrix T in the representation of the +*> block reflector. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by H*C or H**T*C or C*H or C*H**T. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LDWORK,K) +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. +*> If SIDE = 'L', LDWORK >= max(1,N); +*> if SIDE = 'R', LDWORK >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2013 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored; the corresponding +*> array elements are modified but restored on exit. The rest of the +*> array is not used. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, + $ T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2013 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DTRMM +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( STOREV, 'C' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 ) (first K rows) +* ( V2 ) +* where V1 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C where C = ( C1 ) +* ( C2 ) +* +* W := C**T * V = (C1**T * V1 + C2**T * V2) (stored in WORK) +* +* W := C1**T +* + DO 10 J = 1, K + CALL DCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W := W * V1 +* + CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N, + $ K, ONE, V, LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C2**T * V2 +* + CALL DGEMM( 'Transpose', 'No transpose', N, K, M-K, + $ ONE, C( K+1, 1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T**T or W * T +* + CALL DTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W**T +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2 * W**T +* + CALL DGEMM( 'No transpose', 'Transpose', M-K, N, K, + $ -ONE, V( K+1, 1 ), LDV, WORK, LDWORK, ONE, + $ C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1**T +* + CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', N, K, + $ ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W**T +* + DO 30 J = 1, K + DO 20 I = 1, N + C( J, I ) = C( J, I ) - WORK( I, J ) + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**T where C = ( C1 C2 ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C1 +* + DO 40 J = 1, K + CALL DCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W := W * V1 +* + CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M, + $ K, ONE, V, LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C2 * V2 +* + CALL DGEMM( 'No transpose', 'No transpose', M, K, N-K, + $ ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**T +* + CALL DTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V**T +* + IF( N.GT.K ) THEN +* +* C2 := C2 - W * V2**T +* + CALL DGEMM( 'No transpose', 'Transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V( K+1, 1 ), LDV, ONE, + $ C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1**T +* + CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', M, K, + $ ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 60 J = 1, K + DO 50 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + ELSE +* +* Let V = ( V1 ) +* ( V2 ) (last K rows) +* where V2 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C where C = ( C1 ) +* ( C2 ) +* +* W := C**T * V = (C1**T * V1 + C2**T * V2) (stored in WORK) +* +* W := C2**T +* + DO 70 J = 1, K + CALL DCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 ) + 70 CONTINUE +* +* W := W * V2 +* + CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N, + $ K, ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1**T * V1 +* + CALL DGEMM( 'Transpose', 'No transpose', N, K, M-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T**T or W * T +* + CALL DTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W**T +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1 * W**T +* + CALL DGEMM( 'No transpose', 'Transpose', M-K, N, K, + $ -ONE, V, LDV, WORK, LDWORK, ONE, C, LDC ) + END IF +* +* W := W * V2**T +* + CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', N, K, + $ ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK ) +* +* C2 := C2 - W**T +* + DO 90 J = 1, K + DO 80 I = 1, N + C( M-K+J, I ) = C( M-K+J, I ) - WORK( I, J ) + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**T where C = ( C1 C2 ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C2 +* + DO 100 J = 1, K + CALL DCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 100 CONTINUE +* +* W := W * V2 +* + CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M, + $ K, ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1 +* + CALL DGEMM( 'No transpose', 'No transpose', M, K, N-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**T +* + CALL DTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V**T +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1**T +* + CALL DGEMM( 'No transpose', 'Transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC ) + END IF +* +* W := W * V2**T +* + CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', M, K, + $ ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK ) +* +* C2 := C2 - W +* + DO 120 J = 1, K + DO 110 I = 1, M + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* + ELSE IF( LSAME( STOREV, 'R' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 V2 ) (V1: first K columns) +* where V1 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C where C = ( C1 ) +* ( C2 ) +* +* W := C**T * V**T = (C1**T * V1**T + C2**T * V2**T) (stored in WORK) +* +* W := C1**T +* + DO 130 J = 1, K + CALL DCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 130 CONTINUE +* +* W := W * V1**T +* + CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', N, K, + $ ONE, V, LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C2**T * V2**T +* + CALL DGEMM( 'Transpose', 'Transpose', N, K, M-K, ONE, + $ C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, ONE, + $ WORK, LDWORK ) + END IF +* +* W := W * T**T or W * T +* + CALL DTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V**T * W**T +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2**T * W**T +* + CALL DGEMM( 'Transpose', 'Transpose', M-K, N, K, -ONE, + $ V( 1, K+1 ), LDV, WORK, LDWORK, ONE, + $ C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N, + $ K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W**T +* + DO 150 J = 1, K + DO 140 I = 1, N + C( J, I ) = C( J, I ) - WORK( I, J ) + 140 CONTINUE + 150 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**T where C = ( C1 C2 ) +* +* W := C * V**T = (C1*V1**T + C2*V2**T) (stored in WORK) +* +* W := C1 +* + DO 160 J = 1, K + CALL DCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 160 CONTINUE +* +* W := W * V1**T +* + CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', M, K, + $ ONE, V, LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C2 * V2**T +* + CALL DGEMM( 'No transpose', 'Transpose', M, K, N-K, + $ ONE, C( 1, K+1 ), LDC, V( 1, K+1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**T +* + CALL DTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C2 := C2 - W * V2 +* + CALL DGEMM( 'No transpose', 'No transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, ONE, + $ C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M, + $ K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 180 J = 1, K + DO 170 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 170 CONTINUE + 180 CONTINUE +* + END IF +* + ELSE +* +* Let V = ( V1 V2 ) (V2: last K columns) +* where V2 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C where C = ( C1 ) +* ( C2 ) +* +* W := C**T * V**T = (C1**T * V1**T + C2**T * V2**T) (stored in WORK) +* +* W := C2**T +* + DO 190 J = 1, K + CALL DCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 ) + 190 CONTINUE +* +* W := W * V2**T +* + CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', N, K, + $ ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1**T * V1**T +* + CALL DGEMM( 'Transpose', 'Transpose', N, K, M-K, ONE, + $ C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T**T or W * T +* + CALL DTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V**T * W**T +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1**T * W**T +* + CALL DGEMM( 'Transpose', 'Transpose', M-K, N, K, -ONE, + $ V, LDV, WORK, LDWORK, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N, + $ K, ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK ) +* +* C2 := C2 - W**T +* + DO 210 J = 1, K + DO 200 I = 1, N + C( M-K+J, I ) = C( M-K+J, I ) - WORK( I, J ) + 200 CONTINUE + 210 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V**T = (C1*V1**T + C2*V2**T) (stored in WORK) +* +* W := C2 +* + DO 220 J = 1, K + CALL DCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 220 CONTINUE +* +* W := W * V2**T +* + CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', M, K, + $ ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1**T +* + CALL DGEMM( 'No transpose', 'Transpose', M, K, N-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**T +* + CALL DTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1 +* + CALL DGEMM( 'No transpose', 'No transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M, + $ K, ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 240 J = 1, K + DO 230 I = 1, M + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 230 CONTINUE + 240 CONTINUE +* + END IF +* + END IF + END IF +* + RETURN +* +* End of DLARFB +* + END diff --git a/dspl/liblapack/SRC/dlarfg.f b/dspl/liblapack/SRC/dlarfg.f new file mode 100644 index 0000000..be33f93 --- /dev/null +++ b/dspl/liblapack/SRC/dlarfg.f @@ -0,0 +1,196 @@ +*> \brief \b DLARFG generates an elementary reflector (Householder matrix). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARFG + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARFG( N, ALPHA, X, INCX, TAU ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* DOUBLE PRECISION ALPHA, TAU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARFG generates a real elementary reflector H of order n, such +*> that +*> +*> H * ( alpha ) = ( beta ), H**T * H = I. +*> ( x ) ( 0 ) +*> +*> where alpha and beta are scalars, and x is an (n-1)-element real +*> vector. H is represented in the form +*> +*> H = I - tau * ( 1 ) * ( 1 v**T ) , +*> ( v ) +*> +*> where tau is a real scalar and v is a real (n-1)-element +*> vector. +*> +*> If the elements of x are all zero, then tau = 0 and H is taken to be +*> the unit matrix. +*> +*> Otherwise 1 <= tau <= 2. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the elementary reflector. +*> \endverbatim +*> +*> \param[in,out] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION +*> On entry, the value alpha. +*> On exit, it is overwritten with the value beta. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension +*> (1+(N-2)*abs(INCX)) +*> On entry, the vector x. +*> On exit, it is overwritten with the vector v. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> The value tau. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLARFG( N, ALPHA, X, INCX, TAU ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INCX, N + DOUBLE PRECISION ALPHA, TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER J, KNT + DOUBLE PRECISION BETA, RSAFMN, SAFMIN, XNORM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY2, DNRM2 + EXTERNAL DLAMCH, DLAPY2, DNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN +* .. +* .. External Subroutines .. + EXTERNAL DSCAL +* .. +* .. Executable Statements .. +* + IF( N.LE.1 ) THEN + TAU = ZERO + RETURN + END IF +* + XNORM = DNRM2( N-1, X, INCX ) +* + IF( XNORM.EQ.ZERO ) THEN +* +* H = I +* + TAU = ZERO + ELSE +* +* general case +* + BETA = -SIGN( DLAPY2( ALPHA, XNORM ), ALPHA ) + SAFMIN = DLAMCH( 'S' ) / DLAMCH( 'E' ) + KNT = 0 + IF( ABS( BETA ).LT.SAFMIN ) THEN +* +* XNORM, BETA may be inaccurate; scale X and recompute them +* + RSAFMN = ONE / SAFMIN + 10 CONTINUE + KNT = KNT + 1 + CALL DSCAL( N-1, RSAFMN, X, INCX ) + BETA = BETA*RSAFMN + ALPHA = ALPHA*RSAFMN + IF( (ABS( BETA ).LT.SAFMIN) .AND. (KNT .LT. 20) ) + $ GO TO 10 +* +* New BETA is at most 1, at least SAFMIN +* + XNORM = DNRM2( N-1, X, INCX ) + BETA = -SIGN( DLAPY2( ALPHA, XNORM ), ALPHA ) + END IF + TAU = ( BETA-ALPHA ) / BETA + CALL DSCAL( N-1, ONE / ( ALPHA-BETA ), X, INCX ) +* +* If ALPHA is subnormal, it may lose relative accuracy +* + DO 20 J = 1, KNT + BETA = BETA*SAFMIN + 20 CONTINUE + ALPHA = BETA + END IF +* + RETURN +* +* End of DLARFG +* + END diff --git a/dspl/liblapack/SRC/dlarfgp.f b/dspl/liblapack/SRC/dlarfgp.f new file mode 100644 index 0000000..d040a8c --- /dev/null +++ b/dspl/liblapack/SRC/dlarfgp.f @@ -0,0 +1,242 @@ +*> \brief \b DLARFGP generates an elementary reflector (Householder matrix) with non-negative beta. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARFGP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARFGP( N, ALPHA, X, INCX, TAU ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* DOUBLE PRECISION ALPHA, TAU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARFGP generates a real elementary reflector H of order n, such +*> that +*> +*> H * ( alpha ) = ( beta ), H**T * H = I. +*> ( x ) ( 0 ) +*> +*> where alpha and beta are scalars, beta is non-negative, and x is +*> an (n-1)-element real vector. H is represented in the form +*> +*> H = I - tau * ( 1 ) * ( 1 v**T ) , +*> ( v ) +*> +*> where tau is a real scalar and v is a real (n-1)-element +*> vector. +*> +*> If the elements of x are all zero, then tau = 0 and H is taken to be +*> the unit matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the elementary reflector. +*> \endverbatim +*> +*> \param[in,out] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION +*> On entry, the value alpha. +*> On exit, it is overwritten with the value beta. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension +*> (1+(N-2)*abs(INCX)) +*> On entry, the vector x. +*> On exit, it is overwritten with the vector v. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> The value tau. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLARFGP( N, ALPHA, X, INCX, TAU ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INCX, N + DOUBLE PRECISION ALPHA, TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION TWO, ONE, ZERO + PARAMETER ( TWO = 2.0D+0, ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER J, KNT + DOUBLE PRECISION BETA, BIGNUM, SAVEALPHA, SMLNUM, XNORM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY2, DNRM2 + EXTERNAL DLAMCH, DLAPY2, DNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN +* .. +* .. External Subroutines .. + EXTERNAL DSCAL +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + TAU = ZERO + RETURN + END IF +* + XNORM = DNRM2( N-1, X, INCX ) +* + IF( XNORM.EQ.ZERO ) THEN +* +* H = [+/-1, 0; I], sign chosen so ALPHA >= 0 +* + IF( ALPHA.GE.ZERO ) THEN +* When TAU.eq.ZERO, the vector is special-cased to be +* all zeros in the application routines. We do not need +* to clear it. + TAU = ZERO + ELSE +* However, the application routines rely on explicit +* zero checks when TAU.ne.ZERO, and we must clear X. + TAU = TWO + DO J = 1, N-1 + X( 1 + (J-1)*INCX ) = 0 + END DO + ALPHA = -ALPHA + END IF + ELSE +* +* general case +* + BETA = SIGN( DLAPY2( ALPHA, XNORM ), ALPHA ) + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'E' ) + KNT = 0 + IF( ABS( BETA ).LT.SMLNUM ) THEN +* +* XNORM, BETA may be inaccurate; scale X and recompute them +* + BIGNUM = ONE / SMLNUM + 10 CONTINUE + KNT = KNT + 1 + CALL DSCAL( N-1, BIGNUM, X, INCX ) + BETA = BETA*BIGNUM + ALPHA = ALPHA*BIGNUM + IF( (ABS( BETA ).LT.SMLNUM) .AND. (KNT .LT. 20) ) + $ GO TO 10 +* +* New BETA is at most 1, at least SMLNUM +* + XNORM = DNRM2( N-1, X, INCX ) + BETA = SIGN( DLAPY2( ALPHA, XNORM ), ALPHA ) + END IF + SAVEALPHA = ALPHA + ALPHA = ALPHA + BETA + IF( BETA.LT.ZERO ) THEN + BETA = -BETA + TAU = -ALPHA / BETA + ELSE + ALPHA = XNORM * (XNORM/ALPHA) + TAU = ALPHA / BETA + ALPHA = -ALPHA + END IF +* + IF ( ABS(TAU).LE.SMLNUM ) THEN +* +* In the case where the computed TAU ends up being a denormalized number, +* it loses relative accuracy. This is a BIG problem. Solution: flush TAU +* to ZERO. This explains the next IF statement. +* +* (Bug report provided by Pat Quillen from MathWorks on Jul 29, 2009.) +* (Thanks Pat. Thanks MathWorks.) +* + IF( SAVEALPHA.GE.ZERO ) THEN + TAU = ZERO + ELSE + TAU = TWO + DO J = 1, N-1 + X( 1 + (J-1)*INCX ) = 0 + END DO + BETA = -SAVEALPHA + END IF +* + ELSE +* +* This is the general case. +* + CALL DSCAL( N-1, ONE / ALPHA, X, INCX ) +* + END IF +* +* If BETA is subnormal, it may lose relative accuracy +* + DO 20 J = 1, KNT + BETA = BETA*SMLNUM + 20 CONTINUE + ALPHA = BETA + END IF +* + RETURN +* +* End of DLARFGP +* + END diff --git a/dspl/liblapack/SRC/dlarft.f b/dspl/liblapack/SRC/dlarft.f new file mode 100644 index 0000000..e69a6b7 --- /dev/null +++ b/dspl/liblapack/SRC/dlarft.f @@ -0,0 +1,326 @@ +*> \brief \b DLARFT forms the triangular factor T of a block reflector H = I - vtvH +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARFT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARFT forms the triangular factor T of a real block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**T +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**T * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DTRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( I, PREVLASTV ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( I , J ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) +* + CALL DGEMV( 'Transpose', J-I, I-1, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE, + $ T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T +* + CALL DGEMV( 'No transpose', I-1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, ONE, + $ T( 1, I ), 1 ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( N-K+I , J ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) +* + CALL DGEMV( 'Transpose', N-K+I-J, K-I, -TAU( I ), + $ V( J, I+1 ), LDV, V( J, I ), 1, ONE, + $ T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T +* + CALL DGEMV( 'No transpose', K-I, N-K+I-J, + $ -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), 1 ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL DTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of DLARFT +* + END diff --git a/dspl/liblapack/SRC/dlarfx.f b/dspl/liblapack/SRC/dlarfx.f new file mode 100644 index 0000000..260d367 --- /dev/null +++ b/dspl/liblapack/SRC/dlarfx.f @@ -0,0 +1,697 @@ +*> \brief \b DLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the reflector has order ≤ 10. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARFX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARFX( SIDE, M, N, V, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER LDC, M, N +* DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARFX applies a real elementary reflector H to a real m by n +*> matrix C, from either the left or the right. H is represented in the +*> form +*> +*> H = I - tau * v * v**T +*> +*> where tau is a real scalar and v is a real vector. +*> +*> If tau = 0, then H is taken to be the unit matrix +*> +*> This version uses inline code if H has order < 11. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (M) if SIDE = 'L' +*> or (N) if SIDE = 'R' +*> The vector v in the representation of H. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDA >= (1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> WORK is not referenced if H has order < 11. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLARFX( SIDE, M, N, V, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER LDC, M, N + DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER J + DOUBLE PRECISION SUM, T1, T10, T2, T3, T4, T5, T6, T7, T8, T9, + $ V1, V10, V2, V3, V4, V5, V6, V7, V8, V9 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLARF +* .. +* .. Executable Statements .. +* + IF( TAU.EQ.ZERO ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C, where H has order m. +* + GO TO ( 10, 30, 50, 70, 90, 110, 130, 150, + $ 170, 190 )M +* +* Code for general M +* + CALL DLARF( SIDE, M, N, V, 1, TAU, C, LDC, WORK ) + GO TO 410 + 10 CONTINUE +* +* Special code for 1 x 1 Householder +* + T1 = ONE - TAU*V( 1 )*V( 1 ) + DO 20 J = 1, N + C( 1, J ) = T1*C( 1, J ) + 20 CONTINUE + GO TO 410 + 30 CONTINUE +* +* Special code for 2 x 2 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + DO 40 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + 40 CONTINUE + GO TO 410 + 50 CONTINUE +* +* Special code for 3 x 3 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + DO 60 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + 60 CONTINUE + GO TO 410 + 70 CONTINUE +* +* Special code for 4 x 4 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + DO 80 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + 80 CONTINUE + GO TO 410 + 90 CONTINUE +* +* Special code for 5 x 5 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + DO 100 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + 100 CONTINUE + GO TO 410 + 110 CONTINUE +* +* Special code for 6 x 6 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + DO 120 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + 120 CONTINUE + GO TO 410 + 130 CONTINUE +* +* Special code for 7 x 7 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + DO 140 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + 140 CONTINUE + GO TO 410 + 150 CONTINUE +* +* Special code for 8 x 8 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + DO 160 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + 160 CONTINUE + GO TO 410 + 170 CONTINUE +* +* Special code for 9 x 9 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + V9 = V( 9 ) + T9 = TAU*V9 + DO 180 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + C( 9, J ) = C( 9, J ) - SUM*T9 + 180 CONTINUE + GO TO 410 + 190 CONTINUE +* +* Special code for 10 x 10 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + V9 = V( 9 ) + T9 = TAU*V9 + V10 = V( 10 ) + T10 = TAU*V10 + DO 200 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) + + $ V10*C( 10, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + C( 9, J ) = C( 9, J ) - SUM*T9 + C( 10, J ) = C( 10, J ) - SUM*T10 + 200 CONTINUE + GO TO 410 + ELSE +* +* Form C * H, where H has order n. +* + GO TO ( 210, 230, 250, 270, 290, 310, 330, 350, + $ 370, 390 )N +* +* Code for general N +* + CALL DLARF( SIDE, M, N, V, 1, TAU, C, LDC, WORK ) + GO TO 410 + 210 CONTINUE +* +* Special code for 1 x 1 Householder +* + T1 = ONE - TAU*V( 1 )*V( 1 ) + DO 220 J = 1, M + C( J, 1 ) = T1*C( J, 1 ) + 220 CONTINUE + GO TO 410 + 230 CONTINUE +* +* Special code for 2 x 2 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + DO 240 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + 240 CONTINUE + GO TO 410 + 250 CONTINUE +* +* Special code for 3 x 3 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + DO 260 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + 260 CONTINUE + GO TO 410 + 270 CONTINUE +* +* Special code for 4 x 4 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + DO 280 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + 280 CONTINUE + GO TO 410 + 290 CONTINUE +* +* Special code for 5 x 5 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + DO 300 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + 300 CONTINUE + GO TO 410 + 310 CONTINUE +* +* Special code for 6 x 6 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + DO 320 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + 320 CONTINUE + GO TO 410 + 330 CONTINUE +* +* Special code for 7 x 7 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + DO 340 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + 340 CONTINUE + GO TO 410 + 350 CONTINUE +* +* Special code for 8 x 8 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + DO 360 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + 360 CONTINUE + GO TO 410 + 370 CONTINUE +* +* Special code for 9 x 9 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + V9 = V( 9 ) + T9 = TAU*V9 + DO 380 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + C( J, 9 ) = C( J, 9 ) - SUM*T9 + 380 CONTINUE + GO TO 410 + 390 CONTINUE +* +* Special code for 10 x 10 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + V9 = V( 9 ) + T9 = TAU*V9 + V10 = V( 10 ) + T10 = TAU*V10 + DO 400 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) + + $ V10*C( J, 10 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + C( J, 9 ) = C( J, 9 ) - SUM*T9 + C( J, 10 ) = C( J, 10 ) - SUM*T10 + 400 CONTINUE + GO TO 410 + END IF + 410 CONTINUE + RETURN +* +* End of DLARFX +* + END diff --git a/dspl/liblapack/SRC/dlarfy.f b/dspl/liblapack/SRC/dlarfy.f new file mode 100644 index 0000000..a0b0ebb --- /dev/null +++ b/dspl/liblapack/SRC/dlarfy.f @@ -0,0 +1,161 @@ +*> \brief \b DLARFY +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE DLARFY( UPLO, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INCV, LDC, N +* DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARFY applies an elementary reflector, or Householder matrix, H, +*> to an n x n symmetric matrix C, from both the left and the right. +*> +*> H is represented in the form +*> +*> H = I - tau * v * v' +*> +*> where tau is a scalar and v is a vector. +*> +*> If tau is zero, then H is taken to be the unit matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix C is stored. +*> = 'U': Upper triangle +*> = 'L': Lower triangle +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (1 + (N-1)*abs(INCV)) +*> The vector v as described above. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between successive elements of v. INCV must +*> not be zero. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> The value tau as described above. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC, N) +*> On entry, the matrix C. +*> On exit, C is overwritten by H * C * H'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max( 1, N ). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup double_eig +* +* ===================================================================== + SUBROUTINE DLARFY( UPLO, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCV, LDC, N + DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO, HALF + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0, HALF = 0.5D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION ALPHA +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DSYMV, DSYR2 +* .. +* .. External Functions .. + DOUBLE PRECISION DDOT + EXTERNAL DDOT +* .. +* .. Executable Statements .. +* + IF( TAU.EQ.ZERO ) + $ RETURN +* +* Form w:= C * v +* + CALL DSYMV( UPLO, N, ONE, C, LDC, V, INCV, ZERO, WORK, 1 ) +* + ALPHA = -HALF*TAU*DDOT( N, WORK, 1, V, INCV ) + CALL DAXPY( N, ALPHA, V, INCV, WORK, 1 ) +* +* C := C - v * w' - w * v' +* + CALL DSYR2( UPLO, N, -TAU, V, INCV, WORK, 1, C, LDC ) +* + RETURN +* +* End of DLARFY +* + END diff --git a/dspl/liblapack/SRC/dlargv.f b/dspl/liblapack/SRC/dlargv.f new file mode 100644 index 0000000..f28bcec --- /dev/null +++ b/dspl/liblapack/SRC/dlargv.f @@ -0,0 +1,167 @@ +*> \brief \b DLARGV generates a vector of plane rotations with real cosines and real sines. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARGV( N, X, INCX, Y, INCY, C, INCC ) +* +* .. Scalar Arguments .. +* INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( * ), X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARGV generates a vector of real plane rotations, determined by +*> elements of the real vectors x and y. For i = 1,2,...,n +*> +*> ( c(i) s(i) ) ( x(i) ) = ( a(i) ) +*> ( -s(i) c(i) ) ( y(i) ) = ( 0 ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of plane rotations to be generated. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, +*> dimension (1+(N-1)*INCX) +*> On entry, the vector x. +*> On exit, x(i) is overwritten by a(i), for i = 1,...,n. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, +*> dimension (1+(N-1)*INCY) +*> On entry, the vector y. +*> On exit, the sines of the plane rotations. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> The increment between elements of Y. INCY > 0. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (1+(N-1)*INCC) +*> The cosines of the plane rotations. +*> \endverbatim +*> +*> \param[in] INCC +*> \verbatim +*> INCC is INTEGER +*> The increment between elements of C. INCC > 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLARGV( N, X, INCX, Y, INCY, C, INCC ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ), X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IC, IX, IY + DOUBLE PRECISION F, G, T, TT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IX = 1 + IY = 1 + IC = 1 + DO 10 I = 1, N + F = X( IX ) + G = Y( IY ) + IF( G.EQ.ZERO ) THEN + C( IC ) = ONE + ELSE IF( F.EQ.ZERO ) THEN + C( IC ) = ZERO + Y( IY ) = ONE + X( IX ) = G + ELSE IF( ABS( F ).GT.ABS( G ) ) THEN + T = G / F + TT = SQRT( ONE+T*T ) + C( IC ) = ONE / TT + Y( IY ) = T*C( IC ) + X( IX ) = F*TT + ELSE + T = F / G + TT = SQRT( ONE+T*T ) + Y( IY ) = ONE / TT + C( IC ) = T*Y( IY ) + X( IX ) = G*TT + END IF + IC = IC + INCC + IY = IY + INCY + IX = IX + INCX + 10 CONTINUE + RETURN +* +* End of DLARGV +* + END diff --git a/dspl/liblapack/SRC/dlarnv.f b/dspl/liblapack/SRC/dlarnv.f new file mode 100644 index 0000000..02e62bc --- /dev/null +++ b/dspl/liblapack/SRC/dlarnv.f @@ -0,0 +1,178 @@ +*> \brief \b DLARNV returns a vector of random numbers from a uniform or normal distribution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARNV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARNV( IDIST, ISEED, N, X ) +* +* .. Scalar Arguments .. +* INTEGER IDIST, N +* .. +* .. Array Arguments .. +* INTEGER ISEED( 4 ) +* DOUBLE PRECISION X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARNV returns a vector of n random real numbers from a uniform or +*> normal distribution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] IDIST +*> \verbatim +*> IDIST is INTEGER +*> Specifies the distribution of the random numbers: +*> = 1: uniform (0,1) +*> = 2: uniform (-1,1) +*> = 3: normal (0,1) +*> \endverbatim +*> +*> \param[in,out] ISEED +*> \verbatim +*> ISEED is INTEGER array, dimension (4) +*> On entry, the seed of the random number generator; the array +*> elements must be between 0 and 4095, and ISEED(4) must be +*> odd. +*> On exit, the seed is updated. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of random numbers to be generated. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (N) +*> The generated random numbers. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> This routine calls the auxiliary routine DLARUV to generate random +*> real numbers from a uniform (0,1) distribution, in batches of up to +*> 128 using vectorisable code. The Box-Muller method is used to +*> transform numbers from a uniform to a normal distribution. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLARNV( IDIST, ISEED, N, X ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IDIST, N +* .. +* .. Array Arguments .. + INTEGER ISEED( 4 ) + DOUBLE PRECISION X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, TWO + PARAMETER ( ONE = 1.0D+0, TWO = 2.0D+0 ) + INTEGER LV + PARAMETER ( LV = 128 ) + DOUBLE PRECISION TWOPI + PARAMETER ( TWOPI = 6.2831853071795864769252867663D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IL, IL2, IV +* .. +* .. Local Arrays .. + DOUBLE PRECISION U( LV ) +* .. +* .. Intrinsic Functions .. + INTRINSIC COS, LOG, MIN, SQRT +* .. +* .. External Subroutines .. + EXTERNAL DLARUV +* .. +* .. Executable Statements .. +* + DO 40 IV = 1, N, LV / 2 + IL = MIN( LV / 2, N-IV+1 ) + IF( IDIST.EQ.3 ) THEN + IL2 = 2*IL + ELSE + IL2 = IL + END IF +* +* Call DLARUV to generate IL2 numbers from a uniform (0,1) +* distribution (IL2 <= LV) +* + CALL DLARUV( ISEED, IL2, U ) +* + IF( IDIST.EQ.1 ) THEN +* +* Copy generated numbers +* + DO 10 I = 1, IL + X( IV+I-1 ) = U( I ) + 10 CONTINUE + ELSE IF( IDIST.EQ.2 ) THEN +* +* Convert generated numbers to uniform (-1,1) distribution +* + DO 20 I = 1, IL + X( IV+I-1 ) = TWO*U( I ) - ONE + 20 CONTINUE + ELSE IF( IDIST.EQ.3 ) THEN +* +* Convert generated numbers to normal (0,1) distribution +* + DO 30 I = 1, IL + X( IV+I-1 ) = SQRT( -TWO*LOG( U( 2*I-1 ) ) )* + $ COS( TWOPI*U( 2*I ) ) + 30 CONTINUE + END IF + 40 CONTINUE + RETURN +* +* End of DLARNV +* + END diff --git a/dspl/liblapack/SRC/dlarra.f b/dspl/liblapack/SRC/dlarra.f new file mode 100644 index 0000000..7406a8f --- /dev/null +++ b/dspl/liblapack/SRC/dlarra.f @@ -0,0 +1,210 @@ +*> \brief \b DLARRA computes the splitting points with the specified threshold. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARRA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARRA( N, D, E, E2, SPLTOL, TNRM, +* NSPLIT, ISPLIT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N, NSPLIT +* DOUBLE PRECISION SPLTOL, TNRM +* .. +* .. Array Arguments .. +* INTEGER ISPLIT( * ) +* DOUBLE PRECISION D( * ), E( * ), E2( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Compute the splitting points with threshold SPLTOL. +*> DLARRA sets any "small" off-diagonal elements to zero. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N > 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the N diagonal elements of the tridiagonal +*> matrix T. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> On entry, the first (N-1) entries contain the subdiagonal +*> elements of the tridiagonal matrix T; E(N) need not be set. +*> On exit, the entries E( ISPLIT( I ) ), 1 <= I <= NSPLIT, +*> are set to zero, the other entries of E are untouched. +*> \endverbatim +*> +*> \param[in,out] E2 +*> \verbatim +*> E2 is DOUBLE PRECISION array, dimension (N) +*> On entry, the first (N-1) entries contain the SQUARES of the +*> subdiagonal elements of the tridiagonal matrix T; +*> E2(N) need not be set. +*> On exit, the entries E2( ISPLIT( I ) ), +*> 1 <= I <= NSPLIT, have been set to zero +*> \endverbatim +*> +*> \param[in] SPLTOL +*> \verbatim +*> SPLTOL is DOUBLE PRECISION +*> The threshold for splitting. Two criteria can be used: +*> SPLTOL<0 : criterion based on absolute off-diagonal value +*> SPLTOL>0 : criterion that preserves relative accuracy +*> \endverbatim +*> +*> \param[in] TNRM +*> \verbatim +*> TNRM is DOUBLE PRECISION +*> The norm of the matrix. +*> \endverbatim +*> +*> \param[out] NSPLIT +*> \verbatim +*> NSPLIT is INTEGER +*> The number of blocks T splits into. 1 <= NSPLIT <= N. +*> \endverbatim +*> +*> \param[out] ISPLIT +*> \verbatim +*> ISPLIT is INTEGER array, dimension (N) +*> The splitting points, at which T breaks up into blocks. +*> The first block consists of rows/columns 1 to ISPLIT(1), +*> the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), +*> etc., and the NSPLIT-th consists of rows/columns +*> ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE DLARRA( N, D, E, E2, SPLTOL, TNRM, + $ NSPLIT, ISPLIT, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, N, NSPLIT + DOUBLE PRECISION SPLTOL, TNRM +* .. +* .. Array Arguments .. + INTEGER ISPLIT( * ) + DOUBLE PRECISION D( * ), E( * ), E2( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION EABS, TMP1 + +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* +* Compute splitting points + NSPLIT = 1 + IF(SPLTOL.LT.ZERO) THEN +* Criterion based on absolute off-diagonal value + TMP1 = ABS(SPLTOL)* TNRM + DO 9 I = 1, N-1 + EABS = ABS( E(I) ) + IF( EABS .LE. TMP1) THEN + E(I) = ZERO + E2(I) = ZERO + ISPLIT( NSPLIT ) = I + NSPLIT = NSPLIT + 1 + END IF + 9 CONTINUE + ELSE +* Criterion that guarantees relative accuracy + DO 10 I = 1, N-1 + EABS = ABS( E(I) ) + IF( EABS .LE. SPLTOL * SQRT(ABS(D(I)))*SQRT(ABS(D(I+1))) ) + $ THEN + E(I) = ZERO + E2(I) = ZERO + ISPLIT( NSPLIT ) = I + NSPLIT = NSPLIT + 1 + END IF + 10 CONTINUE + ENDIF + ISPLIT( NSPLIT ) = N + + RETURN +* +* End of DLARRA +* + END diff --git a/dspl/liblapack/SRC/dlarrb.f b/dspl/liblapack/SRC/dlarrb.f new file mode 100644 index 0000000..2b6389e --- /dev/null +++ b/dspl/liblapack/SRC/dlarrb.f @@ -0,0 +1,407 @@ +*> \brief \b DLARRB provides limited bisection to locate eigenvalues for more accuracy. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARRB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARRB( N, D, LLD, IFIRST, ILAST, RTOL1, +* RTOL2, OFFSET, W, WGAP, WERR, WORK, IWORK, +* PIVMIN, SPDIAM, TWIST, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IFIRST, ILAST, INFO, N, OFFSET, TWIST +* DOUBLE PRECISION PIVMIN, RTOL1, RTOL2, SPDIAM +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION D( * ), LLD( * ), W( * ), +* $ WERR( * ), WGAP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Given the relatively robust representation(RRR) L D L^T, DLARRB +*> does "limited" bisection to refine the eigenvalues of L D L^T, +*> W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial +*> guesses for these eigenvalues are input in W, the corresponding estimate +*> of the error in these guesses and their gaps are input in WERR +*> and WGAP, respectively. During bisection, intervals +*> [left, right] are maintained by storing their mid-points and +*> semi-widths in the arrays W and WERR respectively. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The N diagonal elements of the diagonal matrix D. +*> \endverbatim +*> +*> \param[in] LLD +*> \verbatim +*> LLD is DOUBLE PRECISION array, dimension (N-1) +*> The (N-1) elements L(i)*L(i)*D(i). +*> \endverbatim +*> +*> \param[in] IFIRST +*> \verbatim +*> IFIRST is INTEGER +*> The index of the first eigenvalue to be computed. +*> \endverbatim +*> +*> \param[in] ILAST +*> \verbatim +*> ILAST is INTEGER +*> The index of the last eigenvalue to be computed. +*> \endverbatim +*> +*> \param[in] RTOL1 +*> \verbatim +*> RTOL1 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] RTOL2 +*> \verbatim +*> RTOL2 is DOUBLE PRECISION +*> Tolerance for the convergence of the bisection intervals. +*> An interval [LEFT,RIGHT] has converged if +*> RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) +*> where GAP is the (estimated) distance to the nearest +*> eigenvalue. +*> \endverbatim +*> +*> \param[in] OFFSET +*> \verbatim +*> OFFSET is INTEGER +*> Offset for the arrays W, WGAP and WERR, i.e., the IFIRST-OFFSET +*> through ILAST-OFFSET elements of these arrays are to be used. +*> \endverbatim +*> +*> \param[in,out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are +*> estimates of the eigenvalues of L D L^T indexed IFIRST through +*> ILAST. +*> On output, these estimates are refined. +*> \endverbatim +*> +*> \param[in,out] WGAP +*> \verbatim +*> WGAP is DOUBLE PRECISION array, dimension (N-1) +*> On input, the (estimated) gaps between consecutive +*> eigenvalues of L D L^T, i.e., WGAP(I-OFFSET) is the gap between +*> eigenvalues I and I+1. Note that if IFIRST.EQ.ILAST +*> then WGAP(IFIRST-OFFSET) must be set to ZERO. +*> On output, these gaps are refined. +*> \endverbatim +*> +*> \param[in,out] WERR +*> \verbatim +*> WERR is DOUBLE PRECISION array, dimension (N) +*> On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are +*> the errors in the estimates of the corresponding elements in W. +*> On output, these errors are refined. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> Workspace. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (2*N) +*> Workspace. +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is DOUBLE PRECISION +*> The minimum pivot in the Sturm sequence. +*> \endverbatim +*> +*> \param[in] SPDIAM +*> \verbatim +*> SPDIAM is DOUBLE PRECISION +*> The spectral diameter of the matrix. +*> \endverbatim +*> +*> \param[in] TWIST +*> \verbatim +*> TWIST is INTEGER +*> The twist index for the twisted factorization that is used +*> for the negcount. +*> TWIST = N: Compute negcount from L D L^T - LAMBDA I = L+ D+ L+^T +*> TWIST = 1: Compute negcount from L D L^T - LAMBDA I = U- D- U-^T +*> TWIST = R: Compute negcount from L D L^T - LAMBDA I = N(r) D(r) N(r) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> Error flag. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE DLARRB( N, D, LLD, IFIRST, ILAST, RTOL1, + $ RTOL2, OFFSET, W, WGAP, WERR, WORK, IWORK, + $ PIVMIN, SPDIAM, TWIST, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER IFIRST, ILAST, INFO, N, OFFSET, TWIST + DOUBLE PRECISION PIVMIN, RTOL1, RTOL2, SPDIAM +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), LLD( * ), W( * ), + $ WERR( * ), WGAP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, TWO, HALF + PARAMETER ( ZERO = 0.0D0, TWO = 2.0D0, + $ HALF = 0.5D0 ) + INTEGER MAXITR +* .. +* .. Local Scalars .. + INTEGER I, I1, II, IP, ITER, K, NEGCNT, NEXT, NINT, + $ OLNINT, PREV, R + DOUBLE PRECISION BACK, CVRGD, GAP, LEFT, LGAP, MID, MNWDTH, + $ RGAP, RIGHT, TMP, WIDTH +* .. +* .. External Functions .. + INTEGER DLANEG + EXTERNAL DLANEG +* +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* + MAXITR = INT( ( LOG( SPDIAM+PIVMIN )-LOG( PIVMIN ) ) / + $ LOG( TWO ) ) + 2 + MNWDTH = TWO * PIVMIN +* + R = TWIST + IF((R.LT.1).OR.(R.GT.N)) R = N +* +* Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. +* The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while +* Count( WORK(2*I) ) is stored in IWORK( 2*I ). The integer IWORK( 2*I-1 ) +* for an unconverged interval is set to the index of the next unconverged +* interval, and is -1 or 0 for a converged interval. Thus a linked +* list of unconverged intervals is set up. +* + I1 = IFIRST +* The number of unconverged intervals + NINT = 0 +* The last unconverged interval found + PREV = 0 + + RGAP = WGAP( I1-OFFSET ) + DO 75 I = I1, ILAST + K = 2*I + II = I - OFFSET + LEFT = W( II ) - WERR( II ) + RIGHT = W( II ) + WERR( II ) + LGAP = RGAP + RGAP = WGAP( II ) + GAP = MIN( LGAP, RGAP ) + +* Make sure that [LEFT,RIGHT] contains the desired eigenvalue +* Compute negcount from dstqds facto L+D+L+^T = L D L^T - LEFT +* +* Do while( NEGCNT(LEFT).GT.I-1 ) +* + BACK = WERR( II ) + 20 CONTINUE + NEGCNT = DLANEG( N, D, LLD, LEFT, PIVMIN, R ) + IF( NEGCNT.GT.I-1 ) THEN + LEFT = LEFT - BACK + BACK = TWO*BACK + GO TO 20 + END IF +* +* Do while( NEGCNT(RIGHT).LT.I ) +* Compute negcount from dstqds facto L+D+L+^T = L D L^T - RIGHT +* + BACK = WERR( II ) + 50 CONTINUE + + NEGCNT = DLANEG( N, D, LLD, RIGHT, PIVMIN, R ) + IF( NEGCNT.LT.I ) THEN + RIGHT = RIGHT + BACK + BACK = TWO*BACK + GO TO 50 + END IF + WIDTH = HALF*ABS( LEFT - RIGHT ) + TMP = MAX( ABS( LEFT ), ABS( RIGHT ) ) + CVRGD = MAX(RTOL1*GAP,RTOL2*TMP) + IF( WIDTH.LE.CVRGD .OR. WIDTH.LE.MNWDTH ) THEN +* This interval has already converged and does not need refinement. +* (Note that the gaps might change through refining the +* eigenvalues, however, they can only get bigger.) +* Remove it from the list. + IWORK( K-1 ) = -1 +* Make sure that I1 always points to the first unconverged interval + IF((I.EQ.I1).AND.(I.LT.ILAST)) I1 = I + 1 + IF((PREV.GE.I1).AND.(I.LE.ILAST)) IWORK( 2*PREV-1 ) = I + 1 + ELSE +* unconverged interval found + PREV = I + NINT = NINT + 1 + IWORK( K-1 ) = I + 1 + IWORK( K ) = NEGCNT + END IF + WORK( K-1 ) = LEFT + WORK( K ) = RIGHT + 75 CONTINUE + +* +* Do while( NINT.GT.0 ), i.e. there are still unconverged intervals +* and while (ITER.LT.MAXITR) +* + ITER = 0 + 80 CONTINUE + PREV = I1 - 1 + I = I1 + OLNINT = NINT + + DO 100 IP = 1, OLNINT + K = 2*I + II = I - OFFSET + RGAP = WGAP( II ) + LGAP = RGAP + IF(II.GT.1) LGAP = WGAP( II-1 ) + GAP = MIN( LGAP, RGAP ) + NEXT = IWORK( K-1 ) + LEFT = WORK( K-1 ) + RIGHT = WORK( K ) + MID = HALF*( LEFT + RIGHT ) + +* semiwidth of interval + WIDTH = RIGHT - MID + TMP = MAX( ABS( LEFT ), ABS( RIGHT ) ) + CVRGD = MAX(RTOL1*GAP,RTOL2*TMP) + IF( ( WIDTH.LE.CVRGD ) .OR. ( WIDTH.LE.MNWDTH ).OR. + $ ( ITER.EQ.MAXITR ) )THEN +* reduce number of unconverged intervals + NINT = NINT - 1 +* Mark interval as converged. + IWORK( K-1 ) = 0 + IF( I1.EQ.I ) THEN + I1 = NEXT + ELSE +* Prev holds the last unconverged interval previously examined + IF(PREV.GE.I1) IWORK( 2*PREV-1 ) = NEXT + END IF + I = NEXT + GO TO 100 + END IF + PREV = I +* +* Perform one bisection step +* + NEGCNT = DLANEG( N, D, LLD, MID, PIVMIN, R ) + IF( NEGCNT.LE.I-1 ) THEN + WORK( K-1 ) = MID + ELSE + WORK( K ) = MID + END IF + I = NEXT + 100 CONTINUE + ITER = ITER + 1 +* do another loop if there are still unconverged intervals +* However, in the last iteration, all intervals are accepted +* since this is the best we can do. + IF( ( NINT.GT.0 ).AND.(ITER.LE.MAXITR) ) GO TO 80 +* +* +* At this point, all the intervals have converged + DO 110 I = IFIRST, ILAST + K = 2*I + II = I - OFFSET +* All intervals marked by '0' have been refined. + IF( IWORK( K-1 ).EQ.0 ) THEN + W( II ) = HALF*( WORK( K-1 )+WORK( K ) ) + WERR( II ) = WORK( K ) - W( II ) + END IF + 110 CONTINUE +* + DO 111 I = IFIRST+1, ILAST + K = 2*I + II = I - OFFSET + WGAP( II-1 ) = MAX( ZERO, + $ W(II) - WERR (II) - W( II-1 ) - WERR( II-1 )) + 111 CONTINUE + + RETURN +* +* End of DLARRB +* + END diff --git a/dspl/liblapack/SRC/dlarrc.f b/dspl/liblapack/SRC/dlarrc.f new file mode 100644 index 0000000..093bfa0 --- /dev/null +++ b/dspl/liblapack/SRC/dlarrc.f @@ -0,0 +1,251 @@ +*> \brief \b DLARRC computes the number of eigenvalues of the symmetric tridiagonal matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARRC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARRC( JOBT, N, VL, VU, D, E, PIVMIN, +* EIGCNT, LCNT, RCNT, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBT +* INTEGER EIGCNT, INFO, LCNT, N, RCNT +* DOUBLE PRECISION PIVMIN, VL, VU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Find the number of eigenvalues of the symmetric tridiagonal matrix T +*> that are in the interval (VL,VU] if JOBT = 'T', and of L D L^T +*> if JOBT = 'L'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBT +*> \verbatim +*> JOBT is CHARACTER*1 +*> = 'T': Compute Sturm count for matrix T. +*> = 'L': Compute Sturm count for matrix L D L^T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N > 0. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> The lower bound for the eigenvalues. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> The upper bound for the eigenvalues. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> JOBT = 'T': The N diagonal elements of the tridiagonal matrix T. +*> JOBT = 'L': The N diagonal elements of the diagonal matrix D. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> JOBT = 'T': The N-1 offdiagonal elements of the matrix T. +*> JOBT = 'L': The N-1 offdiagonal elements of the matrix L. +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is DOUBLE PRECISION +*> The minimum pivot in the Sturm sequence for T. +*> \endverbatim +*> +*> \param[out] EIGCNT +*> \verbatim +*> EIGCNT is INTEGER +*> The number of eigenvalues of the symmetric tridiagonal matrix T +*> that are in the interval (VL,VU] +*> \endverbatim +*> +*> \param[out] LCNT +*> \verbatim +*> LCNT is INTEGER +*> \endverbatim +*> +*> \param[out] RCNT +*> \verbatim +*> RCNT is INTEGER +*> The left and right negcounts of the interval. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE DLARRC( JOBT, N, VL, VU, D, E, PIVMIN, + $ EIGCNT, LCNT, RCNT, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBT + INTEGER EIGCNT, INFO, LCNT, N, RCNT + DOUBLE PRECISION PIVMIN, VL, VU +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I + LOGICAL MATT + DOUBLE PRECISION LPIVOT, RPIVOT, SL, SU, TMP, TMP2 + +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* + LCNT = 0 + RCNT = 0 + EIGCNT = 0 + MATT = LSAME( JOBT, 'T' ) + + + IF (MATT) THEN +* Sturm sequence count on T + LPIVOT = D( 1 ) - VL + RPIVOT = D( 1 ) - VU + IF( LPIVOT.LE.ZERO ) THEN + LCNT = LCNT + 1 + ENDIF + IF( RPIVOT.LE.ZERO ) THEN + RCNT = RCNT + 1 + ENDIF + DO 10 I = 1, N-1 + TMP = E(I)**2 + LPIVOT = ( D( I+1 )-VL ) - TMP/LPIVOT + RPIVOT = ( D( I+1 )-VU ) - TMP/RPIVOT + IF( LPIVOT.LE.ZERO ) THEN + LCNT = LCNT + 1 + ENDIF + IF( RPIVOT.LE.ZERO ) THEN + RCNT = RCNT + 1 + ENDIF + 10 CONTINUE + ELSE +* Sturm sequence count on L D L^T + SL = -VL + SU = -VU + DO 20 I = 1, N - 1 + LPIVOT = D( I ) + SL + RPIVOT = D( I ) + SU + IF( LPIVOT.LE.ZERO ) THEN + LCNT = LCNT + 1 + ENDIF + IF( RPIVOT.LE.ZERO ) THEN + RCNT = RCNT + 1 + ENDIF + TMP = E(I) * D(I) * E(I) +* + TMP2 = TMP / LPIVOT + IF( TMP2.EQ.ZERO ) THEN + SL = TMP - VL + ELSE + SL = SL*TMP2 - VL + END IF +* + TMP2 = TMP / RPIVOT + IF( TMP2.EQ.ZERO ) THEN + SU = TMP - VU + ELSE + SU = SU*TMP2 - VU + END IF + 20 CONTINUE + LPIVOT = D( N ) + SL + RPIVOT = D( N ) + SU + IF( LPIVOT.LE.ZERO ) THEN + LCNT = LCNT + 1 + ENDIF + IF( RPIVOT.LE.ZERO ) THEN + RCNT = RCNT + 1 + ENDIF + ENDIF + EIGCNT = RCNT - LCNT + + RETURN +* +* end of DLARRC +* + END diff --git a/dspl/liblapack/SRC/dlarrd.f b/dspl/liblapack/SRC/dlarrd.f new file mode 100644 index 0000000..5cc1052 --- /dev/null +++ b/dspl/liblapack/SRC/dlarrd.f @@ -0,0 +1,869 @@ +*> \brief \b DLARRD computes the eigenvalues of a symmetric tridiagonal matrix to suitable accuracy. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARRD( RANGE, ORDER, N, VL, VU, IL, IU, GERS, +* RELTOL, D, E, E2, PIVMIN, NSPLIT, ISPLIT, +* M, W, WERR, WL, WU, IBLOCK, INDEXW, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER ORDER, RANGE +* INTEGER IL, INFO, IU, M, N, NSPLIT +* DOUBLE PRECISION PIVMIN, RELTOL, VL, VU, WL, WU +* .. +* .. Array Arguments .. +* INTEGER IBLOCK( * ), INDEXW( * ), +* $ ISPLIT( * ), IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), E2( * ), +* $ GERS( * ), W( * ), WERR( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARRD computes the eigenvalues of a symmetric tridiagonal +*> matrix T to suitable accuracy. This is an auxiliary code to be +*> called from DSTEMR. +*> The user may ask for all eigenvalues, all eigenvalues +*> in the half-open interval (VL, VU], or the IL-th through IU-th +*> eigenvalues. +*> +*> To avoid overflow, the matrix must be scaled so that its +*> largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value, and for greatest +*> accuracy, it should not be much smaller than that. +*> +*> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal +*> Matrix", Report CS41, Computer Science Dept., Stanford +*> University, July 21, 1966. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': ("All") all eigenvalues will be found. +*> = 'V': ("Value") all eigenvalues in the half-open interval +*> (VL, VU] will be found. +*> = 'I': ("Index") the IL-th through IU-th eigenvalues (of the +*> entire matrix) will be found. +*> \endverbatim +*> +*> \param[in] ORDER +*> \verbatim +*> ORDER is CHARACTER*1 +*> = 'B': ("By Block") the eigenvalues will be grouped by +*> split-off block (see IBLOCK, ISPLIT) and +*> ordered from smallest to largest within +*> the block. +*> = 'E': ("Entire matrix") +*> the eigenvalues for the entire matrix +*> will be ordered from smallest to +*> largest. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the tridiagonal matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. Eigenvalues less than or equal +*> to VL, or greater than VU, will not be returned. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. Eigenvalues less than or equal +*> to VL, or greater than VU, will not be returned. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] GERS +*> \verbatim +*> GERS is DOUBLE PRECISION array, dimension (2*N) +*> The N Gerschgorin intervals (the i-th Gerschgorin interval +*> is (GERS(2*i-1), GERS(2*i)). +*> \endverbatim +*> +*> \param[in] RELTOL +*> \verbatim +*> RELTOL is DOUBLE PRECISION +*> The minimum relative width of an interval. When an interval +*> is narrower than RELTOL times the larger (in +*> magnitude) endpoint, then it is considered to be +*> sufficiently small, i.e., converged. Note: this should +*> always be at least radix*machine epsilon. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) off-diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[in] E2 +*> \verbatim +*> E2 is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) squared off-diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is DOUBLE PRECISION +*> The minimum pivot allowed in the Sturm sequence for T. +*> \endverbatim +*> +*> \param[in] NSPLIT +*> \verbatim +*> NSPLIT is INTEGER +*> The number of diagonal blocks in the matrix T. +*> 1 <= NSPLIT <= N. +*> \endverbatim +*> +*> \param[in] ISPLIT +*> \verbatim +*> ISPLIT is INTEGER array, dimension (N) +*> The splitting points, at which T breaks up into submatrices. +*> The first submatrix consists of rows/columns 1 to ISPLIT(1), +*> the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), +*> etc., and the NSPLIT-th consists of rows/columns +*> ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. +*> (Only the first NSPLIT elements will actually be used, but +*> since the user cannot know a priori what value NSPLIT will +*> have, N words must be reserved for ISPLIT.) +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The actual number of eigenvalues found. 0 <= M <= N. +*> (See also the description of INFO=2,3.) +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> On exit, the first M elements of W will contain the +*> eigenvalue approximations. DLARRD computes an interval +*> I_j = (a_j, b_j] that includes eigenvalue j. The eigenvalue +*> approximation is given as the interval midpoint +*> W(j)= ( a_j + b_j)/2. The corresponding error is bounded by +*> WERR(j) = abs( a_j - b_j)/2 +*> \endverbatim +*> +*> \param[out] WERR +*> \verbatim +*> WERR is DOUBLE PRECISION array, dimension (N) +*> The error bound on the corresponding eigenvalue approximation +*> in W. +*> \endverbatim +*> +*> \param[out] WL +*> \verbatim +*> WL is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] WU +*> \verbatim +*> WU is DOUBLE PRECISION +*> The interval (WL, WU] contains all the wanted eigenvalues. +*> If RANGE='V', then WL=VL and WU=VU. +*> If RANGE='A', then WL and WU are the global Gerschgorin bounds +*> on the spectrum. +*> If RANGE='I', then WL and WU are computed by DLAEBZ from the +*> index range specified. +*> \endverbatim +*> +*> \param[out] IBLOCK +*> \verbatim +*> IBLOCK is INTEGER array, dimension (N) +*> At each row/column j where E(j) is zero or small, the +*> matrix T is considered to split into a block diagonal +*> matrix. On exit, if INFO = 0, IBLOCK(i) specifies to which +*> block (from 1 to the number of blocks) the eigenvalue W(i) +*> belongs. (DLARRD may use the remaining N-M elements as +*> workspace.) +*> \endverbatim +*> +*> \param[out] INDEXW +*> \verbatim +*> INDEXW is INTEGER array, dimension (N) +*> The indices of the eigenvalues within each block (submatrix); +*> for example, INDEXW(i)= j and IBLOCK(i)=k imply that the +*> i-th eigenvalue W(i) is the j-th eigenvalue in block k. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: some or all of the eigenvalues failed to converge or +*> were not computed: +*> =1 or 3: Bisection failed to converge for some +*> eigenvalues; these eigenvalues are flagged by a +*> negative block number. The effect is that the +*> eigenvalues may not be as accurate as the +*> absolute and relative tolerances. This is +*> generally caused by unexpectedly inaccurate +*> arithmetic. +*> =2 or 3: RANGE='I' only: Not all of the eigenvalues +*> IL:IU were found. +*> Effect: M < IU+1-IL +*> Cause: non-monotonic arithmetic, causing the +*> Sturm sequence to be non-monotonic. +*> Cure: recalculate, using RANGE='A', and pick +*> out eigenvalues IL:IU. In some cases, +*> increasing the PARAMETER "FUDGE" may +*> make things work. +*> = 4: RANGE='I', and the Gershgorin interval +*> initially used was too small. No eigenvalues +*> were computed. +*> Probable cause: your machine has sloppy +*> floating-point arithmetic. +*> Cure: Increase the PARAMETER "FUDGE", +*> recompile, and try again. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> FUDGE DOUBLE PRECISION, default = 2 +*> A "fudge factor" to widen the Gershgorin intervals. Ideally, +*> a value of 1 should work, but on machines with sloppy +*> arithmetic, this needs to be larger. The default for +*> publicly released versions should be large enough to handle +*> the worst machine around. Note that this has no effect +*> on accuracy of the solution. +*> \endverbatim +*> +*> \par Contributors: +* ================== +*> +*> W. Kahan, University of California, Berkeley, USA \n +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA \n +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLARRD( RANGE, ORDER, N, VL, VU, IL, IU, GERS, + $ RELTOL, D, E, E2, PIVMIN, NSPLIT, ISPLIT, + $ M, W, WERR, WL, WU, IBLOCK, INDEXW, + $ WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER ORDER, RANGE + INTEGER IL, INFO, IU, M, N, NSPLIT + DOUBLE PRECISION PIVMIN, RELTOL, VL, VU, WL, WU +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), INDEXW( * ), + $ ISPLIT( * ), IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), E2( * ), + $ GERS( * ), W( * ), WERR( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, HALF, FUDGE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, + $ TWO = 2.0D0, HALF = ONE/TWO, + $ FUDGE = TWO ) + INTEGER ALLRNG, VALRNG, INDRNG + PARAMETER ( ALLRNG = 1, VALRNG = 2, INDRNG = 3 ) +* .. +* .. Local Scalars .. + LOGICAL NCNVRG, TOOFEW + INTEGER I, IB, IBEGIN, IDISCL, IDISCU, IE, IEND, IINFO, + $ IM, IN, IOFF, IOUT, IRANGE, ITMAX, ITMP1, + $ ITMP2, IW, IWOFF, J, JBLK, JDISC, JE, JEE, NB, + $ NWL, NWU + DOUBLE PRECISION ATOLI, EPS, GL, GU, RTOLI, TMP1, TMP2, + $ TNORM, UFLOW, WKILL, WLU, WUL + +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, ILAENV, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLAEBZ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* +* Decode RANGE +* + IF( LSAME( RANGE, 'A' ) ) THEN + IRANGE = ALLRNG + ELSE IF( LSAME( RANGE, 'V' ) ) THEN + IRANGE = VALRNG + ELSE IF( LSAME( RANGE, 'I' ) ) THEN + IRANGE = INDRNG + ELSE + IRANGE = 0 + END IF +* +* Check for Errors +* + IF( IRANGE.LE.0 ) THEN + INFO = -1 + ELSE IF( .NOT.(LSAME(ORDER,'B').OR.LSAME(ORDER,'E')) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( IRANGE.EQ.VALRNG ) THEN + IF( VL.GE.VU ) + $ INFO = -5 + ELSE IF( IRANGE.EQ.INDRNG .AND. + $ ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) THEN + INFO = -6 + ELSE IF( IRANGE.EQ.INDRNG .AND. + $ ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + RETURN + END IF + +* Initialize error flags + INFO = 0 + NCNVRG = .FALSE. + TOOFEW = .FALSE. + +* Quick return if possible + M = 0 + IF( N.EQ.0 ) RETURN + +* Simplification: + IF( IRANGE.EQ.INDRNG .AND. IL.EQ.1 .AND. IU.EQ.N ) IRANGE = 1 + +* Get machine constants + EPS = DLAMCH( 'P' ) + UFLOW = DLAMCH( 'U' ) + + +* Special Case when N=1 +* Treat case of 1x1 matrix for quick return + IF( N.EQ.1 ) THEN + IF( (IRANGE.EQ.ALLRNG).OR. + $ ((IRANGE.EQ.VALRNG).AND.(D(1).GT.VL).AND.(D(1).LE.VU)).OR. + $ ((IRANGE.EQ.INDRNG).AND.(IL.EQ.1).AND.(IU.EQ.1)) ) THEN + M = 1 + W(1) = D(1) +* The computation error of the eigenvalue is zero + WERR(1) = ZERO + IBLOCK( 1 ) = 1 + INDEXW( 1 ) = 1 + ENDIF + RETURN + END IF + +* NB is the minimum vector length for vector bisection, or 0 +* if only scalar is to be done. + NB = ILAENV( 1, 'DSTEBZ', ' ', N, -1, -1, -1 ) + IF( NB.LE.1 ) NB = 0 + +* Find global spectral radius + GL = D(1) + GU = D(1) + DO 5 I = 1,N + GL = MIN( GL, GERS( 2*I - 1)) + GU = MAX( GU, GERS(2*I) ) + 5 CONTINUE +* Compute global Gerschgorin bounds and spectral diameter + TNORM = MAX( ABS( GL ), ABS( GU ) ) + GL = GL - FUDGE*TNORM*EPS*N - FUDGE*TWO*PIVMIN + GU = GU + FUDGE*TNORM*EPS*N + FUDGE*TWO*PIVMIN +* [JAN/28/2009] remove the line below since SPDIAM variable not use +* SPDIAM = GU - GL +* Input arguments for DLAEBZ: +* The relative tolerance. An interval (a,b] lies within +* "relative tolerance" if b-a < RELTOL*max(|a|,|b|), + RTOLI = RELTOL +* Set the absolute tolerance for interval convergence to zero to force +* interval convergence based on relative size of the interval. +* This is dangerous because intervals might not converge when RELTOL is +* small. But at least a very small number should be selected so that for +* strongly graded matrices, the code can get relatively accurate +* eigenvalues. + ATOLI = FUDGE*TWO*UFLOW + FUDGE*TWO*PIVMIN + + IF( IRANGE.EQ.INDRNG ) THEN + +* RANGE='I': Compute an interval containing eigenvalues +* IL through IU. The initial interval [GL,GU] from the global +* Gerschgorin bounds GL and GU is refined by DLAEBZ. + ITMAX = INT( ( LOG( TNORM+PIVMIN )-LOG( PIVMIN ) ) / + $ LOG( TWO ) ) + 2 + WORK( N+1 ) = GL + WORK( N+2 ) = GL + WORK( N+3 ) = GU + WORK( N+4 ) = GU + WORK( N+5 ) = GL + WORK( N+6 ) = GU + IWORK( 1 ) = -1 + IWORK( 2 ) = -1 + IWORK( 3 ) = N + 1 + IWORK( 4 ) = N + 1 + IWORK( 5 ) = IL - 1 + IWORK( 6 ) = IU +* + CALL DLAEBZ( 3, ITMAX, N, 2, 2, NB, ATOLI, RTOLI, PIVMIN, + $ D, E, E2, IWORK( 5 ), WORK( N+1 ), WORK( N+5 ), IOUT, + $ IWORK, W, IBLOCK, IINFO ) + IF( IINFO .NE. 0 ) THEN + INFO = IINFO + RETURN + END IF +* On exit, output intervals may not be ordered by ascending negcount + IF( IWORK( 6 ).EQ.IU ) THEN + WL = WORK( N+1 ) + WLU = WORK( N+3 ) + NWL = IWORK( 1 ) + WU = WORK( N+4 ) + WUL = WORK( N+2 ) + NWU = IWORK( 4 ) + ELSE + WL = WORK( N+2 ) + WLU = WORK( N+4 ) + NWL = IWORK( 2 ) + WU = WORK( N+3 ) + WUL = WORK( N+1 ) + NWU = IWORK( 3 ) + END IF +* On exit, the interval [WL, WLU] contains a value with negcount NWL, +* and [WUL, WU] contains a value with negcount NWU. + IF( NWL.LT.0 .OR. NWL.GE.N .OR. NWU.LT.1 .OR. NWU.GT.N ) THEN + INFO = 4 + RETURN + END IF + + ELSEIF( IRANGE.EQ.VALRNG ) THEN + WL = VL + WU = VU + + ELSEIF( IRANGE.EQ.ALLRNG ) THEN + WL = GL + WU = GU + ENDIF + + + +* Find Eigenvalues -- Loop Over blocks and recompute NWL and NWU. +* NWL accumulates the number of eigenvalues .le. WL, +* NWU accumulates the number of eigenvalues .le. WU + M = 0 + IEND = 0 + INFO = 0 + NWL = 0 + NWU = 0 +* + DO 70 JBLK = 1, NSPLIT + IOFF = IEND + IBEGIN = IOFF + 1 + IEND = ISPLIT( JBLK ) + IN = IEND - IOFF +* + IF( IN.EQ.1 ) THEN +* 1x1 block + IF( WL.GE.D( IBEGIN )-PIVMIN ) + $ NWL = NWL + 1 + IF( WU.GE.D( IBEGIN )-PIVMIN ) + $ NWU = NWU + 1 + IF( IRANGE.EQ.ALLRNG .OR. + $ ( WL.LT.D( IBEGIN )-PIVMIN + $ .AND. WU.GE. D( IBEGIN )-PIVMIN ) ) THEN + M = M + 1 + W( M ) = D( IBEGIN ) + WERR(M) = ZERO +* The gap for a single block doesn't matter for the later +* algorithm and is assigned an arbitrary large value + IBLOCK( M ) = JBLK + INDEXW( M ) = 1 + END IF + +* Disabled 2x2 case because of a failure on the following matrix +* RANGE = 'I', IL = IU = 4 +* Original Tridiagonal, d = [ +* -0.150102010615740E+00 +* -0.849897989384260E+00 +* -0.128208148052635E-15 +* 0.128257718286320E-15 +* ]; +* e = [ +* -0.357171383266986E+00 +* -0.180411241501588E-15 +* -0.175152352710251E-15 +* ]; +* +* ELSE IF( IN.EQ.2 ) THEN +** 2x2 block +* DISC = SQRT( (HALF*(D(IBEGIN)-D(IEND)))**2 + E(IBEGIN)**2 ) +* TMP1 = HALF*(D(IBEGIN)+D(IEND)) +* L1 = TMP1 - DISC +* IF( WL.GE. L1-PIVMIN ) +* $ NWL = NWL + 1 +* IF( WU.GE. L1-PIVMIN ) +* $ NWU = NWU + 1 +* IF( IRANGE.EQ.ALLRNG .OR. ( WL.LT.L1-PIVMIN .AND. WU.GE. +* $ L1-PIVMIN ) ) THEN +* M = M + 1 +* W( M ) = L1 +** The uncertainty of eigenvalues of a 2x2 matrix is very small +* WERR( M ) = EPS * ABS( W( M ) ) * TWO +* IBLOCK( M ) = JBLK +* INDEXW( M ) = 1 +* ENDIF +* L2 = TMP1 + DISC +* IF( WL.GE. L2-PIVMIN ) +* $ NWL = NWL + 1 +* IF( WU.GE. L2-PIVMIN ) +* $ NWU = NWU + 1 +* IF( IRANGE.EQ.ALLRNG .OR. ( WL.LT.L2-PIVMIN .AND. WU.GE. +* $ L2-PIVMIN ) ) THEN +* M = M + 1 +* W( M ) = L2 +** The uncertainty of eigenvalues of a 2x2 matrix is very small +* WERR( M ) = EPS * ABS( W( M ) ) * TWO +* IBLOCK( M ) = JBLK +* INDEXW( M ) = 2 +* ENDIF + ELSE +* General Case - block of size IN >= 2 +* Compute local Gerschgorin interval and use it as the initial +* interval for DLAEBZ + GU = D( IBEGIN ) + GL = D( IBEGIN ) + TMP1 = ZERO + + DO 40 J = IBEGIN, IEND + GL = MIN( GL, GERS( 2*J - 1)) + GU = MAX( GU, GERS(2*J) ) + 40 CONTINUE +* [JAN/28/2009] +* change SPDIAM by TNORM in lines 2 and 3 thereafter +* line 1: remove computation of SPDIAM (not useful anymore) +* SPDIAM = GU - GL +* GL = GL - FUDGE*SPDIAM*EPS*IN - FUDGE*PIVMIN +* GU = GU + FUDGE*SPDIAM*EPS*IN + FUDGE*PIVMIN + GL = GL - FUDGE*TNORM*EPS*IN - FUDGE*PIVMIN + GU = GU + FUDGE*TNORM*EPS*IN + FUDGE*PIVMIN +* + IF( IRANGE.GT.1 ) THEN + IF( GU.LT.WL ) THEN +* the local block contains none of the wanted eigenvalues + NWL = NWL + IN + NWU = NWU + IN + GO TO 70 + END IF +* refine search interval if possible, only range (WL,WU] matters + GL = MAX( GL, WL ) + GU = MIN( GU, WU ) + IF( GL.GE.GU ) + $ GO TO 70 + END IF + +* Find negcount of initial interval boundaries GL and GU + WORK( N+1 ) = GL + WORK( N+IN+1 ) = GU + CALL DLAEBZ( 1, 0, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN, + $ D( IBEGIN ), E( IBEGIN ), E2( IBEGIN ), + $ IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IM, + $ IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO ) + IF( IINFO .NE. 0 ) THEN + INFO = IINFO + RETURN + END IF +* + NWL = NWL + IWORK( 1 ) + NWU = NWU + IWORK( IN+1 ) + IWOFF = M - IWORK( 1 ) + +* Compute Eigenvalues + ITMAX = INT( ( LOG( GU-GL+PIVMIN )-LOG( PIVMIN ) ) / + $ LOG( TWO ) ) + 2 + CALL DLAEBZ( 2, ITMAX, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN, + $ D( IBEGIN ), E( IBEGIN ), E2( IBEGIN ), + $ IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IOUT, + $ IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO ) + IF( IINFO .NE. 0 ) THEN + INFO = IINFO + RETURN + END IF +* +* Copy eigenvalues into W and IBLOCK +* Use -JBLK for block number for unconverged eigenvalues. +* Loop over the number of output intervals from DLAEBZ + DO 60 J = 1, IOUT +* eigenvalue approximation is middle point of interval + TMP1 = HALF*( WORK( J+N )+WORK( J+IN+N ) ) +* semi length of error interval + TMP2 = HALF*ABS( WORK( J+N )-WORK( J+IN+N ) ) + IF( J.GT.IOUT-IINFO ) THEN +* Flag non-convergence. + NCNVRG = .TRUE. + IB = -JBLK + ELSE + IB = JBLK + END IF + DO 50 JE = IWORK( J ) + 1 + IWOFF, + $ IWORK( J+IN ) + IWOFF + W( JE ) = TMP1 + WERR( JE ) = TMP2 + INDEXW( JE ) = JE - IWOFF + IBLOCK( JE ) = IB + 50 CONTINUE + 60 CONTINUE +* + M = M + IM + END IF + 70 CONTINUE + +* If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU +* If NWL+1 < IL or NWU > IU, discard extra eigenvalues. + IF( IRANGE.EQ.INDRNG ) THEN + IDISCL = IL - 1 - NWL + IDISCU = NWU - IU +* + IF( IDISCL.GT.0 ) THEN + IM = 0 + DO 80 JE = 1, M +* Remove some of the smallest eigenvalues from the left so that +* at the end IDISCL =0. Move all eigenvalues up to the left. + IF( W( JE ).LE.WLU .AND. IDISCL.GT.0 ) THEN + IDISCL = IDISCL - 1 + ELSE + IM = IM + 1 + W( IM ) = W( JE ) + WERR( IM ) = WERR( JE ) + INDEXW( IM ) = INDEXW( JE ) + IBLOCK( IM ) = IBLOCK( JE ) + END IF + 80 CONTINUE + M = IM + END IF + IF( IDISCU.GT.0 ) THEN +* Remove some of the largest eigenvalues from the right so that +* at the end IDISCU =0. Move all eigenvalues up to the left. + IM=M+1 + DO 81 JE = M, 1, -1 + IF( W( JE ).GE.WUL .AND. IDISCU.GT.0 ) THEN + IDISCU = IDISCU - 1 + ELSE + IM = IM - 1 + W( IM ) = W( JE ) + WERR( IM ) = WERR( JE ) + INDEXW( IM ) = INDEXW( JE ) + IBLOCK( IM ) = IBLOCK( JE ) + END IF + 81 CONTINUE + JEE = 0 + DO 82 JE = IM, M + JEE = JEE + 1 + W( JEE ) = W( JE ) + WERR( JEE ) = WERR( JE ) + INDEXW( JEE ) = INDEXW( JE ) + IBLOCK( JEE ) = IBLOCK( JE ) + 82 CONTINUE + M = M-IM+1 + END IF + + IF( IDISCL.GT.0 .OR. IDISCU.GT.0 ) THEN +* Code to deal with effects of bad arithmetic. (If N(w) is +* monotone non-decreasing, this should never happen.) +* Some low eigenvalues to be discarded are not in (WL,WLU], +* or high eigenvalues to be discarded are not in (WUL,WU] +* so just kill off the smallest IDISCL/largest IDISCU +* eigenvalues, by marking the corresponding IBLOCK = 0 + IF( IDISCL.GT.0 ) THEN + WKILL = WU + DO 100 JDISC = 1, IDISCL + IW = 0 + DO 90 JE = 1, M + IF( IBLOCK( JE ).NE.0 .AND. + $ ( W( JE ).LT.WKILL .OR. IW.EQ.0 ) ) THEN + IW = JE + WKILL = W( JE ) + END IF + 90 CONTINUE + IBLOCK( IW ) = 0 + 100 CONTINUE + END IF + IF( IDISCU.GT.0 ) THEN + WKILL = WL + DO 120 JDISC = 1, IDISCU + IW = 0 + DO 110 JE = 1, M + IF( IBLOCK( JE ).NE.0 .AND. + $ ( W( JE ).GE.WKILL .OR. IW.EQ.0 ) ) THEN + IW = JE + WKILL = W( JE ) + END IF + 110 CONTINUE + IBLOCK( IW ) = 0 + 120 CONTINUE + END IF +* Now erase all eigenvalues with IBLOCK set to zero + IM = 0 + DO 130 JE = 1, M + IF( IBLOCK( JE ).NE.0 ) THEN + IM = IM + 1 + W( IM ) = W( JE ) + WERR( IM ) = WERR( JE ) + INDEXW( IM ) = INDEXW( JE ) + IBLOCK( IM ) = IBLOCK( JE ) + END IF + 130 CONTINUE + M = IM + END IF + IF( IDISCL.LT.0 .OR. IDISCU.LT.0 ) THEN + TOOFEW = .TRUE. + END IF + END IF +* + IF(( IRANGE.EQ.ALLRNG .AND. M.NE.N ).OR. + $ ( IRANGE.EQ.INDRNG .AND. M.NE.IU-IL+1 ) ) THEN + TOOFEW = .TRUE. + END IF + +* If ORDER='B', do nothing the eigenvalues are already sorted by +* block. +* If ORDER='E', sort the eigenvalues from smallest to largest + + IF( LSAME(ORDER,'E') .AND. NSPLIT.GT.1 ) THEN + DO 150 JE = 1, M - 1 + IE = 0 + TMP1 = W( JE ) + DO 140 J = JE + 1, M + IF( W( J ).LT.TMP1 ) THEN + IE = J + TMP1 = W( J ) + END IF + 140 CONTINUE + IF( IE.NE.0 ) THEN + TMP2 = WERR( IE ) + ITMP1 = IBLOCK( IE ) + ITMP2 = INDEXW( IE ) + W( IE ) = W( JE ) + WERR( IE ) = WERR( JE ) + IBLOCK( IE ) = IBLOCK( JE ) + INDEXW( IE ) = INDEXW( JE ) + W( JE ) = TMP1 + WERR( JE ) = TMP2 + IBLOCK( JE ) = ITMP1 + INDEXW( JE ) = ITMP2 + END IF + 150 CONTINUE + END IF +* + INFO = 0 + IF( NCNVRG ) + $ INFO = INFO + 1 + IF( TOOFEW ) + $ INFO = INFO + 2 + RETURN +* +* End of DLARRD +* + END diff --git a/dspl/liblapack/SRC/dlarre.f b/dspl/liblapack/SRC/dlarre.f new file mode 100644 index 0000000..0613efb --- /dev/null +++ b/dspl/liblapack/SRC/dlarre.f @@ -0,0 +1,904 @@ +*> \brief \b DLARRE given the tridiagonal matrix T, sets small off-diagonal elements to zero and for each unreduced block Ti, finds base representations and eigenvalues. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARRE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARRE( RANGE, N, VL, VU, IL, IU, D, E, E2, +* RTOL1, RTOL2, SPLTOL, NSPLIT, ISPLIT, M, +* W, WERR, WGAP, IBLOCK, INDEXW, GERS, PIVMIN, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER RANGE +* INTEGER IL, INFO, IU, M, N, NSPLIT +* DOUBLE PRECISION PIVMIN, RTOL1, RTOL2, SPLTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IBLOCK( * ), ISPLIT( * ), IWORK( * ), +* $ INDEXW( * ) +* DOUBLE PRECISION D( * ), E( * ), E2( * ), GERS( * ), +* $ W( * ),WERR( * ), WGAP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> To find the desired eigenvalues of a given real symmetric +*> tridiagonal matrix T, DLARRE sets any "small" off-diagonal +*> elements to zero, and for each unreduced block T_i, it finds +*> (a) a suitable shift at one end of the block's spectrum, +*> (b) the base representation, T_i - sigma_i I = L_i D_i L_i^T, and +*> (c) eigenvalues of each L_i D_i L_i^T. +*> The representations and eigenvalues found are then used by +*> DSTEMR to compute the eigenvectors of T. +*> The accuracy varies depending on whether bisection is used to +*> find a few eigenvalues or the dqds algorithm (subroutine DLASQ2) to +*> conpute all and then discard any unwanted one. +*> As an added benefit, DLARRE also outputs the n +*> Gerschgorin intervals for the matrices L_i D_i L_i^T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': ("All") all eigenvalues will be found. +*> = 'V': ("Value") all eigenvalues in the half-open interval +*> (VL, VU] will be found. +*> = 'I': ("Index") the IL-th through IU-th eigenvalues (of the +*> entire matrix) will be found. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N > 0. +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound for the eigenvalues. +*> Eigenvalues less than or equal to VL, or greater than VU, +*> will not be returned. VL < VU. +*> If RANGE='I' or ='A', DLARRE computes bounds on the desired +*> part of the spectrum. +*> \endverbatim +*> +*> \param[in,out] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound for the eigenvalues. +*> Eigenvalues less than or equal to VL, or greater than VU, +*> will not be returned. VL < VU. +*> If RANGE='I' or ='A', DLARRE computes bounds on the desired +*> part of the spectrum. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the N diagonal elements of the tridiagonal +*> matrix T. +*> On exit, the N diagonal elements of the diagonal +*> matrices D_i. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> On entry, the first (N-1) entries contain the subdiagonal +*> elements of the tridiagonal matrix T; E(N) need not be set. +*> On exit, E contains the subdiagonal elements of the unit +*> bidiagonal matrices L_i. The entries E( ISPLIT( I ) ), +*> 1 <= I <= NSPLIT, contain the base points sigma_i on output. +*> \endverbatim +*> +*> \param[in,out] E2 +*> \verbatim +*> E2 is DOUBLE PRECISION array, dimension (N) +*> On entry, the first (N-1) entries contain the SQUARES of the +*> subdiagonal elements of the tridiagonal matrix T; +*> E2(N) need not be set. +*> On exit, the entries E2( ISPLIT( I ) ), +*> 1 <= I <= NSPLIT, have been set to zero +*> \endverbatim +*> +*> \param[in] RTOL1 +*> \verbatim +*> RTOL1 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] RTOL2 +*> \verbatim +*> RTOL2 is DOUBLE PRECISION +*> Parameters for bisection. +*> An interval [LEFT,RIGHT] has converged if +*> RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) +*> \endverbatim +*> +*> \param[in] SPLTOL +*> \verbatim +*> SPLTOL is DOUBLE PRECISION +*> The threshold for splitting. +*> \endverbatim +*> +*> \param[out] NSPLIT +*> \verbatim +*> NSPLIT is INTEGER +*> The number of blocks T splits into. 1 <= NSPLIT <= N. +*> \endverbatim +*> +*> \param[out] ISPLIT +*> \verbatim +*> ISPLIT is INTEGER array, dimension (N) +*> The splitting points, at which T breaks up into blocks. +*> The first block consists of rows/columns 1 to ISPLIT(1), +*> the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), +*> etc., and the NSPLIT-th consists of rows/columns +*> ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues (of all L_i D_i L_i^T) +*> found. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the eigenvalues. The +*> eigenvalues of each of the blocks, L_i D_i L_i^T, are +*> sorted in ascending order ( DLARRE may use the +*> remaining N-M elements as workspace). +*> \endverbatim +*> +*> \param[out] WERR +*> \verbatim +*> WERR is DOUBLE PRECISION array, dimension (N) +*> The error bound on the corresponding eigenvalue in W. +*> \endverbatim +*> +*> \param[out] WGAP +*> \verbatim +*> WGAP is DOUBLE PRECISION array, dimension (N) +*> The separation from the right neighbor eigenvalue in W. +*> The gap is only with respect to the eigenvalues of the same block +*> as each block has its own representation tree. +*> Exception: at the right end of a block we store the left gap +*> \endverbatim +*> +*> \param[out] IBLOCK +*> \verbatim +*> IBLOCK is INTEGER array, dimension (N) +*> The indices of the blocks (submatrices) associated with the +*> corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue +*> W(i) belongs to the first block from the top, =2 if W(i) +*> belongs to the second block, etc. +*> \endverbatim +*> +*> \param[out] INDEXW +*> \verbatim +*> INDEXW is INTEGER array, dimension (N) +*> The indices of the eigenvalues within each block (submatrix); +*> for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the +*> i-th eigenvalue W(i) is the 10-th eigenvalue in block 2 +*> \endverbatim +*> +*> \param[out] GERS +*> \verbatim +*> GERS is DOUBLE PRECISION array, dimension (2*N) +*> The N Gerschgorin intervals (the i-th Gerschgorin interval +*> is (GERS(2*i-1), GERS(2*i)). +*> \endverbatim +*> +*> \param[out] PIVMIN +*> \verbatim +*> PIVMIN is DOUBLE PRECISION +*> The minimum pivot in the Sturm sequence for T. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (6*N) +*> Workspace. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> Workspace. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: A problem occurred in DLARRE. +*> < 0: One of the called subroutines signaled an internal problem. +*> Needs inspection of the corresponding parameter IINFO +*> for further information. +*> +*> =-1: Problem in DLARRD. +*> = 2: No base representation could be found in MAXTRY iterations. +*> Increasing MAXTRY and recompilation might be a remedy. +*> =-3: Problem in DLARRB when computing the refined root +*> representation for DLASQ2. +*> =-4: Problem in DLARRB when preforming bisection on the +*> desired part of the spectrum. +*> =-5: Problem in DLASQ2. +*> =-6: Problem in DLASQ2. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The base representations are required to suffer very little +*> element growth and consequently define all their eigenvalues to +*> high relative accuracy. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA \n +*> +* ===================================================================== + SUBROUTINE DLARRE( RANGE, N, VL, VU, IL, IU, D, E, E2, + $ RTOL1, RTOL2, SPLTOL, NSPLIT, ISPLIT, M, + $ W, WERR, WGAP, IBLOCK, INDEXW, GERS, PIVMIN, + $ WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER RANGE + INTEGER IL, INFO, IU, M, N, NSPLIT + DOUBLE PRECISION PIVMIN, RTOL1, RTOL2, SPLTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), ISPLIT( * ), IWORK( * ), + $ INDEXW( * ) + DOUBLE PRECISION D( * ), E( * ), E2( * ), GERS( * ), + $ W( * ),WERR( * ), WGAP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION FAC, FOUR, FOURTH, FUDGE, HALF, HNDRD, + $ MAXGROWTH, ONE, PERT, TWO, ZERO + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, + $ TWO = 2.0D0, FOUR=4.0D0, + $ HNDRD = 100.0D0, + $ PERT = 8.0D0, + $ HALF = ONE/TWO, FOURTH = ONE/FOUR, FAC= HALF, + $ MAXGROWTH = 64.0D0, FUDGE = 2.0D0 ) + INTEGER MAXTRY, ALLRNG, INDRNG, VALRNG + PARAMETER ( MAXTRY = 6, ALLRNG = 1, INDRNG = 2, + $ VALRNG = 3 ) +* .. +* .. Local Scalars .. + LOGICAL FORCEB, NOREP, USEDQD + INTEGER CNT, CNT1, CNT2, I, IBEGIN, IDUM, IEND, IINFO, + $ IN, INDL, INDU, IRANGE, J, JBLK, MB, MM, + $ WBEGIN, WEND + DOUBLE PRECISION AVGAP, BSRTOL, CLWDTH, DMAX, DPIVOT, EABS, + $ EMAX, EOLD, EPS, GL, GU, ISLEFT, ISRGHT, RTL, + $ RTOL, S1, S2, SAFMIN, SGNDEF, SIGMA, SPDIAM, + $ TAU, TMP, TMP1 + + +* .. +* .. Local Arrays .. + INTEGER ISEED( 4 ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH, LSAME + +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLARNV, DLARRA, DLARRB, DLARRC, DLARRD, + $ DLASQ2, DLARRK +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN + +* .. +* .. Executable Statements .. +* + + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* +* Decode RANGE +* + IF( LSAME( RANGE, 'A' ) ) THEN + IRANGE = ALLRNG + ELSE IF( LSAME( RANGE, 'V' ) ) THEN + IRANGE = VALRNG + ELSE IF( LSAME( RANGE, 'I' ) ) THEN + IRANGE = INDRNG + END IF + + M = 0 + +* Get machine constants + SAFMIN = DLAMCH( 'S' ) + EPS = DLAMCH( 'P' ) + +* Set parameters + RTL = SQRT(EPS) + BSRTOL = SQRT(EPS) + +* Treat case of 1x1 matrix for quick return + IF( N.EQ.1 ) THEN + IF( (IRANGE.EQ.ALLRNG).OR. + $ ((IRANGE.EQ.VALRNG).AND.(D(1).GT.VL).AND.(D(1).LE.VU)).OR. + $ ((IRANGE.EQ.INDRNG).AND.(IL.EQ.1).AND.(IU.EQ.1)) ) THEN + M = 1 + W(1) = D(1) +* The computation error of the eigenvalue is zero + WERR(1) = ZERO + WGAP(1) = ZERO + IBLOCK( 1 ) = 1 + INDEXW( 1 ) = 1 + GERS(1) = D( 1 ) + GERS(2) = D( 1 ) + ENDIF +* store the shift for the initial RRR, which is zero in this case + E(1) = ZERO + RETURN + END IF + +* General case: tridiagonal matrix of order > 1 +* +* Init WERR, WGAP. Compute Gerschgorin intervals and spectral diameter. +* Compute maximum off-diagonal entry and pivmin. + GL = D(1) + GU = D(1) + EOLD = ZERO + EMAX = ZERO + E(N) = ZERO + DO 5 I = 1,N + WERR(I) = ZERO + WGAP(I) = ZERO + EABS = ABS( E(I) ) + IF( EABS .GE. EMAX ) THEN + EMAX = EABS + END IF + TMP1 = EABS + EOLD + GERS( 2*I-1) = D(I) - TMP1 + GL = MIN( GL, GERS( 2*I - 1)) + GERS( 2*I ) = D(I) + TMP1 + GU = MAX( GU, GERS(2*I) ) + EOLD = EABS + 5 CONTINUE +* The minimum pivot allowed in the Sturm sequence for T + PIVMIN = SAFMIN * MAX( ONE, EMAX**2 ) +* Compute spectral diameter. The Gerschgorin bounds give an +* estimate that is wrong by at most a factor of SQRT(2) + SPDIAM = GU - GL + +* Compute splitting points + CALL DLARRA( N, D, E, E2, SPLTOL, SPDIAM, + $ NSPLIT, ISPLIT, IINFO ) + +* Can force use of bisection instead of faster DQDS. +* Option left in the code for future multisection work. + FORCEB = .FALSE. + +* Initialize USEDQD, DQDS should be used for ALLRNG unless someone +* explicitly wants bisection. + USEDQD = (( IRANGE.EQ.ALLRNG ) .AND. (.NOT.FORCEB)) + + IF( (IRANGE.EQ.ALLRNG) .AND. (.NOT. FORCEB) ) THEN +* Set interval [VL,VU] that contains all eigenvalues + VL = GL + VU = GU + ELSE +* We call DLARRD to find crude approximations to the eigenvalues +* in the desired range. In case IRANGE = INDRNG, we also obtain the +* interval (VL,VU] that contains all the wanted eigenvalues. +* An interval [LEFT,RIGHT] has converged if +* RIGHT-LEFT.LT.RTOL*MAX(ABS(LEFT),ABS(RIGHT)) +* DLARRD needs a WORK of size 4*N, IWORK of size 3*N + CALL DLARRD( RANGE, 'B', N, VL, VU, IL, IU, GERS, + $ BSRTOL, D, E, E2, PIVMIN, NSPLIT, ISPLIT, + $ MM, W, WERR, VL, VU, IBLOCK, INDEXW, + $ WORK, IWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = -1 + RETURN + ENDIF +* Make sure that the entries M+1 to N in W, WERR, IBLOCK, INDEXW are 0 + DO 14 I = MM+1,N + W( I ) = ZERO + WERR( I ) = ZERO + IBLOCK( I ) = 0 + INDEXW( I ) = 0 + 14 CONTINUE + END IF + + +*** +* Loop over unreduced blocks + IBEGIN = 1 + WBEGIN = 1 + DO 170 JBLK = 1, NSPLIT + IEND = ISPLIT( JBLK ) + IN = IEND - IBEGIN + 1 + +* 1 X 1 block + IF( IN.EQ.1 ) THEN + IF( (IRANGE.EQ.ALLRNG).OR.( (IRANGE.EQ.VALRNG).AND. + $ ( D( IBEGIN ).GT.VL ).AND.( D( IBEGIN ).LE.VU ) ) + $ .OR. ( (IRANGE.EQ.INDRNG).AND.(IBLOCK(WBEGIN).EQ.JBLK)) + $ ) THEN + M = M + 1 + W( M ) = D( IBEGIN ) + WERR(M) = ZERO +* The gap for a single block doesn't matter for the later +* algorithm and is assigned an arbitrary large value + WGAP(M) = ZERO + IBLOCK( M ) = JBLK + INDEXW( M ) = 1 + WBEGIN = WBEGIN + 1 + ENDIF +* E( IEND ) holds the shift for the initial RRR + E( IEND ) = ZERO + IBEGIN = IEND + 1 + GO TO 170 + END IF +* +* Blocks of size larger than 1x1 +* +* E( IEND ) will hold the shift for the initial RRR, for now set it =0 + E( IEND ) = ZERO +* +* Find local outer bounds GL,GU for the block + GL = D(IBEGIN) + GU = D(IBEGIN) + DO 15 I = IBEGIN , IEND + GL = MIN( GERS( 2*I-1 ), GL ) + GU = MAX( GERS( 2*I ), GU ) + 15 CONTINUE + SPDIAM = GU - GL + + IF(.NOT. ((IRANGE.EQ.ALLRNG).AND.(.NOT.FORCEB)) ) THEN +* Count the number of eigenvalues in the current block. + MB = 0 + DO 20 I = WBEGIN,MM + IF( IBLOCK(I).EQ.JBLK ) THEN + MB = MB+1 + ELSE + GOTO 21 + ENDIF + 20 CONTINUE + 21 CONTINUE + + IF( MB.EQ.0) THEN +* No eigenvalue in the current block lies in the desired range +* E( IEND ) holds the shift for the initial RRR + E( IEND ) = ZERO + IBEGIN = IEND + 1 + GO TO 170 + ELSE + +* Decide whether dqds or bisection is more efficient + USEDQD = ( (MB .GT. FAC*IN) .AND. (.NOT.FORCEB) ) + WEND = WBEGIN + MB - 1 +* Calculate gaps for the current block +* In later stages, when representations for individual +* eigenvalues are different, we use SIGMA = E( IEND ). + SIGMA = ZERO + DO 30 I = WBEGIN, WEND - 1 + WGAP( I ) = MAX( ZERO, + $ W(I+1)-WERR(I+1) - (W(I)+WERR(I)) ) + 30 CONTINUE + WGAP( WEND ) = MAX( ZERO, + $ VU - SIGMA - (W( WEND )+WERR( WEND ))) +* Find local index of the first and last desired evalue. + INDL = INDEXW(WBEGIN) + INDU = INDEXW( WEND ) + ENDIF + ENDIF + IF(( (IRANGE.EQ.ALLRNG) .AND. (.NOT. FORCEB) ).OR.USEDQD) THEN +* Case of DQDS +* Find approximations to the extremal eigenvalues of the block + CALL DLARRK( IN, 1, GL, GU, D(IBEGIN), + $ E2(IBEGIN), PIVMIN, RTL, TMP, TMP1, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = -1 + RETURN + ENDIF + ISLEFT = MAX(GL, TMP - TMP1 + $ - HNDRD * EPS* ABS(TMP - TMP1)) + + CALL DLARRK( IN, IN, GL, GU, D(IBEGIN), + $ E2(IBEGIN), PIVMIN, RTL, TMP, TMP1, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = -1 + RETURN + ENDIF + ISRGHT = MIN(GU, TMP + TMP1 + $ + HNDRD * EPS * ABS(TMP + TMP1)) +* Improve the estimate of the spectral diameter + SPDIAM = ISRGHT - ISLEFT + ELSE +* Case of bisection +* Find approximations to the wanted extremal eigenvalues + ISLEFT = MAX(GL, W(WBEGIN) - WERR(WBEGIN) + $ - HNDRD * EPS*ABS(W(WBEGIN)- WERR(WBEGIN) )) + ISRGHT = MIN(GU,W(WEND) + WERR(WEND) + $ + HNDRD * EPS * ABS(W(WEND)+ WERR(WEND))) + ENDIF + + +* Decide whether the base representation for the current block +* L_JBLK D_JBLK L_JBLK^T = T_JBLK - sigma_JBLK I +* should be on the left or the right end of the current block. +* The strategy is to shift to the end which is "more populated" +* Furthermore, decide whether to use DQDS for the computation of +* the eigenvalue approximations at the end of DLARRE or bisection. +* dqds is chosen if all eigenvalues are desired or the number of +* eigenvalues to be computed is large compared to the blocksize. + IF( ( IRANGE.EQ.ALLRNG ) .AND. (.NOT.FORCEB) ) THEN +* If all the eigenvalues have to be computed, we use dqd + USEDQD = .TRUE. +* INDL is the local index of the first eigenvalue to compute + INDL = 1 + INDU = IN +* MB = number of eigenvalues to compute + MB = IN + WEND = WBEGIN + MB - 1 +* Define 1/4 and 3/4 points of the spectrum + S1 = ISLEFT + FOURTH * SPDIAM + S2 = ISRGHT - FOURTH * SPDIAM + ELSE +* DLARRD has computed IBLOCK and INDEXW for each eigenvalue +* approximation. +* choose sigma + IF( USEDQD ) THEN + S1 = ISLEFT + FOURTH * SPDIAM + S2 = ISRGHT - FOURTH * SPDIAM + ELSE + TMP = MIN(ISRGHT,VU) - MAX(ISLEFT,VL) + S1 = MAX(ISLEFT,VL) + FOURTH * TMP + S2 = MIN(ISRGHT,VU) - FOURTH * TMP + ENDIF + ENDIF + +* Compute the negcount at the 1/4 and 3/4 points + IF(MB.GT.1) THEN + CALL DLARRC( 'T', IN, S1, S2, D(IBEGIN), + $ E(IBEGIN), PIVMIN, CNT, CNT1, CNT2, IINFO) + ENDIF + + IF(MB.EQ.1) THEN + SIGMA = GL + SGNDEF = ONE + ELSEIF( CNT1 - INDL .GE. INDU - CNT2 ) THEN + IF( ( IRANGE.EQ.ALLRNG ) .AND. (.NOT.FORCEB) ) THEN + SIGMA = MAX(ISLEFT,GL) + ELSEIF( USEDQD ) THEN +* use Gerschgorin bound as shift to get pos def matrix +* for dqds + SIGMA = ISLEFT + ELSE +* use approximation of the first desired eigenvalue of the +* block as shift + SIGMA = MAX(ISLEFT,VL) + ENDIF + SGNDEF = ONE + ELSE + IF( ( IRANGE.EQ.ALLRNG ) .AND. (.NOT.FORCEB) ) THEN + SIGMA = MIN(ISRGHT,GU) + ELSEIF( USEDQD ) THEN +* use Gerschgorin bound as shift to get neg def matrix +* for dqds + SIGMA = ISRGHT + ELSE +* use approximation of the first desired eigenvalue of the +* block as shift + SIGMA = MIN(ISRGHT,VU) + ENDIF + SGNDEF = -ONE + ENDIF + + +* An initial SIGMA has been chosen that will be used for computing +* T - SIGMA I = L D L^T +* Define the increment TAU of the shift in case the initial shift +* needs to be refined to obtain a factorization with not too much +* element growth. + IF( USEDQD ) THEN +* The initial SIGMA was to the outer end of the spectrum +* the matrix is definite and we need not retreat. + TAU = SPDIAM*EPS*N + TWO*PIVMIN + TAU = MAX( TAU,TWO*EPS*ABS(SIGMA) ) + ELSE + IF(MB.GT.1) THEN + CLWDTH = W(WEND) + WERR(WEND) - W(WBEGIN) - WERR(WBEGIN) + AVGAP = ABS(CLWDTH / DBLE(WEND-WBEGIN)) + IF( SGNDEF.EQ.ONE ) THEN + TAU = HALF*MAX(WGAP(WBEGIN),AVGAP) + TAU = MAX(TAU,WERR(WBEGIN)) + ELSE + TAU = HALF*MAX(WGAP(WEND-1),AVGAP) + TAU = MAX(TAU,WERR(WEND)) + ENDIF + ELSE + TAU = WERR(WBEGIN) + ENDIF + ENDIF +* + DO 80 IDUM = 1, MAXTRY +* Compute L D L^T factorization of tridiagonal matrix T - sigma I. +* Store D in WORK(1:IN), L in WORK(IN+1:2*IN), and reciprocals of +* pivots in WORK(2*IN+1:3*IN) + DPIVOT = D( IBEGIN ) - SIGMA + WORK( 1 ) = DPIVOT + DMAX = ABS( WORK(1) ) + J = IBEGIN + DO 70 I = 1, IN - 1 + WORK( 2*IN+I ) = ONE / WORK( I ) + TMP = E( J )*WORK( 2*IN+I ) + WORK( IN+I ) = TMP + DPIVOT = ( D( J+1 )-SIGMA ) - TMP*E( J ) + WORK( I+1 ) = DPIVOT + DMAX = MAX( DMAX, ABS(DPIVOT) ) + J = J + 1 + 70 CONTINUE +* check for element growth + IF( DMAX .GT. MAXGROWTH*SPDIAM ) THEN + NOREP = .TRUE. + ELSE + NOREP = .FALSE. + ENDIF + IF( USEDQD .AND. .NOT.NOREP ) THEN +* Ensure the definiteness of the representation +* All entries of D (of L D L^T) must have the same sign + DO 71 I = 1, IN + TMP = SGNDEF*WORK( I ) + IF( TMP.LT.ZERO ) NOREP = .TRUE. + 71 CONTINUE + ENDIF + IF(NOREP) THEN +* Note that in the case of IRANGE=ALLRNG, we use the Gerschgorin +* shift which makes the matrix definite. So we should end up +* here really only in the case of IRANGE = VALRNG or INDRNG. + IF( IDUM.EQ.MAXTRY-1 ) THEN + IF( SGNDEF.EQ.ONE ) THEN +* The fudged Gerschgorin shift should succeed + SIGMA = + $ GL - FUDGE*SPDIAM*EPS*N - FUDGE*TWO*PIVMIN + ELSE + SIGMA = + $ GU + FUDGE*SPDIAM*EPS*N + FUDGE*TWO*PIVMIN + END IF + ELSE + SIGMA = SIGMA - SGNDEF * TAU + TAU = TWO * TAU + END IF + ELSE +* an initial RRR is found + GO TO 83 + END IF + 80 CONTINUE +* if the program reaches this point, no base representation could be +* found in MAXTRY iterations. + INFO = 2 + RETURN + + 83 CONTINUE +* At this point, we have found an initial base representation +* T - SIGMA I = L D L^T with not too much element growth. +* Store the shift. + E( IEND ) = SIGMA +* Store D and L. + CALL DCOPY( IN, WORK, 1, D( IBEGIN ), 1 ) + CALL DCOPY( IN-1, WORK( IN+1 ), 1, E( IBEGIN ), 1 ) + + + IF(MB.GT.1 ) THEN +* +* Perturb each entry of the base representation by a small +* (but random) relative amount to overcome difficulties with +* glued matrices. +* + DO 122 I = 1, 4 + ISEED( I ) = 1 + 122 CONTINUE + + CALL DLARNV(2, ISEED, 2*IN-1, WORK(1)) + DO 125 I = 1,IN-1 + D(IBEGIN+I-1) = D(IBEGIN+I-1)*(ONE+EPS*PERT*WORK(I)) + E(IBEGIN+I-1) = E(IBEGIN+I-1)*(ONE+EPS*PERT*WORK(IN+I)) + 125 CONTINUE + D(IEND) = D(IEND)*(ONE+EPS*FOUR*WORK(IN)) +* + ENDIF +* +* Don't update the Gerschgorin intervals because keeping track +* of the updates would be too much work in DLARRV. +* We update W instead and use it to locate the proper Gerschgorin +* intervals. + +* Compute the required eigenvalues of L D L' by bisection or dqds + IF ( .NOT.USEDQD ) THEN +* If DLARRD has been used, shift the eigenvalue approximations +* according to their representation. This is necessary for +* a uniform DLARRV since dqds computes eigenvalues of the +* shifted representation. In DLARRV, W will always hold the +* UNshifted eigenvalue approximation. + DO 134 J=WBEGIN,WEND + W(J) = W(J) - SIGMA + WERR(J) = WERR(J) + ABS(W(J)) * EPS + 134 CONTINUE +* call DLARRB to reduce eigenvalue error of the approximations +* from DLARRD + DO 135 I = IBEGIN, IEND-1 + WORK( I ) = D( I ) * E( I )**2 + 135 CONTINUE +* use bisection to find EV from INDL to INDU + CALL DLARRB(IN, D(IBEGIN), WORK(IBEGIN), + $ INDL, INDU, RTOL1, RTOL2, INDL-1, + $ W(WBEGIN), WGAP(WBEGIN), WERR(WBEGIN), + $ WORK( 2*N+1 ), IWORK, PIVMIN, SPDIAM, + $ IN, IINFO ) + IF( IINFO .NE. 0 ) THEN + INFO = -4 + RETURN + END IF +* DLARRB computes all gaps correctly except for the last one +* Record distance to VU/GU + WGAP( WEND ) = MAX( ZERO, + $ ( VU-SIGMA ) - ( W( WEND ) + WERR( WEND ) ) ) + DO 138 I = INDL, INDU + M = M + 1 + IBLOCK(M) = JBLK + INDEXW(M) = I + 138 CONTINUE + ELSE +* Call dqds to get all eigs (and then possibly delete unwanted +* eigenvalues). +* Note that dqds finds the eigenvalues of the L D L^T representation +* of T to high relative accuracy. High relative accuracy +* might be lost when the shift of the RRR is subtracted to obtain +* the eigenvalues of T. However, T is not guaranteed to define its +* eigenvalues to high relative accuracy anyway. +* Set RTOL to the order of the tolerance used in DLASQ2 +* This is an ESTIMATED error, the worst case bound is 4*N*EPS +* which is usually too large and requires unnecessary work to be +* done by bisection when computing the eigenvectors + RTOL = LOG(DBLE(IN)) * FOUR * EPS + J = IBEGIN + DO 140 I = 1, IN - 1 + WORK( 2*I-1 ) = ABS( D( J ) ) + WORK( 2*I ) = E( J )*E( J )*WORK( 2*I-1 ) + J = J + 1 + 140 CONTINUE + WORK( 2*IN-1 ) = ABS( D( IEND ) ) + WORK( 2*IN ) = ZERO + CALL DLASQ2( IN, WORK, IINFO ) + IF( IINFO .NE. 0 ) THEN +* If IINFO = -5 then an index is part of a tight cluster +* and should be changed. The index is in IWORK(1) and the +* gap is in WORK(N+1) + INFO = -5 + RETURN + ELSE +* Test that all eigenvalues are positive as expected + DO 149 I = 1, IN + IF( WORK( I ).LT.ZERO ) THEN + INFO = -6 + RETURN + ENDIF + 149 CONTINUE + END IF + IF( SGNDEF.GT.ZERO ) THEN + DO 150 I = INDL, INDU + M = M + 1 + W( M ) = WORK( IN-I+1 ) + IBLOCK( M ) = JBLK + INDEXW( M ) = I + 150 CONTINUE + ELSE + DO 160 I = INDL, INDU + M = M + 1 + W( M ) = -WORK( I ) + IBLOCK( M ) = JBLK + INDEXW( M ) = I + 160 CONTINUE + END IF + + DO 165 I = M - MB + 1, M +* the value of RTOL below should be the tolerance in DLASQ2 + WERR( I ) = RTOL * ABS( W(I) ) + 165 CONTINUE + DO 166 I = M - MB + 1, M - 1 +* compute the right gap between the intervals + WGAP( I ) = MAX( ZERO, + $ W(I+1)-WERR(I+1) - (W(I)+WERR(I)) ) + 166 CONTINUE + WGAP( M ) = MAX( ZERO, + $ ( VU-SIGMA ) - ( W( M ) + WERR( M ) ) ) + END IF +* proceed with next block + IBEGIN = IEND + 1 + WBEGIN = WEND + 1 + 170 CONTINUE +* + + RETURN +* +* end of DLARRE +* + END diff --git a/dspl/liblapack/SRC/dlarrf.f b/dspl/liblapack/SRC/dlarrf.f new file mode 100644 index 0000000..f814ee1 --- /dev/null +++ b/dspl/liblapack/SRC/dlarrf.f @@ -0,0 +1,495 @@ +*> \brief \b DLARRF finds a new relatively robust representation such that at least one of the eigenvalues is relatively isolated. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARRF( N, D, L, LD, CLSTRT, CLEND, +* W, WGAP, WERR, +* SPDIAM, CLGAPL, CLGAPR, PIVMIN, SIGMA, +* DPLUS, LPLUS, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER CLSTRT, CLEND, INFO, N +* DOUBLE PRECISION CLGAPL, CLGAPR, PIVMIN, SIGMA, SPDIAM +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), DPLUS( * ), L( * ), LD( * ), +* $ LPLUS( * ), W( * ), WGAP( * ), WERR( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Given the initial representation L D L^T and its cluster of close +*> eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ... +*> W( CLEND ), DLARRF finds a new relatively robust representation +*> L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the +*> eigenvalues of L(+) D(+) L(+)^T is relatively isolated. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix (subblock, if the matrix split). +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The N diagonal elements of the diagonal matrix D. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is DOUBLE PRECISION array, dimension (N-1) +*> The (N-1) subdiagonal elements of the unit bidiagonal +*> matrix L. +*> \endverbatim +*> +*> \param[in] LD +*> \verbatim +*> LD is DOUBLE PRECISION array, dimension (N-1) +*> The (N-1) elements L(i)*D(i). +*> \endverbatim +*> +*> \param[in] CLSTRT +*> \verbatim +*> CLSTRT is INTEGER +*> The index of the first eigenvalue in the cluster. +*> \endverbatim +*> +*> \param[in] CLEND +*> \verbatim +*> CLEND is INTEGER +*> The index of the last eigenvalue in the cluster. +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension +*> dimension is >= (CLEND-CLSTRT+1) +*> The eigenvalue APPROXIMATIONS of L D L^T in ascending order. +*> W( CLSTRT ) through W( CLEND ) form the cluster of relatively +*> close eigenalues. +*> \endverbatim +*> +*> \param[in,out] WGAP +*> \verbatim +*> WGAP is DOUBLE PRECISION array, dimension +*> dimension is >= (CLEND-CLSTRT+1) +*> The separation from the right neighbor eigenvalue in W. +*> \endverbatim +*> +*> \param[in] WERR +*> \verbatim +*> WERR is DOUBLE PRECISION array, dimension +*> dimension is >= (CLEND-CLSTRT+1) +*> WERR contain the semiwidth of the uncertainty +*> interval of the corresponding eigenvalue APPROXIMATION in W +*> \endverbatim +*> +*> \param[in] SPDIAM +*> \verbatim +*> SPDIAM is DOUBLE PRECISION +*> estimate of the spectral diameter obtained from the +*> Gerschgorin intervals +*> \endverbatim +*> +*> \param[in] CLGAPL +*> \verbatim +*> CLGAPL is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] CLGAPR +*> \verbatim +*> CLGAPR is DOUBLE PRECISION +*> absolute gap on each end of the cluster. +*> Set by the calling routine to protect against shifts too close +*> to eigenvalues outside the cluster. +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is DOUBLE PRECISION +*> The minimum pivot allowed in the Sturm sequence. +*> \endverbatim +*> +*> \param[out] SIGMA +*> \verbatim +*> SIGMA is DOUBLE PRECISION +*> The shift used to form L(+) D(+) L(+)^T. +*> \endverbatim +*> +*> \param[out] DPLUS +*> \verbatim +*> DPLUS is DOUBLE PRECISION array, dimension (N) +*> The N diagonal elements of the diagonal matrix D(+). +*> \endverbatim +*> +*> \param[out] LPLUS +*> \verbatim +*> LPLUS is DOUBLE PRECISION array, dimension (N-1) +*> The first (N-1) elements of LPLUS contain the subdiagonal +*> elements of the unit bidiagonal matrix L(+). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> Workspace. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> Signals processing OK (=0) or failure (=1) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE DLARRF( N, D, L, LD, CLSTRT, CLEND, + $ W, WGAP, WERR, + $ SPDIAM, CLGAPL, CLGAPR, PIVMIN, SIGMA, + $ DPLUS, LPLUS, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER CLSTRT, CLEND, INFO, N + DOUBLE PRECISION CLGAPL, CLGAPR, PIVMIN, SIGMA, SPDIAM +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), DPLUS( * ), L( * ), LD( * ), + $ LPLUS( * ), W( * ), WGAP( * ), WERR( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION FOUR, MAXGROWTH1, MAXGROWTH2, ONE, QUART, TWO + PARAMETER ( ONE = 1.0D0, TWO = 2.0D0, FOUR = 4.0D0, + $ QUART = 0.25D0, + $ MAXGROWTH1 = 8.D0, + $ MAXGROWTH2 = 8.D0 ) +* .. +* .. Local Scalars .. + LOGICAL DORRR1, FORCER, NOFAIL, SAWNAN1, SAWNAN2, TRYRRR1 + INTEGER I, INDX, KTRY, KTRYMAX, SLEFT, SRIGHT, SHIFT + PARAMETER ( KTRYMAX = 1, SLEFT = 1, SRIGHT = 2 ) + DOUBLE PRECISION AVGAP, BESTSHIFT, CLWDTH, EPS, FACT, FAIL, + $ FAIL2, GROWTHBOUND, LDELTA, LDMAX, LSIGMA, + $ MAX1, MAX2, MINGAP, OLDP, PROD, RDELTA, RDMAX, + $ RRR1, RRR2, RSIGMA, S, SMLGROWTH, TMP, ZNM2 +* .. +* .. External Functions .. + LOGICAL DISNAN + DOUBLE PRECISION DLAMCH + EXTERNAL DISNAN, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DCOPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* + FACT = DBLE(2**KTRYMAX) + EPS = DLAMCH( 'Precision' ) + SHIFT = 0 + FORCER = .FALSE. + + +* Note that we cannot guarantee that for any of the shifts tried, +* the factorization has a small or even moderate element growth. +* There could be Ritz values at both ends of the cluster and despite +* backing off, there are examples where all factorizations tried +* (in IEEE mode, allowing zero pivots & infinities) have INFINITE +* element growth. +* For this reason, we should use PIVMIN in this subroutine so that at +* least the L D L^T factorization exists. It can be checked afterwards +* whether the element growth caused bad residuals/orthogonality. + +* Decide whether the code should accept the best among all +* representations despite large element growth or signal INFO=1 +* Setting NOFAIL to .FALSE. for quick fix for bug 113 + NOFAIL = .FALSE. +* + +* Compute the average gap length of the cluster + CLWDTH = ABS(W(CLEND)-W(CLSTRT)) + WERR(CLEND) + WERR(CLSTRT) + AVGAP = CLWDTH / DBLE(CLEND-CLSTRT) + MINGAP = MIN(CLGAPL, CLGAPR) +* Initial values for shifts to both ends of cluster + LSIGMA = MIN(W( CLSTRT ),W( CLEND )) - WERR( CLSTRT ) + RSIGMA = MAX(W( CLSTRT ),W( CLEND )) + WERR( CLEND ) + +* Use a small fudge to make sure that we really shift to the outside + LSIGMA = LSIGMA - ABS(LSIGMA)* FOUR * EPS + RSIGMA = RSIGMA + ABS(RSIGMA)* FOUR * EPS + +* Compute upper bounds for how much to back off the initial shifts + LDMAX = QUART * MINGAP + TWO * PIVMIN + RDMAX = QUART * MINGAP + TWO * PIVMIN + + LDELTA = MAX(AVGAP,WGAP( CLSTRT ))/FACT + RDELTA = MAX(AVGAP,WGAP( CLEND-1 ))/FACT +* +* Initialize the record of the best representation found +* + S = DLAMCH( 'S' ) + SMLGROWTH = ONE / S + FAIL = DBLE(N-1)*MINGAP/(SPDIAM*EPS) + FAIL2 = DBLE(N-1)*MINGAP/(SPDIAM*SQRT(EPS)) + BESTSHIFT = LSIGMA +* +* while (KTRY <= KTRYMAX) + KTRY = 0 + GROWTHBOUND = MAXGROWTH1*SPDIAM + + 5 CONTINUE + SAWNAN1 = .FALSE. + SAWNAN2 = .FALSE. +* Ensure that we do not back off too much of the initial shifts + LDELTA = MIN(LDMAX,LDELTA) + RDELTA = MIN(RDMAX,RDELTA) + +* Compute the element growth when shifting to both ends of the cluster +* accept the shift if there is no element growth at one of the two ends + +* Left end + S = -LSIGMA + DPLUS( 1 ) = D( 1 ) + S + IF(ABS(DPLUS(1)).LT.PIVMIN) THEN + DPLUS(1) = -PIVMIN +* Need to set SAWNAN1 because refined RRR test should not be used +* in this case + SAWNAN1 = .TRUE. + ENDIF + MAX1 = ABS( DPLUS( 1 ) ) + DO 6 I = 1, N - 1 + LPLUS( I ) = LD( I ) / DPLUS( I ) + S = S*LPLUS( I )*L( I ) - LSIGMA + DPLUS( I+1 ) = D( I+1 ) + S + IF(ABS(DPLUS(I+1)).LT.PIVMIN) THEN + DPLUS(I+1) = -PIVMIN +* Need to set SAWNAN1 because refined RRR test should not be used +* in this case + SAWNAN1 = .TRUE. + ENDIF + MAX1 = MAX( MAX1,ABS(DPLUS(I+1)) ) + 6 CONTINUE + SAWNAN1 = SAWNAN1 .OR. DISNAN( MAX1 ) + + IF( FORCER .OR. + $ (MAX1.LE.GROWTHBOUND .AND. .NOT.SAWNAN1 ) ) THEN + SIGMA = LSIGMA + SHIFT = SLEFT + GOTO 100 + ENDIF + +* Right end + S = -RSIGMA + WORK( 1 ) = D( 1 ) + S + IF(ABS(WORK(1)).LT.PIVMIN) THEN + WORK(1) = -PIVMIN +* Need to set SAWNAN2 because refined RRR test should not be used +* in this case + SAWNAN2 = .TRUE. + ENDIF + MAX2 = ABS( WORK( 1 ) ) + DO 7 I = 1, N - 1 + WORK( N+I ) = LD( I ) / WORK( I ) + S = S*WORK( N+I )*L( I ) - RSIGMA + WORK( I+1 ) = D( I+1 ) + S + IF(ABS(WORK(I+1)).LT.PIVMIN) THEN + WORK(I+1) = -PIVMIN +* Need to set SAWNAN2 because refined RRR test should not be used +* in this case + SAWNAN2 = .TRUE. + ENDIF + MAX2 = MAX( MAX2,ABS(WORK(I+1)) ) + 7 CONTINUE + SAWNAN2 = SAWNAN2 .OR. DISNAN( MAX2 ) + + IF( FORCER .OR. + $ (MAX2.LE.GROWTHBOUND .AND. .NOT.SAWNAN2 ) ) THEN + SIGMA = RSIGMA + SHIFT = SRIGHT + GOTO 100 + ENDIF +* If we are at this point, both shifts led to too much element growth + +* Record the better of the two shifts (provided it didn't lead to NaN) + IF(SAWNAN1.AND.SAWNAN2) THEN +* both MAX1 and MAX2 are NaN + GOTO 50 + ELSE + IF( .NOT.SAWNAN1 ) THEN + INDX = 1 + IF(MAX1.LE.SMLGROWTH) THEN + SMLGROWTH = MAX1 + BESTSHIFT = LSIGMA + ENDIF + ENDIF + IF( .NOT.SAWNAN2 ) THEN + IF(SAWNAN1 .OR. MAX2.LE.MAX1) INDX = 2 + IF(MAX2.LE.SMLGROWTH) THEN + SMLGROWTH = MAX2 + BESTSHIFT = RSIGMA + ENDIF + ENDIF + ENDIF + +* If we are here, both the left and the right shift led to +* element growth. If the element growth is moderate, then +* we may still accept the representation, if it passes a +* refined test for RRR. This test supposes that no NaN occurred. +* Moreover, we use the refined RRR test only for isolated clusters. + IF((CLWDTH.LT.MINGAP/DBLE(128)) .AND. + $ (MIN(MAX1,MAX2).LT.FAIL2) + $ .AND.(.NOT.SAWNAN1).AND.(.NOT.SAWNAN2)) THEN + DORRR1 = .TRUE. + ELSE + DORRR1 = .FALSE. + ENDIF + TRYRRR1 = .TRUE. + IF( TRYRRR1 .AND. DORRR1 ) THEN + IF(INDX.EQ.1) THEN + TMP = ABS( DPLUS( N ) ) + ZNM2 = ONE + PROD = ONE + OLDP = ONE + DO 15 I = N-1, 1, -1 + IF( PROD .LE. EPS ) THEN + PROD = + $ ((DPLUS(I+1)*WORK(N+I+1))/(DPLUS(I)*WORK(N+I)))*OLDP + ELSE + PROD = PROD*ABS(WORK(N+I)) + END IF + OLDP = PROD + ZNM2 = ZNM2 + PROD**2 + TMP = MAX( TMP, ABS( DPLUS( I ) * PROD )) + 15 CONTINUE + RRR1 = TMP/( SPDIAM * SQRT( ZNM2 ) ) + IF (RRR1.LE.MAXGROWTH2) THEN + SIGMA = LSIGMA + SHIFT = SLEFT + GOTO 100 + ENDIF + ELSE IF(INDX.EQ.2) THEN + TMP = ABS( WORK( N ) ) + ZNM2 = ONE + PROD = ONE + OLDP = ONE + DO 16 I = N-1, 1, -1 + IF( PROD .LE. EPS ) THEN + PROD = ((WORK(I+1)*LPLUS(I+1))/(WORK(I)*LPLUS(I)))*OLDP + ELSE + PROD = PROD*ABS(LPLUS(I)) + END IF + OLDP = PROD + ZNM2 = ZNM2 + PROD**2 + TMP = MAX( TMP, ABS( WORK( I ) * PROD )) + 16 CONTINUE + RRR2 = TMP/( SPDIAM * SQRT( ZNM2 ) ) + IF (RRR2.LE.MAXGROWTH2) THEN + SIGMA = RSIGMA + SHIFT = SRIGHT + GOTO 100 + ENDIF + END IF + ENDIF + + 50 CONTINUE + + IF (KTRY.LT.KTRYMAX) THEN +* If we are here, both shifts failed also the RRR test. +* Back off to the outside + LSIGMA = MAX( LSIGMA - LDELTA, + $ LSIGMA - LDMAX) + RSIGMA = MIN( RSIGMA + RDELTA, + $ RSIGMA + RDMAX ) + LDELTA = TWO * LDELTA + RDELTA = TWO * RDELTA + KTRY = KTRY + 1 + GOTO 5 + ELSE +* None of the representations investigated satisfied our +* criteria. Take the best one we found. + IF((SMLGROWTH.LT.FAIL).OR.NOFAIL) THEN + LSIGMA = BESTSHIFT + RSIGMA = BESTSHIFT + FORCER = .TRUE. + GOTO 5 + ELSE + INFO = 1 + RETURN + ENDIF + END IF + + 100 CONTINUE + IF (SHIFT.EQ.SLEFT) THEN + ELSEIF (SHIFT.EQ.SRIGHT) THEN +* store new L and D back into DPLUS, LPLUS + CALL DCOPY( N, WORK, 1, DPLUS, 1 ) + CALL DCOPY( N-1, WORK(N+1), 1, LPLUS, 1 ) + ENDIF + + RETURN +* +* End of DLARRF +* + END diff --git a/dspl/liblapack/SRC/dlarrj.f b/dspl/liblapack/SRC/dlarrj.f new file mode 100644 index 0000000..097ba9f --- /dev/null +++ b/dspl/liblapack/SRC/dlarrj.f @@ -0,0 +1,379 @@ +*> \brief \b DLARRJ performs refinement of the initial estimates of the eigenvalues of the matrix T. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARRJ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARRJ( N, D, E2, IFIRST, ILAST, +* RTOL, OFFSET, W, WERR, WORK, IWORK, +* PIVMIN, SPDIAM, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IFIRST, ILAST, INFO, N, OFFSET +* DOUBLE PRECISION PIVMIN, RTOL, SPDIAM +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION D( * ), E2( * ), W( * ), +* $ WERR( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Given the initial eigenvalue approximations of T, DLARRJ +*> does bisection to refine the eigenvalues of T, +*> W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial +*> guesses for these eigenvalues are input in W, the corresponding estimate +*> of the error in these guesses in WERR. During bisection, intervals +*> [left, right] are maintained by storing their mid-points and +*> semi-widths in the arrays W and WERR respectively. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The N diagonal elements of T. +*> \endverbatim +*> +*> \param[in] E2 +*> \verbatim +*> E2 is DOUBLE PRECISION array, dimension (N-1) +*> The Squares of the (N-1) subdiagonal elements of T. +*> \endverbatim +*> +*> \param[in] IFIRST +*> \verbatim +*> IFIRST is INTEGER +*> The index of the first eigenvalue to be computed. +*> \endverbatim +*> +*> \param[in] ILAST +*> \verbatim +*> ILAST is INTEGER +*> The index of the last eigenvalue to be computed. +*> \endverbatim +*> +*> \param[in] RTOL +*> \verbatim +*> RTOL is DOUBLE PRECISION +*> Tolerance for the convergence of the bisection intervals. +*> An interval [LEFT,RIGHT] has converged if +*> RIGHT-LEFT.LT.RTOL*MAX(|LEFT|,|RIGHT|). +*> \endverbatim +*> +*> \param[in] OFFSET +*> \verbatim +*> OFFSET is INTEGER +*> Offset for the arrays W and WERR, i.e., the IFIRST-OFFSET +*> through ILAST-OFFSET elements of these arrays are to be used. +*> \endverbatim +*> +*> \param[in,out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are +*> estimates of the eigenvalues of L D L^T indexed IFIRST through +*> ILAST. +*> On output, these estimates are refined. +*> \endverbatim +*> +*> \param[in,out] WERR +*> \verbatim +*> WERR is DOUBLE PRECISION array, dimension (N) +*> On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are +*> the errors in the estimates of the corresponding elements in W. +*> On output, these errors are refined. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> Workspace. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (2*N) +*> Workspace. +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is DOUBLE PRECISION +*> The minimum pivot in the Sturm sequence for T. +*> \endverbatim +*> +*> \param[in] SPDIAM +*> \verbatim +*> SPDIAM is DOUBLE PRECISION +*> The spectral diameter of T. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> Error flag. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE DLARRJ( N, D, E2, IFIRST, ILAST, + $ RTOL, OFFSET, W, WERR, WORK, IWORK, + $ PIVMIN, SPDIAM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER IFIRST, ILAST, INFO, N, OFFSET + DOUBLE PRECISION PIVMIN, RTOL, SPDIAM +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), E2( * ), W( * ), + $ WERR( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, HALF + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ HALF = 0.5D0 ) + INTEGER MAXITR +* .. +* .. Local Scalars .. + INTEGER CNT, I, I1, I2, II, ITER, J, K, NEXT, NINT, + $ OLNINT, P, PREV, SAVI1 + DOUBLE PRECISION DPLUS, FAC, LEFT, MID, RIGHT, S, TMP, WIDTH +* +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* + MAXITR = INT( ( LOG( SPDIAM+PIVMIN )-LOG( PIVMIN ) ) / + $ LOG( TWO ) ) + 2 +* +* Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. +* The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while +* Count( WORK(2*I) ) is stored in IWORK( 2*I ). The integer IWORK( 2*I-1 ) +* for an unconverged interval is set to the index of the next unconverged +* interval, and is -1 or 0 for a converged interval. Thus a linked +* list of unconverged intervals is set up. +* + + I1 = IFIRST + I2 = ILAST +* The number of unconverged intervals + NINT = 0 +* The last unconverged interval found + PREV = 0 + DO 75 I = I1, I2 + K = 2*I + II = I - OFFSET + LEFT = W( II ) - WERR( II ) + MID = W(II) + RIGHT = W( II ) + WERR( II ) + WIDTH = RIGHT - MID + TMP = MAX( ABS( LEFT ), ABS( RIGHT ) ) + +* The following test prevents the test of converged intervals + IF( WIDTH.LT.RTOL*TMP ) THEN +* This interval has already converged and does not need refinement. +* (Note that the gaps might change through refining the +* eigenvalues, however, they can only get bigger.) +* Remove it from the list. + IWORK( K-1 ) = -1 +* Make sure that I1 always points to the first unconverged interval + IF((I.EQ.I1).AND.(I.LT.I2)) I1 = I + 1 + IF((PREV.GE.I1).AND.(I.LE.I2)) IWORK( 2*PREV-1 ) = I + 1 + ELSE +* unconverged interval found + PREV = I +* Make sure that [LEFT,RIGHT] contains the desired eigenvalue +* +* Do while( CNT(LEFT).GT.I-1 ) +* + FAC = ONE + 20 CONTINUE + CNT = 0 + S = LEFT + DPLUS = D( 1 ) - S + IF( DPLUS.LT.ZERO ) CNT = CNT + 1 + DO 30 J = 2, N + DPLUS = D( J ) - S - E2( J-1 )/DPLUS + IF( DPLUS.LT.ZERO ) CNT = CNT + 1 + 30 CONTINUE + IF( CNT.GT.I-1 ) THEN + LEFT = LEFT - WERR( II )*FAC + FAC = TWO*FAC + GO TO 20 + END IF +* +* Do while( CNT(RIGHT).LT.I ) +* + FAC = ONE + 50 CONTINUE + CNT = 0 + S = RIGHT + DPLUS = D( 1 ) - S + IF( DPLUS.LT.ZERO ) CNT = CNT + 1 + DO 60 J = 2, N + DPLUS = D( J ) - S - E2( J-1 )/DPLUS + IF( DPLUS.LT.ZERO ) CNT = CNT + 1 + 60 CONTINUE + IF( CNT.LT.I ) THEN + RIGHT = RIGHT + WERR( II )*FAC + FAC = TWO*FAC + GO TO 50 + END IF + NINT = NINT + 1 + IWORK( K-1 ) = I + 1 + IWORK( K ) = CNT + END IF + WORK( K-1 ) = LEFT + WORK( K ) = RIGHT + 75 CONTINUE + + + SAVI1 = I1 +* +* Do while( NINT.GT.0 ), i.e. there are still unconverged intervals +* and while (ITER.LT.MAXITR) +* + ITER = 0 + 80 CONTINUE + PREV = I1 - 1 + I = I1 + OLNINT = NINT + + DO 100 P = 1, OLNINT + K = 2*I + II = I - OFFSET + NEXT = IWORK( K-1 ) + LEFT = WORK( K-1 ) + RIGHT = WORK( K ) + MID = HALF*( LEFT + RIGHT ) + +* semiwidth of interval + WIDTH = RIGHT - MID + TMP = MAX( ABS( LEFT ), ABS( RIGHT ) ) + + IF( ( WIDTH.LT.RTOL*TMP ) .OR. + $ (ITER.EQ.MAXITR) )THEN +* reduce number of unconverged intervals + NINT = NINT - 1 +* Mark interval as converged. + IWORK( K-1 ) = 0 + IF( I1.EQ.I ) THEN + I1 = NEXT + ELSE +* Prev holds the last unconverged interval previously examined + IF(PREV.GE.I1) IWORK( 2*PREV-1 ) = NEXT + END IF + I = NEXT + GO TO 100 + END IF + PREV = I +* +* Perform one bisection step +* + CNT = 0 + S = MID + DPLUS = D( 1 ) - S + IF( DPLUS.LT.ZERO ) CNT = CNT + 1 + DO 90 J = 2, N + DPLUS = D( J ) - S - E2( J-1 )/DPLUS + IF( DPLUS.LT.ZERO ) CNT = CNT + 1 + 90 CONTINUE + IF( CNT.LE.I-1 ) THEN + WORK( K-1 ) = MID + ELSE + WORK( K ) = MID + END IF + I = NEXT + + 100 CONTINUE + ITER = ITER + 1 +* do another loop if there are still unconverged intervals +* However, in the last iteration, all intervals are accepted +* since this is the best we can do. + IF( ( NINT.GT.0 ).AND.(ITER.LE.MAXITR) ) GO TO 80 +* +* +* At this point, all the intervals have converged + DO 110 I = SAVI1, ILAST + K = 2*I + II = I - OFFSET +* All intervals marked by '0' have been refined. + IF( IWORK( K-1 ).EQ.0 ) THEN + W( II ) = HALF*( WORK( K-1 )+WORK( K ) ) + WERR( II ) = WORK( K ) - W( II ) + END IF + 110 CONTINUE +* + + RETURN +* +* End of DLARRJ +* + END diff --git a/dspl/liblapack/SRC/dlarrk.f b/dspl/liblapack/SRC/dlarrk.f new file mode 100644 index 0000000..e92fe72 --- /dev/null +++ b/dspl/liblapack/SRC/dlarrk.f @@ -0,0 +1,256 @@ +*> \brief \b DLARRK computes one eigenvalue of a symmetric tridiagonal matrix T to suitable accuracy. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARRK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARRK( N, IW, GL, GU, +* D, E2, PIVMIN, RELTOL, W, WERR, INFO) +* +* .. Scalar Arguments .. +* INTEGER INFO, IW, N +* DOUBLE PRECISION PIVMIN, RELTOL, GL, GU, W, WERR +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E2( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARRK computes one eigenvalue of a symmetric tridiagonal +*> matrix T to suitable accuracy. This is an auxiliary code to be +*> called from DSTEMR. +*> +*> To avoid overflow, the matrix must be scaled so that its +*> largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value, and for greatest +*> accuracy, it should not be much smaller than that. +*> +*> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal +*> Matrix", Report CS41, Computer Science Dept., Stanford +*> University, July 21, 1966. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the tridiagonal matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in] IW +*> \verbatim +*> IW is INTEGER +*> The index of the eigenvalues to be returned. +*> \endverbatim +*> +*> \param[in] GL +*> \verbatim +*> GL is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] GU +*> \verbatim +*> GU is DOUBLE PRECISION +*> An upper and a lower bound on the eigenvalue. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[in] E2 +*> \verbatim +*> E2 is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) squared off-diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is DOUBLE PRECISION +*> The minimum pivot allowed in the Sturm sequence for T. +*> \endverbatim +*> +*> \param[in] RELTOL +*> \verbatim +*> RELTOL is DOUBLE PRECISION +*> The minimum relative width of an interval. When an interval +*> is narrower than RELTOL times the larger (in +*> magnitude) endpoint, then it is considered to be +*> sufficiently small, i.e., converged. Note: this should +*> always be at least radix*machine epsilon. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] WERR +*> \verbatim +*> WERR is DOUBLE PRECISION +*> The error bound on the corresponding eigenvalue approximation +*> in W. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Eigenvalue converged +*> = -1: Eigenvalue did NOT converge +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> FUDGE DOUBLE PRECISION, default = 2 +*> A "fudge factor" to widen the Gershgorin intervals. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLARRK( N, IW, GL, GU, + $ D, E2, PIVMIN, RELTOL, W, WERR, INFO) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, IW, N + DOUBLE PRECISION PIVMIN, RELTOL, GL, GU, W, WERR +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E2( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION FUDGE, HALF, TWO, ZERO + PARAMETER ( HALF = 0.5D0, TWO = 2.0D0, + $ FUDGE = TWO, ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, IT, ITMAX, NEGCNT + DOUBLE PRECISION ATOLI, EPS, LEFT, MID, RIGHT, RTOLI, TMP1, + $ TMP2, TNORM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + INFO = 0 + RETURN + END IF +* +* Get machine constants + EPS = DLAMCH( 'P' ) + + TNORM = MAX( ABS( GL ), ABS( GU ) ) + RTOLI = RELTOL + ATOLI = FUDGE*TWO*PIVMIN + + ITMAX = INT( ( LOG( TNORM+PIVMIN )-LOG( PIVMIN ) ) / + $ LOG( TWO ) ) + 2 + + INFO = -1 + + LEFT = GL - FUDGE*TNORM*EPS*N - FUDGE*TWO*PIVMIN + RIGHT = GU + FUDGE*TNORM*EPS*N + FUDGE*TWO*PIVMIN + IT = 0 + + 10 CONTINUE +* +* Check if interval converged or maximum number of iterations reached +* + TMP1 = ABS( RIGHT - LEFT ) + TMP2 = MAX( ABS(RIGHT), ABS(LEFT) ) + IF( TMP1.LT.MAX( ATOLI, PIVMIN, RTOLI*TMP2 ) ) THEN + INFO = 0 + GOTO 30 + ENDIF + IF(IT.GT.ITMAX) + $ GOTO 30 + +* +* Count number of negative pivots for mid-point +* + IT = IT + 1 + MID = HALF * (LEFT + RIGHT) + NEGCNT = 0 + TMP1 = D( 1 ) - MID + IF( ABS( TMP1 ).LT.PIVMIN ) + $ TMP1 = -PIVMIN + IF( TMP1.LE.ZERO ) + $ NEGCNT = NEGCNT + 1 +* + DO 20 I = 2, N + TMP1 = D( I ) - E2( I-1 ) / TMP1 - MID + IF( ABS( TMP1 ).LT.PIVMIN ) + $ TMP1 = -PIVMIN + IF( TMP1.LE.ZERO ) + $ NEGCNT = NEGCNT + 1 + 20 CONTINUE + + IF(NEGCNT.GE.IW) THEN + RIGHT = MID + ELSE + LEFT = MID + ENDIF + GOTO 10 + + 30 CONTINUE +* +* Converged or maximum number of iterations reached +* + W = HALF * (LEFT + RIGHT) + WERR = HALF * ABS( RIGHT - LEFT ) + + RETURN +* +* End of DLARRK +* + END diff --git a/dspl/liblapack/SRC/dlarrr.f b/dspl/liblapack/SRC/dlarrr.f new file mode 100644 index 0000000..7aa2247 --- /dev/null +++ b/dspl/liblapack/SRC/dlarrr.f @@ -0,0 +1,211 @@ +*> \brief \b DLARRR performs tests to decide whether the symmetric tridiagonal matrix T warrants expensive computations which guarantee high relative accuracy in the eigenvalues. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARRR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARRR( N, D, E, INFO ) +* +* .. Scalar Arguments .. +* INTEGER N, INFO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* .. +* +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Perform tests to decide whether the symmetric tridiagonal matrix T +*> warrants expensive computations which guarantee high relative accuracy +*> in the eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N > 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The N diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> On entry, the first (N-1) entries contain the subdiagonal +*> elements of the tridiagonal matrix T; E(N) is set to ZERO. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> INFO = 0(default) : the matrix warrants computations preserving +*> relative accuracy. +*> INFO = 1 : the matrix warrants computations guaranteeing +*> only absolute accuracy. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE DLARRR( N, D, E, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER N, INFO +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) +* .. +* +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, RELCOND + PARAMETER ( ZERO = 0.0D0, + $ RELCOND = 0.999D0 ) +* .. +* .. Local Scalars .. + INTEGER I + LOGICAL YESREL + DOUBLE PRECISION EPS, SAFMIN, SMLNUM, RMIN, TMP, TMP2, + $ OFFDIG, OFFDIG2 + +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + INFO = 0 + RETURN + END IF +* +* As a default, do NOT go for relative-accuracy preserving computations. + INFO = 1 + + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + RMIN = SQRT( SMLNUM ) + +* Tests for relative accuracy +* +* Test for scaled diagonal dominance +* Scale the diagonal entries to one and check whether the sum of the +* off-diagonals is less than one +* +* The sdd relative error bounds have a 1/(1- 2*x) factor in them, +* x = max(OFFDIG + OFFDIG2), so when x is close to 1/2, no relative +* accuracy is promised. In the notation of the code fragment below, +* 1/(1 - (OFFDIG + OFFDIG2)) is the condition number. +* We don't think it is worth going into "sdd mode" unless the relative +* condition number is reasonable, not 1/macheps. +* The threshold should be compatible with other thresholds used in the +* code. We set OFFDIG + OFFDIG2 <= .999 =: RELCOND, it corresponds +* to losing at most 3 decimal digits: 1 / (1 - (OFFDIG + OFFDIG2)) <= 1000 +* instead of the current OFFDIG + OFFDIG2 < 1 +* + YESREL = .TRUE. + OFFDIG = ZERO + TMP = SQRT(ABS(D(1))) + IF (TMP.LT.RMIN) YESREL = .FALSE. + IF(.NOT.YESREL) GOTO 11 + DO 10 I = 2, N + TMP2 = SQRT(ABS(D(I))) + IF (TMP2.LT.RMIN) YESREL = .FALSE. + IF(.NOT.YESREL) GOTO 11 + OFFDIG2 = ABS(E(I-1))/(TMP*TMP2) + IF(OFFDIG+OFFDIG2.GE.RELCOND) YESREL = .FALSE. + IF(.NOT.YESREL) GOTO 11 + TMP = TMP2 + OFFDIG = OFFDIG2 + 10 CONTINUE + 11 CONTINUE + + IF( YESREL ) THEN + INFO = 0 + RETURN + ELSE + ENDIF +* + +* +* *** MORE TO BE IMPLEMENTED *** +* + +* +* Test if the lower bidiagonal matrix L from T = L D L^T +* (zero shift facto) is well conditioned +* + +* +* Test if the upper bidiagonal matrix U from T = U D U^T +* (zero shift facto) is well conditioned. +* In this case, the matrix needs to be flipped and, at the end +* of the eigenvector computation, the flip needs to be applied +* to the computed eigenvectors (and the support) +* + +* + RETURN +* +* END OF DLARRR +* + END diff --git a/dspl/liblapack/SRC/dlarrv.f b/dspl/liblapack/SRC/dlarrv.f new file mode 100644 index 0000000..cace17c --- /dev/null +++ b/dspl/liblapack/SRC/dlarrv.f @@ -0,0 +1,1045 @@ +*> \brief \b DLARRV computes the eigenvectors of the tridiagonal matrix T = L D LT given L, D and the eigenvalues of L D LT. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARRV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARRV( N, VL, VU, D, L, PIVMIN, +* ISPLIT, M, DOL, DOU, MINRGP, +* RTOL1, RTOL2, W, WERR, WGAP, +* IBLOCK, INDEXW, GERS, Z, LDZ, ISUPPZ, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER DOL, DOU, INFO, LDZ, M, N +* DOUBLE PRECISION MINRGP, PIVMIN, RTOL1, RTOL2, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IBLOCK( * ), INDEXW( * ), ISPLIT( * ), +* $ ISUPPZ( * ), IWORK( * ) +* DOUBLE PRECISION D( * ), GERS( * ), L( * ), W( * ), WERR( * ), +* $ WGAP( * ), WORK( * ) +* DOUBLE PRECISION Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARRV computes the eigenvectors of the tridiagonal matrix +*> T = L D L**T given L, D and APPROXIMATIONS to the eigenvalues of L D L**T. +*> The input eigenvalues should have been computed by DLARRE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> Lower bound of the interval that contains the desired +*> eigenvalues. VL < VU. Needed to compute gaps on the left or right +*> end of the extremal eigenvalues in the desired RANGE. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> Upper bound of the interval that contains the desired +*> eigenvalues. VL < VU. +*> Note: VU is currently not used by this implementation of DLARRV, VU is +*> passed to DLARRV because it could be used compute gaps on the right end +*> of the extremal eigenvalues. However, with not much initial accuracy in +*> LAMBDA and VU, the formula can lead to an overestimation of the right gap +*> and thus to inadequately early RQI 'convergence'. This is currently +*> prevented this by forcing a small right gap. And so it turns out that VU +*> is currently not used by this implementation of DLARRV. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the N diagonal elements of the diagonal matrix D. +*> On exit, D may be overwritten. +*> \endverbatim +*> +*> \param[in,out] L +*> \verbatim +*> L is DOUBLE PRECISION array, dimension (N) +*> On entry, the (N-1) subdiagonal elements of the unit +*> bidiagonal matrix L are in elements 1 to N-1 of L +*> (if the matrix is not split.) At the end of each block +*> is stored the corresponding shift as given by DLARRE. +*> On exit, L is overwritten. +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is DOUBLE PRECISION +*> The minimum pivot allowed in the Sturm sequence. +*> \endverbatim +*> +*> \param[in] ISPLIT +*> \verbatim +*> ISPLIT is INTEGER array, dimension (N) +*> The splitting points, at which T breaks up into blocks. +*> The first block consists of rows/columns 1 to +*> ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 +*> through ISPLIT( 2 ), etc. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The total number of input eigenvalues. 0 <= M <= N. +*> \endverbatim +*> +*> \param[in] DOL +*> \verbatim +*> DOL is INTEGER +*> \endverbatim +*> +*> \param[in] DOU +*> \verbatim +*> DOU is INTEGER +*> If the user wants to compute only selected eigenvectors from all +*> the eigenvalues supplied, he can specify an index range DOL:DOU. +*> Or else the setting DOL=1, DOU=M should be applied. +*> Note that DOL and DOU refer to the order in which the eigenvalues +*> are stored in W. +*> If the user wants to compute only selected eigenpairs, then +*> the columns DOL-1 to DOU+1 of the eigenvector space Z contain the +*> computed eigenvectors. All other columns of Z are set to zero. +*> \endverbatim +*> +*> \param[in] MINRGP +*> \verbatim +*> MINRGP is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] RTOL1 +*> \verbatim +*> RTOL1 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] RTOL2 +*> \verbatim +*> RTOL2 is DOUBLE PRECISION +*> Parameters for bisection. +*> An interval [LEFT,RIGHT] has converged if +*> RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) +*> \endverbatim +*> +*> \param[in,out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements of W contain the APPROXIMATE eigenvalues for +*> which eigenvectors are to be computed. The eigenvalues +*> should be grouped by split-off block and ordered from +*> smallest to largest within the block ( The output array +*> W from DLARRE is expected here ). Furthermore, they are with +*> respect to the shift of the corresponding root representation +*> for their block. On exit, W holds the eigenvalues of the +*> UNshifted matrix. +*> \endverbatim +*> +*> \param[in,out] WERR +*> \verbatim +*> WERR is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the semiwidth of the uncertainty +*> interval of the corresponding eigenvalue in W +*> \endverbatim +*> +*> \param[in,out] WGAP +*> \verbatim +*> WGAP is DOUBLE PRECISION array, dimension (N) +*> The separation from the right neighbor eigenvalue in W. +*> \endverbatim +*> +*> \param[in] IBLOCK +*> \verbatim +*> IBLOCK is INTEGER array, dimension (N) +*> The indices of the blocks (submatrices) associated with the +*> corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue +*> W(i) belongs to the first block from the top, =2 if W(i) +*> belongs to the second block, etc. +*> \endverbatim +*> +*> \param[in] INDEXW +*> \verbatim +*> INDEXW is INTEGER array, dimension (N) +*> The indices of the eigenvalues within each block (submatrix); +*> for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the +*> i-th eigenvalue W(i) is the 10-th eigenvalue in the second block. +*> \endverbatim +*> +*> \param[in] GERS +*> \verbatim +*> GERS is DOUBLE PRECISION array, dimension (2*N) +*> The N Gerschgorin intervals (the i-th Gerschgorin interval +*> is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should +*> be computed from the original UNshifted matrix. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) +*> If INFO = 0, the first M columns of Z contain the +*> orthonormal eigenvectors of the matrix T +*> corresponding to the input eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The I-th eigenvector +*> is nonzero only in elements ISUPPZ( 2*I-1 ) through +*> ISUPPZ( 2*I ). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (12*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> > 0: A problem occurred in DLARRV. +*> < 0: One of the called subroutines signaled an internal problem. +*> Needs inspection of the corresponding parameter IINFO +*> for further information. +*> +*> =-1: Problem in DLARRB when refining a child's eigenvalues. +*> =-2: Problem in DLARRF when computing the RRR of a child. +*> When a child is inside a tight cluster, it can be difficult +*> to find an RRR. A partial remedy from the user's point of +*> view is to make the parameter MINRGP smaller and recompile. +*> However, as the orthogonality of the computed vectors is +*> proportional to 1/MINRGP, the user should be aware that +*> he might be trading in precision when he decreases MINRGP. +*> =-3: Problem in DLARRB when refining a single eigenvalue +*> after the Rayleigh correction was rejected. +*> = 5: The Rayleigh Quotient Iteration failed to converge to +*> full accuracy in MAXITR steps. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE DLARRV( N, VL, VU, D, L, PIVMIN, + $ ISPLIT, M, DOL, DOU, MINRGP, + $ RTOL1, RTOL2, W, WERR, WGAP, + $ IBLOCK, INDEXW, GERS, Z, LDZ, ISUPPZ, + $ WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER DOL, DOU, INFO, LDZ, M, N + DOUBLE PRECISION MINRGP, PIVMIN, RTOL1, RTOL2, VL, VU +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), INDEXW( * ), ISPLIT( * ), + $ ISUPPZ( * ), IWORK( * ) + DOUBLE PRECISION D( * ), GERS( * ), L( * ), W( * ), WERR( * ), + $ WGAP( * ), WORK( * ) + DOUBLE PRECISION Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXITR + PARAMETER ( MAXITR = 10 ) + DOUBLE PRECISION ZERO, ONE, TWO, THREE, FOUR, HALF + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, + $ TWO = 2.0D0, THREE = 3.0D0, + $ FOUR = 4.0D0, HALF = 0.5D0) +* .. +* .. Local Scalars .. + LOGICAL ESKIP, NEEDBS, STP2II, TRYRQC, USEDBS, USEDRQ + INTEGER DONE, I, IBEGIN, IDONE, IEND, II, IINDC1, + $ IINDC2, IINDR, IINDWK, IINFO, IM, IN, INDEIG, + $ INDLD, INDLLD, INDWRK, ISUPMN, ISUPMX, ITER, + $ ITMP1, J, JBLK, K, MINIWSIZE, MINWSIZE, NCLUS, + $ NDEPTH, NEGCNT, NEWCLS, NEWFST, NEWFTT, NEWLST, + $ NEWSIZ, OFFSET, OLDCLS, OLDFST, OLDIEN, OLDLST, + $ OLDNCL, P, PARITY, Q, WBEGIN, WEND, WINDEX, + $ WINDMN, WINDPL, ZFROM, ZTO, ZUSEDL, ZUSEDU, + $ ZUSEDW + DOUBLE PRECISION BSTRES, BSTW, EPS, FUDGE, GAP, GAPTOL, GL, GU, + $ LAMBDA, LEFT, LGAP, MINGMA, NRMINV, RESID, + $ RGAP, RIGHT, RQCORR, RQTOL, SAVGAP, SGNDEF, + $ SIGMA, SPDIAM, SSIGMA, TAU, TMP, TOL, ZTZ +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAR1V, DLARRB, DLARRF, DLASET, + $ DSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* .. + + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* +* The first N entries of WORK are reserved for the eigenvalues + INDLD = N+1 + INDLLD= 2*N+1 + INDWRK= 3*N+1 + MINWSIZE = 12 * N + + DO 5 I= 1,MINWSIZE + WORK( I ) = ZERO + 5 CONTINUE + +* IWORK(IINDR+1:IINDR+N) hold the twist indices R for the +* factorization used to compute the FP vector + IINDR = 0 +* IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current +* layer and the one above. + IINDC1 = N + IINDC2 = 2*N + IINDWK = 3*N + 1 + + MINIWSIZE = 7 * N + DO 10 I= 1,MINIWSIZE + IWORK( I ) = 0 + 10 CONTINUE + + ZUSEDL = 1 + IF(DOL.GT.1) THEN +* Set lower bound for use of Z + ZUSEDL = DOL-1 + ENDIF + ZUSEDU = M + IF(DOU.LT.M) THEN +* Set lower bound for use of Z + ZUSEDU = DOU+1 + ENDIF +* The width of the part of Z that is used + ZUSEDW = ZUSEDU - ZUSEDL + 1 + + + CALL DLASET( 'Full', N, ZUSEDW, ZERO, ZERO, + $ Z(1,ZUSEDL), LDZ ) + + EPS = DLAMCH( 'Precision' ) + RQTOL = TWO * EPS +* +* Set expert flags for standard code. + TRYRQC = .TRUE. + + IF((DOL.EQ.1).AND.(DOU.EQ.M)) THEN + ELSE +* Only selected eigenpairs are computed. Since the other evalues +* are not refined by RQ iteration, bisection has to compute to full +* accuracy. + RTOL1 = FOUR * EPS + RTOL2 = FOUR * EPS + ENDIF + +* The entries WBEGIN:WEND in W, WERR, WGAP correspond to the +* desired eigenvalues. The support of the nonzero eigenvector +* entries is contained in the interval IBEGIN:IEND. +* Remark that if k eigenpairs are desired, then the eigenvectors +* are stored in k contiguous columns of Z. + +* DONE is the number of eigenvectors already computed + DONE = 0 + IBEGIN = 1 + WBEGIN = 1 + DO 170 JBLK = 1, IBLOCK( M ) + IEND = ISPLIT( JBLK ) + SIGMA = L( IEND ) +* Find the eigenvectors of the submatrix indexed IBEGIN +* through IEND. + WEND = WBEGIN - 1 + 15 CONTINUE + IF( WEND.LT.M ) THEN + IF( IBLOCK( WEND+1 ).EQ.JBLK ) THEN + WEND = WEND + 1 + GO TO 15 + END IF + END IF + IF( WEND.LT.WBEGIN ) THEN + IBEGIN = IEND + 1 + GO TO 170 + ELSEIF( (WEND.LT.DOL).OR.(WBEGIN.GT.DOU) ) THEN + IBEGIN = IEND + 1 + WBEGIN = WEND + 1 + GO TO 170 + END IF + +* Find local spectral diameter of the block + GL = GERS( 2*IBEGIN-1 ) + GU = GERS( 2*IBEGIN ) + DO 20 I = IBEGIN+1 , IEND + GL = MIN( GERS( 2*I-1 ), GL ) + GU = MAX( GERS( 2*I ), GU ) + 20 CONTINUE + SPDIAM = GU - GL + +* OLDIEN is the last index of the previous block + OLDIEN = IBEGIN - 1 +* Calculate the size of the current block + IN = IEND - IBEGIN + 1 +* The number of eigenvalues in the current block + IM = WEND - WBEGIN + 1 + +* This is for a 1x1 block + IF( IBEGIN.EQ.IEND ) THEN + DONE = DONE+1 + Z( IBEGIN, WBEGIN ) = ONE + ISUPPZ( 2*WBEGIN-1 ) = IBEGIN + ISUPPZ( 2*WBEGIN ) = IBEGIN + W( WBEGIN ) = W( WBEGIN ) + SIGMA + WORK( WBEGIN ) = W( WBEGIN ) + IBEGIN = IEND + 1 + WBEGIN = WBEGIN + 1 + GO TO 170 + END IF + +* The desired (shifted) eigenvalues are stored in W(WBEGIN:WEND) +* Note that these can be approximations, in this case, the corresp. +* entries of WERR give the size of the uncertainty interval. +* The eigenvalue approximations will be refined when necessary as +* high relative accuracy is required for the computation of the +* corresponding eigenvectors. + CALL DCOPY( IM, W( WBEGIN ), 1, + $ WORK( WBEGIN ), 1 ) + +* We store in W the eigenvalue approximations w.r.t. the original +* matrix T. + DO 30 I=1,IM + W(WBEGIN+I-1) = W(WBEGIN+I-1)+SIGMA + 30 CONTINUE + + +* NDEPTH is the current depth of the representation tree + NDEPTH = 0 +* PARITY is either 1 or 0 + PARITY = 1 +* NCLUS is the number of clusters for the next level of the +* representation tree, we start with NCLUS = 1 for the root + NCLUS = 1 + IWORK( IINDC1+1 ) = 1 + IWORK( IINDC1+2 ) = IM + +* IDONE is the number of eigenvectors already computed in the current +* block + IDONE = 0 +* loop while( IDONE.LT.IM ) +* generate the representation tree for the current block and +* compute the eigenvectors + 40 CONTINUE + IF( IDONE.LT.IM ) THEN +* This is a crude protection against infinitely deep trees + IF( NDEPTH.GT.M ) THEN + INFO = -2 + RETURN + ENDIF +* breadth first processing of the current level of the representation +* tree: OLDNCL = number of clusters on current level + OLDNCL = NCLUS +* reset NCLUS to count the number of child clusters + NCLUS = 0 +* + PARITY = 1 - PARITY + IF( PARITY.EQ.0 ) THEN + OLDCLS = IINDC1 + NEWCLS = IINDC2 + ELSE + OLDCLS = IINDC2 + NEWCLS = IINDC1 + END IF +* Process the clusters on the current level + DO 150 I = 1, OLDNCL + J = OLDCLS + 2*I +* OLDFST, OLDLST = first, last index of current cluster. +* cluster indices start with 1 and are relative +* to WBEGIN when accessing W, WGAP, WERR, Z + OLDFST = IWORK( J-1 ) + OLDLST = IWORK( J ) + IF( NDEPTH.GT.0 ) THEN +* Retrieve relatively robust representation (RRR) of cluster +* that has been computed at the previous level +* The RRR is stored in Z and overwritten once the eigenvectors +* have been computed or when the cluster is refined + + IF((DOL.EQ.1).AND.(DOU.EQ.M)) THEN +* Get representation from location of the leftmost evalue +* of the cluster + J = WBEGIN + OLDFST - 1 + ELSE + IF(WBEGIN+OLDFST-1.LT.DOL) THEN +* Get representation from the left end of Z array + J = DOL - 1 + ELSEIF(WBEGIN+OLDFST-1.GT.DOU) THEN +* Get representation from the right end of Z array + J = DOU + ELSE + J = WBEGIN + OLDFST - 1 + ENDIF + ENDIF + CALL DCOPY( IN, Z( IBEGIN, J ), 1, D( IBEGIN ), 1 ) + CALL DCOPY( IN-1, Z( IBEGIN, J+1 ), 1, L( IBEGIN ), + $ 1 ) + SIGMA = Z( IEND, J+1 ) + +* Set the corresponding entries in Z to zero + CALL DLASET( 'Full', IN, 2, ZERO, ZERO, + $ Z( IBEGIN, J), LDZ ) + END IF + +* Compute DL and DLL of current RRR + DO 50 J = IBEGIN, IEND-1 + TMP = D( J )*L( J ) + WORK( INDLD-1+J ) = TMP + WORK( INDLLD-1+J ) = TMP*L( J ) + 50 CONTINUE + + IF( NDEPTH.GT.0 ) THEN +* P and Q are index of the first and last eigenvalue to compute +* within the current block + P = INDEXW( WBEGIN-1+OLDFST ) + Q = INDEXW( WBEGIN-1+OLDLST ) +* Offset for the arrays WORK, WGAP and WERR, i.e., the P-OFFSET +* through the Q-OFFSET elements of these arrays are to be used. +* OFFSET = P-OLDFST + OFFSET = INDEXW( WBEGIN ) - 1 +* perform limited bisection (if necessary) to get approximate +* eigenvalues to the precision needed. + CALL DLARRB( IN, D( IBEGIN ), + $ WORK(INDLLD+IBEGIN-1), + $ P, Q, RTOL1, RTOL2, OFFSET, + $ WORK(WBEGIN),WGAP(WBEGIN),WERR(WBEGIN), + $ WORK( INDWRK ), IWORK( IINDWK ), + $ PIVMIN, SPDIAM, IN, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = -1 + RETURN + ENDIF +* We also recompute the extremal gaps. W holds all eigenvalues +* of the unshifted matrix and must be used for computation +* of WGAP, the entries of WORK might stem from RRRs with +* different shifts. The gaps from WBEGIN-1+OLDFST to +* WBEGIN-1+OLDLST are correctly computed in DLARRB. +* However, we only allow the gaps to become greater since +* this is what should happen when we decrease WERR + IF( OLDFST.GT.1) THEN + WGAP( WBEGIN+OLDFST-2 ) = + $ MAX(WGAP(WBEGIN+OLDFST-2), + $ W(WBEGIN+OLDFST-1)-WERR(WBEGIN+OLDFST-1) + $ - W(WBEGIN+OLDFST-2)-WERR(WBEGIN+OLDFST-2) ) + ENDIF + IF( WBEGIN + OLDLST -1 .LT. WEND ) THEN + WGAP( WBEGIN+OLDLST-1 ) = + $ MAX(WGAP(WBEGIN+OLDLST-1), + $ W(WBEGIN+OLDLST)-WERR(WBEGIN+OLDLST) + $ - W(WBEGIN+OLDLST-1)-WERR(WBEGIN+OLDLST-1) ) + ENDIF +* Each time the eigenvalues in WORK get refined, we store +* the newly found approximation with all shifts applied in W + DO 53 J=OLDFST,OLDLST + W(WBEGIN+J-1) = WORK(WBEGIN+J-1)+SIGMA + 53 CONTINUE + END IF + +* Process the current node. + NEWFST = OLDFST + DO 140 J = OLDFST, OLDLST + IF( J.EQ.OLDLST ) THEN +* we are at the right end of the cluster, this is also the +* boundary of the child cluster + NEWLST = J + ELSE IF ( WGAP( WBEGIN + J -1).GE. + $ MINRGP* ABS( WORK(WBEGIN + J -1) ) ) THEN +* the right relative gap is big enough, the child cluster +* (NEWFST,..,NEWLST) is well separated from the following + NEWLST = J + ELSE +* inside a child cluster, the relative gap is not +* big enough. + GOTO 140 + END IF + +* Compute size of child cluster found + NEWSIZ = NEWLST - NEWFST + 1 + +* NEWFTT is the place in Z where the new RRR or the computed +* eigenvector is to be stored + IF((DOL.EQ.1).AND.(DOU.EQ.M)) THEN +* Store representation at location of the leftmost evalue +* of the cluster + NEWFTT = WBEGIN + NEWFST - 1 + ELSE + IF(WBEGIN+NEWFST-1.LT.DOL) THEN +* Store representation at the left end of Z array + NEWFTT = DOL - 1 + ELSEIF(WBEGIN+NEWFST-1.GT.DOU) THEN +* Store representation at the right end of Z array + NEWFTT = DOU + ELSE + NEWFTT = WBEGIN + NEWFST - 1 + ENDIF + ENDIF + + IF( NEWSIZ.GT.1) THEN +* +* Current child is not a singleton but a cluster. +* Compute and store new representation of child. +* +* +* Compute left and right cluster gap. +* +* LGAP and RGAP are not computed from WORK because +* the eigenvalue approximations may stem from RRRs +* different shifts. However, W hold all eigenvalues +* of the unshifted matrix. Still, the entries in WGAP +* have to be computed from WORK since the entries +* in W might be of the same order so that gaps are not +* exhibited correctly for very close eigenvalues. + IF( NEWFST.EQ.1 ) THEN + LGAP = MAX( ZERO, + $ W(WBEGIN)-WERR(WBEGIN) - VL ) + ELSE + LGAP = WGAP( WBEGIN+NEWFST-2 ) + ENDIF + RGAP = WGAP( WBEGIN+NEWLST-1 ) +* +* Compute left- and rightmost eigenvalue of child +* to high precision in order to shift as close +* as possible and obtain as large relative gaps +* as possible +* + DO 55 K =1,2 + IF(K.EQ.1) THEN + P = INDEXW( WBEGIN-1+NEWFST ) + ELSE + P = INDEXW( WBEGIN-1+NEWLST ) + ENDIF + OFFSET = INDEXW( WBEGIN ) - 1 + CALL DLARRB( IN, D(IBEGIN), + $ WORK( INDLLD+IBEGIN-1 ),P,P, + $ RQTOL, RQTOL, OFFSET, + $ WORK(WBEGIN),WGAP(WBEGIN), + $ WERR(WBEGIN),WORK( INDWRK ), + $ IWORK( IINDWK ), PIVMIN, SPDIAM, + $ IN, IINFO ) + 55 CONTINUE +* + IF((WBEGIN+NEWLST-1.LT.DOL).OR. + $ (WBEGIN+NEWFST-1.GT.DOU)) THEN +* if the cluster contains no desired eigenvalues +* skip the computation of that branch of the rep. tree +* +* We could skip before the refinement of the extremal +* eigenvalues of the child, but then the representation +* tree could be different from the one when nothing is +* skipped. For this reason we skip at this place. + IDONE = IDONE + NEWLST - NEWFST + 1 + GOTO 139 + ENDIF +* +* Compute RRR of child cluster. +* Note that the new RRR is stored in Z +* +* DLARRF needs LWORK = 2*N + CALL DLARRF( IN, D( IBEGIN ), L( IBEGIN ), + $ WORK(INDLD+IBEGIN-1), + $ NEWFST, NEWLST, WORK(WBEGIN), + $ WGAP(WBEGIN), WERR(WBEGIN), + $ SPDIAM, LGAP, RGAP, PIVMIN, TAU, + $ Z(IBEGIN, NEWFTT),Z(IBEGIN, NEWFTT+1), + $ WORK( INDWRK ), IINFO ) + IF( IINFO.EQ.0 ) THEN +* a new RRR for the cluster was found by DLARRF +* update shift and store it + SSIGMA = SIGMA + TAU + Z( IEND, NEWFTT+1 ) = SSIGMA +* WORK() are the midpoints and WERR() the semi-width +* Note that the entries in W are unchanged. + DO 116 K = NEWFST, NEWLST + FUDGE = + $ THREE*EPS*ABS(WORK(WBEGIN+K-1)) + WORK( WBEGIN + K - 1 ) = + $ WORK( WBEGIN + K - 1) - TAU + FUDGE = FUDGE + + $ FOUR*EPS*ABS(WORK(WBEGIN+K-1)) +* Fudge errors + WERR( WBEGIN + K - 1 ) = + $ WERR( WBEGIN + K - 1 ) + FUDGE +* Gaps are not fudged. Provided that WERR is small +* when eigenvalues are close, a zero gap indicates +* that a new representation is needed for resolving +* the cluster. A fudge could lead to a wrong decision +* of judging eigenvalues 'separated' which in +* reality are not. This could have a negative impact +* on the orthogonality of the computed eigenvectors. + 116 CONTINUE + + NCLUS = NCLUS + 1 + K = NEWCLS + 2*NCLUS + IWORK( K-1 ) = NEWFST + IWORK( K ) = NEWLST + ELSE + INFO = -2 + RETURN + ENDIF + ELSE +* +* Compute eigenvector of singleton +* + ITER = 0 +* + TOL = FOUR * LOG(DBLE(IN)) * EPS +* + K = NEWFST + WINDEX = WBEGIN + K - 1 + WINDMN = MAX(WINDEX - 1,1) + WINDPL = MIN(WINDEX + 1,M) + LAMBDA = WORK( WINDEX ) + DONE = DONE + 1 +* Check if eigenvector computation is to be skipped + IF((WINDEX.LT.DOL).OR. + $ (WINDEX.GT.DOU)) THEN + ESKIP = .TRUE. + GOTO 125 + ELSE + ESKIP = .FALSE. + ENDIF + LEFT = WORK( WINDEX ) - WERR( WINDEX ) + RIGHT = WORK( WINDEX ) + WERR( WINDEX ) + INDEIG = INDEXW( WINDEX ) +* Note that since we compute the eigenpairs for a child, +* all eigenvalue approximations are w.r.t the same shift. +* In this case, the entries in WORK should be used for +* computing the gaps since they exhibit even very small +* differences in the eigenvalues, as opposed to the +* entries in W which might "look" the same. + + IF( K .EQ. 1) THEN +* In the case RANGE='I' and with not much initial +* accuracy in LAMBDA and VL, the formula +* LGAP = MAX( ZERO, (SIGMA - VL) + LAMBDA ) +* can lead to an overestimation of the left gap and +* thus to inadequately early RQI 'convergence'. +* Prevent this by forcing a small left gap. + LGAP = EPS*MAX(ABS(LEFT),ABS(RIGHT)) + ELSE + LGAP = WGAP(WINDMN) + ENDIF + IF( K .EQ. IM) THEN +* In the case RANGE='I' and with not much initial +* accuracy in LAMBDA and VU, the formula +* can lead to an overestimation of the right gap and +* thus to inadequately early RQI 'convergence'. +* Prevent this by forcing a small right gap. + RGAP = EPS*MAX(ABS(LEFT),ABS(RIGHT)) + ELSE + RGAP = WGAP(WINDEX) + ENDIF + GAP = MIN( LGAP, RGAP ) + IF(( K .EQ. 1).OR.(K .EQ. IM)) THEN +* The eigenvector support can become wrong +* because significant entries could be cut off due to a +* large GAPTOL parameter in LAR1V. Prevent this. + GAPTOL = ZERO + ELSE + GAPTOL = GAP * EPS + ENDIF + ISUPMN = IN + ISUPMX = 1 +* Update WGAP so that it holds the minimum gap +* to the left or the right. This is crucial in the +* case where bisection is used to ensure that the +* eigenvalue is refined up to the required precision. +* The correct value is restored afterwards. + SAVGAP = WGAP(WINDEX) + WGAP(WINDEX) = GAP +* We want to use the Rayleigh Quotient Correction +* as often as possible since it converges quadratically +* when we are close enough to the desired eigenvalue. +* However, the Rayleigh Quotient can have the wrong sign +* and lead us away from the desired eigenvalue. In this +* case, the best we can do is to use bisection. + USEDBS = .FALSE. + USEDRQ = .FALSE. +* Bisection is initially turned off unless it is forced + NEEDBS = .NOT.TRYRQC + 120 CONTINUE +* Check if bisection should be used to refine eigenvalue + IF(NEEDBS) THEN +* Take the bisection as new iterate + USEDBS = .TRUE. + ITMP1 = IWORK( IINDR+WINDEX ) + OFFSET = INDEXW( WBEGIN ) - 1 + CALL DLARRB( IN, D(IBEGIN), + $ WORK(INDLLD+IBEGIN-1),INDEIG,INDEIG, + $ ZERO, TWO*EPS, OFFSET, + $ WORK(WBEGIN),WGAP(WBEGIN), + $ WERR(WBEGIN),WORK( INDWRK ), + $ IWORK( IINDWK ), PIVMIN, SPDIAM, + $ ITMP1, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = -3 + RETURN + ENDIF + LAMBDA = WORK( WINDEX ) +* Reset twist index from inaccurate LAMBDA to +* force computation of true MINGMA + IWORK( IINDR+WINDEX ) = 0 + ENDIF +* Given LAMBDA, compute the eigenvector. + CALL DLAR1V( IN, 1, IN, LAMBDA, D( IBEGIN ), + $ L( IBEGIN ), WORK(INDLD+IBEGIN-1), + $ WORK(INDLLD+IBEGIN-1), + $ PIVMIN, GAPTOL, Z( IBEGIN, WINDEX ), + $ .NOT.USEDBS, NEGCNT, ZTZ, MINGMA, + $ IWORK( IINDR+WINDEX ), ISUPPZ( 2*WINDEX-1 ), + $ NRMINV, RESID, RQCORR, WORK( INDWRK ) ) + IF(ITER .EQ. 0) THEN + BSTRES = RESID + BSTW = LAMBDA + ELSEIF(RESID.LT.BSTRES) THEN + BSTRES = RESID + BSTW = LAMBDA + ENDIF + ISUPMN = MIN(ISUPMN,ISUPPZ( 2*WINDEX-1 )) + ISUPMX = MAX(ISUPMX,ISUPPZ( 2*WINDEX )) + ITER = ITER + 1 + +* sin alpha <= |resid|/gap +* Note that both the residual and the gap are +* proportional to the matrix, so ||T|| doesn't play +* a role in the quotient + +* +* Convergence test for Rayleigh-Quotient iteration +* (omitted when Bisection has been used) +* + IF( RESID.GT.TOL*GAP .AND. ABS( RQCORR ).GT. + $ RQTOL*ABS( LAMBDA ) .AND. .NOT. USEDBS) + $ THEN +* We need to check that the RQCORR update doesn't +* move the eigenvalue away from the desired one and +* towards a neighbor. -> protection with bisection + IF(INDEIG.LE.NEGCNT) THEN +* The wanted eigenvalue lies to the left + SGNDEF = -ONE + ELSE +* The wanted eigenvalue lies to the right + SGNDEF = ONE + ENDIF +* We only use the RQCORR if it improves the +* the iterate reasonably. + IF( ( RQCORR*SGNDEF.GE.ZERO ) + $ .AND.( LAMBDA + RQCORR.LE. RIGHT) + $ .AND.( LAMBDA + RQCORR.GE. LEFT) + $ ) THEN + USEDRQ = .TRUE. +* Store new midpoint of bisection interval in WORK + IF(SGNDEF.EQ.ONE) THEN +* The current LAMBDA is on the left of the true +* eigenvalue + LEFT = LAMBDA +* We prefer to assume that the error estimate +* is correct. We could make the interval not +* as a bracket but to be modified if the RQCORR +* chooses to. In this case, the RIGHT side should +* be modified as follows: +* RIGHT = MAX(RIGHT, LAMBDA + RQCORR) + ELSE +* The current LAMBDA is on the right of the true +* eigenvalue + RIGHT = LAMBDA +* See comment about assuming the error estimate is +* correct above. +* LEFT = MIN(LEFT, LAMBDA + RQCORR) + ENDIF + WORK( WINDEX ) = + $ HALF * (RIGHT + LEFT) +* Take RQCORR since it has the correct sign and +* improves the iterate reasonably + LAMBDA = LAMBDA + RQCORR +* Update width of error interval + WERR( WINDEX ) = + $ HALF * (RIGHT-LEFT) + ELSE + NEEDBS = .TRUE. + ENDIF + IF(RIGHT-LEFT.LT.RQTOL*ABS(LAMBDA)) THEN +* The eigenvalue is computed to bisection accuracy +* compute eigenvector and stop + USEDBS = .TRUE. + GOTO 120 + ELSEIF( ITER.LT.MAXITR ) THEN + GOTO 120 + ELSEIF( ITER.EQ.MAXITR ) THEN + NEEDBS = .TRUE. + GOTO 120 + ELSE + INFO = 5 + RETURN + END IF + ELSE + STP2II = .FALSE. + IF(USEDRQ .AND. USEDBS .AND. + $ BSTRES.LE.RESID) THEN + LAMBDA = BSTW + STP2II = .TRUE. + ENDIF + IF (STP2II) THEN +* improve error angle by second step + CALL DLAR1V( IN, 1, IN, LAMBDA, + $ D( IBEGIN ), L( IBEGIN ), + $ WORK(INDLD+IBEGIN-1), + $ WORK(INDLLD+IBEGIN-1), + $ PIVMIN, GAPTOL, Z( IBEGIN, WINDEX ), + $ .NOT.USEDBS, NEGCNT, ZTZ, MINGMA, + $ IWORK( IINDR+WINDEX ), + $ ISUPPZ( 2*WINDEX-1 ), + $ NRMINV, RESID, RQCORR, WORK( INDWRK ) ) + ENDIF + WORK( WINDEX ) = LAMBDA + END IF +* +* Compute FP-vector support w.r.t. whole matrix +* + ISUPPZ( 2*WINDEX-1 ) = ISUPPZ( 2*WINDEX-1 )+OLDIEN + ISUPPZ( 2*WINDEX ) = ISUPPZ( 2*WINDEX )+OLDIEN + ZFROM = ISUPPZ( 2*WINDEX-1 ) + ZTO = ISUPPZ( 2*WINDEX ) + ISUPMN = ISUPMN + OLDIEN + ISUPMX = ISUPMX + OLDIEN +* Ensure vector is ok if support in the RQI has changed + IF(ISUPMN.LT.ZFROM) THEN + DO 122 II = ISUPMN,ZFROM-1 + Z( II, WINDEX ) = ZERO + 122 CONTINUE + ENDIF + IF(ISUPMX.GT.ZTO) THEN + DO 123 II = ZTO+1,ISUPMX + Z( II, WINDEX ) = ZERO + 123 CONTINUE + ENDIF + CALL DSCAL( ZTO-ZFROM+1, NRMINV, + $ Z( ZFROM, WINDEX ), 1 ) + 125 CONTINUE +* Update W + W( WINDEX ) = LAMBDA+SIGMA +* Recompute the gaps on the left and right +* But only allow them to become larger and not +* smaller (which can only happen through "bad" +* cancellation and doesn't reflect the theory +* where the initial gaps are underestimated due +* to WERR being too crude.) + IF(.NOT.ESKIP) THEN + IF( K.GT.1) THEN + WGAP( WINDMN ) = MAX( WGAP(WINDMN), + $ W(WINDEX)-WERR(WINDEX) + $ - W(WINDMN)-WERR(WINDMN) ) + ENDIF + IF( WINDEX.LT.WEND ) THEN + WGAP( WINDEX ) = MAX( SAVGAP, + $ W( WINDPL )-WERR( WINDPL ) + $ - W( WINDEX )-WERR( WINDEX) ) + ENDIF + ENDIF + IDONE = IDONE + 1 + ENDIF +* here ends the code for the current child +* + 139 CONTINUE +* Proceed to any remaining child nodes + NEWFST = J + 1 + 140 CONTINUE + 150 CONTINUE + NDEPTH = NDEPTH + 1 + GO TO 40 + END IF + IBEGIN = IEND + 1 + WBEGIN = WEND + 1 + 170 CONTINUE +* + + RETURN +* +* End of DLARRV +* + END diff --git a/dspl/liblapack/SRC/dlarscl2.f b/dspl/liblapack/SRC/dlarscl2.f new file mode 100644 index 0000000..1b5ea53 --- /dev/null +++ b/dspl/liblapack/SRC/dlarscl2.f @@ -0,0 +1,119 @@ +*> \brief \b DLARSCL2 performs reciprocal diagonal scaling on a vector. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARSCL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARSCL2 ( M, N, D, X, LDX ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDX +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARSCL2 performs a reciprocal diagonal scaling on an vector: +*> x <-- inv(D) * x +*> where the diagonal matrix D is stored as a vector. +*> +*> Eventually to be replaced by BLAS_dge_diag_scale in the new BLAS +*> standard. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of D and X. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of X. N >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (M) +*> Diagonal matrix D, stored as a vector of length M. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,N) +*> On entry, the vector X to be scaled by D. +*> On exit, the scaled vector. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the vector X. LDX >= M. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DLARSCL2 ( M, N, D, X, LDX ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER M, N, LDX +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. Executable Statements .. +* + DO J = 1, N + DO I = 1, M + X( I, J ) = X( I, J ) / D( I ) + END DO + END DO + + RETURN + END diff --git a/dspl/liblapack/SRC/dlartg.f b/dspl/liblapack/SRC/dlartg.f new file mode 100644 index 0000000..1c7c46f --- /dev/null +++ b/dspl/liblapack/SRC/dlartg.f @@ -0,0 +1,204 @@ +*> \brief \b DLARTG generates a plane rotation with real cosine and real sine. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARTG + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARTG( F, G, CS, SN, R ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION CS, F, G, R, SN +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARTG generate a plane rotation so that +*> +*> [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. +*> [ -SN CS ] [ G ] [ 0 ] +*> +*> This is a slower, more accurate version of the BLAS1 routine DROTG, +*> with the following other differences: +*> F and G are unchanged on return. +*> If G=0, then CS=1 and SN=0. +*> If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any +*> floating point operations (saves work in DBDSQR when +*> there are zeros on the diagonal). +*> +*> If F exceeds G in magnitude, CS will be positive. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] F +*> \verbatim +*> F is DOUBLE PRECISION +*> The first component of vector to be rotated. +*> \endverbatim +*> +*> \param[in] G +*> \verbatim +*> G is DOUBLE PRECISION +*> The second component of vector to be rotated. +*> \endverbatim +*> +*> \param[out] CS +*> \verbatim +*> CS is DOUBLE PRECISION +*> The cosine of the rotation. +*> \endverbatim +*> +*> \param[out] SN +*> \verbatim +*> SN is DOUBLE PRECISION +*> The sine of the rotation. +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is DOUBLE PRECISION +*> The nonzero component of the rotated vector. +*> +*> This version has a few statements commented out for thread safety +*> (machine parameters are computed on each entry). 10 feb 03, SJH. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLARTG( F, G, CS, SN, R ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + DOUBLE PRECISION CS, F, G, R, SN +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D0 ) +* .. +* .. Local Scalars .. +* LOGICAL FIRST + INTEGER COUNT, I + DOUBLE PRECISION EPS, F1, G1, SAFMIN, SAFMN2, SAFMX2, SCALE +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, SQRT +* .. +* .. Save statement .. +* SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 +* .. +* .. Data statements .. +* DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* +* IF( FIRST ) THEN + SAFMIN = DLAMCH( 'S' ) + EPS = DLAMCH( 'E' ) + SAFMN2 = DLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) / + $ LOG( DLAMCH( 'B' ) ) / TWO ) + SAFMX2 = ONE / SAFMN2 +* FIRST = .FALSE. +* END IF + IF( G.EQ.ZERO ) THEN + CS = ONE + SN = ZERO + R = F + ELSE IF( F.EQ.ZERO ) THEN + CS = ZERO + SN = ONE + R = G + ELSE + F1 = F + G1 = G + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.GE.SAFMX2 ) THEN + COUNT = 0 + 10 CONTINUE + COUNT = COUNT + 1 + F1 = F1*SAFMN2 + G1 = G1*SAFMN2 + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.GE.SAFMX2 ) + $ GO TO 10 + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + DO 20 I = 1, COUNT + R = R*SAFMX2 + 20 CONTINUE + ELSE IF( SCALE.LE.SAFMN2 ) THEN + COUNT = 0 + 30 CONTINUE + COUNT = COUNT + 1 + F1 = F1*SAFMX2 + G1 = G1*SAFMX2 + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.LE.SAFMN2 ) + $ GO TO 30 + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + DO 40 I = 1, COUNT + R = R*SAFMN2 + 40 CONTINUE + ELSE + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + END IF + IF( ABS( F ).GT.ABS( G ) .AND. CS.LT.ZERO ) THEN + CS = -CS + SN = -SN + R = -R + END IF + END IF + RETURN +* +* End of DLARTG +* + END diff --git a/dspl/liblapack/SRC/dlartgp.f b/dspl/liblapack/SRC/dlartgp.f new file mode 100644 index 0000000..0cb0d2d --- /dev/null +++ b/dspl/liblapack/SRC/dlartgp.f @@ -0,0 +1,202 @@ +*> \brief \b DLARTGP generates a plane rotation so that the diagonal is nonnegative. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARTGP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARTGP( F, G, CS, SN, R ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION CS, F, G, R, SN +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARTGP generates a plane rotation so that +*> +*> [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. +*> [ -SN CS ] [ G ] [ 0 ] +*> +*> This is a slower, more accurate version of the Level 1 BLAS routine DROTG, +*> with the following other differences: +*> F and G are unchanged on return. +*> If G=0, then CS=(+/-)1 and SN=0. +*> If F=0 and (G .ne. 0), then CS=0 and SN=(+/-)1. +*> +*> The sign is chosen so that R >= 0. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] F +*> \verbatim +*> F is DOUBLE PRECISION +*> The first component of vector to be rotated. +*> \endverbatim +*> +*> \param[in] G +*> \verbatim +*> G is DOUBLE PRECISION +*> The second component of vector to be rotated. +*> \endverbatim +*> +*> \param[out] CS +*> \verbatim +*> CS is DOUBLE PRECISION +*> The cosine of the rotation. +*> \endverbatim +*> +*> \param[out] SN +*> \verbatim +*> SN is DOUBLE PRECISION +*> The sine of the rotation. +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is DOUBLE PRECISION +*> The nonzero component of the rotated vector. +*> +*> This version has a few statements commented out for thread safety +*> (machine parameters are computed on each entry). 10 feb 03, SJH. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLARTGP( F, G, CS, SN, R ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + DOUBLE PRECISION CS, F, G, R, SN +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D0 ) +* .. +* .. Local Scalars .. +* LOGICAL FIRST + INTEGER COUNT, I + DOUBLE PRECISION EPS, F1, G1, SAFMIN, SAFMN2, SAFMX2, SCALE +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, SIGN, SQRT +* .. +* .. Save statement .. +* SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 +* .. +* .. Data statements .. +* DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* +* IF( FIRST ) THEN + SAFMIN = DLAMCH( 'S' ) + EPS = DLAMCH( 'E' ) + SAFMN2 = DLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) / + $ LOG( DLAMCH( 'B' ) ) / TWO ) + SAFMX2 = ONE / SAFMN2 +* FIRST = .FALSE. +* END IF + IF( G.EQ.ZERO ) THEN + CS = SIGN( ONE, F ) + SN = ZERO + R = ABS( F ) + ELSE IF( F.EQ.ZERO ) THEN + CS = ZERO + SN = SIGN( ONE, G ) + R = ABS( G ) + ELSE + F1 = F + G1 = G + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.GE.SAFMX2 ) THEN + COUNT = 0 + 10 CONTINUE + COUNT = COUNT + 1 + F1 = F1*SAFMN2 + G1 = G1*SAFMN2 + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.GE.SAFMX2 ) + $ GO TO 10 + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + DO 20 I = 1, COUNT + R = R*SAFMX2 + 20 CONTINUE + ELSE IF( SCALE.LE.SAFMN2 ) THEN + COUNT = 0 + 30 CONTINUE + COUNT = COUNT + 1 + F1 = F1*SAFMX2 + G1 = G1*SAFMX2 + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.LE.SAFMN2 ) + $ GO TO 30 + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + DO 40 I = 1, COUNT + R = R*SAFMN2 + 40 CONTINUE + ELSE + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + END IF + IF( R.LT.ZERO ) THEN + CS = -CS + SN = -SN + R = -R + END IF + END IF + RETURN +* +* End of DLARTGP +* + END diff --git a/dspl/liblapack/SRC/dlartgs.f b/dspl/liblapack/SRC/dlartgs.f new file mode 100644 index 0000000..29a9eb0 --- /dev/null +++ b/dspl/liblapack/SRC/dlartgs.f @@ -0,0 +1,161 @@ +*> \brief \b DLARTGS generates a plane rotation designed to introduce a bulge in implicit QR iteration for the bidiagonal SVD problem. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARTGS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARTGS( X, Y, SIGMA, CS, SN ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION CS, SIGMA, SN, X, Y +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARTGS generates a plane rotation designed to introduce a bulge in +*> Golub-Reinsch-style implicit QR iteration for the bidiagonal SVD +*> problem. X and Y are the top-row entries, and SIGMA is the shift. +*> The computed CS and SN define a plane rotation satisfying +*> +*> [ CS SN ] . [ X^2 - SIGMA ] = [ R ], +*> [ -SN CS ] [ X * Y ] [ 0 ] +*> +*> with R nonnegative. If X^2 - SIGMA and X * Y are 0, then the +*> rotation is by PI/2. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] X +*> \verbatim +*> X is DOUBLE PRECISION +*> The (1,1) entry of an upper bidiagonal matrix. +*> \endverbatim +*> +*> \param[in] Y +*> \verbatim +*> Y is DOUBLE PRECISION +*> The (1,2) entry of an upper bidiagonal matrix. +*> \endverbatim +*> +*> \param[in] SIGMA +*> \verbatim +*> SIGMA is DOUBLE PRECISION +*> The shift. +*> \endverbatim +*> +*> \param[out] CS +*> \verbatim +*> CS is DOUBLE PRECISION +*> The cosine of the rotation. +*> \endverbatim +*> +*> \param[out] SN +*> \verbatim +*> SN is DOUBLE PRECISION +*> The sine of the rotation. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DLARTGS( X, Y, SIGMA, CS, SN ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + DOUBLE PRECISION CS, SIGMA, SN, X, Y +* .. +* +* =================================================================== +* +* .. Parameters .. + DOUBLE PRECISION NEGONE, ONE, ZERO + PARAMETER ( NEGONE = -1.0D0, ONE = 1.0D0, ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION R, S, THRESH, W, Z +* .. +* .. External Subroutines .. + EXTERNAL DLARTGP +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. Executable Statements .. +* + THRESH = DLAMCH('E') +* +* Compute the first column of B**T*B - SIGMA^2*I, up to a scale +* factor. +* + IF( (SIGMA .EQ. ZERO .AND. ABS(X) .LT. THRESH) .OR. + $ (ABS(X) .EQ. SIGMA .AND. Y .EQ. ZERO) ) THEN + Z = ZERO + W = ZERO + ELSE IF( SIGMA .EQ. ZERO ) THEN + IF( X .GE. ZERO ) THEN + Z = X + W = Y + ELSE + Z = -X + W = -Y + END IF + ELSE IF( ABS(X) .LT. THRESH ) THEN + Z = -SIGMA*SIGMA + W = ZERO + ELSE + IF( X .GE. ZERO ) THEN + S = ONE + ELSE + S = NEGONE + END IF + Z = S * (ABS(X)-SIGMA) * (S+SIGMA/X) + W = S * Y + END IF +* +* Generate the rotation. +* CALL DLARTGP( Z, W, CS, SN, R ) might seem more natural; +* reordering the arguments ensures that if Z = 0 then the rotation +* is by PI/2. +* + CALL DLARTGP( W, Z, SN, CS, R ) +* + RETURN +* +* End DLARTGS +* + END + diff --git a/dspl/liblapack/SRC/dlartv.f b/dspl/liblapack/SRC/dlartv.f new file mode 100644 index 0000000..dca1cb7 --- /dev/null +++ b/dspl/liblapack/SRC/dlartv.f @@ -0,0 +1,147 @@ +*> \brief \b DLARTV applies a vector of plane rotations with real cosines and real sines to the elements of a pair of vectors. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARTV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARTV( N, X, INCX, Y, INCY, C, S, INCC ) +* +* .. Scalar Arguments .. +* INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( * ), S( * ), X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARTV applies a vector of real plane rotations to elements of the +*> real vectors x and y. For i = 1,2,...,n +*> +*> ( x(i) ) := ( c(i) s(i) ) ( x(i) ) +*> ( y(i) ) ( -s(i) c(i) ) ( y(i) ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of plane rotations to be applied. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, +*> dimension (1+(N-1)*INCX) +*> The vector x. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, +*> dimension (1+(N-1)*INCY) +*> The vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> The increment between elements of Y. INCY > 0. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (1+(N-1)*INCC) +*> The cosines of the plane rotations. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (1+(N-1)*INCC) +*> The sines of the plane rotations. +*> \endverbatim +*> +*> \param[in] INCC +*> \verbatim +*> INCC is INTEGER +*> The increment between elements of C and S. INCC > 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLARTV( N, X, INCX, Y, INCY, C, S, INCC ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ), S( * ), X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IC, IX, IY + DOUBLE PRECISION XI, YI +* .. +* .. Executable Statements .. +* + IX = 1 + IY = 1 + IC = 1 + DO 10 I = 1, N + XI = X( IX ) + YI = Y( IY ) + X( IX ) = C( IC )*XI + S( IC )*YI + Y( IY ) = C( IC )*YI - S( IC )*XI + IX = IX + INCX + IY = IY + INCY + IC = IC + INCC + 10 CONTINUE + RETURN +* +* End of DLARTV +* + END diff --git a/dspl/liblapack/SRC/dlaruv.f b/dspl/liblapack/SRC/dlaruv.f new file mode 100644 index 0000000..a903c18 --- /dev/null +++ b/dspl/liblapack/SRC/dlaruv.f @@ -0,0 +1,446 @@ +*> \brief \b DLARUV returns a vector of n random real numbers from a uniform distribution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARUV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARUV( ISEED, N, X ) +* +* .. Scalar Arguments .. +* INTEGER N +* .. +* .. Array Arguments .. +* INTEGER ISEED( 4 ) +* DOUBLE PRECISION X( N ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARUV returns a vector of n random real numbers from a uniform (0,1) +*> distribution (n <= 128). +*> +*> This is an auxiliary routine called by DLARNV and ZLARNV. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in,out] ISEED +*> \verbatim +*> ISEED is INTEGER array, dimension (4) +*> On entry, the seed of the random number generator; the array +*> elements must be between 0 and 4095, and ISEED(4) must be +*> odd. +*> On exit, the seed is updated. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of random numbers to be generated. N <= 128. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (N) +*> The generated random numbers. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> This routine uses a multiplicative congruential method with modulus +*> 2**48 and multiplier 33952834046453 (see G.S.Fishman, +*> 'Multiplicative congruential random number generators with modulus +*> 2**b: an exhaustive analysis for b = 32 and a partial analysis for +*> b = 48', Math. Comp. 189, pp 331-344, 1990). +*> +*> 48-bit integers are stored in 4 integer array elements with 12 bits +*> per element. Hence the routine is portable across machines with +*> integers of 32 bits or more. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLARUV( ISEED, N, X ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER N +* .. +* .. Array Arguments .. + INTEGER ISEED( 4 ) + DOUBLE PRECISION X( N ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + INTEGER LV, IPW2 + DOUBLE PRECISION R + PARAMETER ( LV = 128, IPW2 = 4096, R = ONE / IPW2 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, I2, I3, I4, IT1, IT2, IT3, IT4, J +* .. +* .. Local Arrays .. + INTEGER MM( LV, 4 ) +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MIN, MOD +* .. +* .. Data statements .. + DATA ( MM( 1, J ), J = 1, 4 ) / 494, 322, 2508, + $ 2549 / + DATA ( MM( 2, J ), J = 1, 4 ) / 2637, 789, 3754, + $ 1145 / + DATA ( MM( 3, J ), J = 1, 4 ) / 255, 1440, 1766, + $ 2253 / + DATA ( MM( 4, J ), J = 1, 4 ) / 2008, 752, 3572, + $ 305 / + DATA ( MM( 5, J ), J = 1, 4 ) / 1253, 2859, 2893, + $ 3301 / + DATA ( MM( 6, J ), J = 1, 4 ) / 3344, 123, 307, + $ 1065 / + DATA ( MM( 7, J ), J = 1, 4 ) / 4084, 1848, 1297, + $ 3133 / + DATA ( MM( 8, J ), J = 1, 4 ) / 1739, 643, 3966, + $ 2913 / + DATA ( MM( 9, J ), J = 1, 4 ) / 3143, 2405, 758, + $ 3285 / + DATA ( MM( 10, J ), J = 1, 4 ) / 3468, 2638, 2598, + $ 1241 / + DATA ( MM( 11, J ), J = 1, 4 ) / 688, 2344, 3406, + $ 1197 / + DATA ( MM( 12, J ), J = 1, 4 ) / 1657, 46, 2922, + $ 3729 / + DATA ( MM( 13, J ), J = 1, 4 ) / 1238, 3814, 1038, + $ 2501 / + DATA ( MM( 14, J ), J = 1, 4 ) / 3166, 913, 2934, + $ 1673 / + DATA ( MM( 15, J ), J = 1, 4 ) / 1292, 3649, 2091, + $ 541 / + DATA ( MM( 16, J ), J = 1, 4 ) / 3422, 339, 2451, + $ 2753 / + DATA ( MM( 17, J ), J = 1, 4 ) / 1270, 3808, 1580, + $ 949 / + DATA ( MM( 18, J ), J = 1, 4 ) / 2016, 822, 1958, + $ 2361 / + DATA ( MM( 19, J ), J = 1, 4 ) / 154, 2832, 2055, + $ 1165 / + DATA ( MM( 20, J ), J = 1, 4 ) / 2862, 3078, 1507, + $ 4081 / + DATA ( MM( 21, J ), J = 1, 4 ) / 697, 3633, 1078, + $ 2725 / + DATA ( MM( 22, J ), J = 1, 4 ) / 1706, 2970, 3273, + $ 3305 / + DATA ( MM( 23, J ), J = 1, 4 ) / 491, 637, 17, + $ 3069 / + DATA ( MM( 24, J ), J = 1, 4 ) / 931, 2249, 854, + $ 3617 / + DATA ( MM( 25, J ), J = 1, 4 ) / 1444, 2081, 2916, + $ 3733 / + DATA ( MM( 26, J ), J = 1, 4 ) / 444, 4019, 3971, + $ 409 / + DATA ( MM( 27, J ), J = 1, 4 ) / 3577, 1478, 2889, + $ 2157 / + DATA ( MM( 28, J ), J = 1, 4 ) / 3944, 242, 3831, + $ 1361 / + DATA ( MM( 29, J ), J = 1, 4 ) / 2184, 481, 2621, + $ 3973 / + DATA ( MM( 30, J ), J = 1, 4 ) / 1661, 2075, 1541, + $ 1865 / + DATA ( MM( 31, J ), J = 1, 4 ) / 3482, 4058, 893, + $ 2525 / + DATA ( MM( 32, J ), J = 1, 4 ) / 657, 622, 736, + $ 1409 / + DATA ( MM( 33, J ), J = 1, 4 ) / 3023, 3376, 3992, + $ 3445 / + DATA ( MM( 34, J ), J = 1, 4 ) / 3618, 812, 787, + $ 3577 / + DATA ( MM( 35, J ), J = 1, 4 ) / 1267, 234, 2125, + $ 77 / + DATA ( MM( 36, J ), J = 1, 4 ) / 1828, 641, 2364, + $ 3761 / + DATA ( MM( 37, J ), J = 1, 4 ) / 164, 4005, 2460, + $ 2149 / + DATA ( MM( 38, J ), J = 1, 4 ) / 3798, 1122, 257, + $ 1449 / + DATA ( MM( 39, J ), J = 1, 4 ) / 3087, 3135, 1574, + $ 3005 / + DATA ( MM( 40, J ), J = 1, 4 ) / 2400, 2640, 3912, + $ 225 / + DATA ( MM( 41, J ), J = 1, 4 ) / 2870, 2302, 1216, + $ 85 / + DATA ( MM( 42, J ), J = 1, 4 ) / 3876, 40, 3248, + $ 3673 / + DATA ( MM( 43, J ), J = 1, 4 ) / 1905, 1832, 3401, + $ 3117 / + DATA ( MM( 44, J ), J = 1, 4 ) / 1593, 2247, 2124, + $ 3089 / + DATA ( MM( 45, J ), J = 1, 4 ) / 1797, 2034, 2762, + $ 1349 / + DATA ( MM( 46, J ), J = 1, 4 ) / 1234, 2637, 149, + $ 2057 / + DATA ( MM( 47, J ), J = 1, 4 ) / 3460, 1287, 2245, + $ 413 / + DATA ( MM( 48, J ), J = 1, 4 ) / 328, 1691, 166, + $ 65 / + DATA ( MM( 49, J ), J = 1, 4 ) / 2861, 496, 466, + $ 1845 / + DATA ( MM( 50, J ), J = 1, 4 ) / 1950, 1597, 4018, + $ 697 / + DATA ( MM( 51, J ), J = 1, 4 ) / 617, 2394, 1399, + $ 3085 / + DATA ( MM( 52, J ), J = 1, 4 ) / 2070, 2584, 190, + $ 3441 / + DATA ( MM( 53, J ), J = 1, 4 ) / 3331, 1843, 2879, + $ 1573 / + DATA ( MM( 54, J ), J = 1, 4 ) / 769, 336, 153, + $ 3689 / + DATA ( MM( 55, J ), J = 1, 4 ) / 1558, 1472, 2320, + $ 2941 / + DATA ( MM( 56, J ), J = 1, 4 ) / 2412, 2407, 18, + $ 929 / + DATA ( MM( 57, J ), J = 1, 4 ) / 2800, 433, 712, + $ 533 / + DATA ( MM( 58, J ), J = 1, 4 ) / 189, 2096, 2159, + $ 2841 / + DATA ( MM( 59, J ), J = 1, 4 ) / 287, 1761, 2318, + $ 4077 / + DATA ( MM( 60, J ), J = 1, 4 ) / 2045, 2810, 2091, + $ 721 / + DATA ( MM( 61, J ), J = 1, 4 ) / 1227, 566, 3443, + $ 2821 / + DATA ( MM( 62, J ), J = 1, 4 ) / 2838, 442, 1510, + $ 2249 / + DATA ( MM( 63, J ), J = 1, 4 ) / 209, 41, 449, + $ 2397 / + DATA ( MM( 64, J ), J = 1, 4 ) / 2770, 1238, 1956, + $ 2817 / + DATA ( MM( 65, J ), J = 1, 4 ) / 3654, 1086, 2201, + $ 245 / + DATA ( MM( 66, J ), J = 1, 4 ) / 3993, 603, 3137, + $ 1913 / + DATA ( MM( 67, J ), J = 1, 4 ) / 192, 840, 3399, + $ 1997 / + DATA ( MM( 68, J ), J = 1, 4 ) / 2253, 3168, 1321, + $ 3121 / + DATA ( MM( 69, J ), J = 1, 4 ) / 3491, 1499, 2271, + $ 997 / + DATA ( MM( 70, J ), J = 1, 4 ) / 2889, 1084, 3667, + $ 1833 / + DATA ( MM( 71, J ), J = 1, 4 ) / 2857, 3438, 2703, + $ 2877 / + DATA ( MM( 72, J ), J = 1, 4 ) / 2094, 2408, 629, + $ 1633 / + DATA ( MM( 73, J ), J = 1, 4 ) / 1818, 1589, 2365, + $ 981 / + DATA ( MM( 74, J ), J = 1, 4 ) / 688, 2391, 2431, + $ 2009 / + DATA ( MM( 75, J ), J = 1, 4 ) / 1407, 288, 1113, + $ 941 / + DATA ( MM( 76, J ), J = 1, 4 ) / 634, 26, 3922, + $ 2449 / + DATA ( MM( 77, J ), J = 1, 4 ) / 3231, 512, 2554, + $ 197 / + DATA ( MM( 78, J ), J = 1, 4 ) / 815, 1456, 184, + $ 2441 / + DATA ( MM( 79, J ), J = 1, 4 ) / 3524, 171, 2099, + $ 285 / + DATA ( MM( 80, J ), J = 1, 4 ) / 1914, 1677, 3228, + $ 1473 / + DATA ( MM( 81, J ), J = 1, 4 ) / 516, 2657, 4012, + $ 2741 / + DATA ( MM( 82, J ), J = 1, 4 ) / 164, 2270, 1921, + $ 3129 / + DATA ( MM( 83, J ), J = 1, 4 ) / 303, 2587, 3452, + $ 909 / + DATA ( MM( 84, J ), J = 1, 4 ) / 2144, 2961, 3901, + $ 2801 / + DATA ( MM( 85, J ), J = 1, 4 ) / 3480, 1970, 572, + $ 421 / + DATA ( MM( 86, J ), J = 1, 4 ) / 119, 1817, 3309, + $ 4073 / + DATA ( MM( 87, J ), J = 1, 4 ) / 3357, 676, 3171, + $ 2813 / + DATA ( MM( 88, J ), J = 1, 4 ) / 837, 1410, 817, + $ 2337 / + DATA ( MM( 89, J ), J = 1, 4 ) / 2826, 3723, 3039, + $ 1429 / + DATA ( MM( 90, J ), J = 1, 4 ) / 2332, 2803, 1696, + $ 1177 / + DATA ( MM( 91, J ), J = 1, 4 ) / 2089, 3185, 1256, + $ 1901 / + DATA ( MM( 92, J ), J = 1, 4 ) / 3780, 184, 3715, + $ 81 / + DATA ( MM( 93, J ), J = 1, 4 ) / 1700, 663, 2077, + $ 1669 / + DATA ( MM( 94, J ), J = 1, 4 ) / 3712, 499, 3019, + $ 2633 / + DATA ( MM( 95, J ), J = 1, 4 ) / 150, 3784, 1497, + $ 2269 / + DATA ( MM( 96, J ), J = 1, 4 ) / 2000, 1631, 1101, + $ 129 / + DATA ( MM( 97, J ), J = 1, 4 ) / 3375, 1925, 717, + $ 1141 / + DATA ( MM( 98, J ), J = 1, 4 ) / 1621, 3912, 51, + $ 249 / + DATA ( MM( 99, J ), J = 1, 4 ) / 3090, 1398, 981, + $ 3917 / + DATA ( MM( 100, J ), J = 1, 4 ) / 3765, 1349, 1978, + $ 2481 / + DATA ( MM( 101, J ), J = 1, 4 ) / 1149, 1441, 1813, + $ 3941 / + DATA ( MM( 102, J ), J = 1, 4 ) / 3146, 2224, 3881, + $ 2217 / + DATA ( MM( 103, J ), J = 1, 4 ) / 33, 2411, 76, + $ 2749 / + DATA ( MM( 104, J ), J = 1, 4 ) / 3082, 1907, 3846, + $ 3041 / + DATA ( MM( 105, J ), J = 1, 4 ) / 2741, 3192, 3694, + $ 1877 / + DATA ( MM( 106, J ), J = 1, 4 ) / 359, 2786, 1682, + $ 345 / + DATA ( MM( 107, J ), J = 1, 4 ) / 3316, 382, 124, + $ 2861 / + DATA ( MM( 108, J ), J = 1, 4 ) / 1749, 37, 1660, + $ 1809 / + DATA ( MM( 109, J ), J = 1, 4 ) / 185, 759, 3997, + $ 3141 / + DATA ( MM( 110, J ), J = 1, 4 ) / 2784, 2948, 479, + $ 2825 / + DATA ( MM( 111, J ), J = 1, 4 ) / 2202, 1862, 1141, + $ 157 / + DATA ( MM( 112, J ), J = 1, 4 ) / 2199, 3802, 886, + $ 2881 / + DATA ( MM( 113, J ), J = 1, 4 ) / 1364, 2423, 3514, + $ 3637 / + DATA ( MM( 114, J ), J = 1, 4 ) / 1244, 2051, 1301, + $ 1465 / + DATA ( MM( 115, J ), J = 1, 4 ) / 2020, 2295, 3604, + $ 2829 / + DATA ( MM( 116, J ), J = 1, 4 ) / 3160, 1332, 1888, + $ 2161 / + DATA ( MM( 117, J ), J = 1, 4 ) / 2785, 1832, 1836, + $ 3365 / + DATA ( MM( 118, J ), J = 1, 4 ) / 2772, 2405, 1990, + $ 361 / + DATA ( MM( 119, J ), J = 1, 4 ) / 1217, 3638, 2058, + $ 2685 / + DATA ( MM( 120, J ), J = 1, 4 ) / 1822, 3661, 692, + $ 3745 / + DATA ( MM( 121, J ), J = 1, 4 ) / 1245, 327, 1194, + $ 2325 / + DATA ( MM( 122, J ), J = 1, 4 ) / 2252, 3660, 20, + $ 3609 / + DATA ( MM( 123, J ), J = 1, 4 ) / 3904, 716, 3285, + $ 3821 / + DATA ( MM( 124, J ), J = 1, 4 ) / 2774, 1842, 2046, + $ 3537 / + DATA ( MM( 125, J ), J = 1, 4 ) / 997, 3987, 2107, + $ 517 / + DATA ( MM( 126, J ), J = 1, 4 ) / 2573, 1368, 3508, + $ 3017 / + DATA ( MM( 127, J ), J = 1, 4 ) / 1148, 1848, 3525, + $ 2141 / + DATA ( MM( 128, J ), J = 1, 4 ) / 545, 2366, 3801, + $ 1537 / +* .. +* .. Executable Statements .. +* + I1 = ISEED( 1 ) + I2 = ISEED( 2 ) + I3 = ISEED( 3 ) + I4 = ISEED( 4 ) +* + DO 10 I = 1, MIN( N, LV ) +* + 20 CONTINUE +* +* Multiply the seed by i-th power of the multiplier modulo 2**48 +* + IT4 = I4*MM( I, 4 ) + IT3 = IT4 / IPW2 + IT4 = IT4 - IPW2*IT3 + IT3 = IT3 + I3*MM( I, 4 ) + I4*MM( I, 3 ) + IT2 = IT3 / IPW2 + IT3 = IT3 - IPW2*IT2 + IT2 = IT2 + I2*MM( I, 4 ) + I3*MM( I, 3 ) + I4*MM( I, 2 ) + IT1 = IT2 / IPW2 + IT2 = IT2 - IPW2*IT1 + IT1 = IT1 + I1*MM( I, 4 ) + I2*MM( I, 3 ) + I3*MM( I, 2 ) + + $ I4*MM( I, 1 ) + IT1 = MOD( IT1, IPW2 ) +* +* Convert 48-bit integer to a real number in the interval (0,1) +* + X( I ) = R*( DBLE( IT1 )+R*( DBLE( IT2 )+R*( DBLE( IT3 )+R* + $ DBLE( IT4 ) ) ) ) +* + IF (X( I ).EQ.1.0D0) THEN +* If a real number has n bits of precision, and the first +* n bits of the 48-bit integer above happen to be all 1 (which +* will occur about once every 2**n calls), then X( I ) will +* be rounded to exactly 1.0. +* Since X( I ) is not supposed to return exactly 0.0 or 1.0, +* the statistically correct thing to do in this situation is +* simply to iterate again. +* N.B. the case X( I ) = 0.0 should not be possible. + I1 = I1 + 2 + I2 = I2 + 2 + I3 = I3 + 2 + I4 = I4 + 2 + GOTO 20 + END IF +* + 10 CONTINUE +* +* Return final value of seed +* + ISEED( 1 ) = IT1 + ISEED( 2 ) = IT2 + ISEED( 3 ) = IT3 + ISEED( 4 ) = IT4 + RETURN +* +* End of DLARUV +* + END diff --git a/dspl/liblapack/SRC/dlarz.f b/dspl/liblapack/SRC/dlarz.f new file mode 100644 index 0000000..73dc3f5 --- /dev/null +++ b/dspl/liblapack/SRC/dlarz.f @@ -0,0 +1,236 @@ +*> \brief \b DLARZ applies an elementary reflector (as returned by stzrzf) to a general matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARZ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARZ( SIDE, M, N, L, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, L, LDC, M, N +* DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARZ applies a real elementary reflector H to a real M-by-N +*> matrix C, from either the left or the right. H is represented in the +*> form +*> +*> H = I - tau * v * v**T +*> +*> where tau is a real scalar and v is a real vector. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> +*> +*> H is a product of k elementary reflectors as returned by DTZRZF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of entries of the vector V containing +*> the meaningful part of the Householder vectors. +*> If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (1+(L-1)*abs(INCV)) +*> The vector v in the representation of H as returned by +*> DTZRZF. V is not used if TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLARZ( SIDE, M, N, L, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, L, LDC, M, N + DOUBLE PRECISION TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGEMV, DGER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C +* + IF( TAU.NE.ZERO ) THEN +* +* w( 1:n ) = C( 1, 1:n ) +* + CALL DCOPY( N, C, LDC, WORK, 1 ) +* +* w( 1:n ) = w( 1:n ) + C( m-l+1:m, 1:n )**T * v( 1:l ) +* + CALL DGEMV( 'Transpose', L, N, ONE, C( M-L+1, 1 ), LDC, V, + $ INCV, ONE, WORK, 1 ) +* +* C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n ) +* + CALL DAXPY( N, -TAU, WORK, 1, C, LDC ) +* +* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... +* tau * v( 1:l ) * w( 1:n )**T +* + CALL DGER( L, N, -TAU, V, INCV, WORK, 1, C( M-L+1, 1 ), + $ LDC ) + END IF +* + ELSE +* +* Form C * H +* + IF( TAU.NE.ZERO ) THEN +* +* w( 1:m ) = C( 1:m, 1 ) +* + CALL DCOPY( M, C, 1, WORK, 1 ) +* +* w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n, 1:n ) * v( 1:l ) +* + CALL DGEMV( 'No transpose', M, L, ONE, C( 1, N-L+1 ), LDC, + $ V, INCV, ONE, WORK, 1 ) +* +* C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m ) +* + CALL DAXPY( M, -TAU, WORK, 1, C, 1 ) +* +* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... +* tau * w( 1:m ) * v( 1:l )**T +* + CALL DGER( M, L, -TAU, WORK, 1, V, INCV, C( 1, N-L+1 ), + $ LDC ) +* + END IF +* + END IF +* + RETURN +* +* End of DLARZ +* + END diff --git a/dspl/liblapack/SRC/dlarzb.f b/dspl/liblapack/SRC/dlarzb.f new file mode 100644 index 0000000..e34eef9 --- /dev/null +++ b/dspl/liblapack/SRC/dlarzb.f @@ -0,0 +1,323 @@ +*> \brief \b DLARZB applies a block reflector or its transpose to a general matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARZB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, +* LDV, T, LDT, C, LDC, WORK, LDWORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, SIDE, STOREV, TRANS +* INTEGER K, L, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( LDC, * ), T( LDT, * ), V( LDV, * ), +* $ WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARZB applies a real block reflector H or its transpose H**T to +*> a real distributed M-by-N C from the left or the right. +*> +*> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply H or H**T from the Left +*> = 'R': apply H or H**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply H (No transpose) +*> = 'C': apply H**T (Transpose) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Indicates how H is formed from a product of elementary +*> reflectors +*> = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Indicates how the vectors which define the elementary +*> reflectors are stored: +*> = 'C': Columnwise (not supported yet) +*> = 'R': Rowwise +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the matrix T (= the number of elementary +*> reflectors whose product defines the block reflector). +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of columns of the matrix V containing the +*> meaningful part of the Householder reflectors. +*> If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (LDV,NV). +*> If STOREV = 'C', NV = K; if STOREV = 'R', NV = L. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= L; if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,K) +*> The triangular K-by-K matrix T in the representation of the +*> block reflector. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by H*C or H**T*C or C*H or C*H**T. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LDWORK,K) +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. +*> If SIDE = 'L', LDWORK >= max(1,N); +*> if SIDE = 'R', LDWORK >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, + $ LDV, T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, L, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, INFO, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DTRMM, XERBLA +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* +* Check for currently supported options +* + INFO = 0 + IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLARZB', -INFO ) + RETURN + END IF +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C +* +* W( 1:n, 1:k ) = C( 1:k, 1:n )**T +* + DO 10 J = 1, K + CALL DCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W( 1:n, 1:k ) = W( 1:n, 1:k ) + ... +* C( m-l+1:m, 1:n )**T * V( 1:k, 1:l )**T +* + IF( L.GT.0 ) + $ CALL DGEMM( 'Transpose', 'Transpose', N, K, L, ONE, + $ C( M-L+1, 1 ), LDC, V, LDV, ONE, WORK, LDWORK ) +* +* W( 1:n, 1:k ) = W( 1:n, 1:k ) * T**T or W( 1:m, 1:k ) * T +* + CALL DTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, ONE, T, + $ LDT, WORK, LDWORK ) +* +* C( 1:k, 1:n ) = C( 1:k, 1:n ) - W( 1:n, 1:k )**T +* + DO 30 J = 1, N + DO 20 I = 1, K + C( I, J ) = C( I, J ) - WORK( J, I ) + 20 CONTINUE + 30 CONTINUE +* +* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... +* V( 1:k, 1:l )**T * W( 1:n, 1:k )**T +* + IF( L.GT.0 ) + $ CALL DGEMM( 'Transpose', 'Transpose', L, N, K, -ONE, V, LDV, + $ WORK, LDWORK, ONE, C( M-L+1, 1 ), LDC ) +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**T +* +* W( 1:m, 1:k ) = C( 1:m, 1:k ) +* + DO 40 J = 1, K + CALL DCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W( 1:m, 1:k ) = W( 1:m, 1:k ) + ... +* C( 1:m, n-l+1:n ) * V( 1:k, 1:l )**T +* + IF( L.GT.0 ) + $ CALL DGEMM( 'No transpose', 'Transpose', M, K, L, ONE, + $ C( 1, N-L+1 ), LDC, V, LDV, ONE, WORK, LDWORK ) +* +* W( 1:m, 1:k ) = W( 1:m, 1:k ) * T or W( 1:m, 1:k ) * T**T +* + CALL DTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, ONE, T, + $ LDT, WORK, LDWORK ) +* +* C( 1:m, 1:k ) = C( 1:m, 1:k ) - W( 1:m, 1:k ) +* + DO 60 J = 1, K + DO 50 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE +* +* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... +* W( 1:m, 1:k ) * V( 1:k, 1:l ) +* + IF( L.GT.0 ) + $ CALL DGEMM( 'No transpose', 'No transpose', M, L, K, -ONE, + $ WORK, LDWORK, V, LDV, ONE, C( 1, N-L+1 ), LDC ) +* + END IF +* + RETURN +* +* End of DLARZB +* + END diff --git a/dspl/liblapack/SRC/dlarzt.f b/dspl/liblapack/SRC/dlarzt.f new file mode 100644 index 0000000..5925569 --- /dev/null +++ b/dspl/liblapack/SRC/dlarzt.f @@ -0,0 +1,264 @@ +*> \brief \b DLARZT forms the triangular factor T of a block reflector H = I - vtvH. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLARZT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLARZT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLARZT forms the triangular factor T of a real block reflector +*> H of order > n, which is defined as a product of k elementary +*> reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**T +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**T * T * V +*> +*> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise (not supported yet) +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored; the corresponding +*> array elements are modified but restored on exit. The rest of the +*> array is not used. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> ______V_____ +*> ( v1 v2 v3 ) / \ +*> ( v1 v2 v3 ) ( v1 v1 v1 v1 v1 . . . . 1 ) +*> V = ( v1 v2 v3 ) ( v2 v2 v2 v2 v2 . . . 1 ) +*> ( v1 v2 v3 ) ( v3 v3 v3 v3 v3 . . 1 ) +*> ( v1 v2 v3 ) +*> . . . +*> . . . +*> 1 . . +*> 1 . +*> 1 +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> ______V_____ +*> 1 / \ +*> . 1 ( 1 . . . . v1 v1 v1 v1 v1 ) +*> . . 1 ( . 1 . . . v2 v2 v2 v2 v2 ) +*> . . . ( . . 1 . . v3 v3 v3 v3 v3 ) +*> . . . +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> V = ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLARZT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DTRMV, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Check for currently supported options +* + INFO = 0 + IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLARZT', -INFO ) + RETURN + END IF +* + DO 20 I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 10 J = I, K + T( J, I ) = ZERO + 10 CONTINUE + ELSE +* +* general case +* + IF( I.LT.K ) THEN +* +* T(i+1:k,i) = - tau(i) * V(i+1:k,1:n) * V(i,1:n)**T +* + CALL DGEMV( 'No transpose', K-I, N, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO, + $ T( I+1, I ), 1 ) +* +* T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL DTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + END IF + T( I, I ) = TAU( I ) + END IF + 20 CONTINUE + RETURN +* +* End of DLARZT +* + END diff --git a/dspl/liblapack/SRC/dlas2.f b/dspl/liblapack/SRC/dlas2.f new file mode 100644 index 0000000..83873bc --- /dev/null +++ b/dspl/liblapack/SRC/dlas2.f @@ -0,0 +1,183 @@ +*> \brief \b DLAS2 computes singular values of a 2-by-2 triangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAS2( F, G, H, SSMIN, SSMAX ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION F, G, H, SSMAX, SSMIN +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAS2 computes the singular values of the 2-by-2 matrix +*> [ F G ] +*> [ 0 H ]. +*> On return, SSMIN is the smaller singular value and SSMAX is the +*> larger singular value. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] F +*> \verbatim +*> F is DOUBLE PRECISION +*> The (1,1) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] G +*> \verbatim +*> G is DOUBLE PRECISION +*> The (1,2) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] H +*> \verbatim +*> H is DOUBLE PRECISION +*> The (2,2) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[out] SSMIN +*> \verbatim +*> SSMIN is DOUBLE PRECISION +*> The smaller singular value. +*> \endverbatim +*> +*> \param[out] SSMAX +*> \verbatim +*> SSMAX is DOUBLE PRECISION +*> The larger singular value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Barring over/underflow, all output quantities are correct to within +*> a few units in the last place (ulps), even in the absence of a guard +*> digit in addition/subtraction. +*> +*> In IEEE arithmetic, the code works correctly if one matrix element is +*> infinite. +*> +*> Overflow will not occur unless the largest singular value itself +*> overflows, or is within a few ulps of overflow. (On machines with +*> partial overflow, like the Cray, overflow may occur if the largest +*> singular value is within a factor of 2 of overflow.) +*> +*> Underflow is harmless if underflow is gradual. Otherwise, results +*> may correspond to a matrix modified by perturbations of size near +*> the underflow threshold. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLAS2( F, G, H, SSMIN, SSMAX ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + DOUBLE PRECISION F, G, H, SSMAX, SSMIN +* .. +* +* ==================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION AS, AT, AU, C, FA, FHMN, FHMX, GA, HA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + FA = ABS( F ) + GA = ABS( G ) + HA = ABS( H ) + FHMN = MIN( FA, HA ) + FHMX = MAX( FA, HA ) + IF( FHMN.EQ.ZERO ) THEN + SSMIN = ZERO + IF( FHMX.EQ.ZERO ) THEN + SSMAX = GA + ELSE + SSMAX = MAX( FHMX, GA )*SQRT( ONE+ + $ ( MIN( FHMX, GA ) / MAX( FHMX, GA ) )**2 ) + END IF + ELSE + IF( GA.LT.FHMX ) THEN + AS = ONE + FHMN / FHMX + AT = ( FHMX-FHMN ) / FHMX + AU = ( GA / FHMX )**2 + C = TWO / ( SQRT( AS*AS+AU )+SQRT( AT*AT+AU ) ) + SSMIN = FHMN*C + SSMAX = FHMX / C + ELSE + AU = FHMX / GA + IF( AU.EQ.ZERO ) THEN +* +* Avoid possible harmful underflow if exponent range +* asymmetric (true SSMIN may not underflow even if +* AU underflows) +* + SSMIN = ( FHMN*FHMX ) / GA + SSMAX = GA + ELSE + AS = ONE + FHMN / FHMX + AT = ( FHMX-FHMN ) / FHMX + C = ONE / ( SQRT( ONE+( AS*AU )**2 )+ + $ SQRT( ONE+( AT*AU )**2 ) ) + SSMIN = ( FHMN*C )*AU + SSMIN = SSMIN + SSMIN + SSMAX = GA / ( C+C ) + END IF + END IF + END IF + RETURN +* +* End of DLAS2 +* + END diff --git a/dspl/liblapack/SRC/dlascl.f b/dspl/liblapack/SRC/dlascl.f new file mode 100644 index 0000000..03e1000 --- /dev/null +++ b/dspl/liblapack/SRC/dlascl.f @@ -0,0 +1,368 @@ +*> \brief \b DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASCL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TYPE +* INTEGER INFO, KL, KU, LDA, M, N +* DOUBLE PRECISION CFROM, CTO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASCL multiplies the M by N real matrix A by the real scalar +*> CTO/CFROM. This is done without over/underflow as long as the final +*> result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that +*> A may be full, upper triangular, lower triangular, upper Hessenberg, +*> or banded. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TYPE +*> \verbatim +*> TYPE is CHARACTER*1 +*> TYPE indices the storage type of the input matrix. +*> = 'G': A is a full matrix. +*> = 'L': A is a lower triangular matrix. +*> = 'U': A is an upper triangular matrix. +*> = 'H': A is an upper Hessenberg matrix. +*> = 'B': A is a symmetric band matrix with lower bandwidth KL +*> and upper bandwidth KU and with the only the lower +*> half stored. +*> = 'Q': A is a symmetric band matrix with lower bandwidth KL +*> and upper bandwidth KU and with the only the upper +*> half stored. +*> = 'Z': A is a band matrix with lower bandwidth KL and upper +*> bandwidth KU. See DGBTRF for storage details. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The lower bandwidth of A. Referenced only if TYPE = 'B', +*> 'Q' or 'Z'. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The upper bandwidth of A. Referenced only if TYPE = 'B', +*> 'Q' or 'Z'. +*> \endverbatim +*> +*> \param[in] CFROM +*> \verbatim +*> CFROM is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] CTO +*> \verbatim +*> CTO is DOUBLE PRECISION +*> +*> The matrix A is multiplied by CTO/CFROM. A(I,J) is computed +*> without over/underflow if the final result CTO*A(I,J)/CFROM +*> can be represented without over/underflow. CFROM must be +*> nonzero. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The matrix to be multiplied by CTO/CFROM. See TYPE for the +*> storage type. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If TYPE = 'G', 'L', 'U', 'H', LDA >= max(1,M); +*> TYPE = 'B', LDA >= KL+1; +*> TYPE = 'Q', LDA >= KU+1; +*> TYPE = 'Z', LDA >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> 0 - successful exit +*> <0 - if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER TYPE + INTEGER INFO, KL, KU, LDA, M, N + DOUBLE PRECISION CFROM, CTO +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER I, ITYPE, J, K1, K2, K3, K4 + DOUBLE PRECISION BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH, DISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 +* + IF( LSAME( TYPE, 'G' ) ) THEN + ITYPE = 0 + ELSE IF( LSAME( TYPE, 'L' ) ) THEN + ITYPE = 1 + ELSE IF( LSAME( TYPE, 'U' ) ) THEN + ITYPE = 2 + ELSE IF( LSAME( TYPE, 'H' ) ) THEN + ITYPE = 3 + ELSE IF( LSAME( TYPE, 'B' ) ) THEN + ITYPE = 4 + ELSE IF( LSAME( TYPE, 'Q' ) ) THEN + ITYPE = 5 + ELSE IF( LSAME( TYPE, 'Z' ) ) THEN + ITYPE = 6 + ELSE + ITYPE = -1 + END IF +* + IF( ITYPE.EQ.-1 ) THEN + INFO = -1 + ELSE IF( CFROM.EQ.ZERO .OR. DISNAN(CFROM) ) THEN + INFO = -4 + ELSE IF( DISNAN(CTO) ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -6 + ELSE IF( N.LT.0 .OR. ( ITYPE.EQ.4 .AND. N.NE.M ) .OR. + $ ( ITYPE.EQ.5 .AND. N.NE.M ) ) THEN + INFO = -7 + ELSE IF( ITYPE.LE.3 .AND. LDA.LT.MAX( 1, M ) ) THEN + INFO = -9 + ELSE IF( ITYPE.GE.4 ) THEN + IF( KL.LT.0 .OR. KL.GT.MAX( M-1, 0 ) ) THEN + INFO = -2 + ELSE IF( KU.LT.0 .OR. KU.GT.MAX( N-1, 0 ) .OR. + $ ( ( ITYPE.EQ.4 .OR. ITYPE.EQ.5 ) .AND. KL.NE.KU ) ) + $ THEN + INFO = -3 + ELSE IF( ( ITYPE.EQ.4 .AND. LDA.LT.KL+1 ) .OR. + $ ( ITYPE.EQ.5 .AND. LDA.LT.KU+1 ) .OR. + $ ( ITYPE.EQ.6 .AND. LDA.LT.2*KL+KU+1 ) ) THEN + INFO = -9 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASCL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* + CFROMC = CFROM + CTOC = CTO +* + 10 CONTINUE + CFROM1 = CFROMC*SMLNUM + IF( CFROM1.EQ.CFROMC ) THEN +! CFROMC is an inf. Multiply by a correctly signed zero for +! finite CTOC, or a NaN if CTOC is infinite. + MUL = CTOC / CFROMC + DONE = .TRUE. + CTO1 = CTOC + ELSE + CTO1 = CTOC / BIGNUM + IF( CTO1.EQ.CTOC ) THEN +! CTOC is either 0 or an inf. In both cases, CTOC itself +! serves as the correct multiplication factor. + MUL = CTOC + DONE = .TRUE. + CFROMC = ONE + ELSE IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN + MUL = SMLNUM + DONE = .FALSE. + CFROMC = CFROM1 + ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN + MUL = BIGNUM + DONE = .FALSE. + CTOC = CTO1 + ELSE + MUL = CTOC / CFROMC + DONE = .TRUE. + END IF + END IF +* + IF( ITYPE.EQ.0 ) THEN +* +* Full matrix +* + DO 30 J = 1, N + DO 20 I = 1, M + A( I, J ) = A( I, J )*MUL + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( ITYPE.EQ.1 ) THEN +* +* Lower triangular matrix +* + DO 50 J = 1, N + DO 40 I = J, M + A( I, J ) = A( I, J )*MUL + 40 CONTINUE + 50 CONTINUE +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Upper triangular matrix +* + DO 70 J = 1, N + DO 60 I = 1, MIN( J, M ) + A( I, J ) = A( I, J )*MUL + 60 CONTINUE + 70 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* Upper Hessenberg matrix +* + DO 90 J = 1, N + DO 80 I = 1, MIN( J+1, M ) + A( I, J ) = A( I, J )*MUL + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Lower half of a symmetric band matrix +* + K3 = KL + 1 + K4 = N + 1 + DO 110 J = 1, N + DO 100 I = 1, MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 100 CONTINUE + 110 CONTINUE +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Upper half of a symmetric band matrix +* + K1 = KU + 2 + K3 = KU + 1 + DO 130 J = 1, N + DO 120 I = MAX( K1-J, 1 ), K3 + A( I, J ) = A( I, J )*MUL + 120 CONTINUE + 130 CONTINUE +* + ELSE IF( ITYPE.EQ.6 ) THEN +* +* Band matrix +* + K1 = KL + KU + 2 + K2 = KL + 1 + K3 = 2*KL + KU + 1 + K4 = KL + KU + 1 + M + DO 150 J = 1, N + DO 140 I = MAX( K1-J, K2 ), MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 140 CONTINUE + 150 CONTINUE +* + END IF +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of DLASCL +* + END diff --git a/dspl/liblapack/SRC/dlascl2.f b/dspl/liblapack/SRC/dlascl2.f new file mode 100644 index 0000000..ae88075 --- /dev/null +++ b/dspl/liblapack/SRC/dlascl2.f @@ -0,0 +1,119 @@ +*> \brief \b DLASCL2 performs diagonal scaling on a vector. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASCL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASCL2 ( M, N, D, X, LDX ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDX +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASCL2 performs a diagonal scaling on a vector: +*> x <-- D * x +*> where the diagonal matrix D is stored as a vector. +*> +*> Eventually to be replaced by BLAS_dge_diag_scale in the new BLAS +*> standard. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of D and X. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of X. N >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, length M +*> Diagonal matrix D, stored as a vector of length M. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,N) +*> On entry, the vector X to be scaled by D. +*> On exit, the scaled vector. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the vector X. LDX >= M. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DLASCL2 ( M, N, D, X, LDX ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER M, N, LDX +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. Executable Statements .. +* + DO J = 1, N + DO I = 1, M + X( I, J ) = X( I, J ) * D( I ) + END DO + END DO + + RETURN + END diff --git a/dspl/liblapack/SRC/dlasd0.f b/dspl/liblapack/SRC/dlasd0.f new file mode 100644 index 0000000..522ca9a --- /dev/null +++ b/dspl/liblapack/SRC/dlasd0.f @@ -0,0 +1,316 @@ +*> \brief \b DLASD0 computes the singular values of a real upper bidiagonal n-by-m matrix B with diagonal d and off-diagonal e. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASD0 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASD0( N, SQRE, D, E, U, LDU, VT, LDVT, SMLSIZ, IWORK, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDU, LDVT, N, SMLSIZ, SQRE +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), U( LDU, * ), VT( LDVT, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Using a divide and conquer approach, DLASD0 computes the singular +*> value decomposition (SVD) of a real upper bidiagonal N-by-M +*> matrix B with diagonal D and offdiagonal E, where M = N + SQRE. +*> The algorithm computes orthogonal matrices U and VT such that +*> B = U * S * VT. The singular values S are overwritten on D. +*> +*> A related subroutine, DLASDA, computes only the singular values, +*> and optionally, the singular vectors in compact form. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, the row dimension of the upper bidiagonal matrix. +*> This is also the dimension of the main diagonal array D. +*> \endverbatim +*> +*> \param[in] SQRE +*> \verbatim +*> SQRE is INTEGER +*> Specifies the column dimension of the bidiagonal matrix. +*> = 0: The bidiagonal matrix has column dimension M = N; +*> = 1: The bidiagonal matrix has column dimension M = N+1; +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry D contains the main diagonal of the bidiagonal +*> matrix. +*> On exit D, if INFO = 0, contains its singular values. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (M-1) +*> Contains the subdiagonal entries of the bidiagonal matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension (LDU, N) +*> On exit, U contains the left singular vectors. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> On entry, leading dimension of U. +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is DOUBLE PRECISION array, dimension (LDVT, M) +*> On exit, VT**T contains the right singular vectors. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> On entry, leading dimension of VT. +*> \endverbatim +*> +*> \param[in] SMLSIZ +*> \verbatim +*> SMLSIZ is INTEGER +*> On entry, maximum size of the subproblems at the +*> bottom of the computation tree. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (8*N) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*M**2+2*M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, a singular value did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DLASD0( N, SQRE, D, E, U, LDU, VT, LDVT, SMLSIZ, IWORK, + $ WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDU, LDVT, N, SMLSIZ, SQRE +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), U( LDU, * ), VT( LDVT, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, I1, IC, IDXQ, IDXQC, IM1, INODE, ITEMP, IWK, + $ J, LF, LL, LVL, M, NCC, ND, NDB1, NDIML, NDIMR, + $ NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQREI + DOUBLE PRECISION ALPHA, BETA +* .. +* .. External Subroutines .. + EXTERNAL DLASD1, DLASDQ, DLASDT, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -2 + END IF +* + M = N + SQRE +* + IF( LDU.LT.N ) THEN + INFO = -6 + ELSE IF( LDVT.LT.M ) THEN + INFO = -8 + ELSE IF( SMLSIZ.LT.3 ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASD0', -INFO ) + RETURN + END IF +* +* If the input matrix is too small, call DLASDQ to find the SVD. +* + IF( N.LE.SMLSIZ ) THEN + CALL DLASDQ( 'U', SQRE, N, M, N, 0, D, E, VT, LDVT, U, LDU, U, + $ LDU, WORK, INFO ) + RETURN + END IF +* +* Set up the computation tree. +* + INODE = 1 + NDIML = INODE + N + NDIMR = NDIML + N + IDXQ = NDIMR + N + IWK = IDXQ + N + CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ), + $ IWORK( NDIMR ), SMLSIZ ) +* +* For the nodes on bottom level of the tree, solve +* their subproblems by DLASDQ. +* + NDB1 = ( ND+1 ) / 2 + NCC = 0 + DO 30 I = NDB1, ND +* +* IC : center row of each node +* NL : number of rows of left subproblem +* NR : number of rows of right subproblem +* NLF: starting row of the left subproblem +* NRF: starting row of the right subproblem +* + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NLP1 = NL + 1 + NR = IWORK( NDIMR+I1 ) + NRP1 = NR + 1 + NLF = IC - NL + NRF = IC + 1 + SQREI = 1 + CALL DLASDQ( 'U', SQREI, NL, NLP1, NL, NCC, D( NLF ), E( NLF ), + $ VT( NLF, NLF ), LDVT, U( NLF, NLF ), LDU, + $ U( NLF, NLF ), LDU, WORK, INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + ITEMP = IDXQ + NLF - 2 + DO 10 J = 1, NL + IWORK( ITEMP+J ) = J + 10 CONTINUE + IF( I.EQ.ND ) THEN + SQREI = SQRE + ELSE + SQREI = 1 + END IF + NRP1 = NR + SQREI + CALL DLASDQ( 'U', SQREI, NR, NRP1, NR, NCC, D( NRF ), E( NRF ), + $ VT( NRF, NRF ), LDVT, U( NRF, NRF ), LDU, + $ U( NRF, NRF ), LDU, WORK, INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + ITEMP = IDXQ + IC + DO 20 J = 1, NR + IWORK( ITEMP+J-1 ) = J + 20 CONTINUE + 30 CONTINUE +* +* Now conquer each subproblem bottom-up. +* + DO 50 LVL = NLVL, 1, -1 +* +* Find the first node LF and last node LL on the +* current level LVL. +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 40 I = LF, LL + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + IF( ( SQRE.EQ.0 ) .AND. ( I.EQ.LL ) ) THEN + SQREI = SQRE + ELSE + SQREI = 1 + END IF + IDXQC = IDXQ + NLF - 1 + ALPHA = D( IC ) + BETA = E( IC ) + CALL DLASD1( NL, NR, SQREI, D( NLF ), ALPHA, BETA, + $ U( NLF, NLF ), LDU, VT( NLF, NLF ), LDVT, + $ IWORK( IDXQC ), IWORK( IWK ), WORK, INFO ) +* +* Report the possible convergence failure. +* + IF( INFO.NE.0 ) THEN + RETURN + END IF + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of DLASD0 +* + END diff --git a/dspl/liblapack/SRC/dlasd1.f b/dspl/liblapack/SRC/dlasd1.f new file mode 100644 index 0000000..fe8aad9 --- /dev/null +++ b/dspl/liblapack/SRC/dlasd1.f @@ -0,0 +1,326 @@ +*> \brief \b DLASD1 computes the SVD of an upper bidiagonal matrix B of the specified size. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASD1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASD1( NL, NR, SQRE, D, ALPHA, BETA, U, LDU, VT, LDVT, +* IDXQ, IWORK, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDU, LDVT, NL, NR, SQRE +* DOUBLE PRECISION ALPHA, BETA +* .. +* .. Array Arguments .. +* INTEGER IDXQ( * ), IWORK( * ) +* DOUBLE PRECISION D( * ), U( LDU, * ), VT( LDVT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, +*> where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0. +*> +*> A related subroutine DLASD7 handles the case in which the singular +*> values (and the singular vectors in factored form) are desired. +*> +*> DLASD1 computes the SVD as follows: +*> +*> ( D1(in) 0 0 0 ) +*> B = U(in) * ( Z1**T a Z2**T b ) * VT(in) +*> ( 0 0 D2(in) 0 ) +*> +*> = U(out) * ( D(out) 0) * VT(out) +*> +*> where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M +*> with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros +*> elsewhere; and the entry b is empty if SQRE = 0. +*> +*> The left singular vectors of the original matrix are stored in U, and +*> the transpose of the right singular vectors are stored in VT, and the +*> singular values are in D. The algorithm consists of three stages: +*> +*> The first stage consists of deflating the size of the problem +*> when there are multiple singular values or when there are zeros in +*> the Z vector. For each such occurrence the dimension of the +*> secular equation problem is reduced by one. This stage is +*> performed by the routine DLASD2. +*> +*> The second stage consists of calculating the updated +*> singular values. This is done by finding the square roots of the +*> roots of the secular equation via the routine DLASD4 (as called +*> by DLASD3). This routine also calculates the singular vectors of +*> the current problem. +*> +*> The final stage consists of computing the updated singular vectors +*> directly using the updated singular values. The singular vectors +*> for the current problem are multiplied with the singular vectors +*> from the overall problem. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NL +*> \verbatim +*> NL is INTEGER +*> The row dimension of the upper block. NL >= 1. +*> \endverbatim +*> +*> \param[in] NR +*> \verbatim +*> NR is INTEGER +*> The row dimension of the lower block. NR >= 1. +*> \endverbatim +*> +*> \param[in] SQRE +*> \verbatim +*> SQRE is INTEGER +*> = 0: the lower block is an NR-by-NR square matrix. +*> = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +*> +*> The bidiagonal matrix has row dimension N = NL + NR + 1, +*> and column dimension M = N + SQRE. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, +*> dimension (N = NL+NR+1). +*> On entry D(1:NL,1:NL) contains the singular values of the +*> upper block; and D(NL+2:N) contains the singular values of +*> the lower block. On exit D(1:N) contains the singular values +*> of the modified matrix. +*> \endverbatim +*> +*> \param[in,out] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION +*> Contains the diagonal element associated with the added row. +*> \endverbatim +*> +*> \param[in,out] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION +*> Contains the off-diagonal element associated with the added +*> row. +*> \endverbatim +*> +*> \param[in,out] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension(LDU,N) +*> On entry U(1:NL, 1:NL) contains the left singular vectors of +*> the upper block; U(NL+2:N, NL+2:N) contains the left singular +*> vectors of the lower block. On exit U contains the left +*> singular vectors of the bidiagonal matrix. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max( 1, N ). +*> \endverbatim +*> +*> \param[in,out] VT +*> \verbatim +*> VT is DOUBLE PRECISION array, dimension(LDVT,M) +*> where M = N + SQRE. +*> On entry VT(1:NL+1, 1:NL+1)**T contains the right singular +*> vectors of the upper block; VT(NL+2:M, NL+2:M)**T contains +*> the right singular vectors of the lower block. On exit +*> VT**T contains the right singular vectors of the +*> bidiagonal matrix. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= max( 1, M ). +*> \endverbatim +*> +*> \param[in,out] IDXQ +*> \verbatim +*> IDXQ is INTEGER array, dimension(N) +*> This contains the permutation which will reintegrate the +*> subproblem just solved back into sorted order, i.e. +*> D( IDXQ( I = 1, N ) ) will be in ascending order. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension( 4 * N ) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension( 3*M**2 + 2*M ) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, a singular value did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DLASD1( NL, NR, SQRE, D, ALPHA, BETA, U, LDU, VT, LDVT, + $ IDXQ, IWORK, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDU, LDVT, NL, NR, SQRE + DOUBLE PRECISION ALPHA, BETA +* .. +* .. Array Arguments .. + INTEGER IDXQ( * ), IWORK( * ) + DOUBLE PRECISION D( * ), U( LDU, * ), VT( LDVT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER COLTYP, I, IDX, IDXC, IDXP, IQ, ISIGMA, IU2, + $ IVT2, IZ, K, LDQ, LDU2, LDVT2, M, N, N1, N2 + DOUBLE PRECISION ORGNRM +* .. +* .. External Subroutines .. + EXTERNAL DLAMRG, DLASCL, DLASD2, DLASD3, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( NL.LT.1 ) THEN + INFO = -1 + ELSE IF( NR.LT.1 ) THEN + INFO = -2 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASD1', -INFO ) + RETURN + END IF +* + N = NL + NR + 1 + M = N + SQRE +* +* The following values are for bookkeeping purposes only. They are +* integer pointers which indicate the portion of the workspace +* used by a particular array in DLASD2 and DLASD3. +* + LDU2 = N + LDVT2 = M +* + IZ = 1 + ISIGMA = IZ + M + IU2 = ISIGMA + N + IVT2 = IU2 + LDU2*N + IQ = IVT2 + LDVT2*M +* + IDX = 1 + IDXC = IDX + N + COLTYP = IDXC + N + IDXP = COLTYP + N +* +* Scale. +* + ORGNRM = MAX( ABS( ALPHA ), ABS( BETA ) ) + D( NL+1 ) = ZERO + DO 10 I = 1, N + IF( ABS( D( I ) ).GT.ORGNRM ) THEN + ORGNRM = ABS( D( I ) ) + END IF + 10 CONTINUE + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) + ALPHA = ALPHA / ORGNRM + BETA = BETA / ORGNRM +* +* Deflate singular values. +* + CALL DLASD2( NL, NR, SQRE, K, D, WORK( IZ ), ALPHA, BETA, U, LDU, + $ VT, LDVT, WORK( ISIGMA ), WORK( IU2 ), LDU2, + $ WORK( IVT2 ), LDVT2, IWORK( IDXP ), IWORK( IDX ), + $ IWORK( IDXC ), IDXQ, IWORK( COLTYP ), INFO ) +* +* Solve Secular Equation and update singular vectors. +* + LDQ = K + CALL DLASD3( NL, NR, SQRE, K, D, WORK( IQ ), LDQ, WORK( ISIGMA ), + $ U, LDU, WORK( IU2 ), LDU2, VT, LDVT, WORK( IVT2 ), + $ LDVT2, IWORK( IDXC ), IWORK( COLTYP ), WORK( IZ ), + $ INFO ) +* +* Report the convergence failure. +* + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* Unscale. +* + CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) +* +* Prepare the IDXQ sorting permutation. +* + N1 = K + N2 = N - K + CALL DLAMRG( N1, N2, D, 1, -1, IDXQ ) +* + RETURN +* +* End of DLASD1 +* + END diff --git a/dspl/liblapack/SRC/dlasd2.f b/dspl/liblapack/SRC/dlasd2.f new file mode 100644 index 0000000..87a1543 --- /dev/null +++ b/dspl/liblapack/SRC/dlasd2.f @@ -0,0 +1,634 @@ +*> \brief \b DLASD2 merges the two sets of singular values together into a single sorted set. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASD2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT, +* LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX, +* IDXC, IDXQ, COLTYP, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE +* DOUBLE PRECISION ALPHA, BETA +* .. +* .. Array Arguments .. +* INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ), +* $ IDXQ( * ) +* DOUBLE PRECISION D( * ), DSIGMA( * ), U( LDU, * ), +* $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ), +* $ Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASD2 merges the two sets of singular values together into a single +*> sorted set. Then it tries to deflate the size of the problem. +*> There are two ways in which deflation can occur: when two or more +*> singular values are close together or if there is a tiny entry in the +*> Z vector. For each such occurrence the order of the related secular +*> equation problem is reduced by one. +*> +*> DLASD2 is called from DLASD1. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NL +*> \verbatim +*> NL is INTEGER +*> The row dimension of the upper block. NL >= 1. +*> \endverbatim +*> +*> \param[in] NR +*> \verbatim +*> NR is INTEGER +*> The row dimension of the lower block. NR >= 1. +*> \endverbatim +*> +*> \param[in] SQRE +*> \verbatim +*> SQRE is INTEGER +*> = 0: the lower block is an NR-by-NR square matrix. +*> = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +*> +*> The bidiagonal matrix has N = NL + NR + 1 rows and +*> M = N + SQRE >= N columns. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> Contains the dimension of the non-deflated matrix, +*> This is the order of the related secular equation. 1 <= K <=N. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension(N) +*> On entry D contains the singular values of the two submatrices +*> to be combined. On exit D contains the trailing (N-K) updated +*> singular values (those which were deflated) sorted into +*> increasing order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension(N) +*> On exit Z contains the updating row vector in the secular +*> equation. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION +*> Contains the diagonal element associated with the added row. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION +*> Contains the off-diagonal element associated with the added +*> row. +*> \endverbatim +*> +*> \param[in,out] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension(LDU,N) +*> On entry U contains the left singular vectors of two +*> submatrices in the two square blocks with corners at (1,1), +*> (NL, NL), and (NL+2, NL+2), (N,N). +*> On exit U contains the trailing (N-K) updated left singular +*> vectors (those which were deflated) in its last N-K columns. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= N. +*> \endverbatim +*> +*> \param[in,out] VT +*> \verbatim +*> VT is DOUBLE PRECISION array, dimension(LDVT,M) +*> On entry VT**T contains the right singular vectors of two +*> submatrices in the two square blocks with corners at (1,1), +*> (NL+1, NL+1), and (NL+2, NL+2), (M,M). +*> On exit VT**T contains the trailing (N-K) updated right singular +*> vectors (those which were deflated) in its last N-K columns. +*> In case SQRE =1, the last row of VT spans the right null +*> space. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= M. +*> \endverbatim +*> +*> \param[out] DSIGMA +*> \verbatim +*> DSIGMA is DOUBLE PRECISION array, dimension (N) +*> Contains a copy of the diagonal elements (K-1 singular values +*> and one zero) in the secular equation. +*> \endverbatim +*> +*> \param[out] U2 +*> \verbatim +*> U2 is DOUBLE PRECISION array, dimension(LDU2,N) +*> Contains a copy of the first K-1 left singular vectors which +*> will be used by DLASD3 in a matrix multiply (DGEMM) to solve +*> for the new left singular vectors. U2 is arranged into four +*> blocks. The first block contains a column with 1 at NL+1 and +*> zero everywhere else; the second block contains non-zero +*> entries only at and above NL; the third contains non-zero +*> entries only below NL+1; and the fourth is dense. +*> \endverbatim +*> +*> \param[in] LDU2 +*> \verbatim +*> LDU2 is INTEGER +*> The leading dimension of the array U2. LDU2 >= N. +*> \endverbatim +*> +*> \param[out] VT2 +*> \verbatim +*> VT2 is DOUBLE PRECISION array, dimension(LDVT2,N) +*> VT2**T contains a copy of the first K right singular vectors +*> which will be used by DLASD3 in a matrix multiply (DGEMM) to +*> solve for the new right singular vectors. VT2 is arranged into +*> three blocks. The first block contains a row that corresponds +*> to the special 0 diagonal element in SIGMA; the second block +*> contains non-zeros only at and before NL +1; the third block +*> contains non-zeros only at and after NL +2. +*> \endverbatim +*> +*> \param[in] LDVT2 +*> \verbatim +*> LDVT2 is INTEGER +*> The leading dimension of the array VT2. LDVT2 >= M. +*> \endverbatim +*> +*> \param[out] IDXP +*> \verbatim +*> IDXP is INTEGER array, dimension(N) +*> This will contain the permutation used to place deflated +*> values of D at the end of the array. On output IDXP(2:K) +*> points to the nondeflated D-values and IDXP(K+1:N) +*> points to the deflated singular values. +*> \endverbatim +*> +*> \param[out] IDX +*> \verbatim +*> IDX is INTEGER array, dimension(N) +*> This will contain the permutation used to sort the contents of +*> D into ascending order. +*> \endverbatim +*> +*> \param[out] IDXC +*> \verbatim +*> IDXC is INTEGER array, dimension(N) +*> This will contain the permutation used to arrange the columns +*> of the deflated U matrix into three groups: the first group +*> contains non-zero entries only at and above NL, the second +*> contains non-zero entries only below NL+2, and the third is +*> dense. +*> \endverbatim +*> +*> \param[in,out] IDXQ +*> \verbatim +*> IDXQ is INTEGER array, dimension(N) +*> This contains the permutation which separately sorts the two +*> sub-problems in D into ascending order. Note that entries in +*> the first hlaf of this permutation must first be moved one +*> position backward; and entries in the second half +*> must first have NL+1 added to their values. +*> \endverbatim +*> +*> \param[out] COLTYP +*> \verbatim +*> COLTYP is INTEGER array, dimension(N) +*> As workspace, this will contain a label which will indicate +*> which of the following types a column in the U2 matrix or a +*> row in the VT2 matrix is: +*> 1 : non-zero in the upper half only +*> 2 : non-zero in the lower half only +*> 3 : dense +*> 4 : deflated +*> +*> On exit, it is an array of dimension 4, with COLTYP(I) being +*> the dimension of the I-th type columns. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT, + $ LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX, + $ IDXC, IDXQ, COLTYP, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE + DOUBLE PRECISION ALPHA, BETA +* .. +* .. Array Arguments .. + INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ), + $ IDXQ( * ) + DOUBLE PRECISION D( * ), DSIGMA( * ), U( LDU, * ), + $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ), + $ Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, EIGHT + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0, + $ EIGHT = 8.0D+0 ) +* .. +* .. Local Arrays .. + INTEGER CTOT( 4 ), PSM( 4 ) +* .. +* .. Local Scalars .. + INTEGER CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M, + $ N, NLP1, NLP2 + DOUBLE PRECISION C, EPS, HLFTOL, S, TAU, TOL, Z1 +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL DLAMCH, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLACPY, DLAMRG, DLASET, DROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( NL.LT.1 ) THEN + INFO = -1 + ELSE IF( NR.LT.1 ) THEN + INFO = -2 + ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN + INFO = -3 + END IF +* + N = NL + NR + 1 + M = N + SQRE +* + IF( LDU.LT.N ) THEN + INFO = -10 + ELSE IF( LDVT.LT.M ) THEN + INFO = -12 + ELSE IF( LDU2.LT.N ) THEN + INFO = -15 + ELSE IF( LDVT2.LT.M ) THEN + INFO = -17 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASD2', -INFO ) + RETURN + END IF +* + NLP1 = NL + 1 + NLP2 = NL + 2 +* +* Generate the first part of the vector Z; and move the singular +* values in the first part of D one position backward. +* + Z1 = ALPHA*VT( NLP1, NLP1 ) + Z( 1 ) = Z1 + DO 10 I = NL, 1, -1 + Z( I+1 ) = ALPHA*VT( I, NLP1 ) + D( I+1 ) = D( I ) + IDXQ( I+1 ) = IDXQ( I ) + 1 + 10 CONTINUE +* +* Generate the second part of the vector Z. +* + DO 20 I = NLP2, M + Z( I ) = BETA*VT( I, NLP2 ) + 20 CONTINUE +* +* Initialize some reference arrays. +* + DO 30 I = 2, NLP1 + COLTYP( I ) = 1 + 30 CONTINUE + DO 40 I = NLP2, N + COLTYP( I ) = 2 + 40 CONTINUE +* +* Sort the singular values into increasing order +* + DO 50 I = NLP2, N + IDXQ( I ) = IDXQ( I ) + NLP1 + 50 CONTINUE +* +* DSIGMA, IDXC, IDXC, and the first column of U2 +* are used as storage space. +* + DO 60 I = 2, N + DSIGMA( I ) = D( IDXQ( I ) ) + U2( I, 1 ) = Z( IDXQ( I ) ) + IDXC( I ) = COLTYP( IDXQ( I ) ) + 60 CONTINUE +* + CALL DLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) ) +* + DO 70 I = 2, N + IDXI = 1 + IDX( I ) + D( I ) = DSIGMA( IDXI ) + Z( I ) = U2( IDXI, 1 ) + COLTYP( I ) = IDXC( IDXI ) + 70 CONTINUE +* +* Calculate the allowable deflation tolerance +* + EPS = DLAMCH( 'Epsilon' ) + TOL = MAX( ABS( ALPHA ), ABS( BETA ) ) + TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL ) +* +* There are 2 kinds of deflation -- first a value in the z-vector +* is small, second two (or more) singular values are very close +* together (their difference is small). +* +* If the value in the z-vector is small, we simply permute the +* array so that the corresponding singular value is moved to the +* end. +* +* If two values in the D-vector are close, we perform a two-sided +* rotation designed to make one of the corresponding z-vector +* entries zero, and then permute the array so that the deflated +* singular value is moved to the end. +* +* If there are multiple singular values then the problem deflates. +* Here the number of equal singular values are found. As each equal +* singular value is found, an elementary reflector is computed to +* rotate the corresponding singular subspace so that the +* corresponding components of Z are zero in this new basis. +* + K = 1 + K2 = N + 1 + DO 80 J = 2, N + IF( ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + IDXP( K2 ) = J + COLTYP( J ) = 4 + IF( J.EQ.N ) + $ GO TO 120 + ELSE + JPREV = J + GO TO 90 + END IF + 80 CONTINUE + 90 CONTINUE + J = JPREV + 100 CONTINUE + J = J + 1 + IF( J.GT.N ) + $ GO TO 110 + IF( ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + IDXP( K2 ) = J + COLTYP( J ) = 4 + ELSE +* +* Check if singular values are close enough to allow deflation. +* + IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN +* +* Deflation is possible. +* + S = Z( JPREV ) + C = Z( J ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = DLAPY2( C, S ) + C = C / TAU + S = -S / TAU + Z( J ) = TAU + Z( JPREV ) = ZERO +* +* Apply back the Givens rotation to the left and right +* singular vector matrices. +* + IDXJP = IDXQ( IDX( JPREV )+1 ) + IDXJ = IDXQ( IDX( J )+1 ) + IF( IDXJP.LE.NLP1 ) THEN + IDXJP = IDXJP - 1 + END IF + IF( IDXJ.LE.NLP1 ) THEN + IDXJ = IDXJ - 1 + END IF + CALL DROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S ) + CALL DROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C, + $ S ) + IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN + COLTYP( J ) = 3 + END IF + COLTYP( JPREV ) = 4 + K2 = K2 - 1 + IDXP( K2 ) = JPREV + JPREV = J + ELSE + K = K + 1 + U2( K, 1 ) = Z( JPREV ) + DSIGMA( K ) = D( JPREV ) + IDXP( K ) = JPREV + JPREV = J + END IF + END IF + GO TO 100 + 110 CONTINUE +* +* Record the last singular value. +* + K = K + 1 + U2( K, 1 ) = Z( JPREV ) + DSIGMA( K ) = D( JPREV ) + IDXP( K ) = JPREV +* + 120 CONTINUE +* +* Count up the total number of the various types of columns, then +* form a permutation which positions the four column types into +* four groups of uniform structure (although one or more of these +* groups may be empty). +* + DO 130 J = 1, 4 + CTOT( J ) = 0 + 130 CONTINUE + DO 140 J = 2, N + CT = COLTYP( J ) + CTOT( CT ) = CTOT( CT ) + 1 + 140 CONTINUE +* +* PSM(*) = Position in SubMatrix (of types 1 through 4) +* + PSM( 1 ) = 2 + PSM( 2 ) = 2 + CTOT( 1 ) + PSM( 3 ) = PSM( 2 ) + CTOT( 2 ) + PSM( 4 ) = PSM( 3 ) + CTOT( 3 ) +* +* Fill out the IDXC array so that the permutation which it induces +* will place all type-1 columns first, all type-2 columns next, +* then all type-3's, and finally all type-4's, starting from the +* second column. This applies similarly to the rows of VT. +* + DO 150 J = 2, N + JP = IDXP( J ) + CT = COLTYP( JP ) + IDXC( PSM( CT ) ) = J + PSM( CT ) = PSM( CT ) + 1 + 150 CONTINUE +* +* Sort the singular values and corresponding singular vectors into +* DSIGMA, U2, and VT2 respectively. The singular values/vectors +* which were not deflated go into the first K slots of DSIGMA, U2, +* and VT2 respectively, while those which were deflated go into the +* last N - K slots, except that the first column/row will be treated +* separately. +* + DO 160 J = 2, N + JP = IDXP( J ) + DSIGMA( J ) = D( JP ) + IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 ) + IF( IDXJ.LE.NLP1 ) THEN + IDXJ = IDXJ - 1 + END IF + CALL DCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 ) + CALL DCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 ) + 160 CONTINUE +* +* Determine DSIGMA(1), DSIGMA(2) and Z(1) +* + DSIGMA( 1 ) = ZERO + HLFTOL = TOL / TWO + IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL ) + $ DSIGMA( 2 ) = HLFTOL + IF( M.GT.N ) THEN + Z( 1 ) = DLAPY2( Z1, Z( M ) ) + IF( Z( 1 ).LE.TOL ) THEN + C = ONE + S = ZERO + Z( 1 ) = TOL + ELSE + C = Z1 / Z( 1 ) + S = Z( M ) / Z( 1 ) + END IF + ELSE + IF( ABS( Z1 ).LE.TOL ) THEN + Z( 1 ) = TOL + ELSE + Z( 1 ) = Z1 + END IF + END IF +* +* Move the rest of the updating row to Z. +* + CALL DCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 ) +* +* Determine the first column of U2, the first row of VT2 and the +* last row of VT. +* + CALL DLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 ) + U2( NLP1, 1 ) = ONE + IF( M.GT.N ) THEN + DO 170 I = 1, NLP1 + VT( M, I ) = -S*VT( NLP1, I ) + VT2( 1, I ) = C*VT( NLP1, I ) + 170 CONTINUE + DO 180 I = NLP2, M + VT2( 1, I ) = S*VT( M, I ) + VT( M, I ) = C*VT( M, I ) + 180 CONTINUE + ELSE + CALL DCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 ) + END IF + IF( M.GT.N ) THEN + CALL DCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 ) + END IF +* +* The deflated singular values and their corresponding vectors go +* into the back of D, U, and V respectively. +* + IF( N.GT.K ) THEN + CALL DCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 ) + CALL DLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ), + $ LDU ) + CALL DLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ), + $ LDVT ) + END IF +* +* Copy CTOT into COLTYP for referencing in DLASD3. +* + DO 190 J = 1, 4 + COLTYP( J ) = CTOT( J ) + 190 CONTINUE +* + RETURN +* +* End of DLASD2 +* + END diff --git a/dspl/liblapack/SRC/dlasd3.f b/dspl/liblapack/SRC/dlasd3.f new file mode 100644 index 0000000..7662be3 --- /dev/null +++ b/dspl/liblapack/SRC/dlasd3.f @@ -0,0 +1,469 @@ +*> \brief \b DLASD3 finds all square roots of the roots of the secular equation, as defined by the values in D and Z, and then updates the singular vectors by matrix multiplication. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASD3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASD3( NL, NR, SQRE, K, D, Q, LDQ, DSIGMA, U, LDU, U2, +* LDU2, VT, LDVT, VT2, LDVT2, IDXC, CTOT, Z, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDQ, LDU, LDU2, LDVT, LDVT2, NL, NR, +* $ SQRE +* .. +* .. Array Arguments .. +* INTEGER CTOT( * ), IDXC( * ) +* DOUBLE PRECISION D( * ), DSIGMA( * ), Q( LDQ, * ), U( LDU, * ), +* $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ), +* $ Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASD3 finds all the square roots of the roots of the secular +*> equation, as defined by the values in D and Z. It makes the +*> appropriate calls to DLASD4 and then updates the singular +*> vectors by matrix multiplication. +*> +*> This code makes very mild assumptions about floating point +*> arithmetic. It will work on machines with a guard digit in +*> add/subtract, or on those binary machines without guard digits +*> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. +*> It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> +*> DLASD3 is called from DLASD1. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NL +*> \verbatim +*> NL is INTEGER +*> The row dimension of the upper block. NL >= 1. +*> \endverbatim +*> +*> \param[in] NR +*> \verbatim +*> NR is INTEGER +*> The row dimension of the lower block. NR >= 1. +*> \endverbatim +*> +*> \param[in] SQRE +*> \verbatim +*> SQRE is INTEGER +*> = 0: the lower block is an NR-by-NR square matrix. +*> = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +*> +*> The bidiagonal matrix has N = NL + NR + 1 rows and +*> M = N + SQRE >= N columns. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The size of the secular equation, 1 =< K = < N. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension(K) +*> On exit the square roots of the roots of the secular equation, +*> in ascending order. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,K) +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= K. +*> \endverbatim +*> +*> \param[in,out] DSIGMA +*> \verbatim +*> DSIGMA is DOUBLE PRECISION array, dimension(K) +*> The first K elements of this array contain the old roots +*> of the deflated updating problem. These are the poles +*> of the secular equation. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension (LDU, N) +*> The last N - K columns of this matrix contain the deflated +*> left singular vectors. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= N. +*> \endverbatim +*> +*> \param[in] U2 +*> \verbatim +*> U2 is DOUBLE PRECISION array, dimension (LDU2, N) +*> The first K columns of this matrix contain the non-deflated +*> left singular vectors for the split problem. +*> \endverbatim +*> +*> \param[in] LDU2 +*> \verbatim +*> LDU2 is INTEGER +*> The leading dimension of the array U2. LDU2 >= N. +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is DOUBLE PRECISION array, dimension (LDVT, M) +*> The last M - K columns of VT**T contain the deflated +*> right singular vectors. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= N. +*> \endverbatim +*> +*> \param[in,out] VT2 +*> \verbatim +*> VT2 is DOUBLE PRECISION array, dimension (LDVT2, N) +*> The first K columns of VT2**T contain the non-deflated +*> right singular vectors for the split problem. +*> \endverbatim +*> +*> \param[in] LDVT2 +*> \verbatim +*> LDVT2 is INTEGER +*> The leading dimension of the array VT2. LDVT2 >= N. +*> \endverbatim +*> +*> \param[in] IDXC +*> \verbatim +*> IDXC is INTEGER array, dimension ( N ) +*> The permutation used to arrange the columns of U (and rows of +*> VT) into three groups: the first group contains non-zero +*> entries only at and above (or before) NL +1; the second +*> contains non-zero entries only at and below (or after) NL+2; +*> and the third is dense. The first column of U and the row of +*> VT are treated separately, however. +*> +*> The rows of the singular vectors found by DLASD4 +*> must be likewise permuted before the matrix multiplies can +*> take place. +*> \endverbatim +*> +*> \param[in] CTOT +*> \verbatim +*> CTOT is INTEGER array, dimension ( 4 ) +*> A count of the total number of the various types of columns +*> in U (or rows in VT), as described in IDXC. The fourth column +*> type is any column which has been deflated. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (K) +*> The first K elements of this array contain the components +*> of the deflation-adjusted updating row vector. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, a singular value did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DLASD3( NL, NR, SQRE, K, D, Q, LDQ, DSIGMA, U, LDU, U2, + $ LDU2, VT, LDVT, VT2, LDVT2, IDXC, CTOT, Z, + $ INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDQ, LDU, LDU2, LDVT, LDVT2, NL, NR, + $ SQRE +* .. +* .. Array Arguments .. + INTEGER CTOT( * ), IDXC( * ) + DOUBLE PRECISION D( * ), DSIGMA( * ), Q( LDQ, * ), U( LDU, * ), + $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ), + $ Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO, NEGONE + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0, + $ NEGONE = -1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER CTEMP, I, J, JC, KTEMP, M, N, NLP1, NLP2, NRP1 + DOUBLE PRECISION RHO, TEMP +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3, DNRM2 + EXTERNAL DLAMC3, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DLACPY, DLASCL, DLASD4, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( NL.LT.1 ) THEN + INFO = -1 + ELSE IF( NR.LT.1 ) THEN + INFO = -2 + ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN + INFO = -3 + END IF +* + N = NL + NR + 1 + M = N + SQRE + NLP1 = NL + 1 + NLP2 = NL + 2 +* + IF( ( K.LT.1 ) .OR. ( K.GT.N ) ) THEN + INFO = -4 + ELSE IF( LDQ.LT.K ) THEN + INFO = -7 + ELSE IF( LDU.LT.N ) THEN + INFO = -10 + ELSE IF( LDU2.LT.N ) THEN + INFO = -12 + ELSE IF( LDVT.LT.M ) THEN + INFO = -14 + ELSE IF( LDVT2.LT.M ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASD3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.1 ) THEN + D( 1 ) = ABS( Z( 1 ) ) + CALL DCOPY( M, VT2( 1, 1 ), LDVT2, VT( 1, 1 ), LDVT ) + IF( Z( 1 ).GT.ZERO ) THEN + CALL DCOPY( N, U2( 1, 1 ), 1, U( 1, 1 ), 1 ) + ELSE + DO 10 I = 1, N + U( I, 1 ) = -U2( I, 1 ) + 10 CONTINUE + END IF + RETURN + END IF +* +* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), +* which on any of these machines zeros out the bottommost +* bit of DSIGMA(I) if it is 1; this makes the subsequent +* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DSIGMA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DSIGMA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DSIGMA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 20 I = 1, K + DSIGMA( I ) = DLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I ) + 20 CONTINUE +* +* Keep a copy of Z. +* + CALL DCOPY( K, Z, 1, Q, 1 ) +* +* Normalize Z. +* + RHO = DNRM2( K, Z, 1 ) + CALL DLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO ) + RHO = RHO*RHO +* +* Find the new singular values. +* + DO 30 J = 1, K + CALL DLASD4( K, J, DSIGMA, Z, U( 1, J ), RHO, D( J ), + $ VT( 1, J ), INFO ) +* +* If the zero finder fails, report the convergence failure. +* + IF( INFO.NE.0 ) THEN + RETURN + END IF + 30 CONTINUE +* +* Compute updated Z. +* + DO 60 I = 1, K + Z( I ) = U( I, K )*VT( I, K ) + DO 40 J = 1, I - 1 + Z( I ) = Z( I )*( U( I, J )*VT( I, J ) / + $ ( DSIGMA( I )-DSIGMA( J ) ) / + $ ( DSIGMA( I )+DSIGMA( J ) ) ) + 40 CONTINUE + DO 50 J = I, K - 1 + Z( I ) = Z( I )*( U( I, J )*VT( I, J ) / + $ ( DSIGMA( I )-DSIGMA( J+1 ) ) / + $ ( DSIGMA( I )+DSIGMA( J+1 ) ) ) + 50 CONTINUE + Z( I ) = SIGN( SQRT( ABS( Z( I ) ) ), Q( I, 1 ) ) + 60 CONTINUE +* +* Compute left singular vectors of the modified diagonal matrix, +* and store related information for the right singular vectors. +* + DO 90 I = 1, K + VT( 1, I ) = Z( 1 ) / U( 1, I ) / VT( 1, I ) + U( 1, I ) = NEGONE + DO 70 J = 2, K + VT( J, I ) = Z( J ) / U( J, I ) / VT( J, I ) + U( J, I ) = DSIGMA( J )*VT( J, I ) + 70 CONTINUE + TEMP = DNRM2( K, U( 1, I ), 1 ) + Q( 1, I ) = U( 1, I ) / TEMP + DO 80 J = 2, K + JC = IDXC( J ) + Q( J, I ) = U( JC, I ) / TEMP + 80 CONTINUE + 90 CONTINUE +* +* Update the left singular vector matrix. +* + IF( K.EQ.2 ) THEN + CALL DGEMM( 'N', 'N', N, K, K, ONE, U2, LDU2, Q, LDQ, ZERO, U, + $ LDU ) + GO TO 100 + END IF + IF( CTOT( 1 ).GT.0 ) THEN + CALL DGEMM( 'N', 'N', NL, K, CTOT( 1 ), ONE, U2( 1, 2 ), LDU2, + $ Q( 2, 1 ), LDQ, ZERO, U( 1, 1 ), LDU ) + IF( CTOT( 3 ).GT.0 ) THEN + KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) + CALL DGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE, U2( 1, KTEMP ), + $ LDU2, Q( KTEMP, 1 ), LDQ, ONE, U( 1, 1 ), LDU ) + END IF + ELSE IF( CTOT( 3 ).GT.0 ) THEN + KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) + CALL DGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE, U2( 1, KTEMP ), + $ LDU2, Q( KTEMP, 1 ), LDQ, ZERO, U( 1, 1 ), LDU ) + ELSE + CALL DLACPY( 'F', NL, K, U2, LDU2, U, LDU ) + END IF + CALL DCOPY( K, Q( 1, 1 ), LDQ, U( NLP1, 1 ), LDU ) + KTEMP = 2 + CTOT( 1 ) + CTEMP = CTOT( 2 ) + CTOT( 3 ) + CALL DGEMM( 'N', 'N', NR, K, CTEMP, ONE, U2( NLP2, KTEMP ), LDU2, + $ Q( KTEMP, 1 ), LDQ, ZERO, U( NLP2, 1 ), LDU ) +* +* Generate the right singular vectors. +* + 100 CONTINUE + DO 120 I = 1, K + TEMP = DNRM2( K, VT( 1, I ), 1 ) + Q( I, 1 ) = VT( 1, I ) / TEMP + DO 110 J = 2, K + JC = IDXC( J ) + Q( I, J ) = VT( JC, I ) / TEMP + 110 CONTINUE + 120 CONTINUE +* +* Update the right singular vector matrix. +* + IF( K.EQ.2 ) THEN + CALL DGEMM( 'N', 'N', K, M, K, ONE, Q, LDQ, VT2, LDVT2, ZERO, + $ VT, LDVT ) + RETURN + END IF + KTEMP = 1 + CTOT( 1 ) + CALL DGEMM( 'N', 'N', K, NLP1, KTEMP, ONE, Q( 1, 1 ), LDQ, + $ VT2( 1, 1 ), LDVT2, ZERO, VT( 1, 1 ), LDVT ) + KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) + IF( KTEMP.LE.LDVT2 ) + $ CALL DGEMM( 'N', 'N', K, NLP1, CTOT( 3 ), ONE, Q( 1, KTEMP ), + $ LDQ, VT2( KTEMP, 1 ), LDVT2, ONE, VT( 1, 1 ), + $ LDVT ) +* + KTEMP = CTOT( 1 ) + 1 + NRP1 = NR + SQRE + IF( KTEMP.GT.1 ) THEN + DO 130 I = 1, K + Q( I, KTEMP ) = Q( I, 1 ) + 130 CONTINUE + DO 140 I = NLP2, M + VT2( KTEMP, I ) = VT2( 1, I ) + 140 CONTINUE + END IF + CTEMP = 1 + CTOT( 2 ) + CTOT( 3 ) + CALL DGEMM( 'N', 'N', K, NRP1, CTEMP, ONE, Q( 1, KTEMP ), LDQ, + $ VT2( KTEMP, NLP2 ), LDVT2, ZERO, VT( 1, NLP2 ), LDVT ) +* + RETURN +* +* End of DLASD3 +* + END diff --git a/dspl/liblapack/SRC/dlasd4.f b/dspl/liblapack/SRC/dlasd4.f new file mode 100644 index 0000000..8b4a876 --- /dev/null +++ b/dspl/liblapack/SRC/dlasd4.f @@ -0,0 +1,1061 @@ +*> \brief \b DLASD4 computes the square root of the i-th updated eigenvalue of a positive symmetric rank-one modification to a positive diagonal matrix. Used by dbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASD4 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASD4( N, I, D, Z, DELTA, RHO, SIGMA, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER I, INFO, N +* DOUBLE PRECISION RHO, SIGMA +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), DELTA( * ), WORK( * ), Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This subroutine computes the square root of the I-th updated +*> eigenvalue of a positive symmetric rank-one modification to +*> a positive diagonal matrix whose entries are given as the squares +*> of the corresponding entries in the array d, and that +*> +*> 0 <= D(i) < D(j) for i < j +*> +*> and that RHO > 0. This is arranged by the calling routine, and is +*> no loss in generality. The rank-one modified system is thus +*> +*> diag( D ) * diag( D ) + RHO * Z * Z_transpose. +*> +*> where we assume the Euclidean norm of Z is 1. +*> +*> The method consists of approximating the rational functions in the +*> secular equation by simpler interpolating rational functions. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of all arrays. +*> \endverbatim +*> +*> \param[in] I +*> \verbatim +*> I is INTEGER +*> The index of the eigenvalue to be computed. 1 <= I <= N. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension ( N ) +*> The original eigenvalues. It is assumed that they are in +*> order, 0 <= D(I) < D(J) for I < J. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension ( N ) +*> The components of the updating vector. +*> \endverbatim +*> +*> \param[out] DELTA +*> \verbatim +*> DELTA is DOUBLE PRECISION array, dimension ( N ) +*> If N .ne. 1, DELTA contains (D(j) - sigma_I) in its j-th +*> component. If N = 1, then DELTA(1) = 1. The vector DELTA +*> contains the information necessary to construct the +*> (singular) eigenvectors. +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> The scalar in the symmetric updating formula. +*> \endverbatim +*> +*> \param[out] SIGMA +*> \verbatim +*> SIGMA is DOUBLE PRECISION +*> The computed sigma_I, the I-th updated eigenvalue. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension ( N ) +*> If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th +*> component. If N = 1, then WORK( 1 ) = 1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = 1, the updating process failed. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> Logical variable ORGATI (origin-at-i?) is used for distinguishing +*> whether D(i) or D(i+1) is treated as the origin. +*> +*> ORGATI = .true. origin at i +*> ORGATI = .false. origin at i+1 +*> +*> Logical variable SWTCH3 (switch-for-3-poles?) is for noting +*> if we are working with THREE poles! +*> +*> MAXIT is the maximum number of iterations allowed for each +*> eigenvalue. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ren-Cang Li, Computer Science Division, University of California +*> at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DLASD4( N, I, D, Z, DELTA, RHO, SIGMA, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER I, INFO, N + DOUBLE PRECISION RHO, SIGMA +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), DELTA( * ), WORK( * ), Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 400 ) + DOUBLE PRECISION ZERO, ONE, TWO, THREE, FOUR, EIGHT, TEN + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0, + $ THREE = 3.0D+0, FOUR = 4.0D+0, EIGHT = 8.0D+0, + $ TEN = 10.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ORGATI, SWTCH, SWTCH3, GEOMAVG + INTEGER II, IIM1, IIP1, IP1, ITER, J, NITER + DOUBLE PRECISION A, B, C, DELSQ, DELSQ2, SQ2, DPHI, DPSI, DTIIM, + $ DTIIP, DTIPSQ, DTISQ, DTNSQ, DTNSQ1, DW, EPS, + $ ERRETM, ETA, PHI, PREW, PSI, RHOINV, SGLB, + $ SGUB, TAU, TAU2, TEMP, TEMP1, TEMP2, W +* .. +* .. Local Arrays .. + DOUBLE PRECISION DD( 3 ), ZZ( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL DLAED6, DLASD5 +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Since this routine is called in an inner loop, we do no argument +* checking. +* +* Quick return for N=1 and 2. +* + INFO = 0 + IF( N.EQ.1 ) THEN +* +* Presumably, I=1 upon entry +* + SIGMA = SQRT( D( 1 )*D( 1 )+RHO*Z( 1 )*Z( 1 ) ) + DELTA( 1 ) = ONE + WORK( 1 ) = ONE + RETURN + END IF + IF( N.EQ.2 ) THEN + CALL DLASD5( I, D, Z, DELTA, RHO, SIGMA, WORK ) + RETURN + END IF +* +* Compute machine epsilon +* + EPS = DLAMCH( 'Epsilon' ) + RHOINV = ONE / RHO + TAU2= ZERO +* +* The case I = N +* + IF( I.EQ.N ) THEN +* +* Initialize some basic variables +* + II = N - 1 + NITER = 1 +* +* Calculate initial guess +* + TEMP = RHO / TWO +* +* If ||Z||_2 is not one, then TEMP should be set to +* RHO * ||Z||_2^2 / TWO +* + TEMP1 = TEMP / ( D( N )+SQRT( D( N )*D( N )+TEMP ) ) + DO 10 J = 1, N + WORK( J ) = D( J ) + D( N ) + TEMP1 + DELTA( J ) = ( D( J )-D( N ) ) - TEMP1 + 10 CONTINUE +* + PSI = ZERO + DO 20 J = 1, N - 2 + PSI = PSI + Z( J )*Z( J ) / ( DELTA( J )*WORK( J ) ) + 20 CONTINUE +* + C = RHOINV + PSI + W = C + Z( II )*Z( II ) / ( DELTA( II )*WORK( II ) ) + + $ Z( N )*Z( N ) / ( DELTA( N )*WORK( N ) ) +* + IF( W.LE.ZERO ) THEN + TEMP1 = SQRT( D( N )*D( N )+RHO ) + TEMP = Z( N-1 )*Z( N-1 ) / ( ( D( N-1 )+TEMP1 )* + $ ( D( N )-D( N-1 )+RHO / ( D( N )+TEMP1 ) ) ) + + $ Z( N )*Z( N ) / RHO +* +* The following TAU2 is to approximate +* SIGMA_n^2 - D( N )*D( N ) +* + IF( C.LE.TEMP ) THEN + TAU = RHO + ELSE + DELSQ = ( D( N )-D( N-1 ) )*( D( N )+D( N-1 ) ) + A = -C*DELSQ + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N ) + B = Z( N )*Z( N )*DELSQ + IF( A.LT.ZERO ) THEN + TAU2 = TWO*B / ( SQRT( A*A+FOUR*B*C )-A ) + ELSE + TAU2 = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C ) + END IF + TAU = TAU2 / ( D( N )+SQRT( D( N )*D( N )+TAU2 ) ) + END IF +* +* It can be proved that +* D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU2 <= D(N)^2+RHO +* + ELSE + DELSQ = ( D( N )-D( N-1 ) )*( D( N )+D( N-1 ) ) + A = -C*DELSQ + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N ) + B = Z( N )*Z( N )*DELSQ +* +* The following TAU2 is to approximate +* SIGMA_n^2 - D( N )*D( N ) +* + IF( A.LT.ZERO ) THEN + TAU2 = TWO*B / ( SQRT( A*A+FOUR*B*C )-A ) + ELSE + TAU2 = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C ) + END IF + TAU = TAU2 / ( D( N )+SQRT( D( N )*D( N )+TAU2 ) ) + +* +* It can be proved that +* D(N)^2 < D(N)^2+TAU2 < SIGMA(N)^2 < D(N)^2+RHO/2 +* + END IF +* +* The following TAU is to approximate SIGMA_n - D( N ) +* +* TAU = TAU2 / ( D( N )+SQRT( D( N )*D( N )+TAU2 ) ) +* + SIGMA = D( N ) + TAU + DO 30 J = 1, N + DELTA( J ) = ( D( J )-D( N ) ) - TAU + WORK( J ) = D( J ) + D( N ) + TAU + 30 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 40 J = 1, II + TEMP = Z( J ) / ( DELTA( J )*WORK( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 40 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / ( DELTA( N )*WORK( N ) ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +* $ + ABS( TAU2 )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + GO TO 240 + END IF +* +* Calculate the new step +* + NITER = NITER + 1 + DTNSQ1 = WORK( N-1 )*DELTA( N-1 ) + DTNSQ = WORK( N )*DELTA( N ) + C = W - DTNSQ1*DPSI - DTNSQ*DPHI + A = ( DTNSQ+DTNSQ1 )*W - DTNSQ*DTNSQ1*( DPSI+DPHI ) + B = DTNSQ*DTNSQ1*W + IF( C.LT.ZERO ) + $ C = ABS( C ) + IF( C.EQ.ZERO ) THEN + ETA = RHO - SIGMA*SIGMA + ELSE IF( A.GE.ZERO ) THEN + ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GT.ZERO ) + $ ETA = -W / ( DPSI+DPHI ) + TEMP = ETA - DTNSQ + IF( TEMP.GT.RHO ) + $ ETA = RHO + DTNSQ +* + ETA = ETA / ( SIGMA+SQRT( ETA+SIGMA*SIGMA ) ) + TAU = TAU + ETA + SIGMA = SIGMA + ETA +* + DO 50 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + WORK( J ) = WORK( J ) + ETA + 50 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 60 J = 1, II + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 60 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TAU2 = WORK( N )*DELTA( N ) + TEMP = Z( N ) / TAU2 + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +* $ + ABS( TAU2 )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI +* +* Main loop to update the values of the array DELTA +* + ITER = NITER + 1 +* + DO 90 NITER = ITER, MAXIT +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + GO TO 240 + END IF +* +* Calculate the new step +* + DTNSQ1 = WORK( N-1 )*DELTA( N-1 ) + DTNSQ = WORK( N )*DELTA( N ) + C = W - DTNSQ1*DPSI - DTNSQ*DPHI + A = ( DTNSQ+DTNSQ1 )*W - DTNSQ1*DTNSQ*( DPSI+DPHI ) + B = DTNSQ1*DTNSQ*W + IF( A.GE.ZERO ) THEN + ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GT.ZERO ) + $ ETA = -W / ( DPSI+DPHI ) + TEMP = ETA - DTNSQ + IF( TEMP.LE.ZERO ) + $ ETA = ETA / TWO +* + ETA = ETA / ( SIGMA+SQRT( ETA+SIGMA*SIGMA ) ) + TAU = TAU + ETA + SIGMA = SIGMA + ETA +* + DO 70 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + WORK( J ) = WORK( J ) + ETA + 70 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 80 J = 1, II + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 80 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TAU2 = WORK( N )*DELTA( N ) + TEMP = Z( N ) / TAU2 + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +* $ + ABS( TAU2 )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI + 90 CONTINUE +* +* Return with INFO = 1, NITER = MAXIT and not converged +* + INFO = 1 + GO TO 240 +* +* End for the case I = N +* + ELSE +* +* The case for I < N +* + NITER = 1 + IP1 = I + 1 +* +* Calculate initial guess +* + DELSQ = ( D( IP1 )-D( I ) )*( D( IP1 )+D( I ) ) + DELSQ2 = DELSQ / TWO + SQ2=SQRT( ( D( I )*D( I )+D( IP1 )*D( IP1 ) ) / TWO ) + TEMP = DELSQ2 / ( D( I )+SQ2 ) + DO 100 J = 1, N + WORK( J ) = D( J ) + D( I ) + TEMP + DELTA( J ) = ( D( J )-D( I ) ) - TEMP + 100 CONTINUE +* + PSI = ZERO + DO 110 J = 1, I - 1 + PSI = PSI + Z( J )*Z( J ) / ( WORK( J )*DELTA( J ) ) + 110 CONTINUE +* + PHI = ZERO + DO 120 J = N, I + 2, -1 + PHI = PHI + Z( J )*Z( J ) / ( WORK( J )*DELTA( J ) ) + 120 CONTINUE + C = RHOINV + PSI + PHI + W = C + Z( I )*Z( I ) / ( WORK( I )*DELTA( I ) ) + + $ Z( IP1 )*Z( IP1 ) / ( WORK( IP1 )*DELTA( IP1 ) ) +* + GEOMAVG = .FALSE. + IF( W.GT.ZERO ) THEN +* +* d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 +* +* We choose d(i) as origin. +* + ORGATI = .TRUE. + II = I + SGLB = ZERO + SGUB = DELSQ2 / ( D( I )+SQ2 ) + A = C*DELSQ + Z( I )*Z( I ) + Z( IP1 )*Z( IP1 ) + B = Z( I )*Z( I )*DELSQ + IF( A.GT.ZERO ) THEN + TAU2 = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + ELSE + TAU2 = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + END IF +* +* TAU2 now is an estimation of SIGMA^2 - D( I )^2. The +* following, however, is the corresponding estimation of +* SIGMA - D( I ). +* + TAU = TAU2 / ( D( I )+SQRT( D( I )*D( I )+TAU2 ) ) + TEMP = SQRT(EPS) + IF( (D(I).LE.TEMP*D(IP1)).AND.(ABS(Z(I)).LE.TEMP) + $ .AND.(D(I).GT.ZERO) ) THEN + TAU = MIN( TEN*D(I), SGUB ) + GEOMAVG = .TRUE. + END IF + ELSE +* +* (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 +* +* We choose d(i+1) as origin. +* + ORGATI = .FALSE. + II = IP1 + SGLB = -DELSQ2 / ( D( II )+SQ2 ) + SGUB = ZERO + A = C*DELSQ - Z( I )*Z( I ) - Z( IP1 )*Z( IP1 ) + B = Z( IP1 )*Z( IP1 )*DELSQ + IF( A.LT.ZERO ) THEN + TAU2 = TWO*B / ( A-SQRT( ABS( A*A+FOUR*B*C ) ) ) + ELSE + TAU2 = -( A+SQRT( ABS( A*A+FOUR*B*C ) ) ) / ( TWO*C ) + END IF +* +* TAU2 now is an estimation of SIGMA^2 - D( IP1 )^2. The +* following, however, is the corresponding estimation of +* SIGMA - D( IP1 ). +* + TAU = TAU2 / ( D( IP1 )+SQRT( ABS( D( IP1 )*D( IP1 )+ + $ TAU2 ) ) ) + END IF +* + SIGMA = D( II ) + TAU + DO 130 J = 1, N + WORK( J ) = D( J ) + D( II ) + TAU + DELTA( J ) = ( D( J )-D( II ) ) - TAU + 130 CONTINUE + IIM1 = II - 1 + IIP1 = II + 1 +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 150 J = 1, IIM1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 150 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 160 J = N, IIP1, -1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 160 CONTINUE +* + W = RHOINV + PHI + PSI +* +* W is the value of the secular function with +* its ii-th element removed. +* + SWTCH3 = .FALSE. + IF( ORGATI ) THEN + IF( W.LT.ZERO ) + $ SWTCH3 = .TRUE. + ELSE + IF( W.GT.ZERO ) + $ SWTCH3 = .TRUE. + END IF + IF( II.EQ.1 .OR. II.EQ.N ) + $ SWTCH3 = .FALSE. +* + TEMP = Z( II ) / ( WORK( II )*DELTA( II ) ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = W + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + $ + THREE*ABS( TEMP ) +* $ + ABS( TAU2 )*DW +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + GO TO 240 + END IF +* + IF( W.LE.ZERO ) THEN + SGLB = MAX( SGLB, TAU ) + ELSE + SGUB = MIN( SGUB, TAU ) + END IF +* +* Calculate the new step +* + NITER = NITER + 1 + IF( .NOT.SWTCH3 ) THEN + DTIPSQ = WORK( IP1 )*DELTA( IP1 ) + DTISQ = WORK( I )*DELTA( I ) + IF( ORGATI ) THEN + C = W - DTIPSQ*DW + DELSQ*( Z( I ) / DTISQ )**2 + ELSE + C = W - DTISQ*DW - DELSQ*( Z( IP1 ) / DTIPSQ )**2 + END IF + A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW + B = DTIPSQ*DTISQ*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ*( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + DTISQ*DTISQ*( DPSI+DPHI ) + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + ELSE +* +* Interpolation using THREE most relevant poles +* + DTIIM = WORK( IIM1 )*DELTA( IIM1 ) + DTIIP = WORK( IIP1 )*DELTA( IIP1 ) + TEMP = RHOINV + PSI + PHI + IF( ORGATI ) THEN + TEMP1 = Z( IIM1 ) / DTIIM + TEMP1 = TEMP1*TEMP1 + C = ( TEMP - DTIIP*( DPSI+DPHI ) ) - + $ ( D( IIM1 )-D( IIP1 ) )*( D( IIM1 )+D( IIP1 ) )*TEMP1 + ZZ( 1 ) = Z( IIM1 )*Z( IIM1 ) + IF( DPSI.LT.TEMP1 ) THEN + ZZ( 3 ) = DTIIP*DTIIP*DPHI + ELSE + ZZ( 3 ) = DTIIP*DTIIP*( ( DPSI-TEMP1 )+DPHI ) + END IF + ELSE + TEMP1 = Z( IIP1 ) / DTIIP + TEMP1 = TEMP1*TEMP1 + C = ( TEMP - DTIIM*( DPSI+DPHI ) ) - + $ ( D( IIP1 )-D( IIM1 ) )*( D( IIM1 )+D( IIP1 ) )*TEMP1 + IF( DPHI.LT.TEMP1 ) THEN + ZZ( 1 ) = DTIIM*DTIIM*DPSI + ELSE + ZZ( 1 ) = DTIIM*DTIIM*( DPSI+( DPHI-TEMP1 ) ) + END IF + ZZ( 3 ) = Z( IIP1 )*Z( IIP1 ) + END IF + ZZ( 2 ) = Z( II )*Z( II ) + DD( 1 ) = DTIIM + DD( 2 ) = DELTA( II )*WORK( II ) + DD( 3 ) = DTIIP + CALL DLAED6( NITER, ORGATI, C, DD, ZZ, W, ETA, INFO ) +* + IF( INFO.NE.0 ) THEN +* +* If INFO is not 0, i.e., DLAED6 failed, switch back +* to 2 pole interpolation. +* + SWTCH3 = .FALSE. + INFO = 0 + DTIPSQ = WORK( IP1 )*DELTA( IP1 ) + DTISQ = WORK( I )*DELTA( I ) + IF( ORGATI ) THEN + C = W - DTIPSQ*DW + DELSQ*( Z( I ) / DTISQ )**2 + ELSE + C = W - DTISQ*DW - DELSQ*( Z( IP1 ) / DTIPSQ )**2 + END IF + A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW + B = DTIPSQ*DTISQ*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ*( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + DTISQ*DTISQ*( DPSI+DPHI) + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + END IF + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GE.ZERO ) + $ ETA = -W / DW +* + ETA = ETA / ( SIGMA+SQRT( SIGMA*SIGMA+ETA ) ) + TEMP = TAU + ETA + IF( TEMP.GT.SGUB .OR. TEMP.LT.SGLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( SGUB-TAU ) / TWO + ELSE + ETA = ( SGLB-TAU ) / TWO + END IF + IF( GEOMAVG ) THEN + IF( W .LT. ZERO ) THEN + IF( TAU .GT. ZERO ) THEN + ETA = SQRT(SGUB*TAU)-TAU + END IF + ELSE + IF( SGLB .GT. ZERO ) THEN + ETA = SQRT(SGLB*TAU)-TAU + END IF + END IF + END IF + END IF +* + PREW = W +* + TAU = TAU + ETA + SIGMA = SIGMA + ETA +* + DO 170 J = 1, N + WORK( J ) = WORK( J ) + ETA + DELTA( J ) = DELTA( J ) - ETA + 170 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 180 J = 1, IIM1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 180 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 190 J = N, IIP1, -1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 190 CONTINUE +* + TAU2 = WORK( II )*DELTA( II ) + TEMP = Z( II ) / TAU2 + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = RHOINV + PHI + PSI + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + $ + THREE*ABS( TEMP ) +* $ + ABS( TAU2 )*DW +* + SWTCH = .FALSE. + IF( ORGATI ) THEN + IF( -W.GT.ABS( PREW ) / TEN ) + $ SWTCH = .TRUE. + ELSE + IF( W.GT.ABS( PREW ) / TEN ) + $ SWTCH = .TRUE. + END IF +* +* Main loop to update the values of the array DELTA and WORK +* + ITER = NITER + 1 +* + DO 230 NITER = ITER, MAXIT +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN +* $ .OR. (SGUB-SGLB).LE.EIGHT*ABS(SGUB+SGLB) ) THEN + GO TO 240 + END IF +* + IF( W.LE.ZERO ) THEN + SGLB = MAX( SGLB, TAU ) + ELSE + SGUB = MIN( SGUB, TAU ) + END IF +* +* Calculate the new step +* + IF( .NOT.SWTCH3 ) THEN + DTIPSQ = WORK( IP1 )*DELTA( IP1 ) + DTISQ = WORK( I )*DELTA( I ) + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + C = W - DTIPSQ*DW + DELSQ*( Z( I ) / DTISQ )**2 + ELSE + C = W - DTISQ*DW - DELSQ*( Z( IP1 ) / DTIPSQ )**2 + END IF + ELSE + TEMP = Z( II ) / ( WORK( II )*DELTA( II ) ) + IF( ORGATI ) THEN + DPSI = DPSI + TEMP*TEMP + ELSE + DPHI = DPHI + TEMP*TEMP + END IF + C = W - DTISQ*DPSI - DTIPSQ*DPHI + END IF + A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW + B = DTIPSQ*DTISQ*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ* + $ ( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + + $ DTISQ*DTISQ*( DPSI+DPHI ) + END IF + ELSE + A = DTISQ*DTISQ*DPSI + DTIPSQ*DTIPSQ*DPHI + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + ELSE +* +* Interpolation using THREE most relevant poles +* + DTIIM = WORK( IIM1 )*DELTA( IIM1 ) + DTIIP = WORK( IIP1 )*DELTA( IIP1 ) + TEMP = RHOINV + PSI + PHI + IF( SWTCH ) THEN + C = TEMP - DTIIM*DPSI - DTIIP*DPHI + ZZ( 1 ) = DTIIM*DTIIM*DPSI + ZZ( 3 ) = DTIIP*DTIIP*DPHI + ELSE + IF( ORGATI ) THEN + TEMP1 = Z( IIM1 ) / DTIIM + TEMP1 = TEMP1*TEMP1 + TEMP2 = ( D( IIM1 )-D( IIP1 ) )* + $ ( D( IIM1 )+D( IIP1 ) )*TEMP1 + C = TEMP - DTIIP*( DPSI+DPHI ) - TEMP2 + ZZ( 1 ) = Z( IIM1 )*Z( IIM1 ) + IF( DPSI.LT.TEMP1 ) THEN + ZZ( 3 ) = DTIIP*DTIIP*DPHI + ELSE + ZZ( 3 ) = DTIIP*DTIIP*( ( DPSI-TEMP1 )+DPHI ) + END IF + ELSE + TEMP1 = Z( IIP1 ) / DTIIP + TEMP1 = TEMP1*TEMP1 + TEMP2 = ( D( IIP1 )-D( IIM1 ) )* + $ ( D( IIM1 )+D( IIP1 ) )*TEMP1 + C = TEMP - DTIIM*( DPSI+DPHI ) - TEMP2 + IF( DPHI.LT.TEMP1 ) THEN + ZZ( 1 ) = DTIIM*DTIIM*DPSI + ELSE + ZZ( 1 ) = DTIIM*DTIIM*( DPSI+( DPHI-TEMP1 ) ) + END IF + ZZ( 3 ) = Z( IIP1 )*Z( IIP1 ) + END IF + END IF + DD( 1 ) = DTIIM + DD( 2 ) = DELTA( II )*WORK( II ) + DD( 3 ) = DTIIP + CALL DLAED6( NITER, ORGATI, C, DD, ZZ, W, ETA, INFO ) +* + IF( INFO.NE.0 ) THEN +* +* If INFO is not 0, i.e., DLAED6 failed, switch +* back to two pole interpolation +* + SWTCH3 = .FALSE. + INFO = 0 + DTIPSQ = WORK( IP1 )*DELTA( IP1 ) + DTISQ = WORK( I )*DELTA( I ) + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + C = W - DTIPSQ*DW + DELSQ*( Z( I )/DTISQ )**2 + ELSE + C = W - DTISQ*DW - DELSQ*( Z( IP1 )/DTIPSQ )**2 + END IF + ELSE + TEMP = Z( II ) / ( WORK( II )*DELTA( II ) ) + IF( ORGATI ) THEN + DPSI = DPSI + TEMP*TEMP + ELSE + DPHI = DPHI + TEMP*TEMP + END IF + C = W - DTISQ*DPSI - DTIPSQ*DPHI + END IF + A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW + B = DTIPSQ*DTISQ*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ* + $ ( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + + $ DTISQ*DTISQ*( DPSI+DPHI ) + END IF + ELSE + A = DTISQ*DTISQ*DPSI + DTIPSQ*DTIPSQ*DPHI + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + END IF + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GE.ZERO ) + $ ETA = -W / DW +* + ETA = ETA / ( SIGMA+SQRT( SIGMA*SIGMA+ETA ) ) + TEMP=TAU+ETA + IF( TEMP.GT.SGUB .OR. TEMP.LT.SGLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( SGUB-TAU ) / TWO + ELSE + ETA = ( SGLB-TAU ) / TWO + END IF + IF( GEOMAVG ) THEN + IF( W .LT. ZERO ) THEN + IF( TAU .GT. ZERO ) THEN + ETA = SQRT(SGUB*TAU)-TAU + END IF + ELSE + IF( SGLB .GT. ZERO ) THEN + ETA = SQRT(SGLB*TAU)-TAU + END IF + END IF + END IF + END IF +* + PREW = W +* + TAU = TAU + ETA + SIGMA = SIGMA + ETA +* + DO 200 J = 1, N + WORK( J ) = WORK( J ) + ETA + DELTA( J ) = DELTA( J ) - ETA + 200 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 210 J = 1, IIM1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 210 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 220 J = N, IIP1, -1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 220 CONTINUE +* + TAU2 = WORK( II )*DELTA( II ) + TEMP = Z( II ) / TAU2 + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = RHOINV + PHI + PSI + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + $ + THREE*ABS( TEMP ) +* $ + ABS( TAU2 )*DW +* + IF( W*PREW.GT.ZERO .AND. ABS( W ).GT.ABS( PREW ) / TEN ) + $ SWTCH = .NOT.SWTCH +* + 230 CONTINUE +* +* Return with INFO = 1, NITER = MAXIT and not converged +* + INFO = 1 +* + END IF +* + 240 CONTINUE + RETURN +* +* End of DLASD4 +* + END diff --git a/dspl/liblapack/SRC/dlasd5.f b/dspl/liblapack/SRC/dlasd5.f new file mode 100644 index 0000000..4896ba6 --- /dev/null +++ b/dspl/liblapack/SRC/dlasd5.f @@ -0,0 +1,231 @@ +*> \brief \b DLASD5 computes the square root of the i-th eigenvalue of a positive symmetric rank-one modification of a 2-by-2 diagonal matrix. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASD5 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASD5( I, D, Z, DELTA, RHO, DSIGMA, WORK ) +* +* .. Scalar Arguments .. +* INTEGER I +* DOUBLE PRECISION DSIGMA, RHO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( 2 ), DELTA( 2 ), WORK( 2 ), Z( 2 ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This subroutine computes the square root of the I-th eigenvalue +*> of a positive symmetric rank-one modification of a 2-by-2 diagonal +*> matrix +*> +*> diag( D ) * diag( D ) + RHO * Z * transpose(Z) . +*> +*> The diagonal entries in the array D are assumed to satisfy +*> +*> 0 <= D(i) < D(j) for i < j . +*> +*> We also assume RHO > 0 and that the Euclidean norm of the vector +*> Z is one. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] I +*> \verbatim +*> I is INTEGER +*> The index of the eigenvalue to be computed. I = 1 or I = 2. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension ( 2 ) +*> The original eigenvalues. We assume 0 <= D(1) < D(2). +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension ( 2 ) +*> The components of the updating vector. +*> \endverbatim +*> +*> \param[out] DELTA +*> \verbatim +*> DELTA is DOUBLE PRECISION array, dimension ( 2 ) +*> Contains (D(j) - sigma_I) in its j-th component. +*> The vector DELTA contains the information necessary +*> to construct the eigenvectors. +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> The scalar in the symmetric updating formula. +*> \endverbatim +*> +*> \param[out] DSIGMA +*> \verbatim +*> DSIGMA is DOUBLE PRECISION +*> The computed sigma_I, the I-th updated eigenvalue. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension ( 2 ) +*> WORK contains (D(j) + sigma_I) in its j-th component. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ren-Cang Li, Computer Science Division, University of California +*> at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DLASD5( I, D, Z, DELTA, RHO, DSIGMA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER I + DOUBLE PRECISION DSIGMA, RHO +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( 2 ), DELTA( 2 ), WORK( 2 ), Z( 2 ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, THREE, FOUR + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0, + $ THREE = 3.0D+0, FOUR = 4.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION B, C, DEL, DELSQ, TAU, W +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + DEL = D( 2 ) - D( 1 ) + DELSQ = DEL*( D( 2 )+D( 1 ) ) + IF( I.EQ.1 ) THEN + W = ONE + FOUR*RHO*( Z( 2 )*Z( 2 ) / ( D( 1 )+THREE*D( 2 ) )- + $ Z( 1 )*Z( 1 ) / ( THREE*D( 1 )+D( 2 ) ) ) / DEL + IF( W.GT.ZERO ) THEN + B = DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 1 )*Z( 1 )*DELSQ +* +* B > ZERO, always +* +* The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 ) +* + TAU = TWO*C / ( B+SQRT( ABS( B*B-FOUR*C ) ) ) +* +* The following TAU is DSIGMA - D( 1 ) +* + TAU = TAU / ( D( 1 )+SQRT( D( 1 )*D( 1 )+TAU ) ) + DSIGMA = D( 1 ) + TAU + DELTA( 1 ) = -TAU + DELTA( 2 ) = DEL - TAU + WORK( 1 ) = TWO*D( 1 ) + TAU + WORK( 2 ) = ( D( 1 )+TAU ) + D( 2 ) +* DELTA( 1 ) = -Z( 1 ) / TAU +* DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) + ELSE + B = -DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 2 )*Z( 2 )*DELSQ +* +* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) +* + IF( B.GT.ZERO ) THEN + TAU = -TWO*C / ( B+SQRT( B*B+FOUR*C ) ) + ELSE + TAU = ( B-SQRT( B*B+FOUR*C ) ) / TWO + END IF +* +* The following TAU is DSIGMA - D( 2 ) +* + TAU = TAU / ( D( 2 )+SQRT( ABS( D( 2 )*D( 2 )+TAU ) ) ) + DSIGMA = D( 2 ) + TAU + DELTA( 1 ) = -( DEL+TAU ) + DELTA( 2 ) = -TAU + WORK( 1 ) = D( 1 ) + TAU + D( 2 ) + WORK( 2 ) = TWO*D( 2 ) + TAU +* DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) +* DELTA( 2 ) = -Z( 2 ) / TAU + END IF +* TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) +* DELTA( 1 ) = DELTA( 1 ) / TEMP +* DELTA( 2 ) = DELTA( 2 ) / TEMP + ELSE +* +* Now I=2 +* + B = -DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 2 )*Z( 2 )*DELSQ +* +* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) +* + IF( B.GT.ZERO ) THEN + TAU = ( B+SQRT( B*B+FOUR*C ) ) / TWO + ELSE + TAU = TWO*C / ( -B+SQRT( B*B+FOUR*C ) ) + END IF +* +* The following TAU is DSIGMA - D( 2 ) +* + TAU = TAU / ( D( 2 )+SQRT( D( 2 )*D( 2 )+TAU ) ) + DSIGMA = D( 2 ) + TAU + DELTA( 1 ) = -( DEL+TAU ) + DELTA( 2 ) = -TAU + WORK( 1 ) = D( 1 ) + TAU + D( 2 ) + WORK( 2 ) = TWO*D( 2 ) + TAU +* DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) +* DELTA( 2 ) = -Z( 2 ) / TAU +* TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) +* DELTA( 1 ) = DELTA( 1 ) / TEMP +* DELTA( 2 ) = DELTA( 2 ) / TEMP + END IF + RETURN +* +* End of DLASD5 +* + END diff --git a/dspl/liblapack/SRC/dlasd6.f b/dspl/liblapack/SRC/dlasd6.f new file mode 100644 index 0000000..5cab78a --- /dev/null +++ b/dspl/liblapack/SRC/dlasd6.f @@ -0,0 +1,443 @@ +*> \brief \b DLASD6 computes the SVD of an updated upper bidiagonal matrix obtained by merging two smaller ones by appending a row. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASD6 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASD6( ICOMPQ, NL, NR, SQRE, D, VF, VL, ALPHA, BETA, +* IDXQ, PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, +* LDGNUM, POLES, DIFL, DIFR, Z, K, C, S, WORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL, +* $ NR, SQRE +* DOUBLE PRECISION ALPHA, BETA, C, S +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( LDGCOL, * ), IDXQ( * ), IWORK( * ), +* $ PERM( * ) +* DOUBLE PRECISION D( * ), DIFL( * ), DIFR( * ), +* $ GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ), +* $ VF( * ), VL( * ), WORK( * ), Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASD6 computes the SVD of an updated upper bidiagonal matrix B +*> obtained by merging two smaller ones by appending a row. This +*> routine is used only for the problem which requires all singular +*> values and optionally singular vector matrices in factored form. +*> B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. +*> A related subroutine, DLASD1, handles the case in which all singular +*> values and singular vectors of the bidiagonal matrix are desired. +*> +*> DLASD6 computes the SVD as follows: +*> +*> ( D1(in) 0 0 0 ) +*> B = U(in) * ( Z1**T a Z2**T b ) * VT(in) +*> ( 0 0 D2(in) 0 ) +*> +*> = U(out) * ( D(out) 0) * VT(out) +*> +*> where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M +*> with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros +*> elsewhere; and the entry b is empty if SQRE = 0. +*> +*> The singular values of B can be computed using D1, D2, the first +*> components of all the right singular vectors of the lower block, and +*> the last components of all the right singular vectors of the upper +*> block. These components are stored and updated in VF and VL, +*> respectively, in DLASD6. Hence U and VT are not explicitly +*> referenced. +*> +*> The singular values are stored in D. The algorithm consists of two +*> stages: +*> +*> The first stage consists of deflating the size of the problem +*> when there are multiple singular values or if there is a zero +*> in the Z vector. For each such occurrence the dimension of the +*> secular equation problem is reduced by one. This stage is +*> performed by the routine DLASD7. +*> +*> The second stage consists of calculating the updated +*> singular values. This is done by finding the roots of the +*> secular equation via the routine DLASD4 (as called by DLASD8). +*> This routine also updates VF and VL and computes the distances +*> between the updated singular values and the old singular +*> values. +*> +*> DLASD6 is called from DLASDA. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> Specifies whether singular vectors are to be computed in +*> factored form: +*> = 0: Compute singular values only. +*> = 1: Compute singular vectors in factored form as well. +*> \endverbatim +*> +*> \param[in] NL +*> \verbatim +*> NL is INTEGER +*> The row dimension of the upper block. NL >= 1. +*> \endverbatim +*> +*> \param[in] NR +*> \verbatim +*> NR is INTEGER +*> The row dimension of the lower block. NR >= 1. +*> \endverbatim +*> +*> \param[in] SQRE +*> \verbatim +*> SQRE is INTEGER +*> = 0: the lower block is an NR-by-NR square matrix. +*> = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +*> +*> The bidiagonal matrix has row dimension N = NL + NR + 1, +*> and column dimension M = N + SQRE. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension ( NL+NR+1 ). +*> On entry D(1:NL,1:NL) contains the singular values of the +*> upper block, and D(NL+2:N) contains the singular values +*> of the lower block. On exit D(1:N) contains the singular +*> values of the modified matrix. +*> \endverbatim +*> +*> \param[in,out] VF +*> \verbatim +*> VF is DOUBLE PRECISION array, dimension ( M ) +*> On entry, VF(1:NL+1) contains the first components of all +*> right singular vectors of the upper block; and VF(NL+2:M) +*> contains the first components of all right singular vectors +*> of the lower block. On exit, VF contains the first components +*> of all right singular vectors of the bidiagonal matrix. +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is DOUBLE PRECISION array, dimension ( M ) +*> On entry, VL(1:NL+1) contains the last components of all +*> right singular vectors of the upper block; and VL(NL+2:M) +*> contains the last components of all right singular vectors of +*> the lower block. On exit, VL contains the last components of +*> all right singular vectors of the bidiagonal matrix. +*> \endverbatim +*> +*> \param[in,out] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION +*> Contains the diagonal element associated with the added row. +*> \endverbatim +*> +*> \param[in,out] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION +*> Contains the off-diagonal element associated with the added +*> row. +*> \endverbatim +*> +*> \param[in,out] IDXQ +*> \verbatim +*> IDXQ is INTEGER array, dimension ( N ) +*> This contains the permutation which will reintegrate the +*> subproblem just solved back into sorted order, i.e. +*> D( IDXQ( I = 1, N ) ) will be in ascending order. +*> \endverbatim +*> +*> \param[out] PERM +*> \verbatim +*> PERM is INTEGER array, dimension ( N ) +*> The permutations (from deflation and sorting) to be applied +*> to each block. Not referenced if ICOMPQ = 0. +*> \endverbatim +*> +*> \param[out] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER +*> The number of Givens rotations which took place in this +*> subproblem. Not referenced if ICOMPQ = 0. +*> \endverbatim +*> +*> \param[out] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension ( LDGCOL, 2 ) +*> Each pair of numbers indicates a pair of columns to take place +*> in a Givens rotation. Not referenced if ICOMPQ = 0. +*> \endverbatim +*> +*> \param[in] LDGCOL +*> \verbatim +*> LDGCOL is INTEGER +*> leading dimension of GIVCOL, must be at least N. +*> \endverbatim +*> +*> \param[out] GIVNUM +*> \verbatim +*> GIVNUM is DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) +*> Each number indicates the C or S value to be used in the +*> corresponding Givens rotation. Not referenced if ICOMPQ = 0. +*> \endverbatim +*> +*> \param[in] LDGNUM +*> \verbatim +*> LDGNUM is INTEGER +*> The leading dimension of GIVNUM and POLES, must be at least N. +*> \endverbatim +*> +*> \param[out] POLES +*> \verbatim +*> POLES is DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) +*> On exit, POLES(1,*) is an array containing the new singular +*> values obtained from solving the secular equation, and +*> POLES(2,*) is an array containing the poles in the secular +*> equation. Not referenced if ICOMPQ = 0. +*> \endverbatim +*> +*> \param[out] DIFL +*> \verbatim +*> DIFL is DOUBLE PRECISION array, dimension ( N ) +*> On exit, DIFL(I) is the distance between I-th updated +*> (undeflated) singular value and the I-th (undeflated) old +*> singular value. +*> \endverbatim +*> +*> \param[out] DIFR +*> \verbatim +*> DIFR is DOUBLE PRECISION array, +*> dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and +*> dimension ( K ) if ICOMPQ = 0. +*> On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not +*> defined and will not be referenced. +*> +*> If ICOMPQ = 1, DIFR(1:K,2) is an array containing the +*> normalizing factors for the right singular vector matrix. +*> +*> See DLASD8 for details on DIFL and DIFR. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension ( M ) +*> The first elements of this array contain the components +*> of the deflation-adjusted updating row vector. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> Contains the dimension of the non-deflated matrix, +*> This is the order of the related secular equation. 1 <= K <=N. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is DOUBLE PRECISION +*> C contains garbage if SQRE =0 and the C-value of a Givens +*> rotation related to the right null space if SQRE = 1. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION +*> S contains garbage if SQRE =0 and the S-value of a Givens +*> rotation related to the right null space if SQRE = 1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension ( 4 * M ) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension ( 3 * N ) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, a singular value did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DLASD6( ICOMPQ, NL, NR, SQRE, D, VF, VL, ALPHA, BETA, + $ IDXQ, PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, + $ LDGNUM, POLES, DIFL, DIFR, Z, K, C, S, WORK, + $ IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL, + $ NR, SQRE + DOUBLE PRECISION ALPHA, BETA, C, S +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), IDXQ( * ), IWORK( * ), + $ PERM( * ) + DOUBLE PRECISION D( * ), DIFL( * ), DIFR( * ), + $ GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ), + $ VF( * ), VL( * ), WORK( * ), Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IDX, IDXC, IDXP, ISIGMA, IVFW, IVLW, IW, M, + $ N, N1, N2 + DOUBLE PRECISION ORGNRM +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAMRG, DLASCL, DLASD7, DLASD8, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + N = NL + NR + 1 + M = N + SQRE +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( NL.LT.1 ) THEN + INFO = -2 + ELSE IF( NR.LT.1 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -14 + ELSE IF( LDGNUM.LT.N ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASD6', -INFO ) + RETURN + END IF +* +* The following values are for bookkeeping purposes only. They are +* integer pointers which indicate the portion of the workspace +* used by a particular array in DLASD7 and DLASD8. +* + ISIGMA = 1 + IW = ISIGMA + N + IVFW = IW + M + IVLW = IVFW + M +* + IDX = 1 + IDXC = IDX + N + IDXP = IDXC + N +* +* Scale. +* + ORGNRM = MAX( ABS( ALPHA ), ABS( BETA ) ) + D( NL+1 ) = ZERO + DO 10 I = 1, N + IF( ABS( D( I ) ).GT.ORGNRM ) THEN + ORGNRM = ABS( D( I ) ) + END IF + 10 CONTINUE + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) + ALPHA = ALPHA / ORGNRM + BETA = BETA / ORGNRM +* +* Sort and Deflate singular values. +* + CALL DLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, WORK( IW ), VF, + $ WORK( IVFW ), VL, WORK( IVLW ), ALPHA, BETA, + $ WORK( ISIGMA ), IWORK( IDX ), IWORK( IDXP ), IDXQ, + $ PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, C, S, + $ INFO ) +* +* Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. +* + CALL DLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDGNUM, + $ WORK( ISIGMA ), WORK( IW ), INFO ) +* +* Report the possible convergence failure. +* + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* Save the poles if ICOMPQ = 1. +* + IF( ICOMPQ.EQ.1 ) THEN + CALL DCOPY( K, D, 1, POLES( 1, 1 ), 1 ) + CALL DCOPY( K, WORK( ISIGMA ), 1, POLES( 1, 2 ), 1 ) + END IF +* +* Unscale. +* + CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) +* +* Prepare the IDXQ sorting permutation. +* + N1 = K + N2 = N - K + CALL DLAMRG( N1, N2, D, 1, -1, IDXQ ) +* + RETURN +* +* End of DLASD6 +* + END diff --git a/dspl/liblapack/SRC/dlasd7.f b/dspl/liblapack/SRC/dlasd7.f new file mode 100644 index 0000000..e0ddede --- /dev/null +++ b/dspl/liblapack/SRC/dlasd7.f @@ -0,0 +1,580 @@ +*> \brief \b DLASD7 merges the two sets of singular values together into a single sorted set. Then it tries to deflate the size of the problem. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASD7 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, ZW, VF, VFW, VL, +* VLW, ALPHA, BETA, DSIGMA, IDX, IDXP, IDXQ, +* PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, +* C, S, INFO ) +* +* .. Scalar Arguments .. +* INTEGER GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL, +* $ NR, SQRE +* DOUBLE PRECISION ALPHA, BETA, C, S +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( LDGCOL, * ), IDX( * ), IDXP( * ), +* $ IDXQ( * ), PERM( * ) +* DOUBLE PRECISION D( * ), DSIGMA( * ), GIVNUM( LDGNUM, * ), +* $ VF( * ), VFW( * ), VL( * ), VLW( * ), Z( * ), +* $ ZW( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASD7 merges the two sets of singular values together into a single +*> sorted set. Then it tries to deflate the size of the problem. There +*> are two ways in which deflation can occur: when two or more singular +*> values are close together or if there is a tiny entry in the Z +*> vector. For each such occurrence the order of the related +*> secular equation problem is reduced by one. +*> +*> DLASD7 is called from DLASD6. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> Specifies whether singular vectors are to be computed +*> in compact form, as follows: +*> = 0: Compute singular values only. +*> = 1: Compute singular vectors of upper +*> bidiagonal matrix in compact form. +*> \endverbatim +*> +*> \param[in] NL +*> \verbatim +*> NL is INTEGER +*> The row dimension of the upper block. NL >= 1. +*> \endverbatim +*> +*> \param[in] NR +*> \verbatim +*> NR is INTEGER +*> The row dimension of the lower block. NR >= 1. +*> \endverbatim +*> +*> \param[in] SQRE +*> \verbatim +*> SQRE is INTEGER +*> = 0: the lower block is an NR-by-NR square matrix. +*> = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +*> +*> The bidiagonal matrix has +*> N = NL + NR + 1 rows and +*> M = N + SQRE >= N columns. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> Contains the dimension of the non-deflated matrix, this is +*> the order of the related secular equation. 1 <= K <=N. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension ( N ) +*> On entry D contains the singular values of the two submatrices +*> to be combined. On exit D contains the trailing (N-K) updated +*> singular values (those which were deflated) sorted into +*> increasing order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension ( M ) +*> On exit Z contains the updating row vector in the secular +*> equation. +*> \endverbatim +*> +*> \param[out] ZW +*> \verbatim +*> ZW is DOUBLE PRECISION array, dimension ( M ) +*> Workspace for Z. +*> \endverbatim +*> +*> \param[in,out] VF +*> \verbatim +*> VF is DOUBLE PRECISION array, dimension ( M ) +*> On entry, VF(1:NL+1) contains the first components of all +*> right singular vectors of the upper block; and VF(NL+2:M) +*> contains the first components of all right singular vectors +*> of the lower block. On exit, VF contains the first components +*> of all right singular vectors of the bidiagonal matrix. +*> \endverbatim +*> +*> \param[out] VFW +*> \verbatim +*> VFW is DOUBLE PRECISION array, dimension ( M ) +*> Workspace for VF. +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is DOUBLE PRECISION array, dimension ( M ) +*> On entry, VL(1:NL+1) contains the last components of all +*> right singular vectors of the upper block; and VL(NL+2:M) +*> contains the last components of all right singular vectors +*> of the lower block. On exit, VL contains the last components +*> of all right singular vectors of the bidiagonal matrix. +*> \endverbatim +*> +*> \param[out] VLW +*> \verbatim +*> VLW is DOUBLE PRECISION array, dimension ( M ) +*> Workspace for VL. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION +*> Contains the diagonal element associated with the added row. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION +*> Contains the off-diagonal element associated with the added +*> row. +*> \endverbatim +*> +*> \param[out] DSIGMA +*> \verbatim +*> DSIGMA is DOUBLE PRECISION array, dimension ( N ) +*> Contains a copy of the diagonal elements (K-1 singular values +*> and one zero) in the secular equation. +*> \endverbatim +*> +*> \param[out] IDX +*> \verbatim +*> IDX is INTEGER array, dimension ( N ) +*> This will contain the permutation used to sort the contents of +*> D into ascending order. +*> \endverbatim +*> +*> \param[out] IDXP +*> \verbatim +*> IDXP is INTEGER array, dimension ( N ) +*> This will contain the permutation used to place deflated +*> values of D at the end of the array. On output IDXP(2:K) +*> points to the nondeflated D-values and IDXP(K+1:N) +*> points to the deflated singular values. +*> \endverbatim +*> +*> \param[in] IDXQ +*> \verbatim +*> IDXQ is INTEGER array, dimension ( N ) +*> This contains the permutation which separately sorts the two +*> sub-problems in D into ascending order. Note that entries in +*> the first half of this permutation must first be moved one +*> position backward; and entries in the second half +*> must first have NL+1 added to their values. +*> \endverbatim +*> +*> \param[out] PERM +*> \verbatim +*> PERM is INTEGER array, dimension ( N ) +*> The permutations (from deflation and sorting) to be applied +*> to each singular block. Not referenced if ICOMPQ = 0. +*> \endverbatim +*> +*> \param[out] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER +*> The number of Givens rotations which took place in this +*> subproblem. Not referenced if ICOMPQ = 0. +*> \endverbatim +*> +*> \param[out] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension ( LDGCOL, 2 ) +*> Each pair of numbers indicates a pair of columns to take place +*> in a Givens rotation. Not referenced if ICOMPQ = 0. +*> \endverbatim +*> +*> \param[in] LDGCOL +*> \verbatim +*> LDGCOL is INTEGER +*> The leading dimension of GIVCOL, must be at least N. +*> \endverbatim +*> +*> \param[out] GIVNUM +*> \verbatim +*> GIVNUM is DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) +*> Each number indicates the C or S value to be used in the +*> corresponding Givens rotation. Not referenced if ICOMPQ = 0. +*> \endverbatim +*> +*> \param[in] LDGNUM +*> \verbatim +*> LDGNUM is INTEGER +*> The leading dimension of GIVNUM, must be at least N. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is DOUBLE PRECISION +*> C contains garbage if SQRE =0 and the C-value of a Givens +*> rotation related to the right null space if SQRE = 1. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION +*> S contains garbage if SQRE =0 and the S-value of a Givens +*> rotation related to the right null space if SQRE = 1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, ZW, VF, VFW, VL, + $ VLW, ALPHA, BETA, DSIGMA, IDX, IDXP, IDXQ, + $ PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, + $ C, S, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL, + $ NR, SQRE + DOUBLE PRECISION ALPHA, BETA, C, S +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), IDX( * ), IDXP( * ), + $ IDXQ( * ), PERM( * ) + DOUBLE PRECISION D( * ), DSIGMA( * ), GIVNUM( LDGNUM, * ), + $ VF( * ), VFW( * ), VL( * ), VLW( * ), Z( * ), + $ ZW( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, EIGHT + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0, + $ EIGHT = 8.0D+0 ) +* .. +* .. Local Scalars .. +* + INTEGER I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M, N, + $ NLP1, NLP2 + DOUBLE PRECISION EPS, HLFTOL, TAU, TOL, Z1 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAMRG, DROT, XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL DLAMCH, DLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + N = NL + NR + 1 + M = N + SQRE +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( NL.LT.1 ) THEN + INFO = -2 + ELSE IF( NR.LT.1 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -22 + ELSE IF( LDGNUM.LT.N ) THEN + INFO = -24 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASD7', -INFO ) + RETURN + END IF +* + NLP1 = NL + 1 + NLP2 = NL + 2 + IF( ICOMPQ.EQ.1 ) THEN + GIVPTR = 0 + END IF +* +* Generate the first part of the vector Z and move the singular +* values in the first part of D one position backward. +* + Z1 = ALPHA*VL( NLP1 ) + VL( NLP1 ) = ZERO + TAU = VF( NLP1 ) + DO 10 I = NL, 1, -1 + Z( I+1 ) = ALPHA*VL( I ) + VL( I ) = ZERO + VF( I+1 ) = VF( I ) + D( I+1 ) = D( I ) + IDXQ( I+1 ) = IDXQ( I ) + 1 + 10 CONTINUE + VF( 1 ) = TAU +* +* Generate the second part of the vector Z. +* + DO 20 I = NLP2, M + Z( I ) = BETA*VF( I ) + VF( I ) = ZERO + 20 CONTINUE +* +* Sort the singular values into increasing order +* + DO 30 I = NLP2, N + IDXQ( I ) = IDXQ( I ) + NLP1 + 30 CONTINUE +* +* DSIGMA, IDXC, IDXC, and ZW are used as storage space. +* + DO 40 I = 2, N + DSIGMA( I ) = D( IDXQ( I ) ) + ZW( I ) = Z( IDXQ( I ) ) + VFW( I ) = VF( IDXQ( I ) ) + VLW( I ) = VL( IDXQ( I ) ) + 40 CONTINUE +* + CALL DLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) ) +* + DO 50 I = 2, N + IDXI = 1 + IDX( I ) + D( I ) = DSIGMA( IDXI ) + Z( I ) = ZW( IDXI ) + VF( I ) = VFW( IDXI ) + VL( I ) = VLW( IDXI ) + 50 CONTINUE +* +* Calculate the allowable deflation tolerence +* + EPS = DLAMCH( 'Epsilon' ) + TOL = MAX( ABS( ALPHA ), ABS( BETA ) ) + TOL = EIGHT*EIGHT*EPS*MAX( ABS( D( N ) ), TOL ) +* +* There are 2 kinds of deflation -- first a value in the z-vector +* is small, second two (or more) singular values are very close +* together (their difference is small). +* +* If the value in the z-vector is small, we simply permute the +* array so that the corresponding singular value is moved to the +* end. +* +* If two values in the D-vector are close, we perform a two-sided +* rotation designed to make one of the corresponding z-vector +* entries zero, and then permute the array so that the deflated +* singular value is moved to the end. +* +* If there are multiple singular values then the problem deflates. +* Here the number of equal singular values are found. As each equal +* singular value is found, an elementary reflector is computed to +* rotate the corresponding singular subspace so that the +* corresponding components of Z are zero in this new basis. +* + K = 1 + K2 = N + 1 + DO 60 J = 2, N + IF( ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + IDXP( K2 ) = J + IF( J.EQ.N ) + $ GO TO 100 + ELSE + JPREV = J + GO TO 70 + END IF + 60 CONTINUE + 70 CONTINUE + J = JPREV + 80 CONTINUE + J = J + 1 + IF( J.GT.N ) + $ GO TO 90 + IF( ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + IDXP( K2 ) = J + ELSE +* +* Check if singular values are close enough to allow deflation. +* + IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN +* +* Deflation is possible. +* + S = Z( JPREV ) + C = Z( J ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = DLAPY2( C, S ) + Z( J ) = TAU + Z( JPREV ) = ZERO + C = C / TAU + S = -S / TAU +* +* Record the appropriate Givens rotation +* + IF( ICOMPQ.EQ.1 ) THEN + GIVPTR = GIVPTR + 1 + IDXJP = IDXQ( IDX( JPREV )+1 ) + IDXJ = IDXQ( IDX( J )+1 ) + IF( IDXJP.LE.NLP1 ) THEN + IDXJP = IDXJP - 1 + END IF + IF( IDXJ.LE.NLP1 ) THEN + IDXJ = IDXJ - 1 + END IF + GIVCOL( GIVPTR, 2 ) = IDXJP + GIVCOL( GIVPTR, 1 ) = IDXJ + GIVNUM( GIVPTR, 2 ) = C + GIVNUM( GIVPTR, 1 ) = S + END IF + CALL DROT( 1, VF( JPREV ), 1, VF( J ), 1, C, S ) + CALL DROT( 1, VL( JPREV ), 1, VL( J ), 1, C, S ) + K2 = K2 - 1 + IDXP( K2 ) = JPREV + JPREV = J + ELSE + K = K + 1 + ZW( K ) = Z( JPREV ) + DSIGMA( K ) = D( JPREV ) + IDXP( K ) = JPREV + JPREV = J + END IF + END IF + GO TO 80 + 90 CONTINUE +* +* Record the last singular value. +* + K = K + 1 + ZW( K ) = Z( JPREV ) + DSIGMA( K ) = D( JPREV ) + IDXP( K ) = JPREV +* + 100 CONTINUE +* +* Sort the singular values into DSIGMA. The singular values which +* were not deflated go into the first K slots of DSIGMA, except +* that DSIGMA(1) is treated separately. +* + DO 110 J = 2, N + JP = IDXP( J ) + DSIGMA( J ) = D( JP ) + VFW( J ) = VF( JP ) + VLW( J ) = VL( JP ) + 110 CONTINUE + IF( ICOMPQ.EQ.1 ) THEN + DO 120 J = 2, N + JP = IDXP( J ) + PERM( J ) = IDXQ( IDX( JP )+1 ) + IF( PERM( J ).LE.NLP1 ) THEN + PERM( J ) = PERM( J ) - 1 + END IF + 120 CONTINUE + END IF +* +* The deflated singular values go back into the last N - K slots of +* D. +* + CALL DCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 ) +* +* Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and +* VL(M). +* + DSIGMA( 1 ) = ZERO + HLFTOL = TOL / TWO + IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL ) + $ DSIGMA( 2 ) = HLFTOL + IF( M.GT.N ) THEN + Z( 1 ) = DLAPY2( Z1, Z( M ) ) + IF( Z( 1 ).LE.TOL ) THEN + C = ONE + S = ZERO + Z( 1 ) = TOL + ELSE + C = Z1 / Z( 1 ) + S = -Z( M ) / Z( 1 ) + END IF + CALL DROT( 1, VF( M ), 1, VF( 1 ), 1, C, S ) + CALL DROT( 1, VL( M ), 1, VL( 1 ), 1, C, S ) + ELSE + IF( ABS( Z1 ).LE.TOL ) THEN + Z( 1 ) = TOL + ELSE + Z( 1 ) = Z1 + END IF + END IF +* +* Restore Z, VF, and VL. +* + CALL DCOPY( K-1, ZW( 2 ), 1, Z( 2 ), 1 ) + CALL DCOPY( N-1, VFW( 2 ), 1, VF( 2 ), 1 ) + CALL DCOPY( N-1, VLW( 2 ), 1, VL( 2 ), 1 ) +* + RETURN +* +* End of DLASD7 +* + END diff --git a/dspl/liblapack/SRC/dlasd8.f b/dspl/liblapack/SRC/dlasd8.f new file mode 100644 index 0000000..fc5c48c --- /dev/null +++ b/dspl/liblapack/SRC/dlasd8.f @@ -0,0 +1,342 @@ +*> \brief \b DLASD8 finds the square roots of the roots of the secular equation, and stores, for each element in D, the distance to its two nearest poles. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASD8 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDDIFR, +* DSIGMA, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER ICOMPQ, INFO, K, LDDIFR +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), DIFL( * ), DIFR( LDDIFR, * ), +* $ DSIGMA( * ), VF( * ), VL( * ), WORK( * ), +* $ Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASD8 finds the square roots of the roots of the secular equation, +*> as defined by the values in DSIGMA and Z. It makes the appropriate +*> calls to DLASD4, and stores, for each element in D, the distance +*> to its two nearest poles (elements in DSIGMA). It also updates +*> the arrays VF and VL, the first and last components of all the +*> right singular vectors of the original bidiagonal matrix. +*> +*> DLASD8 is called from DLASD6. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> Specifies whether singular vectors are to be computed in +*> factored form in the calling routine: +*> = 0: Compute singular values only. +*> = 1: Compute singular vectors in factored form as well. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of terms in the rational function to be solved +*> by DLASD4. K >= 1. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension ( K ) +*> On output, D contains the updated singular values. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension ( K ) +*> On entry, the first K elements of this array contain the +*> components of the deflation-adjusted updating row vector. +*> On exit, Z is updated. +*> \endverbatim +*> +*> \param[in,out] VF +*> \verbatim +*> VF is DOUBLE PRECISION array, dimension ( K ) +*> On entry, VF contains information passed through DBEDE8. +*> On exit, VF contains the first K components of the first +*> components of all right singular vectors of the bidiagonal +*> matrix. +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is DOUBLE PRECISION array, dimension ( K ) +*> On entry, VL contains information passed through DBEDE8. +*> On exit, VL contains the first K components of the last +*> components of all right singular vectors of the bidiagonal +*> matrix. +*> \endverbatim +*> +*> \param[out] DIFL +*> \verbatim +*> DIFL is DOUBLE PRECISION array, dimension ( K ) +*> On exit, DIFL(I) = D(I) - DSIGMA(I). +*> \endverbatim +*> +*> \param[out] DIFR +*> \verbatim +*> DIFR is DOUBLE PRECISION array, +*> dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and +*> dimension ( K ) if ICOMPQ = 0. +*> On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not +*> defined and will not be referenced. +*> +*> If ICOMPQ = 1, DIFR(1:K,2) is an array containing the +*> normalizing factors for the right singular vector matrix. +*> \endverbatim +*> +*> \param[in] LDDIFR +*> \verbatim +*> LDDIFR is INTEGER +*> The leading dimension of DIFR, must be at least K. +*> \endverbatim +*> +*> \param[in,out] DSIGMA +*> \verbatim +*> DSIGMA is DOUBLE PRECISION array, dimension ( K ) +*> On entry, the first K elements of this array contain the old +*> roots of the deflated updating problem. These are the poles +*> of the secular equation. +*> On exit, the elements of DSIGMA may be very slightly altered +*> in value. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*K) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, a singular value did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDDIFR, + $ DSIGMA, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, K, LDDIFR +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), DIFL( * ), DIFR( LDDIFR, * ), + $ DSIGMA( * ), VF( * ), VL( * ), WORK( * ), + $ Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IWK1, IWK2, IWK2I, IWK3, IWK3I, J + DOUBLE PRECISION DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, RHO, TEMP +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLASCL, DLASD4, DLASET, XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DDOT, DLAMC3, DNRM2 + EXTERNAL DDOT, DLAMC3, DNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( K.LT.1 ) THEN + INFO = -2 + ELSE IF( LDDIFR.LT.K ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASD8', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.1 ) THEN + D( 1 ) = ABS( Z( 1 ) ) + DIFL( 1 ) = D( 1 ) + IF( ICOMPQ.EQ.1 ) THEN + DIFL( 2 ) = ONE + DIFR( 1, 2 ) = ONE + END IF + RETURN + END IF +* +* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), +* which on any of these machines zeros out the bottommost +* bit of DSIGMA(I) if it is 1; this makes the subsequent +* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DSIGMA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DSIGMA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 10 I = 1, K + DSIGMA( I ) = DLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I ) + 10 CONTINUE +* +* Book keeping. +* + IWK1 = 1 + IWK2 = IWK1 + K + IWK3 = IWK2 + K + IWK2I = IWK2 - 1 + IWK3I = IWK3 - 1 +* +* Normalize Z. +* + RHO = DNRM2( K, Z, 1 ) + CALL DLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO ) + RHO = RHO*RHO +* +* Initialize WORK(IWK3). +* + CALL DLASET( 'A', K, 1, ONE, ONE, WORK( IWK3 ), K ) +* +* Compute the updated singular values, the arrays DIFL, DIFR, +* and the updated Z. +* + DO 40 J = 1, K + CALL DLASD4( K, J, DSIGMA, Z, WORK( IWK1 ), RHO, D( J ), + $ WORK( IWK2 ), INFO ) +* +* If the root finder fails, report the convergence failure. +* + IF( INFO.NE.0 ) THEN + RETURN + END IF + WORK( IWK3I+J ) = WORK( IWK3I+J )*WORK( J )*WORK( IWK2I+J ) + DIFL( J ) = -WORK( J ) + DIFR( J, 1 ) = -WORK( J+1 ) + DO 20 I = 1, J - 1 + WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )* + $ WORK( IWK2I+I ) / ( DSIGMA( I )- + $ DSIGMA( J ) ) / ( DSIGMA( I )+ + $ DSIGMA( J ) ) + 20 CONTINUE + DO 30 I = J + 1, K + WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )* + $ WORK( IWK2I+I ) / ( DSIGMA( I )- + $ DSIGMA( J ) ) / ( DSIGMA( I )+ + $ DSIGMA( J ) ) + 30 CONTINUE + 40 CONTINUE +* +* Compute updated Z. +* + DO 50 I = 1, K + Z( I ) = SIGN( SQRT( ABS( WORK( IWK3I+I ) ) ), Z( I ) ) + 50 CONTINUE +* +* Update VF and VL. +* + DO 80 J = 1, K + DIFLJ = DIFL( J ) + DJ = D( J ) + DSIGJ = -DSIGMA( J ) + IF( J.LT.K ) THEN + DIFRJ = -DIFR( J, 1 ) + DSIGJP = -DSIGMA( J+1 ) + END IF + WORK( J ) = -Z( J ) / DIFLJ / ( DSIGMA( J )+DJ ) + DO 60 I = 1, J - 1 + WORK( I ) = Z( I ) / ( DLAMC3( DSIGMA( I ), DSIGJ )-DIFLJ ) + $ / ( DSIGMA( I )+DJ ) + 60 CONTINUE + DO 70 I = J + 1, K + WORK( I ) = Z( I ) / ( DLAMC3( DSIGMA( I ), DSIGJP )+DIFRJ ) + $ / ( DSIGMA( I )+DJ ) + 70 CONTINUE + TEMP = DNRM2( K, WORK, 1 ) + WORK( IWK2I+J ) = DDOT( K, WORK, 1, VF, 1 ) / TEMP + WORK( IWK3I+J ) = DDOT( K, WORK, 1, VL, 1 ) / TEMP + IF( ICOMPQ.EQ.1 ) THEN + DIFR( J, 2 ) = TEMP + END IF + 80 CONTINUE +* + CALL DCOPY( K, WORK( IWK2 ), 1, VF, 1 ) + CALL DCOPY( K, WORK( IWK3 ), 1, VL, 1 ) +* + RETURN +* +* End of DLASD8 +* + END + diff --git a/dspl/liblapack/SRC/dlasda.f b/dspl/liblapack/SRC/dlasda.f new file mode 100644 index 0000000..f41a108 --- /dev/null +++ b/dspl/liblapack/SRC/dlasda.f @@ -0,0 +1,514 @@ +*> \brief \b DLASDA computes the singular value decomposition (SVD) of a real upper bidiagonal matrix with diagonal d and off-diagonal e. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASDA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASDA( ICOMPQ, SMLSIZ, N, SQRE, D, E, U, LDU, VT, K, +* DIFL, DIFR, Z, POLES, GIVPTR, GIVCOL, LDGCOL, +* PERM, GIVNUM, C, S, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER ICOMPQ, INFO, LDGCOL, LDU, N, SMLSIZ, SQRE +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), +* $ K( * ), PERM( LDGCOL, * ) +* DOUBLE PRECISION C( * ), D( * ), DIFL( LDU, * ), DIFR( LDU, * ), +* $ E( * ), GIVNUM( LDU, * ), POLES( LDU, * ), +* $ S( * ), U( LDU, * ), VT( LDU, * ), WORK( * ), +* $ Z( LDU, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Using a divide and conquer approach, DLASDA computes the singular +*> value decomposition (SVD) of a real upper bidiagonal N-by-M matrix +*> B with diagonal D and offdiagonal E, where M = N + SQRE. The +*> algorithm computes the singular values in the SVD B = U * S * VT. +*> The orthogonal matrices U and VT are optionally computed in +*> compact form. +*> +*> A related subroutine, DLASD0, computes the singular values and +*> the singular vectors in explicit form. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> Specifies whether singular vectors are to be computed +*> in compact form, as follows +*> = 0: Compute singular values only. +*> = 1: Compute singular vectors of upper bidiagonal +*> matrix in compact form. +*> \endverbatim +*> +*> \param[in] SMLSIZ +*> \verbatim +*> SMLSIZ is INTEGER +*> The maximum size of the subproblems at the bottom of the +*> computation tree. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The row dimension of the upper bidiagonal matrix. This is +*> also the dimension of the main diagonal array D. +*> \endverbatim +*> +*> \param[in] SQRE +*> \verbatim +*> SQRE is INTEGER +*> Specifies the column dimension of the bidiagonal matrix. +*> = 0: The bidiagonal matrix has column dimension M = N; +*> = 1: The bidiagonal matrix has column dimension M = N + 1. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension ( N ) +*> On entry D contains the main diagonal of the bidiagonal +*> matrix. On exit D, if INFO = 0, contains its singular values. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension ( M-1 ) +*> Contains the subdiagonal entries of the bidiagonal matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is DOUBLE PRECISION array, +*> dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced +*> if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left +*> singular vector matrices of all subproblems at the bottom +*> level. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER, LDU = > N. +*> The leading dimension of arrays U, VT, DIFL, DIFR, POLES, +*> GIVNUM, and Z. +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is DOUBLE PRECISION array, +*> dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced +*> if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT**T contains the right +*> singular vector matrices of all subproblems at the bottom +*> level. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER array, +*> dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. +*> If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th +*> secular equation on the computation tree. +*> \endverbatim +*> +*> \param[out] DIFL +*> \verbatim +*> DIFL is DOUBLE PRECISION array, dimension ( LDU, NLVL ), +*> where NLVL = floor(log_2 (N/SMLSIZ))). +*> \endverbatim +*> +*> \param[out] DIFR +*> \verbatim +*> DIFR is DOUBLE PRECISION array, +*> dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and +*> dimension ( N ) if ICOMPQ = 0. +*> If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) +*> record distances between singular values on the I-th +*> level and singular values on the (I -1)-th level, and +*> DIFR(1:N, 2 * I ) contains the normalizing factors for +*> the right singular vector matrix. See DLASD8 for details. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, +*> dimension ( LDU, NLVL ) if ICOMPQ = 1 and +*> dimension ( N ) if ICOMPQ = 0. +*> The first K elements of Z(1, I) contain the components of +*> the deflation-adjusted updating row vector for subproblems +*> on the I-th level. +*> \endverbatim +*> +*> \param[out] POLES +*> \verbatim +*> POLES is DOUBLE PRECISION array, +*> dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced +*> if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and +*> POLES(1, 2*I) contain the new and old singular values +*> involved in the secular equations on the I-th level. +*> \endverbatim +*> +*> \param[out] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER array, +*> dimension ( N ) if ICOMPQ = 1, and not referenced if +*> ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records +*> the number of Givens rotations performed on the I-th +*> problem on the computation tree. +*> \endverbatim +*> +*> \param[out] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, +*> dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not +*> referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, +*> GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations +*> of Givens rotations performed on the I-th level on the +*> computation tree. +*> \endverbatim +*> +*> \param[in] LDGCOL +*> \verbatim +*> LDGCOL is INTEGER, LDGCOL = > N. +*> The leading dimension of arrays GIVCOL and PERM. +*> \endverbatim +*> +*> \param[out] PERM +*> \verbatim +*> PERM is INTEGER array, +*> dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced +*> if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records +*> permutations done on the I-th level of the computation tree. +*> \endverbatim +*> +*> \param[out] GIVNUM +*> \verbatim +*> GIVNUM is DOUBLE PRECISION array, +*> dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not +*> referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, +*> GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- +*> values of Givens rotations performed on the I-th level on +*> the computation tree. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is DOUBLE PRECISION array, +*> dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. +*> If ICOMPQ = 1 and the I-th subproblem is not square, on exit, +*> C( I ) contains the C-value of a Givens rotation related to +*> the right null space of the I-th subproblem. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension ( N ) if +*> ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 +*> and the I-th subproblem is not square, on exit, S( I ) +*> contains the S-value of a Givens rotation related to +*> the right null space of the I-th subproblem. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension +*> (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, a singular value did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DLASDA( ICOMPQ, SMLSIZ, N, SQRE, D, E, U, LDU, VT, K, + $ DIFL, DIFR, Z, POLES, GIVPTR, GIVCOL, LDGCOL, + $ PERM, GIVNUM, C, S, WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, LDGCOL, LDU, N, SMLSIZ, SQRE +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), + $ K( * ), PERM( LDGCOL, * ) + DOUBLE PRECISION C( * ), D( * ), DIFL( LDU, * ), DIFR( LDU, * ), + $ E( * ), GIVNUM( LDU, * ), POLES( LDU, * ), + $ S( * ), U( LDU, * ), VT( LDU, * ), WORK( * ), + $ Z( LDU, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, IC, IDXQ, IDXQI, IM1, INODE, ITEMP, IWK, + $ J, LF, LL, LVL, LVL2, M, NCC, ND, NDB1, NDIML, + $ NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, NRU, + $ NWORK1, NWORK2, SMLSZP, SQREI, VF, VFI, VL, VLI + DOUBLE PRECISION ALPHA, BETA +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLASD6, DLASDQ, DLASDT, DLASET, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( SMLSIZ.LT.3 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + ELSE IF( LDU.LT.( N+SQRE ) ) THEN + INFO = -8 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -17 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASDA', -INFO ) + RETURN + END IF +* + M = N + SQRE +* +* If the input matrix is too small, call DLASDQ to find the SVD. +* + IF( N.LE.SMLSIZ ) THEN + IF( ICOMPQ.EQ.0 ) THEN + CALL DLASDQ( 'U', SQRE, N, 0, 0, 0, D, E, VT, LDU, U, LDU, + $ U, LDU, WORK, INFO ) + ELSE + CALL DLASDQ( 'U', SQRE, N, M, N, 0, D, E, VT, LDU, U, LDU, + $ U, LDU, WORK, INFO ) + END IF + RETURN + END IF +* +* Book-keeping and set up the computation tree. +* + INODE = 1 + NDIML = INODE + N + NDIMR = NDIML + N + IDXQ = NDIMR + N + IWK = IDXQ + N +* + NCC = 0 + NRU = 0 +* + SMLSZP = SMLSIZ + 1 + VF = 1 + VL = VF + M + NWORK1 = VL + M + NWORK2 = NWORK1 + SMLSZP*SMLSZP +* + CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ), + $ IWORK( NDIMR ), SMLSIZ ) +* +* for the nodes on bottom level of the tree, solve +* their subproblems by DLASDQ. +* + NDB1 = ( ND+1 ) / 2 + DO 30 I = NDB1, ND +* +* IC : center row of each node +* NL : number of rows of left subproblem +* NR : number of rows of right subproblem +* NLF: starting row of the left subproblem +* NRF: starting row of the right subproblem +* + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NLP1 = NL + 1 + NR = IWORK( NDIMR+I1 ) + NLF = IC - NL + NRF = IC + 1 + IDXQI = IDXQ + NLF - 2 + VFI = VF + NLF - 1 + VLI = VL + NLF - 1 + SQREI = 1 + IF( ICOMPQ.EQ.0 ) THEN + CALL DLASET( 'A', NLP1, NLP1, ZERO, ONE, WORK( NWORK1 ), + $ SMLSZP ) + CALL DLASDQ( 'U', SQREI, NL, NLP1, NRU, NCC, D( NLF ), + $ E( NLF ), WORK( NWORK1 ), SMLSZP, + $ WORK( NWORK2 ), NL, WORK( NWORK2 ), NL, + $ WORK( NWORK2 ), INFO ) + ITEMP = NWORK1 + NL*SMLSZP + CALL DCOPY( NLP1, WORK( NWORK1 ), 1, WORK( VFI ), 1 ) + CALL DCOPY( NLP1, WORK( ITEMP ), 1, WORK( VLI ), 1 ) + ELSE + CALL DLASET( 'A', NL, NL, ZERO, ONE, U( NLF, 1 ), LDU ) + CALL DLASET( 'A', NLP1, NLP1, ZERO, ONE, VT( NLF, 1 ), LDU ) + CALL DLASDQ( 'U', SQREI, NL, NLP1, NL, NCC, D( NLF ), + $ E( NLF ), VT( NLF, 1 ), LDU, U( NLF, 1 ), LDU, + $ U( NLF, 1 ), LDU, WORK( NWORK1 ), INFO ) + CALL DCOPY( NLP1, VT( NLF, 1 ), 1, WORK( VFI ), 1 ) + CALL DCOPY( NLP1, VT( NLF, NLP1 ), 1, WORK( VLI ), 1 ) + END IF + IF( INFO.NE.0 ) THEN + RETURN + END IF + DO 10 J = 1, NL + IWORK( IDXQI+J ) = J + 10 CONTINUE + IF( ( I.EQ.ND ) .AND. ( SQRE.EQ.0 ) ) THEN + SQREI = 0 + ELSE + SQREI = 1 + END IF + IDXQI = IDXQI + NLP1 + VFI = VFI + NLP1 + VLI = VLI + NLP1 + NRP1 = NR + SQREI + IF( ICOMPQ.EQ.0 ) THEN + CALL DLASET( 'A', NRP1, NRP1, ZERO, ONE, WORK( NWORK1 ), + $ SMLSZP ) + CALL DLASDQ( 'U', SQREI, NR, NRP1, NRU, NCC, D( NRF ), + $ E( NRF ), WORK( NWORK1 ), SMLSZP, + $ WORK( NWORK2 ), NR, WORK( NWORK2 ), NR, + $ WORK( NWORK2 ), INFO ) + ITEMP = NWORK1 + ( NRP1-1 )*SMLSZP + CALL DCOPY( NRP1, WORK( NWORK1 ), 1, WORK( VFI ), 1 ) + CALL DCOPY( NRP1, WORK( ITEMP ), 1, WORK( VLI ), 1 ) + ELSE + CALL DLASET( 'A', NR, NR, ZERO, ONE, U( NRF, 1 ), LDU ) + CALL DLASET( 'A', NRP1, NRP1, ZERO, ONE, VT( NRF, 1 ), LDU ) + CALL DLASDQ( 'U', SQREI, NR, NRP1, NR, NCC, D( NRF ), + $ E( NRF ), VT( NRF, 1 ), LDU, U( NRF, 1 ), LDU, + $ U( NRF, 1 ), LDU, WORK( NWORK1 ), INFO ) + CALL DCOPY( NRP1, VT( NRF, 1 ), 1, WORK( VFI ), 1 ) + CALL DCOPY( NRP1, VT( NRF, NRP1 ), 1, WORK( VLI ), 1 ) + END IF + IF( INFO.NE.0 ) THEN + RETURN + END IF + DO 20 J = 1, NR + IWORK( IDXQI+J ) = J + 20 CONTINUE + 30 CONTINUE +* +* Now conquer each subproblem bottom-up. +* + J = 2**NLVL + DO 50 LVL = NLVL, 1, -1 + LVL2 = LVL*2 - 1 +* +* Find the first node LF and last node LL on +* the current level LVL. +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 40 I = LF, LL + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + IF( I.EQ.LL ) THEN + SQREI = SQRE + ELSE + SQREI = 1 + END IF + VFI = VF + NLF - 1 + VLI = VL + NLF - 1 + IDXQI = IDXQ + NLF - 1 + ALPHA = D( IC ) + BETA = E( IC ) + IF( ICOMPQ.EQ.0 ) THEN + CALL DLASD6( ICOMPQ, NL, NR, SQREI, D( NLF ), + $ WORK( VFI ), WORK( VLI ), ALPHA, BETA, + $ IWORK( IDXQI ), PERM, GIVPTR( 1 ), GIVCOL, + $ LDGCOL, GIVNUM, LDU, POLES, DIFL, DIFR, Z, + $ K( 1 ), C( 1 ), S( 1 ), WORK( NWORK1 ), + $ IWORK( IWK ), INFO ) + ELSE + J = J - 1 + CALL DLASD6( ICOMPQ, NL, NR, SQREI, D( NLF ), + $ WORK( VFI ), WORK( VLI ), ALPHA, BETA, + $ IWORK( IDXQI ), PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, + $ POLES( NLF, LVL2 ), DIFL( NLF, LVL ), + $ DIFR( NLF, LVL2 ), Z( NLF, LVL ), K( J ), + $ C( J ), S( J ), WORK( NWORK1 ), + $ IWORK( IWK ), INFO ) + END IF + IF( INFO.NE.0 ) THEN + RETURN + END IF + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of DLASDA +* + END diff --git a/dspl/liblapack/SRC/dlasdq.f b/dspl/liblapack/SRC/dlasdq.f new file mode 100644 index 0000000..e7d3575 --- /dev/null +++ b/dspl/liblapack/SRC/dlasdq.f @@ -0,0 +1,413 @@ +*> \brief \b DLASDQ computes the SVD of a real bidiagonal matrix with diagonal d and off-diagonal e. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASDQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASDQ( UPLO, SQRE, N, NCVT, NRU, NCC, D, E, VT, LDVT, +* U, LDU, C, LDC, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU, SQRE +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( LDC, * ), D( * ), E( * ), U( LDU, * ), +* $ VT( LDVT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASDQ computes the singular value decomposition (SVD) of a real +*> (upper or lower) bidiagonal matrix with diagonal D and offdiagonal +*> E, accumulating the transformations if desired. Letting B denote +*> the input bidiagonal matrix, the algorithm computes orthogonal +*> matrices Q and P such that B = Q * S * P**T (P**T denotes the transpose +*> of P). The singular values S are overwritten on D. +*> +*> The input matrix U is changed to U * Q if desired. +*> The input matrix VT is changed to P**T * VT if desired. +*> The input matrix C is changed to Q**T * C if desired. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices With +*> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, +*> LAPACK Working Note #3, for a detailed description of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the input bidiagonal matrix +*> is upper or lower bidiagonal, and whether it is square are +*> not. +*> UPLO = 'U' or 'u' B is upper bidiagonal. +*> UPLO = 'L' or 'l' B is lower bidiagonal. +*> \endverbatim +*> +*> \param[in] SQRE +*> \verbatim +*> SQRE is INTEGER +*> = 0: then the input matrix is N-by-N. +*> = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and +*> (N+1)-by-N if UPLU = 'L'. +*> +*> The bidiagonal matrix has +*> N = NL + NR + 1 rows and +*> M = N + SQRE >= N columns. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of rows and columns +*> in the matrix. N must be at least 0. +*> \endverbatim +*> +*> \param[in] NCVT +*> \verbatim +*> NCVT is INTEGER +*> On entry, NCVT specifies the number of columns of +*> the matrix VT. NCVT must be at least 0. +*> \endverbatim +*> +*> \param[in] NRU +*> \verbatim +*> NRU is INTEGER +*> On entry, NRU specifies the number of rows of +*> the matrix U. NRU must be at least 0. +*> \endverbatim +*> +*> \param[in] NCC +*> \verbatim +*> NCC is INTEGER +*> On entry, NCC specifies the number of columns of +*> the matrix C. NCC must be at least 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, D contains the diagonal entries of the +*> bidiagonal matrix whose SVD is desired. On normal exit, +*> D contains the singular values in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array. +*> dimension is (N-1) if SQRE = 0 and N if SQRE = 1. +*> On entry, the entries of E contain the offdiagonal entries +*> of the bidiagonal matrix whose SVD is desired. On normal +*> exit, E will contain 0. If the algorithm does not converge, +*> D and E will contain the diagonal and superdiagonal entries +*> of a bidiagonal matrix orthogonally equivalent to the one +*> given as input. +*> \endverbatim +*> +*> \param[in,out] VT +*> \verbatim +*> VT is DOUBLE PRECISION array, dimension (LDVT, NCVT) +*> On entry, contains a matrix which on exit has been +*> premultiplied by P**T, dimension N-by-NCVT if SQRE = 0 +*> and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> On entry, LDVT specifies the leading dimension of VT as +*> declared in the calling (sub) program. LDVT must be at +*> least 1. If NCVT is nonzero LDVT must also be at least N. +*> \endverbatim +*> +*> \param[in,out] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension (LDU, N) +*> On entry, contains a matrix which on exit has been +*> postmultiplied by Q, dimension NRU-by-N if SQRE = 0 +*> and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> On entry, LDU specifies the leading dimension of U as +*> declared in the calling (sub) program. LDU must be at +*> least max( 1, NRU ) . +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC, NCC) +*> On entry, contains an N-by-NCC matrix which on exit +*> has been premultiplied by Q**T dimension N-by-NCC if SQRE = 0 +*> and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> On entry, LDC specifies the leading dimension of C as +*> declared in the calling (sub) program. LDC must be at +*> least 1. If NCC is nonzero, LDC must also be at least N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> Workspace. Only referenced if one of NCVT, NRU, or NCC is +*> nonzero, and if N is at least 2. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> On exit, a value of 0 indicates a successful exit. +*> If INFO < 0, argument number -INFO is illegal. +*> If INFO > 0, the algorithm did not converge, and INFO +*> specifies how many superdiagonals did not converge. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DLASDQ( UPLO, SQRE, N, NCVT, NRU, NCC, D, E, VT, LDVT, + $ U, LDU, C, LDC, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU, SQRE +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( LDC, * ), D( * ), E( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ROTATE + INTEGER I, ISUB, IUPLO, J, NP1, SQRE1 + DOUBLE PRECISION CS, R, SMIN, SN +* .. +* .. External Subroutines .. + EXTERNAL DBDSQR, DLARTG, DLASR, DSWAP, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IUPLO = 0 + IF( LSAME( UPLO, 'U' ) ) + $ IUPLO = 1 + IF( LSAME( UPLO, 'L' ) ) + $ IUPLO = 2 + IF( IUPLO.EQ.0 ) THEN + INFO = -1 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NCVT.LT.0 ) THEN + INFO = -4 + ELSE IF( NRU.LT.0 ) THEN + INFO = -5 + ELSE IF( NCC.LT.0 ) THEN + INFO = -6 + ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR. + $ ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN + INFO = -10 + ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN + INFO = -12 + ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR. + $ ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASDQ', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN +* +* ROTATE is true if any singular vectors desired, false otherwise +* + ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 ) + NP1 = N + 1 + SQRE1 = SQRE +* +* If matrix non-square upper bidiagonal, rotate to be lower +* bidiagonal. The rotations are on the right. +* + IF( ( IUPLO.EQ.1 ) .AND. ( SQRE1.EQ.1 ) ) THEN + DO 10 I = 1, N - 1 + CALL DLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( ROTATE ) THEN + WORK( I ) = CS + WORK( N+I ) = SN + END IF + 10 CONTINUE + CALL DLARTG( D( N ), E( N ), CS, SN, R ) + D( N ) = R + E( N ) = ZERO + IF( ROTATE ) THEN + WORK( N ) = CS + WORK( N+N ) = SN + END IF + IUPLO = 2 + SQRE1 = 0 +* +* Update singular vectors if desired. +* + IF( NCVT.GT.0 ) + $ CALL DLASR( 'L', 'V', 'F', NP1, NCVT, WORK( 1 ), + $ WORK( NP1 ), VT, LDVT ) + END IF +* +* If matrix lower bidiagonal, rotate to be upper bidiagonal +* by applying Givens rotations on the left. +* + IF( IUPLO.EQ.2 ) THEN + DO 20 I = 1, N - 1 + CALL DLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( ROTATE ) THEN + WORK( I ) = CS + WORK( N+I ) = SN + END IF + 20 CONTINUE +* +* If matrix (N+1)-by-N lower bidiagonal, one additional +* rotation is needed. +* + IF( SQRE1.EQ.1 ) THEN + CALL DLARTG( D( N ), E( N ), CS, SN, R ) + D( N ) = R + IF( ROTATE ) THEN + WORK( N ) = CS + WORK( N+N ) = SN + END IF + END IF +* +* Update singular vectors if desired. +* + IF( NRU.GT.0 ) THEN + IF( SQRE1.EQ.0 ) THEN + CALL DLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ), + $ WORK( NP1 ), U, LDU ) + ELSE + CALL DLASR( 'R', 'V', 'F', NRU, NP1, WORK( 1 ), + $ WORK( NP1 ), U, LDU ) + END IF + END IF + IF( NCC.GT.0 ) THEN + IF( SQRE1.EQ.0 ) THEN + CALL DLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ), + $ WORK( NP1 ), C, LDC ) + ELSE + CALL DLASR( 'L', 'V', 'F', NP1, NCC, WORK( 1 ), + $ WORK( NP1 ), C, LDC ) + END IF + END IF + END IF +* +* Call DBDSQR to compute the SVD of the reduced real +* N-by-N upper bidiagonal matrix. +* + CALL DBDSQR( 'U', N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C, + $ LDC, WORK, INFO ) +* +* Sort the singular values into ascending order (insertion sort on +* singular values, but only one transposition per singular vector) +* + DO 40 I = 1, N +* +* Scan for smallest D(I). +* + ISUB = I + SMIN = D( I ) + DO 30 J = I + 1, N + IF( D( J ).LT.SMIN ) THEN + ISUB = J + SMIN = D( J ) + END IF + 30 CONTINUE + IF( ISUB.NE.I ) THEN +* +* Swap singular values and vectors. +* + D( ISUB ) = D( I ) + D( I ) = SMIN + IF( NCVT.GT.0 ) + $ CALL DSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( I, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL DSWAP( NRU, U( 1, ISUB ), 1, U( 1, I ), 1 ) + IF( NCC.GT.0 ) + $ CALL DSWAP( NCC, C( ISUB, 1 ), LDC, C( I, 1 ), LDC ) + END IF + 40 CONTINUE +* + RETURN +* +* End of DLASDQ +* + END diff --git a/dspl/liblapack/SRC/dlasdt.f b/dspl/liblapack/SRC/dlasdt.f new file mode 100644 index 0000000..37da2d0 --- /dev/null +++ b/dspl/liblapack/SRC/dlasdt.f @@ -0,0 +1,172 @@ +*> \brief \b DLASDT creates a tree of subproblems for bidiagonal divide and conquer. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASDT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASDT( N, LVL, ND, INODE, NDIML, NDIMR, MSUB ) +* +* .. Scalar Arguments .. +* INTEGER LVL, MSUB, N, ND +* .. +* .. Array Arguments .. +* INTEGER INODE( * ), NDIML( * ), NDIMR( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASDT creates a tree of subproblems for bidiagonal divide and +*> conquer. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, the number of diagonal elements of the +*> bidiagonal matrix. +*> \endverbatim +*> +*> \param[out] LVL +*> \verbatim +*> LVL is INTEGER +*> On exit, the number of levels on the computation tree. +*> \endverbatim +*> +*> \param[out] ND +*> \verbatim +*> ND is INTEGER +*> On exit, the number of nodes on the tree. +*> \endverbatim +*> +*> \param[out] INODE +*> \verbatim +*> INODE is INTEGER array, dimension ( N ) +*> On exit, centers of subproblems. +*> \endverbatim +*> +*> \param[out] NDIML +*> \verbatim +*> NDIML is INTEGER array, dimension ( N ) +*> On exit, row dimensions of left children. +*> \endverbatim +*> +*> \param[out] NDIMR +*> \verbatim +*> NDIMR is INTEGER array, dimension ( N ) +*> On exit, row dimensions of right children. +*> \endverbatim +*> +*> \param[in] MSUB +*> \verbatim +*> MSUB is INTEGER +*> On entry, the maximum row dimension each subproblem at the +*> bottom of the tree can be of. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE DLASDT( N, LVL, ND, INODE, NDIML, NDIMR, MSUB ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER LVL, MSUB, N, ND +* .. +* .. Array Arguments .. + INTEGER INODE( * ), NDIML( * ), NDIMR( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IL, IR, LLST, MAXN, NCRNT, NLVL + DOUBLE PRECISION TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, INT, LOG, MAX +* .. +* .. Executable Statements .. +* +* Find the number of levels on the tree. +* + MAXN = MAX( 1, N ) + TEMP = LOG( DBLE( MAXN ) / DBLE( MSUB+1 ) ) / LOG( TWO ) + LVL = INT( TEMP ) + 1 +* + I = N / 2 + INODE( 1 ) = I + 1 + NDIML( 1 ) = I + NDIMR( 1 ) = N - I - 1 + IL = 0 + IR = 1 + LLST = 1 + DO 20 NLVL = 1, LVL - 1 +* +* Constructing the tree at (NLVL+1)-st level. The number of +* nodes created on this level is LLST * 2. +* + DO 10 I = 0, LLST - 1 + IL = IL + 2 + IR = IR + 2 + NCRNT = LLST + I + NDIML( IL ) = NDIML( NCRNT ) / 2 + NDIMR( IL ) = NDIML( NCRNT ) - NDIML( IL ) - 1 + INODE( IL ) = INODE( NCRNT ) - NDIMR( IL ) - 1 + NDIML( IR ) = NDIMR( NCRNT ) / 2 + NDIMR( IR ) = NDIMR( NCRNT ) - NDIML( IR ) - 1 + INODE( IR ) = INODE( NCRNT ) + NDIML( IR ) + 1 + 10 CONTINUE + LLST = LLST*2 + 20 CONTINUE + ND = LLST*2 - 1 +* + RETURN +* +* End of DLASDT +* + END diff --git a/dspl/liblapack/SRC/dlaset.f b/dspl/liblapack/SRC/dlaset.f new file mode 100644 index 0000000..3a0c469 --- /dev/null +++ b/dspl/liblapack/SRC/dlaset.f @@ -0,0 +1,184 @@ +*> \brief \b DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASET + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASET( UPLO, M, N, ALPHA, BETA, A, LDA ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER LDA, M, N +* DOUBLE PRECISION ALPHA, BETA +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASET initializes an m-by-n matrix A to BETA on the diagonal and +*> ALPHA on the offdiagonals. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies the part of the matrix A to be set. +*> = 'U': Upper triangular part is set; the strictly lower +*> triangular part of A is not changed. +*> = 'L': Lower triangular part is set; the strictly upper +*> triangular part of A is not changed. +*> Otherwise: All of the matrix A is set. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION +*> The constant to which the offdiagonal elements are to be set. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION +*> The constant to which the diagonal elements are to be set. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On exit, the leading m-by-n submatrix of A is set as follows: +*> +*> if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, +*> if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, +*> otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, +*> +*> and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLASET( UPLO, M, N, ALPHA, BETA, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, M, N + DOUBLE PRECISION ALPHA, BETA +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Set the strictly upper triangular or trapezoidal part of the +* array to ALPHA. +* + DO 20 J = 2, N + DO 10 I = 1, MIN( J-1, M ) + A( I, J ) = ALPHA + 10 CONTINUE + 20 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN +* +* Set the strictly lower triangular or trapezoidal part of the +* array to ALPHA. +* + DO 40 J = 1, MIN( M, N ) + DO 30 I = J + 1, M + A( I, J ) = ALPHA + 30 CONTINUE + 40 CONTINUE +* + ELSE +* +* Set the leading m-by-n submatrix to ALPHA. +* + DO 60 J = 1, N + DO 50 I = 1, M + A( I, J ) = ALPHA + 50 CONTINUE + 60 CONTINUE + END IF +* +* Set the first min(M,N) diagonal elements to BETA. +* + DO 70 I = 1, MIN( M, N ) + A( I, I ) = BETA + 70 CONTINUE +* + RETURN +* +* End of DLASET +* + END diff --git a/dspl/liblapack/SRC/dlasq1.f b/dspl/liblapack/SRC/dlasq1.f new file mode 100644 index 0000000..468676e --- /dev/null +++ b/dspl/liblapack/SRC/dlasq1.f @@ -0,0 +1,224 @@ +*> \brief \b DLASQ1 computes the singular values of a real square bidiagonal matrix. Used by sbdsqr. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASQ1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASQ1( N, D, E, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASQ1 computes the singular values of a real N-by-N bidiagonal +*> matrix with diagonal D and off-diagonal E. The singular values +*> are computed to high relative accuracy, in the absence of +*> denormalization, underflow and overflow. The algorithm was first +*> presented in +*> +*> "Accurate singular values and differential qd algorithms" by K. V. +*> Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, +*> 1994, +*> +*> and the present implementation is described in "An implementation of +*> the dqds Algorithm (Positive Case)", LAPACK Working Note. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns in the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, D contains the diagonal elements of the +*> bidiagonal matrix whose SVD is desired. On normal exit, +*> D contains the singular values in decreasing order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> On entry, elements E(1:N-1) contain the off-diagonal elements +*> of the bidiagonal matrix whose SVD is desired. +*> On exit, E is overwritten. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: the algorithm failed +*> = 1, a split was marked by a positive value in E +*> = 2, current block of Z not diagonalized after 100*N +*> iterations (in inner while loop) On exit D and E +*> represent a matrix with the same singular values +*> which the calling subroutine could use to finish the +*> computation, or even feed back into DLASQ1 +*> = 3, termination criterion of outer while loop not met +*> (program created more than N unreduced blocks) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DLASQ1( N, D, E, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO + DOUBLE PRECISION EPS, SCALE, SAFMIN, SIGMN, SIGMX +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAS2, DLASCL, DLASQ2, DLASRT, XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'DLASQ1', -INFO ) + RETURN + ELSE IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + D( 1 ) = ABS( D( 1 ) ) + RETURN + ELSE IF( N.EQ.2 ) THEN + CALL DLAS2( D( 1 ), E( 1 ), D( 2 ), SIGMN, SIGMX ) + D( 1 ) = SIGMX + D( 2 ) = SIGMN + RETURN + END IF +* +* Estimate the largest singular value. +* + SIGMX = ZERO + DO 10 I = 1, N - 1 + D( I ) = ABS( D( I ) ) + SIGMX = MAX( SIGMX, ABS( E( I ) ) ) + 10 CONTINUE + D( N ) = ABS( D( N ) ) +* +* Early return if SIGMX is zero (matrix is already diagonal). +* + IF( SIGMX.EQ.ZERO ) THEN + CALL DLASRT( 'D', N, D, IINFO ) + RETURN + END IF +* + DO 20 I = 1, N + SIGMX = MAX( SIGMX, D( I ) ) + 20 CONTINUE +* +* Copy D and E into WORK (in the Z format) and scale (squaring the +* input data makes scaling by a power of the radix pointless). +* + EPS = DLAMCH( 'Precision' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SCALE = SQRT( EPS / SAFMIN ) + CALL DCOPY( N, D, 1, WORK( 1 ), 2 ) + CALL DCOPY( N-1, E, 1, WORK( 2 ), 2 ) + CALL DLASCL( 'G', 0, 0, SIGMX, SCALE, 2*N-1, 1, WORK, 2*N-1, + $ IINFO ) +* +* Compute the q's and e's. +* + DO 30 I = 1, 2*N - 1 + WORK( I ) = WORK( I )**2 + 30 CONTINUE + WORK( 2*N ) = ZERO +* + CALL DLASQ2( N, WORK, INFO ) +* + IF( INFO.EQ.0 ) THEN + DO 40 I = 1, N + D( I ) = SQRT( WORK( I ) ) + 40 CONTINUE + CALL DLASCL( 'G', 0, 0, SCALE, SIGMX, N, 1, D, N, IINFO ) + ELSE IF( INFO.EQ.2 ) THEN +* +* Maximum number of iterations exceeded. Move data from WORK +* into D and E so the calling subroutine can try to finish +* + DO I = 1, N + D( I ) = SQRT( WORK( 2*I-1 ) ) + E( I ) = SQRT( WORK( 2*I ) ) + END DO + CALL DLASCL( 'G', 0, 0, SCALE, SIGMX, N, 1, D, N, IINFO ) + CALL DLASCL( 'G', 0, 0, SCALE, SIGMX, N, 1, E, N, IINFO ) + END IF +* + RETURN +* +* End of DLASQ1 +* + END diff --git a/dspl/liblapack/SRC/dlasq2.f b/dspl/liblapack/SRC/dlasq2.f new file mode 100644 index 0000000..68d9228 --- /dev/null +++ b/dspl/liblapack/SRC/dlasq2.f @@ -0,0 +1,582 @@ +*> \brief \b DLASQ2 computes all the eigenvalues of the symmetric positive definite tridiagonal matrix associated with the qd Array Z to high relative accuracy. Used by sbdsqr and sstegr. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASQ2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASQ2( N, Z, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASQ2 computes all the eigenvalues of the symmetric positive +*> definite tridiagonal matrix associated with the qd array Z to high +*> relative accuracy are computed to high relative accuracy, in the +*> absence of denormalization, underflow and overflow. +*> +*> To see the relation of Z to the tridiagonal matrix, let L be a +*> unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and +*> let U be an upper bidiagonal matrix with 1's above and diagonal +*> Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the +*> symmetric tridiagonal to which it is similar. +*> +*> Note : DLASQ2 defines a logical variable, IEEE, which is true +*> on machines which follow ieee-754 floating-point standard in their +*> handling of infinities and NaNs, and false otherwise. This variable +*> is passed to DLASQ3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns in the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension ( 4*N ) +*> On entry Z holds the qd array. On exit, entries 1 to N hold +*> the eigenvalues in decreasing order, Z( 2*N+1 ) holds the +*> trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If +*> N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) +*> holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of +*> shifts that failed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if the i-th argument is a scalar and had an illegal +*> value, then INFO = -i, if the i-th argument is an +*> array and the j-entry had an illegal value, then +*> INFO = -(i*100+j) +*> > 0: the algorithm failed +*> = 1, a split was marked by a positive value in E +*> = 2, current block of Z not diagonalized after 100*N +*> iterations (in inner while loop). On exit Z holds +*> a qd array with the same eigenvalues as the given Z. +*> = 3, termination criterion of outer while loop not met +*> (program created more than N unreduced blocks) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Local Variables: I0:N0 defines a current unreduced segment of Z. +*> The shifts are accumulated in SIGMA. Iteration count is in ITER. +*> Ping-pong is controlled by PP (alternates between 0 and 1). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLASQ2( N, Z, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION CBIAS + PARAMETER ( CBIAS = 1.50D0 ) + DOUBLE PRECISION ZERO, HALF, ONE, TWO, FOUR, HUNDRD + PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0, + $ TWO = 2.0D0, FOUR = 4.0D0, HUNDRD = 100.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL IEEE + INTEGER I0, I1, I4, IINFO, IPN4, ITER, IWHILA, IWHILB, + $ K, KMIN, N0, N1, NBIG, NDIV, NFAIL, PP, SPLT, + $ TTYPE + DOUBLE PRECISION D, DEE, DEEMIN, DESIG, DMIN, DMIN1, DMIN2, DN, + $ DN1, DN2, E, EMAX, EMIN, EPS, G, OLDEMN, QMAX, + $ QMIN, S, SAFMIN, SIGMA, T, TAU, TEMP, TOL, + $ TOL2, TRACE, ZMAX, TEMPE, TEMPQ +* .. +* .. External Subroutines .. + EXTERNAL DLASQ3, DLASRT, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH, ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* (in case DLASQ2 is not called by DLASQ1) +* + INFO = 0 + EPS = DLAMCH( 'Precision' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + TOL = EPS*HUNDRD + TOL2 = TOL**2 +* + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'DLASQ2', 1 ) + RETURN + ELSE IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN +* +* 1-by-1 case. +* + IF( Z( 1 ).LT.ZERO ) THEN + INFO = -201 + CALL XERBLA( 'DLASQ2', 2 ) + END IF + RETURN + ELSE IF( N.EQ.2 ) THEN +* +* 2-by-2 case. +* + IF( Z( 2 ).LT.ZERO .OR. Z( 3 ).LT.ZERO ) THEN + INFO = -2 + CALL XERBLA( 'DLASQ2', 2 ) + RETURN + ELSE IF( Z( 3 ).GT.Z( 1 ) ) THEN + D = Z( 3 ) + Z( 3 ) = Z( 1 ) + Z( 1 ) = D + END IF + Z( 5 ) = Z( 1 ) + Z( 2 ) + Z( 3 ) + IF( Z( 2 ).GT.Z( 3 )*TOL2 ) THEN + T = HALF*( ( Z( 1 )-Z( 3 ) )+Z( 2 ) ) + S = Z( 3 )*( Z( 2 ) / T ) + IF( S.LE.T ) THEN + S = Z( 3 )*( Z( 2 ) / ( T*( ONE+SQRT( ONE+S / T ) ) ) ) + ELSE + S = Z( 3 )*( Z( 2 ) / ( T+SQRT( T )*SQRT( T+S ) ) ) + END IF + T = Z( 1 ) + ( S+Z( 2 ) ) + Z( 3 ) = Z( 3 )*( Z( 1 ) / T ) + Z( 1 ) = T + END IF + Z( 2 ) = Z( 3 ) + Z( 6 ) = Z( 2 ) + Z( 1 ) + RETURN + END IF +* +* Check for negative data and compute sums of q's and e's. +* + Z( 2*N ) = ZERO + EMIN = Z( 2 ) + QMAX = ZERO + ZMAX = ZERO + D = ZERO + E = ZERO +* + DO 10 K = 1, 2*( N-1 ), 2 + IF( Z( K ).LT.ZERO ) THEN + INFO = -( 200+K ) + CALL XERBLA( 'DLASQ2', 2 ) + RETURN + ELSE IF( Z( K+1 ).LT.ZERO ) THEN + INFO = -( 200+K+1 ) + CALL XERBLA( 'DLASQ2', 2 ) + RETURN + END IF + D = D + Z( K ) + E = E + Z( K+1 ) + QMAX = MAX( QMAX, Z( K ) ) + EMIN = MIN( EMIN, Z( K+1 ) ) + ZMAX = MAX( QMAX, ZMAX, Z( K+1 ) ) + 10 CONTINUE + IF( Z( 2*N-1 ).LT.ZERO ) THEN + INFO = -( 200+2*N-1 ) + CALL XERBLA( 'DLASQ2', 2 ) + RETURN + END IF + D = D + Z( 2*N-1 ) + QMAX = MAX( QMAX, Z( 2*N-1 ) ) + ZMAX = MAX( QMAX, ZMAX ) +* +* Check for diagonality. +* + IF( E.EQ.ZERO ) THEN + DO 20 K = 2, N + Z( K ) = Z( 2*K-1 ) + 20 CONTINUE + CALL DLASRT( 'D', N, Z, IINFO ) + Z( 2*N-1 ) = D + RETURN + END IF +* + TRACE = D + E +* +* Check for zero data. +* + IF( TRACE.EQ.ZERO ) THEN + Z( 2*N-1 ) = ZERO + RETURN + END IF +* +* Check whether the machine is IEEE conformable. +* + IEEE = ILAENV( 10, 'DLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 .AND. + $ ILAENV( 11, 'DLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 +* +* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). +* + DO 30 K = 2*N, 2, -2 + Z( 2*K ) = ZERO + Z( 2*K-1 ) = Z( K ) + Z( 2*K-2 ) = ZERO + Z( 2*K-3 ) = Z( K-1 ) + 30 CONTINUE +* + I0 = 1 + N0 = N +* +* Reverse the qd-array, if warranted. +* + IF( CBIAS*Z( 4*I0-3 ).LT.Z( 4*N0-3 ) ) THEN + IPN4 = 4*( I0+N0 ) + DO 40 I4 = 4*I0, 2*( I0+N0-1 ), 4 + TEMP = Z( I4-3 ) + Z( I4-3 ) = Z( IPN4-I4-3 ) + Z( IPN4-I4-3 ) = TEMP + TEMP = Z( I4-1 ) + Z( I4-1 ) = Z( IPN4-I4-5 ) + Z( IPN4-I4-5 ) = TEMP + 40 CONTINUE + END IF +* +* Initial split checking via dqd and Li's test. +* + PP = 0 +* + DO 80 K = 1, 2 +* + D = Z( 4*N0+PP-3 ) + DO 50 I4 = 4*( N0-1 ) + PP, 4*I0 + PP, -4 + IF( Z( I4-1 ).LE.TOL2*D ) THEN + Z( I4-1 ) = -ZERO + D = Z( I4-3 ) + ELSE + D = Z( I4-3 )*( D / ( D+Z( I4-1 ) ) ) + END IF + 50 CONTINUE +* +* dqd maps Z to ZZ plus Li's test. +* + EMIN = Z( 4*I0+PP+1 ) + D = Z( 4*I0+PP-3 ) + DO 60 I4 = 4*I0 + PP, 4*( N0-1 ) + PP, 4 + Z( I4-2*PP-2 ) = D + Z( I4-1 ) + IF( Z( I4-1 ).LE.TOL2*D ) THEN + Z( I4-1 ) = -ZERO + Z( I4-2*PP-2 ) = D + Z( I4-2*PP ) = ZERO + D = Z( I4+1 ) + ELSE IF( SAFMIN*Z( I4+1 ).LT.Z( I4-2*PP-2 ) .AND. + $ SAFMIN*Z( I4-2*PP-2 ).LT.Z( I4+1 ) ) THEN + TEMP = Z( I4+1 ) / Z( I4-2*PP-2 ) + Z( I4-2*PP ) = Z( I4-1 )*TEMP + D = D*TEMP + ELSE + Z( I4-2*PP ) = Z( I4+1 )*( Z( I4-1 ) / Z( I4-2*PP-2 ) ) + D = Z( I4+1 )*( D / Z( I4-2*PP-2 ) ) + END IF + EMIN = MIN( EMIN, Z( I4-2*PP ) ) + 60 CONTINUE + Z( 4*N0-PP-2 ) = D +* +* Now find qmax. +* + QMAX = Z( 4*I0-PP-2 ) + DO 70 I4 = 4*I0 - PP + 2, 4*N0 - PP - 2, 4 + QMAX = MAX( QMAX, Z( I4 ) ) + 70 CONTINUE +* +* Prepare for the next iteration on K. +* + PP = 1 - PP + 80 CONTINUE +* +* Initialise variables to pass to DLASQ3. +* + TTYPE = 0 + DMIN1 = ZERO + DMIN2 = ZERO + DN = ZERO + DN1 = ZERO + DN2 = ZERO + G = ZERO + TAU = ZERO +* + ITER = 2 + NFAIL = 0 + NDIV = 2*( N0-I0 ) +* + DO 160 IWHILA = 1, N + 1 + IF( N0.LT.1 ) + $ GO TO 170 +* +* While array unfinished do +* +* E(N0) holds the value of SIGMA when submatrix in I0:N0 +* splits from the rest of the array, but is negated. +* + DESIG = ZERO + IF( N0.EQ.N ) THEN + SIGMA = ZERO + ELSE + SIGMA = -Z( 4*N0-1 ) + END IF + IF( SIGMA.LT.ZERO ) THEN + INFO = 1 + RETURN + END IF +* +* Find last unreduced submatrix's top index I0, find QMAX and +* EMIN. Find Gershgorin-type bound if Q's much greater than E's. +* + EMAX = ZERO + IF( N0.GT.I0 ) THEN + EMIN = ABS( Z( 4*N0-5 ) ) + ELSE + EMIN = ZERO + END IF + QMIN = Z( 4*N0-3 ) + QMAX = QMIN + DO 90 I4 = 4*N0, 8, -4 + IF( Z( I4-5 ).LE.ZERO ) + $ GO TO 100 + IF( QMIN.GE.FOUR*EMAX ) THEN + QMIN = MIN( QMIN, Z( I4-3 ) ) + EMAX = MAX( EMAX, Z( I4-5 ) ) + END IF + QMAX = MAX( QMAX, Z( I4-7 )+Z( I4-5 ) ) + EMIN = MIN( EMIN, Z( I4-5 ) ) + 90 CONTINUE + I4 = 4 +* + 100 CONTINUE + I0 = I4 / 4 + PP = 0 +* + IF( N0-I0.GT.1 ) THEN + DEE = Z( 4*I0-3 ) + DEEMIN = DEE + KMIN = I0 + DO 110 I4 = 4*I0+1, 4*N0-3, 4 + DEE = Z( I4 )*( DEE /( DEE+Z( I4-2 ) ) ) + IF( DEE.LE.DEEMIN ) THEN + DEEMIN = DEE + KMIN = ( I4+3 )/4 + END IF + 110 CONTINUE + IF( (KMIN-I0)*2.LT.N0-KMIN .AND. + $ DEEMIN.LE.HALF*Z(4*N0-3) ) THEN + IPN4 = 4*( I0+N0 ) + PP = 2 + DO 120 I4 = 4*I0, 2*( I0+N0-1 ), 4 + TEMP = Z( I4-3 ) + Z( I4-3 ) = Z( IPN4-I4-3 ) + Z( IPN4-I4-3 ) = TEMP + TEMP = Z( I4-2 ) + Z( I4-2 ) = Z( IPN4-I4-2 ) + Z( IPN4-I4-2 ) = TEMP + TEMP = Z( I4-1 ) + Z( I4-1 ) = Z( IPN4-I4-5 ) + Z( IPN4-I4-5 ) = TEMP + TEMP = Z( I4 ) + Z( I4 ) = Z( IPN4-I4-4 ) + Z( IPN4-I4-4 ) = TEMP + 120 CONTINUE + END IF + END IF +* +* Put -(initial shift) into DMIN. +* + DMIN = -MAX( ZERO, QMIN-TWO*SQRT( QMIN )*SQRT( EMAX ) ) +* +* Now I0:N0 is unreduced. +* PP = 0 for ping, PP = 1 for pong. +* PP = 2 indicates that flipping was applied to the Z array and +* and that the tests for deflation upon entry in DLASQ3 +* should not be performed. +* + NBIG = 100*( N0-I0+1 ) + DO 140 IWHILB = 1, NBIG + IF( I0.GT.N0 ) + $ GO TO 150 +* +* While submatrix unfinished take a good dqds step. +* + CALL DLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL, + $ ITER, NDIV, IEEE, TTYPE, DMIN1, DMIN2, DN, DN1, + $ DN2, G, TAU ) +* + PP = 1 - PP +* +* When EMIN is very small check for splits. +* + IF( PP.EQ.0 .AND. N0-I0.GE.3 ) THEN + IF( Z( 4*N0 ).LE.TOL2*QMAX .OR. + $ Z( 4*N0-1 ).LE.TOL2*SIGMA ) THEN + SPLT = I0 - 1 + QMAX = Z( 4*I0-3 ) + EMIN = Z( 4*I0-1 ) + OLDEMN = Z( 4*I0 ) + DO 130 I4 = 4*I0, 4*( N0-3 ), 4 + IF( Z( I4 ).LE.TOL2*Z( I4-3 ) .OR. + $ Z( I4-1 ).LE.TOL2*SIGMA ) THEN + Z( I4-1 ) = -SIGMA + SPLT = I4 / 4 + QMAX = ZERO + EMIN = Z( I4+3 ) + OLDEMN = Z( I4+4 ) + ELSE + QMAX = MAX( QMAX, Z( I4+1 ) ) + EMIN = MIN( EMIN, Z( I4-1 ) ) + OLDEMN = MIN( OLDEMN, Z( I4 ) ) + END IF + 130 CONTINUE + Z( 4*N0-1 ) = EMIN + Z( 4*N0 ) = OLDEMN + I0 = SPLT + 1 + END IF + END IF +* + 140 CONTINUE +* + INFO = 2 +* +* Maximum number of iterations exceeded, restore the shift +* SIGMA and place the new d's and e's in a qd array. +* This might need to be done for several blocks +* + I1 = I0 + N1 = N0 + 145 CONTINUE + TEMPQ = Z( 4*I0-3 ) + Z( 4*I0-3 ) = Z( 4*I0-3 ) + SIGMA + DO K = I0+1, N0 + TEMPE = Z( 4*K-5 ) + Z( 4*K-5 ) = Z( 4*K-5 ) * (TEMPQ / Z( 4*K-7 )) + TEMPQ = Z( 4*K-3 ) + Z( 4*K-3 ) = Z( 4*K-3 ) + SIGMA + TEMPE - Z( 4*K-5 ) + END DO +* +* Prepare to do this on the previous block if there is one +* + IF( I1.GT.1 ) THEN + N1 = I1-1 + DO WHILE( ( I1.GE.2 ) .AND. ( Z(4*I1-5).GE.ZERO ) ) + I1 = I1 - 1 + END DO + SIGMA = -Z(4*N1-1) + GO TO 145 + END IF + + DO K = 1, N + Z( 2*K-1 ) = Z( 4*K-3 ) +* +* Only the block 1..N0 is unfinished. The rest of the e's +* must be essentially zero, although sometimes other data +* has been stored in them. +* + IF( K.LT.N0 ) THEN + Z( 2*K ) = Z( 4*K-1 ) + ELSE + Z( 2*K ) = 0 + END IF + END DO + RETURN +* +* end IWHILB +* + 150 CONTINUE +* + 160 CONTINUE +* + INFO = 3 + RETURN +* +* end IWHILA +* + 170 CONTINUE +* +* Move q's to the front. +* + DO 180 K = 2, N + Z( K ) = Z( 4*K-3 ) + 180 CONTINUE +* +* Sort and compute sum of eigenvalues. +* + CALL DLASRT( 'D', N, Z, IINFO ) +* + E = ZERO + DO 190 K = N, 1, -1 + E = E + Z( K ) + 190 CONTINUE +* +* Store trace, sum(eigenvalues) and information on performance. +* + Z( 2*N+1 ) = TRACE + Z( 2*N+2 ) = E + Z( 2*N+3 ) = DBLE( ITER ) + Z( 2*N+4 ) = DBLE( NDIV ) / DBLE( N**2 ) + Z( 2*N+5 ) = HUNDRD*NFAIL / DBLE( ITER ) + RETURN +* +* End of DLASQ2 +* + END diff --git a/dspl/liblapack/SRC/dlasq3.f b/dspl/liblapack/SRC/dlasq3.f new file mode 100644 index 0000000..c095bdb --- /dev/null +++ b/dspl/liblapack/SRC/dlasq3.f @@ -0,0 +1,421 @@ +*> \brief \b DLASQ3 checks for deflation, computes a shift and calls dqds. Used by sbdsqr. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASQ3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL, +* ITER, NDIV, IEEE, TTYPE, DMIN1, DMIN2, DN, DN1, +* DN2, G, TAU ) +* +* .. Scalar Arguments .. +* LOGICAL IEEE +* INTEGER I0, ITER, N0, NDIV, NFAIL, PP +* DOUBLE PRECISION DESIG, DMIN, DMIN1, DMIN2, DN, DN1, DN2, G, +* $ QMAX, SIGMA, TAU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. +*> In case of failure it changes shifts, and tries again until output +*> is positive. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] I0 +*> \verbatim +*> I0 is INTEGER +*> First index. +*> \endverbatim +*> +*> \param[in,out] N0 +*> \verbatim +*> N0 is INTEGER +*> Last index. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension ( 4*N0 ) +*> Z holds the qd array. +*> \endverbatim +*> +*> \param[in,out] PP +*> \verbatim +*> PP is INTEGER +*> PP=0 for ping, PP=1 for pong. +*> PP=2 indicates that flipping was applied to the Z array +*> and that the initial tests for deflation should not be +*> performed. +*> \endverbatim +*> +*> \param[out] DMIN +*> \verbatim +*> DMIN is DOUBLE PRECISION +*> Minimum value of d. +*> \endverbatim +*> +*> \param[out] SIGMA +*> \verbatim +*> SIGMA is DOUBLE PRECISION +*> Sum of shifts used in current segment. +*> \endverbatim +*> +*> \param[in,out] DESIG +*> \verbatim +*> DESIG is DOUBLE PRECISION +*> Lower order part of SIGMA +*> \endverbatim +*> +*> \param[in] QMAX +*> \verbatim +*> QMAX is DOUBLE PRECISION +*> Maximum value of q. +*> \endverbatim +*> +*> \param[in,out] NFAIL +*> \verbatim +*> NFAIL is INTEGER +*> Increment NFAIL by 1 each time the shift was too big. +*> \endverbatim +*> +*> \param[in,out] ITER +*> \verbatim +*> ITER is INTEGER +*> Increment ITER by 1 for each iteration. +*> \endverbatim +*> +*> \param[in,out] NDIV +*> \verbatim +*> NDIV is INTEGER +*> Increment NDIV by 1 for each division. +*> \endverbatim +*> +*> \param[in] IEEE +*> \verbatim +*> IEEE is LOGICAL +*> Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). +*> \endverbatim +*> +*> \param[in,out] TTYPE +*> \verbatim +*> TTYPE is INTEGER +*> Shift type. +*> \endverbatim +*> +*> \param[in,out] DMIN1 +*> \verbatim +*> DMIN1 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in,out] DMIN2 +*> \verbatim +*> DMIN2 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in,out] DN +*> \verbatim +*> DN is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in,out] DN1 +*> \verbatim +*> DN1 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in,out] DN2 +*> \verbatim +*> DN2 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in,out] G +*> \verbatim +*> G is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in,out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> +*> These are passed as arguments in order to save their values +*> between calls to DLASQ3. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL, + $ ITER, NDIV, IEEE, TTYPE, DMIN1, DMIN2, DN, DN1, + $ DN2, G, TAU ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + LOGICAL IEEE + INTEGER I0, ITER, N0, NDIV, NFAIL, PP + DOUBLE PRECISION DESIG, DMIN, DMIN1, DMIN2, DN, DN1, DN2, G, + $ QMAX, SIGMA, TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION CBIAS + PARAMETER ( CBIAS = 1.50D0 ) + DOUBLE PRECISION ZERO, QURTR, HALF, ONE, TWO, HUNDRD + PARAMETER ( ZERO = 0.0D0, QURTR = 0.250D0, HALF = 0.5D0, + $ ONE = 1.0D0, TWO = 2.0D0, HUNDRD = 100.0D0 ) +* .. +* .. Local Scalars .. + INTEGER IPN4, J4, N0IN, NN, TTYPE + DOUBLE PRECISION EPS, S, T, TEMP, TOL, TOL2 +* .. +* .. External Subroutines .. + EXTERNAL DLASQ4, DLASQ5, DLASQ6 +* .. +* .. External Function .. + DOUBLE PRECISION DLAMCH + LOGICAL DISNAN + EXTERNAL DISNAN, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + N0IN = N0 + EPS = DLAMCH( 'Precision' ) + TOL = EPS*HUNDRD + TOL2 = TOL**2 +* +* Check for deflation. +* + 10 CONTINUE +* + IF( N0.LT.I0 ) + $ RETURN + IF( N0.EQ.I0 ) + $ GO TO 20 + NN = 4*N0 + PP + IF( N0.EQ.( I0+1 ) ) + $ GO TO 40 +* +* Check whether E(N0-1) is negligible, 1 eigenvalue. +* + IF( Z( NN-5 ).GT.TOL2*( SIGMA+Z( NN-3 ) ) .AND. + $ Z( NN-2*PP-4 ).GT.TOL2*Z( NN-7 ) ) + $ GO TO 30 +* + 20 CONTINUE +* + Z( 4*N0-3 ) = Z( 4*N0+PP-3 ) + SIGMA + N0 = N0 - 1 + GO TO 10 +* +* Check whether E(N0-2) is negligible, 2 eigenvalues. +* + 30 CONTINUE +* + IF( Z( NN-9 ).GT.TOL2*SIGMA .AND. + $ Z( NN-2*PP-8 ).GT.TOL2*Z( NN-11 ) ) + $ GO TO 50 +* + 40 CONTINUE +* + IF( Z( NN-3 ).GT.Z( NN-7 ) ) THEN + S = Z( NN-3 ) + Z( NN-3 ) = Z( NN-7 ) + Z( NN-7 ) = S + END IF + T = HALF*( ( Z( NN-7 )-Z( NN-3 ) )+Z( NN-5 ) ) + IF( Z( NN-5 ).GT.Z( NN-3 )*TOL2.AND.T.NE.ZERO ) THEN + S = Z( NN-3 )*( Z( NN-5 ) / T ) + IF( S.LE.T ) THEN + S = Z( NN-3 )*( Z( NN-5 ) / + $ ( T*( ONE+SQRT( ONE+S / T ) ) ) ) + ELSE + S = Z( NN-3 )*( Z( NN-5 ) / ( T+SQRT( T )*SQRT( T+S ) ) ) + END IF + T = Z( NN-7 ) + ( S+Z( NN-5 ) ) + Z( NN-3 ) = Z( NN-3 )*( Z( NN-7 ) / T ) + Z( NN-7 ) = T + END IF + Z( 4*N0-7 ) = Z( NN-7 ) + SIGMA + Z( 4*N0-3 ) = Z( NN-3 ) + SIGMA + N0 = N0 - 2 + GO TO 10 +* + 50 CONTINUE + IF( PP.EQ.2 ) + $ PP = 0 +* +* Reverse the qd-array, if warranted. +* + IF( DMIN.LE.ZERO .OR. N0.LT.N0IN ) THEN + IF( CBIAS*Z( 4*I0+PP-3 ).LT.Z( 4*N0+PP-3 ) ) THEN + IPN4 = 4*( I0+N0 ) + DO 60 J4 = 4*I0, 2*( I0+N0-1 ), 4 + TEMP = Z( J4-3 ) + Z( J4-3 ) = Z( IPN4-J4-3 ) + Z( IPN4-J4-3 ) = TEMP + TEMP = Z( J4-2 ) + Z( J4-2 ) = Z( IPN4-J4-2 ) + Z( IPN4-J4-2 ) = TEMP + TEMP = Z( J4-1 ) + Z( J4-1 ) = Z( IPN4-J4-5 ) + Z( IPN4-J4-5 ) = TEMP + TEMP = Z( J4 ) + Z( J4 ) = Z( IPN4-J4-4 ) + Z( IPN4-J4-4 ) = TEMP + 60 CONTINUE + IF( N0-I0.LE.4 ) THEN + Z( 4*N0+PP-1 ) = Z( 4*I0+PP-1 ) + Z( 4*N0-PP ) = Z( 4*I0-PP ) + END IF + DMIN2 = MIN( DMIN2, Z( 4*N0+PP-1 ) ) + Z( 4*N0+PP-1 ) = MIN( Z( 4*N0+PP-1 ), Z( 4*I0+PP-1 ), + $ Z( 4*I0+PP+3 ) ) + Z( 4*N0-PP ) = MIN( Z( 4*N0-PP ), Z( 4*I0-PP ), + $ Z( 4*I0-PP+4 ) ) + QMAX = MAX( QMAX, Z( 4*I0+PP-3 ), Z( 4*I0+PP+1 ) ) + DMIN = -ZERO + END IF + END IF +* +* Choose a shift. +* + CALL DLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, DN1, + $ DN2, TAU, TTYPE, G ) +* +* Call dqds until DMIN > 0. +* + 70 CONTINUE +* + CALL DLASQ5( I0, N0, Z, PP, TAU, SIGMA, DMIN, DMIN1, DMIN2, DN, + $ DN1, DN2, IEEE, EPS ) +* + NDIV = NDIV + ( N0-I0+2 ) + ITER = ITER + 1 +* +* Check status. +* + IF( DMIN.GE.ZERO .AND. DMIN1.GE.ZERO ) THEN +* +* Success. +* + GO TO 90 +* + ELSE IF( DMIN.LT.ZERO .AND. DMIN1.GT.ZERO .AND. + $ Z( 4*( N0-1 )-PP ).LT.TOL*( SIGMA+DN1 ) .AND. + $ ABS( DN ).LT.TOL*SIGMA ) THEN +* +* Convergence hidden by negative DN. +* + Z( 4*( N0-1 )-PP+2 ) = ZERO + DMIN = ZERO + GO TO 90 + ELSE IF( DMIN.LT.ZERO ) THEN +* +* TAU too big. Select new TAU and try again. +* + NFAIL = NFAIL + 1 + IF( TTYPE.LT.-22 ) THEN +* +* Failed twice. Play it safe. +* + TAU = ZERO + ELSE IF( DMIN1.GT.ZERO ) THEN +* +* Late failure. Gives excellent shift. +* + TAU = ( TAU+DMIN )*( ONE-TWO*EPS ) + TTYPE = TTYPE - 11 + ELSE +* +* Early failure. Divide by 4. +* + TAU = QURTR*TAU + TTYPE = TTYPE - 12 + END IF + GO TO 70 + ELSE IF( DISNAN( DMIN ) ) THEN +* +* NaN. +* + IF( TAU.EQ.ZERO ) THEN + GO TO 80 + ELSE + TAU = ZERO + GO TO 70 + END IF + ELSE +* +* Possible underflow. Play it safe. +* + GO TO 80 + END IF +* +* Risk of underflow. +* + 80 CONTINUE + CALL DLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN, DN1, DN2 ) + NDIV = NDIV + ( N0-I0+2 ) + ITER = ITER + 1 + TAU = ZERO +* + 90 CONTINUE + IF( TAU.LT.SIGMA ) THEN + DESIG = DESIG + TAU + T = SIGMA + DESIG + DESIG = DESIG - ( T-SIGMA ) + ELSE + T = SIGMA + TAU + DESIG = SIGMA - ( T-TAU ) + DESIG + END IF + SIGMA = T +* + RETURN +* +* End of DLASQ3 +* + END diff --git a/dspl/liblapack/SRC/dlasq4.f b/dspl/liblapack/SRC/dlasq4.f new file mode 100644 index 0000000..d4ddbbc --- /dev/null +++ b/dspl/liblapack/SRC/dlasq4.f @@ -0,0 +1,424 @@ +*> \brief \b DLASQ4 computes an approximation to the smallest eigenvalue using values of d from the previous transform. Used by sbdsqr. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASQ4 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, +* DN1, DN2, TAU, TTYPE, G ) +* +* .. Scalar Arguments .. +* INTEGER I0, N0, N0IN, PP, TTYPE +* DOUBLE PRECISION DMIN, DMIN1, DMIN2, DN, DN1, DN2, G, TAU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASQ4 computes an approximation TAU to the smallest eigenvalue +*> using values of d from the previous transform. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] I0 +*> \verbatim +*> I0 is INTEGER +*> First index. +*> \endverbatim +*> +*> \param[in] N0 +*> \verbatim +*> N0 is INTEGER +*> Last index. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension ( 4*N0 ) +*> Z holds the qd array. +*> \endverbatim +*> +*> \param[in] PP +*> \verbatim +*> PP is INTEGER +*> PP=0 for ping, PP=1 for pong. +*> \endverbatim +*> +*> \param[in] N0IN +*> \verbatim +*> N0IN is INTEGER +*> The value of N0 at start of EIGTEST. +*> \endverbatim +*> +*> \param[in] DMIN +*> \verbatim +*> DMIN is DOUBLE PRECISION +*> Minimum value of d. +*> \endverbatim +*> +*> \param[in] DMIN1 +*> \verbatim +*> DMIN1 is DOUBLE PRECISION +*> Minimum value of d, excluding D( N0 ). +*> \endverbatim +*> +*> \param[in] DMIN2 +*> \verbatim +*> DMIN2 is DOUBLE PRECISION +*> Minimum value of d, excluding D( N0 ) and D( N0-1 ). +*> \endverbatim +*> +*> \param[in] DN +*> \verbatim +*> DN is DOUBLE PRECISION +*> d(N) +*> \endverbatim +*> +*> \param[in] DN1 +*> \verbatim +*> DN1 is DOUBLE PRECISION +*> d(N-1) +*> \endverbatim +*> +*> \param[in] DN2 +*> \verbatim +*> DN2 is DOUBLE PRECISION +*> d(N-2) +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> This is the shift. +*> \endverbatim +*> +*> \param[out] TTYPE +*> \verbatim +*> TTYPE is INTEGER +*> Shift type. +*> \endverbatim +*> +*> \param[in,out] G +*> \verbatim +*> G is DOUBLE PRECISION +*> G is passed as an argument in order to save its value between +*> calls to DLASQ4. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> CNST1 = 9/16 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, + $ DN1, DN2, TAU, TTYPE, G ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER I0, N0, N0IN, PP, TTYPE + DOUBLE PRECISION DMIN, DMIN1, DMIN2, DN, DN1, DN2, G, TAU +* .. +* .. Array Arguments .. + DOUBLE PRECISION Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION CNST1, CNST2, CNST3 + PARAMETER ( CNST1 = 0.5630D0, CNST2 = 1.010D0, + $ CNST3 = 1.050D0 ) + DOUBLE PRECISION QURTR, THIRD, HALF, ZERO, ONE, TWO, HUNDRD + PARAMETER ( QURTR = 0.250D0, THIRD = 0.3330D0, + $ HALF = 0.50D0, ZERO = 0.0D0, ONE = 1.0D0, + $ TWO = 2.0D0, HUNDRD = 100.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I4, NN, NP + DOUBLE PRECISION A2, B1, B2, GAM, GAP1, GAP2, S +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* A negative DMIN forces the shift to take that absolute value +* TTYPE records the type of shift. +* + IF( DMIN.LE.ZERO ) THEN + TAU = -DMIN + TTYPE = -1 + RETURN + END IF +* + NN = 4*N0 + PP + IF( N0IN.EQ.N0 ) THEN +* +* No eigenvalues deflated. +* + IF( DMIN.EQ.DN .OR. DMIN.EQ.DN1 ) THEN +* + B1 = SQRT( Z( NN-3 ) )*SQRT( Z( NN-5 ) ) + B2 = SQRT( Z( NN-7 ) )*SQRT( Z( NN-9 ) ) + A2 = Z( NN-7 ) + Z( NN-5 ) +* +* Cases 2 and 3. +* + IF( DMIN.EQ.DN .AND. DMIN1.EQ.DN1 ) THEN + GAP2 = DMIN2 - A2 - DMIN2*QURTR + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2 ) THEN + GAP1 = A2 - DN - ( B2 / GAP2 )*B2 + ELSE + GAP1 = A2 - DN - ( B1+B2 ) + END IF + IF( GAP1.GT.ZERO .AND. GAP1.GT.B1 ) THEN + S = MAX( DN-( B1 / GAP1 )*B1, HALF*DMIN ) + TTYPE = -2 + ELSE + S = ZERO + IF( DN.GT.B1 ) + $ S = DN - B1 + IF( A2.GT.( B1+B2 ) ) + $ S = MIN( S, A2-( B1+B2 ) ) + S = MAX( S, THIRD*DMIN ) + TTYPE = -3 + END IF + ELSE +* +* Case 4. +* + TTYPE = -4 + S = QURTR*DMIN + IF( DMIN.EQ.DN ) THEN + GAM = DN + A2 = ZERO + IF( Z( NN-5 ) .GT. Z( NN-7 ) ) + $ RETURN + B2 = Z( NN-5 ) / Z( NN-7 ) + NP = NN - 9 + ELSE + NP = NN - 2*PP + GAM = DN1 + IF( Z( NP-4 ) .GT. Z( NP-2 ) ) + $ RETURN + A2 = Z( NP-4 ) / Z( NP-2 ) + IF( Z( NN-9 ) .GT. Z( NN-11 ) ) + $ RETURN + B2 = Z( NN-9 ) / Z( NN-11 ) + NP = NN - 13 + END IF +* +* Approximate contribution to norm squared from I < NN-1. +* + A2 = A2 + B2 + DO 10 I4 = NP, 4*I0 - 1 + PP, -4 + IF( B2.EQ.ZERO ) + $ GO TO 20 + B1 = B2 + IF( Z( I4 ) .GT. Z( I4-2 ) ) + $ RETURN + B2 = B2*( Z( I4 ) / Z( I4-2 ) ) + A2 = A2 + B2 + IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 ) + $ GO TO 20 + 10 CONTINUE + 20 CONTINUE + A2 = CNST3*A2 +* +* Rayleigh quotient residual bound. +* + IF( A2.LT.CNST1 ) + $ S = GAM*( ONE-SQRT( A2 ) ) / ( ONE+A2 ) + END IF + ELSE IF( DMIN.EQ.DN2 ) THEN +* +* Case 5. +* + TTYPE = -5 + S = QURTR*DMIN +* +* Compute contribution to norm squared from I > NN-2. +* + NP = NN - 2*PP + B1 = Z( NP-2 ) + B2 = Z( NP-6 ) + GAM = DN2 + IF( Z( NP-8 ).GT.B2 .OR. Z( NP-4 ).GT.B1 ) + $ RETURN + A2 = ( Z( NP-8 ) / B2 )*( ONE+Z( NP-4 ) / B1 ) +* +* Approximate contribution to norm squared from I < NN-2. +* + IF( N0-I0.GT.2 ) THEN + B2 = Z( NN-13 ) / Z( NN-15 ) + A2 = A2 + B2 + DO 30 I4 = NN - 17, 4*I0 - 1 + PP, -4 + IF( B2.EQ.ZERO ) + $ GO TO 40 + B1 = B2 + IF( Z( I4 ) .GT. Z( I4-2 ) ) + $ RETURN + B2 = B2*( Z( I4 ) / Z( I4-2 ) ) + A2 = A2 + B2 + IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 ) + $ GO TO 40 + 30 CONTINUE + 40 CONTINUE + A2 = CNST3*A2 + END IF +* + IF( A2.LT.CNST1 ) + $ S = GAM*( ONE-SQRT( A2 ) ) / ( ONE+A2 ) + ELSE +* +* Case 6, no information to guide us. +* + IF( TTYPE.EQ.-6 ) THEN + G = G + THIRD*( ONE-G ) + ELSE IF( TTYPE.EQ.-18 ) THEN + G = QURTR*THIRD + ELSE + G = QURTR + END IF + S = G*DMIN + TTYPE = -6 + END IF +* + ELSE IF( N0IN.EQ.( N0+1 ) ) THEN +* +* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. +* + IF( DMIN1.EQ.DN1 .AND. DMIN2.EQ.DN2 ) THEN +* +* Cases 7 and 8. +* + TTYPE = -7 + S = THIRD*DMIN1 + IF( Z( NN-5 ).GT.Z( NN-7 ) ) + $ RETURN + B1 = Z( NN-5 ) / Z( NN-7 ) + B2 = B1 + IF( B2.EQ.ZERO ) + $ GO TO 60 + DO 50 I4 = 4*N0 - 9 + PP, 4*I0 - 1 + PP, -4 + A2 = B1 + IF( Z( I4 ).GT.Z( I4-2 ) ) + $ RETURN + B1 = B1*( Z( I4 ) / Z( I4-2 ) ) + B2 = B2 + B1 + IF( HUNDRD*MAX( B1, A2 ).LT.B2 ) + $ GO TO 60 + 50 CONTINUE + 60 CONTINUE + B2 = SQRT( CNST3*B2 ) + A2 = DMIN1 / ( ONE+B2**2 ) + GAP2 = HALF*DMIN2 - A2 + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2*A2 ) THEN + S = MAX( S, A2*( ONE-CNST2*A2*( B2 / GAP2 )*B2 ) ) + ELSE + S = MAX( S, A2*( ONE-CNST2*B2 ) ) + TTYPE = -8 + END IF + ELSE +* +* Case 9. +* + S = QURTR*DMIN1 + IF( DMIN1.EQ.DN1 ) + $ S = HALF*DMIN1 + TTYPE = -9 + END IF +* + ELSE IF( N0IN.EQ.( N0+2 ) ) THEN +* +* Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. +* +* Cases 10 and 11. +* + IF( DMIN2.EQ.DN2 .AND. TWO*Z( NN-5 ).LT.Z( NN-7 ) ) THEN + TTYPE = -10 + S = THIRD*DMIN2 + IF( Z( NN-5 ).GT.Z( NN-7 ) ) + $ RETURN + B1 = Z( NN-5 ) / Z( NN-7 ) + B2 = B1 + IF( B2.EQ.ZERO ) + $ GO TO 80 + DO 70 I4 = 4*N0 - 9 + PP, 4*I0 - 1 + PP, -4 + IF( Z( I4 ).GT.Z( I4-2 ) ) + $ RETURN + B1 = B1*( Z( I4 ) / Z( I4-2 ) ) + B2 = B2 + B1 + IF( HUNDRD*B1.LT.B2 ) + $ GO TO 80 + 70 CONTINUE + 80 CONTINUE + B2 = SQRT( CNST3*B2 ) + A2 = DMIN2 / ( ONE+B2**2 ) + GAP2 = Z( NN-7 ) + Z( NN-9 ) - + $ SQRT( Z( NN-11 ) )*SQRT( Z( NN-9 ) ) - A2 + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2*A2 ) THEN + S = MAX( S, A2*( ONE-CNST2*A2*( B2 / GAP2 )*B2 ) ) + ELSE + S = MAX( S, A2*( ONE-CNST2*B2 ) ) + END IF + ELSE + S = QURTR*DMIN2 + TTYPE = -11 + END IF + ELSE IF( N0IN.GT.( N0+2 ) ) THEN +* +* Case 12, more than two eigenvalues deflated. No information. +* + S = ZERO + TTYPE = -12 + END IF +* + TAU = S + RETURN +* +* End of DLASQ4 +* + END diff --git a/dspl/liblapack/SRC/dlasq5.f b/dspl/liblapack/SRC/dlasq5.f new file mode 100644 index 0000000..3812c87 --- /dev/null +++ b/dspl/liblapack/SRC/dlasq5.f @@ -0,0 +1,410 @@ +*> \brief \b DLASQ5 computes one dqds transform in ping-pong form. Used by sbdsqr and sstegr. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASQ5 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASQ5( I0, N0, Z, PP, TAU, SIGMA, DMIN, DMIN1, DMIN2, DN, +* DNM1, DNM2, IEEE, EPS ) +* +* .. Scalar Arguments .. +* LOGICAL IEEE +* INTEGER I0, N0, PP +* DOUBLE PRECISION DMIN, DMIN1, DMIN2, DN, DNM1, DNM2, TAU, SIGMA, EPS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASQ5 computes one dqds transform in ping-pong form, one +*> version for IEEE machines another for non IEEE machines. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] I0 +*> \verbatim +*> I0 is INTEGER +*> First index. +*> \endverbatim +*> +*> \param[in] N0 +*> \verbatim +*> N0 is INTEGER +*> Last index. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension ( 4*N ) +*> Z holds the qd array. EMIN is stored in Z(4*N0) to avoid +*> an extra argument. +*> \endverbatim +*> +*> \param[in] PP +*> \verbatim +*> PP is INTEGER +*> PP=0 for ping, PP=1 for pong. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION +*> This is the shift. +*> \endverbatim +*> +*> \param[in] SIGMA +*> \verbatim +*> SIGMA is DOUBLE PRECISION +*> This is the accumulated shift up to this step. +*> \endverbatim +*> +*> \param[out] DMIN +*> \verbatim +*> DMIN is DOUBLE PRECISION +*> Minimum value of d. +*> \endverbatim +*> +*> \param[out] DMIN1 +*> \verbatim +*> DMIN1 is DOUBLE PRECISION +*> Minimum value of d, excluding D( N0 ). +*> \endverbatim +*> +*> \param[out] DMIN2 +*> \verbatim +*> DMIN2 is DOUBLE PRECISION +*> Minimum value of d, excluding D( N0 ) and D( N0-1 ). +*> \endverbatim +*> +*> \param[out] DN +*> \verbatim +*> DN is DOUBLE PRECISION +*> d(N0), the last value of d. +*> \endverbatim +*> +*> \param[out] DNM1 +*> \verbatim +*> DNM1 is DOUBLE PRECISION +*> d(N0-1). +*> \endverbatim +*> +*> \param[out] DNM2 +*> \verbatim +*> DNM2 is DOUBLE PRECISION +*> d(N0-2). +*> \endverbatim +*> +*> \param[in] IEEE +*> \verbatim +*> IEEE is LOGICAL +*> Flag for IEEE or non IEEE arithmetic. +*> \endverbatim +*> +*> \param[in] EPS +*> \verbatim +*> EPS is DOUBLE PRECISION +*> This is the value of epsilon used. +*> \endverbatim +*> +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DLASQ5( I0, N0, Z, PP, TAU, SIGMA, DMIN, DMIN1, DMIN2, + $ DN, DNM1, DNM2, IEEE, EPS ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + LOGICAL IEEE + INTEGER I0, N0, PP + DOUBLE PRECISION DMIN, DMIN1, DMIN2, DN, DNM1, DNM2, TAU, + $ SIGMA, EPS +* .. +* .. Array Arguments .. + DOUBLE PRECISION Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameter .. + DOUBLE PRECISION ZERO, HALF + PARAMETER ( ZERO = 0.0D0, HALF = 0.5 ) +* .. +* .. Local Scalars .. + INTEGER J4, J4P2 + DOUBLE PRECISION D, EMIN, TEMP, DTHRESH +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( ( N0-I0-1 ).LE.0 ) + $ RETURN +* + DTHRESH = EPS*(SIGMA+TAU) + IF( TAU.LT.DTHRESH*HALF ) TAU = ZERO + IF( TAU.NE.ZERO ) THEN + J4 = 4*I0 + PP - 3 + EMIN = Z( J4+4 ) + D = Z( J4 ) - TAU + DMIN = D + DMIN1 = -Z( J4 ) +* + IF( IEEE ) THEN +* +* Code for IEEE arithmetic. +* + IF( PP.EQ.0 ) THEN + DO 10 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + TEMP = Z( J4+1 ) / Z( J4-2 ) + D = D*TEMP - TAU + DMIN = MIN( DMIN, D ) + Z( J4 ) = Z( J4-1 )*TEMP + EMIN = MIN( Z( J4 ), EMIN ) + 10 CONTINUE + ELSE + DO 20 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + TEMP = Z( J4+2 ) / Z( J4-3 ) + D = D*TEMP - TAU + DMIN = MIN( DMIN, D ) + Z( J4-1 ) = Z( J4 )*TEMP + EMIN = MIN( Z( J4-1 ), EMIN ) + 20 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU + DMIN = MIN( DMIN, DN ) +* + ELSE +* +* Code for non IEEE arithmetic. +* + IF( PP.EQ.0 ) THEN + DO 30 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + IF( D.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) ) + D = Z( J4+1 )*( D / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4 ) ) + 30 CONTINUE + ELSE + DO 40 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + IF( D.LT.ZERO ) THEN + RETURN + ELSE + Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) ) + D = Z( J4+2 )*( D / Z( J4-3 ) ) - TAU + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4-1 ) ) + 40 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + IF( DNM2.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + IF( DNM1.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, DN ) +* + END IF + ELSE +* This is the version that sets d's to zero if they are small enough + J4 = 4*I0 + PP - 3 + EMIN = Z( J4+4 ) + D = Z( J4 ) - TAU + DMIN = D + DMIN1 = -Z( J4 ) + IF( IEEE ) THEN +* +* Code for IEEE arithmetic. +* + IF( PP.EQ.0 ) THEN + DO 50 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + TEMP = Z( J4+1 ) / Z( J4-2 ) + D = D*TEMP - TAU + IF( D.LT.DTHRESH ) D = ZERO + DMIN = MIN( DMIN, D ) + Z( J4 ) = Z( J4-1 )*TEMP + EMIN = MIN( Z( J4 ), EMIN ) + 50 CONTINUE + ELSE + DO 60 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + TEMP = Z( J4+2 ) / Z( J4-3 ) + D = D*TEMP - TAU + IF( D.LT.DTHRESH ) D = ZERO + DMIN = MIN( DMIN, D ) + Z( J4-1 ) = Z( J4 )*TEMP + EMIN = MIN( Z( J4-1 ), EMIN ) + 60 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU + DMIN = MIN( DMIN, DN ) +* + ELSE +* +* Code for non IEEE arithmetic. +* + IF( PP.EQ.0 ) THEN + DO 70 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + IF( D.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) ) + D = Z( J4+1 )*( D / Z( J4-2 ) ) - TAU + END IF + IF( D.LT.DTHRESH) D = ZERO + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4 ) ) + 70 CONTINUE + ELSE + DO 80 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + IF( D.LT.ZERO ) THEN + RETURN + ELSE + Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) ) + D = Z( J4+2 )*( D / Z( J4-3 ) ) - TAU + END IF + IF( D.LT.DTHRESH) D = ZERO + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4-1 ) ) + 80 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + IF( DNM2.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + IF( DNM1.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, DN ) +* + END IF + END IF +* + Z( J4+2 ) = DN + Z( 4*N0-PP ) = EMIN + RETURN +* +* End of DLASQ5 +* + END diff --git a/dspl/liblapack/SRC/dlasq6.f b/dspl/liblapack/SRC/dlasq6.f new file mode 100644 index 0000000..d871386 --- /dev/null +++ b/dspl/liblapack/SRC/dlasq6.f @@ -0,0 +1,254 @@ +*> \brief \b DLASQ6 computes one dqd transform in ping-pong form. Used by sbdsqr and sstegr. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASQ6 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN, +* DNM1, DNM2 ) +* +* .. Scalar Arguments .. +* INTEGER I0, N0, PP +* DOUBLE PRECISION DMIN, DMIN1, DMIN2, DN, DNM1, DNM2 +* .. +* .. Array Arguments .. +* DOUBLE PRECISION Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASQ6 computes one dqd (shift equal to zero) transform in +*> ping-pong form, with protection against underflow and overflow. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] I0 +*> \verbatim +*> I0 is INTEGER +*> First index. +*> \endverbatim +*> +*> \param[in] N0 +*> \verbatim +*> N0 is INTEGER +*> Last index. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension ( 4*N ) +*> Z holds the qd array. EMIN is stored in Z(4*N0) to avoid +*> an extra argument. +*> \endverbatim +*> +*> \param[in] PP +*> \verbatim +*> PP is INTEGER +*> PP=0 for ping, PP=1 for pong. +*> \endverbatim +*> +*> \param[out] DMIN +*> \verbatim +*> DMIN is DOUBLE PRECISION +*> Minimum value of d. +*> \endverbatim +*> +*> \param[out] DMIN1 +*> \verbatim +*> DMIN1 is DOUBLE PRECISION +*> Minimum value of d, excluding D( N0 ). +*> \endverbatim +*> +*> \param[out] DMIN2 +*> \verbatim +*> DMIN2 is DOUBLE PRECISION +*> Minimum value of d, excluding D( N0 ) and D( N0-1 ). +*> \endverbatim +*> +*> \param[out] DN +*> \verbatim +*> DN is DOUBLE PRECISION +*> d(N0), the last value of d. +*> \endverbatim +*> +*> \param[out] DNM1 +*> \verbatim +*> DNM1 is DOUBLE PRECISION +*> d(N0-1). +*> \endverbatim +*> +*> \param[out] DNM2 +*> \verbatim +*> DNM2 is DOUBLE PRECISION +*> d(N0-2). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN, + $ DNM1, DNM2 ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER I0, N0, PP + DOUBLE PRECISION DMIN, DMIN1, DMIN2, DN, DNM1, DNM2 +* .. +* .. Array Arguments .. + DOUBLE PRECISION Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameter .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER J4, J4P2 + DOUBLE PRECISION D, EMIN, SAFMIN, TEMP +* .. +* .. External Function .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( ( N0-I0-1 ).LE.0 ) + $ RETURN +* + SAFMIN = DLAMCH( 'Safe minimum' ) + J4 = 4*I0 + PP - 3 + EMIN = Z( J4+4 ) + D = Z( J4 ) + DMIN = D +* + IF( PP.EQ.0 ) THEN + DO 10 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + IF( Z( J4-2 ).EQ.ZERO ) THEN + Z( J4 ) = ZERO + D = Z( J4+1 ) + DMIN = D + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4+1 ).LT.Z( J4-2 ) .AND. + $ SAFMIN*Z( J4-2 ).LT.Z( J4+1 ) ) THEN + TEMP = Z( J4+1 ) / Z( J4-2 ) + Z( J4 ) = Z( J4-1 )*TEMP + D = D*TEMP + ELSE + Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) ) + D = Z( J4+1 )*( D / Z( J4-2 ) ) + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4 ) ) + 10 CONTINUE + ELSE + DO 20 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + IF( Z( J4-3 ).EQ.ZERO ) THEN + Z( J4-1 ) = ZERO + D = Z( J4+2 ) + DMIN = D + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4+2 ).LT.Z( J4-3 ) .AND. + $ SAFMIN*Z( J4-3 ).LT.Z( J4+2 ) ) THEN + TEMP = Z( J4+2 ) / Z( J4-3 ) + Z( J4-1 ) = Z( J4 )*TEMP + D = D*TEMP + ELSE + Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) ) + D = Z( J4+2 )*( D / Z( J4-3 ) ) + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4-1 ) ) + 20 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + IF( Z( J4-2 ).EQ.ZERO ) THEN + Z( J4 ) = ZERO + DNM1 = Z( J4P2+2 ) + DMIN = DNM1 + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4P2+2 ).LT.Z( J4-2 ) .AND. + $ SAFMIN*Z( J4-2 ).LT.Z( J4P2+2 ) ) THEN + TEMP = Z( J4P2+2 ) / Z( J4-2 ) + Z( J4 ) = Z( J4P2 )*TEMP + DNM1 = DNM2*TEMP + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) + END IF + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + IF( Z( J4-2 ).EQ.ZERO ) THEN + Z( J4 ) = ZERO + DN = Z( J4P2+2 ) + DMIN = DN + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4P2+2 ).LT.Z( J4-2 ) .AND. + $ SAFMIN*Z( J4-2 ).LT.Z( J4P2+2 ) ) THEN + TEMP = Z( J4P2+2 ) / Z( J4-2 ) + Z( J4 ) = Z( J4P2 )*TEMP + DN = DNM1*TEMP + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) + END IF + DMIN = MIN( DMIN, DN ) +* + Z( J4+2 ) = DN + Z( 4*N0-PP ) = EMIN + RETURN +* +* End of DLASQ6 +* + END diff --git a/dspl/liblapack/SRC/dlasr.f b/dspl/liblapack/SRC/dlasr.f new file mode 100644 index 0000000..6059c62 --- /dev/null +++ b/dspl/liblapack/SRC/dlasr.f @@ -0,0 +1,436 @@ +*> \brief \b DLASR applies a sequence of plane rotations to a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, PIVOT, SIDE +* INTEGER LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( * ), S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASR applies a sequence of plane rotations to a real matrix A, +*> from either the left or the right. +*> +*> When SIDE = 'L', the transformation takes the form +*> +*> A := P*A +*> +*> and when SIDE = 'R', the transformation takes the form +*> +*> A := A*P**T +*> +*> where P is an orthogonal matrix consisting of a sequence of z plane +*> rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', +*> and P**T is the transpose of P. +*> +*> When DIRECT = 'F' (Forward sequence), then +*> +*> P = P(z-1) * ... * P(2) * P(1) +*> +*> and when DIRECT = 'B' (Backward sequence), then +*> +*> P = P(1) * P(2) * ... * P(z-1) +*> +*> where P(k) is a plane rotation matrix defined by the 2-by-2 rotation +*> +*> R(k) = ( c(k) s(k) ) +*> = ( -s(k) c(k) ). +*> +*> When PIVOT = 'V' (Variable pivot), the rotation is performed +*> for the plane (k,k+1), i.e., P(k) has the form +*> +*> P(k) = ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( c(k) s(k) ) +*> ( -s(k) c(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> +*> where R(k) appears as a rank-2 modification to the identity matrix in +*> rows and columns k and k+1. +*> +*> When PIVOT = 'T' (Top pivot), the rotation is performed for the +*> plane (1,k+1), so P(k) has the form +*> +*> P(k) = ( c(k) s(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( -s(k) c(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> +*> where R(k) appears in rows and columns 1 and k+1. +*> +*> Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is +*> performed for the plane (k,z), giving P(k) the form +*> +*> P(k) = ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( c(k) s(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( -s(k) c(k) ) +*> +*> where R(k) appears in rows and columns k and z. The rotations are +*> performed without ever forming P(k) explicitly. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> Specifies whether the plane rotation matrix P is applied to +*> A on the left or the right. +*> = 'L': Left, compute A := P*A +*> = 'R': Right, compute A:= A*P**T +*> \endverbatim +*> +*> \param[in] PIVOT +*> \verbatim +*> PIVOT is CHARACTER*1 +*> Specifies the plane for which P(k) is a plane rotation +*> matrix. +*> = 'V': Variable pivot, the plane (k,k+1) +*> = 'T': Top pivot, the plane (1,k+1) +*> = 'B': Bottom pivot, the plane (k,z) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies whether P is a forward or backward sequence of +*> plane rotations. +*> = 'F': Forward, P = P(z-1)*...*P(2)*P(1) +*> = 'B': Backward, P = P(1)*P(2)*...*P(z-1) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. If m <= 1, an immediate +*> return is effected. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. If n <= 1, an +*> immediate return is effected. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> The cosines c(k) of the plane rotations. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> The sines s(k) of the plane rotations. The 2-by-2 plane +*> rotation part of the matrix P(k), R(k), has the form +*> R(k) = ( c(k) s(k) ) +*> ( -s(k) c(k) ). +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The M-by-N matrix A. On exit, A is overwritten by P*A if +*> SIDE = 'R' or by A*P**T if SIDE = 'L'. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, PIVOT, SIDE + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( * ), S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J + DOUBLE PRECISION CTEMP, STEMP, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.( LSAME( SIDE, 'L' ) .OR. LSAME( SIDE, 'R' ) ) ) THEN + INFO = 1 + ELSE IF( .NOT.( LSAME( PIVOT, 'V' ) .OR. LSAME( PIVOT, + $ 'T' ) .OR. LSAME( PIVOT, 'B' ) ) ) THEN + INFO = 2 + ELSE IF( .NOT.( LSAME( DIRECT, 'F' ) .OR. LSAME( DIRECT, 'B' ) ) ) + $ THEN + INFO = 3 + ELSE IF( M.LT.0 ) THEN + INFO = 4 + ELSE IF( N.LT.0 ) THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = 9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASR ', INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form P * A +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 20 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 10 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 40 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 30 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 30 CONTINUE + END IF + 40 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 60 J = 2, M + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 50 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 80 J = M, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 70 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 70 CONTINUE + END IF + 80 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 100 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 90 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 120 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 110 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 110 CONTINUE + END IF + 120 CONTINUE + END IF + END IF + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form A * P**T +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 140 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 130 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 130 CONTINUE + END IF + 140 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 160 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 150 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 150 CONTINUE + END IF + 160 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 180 J = 2, N + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 170 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 170 CONTINUE + END IF + 180 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 200 J = N, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 190 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 190 CONTINUE + END IF + 200 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 220 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 210 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 210 CONTINUE + END IF + 220 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 240 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 230 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 230 CONTINUE + END IF + 240 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of DLASR +* + END diff --git a/dspl/liblapack/SRC/dlasrt.f b/dspl/liblapack/SRC/dlasrt.f new file mode 100644 index 0000000..4705311 --- /dev/null +++ b/dspl/liblapack/SRC/dlasrt.f @@ -0,0 +1,303 @@ +*> \brief \b DLASRT sorts numbers in increasing or decreasing order. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASRT( ID, N, D, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER ID +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Sort the numbers in D in increasing order (if ID = 'I') or +*> in decreasing order (if ID = 'D' ). +*> +*> Use Quick Sort, reverting to Insertion sort on arrays of +*> size <= 20. Dimension of STACK limits N to about 2**32. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ID +*> \verbatim +*> ID is CHARACTER*1 +*> = 'I': sort D in increasing order; +*> = 'D': sort D in decreasing order. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of the array D. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the array to be sorted. +*> On exit, D has been sorted into increasing order +*> (D(1) <= ... <= D(N) ) or into decreasing order +*> (D(1) >= ... >= D(N) ), depending on ID. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DLASRT( ID, N, D, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER ID + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER SELECT + PARAMETER ( SELECT = 20 ) +* .. +* .. Local Scalars .. + INTEGER DIR, ENDD, I, J, START, STKPNT + DOUBLE PRECISION D1, D2, D3, DMNMX, TMP +* .. +* .. Local Arrays .. + INTEGER STACK( 2, 32 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + DIR = -1 + IF( LSAME( ID, 'D' ) ) THEN + DIR = 0 + ELSE IF( LSAME( ID, 'I' ) ) THEN + DIR = 1 + END IF + IF( DIR.EQ.-1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASRT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + STKPNT = 1 + STACK( 1, 1 ) = 1 + STACK( 2, 1 ) = N + 10 CONTINUE + START = STACK( 1, STKPNT ) + ENDD = STACK( 2, STKPNT ) + STKPNT = STKPNT - 1 + IF( ENDD-START.LE.SELECT .AND. ENDD-START.GT.0 ) THEN +* +* Do Insertion sort on D( START:ENDD ) +* + IF( DIR.EQ.0 ) THEN +* +* Sort into decreasing order +* + DO 30 I = START + 1, ENDD + DO 20 J = I, START + 1, -1 + IF( D( J ).GT.D( J-1 ) ) THEN + DMNMX = D( J ) + D( J ) = D( J-1 ) + D( J-1 ) = DMNMX + ELSE + GO TO 30 + END IF + 20 CONTINUE + 30 CONTINUE +* + ELSE +* +* Sort into increasing order +* + DO 50 I = START + 1, ENDD + DO 40 J = I, START + 1, -1 + IF( D( J ).LT.D( J-1 ) ) THEN + DMNMX = D( J ) + D( J ) = D( J-1 ) + D( J-1 ) = DMNMX + ELSE + GO TO 50 + END IF + 40 CONTINUE + 50 CONTINUE +* + END IF +* + ELSE IF( ENDD-START.GT.SELECT ) THEN +* +* Partition D( START:ENDD ) and stack parts, largest one first +* +* Choose partition entry as median of 3 +* + D1 = D( START ) + D2 = D( ENDD ) + I = ( START+ENDD ) / 2 + D3 = D( I ) + IF( D1.LT.D2 ) THEN + IF( D3.LT.D1 ) THEN + DMNMX = D1 + ELSE IF( D3.LT.D2 ) THEN + DMNMX = D3 + ELSE + DMNMX = D2 + END IF + ELSE + IF( D3.LT.D2 ) THEN + DMNMX = D2 + ELSE IF( D3.LT.D1 ) THEN + DMNMX = D3 + ELSE + DMNMX = D1 + END IF + END IF +* + IF( DIR.EQ.0 ) THEN +* +* Sort into decreasing order +* + I = START - 1 + J = ENDD + 1 + 60 CONTINUE + 70 CONTINUE + J = J - 1 + IF( D( J ).LT.DMNMX ) + $ GO TO 70 + 80 CONTINUE + I = I + 1 + IF( D( I ).GT.DMNMX ) + $ GO TO 80 + IF( I.LT.J ) THEN + TMP = D( I ) + D( I ) = D( J ) + D( J ) = TMP + GO TO 60 + END IF + IF( J-START.GT.ENDD-J-1 ) THEN + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + ELSE + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + END IF + ELSE +* +* Sort into increasing order +* + I = START - 1 + J = ENDD + 1 + 90 CONTINUE + 100 CONTINUE + J = J - 1 + IF( D( J ).GT.DMNMX ) + $ GO TO 100 + 110 CONTINUE + I = I + 1 + IF( D( I ).LT.DMNMX ) + $ GO TO 110 + IF( I.LT.J ) THEN + TMP = D( I ) + D( I ) = D( J ) + D( J ) = TMP + GO TO 90 + END IF + IF( J-START.GT.ENDD-J-1 ) THEN + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + ELSE + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + END IF + END IF + END IF + IF( STKPNT.GT.0 ) + $ GO TO 10 + RETURN +* +* End of DLASRT +* + END diff --git a/dspl/liblapack/SRC/dlassq.f b/dspl/liblapack/SRC/dlassq.f new file mode 100644 index 0000000..885395e --- /dev/null +++ b/dspl/liblapack/SRC/dlassq.f @@ -0,0 +1,155 @@ +*> \brief \b DLASSQ updates a sum of squares represented in scaled form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASSQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASSQ( N, X, INCX, SCALE, SUMSQ ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* DOUBLE PRECISION SCALE, SUMSQ +* .. +* .. Array Arguments .. +* DOUBLE PRECISION X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASSQ returns the values scl and smsq such that +*> +*> ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, +*> +*> where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is +*> assumed to be non-negative and scl returns the value +*> +*> scl = max( scale, abs( x( i ) ) ). +*> +*> scale and sumsq must be supplied in SCALE and SUMSQ and +*> scl and smsq are overwritten on SCALE and SUMSQ respectively. +*> +*> The routine makes only one pass through the vector x. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of elements to be used from the vector X. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (N) +*> The vector for which a scaled sum of squares is computed. +*> x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive values of the vector X. +*> INCX > 0. +*> \endverbatim +*> +*> \param[in,out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> On entry, the value scale in the equation above. +*> On exit, SCALE is overwritten with scl , the scaling factor +*> for the sum of squares. +*> \endverbatim +*> +*> \param[in,out] SUMSQ +*> \verbatim +*> SUMSQ is DOUBLE PRECISION +*> On entry, the value sumsq in the equation above. +*> On exit, SUMSQ is overwritten with smsq , the basic sum of +*> squares from which scl has been factored out. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLASSQ( N, X, INCX, SCALE, SUMSQ ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCX, N + DOUBLE PRECISION SCALE, SUMSQ +* .. +* .. Array Arguments .. + DOUBLE PRECISION X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER IX + DOUBLE PRECISION ABSXI +* .. +* .. External Functions .. + LOGICAL DISNAN + EXTERNAL DISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + IF( N.GT.0 ) THEN + DO 10 IX = 1, 1 + ( N-1 )*INCX, INCX + ABSXI = ABS( X( IX ) ) + IF( ABSXI.GT.ZERO.OR.DISNAN( ABSXI ) ) THEN + IF( SCALE.LT.ABSXI ) THEN + SUMSQ = 1 + SUMSQ*( SCALE / ABSXI )**2 + SCALE = ABSXI + ELSE + SUMSQ = SUMSQ + ( ABSXI / SCALE )**2 + END IF + END IF + 10 CONTINUE + END IF + RETURN +* +* End of DLASSQ +* + END diff --git a/dspl/liblapack/SRC/dlasv2.f b/dspl/liblapack/SRC/dlasv2.f new file mode 100644 index 0000000..9371d6d --- /dev/null +++ b/dspl/liblapack/SRC/dlasv2.f @@ -0,0 +1,325 @@ +*> \brief \b DLASV2 computes the singular value decomposition of a 2-by-2 triangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASV2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASV2( F, G, H, SSMIN, SSMAX, SNR, CSR, SNL, CSL ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION CSL, CSR, F, G, H, SNL, SNR, SSMAX, SSMIN +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASV2 computes the singular value decomposition of a 2-by-2 +*> triangular matrix +*> [ F G ] +*> [ 0 H ]. +*> On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the +*> smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and +*> right singular vectors for abs(SSMAX), giving the decomposition +*> +*> [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] +*> [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] F +*> \verbatim +*> F is DOUBLE PRECISION +*> The (1,1) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] G +*> \verbatim +*> G is DOUBLE PRECISION +*> The (1,2) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] H +*> \verbatim +*> H is DOUBLE PRECISION +*> The (2,2) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[out] SSMIN +*> \verbatim +*> SSMIN is DOUBLE PRECISION +*> abs(SSMIN) is the smaller singular value. +*> \endverbatim +*> +*> \param[out] SSMAX +*> \verbatim +*> SSMAX is DOUBLE PRECISION +*> abs(SSMAX) is the larger singular value. +*> \endverbatim +*> +*> \param[out] SNL +*> \verbatim +*> SNL is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] CSL +*> \verbatim +*> CSL is DOUBLE PRECISION +*> The vector (CSL, SNL) is a unit left singular vector for the +*> singular value abs(SSMAX). +*> \endverbatim +*> +*> \param[out] SNR +*> \verbatim +*> SNR is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] CSR +*> \verbatim +*> CSR is DOUBLE PRECISION +*> The vector (CSR, SNR) is a unit right singular vector for the +*> singular value abs(SSMAX). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Any input parameter may be aliased with any output parameter. +*> +*> Barring over/underflow and assuming a guard digit in subtraction, all +*> output quantities are correct to within a few units in the last +*> place (ulps). +*> +*> In IEEE arithmetic, the code works correctly if one matrix element is +*> infinite. +*> +*> Overflow will not occur unless the largest singular value itself +*> overflows or is within a few ulps of overflow. (On machines with +*> partial overflow, like the Cray, overflow may occur if the largest +*> singular value is within a factor of 2 of overflow.) +*> +*> Underflow is harmless if underflow is gradual. Otherwise, results +*> may correspond to a matrix modified by perturbations of size near +*> the underflow threshold. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLASV2( F, G, H, SSMIN, SSMAX, SNR, CSR, SNL, CSL ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + DOUBLE PRECISION CSL, CSR, F, G, H, SNL, SNR, SSMAX, SSMIN +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = 0.5D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D0 ) + DOUBLE PRECISION FOUR + PARAMETER ( FOUR = 4.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL GASMAL, SWAP + INTEGER PMAX + DOUBLE PRECISION A, CLT, CRT, D, FA, FT, GA, GT, HA, HT, L, M, + $ MM, R, S, SLT, SRT, T, TEMP, TSIGN, TT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Executable Statements .. +* + FT = F + FA = ABS( FT ) + HT = H + HA = ABS( H ) +* +* PMAX points to the maximum absolute element of matrix +* PMAX = 1 if F largest in absolute values +* PMAX = 2 if G largest in absolute values +* PMAX = 3 if H largest in absolute values +* + PMAX = 1 + SWAP = ( HA.GT.FA ) + IF( SWAP ) THEN + PMAX = 3 + TEMP = FT + FT = HT + HT = TEMP + TEMP = FA + FA = HA + HA = TEMP +* +* Now FA .ge. HA +* + END IF + GT = G + GA = ABS( GT ) + IF( GA.EQ.ZERO ) THEN +* +* Diagonal matrix +* + SSMIN = HA + SSMAX = FA + CLT = ONE + CRT = ONE + SLT = ZERO + SRT = ZERO + ELSE + GASMAL = .TRUE. + IF( GA.GT.FA ) THEN + PMAX = 2 + IF( ( FA / GA ).LT.DLAMCH( 'EPS' ) ) THEN +* +* Case of very large GA +* + GASMAL = .FALSE. + SSMAX = GA + IF( HA.GT.ONE ) THEN + SSMIN = FA / ( GA / HA ) + ELSE + SSMIN = ( FA / GA )*HA + END IF + CLT = ONE + SLT = HT / GT + SRT = ONE + CRT = FT / GT + END IF + END IF + IF( GASMAL ) THEN +* +* Normal case +* + D = FA - HA + IF( D.EQ.FA ) THEN +* +* Copes with infinite F or H +* + L = ONE + ELSE + L = D / FA + END IF +* +* Note that 0 .le. L .le. 1 +* + M = GT / FT +* +* Note that abs(M) .le. 1/macheps +* + T = TWO - L +* +* Note that T .ge. 1 +* + MM = M*M + TT = T*T + S = SQRT( TT+MM ) +* +* Note that 1 .le. S .le. 1 + 1/macheps +* + IF( L.EQ.ZERO ) THEN + R = ABS( M ) + ELSE + R = SQRT( L*L+MM ) + END IF +* +* Note that 0 .le. R .le. 1 + 1/macheps +* + A = HALF*( S+R ) +* +* Note that 1 .le. A .le. 1 + abs(M) +* + SSMIN = HA / A + SSMAX = FA*A + IF( MM.EQ.ZERO ) THEN +* +* Note that M is very tiny +* + IF( L.EQ.ZERO ) THEN + T = SIGN( TWO, FT )*SIGN( ONE, GT ) + ELSE + T = GT / SIGN( D, FT ) + M / T + END IF + ELSE + T = ( M / ( S+T )+M / ( R+L ) )*( ONE+A ) + END IF + L = SQRT( T*T+FOUR ) + CRT = TWO / L + SRT = T / L + CLT = ( CRT+SRT*M ) / A + SLT = ( HT / FT )*SRT / A + END IF + END IF + IF( SWAP ) THEN + CSL = SRT + SNL = CRT + CSR = SLT + SNR = CLT + ELSE + CSL = CLT + SNL = SLT + CSR = CRT + SNR = SRT + END IF +* +* Correct signs of SSMAX and SSMIN +* + IF( PMAX.EQ.1 ) + $ TSIGN = SIGN( ONE, CSR )*SIGN( ONE, CSL )*SIGN( ONE, F ) + IF( PMAX.EQ.2 ) + $ TSIGN = SIGN( ONE, SNR )*SIGN( ONE, CSL )*SIGN( ONE, G ) + IF( PMAX.EQ.3 ) + $ TSIGN = SIGN( ONE, SNR )*SIGN( ONE, SNL )*SIGN( ONE, H ) + SSMAX = SIGN( SSMAX, TSIGN ) + SSMIN = SIGN( SSMIN, TSIGN*SIGN( ONE, F )*SIGN( ONE, H ) ) + RETURN +* +* End of DLASV2 +* + END diff --git a/dspl/liblapack/SRC/dlaswlq.f b/dspl/liblapack/SRC/dlaswlq.f new file mode 100644 index 0000000..6e4ca20 --- /dev/null +++ b/dspl/liblapack/SRC/dlaswlq.f @@ -0,0 +1,258 @@ +* +* Definition: +* =========== +* +* SUBROUTINE DLASWLQ( M, N, MB, NB, A, LDA, T, LDT, WORK, +* LWORK, INFO) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, MB, NB, LDT, LWORK +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASWLQ computes a blocked Short-Wide LQ factorization of a +*> M-by-N matrix A, where N >= M: +*> A = L * Q +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= M >= 0. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The row block size to be used in the blocked QR. +*> M >= MB >= 1 +*> \endverbatim +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The column block size to be used in the blocked QR. +*> NB > M. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and below the diagonal +*> of the array contain the N-by-N lower triangular matrix L; +*> the elements above the diagonal represent Q by the rows +*> of blocked V (see Further Details). +*> +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, +*> dimension (LDT, N * Number_of_row_blocks) +*> where Number_of_row_blocks = CEIL((N-M)/(NB-M)) +*> The blocked upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. +*> See Further Details below. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> +*> \endverbatim +*> \param[in] LWORK +*> \verbatim +*> The dimension of the array WORK. LWORK >= MB*M. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> +*> \endverbatim +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> Short-Wide LQ (SWLQ) performs LQ by a sequence of orthogonal transformations, +*> representing Q as a product of other orthogonal matrices +*> Q = Q(1) * Q(2) * . . . * Q(k) +*> where each Q(i) zeros out upper diagonal entries of a block of NB rows of A: +*> Q(1) zeros out the upper diagonal entries of rows 1:NB of A +*> Q(2) zeros out the bottom MB-N rows of rows [1:M,NB+1:2*NB-M] of A +*> Q(3) zeros out the bottom MB-N rows of rows [1:M,2*NB-M+1:3*NB-2*M] of A +*> . . . +*> +*> Q(1) is computed by GELQT, which represents Q(1) by Householder vectors +*> stored under the diagonal of rows 1:MB of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,1:N). +*> For more information see Further Details in GELQT. +*> +*> Q(i) for i>1 is computed by TPLQT, which represents Q(i) by Householder vectors +*> stored in columns [(i-1)*(NB-M)+M+1:i*(NB-M)+M] of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,(i-1)*M+1:i*M). +*> The last Q(k) may use fewer rows. +*> For more information see Further Details in TPQRT. +*> +*> For more details of the overall algorithm, see the description of +*> Sequential TSQR in Section 2.2 of [1]. +*> +*> [1] “Communication-Optimal Parallel and Sequential QR and LU Factorizations,” +*> J. Demmel, L. Grigori, M. Hoemmen, J. Langou, +*> SIAM J. Sci. Comput, vol. 34, no. 1, 2012 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLASWLQ( M, N, MB, NB, A, LDA, T, LDT, WORK, LWORK, + $ INFO) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, MB, NB, LWORK, LDT +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), WORK( * ), T( LDT, *) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, II, KK, CTR +* .. +* .. EXTERNAL FUNCTIONS .. + LOGICAL LSAME + EXTERNAL LSAME +* .. EXTERNAL SUBROUTINES .. + EXTERNAL DGELQT, DTPLQT, XERBLA +* .. INTRINSIC FUNCTIONS .. + INTRINSIC MAX, MIN, MOD +* .. +* .. EXECUTABLE STATEMENTS .. +* +* TEST THE INPUT ARGUMENTS +* + INFO = 0 +* + LQUERY = ( LWORK.EQ.-1 ) +* + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.LT.M ) THEN + INFO = -2 + ELSE IF( MB.LT.1 .OR. ( MB.GT.M .AND. M.GT.0 )) THEN + INFO = -3 + ELSE IF( NB.LE.M ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDT.LT.MB ) THEN + INFO = -8 + ELSE IF( ( LWORK.LT.M*MB) .AND. (.NOT.LQUERY) ) THEN + INFO = -10 + END IF + IF( INFO.EQ.0) THEN + WORK(1) = MB*M + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLASWLQ', -INFO ) + RETURN + ELSE IF (LQUERY) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN(M,N).EQ.0 ) THEN + RETURN + END IF +* +* The LQ Decomposition +* + IF((M.GE.N).OR.(NB.LE.M).OR.(NB.GE.N)) THEN + CALL DGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO) + RETURN + END IF +* + KK = MOD((N-M),(NB-M)) + II=N-KK+1 +* +* Compute the LQ factorization of the first block A(1:M,1:NB) +* + CALL DGELQT( M, NB, MB, A(1,1), LDA, T, LDT, WORK, INFO) + CTR = 1 +* + DO I = NB+1, II-NB+M , (NB-M) +* +* Compute the QR factorization of the current block A(1:M,I:I+NB-M) +* + CALL DTPLQT( M, NB-M, 0, MB, A(1,1), LDA, A( 1, I ), + $ LDA, T(1, CTR * M + 1), + $ LDT, WORK, INFO ) + CTR = CTR + 1 + END DO +* +* Compute the QR factorization of the last block A(1:M,II:N) +* + IF (II.LE.N) THEN + CALL DTPLQT( M, KK, 0, MB, A(1,1), LDA, A( 1, II ), + $ LDA, T(1, CTR * M + 1), LDT, + $ WORK, INFO ) + END IF +* + WORK( 1 ) = M * MB + RETURN +* +* End of DLASWLQ +* + END diff --git a/dspl/liblapack/SRC/dlaswp.f b/dspl/liblapack/SRC/dlaswp.f new file mode 100644 index 0000000..202fd8d --- /dev/null +++ b/dspl/liblapack/SRC/dlaswp.f @@ -0,0 +1,193 @@ +*> \brief \b DLASWP performs a series of row interchanges on a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASWP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASWP( N, A, LDA, K1, K2, IPIV, INCX ) +* +* .. Scalar Arguments .. +* INTEGER INCX, K1, K2, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASWP performs a series of row interchanges on the matrix A. +*> One row interchange is initiated for each of rows K1 through K2 of A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the matrix of column dimension N to which the row +*> interchanges will be applied. +*> On exit, the permuted matrix. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> \endverbatim +*> +*> \param[in] K1 +*> \verbatim +*> K1 is INTEGER +*> The first element of IPIV for which a row interchange will +*> be done. +*> \endverbatim +*> +*> \param[in] K2 +*> \verbatim +*> K2 is INTEGER +*> (K2-K1+1) is the number of elements of IPIV for which a row +*> interchange will be done. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (K1+(K2-K1)*abs(INCX)) +*> The vector of pivot indices. Only the elements in positions +*> K1 through K1+(K2-K1)*abs(INCX) of IPIV are accessed. +*> IPIV(K1+(K-K1)*abs(INCX)) = L implies rows K and L are to be +*> interchanged. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive values of IPIV. If INCX +*> is negative, the pivots are applied in reverse order. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Modified by +*> R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLASWP( N, A, LDA, K1, K2, IPIV, INCX ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INCX, K1, K2, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, I1, I2, INC, IP, IX, IX0, J, K, N32 + DOUBLE PRECISION TEMP +* .. +* .. Executable Statements .. +* +* Interchange row I with row IPIV(K1+(I-K1)*abs(INCX)) for each of rows +* K1 through K2. +* + IF( INCX.GT.0 ) THEN + IX0 = K1 + I1 = K1 + I2 = K2 + INC = 1 + ELSE IF( INCX.LT.0 ) THEN + IX0 = K1 + ( K1-K2 )*INCX + I1 = K2 + I2 = K1 + INC = -1 + ELSE + RETURN + END IF +* + N32 = ( N / 32 )*32 + IF( N32.NE.0 ) THEN + DO 30 J = 1, N32, 32 + IX = IX0 + DO 20 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 10 K = J, J + 31 + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 10 CONTINUE + END IF + IX = IX + INCX + 20 CONTINUE + 30 CONTINUE + END IF + IF( N32.NE.N ) THEN + N32 = N32 + 1 + IX = IX0 + DO 50 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 40 K = N32, N + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 40 CONTINUE + END IF + IX = IX + INCX + 50 CONTINUE + END IF +* + RETURN +* +* End of DLASWP +* + END diff --git a/dspl/liblapack/SRC/dlasy2.f b/dspl/liblapack/SRC/dlasy2.f new file mode 100644 index 0000000..2afad2b --- /dev/null +++ b/dspl/liblapack/SRC/dlasy2.f @@ -0,0 +1,482 @@ +*> \brief \b DLASY2 solves the Sylvester matrix equation where the matrices are of order 1 or 2. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASY2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASY2( LTRANL, LTRANR, ISGN, N1, N2, TL, LDTL, TR, +* LDTR, B, LDB, SCALE, X, LDX, XNORM, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL LTRANL, LTRANR +* INTEGER INFO, ISGN, LDB, LDTL, LDTR, LDX, N1, N2 +* DOUBLE PRECISION SCALE, XNORM +* .. +* .. Array Arguments .. +* DOUBLE PRECISION B( LDB, * ), TL( LDTL, * ), TR( LDTR, * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASY2 solves for the N1 by N2 matrix X, 1 <= N1,N2 <= 2, in +*> +*> op(TL)*X + ISGN*X*op(TR) = SCALE*B, +*> +*> where TL is N1 by N1, TR is N2 by N2, B is N1 by N2, and ISGN = 1 or +*> -1. op(T) = T or T**T, where T**T denotes the transpose of T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] LTRANL +*> \verbatim +*> LTRANL is LOGICAL +*> On entry, LTRANL specifies the op(TL): +*> = .FALSE., op(TL) = TL, +*> = .TRUE., op(TL) = TL**T. +*> \endverbatim +*> +*> \param[in] LTRANR +*> \verbatim +*> LTRANR is LOGICAL +*> On entry, LTRANR specifies the op(TR): +*> = .FALSE., op(TR) = TR, +*> = .TRUE., op(TR) = TR**T. +*> \endverbatim +*> +*> \param[in] ISGN +*> \verbatim +*> ISGN is INTEGER +*> On entry, ISGN specifies the sign of the equation +*> as described before. ISGN may only be 1 or -1. +*> \endverbatim +*> +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> On entry, N1 specifies the order of matrix TL. +*> N1 may only be 0, 1 or 2. +*> \endverbatim +*> +*> \param[in] N2 +*> \verbatim +*> N2 is INTEGER +*> On entry, N2 specifies the order of matrix TR. +*> N2 may only be 0, 1 or 2. +*> \endverbatim +*> +*> \param[in] TL +*> \verbatim +*> TL is DOUBLE PRECISION array, dimension (LDTL,2) +*> On entry, TL contains an N1 by N1 matrix. +*> \endverbatim +*> +*> \param[in] LDTL +*> \verbatim +*> LDTL is INTEGER +*> The leading dimension of the matrix TL. LDTL >= max(1,N1). +*> \endverbatim +*> +*> \param[in] TR +*> \verbatim +*> TR is DOUBLE PRECISION array, dimension (LDTR,2) +*> On entry, TR contains an N2 by N2 matrix. +*> \endverbatim +*> +*> \param[in] LDTR +*> \verbatim +*> LDTR is INTEGER +*> The leading dimension of the matrix TR. LDTR >= max(1,N2). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,2) +*> On entry, the N1 by N2 matrix B contains the right-hand +*> side of the equation. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the matrix B. LDB >= max(1,N1). +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> On exit, SCALE contains the scale factor. SCALE is chosen +*> less than or equal to 1 to prevent the solution overflowing. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,2) +*> On exit, X contains the N1 by N2 solution. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the matrix X. LDX >= max(1,N1). +*> \endverbatim +*> +*> \param[out] XNORM +*> \verbatim +*> XNORM is DOUBLE PRECISION +*> On exit, XNORM is the infinity-norm of the solution. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> On exit, INFO is set to +*> 0: successful exit. +*> 1: TL and TR have too close eigenvalues, so TL or +*> TR is perturbed to get a nonsingular equation. +*> NOTE: In the interests of speed, this routine does not +*> check the inputs for errors. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleSYauxiliary +* +* ===================================================================== + SUBROUTINE DLASY2( LTRANL, LTRANR, ISGN, N1, N2, TL, LDTL, TR, + $ LDTR, B, LDB, SCALE, X, LDX, XNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + LOGICAL LTRANL, LTRANR + INTEGER INFO, ISGN, LDB, LDTL, LDTR, LDX, N1, N2 + DOUBLE PRECISION SCALE, XNORM +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( LDB, * ), TL( LDTL, * ), TR( LDTR, * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION TWO, HALF, EIGHT + PARAMETER ( TWO = 2.0D+0, HALF = 0.5D+0, EIGHT = 8.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL BSWAP, XSWAP + INTEGER I, IP, IPIV, IPSV, J, JP, JPSV, K + DOUBLE PRECISION BET, EPS, GAM, L21, SGN, SMIN, SMLNUM, TAU1, + $ TEMP, U11, U12, U22, XMAX +* .. +* .. Local Arrays .. + LOGICAL BSWPIV( 4 ), XSWPIV( 4 ) + INTEGER JPIV( 4 ), LOCL21( 4 ), LOCU12( 4 ), + $ LOCU22( 4 ) + DOUBLE PRECISION BTMP( 4 ), T16( 4, 4 ), TMP( 4 ), X2( 2 ) +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL IDAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Data statements .. + DATA LOCU12 / 3, 4, 1, 2 / , LOCL21 / 2, 1, 4, 3 / , + $ LOCU22 / 4, 3, 2, 1 / + DATA XSWPIV / .FALSE., .FALSE., .TRUE., .TRUE. / + DATA BSWPIV / .FALSE., .TRUE., .FALSE., .TRUE. / +* .. +* .. Executable Statements .. +* +* Do not check the input parameters for errors +* + INFO = 0 +* +* Quick return if possible +* + IF( N1.EQ.0 .OR. N2.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + SGN = ISGN +* + K = N1 + N1 + N2 - 2 + GO TO ( 10, 20, 30, 50 )K +* +* 1 by 1: TL11*X + SGN*X*TR11 = B11 +* + 10 CONTINUE + TAU1 = TL( 1, 1 ) + SGN*TR( 1, 1 ) + BET = ABS( TAU1 ) + IF( BET.LE.SMLNUM ) THEN + TAU1 = SMLNUM + BET = SMLNUM + INFO = 1 + END IF +* + SCALE = ONE + GAM = ABS( B( 1, 1 ) ) + IF( SMLNUM*GAM.GT.BET ) + $ SCALE = ONE / GAM +* + X( 1, 1 ) = ( B( 1, 1 )*SCALE ) / TAU1 + XNORM = ABS( X( 1, 1 ) ) + RETURN +* +* 1 by 2: +* TL11*[X11 X12] + ISGN*[X11 X12]*op[TR11 TR12] = [B11 B12] +* [TR21 TR22] +* + 20 CONTINUE +* + SMIN = MAX( EPS*MAX( ABS( TL( 1, 1 ) ), ABS( TR( 1, 1 ) ), + $ ABS( TR( 1, 2 ) ), ABS( TR( 2, 1 ) ), ABS( TR( 2, 2 ) ) ), + $ SMLNUM ) + TMP( 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 ) + TMP( 4 ) = TL( 1, 1 ) + SGN*TR( 2, 2 ) + IF( LTRANR ) THEN + TMP( 2 ) = SGN*TR( 2, 1 ) + TMP( 3 ) = SGN*TR( 1, 2 ) + ELSE + TMP( 2 ) = SGN*TR( 1, 2 ) + TMP( 3 ) = SGN*TR( 2, 1 ) + END IF + BTMP( 1 ) = B( 1, 1 ) + BTMP( 2 ) = B( 1, 2 ) + GO TO 40 +* +* 2 by 1: +* op[TL11 TL12]*[X11] + ISGN* [X11]*TR11 = [B11] +* [TL21 TL22] [X21] [X21] [B21] +* + 30 CONTINUE + SMIN = MAX( EPS*MAX( ABS( TR( 1, 1 ) ), ABS( TL( 1, 1 ) ), + $ ABS( TL( 1, 2 ) ), ABS( TL( 2, 1 ) ), ABS( TL( 2, 2 ) ) ), + $ SMLNUM ) + TMP( 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 ) + TMP( 4 ) = TL( 2, 2 ) + SGN*TR( 1, 1 ) + IF( LTRANL ) THEN + TMP( 2 ) = TL( 1, 2 ) + TMP( 3 ) = TL( 2, 1 ) + ELSE + TMP( 2 ) = TL( 2, 1 ) + TMP( 3 ) = TL( 1, 2 ) + END IF + BTMP( 1 ) = B( 1, 1 ) + BTMP( 2 ) = B( 2, 1 ) + 40 CONTINUE +* +* Solve 2 by 2 system using complete pivoting. +* Set pivots less than SMIN to SMIN. +* + IPIV = IDAMAX( 4, TMP, 1 ) + U11 = TMP( IPIV ) + IF( ABS( U11 ).LE.SMIN ) THEN + INFO = 1 + U11 = SMIN + END IF + U12 = TMP( LOCU12( IPIV ) ) + L21 = TMP( LOCL21( IPIV ) ) / U11 + U22 = TMP( LOCU22( IPIV ) ) - U12*L21 + XSWAP = XSWPIV( IPIV ) + BSWAP = BSWPIV( IPIV ) + IF( ABS( U22 ).LE.SMIN ) THEN + INFO = 1 + U22 = SMIN + END IF + IF( BSWAP ) THEN + TEMP = BTMP( 2 ) + BTMP( 2 ) = BTMP( 1 ) - L21*TEMP + BTMP( 1 ) = TEMP + ELSE + BTMP( 2 ) = BTMP( 2 ) - L21*BTMP( 1 ) + END IF + SCALE = ONE + IF( ( TWO*SMLNUM )*ABS( BTMP( 2 ) ).GT.ABS( U22 ) .OR. + $ ( TWO*SMLNUM )*ABS( BTMP( 1 ) ).GT.ABS( U11 ) ) THEN + SCALE = HALF / MAX( ABS( BTMP( 1 ) ), ABS( BTMP( 2 ) ) ) + BTMP( 1 ) = BTMP( 1 )*SCALE + BTMP( 2 ) = BTMP( 2 )*SCALE + END IF + X2( 2 ) = BTMP( 2 ) / U22 + X2( 1 ) = BTMP( 1 ) / U11 - ( U12 / U11 )*X2( 2 ) + IF( XSWAP ) THEN + TEMP = X2( 2 ) + X2( 2 ) = X2( 1 ) + X2( 1 ) = TEMP + END IF + X( 1, 1 ) = X2( 1 ) + IF( N1.EQ.1 ) THEN + X( 1, 2 ) = X2( 2 ) + XNORM = ABS( X( 1, 1 ) ) + ABS( X( 1, 2 ) ) + ELSE + X( 2, 1 ) = X2( 2 ) + XNORM = MAX( ABS( X( 1, 1 ) ), ABS( X( 2, 1 ) ) ) + END IF + RETURN +* +* 2 by 2: +* op[TL11 TL12]*[X11 X12] +ISGN* [X11 X12]*op[TR11 TR12] = [B11 B12] +* [TL21 TL22] [X21 X22] [X21 X22] [TR21 TR22] [B21 B22] +* +* Solve equivalent 4 by 4 system using complete pivoting. +* Set pivots less than SMIN to SMIN. +* + 50 CONTINUE + SMIN = MAX( ABS( TR( 1, 1 ) ), ABS( TR( 1, 2 ) ), + $ ABS( TR( 2, 1 ) ), ABS( TR( 2, 2 ) ) ) + SMIN = MAX( SMIN, ABS( TL( 1, 1 ) ), ABS( TL( 1, 2 ) ), + $ ABS( TL( 2, 1 ) ), ABS( TL( 2, 2 ) ) ) + SMIN = MAX( EPS*SMIN, SMLNUM ) + BTMP( 1 ) = ZERO + CALL DCOPY( 16, BTMP, 0, T16, 1 ) + T16( 1, 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 ) + T16( 2, 2 ) = TL( 2, 2 ) + SGN*TR( 1, 1 ) + T16( 3, 3 ) = TL( 1, 1 ) + SGN*TR( 2, 2 ) + T16( 4, 4 ) = TL( 2, 2 ) + SGN*TR( 2, 2 ) + IF( LTRANL ) THEN + T16( 1, 2 ) = TL( 2, 1 ) + T16( 2, 1 ) = TL( 1, 2 ) + T16( 3, 4 ) = TL( 2, 1 ) + T16( 4, 3 ) = TL( 1, 2 ) + ELSE + T16( 1, 2 ) = TL( 1, 2 ) + T16( 2, 1 ) = TL( 2, 1 ) + T16( 3, 4 ) = TL( 1, 2 ) + T16( 4, 3 ) = TL( 2, 1 ) + END IF + IF( LTRANR ) THEN + T16( 1, 3 ) = SGN*TR( 1, 2 ) + T16( 2, 4 ) = SGN*TR( 1, 2 ) + T16( 3, 1 ) = SGN*TR( 2, 1 ) + T16( 4, 2 ) = SGN*TR( 2, 1 ) + ELSE + T16( 1, 3 ) = SGN*TR( 2, 1 ) + T16( 2, 4 ) = SGN*TR( 2, 1 ) + T16( 3, 1 ) = SGN*TR( 1, 2 ) + T16( 4, 2 ) = SGN*TR( 1, 2 ) + END IF + BTMP( 1 ) = B( 1, 1 ) + BTMP( 2 ) = B( 2, 1 ) + BTMP( 3 ) = B( 1, 2 ) + BTMP( 4 ) = B( 2, 2 ) +* +* Perform elimination +* + DO 100 I = 1, 3 + XMAX = ZERO + DO 70 IP = I, 4 + DO 60 JP = I, 4 + IF( ABS( T16( IP, JP ) ).GE.XMAX ) THEN + XMAX = ABS( T16( IP, JP ) ) + IPSV = IP + JPSV = JP + END IF + 60 CONTINUE + 70 CONTINUE + IF( IPSV.NE.I ) THEN + CALL DSWAP( 4, T16( IPSV, 1 ), 4, T16( I, 1 ), 4 ) + TEMP = BTMP( I ) + BTMP( I ) = BTMP( IPSV ) + BTMP( IPSV ) = TEMP + END IF + IF( JPSV.NE.I ) + $ CALL DSWAP( 4, T16( 1, JPSV ), 1, T16( 1, I ), 1 ) + JPIV( I ) = JPSV + IF( ABS( T16( I, I ) ).LT.SMIN ) THEN + INFO = 1 + T16( I, I ) = SMIN + END IF + DO 90 J = I + 1, 4 + T16( J, I ) = T16( J, I ) / T16( I, I ) + BTMP( J ) = BTMP( J ) - T16( J, I )*BTMP( I ) + DO 80 K = I + 1, 4 + T16( J, K ) = T16( J, K ) - T16( J, I )*T16( I, K ) + 80 CONTINUE + 90 CONTINUE + 100 CONTINUE + IF( ABS( T16( 4, 4 ) ).LT.SMIN ) THEN + INFO = 1 + T16( 4, 4 ) = SMIN + END IF + SCALE = ONE + IF( ( EIGHT*SMLNUM )*ABS( BTMP( 1 ) ).GT.ABS( T16( 1, 1 ) ) .OR. + $ ( EIGHT*SMLNUM )*ABS( BTMP( 2 ) ).GT.ABS( T16( 2, 2 ) ) .OR. + $ ( EIGHT*SMLNUM )*ABS( BTMP( 3 ) ).GT.ABS( T16( 3, 3 ) ) .OR. + $ ( EIGHT*SMLNUM )*ABS( BTMP( 4 ) ).GT.ABS( T16( 4, 4 ) ) ) THEN + SCALE = ( ONE / EIGHT ) / MAX( ABS( BTMP( 1 ) ), + $ ABS( BTMP( 2 ) ), ABS( BTMP( 3 ) ), ABS( BTMP( 4 ) ) ) + BTMP( 1 ) = BTMP( 1 )*SCALE + BTMP( 2 ) = BTMP( 2 )*SCALE + BTMP( 3 ) = BTMP( 3 )*SCALE + BTMP( 4 ) = BTMP( 4 )*SCALE + END IF + DO 120 I = 1, 4 + K = 5 - I + TEMP = ONE / T16( K, K ) + TMP( K ) = BTMP( K )*TEMP + DO 110 J = K + 1, 4 + TMP( K ) = TMP( K ) - ( TEMP*T16( K, J ) )*TMP( J ) + 110 CONTINUE + 120 CONTINUE + DO 130 I = 1, 3 + IF( JPIV( 4-I ).NE.4-I ) THEN + TEMP = TMP( 4-I ) + TMP( 4-I ) = TMP( JPIV( 4-I ) ) + TMP( JPIV( 4-I ) ) = TEMP + END IF + 130 CONTINUE + X( 1, 1 ) = TMP( 1 ) + X( 2, 1 ) = TMP( 2 ) + X( 1, 2 ) = TMP( 3 ) + X( 2, 2 ) = TMP( 4 ) + XNORM = MAX( ABS( TMP( 1 ) )+ABS( TMP( 3 ) ), + $ ABS( TMP( 2 ) )+ABS( TMP( 4 ) ) ) + RETURN +* +* End of DLASY2 +* + END diff --git a/dspl/liblapack/SRC/dlasyf.f b/dspl/liblapack/SRC/dlasyf.f new file mode 100644 index 0000000..de705e4 --- /dev/null +++ b/dspl/liblapack/SRC/dlasyf.f @@ -0,0 +1,822 @@ +*> \brief \b DLASYF computes a partial factorization of a real symmetric matrix using the Bunch-Kaufman diagonal pivoting method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASYF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASYF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASYF computes a partial factorization of a real symmetric matrix A +*> using the Bunch-Kaufman diagonal pivoting method. The partial +*> factorization has the form: +*> +*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +*> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) +*> +*> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L' +*> ( L21 I ) ( 0 A22 ) ( 0 I ) +*> +*> where the order of D is at most NB. The actual order is returned in +*> the argument KB, and is either NB or NB-1, or N if N <= NB. +*> +*> DLASYF is an auxiliary routine called by DSYTRF. It uses blocked code +*> (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or +*> A22 (if UPLO = 'L'). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The maximum number of columns of the matrix A that should be +*> factored. NB should be at least 2 to allow for 2-by-2 pivot +*> blocks. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns of A that were actually factored. +*> KB is either NB-1 or NB, or N if N <= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, A contains details of the partial factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> Only the last KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k-1) < 0, then rows and columns +*> k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> Only the first KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k+1) < 0, then rows and columns +*> k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1) +*> is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (LDW,NB) +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup doubleSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DLASYF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER IMAX, J, JB, JJ, JMAX, JP, K, KK, KKW, KP, + $ KSTEP, KW + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, D11, D21, D22, R1, + $ ROWMAX, T +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + EXTERNAL LSAME, IDAMAX +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DGEMV, DSCAL, DSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* +* KW is the column of W which corresponds to column K of A +* + K = N + 10 CONTINUE + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* +* Copy column K of A to column KW of W and update it +* + CALL DCOPY( K, A( 1, K ), 1, W( 1, KW ), 1 ) + IF( K.LT.N ) + $ CALL DGEMV( 'No transpose', K, N-K, -ONE, A( 1, K+1 ), LDA, + $ W( K, KW+1 ), LDW, ONE, W( 1, KW ), 1 ) +* + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( W( K, KW ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = IDAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = ABS( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* Copy column IMAX to column KW-1 of W and update it +* + CALL DCOPY( IMAX, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 ) + CALL DCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) + IF( K.LT.N ) + $ CALL DGEMV( 'No transpose', K, N-K, -ONE, A( 1, K+1 ), + $ LDA, W( IMAX, KW+1 ), LDW, ONE, + $ W( 1, KW-1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + IDAMAX( K-IMAX, W( IMAX+1, KW-1 ), 1 ) + ROWMAX = ABS( W( JMAX, KW-1 ) ) + IF( IMAX.GT.1 ) THEN + JMAX = IDAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( W( JMAX, KW-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( W( IMAX, KW-1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW of W +* + CALL DCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K - KSTEP + 1 +* +* KKW is the column of W which corresponds to column KK of A +* + KKW = NB + KK - N +* +* Interchange rows and columns KP and KK. +* Updated column KP is already stored in column KKW of W. +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP of submatrix A +* at step K. No need to copy element into column K +* (or K and K-1 for 2-by-2 pivot) of A, since these columns +* will be later overwritten. +* + A( KP, KP ) = A( KK, KK ) + CALL DCOPY( KK-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + IF( KP.GT.1 ) + $ CALL DCOPY( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last K+1 to N columns of A +* (columns K (or K and K-1 for 2-by-2 pivot) of A will be +* later overwritten). Interchange rows KK and KP +* in last KKW to NB columns of W. +* + IF( K.LT.N ) + $ CALL DSWAP( N-K, A( KK, K+1 ), LDA, A( KP, K+1 ), + $ LDA ) + CALL DSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column kw of W now holds +* +* W(kw) = U(k)*D(k), +* +* where U(k) is the k-th column of U +* +* Store subdiag. elements of column U(k) +* and 1-by-1 block D(k) in column k of A. +* NOTE: Diagonal element U(k,k) is a UNIT element +* and not stored. +* A(k,k) := D(k,k) = W(k,kw) +* A(1:k-1,k) := U(1:k-1,k) = W(1:k-1,kw)/D(k,k) +* + CALL DCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + R1 = ONE / A( K, K ) + CALL DSCAL( K-1, R1, A( 1, K ), 1 ) +* + ELSE +* +* 2-by-2 pivot block D(k): columns kw and kw-1 of W now hold +* +* ( W(kw-1) W(kw) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Store U(1:k-2,k-1) and U(1:k-2,k) and 2-by-2 +* block D(k-1:k,k-1:k) in columns k-1 and k of A. +* NOTE: 2-by-2 diagonal block U(k-1:k,k-1:k) is a UNIT +* block and not stored. +* A(k-1:k,k-1:k) := D(k-1:k,k-1:k) = W(k-1:k,kw-1:kw) +* A(1:k-2,k-1:k) := U(1:k-2,k:k-1:k) = +* = W(1:k-2,kw-1:kw) * ( D(k-1:k,k-1:k)**(-1) ) +* + IF( K.GT.2 ) THEN +* +* Compose the columns of the inverse of 2-by-2 pivot +* block D in the following way to reduce the number +* of FLOPS when we myltiply panel ( W(kw-1) W(kw) ) by +* this inverse +* +* D**(-1) = ( d11 d21 )**(-1) = +* ( d21 d22 ) +* +* = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) = +* ( (-d21 ) ( d11 ) ) +* +* = 1/d21 * 1/((d11/d21)*(d22/d21)-1) * +* +* * ( ( d22/d21 ) ( -1 ) ) = +* ( ( -1 ) ( d11/d21 ) ) +* +* = 1/d21 * 1/(D22*D11-1) * ( ( D11 ) ( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = 1/d21 * T * ( ( D11 ) ( -1 ) ) +* ( ( -1 ) ( D22 ) ) +* +* = D21 * ( ( D11 ) ( -1 ) ) +* ( ( -1 ) ( D22 ) ) +* + D21 = W( K-1, KW ) + D11 = W( K, KW ) / D21 + D22 = W( K-1, KW-1 ) / D21 + T = ONE / ( D11*D22-ONE ) + D21 = T / D21 +* +* Update elements in columns A(k-1) and A(k) as +* dot products of rows of ( W(kw-1) W(kw) ) and columns +* of D**(-1) +* + DO 20 J = 1, K - 2 + A( J, K-1 ) = D21*( D11*W( J, KW-1 )-W( J, KW ) ) + A( J, K ) = D21*( D22*W( J, KW )-W( J, KW-1 ) ) + 20 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = W( K-1, KW ) + A( K, K ) = W( K, KW ) +* + END IF +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12**T = A11 - U12*W**T +* +* computing blocks of NB columns at a time +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + CALL DGEMV( 'No transpose', JJ-J+1, N-K, -ONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, ONE, + $ A( J, JJ ), 1 ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + CALL DGEMM( 'No transpose', 'Transpose', J-1, JB, N-K, -ONE, + $ A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, ONE, + $ A( 1, J ), LDA ) + 50 CONTINUE +* +* Put U12 in standard form by partially undoing the interchanges +* in columns k+1:n looping backwards from k+1 to n +* + J = K + 1 + 60 CONTINUE +* +* Undo the interchanges (if any) of rows JJ and JP at each +* step J +* +* (Here, J is a diagonal index) + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP +* (Here, J is a diagonal index) + J = J + 1 + END IF +* (NOTE: Here, J is used to determine row length. Length N-J+1 +* of the rows to swap back doesn't include diagonal element) + J = J + 1 + IF( JP.NE.JJ .AND. J.LE.N ) + $ CALL DSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA ) + IF( J.LT.N ) + $ GO TO 60 +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* +* Copy column K of A to column K of W and update it +* + CALL DCOPY( N-K+1, A( K, K ), 1, W( K, K ), 1 ) + CALL DGEMV( 'No transpose', N-K+1, K-1, -ONE, A( K, 1 ), LDA, + $ W( K, 1 ), LDW, ONE, W( K, K ), 1 ) +* + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( W( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + IDAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = ABS( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* Copy column IMAX to column K+1 of W and update it +* + CALL DCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1 ) + CALL DCOPY( N-IMAX+1, A( IMAX, IMAX ), 1, W( IMAX, K+1 ), + $ 1 ) + CALL DGEMV( 'No transpose', N-K+1, K-1, -ONE, A( K, 1 ), + $ LDA, W( IMAX, 1 ), LDW, ONE, W( K, K+1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + IDAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = ABS( W( JMAX, K+1 ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + IDAMAX( N-IMAX, W( IMAX+1, K+1 ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( W( JMAX, K+1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( W( IMAX, K+1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K of W +* + CALL DCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K + KSTEP - 1 +* +* Interchange rows and columns KP and KK. +* Updated column KP is already stored in column KK of W. +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP of submatrix A +* at step K. No need to copy element into column K +* (or K and K+1 for 2-by-2 pivot) of A, since these columns +* will be later overwritten. +* + A( KP, KP ) = A( KK, KK ) + CALL DCOPY( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + IF( KP.LT.N ) + $ CALL DCOPY( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) +* +* Interchange rows KK and KP in first K-1 columns of A +* (columns K (or K and K+1 for 2-by-2 pivot) of A will be +* later overwritten). Interchange rows KK and KP +* in first KK columns of W. +* + IF( K.GT.1 ) + $ CALL DSWAP( K-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL DSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k), +* +* where L(k) is the k-th column of L +* +* Store subdiag. elements of column L(k) +* and 1-by-1 block D(k) in column k of A. +* (NOTE: Diagonal element L(k,k) is a UNIT element +* and not stored) +* A(k,k) := D(k,k) = W(k,k) +* A(k+1:N,k) := L(k+1:N,k) = W(k+1:N,k)/D(k,k) +* + CALL DCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN + R1 = ONE / A( K, K ) + CALL DSCAL( N-K, R1, A( K+1, K ), 1 ) + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* Store L(k+2:N,k) and L(k+2:N,k+1) and 2-by-2 +* block D(k:k+1,k:k+1) in columns k and k+1 of A. +* (NOTE: 2-by-2 diagonal block L(k:k+1,k:k+1) is a UNIT +* block and not stored) +* A(k:k+1,k:k+1) := D(k:k+1,k:k+1) = W(k:k+1,k:k+1) +* A(k+2:N,k:k+1) := L(k+2:N,k:k+1) = +* = W(k+2:N,k:k+1) * ( D(k:k+1,k:k+1)**(-1) ) +* + IF( K.LT.N-1 ) THEN +* +* Compose the columns of the inverse of 2-by-2 pivot +* block D in the following way to reduce the number +* of FLOPS when we myltiply panel ( W(k) W(k+1) ) by +* this inverse +* +* D**(-1) = ( d11 d21 )**(-1) = +* ( d21 d22 ) +* +* = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) = +* ( (-d21 ) ( d11 ) ) +* +* = 1/d21 * 1/((d11/d21)*(d22/d21)-1) * +* +* * ( ( d22/d21 ) ( -1 ) ) = +* ( ( -1 ) ( d11/d21 ) ) +* +* = 1/d21 * 1/(D22*D11-1) * ( ( D11 ) ( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = 1/d21 * T * ( ( D11 ) ( -1 ) ) +* ( ( -1 ) ( D22 ) ) +* +* = D21 * ( ( D11 ) ( -1 ) ) +* ( ( -1 ) ( D22 ) ) +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / D21 + T = ONE / ( D11*D22-ONE ) + D21 = T / D21 +* +* Update elements in columns A(k) and A(k+1) as +* dot products of rows of ( W(k) W(k+1) ) and columns +* of D**(-1) +* + DO 80 J = K + 2, N + A( J, K ) = D21*( D11*W( J, K )-W( J, K+1 ) ) + A( J, K+1 ) = D21*( D22*W( J, K+1 )-W( J, K ) ) + 80 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = W( K+1, K ) + A( K+1, K+1 ) = W( K+1, K+1 ) +* + END IF +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21**T = A22 - L21*W**T +* +* computing blocks of NB columns at a time +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + CALL DGEMV( 'No transpose', J+JB-JJ, K-1, -ONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, ONE, + $ A( JJ, JJ ), 1 ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL DGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -ONE, A( J+JB, 1 ), LDA, W( J, 1 ), LDW, + $ ONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Put L21 in standard form by partially undoing the interchanges +* of rows in columns 1:k-1 looping backwards from k-1 to 1 +* + J = K - 1 + 120 CONTINUE +* +* Undo the interchanges (if any) of rows JJ and JP at each +* step J +* +* (Here, J is a diagonal index) + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP +* (Here, J is a diagonal index) + J = J - 1 + END IF +* (NOTE: Here, J is used to determine row length. Length J +* of the rows to swap back doesn't include diagonal element) + J = J - 1 + IF( JP.NE.JJ .AND. J.GE.1 ) + $ CALL DSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA ) + IF( J.GT.1 ) + $ GO TO 120 +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF + RETURN +* +* End of DLASYF +* + END diff --git a/dspl/liblapack/SRC/dlasyf_aa.f b/dspl/liblapack/SRC/dlasyf_aa.f new file mode 100644 index 0000000..6b75e46 --- /dev/null +++ b/dspl/liblapack/SRC/dlasyf_aa.f @@ -0,0 +1,493 @@ +*> \brief \b DLASYF_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASYF_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASYF_AA( UPLO, J1, M, NB, A, LDA, IPIV, +* H, LDH, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER J1, M, NB, LDA, LDH +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), H( LDH, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLATRF_AA factorizes a panel of a real symmetric matrix A using +*> the Aasen's algorithm. The panel consists of a set of NB rows of A +*> when UPLO is U, or a set of NB columns when UPLO is L. +*> +*> In order to factorize the panel, the Aasen's algorithm requires the +*> last row, or column, of the previous panel. The first row, or column, +*> of A is set to be the first row, or column, of an identity matrix, +*> which is used to factorize the first panel. +*> +*> The resulting J-th row of U, or J-th column of L, is stored in the +*> (J-1)-th row, or column, of A (without the unit diagonals), while +*> the diagonal and subdiagonal of A are overwritten by those of T. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] J1 +*> \verbatim +*> J1 is INTEGER +*> The location of the first row, or column, of the panel +*> within the submatrix of A, passed to this routine, e.g., +*> when called by DSYTRF_AA, for the first panel, J1 is 1, +*> while for the remaining panels, J1 is 2. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The dimension of the submatrix. M >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The dimension of the panel to be facotorized. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,M) for +*> the first panel, while dimension (LDA,M+1) for the +*> remaining panels. +*> +*> On entry, A contains the last row, or column, of +*> the previous panel, and the trailing submatrix of A +*> to be factorized, except for the first panel, only +*> the panel is passed. +*> +*> On exit, the leading panel is factorized. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (M) +*> Details of the row and column interchanges, +*> the row and column k were interchanged with the row and +*> column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is DOUBLE PRECISION workspace, dimension (LDH,NB). +*> +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the workspace H. LDH >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION workspace, dimension (M). +*> \endverbatim +*> +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DLASYF_AA( UPLO, J1, M, NB, A, LDA, IPIV, + $ H, LDH, WORK ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER M, NB, J1, LDA, LDH +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), H( LDH, * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* +* .. Local Scalars .. + INTEGER J, K, K1, I1, I2, MJ + DOUBLE PRECISION PIV, ALPHA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX, ILAENV + EXTERNAL LSAME, ILAENV, IDAMAX +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DAXPY, DCOPY, DSWAP, DSCAL, DLASET, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + J = 1 +* +* K1 is the first column of the panel to be factorized +* i.e., K1 is 2 for the first block column, and 1 for the rest of the blocks +* + K1 = (2-J1)+1 +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* ..................................................... +* Factorize A as U**T*D*U using the upper triangle of A +* ..................................................... +* + 10 CONTINUE + IF ( J.GT.MIN(M, NB) ) + $ GO TO 20 +* +* K is the column to be factorized +* when being called from DSYTRF_AA, +* > for the first block column, J1 is 1, hence J1+J-1 is J, +* > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, +* + K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF +* +* H(J:M, J) := A(J, J:M) - H(J:M, 1:(J-1)) * L(J1:(J-1), J), +* where H(J:M, J) has been initialized to be A(J, J:M) +* + IF( K.GT.2 ) THEN +* +* K is the column to be factorized +* > for the first block column, K is J, skipping the first two +* columns +* > for the rest of the columns, K is J+1, skipping only the +* first column +* + CALL DGEMV( 'No transpose', MJ, J-K1, + $ -ONE, H( J, K1 ), LDH, + $ A( 1, J ), 1, + $ ONE, H( J, J ), 1 ) + END IF +* +* Copy H(i:M, i) into WORK +* + CALL DCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) +* + IF( J.GT.K1 ) THEN +* +* Compute WORK := WORK - L(J-1, J:M) * T(J-1,J), +* where A(J-1, J) stores T(J-1, J) and A(J-2, J:M) stores U(J-1, J:M) +* + ALPHA = -A( K-1, J ) + CALL DAXPY( MJ, ALPHA, A( K-2, J ), LDA, WORK( 1 ), 1 ) + END IF +* +* Set A(J, J) = T(J, J) +* + A( K, J ) = WORK( 1 ) +* + IF( J.LT.M ) THEN +* +* Compute WORK(2:M) = T(J, J) L(J, (J+1):M) +* where A(J, J) stores T(J, J) and A(J-1, (J+1):M) stores U(J, (J+1):M) +* + IF( K.GT.1 ) THEN + ALPHA = -A( K, J ) + CALL DAXPY( M-J, ALPHA, A( K-1, J+1 ), LDA, + $ WORK( 2 ), 1 ) + ENDIF +* +* Find max(|WORK(2:M)|) +* + I2 = IDAMAX( M-J, WORK( 2 ), 1 ) + 1 + PIV = WORK( I2 ) +* +* Apply symmetric pivot +* + IF( (I2.NE.2) .AND. (PIV.NE.0) ) THEN +* +* Swap WORK(I1) and WORK(I2) +* + I1 = 2 + WORK( I2 ) = WORK( I1 ) + WORK( I1 ) = PIV +* +* Swap A(I1, I1+1:M) with A(I1+1:M, I2) +* + I1 = I1+J-1 + I2 = I2+J-1 + CALL DSWAP( I2-I1-1, A( J1+I1-1, I1+1 ), LDA, + $ A( J1+I1, I2 ), 1 ) +* +* Swap A(I1, I2+1:M) with A(I2, I2+1:M) +* + CALL DSWAP( M-I2, A( J1+I1-1, I2+1 ), LDA, + $ A( J1+I2-1, I2+1 ), LDA ) +* +* Swap A(I1, I1) with A(I2,I2) +* + PIV = A( I1+J1-1, I1 ) + A( J1+I1-1, I1 ) = A( J1+I2-1, I2 ) + A( J1+I2-1, I2 ) = PIV +* +* Swap H(I1, 1:J1) with H(I2, 1:J1) +* + CALL DSWAP( I1-1, H( I1, 1 ), LDH, H( I2, 1 ), LDH ) + IPIV( I1 ) = I2 +* + IF( I1.GT.(K1-1) ) THEN +* +* Swap L(1:I1-1, I1) with L(1:I1-1, I2), +* skipping the first column +* + CALL DSWAP( I1-K1+1, A( 1, I1 ), 1, + $ A( 1, I2 ), 1 ) + END IF + ELSE + IPIV( J+1 ) = J+1 + ENDIF +* +* Set A(J, J+1) = T(J, J+1) +* + A( K, J+1 ) = WORK( 2 ) +* + IF( J.LT.NB ) THEN +* +* Copy A(J+1:M, J+1) into H(J:M, J), +* + CALL DCOPY( M-J, A( K+1, J+1 ), LDA, + $ H( J+1, J+1 ), 1 ) + END IF +* +* Compute L(J+2, J+1) = WORK( 3:M ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:M, J) = L(J+2:M, J+1) +* + IF( A( K, J+1 ).NE.ZERO ) THEN + ALPHA = ONE / A( K, J+1 ) + CALL DCOPY( M-J-1, WORK( 3 ), 1, A( K, J+2 ), LDA ) + CALL DSCAL( M-J-1, ALPHA, A( K, J+2 ), LDA ) + ELSE + CALL DLASET( 'Full', 1, M-J-1, ZERO, ZERO, + $ A( K, J+2 ), LDA) + END IF + END IF + J = J + 1 + GO TO 10 + 20 CONTINUE +* + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* + 30 CONTINUE + IF( J.GT.MIN( M, NB ) ) + $ GO TO 40 +* +* K is the column to be factorized +* when being called from DSYTRF_AA, +* > for the first block column, J1 is 1, hence J1+J-1 is J, +* > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, +* + K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF +* +* H(J:M, J) := A(J:M, J) - H(J:M, 1:(J-1)) * L(J, J1:(J-1))^T, +* where H(J:M, J) has been initialized to be A(J:M, J) +* + IF( K.GT.2 ) THEN +* +* K is the column to be factorized +* > for the first block column, K is J, skipping the first two +* columns +* > for the rest of the columns, K is J+1, skipping only the +* first column +* + CALL DGEMV( 'No transpose', MJ, J-K1, + $ -ONE, H( J, K1 ), LDH, + $ A( J, 1 ), LDA, + $ ONE, H( J, J ), 1 ) + END IF +* +* Copy H(J:M, J) into WORK +* + CALL DCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) +* + IF( J.GT.K1 ) THEN +* +* Compute WORK := WORK - L(J:M, J-1) * T(J-1,J), +* where A(J-1, J) = T(J-1, J) and A(J, J-2) = L(J, J-1) +* + ALPHA = -A( J, K-1 ) + CALL DAXPY( MJ, ALPHA, A( J, K-2 ), 1, WORK( 1 ), 1 ) + END IF +* +* Set A(J, J) = T(J, J) +* + A( J, K ) = WORK( 1 ) +* + IF( J.LT.M ) THEN +* +* Compute WORK(2:M) = T(J, J) L((J+1):M, J) +* where A(J, J) = T(J, J) and A((J+1):M, J-1) = L((J+1):M, J) +* + IF( K.GT.1 ) THEN + ALPHA = -A( J, K ) + CALL DAXPY( M-J, ALPHA, A( J+1, K-1 ), 1, + $ WORK( 2 ), 1 ) + ENDIF +* +* Find max(|WORK(2:M)|) +* + I2 = IDAMAX( M-J, WORK( 2 ), 1 ) + 1 + PIV = WORK( I2 ) +* +* Apply symmetric pivot +* + IF( (I2.NE.2) .AND. (PIV.NE.0) ) THEN +* +* Swap WORK(I1) and WORK(I2) +* + I1 = 2 + WORK( I2 ) = WORK( I1 ) + WORK( I1 ) = PIV +* +* Swap A(I1+1:M, I1) with A(I2, I1+1:M) +* + I1 = I1+J-1 + I2 = I2+J-1 + CALL DSWAP( I2-I1-1, A( I1+1, J1+I1-1 ), 1, + $ A( I2, J1+I1 ), LDA ) +* +* Swap A(I2+1:M, I1) with A(I2+1:M, I2) +* + CALL DSWAP( M-I2, A( I2+1, J1+I1-1 ), 1, + $ A( I2+1, J1+I2-1 ), 1 ) +* +* Swap A(I1, I1) with A(I2, I2) +* + PIV = A( I1, J1+I1-1 ) + A( I1, J1+I1-1 ) = A( I2, J1+I2-1 ) + A( I2, J1+I2-1 ) = PIV +* +* Swap H(I1, I1:J1) with H(I2, I2:J1) +* + CALL DSWAP( I1-1, H( I1, 1 ), LDH, H( I2, 1 ), LDH ) + IPIV( I1 ) = I2 +* + IF( I1.GT.(K1-1) ) THEN +* +* Swap L(1:I1-1, I1) with L(1:I1-1, I2), +* skipping the first column +* + CALL DSWAP( I1-K1+1, A( I1, 1 ), LDA, + $ A( I2, 1 ), LDA ) + END IF + ELSE + IPIV( J+1 ) = J+1 + ENDIF +* +* Set A(J+1, J) = T(J+1, J) +* + A( J+1, K ) = WORK( 2 ) +* + IF( J.LT.NB ) THEN +* +* Copy A(J+1:M, J+1) into H(J+1:M, J), +* + CALL DCOPY( M-J, A( J+1, K+1 ), 1, + $ H( J+1, J+1 ), 1 ) + END IF +* +* Compute L(J+2, J+1) = WORK( 3:M ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:M, J) = L(J+2:M, J+1) +* + IF( A( J+1, K ).NE.ZERO ) THEN + ALPHA = ONE / A( J+1, K ) + CALL DCOPY( M-J-1, WORK( 3 ), 1, A( J+2, K ), 1 ) + CALL DSCAL( M-J-1, ALPHA, A( J+2, K ), 1 ) + ELSE + CALL DLASET( 'Full', M-J-1, 1, ZERO, ZERO, + $ A( J+2, K ), LDA ) + END IF + END IF + J = J + 1 + GO TO 30 + 40 CONTINUE + END IF + RETURN +* +* End of DLASYF_AA +* + END diff --git a/dspl/liblapack/SRC/dlasyf_rk.f b/dspl/liblapack/SRC/dlasyf_rk.f new file mode 100644 index 0000000..209b4c8 --- /dev/null +++ b/dspl/liblapack/SRC/dlasyf_rk.f @@ -0,0 +1,965 @@ +*> \brief \b DLASYF_RK computes a partial factorization of a real symmetric indefinite matrix using bounded Bunch-Kaufman (rook) diagonal pivoting method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASYF_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASYF_RK( UPLO, N, NB, KB, A, LDA, E, IPIV, W, LDW, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), E( * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> DLASYF_RK computes a partial factorization of a real symmetric +*> matrix A using the bounded Bunch-Kaufman (rook) diagonal +*> pivoting method. The partial factorization has the form: +*> +*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +*> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) +*> +*> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L', +*> ( L21 I ) ( 0 A22 ) ( 0 I ) +*> +*> where the order of D is at most NB. The actual order is returned in +*> the argument KB, and is either NB or NB-1, or N if N <= NB. +*> +*> DLASYF_RK is an auxiliary routine called by DSYTRF_RK. It uses +*> blocked code (calling Level 3 BLAS) to update the submatrix +*> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The maximum number of columns of the matrix A that should be +*> factored. NB should be at least 2 to allow for 2-by-2 pivot +*> blocks. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns of A that were actually factored. +*> KB is either NB-1 or NB, or N if N <= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, contains: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> IPIV describes the permutation matrix P in the factorization +*> of matrix A as follows. The absolute value of IPIV(k) +*> represents the index of row and column that were +*> interchanged with the k-th row and column. The value of UPLO +*> describes the order in which the interchanges were applied. +*> Also, the sign of IPIV represents the block structure of +*> the symmetric block diagonal matrix D with 1-by-1 or 2-by-2 +*> diagonal blocks which correspond to 1 or 2 interchanges +*> at each factorization step. +*> +*> If UPLO = 'U', +*> ( in factorization order, k decreases from N to 1 ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the submatrix A(1:N,N-KB+1:N); +*> If IPIV(k) = k, no interchange occurred. +*> +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k-1) < 0 means: +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,N-KB+1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k-1) != k-1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the submatrix A(1:N,N-KB+1:N). +*> If -IPIV(k-1) = k-1, no interchange occurred. +*> +*> c) In both cases a) and b) is always ABS( IPIV(k) ) <= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> +*> If UPLO = 'L', +*> ( in factorization order, k increases from 1 to N ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the submatrix A(1:N,1:KB). +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k+1) < 0 means: +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the submatrix A(1:N,1:KB). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k+1) != k+1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the submatrix A(1:N,1:KB). +*> If -IPIV(k+1) = k+1, no interchange occurred. +*> +*> c) In both cases a) and b) is always ABS( IPIV(k) ) >= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (LDW,NB) +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DLASYF_RK( UPLO, N, NB, KB, A, LDA, E, IPIV, W, LDW, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), E( * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER IMAX, ITEMP, J, JB, JJ, JMAX, K, KK, KW, KKW, + $ KP, KSTEP, P, II + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, D11, D12, D21, D22, + $ DTEMP, R1, ROWMAX, T, SFMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IDAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DGEMV, DSCAL, DSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = DLAMCH( 'S' ) +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 +* +* Initilize the first entry of array E, where superdiagonal +* elements of D are stored +* + E( 1 ) = ZERO +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* + K = N + 10 CONTINUE +* +* KW is the column of W which corresponds to column K of A +* + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column KW of W and update it +* + CALL DCOPY( K, A( 1, K ), 1, W( 1, KW ), 1 ) + IF( K.LT.N ) + $ CALL DGEMV( 'No transpose', K, N-K, -ONE, A( 1, K+1 ), + $ LDA, W( K, KW+1 ), LDW, ONE, W( 1, KW ), 1 ) +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( W( K, KW ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = IDAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = ABS( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + CALL DCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) +* +* Set E( K ) to zero +* + IF( K.GT.1 ) + $ E( K ) = ZERO +* + ELSE +* +* ============================================================ +* +* Test for interchange +* +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 12 CONTINUE +* +* Begin pivot search loop body +* +* +* Copy column IMAX to column KW-1 of W and update it +* + CALL DCOPY( IMAX, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 ) + CALL DCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) +* + IF( K.LT.N ) + $ CALL DGEMV( 'No transpose', K, N-K, -ONE, + $ A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW, + $ ONE, W( 1, KW-1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + IDAMAX( K-IMAX, W( IMAX+1, KW-1 ), + $ 1 ) + ROWMAX = ABS( W( JMAX, KW-1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = IDAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + DTEMP = ABS( W( ITEMP, KW-1 ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for +* ABS( W( IMAX, KW-1 ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.(ABS( W( IMAX, KW-1 ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW of W +* + CALL DCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K-1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL DCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 12 +* + END IF +* +* ============================================================ +* + KK = K - KSTEP + 1 +* +* KKW is the column of W which corresponds to column KK of A +* + KKW = NB + KK - N +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column K to column P +* + CALL DCOPY( K-P, A( P+1, K ), 1, A( P, P+1 ), LDA ) + CALL DCOPY( P, A( 1, K ), 1, A( 1, P ), 1 ) +* +* Interchange rows K and P in last N-K+1 columns of A +* and last N-K+2 columns of W +* + CALL DSWAP( N-K+1, A( K, K ), LDA, A( P, K ), LDA ) + CALL DSWAP( N-KK+1, W( K, KKW ), LDW, W( P, KKW ), LDW ) + END IF +* +* Updated column KP is already stored in column KKW of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL DCOPY( K-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + CALL DCOPY( KP, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last N-KK+1 columns +* of A and W +* + CALL DSWAP( N-KK+1, A( KK, KK ), LDA, A( KP, KK ), LDA ) + CALL DSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column KW of W now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Store U(k) in column k of A +* + CALL DCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + IF( K.GT.1 ) THEN + IF( ABS( A( K, K ) ).GE.SFMIN ) THEN + R1 = ONE / A( K, K ) + CALL DSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE IF( A( K, K ).NE.ZERO ) THEN + DO 14 II = 1, K - 1 + A( II, K ) = A( II, K ) / A( K, K ) + 14 CONTINUE + END IF +* +* Store the superdiagonal element of D in array E +* + E( K ) = ZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns KW and KW-1 of W now +* hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* + IF( K.GT.2 ) THEN +* +* Store U(k) and U(k-1) in columns k and k-1 of A +* + D12 = W( K-1, KW ) + D11 = W( K, KW ) / D12 + D22 = W( K-1, KW-1 ) / D12 + T = ONE / ( D11*D22-ONE ) + DO 20 J = 1, K - 2 + A( J, K-1 ) = T*( (D11*W( J, KW-1 )-W( J, KW ) ) / + $ D12 ) + A( J, K ) = T*( ( D22*W( J, KW )-W( J, KW-1 ) ) / + $ D12 ) + 20 CONTINUE + END IF +* +* Copy diagonal elements of D(K) to A, +* copy superdiagonal element of D(K) to E(K) and +* ZERO out superdiagonal entry of A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = ZERO + A( K, K ) = W( K, KW ) + E( K ) = W( K-1, KW ) + E( K-1 ) = ZERO +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12**T = A11 - U12*W**T +* +* computing blocks of NB columns at a time +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + CALL DGEMV( 'No transpose', JJ-J+1, N-K, -ONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, ONE, + $ A( J, JJ ), 1 ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + IF( J.GE.2 ) + $ CALL DGEMM( 'No transpose', 'Transpose', J-1, JB, + $ N-K, -ONE, A( 1, K+1 ), LDA, W( J, KW+1 ), + $ LDW, ONE, A( 1, J ), LDA ) + 50 CONTINUE +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 +* +* Initilize the unused last entry of the subdiagonal array E. +* + E( N ) = ZERO +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column K of W and update it +* + CALL DCOPY( N-K+1, A( K, K ), 1, W( K, K ), 1 ) + IF( K.GT.1 ) + $ CALL DGEMV( 'No transpose', N-K+1, K-1, -ONE, A( K, 1 ), + $ LDA, W( K, 1 ), LDW, ONE, W( K, K ), 1 ) +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( W( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + IDAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = ABS( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + CALL DCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) +* +* Set E( K ) to zero +* + IF( K.LT.N ) + $ E( K ) = ZERO +* + ELSE +* +* ============================================================ +* +* Test for interchange +* +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 72 CONTINUE +* +* Begin pivot search loop body +* +* +* Copy column IMAX to column K+1 of W and update it +* + CALL DCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1) + CALL DCOPY( N-IMAX+1, A( IMAX, IMAX ), 1, + $ W( IMAX, K+1 ), 1 ) + IF( K.GT.1 ) + $ CALL DGEMV( 'No transpose', N-K+1, K-1, -ONE, + $ A( K, 1 ), LDA, W( IMAX, 1 ), LDW, + $ ONE, W( K, K+1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + IDAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = ABS( W( JMAX, K+1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + IDAMAX( N-IMAX, W( IMAX+1, K+1 ), 1) + DTEMP = ABS( W( ITEMP, K+1 ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for +* ABS( W( IMAX, K+1 ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABS( W( IMAX, K+1 ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K of W +* + CALL DCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL DCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 72 +* + END IF +* +* ============================================================ +* + KK = K + KSTEP - 1 +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column K to column P +* + CALL DCOPY( P-K, A( K, K ), 1, A( P, K ), LDA ) + CALL DCOPY( N-P+1, A( P, K ), 1, A( P, P ), 1 ) +* +* Interchange rows K and P in first K columns of A +* and first K+1 columns of W +* + CALL DSWAP( K, A( K, 1 ), LDA, A( P, 1 ), LDA ) + CALL DSWAP( KK, W( K, 1 ), LDW, W( P, 1 ), LDW ) + END IF +* +* Updated column KP is already stored in column KK of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL DCOPY( KP-K-1, A( K+1, KK ), 1, A( KP, K+1 ), LDA ) + CALL DCOPY( N-KP+1, A( KP, KK ), 1, A( KP, KP ), 1 ) +* +* Interchange rows KK and KP in first KK columns of A and W +* + CALL DSWAP( KK, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL DSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* +* Store L(k) in column k of A +* + CALL DCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN + IF( ABS( A( K, K ) ).GE.SFMIN ) THEN + R1 = ONE / A( K, K ) + CALL DSCAL( N-K, R1, A( K+1, K ), 1 ) + ELSE IF( A( K, K ).NE.ZERO ) THEN + DO 74 II = K + 1, N + A( II, K ) = A( II, K ) / A( K, K ) + 74 CONTINUE + END IF +* +* Store the subdiagonal element of D in array E +* + E( K ) = ZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Store L(k) and L(k+1) in columns k and k+1 of A +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / D21 + T = ONE / ( D11*D22-ONE ) + DO 80 J = K + 2, N + A( J, K ) = T*( ( D11*W( J, K )-W( J, K+1 ) ) / + $ D21 ) + A( J, K+1 ) = T*( ( D22*W( J, K+1 )-W( J, K ) ) / + $ D21 ) + 80 CONTINUE + END IF +* +* Copy diagonal elements of D(K) to A, +* copy subdiagonal element of D(K) to E(K) and +* ZERO out subdiagonal entry of A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = ZERO + A( K+1, K+1 ) = W( K+1, K+1 ) + E( K ) = W( K+1, K ) + E( K+1 ) = ZERO +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21**T = A22 - L21*W**T +* +* computing blocks of NB columns at a time +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + CALL DGEMV( 'No transpose', J+JB-JJ, K-1, -ONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, ONE, + $ A( JJ, JJ ), 1 ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL DGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -ONE, A( J+JB, 1 ), LDA, W( J, 1 ), + $ LDW, ONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF +* + RETURN +* +* End of DLASYF_RK +* + END diff --git a/dspl/liblapack/SRC/dlasyf_rook.f b/dspl/liblapack/SRC/dlasyf_rook.f new file mode 100644 index 0000000..49ee7a6 --- /dev/null +++ b/dspl/liblapack/SRC/dlasyf_rook.f @@ -0,0 +1,892 @@ +*> \brief \b DLASYF_ROOK *> DLASYF_ROOK computes a partial factorization of a real symmetric matrix using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLASYF_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLASYF_ROOK( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* .. Scalar Arguments .. +* CHARADLATER UPLO +* INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLASYF_ROOK computes a partial factorization of a real symmetric +*> matrix A using the bounded Bunch-Kaufman ("rook") diagonal +*> pivoting method. The partial factorization has the form: +*> +*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +*> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) +*> +*> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L' +*> ( L21 I ) ( 0 A22 ) ( 0 I ) +*> +*> where the order of D is at most NB. The actual order is returned in +*> the argument KB, and is either NB or NB-1, or N if N <= NB. +*> +*> DLASYF_ROOK is an auxiliary routine called by DSYTRF_ROOK. It uses +*> blocked code (calling Level 3 BLAS) to update the submatrix +*> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The maximum number of columns of the matrix A that should be +*> factored. NB should be at least 2 to allow for 2-by-2 pivot +*> blocks. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns of A that were actually factored. +*> KB is either NB-1 or NB, or N if N <= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, A contains details of the partial factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> Only the last KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> Only the first KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (LDW,NB) +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup doubleSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DLASYF_ROOK( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, + $ INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER IMAX, ITEMP, J, JB, JJ, JMAX, JP1, JP2, K, KK, + $ KW, KKW, KP, KSTEP, P, II + + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, D11, D12, D21, D22, + $ DTEMP, R1, ROWMAX, T, SFMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IDAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMM, DGEMV, DSCAL, DSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = DLAMCH( 'S' ) +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* + K = N + 10 CONTINUE +* +* KW is the column of W which corresponds to column K of A +* + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column KW of W and update it +* + CALL DCOPY( K, A( 1, K ), 1, W( 1, KW ), 1 ) + IF( K.LT.N ) + $ CALL DGEMV( 'No transpose', K, N-K, -ONE, A( 1, K+1 ), + $ LDA, W( K, KW+1 ), LDW, ONE, W( 1, KW ), 1 ) +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( W( K, KW ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = IDAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = ABS( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + CALL DCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + ELSE +* +* ============================================================ +* +* Test for interchange +* +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 12 CONTINUE +* +* Begin pivot search loop body +* +* +* Copy column IMAX to column KW-1 of W and update it +* + CALL DCOPY( IMAX, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 ) + CALL DCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) +* + IF( K.LT.N ) + $ CALL DGEMV( 'No transpose', K, N-K, -ONE, + $ A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW, + $ ONE, W( 1, KW-1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + IDAMAX( K-IMAX, W( IMAX+1, KW-1 ), + $ 1 ) + ROWMAX = ABS( W( JMAX, KW-1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = IDAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + DTEMP = ABS( W( ITEMP, KW-1 ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for +* ABS( W( IMAX, KW-1 ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.(ABS( W( IMAX, KW-1 ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW of W +* + CALL DCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K-1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL DCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 12 +* + END IF +* +* ============================================================ +* + KK = K - KSTEP + 1 +* +* KKW is the column of W which corresponds to column KK of A +* + KKW = NB + KK - N +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column K to column P +* + CALL DCOPY( K-P, A( P+1, K ), 1, A( P, P+1 ), LDA ) + CALL DCOPY( P, A( 1, K ), 1, A( 1, P ), 1 ) +* +* Interchange rows K and P in last N-K+1 columns of A +* and last N-K+2 columns of W +* + CALL DSWAP( N-K+1, A( K, K ), LDA, A( P, K ), LDA ) + CALL DSWAP( N-KK+1, W( K, KKW ), LDW, W( P, KKW ), LDW ) + END IF +* +* Updated column KP is already stored in column KKW of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL DCOPY( K-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + CALL DCOPY( KP, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last N-KK+1 columns +* of A and W +* + CALL DSWAP( N-KK+1, A( KK, KK ), LDA, A( KP, KK ), LDA ) + CALL DSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column KW of W now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Store U(k) in column k of A +* + CALL DCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + IF( K.GT.1 ) THEN + IF( ABS( A( K, K ) ).GE.SFMIN ) THEN + R1 = ONE / A( K, K ) + CALL DSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE IF( A( K, K ).NE.ZERO ) THEN + DO 14 II = 1, K - 1 + A( II, K ) = A( II, K ) / A( K, K ) + 14 CONTINUE + END IF + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns KW and KW-1 of W now +* hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* + IF( K.GT.2 ) THEN +* +* Store U(k) and U(k-1) in columns k and k-1 of A +* + D12 = W( K-1, KW ) + D11 = W( K, KW ) / D12 + D22 = W( K-1, KW-1 ) / D12 + T = ONE / ( D11*D22-ONE ) + DO 20 J = 1, K - 2 + A( J, K-1 ) = T*( (D11*W( J, KW-1 )-W( J, KW ) ) / + $ D12 ) + A( J, K ) = T*( ( D22*W( J, KW )-W( J, KW-1 ) ) / + $ D12 ) + 20 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = W( K-1, KW ) + A( K, K ) = W( K, KW ) + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12**T = A11 - U12*W**T +* +* computing blocks of NB columns at a time +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + CALL DGEMV( 'No transpose', JJ-J+1, N-K, -ONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, ONE, + $ A( J, JJ ), 1 ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + IF( J.GE.2 ) + $ CALL DGEMM( 'No transpose', 'Transpose', J-1, JB, + $ N-K, -ONE, A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, + $ ONE, A( 1, J ), LDA ) + 50 CONTINUE +* +* Put U12 in standard form by partially undoing the interchanges +* in columns k+1:n +* + J = K + 1 + 60 CONTINUE +* + KSTEP = 1 + JP1 = 1 + JJ = J + JP2 = IPIV( J ) + IF( JP2.LT.0 ) THEN + JP2 = -JP2 + J = J + 1 + JP1 = -IPIV( J ) + KSTEP = 2 + END IF +* + J = J + 1 + IF( JP2.NE.JJ .AND. J.LE.N ) + $ CALL DSWAP( N-J+1, A( JP2, J ), LDA, A( JJ, J ), LDA ) + JJ = J - 1 + IF( JP1.NE.JJ .AND. KSTEP.EQ.2 ) + $ CALL DSWAP( N-J+1, A( JP1, J ), LDA, A( JJ, J ), LDA ) + IF( J.LE.N ) + $ GO TO 60 +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column K of W and update it +* + CALL DCOPY( N-K+1, A( K, K ), 1, W( K, K ), 1 ) + IF( K.GT.1 ) + $ CALL DGEMV( 'No transpose', N-K+1, K-1, -ONE, A( K, 1 ), + $ LDA, W( K, 1 ), LDW, ONE, W( K, K ), 1 ) +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( W( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + IDAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = ABS( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + CALL DCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + ELSE +* +* ============================================================ +* +* Test for interchange +* +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 72 CONTINUE +* +* Begin pivot search loop body +* +* +* Copy column IMAX to column K+1 of W and update it +* + CALL DCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1) + CALL DCOPY( N-IMAX+1, A( IMAX, IMAX ), 1, + $ W( IMAX, K+1 ), 1 ) + IF( K.GT.1 ) + $ CALL DGEMV( 'No transpose', N-K+1, K-1, -ONE, + $ A( K, 1 ), LDA, W( IMAX, 1 ), LDW, + $ ONE, W( K, K+1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + IDAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = ABS( W( JMAX, K+1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + IDAMAX( N-IMAX, W( IMAX+1, K+1 ), 1) + DTEMP = ABS( W( ITEMP, K+1 ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for +* ABS( W( IMAX, K+1 ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABS( W( IMAX, K+1 ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K of W +* + CALL DCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL DCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 72 +* + END IF +* +* ============================================================ +* + KK = K + KSTEP - 1 +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column K to column P +* + CALL DCOPY( P-K, A( K, K ), 1, A( P, K ), LDA ) + CALL DCOPY( N-P+1, A( P, K ), 1, A( P, P ), 1 ) +* +* Interchange rows K and P in first K columns of A +* and first K+1 columns of W +* + CALL DSWAP( K, A( K, 1 ), LDA, A( P, 1 ), LDA ) + CALL DSWAP( KK, W( K, 1 ), LDW, W( P, 1 ), LDW ) + END IF +* +* Updated column KP is already stored in column KK of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL DCOPY( KP-K-1, A( K+1, KK ), 1, A( KP, K+1 ), LDA ) + CALL DCOPY( N-KP+1, A( KP, KK ), 1, A( KP, KP ), 1 ) +* +* Interchange rows KK and KP in first KK columns of A and W +* + CALL DSWAP( KK, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL DSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* +* Store L(k) in column k of A +* + CALL DCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN + IF( ABS( A( K, K ) ).GE.SFMIN ) THEN + R1 = ONE / A( K, K ) + CALL DSCAL( N-K, R1, A( K+1, K ), 1 ) + ELSE IF( A( K, K ).NE.ZERO ) THEN + DO 74 II = K + 1, N + A( II, K ) = A( II, K ) / A( K, K ) + 74 CONTINUE + END IF + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Store L(k) and L(k+1) in columns k and k+1 of A +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / D21 + T = ONE / ( D11*D22-ONE ) + DO 80 J = K + 2, N + A( J, K ) = T*( ( D11*W( J, K )-W( J, K+1 ) ) / + $ D21 ) + A( J, K+1 ) = T*( ( D22*W( J, K+1 )-W( J, K ) ) / + $ D21 ) + 80 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = W( K+1, K ) + A( K+1, K+1 ) = W( K+1, K+1 ) + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21**T = A22 - L21*W**T +* +* computing blocks of NB columns at a time +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + CALL DGEMV( 'No transpose', J+JB-JJ, K-1, -ONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, ONE, + $ A( JJ, JJ ), 1 ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL DGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -ONE, A( J+JB, 1 ), LDA, W( J, 1 ), LDW, + $ ONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Put L21 in standard form by partially undoing the interchanges +* in columns 1:k-1 +* + J = K - 1 + 120 CONTINUE +* + KSTEP = 1 + JP1 = 1 + JJ = J + JP2 = IPIV( J ) + IF( JP2.LT.0 ) THEN + JP2 = -JP2 + J = J - 1 + JP1 = -IPIV( J ) + KSTEP = 2 + END IF +* + J = J - 1 + IF( JP2.NE.JJ .AND. J.GE.1 ) + $ CALL DSWAP( J, A( JP2, 1 ), LDA, A( JJ, 1 ), LDA ) + JJ = J + 1 + IF( JP1.NE.JJ .AND. KSTEP.EQ.2 ) + $ CALL DSWAP( J, A( JP1, 1 ), LDA, A( JJ, 1 ), LDA ) + IF( J.GE.1 ) + $ GO TO 120 +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF + RETURN +* +* End of DLASYF_ROOK +* + END diff --git a/dspl/liblapack/SRC/dlat2s.f b/dspl/liblapack/SRC/dlat2s.f new file mode 100644 index 0000000..fa6cc5d --- /dev/null +++ b/dspl/liblapack/SRC/dlat2s.f @@ -0,0 +1,173 @@ +*> \brief \b DLAT2S converts a double-precision triangular matrix to a single-precision triangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAT2S + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAT2S( UPLO, N, A, LDA, SA, LDSA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDSA, N +* .. +* .. Array Arguments .. +* REAL SA( LDSA, * ) +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAT2S converts a DOUBLE PRECISION triangular matrix, SA, to a SINGLE +*> PRECISION triangular matrix, A. +*> +*> RMAX is the overflow for the SINGLE PRECISION arithmetic +*> DLAS2S checks that all the entries of A are between -RMAX and +*> RMAX. If not the conversion is aborted and a flag is raised. +*> +*> This is an auxiliary routine so there is no argument checking. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the N-by-N triangular coefficient matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] SA +*> \verbatim +*> SA is REAL array, dimension (LDSA,N) +*> Only the UPLO part of SA is referenced. On exit, if INFO=0, +*> the N-by-N coefficient matrix SA; if INFO>0, the content of +*> the UPLO part of SA is unspecified. +*> \endverbatim +*> +*> \param[in] LDSA +*> \verbatim +*> LDSA is INTEGER +*> The leading dimension of the array SA. LDSA >= max(1,M). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> = 1: an entry of the matrix A is greater than the SINGLE +*> PRECISION overflow threshold, in this case, the content +*> of the UPLO part of SA in exit is unspecified. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLAT2S( UPLO, N, A, LDA, SA, LDSA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDSA, N +* .. +* .. Array Arguments .. + REAL SA( LDSA, * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION RMAX + LOGICAL UPPER +* .. +* .. External Functions .. + REAL SLAMCH + LOGICAL LSAME + EXTERNAL SLAMCH, LSAME +* .. +* .. Executable Statements .. +* + RMAX = SLAMCH( 'O' ) + UPPER = LSAME( UPLO, 'U' ) + IF( UPPER ) THEN + DO 20 J = 1, N + DO 10 I = 1, J + IF( ( A( I, J ).LT.-RMAX ) .OR. ( A( I, J ).GT.RMAX ) ) + $ THEN + INFO = 1 + GO TO 50 + END IF + SA( I, J ) = A( I, J ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J, N + IF( ( A( I, J ).LT.-RMAX ) .OR. ( A( I, J ).GT.RMAX ) ) + $ THEN + INFO = 1 + GO TO 50 + END IF + SA( I, J ) = A( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + 50 CONTINUE +* + RETURN +* +* End of DLAT2S +* + END diff --git a/dspl/liblapack/SRC/dlatbs.f b/dspl/liblapack/SRC/dlatbs.f new file mode 100644 index 0000000..1489d53 --- /dev/null +++ b/dspl/liblapack/SRC/dlatbs.f @@ -0,0 +1,812 @@ +*> \brief \b DLATBS solves a triangular banded system of equations. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLATBS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLATBS( UPLO, TRANS, DIAG, NORMIN, N, KD, AB, LDAB, X, +* SCALE, CNORM, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORMIN, TRANS, UPLO +* INTEGER INFO, KD, LDAB, N +* DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), CNORM( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLATBS solves one of the triangular systems +*> +*> A *x = s*b or A**T*x = s*b +*> +*> with scaling to prevent overflow, where A is an upper or lower +*> triangular band matrix. Here A**T denotes the transpose of A, x and b +*> are n-element vectors, and s is a scaling factor, usually less than +*> or equal to 1, chosen so that the components of x will be less than +*> the overflow threshold. If the unscaled problem will not cause +*> overflow, the Level 2 BLAS routine DTBSV is called. If the matrix A +*> is singular (A(j,j) = 0 for some j), then s is set to 0 and a +*> non-trivial solution to A*x = 0 is returned. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the operation applied to A. +*> = 'N': Solve A * x = s*b (No transpose) +*> = 'T': Solve A**T* x = s*b (Transpose) +*> = 'C': Solve A**T* x = s*b (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] NORMIN +*> \verbatim +*> NORMIN is CHARACTER*1 +*> Specifies whether CNORM has been set or not. +*> = 'Y': CNORM contains the column norms on entry +*> = 'N': CNORM is not set on entry. On exit, the norms will +*> be computed and stored in CNORM. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of subdiagonals or superdiagonals in the +*> triangular matrix A. KD >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> The upper or lower triangular band matrix A, stored in the +*> first KD+1 rows of the array. The j-th column of A is stored +*> in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (N) +*> On entry, the right hand side b of the triangular system. +*> On exit, X is overwritten by the solution vector x. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> The scaling factor s for the triangular system +*> A * x = s*b or A**T* x = s*b. +*> If SCALE = 0, the matrix A is singular or badly scaled, and +*> the vector x is an exact or approximate solution to A*x = 0. +*> \endverbatim +*> +*> \param[in,out] CNORM +*> \verbatim +*> CNORM is DOUBLE PRECISION array, dimension (N) +*> +*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +*> contains the norm of the off-diagonal part of the j-th column +*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal +*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +*> must be greater than or equal to the 1-norm. +*> +*> If NORMIN = 'N', CNORM is an output argument and CNORM(j) +*> returns the 1-norm of the offdiagonal part of the j-th column +*> of A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> A rough bound on x is computed; if that is less than overflow, DTBSV +*> is called, otherwise, specific code is used which checks for possible +*> overflow or divide-by-zero at every operation. +*> +*> A columnwise scheme is used for solving A*x = b. The basic algorithm +*> if A is lower triangular is +*> +*> x[1:n] := b[1:n] +*> for j = 1, ..., n +*> x(j) := x(j) / A(j,j) +*> x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +*> end +*> +*> Define bounds on the components of x after j iterations of the loop: +*> M(j) = bound on x[1:j] +*> G(j) = bound on x[j+1:n] +*> Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +*> +*> Then for iteration j+1 we have +*> M(j+1) <= G(j) / | A(j+1,j+1) | +*> G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +*> <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +*> +*> where CNORM(j+1) is greater than or equal to the infinity-norm of +*> column j+1 of A, not counting the diagonal. Hence +*> +*> G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +*> 1<=i<=j +*> and +*> +*> |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +*> 1<=i< j +*> +*> Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTBSV if the +*> reciprocal of the largest M(j), j=1,..,n, is larger than +*> max(underflow, 1/overflow). +*> +*> The bound on x(j) is also used to determine when a step in the +*> columnwise method can be performed without fear of overflow. If +*> the computed bound is greater than a large constant, x is scaled to +*> prevent overflow, but if the bound overflows, x is set to 0, x(j) to +*> 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +*> +*> Similarly, a row-wise scheme is used to solve A**T*x = b. The basic +*> algorithm for A upper triangular is +*> +*> for j = 1, ..., n +*> x(j) := ( b(j) - A[1:j-1,j]**T * x[1:j-1] ) / A(j,j) +*> end +*> +*> We simultaneously compute two bounds +*> G(j) = bound on ( b(i) - A[1:i-1,i]**T * x[1:i-1] ), 1<=i<=j +*> M(j) = bound on x(i), 1<=i<=j +*> +*> The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +*> add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +*> Then the bound on x(j) is +*> +*> M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +*> +*> <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +*> 1<=i<=j +*> +*> and we can safely call DTBSV if 1/M(n) and 1/G(n) are both greater +*> than max(underflow, 1/overflow). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLATBS( UPLO, TRANS, DIAG, NORMIN, N, KD, AB, LDAB, X, + $ SCALE, CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, KD, LDAB, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), CNORM( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST, JLEN, MAIND + DOUBLE PRECISION BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS, + $ TMAX, TSCAL, USCAL, XBND, XJ, XMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DASUM, DDOT, DLAMCH + EXTERNAL LSAME, IDAMAX, DASUM, DDOT, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DSCAL, DTBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( KD.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLATBS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + DO 10 J = 1, N + JLEN = MIN( KD, J-1 ) + CNORM( J ) = DASUM( JLEN, AB( KD+1-JLEN, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) THEN + CNORM( J ) = DASUM( JLEN, AB( 2, J ), 1 ) + ELSE + CNORM( J ) = ZERO + END IF + 20 CONTINUE + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM. +* + IMAX = IDAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM ) THEN + TSCAL = ONE + ELSE + TSCAL = ONE / ( SMLNUM*TMAX ) + CALL DSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine DTBSV can be used. +* + J = IDAMAX( N, X, 1 ) + XMAX = ABS( X( J ) ) + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + MAIND = KD + 1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + MAIND = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 50 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 30 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* M(j) = G(j-1) / abs(A(j,j)) +* + TJJ = ABS( AB( MAIND, J ) ) + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 30 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 40 CONTINUE + END IF + 50 CONTINUE +* + ELSE +* +* Compute the growth in A**T * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + MAIND = KD + 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + MAIND = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 80 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 60 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + TJJ = ABS( AB( MAIND, J ) ) + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + 60 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 70 CONTINUE + END IF + 80 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL DTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = BIGNUM / XMAX + CALL DSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 110 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 100 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 90 I = 1, N + X( I ) = ZERO + 90 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 100 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL DSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - +* x(j)* A(max(1,j-kd):j-1,j) +* + JLEN = MIN( KD, J-1 ) + CALL DAXPY( JLEN, -X( J )*TSCAL, + $ AB( KD+1-JLEN, J ), 1, X( J-JLEN ), 1 ) + I = IDAMAX( J-1, X, 1 ) + XMAX = ABS( X( I ) ) + END IF + ELSE IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) - +* x(j) * A(j+1:min(j+kd,n),j) +* + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) + $ CALL DAXPY( JLEN, -X( J )*TSCAL, AB( 2, J ), 1, + $ X( J+1 ), 1 ) + I = J + IDAMAX( N-J, X( J+1 ), 1 ) + XMAX = ABS( X( I ) ) + END IF + 110 CONTINUE +* + ELSE +* +* Solve A**T * x = b +* + DO 160 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = ABS( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = USCAL / TJJS + END IF + IF( REC.LT.ONE ) THEN + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + SUMJ = ZERO + IF( USCAL.EQ.ONE ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call DDOT to perform the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + SUMJ = DDOT( JLEN, AB( KD+1-JLEN, J ), 1, + $ X( J-JLEN ), 1 ) + ELSE + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) + $ SUMJ = DDOT( JLEN, AB( 2, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + DO 120 I = 1, JLEN + SUMJ = SUMJ + ( AB( KD+I-JLEN, J )*USCAL )* + $ X( J-JLEN-1+I ) + 120 CONTINUE + ELSE + JLEN = MIN( KD, N-J ) + DO 130 I = 1, JLEN + SUMJ = SUMJ + ( AB( I+1, J )*USCAL )*X( J+I ) + 130 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.TSCAL ) THEN +* +* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - SUMJ + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 150 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A**T*x = 0. +* + DO 140 I = 1, N + X( I ) = ZERO + 140 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 150 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - sumj if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = X( J ) / TJJS - SUMJ + END IF + XMAX = MAX( XMAX, ABS( X( J ) ) ) + 160 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL DSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of DLATBS +* + END diff --git a/dspl/liblapack/SRC/dlatdf.f b/dspl/liblapack/SRC/dlatdf.f new file mode 100644 index 0000000..fd05059 --- /dev/null +++ b/dspl/liblapack/SRC/dlatdf.f @@ -0,0 +1,323 @@ +*> \brief \b DLATDF uses the LU factorization of the n-by-n matrix computed by sgetc2 and computes a contribution to the reciprocal Dif-estimate. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLATDF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLATDF( IJOB, N, Z, LDZ, RHS, RDSUM, RDSCAL, IPIV, +* JPIV ) +* +* .. Scalar Arguments .. +* INTEGER IJOB, LDZ, N +* DOUBLE PRECISION RDSCAL, RDSUM +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), JPIV( * ) +* DOUBLE PRECISION RHS( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLATDF uses the LU factorization of the n-by-n matrix Z computed by +*> DGETC2 and computes a contribution to the reciprocal Dif-estimate +*> by solving Z * x = b for x, and choosing the r.h.s. b such that +*> the norm of x is as large as possible. On entry RHS = b holds the +*> contribution from earlier solved sub-systems, and on return RHS = x. +*> +*> The factorization of Z returned by DGETC2 has the form Z = P*L*U*Q, +*> where P and Q are permutation matrices. L is lower triangular with +*> unit diagonal elements and U is upper triangular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] IJOB +*> \verbatim +*> IJOB is INTEGER +*> IJOB = 2: First compute an approximative null-vector e +*> of Z using DGECON, e is normalized and solve for +*> Zx = +-e - f with the sign giving the greater value +*> of 2-norm(x). About 5 times as expensive as Default. +*> IJOB .ne. 2: Local look ahead strategy where all entries of +*> the r.h.s. b is chosen as either +1 or -1 (Default). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Z. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> On entry, the LU part of the factorization of the n-by-n +*> matrix Z computed by DGETC2: Z = P * L * U * Q +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDA >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] RHS +*> \verbatim +*> RHS is DOUBLE PRECISION array, dimension (N) +*> On entry, RHS contains contributions from other subsystems. +*> On exit, RHS contains the solution of the subsystem with +*> entries acoording to the value of IJOB (see above). +*> \endverbatim +*> +*> \param[in,out] RDSUM +*> \verbatim +*> RDSUM is DOUBLE PRECISION +*> On entry, the sum of squares of computed contributions to +*> the Dif-estimate under computation by DTGSYL, where the +*> scaling factor RDSCAL (see below) has been factored out. +*> On exit, the corresponding sum of squares updated with the +*> contributions from the current sub-system. +*> If TRANS = 'T' RDSUM is not touched. +*> NOTE: RDSUM only makes sense when DTGSY2 is called by STGSYL. +*> \endverbatim +*> +*> \param[in,out] RDSCAL +*> \verbatim +*> RDSCAL is DOUBLE PRECISION +*> On entry, scaling factor used to prevent overflow in RDSUM. +*> On exit, RDSCAL is updated w.r.t. the current contributions +*> in RDSUM. +*> If TRANS = 'T', RDSCAL is not touched. +*> NOTE: RDSCAL only makes sense when DTGSY2 is called by +*> DTGSYL. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N). +*> The pivot indices; for 1 <= i <= N, row i of the +*> matrix has been interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] JPIV +*> \verbatim +*> JPIV is INTEGER array, dimension (N). +*> The pivot indices; for 1 <= j <= N, column j of the +*> matrix has been interchanged with column JPIV(j). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> This routine is a further developed implementation of algorithm +*> BSOLVE in [1] using complete pivoting in the LU factorization. +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> +*> [1] Bo Kagstrom and Lars Westin, +*> Generalized Schur Methods with Condition Estimators for +*> Solving the Generalized Sylvester Equation, IEEE Transactions +*> on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751. +*> +*> [2] Peter Poromaa, +*> On Efficient and Robust Estimators for the Separation +*> between two Regular Matrix Pairs with Applications in +*> Condition Estimation. Report IMINF-95.05, Departement of +*> Computing Science, Umea University, S-901 87 Umea, Sweden, 1995. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLATDF( IJOB, N, Z, LDZ, RHS, RDSUM, RDSCAL, IPIV, + $ JPIV ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER IJOB, LDZ, N + DOUBLE PRECISION RDSCAL, RDSUM +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + DOUBLE PRECISION RHS( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXDIM + PARAMETER ( MAXDIM = 8 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J, K + DOUBLE PRECISION BM, BP, PMONE, SMINU, SPLUS, TEMP +* .. +* .. Local Arrays .. + INTEGER IWORK( MAXDIM ) + DOUBLE PRECISION WORK( 4*MAXDIM ), XM( MAXDIM ), XP( MAXDIM ) +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGECON, DGESC2, DLASSQ, DLASWP, + $ DSCAL +* .. +* .. External Functions .. + DOUBLE PRECISION DASUM, DDOT + EXTERNAL DASUM, DDOT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( IJOB.NE.2 ) THEN +* +* Apply permutations IPIV to RHS +* + CALL DLASWP( 1, RHS, LDZ, 1, N-1, IPIV, 1 ) +* +* Solve for L-part choosing RHS either to +1 or -1. +* + PMONE = -ONE +* + DO 10 J = 1, N - 1 + BP = RHS( J ) + ONE + BM = RHS( J ) - ONE + SPLUS = ONE +* +* Look-ahead for L-part RHS(1:N-1) = + or -1, SPLUS and +* SMIN computed more efficiently than in BSOLVE [1]. +* + SPLUS = SPLUS + DDOT( N-J, Z( J+1, J ), 1, Z( J+1, J ), 1 ) + SMINU = DDOT( N-J, Z( J+1, J ), 1, RHS( J+1 ), 1 ) + SPLUS = SPLUS*RHS( J ) + IF( SPLUS.GT.SMINU ) THEN + RHS( J ) = BP + ELSE IF( SMINU.GT.SPLUS ) THEN + RHS( J ) = BM + ELSE +* +* In this case the updating sums are equal and we can +* choose RHS(J) +1 or -1. The first time this happens +* we choose -1, thereafter +1. This is a simple way to +* get good estimates of matrices like Byers well-known +* example (see [1]). (Not done in BSOLVE.) +* + RHS( J ) = RHS( J ) + PMONE + PMONE = ONE + END IF +* +* Compute the remaining r.h.s. +* + TEMP = -RHS( J ) + CALL DAXPY( N-J, TEMP, Z( J+1, J ), 1, RHS( J+1 ), 1 ) +* + 10 CONTINUE +* +* Solve for U-part, look-ahead for RHS(N) = +-1. This is not done +* in BSOLVE and will hopefully give us a better estimate because +* any ill-conditioning of the original matrix is transfered to U +* and not to L. U(N, N) is an approximation to sigma_min(LU). +* + CALL DCOPY( N-1, RHS, 1, XP, 1 ) + XP( N ) = RHS( N ) + ONE + RHS( N ) = RHS( N ) - ONE + SPLUS = ZERO + SMINU = ZERO + DO 30 I = N, 1, -1 + TEMP = ONE / Z( I, I ) + XP( I ) = XP( I )*TEMP + RHS( I ) = RHS( I )*TEMP + DO 20 K = I + 1, N + XP( I ) = XP( I ) - XP( K )*( Z( I, K )*TEMP ) + RHS( I ) = RHS( I ) - RHS( K )*( Z( I, K )*TEMP ) + 20 CONTINUE + SPLUS = SPLUS + ABS( XP( I ) ) + SMINU = SMINU + ABS( RHS( I ) ) + 30 CONTINUE + IF( SPLUS.GT.SMINU ) + $ CALL DCOPY( N, XP, 1, RHS, 1 ) +* +* Apply the permutations JPIV to the computed solution (RHS) +* + CALL DLASWP( 1, RHS, LDZ, 1, N-1, JPIV, -1 ) +* +* Compute the sum of squares +* + CALL DLASSQ( N, RHS, 1, RDSCAL, RDSUM ) +* + ELSE +* +* IJOB = 2, Compute approximate nullvector XM of Z +* + CALL DGECON( 'I', N, Z, LDZ, ONE, TEMP, WORK, IWORK, INFO ) + CALL DCOPY( N, WORK( N+1 ), 1, XM, 1 ) +* +* Compute RHS +* + CALL DLASWP( 1, XM, LDZ, 1, N-1, IPIV, -1 ) + TEMP = ONE / SQRT( DDOT( N, XM, 1, XM, 1 ) ) + CALL DSCAL( N, TEMP, XM, 1 ) + CALL DCOPY( N, XM, 1, XP, 1 ) + CALL DAXPY( N, ONE, RHS, 1, XP, 1 ) + CALL DAXPY( N, -ONE, XM, 1, RHS, 1 ) + CALL DGESC2( N, Z, LDZ, RHS, IPIV, JPIV, TEMP ) + CALL DGESC2( N, Z, LDZ, XP, IPIV, JPIV, TEMP ) + IF( DASUM( N, XP, 1 ).GT.DASUM( N, RHS, 1 ) ) + $ CALL DCOPY( N, XP, 1, RHS, 1 ) +* +* Compute the sum of squares +* + CALL DLASSQ( N, RHS, 1, RDSCAL, RDSUM ) +* + END IF +* + RETURN +* +* End of DLATDF +* + END diff --git a/dspl/liblapack/SRC/dlatps.f b/dspl/liblapack/SRC/dlatps.f new file mode 100644 index 0000000..c340578 --- /dev/null +++ b/dspl/liblapack/SRC/dlatps.f @@ -0,0 +1,795 @@ +*> \brief \b DLATPS solves a triangular system of equations with the matrix held in packed storage. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLATPS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLATPS( UPLO, TRANS, DIAG, NORMIN, N, AP, X, SCALE, +* CNORM, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORMIN, TRANS, UPLO +* INTEGER INFO, N +* DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AP( * ), CNORM( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLATPS solves one of the triangular systems +*> +*> A *x = s*b or A**T*x = s*b +*> +*> with scaling to prevent overflow, where A is an upper or lower +*> triangular matrix stored in packed form. Here A**T denotes the +*> transpose of A, x and b are n-element vectors, and s is a scaling +*> factor, usually less than or equal to 1, chosen so that the +*> components of x will be less than the overflow threshold. If the +*> unscaled problem will not cause overflow, the Level 2 BLAS routine +*> DTPSV is called. If the matrix A is singular (A(j,j) = 0 for some j), +*> then s is set to 0 and a non-trivial solution to A*x = 0 is returned. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the operation applied to A. +*> = 'N': Solve A * x = s*b (No transpose) +*> = 'T': Solve A**T* x = s*b (Transpose) +*> = 'C': Solve A**T* x = s*b (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] NORMIN +*> \verbatim +*> NORMIN is CHARACTER*1 +*> Specifies whether CNORM has been set or not. +*> = 'Y': CNORM contains the column norms on entry +*> = 'N': CNORM is not set on entry. On exit, the norms will +*> be computed and stored in CNORM. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> The upper or lower triangular matrix A, packed columnwise in +*> a linear array. The j-th column of A is stored in the array +*> AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (N) +*> On entry, the right hand side b of the triangular system. +*> On exit, X is overwritten by the solution vector x. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> The scaling factor s for the triangular system +*> A * x = s*b or A**T* x = s*b. +*> If SCALE = 0, the matrix A is singular or badly scaled, and +*> the vector x is an exact or approximate solution to A*x = 0. +*> \endverbatim +*> +*> \param[in,out] CNORM +*> \verbatim +*> CNORM is DOUBLE PRECISION array, dimension (N) +*> +*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +*> contains the norm of the off-diagonal part of the j-th column +*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal +*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +*> must be greater than or equal to the 1-norm. +*> +*> If NORMIN = 'N', CNORM is an output argument and CNORM(j) +*> returns the 1-norm of the offdiagonal part of the j-th column +*> of A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> A rough bound on x is computed; if that is less than overflow, DTPSV +*> is called, otherwise, specific code is used which checks for possible +*> overflow or divide-by-zero at every operation. +*> +*> A columnwise scheme is used for solving A*x = b. The basic algorithm +*> if A is lower triangular is +*> +*> x[1:n] := b[1:n] +*> for j = 1, ..., n +*> x(j) := x(j) / A(j,j) +*> x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +*> end +*> +*> Define bounds on the components of x after j iterations of the loop: +*> M(j) = bound on x[1:j] +*> G(j) = bound on x[j+1:n] +*> Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +*> +*> Then for iteration j+1 we have +*> M(j+1) <= G(j) / | A(j+1,j+1) | +*> G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +*> <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +*> +*> where CNORM(j+1) is greater than or equal to the infinity-norm of +*> column j+1 of A, not counting the diagonal. Hence +*> +*> G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +*> 1<=i<=j +*> and +*> +*> |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +*> 1<=i< j +*> +*> Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTPSV if the +*> reciprocal of the largest M(j), j=1,..,n, is larger than +*> max(underflow, 1/overflow). +*> +*> The bound on x(j) is also used to determine when a step in the +*> columnwise method can be performed without fear of overflow. If +*> the computed bound is greater than a large constant, x is scaled to +*> prevent overflow, but if the bound overflows, x is set to 0, x(j) to +*> 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +*> +*> Similarly, a row-wise scheme is used to solve A**T*x = b. The basic +*> algorithm for A upper triangular is +*> +*> for j = 1, ..., n +*> x(j) := ( b(j) - A[1:j-1,j]**T * x[1:j-1] ) / A(j,j) +*> end +*> +*> We simultaneously compute two bounds +*> G(j) = bound on ( b(i) - A[1:i-1,i]**T * x[1:i-1] ), 1<=i<=j +*> M(j) = bound on x(i), 1<=i<=j +*> +*> The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +*> add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +*> Then the bound on x(j) is +*> +*> M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +*> +*> <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +*> 1<=i<=j +*> +*> and we can safely call DTPSV if 1/M(n) and 1/G(n) are both greater +*> than max(underflow, 1/overflow). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLATPS( UPLO, TRANS, DIAG, NORMIN, N, AP, X, SCALE, + $ CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), CNORM( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, IP, J, JFIRST, JINC, JLAST, JLEN + DOUBLE PRECISION BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS, + $ TMAX, TSCAL, USCAL, XBND, XJ, XMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DASUM, DDOT, DLAMCH + EXTERNAL LSAME, IDAMAX, DASUM, DDOT, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DSCAL, DTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLATPS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + IP = 1 + DO 10 J = 1, N + CNORM( J ) = DASUM( J-1, AP( IP ), 1 ) + IP = IP + J + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + IP = 1 + DO 20 J = 1, N - 1 + CNORM( J ) = DASUM( N-J, AP( IP+1 ), 1 ) + IP = IP + N - J + 1 + 20 CONTINUE + CNORM( N ) = ZERO + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM. +* + IMAX = IDAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM ) THEN + TSCAL = ONE + ELSE + TSCAL = ONE / ( SMLNUM*TMAX ) + CALL DSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine DTPSV can be used. +* + J = IDAMAX( N, X, 1 ) + XMAX = ABS( X( J ) ) + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 50 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = N + DO 30 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* M(j) = G(j-1) / abs(A(j,j)) +* + TJJ = ABS( AP( IP ) ) + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + IP = IP + JINC*JLEN + JLEN = JLEN - 1 + 30 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 40 CONTINUE + END IF + 50 CONTINUE +* + ELSE +* +* Compute the growth in A**T * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 80 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = 1 + DO 60 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + TJJ = ABS( AP( IP ) ) + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + JLEN = JLEN + 1 + IP = IP + JINC*JLEN + 60 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 70 CONTINUE + END IF + 80 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL DTPSV( UPLO, TRANS, DIAG, N, AP, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = BIGNUM / XMAX + CALL DSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + IP = JFIRST*( JFIRST+1 ) / 2 + DO 110 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 100 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 90 I = 1, N + X( I ) = ZERO + 90 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 100 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL DSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +* + CALL DAXPY( J-1, -X( J )*TSCAL, AP( IP-J+1 ), 1, X, + $ 1 ) + I = IDAMAX( J-1, X, 1 ) + XMAX = ABS( X( I ) ) + END IF + IP = IP - J + ELSE + IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +* + CALL DAXPY( N-J, -X( J )*TSCAL, AP( IP+1 ), 1, + $ X( J+1 ), 1 ) + I = J + IDAMAX( N-J, X( J+1 ), 1 ) + XMAX = ABS( X( I ) ) + END IF + IP = IP + N - J + 1 + END IF + 110 CONTINUE +* + ELSE +* +* Solve A**T * x = b +* + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = 1 + DO 160 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = ABS( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = USCAL / TJJS + END IF + IF( REC.LT.ONE ) THEN + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + SUMJ = ZERO + IF( USCAL.EQ.ONE ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call DDOT to perform the dot product. +* + IF( UPPER ) THEN + SUMJ = DDOT( J-1, AP( IP-J+1 ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + SUMJ = DDOT( N-J, AP( IP+1 ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 120 I = 1, J - 1 + SUMJ = SUMJ + ( AP( IP-J+I )*USCAL )*X( I ) + 120 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 130 I = 1, N - J + SUMJ = SUMJ + ( AP( IP+I )*USCAL )*X( J+I ) + 130 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.TSCAL ) THEN +* +* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - SUMJ + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 150 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A**T*x = 0. +* + DO 140 I = 1, N + X( I ) = ZERO + 140 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 150 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - sumj if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = X( J ) / TJJS - SUMJ + END IF + XMAX = MAX( XMAX, ABS( X( J ) ) ) + JLEN = JLEN + 1 + IP = IP + JINC*JLEN + 160 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL DSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of DLATPS +* + END diff --git a/dspl/liblapack/SRC/dlatrd.f b/dspl/liblapack/SRC/dlatrd.f new file mode 100644 index 0000000..a1df43e --- /dev/null +++ b/dspl/liblapack/SRC/dlatrd.f @@ -0,0 +1,336 @@ +*> \brief \b DLATRD reduces the first nb rows and columns of a symmetric/Hermitian matrix A to real tridiagonal form by an orthogonal similarity transformation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLATRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), E( * ), TAU( * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLATRD reduces NB rows and columns of a real symmetric matrix A to +*> symmetric tridiagonal form by an orthogonal similarity +*> transformation Q**T * A * Q, and returns the matrices V and W which are +*> needed to apply the transformation to the unreduced part of A. +*> +*> If UPLO = 'U', DLATRD reduces the last NB rows and columns of a +*> matrix, of which the upper triangle is supplied; +*> if UPLO = 'L', DLATRD reduces the first NB rows and columns of a +*> matrix, of which the lower triangle is supplied. +*> +*> This is an auxiliary routine called by DSYTRD. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of rows and columns to be reduced. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit: +*> if UPLO = 'U', the last NB columns have been reduced to +*> tridiagonal form, with the diagonal elements overwriting +*> the diagonal elements of A; the elements above the diagonal +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of elementary reflectors; +*> if UPLO = 'L', the first NB columns have been reduced to +*> tridiagonal form, with the diagonal elements overwriting +*> the diagonal elements of A; the elements below the diagonal +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= (1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal +*> elements of the last NB columns of the reduced matrix; +*> if UPLO = 'L', E(1:nb) contains the subdiagonal elements of +*> the first NB columns of the reduced matrix. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (N-1) +*> The scalar factors of the elementary reflectors, stored in +*> TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. +*> See Further Details. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (LDW,NB) +*> The n-by-nb matrix W required to update the unreduced part +*> of A. +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n) H(n-1) . . . H(n-nb+1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), +*> and tau in TAU(i-1). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(nb). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), +*> and tau in TAU(i). +*> +*> The elements of the vectors v together form the n-by-nb matrix V +*> which is needed, with W, to apply the transformation to the unreduced +*> part of the matrix, using a symmetric rank-2k update of the form: +*> A := A - V*W**T - W*V**T. +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5 and nb = 2: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( a a a v4 v5 ) ( d ) +*> ( a a v4 v5 ) ( 1 d ) +*> ( a 1 v5 ) ( v1 1 a ) +*> ( d 1 ) ( v1 v2 a a ) +*> ( d ) ( v1 v2 a a a ) +*> +*> where d denotes a diagonal element of the reduced matrix, a denotes +*> an element of the original matrix that is unchanged, and vi denotes +*> an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDW, N, NB +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), E( * ), TAU( * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, HALF + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, HALF = 0.5D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IW + DOUBLE PRECISION ALPHA +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DGEMV, DLARFG, DSCAL, DSYMV +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Reduce last NB columns of upper triangle +* + DO 10 I = N, N - NB + 1, -1 + IW = I - N + NB + IF( I.LT.N ) THEN +* +* Update A(1:i,i) +* + CALL DGEMV( 'No transpose', I, N-I, -ONE, A( 1, I+1 ), + $ LDA, W( I, IW+1 ), LDW, ONE, A( 1, I ), 1 ) + CALL DGEMV( 'No transpose', I, N-I, -ONE, W( 1, IW+1 ), + $ LDW, A( I, I+1 ), LDA, ONE, A( 1, I ), 1 ) + END IF + IF( I.GT.1 ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(1:i-2,i) +* + CALL DLARFG( I-1, A( I-1, I ), A( 1, I ), 1, TAU( I-1 ) ) + E( I-1 ) = A( I-1, I ) + A( I-1, I ) = ONE +* +* Compute W(1:i-1,i) +* + CALL DSYMV( 'Upper', I-1, ONE, A, LDA, A( 1, I ), 1, + $ ZERO, W( 1, IW ), 1 ) + IF( I.LT.N ) THEN + CALL DGEMV( 'Transpose', I-1, N-I, ONE, W( 1, IW+1 ), + $ LDW, A( 1, I ), 1, ZERO, W( I+1, IW ), 1 ) + CALL DGEMV( 'No transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + CALL DGEMV( 'Transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( 1, I ), 1, ZERO, W( I+1, IW ), 1 ) + CALL DGEMV( 'No transpose', I-1, N-I, -ONE, + $ W( 1, IW+1 ), LDW, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + END IF + CALL DSCAL( I-1, TAU( I-1 ), W( 1, IW ), 1 ) + ALPHA = -HALF*TAU( I-1 )*DDOT( I-1, W( 1, IW ), 1, + $ A( 1, I ), 1 ) + CALL DAXPY( I-1, ALPHA, A( 1, I ), 1, W( 1, IW ), 1 ) + END IF +* + 10 CONTINUE + ELSE +* +* Reduce first NB columns of lower triangle +* + DO 20 I = 1, NB +* +* Update A(i:n,i) +* + CALL DGEMV( 'No transpose', N-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, W( I, 1 ), LDW, ONE, A( I, I ), 1 ) + CALL DGEMV( 'No transpose', N-I+1, I-1, -ONE, W( I, 1 ), + $ LDW, A( I, 1 ), LDA, ONE, A( I, I ), 1 ) + IF( I.LT.N ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:n,i) +* + CALL DLARFG( N-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, + $ TAU( I ) ) + E( I ) = A( I+1, I ) + A( I+1, I ) = ONE +* +* Compute W(i+1:n,i) +* + CALL DSYMV( 'Lower', N-I, ONE, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, W( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', N-I, I-1, ONE, W( I+1, 1 ), LDW, + $ A( I+1, I ), 1, ZERO, W( 1, I ), 1 ) + CALL DGEMV( 'No transpose', N-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL DGEMV( 'Transpose', N-I, I-1, ONE, A( I+1, 1 ), LDA, + $ A( I+1, I ), 1, ZERO, W( 1, I ), 1 ) + CALL DGEMV( 'No transpose', N-I, I-1, -ONE, W( I+1, 1 ), + $ LDW, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL DSCAL( N-I, TAU( I ), W( I+1, I ), 1 ) + ALPHA = -HALF*TAU( I )*DDOT( N-I, W( I+1, I ), 1, + $ A( I+1, I ), 1 ) + CALL DAXPY( N-I, ALPHA, A( I+1, I ), 1, W( I+1, I ), 1 ) + END IF +* + 20 CONTINUE + END IF +* + RETURN +* +* End of DLATRD +* + END diff --git a/dspl/liblapack/SRC/dlatrs.f b/dspl/liblapack/SRC/dlatrs.f new file mode 100644 index 0000000..5ad5f66 --- /dev/null +++ b/dspl/liblapack/SRC/dlatrs.f @@ -0,0 +1,787 @@ +*> \brief \b DLATRS solves a triangular system of equations with the scale factor set to prevent overflow. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLATRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE, +* CNORM, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORMIN, TRANS, UPLO +* INTEGER INFO, LDA, N +* DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), CNORM( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLATRS solves one of the triangular systems +*> +*> A *x = s*b or A**T *x = s*b +*> +*> with scaling to prevent overflow. Here A is an upper or lower +*> triangular matrix, A**T denotes the transpose of A, x and b are +*> n-element vectors, and s is a scaling factor, usually less than +*> or equal to 1, chosen so that the components of x will be less than +*> the overflow threshold. If the unscaled problem will not cause +*> overflow, the Level 2 BLAS routine DTRSV is called. If the matrix A +*> is singular (A(j,j) = 0 for some j), then s is set to 0 and a +*> non-trivial solution to A*x = 0 is returned. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the operation applied to A. +*> = 'N': Solve A * x = s*b (No transpose) +*> = 'T': Solve A**T* x = s*b (Transpose) +*> = 'C': Solve A**T* x = s*b (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] NORMIN +*> \verbatim +*> NORMIN is CHARACTER*1 +*> Specifies whether CNORM has been set or not. +*> = 'Y': CNORM contains the column norms on entry +*> = 'N': CNORM is not set on entry. On exit, the norms will +*> be computed and stored in CNORM. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The triangular matrix A. If UPLO = 'U', the leading n by n +*> upper triangular part of the array A contains the upper +*> triangular matrix, and the strictly lower triangular part of +*> A is not referenced. If UPLO = 'L', the leading n by n lower +*> triangular part of the array A contains the lower triangular +*> matrix, and the strictly upper triangular part of A is not +*> referenced. If DIAG = 'U', the diagonal elements of A are +*> also not referenced and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max (1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (N) +*> On entry, the right hand side b of the triangular system. +*> On exit, X is overwritten by the solution vector x. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> The scaling factor s for the triangular system +*> A * x = s*b or A**T* x = s*b. +*> If SCALE = 0, the matrix A is singular or badly scaled, and +*> the vector x is an exact or approximate solution to A*x = 0. +*> \endverbatim +*> +*> \param[in,out] CNORM +*> \verbatim +*> CNORM is DOUBLE PRECISION array, dimension (N) +*> +*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +*> contains the norm of the off-diagonal part of the j-th column +*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal +*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +*> must be greater than or equal to the 1-norm. +*> +*> If NORMIN = 'N', CNORM is an output argument and CNORM(j) +*> returns the 1-norm of the offdiagonal part of the j-th column +*> of A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> A rough bound on x is computed; if that is less than overflow, DTRSV +*> is called, otherwise, specific code is used which checks for possible +*> overflow or divide-by-zero at every operation. +*> +*> A columnwise scheme is used for solving A*x = b. The basic algorithm +*> if A is lower triangular is +*> +*> x[1:n] := b[1:n] +*> for j = 1, ..., n +*> x(j) := x(j) / A(j,j) +*> x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +*> end +*> +*> Define bounds on the components of x after j iterations of the loop: +*> M(j) = bound on x[1:j] +*> G(j) = bound on x[j+1:n] +*> Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +*> +*> Then for iteration j+1 we have +*> M(j+1) <= G(j) / | A(j+1,j+1) | +*> G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +*> <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +*> +*> where CNORM(j+1) is greater than or equal to the infinity-norm of +*> column j+1 of A, not counting the diagonal. Hence +*> +*> G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +*> 1<=i<=j +*> and +*> +*> |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +*> 1<=i< j +*> +*> Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTRSV if the +*> reciprocal of the largest M(j), j=1,..,n, is larger than +*> max(underflow, 1/overflow). +*> +*> The bound on x(j) is also used to determine when a step in the +*> columnwise method can be performed without fear of overflow. If +*> the computed bound is greater than a large constant, x is scaled to +*> prevent overflow, but if the bound overflows, x is set to 0, x(j) to +*> 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +*> +*> Similarly, a row-wise scheme is used to solve A**T*x = b. The basic +*> algorithm for A upper triangular is +*> +*> for j = 1, ..., n +*> x(j) := ( b(j) - A[1:j-1,j]**T * x[1:j-1] ) / A(j,j) +*> end +*> +*> We simultaneously compute two bounds +*> G(j) = bound on ( b(i) - A[1:i-1,i]**T * x[1:i-1] ), 1<=i<=j +*> M(j) = bound on x(i), 1<=i<=j +*> +*> The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +*> add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +*> Then the bound on x(j) is +*> +*> M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +*> +*> <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +*> 1<=i<=j +*> +*> and we can safely call DTRSV if 1/M(n) and 1/G(n) are both greater +*> than max(underflow, 1/overflow). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE, + $ CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), CNORM( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST + DOUBLE PRECISION BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS, + $ TMAX, TSCAL, USCAL, XBND, XJ, XMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DASUM, DDOT, DLAMCH + EXTERNAL LSAME, IDAMAX, DASUM, DDOT, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DSCAL, DTRSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLATRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + DO 10 J = 1, N + CNORM( J ) = DASUM( J-1, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N - 1 + CNORM( J ) = DASUM( N-J, A( J+1, J ), 1 ) + 20 CONTINUE + CNORM( N ) = ZERO + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM. +* + IMAX = IDAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM ) THEN + TSCAL = ONE + ELSE + TSCAL = ONE / ( SMLNUM*TMAX ) + CALL DSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine DTRSV can be used. +* + J = IDAMAX( N, X, 1 ) + XMAX = ABS( X( J ) ) + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 50 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 30 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* M(j) = G(j-1) / abs(A(j,j)) +* + TJJ = ABS( A( J, J ) ) + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 30 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 40 CONTINUE + END IF + 50 CONTINUE +* + ELSE +* +* Compute the growth in A**T * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 80 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 60 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + TJJ = ABS( A( J, J ) ) + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + 60 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 70 CONTINUE + END IF + 80 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL DTRSV( UPLO, TRANS, DIAG, N, A, LDA, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = BIGNUM / XMAX + CALL DSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 110 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 100 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 90 I = 1, N + X( I ) = ZERO + 90 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 100 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL DSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +* + CALL DAXPY( J-1, -X( J )*TSCAL, A( 1, J ), 1, X, + $ 1 ) + I = IDAMAX( J-1, X, 1 ) + XMAX = ABS( X( I ) ) + END IF + ELSE + IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +* + CALL DAXPY( N-J, -X( J )*TSCAL, A( J+1, J ), 1, + $ X( J+1 ), 1 ) + I = J + IDAMAX( N-J, X( J+1 ), 1 ) + XMAX = ABS( X( I ) ) + END IF + END IF + 110 CONTINUE +* + ELSE +* +* Solve A**T * x = b +* + DO 160 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = ABS( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = USCAL / TJJS + END IF + IF( REC.LT.ONE ) THEN + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + SUMJ = ZERO + IF( USCAL.EQ.ONE ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call DDOT to perform the dot product. +* + IF( UPPER ) THEN + SUMJ = DDOT( J-1, A( 1, J ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + SUMJ = DDOT( N-J, A( J+1, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 120 I = 1, J - 1 + SUMJ = SUMJ + ( A( I, J )*USCAL )*X( I ) + 120 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 130 I = J + 1, N + SUMJ = SUMJ + ( A( I, J )*USCAL )*X( I ) + 130 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.TSCAL ) THEN +* +* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - SUMJ + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 150 + END IF +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL DSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A**T*x = 0. +* + DO 140 I = 1, N + X( I ) = ZERO + 140 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 150 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - sumj if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = X( J ) / TJJS - SUMJ + END IF + XMAX = MAX( XMAX, ABS( X( J ) ) ) + 160 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL DSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of DLATRS +* + END diff --git a/dspl/liblapack/SRC/dlatrz.f b/dspl/liblapack/SRC/dlatrz.f new file mode 100644 index 0000000..8fbe875 --- /dev/null +++ b/dspl/liblapack/SRC/dlatrz.f @@ -0,0 +1,200 @@ +*> \brief \b DLATRZ factors an upper trapezoidal matrix by means of orthogonal transformations. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLATRZ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLATRZ( M, N, L, A, LDA, TAU, WORK ) +* +* .. Scalar Arguments .. +* INTEGER L, LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLATRZ factors the M-by-(M+L) real upper trapezoidal matrix +*> [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R 0 ) * Z, by means +*> of orthogonal transformations. Z is an (M+L)-by-(M+L) orthogonal +*> matrix and, R and A1 are M-by-M upper triangular matrices. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of columns of the matrix A containing the +*> meaningful part of the Householder vectors. N-M >= L >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the leading M-by-N upper trapezoidal part of the +*> array A must contain the matrix to be factorized. +*> On exit, the leading M-by-M upper triangular part of A +*> contains the upper triangular matrix R, and elements N-L+1 to +*> N of the first M rows of A, with the array TAU, represent the +*> orthogonal matrix Z as a product of M elementary reflectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (M) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (M) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The factorization is obtained by Householder's method. The kth +*> transformation matrix, Z( k ), which is used to introduce zeros into +*> the ( m - k + 1 )th row of A, is given in the form +*> +*> Z( k ) = ( I 0 ), +*> ( 0 T( k ) ) +*> +*> where +*> +*> T( k ) = I - tau*u( k )*u( k )**T, u( k ) = ( 1 ), +*> ( 0 ) +*> ( z( k ) ) +*> +*> tau is a scalar and z( k ) is an l element vector. tau and z( k ) +*> are chosen to annihilate the elements of the kth row of A2. +*> +*> The scalar tau is returned in the kth element of TAU and the vector +*> u( k ) in the kth row of A2, such that the elements of z( k ) are +*> in a( k, l + 1 ), ..., a( k, n ). The elements of R are returned in +*> the upper triangular part of A1. +*> +*> Z is given by +*> +*> Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLATRZ( M, N, L, A, LDA, TAU, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER L, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. External Subroutines .. + EXTERNAL DLARFG, DLARZ +* .. +* .. Executable Statements .. +* +* Test the input arguments +* +* Quick return if possible +* + IF( M.EQ.0 ) THEN + RETURN + ELSE IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + RETURN + END IF +* + DO 20 I = M, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* [ A(i,i) A(i,n-l+1:n) ] +* + CALL DLARFG( L+1, A( I, I ), A( I, N-L+1 ), LDA, TAU( I ) ) +* +* Apply H(i) to A(1:i-1,i:n) from the right +* + CALL DLARZ( 'Right', I-1, N-I+1, L, A( I, N-L+1 ), LDA, + $ TAU( I ), A( 1, I ), LDA, WORK ) +* + 20 CONTINUE +* + RETURN +* +* End of DLATRZ +* + END diff --git a/dspl/liblapack/SRC/dlatsqr.f b/dspl/liblapack/SRC/dlatsqr.f new file mode 100644 index 0000000..1ce7c4d --- /dev/null +++ b/dspl/liblapack/SRC/dlatsqr.f @@ -0,0 +1,256 @@ +* +* Definition: +* =========== +* +* SUBROUTINE DLATSQR( M, N, MB, NB, A, LDA, T, LDT, WORK, +* LWORK, INFO) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, MB, NB, LDT, LWORK +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLATSQR computes a blocked Tall-Skinny QR factorization of +*> an M-by-N matrix A, where M >= N: +*> A = Q * R . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The row block size to be used in the blocked QR. +*> MB > N. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The column block size to be used in the blocked QR. +*> N >= NB >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal +*> of the array contain the N-by-N upper triangular matrix R; +*> the elements below the diagonal represent Q by the columns +*> of blocked V (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, +*> dimension (LDT, N * Number_of_row_blocks) +*> where Number_of_row_blocks = CEIL((M-N)/(MB-N)) +*> The blocked upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. +*> See Further Details below. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The dimension of the array WORK. LWORK >= NB*N. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> Tall-Skinny QR (TSQR) performs QR by a sequence of orthogonal transformations, +*> representing Q as a product of other orthogonal matrices +*> Q = Q(1) * Q(2) * . . . * Q(k) +*> where each Q(i) zeros out subdiagonal entries of a block of MB rows of A: +*> Q(1) zeros out the subdiagonal entries of rows 1:MB of A +*> Q(2) zeros out the bottom MB-N rows of rows [1:N,MB+1:2*MB-N] of A +*> Q(3) zeros out the bottom MB-N rows of rows [1:N,2*MB-N+1:3*MB-2*N] of A +*> . . . +*> +*> Q(1) is computed by GEQRT, which represents Q(1) by Householder vectors +*> stored under the diagonal of rows 1:MB of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,1:N). +*> For more information see Further Details in GEQRT. +*> +*> Q(i) for i>1 is computed by TPQRT, which represents Q(i) by Householder vectors +*> stored in rows [(i-1)*(MB-N)+N+1:i*(MB-N)+N] of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,(i-1)*N+1:i*N). +*> The last Q(k) may use fewer rows. +*> For more information see Further Details in TPQRT. +*> +*> For more details of the overall algorithm, see the description of +*> Sequential TSQR in Section 2.2 of [1]. +*> +*> [1] “Communication-Optimal Parallel and Sequential QR and LU Factorizations,” +*> J. Demmel, L. Grigori, M. Hoemmen, J. Langou, +*> SIAM J. Sci. Comput, vol. 34, no. 1, 2012 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DLATSQR( M, N, MB, NB, A, LDA, T, LDT, WORK, + $ LWORK, INFO) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, MB, NB, LDT, LWORK +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), WORK( * ), T(LDT, *) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, II, KK, CTR +* .. +* .. EXTERNAL FUNCTIONS .. + LOGICAL LSAME + EXTERNAL LSAME +* .. EXTERNAL SUBROUTINES .. + EXTERNAL DGEQRT, DTPQRT, XERBLA +* .. INTRINSIC FUNCTIONS .. + INTRINSIC MAX, MIN, MOD +* .. +* .. EXECUTABLE STATEMENTS .. +* +* TEST THE INPUT ARGUMENTS +* + INFO = 0 +* + LQUERY = ( LWORK.EQ.-1 ) +* + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. M.LT.N ) THEN + INFO = -2 + ELSE IF( MB.LE.N ) THEN + INFO = -3 + ELSE IF( NB.LT.1 .OR. ( NB.GT.N .AND. N.GT.0 )) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDT.LT.NB ) THEN + INFO = -8 + ELSE IF( LWORK.LT.(N*NB) .AND. (.NOT.LQUERY) ) THEN + INFO = -10 + END IF + IF( INFO.EQ.0) THEN + WORK(1) = NB*N + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLATSQR', -INFO ) + RETURN + ELSE IF (LQUERY) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN(M,N).EQ.0 ) THEN + RETURN + END IF +* +* The QR Decomposition +* + IF ((MB.LE.N).OR.(MB.GE.M)) THEN + CALL DGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO) + RETURN + END IF +* + KK = MOD((M-N),(MB-N)) + II=M-KK+1 +* +* Compute the QR factorization of the first block A(1:MB,1:N) +* + CALL DGEQRT( MB, N, NB, A(1,1), LDA, T, LDT, WORK, INFO ) +* + CTR = 1 + DO I = MB+1, II-MB+N , (MB-N) +* +* Compute the QR factorization of the current block A(I:I+MB-N,1:N) +* + CALL DTPQRT( MB-N, N, 0, NB, A(1,1), LDA, A( I, 1 ), LDA, + $ T(1, CTR * N + 1), + $ LDT, WORK, INFO ) + CTR = CTR + 1 + END DO +* +* Compute the QR factorization of the last block A(II:M,1:N) +* + IF (II.LE.M) THEN + CALL DTPQRT( KK, N, 0, NB, A(1,1), LDA, A( II, 1 ), LDA, + $ T(1, CTR * N + 1), LDT, + $ WORK, INFO ) + END IF +* + WORK( 1 ) = N*NB + RETURN +* +* End of DLATSQR +* + END diff --git a/dspl/liblapack/SRC/dlauu2.f b/dspl/liblapack/SRC/dlauu2.f new file mode 100644 index 0000000..59cff25 --- /dev/null +++ b/dspl/liblapack/SRC/dlauu2.f @@ -0,0 +1,198 @@ +*> \brief \b DLAUU2 computes the product UUH or LHL, where U and L are upper or lower triangular matrices (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAUU2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAUU2( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAUU2 computes the product U * U**T or L**T * L, where the triangular +*> factor U or L is stored in the upper or lower triangular part of +*> the array A. +*> +*> If UPLO = 'U' or 'u' then the upper triangle of the result is stored, +*> overwriting the factor U in A. +*> If UPLO = 'L' or 'l' then the lower triangle of the result is stored, +*> overwriting the factor L in A. +*> +*> This is the unblocked form of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the triangular factor stored in the array A +*> is upper or lower triangular: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the triangular factor U or L. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the triangular factor U or L. +*> On exit, if UPLO = 'U', the upper triangle of A is +*> overwritten with the upper triangle of the product U * U**T; +*> if UPLO = 'L', the lower triangle of A is overwritten with +*> the lower triangle of the product L**T * L. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLAUU2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + DOUBLE PRECISION AII +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAUU2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the product U * U**T. +* + DO 10 I = 1, N + AII = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = DDOT( N-I+1, A( I, I ), LDA, A( I, I ), LDA ) + CALL DGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, AII, A( 1, I ), 1 ) + ELSE + CALL DSCAL( I, AII, A( 1, I ), 1 ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the product L**T * L. +* + DO 20 I = 1, N + AII = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = DDOT( N-I+1, A( I, I ), 1, A( I, I ), 1 ) + CALL DGEMV( 'Transpose', N-I, I-1, ONE, A( I+1, 1 ), LDA, + $ A( I+1, I ), 1, AII, A( I, 1 ), LDA ) + ELSE + CALL DSCAL( I, AII, A( I, 1 ), LDA ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of DLAUU2 +* + END diff --git a/dspl/liblapack/SRC/dlauum.f b/dspl/liblapack/SRC/dlauum.f new file mode 100644 index 0000000..31b1ddd --- /dev/null +++ b/dspl/liblapack/SRC/dlauum.f @@ -0,0 +1,218 @@ +*> \brief \b DLAUUM computes the product UUH or LHL, where U and L are upper or lower triangular matrices (blocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAUUM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLAUUM( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAUUM computes the product U * U**T or L**T * L, where the triangular +*> factor U or L is stored in the upper or lower triangular part of +*> the array A. +*> +*> If UPLO = 'U' or 'u' then the upper triangle of the result is stored, +*> overwriting the factor U in A. +*> If UPLO = 'L' or 'l' then the lower triangle of the result is stored, +*> overwriting the factor L in A. +*> +*> This is the blocked form of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the triangular factor stored in the array A +*> is upper or lower triangular: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the triangular factor U or L. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the triangular factor U or L. +*> On exit, if UPLO = 'U', the upper triangle of A is +*> overwritten with the upper triangle of the product U * U**T; +*> if UPLO = 'L', the lower triangle of A is overwritten with +*> the lower triangle of the product L**T * L. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLAUUM( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLAUU2, DSYRK, DTRMM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DLAUUM', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'DLAUUM', UPLO, N, -1, -1, -1 ) +* + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL DLAUU2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute the product U * U**T. +* + DO 10 I = 1, N, NB + IB = MIN( NB, N-I+1 ) + CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Non-unit', + $ I-1, IB, ONE, A( I, I ), LDA, A( 1, I ), + $ LDA ) + CALL DLAUU2( 'Upper', IB, A( I, I ), LDA, INFO ) + IF( I+IB.LE.N ) THEN + CALL DGEMM( 'No transpose', 'Transpose', I-1, IB, + $ N-I-IB+1, ONE, A( 1, I+IB ), LDA, + $ A( I, I+IB ), LDA, ONE, A( 1, I ), LDA ) + CALL DSYRK( 'Upper', 'No transpose', IB, N-I-IB+1, + $ ONE, A( I, I+IB ), LDA, ONE, A( I, I ), + $ LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute the product L**T * L. +* + DO 20 I = 1, N, NB + IB = MIN( NB, N-I+1 ) + CALL DTRMM( 'Left', 'Lower', 'Transpose', 'Non-unit', IB, + $ I-1, ONE, A( I, I ), LDA, A( I, 1 ), LDA ) + CALL DLAUU2( 'Lower', IB, A( I, I ), LDA, INFO ) + IF( I+IB.LE.N ) THEN + CALL DGEMM( 'Transpose', 'No transpose', IB, I-1, + $ N-I-IB+1, ONE, A( I+IB, I ), LDA, + $ A( I+IB, 1 ), LDA, ONE, A( I, 1 ), LDA ) + CALL DSYRK( 'Lower', 'Transpose', IB, N-I-IB+1, ONE, + $ A( I+IB, I ), LDA, ONE, A( I, I ), LDA ) + END IF + 20 CONTINUE + END IF + END IF +* + RETURN +* +* End of DLAUUM +* + END diff --git a/dspl/liblapack/SRC/dopgtr.f b/dspl/liblapack/SRC/dopgtr.f new file mode 100644 index 0000000..0e061b2 --- /dev/null +++ b/dspl/liblapack/SRC/dopgtr.f @@ -0,0 +1,232 @@ +*> \brief \b DOPGTR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DOPGTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DOPGTR( UPLO, N, AP, TAU, Q, LDQ, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDQ, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AP( * ), Q( LDQ, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DOPGTR generates a real orthogonal matrix Q which is defined as the +*> product of n-1 elementary reflectors H(i) of order n, as returned by +*> DSPTRD using packed storage: +*> +*> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), +*> +*> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular packed storage used in previous +*> call to DSPTRD; +*> = 'L': Lower triangular packed storage used in previous +*> call to DSPTRD. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix Q. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> The vectors which define the elementary reflectors, as +*> returned by DSPTRD. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (N-1) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DSPTRD. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> The N-by-N orthogonal matrix Q. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N-1) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DOPGTR( UPLO, N, AP, TAU, Q, LDQ, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDQ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), Q( LDQ, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IINFO, IJ, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DORG2L, DORG2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DOPGTR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Q was determined by a call to DSPTRD with UPLO = 'U' +* +* Unpack the vectors which define the elementary reflectors and +* set the last row and column of Q equal to those of the unit +* matrix +* + IJ = 2 + DO 20 J = 1, N - 1 + DO 10 I = 1, J - 1 + Q( I, J ) = AP( IJ ) + IJ = IJ + 1 + 10 CONTINUE + IJ = IJ + 2 + Q( N, J ) = ZERO + 20 CONTINUE + DO 30 I = 1, N - 1 + Q( I, N ) = ZERO + 30 CONTINUE + Q( N, N ) = ONE +* +* Generate Q(1:n-1,1:n-1) +* + CALL DORG2L( N-1, N-1, N-1, Q, LDQ, TAU, WORK, IINFO ) +* + ELSE +* +* Q was determined by a call to DSPTRD with UPLO = 'L'. +* +* Unpack the vectors which define the elementary reflectors and +* set the first row and column of Q equal to those of the unit +* matrix +* + Q( 1, 1 ) = ONE + DO 40 I = 2, N + Q( I, 1 ) = ZERO + 40 CONTINUE + IJ = 3 + DO 60 J = 2, N + Q( 1, J ) = ZERO + DO 50 I = J + 1, N + Q( I, J ) = AP( IJ ) + IJ = IJ + 1 + 50 CONTINUE + IJ = IJ + 2 + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Generate Q(2:n,2:n) +* + CALL DORG2R( N-1, N-1, N-1, Q( 2, 2 ), LDQ, TAU, WORK, + $ IINFO ) + END IF + END IF + RETURN +* +* End of DOPGTR +* + END diff --git a/dspl/liblapack/SRC/dopmtr.f b/dspl/liblapack/SRC/dopmtr.f new file mode 100644 index 0000000..dd9286b --- /dev/null +++ b/dspl/liblapack/SRC/dopmtr.f @@ -0,0 +1,339 @@ +*> \brief \b DOPMTR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DOPMTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DOPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS, UPLO +* INTEGER INFO, LDC, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AP( * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DOPMTR overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix of order nq, with nq = m if +*> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +*> nq-1 elementary reflectors, as returned by DSPTRD using packed +*> storage: +*> +*> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); +*> +*> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular packed storage used in previous +*> call to DSPTRD; +*> = 'L': Lower triangular packed storage used in previous +*> call to DSPTRD. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension +*> (M*(M+1)/2) if SIDE = 'L' +*> (N*(N+1)/2) if SIDE = 'R' +*> The vectors which define the elementary reflectors, as +*> returned by DSPTRD. AP is modified by the routine but +*> restored on exit. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (M-1) if SIDE = 'L' +*> or (N-1) if SIDE = 'R' +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DSPTRD. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension +*> (N) if SIDE = 'L' +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DOPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, UPLO + INTEGER INFO, LDC, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL FORWRD, LEFT, NOTRAN, UPPER + INTEGER I, I1, I2, I3, IC, II, JC, MI, NI, NQ + DOUBLE PRECISION AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + UPPER = LSAME( UPLO, 'U' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DOPMTR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Q was determined by a call to DSPTRD with UPLO = 'U' +* + FORWRD = ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) +* + IF( FORWRD ) THEN + I1 = 1 + I2 = NQ - 1 + I3 = 1 + II = 2 + ELSE + I1 = NQ - 1 + I2 = 1 + I3 = -1 + II = NQ*( NQ+1 ) / 2 - 1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(1:i,1:n) +* + MI = I + ELSE +* +* H(i) is applied to C(1:m,1:i) +* + NI = I + END IF +* +* Apply H(i) +* + AII = AP( II ) + AP( II ) = ONE + CALL DLARF( SIDE, MI, NI, AP( II-I+1 ), 1, TAU( I ), C, LDC, + $ WORK ) + AP( II ) = AII +* + IF( FORWRD ) THEN + II = II + I + 2 + ELSE + II = II - I - 1 + END IF + 10 CONTINUE + ELSE +* +* Q was determined by a call to DSPTRD with UPLO = 'L'. +* + FORWRD = ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) +* + IF( FORWRD ) THEN + I1 = 1 + I2 = NQ - 1 + I3 = 1 + II = 2 + ELSE + I1 = NQ - 1 + I2 = 1 + I3 = -1 + II = NQ*( NQ+1 ) / 2 - 1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 20 I = I1, I2, I3 + AII = AP( II ) + AP( II ) = ONE + IF( LEFT ) THEN +* +* H(i) is applied to C(i+1:m,1:n) +* + MI = M - I + IC = I + 1 + ELSE +* +* H(i) is applied to C(1:m,i+1:n) +* + NI = N - I + JC = I + 1 + END IF +* +* Apply H(i) +* + CALL DLARF( SIDE, MI, NI, AP( II ), 1, TAU( I ), + $ C( IC, JC ), LDC, WORK ) + AP( II ) = AII +* + IF( FORWRD ) THEN + II = II + NQ - I + 1 + ELSE + II = II - NQ + I - 2 + END IF + 20 CONTINUE + END IF + RETURN +* +* End of DOPMTR +* + END diff --git a/dspl/liblapack/SRC/dorbdb.f b/dspl/liblapack/SRC/dorbdb.f new file mode 100644 index 0000000..d616579 --- /dev/null +++ b/dspl/liblapack/SRC/dorbdb.f @@ -0,0 +1,687 @@ +*> \brief \b DORBDB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORBDB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, +* X21, LDX21, X22, LDX22, THETA, PHI, TAUP1, +* TAUP2, TAUQ1, TAUQ2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIGNS, TRANS +* INTEGER INFO, LDX11, LDX12, LDX21, LDX22, LWORK, M, P, +* $ Q +* .. +* .. Array Arguments .. +* DOUBLE PRECISION PHI( * ), THETA( * ) +* DOUBLE PRECISION TAUP1( * ), TAUP2( * ), TAUQ1( * ), TAUQ2( * ), +* $ WORK( * ), X11( LDX11, * ), X12( LDX12, * ), +* $ X21( LDX21, * ), X22( LDX22, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORBDB simultaneously bidiagonalizes the blocks of an M-by-M +*> partitioned orthogonal matrix X: +*> +*> [ B11 | B12 0 0 ] +*> [ X11 | X12 ] [ P1 | ] [ 0 | 0 -I 0 ] [ Q1 | ]**T +*> X = [-----------] = [---------] [----------------] [---------] . +*> [ X21 | X22 ] [ | P2 ] [ B21 | B22 0 0 ] [ | Q2 ] +*> [ 0 | 0 0 I ] +*> +*> X11 is P-by-Q. Q must be no larger than P, M-P, or M-Q. (If this is +*> not the case, then X must be transposed and/or permuted. This can be +*> done in constant time using the TRANS and SIGNS options. See DORCSD +*> for details.) +*> +*> The orthogonal matrices P1, P2, Q1, and Q2 are P-by-P, (M-P)-by- +*> (M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. They are +*> represented implicitly by Householder vectors. +*> +*> B11, B12, B21, and B22 are Q-by-Q bidiagonal matrices represented +*> implicitly by angles THETA, PHI. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER +*> = 'T': X, U1, U2, V1T, and V2T are stored in row-major +*> order; +*> otherwise: X, U1, U2, V1T, and V2T are stored in column- +*> major order. +*> \endverbatim +*> +*> \param[in] SIGNS +*> \verbatim +*> SIGNS is CHARACTER +*> = 'O': The lower-left block is made nonpositive (the +*> "other" convention); +*> otherwise: The upper-right block is made nonpositive (the +*> "default" convention). +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows and columns in X. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11 and X12. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= +*> MIN(P,M-P,M-Q). +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is DOUBLE PRECISION array, dimension (LDX11,Q) +*> On entry, the top-left block of the orthogonal matrix to be +*> reduced. On exit, the form depends on TRANS: +*> If TRANS = 'N', then +*> the columns of tril(X11) specify reflectors for P1, +*> the rows of triu(X11,1) specify reflectors for Q1; +*> else TRANS = 'T', and +*> the rows of triu(X11) specify reflectors for P1, +*> the columns of tril(X11,-1) specify reflectors for Q1. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. If TRANS = 'N', then LDX11 >= +*> P; else LDX11 >= Q. +*> \endverbatim +*> +*> \param[in,out] X12 +*> \verbatim +*> X12 is DOUBLE PRECISION array, dimension (LDX12,M-Q) +*> On entry, the top-right block of the orthogonal matrix to +*> be reduced. On exit, the form depends on TRANS: +*> If TRANS = 'N', then +*> the rows of triu(X12) specify the first P reflectors for +*> Q2; +*> else TRANS = 'T', and +*> the columns of tril(X12) specify the first P reflectors +*> for Q2. +*> \endverbatim +*> +*> \param[in] LDX12 +*> \verbatim +*> LDX12 is INTEGER +*> The leading dimension of X12. If TRANS = 'N', then LDX12 >= +*> P; else LDX11 >= M-Q. +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is DOUBLE PRECISION array, dimension (LDX21,Q) +*> On entry, the bottom-left block of the orthogonal matrix to +*> be reduced. On exit, the form depends on TRANS: +*> If TRANS = 'N', then +*> the columns of tril(X21) specify reflectors for P2; +*> else TRANS = 'T', and +*> the rows of triu(X21) specify reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. If TRANS = 'N', then LDX21 >= +*> M-P; else LDX21 >= Q. +*> \endverbatim +*> +*> \param[in,out] X22 +*> \verbatim +*> X22 is DOUBLE PRECISION array, dimension (LDX22,M-Q) +*> On entry, the bottom-right block of the orthogonal matrix to +*> be reduced. On exit, the form depends on TRANS: +*> If TRANS = 'N', then +*> the rows of triu(X22(Q+1:M-P,P+1:M-Q)) specify the last +*> M-P-Q reflectors for Q2, +*> else TRANS = 'T', and +*> the columns of tril(X22(P+1:M-Q,Q+1:M-P)) specify the last +*> M-P-Q reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX22 +*> \verbatim +*> LDX22 is INTEGER +*> The leading dimension of X22. If TRANS = 'N', then LDX22 >= +*> M-P; else LDX22 >= M-Q. +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is DOUBLE PRECISION array, dimension (Q) +*> The entries of the bidiagonal blocks B11, B12, B21, B22 can +*> be computed from the angles THETA and PHI. See Further +*> Details. +*> \endverbatim +*> +*> \param[out] PHI +*> \verbatim +*> PHI is DOUBLE PRECISION array, dimension (Q-1) +*> The entries of the bidiagonal blocks B11, B12, B21, B22 can +*> be computed from the angles THETA and PHI. See Further +*> Details. +*> \endverbatim +*> +*> \param[out] TAUP1 +*> \verbatim +*> TAUP1 is DOUBLE PRECISION array, dimension (P) +*> The scalar factors of the elementary reflectors that define +*> P1. +*> \endverbatim +*> +*> \param[out] TAUP2 +*> \verbatim +*> TAUP2 is DOUBLE PRECISION array, dimension (M-P) +*> The scalar factors of the elementary reflectors that define +*> P2. +*> \endverbatim +*> +*> \param[out] TAUQ1 +*> \verbatim +*> TAUQ1 is DOUBLE PRECISION array, dimension (Q) +*> The scalar factors of the elementary reflectors that define +*> Q1. +*> \endverbatim +*> +*> \param[out] TAUQ2 +*> \verbatim +*> TAUQ2 is DOUBLE PRECISION array, dimension (M-Q) +*> The scalar factors of the elementary reflectors that define +*> Q2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= M-Q. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The bidiagonal blocks B11, B12, B21, and B22 are represented +*> implicitly by angles THETA(1), ..., THETA(Q) and PHI(1), ..., +*> PHI(Q-1). B11 and B21 are upper bidiagonal, while B21 and B22 are +*> lower bidiagonal. Every entry in each bidiagonal band is a product +*> of a sine or cosine of a THETA with a sine or cosine of a PHI. See +*> [1] or DORCSD for details. +*> +*> P1, P2, Q1, and Q2 are represented as products of elementary +*> reflectors. See DORCSD for details on generating P1, P2, Q1, and Q2 +*> using DORGQR and DORGLQ. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +*> +* ===================================================================== + SUBROUTINE DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, + $ X21, LDX21, X22, LDX22, THETA, PHI, TAUP1, + $ TAUP2, TAUQ1, TAUQ2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIGNS, TRANS + INTEGER INFO, LDX11, LDX12, LDX21, LDX22, LWORK, M, P, + $ Q +* .. +* .. Array Arguments .. + DOUBLE PRECISION PHI( * ), THETA( * ) + DOUBLE PRECISION TAUP1( * ), TAUP2( * ), TAUQ1( * ), TAUQ2( * ), + $ WORK( * ), X11( LDX11, * ), X12( LDX12, * ), + $ X21( LDX21, * ), X22( LDX22, * ) +* .. +* +* ==================================================================== +* +* .. Parameters .. + DOUBLE PRECISION REALONE + PARAMETER ( REALONE = 1.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL COLMAJOR, LQUERY + INTEGER I, LWORKMIN, LWORKOPT + DOUBLE PRECISION Z1, Z2, Z3, Z4 +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DLARF, DLARFGP, DSCAL, XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DNRM2 + LOGICAL LSAME + EXTERNAL DNRM2, LSAME +* .. +* .. Intrinsic Functions + INTRINSIC ATAN2, COS, MAX, SIN +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + COLMAJOR = .NOT. LSAME( TRANS, 'T' ) + IF( .NOT. LSAME( SIGNS, 'O' ) ) THEN + Z1 = REALONE + Z2 = REALONE + Z3 = REALONE + Z4 = REALONE + ELSE + Z1 = REALONE + Z2 = -REALONE + Z3 = REALONE + Z4 = -REALONE + END IF + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -3 + ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN + INFO = -4 + ELSE IF( Q .LT. 0 .OR. Q .GT. P .OR. Q .GT. M-P .OR. + $ Q .GT. M-Q ) THEN + INFO = -5 + ELSE IF( COLMAJOR .AND. LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -7 + ELSE IF( .NOT.COLMAJOR .AND. LDX11 .LT. MAX( 1, Q ) ) THEN + INFO = -7 + ELSE IF( COLMAJOR .AND. LDX12 .LT. MAX( 1, P ) ) THEN + INFO = -9 + ELSE IF( .NOT.COLMAJOR .AND. LDX12 .LT. MAX( 1, M-Q ) ) THEN + INFO = -9 + ELSE IF( COLMAJOR .AND. LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -11 + ELSE IF( .NOT.COLMAJOR .AND. LDX21 .LT. MAX( 1, Q ) ) THEN + INFO = -11 + ELSE IF( COLMAJOR .AND. LDX22 .LT. MAX( 1, M-P ) ) THEN + INFO = -13 + ELSE IF( .NOT.COLMAJOR .AND. LDX22 .LT. MAX( 1, M-Q ) ) THEN + INFO = -13 + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + LWORKOPT = M - Q + LWORKMIN = M - Q + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT. LQUERY ) THEN + INFO = -21 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'xORBDB', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Handle column-major and row-major separately +* + IF( COLMAJOR ) THEN +* +* Reduce columns 1, ..., Q of X11, X12, X21, and X22 +* + DO I = 1, Q +* + IF( I .EQ. 1 ) THEN + CALL DSCAL( P-I+1, Z1, X11(I,I), 1 ) + ELSE + CALL DSCAL( P-I+1, Z1*COS(PHI(I-1)), X11(I,I), 1 ) + CALL DAXPY( P-I+1, -Z1*Z3*Z4*SIN(PHI(I-1)), X12(I,I-1), + $ 1, X11(I,I), 1 ) + END IF + IF( I .EQ. 1 ) THEN + CALL DSCAL( M-P-I+1, Z2, X21(I,I), 1 ) + ELSE + CALL DSCAL( M-P-I+1, Z2*COS(PHI(I-1)), X21(I,I), 1 ) + CALL DAXPY( M-P-I+1, -Z2*Z3*Z4*SIN(PHI(I-1)), X22(I,I-1), + $ 1, X21(I,I), 1 ) + END IF +* + THETA(I) = ATAN2( DNRM2( M-P-I+1, X21(I,I), 1 ), + $ DNRM2( P-I+1, X11(I,I), 1 ) ) +* + IF( P .GT. I ) THEN + CALL DLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) + ELSE IF( P .EQ. I ) THEN + CALL DLARFGP( P-I+1, X11(I,I), X11(I,I), 1, TAUP1(I) ) + END IF + X11(I,I) = ONE + IF ( M-P .GT. I ) THEN + CALL DLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, + $ TAUP2(I) ) + ELSE IF ( M-P .EQ. I ) THEN + CALL DLARFGP( M-P-I+1, X21(I,I), X21(I,I), 1, TAUP2(I) ) + END IF + X21(I,I) = ONE +* + IF ( Q .GT. I ) THEN + CALL DLARF( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), + $ X11(I,I+1), LDX11, WORK ) + END IF + IF ( M-Q+1 .GT. I ) THEN + CALL DLARF( 'L', P-I+1, M-Q-I+1, X11(I,I), 1, TAUP1(I), + $ X12(I,I), LDX12, WORK ) + END IF + IF ( Q .GT. I ) THEN + CALL DLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), + $ X21(I,I+1), LDX21, WORK ) + END IF + IF ( M-Q+1 .GT. I ) THEN + CALL DLARF( 'L', M-P-I+1, M-Q-I+1, X21(I,I), 1, TAUP2(I), + $ X22(I,I), LDX22, WORK ) + END IF +* + IF( I .LT. Q ) THEN + CALL DSCAL( Q-I, -Z1*Z3*SIN(THETA(I)), X11(I,I+1), + $ LDX11 ) + CALL DAXPY( Q-I, Z2*Z3*COS(THETA(I)), X21(I,I+1), LDX21, + $ X11(I,I+1), LDX11 ) + END IF + CALL DSCAL( M-Q-I+1, -Z1*Z4*SIN(THETA(I)), X12(I,I), LDX12 ) + CALL DAXPY( M-Q-I+1, Z2*Z4*COS(THETA(I)), X22(I,I), LDX22, + $ X12(I,I), LDX12 ) +* + IF( I .LT. Q ) + $ PHI(I) = ATAN2( DNRM2( Q-I, X11(I,I+1), LDX11 ), + $ DNRM2( M-Q-I+1, X12(I,I), LDX12 ) ) +* + IF( I .LT. Q ) THEN + IF ( Q-I .EQ. 1 ) THEN + CALL DLARFGP( Q-I, X11(I,I+1), X11(I,I+1), LDX11, + $ TAUQ1(I) ) + ELSE + CALL DLARFGP( Q-I, X11(I,I+1), X11(I,I+2), LDX11, + $ TAUQ1(I) ) + END IF + X11(I,I+1) = ONE + END IF + IF ( Q+I-1 .LT. M ) THEN + IF ( M-Q .EQ. I ) THEN + CALL DLARFGP( M-Q-I+1, X12(I,I), X12(I,I), LDX12, + $ TAUQ2(I) ) + ELSE + CALL DLARFGP( M-Q-I+1, X12(I,I), X12(I,I+1), LDX12, + $ TAUQ2(I) ) + END IF + END IF + X12(I,I) = ONE +* + IF( I .LT. Q ) THEN + CALL DLARF( 'R', P-I, Q-I, X11(I,I+1), LDX11, TAUQ1(I), + $ X11(I+1,I+1), LDX11, WORK ) + CALL DLARF( 'R', M-P-I, Q-I, X11(I,I+1), LDX11, TAUQ1(I), + $ X21(I+1,I+1), LDX21, WORK ) + END IF + IF ( P .GT. I ) THEN + CALL DLARF( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, TAUQ2(I), + $ X12(I+1,I), LDX12, WORK ) + END IF + IF ( M-P .GT. I ) THEN + CALL DLARF( 'R', M-P-I, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), X22(I+1,I), LDX22, WORK ) + END IF +* + END DO +* +* Reduce columns Q + 1, ..., P of X12, X22 +* + DO I = Q + 1, P +* + CALL DSCAL( M-Q-I+1, -Z1*Z4, X12(I,I), LDX12 ) + IF ( I .GE. M-Q ) THEN + CALL DLARFGP( M-Q-I+1, X12(I,I), X12(I,I), LDX12, + $ TAUQ2(I) ) + ELSE + CALL DLARFGP( M-Q-I+1, X12(I,I), X12(I,I+1), LDX12, + $ TAUQ2(I) ) + END IF + X12(I,I) = ONE +* + IF ( P .GT. I ) THEN + CALL DLARF( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, TAUQ2(I), + $ X12(I+1,I), LDX12, WORK ) + END IF + IF( M-P-Q .GE. 1 ) + $ CALL DLARF( 'R', M-P-Q, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), X22(Q+1,I), LDX22, WORK ) +* + END DO +* +* Reduce columns P + 1, ..., M - Q of X12, X22 +* + DO I = 1, M - P - Q +* + CALL DSCAL( M-P-Q-I+1, Z2*Z4, X22(Q+I,P+I), LDX22 ) + IF ( I .EQ. M-P-Q ) THEN + CALL DLARFGP( M-P-Q-I+1, X22(Q+I,P+I), X22(Q+I,P+I), + $ LDX22, TAUQ2(P+I) ) + ELSE + CALL DLARFGP( M-P-Q-I+1, X22(Q+I,P+I), X22(Q+I,P+I+1), + $ LDX22, TAUQ2(P+I) ) + END IF + X22(Q+I,P+I) = ONE + IF ( I .LT. M-P-Q ) THEN + CALL DLARF( 'R', M-P-Q-I, M-P-Q-I+1, X22(Q+I,P+I), LDX22, + $ TAUQ2(P+I), X22(Q+I+1,P+I), LDX22, WORK ) + END IF +* + END DO +* + ELSE +* +* Reduce columns 1, ..., Q of X11, X12, X21, X22 +* + DO I = 1, Q +* + IF( I .EQ. 1 ) THEN + CALL DSCAL( P-I+1, Z1, X11(I,I), LDX11 ) + ELSE + CALL DSCAL( P-I+1, Z1*COS(PHI(I-1)), X11(I,I), LDX11 ) + CALL DAXPY( P-I+1, -Z1*Z3*Z4*SIN(PHI(I-1)), X12(I-1,I), + $ LDX12, X11(I,I), LDX11 ) + END IF + IF( I .EQ. 1 ) THEN + CALL DSCAL( M-P-I+1, Z2, X21(I,I), LDX21 ) + ELSE + CALL DSCAL( M-P-I+1, Z2*COS(PHI(I-1)), X21(I,I), LDX21 ) + CALL DAXPY( M-P-I+1, -Z2*Z3*Z4*SIN(PHI(I-1)), X22(I-1,I), + $ LDX22, X21(I,I), LDX21 ) + END IF +* + THETA(I) = ATAN2( DNRM2( M-P-I+1, X21(I,I), LDX21 ), + $ DNRM2( P-I+1, X11(I,I), LDX11 ) ) +* + CALL DLARFGP( P-I+1, X11(I,I), X11(I,I+1), LDX11, TAUP1(I) ) + X11(I,I) = ONE + IF ( I .EQ. M-P ) THEN + CALL DLARFGP( M-P-I+1, X21(I,I), X21(I,I), LDX21, + $ TAUP2(I) ) + ELSE + CALL DLARFGP( M-P-I+1, X21(I,I), X21(I,I+1), LDX21, + $ TAUP2(I) ) + END IF + X21(I,I) = ONE +* + IF ( Q .GT. I ) THEN + CALL DLARF( 'R', Q-I, P-I+1, X11(I,I), LDX11, TAUP1(I), + $ X11(I+1,I), LDX11, WORK ) + END IF + IF ( M-Q+1 .GT. I ) THEN + CALL DLARF( 'R', M-Q-I+1, P-I+1, X11(I,I), LDX11, + $ TAUP1(I), X12(I,I), LDX12, WORK ) + END IF + IF ( Q .GT. I ) THEN + CALL DLARF( 'R', Q-I, M-P-I+1, X21(I,I), LDX21, TAUP2(I), + $ X21(I+1,I), LDX21, WORK ) + END IF + IF ( M-Q+1 .GT. I ) THEN + CALL DLARF( 'R', M-Q-I+1, M-P-I+1, X21(I,I), LDX21, + $ TAUP2(I), X22(I,I), LDX22, WORK ) + END IF +* + IF( I .LT. Q ) THEN + CALL DSCAL( Q-I, -Z1*Z3*SIN(THETA(I)), X11(I+1,I), 1 ) + CALL DAXPY( Q-I, Z2*Z3*COS(THETA(I)), X21(I+1,I), 1, + $ X11(I+1,I), 1 ) + END IF + CALL DSCAL( M-Q-I+1, -Z1*Z4*SIN(THETA(I)), X12(I,I), 1 ) + CALL DAXPY( M-Q-I+1, Z2*Z4*COS(THETA(I)), X22(I,I), 1, + $ X12(I,I), 1 ) +* + IF( I .LT. Q ) + $ PHI(I) = ATAN2( DNRM2( Q-I, X11(I+1,I), 1 ), + $ DNRM2( M-Q-I+1, X12(I,I), 1 ) ) +* + IF( I .LT. Q ) THEN + IF ( Q-I .EQ. 1) THEN + CALL DLARFGP( Q-I, X11(I+1,I), X11(I+1,I), 1, + $ TAUQ1(I) ) + ELSE + CALL DLARFGP( Q-I, X11(I+1,I), X11(I+2,I), 1, + $ TAUQ1(I) ) + END IF + X11(I+1,I) = ONE + END IF + IF ( M-Q .GT. I ) THEN + CALL DLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, + $ TAUQ2(I) ) + ELSE + CALL DLARFGP( M-Q-I+1, X12(I,I), X12(I,I), 1, + $ TAUQ2(I) ) + END IF + X12(I,I) = ONE +* + IF( I .LT. Q ) THEN + CALL DLARF( 'L', Q-I, P-I, X11(I+1,I), 1, TAUQ1(I), + $ X11(I+1,I+1), LDX11, WORK ) + CALL DLARF( 'L', Q-I, M-P-I, X11(I+1,I), 1, TAUQ1(I), + $ X21(I+1,I+1), LDX21, WORK ) + END IF + CALL DLARF( 'L', M-Q-I+1, P-I, X12(I,I), 1, TAUQ2(I), + $ X12(I,I+1), LDX12, WORK ) + IF ( M-P-I .GT. 0 ) THEN + CALL DLARF( 'L', M-Q-I+1, M-P-I, X12(I,I), 1, TAUQ2(I), + $ X22(I,I+1), LDX22, WORK ) + END IF +* + END DO +* +* Reduce columns Q + 1, ..., P of X12, X22 +* + DO I = Q + 1, P +* + CALL DSCAL( M-Q-I+1, -Z1*Z4, X12(I,I), 1 ) + CALL DLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, TAUQ2(I) ) + X12(I,I) = ONE +* + IF ( P .GT. I ) THEN + CALL DLARF( 'L', M-Q-I+1, P-I, X12(I,I), 1, TAUQ2(I), + $ X12(I,I+1), LDX12, WORK ) + END IF + IF( M-P-Q .GE. 1 ) + $ CALL DLARF( 'L', M-Q-I+1, M-P-Q, X12(I,I), 1, TAUQ2(I), + $ X22(I,Q+1), LDX22, WORK ) +* + END DO +* +* Reduce columns P + 1, ..., M - Q of X12, X22 +* + DO I = 1, M - P - Q +* + CALL DSCAL( M-P-Q-I+1, Z2*Z4, X22(P+I,Q+I), 1 ) + IF ( M-P-Q .EQ. I ) THEN + CALL DLARFGP( M-P-Q-I+1, X22(P+I,Q+I), X22(P+I,Q+I), 1, + $ TAUQ2(P+I) ) + ELSE + CALL DLARFGP( M-P-Q-I+1, X22(P+I,Q+I), X22(P+I+1,Q+I), 1, + $ TAUQ2(P+I) ) + CALL DLARF( 'L', M-P-Q-I+1, M-P-Q-I, X22(P+I,Q+I), 1, + $ TAUQ2(P+I), X22(P+I,Q+I+1), LDX22, WORK ) + END IF + X22(P+I,Q+I) = ONE +* + END DO +* + END IF +* + RETURN +* +* End of DORBDB +* + END + diff --git a/dspl/liblapack/SRC/dorbdb1.f b/dspl/liblapack/SRC/dorbdb1.f new file mode 100644 index 0000000..675028a --- /dev/null +++ b/dspl/liblapack/SRC/dorbdb1.f @@ -0,0 +1,323 @@ +*> \brief \b DORBDB1 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORBDB1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, +* TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. +* DOUBLE PRECISION PHI(*), THETA(*) +* DOUBLE PRECISION TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), +* $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> DORBDB1 simultaneously bidiagonalizes the blocks of a tall and skinny +*> matrix X with orthonomal columns: +*> +*> [ B11 ] +*> [ X11 ] [ P1 | ] [ 0 ] +*> [-----] = [---------] [-----] Q1**T . +*> [ X21 ] [ | P2 ] [ B21 ] +*> [ 0 ] +*> +*> X11 is P-by-Q, and X21 is (M-P)-by-Q. Q must be no larger than P, +*> M-P, or M-Q. Routines DORBDB2, DORBDB3, and DORBDB4 handle cases in +*> which Q is not the minimum dimension. +*> +*> The orthogonal matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), +*> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by +*> Householder vectors. +*> +*> B11 and B12 are Q-by-Q bidiagonal matrices represented implicitly by +*> angles THETA, PHI. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows X11 plus the number of rows in X21. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= +*> MIN(P,M-P,M-Q). +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is DOUBLE PRECISION array, dimension (LDX11,Q) +*> On entry, the top block of the matrix X to be reduced. On +*> exit, the columns of tril(X11) specify reflectors for P1 and +*> the rows of triu(X11,1) specify reflectors for Q1. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= P. +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is DOUBLE PRECISION array, dimension (LDX21,Q) +*> On entry, the bottom block of the matrix X to be reduced. On +*> exit, the columns of tril(X21) specify reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. LDX21 >= M-P. +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is DOUBLE PRECISION array, dimension (Q) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] PHI +*> \verbatim +*> PHI is DOUBLE PRECISION array, dimension (Q-1) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP1 +*> \verbatim +*> TAUP1 is DOUBLE PRECISION array, dimension (P) +*> The scalar factors of the elementary reflectors that define +*> P1. +*> \endverbatim +*> +*> \param[out] TAUP2 +*> \verbatim +*> TAUP2 is DOUBLE PRECISION array, dimension (M-P) +*> The scalar factors of the elementary reflectors that define +*> P2. +*> \endverbatim +*> +*> \param[out] TAUQ1 +*> \verbatim +*> TAUQ1 is DOUBLE PRECISION array, dimension (Q) +*> The scalar factors of the elementary reflectors that define +*> Q1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= M-Q. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +*> +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The upper-bidiagonal blocks B11, B21 are represented implicitly by +*> angles THETA(1), ..., THETA(Q) and PHI(1), ..., PHI(Q-1). Every entry +*> in each bidiagonal band is a product of a sine or cosine of a THETA +*> with a sine or cosine of a PHI. See [1] or DORCSD for details. +*> +*> P1, P2, and Q1 are represented as products of elementary reflectors. +*> See DORCSD2BY1 for details on generating P1, P2, and Q1 using DORGQR +*> and DORGLQ. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +*> +* ===================================================================== + SUBROUTINE DORBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. + DOUBLE PRECISION PHI(*), THETA(*) + DOUBLE PRECISION TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), + $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* ==================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION C, S + INTEGER CHILDINFO, I, ILARF, IORBDB5, LLARF, LORBDB5, + $ LWORKMIN, LWORKOPT + LOGICAL LQUERY +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFGP, DORBDB5, DROT, XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DNRM2 + EXTERNAL DNRM2 +* .. +* .. Intrinsic Function .. + INTRINSIC ATAN2, COS, MAX, SIN, SQRT +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -1 + ELSE IF( P .LT. Q .OR. M-P .LT. Q ) THEN + INFO = -2 + ELSE IF( Q .LT. 0 .OR. M-Q .LT. Q ) THEN + INFO = -3 + ELSE IF( LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -5 + ELSE IF( LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + ILARF = 2 + LLARF = MAX( P-1, M-P-1, Q-1 ) + IORBDB5 = 2 + LORBDB5 = Q-2 + LWORKOPT = MAX( ILARF+LLARF-1, IORBDB5+LORBDB5-1 ) + LWORKMIN = LWORKOPT + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'DORBDB1', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Reduce columns 1, ..., Q of X11 and X21 +* + DO I = 1, Q +* + CALL DLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) + CALL DLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, TAUP2(I) ) + THETA(I) = ATAN2( X21(I,I), X11(I,I) ) + C = COS( THETA(I) ) + S = SIN( THETA(I) ) + X11(I,I) = ONE + X21(I,I) = ONE + CALL DLARF( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), X11(I,I+1), + $ LDX11, WORK(ILARF) ) + CALL DLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), + $ X21(I,I+1), LDX21, WORK(ILARF) ) +* + IF( I .LT. Q ) THEN + CALL DROT( Q-I, X11(I,I+1), LDX11, X21(I,I+1), LDX21, C, S ) + CALL DLARFGP( Q-I, X21(I,I+1), X21(I,I+2), LDX21, TAUQ1(I) ) + S = X21(I,I+1) + X21(I,I+1) = ONE + CALL DLARF( 'R', P-I, Q-I, X21(I,I+1), LDX21, TAUQ1(I), + $ X11(I+1,I+1), LDX11, WORK(ILARF) ) + CALL DLARF( 'R', M-P-I, Q-I, X21(I,I+1), LDX21, TAUQ1(I), + $ X21(I+1,I+1), LDX21, WORK(ILARF) ) + C = SQRT( DNRM2( P-I, X11(I+1,I+1), 1 )**2 + $ + DNRM2( M-P-I, X21(I+1,I+1), 1 )**2 ) + PHI(I) = ATAN2( S, C ) + CALL DORBDB5( P-I, M-P-I, Q-I-1, X11(I+1,I+1), 1, + $ X21(I+1,I+1), 1, X11(I+1,I+2), LDX11, + $ X21(I+1,I+2), LDX21, WORK(IORBDB5), LORBDB5, + $ CHILDINFO ) + END IF +* + END DO +* + RETURN +* +* End of DORBDB1 +* + END + diff --git a/dspl/liblapack/SRC/dorbdb2.f b/dspl/liblapack/SRC/dorbdb2.f new file mode 100644 index 0000000..ea43d4f --- /dev/null +++ b/dspl/liblapack/SRC/dorbdb2.f @@ -0,0 +1,333 @@ +*> \brief \b DORBDB2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORBDB2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, +* TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. +* DOUBLE PRECISION PHI(*), THETA(*) +* DOUBLE PRECISION TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), +* $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> DORBDB2 simultaneously bidiagonalizes the blocks of a tall and skinny +*> matrix X with orthonomal columns: +*> +*> [ B11 ] +*> [ X11 ] [ P1 | ] [ 0 ] +*> [-----] = [---------] [-----] Q1**T . +*> [ X21 ] [ | P2 ] [ B21 ] +*> [ 0 ] +*> +*> X11 is P-by-Q, and X21 is (M-P)-by-Q. P must be no larger than M-P, +*> Q, or M-Q. Routines DORBDB1, DORBDB3, and DORBDB4 handle cases in +*> which P is not the minimum dimension. +*> +*> The orthogonal matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), +*> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by +*> Householder vectors. +*> +*> B11 and B12 are P-by-P bidiagonal matrices represented implicitly by +*> angles THETA, PHI. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows X11 plus the number of rows in X21. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11. 0 <= P <= min(M-P,Q,M-Q). +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= M. +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is DOUBLE PRECISION array, dimension (LDX11,Q) +*> On entry, the top block of the matrix X to be reduced. On +*> exit, the columns of tril(X11) specify reflectors for P1 and +*> the rows of triu(X11,1) specify reflectors for Q1. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= P. +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is DOUBLE PRECISION array, dimension (LDX21,Q) +*> On entry, the bottom block of the matrix X to be reduced. On +*> exit, the columns of tril(X21) specify reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. LDX21 >= M-P. +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is DOUBLE PRECISION array, dimension (Q) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] PHI +*> \verbatim +*> PHI is DOUBLE PRECISION array, dimension (Q-1) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP1 +*> \verbatim +*> TAUP1 is DOUBLE PRECISION array, dimension (P) +*> The scalar factors of the elementary reflectors that define +*> P1. +*> \endverbatim +*> +*> \param[out] TAUP2 +*> \verbatim +*> TAUP2 is DOUBLE PRECISION array, dimension (M-P) +*> The scalar factors of the elementary reflectors that define +*> P2. +*> \endverbatim +*> +*> \param[out] TAUQ1 +*> \verbatim +*> TAUQ1 is DOUBLE PRECISION array, dimension (Q) +*> The scalar factors of the elementary reflectors that define +*> Q1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= M-Q. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +*> +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The upper-bidiagonal blocks B11, B21 are represented implicitly by +*> angles THETA(1), ..., THETA(Q) and PHI(1), ..., PHI(Q-1). Every entry +*> in each bidiagonal band is a product of a sine or cosine of a THETA +*> with a sine or cosine of a PHI. See [1] or DORCSD for details. +*> +*> P1, P2, and Q1 are represented as products of elementary reflectors. +*> See DORCSD2BY1 for details on generating P1, P2, and Q1 using DORGQR +*> and DORGLQ. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +*> +* ===================================================================== + SUBROUTINE DORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. + DOUBLE PRECISION PHI(*), THETA(*) + DOUBLE PRECISION TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), + $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* ==================================================================== +* +* .. Parameters .. + DOUBLE PRECISION NEGONE, ONE + PARAMETER ( NEGONE = -1.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION C, S + INTEGER CHILDINFO, I, ILARF, IORBDB5, LLARF, LORBDB5, + $ LWORKMIN, LWORKOPT + LOGICAL LQUERY +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFGP, DORBDB5, DROT, DSCAL, XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DNRM2 + EXTERNAL DNRM2 +* .. +* .. Intrinsic Function .. + INTRINSIC ATAN2, COS, MAX, SIN, SQRT +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -1 + ELSE IF( P .LT. 0 .OR. P .GT. M-P ) THEN + INFO = -2 + ELSE IF( Q .LT. 0 .OR. Q .LT. P .OR. M-Q .LT. P ) THEN + INFO = -3 + ELSE IF( LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -5 + ELSE IF( LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + ILARF = 2 + LLARF = MAX( P-1, M-P, Q-1 ) + IORBDB5 = 2 + LORBDB5 = Q-1 + LWORKOPT = MAX( ILARF+LLARF-1, IORBDB5+LORBDB5-1 ) + LWORKMIN = LWORKOPT + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'DORBDB2', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Reduce rows 1, ..., P of X11 and X21 +* + DO I = 1, P +* + IF( I .GT. 1 ) THEN + CALL DROT( Q-I+1, X11(I,I), LDX11, X21(I-1,I), LDX21, C, S ) + END IF + CALL DLARFGP( Q-I+1, X11(I,I), X11(I,I+1), LDX11, TAUQ1(I) ) + C = X11(I,I) + X11(I,I) = ONE + CALL DLARF( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL DLARF( 'R', M-P-I+1, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X21(I,I), LDX21, WORK(ILARF) ) + S = SQRT( DNRM2( P-I, X11(I+1,I), 1 )**2 + $ + DNRM2( M-P-I+1, X21(I,I), 1 )**2 ) + THETA(I) = ATAN2( S, C ) +* + CALL DORBDB5( P-I, M-P-I+1, Q-I, X11(I+1,I), 1, X21(I,I), 1, + $ X11(I+1,I+1), LDX11, X21(I,I+1), LDX21, + $ WORK(IORBDB5), LORBDB5, CHILDINFO ) + CALL DSCAL( P-I, NEGONE, X11(I+1,I), 1 ) + CALL DLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, TAUP2(I) ) + IF( I .LT. P ) THEN + CALL DLARFGP( P-I, X11(I+1,I), X11(I+2,I), 1, TAUP1(I) ) + PHI(I) = ATAN2( X11(I+1,I), X21(I,I) ) + C = COS( PHI(I) ) + S = SIN( PHI(I) ) + X11(I+1,I) = ONE + CALL DLARF( 'L', P-I, Q-I, X11(I+1,I), 1, TAUP1(I), + $ X11(I+1,I+1), LDX11, WORK(ILARF) ) + END IF + X21(I,I) = ONE + CALL DLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), + $ X21(I,I+1), LDX21, WORK(ILARF) ) +* + END DO +* +* Reduce the bottom-right portion of X21 to the identity matrix +* + DO I = P + 1, Q + CALL DLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, TAUP2(I) ) + X21(I,I) = ONE + CALL DLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), + $ X21(I,I+1), LDX21, WORK(ILARF) ) + END DO +* + RETURN +* +* End of DORBDB2 +* + END + diff --git a/dspl/liblapack/SRC/dorbdb3.f b/dspl/liblapack/SRC/dorbdb3.f new file mode 100644 index 0000000..f5667b4 --- /dev/null +++ b/dspl/liblapack/SRC/dorbdb3.f @@ -0,0 +1,332 @@ +*> \brief \b DORBDB3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORBDB3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, +* TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. +* DOUBLE PRECISION PHI(*), THETA(*) +* DOUBLE PRECISION TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), +* $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> DORBDB3 simultaneously bidiagonalizes the blocks of a tall and skinny +*> matrix X with orthonomal columns: +*> +*> [ B11 ] +*> [ X11 ] [ P1 | ] [ 0 ] +*> [-----] = [---------] [-----] Q1**T . +*> [ X21 ] [ | P2 ] [ B21 ] +*> [ 0 ] +*> +*> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-P must be no larger than P, +*> Q, or M-Q. Routines DORBDB1, DORBDB2, and DORBDB4 handle cases in +*> which M-P is not the minimum dimension. +*> +*> The orthogonal matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), +*> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by +*> Householder vectors. +*> +*> B11 and B12 are (M-P)-by-(M-P) bidiagonal matrices represented +*> implicitly by angles THETA, PHI. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows X11 plus the number of rows in X21. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11. 0 <= P <= M. M-P <= min(P,Q,M-Q). +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= M. +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is DOUBLE PRECISION array, dimension (LDX11,Q) +*> On entry, the top block of the matrix X to be reduced. On +*> exit, the columns of tril(X11) specify reflectors for P1 and +*> the rows of triu(X11,1) specify reflectors for Q1. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= P. +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is DOUBLE PRECISION array, dimension (LDX21,Q) +*> On entry, the bottom block of the matrix X to be reduced. On +*> exit, the columns of tril(X21) specify reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. LDX21 >= M-P. +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is DOUBLE PRECISION array, dimension (Q) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] PHI +*> \verbatim +*> PHI is DOUBLE PRECISION array, dimension (Q-1) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP1 +*> \verbatim +*> TAUP1 is DOUBLE PRECISION array, dimension (P) +*> The scalar factors of the elementary reflectors that define +*> P1. +*> \endverbatim +*> +*> \param[out] TAUP2 +*> \verbatim +*> TAUP2 is DOUBLE PRECISION array, dimension (M-P) +*> The scalar factors of the elementary reflectors that define +*> P2. +*> \endverbatim +*> +*> \param[out] TAUQ1 +*> \verbatim +*> TAUQ1 is DOUBLE PRECISION array, dimension (Q) +*> The scalar factors of the elementary reflectors that define +*> Q1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= M-Q. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The upper-bidiagonal blocks B11, B21 are represented implicitly by +*> angles THETA(1), ..., THETA(Q) and PHI(1), ..., PHI(Q-1). Every entry +*> in each bidiagonal band is a product of a sine or cosine of a THETA +*> with a sine or cosine of a PHI. See [1] or DORCSD for details. +*> +*> P1, P2, and Q1 are represented as products of elementary reflectors. +*> See DORCSD2BY1 for details on generating P1, P2, and Q1 using DORGQR +*> and DORGLQ. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +*> +* ===================================================================== + SUBROUTINE DORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. + DOUBLE PRECISION PHI(*), THETA(*) + DOUBLE PRECISION TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), + $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* ==================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION C, S + INTEGER CHILDINFO, I, ILARF, IORBDB5, LLARF, LORBDB5, + $ LWORKMIN, LWORKOPT + LOGICAL LQUERY +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFGP, DORBDB5, DROT, XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DNRM2 + EXTERNAL DNRM2 +* .. +* .. Intrinsic Function .. + INTRINSIC ATAN2, COS, MAX, SIN, SQRT +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -1 + ELSE IF( 2*P .LT. M .OR. P .GT. M ) THEN + INFO = -2 + ELSE IF( Q .LT. M-P .OR. M-Q .LT. M-P ) THEN + INFO = -3 + ELSE IF( LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -5 + ELSE IF( LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + ILARF = 2 + LLARF = MAX( P, M-P-1, Q-1 ) + IORBDB5 = 2 + LORBDB5 = Q-1 + LWORKOPT = MAX( ILARF+LLARF-1, IORBDB5+LORBDB5-1 ) + LWORKMIN = LWORKOPT + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'DORBDB3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Reduce rows 1, ..., M-P of X11 and X21 +* + DO I = 1, M-P +* + IF( I .GT. 1 ) THEN + CALL DROT( Q-I+1, X11(I-1,I), LDX11, X21(I,I), LDX11, C, S ) + END IF +* + CALL DLARFGP( Q-I+1, X21(I,I), X21(I,I+1), LDX21, TAUQ1(I) ) + S = X21(I,I) + X21(I,I) = ONE + CALL DLARF( 'R', P-I+1, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X11(I,I), LDX11, WORK(ILARF) ) + CALL DLARF( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X21(I+1,I), LDX21, WORK(ILARF) ) + C = SQRT( DNRM2( P-I+1, X11(I,I), 1 )**2 + $ + DNRM2( M-P-I, X21(I+1,I), 1 )**2 ) + THETA(I) = ATAN2( S, C ) +* + CALL DORBDB5( P-I+1, M-P-I, Q-I, X11(I,I), 1, X21(I+1,I), 1, + $ X11(I,I+1), LDX11, X21(I+1,I+1), LDX21, + $ WORK(IORBDB5), LORBDB5, CHILDINFO ) + CALL DLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) + IF( I .LT. M-P ) THEN + CALL DLARFGP( M-P-I, X21(I+1,I), X21(I+2,I), 1, TAUP2(I) ) + PHI(I) = ATAN2( X21(I+1,I), X11(I,I) ) + C = COS( PHI(I) ) + S = SIN( PHI(I) ) + X21(I+1,I) = ONE + CALL DLARF( 'L', M-P-I, Q-I, X21(I+1,I), 1, TAUP2(I), + $ X21(I+1,I+1), LDX21, WORK(ILARF) ) + END IF + X11(I,I) = ONE + CALL DLARF( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), X11(I,I+1), + $ LDX11, WORK(ILARF) ) +* + END DO +* +* Reduce the bottom-right portion of X11 to the identity matrix +* + DO I = M-P + 1, Q + CALL DLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) + X11(I,I) = ONE + CALL DLARF( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), X11(I,I+1), + $ LDX11, WORK(ILARF) ) + END DO +* + RETURN +* +* End of DORBDB3 +* + END + diff --git a/dspl/liblapack/SRC/dorbdb4.f b/dspl/liblapack/SRC/dorbdb4.f new file mode 100644 index 0000000..ff141b9 --- /dev/null +++ b/dspl/liblapack/SRC/dorbdb4.f @@ -0,0 +1,377 @@ +*> \brief \b DORBDB4 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORBDB4 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, +* TAUP1, TAUP2, TAUQ1, PHANTOM, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. +* DOUBLE PRECISION PHI(*), THETA(*) +* DOUBLE PRECISION PHANTOM(*), TAUP1(*), TAUP2(*), TAUQ1(*), +* $ WORK(*), X11(LDX11,*), X21(LDX21,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> DORBDB4 simultaneously bidiagonalizes the blocks of a tall and skinny +*> matrix X with orthonomal columns: +*> +*> [ B11 ] +*> [ X11 ] [ P1 | ] [ 0 ] +*> [-----] = [---------] [-----] Q1**T . +*> [ X21 ] [ | P2 ] [ B21 ] +*> [ 0 ] +*> +*> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-Q must be no larger than P, +*> M-P, or Q. Routines DORBDB1, DORBDB2, and DORBDB3 handle cases in +*> which M-Q is not the minimum dimension. +*> +*> The orthogonal matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), +*> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by +*> Householder vectors. +*> +*> B11 and B12 are (M-Q)-by-(M-Q) bidiagonal matrices represented +*> implicitly by angles THETA, PHI. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows X11 plus the number of rows in X21. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= M and +*> M-Q <= min(P,M-P,Q). +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is DOUBLE PRECISION array, dimension (LDX11,Q) +*> On entry, the top block of the matrix X to be reduced. On +*> exit, the columns of tril(X11) specify reflectors for P1 and +*> the rows of triu(X11,1) specify reflectors for Q1. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= P. +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is DOUBLE PRECISION array, dimension (LDX21,Q) +*> On entry, the bottom block of the matrix X to be reduced. On +*> exit, the columns of tril(X21) specify reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. LDX21 >= M-P. +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is DOUBLE PRECISION array, dimension (Q) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] PHI +*> \verbatim +*> PHI is DOUBLE PRECISION array, dimension (Q-1) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP1 +*> \verbatim +*> TAUP1 is DOUBLE PRECISION array, dimension (P) +*> The scalar factors of the elementary reflectors that define +*> P1. +*> \endverbatim +*> +*> \param[out] TAUP2 +*> \verbatim +*> TAUP2 is DOUBLE PRECISION array, dimension (M-P) +*> The scalar factors of the elementary reflectors that define +*> P2. +*> \endverbatim +*> +*> \param[out] TAUQ1 +*> \verbatim +*> TAUQ1 is DOUBLE PRECISION array, dimension (Q) +*> The scalar factors of the elementary reflectors that define +*> Q1. +*> \endverbatim +*> +*> \param[out] PHANTOM +*> \verbatim +*> PHANTOM is DOUBLE PRECISION array, dimension (M) +*> The routine computes an M-by-1 column vector Y that is +*> orthogonal to the columns of [ X11; X21 ]. PHANTOM(1:P) and +*> PHANTOM(P+1:M) contain Householder vectors for Y(1:P) and +*> Y(P+1:M), respectively. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= M-Q. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The upper-bidiagonal blocks B11, B21 are represented implicitly by +*> angles THETA(1), ..., THETA(Q) and PHI(1), ..., PHI(Q-1). Every entry +*> in each bidiagonal band is a product of a sine or cosine of a THETA +*> with a sine or cosine of a PHI. See [1] or DORCSD for details. +*> +*> P1, P2, and Q1 are represented as products of elementary reflectors. +*> See DORCSD2BY1 for details on generating P1, P2, and Q1 using DORGQR +*> and DORGLQ. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +*> +* ===================================================================== + SUBROUTINE DORBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + $ TAUP1, TAUP2, TAUQ1, PHANTOM, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. + DOUBLE PRECISION PHI(*), THETA(*) + DOUBLE PRECISION PHANTOM(*), TAUP1(*), TAUP2(*), TAUQ1(*), + $ WORK(*), X11(LDX11,*), X21(LDX21,*) +* .. +* +* ==================================================================== +* +* .. Parameters .. + DOUBLE PRECISION NEGONE, ONE, ZERO + PARAMETER ( NEGONE = -1.0D0, ONE = 1.0D0, ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION C, S + INTEGER CHILDINFO, I, ILARF, IORBDB5, J, LLARF, + $ LORBDB5, LWORKMIN, LWORKOPT + LOGICAL LQUERY +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DLARFGP, DORBDB5, DROT, DSCAL, XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DNRM2 + EXTERNAL DNRM2 +* .. +* .. Intrinsic Function .. + INTRINSIC ATAN2, COS, MAX, SIN, SQRT +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -1 + ELSE IF( P .LT. M-Q .OR. M-P .LT. M-Q ) THEN + INFO = -2 + ELSE IF( Q .LT. M-Q .OR. Q .GT. M ) THEN + INFO = -3 + ELSE IF( LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -5 + ELSE IF( LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + ILARF = 2 + LLARF = MAX( Q-1, P-1, M-P-1 ) + IORBDB5 = 2 + LORBDB5 = Q + LWORKOPT = ILARF + LLARF - 1 + LWORKOPT = MAX( LWORKOPT, IORBDB5 + LORBDB5 - 1 ) + LWORKMIN = LWORKOPT + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'DORBDB4', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Reduce columns 1, ..., M-Q of X11 and X21 +* + DO I = 1, M-Q +* + IF( I .EQ. 1 ) THEN + DO J = 1, M + PHANTOM(J) = ZERO + END DO + CALL DORBDB5( P, M-P, Q, PHANTOM(1), 1, PHANTOM(P+1), 1, + $ X11, LDX11, X21, LDX21, WORK(IORBDB5), + $ LORBDB5, CHILDINFO ) + CALL DSCAL( P, NEGONE, PHANTOM(1), 1 ) + CALL DLARFGP( P, PHANTOM(1), PHANTOM(2), 1, TAUP1(1) ) + CALL DLARFGP( M-P, PHANTOM(P+1), PHANTOM(P+2), 1, TAUP2(1) ) + THETA(I) = ATAN2( PHANTOM(1), PHANTOM(P+1) ) + C = COS( THETA(I) ) + S = SIN( THETA(I) ) + PHANTOM(1) = ONE + PHANTOM(P+1) = ONE + CALL DLARF( 'L', P, Q, PHANTOM(1), 1, TAUP1(1), X11, LDX11, + $ WORK(ILARF) ) + CALL DLARF( 'L', M-P, Q, PHANTOM(P+1), 1, TAUP2(1), X21, + $ LDX21, WORK(ILARF) ) + ELSE + CALL DORBDB5( P-I+1, M-P-I+1, Q-I+1, X11(I,I-1), 1, + $ X21(I,I-1), 1, X11(I,I), LDX11, X21(I,I), + $ LDX21, WORK(IORBDB5), LORBDB5, CHILDINFO ) + CALL DSCAL( P-I+1, NEGONE, X11(I,I-1), 1 ) + CALL DLARFGP( P-I+1, X11(I,I-1), X11(I+1,I-1), 1, TAUP1(I) ) + CALL DLARFGP( M-P-I+1, X21(I,I-1), X21(I+1,I-1), 1, + $ TAUP2(I) ) + THETA(I) = ATAN2( X11(I,I-1), X21(I,I-1) ) + C = COS( THETA(I) ) + S = SIN( THETA(I) ) + X11(I,I-1) = ONE + X21(I,I-1) = ONE + CALL DLARF( 'L', P-I+1, Q-I+1, X11(I,I-1), 1, TAUP1(I), + $ X11(I,I), LDX11, WORK(ILARF) ) + CALL DLARF( 'L', M-P-I+1, Q-I+1, X21(I,I-1), 1, TAUP2(I), + $ X21(I,I), LDX21, WORK(ILARF) ) + END IF +* + CALL DROT( Q-I+1, X11(I,I), LDX11, X21(I,I), LDX21, S, -C ) + CALL DLARFGP( Q-I+1, X21(I,I), X21(I,I+1), LDX21, TAUQ1(I) ) + C = X21(I,I) + X21(I,I) = ONE + CALL DLARF( 'R', P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL DLARF( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X21(I+1,I), LDX21, WORK(ILARF) ) + IF( I .LT. M-Q ) THEN + S = SQRT( DNRM2( P-I, X11(I+1,I), 1 )**2 + $ + DNRM2( M-P-I, X21(I+1,I), 1 )**2 ) + PHI(I) = ATAN2( S, C ) + END IF +* + END DO +* +* Reduce the bottom-right portion of X11 to [ I 0 ] +* + DO I = M - Q + 1, P + CALL DLARFGP( Q-I+1, X11(I,I), X11(I,I+1), LDX11, TAUQ1(I) ) + X11(I,I) = ONE + CALL DLARF( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL DLARF( 'R', Q-P, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X21(M-Q+1,I), LDX21, WORK(ILARF) ) + END DO +* +* Reduce the bottom-right portion of X21 to [ 0 I ] +* + DO I = P + 1, Q + CALL DLARFGP( Q-I+1, X21(M-Q+I-P,I), X21(M-Q+I-P,I+1), LDX21, + $ TAUQ1(I) ) + X21(M-Q+I-P,I) = ONE + CALL DLARF( 'R', Q-I, Q-I+1, X21(M-Q+I-P,I), LDX21, TAUQ1(I), + $ X21(M-Q+I-P+1,I), LDX21, WORK(ILARF) ) + END DO +* + RETURN +* +* End of DORBDB4 +* + END + diff --git a/dspl/liblapack/SRC/dorbdb5.f b/dspl/liblapack/SRC/dorbdb5.f new file mode 100644 index 0000000..313c6d5 --- /dev/null +++ b/dspl/liblapack/SRC/dorbdb5.f @@ -0,0 +1,274 @@ +*> \brief \b DORBDB5 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORBDB5 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, +* LDQ2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INCX1, INCX2, INFO, LDQ1, LDQ2, LWORK, M1, M2, +* $ N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION Q1(LDQ1,*), Q2(LDQ2,*), WORK(*), X1(*), X2(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> DORBDB5 orthogonalizes the column vector +*> X = [ X1 ] +*> [ X2 ] +*> with respect to the columns of +*> Q = [ Q1 ] . +*> [ Q2 ] +*> The columns of Q must be orthonormal. +*> +*> If the projection is zero according to Kahan's "twice is enough" +*> criterion, then some other vector from the orthogonal complement +*> is returned. This vector is chosen in an arbitrary but deterministic +*> way. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M1 +*> \verbatim +*> M1 is INTEGER +*> The dimension of X1 and the number of rows in Q1. 0 <= M1. +*> \endverbatim +*> +*> \param[in] M2 +*> \verbatim +*> M2 is INTEGER +*> The dimension of X2 and the number of rows in Q2. 0 <= M2. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in Q1 and Q2. 0 <= N. +*> \endverbatim +*> +*> \param[in,out] X1 +*> \verbatim +*> X1 is DOUBLE PRECISION array, dimension (M1) +*> On entry, the top part of the vector to be orthogonalized. +*> On exit, the top part of the projected vector. +*> \endverbatim +*> +*> \param[in] INCX1 +*> \verbatim +*> INCX1 is INTEGER +*> Increment for entries of X1. +*> \endverbatim +*> +*> \param[in,out] X2 +*> \verbatim +*> X2 is DOUBLE PRECISION array, dimension (M2) +*> On entry, the bottom part of the vector to be +*> orthogonalized. On exit, the bottom part of the projected +*> vector. +*> \endverbatim +*> +*> \param[in] INCX2 +*> \verbatim +*> INCX2 is INTEGER +*> Increment for entries of X2. +*> \endverbatim +*> +*> \param[in] Q1 +*> \verbatim +*> Q1 is DOUBLE PRECISION array, dimension (LDQ1, N) +*> The top part of the orthonormal basis matrix. +*> \endverbatim +*> +*> \param[in] LDQ1 +*> \verbatim +*> LDQ1 is INTEGER +*> The leading dimension of Q1. LDQ1 >= M1. +*> \endverbatim +*> +*> \param[in] Q2 +*> \verbatim +*> Q2 is DOUBLE PRECISION array, dimension (LDQ2, N) +*> The bottom part of the orthonormal basis matrix. +*> \endverbatim +*> +*> \param[in] LDQ2 +*> \verbatim +*> LDQ2 is INTEGER +*> The leading dimension of Q2. LDQ2 >= M2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= N. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INCX1, INCX2, INFO, LDQ1, LDQ2, LWORK, M1, M2, + $ N +* .. +* .. Array Arguments .. + DOUBLE PRECISION Q1(LDQ1,*), Q2(LDQ2,*), WORK(*), X1(*), X2(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER CHILDINFO, I, J +* .. +* .. External Subroutines .. + EXTERNAL DORBDB6, XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DNRM2 + EXTERNAL DNRM2 +* .. +* .. Intrinsic Function .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + IF( M1 .LT. 0 ) THEN + INFO = -1 + ELSE IF( M2 .LT. 0 ) THEN + INFO = -2 + ELSE IF( N .LT. 0 ) THEN + INFO = -3 + ELSE IF( INCX1 .LT. 1 ) THEN + INFO = -5 + ELSE IF( INCX2 .LT. 1 ) THEN + INFO = -7 + ELSE IF( LDQ1 .LT. MAX( 1, M1 ) ) THEN + INFO = -9 + ELSE IF( LDQ2 .LT. MAX( 1, M2 ) ) THEN + INFO = -11 + ELSE IF( LWORK .LT. N ) THEN + INFO = -13 + END IF +* + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'DORBDB5', -INFO ) + RETURN + END IF +* +* Project X onto the orthogonal complement of Q +* + CALL DORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, LDQ2, + $ WORK, LWORK, CHILDINFO ) +* +* If the projection is nonzero, then return +* + IF( DNRM2(M1,X1,INCX1) .NE. ZERO + $ .OR. DNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + RETURN + END IF +* +* Project each standard basis vector e_1,...,e_M1 in turn, stopping +* when a nonzero projection is found +* + DO I = 1, M1 + DO J = 1, M1 + X1(J) = ZERO + END DO + X1(I) = ONE + DO J = 1, M2 + X2(J) = ZERO + END DO + CALL DORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, CHILDINFO ) + IF( DNRM2(M1,X1,INCX1) .NE. ZERO + $ .OR. DNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + RETURN + END IF + END DO +* +* Project each standard basis vector e_(M1+1),...,e_(M1+M2) in turn, +* stopping when a nonzero projection is found +* + DO I = 1, M2 + DO J = 1, M1 + X1(J) = ZERO + END DO + DO J = 1, M2 + X2(J) = ZERO + END DO + X2(I) = ONE + CALL DORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, CHILDINFO ) + IF( DNRM2(M1,X1,INCX1) .NE. ZERO + $ .OR. DNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + RETURN + END IF + END DO +* + RETURN +* +* End of DORBDB5 +* + END + diff --git a/dspl/liblapack/SRC/dorbdb6.f b/dspl/liblapack/SRC/dorbdb6.f new file mode 100644 index 0000000..c92c9fe --- /dev/null +++ b/dspl/liblapack/SRC/dorbdb6.f @@ -0,0 +1,312 @@ +*> \brief \b DORBDB6 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORBDB6 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, +* LDQ2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INCX1, INCX2, INFO, LDQ1, LDQ2, LWORK, M1, M2, +* $ N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION Q1(LDQ1,*), Q2(LDQ2,*), WORK(*), X1(*), X2(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> DORBDB6 orthogonalizes the column vector +*> X = [ X1 ] +*> [ X2 ] +*> with respect to the columns of +*> Q = [ Q1 ] . +*> [ Q2 ] +*> The columns of Q must be orthonormal. +*> +*> If the projection is zero according to Kahan's "twice is enough" +*> criterion, then the zero vector is returned. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M1 +*> \verbatim +*> M1 is INTEGER +*> The dimension of X1 and the number of rows in Q1. 0 <= M1. +*> \endverbatim +*> +*> \param[in] M2 +*> \verbatim +*> M2 is INTEGER +*> The dimension of X2 and the number of rows in Q2. 0 <= M2. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in Q1 and Q2. 0 <= N. +*> \endverbatim +*> +*> \param[in,out] X1 +*> \verbatim +*> X1 is DOUBLE PRECISION array, dimension (M1) +*> On entry, the top part of the vector to be orthogonalized. +*> On exit, the top part of the projected vector. +*> \endverbatim +*> +*> \param[in] INCX1 +*> \verbatim +*> INCX1 is INTEGER +*> Increment for entries of X1. +*> \endverbatim +*> +*> \param[in,out] X2 +*> \verbatim +*> X2 is DOUBLE PRECISION array, dimension (M2) +*> On entry, the bottom part of the vector to be +*> orthogonalized. On exit, the bottom part of the projected +*> vector. +*> \endverbatim +*> +*> \param[in] INCX2 +*> \verbatim +*> INCX2 is INTEGER +*> Increment for entries of X2. +*> \endverbatim +*> +*> \param[in] Q1 +*> \verbatim +*> Q1 is DOUBLE PRECISION array, dimension (LDQ1, N) +*> The top part of the orthonormal basis matrix. +*> \endverbatim +*> +*> \param[in] LDQ1 +*> \verbatim +*> LDQ1 is INTEGER +*> The leading dimension of Q1. LDQ1 >= M1. +*> \endverbatim +*> +*> \param[in] Q2 +*> \verbatim +*> Q2 is DOUBLE PRECISION array, dimension (LDQ2, N) +*> The bottom part of the orthonormal basis matrix. +*> \endverbatim +*> +*> \param[in] LDQ2 +*> \verbatim +*> LDQ2 is INTEGER +*> The leading dimension of Q2. LDQ2 >= M2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= N. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INCX1, INCX2, INFO, LDQ1, LDQ2, LWORK, M1, M2, + $ N +* .. +* .. Array Arguments .. + DOUBLE PRECISION Q1(LDQ1,*), Q2(LDQ2,*), WORK(*), X1(*), X2(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ALPHASQ, REALONE, REALZERO + PARAMETER ( ALPHASQ = 0.01D0, REALONE = 1.0D0, + $ REALZERO = 0.0D0 ) + DOUBLE PRECISION NEGONE, ONE, ZERO + PARAMETER ( NEGONE = -1.0D0, ONE = 1.0D0, ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION NORMSQ1, NORMSQ2, SCL1, SCL2, SSQ1, SSQ2 +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DLASSQ, XERBLA +* .. +* .. Intrinsic Function .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + IF( M1 .LT. 0 ) THEN + INFO = -1 + ELSE IF( M2 .LT. 0 ) THEN + INFO = -2 + ELSE IF( N .LT. 0 ) THEN + INFO = -3 + ELSE IF( INCX1 .LT. 1 ) THEN + INFO = -5 + ELSE IF( INCX2 .LT. 1 ) THEN + INFO = -7 + ELSE IF( LDQ1 .LT. MAX( 1, M1 ) ) THEN + INFO = -9 + ELSE IF( LDQ2 .LT. MAX( 1, M2 ) ) THEN + INFO = -11 + ELSE IF( LWORK .LT. N ) THEN + INFO = -13 + END IF +* + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'DORBDB6', -INFO ) + RETURN + END IF +* +* First, project X onto the orthogonal complement of Q's column +* space +* + SCL1 = REALZERO + SSQ1 = REALONE + CALL DLASSQ( M1, X1, INCX1, SCL1, SSQ1 ) + SCL2 = REALZERO + SSQ2 = REALONE + CALL DLASSQ( M2, X2, INCX2, SCL2, SSQ2 ) + NORMSQ1 = SCL1**2*SSQ1 + SCL2**2*SSQ2 +* + IF( M1 .EQ. 0 ) THEN + DO I = 1, N + WORK(I) = ZERO + END DO + ELSE + CALL DGEMV( 'C', M1, N, ONE, Q1, LDQ1, X1, INCX1, ZERO, WORK, + $ 1 ) + END IF +* + CALL DGEMV( 'C', M2, N, ONE, Q2, LDQ2, X2, INCX2, ONE, WORK, 1 ) +* + CALL DGEMV( 'N', M1, N, NEGONE, Q1, LDQ1, WORK, 1, ONE, X1, + $ INCX1 ) + CALL DGEMV( 'N', M2, N, NEGONE, Q2, LDQ2, WORK, 1, ONE, X2, + $ INCX2 ) +* + SCL1 = REALZERO + SSQ1 = REALONE + CALL DLASSQ( M1, X1, INCX1, SCL1, SSQ1 ) + SCL2 = REALZERO + SSQ2 = REALONE + CALL DLASSQ( M2, X2, INCX2, SCL2, SSQ2 ) + NORMSQ2 = SCL1**2*SSQ1 + SCL2**2*SSQ2 +* +* If projection is sufficiently large in norm, then stop. +* If projection is zero, then stop. +* Otherwise, project again. +* + IF( NORMSQ2 .GE. ALPHASQ*NORMSQ1 ) THEN + RETURN + END IF +* + IF( NORMSQ2 .EQ. ZERO ) THEN + RETURN + END IF +* + NORMSQ1 = NORMSQ2 +* + DO I = 1, N + WORK(I) = ZERO + END DO +* + IF( M1 .EQ. 0 ) THEN + DO I = 1, N + WORK(I) = ZERO + END DO + ELSE + CALL DGEMV( 'C', M1, N, ONE, Q1, LDQ1, X1, INCX1, ZERO, WORK, + $ 1 ) + END IF +* + CALL DGEMV( 'C', M2, N, ONE, Q2, LDQ2, X2, INCX2, ONE, WORK, 1 ) +* + CALL DGEMV( 'N', M1, N, NEGONE, Q1, LDQ1, WORK, 1, ONE, X1, + $ INCX1 ) + CALL DGEMV( 'N', M2, N, NEGONE, Q2, LDQ2, WORK, 1, ONE, X2, + $ INCX2 ) +* + SCL1 = REALZERO + SSQ1 = REALONE + CALL DLASSQ( M1, X1, INCX1, SCL1, SSQ1 ) + SCL2 = REALZERO + SSQ2 = REALONE + CALL DLASSQ( M1, X1, INCX1, SCL1, SSQ1 ) + NORMSQ2 = SCL1**2*SSQ1 + SCL2**2*SSQ2 +* +* If second projection is sufficiently large in norm, then do +* nothing more. Alternatively, if it shrunk significantly, then +* truncate it to zero. +* + IF( NORMSQ2 .LT. ALPHASQ*NORMSQ1 ) THEN + DO I = 1, M1 + X1(I) = ZERO + END DO + DO I = 1, M2 + X2(I) = ZERO + END DO + END IF +* + RETURN +* +* End of DORBDB6 +* + END + diff --git a/dspl/liblapack/SRC/dorcsd.f b/dspl/liblapack/SRC/dorcsd.f new file mode 100644 index 0000000..f0284ce --- /dev/null +++ b/dspl/liblapack/SRC/dorcsd.f @@ -0,0 +1,616 @@ +*> \brief \b DORCSD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORCSD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE DORCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, +* SIGNS, M, P, Q, X11, LDX11, X12, +* LDX12, X21, LDX21, X22, LDX22, THETA, +* U1, LDU1, U2, LDU2, V1T, LDV1T, V2T, +* LDV2T, WORK, LWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, SIGNS, TRANS +* INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LDX11, LDX12, +* $ LDX21, LDX22, LWORK, M, P, Q +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION THETA( * ) +* DOUBLE PRECISION U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ), +* $ V2T( LDV2T, * ), WORK( * ), X11( LDX11, * ), +* $ X12( LDX12, * ), X21( LDX21, * ), X22( LDX22, +* $ * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORCSD computes the CS decomposition of an M-by-M partitioned +*> orthogonal matrix X: +*> +*> [ I 0 0 | 0 0 0 ] +*> [ 0 C 0 | 0 -S 0 ] +*> [ X11 | X12 ] [ U1 | ] [ 0 0 0 | 0 0 -I ] [ V1 | ]**T +*> X = [-----------] = [---------] [---------------------] [---------] . +*> [ X21 | X22 ] [ | U2 ] [ 0 0 0 | I 0 0 ] [ | V2 ] +*> [ 0 S 0 | 0 C 0 ] +*> [ 0 0 I | 0 0 0 ] +*> +*> X11 is P-by-Q. The orthogonal matrices U1, U2, V1, and V2 are P-by-P, +*> (M-P)-by-(M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. C and S are +*> R-by-R nonnegative diagonal matrices satisfying C^2 + S^2 = I, in +*> which R = MIN(P,M-P,Q,M-Q). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU1 +*> \verbatim +*> JOBU1 is CHARACTER +*> = 'Y': U1 is computed; +*> otherwise: U1 is not computed. +*> \endverbatim +*> +*> \param[in] JOBU2 +*> \verbatim +*> JOBU2 is CHARACTER +*> = 'Y': U2 is computed; +*> otherwise: U2 is not computed. +*> \endverbatim +*> +*> \param[in] JOBV1T +*> \verbatim +*> JOBV1T is CHARACTER +*> = 'Y': V1T is computed; +*> otherwise: V1T is not computed. +*> \endverbatim +*> +*> \param[in] JOBV2T +*> \verbatim +*> JOBV2T is CHARACTER +*> = 'Y': V2T is computed; +*> otherwise: V2T is not computed. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER +*> = 'T': X, U1, U2, V1T, and V2T are stored in row-major +*> order; +*> otherwise: X, U1, U2, V1T, and V2T are stored in column- +*> major order. +*> \endverbatim +*> +*> \param[in] SIGNS +*> \verbatim +*> SIGNS is CHARACTER +*> = 'O': The lower-left block is made nonpositive (the +*> "other" convention); +*> otherwise: The upper-right block is made nonpositive (the +*> "default" convention). +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows and columns in X. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11 and X12. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= M. +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is DOUBLE PRECISION array, dimension (LDX11,Q) +*> On entry, part of the orthogonal matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= MAX(1,P). +*> \endverbatim +*> +*> \param[in,out] X12 +*> \verbatim +*> X12 is DOUBLE PRECISION array, dimension (LDX12,M-Q) +*> On entry, part of the orthogonal matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX12 +*> \verbatim +*> LDX12 is INTEGER +*> The leading dimension of X12. LDX12 >= MAX(1,P). +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is DOUBLE PRECISION array, dimension (LDX21,Q) +*> On entry, part of the orthogonal matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X11. LDX21 >= MAX(1,M-P). +*> \endverbatim +*> +*> \param[in,out] X22 +*> \verbatim +*> X22 is DOUBLE PRECISION array, dimension (LDX22,M-Q) +*> On entry, part of the orthogonal matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX22 +*> \verbatim +*> LDX22 is INTEGER +*> The leading dimension of X11. LDX22 >= MAX(1,M-P). +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is DOUBLE PRECISION array, dimension (R), in which R = +*> MIN(P,M-P,Q,M-Q). +*> C = DIAG( COS(THETA(1)), ... , COS(THETA(R)) ) and +*> S = DIAG( SIN(THETA(1)), ... , SIN(THETA(R)) ). +*> \endverbatim +*> +*> \param[out] U1 +*> \verbatim +*> U1 is DOUBLE PRECISION array, dimension (LDU1,P) +*> If JOBU1 = 'Y', U1 contains the P-by-P orthogonal matrix U1. +*> \endverbatim +*> +*> \param[in] LDU1 +*> \verbatim +*> LDU1 is INTEGER +*> The leading dimension of U1. If JOBU1 = 'Y', LDU1 >= +*> MAX(1,P). +*> \endverbatim +*> +*> \param[out] U2 +*> \verbatim +*> U2 is DOUBLE PRECISION array, dimension (LDU2,M-P) +*> If JOBU2 = 'Y', U2 contains the (M-P)-by-(M-P) orthogonal +*> matrix U2. +*> \endverbatim +*> +*> \param[in] LDU2 +*> \verbatim +*> LDU2 is INTEGER +*> The leading dimension of U2. If JOBU2 = 'Y', LDU2 >= +*> MAX(1,M-P). +*> \endverbatim +*> +*> \param[out] V1T +*> \verbatim +*> V1T is DOUBLE PRECISION array, dimension (LDV1T,Q) +*> If JOBV1T = 'Y', V1T contains the Q-by-Q matrix orthogonal +*> matrix V1**T. +*> \endverbatim +*> +*> \param[in] LDV1T +*> \verbatim +*> LDV1T is INTEGER +*> The leading dimension of V1T. If JOBV1T = 'Y', LDV1T >= +*> MAX(1,Q). +*> \endverbatim +*> +*> \param[out] V2T +*> \verbatim +*> V2T is DOUBLE PRECISION array, dimension (LDV2T,M-Q) +*> If JOBV2T = 'Y', V2T contains the (M-Q)-by-(M-Q) orthogonal +*> matrix V2**T. +*> \endverbatim +*> +*> \param[in] LDV2T +*> \verbatim +*> LDV2T is INTEGER +*> The leading dimension of V2T. If JOBV2T = 'Y', LDV2T >= +*> MAX(1,M-Q). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> If INFO > 0 on exit, WORK(2:R) contains the values PHI(1), +*> ..., PHI(R-1) that, together with THETA(1), ..., THETA(R), +*> define the matrix in intermediate bidiagonal-block form +*> remaining after nonconvergence. INFO specifies the number +*> of nonzero PHI's. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the work array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (M-MIN(P, M-P, Q, M-Q)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: DBBCSD did not converge. See the description of WORK +*> above for details. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + RECURSIVE SUBROUTINE DORCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, + $ SIGNS, M, P, Q, X11, LDX11, X12, + $ LDX12, X21, LDX21, X22, LDX22, THETA, + $ U1, LDU1, U2, LDU2, V1T, LDV1T, V2T, + $ LDV2T, WORK, LWORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, SIGNS, TRANS + INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LDX11, LDX12, + $ LDX21, LDX22, LWORK, M, P, Q +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION THETA( * ) + DOUBLE PRECISION U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ), + $ V2T( LDV2T, * ), WORK( * ), X11( LDX11, * ), + $ X12( LDX12, * ), X21( LDX21, * ), X22( LDX22, + $ * ) +* .. +* +* =================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D0, + $ ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + CHARACTER TRANST, SIGNST + INTEGER CHILDINFO, I, IB11D, IB11E, IB12D, IB12E, + $ IB21D, IB21E, IB22D, IB22E, IBBCSD, IORBDB, + $ IORGLQ, IORGQR, IPHI, ITAUP1, ITAUP2, ITAUQ1, + $ ITAUQ2, J, LBBCSDWORK, LBBCSDWORKMIN, + $ LBBCSDWORKOPT, LORBDBWORK, LORBDBWORKMIN, + $ LORBDBWORKOPT, LORGLQWORK, LORGLQWORKMIN, + $ LORGLQWORKOPT, LORGQRWORK, LORGQRWORKMIN, + $ LORGQRWORKOPT, LWORKMIN, LWORKOPT + LOGICAL COLMAJOR, DEFAULTSIGNS, LQUERY, WANTU1, WANTU2, + $ WANTV1T, WANTV2T +* .. +* .. External Subroutines .. + EXTERNAL DBBCSD, DLACPY, DLAPMR, DLAPMT, + $ DORBDB, DORGLQ, DORGQR, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + WANTU1 = LSAME( JOBU1, 'Y' ) + WANTU2 = LSAME( JOBU2, 'Y' ) + WANTV1T = LSAME( JOBV1T, 'Y' ) + WANTV2T = LSAME( JOBV2T, 'Y' ) + COLMAJOR = .NOT. LSAME( TRANS, 'T' ) + DEFAULTSIGNS = .NOT. LSAME( SIGNS, 'O' ) + LQUERY = LWORK .EQ. -1 + IF( M .LT. 0 ) THEN + INFO = -7 + ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN + INFO = -8 + ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN + INFO = -9 + ELSE IF ( COLMAJOR .AND. LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -11 + ELSE IF (.NOT. COLMAJOR .AND. LDX11 .LT. MAX( 1, Q ) ) THEN + INFO = -11 + ELSE IF (COLMAJOR .AND. LDX12 .LT. MAX( 1, P ) ) THEN + INFO = -13 + ELSE IF (.NOT. COLMAJOR .AND. LDX12 .LT. MAX( 1, M-Q ) ) THEN + INFO = -13 + ELSE IF (COLMAJOR .AND. LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -15 + ELSE IF (.NOT. COLMAJOR .AND. LDX21 .LT. MAX( 1, Q ) ) THEN + INFO = -15 + ELSE IF (COLMAJOR .AND. LDX22 .LT. MAX( 1, M-P ) ) THEN + INFO = -17 + ELSE IF (.NOT. COLMAJOR .AND. LDX22 .LT. MAX( 1, M-Q ) ) THEN + INFO = -17 + ELSE IF( WANTU1 .AND. LDU1 .LT. P ) THEN + INFO = -20 + ELSE IF( WANTU2 .AND. LDU2 .LT. M-P ) THEN + INFO = -22 + ELSE IF( WANTV1T .AND. LDV1T .LT. Q ) THEN + INFO = -24 + ELSE IF( WANTV2T .AND. LDV2T .LT. M-Q ) THEN + INFO = -26 + END IF +* +* Work with transpose if convenient +* + IF( INFO .EQ. 0 .AND. MIN( P, M-P ) .LT. MIN( Q, M-Q ) ) THEN + IF( COLMAJOR ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF + IF( DEFAULTSIGNS ) THEN + SIGNST = 'O' + ELSE + SIGNST = 'D' + END IF + CALL DORCSD( JOBV1T, JOBV2T, JOBU1, JOBU2, TRANST, SIGNST, M, + $ Q, P, X11, LDX11, X21, LDX21, X12, LDX12, X22, + $ LDX22, THETA, V1T, LDV1T, V2T, LDV2T, U1, LDU1, + $ U2, LDU2, WORK, LWORK, IWORK, INFO ) + RETURN + END IF +* +* Work with permutation [ 0 I; I 0 ] * X * [ 0 I; I 0 ] if +* convenient +* + IF( INFO .EQ. 0 .AND. M-Q .LT. Q ) THEN + IF( DEFAULTSIGNS ) THEN + SIGNST = 'O' + ELSE + SIGNST = 'D' + END IF + CALL DORCSD( JOBU2, JOBU1, JOBV2T, JOBV1T, TRANS, SIGNST, M, + $ M-P, M-Q, X22, LDX22, X21, LDX21, X12, LDX12, X11, + $ LDX11, THETA, U2, LDU2, U1, LDU1, V2T, LDV2T, V1T, + $ LDV1T, WORK, LWORK, IWORK, INFO ) + RETURN + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN +* + IPHI = 2 + ITAUP1 = IPHI + MAX( 1, Q - 1 ) + ITAUP2 = ITAUP1 + MAX( 1, P ) + ITAUQ1 = ITAUP2 + MAX( 1, M - P ) + ITAUQ2 = ITAUQ1 + MAX( 1, Q ) + IORGQR = ITAUQ2 + MAX( 1, M - Q ) + CALL DORGQR( M-Q, M-Q, M-Q, U1, MAX(1,M-Q), U1, WORK, -1, + $ CHILDINFO ) + LORGQRWORKOPT = INT( WORK(1) ) + LORGQRWORKMIN = MAX( 1, M - Q ) + IORGLQ = ITAUQ2 + MAX( 1, M - Q ) + CALL DORGLQ( M-Q, M-Q, M-Q, U1, MAX(1,M-Q), U1, WORK, -1, + $ CHILDINFO ) + LORGLQWORKOPT = INT( WORK(1) ) + LORGLQWORKMIN = MAX( 1, M - Q ) + IORBDB = ITAUQ2 + MAX( 1, M - Q ) + CALL DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, + $ X21, LDX21, X22, LDX22, THETA, V1T, U1, U2, V1T, + $ V2T, WORK, -1, CHILDINFO ) + LORBDBWORKOPT = INT( WORK(1) ) + LORBDBWORKMIN = LORBDBWORKOPT + IB11D = ITAUQ2 + MAX( 1, M - Q ) + IB11E = IB11D + MAX( 1, Q ) + IB12D = IB11E + MAX( 1, Q - 1 ) + IB12E = IB12D + MAX( 1, Q ) + IB21D = IB12E + MAX( 1, Q - 1 ) + IB21E = IB21D + MAX( 1, Q ) + IB22D = IB21E + MAX( 1, Q - 1 ) + IB22E = IB22D + MAX( 1, Q ) + IBBCSD = IB22E + MAX( 1, Q - 1 ) + CALL DBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, + $ THETA, THETA, U1, LDU1, U2, LDU2, V1T, LDV1T, V2T, + $ LDV2T, U1, U1, U1, U1, U1, U1, U1, U1, WORK, -1, + $ CHILDINFO ) + LBBCSDWORKOPT = INT( WORK(1) ) + LBBCSDWORKMIN = LBBCSDWORKOPT + LWORKOPT = MAX( IORGQR + LORGQRWORKOPT, IORGLQ + LORGLQWORKOPT, + $ IORBDB + LORBDBWORKOPT, IBBCSD + LBBCSDWORKOPT ) - 1 + LWORKMIN = MAX( IORGQR + LORGQRWORKMIN, IORGLQ + LORGLQWORKMIN, + $ IORBDB + LORBDBWORKOPT, IBBCSD + LBBCSDWORKMIN ) - 1 + WORK(1) = MAX(LWORKOPT,LWORKMIN) +* + IF( LWORK .LT. LWORKMIN .AND. .NOT. LQUERY ) THEN + INFO = -22 + ELSE + LORGQRWORK = LWORK - IORGQR + 1 + LORGLQWORK = LWORK - IORGLQ + 1 + LORBDBWORK = LWORK - IORBDB + 1 + LBBCSDWORK = LWORK - IBBCSD + 1 + END IF + END IF +* +* Abort if any illegal arguments +* + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'DORCSD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Transform to bidiagonal block form +* + CALL DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, X21, + $ LDX21, X22, LDX22, THETA, WORK(IPHI), WORK(ITAUP1), + $ WORK(ITAUP2), WORK(ITAUQ1), WORK(ITAUQ2), + $ WORK(IORBDB), LORBDBWORK, CHILDINFO ) +* +* Accumulate Householder reflectors +* + IF( COLMAJOR ) THEN + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL DLACPY( 'L', P, Q, X11, LDX11, U1, LDU1 ) + CALL DORGQR( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGQR), + $ LORGQRWORK, INFO) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL DLACPY( 'L', M-P, Q, X21, LDX21, U2, LDU2 ) + CALL DORGQR( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2), + $ WORK(IORGQR), LORGQRWORK, INFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL DLACPY( 'U', Q-1, Q-1, X11(1,2), LDX11, V1T(2,2), + $ LDV1T ) + V1T(1, 1) = ONE + DO J = 2, Q + V1T(1,J) = ZERO + V1T(J,1) = ZERO + END DO + CALL DORGLQ( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1), + $ WORK(IORGLQ), LORGLQWORK, INFO ) + END IF + IF( WANTV2T .AND. M-Q .GT. 0 ) THEN + CALL DLACPY( 'U', P, M-Q, X12, LDX12, V2T, LDV2T ) + IF (M-P .GT. Q) Then + CALL DLACPY( 'U', M-P-Q, M-P-Q, X22(Q+1,P+1), LDX22, + $ V2T(P+1,P+1), LDV2T ) + END IF + IF (M .GT. Q) THEN + CALL DORGLQ( M-Q, M-Q, M-Q, V2T, LDV2T, WORK(ITAUQ2), + $ WORK(IORGLQ), LORGLQWORK, INFO ) + END IF + END IF + ELSE + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL DLACPY( 'U', Q, P, X11, LDX11, U1, LDU1 ) + CALL DORGLQ( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGLQ), + $ LORGLQWORK, INFO) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL DLACPY( 'U', Q, M-P, X21, LDX21, U2, LDU2 ) + CALL DORGLQ( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2), + $ WORK(IORGLQ), LORGLQWORK, INFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL DLACPY( 'L', Q-1, Q-1, X11(2,1), LDX11, V1T(2,2), + $ LDV1T ) + V1T(1, 1) = ONE + DO J = 2, Q + V1T(1,J) = ZERO + V1T(J,1) = ZERO + END DO + CALL DORGQR( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1), + $ WORK(IORGQR), LORGQRWORK, INFO ) + END IF + IF( WANTV2T .AND. M-Q .GT. 0 ) THEN + CALL DLACPY( 'L', M-Q, P, X12, LDX12, V2T, LDV2T ) + CALL DLACPY( 'L', M-P-Q, M-P-Q, X22(P+1,Q+1), LDX22, + $ V2T(P+1,P+1), LDV2T ) + CALL DORGQR( M-Q, M-Q, M-Q, V2T, LDV2T, WORK(ITAUQ2), + $ WORK(IORGQR), LORGQRWORK, INFO ) + END IF + END IF +* +* Compute the CSD of the matrix in bidiagonal-block form +* + CALL DBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, THETA, + $ WORK(IPHI), U1, LDU1, U2, LDU2, V1T, LDV1T, V2T, + $ LDV2T, WORK(IB11D), WORK(IB11E), WORK(IB12D), + $ WORK(IB12E), WORK(IB21D), WORK(IB21E), WORK(IB22D), + $ WORK(IB22E), WORK(IBBCSD), LBBCSDWORK, INFO ) +* +* Permute rows and columns to place identity submatrices in top- +* left corner of (1,1)-block and/or bottom-right corner of (1,2)- +* block and/or bottom-right corner of (2,1)-block and/or top-left +* corner of (2,2)-block +* + IF( Q .GT. 0 .AND. WANTU2 ) THEN + DO I = 1, Q + IWORK(I) = M - P - Q + I + END DO + DO I = Q + 1, M - P + IWORK(I) = I - Q + END DO + IF( COLMAJOR ) THEN + CALL DLAPMT( .FALSE., M-P, M-P, U2, LDU2, IWORK ) + ELSE + CALL DLAPMR( .FALSE., M-P, M-P, U2, LDU2, IWORK ) + END IF + END IF + IF( M .GT. 0 .AND. WANTV2T ) THEN + DO I = 1, P + IWORK(I) = M - P - Q + I + END DO + DO I = P + 1, M - Q + IWORK(I) = I - P + END DO + IF( .NOT. COLMAJOR ) THEN + CALL DLAPMT( .FALSE., M-Q, M-Q, V2T, LDV2T, IWORK ) + ELSE + CALL DLAPMR( .FALSE., M-Q, M-Q, V2T, LDV2T, IWORK ) + END IF + END IF +* + RETURN +* +* End DORCSD +* + END + diff --git a/dspl/liblapack/SRC/dorcsd2by1.f b/dspl/liblapack/SRC/dorcsd2by1.f new file mode 100644 index 0000000..4f98800 --- /dev/null +++ b/dspl/liblapack/SRC/dorcsd2by1.f @@ -0,0 +1,740 @@ +*> \brief \b DORCSD2BY1 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORCSD2BY1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORCSD2BY1( JOBU1, JOBU2, JOBV1T, M, P, Q, X11, LDX11, +* X21, LDX21, THETA, U1, LDU1, U2, LDU2, V1T, +* LDV1T, WORK, LWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBU1, JOBU2, JOBV1T +* INTEGER INFO, LDU1, LDU2, LDV1T, LWORK, LDX11, LDX21, +* $ M, P, Q +* .. +* .. Array Arguments .. +* DOUBLE PRECISION THETA(*) +* DOUBLE PRECISION U1(LDU1,*), U2(LDU2,*), V1T(LDV1T,*), WORK(*), +* $ X11(LDX11,*), X21(LDX21,*) +* INTEGER IWORK(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> DORCSD2BY1 computes the CS decomposition of an M-by-Q matrix X with +*> orthonormal columns that has been partitioned into a 2-by-1 block +*> structure: +*> +*> [ I1 0 0 ] +*> [ 0 C 0 ] +*> [ X11 ] [ U1 | ] [ 0 0 0 ] +*> X = [-----] = [---------] [----------] V1**T . +*> [ X21 ] [ | U2 ] [ 0 0 0 ] +*> [ 0 S 0 ] +*> [ 0 0 I2] +*> +*> X11 is P-by-Q. The orthogonal matrices U1, U2, and V1 are P-by-P, +*> (M-P)-by-(M-P), and Q-by-Q, respectively. C and S are R-by-R +*> nonnegative diagonal matrices satisfying C^2 + S^2 = I, in which +*> R = MIN(P,M-P,Q,M-Q). I1 is a K1-by-K1 identity matrix and I2 is a +*> K2-by-K2 identity matrix, where K1 = MAX(Q+P-M,0), K2 = MAX(Q-P,0). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU1 +*> \verbatim +*> JOBU1 is CHARACTER +*> = 'Y': U1 is computed; +*> otherwise: U1 is not computed. +*> \endverbatim +*> +*> \param[in] JOBU2 +*> \verbatim +*> JOBU2 is CHARACTER +*> = 'Y': U2 is computed; +*> otherwise: U2 is not computed. +*> \endverbatim +*> +*> \param[in] JOBV1T +*> \verbatim +*> JOBV1T is CHARACTER +*> = 'Y': V1T is computed; +*> otherwise: V1T is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows in X. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= M. +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is DOUBLE PRECISION array, dimension (LDX11,Q) +*> On entry, part of the orthogonal matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= MAX(1,P). +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is DOUBLE PRECISION array, dimension (LDX21,Q) +*> On entry, part of the orthogonal matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. LDX21 >= MAX(1,M-P). +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is DOUBLE PRECISION array, dimension (R), in which R = +*> MIN(P,M-P,Q,M-Q). +*> C = DIAG( COS(THETA(1)), ... , COS(THETA(R)) ) and +*> S = DIAG( SIN(THETA(1)), ... , SIN(THETA(R)) ). +*> \endverbatim +*> +*> \param[out] U1 +*> \verbatim +*> U1 is DOUBLE PRECISION array, dimension (P) +*> If JOBU1 = 'Y', U1 contains the P-by-P orthogonal matrix U1. +*> \endverbatim +*> +*> \param[in] LDU1 +*> \verbatim +*> LDU1 is INTEGER +*> The leading dimension of U1. If JOBU1 = 'Y', LDU1 >= +*> MAX(1,P). +*> \endverbatim +*> +*> \param[out] U2 +*> \verbatim +*> U2 is DOUBLE PRECISION array, dimension (M-P) +*> If JOBU2 = 'Y', U2 contains the (M-P)-by-(M-P) orthogonal +*> matrix U2. +*> \endverbatim +*> +*> \param[in] LDU2 +*> \verbatim +*> LDU2 is INTEGER +*> The leading dimension of U2. If JOBU2 = 'Y', LDU2 >= +*> MAX(1,M-P). +*> \endverbatim +*> +*> \param[out] V1T +*> \verbatim +*> V1T is DOUBLE PRECISION array, dimension (Q) +*> If JOBV1T = 'Y', V1T contains the Q-by-Q matrix orthogonal +*> matrix V1**T. +*> \endverbatim +*> +*> \param[in] LDV1T +*> \verbatim +*> LDV1T is INTEGER +*> The leading dimension of V1T. If JOBV1T = 'Y', LDV1T >= +*> MAX(1,Q). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> If INFO > 0 on exit, WORK(2:R) contains the values PHI(1), +*> ..., PHI(R-1) that, together with THETA(1), ..., THETA(R), +*> define the matrix in intermediate bidiagonal-block form +*> remaining after nonconvergence. INFO specifies the number +*> of nonzero PHI's. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the work array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (M-MIN(P,M-P,Q,M-Q)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: DBBCSD did not converge. See the description of WORK +*> above for details. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORCSD2BY1( JOBU1, JOBU2, JOBV1T, M, P, Q, X11, LDX11, + $ X21, LDX21, THETA, U1, LDU1, U2, LDU2, V1T, + $ LDV1T, WORK, LWORK, IWORK, INFO ) +* +* -- LAPACK computational routine (3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + CHARACTER JOBU1, JOBU2, JOBV1T + INTEGER INFO, LDU1, LDU2, LDV1T, LWORK, LDX11, LDX21, + $ M, P, Q +* .. +* .. Array Arguments .. + DOUBLE PRECISION THETA(*) + DOUBLE PRECISION U1(LDU1,*), U2(LDU2,*), V1T(LDV1T,*), WORK(*), + $ X11(LDX11,*), X21(LDX21,*) + INTEGER IWORK(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER CHILDINFO, I, IB11D, IB11E, IB12D, IB12E, + $ IB21D, IB21E, IB22D, IB22E, IBBCSD, IORBDB, + $ IORGLQ, IORGQR, IPHI, ITAUP1, ITAUP2, ITAUQ1, + $ J, LBBCSD, LORBDB, LORGLQ, LORGLQMIN, + $ LORGLQOPT, LORGQR, LORGQRMIN, LORGQROPT, + $ LWORKMIN, LWORKOPT, R + LOGICAL LQUERY, WANTU1, WANTU2, WANTV1T +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUM1(1), DUM2(1,1) +* .. +* .. External Subroutines .. + EXTERNAL DBBCSD, DCOPY, DLACPY, DLAPMR, DLAPMT, DORBDB1, + $ DORBDB2, DORBDB3, DORBDB4, DORGLQ, DORGQR, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Function .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + WANTU1 = LSAME( JOBU1, 'Y' ) + WANTU2 = LSAME( JOBU2, 'Y' ) + WANTV1T = LSAME( JOBV1T, 'Y' ) + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -4 + ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN + INFO = -5 + ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN + INFO = -6 + ELSE IF( LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -8 + ELSE IF( LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -10 + ELSE IF( WANTU1 .AND. LDU1 .LT. MAX( 1, P ) ) THEN + INFO = -13 + ELSE IF( WANTU2 .AND. LDU2 .LT. MAX( 1, M - P ) ) THEN + INFO = -15 + ELSE IF( WANTV1T .AND. LDV1T .LT. MAX( 1, Q ) ) THEN + INFO = -17 + END IF +* + R = MIN( P, M-P, Q, M-Q ) +* +* Compute workspace +* +* WORK layout: +* |-------------------------------------------------------| +* | LWORKOPT (1) | +* |-------------------------------------------------------| +* | PHI (MAX(1,R-1)) | +* |-------------------------------------------------------| +* | TAUP1 (MAX(1,P)) | B11D (R) | +* | TAUP2 (MAX(1,M-P)) | B11E (R-1) | +* | TAUQ1 (MAX(1,Q)) | B12D (R) | +* |-----------------------------------------| B12E (R-1) | +* | DORBDB WORK | DORGQR WORK | DORGLQ WORK | B21D (R) | +* | | | | B21E (R-1) | +* | | | | B22D (R) | +* | | | | B22E (R-1) | +* | | | | DBBCSD WORK | +* |-------------------------------------------------------| +* + IF( INFO .EQ. 0 ) THEN + IPHI = 2 + IB11D = IPHI + MAX( 1, R-1 ) + IB11E = IB11D + MAX( 1, R ) + IB12D = IB11E + MAX( 1, R - 1 ) + IB12E = IB12D + MAX( 1, R ) + IB21D = IB12E + MAX( 1, R - 1 ) + IB21E = IB21D + MAX( 1, R ) + IB22D = IB21E + MAX( 1, R - 1 ) + IB22E = IB22D + MAX( 1, R ) + IBBCSD = IB22E + MAX( 1, R - 1 ) + ITAUP1 = IPHI + MAX( 1, R-1 ) + ITAUP2 = ITAUP1 + MAX( 1, P ) + ITAUQ1 = ITAUP2 + MAX( 1, M-P ) + IORBDB = ITAUQ1 + MAX( 1, Q ) + IORGQR = ITAUQ1 + MAX( 1, Q ) + IORGLQ = ITAUQ1 + MAX( 1, Q ) + LORGQRMIN = 1 + LORGQROPT = 1 + LORGLQMIN = 1 + LORGLQOPT = 1 + IF( R .EQ. Q ) THEN + CALL DORBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ DUM1, DUM1, DUM1, DUM1, WORK, + $ -1, CHILDINFO ) + LORBDB = INT( WORK(1) ) + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL DORGQR( P, P, Q, U1, LDU1, DUM1, WORK(1), -1, + $ CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + ENDIF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL DORGQR( M-P, M-P, Q, U2, LDU2, DUM1, WORK(1), + $ -1, CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, M-P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL DORGLQ( Q-1, Q-1, Q-1, V1T, LDV1T, + $ DUM1, WORK(1), -1, CHILDINFO ) + LORGLQMIN = MAX( LORGLQMIN, Q-1 ) + LORGLQOPT = MAX( LORGLQOPT, INT( WORK(1) ) ) + END IF + CALL DBBCSD( JOBU1, JOBU2, JOBV1T, 'N', 'N', M, P, Q, THETA, + $ DUM1, U1, LDU1, U2, LDU2, V1T, LDV1T, + $ DUM2, 1, DUM1, DUM1, DUM1, + $ DUM1, DUM1, DUM1, DUM1, + $ DUM1, WORK(1), -1, CHILDINFO ) + LBBCSD = INT( WORK(1) ) + ELSE IF( R .EQ. P ) THEN + CALL DORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ DUM1, DUM1, DUM1, DUM1, + $ WORK(1), -1, CHILDINFO ) + LORBDB = INT( WORK(1) ) + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL DORGQR( P-1, P-1, P-1, U1(2,2), LDU1, DUM1, + $ WORK(1), -1, CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, P-1 ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL DORGQR( M-P, M-P, Q, U2, LDU2, DUM1, WORK(1), + $ -1, CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, M-P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL DORGLQ( Q, Q, R, V1T, LDV1T, DUM1, WORK(1), -1, + $ CHILDINFO ) + LORGLQMIN = MAX( LORGLQMIN, Q ) + LORGLQOPT = MAX( LORGLQOPT, INT( WORK(1) ) ) + END IF + CALL DBBCSD( JOBV1T, 'N', JOBU1, JOBU2, 'T', M, Q, P, THETA, + $ DUM1, V1T, LDV1T, DUM2, 1, U1, LDU1, + $ U2, LDU2, DUM1, DUM1, DUM1, + $ DUM1, DUM1, DUM1, DUM1, + $ DUM1, WORK(1), -1, CHILDINFO ) + LBBCSD = INT( WORK(1) ) + ELSE IF( R .EQ. M-P ) THEN + CALL DORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ DUM1, DUM1, DUM1, DUM1, + $ WORK(1), -1, CHILDINFO ) + LORBDB = INT( WORK(1) ) + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL DORGQR( P, P, Q, U1, LDU1, DUM1, WORK(1), -1, + $ CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL DORGQR( M-P-1, M-P-1, M-P-1, U2(2,2), LDU2, + $ DUM1, WORK(1), -1, CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, M-P-1 ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL DORGLQ( Q, Q, R, V1T, LDV1T, DUM1, WORK(1), -1, + $ CHILDINFO ) + LORGLQMIN = MAX( LORGLQMIN, Q ) + LORGLQOPT = MAX( LORGLQOPT, INT( WORK(1) ) ) + END IF + CALL DBBCSD( 'N', JOBV1T, JOBU2, JOBU1, 'T', M, M-Q, M-P, + $ THETA, DUM1, DUM2, 1, V1T, LDV1T, U2, + $ LDU2, U1, LDU1, DUM1, DUM1, DUM1, + $ DUM1, DUM1, DUM1, DUM1, + $ DUM1, WORK(1), -1, CHILDINFO ) + LBBCSD = INT( WORK(1) ) + ELSE + CALL DORBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ DUM1, DUM1, DUM1, DUM1, + $ DUM1, WORK(1), -1, CHILDINFO ) + LORBDB = M + INT( WORK(1) ) + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL DORGQR( P, P, M-Q, U1, LDU1, DUM1, WORK(1), -1, + $ CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL DORGQR( M-P, M-P, M-Q, U2, LDU2, DUM1, WORK(1), + $ -1, CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, M-P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL DORGLQ( Q, Q, Q, V1T, LDV1T, DUM1, WORK(1), -1, + $ CHILDINFO ) + LORGLQMIN = MAX( LORGLQMIN, Q ) + LORGLQOPT = MAX( LORGLQOPT, INT( WORK(1) ) ) + END IF + CALL DBBCSD( JOBU2, JOBU1, 'N', JOBV1T, 'N', M, M-P, M-Q, + $ THETA, DUM1, U2, LDU2, U1, LDU1, DUM2, + $ 1, V1T, LDV1T, DUM1, DUM1, DUM1, + $ DUM1, DUM1, DUM1, DUM1, + $ DUM1, WORK(1), -1, CHILDINFO ) + LBBCSD = INT( WORK(1) ) + END IF + LWORKMIN = MAX( IORBDB+LORBDB-1, + $ IORGQR+LORGQRMIN-1, + $ IORGLQ+LORGLQMIN-1, + $ IBBCSD+LBBCSD-1 ) + LWORKOPT = MAX( IORBDB+LORBDB-1, + $ IORGQR+LORGQROPT-1, + $ IORGLQ+LORGLQOPT-1, + $ IBBCSD+LBBCSD-1 ) + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'DORCSD2BY1', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF + LORGQR = LWORK-IORGQR+1 + LORGLQ = LWORK-IORGLQ+1 +* +* Handle four cases separately: R = Q, R = P, R = M-P, and R = M-Q, +* in which R = MIN(P,M-P,Q,M-Q) +* + IF( R .EQ. Q ) THEN +* +* Case 1: R = Q +* +* Simultaneously bidiagonalize X11 and X21 +* + CALL DORBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ WORK(IPHI), WORK(ITAUP1), WORK(ITAUP2), + $ WORK(ITAUQ1), WORK(IORBDB), LORBDB, CHILDINFO ) +* +* Accumulate Householder reflectors +* + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL DLACPY( 'L', P, Q, X11, LDX11, U1, LDU1 ) + CALL DORGQR( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGQR), + $ LORGQR, CHILDINFO ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL DLACPY( 'L', M-P, Q, X21, LDX21, U2, LDU2 ) + CALL DORGQR( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2), + $ WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + V1T(1,1) = ONE + DO J = 2, Q + V1T(1,J) = ZERO + V1T(J,1) = ZERO + END DO + CALL DLACPY( 'U', Q-1, Q-1, X21(1,2), LDX21, V1T(2,2), + $ LDV1T ) + CALL DORGLQ( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1), + $ WORK(IORGLQ), LORGLQ, CHILDINFO ) + END IF +* +* Simultaneously diagonalize X11 and X21. +* + CALL DBBCSD( JOBU1, JOBU2, JOBV1T, 'N', 'N', M, P, Q, THETA, + $ WORK(IPHI), U1, LDU1, U2, LDU2, V1T, LDV1T, + $ DUM2, 1, WORK(IB11D), WORK(IB11E), + $ WORK(IB12D), WORK(IB12E), WORK(IB21D), + $ WORK(IB21E), WORK(IB22D), WORK(IB22E), + $ WORK(IBBCSD), LBBCSD, CHILDINFO ) +* +* Permute rows and columns to place zero submatrices in +* preferred positions +* + IF( Q .GT. 0 .AND. WANTU2 ) THEN + DO I = 1, Q + IWORK(I) = M - P - Q + I + END DO + DO I = Q + 1, M - P + IWORK(I) = I - Q + END DO + CALL DLAPMT( .FALSE., M-P, M-P, U2, LDU2, IWORK ) + END IF + ELSE IF( R .EQ. P ) THEN +* +* Case 2: R = P +* +* Simultaneously bidiagonalize X11 and X21 +* + CALL DORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ WORK(IPHI), WORK(ITAUP1), WORK(ITAUP2), + $ WORK(ITAUQ1), WORK(IORBDB), LORBDB, CHILDINFO ) +* +* Accumulate Householder reflectors +* + IF( WANTU1 .AND. P .GT. 0 ) THEN + U1(1,1) = ONE + DO J = 2, P + U1(1,J) = ZERO + U1(J,1) = ZERO + END DO + CALL DLACPY( 'L', P-1, P-1, X11(2,1), LDX11, U1(2,2), LDU1 ) + CALL DORGQR( P-1, P-1, P-1, U1(2,2), LDU1, WORK(ITAUP1), + $ WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL DLACPY( 'L', M-P, Q, X21, LDX21, U2, LDU2 ) + CALL DORGQR( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2), + $ WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL DLACPY( 'U', P, Q, X11, LDX11, V1T, LDV1T ) + CALL DORGLQ( Q, Q, R, V1T, LDV1T, WORK(ITAUQ1), + $ WORK(IORGLQ), LORGLQ, CHILDINFO ) + END IF +* +* Simultaneously diagonalize X11 and X21. +* + CALL DBBCSD( JOBV1T, 'N', JOBU1, JOBU2, 'T', M, Q, P, THETA, + $ WORK(IPHI), V1T, LDV1T, DUM2, 1, U1, LDU1, U2, + $ LDU2, WORK(IB11D), WORK(IB11E), WORK(IB12D), + $ WORK(IB12E), WORK(IB21D), WORK(IB21E), + $ WORK(IB22D), WORK(IB22E), WORK(IBBCSD), LBBCSD, + $ CHILDINFO ) +* +* Permute rows and columns to place identity submatrices in +* preferred positions +* + IF( Q .GT. 0 .AND. WANTU2 ) THEN + DO I = 1, Q + IWORK(I) = M - P - Q + I + END DO + DO I = Q + 1, M - P + IWORK(I) = I - Q + END DO + CALL DLAPMT( .FALSE., M-P, M-P, U2, LDU2, IWORK ) + END IF + ELSE IF( R .EQ. M-P ) THEN +* +* Case 3: R = M-P +* +* Simultaneously bidiagonalize X11 and X21 +* + CALL DORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ WORK(IPHI), WORK(ITAUP1), WORK(ITAUP2), + $ WORK(ITAUQ1), WORK(IORBDB), LORBDB, CHILDINFO ) +* +* Accumulate Householder reflectors +* + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL DLACPY( 'L', P, Q, X11, LDX11, U1, LDU1 ) + CALL DORGQR( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGQR), + $ LORGQR, CHILDINFO ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + U2(1,1) = ONE + DO J = 2, M-P + U2(1,J) = ZERO + U2(J,1) = ZERO + END DO + CALL DLACPY( 'L', M-P-1, M-P-1, X21(2,1), LDX21, U2(2,2), + $ LDU2 ) + CALL DORGQR( M-P-1, M-P-1, M-P-1, U2(2,2), LDU2, + $ WORK(ITAUP2), WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL DLACPY( 'U', M-P, Q, X21, LDX21, V1T, LDV1T ) + CALL DORGLQ( Q, Q, R, V1T, LDV1T, WORK(ITAUQ1), + $ WORK(IORGLQ), LORGLQ, CHILDINFO ) + END IF +* +* Simultaneously diagonalize X11 and X21. +* + CALL DBBCSD( 'N', JOBV1T, JOBU2, JOBU1, 'T', M, M-Q, M-P, + $ THETA, WORK(IPHI), DUM2, 1, V1T, LDV1T, U2, + $ LDU2, U1, LDU1, WORK(IB11D), WORK(IB11E), + $ WORK(IB12D), WORK(IB12E), WORK(IB21D), + $ WORK(IB21E), WORK(IB22D), WORK(IB22E), + $ WORK(IBBCSD), LBBCSD, CHILDINFO ) +* +* Permute rows and columns to place identity submatrices in +* preferred positions +* + IF( Q .GT. R ) THEN + DO I = 1, R + IWORK(I) = Q - R + I + END DO + DO I = R + 1, Q + IWORK(I) = I - R + END DO + IF( WANTU1 ) THEN + CALL DLAPMT( .FALSE., P, Q, U1, LDU1, IWORK ) + END IF + IF( WANTV1T ) THEN + CALL DLAPMR( .FALSE., Q, Q, V1T, LDV1T, IWORK ) + END IF + END IF + ELSE +* +* Case 4: R = M-Q +* +* Simultaneously bidiagonalize X11 and X21 +* + CALL DORBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ WORK(IPHI), WORK(ITAUP1), WORK(ITAUP2), + $ WORK(ITAUQ1), WORK(IORBDB), WORK(IORBDB+M), + $ LORBDB-M, CHILDINFO ) +* +* Accumulate Householder reflectors +* + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL DCOPY( P, WORK(IORBDB), 1, U1, 1 ) + DO J = 2, P + U1(1,J) = ZERO + END DO + CALL DLACPY( 'L', P-1, M-Q-1, X11(2,1), LDX11, U1(2,2), + $ LDU1 ) + CALL DORGQR( P, P, M-Q, U1, LDU1, WORK(ITAUP1), + $ WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL DCOPY( M-P, WORK(IORBDB+P), 1, U2, 1 ) + DO J = 2, M-P + U2(1,J) = ZERO + END DO + CALL DLACPY( 'L', M-P-1, M-Q-1, X21(2,1), LDX21, U2(2,2), + $ LDU2 ) + CALL DORGQR( M-P, M-P, M-Q, U2, LDU2, WORK(ITAUP2), + $ WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL DLACPY( 'U', M-Q, Q, X21, LDX21, V1T, LDV1T ) + CALL DLACPY( 'U', P-(M-Q), Q-(M-Q), X11(M-Q+1,M-Q+1), LDX11, + $ V1T(M-Q+1,M-Q+1), LDV1T ) + CALL DLACPY( 'U', -P+Q, Q-P, X21(M-Q+1,P+1), LDX21, + $ V1T(P+1,P+1), LDV1T ) + CALL DORGLQ( Q, Q, Q, V1T, LDV1T, WORK(ITAUQ1), + $ WORK(IORGLQ), LORGLQ, CHILDINFO ) + END IF +* +* Simultaneously diagonalize X11 and X21. +* + CALL DBBCSD( JOBU2, JOBU1, 'N', JOBV1T, 'N', M, M-P, M-Q, + $ THETA, WORK(IPHI), U2, LDU2, U1, LDU1, DUM2, + $ 1, V1T, LDV1T, WORK(IB11D), WORK(IB11E), + $ WORK(IB12D), WORK(IB12E), WORK(IB21D), + $ WORK(IB21E), WORK(IB22D), WORK(IB22E), + $ WORK(IBBCSD), LBBCSD, CHILDINFO ) +* +* Permute rows and columns to place identity submatrices in +* preferred positions +* + IF( P .GT. R ) THEN + DO I = 1, R + IWORK(I) = P - R + I + END DO + DO I = R + 1, P + IWORK(I) = I - R + END DO + IF( WANTU1 ) THEN + CALL DLAPMT( .FALSE., P, P, U1, LDU1, IWORK ) + END IF + IF( WANTV1T ) THEN + CALL DLAPMR( .FALSE., P, Q, V1T, LDV1T, IWORK ) + END IF + END IF + END IF +* + RETURN +* +* End of DORCSD2BY1 +* + END + diff --git a/dspl/liblapack/SRC/dorg2l.f b/dspl/liblapack/SRC/dorg2l.f new file mode 100644 index 0000000..36ff4e5 --- /dev/null +++ b/dspl/liblapack/SRC/dorg2l.f @@ -0,0 +1,198 @@ +*> \brief \b DORG2L generates all or part of the orthogonal matrix Q from a QL factorization determined by sgeqlf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORG2L + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORG2L( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORG2L generates an m by n real matrix Q with orthonormal columns, +*> which is defined as the last n columns of a product of k elementary +*> reflectors of order m +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by DGEQLF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the (n-k+i)-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by DGEQLF in the last k columns of its array +*> argument A. +*> On exit, the m by n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGEQLF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORG2L( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, II, J, L +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORG2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns 1:n-k to columns of the unit matrix +* + DO 20 J = 1, N - K + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( M-N+J, J ) = ONE + 20 CONTINUE +* + DO 40 I = 1, K + II = N - K + I +* +* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left +* + A( M-N+II, II ) = ONE + CALL DLARF( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), A, + $ LDA, WORK ) + CALL DSCAL( M-N+II-1, -TAU( I ), A( 1, II ), 1 ) + A( M-N+II, II ) = ONE - TAU( I ) +* +* Set A(m-k+i+1:m,n-k+i) to zero +* + DO 30 L = M - N + II + 1, M + A( L, II ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of DORG2L +* + END diff --git a/dspl/liblapack/SRC/dorg2r.f b/dspl/liblapack/SRC/dorg2r.f new file mode 100644 index 0000000..4b71011 --- /dev/null +++ b/dspl/liblapack/SRC/dorg2r.f @@ -0,0 +1,200 @@ +*> \brief \b DORG2R generates all or part of the orthogonal matrix Q from a QR factorization determined by sgeqrf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORG2R + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORG2R( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORG2R generates an m by n real matrix Q with orthonormal columns, +*> which is defined as the first n columns of a product of k elementary +*> reflectors of order m +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by DGEQRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the i-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by DGEQRF in the first k columns of its array +*> argument A. +*> On exit, the m-by-n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGEQRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORG2R( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORG2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns k+1:n to columns of the unit matrix +* + DO 20 J = K + 1, N + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( J, J ) = ONE + 20 CONTINUE +* + DO 40 I = K, 1, -1 +* +* Apply H(i) to A(i:m,i:n) from the left +* + IF( I.LT.N ) THEN + A( I, I ) = ONE + CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) + END IF + IF( I.LT.M ) + $ CALL DSCAL( M-I, -TAU( I ), A( I+1, I ), 1 ) + A( I, I ) = ONE - TAU( I ) +* +* Set A(1:i-1,i) to zero +* + DO 30 L = 1, I - 1 + A( L, I ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of DORG2R +* + END diff --git a/dspl/liblapack/SRC/dorgbr.f b/dspl/liblapack/SRC/dorgbr.f new file mode 100644 index 0000000..cfebda5 --- /dev/null +++ b/dspl/liblapack/SRC/dorgbr.f @@ -0,0 +1,337 @@ +*> \brief \b DORGBR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORGBR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER VECT +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORGBR generates one of the real orthogonal matrices Q or P**T +*> determined by DGEBRD when reducing a real matrix A to bidiagonal +*> form: A = Q * B * P**T. Q and P**T are defined as products of +*> elementary reflectors H(i) or G(i) respectively. +*> +*> If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q +*> is of order M: +*> if m >= k, Q = H(1) H(2) . . . H(k) and DORGBR returns the first n +*> columns of Q, where m >= n >= k; +*> if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an +*> M-by-M matrix. +*> +*> If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T +*> is of order N: +*> if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m +*> rows of P**T, where n >= m >= k; +*> if k >= n, P**T = G(n-1) . . . G(2) G(1) and DORGBR returns P**T as +*> an N-by-N matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> Specifies whether the matrix Q or the matrix P**T is +*> required, as defined in the transformation applied by DGEBRD: +*> = 'Q': generate Q; +*> = 'P': generate P**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q or P**T to be returned. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q or P**T to be returned. +*> N >= 0. +*> If VECT = 'Q', M >= N >= min(M,K); +*> if VECT = 'P', N >= M >= min(N,K). +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> If VECT = 'Q', the number of columns in the original M-by-K +*> matrix reduced by DGEBRD. +*> If VECT = 'P', the number of rows in the original K-by-N +*> matrix reduced by DGEBRD. +*> K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the vectors which define the elementary reflectors, +*> as returned by DGEBRD. +*> On exit, the M-by-N matrix Q or P**T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension +*> (min(M,K)) if VECT = 'Q' +*> (min(N,K)) if VECT = 'P' +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i) or G(i), which determines Q or P**T, as +*> returned by DGEBRD in its array argument TAUQ or TAUP. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,min(M,N)). +*> For optimum performance LWORK >= min(M,N)*NB, where NB +*> is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleGBcomputational +* +* ===================================================================== + SUBROUTINE DORGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER VECT + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTQ + INTEGER I, IINFO, J, LWKOPT, MN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DORGLQ, DORGQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTQ = LSAME( VECT, 'Q' ) + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 .OR. ( WANTQ .AND. ( N.GT.M .OR. N.LT.MIN( M, + $ K ) ) ) .OR. ( .NOT.WANTQ .AND. ( M.GT.N .OR. M.LT. + $ MIN( N, K ) ) ) ) THEN + INFO = -3 + ELSE IF( K.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LWORK.LT.MAX( 1, MN ) .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = 1 + IF( WANTQ ) THEN + IF( M.GE.K ) THEN + CALL DORGQR( M, N, K, A, LDA, TAU, WORK, -1, IINFO ) + ELSE + IF( M.GT.1 ) THEN + CALL DORGQR( M-1, M-1, M-1, A( 2, 2 ), LDA, TAU, WORK, + $ -1, IINFO ) + END IF + END IF + ELSE + IF( K.LT.N ) THEN + CALL DORGLQ( M, N, K, A, LDA, TAU, WORK, -1, IINFO ) + ELSE + IF( N.GT.1 ) THEN + CALL DORGLQ( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ -1, IINFO ) + END IF + END IF + END IF + LWKOPT = WORK( 1 ) + LWKOPT = MAX (LWKOPT, MN) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK( 1 ) = LWKOPT + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( WANTQ ) THEN +* +* Form Q, determined by a call to DGEBRD to reduce an m-by-k +* matrix +* + IF( M.GE.K ) THEN +* +* If m >= k, assume m >= n >= k +* + CALL DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If m < k, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q +* to those of the unit matrix +* + DO 20 J = M, 2, -1 + A( 1, J ) = ZERO + DO 10 I = J + 1, M + A( I, J ) = A( I, J-1 ) + 10 CONTINUE + 20 CONTINUE + A( 1, 1 ) = ONE + DO 30 I = 2, M + A( I, 1 ) = ZERO + 30 CONTINUE + IF( M.GT.1 ) THEN +* +* Form Q(2:m,2:m) +* + CALL DORGQR( M-1, M-1, M-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + ELSE +* +* Form P**T, determined by a call to DGEBRD to reduce a k-by-n +* matrix +* + IF( K.LT.N ) THEN +* +* If k < n, assume k <= m <= n +* + CALL DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If k >= n, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* row downward, and set the first row and column of P**T to +* those of the unit matrix +* + A( 1, 1 ) = ONE + DO 40 I = 2, N + A( I, 1 ) = ZERO + 40 CONTINUE + DO 60 J = 2, N + DO 50 I = J - 1, 2, -1 + A( I, J ) = A( I-1, J ) + 50 CONTINUE + A( 1, J ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Form P**T(2:n,2:n) +* + CALL DORGLQ( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORGBR +* + END diff --git a/dspl/liblapack/SRC/dorghr.f b/dspl/liblapack/SRC/dorghr.f new file mode 100644 index 0000000..7f60c68 --- /dev/null +++ b/dspl/liblapack/SRC/dorghr.f @@ -0,0 +1,240 @@ +*> \brief \b DORGHR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORGHR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORGHR( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORGHR generates a real orthogonal matrix Q which is defined as the +*> product of IHI-ILO elementary reflectors of order N, as returned by +*> DGEHRD: +*> +*> Q = H(ilo) H(ilo+1) . . . H(ihi-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix Q. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> ILO and IHI must have the same values as in the previous call +*> of DGEHRD. Q is equal to the unit matrix except in the +*> submatrix Q(ilo+1:ihi,ilo+1:ihi). +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the vectors which define the elementary reflectors, +*> as returned by DGEHRD. +*> On exit, the N-by-N orthogonal matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (N-1) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGEHRD. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= IHI-ILO. +*> For optimum performance LWORK >= (IHI-ILO)*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORGHR( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LWKOPT, NB, NH +* .. +* .. External Subroutines .. + EXTERNAL DORGQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NH = IHI - ILO + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, NH ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'DORGQR', ' ', NH, NH, NH, -1 ) + LWKOPT = MAX( 1, NH )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGHR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first ilo and the last n-ihi +* rows and columns to those of the unit matrix +* + DO 40 J = IHI, ILO + 1, -1 + DO 10 I = 1, J - 1 + A( I, J ) = ZERO + 10 CONTINUE + DO 20 I = J + 1, IHI + A( I, J ) = A( I, J-1 ) + 20 CONTINUE + DO 30 I = IHI + 1, N + A( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + DO 60 J = 1, ILO + DO 50 I = 1, N + A( I, J ) = ZERO + 50 CONTINUE + A( J, J ) = ONE + 60 CONTINUE + DO 80 J = IHI + 1, N + DO 70 I = 1, N + A( I, J ) = ZERO + 70 CONTINUE + A( J, J ) = ONE + 80 CONTINUE +* + IF( NH.GT.0 ) THEN +* +* Generate Q(ilo+1:ihi,ilo+1:ihi) +* + CALL DORGQR( NH, NH, NH, A( ILO+1, ILO+1 ), LDA, TAU( ILO ), + $ WORK, LWORK, IINFO ) + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORGHR +* + END diff --git a/dspl/liblapack/SRC/dorgl2.f b/dspl/liblapack/SRC/dorgl2.f new file mode 100644 index 0000000..5d8985d --- /dev/null +++ b/dspl/liblapack/SRC/dorgl2.f @@ -0,0 +1,204 @@ +*> \brief \b DORGL2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORGL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORGL2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORGL2 generates an m by n real matrix Q with orthonormal rows, +*> which is defined as the first m rows of a product of k elementary +*> reflectors of order n +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by DGELQF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. M >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the i-th row must contain the vector which defines +*> the elementary reflector H(i), for i = 1,2,...,k, as returned +*> by DGELQF in the first k rows of its array argument A. +*> On exit, the m-by-n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGELQF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORGL2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGL2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) + $ RETURN +* + IF( K.LT.M ) THEN +* +* Initialise rows k+1:m to rows of the unit matrix +* + DO 20 J = 1, N + DO 10 L = K + 1, M + A( L, J ) = ZERO + 10 CONTINUE + IF( J.GT.K .AND. J.LE.M ) + $ A( J, J ) = ONE + 20 CONTINUE + END IF +* + DO 40 I = K, 1, -1 +* +* Apply H(i) to A(i:m,i:n) from the right +* + IF( I.LT.N ) THEN + IF( I.LT.M ) THEN + A( I, I ) = ONE + CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ TAU( I ), A( I+1, I ), LDA, WORK ) + END IF + CALL DSCAL( N-I, -TAU( I ), A( I, I+1 ), LDA ) + END IF + A( I, I ) = ONE - TAU( I ) +* +* Set A(i,1:i-1) to zero +* + DO 30 L = 1, I - 1 + A( I, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of DORGL2 +* + END diff --git a/dspl/liblapack/SRC/dorglq.f b/dspl/liblapack/SRC/dorglq.f new file mode 100644 index 0000000..912b5de --- /dev/null +++ b/dspl/liblapack/SRC/dorglq.f @@ -0,0 +1,289 @@ +*> \brief \b DORGLQ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORGLQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORGLQ generates an M-by-N real matrix Q with orthonormal rows, +*> which is defined as the first M rows of a product of K elementary +*> reflectors of order N +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by DGELQF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. M >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the i-th row must contain the vector which defines +*> the elementary reflector H(i), for i = 1,2,...,k, as returned +*> by DGELQF in the first k rows of its array argument A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGELQF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORGL2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'DORGLQ', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, M )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DORGLQ', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORGLQ', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk rows are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(kk+1:m,1:kk) to zero. +* + DO 20 J = 1, KK + DO 10 I = KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.M ) + $ CALL DORGL2( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**T to A(i+ib:m,i:n) from the right +* + CALL DLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', + $ M-I-IB+1, N-I+1, IB, A( I, I ), LDA, WORK, + $ LDWORK, A( I+IB, I ), LDA, WORK( IB+1 ), + $ LDWORK ) + END IF +* +* Apply H**T to columns i:n of current block +* + CALL DORGL2( IB, N-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set columns 1:i-1 of current block to zero +* + DO 40 J = 1, I - 1 + DO 30 L = I, I + IB - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of DORGLQ +* + END diff --git a/dspl/liblapack/SRC/dorgql.f b/dspl/liblapack/SRC/dorgql.f new file mode 100644 index 0000000..ea12be9 --- /dev/null +++ b/dspl/liblapack/SRC/dorgql.f @@ -0,0 +1,296 @@ +*> \brief \b DORGQL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORGQL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORGQL generates an M-by-N real matrix Q with orthonormal columns, +*> which is defined as the last N columns of a product of K elementary +*> reflectors of order M +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by DGEQLF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the (n-k+i)-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by DGEQLF in the last k columns of its array +*> argument A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGEQLF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KK, L, LDWORK, LWKOPT, + $ NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORG2L, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'DORGQL', ' ', M, N, K, -1 ) + LWKOPT = N*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DORGQL', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORGQL', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the first block. +* The last kk columns are handled by the block method. +* + KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB ) +* +* Set A(m-kk+1:m,1:n-kk) to zero. +* + DO 20 J = 1, N - KK + DO 10 I = M - KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the first or only block. +* + CALL DORG2L( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = K - KK + 1, K, NB + IB = MIN( NB, K-I+1 ) + IF( N-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL DLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left +* + CALL DLARFB( 'Left', 'No transpose', 'Backward', + $ 'Columnwise', M-K+I+IB-1, N-K+I-1, IB, + $ A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows 1:m-k+i+ib-1 of current block +* + CALL DORG2L( M-K+I+IB-1, IB, IB, A( 1, N-K+I ), LDA, + $ TAU( I ), WORK, IINFO ) +* +* Set rows m-k+i+ib:m of current block to zero +* + DO 40 J = N - K + I, N - K + I + IB - 1 + DO 30 L = M - K + I + IB, M + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of DORGQL +* + END diff --git a/dspl/liblapack/SRC/dorgqr.f b/dspl/liblapack/SRC/dorgqr.f new file mode 100644 index 0000000..628eeac --- /dev/null +++ b/dspl/liblapack/SRC/dorgqr.f @@ -0,0 +1,290 @@ +*> \brief \b DORGQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORGQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORGQR generates an M-by-N real matrix Q with orthonormal columns, +*> which is defined as the first N columns of a product of K elementary +*> reflectors of order M +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by DGEQRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the i-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by DGEQRF in the first k columns of its array +*> argument A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGEQRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORG2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'DORGQR', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, N )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DORGQR', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORGQR', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk columns are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(1:kk,kk+1:n) to zero. +* + DO 20 J = KK + 1, N + DO 10 I = 1, KK + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.N ) + $ CALL DORG2R( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i:m,i+ib:n) from the left +* + CALL DLARFB( 'Left', 'No transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows i:m of current block +* + CALL DORG2R( M-I+1, IB, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set rows 1:i-1 of current block to zero +* + DO 40 J = I, I + IB - 1 + DO 30 L = 1, I - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of DORGQR +* + END diff --git a/dspl/liblapack/SRC/dorgr2.f b/dspl/liblapack/SRC/dorgr2.f new file mode 100644 index 0000000..7c5dce1 --- /dev/null +++ b/dspl/liblapack/SRC/dorgr2.f @@ -0,0 +1,202 @@ +*> \brief \b DORGR2 generates all or part of the orthogonal matrix Q from an RQ factorization determined by sgerqf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORGR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORGR2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORGR2 generates an m by n real matrix Q with orthonormal rows, +*> which is defined as the last m rows of a product of k elementary +*> reflectors of order n +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by DGERQF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. M >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the (m-k+i)-th row must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by DGERQF in the last k rows of its array argument +*> A. +*> On exit, the m by n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGERQF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORGR2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, II, J, L +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGR2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) + $ RETURN +* + IF( K.LT.M ) THEN +* +* Initialise rows 1:m-k to rows of the unit matrix +* + DO 20 J = 1, N + DO 10 L = 1, M - K + A( L, J ) = ZERO + 10 CONTINUE + IF( J.GT.N-M .AND. J.LE.N-K ) + $ A( M-N+J, J ) = ONE + 20 CONTINUE + END IF +* + DO 40 I = 1, K + II = M - K + I +* +* Apply H(i) to A(1:m-k+i,1:n-k+i) from the right +* + A( II, N-M+II ) = ONE + CALL DLARF( 'Right', II-1, N-M+II, A( II, 1 ), LDA, TAU( I ), + $ A, LDA, WORK ) + CALL DSCAL( N-M+II-1, -TAU( I ), A( II, 1 ), LDA ) + A( II, N-M+II ) = ONE - TAU( I ) +* +* Set A(m-k+i,n-k+i+1:n) to zero +* + DO 30 L = N - M + II + 1, N + A( II, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of DORGR2 +* + END diff --git a/dspl/liblapack/SRC/dorgrq.f b/dspl/liblapack/SRC/dorgrq.f new file mode 100644 index 0000000..b76fb37 --- /dev/null +++ b/dspl/liblapack/SRC/dorgrq.f @@ -0,0 +1,296 @@ +*> \brief \b DORGRQ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORGRQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORGRQ generates an M-by-N real matrix Q with orthonormal rows, +*> which is defined as the last M rows of a product of K elementary +*> reflectors of order N +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by DGERQF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. M >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the (m-k+i)-th row must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by DGERQF in the last k rows of its array argument +*> A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGERQF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, II, IINFO, IWS, J, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORGR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( M.LE.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'DORGRQ', ' ', M, N, K, -1 ) + LWKOPT = M*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGRQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DORGRQ', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORGRQ', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the first block. +* The last kk rows are handled by the block method. +* + KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB ) +* +* Set A(1:m-kk,n-kk+1:n) to zero. +* + DO 20 J = N - KK + 1, N + DO 10 I = 1, M - KK + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the first or only block. +* + CALL DORGR2( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = K - KK + 1, K, NB + IB = MIN( NB, K-I+1 ) + II = M - K + I + IF( II.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL DLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, + $ A( II, 1 ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**T to A(1:m-k+i-1,1:n-k+i+ib-1) from the right +* + CALL DLARFB( 'Right', 'Transpose', 'Backward', 'Rowwise', + $ II-1, N-K+I+IB-1, IB, A( II, 1 ), LDA, WORK, + $ LDWORK, A, LDA, WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H**T to columns 1:n-k+i+ib-1 of current block +* + CALL DORGR2( IB, N-K+I+IB-1, IB, A( II, 1 ), LDA, TAU( I ), + $ WORK, IINFO ) +* +* Set columns n-k+i+ib:n of current block to zero +* + DO 40 L = N - K + I + IB, N + DO 30 J = II, II + IB - 1 + A( J, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of DORGRQ +* + END diff --git a/dspl/liblapack/SRC/dorgtr.f b/dspl/liblapack/SRC/dorgtr.f new file mode 100644 index 0000000..72623ea --- /dev/null +++ b/dspl/liblapack/SRC/dorgtr.f @@ -0,0 +1,255 @@ +*> \brief \b DORGTR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORGTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORGTR generates a real orthogonal matrix Q which is defined as the +*> product of n-1 elementary reflectors of order N, as returned by +*> DSYTRD: +*> +*> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), +*> +*> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A contains elementary reflectors +*> from DSYTRD; +*> = 'L': Lower triangle of A contains elementary reflectors +*> from DSYTRD. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix Q. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the vectors which define the elementary reflectors, +*> as returned by DSYTRD. +*> On exit, the N-by-N orthogonal matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (N-1) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DSYTRD. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N-1). +*> For optimum performance LWORK >= (N-1)*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, J, LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DORGQL, DORGQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( UPPER ) THEN + NB = ILAENV( 1, 'DORGQL', ' ', N-1, N-1, N-1, -1 ) + ELSE + NB = ILAENV( 1, 'DORGQR', ' ', N-1, N-1, N-1, -1 ) + END IF + LWKOPT = MAX( 1, N-1 )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to DSYTRD with UPLO = 'U' +* +* Shift the vectors which define the elementary reflectors one +* column to the left, and set the last row and column of Q to +* those of the unit matrix +* + DO 20 J = 1, N - 1 + DO 10 I = 1, J - 1 + A( I, J ) = A( I, J+1 ) + 10 CONTINUE + A( N, J ) = ZERO + 20 CONTINUE + DO 30 I = 1, N - 1 + A( I, N ) = ZERO + 30 CONTINUE + A( N, N ) = ONE +* +* Generate Q(1:n-1,1:n-1) +* + CALL DORGQL( N-1, N-1, N-1, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* Q was determined by a call to DSYTRD with UPLO = 'L'. +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q to +* those of the unit matrix +* + DO 50 J = N, 2, -1 + A( 1, J ) = ZERO + DO 40 I = J + 1, N + A( I, J ) = A( I, J-1 ) + 40 CONTINUE + 50 CONTINUE + A( 1, 1 ) = ONE + DO 60 I = 2, N + A( I, 1 ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Generate Q(2:n,2:n) +* + CALL DORGQR( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORGTR +* + END diff --git a/dspl/liblapack/SRC/dorm22.f b/dspl/liblapack/SRC/dorm22.f new file mode 100644 index 0000000..aac4710 --- /dev/null +++ b/dspl/liblapack/SRC/dorm22.f @@ -0,0 +1,441 @@ +*> \brief \b DORM22 multiplies a general matrix by a banded orthogonal matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORM22 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORM22( SIDE, TRANS, M, N, N1, N2, Q, LDQ, C, LDC, +* $ WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER M, N, N1, N2, LDQ, LDC, LWORK, INFO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION Q( LDQ, * ), C( LDC, * ), WORK( * ) +* .. +* +*> \par Purpose +* ============ +*> +*> \verbatim +*> +*> +*> DORM22 overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix of order NQ, with NQ = M if +*> SIDE = 'L' and NQ = N if SIDE = 'R'. +*> The orthogonal matrix Q processes a 2-by-2 block structure +*> +*> [ Q11 Q12 ] +*> Q = [ ] +*> [ Q21 Q22 ], +*> +*> where Q12 is an N1-by-N1 lower triangular matrix and Q21 is an +*> N2-by-N2 upper triangular matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose); +*> = 'C': apply Q**T (Conjugate transpose). +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] N1 +*> \param[in] N2 +*> \verbatim +*> N1 is INTEGER +*> N2 is INTEGER +*> The dimension of Q12 and Q21, respectively. N1, N2 >= 0. +*> The following requirement must be satisfied: +*> N1 + N2 = M if SIDE = 'L' and N1 + N2 = N if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension +*> (LDQ,M) if SIDE = 'L' +*> (LDQ,N) if SIDE = 'R' +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= max(1,M) if SIDE = 'L'; LDQ >= max(1,N) if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For optimum performance LWORK >= M*N. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date January 2015 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORM22( SIDE, TRANS, M, N, N1, N2, Q, LDQ, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2015 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER M, N, N1, N2, LDQ, LDC, LWORK, INFO +* .. +* .. Array Arguments .. + DOUBLE PRECISION Q( LDQ, * ), C( LDC, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, LDWORK, LEN, LWKOPT, NB, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLACPY, DTRMM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q; +* NW is the minimum dimension of WORK. +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + NW = NQ + IF( N1.EQ.0 .OR. N2.EQ.0 ) NW = 1 + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'T' ) ) + $ THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( N1.LT.0 .OR. N1+N2.NE.NQ ) THEN + INFO = -5 + ELSE IF( N2.LT.0 ) THEN + INFO = -6 + ELSE IF( LDQ.LT.MAX( 1, NQ ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKOPT = M*N + WORK( 1 ) = DBLE( LWKOPT ) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORM22', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Degenerate cases (N1 = 0 or N2 = 0) are handled using DTRMM. +* + IF( N1.EQ.0 ) THEN + CALL DTRMM( SIDE, 'Upper', TRANS, 'Non-Unit', M, N, ONE, + $ Q, LDQ, C, LDC ) + WORK( 1 ) = ONE + RETURN + ELSE IF( N2.EQ.0 ) THEN + CALL DTRMM( SIDE, 'Lower', TRANS, 'Non-Unit', M, N, ONE, + $ Q, LDQ, C, LDC ) + WORK( 1 ) = ONE + RETURN + END IF +* +* Compute the largest chunk size available from the workspace. +* + NB = MAX( 1, MIN( LWORK, LWKOPT ) / NQ ) +* + IF( LEFT ) THEN + IF( NOTRAN ) THEN + DO I = 1, N, NB + LEN = MIN( NB, N-I+1 ) + LDWORK = M +* +* Multiply bottom part of C by Q12. +* + CALL DLACPY( 'All', N1, LEN, C( N2+1, I ), LDC, WORK, + $ LDWORK ) + CALL DTRMM( 'Left', 'Lower', 'No Transpose', 'Non-Unit', + $ N1, LEN, ONE, Q( 1, N2+1 ), LDQ, WORK, + $ LDWORK ) +* +* Multiply top part of C by Q11. +* + CALL DGEMM( 'No Transpose', 'No Transpose', N1, LEN, N2, + $ ONE, Q, LDQ, C( 1, I ), LDC, ONE, WORK, + $ LDWORK ) +* +* Multiply top part of C by Q21. +* + CALL DLACPY( 'All', N2, LEN, C( 1, I ), LDC, + $ WORK( N1+1 ), LDWORK ) + CALL DTRMM( 'Left', 'Upper', 'No Transpose', 'Non-Unit', + $ N2, LEN, ONE, Q( N1+1, 1 ), LDQ, + $ WORK( N1+1 ), LDWORK ) +* +* Multiply bottom part of C by Q22. +* + CALL DGEMM( 'No Transpose', 'No Transpose', N2, LEN, N1, + $ ONE, Q( N1+1, N2+1 ), LDQ, C( N2+1, I ), LDC, + $ ONE, WORK( N1+1 ), LDWORK ) +* +* Copy everything back. +* + CALL DLACPY( 'All', M, LEN, WORK, LDWORK, C( 1, I ), + $ LDC ) + END DO + ELSE + DO I = 1, N, NB + LEN = MIN( NB, N-I+1 ) + LDWORK = M +* +* Multiply bottom part of C by Q21**T. +* + CALL DLACPY( 'All', N2, LEN, C( N1+1, I ), LDC, WORK, + $ LDWORK ) + CALL DTRMM( 'Left', 'Upper', 'Transpose', 'Non-Unit', + $ N2, LEN, ONE, Q( N1+1, 1 ), LDQ, WORK, + $ LDWORK ) +* +* Multiply top part of C by Q11**T. +* + CALL DGEMM( 'Transpose', 'No Transpose', N2, LEN, N1, + $ ONE, Q, LDQ, C( 1, I ), LDC, ONE, WORK, + $ LDWORK ) +* +* Multiply top part of C by Q12**T. +* + CALL DLACPY( 'All', N1, LEN, C( 1, I ), LDC, + $ WORK( N2+1 ), LDWORK ) + CALL DTRMM( 'Left', 'Lower', 'Transpose', 'Non-Unit', + $ N1, LEN, ONE, Q( 1, N2+1 ), LDQ, + $ WORK( N2+1 ), LDWORK ) +* +* Multiply bottom part of C by Q22**T. +* + CALL DGEMM( 'Transpose', 'No Transpose', N1, LEN, N2, + $ ONE, Q( N1+1, N2+1 ), LDQ, C( N1+1, I ), LDC, + $ ONE, WORK( N2+1 ), LDWORK ) +* +* Copy everything back. +* + CALL DLACPY( 'All', M, LEN, WORK, LDWORK, C( 1, I ), + $ LDC ) + END DO + END IF + ELSE + IF( NOTRAN ) THEN + DO I = 1, M, NB + LEN = MIN( NB, M-I+1 ) + LDWORK = LEN +* +* Multiply right part of C by Q21. +* + CALL DLACPY( 'All', LEN, N2, C( I, N1+1 ), LDC, WORK, + $ LDWORK ) + CALL DTRMM( 'Right', 'Upper', 'No Transpose', 'Non-Unit', + $ LEN, N2, ONE, Q( N1+1, 1 ), LDQ, WORK, + $ LDWORK ) +* +* Multiply left part of C by Q11. +* + CALL DGEMM( 'No Transpose', 'No Transpose', LEN, N2, N1, + $ ONE, C( I, 1 ), LDC, Q, LDQ, ONE, WORK, + $ LDWORK ) +* +* Multiply left part of C by Q12. +* + CALL DLACPY( 'All', LEN, N1, C( I, 1 ), LDC, + $ WORK( 1 + N2*LDWORK ), LDWORK ) + CALL DTRMM( 'Right', 'Lower', 'No Transpose', 'Non-Unit', + $ LEN, N1, ONE, Q( 1, N2+1 ), LDQ, + $ WORK( 1 + N2*LDWORK ), LDWORK ) +* +* Multiply right part of C by Q22. +* + CALL DGEMM( 'No Transpose', 'No Transpose', LEN, N1, N2, + $ ONE, C( I, N1+1 ), LDC, Q( N1+1, N2+1 ), LDQ, + $ ONE, WORK( 1 + N2*LDWORK ), LDWORK ) +* +* Copy everything back. +* + CALL DLACPY( 'All', LEN, N, WORK, LDWORK, C( I, 1 ), + $ LDC ) + END DO + ELSE + DO I = 1, M, NB + LEN = MIN( NB, M-I+1 ) + LDWORK = LEN +* +* Multiply right part of C by Q12**T. +* + CALL DLACPY( 'All', LEN, N1, C( I, N2+1 ), LDC, WORK, + $ LDWORK ) + CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Non-Unit', + $ LEN, N1, ONE, Q( 1, N2+1 ), LDQ, WORK, + $ LDWORK ) +* +* Multiply left part of C by Q11**T. +* + CALL DGEMM( 'No Transpose', 'Transpose', LEN, N1, N2, + $ ONE, C( I, 1 ), LDC, Q, LDQ, ONE, WORK, + $ LDWORK ) +* +* Multiply left part of C by Q21**T. +* + CALL DLACPY( 'All', LEN, N2, C( I, 1 ), LDC, + $ WORK( 1 + N1*LDWORK ), LDWORK ) + CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Non-Unit', + $ LEN, N2, ONE, Q( N1+1, 1 ), LDQ, + $ WORK( 1 + N1*LDWORK ), LDWORK ) +* +* Multiply right part of C by Q22**T. +* + CALL DGEMM( 'No Transpose', 'Transpose', LEN, N2, N1, + $ ONE, C( I, N2+1 ), LDC, Q( N1+1, N2+1 ), LDQ, + $ ONE, WORK( 1 + N1*LDWORK ), LDWORK ) +* +* Copy everything back. +* + CALL DLACPY( 'All', LEN, N, WORK, LDWORK, C( I, 1 ), + $ LDC ) + END DO + END IF + END IF +* + WORK( 1 ) = DBLE( LWKOPT ) + RETURN +* +* End of DORM22 +* + END diff --git a/dspl/liblapack/SRC/dorm2l.f b/dspl/liblapack/SRC/dorm2l.f new file mode 100644 index 0000000..1014cb2 --- /dev/null +++ b/dspl/liblapack/SRC/dorm2l.f @@ -0,0 +1,278 @@ +*> \brief \b DORM2L multiplies a general matrix by the orthogonal matrix from a QL factorization determined by sgeqlf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORM2L + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORM2L overwrites the general real m by n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**T * C if SIDE = 'L' and TRANS = 'T', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**T if SIDE = 'R' and TRANS = 'T', +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by DGEQLF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left +*> = 'R': apply Q or Q**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'T': apply Q**T (Transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DGEQLF in the last k columns of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGEQLF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, MI, NI, NQ + DOUBLE PRECISION AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORM2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(1:m-k+i,1:n) +* + MI = M - K + I + ELSE +* +* H(i) is applied to C(1:m,1:n-k+i) +* + NI = N - K + I + END IF +* +* Apply H(i) +* + AII = A( NQ-K+I, I ) + A( NQ-K+I, I ) = ONE + CALL DLARF( SIDE, MI, NI, A( 1, I ), 1, TAU( I ), C, LDC, + $ WORK ) + A( NQ-K+I, I ) = AII + 10 CONTINUE + RETURN +* +* End of DORM2L +* + END diff --git a/dspl/liblapack/SRC/dorm2r.f b/dspl/liblapack/SRC/dorm2r.f new file mode 100644 index 0000000..632b70e --- /dev/null +++ b/dspl/liblapack/SRC/dorm2r.f @@ -0,0 +1,282 @@ +*> \brief \b DORM2R multiplies a general matrix by the orthogonal matrix from a QR factorization determined by sgeqrf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORM2R + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORM2R overwrites the general real m by n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**T* C if SIDE = 'L' and TRANS = 'T', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**T if SIDE = 'R' and TRANS = 'T', +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left +*> = 'R': apply Q or Q**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'T': apply Q**T (Transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DGEQRF in the first k columns of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGEQRF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + DOUBLE PRECISION AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORM2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) +* + AII = A( I, I ) + A( I, I ) = ONE + CALL DLARF( SIDE, MI, NI, A( I, I ), 1, TAU( I ), C( IC, JC ), + $ LDC, WORK ) + A( I, I ) = AII + 10 CONTINUE + RETURN +* +* End of DORM2R +* + END diff --git a/dspl/liblapack/SRC/dormbr.f b/dspl/liblapack/SRC/dormbr.f new file mode 100644 index 0000000..f035d0a --- /dev/null +++ b/dspl/liblapack/SRC/dormbr.f @@ -0,0 +1,372 @@ +*> \brief \b DORMBR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORMBR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, +* LDC, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS, VECT +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> If VECT = 'Q', DORMBR overwrites the general real M-by-N matrix C +*> with +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C +*> with +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': P * C C * P +*> TRANS = 'T': P**T * C C * P**T +*> +*> Here Q and P**T are the orthogonal matrices determined by DGEBRD when +*> reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and +*> P**T are defined as products of elementary reflectors H(i) and G(i) +*> respectively. +*> +*> Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the +*> order of the orthogonal matrix Q or P**T that is applied. +*> +*> If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: +*> if nq >= k, Q = H(1) H(2) . . . H(k); +*> if nq < k, Q = H(1) H(2) . . . H(nq-1). +*> +*> If VECT = 'P', A is assumed to have been a K-by-NQ matrix: +*> if k < nq, P = G(1) G(2) . . . G(k); +*> if k >= nq, P = G(1) G(2) . . . G(nq-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'Q': apply Q or Q**T; +*> = 'P': apply P or P**T. +*> \endverbatim +*> +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q, Q**T, P or P**T from the Left; +*> = 'R': apply Q, Q**T, P or P**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q or P; +*> = 'T': Transpose, apply Q**T or P**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> If VECT = 'Q', the number of columns in the original +*> matrix reduced by DGEBRD. +*> If VECT = 'P', the number of rows in the original +*> matrix reduced by DGEBRD. +*> K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension +*> (LDA,min(nq,K)) if VECT = 'Q' +*> (LDA,nq) if VECT = 'P' +*> The vectors which define the elementary reflectors H(i) and +*> G(i), whose products determine the matrices Q and P, as +*> returned by DGEBRD. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If VECT = 'Q', LDA >= max(1,nq); +*> if VECT = 'P', LDA >= max(1,min(nq,K)). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (min(nq,K)) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i) or G(i) which determines Q or P, as returned +*> by DGEBRD in the array argument TAUQ or TAUP. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q +*> or P*C or P**T*C or C*P or C*P**T. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For optimum performance LWORK >= N*NB if SIDE = 'L', and +*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal +*> blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, + $ LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, VECT + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL APPLYQ, LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DORMLQ, DORMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + APPLYQ = LSAME( VECT, 'Q' ) + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q or P and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.APPLYQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( K.LT.0 ) THEN + INFO = -6 + ELSE IF( ( APPLYQ .AND. LDA.LT.MAX( 1, NQ ) ) .OR. + $ ( .NOT.APPLYQ .AND. LDA.LT.MAX( 1, MIN( NQ, K ) ) ) ) + $ THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( APPLYQ ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + ELSE + IF( LEFT ) THEN + NB = ILAENV( 1, 'DORMLQ', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'DORMLQ', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + WORK( 1 ) = 1 + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + IF( APPLYQ ) THEN +* +* Apply Q +* + IF( NQ.GE.K ) THEN +* +* Q was determined by a call to DGEBRD with nq >= k +* + CALL DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* Q was determined by a call to DGEBRD with nq < k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL DORMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, + $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + ELSE +* +* Apply P +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF + IF( NQ.GT.K ) THEN +* +* P was determined by a call to DGEBRD with nq > k +* + CALL DORMLQ( SIDE, TRANST, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* P was determined by a call to DGEBRD with nq <= k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL DORMLQ( SIDE, TRANST, MI, NI, NQ-1, A( 1, 2 ), LDA, + $ TAU, C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORMBR +* + END diff --git a/dspl/liblapack/SRC/dormhr.f b/dspl/liblapack/SRC/dormhr.f new file mode 100644 index 0000000..d1e214e --- /dev/null +++ b/dspl/liblapack/SRC/dormhr.f @@ -0,0 +1,294 @@ +*> \brief \b DORMHR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORMHR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORMHR( SIDE, TRANS, M, N, ILO, IHI, A, LDA, TAU, C, +* LDC, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER IHI, ILO, INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORMHR overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix of order nq, with nq = m if +*> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +*> IHI-ILO elementary reflectors, as returned by DGEHRD: +*> +*> Q = H(ilo) H(ilo+1) . . . H(ihi-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> ILO and IHI must have the same values as in the previous call +*> of DGEHRD. Q is equal to the unit matrix except in the +*> submatrix Q(ilo+1:ihi,ilo+1:ihi). +*> If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and +*> ILO = 1 and IHI = 0, if M = 0; +*> if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and +*> ILO = 1 and IHI = 0, if N = 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension +*> (LDA,M) if SIDE = 'L' +*> (LDA,N) if SIDE = 'R' +*> The vectors which define the elementary reflectors, as +*> returned by DGEHRD. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGEHRD. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For optimum performance LWORK >= N*NB if SIDE = 'L', and +*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal +*> blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORMHR( SIDE, TRANS, M, N, ILO, IHI, A, LDA, TAU, C, + $ LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER IHI, ILO, INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NH, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DORMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NH = IHI - ILO + LEFT = LSAME( SIDE, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'T' ) ) + $ THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, NQ ) ) THEN + INFO = -5 + ELSE IF( IHI.LT.MIN( ILO, NQ ) .OR. IHI.GT.NQ ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, NH, N, NH, -1 ) + ELSE + NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, M, NH, NH, -1 ) + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMHR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. NH.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( LEFT ) THEN + MI = NH + NI = N + I1 = ILO + 1 + I2 = 1 + ELSE + MI = M + NI = NH + I1 = 1 + I2 = ILO + 1 + END IF +* + CALL DORMQR( SIDE, TRANS, MI, NI, NH, A( ILO+1, ILO ), LDA, + $ TAU( ILO ), C( I1, I2 ), LDC, WORK, LWORK, IINFO ) +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORMHR +* + END diff --git a/dspl/liblapack/SRC/dorml2.f b/dspl/liblapack/SRC/dorml2.f new file mode 100644 index 0000000..2c55c7f --- /dev/null +++ b/dspl/liblapack/SRC/dorml2.f @@ -0,0 +1,282 @@ +*> \brief \b DORML2 multiplies a general matrix by the orthogonal matrix from a LQ factorization determined by sgelqf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORML2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORML2 overwrites the general real m by n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**T* C if SIDE = 'L' and TRANS = 'T', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**T if SIDE = 'R' and TRANS = 'T', +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left +*> = 'R': apply Q or Q**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'T': apply Q**T (Transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DGELQF in the first k rows of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGELQF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + DOUBLE PRECISION AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORML2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) +* + AII = A( I, I ) + A( I, I ) = ONE + CALL DLARF( SIDE, MI, NI, A( I, I ), LDA, TAU( I ), + $ C( IC, JC ), LDC, WORK ) + A( I, I ) = AII + 10 CONTINUE + RETURN +* +* End of DORML2 +* + END diff --git a/dspl/liblapack/SRC/dormlq.f b/dspl/liblapack/SRC/dormlq.f new file mode 100644 index 0000000..bb5469d --- /dev/null +++ b/dspl/liblapack/SRC/dormlq.f @@ -0,0 +1,347 @@ +*> \brief \b DORMLQ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORMLQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORMLQ overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DGELQF in the first k rows of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGELQF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORML2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + NB = MIN( NBMAX, ILAENV( 1, 'DORMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + TSIZE + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL DORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IWT = 1 + NW*NB + IF( ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL DLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK( IWT ), LDT ) + IF( LEFT ) THEN +* +* H or H**T is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H**T is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H**T +* + CALL DLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB, + $ A( I, I ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORMLQ +* + END diff --git a/dspl/liblapack/SRC/dormql.f b/dspl/liblapack/SRC/dormql.f new file mode 100644 index 0000000..7d2b5d6 --- /dev/null +++ b/dspl/liblapack/SRC/dormql.f @@ -0,0 +1,339 @@ +*> \brief \b DORMQL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORMQL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORMQL overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by DGEQLF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DGEQLF in the last k columns of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGEQLF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT, + $ MI, NB, NBMIN, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORM2L, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = MAX( 1, N ) + ELSE + NQ = N + NW = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = MIN( NBMAX, ILAENV( 1, 'DORMQL', SIDE // TRANS, M, N, + $ K, -1 ) ) + LWKOPT = NW*NB + TSIZE + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORMQL', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL DORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IWT = 1 + NW*NB + IF( ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL DLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB, + $ A( 1, I ), LDA, TAU( I ), WORK( IWT ), LDT ) + IF( LEFT ) THEN +* +* H or H**T is applied to C(1:m-k+i+ib-1,1:n) +* + MI = M - K + I + IB - 1 + ELSE +* +* H or H**T is applied to C(1:m,1:n-k+i+ib-1) +* + NI = N - K + I + IB - 1 + END IF +* +* Apply H or H**T +* + CALL DLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI, + $ IB, A( 1, I ), LDA, WORK( IWT ), LDT, C, LDC, + $ WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORMQL +* + END diff --git a/dspl/liblapack/SRC/dormqr.f b/dspl/liblapack/SRC/dormqr.f new file mode 100644 index 0000000..7f2ebb9 --- /dev/null +++ b/dspl/liblapack/SRC/dormqr.f @@ -0,0 +1,340 @@ +*> \brief \b DORMQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORMQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORMQR overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DGEQRF in the first k columns of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGEQRF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORM2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + NB = MIN( NBMAX, ILAENV( 1, 'DORMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + TSIZE + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL DORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IWT = 1 + NW*NB + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL DLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK( IWT ), LDT ) + IF( LEFT ) THEN +* +* H or H**T is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H**T is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H**T +* + CALL DLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI, + $ IB, A( I, I ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORMQR +* + END diff --git a/dspl/liblapack/SRC/dormr2.f b/dspl/liblapack/SRC/dormr2.f new file mode 100644 index 0000000..129ee1b --- /dev/null +++ b/dspl/liblapack/SRC/dormr2.f @@ -0,0 +1,278 @@ +*> \brief \b DORMR2 multiplies a general matrix by the orthogonal matrix from a RQ factorization determined by sgerqf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORMR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORMR2 overwrites the general real m by n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**T* C if SIDE = 'L' and TRANS = 'T', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**T if SIDE = 'R' and TRANS = 'T', +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by DGERQF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left +*> = 'R': apply Q or Q**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'T': apply Q' (Transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DGERQF in the last k rows of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGERQF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, MI, NI, NQ + DOUBLE PRECISION AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMR2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(1:m-k+i,1:n) +* + MI = M - K + I + ELSE +* +* H(i) is applied to C(1:m,1:n-k+i) +* + NI = N - K + I + END IF +* +* Apply H(i) +* + AII = A( I, NQ-K+I ) + A( I, NQ-K+I ) = ONE + CALL DLARF( SIDE, MI, NI, A( I, 1 ), LDA, TAU( I ), C, LDC, + $ WORK ) + A( I, NQ-K+I ) = AII + 10 CONTINUE + RETURN +* +* End of DORMR2 +* + END diff --git a/dspl/liblapack/SRC/dormr3.f b/dspl/liblapack/SRC/dormr3.f new file mode 100644 index 0000000..5f20db7 --- /dev/null +++ b/dspl/liblapack/SRC/dormr3.f @@ -0,0 +1,299 @@ +*> \brief \b DORMR3 multiplies a general matrix by the orthogonal matrix from a RZ factorization determined by stzrzf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORMR3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, L, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORMR3 overwrites the general real m by n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**T* C if SIDE = 'L' and TRANS = 'C', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**T if SIDE = 'R' and TRANS = 'C', +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by DTZRZF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left +*> = 'R': apply Q or Q**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'T': apply Q**T (Transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of columns of the matrix A containing +*> the meaningful part of the Householder reflectors. +*> If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DTZRZF in the last k rows of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DTZRZF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the m-by-n matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DORMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, L, LDA, LDC, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JA, JC, MI, NI, NQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLARZ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR. + $ ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMR3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JA = M - L + 1 + JC = 1 + ELSE + MI = M + JA = N - L + 1 + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)**T is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) or H(i)**T is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) or H(i)**T +* + CALL DLARZ( SIDE, MI, NI, L, A( I, JA ), LDA, TAU( I ), + $ C( IC, JC ), LDC, WORK ) +* + 10 CONTINUE +* + RETURN +* +* End of DORMR3 +* + END diff --git a/dspl/liblapack/SRC/dormrq.f b/dspl/liblapack/SRC/dormrq.f new file mode 100644 index 0000000..421bd10 --- /dev/null +++ b/dspl/liblapack/SRC/dormrq.f @@ -0,0 +1,346 @@ +*> \brief \b DORMRQ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORMRQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORMRQ overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by DGERQF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DGERQF in the last k rows of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGERQF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT, + $ MI, NB, NBMIN, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORMR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = MAX( 1, N ) + ELSE + NQ = N + NW = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = MIN( NBMAX, ILAENV( 1, 'DORMRQ', SIDE // TRANS, M, N, + $ K, -1 ) ) + LWKOPT = NW*NB + TSIZE + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMRQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL DORMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IWT = 1 + NW*NB + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL DLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB, + $ A( I, 1 ), LDA, TAU( I ), WORK( IWT ), LDT ) + IF( LEFT ) THEN +* +* H or H**T is applied to C(1:m-k+i+ib-1,1:n) +* + MI = M - K + I + IB - 1 + ELSE +* +* H or H**T is applied to C(1:m,1:n-k+i+ib-1) +* + NI = N - K + I + IB - 1 + END IF +* +* Apply H or H**T +* + CALL DLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, + $ IB, A( I, 1 ), LDA, WORK( IWT ), LDT, C, LDC, + $ WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORMRQ +* + END diff --git a/dspl/liblapack/SRC/dormrz.f b/dspl/liblapack/SRC/dormrz.f new file mode 100644 index 0000000..8e1bd56 --- /dev/null +++ b/dspl/liblapack/SRC/dormrz.f @@ -0,0 +1,380 @@ +*> \brief \b DORMRZ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORMRZ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORMRZ( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, L, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORMRZ overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by DTZRZF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of columns of the matrix A containing +*> the meaningful part of the Householder reflectors. +*> If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DTZRZF in the last k rows of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DTZRZF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DORMRZ( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, L, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JA, JC, + $ LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLARZB, DLARZT, DORMR3, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = MAX( 1, N ) + ELSE + NQ = N + NW = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR. + $ ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = MIN( NBMAX, ILAENV( 1, 'DORMRQ', SIDE // TRANS, M, N, + $ K, -1 ) ) + LWKOPT = NW*NB + TSIZE + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMRZ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL DORMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, IINFO ) + ELSE +* +* Use blocked code +* + IWT = 1 + NW*NB + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + JA = M - L + 1 + ELSE + MI = M + IC = 1 + JA = N - L + 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL DLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA, + $ TAU( I ), WORK( IWT ), LDT ) +* + IF( LEFT ) THEN +* +* H or H**T is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H**T is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H**T +* + CALL DLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, + $ IB, L, A( I, JA ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) + 10 CONTINUE +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DORMRZ +* + END diff --git a/dspl/liblapack/SRC/dormtr.f b/dspl/liblapack/SRC/dormtr.f new file mode 100644 index 0000000..d2443c1 --- /dev/null +++ b/dspl/liblapack/SRC/dormtr.f @@ -0,0 +1,310 @@ +*> \brief \b DORMTR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORMTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS, UPLO +* INTEGER INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORMTR overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix of order nq, with nq = m if +*> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +*> nq-1 elementary reflectors, as returned by DSYTRD: +*> +*> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); +*> +*> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A contains elementary reflectors +*> from DSYTRD; +*> = 'L': Lower triangle of A contains elementary reflectors +*> from DSYTRD. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension +*> (LDA,M) if SIDE = 'L' +*> (LDA,N) if SIDE = 'R' +*> The vectors which define the elementary reflectors, as +*> returned by DSYTRD. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DSYTRD. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For optimum performance LWORK >= N*NB if SIDE = 'L', and +*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal +*> blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, UPLO + INTEGER INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, UPPER + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DORMQL, DORMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'T' ) ) + $ THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( UPPER ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'DORMQL', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'DORMQL', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + ELSE + IF( LEFT ) THEN + NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORMTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + ELSE + MI = M + NI = N - 1 + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to DSYTRD with UPLO = 'U' +* + CALL DORMQL( SIDE, TRANS, MI, NI, NQ-1, A( 1, 2 ), LDA, TAU, C, + $ LDC, WORK, LWORK, IINFO ) + ELSE +* +* Q was determined by a call to DSYTRD with UPLO = 'L' +* + IF( LEFT ) THEN + I1 = 2 + I2 = 1 + ELSE + I1 = 1 + I2 = 2 + END IF + CALL DORMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, + $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORMTR +* + END diff --git a/dspl/liblapack/SRC/dpbcon.f b/dspl/liblapack/SRC/dpbcon.f new file mode 100644 index 0000000..41d43dc --- /dev/null +++ b/dspl/liblapack/SRC/dpbcon.f @@ -0,0 +1,271 @@ +*> \brief \b DPBCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPBCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPBCON( UPLO, N, KD, AB, LDAB, ANORM, RCOND, WORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION AB( LDAB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPBCON estimates the reciprocal of the condition number (in the +*> 1-norm) of a real symmetric positive definite band matrix using the +*> Cholesky factorization A = U**T*U or A = L*L**T computed by DPBTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular factor stored in AB; +*> = 'L': Lower triangular factor stored in AB. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T of the band matrix A, stored in the +*> first KD+1 rows of the array. The j-th column of U or L is +*> stored in the j-th column of the array AB as follows: +*> if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The 1-norm (or infinity-norm) of the symmetric band matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DPBCON( UPLO, N, KD, AB, LDAB, ANORM, RCOND, WORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + DOUBLE PRECISION AINVNM, SCALE, SCALEL, SCALEU, SMLNUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IDAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLACN2, DLATBS, DRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U**T). +* + CALL DLATBS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEL, WORK( 2*N+1 ), + $ INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL DLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEU, WORK( 2*N+1 ), + $ INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL DLATBS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEL, WORK( 2*N+1 ), + $ INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L**T). +* + CALL DLATBS( 'Lower', 'Transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEU, WORK( 2*N+1 ), + $ INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = IDAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL DRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE +* + RETURN +* +* End of DPBCON +* + END diff --git a/dspl/liblapack/SRC/dpbequ.f b/dspl/liblapack/SRC/dpbequ.f new file mode 100644 index 0000000..ec5d4eb --- /dev/null +++ b/dspl/liblapack/SRC/dpbequ.f @@ -0,0 +1,242 @@ +*> \brief \b DPBEQU +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPBEQU + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, N +* DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPBEQU computes row and column scalings intended to equilibrate a +*> symmetric positive definite band matrix A and reduce its condition +*> number (with respect to the two-norm). S contains the scale factors, +*> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with +*> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This +*> choice of S puts the condition number of B within a factor N of the +*> smallest possible condition number over all possible diagonal +*> scalings. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular of A is stored; +*> = 'L': Lower triangular of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> The upper or lower triangle of the symmetric band matrix A, +*> stored in the first KD+1 rows of the array. The j-th column +*> of A is stored in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array A. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is DOUBLE PRECISION +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J + DOUBLE PRECISION SMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPBEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* + IF( UPPER ) THEN + J = KD + 1 + ELSE + J = 1 + END IF +* +* Initialize SMIN and AMAX. +* + S( 1 ) = AB( J, 1 ) + SMIN = S( 1 ) + AMAX = S( 1 ) +* +* Find the minimum and maximum diagonal elements. +* + DO 10 I = 2, N + S( I ) = AB( J, I ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 20 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 20 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 30 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 30 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of DPBEQU +* + END diff --git a/dspl/liblapack/SRC/dpbrfs.f b/dspl/liblapack/SRC/dpbrfs.f new file mode 100644 index 0000000..6bc522f --- /dev/null +++ b/dspl/liblapack/SRC/dpbrfs.f @@ -0,0 +1,443 @@ +*> \brief \b DPBRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPBRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, +* LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPBRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric positive definite +*> and banded, and provides error bounds and backward error estimates +*> for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> The upper or lower triangle of the symmetric band matrix A, +*> stored in the first KD+1 rows of the array. The j-th column +*> of A is stored in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is DOUBLE PRECISION array, dimension (LDAFB,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T of the band matrix A as computed by +*> DPBTRF, in the same storage format as A (see AB). +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= KD+1. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by DPBTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, + $ LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, L, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLACN2, DPBTRS, DSBMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDAFB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = MIN( N+1, 2*KD+2 ) + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DSBMV( UPLO, N, KD, -ONE, AB, LDAB, X( 1, J ), 1, ONE, + $ WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + L = KD + 1 - K + DO 40 I = MAX( 1, K-KD ), K - 1 + WORK( I ) = WORK( I ) + ABS( AB( L+I, K ) )*XK + S = S + ABS( AB( L+I, K ) )*ABS( X( I, J ) ) + 40 CONTINUE + WORK( K ) = WORK( K ) + ABS( AB( KD+1, K ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + WORK( K ) = WORK( K ) + ABS( AB( 1, K ) )*XK + L = 1 - K + DO 60 I = K + 1, MIN( N, K+KD ) + WORK( I ) = WORK( I ) + ABS( AB( L+I, K ) )*XK + S = S + ABS( AB( L+I, K ) )*ABS( X( I, J ) ) + 60 CONTINUE + WORK( K ) = WORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL DPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK( N+1 ), N, + $ INFO ) + CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL DLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**T). +* + CALL DPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK( N+1 ), N, + $ INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( N+I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( N+I )*WORK( I ) + 120 CONTINUE + CALL DPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK( N+1 ), N, + $ INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of DPBRFS +* + END diff --git a/dspl/liblapack/SRC/dpbstf.f b/dspl/liblapack/SRC/dpbstf.f new file mode 100644 index 0000000..c104ddb --- /dev/null +++ b/dspl/liblapack/SRC/dpbstf.f @@ -0,0 +1,319 @@ +*> \brief \b DPBSTF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPBSTF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPBSTF( UPLO, N, KD, AB, LDAB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPBSTF computes a split Cholesky factorization of a real +*> symmetric positive definite band matrix A. +*> +*> This routine is designed to be used in conjunction with DSBGST. +*> +*> The factorization has the form A = S**T*S where S is a band matrix +*> of the same bandwidth as A and the following structure: +*> +*> S = ( U ) +*> ( M L ) +*> +*> where U is upper triangular of order m = (n+kd)/2, and L is lower +*> triangular of order n-m. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first kd+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, if INFO = 0, the factor S from the split Cholesky +*> factorization A = S**T*S. See Further Details. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the factorization could not be completed, +*> because the updated element a(i,i) was negative; the +*> matrix A is not positive definite. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> N = 7, KD = 2: +*> +*> S = ( s11 s12 s13 ) +*> ( s22 s23 s24 ) +*> ( s33 s34 ) +*> ( s44 ) +*> ( s53 s54 s55 ) +*> ( s64 s65 s66 ) +*> ( s75 s76 s77 ) +*> +*> If UPLO = 'U', the array AB holds: +*> +*> on entry: on exit: +*> +*> * * a13 a24 a35 a46 a57 * * s13 s24 s53 s64 s75 +*> * a12 a23 a34 a45 a56 a67 * s12 s23 s34 s54 s65 s76 +*> a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 +*> +*> If UPLO = 'L', the array AB holds: +*> +*> on entry: on exit: +*> +*> a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 +*> a21 a32 a43 a54 a65 a76 * s12 s23 s34 s54 s65 s76 * +*> a31 a42 a53 a64 a64 * * s13 s24 s53 s64 s75 * * +*> +*> Array elements marked * are not used by the routine. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DPBSTF( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, KLD, KM, M + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPBSTF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + KLD = MAX( 1, LDAB-1 ) +* +* Set the splitting point m. +* + M = ( N+KD ) / 2 +* + IF( UPPER ) THEN +* +* Factorize A(m+1:n,m+1:n) as L**T*L, and update A(1:m,1:m). +* + DO 10 J = N, M + 1, -1 +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = AB( KD+1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 50 + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ + KM = MIN( J-1, KD ) +* +* Compute elements j-km:j-1 of the j-th column and update the +* the leading submatrix within the band. +* + CALL DSCAL( KM, ONE / AJJ, AB( KD+1-KM, J ), 1 ) + CALL DSYR( 'Upper', KM, -ONE, AB( KD+1-KM, J ), 1, + $ AB( KD+1, J-KM ), KLD ) + 10 CONTINUE +* +* Factorize the updated submatrix A(1:m,1:m) as U**T*U. +* + DO 20 J = 1, M +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = AB( KD+1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 50 + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ + KM = MIN( KD, M-J ) +* +* Compute elements j+1:j+km of the j-th row and update the +* trailing submatrix within the band. +* + IF( KM.GT.0 ) THEN + CALL DSCAL( KM, ONE / AJJ, AB( KD, J+1 ), KLD ) + CALL DSYR( 'Upper', KM, -ONE, AB( KD, J+1 ), KLD, + $ AB( KD+1, J+1 ), KLD ) + END IF + 20 CONTINUE + ELSE +* +* Factorize A(m+1:n,m+1:n) as L**T*L, and update A(1:m,1:m). +* + DO 30 J = N, M + 1, -1 +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = AB( 1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 50 + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ + KM = MIN( J-1, KD ) +* +* Compute elements j-km:j-1 of the j-th row and update the +* trailing submatrix within the band. +* + CALL DSCAL( KM, ONE / AJJ, AB( KM+1, J-KM ), KLD ) + CALL DSYR( 'Lower', KM, -ONE, AB( KM+1, J-KM ), KLD, + $ AB( 1, J-KM ), KLD ) + 30 CONTINUE +* +* Factorize the updated submatrix A(1:m,1:m) as U**T*U. +* + DO 40 J = 1, M +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = AB( 1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 50 + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ + KM = MIN( KD, M-J ) +* +* Compute elements j+1:j+km of the j-th column and update the +* trailing submatrix within the band. +* + IF( KM.GT.0 ) THEN + CALL DSCAL( KM, ONE / AJJ, AB( 2, J ), 1 ) + CALL DSYR( 'Lower', KM, -ONE, AB( 2, J ), 1, + $ AB( 1, J+1 ), KLD ) + END IF + 40 CONTINUE + END IF + RETURN +* + 50 CONTINUE + INFO = J + RETURN +* +* End of DPBSTF +* + END diff --git a/dspl/liblapack/SRC/dpbsv.f b/dspl/liblapack/SRC/dpbsv.f new file mode 100644 index 0000000..535a2d4 --- /dev/null +++ b/dspl/liblapack/SRC/dpbsv.f @@ -0,0 +1,229 @@ +*> \brief DPBSV computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPBSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPBSV( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPBSV computes the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric positive definite band matrix and X +*> and B are N-by-NRHS matrices. +*> +*> The Cholesky decomposition is used to factor A as +*> A = U**T * U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular band matrix, and L is a lower +*> triangular band matrix, with the same number of superdiagonals or +*> subdiagonals as A. The factored form of A is then used to solve the +*> system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(N,j+KD). +*> See below for further details. +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**T*U or A = L*L**T of the band +*> matrix A, in the same storage format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i of A is not +*> positive definite, so the factorization could not be +*> completed, and the solution has not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> N = 6, KD = 2, and UPLO = 'U': +*> +*> On entry: On exit: +*> +*> * * a13 a24 a35 a46 * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> +*> Similarly, if UPLO = 'L' the format of A is as follows: +*> +*> On entry: On exit: +*> +*> a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +*> a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +*> a31 a42 a53 a64 * * l31 l42 l53 l64 * * +*> +*> Array elements marked * are not used by the routine. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DPBSV( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DPBTRF, DPBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPBSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U**T*U or A = L*L**T. +* + CALL DPBTRF( UPLO, N, KD, AB, LDAB, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL DPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* + END IF + RETURN +* +* End of DPBSV +* + END diff --git a/dspl/liblapack/SRC/dpbsvx.f b/dspl/liblapack/SRC/dpbsvx.f new file mode 100644 index 0000000..b194d26 --- /dev/null +++ b/dspl/liblapack/SRC/dpbsvx.f @@ -0,0 +1,545 @@ +*> \brief DPBSVX computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPBSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPBSVX( FACT, UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, +* EQUED, S, B, LDB, X, LDX, RCOND, FERR, BERR, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ BERR( * ), FERR( * ), S( * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPBSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to +*> compute the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric positive definite band matrix and X +*> and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +*> factor the matrix A (after equilibration if FACT = 'E') as +*> A = U**T * U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular band matrix, and L is a lower +*> triangular band matrix. +*> +*> 3. If the leading i-by-i principal minor is not positive definite, +*> then the routine returns with INFO = i. Otherwise, the factored +*> form of A is used to estimate the condition number of the matrix +*> A. If the reciprocal of the condition number is less than machine +*> precision, INFO = N+1 is returned as a warning, but the routine +*> still goes on to solve for X and compute error bounds as +*> described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(S) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AFB contains the factored form of A. +*> If EQUED = 'Y', the matrix A has been equilibrated +*> with scaling factors given by S. AB and AFB will not +*> be modified. +*> = 'N': The matrix A will be copied to AFB and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AFB and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array, except +*> if FACT = 'F' and EQUED = 'Y', then A must contain the +*> equilibrated matrix diag(S)*A*diag(S). The j-th column of A +*> is stored in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(N,j+KD). +*> See below for further details. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array A. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] AFB +*> \verbatim +*> AFB is DOUBLE PRECISION array, dimension (LDAFB,N) +*> If FACT = 'F', then AFB is an input argument and on entry +*> contains the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T of the band matrix +*> A, in the same storage format as A (see AB). If EQUED = 'Y', +*> then AFB is the factored form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AFB is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T. +*> +*> If FACT = 'E', then AFB is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T of the equilibrated +*> matrix A (see the description of A for the form of the +*> equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A; not accessed if EQUED = 'N'. S is +*> an input argument if FACT = 'F'; otherwise, S is an output +*> argument. If FACT = 'F' and EQUED = 'Y', each element of S +*> must be positive. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +*> B is overwritten by diag(S) * B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +*> the original system of equations. Note that if EQUED = 'Y', +*> A and B are modified on exit, and the solution to the +*> equilibrated system is inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The estimate of the reciprocal condition number of the matrix +*> A after equilibration (if done). If RCOND is less than the +*> machine precision (in particular, if RCOND = 0), the matrix +*> is singular to working precision. This condition is +*> indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: the leading minor of order i of A is +*> not positive definite, so the factorization +*> could not be completed, and the solution has not +*> been computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleOTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> N = 6, KD = 2, and UPLO = 'U': +*> +*> Two-dimensional storage of the symmetric matrix A: +*> +*> a11 a12 a13 +*> a22 a23 a24 +*> a33 a34 a35 +*> a44 a45 a46 +*> a55 a56 +*> (aij=conjg(aji)) a66 +*> +*> Band storage of the upper triangle of A: +*> +*> * * a13 a24 a35 a46 +*> * a12 a23 a34 a45 a56 +*> a11 a22 a33 a44 a55 a66 +*> +*> Similarly, if UPLO = 'L' the format of A is as follows: +*> +*> a11 a22 a33 a44 a55 a66 +*> a21 a32 a43 a54 a65 * +*> a31 a42 a53 a64 * * +*> +*> Array elements marked * are not used by the routine. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DPBSVX( FACT, UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, + $ EQUED, S, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), S( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU, UPPER + INTEGER I, INFEQU, J, J1, J2 + DOUBLE PRECISION AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSB + EXTERNAL LSAME, DLAMCH, DLANSB +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLACPY, DLAQSB, DPBCON, DPBEQU, DPBRFS, + $ DPBTRF, DPBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + UPPER = LSAME( UPLO, 'U' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( LDAFB.LT.KD+1 ) THEN + INFO = -9 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPBSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL DPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL DLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U**T *U or A = L*L**T. +* + IF( UPPER ) THEN + DO 40 J = 1, N + J1 = MAX( J-KD, 1 ) + CALL DCOPY( J-J1+1, AB( KD+1-J+J1, J ), 1, + $ AFB( KD+1-J+J1, J ), 1 ) + 40 CONTINUE + ELSE + DO 50 J = 1, N + J2 = MIN( J+KD, N ) + CALL DCOPY( J2-J+1, AB( 1, J ), 1, AFB( 1, J ), 1 ) + 50 CONTINUE + END IF +* + CALL DPBTRF( UPLO, N, KD, AFB, LDAFB, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = DLANSB( '1', UPLO, N, KD, AB, LDAB, WORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL DPBCON( UPLO, N, KD, AFB, LDAFB, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* Compute the solution matrix X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DPBTRS( UPLO, N, KD, NRHS, AFB, LDAFB, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL DPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, LDB, X, + $ LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 70 J = 1, NRHS + DO 60 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 60 CONTINUE + 70 CONTINUE + DO 80 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 80 CONTINUE + END IF +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of DPBSVX +* + END diff --git a/dspl/liblapack/SRC/dpbtf2.f b/dspl/liblapack/SRC/dpbtf2.f new file mode 100644 index 0000000..fd38532 --- /dev/null +++ b/dspl/liblapack/SRC/dpbtf2.f @@ -0,0 +1,263 @@ +*> \brief \b DPBTF2 computes the Cholesky factorization of a symmetric/Hermitian positive definite band matrix (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPBTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPBTF2( UPLO, N, KD, AB, LDAB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPBTF2 computes the Cholesky factorization of a real symmetric +*> positive definite band matrix A. +*> +*> The factorization has the form +*> A = U**T * U , if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix, U**T is the transpose of U, and +*> L is lower triangular. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of super-diagonals of the matrix A if UPLO = 'U', +*> or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**T*U or A = L*L**T of the band +*> matrix A, in the same storage format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, the leading minor of order k is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> N = 6, KD = 2, and UPLO = 'U': +*> +*> On entry: On exit: +*> +*> * * a13 a24 a35 a46 * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> +*> Similarly, if UPLO = 'L' the format of A is as follows: +*> +*> On entry: On exit: +*> +*> a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +*> a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +*> a31 a42 a53 a64 * * l31 l42 l53 l64 * * +*> +*> Array elements marked * are not used by the routine. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DPBTF2( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, KLD, KN + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPBTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + KLD = MAX( 1, LDAB-1 ) +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U**T*U. +* + DO 10 J = 1, N +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = AB( KD+1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 30 + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ +* +* Compute elements J+1:J+KN of row J and update the +* trailing submatrix within the band. +* + KN = MIN( KD, N-J ) + IF( KN.GT.0 ) THEN + CALL DSCAL( KN, ONE / AJJ, AB( KD, J+1 ), KLD ) + CALL DSYR( 'Upper', KN, -ONE, AB( KD, J+1 ), KLD, + $ AB( KD+1, J+1 ), KLD ) + END IF + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L**T. +* + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = AB( 1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 30 + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ +* +* Compute elements J+1:J+KN of column J and update the +* trailing submatrix within the band. +* + KN = MIN( KD, N-J ) + IF( KN.GT.0 ) THEN + CALL DSCAL( KN, ONE / AJJ, AB( 2, J ), 1 ) + CALL DSYR( 'Lower', KN, -ONE, AB( 2, J ), 1, + $ AB( 1, J+1 ), KLD ) + END IF + 20 CONTINUE + END IF + RETURN +* + 30 CONTINUE + INFO = J + RETURN +* +* End of DPBTF2 +* + END diff --git a/dspl/liblapack/SRC/dpbtrf.f b/dspl/liblapack/SRC/dpbtrf.f new file mode 100644 index 0000000..269e973 --- /dev/null +++ b/dspl/liblapack/SRC/dpbtrf.f @@ -0,0 +1,435 @@ +*> \brief \b DPBTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPBTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPBTRF( UPLO, N, KD, AB, LDAB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPBTRF computes the Cholesky factorization of a real symmetric +*> positive definite band matrix A. +*> +*> The factorization has the form +*> A = U**T * U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**T*U or A = L*L**T of the band +*> matrix A, in the same storage format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> N = 6, KD = 2, and UPLO = 'U': +*> +*> On entry: On exit: +*> +*> * * a13 a24 a35 a46 * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> +*> Similarly, if UPLO = 'L' the format of A is as follows: +*> +*> On entry: On exit: +*> +*> a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +*> a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +*> a31 a42 a53 a64 * * l31 l42 l53 l64 * * +*> +*> Array elements marked * are not used by the routine. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989 +* +* ===================================================================== + SUBROUTINE DPBTRF( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) + INTEGER NBMAX, LDWORK + PARAMETER ( NBMAX = 32, LDWORK = NBMAX+1 ) +* .. +* .. Local Scalars .. + INTEGER I, I2, I3, IB, II, J, JJ, NB +* .. +* .. Local Arrays .. + DOUBLE PRECISION WORK( LDWORK, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DPBTF2, DPOTF2, DSYRK, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( UPLO, 'U' ) ) .AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPBTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment +* + NB = ILAENV( 1, 'DPBTRF', UPLO, N, KD, -1, -1 ) +* +* The block size must not exceed the semi-bandwidth KD, and must not +* exceed the limit set by the size of the local array WORK. +* + NB = MIN( NB, NBMAX ) +* + IF( NB.LE.1 .OR. NB.GT.KD ) THEN +* +* Use unblocked code +* + CALL DPBTF2( UPLO, N, KD, AB, LDAB, INFO ) + ELSE +* +* Use blocked code +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Compute the Cholesky factorization of a symmetric band +* matrix, given the upper triangle of the matrix in band +* storage. +* +* Zero the upper triangle of the work array. +* + DO 20 J = 1, NB + DO 10 I = 1, J - 1 + WORK( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Process the band matrix one diagonal block at a time. +* + DO 70 I = 1, N, NB + IB = MIN( NB, N-I+1 ) +* +* Factorize the diagonal block +* + CALL DPOTF2( UPLO, IB, AB( KD+1, I ), LDAB-1, II ) + IF( II.NE.0 ) THEN + INFO = I + II - 1 + GO TO 150 + END IF + IF( I+IB.LE.N ) THEN +* +* Update the relevant part of the trailing submatrix. +* If A11 denotes the diagonal block which has just been +* factorized, then we need to update the remaining +* blocks in the diagram: +* +* A11 A12 A13 +* A22 A23 +* A33 +* +* The numbers of rows and columns in the partitioning +* are IB, I2, I3 respectively. The blocks A12, A22 and +* A23 are empty if IB = KD. The upper triangle of A13 +* lies outside the band. +* + I2 = MIN( KD-IB, N-I-IB+1 ) + I3 = MIN( IB, N-I-KD+1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A12 +* + CALL DTRSM( 'Left', 'Upper', 'Transpose', + $ 'Non-unit', IB, I2, ONE, AB( KD+1, I ), + $ LDAB-1, AB( KD+1-IB, I+IB ), LDAB-1 ) +* +* Update A22 +* + CALL DSYRK( 'Upper', 'Transpose', I2, IB, -ONE, + $ AB( KD+1-IB, I+IB ), LDAB-1, ONE, + $ AB( KD+1, I+IB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Copy the lower triangle of A13 into the work array. +* + DO 40 JJ = 1, I3 + DO 30 II = JJ, IB + WORK( II, JJ ) = AB( II-JJ+1, JJ+I+KD-1 ) + 30 CONTINUE + 40 CONTINUE +* +* Update A13 (in the work array). +* + CALL DTRSM( 'Left', 'Upper', 'Transpose', + $ 'Non-unit', IB, I3, ONE, AB( KD+1, I ), + $ LDAB-1, WORK, LDWORK ) +* +* Update A23 +* + IF( I2.GT.0 ) + $ CALL DGEMM( 'Transpose', 'No Transpose', I2, I3, + $ IB, -ONE, AB( KD+1-IB, I+IB ), + $ LDAB-1, WORK, LDWORK, ONE, + $ AB( 1+IB, I+KD ), LDAB-1 ) +* +* Update A33 +* + CALL DSYRK( 'Upper', 'Transpose', I3, IB, -ONE, + $ WORK, LDWORK, ONE, AB( KD+1, I+KD ), + $ LDAB-1 ) +* +* Copy the lower triangle of A13 back into place. +* + DO 60 JJ = 1, I3 + DO 50 II = JJ, IB + AB( II-JJ+1, JJ+I+KD-1 ) = WORK( II, JJ ) + 50 CONTINUE + 60 CONTINUE + END IF + END IF + 70 CONTINUE + ELSE +* +* Compute the Cholesky factorization of a symmetric band +* matrix, given the lower triangle of the matrix in band +* storage. +* +* Zero the lower triangle of the work array. +* + DO 90 J = 1, NB + DO 80 I = J + 1, NB + WORK( I, J ) = ZERO + 80 CONTINUE + 90 CONTINUE +* +* Process the band matrix one diagonal block at a time. +* + DO 140 I = 1, N, NB + IB = MIN( NB, N-I+1 ) +* +* Factorize the diagonal block +* + CALL DPOTF2( UPLO, IB, AB( 1, I ), LDAB-1, II ) + IF( II.NE.0 ) THEN + INFO = I + II - 1 + GO TO 150 + END IF + IF( I+IB.LE.N ) THEN +* +* Update the relevant part of the trailing submatrix. +* If A11 denotes the diagonal block which has just been +* factorized, then we need to update the remaining +* blocks in the diagram: +* +* A11 +* A21 A22 +* A31 A32 A33 +* +* The numbers of rows and columns in the partitioning +* are IB, I2, I3 respectively. The blocks A21, A22 and +* A32 are empty if IB = KD. The lower triangle of A31 +* lies outside the band. +* + I2 = MIN( KD-IB, N-I-IB+1 ) + I3 = MIN( IB, N-I-KD+1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A21 +* + CALL DTRSM( 'Right', 'Lower', 'Transpose', + $ 'Non-unit', I2, IB, ONE, AB( 1, I ), + $ LDAB-1, AB( 1+IB, I ), LDAB-1 ) +* +* Update A22 +* + CALL DSYRK( 'Lower', 'No Transpose', I2, IB, -ONE, + $ AB( 1+IB, I ), LDAB-1, ONE, + $ AB( 1, I+IB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Copy the upper triangle of A31 into the work array. +* + DO 110 JJ = 1, IB + DO 100 II = 1, MIN( JJ, I3 ) + WORK( II, JJ ) = AB( KD+1-JJ+II, JJ+I-1 ) + 100 CONTINUE + 110 CONTINUE +* +* Update A31 (in the work array). +* + CALL DTRSM( 'Right', 'Lower', 'Transpose', + $ 'Non-unit', I3, IB, ONE, AB( 1, I ), + $ LDAB-1, WORK, LDWORK ) +* +* Update A32 +* + IF( I2.GT.0 ) + $ CALL DGEMM( 'No transpose', 'Transpose', I3, I2, + $ IB, -ONE, WORK, LDWORK, + $ AB( 1+IB, I ), LDAB-1, ONE, + $ AB( 1+KD-IB, I+IB ), LDAB-1 ) +* +* Update A33 +* + CALL DSYRK( 'Lower', 'No Transpose', I3, IB, -ONE, + $ WORK, LDWORK, ONE, AB( 1, I+KD ), + $ LDAB-1 ) +* +* Copy the upper triangle of A31 back into place. +* + DO 130 JJ = 1, IB + DO 120 II = 1, MIN( JJ, I3 ) + AB( KD+1-JJ+II, JJ+I-1 ) = WORK( II, JJ ) + 120 CONTINUE + 130 CONTINUE + END IF + END IF + 140 CONTINUE + END IF + END IF + RETURN +* + 150 CONTINUE + RETURN +* +* End of DPBTRF +* + END diff --git a/dspl/liblapack/SRC/dpbtrs.f b/dspl/liblapack/SRC/dpbtrs.f new file mode 100644 index 0000000..08e4373 --- /dev/null +++ b/dspl/liblapack/SRC/dpbtrs.f @@ -0,0 +1,220 @@ +*> \brief \b DPBTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPBTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPBTRS solves a system of linear equations A*X = B with a symmetric +*> positive definite band matrix A using the Cholesky factorization +*> A = U**T*U or A = L*L**T computed by DPBTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular factor stored in AB; +*> = 'L': Lower triangular factor stored in AB. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T of the band matrix A, stored in the +*> first KD+1 rows of the array. The j-th column of U or L is +*> stored in the j-th column of the array AB as follows: +*> if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DTBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U**T *U. +* + DO 10 J = 1, NRHS +* +* Solve U**T *X = B, overwriting B with X. +* + CALL DTBSV( 'Upper', 'Transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) +* +* Solve U*X = B, overwriting B with X. +* + CALL DTBSV( 'Upper', 'No transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Solve A*X = B where A = L*L**T. +* + DO 20 J = 1, NRHS +* +* Solve L*X = B, overwriting B with X. +* + CALL DTBSV( 'Lower', 'No transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) +* +* Solve L**T *X = B, overwriting B with X. +* + CALL DTBSV( 'Lower', 'Transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) + 20 CONTINUE + END IF +* + RETURN +* +* End of DPBTRS +* + END diff --git a/dspl/liblapack/SRC/dpftrf.f b/dspl/liblapack/SRC/dpftrf.f new file mode 100644 index 0000000..b460f2a --- /dev/null +++ b/dspl/liblapack/SRC/dpftrf.f @@ -0,0 +1,457 @@ +*> \brief \b DPFTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPFTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPFTRF( TRANSR, UPLO, N, A, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER N, INFO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( 0: * ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPFTRF computes the Cholesky factorization of a real symmetric +*> positive definite matrix A. +*> +*> The factorization has the form +*> A = U**T * U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> +*> This is the block version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal TRANSR of RFP A is stored; +*> = 'T': The Transpose TRANSR of RFP A is stored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of RFP A is stored; +*> = 'L': Lower triangle of RFP A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension ( N*(N+1)/2 ); +*> On entry, the symmetric matrix A in RFP format. RFP format is +*> described by TRANSR, UPLO, and N as follows: If TRANSR = 'N' +*> then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is +*> (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'T' then RFP is +*> the transpose of RFP A as defined when +*> TRANSR = 'N'. The contents of RFP A are defined by UPLO as +*> follows: If UPLO = 'U' the RFP A contains the NT elements of +*> upper packed A. If UPLO = 'L' the RFP A contains the elements +*> of lower packed A. The LDA of RFP A is (N+1)/2 when TRANSR = +*> 'T'. When TRANSR is 'N' the LDA is N+1 when N is even and N +*> is odd. See the Note below for more details. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization RFP A = U**T*U or RFP A = L*L**T. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Rectangular Full Packed (RFP) Format when N is +*> even. We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> the transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> the transpose of the last three columns of AP lower. +*> This covers the case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 03 04 05 33 43 53 +*> 13 14 15 00 44 54 +*> 23 24 25 10 11 55 +*> 33 34 35 20 21 22 +*> 00 44 45 30 31 32 +*> 01 11 55 40 41 42 +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We then consider Rectangular Full Packed (RFP) Format when N is +*> odd. We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> the transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> the transpose of the last two columns of AP lower. +*> This covers the case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 02 03 04 00 33 43 +*> 12 13 14 10 11 44 +*> 22 23 24 20 21 22 +*> 00 33 34 30 31 32 +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> 02 12 22 00 01 00 10 20 30 40 50 +*> 03 13 23 33 11 33 11 21 31 41 51 +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DPFTRF( TRANSR, UPLO, N, A, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER N, INFO +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( 0: * ) +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DSYRK, DPOTRF, DTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'T' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPFTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* If N is odd, set NISODD = .TRUE. +* If N is even, set K = N/2 and NISODD = .FALSE. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + ELSE + NISODD = .TRUE. + END IF +* +* Set N1 and N2 depending on LOWER +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* start execution: there are eight cases +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) +* T1 -> a(0), T2 -> a(n), S -> a(n1) +* + CALL DPOTRF( 'L', N1, A( 0 ), N, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL DTRSM( 'R', 'L', 'T', 'N', N2, N1, ONE, A( 0 ), N, + $ A( N1 ), N ) + CALL DSYRK( 'U', 'N', N2, N1, -ONE, A( N1 ), N, ONE, + $ A( N ), N ) + CALL DPOTRF( 'U', N2, A( N ), N, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) +* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) +* T1 -> a(n2), T2 -> a(n1), S -> a(0) +* + CALL DPOTRF( 'L', N1, A( N2 ), N, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL DTRSM( 'L', 'L', 'N', 'N', N1, N2, ONE, A( N2 ), N, + $ A( 0 ), N ) + CALL DSYRK( 'U', 'T', N2, N1, -ONE, A( 0 ), N, ONE, + $ A( N1 ), N ) + CALL DPOTRF( 'U', N2, A( N1 ), N, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is odd +* T1 -> A(0,0) , T2 -> A(1,0) , S -> A(0,n1) +* T1 -> a(0+0) , T2 -> a(1+0) , S -> a(0+n1*n1); lda=n1 +* + CALL DPOTRF( 'U', N1, A( 0 ), N1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL DTRSM( 'L', 'U', 'T', 'N', N1, N2, ONE, A( 0 ), N1, + $ A( N1*N1 ), N1 ) + CALL DSYRK( 'L', 'T', N2, N1, -ONE, A( N1*N1 ), N1, ONE, + $ A( 1 ), N1 ) + CALL DPOTRF( 'L', N2, A( 1 ), N1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is odd +* T1 -> A(0,n1+1), T2 -> A(0,n1), S -> A(0,0) +* T1 -> a(n2*n2), T2 -> a(n1*n2), S -> a(0); lda = n2 +* + CALL DPOTRF( 'U', N1, A( N2*N2 ), N2, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL DTRSM( 'R', 'U', 'N', 'N', N2, N1, ONE, A( N2*N2 ), + $ N2, A( 0 ), N2 ) + CALL DSYRK( 'L', 'N', N2, N1, -ONE, A( 0 ), N2, ONE, + $ A( N1*N2 ), N2 ) + CALL DPOTRF( 'L', N2, A( N1*N2 ), N2, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1) +* + CALL DPOTRF( 'L', K, A( 1 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL DTRSM( 'R', 'L', 'T', 'N', K, K, ONE, A( 1 ), N+1, + $ A( K+1 ), N+1 ) + CALL DSYRK( 'U', 'N', K, K, -ONE, A( K+1 ), N+1, ONE, + $ A( 0 ), N+1 ) + CALL DPOTRF( 'U', K, A( 0 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0) +* + CALL DPOTRF( 'L', K, A( K+1 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL DTRSM( 'L', 'L', 'N', 'N', K, K, ONE, A( K+1 ), + $ N+1, A( 0 ), N+1 ) + CALL DSYRK( 'U', 'T', K, K, -ONE, A( 0 ), N+1, ONE, + $ A( K ), N+1 ) + CALL DPOTRF( 'U', K, A( K ), N+1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K +* + END IF +* + ELSE +* +* N is even and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) +* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k +* + CALL DPOTRF( 'U', K, A( 0+K ), K, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL DTRSM( 'L', 'U', 'T', 'N', K, K, ONE, A( K ), N1, + $ A( K*( K+1 ) ), K ) + CALL DSYRK( 'L', 'T', K, K, -ONE, A( K*( K+1 ) ), K, ONE, + $ A( 0 ), K ) + CALL DPOTRF( 'L', K, A( 0 ), K, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0) +* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k +* + CALL DPOTRF( 'U', K, A( K*( K+1 ) ), K, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL DTRSM( 'R', 'U', 'N', 'N', K, K, ONE, + $ A( K*( K+1 ) ), K, A( 0 ), K ) + CALL DSYRK( 'L', 'N', K, K, -ONE, A( 0 ), K, ONE, + $ A( K*K ), K ) + CALL DPOTRF( 'L', K, A( K*K ), K, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of DPFTRF +* + END diff --git a/dspl/liblapack/SRC/dpftri.f b/dspl/liblapack/SRC/dpftri.f new file mode 100644 index 0000000..adbbfa8 --- /dev/null +++ b/dspl/liblapack/SRC/dpftri.f @@ -0,0 +1,423 @@ +*> \brief \b DPFTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPFTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPFTRI( TRANSR, UPLO, N, A, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, N +* .. Array Arguments .. +* DOUBLE PRECISION A( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPFTRI computes the inverse of a (real) symmetric positive definite +*> matrix A using the Cholesky factorization A = U**T*U or A = L*L**T +*> computed by DPFTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal TRANSR of RFP A is stored; +*> = 'T': The Transpose TRANSR of RFP A is stored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension ( N*(N+1)/2 ) +*> On entry, the symmetric matrix A in RFP format. RFP format is +*> described by TRANSR, UPLO, and N as follows: If TRANSR = 'N' +*> then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is +*> (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'T' then RFP is +*> the transpose of RFP A as defined when +*> TRANSR = 'N'. The contents of RFP A are defined by UPLO as +*> follows: If UPLO = 'U' the RFP A contains the nt elements of +*> upper packed A. If UPLO = 'L' the RFP A contains the elements +*> of lower packed A. The LDA of RFP A is (N+1)/2 when TRANSR = +*> 'T'. When TRANSR is 'N' the LDA is N+1 when N is even and N +*> is odd. See the Note below for more details. +*> +*> On exit, the symmetric inverse of the original matrix, in the +*> same storage format. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the (i,i) element of the factor U or L is +*> zero, and the inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Rectangular Full Packed (RFP) Format when N is +*> even. We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> the transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> the transpose of the last three columns of AP lower. +*> This covers the case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 03 04 05 33 43 53 +*> 13 14 15 00 44 54 +*> 23 24 25 10 11 55 +*> 33 34 35 20 21 22 +*> 00 44 45 30 31 32 +*> 01 11 55 40 41 42 +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We then consider Rectangular Full Packed (RFP) Format when N is +*> odd. We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> the transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> the transpose of the last two columns of AP lower. +*> This covers the case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 02 03 04 00 33 43 +*> 12 13 14 10 11 44 +*> 22 23 24 20 21 22 +*> 00 33 34 30 31 32 +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> 02 12 22 00 01 00 10 20 30 40 50 +*> 03 13 23 33 11 33 11 21 31 41 51 +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DPFTRI( TRANSR, UPLO, N, A, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, N +* .. Array Arguments .. + DOUBLE PRECISION A( 0: * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DTFTRI, DLAUUM, DTRMM, DSYRK +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'T' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPFTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL DTFTRI( TRANSR, UPLO, 'N', N, A, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* +* If N is odd, set NISODD = .TRUE. +* If N is even, set K = N/2 and NISODD = .FALSE. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + ELSE + NISODD = .TRUE. + END IF +* +* Set N1 and N2 depending on LOWER +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* Start execution of triangular matrix multiply: inv(U)*inv(U)^C or +* inv(L)^C*inv(L). There are eight cases. +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:N1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(N1,0) +* T1 -> a(0), T2 -> a(n), S -> a(N1) +* + CALL DLAUUM( 'L', N1, A( 0 ), N, INFO ) + CALL DSYRK( 'L', 'T', N1, N2, ONE, A( N1 ), N, ONE, + $ A( 0 ), N ) + CALL DTRMM( 'L', 'U', 'N', 'N', N2, N1, ONE, A( N ), N, + $ A( N1 ), N ) + CALL DLAUUM( 'U', N2, A( N ), N, INFO ) +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:N2-1) +* T1 -> a(N1+1,0), T2 -> a(N1,0), S -> a(0,0) +* T1 -> a(N2), T2 -> a(N1), S -> a(0) +* + CALL DLAUUM( 'L', N1, A( N2 ), N, INFO ) + CALL DSYRK( 'L', 'N', N1, N2, ONE, A( 0 ), N, ONE, + $ A( N2 ), N ) + CALL DTRMM( 'R', 'U', 'T', 'N', N1, N2, ONE, A( N1 ), N, + $ A( 0 ), N ) + CALL DLAUUM( 'U', N2, A( N1 ), N, INFO ) +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE, and N is odd +* T1 -> a(0), T2 -> a(1), S -> a(0+N1*N1) +* + CALL DLAUUM( 'U', N1, A( 0 ), N1, INFO ) + CALL DSYRK( 'U', 'N', N1, N2, ONE, A( N1*N1 ), N1, ONE, + $ A( 0 ), N1 ) + CALL DTRMM( 'R', 'L', 'N', 'N', N1, N2, ONE, A( 1 ), N1, + $ A( N1*N1 ), N1 ) + CALL DLAUUM( 'L', N2, A( 1 ), N1, INFO ) +* + ELSE +* +* SRPA for UPPER, TRANSPOSE, and N is odd +* T1 -> a(0+N2*N2), T2 -> a(0+N1*N2), S -> a(0) +* + CALL DLAUUM( 'U', N1, A( N2*N2 ), N2, INFO ) + CALL DSYRK( 'U', 'T', N1, N2, ONE, A( 0 ), N2, ONE, + $ A( N2*N2 ), N2 ) + CALL DTRMM( 'L', 'L', 'T', 'N', N2, N1, ONE, A( N1*N2 ), + $ N2, A( 0 ), N2 ) + CALL DLAUUM( 'L', N2, A( N1*N2 ), N2, INFO ) +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1) +* + CALL DLAUUM( 'L', K, A( 1 ), N+1, INFO ) + CALL DSYRK( 'L', 'T', K, K, ONE, A( K+1 ), N+1, ONE, + $ A( 1 ), N+1 ) + CALL DTRMM( 'L', 'U', 'N', 'N', K, K, ONE, A( 0 ), N+1, + $ A( K+1 ), N+1 ) + CALL DLAUUM( 'U', K, A( 0 ), N+1, INFO ) +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0) +* + CALL DLAUUM( 'L', K, A( K+1 ), N+1, INFO ) + CALL DSYRK( 'L', 'N', K, K, ONE, A( 0 ), N+1, ONE, + $ A( K+1 ), N+1 ) + CALL DTRMM( 'R', 'U', 'T', 'N', K, K, ONE, A( K ), N+1, + $ A( 0 ), N+1 ) + CALL DLAUUM( 'U', K, A( K ), N+1, INFO ) +* + END IF +* + ELSE +* +* N is even and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE, and N is even (see paper) +* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1), +* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k +* + CALL DLAUUM( 'U', K, A( K ), K, INFO ) + CALL DSYRK( 'U', 'N', K, K, ONE, A( K*( K+1 ) ), K, ONE, + $ A( K ), K ) + CALL DTRMM( 'R', 'L', 'N', 'N', K, K, ONE, A( 0 ), K, + $ A( K*( K+1 ) ), K ) + CALL DLAUUM( 'L', K, A( 0 ), K, INFO ) +* + ELSE +* +* SRPA for UPPER, TRANSPOSE, and N is even (see paper) +* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0), +* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k +* + CALL DLAUUM( 'U', K, A( K*( K+1 ) ), K, INFO ) + CALL DSYRK( 'U', 'T', K, K, ONE, A( 0 ), K, ONE, + $ A( K*( K+1 ) ), K ) + CALL DTRMM( 'L', 'L', 'T', 'N', K, K, ONE, A( K*K ), K, + $ A( 0 ), K ) + CALL DLAUUM( 'L', K, A( K*K ), K, INFO ) +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of DPFTRI +* + END diff --git a/dspl/liblapack/SRC/dpftrs.f b/dspl/liblapack/SRC/dpftrs.f new file mode 100644 index 0000000..9c32506 --- /dev/null +++ b/dspl/liblapack/SRC/dpftrs.f @@ -0,0 +1,280 @@ +*> \brief \b DPFTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPFTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPFTRS( TRANSR, UPLO, N, NRHS, A, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( 0: * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPFTRS solves a system of linear equations A*X = B with a symmetric +*> positive definite matrix A using the Cholesky factorization +*> A = U**T*U or A = L*L**T computed by DPFTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal TRANSR of RFP A is stored; +*> = 'T': The Transpose TRANSR of RFP A is stored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of RFP A is stored; +*> = 'L': Lower triangle of RFP A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension ( N*(N+1)/2 ). +*> The triangular factor U or L from the Cholesky factorization +*> of RFP A = U**T*U or RFP A = L*L**T, as computed by DPFTRF. +*> See note below for more details about RFP A. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Rectangular Full Packed (RFP) Format when N is +*> even. We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> the transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> the transpose of the last three columns of AP lower. +*> This covers the case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 03 04 05 33 43 53 +*> 13 14 15 00 44 54 +*> 23 24 25 10 11 55 +*> 33 34 35 20 21 22 +*> 00 44 45 30 31 32 +*> 01 11 55 40 41 42 +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We then consider Rectangular Full Packed (RFP) Format when N is +*> odd. We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> the transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> the transpose of the last two columns of AP lower. +*> This covers the case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 02 03 04 00 33 43 +*> 12 13 14 10 11 44 +*> 22 23 24 20 21 22 +*> 00 33 34 30 31 32 +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> 02 12 22 00 01 00 10 20 30 40 50 +*> 03 13 23 33 11 33 11 21 31 41 51 +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DPFTRS( TRANSR, UPLO, N, NRHS, A, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( 0: * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, NORMALTRANSR +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DTFSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'T' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPFTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* start execution: there are two triangular solves +* + IF( LOWER ) THEN + CALL DTFSM( TRANSR, 'L', UPLO, 'N', 'N', N, NRHS, ONE, A, B, + $ LDB ) + CALL DTFSM( TRANSR, 'L', UPLO, 'T', 'N', N, NRHS, ONE, A, B, + $ LDB ) + ELSE + CALL DTFSM( TRANSR, 'L', UPLO, 'T', 'N', N, NRHS, ONE, A, B, + $ LDB ) + CALL DTFSM( TRANSR, 'L', UPLO, 'N', 'N', N, NRHS, ONE, A, B, + $ LDB ) + END IF +* + RETURN +* +* End of DPFTRS +* + END diff --git a/dspl/liblapack/SRC/dpocon.f b/dspl/liblapack/SRC/dpocon.f new file mode 100644 index 0000000..20e9aff --- /dev/null +++ b/dspl/liblapack/SRC/dpocon.f @@ -0,0 +1,253 @@ +*> \brief \b DPOCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPOCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPOCON( UPLO, N, A, LDA, ANORM, RCOND, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPOCON estimates the reciprocal of the condition number (in the +*> 1-norm) of a real symmetric positive definite matrix using the +*> Cholesky factorization A = U**T*U or A = L*L**T computed by DPOTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by DPOTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The 1-norm (or infinity-norm) of the symmetric matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doublePOcomputational +* +* ===================================================================== + SUBROUTINE DPOCON( UPLO, N, A, LDA, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + DOUBLE PRECISION AINVNM, SCALE, SCALEL, SCALEU, SMLNUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IDAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLACN2, DLATRS, DRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of inv(A). +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U**T). +* + CALL DLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, A, + $ LDA, WORK, SCALEL, WORK( 2*N+1 ), INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL DLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SCALEU, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL DLATRS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SCALEL, WORK( 2*N+1 ), INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L**T). +* + CALL DLATRS( 'Lower', 'Transpose', 'Non-unit', NORMIN, N, A, + $ LDA, WORK, SCALEU, WORK( 2*N+1 ), INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = IDAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL DRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of DPOCON +* + END diff --git a/dspl/liblapack/SRC/dpoequ.f b/dspl/liblapack/SRC/dpoequ.f new file mode 100644 index 0000000..eb15071 --- /dev/null +++ b/dspl/liblapack/SRC/dpoequ.f @@ -0,0 +1,205 @@ +*> \brief \b DPOEQU +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPOEQU + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPOEQU( N, A, LDA, S, SCOND, AMAX, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, N +* DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPOEQU computes row and column scalings intended to equilibrate a +*> symmetric positive definite matrix A and reduce its condition number +*> (with respect to the two-norm). S contains the scale factors, +*> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with +*> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This +*> choice of S puts the condition number of B within a factor N of the +*> smallest possible condition number over all possible diagonal +*> scalings. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The N-by-N symmetric positive definite matrix whose scaling +*> factors are to be computed. Only the diagonal elements of A +*> are referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is DOUBLE PRECISION +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doublePOcomputational +* +* ===================================================================== + SUBROUTINE DPOEQU( N, A, LDA, S, SCOND, AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION SMIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* +* Find the minimum and maximum diagonal elements. +* + S( 1 ) = A( 1, 1 ) + SMIN = S( 1 ) + AMAX = S( 1 ) + DO 10 I = 2, N + S( I ) = A( I, I ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 20 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 20 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 30 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 30 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of DPOEQU +* + END diff --git a/dspl/liblapack/SRC/dpoequb.f b/dspl/liblapack/SRC/dpoequb.f new file mode 100644 index 0000000..fbcc6fe --- /dev/null +++ b/dspl/liblapack/SRC/dpoequb.f @@ -0,0 +1,221 @@ +*> \brief \b DPOEQUB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPOEQUB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPOEQUB( N, A, LDA, S, SCOND, AMAX, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, N +* DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPOEQUB computes row and column scalings intended to equilibrate a +*> symmetric positive definite matrix A and reduce its condition number +*> (with respect to the two-norm). S contains the scale factors, +*> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with +*> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This +*> choice of S puts the condition number of B within a factor N of the +*> smallest possible condition number over all possible diagonal +*> scalings. +*> +*> This routine differs from DPOEQU by restricting the scaling factors +*> to a power of the radix. Barring over- and underflow, scaling by +*> these factors introduces no additional rounding errors. However, the +*> scaled diagonal entries are no longer approximately 1 but lie +*> between sqrt(radix) and 1/sqrt(radix). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The N-by-N symmetric positive definite matrix whose scaling +*> factors are to be computed. Only the diagonal elements of A +*> are referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is DOUBLE PRECISION +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doublePOcomputational +* +* ===================================================================== + SUBROUTINE DPOEQUB( N, A, LDA, S, SCOND, AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION SMIN, BASE, TMP +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT, LOG, INT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* +* Positive definite only performs 1 pass of equilibration. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOEQUB', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF + + BASE = DLAMCH( 'B' ) + TMP = -0.5D+0 / LOG ( BASE ) +* +* Find the minimum and maximum diagonal elements. +* + S( 1 ) = A( 1, 1 ) + SMIN = S( 1 ) + AMAX = S( 1 ) + DO 10 I = 2, N + S( I ) = A( I, I ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 20 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 20 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 30 I = 1, N + S( I ) = BASE ** INT( TMP * LOG( S( I ) ) ) + 30 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)). +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF +* + RETURN +* +* End of DPOEQUB +* + END diff --git a/dspl/liblapack/SRC/dporfs.f b/dspl/liblapack/SRC/dporfs.f new file mode 100644 index 0000000..8fc74fd --- /dev/null +++ b/dspl/liblapack/SRC/dporfs.f @@ -0,0 +1,430 @@ +*> \brief \b DPORFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPORFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, +* LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPORFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric positive definite, +*> and provides error bounds and backward error estimates for the +*> solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by DPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by DPOTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doublePOcomputational +* +* ===================================================================== + SUBROUTINE DPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, + $ LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLACN2, DPOTRS, DSYMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPORFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DSYMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, + $ WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + DO 40 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 40 CONTINUE + WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK + DO 60 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 60 CONTINUE + WORK( K ) = WORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL DPOTRS( UPLO, N, 1, AF, LDAF, WORK( N+1 ), N, INFO ) + CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL DLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**T). +* + CALL DPOTRS( UPLO, N, 1, AF, LDAF, WORK( N+1 ), N, INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 120 CONTINUE + CALL DPOTRS( UPLO, N, 1, AF, LDAF, WORK( N+1 ), N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of DPORFS +* + END diff --git a/dspl/liblapack/SRC/dporfsx.f b/dspl/liblapack/SRC/dporfsx.f new file mode 100644 index 0000000..5372492 --- /dev/null +++ b/dspl/liblapack/SRC/dporfsx.f @@ -0,0 +1,693 @@ +*> \brief \b DPORFSX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPORFSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPORFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, S, B, +* LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, EQUED +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX, * ), WORK( * ) +* DOUBLE PRECISION S( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPORFSX improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric positive +*> definite, and provides error bounds and backward error estimates +*> for the solution. In addition to normwise error bound, the code +*> provides maximum componentwise error bound if possible. See +*> comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the +*> error bounds. +*> +*> The original system of linear equations may have been equilibrated +*> before calling this routine, as described by arguments EQUED and S +*> below. In this case, the solution and error bounds returned are +*> for the original unequilibrated system. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done to A +*> before calling this routine. This is needed to compute +*> the solution and error bounds correctly. +*> = 'N': No equilibration +*> = 'Y': Both row and column equilibration, i.e., A has been +*> replaced by diag(S) * A * diag(S). +*> The right hand side B has been changed accordingly. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by DPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The row scale factors for A. If EQUED = 'Y', A is multiplied on +*> the left and right by diag(S). S is an input argument if FACT = +*> 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED +*> = 'Y', each element of S must be positive. If S is output, each +*> element of S is a power of the radix. If S is input, each element +*> of S should be a power of the radix to ensure a reliable solution +*> and error estimates. Scaling by powers of the radix does not cause +*> rounding errors unless the result underflows or overflows. +*> Rounding errors during scaling lead to refining with a matrix that +*> is not equivalent to the input matrix, producing error estimates +*> that may not be reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by DGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is DOUBLE PRECISION array, dimension (NPARAMS) +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0D+0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doublePOcomputational +* +* ===================================================================== + SUBROUTINE DPORFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, S, B, + $ LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO, EQUED + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX, * ), WORK( * ) + DOUBLE PRECISION S( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION ITREF_DEFAULT, ITHRESH_DEFAULT + DOUBLE PRECISION COMPONENTWISE_DEFAULT, RTHRESH_DEFAULT + DOUBLE PRECISION DZTHRESH_DEFAULT + PARAMETER ( ITREF_DEFAULT = 1.0D+0 ) + PARAMETER ( ITHRESH_DEFAULT = 10.0D+0 ) + PARAMETER ( COMPONENTWISE_DEFAULT = 1.0D+0 ) + PARAMETER ( RTHRESH_DEFAULT = 0.5D+0 ) + PARAMETER ( DZTHRESH_DEFAULT = 0.25D+0 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. Local Scalars .. + CHARACTER(1) NORM + LOGICAL RCEQU + INTEGER J, PREC_TYPE, REF_TYPE + INTEGER N_NORMS + DOUBLE PRECISION ANORM, RCOND_TMP + DOUBLE PRECISION ILLRCOND_THRESH, ERR_LBND, CWISE_WRONG + LOGICAL IGNORE_CWISE + INTEGER ITHRESH + DOUBLE PRECISION RTHRESH, UNSTABLE_THRESH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DPOCON, DLA_PORFSX_EXTENDED +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. External Functions .. + EXTERNAL LSAME, ILAPREC + EXTERNAL DLAMCH, DLANSY, DLA_PORCOND + DOUBLE PRECISION DLAMCH, DLANSY, DLA_PORCOND + LOGICAL LSAME + INTEGER ILAPREC +* .. +* .. Executable Statements .. +* +* Check the input parameters. +* + INFO = 0 + REF_TYPE = INT( ITREF_DEFAULT ) + IF ( NPARAMS .GE. LA_LINRX_ITREF_I ) THEN + IF ( PARAMS( LA_LINRX_ITREF_I ) .LT. 0.0D+0 ) THEN + PARAMS( LA_LINRX_ITREF_I ) = ITREF_DEFAULT + ELSE + REF_TYPE = PARAMS( LA_LINRX_ITREF_I ) + END IF + END IF +* +* Set default parameters. +* + ILLRCOND_THRESH = DBLE( N ) * DLAMCH( 'Epsilon' ) + ITHRESH = INT( ITHRESH_DEFAULT ) + RTHRESH = RTHRESH_DEFAULT + UNSTABLE_THRESH = DZTHRESH_DEFAULT + IGNORE_CWISE = COMPONENTWISE_DEFAULT .EQ. 0.0D+0 +* + IF ( NPARAMS.GE.LA_LINRX_ITHRESH_I ) THEN + IF ( PARAMS( LA_LINRX_ITHRESH_I ).LT.0.0D+0 ) THEN + PARAMS( LA_LINRX_ITHRESH_I ) = ITHRESH + ELSE + ITHRESH = INT( PARAMS( LA_LINRX_ITHRESH_I ) ) + END IF + END IF + IF ( NPARAMS.GE.LA_LINRX_CWISE_I ) THEN + IF ( PARAMS( LA_LINRX_CWISE_I ).LT.0.0D+0 ) THEN + IF ( IGNORE_CWISE ) THEN + PARAMS( LA_LINRX_CWISE_I ) = 0.0D+0 + ELSE + PARAMS( LA_LINRX_CWISE_I ) = 1.0D+0 + END IF + ELSE + IGNORE_CWISE = PARAMS( LA_LINRX_CWISE_I ) .EQ. 0.0D+0 + END IF + END IF + IF ( REF_TYPE .EQ. 0 .OR. N_ERR_BNDS .EQ. 0 ) THEN + N_NORMS = 0 + ELSE IF ( IGNORE_CWISE ) THEN + N_NORMS = 1 + ELSE + N_NORMS = 2 + END IF +* + RCEQU = LSAME( EQUED, 'Y' ) +* +* Test input parameters. +* + IF (.NOT.LSAME(UPLO, 'U') .AND. .NOT.LSAME(UPLO, 'L')) THEN + INFO = -1 + ELSE IF( .NOT.RCEQU .AND. .NOT.LSAME( EQUED, 'N' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPORFSX', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + RCOND = 1.0D+0 + DO J = 1, NRHS + BERR( J ) = 0.0D+0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 0.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 0.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 1.0D+0 + END IF + END DO + RETURN + END IF +* +* Default to failure. +* + RCOND = 0.0D+0 + DO J = 1, NRHS + BERR( J ) = 1.0D+0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 0.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 0.0D+0 + END IF + END DO +* +* Compute the norm of A and the reciprocal of the condition +* number of A. +* + NORM = 'I' + ANORM = DLANSY( NORM, UPLO, N, A, LDA, WORK ) + CALL DPOCON( UPLO, N, AF, LDAF, ANORM, RCOND, WORK, + $ IWORK, INFO ) +* +* Perform refinement on each right-hand side +* + IF ( REF_TYPE .NE. 0 ) THEN + + PREC_TYPE = ILAPREC( 'E' ) + + CALL DLA_PORFSX_EXTENDED( PREC_TYPE, UPLO, N, + $ NRHS, A, LDA, AF, LDAF, RCEQU, S, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, ERR_BNDS_COMP, + $ WORK( N+1 ), WORK( 1 ), WORK( 2*N+1 ), WORK( 1 ), RCOND, + $ ITHRESH, RTHRESH, UNSTABLE_THRESH, IGNORE_CWISE, + $ INFO ) + END IF + + ERR_LBND = MAX( 10.0D+0, SQRT( DBLE( N ) ) ) * DLAMCH( 'Epsilon' ) + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 1 ) THEN +* +* Compute scaled normwise condition number cond(A*C). +* + IF ( RCEQU ) THEN + RCOND_TMP = DLA_PORCOND( UPLO, N, A, LDA, AF, LDAF, + $ -1, S, INFO, WORK, IWORK ) + ELSE + RCOND_TMP = DLA_PORCOND( UPLO, N, A, LDA, AF, LDAF, + $ 0, S, INFO, WORK, IWORK ) + END IF + DO J = 1, NRHS +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .GT. 1.0D+0 ) + $ ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 0.0D+0 + IF ( INFO .LE. N ) INFO = N + J + ELSE IF ( ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .LT. ERR_LBND ) + $ THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF +* +* Save the condition number. +* + IF (N_ERR_BNDS .GE. LA_LINRX_RCOND_I) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + END DO + END IF + + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 2 ) THEN +* +* Compute componentwise condition number cond(A*diag(Y(:,J))) for +* each right-hand side using the current solution as an estimate of +* the true solution. If the componentwise error estimate is too +* large, then the solution is a lousy estimate of truth and the +* estimated RCOND may be too optimistic. To avoid misleading users, +* the inverse condition number is set to 0.0 when the estimated +* cwise error is at least CWISE_WRONG. +* + CWISE_WRONG = SQRT( DLAMCH( 'Epsilon' ) ) + DO J = 1, NRHS + IF (ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .LT. CWISE_WRONG ) + $ THEN + RCOND_TMP = DLA_PORCOND( UPLO, N, A, LDA, AF, LDAF, 1, + $ X( 1, J ), INFO, WORK, IWORK ) + ELSE + RCOND_TMP = 0.0D+0 + END IF +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .GT. 1.0D+0 ) + $ ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 0.0D+0 + IF ( PARAMS( LA_LINRX_CWISE_I ) .EQ. 1.0D+0 + $ .AND. INFO.LT.N + J ) INFO = N + J + ELSE IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) + $ .LT. ERR_LBND ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF +* + RETURN +* +* End of DPORFSX +* + END diff --git a/dspl/liblapack/SRC/dposv.f b/dspl/liblapack/SRC/dposv.f new file mode 100644 index 0000000..ab8f007 --- /dev/null +++ b/dspl/liblapack/SRC/dposv.f @@ -0,0 +1,193 @@ +*> \brief DPOSV computes the solution to system of linear equations A * X = B for PO matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPOSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPOSV( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPOSV computes the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric positive definite matrix and X and B +*> are N-by-NRHS matrices. +*> +*> The Cholesky decomposition is used to factor A as +*> A = U**T* U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is a lower triangular +*> matrix. The factored form of A is then used to solve the system of +*> equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i of A is not +*> positive definite, so the factorization could not be +*> completed, and the solution has not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doublePOsolve +* +* ===================================================================== + SUBROUTINE DPOSV( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DPOTRF, DPOTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U**T*U or A = L*L**T. +* + CALL DPOTRF( UPLO, N, A, LDA, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL DPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* + END IF + RETURN +* +* End of DPOSV +* + END diff --git a/dspl/liblapack/SRC/dposvx.f b/dspl/liblapack/SRC/dposvx.f new file mode 100644 index 0000000..cf33c96 --- /dev/null +++ b/dspl/liblapack/SRC/dposvx.f @@ -0,0 +1,494 @@ +*> \brief DPOSVX computes the solution to system of linear equations A * X = B for PO matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPOSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPOSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED, +* S, B, LDB, X, LDX, RCOND, FERR, BERR, WORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ BERR( * ), FERR( * ), S( * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPOSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to +*> compute the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric positive definite matrix and X and B +*> are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +*> factor the matrix A (after equilibration if FACT = 'E') as +*> A = U**T* U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is a lower triangular +*> matrix. +*> +*> 3. If the leading i-by-i principal minor is not positive definite, +*> then the routine returns with INFO = i. Otherwise, the factored +*> form of A is used to estimate the condition number of the matrix +*> A. If the reciprocal of the condition number is less than machine +*> precision, INFO = N+1 is returned as a warning, but the routine +*> still goes on to solve for X and compute error bounds as +*> described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(S) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF contains the factored form of A. +*> If EQUED = 'Y', the matrix A has been equilibrated +*> with scaling factors given by S. A and AF will not +*> be modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A, except if FACT = 'F' and +*> EQUED = 'Y', then A must contain the equilibrated matrix +*> diag(S)*A*diag(S). If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. A is not modified if +*> FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T, in the same storage +*> format as A. If EQUED .ne. 'N', then AF is the factored form +*> of the equilibrated matrix diag(S)*A*diag(S). +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T of the original +*> matrix A. +*> +*> If FACT = 'E', then AF is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T of the equilibrated +*> matrix A (see the description of A for the form of the +*> equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A; not accessed if EQUED = 'N'. S is +*> an input argument if FACT = 'F'; otherwise, S is an output +*> argument. If FACT = 'F' and EQUED = 'Y', each element of S +*> must be positive. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +*> B is overwritten by diag(S) * B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +*> the original system of equations. Note that if EQUED = 'Y', +*> A and B are modified on exit, and the solution to the +*> equilibrated system is inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The estimate of the reciprocal condition number of the matrix +*> A after equilibration (if done). If RCOND is less than the +*> machine precision (in particular, if RCOND = 0), the matrix +*> is singular to working precision. This condition is +*> indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: the leading minor of order i of A is +*> not positive definite, so the factorization +*> could not be completed, and the solution has not +*> been computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doublePOsolve +* +* ===================================================================== + SUBROUTINE DPOSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED, + $ S, B, LDB, X, LDX, RCOND, FERR, BERR, WORK, + $ IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), S( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER I, INFEQU, J + DOUBLE PRECISION AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, DLAMCH, DLANSY +* .. +* .. External Subroutines .. + EXTERNAL DLACPY, DLAQSY, DPOCON, DPOEQU, DPORFS, DPOTRF, + $ DPOTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -9 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -10 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL DPOEQU( N, A, LDA, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL DLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U**T *U or A = L*L**T. +* + CALL DLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL DPOTRF( UPLO, N, AF, LDAF, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = DLANSY( '1', UPLO, N, A, LDA, WORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL DPOCON( UPLO, N, AF, LDAF, ANORM, RCOND, WORK, IWORK, INFO ) +* +* Compute the solution matrix X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DPOTRS( UPLO, N, NRHS, AF, LDAF, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL DPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, LDX, + $ FERR, BERR, WORK, IWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 50 J = 1, NRHS + DO 40 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 40 CONTINUE + 50 CONTINUE + DO 60 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 60 CONTINUE + END IF +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of DPOSVX +* + END diff --git a/dspl/liblapack/SRC/dposvxx.f b/dspl/liblapack/SRC/dposvxx.f new file mode 100644 index 0000000..488e0b1 --- /dev/null +++ b/dspl/liblapack/SRC/dposvxx.f @@ -0,0 +1,683 @@ +*> \brief DPOSVXX computes the solution to system of linear equations A * X = B for PO matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPOSVXX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPOSVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED, +* S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, +* N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, +* NPARAMS, PARAMS, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* DOUBLE PRECISION RCOND, RPVGRW +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX, * ), WORK( * ) +* DOUBLE PRECISION S( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPOSVXX uses the Cholesky factorization A = U**T*U or A = L*L**T +*> to compute the solution to a double precision system of linear equations +*> A * X = B, where A is an N-by-N symmetric positive definite matrix +*> and X and B are N-by-NRHS matrices. +*> +*> If requested, both normwise and maximum componentwise error bounds +*> are returned. DPOSVXX will return a solution with a tiny +*> guaranteed error (O(eps) where eps is the working machine +*> precision) unless the matrix is very ill-conditioned, in which +*> case a warning is returned. Relevant condition numbers also are +*> calculated and returned. +*> +*> DPOSVXX accepts user-provided factorizations and equilibration +*> factors; see the definitions of the FACT and EQUED options. +*> Solving with refinement and using a factorization from a previous +*> DPOSVXX call will also produce a solution with either O(eps) +*> errors or warnings, but we cannot make that claim for general +*> user-provided factorizations and equilibration factors if they +*> differ from what DPOSVXX would itself produce. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', double precision scaling factors are computed to equilibrate +*> the system: +*> +*> diag(S)*A*diag(S) *inv(diag(S))*X = diag(S)*B +*> +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +*> factor the matrix A (after equilibration if FACT = 'E') as +*> A = U**T* U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is a lower triangular +*> matrix. +*> +*> 3. If the leading i-by-i principal minor is not positive definite, +*> then the routine returns with INFO = i. Otherwise, the factored +*> form of A is used to estimate the condition number of the matrix +*> A (see argument RCOND). If the reciprocal of the condition number +*> is less than machine precision, the routine still goes on to solve +*> for X and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), +*> the routine will use iterative refinement to try to get a small +*> error and error bounds. Refinement calculates the residual to at +*> least twice the working precision. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(S) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF contains the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by S. +*> A and AF are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A, except if FACT = 'F' and EQUED = +*> 'Y', then A must contain the equilibrated matrix +*> diag(S)*A*diag(S). If UPLO = 'U', the leading N-by-N upper +*> triangular part of A contains the upper triangular part of the +*> matrix A, and the strictly lower triangular part of A is not +*> referenced. If UPLO = 'L', the leading N-by-N lower triangular +*> part of A contains the lower triangular part of the matrix A, and +*> the strictly upper triangular part of A is not referenced. A is +*> not modified if FACT = 'F' or 'N', or if FACT = 'E' and EQUED = +*> 'N' on exit. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T, in the same storage +*> format as A. If EQUED .ne. 'N', then AF is the factored +*> form of the equilibrated matrix diag(S)*A*diag(S). +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T of the original +*> matrix A. +*> +*> If FACT = 'E', then AF is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T of the equilibrated +*> matrix A (see the description of A for the form of the +*> equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Both row and column equilibration, i.e., A has been +*> replaced by diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The row scale factors for A. If EQUED = 'Y', A is multiplied on +*> the left and right by diag(S). S is an input argument if FACT = +*> 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED +*> = 'Y', each element of S must be positive. If S is output, each +*> element of S is a power of the radix. If S is input, each element +*> of S should be a power of the radix to ensure a reliable solution +*> and error estimates. Scaling by powers of the radix does not cause +*> rounding errors unless the result underflows or overflows. +*> Rounding errors during scaling lead to refining with a matrix that +*> is not equivalent to the input matrix, producing error estimates +*> that may not be reliable. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if EQUED = 'Y', B is overwritten by diag(S)*B; +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X to the original +*> system of equations. Note that A and B are modified on exit if +*> EQUED .ne. 'N', and the solution to the equilibrated system is +*> inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] RPVGRW +*> \verbatim +*> RPVGRW is DOUBLE PRECISION +*> Reciprocal pivot growth. On exit, this contains the reciprocal +*> pivot growth factor norm(A)/norm(U). The "max absolute element" +*> norm is used. If this is much less than 1, then the stability of +*> the LU factorization of the (equilibrated) matrix A could be poor. +*> This also means that the solution X, estimated condition numbers, +*> and error bounds could be unreliable. If factorization fails with +*> 0 for the leading INFO columns of A. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is DOUBLE PRECISION array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0D+0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the extra-precise refinement algorithm. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doublePOsolve +* +* ===================================================================== + SUBROUTINE DPOSVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED, + $ S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, + $ N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, + $ NPARAMS, PARAMS, WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + DOUBLE PRECISION RCOND, RPVGRW +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX, * ), WORK( * ) + DOUBLE PRECISION S( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER INFEQU, J + DOUBLE PRECISION AMAX, BIGNUM, SMIN, SMAX, + $ SCOND, SMLNUM +* .. +* .. External Functions .. + EXTERNAL LSAME, DLAMCH, DLA_PORPVGRW + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLA_PORPVGRW +* .. +* .. External Subroutines .. + EXTERNAL DPOEQUB, DPOTRF, DPOTRS, DLACPY, DLAQSY, + $ XERBLA, DLASCL2, DPORFSX +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + ENDIF +* +* Default is failure. If an input parameter is wrong or +* factorization fails, make everything look horrible. Only the +* pivot growth is set here, the rest is initialized in DPORFSX. +* + RPVGRW = ZERO +* +* Test the input parameters. PARAMS is not tested until DPORFSX. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT. + $ LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. + $ .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -9 + ELSE + IF ( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -10 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOSVXX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL DPOEQUB( N, A, LDA, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL DLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) CALL DLASCL2( N, NRHS, S, B, LDB ) +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization of A. +* + CALL DLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL DPOTRF( UPLO, N, AF, LDAF, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN +* +* Pivot in column INFO is exactly 0 +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + RPVGRW = DLA_PORPVGRW( UPLO, INFO, A, LDA, AF, LDAF, WORK ) + RETURN + ENDIF + END IF +* +* Compute the reciprocal growth factor RPVGRW. +* + RPVGRW = DLA_PORPVGRW( UPLO, N, A, LDA, AF, LDAF, WORK ) +* +* Compute the solution matrix X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DPOTRS( UPLO, N, NRHS, AF, LDAF, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL DPORFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, + $ S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, IWORK, INFO ) + +* +* Scale solutions. +* + IF ( RCEQU ) THEN + CALL DLASCL2 ( N, NRHS, S, X, LDX ) + END IF +* + RETURN +* +* End of DPOSVXX +* + END diff --git a/dspl/liblapack/SRC/dpotf2.f b/dspl/liblapack/SRC/dpotf2.f new file mode 100644 index 0000000..1fb60a9 --- /dev/null +++ b/dspl/liblapack/SRC/dpotf2.f @@ -0,0 +1,230 @@ +*> \brief \b DPOTF2 computes the Cholesky factorization of a symmetric/Hermitian positive definite matrix (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPOTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPOTF2( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPOTF2 computes the Cholesky factorization of a real symmetric +*> positive definite matrix A. +*> +*> The factorization has the form +*> A = U**T * U , if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**T *U or A = L*L**T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, the leading minor of order k is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doublePOcomputational +* +* ===================================================================== + SUBROUTINE DPOTF2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U**T *U. +* + DO 10 J = 1, N +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = A( J, J ) - DDOT( J-1, A( 1, J ), 1, A( 1, J ), 1 ) + IF( AJJ.LE.ZERO.OR.DISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of row J. +* + IF( J.LT.N ) THEN + CALL DGEMV( 'Transpose', J-1, N-J, -ONE, A( 1, J+1 ), + $ LDA, A( 1, J ), 1, ONE, A( J, J+1 ), LDA ) + CALL DSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L**T. +* + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = A( J, J ) - DDOT( J-1, A( J, 1 ), LDA, A( J, 1 ), + $ LDA ) + IF( AJJ.LE.ZERO.OR.DISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of column J. +* + IF( J.LT.N ) THEN + CALL DGEMV( 'No transpose', N-J, J-1, -ONE, A( J+1, 1 ), + $ LDA, A( J, 1 ), LDA, ONE, A( J+1, J ), 1 ) + CALL DSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF + GO TO 40 +* + 30 CONTINUE + INFO = J +* + 40 CONTINUE + RETURN +* +* End of DPOTF2 +* + END diff --git a/dspl/liblapack/SRC/dpotrf.f b/dspl/liblapack/SRC/dpotrf.f new file mode 100644 index 0000000..1fa75a4 --- /dev/null +++ b/dspl/liblapack/SRC/dpotrf.f @@ -0,0 +1,246 @@ +*> \brief \b DPOTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPOTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPOTRF( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPOTRF computes the Cholesky factorization of a real symmetric +*> positive definite matrix A. +*> +*> The factorization has the form +*> A = U**T * U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> +*> This is the block version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doublePOcomputational +* +* ===================================================================== + SUBROUTINE DPOTRF( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DPOTRF2, DSYRK, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'DPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + CALL DPOTRF2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code. +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U**T*U. +* + DO 10 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + CALL DSYRK( 'Upper', 'Transpose', JB, J-1, -ONE, + $ A( 1, J ), LDA, ONE, A( J, J ), LDA ) + CALL DPOTRF2( 'Upper', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( J+JB.LE.N ) THEN +* +* Compute the current block row. +* + CALL DGEMM( 'Transpose', 'No transpose', JB, N-J-JB+1, + $ J-1, -ONE, A( 1, J ), LDA, A( 1, J+JB ), + $ LDA, ONE, A( J, J+JB ), LDA ) + CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', + $ JB, N-J-JB+1, ONE, A( J, J ), LDA, + $ A( J, J+JB ), LDA ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization A = L*L**T. +* + DO 20 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + CALL DSYRK( 'Lower', 'No transpose', JB, J-1, -ONE, + $ A( J, 1 ), LDA, ONE, A( J, J ), LDA ) + CALL DPOTRF2( 'Lower', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( J+JB.LE.N ) THEN +* +* Compute the current block column. +* + CALL DGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ J-1, -ONE, A( J+JB, 1 ), LDA, A( J, 1 ), + $ LDA, ONE, A( J+JB, J ), LDA ) + CALL DTRSM( 'Right', 'Lower', 'Transpose', 'Non-unit', + $ N-J-JB+1, JB, ONE, A( J, J ), LDA, + $ A( J+JB, J ), LDA ) + END IF + 20 CONTINUE + END IF + END IF + GO TO 40 +* + 30 CONTINUE + INFO = INFO + J - 1 +* + 40 CONTINUE + RETURN +* +* End of DPOTRF +* + END diff --git a/dspl/liblapack/SRC/dpotrf2.f b/dspl/liblapack/SRC/dpotrf2.f new file mode 100644 index 0000000..0d419c4 --- /dev/null +++ b/dspl/liblapack/SRC/dpotrf2.f @@ -0,0 +1,237 @@ +*> \brief \b DPOTRF2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE DPOTRF2( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPOTRF2 computes the Cholesky factorization of a real symmetric +*> positive definite matrix A using the recursive algorithm. +*> +*> The factorization has the form +*> A = U**T * U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> +*> This is the recursive version of the algorithm. It divides +*> the matrix into four submatrices: +*> +*> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 +*> A = [ -----|----- ] with n1 = n/2 +*> [ A21 | A22 ] n2 = n-n1 +*> +*> The subroutine calls itself to factor A11. Update and scale A21 +*> or A12, update A22 then calls itself to factor A22. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doublePOcomputational +* +* ===================================================================== + RECURSIVE SUBROUTINE DPOTRF2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER N1, N2, IINFO +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL DSYRK, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOTRF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* N=1 case +* + IF( N.EQ.1 ) THEN +* +* Test for non-positive-definiteness +* + IF( A( 1, 1 ).LE.ZERO.OR.DISNAN( A( 1, 1 ) ) ) THEN + INFO = 1 + RETURN + END IF +* +* Factor +* + A( 1, 1 ) = SQRT( A( 1, 1 ) ) +* +* Use recursive code +* + ELSE + N1 = N/2 + N2 = N-N1 +* +* Factor A11 +* + CALL DPOTRF2( UPLO, N1, A( 1, 1 ), LDA, IINFO ) + IF ( IINFO.NE.0 ) THEN + INFO = IINFO + RETURN + END IF +* +* Compute the Cholesky factorization A = U**T*U +* + IF( UPPER ) THEN +* +* Update and scale A12 +* + CALL DTRSM( 'L', 'U', 'T', 'N', N1, N2, ONE, + $ A( 1, 1 ), LDA, A( 1, N1+1 ), LDA ) +* +* Update and factor A22 +* + CALL DSYRK( UPLO, 'T', N2, N1, -ONE, A( 1, N1+1 ), LDA, + $ ONE, A( N1+1, N1+1 ), LDA ) + CALL DPOTRF2( UPLO, N2, A( N1+1, N1+1 ), LDA, IINFO ) + IF ( IINFO.NE.0 ) THEN + INFO = IINFO + N1 + RETURN + END IF +* +* Compute the Cholesky factorization A = L*L**T +* + ELSE +* +* Update and scale A21 +* + CALL DTRSM( 'R', 'L', 'T', 'N', N2, N1, ONE, + $ A( 1, 1 ), LDA, A( N1+1, 1 ), LDA ) +* +* Update and factor A22 +* + CALL DSYRK( UPLO, 'N', N2, N1, -ONE, A( N1+1, 1 ), LDA, + $ ONE, A( N1+1, N1+1 ), LDA ) + CALL DPOTRF2( UPLO, N2, A( N1+1, N1+1 ), LDA, IINFO ) + IF ( IINFO.NE.0 ) THEN + INFO = IINFO + N1 + RETURN + END IF + END IF + END IF + RETURN +* +* End of DPOTRF2 +* + END diff --git a/dspl/liblapack/SRC/dpotri.f b/dspl/liblapack/SRC/dpotri.f new file mode 100644 index 0000000..4d2dcb4 --- /dev/null +++ b/dspl/liblapack/SRC/dpotri.f @@ -0,0 +1,159 @@ +*> \brief \b DPOTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPOTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPOTRI( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPOTRI computes the inverse of a real symmetric positive definite +*> matrix A using the Cholesky factorization A = U**T*U or A = L*L**T +*> computed by DPOTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T, as computed by +*> DPOTRF. +*> On exit, the upper or lower triangle of the (symmetric) +*> inverse of A, overwriting the input factor U or L. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the (i,i) element of the factor U or L is +*> zero, and the inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doublePOcomputational +* +* ===================================================================== + SUBROUTINE DPOTRI( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLAUUM, DTRTRI, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL DTRTRI( UPLO, 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* +* Form inv(U) * inv(U)**T or inv(L)**T * inv(L). +* + CALL DLAUUM( UPLO, N, A, LDA, INFO ) +* + RETURN +* +* End of DPOTRI +* + END diff --git a/dspl/liblapack/SRC/dpotrs.f b/dspl/liblapack/SRC/dpotrs.f new file mode 100644 index 0000000..4cc5e74 --- /dev/null +++ b/dspl/liblapack/SRC/dpotrs.f @@ -0,0 +1,204 @@ +*> \brief \b DPOTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPOTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPOTRS solves a system of linear equations A*X = B with a symmetric +*> positive definite matrix A using the Cholesky factorization +*> A = U**T*U or A = L*L**T computed by DPOTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by DPOTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doublePOcomputational +* +* ===================================================================== + SUBROUTINE DPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPOTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U**T *U. +* +* Solve U**T *X = B, overwriting B with X. +* + CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL DTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A*X = B where A = L*L**T. +* +* Solve L*X = B, overwriting B with X. +* + CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) +* +* Solve L**T *X = B, overwriting B with X. +* + CALL DTRSM( 'Left', 'Lower', 'Transpose', 'Non-unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) + END IF +* + RETURN +* +* End of DPOTRS +* + END diff --git a/dspl/liblapack/SRC/dppcon.f b/dspl/liblapack/SRC/dppcon.f new file mode 100644 index 0000000..0e6ab92 --- /dev/null +++ b/dspl/liblapack/SRC/dppcon.f @@ -0,0 +1,248 @@ +*> \brief \b DPPCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPPCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPPCON( UPLO, N, AP, ANORM, RCOND, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPPCON estimates the reciprocal of the condition number (in the +*> 1-norm) of a real symmetric positive definite packed matrix using +*> the Cholesky factorization A = U**T*U or A = L*L**T computed by +*> DPPTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, packed columnwise in a linear +*> array. The j-th column of U or L is stored in the array AP +*> as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The 1-norm (or infinity-norm) of the symmetric matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DPPCON( UPLO, N, AP, ANORM, RCOND, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + DOUBLE PRECISION AINVNM, SCALE, SCALEL, SCALEU, SMLNUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IDAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLACN2, DLATPS, DRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U**T). +* + CALL DLATPS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEL, WORK( 2*N+1 ), INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL DLATPS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEU, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL DLATPS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEL, WORK( 2*N+1 ), INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L**T). +* + CALL DLATPS( 'Lower', 'Transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEU, WORK( 2*N+1 ), INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = IDAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL DRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of DPPCON +* + END diff --git a/dspl/liblapack/SRC/dppequ.f b/dspl/liblapack/SRC/dppequ.f new file mode 100644 index 0000000..3563b59 --- /dev/null +++ b/dspl/liblapack/SRC/dppequ.f @@ -0,0 +1,238 @@ +*> \brief \b DPPEQU +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPPEQU + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AP( * ), S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPPEQU computes row and column scalings intended to equilibrate a +*> symmetric positive definite matrix A in packed storage and reduce +*> its condition number (with respect to the two-norm). S contains the +*> scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix +*> B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal. +*> This choice of S puts the condition number of B within a factor N of +*> the smallest possible condition number over all possible diagonal +*> scalings. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the symmetric matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is DOUBLE PRECISION +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, JJ + DOUBLE PRECISION SMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPPEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* +* Initialize SMIN and AMAX. +* + S( 1 ) = AP( 1 ) + SMIN = S( 1 ) + AMAX = S( 1 ) +* + IF( UPPER ) THEN +* +* UPLO = 'U': Upper triangle of A is stored. +* Find the minimum and maximum diagonal elements. +* + JJ = 1 + DO 10 I = 2, N + JJ = JJ + I + S( I ) = AP( JJ ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + ELSE +* +* UPLO = 'L': Lower triangle of A is stored. +* Find the minimum and maximum diagonal elements. +* + JJ = 1 + DO 20 I = 2, N + JJ = JJ + N - I + 2 + S( I ) = AP( JJ ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 20 CONTINUE + END IF +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 30 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 30 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 40 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 40 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of DPPEQU +* + END diff --git a/dspl/liblapack/SRC/dpprfs.f b/dspl/liblapack/SRC/dpprfs.f new file mode 100644 index 0000000..1c068e2 --- /dev/null +++ b/dspl/liblapack/SRC/dpprfs.f @@ -0,0 +1,421 @@ +*> \brief \b DPPRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPPRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, +* BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION AFP( * ), AP( * ), B( LDB, * ), BERR( * ), +* $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPPRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric positive definite +*> and packed, and provides error bounds and backward error estimates +*> for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the symmetric matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in] AFP +*> \verbatim +*> AFP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by DPPTRF/ZPPTRF, +*> packed columnwise in a linear array in the same format as A +*> (see AP). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by DPPTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, + $ BERR, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AFP( * ), AP( * ), B( LDB, * ), BERR( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IK, J, K, KASE, KK, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLACN2, DPPTRS, DSPMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DSPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK( N+1 ), + $ 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + KK = 1 + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + IK = KK + DO 40 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK + S = S + ABS( AP( IK ) )*ABS( X( I, J ) ) + IK = IK + 1 + 40 CONTINUE + WORK( K ) = WORK( K ) + ABS( AP( KK+K-1 ) )*XK + S + KK = KK + K + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + WORK( K ) = WORK( K ) + ABS( AP( KK ) )*XK + IK = KK + 1 + DO 60 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK + S = S + ABS( AP( IK ) )*ABS( X( I, J ) ) + IK = IK + 1 + 60 CONTINUE + WORK( K ) = WORK( K ) + S + KK = KK + ( N-K+1 ) + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL DPPTRS( UPLO, N, 1, AFP, WORK( N+1 ), N, INFO ) + CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL DLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**T). +* + CALL DPPTRS( UPLO, N, 1, AFP, WORK( N+1 ), N, INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 120 CONTINUE + CALL DPPTRS( UPLO, N, 1, AFP, WORK( N+1 ), N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of DPPRFS +* + END diff --git a/dspl/liblapack/SRC/dppsv.f b/dspl/liblapack/SRC/dppsv.f new file mode 100644 index 0000000..cb70bab --- /dev/null +++ b/dspl/liblapack/SRC/dppsv.f @@ -0,0 +1,205 @@ +*> \brief DPPSV computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPPSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPPSV( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPPSV computes the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric positive definite matrix stored in +*> packed format and X and B are N-by-NRHS matrices. +*> +*> The Cholesky decomposition is used to factor A as +*> A = U**T* U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is a lower triangular +*> matrix. The factored form of A is then used to solve the system of +*> equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T, in the same storage +*> format as A. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i of A is not +*> positive definite, so the factorization could not be +*> completed, and the solution has not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the symmetric matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = conjg(aji)) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DPPSV( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DPPTRF, DPPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPPSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U**T*U or A = L*L**T. +* + CALL DPPTRF( UPLO, N, AP, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL DPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO ) +* + END IF + RETURN +* +* End of DPPSV +* + END diff --git a/dspl/liblapack/SRC/dppsvx.f b/dspl/liblapack/SRC/dppsvx.f new file mode 100644 index 0000000..4fc84ea --- /dev/null +++ b/dspl/liblapack/SRC/dppsvx.f @@ -0,0 +1,493 @@ +*> \brief DPPSVX computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPPSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPPSVX( FACT, UPLO, N, NRHS, AP, AFP, EQUED, S, B, LDB, +* X, LDX, RCOND, FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION AFP( * ), AP( * ), B( LDB, * ), BERR( * ), +* $ FERR( * ), S( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPPSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to +*> compute the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric positive definite matrix stored in +*> packed format and X and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +*> factor the matrix A (after equilibration if FACT = 'E') as +*> A = U**T* U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is a lower triangular +*> matrix. +*> +*> 3. If the leading i-by-i principal minor is not positive definite, +*> then the routine returns with INFO = i. Otherwise, the factored +*> form of A is used to estimate the condition number of the matrix +*> A. If the reciprocal of the condition number is less than machine +*> precision, INFO = N+1 is returned as a warning, but the routine +*> still goes on to solve for X and compute error bounds as +*> described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(S) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AFP contains the factored form of A. +*> If EQUED = 'Y', the matrix A has been equilibrated +*> with scaling factors given by S. AP and AFP will not +*> be modified. +*> = 'N': The matrix A will be copied to AFP and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AFP and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array, except if FACT = 'F' +*> and EQUED = 'Y', then A must contain the equilibrated matrix +*> diag(S)*A*diag(S). The j-th column of A is stored in the +*> array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. A is not modified if +*> FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in,out] AFP +*> \verbatim +*> AFP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> If FACT = 'F', then AFP is an input argument and on entry +*> contains the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T, in the same storage +*> format as A. If EQUED .ne. 'N', then AFP is the factored +*> form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AFP is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**T * U or A = L * L**T of the original +*> matrix A. +*> +*> If FACT = 'E', then AFP is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**T * U or A = L * L**T of the equilibrated +*> matrix A (see the description of AP for the form of the +*> equilibrated matrix). +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A; not accessed if EQUED = 'N'. S is +*> an input argument if FACT = 'F'; otherwise, S is an output +*> argument. If FACT = 'F' and EQUED = 'Y', each element of S +*> must be positive. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +*> B is overwritten by diag(S) * B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +*> the original system of equations. Note that if EQUED = 'Y', +*> A and B are modified on exit, and the solution to the +*> equilibrated system is inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The estimate of the reciprocal condition number of the matrix +*> A after equilibration (if done). If RCOND is less than the +*> machine precision (in particular, if RCOND = 0), the matrix +*> is singular to working precision. This condition is +*> indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: the leading minor of order i of A is +*> not positive definite, so the factorization +*> could not be completed, and the solution has not +*> been computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleOTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the symmetric matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = conjg(aji)) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DPPSVX( FACT, UPLO, N, NRHS, AP, AFP, EQUED, S, B, LDB, + $ X, LDX, RCOND, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AFP( * ), AP( * ), B( LDB, * ), BERR( * ), + $ FERR( * ), S( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER I, INFEQU, J + DOUBLE PRECISION AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSP + EXTERNAL LSAME, DLAMCH, DLANSP +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLACPY, DLAQSP, DPPCON, DPPEQU, DPPRFS, + $ DPPTRF, DPPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -7 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -8 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPPSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL DPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL DLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U**T * U or A = L * L**T. +* + CALL DCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 ) + CALL DPPTRF( UPLO, N, AFP, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = DLANSP( 'I', UPLO, N, AP, WORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL DPPCON( UPLO, N, AFP, ANORM, RCOND, WORK, IWORK, INFO ) +* +* Compute the solution matrix X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DPPTRS( UPLO, N, NRHS, AFP, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL DPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, BERR, + $ WORK, IWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 50 J = 1, NRHS + DO 40 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 40 CONTINUE + 50 CONTINUE + DO 60 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 60 CONTINUE + END IF +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of DPPSVX +* + END diff --git a/dspl/liblapack/SRC/dpptrf.f b/dspl/liblapack/SRC/dpptrf.f new file mode 100644 index 0000000..c7f0c35 --- /dev/null +++ b/dspl/liblapack/SRC/dpptrf.f @@ -0,0 +1,240 @@ +*> \brief \b DPPTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPPTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPPTRF( UPLO, N, AP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPPTRF computes the Cholesky factorization of a real symmetric +*> positive definite matrix A stored in packed format. +*> +*> The factorization has the form +*> A = U**T * U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**T*U or A = L*L**T, in the same +*> storage format as A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the symmetric matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = aji) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DPPTRF( UPLO, N, AP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JC, JJ + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSPR, DTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPPTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U**T*U. +* + JJ = 0 + DO 10 J = 1, N + JC = JJ + 1 + JJ = JJ + J +* +* Compute elements 1:J-1 of column J. +* + IF( J.GT.1 ) + $ CALL DTPSV( 'Upper', 'Transpose', 'Non-unit', J-1, AP, + $ AP( JC ), 1 ) +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = AP( JJ ) - DDOT( J-1, AP( JC ), 1, AP( JC ), 1 ) + IF( AJJ.LE.ZERO ) THEN + AP( JJ ) = AJJ + GO TO 30 + END IF + AP( JJ ) = SQRT( AJJ ) + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L**T. +* + JJ = 1 + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = AP( JJ ) + IF( AJJ.LE.ZERO ) THEN + AP( JJ ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + AP( JJ ) = AJJ +* +* Compute elements J+1:N of column J and update the trailing +* submatrix. +* + IF( J.LT.N ) THEN + CALL DSCAL( N-J, ONE / AJJ, AP( JJ+1 ), 1 ) + CALL DSPR( 'Lower', N-J, -ONE, AP( JJ+1 ), 1, + $ AP( JJ+N-J+1 ) ) + JJ = JJ + N - J + 1 + END IF + 20 CONTINUE + END IF + GO TO 40 +* + 30 CONTINUE + INFO = J +* + 40 CONTINUE + RETURN +* +* End of DPPTRF +* + END diff --git a/dspl/liblapack/SRC/dpptri.f b/dspl/liblapack/SRC/dpptri.f new file mode 100644 index 0000000..8f16de0 --- /dev/null +++ b/dspl/liblapack/SRC/dpptri.f @@ -0,0 +1,188 @@ +*> \brief \b DPPTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPPTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPPTRI( UPLO, N, AP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPPTRI computes the inverse of a real symmetric positive definite +*> matrix A using the Cholesky factorization A = U**T*U or A = L*L**T +*> computed by DPPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular factor is stored in AP; +*> = 'L': Lower triangular factor is stored in AP. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On entry, the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T, packed columnwise as +*> a linear array. The j-th column of U or L is stored in the +*> array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +*> +*> On exit, the upper or lower triangle of the (symmetric) +*> inverse of A, overwriting the input factor U or L. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the (i,i) element of the factor U or L is +*> zero, and the inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DPPTRI( UPLO, N, AP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JC, JJ, JJN + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSPR, DTPMV, DTPTRI, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPPTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL DTPTRI( UPLO, 'Non-unit', N, AP, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the product inv(U) * inv(U)**T. +* + JJ = 0 + DO 10 J = 1, N + JC = JJ + 1 + JJ = JJ + J + IF( J.GT.1 ) + $ CALL DSPR( 'Upper', J-1, ONE, AP( JC ), 1, AP ) + AJJ = AP( JJ ) + CALL DSCAL( J, AJJ, AP( JC ), 1 ) + 10 CONTINUE +* + ELSE +* +* Compute the product inv(L)**T * inv(L). +* + JJ = 1 + DO 20 J = 1, N + JJN = JJ + N - J + 1 + AP( JJ ) = DDOT( N-J+1, AP( JJ ), 1, AP( JJ ), 1 ) + IF( J.LT.N ) + $ CALL DTPMV( 'Lower', 'Transpose', 'Non-unit', N-J, + $ AP( JJN ), AP( JJ+1 ), 1 ) + JJ = JJN + 20 CONTINUE + END IF +* + RETURN +* +* End of DPPTRI +* + END diff --git a/dspl/liblapack/SRC/dpptrs.f b/dspl/liblapack/SRC/dpptrs.f new file mode 100644 index 0000000..b4410a5 --- /dev/null +++ b/dspl/liblapack/SRC/dpptrs.f @@ -0,0 +1,203 @@ +*> \brief \b DPPTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPPTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPPTRS solves a system of linear equations A*X = B with a symmetric +*> positive definite matrix A in packed storage using the Cholesky +*> factorization A = U**T*U or A = L*L**T computed by DPPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, packed columnwise in a linear +*> array. The j-th column of U or L is stored in the array AP +*> as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U**T * U. +* + DO 10 I = 1, NRHS +* +* Solve U**T *X = B, overwriting B with X. +* + CALL DTPSV( 'Upper', 'Transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) +* +* Solve U*X = B, overwriting B with X. +* + CALL DTPSV( 'Upper', 'No transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) + 10 CONTINUE + ELSE +* +* Solve A*X = B where A = L * L**T. +* + DO 20 I = 1, NRHS +* +* Solve L*Y = B, overwriting B with X. +* + CALL DTPSV( 'Lower', 'No transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) +* +* Solve L**T *X = Y, overwriting B with X. +* + CALL DTPSV( 'Lower', 'Transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) + 20 CONTINUE + END IF +* + RETURN +* +* End of DPPTRS +* + END diff --git a/dspl/liblapack/SRC/dpstf2.f b/dspl/liblapack/SRC/dpstf2.f new file mode 100644 index 0000000..53c4d34 --- /dev/null +++ b/dspl/liblapack/SRC/dpstf2.f @@ -0,0 +1,386 @@ +*> \brief \b DPSTF2 computes the Cholesky factorization with complete pivoting of a real symmetric positive semidefinite matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPSTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPSTF2( UPLO, N, A, LDA, PIV, RANK, TOL, WORK, INFO ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION TOL +* INTEGER INFO, LDA, N, RANK +* CHARACTER UPLO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), WORK( 2*N ) +* INTEGER PIV( N ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPSTF2 computes the Cholesky factorization with complete +*> pivoting of a real symmetric positive semidefinite matrix A. +*> +*> The factorization has the form +*> P**T * A * P = U**T * U , if UPLO = 'U', +*> P**T * A * P = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular, and +*> P is stored as vector PIV. +*> +*> This algorithm does not attempt to check that A is positive +*> semidefinite. This version of the algorithm calls level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization as above. +*> \endverbatim +*> +*> \param[out] PIV +*> \verbatim +*> PIV is INTEGER array, dimension (N) +*> PIV is such that the nonzero entries are P( PIV(K), K ) = 1. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The rank of A given by the number of steps the algorithm +*> completed. +*> \endverbatim +*> +*> \param[in] TOL +*> \verbatim +*> TOL is DOUBLE PRECISION +*> User defined tolerance. If TOL < 0, then N*U*MAX( A( K,K ) ) +*> will be used. The algorithm terminates at the (K-1)st step +*> if the pivot <= TOL. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> Work space. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> < 0: If INFO = -K, the K-th argument had an illegal value, +*> = 0: algorithm completed successfully, and +*> > 0: the matrix A is either rank deficient with computed rank +*> as returned in RANK, or is not positive semidefinite. See +*> Section 7 of LAPACK Working Note #161 for further +*> information. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DPSTF2( UPLO, N, A, LDA, PIV, RANK, TOL, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + DOUBLE PRECISION TOL + INTEGER INFO, LDA, N, RANK + CHARACTER UPLO +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), WORK( 2*N ) + INTEGER PIV( N ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION AJJ, DSTOP, DTEMP + INTEGER I, ITEMP, J, PVT + LOGICAL UPPER +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + LOGICAL LSAME, DISNAN + EXTERNAL DLAMCH, LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT, MAXLOC +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPSTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize PIV +* + DO 100 I = 1, N + PIV( I ) = I + 100 CONTINUE +* +* Compute stopping value +* + PVT = 1 + AJJ = A( PVT, PVT ) + DO I = 2, N + IF( A( I, I ).GT.AJJ ) THEN + PVT = I + AJJ = A( PVT, PVT ) + END IF + END DO + IF( AJJ.LE.ZERO.OR.DISNAN( AJJ ) ) THEN + RANK = 0 + INFO = 1 + GO TO 170 + END IF +* +* Compute stopping value if not supplied +* + IF( TOL.LT.ZERO ) THEN + DSTOP = N * DLAMCH( 'Epsilon' ) * AJJ + ELSE + DSTOP = TOL + END IF +* +* Set first half of WORK to zero, holds dot products +* + DO 110 I = 1, N + WORK( I ) = 0 + 110 CONTINUE +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization P**T * A * P = U**T * U +* + DO 130 J = 1, N +* +* Find pivot, test for exit, else swap rows and columns +* Update dot products, compute possible pivots which are +* stored in the second half of WORK +* + DO 120 I = J, N +* + IF( J.GT.1 ) THEN + WORK( I ) = WORK( I ) + A( J-1, I )**2 + END IF + WORK( N+I ) = A( I, I ) - WORK( I ) +* + 120 CONTINUE +* + IF( J.GT.1 ) THEN + ITEMP = MAXLOC( WORK( (N+J):(2*N) ), 1 ) + PVT = ITEMP + J - 1 + AJJ = WORK( N+PVT ) + IF( AJJ.LE.DSTOP.OR.DISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 160 + END IF + END IF +* + IF( J.NE.PVT ) THEN +* +* Pivot OK, so can now swap pivot rows and columns +* + A( PVT, PVT ) = A( J, J ) + CALL DSWAP( J-1, A( 1, J ), 1, A( 1, PVT ), 1 ) + IF( PVT.LT.N ) + $ CALL DSWAP( N-PVT, A( J, PVT+1 ), LDA, + $ A( PVT, PVT+1 ), LDA ) + CALL DSWAP( PVT-J-1, A( J, J+1 ), LDA, A( J+1, PVT ), 1 ) +* +* Swap dot products and PIV +* + DTEMP = WORK( J ) + WORK( J ) = WORK( PVT ) + WORK( PVT ) = DTEMP + ITEMP = PIV( PVT ) + PIV( PVT ) = PIV( J ) + PIV( J ) = ITEMP + END IF +* + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of row J +* + IF( J.LT.N ) THEN + CALL DGEMV( 'Trans', J-1, N-J, -ONE, A( 1, J+1 ), LDA, + $ A( 1, J ), 1, ONE, A( J, J+1 ), LDA ) + CALL DSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA ) + END IF +* + 130 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization P**T * A * P = L * L**T +* + DO 150 J = 1, N +* +* Find pivot, test for exit, else swap rows and columns +* Update dot products, compute possible pivots which are +* stored in the second half of WORK +* + DO 140 I = J, N +* + IF( J.GT.1 ) THEN + WORK( I ) = WORK( I ) + A( I, J-1 )**2 + END IF + WORK( N+I ) = A( I, I ) - WORK( I ) +* + 140 CONTINUE +* + IF( J.GT.1 ) THEN + ITEMP = MAXLOC( WORK( (N+J):(2*N) ), 1 ) + PVT = ITEMP + J - 1 + AJJ = WORK( N+PVT ) + IF( AJJ.LE.DSTOP.OR.DISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 160 + END IF + END IF +* + IF( J.NE.PVT ) THEN +* +* Pivot OK, so can now swap pivot rows and columns +* + A( PVT, PVT ) = A( J, J ) + CALL DSWAP( J-1, A( J, 1 ), LDA, A( PVT, 1 ), LDA ) + IF( PVT.LT.N ) + $ CALL DSWAP( N-PVT, A( PVT+1, J ), 1, A( PVT+1, PVT ), + $ 1 ) + CALL DSWAP( PVT-J-1, A( J+1, J ), 1, A( PVT, J+1 ), LDA ) +* +* Swap dot products and PIV +* + DTEMP = WORK( J ) + WORK( J ) = WORK( PVT ) + WORK( PVT ) = DTEMP + ITEMP = PIV( PVT ) + PIV( PVT ) = PIV( J ) + PIV( J ) = ITEMP + END IF +* + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of column J +* + IF( J.LT.N ) THEN + CALL DGEMV( 'No Trans', N-J, J-1, -ONE, A( J+1, 1 ), LDA, + $ A( J, 1 ), LDA, ONE, A( J+1, J ), 1 ) + CALL DSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 ) + END IF +* + 150 CONTINUE +* + END IF +* +* Ran to completion, A has full rank +* + RANK = N +* + GO TO 170 + 160 CONTINUE +* +* Rank is number of steps completed. Set INFO = 1 to signal +* that the factorization cannot be used to solve a system. +* + RANK = J - 1 + INFO = 1 +* + 170 CONTINUE + RETURN +* +* End of DPSTF2 +* + END diff --git a/dspl/liblapack/SRC/dpstrf.f b/dspl/liblapack/SRC/dpstrf.f new file mode 100644 index 0000000..940f46d --- /dev/null +++ b/dspl/liblapack/SRC/dpstrf.f @@ -0,0 +1,445 @@ +*> \brief \b DPSTRF computes the Cholesky factorization with complete pivoting of a real symmetric positive semidefinite matrix. +* +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPSTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPSTRF( UPLO, N, A, LDA, PIV, RANK, TOL, WORK, INFO ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION TOL +* INTEGER INFO, LDA, N, RANK +* CHARACTER UPLO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), WORK( 2*N ) +* INTEGER PIV( N ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPSTRF computes the Cholesky factorization with complete +*> pivoting of a real symmetric positive semidefinite matrix A. +*> +*> The factorization has the form +*> P**T * A * P = U**T * U , if UPLO = 'U', +*> P**T * A * P = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular, and +*> P is stored as vector PIV. +*> +*> This algorithm does not attempt to check that A is positive +*> semidefinite. This version of the algorithm calls level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization as above. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] PIV +*> \verbatim +*> PIV is INTEGER array, dimension (N) +*> PIV is such that the nonzero entries are P( PIV(K), K ) = 1. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The rank of A given by the number of steps the algorithm +*> completed. +*> \endverbatim +*> +*> \param[in] TOL +*> \verbatim +*> TOL is DOUBLE PRECISION +*> User defined tolerance. If TOL < 0, then N*U*MAX( A(K,K) ) +*> will be used. The algorithm terminates at the (K-1)st step +*> if the pivot <= TOL. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> Work space. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> < 0: If INFO = -K, the K-th argument had an illegal value, +*> = 0: algorithm completed successfully, and +*> > 0: the matrix A is either rank deficient with computed rank +*> as returned in RANK, or is not positive semidefinite. See +*> Section 7 of LAPACK Working Note #161 for further +*> information. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DPSTRF( UPLO, N, A, LDA, PIV, RANK, TOL, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + DOUBLE PRECISION TOL + INTEGER INFO, LDA, N, RANK + CHARACTER UPLO +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), WORK( 2*N ) + INTEGER PIV( N ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION AJJ, DSTOP, DTEMP + INTEGER I, ITEMP, J, JB, K, NB, PVT + LOGICAL UPPER +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + INTEGER ILAENV + LOGICAL LSAME, DISNAN + EXTERNAL DLAMCH, ILAENV, LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DPSTF2, DSCAL, DSWAP, DSYRK, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT, MAXLOC +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPSTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get block size +* + NB = ILAENV( 1, 'DPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL DPSTF2( UPLO, N, A( 1, 1 ), LDA, PIV, RANK, TOL, WORK, + $ INFO ) + GO TO 200 +* + ELSE +* +* Initialize PIV +* + DO 100 I = 1, N + PIV( I ) = I + 100 CONTINUE +* +* Compute stopping value +* + PVT = 1 + AJJ = A( PVT, PVT ) + DO I = 2, N + IF( A( I, I ).GT.AJJ ) THEN + PVT = I + AJJ = A( PVT, PVT ) + END IF + END DO + IF( AJJ.LE.ZERO.OR.DISNAN( AJJ ) ) THEN + RANK = 0 + INFO = 1 + GO TO 200 + END IF +* +* Compute stopping value if not supplied +* + IF( TOL.LT.ZERO ) THEN + DSTOP = N * DLAMCH( 'Epsilon' ) * AJJ + ELSE + DSTOP = TOL + END IF +* +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization P**T * A * P = U**T * U +* + DO 140 K = 1, N, NB +* +* Account for last block not being NB wide +* + JB = MIN( NB, N-K+1 ) +* +* Set relevant part of first half of WORK to zero, +* holds dot products +* + DO 110 I = K, N + WORK( I ) = 0 + 110 CONTINUE +* + DO 130 J = K, K + JB - 1 +* +* Find pivot, test for exit, else swap rows and columns +* Update dot products, compute possible pivots which are +* stored in the second half of WORK +* + DO 120 I = J, N +* + IF( J.GT.K ) THEN + WORK( I ) = WORK( I ) + A( J-1, I )**2 + END IF + WORK( N+I ) = A( I, I ) - WORK( I ) +* + 120 CONTINUE +* + IF( J.GT.1 ) THEN + ITEMP = MAXLOC( WORK( (N+J):(2*N) ), 1 ) + PVT = ITEMP + J - 1 + AJJ = WORK( N+PVT ) + IF( AJJ.LE.DSTOP.OR.DISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 190 + END IF + END IF +* + IF( J.NE.PVT ) THEN +* +* Pivot OK, so can now swap pivot rows and columns +* + A( PVT, PVT ) = A( J, J ) + CALL DSWAP( J-1, A( 1, J ), 1, A( 1, PVT ), 1 ) + IF( PVT.LT.N ) + $ CALL DSWAP( N-PVT, A( J, PVT+1 ), LDA, + $ A( PVT, PVT+1 ), LDA ) + CALL DSWAP( PVT-J-1, A( J, J+1 ), LDA, + $ A( J+1, PVT ), 1 ) +* +* Swap dot products and PIV +* + DTEMP = WORK( J ) + WORK( J ) = WORK( PVT ) + WORK( PVT ) = DTEMP + ITEMP = PIV( PVT ) + PIV( PVT ) = PIV( J ) + PIV( J ) = ITEMP + END IF +* + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of row J. +* + IF( J.LT.N ) THEN + CALL DGEMV( 'Trans', J-K, N-J, -ONE, A( K, J+1 ), + $ LDA, A( K, J ), 1, ONE, A( J, J+1 ), + $ LDA ) + CALL DSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA ) + END IF +* + 130 CONTINUE +* +* Update trailing matrix, J already incremented +* + IF( K+JB.LE.N ) THEN + CALL DSYRK( 'Upper', 'Trans', N-J+1, JB, -ONE, + $ A( K, J ), LDA, ONE, A( J, J ), LDA ) + END IF +* + 140 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization P**T * A * P = L * L**T +* + DO 180 K = 1, N, NB +* +* Account for last block not being NB wide +* + JB = MIN( NB, N-K+1 ) +* +* Set relevant part of first half of WORK to zero, +* holds dot products +* + DO 150 I = K, N + WORK( I ) = 0 + 150 CONTINUE +* + DO 170 J = K, K + JB - 1 +* +* Find pivot, test for exit, else swap rows and columns +* Update dot products, compute possible pivots which are +* stored in the second half of WORK +* + DO 160 I = J, N +* + IF( J.GT.K ) THEN + WORK( I ) = WORK( I ) + A( I, J-1 )**2 + END IF + WORK( N+I ) = A( I, I ) - WORK( I ) +* + 160 CONTINUE +* + IF( J.GT.1 ) THEN + ITEMP = MAXLOC( WORK( (N+J):(2*N) ), 1 ) + PVT = ITEMP + J - 1 + AJJ = WORK( N+PVT ) + IF( AJJ.LE.DSTOP.OR.DISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 190 + END IF + END IF +* + IF( J.NE.PVT ) THEN +* +* Pivot OK, so can now swap pivot rows and columns +* + A( PVT, PVT ) = A( J, J ) + CALL DSWAP( J-1, A( J, 1 ), LDA, A( PVT, 1 ), LDA ) + IF( PVT.LT.N ) + $ CALL DSWAP( N-PVT, A( PVT+1, J ), 1, + $ A( PVT+1, PVT ), 1 ) + CALL DSWAP( PVT-J-1, A( J+1, J ), 1, A( PVT, J+1 ), + $ LDA ) +* +* Swap dot products and PIV +* + DTEMP = WORK( J ) + WORK( J ) = WORK( PVT ) + WORK( PVT ) = DTEMP + ITEMP = PIV( PVT ) + PIV( PVT ) = PIV( J ) + PIV( J ) = ITEMP + END IF +* + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of column J. +* + IF( J.LT.N ) THEN + CALL DGEMV( 'No Trans', N-J, J-K, -ONE, + $ A( J+1, K ), LDA, A( J, K ), LDA, ONE, + $ A( J+1, J ), 1 ) + CALL DSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 ) + END IF +* + 170 CONTINUE +* +* Update trailing matrix, J already incremented +* + IF( K+JB.LE.N ) THEN + CALL DSYRK( 'Lower', 'No Trans', N-J+1, JB, -ONE, + $ A( J, K ), LDA, ONE, A( J, J ), LDA ) + END IF +* + 180 CONTINUE +* + END IF + END IF +* +* Ran to completion, A has full rank +* + RANK = N +* + GO TO 200 + 190 CONTINUE +* +* Rank is the number of steps completed. Set INFO = 1 to signal +* that the factorization cannot be used to solve a system. +* + RANK = J - 1 + INFO = 1 +* + 200 CONTINUE + RETURN +* +* End of DPSTRF +* + END diff --git a/dspl/liblapack/SRC/dptcon.f b/dspl/liblapack/SRC/dptcon.f new file mode 100644 index 0000000..84c4ed7 --- /dev/null +++ b/dspl/liblapack/SRC/dptcon.f @@ -0,0 +1,221 @@ +*> \brief \b DPTCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPTCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPTCON( N, D, E, ANORM, RCOND, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPTCON computes the reciprocal of the condition number (in the +*> 1-norm) of a real symmetric positive definite tridiagonal matrix +*> using the factorization A = L*D*L**T or A = U**T*D*U computed by +*> DPTTRF. +*> +*> Norm(inv(A)) is computed by a direct method, and the reciprocal of +*> the condition number is computed as +*> RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the diagonal matrix D from the +*> factorization of A, as computed by DPTTRF. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) off-diagonal elements of the unit bidiagonal factor +*> U or L from the factorization of A, as computed by DPTTRF. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is the +*> 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doublePTcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The method used is described in Nicholas J. Higham, "Efficient +*> Algorithms for Computing the Condition Number of a Tridiagonal +*> Matrix", SIAM J. Sci. Stat. Comput., Vol. 7, No. 1, January 1986. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DPTCON( N, D, E, ANORM, RCOND, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IX + DOUBLE PRECISION AINVNM +* .. +* .. External Functions .. + INTEGER IDAMAX + EXTERNAL IDAMAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPTCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* +* Check that D(1:N) is positive. +* + DO 10 I = 1, N + IF( D( I ).LE.ZERO ) + $ RETURN + 10 CONTINUE +* +* Solve M(A) * x = e, where M(A) = (m(i,j)) is given by +* +* m(i,j) = abs(A(i,j)), i = j, +* m(i,j) = -abs(A(i,j)), i .ne. j, +* +* and e = [ 1, 1, ..., 1 ]**T. Note M(A) = M(L)*D*M(L)**T. +* +* Solve M(L) * x = e. +* + WORK( 1 ) = ONE + DO 20 I = 2, N + WORK( I ) = ONE + WORK( I-1 )*ABS( E( I-1 ) ) + 20 CONTINUE +* +* Solve D * M(L)**T * x = b. +* + WORK( N ) = WORK( N ) / D( N ) + DO 30 I = N - 1, 1, -1 + WORK( I ) = WORK( I ) / D( I ) + WORK( I+1 )*ABS( E( I ) ) + 30 CONTINUE +* +* Compute AINVNM = max(x(i)), 1<=i<=n. +* + IX = IDAMAX( N, WORK, 1 ) + AINVNM = ABS( WORK( IX ) ) +* +* Compute the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of DPTCON +* + END diff --git a/dspl/liblapack/SRC/dpteqr.f b/dspl/liblapack/SRC/dpteqr.f new file mode 100644 index 0000000..ecfc775 --- /dev/null +++ b/dspl/liblapack/SRC/dpteqr.f @@ -0,0 +1,261 @@ +*> \brief \b DPTEQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPTEQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPZ +* INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPTEQR computes all eigenvalues and, optionally, eigenvectors of a +*> symmetric positive definite tridiagonal matrix by first factoring the +*> matrix using DPTTRF, and then calling DBDSQR to compute the singular +*> values of the bidiagonal factor. +*> +*> This routine computes the eigenvalues of the positive definite +*> tridiagonal matrix to high relative accuracy. This means that if the +*> eigenvalues range over many orders of magnitude in size, then the +*> small eigenvalues and corresponding eigenvectors will be computed +*> more accurately than, for example, with the standard QR method. +*> +*> The eigenvectors of a full or band symmetric positive definite matrix +*> can also be found if DSYTRD, DSPTRD, or DSBTRD has been used to +*> reduce this matrix to tridiagonal form. (The reduction to tridiagonal +*> form, however, may preclude the possibility of obtaining high +*> relative accuracy in the small eigenvalues of the original matrix, if +*> these eigenvalues range over many orders of magnitude.) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only. +*> = 'V': Compute eigenvectors of original symmetric +*> matrix also. Array Z contains the orthogonal +*> matrix used to reduce the original matrix to +*> tridiagonal form. +*> = 'I': Compute eigenvectors of tridiagonal matrix also. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal +*> matrix. +*> On normal exit, D contains the eigenvalues, in descending +*> order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', the orthogonal matrix used in the +*> reduction to tridiagonal form. +*> On exit, if COMPZ = 'V', the orthonormal eigenvectors of the +*> original symmetric matrix; +*> if COMPZ = 'I', the orthonormal eigenvectors of the +*> tridiagonal matrix. +*> If INFO > 0 on exit, Z contains the eigenvectors associated +*> with only the stored eigenvalues. +*> If COMPZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> COMPZ = 'V' or 'I', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, and i is: +*> <= N the Cholesky factorization of the matrix could +*> not be performed because the i-th principal minor +*> was not positive definite. +*> > N the SVD algorithm failed to converge; +*> if INFO = N+i, i off-diagonal elements of the +*> bidiagonal factor did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doublePTcomputational +* +* ===================================================================== + SUBROUTINE DPTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DBDSQR, DLASET, DPTTRF, XERBLA +* .. +* .. Local Arrays .. + DOUBLE PRECISION C( 1, 1 ), VT( 1, 1 ) +* .. +* .. Local Scalars .. + INTEGER I, ICOMPZ, NRU +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPTEQR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ICOMPZ.GT.0 ) + $ Z( 1, 1 ) = ONE + RETURN + END IF + IF( ICOMPZ.EQ.2 ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* +* Call DPTTRF to factor the matrix. +* + CALL DPTTRF( N, D, E, INFO ) + IF( INFO.NE.0 ) + $ RETURN + DO 10 I = 1, N + D( I ) = SQRT( D( I ) ) + 10 CONTINUE + DO 20 I = 1, N - 1 + E( I ) = E( I )*D( I ) + 20 CONTINUE +* +* Call DBDSQR to compute the singular values/vectors of the +* bidiagonal factor. +* + IF( ICOMPZ.GT.0 ) THEN + NRU = N + ELSE + NRU = 0 + END IF + CALL DBDSQR( 'Lower', N, 0, NRU, 0, D, E, VT, 1, Z, LDZ, C, 1, + $ WORK, INFO ) +* +* Square the singular values. +* + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + D( I ) = D( I )*D( I ) + 30 CONTINUE + ELSE + INFO = N + INFO + END IF +* + RETURN +* +* End of DPTEQR +* + END diff --git a/dspl/liblapack/SRC/dptrfs.f b/dspl/liblapack/SRC/dptrfs.f new file mode 100644 index 0000000..ca038a8 --- /dev/null +++ b/dspl/liblapack/SRC/dptrfs.f @@ -0,0 +1,395 @@ +*> \brief \b DPTRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPTRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPTRFS( N, NRHS, D, E, DF, EF, B, LDB, X, LDX, FERR, +* BERR, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION B( LDB, * ), BERR( * ), D( * ), DF( * ), +* $ E( * ), EF( * ), FERR( * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPTRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric positive definite +*> and tridiagonal, and provides error bounds and backward error +*> estimates for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the tridiagonal matrix A. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) subdiagonal elements of the tridiagonal matrix A. +*> \endverbatim +*> +*> \param[in] DF +*> \verbatim +*> DF is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the diagonal matrix D from the +*> factorization computed by DPTTRF. +*> \endverbatim +*> +*> \param[in] EF +*> \verbatim +*> EF is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) subdiagonal elements of the unit bidiagonal factor +*> L from the factorization computed by DPTTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by DPTTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doublePTcomputational +* +* ===================================================================== + SUBROUTINE DPTRFS( N, NRHS, D, E, DF, EF, B, LDB, X, LDX, FERR, + $ BERR, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( LDB, * ), BERR( * ), D( * ), DF( * ), + $ E( * ), EF( * ), FERR( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER COUNT, I, IX, J, NZ + DOUBLE PRECISION BI, CX, DX, EPS, EX, LSTRES, S, SAFE1, SAFE2, + $ SAFMIN +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DPTTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL IDAMAX, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPTRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = 4 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 90 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X. Also compute +* abs(A)*abs(x) + abs(b) for use in the backward error bound. +* + IF( N.EQ.1 ) THEN + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + WORK( N+1 ) = BI - DX + WORK( 1 ) = ABS( BI ) + ABS( DX ) + ELSE + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + EX = E( 1 )*X( 2, J ) + WORK( N+1 ) = BI - DX - EX + WORK( 1 ) = ABS( BI ) + ABS( DX ) + ABS( EX ) + DO 30 I = 2, N - 1 + BI = B( I, J ) + CX = E( I-1 )*X( I-1, J ) + DX = D( I )*X( I, J ) + EX = E( I )*X( I+1, J ) + WORK( N+I ) = BI - CX - DX - EX + WORK( I ) = ABS( BI ) + ABS( CX ) + ABS( DX ) + ABS( EX ) + 30 CONTINUE + BI = B( N, J ) + CX = E( N-1 )*X( N-1, J ) + DX = D( N )*X( N, J ) + WORK( N+N ) = BI - CX - DX + WORK( N ) = ABS( BI ) + ABS( CX ) + ABS( DX ) + END IF +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + S = ZERO + DO 40 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 40 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL DPTTRS( N, 1, DF, EF, WORK( N+1 ), N, INFO ) + CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* + DO 50 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 50 CONTINUE + IX = IDAMAX( N, WORK, 1 ) + FERR( J ) = WORK( IX ) +* +* Estimate the norm of inv(A). +* +* Solve M(A) * x = e, where M(A) = (m(i,j)) is given by +* +* m(i,j) = abs(A(i,j)), i = j, +* m(i,j) = -abs(A(i,j)), i .ne. j, +* +* and e = [ 1, 1, ..., 1 ]**T. Note M(A) = M(L)*D*M(L)**T. +* +* Solve M(L) * x = e. +* + WORK( 1 ) = ONE + DO 60 I = 2, N + WORK( I ) = ONE + WORK( I-1 )*ABS( EF( I-1 ) ) + 60 CONTINUE +* +* Solve D * M(L)**T * x = b. +* + WORK( N ) = WORK( N ) / DF( N ) + DO 70 I = N - 1, 1, -1 + WORK( I ) = WORK( I ) / DF( I ) + WORK( I+1 )*ABS( EF( I ) ) + 70 CONTINUE +* +* Compute norm(inv(A)) = max(x(i)), 1<=i<=n. +* + IX = IDAMAX( N, WORK, 1 ) + FERR( J ) = FERR( J )*ABS( WORK( IX ) ) +* +* Normalize error. +* + LSTRES = ZERO + DO 80 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 80 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 90 CONTINUE +* + RETURN +* +* End of DPTRFS +* + END diff --git a/dspl/liblapack/SRC/dptsv.f b/dspl/liblapack/SRC/dptsv.f new file mode 100644 index 0000000..019ed4f --- /dev/null +++ b/dspl/liblapack/SRC/dptsv.f @@ -0,0 +1,167 @@ +*> \brief DPTSV computes the solution to system of linear equations A * X = B for PT matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPTSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPTSV( N, NRHS, D, E, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION B( LDB, * ), D( * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPTSV computes the solution to a real system of linear equations +*> A*X = B, where A is an N-by-N symmetric positive definite tridiagonal +*> matrix, and X and B are N-by-NRHS matrices. +*> +*> A is factored as A = L*D*L**T, and the factored form of A is then +*> used to solve the system of equations. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix +*> A. On exit, the n diagonal elements of the diagonal matrix +*> D from the factorization A = L*D*L**T. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix A. On exit, the (n-1) subdiagonal elements of the +*> unit bidiagonal factor L from the L*D*L**T factorization of +*> A. (E can also be regarded as the superdiagonal of the unit +*> bidiagonal factor U from the U**T*D*U factorization of A.) +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the solution has not been +*> computed. The factorization has not been completed +*> unless i = N. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doublePTsolve +* +* ===================================================================== + SUBROUTINE DPTSV( N, NRHS, D, E, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( LDB, * ), D( * ), E( * ) +* .. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL DPTTRF, DPTTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPTSV ', -INFO ) + RETURN + END IF +* +* Compute the L*D*L**T (or U**T*D*U) factorization of A. +* + CALL DPTTRF( N, D, E, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL DPTTRS( N, NRHS, D, E, B, LDB, INFO ) + END IF + RETURN +* +* End of DPTSV +* + END diff --git a/dspl/liblapack/SRC/dptsvx.f b/dspl/liblapack/SRC/dptsvx.f new file mode 100644 index 0000000..59f3445 --- /dev/null +++ b/dspl/liblapack/SRC/dptsvx.f @@ -0,0 +1,336 @@ +*> \brief DPTSVX computes the solution to system of linear equations A * X = B for PT matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPTSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPTSVX( FACT, N, NRHS, D, E, DF, EF, B, LDB, X, LDX, +* RCOND, FERR, BERR, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER FACT +* INTEGER INFO, LDB, LDX, N, NRHS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION B( LDB, * ), BERR( * ), D( * ), DF( * ), +* $ E( * ), EF( * ), FERR( * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPTSVX uses the factorization A = L*D*L**T to compute the solution +*> to a real system of linear equations A*X = B, where A is an N-by-N +*> symmetric positive definite tridiagonal matrix and X and B are +*> N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'N', the matrix A is factored as A = L*D*L**T, where L +*> is a unit lower bidiagonal matrix and D is diagonal. The +*> factorization can also be regarded as having the form +*> A = U**T*D*U. +*> +*> 2. If the leading i-by-i principal minor is not positive definite, +*> then the routine returns with INFO = i. Otherwise, the factored +*> form of A is used to estimate the condition number of the matrix +*> A. If the reciprocal of the condition number is less than machine +*> precision, INFO = N+1 is returned as a warning, but the routine +*> still goes on to solve for X and compute error bounds as +*> described below. +*> +*> 3. The system of equations is solved for X using the factored form +*> of A. +*> +*> 4. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of A has been +*> supplied on entry. +*> = 'F': On entry, DF and EF contain the factored form of A. +*> D, E, DF, and EF will not be modified. +*> = 'N': The matrix A will be copied to DF and EF and +*> factored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the tridiagonal matrix A. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) subdiagonal elements of the tridiagonal matrix A. +*> \endverbatim +*> +*> \param[in,out] DF +*> \verbatim +*> DF is DOUBLE PRECISION array, dimension (N) +*> If FACT = 'F', then DF is an input argument and on entry +*> contains the n diagonal elements of the diagonal matrix D +*> from the L*D*L**T factorization of A. +*> If FACT = 'N', then DF is an output argument and on exit +*> contains the n diagonal elements of the diagonal matrix D +*> from the L*D*L**T factorization of A. +*> \endverbatim +*> +*> \param[in,out] EF +*> \verbatim +*> EF is DOUBLE PRECISION array, dimension (N-1) +*> If FACT = 'F', then EF is an input argument and on entry +*> contains the (n-1) subdiagonal elements of the unit +*> bidiagonal factor L from the L*D*L**T factorization of A. +*> If FACT = 'N', then EF is an output argument and on exit +*> contains the (n-1) subdiagonal elements of the unit +*> bidiagonal factor L from the L*D*L**T factorization of A. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> If INFO = 0 of INFO = N+1, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal condition number of the matrix A. If RCOND +*> is less than the machine precision (in particular, if +*> RCOND = 0), the matrix is singular to working precision. +*> This condition is indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in any +*> element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: the leading minor of order i of A is +*> not positive definite, so the factorization +*> could not be completed, and the solution has not +*> been computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doublePTsolve +* +* ===================================================================== + SUBROUTINE DPTSVX( FACT, N, NRHS, D, E, DF, EF, B, LDB, X, LDX, + $ RCOND, FERR, BERR, WORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER FACT + INTEGER INFO, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( LDB, * ), BERR( * ), D( * ), DF( * ), + $ E( * ), EF( * ), FERR( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT + DOUBLE PRECISION ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLACPY, DPTCON, DPTRFS, DPTTRF, DPTTRS, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPTSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the L*D*L**T (or U**T*D*U) factorization of A. +* + CALL DCOPY( N, D, 1, DF, 1 ) + IF( N.GT.1 ) + $ CALL DCOPY( N-1, E, 1, EF, 1 ) + CALL DPTTRF( N, DF, EF, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = DLANST( '1', N, D, E ) +* +* Compute the reciprocal of the condition number of A. +* + CALL DPTCON( N, DF, EF, ANORM, RCOND, WORK, INFO ) +* +* Compute the solution vectors X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DPTTRS( N, NRHS, DF, EF, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL DPTRFS( N, NRHS, D, E, DF, EF, B, LDB, X, LDX, FERR, BERR, + $ WORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of DPTSVX +* + END diff --git a/dspl/liblapack/SRC/dpttrf.f b/dspl/liblapack/SRC/dpttrf.f new file mode 100644 index 0000000..33a67ad --- /dev/null +++ b/dspl/liblapack/SRC/dpttrf.f @@ -0,0 +1,211 @@ +*> \brief \b DPTTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPTTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPTTRF( N, D, E, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPTTRF computes the L*D*L**T factorization of a real symmetric +*> positive definite tridiagonal matrix A. The factorization may also +*> be regarded as having the form A = U**T*D*U. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix +*> A. On exit, the n diagonal elements of the diagonal matrix +*> D from the L*D*L**T factorization of A. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix A. On exit, the (n-1) subdiagonal elements of the +*> unit bidiagonal factor L from the L*D*L**T factorization of A. +*> E can also be regarded as the superdiagonal of the unit +*> bidiagonal factor U from the U**T*D*U factorization of A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, the leading minor of order k is not +*> positive definite; if k < N, the factorization could not +*> be completed, while if k = N, the factorization was +*> completed, but D(N) <= 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doublePTcomputational +* +* ===================================================================== + SUBROUTINE DPTTRF( N, D, E, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, I4 + DOUBLE PRECISION EI +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'DPTTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the L*D*L**T (or U**T*D*U) factorization of A. +* + I4 = MOD( N-1, 4 ) + DO 10 I = 1, I4 + IF( D( I ).LE.ZERO ) THEN + INFO = I + GO TO 30 + END IF + EI = E( I ) + E( I ) = EI / D( I ) + D( I+1 ) = D( I+1 ) - E( I )*EI + 10 CONTINUE +* + DO 20 I = I4 + 1, N - 4, 4 +* +* Drop out of the loop if d(i) <= 0: the matrix is not positive +* definite. +* + IF( D( I ).LE.ZERO ) THEN + INFO = I + GO TO 30 + END IF +* +* Solve for e(i) and d(i+1). +* + EI = E( I ) + E( I ) = EI / D( I ) + D( I+1 ) = D( I+1 ) - E( I )*EI +* + IF( D( I+1 ).LE.ZERO ) THEN + INFO = I + 1 + GO TO 30 + END IF +* +* Solve for e(i+1) and d(i+2). +* + EI = E( I+1 ) + E( I+1 ) = EI / D( I+1 ) + D( I+2 ) = D( I+2 ) - E( I+1 )*EI +* + IF( D( I+2 ).LE.ZERO ) THEN + INFO = I + 2 + GO TO 30 + END IF +* +* Solve for e(i+2) and d(i+3). +* + EI = E( I+2 ) + E( I+2 ) = EI / D( I+2 ) + D( I+3 ) = D( I+3 ) - E( I+2 )*EI +* + IF( D( I+3 ).LE.ZERO ) THEN + INFO = I + 3 + GO TO 30 + END IF +* +* Solve for e(i+3) and d(i+4). +* + EI = E( I+3 ) + E( I+3 ) = EI / D( I+3 ) + D( I+4 ) = D( I+4 ) - E( I+3 )*EI + 20 CONTINUE +* +* Check d(n) for positive definiteness. +* + IF( D( N ).LE.ZERO ) + $ INFO = N +* + 30 CONTINUE + RETURN +* +* End of DPTTRF +* + END diff --git a/dspl/liblapack/SRC/dpttrs.f b/dspl/liblapack/SRC/dpttrs.f new file mode 100644 index 0000000..34cbe9b --- /dev/null +++ b/dspl/liblapack/SRC/dpttrs.f @@ -0,0 +1,182 @@ +*> \brief \b DPTTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPTTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPTTRS( N, NRHS, D, E, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION B( LDB, * ), D( * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPTTRS solves a tridiagonal system of the form +*> A * X = B +*> using the L*D*L**T factorization of A computed by DPTTRF. D is a +*> diagonal matrix specified in the vector D, L is a unit bidiagonal +*> matrix whose subdiagonal is specified in the vector E, and X and B +*> are N by NRHS matrices. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the tridiagonal matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the diagonal matrix D from the +*> L*D*L**T factorization of A. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) subdiagonal elements of the unit bidiagonal factor +*> L from the L*D*L**T factorization of A. E can also be regarded +*> as the superdiagonal of the unit bidiagonal factor U from the +*> factorization A = U**T*D*U. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side vectors B for the system of +*> linear equations. +*> On exit, the solution vectors, X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doublePTcomputational +* +* ===================================================================== + SUBROUTINE DPTTRS( N, NRHS, D, E, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( LDB, * ), D( * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER J, JB, NB +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DPTTS2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DPTTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Determine the number of right-hand sides to solve at a time. +* + IF( NRHS.EQ.1 ) THEN + NB = 1 + ELSE + NB = MAX( 1, ILAENV( 1, 'DPTTRS', ' ', N, NRHS, -1, -1 ) ) + END IF +* + IF( NB.GE.NRHS ) THEN + CALL DPTTS2( N, NRHS, D, E, B, LDB ) + ELSE + DO 10 J = 1, NRHS, NB + JB = MIN( NRHS-J+1, NB ) + CALL DPTTS2( N, JB, D, E, B( 1, J ), LDB ) + 10 CONTINUE + END IF +* + RETURN +* +* End of DPTTRS +* + END diff --git a/dspl/liblapack/SRC/dptts2.f b/dspl/liblapack/SRC/dptts2.f new file mode 100644 index 0000000..99e212d --- /dev/null +++ b/dspl/liblapack/SRC/dptts2.f @@ -0,0 +1,158 @@ +*> \brief \b DPTTS2 solves a tridiagonal system of the form AX=B using the L D LH factorization computed by spttrf. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DPTTS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DPTTS2( N, NRHS, D, E, B, LDB ) +* +* .. Scalar Arguments .. +* INTEGER LDB, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION B( LDB, * ), D( * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DPTTS2 solves a tridiagonal system of the form +*> A * X = B +*> using the L*D*L**T factorization of A computed by DPTTRF. D is a +*> diagonal matrix specified in the vector D, L is a unit bidiagonal +*> matrix whose subdiagonal is specified in the vector E, and X and B +*> are N by NRHS matrices. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the tridiagonal matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the diagonal matrix D from the +*> L*D*L**T factorization of A. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) subdiagonal elements of the unit bidiagonal factor +*> L from the L*D*L**T factorization of A. E can also be regarded +*> as the superdiagonal of the unit bidiagonal factor U from the +*> factorization A = U**T*D*U. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side vectors B for the system of +*> linear equations. +*> On exit, the solution vectors, X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doublePTcomputational +* +* ===================================================================== + SUBROUTINE DPTTS2( N, NRHS, D, E, B, LDB ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( LDB, * ), D( * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Subroutines .. + EXTERNAL DSCAL +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + IF( N.EQ.1 ) + $ CALL DSCAL( NRHS, 1.D0 / D( 1 ), B, LDB ) + RETURN + END IF +* +* Solve A * X = B using the factorization A = L*D*L**T, +* overwriting each right hand side vector with its solution. +* + DO 30 J = 1, NRHS +* +* Solve L * x = b. +* + DO 10 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 ) + 10 CONTINUE +* +* Solve D * L**T * x = b. +* + B( N, J ) = B( N, J ) / D( N ) + DO 20 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) / D( I ) - B( I+1, J )*E( I ) + 20 CONTINUE + 30 CONTINUE +* + RETURN +* +* End of DPTTS2 +* + END diff --git a/dspl/liblapack/SRC/drscl.f b/dspl/liblapack/SRC/drscl.f new file mode 100644 index 0000000..cbd66dd --- /dev/null +++ b/dspl/liblapack/SRC/drscl.f @@ -0,0 +1,174 @@ +*> \brief \b DRSCL multiplies a vector by the reciprocal of a real scalar. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DRSCL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DRSCL( N, SA, SX, INCX ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* DOUBLE PRECISION SA +* .. +* .. Array Arguments .. +* DOUBLE PRECISION SX( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DRSCL multiplies an n-element real vector x by the real scalar 1/a. +*> This is done without overflow or underflow as long as +*> the final result x/a does not overflow or underflow. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of components of the vector x. +*> \endverbatim +*> +*> \param[in] SA +*> \verbatim +*> SA is DOUBLE PRECISION +*> The scalar a which is used to divide each component of x. +*> SA must be >= 0, or the subroutine will divide by zero. +*> \endverbatim +*> +*> \param[in,out] SX +*> \verbatim +*> SX is DOUBLE PRECISION array, dimension +*> (1+(N-1)*abs(INCX)) +*> The n-element vector x. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive values of the vector SX. +*> > 0: SX(1) = X(1) and SX(1+(i-1)*INCX) = x(i), 1< i<= n +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DRSCL( N, SA, SX, INCX ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INCX, N + DOUBLE PRECISION SA +* .. +* .. Array Arguments .. + DOUBLE PRECISION SX( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + DOUBLE PRECISION BIGNUM, CDEN, CDEN1, CNUM, CNUM1, MUL, SMLNUM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DLABAD +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Initialize the denominator to SA and the numerator to 1. +* + CDEN = SA + CNUM = ONE +* + 10 CONTINUE + CDEN1 = CDEN*SMLNUM + CNUM1 = CNUM / BIGNUM + IF( ABS( CDEN1 ).GT.ABS( CNUM ) .AND. CNUM.NE.ZERO ) THEN +* +* Pre-multiply X by SMLNUM if CDEN is large compared to CNUM. +* + MUL = SMLNUM + DONE = .FALSE. + CDEN = CDEN1 + ELSE IF( ABS( CNUM1 ).GT.ABS( CDEN ) ) THEN +* +* Pre-multiply X by BIGNUM if CDEN is small compared to CNUM. +* + MUL = BIGNUM + DONE = .FALSE. + CNUM = CNUM1 + ELSE +* +* Multiply X by CNUM / CDEN and return. +* + MUL = CNUM / CDEN + DONE = .TRUE. + END IF +* +* Scale the vector X by MUL +* + CALL DSCAL( N, MUL, SX, INCX ) +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of DRSCL +* + END diff --git a/dspl/liblapack/SRC/dsb2st_kernels.f b/dspl/liblapack/SRC/dsb2st_kernels.f new file mode 100644 index 0000000..3bf126d --- /dev/null +++ b/dspl/liblapack/SRC/dsb2st_kernels.f @@ -0,0 +1,377 @@ +*> \brief \b DSB2ST_KERNELS +* +* @generated from zhb2st_kernels.f, fortran z -> d, Wed Dec 7 08:22:39 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSB2ST_KERNELS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSB2ST_KERNELS( UPLO, WANTZ, TTYPE, +* ST, ED, SWEEP, N, NB, IB, +* A, LDA, V, TAU, LDVT, WORK) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* LOGICAL WANTZ +* INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), V( * ), +* TAU( * ), WORK( * ) +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSB2ST_KERNELS is an internal routine used by the DSYTRD_SB2ST +*> subroutine. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL which indicate if Eigenvalue are requested or both +*> Eigenvalue/Eigenvectors. +*> \endverbatim +*> +*> \param[in] TTYPE +*> \verbatim +*> TTYPE is INTEGER +*> \endverbatim +*> +*> \param[in] ST +*> \verbatim +*> ST is INTEGER +*> internal parameter for indices. +*> \endverbatim +*> +*> \param[in] ED +*> \verbatim +*> ED is INTEGER +*> internal parameter for indices. +*> \endverbatim +*> +*> \param[in] SWEEP +*> \verbatim +*> SWEEP is INTEGER +*> internal parameter for indices. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER. The order of the matrix A. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER. The size of the band. +*> \endverbatim +*> +*> \param[in] IB +*> \verbatim +*> IB is INTEGER. +*> \endverbatim +*> +*> \param[in, out] A +*> \verbatim +*> A is DOUBLE PRECISION array. A pointer to the matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER. The leading dimension of the matrix A. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension 2*n if eigenvalues only are +*> requested or to be queried for vectors. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (2*n). +*> The scalar factors of the Householder reflectors are stored +*> in this array. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array. Workspace of size nb. +*> \endverbatim +*> +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSB2ST_KERNELS( UPLO, WANTZ, TTYPE, + $ ST, ED, SWEEP, N, NB, IB, + $ A, LDA, V, TAU, LDVT, WORK) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + LOGICAL WANTZ + INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), V( * ), + $ TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, + $ ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS, + $ DPOS, OFDPOS, AJETER + DOUBLE PRECISION CTMP +* .. +* .. External Subroutines .. + EXTERNAL DLARFG, DLARFX, DLARFY +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. +* .. Executable Statements .. +* + AJETER = IB + LDVT + UPPER = LSAME( UPLO, 'U' ) + + IF( UPPER ) THEN + DPOS = 2 * NB + 1 + OFDPOS = 2 * NB + ELSE + DPOS = 1 + OFDPOS = 2 + ENDIF + +* +* Upper case +* + IF( UPPER ) THEN +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ENDIF +* + IF( TTYPE.EQ.1 ) THEN + LM = ED - ST + 1 +* + V( VPOS ) = ONE + DO 10 I = 1, LM-1 + V( VPOS+I ) = ( A( OFDPOS-I, ST+I ) ) + A( OFDPOS-I, ST+I ) = ZERO + 10 CONTINUE + CTMP = ( A( OFDPOS, ST ) ) + CALL DLARFG( LM, CTMP, V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) + A( OFDPOS, ST ) = CTMP +* + LM = ED - ST + 1 + CALL DLARFY( UPLO, LM, V( VPOS ), 1, + $ ( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + ENDIF +* + IF( TTYPE.EQ.3 ) THEN +* + LM = ED - ST + 1 + CALL DLARFY( UPLO, LM, V( VPOS ), 1, + $ ( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + ENDIF +* + IF( TTYPE.EQ.2 ) THEN + J1 = ED+1 + J2 = MIN( ED+NB, N ) + LN = ED-ST+1 + LM = J2-J1+1 + IF( LM.GT.0) THEN + CALL DLARFX( 'Left', LN, LM, V( VPOS ), + $ ( TAU( TAUPOS ) ), + $ A( DPOS-NB, J1 ), LDA-1, WORK) +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ENDIF +* + V( VPOS ) = ONE + DO 30 I = 1, LM-1 + V( VPOS+I ) = + $ ( A( DPOS-NB-I, J1+I ) ) + A( DPOS-NB-I, J1+I ) = ZERO + 30 CONTINUE + CTMP = ( A( DPOS-NB, J1 ) ) + CALL DLARFG( LM, CTMP, V( VPOS+1 ), 1, TAU( TAUPOS ) ) + A( DPOS-NB, J1 ) = CTMP +* + CALL DLARFX( 'Right', LN-1, LM, V( VPOS ), + $ TAU( TAUPOS ), + $ A( DPOS-NB+1, J1 ), LDA-1, WORK) + ENDIF + ENDIF +* +* Lower case +* + ELSE +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ENDIF +* + IF( TTYPE.EQ.1 ) THEN + LM = ED - ST + 1 +* + V( VPOS ) = ONE + DO 20 I = 1, LM-1 + V( VPOS+I ) = A( OFDPOS+I, ST-1 ) + A( OFDPOS+I, ST-1 ) = ZERO + 20 CONTINUE + CALL DLARFG( LM, A( OFDPOS, ST-1 ), V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) +* + LM = ED - ST + 1 +* + CALL DLARFY( UPLO, LM, V( VPOS ), 1, + $ ( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + + ENDIF +* + IF( TTYPE.EQ.3 ) THEN + LM = ED - ST + 1 +* + CALL DLARFY( UPLO, LM, V( VPOS ), 1, + $ ( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + + ENDIF +* + IF( TTYPE.EQ.2 ) THEN + J1 = ED+1 + J2 = MIN( ED+NB, N ) + LN = ED-ST+1 + LM = J2-J1+1 +* + IF( LM.GT.0) THEN + CALL DLARFX( 'Right', LM, LN, V( VPOS ), + $ TAU( TAUPOS ), A( DPOS+NB, ST ), + $ LDA-1, WORK) +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ENDIF +* + V( VPOS ) = ONE + DO 40 I = 1, LM-1 + V( VPOS+I ) = A( DPOS+NB+I, ST ) + A( DPOS+NB+I, ST ) = ZERO + 40 CONTINUE + CALL DLARFG( LM, A( DPOS+NB, ST ), V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) +* + CALL DLARFX( 'Left', LM, LN-1, V( VPOS ), + $ ( TAU( TAUPOS ) ), + $ A( DPOS+NB-1, ST+1 ), LDA-1, WORK) + + ENDIF + ENDIF + ENDIF +* + RETURN +* +* END OF DSB2ST_KERNELS +* + END diff --git a/dspl/liblapack/SRC/dsbev.f b/dspl/liblapack/SRC/dsbev.f new file mode 100644 index 0000000..416ae22 --- /dev/null +++ b/dspl/liblapack/SRC/dsbev.f @@ -0,0 +1,287 @@ +*> \brief DSBEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSBEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSBEV( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSBEV computes all the eigenvalues and, optionally, eigenvectors of +*> a real symmetric band matrix A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (max(1,3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHEReigen +* +* ===================================================================== + SUBROUTINE DSBEV( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, WANTZ + INTEGER IINFO, IMAX, INDE, INDWRK, ISCALE + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSB + EXTERNAL LSAME, DLAMCH, DLANSB +* .. +* .. External Subroutines .. + EXTERNAL DLASCL, DSBTRD, DSCAL, DSTEQR, DSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBEV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( LOWER ) THEN + W( 1 ) = AB( 1, 1 ) + ELSE + W( 1 ) = AB( KD+1, 1 ) + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL DLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL DLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call DSBTRD to reduce symmetric band matrix to tridiagonal form. +* + INDE = 1 + INDWRK = INDE + N + CALL DSBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, WORK( INDE ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL DSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + RETURN +* +* End of DSBEV +* + END diff --git a/dspl/liblapack/SRC/dsbev_2stage.f b/dspl/liblapack/SRC/dsbev_2stage.f new file mode 100644 index 0000000..79991b6 --- /dev/null +++ b/dspl/liblapack/SRC/dsbev_2stage.f @@ -0,0 +1,380 @@ +*> \brief DSBEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* @precisions fortran d -> s +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSBEV_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSBEV_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, +* WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, N, LWORK +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSBEV_2STAGE computes all the eigenvalues and, optionally, eigenvectors of +*> a real symmetric band matrix A using the 2stage technique for +*> the reduction to tridiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension LWORK +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS + N +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleOTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSBEV_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, + $ WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, N, LWORK +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, WANTZ, LQUERY + INTEGER IINFO, IMAX, INDE, INDWRK, ISCALE, + $ LLWORK, LWMIN, LHTRD, LWTRD, IB, INDHOUS + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + DOUBLE PRECISION DLAMCH, DLANSB + EXTERNAL LSAME, DLAMCH, DLANSB, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL DLASCL, DSCAL, DSTEQR, DSTERF, XERBLA, + $ DSYTRD_SB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + IB = ILAENV2STAGE( 2, 'DSYTRD_SB2ST', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'DSYTRD_SB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'DSYTRD_SB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWMIN = N + LHTRD + LWTRD + WORK( 1 ) = LWMIN + ENDIF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -11 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBEV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( LOWER ) THEN + W( 1 ) = AB( 1, 1 ) + ELSE + W( 1 ) = AB( KD+1, 1 ) + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL DLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL DLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call DSYTRD_SB2ST to reduce symmetric band matrix to tridiagonal form. +* + INDE = 1 + INDHOUS = INDE + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL DSYTRD_SB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, + $ WORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL DSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of DSBEV_2STAGE +* + END diff --git a/dspl/liblapack/SRC/dsbevd.f b/dspl/liblapack/SRC/dsbevd.f new file mode 100644 index 0000000..0fa15c0 --- /dev/null +++ b/dspl/liblapack/SRC/dsbevd.f @@ -0,0 +1,360 @@ +*> \brief DSBEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSBEVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, +* LWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSBEVD computes all the eigenvalues and, optionally, eigenvectors of +*> a real symmetric band matrix A. If eigenvectors are desired, it uses +*> a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, +*> dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> IF N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 2, LWORK must be at least 2*N. +*> If JOBZ = 'V' and N > 2, LWORK must be at least +*> ( 1 + 5*N + 2*N**2 ). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 2, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHEReigen +* +* ===================================================================== + SUBROUTINE DSBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, + $ LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, INDE, INDWK2, INDWRK, ISCALE, LIWMIN, + $ LLWRK2, LWMIN + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSB + EXTERNAL LSAME, DLAMCH, DLANSB +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLACPY, DLASCL, DSBTRD, DSCAL, DSTEDC, + $ DSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 5*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + END IF + END IF + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AB( 1, 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL DLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL DLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call DSBTRD to reduce symmetric band matrix to tridiagonal form. +* + INDE = 1 + INDWRK = INDE + N + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 + CALL DSBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, WORK( INDE ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call SSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL DSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL DGEMM( 'N', 'N', N, N, N, ONE, Z, LDZ, WORK( INDWRK ), N, + $ ZERO, WORK( INDWK2 ), N ) + CALL DLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL DSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of DSBEVD +* + END diff --git a/dspl/liblapack/SRC/dsbevd_2stage.f b/dspl/liblapack/SRC/dsbevd_2stage.f new file mode 100644 index 0000000..859f87c --- /dev/null +++ b/dspl/liblapack/SRC/dsbevd_2stage.f @@ -0,0 +1,412 @@ +*> \brief DSBEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* @precisions fortran d -> s +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSBEVD_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, +* WORK, LWORK, IWORK, LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSBEVD_2STAGE computes all the eigenvalues and, optionally, eigenvectors of +*> a real symmetric band matrix A using the 2stage technique for +*> the reduction to tridiagonal. If eigenvectors are desired, it uses +*> a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension LWORK +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS + N +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 2, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleOTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, + $ WORK, LWORK, IWORK, LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, INDE, INDWK2, INDWRK, ISCALE, LIWMIN, + $ LLWORK, LWMIN, LHTRD, LWTRD, IB, INDHOUS, + $ LLWRK2 + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + DOUBLE PRECISION DLAMCH, DLANSB + EXTERNAL LSAME, DLAMCH, DLANSB, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLACPY, DLASCL, DSCAL, DSTEDC, + $ DSTERF, XERBLA, DSYTRD_SB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + IB = ILAENV2STAGE( 2, 'DSYTRD_SB2ST', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'DSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'DSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 5*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = MAX( 2*N, N+LHTRD+LWTRD ) + END IF + END IF + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBEVD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AB( 1, 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL DLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL DLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call DSYTRD_SB2ST to reduce band symmetric matrix to tridiagonal form. +* + INDE = 1 + INDHOUS = INDE + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 +* + CALL DSYTRD_SB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, + $ WORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call SSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL DSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL DGEMM( 'N', 'N', N, N, N, ONE, Z, LDZ, WORK( INDWRK ), N, + $ ZERO, WORK( INDWK2 ), N ) + CALL DLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL DSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of DSBEVD_2STAGE +* + END diff --git a/dspl/liblapack/SRC/dsbevx.f b/dspl/liblapack/SRC/dsbevx.f new file mode 100644 index 0000000..5e6d642 --- /dev/null +++ b/dspl/liblapack/SRC/dsbevx.f @@ -0,0 +1,543 @@ +*> \brief DSBEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSBEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSBEVX( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, LDQ, VL, +* VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, IWORK, +* IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION AB( LDAB, * ), Q( LDQ, * ), W( * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSBEVX computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric band matrix A. Eigenvalues and eigenvectors can +*> be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found; +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found; +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ, N) +*> If JOBZ = 'V', the N-by-N orthogonal matrix used in the +*> reduction to tridiagonal form. +*> If JOBZ = 'N', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. If JOBZ = 'V', then +*> LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing AB to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHEReigen +* +* ===================================================================== + SUBROUTINE DSBEVX( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, LDQ, VL, + $ VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, IWORK, + $ IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), Q( LDQ, * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, TEST, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWO, INDWRK, ISCALE, ITMP1, J, JJ, + $ NSPLIT + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSB + EXTERNAL LSAME, DLAMCH, DLANSB +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMV, DLACPY, DLASCL, DSBTRD, DSCAL, + $ DSTEBZ, DSTEIN, DSTEQR, DSTERF, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LOWER = LSAME( UPLO, 'L' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( WANTZ .AND. LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -13 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -18 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBEVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + M = 1 + IF( LOWER ) THEN + TMP1 = AB( 1, 1 ) + ELSE + TMP1 = AB( KD+1, 1 ) + END IF + IF( VALEIG ) THEN + IF( .NOT.( VL.LT.TMP1 .AND. VU.GE.TMP1 ) ) + $ M = 0 + END IF + IF( M.EQ.1 ) THEN + W( 1 ) = TMP1 + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + END IF + ANRM = DLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL DLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL DLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call DSBTRD to reduce symmetric band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDWRK = INDE + N + CALL DSBTRD( JOBZ, UPLO, N, KD, AB, LDAB, WORK( INDD ), + $ WORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call DSTERF or SSTEQR. If this fails for some +* eigenvalue, then try DSTEBZ. +* + TEST = .FALSE. + IF (INDEIG) THEN + IF (IL.EQ.1 .AND. IU.EQ.N) THEN + TEST = .TRUE. + END IF + END IF + IF ((ALLEIG .OR. TEST) .AND. (ABSTOL.LE.ZERO)) THEN + CALL DCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL DLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by DSTEIN. +* + DO 20 J = 1, M + CALL DCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL DGEMV( 'N', N, N, ONE, Q, LDQ, WORK, 1, ZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* + RETURN +* +* End of DSBEVX +* + END diff --git a/dspl/liblapack/SRC/dsbevx_2stage.f b/dspl/liblapack/SRC/dsbevx_2stage.f new file mode 100644 index 0000000..93a07f1 --- /dev/null +++ b/dspl/liblapack/SRC/dsbevx_2stage.f @@ -0,0 +1,636 @@ +*> \brief DSBEVX_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* @precisions fortran d -> s +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSBEVX_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSBEVX_2STAGE( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, +* LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, +* LDZ, WORK, LWORK, IWORK, IFAIL, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N, LWORK +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION AB( LDAB, * ), Q( LDQ, * ), W( * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSBEVX_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric band matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors can +*> be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found; +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found; +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ, N) +*> If JOBZ = 'V', the N-by-N orthogonal matrix used in the +*> reduction to tridiagonal form. +*> If JOBZ = 'N', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. If JOBZ = 'V', then +*> LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing AB to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, 7*N, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS + 2*N +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSBEVX_2STAGE( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, + $ LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, + $ LDZ, WORK, LWORK, IWORK, IFAIL, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N, LWORK + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), Q( LDQ, * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, TEST, VALEIG, WANTZ, + $ LQUERY + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWO, INDWRK, ISCALE, ITMP1, J, JJ, + $ LLWORK, LWMIN, LHTRD, LWTRD, IB, INDHOUS, + $ NSPLIT + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + DOUBLE PRECISION DLAMCH, DLANSB + EXTERNAL LSAME, DLAMCH, DLANSB, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMV, DLACPY, DLASCL, DSCAL, + $ DSTEBZ, DSTEIN, DSTEQR, DSTERF, DSWAP, XERBLA, + $ DSYTRD_SB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( WANTZ .AND. LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -13 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + IB = ILAENV2STAGE( 2, 'DSYTRD_SB2ST', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'DSYTRD_SB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'DSYTRD_SB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWMIN = 2*N + LHTRD + LWTRD + WORK( 1 ) = LWMIN + ENDIF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -20 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBEVX_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + M = 1 + IF( LOWER ) THEN + TMP1 = AB( 1, 1 ) + ELSE + TMP1 = AB( KD+1, 1 ) + END IF + IF( VALEIG ) THEN + IF( .NOT.( VL.LT.TMP1 .AND. VU.GE.TMP1 ) ) + $ M = 0 + END IF + IF( M.EQ.1 ) THEN + W( 1 ) = TMP1 + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + END IF + ANRM = DLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL DLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL DLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call DSYTRD_SB2ST to reduce symmetric band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDHOUS = INDE + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL DSYTRD_SB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, WORK( INDD ), + $ WORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call DSTERF or SSTEQR. If this fails for some +* eigenvalue, then try DSTEBZ. +* + TEST = .FALSE. + IF (INDEIG) THEN + IF (IL.EQ.1 .AND. IU.EQ.N) THEN + TEST = .TRUE. + END IF + END IF + IF ((ALLEIG .OR. TEST) .AND. (ABSTOL.LE.ZERO)) THEN + CALL DCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL DLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by DSTEIN. +* + DO 20 J = 1, M + CALL DCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL DGEMV( 'N', N, N, ONE, Q, LDQ, WORK, 1, ZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of DSBEVX_2STAGE +* + END diff --git a/dspl/liblapack/SRC/dsbgst.f b/dspl/liblapack/SRC/dsbgst.f new file mode 100644 index 0000000..3adfeb9 --- /dev/null +++ b/dspl/liblapack/SRC/dsbgst.f @@ -0,0 +1,1434 @@ +*> \brief \b DSBGST +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSBGST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSBGST( VECT, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, X, +* LDX, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, VECT +* INTEGER INFO, KA, KB, LDAB, LDBB, LDX, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), BB( LDBB, * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSBGST reduces a real symmetric-definite banded generalized +*> eigenproblem A*x = lambda*B*x to standard form C*y = lambda*y, +*> such that C has the same bandwidth as A. +*> +*> B must have been previously factorized as S**T*S by DPBSTF, using a +*> split Cholesky factorization. A is overwritten by C = X**T*A*X, where +*> X = S**(-1)*Q and Q is an orthogonal matrix chosen to preserve the +*> bandwidth of A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': do not form the transformation matrix X; +*> = 'V': form X. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] KA +*> \verbatim +*> KA is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KA >= 0. +*> \endverbatim +*> +*> \param[in] KB +*> \verbatim +*> KB is INTEGER +*> The number of superdiagonals of the matrix B if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KA >= KB >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first ka+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +*> +*> On exit, the transformed matrix X**T*A*X, stored in the same +*> format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KA+1. +*> \endverbatim +*> +*> \param[in] BB +*> \verbatim +*> BB is DOUBLE PRECISION array, dimension (LDBB,N) +*> The banded factor S from the split Cholesky factorization of +*> B, as returned by DPBSTF, stored in the first KB+1 rows of +*> the array. +*> \endverbatim +*> +*> \param[in] LDBB +*> \verbatim +*> LDBB is INTEGER +*> The leading dimension of the array BB. LDBB >= KB+1. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,N) +*> If VECT = 'V', the n-by-n matrix X. +*> If VECT = 'N', the array X is not referenced. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. +*> LDX >= max(1,N) if VECT = 'V'; LDX >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DSBGST( VECT, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, X, + $ LDX, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO, VECT + INTEGER INFO, KA, KB, LDAB, LDBB, LDX, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), BB( LDBB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPDATE, UPPER, WANTX + INTEGER I, I0, I1, I2, INCA, J, J1, J1T, J2, J2T, K, + $ KA1, KB1, KBT, L, M, NR, NRT, NX + DOUBLE PRECISION BII, RA, RA1, T +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGER, DLAR2V, DLARGV, DLARTG, DLARTV, DLASET, + $ DROT, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTX = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + KA1 = KA + 1 + KB1 = KB + 1 + INFO = 0 + IF( .NOT.WANTX .AND. .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDX.LT.1 .OR. WANTX .AND. LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBGST', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + INCA = LDAB*KA1 +* +* Initialize X to the unit matrix, if needed +* + IF( WANTX ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, X, LDX ) +* +* Set M to the splitting point m. It must be the same value as is +* used in DPBSTF. The chosen value allows the arrays WORK and RWORK +* to be of dimension (N). +* + M = ( N+KB ) / 2 +* +* The routine works in two phases, corresponding to the two halves +* of the split Cholesky factorization of B as S**T*S where +* +* S = ( U ) +* ( M L ) +* +* with U upper triangular of order m, and L lower triangular of +* order n-m. S has the same bandwidth as B. +* +* S is treated as a product of elementary matrices: +* +* S = S(m)*S(m-1)*...*S(2)*S(1)*S(m+1)*S(m+2)*...*S(n-1)*S(n) +* +* where S(i) is determined by the i-th row of S. +* +* In phase 1, the index i takes the values n, n-1, ... , m+1; +* in phase 2, it takes the values 1, 2, ... , m. +* +* For each value of i, the current matrix A is updated by forming +* inv(S(i))**T*A*inv(S(i)). This creates a triangular bulge outside +* the band of A. The bulge is then pushed down toward the bottom of +* A in phase 1, and up toward the top of A in phase 2, by applying +* plane rotations. +* +* There are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1 +* of them are linearly independent, so annihilating a bulge requires +* only 2*kb-1 plane rotations. The rotations are divided into a 1st +* set of kb-1 rotations, and a 2nd set of kb rotations. +* +* Wherever possible, rotations are generated and applied in vector +* operations of length NR between the indices J1 and J2 (sometimes +* replaced by modified values NRT, J1T or J2T). +* +* The cosines and sines of the rotations are stored in the array +* WORK. The cosines of the 1st set of rotations are stored in +* elements n+2:n+m-kb-1 and the sines of the 1st set in elements +* 2:m-kb-1; the cosines of the 2nd set are stored in elements +* n+m-kb+1:2*n and the sines of the second set in elements m-kb+1:n. +* +* The bulges are not formed explicitly; nonzero elements outside the +* band are created only when they are required for generating new +* rotations; they are stored in the array WORK, in positions where +* they are later overwritten by the sines of the rotations which +* annihilate them. +* +* **************************** Phase 1 ***************************** +* +* The logical structure of this phase is: +* +* UPDATE = .TRUE. +* DO I = N, M + 1, -1 +* use S(i) to update A and create a new bulge +* apply rotations to push all bulges KA positions downward +* END DO +* UPDATE = .FALSE. +* DO I = M + KA + 1, N - 1 +* apply rotations to push all bulges KA positions downward +* END DO +* +* To avoid duplicating code, the two loops are merged. +* + UPDATE = .TRUE. + I = N + 1 + 10 CONTINUE + IF( UPDATE ) THEN + I = I - 1 + KBT = MIN( KB, I-1 ) + I0 = I - 1 + I1 = MIN( N, I+KA ) + I2 = I - KBT + KA1 + IF( I.LT.M+1 ) THEN + UPDATE = .FALSE. + I = I + 1 + I0 = M + IF( KA.EQ.0 ) + $ GO TO 480 + GO TO 10 + END IF + ELSE + I = I + KA + IF( I.GT.N-1 ) + $ GO TO 480 + END IF +* + IF( UPPER ) THEN +* +* Transform A, working with the upper triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**T * A * inv(S(i)) +* + BII = BB( KB1, I ) + DO 20 J = I, I1 + AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII + 20 CONTINUE + DO 30 J = MAX( 1, I-KA ), I + AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII + 30 CONTINUE + DO 60 K = I - KBT, I - 1 + DO 40 J = I - KBT, K + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ BB( J-I+KB1, I )*AB( K-I+KA1, I ) - + $ BB( K-I+KB1, I )*AB( J-I+KA1, I ) + + $ AB( KA1, I )*BB( J-I+KB1, I )* + $ BB( K-I+KB1, I ) + 40 CONTINUE + DO 50 J = MAX( 1, I-KA ), I - KBT - 1 + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ BB( K-I+KB1, I )*AB( J-I+KA1, I ) + 50 CONTINUE + 60 CONTINUE + DO 80 J = I, I1 + DO 70 K = MAX( J-KA, I-KBT ), I - 1 + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ BB( K-I+KB1, I )*AB( I-J+KA1, J ) + 70 CONTINUE + 80 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL DSCAL( N-M, ONE / BII, X( M+1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL DGER( N-M, KBT, -ONE, X( M+1, I ), 1, + $ BB( KB1-KBT, I ), 1, X( M+1, I-KBT ), LDX ) + END IF +* +* store a(i,i1) in RA1 for use in next loop over K +* + RA1 = AB( I-I1+KA1, I1 ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions down toward the bottom of the +* band +* + DO 130 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN +* +* generate rotation to annihilate a(i,i-k+ka+1) +* + CALL DLARTG( AB( K+1, I-K+KA ), RA1, + $ WORK( N+I-K+KA-M ), WORK( I-K+KA-M ), + $ RA ) +* +* create nonzero element a(i-k,i-k+ka+1) outside the +* band and store it in WORK(i-k) +* + T = -BB( KB1-K, I )*RA1 + WORK( I-K ) = WORK( N+I-K+KA-M )*T - + $ WORK( I-K+KA-M )*AB( 1, I-K+KA ) + AB( 1, I-K+KA ) = WORK( I-K+KA-M )*T + + $ WORK( N+I-K+KA-M )*AB( 1, I-K+KA ) + RA1 = RA + END IF + END IF + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MAX( J2, I+2*KA-K+1 ) + ELSE + J2T = J2 + END IF + NRT = ( N-J2T+KA ) / KA1 + DO 90 J = J2T, J1, KA1 +* +* create nonzero element a(j-ka,j+1) outside the band +* and store it in WORK(j-m) +* + WORK( J-M ) = WORK( J-M )*AB( 1, J+1 ) + AB( 1, J+1 ) = WORK( N+J-M )*AB( 1, J+1 ) + 90 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL DLARGV( NRT, AB( 1, J2T ), INCA, WORK( J2T-M ), KA1, + $ WORK( N+J2T-M ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the right +* + DO 100 L = 1, KA - 1 + CALL DLARTV( NR, AB( KA1-L, J2 ), INCA, + $ AB( KA-L, J2+1 ), INCA, WORK( N+J2-M ), + $ WORK( J2-M ), KA1 ) + 100 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL DLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ), + $ AB( KA, J2+1 ), INCA, WORK( N+J2-M ), + $ WORK( J2-M ), KA1 ) +* + END IF +* +* start applying rotations in 1st set from the left +* + DO 110 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, + $ WORK( N+J2-M ), WORK( J2-M ), KA1 ) + 110 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 120 J = J2, J1, KA1 + CALL DROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ WORK( N+J-M ), WORK( J-M ) ) + 120 CONTINUE + END IF + 130 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.LE.N .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i-kbt,i-kbt+ka+1) outside the +* band and store it in WORK(i-kbt) +* + WORK( I-KBT ) = -BB( KB1-KBT, I )*RA1 + END IF + END IF +* + DO 170 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1 + ELSE + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + END IF +* +* finish applying rotations in 2nd set from the left +* + DO 140 L = KB - K, 1, -1 + NRT = ( N-J2+KA+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( L, J2-L+1 ), INCA, + $ AB( L+1, J2-L+1 ), INCA, WORK( N+J2-KA ), + $ WORK( J2-KA ), KA1 ) + 140 CONTINUE + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + DO 150 J = J1, J2, -KA1 + WORK( J ) = WORK( J-KA ) + WORK( N+J ) = WORK( N+J-KA ) + 150 CONTINUE + DO 160 J = J2, J1, KA1 +* +* create nonzero element a(j-ka,j+1) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( 1, J+1 ) + AB( 1, J+1 ) = WORK( N+J )*AB( 1, J+1 ) + 160 CONTINUE + IF( UPDATE ) THEN + IF( I-K.LT.N-KA .AND. K.LE.KBT ) + $ WORK( I-K+KA ) = WORK( I-K ) + END IF + 170 CONTINUE +* + DO 210 K = KB, 1, -1 + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL DLARGV( NR, AB( 1, J2 ), INCA, WORK( J2 ), KA1, + $ WORK( N+J2 ), KA1 ) +* +* apply rotations in 2nd set from the right +* + DO 180 L = 1, KA - 1 + CALL DLARTV( NR, AB( KA1-L, J2 ), INCA, + $ AB( KA-L, J2+1 ), INCA, WORK( N+J2 ), + $ WORK( J2 ), KA1 ) + 180 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL DLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ), + $ AB( KA, J2+1 ), INCA, WORK( N+J2 ), + $ WORK( J2 ), KA1 ) +* + END IF +* +* start applying rotations in 2nd set from the left +* + DO 190 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, WORK( N+J2 ), + $ WORK( J2 ), KA1 ) + 190 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 200 J = J2, J1, KA1 + CALL DROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ WORK( N+J ), WORK( J ) ) + 200 CONTINUE + END IF + 210 CONTINUE +* + DO 230 K = 1, KB - 1 + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 +* +* finish applying rotations in 1st set from the left +* + DO 220 L = KB - K, 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, + $ WORK( N+J2-M ), WORK( J2-M ), KA1 ) + 220 CONTINUE + 230 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 240 J = N - 1, I - KB + 2*KA + 1, -1 + WORK( N+J-M ) = WORK( N+J-KA-M ) + WORK( J-M ) = WORK( J-KA-M ) + 240 CONTINUE + END IF +* + ELSE +* +* Transform A, working with the lower triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**T * A * inv(S(i)) +* + BII = BB( 1, I ) + DO 250 J = I, I1 + AB( J-I+1, I ) = AB( J-I+1, I ) / BII + 250 CONTINUE + DO 260 J = MAX( 1, I-KA ), I + AB( I-J+1, J ) = AB( I-J+1, J ) / BII + 260 CONTINUE + DO 290 K = I - KBT, I - 1 + DO 270 J = I - KBT, K + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ BB( I-J+1, J )*AB( I-K+1, K ) - + $ BB( I-K+1, K )*AB( I-J+1, J ) + + $ AB( 1, I )*BB( I-J+1, J )* + $ BB( I-K+1, K ) + 270 CONTINUE + DO 280 J = MAX( 1, I-KA ), I - KBT - 1 + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ BB( I-K+1, K )*AB( I-J+1, J ) + 280 CONTINUE + 290 CONTINUE + DO 310 J = I, I1 + DO 300 K = MAX( J-KA, I-KBT ), I - 1 + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ BB( I-K+1, K )*AB( J-I+1, I ) + 300 CONTINUE + 310 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL DSCAL( N-M, ONE / BII, X( M+1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL DGER( N-M, KBT, -ONE, X( M+1, I ), 1, + $ BB( KBT+1, I-KBT ), LDBB-1, + $ X( M+1, I-KBT ), LDX ) + END IF +* +* store a(i1,i) in RA1 for use in next loop over K +* + RA1 = AB( I1-I+1, I ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions down toward the bottom of the +* band +* + DO 360 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN +* +* generate rotation to annihilate a(i-k+ka+1,i) +* + CALL DLARTG( AB( KA1-K, I ), RA1, WORK( N+I-K+KA-M ), + $ WORK( I-K+KA-M ), RA ) +* +* create nonzero element a(i-k+ka+1,i-k) outside the +* band and store it in WORK(i-k) +* + T = -BB( K+1, I-K )*RA1 + WORK( I-K ) = WORK( N+I-K+KA-M )*T - + $ WORK( I-K+KA-M )*AB( KA1, I-K ) + AB( KA1, I-K ) = WORK( I-K+KA-M )*T + + $ WORK( N+I-K+KA-M )*AB( KA1, I-K ) + RA1 = RA + END IF + END IF + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MAX( J2, I+2*KA-K+1 ) + ELSE + J2T = J2 + END IF + NRT = ( N-J2T+KA ) / KA1 + DO 320 J = J2T, J1, KA1 +* +* create nonzero element a(j+1,j-ka) outside the band +* and store it in WORK(j-m) +* + WORK( J-M ) = WORK( J-M )*AB( KA1, J-KA+1 ) + AB( KA1, J-KA+1 ) = WORK( N+J-M )*AB( KA1, J-KA+1 ) + 320 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL DLARGV( NRT, AB( KA1, J2T-KA ), INCA, WORK( J2T-M ), + $ KA1, WORK( N+J2T-M ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the left +* + DO 330 L = 1, KA - 1 + CALL DLARTV( NR, AB( L+1, J2-L ), INCA, + $ AB( L+2, J2-L ), INCA, WORK( N+J2-M ), + $ WORK( J2-M ), KA1 ) + 330 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL DLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ), + $ INCA, WORK( N+J2-M ), WORK( J2-M ), KA1 ) +* + END IF +* +* start applying rotations in 1st set from the right +* + DO 340 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, WORK( N+J2-M ), + $ WORK( J2-M ), KA1 ) + 340 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 350 J = J2, J1, KA1 + CALL DROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ WORK( N+J-M ), WORK( J-M ) ) + 350 CONTINUE + END IF + 360 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.LE.N .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i-kbt+ka+1,i-kbt) outside the +* band and store it in WORK(i-kbt) +* + WORK( I-KBT ) = -BB( KBT+1, I-KBT )*RA1 + END IF + END IF +* + DO 400 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1 + ELSE + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + END IF +* +* finish applying rotations in 2nd set from the right +* + DO 370 L = KB - K, 1, -1 + NRT = ( N-J2+KA+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( KA1-L+1, J2-KA ), INCA, + $ AB( KA1-L, J2-KA+1 ), INCA, + $ WORK( N+J2-KA ), WORK( J2-KA ), KA1 ) + 370 CONTINUE + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + DO 380 J = J1, J2, -KA1 + WORK( J ) = WORK( J-KA ) + WORK( N+J ) = WORK( N+J-KA ) + 380 CONTINUE + DO 390 J = J2, J1, KA1 +* +* create nonzero element a(j+1,j-ka) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( KA1, J-KA+1 ) + AB( KA1, J-KA+1 ) = WORK( N+J )*AB( KA1, J-KA+1 ) + 390 CONTINUE + IF( UPDATE ) THEN + IF( I-K.LT.N-KA .AND. K.LE.KBT ) + $ WORK( I-K+KA ) = WORK( I-K ) + END IF + 400 CONTINUE +* + DO 440 K = KB, 1, -1 + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL DLARGV( NR, AB( KA1, J2-KA ), INCA, WORK( J2 ), KA1, + $ WORK( N+J2 ), KA1 ) +* +* apply rotations in 2nd set from the left +* + DO 410 L = 1, KA - 1 + CALL DLARTV( NR, AB( L+1, J2-L ), INCA, + $ AB( L+2, J2-L ), INCA, WORK( N+J2 ), + $ WORK( J2 ), KA1 ) + 410 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL DLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ), + $ INCA, WORK( N+J2 ), WORK( J2 ), KA1 ) +* + END IF +* +* start applying rotations in 2nd set from the right +* + DO 420 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, WORK( N+J2 ), + $ WORK( J2 ), KA1 ) + 420 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 430 J = J2, J1, KA1 + CALL DROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ WORK( N+J ), WORK( J ) ) + 430 CONTINUE + END IF + 440 CONTINUE +* + DO 460 K = 1, KB - 1 + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 +* +* finish applying rotations in 1st set from the right +* + DO 450 L = KB - K, 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, WORK( N+J2-M ), + $ WORK( J2-M ), KA1 ) + 450 CONTINUE + 460 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 470 J = N - 1, I - KB + 2*KA + 1, -1 + WORK( N+J-M ) = WORK( N+J-KA-M ) + WORK( J-M ) = WORK( J-KA-M ) + 470 CONTINUE + END IF +* + END IF +* + GO TO 10 +* + 480 CONTINUE +* +* **************************** Phase 2 ***************************** +* +* The logical structure of this phase is: +* +* UPDATE = .TRUE. +* DO I = 1, M +* use S(i) to update A and create a new bulge +* apply rotations to push all bulges KA positions upward +* END DO +* UPDATE = .FALSE. +* DO I = M - KA - 1, 2, -1 +* apply rotations to push all bulges KA positions upward +* END DO +* +* To avoid duplicating code, the two loops are merged. +* + UPDATE = .TRUE. + I = 0 + 490 CONTINUE + IF( UPDATE ) THEN + I = I + 1 + KBT = MIN( KB, M-I ) + I0 = I + 1 + I1 = MAX( 1, I-KA ) + I2 = I + KBT - KA1 + IF( I.GT.M ) THEN + UPDATE = .FALSE. + I = I - 1 + I0 = M + 1 + IF( KA.EQ.0 ) + $ RETURN + GO TO 490 + END IF + ELSE + I = I - KA + IF( I.LT.2 ) + $ RETURN + END IF +* + IF( I.LT.M-KBT ) THEN + NX = M + ELSE + NX = N + END IF +* + IF( UPPER ) THEN +* +* Transform A, working with the upper triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**T * A * inv(S(i)) +* + BII = BB( KB1, I ) + DO 500 J = I1, I + AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII + 500 CONTINUE + DO 510 J = I, MIN( N, I+KA ) + AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII + 510 CONTINUE + DO 540 K = I + 1, I + KBT + DO 520 J = K, I + KBT + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ BB( I-J+KB1, J )*AB( I-K+KA1, K ) - + $ BB( I-K+KB1, K )*AB( I-J+KA1, J ) + + $ AB( KA1, I )*BB( I-J+KB1, J )* + $ BB( I-K+KB1, K ) + 520 CONTINUE + DO 530 J = I + KBT + 1, MIN( N, I+KA ) + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ BB( I-K+KB1, K )*AB( I-J+KA1, J ) + 530 CONTINUE + 540 CONTINUE + DO 560 J = I1, I + DO 550 K = I + 1, MIN( J+KA, I+KBT ) + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ BB( I-K+KB1, K )*AB( J-I+KA1, I ) + 550 CONTINUE + 560 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL DSCAL( NX, ONE / BII, X( 1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL DGER( NX, KBT, -ONE, X( 1, I ), 1, BB( KB, I+1 ), + $ LDBB-1, X( 1, I+1 ), LDX ) + END IF +* +* store a(i1,i) in RA1 for use in next loop over K +* + RA1 = AB( I1-I+KA1, I ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions up toward the top of the band +* + DO 610 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN +* +* generate rotation to annihilate a(i+k-ka-1,i) +* + CALL DLARTG( AB( K+1, I ), RA1, WORK( N+I+K-KA ), + $ WORK( I+K-KA ), RA ) +* +* create nonzero element a(i+k-ka-1,i+k) outside the +* band and store it in WORK(m-kb+i+k) +* + T = -BB( KB1-K, I+K )*RA1 + WORK( M-KB+I+K ) = WORK( N+I+K-KA )*T - + $ WORK( I+K-KA )*AB( 1, I+K ) + AB( 1, I+K ) = WORK( I+K-KA )*T + + $ WORK( N+I+K-KA )*AB( 1, I+K ) + RA1 = RA + END IF + END IF + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MIN( J2, I-2*KA+K-1 ) + ELSE + J2T = J2 + END IF + NRT = ( J2T+KA-1 ) / KA1 + DO 570 J = J1, J2T, KA1 +* +* create nonzero element a(j-1,j+ka) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( 1, J+KA-1 ) + AB( 1, J+KA-1 ) = WORK( N+J )*AB( 1, J+KA-1 ) + 570 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL DLARGV( NRT, AB( 1, J1+KA ), INCA, WORK( J1 ), KA1, + $ WORK( N+J1 ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the left +* + DO 580 L = 1, KA - 1 + CALL DLARTV( NR, AB( KA1-L, J1+L ), INCA, + $ AB( KA-L, J1+L ), INCA, WORK( N+J1 ), + $ WORK( J1 ), KA1 ) + 580 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL DLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ), + $ AB( KA, J1 ), INCA, WORK( N+J1 ), + $ WORK( J1 ), KA1 ) +* + END IF +* +* start applying rotations in 1st set from the right +* + DO 590 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, WORK( N+J1T ), + $ WORK( J1T ), KA1 ) + 590 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 600 J = J1, J2, KA1 + CALL DROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ WORK( N+J ), WORK( J ) ) + 600 CONTINUE + END IF + 610 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.GT.0 .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i+kbt-ka-1,i+kbt) outside the +* band and store it in WORK(m-kb+i+kbt) +* + WORK( M-KB+I+KBT ) = -BB( KB1-KBT, I+KBT )*RA1 + END IF + END IF +* + DO 650 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1 + ELSE + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + END IF +* +* finish applying rotations in 2nd set from the right +* + DO 620 L = KB - K, 1, -1 + NRT = ( J2+KA+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( L, J1T+KA ), INCA, + $ AB( L+1, J1T+KA-1 ), INCA, + $ WORK( N+M-KB+J1T+KA ), + $ WORK( M-KB+J1T+KA ), KA1 ) + 620 CONTINUE + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + DO 630 J = J1, J2, KA1 + WORK( M-KB+J ) = WORK( M-KB+J+KA ) + WORK( N+M-KB+J ) = WORK( N+M-KB+J+KA ) + 630 CONTINUE + DO 640 J = J1, J2, KA1 +* +* create nonzero element a(j-1,j+ka) outside the band +* and store it in WORK(m-kb+j) +* + WORK( M-KB+J ) = WORK( M-KB+J )*AB( 1, J+KA-1 ) + AB( 1, J+KA-1 ) = WORK( N+M-KB+J )*AB( 1, J+KA-1 ) + 640 CONTINUE + IF( UPDATE ) THEN + IF( I+K.GT.KA1 .AND. K.LE.KBT ) + $ WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K ) + END IF + 650 CONTINUE +* + DO 690 K = KB, 1, -1 + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL DLARGV( NR, AB( 1, J1+KA ), INCA, WORK( M-KB+J1 ), + $ KA1, WORK( N+M-KB+J1 ), KA1 ) +* +* apply rotations in 2nd set from the left +* + DO 660 L = 1, KA - 1 + CALL DLARTV( NR, AB( KA1-L, J1+L ), INCA, + $ AB( KA-L, J1+L ), INCA, + $ WORK( N+M-KB+J1 ), WORK( M-KB+J1 ), KA1 ) + 660 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL DLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ), + $ AB( KA, J1 ), INCA, WORK( N+M-KB+J1 ), + $ WORK( M-KB+J1 ), KA1 ) +* + END IF +* +* start applying rotations in 2nd set from the right +* + DO 670 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, + $ WORK( N+M-KB+J1T ), WORK( M-KB+J1T ), + $ KA1 ) + 670 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 680 J = J1, J2, KA1 + CALL DROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ WORK( N+M-KB+J ), WORK( M-KB+J ) ) + 680 CONTINUE + END IF + 690 CONTINUE +* + DO 710 K = 1, KB - 1 + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 +* +* finish applying rotations in 1st set from the right +* + DO 700 L = KB - K, 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, WORK( N+J1T ), + $ WORK( J1T ), KA1 ) + 700 CONTINUE + 710 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 720 J = 2, MIN( I+KB, M ) - 2*KA - 1 + WORK( N+J ) = WORK( N+J+KA ) + WORK( J ) = WORK( J+KA ) + 720 CONTINUE + END IF +* + ELSE +* +* Transform A, working with the lower triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**T * A * inv(S(i)) +* + BII = BB( 1, I ) + DO 730 J = I1, I + AB( I-J+1, J ) = AB( I-J+1, J ) / BII + 730 CONTINUE + DO 740 J = I, MIN( N, I+KA ) + AB( J-I+1, I ) = AB( J-I+1, I ) / BII + 740 CONTINUE + DO 770 K = I + 1, I + KBT + DO 750 J = K, I + KBT + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ BB( J-I+1, I )*AB( K-I+1, I ) - + $ BB( K-I+1, I )*AB( J-I+1, I ) + + $ AB( 1, I )*BB( J-I+1, I )* + $ BB( K-I+1, I ) + 750 CONTINUE + DO 760 J = I + KBT + 1, MIN( N, I+KA ) + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ BB( K-I+1, I )*AB( J-I+1, I ) + 760 CONTINUE + 770 CONTINUE + DO 790 J = I1, I + DO 780 K = I + 1, MIN( J+KA, I+KBT ) + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ BB( K-I+1, I )*AB( I-J+1, J ) + 780 CONTINUE + 790 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL DSCAL( NX, ONE / BII, X( 1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL DGER( NX, KBT, -ONE, X( 1, I ), 1, BB( 2, I ), 1, + $ X( 1, I+1 ), LDX ) + END IF +* +* store a(i,i1) in RA1 for use in next loop over K +* + RA1 = AB( I-I1+1, I1 ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions up toward the top of the band +* + DO 840 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN +* +* generate rotation to annihilate a(i,i+k-ka-1) +* + CALL DLARTG( AB( KA1-K, I+K-KA ), RA1, + $ WORK( N+I+K-KA ), WORK( I+K-KA ), RA ) +* +* create nonzero element a(i+k,i+k-ka-1) outside the +* band and store it in WORK(m-kb+i+k) +* + T = -BB( K+1, I )*RA1 + WORK( M-KB+I+K ) = WORK( N+I+K-KA )*T - + $ WORK( I+K-KA )*AB( KA1, I+K-KA ) + AB( KA1, I+K-KA ) = WORK( I+K-KA )*T + + $ WORK( N+I+K-KA )*AB( KA1, I+K-KA ) + RA1 = RA + END IF + END IF + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MIN( J2, I-2*KA+K-1 ) + ELSE + J2T = J2 + END IF + NRT = ( J2T+KA-1 ) / KA1 + DO 800 J = J1, J2T, KA1 +* +* create nonzero element a(j+ka,j-1) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( KA1, J-1 ) + AB( KA1, J-1 ) = WORK( N+J )*AB( KA1, J-1 ) + 800 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL DLARGV( NRT, AB( KA1, J1 ), INCA, WORK( J1 ), KA1, + $ WORK( N+J1 ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the right +* + DO 810 L = 1, KA - 1 + CALL DLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ), + $ INCA, WORK( N+J1 ), WORK( J1 ), KA1 ) + 810 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL DLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ), + $ AB( 2, J1-1 ), INCA, WORK( N+J1 ), + $ WORK( J1 ), KA1 ) +* + END IF +* +* start applying rotations in 1st set from the left +* + DO 820 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ WORK( N+J1T ), WORK( J1T ), KA1 ) + 820 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 830 J = J1, J2, KA1 + CALL DROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ WORK( N+J ), WORK( J ) ) + 830 CONTINUE + END IF + 840 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.GT.0 .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i+kbt,i+kbt-ka-1) outside the +* band and store it in WORK(m-kb+i+kbt) +* + WORK( M-KB+I+KBT ) = -BB( KBT+1, I )*RA1 + END IF + END IF +* + DO 880 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1 + ELSE + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + END IF +* +* finish applying rotations in 2nd set from the left +* + DO 850 L = KB - K, 1, -1 + NRT = ( J2+KA+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( KA1-L+1, J1T+L-1 ), INCA, + $ AB( KA1-L, J1T+L-1 ), INCA, + $ WORK( N+M-KB+J1T+KA ), + $ WORK( M-KB+J1T+KA ), KA1 ) + 850 CONTINUE + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + DO 860 J = J1, J2, KA1 + WORK( M-KB+J ) = WORK( M-KB+J+KA ) + WORK( N+M-KB+J ) = WORK( N+M-KB+J+KA ) + 860 CONTINUE + DO 870 J = J1, J2, KA1 +* +* create nonzero element a(j+ka,j-1) outside the band +* and store it in WORK(m-kb+j) +* + WORK( M-KB+J ) = WORK( M-KB+J )*AB( KA1, J-1 ) + AB( KA1, J-1 ) = WORK( N+M-KB+J )*AB( KA1, J-1 ) + 870 CONTINUE + IF( UPDATE ) THEN + IF( I+K.GT.KA1 .AND. K.LE.KBT ) + $ WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K ) + END IF + 880 CONTINUE +* + DO 920 K = KB, 1, -1 + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL DLARGV( NR, AB( KA1, J1 ), INCA, WORK( M-KB+J1 ), + $ KA1, WORK( N+M-KB+J1 ), KA1 ) +* +* apply rotations in 2nd set from the right +* + DO 890 L = 1, KA - 1 + CALL DLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ), + $ INCA, WORK( N+M-KB+J1 ), WORK( M-KB+J1 ), + $ KA1 ) + 890 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL DLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ), + $ AB( 2, J1-1 ), INCA, WORK( N+M-KB+J1 ), + $ WORK( M-KB+J1 ), KA1 ) +* + END IF +* +* start applying rotations in 2nd set from the left +* + DO 900 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ WORK( N+M-KB+J1T ), WORK( M-KB+J1T ), + $ KA1 ) + 900 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 910 J = J1, J2, KA1 + CALL DROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ WORK( N+M-KB+J ), WORK( M-KB+J ) ) + 910 CONTINUE + END IF + 920 CONTINUE +* + DO 940 K = 1, KB - 1 + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 +* +* finish applying rotations in 1st set from the left +* + DO 930 L = KB - K, 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ WORK( N+J1T ), WORK( J1T ), KA1 ) + 930 CONTINUE + 940 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 950 J = 2, MIN( I+KB, M ) - 2*KA - 1 + WORK( N+J ) = WORK( N+J+KA ) + WORK( J ) = WORK( J+KA ) + 950 CONTINUE + END IF +* + END IF +* + GO TO 490 +* +* End of DSBGST +* + END diff --git a/dspl/liblapack/SRC/dsbgv.f b/dspl/liblapack/SRC/dsbgv.f new file mode 100644 index 0000000..d82cdae --- /dev/null +++ b/dspl/liblapack/SRC/dsbgv.f @@ -0,0 +1,280 @@ +*> \brief \b DSBGV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSBGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, +* LDZ, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), BB( LDBB, * ), W( * ), +* $ WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSBGV computes all the eigenvalues, and optionally, the eigenvectors +*> of a real generalized symmetric-definite banded eigenproblem, of +*> the form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric +*> and banded, and B is also positive definite. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] KA +*> \verbatim +*> KA is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KA >= 0. +*> \endverbatim +*> +*> \param[in] KB +*> \verbatim +*> KB is INTEGER +*> The number of superdiagonals of the matrix B if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KB >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first ka+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +*> +*> On exit, the contents of AB are destroyed. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KA+1. +*> \endverbatim +*> +*> \param[in,out] BB +*> \verbatim +*> BB is DOUBLE PRECISION array, dimension (LDBB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix B, stored in the first kb+1 rows of the array. The +*> j-th column of B is stored in the j-th column of the array BB +*> as follows: +*> if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +*> if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +*> +*> On exit, the factor S from the split Cholesky factorization +*> B = S**T*S, as returned by DPBSTF. +*> \endverbatim +*> +*> \param[in] LDBB +*> \verbatim +*> LDBB is INTEGER +*> The leading dimension of the array BB. LDBB >= KB+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +*> eigenvectors, with the i-th column of Z holding the +*> eigenvector associated with W(i). The eigenvectors are +*> normalized so that Z**T*B*Z = I. +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is: +*> <= N: the algorithm failed to converge: +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then DPBSTF +*> returned INFO = i: B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHEReigen +* +* ===================================================================== + SUBROUTINE DSBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, + $ LDZ, WORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), BB( LDBB, * ), W( * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, WANTZ + CHARACTER VECT + INTEGER IINFO, INDE, INDWRK +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DPBSTF, DSBGST, DSBTRD, DSTEQR, DSTERF, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBGV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL DPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + INDE = 1 + INDWRK = INDE + N + CALL DSBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* Reduce to tridiagonal form. +* + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL DSBTRD( VECT, UPLO, N, KA, AB, LDAB, W, WORK( INDE ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL DSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ), + $ INFO ) + END IF + RETURN +* +* End of DSBGV +* + END diff --git a/dspl/liblapack/SRC/dsbgvd.f b/dspl/liblapack/SRC/dsbgvd.f new file mode 100644 index 0000000..2a215fb --- /dev/null +++ b/dspl/liblapack/SRC/dsbgvd.f @@ -0,0 +1,372 @@ +*> \brief \b DSBGVD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSBGVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, +* Z, LDZ, WORK, LWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION AB( LDAB, * ), BB( LDBB, * ), W( * ), +* $ WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSBGVD computes all the eigenvalues, and optionally, the eigenvectors +*> of a real generalized symmetric-definite banded eigenproblem, of the +*> form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric and +*> banded, and B is also positive definite. If eigenvectors are +*> desired, it uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] KA +*> \verbatim +*> KA is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KA >= 0. +*> \endverbatim +*> +*> \param[in] KB +*> \verbatim +*> KB is INTEGER +*> The number of superdiagonals of the matrix B if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KB >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first ka+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +*> +*> On exit, the contents of AB are destroyed. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KA+1. +*> \endverbatim +*> +*> \param[in,out] BB +*> \verbatim +*> BB is DOUBLE PRECISION array, dimension (LDBB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix B, stored in the first kb+1 rows of the array. The +*> j-th column of B is stored in the j-th column of the array BB +*> as follows: +*> if UPLO = 'U', BB(ka+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +*> if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +*> +*> On exit, the factor S from the split Cholesky factorization +*> B = S**T*S, as returned by DPBSTF. +*> \endverbatim +*> +*> \param[in] LDBB +*> \verbatim +*> LDBB is INTEGER +*> The leading dimension of the array BB. LDBB >= KB+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +*> eigenvectors, with the i-th column of Z holding the +*> eigenvector associated with W(i). The eigenvectors are +*> normalized so Z**T*B*Z = I. +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK >= 1. +*> If JOBZ = 'N' and N > 1, LWORK >= 2*N. +*> If JOBZ = 'V' and N > 1, LWORK >= 1 + 5*N + 2*N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if LIWORK > 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK >= 1. +*> If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is: +*> <= N: the algorithm failed to converge: +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then DPBSTF +*> returned INFO = i: B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHEReigen +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE DSBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, + $ Z, LDZ, WORK, LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), BB( LDBB, * ), W( * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER VECT + INTEGER IINFO, INDE, INDWK2, INDWRK, LIWMIN, LLWRK2, + $ LWMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLACPY, DPBSTF, DSBGST, DSBTRD, DSTEDC, + $ DSTERF, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 5*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + END IF +* + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -16 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL DPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + INDE = 1 + INDWRK = INDE + N + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 + CALL DSBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ, + $ WORK, IINFO ) +* +* Reduce to tridiagonal form. +* + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL DSBTRD( VECT, UPLO, N, KA, AB, LDAB, W, WORK( INDE ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call SSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL DSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL DGEMM( 'N', 'N', N, N, N, ONE, Z, LDZ, WORK( INDWRK ), N, + $ ZERO, WORK( INDWK2 ), N ) + CALL DLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DSBGVD +* + END diff --git a/dspl/liblapack/SRC/dsbgvx.f b/dspl/liblapack/SRC/dsbgvx.f new file mode 100644 index 0000000..eab5ebc --- /dev/null +++ b/dspl/liblapack/SRC/dsbgvx.f @@ -0,0 +1,522 @@ +*> \brief \b DSBGVX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSBGVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSBGVX( JOBZ, RANGE, UPLO, N, KA, KB, AB, LDAB, BB, +* LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, +* LDZ, WORK, IWORK, IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, KA, KB, LDAB, LDBB, LDQ, LDZ, M, +* $ N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION AB( LDAB, * ), BB( LDBB, * ), Q( LDQ, * ), +* $ W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSBGVX computes selected eigenvalues, and optionally, eigenvectors +*> of a real generalized symmetric-definite banded eigenproblem, of +*> the form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric +*> and banded, and B is also positive definite. Eigenvalues and +*> eigenvectors can be selected by specifying either all eigenvalues, +*> a range of values or a range of indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] KA +*> \verbatim +*> KA is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KA >= 0. +*> \endverbatim +*> +*> \param[in] KB +*> \verbatim +*> KB is INTEGER +*> The number of superdiagonals of the matrix B if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KB >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first ka+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +*> +*> On exit, the contents of AB are destroyed. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KA+1. +*> \endverbatim +*> +*> \param[in,out] BB +*> \verbatim +*> BB is DOUBLE PRECISION array, dimension (LDBB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix B, stored in the first kb+1 rows of the array. The +*> j-th column of B is stored in the j-th column of the array BB +*> as follows: +*> if UPLO = 'U', BB(ka+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +*> if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +*> +*> On exit, the factor S from the split Cholesky factorization +*> B = S**T*S, as returned by DPBSTF. +*> \endverbatim +*> +*> \param[in] LDBB +*> \verbatim +*> LDBB is INTEGER +*> The leading dimension of the array BB. LDBB >= KB+1. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ, N) +*> If JOBZ = 'V', the n-by-n matrix used in the reduction of +*> A*x = (lambda)*B*x to standard form, i.e. C*x = (lambda)*x, +*> and consequently C to tridiagonal form. +*> If JOBZ = 'N', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. If JOBZ = 'N', +*> LDQ >= 1. If JOBZ = 'V', LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +*> eigenvectors, with the i-th column of Z holding the +*> eigenvector associated with W(i). The eigenvectors are +*> normalized so Z**T*B*Z = I. +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (M) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvalues that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0 : successful exit +*> < 0 : if INFO = -i, the i-th argument had an illegal value +*> <= N: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in IFAIL. +*> > N : DPBSTF returned an error code; i.e., +*> if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHEReigen +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE DSBGVX( JOBZ, RANGE, UPLO, N, KA, KB, AB, LDAB, BB, + $ LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, + $ LDZ, WORK, IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KA, KB, LDAB, LDBB, LDQ, LDZ, M, + $ N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), BB( LDBB, * ), Q( LDQ, * ), + $ W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, TEST, UPPER, VALEIG, WANTZ + CHARACTER ORDER, VECT + INTEGER I, IINFO, INDD, INDE, INDEE, INDIBL, INDISP, + $ INDIWO, INDWRK, ITMP1, J, JJ, NSPLIT + DOUBLE PRECISION TMP1 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DGEMV, DLACPY, DPBSTF, DSBGST, DSBTRD, + $ DSTEBZ, DSTEIN, DSTEQR, DSTERF, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KA.LT.0 ) THEN + INFO = -5 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -8 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -10 + ELSE IF( LDQ.LT.1 .OR. ( WANTZ .AND. LDQ.LT.N ) ) THEN + INFO = -12 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -14 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -15 + ELSE IF ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -16 + END IF + END IF + END IF + IF( INFO.EQ.0) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -21 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBGVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL DPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + CALL DSBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Q, LDQ, + $ WORK, IINFO ) +* +* Reduce symmetric band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDWRK = INDE + N + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL DSBTRD( VECT, UPLO, N, KA, AB, LDAB, WORK( INDD ), + $ WORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call DSTERF or SSTEQR. If this fails for some +* eigenvalue, then try DSTEBZ. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG .OR. TEST ) .AND. ( ABSTOL.LE.ZERO ) ) THEN + CALL DCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL DLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL DSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, +* call DSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply transformation matrix used in reduction to tridiagonal +* form to eigenvectors returned by DSTEIN. +* + DO 20 J = 1, M + CALL DCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL DGEMV( 'N', N, N, ONE, Q, LDQ, WORK, 1, ZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* + 30 CONTINUE +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* + RETURN +* +* End of DSBGVX +* + END diff --git a/dspl/liblapack/SRC/dsbtrd.f b/dspl/liblapack/SRC/dsbtrd.f new file mode 100644 index 0000000..9ea0c22 --- /dev/null +++ b/dspl/liblapack/SRC/dsbtrd.f @@ -0,0 +1,641 @@ +*> \brief \b DSBTRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSBTRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSBTRD( VECT, UPLO, N, KD, AB, LDAB, D, E, Q, LDQ, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, VECT +* INTEGER INFO, KD, LDAB, LDQ, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), D( * ), E( * ), Q( LDQ, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSBTRD reduces a real symmetric band matrix A to symmetric +*> tridiagonal form T by an orthogonal similarity transformation: +*> Q**T * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': do not form Q; +*> = 'V': form Q; +*> = 'U': update a matrix X, by forming X*Q. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> On exit, the diagonal elements of AB are overwritten by the +*> diagonal elements of the tridiagonal matrix T; if KD > 0, the +*> elements on the first superdiagonal (if UPLO = 'U') or the +*> first subdiagonal (if UPLO = 'L') are overwritten by the +*> off-diagonal elements of T; the rest of AB is overwritten by +*> values generated during the reduction. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> On entry, if VECT = 'U', then Q must contain an N-by-N +*> matrix X; if VECT = 'N' or 'V', then Q need not be set. +*> +*> On exit: +*> if VECT = 'V', Q contains the N-by-N orthogonal matrix Q; +*> if VECT = 'U', Q contains the product X*Q; +*> if VECT = 'N', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= 1, and LDQ >= N if VECT = 'V' or 'U'. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Modified by Linda Kaufman, Bell Labs. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSBTRD( VECT, UPLO, N, KD, AB, LDAB, D, E, Q, LDQ, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO, VECT + INTEGER INFO, KD, LDAB, LDQ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), D( * ), E( * ), Q( LDQ, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL INITQ, UPPER, WANTQ + INTEGER I, I2, IBL, INCA, INCX, IQAEND, IQB, IQEND, J, + $ J1, J1END, J1INC, J2, JEND, JIN, JINC, K, KD1, + $ KDM1, KDN, L, LAST, LEND, NQ, NR, NRT + DOUBLE PRECISION TEMP +* .. +* .. External Subroutines .. + EXTERNAL DLAR2V, DLARGV, DLARTG, DLARTV, DLASET, DROT, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INITQ = LSAME( VECT, 'V' ) + WANTQ = INITQ .OR. LSAME( VECT, 'U' ) + UPPER = LSAME( UPLO, 'U' ) + KD1 = KD + 1 + KDM1 = KD - 1 + INCX = LDAB - 1 + IQEND = 1 +* + INFO = 0 + IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD1 ) THEN + INFO = -6 + ELSE IF( LDQ.LT.MAX( 1, N ) .AND. WANTQ ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSBTRD', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize Q to the unit matrix, if needed +* + IF( INITQ ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) +* +* Wherever possible, plane rotations are generated and applied in +* vector operations of length NR over the index set J1:J2:KD1. +* +* The cosines and sines of the plane rotations are stored in the +* arrays D and WORK. +* + INCA = KD1*LDAB + KDN = MIN( N-1, KD ) + IF( UPPER ) THEN +* + IF( KD.GT.1 ) THEN +* +* Reduce to tridiagonal form, working with upper triangle +* + NR = 0 + J1 = KDN + 2 + J2 = 1 +* + DO 90 I = 1, N - 2 +* +* Reduce i-th row of matrix to tridiagonal form +* + DO 80 K = KDN + 1, 2, -1 + J1 = J1 + KDN + J2 = J2 + KDN +* + IF( NR.GT.0 ) THEN +* +* generate plane rotations to annihilate nonzero +* elements which have been created outside the band +* + CALL DLARGV( NR, AB( 1, J1-1 ), INCA, WORK( J1 ), + $ KD1, D( J1 ), KD1 ) +* +* apply rotations from the right +* +* +* Dependent on the the number of diagonals either +* DLARTV or DROT is used +* + IF( NR.GE.2*KD-1 ) THEN + DO 10 L = 1, KD - 1 + CALL DLARTV( NR, AB( L+1, J1-1 ), INCA, + $ AB( L, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) + 10 CONTINUE +* + ELSE + JEND = J1 + ( NR-1 )*KD1 + DO 20 JINC = J1, JEND, KD1 + CALL DROT( KDM1, AB( 2, JINC-1 ), 1, + $ AB( 1, JINC ), 1, D( JINC ), + $ WORK( JINC ) ) + 20 CONTINUE + END IF + END IF +* +* + IF( K.GT.2 ) THEN + IF( K.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i,i+k-1) +* within the band +* + CALL DLARTG( AB( KD-K+3, I+K-2 ), + $ AB( KD-K+2, I+K-1 ), D( I+K-1 ), + $ WORK( I+K-1 ), TEMP ) + AB( KD-K+3, I+K-2 ) = TEMP +* +* apply rotation from the right +* + CALL DROT( K-3, AB( KD-K+4, I+K-2 ), 1, + $ AB( KD-K+3, I+K-1 ), 1, D( I+K-1 ), + $ WORK( I+K-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KDN - 1 + END IF +* +* apply plane rotations from both sides to diagonal +* blocks +* + IF( NR.GT.0 ) + $ CALL DLAR2V( NR, AB( KD1, J1-1 ), AB( KD1, J1 ), + $ AB( KD, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) +* +* apply plane rotations from the left +* + IF( NR.GT.0 ) THEN + IF( 2*KD-1.LT.NR ) THEN +* +* Dependent on the the number of diagonals either +* DLARTV or DROT is used +* + DO 30 L = 1, KD - 1 + IF( J2+L.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( KD-L, J1+L ), INCA, + $ AB( KD-L+1, J1+L ), INCA, + $ D( J1 ), WORK( J1 ), KD1 ) + 30 CONTINUE + ELSE + J1END = J1 + KD1*( NR-2 ) + IF( J1END.GE.J1 ) THEN + DO 40 JIN = J1, J1END, KD1 + CALL DROT( KD-1, AB( KD-1, JIN+1 ), INCX, + $ AB( KD, JIN+1 ), INCX, + $ D( JIN ), WORK( JIN ) ) + 40 CONTINUE + END IF + LEND = MIN( KDM1, N-J2 ) + LAST = J1END + KD1 + IF( LEND.GT.0 ) + $ CALL DROT( LEND, AB( KD-1, LAST+1 ), INCX, + $ AB( KD, LAST+1 ), INCX, D( LAST ), + $ WORK( LAST ) ) + END IF + END IF +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + IF( INITQ ) THEN +* +* take advantage of the fact that Q was +* initially the Identity matrix +* + IQEND = MAX( IQEND, J2 ) + I2 = MAX( 0, K-3 ) + IQAEND = 1 + I*KD + IF( K.EQ.2 ) + $ IQAEND = IQAEND + KD + IQAEND = MIN( IQAEND, IQEND ) + DO 50 J = J1, J2, KD1 + IBL = I - I2 / KDM1 + I2 = I2 + 1 + IQB = MAX( 1, J-IBL ) + NQ = 1 + IQAEND - IQB + IQAEND = MIN( IQAEND+KD, IQEND ) + CALL DROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ), + $ 1, D( J ), WORK( J ) ) + 50 CONTINUE + ELSE +* + DO 60 J = J1, J2, KD1 + CALL DROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ D( J ), WORK( J ) ) + 60 CONTINUE + END IF +* + END IF +* + IF( J2+KDN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KDN - 1 + END IF +* + DO 70 J = J1, J2, KD1 +* +* create nonzero element a(j-1,j+kd) outside the band +* and store it in WORK +* + WORK( J+KD ) = WORK( J )*AB( 1, J+KD ) + AB( 1, J+KD ) = D( J )*AB( 1, J+KD ) + 70 CONTINUE + 80 CONTINUE + 90 CONTINUE + END IF +* + IF( KD.GT.0 ) THEN +* +* copy off-diagonal elements to E +* + DO 100 I = 1, N - 1 + E( I ) = AB( KD, I+1 ) + 100 CONTINUE + ELSE +* +* set E to zero if original matrix was diagonal +* + DO 110 I = 1, N - 1 + E( I ) = ZERO + 110 CONTINUE + END IF +* +* copy diagonal elements to D +* + DO 120 I = 1, N + D( I ) = AB( KD1, I ) + 120 CONTINUE +* + ELSE +* + IF( KD.GT.1 ) THEN +* +* Reduce to tridiagonal form, working with lower triangle +* + NR = 0 + J1 = KDN + 2 + J2 = 1 +* + DO 210 I = 1, N - 2 +* +* Reduce i-th column of matrix to tridiagonal form +* + DO 200 K = KDN + 1, 2, -1 + J1 = J1 + KDN + J2 = J2 + KDN +* + IF( NR.GT.0 ) THEN +* +* generate plane rotations to annihilate nonzero +* elements which have been created outside the band +* + CALL DLARGV( NR, AB( KD1, J1-KD1 ), INCA, + $ WORK( J1 ), KD1, D( J1 ), KD1 ) +* +* apply plane rotations from one side +* +* +* Dependent on the the number of diagonals either +* DLARTV or DROT is used +* + IF( NR.GT.2*KD-1 ) THEN + DO 130 L = 1, KD - 1 + CALL DLARTV( NR, AB( KD1-L, J1-KD1+L ), INCA, + $ AB( KD1-L+1, J1-KD1+L ), INCA, + $ D( J1 ), WORK( J1 ), KD1 ) + 130 CONTINUE + ELSE + JEND = J1 + KD1*( NR-1 ) + DO 140 JINC = J1, JEND, KD1 + CALL DROT( KDM1, AB( KD, JINC-KD ), INCX, + $ AB( KD1, JINC-KD ), INCX, + $ D( JINC ), WORK( JINC ) ) + 140 CONTINUE + END IF +* + END IF +* + IF( K.GT.2 ) THEN + IF( K.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i+k-1,i) +* within the band +* + CALL DLARTG( AB( K-1, I ), AB( K, I ), + $ D( I+K-1 ), WORK( I+K-1 ), TEMP ) + AB( K-1, I ) = TEMP +* +* apply rotation from the left +* + CALL DROT( K-3, AB( K-2, I+1 ), LDAB-1, + $ AB( K-1, I+1 ), LDAB-1, D( I+K-1 ), + $ WORK( I+K-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KDN - 1 + END IF +* +* apply plane rotations from both sides to diagonal +* blocks +* + IF( NR.GT.0 ) + $ CALL DLAR2V( NR, AB( 1, J1-1 ), AB( 1, J1 ), + $ AB( 2, J1-1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) +* +* apply plane rotations from the right +* +* +* Dependent on the the number of diagonals either +* DLARTV or DROT is used +* + IF( NR.GT.0 ) THEN + IF( NR.GT.2*KD-1 ) THEN + DO 150 L = 1, KD - 1 + IF( J2+L.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL DLARTV( NRT, AB( L+2, J1-1 ), INCA, + $ AB( L+1, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) + 150 CONTINUE + ELSE + J1END = J1 + KD1*( NR-2 ) + IF( J1END.GE.J1 ) THEN + DO 160 J1INC = J1, J1END, KD1 + CALL DROT( KDM1, AB( 3, J1INC-1 ), 1, + $ AB( 2, J1INC ), 1, D( J1INC ), + $ WORK( J1INC ) ) + 160 CONTINUE + END IF + LEND = MIN( KDM1, N-J2 ) + LAST = J1END + KD1 + IF( LEND.GT.0 ) + $ CALL DROT( LEND, AB( 3, LAST-1 ), 1, + $ AB( 2, LAST ), 1, D( LAST ), + $ WORK( LAST ) ) + END IF + END IF +* +* +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + IF( INITQ ) THEN +* +* take advantage of the fact that Q was +* initially the Identity matrix +* + IQEND = MAX( IQEND, J2 ) + I2 = MAX( 0, K-3 ) + IQAEND = 1 + I*KD + IF( K.EQ.2 ) + $ IQAEND = IQAEND + KD + IQAEND = MIN( IQAEND, IQEND ) + DO 170 J = J1, J2, KD1 + IBL = I - I2 / KDM1 + I2 = I2 + 1 + IQB = MAX( 1, J-IBL ) + NQ = 1 + IQAEND - IQB + IQAEND = MIN( IQAEND+KD, IQEND ) + CALL DROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ), + $ 1, D( J ), WORK( J ) ) + 170 CONTINUE + ELSE +* + DO 180 J = J1, J2, KD1 + CALL DROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ D( J ), WORK( J ) ) + 180 CONTINUE + END IF + END IF +* + IF( J2+KDN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KDN - 1 + END IF +* + DO 190 J = J1, J2, KD1 +* +* create nonzero element a(j+kd,j-1) outside the +* band and store it in WORK +* + WORK( J+KD ) = WORK( J )*AB( KD1, J ) + AB( KD1, J ) = D( J )*AB( KD1, J ) + 190 CONTINUE + 200 CONTINUE + 210 CONTINUE + END IF +* + IF( KD.GT.0 ) THEN +* +* copy off-diagonal elements to E +* + DO 220 I = 1, N - 1 + E( I ) = AB( 2, I ) + 220 CONTINUE + ELSE +* +* set E to zero if original matrix was diagonal +* + DO 230 I = 1, N - 1 + E( I ) = ZERO + 230 CONTINUE + END IF +* +* copy diagonal elements to D +* + DO 240 I = 1, N + D( I ) = AB( 1, I ) + 240 CONTINUE + END IF +* + RETURN +* +* End of DSBTRD +* + END diff --git a/dspl/liblapack/SRC/dsfrk.f b/dspl/liblapack/SRC/dsfrk.f new file mode 100644 index 0000000..1fd1763 --- /dev/null +++ b/dspl/liblapack/SRC/dsfrk.f @@ -0,0 +1,544 @@ +*> \brief \b DSFRK performs a symmetric rank-k operation for matrix in RFP format. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSFRK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSFRK( TRANSR, UPLO, TRANS, N, K, ALPHA, A, LDA, BETA, +* C ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION ALPHA, BETA +* INTEGER K, LDA, N +* CHARACTER TRANS, TRANSR, UPLO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), C( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Level 3 BLAS like routine for C in RFP Format. +*> +*> DSFRK performs one of the symmetric rank--k operations +*> +*> C := alpha*A*A**T + beta*C, +*> +*> or +*> +*> C := alpha*A**T*A + beta*C, +*> +*> where alpha and beta are real scalars, C is an n--by--n symmetric +*> matrix and A is an n--by--k matrix in the first case and a k--by--n +*> matrix in the second case. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal Form of RFP A is stored; +*> = 'T': The Transpose Form of RFP A is stored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array C is to be referenced as +*> follows: +*> +*> UPLO = 'U' or 'u' Only the upper triangular part of C +*> is to be referenced. +*> +*> UPLO = 'L' or 'l' Only the lower triangular part of C +*> is to be referenced. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> On entry, TRANS specifies the operation to be performed as +*> follows: +*> +*> TRANS = 'N' or 'n' C := alpha*A*A**T + beta*C. +*> +*> TRANS = 'T' or 't' C := alpha*A**T*A + beta*C. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of the matrix C. N must be +*> at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> On entry with TRANS = 'N' or 'n', K specifies the number +*> of columns of the matrix A, and on entry with TRANS = 'T' +*> or 't', K specifies the number of rows of the matrix A. K +*> must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,ka) +*> where KA +*> is K when TRANS = 'N' or 'n', and is N otherwise. Before +*> entry with TRANS = 'N' or 'n', the leading N--by--K part of +*> the array A must contain the matrix A, otherwise the leading +*> K--by--N part of the array A must contain the matrix A. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. When TRANS = 'N' or 'n' +*> then LDA must be at least max( 1, n ), otherwise LDA must +*> be at least max( 1, k ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION +*> On entry, BETA specifies the scalar beta. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (NT) +*> NT = N*(N+1)/2. On entry, the symmetric matrix C in RFP +*> Format. RFP Format is described by TRANSR, UPLO and N. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DSFRK( TRANSR, UPLO, TRANS, N, K, ALPHA, A, LDA, BETA, + $ C ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER K, LDA, N + CHARACTER TRANS, TRANSR, UPLO +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), C( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, NORMALTRANSR, NISODD, NOTRANS + INTEGER INFO, NROWA, J, NK, N1, N2 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DGEMM, DSYRK +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + NOTRANS = LSAME( TRANS, 'N' ) +* + IF( NOTRANS ) THEN + NROWA = N + ELSE + NROWA = K + END IF +* + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'T' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRANS .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NROWA ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSFRK ', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* +* The quick return case: ((ALPHA.EQ.0).AND.(BETA.NE.ZERO)) is not +* done (it is in DSYRK for example) and left in the general case. +* + IF( ( N.EQ.0 ) .OR. ( ( ( ALPHA.EQ.ZERO ) .OR. ( K.EQ.0 ) ) .AND. + $ ( BETA.EQ.ONE ) ) )RETURN +* + IF( ( ALPHA.EQ.ZERO ) .AND. ( BETA.EQ.ZERO ) ) THEN + DO J = 1, ( ( N*( N+1 ) ) / 2 ) + C( J ) = ZERO + END DO + RETURN + END IF +* +* C is N-by-N. +* If N is odd, set NISODD = .TRUE., and N1 and N2. +* If N is even, NISODD = .FALSE., and NK. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + NISODD = .FALSE. + NK = N / 2 + ELSE + NISODD = .TRUE. + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF + END IF +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* N is odd, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* N is odd, TRANSR = 'N', UPLO = 'L', and TRANS = 'N' +* + CALL DSYRK( 'L', 'N', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 1 ), N ) + CALL DSYRK( 'U', 'N', N2, K, ALPHA, A( N1+1, 1 ), LDA, + $ BETA, C( N+1 ), N ) + CALL DGEMM( 'N', 'T', N2, N1, K, ALPHA, A( N1+1, 1 ), + $ LDA, A( 1, 1 ), LDA, BETA, C( N1+1 ), N ) +* + ELSE +* +* N is odd, TRANSR = 'N', UPLO = 'L', and TRANS = 'T' +* + CALL DSYRK( 'L', 'T', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 1 ), N ) + CALL DSYRK( 'U', 'T', N2, K, ALPHA, A( 1, N1+1 ), LDA, + $ BETA, C( N+1 ), N ) + CALL DGEMM( 'T', 'N', N2, N1, K, ALPHA, A( 1, N1+1 ), + $ LDA, A( 1, 1 ), LDA, BETA, C( N1+1 ), N ) +* + END IF +* + ELSE +* +* N is odd, TRANSR = 'N', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* N is odd, TRANSR = 'N', UPLO = 'U', and TRANS = 'N' +* + CALL DSYRK( 'L', 'N', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( N2+1 ), N ) + CALL DSYRK( 'U', 'N', N2, K, ALPHA, A( N2, 1 ), LDA, + $ BETA, C( N1+1 ), N ) + CALL DGEMM( 'N', 'T', N1, N2, K, ALPHA, A( 1, 1 ), + $ LDA, A( N2, 1 ), LDA, BETA, C( 1 ), N ) +* + ELSE +* +* N is odd, TRANSR = 'N', UPLO = 'U', and TRANS = 'T' +* + CALL DSYRK( 'L', 'T', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( N2+1 ), N ) + CALL DSYRK( 'U', 'T', N2, K, ALPHA, A( 1, N2 ), LDA, + $ BETA, C( N1+1 ), N ) + CALL DGEMM( 'T', 'N', N1, N2, K, ALPHA, A( 1, 1 ), + $ LDA, A( 1, N2 ), LDA, BETA, C( 1 ), N ) +* + END IF +* + END IF +* + ELSE +* +* N is odd, and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* N is odd, TRANSR = 'T', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* N is odd, TRANSR = 'T', UPLO = 'L', and TRANS = 'N' +* + CALL DSYRK( 'U', 'N', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 1 ), N1 ) + CALL DSYRK( 'L', 'N', N2, K, ALPHA, A( N1+1, 1 ), LDA, + $ BETA, C( 2 ), N1 ) + CALL DGEMM( 'N', 'T', N1, N2, K, ALPHA, A( 1, 1 ), + $ LDA, A( N1+1, 1 ), LDA, BETA, + $ C( N1*N1+1 ), N1 ) +* + ELSE +* +* N is odd, TRANSR = 'T', UPLO = 'L', and TRANS = 'T' +* + CALL DSYRK( 'U', 'T', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 1 ), N1 ) + CALL DSYRK( 'L', 'T', N2, K, ALPHA, A( 1, N1+1 ), LDA, + $ BETA, C( 2 ), N1 ) + CALL DGEMM( 'T', 'N', N1, N2, K, ALPHA, A( 1, 1 ), + $ LDA, A( 1, N1+1 ), LDA, BETA, + $ C( N1*N1+1 ), N1 ) +* + END IF +* + ELSE +* +* N is odd, TRANSR = 'T', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* N is odd, TRANSR = 'T', UPLO = 'U', and TRANS = 'N' +* + CALL DSYRK( 'U', 'N', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( N2*N2+1 ), N2 ) + CALL DSYRK( 'L', 'N', N2, K, ALPHA, A( N1+1, 1 ), LDA, + $ BETA, C( N1*N2+1 ), N2 ) + CALL DGEMM( 'N', 'T', N2, N1, K, ALPHA, A( N1+1, 1 ), + $ LDA, A( 1, 1 ), LDA, BETA, C( 1 ), N2 ) +* + ELSE +* +* N is odd, TRANSR = 'T', UPLO = 'U', and TRANS = 'T' +* + CALL DSYRK( 'U', 'T', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( N2*N2+1 ), N2 ) + CALL DSYRK( 'L', 'T', N2, K, ALPHA, A( 1, N1+1 ), LDA, + $ BETA, C( N1*N2+1 ), N2 ) + CALL DGEMM( 'T', 'N', N2, N1, K, ALPHA, A( 1, N1+1 ), + $ LDA, A( 1, 1 ), LDA, BETA, C( 1 ), N2 ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* N is even, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* N is even, TRANSR = 'N', UPLO = 'L', and TRANS = 'N' +* + CALL DSYRK( 'L', 'N', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 2 ), N+1 ) + CALL DSYRK( 'U', 'N', NK, K, ALPHA, A( NK+1, 1 ), LDA, + $ BETA, C( 1 ), N+1 ) + CALL DGEMM( 'N', 'T', NK, NK, K, ALPHA, A( NK+1, 1 ), + $ LDA, A( 1, 1 ), LDA, BETA, C( NK+2 ), + $ N+1 ) +* + ELSE +* +* N is even, TRANSR = 'N', UPLO = 'L', and TRANS = 'T' +* + CALL DSYRK( 'L', 'T', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 2 ), N+1 ) + CALL DSYRK( 'U', 'T', NK, K, ALPHA, A( 1, NK+1 ), LDA, + $ BETA, C( 1 ), N+1 ) + CALL DGEMM( 'T', 'N', NK, NK, K, ALPHA, A( 1, NK+1 ), + $ LDA, A( 1, 1 ), LDA, BETA, C( NK+2 ), + $ N+1 ) +* + END IF +* + ELSE +* +* N is even, TRANSR = 'N', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* N is even, TRANSR = 'N', UPLO = 'U', and TRANS = 'N' +* + CALL DSYRK( 'L', 'N', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK+2 ), N+1 ) + CALL DSYRK( 'U', 'N', NK, K, ALPHA, A( NK+1, 1 ), LDA, + $ BETA, C( NK+1 ), N+1 ) + CALL DGEMM( 'N', 'T', NK, NK, K, ALPHA, A( 1, 1 ), + $ LDA, A( NK+1, 1 ), LDA, BETA, C( 1 ), + $ N+1 ) +* + ELSE +* +* N is even, TRANSR = 'N', UPLO = 'U', and TRANS = 'T' +* + CALL DSYRK( 'L', 'T', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK+2 ), N+1 ) + CALL DSYRK( 'U', 'T', NK, K, ALPHA, A( 1, NK+1 ), LDA, + $ BETA, C( NK+1 ), N+1 ) + CALL DGEMM( 'T', 'N', NK, NK, K, ALPHA, A( 1, 1 ), + $ LDA, A( 1, NK+1 ), LDA, BETA, C( 1 ), + $ N+1 ) +* + END IF +* + END IF +* + ELSE +* +* N is even, and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* N is even, TRANSR = 'T', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* N is even, TRANSR = 'T', UPLO = 'L', and TRANS = 'N' +* + CALL DSYRK( 'U', 'N', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK+1 ), NK ) + CALL DSYRK( 'L', 'N', NK, K, ALPHA, A( NK+1, 1 ), LDA, + $ BETA, C( 1 ), NK ) + CALL DGEMM( 'N', 'T', NK, NK, K, ALPHA, A( 1, 1 ), + $ LDA, A( NK+1, 1 ), LDA, BETA, + $ C( ( ( NK+1 )*NK )+1 ), NK ) +* + ELSE +* +* N is even, TRANSR = 'T', UPLO = 'L', and TRANS = 'T' +* + CALL DSYRK( 'U', 'T', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK+1 ), NK ) + CALL DSYRK( 'L', 'T', NK, K, ALPHA, A( 1, NK+1 ), LDA, + $ BETA, C( 1 ), NK ) + CALL DGEMM( 'T', 'N', NK, NK, K, ALPHA, A( 1, 1 ), + $ LDA, A( 1, NK+1 ), LDA, BETA, + $ C( ( ( NK+1 )*NK )+1 ), NK ) +* + END IF +* + ELSE +* +* N is even, TRANSR = 'T', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* N is even, TRANSR = 'T', UPLO = 'U', and TRANS = 'N' +* + CALL DSYRK( 'U', 'N', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK*( NK+1 )+1 ), NK ) + CALL DSYRK( 'L', 'N', NK, K, ALPHA, A( NK+1, 1 ), LDA, + $ BETA, C( NK*NK+1 ), NK ) + CALL DGEMM( 'N', 'T', NK, NK, K, ALPHA, A( NK+1, 1 ), + $ LDA, A( 1, 1 ), LDA, BETA, C( 1 ), NK ) +* + ELSE +* +* N is even, TRANSR = 'T', UPLO = 'U', and TRANS = 'T' +* + CALL DSYRK( 'U', 'T', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK*( NK+1 )+1 ), NK ) + CALL DSYRK( 'L', 'T', NK, K, ALPHA, A( 1, NK+1 ), LDA, + $ BETA, C( NK*NK+1 ), NK ) + CALL DGEMM( 'T', 'N', NK, NK, K, ALPHA, A( 1, NK+1 ), + $ LDA, A( 1, 1 ), LDA, BETA, C( 1 ), NK ) +* + END IF +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of DSFRK +* + END diff --git a/dspl/liblapack/SRC/dsgesv.f b/dspl/liblapack/SRC/dsgesv.f new file mode 100644 index 0000000..f47327d --- /dev/null +++ b/dspl/liblapack/SRC/dsgesv.f @@ -0,0 +1,433 @@ +*> \brief DSGESV computes the solution to system of linear equations A * X = B for GE matrices (mixed precision with iterative refinement) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSGESV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK, +* SWORK, ITER, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, ITER, LDA, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL SWORK( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( N, * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSGESV computes the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +*> +*> DSGESV first attempts to factorize the matrix in SINGLE PRECISION +*> and use this factorization within an iterative refinement procedure +*> to produce a solution with DOUBLE PRECISION normwise backward error +*> quality (see below). If the approach fails the method switches to a +*> DOUBLE PRECISION factorization and solve. +*> +*> The iterative refinement is not going to be a winning strategy if +*> the ratio SINGLE PRECISION performance over DOUBLE PRECISION +*> performance is too small. A reasonable strategy should take the +*> number of right-hand sides and the size of the matrix into account. +*> This might be done with a call to ILAENV in the future. Up to now, we +*> always try iterative refinement. +*> +*> The iterative refinement process is stopped if +*> ITER > ITERMAX +*> or for all the RHS we have: +*> RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX +*> where +*> o ITER is the number of the current iteration in the iterative +*> refinement process +*> o RNRM is the infinity-norm of the residual +*> o XNRM is the infinity-norm of the solution +*> o ANRM is the infinity-operator-norm of the matrix A +*> o EPS is the machine epsilon returned by DLAMCH('Epsilon') +*> The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 +*> respectively. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, +*> dimension (LDA,N) +*> On entry, the N-by-N coefficient matrix A. +*> On exit, if iterative refinement has been successfully used +*> (INFO.EQ.0 and ITER.GE.0, see description below), then A is +*> unchanged, if double precision factorization has been used +*> (INFO.EQ.0 and ITER.LT.0, see description below), then the +*> array A contains the factors L and U from the factorization +*> A = P*L*U; the unit diagonal elements of L are not stored. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices that define the permutation matrix P; +*> row i of the matrix was interchanged with row IPIV(i). +*> Corresponds either to the single precision factorization +*> (if INFO.EQ.0 and ITER.GE.0) or the double precision +*> factorization (if INFO.EQ.0 and ITER.LT.0). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N,NRHS) +*> This array is used to hold the residual vectors. +*> \endverbatim +*> +*> \param[out] SWORK +*> \verbatim +*> SWORK is REAL array, dimension (N*(N+NRHS)) +*> This array is used to use the single precision matrix and the +*> right-hand sides or solutions in single precision. +*> \endverbatim +*> +*> \param[out] ITER +*> \verbatim +*> ITER is INTEGER +*> < 0: iterative refinement has failed, double precision +*> factorization has been performed +*> -1 : the routine fell back to full precision for +*> implementation- or machine-specific reasons +*> -2 : narrowing the precision induced an overflow, +*> the routine fell back to full precision +*> -3 : failure of SGETRF +*> -31: stop the iterative refinement after the 30th +*> iterations +*> > 0: iterative refinement has been successfully used. +*> Returns the number of iterations +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) computed in DOUBLE PRECISION is +*> exactly zero. The factorization has been completed, +*> but the factor U is exactly singular, so the solution +*> could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleGEsolve +* +* ===================================================================== + SUBROUTINE DSGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK, + $ SWORK, ITER, INFO ) +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, ITER, LDA, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL SWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( N, * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + LOGICAL DOITREF + PARAMETER ( DOITREF = .TRUE. ) +* + INTEGER ITERMAX + PARAMETER ( ITERMAX = 30 ) +* + DOUBLE PRECISION BWDMAX + PARAMETER ( BWDMAX = 1.0E+00 ) +* + DOUBLE PRECISION NEGONE, ONE + PARAMETER ( NEGONE = -1.0D+0, ONE = 1.0D+0 ) +* +* .. Local Scalars .. + INTEGER I, IITER, PTSA, PTSX + DOUBLE PRECISION ANRM, CTE, EPS, RNRM, XNRM +* +* .. External Subroutines .. + EXTERNAL DAXPY, DGEMM, DLACPY, DLAG2S, DGETRF, DGETRS, + $ SGETRF, SGETRS, SLAG2D, XERBLA +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DLANGE + EXTERNAL IDAMAX, DLAMCH, DLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 + ITER = 0 +* +* Test the input parameters. +* + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSGESV', -INFO ) + RETURN + END IF +* +* Quick return if (N.EQ.0). +* + IF( N.EQ.0 ) + $ RETURN +* +* Skip single precision iterative refinement if a priori slower +* than double precision factorization. +* + IF( .NOT.DOITREF ) THEN + ITER = -1 + GO TO 40 + END IF +* +* Compute some constants. +* + ANRM = DLANGE( 'I', N, N, A, LDA, WORK ) + EPS = DLAMCH( 'Epsilon' ) + CTE = ANRM*EPS*SQRT( DBLE( N ) )*BWDMAX +* +* Set the indices PTSA, PTSX for referencing SA and SX in SWORK. +* + PTSA = 1 + PTSX = PTSA + N*N +* +* Convert B from double precision to single precision and store the +* result in SX. +* + CALL DLAG2S( N, NRHS, B, LDB, SWORK( PTSX ), N, INFO ) +* + IF( INFO.NE.0 ) THEN + ITER = -2 + GO TO 40 + END IF +* +* Convert A from double precision to single precision and store the +* result in SA. +* + CALL DLAG2S( N, N, A, LDA, SWORK( PTSA ), N, INFO ) +* + IF( INFO.NE.0 ) THEN + ITER = -2 + GO TO 40 + END IF +* +* Compute the LU factorization of SA. +* + CALL SGETRF( N, N, SWORK( PTSA ), N, IPIV, INFO ) +* + IF( INFO.NE.0 ) THEN + ITER = -3 + GO TO 40 + END IF +* +* Solve the system SA*SX = SB. +* + CALL SGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV, + $ SWORK( PTSX ), N, INFO ) +* +* Convert SX back to double precision +* + CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, X, LDX, INFO ) +* +* Compute R = B - AX (R is WORK). +* + CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N ) +* + CALL DGEMM( 'No Transpose', 'No Transpose', N, NRHS, N, NEGONE, A, + $ LDA, X, LDX, ONE, WORK, N ) +* +* Check whether the NRHS normwise backward errors satisfy the +* stopping criterion. If yes, set ITER=0 and return. +* + DO I = 1, NRHS + XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) ) + RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) ) + IF( RNRM.GT.XNRM*CTE ) + $ GO TO 10 + END DO +* +* If we are here, the NRHS normwise backward errors satisfy the +* stopping criterion. We are good to exit. +* + ITER = 0 + RETURN +* + 10 CONTINUE +* + DO 30 IITER = 1, ITERMAX +* +* Convert R (in WORK) from double precision to single precision +* and store the result in SX. +* + CALL DLAG2S( N, NRHS, WORK, N, SWORK( PTSX ), N, INFO ) +* + IF( INFO.NE.0 ) THEN + ITER = -2 + GO TO 40 + END IF +* +* Solve the system SA*SX = SR. +* + CALL SGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV, + $ SWORK( PTSX ), N, INFO ) +* +* Convert SX back to double precision and update the current +* iterate. +* + CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, WORK, N, INFO ) +* + DO I = 1, NRHS + CALL DAXPY( N, ONE, WORK( 1, I ), 1, X( 1, I ), 1 ) + END DO +* +* Compute R = B - AX (R is WORK). +* + CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N ) +* + CALL DGEMM( 'No Transpose', 'No Transpose', N, NRHS, N, NEGONE, + $ A, LDA, X, LDX, ONE, WORK, N ) +* +* Check whether the NRHS normwise backward errors satisfy the +* stopping criterion. If yes, set ITER=IITER>0 and return. +* + DO I = 1, NRHS + XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) ) + RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) ) + IF( RNRM.GT.XNRM*CTE ) + $ GO TO 20 + END DO +* +* If we are here, the NRHS normwise backward errors satisfy the +* stopping criterion, we are good to exit. +* + ITER = IITER +* + RETURN +* + 20 CONTINUE +* + 30 CONTINUE +* +* If we are at this place of the code, this is because we have +* performed ITER=ITERMAX iterations and never satisified the +* stopping criterion, set up the ITER flag accordingly and follow up +* on double precision routine. +* + ITER = -ITERMAX - 1 +* + 40 CONTINUE +* +* Single-precision iterative refinement failed to converge to a +* satisfactory solution, so we resort to double precision. +* + CALL DGETRF( N, N, A, LDA, IPIV, INFO ) +* + IF( INFO.NE.0 ) + $ RETURN +* + CALL DLACPY( 'All', N, NRHS, B, LDB, X, LDX ) + CALL DGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, X, LDX, + $ INFO ) +* + RETURN +* +* End of DSGESV. +* + END diff --git a/dspl/liblapack/SRC/dspcon.f b/dspl/liblapack/SRC/dspcon.f new file mode 100644 index 0000000..b422f84 --- /dev/null +++ b/dspl/liblapack/SRC/dspcon.f @@ -0,0 +1,238 @@ +*> \brief \b DSPCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSPCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSPCON estimates the reciprocal of the condition number (in the +*> 1-norm) of a real symmetric packed matrix A using the factorization +*> A = U*D*U**T or A = L*D*L**T computed by DSPTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by DSPTRF, stored as a +*> packed triangular matrix. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSPTRF. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DSPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IP, KASE + DOUBLE PRECISION AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLACN2, DSPTRS, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + IP = N*( N+1 ) / 2 + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP - I + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + IP = 1 + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP + N - I + 1 + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**T) or inv(U*D*U**T). +* + CALL DSPTRS( UPLO, N, 1, AP, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of DSPCON +* + END diff --git a/dspl/liblapack/SRC/dspev.f b/dspl/liblapack/SRC/dspev.f new file mode 100644 index 0000000..f314279 --- /dev/null +++ b/dspl/liblapack/SRC/dspev.f @@ -0,0 +1,262 @@ +*> \brief DSPEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSPEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AP( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSPEV computes all the eigenvalues and, optionally, eigenvectors of a +*> real symmetric matrix A in packed storage. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, AP is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the diagonal +*> and first superdiagonal of the tridiagonal matrix T overwrite +*> the corresponding elements of A, and if UPLO = 'L', the +*> diagonal and first subdiagonal of T overwrite the +*> corresponding elements of A. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHEReigen +* +* ===================================================================== + SUBROUTINE DSPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSP + EXTERNAL LSAME, DLAMCH, DLANSP +* .. +* .. External Subroutines .. + EXTERNAL DOPGTR, DSCAL, DSPTRD, DSTEQR, DSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( UPLO, 'U' ) .OR. LSAME( UPLO, 'L' ) ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPEV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AP( 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSP( 'M', UPLO, N, AP, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL DSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + END IF +* +* Call DSPTRD to reduce symmetric packed matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + CALL DSPTRD( UPLO, N, AP, W, WORK( INDE ), WORK( INDTAU ), IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* DOPGTR to generate the orthogonal matrix, then call DSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + INDWRK = INDTAU + N + CALL DOPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + CALL DSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDTAU ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + RETURN +* +* End of DSPEV +* + END diff --git a/dspl/liblapack/SRC/dspevd.f b/dspl/liblapack/SRC/dspevd.f new file mode 100644 index 0000000..5b99d75 --- /dev/null +++ b/dspl/liblapack/SRC/dspevd.f @@ -0,0 +1,337 @@ +*> \brief DSPEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSPEVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, +* IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION AP( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSPEVD computes all the eigenvalues and, optionally, eigenvectors +*> of a real symmetric matrix A in packed storage. If eigenvectors are +*> desired, it uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, AP is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the diagonal +*> and first superdiagonal of the tridiagonal matrix T overwrite +*> the corresponding elements of A, and if UPLO = 'L', the +*> diagonal and first subdiagonal of T overwrite the +*> corresponding elements of A. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the required LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LWORK must be at least 2*N. +*> If JOBZ = 'V' and N > 1, LWORK must be at least +*> 1 + 6*N + N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the required sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the required LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the required sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleOTHEReigen +* +* ===================================================================== + SUBROUTINE DSPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, + $ IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AP( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTZ + INTEGER IINFO, INDE, INDTAU, INDWRK, ISCALE, LIWMIN, + $ LLWORK, LWMIN + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSP + EXTERNAL LSAME, DLAMCH, DLANSP +* .. +* .. External Subroutines .. + EXTERNAL DOPMTR, DSCAL, DSPTRD, DSTEDC, DSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( UPLO, 'U' ) .OR. LSAME( UPLO, 'L' ) ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 6*N + N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + END IF + END IF + IWORK( 1 ) = LIWMIN + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -9 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AP( 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSP( 'M', UPLO, N, AP, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL DSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + END IF +* +* Call DSPTRD to reduce symmetric packed matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + CALL DSPTRD( UPLO, N, AP, W, WORK( INDE ), WORK( INDTAU ), IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call DOPMTR to multiply it by the +* Householder transformations represented in AP. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + CALL DSTEDC( 'I', N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ), + $ LLWORK, IWORK, LIWORK, INFO ) + CALL DOPMTR( 'L', UPLO, 'N', N, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL DSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of DSPEVD +* + END diff --git a/dspl/liblapack/SRC/dspevx.f b/dspl/liblapack/SRC/dspevx.f new file mode 100644 index 0000000..d66dc18 --- /dev/null +++ b/dspl/liblapack/SRC/dspevx.f @@ -0,0 +1,496 @@ +*> \brief DSPEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSPEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, +* ABSTOL, M, W, Z, LDZ, WORK, IWORK, IFAIL, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDZ, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION AP( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSPEVX computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric matrix A in packed storage. Eigenvalues/vectors +*> can be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found; +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found; +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, AP is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the diagonal +*> and first superdiagonal of the tridiagonal matrix T overwrite +*> the corresponding elements of A, and if UPLO = 'L', the +*> diagonal and first subdiagonal of T overwrite the +*> corresponding elements of A. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing AP to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the selected eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (8*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHEReigen +* +* ===================================================================== + SUBROUTINE DSPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, WORK, IWORK, IFAIL, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDZ, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION AP( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, TEST, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWO, INDTAU, INDWRK, ISCALE, ITMP1, + $ J, JJ, NSPLIT + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSP + EXTERNAL LSAME, DLAMCH, DLANSP +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DOPGTR, DOPMTR, DSCAL, DSPTRD, DSTEBZ, + $ DSTEIN, DSTEQR, DSTERF, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) ) + $ THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -7 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -9 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -14 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPEVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = AP( 1 ) + ELSE + IF( VL.LT.AP( 1 ) .AND. VU.GE.AP( 1 ) ) THEN + M = 1 + W( 1 ) = AP( 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + END IF + ANRM = DLANSP( 'M', UPLO, N, AP, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL DSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call DSPTRD to reduce symmetric packed matrix to tridiagonal form. +* + INDTAU = 1 + INDE = INDTAU + N + INDD = INDE + N + INDWRK = INDD + N + CALL DSPTRD( UPLO, N, AP, WORK( INDD ), WORK( INDE ), + $ WORK( INDTAU ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call DSTERF or DOPGTR and SSTEQR. If this fails +* for some eigenvalue, then try DSTEBZ. +* + TEST = .FALSE. + IF (INDEIG) THEN + IF (IL.EQ.1 .AND. IU.EQ.N) THEN + TEST = .TRUE. + END IF + END IF + IF ((ALLEIG .OR. TEST) .AND. (ABSTOL.LE.ZERO)) THEN + CALL DCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL DOPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 20 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by DSTEIN. +* + CALL DOPMTR( 'L', UPLO, 'N', N, M, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 20 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 40 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 30 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 30 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 40 CONTINUE + END IF +* + RETURN +* +* End of DSPEVX +* + END diff --git a/dspl/liblapack/SRC/dspgst.f b/dspl/liblapack/SRC/dspgst.f new file mode 100644 index 0000000..59cda68 --- /dev/null +++ b/dspl/liblapack/SRC/dspgst.f @@ -0,0 +1,274 @@ +*> \brief \b DSPGST +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSPGST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSPGST( ITYPE, UPLO, N, AP, BP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, ITYPE, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AP( * ), BP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSPGST reduces a real symmetric-definite generalized eigenproblem +*> to standard form, using packed storage. +*> +*> If ITYPE = 1, the problem is A*x = lambda*B*x, +*> and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) +*> +*> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +*> B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. +*> +*> B must have been previously factorized as U**T*U or L*L**T by DPPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); +*> = 2 or 3: compute U*A*U**T or L**T*A*L. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored and B is factored as +*> U**T*U; +*> = 'L': Lower triangle of A is stored and B is factored as +*> L*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, if INFO = 0, the transformed matrix, stored in the +*> same format as A. +*> \endverbatim +*> +*> \param[in] BP +*> \verbatim +*> BP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> The triangular factor from the Cholesky factorization of B, +*> stored in the same format as A, as returned by DPPTRF. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DSPGST( ITYPE, UPLO, N, AP, BP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), BP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, HALF + PARAMETER ( ONE = 1.0D0, HALF = 0.5D0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, J1, J1J1, JJ, K, K1, K1K1, KK + DOUBLE PRECISION AJJ, AKK, BJJ, BKK, CT +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DSCAL, DSPMV, DSPR2, DTPMV, DTPSV, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPGST', -INFO ) + RETURN + END IF +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U**T)*A*inv(U) +* +* J1 and JJ are the indices of A(1,j) and A(j,j) +* + JJ = 0 + DO 10 J = 1, N + J1 = JJ + 1 + JJ = JJ + J +* +* Compute the j-th column of the upper triangle of A +* + BJJ = BP( JJ ) + CALL DTPSV( UPLO, 'Transpose', 'Nonunit', J, BP, + $ AP( J1 ), 1 ) + CALL DSPMV( UPLO, J-1, -ONE, AP, BP( J1 ), 1, ONE, + $ AP( J1 ), 1 ) + CALL DSCAL( J-1, ONE / BJJ, AP( J1 ), 1 ) + AP( JJ ) = ( AP( JJ )-DDOT( J-1, AP( J1 ), 1, BP( J1 ), + $ 1 ) ) / BJJ + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L**T) +* +* KK and K1K1 are the indices of A(k,k) and A(k+1,k+1) +* + KK = 1 + DO 20 K = 1, N + K1K1 = KK + N - K + 1 +* +* Update the lower triangle of A(k:n,k:n) +* + AKK = AP( KK ) + BKK = BP( KK ) + AKK = AKK / BKK**2 + AP( KK ) = AKK + IF( K.LT.N ) THEN + CALL DSCAL( N-K, ONE / BKK, AP( KK+1 ), 1 ) + CT = -HALF*AKK + CALL DAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 ) + CALL DSPR2( UPLO, N-K, -ONE, AP( KK+1 ), 1, + $ BP( KK+1 ), 1, AP( K1K1 ) ) + CALL DAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 ) + CALL DTPSV( UPLO, 'No transpose', 'Non-unit', N-K, + $ BP( K1K1 ), AP( KK+1 ), 1 ) + END IF + KK = K1K1 + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U**T +* +* K1 and KK are the indices of A(1,k) and A(k,k) +* + KK = 0 + DO 30 K = 1, N + K1 = KK + 1 + KK = KK + K +* +* Update the upper triangle of A(1:k,1:k) +* + AKK = AP( KK ) + BKK = BP( KK ) + CALL DTPMV( UPLO, 'No transpose', 'Non-unit', K-1, BP, + $ AP( K1 ), 1 ) + CT = HALF*AKK + CALL DAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 ) + CALL DSPR2( UPLO, K-1, ONE, AP( K1 ), 1, BP( K1 ), 1, + $ AP ) + CALL DAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 ) + CALL DSCAL( K-1, BKK, AP( K1 ), 1 ) + AP( KK ) = AKK*BKK**2 + 30 CONTINUE + ELSE +* +* Compute L**T *A*L +* +* JJ and J1J1 are the indices of A(j,j) and A(j+1,j+1) +* + JJ = 1 + DO 40 J = 1, N + J1J1 = JJ + N - J + 1 +* +* Compute the j-th column of the lower triangle of A +* + AJJ = AP( JJ ) + BJJ = BP( JJ ) + AP( JJ ) = AJJ*BJJ + DDOT( N-J, AP( JJ+1 ), 1, + $ BP( JJ+1 ), 1 ) + CALL DSCAL( N-J, BJJ, AP( JJ+1 ), 1 ) + CALL DSPMV( UPLO, N-J, ONE, AP( J1J1 ), BP( JJ+1 ), 1, + $ ONE, AP( JJ+1 ), 1 ) + CALL DTPMV( UPLO, 'Transpose', 'Non-unit', N-J+1, + $ BP( JJ ), AP( JJ ), 1 ) + JJ = J1J1 + 40 CONTINUE + END IF + END IF + RETURN +* +* End of DSPGST +* + END diff --git a/dspl/liblapack/SRC/dspgv.f b/dspl/liblapack/SRC/dspgv.f new file mode 100644 index 0000000..f7313ff --- /dev/null +++ b/dspl/liblapack/SRC/dspgv.f @@ -0,0 +1,277 @@ +*> \brief \b DSPGV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSPGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSPGV( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDZ, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AP( * ), BP( * ), W( * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSPGV computes all the eigenvalues and, optionally, the eigenvectors +*> of a real generalized symmetric-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +*> Here A and B are assumed to be symmetric, stored in packed format, +*> and B is also positive definite. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the contents of AP are destroyed. +*> \endverbatim +*> +*> \param[in,out] BP +*> \verbatim +*> BP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> B, packed columnwise in a linear array. The j-th column of B +*> is stored in the array BP as follows: +*> if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +*> if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +*> +*> On exit, the triangular factor U or L from the Cholesky +*> factorization B = U**T*U or B = L*L**T, in the same storage +*> format as B. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +*> eigenvectors. The eigenvectors are normalized as follows: +*> if ITYPE = 1 or 2, Z**T*B*Z = I; +*> if ITYPE = 3, Z**T*inv(B)*Z = I. +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: DPPTRF or DSPEV returned an error code: +*> <= N: if INFO = i, DSPEV failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero. +*> > N: if INFO = n + i, for 1 <= i <= n, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleOTHEReigen +* +* ===================================================================== + SUBROUTINE DSPGV( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), BP( * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, WANTZ + CHARACTER TRANS + INTEGER J, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DPPTRF, DSPEV, DSPGST, DTPMV, DTPSV, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPGV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL DPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL DSPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL DSPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**T*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + DO 10 J = 1, NEIG + CALL DTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**T*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, NEIG + CALL DTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF + RETURN +* +* End of DSPGV +* + END diff --git a/dspl/liblapack/SRC/dspgvd.f b/dspl/liblapack/SRC/dspgvd.f new file mode 100644 index 0000000..71b290b --- /dev/null +++ b/dspl/liblapack/SRC/dspgvd.f @@ -0,0 +1,364 @@ +*> \brief \b DSPGVD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSPGVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, +* LWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION AP( * ), BP( * ), W( * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSPGVD computes all the eigenvalues, and optionally, the eigenvectors +*> of a real generalized symmetric-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +*> B are assumed to be symmetric, stored in packed format, and B is also +*> positive definite. +*> If eigenvectors are desired, it uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the contents of AP are destroyed. +*> \endverbatim +*> +*> \param[in,out] BP +*> \verbatim +*> BP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> B, packed columnwise in a linear array. The j-th column of B +*> is stored in the array BP as follows: +*> if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +*> if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +*> +*> On exit, the triangular factor U or L from the Cholesky +*> factorization B = U**T*U or B = L*L**T, in the same storage +*> format as B. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +*> eigenvectors. The eigenvectors are normalized as follows: +*> if ITYPE = 1 or 2, Z**T*B*Z = I; +*> if ITYPE = 3, Z**T*inv(B)*Z = I. +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the required LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK >= 1. +*> If JOBZ = 'N' and N > 1, LWORK >= 2*N. +*> If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the required sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the required LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK >= 1. +*> If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the required sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: DPPTRF or DSPEVD returned an error code: +*> <= N: if INFO = i, DSPEVD failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHEReigen +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE DSPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, + $ LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AP( * ), BP( * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER J, LIWMIN, LWMIN, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DPPTRF, DSPEVD, DSPGST, DTPMV, DTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 6*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + END IF + END IF + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of BP. +* + CALL DPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL DSPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL DSPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) + LWMIN = MAX( DBLE( LWMIN ), DBLE( WORK( 1 ) ) ) + LIWMIN = MAX( DBLE( LIWMIN ), DBLE( IWORK( 1 ) ) ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**T *y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + DO 10 J = 1, NEIG + CALL DTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**T *y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, NEIG + CALL DTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DSPGVD +* + END diff --git a/dspl/liblapack/SRC/dspgvx.f b/dspl/liblapack/SRC/dspgvx.f new file mode 100644 index 0000000..8619ef7 --- /dev/null +++ b/dspl/liblapack/SRC/dspgvx.f @@ -0,0 +1,417 @@ +*> \brief \b DSPGVX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSPGVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSPGVX( ITYPE, JOBZ, RANGE, UPLO, N, AP, BP, VL, VU, +* IL, IU, ABSTOL, M, W, Z, LDZ, WORK, IWORK, +* IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, ITYPE, IU, LDZ, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION AP( * ), BP( * ), W( * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSPGVX computes selected eigenvalues, and optionally, eigenvectors +*> of a real generalized symmetric-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A +*> and B are assumed to be symmetric, stored in packed storage, and B +*> is also positive definite. Eigenvalues and eigenvectors can be +*> selected by specifying either a range of values or a range of indices +*> for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A and B are stored; +*> = 'L': Lower triangle of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix pencil (A,B). N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the contents of AP are destroyed. +*> \endverbatim +*> +*> \param[in,out] BP +*> \verbatim +*> BP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> B, packed columnwise in a linear array. The j-th column of B +*> is stored in the array BP as follows: +*> if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +*> if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +*> +*> On exit, the triangular factor U or L from the Cholesky +*> factorization B = U**T*U or B = L*L**T, in the same storage +*> format as B. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> On normal exit, the first M elements contain the selected +*> eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'N', then Z is not referenced. +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> The eigenvectors are normalized as follows: +*> if ITYPE = 1 or 2, Z**T*B*Z = I; +*> if ITYPE = 3, Z**T*inv(B)*Z = I. +*> +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (8*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: DPPTRF or DSPEVX returned an error code: +*> <= N: if INFO = i, DSPEVX failed to converge; +*> i eigenvectors failed to converge. Their indices +*> are stored in array IFAIL. +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHEReigen +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE DSPGVX( ITYPE, JOBZ, RANGE, UPLO, N, AP, BP, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, WORK, IWORK, + $ IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, ITYPE, IU, LDZ, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION AP( * ), BP( * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, UPPER, VALEIG, WANTZ + CHARACTER TRANS + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DPPTRF, DSPEVX, DSPGST, DTPMV, DTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + UPPER = LSAME( UPLO, 'U' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -3 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) THEN + INFO = -9 + END IF + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 ) THEN + INFO = -10 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -11 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -16 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPGVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL DPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL DSPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL DSPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, ABSTOL, M, + $ W, Z, LDZ, WORK, IWORK, IFAIL, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( INFO.GT.0 ) + $ M = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**T*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + DO 10 J = 1, M + CALL DTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**T*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, M + CALL DTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF +* + RETURN +* +* End of DSPGVX +* + END diff --git a/dspl/liblapack/SRC/dsposv.f b/dspl/liblapack/SRC/dsposv.f new file mode 100644 index 0000000..4a85752 --- /dev/null +++ b/dspl/liblapack/SRC/dsposv.f @@ -0,0 +1,439 @@ +*> \brief DSPOSV computes the solution to system of linear equations A * X = B for PO matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSPOSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSPOSV( UPLO, N, NRHS, A, LDA, B, LDB, X, LDX, WORK, +* SWORK, ITER, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, ITER, LDA, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* REAL SWORK( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( N, * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSPOSV computes the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric positive definite matrix and X and B +*> are N-by-NRHS matrices. +*> +*> DSPOSV first attempts to factorize the matrix in SINGLE PRECISION +*> and use this factorization within an iterative refinement procedure +*> to produce a solution with DOUBLE PRECISION normwise backward error +*> quality (see below). If the approach fails the method switches to a +*> DOUBLE PRECISION factorization and solve. +*> +*> The iterative refinement is not going to be a winning strategy if +*> the ratio SINGLE PRECISION performance over DOUBLE PRECISION +*> performance is too small. A reasonable strategy should take the +*> number of right-hand sides and the size of the matrix into account. +*> This might be done with a call to ILAENV in the future. Up to now, we +*> always try iterative refinement. +*> +*> The iterative refinement process is stopped if +*> ITER > ITERMAX +*> or for all the RHS we have: +*> RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX +*> where +*> o ITER is the number of the current iteration in the iterative +*> refinement process +*> o RNRM is the infinity-norm of the residual +*> o XNRM is the infinity-norm of the solution +*> o ANRM is the infinity-operator-norm of the matrix A +*> o EPS is the machine epsilon returned by DLAMCH('Epsilon') +*> The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 +*> respectively. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, +*> dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if iterative refinement has been successfully used +*> (INFO.EQ.0 and ITER.GE.0, see description below), then A is +*> unchanged, if double precision factorization has been used +*> (INFO.EQ.0 and ITER.LT.0, see description below), then the +*> array A contains the factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N,NRHS) +*> This array is used to hold the residual vectors. +*> \endverbatim +*> +*> \param[out] SWORK +*> \verbatim +*> SWORK is REAL array, dimension (N*(N+NRHS)) +*> This array is used to use the single precision matrix and the +*> right-hand sides or solutions in single precision. +*> \endverbatim +*> +*> \param[out] ITER +*> \verbatim +*> ITER is INTEGER +*> < 0: iterative refinement has failed, double precision +*> factorization has been performed +*> -1 : the routine fell back to full precision for +*> implementation- or machine-specific reasons +*> -2 : narrowing the precision induced an overflow, +*> the routine fell back to full precision +*> -3 : failure of SPOTRF +*> -31: stop the iterative refinement after the 30th +*> iterations +*> > 0: iterative refinement has been successfully used. +*> Returns the number of iterations +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i of (DOUBLE +*> PRECISION) A is not positive definite, so the +*> factorization could not be completed, and the solution +*> has not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doublePOsolve +* +* ===================================================================== + SUBROUTINE DSPOSV( UPLO, N, NRHS, A, LDA, B, LDB, X, LDX, WORK, + $ SWORK, ITER, INFO ) +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITER, LDA, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + REAL SWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( N, * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + LOGICAL DOITREF + PARAMETER ( DOITREF = .TRUE. ) +* + INTEGER ITERMAX + PARAMETER ( ITERMAX = 30 ) +* + DOUBLE PRECISION BWDMAX + PARAMETER ( BWDMAX = 1.0E+00 ) +* + DOUBLE PRECISION NEGONE, ONE + PARAMETER ( NEGONE = -1.0D+0, ONE = 1.0D+0 ) +* +* .. Local Scalars .. + INTEGER I, IITER, PTSA, PTSX + DOUBLE PRECISION ANRM, CTE, EPS, RNRM, XNRM +* +* .. External Subroutines .. + EXTERNAL DAXPY, DSYMM, DLACPY, DLAT2S, DLAG2S, SLAG2D, + $ SPOTRF, SPOTRS, DPOTRF, DPOTRS, XERBLA +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DLANSY + LOGICAL LSAME + EXTERNAL IDAMAX, DLAMCH, DLANSY, LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 + ITER = 0 +* +* Test the input parameters. +* + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPOSV', -INFO ) + RETURN + END IF +* +* Quick return if (N.EQ.0). +* + IF( N.EQ.0 ) + $ RETURN +* +* Skip single precision iterative refinement if a priori slower +* than double precision factorization. +* + IF( .NOT.DOITREF ) THEN + ITER = -1 + GO TO 40 + END IF +* +* Compute some constants. +* + ANRM = DLANSY( 'I', UPLO, N, A, LDA, WORK ) + EPS = DLAMCH( 'Epsilon' ) + CTE = ANRM*EPS*SQRT( DBLE( N ) )*BWDMAX +* +* Set the indices PTSA, PTSX for referencing SA and SX in SWORK. +* + PTSA = 1 + PTSX = PTSA + N*N +* +* Convert B from double precision to single precision and store the +* result in SX. +* + CALL DLAG2S( N, NRHS, B, LDB, SWORK( PTSX ), N, INFO ) +* + IF( INFO.NE.0 ) THEN + ITER = -2 + GO TO 40 + END IF +* +* Convert A from double precision to single precision and store the +* result in SA. +* + CALL DLAT2S( UPLO, N, A, LDA, SWORK( PTSA ), N, INFO ) +* + IF( INFO.NE.0 ) THEN + ITER = -2 + GO TO 40 + END IF +* +* Compute the Cholesky factorization of SA. +* + CALL SPOTRF( UPLO, N, SWORK( PTSA ), N, INFO ) +* + IF( INFO.NE.0 ) THEN + ITER = -3 + GO TO 40 + END IF +* +* Solve the system SA*SX = SB. +* + CALL SPOTRS( UPLO, N, NRHS, SWORK( PTSA ), N, SWORK( PTSX ), N, + $ INFO ) +* +* Convert SX back to double precision +* + CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, X, LDX, INFO ) +* +* Compute R = B - AX (R is WORK). +* + CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N ) +* + CALL DSYMM( 'Left', UPLO, N, NRHS, NEGONE, A, LDA, X, LDX, ONE, + $ WORK, N ) +* +* Check whether the NRHS normwise backward errors satisfy the +* stopping criterion. If yes, set ITER=0 and return. +* + DO I = 1, NRHS + XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) ) + RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) ) + IF( RNRM.GT.XNRM*CTE ) + $ GO TO 10 + END DO +* +* If we are here, the NRHS normwise backward errors satisfy the +* stopping criterion. We are good to exit. +* + ITER = 0 + RETURN +* + 10 CONTINUE +* + DO 30 IITER = 1, ITERMAX +* +* Convert R (in WORK) from double precision to single precision +* and store the result in SX. +* + CALL DLAG2S( N, NRHS, WORK, N, SWORK( PTSX ), N, INFO ) +* + IF( INFO.NE.0 ) THEN + ITER = -2 + GO TO 40 + END IF +* +* Solve the system SA*SX = SR. +* + CALL SPOTRS( UPLO, N, NRHS, SWORK( PTSA ), N, SWORK( PTSX ), N, + $ INFO ) +* +* Convert SX back to double precision and update the current +* iterate. +* + CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, WORK, N, INFO ) +* + DO I = 1, NRHS + CALL DAXPY( N, ONE, WORK( 1, I ), 1, X( 1, I ), 1 ) + END DO +* +* Compute R = B - AX (R is WORK). +* + CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N ) +* + CALL DSYMM( 'L', UPLO, N, NRHS, NEGONE, A, LDA, X, LDX, ONE, + $ WORK, N ) +* +* Check whether the NRHS normwise backward errors satisfy the +* stopping criterion. If yes, set ITER=IITER>0 and return. +* + DO I = 1, NRHS + XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) ) + RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) ) + IF( RNRM.GT.XNRM*CTE ) + $ GO TO 20 + END DO +* +* If we are here, the NRHS normwise backward errors satisfy the +* stopping criterion, we are good to exit. +* + ITER = IITER +* + RETURN +* + 20 CONTINUE +* + 30 CONTINUE +* +* If we are at this place of the code, this is because we have +* performed ITER=ITERMAX iterations and never satisified the +* stopping criterion, set up the ITER flag accordingly and follow +* up on double precision routine. +* + ITER = -ITERMAX - 1 +* + 40 CONTINUE +* +* Single-precision iterative refinement failed to converge to a +* satisfactory solution, so we resort to double precision. +* + CALL DPOTRF( UPLO, N, A, LDA, INFO ) +* + IF( INFO.NE.0 ) + $ RETURN +* + CALL DLACPY( 'All', N, NRHS, B, LDB, X, LDX ) + CALL DPOTRS( UPLO, N, NRHS, A, LDA, X, LDX, INFO ) +* + RETURN +* +* End of DSPOSV. +* + END diff --git a/dspl/liblapack/SRC/dsprfs.f b/dspl/liblapack/SRC/dsprfs.f new file mode 100644 index 0000000..9ad5a80 --- /dev/null +++ b/dspl/liblapack/SRC/dsprfs.f @@ -0,0 +1,431 @@ +*> \brief \b DSPRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSPRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, +* FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION AFP( * ), AP( * ), B( LDB, * ), BERR( * ), +* $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSPRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric indefinite +*> and packed, and provides error bounds and backward error estimates +*> for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the symmetric matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in] AFP +*> \verbatim +*> AFP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> The factored form of the matrix A. AFP contains the block +*> diagonal matrix D and the multipliers used to obtain the +*> factor U or L from the factorization A = U*D*U**T or +*> A = L*D*L**T as computed by DSPTRF, stored as a packed +*> triangular matrix. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSPTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by DSPTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, + $ FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION AFP( * ), AP( * ), B( LDB, * ), BERR( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IK, J, K, KASE, KK, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLACN2, DSPMV, DSPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DSPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK( N+1 ), + $ 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + KK = 1 + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + IK = KK + DO 40 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK + S = S + ABS( AP( IK ) )*ABS( X( I, J ) ) + IK = IK + 1 + 40 CONTINUE + WORK( K ) = WORK( K ) + ABS( AP( KK+K-1 ) )*XK + S + KK = KK + K + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + WORK( K ) = WORK( K ) + ABS( AP( KK ) )*XK + IK = KK + 1 + DO 60 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK + S = S + ABS( AP( IK ) )*ABS( X( I, J ) ) + IK = IK + 1 + 60 CONTINUE + WORK( K ) = WORK( K ) + S + KK = KK + ( N-K+1 ) + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL DSPTRS( UPLO, N, 1, AFP, IPIV, WORK( N+1 ), N, INFO ) + CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL DLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**T). +* + CALL DSPTRS( UPLO, N, 1, AFP, IPIV, WORK( N+1 ), N, + $ INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 120 CONTINUE + CALL DSPTRS( UPLO, N, 1, AFP, IPIV, WORK( N+1 ), N, + $ INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of DSPRFS +* + END diff --git a/dspl/liblapack/SRC/dspsv.f b/dspl/liblapack/SRC/dspsv.f new file mode 100644 index 0000000..e969439 --- /dev/null +++ b/dspl/liblapack/SRC/dspsv.f @@ -0,0 +1,224 @@ +*> \brief DSPSV computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSPSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSPSV computes the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix stored in packed format and X +*> and B are N-by-NRHS matrices. +*> +*> The diagonal pivoting method is used to factor A as +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, D is symmetric and block diagonal with 1-by-1 +*> and 2-by-2 diagonal blocks. The factored form of A is then used to +*> solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**T or A = L*D*L**T as computed by DSPTRF, stored as +*> a packed triangular matrix in the same storage format as A. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, as +*> determined by DSPTRF. If IPIV(k) > 0, then rows and columns +*> k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +*> diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +*> then rows and columns k-1 and -IPIV(k) were interchanged and +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +*> IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +*> -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +*> diagonal block. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the symmetric matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = aji) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSPTRF, DSPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPSV ', -INFO ) + RETURN + END IF +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL DSPTRF( UPLO, N, AP, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL DSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* + END IF + RETURN +* +* End of DSPSV +* + END diff --git a/dspl/liblapack/SRC/dspsvx.f b/dspl/liblapack/SRC/dspsvx.f new file mode 100644 index 0000000..62df170 --- /dev/null +++ b/dspl/liblapack/SRC/dspsvx.f @@ -0,0 +1,385 @@ +*> \brief DSPSVX computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSPSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, +* LDX, RCOND, FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER FACT, UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION AFP( * ), AP( * ), B( LDB, * ), BERR( * ), +* $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSPSVX uses the diagonal pivoting factorization A = U*D*U**T or +*> A = L*D*L**T to compute the solution to a real system of linear +*> equations A * X = B, where A is an N-by-N symmetric matrix stored +*> in packed format and X and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'N', the diagonal pivoting method is used to factor A as +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> 2. If some D(i,i)=0, so that D is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 3. The system of equations is solved for X using the factored form +*> of A. +*> +*> 4. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of A has been +*> supplied on entry. +*> = 'F': On entry, AFP and IPIV contain the factored form of +*> A. AP, AFP and IPIV will not be modified. +*> = 'N': The matrix A will be copied to AFP and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the symmetric matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> \endverbatim +*> +*> \param[in,out] AFP +*> \verbatim +*> AFP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> If FACT = 'F', then AFP is an input argument and on entry +*> contains the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**T or A = L*D*L**T as computed by DSPTRF, stored as +*> a packed triangular matrix in the same storage format as A. +*> +*> If FACT = 'N', then AFP is an output argument and on exit +*> contains the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**T or A = L*D*L**T as computed by DSPTRF, stored as +*> a packed triangular matrix in the same storage format as A. +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains details of the interchanges and the block structure +*> of D, as determined by DSPTRF. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains details of the interchanges and the block structure +*> of D, as determined by DSPTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The estimate of the reciprocal condition number of the matrix +*> A. If RCOND is less than the machine precision (in +*> particular, if RCOND = 0), the matrix is singular to working +*> precision. This condition is indicated by a return code of +*> INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: D(i,i) is exactly zero. The factorization +*> has been completed but the factor D is exactly +*> singular, so the solution and error bounds could +*> not be computed. RCOND = 0 is returned. +*> = N+1: D is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleOTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the symmetric matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = aji) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, + $ LDX, RCOND, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION AFP( * ), AP( * ), B( LDB, * ), BERR( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT + DOUBLE PRECISION ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANSP + EXTERNAL LSAME, DLAMCH, DLANSP +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLACPY, DSPCON, DSPRFS, DSPTRF, DSPTRS, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL DCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 ) + CALL DSPTRF( UPLO, N, AFP, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = DLANSP( 'I', UPLO, N, AP, WORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL DSPCON( UPLO, N, AFP, IPIV, ANORM, RCOND, WORK, IWORK, INFO ) +* +* Compute the solution vectors X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DSPTRS( UPLO, N, NRHS, AFP, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL DSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, FERR, + $ BERR, WORK, IWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of DSPSVX +* + END diff --git a/dspl/liblapack/SRC/dsptrd.f b/dspl/liblapack/SRC/dsptrd.f new file mode 100644 index 0000000..082f814 --- /dev/null +++ b/dspl/liblapack/SRC/dsptrd.f @@ -0,0 +1,300 @@ +*> \brief \b DSPTRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSPTRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSPTRD( UPLO, N, AP, D, E, TAU, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AP( * ), D( * ), E( * ), TAU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSPTRD reduces a real symmetric matrix A stored in packed form to +*> symmetric tridiagonal form T by an orthogonal similarity +*> transformation: Q**T * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the orthogonal +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the orthogonal matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n-1) . . . H(2) H(1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in AP, +*> overwriting A(1:i-1,i+1), and tau is stored in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(n-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in AP, +*> overwriting A(i+2:n,i), and tau is stored in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSPTRD( UPLO, N, AP, D, E, TAU, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), D( * ), E( * ), TAU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO, HALF + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0, + $ HALF = 1.0D0 / 2.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, I1, I1I1, II + DOUBLE PRECISION ALPHA, TAUI +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DLARFG, DSPMV, DSPR2, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPTRD', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A. +* I1 is the index in AP of A(1,I+1). +* + I1 = N*( N-1 ) / 2 + 1 + DO 10 I = N - 1, 1, -1 +* +* Generate elementary reflector H(i) = I - tau * v * v**T +* to annihilate A(1:i-1,i+1) +* + CALL DLARFG( I, AP( I1+I-1 ), AP( I1 ), 1, TAUI ) + E( I ) = AP( I1+I-1 ) +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(1:i,1:i) +* + AP( I1+I-1 ) = ONE +* +* Compute y := tau * A * v storing y in TAU(1:i) +* + CALL DSPMV( UPLO, I, TAUI, AP, AP( I1 ), 1, ZERO, TAU, + $ 1 ) +* +* Compute w := y - 1/2 * tau * (y**T *v) * v +* + ALPHA = -HALF*TAUI*DDOT( I, TAU, 1, AP( I1 ), 1 ) + CALL DAXPY( I, ALPHA, AP( I1 ), 1, TAU, 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w**T - w * v**T +* + CALL DSPR2( UPLO, I, -ONE, AP( I1 ), 1, TAU, 1, AP ) +* + AP( I1+I-1 ) = E( I ) + END IF + D( I+1 ) = AP( I1+I ) + TAU( I ) = TAUI + I1 = I1 - I + 10 CONTINUE + D( 1 ) = AP( 1 ) + ELSE +* +* Reduce the lower triangle of A. II is the index in AP of +* A(i,i) and I1I1 is the index of A(i+1,i+1). +* + II = 1 + DO 20 I = 1, N - 1 + I1I1 = II + N - I + 1 +* +* Generate elementary reflector H(i) = I - tau * v * v**T +* to annihilate A(i+2:n,i) +* + CALL DLARFG( N-I, AP( II+1 ), AP( II+2 ), 1, TAUI ) + E( I ) = AP( II+1 ) +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(i+1:n,i+1:n) +* + AP( II+1 ) = ONE +* +* Compute y := tau * A * v storing y in TAU(i:n-1) +* + CALL DSPMV( UPLO, N-I, TAUI, AP( I1I1 ), AP( II+1 ), 1, + $ ZERO, TAU( I ), 1 ) +* +* Compute w := y - 1/2 * tau * (y**T *v) * v +* + ALPHA = -HALF*TAUI*DDOT( N-I, TAU( I ), 1, AP( II+1 ), + $ 1 ) + CALL DAXPY( N-I, ALPHA, AP( II+1 ), 1, TAU( I ), 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w**T - w * v**T +* + CALL DSPR2( UPLO, N-I, -ONE, AP( II+1 ), 1, TAU( I ), 1, + $ AP( I1I1 ) ) +* + AP( II+1 ) = E( I ) + END IF + D( I ) = AP( II ) + TAU( I ) = TAUI + II = I1I1 + 20 CONTINUE + D( N ) = AP( II ) + END IF +* + RETURN +* +* End of DSPTRD +* + END diff --git a/dspl/liblapack/SRC/dsptrf.f b/dspl/liblapack/SRC/dsptrf.f new file mode 100644 index 0000000..9158ff1 --- /dev/null +++ b/dspl/liblapack/SRC/dsptrf.f @@ -0,0 +1,616 @@ +*> \brief \b DSPTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSPTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSPTRF( UPLO, N, AP, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSPTRF computes the factorization of a real symmetric matrix A stored +*> in packed format using the Bunch-Kaufman diagonal pivoting method: +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L, stored as a packed triangular +*> matrix overwriting A (see below for further details). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> J. Lewis, Boeing Computer Services Company +*> +* ===================================================================== + SUBROUTINE DSPTRF( UPLO, N, AP, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KC, KK, KNC, KP, KPC, + $ KSTEP, KX, NPP + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, D11, D12, D21, D22, R1, + $ ROWMAX, T, WK, WKM1, WKP1 +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + EXTERNAL LSAME, IDAMAX +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSPR, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPTRF', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + KC = ( N-1 )*N / 2 + 1 + 10 CONTINUE + KNC = KC +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( AP( KC+K-1 ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = IDAMAX( K-1, AP( KC ), 1 ) + COLMAX = ABS( AP( KC+IMAX-1 ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* + ROWMAX = ZERO + JMAX = IMAX + KX = IMAX*( IMAX+1 ) / 2 + IMAX + DO 20 J = IMAX + 1, K + IF( ABS( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = ABS( AP( KX ) ) + JMAX = J + END IF + KX = KX + J + 20 CONTINUE + KPC = ( IMAX-1 )*IMAX / 2 + 1 + IF( IMAX.GT.1 ) THEN + JMAX = IDAMAX( IMAX-1, AP( KPC ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( AP( KPC+JMAX-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( AP( KPC+IMAX-1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL DSWAP( KP-1, AP( KNC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 30 J = KP + 1, KK - 1 + KX = KX + J - 1 + T = AP( KNC+J-1 ) + AP( KNC+J-1 ) = AP( KX ) + AP( KX ) = T + 30 CONTINUE + T = AP( KNC+KK-1 ) + AP( KNC+KK-1 ) = AP( KPC+KP-1 ) + AP( KPC+KP-1 ) = T + IF( KSTEP.EQ.2 ) THEN + T = AP( KC+K-2 ) + AP( KC+K-2 ) = AP( KC+KP-1 ) + AP( KC+KP-1 ) = T + END IF + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)**T = A - W(k)*1/D(k)*W(k)**T +* + R1 = ONE / AP( KC+K-1 ) + CALL DSPR( UPLO, K-1, -R1, AP( KC ), 1, AP ) +* +* Store U(k) in column k +* + CALL DSCAL( K-1, R1, AP( KC ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )**T +* + IF( K.GT.2 ) THEN +* + D12 = AP( K-1+( K-1 )*K / 2 ) + D22 = AP( K-1+( K-2 )*( K-1 ) / 2 ) / D12 + D11 = AP( K+( K-1 )*K / 2 ) / D12 + T = ONE / ( D11*D22-ONE ) + D12 = T / D12 +* + DO 50 J = K - 2, 1, -1 + WKM1 = D12*( D11*AP( J+( K-2 )*( K-1 ) / 2 )- + $ AP( J+( K-1 )*K / 2 ) ) + WK = D12*( D22*AP( J+( K-1 )*K / 2 )- + $ AP( J+( K-2 )*( K-1 ) / 2 ) ) + DO 40 I = J, 1, -1 + AP( I+( J-1 )*J / 2 ) = AP( I+( J-1 )*J / 2 ) - + $ AP( I+( K-1 )*K / 2 )*WK - + $ AP( I+( K-2 )*( K-1 ) / 2 )*WKM1 + 40 CONTINUE + AP( J+( K-1 )*K / 2 ) = WK + AP( J+( K-2 )*( K-1 ) / 2 ) = WKM1 + 50 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + KC = KNC - K + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + KC = 1 + NPP = N*( N+1 ) / 2 + 60 CONTINUE + KNC = KC +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( AP( KC ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + IDAMAX( N-K, AP( KC+1 ), 1 ) + COLMAX = ABS( AP( KC+IMAX-K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + ROWMAX = ZERO + KX = KC + IMAX - K + DO 70 J = K, IMAX - 1 + IF( ABS( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = ABS( AP( KX ) ) + JMAX = J + END IF + KX = KX + N - J + 70 CONTINUE + KPC = NPP - ( N-IMAX+1 )*( N-IMAX+2 ) / 2 + 1 + IF( IMAX.LT.N ) THEN + JMAX = IMAX + IDAMAX( N-IMAX, AP( KPC+1 ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( AP( KPC+JMAX-IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( AP( KPC ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC + N - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL DSWAP( N-KP, AP( KNC+KP-KK+1 ), 1, AP( KPC+1 ), + $ 1 ) + KX = KNC + KP - KK + DO 80 J = KK + 1, KP - 1 + KX = KX + N - J + 1 + T = AP( KNC+J-KK ) + AP( KNC+J-KK ) = AP( KX ) + AP( KX ) = T + 80 CONTINUE + T = AP( KNC ) + AP( KNC ) = AP( KPC ) + AP( KPC ) = T + IF( KSTEP.EQ.2 ) THEN + T = AP( KC+1 ) + AP( KC+1 ) = AP( KC+KP-K ) + AP( KC+KP-K ) = T + END IF + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)**T = A - W(k)*(1/D(k))*W(k)**T +* + R1 = ONE / AP( KC ) + CALL DSPR( UPLO, N-K, -R1, AP( KC+1 ), 1, + $ AP( KC+N-K+1 ) ) +* +* Store L(k) in column K +* + CALL DSCAL( N-K, R1, AP( KC+1 ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k): columns K and K+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )**T +* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )**T +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D21 = AP( K+1+( K-1 )*( 2*N-K ) / 2 ) + D11 = AP( K+1+K*( 2*N-K-1 ) / 2 ) / D21 + D22 = AP( K+( K-1 )*( 2*N-K ) / 2 ) / D21 + T = ONE / ( D11*D22-ONE ) + D21 = T / D21 +* + DO 100 J = K + 2, N + WK = D21*( D11*AP( J+( K-1 )*( 2*N-K ) / 2 )- + $ AP( J+K*( 2*N-K-1 ) / 2 ) ) + WKP1 = D21*( D22*AP( J+K*( 2*N-K-1 ) / 2 )- + $ AP( J+( K-1 )*( 2*N-K ) / 2 ) ) +* + DO 90 I = J, N + AP( I+( J-1 )*( 2*N-J ) / 2 ) = AP( I+( J-1 )* + $ ( 2*N-J ) / 2 ) - AP( I+( K-1 )*( 2*N-K ) / + $ 2 )*WK - AP( I+K*( 2*N-K-1 ) / 2 )*WKP1 + 90 CONTINUE +* + AP( J+( K-1 )*( 2*N-K ) / 2 ) = WK + AP( J+K*( 2*N-K-1 ) / 2 ) = WKP1 +* + 100 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + KC = KNC + N - K + 2 + GO TO 60 +* + END IF +* + 110 CONTINUE + RETURN +* +* End of DSPTRF +* + END diff --git a/dspl/liblapack/SRC/dsptri.f b/dspl/liblapack/SRC/dsptri.f new file mode 100644 index 0000000..e68effa --- /dev/null +++ b/dspl/liblapack/SRC/dsptri.f @@ -0,0 +1,401 @@ +*> \brief \b DSPTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSPTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSPTRI( UPLO, N, AP, IPIV, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSPTRI computes the inverse of a real symmetric indefinite matrix +*> A in packed storage using the factorization A = U*D*U**T or +*> A = L*D*L**T computed by DSPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On entry, the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by DSPTRF, +*> stored as a packed triangular matrix. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix, stored as a packed triangular matrix. The j-th column +*> of inv(A) is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j; +*> if UPLO = 'L', +*> AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSPTRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DSPTRI( UPLO, N, AP, IPIV, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KCNEXT, KP, KPC, KSTEP, KX, NPP + DOUBLE PRECISION AK, AKKP1, AKP1, D, T, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSPMV, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + KP = N*( N+1 ) / 2 + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP - INFO + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + KP = 1 + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP + N - INFO + 1 + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + KCNEXT = KC + K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC+K-1 ) = ONE / AP( KC+K-1 ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL DCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL DSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ), + $ 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ DDOT( K-1, WORK, 1, AP( KC ), 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( AP( KCNEXT+K-1 ) ) + AK = AP( KC+K-1 ) / T + AKP1 = AP( KCNEXT+K ) / T + AKKP1 = AP( KCNEXT+K-1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KC+K-1 ) = AKP1 / D + AP( KCNEXT+K ) = AK / D + AP( KCNEXT+K-1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL DCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL DSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ), + $ 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ DDOT( K-1, WORK, 1, AP( KC ), 1 ) + AP( KCNEXT+K-1 ) = AP( KCNEXT+K-1 ) - + $ DDOT( K-1, AP( KC ), 1, AP( KCNEXT ), + $ 1 ) + CALL DCOPY( K-1, AP( KCNEXT ), 1, WORK, 1 ) + CALL DSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, + $ AP( KCNEXT ), 1 ) + AP( KCNEXT+K ) = AP( KCNEXT+K ) - + $ DDOT( K-1, WORK, 1, AP( KCNEXT ), 1 ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT + K + 1 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + KPC = ( KP-1 )*KP / 2 + 1 + CALL DSWAP( KP-1, AP( KC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 40 J = KP + 1, K - 1 + KX = KX + J - 1 + TEMP = AP( KC+J-1 ) + AP( KC+J-1 ) = AP( KX ) + AP( KX ) = TEMP + 40 CONTINUE + TEMP = AP( KC+K-1 ) + AP( KC+K-1 ) = AP( KPC+KP-1 ) + AP( KPC+KP-1 ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC+K+K-1 ) + AP( KC+K+K-1 ) = AP( KC+K+KP-1 ) + AP( KC+K+KP-1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + KC = KCNEXT + GO TO 30 + 50 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + NPP = N*( N+1 ) / 2 + K = N + KC = NPP + 60 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 80 +* + KCNEXT = KC - ( N-K+2 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC ) = ONE / AP( KC ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL DCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL DSPMV( UPLO, N-K, -ONE, AP( KC+N-K+1 ), WORK, 1, + $ ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - DDOT( N-K, WORK, 1, AP( KC+1 ), 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( AP( KCNEXT+1 ) ) + AK = AP( KCNEXT ) / T + AKP1 = AP( KC ) / T + AKKP1 = AP( KCNEXT+1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KCNEXT ) = AKP1 / D + AP( KC ) = AK / D + AP( KCNEXT+1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL DCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL DSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1, + $ ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - DDOT( N-K, WORK, 1, AP( KC+1 ), 1 ) + AP( KCNEXT+1 ) = AP( KCNEXT+1 ) - + $ DDOT( N-K, AP( KC+1 ), 1, + $ AP( KCNEXT+2 ), 1 ) + CALL DCOPY( N-K, AP( KCNEXT+2 ), 1, WORK, 1 ) + CALL DSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1, + $ ZERO, AP( KCNEXT+2 ), 1 ) + AP( KCNEXT ) = AP( KCNEXT ) - + $ DDOT( N-K, WORK, 1, AP( KCNEXT+2 ), 1 ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT - ( N-K+3 ) + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + KPC = NPP - ( N-KP+1 )*( N-KP+2 ) / 2 + 1 + IF( KP.LT.N ) + $ CALL DSWAP( N-KP, AP( KC+KP-K+1 ), 1, AP( KPC+1 ), 1 ) + KX = KC + KP - K + DO 70 J = K + 1, KP - 1 + KX = KX + N - J + 1 + TEMP = AP( KC+J-K ) + AP( KC+J-K ) = AP( KX ) + AP( KX ) = TEMP + 70 CONTINUE + TEMP = AP( KC ) + AP( KC ) = AP( KPC ) + AP( KPC ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC-N+K-1 ) + AP( KC-N+K-1 ) = AP( KC-N+KP-1 ) + AP( KC-N+KP-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + KC = KCNEXT + GO TO 60 + 80 CONTINUE + END IF +* + RETURN +* +* End of DSPTRI +* + END diff --git a/dspl/liblapack/SRC/dsptrs.f b/dspl/liblapack/SRC/dsptrs.f new file mode 100644 index 0000000..17f8c6a --- /dev/null +++ b/dspl/liblapack/SRC/dsptrs.f @@ -0,0 +1,450 @@ +*> \brief \b DSPTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSPTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSPTRS solves a system of linear equations A*X = B with a real +*> symmetric matrix A stored in packed format using the factorization +*> A = U*D*U**T or A = L*D*L**T computed by DSPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by DSPTRF, stored as a +*> packed triangular matrix. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSPTRF. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KP + DOUBLE PRECISION AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DGER, DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**T. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + KC = KC - K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL DGER( K-1, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL DSCAL( NRHS, ONE / AP( KC+K-1 ), B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL DSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL DGER( K-2, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL DGER( K-2, NRHS, -ONE, AP( KC-( K-1 ) ), 1, + $ B( K-1, 1 ), LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+K-2 ) + AKM1 = AP( KC-1 ) / AKM1K + AK = AP( KC+K-1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / AKM1K + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + KC = KC - K + 1 + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U**T*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U**T(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ), + $ 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + K + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U**T(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ), + $ 1, ONE, B( K, 1 ), LDB ) + CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, + $ AP( KC+K ), 1, ONE, B( K+1, 1 ), LDB ) +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + 2*K + 1 + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L**T. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL DGER( N-K, NRHS, -ONE, AP( KC+1 ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL DSCAL( NRHS, ONE / AP( KC ), B( K, 1 ), LDB ) + KC = KC + N - K + 1 + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL DSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL DGER( N-K-1, NRHS, -ONE, AP( KC+2 ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL DGER( N-K-1, NRHS, -ONE, AP( KC+N-K+2 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+1 ) + AKM1 = AP( KC ) / AKM1K + AK = AP( KC+N-K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / AKM1K + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + KC = KC + 2*( N-K ) + 1 + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L**T*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + KC = KC - ( N-K+1 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L**T(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L**T(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB ) + CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC-( N-K ) ), 1, ONE, B( K-1, 1 ), + $ LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC - ( N-K+2 ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of DSPTRS +* + END diff --git a/dspl/liblapack/SRC/dstebz.f b/dspl/liblapack/SRC/dstebz.f new file mode 100644 index 0000000..e41279e --- /dev/null +++ b/dspl/liblapack/SRC/dstebz.f @@ -0,0 +1,771 @@ +*> \brief \b DSTEBZ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSTEBZ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSTEBZ( RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL, D, E, +* M, NSPLIT, W, IBLOCK, ISPLIT, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER ORDER, RANGE +* INTEGER IL, INFO, IU, M, N, NSPLIT +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IBLOCK( * ), ISPLIT( * ), IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSTEBZ computes the eigenvalues of a symmetric tridiagonal +*> matrix T. The user may ask for all eigenvalues, all eigenvalues +*> in the half-open interval (VL, VU], or the IL-th through IU-th +*> eigenvalues. +*> +*> To avoid overflow, the matrix must be scaled so that its +*> largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value, and for greatest +*> accuracy, it should not be much smaller than that. +*> +*> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal +*> Matrix", Report CS41, Computer Science Dept., Stanford +*> University, July 21, 1966. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': ("All") all eigenvalues will be found. +*> = 'V': ("Value") all eigenvalues in the half-open interval +*> (VL, VU] will be found. +*> = 'I': ("Index") the IL-th through IU-th eigenvalues (of the +*> entire matrix) will be found. +*> \endverbatim +*> +*> \param[in] ORDER +*> \verbatim +*> ORDER is CHARACTER*1 +*> = 'B': ("By Block") the eigenvalues will be grouped by +*> split-off block (see IBLOCK, ISPLIT) and +*> ordered from smallest to largest within +*> the block. +*> = 'E': ("Entire matrix") +*> the eigenvalues for the entire matrix +*> will be ordered from smallest to +*> largest. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the tridiagonal matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. Eigenvalues less than or equal +*> to VL, or greater than VU, will not be returned. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. Eigenvalues less than or equal +*> to VL, or greater than VU, will not be returned. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute tolerance for the eigenvalues. An eigenvalue +*> (or cluster) is considered to be located if it has been +*> determined to lie in an interval whose width is ABSTOL or +*> less. If ABSTOL is less than or equal to zero, then ULP*|T| +*> will be used, where |T| means the 1-norm of T. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) off-diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The actual number of eigenvalues found. 0 <= M <= N. +*> (See also the description of INFO=2,3.) +*> \endverbatim +*> +*> \param[out] NSPLIT +*> \verbatim +*> NSPLIT is INTEGER +*> The number of diagonal blocks in the matrix T. +*> 1 <= NSPLIT <= N. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> On exit, the first M elements of W will contain the +*> eigenvalues. (DSTEBZ may use the remaining N-M elements as +*> workspace.) +*> \endverbatim +*> +*> \param[out] IBLOCK +*> \verbatim +*> IBLOCK is INTEGER array, dimension (N) +*> At each row/column j where E(j) is zero or small, the +*> matrix T is considered to split into a block diagonal +*> matrix. On exit, if INFO = 0, IBLOCK(i) specifies to which +*> block (from 1 to the number of blocks) the eigenvalue W(i) +*> belongs. (DSTEBZ may use the remaining N-M elements as +*> workspace.) +*> \endverbatim +*> +*> \param[out] ISPLIT +*> \verbatim +*> ISPLIT is INTEGER array, dimension (N) +*> The splitting points, at which T breaks up into submatrices. +*> The first submatrix consists of rows/columns 1 to ISPLIT(1), +*> the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), +*> etc., and the NSPLIT-th consists of rows/columns +*> ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. +*> (Only the first NSPLIT elements will actually be used, but +*> since the user cannot know a priori what value NSPLIT will +*> have, N words must be reserved for ISPLIT.) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: some or all of the eigenvalues failed to converge or +*> were not computed: +*> =1 or 3: Bisection failed to converge for some +*> eigenvalues; these eigenvalues are flagged by a +*> negative block number. The effect is that the +*> eigenvalues may not be as accurate as the +*> absolute and relative tolerances. This is +*> generally caused by unexpectedly inaccurate +*> arithmetic. +*> =2 or 3: RANGE='I' only: Not all of the eigenvalues +*> IL:IU were found. +*> Effect: M < IU+1-IL +*> Cause: non-monotonic arithmetic, causing the +*> Sturm sequence to be non-monotonic. +*> Cure: recalculate, using RANGE='A', and pick +*> out eigenvalues IL:IU. In some cases, +*> increasing the PARAMETER "FUDGE" may +*> make things work. +*> = 4: RANGE='I', and the Gershgorin interval +*> initially used was too small. No eigenvalues +*> were computed. +*> Probable cause: your machine has sloppy +*> floating-point arithmetic. +*> Cure: Increase the PARAMETER "FUDGE", +*> recompile, and try again. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> RELFAC DOUBLE PRECISION, default = 2.0e0 +*> The relative tolerance. An interval (a,b] lies within +*> "relative tolerance" if b-a < RELFAC*ulp*max(|a|,|b|), +*> where "ulp" is the machine precision (distance from 1 to +*> the next larger floating point number.) +*> +*> FUDGE DOUBLE PRECISION, default = 2 +*> A "fudge factor" to widen the Gershgorin intervals. Ideally, +*> a value of 1 should work, but on machines with sloppy +*> arithmetic, this needs to be larger. The default for +*> publicly released versions should be large enough to handle +*> the worst machine around. Note that this has no effect +*> on accuracy of the solution. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DSTEBZ( RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL, D, E, + $ M, NSPLIT, W, IBLOCK, ISPLIT, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER ORDER, RANGE + INTEGER IL, INFO, IU, M, N, NSPLIT + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), ISPLIT( * ), IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, HALF + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ HALF = 1.0D0 / TWO ) + DOUBLE PRECISION FUDGE, RELFAC + PARAMETER ( FUDGE = 2.1D0, RELFAC = 2.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL NCNVRG, TOOFEW + INTEGER IB, IBEGIN, IDISCL, IDISCU, IE, IEND, IINFO, + $ IM, IN, IOFF, IORDER, IOUT, IRANGE, ITMAX, + $ ITMP1, IW, IWOFF, J, JB, JDISC, JE, NB, NWL, + $ NWU + DOUBLE PRECISION ATOLI, BNORM, GL, GU, PIVMIN, RTOLI, SAFEMN, + $ TMP1, TMP2, TNORM, ULP, WKILL, WL, WLU, WU, WUL +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, ILAENV, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLAEBZ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Decode RANGE +* + IF( LSAME( RANGE, 'A' ) ) THEN + IRANGE = 1 + ELSE IF( LSAME( RANGE, 'V' ) ) THEN + IRANGE = 2 + ELSE IF( LSAME( RANGE, 'I' ) ) THEN + IRANGE = 3 + ELSE + IRANGE = 0 + END IF +* +* Decode ORDER +* + IF( LSAME( ORDER, 'B' ) ) THEN + IORDER = 2 + ELSE IF( LSAME( ORDER, 'E' ) ) THEN + IORDER = 1 + ELSE + IORDER = 0 + END IF +* +* Check for Errors +* + IF( IRANGE.LE.0 ) THEN + INFO = -1 + ELSE IF( IORDER.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( IRANGE.EQ.2 ) THEN + IF( VL.GE.VU ) + $ INFO = -5 + ELSE IF( IRANGE.EQ.3 .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) + $ THEN + INFO = -6 + ELSE IF( IRANGE.EQ.3 .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) + $ THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSTEBZ', -INFO ) + RETURN + END IF +* +* Initialize error flags +* + INFO = 0 + NCNVRG = .FALSE. + TOOFEW = .FALSE. +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* +* Simplifications: +* + IF( IRANGE.EQ.3 .AND. IL.EQ.1 .AND. IU.EQ.N ) + $ IRANGE = 1 +* +* Get machine constants +* NB is the minimum vector length for vector bisection, or 0 +* if only scalar is to be done. +* + SAFEMN = DLAMCH( 'S' ) + ULP = DLAMCH( 'P' ) + RTOLI = ULP*RELFAC + NB = ILAENV( 1, 'DSTEBZ', ' ', N, -1, -1, -1 ) + IF( NB.LE.1 ) + $ NB = 0 +* +* Special Case when N=1 +* + IF( N.EQ.1 ) THEN + NSPLIT = 1 + ISPLIT( 1 ) = 1 + IF( IRANGE.EQ.2 .AND. ( VL.GE.D( 1 ) .OR. VU.LT.D( 1 ) ) ) THEN + M = 0 + ELSE + W( 1 ) = D( 1 ) + IBLOCK( 1 ) = 1 + M = 1 + END IF + RETURN + END IF +* +* Compute Splitting Points +* + NSPLIT = 1 + WORK( N ) = ZERO + PIVMIN = ONE +* + DO 10 J = 2, N + TMP1 = E( J-1 )**2 + IF( ABS( D( J )*D( J-1 ) )*ULP**2+SAFEMN.GT.TMP1 ) THEN + ISPLIT( NSPLIT ) = J - 1 + NSPLIT = NSPLIT + 1 + WORK( J-1 ) = ZERO + ELSE + WORK( J-1 ) = TMP1 + PIVMIN = MAX( PIVMIN, TMP1 ) + END IF + 10 CONTINUE + ISPLIT( NSPLIT ) = N + PIVMIN = PIVMIN*SAFEMN +* +* Compute Interval and ATOLI +* + IF( IRANGE.EQ.3 ) THEN +* +* RANGE='I': Compute the interval containing eigenvalues +* IL through IU. +* +* Compute Gershgorin interval for entire (split) matrix +* and use it as the initial interval +* + GU = D( 1 ) + GL = D( 1 ) + TMP1 = ZERO +* + DO 20 J = 1, N - 1 + TMP2 = SQRT( WORK( J ) ) + GU = MAX( GU, D( J )+TMP1+TMP2 ) + GL = MIN( GL, D( J )-TMP1-TMP2 ) + TMP1 = TMP2 + 20 CONTINUE +* + GU = MAX( GU, D( N )+TMP1 ) + GL = MIN( GL, D( N )-TMP1 ) + TNORM = MAX( ABS( GL ), ABS( GU ) ) + GL = GL - FUDGE*TNORM*ULP*N - FUDGE*TWO*PIVMIN + GU = GU + FUDGE*TNORM*ULP*N + FUDGE*PIVMIN +* +* Compute Iteration parameters +* + ITMAX = INT( ( LOG( TNORM+PIVMIN )-LOG( PIVMIN ) ) / + $ LOG( TWO ) ) + 2 + IF( ABSTOL.LE.ZERO ) THEN + ATOLI = ULP*TNORM + ELSE + ATOLI = ABSTOL + END IF +* + WORK( N+1 ) = GL + WORK( N+2 ) = GL + WORK( N+3 ) = GU + WORK( N+4 ) = GU + WORK( N+5 ) = GL + WORK( N+6 ) = GU + IWORK( 1 ) = -1 + IWORK( 2 ) = -1 + IWORK( 3 ) = N + 1 + IWORK( 4 ) = N + 1 + IWORK( 5 ) = IL - 1 + IWORK( 6 ) = IU +* + CALL DLAEBZ( 3, ITMAX, N, 2, 2, NB, ATOLI, RTOLI, PIVMIN, D, E, + $ WORK, IWORK( 5 ), WORK( N+1 ), WORK( N+5 ), IOUT, + $ IWORK, W, IBLOCK, IINFO ) +* + IF( IWORK( 6 ).EQ.IU ) THEN + WL = WORK( N+1 ) + WLU = WORK( N+3 ) + NWL = IWORK( 1 ) + WU = WORK( N+4 ) + WUL = WORK( N+2 ) + NWU = IWORK( 4 ) + ELSE + WL = WORK( N+2 ) + WLU = WORK( N+4 ) + NWL = IWORK( 2 ) + WU = WORK( N+3 ) + WUL = WORK( N+1 ) + NWU = IWORK( 3 ) + END IF +* + IF( NWL.LT.0 .OR. NWL.GE.N .OR. NWU.LT.1 .OR. NWU.GT.N ) THEN + INFO = 4 + RETURN + END IF + ELSE +* +* RANGE='A' or 'V' -- Set ATOLI +* + TNORM = MAX( ABS( D( 1 ) )+ABS( E( 1 ) ), + $ ABS( D( N ) )+ABS( E( N-1 ) ) ) +* + DO 30 J = 2, N - 1 + TNORM = MAX( TNORM, ABS( D( J ) )+ABS( E( J-1 ) )+ + $ ABS( E( J ) ) ) + 30 CONTINUE +* + IF( ABSTOL.LE.ZERO ) THEN + ATOLI = ULP*TNORM + ELSE + ATOLI = ABSTOL + END IF +* + IF( IRANGE.EQ.2 ) THEN + WL = VL + WU = VU + ELSE + WL = ZERO + WU = ZERO + END IF + END IF +* +* Find Eigenvalues -- Loop Over Blocks and recompute NWL and NWU. +* NWL accumulates the number of eigenvalues .le. WL, +* NWU accumulates the number of eigenvalues .le. WU +* + M = 0 + IEND = 0 + INFO = 0 + NWL = 0 + NWU = 0 +* + DO 70 JB = 1, NSPLIT + IOFF = IEND + IBEGIN = IOFF + 1 + IEND = ISPLIT( JB ) + IN = IEND - IOFF +* + IF( IN.EQ.1 ) THEN +* +* Special Case -- IN=1 +* + IF( IRANGE.EQ.1 .OR. WL.GE.D( IBEGIN )-PIVMIN ) + $ NWL = NWL + 1 + IF( IRANGE.EQ.1 .OR. WU.GE.D( IBEGIN )-PIVMIN ) + $ NWU = NWU + 1 + IF( IRANGE.EQ.1 .OR. ( WL.LT.D( IBEGIN )-PIVMIN .AND. WU.GE. + $ D( IBEGIN )-PIVMIN ) ) THEN + M = M + 1 + W( M ) = D( IBEGIN ) + IBLOCK( M ) = JB + END IF + ELSE +* +* General Case -- IN > 1 +* +* Compute Gershgorin Interval +* and use it as the initial interval +* + GU = D( IBEGIN ) + GL = D( IBEGIN ) + TMP1 = ZERO +* + DO 40 J = IBEGIN, IEND - 1 + TMP2 = ABS( E( J ) ) + GU = MAX( GU, D( J )+TMP1+TMP2 ) + GL = MIN( GL, D( J )-TMP1-TMP2 ) + TMP1 = TMP2 + 40 CONTINUE +* + GU = MAX( GU, D( IEND )+TMP1 ) + GL = MIN( GL, D( IEND )-TMP1 ) + BNORM = MAX( ABS( GL ), ABS( GU ) ) + GL = GL - FUDGE*BNORM*ULP*IN - FUDGE*PIVMIN + GU = GU + FUDGE*BNORM*ULP*IN + FUDGE*PIVMIN +* +* Compute ATOLI for the current submatrix +* + IF( ABSTOL.LE.ZERO ) THEN + ATOLI = ULP*MAX( ABS( GL ), ABS( GU ) ) + ELSE + ATOLI = ABSTOL + END IF +* + IF( IRANGE.GT.1 ) THEN + IF( GU.LT.WL ) THEN + NWL = NWL + IN + NWU = NWU + IN + GO TO 70 + END IF + GL = MAX( GL, WL ) + GU = MIN( GU, WU ) + IF( GL.GE.GU ) + $ GO TO 70 + END IF +* +* Set Up Initial Interval +* + WORK( N+1 ) = GL + WORK( N+IN+1 ) = GU + CALL DLAEBZ( 1, 0, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN, + $ D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ), + $ IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IM, + $ IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO ) +* + NWL = NWL + IWORK( 1 ) + NWU = NWU + IWORK( IN+1 ) + IWOFF = M - IWORK( 1 ) +* +* Compute Eigenvalues +* + ITMAX = INT( ( LOG( GU-GL+PIVMIN )-LOG( PIVMIN ) ) / + $ LOG( TWO ) ) + 2 + CALL DLAEBZ( 2, ITMAX, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN, + $ D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ), + $ IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IOUT, + $ IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO ) +* +* Copy Eigenvalues Into W and IBLOCK +* Use -JB for block number for unconverged eigenvalues. +* + DO 60 J = 1, IOUT + TMP1 = HALF*( WORK( J+N )+WORK( J+IN+N ) ) +* +* Flag non-convergence. +* + IF( J.GT.IOUT-IINFO ) THEN + NCNVRG = .TRUE. + IB = -JB + ELSE + IB = JB + END IF + DO 50 JE = IWORK( J ) + 1 + IWOFF, + $ IWORK( J+IN ) + IWOFF + W( JE ) = TMP1 + IBLOCK( JE ) = IB + 50 CONTINUE + 60 CONTINUE +* + M = M + IM + END IF + 70 CONTINUE +* +* If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU +* If NWL+1 < IL or NWU > IU, discard extra eigenvalues. +* + IF( IRANGE.EQ.3 ) THEN + IM = 0 + IDISCL = IL - 1 - NWL + IDISCU = NWU - IU +* + IF( IDISCL.GT.0 .OR. IDISCU.GT.0 ) THEN + DO 80 JE = 1, M + IF( W( JE ).LE.WLU .AND. IDISCL.GT.0 ) THEN + IDISCL = IDISCL - 1 + ELSE IF( W( JE ).GE.WUL .AND. IDISCU.GT.0 ) THEN + IDISCU = IDISCU - 1 + ELSE + IM = IM + 1 + W( IM ) = W( JE ) + IBLOCK( IM ) = IBLOCK( JE ) + END IF + 80 CONTINUE + M = IM + END IF + IF( IDISCL.GT.0 .OR. IDISCU.GT.0 ) THEN +* +* Code to deal with effects of bad arithmetic: +* Some low eigenvalues to be discarded are not in (WL,WLU], +* or high eigenvalues to be discarded are not in (WUL,WU] +* so just kill off the smallest IDISCL/largest IDISCU +* eigenvalues, by simply finding the smallest/largest +* eigenvalue(s). +* +* (If N(w) is monotone non-decreasing, this should never +* happen.) +* + IF( IDISCL.GT.0 ) THEN + WKILL = WU + DO 100 JDISC = 1, IDISCL + IW = 0 + DO 90 JE = 1, M + IF( IBLOCK( JE ).NE.0 .AND. + $ ( W( JE ).LT.WKILL .OR. IW.EQ.0 ) ) THEN + IW = JE + WKILL = W( JE ) + END IF + 90 CONTINUE + IBLOCK( IW ) = 0 + 100 CONTINUE + END IF + IF( IDISCU.GT.0 ) THEN +* + WKILL = WL + DO 120 JDISC = 1, IDISCU + IW = 0 + DO 110 JE = 1, M + IF( IBLOCK( JE ).NE.0 .AND. + $ ( W( JE ).GT.WKILL .OR. IW.EQ.0 ) ) THEN + IW = JE + WKILL = W( JE ) + END IF + 110 CONTINUE + IBLOCK( IW ) = 0 + 120 CONTINUE + END IF + IM = 0 + DO 130 JE = 1, M + IF( IBLOCK( JE ).NE.0 ) THEN + IM = IM + 1 + W( IM ) = W( JE ) + IBLOCK( IM ) = IBLOCK( JE ) + END IF + 130 CONTINUE + M = IM + END IF + IF( IDISCL.LT.0 .OR. IDISCU.LT.0 ) THEN + TOOFEW = .TRUE. + END IF + END IF +* +* If ORDER='B', do nothing -- the eigenvalues are already sorted +* by block. +* If ORDER='E', sort the eigenvalues from smallest to largest +* + IF( IORDER.EQ.1 .AND. NSPLIT.GT.1 ) THEN + DO 150 JE = 1, M - 1 + IE = 0 + TMP1 = W( JE ) + DO 140 J = JE + 1, M + IF( W( J ).LT.TMP1 ) THEN + IE = J + TMP1 = W( J ) + END IF + 140 CONTINUE +* + IF( IE.NE.0 ) THEN + ITMP1 = IBLOCK( IE ) + W( IE ) = W( JE ) + IBLOCK( IE ) = IBLOCK( JE ) + W( JE ) = TMP1 + IBLOCK( JE ) = ITMP1 + END IF + 150 CONTINUE + END IF +* + INFO = 0 + IF( NCNVRG ) + $ INFO = INFO + 1 + IF( TOOFEW ) + $ INFO = INFO + 2 + RETURN +* +* End of DSTEBZ +* + END diff --git a/dspl/liblapack/SRC/dstedc.f b/dspl/liblapack/SRC/dstedc.f new file mode 100644 index 0000000..61b44bc --- /dev/null +++ b/dspl/liblapack/SRC/dstedc.f @@ -0,0 +1,482 @@ +*> \brief \b DSTEDC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSTEDC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK, +* LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPZ +* INTEGER INFO, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSTEDC computes all eigenvalues and, optionally, eigenvectors of a +*> symmetric tridiagonal matrix using the divide and conquer method. +*> The eigenvectors of a full or band real symmetric matrix can also be +*> found if DSYTRD or DSPTRD or DSBTRD has been used to reduce this +*> matrix to tridiagonal form. +*> +*> This code makes very mild assumptions about floating point +*> arithmetic. It will work on machines with a guard digit in +*> add/subtract, or on those binary machines without guard digits +*> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. +*> It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. See DLAED3 for details. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only. +*> = 'I': Compute eigenvectors of tridiagonal matrix also. +*> = 'V': Compute eigenvectors of original dense symmetric +*> matrix also. On entry, Z contains the orthogonal +*> matrix used to reduce the original matrix to +*> tridiagonal form. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the diagonal elements of the tridiagonal matrix. +*> On exit, if INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> On entry, the subdiagonal elements of the tridiagonal matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ,N) +*> On entry, if COMPZ = 'V', then Z contains the orthogonal +*> matrix used in the reduction to tridiagonal form. +*> On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +*> orthonormal eigenvectors of the original symmetric matrix, +*> and if COMPZ = 'I', Z contains the orthonormal eigenvectors +*> of the symmetric tridiagonal matrix. +*> If COMPZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1. +*> If eigenvectors are desired, then LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If COMPZ = 'N' or N <= 1 then LWORK must be at least 1. +*> If COMPZ = 'V' and N > 1 then LWORK must be at least +*> ( 1 + 3*N + 2*N*lg N + 4*N**2 ), +*> where lg( N ) = smallest integer k such +*> that 2**k >= N. +*> If COMPZ = 'I' and N > 1 then LWORK must be at least +*> ( 1 + 4*N + N**2 ). +*> Note that for COMPZ = 'I' or 'V', then if N is less than or +*> equal to the minimum divide size, usually 25, then LWORK need +*> only be max(1,2*(N-1)). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1. +*> If COMPZ = 'V' and N > 1 then LIWORK must be at least +*> ( 6 + 6*N + 5*N*lg N ). +*> If COMPZ = 'I' and N > 1 then LIWORK must be at least +*> ( 3 + 5*N ). +*> Note that for COMPZ = 'I' or 'V', then if N is less than or +*> equal to the minimum divide size, usually 25, then LIWORK +*> need only be 1. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: The algorithm failed to compute an eigenvalue while +*> working on the submatrix lying in rows and columns +*> INFO/(N+1) through mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA \n +*> Modified by Francoise Tisseur, University of Tennessee +*> +* ===================================================================== + SUBROUTINE DSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER FINISH, I, ICOMPZ, II, J, K, LGN, LIWMIN, + $ LWMIN, M, SMLSIZ, START, STOREZ, STRTRW + DOUBLE PRECISION EPS, ORGNRM, P, TINY +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, ILAENV, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLACPY, DLAED0, DLASCL, DLASET, DLASRT, + $ DSTEQR, DSTERF, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, LOG, MAX, MOD, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. + $ ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, N ) ) ) THEN + INFO = -6 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + SMLSIZ = ILAENV( 9, 'DSTEDC', ' ', 0, 0, 0, 0 ) + IF( N.LE.1 .OR. ICOMPZ.EQ.0 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE IF( N.LE.SMLSIZ ) THEN + LIWMIN = 1 + LWMIN = 2*( N - 1 ) + ELSE + LGN = INT( LOG( DBLE( N ) )/LOG( TWO ) ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( ICOMPZ.EQ.1 ) THEN + LWMIN = 1 + 3*N + 2*N*LGN + 4*N**2 + LIWMIN = 6 + 6*N + 5*N*LGN + ELSE IF( ICOMPZ.EQ.2 ) THEN + LWMIN = 1 + 4*N + N**2 + LIWMIN = 3 + 5*N + END IF + END IF + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT. LQUERY ) THEN + INFO = -8 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT. LQUERY ) THEN + INFO = -10 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSTEDC', -INFO ) + RETURN + ELSE IF (LQUERY) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( N.EQ.1 ) THEN + IF( ICOMPZ.NE.0 ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* If the following conditional clause is removed, then the routine +* will use the Divide and Conquer routine to compute only the +* eigenvalues, which requires (3N + 3N**2) real workspace and +* (2 + 5N + 2N lg(N)) integer workspace. +* Since on many architectures DSTERF is much faster than any other +* algorithm for finding eigenvalues only, it is used here +* as the default. If the conditional clause is removed, then +* information on the size of workspace needs to be changed. +* +* If COMPZ = 'N', use DSTERF to compute the eigenvalues. +* + IF( ICOMPZ.EQ.0 ) THEN + CALL DSTERF( N, D, E, INFO ) + GO TO 50 + END IF +* +* If N is smaller than the minimum divide size (SMLSIZ+1), then +* solve the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN +* + CALL DSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* + ELSE +* +* If COMPZ = 'V', the Z matrix must be stored elsewhere for later +* use. +* + IF( ICOMPZ.EQ.1 ) THEN + STOREZ = 1 + N*N + ELSE + STOREZ = 1 + END IF +* + IF( ICOMPZ.EQ.2 ) THEN + CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) + END IF +* +* Scale. +* + ORGNRM = DLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) + $ GO TO 50 +* + EPS = DLAMCH( 'Epsilon' ) +* + START = 1 +* +* while ( START <= N ) +* + 10 CONTINUE + IF( START.LE.N ) THEN +* +* Let FINISH be the position of the next subdiagonal entry +* such that E( FINISH ) <= TINY or FINISH = N if no such +* subdiagonal exists. The matrix identified by the elements +* between START and FINISH constitutes an independent +* sub-problem. +* + FINISH = START + 20 CONTINUE + IF( FINISH.LT.N ) THEN + TINY = EPS*SQRT( ABS( D( FINISH ) ) )* + $ SQRT( ABS( D( FINISH+1 ) ) ) + IF( ABS( E( FINISH ) ).GT.TINY ) THEN + FINISH = FINISH + 1 + GO TO 20 + END IF + END IF +* +* (Sub) Problem determined. Compute its size and solve it. +* + M = FINISH - START + 1 + IF( M.EQ.1 ) THEN + START = FINISH + 1 + GO TO 10 + END IF + IF( M.GT.SMLSIZ ) THEN +* +* Scale. +* + ORGNRM = DLANST( 'M', M, D( START ), E( START ) ) + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, M, 1, D( START ), M, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, M-1, 1, E( START ), + $ M-1, INFO ) +* + IF( ICOMPZ.EQ.1 ) THEN + STRTRW = 1 + ELSE + STRTRW = START + END IF + CALL DLAED0( ICOMPZ, N, M, D( START ), E( START ), + $ Z( STRTRW, START ), LDZ, WORK( 1 ), N, + $ WORK( STOREZ ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + INFO = ( INFO / ( M+1 )+START-1 )*( N+1 ) + + $ MOD( INFO, ( M+1 ) ) + START - 1 + GO TO 50 + END IF +* +* Scale back. +* + CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, M, 1, D( START ), M, + $ INFO ) +* + ELSE + IF( ICOMPZ.EQ.1 ) THEN +* +* Since QR won't update a Z matrix which is larger than +* the length of D, we must solve the sub-problem in a +* workspace and then multiply back into Z. +* + CALL DSTEQR( 'I', M, D( START ), E( START ), WORK, M, + $ WORK( M*M+1 ), INFO ) + CALL DLACPY( 'A', N, M, Z( 1, START ), LDZ, + $ WORK( STOREZ ), N ) + CALL DGEMM( 'N', 'N', N, M, M, ONE, + $ WORK( STOREZ ), N, WORK, M, ZERO, + $ Z( 1, START ), LDZ ) + ELSE IF( ICOMPZ.EQ.2 ) THEN + CALL DSTEQR( 'I', M, D( START ), E( START ), + $ Z( START, START ), LDZ, WORK, INFO ) + ELSE + CALL DSTERF( M, D( START ), E( START ), INFO ) + END IF + IF( INFO.NE.0 ) THEN + INFO = START*( N+1 ) + FINISH + GO TO 50 + END IF + END IF +* + START = FINISH + 1 + GO TO 10 + END IF +* +* endwhile +* + IF( ICOMPZ.EQ.0 ) THEN +* +* Use Quick Sort +* + CALL DLASRT( 'I', N, D, INFO ) +* + ELSE +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 40 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 30 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 30 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 40 CONTINUE + END IF + END IF +* + 50 CONTINUE + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DSTEDC +* + END diff --git a/dspl/liblapack/SRC/dstegr.f b/dspl/liblapack/SRC/dstegr.f new file mode 100644 index 0000000..6a8c43f --- /dev/null +++ b/dspl/liblapack/SRC/dstegr.f @@ -0,0 +1,302 @@ +*> \brief \b DSTEGR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSTEGR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSTEGR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, +* ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, +* LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE +* INTEGER IL, INFO, IU, LDZ, LIWORK, LWORK, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ) +* DOUBLE PRECISION Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSTEGR computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has +*> a well defined set of pairwise different real eigenvalues, the corresponding +*> real eigenvectors are pairwise orthogonal. +*> +*> The spectrum may be computed either completely or partially by specifying +*> either an interval (VL,VU] or a range of indices IL:IU for the desired +*> eigenvalues. +*> +*> DSTEGR is a compatibility wrapper around the improved DSTEMR routine. +*> See DSTEMR for further details. +*> +*> One important change is that the ABSTOL parameter no longer provides any +*> benefit and hence is no longer used. +*> +*> Note : DSTEGR and DSTEMR work only on machines which follow +*> IEEE-754 floating-point standard in their handling of infinities and +*> NaNs. Normal execution may create these exceptiona values and hence +*> may abort due to a floating point exception in environments which +*> do not conform to the IEEE-754 standard. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the N diagonal elements of the tridiagonal matrix +*> T. On exit, D is overwritten. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> On entry, the (N-1) subdiagonal elements of the tridiagonal +*> matrix T in elements 1 to N-1 of E. E(N) need not be set on +*> input, but is used internally as workspace. +*> On exit, E is overwritten. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> Unused. Was the absolute error tolerance for the +*> eigenvalues/eigenvectors in previous versions. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) +*> If JOBZ = 'V', and if INFO = 0, then the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix T +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> Supplying N columns is always safe. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', then LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th computed eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). This is relevant in the case when the matrix +*> is split. ISUPPZ is only accessed when JOBZ is 'V' and N > 0. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal +*> (and minimal) LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,18*N) +*> if JOBZ = 'V', and LWORK >= max(1,12*N) if JOBZ = 'N'. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (LIWORK) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N) +*> if the eigenvectors are desired, and LIWORK >= max(1,8*N) +*> if only the eigenvalues are to be computed. +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> On exit, INFO +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = 1X, internal error in DLARRE, +*> if INFO = 2X, internal error in DLARRV. +*> Here, the digit X = ABS( IINFO ) < 10, where IINFO is +*> the nonzero error code returned by DLARRE or +*> DLARRV, respectively. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Inderjit Dhillon, IBM Almaden, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, LBNL/NERSC, USA \n +* +* ===================================================================== + SUBROUTINE DSTEGR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE + INTEGER IL, INFO, IU, LDZ, LIWORK, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ) + DOUBLE PRECISION Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL TRYRAC +* .. +* .. External Subroutines .. + EXTERNAL DSTEMR +* .. +* .. Executable Statements .. + INFO = 0 + TRYRAC = .FALSE. + + CALL DSTEMR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, + $ M, W, Z, LDZ, N, ISUPPZ, TRYRAC, WORK, LWORK, + $ IWORK, LIWORK, INFO ) +* +* End of DSTEGR +* + END diff --git a/dspl/liblapack/SRC/dstein.f b/dspl/liblapack/SRC/dstein.f new file mode 100644 index 0000000..fb1e8b9 --- /dev/null +++ b/dspl/liblapack/SRC/dstein.f @@ -0,0 +1,453 @@ +*> \brief \b DSTEIN +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSTEIN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, Z, LDZ, WORK, +* IWORK, IFAIL, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDZ, M, N +* .. +* .. Array Arguments .. +* INTEGER IBLOCK( * ), IFAIL( * ), ISPLIT( * ), +* $ IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSTEIN computes the eigenvectors of a real symmetric tridiagonal +*> matrix T corresponding to specified eigenvalues, using inverse +*> iteration. +*> +*> The maximum number of iterations allowed for each eigenvector is +*> specified by an internal parameter MAXITS (currently set to 5). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) subdiagonal elements of the tridiagonal matrix +*> T, in elements 1 to N-1. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of eigenvectors to be found. 0 <= M <= N. +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements of W contain the eigenvalues for +*> which eigenvectors are to be computed. The eigenvalues +*> should be grouped by split-off block and ordered from +*> smallest to largest within the block. ( The output array +*> W from DSTEBZ with ORDER = 'B' is expected here. ) +*> \endverbatim +*> +*> \param[in] IBLOCK +*> \verbatim +*> IBLOCK is INTEGER array, dimension (N) +*> The submatrix indices associated with the corresponding +*> eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to +*> the first submatrix from the top, =2 if W(i) belongs to +*> the second submatrix, etc. ( The output array IBLOCK +*> from DSTEBZ is expected here. ) +*> \endverbatim +*> +*> \param[in] ISPLIT +*> \verbatim +*> ISPLIT is INTEGER array, dimension (N) +*> The splitting points, at which T breaks up into submatrices. +*> The first submatrix consists of rows/columns 1 to +*> ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 +*> through ISPLIT( 2 ), etc. +*> ( The output array ISPLIT from DSTEBZ is expected here. ) +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, M) +*> The computed eigenvectors. The eigenvector associated +*> with the eigenvalue W(i) is stored in the i-th column of +*> Z. Any vector which fails to converge is set to its current +*> iterate after MAXITS iterations. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (M) +*> On normal exit, all elements of IFAIL are zero. +*> If one or more eigenvectors fail to converge after +*> MAXITS iterations, then their indices are stored in +*> array IFAIL. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge +*> in MAXITS iterations. Their indices are stored in +*> array IFAIL. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> MAXITS INTEGER, default = 5 +*> The maximum number of iterations performed. +*> +*> EXTRA INTEGER, default = 2 +*> The number of iterations performed after norm growth +*> criterion is satisfied, should be at least 1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, Z, LDZ, WORK, + $ IWORK, IFAIL, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDZ, M, N +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), IFAIL( * ), ISPLIT( * ), + $ IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TEN, ODM3, ODM1 + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TEN = 1.0D+1, + $ ODM3 = 1.0D-3, ODM1 = 1.0D-1 ) + INTEGER MAXITS, EXTRA + PARAMETER ( MAXITS = 5, EXTRA = 2 ) +* .. +* .. Local Scalars .. + INTEGER B1, BLKSIZ, BN, GPIND, I, IINFO, INDRV1, + $ INDRV2, INDRV3, INDRV4, INDRV5, ITS, J, J1, + $ JBLK, JMAX, NBLK, NRMCHK + DOUBLE PRECISION DTPCRT, EPS, EPS1, NRM, ONENRM, ORTOL, PERTOL, + $ SCL, SEP, TOL, XJ, XJM, ZTR +* .. +* .. Local Arrays .. + INTEGER ISEED( 4 ) +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DDOT, DLAMCH, DNRM2 + EXTERNAL IDAMAX, DDOT, DLAMCH, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLAGTF, DLAGTS, DLARNV, DSCAL, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + DO 10 I = 1, M + IFAIL( I ) = 0 + 10 CONTINUE +* + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 .OR. M.GT.N ) THEN + INFO = -4 + ELSE IF( LDZ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + DO 20 J = 2, M + IF( IBLOCK( J ).LT.IBLOCK( J-1 ) ) THEN + INFO = -6 + GO TO 30 + END IF + IF( IBLOCK( J ).EQ.IBLOCK( J-1 ) .AND. W( J ).LT.W( J-1 ) ) + $ THEN + INFO = -5 + GO TO 30 + END IF + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSTEIN', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + EPS = DLAMCH( 'Precision' ) +* +* Initialize seed for random number generator DLARNV. +* + DO 40 I = 1, 4 + ISEED( I ) = 1 + 40 CONTINUE +* +* Initialize pointers. +* + INDRV1 = 0 + INDRV2 = INDRV1 + N + INDRV3 = INDRV2 + N + INDRV4 = INDRV3 + N + INDRV5 = INDRV4 + N +* +* Compute eigenvectors of matrix blocks. +* + J1 = 1 + DO 160 NBLK = 1, IBLOCK( M ) +* +* Find starting and ending indices of block nblk. +* + IF( NBLK.EQ.1 ) THEN + B1 = 1 + ELSE + B1 = ISPLIT( NBLK-1 ) + 1 + END IF + BN = ISPLIT( NBLK ) + BLKSIZ = BN - B1 + 1 + IF( BLKSIZ.EQ.1 ) + $ GO TO 60 + GPIND = J1 +* +* Compute reorthogonalization criterion and stopping criterion. +* + ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) ) + ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) ) + DO 50 I = B1 + 1, BN - 1 + ONENRM = MAX( ONENRM, ABS( D( I ) )+ABS( E( I-1 ) )+ + $ ABS( E( I ) ) ) + 50 CONTINUE + ORTOL = ODM3*ONENRM +* + DTPCRT = SQRT( ODM1 / BLKSIZ ) +* +* Loop through eigenvalues of block nblk. +* + 60 CONTINUE + JBLK = 0 + DO 150 J = J1, M + IF( IBLOCK( J ).NE.NBLK ) THEN + J1 = J + GO TO 160 + END IF + JBLK = JBLK + 1 + XJ = W( J ) +* +* Skip all the work if the block size is one. +* + IF( BLKSIZ.EQ.1 ) THEN + WORK( INDRV1+1 ) = ONE + GO TO 120 + END IF +* +* If eigenvalues j and j-1 are too close, add a relatively +* small perturbation. +* + IF( JBLK.GT.1 ) THEN + EPS1 = ABS( EPS*XJ ) + PERTOL = TEN*EPS1 + SEP = XJ - XJM + IF( SEP.LT.PERTOL ) + $ XJ = XJM + PERTOL + END IF +* + ITS = 0 + NRMCHK = 0 +* +* Get random starting vector. +* + CALL DLARNV( 2, ISEED, BLKSIZ, WORK( INDRV1+1 ) ) +* +* Copy the matrix T so it won't be destroyed in factorization. +* + CALL DCOPY( BLKSIZ, D( B1 ), 1, WORK( INDRV4+1 ), 1 ) + CALL DCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV2+2 ), 1 ) + CALL DCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV3+1 ), 1 ) +* +* Compute LU factors with partial pivoting ( PT = LU ) +* + TOL = ZERO + CALL DLAGTF( BLKSIZ, WORK( INDRV4+1 ), XJ, WORK( INDRV2+2 ), + $ WORK( INDRV3+1 ), TOL, WORK( INDRV5+1 ), IWORK, + $ IINFO ) +* +* Update iteration count. +* + 70 CONTINUE + ITS = ITS + 1 + IF( ITS.GT.MAXITS ) + $ GO TO 100 +* +* Normalize and scale the righthand side vector Pb. +* + JMAX = IDAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) + SCL = BLKSIZ*ONENRM*MAX( EPS, + $ ABS( WORK( INDRV4+BLKSIZ ) ) ) / + $ ABS( WORK( INDRV1+JMAX ) ) + CALL DSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 ) +* +* Solve the system LU = Pb. +* + CALL DLAGTS( -1, BLKSIZ, WORK( INDRV4+1 ), WORK( INDRV2+2 ), + $ WORK( INDRV3+1 ), WORK( INDRV5+1 ), IWORK, + $ WORK( INDRV1+1 ), TOL, IINFO ) +* +* Reorthogonalize by modified Gram-Schmidt if eigenvalues are +* close enough. +* + IF( JBLK.EQ.1 ) + $ GO TO 90 + IF( ABS( XJ-XJM ).GT.ORTOL ) + $ GPIND = J + IF( GPIND.NE.J ) THEN + DO 80 I = GPIND, J - 1 + ZTR = -DDOT( BLKSIZ, WORK( INDRV1+1 ), 1, Z( B1, I ), + $ 1 ) + CALL DAXPY( BLKSIZ, ZTR, Z( B1, I ), 1, + $ WORK( INDRV1+1 ), 1 ) + 80 CONTINUE + END IF +* +* Check the infinity norm of the iterate. +* + 90 CONTINUE + JMAX = IDAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) + NRM = ABS( WORK( INDRV1+JMAX ) ) +* +* Continue for additional iterations after norm reaches +* stopping criterion. +* + IF( NRM.LT.DTPCRT ) + $ GO TO 70 + NRMCHK = NRMCHK + 1 + IF( NRMCHK.LT.EXTRA+1 ) + $ GO TO 70 +* + GO TO 110 +* +* If stopping criterion was not satisfied, update info and +* store eigenvector number in array ifail. +* + 100 CONTINUE + INFO = INFO + 1 + IFAIL( INFO ) = J +* +* Accept iterate as jth eigenvector. +* + 110 CONTINUE + SCL = ONE / DNRM2( BLKSIZ, WORK( INDRV1+1 ), 1 ) + JMAX = IDAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) + IF( WORK( INDRV1+JMAX ).LT.ZERO ) + $ SCL = -SCL + CALL DSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 ) + 120 CONTINUE + DO 130 I = 1, N + Z( I, J ) = ZERO + 130 CONTINUE + DO 140 I = 1, BLKSIZ + Z( B1+I-1, J ) = WORK( INDRV1+I ) + 140 CONTINUE +* +* Save the shift to check eigenvalue spacing at next +* iteration. +* + XJM = XJ +* + 150 CONTINUE + 160 CONTINUE +* + RETURN +* +* End of DSTEIN +* + END diff --git a/dspl/liblapack/SRC/dstemr.f b/dspl/liblapack/SRC/dstemr.f new file mode 100644 index 0000000..a1a8e34 --- /dev/null +++ b/dspl/liblapack/SRC/dstemr.f @@ -0,0 +1,777 @@ +*> \brief \b DSTEMR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSTEMR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSTEMR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, +* M, W, Z, LDZ, NZC, ISUPPZ, TRYRAC, WORK, LWORK, +* IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE +* LOGICAL TRYRAC +* INTEGER IL, INFO, IU, LDZ, NZC, LIWORK, LWORK, M, N +* DOUBLE PRECISION VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ) +* DOUBLE PRECISION Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSTEMR computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has +*> a well defined set of pairwise different real eigenvalues, the corresponding +*> real eigenvectors are pairwise orthogonal. +*> +*> The spectrum may be computed either completely or partially by specifying +*> either an interval (VL,VU] or a range of indices IL:IU for the desired +*> eigenvalues. +*> +*> Depending on the number of desired eigenvalues, these are computed either +*> by bisection or the dqds algorithm. Numerically orthogonal eigenvectors are +*> computed by the use of various suitable L D L^T factorizations near clusters +*> of close eigenvalues (referred to as RRRs, Relatively Robust +*> Representations). An informal sketch of the algorithm follows. +*> +*> For each unreduced block (submatrix) of T, +*> (a) Compute T - sigma I = L D L^T, so that L and D +*> define all the wanted eigenvalues to high relative accuracy. +*> This means that small relative changes in the entries of D and L +*> cause only small relative changes in the eigenvalues and +*> eigenvectors. The standard (unfactored) representation of the +*> tridiagonal matrix T does not have this property in general. +*> (b) Compute the eigenvalues to suitable accuracy. +*> If the eigenvectors are desired, the algorithm attains full +*> accuracy of the computed eigenvalues only right before +*> the corresponding vectors have to be computed, see steps c) and d). +*> (c) For each cluster of close eigenvalues, select a new +*> shift close to the cluster, find a new factorization, and refine +*> the shifted eigenvalues to suitable accuracy. +*> (d) For each eigenvalue with a large enough relative separation compute +*> the corresponding eigenvector by forming a rank revealing twisted +*> factorization. Go back to (c) for any clusters that remain. +*> +*> For more details, see: +*> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations +*> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," +*> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. +*> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and +*> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, +*> 2004. Also LAPACK Working Note 154. +*> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric +*> tridiagonal eigenvalue/eigenvector problem", +*> Computer Science Division Technical Report No. UCB/CSD-97-971, +*> UC Berkeley, May 1997. +*> +*> Further Details +*> 1.DSTEMR works only on machines which follow IEEE-754 +*> floating-point standard in their handling of infinities and NaNs. +*> This permits the use of efficient inner loops avoiding a check for +*> zero divisors. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the N diagonal elements of the tridiagonal matrix +*> T. On exit, D is overwritten. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> On entry, the (N-1) subdiagonal elements of the tridiagonal +*> matrix T in elements 1 to N-1 of E. E(N) need not be set on +*> input, but is used internally as workspace. +*> On exit, E is overwritten. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) +*> If JOBZ = 'V', and if INFO = 0, then the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix T +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and can be computed with a workspace +*> query by setting NZC = -1, see below. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', then LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[in] NZC +*> \verbatim +*> NZC is INTEGER +*> The number of eigenvectors to be held in the array Z. +*> If RANGE = 'A', then NZC >= max(1,N). +*> If RANGE = 'V', then NZC >= the number of eigenvalues in (VL,VU]. +*> If RANGE = 'I', then NZC >= IU-IL+1. +*> If NZC = -1, then a workspace query is assumed; the +*> routine calculates the number of columns of the array Z that +*> are needed to hold the eigenvectors. +*> This value is returned as the first entry of the Z array, and +*> no error message related to NZC is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th computed eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). This is relevant in the case when the matrix +*> is split. ISUPPZ is only accessed when JOBZ is 'V' and N > 0. +*> \endverbatim +*> +*> \param[in,out] TRYRAC +*> \verbatim +*> TRYRAC is LOGICAL +*> If TRYRAC.EQ..TRUE., indicates that the code should check whether +*> the tridiagonal matrix defines its eigenvalues to high relative +*> accuracy. If so, the code uses relative-accuracy preserving +*> algorithms that might be (a bit) slower depending on the matrix. +*> If the matrix does not define its eigenvalues to high relative +*> accuracy, the code can uses possibly faster algorithms. +*> If TRYRAC.EQ..FALSE., the code is not required to guarantee +*> relatively accurate eigenvalues and can use the fastest possible +*> techniques. +*> On exit, a .TRUE. TRYRAC will be set to .FALSE. if the matrix +*> does not define its eigenvalues to high relative accuracy. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal +*> (and minimal) LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,18*N) +*> if JOBZ = 'V', and LWORK >= max(1,12*N) if JOBZ = 'N'. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (LIWORK) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N) +*> if the eigenvectors are desired, and LIWORK >= max(1,8*N) +*> if only the eigenvalues are to be computed. +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> On exit, INFO +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = 1X, internal error in DLARRE, +*> if INFO = 2X, internal error in DLARRV. +*> Here, the digit X = ABS( IINFO ) < 10, where IINFO is +*> the nonzero error code returned by DLARRE or +*> DLARRV, respectively. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE DSTEMR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, + $ M, W, Z, LDZ, NZC, ISUPPZ, TRYRAC, WORK, LWORK, + $ IWORK, LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE + LOGICAL TRYRAC + INTEGER IL, INFO, IU, LDZ, NZC, LIWORK, LWORK, M, N + DOUBLE PRECISION VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ) + DOUBLE PRECISION Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, FOUR, MINRGP + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, + $ FOUR = 4.0D0, + $ MINRGP = 1.0D-3 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ, ZQUERY + INTEGER I, IBEGIN, IEND, IFIRST, IIL, IINDBL, IINDW, + $ IINDWK, IINFO, IINSPL, IIU, ILAST, IN, INDD, + $ INDE2, INDERR, INDGP, INDGRS, INDWRK, ITMP, + $ ITMP2, J, JBLK, JJ, LIWMIN, LWMIN, NSPLIT, + $ NZCMIN, OFFSET, WBEGIN, WEND + DOUBLE PRECISION BIGNUM, CS, EPS, PIVMIN, R1, R2, RMAX, RMIN, + $ RTOL1, RTOL2, SAFMIN, SCALE, SMLNUM, SN, + $ THRESH, TMP, TNRM, WL, WU +* .. +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAE2, DLAEV2, DLARRC, DLARRE, DLARRJ, + $ DLARRR, DLARRV, DLASRT, DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT + + +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ).OR.( LIWORK.EQ.-1 ) ) + ZQUERY = ( NZC.EQ.-1 ) + +* DSTEMR needs WORK of size 6*N, IWORK of size 3*N. +* In addition, DLARRE needs WORK of size 6*N, IWORK of size 5*N. +* Furthermore, DLARRV needs WORK of size 12*N, IWORK of size 7*N. + IF( WANTZ ) THEN + LWMIN = 18*N + LIWMIN = 10*N + ELSE +* need less workspace if only the eigenvalues are wanted + LWMIN = 12*N + LIWMIN = 8*N + ENDIF + + WL = ZERO + WU = ZERO + IIL = 0 + IIU = 0 + NSPLIT = 0 + + IF( VALEIG ) THEN +* We do not reference VL, VU in the cases RANGE = 'I','A' +* The interval (WL, WU] contains all the wanted eigenvalues. +* It is either given by the user or computed in DLARRE. + WL = VL + WU = VU + ELSEIF( INDEIG ) THEN +* We do not reference IL, IU in the cases RANGE = 'V','A' + IIL = IL + IIU = IU + ENDIF +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( VALEIG .AND. N.GT.0 .AND. WU.LE.WL ) THEN + INFO = -7 + ELSE IF( INDEIG .AND. ( IIL.LT.1 .OR. IIL.GT.N ) ) THEN + INFO = -8 + ELSE IF( INDEIG .AND. ( IIU.LT.IIL .OR. IIU.GT.N ) ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -17 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( WANTZ .AND. ALLEIG ) THEN + NZCMIN = N + ELSE IF( WANTZ .AND. VALEIG ) THEN + CALL DLARRC( 'T', N, VL, VU, D, E, SAFMIN, + $ NZCMIN, ITMP, ITMP2, INFO ) + ELSE IF( WANTZ .AND. INDEIG ) THEN + NZCMIN = IIU-IIL+1 + ELSE +* WANTZ .EQ. FALSE. + NZCMIN = 0 + ENDIF + IF( ZQUERY .AND. INFO.EQ.0 ) THEN + Z( 1,1 ) = NZCMIN + ELSE IF( NZC.LT.NZCMIN .AND. .NOT.ZQUERY ) THEN + INFO = -14 + END IF + END IF + + IF( INFO.NE.0 ) THEN +* + CALL XERBLA( 'DSTEMR', -INFO ) +* + RETURN + ELSE IF( LQUERY .OR. ZQUERY ) THEN + RETURN + END IF +* +* Handle N = 0, 1, and 2 cases immediately +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = D( 1 ) + ELSE + IF( WL.LT.D( 1 ) .AND. WU.GE.D( 1 ) ) THEN + M = 1 + W( 1 ) = D( 1 ) + END IF + END IF + IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN + Z( 1, 1 ) = ONE + ISUPPZ(1) = 1 + ISUPPZ(2) = 1 + END IF + RETURN + END IF +* + IF( N.EQ.2 ) THEN + IF( .NOT.WANTZ ) THEN + CALL DLAE2( D(1), E(1), D(2), R1, R2 ) + ELSE IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN + CALL DLAEV2( D(1), E(1), D(2), R1, R2, CS, SN ) + END IF + IF( ALLEIG.OR. + $ (VALEIG.AND.(R2.GT.WL).AND. + $ (R2.LE.WU)).OR. + $ (INDEIG.AND.(IIL.EQ.1)) ) THEN + M = M+1 + W( M ) = R2 + IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN + Z( 1, M ) = -SN + Z( 2, M ) = CS +* Note: At most one of SN and CS can be zero. + IF (SN.NE.ZERO) THEN + IF (CS.NE.ZERO) THEN + ISUPPZ(2*M-1) = 1 + ISUPPZ(2*M) = 2 + ELSE + ISUPPZ(2*M-1) = 1 + ISUPPZ(2*M) = 1 + END IF + ELSE + ISUPPZ(2*M-1) = 2 + ISUPPZ(2*M) = 2 + END IF + ENDIF + ENDIF + IF( ALLEIG.OR. + $ (VALEIG.AND.(R1.GT.WL).AND. + $ (R1.LE.WU)).OR. + $ (INDEIG.AND.(IIU.EQ.2)) ) THEN + M = M+1 + W( M ) = R1 + IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN + Z( 1, M ) = CS + Z( 2, M ) = SN +* Note: At most one of SN and CS can be zero. + IF (SN.NE.ZERO) THEN + IF (CS.NE.ZERO) THEN + ISUPPZ(2*M-1) = 1 + ISUPPZ(2*M) = 2 + ELSE + ISUPPZ(2*M-1) = 1 + ISUPPZ(2*M) = 1 + END IF + ELSE + ISUPPZ(2*M-1) = 2 + ISUPPZ(2*M) = 2 + END IF + ENDIF + ENDIF + + ELSE + +* Continue with general N + + INDGRS = 1 + INDERR = 2*N + 1 + INDGP = 3*N + 1 + INDD = 4*N + 1 + INDE2 = 5*N + 1 + INDWRK = 6*N + 1 +* + IINSPL = 1 + IINDBL = N + 1 + IINDW = 2*N + 1 + IINDWK = 3*N + 1 +* +* Scale matrix to allowable range, if necessary. +* The allowable range is related to the PIVMIN parameter; see the +* comments in DLARRD. The preference for scaling small values +* up is heuristic; we expect users' matrices not to be close to the +* RMAX threshold. +* + SCALE = ONE + TNRM = DLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + SCALE = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + SCALE = RMAX / TNRM + END IF + IF( SCALE.NE.ONE ) THEN + CALL DSCAL( N, SCALE, D, 1 ) + CALL DSCAL( N-1, SCALE, E, 1 ) + TNRM = TNRM*SCALE + IF( VALEIG ) THEN +* If eigenvalues in interval have to be found, +* scale (WL, WU] accordingly + WL = WL*SCALE + WU = WU*SCALE + ENDIF + END IF +* +* Compute the desired eigenvalues of the tridiagonal after splitting +* into smaller subblocks if the corresponding off-diagonal elements +* are small +* THRESH is the splitting parameter for DLARRE +* A negative THRESH forces the old splitting criterion based on the +* size of the off-diagonal. A positive THRESH switches to splitting +* which preserves relative accuracy. +* + IF( TRYRAC ) THEN +* Test whether the matrix warrants the more expensive relative approach. + CALL DLARRR( N, D, E, IINFO ) + ELSE +* The user does not care about relative accurately eigenvalues + IINFO = -1 + ENDIF +* Set the splitting criterion + IF (IINFO.EQ.0) THEN + THRESH = EPS + ELSE + THRESH = -EPS +* relative accuracy is desired but T does not guarantee it + TRYRAC = .FALSE. + ENDIF +* + IF( TRYRAC ) THEN +* Copy original diagonal, needed to guarantee relative accuracy + CALL DCOPY(N,D,1,WORK(INDD),1) + ENDIF +* Store the squares of the offdiagonal values of T + DO 5 J = 1, N-1 + WORK( INDE2+J-1 ) = E(J)**2 + 5 CONTINUE + +* Set the tolerance parameters for bisection + IF( .NOT.WANTZ ) THEN +* DLARRE computes the eigenvalues to full precision. + RTOL1 = FOUR * EPS + RTOL2 = FOUR * EPS + ELSE +* DLARRE computes the eigenvalues to less than full precision. +* DLARRV will refine the eigenvalue approximations, and we can +* need less accurate initial bisection in DLARRE. +* Note: these settings do only affect the subset case and DLARRE + RTOL1 = SQRT(EPS) + RTOL2 = MAX( SQRT(EPS)*5.0D-3, FOUR * EPS ) + ENDIF + CALL DLARRE( RANGE, N, WL, WU, IIL, IIU, D, E, + $ WORK(INDE2), RTOL1, RTOL2, THRESH, NSPLIT, + $ IWORK( IINSPL ), M, W, WORK( INDERR ), + $ WORK( INDGP ), IWORK( IINDBL ), + $ IWORK( IINDW ), WORK( INDGRS ), PIVMIN, + $ WORK( INDWRK ), IWORK( IINDWK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 10 + ABS( IINFO ) + RETURN + END IF +* Note that if RANGE .NE. 'V', DLARRE computes bounds on the desired +* part of the spectrum. All desired eigenvalues are contained in +* (WL,WU] + + + IF( WANTZ ) THEN +* +* Compute the desired eigenvectors corresponding to the computed +* eigenvalues +* + CALL DLARRV( N, WL, WU, D, E, + $ PIVMIN, IWORK( IINSPL ), M, + $ 1, M, MINRGP, RTOL1, RTOL2, + $ W, WORK( INDERR ), WORK( INDGP ), IWORK( IINDBL ), + $ IWORK( IINDW ), WORK( INDGRS ), Z, LDZ, + $ ISUPPZ, WORK( INDWRK ), IWORK( IINDWK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 20 + ABS( IINFO ) + RETURN + END IF + ELSE +* DLARRE computes eigenvalues of the (shifted) root representation +* DLARRV returns the eigenvalues of the unshifted matrix. +* However, if the eigenvectors are not desired by the user, we need +* to apply the corresponding shifts from DLARRE to obtain the +* eigenvalues of the original matrix. + DO 20 J = 1, M + ITMP = IWORK( IINDBL+J-1 ) + W( J ) = W( J ) + E( IWORK( IINSPL+ITMP-1 ) ) + 20 CONTINUE + END IF +* + + IF ( TRYRAC ) THEN +* Refine computed eigenvalues so that they are relatively accurate +* with respect to the original matrix T. + IBEGIN = 1 + WBEGIN = 1 + DO 39 JBLK = 1, IWORK( IINDBL+M-1 ) + IEND = IWORK( IINSPL+JBLK-1 ) + IN = IEND - IBEGIN + 1 + WEND = WBEGIN - 1 +* check if any eigenvalues have to be refined in this block + 36 CONTINUE + IF( WEND.LT.M ) THEN + IF( IWORK( IINDBL+WEND ).EQ.JBLK ) THEN + WEND = WEND + 1 + GO TO 36 + END IF + END IF + IF( WEND.LT.WBEGIN ) THEN + IBEGIN = IEND + 1 + GO TO 39 + END IF + + OFFSET = IWORK(IINDW+WBEGIN-1)-1 + IFIRST = IWORK(IINDW+WBEGIN-1) + ILAST = IWORK(IINDW+WEND-1) + RTOL2 = FOUR * EPS + CALL DLARRJ( IN, + $ WORK(INDD+IBEGIN-1), WORK(INDE2+IBEGIN-1), + $ IFIRST, ILAST, RTOL2, OFFSET, W(WBEGIN), + $ WORK( INDERR+WBEGIN-1 ), + $ WORK( INDWRK ), IWORK( IINDWK ), PIVMIN, + $ TNRM, IINFO ) + IBEGIN = IEND + 1 + WBEGIN = WEND + 1 + 39 CONTINUE + ENDIF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( SCALE.NE.ONE ) THEN + CALL DSCAL( M, ONE / SCALE, W, 1 ) + END IF + + END IF + +* +* If eigenvalues are not in increasing order, then sort them, +* possibly along with eigenvectors. +* + IF( NSPLIT.GT.1 .OR. N.EQ.2 ) THEN + IF( .NOT. WANTZ ) THEN + CALL DLASRT( 'I', M, W, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 3 + RETURN + END IF + ELSE + DO 60 J = 1, M - 1 + I = 0 + TMP = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP ) THEN + I = JJ + TMP = W( JJ ) + END IF + 50 CONTINUE + IF( I.NE.0 ) THEN + W( I ) = W( J ) + W( J ) = TMP + IF( WANTZ ) THEN + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + ITMP = ISUPPZ( 2*I-1 ) + ISUPPZ( 2*I-1 ) = ISUPPZ( 2*J-1 ) + ISUPPZ( 2*J-1 ) = ITMP + ITMP = ISUPPZ( 2*I ) + ISUPPZ( 2*I ) = ISUPPZ( 2*J ) + ISUPPZ( 2*J ) = ITMP + END IF + END IF + 60 CONTINUE + END IF + ENDIF +* +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of DSTEMR +* + END diff --git a/dspl/liblapack/SRC/dsteqr.f b/dspl/liblapack/SRC/dsteqr.f new file mode 100644 index 0000000..c34a548 --- /dev/null +++ b/dspl/liblapack/SRC/dsteqr.f @@ -0,0 +1,572 @@ +*> \brief \b DSTEQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSTEQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPZ +* INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSTEQR computes all eigenvalues and, optionally, eigenvectors of a +*> symmetric tridiagonal matrix using the implicit QL or QR method. +*> The eigenvectors of a full or band symmetric matrix can also be found +*> if DSYTRD or DSPTRD or DSBTRD has been used to reduce this matrix to +*> tridiagonal form. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only. +*> = 'V': Compute eigenvalues and eigenvectors of the original +*> symmetric matrix. On entry, Z must contain the +*> orthogonal matrix used to reduce the original matrix +*> to tridiagonal form. +*> = 'I': Compute eigenvalues and eigenvectors of the +*> tridiagonal matrix. Z is initialized to the identity +*> matrix. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the diagonal elements of the tridiagonal matrix. +*> On exit, if INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', then Z contains the orthogonal +*> matrix used in the reduction to tridiagonal form. +*> On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +*> orthonormal eigenvectors of the original symmetric matrix, +*> and if COMPZ = 'I', Z contains the orthonormal eigenvectors +*> of the symmetric tridiagonal matrix. +*> If COMPZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> eigenvectors are desired, then LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (max(1,2*N-2)) +*> If COMPZ = 'N', then WORK is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: the algorithm has failed to find all the eigenvalues in +*> a total of 30*N iterations; if INFO = i, then i +*> elements of E have not converged to zero; on exit, D +*> and E contain the elements of a symmetric tridiagonal +*> matrix which is orthogonally similar to the original +*> matrix. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, THREE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ THREE = 3.0D0 ) + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) +* .. +* .. Local Scalars .. + INTEGER I, ICOMPZ, II, ISCALE, J, JTOT, K, L, L1, LEND, + $ LENDM1, LENDP1, LENDSV, LM1, LSV, M, MM, MM1, + $ NM1, NMAXIT + DOUBLE PRECISION ANORM, B, C, EPS, EPS2, F, G, P, R, RT1, RT2, + $ S, SAFMAX, SAFMIN, SSFMAX, SSFMIN, TST +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANST, DLAPY2 + EXTERNAL LSAME, DLAMCH, DLANST, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL DLAE2, DLAEV2, DLARTG, DLASCL, DLASET, DLASR, + $ DLASRT, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSTEQR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ICOMPZ.EQ.2 ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Determine the unit roundoff and over/underflow thresholds. +* + EPS = DLAMCH( 'E' ) + EPS2 = EPS**2 + SAFMIN = DLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + SSFMAX = SQRT( SAFMAX ) / THREE + SSFMIN = SQRT( SAFMIN ) / EPS2 +* +* Compute the eigenvalues and eigenvectors of the tridiagonal +* matrix. +* + IF( ICOMPZ.EQ.2 ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* + NMAXIT = N*MAXIT + JTOT = 0 +* +* Determine where the matrix splits and choose QL or QR iteration +* for each block, according to whether top or bottom diagonal +* element is smaller. +* + L1 = 1 + NM1 = N - 1 +* + 10 CONTINUE + IF( L1.GT.N ) + $ GO TO 160 + IF( L1.GT.1 ) + $ E( L1-1 ) = ZERO + IF( L1.LE.NM1 ) THEN + DO 20 M = L1, NM1 + TST = ABS( E( M ) ) + IF( TST.EQ.ZERO ) + $ GO TO 30 + IF( TST.LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+ + $ 1 ) ) ) )*EPS ) THEN + E( M ) = ZERO + GO TO 30 + END IF + 20 CONTINUE + END IF + M = N +* + 30 CONTINUE + L = L1 + LSV = L + LEND = M + LENDSV = LEND + L1 = M + 1 + IF( LEND.EQ.L ) + $ GO TO 10 +* +* Scale submatrix in rows and columns L to LEND +* + ANORM = DLANST( 'M', LEND-L+1, D( L ), E( L ) ) + ISCALE = 0 + IF( ANORM.EQ.ZERO ) + $ GO TO 10 + IF( ANORM.GT.SSFMAX ) THEN + ISCALE = 1 + CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, + $ INFO ) + ELSE IF( ANORM.LT.SSFMIN ) THEN + ISCALE = 2 + CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, + $ INFO ) + END IF +* +* Choose between QL and QR iteration +* + IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN + LEND = LSV + L = LENDSV + END IF +* + IF( LEND.GT.L ) THEN +* +* QL Iteration +* +* Look for small subdiagonal element. +* + 40 CONTINUE + IF( L.NE.LEND ) THEN + LENDM1 = LEND - 1 + DO 50 M = L, LENDM1 + TST = ABS( E( M ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M+1 ) )+ + $ SAFMIN )GO TO 60 + 50 CONTINUE + END IF +* + M = LEND +* + 60 CONTINUE + IF( M.LT.LEND ) + $ E( M ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 80 +* +* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 +* to compute its eigensystem. +* + IF( M.EQ.L+1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL DLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S ) + WORK( L ) = C + WORK( N-1+L ) = S + CALL DLASR( 'R', 'V', 'B', N, 2, WORK( L ), + $ WORK( N-1+L ), Z( 1, L ), LDZ ) + ELSE + CALL DLAE2( D( L ), E( L ), D( L+1 ), RT1, RT2 ) + END IF + D( L ) = RT1 + D( L+1 ) = RT2 + E( L ) = ZERO + L = L + 2 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L+1 )-P ) / ( TWO*E( L ) ) + R = DLAPY2( G, ONE ) + G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + MM1 = M - 1 + DO 70 I = MM1, L, -1 + F = S*E( I ) + B = C*E( I ) + CALL DLARTG( G, F, C, S, R ) + IF( I.NE.M-1 ) + $ E( I+1 ) = R + G = D( I+1 ) - P + R = ( D( I )-G )*S + TWO*C*B + P = S*R + D( I+1 ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = -S + END IF +* + 70 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = M - L + 1 + CALL DLASR( 'R', 'V', 'B', N, MM, WORK( L ), WORK( N-1+L ), + $ Z( 1, L ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( L ) = G + GO TO 40 +* +* Eigenvalue found. +* + 80 CONTINUE + D( L ) = P +* + L = L + 1 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 +* + ELSE +* +* QR Iteration +* +* Look for small superdiagonal element. +* + 90 CONTINUE + IF( L.NE.LEND ) THEN + LENDP1 = LEND + 1 + DO 100 M = L, LENDP1, -1 + TST = ABS( E( M-1 ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M-1 ) )+ + $ SAFMIN )GO TO 110 + 100 CONTINUE + END IF +* + M = LEND +* + 110 CONTINUE + IF( M.GT.LEND ) + $ E( M-1 ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 130 +* +* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 +* to compute its eigensystem. +* + IF( M.EQ.L-1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL DLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S ) + WORK( M ) = C + WORK( N-1+M ) = S + CALL DLASR( 'R', 'V', 'F', N, 2, WORK( M ), + $ WORK( N-1+M ), Z( 1, L-1 ), LDZ ) + ELSE + CALL DLAE2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2 ) + END IF + D( L-1 ) = RT1 + D( L ) = RT2 + E( L-1 ) = ZERO + L = L - 2 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) ) + R = DLAPY2( G, ONE ) + G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + LM1 = L - 1 + DO 120 I = M, LM1 + F = S*E( I ) + B = C*E( I ) + CALL DLARTG( G, F, C, S, R ) + IF( I.NE.M ) + $ E( I-1 ) = R + G = D( I ) - P + R = ( D( I+1 )-G )*S + TWO*C*B + P = S*R + D( I ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = S + END IF +* + 120 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = L - M + 1 + CALL DLASR( 'R', 'V', 'F', N, MM, WORK( M ), WORK( N-1+M ), + $ Z( 1, M ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( LM1 ) = G + GO TO 90 +* +* Eigenvalue found. +* + 130 CONTINUE + D( L ) = P +* + L = L - 1 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 +* + END IF +* +* Undo scaling if necessary +* + 140 CONTINUE + IF( ISCALE.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + ELSE IF( ISCALE.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + END IF +* +* Check for no convergence to an eigenvalue after a total +* of N*MAXIT iterations. +* + IF( JTOT.LT.NMAXIT ) + $ GO TO 10 + DO 150 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 150 CONTINUE + GO TO 190 +* +* Order eigenvalues and eigenvectors. +* + 160 CONTINUE + IF( ICOMPZ.EQ.0 ) THEN +* +* Use Quick Sort +* + CALL DLASRT( 'I', N, D, INFO ) +* + ELSE +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 180 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 170 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 170 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 180 CONTINUE + END IF +* + 190 CONTINUE + RETURN +* +* End of DSTEQR +* + END diff --git a/dspl/liblapack/SRC/dsterf.f b/dspl/liblapack/SRC/dsterf.f new file mode 100644 index 0000000..3401894 --- /dev/null +++ b/dspl/liblapack/SRC/dsterf.f @@ -0,0 +1,426 @@ +*> \brief \b DSTERF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSTERF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSTERF( N, D, E, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSTERF computes all eigenvalues of a symmetric tridiagonal matrix +*> using the Pal-Walker-Kahan variant of the QL or QR algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix. +*> On exit, if INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: the algorithm failed to find all of the eigenvalues in +*> a total of 30*N iterations; if INFO = i, then i +*> elements of E have not converged to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE DSTERF( N, D, E, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, THREE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ THREE = 3.0D0 ) + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) +* .. +* .. Local Scalars .. + INTEGER I, ISCALE, JTOT, L, L1, LEND, LENDSV, LSV, M, + $ NMAXIT + DOUBLE PRECISION ALPHA, ANORM, BB, C, EPS, EPS2, GAMMA, OLDC, + $ OLDGAM, P, R, RT1, RT2, RTE, S, SAFMAX, SAFMIN, + $ SIGMA, SSFMAX, SSFMIN, RMAX +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLANST, DLAPY2 + EXTERNAL DLAMCH, DLANST, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL DLAE2, DLASCL, DLASRT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'DSTERF', -INFO ) + RETURN + END IF + IF( N.LE.1 ) + $ RETURN +* +* Determine the unit roundoff for this environment. +* + EPS = DLAMCH( 'E' ) + EPS2 = EPS**2 + SAFMIN = DLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + SSFMAX = SQRT( SAFMAX ) / THREE + SSFMIN = SQRT( SAFMIN ) / EPS2 + RMAX = DLAMCH( 'O' ) +* +* Compute the eigenvalues of the tridiagonal matrix. +* + NMAXIT = N*MAXIT + SIGMA = ZERO + JTOT = 0 +* +* Determine where the matrix splits and choose QL or QR iteration +* for each block, according to whether top or bottom diagonal +* element is smaller. +* + L1 = 1 +* + 10 CONTINUE + IF( L1.GT.N ) + $ GO TO 170 + IF( L1.GT.1 ) + $ E( L1-1 ) = ZERO + DO 20 M = L1, N - 1 + IF( ABS( E( M ) ).LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+ + $ 1 ) ) ) )*EPS ) THEN + E( M ) = ZERO + GO TO 30 + END IF + 20 CONTINUE + M = N +* + 30 CONTINUE + L = L1 + LSV = L + LEND = M + LENDSV = LEND + L1 = M + 1 + IF( LEND.EQ.L ) + $ GO TO 10 +* +* Scale submatrix in rows and columns L to LEND +* + ANORM = DLANST( 'M', LEND-L+1, D( L ), E( L ) ) + ISCALE = 0 + IF( ANORM.EQ.ZERO ) + $ GO TO 10 + IF( (ANORM.GT.SSFMAX) ) THEN + ISCALE = 1 + CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, + $ INFO ) + ELSE IF( ANORM.LT.SSFMIN ) THEN + ISCALE = 2 + CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, + $ INFO ) + END IF +* + DO 40 I = L, LEND - 1 + E( I ) = E( I )**2 + 40 CONTINUE +* +* Choose between QL and QR iteration +* + IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN + LEND = LSV + L = LENDSV + END IF +* + IF( LEND.GE.L ) THEN +* +* QL Iteration +* +* Look for small subdiagonal element. +* + 50 CONTINUE + IF( L.NE.LEND ) THEN + DO 60 M = L, LEND - 1 + IF( ABS( E( M ) ).LE.EPS2*ABS( D( M )*D( M+1 ) ) ) + $ GO TO 70 + 60 CONTINUE + END IF + M = LEND +* + 70 CONTINUE + IF( M.LT.LEND ) + $ E( M ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 90 +* +* If remaining matrix is 2 by 2, use DLAE2 to compute its +* eigenvalues. +* + IF( M.EQ.L+1 ) THEN + RTE = SQRT( E( L ) ) + CALL DLAE2( D( L ), RTE, D( L+1 ), RT1, RT2 ) + D( L ) = RT1 + D( L+1 ) = RT2 + E( L ) = ZERO + L = L + 2 + IF( L.LE.LEND ) + $ GO TO 50 + GO TO 150 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 150 + JTOT = JTOT + 1 +* +* Form shift. +* + RTE = SQRT( E( L ) ) + SIGMA = ( D( L+1 )-P ) / ( TWO*RTE ) + R = DLAPY2( SIGMA, ONE ) + SIGMA = P - ( RTE / ( SIGMA+SIGN( R, SIGMA ) ) ) +* + C = ONE + S = ZERO + GAMMA = D( M ) - SIGMA + P = GAMMA*GAMMA +* +* Inner loop +* + DO 80 I = M - 1, L, -1 + BB = E( I ) + R = P + BB + IF( I.NE.M-1 ) + $ E( I+1 ) = S*R + OLDC = C + C = P / R + S = BB / R + OLDGAM = GAMMA + ALPHA = D( I ) + GAMMA = C*( ALPHA-SIGMA ) - S*OLDGAM + D( I+1 ) = OLDGAM + ( ALPHA-GAMMA ) + IF( C.NE.ZERO ) THEN + P = ( GAMMA*GAMMA ) / C + ELSE + P = OLDC*BB + END IF + 80 CONTINUE +* + E( L ) = S*P + D( L ) = SIGMA + GAMMA + GO TO 50 +* +* Eigenvalue found. +* + 90 CONTINUE + D( L ) = P +* + L = L + 1 + IF( L.LE.LEND ) + $ GO TO 50 + GO TO 150 +* + ELSE +* +* QR Iteration +* +* Look for small superdiagonal element. +* + 100 CONTINUE + DO 110 M = L, LEND + 1, -1 + IF( ABS( E( M-1 ) ).LE.EPS2*ABS( D( M )*D( M-1 ) ) ) + $ GO TO 120 + 110 CONTINUE + M = LEND +* + 120 CONTINUE + IF( M.GT.LEND ) + $ E( M-1 ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 140 +* +* If remaining matrix is 2 by 2, use DLAE2 to compute its +* eigenvalues. +* + IF( M.EQ.L-1 ) THEN + RTE = SQRT( E( L-1 ) ) + CALL DLAE2( D( L ), RTE, D( L-1 ), RT1, RT2 ) + D( L ) = RT1 + D( L-1 ) = RT2 + E( L-1 ) = ZERO + L = L - 2 + IF( L.GE.LEND ) + $ GO TO 100 + GO TO 150 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 150 + JTOT = JTOT + 1 +* +* Form shift. +* + RTE = SQRT( E( L-1 ) ) + SIGMA = ( D( L-1 )-P ) / ( TWO*RTE ) + R = DLAPY2( SIGMA, ONE ) + SIGMA = P - ( RTE / ( SIGMA+SIGN( R, SIGMA ) ) ) +* + C = ONE + S = ZERO + GAMMA = D( M ) - SIGMA + P = GAMMA*GAMMA +* +* Inner loop +* + DO 130 I = M, L - 1 + BB = E( I ) + R = P + BB + IF( I.NE.M ) + $ E( I-1 ) = S*R + OLDC = C + C = P / R + S = BB / R + OLDGAM = GAMMA + ALPHA = D( I+1 ) + GAMMA = C*( ALPHA-SIGMA ) - S*OLDGAM + D( I ) = OLDGAM + ( ALPHA-GAMMA ) + IF( C.NE.ZERO ) THEN + P = ( GAMMA*GAMMA ) / C + ELSE + P = OLDC*BB + END IF + 130 CONTINUE +* + E( L-1 ) = S*P + D( L ) = SIGMA + GAMMA + GO TO 100 +* +* Eigenvalue found. +* + 140 CONTINUE + D( L ) = P +* + L = L - 1 + IF( L.GE.LEND ) + $ GO TO 100 + GO TO 150 +* + END IF +* +* Undo scaling if necessary +* + 150 CONTINUE + IF( ISCALE.EQ.1 ) + $ CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + IF( ISCALE.EQ.2 ) + $ CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) +* +* Check for no convergence to an eigenvalue after a total +* of N*MAXIT iterations. +* + IF( JTOT.LT.NMAXIT ) + $ GO TO 10 + DO 160 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 160 CONTINUE + GO TO 180 +* +* Sort eigenvalues in increasing order. +* + 170 CONTINUE + CALL DLASRT( 'I', N, D, INFO ) +* + 180 CONTINUE + RETURN +* +* End of DSTERF +* + END diff --git a/dspl/liblapack/SRC/dstev.f b/dspl/liblapack/SRC/dstev.f new file mode 100644 index 0000000..c59eaf3 --- /dev/null +++ b/dspl/liblapack/SRC/dstev.f @@ -0,0 +1,235 @@ +*> \brief DSTEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSTEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSTEV( JOBZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ +* INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSTEV computes all eigenvalues and, optionally, eigenvectors of a +*> real symmetric tridiagonal matrix A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix +*> A. +*> On exit, if INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix A, stored in elements 1 to N-1 of E. +*> On exit, the contents of E are destroyed. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with D(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (max(1,2*N-2)) +*> If JOBZ = 'N', WORK is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of E did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHEReigen +* +* ===================================================================== + SUBROUTINE DSTEV( JOBZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTZ + INTEGER IMAX, ISCALE + DOUBLE PRECISION BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM, + $ TNRM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTEQR, DSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -6 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSTEV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + TNRM = DLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / TNRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL DSCAL( N, SIGMA, D, 1 ) + CALL DSCAL( N-1, SIGMA, E( 1 ), 1 ) + END IF +* +* For eigenvalues only, call DSTERF. For eigenvalues and +* eigenvectors, call DSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, D, E, INFO ) + ELSE + CALL DSTEQR( 'I', N, D, E, Z, LDZ, WORK, INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, D, 1 ) + END IF +* + RETURN +* +* End of DSTEV +* + END diff --git a/dspl/liblapack/SRC/dstevd.f b/dspl/liblapack/SRC/dstevd.f new file mode 100644 index 0000000..6a07b24 --- /dev/null +++ b/dspl/liblapack/SRC/dstevd.f @@ -0,0 +1,302 @@ +*> \brief DSTEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSTEVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSTEVD( JOBZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK, +* LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ +* INTEGER INFO, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSTEVD computes all eigenvalues and, optionally, eigenvectors of a +*> real symmetric tridiagonal matrix. If eigenvectors are desired, it +*> uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix +*> A. +*> On exit, if INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix A, stored in elements 1 to N-1 of E. +*> On exit, the contents of E are destroyed. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with D(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, +*> dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If JOBZ = 'N' or N <= 1 then LWORK must be at least 1. +*> If JOBZ = 'V' and N > 1 then LWORK must be at least +*> ( 1 + 4*N + N**2 ). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If JOBZ = 'N' or N <= 1 then LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1 then LIWORK must be at least 3+5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of E did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHEReigen +* +* ===================================================================== + SUBROUTINE DSTEVD( JOBZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ + INTEGER INFO, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTZ + INTEGER ISCALE, LIWMIN, LWMIN + DOUBLE PRECISION BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM, + $ TNRM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTEDC, DSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + LIWMIN = 1 + LWMIN = 1 + IF( N.GT.1 .AND. WANTZ ) THEN + LWMIN = 1 + 4*N + N**2 + LIWMIN = 3 + 5*N + END IF +* + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -6 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSTEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + TNRM = DLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / TNRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL DSCAL( N, SIGMA, D, 1 ) + CALL DSCAL( N-1, SIGMA, E( 1 ), 1 ) + END IF +* +* For eigenvalues only, call DSTERF. For eigenvalues and +* eigenvectors, call DSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, D, E, INFO ) + ELSE + CALL DSTEDC( 'I', N, D, E, Z, LDZ, WORK, LWORK, IWORK, LIWORK, + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL DSCAL( N, ONE / SIGMA, D, 1 ) +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DSTEVD +* + END diff --git a/dspl/liblapack/SRC/dstevr.f b/dspl/liblapack/SRC/dstevr.f new file mode 100644 index 0000000..10f1b77 --- /dev/null +++ b/dspl/liblapack/SRC/dstevr.f @@ -0,0 +1,584 @@ +*> \brief DSTEVR computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSTEVR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSTEVR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL, +* M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, +* LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE +* INTEGER IL, INFO, IU, LDZ, LIWORK, LWORK, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSTEVR computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric tridiagonal matrix T. Eigenvalues and +*> eigenvectors can be selected by specifying either a range of values +*> or a range of indices for the desired eigenvalues. +*> +*> Whenever possible, DSTEVR calls DSTEMR to compute the +*> eigenspectrum using Relatively Robust Representations. DSTEMR +*> computes eigenvalues by the dqds algorithm, while orthogonal +*> eigenvectors are computed from various "good" L D L^T representations +*> (also known as Relatively Robust Representations). Gram-Schmidt +*> orthogonalization is avoided as far as possible. More specifically, +*> the various steps of the algorithm are as follows. For the i-th +*> unreduced block of T, +*> (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T +*> is a relatively robust representation, +*> (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high +*> relative accuracy by the dqds algorithm, +*> (c) If there is a cluster of close eigenvalues, "choose" sigma_i +*> close to the cluster, and go to step (a), +*> (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, +*> compute the corresponding eigenvector by forming a +*> rank-revealing twisted factorization. +*> The desired accuracy of the output can be specified by the input +*> parameter ABSTOL. +*> +*> For more details, see "A new O(n^2) algorithm for the symmetric +*> tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, +*> Computer Science Division Technical Report No. UCB//CSD-97-971, +*> UC Berkeley, May 1997. +*> +*> +*> Note 1 : DSTEVR calls DSTEMR when the full spectrum is requested +*> on machines which conform to the ieee-754 floating point standard. +*> DSTEVR calls DSTEBZ and DSTEIN on non-ieee machines and +*> when partial spectrum requests are made. +*> +*> Normal execution of DSTEMR may create NaNs and infinities and +*> hence may abort due to a floating point exception in environments +*> which do not handle NaNs and infinities in the ieee standard default +*> manner. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and +*> DSTEIN are called +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix +*> A. +*> On exit, D may be multiplied by a constant factor chosen +*> to avoid over/underflow in computing the eigenvalues. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (max(1,N-1)) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix A in elements 1 to N-1 of E. +*> On exit, E may be multiplied by a constant factor chosen +*> to avoid over/underflow in computing the eigenvalues. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> +*> If high relative accuracy is important, set ABSTOL to +*> DLAMCH( 'Safe minimum' ). Doing so will guarantee that +*> eigenvalues are computed to high relative accuracy when +*> possible in future releases. The current code does not +*> make any guarantees about high relative accuracy, but +*> future releases will. See J. Barlow and J. Demmel, +*> "Computing Accurate Eigensystems of Scaled Diagonally +*> Dominant Matrices", LAPACK Working Note #7, for a discussion +*> of which matrices define their eigenvalues to high relative +*> accuracy. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). +*> Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal (and +*> minimal) LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,20*N). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal (and +*> minimal) LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N). +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: Internal error +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHEReigen +* +*> \par Contributors: +* ================== +*> +*> Inderjit Dhillon, IBM Almaden, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Ken Stanley, Computer Science Division, University of +*> California at Berkeley, USA \n +*> +* ===================================================================== + SUBROUTINE DSTEVR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL, + $ M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE + INTEGER IL, INFO, IU, LDZ, LIWORK, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, TEST, LQUERY, VALEIG, WANTZ, + $ TRYRAC + CHARACTER ORDER + INTEGER I, IEEEOK, IMAX, INDIBL, INDIFL, INDISP, + $ INDIWO, ISCALE, ITMP1, J, JJ, LIWMIN, LWMIN, + $ NSPLIT + DOUBLE PRECISION BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM, + $ TMP1, TNRM, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, ILAENV, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTEMR, DSTEIN, DSTERF, + $ DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* +* Test the input parameters. +* + IEEEOK = ILAENV( 10, 'DSTEVR', 'N', 1, 2, 3, 4 ) +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) ) + LWMIN = MAX( 1, 20*N ) + LIWMIN = MAX( 1, 10*N ) +* +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -7 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -9 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -14 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -17 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSTEVR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = D( 1 ) + ELSE + IF( VL.LT.D( 1 ) .AND. VU.GE.D( 1 ) ) THEN + M = 1 + W( 1 ) = D( 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + IF( VALEIG ) THEN + VLL = VL + VUU = VU + END IF +* + TNRM = DLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / TNRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL DSCAL( N, SIGMA, D, 1 ) + CALL DSCAL( N-1, SIGMA, E( 1 ), 1 ) + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF + +* Initialize indices into workspaces. Note: These indices are used only +* if DSTERF or DSTEMR fail. + +* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in DSTEBZ and +* stores the block indices of each of the M<=N eigenvalues. + INDIBL = 1 +* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in DSTEBZ and +* stores the starting and finishing indices of each block. + INDISP = INDIBL + N +* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors +* that corresponding to eigenvectors that fail to converge in +* DSTEIN. This information is discarded; if any fail, the driver +* returns INFO > 0. + INDIFL = INDISP + N +* INDIWO is the offset of the remaining integer workspace. + INDIWO = INDISP + N +* +* If all eigenvalues are desired, then +* call DSTERF or DSTEMR. If this fails for some eigenvalue, then +* try DSTEBZ. +* +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG .OR. TEST ) .AND. IEEEOK.EQ.1 ) THEN + CALL DCOPY( N-1, E( 1 ), 1, WORK( 1 ), 1 ) + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N, D, 1, W, 1 ) + CALL DSTERF( N, W, WORK, INFO ) + ELSE + CALL DCOPY( N, D, 1, WORK( N+1 ), 1 ) + IF (ABSTOL .LE. TWO*N*EPS) THEN + TRYRAC = .TRUE. + ELSE + TRYRAC = .FALSE. + END IF + CALL DSTEMR( JOBZ, 'A', N, WORK( N+1 ), WORK, VL, VU, IL, + $ IU, M, W, Z, LDZ, N, ISUPPZ, TRYRAC, + $ WORK( 2*N+1 ), LWORK-2*N, IWORK, LIWORK, INFO ) +* + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 10 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, DSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTOL, D, E, M, + $ NSPLIT, W, IWORK( INDIBL ), IWORK( INDISP ), WORK, + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL DSTEIN( N, D, E, M, W, IWORK( INDIBL ), IWORK( INDISP ), + $ Z, LDZ, WORK, IWORK( INDIWO ), IWORK( INDIFL ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 10 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 30 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 20 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 20 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( I ) + W( I ) = W( J ) + IWORK( I ) = IWORK( J ) + W( J ) = TMP1 + IWORK( J ) = ITMP1 + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + END IF + 30 CONTINUE + END IF +* +* Causes problems with tests 19 & 20: +* IF (wantz .and. INDEIG ) Z( 1,1) = Z(1,1) / 1.002 + .002 +* +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of DSTEVR +* + END diff --git a/dspl/liblapack/SRC/dstevx.f b/dspl/liblapack/SRC/dstevx.f new file mode 100644 index 0000000..7acbdaa --- /dev/null +++ b/dspl/liblapack/SRC/dstevx.f @@ -0,0 +1,464 @@ +*> \brief DSTEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSTEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSTEVX( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL, +* M, W, Z, LDZ, WORK, IWORK, IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE +* INTEGER IL, INFO, IU, LDZ, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSTEVX computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric tridiagonal matrix A. Eigenvalues and +*> eigenvectors can be selected by specifying either a range of values +*> or a range of indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix +*> A. +*> On exit, D may be multiplied by a constant factor chosen +*> to avoid over/underflow in computing the eigenvalues. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (max(1,N-1)) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix A in elements 1 to N-1 of E. +*> On exit, E may be multiplied by a constant factor chosen +*> to avoid over/underflow in computing the eigenvalues. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less +*> than or equal to zero, then EPS*|T| will be used in +*> its place, where |T| is the 1-norm of the tridiagonal +*> matrix. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge (INFO > 0), then that +*> column of Z contains the latest approximation to the +*> eigenvector, and the index of the eigenvector is returned +*> in IFAIL. If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHEReigen +* +* ===================================================================== + SUBROUTINE DSTEVX( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL, + $ M, W, Z, LDZ, WORK, IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE + INTEGER IL, INFO, IU, LDZ, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, TEST, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IMAX, INDIBL, INDISP, INDIWO, INDWRK, + $ ISCALE, ITMP1, J, JJ, NSPLIT + DOUBLE PRECISION BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM, + $ TMP1, TNRM, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTEIN, DSTEQR, DSTERF, + $ DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -7 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -9 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -14 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSTEVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = D( 1 ) + ELSE + IF( VL.LT.D( 1 ) .AND. VU.GE.D( 1 ) ) THEN + M = 1 + W( 1 ) = D( 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + IF( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + END IF + TNRM = DLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / TNRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL DSCAL( N, SIGMA, D, 1 ) + CALL DSCAL( N-1, SIGMA, E( 1 ), 1 ) + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* If all eigenvalues are desired and ABSTOL is less than zero, then +* call DSTERF or SSTEQR. If this fails for some eigenvalue, then +* try DSTEBZ. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG .OR. TEST ) .AND. ( ABSTOL.LE.ZERO ) ) THEN + CALL DCOPY( N, D, 1, W, 1 ) + CALL DCOPY( N-1, E( 1 ), 1, WORK( 1 ), 1 ) + INDWRK = N + 1 + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK, INFO ) + ELSE + CALL DSTEQR( 'I', N, W, WORK, Z, LDZ, WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 20 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDWRK = 1 + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTOL, D, E, M, + $ NSPLIT, W, IWORK( INDIBL ), IWORK( INDISP ), + $ WORK( INDWRK ), IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL DSTEIN( N, D, E, M, W, IWORK( INDIBL ), IWORK( INDISP ), + $ Z, LDZ, WORK( INDWRK ), IWORK( INDIWO ), IFAIL, + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 20 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 40 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 30 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 30 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 40 CONTINUE + END IF +* + RETURN +* +* End of DSTEVX +* + END diff --git a/dspl/liblapack/SRC/dsycon.f b/dspl/liblapack/SRC/dsycon.f new file mode 100644 index 0000000..66e4536 --- /dev/null +++ b/dspl/liblapack/SRC/dsycon.f @@ -0,0 +1,244 @@ +*> \brief \b DSYCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYCON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYCON estimates the reciprocal of the condition number (in the +*> 1-norm) of a real symmetric matrix A using the factorization +*> A = U*D*U**T or A = L*D*L**T computed by DSYTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by DSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSYTRF. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DSYCON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + DOUBLE PRECISION AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLACN2, DSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**T) or inv(U*D*U**T). +* + CALL DSYTRS( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of DSYCON +* + END diff --git a/dspl/liblapack/SRC/dsycon_3.f b/dspl/liblapack/SRC/dsycon_3.f new file mode 100644 index 0000000..7245eba --- /dev/null +++ b/dspl/liblapack/SRC/dsycon_3.f @@ -0,0 +1,285 @@ +*> \brief \b DSYCON_3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYCON_3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYCON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), E ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> DSYCON_3 estimates the reciprocal of the condition number (in the +*> 1-norm) of a real symmetric matrix A using the factorization +*> computed by DSYTRF_RK or DSYTRF_BK: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> This routine uses BLAS3 solver DSYTRS_3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix: +*> = 'U': Upper triangular, form is A = P*U*D*(U**T)*(P**T); +*> = 'L': Lower triangular, form is A = P*L*D*(L**T)*(P**T). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> Diagonal of the block diagonal matrix D and factors U or L +*> as computed by DSYTRF_RK and DSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSYTRF_RK or DSYTRF_BK. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleSYcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> June 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYCON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, + $ WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + DOUBLE PRECISION AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLACN2, DSYTRS_3, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYCON_3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + END DO + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + END DO + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**T) or inv(U*D*U**T). +* + CALL DSYTRS_3( UPLO, N, 1, A, LDA, E, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of DSYCON_3 +* + END diff --git a/dspl/liblapack/SRC/dsycon_rook.f b/dspl/liblapack/SRC/dsycon_rook.f new file mode 100644 index 0000000..4022adf --- /dev/null +++ b/dspl/liblapack/SRC/dsycon_rook.f @@ -0,0 +1,258 @@ +*> \brief DSYCON_ROOK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYCON_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYCON_ROOK( UPLO, N, A, LDA, IPIV, ANORM, RCOND, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYCON_ROOK estimates the reciprocal of the condition number (in the +*> 1-norm) of a real symmetric matrix A using the factorization +*> A = U*D*U**T or A = L*D*L**T computed by DSYTRF_ROOK. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by DSYTRF_ROOK. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSYTRF_ROOK. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleSYcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> April 2012, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYCON_ROOK( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + DOUBLE PRECISION AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLACN2, DSYTRS_ROOK, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYCON_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**T) or inv(U*D*U**T). +* + CALL DSYTRS_ROOK( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of DSYCON_ROOK +* + END diff --git a/dspl/liblapack/SRC/dsyconv.f b/dspl/liblapack/SRC/dsyconv.f new file mode 100644 index 0000000..f582bce --- /dev/null +++ b/dspl/liblapack/SRC/dsyconv.f @@ -0,0 +1,366 @@ +*> \brief \b DSYCONV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYCONV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYCONV( UPLO, WAY, N, A, LDA, IPIV, E, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, WAY +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYCONV convert A given by TRF into L and D and vice-versa. +*> Get Non-diag elements of D (returned in workspace) and +*> apply or reverse permutation done in TRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] WAY +*> \verbatim +*> WAY is CHARACTER*1 +*> = 'C': Convert +*> = 'R': Revert +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by DSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSYTRF. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> E stores the supdiagonal/subdiagonal of the symmetric 1-by-1 +*> or 2-by-2 block diagonal matrix D in LDLT. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DSYCONV( UPLO, WAY, N, A, LDA, IPIV, E, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO, WAY + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Local Scalars .. + LOGICAL UPPER, CONVERT + INTEGER I, IP, J + DOUBLE PRECISION TEMP +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + CONVERT = LSAME( WAY, 'C' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.CONVERT .AND. .NOT.LSAME( WAY, 'R' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYCONV', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* A is UPPER +* +* Convert A (A is upper) +* +* Convert VALUE +* + IF ( CONVERT ) THEN + I=N + E(1)=ZERO + DO WHILE ( I .GT. 1 ) + IF( IPIV(I) .LT. 0 ) THEN + E(I)=A(I-1,I) + E(I-1)=ZERO + A(I-1,I)=ZERO + I=I-1 + ELSE + E(I)=ZERO + ENDIF + I=I-1 + END DO +* +* Convert PERMUTATIONS +* + I=N + DO WHILE ( I .GE. 1 ) + IF( IPIV(I) .GT. 0) THEN + IP=IPIV(I) + IF( I .LT. N) THEN + DO 12 J= I+1,N + TEMP=A(IP,J) + A(IP,J)=A(I,J) + A(I,J)=TEMP + 12 CONTINUE + ENDIF + ELSE + IP=-IPIV(I) + IF( I .LT. N) THEN + DO 13 J= I+1,N + TEMP=A(IP,J) + A(IP,J)=A(I-1,J) + A(I-1,J)=TEMP + 13 CONTINUE + ENDIF + I=I-1 + ENDIF + I=I-1 + END DO + + ELSE +* +* Revert A (A is upper) +* +* +* Revert PERMUTATIONS +* + I=1 + DO WHILE ( I .LE. N ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF( I .LT. N) THEN + DO J= I+1,N + TEMP=A(IP,J) + A(IP,J)=A(I,J) + A(I,J)=TEMP + END DO + ENDIF + ELSE + IP=-IPIV(I) + I=I+1 + IF( I .LT. N) THEN + DO J= I+1,N + TEMP=A(IP,J) + A(IP,J)=A(I-1,J) + A(I-1,J)=TEMP + END DO + ENDIF + ENDIF + I=I+1 + END DO +* +* Revert VALUE +* + I=N + DO WHILE ( I .GT. 1 ) + IF( IPIV(I) .LT. 0 ) THEN + A(I-1,I)=E(I) + I=I-1 + ENDIF + I=I-1 + END DO + END IF + ELSE +* +* A is LOWER +* + IF ( CONVERT ) THEN +* +* Convert A (A is lower) +* +* +* Convert VALUE +* + I=1 + E(N)=ZERO + DO WHILE ( I .LE. N ) + IF( I.LT.N .AND. IPIV(I) .LT. 0 ) THEN + E(I)=A(I+1,I) + E(I+1)=ZERO + A(I+1,I)=ZERO + I=I+1 + ELSE + E(I)=ZERO + ENDIF + I=I+1 + END DO +* +* Convert PERMUTATIONS +* + I=1 + DO WHILE ( I .LE. N ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF (I .GT. 1) THEN + DO 22 J= 1,I-1 + TEMP=A(IP,J) + A(IP,J)=A(I,J) + A(I,J)=TEMP + 22 CONTINUE + ENDIF + ELSE + IP=-IPIV(I) + IF (I .GT. 1) THEN + DO 23 J= 1,I-1 + TEMP=A(IP,J) + A(IP,J)=A(I+1,J) + A(I+1,J)=TEMP + 23 CONTINUE + ENDIF + I=I+1 + ENDIF + I=I+1 + END DO + ELSE +* +* Revert A (A is lower) +* +* +* Revert PERMUTATIONS +* + I=N + DO WHILE ( I .GE. 1 ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF (I .GT. 1) THEN + DO J= 1,I-1 + TEMP=A(I,J) + A(I,J)=A(IP,J) + A(IP,J)=TEMP + END DO + ENDIF + ELSE + IP=-IPIV(I) + I=I-1 + IF (I .GT. 1) THEN + DO J= 1,I-1 + TEMP=A(I+1,J) + A(I+1,J)=A(IP,J) + A(IP,J)=TEMP + END DO + ENDIF + ENDIF + I=I-1 + END DO +* +* Revert VALUE +* + I=1 + DO WHILE ( I .LE. N-1 ) + IF( IPIV(I) .LT. 0 ) THEN + A(I+1,I)=E(I) + I=I+1 + ENDIF + I=I+1 + END DO + END IF + END IF + + RETURN +* +* End of DSYCONV +* + END diff --git a/dspl/liblapack/SRC/dsyconvf.f b/dspl/liblapack/SRC/dsyconvf.f new file mode 100644 index 0000000..37c8157 --- /dev/null +++ b/dspl/liblapack/SRC/dsyconvf.f @@ -0,0 +1,559 @@ +*> \brief \b DSYCONVF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYCONVF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYCONVF( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, WAY +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> If parameter WAY = 'C': +*> DSYCONVF converts the factorization output format used in +*> DSYTRF provided on entry in parameter A into the factorization +*> output format used in DSYTRF_RK (or DSYTRF_BK) that is stored +*> on exit in parameters A and E. It also coverts in place details of +*> the intechanges stored in IPIV from the format used in DSYTRF into +*> the format used in DSYTRF_RK (or DSYTRF_BK). +*> +*> If parameter WAY = 'R': +*> DSYCONVF performs the conversion in reverse direction, i.e. +*> converts the factorization output format used in DSYTRF_RK +*> (or DSYTRF_BK) provided on entry in parameters A and E into +*> the factorization output format used in DSYTRF that is stored +*> on exit in parameter A. It also coverts in place details of +*> the intechanges stored in IPIV from the format used in DSYTRF_RK +*> (or DSYTRF_BK) into the format used in DSYTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix A. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] WAY +*> \verbatim +*> WAY is CHARACTER*1 +*> = 'C': Convert +*> = 'R': Revert +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> +*> 1) If WAY ='C': +*> +*> On entry, contains factorization details in format used in +*> DSYTRF: +*> a) all elements of the symmetric block diagonal +*> matrix D on the diagonal of A and on superdiagonal +*> (or subdiagonal) of A, and +*> b) If UPLO = 'U': multipliers used to obtain factor U +*> in the superdiagonal part of A. +*> If UPLO = 'L': multipliers used to obtain factor L +*> in the superdiagonal part of A. +*> +*> On exit, contains factorization details in format used in +*> DSYTRF_RK or DSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> 2) If WAY = 'R': +*> +*> On entry, contains factorization details in format used in +*> DSYTRF_RK or DSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> On exit, contains factorization details in format used in +*> DSYTRF: +*> a) all elements of the symmetric block diagonal +*> matrix D on the diagonal of A and on superdiagonal +*> (or subdiagonal) of A, and +*> b) If UPLO = 'U': multipliers used to obtain factor U +*> in the superdiagonal part of A. +*> If UPLO = 'L': multipliers used to obtain factor L +*> in the superdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> +*> 1) If WAY ='C': +*> +*> On entry, just a workspace. +*> +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> 2) If WAY = 'R': +*> +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> On exit, is not changed +*> \endverbatim +*. +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> +*> 1) If WAY ='C': +*> On entry, details of the interchanges and the block +*> structure of D in the format used in DSYTRF. +*> On exit, details of the interchanges and the block +*> structure of D in the format used in DSYTRF_RK +*> ( or DSYTRF_BK). +*> +*> 1) If WAY ='R': +*> On entry, details of the interchanges and the block +*> structure of D in the format used in DSYTRF_RK +*> ( or DSYTRF_BK). +*> On exit, details of the interchanges and the block +*> structure of D in the format used in DSYTRF. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* ===================================================================== + SUBROUTINE DSYCONVF( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO, WAY + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* +* .. External Subroutines .. + EXTERNAL DSWAP, XERBLA +* .. Local Scalars .. + LOGICAL UPPER, CONVERT + INTEGER I, IP +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + CONVERT = LSAME( WAY, 'C' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.CONVERT .AND. .NOT.LSAME( WAY, 'R' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYCONVF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Begin A is UPPER +* + IF ( CONVERT ) THEN +* +* Convert A (A is upper) +* +* +* Convert VALUE +* +* Assign superdiagonal entries of D to array E and zero out +* corresponding entries in input storage A +* + I = N + E( 1 ) = ZERO + DO WHILE ( I.GT.1 ) + IF( IPIV( I ).LT.0 ) THEN + E( I ) = A( I-1, I ) + E( I-1 ) = ZERO + A( I-1, I ) = ZERO + I = I - 1 + ELSE + E( I ) = ZERO + END IF + I = I - 1 + END DO +* +* Convert PERMUTATIONS and IPIV +* +* Apply permutaions to submatrices of upper part of A +* in factorization order where i decreases from N to 1 +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(1:i,N-i:N) +* + IP = IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.I ) THEN + CALL DSWAP( N-I, A( I, I+1 ), LDA, + $ A( IP, I+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i-1 and IPIV(i) in A(1:i,N-i:N) +* + IP = -IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.(I-1) ) THEN + CALL DSWAP( N-I, A( I-1, I+1 ), LDA, + $ A( IP, I+1 ), LDA ) + END IF + END IF +* +* Convert IPIV +* There is no interchnge of rows i and and IPIV(i), +* so this should be reflected in IPIV format for +* *SYTRF_RK ( or *SYTRF_BK) +* + IPIV( I ) = I +* + I = I - 1 +* + END IF + I = I - 1 + END DO +* + ELSE +* +* Revert A (A is upper) +* +* +* Revert PERMUTATIONS and IPIV +* +* Apply permutaions to submatrices of upper part of A +* in reverse factorization order where i increases from 1 to N +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(1:i,N-i:N) +* + IP = IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.I ) THEN + CALL DSWAP( N-I, A( IP, I+1 ), LDA, + $ A( I, I+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i-1 and IPIV(i) in A(1:i,N-i:N) +* + I = I + 1 + IP = -IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.(I-1) ) THEN + CALL DSWAP( N-I, A( IP, I+1 ), LDA, + $ A( I-1, I+1 ), LDA ) + END IF + END IF +* +* Convert IPIV +* There is one interchange of rows i-1 and IPIV(i-1), +* so this should be recorded in two consecutive entries +* in IPIV format for *SYTRF +* + IPIV( I ) = IPIV( I-1 ) +* + END IF + I = I + 1 + END DO +* +* Revert VALUE +* Assign superdiagonal entries of D from array E to +* superdiagonal entries of A. +* + I = N + DO WHILE ( I.GT.1 ) + IF( IPIV( I ).LT.0 ) THEN + A( I-1, I ) = E( I ) + I = I - 1 + END IF + I = I - 1 + END DO +* +* End A is UPPER +* + END IF +* + ELSE +* +* Begin A is LOWER +* + IF ( CONVERT ) THEN +* +* Convert A (A is lower) +* +* +* Convert VALUE +* Assign subdiagonal entries of D to array E and zero out +* corresponding entries in input storage A +* + I = 1 + E( N ) = ZERO + DO WHILE ( I.LE.N ) + IF( I.LT.N .AND. IPIV(I).LT.0 ) THEN + E( I ) = A( I+1, I ) + E( I+1 ) = ZERO + A( I+1, I ) = ZERO + I = I + 1 + ELSE + E( I ) = ZERO + END IF + I = I + 1 + END DO +* +* Convert PERMUTATIONS and IPIV +* +* Apply permutaions to submatrices of lower part of A +* in factorization order where k increases from 1 to N +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(i:N,1:i-1) +* + IP = IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.I ) THEN + CALL DSWAP( I-1, A( I, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i+1 and IPIV(i) in A(i:N,1:i-1) +* + IP = -IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.(I+1) ) THEN + CALL DSWAP( I-1, A( I+1, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + END IF +* +* Convert IPIV +* There is no interchnge of rows i and and IPIV(i), +* so this should be reflected in IPIV format for +* *SYTRF_RK ( or *SYTRF_BK) +* + IPIV( I ) = I +* + I = I + 1 +* + END IF + I = I + 1 + END DO +* + ELSE +* +* Revert A (A is lower) +* +* +* Revert PERMUTATIONS and IPIV +* +* Apply permutaions to submatrices of lower part of A +* in reverse factorization order where i decreases from N to 1 +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(i:N,1:i-1) +* + IP = IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.I ) THEN + CALL DSWAP( I-1, A( IP, 1 ), LDA, + $ A( I, 1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i+1 and IPIV(i) in A(i:N,1:i-1) +* + I = I - 1 + IP = -IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.(I+1) ) THEN + CALL DSWAP( I-1, A( IP, 1 ), LDA, + $ A( I+1, 1 ), LDA ) + END IF + END IF +* +* Convert IPIV +* There is one interchange of rows i+1 and IPIV(i+1), +* so this should be recorded in consecutive entries +* in IPIV format for *SYTRF +* + IPIV( I ) = IPIV( I+1 ) +* + END IF + I = I - 1 + END DO +* +* Revert VALUE +* Assign subdiagonal entries of D from array E to +* subgiagonal entries of A. +* + I = 1 + DO WHILE ( I.LE.N-1 ) + IF( IPIV( I ).LT.0 ) THEN + A( I + 1, I ) = E( I ) + I = I + 1 + END IF + I = I + 1 + END DO +* + END IF +* +* End A is LOWER +* + END IF + + RETURN +* +* End of DSYCONVF +* + END diff --git a/dspl/liblapack/SRC/dsyconvf_rook.f b/dspl/liblapack/SRC/dsyconvf_rook.f new file mode 100644 index 0000000..5c77490 --- /dev/null +++ b/dspl/liblapack/SRC/dsyconvf_rook.f @@ -0,0 +1,544 @@ +*> \brief \b DSYCONVF_ROOK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYCONVF_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYCONVF_ROOK( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, WAY +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> If parameter WAY = 'C': +*> DSYCONVF_ROOK converts the factorization output format used in +*> DSYTRF_ROOK provided on entry in parameter A into the factorization +*> output format used in DSYTRF_RK (or DSYTRF_BK) that is stored +*> on exit in parameters A and E. IPIV format for DSYTRF_ROOK and +*> DSYTRF_RK (or DSYTRF_BK) is the same and is not converted. +*> +*> If parameter WAY = 'R': +*> DSYCONVF_ROOK performs the conversion in reverse direction, i.e. +*> converts the factorization output format used in DSYTRF_RK +*> (or DSYTRF_BK) provided on entry in parameters A and E into +*> the factorization output format used in DSYTRF_ROOK that is stored +*> on exit in parameter A. IPIV format for DSYTRF_ROOK and +*> DSYTRF_RK (or DSYTRF_BK) is the same and is not converted. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix A. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] WAY +*> \verbatim +*> WAY is CHARACTER*1 +*> = 'C': Convert +*> = 'R': Revert +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> +*> 1) If WAY ='C': +*> +*> On entry, contains factorization details in format used in +*> DSYTRF_ROOK: +*> a) all elements of the symmetric block diagonal +*> matrix D on the diagonal of A and on superdiagonal +*> (or subdiagonal) of A, and +*> b) If UPLO = 'U': multipliers used to obtain factor U +*> in the superdiagonal part of A. +*> If UPLO = 'L': multipliers used to obtain factor L +*> in the superdiagonal part of A. +*> +*> On exit, contains factorization details in format used in +*> DSYTRF_RK or DSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> 2) If WAY = 'R': +*> +*> On entry, contains factorization details in format used in +*> DSYTRF_RK or DSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> On exit, contains factorization details in format used in +*> DSYTRF_ROOK: +*> a) all elements of the symmetric block diagonal +*> matrix D on the diagonal of A and on superdiagonal +*> (or subdiagonal) of A, and +*> b) If UPLO = 'U': multipliers used to obtain factor U +*> in the superdiagonal part of A. +*> If UPLO = 'L': multipliers used to obtain factor L +*> in the superdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> +*> 1) If WAY ='C': +*> +*> On entry, just a workspace. +*> +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> 2) If WAY = 'R': +*> +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> On exit, is not changed +*> \endverbatim +*. +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On entry, details of the interchanges and the block +*> structure of D as determined: +*> 1) by DSYTRF_ROOK, if WAY ='C'; +*> 2) by DSYTRF_RK (or DSYTRF_BK), if WAY ='R'. +*> The IPIV format is the same for all these routines. +*> +*> On exit, is not changed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* ===================================================================== + SUBROUTINE DSYCONVF_ROOK( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO, WAY + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* +* .. External Subroutines .. + EXTERNAL DSWAP, XERBLA +* .. Local Scalars .. + LOGICAL UPPER, CONVERT + INTEGER I, IP, IP2 +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + CONVERT = LSAME( WAY, 'C' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.CONVERT .AND. .NOT.LSAME( WAY, 'R' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYCONVF_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Begin A is UPPER +* + IF ( CONVERT ) THEN +* +* Convert A (A is upper) +* +* +* Convert VALUE +* +* Assign superdiagonal entries of D to array E and zero out +* corresponding entries in input storage A +* + I = N + E( 1 ) = ZERO + DO WHILE ( I.GT.1 ) + IF( IPIV( I ).LT.0 ) THEN + E( I ) = A( I-1, I ) + E( I-1 ) = ZERO + A( I-1, I ) = ZERO + I = I - 1 + ELSE + E( I ) = ZERO + END IF + I = I - 1 + END DO +* +* Convert PERMUTATIONS +* +* Apply permutaions to submatrices of upper part of A +* in factorization order where i decreases from N to 1 +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(1:i,N-i:N) +* + IP = IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.I ) THEN + CALL DSWAP( N-I, A( I, I+1 ), LDA, + $ A( IP, I+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i and IPIV(i) and i-1 and IPIV(i-1) +* in A(1:i,N-i:N) +* + IP = -IPIV( I ) + IP2 = -IPIV( I-1 ) + IF( I.LT.N ) THEN + IF( IP.NE.I ) THEN + CALL DSWAP( N-I, A( I, I+1 ), LDA, + $ A( IP, I+1 ), LDA ) + END IF + IF( IP2.NE.(I-1) ) THEN + CALL DSWAP( N-I, A( I-1, I+1 ), LDA, + $ A( IP2, I+1 ), LDA ) + END IF + END IF + I = I - 1 +* + END IF + I = I - 1 + END DO +* + ELSE +* +* Revert A (A is upper) +* +* +* Revert PERMUTATIONS +* +* Apply permutaions to submatrices of upper part of A +* in reverse factorization order where i increases from 1 to N +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(1:i,N-i:N) +* + IP = IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.I ) THEN + CALL DSWAP( N-I, A( IP, I+1 ), LDA, + $ A( I, I+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i-1 and IPIV(i-1) and i and IPIV(i) +* in A(1:i,N-i:N) +* + I = I + 1 + IP = -IPIV( I ) + IP2 = -IPIV( I-1 ) + IF( I.LT.N ) THEN + IF( IP2.NE.(I-1) ) THEN + CALL DSWAP( N-I, A( IP2, I+1 ), LDA, + $ A( I-1, I+1 ), LDA ) + END IF + IF( IP.NE.I ) THEN + CALL DSWAP( N-I, A( IP, I+1 ), LDA, + $ A( I, I+1 ), LDA ) + END IF + END IF +* + END IF + I = I + 1 + END DO +* +* Revert VALUE +* Assign superdiagonal entries of D from array E to +* superdiagonal entries of A. +* + I = N + DO WHILE ( I.GT.1 ) + IF( IPIV( I ).LT.0 ) THEN + A( I-1, I ) = E( I ) + I = I - 1 + END IF + I = I - 1 + END DO +* +* End A is UPPER +* + END IF +* + ELSE +* +* Begin A is LOWER +* + IF ( CONVERT ) THEN +* +* Convert A (A is lower) +* +* +* Convert VALUE +* Assign subdiagonal entries of D to array E and zero out +* corresponding entries in input storage A +* + I = 1 + E( N ) = ZERO + DO WHILE ( I.LE.N ) + IF( I.LT.N .AND. IPIV(I).LT.0 ) THEN + E( I ) = A( I+1, I ) + E( I+1 ) = ZERO + A( I+1, I ) = ZERO + I = I + 1 + ELSE + E( I ) = ZERO + END IF + I = I + 1 + END DO +* +* Convert PERMUTATIONS +* +* Apply permutaions to submatrices of lower part of A +* in factorization order where i increases from 1 to N +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(i:N,1:i-1) +* + IP = IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.I ) THEN + CALL DSWAP( I-1, A( I, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i and IPIV(i) and i+1 and IPIV(i+1) +* in A(i:N,1:i-1) +* + IP = -IPIV( I ) + IP2 = -IPIV( I+1 ) + IF ( I.GT.1 ) THEN + IF( IP.NE.I ) THEN + CALL DSWAP( I-1, A( I, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + IF( IP2.NE.(I+1) ) THEN + CALL DSWAP( I-1, A( I+1, 1 ), LDA, + $ A( IP2, 1 ), LDA ) + END IF + END IF + I = I + 1 +* + END IF + I = I + 1 + END DO +* + ELSE +* +* Revert A (A is lower) +* +* +* Revert PERMUTATIONS +* +* Apply permutaions to submatrices of lower part of A +* in reverse factorization order where i decreases from N to 1 +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(i:N,1:i-1) +* + IP = IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.I ) THEN + CALL DSWAP( I-1, A( IP, 1 ), LDA, + $ A( I, 1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i+1 and IPIV(i+1) and i and IPIV(i) +* in A(i:N,1:i-1) +* + I = I - 1 + IP = -IPIV( I ) + IP2 = -IPIV( I+1 ) + IF ( I.GT.1 ) THEN + IF( IP2.NE.(I+1) ) THEN + CALL DSWAP( I-1, A( IP2, 1 ), LDA, + $ A( I+1, 1 ), LDA ) + END IF + IF( IP.NE.I ) THEN + CALL DSWAP( I-1, A( IP, 1 ), LDA, + $ A( I, 1 ), LDA ) + END IF + END IF +* + END IF + I = I - 1 + END DO +* +* Revert VALUE +* Assign subdiagonal entries of D from array E to +* subgiagonal entries of A. +* + I = 1 + DO WHILE ( I.LE.N-1 ) + IF( IPIV( I ).LT.0 ) THEN + A( I + 1, I ) = E( I ) + I = I + 1 + END IF + I = I + 1 + END DO +* + END IF +* +* End A is LOWER +* + END IF + + RETURN +* +* End of DSYCONVF_ROOK +* + END diff --git a/dspl/liblapack/SRC/dsyequb.f b/dspl/liblapack/SRC/dsyequb.f new file mode 100644 index 0000000..de6b71a --- /dev/null +++ b/dspl/liblapack/SRC/dsyequb.f @@ -0,0 +1,334 @@ +*> \brief \b DSYEQUB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYEQUB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, N +* DOUBLE PRECISION AMAX, SCOND +* CHARACTER UPLO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), S( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYEQUB computes row and column scalings intended to equilibrate a +*> symmetric matrix A (with respect to the Euclidean norm) and reduce +*> its condition number. The scale factors S are computed by the BIN +*> algorithm (see references) so that the scaled matrix B with elements +*> B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of +*> the smallest possible condition number over all possible diagonal +*> scalings. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The N-by-N symmetric matrix whose scaling factors are to be +*> computed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is DOUBLE PRECISION +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Largest absolute value of any matrix element. If AMAX is +*> very close to overflow or very close to underflow, the +*> matrix should be scaled. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleSYcomputational +* +*> \par References: +* ================ +*> +*> Livne, O.E. and Golub, G.H., "Scaling by Binormalization", \n +*> Numerical Algorithms, vol. 35, no. 1, pp. 97-120, January 2004. \n +*> DOI 10.1023/B:NUMA.0000016606.32820.69 \n +*> Tech report version: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.3.1679 +*> +* ===================================================================== + SUBROUTINE DSYEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N + DOUBLE PRECISION AMAX, SCOND + CHARACTER UPLO +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), S( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 ) + INTEGER MAX_ITER + PARAMETER ( MAX_ITER = 100 ) +* .. +* .. Local Scalars .. + INTEGER I, J, ITER + DOUBLE PRECISION AVG, STD, TOL, C0, C1, C2, T, U, SI, D, BASE, + $ SMIN, SMAX, SMLNUM, BIGNUM, SCALE, SUMSQ + LOGICAL UP +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + LOGICAL LSAME + EXTERNAL DLAMCH, LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT. ( LSAME( UPLO, 'U' ) .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -1 + ELSE IF ( N .LT. 0 ) THEN + INFO = -2 + ELSE IF ( LDA .LT. MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF ( INFO .NE. 0 ) THEN + CALL XERBLA( 'DSYEQUB', -INFO ) + RETURN + END IF + + UP = LSAME( UPLO, 'U' ) + AMAX = ZERO +* +* Quick return if possible. +* + IF ( N .EQ. 0 ) THEN + SCOND = ONE + RETURN + END IF + + DO I = 1, N + S( I ) = ZERO + END DO + + AMAX = ZERO + IF ( UP ) THEN + DO J = 1, N + DO I = 1, J-1 + S( I ) = MAX( S( I ), ABS( A( I, J ) ) ) + S( J ) = MAX( S( J ), ABS( A( I, J ) ) ) + AMAX = MAX( AMAX, ABS( A( I, J ) ) ) + END DO + S( J ) = MAX( S( J ), ABS( A( J, J ) ) ) + AMAX = MAX( AMAX, ABS( A( J, J ) ) ) + END DO + ELSE + DO J = 1, N + S( J ) = MAX( S( J ), ABS( A( J, J ) ) ) + AMAX = MAX( AMAX, ABS( A( J, J ) ) ) + DO I = J+1, N + S( I ) = MAX( S( I ), ABS( A( I, J ) ) ) + S( J ) = MAX( S( J ), ABS( A( I, J ) ) ) + AMAX = MAX( AMAX, ABS( A( I, J ) ) ) + END DO + END DO + END IF + DO J = 1, N + S( J ) = 1.0D0 / S( J ) + END DO + + TOL = ONE / SQRT( 2.0D0 * N ) + + DO ITER = 1, MAX_ITER + SCALE = 0.0D0 + SUMSQ = 0.0D0 +* beta = |A|s + DO I = 1, N + WORK( I ) = ZERO + END DO + IF ( UP ) THEN + DO J = 1, N + DO I = 1, J-1 + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) * S( J ) + WORK( J ) = WORK( J ) + ABS( A( I, J ) ) * S( I ) + END DO + WORK( J ) = WORK( J ) + ABS( A( J, J ) ) * S( J ) + END DO + ELSE + DO J = 1, N + WORK( J ) = WORK( J ) + ABS( A( J, J ) ) * S( J ) + DO I = J+1, N + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) * S( J ) + WORK( J ) = WORK( J ) + ABS( A( I, J ) ) * S( I ) + END DO + END DO + END IF + +* avg = s^T beta / n + AVG = 0.0D0 + DO I = 1, N + AVG = AVG + S( I )*WORK( I ) + END DO + AVG = AVG / N + + STD = 0.0D0 + DO I = N+1, 2*N + WORK( I ) = S( I-N ) * WORK( I-N ) - AVG + END DO + CALL DLASSQ( N, WORK( N+1 ), 1, SCALE, SUMSQ ) + STD = SCALE * SQRT( SUMSQ / N ) + + IF ( STD .LT. TOL * AVG ) GOTO 999 + + DO I = 1, N + T = ABS( A( I, I ) ) + SI = S( I ) + C2 = ( N-1 ) * T + C1 = ( N-2 ) * ( WORK( I ) - T*SI ) + C0 = -(T*SI)*SI + 2*WORK( I )*SI - N*AVG + D = C1*C1 - 4*C0*C2 + + IF ( D .LE. 0 ) THEN + INFO = -1 + RETURN + END IF + SI = -2*C0 / ( C1 + SQRT( D ) ) + + D = SI - S( I ) + U = ZERO + IF ( UP ) THEN + DO J = 1, I + T = ABS( A( J, I ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + DO J = I+1,N + T = ABS( A( I, J ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + ELSE + DO J = 1, I + T = ABS( A( I, J ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + DO J = I+1,N + T = ABS( A( J, I ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + END IF + + AVG = AVG + ( U + WORK( I ) ) * D / N + S( I ) = SI + END DO + END DO + + 999 CONTINUE + + SMLNUM = DLAMCH( 'SAFEMIN' ) + BIGNUM = ONE / SMLNUM + SMIN = BIGNUM + SMAX = ZERO + T = ONE / SQRT( AVG ) + BASE = DLAMCH( 'B' ) + U = ONE / LOG( BASE ) + DO I = 1, N + S( I ) = BASE ** INT( U * LOG( S( I ) * T ) ) + SMIN = MIN( SMIN, S( I ) ) + SMAX = MAX( SMAX, S( I ) ) + END DO + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) +* + END diff --git a/dspl/liblapack/SRC/dsyev.f b/dspl/liblapack/SRC/dsyev.f new file mode 100644 index 0000000..ee8c479 --- /dev/null +++ b/dspl/liblapack/SRC/dsyev.f @@ -0,0 +1,286 @@ +*> \brief DSYEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYEV computes all eigenvalues and, optionally, eigenvectors of a +*> real symmetric matrix A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,3*N-1). +*> For optimal efficiency, LWORK >= (NB+2)*N, +*> where NB is the blocksize for DSYTRD returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYeigen +* +* ===================================================================== + SUBROUTINE DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE, + $ LLWORK, LWKOPT, NB + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, ILAENV, DLAMCH, DLANSY +* .. +* .. External Subroutines .. + EXTERNAL DLASCL, DORGTR, DSCAL, DSTEQR, DSTERF, DSYTRD, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, ( NB+2 )*N ) + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, 3*N-1 ) .AND. .NOT.LQUERY ) + $ INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + WORK( 1 ) = 2 + IF( WANTZ ) + $ A( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL DLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call DSYTRD to reduce symmetric matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + CALL DSYTRD( UPLO, N, A, LDA, W, WORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* DORGTR to generate the orthogonal matrix, then call DSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL DORGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ), + $ LLWORK, IINFO ) + CALL DSTEQR( JOBZ, N, W, WORK( INDE ), A, LDA, WORK( INDTAU ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DSYEV +* + END diff --git a/dspl/liblapack/SRC/dsyev_2stage.f b/dspl/liblapack/SRC/dsyev_2stage.f new file mode 100644 index 0000000..fff0ded --- /dev/null +++ b/dspl/liblapack/SRC/dsyev_2stage.f @@ -0,0 +1,348 @@ +*> \brief DSYEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices +* +* @precisions fortran d -> s +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYEV_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, +* INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYEV_2STAGE computes all eigenvalues and, optionally, eigenvectors of a +*> real symmetric matrix A using the 2stage technique for +*> the reduction to tridiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension LWORK +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + 2*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + 2*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleSYeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, + $ INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE, + $ LLWORK, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, DLAMCH, DLANSY, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL DLASCL, DORGTR, DSCAL, DSTEQR, DSTERF, + $ XERBLA, DSYTRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + KD = ILAENV2STAGE( 1, 'DSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'DSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = 2*N + LHTRD + LWTRD + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYEV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + WORK( 1 ) = 2 + IF( WANTZ ) + $ A( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL DLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call DSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + INDHOUS = INDTAU + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL DSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK( INDE ), + $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* DORGTR to generate the orthogonal matrix, then call DSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE +* Not available in this release, and agrument checking should not +* let it getting here + RETURN + CALL DORGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ), + $ LLWORK, IINFO ) + CALL DSTEQR( JOBZ, N, W, WORK( INDE ), A, LDA, WORK( INDTAU ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of DSYEV_2STAGE +* + END diff --git a/dspl/liblapack/SRC/dsyevd.f b/dspl/liblapack/SRC/dsyevd.f new file mode 100644 index 0000000..2db6784 --- /dev/null +++ b/dspl/liblapack/SRC/dsyevd.f @@ -0,0 +1,357 @@ +*> \brief DSYEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYEVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK, +* LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYEVD computes all eigenvalues and, optionally, eigenvectors of a +*> real symmetric matrix A. If eigenvectors are desired, it uses a +*> divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> +*> Because of large use of BLAS of level 3, DSYEVD needs N**2 more +*> workspace than DSYEVX. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, +*> dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LWORK must be at least 2*N+1. +*> If JOBZ = 'V' and N > 1, LWORK must be at least +*> 1 + 6*N + 2*N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i and JOBZ = 'N', then the algorithm failed +*> to converge; i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> if INFO = i and JOBZ = 'V', then the algorithm failed +*> to compute an eigenvalue while working on the submatrix +*> lying in rows and columns INFO/(N+1) through +*> mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYeigen +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA \n +*> Modified by Francoise Tisseur, University of Tennessee \n +*> Modified description of INFO. Sven, 16 Feb 05. \n + + +*> +* ===================================================================== + SUBROUTINE DSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. +* + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, INDE, INDTAU, INDWK2, INDWRK, ISCALE, + $ LIOPT, LIWMIN, LLWORK, LLWRK2, LOPT, LWMIN + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, DLAMCH, DLANSY, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLACPY, DLASCL, DORMTR, DSCAL, DSTEDC, DSTERF, + $ DSYTRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + LOPT = LWMIN + LIOPT = LIWMIN + ELSE + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 6*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + 1 + END IF + LOPT = MAX( LWMIN, 2*N + + $ ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 ) ) + LIOPT = LIWMIN + END IF + WORK( 1 ) = LOPT + IWORK( 1 ) = LIOPT +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + IF( WANTZ ) + $ A( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL DLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call DSYTRD to reduce symmetric matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 +* + CALL DSYTRD( UPLO, N, A, LDA, W, WORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call DORMTR to multiply it by the +* Householder transformations stored in A. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL DSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL DORMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ), + $ WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO ) + CALL DLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL DSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LOPT + IWORK( 1 ) = LIOPT +* + RETURN +* +* End of DSYEVD +* + END diff --git a/dspl/liblapack/SRC/dsyevd_2stage.f b/dspl/liblapack/SRC/dsyevd_2stage.f new file mode 100644 index 0000000..75a6da4 --- /dev/null +++ b/dspl/liblapack/SRC/dsyevd_2stage.f @@ -0,0 +1,410 @@ +*> \brief DSYEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices +* +* @precisions fortran d -> s +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYEVD_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, +* IWORK, LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a +*> real symmetric matrix A using the 2stage technique for +*> the reduction to tridiagonal. If eigenvectors are desired, it uses a +*> divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, +*> dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + 2*N+1 +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + 2*N+1 +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be at least +*> 1 + 6*N + 2*N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i and JOBZ = 'N', then the algorithm failed +*> to converge; i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> if INFO = i and JOBZ = 'V', then the algorithm failed +*> to compute an eigenvalue while working on the submatrix +*> lying in rows and columns INFO/(N+1) through +*> mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleSYeigen +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA \n +*> Modified by Francoise Tisseur, University of Tennessee \n +*> Modified description of INFO. Sven, 16 Feb 05. \n +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, + $ IWORK, LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. +* + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, INDE, INDTAU, INDWK2, INDWRK, ISCALE, + $ LIWMIN, LLWORK, LLWRK2, LWMIN, + $ LHTRD, LWTRD, KD, IB, INDHOUS + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, DLAMCH, DLANSY, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL DLACPY, DLASCL, DORMTR, DSCAL, DSTEDC, DSTERF, + $ DSYTRD_2STAGE, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + KD = ILAENV2STAGE( 1, 'DSYTRD_2STAGE', JOBZ, + $ N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'DSYTRD_2STAGE', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'DSYTRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'DSYTRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 6*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + 1 + LHTRD + LWTRD + END IF + END IF + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYEVD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + IF( WANTZ ) + $ A( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL DLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call DSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + INDHOUS = INDTAU + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 +* + CALL DSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK( INDE ), + $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call DORMTR to multiply it by the +* Householder transformations stored in A. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE +* Not available in this release, and agrument checking should not +* let it getting here + RETURN + CALL DSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL DORMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ), + $ WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO ) + CALL DLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL DSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DSYEVD_2STAGE +* + END diff --git a/dspl/liblapack/SRC/dsyevr.f b/dspl/liblapack/SRC/dsyevr.f new file mode 100644 index 0000000..d140426 --- /dev/null +++ b/dspl/liblapack/SRC/dsyevr.f @@ -0,0 +1,681 @@ +*> \brief DSYEVR computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYEVR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, +* ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, +* IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LWORK, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYEVR computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric matrix A. Eigenvalues and eigenvectors can be +*> selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> +*> DSYEVR first reduces the matrix A to tridiagonal form T with a call +*> to DSYTRD. Then, whenever possible, DSYEVR calls DSTEMR to compute +*> the eigenspectrum using Relatively Robust Representations. DSTEMR +*> computes eigenvalues by the dqds algorithm, while orthogonal +*> eigenvectors are computed from various "good" L D L^T representations +*> (also known as Relatively Robust Representations). Gram-Schmidt +*> orthogonalization is avoided as far as possible. More specifically, +*> the various steps of the algorithm are as follows. +*> +*> For each unreduced block (submatrix) of T, +*> (a) Compute T - sigma I = L D L^T, so that L and D +*> define all the wanted eigenvalues to high relative accuracy. +*> This means that small relative changes in the entries of D and L +*> cause only small relative changes in the eigenvalues and +*> eigenvectors. The standard (unfactored) representation of the +*> tridiagonal matrix T does not have this property in general. +*> (b) Compute the eigenvalues to suitable accuracy. +*> If the eigenvectors are desired, the algorithm attains full +*> accuracy of the computed eigenvalues only right before +*> the corresponding vectors have to be computed, see steps c) and d). +*> (c) For each cluster of close eigenvalues, select a new +*> shift close to the cluster, find a new factorization, and refine +*> the shifted eigenvalues to suitable accuracy. +*> (d) For each eigenvalue with a large enough relative separation compute +*> the corresponding eigenvector by forming a rank revealing twisted +*> factorization. Go back to (c) for any clusters that remain. +*> +*> The desired accuracy of the output can be specified by the input +*> parameter ABSTOL. +*> +*> For more details, see DSTEMR's documentation and: +*> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations +*> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," +*> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. +*> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and +*> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, +*> 2004. Also LAPACK Working Note 154. +*> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric +*> tridiagonal eigenvalue/eigenvector problem", +*> Computer Science Division Technical Report No. UCB/CSD-97-971, +*> UC Berkeley, May 1997. +*> +*> +*> Note 1 : DSYEVR calls DSTEMR when the full spectrum is requested +*> on machines which conform to the ieee-754 floating point standard. +*> DSYEVR calls DSTEBZ and DSTEIN on non-ieee machines and +*> when partial spectrum requests are made. +*> +*> Normal execution of DSTEMR may create NaNs and infinities and +*> hence may abort due to a floating point exception in environments +*> which do not handle NaNs and infinities in the ieee standard default +*> manner. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and +*> DSTEIN are called +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> +*> If high relative accuracy is important, set ABSTOL to +*> DLAMCH( 'Safe minimum' ). Doing so will guarantee that +*> eigenvalues are computed to high relative accuracy when +*> possible in future releases. The current code does not +*> make any guarantees about high relative accuracy, but +*> future releases will. See J. Barlow and J. Demmel, +*> "Computing Accurate Eigensystems of Scaled Diagonally +*> Dominant Matrices", LAPACK Working Note #7, for a discussion +*> of which matrices define their eigenvalues to high relative +*> accuracy. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> Supplying N columns is always safe. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). This is an output of DSTEMR (tridiagonal +*> matrix). The support of the eigenvectors of A is typically +*> 1:N because of the orthogonal transformations applied by DORMTR. +*> Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,26*N). +*> For optimal efficiency, LWORK >= (NB+6)*N, +*> where NB is the max of the blocksize for DSYTRD and DORMTR +*> returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N). +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: Internal error +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleSYeigen +* +*> \par Contributors: +* ================== +*> +*> Inderjit Dhillon, IBM Almaden, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Ken Stanley, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Jason Riedy, Computer Science Division, University of +*> California at Berkeley, USA \n +*> +* ===================================================================== + SUBROUTINE DSYEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, + $ IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ, + $ TRYRAC + CHARACTER ORDER + INTEGER I, IEEEOK, IINFO, IMAX, INDD, INDDD, INDE, + $ INDEE, INDIBL, INDIFL, INDISP, INDIWO, INDTAU, + $ INDWK, INDWKN, ISCALE, J, JJ, LIWMIN, + $ LLWORK, LLWRKN, LWKOPT, LWMIN, NB, NSPLIT + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, ILAENV, DLAMCH, DLANSY +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DORMTR, DSCAL, DSTEBZ, DSTEMR, DSTEIN, + $ DSTERF, DSWAP, DSYTRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IEEEOK = ILAENV( 10, 'DSYEVR', 'N', 1, 2, 3, 4 ) +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) ) +* + LWMIN = MAX( 1, 26*N ) + LIWMIN = MAX( 1, 10*N ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 ) + NB = MAX( NB, ILAENV( 1, 'DORMTR', UPLO, N, -1, -1, -1 ) ) + LWKOPT = MAX( ( NB+1 )*N, LWMIN ) + WORK( 1 ) = LWKOPT + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYEVR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + WORK( 1 ) = 7 + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + ELSE + IF( VL.LT.A( 1, 1 ) .AND. VU.GE.A( 1, 1 ) ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + END IF + END IF + IF( WANTZ ) THEN + Z( 1, 1 ) = ONE + ISUPPZ( 1 ) = 1 + ISUPPZ( 2 ) = 1 + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF (VALEIG) THEN + VLL = VL + VUU = VU + END IF + ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL DSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL DSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF + +* Initialize indices into workspaces. Note: The IWORK indices are +* used only if DSTERF or DSTEMR fail. + +* WORK(INDTAU:INDTAU+N-1) stores the scalar factors of the +* elementary reflectors used in DSYTRD. + INDTAU = 1 +* WORK(INDD:INDD+N-1) stores the tridiagonal's diagonal entries. + INDD = INDTAU + N +* WORK(INDE:INDE+N-1) stores the off-diagonal entries of the +* tridiagonal matrix from DSYTRD. + INDE = INDD + N +* WORK(INDDD:INDDD+N-1) is a copy of the diagonal entries over +* -written by DSTEMR (the DSTERF path copies the diagonal to W). + INDDD = INDE + N +* WORK(INDEE:INDEE+N-1) is a copy of the off-diagonal entries over +* -written while computing the eigenvalues in DSTERF and DSTEMR. + INDEE = INDDD + N +* INDWK is the starting offset of the left-over workspace, and +* LLWORK is the remaining workspace size. + INDWK = INDEE + N + LLWORK = LWORK - INDWK + 1 + +* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in DSTEBZ and +* stores the block indices of each of the M<=N eigenvalues. + INDIBL = 1 +* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in DSTEBZ and +* stores the starting and finishing indices of each block. + INDISP = INDIBL + N +* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors +* that corresponding to eigenvectors that fail to converge in +* DSTEIN. This information is discarded; if any fail, the driver +* returns INFO > 0. + INDIFL = INDISP + N +* INDIWO is the offset of the remaining integer workspace. + INDIWO = INDIFL + N + +* +* Call DSYTRD to reduce symmetric matrix to tridiagonal form. +* + CALL DSYTRD( UPLO, N, A, LDA, WORK( INDD ), WORK( INDE ), + $ WORK( INDTAU ), WORK( INDWK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired +* then call DSTERF or DSTEMR and DORMTR. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ IEEEOK.EQ.1 ) THEN + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N, WORK( INDD ), 1, W, 1 ) + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DCOPY( N, WORK( INDD ), 1, WORK( INDDD ), 1 ) +* + IF (ABSTOL .LE. TWO*N*EPS) THEN + TRYRAC = .TRUE. + ELSE + TRYRAC = .FALSE. + END IF + CALL DSTEMR( JOBZ, 'A', N, WORK( INDDD ), WORK( INDEE ), + $ VL, VU, IL, IU, M, W, Z, LDZ, N, ISUPPZ, + $ TRYRAC, WORK( INDWK ), LWORK, IWORK, LIWORK, + $ INFO ) +* +* +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by DSTEMR. +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL DORMTR( 'L', UPLO, 'N', N, M, A, LDA, + $ WORK( INDTAU ), Z, LDZ, WORK( INDWKN ), + $ LLWRKN, IINFO ) + END IF + END IF +* +* + IF( INFO.EQ.0 ) THEN +* Everything worked. Skip DSTEBZ/DSTEIN. IWORK(:) are +* undefined. + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, DSTEIN. +* Also call DSTEBZ and DSTEIN if DSTEMR fails. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWK ), IWORK( INDIWO ), IWORK( INDIFL ), + $ INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by DSTEIN. +* + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL DORMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* +* Jump here if DSTEMR/DSTEIN succeeded. + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. Note: We do not sort the IFAIL portion of IWORK. +* It may not be initialized (if DSTEMR/DSTEIN succeeded), and we do +* not return this detailed information to the user. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + W( I ) = W( J ) + W( J ) = TMP1 + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DSYEVR +* + END diff --git a/dspl/liblapack/SRC/dsyevr_2stage.f b/dspl/liblapack/SRC/dsyevr_2stage.f new file mode 100644 index 0000000..847acce --- /dev/null +++ b/dspl/liblapack/SRC/dsyevr_2stage.f @@ -0,0 +1,740 @@ +*> \brief DSYEVR_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices +* +* @precisions fortran d -> s +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYEVR_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, +* IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, +* LWORK, IWORK, LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LWORK, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYEVR_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors can be +*> selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> +*> DSYEVR_2STAGE first reduces the matrix A to tridiagonal form T with a call +*> to DSYTRD. Then, whenever possible, DSYEVR_2STAGE calls DSTEMR to compute +*> the eigenspectrum using Relatively Robust Representations. DSTEMR +*> computes eigenvalues by the dqds algorithm, while orthogonal +*> eigenvectors are computed from various "good" L D L^T representations +*> (also known as Relatively Robust Representations). Gram-Schmidt +*> orthogonalization is avoided as far as possible. More specifically, +*> the various steps of the algorithm are as follows. +*> +*> For each unreduced block (submatrix) of T, +*> (a) Compute T - sigma I = L D L^T, so that L and D +*> define all the wanted eigenvalues to high relative accuracy. +*> This means that small relative changes in the entries of D and L +*> cause only small relative changes in the eigenvalues and +*> eigenvectors. The standard (unfactored) representation of the +*> tridiagonal matrix T does not have this property in general. +*> (b) Compute the eigenvalues to suitable accuracy. +*> If the eigenvectors are desired, the algorithm attains full +*> accuracy of the computed eigenvalues only right before +*> the corresponding vectors have to be computed, see steps c) and d). +*> (c) For each cluster of close eigenvalues, select a new +*> shift close to the cluster, find a new factorization, and refine +*> the shifted eigenvalues to suitable accuracy. +*> (d) For each eigenvalue with a large enough relative separation compute +*> the corresponding eigenvector by forming a rank revealing twisted +*> factorization. Go back to (c) for any clusters that remain. +*> +*> The desired accuracy of the output can be specified by the input +*> parameter ABSTOL. +*> +*> For more details, see DSTEMR's documentation and: +*> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations +*> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," +*> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. +*> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and +*> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, +*> 2004. Also LAPACK Working Note 154. +*> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric +*> tridiagonal eigenvalue/eigenvector problem", +*> Computer Science Division Technical Report No. UCB/CSD-97-971, +*> UC Berkeley, May 1997. +*> +*> +*> Note 1 : DSYEVR_2STAGE calls DSTEMR when the full spectrum is requested +*> on machines which conform to the ieee-754 floating point standard. +*> DSYEVR_2STAGE calls DSTEBZ and SSTEIN on non-ieee machines and +*> when partial spectrum requests are made. +*> +*> Normal execution of DSTEMR may create NaNs and infinities and +*> hence may abort due to a floating point exception in environments +*> which do not handle NaNs and infinities in the ieee standard default +*> manner. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and +*> DSTEIN are called +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> +*> If high relative accuracy is important, set ABSTOL to +*> DLAMCH( 'Safe minimum' ). Doing so will guarantee that +*> eigenvalues are computed to high relative accuracy when +*> possible in future releases. The current code does not +*> make any guarantees about high relative accuracy, but +*> future releases will. See J. Barlow and J. Demmel, +*> "Computing Accurate Eigensystems of Scaled Diagonally +*> Dominant Matrices", LAPACK Working Note #7, for a discussion +*> of which matrices define their eigenvalues to high relative +*> accuracy. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> Supplying N columns is always safe. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). This is an output of DSTEMR (tridiagonal +*> matrix). The support of the eigenvectors of A is typically +*> 1:N because of the orthogonal transformations applied by DORMTR. +*> Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, 26*N, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + 5*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + 5*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N). +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: Internal error +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleSYeigen +* +*> \par Contributors: +* ================== +*> +*> Inderjit Dhillon, IBM Almaden, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Ken Stanley, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Jason Riedy, Computer Science Division, University of +*> California at Berkeley, USA \n +*> +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, + $ LWORK, IWORK, LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ, + $ TRYRAC + CHARACTER ORDER + INTEGER I, IEEEOK, IINFO, IMAX, INDD, INDDD, INDE, + $ INDEE, INDIBL, INDIFL, INDISP, INDIWO, INDTAU, + $ INDWK, INDWKN, ISCALE, J, JJ, LIWMIN, + $ LLWORK, LLWRKN, LWMIN, NSPLIT, + $ LHTRD, LWTRD, KD, IB, INDHOUS + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV, ILAENV2STAGE + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, DLAMCH, DLANSY, ILAENV, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DORMTR, DSCAL, DSTEBZ, DSTEMR, DSTEIN, + $ DSTERF, DSWAP, DSYTRD_2STAGE, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IEEEOK = ILAENV( 10, 'DSYEVR', 'N', 1, 2, 3, 4 ) +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) ) +* + KD = ILAENV2STAGE( 1, 'DSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'DSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = MAX( 26*N, 5*N + LHTRD + LWTRD ) + LIWMIN = MAX( 1, 10*N ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN +* NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 ) +* NB = MAX( NB, ILAENV( 1, 'DORMTR', UPLO, N, -1, -1, -1 ) ) +* LWKOPT = MAX( ( NB+1 )*N, LWMIN ) + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYEVR_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + WORK( 1 ) = 7 + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + ELSE + IF( VL.LT.A( 1, 1 ) .AND. VU.GE.A( 1, 1 ) ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + END IF + END IF + IF( WANTZ ) THEN + Z( 1, 1 ) = ONE + ISUPPZ( 1 ) = 1 + ISUPPZ( 2 ) = 1 + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF (VALEIG) THEN + VLL = VL + VUU = VU + END IF + ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL DSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL DSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF + +* Initialize indices into workspaces. Note: The IWORK indices are +* used only if DSTERF or DSTEMR fail. + +* WORK(INDTAU:INDTAU+N-1) stores the scalar factors of the +* elementary reflectors used in DSYTRD. + INDTAU = 1 +* WORK(INDD:INDD+N-1) stores the tridiagonal's diagonal entries. + INDD = INDTAU + N +* WORK(INDE:INDE+N-1) stores the off-diagonal entries of the +* tridiagonal matrix from DSYTRD. + INDE = INDD + N +* WORK(INDDD:INDDD+N-1) is a copy of the diagonal entries over +* -written by DSTEMR (the DSTERF path copies the diagonal to W). + INDDD = INDE + N +* WORK(INDEE:INDEE+N-1) is a copy of the off-diagonal entries over +* -written while computing the eigenvalues in DSTERF and DSTEMR. + INDEE = INDDD + N +* INDHOUS is the starting offset Householder storage of stage 2 + INDHOUS = INDEE + N +* INDWK is the starting offset of the left-over workspace, and +* LLWORK is the remaining workspace size. + INDWK = INDHOUS + LHTRD + LLWORK = LWORK - INDWK + 1 + + +* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in DSTEBZ and +* stores the block indices of each of the M<=N eigenvalues. + INDIBL = 1 +* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in DSTEBZ and +* stores the starting and finishing indices of each block. + INDISP = INDIBL + N +* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors +* that corresponding to eigenvectors that fail to converge in +* DSTEIN. This information is discarded; if any fail, the driver +* returns INFO > 0. + INDIFL = INDISP + N +* INDIWO is the offset of the remaining integer workspace. + INDIWO = INDIFL + N + +* +* Call DSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form. +* +* + CALL DSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, WORK( INDD ), + $ WORK( INDE ), WORK( INDTAU ), WORK( INDHOUS ), + $ LHTRD, WORK( INDWK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired +* then call DSTERF or DSTEMR and DORMTR. +* + IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND. + $ IEEEOK.EQ.1 ) THEN + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N, WORK( INDD ), 1, W, 1 ) + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DCOPY( N, WORK( INDD ), 1, WORK( INDDD ), 1 ) +* + IF (ABSTOL .LE. TWO*N*EPS) THEN + TRYRAC = .TRUE. + ELSE + TRYRAC = .FALSE. + END IF + CALL DSTEMR( JOBZ, 'A', N, WORK( INDDD ), WORK( INDEE ), + $ VL, VU, IL, IU, M, W, Z, LDZ, N, ISUPPZ, + $ TRYRAC, WORK( INDWK ), LWORK, IWORK, LIWORK, + $ INFO ) +* +* +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by DSTEMR. +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL DORMTR( 'L', UPLO, 'N', N, M, A, LDA, + $ WORK( INDTAU ), Z, LDZ, WORK( INDWKN ), + $ LLWRKN, IINFO ) + END IF + END IF +* +* + IF( INFO.EQ.0 ) THEN +* Everything worked. Skip DSTEBZ/DSTEIN. IWORK(:) are +* undefined. + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, DSTEIN. +* Also call DSTEBZ and DSTEIN if DSTEMR fails. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWK ), IWORK( INDIWO ), IWORK( INDIFL ), + $ INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by DSTEIN. +* + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL DORMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* +* Jump here if DSTEMR/DSTEIN succeeded. + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. Note: We do not sort the IFAIL portion of IWORK. +* It may not be initialized (if DSTEMR/DSTEIN succeeded), and we do +* not return this detailed information to the user. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + W( I ) = W( J ) + W( J ) = TMP1 + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DSYEVR_2STAGE +* + END diff --git a/dspl/liblapack/SRC/dsyevx.f b/dspl/liblapack/SRC/dsyevx.f new file mode 100644 index 0000000..2fd7bce --- /dev/null +++ b/dspl/liblapack/SRC/dsyevx.f @@ -0,0 +1,554 @@ +*> \brief DSYEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, +* ABSTOL, M, W, Z, LDZ, WORK, LWORK, IWORK, +* IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYEVX computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric matrix A. Eigenvalues and eigenvectors can be +*> selected by specifying either a range of values or a range of indices +*> for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> On normal exit, the first M elements contain the selected +*> eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise 8*N. +*> For optimal efficiency, LWORK >= (NB+3)*N, +*> where NB is the max of the blocksize for DSYTRD and DORMTR +*> returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleSYeigen +* +* ===================================================================== + SUBROUTINE DSYEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, WORK, LWORK, IWORK, + $ IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, TEST, VALEIG, + $ WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWO, INDTAU, INDWKN, INDWRK, ISCALE, + $ ITMP1, J, JJ, LLWORK, LLWRKN, LWKMIN, + $ LWKOPT, NB, NSPLIT + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, ILAENV, DLAMCH, DLANSY +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLACPY, DORGTR, DORMTR, DSCAL, DSTEBZ, + $ DSTEIN, DSTEQR, DSTERF, DSWAP, DSYTRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWKMIN = 1 + WORK( 1 ) = LWKMIN + ELSE + LWKMIN = 8*N + NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 ) + NB = MAX( NB, ILAENV( 1, 'DORMTR', UPLO, N, -1, -1, -1 ) ) + LWKOPT = MAX( LWKMIN, ( NB + 3 )*N ) + WORK( 1 ) = LWKOPT + END IF +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) + $ INFO = -17 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + ELSE + IF( VL.LT.A( 1, 1 ) .AND. VU.GE.A( 1, 1 ) ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + END IF + ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL DSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL DSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call DSYTRD to reduce symmetric matrix to tridiagonal form. +* + INDTAU = 1 + INDE = INDTAU + N + INDD = INDE + N + INDWRK = INDD + N + LLWORK = LWORK - INDWRK + 1 + CALL DSYTRD( UPLO, N, A, LDA, WORK( INDD ), WORK( INDE ), + $ WORK( INDTAU ), WORK( INDWRK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal to +* zero, then call DSTERF or DORGTR and SSTEQR. If this fails for +* some eigenvalue, then try DSTEBZ. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG .OR. TEST ) .AND. ( ABSTOL.LE.ZERO ) ) THEN + CALL DCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL DLACPY( 'A', N, N, A, LDA, Z, LDZ ) + CALL DORGTR( UPLO, N, Z, LDZ, WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + IFAIL( I ) = 0 + 30 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 40 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by DSTEIN. +* + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL DORMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 40 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 60 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 50 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 60 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DSYEVX +* + END diff --git a/dspl/liblapack/SRC/dsyevx_2stage.f b/dspl/liblapack/SRC/dsyevx_2stage.f new file mode 100644 index 0000000..7a93ac0 --- /dev/null +++ b/dspl/liblapack/SRC/dsyevx_2stage.f @@ -0,0 +1,612 @@ +*> \brief DSYEVX_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices +* +* @precisions fortran d -> s +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYEVX_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYEVX_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, +* IL, IU, ABSTOL, M, W, Z, LDZ, WORK, +* LWORK, IWORK, IFAIL, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYEVX_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors can be +*> selected by specifying either a range of values or a range of indices +*> for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> On normal exit, the first M elements contain the selected +*> eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, 8*N, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + 3*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + 3*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleSYeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYEVX_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, WORK, + $ LWORK, IWORK, IFAIL, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, TEST, VALEIG, + $ WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWO, INDTAU, INDWKN, INDWRK, ISCALE, + $ ITMP1, J, JJ, LLWORK, LLWRKN, + $ NSPLIT, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, DLAMCH, DLANSY, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLACPY, DORGTR, DORMTR, DSCAL, DSTEBZ, + $ DSTEIN, DSTEQR, DSTERF, DSWAP, XERBLA, + $ DSYTRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + KD = ILAENV2STAGE( 1, 'DSYTRD_2STAGE', JOBZ, + $ N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'DSYTRD_2STAGE', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'DSYTRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'DSYTRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + LWMIN = MAX( 8*N, 3*N + LHTRD + LWTRD ) + WORK( 1 ) = LWMIN + END IF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -17 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYEVX_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + ELSE + IF( VL.LT.A( 1, 1 ) .AND. VU.GE.A( 1, 1 ) ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + END IF + ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL DSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL DSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call DSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form. +* + INDTAU = 1 + INDE = INDTAU + N + INDD = INDE + N + INDHOUS = INDD + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL DSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, WORK( INDD ), + $ WORK( INDE ), WORK( INDTAU ), WORK( INDHOUS ), + $ LHTRD, WORK( INDWRK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal to +* zero, then call DSTERF or DORGTR and SSTEQR. If this fails for +* some eigenvalue, then try DSTEBZ. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG .OR. TEST ) .AND. ( ABSTOL.LE.ZERO ) ) THEN + CALL DCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL DLACPY( 'A', N, N, A, LDA, Z, LDZ ) + CALL DORGTR( UPLO, N, Z, LDZ, WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL DSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + IFAIL( I ) = 0 + 30 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 40 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by DSTEIN. +* + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL DORMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 40 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 60 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 50 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 60 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of DSYEVX_2STAGE +* + END diff --git a/dspl/liblapack/SRC/dsygs2.f b/dspl/liblapack/SRC/dsygs2.f new file mode 100644 index 0000000..a54955c --- /dev/null +++ b/dspl/liblapack/SRC/dsygs2.f @@ -0,0 +1,283 @@ +*> \brief \b DSYGS2 reduces a symmetric definite generalized eigenproblem to standard form, using the factorization results obtained from spotrf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYGS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYGS2 reduces a real symmetric-definite generalized eigenproblem +*> to standard form. +*> +*> If ITYPE = 1, the problem is A*x = lambda*B*x, +*> and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) +*> +*> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +*> B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T *A*L. +*> +*> B must have been previously factorized as U**T *U or L*L**T by DPOTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); +*> = 2 or 3: compute U*A*U**T or L**T *A*L. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored, and how B has been factorized. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the transformed matrix, stored in the +*> same format as A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> The triangular factor from the Cholesky factorization of B, +*> as returned by DPOTRF. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DSYGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, HALF + PARAMETER ( ONE = 1.0D0, HALF = 0.5D0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K + DOUBLE PRECISION AKK, BKK, CT +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DSCAL, DSYR2, DTRMV, DTRSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYGS2', -INFO ) + RETURN + END IF +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U**T)*A*inv(U) +* + DO 10 K = 1, N +* +* Update the upper triangle of A(k:n,k:n) +* + AKK = A( K, K ) + BKK = B( K, K ) + AKK = AKK / BKK**2 + A( K, K ) = AKK + IF( K.LT.N ) THEN + CALL DSCAL( N-K, ONE / BKK, A( K, K+1 ), LDA ) + CT = -HALF*AKK + CALL DAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL DSYR2( UPLO, N-K, -ONE, A( K, K+1 ), LDA, + $ B( K, K+1 ), LDB, A( K+1, K+1 ), LDA ) + CALL DAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL DTRSV( UPLO, 'Transpose', 'Non-unit', N-K, + $ B( K+1, K+1 ), LDB, A( K, K+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L**T) +* + DO 20 K = 1, N +* +* Update the lower triangle of A(k:n,k:n) +* + AKK = A( K, K ) + BKK = B( K, K ) + AKK = AKK / BKK**2 + A( K, K ) = AKK + IF( K.LT.N ) THEN + CALL DSCAL( N-K, ONE / BKK, A( K+1, K ), 1 ) + CT = -HALF*AKK + CALL DAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 ) + CALL DSYR2( UPLO, N-K, -ONE, A( K+1, K ), 1, + $ B( K+1, K ), 1, A( K+1, K+1 ), LDA ) + CALL DAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 ) + CALL DTRSV( UPLO, 'No transpose', 'Non-unit', N-K, + $ B( K+1, K+1 ), LDB, A( K+1, K ), 1 ) + END IF + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U**T +* + DO 30 K = 1, N +* +* Update the upper triangle of A(1:k,1:k) +* + AKK = A( K, K ) + BKK = B( K, K ) + CALL DTRMV( UPLO, 'No transpose', 'Non-unit', K-1, B, + $ LDB, A( 1, K ), 1 ) + CT = HALF*AKK + CALL DAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 ) + CALL DSYR2( UPLO, K-1, ONE, A( 1, K ), 1, B( 1, K ), 1, + $ A, LDA ) + CALL DAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 ) + CALL DSCAL( K-1, BKK, A( 1, K ), 1 ) + A( K, K ) = AKK*BKK**2 + 30 CONTINUE + ELSE +* +* Compute L**T *A*L +* + DO 40 K = 1, N +* +* Update the lower triangle of A(1:k,1:k) +* + AKK = A( K, K ) + BKK = B( K, K ) + CALL DTRMV( UPLO, 'Transpose', 'Non-unit', K-1, B, LDB, + $ A( K, 1 ), LDA ) + CT = HALF*AKK + CALL DAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA ) + CALL DSYR2( UPLO, K-1, ONE, A( K, 1 ), LDA, B( K, 1 ), + $ LDB, A, LDA ) + CALL DAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA ) + CALL DSCAL( K-1, BKK, A( K, 1 ), LDA ) + A( K, K ) = AKK*BKK**2 + 40 CONTINUE + END IF + END IF + RETURN +* +* End of DSYGS2 +* + END diff --git a/dspl/liblapack/SRC/dsygst.f b/dspl/liblapack/SRC/dsygst.f new file mode 100644 index 0000000..5055acd --- /dev/null +++ b/dspl/liblapack/SRC/dsygst.f @@ -0,0 +1,321 @@ +*> \brief \b DSYGST +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYGST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYGST reduces a real symmetric-definite generalized eigenproblem +*> to standard form. +*> +*> If ITYPE = 1, the problem is A*x = lambda*B*x, +*> and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) +*> +*> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +*> B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. +*> +*> B must have been previously factorized as U**T*U or L*L**T by DPOTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); +*> = 2 or 3: compute U*A*U**T or L**T*A*L. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored and B is factored as +*> U**T*U; +*> = 'L': Lower triangle of A is stored and B is factored as +*> L*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the transformed matrix, stored in the +*> same format as A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> The triangular factor from the Cholesky factorization of B, +*> as returned by DPOTRF. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, HALF + PARAMETER ( ONE = 1.0D0, HALF = 0.5D0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KB, NB +* .. +* .. External Subroutines .. + EXTERNAL DSYGS2, DSYMM, DSYR2K, DTRMM, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYGST', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'DSYGST', UPLO, N, -1, -1, -1 ) +* + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL DSYGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + ELSE +* +* Use blocked code +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U**T)*A*inv(U) +* + DO 10 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the upper triangle of A(k:n,k:n) +* + CALL DSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + IF( K+KB.LE.N ) THEN + CALL DTRSM( 'Left', UPLO, 'Transpose', 'Non-unit', + $ KB, N-K-KB+1, ONE, B( K, K ), LDB, + $ A( K, K+KB ), LDA ) + CALL DSYMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, + $ A( K, K ), LDA, B( K, K+KB ), LDB, ONE, + $ A( K, K+KB ), LDA ) + CALL DSYR2K( UPLO, 'Transpose', N-K-KB+1, KB, -ONE, + $ A( K, K+KB ), LDA, B( K, K+KB ), LDB, + $ ONE, A( K+KB, K+KB ), LDA ) + CALL DSYMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, + $ A( K, K ), LDA, B( K, K+KB ), LDB, ONE, + $ A( K, K+KB ), LDA ) + CALL DTRSM( 'Right', UPLO, 'No transpose', + $ 'Non-unit', KB, N-K-KB+1, ONE, + $ B( K+KB, K+KB ), LDB, A( K, K+KB ), + $ LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L**T) +* + DO 20 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the lower triangle of A(k:n,k:n) +* + CALL DSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + IF( K+KB.LE.N ) THEN + CALL DTRSM( 'Right', UPLO, 'Transpose', 'Non-unit', + $ N-K-KB+1, KB, ONE, B( K, K ), LDB, + $ A( K+KB, K ), LDA ) + CALL DSYMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, + $ A( K, K ), LDA, B( K+KB, K ), LDB, ONE, + $ A( K+KB, K ), LDA ) + CALL DSYR2K( UPLO, 'No transpose', N-K-KB+1, KB, + $ -ONE, A( K+KB, K ), LDA, B( K+KB, K ), + $ LDB, ONE, A( K+KB, K+KB ), LDA ) + CALL DSYMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, + $ A( K, K ), LDA, B( K+KB, K ), LDB, ONE, + $ A( K+KB, K ), LDA ) + CALL DTRSM( 'Left', UPLO, 'No transpose', + $ 'Non-unit', N-K-KB+1, KB, ONE, + $ B( K+KB, K+KB ), LDB, A( K+KB, K ), + $ LDA ) + END IF + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U**T +* + DO 30 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) +* + CALL DTRMM( 'Left', UPLO, 'No transpose', 'Non-unit', + $ K-1, KB, ONE, B, LDB, A( 1, K ), LDA ) + CALL DSYMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ), + $ LDA, B( 1, K ), LDB, ONE, A( 1, K ), LDA ) + CALL DSYR2K( UPLO, 'No transpose', K-1, KB, ONE, + $ A( 1, K ), LDA, B( 1, K ), LDB, ONE, A, + $ LDA ) + CALL DSYMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ), + $ LDA, B( 1, K ), LDB, ONE, A( 1, K ), LDA ) + CALL DTRMM( 'Right', UPLO, 'Transpose', 'Non-unit', + $ K-1, KB, ONE, B( K, K ), LDB, A( 1, K ), + $ LDA ) + CALL DSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + 30 CONTINUE + ELSE +* +* Compute L**T*A*L +* + DO 40 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) +* + CALL DTRMM( 'Right', UPLO, 'No transpose', 'Non-unit', + $ KB, K-1, ONE, B, LDB, A( K, 1 ), LDA ) + CALL DSYMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ), + $ LDA, B( K, 1 ), LDB, ONE, A( K, 1 ), LDA ) + CALL DSYR2K( UPLO, 'Transpose', K-1, KB, ONE, + $ A( K, 1 ), LDA, B( K, 1 ), LDB, ONE, A, + $ LDA ) + CALL DSYMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ), + $ LDA, B( K, 1 ), LDB, ONE, A( K, 1 ), LDA ) + CALL DTRMM( 'Left', UPLO, 'Transpose', 'Non-unit', KB, + $ K-1, ONE, B( K, K ), LDB, A( K, 1 ), LDA ) + CALL DSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + 40 CONTINUE + END IF + END IF + END IF + RETURN +* +* End of DSYGST +* + END diff --git a/dspl/liblapack/SRC/dsygv.f b/dspl/liblapack/SRC/dsygv.f new file mode 100644 index 0000000..651abc5 --- /dev/null +++ b/dspl/liblapack/SRC/dsygv.f @@ -0,0 +1,314 @@ +*> \brief \b DSYGV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYGV computes all the eigenvalues, and optionally, the eigenvectors +*> of a real generalized symmetric-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +*> Here A and B are assumed to be symmetric and B is also +*> positive definite. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> matrix Z of eigenvectors. The eigenvectors are normalized +*> as follows: +*> if ITYPE = 1 or 2, Z**T*B*Z = I; +*> if ITYPE = 3, Z**T*inv(B)*Z = I. +*> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +*> or the lower triangle (if UPLO='L') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, N) +*> On entry, the symmetric positive definite matrix B. +*> If UPLO = 'U', the leading N-by-N upper triangular part of B +*> contains the upper triangular part of the matrix B. +*> If UPLO = 'L', the leading N-by-N lower triangular part of B +*> contains the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**T*U or B = L*L**T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,3*N-1). +*> For optimal efficiency, LWORK >= (NB+2)*N, +*> where NB is the blocksize for DSYTRD returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: DPOTRF or DSYEV returned an error code: +*> <= N: if INFO = i, DSYEV failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYeigen +* +* ===================================================================== + SUBROUTINE DSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER LWKMIN, LWKOPT, NB, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DPOTRF, DSYEV, DSYGST, DTRMM, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKMIN = MAX( 1, 3*N - 1 ) + NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( LWKMIN, ( NB + 2 )*N ) + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYGV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL DPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**T*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + CALL DTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**T*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + CALL DTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of DSYGV +* + END diff --git a/dspl/liblapack/SRC/dsygv_2stage.f b/dspl/liblapack/SRC/dsygv_2stage.f new file mode 100644 index 0000000..5b1a176 --- /dev/null +++ b/dspl/liblapack/SRC/dsygv_2stage.f @@ -0,0 +1,370 @@ +*> \brief \b DSYGV_2STAGE +* +* @precisions fortran d -> s +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYGV_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, +* WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYGV_2STAGE computes all the eigenvalues, and optionally, the eigenvectors +*> of a real generalized symmetric-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +*> Here A and B are assumed to be symmetric and B is also +*> positive definite. +*> This routine use the 2stage technique for the reduction to tridiagonal +*> which showed higher performance on recent architecture and for large +*> sizes N>2000. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> matrix Z of eigenvectors. The eigenvectors are normalized +*> as follows: +*> if ITYPE = 1 or 2, Z**T*B*Z = I; +*> if ITYPE = 3, Z**T*inv(B)*Z = I. +*> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +*> or the lower triangle (if UPLO='L') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, N) +*> On entry, the symmetric positive definite matrix B. +*> If UPLO = 'U', the leading N-by-N upper triangular part of B +*> contains the upper triangular part of the matrix B. +*> If UPLO = 'L', the leading N-by-N lower triangular part of B +*> contains the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**T*U or B = L*L**T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + 2*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + 2*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: DPOTRF or DSYEV returned an error code: +*> <= N: if INFO = i, DSYEV failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleSYeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, + $ WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER NEIG, LWMIN, LHTRD, LWTRD, KD, IB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + EXTERNAL LSAME, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL DPOTRF, DSYGST, DTRMM, DTRSM, XERBLA, + $ DSYEV_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + KD = ILAENV2STAGE( 1, 'DSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'DSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'DSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = 2*N + LHTRD + LWTRD + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYGV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL DPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL DSYEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**T*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + CALL DTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**T*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + CALL DTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LWMIN + RETURN +* +* End of DSYGV_2STAGE +* + END diff --git a/dspl/liblapack/SRC/dsygvd.f b/dspl/liblapack/SRC/dsygvd.f new file mode 100644 index 0000000..29c7828 --- /dev/null +++ b/dspl/liblapack/SRC/dsygvd.f @@ -0,0 +1,380 @@ +*> \brief \b DSYGVD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYGVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, +* LWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDA, LDB, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYGVD computes all the eigenvalues, and optionally, the eigenvectors +*> of a real generalized symmetric-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +*> B are assumed to be symmetric and B is also positive definite. +*> If eigenvectors are desired, it uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> matrix Z of eigenvectors. The eigenvectors are normalized +*> as follows: +*> if ITYPE = 1 or 2, Z**T*B*Z = I; +*> if ITYPE = 3, Z**T*inv(B)*Z = I. +*> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +*> or the lower triangle (if UPLO='L') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, N) +*> On entry, the symmetric matrix B. If UPLO = 'U', the +*> leading N-by-N upper triangular part of B contains the +*> upper triangular part of the matrix B. If UPLO = 'L', +*> the leading N-by-N lower triangular part of B contains +*> the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**T*U or B = L*L**T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK >= 1. +*> If JOBZ = 'N' and N > 1, LWORK >= 2*N+1. +*> If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If N <= 1, LIWORK >= 1. +*> If JOBZ = 'N' and N > 1, LIWORK >= 1. +*> If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: DPOTRF or DSYEVD returned an error code: +*> <= N: if INFO = i and JOBZ = 'N', then the algorithm +*> failed to converge; i off-diagonal elements of an +*> intermediate tridiagonal form did not converge to +*> zero; +*> if INFO = i and JOBZ = 'V', then the algorithm +*> failed to compute an eigenvalue while working on +*> the submatrix lying in rows and columns INFO/(N+1) +*> through mod(INFO,N+1); +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Modified so that no backsubstitution is performed if DSYEVD fails to +*> converge (NEIG in old code could be greater than N causing out of +*> bounds reference to A - reported by Ralf Meyer). Also corrected the +*> description of INFO and the test on ITYPE. Sven, 16 Feb 05. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +*> +* ===================================================================== + SUBROUTINE DSYGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, + $ LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER LIOPT, LIWMIN, LOPT, LWMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DPOTRF, DSYEVD, DSYGST, DTRMM, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 6*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + 1 + END IF + LOPT = LWMIN + LIOPT = LIWMIN + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LOPT + IWORK( 1 ) = LIOPT +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL DPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL DSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK, LIWORK, + $ INFO ) + LOPT = MAX( DBLE( LOPT ), DBLE( WORK( 1 ) ) ) + LIOPT = MAX( DBLE( LIOPT ), DBLE( IWORK( 1 ) ) ) +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**T*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + CALL DTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, N, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**T*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + CALL DTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, N, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LOPT + IWORK( 1 ) = LIOPT +* + RETURN +* +* End of DSYGVD +* + END diff --git a/dspl/liblapack/SRC/dsygvx.f b/dspl/liblapack/SRC/dsygvx.f new file mode 100644 index 0000000..aeca602 --- /dev/null +++ b/dspl/liblapack/SRC/dsygvx.f @@ -0,0 +1,465 @@ +*> \brief \b DSYGVX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYGVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYGVX( ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB, +* VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, +* LWORK, IWORK, IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, ITYPE, IU, LDA, LDB, LDZ, LWORK, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), W( * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYGVX computes selected eigenvalues, and optionally, eigenvectors +*> of a real generalized symmetric-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A +*> and B are assumed to be symmetric and B is also positive definite. +*> Eigenvalues and eigenvectors can be selected by specifying either a +*> range of values or a range of indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A and B are stored; +*> = 'L': Lower triangle of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix pencil (A,B). N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, N) +*> On entry, the symmetric matrix B. If UPLO = 'U', the +*> leading N-by-N upper triangular part of B contains the +*> upper triangular part of the matrix B. If UPLO = 'L', +*> the leading N-by-N lower triangular part of B contains +*> the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**T*U or B = L*L**T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing C to tridiagonal form, where C is the symmetric +*> matrix of the standard symmetric problem to which the +*> generalized problem is transformed. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> On normal exit, the first M elements contain the selected +*> eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'N', then Z is not referenced. +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> The eigenvectors are normalized as follows: +*> if ITYPE = 1 or 2, Z**T*B*Z = I; +*> if ITYPE = 3, Z**T*inv(B)*Z = I. +*> +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,8*N). +*> For optimal efficiency, LWORK >= (NB+3)*N, +*> where NB is the blocksize for DSYTRD returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: DPOTRF or DSYEVX returned an error code: +*> <= N: if INFO = i, DSYEVX failed to converge; +*> i eigenvectors failed to converge. Their indices +*> are stored in array IFAIL. +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleSYeigen +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE DSYGVX( ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB, + $ VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, + $ LWORK, IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, ITYPE, IU, LDA, LDB, LDZ, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ + CHARACTER TRANS + INTEGER LWKMIN, LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DPOTRF, DSYEVX, DSYGST, DTRMM, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + UPPER = LSAME( UPLO, 'U' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -3 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -13 + END IF + END IF + END IF + IF (INFO.EQ.0) THEN + IF (LDZ.LT.1 .OR. (WANTZ .AND. LDZ.LT.N)) THEN + INFO = -18 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + LWKMIN = MAX( 1, 8*N ) + NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( LWKMIN, ( NB + 3 )*N ) + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYGVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + RETURN + END IF +* +* Form a Cholesky factorization of B. +* + CALL DPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL DSYEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, ABSTOL, + $ M, W, Z, LDZ, WORK, LWORK, IWORK, IFAIL, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( INFO.GT.0 ) + $ M = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**T*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + CALL DTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, M, ONE, B, + $ LDB, Z, LDZ ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**T*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + CALL DTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, M, ONE, B, + $ LDB, Z, LDZ ) + END IF + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DSYGVX +* + END diff --git a/dspl/liblapack/SRC/dsyrfs.f b/dspl/liblapack/SRC/dsyrfs.f new file mode 100644 index 0000000..2732f17 --- /dev/null +++ b/dspl/liblapack/SRC/dsyrfs.f @@ -0,0 +1,441 @@ +*> \brief \b DSYRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, +* X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric indefinite, and +*> provides error bounds and backward error estimates for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> The factored form of the matrix A. AF contains the block +*> diagonal matrix D and the multipliers used to obtain the +*> factor U or L from the factorization A = U*D*U**T or +*> A = L*D*L**T as computed by DSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSYTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by DSYTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, + $ X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLACN2, DSYMV, DSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DSYMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, + $ WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + DO 40 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 40 CONTINUE + WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK + DO 60 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 60 CONTINUE + WORK( K ) = WORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL DSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N, + $ INFO ) + CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL DLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**T). +* + CALL DSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N, + $ INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 120 CONTINUE + CALL DSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N, + $ INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of DSYRFS +* + END diff --git a/dspl/liblapack/SRC/dsyrfsx.f b/dspl/liblapack/SRC/dsyrfsx.f new file mode 100644 index 0000000..e128cd4 --- /dev/null +++ b/dspl/liblapack/SRC/dsyrfsx.f @@ -0,0 +1,700 @@ +*> \brief \b DSYRFSX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYRFSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYRFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, +* S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, EQUED +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX, * ), WORK( * ) +* DOUBLE PRECISION S( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYRFSX improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric indefinite, and +*> provides error bounds and backward error estimates for the +*> solution. In addition to normwise error bound, the code provides +*> maximum componentwise error bound if possible. See comments for +*> ERR_BNDS_NORM and ERR_BNDS_COMP for details of the error bounds. +*> +*> The original system of linear equations may have been equilibrated +*> before calling this routine, as described by arguments EQUED and S +*> below. In this case, the solution and error bounds returned are +*> for the original unequilibrated system. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done to A +*> before calling this routine. This is needed to compute +*> the solution and error bounds correctly. +*> = 'N': No equilibration +*> = 'Y': Both row and column equilibration, i.e., A has been +*> replaced by diag(S) * A * diag(S). +*> The right hand side B has been changed accordingly. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular +*> part of the matrix A, and the strictly lower triangular +*> part of A is not referenced. If UPLO = 'L', the leading +*> N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> The factored form of the matrix A. AF contains the block +*> diagonal matrix D and the multipliers used to obtain the +*> factor U or L from the factorization A = U*D*U**T or A = +*> L*D*L**T as computed by DSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSYTRF. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A. If EQUED = 'Y', A is multiplied on +*> the left and right by diag(S). S is an input argument if FACT = +*> 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED +*> = 'Y', each element of S must be positive. If S is output, each +*> element of S is a power of the radix. If S is input, each element +*> of S should be a power of the radix to ensure a reliable solution +*> and error estimates. Scaling by powers of the radix does not cause +*> rounding errors unless the result underflows or overflows. +*> Rounding errors during scaling lead to refining with a matrix that +*> is not equivalent to the input matrix, producing error estimates +*> that may not be reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by DGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is DOUBLE PRECISION array, dimension (NPARAMS) +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0D+0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DSYRFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO, EQUED + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX, * ), WORK( * ) + DOUBLE PRECISION S( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION ITREF_DEFAULT, ITHRESH_DEFAULT + DOUBLE PRECISION COMPONENTWISE_DEFAULT, RTHRESH_DEFAULT + DOUBLE PRECISION DZTHRESH_DEFAULT + PARAMETER ( ITREF_DEFAULT = 1.0D+0 ) + PARAMETER ( ITHRESH_DEFAULT = 10.0D+0 ) + PARAMETER ( COMPONENTWISE_DEFAULT = 1.0D+0 ) + PARAMETER ( RTHRESH_DEFAULT = 0.5D+0 ) + PARAMETER ( DZTHRESH_DEFAULT = 0.25D+0 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. Local Scalars .. + CHARACTER(1) NORM + LOGICAL RCEQU + INTEGER J, PREC_TYPE, REF_TYPE, N_NORMS + DOUBLE PRECISION ANORM, RCOND_TMP + DOUBLE PRECISION ILLRCOND_THRESH, ERR_LBND, CWISE_WRONG + LOGICAL IGNORE_CWISE + INTEGER ITHRESH + DOUBLE PRECISION RTHRESH, UNSTABLE_THRESH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DSYCON, DLA_SYRFSX_EXTENDED +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. External Functions .. + EXTERNAL LSAME, ILAPREC + EXTERNAL DLAMCH, DLANSY, DLA_SYRCOND + DOUBLE PRECISION DLAMCH, DLANSY, DLA_SYRCOND + LOGICAL LSAME + INTEGER ILAPREC +* .. +* .. Executable Statements .. +* +* Check the input parameters. +* + INFO = 0 + REF_TYPE = INT( ITREF_DEFAULT ) + IF ( NPARAMS .GE. LA_LINRX_ITREF_I ) THEN + IF ( PARAMS( LA_LINRX_ITREF_I ) .LT. 0.0D+0 ) THEN + PARAMS( LA_LINRX_ITREF_I ) = ITREF_DEFAULT + ELSE + REF_TYPE = PARAMS( LA_LINRX_ITREF_I ) + END IF + END IF +* +* Set default parameters. +* + ILLRCOND_THRESH = DBLE( N )*DLAMCH( 'Epsilon' ) + ITHRESH = INT( ITHRESH_DEFAULT ) + RTHRESH = RTHRESH_DEFAULT + UNSTABLE_THRESH = DZTHRESH_DEFAULT + IGNORE_CWISE = COMPONENTWISE_DEFAULT .EQ. 0.0D+0 +* + IF ( NPARAMS.GE.LA_LINRX_ITHRESH_I ) THEN + IF ( PARAMS( LA_LINRX_ITHRESH_I ).LT.0.0D+0 ) THEN + PARAMS( LA_LINRX_ITHRESH_I ) = ITHRESH + ELSE + ITHRESH = INT( PARAMS( LA_LINRX_ITHRESH_I ) ) + END IF + END IF + IF ( NPARAMS.GE.LA_LINRX_CWISE_I ) THEN + IF ( PARAMS( LA_LINRX_CWISE_I ).LT.0.0D+0 ) THEN + IF ( IGNORE_CWISE ) THEN + PARAMS( LA_LINRX_CWISE_I ) = 0.0D+0 + ELSE + PARAMS( LA_LINRX_CWISE_I ) = 1.0D+0 + END IF + ELSE + IGNORE_CWISE = PARAMS( LA_LINRX_CWISE_I ) .EQ. 0.0D+0 + END IF + END IF + IF ( REF_TYPE .EQ. 0 .OR. N_ERR_BNDS .EQ. 0 ) THEN + N_NORMS = 0 + ELSE IF ( IGNORE_CWISE ) THEN + N_NORMS = 1 + ELSE + N_NORMS = 2 + END IF +* + RCEQU = LSAME( EQUED, 'Y' ) +* +* Test input parameters. +* + IF ( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RCEQU .AND. .NOT.LSAME( EQUED, 'N' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYRFSX', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + RCOND = 1.0D+0 + DO J = 1, NRHS + BERR( J ) = 0.0D+0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 0.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 0.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 1.0D+0 + END IF + END DO + RETURN + END IF +* +* Default to failure. +* + RCOND = 0.0D+0 + DO J = 1, NRHS + BERR( J ) = 1.0D+0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 0.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 0.0D+0 + END IF + END DO +* +* Compute the norm of A and the reciprocal of the condition +* number of A. +* + NORM = 'I' + ANORM = DLANSY( NORM, UPLO, N, A, LDA, WORK ) + CALL DSYCON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, + $ IWORK, INFO ) +* +* Perform refinement on each right-hand side +* + IF ( REF_TYPE .NE. 0 ) THEN + + PREC_TYPE = ILAPREC( 'E' ) + + CALL DLA_SYRFSX_EXTENDED( PREC_TYPE, UPLO, N, + $ NRHS, A, LDA, AF, LDAF, IPIV, RCEQU, S, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, ERR_BNDS_COMP, + $ WORK( N+1 ), WORK( 1 ), WORK( 2*N+1 ), WORK( 1 ), RCOND, + $ ITHRESH, RTHRESH, UNSTABLE_THRESH, IGNORE_CWISE, + $ INFO ) + END IF + + ERR_LBND = MAX( 10.0D+0, SQRT( DBLE( N ) ) )*DLAMCH( 'Epsilon' ) + IF (N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 1) THEN +* +* Compute scaled normwise condition number cond(A*C). +* + IF ( RCEQU ) THEN + RCOND_TMP = DLA_SYRCOND( UPLO, N, A, LDA, AF, LDAF, IPIV, + $ -1, S, INFO, WORK, IWORK ) + ELSE + RCOND_TMP = DLA_SYRCOND( UPLO, N, A, LDA, AF, LDAF, IPIV, + $ 0, S, INFO, WORK, IWORK ) + END IF + DO J = 1, NRHS +* +* Cap the error at 1.0. +* + IF (N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .GT. 1.0D+0) + $ ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 0.0D+0 + IF ( INFO .LE. N ) INFO = N + J + ELSE IF (ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .LT. ERR_LBND) + $ THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF +* +* Save the condition number. +* + IF (N_ERR_BNDS .GE. LA_LINRX_RCOND_I) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + END DO + END IF + + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 2 ) THEN +* +* Compute componentwise condition number cond(A*diag(Y(:,J))) for +* each right-hand side using the current solution as an estimate of +* the true solution. If the componentwise error estimate is too +* large, then the solution is a lousy estimate of truth and the +* estimated RCOND may be too optimistic. To avoid misleading users, +* the inverse condition number is set to 0.0 when the estimated +* cwise error is at least CWISE_WRONG. +* + CWISE_WRONG = SQRT( DLAMCH( 'Epsilon' ) ) + DO J = 1, NRHS + IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .LT. CWISE_WRONG ) + $ THEN + RCOND_TMP = DLA_SYRCOND( UPLO, N, A, LDA, AF, LDAF, IPIV, + $ 1, X(1,J), INFO, WORK, IWORK ) + ELSE + RCOND_TMP = 0.0D+0 + END IF +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .GT. 1.0D+0 ) + $ ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 0.0D+0 + IF ( .NOT. IGNORE_CWISE + $ .AND. INFO.LT.N + J ) INFO = N + J + ELSE IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) + $ .LT. ERR_LBND ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF +* + RETURN +* +* End of DSYRFSX +* + END diff --git a/dspl/liblapack/SRC/dsysv.f b/dspl/liblapack/SRC/dsysv.f new file mode 100644 index 0000000..c9811b5 --- /dev/null +++ b/dspl/liblapack/SRC/dsysv.f @@ -0,0 +1,270 @@ +*> \brief DSYSV computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYSV computes the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> The diagonal pivoting method is used to factor A as +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then +*> used to solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the block diagonal matrix D and the +*> multipliers used to obtain the factor U or L from the +*> factorization A = U*D*U**T or A = L*D*L**T as computed by +*> DSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, as +*> determined by DSYTRF. If IPIV(k) > 0, then rows and columns +*> k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +*> diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +*> then rows and columns k-1 and -IPIV(k) were interchanged and +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +*> IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +*> -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +*> diagonal block. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= 1, and for best performance +*> LWORK >= max(1,N*NB), where NB is the optimal blocksize for +*> DSYTRF. +*> for LWORK < N, TRS will be done with Level BLAS 2 +*> for LWORK >= N, TRS will be done with Level BLAS 3 +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYsolve +* +* ===================================================================== + SUBROUTINE DSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DSYTRF, DSYTRS, DSYTRS2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + CALL DSYTRF( UPLO, N, A, LDA, IPIV, WORK, -1, INFO ) + LWKOPT = WORK(1) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYSV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL DSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + IF ( LWORK.LT.N ) THEN +* +* Solve with TRS ( Use Level BLAS 2) +* + CALL DSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* + ELSE +* +* Solve with TRS2 ( Use Level BLAS 3) +* + CALL DSYTRS2( UPLO,N,NRHS,A,LDA,IPIV,B,LDB,WORK,INFO ) +* + END IF +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DSYSV +* + END diff --git a/dspl/liblapack/SRC/dsysv_aa.f b/dspl/liblapack/SRC/dsysv_aa.f new file mode 100644 index 0000000..cbccd5e --- /dev/null +++ b/dspl/liblapack/SRC/dsysv_aa.f @@ -0,0 +1,254 @@ +*> \brief DSYSV_AA computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYSV_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYSV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYSV computes the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Aasen's algorithm is used to factor A as +*> A = U * T * U**T, if UPLO = 'U', or +*> A = L * T * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is symmetric tridiagonal. The factored +*> form of A is then used to solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the tridiagonal matrix T and the +*> multipliers used to obtain the factor U or L from the +*> factorization A = U*T*U**T or A = L*T*L**T as computed by +*> DSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= MAX(1,2*N,3*N-2), and for +*> the best performance, LWORK >= MAX(1,N*NB), where NB is +*> the optimal blocksize for DSYTRF_AA. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleSYsolve +* +* ===================================================================== + SUBROUTINE DSYSV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT, LWKOPT_SYTRF, LWKOPT_SYTRS +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DSYTRF_AA, DSYTRS_AA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX(2*N, 3*N-2) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + CALL DSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, -1, INFO ) + LWKOPT_SYTRF = INT( WORK(1) ) + CALL DSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ -1, INFO ) + LWKOPT_SYTRS = INT( WORK(1) ) + LWKOPT = MAX( LWKOPT_SYTRF, LWKOPT_SYTRS ) + WORK( 1 ) = LWKOPT + IF( LWORK.LT.LWKOPT .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYSV_AA ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*T*U**T or A = L*T*L**T. +* + CALL DSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL DSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DSYSV_AA +* + END diff --git a/dspl/liblapack/SRC/dsysv_aa_2stage.f b/dspl/liblapack/SRC/dsysv_aa_2stage.f new file mode 100644 index 0000000..ac3c77d --- /dev/null +++ b/dspl/liblapack/SRC/dsysv_aa_2stage.f @@ -0,0 +1,280 @@ +*> \brief DSYSV_AA_2STAGE computes the solution to system of linear equations A * X = B for SY matrices +* +* @generated from SRC/chesv_aa_2stage.f, fortran c -> d, Tue Oct 31 11:22:31 2017 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYSV_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYSV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, +* IPIV, IPIV2, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* DOUBLE PRECISION A( LDA, * ), TB( * ), B( LDB, *), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYSV_AA_2STAGE computes the solution to a real system of +*> linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Aasen's 2-stage algorithm is used to factor A as +*> A = U * T * U**T, if UPLO = 'U', or +*> A = L * T * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is symmetric and band. The matrix T is +*> then LU-factored with partial pivoting. The factored form of A +*> is then used to solve the system of equations A * X = B. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is DOUBLE PRECISION array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleSYsolve +* +* ===================================================================== + SUBROUTINE DSYSV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LDB, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSYTRF_AA_2STAGE, DSYTRS_AA_2STAGE, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + CALL DSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, + $ IPIV2, WORK, -1, INFO ) + LWKOPT = INT( WORK(1) ) + IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYSV_AA_2STAGE', -INFO ) + RETURN + ELSE IF( WQUERY .OR. TQUERY ) THEN + RETURN + END IF +* +* +* Compute the factorization A = U*T*U**T or A = L*T*L**T. +* + CALL DSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, IPIV2, + $ WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL DSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, + $ IPIV2, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DSYSV_AA_2STAGE +* + END diff --git a/dspl/liblapack/SRC/dsysv_rk.f b/dspl/liblapack/SRC/dsysv_rk.f new file mode 100644 index 0000000..6a6036b --- /dev/null +++ b/dspl/liblapack/SRC/dsysv_rk.f @@ -0,0 +1,317 @@ +*> \brief DSYSV_RK computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYSV_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYSV_RK( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), E( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> DSYSV_RK computes the solution to a real system of linear +*> equations A * X = B, where A is an N-by-N symmetric matrix +*> and X and B are N-by-NRHS matrices. +*> +*> The bounded Bunch-Kaufman (rook) diagonal pivoting method is used +*> to factor A as +*> A = P*U*D*(U**T)*(P**T), if UPLO = 'U', or +*> A = P*L*D*(L**T)*(P**T), if UPLO = 'L', +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> DSYTRF_RK is called to compute the factorization of a real +*> symmetric matrix. The factored form of A is then used to solve +*> the system of equations A * X = B by calling BLAS3 routine DSYTRS_3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, if INFO = 0, diagonal of the block diagonal +*> matrix D and factors U or L as computed by DSYTRF_RK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> For more info see the description of DSYTRF_RK routine. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> On exit, contains the output computed by the factorization +*> routine DSYTRF_RK, i.e. the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> +*> For more info see the description of DSYTRF_RK routine. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, +*> as determined by DSYTRF_RK. +*> +*> For more info see the description of DSYTRF_RK routine. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension ( MAX(1,LWORK) ). +*> Work array used in the factorization stage. +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= 1. For best performance +*> of factorization stage LWORK >= max(1,N*NB), where NB is +*> the optimal blocksize for DSYTRF_RK. +*> +*> If LWORK = -1, then a workspace query is assumed; +*> the routine only calculates the optimal size of the WORK +*> array for factorization stage, returns this value as +*> the first entry of the WORK array, and no error message +*> related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYSV_RK( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, + $ WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DSYTRF_RK, DSYTRS_3 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + CALL DSYTRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, -1, INFO ) + LWKOPT = WORK(1) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYSV_RK ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = P*U*D*(U**T)*(P**T) or +* A = P*U*D*(U**T)*(P**T). +* + CALL DSYTRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, INFO ) +* + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B with BLAS3 solver, overwriting B with X. +* + CALL DSYTRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DSYSV_RK +* + END diff --git a/dspl/liblapack/SRC/dsysv_rook.f b/dspl/liblapack/SRC/dsysv_rook.f new file mode 100644 index 0000000..d69c176 --- /dev/null +++ b/dspl/liblapack/SRC/dsysv_rook.f @@ -0,0 +1,293 @@ +*> \brief DSYSV_ROOK computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYSV_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYSV_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYSV_ROOK computes the solution to a real system of linear +*> equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> The diagonal pivoting method is used to factor A as +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> DSYTRF_ROOK is called to compute the factorization of a real +*> symmetric matrix A using the bounded Bunch-Kaufman ("rook") diagonal +*> pivoting method. +*> +*> The factored form of A is then used to solve the system +*> of equations A * X = B by calling DSYTRS_ROOK. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the block diagonal matrix D and the +*> multipliers used to obtain the factor U or L from the +*> factorization A = U*D*U**T or A = L*D*L**T as computed by +*> DSYTRF_ROOK. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, +*> as determined by DSYTRF_ROOK. +*> +*> If UPLO = 'U': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= 1, and for best performance +*> LWORK >= max(1,N*NB), where NB is the optimal blocksize for +*> DSYTRF_ROOK. +*> +*> TRS will be done with Level 2 BLAS +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleSYsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> April 2012, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYSV_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DSYTRF_ROOK, DSYTRS_ROOK +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + CALL DSYTRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, -1, INFO ) + LWKOPT = WORK(1) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYSV_ROOK ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL DSYTRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* +* Solve with TRS_ROOK ( Use Level 2 BLAS) +* + CALL DSYTRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DSYSV_ROOK +* + END diff --git a/dspl/liblapack/SRC/dsysvx.f b/dspl/liblapack/SRC/dsysvx.f new file mode 100644 index 0000000..cd05986 --- /dev/null +++ b/dspl/liblapack/SRC/dsysvx.f @@ -0,0 +1,416 @@ +*> \brief DSYSVX computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, +* LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER FACT, UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYSVX uses the diagonal pivoting factorization to compute the +*> solution to a real system of linear equations A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'N', the diagonal pivoting method is used to factor A. +*> The form of the factorization is +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> 2. If some D(i,i)=0, so that D is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 3. The system of equations is solved for X using the factored form +*> of A. +*> +*> 4. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of A has been +*> supplied on entry. +*> = 'F': On entry, AF and IPIV contain the factored form of +*> A. AF and IPIV will not be modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**T or A = L*D*L**T as computed by DSYTRF. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**T or A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains details of the interchanges and the block structure +*> of D, as determined by DSYTRF. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains details of the interchanges and the block structure +*> of D, as determined by DSYTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The estimate of the reciprocal condition number of the matrix +*> A. If RCOND is less than the machine precision (in +*> particular, if RCOND = 0), the matrix is singular to working +*> precision. This condition is indicated by a return code of +*> INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= max(1,3*N), and for best +*> performance, when FACT = 'N', LWORK >= max(1,3*N,N*NB), where +*> NB is the optimal blocksize for DSYTRF. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: D(i,i) is exactly zero. The factorization +*> has been completed but the factor D is exactly +*> singular, so the solution and error bounds could +*> not be computed. RCOND = 0 is returned. +*> = N+1: D is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleSYsolve +* +* ===================================================================== + SUBROUTINE DSYSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, + $ LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, + $ IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOFACT + INTEGER LWKOPT, NB + DOUBLE PRECISION ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, ILAENV, DLAMCH, DLANSY +* .. +* .. External Subroutines .. + EXTERNAL DLACPY, DSYCON, DSYRFS, DSYTRF, DSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.MAX( 1, 3*N ) .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKOPT = MAX( 1, 3*N ) + IF( NOFACT ) THEN + NB = ILAENV( 1, 'DSYTRF', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( LWKOPT, N*NB ) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYSVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL DLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL DSYTRF( UPLO, N, AF, LDAF, IPIV, WORK, LWORK, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = DLANSY( 'I', UPLO, N, A, LDA, WORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL DSYCON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* Compute the solution vectors X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DSYTRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL DSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X, + $ LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DSYSVX +* + END diff --git a/dspl/liblapack/SRC/dsysvxx.f b/dspl/liblapack/SRC/dsysvxx.f new file mode 100644 index 0000000..6e167d8 --- /dev/null +++ b/dspl/liblapack/SRC/dsysvxx.f @@ -0,0 +1,696 @@ +*> \brief \b DSYSVXX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYSVXX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYSVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, +* EQUED, S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, +* N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, +* NPARAMS, PARAMS, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* DOUBLE PRECISION RCOND, RPVGRW +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX, * ), WORK( * ) +* DOUBLE PRECISION S( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYSVXX uses the diagonal pivoting factorization to compute the +*> solution to a double precision system of linear equations A * X = B, where A +*> is an N-by-N symmetric matrix and X and B are N-by-NRHS matrices. +*> +*> If requested, both normwise and maximum componentwise error bounds +*> are returned. DSYSVXX will return a solution with a tiny +*> guaranteed error (O(eps) where eps is the working machine +*> precision) unless the matrix is very ill-conditioned, in which +*> case a warning is returned. Relevant condition numbers also are +*> calculated and returned. +*> +*> DSYSVXX accepts user-provided factorizations and equilibration +*> factors; see the definitions of the FACT and EQUED options. +*> Solving with refinement and using a factorization from a previous +*> DSYSVXX call will also produce a solution with either O(eps) +*> errors or warnings, but we cannot make that claim for general +*> user-provided factorizations and equilibration factors if they +*> differ from what DSYSVXX would itself produce. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', double precision scaling factors are computed to equilibrate +*> the system: +*> +*> diag(S)*A*diag(S) *inv(diag(S))*X = diag(S)*B +*> +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor +*> the matrix A (after equilibration if FACT = 'E') as +*> +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> 3. If some D(i,i)=0, so that D is exactly singular, then the +*> routine returns with INFO = i. Otherwise, the factored form of A +*> is used to estimate the condition number of the matrix A (see +*> argument RCOND). If the reciprocal of the condition number is +*> less than machine precision, the routine still goes on to solve +*> for X and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), +*> the routine will use iterative refinement to try to get a small +*> error and error bounds. Refinement calculates the residual to at +*> least twice the working precision. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(R) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF and IPIV contain the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by S. +*> A, AF, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular +*> part of the matrix A, and the strictly lower triangular +*> part of A is not referenced. If UPLO = 'L', the leading +*> N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is DOUBLE PRECISION array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L from the factorization A = +*> U*D*U**T or A = L*D*L**T as computed by DSYTRF. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L from the factorization A = +*> U*D*U**T or A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains details of the interchanges and the block +*> structure of D, as determined by DSYTRF. If IPIV(k) > 0, +*> then rows and columns k and IPIV(k) were interchanged and +*> D(k,k) is a 1-by-1 diagonal block. If UPLO = 'U' and +*> IPIV(k) = IPIV(k-1) < 0, then rows and columns k-1 and +*> -IPIV(k) were interchanged and D(k-1:k,k-1:k) is a 2-by-2 +*> diagonal block. If UPLO = 'L' and IPIV(k) = IPIV(k+1) < 0, +*> then rows and columns k+1 and -IPIV(k) were interchanged +*> and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains details of the interchanges and the block +*> structure of D, as determined by DSYTRF. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Both row and column equilibration, i.e., A has been +*> replaced by diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A. If EQUED = 'Y', A is multiplied on +*> the left and right by diag(S). S is an input argument if FACT = +*> 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED +*> = 'Y', each element of S must be positive. If S is output, each +*> element of S is a power of the radix. If S is input, each element +*> of S should be a power of the radix to ensure a reliable solution +*> and error estimates. Scaling by powers of the radix does not cause +*> rounding errors unless the result underflows or overflows. +*> Rounding errors during scaling lead to refining with a matrix that +*> is not equivalent to the input matrix, producing error estimates +*> that may not be reliable. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if EQUED = 'Y', B is overwritten by diag(S)*B; +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X to the original +*> system of equations. Note that A and B are modified on exit if +*> EQUED .ne. 'N', and the solution to the equilibrated system is +*> inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] RPVGRW +*> \verbatim +*> RPVGRW is DOUBLE PRECISION +*> Reciprocal pivot growth. On exit, this contains the reciprocal +*> pivot growth factor norm(A)/norm(U). The "max absolute element" +*> norm is used. If this is much less than 1, then the stability of +*> the LU factorization of the (equilibrated) matrix A could be poor. +*> This also means that the solution X, estimated condition numbers, +*> and error bounds could be unreliable. If factorization fails with +*> 0 for the leading INFO columns of A. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is DOUBLE PRECISION array, dimension (NPARAMS) +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0D+0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the extra-precise refinement algorithm. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYsolve +* +* ===================================================================== + SUBROUTINE DSYSVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ EQUED, S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, + $ N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, + $ NPARAMS, PARAMS, WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + DOUBLE PRECISION RCOND, RPVGRW +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + DOUBLE PRECISION A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX, * ), WORK( * ) + DOUBLE PRECISION S( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER INFEQU, J + DOUBLE PRECISION AMAX, BIGNUM, SMIN, SMAX, SCOND, SMLNUM +* .. +* .. External Functions .. + EXTERNAL LSAME, DLAMCH, DLA_SYRPVGRW + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLA_SYRPVGRW +* .. +* .. External Subroutines .. + EXTERNAL DSYEQUB, DSYTRF, DSYTRS, + $ DLACPY, DLAQSY, XERBLA, DLASCL2, DSYRFSX +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + ENDIF +* +* Default is failure. If an input parameter is wrong or +* factorization fails, make everything look horrible. Only the +* pivot growth is set here, the rest is initialized in DSYRFSX. +* + RPVGRW = ZERO +* +* Test the input parameters. PARAMS is not tested until DSYRFSX. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT. + $ LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME(UPLO, 'U') .AND. + $ .NOT.LSAME(UPLO, 'L') ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF ( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYSVXX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL DSYEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL DLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) CALL DLASCL2( N, NRHS, S, B, LDB ) +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LDL^T or UDU^T factorization of A. +* + CALL DLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL DSYTRF( UPLO, N, AF, LDAF, IPIV, WORK, 5*MAX(1,N), INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Pivot in column INFO is exactly 0 +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + IF ( N.GT.0 ) + $ RPVGRW = DLA_SYRPVGRW(UPLO, N, INFO, A, LDA, AF, + $ LDAF, IPIV, WORK ) + RETURN + END IF + END IF +* +* Compute the reciprocal pivot growth factor RPVGRW. +* + IF ( N.GT.0 ) + $ RPVGRW = DLA_SYRPVGRW( UPLO, N, INFO, A, LDA, AF, LDAF, + $ IPIV, WORK ) +* +* Compute the solution matrix X. +* + CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL DSYTRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL DSYRFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, IWORK, INFO ) +* +* Scale solutions. +* + IF ( RCEQU ) THEN + CALL DLASCL2 ( N, NRHS, S, X, LDX ) + END IF +* + RETURN +* +* End of DSYSVXX +* + END diff --git a/dspl/liblapack/SRC/dsyswapr.f b/dspl/liblapack/SRC/dsyswapr.f new file mode 100644 index 0000000..6e6c0f7 --- /dev/null +++ b/dspl/liblapack/SRC/dsyswapr.f @@ -0,0 +1,193 @@ +*> \brief \b DSYSWAPR applies an elementary permutation on the rows and columns of a symmetric matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYSWAPR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYSWAPR( UPLO, N, A, LDA, I1, I2) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER I1, I2, LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, N ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYSWAPR applies an elementary permutation on the rows and the columns of +*> a symmetric matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the NB diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by DSYTRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] I1 +*> \verbatim +*> I1 is INTEGER +*> Index of the first row to swap +*> \endverbatim +*> +*> \param[in] I2 +*> \verbatim +*> I2 is INTEGER +*> Index of the second row to swap +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYauxiliary +* +* ===================================================================== + SUBROUTINE DSYSWAPR( UPLO, N, A, LDA, I1, I2) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER I1, I2, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, N ) +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + DOUBLE PRECISION TMP +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSWAP +* .. +* .. Executable Statements .. +* + UPPER = LSAME( UPLO, 'U' ) + IF (UPPER) THEN +* +* UPPER +* first swap +* - swap column I1 and I2 from I1 to I1-1 + CALL DSWAP( I1-1, A(1,I1), 1, A(1,I2), 1 ) +* +* second swap : +* - swap A(I1,I1) and A(I2,I2) +* - swap row I1 from I1+1 to I2-1 with col I2 from I1+1 to I2-1 + TMP=A(I1,I1) + A(I1,I1)=A(I2,I2) + A(I2,I2)=TMP +* + DO I=1,I2-I1-1 + TMP=A(I1,I1+I) + A(I1,I1+I)=A(I1+I,I2) + A(I1+I,I2)=TMP + END DO +* +* third swap +* - swap row I1 and I2 from I2+1 to N + DO I=I2+1,N + TMP=A(I1,I) + A(I1,I)=A(I2,I) + A(I2,I)=TMP + END DO +* + ELSE +* +* LOWER +* first swap +* - swap row I1 and I2 from I1 to I1-1 + CALL DSWAP( I1-1, A(I1,1), LDA, A(I2,1), LDA ) +* +* second swap : +* - swap A(I1,I1) and A(I2,I2) +* - swap col I1 from I1+1 to I2-1 with row I2 from I1+1 to I2-1 + TMP=A(I1,I1) + A(I1,I1)=A(I2,I2) + A(I2,I2)=TMP +* + DO I=1,I2-I1-1 + TMP=A(I1+I,I1) + A(I1+I,I1)=A(I2,I1+I) + A(I2,I1+I)=TMP + END DO +* +* third swap +* - swap col I1 and I2 from I2+1 to N + DO I=I2+1,N + TMP=A(I,I1) + A(I,I1)=A(I,I2) + A(I,I2)=TMP + END DO +* + ENDIF + END SUBROUTINE DSYSWAPR + diff --git a/dspl/liblapack/SRC/dsytd2.f b/dspl/liblapack/SRC/dsytd2.f new file mode 100644 index 0000000..6fb4d55 --- /dev/null +++ b/dspl/liblapack/SRC/dsytd2.f @@ -0,0 +1,323 @@ +*> \brief \b DSYTD2 reduces a symmetric matrix to real symmetric tridiagonal form by an orthogonal similarity transformation (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTD2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTD2( UPLO, N, A, LDA, D, E, TAU, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal +*> form T by an orthogonal similarity transformation: Q**T * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the orthogonal +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the orthogonal matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n-1) . . . H(2) H(1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +*> A(1:i-1,i+1), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(n-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +*> and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( d e v2 v3 v4 ) ( d ) +*> ( d e v3 v4 ) ( e d ) +*> ( d e v4 ) ( v1 e d ) +*> ( d e ) ( v1 v2 e d ) +*> ( d ) ( v1 v2 v3 e d ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSYTD2( UPLO, N, A, LDA, D, E, TAU, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO, HALF + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0, + $ HALF = 1.0D0 / 2.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + DOUBLE PRECISION ALPHA, TAUI +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DLARFG, DSYMV, DSYR2, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTD2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A +* + DO 10 I = N - 1, 1, -1 +* +* Generate elementary reflector H(i) = I - tau * v * v**T +* to annihilate A(1:i-1,i+1) +* + CALL DLARFG( I, A( I, I+1 ), A( 1, I+1 ), 1, TAUI ) + E( I ) = A( I, I+1 ) +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(1:i,1:i) +* + A( I, I+1 ) = ONE +* +* Compute x := tau * A * v storing x in TAU(1:i) +* + CALL DSYMV( UPLO, I, TAUI, A, LDA, A( 1, I+1 ), 1, ZERO, + $ TAU, 1 ) +* +* Compute w := x - 1/2 * tau * (x**T * v) * v +* + ALPHA = -HALF*TAUI*DDOT( I, TAU, 1, A( 1, I+1 ), 1 ) + CALL DAXPY( I, ALPHA, A( 1, I+1 ), 1, TAU, 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w**T - w * v**T +* + CALL DSYR2( UPLO, I, -ONE, A( 1, I+1 ), 1, TAU, 1, A, + $ LDA ) +* + A( I, I+1 ) = E( I ) + END IF + D( I+1 ) = A( I+1, I+1 ) + TAU( I ) = TAUI + 10 CONTINUE + D( 1 ) = A( 1, 1 ) + ELSE +* +* Reduce the lower triangle of A +* + DO 20 I = 1, N - 1 +* +* Generate elementary reflector H(i) = I - tau * v * v**T +* to annihilate A(i+2:n,i) +* + CALL DLARFG( N-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, + $ TAUI ) + E( I ) = A( I+1, I ) +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(i+1:n,i+1:n) +* + A( I+1, I ) = ONE +* +* Compute x := tau * A * v storing y in TAU(i:n-1) +* + CALL DSYMV( UPLO, N-I, TAUI, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, TAU( I ), 1 ) +* +* Compute w := x - 1/2 * tau * (x**T * v) * v +* + ALPHA = -HALF*TAUI*DDOT( N-I, TAU( I ), 1, A( I+1, I ), + $ 1 ) + CALL DAXPY( N-I, ALPHA, A( I+1, I ), 1, TAU( I ), 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w**T - w * v**T +* + CALL DSYR2( UPLO, N-I, -ONE, A( I+1, I ), 1, TAU( I ), 1, + $ A( I+1, I+1 ), LDA ) +* + A( I+1, I ) = E( I ) + END IF + D( I ) = A( I, I ) + TAU( I ) = TAUI + 20 CONTINUE + D( N ) = A( N, N ) + END IF +* + RETURN +* +* End of DSYTD2 +* + END diff --git a/dspl/liblapack/SRC/dsytf2.f b/dspl/liblapack/SRC/dsytf2.f new file mode 100644 index 0000000..39ef4de --- /dev/null +++ b/dspl/liblapack/SRC/dsytf2.f @@ -0,0 +1,610 @@ +*> \brief \b DSYTF2 computes the factorization of a real symmetric indefinite matrix, using the diagonal pivoting method (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTF2( UPLO, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTF2 computes the factorization of a real symmetric matrix A using +*> the Bunch-Kaufman diagonal pivoting method: +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, U**T is the transpose of U, and D is symmetric and +*> block diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k-1) < 0, then rows and columns +*> k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k+1) < 0, then rows and columns +*> k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1) +*> is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> 09-29-06 - patch from +*> Bobby Cheng, MathWorks +*> +*> Replace l.204 and l.372 +*> IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +*> by +*> IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) .OR. DISNAN(ABSAKK) ) THEN +*> +*> 01-01-96 - Based on modifications by +*> J. Lewis, Boeing Computer Services Company +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +*> 1-96 - Based on modifications by J. Lewis, Boeing Computer Services +*> Company +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYTF2( UPLO, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KK, KP, KSTEP + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, D11, D12, D21, D22, R1, + $ ROWMAX, T, WK, WKM1, WKP1 +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + INTEGER IDAMAX + EXTERNAL LSAME, IDAMAX, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSWAP, DSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTF2', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 70 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = IDAMAX( K-1, A( 1, K ), 1 ) + COLMAX = ABS( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) .OR. DISNAN(ABSAKK) ) THEN +* +* Column K is zero or underflow, or contains a NaN: +* set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + IDAMAX( K-IMAX, A( IMAX, IMAX+1 ), LDA ) + ROWMAX = ABS( A( IMAX, JMAX ) ) + IF( IMAX.GT.1 ) THEN + JMAX = IDAMAX( IMAX-1, A( 1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL DSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) + CALL DSWAP( KK-KP-1, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)**T = A - W(k)*1/D(k)*W(k)**T +* + R1 = ONE / A( K, K ) + CALL DSYR( UPLO, K-1, -R1, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL DSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )**T +* + IF( K.GT.2 ) THEN +* + D12 = A( K-1, K ) + D22 = A( K-1, K-1 ) / D12 + D11 = A( K, K ) / D12 + T = ONE / ( D11*D22-ONE ) + D12 = T / D12 +* + DO 30 J = K - 2, 1, -1 + WKM1 = D12*( D11*A( J, K-1 )-A( J, K ) ) + WK = D12*( D22*A( J, K )-A( J, K-1 ) ) + DO 20 I = J, 1, -1 + A( I, J ) = A( I, J ) - A( I, K )*WK - + $ A( I, K-1 )*WKM1 + 20 CONTINUE + A( J, K ) = WK + A( J, K-1 ) = WKM1 + 30 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 70 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + IDAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = ABS( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) .OR. DISNAN(ABSAKK) ) THEN +* +* Column K is zero or underflow, or contains a NaN: +* set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + IDAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = ABS( A( IMAX, JMAX ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + IDAMAX( N-IMAX, A( IMAX+1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL DSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) + CALL DSWAP( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)**T = A - W(k)*(1/D(k))*W(k)**T +* + D11 = ONE / A( K, K ) + CALL DSYR( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column K +* + CALL DSCAL( N-K, D11, A( K+1, K ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k) +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( (A(k) A(k+1))*D(k)**(-1) ) * (A(k) A(k+1))**T +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D21 = A( K+1, K ) + D11 = A( K+1, K+1 ) / D21 + D22 = A( K, K ) / D21 + T = ONE / ( D11*D22-ONE ) + D21 = T / D21 +* + DO 60 J = K + 2, N +* + WK = D21*( D11*A( J, K )-A( J, K+1 ) ) + WKP1 = D21*( D22*A( J, K+1 )-A( J, K ) ) +* + DO 50 I = J, N + A( I, J ) = A( I, J ) - A( I, K )*WK - + $ A( I, K+1 )*WKP1 + 50 CONTINUE +* + A( J, K ) = WK + A( J, K+1 ) = WKP1 +* + 60 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 40 +* + END IF +* + 70 CONTINUE +* + RETURN +* +* End of DSYTF2 +* + END diff --git a/dspl/liblapack/SRC/dsytf2_rk.f b/dspl/liblapack/SRC/dsytf2_rk.f new file mode 100644 index 0000000..45cf62a --- /dev/null +++ b/dspl/liblapack/SRC/dsytf2_rk.f @@ -0,0 +1,943 @@ +*> \brief \b DSYTF2_RK computes the factorization of a real symmetric indefinite matrix using the bounded Bunch-Kaufman (rook) diagonal pivoting method (BLAS2 unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTF2_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTF2_RK( UPLO, N, A, LDA, E, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), E ( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> DSYTF2_RK computes the factorization of a real symmetric matrix A +*> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> For more information see Further Details section. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, contains: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> IPIV describes the permutation matrix P in the factorization +*> of matrix A as follows. The absolute value of IPIV(k) +*> represents the index of row and column that were +*> interchanged with the k-th row and column. The value of UPLO +*> describes the order in which the interchanges were applied. +*> Also, the sign of IPIV represents the block structure of +*> the symmetric block diagonal matrix D with 1-by-1 or 2-by-2 +*> diagonal blocks which correspond to 1 or 2 interchanges +*> at each factorization step. For more info see Further +*> Details section. +*> +*> If UPLO = 'U', +*> ( in factorization order, k decreases from N to 1 ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N); +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k-1) < 0 means: +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k-1) != k-1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k-1) = k-1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) <= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> +*> If UPLO = 'L', +*> ( in factorization order, k increases from 1 to N ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N). +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k+1) < 0 means: +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k+1) != k+1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k+1) = k+1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) >= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> TODO: put further details +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> 01-01-96 - Based on modifications by +*> J. Lewis, Boeing Computer Services Company +*> A. Petitet, Computer Science Dept., +*> Univ. of Tenn., Knoxville abd , USA +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYTF2_RK( UPLO, N, A, LDA, E, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER, DONE + INTEGER I, IMAX, J, JMAX, ITEMP, K, KK, KP, KSTEP, + $ P, II + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, D11, D12, D21, D22, + $ ROWMAX, DTEMP, T, WK, WKM1, WKP1, SFMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IDAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSWAP, DSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTF2_RK', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = DLAMCH( 'S' ) +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* Initilize the first entry of array E, where superdiagonal +* elements of D are stored +* + E( 1 ) = ZERO +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 34 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = IDAMAX( K-1, A( 1, K ), 1 ) + COLMAX = ABS( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K +* +* Set E( K ) to zero +* + IF( K.GT.1 ) + $ E( K ) = ZERO +* + ELSE +* +* Test for interchange +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABSAKK.GE.ALPHA*COLMAX +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, +* use 1-by-1 pivot block +* + KP = K + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 12 CONTINUE +* +* Begin pivot search loop body +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + IDAMAX( K-IMAX, A( IMAX, IMAX+1 ), + $ LDA ) + ROWMAX = ABS( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = IDAMAX( IMAX-1, A( 1, IMAX ), 1 ) + DTEMP = ABS( A( ITEMP, IMAX ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX +* + IF( .NOT.( ABS( A( IMAX, IMAX ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX .EQ. COLMAX, +* used to handle NaN and Inf +* + ELSE IF( ( P.EQ.JMAX ).OR.( ROWMAX.LE.COLMAX ) ) THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot NOT found, set variables and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 12 +* + END IF +* +* Swap TWO rows and TWO columns +* +* First swap +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Interchange rows and column K and P in the leading +* submatrix A(1:k,1:k) if we have a 2-by-2 pivot +* + IF( P.GT.1 ) + $ CALL DSWAP( P-1, A( 1, K ), 1, A( 1, P ), 1 ) + IF( P.LT.(K-1) ) + $ CALL DSWAP( K-P-1, A( P+1, K ), 1, A( P, P+1 ), + $ LDA ) + T = A( K, K ) + A( K, K ) = A( P, P ) + A( P, P ) = T +* +* Convert upper triangle of A into U form by applying +* the interchanges in columns k+1:N. +* + IF( K.LT.N ) + $ CALL DSWAP( N-K, A( K, K+1 ), LDA, A( P, K+1 ), LDA ) +* + END IF +* +* Second swap +* + KK = K - KSTEP + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + IF( KP.GT.1 ) + $ CALL DSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) + IF( ( KK.GT.1 ) .AND. ( KP.LT.(KK-1) ) ) + $ CALL DSWAP( KK-KP-1, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF +* +* Convert upper triangle of A into U form by applying +* the interchanges in columns k+1:N. +* + IF( K.LT.N ) + $ CALL DSWAP( N-K, A( KK, K+1 ), LDA, A( KP, K+1 ), + $ LDA ) +* + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* + IF( K.GT.1 ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) and +* store U(k) in column k +* + IF( ABS( A( K, K ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*1/D(k)*W(k)**T +* + D11 = ONE / A( K, K ) + CALL DSYR( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL DSCAL( K-1, D11, A( 1, K ), 1 ) + ELSE +* +* Store L(k) in column K +* + D11 = A( K, K ) + DO 16 II = 1, K - 1 + A( II, K ) = A( II, K ) / D11 + 16 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL DSYR( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) + END IF +* +* Store the superdiagonal element of D in array E +* + E( K ) = ZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T +* = A - ( ( A(k-1)A(k) )*inv(D(k)) ) * ( A(k-1)A(k) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.GT.2 ) THEN +* + D12 = A( K-1, K ) + D22 = A( K-1, K-1 ) / D12 + D11 = A( K, K ) / D12 + T = ONE / ( D11*D22-ONE ) +* + DO 30 J = K - 2, 1, -1 +* + WKM1 = T*( D11*A( J, K-1 )-A( J, K ) ) + WK = T*( D22*A( J, K )-A( J, K-1 ) ) +* + DO 20 I = J, 1, -1 + A( I, J ) = A( I, J ) - (A( I, K ) / D12 )*WK - + $ ( A( I, K-1 ) / D12 )*WKM1 + 20 CONTINUE +* +* Store U(k) and U(k-1) in cols k and k-1 for row J +* + A( J, K ) = WK / D12 + A( J, K-1 ) = WKM1 / D12 +* + 30 CONTINUE +* + END IF +* +* Copy superdiagonal elements of D(K) to E(K) and +* ZERO out superdiagonal entry of A +* + E( K ) = A( K-1, K ) + E( K-1 ) = ZERO + A( K-1, K ) = ZERO +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 34 CONTINUE +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* Initilize the unused last entry of the subdiagonal array E. +* + E( N ) = ZERO +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 64 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + IDAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = ABS( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( ( MAX( ABSAKK, COLMAX ).EQ.ZERO ) ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K +* +* Set E( K ) to zero +* + IF( K.LT.N ) + $ E( K ) = ZERO +* + ELSE +* +* Test for interchange +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABSAKK.GE.ALPHA*COLMAX +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 42 CONTINUE +* +* Begin pivot search loop body +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + IDAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = ABS( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + IDAMAX( N-IMAX, A( IMAX+1, IMAX ), + $ 1 ) + DTEMP = ABS( A( ITEMP, IMAX ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX +* + IF( .NOT.( ABS( A( IMAX, IMAX ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX .EQ. COLMAX, +* used to handle NaN and Inf +* + ELSE IF( ( P.EQ.JMAX ).OR.( ROWMAX.LE.COLMAX ) ) THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot NOT found, set variables and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 42 +* + END IF +* +* Swap TWO rows and TWO columns +* +* First swap +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Interchange rows and column K and P in the trailing +* submatrix A(k:n,k:n) if we have a 2-by-2 pivot +* + IF( P.LT.N ) + $ CALL DSWAP( N-P, A( P+1, K ), 1, A( P+1, P ), 1 ) + IF( P.GT.(K+1) ) + $ CALL DSWAP( P-K-1, A( K+1, K ), 1, A( P, K+1 ), LDA ) + T = A( K, K ) + A( K, K ) = A( P, P ) + A( P, P ) = T +* +* Convert lower triangle of A into L form by applying +* the interchanges in columns 1:k-1. +* + IF ( K.GT.1 ) + $ CALL DSWAP( K-1, A( K, 1 ), LDA, A( P, 1 ), LDA ) +* + END IF +* +* Second swap +* + KK = K + KSTEP - 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL DSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) + IF( ( KK.LT.N ) .AND. ( KP.GT.(KK+1) ) ) + $ CALL DSWAP( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF +* +* Convert lower triangle of A into L form by applying +* the interchanges in columns 1:k-1. +* + IF ( K.GT.1 ) + $ CALL DSWAP( K-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) +* + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) and +* store L(k) in column k +* + IF( ABS( A( K, K ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* + D11 = ONE / A( K, K ) + CALL DSYR( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column k +* + CALL DSCAL( N-K, D11, A( K+1, K ), 1 ) + ELSE +* +* Store L(k) in column k +* + D11 = A( K, K ) + DO 46 II = K + 1, N + A( II, K ) = A( II, K ) / D11 + 46 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL DSYR( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) + END IF +* +* Store the subdiagonal element of D in array E +* + E( K ) = ZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) ) * D(k) * ( L(k) L(k+1) )**T +* = A - ( ( A(k)A(k+1) )*inv(D(k) ) * ( A(k)A(k+1) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.LT.N-1 ) THEN +* + D21 = A( K+1, K ) + D11 = A( K+1, K+1 ) / D21 + D22 = A( K, K ) / D21 + T = ONE / ( D11*D22-ONE ) +* + DO 60 J = K + 2, N +* +* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J +* + WK = T*( D11*A( J, K )-A( J, K+1 ) ) + WKP1 = T*( D22*A( J, K+1 )-A( J, K ) ) +* +* Perform a rank-2 update of A(k+2:n,k+2:n) +* + DO 50 I = J, N + A( I, J ) = A( I, J ) - ( A( I, K ) / D21 )*WK - + $ ( A( I, K+1 ) / D21 )*WKP1 + 50 CONTINUE +* +* Store L(k) and L(k+1) in cols k and k+1 for row J +* + A( J, K ) = WK / D21 + A( J, K+1 ) = WKP1 / D21 +* + 60 CONTINUE +* + END IF +* +* Copy subdiagonal elements of D(K) to E(K) and +* ZERO out subdiagonal entry of A +* + E( K ) = A( K+1, K ) + E( K+1 ) = ZERO + A( K+1, K ) = ZERO +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 40 +* + 64 CONTINUE +* + END IF +* + RETURN +* +* End of DSYTF2_RK +* + END diff --git a/dspl/liblapack/SRC/dsytf2_rook.f b/dspl/liblapack/SRC/dsytf2_rook.f new file mode 100644 index 0000000..237c998 --- /dev/null +++ b/dspl/liblapack/SRC/dsytf2_rook.f @@ -0,0 +1,813 @@ +*> \brief \b DSYTF2_ROOK computes the factorization of a real symmetric indefinite matrix using the bounded Bunch-Kaufman ("rook") diagonal pivoting method (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTF2_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTF2_ROOK( UPLO, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTF2_ROOK computes the factorization of a real symmetric matrix A +*> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method: +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, U**T is the transpose of U, and D is symmetric and +*> block diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup doubleSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> 01-01-96 - Based on modifications by +*> J. Lewis, Boeing Computer Services Company +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville abd , USA +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYTF2_ROOK( UPLO, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER, DONE + INTEGER I, IMAX, J, JMAX, ITEMP, K, KK, KP, KSTEP, + $ P, II + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, D11, D12, D21, D22, + $ ROWMAX, DTEMP, T, WK, WKM1, WKP1, SFMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IDAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSWAP, DSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTF2_ROOK', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = DLAMCH( 'S' ) +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 70 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = IDAMAX( K-1, A( 1, K ), 1 ) + COLMAX = ABS( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE +* +* Test for interchange +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABSAKK.GE.ALPHA*COLMAX +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, +* use 1-by-1 pivot block +* + KP = K + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 12 CONTINUE +* +* Begin pivot search loop body +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + IDAMAX( K-IMAX, A( IMAX, IMAX+1 ), + $ LDA ) + ROWMAX = ABS( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = IDAMAX( IMAX-1, A( 1, IMAX ), 1 ) + DTEMP = ABS( A( ITEMP, IMAX ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX +* + IF( .NOT.( ABS( A( IMAX, IMAX ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX .EQ. COLMAX, +* used to handle NaN and Inf +* + ELSE IF( ( P.EQ.JMAX ).OR.( ROWMAX.LE.COLMAX ) ) THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot NOT found, set variables and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 12 +* + END IF +* +* Swap TWO rows and TWO columns +* +* First swap +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Interchange rows and column K and P in the leading +* submatrix A(1:k,1:k) if we have a 2-by-2 pivot +* + IF( P.GT.1 ) + $ CALL DSWAP( P-1, A( 1, K ), 1, A( 1, P ), 1 ) + IF( P.LT.(K-1) ) + $ CALL DSWAP( K-P-1, A( P+1, K ), 1, A( P, P+1 ), + $ LDA ) + T = A( K, K ) + A( K, K ) = A( P, P ) + A( P, P ) = T + END IF +* +* Second swap +* + KK = K - KSTEP + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + IF( KP.GT.1 ) + $ CALL DSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) + IF( ( KK.GT.1 ) .AND. ( KP.LT.(KK-1) ) ) + $ CALL DSWAP( KK-KP-1, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* + IF( K.GT.1 ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) and +* store U(k) in column k +* + IF( ABS( A( K, K ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*1/D(k)*W(k)**T +* + D11 = ONE / A( K, K ) + CALL DSYR( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL DSCAL( K-1, D11, A( 1, K ), 1 ) + ELSE +* +* Store L(k) in column K +* + D11 = A( K, K ) + DO 16 II = 1, K - 1 + A( II, K ) = A( II, K ) / D11 + 16 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL DSYR( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T +* = A - ( ( A(k-1)A(k) )*inv(D(k)) ) * ( A(k-1)A(k) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.GT.2 ) THEN +* + D12 = A( K-1, K ) + D22 = A( K-1, K-1 ) / D12 + D11 = A( K, K ) / D12 + T = ONE / ( D11*D22-ONE ) +* + DO 30 J = K - 2, 1, -1 +* + WKM1 = T*( D11*A( J, K-1 )-A( J, K ) ) + WK = T*( D22*A( J, K )-A( J, K-1 ) ) +* + DO 20 I = J, 1, -1 + A( I, J ) = A( I, J ) - (A( I, K ) / D12 )*WK - + $ ( A( I, K-1 ) / D12 )*WKM1 + 20 CONTINUE +* +* Store U(k) and U(k-1) in cols k and k-1 for row J +* + A( J, K ) = WK / D12 + A( J, K-1 ) = WKM1 / D12 +* + 30 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 70 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + IDAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = ABS( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( ( MAX( ABSAKK, COLMAX ).EQ.ZERO ) ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE +* +* Test for interchange +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABSAKK.GE.ALPHA*COLMAX +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 42 CONTINUE +* +* Begin pivot search loop body +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + IDAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = ABS( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + IDAMAX( N-IMAX, A( IMAX+1, IMAX ), + $ 1 ) + DTEMP = ABS( A( ITEMP, IMAX ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX +* + IF( .NOT.( ABS( A( IMAX, IMAX ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX .EQ. COLMAX, +* used to handle NaN and Inf +* + ELSE IF( ( P.EQ.JMAX ).OR.( ROWMAX.LE.COLMAX ) ) THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot NOT found, set variables and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 42 +* + END IF +* +* Swap TWO rows and TWO columns +* +* First swap +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Interchange rows and column K and P in the trailing +* submatrix A(k:n,k:n) if we have a 2-by-2 pivot +* + IF( P.LT.N ) + $ CALL DSWAP( N-P, A( P+1, K ), 1, A( P+1, P ), 1 ) + IF( P.GT.(K+1) ) + $ CALL DSWAP( P-K-1, A( K+1, K ), 1, A( P, K+1 ), LDA ) + T = A( K, K ) + A( K, K ) = A( P, P ) + A( P, P ) = T + END IF +* +* Second swap +* + KK = K + KSTEP - 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL DSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) + IF( ( KK.LT.N ) .AND. ( KP.GT.(KK+1) ) ) + $ CALL DSWAP( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) and +* store L(k) in column k +* + IF( ABS( A( K, K ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* + D11 = ONE / A( K, K ) + CALL DSYR( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column k +* + CALL DSCAL( N-K, D11, A( K+1, K ), 1 ) + ELSE +* +* Store L(k) in column k +* + D11 = A( K, K ) + DO 46 II = K + 1, N + A( II, K ) = A( II, K ) / D11 + 46 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL DSYR( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) ) * D(k) * ( L(k) L(k+1) )**T +* = A - ( ( A(k)A(k+1) )*inv(D(k) ) * ( A(k)A(k+1) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.LT.N-1 ) THEN +* + D21 = A( K+1, K ) + D11 = A( K+1, K+1 ) / D21 + D22 = A( K, K ) / D21 + T = ONE / ( D11*D22-ONE ) +* + DO 60 J = K + 2, N +* +* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J +* + WK = T*( D11*A( J, K )-A( J, K+1 ) ) + WKP1 = T*( D22*A( J, K+1 )-A( J, K ) ) +* +* Perform a rank-2 update of A(k+2:n,k+2:n) +* + DO 50 I = J, N + A( I, J ) = A( I, J ) - ( A( I, K ) / D21 )*WK - + $ ( A( I, K+1 ) / D21 )*WKP1 + 50 CONTINUE +* +* Store L(k) and L(k+1) in cols k and k+1 for row J +* + A( J, K ) = WK / D21 + A( J, K+1 ) = WKP1 / D21 +* + 60 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 40 +* + END IF +* + 70 CONTINUE +* + RETURN +* +* End of DSYTF2_ROOK +* + END diff --git a/dspl/liblapack/SRC/dsytrd.f b/dspl/liblapack/SRC/dsytrd.f new file mode 100644 index 0000000..d330b24 --- /dev/null +++ b/dspl/liblapack/SRC/dsytrd.f @@ -0,0 +1,376 @@ +*> \brief \b DSYTRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAU( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRD reduces a real symmetric matrix A to real symmetric +*> tridiagonal form T by an orthogonal similarity transformation: +*> Q**T * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the orthogonal +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the orthogonal matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 1. +*> For optimum performance LWORK >= N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n-1) . . . H(2) H(1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +*> A(1:i-1,i+1), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(n-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +*> and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( d e v2 v3 v4 ) ( d ) +*> ( d e v3 v4 ) ( e d ) +*> ( d e v4 ) ( v1 e d ) +*> ( d e ) ( v1 v2 e d ) +*> ( d ) ( v1 v2 v3 e d ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSYTRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), D( * ), E( * ), TAU( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, IWS, J, KK, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DLATRD, DSYR2K, DSYTD2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. +* + NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NX = N + IWS = 1 + IF( NB.GT.1 .AND. NB.LT.N ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code). +* + NX = MAX( NB, ILAENV( 3, 'DSYTRD', UPLO, N, -1, -1, -1 ) ) + IF( NX.LT.N ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code by setting NX = N. +* + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = ILAENV( 2, 'DSYTRD', UPLO, N, -1, -1, -1 ) + IF( NB.LT.NBMIN ) + $ NX = N + END IF + ELSE + NX = N + END IF + ELSE + NB = 1 + END IF +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A. +* Columns 1:kk are handled by the unblocked method. +* + KK = N - ( ( N-NX+NB-1 ) / NB )*NB + DO 20 I = N - NB + 1, KK + 1, -NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL DLATRD( UPLO, I+NB-1, NB, A, LDA, E, TAU, WORK, + $ LDWORK ) +* +* Update the unreduced submatrix A(1:i-1,1:i-1), using an +* update of the form: A := A - V*W**T - W*V**T +* + CALL DSYR2K( UPLO, 'No transpose', I-1, NB, -ONE, A( 1, I ), + $ LDA, WORK, LDWORK, ONE, A, LDA ) +* +* Copy superdiagonal elements back into A, and diagonal +* elements into D +* + DO 10 J = I, I + NB - 1 + A( J-1, J ) = E( J-1 ) + D( J ) = A( J, J ) + 10 CONTINUE + 20 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL DSYTD2( UPLO, KK, A, LDA, D, E, TAU, IINFO ) + ELSE +* +* Reduce the lower triangle of A +* + DO 40 I = 1, N - NX, NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL DLATRD( UPLO, N-I+1, NB, A( I, I ), LDA, E( I ), + $ TAU( I ), WORK, LDWORK ) +* +* Update the unreduced submatrix A(i+ib:n,i+ib:n), using +* an update of the form: A := A - V*W**T - W*V**T +* + CALL DSYR2K( UPLO, 'No transpose', N-I-NB+1, NB, -ONE, + $ A( I+NB, I ), LDA, WORK( NB+1 ), LDWORK, ONE, + $ A( I+NB, I+NB ), LDA ) +* +* Copy subdiagonal elements back into A, and diagonal +* elements into D +* + DO 30 J = I, I + NB - 1 + A( J+1, J ) = E( J ) + D( J ) = A( J, J ) + 30 CONTINUE + 40 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL DSYTD2( UPLO, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAU( I ), IINFO ) + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of DSYTRD +* + END diff --git a/dspl/liblapack/SRC/dsytrd_2stage.f b/dspl/liblapack/SRC/dsytrd_2stage.f new file mode 100644 index 0000000..522602b --- /dev/null +++ b/dspl/liblapack/SRC/dsytrd_2stage.f @@ -0,0 +1,337 @@ +*> \brief \b DSYTRD_2STAGE +* +* @generated from zhetrd_2stage.f, fortran z -> d, Sun Nov 6 19:34:06 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRD_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU, +* HOUS2, LHOUS2, WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER VECT, UPLO +* INTEGER N, LDA, LWORK, LHOUS2, INFO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* DOUBLE PRECISION A( LDA, * ), TAU( * ), +* HOUS2( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRD_2STAGE reduces a real symmetric matrix A to real symmetric +*> tridiagonal form T by a orthogonal similarity transformation: +*> Q1**T Q2**T* A * Q2 * Q1 = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': No need for the Housholder representation, +*> in particular for the second stage (Band to +*> tridiagonal) and thus LHOUS2 is of size max(1, 4*N); +*> = 'V': the Householder representation is needed to +*> either generate Q1 Q2 or to apply Q1 Q2, +*> then LHOUS2 is to be queried and computed. +*> (NOT AVAILABLE IN THIS RELEASE). +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the band superdiagonal +*> of A are overwritten by the corresponding elements of the +*> internal band-diagonal matrix AB, and the elements above +*> the KD superdiagonal, with the array TAU, represent the orthogonal +*> matrix Q1 as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and band subdiagonal of A are over- +*> written by the corresponding elements of the internal band-diagonal +*> matrix AB, and the elements below the KD subdiagonal, with +*> the array TAU, represent the orthogonal matrix Q1 as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (N-KD) +*> The scalar factors of the elementary reflectors of +*> the first stage (see Further Details). +*> \endverbatim +*> +*> \param[out] HOUS2 +*> \verbatim +*> HOUS2 is DOUBLE PRECISION array, dimension LHOUS2, that +*> store the Householder representation of the stage2 +*> band to tridiagonal. +*> \endverbatim +*> +*> \param[in] LHOUS2 +*> \verbatim +*> LHOUS2 is INTEGER +*> The dimension of the array HOUS2. LHOUS2 = MAX(1, dimension) +*> If LWORK = -1, or LHOUS2=-1, +*> then a query is assumed; the routine +*> only calculates the optimal size of the HOUS2 array, returns +*> this value as the first entry of the HOUS2 array, and no error +*> message related to LHOUS2 is issued by XERBLA. +*> LHOUS2 = MAX(1, dimension) where +*> dimension = 4*N if VECT='N' +*> not available now if VECT='H' +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK = MAX(1, dimension) +*> If LWORK = -1, or LHOUS2=-1, +*> then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSYTRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU, + $ HOUS2, LHOUS2, WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER VECT, UPLO + INTEGER N, LDA, LWORK, LHOUS2, INFO +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + DOUBLE PRECISION A( LDA, * ), TAU( * ), + $ HOUS2( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTQ + INTEGER KD, IB, LWMIN, LHMIN, LWRK, LDAB, WPOS, ABPOS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DSYTRD_SY2SB, DSYTRD_SB2ST +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + EXTERNAL LSAME, ILAENV2STAGE +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + WANTQ = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) .OR. ( LHOUS2.EQ.-1 ) +* +* Determine the block size, the workspace size and the hous size. +* + KD = ILAENV2STAGE( 1, 'DSYTRD_2STAGE', VECT, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'DSYTRD_2STAGE', VECT, N, KD, -1, -1 ) + LHMIN = ILAENV2STAGE( 3, 'DSYTRD_2STAGE', VECT, N, KD, IB, -1 ) + LWMIN = ILAENV2STAGE( 4, 'DSYTRD_2STAGE', VECT, N, KD, IB, -1 ) +* WRITE(*,*),'DSYTRD_2STAGE N KD UPLO LHMIN LWMIN ',N, KD, UPLO, +* $ LHMIN, LWMIN +* + IF( .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LHOUS2.LT.LHMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + HOUS2( 1 ) = LHMIN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine pointer position +* + LDAB = KD+1 + LWRK = LWORK-LDAB*N + ABPOS = 1 + WPOS = ABPOS + LDAB*N + CALL DSYTRD_SY2SB( UPLO, N, KD, A, LDA, WORK( ABPOS ), LDAB, + $ TAU, WORK( WPOS ), LWRK, INFO ) + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRD_SY2SB', -INFO ) + RETURN + END IF + CALL DSYTRD_SB2ST( 'Y', VECT, UPLO, N, KD, + $ WORK( ABPOS ), LDAB, D, E, + $ HOUS2, LHOUS2, WORK( WPOS ), LWRK, INFO ) + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRD_SB2ST', -INFO ) + RETURN + END IF +* +* + HOUS2( 1 ) = LHMIN + WORK( 1 ) = LWMIN + RETURN +* +* End of DSYTRD_2STAGE +* + END diff --git a/dspl/liblapack/SRC/dsytrd_sb2st.F b/dspl/liblapack/SRC/dsytrd_sb2st.F new file mode 100644 index 0000000..1e86000 --- /dev/null +++ b/dspl/liblapack/SRC/dsytrd_sb2st.F @@ -0,0 +1,556 @@ +*> \brief \b DSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric tridiagonal form T +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRD_SB2ST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRD_SB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, +* D, E, HOUS, LHOUS, WORK, LWORK, INFO ) +* +* #if defined(_OPENMP) +* use omp_lib +* #endif +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER STAGE1, UPLO, VECT +* INTEGER N, KD, IB, LDAB, LHOUS, LWORK, INFO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* DOUBLE PRECISION AB( LDAB, * ), HOUS( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric +*> tridiagonal form T by a orthogonal similarity transformation: +*> Q**T * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] STAGE +*> \verbatim +*> STAGE is CHARACTER*1 +*> = 'N': "No": to mention that the stage 1 of the reduction +*> from dense to band using the dsytrd_sy2sb routine +*> was not called before this routine to reproduce AB. +*> In other term this routine is called as standalone. +*> = 'Y': "Yes": to mention that the stage 1 of the +*> reduction from dense to band using the dsytrd_sy2sb +*> routine has been called to produce AB (e.g., AB is +*> the output of dsytrd_sy2sb. +*> \endverbatim +*> +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': No need for the Housholder representation, +*> and thus LHOUS is of size max(1, 4*N); +*> = 'V': the Householder representation is needed to +*> either generate or to apply Q later on, +*> then LHOUS is to be queried and computed. +*> (NOT AVAILABLE IN THIS RELEASE). +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> On exit, the diagonal elements of AB are overwritten by the +*> diagonal elements of the tridiagonal matrix T; if KD > 0, the +*> elements on the first superdiagonal (if UPLO = 'U') or the +*> first subdiagonal (if UPLO = 'L') are overwritten by the +*> off-diagonal elements of T; the rest of AB is overwritten by +*> values generated during the reduction. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] HOUS +*> \verbatim +*> HOUS is DOUBLE PRECISION array, dimension LHOUS, that +*> store the Householder representation. +*> \endverbatim +*> +*> \param[in] LHOUS +*> \verbatim +*> LHOUS is INTEGER +*> The dimension of the array HOUS. LHOUS = MAX(1, dimension) +*> If LWORK = -1, or LHOUS=-1, +*> then a query is assumed; the routine +*> only calculates the optimal size of the HOUS array, returns +*> this value as the first entry of the HOUS array, and no error +*> message related to LHOUS is issued by XERBLA. +*> LHOUS = MAX(1, dimension) where +*> dimension = 4*N if VECT='N' +*> not available now if VECT='H' +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK = MAX(1, dimension) +*> If LWORK = -1, or LHOUS=-1, +*> then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup real16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSYTRD_SB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, + $ D, E, HOUS, LHOUS, WORK, LWORK, INFO ) +* +#if defined(_OPENMP) + use omp_lib +#endif +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER STAGE1, UPLO, VECT + INTEGER N, KD, LDAB, LHOUS, LWORK, INFO +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + DOUBLE PRECISION AB( LDAB, * ), HOUS( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION RZERO + DOUBLE PRECISION ZERO, ONE + PARAMETER ( RZERO = 0.0D+0, + $ ZERO = 0.0D+0, + $ ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTQ, UPPER, AFTERS1 + INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST, + $ ED, STIND, EDIND, BLKLASTIND, COLPT, THED, + $ STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID, + $ NBTILES, TTYPE, TID, NTHREADS, DEBUG, + $ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS, + $ INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU, + $ SIDEV, SIZETAU, LDV, LHMIN, LWMIN +* .. +* .. External Subroutines .. + EXTERNAL DSB2ST_KERNELS, DLACPY, DLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX, CEILING, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Determine the minimal workspace size required. +* Test the input parameters +* + DEBUG = 0 + INFO = 0 + AFTERS1 = LSAME( STAGE1, 'Y' ) + WANTQ = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) .OR. ( LHOUS.EQ.-1 ) +* +* Determine the block size, the workspace size and the hous size. +* + IB = ILAENV( 18, 'DSYTRD_SB2ST', VECT, N, KD, -1, -1 ) + LHMIN = ILAENV( 19, 'DSYTRD_SB2ST', VECT, N, KD, IB, -1 ) + LWMIN = ILAENV( 20, 'DSYTRD_SB2ST', VECT, N, KD, IB, -1 ) +* + IF( .NOT.AFTERS1 .AND. .NOT.LSAME( STAGE1, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -2 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.(KD+1) ) THEN + INFO = -7 + ELSE IF( LHOUS.LT.LHMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + HOUS( 1 ) = LHMIN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRD_SB2ST', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + HOUS( 1 ) = 1 + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine pointer position +* + LDV = KD + IB + SIZETAU = 2 * N + SIDEV = 2 * N + INDTAU = 1 + INDV = INDTAU + SIZETAU + LDA = 2 * KD + 1 + SIZEA = LDA * N + INDA = 1 + INDW = INDA + SIZEA + NTHREADS = 1 + TID = 0 +* + IF( UPPER ) THEN + APOS = INDA + KD + AWPOS = INDA + DPOS = APOS + KD + OFDPOS = DPOS - 1 + ABDPOS = KD + 1 + ABOFDPOS = KD + ELSE + APOS = INDA + AWPOS = INDA + KD + 1 + DPOS = APOS + OFDPOS = DPOS + 1 + ABDPOS = 1 + ABOFDPOS = 2 + + ENDIF +* +* Case KD=0: +* The matrix is diagonal. We just copy it (convert to "real" for +* real because D is double and the imaginary part should be 0) +* and store it in D. A sequential code here is better or +* in a parallel environment it might need two cores for D and E +* + IF( KD.EQ.0 ) THEN + DO 30 I = 1, N + D( I ) = ( AB( ABDPOS, I ) ) + 30 CONTINUE + DO 40 I = 1, N-1 + E( I ) = RZERO + 40 CONTINUE +* + HOUS( 1 ) = 1 + WORK( 1 ) = 1 + RETURN + END IF +* +* Case KD=1: +* The matrix is already Tridiagonal. We have to make diagonal +* and offdiagonal elements real, and store them in D and E. +* For that, for real precision just copy the diag and offdiag +* to D and E while for the COMPLEX case the bulge chasing is +* performed to convert the hermetian tridiagonal to symmetric +* tridiagonal. A simpler coversion formula might be used, but then +* updating the Q matrix will be required and based if Q is generated +* or not this might complicate the story. +* + IF( KD.EQ.1 ) THEN + DO 50 I = 1, N + D( I ) = ( AB( ABDPOS, I ) ) + 50 CONTINUE +* + IF( UPPER ) THEN + DO 60 I = 1, N-1 + E( I ) = ( AB( ABOFDPOS, I+1 ) ) + 60 CONTINUE + ELSE + DO 70 I = 1, N-1 + E( I ) = ( AB( ABOFDPOS, I ) ) + 70 CONTINUE + ENDIF +* + HOUS( 1 ) = 1 + WORK( 1 ) = 1 + RETURN + END IF +* +* Main code start here. +* Reduce the symmetric band of A to a tridiagonal matrix. +* + THGRSIZ = N + GRSIZ = 1 + SHIFT = 3 + NBTILES = CEILING( REAL(N)/REAL(KD) ) + STEPERCOL = CEILING( REAL(SHIFT)/REAL(GRSIZ) ) + THGRNB = CEILING( REAL(N-1)/REAL(THGRSIZ) ) +* + CALL DLACPY( "A", KD+1, N, AB, LDAB, WORK( APOS ), LDA ) + CALL DLASET( "A", KD, N, ZERO, ZERO, WORK( AWPOS ), LDA ) +* +* +* openMP parallelisation start here +* +#if defined(_OPENMP) +!$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND ) +!$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID ) +!$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND ) +!$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK) +!$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA ) +!$OMP$ SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT ) +!$OMP MASTER +#endif +* +* main bulge chasing loop +* + DO 100 THGRID = 1, THGRNB + STT = (THGRID-1)*THGRSIZ+1 + THED = MIN( (STT + THGRSIZ -1), (N-1)) + DO 110 I = STT, N-1 + ED = MIN( I, THED ) + IF( STT.GT.ED ) EXIT + DO 120 M = 1, STEPERCOL + ST = STT + DO 130 SWEEPID = ST, ED + DO 140 K = 1, GRSIZ + MYID = (I-SWEEPID)*(STEPERCOL*GRSIZ) + $ + (M-1)*GRSIZ + K + IF ( MYID.EQ.1 ) THEN + TTYPE = 1 + ELSE + TTYPE = MOD( MYID, 2 ) + 2 + ENDIF + + IF( TTYPE.EQ.2 ) THEN + COLPT = (MYID/2)*KD + SWEEPID + STIND = COLPT-KD+1 + EDIND = MIN(COLPT,N) + BLKLASTIND = COLPT + ELSE + COLPT = ((MYID+1)/2)*KD + SWEEPID + STIND = COLPT-KD+1 + EDIND = MIN(COLPT,N) + IF( ( STIND.GE.EDIND-1 ).AND. + $ ( EDIND.EQ.N ) ) THEN + BLKLASTIND = N + ELSE + BLKLASTIND = 0 + ENDIF + ENDIF +* +* Call the kernel +* +#if defined(_OPENMP) + IF( TTYPE.NE.1 ) THEN +!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) +!$OMP$ DEPEND(in:WORK(MYID-1)) +!$OMP$ DEPEND(out:WORK(MYID)) + TID = OMP_GET_THREAD_NUM() + CALL DSB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +!$OMP END TASK + ELSE +!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) +!$OMP$ DEPEND(out:WORK(MYID)) + TID = OMP_GET_THREAD_NUM() + CALL DSB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +!$OMP END TASK + ENDIF +#else + CALL DSB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +#endif + IF ( BLKLASTIND.GE.(N-1) ) THEN + STT = STT + 1 + EXIT + ENDIF + 140 CONTINUE + 130 CONTINUE + 120 CONTINUE + 110 CONTINUE + 100 CONTINUE +* +#if defined(_OPENMP) +!$OMP END MASTER +!$OMP END PARALLEL +#endif +* +* Copy the diagonal from A to D. Note that D is REAL thus only +* the Real part is needed, the imaginary part should be zero. +* + DO 150 I = 1, N + D( I ) = ( WORK( DPOS+(I-1)*LDA ) ) + 150 CONTINUE +* +* Copy the off diagonal from A to E. Note that E is REAL thus only +* the Real part is needed, the imaginary part should be zero. +* + IF( UPPER ) THEN + DO 160 I = 1, N-1 + E( I ) = ( WORK( OFDPOS+I*LDA ) ) + 160 CONTINUE + ELSE + DO 170 I = 1, N-1 + E( I ) = ( WORK( OFDPOS+(I-1)*LDA ) ) + 170 CONTINUE + ENDIF +* + HOUS( 1 ) = LHMIN + WORK( 1 ) = LWMIN + RETURN +* +* End of DSYTRD_SB2ST +* + END + diff --git a/dspl/liblapack/SRC/dsytrd_sy2sb.f b/dspl/liblapack/SRC/dsytrd_sy2sb.f new file mode 100644 index 0000000..85337f7 --- /dev/null +++ b/dspl/liblapack/SRC/dsytrd_sy2sb.f @@ -0,0 +1,517 @@ +*> \brief \b DSYTRD_SY2SB +* +* @generated from zhetrd_he2hb.f, fortran z -> d, Wed Dec 7 08:22:39 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRD_SY2SB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRD_SY2SB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, +* WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDAB, LWORK, N, KD +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), AB( LDAB, * ), +* TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRD_SY2SB reduces a real symmetric matrix A to real symmetric +*> band-diagonal form AB by a orthogonal similarity transformation: +*> Q**T * A * Q = AB. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the reduced matrix if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> The reduced matrix is stored in the array AB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the orthogonal +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the orthogonal matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> On exit, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (N-KD) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> On exit, if INFO = 0, or if LWORK=-1, +*> WORK(1) returns the size of LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK which should be calculated +*> by a workspace query. LWORK = MAX(1, LWORK_QUERY) +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD +*> where FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice otherwise +*> putting LWORK=-1 will provide the size of WORK. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(k)**T . . . H(2)**T H(1)**T, where k = n-kd. +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in +*> A(i,i+kd+1:n), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = n-kd. +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in +*> A(i+kd+2:n,i), and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( ab ab/v1 v1 v1 v1 ) ( ab ) +*> ( ab ab/v2 v2 v2 ) ( ab/v1 ab ) +*> ( ab ab/v3 v3 ) ( v1 ab/v2 ab ) +*> ( ab ab/v4 ) ( v1 v2 ab/v3 ab ) +*> ( ab ) ( v1 v2 v3 ab/v4 ab ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSYTRD_SY2SB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, + $ WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAB, LWORK, N, KD +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), AB( LDAB, * ), + $ TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION RONE + DOUBLE PRECISION ZERO, ONE, HALF + PARAMETER ( RONE = 1.0D+0, + $ ZERO = 0.0D+0, + $ ONE = 1.0D+0, + $ HALF = 0.5D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, J, IINFO, LWMIN, PN, PK, LK, + $ LDT, LDW, LDS2, LDS1, + $ LS2, LS1, LW, LT, + $ TPOS, WPOS, S2POS, S1POS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DSYR2K, DSYMM, DGEMM, DCOPY, + $ DLARFT, DGELQF, DGEQRF, DLASET +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Determine the minimal workspace size required +* and test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + LWMIN = ILAENV( 20, 'DSYTRD_SY2SB', '', N, KD, -1, -1 ) + + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAB.LT.MAX( 1, KD+1 ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRD_SY2SB', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK( 1 ) = LWMIN + RETURN + END IF +* +* Quick return if possible +* Copy the upper/lower portion of A into AB +* + IF( N.LE.KD+1 ) THEN + IF( UPPER ) THEN + DO 100 I = 1, N + LK = MIN( KD+1, I ) + CALL DCOPY( LK, A( I-LK+1, I ), 1, + $ AB( KD+1-LK+1, I ), 1 ) + 100 CONTINUE + ELSE + DO 110 I = 1, N + LK = MIN( KD+1, N-I+1 ) + CALL DCOPY( LK, A( I, I ), 1, AB( 1, I ), 1 ) + 110 CONTINUE + ENDIF + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine the pointer position for the workspace +* + LDT = KD + LDS1 = KD + LT = LDT*KD + LW = N*KD + LS1 = LDS1*KD + LS2 = LWMIN - LT - LW - LS1 +* LS2 = N*MAX(KD,FACTOPTNB) + TPOS = 1 + WPOS = TPOS + LT + S1POS = WPOS + LW + S2POS = S1POS + LS1 + IF( UPPER ) THEN + LDW = KD + LDS2 = KD + ELSE + LDW = N + LDS2 = N + ENDIF +* +* +* Set the workspace of the triangular matrix T to zero once such a +* way everytime T is generated the upper/lower portion will be always zero +* + CALL DLASET( "A", LDT, KD, ZERO, ZERO, WORK( TPOS ), LDT ) +* + IF( UPPER ) THEN + DO 10 I = 1, N - KD, KD + PN = N-I-KD+1 + PK = MIN( N-I-KD+1, KD ) +* +* Compute the LQ factorization of the current block +* + CALL DGELQF( KD, PN, A( I, I+KD ), LDA, + $ TAU( I ), WORK( S2POS ), LS2, IINFO ) +* +* Copy the upper portion of A into AB +* + DO 20 J = I, I+PK-1 + LK = MIN( KD, N-J ) + 1 + CALL DCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 ) + 20 CONTINUE +* + CALL DLASET( 'Lower', PK, PK, ZERO, ONE, + $ A( I, I+KD ), LDA ) +* +* Form the matrix T +* + CALL DLARFT( 'Forward', 'Rowwise', PN, PK, + $ A( I, I+KD ), LDA, TAU( I ), + $ WORK( TPOS ), LDT ) +* +* Compute W: +* + CALL DGEMM( 'Conjugate', 'No transpose', PK, PN, PK, + $ ONE, WORK( TPOS ), LDT, + $ A( I, I+KD ), LDA, + $ ZERO, WORK( S2POS ), LDS2 ) +* + CALL DSYMM( 'Right', UPLO, PK, PN, + $ ONE, A( I+KD, I+KD ), LDA, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( WPOS ), LDW ) +* + CALL DGEMM( 'No transpose', 'Conjugate', PK, PK, PN, + $ ONE, WORK( WPOS ), LDW, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( S1POS ), LDS1 ) +* + CALL DGEMM( 'No transpose', 'No transpose', PK, PN, PK, + $ -HALF, WORK( S1POS ), LDS1, + $ A( I, I+KD ), LDA, + $ ONE, WORK( WPOS ), LDW ) +* +* +* Update the unreduced submatrix A(i+kd:n,i+kd:n), using +* an update of the form: A := A - V'*W - W'*V +* + CALL DSYR2K( UPLO, 'Conjugate', PN, PK, + $ -ONE, A( I, I+KD ), LDA, + $ WORK( WPOS ), LDW, + $ RONE, A( I+KD, I+KD ), LDA ) + 10 CONTINUE +* +* Copy the upper band to AB which is the band storage matrix +* + DO 30 J = N-KD+1, N + LK = MIN(KD, N-J) + 1 + CALL DCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 ) + 30 CONTINUE +* + ELSE +* +* Reduce the lower triangle of A to lower band matrix +* + DO 40 I = 1, N - KD, KD + PN = N-I-KD+1 + PK = MIN( N-I-KD+1, KD ) +* +* Compute the QR factorization of the current block +* + CALL DGEQRF( PN, KD, A( I+KD, I ), LDA, + $ TAU( I ), WORK( S2POS ), LS2, IINFO ) +* +* Copy the upper portion of A into AB +* + DO 50 J = I, I+PK-1 + LK = MIN( KD, N-J ) + 1 + CALL DCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 ) + 50 CONTINUE +* + CALL DLASET( 'Upper', PK, PK, ZERO, ONE, + $ A( I+KD, I ), LDA ) +* +* Form the matrix T +* + CALL DLARFT( 'Forward', 'Columnwise', PN, PK, + $ A( I+KD, I ), LDA, TAU( I ), + $ WORK( TPOS ), LDT ) +* +* Compute W: +* + CALL DGEMM( 'No transpose', 'No transpose', PN, PK, PK, + $ ONE, A( I+KD, I ), LDA, + $ WORK( TPOS ), LDT, + $ ZERO, WORK( S2POS ), LDS2 ) +* + CALL DSYMM( 'Left', UPLO, PN, PK, + $ ONE, A( I+KD, I+KD ), LDA, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( WPOS ), LDW ) +* + CALL DGEMM( 'Conjugate', 'No transpose', PK, PK, PN, + $ ONE, WORK( S2POS ), LDS2, + $ WORK( WPOS ), LDW, + $ ZERO, WORK( S1POS ), LDS1 ) +* + CALL DGEMM( 'No transpose', 'No transpose', PN, PK, PK, + $ -HALF, A( I+KD, I ), LDA, + $ WORK( S1POS ), LDS1, + $ ONE, WORK( WPOS ), LDW ) +* +* +* Update the unreduced submatrix A(i+kd:n,i+kd:n), using +* an update of the form: A := A - V*W' - W*V' +* + CALL DSYR2K( UPLO, 'No transpose', PN, PK, + $ -ONE, A( I+KD, I ), LDA, + $ WORK( WPOS ), LDW, + $ RONE, A( I+KD, I+KD ), LDA ) +* ================================================================== +* RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED +* DO 45 J = I, I+PK-1 +* LK = MIN( KD, N-J ) + 1 +* CALL DCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 ) +* 45 CONTINUE +* ================================================================== + 40 CONTINUE +* +* Copy the lower band to AB which is the band storage matrix +* + DO 60 J = N-KD+1, N + LK = MIN(KD, N-J) + 1 + CALL DCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 ) + 60 CONTINUE + + END IF +* + WORK( 1 ) = LWMIN + RETURN +* +* End of DSYTRD_SY2SB +* + END diff --git a/dspl/liblapack/SRC/dsytrf.f b/dspl/liblapack/SRC/dsytrf.f new file mode 100644 index 0000000..d8da4f1 --- /dev/null +++ b/dspl/liblapack/SRC/dsytrf.f @@ -0,0 +1,363 @@ +*> \brief \b DSYTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRF computes the factorization of a real symmetric matrix A using +*> the Bunch-Kaufman diagonal pivoting method. The form of the +*> factorization is +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=1. For best performance +*> LWORK >= N*NB, where NB is the block size returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLASYF, DSYTF2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'DSYTRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'DSYTRF', UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by DLASYF; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 40 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL DLASYF( UPLO, K, NB, KB, A, LDA, IPIV, WORK, LDWORK, + $ IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL DSYTF2( UPLO, K, A, LDA, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by DLASYF; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL DLASYF( UPLO, N-K+1, NB, KB, A( K, K ), LDA, IPIV( K ), + $ WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL DSYTF2( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), IINFO ) + KB = N - K + 1 + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO 30 J = K, K + KB - 1 + IF( IPIV( J ).GT.0 ) THEN + IPIV( J ) = IPIV( J ) + K - 1 + ELSE + IPIV( J ) = IPIV( J ) - K + 1 + END IF + 30 CONTINUE +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* + END IF +* + 40 CONTINUE + WORK( 1 ) = LWKOPT + RETURN +* +* End of DSYTRF +* + END diff --git a/dspl/liblapack/SRC/dsytrf_aa.f b/dspl/liblapack/SRC/dsytrf_aa.f new file mode 100644 index 0000000..24b3f39 --- /dev/null +++ b/dspl/liblapack/SRC/dsytrf_aa.f @@ -0,0 +1,467 @@ +*> \brief \b DSYTRF_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRF_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRF_AA computes the factorization of a real symmetric matrix A +*> using the Aasen's algorithm. The form of the factorization is +*> +*> A = U*T*U**T or A = L*T*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is a symmetric tridiagonal matrix. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the tridiagonal matrix is stored in the diagonals +*> and the subdiagonals of A just below (or above) the diagonals, +*> and L is stored below (or above) the subdiaonals, when UPLO +*> is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= MAX(1,2*N). For optimum performance +*> LWORK >= N*(1+NB), where NB is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER J, LWKOPT + INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB + DOUBLE PRECISION ALPHA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLASYF_AA, DGEMM, DGEMV, DSCAL, DCOPY, DSWAP, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Determine the block size +* + NB = ILAENV( 1, 'DSYTRF_AA', UPLO, N, -1, -1, -1 ) +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKOPT = (NB+1)*N + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRF_AA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return +* + IF ( N.EQ.0 ) THEN + RETURN + ENDIF + IPIV( 1 ) = 1 + IF ( N.EQ.1 ) THEN + RETURN + END IF +* +* Adjust block size based on the workspace size +* + IF( LWORK.LT.((1+NB)*N) ) THEN + NB = ( LWORK-N ) / N + END IF +* + IF( UPPER ) THEN +* +* ..................................................... +* Factorize A as L*D*L**T using the upper triangle of A +* ..................................................... +* +* Copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N)) +* + CALL DCOPY( N, A( 1, 1 ), LDA, WORK( 1 ), 1 ) +* +* J is the main loop index, increasing from 1 to N in steps of +* JB, where JB is the number of columns factorized by DLASYF; +* JB is either NB, or N-J+1 for the last block +* + J = 0 + 10 CONTINUE + IF( J.GE.N ) + $ GO TO 20 +* +* each step of the main loop +* J is the last column of the previous panel +* J1 is the first column of the current panel +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 for the first panel, and +* K1=0 for the rest +* + J1 = J + 1 + JB = MIN( N-J1+1, NB ) + K1 = MAX(1, J)-J +* +* Panel factorization +* + CALL DLASYF_AA( UPLO, 2-K1, N-J, JB, + $ A( MAX(1, J), J+1 ), LDA, + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) +* +* Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) +* + DO J2 = J+2, MIN(N, J+JB+1) + IPIV( J2 ) = IPIV( J2 ) + J + IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN + CALL DSWAP( J1-K1-2, A( 1, J2 ), 1, + $ A( 1, IPIV(J2) ), 1 ) + END IF + END DO + J = J + JB +* +* Trailing submatrix update, where +* the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and +* WORK stores the current block of the auxiriarly matrix H +* + IF( J.LT.N ) THEN +* +* If first panel and JB=1 (NB=1), then nothing to do +* + IF( J1.GT.1 .OR. JB.GT.1 ) THEN +* +* Merge rank-1 update with BLAS-3 update +* + ALPHA = A( J, J+1 ) + A( J, J+1 ) = ONE + CALL DCOPY( N-J, A( J-1, J+1 ), LDA, + $ WORK( (J+1-J1+1)+JB*N ), 1 ) + CALL DSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 ) +* +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 and K2= 0 for the first panel, +* while K1=0 and K2=1 for the rest +* + IF( J1.GT.1 ) THEN +* +* Not first panel +* + K2 = 1 + ELSE +* +* First panel +* + K2 = 0 +* +* First update skips the first column +* + JB = JB - 1 + END IF +* + DO J2 = J+1, N, NB + NJ = MIN( NB, N-J2+1 ) +* +* Update (J2, J2) diagonal block with DGEMV +* + J3 = J2 + DO MJ = NJ-1, 1, -1 + CALL DGEMV( 'No transpose', MJ, JB+1, + $ -ONE, WORK( J3-J1+1+K1*N ), N, + $ A( J1-K2, J3 ), 1, + $ ONE, A( J3, J3 ), LDA ) + J3 = J3 + 1 + END DO +* +* Update off-diagonal block of J2-th block row with DGEMM +* + CALL DGEMM( 'Transpose', 'Transpose', + $ NJ, N-J3+1, JB+1, + $ -ONE, A( J1-K2, J2 ), LDA, + $ WORK( J3-J1+1+K1*N ), N, + $ ONE, A( J2, J3 ), LDA ) + END DO +* +* Recover T( J, J+1 ) +* + A( J, J+1 ) = ALPHA + END IF +* +* WORK(J+1, 1) stores H(J+1, 1) +* + CALL DCOPY( N-J, A( J+1, J+1 ), LDA, WORK( 1 ), 1 ) + END IF + GO TO 10 + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* +* copy first column A(1:N, 1) into H(1:N, 1) +* (stored in WORK(1:N)) +* + CALL DCOPY( N, A( 1, 1 ), 1, WORK( 1 ), 1 ) +* +* J is the main loop index, increasing from 1 to N in steps of +* JB, where JB is the number of columns factorized by DLASYF; +* JB is either NB, or N-J+1 for the last block +* + J = 0 + 11 CONTINUE + IF( J.GE.N ) + $ GO TO 20 +* +* each step of the main loop +* J is the last column of the previous panel +* J1 is the first column of the current panel +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 for the first panel, and +* K1=0 for the rest +* + J1 = J+1 + JB = MIN( N-J1+1, NB ) + K1 = MAX(1, J)-J +* +* Panel factorization +* + CALL DLASYF_AA( UPLO, 2-K1, N-J, JB, + $ A( J+1, MAX(1, J) ), LDA, + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) +* +* Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) +* + DO J2 = J+2, MIN(N, J+JB+1) + IPIV( J2 ) = IPIV( J2 ) + J + IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN + CALL DSWAP( J1-K1-2, A( J2, 1 ), LDA, + $ A( IPIV(J2), 1 ), LDA ) + END IF + END DO + J = J + JB +* +* Trailing submatrix update, where +* A(J2+1, J1-1) stores L(J2+1, J1) and +* WORK(J2+1, 1) stores H(J2+1, 1) +* + IF( J.LT.N ) THEN +* +* if first panel and JB=1 (NB=1), then nothing to do +* + IF( J1.GT.1 .OR. JB.GT.1 ) THEN +* +* Merge rank-1 update with BLAS-3 update +* + ALPHA = A( J+1, J ) + A( J+1, J ) = ONE + CALL DCOPY( N-J, A( J+1, J-1 ), 1, + $ WORK( (J+1-J1+1)+JB*N ), 1 ) + CALL DSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 ) +* +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 and K2= 0 for the first panel, +* while K1=0 and K2=1 for the rest +* + IF( J1.GT.1 ) THEN +* +* Not first panel +* + K2 = 1 + ELSE +* +* First panel +* + K2 = 0 +* +* First update skips the first column +* + JB = JB - 1 + END IF +* + DO J2 = J+1, N, NB + NJ = MIN( NB, N-J2+1 ) +* +* Update (J2, J2) diagonal block with DGEMV +* + J3 = J2 + DO MJ = NJ-1, 1, -1 + CALL DGEMV( 'No transpose', MJ, JB+1, + $ -ONE, WORK( J3-J1+1+K1*N ), N, + $ A( J3, J1-K2 ), LDA, + $ ONE, A( J3, J3 ), 1 ) + J3 = J3 + 1 + END DO +* +* Update off-diagonal block in J2-th block column with DGEMM +* + CALL DGEMM( 'No transpose', 'Transpose', + $ N-J3+1, NJ, JB+1, + $ -ONE, WORK( J3-J1+1+K1*N ), N, + $ A( J2, J1-K2 ), LDA, + $ ONE, A( J3, J2 ), LDA ) + END DO +* +* Recover T( J+1, J ) +* + A( J+1, J ) = ALPHA + END IF +* +* WORK(J+1, 1) stores H(J+1, 1) +* + CALL DCOPY( N-J, A( J+1, J+1 ), 1, WORK( 1 ), 1 ) + END IF + GO TO 11 + END IF +* + 20 CONTINUE + RETURN +* +* End of DSYTRF_AA +* + END diff --git a/dspl/liblapack/SRC/dsytrf_aa_2stage.f b/dspl/liblapack/SRC/dsytrf_aa_2stage.f new file mode 100644 index 0000000..2991305 --- /dev/null +++ b/dspl/liblapack/SRC/dsytrf_aa_2stage.f @@ -0,0 +1,647 @@ +*> \brief \b DSYTRF_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRF_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, +* IPIV2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* DOUBLE PRECISION A( LDA, * ), TB( * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRF_AA_2STAGE computes the factorization of a real symmetric matrix A +*> using the Aasen's algorithm. The form of the factorization is +*> +*> A = U*T*U**T or A = L*T*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is a symmetric band matrix with the +*> bandwidth of NB (NB is internally selected and stored in TB( 1 ), and T is +*> LU factorized with partial pivoting). +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is DOUBLE PRECISION array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, + $ IPIV2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + DOUBLE PRECISION A( LDA, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER I, J, K, I1, I2, TD + INTEGER LDTB, NB, KB, JB, NT, IINFO + DOUBLE PRECISION PIV +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DCOPY, DLACGV, DLACPY, + $ DLASET, DGBTRF, DGEMM, DGETRF, + $ DSYGST, DSWAP, DTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF ( LTB .LT. 4*N .AND. .NOT.TQUERY ) THEN + INFO = -6 + ELSE IF ( LWORK .LT. N .AND. .NOT.WQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRF_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Answer the query +* + NB = ILAENV( 1, 'DSYTRF_AA_2STAGE', UPLO, N, -1, -1, -1 ) + IF( INFO.EQ.0 ) THEN + IF( TQUERY ) THEN + TB( 1 ) = (3*NB+1)*N + END IF + IF( WQUERY ) THEN + WORK( 1 ) = N*NB + END IF + END IF + IF( TQUERY .OR. WQUERY ) THEN + RETURN + END IF +* +* Quick return +* + IF ( N.EQ.0 ) THEN + RETURN + ENDIF +* +* Determine the number of the block size +* + LDTB = LTB/N + IF( LDTB .LT. 3*NB+1 ) THEN + NB = (LDTB-1)/3 + END IF + IF( LWORK .LT. NB*N ) THEN + NB = LWORK/N + END IF +* +* Determine the number of the block columns +* + NT = (N+NB-1)/NB + TD = 2*NB + KB = MIN(NB, N) +* +* Initialize vectors/matrices +* + DO J = 1, KB + IPIV( J ) = J + END DO +* +* Save NB +* + TB( 1 ) = NB +* + IF( UPPER ) THEN +* +* ..................................................... +* Factorize A as L*D*L**T using the upper triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I .EQ. 1 ) THEN +* H(I,J) = T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J) + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL DGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( (I-1)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J) + IF( I .EQ. J-1) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL DGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (I-2)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL DLACPY( 'Upper', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = U(1:J,J)'*H(1:J) + CALL DGEMM( 'Transpose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -ONE, A( 1, J*NB+1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += U(J,J)'*T(J,J-1)*U(J-1,J) + CALL DGEMM( 'Transpose', 'NoTranspose', + $ KB, NB, KB, + $ ONE, A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ ZERO, WORK( 1 ), N ) + CALL DGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB, + $ -ONE, WORK( 1 ), N, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF + IF( J.GT.0 ) THEN + CALL DSYGST( 1, 'Upper', KB, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, IINFO ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + DO K = I+1, KB + TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + $ = TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + END DO + END DO +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL DGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, KB, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + ELSE + CALL DGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB+KB, + $ ONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL DGEMM( 'Transpose', 'NoTranspose', + $ NB, N-(J+1)*NB, J*NB, + $ -ONE, WORK( NB+1 ), N, + $ A( 1, (J+1)*NB+1 ), LDA, + $ ONE, A( J*NB+1, (J+1)*NB+1 ), LDA ) + END IF +* +* Copy panel to workspace to call DGETRF +* + DO K = 1, NB + CALL DCOPY( N-(J+1)*NB, + $ A( J*NB+K, (J+1)*NB+1 ), LDA, + $ WORK( 1+(K-1)*N ), 1 ) + END DO +* +* Factorize panel +* + CALL DGETRF( N-(J+1)*NB, NB, + $ WORK, N, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Copy panel back +* + DO K = 1, NB + CALL DCOPY( N-(J+1)*NB, + $ WORK( 1+(K-1)*N ), 1, + $ A( J*NB+K, (J+1)*NB+1 ), LDA ) + END DO +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL DLASET( 'Full', KB, NB, ZERO, ZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 ) + CALL DLACPY( 'Upper', KB, NB, + $ WORK, N, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL DTRSM( 'R', 'U', 'N', 'U', KB, NB, ONE, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J,J+1) into T(J+1, J), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) + $ = TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) + END DO + END DO + CALL DLASET( 'Lower', KB, NB, ZERO, ONE, + $ A( J*NB+1, (J+1)*NB+1), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL DSWAP( K-1, A( (J+1)*NB+1, I1 ), 1, + $ A( (J+1)*NB+1, I2 ), 1 ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL DSWAP( I2-I1-1, A( I1, I1+1 ), LDA, + $ A( I1+1, I2 ), 1 ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL DSWAP( N-I2, A( I1, I2+1 ), LDA, + $ A( I2, I2+1 ), LDA ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL DSWAP( J*NB, A( 1, I1 ), 1, + $ A( 1, I2 ), 1 ) + END IF + ENDIF + END DO + END IF + END DO + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*L(J,I)' + T(I+1,I)'*L(J,I+1)' + IF( I .EQ. J-1) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL DGEMM( 'NoTranspose', 'Transpose', + $ NB, KB, JB, + $ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (I-1)*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)' + IF( I .EQ. J-1) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL DGEMM( 'NoTranspose', 'Transpose', + $ NB, KB, JB, + $ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (I-2)*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL DLACPY( 'Lower', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = L(J,1:J)*H(1:J) + CALL DGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -ONE, A( J*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += L(J,J)*T(J,J-1)*L(J,J-1)' + CALL DGEMM( 'NoTranspose', 'NoTranspose', + $ KB, NB, KB, + $ ONE, A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ ZERO, WORK( 1 ), N ) + CALL DGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, NB, + $ -ONE, WORK( 1 ), N, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF + IF( J.GT.0 ) THEN + CALL DSYGST( 1, 'Lower', KB, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, IINFO ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + DO K = I+1, KB + TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + $ = TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + END DO + END DO +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL DGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, KB, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + ELSE + CALL DGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, NB+KB, + $ ONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL DGEMM( 'NoTranspose', 'NoTranspose', + $ N-(J+1)*NB, NB, J*NB, + $ -ONE, A( (J+1)*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, A( (J+1)*NB+1, J*NB+1 ), LDA ) + END IF +* +* Factorize panel +* + CALL DGETRF( N-(J+1)*NB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL DLASET( 'Full', KB, NB, ZERO, ZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 ) + CALL DLACPY( 'Upper', KB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL DTRSM( 'R', 'L', 'T', 'U', KB, NB, ONE, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J+1,J) into T(J, J+1), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) + $ = TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) + END DO + END DO + CALL DLASET( 'Upper', KB, NB, ZERO, ONE, + $ A( (J+1)*NB+1, J*NB+1), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL DSWAP( K-1, A( I1, (J+1)*NB+1 ), LDA, + $ A( I2, (J+1)*NB+1 ), LDA ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL DSWAP( I2-I1-1, A( I1+1, I1 ), 1, + $ A( I2, I1+1 ), LDA ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL DSWAP( N-I2, A( I2+1, I1 ), 1, + $ A( I2+1, I2 ), 1 ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL DSWAP( J*NB, A( I1, 1 ), LDA, + $ A( I2, 1 ), LDA ) + END IF + ENDIF + END DO +* +* Apply pivots to previous columns of L +* +c CALL DLASWP( J*NB, A( 1, 1 ), LDA, +c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) + END IF + END DO + END IF +* +* Factor the band matrix + CALL DGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) +* +* End of DSYTRF_AA_2STAGE +* + END diff --git a/dspl/liblapack/SRC/dsytrf_rk.f b/dspl/liblapack/SRC/dsytrf_rk.f new file mode 100644 index 0000000..e6fc4ec --- /dev/null +++ b/dspl/liblapack/SRC/dsytrf_rk.f @@ -0,0 +1,498 @@ +*> \brief \b DSYTRF_RK computes the factorization of a real symmetric indefinite matrix using the bounded Bunch-Kaufman (rook) diagonal pivoting method (BLAS3 blocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRF_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), E ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> DSYTRF_RK computes the factorization of a real symmetric matrix A +*> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> For more information see Further Details section. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, contains: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> IPIV describes the permutation matrix P in the factorization +*> of matrix A as follows. The absolute value of IPIV(k) +*> represents the index of row and column that were +*> interchanged with the k-th row and column. The value of UPLO +*> describes the order in which the interchanges were applied. +*> Also, the sign of IPIV represents the block structure of +*> the symmetric block diagonal matrix D with 1-by-1 or 2-by-2 +*> diagonal blocks which correspond to 1 or 2 interchanges +*> at each factorization step. For more info see Further +*> Details section. +*> +*> If UPLO = 'U', +*> ( in factorization order, k decreases from N to 1 ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N); +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k-1) < 0 means: +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k-1) != k-1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k-1) = k-1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) <= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> +*> If UPLO = 'L', +*> ( in factorization order, k increases from 1 to N ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N). +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k+1) < 0 means: +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k+1) != k+1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k+1) = k+1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) >= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension ( MAX(1,LWORK) ). +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=1. For best performance +*> LWORK >= N*NB, where NB is the block size returned +*> by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; +*> the routine only calculates the optimal size of the WORK +*> array, returns this value as the first entry of the WORK +*> array, and no error message related to LWORK is issued +*> by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> TODO: put correct description +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYTRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, IP, IWS, K, KB, LDWORK, LWKOPT, + $ NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLASYF_RK, DSYTF2_RK, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'DSYTRF_RK', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRF_RK', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'DSYTRF_RK', + $ UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by DLASYF_RK; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 15 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL DLASYF_RK( UPLO, K, NB, KB, A, LDA, E, + $ IPIV, WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL DSYTF2_RK( UPLO, K, A, LDA, E, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* No need to adjust IPIV +* +* +* Apply permutations to the leading panel 1:k-1 +* +* Read IPIV from the last block factored, i.e. +* indices k-kb+1:k and apply row permutations to the +* last k+1 colunms k+1:N after that block +* (We can do the simple loop over IPIV with decrement -1, +* since the ABS value of IPIV( I ) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + IF( K.LT.N ) THEN + DO I = K, ( K - KB + 1 ), -1 + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + CALL DSWAP( N-K, A( I, K+1 ), LDA, + $ A( IP, K+1 ), LDA ) + END IF + END DO + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* +* This label is the exit from main loop over K decreasing +* from N to 1 in steps of KB +* + 15 CONTINUE +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by DLASYF_RK; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 35 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL DLASYF_RK( UPLO, N-K+1, NB, KB, A( K, K ), LDA, E( K ), + $ IPIV( K ), WORK, LDWORK, IINFO ) + + + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL DSYTF2_RK( UPLO, N-K+1, A( K, K ), LDA, E( K ), + $ IPIV( K ), IINFO ) + KB = N - K + 1 +* + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO I = K, K + KB - 1 + IF( IPIV( I ).GT.0 ) THEN + IPIV( I ) = IPIV( I ) + K - 1 + ELSE + IPIV( I ) = IPIV( I ) - K + 1 + END IF + END DO +* +* Apply permutations to the leading panel 1:k-1 +* +* Read IPIV from the last block factored, i.e. +* indices k:k+kb-1 and apply row permutations to the +* first k-1 colunms 1:k-1 before that block +* (We can do the simple loop over IPIV with increment 1, +* since the ABS value of IPIV( I ) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + IF( K.GT.1 ) THEN + DO I = K, ( K + KB - 1 ), 1 + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + CALL DSWAP( K-1, A( I, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + END DO + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* +* This label is the exit from main loop over K increasing +* from 1 to N in steps of KB +* + 35 CONTINUE +* +* End Lower +* + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of DSYTRF_RK +* + END diff --git a/dspl/liblapack/SRC/dsytrf_rook.f b/dspl/liblapack/SRC/dsytrf_rook.f new file mode 100644 index 0000000..d269049 --- /dev/null +++ b/dspl/liblapack/SRC/dsytrf_rook.f @@ -0,0 +1,393 @@ +*> \brief \b DSYTRF_ROOK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRF_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRF_ROOK computes the factorization of a real symmetric matrix A +*> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. +*> The form of the factorization is +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)). +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=1. For best performance +*> LWORK >= N*NB, where NB is the block size returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> April 2012, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYTRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLASYF_ROOK, DSYTF2_ROOK, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'DSYTRF_ROOK', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, N*NB ) + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRF_ROOK', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'DSYTRF_ROOK', + $ UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by DLASYF_ROOK; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 40 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL DLASYF_ROOK( UPLO, K, NB, KB, A, LDA, + $ IPIV, WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL DSYTF2_ROOK( UPLO, K, A, LDA, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* No need to adjust IPIV +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by DLASYF_ROOK; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL DLASYF_ROOK( UPLO, N-K+1, NB, KB, A( K, K ), LDA, + $ IPIV( K ), WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL DSYTF2_ROOK( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), + $ IINFO ) + KB = N - K + 1 + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO 30 J = K, K + KB - 1 + IF( IPIV( J ).GT.0 ) THEN + IPIV( J ) = IPIV( J ) + K - 1 + ELSE + IPIV( J ) = IPIV( J ) - K + 1 + END IF + 30 CONTINUE +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* + END IF +* + 40 CONTINUE + WORK( 1 ) = LWKOPT + RETURN +* +* End of DSYTRF_ROOK +* + END diff --git a/dspl/liblapack/SRC/dsytri.f b/dspl/liblapack/SRC/dsytri.f new file mode 100644 index 0000000..f093a13 --- /dev/null +++ b/dspl/liblapack/SRC/dsytri.f @@ -0,0 +1,382 @@ +*> \brief \b DSYTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRI computes the inverse of a real symmetric indefinite matrix +*> A using the factorization A = U*D*U**T or A = L*D*L**T computed by +*> DSYTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by DSYTRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSYTRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KP, KSTEP + DOUBLE PRECISION AK, AKKP1, AKP1, D, T, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSWAP, DSYMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL DCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL DSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - DDOT( K-1, WORK, 1, A( 1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K+1 ) ) + AK = A( K, K ) / T + AKP1 = A( K+1, K+1 ) / T + AKKP1 = A( K, K+1 ) / T + D = T*( AK*AKP1-ONE ) + A( K, K ) = AKP1 / D + A( K+1, K+1 ) = AK / D + A( K, K+1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL DCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL DSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - DDOT( K-1, WORK, 1, A( 1, K ), + $ 1 ) + A( K, K+1 ) = A( K, K+1 ) - + $ DDOT( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 ) + CALL DCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 ) + CALL DSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K+1 ), 1 ) + A( K+1, K+1 ) = A( K+1, K+1 ) - + $ DDOT( K-1, WORK, 1, A( 1, K+1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + CALL DSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + CALL DSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K+1 ) + A( K, K+1 ) = A( KP, K+1 ) + A( KP, K+1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + GO TO 30 + 40 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 50 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 60 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL DCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL DSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - DDOT( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K-1 ) ) + AK = A( K-1, K-1 ) / T + AKP1 = A( K, K ) / T + AKKP1 = A( K, K-1 ) / T + D = T*( AK*AKP1-ONE ) + A( K-1, K-1 ) = AKP1 / D + A( K, K ) = AK / D + A( K, K-1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL DCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL DSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - DDOT( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + A( K, K-1 ) = A( K, K-1 ) - + $ DDOT( N-K, A( K+1, K ), 1, A( K+1, K-1 ), + $ 1 ) + CALL DCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 ) + CALL DSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K-1 ), 1 ) + A( K-1, K-1 ) = A( K-1, K-1 ) - + $ DDOT( N-K, WORK, 1, A( K+1, K-1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + IF( KP.LT.N ) + $ CALL DSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + CALL DSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K-1 ) + A( K, K-1 ) = A( KP, K-1 ) + A( KP, K-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + GO TO 50 + 60 CONTINUE + END IF +* + RETURN +* +* End of DSYTRI +* + END diff --git a/dspl/liblapack/SRC/dsytri2.f b/dspl/liblapack/SRC/dsytri2.f new file mode 100644 index 0000000..9aa21a8 --- /dev/null +++ b/dspl/liblapack/SRC/dsytri2.f @@ -0,0 +1,205 @@ +*> \brief \b DSYTRI2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRI2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRI2 computes the inverse of a DOUBLE PRECISION symmetric indefinite matrix +*> A using the factorization A = U*D*U**T or A = L*D*L**T computed by +*> DSYTRF. DSYTRI2 sets the LEADING DIMENSION of the workspace +*> before calling DSYTRI2X that actually computes the inverse. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the NB diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by DSYTRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the NB structure of D +*> as determined by DSYTRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N+NB+1)*(NB+3) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> WORK is size >= (N+NB+1)*(NB+3) +*> If LDWORK = -1, then a workspace query is assumed; the routine +*> calculates: +*> - the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, +*> - and no error message related to LDWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DSYTRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, LQUERY + INTEGER MINSIZE, NBMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DSYTRI, DSYTRI2X, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* Get blocksize + NBMAX = ILAENV( 1, 'DSYTRF', UPLO, N, -1, -1, -1 ) + IF ( NBMAX .GE. N ) THEN + MINSIZE = N + ELSE + MINSIZE = (N+NBMAX+1)*(NBMAX+3) + END IF +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF (LWORK .LT. MINSIZE .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* +* Quick return if possible +* +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRI2', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK(1)=MINSIZE + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN + + IF( NBMAX .GE. N ) THEN + CALL DSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) + ELSE + CALL DSYTRI2X( UPLO, N, A, LDA, IPIV, WORK, NBMAX, INFO ) + END IF + RETURN +* +* End of DSYTRI2 +* + END diff --git a/dspl/liblapack/SRC/dsytri2x.f b/dspl/liblapack/SRC/dsytri2x.f new file mode 100644 index 0000000..ae29f12 --- /dev/null +++ b/dspl/liblapack/SRC/dsytri2x.f @@ -0,0 +1,591 @@ +*> \brief \b DSYTRI2X +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRI2X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRI2X( UPLO, N, A, LDA, IPIV, WORK, NB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), WORK( N+NB+1,* ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRI2X computes the inverse of a real symmetric indefinite matrix +*> A using the factorization A = U*D*U**T or A = L*D*L**T computed by +*> DSYTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the NNB diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by DSYTRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the NNB structure of D +*> as determined by DSYTRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N+NB+1,NB+3) +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> Block size +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DSYTRI2X( UPLO, N, A, LDA, IPIV, WORK, NB, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), WORK( N+NB+1,* ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IINFO, IP, K, CUT, NNB + INTEGER COUNT + INTEGER J, U11, INVD + + DOUBLE PRECISION AK, AKKP1, AKP1, D, T + DOUBLE PRECISION U01_I_J, U01_IP1_J + DOUBLE PRECISION U11_I_J, U11_IP1_J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSYCONV, XERBLA, DTRTRI + EXTERNAL DGEMM, DTRMM, DSYSWAPR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF +* +* Quick return if possible +* +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRI2X', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN +* +* Convert A +* Workspace got Non-diag elements of D +* + CALL DSYCONV( UPLO, 'C', N, A, LDA, IPIV, WORK, IINFO ) +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + END DO + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + END DO + END IF + INFO = 0 +* +* Splitting Workspace +* U01 is a block (N,NB+1) +* The first element of U01 is in WORK(1,1) +* U11 is a block (NB+1,NB+1) +* The first element of U11 is in WORK(N+1,1) + U11 = N +* INVD is a block (N,2) +* The first element of INVD is in WORK(1,INVD) + INVD = NB+2 + + IF( UPPER ) THEN +* +* invA = P * inv(U**T)*inv(D)*inv(U)*P**T. +* + CALL DTRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D)*inv(U) +* + K=1 + DO WHILE ( K .LE. N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK(K,INVD) = ONE / A( K, K ) + WORK(K,INVD+1) = 0 + K=K+1 + ELSE +* 2 x 2 diagonal NNB + T = WORK(K+1,1) + AK = A( K, K ) / T + AKP1 = A( K+1, K+1 ) / T + AKKP1 = WORK(K+1,1) / T + D = T*( AK*AKP1-ONE ) + WORK(K,INVD) = AKP1 / D + WORK(K+1,INVD+1) = AK / D + WORK(K,INVD+1) = -AKKP1 / D + WORK(K+1,INVD) = -AKKP1 / D + K=K+2 + END IF + END DO +* +* inv(U**T) = (inv(U))**T +* +* inv(U**T)*inv(D)*inv(U) +* + CUT=N + DO WHILE (CUT .GT. 0) + NNB=NB + IF (CUT .LE. NNB) THEN + NNB=CUT + ELSE + COUNT = 0 +* count negative elements, + DO I=CUT+1-NNB,CUT + IF (IPIV(I) .LT. 0) COUNT=COUNT+1 + END DO +* need a even number for a clear cut + IF (MOD(COUNT,2) .EQ. 1) NNB=NNB+1 + END IF + + CUT=CUT-NNB +* +* U01 Block +* + DO I=1,CUT + DO J=1,NNB + WORK(I,J)=A(I,CUT+J) + END DO + END DO +* +* U11 Block +* + DO I=1,NNB + WORK(U11+I,I)=ONE + DO J=1,I-1 + WORK(U11+I,J)=ZERO + END DO + DO J=I+1,NNB + WORK(U11+I,J)=A(CUT+I,CUT+J) + END DO + END DO +* +* invD*U01 +* + I=1 + DO WHILE (I .LE. CUT) + IF (IPIV(I) > 0) THEN + DO J=1,NNB + WORK(I,J)=WORK(I,INVD)*WORK(I,J) + END DO + I=I+1 + ELSE + DO J=1,NNB + U01_I_J = WORK(I,J) + U01_IP1_J = WORK(I+1,J) + WORK(I,J)=WORK(I,INVD)*U01_I_J+ + $ WORK(I,INVD+1)*U01_IP1_J + WORK(I+1,J)=WORK(I+1,INVD)*U01_I_J+ + $ WORK(I+1,INVD+1)*U01_IP1_J + END DO + I=I+2 + END IF + END DO +* +* invD1*U11 +* + I=1 + DO WHILE (I .LE. NNB) + IF (IPIV(CUT+I) > 0) THEN + DO J=I,NNB + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + END DO + I=I+1 + ELSE + DO J=I,NNB + U11_I_J = WORK(U11+I,J) + U11_IP1_J = WORK(U11+I+1,J) + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + + $ WORK(CUT+I,INVD+1)*WORK(U11+I+1,J) + WORK(U11+I+1,J)=WORK(CUT+I+1,INVD)*U11_I_J+ + $ WORK(CUT+I+1,INVD+1)*U11_IP1_J + END DO + I=I+2 + END IF + END DO +* +* U11**T*invD1*U11->U11 +* + CALL DTRMM('L','U','T','U',NNB, NNB, + $ ONE,A(CUT+1,CUT+1),LDA,WORK(U11+1,1),N+NB+1) +* + DO I=1,NNB + DO J=I,NNB + A(CUT+I,CUT+J)=WORK(U11+I,J) + END DO + END DO +* +* U01**T*invD*U01->A(CUT+I,CUT+J) +* + CALL DGEMM('T','N',NNB,NNB,CUT,ONE,A(1,CUT+1),LDA, + $ WORK,N+NB+1, ZERO, WORK(U11+1,1), N+NB+1) + +* +* U11 = U11**T*invD1*U11 + U01**T*invD*U01 +* + DO I=1,NNB + DO J=I,NNB + A(CUT+I,CUT+J)=A(CUT+I,CUT+J)+WORK(U11+I,J) + END DO + END DO +* +* U01 = U00**T*invD0*U01 +* + CALL DTRMM('L',UPLO,'T','U',CUT, NNB, + $ ONE,A,LDA,WORK,N+NB+1) + +* +* Update U01 +* + DO I=1,CUT + DO J=1,NNB + A(I,CUT+J)=WORK(I,J) + END DO + END DO +* +* Next Block +* + END DO +* +* Apply PERMUTATIONS P and P**T: P * inv(U**T)*inv(D)*inv(U) *P**T +* + I=1 + DO WHILE ( I .LE. N ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF (I .LT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, IP ,I ) + ELSE + IP=-IPIV(I) + I=I+1 + IF ( (I-1) .LT. IP) + $ CALL DSYSWAPR( UPLO, N, A, LDA, I-1 ,IP ) + IF ( (I-1) .GT. IP) + $ CALL DSYSWAPR( UPLO, N, A, LDA, IP ,I-1 ) + ENDIF + I=I+1 + END DO + ELSE +* +* LOWER... +* +* invA = P * inv(U**T)*inv(D)*inv(U)*P**T. +* + CALL DTRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D)*inv(U) +* + K=N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK(K,INVD) = ONE / A( K, K ) + WORK(K,INVD+1) = 0 + K=K-1 + ELSE +* 2 x 2 diagonal NNB + T = WORK(K-1,1) + AK = A( K-1, K-1 ) / T + AKP1 = A( K, K ) / T + AKKP1 = WORK(K-1,1) / T + D = T*( AK*AKP1-ONE ) + WORK(K-1,INVD) = AKP1 / D + WORK(K,INVD) = AK / D + WORK(K,INVD+1) = -AKKP1 / D + WORK(K-1,INVD+1) = -AKKP1 / D + K=K-2 + END IF + END DO +* +* inv(U**T) = (inv(U))**T +* +* inv(U**T)*inv(D)*inv(U) +* + CUT=0 + DO WHILE (CUT .LT. N) + NNB=NB + IF (CUT + NNB .GT. N) THEN + NNB=N-CUT + ELSE + COUNT = 0 +* count negative elements, + DO I=CUT+1,CUT+NNB + IF (IPIV(I) .LT. 0) COUNT=COUNT+1 + END DO +* need a even number for a clear cut + IF (MOD(COUNT,2) .EQ. 1) NNB=NNB+1 + END IF +* L21 Block + DO I=1,N-CUT-NNB + DO J=1,NNB + WORK(I,J)=A(CUT+NNB+I,CUT+J) + END DO + END DO +* L11 Block + DO I=1,NNB + WORK(U11+I,I)=ONE + DO J=I+1,NNB + WORK(U11+I,J)=ZERO + END DO + DO J=1,I-1 + WORK(U11+I,J)=A(CUT+I,CUT+J) + END DO + END DO +* +* invD*L21 +* + I=N-CUT-NNB + DO WHILE (I .GE. 1) + IF (IPIV(CUT+NNB+I) > 0) THEN + DO J=1,NNB + WORK(I,J)=WORK(CUT+NNB+I,INVD)*WORK(I,J) + END DO + I=I-1 + ELSE + DO J=1,NNB + U01_I_J = WORK(I,J) + U01_IP1_J = WORK(I-1,J) + WORK(I,J)=WORK(CUT+NNB+I,INVD)*U01_I_J+ + $ WORK(CUT+NNB+I,INVD+1)*U01_IP1_J + WORK(I-1,J)=WORK(CUT+NNB+I-1,INVD+1)*U01_I_J+ + $ WORK(CUT+NNB+I-1,INVD)*U01_IP1_J + END DO + I=I-2 + END IF + END DO +* +* invD1*L11 +* + I=NNB + DO WHILE (I .GE. 1) + IF (IPIV(CUT+I) > 0) THEN + DO J=1,NNB + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + END DO + I=I-1 + ELSE + DO J=1,NNB + U11_I_J = WORK(U11+I,J) + U11_IP1_J = WORK(U11+I-1,J) + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + + $ WORK(CUT+I,INVD+1)*U11_IP1_J + WORK(U11+I-1,J)=WORK(CUT+I-1,INVD+1)*U11_I_J+ + $ WORK(CUT+I-1,INVD)*U11_IP1_J + END DO + I=I-2 + END IF + END DO +* +* L11**T*invD1*L11->L11 +* + CALL DTRMM('L',UPLO,'T','U',NNB, NNB, + $ ONE,A(CUT+1,CUT+1),LDA,WORK(U11+1,1),N+NB+1) + +* + DO I=1,NNB + DO J=1,I + A(CUT+I,CUT+J)=WORK(U11+I,J) + END DO + END DO +* + IF ( (CUT+NNB) .LT. N ) THEN +* +* L21**T*invD2*L21->A(CUT+I,CUT+J) +* + CALL DGEMM('T','N',NNB,NNB,N-NNB-CUT,ONE,A(CUT+NNB+1,CUT+1) + $ ,LDA,WORK,N+NB+1, ZERO, WORK(U11+1,1), N+NB+1) + +* +* L11 = L11**T*invD1*L11 + U01**T*invD*U01 +* + DO I=1,NNB + DO J=1,I + A(CUT+I,CUT+J)=A(CUT+I,CUT+J)+WORK(U11+I,J) + END DO + END DO +* +* L01 = L22**T*invD2*L21 +* + CALL DTRMM('L',UPLO,'T','U', N-NNB-CUT, NNB, + $ ONE,A(CUT+NNB+1,CUT+NNB+1),LDA,WORK,N+NB+1) +* +* Update L21 +* + DO I=1,N-CUT-NNB + DO J=1,NNB + A(CUT+NNB+I,CUT+J)=WORK(I,J) + END DO + END DO + + ELSE +* +* L11 = L11**T*invD1*L11 +* + DO I=1,NNB + DO J=1,I + A(CUT+I,CUT+J)=WORK(U11+I,J) + END DO + END DO + END IF +* +* Next Block +* + CUT=CUT+NNB + END DO +* +* Apply PERMUTATIONS P and P**T: P * inv(U**T)*inv(D)*inv(U) *P**T +* + I=N + DO WHILE ( I .GE. 1 ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF (I .LT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, IP ,I ) + ELSE + IP=-IPIV(I) + IF ( I .LT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, I ,IP ) + IF ( I .GT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, IP, I ) + I=I-1 + ENDIF + I=I-1 + END DO + END IF +* + RETURN +* +* End of DSYTRI2X +* + END + diff --git a/dspl/liblapack/SRC/dsytri_3.f b/dspl/liblapack/SRC/dsytri_3.f new file mode 100644 index 0000000..d1980f8 --- /dev/null +++ b/dspl/liblapack/SRC/dsytri_3.f @@ -0,0 +1,248 @@ +*> \brief \b DSYTRI_3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRI_3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRI_3( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), E( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> DSYTRI_3 computes the inverse of a real symmetric indefinite +*> matrix A using the factorization computed by DSYTRF_RK or DSYTRF_BK: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> DSYTRI_3 sets the leading dimension of the workspace before calling +*> DSYTRI_3X that actually computes the inverse. This is the blocked +*> version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix. +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, diagonal of the block diagonal matrix D and +*> factors U or L as computed by DSYTRF_RK and DSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> On exit, if INFO = 0, the symmetric inverse of the original +*> matrix. +*> If UPLO = 'U': the upper triangular part of the inverse +*> is formed and the part of A below the diagonal is not +*> referenced; +*> If UPLO = 'L': the lower triangular part of the inverse +*> is formed and the part of A above the diagonal is not +*> referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSYTRF_RK or DSYTRF_BK. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N+NB+1)*(NB+3). +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= (N+NB+1)*(NB+3). +*> +*> If LDWORK = -1, then a workspace query is assumed; +*> the routine only calculates the optimal size of the optimal +*> size of the WORK array, returns this value as the first +*> entry of the WORK array, and no error message related to +*> LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleSYcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> November 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYTRI_3( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, LQUERY + INTEGER LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DSYTRI_3X, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* Determine the block size +* + NB = MAX( 1, ILAENV( 1, 'DSYTRI_3', UPLO, N, -1, -1, -1 ) ) + LWKOPT = ( N+NB+1 ) * ( NB+3 ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF ( LWORK .LT. LWKOPT .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRI_3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK( 1 ) = LWKOPT + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + CALL DSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DSYTRI_3 +* + END diff --git a/dspl/liblapack/SRC/dsytri_3x.f b/dspl/liblapack/SRC/dsytri_3x.f new file mode 100644 index 0000000..d95e627 --- /dev/null +++ b/dspl/liblapack/SRC/dsytri_3x.f @@ -0,0 +1,645 @@ +*> \brief \b DSYTRI_3X +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRI_3X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), E( * ), WORK( N+NB+1, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> DSYTRI_3X computes the inverse of a real symmetric indefinite +*> matrix A using the factorization computed by DSYTRF_RK or DSYTRF_BK: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix. +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, diagonal of the block diagonal matrix D and +*> factors U or L as computed by DSYTRF_RK and DSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> On exit, if INFO = 0, the symmetric inverse of the original +*> matrix. +*> If UPLO = 'U': the upper triangular part of the inverse +*> is formed and the part of A below the diagonal is not +*> referenced; +*> If UPLO = 'L': the lower triangular part of the inverse +*> is formed and the part of A above the diagonal is not +*> referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSYTRF_RK or DSYTRF_BK. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N+NB+1,NB+3). +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> Block size. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleSYcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> June 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), E( * ), WORK( N+NB+1, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11 + DOUBLE PRECISION AK, AKKP1, AKP1, D, T, U01_I_J, U01_IP1_J, + $ U11_I_J, U11_IP1_J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DSYSWAPR, DTRTRI, DTRMM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF +* +* Quick return if possible +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRI_3X', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN +* +* Workspace got Non-diag elements of D +* + DO K = 1, N + WORK( K, 1 ) = E( K ) + END DO +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + END DO + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + END DO + END IF +* + INFO = 0 +* +* Splitting Workspace +* U01 is a block ( N, NB+1 ) +* The first element of U01 is in WORK( 1, 1 ) +* U11 is a block ( NB+1, NB+1 ) +* The first element of U11 is in WORK( N+1, 1 ) +* + U11 = N +* +* INVD is a block ( N, 2 ) +* The first element of INVD is in WORK( 1, INVD ) +* + INVD = NB + 2 + + IF( UPPER ) THEN +* +* Begin Upper +* +* invA = P * inv(U**T) * inv(D) * inv(U) * P**T. +* + CALL DTRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D) * inv(U) +* + K = 1 + DO WHILE( K.LE.N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK( K, INVD ) = ONE / A( K, K ) + WORK( K, INVD+1 ) = ZERO + ELSE +* 2 x 2 diagonal NNB + T = WORK( K+1, 1 ) + AK = A( K, K ) / T + AKP1 = A( K+1, K+1 ) / T + AKKP1 = WORK( K+1, 1 ) / T + D = T*( AK*AKP1-ONE ) + WORK( K, INVD ) = AKP1 / D + WORK( K+1, INVD+1 ) = AK / D + WORK( K, INVD+1 ) = -AKKP1 / D + WORK( K+1, INVD ) = WORK( K, INVD+1 ) + K = K + 1 + END IF + K = K + 1 + END DO +* +* inv(U**T) = (inv(U))**T +* +* inv(U**T) * inv(D) * inv(U) +* + CUT = N + DO WHILE( CUT.GT.0 ) + NNB = NB + IF( CUT.LE.NNB ) THEN + NNB = CUT + ELSE + ICOUNT = 0 +* count negative elements, + DO I = CUT+1-NNB, CUT + IF( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1 + END DO +* need a even number for a clear cut + IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1 + END IF + + CUT = CUT - NNB +* +* U01 Block +* + DO I = 1, CUT + DO J = 1, NNB + WORK( I, J ) = A( I, CUT+J ) + END DO + END DO +* +* U11 Block +* + DO I = 1, NNB + WORK( U11+I, I ) = ONE + DO J = 1, I-1 + WORK( U11+I, J ) = ZERO + END DO + DO J = I+1, NNB + WORK( U11+I, J ) = A( CUT+I, CUT+J ) + END DO + END DO +* +* invD * U01 +* + I = 1 + DO WHILE( I.LE.CUT ) + IF( IPIV( I ).GT.0 ) THEN + DO J = 1, NNB + WORK( I, J ) = WORK( I, INVD ) * WORK( I, J ) + END DO + ELSE + DO J = 1, NNB + U01_I_J = WORK( I, J ) + U01_IP1_J = WORK( I+1, J ) + WORK( I, J ) = WORK( I, INVD ) * U01_I_J + $ + WORK( I, INVD+1 ) * U01_IP1_J + WORK( I+1, J ) = WORK( I+1, INVD ) * U01_I_J + $ + WORK( I+1, INVD+1 ) * U01_IP1_J + END DO + I = I + 1 + END IF + I = I + 1 + END DO +* +* invD1 * U11 +* + I = 1 + DO WHILE ( I.LE.NNB ) + IF( IPIV( CUT+I ).GT.0 ) THEN + DO J = I, NNB + WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) + END DO + ELSE + DO J = I, NNB + U11_I_J = WORK(U11+I,J) + U11_IP1_J = WORK(U11+I+1,J) + WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) + $ + WORK(CUT+I,INVD+1) * WORK(U11+I+1,J) + WORK( U11+I+1, J ) = WORK(CUT+I+1,INVD) * U11_I_J + $ + WORK(CUT+I+1,INVD+1) * U11_IP1_J + END DO + I = I + 1 + END IF + I = I + 1 + END DO +* +* U11**T * invD1 * U11 -> U11 +* + CALL DTRMM( 'L', 'U', 'T', 'U', NNB, NNB, + $ ONE, A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ), + $ N+NB+1 ) +* + DO I = 1, NNB + DO J = I, NNB + A( CUT+I, CUT+J ) = WORK( U11+I, J ) + END DO + END DO +* +* U01**T * invD * U01 -> A( CUT+I, CUT+J ) +* + CALL DGEMM( 'T', 'N', NNB, NNB, CUT, ONE, A( 1, CUT+1 ), + $ LDA, WORK, N+NB+1, ZERO, WORK(U11+1,1), N+NB+1 ) + +* +* U11 = U11**T * invD1 * U11 + U01**T * invD * U01 +* + DO I = 1, NNB + DO J = I, NNB + A( CUT+I, CUT+J ) = A( CUT+I, CUT+J ) + WORK(U11+I,J) + END DO + END DO +* +* U01 = U00**T * invD0 * U01 +* + CALL DTRMM( 'L', UPLO, 'T', 'U', CUT, NNB, + $ ONE, A, LDA, WORK, N+NB+1 ) + +* +* Update U01 +* + DO I = 1, CUT + DO J = 1, NNB + A( I, CUT+J ) = WORK( I, J ) + END DO + END DO +* +* Next Block +* + END DO +* +* Apply PERMUTATIONS P and P**T: +* P * inv(U**T) * inv(D) * inv(U) * P**T. +* Interchange rows and columns I and IPIV(I) in reverse order +* from the formation order of IPIV vector for Upper case. +* +* ( We can use a loop over IPIV with increment 1, +* since the ABS value of IPIV(I) represents the row (column) +* index of the interchange with row (column) i in both 1x1 +* and 2x2 pivot cases, i.e. we don't need separate code branches +* for 1x1 and 2x2 pivot cases ) +* + DO I = 1, N + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + IF (I .LT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, IP ,I ) + END IF + END DO +* + ELSE +* +* Begin Lower +* +* inv A = P * inv(L**T) * inv(D) * inv(L) * P**T. +* + CALL DTRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D) * inv(L) +* + K = N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK( K, INVD ) = ONE / A( K, K ) + WORK( K, INVD+1 ) = ZERO + ELSE +* 2 x 2 diagonal NNB + T = WORK( K-1, 1 ) + AK = A( K-1, K-1 ) / T + AKP1 = A( K, K ) / T + AKKP1 = WORK( K-1, 1 ) / T + D = T*( AK*AKP1-ONE ) + WORK( K-1, INVD ) = AKP1 / D + WORK( K, INVD ) = AK / D + WORK( K, INVD+1 ) = -AKKP1 / D + WORK( K-1, INVD+1 ) = WORK( K, INVD+1 ) + K = K - 1 + END IF + K = K - 1 + END DO +* +* inv(L**T) = (inv(L))**T +* +* inv(L**T) * inv(D) * inv(L) +* + CUT = 0 + DO WHILE( CUT.LT.N ) + NNB = NB + IF( (CUT + NNB).GT.N ) THEN + NNB = N - CUT + ELSE + ICOUNT = 0 +* count negative elements, + DO I = CUT + 1, CUT+NNB + IF ( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1 + END DO +* need a even number for a clear cut + IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1 + END IF +* +* L21 Block +* + DO I = 1, N-CUT-NNB + DO J = 1, NNB + WORK( I, J ) = A( CUT+NNB+I, CUT+J ) + END DO + END DO +* +* L11 Block +* + DO I = 1, NNB + WORK( U11+I, I) = ONE + DO J = I+1, NNB + WORK( U11+I, J ) = ZERO + END DO + DO J = 1, I-1 + WORK( U11+I, J ) = A( CUT+I, CUT+J ) + END DO + END DO +* +* invD*L21 +* + I = N-CUT-NNB + DO WHILE( I.GE.1 ) + IF( IPIV( CUT+NNB+I ).GT.0 ) THEN + DO J = 1, NNB + WORK( I, J ) = WORK( CUT+NNB+I, INVD) * WORK( I, J) + END DO + ELSE + DO J = 1, NNB + U01_I_J = WORK(I,J) + U01_IP1_J = WORK(I-1,J) + WORK(I,J)=WORK(CUT+NNB+I,INVD)*U01_I_J+ + $ WORK(CUT+NNB+I,INVD+1)*U01_IP1_J + WORK(I-1,J)=WORK(CUT+NNB+I-1,INVD+1)*U01_I_J+ + $ WORK(CUT+NNB+I-1,INVD)*U01_IP1_J + END DO + I = I - 1 + END IF + I = I - 1 + END DO +* +* invD1*L11 +* + I = NNB + DO WHILE( I.GE.1 ) + IF( IPIV( CUT+I ).GT.0 ) THEN + DO J = 1, NNB + WORK( U11+I, J ) = WORK( CUT+I, INVD)*WORK(U11+I,J) + END DO + + ELSE + DO J = 1, NNB + U11_I_J = WORK( U11+I, J ) + U11_IP1_J = WORK( U11+I-1, J ) + WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) + $ + WORK(CUT+I,INVD+1) * U11_IP1_J + WORK( U11+I-1, J ) = WORK(CUT+I-1,INVD+1) * U11_I_J + $ + WORK(CUT+I-1,INVD) * U11_IP1_J + END DO + I = I - 1 + END IF + I = I - 1 + END DO +* +* L11**T * invD1 * L11 -> L11 +* + CALL DTRMM( 'L', UPLO, 'T', 'U', NNB, NNB, ONE, + $ A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ), + $ N+NB+1 ) + +* + DO I = 1, NNB + DO J = 1, I + A( CUT+I, CUT+J ) = WORK( U11+I, J ) + END DO + END DO +* + IF( (CUT+NNB).LT.N ) THEN +* +* L21**T * invD2*L21 -> A( CUT+I, CUT+J ) +* + CALL DGEMM( 'T', 'N', NNB, NNB, N-NNB-CUT, ONE, + $ A( CUT+NNB+1, CUT+1 ), LDA, WORK, N+NB+1, + $ ZERO, WORK( U11+1, 1 ), N+NB+1 ) + +* +* L11 = L11**T * invD1 * L11 + U01**T * invD * U01 +* + DO I = 1, NNB + DO J = 1, I + A( CUT+I, CUT+J ) = A( CUT+I, CUT+J )+WORK(U11+I,J) + END DO + END DO +* +* L01 = L22**T * invD2 * L21 +* + CALL DTRMM( 'L', UPLO, 'T', 'U', N-NNB-CUT, NNB, ONE, + $ A( CUT+NNB+1, CUT+NNB+1 ), LDA, WORK, + $ N+NB+1 ) +* +* Update L21 +* + DO I = 1, N-CUT-NNB + DO J = 1, NNB + A( CUT+NNB+I, CUT+J ) = WORK( I, J ) + END DO + END DO +* + ELSE +* +* L11 = L11**T * invD1 * L11 +* + DO I = 1, NNB + DO J = 1, I + A( CUT+I, CUT+J ) = WORK( U11+I, J ) + END DO + END DO + END IF +* +* Next Block +* + CUT = CUT + NNB +* + END DO +* +* Apply PERMUTATIONS P and P**T: +* P * inv(L**T) * inv(D) * inv(L) * P**T. +* Interchange rows and columns I and IPIV(I) in reverse order +* from the formation order of IPIV vector for Lower case. +* +* ( We can use a loop over IPIV with increment -1, +* since the ABS value of IPIV(I) represents the row (column) +* index of the interchange with row (column) i in both 1x1 +* and 2x2 pivot cases, i.e. we don't need separate code branches +* for 1x1 and 2x2 pivot cases ) +* + DO I = N, 1, -1 + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + IF (I .LT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, IP ,I ) + END IF + END DO +* + END IF +* + RETURN +* +* End of DSYTRI_3X +* + END + diff --git a/dspl/liblapack/SRC/dsytri_rook.f b/dspl/liblapack/SRC/dsytri_rook.f new file mode 100644 index 0000000..cad2a7e --- /dev/null +++ b/dspl/liblapack/SRC/dsytri_rook.f @@ -0,0 +1,450 @@ +*> \brief \b DSYTRI_ROOK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRI_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRI_ROOK( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRI_ROOK computes the inverse of a real symmetric +*> matrix A using the factorization A = U*D*U**T or A = L*D*L**T +*> computed by DSYTRF_ROOK. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by DSYTRF_ROOK. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSYTRF_ROOK. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> April 2012, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYTRI_ROOK( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KP, KSTEP + DOUBLE PRECISION AK, AKKP1, AKP1, D, T, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT + EXTERNAL LSAME, DDOT +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSWAP, DSYMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRI_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL DCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL DSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - DDOT( K-1, WORK, 1, A( 1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K+1 ) ) + AK = A( K, K ) / T + AKP1 = A( K+1, K+1 ) / T + AKKP1 = A( K, K+1 ) / T + D = T*( AK*AKP1-ONE ) + A( K, K ) = AKP1 / D + A( K+1, K+1 ) = AK / D + A( K, K+1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL DCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL DSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - DDOT( K-1, WORK, 1, A( 1, K ), + $ 1 ) + A( K, K+1 ) = A( K, K+1 ) - + $ DDOT( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 ) + CALL DCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 ) + CALL DSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K+1 ), 1 ) + A( K+1, K+1 ) = A( K+1, K+1 ) - + $ DDOT( K-1, WORK, 1, A( 1, K+1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* Interchange rows and columns K and IPIV(K) in the leading +* submatrix A(1:k+1,1:k+1) +* + KP = IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.GT.1 ) + $ CALL DSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + CALL DSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + ELSE +* +* Interchange rows and columns K and K+1 with -IPIV(K) and +* -IPIV(K+1)in the leading submatrix A(1:k+1,1:k+1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.GT.1 ) + $ CALL DSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + CALL DSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA ) +* + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + TEMP = A( K, K+1 ) + A( K, K+1 ) = A( KP, K+1 ) + A( KP, K+1 ) = TEMP + END IF +* + K = K + 1 + KP = -IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.GT.1 ) + $ CALL DSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + CALL DSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + END IF +* + K = K + 1 + GO TO 30 + 40 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 50 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 60 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL DCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL DSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - DDOT( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K-1 ) ) + AK = A( K-1, K-1 ) / T + AKP1 = A( K, K ) / T + AKKP1 = A( K, K-1 ) / T + D = T*( AK*AKP1-ONE ) + A( K-1, K-1 ) = AKP1 / D + A( K, K ) = AK / D + A( K, K-1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL DCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL DSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - DDOT( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + A( K, K-1 ) = A( K, K-1 ) - + $ DDOT( N-K, A( K+1, K ), 1, A( K+1, K-1 ), + $ 1 ) + CALL DCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 ) + CALL DSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K-1 ), 1 ) + A( K-1, K-1 ) = A( K-1, K-1 ) - + $ DDOT( N-K, WORK, 1, A( K+1, K-1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* Interchange rows and columns K and IPIV(K) in the trailing +* submatrix A(k-1:n,k-1:n) +* + KP = IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.LT.N ) + $ CALL DSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + CALL DSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + ELSE +* +* Interchange rows and columns K and K-1 with -IPIV(K) and +* -IPIV(K-1) in the trailing submatrix A(k-1:n,k-1:n) +* + KP = -IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.LT.N ) + $ CALL DSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + CALL DSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA ) +* + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + TEMP = A( K, K-1 ) + A( K, K-1 ) = A( KP, K-1 ) + A( KP, K-1 ) = TEMP + END IF +* + K = K - 1 + KP = -IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.LT.N ) + $ CALL DSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + CALL DSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + END IF +* + K = K - 1 + GO TO 50 + 60 CONTINUE + END IF +* + RETURN +* +* End of DSYTRI_ROOK +* + END diff --git a/dspl/liblapack/SRC/dsytrs.f b/dspl/liblapack/SRC/dsytrs.f new file mode 100644 index 0000000..e5988f2 --- /dev/null +++ b/dspl/liblapack/SRC/dsytrs.f @@ -0,0 +1,445 @@ +*> \brief \b DSYTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRS solves a system of linear equations A*X = B with a real +*> symmetric matrix A using the factorization A = U*D*U**T or +*> A = L*D*L**T computed by DSYTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by DSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSYTRF. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP + DOUBLE PRECISION AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DGER, DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**T. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL DGER( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL DSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL DSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL DGER( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL DGER( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ), + $ LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K-1, K ) + AKM1 = A( K-1, K-1 ) / AKM1K + AK = A( K, K ) / AKM1K + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / AKM1K + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U**T *X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U**T(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ), + $ 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U**T(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ), + $ 1, ONE, B( K, 1 ), LDB ) + CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, + $ A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB ) +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L**T. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL DGER( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL DSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL DSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL DGER( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL DGER( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K+1, K ) + AKM1 = A( K, K ) / AKM1K + AK = A( K+1, K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / AKM1K + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L**T *X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L**T(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L**T(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K-1 ), 1, ONE, B( K-1, 1 ), + $ LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of DSYTRS +* + END diff --git a/dspl/liblapack/SRC/dsytrs2.f b/dspl/liblapack/SRC/dsytrs2.f new file mode 100644 index 0000000..c7ca8e9 --- /dev/null +++ b/dspl/liblapack/SRC/dsytrs2.f @@ -0,0 +1,361 @@ +*> \brief \b DSYTRS2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRS2( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRS2 solves a system of linear equations A*X = B with a real +*> symmetric matrix A using the factorization A = U*D*U**T or +*> A = L*D*L**T computed by DSYTRF and converted by DSYCONV. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by DSYTRF. +*> Note that A is input / output. This might be counter-intuitive, +*> and one may think that A is input only. A is input / output. This +*> is because, at the start of the subroutine, we permute A in a +*> "better" form and then we permute A back to its original form at +*> the end. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSYTRF. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DSYTRS2( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IINFO, J, K, KP + DOUBLE PRECISION AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSYCONV, DSWAP, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRS2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Convert A +* + CALL DSYCONV( UPLO, 'C', N, A, LDA, IPIV, WORK, IINFO ) +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**T. +* +* P**T * B + K=N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K-1 and -IPIV(K). + KP = -IPIV( K ) + IF( KP.EQ.-IPIV( K-1 ) ) + $ CALL DSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-2 + END IF + END DO +* +* Compute (U \P**T * B) -> B [ (U \P**T * B) ] +* + CALL DTRSM('L','U','N','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* Compute D \ B -> B [ D \ (U \P**T * B) ] +* + I=N + DO WHILE ( I .GE. 1 ) + IF( IPIV(I) .GT. 0 ) THEN + CALL DSCAL( NRHS, ONE / A( I, I ), B( I, 1 ), LDB ) + ELSEIF ( I .GT. 1) THEN + IF ( IPIV(I-1) .EQ. IPIV(I) ) THEN + AKM1K = WORK(I) + AKM1 = A( I-1, I-1 ) / AKM1K + AK = A( I, I ) / AKM1K + DENOM = AKM1*AK - ONE + DO 15 J = 1, NRHS + BKM1 = B( I-1, J ) / AKM1K + BK = B( I, J ) / AKM1K + B( I-1, J ) = ( AK*BKM1-BK ) / DENOM + B( I, J ) = ( AKM1*BK-BKM1 ) / DENOM + 15 CONTINUE + I = I - 1 + ENDIF + ENDIF + I = I - 1 + END DO +* +* Compute (U**T \ B) -> B [ U**T \ (D \ (U \P**T * B) ) ] +* + CALL DTRSM('L','U','T','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* P * B [ P * (U**T \ (D \ (U \P**T * B) )) ] +* + K=1 + DO WHILE ( K .LE. N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K-1 and -IPIV(K). + KP = -IPIV( K ) + IF( K .LT. N .AND. KP.EQ.-IPIV( K+1 ) ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+2 + ENDIF + END DO +* + ELSE +* +* Solve A*X = B, where A = L*D*L**T. +* +* P**T * B + K=1 + DO WHILE ( K .LE. N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K and -IPIV(K+1). + KP = -IPIV( K+1 ) + IF( KP.EQ.-IPIV( K ) ) + $ CALL DSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+2 + ENDIF + END DO +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL DTRSM('L','L','N','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* Compute D \ B -> B [ D \ (L \P**T * B) ] +* + I=1 + DO WHILE ( I .LE. N ) + IF( IPIV(I) .GT. 0 ) THEN + CALL DSCAL( NRHS, ONE / A( I, I ), B( I, 1 ), LDB ) + ELSE + AKM1K = WORK(I) + AKM1 = A( I, I ) / AKM1K + AK = A( I+1, I+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 25 J = 1, NRHS + BKM1 = B( I, J ) / AKM1K + BK = B( I+1, J ) / AKM1K + B( I, J ) = ( AK*BKM1-BK ) / DENOM + B( I+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 25 CONTINUE + I = I + 1 + ENDIF + I = I + 1 + END DO +* +* Compute (L**T \ B) -> B [ L**T \ (D \ (L \P**T * B) ) ] +* + CALL DTRSM('L','L','T','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* P * B [ P * (L**T \ (D \ (L \P**T * B) )) ] +* + K=N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K-1 and -IPIV(K). + KP = -IPIV( K ) + IF( K.GT.1 .AND. KP.EQ.-IPIV( K-1 ) ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-2 + ENDIF + END DO +* + END IF +* +* Revert A +* + CALL DSYCONV( UPLO, 'R', N, A, LDA, IPIV, WORK, IINFO ) +* + RETURN +* +* End of DSYTRS2 +* + END diff --git a/dspl/liblapack/SRC/dsytrs_3.f b/dspl/liblapack/SRC/dsytrs_3.f new file mode 100644 index 0000000..2d61f6b --- /dev/null +++ b/dspl/liblapack/SRC/dsytrs_3.f @@ -0,0 +1,371 @@ +*> \brief \b DSYTRS_3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRS_3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> DSYTRS_3 solves a system of linear equations A * X = B with a real +*> symmetric matrix A using the factorization computed +*> by DSYTRF_RK or DSYTRF_BK: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This algorithm is using Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix: +*> = 'U': Upper triangular, form is A = P*U*D*(U**T)*(P**T); +*> = 'L': Lower triangular, form is A = P*L*D*(L**T)*(P**T). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> Diagonal of the block diagonal matrix D and factors U or L +*> as computed by DSYTRF_RK and DSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSYTRF_RK or DSYTRF_BK. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> June 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYTRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J, K, KP + DOUBLE PRECISION AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSWAP, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRS_3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Begin Upper +* +* Solve A*X = B, where A = U*D*U**T. +* +* P**T * B +* +* Interchange rows K and IPIV(K) of matrix B in the same order +* that the formation order of IPIV(I) vector for Upper case. +* +* (We can do the simple loop over IPIV with decrement -1, +* since the ABS value of IPIV( I ) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = N, 1, -1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* +* Compute (U \P**T * B) -> B [ (U \P**T * B) ] +* + CALL DTRSM( 'L', 'U', 'N', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* Compute D \ B -> B [ D \ (U \P**T * B) ] +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN + CALL DSCAL( NRHS, ONE / A( I, I ), B( I, 1 ), LDB ) + ELSE IF ( I.GT.1 ) THEN + AKM1K = E( I ) + AKM1 = A( I-1, I-1 ) / AKM1K + AK = A( I, I ) / AKM1K + DENOM = AKM1*AK - ONE + DO J = 1, NRHS + BKM1 = B( I-1, J ) / AKM1K + BK = B( I, J ) / AKM1K + B( I-1, J ) = ( AK*BKM1-BK ) / DENOM + B( I, J ) = ( AKM1*BK-BKM1 ) / DENOM + END DO + I = I - 1 + END IF + I = I - 1 + END DO +* +* Compute (U**T \ B) -> B [ U**T \ (D \ (U \P**T * B) ) ] +* + CALL DTRSM( 'L', 'U', 'T', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* P * B [ P * (U**T \ (D \ (U \P**T * B) )) ] +* +* Interchange rows K and IPIV(K) of matrix B in reverse order +* from the formation order of IPIV(I) vector for Upper case. +* +* (We can do the simple loop over IPIV with increment 1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = 1, N + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* + ELSE +* +* Begin Lower +* +* Solve A*X = B, where A = L*D*L**T. +* +* P**T * B +* Interchange rows K and IPIV(K) of matrix B in the same order +* that the formation order of IPIV(I) vector for Lower case. +* +* (We can do the simple loop over IPIV with increment 1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = 1, N + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL DTRSM( 'L', 'L', 'N', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* Compute D \ B -> B [ D \ (L \P**T * B) ] +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN + CALL DSCAL( NRHS, ONE / A( I, I ), B( I, 1 ), LDB ) + ELSE IF( I.LT.N ) THEN + AKM1K = E( I ) + AKM1 = A( I, I ) / AKM1K + AK = A( I+1, I+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO J = 1, NRHS + BKM1 = B( I, J ) / AKM1K + BK = B( I+1, J ) / AKM1K + B( I, J ) = ( AK*BKM1-BK ) / DENOM + B( I+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + END DO + I = I + 1 + END IF + I = I + 1 + END DO +* +* Compute (L**T \ B) -> B [ L**T \ (D \ (L \P**T * B) ) ] +* + CALL DTRSM('L', 'L', 'T', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* P * B [ P * (L**T \ (D \ (L \P**T * B) )) ] +* +* Interchange rows K and IPIV(K) of matrix B in reverse order +* from the formation order of IPIV(I) vector for Lower case. +* +* (We can do the simple loop over IPIV with decrement -1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = N, 1, -1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* +* END Lower +* + END IF +* + RETURN +* +* End of DSYTRS_3 +* + END diff --git a/dspl/liblapack/SRC/dsytrs_aa.f b/dspl/liblapack/SRC/dsytrs_aa.f new file mode 100644 index 0000000..05ef31f --- /dev/null +++ b/dspl/liblapack/SRC/dsytrs_aa.f @@ -0,0 +1,285 @@ +*> \brief \b DSYTRS_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRS_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRS_AA solves a system of linear equations A*X = B with a real +*> symmetric matrix A using the factorization A = U*T*U**T or +*> A = L*T*L**T computed by DSYTRF_AA. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*T*U**T; +*> = 'L': Lower triangular, form is A = L*T*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> Details of factors computed by DSYTRF_AA. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges as computed by DSYTRF_AA. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is DOUBLE array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER, LWORK >= MAX(1,3*N-2). +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER K, KP, LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLACPY, DGTSV, DSWAP, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, 3*N-2 ) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRS_AA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + LWKOPT = (3*N-2) + WORK( 1 ) = LWKOPT + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*T*U**T. +* +* Pivot, P**T * B +* + DO K = 1, N + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* +* Compute (U \P**T * B) -> B [ (U \P**T * B) ] +* + CALL DTRSM('L', 'U', 'T', 'U', N-1, NRHS, ONE, A( 1, 2 ), LDA, + $ B( 2, 1 ), LDB) +* +* Compute T \ B -> B [ T \ (U \P**T * B) ] +* + CALL DLACPY( 'F', 1, N, A( 1, 1 ), LDA+1, WORK( N ), 1) + IF( N.GT.1 ) THEN + CALL DLACPY( 'F', 1, N-1, A( 1, 2 ), LDA+1, WORK( 1 ), 1 ) + CALL DLACPY( 'F', 1, N-1, A( 1, 2 ), LDA+1, WORK( 2*N ), 1 ) + END IF + CALL DGTSV( N, NRHS, WORK( 1 ), WORK( N ), WORK( 2*N ), B, LDB, + $ INFO ) +* +* Compute (U**T \ B) -> B [ U**T \ (T \ (U \P**T * B) ) ] +* + CALL DTRSM( 'L', 'U', 'N', 'U', N-1, NRHS, ONE, A( 1, 2 ), LDA, + $ B( 2, 1 ), LDB) +* +* Pivot, P * B [ P * (U**T \ (T \ (U \P**T * B) )) ] +* + DO K = N, 1, -1 + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* + ELSE +* +* Solve A*X = B, where A = L*T*L**T. +* +* Pivot, P**T * B +* + DO K = 1, N + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL DTRSM( 'L', 'L', 'N', 'U', N-1, NRHS, ONE, A( 2, 1 ), LDA, + $ B( 2, 1 ), LDB) +* +* Compute T \ B -> B [ T \ (L \P**T * B) ] +* + CALL DLACPY( 'F', 1, N, A(1, 1), LDA+1, WORK(N), 1) + IF( N.GT.1 ) THEN + CALL DLACPY( 'F', 1, N-1, A( 2, 1 ), LDA+1, WORK( 1 ), 1 ) + CALL DLACPY( 'F', 1, N-1, A( 2, 1 ), LDA+1, WORK( 2*N ), 1 ) + END IF + CALL DGTSV( N, NRHS, WORK( 1 ), WORK(N), WORK( 2*N ), B, LDB, + $ INFO) +* +* Compute (L**T \ B) -> B [ L**T \ (T \ (L \P**T * B) ) ] +* + CALL DTRSM( 'L', 'L', 'T', 'U', N-1, NRHS, ONE, A( 2, 1 ), LDA, + $ B( 2, 1 ), LDB) +* +* Pivot, P * B [ P * (L**T \ (T \ (L \P**T * B) )) ] +* + DO K = N, 1, -1 + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* + END IF +* + RETURN +* +* End of DSYTRS_AA +* + END diff --git a/dspl/liblapack/SRC/dsytrs_aa_2stage.f b/dspl/liblapack/SRC/dsytrs_aa_2stage.f new file mode 100644 index 0000000..caff5d4 --- /dev/null +++ b/dspl/liblapack/SRC/dsytrs_aa_2stage.f @@ -0,0 +1,281 @@ +*> \brief \b DSYTRS_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRS_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, +* IPIV2, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* DOUBLE PRECISION A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRS_AA_2STAGE solves a system of linear equations A*X = B with a real +*> symmetric matrix A using the factorization A = U*T*U**T or +*> A = L*T*L**T computed by DSYTRF_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*T*U**T; +*> = 'L': Lower triangular, form is A = L*T*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> Details of factors computed by DSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is DOUBLE PRECISION array, dimension (LTB) +*> Details of factors computed by DSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> DSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] IPIV2 +*> \verbatim +*> IPIV2 is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> DSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + DOUBLE PRECISION A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER LDTB, NB + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGBTRS, DLASWP, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LTB.LT.( 4*N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRS_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Read NB and compute LDTB +* + NB = INT( TB( 1 ) ) + LDTB = LTB/N +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*T*U**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL DLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (U**T \P**T * B) -> B [ (U**T \P**T * B) ] +* + CALL DTRSM( 'L', 'U', 'T', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (U**T \P**T * B) ] +* + CALL DGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (U \ B) -> B [ U \ (T \ (U**T \P**T * B) ) ] +* + CALL DTRSM( 'L', 'U', 'N', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (U \ (T \ (U**T \P**T * B) )) ] +* + CALL DLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF +* + ELSE +* +* Solve A*X = B, where A = L*T*L**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL DLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL DTRSM( 'L', 'L', 'N', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (L \P**T * B) ] +* + CALL DGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (L**T \ B) -> B [ L**T \ (T \ (L \P**T * B) ) ] +* + CALL DTRSM( 'L', 'L', 'T', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (L**T \ (T \ (L \P**T * B) )) ] +* + CALL DLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF + END IF +* + RETURN +* +* End of DSYTRS_AA_2STAGE +* + END diff --git a/dspl/liblapack/SRC/dsytrs_rook.f b/dspl/liblapack/SRC/dsytrs_rook.f new file mode 100644 index 0000000..94a5e00 --- /dev/null +++ b/dspl/liblapack/SRC/dsytrs_rook.f @@ -0,0 +1,484 @@ +*> \brief \b DSYTRS_ROOK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYTRS_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYTRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYTRS_ROOK solves a system of linear equations A*X = B with +*> a real symmetric matrix A using the factorization A = U*D*U**T or +*> A = L*D*L**T computed by DSYTRF_ROOK. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by DSYTRF_ROOK. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSYTRF_ROOK. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> April 2012, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DSYTRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP + DOUBLE PRECISION AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DGER, DSCAL, DSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYTRS_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**T. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL DGER( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL DSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K and -IPIV(K) THEN K-1 and -IPIV(K-1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K-1 ) + IF( KP.NE.K-1 ) + $ CALL DSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.GT.2 ) THEN + CALL DGER( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), + $ LDB, B( 1, 1 ), LDB ) + CALL DGER( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ), + $ LDB, B( 1, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K-1, K ) + AKM1 = A( K-1, K-1 ) / AKM1K + AK = A( K, K ) / AKM1K + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / AKM1K + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U**T *X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U**T(K)), where U(K) is the transformation +* stored in column K of A. +* + IF( K.GT.1 ) + $ CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K ), 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U**T(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.GT.1 ) THEN + CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K) THEN K+1 and -IPIV(K+1). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K+1 ) + IF( KP.NE.K+1 ) + $ CALL DSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L**T. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL DGER( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL DSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K and -IPIV(K) THEN K+1 and -IPIV(K+1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K+1 ) + IF( KP.NE.K+1 ) + $ CALL DSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL DGER( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL DGER( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K+1, K ) + AKM1 = A( K, K ) / AKM1K + AK = A( K+1, K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / AKM1K + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L**T *X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L**T(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L**T(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K-1 ), 1, ONE, B( K-1, 1 ), + $ LDB ) + END IF +* +* Interchange rows K and -IPIV(K) THEN K-1 and -IPIV(K-1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K-1 ) + IF( KP.NE.K-1 ) + $ CALL DSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of DSYTRS_ROOK +* + END diff --git a/dspl/liblapack/SRC/dtbcon.f b/dspl/liblapack/SRC/dtbcon.f new file mode 100644 index 0000000..ec0d3a1 --- /dev/null +++ b/dspl/liblapack/SRC/dtbcon.f @@ -0,0 +1,284 @@ +*> \brief \b DTBCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTBCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTBCON( NORM, UPLO, DIAG, N, KD, AB, LDAB, RCOND, WORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER INFO, KD, LDAB, N +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION AB( LDAB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTBCON estimates the reciprocal of the condition number of a +*> triangular band matrix A, in either the 1-norm or the infinity-norm. +*> +*> The norm of A is computed and an estimate is obtained for +*> norm(inv(A)), then the reciprocal of the condition number is +*> computed as +*> RCOND = 1 / ( norm(A) * norm(inv(A)) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals or subdiagonals of the +*> triangular band matrix A. KD >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> The upper or lower triangular band matrix A, stored in the +*> first kd+1 rows of the array. The j-th column of A is stored +*> in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> If DIAG = 'U', the diagonal elements of A are not referenced +*> and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(norm(A) * norm(inv(A))). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DTBCON( NORM, UPLO, DIAG, N, KD, AB, LDAB, RCOND, WORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, KD, LDAB, N + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + DOUBLE PRECISION AINVNM, ANORM, SCALE, SMLNUM, XNORM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DLANTB + EXTERNAL LSAME, IDAMAX, DLAMCH, DLANTB +* .. +* .. External Subroutines .. + EXTERNAL DLACN2, DLATBS, DRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = DLAMCH( 'Safe minimum' )*DBLE( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = DLANTB( NORM, UPLO, DIAG, N, KD, AB, LDAB, WORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL DLATBS( UPLO, 'No transpose', DIAG, NORMIN, N, KD, + $ AB, LDAB, WORK, SCALE, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(A**T). +* + CALL DLATBS( UPLO, 'Transpose', DIAG, NORMIN, N, KD, AB, + $ LDAB, WORK, SCALE, WORK( 2*N+1 ), INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = IDAMAX( N, WORK, 1 ) + XNORM = ABS( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL DRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of DTBCON +* + END diff --git a/dspl/liblapack/SRC/dtbrfs.f b/dspl/liblapack/SRC/dtbrfs.f new file mode 100644 index 0000000..05bfb73 --- /dev/null +++ b/dspl/liblapack/SRC/dtbrfs.f @@ -0,0 +1,485 @@ +*> \brief \b DTBRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTBRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTBRFS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, +* LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, KD, LDAB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ), BERR( * ), +* $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTBRFS provides error bounds and backward error estimates for the +*> solution to a system of linear equations with a triangular band +*> coefficient matrix. +*> +*> The solution matrix X must be computed by DTBTRS or some other +*> means before entering this routine. DTBRFS does not do iterative +*> refinement because doing so cannot improve the backward error. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals or subdiagonals of the +*> triangular band matrix A. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> The upper or lower triangular band matrix A, stored in the +*> first kd+1 rows of the array. The j-th column of A is stored +*> in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> If DIAG = 'U', the diagonal elements of A are not referenced +*> and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> The solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DTBRFS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, + $ LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, KD, LDAB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ), BERR( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANST + INTEGER I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLACN2, DTBMV, DTBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = KD + 2 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A or A**T, depending on TRANS. +* + CALL DCOPY( N, X( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DTBMV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, WORK( N+1 ), + $ 1 ) + CALL DAXPY( N, -ONE, B( 1, J ), 1, WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = ABS( X( K, J ) ) + DO 30 I = MAX( 1, K-KD ), K + WORK( I ) = WORK( I ) + + $ ABS( AB( KD+1+I-K, K ) )*XK + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = ABS( X( K, J ) ) + DO 50 I = MAX( 1, K-KD ), K - 1 + WORK( I ) = WORK( I ) + + $ ABS( AB( KD+1+I-K, K ) )*XK + 50 CONTINUE + WORK( K ) = WORK( K ) + XK + 60 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = ABS( X( K, J ) ) + DO 70 I = K, MIN( N, K+KD ) + WORK( I ) = WORK( I ) + ABS( AB( 1+I-K, K ) )*XK + 70 CONTINUE + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = ABS( X( K, J ) ) + DO 90 I = K + 1, MIN( N, K+KD ) + WORK( I ) = WORK( I ) + ABS( AB( 1+I-K, K ) )*XK + 90 CONTINUE + WORK( K ) = WORK( K ) + XK + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A**T)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = MAX( 1, K-KD ), K + S = S + ABS( AB( KD+1+I-K, K ) )* + $ ABS( X( I, J ) ) + 110 CONTINUE + WORK( K ) = WORK( K ) + S + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = ABS( X( K, J ) ) + DO 130 I = MAX( 1, K-KD ), K - 1 + S = S + ABS( AB( KD+1+I-K, K ) )* + $ ABS( X( I, J ) ) + 130 CONTINUE + WORK( K ) = WORK( K ) + S + 140 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, MIN( N, K+KD ) + S = S + ABS( AB( 1+I-K, K ) )*ABS( X( I, J ) ) + 150 CONTINUE + WORK( K ) = WORK( K ) + S + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = ABS( X( K, J ) ) + DO 170 I = K + 1, MIN( N, K+KD ) + S = S + ABS( AB( 1+I-K, K ) )*ABS( X( I, J ) ) + 170 CONTINUE + WORK( K ) = WORK( K ) + S + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL DLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**T). +* + CALL DTBSV( UPLO, TRANST, DIAG, N, KD, AB, LDAB, + $ WORK( N+1 ), 1 ) + DO 220 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 230 CONTINUE + CALL DTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, + $ WORK( N+1 ), 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of DTBRFS +* + END diff --git a/dspl/liblapack/SRC/dtbtrs.f b/dspl/liblapack/SRC/dtbtrs.f new file mode 100644 index 0000000..e5fb876 --- /dev/null +++ b/dspl/liblapack/SRC/dtbtrs.f @@ -0,0 +1,244 @@ +*> \brief \b DTBTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTBTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTBTRS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, +* LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTBTRS solves a triangular system of the form +*> +*> A * X = B or A**T * X = B, +*> +*> where A is a triangular band matrix of order N, and B is an +*> N-by NRHS matrix. A check is made to verify that A is nonsingular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals or subdiagonals of the +*> triangular band matrix A. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is DOUBLE PRECISION array, dimension (LDAB,N) +*> The upper or lower triangular band matrix A, stored in the +*> first kd+1 rows of AB. The j-th column of A is stored +*> in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> If DIAG = 'U', the diagonal elements of A are not referenced +*> and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, if INFO = 0, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of A is zero, +*> indicating that the matrix is singular and the +*> solutions X have not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DTBTRS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, + $ LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION AB( LDAB, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DTBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOUNIT = LSAME( DIAG, 'N' ) + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + DO 10 INFO = 1, N + IF( AB( KD+1, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE + DO 20 INFO = 1, N + IF( AB( 1, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + END IF + INFO = 0 +* +* Solve A * X = B or A**T * X = B. +* + DO 30 J = 1, NRHS + CALL DTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, B( 1, J ), 1 ) + 30 CONTINUE +* + RETURN +* +* End of DTBTRS +* + END diff --git a/dspl/liblapack/SRC/dtfsm.f b/dspl/liblapack/SRC/dtfsm.f new file mode 100644 index 0000000..515f6f5 --- /dev/null +++ b/dspl/liblapack/SRC/dtfsm.f @@ -0,0 +1,1006 @@ +*> \brief \b DTFSM solves a matrix equation (one operand is a triangular matrix in RFP format). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTFSM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTFSM( TRANSR, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, +* B, LDB ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, DIAG, SIDE, TRANS, UPLO +* INTEGER LDB, M, N +* DOUBLE PRECISION ALPHA +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( 0: * ), B( 0: LDB-1, 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Level 3 BLAS like routine for A in RFP Format. +*> +*> DTFSM solves the matrix equation +*> +*> op( A )*X = alpha*B or X*op( A ) = alpha*B +*> +*> where alpha is a scalar, X and B are m by n matrices, A is a unit, or +*> non-unit, upper or lower triangular matrix and op( A ) is one of +*> +*> op( A ) = A or op( A ) = A**T. +*> +*> A is in Rectangular Full Packed (RFP) Format. +*> +*> The matrix X is overwritten on B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal Form of RFP A is stored; +*> = 'T': The Transpose Form of RFP A is stored. +*> \endverbatim +*> +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> On entry, SIDE specifies whether op( A ) appears on the left +*> or right of X as follows: +*> +*> SIDE = 'L' or 'l' op( A )*X = alpha*B. +*> +*> SIDE = 'R' or 'r' X*op( A ) = alpha*B. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the RFP matrix A came from +*> an upper or lower triangular matrix as follows: +*> UPLO = 'U' or 'u' RFP A came from an upper triangular matrix +*> UPLO = 'L' or 'l' RFP A came from a lower triangular matrix +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> On entry, TRANS specifies the form of op( A ) to be used +*> in the matrix multiplication as follows: +*> +*> TRANS = 'N' or 'n' op( A ) = A. +*> +*> TRANS = 'T' or 't' op( A ) = A'. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> On entry, DIAG specifies whether or not RFP A is unit +*> triangular as follows: +*> +*> DIAG = 'U' or 'u' A is assumed to be unit triangular. +*> +*> DIAG = 'N' or 'n' A is not assumed to be unit +*> triangular. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the number of rows of B. M must be at +*> least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of B. N must be +*> at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION +*> On entry, ALPHA specifies the scalar alpha. When alpha is +*> zero then A is not referenced and B need not be set before +*> entry. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (NT) +*> NT = N*(N+1)/2. On entry, the matrix A in RFP Format. +*> RFP Format is described by TRANSR, UPLO and N as follows: +*> If TRANSR='N' then RFP A is (0:N,0:K-1) when N is even; +*> K=N/2. RFP A is (0:N-1,0:K) when N is odd; K=N/2. If +*> TRANSR = 'T' then RFP is the transpose of RFP A as +*> defined when TRANSR = 'N'. The contents of RFP A are defined +*> by UPLO as follows: If UPLO = 'U' the RFP A contains the NT +*> elements of upper packed A either in normal or +*> transpose Format. If UPLO = 'L' the RFP A contains +*> the NT elements of lower packed A either in normal or +*> transpose Format. The LDA of RFP A is (N+1)/2 when +*> TRANSR = 'T'. When TRANSR is 'N' the LDA is N+1 when N is +*> even and is N when is odd. +*> See the Note below for more details. Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> Before entry, the leading m by n part of the array B must +*> contain the right-hand side matrix B, and on exit is +*> overwritten by the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> On entry, LDB specifies the first dimension of B as declared +*> in the calling (sub) program. LDB must be at least +*> max( 1, m ). +*> Unchanged on exit. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Rectangular Full Packed (RFP) Format when N is +*> even. We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> the transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> the transpose of the last three columns of AP lower. +*> This covers the case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 03 04 05 33 43 53 +*> 13 14 15 00 44 54 +*> 23 24 25 10 11 55 +*> 33 34 35 20 21 22 +*> 00 44 45 30 31 32 +*> 01 11 55 40 41 42 +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We then consider Rectangular Full Packed (RFP) Format when N is +*> odd. We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> the transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> the transpose of the last two columns of AP lower. +*> This covers the case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 02 03 04 00 33 43 +*> 12 13 14 10 11 44 +*> 22 23 24 20 21 22 +*> 00 33 34 30 31 32 +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> 02 12 22 00 01 00 10 20 30 40 50 +*> 03 13 23 33 11 33 11 21 31 41 51 +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DTFSM( TRANSR, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, + $ B, LDB ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, DIAG, SIDE, TRANS, UPLO + INTEGER LDB, M, N + DOUBLE PRECISION ALPHA +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( 0: * ), B( 0: LDB-1, 0: * ) +* .. +* +* ===================================================================== +* +* .. +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LSIDE, MISODD, NISODD, NORMALTRANSR, + $ NOTRANS + INTEGER M1, M2, N1, N2, K, INFO, I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DGEMM, DTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LSIDE = LSAME( SIDE, 'L' ) + LOWER = LSAME( UPLO, 'L' ) + NOTRANS = LSAME( TRANS, 'N' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'T' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSIDE .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -2 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.NOTRANS .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -4 + ELSE IF( .NOT.LSAME( DIAG, 'N' ) .AND. .NOT.LSAME( DIAG, 'U' ) ) + $ THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -6 + ELSE IF( N.LT.0 ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTFSM ', -INFO ) + RETURN + END IF +* +* Quick return when ( (N.EQ.0).OR.(M.EQ.0) ) +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN +* +* Quick return when ALPHA.EQ.(0D+0) +* + IF( ALPHA.EQ.ZERO ) THEN + DO 20 J = 0, N - 1 + DO 10 I = 0, M - 1 + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* + IF( LSIDE ) THEN +* +* SIDE = 'L' +* +* A is M-by-M. +* If M is odd, set NISODD = .TRUE., and M1 and M2. +* If M is even, NISODD = .FALSE., and M. +* + IF( MOD( M, 2 ).EQ.0 ) THEN + MISODD = .FALSE. + K = M / 2 + ELSE + MISODD = .TRUE. + IF( LOWER ) THEN + M2 = M / 2 + M1 = M - M2 + ELSE + M1 = M / 2 + M2 = M - M1 + END IF + END IF +* +* + IF( MISODD ) THEN +* +* SIDE = 'L' and N is odd +* + IF( NORMALTRANSR ) THEN +* +* SIDE = 'L', N is odd, and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'L', and +* TRANS = 'N' +* + IF( M.EQ.1 ) THEN + CALL DTRSM( 'L', 'L', 'N', DIAG, M1, N, ALPHA, + $ A, M, B, LDB ) + ELSE + CALL DTRSM( 'L', 'L', 'N', DIAG, M1, N, ALPHA, + $ A( 0 ), M, B, LDB ) + CALL DGEMM( 'N', 'N', M2, N, M1, -ONE, A( M1 ), + $ M, B, LDB, ALPHA, B( M1, 0 ), LDB ) + CALL DTRSM( 'L', 'U', 'T', DIAG, M2, N, ONE, + $ A( M ), M, B( M1, 0 ), LDB ) + END IF +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'L', and +* TRANS = 'T' +* + IF( M.EQ.1 ) THEN + CALL DTRSM( 'L', 'L', 'T', DIAG, M1, N, ALPHA, + $ A( 0 ), M, B, LDB ) + ELSE + CALL DTRSM( 'L', 'U', 'N', DIAG, M2, N, ALPHA, + $ A( M ), M, B( M1, 0 ), LDB ) + CALL DGEMM( 'T', 'N', M1, N, M2, -ONE, A( M1 ), + $ M, B( M1, 0 ), LDB, ALPHA, B, LDB ) + CALL DTRSM( 'L', 'L', 'T', DIAG, M1, N, ONE, + $ A( 0 ), M, B, LDB ) + END IF +* + END IF +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'N', and UPLO = 'U' +* + IF( .NOT.NOTRANS ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'U', and +* TRANS = 'N' +* + CALL DTRSM( 'L', 'L', 'N', DIAG, M1, N, ALPHA, + $ A( M2 ), M, B, LDB ) + CALL DGEMM( 'T', 'N', M2, N, M1, -ONE, A( 0 ), M, + $ B, LDB, ALPHA, B( M1, 0 ), LDB ) + CALL DTRSM( 'L', 'U', 'T', DIAG, M2, N, ONE, + $ A( M1 ), M, B( M1, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'U', and +* TRANS = 'T' +* + CALL DTRSM( 'L', 'U', 'N', DIAG, M2, N, ALPHA, + $ A( M1 ), M, B( M1, 0 ), LDB ) + CALL DGEMM( 'N', 'N', M1, N, M2, -ONE, A( 0 ), M, + $ B( M1, 0 ), LDB, ALPHA, B, LDB ) + CALL DTRSM( 'L', 'L', 'T', DIAG, M1, N, ONE, + $ A( M2 ), M, B, LDB ) +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'L', N is odd, and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'T', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'L', and +* TRANS = 'N' +* + IF( M.EQ.1 ) THEN + CALL DTRSM( 'L', 'U', 'T', DIAG, M1, N, ALPHA, + $ A( 0 ), M1, B, LDB ) + ELSE + CALL DTRSM( 'L', 'U', 'T', DIAG, M1, N, ALPHA, + $ A( 0 ), M1, B, LDB ) + CALL DGEMM( 'T', 'N', M2, N, M1, -ONE, + $ A( M1*M1 ), M1, B, LDB, ALPHA, + $ B( M1, 0 ), LDB ) + CALL DTRSM( 'L', 'L', 'N', DIAG, M2, N, ONE, + $ A( 1 ), M1, B( M1, 0 ), LDB ) + END IF +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'L', and +* TRANS = 'T' +* + IF( M.EQ.1 ) THEN + CALL DTRSM( 'L', 'U', 'N', DIAG, M1, N, ALPHA, + $ A( 0 ), M1, B, LDB ) + ELSE + CALL DTRSM( 'L', 'L', 'T', DIAG, M2, N, ALPHA, + $ A( 1 ), M1, B( M1, 0 ), LDB ) + CALL DGEMM( 'N', 'N', M1, N, M2, -ONE, + $ A( M1*M1 ), M1, B( M1, 0 ), LDB, + $ ALPHA, B, LDB ) + CALL DTRSM( 'L', 'U', 'N', DIAG, M1, N, ONE, + $ A( 0 ), M1, B, LDB ) + END IF +* + END IF +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'T', and UPLO = 'U' +* + IF( .NOT.NOTRANS ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'U', and +* TRANS = 'N' +* + CALL DTRSM( 'L', 'U', 'T', DIAG, M1, N, ALPHA, + $ A( M2*M2 ), M2, B, LDB ) + CALL DGEMM( 'N', 'N', M2, N, M1, -ONE, A( 0 ), M2, + $ B, LDB, ALPHA, B( M1, 0 ), LDB ) + CALL DTRSM( 'L', 'L', 'N', DIAG, M2, N, ONE, + $ A( M1*M2 ), M2, B( M1, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'U', and +* TRANS = 'T' +* + CALL DTRSM( 'L', 'L', 'T', DIAG, M2, N, ALPHA, + $ A( M1*M2 ), M2, B( M1, 0 ), LDB ) + CALL DGEMM( 'T', 'N', M1, N, M2, -ONE, A( 0 ), M2, + $ B( M1, 0 ), LDB, ALPHA, B, LDB ) + CALL DTRSM( 'L', 'U', 'N', DIAG, M1, N, ONE, + $ A( M2*M2 ), M2, B, LDB ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'L' and N is even +* + IF( NORMALTRANSR ) THEN +* +* SIDE = 'L', N is even, and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SIDE ='L', N is even, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'L', +* and TRANS = 'N' +* + CALL DTRSM( 'L', 'L', 'N', DIAG, K, N, ALPHA, + $ A( 1 ), M+1, B, LDB ) + CALL DGEMM( 'N', 'N', K, N, K, -ONE, A( K+1 ), + $ M+1, B, LDB, ALPHA, B( K, 0 ), LDB ) + CALL DTRSM( 'L', 'U', 'T', DIAG, K, N, ONE, + $ A( 0 ), M+1, B( K, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'L', +* and TRANS = 'T' +* + CALL DTRSM( 'L', 'U', 'N', DIAG, K, N, ALPHA, + $ A( 0 ), M+1, B( K, 0 ), LDB ) + CALL DGEMM( 'T', 'N', K, N, K, -ONE, A( K+1 ), + $ M+1, B( K, 0 ), LDB, ALPHA, B, LDB ) + CALL DTRSM( 'L', 'L', 'T', DIAG, K, N, ONE, + $ A( 1 ), M+1, B, LDB ) +* + END IF +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'N', and UPLO = 'U' +* + IF( .NOT.NOTRANS ) THEN +* +* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'U', +* and TRANS = 'N' +* + CALL DTRSM( 'L', 'L', 'N', DIAG, K, N, ALPHA, + $ A( K+1 ), M+1, B, LDB ) + CALL DGEMM( 'T', 'N', K, N, K, -ONE, A( 0 ), M+1, + $ B, LDB, ALPHA, B( K, 0 ), LDB ) + CALL DTRSM( 'L', 'U', 'T', DIAG, K, N, ONE, + $ A( K ), M+1, B( K, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'U', +* and TRANS = 'T' + CALL DTRSM( 'L', 'U', 'N', DIAG, K, N, ALPHA, + $ A( K ), M+1, B( K, 0 ), LDB ) + CALL DGEMM( 'N', 'N', K, N, K, -ONE, A( 0 ), M+1, + $ B( K, 0 ), LDB, ALPHA, B, LDB ) + CALL DTRSM( 'L', 'L', 'T', DIAG, K, N, ONE, + $ A( K+1 ), M+1, B, LDB ) +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'L', N is even, and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SIDE ='L', N is even, TRANSR = 'T', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'L', +* and TRANS = 'N' +* + CALL DTRSM( 'L', 'U', 'T', DIAG, K, N, ALPHA, + $ A( K ), K, B, LDB ) + CALL DGEMM( 'T', 'N', K, N, K, -ONE, + $ A( K*( K+1 ) ), K, B, LDB, ALPHA, + $ B( K, 0 ), LDB ) + CALL DTRSM( 'L', 'L', 'N', DIAG, K, N, ONE, + $ A( 0 ), K, B( K, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'L', +* and TRANS = 'T' +* + CALL DTRSM( 'L', 'L', 'T', DIAG, K, N, ALPHA, + $ A( 0 ), K, B( K, 0 ), LDB ) + CALL DGEMM( 'N', 'N', K, N, K, -ONE, + $ A( K*( K+1 ) ), K, B( K, 0 ), LDB, + $ ALPHA, B, LDB ) + CALL DTRSM( 'L', 'U', 'N', DIAG, K, N, ONE, + $ A( K ), K, B, LDB ) +* + END IF +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'T', and UPLO = 'U' +* + IF( .NOT.NOTRANS ) THEN +* +* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'U', +* and TRANS = 'N' +* + CALL DTRSM( 'L', 'U', 'T', DIAG, K, N, ALPHA, + $ A( K*( K+1 ) ), K, B, LDB ) + CALL DGEMM( 'N', 'N', K, N, K, -ONE, A( 0 ), K, B, + $ LDB, ALPHA, B( K, 0 ), LDB ) + CALL DTRSM( 'L', 'L', 'N', DIAG, K, N, ONE, + $ A( K*K ), K, B( K, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'U', +* and TRANS = 'T' +* + CALL DTRSM( 'L', 'L', 'T', DIAG, K, N, ALPHA, + $ A( K*K ), K, B( K, 0 ), LDB ) + CALL DGEMM( 'T', 'N', K, N, K, -ONE, A( 0 ), K, + $ B( K, 0 ), LDB, ALPHA, B, LDB ) + CALL DTRSM( 'L', 'U', 'N', DIAG, K, N, ONE, + $ A( K*( K+1 ) ), K, B, LDB ) +* + END IF +* + END IF +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'R' +* +* A is N-by-N. +* If N is odd, set NISODD = .TRUE., and N1 and N2. +* If N is even, NISODD = .FALSE., and K. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + NISODD = .FALSE. + K = N / 2 + ELSE + NISODD = .TRUE. + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF + END IF +* + IF( NISODD ) THEN +* +* SIDE = 'R' and N is odd +* + IF( NORMALTRANSR ) THEN +* +* SIDE = 'R', N is odd, and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'L', and +* TRANS = 'N' +* + CALL DTRSM( 'R', 'U', 'T', DIAG, M, N2, ALPHA, + $ A( N ), N, B( 0, N1 ), LDB ) + CALL DGEMM( 'N', 'N', M, N1, N2, -ONE, B( 0, N1 ), + $ LDB, A( N1 ), N, ALPHA, B( 0, 0 ), + $ LDB ) + CALL DTRSM( 'R', 'L', 'N', DIAG, M, N1, ONE, + $ A( 0 ), N, B( 0, 0 ), LDB ) +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'L', and +* TRANS = 'T' +* + CALL DTRSM( 'R', 'L', 'T', DIAG, M, N1, ALPHA, + $ A( 0 ), N, B( 0, 0 ), LDB ) + CALL DGEMM( 'N', 'T', M, N2, N1, -ONE, B( 0, 0 ), + $ LDB, A( N1 ), N, ALPHA, B( 0, N1 ), + $ LDB ) + CALL DTRSM( 'R', 'U', 'N', DIAG, M, N2, ONE, + $ A( N ), N, B( 0, N1 ), LDB ) +* + END IF +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'N', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'U', and +* TRANS = 'N' +* + CALL DTRSM( 'R', 'L', 'T', DIAG, M, N1, ALPHA, + $ A( N2 ), N, B( 0, 0 ), LDB ) + CALL DGEMM( 'N', 'N', M, N2, N1, -ONE, B( 0, 0 ), + $ LDB, A( 0 ), N, ALPHA, B( 0, N1 ), + $ LDB ) + CALL DTRSM( 'R', 'U', 'N', DIAG, M, N2, ONE, + $ A( N1 ), N, B( 0, N1 ), LDB ) +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'U', and +* TRANS = 'T' +* + CALL DTRSM( 'R', 'U', 'T', DIAG, M, N2, ALPHA, + $ A( N1 ), N, B( 0, N1 ), LDB ) + CALL DGEMM( 'N', 'T', M, N1, N2, -ONE, B( 0, N1 ), + $ LDB, A( 0 ), N, ALPHA, B( 0, 0 ), LDB ) + CALL DTRSM( 'R', 'L', 'N', DIAG, M, N1, ONE, + $ A( N2 ), N, B( 0, 0 ), LDB ) +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'R', N is odd, and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'T', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'L', and +* TRANS = 'N' +* + CALL DTRSM( 'R', 'L', 'N', DIAG, M, N2, ALPHA, + $ A( 1 ), N1, B( 0, N1 ), LDB ) + CALL DGEMM( 'N', 'T', M, N1, N2, -ONE, B( 0, N1 ), + $ LDB, A( N1*N1 ), N1, ALPHA, B( 0, 0 ), + $ LDB ) + CALL DTRSM( 'R', 'U', 'T', DIAG, M, N1, ONE, + $ A( 0 ), N1, B( 0, 0 ), LDB ) +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'L', and +* TRANS = 'T' +* + CALL DTRSM( 'R', 'U', 'N', DIAG, M, N1, ALPHA, + $ A( 0 ), N1, B( 0, 0 ), LDB ) + CALL DGEMM( 'N', 'N', M, N2, N1, -ONE, B( 0, 0 ), + $ LDB, A( N1*N1 ), N1, ALPHA, B( 0, N1 ), + $ LDB ) + CALL DTRSM( 'R', 'L', 'T', DIAG, M, N2, ONE, + $ A( 1 ), N1, B( 0, N1 ), LDB ) +* + END IF +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'T', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'U', and +* TRANS = 'N' +* + CALL DTRSM( 'R', 'U', 'N', DIAG, M, N1, ALPHA, + $ A( N2*N2 ), N2, B( 0, 0 ), LDB ) + CALL DGEMM( 'N', 'T', M, N2, N1, -ONE, B( 0, 0 ), + $ LDB, A( 0 ), N2, ALPHA, B( 0, N1 ), + $ LDB ) + CALL DTRSM( 'R', 'L', 'T', DIAG, M, N2, ONE, + $ A( N1*N2 ), N2, B( 0, N1 ), LDB ) +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'U', and +* TRANS = 'T' +* + CALL DTRSM( 'R', 'L', 'N', DIAG, M, N2, ALPHA, + $ A( N1*N2 ), N2, B( 0, N1 ), LDB ) + CALL DGEMM( 'N', 'N', M, N1, N2, -ONE, B( 0, N1 ), + $ LDB, A( 0 ), N2, ALPHA, B( 0, 0 ), + $ LDB ) + CALL DTRSM( 'R', 'U', 'T', DIAG, M, N1, ONE, + $ A( N2*N2 ), N2, B( 0, 0 ), LDB ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'R' and N is even +* + IF( NORMALTRANSR ) THEN +* +* SIDE = 'R', N is even, and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SIDE ='R', N is even, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'L', +* and TRANS = 'N' +* + CALL DTRSM( 'R', 'U', 'T', DIAG, M, K, ALPHA, + $ A( 0 ), N+1, B( 0, K ), LDB ) + CALL DGEMM( 'N', 'N', M, K, K, -ONE, B( 0, K ), + $ LDB, A( K+1 ), N+1, ALPHA, B( 0, 0 ), + $ LDB ) + CALL DTRSM( 'R', 'L', 'N', DIAG, M, K, ONE, + $ A( 1 ), N+1, B( 0, 0 ), LDB ) +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'L', +* and TRANS = 'T' +* + CALL DTRSM( 'R', 'L', 'T', DIAG, M, K, ALPHA, + $ A( 1 ), N+1, B( 0, 0 ), LDB ) + CALL DGEMM( 'N', 'T', M, K, K, -ONE, B( 0, 0 ), + $ LDB, A( K+1 ), N+1, ALPHA, B( 0, K ), + $ LDB ) + CALL DTRSM( 'R', 'U', 'N', DIAG, M, K, ONE, + $ A( 0 ), N+1, B( 0, K ), LDB ) +* + END IF +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'N', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'U', +* and TRANS = 'N' +* + CALL DTRSM( 'R', 'L', 'T', DIAG, M, K, ALPHA, + $ A( K+1 ), N+1, B( 0, 0 ), LDB ) + CALL DGEMM( 'N', 'N', M, K, K, -ONE, B( 0, 0 ), + $ LDB, A( 0 ), N+1, ALPHA, B( 0, K ), + $ LDB ) + CALL DTRSM( 'R', 'U', 'N', DIAG, M, K, ONE, + $ A( K ), N+1, B( 0, K ), LDB ) +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'U', +* and TRANS = 'T' +* + CALL DTRSM( 'R', 'U', 'T', DIAG, M, K, ALPHA, + $ A( K ), N+1, B( 0, K ), LDB ) + CALL DGEMM( 'N', 'T', M, K, K, -ONE, B( 0, K ), + $ LDB, A( 0 ), N+1, ALPHA, B( 0, 0 ), + $ LDB ) + CALL DTRSM( 'R', 'L', 'N', DIAG, M, K, ONE, + $ A( K+1 ), N+1, B( 0, 0 ), LDB ) +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'R', N is even, and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SIDE ='R', N is even, TRANSR = 'T', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'L', +* and TRANS = 'N' +* + CALL DTRSM( 'R', 'L', 'N', DIAG, M, K, ALPHA, + $ A( 0 ), K, B( 0, K ), LDB ) + CALL DGEMM( 'N', 'T', M, K, K, -ONE, B( 0, K ), + $ LDB, A( ( K+1 )*K ), K, ALPHA, + $ B( 0, 0 ), LDB ) + CALL DTRSM( 'R', 'U', 'T', DIAG, M, K, ONE, + $ A( K ), K, B( 0, 0 ), LDB ) +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'L', +* and TRANS = 'T' +* + CALL DTRSM( 'R', 'U', 'N', DIAG, M, K, ALPHA, + $ A( K ), K, B( 0, 0 ), LDB ) + CALL DGEMM( 'N', 'N', M, K, K, -ONE, B( 0, 0 ), + $ LDB, A( ( K+1 )*K ), K, ALPHA, + $ B( 0, K ), LDB ) + CALL DTRSM( 'R', 'L', 'T', DIAG, M, K, ONE, + $ A( 0 ), K, B( 0, K ), LDB ) +* + END IF +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'T', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'U', +* and TRANS = 'N' +* + CALL DTRSM( 'R', 'U', 'N', DIAG, M, K, ALPHA, + $ A( ( K+1 )*K ), K, B( 0, 0 ), LDB ) + CALL DGEMM( 'N', 'T', M, K, K, -ONE, B( 0, 0 ), + $ LDB, A( 0 ), K, ALPHA, B( 0, K ), LDB ) + CALL DTRSM( 'R', 'L', 'T', DIAG, M, K, ONE, + $ A( K*K ), K, B( 0, K ), LDB ) +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'U', +* and TRANS = 'T' +* + CALL DTRSM( 'R', 'L', 'N', DIAG, M, K, ALPHA, + $ A( K*K ), K, B( 0, K ), LDB ) + CALL DGEMM( 'N', 'N', M, K, K, -ONE, B( 0, K ), + $ LDB, A( 0 ), K, ALPHA, B( 0, 0 ), LDB ) + CALL DTRSM( 'R', 'U', 'T', DIAG, M, K, ONE, + $ A( ( K+1 )*K ), K, B( 0, 0 ), LDB ) +* + END IF +* + END IF +* + END IF +* + END IF + END IF +* + RETURN +* +* End of DTFSM +* + END diff --git a/dspl/liblapack/SRC/dtftri.f b/dspl/liblapack/SRC/dtftri.f new file mode 100644 index 0000000..9debec9 --- /dev/null +++ b/dspl/liblapack/SRC/dtftri.f @@ -0,0 +1,472 @@ +*> \brief \b DTFTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTFTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTFTRI( TRANSR, UPLO, DIAG, N, A, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO, DIAG +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTFTRI computes the inverse of a triangular matrix A stored in RFP +*> format. +*> +*> This is a Level 3 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal TRANSR of RFP A is stored; +*> = 'T': The Transpose TRANSR of RFP A is stored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (0:nt-1); +*> nt=N*(N+1)/2. On entry, the triangular factor of a Hermitian +*> Positive Definite matrix A in RFP format. RFP format is +*> described by TRANSR, UPLO, and N as follows: If TRANSR = 'N' +*> then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is +*> (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'T' then RFP is +*> the transpose of RFP A as defined when +*> TRANSR = 'N'. The contents of RFP A are defined by UPLO as +*> follows: If UPLO = 'U' the RFP A contains the nt elements of +*> upper packed A; If UPLO = 'L' the RFP A contains the nt +*> elements of lower packed A. The LDA of RFP A is (N+1)/2 when +*> TRANSR = 'T'. When TRANSR is 'N' the LDA is N+1 when N is +*> even and N is odd. See the Note below for more details. +*> +*> On exit, the (triangular) inverse of the original matrix, in +*> the same storage format. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, A(i,i) is exactly zero. The triangular +*> matrix is singular and its inverse can not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Rectangular Full Packed (RFP) Format when N is +*> even. We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> the transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> the transpose of the last three columns of AP lower. +*> This covers the case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 03 04 05 33 43 53 +*> 13 14 15 00 44 54 +*> 23 24 25 10 11 55 +*> 33 34 35 20 21 22 +*> 00 44 45 30 31 32 +*> 01 11 55 40 41 42 +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We then consider Rectangular Full Packed (RFP) Format when N is +*> odd. We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> the transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> the transpose of the last two columns of AP lower. +*> This covers the case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 02 03 04 00 33 43 +*> 12 13 14 10 11 44 +*> 22 23 24 20 21 22 +*> 00 33 34 30 31 32 +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> 02 12 22 00 01 00 10 20 30 40 50 +*> 03 13 23 33 11 33 11 21 31 41 51 +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTFTRI( TRANSR, UPLO, DIAG, N, A, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO, DIAG + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( 0: * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DTRMM, DTRTRI +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'T' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( .NOT.LSAME( DIAG, 'N' ) .AND. .NOT.LSAME( DIAG, 'U' ) ) + $ THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTFTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* If N is odd, set NISODD = .TRUE. +* If N is even, set K = N/2 and NISODD = .FALSE. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + ELSE + NISODD = .TRUE. + END IF +* +* Set N1 and N2 depending on LOWER +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* +* start execution: there are eight cases +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) +* T1 -> a(0), T2 -> a(n), S -> a(n1) +* + CALL DTRTRI( 'L', DIAG, N1, A( 0 ), N, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL DTRMM( 'R', 'L', 'N', DIAG, N2, N1, -ONE, A( 0 ), + $ N, A( N1 ), N ) + CALL DTRTRI( 'U', DIAG, N2, A( N ), N, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 + IF( INFO.GT.0 ) + $ RETURN + CALL DTRMM( 'L', 'U', 'T', DIAG, N2, N1, ONE, A( N ), N, + $ A( N1 ), N ) +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) +* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) +* T1 -> a(n2), T2 -> a(n1), S -> a(0) +* + CALL DTRTRI( 'L', DIAG, N1, A( N2 ), N, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL DTRMM( 'L', 'L', 'T', DIAG, N1, N2, -ONE, A( N2 ), + $ N, A( 0 ), N ) + CALL DTRTRI( 'U', DIAG, N2, A( N1 ), N, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 + IF( INFO.GT.0 ) + $ RETURN + CALL DTRMM( 'R', 'U', 'N', DIAG, N1, N2, ONE, A( N1 ), + $ N, A( 0 ), N ) +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is odd +* T1 -> a(0), T2 -> a(1), S -> a(0+n1*n1) +* + CALL DTRTRI( 'U', DIAG, N1, A( 0 ), N1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL DTRMM( 'L', 'U', 'N', DIAG, N1, N2, -ONE, A( 0 ), + $ N1, A( N1*N1 ), N1 ) + CALL DTRTRI( 'L', DIAG, N2, A( 1 ), N1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 + IF( INFO.GT.0 ) + $ RETURN + CALL DTRMM( 'R', 'L', 'T', DIAG, N1, N2, ONE, A( 1 ), + $ N1, A( N1*N1 ), N1 ) +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is odd +* T1 -> a(0+n2*n2), T2 -> a(0+n1*n2), S -> a(0) +* + CALL DTRTRI( 'U', DIAG, N1, A( N2*N2 ), N2, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL DTRMM( 'R', 'U', 'T', DIAG, N2, N1, -ONE, + $ A( N2*N2 ), N2, A( 0 ), N2 ) + CALL DTRTRI( 'L', DIAG, N2, A( N1*N2 ), N2, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 + IF( INFO.GT.0 ) + $ RETURN + CALL DTRMM( 'L', 'L', 'N', DIAG, N2, N1, ONE, + $ A( N1*N2 ), N2, A( 0 ), N2 ) + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1) +* + CALL DTRTRI( 'L', DIAG, K, A( 1 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL DTRMM( 'R', 'L', 'N', DIAG, K, K, -ONE, A( 1 ), + $ N+1, A( K+1 ), N+1 ) + CALL DTRTRI( 'U', DIAG, K, A( 0 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K + IF( INFO.GT.0 ) + $ RETURN + CALL DTRMM( 'L', 'U', 'T', DIAG, K, K, ONE, A( 0 ), N+1, + $ A( K+1 ), N+1 ) +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0) +* + CALL DTRTRI( 'L', DIAG, K, A( K+1 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL DTRMM( 'L', 'L', 'T', DIAG, K, K, -ONE, A( K+1 ), + $ N+1, A( 0 ), N+1 ) + CALL DTRTRI( 'U', DIAG, K, A( K ), N+1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K + IF( INFO.GT.0 ) + $ RETURN + CALL DTRMM( 'R', 'U', 'N', DIAG, K, K, ONE, A( K ), N+1, + $ A( 0 ), N+1 ) + END IF + ELSE +* +* N is even and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) +* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k +* + CALL DTRTRI( 'U', DIAG, K, A( K ), K, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL DTRMM( 'L', 'U', 'N', DIAG, K, K, -ONE, A( K ), K, + $ A( K*( K+1 ) ), K ) + CALL DTRTRI( 'L', DIAG, K, A( 0 ), K, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K + IF( INFO.GT.0 ) + $ RETURN + CALL DTRMM( 'R', 'L', 'T', DIAG, K, K, ONE, A( 0 ), K, + $ A( K*( K+1 ) ), K ) + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0) +* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k +* + CALL DTRTRI( 'U', DIAG, K, A( K*( K+1 ) ), K, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL DTRMM( 'R', 'U', 'T', DIAG, K, K, -ONE, + $ A( K*( K+1 ) ), K, A( 0 ), K ) + CALL DTRTRI( 'L', DIAG, K, A( K*K ), K, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K + IF( INFO.GT.0 ) + $ RETURN + CALL DTRMM( 'L', 'L', 'N', DIAG, K, K, ONE, A( K*K ), K, + $ A( 0 ), K ) + END IF + END IF + END IF +* + RETURN +* +* End of DTFTRI +* + END diff --git a/dspl/liblapack/SRC/dtfttp.f b/dspl/liblapack/SRC/dtfttp.f new file mode 100644 index 0000000..c292982 --- /dev/null +++ b/dspl/liblapack/SRC/dtfttp.f @@ -0,0 +1,517 @@ +*> \brief \b DTFTTP copies a triangular matrix from the rectangular full packed format (TF) to the standard packed format (TP). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTFTTP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTFTTP( TRANSR, UPLO, N, ARF, AP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AP( 0: * ), ARF( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTFTTP copies a triangular matrix A from rectangular full packed +*> format (TF) to standard packed format (TP). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': ARF is in Normal format; +*> = 'T': ARF is in Transpose format; +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] ARF +*> \verbatim +*> ARF is DOUBLE PRECISION array, dimension ( N*(N+1)/2 ), +*> On entry, the upper or lower triangular matrix A stored in +*> RFP format. For a further discussion see Notes below. +*> \endverbatim +*> +*> \param[out] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension ( N*(N+1)/2 ), +*> On exit, the upper or lower triangular matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Rectangular Full Packed (RFP) Format when N is +*> even. We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> the transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> the transpose of the last three columns of AP lower. +*> This covers the case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 03 04 05 33 43 53 +*> 13 14 15 00 44 54 +*> 23 24 25 10 11 55 +*> 33 34 35 20 21 22 +*> 00 44 45 30 31 32 +*> 01 11 55 40 41 42 +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We then consider Rectangular Full Packed (RFP) Format when N is +*> odd. We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> the transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> the transpose of the last two columns of AP lower. +*> This covers the case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 02 03 04 00 33 43 +*> 12 13 14 10 11 44 +*> 22 23 24 20 21 22 +*> 00 33 34 30 31 32 +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> 02 12 22 00 01 00 10 20 30 40 50 +*> 03 13 23 33 11 33 11 21 31 41 51 +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTFTTP( TRANSR, UPLO, N, ARF, AP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( 0: * ), ARF( 0: * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K, NT + INTEGER I, J, IJ + INTEGER IJP, JP, LDA, JS +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'T' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTFTTP', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( NORMALTRANSR ) THEN + AP( 0 ) = ARF( 0 ) + ELSE + AP( 0 ) = ARF( 0 ) + END IF + RETURN + END IF +* +* Size of array ARF(0:NT-1) +* + NT = N*( N+1 ) / 2 +* +* Set N1 and N2 depending on LOWER +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* If N is odd, set NISODD = .TRUE. +* If N is even, set K = N/2 and NISODD = .FALSE. +* +* set lda of ARF^C; ARF^C is (0:(N+1)/2-1,0:N-noe) +* where noe = 0 if n is even, noe = 1 if n is odd +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + LDA = N + 1 + ELSE + NISODD = .TRUE. + LDA = N + END IF +* +* ARF^C has lda rows and n+1-noe cols +* + IF( .NOT.NORMALTRANSR ) + $ LDA = ( N+1 ) / 2 +* +* start execution: there are eight cases +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) +* T1 -> a(0), T2 -> a(n), S -> a(n1); lda = n +* + IJP = 0 + JP = 0 + DO J = 0, N2 + DO I = J, N - 1 + IJ = I + JP + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JP = JP + LDA + END DO + DO I = 0, N2 - 1 + DO J = 1 + I, N2 + IJ = I + J*LDA + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) +* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) +* T1 -> a(n2), T2 -> a(n1), S -> a(0) +* + IJP = 0 + DO J = 0, N1 - 1 + IJ = N2 + J + DO I = 0, J + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + IJ = IJ + LDA + END DO + END DO + JS = 0 + DO J = N1, N - 1 + IJ = JS + DO IJ = JS, JS + J + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is odd +* T1 -> A(0,0) , T2 -> A(1,0) , S -> A(0,n1) +* T1 -> a(0+0) , T2 -> a(1+0) , S -> a(0+n1*n1); lda=n1 +* + IJP = 0 + DO I = 0, N2 + DO IJ = I*( LDA+1 ), N*LDA - 1, LDA + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + END DO + JS = 1 + DO J = 0, N2 - 1 + DO IJ = JS, JS + N2 - J - 1 + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + 1 + END DO +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is odd +* T1 -> A(0,n1+1), T2 -> A(0,n1), S -> A(0,0) +* T1 -> a(n2*n2), T2 -> a(n1*n2), S -> a(0); lda = n2 +* + IJP = 0 + JS = N2*LDA + DO J = 0, N1 - 1 + DO IJ = JS, JS + J + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO + DO I = 0, N1 + DO IJ = I, I + ( N1+I )*LDA, LDA + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + END DO +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1) +* + IJP = 0 + JP = 0 + DO J = 0, K - 1 + DO I = J, N - 1 + IJ = 1 + I + JP + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JP = JP + LDA + END DO + DO I = 0, K - 1 + DO J = I, K - 1 + IJ = I + J*LDA + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0) +* + IJP = 0 + DO J = 0, K - 1 + IJ = K + 1 + J + DO I = 0, J + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + IJ = IJ + LDA + END DO + END DO + JS = 0 + DO J = K, N - 1 + IJ = JS + DO IJ = JS, JS + J + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO +* + END IF +* + ELSE +* +* N is even and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) +* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k +* + IJP = 0 + DO I = 0, K - 1 + DO IJ = I + ( I+1 )*LDA, ( N+1 )*LDA - 1, LDA + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + END DO + JS = 0 + DO J = 0, K - 1 + DO IJ = JS, JS + K - J - 1 + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + 1 + END DO +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0) +* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k +* + IJP = 0 + JS = ( K+1 )*LDA + DO J = 0, K - 1 + DO IJ = JS, JS + J + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO + DO I = 0, K - 1 + DO IJ = I, I + ( K+I )*LDA, LDA + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + END DO +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of DTFTTP +* + END diff --git a/dspl/liblapack/SRC/dtfttr.f b/dspl/liblapack/SRC/dtfttr.f new file mode 100644 index 0000000..bb1c622 --- /dev/null +++ b/dspl/liblapack/SRC/dtfttr.f @@ -0,0 +1,495 @@ +*> \brief \b DTFTTR copies a triangular matrix from the rectangular full packed format (TF) to the standard full format (TR). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTFTTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTFTTR( TRANSR, UPLO, N, ARF, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( 0: LDA-1, 0: * ), ARF( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTFTTR copies a triangular matrix A from rectangular full packed +*> format (TF) to standard full format (TR). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': ARF is in Normal format; +*> = 'T': ARF is in Transpose format. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices ARF and A. N >= 0. +*> \endverbatim +*> +*> \param[in] ARF +*> \verbatim +*> ARF is DOUBLE PRECISION array, dimension (N*(N+1)/2). +*> On entry, the upper (if UPLO = 'U') or lower (if UPLO = 'L') +*> matrix A in RFP format. See the "Notes" below for more +*> details. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On exit, the triangular matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of the array A contains +*> the upper triangular matrix, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of the array A contains +*> the lower triangular matrix, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Rectangular Full Packed (RFP) Format when N is +*> even. We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> the transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> the transpose of the last three columns of AP lower. +*> This covers the case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 03 04 05 33 43 53 +*> 13 14 15 00 44 54 +*> 23 24 25 10 11 55 +*> 33 34 35 20 21 22 +*> 00 44 45 30 31 32 +*> 01 11 55 40 41 42 +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We then consider Rectangular Full Packed (RFP) Format when N is +*> odd. We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> the transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> the transpose of the last two columns of AP lower. +*> This covers the case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 02 03 04 00 33 43 +*> 12 13 14 10 11 44 +*> 22 23 24 20 21 22 +*> 00 33 34 30 31 32 +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> 02 12 22 00 01 00 10 20 30 40 50 +*> 03 13 23 33 11 33 11 21 31 41 51 +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DTFTTR( TRANSR, UPLO, N, ARF, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( 0: LDA-1, 0: * ), ARF( 0: * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K, NT, NX2, NP1X2 + INTEGER I, J, L, IJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'T' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTFTTR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + IF( N.EQ.1 ) THEN + A( 0, 0 ) = ARF( 0 ) + END IF + RETURN + END IF +* +* Size of array ARF(0:nt-1) +* + NT = N*( N+1 ) / 2 +* +* set N1 and N2 depending on LOWER: for N even N1=N2=K +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* If N is odd, set NISODD = .TRUE., LDA=N+1 and A is (N+1)--by--K2. +* If N is even, set K = N/2 and NISODD = .FALSE., LDA=N and A is +* N--by--(N+1)/2. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + IF( .NOT.LOWER ) + $ NP1X2 = N + N + 2 + ELSE + NISODD = .TRUE. + IF( .NOT.LOWER ) + $ NX2 = N + N + END IF +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* N is odd, TRANSR = 'N', and UPLO = 'L' +* + IJ = 0 + DO J = 0, N2 + DO I = N1, N2 + J + A( N2+J, I ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO I = J, N - 1 + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* N is odd, TRANSR = 'N', and UPLO = 'U' +* + IJ = NT - N + DO J = N - 1, N1, -1 + DO I = 0, J + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO L = J - N1, N1 - 1 + A( J-N1, L ) = ARF( IJ ) + IJ = IJ + 1 + END DO + IJ = IJ - NX2 + END DO +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* N is odd, TRANSR = 'T', and UPLO = 'L' +* + IJ = 0 + DO J = 0, N2 - 1 + DO I = 0, J + A( J, I ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO I = N1 + J, N - 1 + A( I, N1+J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO + DO J = N2, N - 1 + DO I = 0, N1 - 1 + A( J, I ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* N is odd, TRANSR = 'T', and UPLO = 'U' +* + IJ = 0 + DO J = 0, N1 + DO I = N1, N - 1 + A( J, I ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO + DO J = 0, N1 - 1 + DO I = 0, J + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO L = N2 + J, N - 1 + A( N2+J, L ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* N is even, TRANSR = 'N', and UPLO = 'L' +* + IJ = 0 + DO J = 0, K - 1 + DO I = K, K + J + A( K+J, I ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO I = J, N - 1 + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* N is even, TRANSR = 'N', and UPLO = 'U' +* + IJ = NT - N - 1 + DO J = N - 1, K, -1 + DO I = 0, J + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO L = J - K, K - 1 + A( J-K, L ) = ARF( IJ ) + IJ = IJ + 1 + END DO + IJ = IJ - NP1X2 + END DO +* + END IF +* + ELSE +* +* N is even and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* N is even, TRANSR = 'T', and UPLO = 'L' +* + IJ = 0 + J = K + DO I = K, N - 1 + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO J = 0, K - 2 + DO I = 0, J + A( J, I ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO I = K + 1 + J, N - 1 + A( I, K+1+J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO + DO J = K - 1, N - 1 + DO I = 0, K - 1 + A( J, I ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* N is even, TRANSR = 'T', and UPLO = 'U' +* + IJ = 0 + DO J = 0, K + DO I = K, N - 1 + A( J, I ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO + DO J = 0, K - 2 + DO I = 0, J + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO L = K + 1 + J, N - 1 + A( K+1+J, L ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO +* Note that here, on exit of the loop, J = K-1 + DO I = 0, J + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of DTFTTR +* + END diff --git a/dspl/liblapack/SRC/dtgevc.f b/dspl/liblapack/SRC/dtgevc.f new file mode 100644 index 0000000..756474c --- /dev/null +++ b/dspl/liblapack/SRC/dtgevc.f @@ -0,0 +1,1211 @@ +*> \brief \b DTGEVC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTGEVC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTGEVC( SIDE, HOWMNY, SELECT, N, S, LDS, P, LDP, VL, +* LDVL, VR, LDVR, MM, M, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER HOWMNY, SIDE +* INTEGER INFO, LDP, LDS, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* DOUBLE PRECISION P( LDP, * ), S( LDS, * ), VL( LDVL, * ), +* $ VR( LDVR, * ), WORK( * ) +* .. +* +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTGEVC computes some or all of the right and/or left eigenvectors of +*> a pair of real matrices (S,P), where S is a quasi-triangular matrix +*> and P is upper triangular. Matrix pairs of this type are produced by +*> the generalized Schur factorization of a matrix pair (A,B): +*> +*> A = Q*S*Z**T, B = Q*P*Z**T +*> +*> as computed by DGGHRD + DHGEQZ. +*> +*> The right eigenvector x and the left eigenvector y of (S,P) +*> corresponding to an eigenvalue w are defined by: +*> +*> S*x = w*P*x, (y**H)*S = w*(y**H)*P, +*> +*> where y**H denotes the conjugate tranpose of y. +*> The eigenvalues are not input to this routine, but are computed +*> directly from the diagonal blocks of S and P. +*> +*> This routine returns the matrices X and/or Y of right and left +*> eigenvectors of (S,P), or the products Z*X and/or Q*Y, +*> where Z and Q are input matrices. +*> If Q and Z are the orthogonal factors from the generalized Schur +*> factorization of a matrix pair (A,B), then Z*X and Q*Y +*> are the matrices of right and left eigenvectors of (A,B). +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': compute right eigenvectors only; +*> = 'L': compute left eigenvectors only; +*> = 'B': compute both right and left eigenvectors. +*> \endverbatim +*> +*> \param[in] HOWMNY +*> \verbatim +*> HOWMNY is CHARACTER*1 +*> = 'A': compute all right and/or left eigenvectors; +*> = 'B': compute all right and/or left eigenvectors, +*> backtransformed by the matrices in VR and/or VL; +*> = 'S': compute selected right and/or left eigenvectors, +*> specified by the logical array SELECT. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> If HOWMNY='S', SELECT specifies the eigenvectors to be +*> computed. If w(j) is a real eigenvalue, the corresponding +*> real eigenvector is computed if SELECT(j) is .TRUE.. +*> If w(j) and w(j+1) are the real and imaginary parts of a +*> complex eigenvalue, the corresponding complex eigenvector +*> is computed if either SELECT(j) or SELECT(j+1) is .TRUE., +*> and on exit SELECT(j) is set to .TRUE. and SELECT(j+1) is +*> set to .FALSE.. +*> Not referenced if HOWMNY = 'A' or 'B'. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices S and P. N >= 0. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (LDS,N) +*> The upper quasi-triangular matrix S from a generalized Schur +*> factorization, as computed by DHGEQZ. +*> \endverbatim +*> +*> \param[in] LDS +*> \verbatim +*> LDS is INTEGER +*> The leading dimension of array S. LDS >= max(1,N). +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is DOUBLE PRECISION array, dimension (LDP,N) +*> The upper triangular matrix P from a generalized Schur +*> factorization, as computed by DHGEQZ. +*> 2-by-2 diagonal blocks of P corresponding to 2-by-2 blocks +*> of S must be in positive diagonal form. +*> \endverbatim +*> +*> \param[in] LDP +*> \verbatim +*> LDP is INTEGER +*> The leading dimension of array P. LDP >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is DOUBLE PRECISION array, dimension (LDVL,MM) +*> On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +*> contain an N-by-N matrix Q (usually the orthogonal matrix Q +*> of left Schur vectors returned by DHGEQZ). +*> On exit, if SIDE = 'L' or 'B', VL contains: +*> if HOWMNY = 'A', the matrix Y of left eigenvectors of (S,P); +*> if HOWMNY = 'B', the matrix Q*Y; +*> if HOWMNY = 'S', the left eigenvectors of (S,P) specified by +*> SELECT, stored consecutively in the columns of +*> VL, in the same order as their eigenvalues. +*> +*> A complex eigenvector corresponding to a complex eigenvalue +*> is stored in two consecutive columns, the first holding the +*> real part, and the second the imaginary part. +*> +*> Not referenced if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of array VL. LDVL >= 1, and if +*> SIDE = 'L' or 'B', LDVL >= N. +*> \endverbatim +*> +*> \param[in,out] VR +*> \verbatim +*> VR is DOUBLE PRECISION array, dimension (LDVR,MM) +*> On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +*> contain an N-by-N matrix Z (usually the orthogonal matrix Z +*> of right Schur vectors returned by DHGEQZ). +*> +*> On exit, if SIDE = 'R' or 'B', VR contains: +*> if HOWMNY = 'A', the matrix X of right eigenvectors of (S,P); +*> if HOWMNY = 'B' or 'b', the matrix Z*X; +*> if HOWMNY = 'S' or 's', the right eigenvectors of (S,P) +*> specified by SELECT, stored consecutively in the +*> columns of VR, in the same order as their +*> eigenvalues. +*> +*> A complex eigenvector corresponding to a complex eigenvalue +*> is stored in two consecutive columns, the first holding the +*> real part and the second the imaginary part. +*> +*> Not referenced if SIDE = 'L'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. LDVR >= 1, and if +*> SIDE = 'R' or 'B', LDVR >= N. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of columns in the arrays VL and/or VR. MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of columns in the arrays VL and/or VR actually +*> used to store the eigenvectors. If HOWMNY = 'A' or 'B', M +*> is set to N. Each selected real eigenvector occupies one +*> column and each selected complex eigenvector occupies two +*> columns. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (6*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: the 2-by-2 block (INFO:INFO+1) does not have a complex +*> eigenvalue. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Allocation of workspace: +*> ---------- -- --------- +*> +*> WORK( j ) = 1-norm of j-th column of A, above the diagonal +*> WORK( N+j ) = 1-norm of j-th column of B, above the diagonal +*> WORK( 2*N+1:3*N ) = real part of eigenvector +*> WORK( 3*N+1:4*N ) = imaginary part of eigenvector +*> WORK( 4*N+1:5*N ) = real part of back-transformed eigenvector +*> WORK( 5*N+1:6*N ) = imaginary part of back-transformed eigenvector +*> +*> Rowwise vs. columnwise solution methods: +*> ------- -- ---------- -------- ------- +*> +*> Finding a generalized eigenvector consists basically of solving the +*> singular triangular system +*> +*> (A - w B) x = 0 (for right) or: (A - w B)**H y = 0 (for left) +*> +*> Consider finding the i-th right eigenvector (assume all eigenvalues +*> are real). The equation to be solved is: +*> n i +*> 0 = sum C(j,k) v(k) = sum C(j,k) v(k) for j = i,. . .,1 +*> k=j k=j +*> +*> where C = (A - w B) (The components v(i+1:n) are 0.) +*> +*> The "rowwise" method is: +*> +*> (1) v(i) := 1 +*> for j = i-1,. . .,1: +*> i +*> (2) compute s = - sum C(j,k) v(k) and +*> k=j+1 +*> +*> (3) v(j) := s / C(j,j) +*> +*> Step 2 is sometimes called the "dot product" step, since it is an +*> inner product between the j-th row and the portion of the eigenvector +*> that has been computed so far. +*> +*> The "columnwise" method consists basically in doing the sums +*> for all the rows in parallel. As each v(j) is computed, the +*> contribution of v(j) times the j-th column of C is added to the +*> partial sums. Since FORTRAN arrays are stored columnwise, this has +*> the advantage that at each step, the elements of C that are accessed +*> are adjacent to one another, whereas with the rowwise method, the +*> elements accessed at a step are spaced LDS (and LDP) words apart. +*> +*> When finding left eigenvectors, the matrix in question is the +*> transpose of the one in storage, so the rowwise method then +*> actually accesses columns of A and B at each step, and so is the +*> preferred method. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTGEVC( SIDE, HOWMNY, SELECT, N, S, LDS, P, LDP, VL, + $ LDVL, VR, LDVR, MM, M, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDP, LDS, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + DOUBLE PRECISION P( LDP, * ), S( LDS, * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, SAFETY + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, + $ SAFETY = 1.0D+2 ) +* .. +* .. Local Scalars .. + LOGICAL COMPL, COMPR, IL2BY2, ILABAD, ILALL, ILBACK, + $ ILBBAD, ILCOMP, ILCPLX, LSA, LSB + INTEGER I, IBEG, IEIG, IEND, IHWMNY, IINFO, IM, ISIDE, + $ J, JA, JC, JE, JR, JW, NA, NW + DOUBLE PRECISION ACOEF, ACOEFA, ANORM, ASCALE, BCOEFA, BCOEFI, + $ BCOEFR, BIG, BIGNUM, BNORM, BSCALE, CIM2A, + $ CIM2B, CIMAGA, CIMAGB, CRE2A, CRE2B, CREALA, + $ CREALB, DMIN, SAFMIN, SALFAR, SBETA, SCALE, + $ SMALL, TEMP, TEMP2, TEMP2I, TEMP2R, ULP, XMAX, + $ XSCALE +* .. +* .. Local Arrays .. + DOUBLE PRECISION BDIAG( 2 ), SUM( 2, 2 ), SUMS( 2, 2 ), + $ SUMP( 2, 2 ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DLABAD, DLACPY, DLAG2, DLALN2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and Test the input parameters +* + IF( LSAME( HOWMNY, 'A' ) ) THEN + IHWMNY = 1 + ILALL = .TRUE. + ILBACK = .FALSE. + ELSE IF( LSAME( HOWMNY, 'S' ) ) THEN + IHWMNY = 2 + ILALL = .FALSE. + ILBACK = .FALSE. + ELSE IF( LSAME( HOWMNY, 'B' ) ) THEN + IHWMNY = 3 + ILALL = .TRUE. + ILBACK = .TRUE. + ELSE + IHWMNY = -1 + ILALL = .TRUE. + END IF +* + IF( LSAME( SIDE, 'R' ) ) THEN + ISIDE = 1 + COMPL = .FALSE. + COMPR = .TRUE. + ELSE IF( LSAME( SIDE, 'L' ) ) THEN + ISIDE = 2 + COMPL = .TRUE. + COMPR = .FALSE. + ELSE IF( LSAME( SIDE, 'B' ) ) THEN + ISIDE = 3 + COMPL = .TRUE. + COMPR = .TRUE. + ELSE + ISIDE = -1 + END IF +* + INFO = 0 + IF( ISIDE.LT.0 ) THEN + INFO = -1 + ELSE IF( IHWMNY.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDS.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDP.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTGEVC', -INFO ) + RETURN + END IF +* +* Count the number of eigenvectors to be computed +* + IF( .NOT.ILALL ) THEN + IM = 0 + ILCPLX = .FALSE. + DO 10 J = 1, N + IF( ILCPLX ) THEN + ILCPLX = .FALSE. + GO TO 10 + END IF + IF( J.LT.N ) THEN + IF( S( J+1, J ).NE.ZERO ) + $ ILCPLX = .TRUE. + END IF + IF( ILCPLX ) THEN + IF( SELECT( J ) .OR. SELECT( J+1 ) ) + $ IM = IM + 2 + ELSE + IF( SELECT( J ) ) + $ IM = IM + 1 + END IF + 10 CONTINUE + ELSE + IM = N + END IF +* +* Check 2-by-2 diagonal blocks of A, B +* + ILABAD = .FALSE. + ILBBAD = .FALSE. + DO 20 J = 1, N - 1 + IF( S( J+1, J ).NE.ZERO ) THEN + IF( P( J, J ).EQ.ZERO .OR. P( J+1, J+1 ).EQ.ZERO .OR. + $ P( J, J+1 ).NE.ZERO )ILBBAD = .TRUE. + IF( J.LT.N-1 ) THEN + IF( S( J+2, J+1 ).NE.ZERO ) + $ ILABAD = .TRUE. + END IF + END IF + 20 CONTINUE +* + IF( ILABAD ) THEN + INFO = -5 + ELSE IF( ILBBAD ) THEN + INFO = -7 + ELSE IF( COMPL .AND. LDVL.LT.N .OR. LDVL.LT.1 ) THEN + INFO = -10 + ELSE IF( COMPR .AND. LDVR.LT.N .OR. LDVR.LT.1 ) THEN + INFO = -12 + ELSE IF( MM.LT.IM ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTGEVC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = IM + IF( N.EQ.0 ) + $ RETURN +* +* Machine Constants +* + SAFMIN = DLAMCH( 'Safe minimum' ) + BIG = ONE / SAFMIN + CALL DLABAD( SAFMIN, BIG ) + ULP = DLAMCH( 'Epsilon' )*DLAMCH( 'Base' ) + SMALL = SAFMIN*N / ULP + BIG = ONE / SMALL + BIGNUM = ONE / ( SAFMIN*N ) +* +* Compute the 1-norm of each column of the strictly upper triangular +* part (i.e., excluding all elements belonging to the diagonal +* blocks) of A and B to check for possible overflow in the +* triangular solver. +* + ANORM = ABS( S( 1, 1 ) ) + IF( N.GT.1 ) + $ ANORM = ANORM + ABS( S( 2, 1 ) ) + BNORM = ABS( P( 1, 1 ) ) + WORK( 1 ) = ZERO + WORK( N+1 ) = ZERO +* + DO 50 J = 2, N + TEMP = ZERO + TEMP2 = ZERO + IF( S( J, J-1 ).EQ.ZERO ) THEN + IEND = J - 1 + ELSE + IEND = J - 2 + END IF + DO 30 I = 1, IEND + TEMP = TEMP + ABS( S( I, J ) ) + TEMP2 = TEMP2 + ABS( P( I, J ) ) + 30 CONTINUE + WORK( J ) = TEMP + WORK( N+J ) = TEMP2 + DO 40 I = IEND + 1, MIN( J+1, N ) + TEMP = TEMP + ABS( S( I, J ) ) + TEMP2 = TEMP2 + ABS( P( I, J ) ) + 40 CONTINUE + ANORM = MAX( ANORM, TEMP ) + BNORM = MAX( BNORM, TEMP2 ) + 50 CONTINUE +* + ASCALE = ONE / MAX( ANORM, SAFMIN ) + BSCALE = ONE / MAX( BNORM, SAFMIN ) +* +* Left eigenvectors +* + IF( COMPL ) THEN + IEIG = 0 +* +* Main loop over eigenvalues +* + ILCPLX = .FALSE. + DO 220 JE = 1, N +* +* Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or +* (b) this would be the second of a complex pair. +* Check for complex eigenvalue, so as to be sure of which +* entry(-ies) of SELECT to look at. +* + IF( ILCPLX ) THEN + ILCPLX = .FALSE. + GO TO 220 + END IF + NW = 1 + IF( JE.LT.N ) THEN + IF( S( JE+1, JE ).NE.ZERO ) THEN + ILCPLX = .TRUE. + NW = 2 + END IF + END IF + IF( ILALL ) THEN + ILCOMP = .TRUE. + ELSE IF( ILCPLX ) THEN + ILCOMP = SELECT( JE ) .OR. SELECT( JE+1 ) + ELSE + ILCOMP = SELECT( JE ) + END IF + IF( .NOT.ILCOMP ) + $ GO TO 220 +* +* Decide if (a) singular pencil, (b) real eigenvalue, or +* (c) complex eigenvalue. +* + IF( .NOT.ILCPLX ) THEN + IF( ABS( S( JE, JE ) ).LE.SAFMIN .AND. + $ ABS( P( JE, JE ) ).LE.SAFMIN ) THEN +* +* Singular matrix pencil -- return unit eigenvector +* + IEIG = IEIG + 1 + DO 60 JR = 1, N + VL( JR, IEIG ) = ZERO + 60 CONTINUE + VL( IEIG, IEIG ) = ONE + GO TO 220 + END IF + END IF +* +* Clear vector +* + DO 70 JR = 1, NW*N + WORK( 2*N+JR ) = ZERO + 70 CONTINUE +* T +* Compute coefficients in ( a A - b B ) y = 0 +* a is ACOEF +* b is BCOEFR + i*BCOEFI +* + IF( .NOT.ILCPLX ) THEN +* +* Real eigenvalue +* + TEMP = ONE / MAX( ABS( S( JE, JE ) )*ASCALE, + $ ABS( P( JE, JE ) )*BSCALE, SAFMIN ) + SALFAR = ( TEMP*S( JE, JE ) )*ASCALE + SBETA = ( TEMP*P( JE, JE ) )*BSCALE + ACOEF = SBETA*ASCALE + BCOEFR = SALFAR*BSCALE + BCOEFI = ZERO +* +* Scale to avoid underflow +* + SCALE = ONE + LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEF ).LT.SMALL + LSB = ABS( SALFAR ).GE.SAFMIN .AND. ABS( BCOEFR ).LT. + $ SMALL + IF( LSA ) + $ SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG ) + IF( LSB ) + $ SCALE = MAX( SCALE, ( SMALL / ABS( SALFAR ) )* + $ MIN( BNORM, BIG ) ) + IF( LSA .OR. LSB ) THEN + SCALE = MIN( SCALE, ONE / + $ ( SAFMIN*MAX( ONE, ABS( ACOEF ), + $ ABS( BCOEFR ) ) ) ) + IF( LSA ) THEN + ACOEF = ASCALE*( SCALE*SBETA ) + ELSE + ACOEF = SCALE*ACOEF + END IF + IF( LSB ) THEN + BCOEFR = BSCALE*( SCALE*SALFAR ) + ELSE + BCOEFR = SCALE*BCOEFR + END IF + END IF + ACOEFA = ABS( ACOEF ) + BCOEFA = ABS( BCOEFR ) +* +* First component is 1 +* + WORK( 2*N+JE ) = ONE + XMAX = ONE + ELSE +* +* Complex eigenvalue +* + CALL DLAG2( S( JE, JE ), LDS, P( JE, JE ), LDP, + $ SAFMIN*SAFETY, ACOEF, TEMP, BCOEFR, TEMP2, + $ BCOEFI ) + BCOEFI = -BCOEFI + IF( BCOEFI.EQ.ZERO ) THEN + INFO = JE + RETURN + END IF +* +* Scale to avoid over/underflow +* + ACOEFA = ABS( ACOEF ) + BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI ) + SCALE = ONE + IF( ACOEFA*ULP.LT.SAFMIN .AND. ACOEFA.GE.SAFMIN ) + $ SCALE = ( SAFMIN / ULP ) / ACOEFA + IF( BCOEFA*ULP.LT.SAFMIN .AND. BCOEFA.GE.SAFMIN ) + $ SCALE = MAX( SCALE, ( SAFMIN / ULP ) / BCOEFA ) + IF( SAFMIN*ACOEFA.GT.ASCALE ) + $ SCALE = ASCALE / ( SAFMIN*ACOEFA ) + IF( SAFMIN*BCOEFA.GT.BSCALE ) + $ SCALE = MIN( SCALE, BSCALE / ( SAFMIN*BCOEFA ) ) + IF( SCALE.NE.ONE ) THEN + ACOEF = SCALE*ACOEF + ACOEFA = ABS( ACOEF ) + BCOEFR = SCALE*BCOEFR + BCOEFI = SCALE*BCOEFI + BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI ) + END IF +* +* Compute first two components of eigenvector +* + TEMP = ACOEF*S( JE+1, JE ) + TEMP2R = ACOEF*S( JE, JE ) - BCOEFR*P( JE, JE ) + TEMP2I = -BCOEFI*P( JE, JE ) + IF( ABS( TEMP ).GT.ABS( TEMP2R )+ABS( TEMP2I ) ) THEN + WORK( 2*N+JE ) = ONE + WORK( 3*N+JE ) = ZERO + WORK( 2*N+JE+1 ) = -TEMP2R / TEMP + WORK( 3*N+JE+1 ) = -TEMP2I / TEMP + ELSE + WORK( 2*N+JE+1 ) = ONE + WORK( 3*N+JE+1 ) = ZERO + TEMP = ACOEF*S( JE, JE+1 ) + WORK( 2*N+JE ) = ( BCOEFR*P( JE+1, JE+1 )-ACOEF* + $ S( JE+1, JE+1 ) ) / TEMP + WORK( 3*N+JE ) = BCOEFI*P( JE+1, JE+1 ) / TEMP + END IF + XMAX = MAX( ABS( WORK( 2*N+JE ) )+ABS( WORK( 3*N+JE ) ), + $ ABS( WORK( 2*N+JE+1 ) )+ABS( WORK( 3*N+JE+1 ) ) ) + END IF +* + DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN ) +* +* T +* Triangular solve of (a A - b B) y = 0 +* +* T +* (rowwise in (a A - b B) , or columnwise in (a A - b B) ) +* + IL2BY2 = .FALSE. +* + DO 160 J = JE + NW, N + IF( IL2BY2 ) THEN + IL2BY2 = .FALSE. + GO TO 160 + END IF +* + NA = 1 + BDIAG( 1 ) = P( J, J ) + IF( J.LT.N ) THEN + IF( S( J+1, J ).NE.ZERO ) THEN + IL2BY2 = .TRUE. + BDIAG( 2 ) = P( J+1, J+1 ) + NA = 2 + END IF + END IF +* +* Check whether scaling is necessary for dot products +* + XSCALE = ONE / MAX( ONE, XMAX ) + TEMP = MAX( WORK( J ), WORK( N+J ), + $ ACOEFA*WORK( J )+BCOEFA*WORK( N+J ) ) + IF( IL2BY2 ) + $ TEMP = MAX( TEMP, WORK( J+1 ), WORK( N+J+1 ), + $ ACOEFA*WORK( J+1 )+BCOEFA*WORK( N+J+1 ) ) + IF( TEMP.GT.BIGNUM*XSCALE ) THEN + DO 90 JW = 0, NW - 1 + DO 80 JR = JE, J - 1 + WORK( ( JW+2 )*N+JR ) = XSCALE* + $ WORK( ( JW+2 )*N+JR ) + 80 CONTINUE + 90 CONTINUE + XMAX = XMAX*XSCALE + END IF +* +* Compute dot products +* +* j-1 +* SUM = sum conjg( a*S(k,j) - b*P(k,j) )*x(k) +* k=je +* +* To reduce the op count, this is done as +* +* _ j-1 _ j-1 +* a*conjg( sum S(k,j)*x(k) ) - b*conjg( sum P(k,j)*x(k) ) +* k=je k=je +* +* which may cause underflow problems if A or B are close +* to underflow. (E.g., less than SMALL.) +* +* + DO 120 JW = 1, NW + DO 110 JA = 1, NA + SUMS( JA, JW ) = ZERO + SUMP( JA, JW ) = ZERO +* + DO 100 JR = JE, J - 1 + SUMS( JA, JW ) = SUMS( JA, JW ) + + $ S( JR, J+JA-1 )* + $ WORK( ( JW+1 )*N+JR ) + SUMP( JA, JW ) = SUMP( JA, JW ) + + $ P( JR, J+JA-1 )* + $ WORK( ( JW+1 )*N+JR ) + 100 CONTINUE + 110 CONTINUE + 120 CONTINUE +* + DO 130 JA = 1, NA + IF( ILCPLX ) THEN + SUM( JA, 1 ) = -ACOEF*SUMS( JA, 1 ) + + $ BCOEFR*SUMP( JA, 1 ) - + $ BCOEFI*SUMP( JA, 2 ) + SUM( JA, 2 ) = -ACOEF*SUMS( JA, 2 ) + + $ BCOEFR*SUMP( JA, 2 ) + + $ BCOEFI*SUMP( JA, 1 ) + ELSE + SUM( JA, 1 ) = -ACOEF*SUMS( JA, 1 ) + + $ BCOEFR*SUMP( JA, 1 ) + END IF + 130 CONTINUE +* +* T +* Solve ( a A - b B ) y = SUM(,) +* with scaling and perturbation of the denominator +* + CALL DLALN2( .TRUE., NA, NW, DMIN, ACOEF, S( J, J ), LDS, + $ BDIAG( 1 ), BDIAG( 2 ), SUM, 2, BCOEFR, + $ BCOEFI, WORK( 2*N+J ), N, SCALE, TEMP, + $ IINFO ) + IF( SCALE.LT.ONE ) THEN + DO 150 JW = 0, NW - 1 + DO 140 JR = JE, J - 1 + WORK( ( JW+2 )*N+JR ) = SCALE* + $ WORK( ( JW+2 )*N+JR ) + 140 CONTINUE + 150 CONTINUE + XMAX = SCALE*XMAX + END IF + XMAX = MAX( XMAX, TEMP ) + 160 CONTINUE +* +* Copy eigenvector to VL, back transforming if +* HOWMNY='B'. +* + IEIG = IEIG + 1 + IF( ILBACK ) THEN + DO 170 JW = 0, NW - 1 + CALL DGEMV( 'N', N, N+1-JE, ONE, VL( 1, JE ), LDVL, + $ WORK( ( JW+2 )*N+JE ), 1, ZERO, + $ WORK( ( JW+4 )*N+1 ), 1 ) + 170 CONTINUE + CALL DLACPY( ' ', N, NW, WORK( 4*N+1 ), N, VL( 1, JE ), + $ LDVL ) + IBEG = 1 + ELSE + CALL DLACPY( ' ', N, NW, WORK( 2*N+1 ), N, VL( 1, IEIG ), + $ LDVL ) + IBEG = JE + END IF +* +* Scale eigenvector +* + XMAX = ZERO + IF( ILCPLX ) THEN + DO 180 J = IBEG, N + XMAX = MAX( XMAX, ABS( VL( J, IEIG ) )+ + $ ABS( VL( J, IEIG+1 ) ) ) + 180 CONTINUE + ELSE + DO 190 J = IBEG, N + XMAX = MAX( XMAX, ABS( VL( J, IEIG ) ) ) + 190 CONTINUE + END IF +* + IF( XMAX.GT.SAFMIN ) THEN + XSCALE = ONE / XMAX +* + DO 210 JW = 0, NW - 1 + DO 200 JR = IBEG, N + VL( JR, IEIG+JW ) = XSCALE*VL( JR, IEIG+JW ) + 200 CONTINUE + 210 CONTINUE + END IF + IEIG = IEIG + NW - 1 +* + 220 CONTINUE + END IF +* +* Right eigenvectors +* + IF( COMPR ) THEN + IEIG = IM + 1 +* +* Main loop over eigenvalues +* + ILCPLX = .FALSE. + DO 500 JE = N, 1, -1 +* +* Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or +* (b) this would be the second of a complex pair. +* Check for complex eigenvalue, so as to be sure of which +* entry(-ies) of SELECT to look at -- if complex, SELECT(JE) +* or SELECT(JE-1). +* If this is a complex pair, the 2-by-2 diagonal block +* corresponding to the eigenvalue is in rows/columns JE-1:JE +* + IF( ILCPLX ) THEN + ILCPLX = .FALSE. + GO TO 500 + END IF + NW = 1 + IF( JE.GT.1 ) THEN + IF( S( JE, JE-1 ).NE.ZERO ) THEN + ILCPLX = .TRUE. + NW = 2 + END IF + END IF + IF( ILALL ) THEN + ILCOMP = .TRUE. + ELSE IF( ILCPLX ) THEN + ILCOMP = SELECT( JE ) .OR. SELECT( JE-1 ) + ELSE + ILCOMP = SELECT( JE ) + END IF + IF( .NOT.ILCOMP ) + $ GO TO 500 +* +* Decide if (a) singular pencil, (b) real eigenvalue, or +* (c) complex eigenvalue. +* + IF( .NOT.ILCPLX ) THEN + IF( ABS( S( JE, JE ) ).LE.SAFMIN .AND. + $ ABS( P( JE, JE ) ).LE.SAFMIN ) THEN +* +* Singular matrix pencil -- unit eigenvector +* + IEIG = IEIG - 1 + DO 230 JR = 1, N + VR( JR, IEIG ) = ZERO + 230 CONTINUE + VR( IEIG, IEIG ) = ONE + GO TO 500 + END IF + END IF +* +* Clear vector +* + DO 250 JW = 0, NW - 1 + DO 240 JR = 1, N + WORK( ( JW+2 )*N+JR ) = ZERO + 240 CONTINUE + 250 CONTINUE +* +* Compute coefficients in ( a A - b B ) x = 0 +* a is ACOEF +* b is BCOEFR + i*BCOEFI +* + IF( .NOT.ILCPLX ) THEN +* +* Real eigenvalue +* + TEMP = ONE / MAX( ABS( S( JE, JE ) )*ASCALE, + $ ABS( P( JE, JE ) )*BSCALE, SAFMIN ) + SALFAR = ( TEMP*S( JE, JE ) )*ASCALE + SBETA = ( TEMP*P( JE, JE ) )*BSCALE + ACOEF = SBETA*ASCALE + BCOEFR = SALFAR*BSCALE + BCOEFI = ZERO +* +* Scale to avoid underflow +* + SCALE = ONE + LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEF ).LT.SMALL + LSB = ABS( SALFAR ).GE.SAFMIN .AND. ABS( BCOEFR ).LT. + $ SMALL + IF( LSA ) + $ SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG ) + IF( LSB ) + $ SCALE = MAX( SCALE, ( SMALL / ABS( SALFAR ) )* + $ MIN( BNORM, BIG ) ) + IF( LSA .OR. LSB ) THEN + SCALE = MIN( SCALE, ONE / + $ ( SAFMIN*MAX( ONE, ABS( ACOEF ), + $ ABS( BCOEFR ) ) ) ) + IF( LSA ) THEN + ACOEF = ASCALE*( SCALE*SBETA ) + ELSE + ACOEF = SCALE*ACOEF + END IF + IF( LSB ) THEN + BCOEFR = BSCALE*( SCALE*SALFAR ) + ELSE + BCOEFR = SCALE*BCOEFR + END IF + END IF + ACOEFA = ABS( ACOEF ) + BCOEFA = ABS( BCOEFR ) +* +* First component is 1 +* + WORK( 2*N+JE ) = ONE + XMAX = ONE +* +* Compute contribution from column JE of A and B to sum +* (See "Further Details", above.) +* + DO 260 JR = 1, JE - 1 + WORK( 2*N+JR ) = BCOEFR*P( JR, JE ) - + $ ACOEF*S( JR, JE ) + 260 CONTINUE + ELSE +* +* Complex eigenvalue +* + CALL DLAG2( S( JE-1, JE-1 ), LDS, P( JE-1, JE-1 ), LDP, + $ SAFMIN*SAFETY, ACOEF, TEMP, BCOEFR, TEMP2, + $ BCOEFI ) + IF( BCOEFI.EQ.ZERO ) THEN + INFO = JE - 1 + RETURN + END IF +* +* Scale to avoid over/underflow +* + ACOEFA = ABS( ACOEF ) + BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI ) + SCALE = ONE + IF( ACOEFA*ULP.LT.SAFMIN .AND. ACOEFA.GE.SAFMIN ) + $ SCALE = ( SAFMIN / ULP ) / ACOEFA + IF( BCOEFA*ULP.LT.SAFMIN .AND. BCOEFA.GE.SAFMIN ) + $ SCALE = MAX( SCALE, ( SAFMIN / ULP ) / BCOEFA ) + IF( SAFMIN*ACOEFA.GT.ASCALE ) + $ SCALE = ASCALE / ( SAFMIN*ACOEFA ) + IF( SAFMIN*BCOEFA.GT.BSCALE ) + $ SCALE = MIN( SCALE, BSCALE / ( SAFMIN*BCOEFA ) ) + IF( SCALE.NE.ONE ) THEN + ACOEF = SCALE*ACOEF + ACOEFA = ABS( ACOEF ) + BCOEFR = SCALE*BCOEFR + BCOEFI = SCALE*BCOEFI + BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI ) + END IF +* +* Compute first two components of eigenvector +* and contribution to sums +* + TEMP = ACOEF*S( JE, JE-1 ) + TEMP2R = ACOEF*S( JE, JE ) - BCOEFR*P( JE, JE ) + TEMP2I = -BCOEFI*P( JE, JE ) + IF( ABS( TEMP ).GE.ABS( TEMP2R )+ABS( TEMP2I ) ) THEN + WORK( 2*N+JE ) = ONE + WORK( 3*N+JE ) = ZERO + WORK( 2*N+JE-1 ) = -TEMP2R / TEMP + WORK( 3*N+JE-1 ) = -TEMP2I / TEMP + ELSE + WORK( 2*N+JE-1 ) = ONE + WORK( 3*N+JE-1 ) = ZERO + TEMP = ACOEF*S( JE-1, JE ) + WORK( 2*N+JE ) = ( BCOEFR*P( JE-1, JE-1 )-ACOEF* + $ S( JE-1, JE-1 ) ) / TEMP + WORK( 3*N+JE ) = BCOEFI*P( JE-1, JE-1 ) / TEMP + END IF +* + XMAX = MAX( ABS( WORK( 2*N+JE ) )+ABS( WORK( 3*N+JE ) ), + $ ABS( WORK( 2*N+JE-1 ) )+ABS( WORK( 3*N+JE-1 ) ) ) +* +* Compute contribution from columns JE and JE-1 +* of A and B to the sums. +* + CREALA = ACOEF*WORK( 2*N+JE-1 ) + CIMAGA = ACOEF*WORK( 3*N+JE-1 ) + CREALB = BCOEFR*WORK( 2*N+JE-1 ) - + $ BCOEFI*WORK( 3*N+JE-1 ) + CIMAGB = BCOEFI*WORK( 2*N+JE-1 ) + + $ BCOEFR*WORK( 3*N+JE-1 ) + CRE2A = ACOEF*WORK( 2*N+JE ) + CIM2A = ACOEF*WORK( 3*N+JE ) + CRE2B = BCOEFR*WORK( 2*N+JE ) - BCOEFI*WORK( 3*N+JE ) + CIM2B = BCOEFI*WORK( 2*N+JE ) + BCOEFR*WORK( 3*N+JE ) + DO 270 JR = 1, JE - 2 + WORK( 2*N+JR ) = -CREALA*S( JR, JE-1 ) + + $ CREALB*P( JR, JE-1 ) - + $ CRE2A*S( JR, JE ) + CRE2B*P( JR, JE ) + WORK( 3*N+JR ) = -CIMAGA*S( JR, JE-1 ) + + $ CIMAGB*P( JR, JE-1 ) - + $ CIM2A*S( JR, JE ) + CIM2B*P( JR, JE ) + 270 CONTINUE + END IF +* + DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN ) +* +* Columnwise triangular solve of (a A - b B) x = 0 +* + IL2BY2 = .FALSE. + DO 370 J = JE - NW, 1, -1 +* +* If a 2-by-2 block, is in position j-1:j, wait until +* next iteration to process it (when it will be j:j+1) +* + IF( .NOT.IL2BY2 .AND. J.GT.1 ) THEN + IF( S( J, J-1 ).NE.ZERO ) THEN + IL2BY2 = .TRUE. + GO TO 370 + END IF + END IF + BDIAG( 1 ) = P( J, J ) + IF( IL2BY2 ) THEN + NA = 2 + BDIAG( 2 ) = P( J+1, J+1 ) + ELSE + NA = 1 + END IF +* +* Compute x(j) (and x(j+1), if 2-by-2 block) +* + CALL DLALN2( .FALSE., NA, NW, DMIN, ACOEF, S( J, J ), + $ LDS, BDIAG( 1 ), BDIAG( 2 ), WORK( 2*N+J ), + $ N, BCOEFR, BCOEFI, SUM, 2, SCALE, TEMP, + $ IINFO ) + IF( SCALE.LT.ONE ) THEN +* + DO 290 JW = 0, NW - 1 + DO 280 JR = 1, JE + WORK( ( JW+2 )*N+JR ) = SCALE* + $ WORK( ( JW+2 )*N+JR ) + 280 CONTINUE + 290 CONTINUE + END IF + XMAX = MAX( SCALE*XMAX, TEMP ) +* + DO 310 JW = 1, NW + DO 300 JA = 1, NA + WORK( ( JW+1 )*N+J+JA-1 ) = SUM( JA, JW ) + 300 CONTINUE + 310 CONTINUE +* +* w = w + x(j)*(a S(*,j) - b P(*,j) ) with scaling +* + IF( J.GT.1 ) THEN +* +* Check whether scaling is necessary for sum. +* + XSCALE = ONE / MAX( ONE, XMAX ) + TEMP = ACOEFA*WORK( J ) + BCOEFA*WORK( N+J ) + IF( IL2BY2 ) + $ TEMP = MAX( TEMP, ACOEFA*WORK( J+1 )+BCOEFA* + $ WORK( N+J+1 ) ) + TEMP = MAX( TEMP, ACOEFA, BCOEFA ) + IF( TEMP.GT.BIGNUM*XSCALE ) THEN +* + DO 330 JW = 0, NW - 1 + DO 320 JR = 1, JE + WORK( ( JW+2 )*N+JR ) = XSCALE* + $ WORK( ( JW+2 )*N+JR ) + 320 CONTINUE + 330 CONTINUE + XMAX = XMAX*XSCALE + END IF +* +* Compute the contributions of the off-diagonals of +* column j (and j+1, if 2-by-2 block) of A and B to the +* sums. +* +* + DO 360 JA = 1, NA + IF( ILCPLX ) THEN + CREALA = ACOEF*WORK( 2*N+J+JA-1 ) + CIMAGA = ACOEF*WORK( 3*N+J+JA-1 ) + CREALB = BCOEFR*WORK( 2*N+J+JA-1 ) - + $ BCOEFI*WORK( 3*N+J+JA-1 ) + CIMAGB = BCOEFI*WORK( 2*N+J+JA-1 ) + + $ BCOEFR*WORK( 3*N+J+JA-1 ) + DO 340 JR = 1, J - 1 + WORK( 2*N+JR ) = WORK( 2*N+JR ) - + $ CREALA*S( JR, J+JA-1 ) + + $ CREALB*P( JR, J+JA-1 ) + WORK( 3*N+JR ) = WORK( 3*N+JR ) - + $ CIMAGA*S( JR, J+JA-1 ) + + $ CIMAGB*P( JR, J+JA-1 ) + 340 CONTINUE + ELSE + CREALA = ACOEF*WORK( 2*N+J+JA-1 ) + CREALB = BCOEFR*WORK( 2*N+J+JA-1 ) + DO 350 JR = 1, J - 1 + WORK( 2*N+JR ) = WORK( 2*N+JR ) - + $ CREALA*S( JR, J+JA-1 ) + + $ CREALB*P( JR, J+JA-1 ) + 350 CONTINUE + END IF + 360 CONTINUE + END IF +* + IL2BY2 = .FALSE. + 370 CONTINUE +* +* Copy eigenvector to VR, back transforming if +* HOWMNY='B'. +* + IEIG = IEIG - NW + IF( ILBACK ) THEN +* + DO 410 JW = 0, NW - 1 + DO 380 JR = 1, N + WORK( ( JW+4 )*N+JR ) = WORK( ( JW+2 )*N+1 )* + $ VR( JR, 1 ) + 380 CONTINUE +* +* A series of compiler directives to defeat +* vectorization for the next loop +* +* + DO 400 JC = 2, JE + DO 390 JR = 1, N + WORK( ( JW+4 )*N+JR ) = WORK( ( JW+4 )*N+JR ) + + $ WORK( ( JW+2 )*N+JC )*VR( JR, JC ) + 390 CONTINUE + 400 CONTINUE + 410 CONTINUE +* + DO 430 JW = 0, NW - 1 + DO 420 JR = 1, N + VR( JR, IEIG+JW ) = WORK( ( JW+4 )*N+JR ) + 420 CONTINUE + 430 CONTINUE +* + IEND = N + ELSE + DO 450 JW = 0, NW - 1 + DO 440 JR = 1, N + VR( JR, IEIG+JW ) = WORK( ( JW+2 )*N+JR ) + 440 CONTINUE + 450 CONTINUE +* + IEND = JE + END IF +* +* Scale eigenvector +* + XMAX = ZERO + IF( ILCPLX ) THEN + DO 460 J = 1, IEND + XMAX = MAX( XMAX, ABS( VR( J, IEIG ) )+ + $ ABS( VR( J, IEIG+1 ) ) ) + 460 CONTINUE + ELSE + DO 470 J = 1, IEND + XMAX = MAX( XMAX, ABS( VR( J, IEIG ) ) ) + 470 CONTINUE + END IF +* + IF( XMAX.GT.SAFMIN ) THEN + XSCALE = ONE / XMAX + DO 490 JW = 0, NW - 1 + DO 480 JR = 1, IEND + VR( JR, IEIG+JW ) = XSCALE*VR( JR, IEIG+JW ) + 480 CONTINUE + 490 CONTINUE + END IF + 500 CONTINUE + END IF +* + RETURN +* +* End of DTGEVC +* + END diff --git a/dspl/liblapack/SRC/dtgex2.f b/dspl/liblapack/SRC/dtgex2.f new file mode 100644 index 0000000..93ff03a --- /dev/null +++ b/dspl/liblapack/SRC/dtgex2.f @@ -0,0 +1,697 @@ +*> \brief \b DTGEX2 swaps adjacent diagonal blocks in an upper (quasi) triangular matrix pair by an orthogonal equivalence transformation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTGEX2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, +* LDZ, J1, N1, N2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL WANTQ, WANTZ +* INTEGER INFO, J1, LDA, LDB, LDQ, LDZ, LWORK, N, N1, N2 +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTGEX2 swaps adjacent diagonal blocks (A11, B11) and (A22, B22) +*> of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair +*> (A, B) by an orthogonal equivalence transformation. +*> +*> (A, B) must be in generalized real Schur canonical form (as returned +*> by DGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 +*> diagonal blocks. B is upper triangular. +*> +*> Optionally, the matrices Q and Z of generalized Schur vectors are +*> updated. +*> +*> Q(in) * A(in) * Z(in)**T = Q(out) * A(out) * Z(out)**T +*> Q(in) * B(in) * Z(in)**T = Q(out) * B(out) * Z(out)**T +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTQ +*> \verbatim +*> WANTQ is LOGICAL +*> .TRUE. : update the left transformation matrix Q; +*> .FALSE.: do not update Q. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> .TRUE. : update the right transformation matrix Z; +*> .FALSE.: do not update Z. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimensions (LDA,N) +*> On entry, the matrix A in the pair (A, B). +*> On exit, the updated matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimensions (LDB,N) +*> On entry, the matrix B in the pair (A, B). +*> On exit, the updated matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> On entry, if WANTQ = .TRUE., the orthogonal matrix Q. +*> On exit, the updated matrix Q. +*> Not referenced if WANTQ = .FALSE.. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= 1. +*> If WANTQ = .TRUE., LDQ >= N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ,N) +*> On entry, if WANTZ =.TRUE., the orthogonal matrix Z. +*> On exit, the updated matrix Z. +*> Not referenced if WANTZ = .FALSE.. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1. +*> If WANTZ = .TRUE., LDZ >= N. +*> \endverbatim +*> +*> \param[in] J1 +*> \verbatim +*> J1 is INTEGER +*> The index to the first block (A11, B11). 1 <= J1 <= N. +*> \endverbatim +*> +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> The order of the first block (A11, B11). N1 = 0, 1 or 2. +*> \endverbatim +*> +*> \param[in] N2 +*> \verbatim +*> N2 is INTEGER +*> The order of the second block (A22, B22). N2 = 0, 1 or 2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)). +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= MAX( 1, N*(N2+N1), (N2+N1)*(N2+N1)*2 ) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> =0: Successful exit +*> >0: If INFO = 1, the transformed matrix (A, B) would be +*> too far from generalized Schur form; the blocks are +*> not swapped and (A, B) and (Q, Z) are unchanged. +*> The problem of swapping is too ill-conditioned. +*> <0: If INFO = -16: LWORK is too small. Appropriate value +*> for LWORK is returned in WORK(1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEauxiliary +* +*> \par Further Details: +* ===================== +*> +*> In the current code both weak and strong stability tests are +*> performed. The user can omit the strong stability test by changing +*> the internal logical parameter WANDS to .FALSE.. See ref. [2] for +*> details. +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +*> Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +*> M.S. Moonen et al (eds), Linear Algebra for Large Scale and +*> Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +*> +*> [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +*> Eigenvalues of a Regular Matrix Pair (A, B) and Condition +*> Estimation: Theory, Algorithms and Software, +*> Report UMINF - 94.04, Department of Computing Science, Umea +*> University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working +*> Note 87. To appear in Numerical Algorithms, 1996. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, J1, N1, N2, WORK, LWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER INFO, J1, LDA, LDB, LDQ, LDZ, LWORK, N, N1, N2 +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* Replaced various illegal calls to DCOPY by calls to DLASET, or by DO +* loops. Sven Hammarling, 1/5/02. +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION TWENTY + PARAMETER ( TWENTY = 2.0D+01 ) + INTEGER LDST + PARAMETER ( LDST = 4 ) + LOGICAL WANDS + PARAMETER ( WANDS = .TRUE. ) +* .. +* .. Local Scalars .. + LOGICAL DTRONG, WEAK + INTEGER I, IDUM, LINFO, M + DOUBLE PRECISION BQRA21, BRQA21, DDUM, DNORM, DSCALE, DSUM, EPS, + $ F, G, SA, SB, SCALE, SMLNUM, SS, THRESH, WS +* .. +* .. Local Arrays .. + INTEGER IWORK( LDST ) + DOUBLE PRECISION AI( 2 ), AR( 2 ), BE( 2 ), IR( LDST, LDST ), + $ IRCOP( LDST, LDST ), LI( LDST, LDST ), + $ LICOP( LDST, LDST ), S( LDST, LDST ), + $ SCPY( LDST, LDST ), T( LDST, LDST ), + $ TAUL( LDST ), TAUR( LDST ), TCPY( LDST, LDST ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DGEQR2, DGERQ2, DLACPY, DLAGV2, DLARTG, + $ DLASET, DLASSQ, DORG2R, DORGR2, DORM2R, DORMR2, + $ DROT, DSCAL, DTGSY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.1 .OR. N1.LE.0 .OR. N2.LE.0 ) + $ RETURN + IF( N1.GT.N .OR. ( J1+N1 ).GT.N ) + $ RETURN + M = N1 + N2 + IF( LWORK.LT.MAX( 1, N*M, M*M*2 ) ) THEN + INFO = -16 + WORK( 1 ) = MAX( 1, N*M, M*M*2 ) + RETURN + END IF +* + WEAK = .FALSE. + DTRONG = .FALSE. +* +* Make a local copy of selected block +* + CALL DLASET( 'Full', LDST, LDST, ZERO, ZERO, LI, LDST ) + CALL DLASET( 'Full', LDST, LDST, ZERO, ZERO, IR, LDST ) + CALL DLACPY( 'Full', M, M, A( J1, J1 ), LDA, S, LDST ) + CALL DLACPY( 'Full', M, M, B( J1, J1 ), LDB, T, LDST ) +* +* Compute threshold for testing acceptance of swapping. +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + DSCALE = ZERO + DSUM = ONE + CALL DLACPY( 'Full', M, M, S, LDST, WORK, M ) + CALL DLASSQ( M*M, WORK, 1, DSCALE, DSUM ) + CALL DLACPY( 'Full', M, M, T, LDST, WORK, M ) + CALL DLASSQ( M*M, WORK, 1, DSCALE, DSUM ) + DNORM = DSCALE*SQRT( DSUM ) +* +* THRES has been changed from +* THRESH = MAX( TEN*EPS*SA, SMLNUM ) +* to +* THRESH = MAX( TWENTY*EPS*SA, SMLNUM ) +* on 04/01/10. +* "Bug" reported by Ondra Kamenik, confirmed by Julie Langou, fixed by +* Jim Demmel and Guillaume Revy. See forum post 1783. +* + THRESH = MAX( TWENTY*EPS*DNORM, SMLNUM ) +* + IF( M.EQ.2 ) THEN +* +* CASE 1: Swap 1-by-1 and 1-by-1 blocks. +* +* Compute orthogonal QL and RQ that swap 1-by-1 and 1-by-1 blocks +* using Givens rotations and perform the swap tentatively. +* + F = S( 2, 2 )*T( 1, 1 ) - T( 2, 2 )*S( 1, 1 ) + G = S( 2, 2 )*T( 1, 2 ) - T( 2, 2 )*S( 1, 2 ) + SB = ABS( T( 2, 2 ) ) + SA = ABS( S( 2, 2 ) ) + CALL DLARTG( F, G, IR( 1, 2 ), IR( 1, 1 ), DDUM ) + IR( 2, 1 ) = -IR( 1, 2 ) + IR( 2, 2 ) = IR( 1, 1 ) + CALL DROT( 2, S( 1, 1 ), 1, S( 1, 2 ), 1, IR( 1, 1 ), + $ IR( 2, 1 ) ) + CALL DROT( 2, T( 1, 1 ), 1, T( 1, 2 ), 1, IR( 1, 1 ), + $ IR( 2, 1 ) ) + IF( SA.GE.SB ) THEN + CALL DLARTG( S( 1, 1 ), S( 2, 1 ), LI( 1, 1 ), LI( 2, 1 ), + $ DDUM ) + ELSE + CALL DLARTG( T( 1, 1 ), T( 2, 1 ), LI( 1, 1 ), LI( 2, 1 ), + $ DDUM ) + END IF + CALL DROT( 2, S( 1, 1 ), LDST, S( 2, 1 ), LDST, LI( 1, 1 ), + $ LI( 2, 1 ) ) + CALL DROT( 2, T( 1, 1 ), LDST, T( 2, 1 ), LDST, LI( 1, 1 ), + $ LI( 2, 1 ) ) + LI( 2, 2 ) = LI( 1, 1 ) + LI( 1, 2 ) = -LI( 2, 1 ) +* +* Weak stability test: +* |S21| + |T21| <= O(EPS * F-norm((S, T))) +* + WS = ABS( S( 2, 1 ) ) + ABS( T( 2, 1 ) ) + WEAK = WS.LE.THRESH + IF( .NOT.WEAK ) + $ GO TO 70 +* + IF( WANDS ) THEN +* +* Strong stability test: +* F-norm((A-QL**T*S*QR, B-QL**T*T*QR)) <= O(EPS*F-norm((A,B))) +* + CALL DLACPY( 'Full', M, M, A( J1, J1 ), LDA, WORK( M*M+1 ), + $ M ) + CALL DGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, S, LDST, ZERO, + $ WORK, M ) + CALL DGEMM( 'N', 'T', M, M, M, -ONE, WORK, M, IR, LDST, ONE, + $ WORK( M*M+1 ), M ) + DSCALE = ZERO + DSUM = ONE + CALL DLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM ) +* + CALL DLACPY( 'Full', M, M, B( J1, J1 ), LDB, WORK( M*M+1 ), + $ M ) + CALL DGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, T, LDST, ZERO, + $ WORK, M ) + CALL DGEMM( 'N', 'T', M, M, M, -ONE, WORK, M, IR, LDST, ONE, + $ WORK( M*M+1 ), M ) + CALL DLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM ) + SS = DSCALE*SQRT( DSUM ) + DTRONG = SS.LE.THRESH + IF( .NOT.DTRONG ) + $ GO TO 70 + END IF +* +* Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and +* (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). +* + CALL DROT( J1+1, A( 1, J1 ), 1, A( 1, J1+1 ), 1, IR( 1, 1 ), + $ IR( 2, 1 ) ) + CALL DROT( J1+1, B( 1, J1 ), 1, B( 1, J1+1 ), 1, IR( 1, 1 ), + $ IR( 2, 1 ) ) + CALL DROT( N-J1+1, A( J1, J1 ), LDA, A( J1+1, J1 ), LDA, + $ LI( 1, 1 ), LI( 2, 1 ) ) + CALL DROT( N-J1+1, B( J1, J1 ), LDB, B( J1+1, J1 ), LDB, + $ LI( 1, 1 ), LI( 2, 1 ) ) +* +* Set N1-by-N2 (2,1) - blocks to ZERO. +* + A( J1+1, J1 ) = ZERO + B( J1+1, J1 ) = ZERO +* +* Accumulate transformations into Q and Z if requested. +* + IF( WANTZ ) + $ CALL DROT( N, Z( 1, J1 ), 1, Z( 1, J1+1 ), 1, IR( 1, 1 ), + $ IR( 2, 1 ) ) + IF( WANTQ ) + $ CALL DROT( N, Q( 1, J1 ), 1, Q( 1, J1+1 ), 1, LI( 1, 1 ), + $ LI( 2, 1 ) ) +* +* Exit with INFO = 0 if swap was successfully performed. +* + RETURN +* + ELSE +* +* CASE 2: Swap 1-by-1 and 2-by-2 blocks, or 2-by-2 +* and 2-by-2 blocks. +* +* Solve the generalized Sylvester equation +* S11 * R - L * S22 = SCALE * S12 +* T11 * R - L * T22 = SCALE * T12 +* for R and L. Solutions in LI and IR. +* + CALL DLACPY( 'Full', N1, N2, T( 1, N1+1 ), LDST, LI, LDST ) + CALL DLACPY( 'Full', N1, N2, S( 1, N1+1 ), LDST, + $ IR( N2+1, N1+1 ), LDST ) + CALL DTGSY2( 'N', 0, N1, N2, S, LDST, S( N1+1, N1+1 ), LDST, + $ IR( N2+1, N1+1 ), LDST, T, LDST, T( N1+1, N1+1 ), + $ LDST, LI, LDST, SCALE, DSUM, DSCALE, IWORK, IDUM, + $ LINFO ) +* +* Compute orthogonal matrix QL: +* +* QL**T * LI = [ TL ] +* [ 0 ] +* where +* LI = [ -L ] +* [ SCALE * identity(N2) ] +* + DO 10 I = 1, N2 + CALL DSCAL( N1, -ONE, LI( 1, I ), 1 ) + LI( N1+I, I ) = SCALE + 10 CONTINUE + CALL DGEQR2( M, N2, LI, LDST, TAUL, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 + CALL DORG2R( M, M, N2, LI, LDST, TAUL, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 +* +* Compute orthogonal matrix RQ: +* +* IR * RQ**T = [ 0 TR], +* +* where IR = [ SCALE * identity(N1), R ] +* + DO 20 I = 1, N1 + IR( N2+I, I ) = SCALE + 20 CONTINUE + CALL DGERQ2( N1, M, IR( N2+1, 1 ), LDST, TAUR, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 + CALL DORGR2( M, M, N1, IR, LDST, TAUR, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 +* +* Perform the swapping tentatively: +* + CALL DGEMM( 'T', 'N', M, M, M, ONE, LI, LDST, S, LDST, ZERO, + $ WORK, M ) + CALL DGEMM( 'N', 'T', M, M, M, ONE, WORK, M, IR, LDST, ZERO, S, + $ LDST ) + CALL DGEMM( 'T', 'N', M, M, M, ONE, LI, LDST, T, LDST, ZERO, + $ WORK, M ) + CALL DGEMM( 'N', 'T', M, M, M, ONE, WORK, M, IR, LDST, ZERO, T, + $ LDST ) + CALL DLACPY( 'F', M, M, S, LDST, SCPY, LDST ) + CALL DLACPY( 'F', M, M, T, LDST, TCPY, LDST ) + CALL DLACPY( 'F', M, M, IR, LDST, IRCOP, LDST ) + CALL DLACPY( 'F', M, M, LI, LDST, LICOP, LDST ) +* +* Triangularize the B-part by an RQ factorization. +* Apply transformation (from left) to A-part, giving S. +* + CALL DGERQ2( M, M, T, LDST, TAUR, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 + CALL DORMR2( 'R', 'T', M, M, M, T, LDST, TAUR, S, LDST, WORK, + $ LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 + CALL DORMR2( 'L', 'N', M, M, M, T, LDST, TAUR, IR, LDST, WORK, + $ LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 +* +* Compute F-norm(S21) in BRQA21. (T21 is 0.) +* + DSCALE = ZERO + DSUM = ONE + DO 30 I = 1, N2 + CALL DLASSQ( N1, S( N2+1, I ), 1, DSCALE, DSUM ) + 30 CONTINUE + BRQA21 = DSCALE*SQRT( DSUM ) +* +* Triangularize the B-part by a QR factorization. +* Apply transformation (from right) to A-part, giving S. +* + CALL DGEQR2( M, M, TCPY, LDST, TAUL, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 + CALL DORM2R( 'L', 'T', M, M, M, TCPY, LDST, TAUL, SCPY, LDST, + $ WORK, INFO ) + CALL DORM2R( 'R', 'N', M, M, M, TCPY, LDST, TAUL, LICOP, LDST, + $ WORK, INFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 +* +* Compute F-norm(S21) in BQRA21. (T21 is 0.) +* + DSCALE = ZERO + DSUM = ONE + DO 40 I = 1, N2 + CALL DLASSQ( N1, SCPY( N2+1, I ), 1, DSCALE, DSUM ) + 40 CONTINUE + BQRA21 = DSCALE*SQRT( DSUM ) +* +* Decide which method to use. +* Weak stability test: +* F-norm(S21) <= O(EPS * F-norm((S, T))) +* + IF( BQRA21.LE.BRQA21 .AND. BQRA21.LE.THRESH ) THEN + CALL DLACPY( 'F', M, M, SCPY, LDST, S, LDST ) + CALL DLACPY( 'F', M, M, TCPY, LDST, T, LDST ) + CALL DLACPY( 'F', M, M, IRCOP, LDST, IR, LDST ) + CALL DLACPY( 'F', M, M, LICOP, LDST, LI, LDST ) + ELSE IF( BRQA21.GE.THRESH ) THEN + GO TO 70 + END IF +* +* Set lower triangle of B-part to zero +* + CALL DLASET( 'Lower', M-1, M-1, ZERO, ZERO, T(2,1), LDST ) +* + IF( WANDS ) THEN +* +* Strong stability test: +* F-norm((A-QL*S*QR**T, B-QL*T*QR**T)) <= O(EPS*F-norm((A,B))) +* + CALL DLACPY( 'Full', M, M, A( J1, J1 ), LDA, WORK( M*M+1 ), + $ M ) + CALL DGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, S, LDST, ZERO, + $ WORK, M ) + CALL DGEMM( 'N', 'N', M, M, M, -ONE, WORK, M, IR, LDST, ONE, + $ WORK( M*M+1 ), M ) + DSCALE = ZERO + DSUM = ONE + CALL DLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM ) +* + CALL DLACPY( 'Full', M, M, B( J1, J1 ), LDB, WORK( M*M+1 ), + $ M ) + CALL DGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, T, LDST, ZERO, + $ WORK, M ) + CALL DGEMM( 'N', 'N', M, M, M, -ONE, WORK, M, IR, LDST, ONE, + $ WORK( M*M+1 ), M ) + CALL DLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM ) + SS = DSCALE*SQRT( DSUM ) + DTRONG = ( SS.LE.THRESH ) + IF( .NOT.DTRONG ) + $ GO TO 70 +* + END IF +* +* If the swap is accepted ("weakly" and "strongly"), apply the +* transformations and set N1-by-N2 (2,1)-block to zero. +* + CALL DLASET( 'Full', N1, N2, ZERO, ZERO, S(N2+1,1), LDST ) +* +* copy back M-by-M diagonal block starting at index J1 of (A, B) +* + CALL DLACPY( 'F', M, M, S, LDST, A( J1, J1 ), LDA ) + CALL DLACPY( 'F', M, M, T, LDST, B( J1, J1 ), LDB ) + CALL DLASET( 'Full', LDST, LDST, ZERO, ZERO, T, LDST ) +* +* Standardize existing 2-by-2 blocks. +* + CALL DLASET( 'Full', M, M, ZERO, ZERO, WORK, M ) + WORK( 1 ) = ONE + T( 1, 1 ) = ONE + IDUM = LWORK - M*M - 2 + IF( N2.GT.1 ) THEN + CALL DLAGV2( A( J1, J1 ), LDA, B( J1, J1 ), LDB, AR, AI, BE, + $ WORK( 1 ), WORK( 2 ), T( 1, 1 ), T( 2, 1 ) ) + WORK( M+1 ) = -WORK( 2 ) + WORK( M+2 ) = WORK( 1 ) + T( N2, N2 ) = T( 1, 1 ) + T( 1, 2 ) = -T( 2, 1 ) + END IF + WORK( M*M ) = ONE + T( M, M ) = ONE +* + IF( N1.GT.1 ) THEN + CALL DLAGV2( A( J1+N2, J1+N2 ), LDA, B( J1+N2, J1+N2 ), LDB, + $ TAUR, TAUL, WORK( M*M+1 ), WORK( N2*M+N2+1 ), + $ WORK( N2*M+N2+2 ), T( N2+1, N2+1 ), + $ T( M, M-1 ) ) + WORK( M*M ) = WORK( N2*M+N2+1 ) + WORK( M*M-1 ) = -WORK( N2*M+N2+2 ) + T( M, M ) = T( N2+1, N2+1 ) + T( M-1, M ) = -T( M, M-1 ) + END IF + CALL DGEMM( 'T', 'N', N2, N1, N2, ONE, WORK, M, A( J1, J1+N2 ), + $ LDA, ZERO, WORK( M*M+1 ), N2 ) + CALL DLACPY( 'Full', N2, N1, WORK( M*M+1 ), N2, A( J1, J1+N2 ), + $ LDA ) + CALL DGEMM( 'T', 'N', N2, N1, N2, ONE, WORK, M, B( J1, J1+N2 ), + $ LDB, ZERO, WORK( M*M+1 ), N2 ) + CALL DLACPY( 'Full', N2, N1, WORK( M*M+1 ), N2, B( J1, J1+N2 ), + $ LDB ) + CALL DGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, WORK, M, ZERO, + $ WORK( M*M+1 ), M ) + CALL DLACPY( 'Full', M, M, WORK( M*M+1 ), M, LI, LDST ) + CALL DGEMM( 'N', 'N', N2, N1, N1, ONE, A( J1, J1+N2 ), LDA, + $ T( N2+1, N2+1 ), LDST, ZERO, WORK, N2 ) + CALL DLACPY( 'Full', N2, N1, WORK, N2, A( J1, J1+N2 ), LDA ) + CALL DGEMM( 'N', 'N', N2, N1, N1, ONE, B( J1, J1+N2 ), LDB, + $ T( N2+1, N2+1 ), LDST, ZERO, WORK, N2 ) + CALL DLACPY( 'Full', N2, N1, WORK, N2, B( J1, J1+N2 ), LDB ) + CALL DGEMM( 'T', 'N', M, M, M, ONE, IR, LDST, T, LDST, ZERO, + $ WORK, M ) + CALL DLACPY( 'Full', M, M, WORK, M, IR, LDST ) +* +* Accumulate transformations into Q and Z if requested. +* + IF( WANTQ ) THEN + CALL DGEMM( 'N', 'N', N, M, M, ONE, Q( 1, J1 ), LDQ, LI, + $ LDST, ZERO, WORK, N ) + CALL DLACPY( 'Full', N, M, WORK, N, Q( 1, J1 ), LDQ ) +* + END IF +* + IF( WANTZ ) THEN + CALL DGEMM( 'N', 'N', N, M, M, ONE, Z( 1, J1 ), LDZ, IR, + $ LDST, ZERO, WORK, N ) + CALL DLACPY( 'Full', N, M, WORK, N, Z( 1, J1 ), LDZ ) +* + END IF +* +* Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and +* (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). +* + I = J1 + M + IF( I.LE.N ) THEN + CALL DGEMM( 'T', 'N', M, N-I+1, M, ONE, LI, LDST, + $ A( J1, I ), LDA, ZERO, WORK, M ) + CALL DLACPY( 'Full', M, N-I+1, WORK, M, A( J1, I ), LDA ) + CALL DGEMM( 'T', 'N', M, N-I+1, M, ONE, LI, LDST, + $ B( J1, I ), LDB, ZERO, WORK, M ) + CALL DLACPY( 'Full', M, N-I+1, WORK, M, B( J1, I ), LDB ) + END IF + I = J1 - 1 + IF( I.GT.0 ) THEN + CALL DGEMM( 'N', 'N', I, M, M, ONE, A( 1, J1 ), LDA, IR, + $ LDST, ZERO, WORK, I ) + CALL DLACPY( 'Full', I, M, WORK, I, A( 1, J1 ), LDA ) + CALL DGEMM( 'N', 'N', I, M, M, ONE, B( 1, J1 ), LDB, IR, + $ LDST, ZERO, WORK, I ) + CALL DLACPY( 'Full', I, M, WORK, I, B( 1, J1 ), LDB ) + END IF +* +* Exit with INFO = 0 if swap was successfully performed. +* + RETURN +* + END IF +* +* Exit with INFO = 1 if swap was rejected. +* + 70 CONTINUE +* + INFO = 1 + RETURN +* +* End of DTGEX2 +* + END diff --git a/dspl/liblapack/SRC/dtgexc.f b/dspl/liblapack/SRC/dtgexc.f new file mode 100644 index 0000000..0a905b8 --- /dev/null +++ b/dspl/liblapack/SRC/dtgexc.f @@ -0,0 +1,544 @@ +*> \brief \b DTGEXC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTGEXC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, +* LDZ, IFST, ILST, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL WANTQ, WANTZ +* INTEGER IFST, ILST, INFO, LDA, LDB, LDQ, LDZ, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTGEXC reorders the generalized real Schur decomposition of a real +*> matrix pair (A,B) using an orthogonal equivalence transformation +*> +*> (A, B) = Q * (A, B) * Z**T, +*> +*> so that the diagonal block of (A, B) with row index IFST is moved +*> to row ILST. +*> +*> (A, B) must be in generalized real Schur canonical form (as returned +*> by DGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 +*> diagonal blocks. B is upper triangular. +*> +*> Optionally, the matrices Q and Z of generalized Schur vectors are +*> updated. +*> +*> Q(in) * A(in) * Z(in)**T = Q(out) * A(out) * Z(out)**T +*> Q(in) * B(in) * Z(in)**T = Q(out) * B(out) * Z(out)**T +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTQ +*> \verbatim +*> WANTQ is LOGICAL +*> .TRUE. : update the left transformation matrix Q; +*> .FALSE.: do not update Q. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> .TRUE. : update the right transformation matrix Z; +*> .FALSE.: do not update Z. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the matrix A in generalized real Schur canonical +*> form. +*> On exit, the updated matrix A, again in generalized +*> real Schur canonical form. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> On entry, the matrix B in generalized real Schur canonical +*> form (A,B). +*> On exit, the updated matrix B, again in generalized +*> real Schur canonical form (A,B). +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> On entry, if WANTQ = .TRUE., the orthogonal matrix Q. +*> On exit, the updated matrix Q. +*> If WANTQ = .FALSE., Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= 1. +*> If WANTQ = .TRUE., LDQ >= N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ,N) +*> On entry, if WANTZ = .TRUE., the orthogonal matrix Z. +*> On exit, the updated matrix Z. +*> If WANTZ = .FALSE., Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1. +*> If WANTZ = .TRUE., LDZ >= N. +*> \endverbatim +*> +*> \param[in,out] IFST +*> \verbatim +*> IFST is INTEGER +*> \endverbatim +*> +*> \param[in,out] ILST +*> \verbatim +*> ILST is INTEGER +*> Specify the reordering of the diagonal blocks of (A, B). +*> The block with row index IFST is moved to row ILST, by a +*> sequence of swapping between adjacent blocks. +*> On exit, if IFST pointed on entry to the second row of +*> a 2-by-2 block, it is changed to point to the first row; +*> ILST always points to the first row of the block in its +*> final position (which may differ from its input value by +*> +1 or -1). 1 <= IFST, ILST <= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= 1 when N <= 1, otherwise LWORK >= 4*N + 16. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> =0: successful exit. +*> <0: if INFO = -i, the i-th argument had an illegal value. +*> =1: The transformed matrix pair (A, B) would be too far +*> from generalized Schur form; the problem is ill- +*> conditioned. (A, B) may have been partially reordered, +*> and ILST points to the first row of the current +*> position of the block being moved. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleGEcomputational +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +*> Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +*> M.S. Moonen et al (eds), Linear Algebra for Large Scale and +*> Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, IFST, ILST, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER IFST, ILST, INFO, LDA, LDB, LDQ, LDZ, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER HERE, LWMIN, NBF, NBL, NBNEXT +* .. +* .. External Subroutines .. + EXTERNAL DTGEX2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode and test input arguments. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. ( LDQ.LT.MAX( 1, N ) ) ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. WANTZ .AND. ( LDZ.LT.MAX( 1, N ) ) ) THEN + INFO = -11 + ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN + INFO = -12 + ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + ELSE + LWMIN = 4*N + 16 + END IF + WORK(1) = LWMIN +* + IF (LWORK.LT.LWMIN .AND. .NOT.LQUERY) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTGEXC', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* +* Determine the first row of the specified block and find out +* if it is 1-by-1 or 2-by-2. +* + IF( IFST.GT.1 ) THEN + IF( A( IFST, IFST-1 ).NE.ZERO ) + $ IFST = IFST - 1 + END IF + NBF = 1 + IF( IFST.LT.N ) THEN + IF( A( IFST+1, IFST ).NE.ZERO ) + $ NBF = 2 + END IF +* +* Determine the first row of the final block +* and find out if it is 1-by-1 or 2-by-2. +* + IF( ILST.GT.1 ) THEN + IF( A( ILST, ILST-1 ).NE.ZERO ) + $ ILST = ILST - 1 + END IF + NBL = 1 + IF( ILST.LT.N ) THEN + IF( A( ILST+1, ILST ).NE.ZERO ) + $ NBL = 2 + END IF + IF( IFST.EQ.ILST ) + $ RETURN +* + IF( IFST.LT.ILST ) THEN +* +* Update ILST. +* + IF( NBF.EQ.2 .AND. NBL.EQ.1 ) + $ ILST = ILST - 1 + IF( NBF.EQ.1 .AND. NBL.EQ.2 ) + $ ILST = ILST + 1 +* + HERE = IFST +* + 10 CONTINUE +* +* Swap with next one below. +* + IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN +* +* Current block either 1-by-1 or 2-by-2. +* + NBNEXT = 1 + IF( HERE+NBF+1.LE.N ) THEN + IF( A( HERE+NBF+1, HERE+NBF ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE, NBF, NBNEXT, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + NBNEXT +* +* Test if 2-by-2 block breaks into two 1-by-1 blocks. +* + IF( NBF.EQ.2 ) THEN + IF( A( HERE+1, HERE ).EQ.ZERO ) + $ NBF = 3 + END IF +* + ELSE +* +* Current block consists of two 1-by-1 blocks, each of which +* must be swapped individually. +* + NBNEXT = 1 + IF( HERE+3.LE.N ) THEN + IF( A( HERE+3, HERE+2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE+1, 1, NBNEXT, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + IF( NBNEXT.EQ.1 ) THEN +* +* Swap two 1-by-1 blocks. +* + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE, 1, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 1 +* + ELSE +* +* Recompute NBNEXT in case of 2-by-2 split. +* + IF( A( HERE+2, HERE+1 ).EQ.ZERO ) + $ NBNEXT = 1 + IF( NBNEXT.EQ.2 ) THEN +* +* 2-by-2 block did not split. +* + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE, 1, NBNEXT, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 2 + ELSE +* +* 2-by-2 block did split. +* + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 1 + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 1 + END IF +* + END IF + END IF + IF( HERE.LT.ILST ) + $ GO TO 10 + ELSE + HERE = IFST +* + 20 CONTINUE +* +* Swap with next one below. +* + IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN +* +* Current block either 1-by-1 or 2-by-2. +* + NBNEXT = 1 + IF( HERE.GE.3 ) THEN + IF( A( HERE-1, HERE-2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE-NBNEXT, NBNEXT, NBF, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - NBNEXT +* +* Test if 2-by-2 block breaks into two 1-by-1 blocks. +* + IF( NBF.EQ.2 ) THEN + IF( A( HERE+1, HERE ).EQ.ZERO ) + $ NBF = 3 + END IF +* + ELSE +* +* Current block consists of two 1-by-1 blocks, each of which +* must be swapped individually. +* + NBNEXT = 1 + IF( HERE.GE.3 ) THEN + IF( A( HERE-1, HERE-2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE-NBNEXT, NBNEXT, 1, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + IF( NBNEXT.EQ.1 ) THEN +* +* Swap two 1-by-1 blocks. +* + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE, NBNEXT, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 1 + ELSE +* +* Recompute NBNEXT in case of 2-by-2 split. +* + IF( A( HERE, HERE-1 ).EQ.ZERO ) + $ NBNEXT = 1 + IF( NBNEXT.EQ.2 ) THEN +* +* 2-by-2 block did not split. +* + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE-1, 2, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 2 + ELSE +* +* 2-by-2 block did split. +* + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 1 + CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 1 + END IF + END IF + END IF + IF( HERE.GT.ILST ) + $ GO TO 20 + END IF + ILST = HERE + WORK( 1 ) = LWMIN + RETURN +* +* End of DTGEXC +* + END diff --git a/dspl/liblapack/SRC/dtgsen.f b/dspl/liblapack/SRC/dtgsen.f new file mode 100644 index 0000000..22323cf --- /dev/null +++ b/dspl/liblapack/SRC/dtgsen.f @@ -0,0 +1,865 @@ +*> \brief \b DTGSEN +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTGSEN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, +* ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, M, PL, +* PR, DIF, WORK, LWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL WANTQ, WANTZ +* INTEGER IJOB, INFO, LDA, LDB, LDQ, LDZ, LIWORK, LWORK, +* $ M, N +* DOUBLE PRECISION PL, PR +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), +* $ B( LDB, * ), BETA( * ), DIF( * ), Q( LDQ, * ), +* $ WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTGSEN reorders the generalized real Schur decomposition of a real +*> matrix pair (A, B) (in terms of an orthonormal equivalence trans- +*> formation Q**T * (A, B) * Z), so that a selected cluster of eigenvalues +*> appears in the leading diagonal blocks of the upper quasi-triangular +*> matrix A and the upper triangular B. The leading columns of Q and +*> Z form orthonormal bases of the corresponding left and right eigen- +*> spaces (deflating subspaces). (A, B) must be in generalized real +*> Schur canonical form (as returned by DGGES), i.e. A is block upper +*> triangular with 1-by-1 and 2-by-2 diagonal blocks. B is upper +*> triangular. +*> +*> DTGSEN also computes the generalized eigenvalues +*> +*> w(j) = (ALPHAR(j) + i*ALPHAI(j))/BETA(j) +*> +*> of the reordered matrix pair (A, B). +*> +*> Optionally, DTGSEN computes the estimates of reciprocal condition +*> numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11), +*> (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s) +*> between the matrix pairs (A11, B11) and (A22,B22) that correspond to +*> the selected cluster and the eigenvalues outside the cluster, resp., +*> and norms of "projections" onto left and right eigenspaces w.r.t. +*> the selected cluster in the (1,1)-block. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] IJOB +*> \verbatim +*> IJOB is INTEGER +*> Specifies whether condition numbers are required for the +*> cluster of eigenvalues (PL and PR) or the deflating subspaces +*> (Difu and Difl): +*> =0: Only reorder w.r.t. SELECT. No extras. +*> =1: Reciprocal of norms of "projections" onto left and right +*> eigenspaces w.r.t. the selected cluster (PL and PR). +*> =2: Upper bounds on Difu and Difl. F-norm-based estimate +*> (DIF(1:2)). +*> =3: Estimate of Difu and Difl. 1-norm-based estimate +*> (DIF(1:2)). +*> About 5 times as expensive as IJOB = 2. +*> =4: Compute PL, PR and DIF (i.e. 0, 1 and 2 above): Economic +*> version to get it all. +*> =5: Compute PL, PR and DIF (i.e. 0, 1 and 3 above) +*> \endverbatim +*> +*> \param[in] WANTQ +*> \verbatim +*> WANTQ is LOGICAL +*> .TRUE. : update the left transformation matrix Q; +*> .FALSE.: do not update Q. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> .TRUE. : update the right transformation matrix Z; +*> .FALSE.: do not update Z. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> SELECT specifies the eigenvalues in the selected cluster. +*> To select a real eigenvalue w(j), SELECT(j) must be set to +*> .TRUE.. To select a complex conjugate pair of eigenvalues +*> w(j) and w(j+1), corresponding to a 2-by-2 diagonal block, +*> either SELECT(j) or SELECT(j+1) or both must be set to +*> .TRUE.; a complex conjugate pair of eigenvalues must be +*> either both included in the cluster or both excluded. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension(LDA,N) +*> On entry, the upper quasi-triangular matrix A, with (A, B) in +*> generalized real Schur canonical form. +*> On exit, A is overwritten by the reordered matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension(LDB,N) +*> On entry, the upper triangular matrix B, with (A, B) in +*> generalized real Schur canonical form. +*> On exit, B is overwritten by the reordered matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION array, dimension (N) +*> +*> On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +*> be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i +*> and BETA(j),j=1,...,N are the diagonals of the complex Schur +*> form (S,T) that would result if the 2-by-2 diagonal blocks of +*> the real generalized Schur form of (A,B) were further reduced +*> to triangular form using complex unitary transformations. +*> If ALPHAI(j) is zero, then the j-th eigenvalue is real; if +*> positive, then the j-th and (j+1)-st eigenvalues are a +*> complex conjugate pair, with ALPHAI(j+1) negative. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> On entry, if WANTQ = .TRUE., Q is an N-by-N matrix. +*> On exit, Q has been postmultiplied by the left orthogonal +*> transformation matrix which reorder (A, B); The leading M +*> columns of Q form orthonormal bases for the specified pair of +*> left eigenspaces (deflating subspaces). +*> If WANTQ = .FALSE., Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= 1; +*> and if WANTQ = .TRUE., LDQ >= N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (LDZ,N) +*> On entry, if WANTZ = .TRUE., Z is an N-by-N matrix. +*> On exit, Z has been postmultiplied by the left orthogonal +*> transformation matrix which reorder (A, B); The leading M +*> columns of Z form orthonormal bases for the specified pair of +*> left eigenspaces (deflating subspaces). +*> If WANTZ = .FALSE., Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1; +*> If WANTZ = .TRUE., LDZ >= N. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The dimension of the specified pair of left and right eigen- +*> spaces (deflating subspaces). 0 <= M <= N. +*> \endverbatim +*> +*> \param[out] PL +*> \verbatim +*> PL is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] PR +*> \verbatim +*> PR is DOUBLE PRECISION +*> +*> If IJOB = 1, 4 or 5, PL, PR are lower bounds on the +*> reciprocal of the norm of "projections" onto left and right +*> eigenspaces with respect to the selected cluster. +*> 0 < PL, PR <= 1. +*> If M = 0 or M = N, PL = PR = 1. +*> If IJOB = 0, 2 or 3, PL and PR are not referenced. +*> \endverbatim +*> +*> \param[out] DIF +*> \verbatim +*> DIF is DOUBLE PRECISION array, dimension (2). +*> If IJOB >= 2, DIF(1:2) store the estimates of Difu and Difl. +*> If IJOB = 2 or 4, DIF(1:2) are F-norm-based upper bounds on +*> Difu and Difl. If IJOB = 3 or 5, DIF(1:2) are 1-norm-based +*> estimates of Difu and Difl. +*> If M = 0 or N, DIF(1:2) = F-norm([A, B]). +*> If IJOB = 0 or 1, DIF is not referenced. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 4*N+16. +*> If IJOB = 1, 2 or 4, LWORK >= MAX(4*N+16, 2*M*(N-M)). +*> If IJOB = 3 or 5, LWORK >= MAX(4*N+16, 4*M*(N-M)). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= 1. +*> If IJOB = 1, 2 or 4, LIWORK >= N+6. +*> If IJOB = 3 or 5, LIWORK >= MAX(2*M*(N-M), N+6). +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> =0: Successful exit. +*> <0: If INFO = -i, the i-th argument had an illegal value. +*> =1: Reordering of (A, B) failed because the transformed +*> matrix pair (A, B) would be too far from generalized +*> Schur form; the problem is very ill-conditioned. +*> (A, B) may have been partially reordered. +*> If requested, 0 is returned in DIF(*), PL and PR. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> DTGSEN first collects the selected eigenvalues by computing +*> orthogonal U and W that move them to the top left corner of (A, B). +*> In other words, the selected eigenvalues are the eigenvalues of +*> (A11, B11) in: +*> +*> U**T*(A, B)*W = (A11 A12) (B11 B12) n1 +*> ( 0 A22),( 0 B22) n2 +*> n1 n2 n1 n2 +*> +*> where N = n1+n2 and U**T means the transpose of U. The first n1 columns +*> of U and W span the specified pair of left and right eigenspaces +*> (deflating subspaces) of (A, B). +*> +*> If (A, B) has been obtained from the generalized real Schur +*> decomposition of a matrix pair (C, D) = Q*(A, B)*Z**T, then the +*> reordered generalized real Schur form of (C, D) is given by +*> +*> (C, D) = (Q*U)*(U**T*(A, B)*W)*(Z*W)**T, +*> +*> and the first n1 columns of Q*U and Z*W span the corresponding +*> deflating subspaces of (C, D) (Q and Z store Q*U and Z*W, resp.). +*> +*> Note that if the selected eigenvalue is sufficiently ill-conditioned, +*> then its value may differ significantly from its value before +*> reordering. +*> +*> The reciprocal condition numbers of the left and right eigenspaces +*> spanned by the first n1 columns of U and W (or Q*U and Z*W) may +*> be returned in DIF(1:2), corresponding to Difu and Difl, resp. +*> +*> The Difu and Difl are defined as: +*> +*> Difu[(A11, B11), (A22, B22)] = sigma-min( Zu ) +*> and +*> Difl[(A11, B11), (A22, B22)] = Difu[(A22, B22), (A11, B11)], +*> +*> where sigma-min(Zu) is the smallest singular value of the +*> (2*n1*n2)-by-(2*n1*n2) matrix +*> +*> Zu = [ kron(In2, A11) -kron(A22**T, In1) ] +*> [ kron(In2, B11) -kron(B22**T, In1) ]. +*> +*> Here, Inx is the identity matrix of size nx and A22**T is the +*> transpose of A22. kron(X, Y) is the Kronecker product between +*> the matrices X and Y. +*> +*> When DIF(2) is small, small changes in (A, B) can cause large changes +*> in the deflating subspace. An approximate (asymptotic) bound on the +*> maximum angular error in the computed deflating subspaces is +*> +*> EPS * norm((A, B)) / DIF(2), +*> +*> where EPS is the machine precision. +*> +*> The reciprocal norm of the projectors on the left and right +*> eigenspaces associated with (A11, B11) may be returned in PL and PR. +*> They are computed as follows. First we compute L and R so that +*> P*(A, B)*Q is block diagonal, where +*> +*> P = ( I -L ) n1 Q = ( I R ) n1 +*> ( 0 I ) n2 and ( 0 I ) n2 +*> n1 n2 n1 n2 +*> +*> and (L, R) is the solution to the generalized Sylvester equation +*> +*> A11*R - L*A22 = -A12 +*> B11*R - L*B22 = -B12 +*> +*> Then PL = (F-norm(L)**2+1)**(-1/2) and PR = (F-norm(R)**2+1)**(-1/2). +*> An approximate (asymptotic) bound on the average absolute error of +*> the selected eigenvalues is +*> +*> EPS * norm((A, B)) / PL. +*> +*> There are also global error bounds which valid for perturbations up +*> to a certain restriction: A lower bound (x) on the smallest +*> F-norm(E,F) for which an eigenvalue of (A11, B11) may move and +*> coalesce with an eigenvalue of (A22, B22) under perturbation (E,F), +*> (i.e. (A + E, B + F), is +*> +*> x = min(Difu,Difl)/((1/(PL*PL)+1/(PR*PR))**(1/2)+2*max(1/PL,1/PR)). +*> +*> An approximate bound on x can be computed from DIF(1:2), PL and PR. +*> +*> If y = ( F-norm(E,F) / x) <= 1, the angles between the perturbed +*> (L', R') and unperturbed (L, R) left and right deflating subspaces +*> associated with the selected cluster in the (1,1)-blocks can be +*> bounded as +*> +*> max-angle(L, L') <= arctan( y * PL / (1 - y * (1 - PL * PL)**(1/2)) +*> max-angle(R, R') <= arctan( y * PR / (1 - y * (1 - PR * PR)**(1/2)) +*> +*> See LAPACK User's Guide section 4.11 or the following references +*> for more information. +*> +*> Note that if the default method for computing the Frobenius-norm- +*> based estimate DIF is not wanted (see DLATDF), then the parameter +*> IDIFJB (see below) should be changed from 3 to 4 (routine DLATDF +*> (IJOB = 2 will be used)). See DTGSYL for more details. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +*> Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +*> M.S. Moonen et al (eds), Linear Algebra for Large Scale and +*> Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +*> +*> [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +*> Eigenvalues of a Regular Matrix Pair (A, B) and Condition +*> Estimation: Theory, Algorithms and Software, +*> Report UMINF - 94.04, Department of Computing Science, Umea +*> University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working +*> Note 87. To appear in Numerical Algorithms, 1996. +*> +*> [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +*> for Solving the Generalized Sylvester Equation and Estimating the +*> Separation between Regular Matrix Pairs, Report UMINF - 93.23, +*> Department of Computing Science, Umea University, S-901 87 Umea, +*> Sweden, December 1993, Revised April 1994, Also as LAPACK Working +*> Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1, +*> 1996. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, M, PL, + $ PR, DIF, WORK, LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER IJOB, INFO, LDA, LDB, LDQ, LDZ, LIWORK, LWORK, + $ M, N + DOUBLE PRECISION PL, PR +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), DIF( * ), Q( LDQ, * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IDIFJB + PARAMETER ( IDIFJB = 3 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, PAIR, SWAP, WANTD, WANTD1, WANTD2, + $ WANTP + INTEGER I, IERR, IJB, K, KASE, KK, KS, LIWMIN, LWMIN, + $ MN2, N1, N2 + DOUBLE PRECISION DSCALE, DSUM, EPS, RDSCAL, SMLNUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL DLACN2, DLACPY, DLAG2, DLASSQ, DTGEXC, DTGSYL, + $ XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + IF( IJOB.LT.0 .OR. IJOB.GT.5 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -14 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -16 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTGSEN', -INFO ) + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + IERR = 0 +* + WANTP = IJOB.EQ.1 .OR. IJOB.GE.4 + WANTD1 = IJOB.EQ.2 .OR. IJOB.EQ.4 + WANTD2 = IJOB.EQ.3 .OR. IJOB.EQ.5 + WANTD = WANTD1 .OR. WANTD2 +* +* Set M to the dimension of the specified pair of deflating +* subspaces. +* + M = 0 + PAIR = .FALSE. + IF( .NOT.LQUERY .OR. IJOB.NE.0 ) THEN + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + IF( K.LT.N ) THEN + IF( A( K+1, K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) + $ M = M + 2 + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE + END IF +* + IF( IJOB.EQ.1 .OR. IJOB.EQ.2 .OR. IJOB.EQ.4 ) THEN + LWMIN = MAX( 1, 4*N+16, 2*M*( N-M ) ) + LIWMIN = MAX( 1, N+6 ) + ELSE IF( IJOB.EQ.3 .OR. IJOB.EQ.5 ) THEN + LWMIN = MAX( 1, 4*N+16, 4*M*( N-M ) ) + LIWMIN = MAX( 1, 2*M*( N-M ), N+6 ) + ELSE + LWMIN = MAX( 1, 4*N+16 ) + LIWMIN = 1 + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -22 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -24 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTGSEN', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.N .OR. M.EQ.0 ) THEN + IF( WANTP ) THEN + PL = ONE + PR = ONE + END IF + IF( WANTD ) THEN + DSCALE = ZERO + DSUM = ONE + DO 20 I = 1, N + CALL DLASSQ( N, A( 1, I ), 1, DSCALE, DSUM ) + CALL DLASSQ( N, B( 1, I ), 1, DSCALE, DSUM ) + 20 CONTINUE + DIF( 1 ) = DSCALE*SQRT( DSUM ) + DIF( 2 ) = DIF( 1 ) + END IF + GO TO 60 + END IF +* +* Collect the selected blocks at the top-left corner of (A, B). +* + KS = 0 + PAIR = .FALSE. + DO 30 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE +* + SWAP = SELECT( K ) + IF( K.LT.N ) THEN + IF( A( K+1, K ).NE.ZERO ) THEN + PAIR = .TRUE. + SWAP = SWAP .OR. SELECT( K+1 ) + END IF + END IF +* + IF( SWAP ) THEN + KS = KS + 1 +* +* Swap the K-th block to position KS. +* Perform the reordering of diagonal blocks in (A, B) +* by orthogonal transformation matrices and update +* Q and Z accordingly (if requested): +* + KK = K + IF( K.NE.KS ) + $ CALL DTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, KK, KS, WORK, LWORK, IERR ) +* + IF( IERR.GT.0 ) THEN +* +* Swap is rejected: exit. +* + INFO = 1 + IF( WANTP ) THEN + PL = ZERO + PR = ZERO + END IF + IF( WANTD ) THEN + DIF( 1 ) = ZERO + DIF( 2 ) = ZERO + END IF + GO TO 60 + END IF +* + IF( PAIR ) + $ KS = KS + 1 + END IF + END IF + 30 CONTINUE + IF( WANTP ) THEN +* +* Solve generalized Sylvester equation for R and L +* and compute PL and PR. +* + N1 = M + N2 = N - M + I = N1 + 1 + IJB = 0 + CALL DLACPY( 'Full', N1, N2, A( 1, I ), LDA, WORK, N1 ) + CALL DLACPY( 'Full', N1, N2, B( 1, I ), LDB, WORK( N1*N2+1 ), + $ N1 ) + CALL DTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, + $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), N1, + $ DSCALE, DIF( 1 ), WORK( N1*N2*2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) +* +* Estimate the reciprocal of norms of "projections" onto left +* and right eigenspaces. +* + RDSCAL = ZERO + DSUM = ONE + CALL DLASSQ( N1*N2, WORK, 1, RDSCAL, DSUM ) + PL = RDSCAL*SQRT( DSUM ) + IF( PL.EQ.ZERO ) THEN + PL = ONE + ELSE + PL = DSCALE / ( SQRT( DSCALE*DSCALE / PL+PL )*SQRT( PL ) ) + END IF + RDSCAL = ZERO + DSUM = ONE + CALL DLASSQ( N1*N2, WORK( N1*N2+1 ), 1, RDSCAL, DSUM ) + PR = RDSCAL*SQRT( DSUM ) + IF( PR.EQ.ZERO ) THEN + PR = ONE + ELSE + PR = DSCALE / ( SQRT( DSCALE*DSCALE / PR+PR )*SQRT( PR ) ) + END IF + END IF +* + IF( WANTD ) THEN +* +* Compute estimates of Difu and Difl. +* + IF( WANTD1 ) THEN + N1 = M + N2 = N - M + I = N1 + 1 + IJB = IDIFJB +* +* Frobenius norm-based Difu-estimate. +* + CALL DTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, + $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), + $ N1, DSCALE, DIF( 1 ), WORK( 2*N1*N2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) +* +* Frobenius norm-based Difl-estimate. +* + CALL DTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, WORK, + $ N2, B( I, I ), LDB, B, LDB, WORK( N1*N2+1 ), + $ N2, DSCALE, DIF( 2 ), WORK( 2*N1*N2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) + ELSE +* +* +* Compute 1-norm-based estimates of Difu and Difl using +* reversed communication with DLACN2. In each step a +* generalized Sylvester equation or a transposed variant +* is solved. +* + KASE = 0 + N1 = M + N2 = N - M + I = N1 + 1 + IJB = 0 + MN2 = 2*N1*N2 +* +* 1-norm-based estimate of Difu. +* + 40 CONTINUE + CALL DLACN2( MN2, WORK( MN2+1 ), WORK, IWORK, DIF( 1 ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve generalized Sylvester equation. +* + CALL DTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, + $ WORK, N1, B, LDB, B( I, I ), LDB, + $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), + $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + ELSE +* +* Solve the transposed variant. +* + CALL DTGSYL( 'T', IJB, N1, N2, A, LDA, A( I, I ), LDA, + $ WORK, N1, B, LDB, B( I, I ), LDB, + $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), + $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + END IF + GO TO 40 + END IF + DIF( 1 ) = DSCALE / DIF( 1 ) +* +* 1-norm-based estimate of Difl. +* + 50 CONTINUE + CALL DLACN2( MN2, WORK( MN2+1 ), WORK, IWORK, DIF( 2 ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve generalized Sylvester equation. +* + CALL DTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, + $ WORK, N2, B( I, I ), LDB, B, LDB, + $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), + $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + ELSE +* +* Solve the transposed variant. +* + CALL DTGSYL( 'T', IJB, N2, N1, A( I, I ), LDA, A, LDA, + $ WORK, N2, B( I, I ), LDB, B, LDB, + $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), + $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + END IF + GO TO 50 + END IF + DIF( 2 ) = DSCALE / DIF( 2 ) +* + END IF + END IF +* + 60 CONTINUE +* +* Compute generalized eigenvalues of reordered pair (A, B) and +* normalize the generalized Schur form. +* + PAIR = .FALSE. + DO 80 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE +* + IF( K.LT.N ) THEN + IF( A( K+1, K ).NE.ZERO ) THEN + PAIR = .TRUE. + END IF + END IF +* + IF( PAIR ) THEN +* +* Compute the eigenvalue(s) at position K. +* + WORK( 1 ) = A( K, K ) + WORK( 2 ) = A( K+1, K ) + WORK( 3 ) = A( K, K+1 ) + WORK( 4 ) = A( K+1, K+1 ) + WORK( 5 ) = B( K, K ) + WORK( 6 ) = B( K+1, K ) + WORK( 7 ) = B( K, K+1 ) + WORK( 8 ) = B( K+1, K+1 ) + CALL DLAG2( WORK, 2, WORK( 5 ), 2, SMLNUM*EPS, BETA( K ), + $ BETA( K+1 ), ALPHAR( K ), ALPHAR( K+1 ), + $ ALPHAI( K ) ) + ALPHAI( K+1 ) = -ALPHAI( K ) +* + ELSE +* + IF( SIGN( ONE, B( K, K ) ).LT.ZERO ) THEN +* +* If B(K,K) is negative, make it positive +* + DO 70 I = 1, N + A( K, I ) = -A( K, I ) + B( K, I ) = -B( K, I ) + IF( WANTQ ) Q( I, K ) = -Q( I, K ) + 70 CONTINUE + END IF +* + ALPHAR( K ) = A( K, K ) + ALPHAI( K ) = ZERO + BETA( K ) = B( K, K ) +* + END IF + END IF + 80 CONTINUE +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DTGSEN +* + END diff --git a/dspl/liblapack/SRC/dtgsja.f b/dspl/liblapack/SRC/dtgsja.f new file mode 100644 index 0000000..66f32b7 --- /dev/null +++ b/dspl/liblapack/SRC/dtgsja.f @@ -0,0 +1,655 @@ +*> \brief \b DTGSJA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTGSJA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, +* LDB, TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, +* Q, LDQ, WORK, NCYCLE, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBQ, JOBU, JOBV +* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, +* $ NCYCLE, P +* DOUBLE PRECISION TOLA, TOLB +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), Q( LDQ, * ), U( LDU, * ), +* $ V( LDV, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTGSJA computes the generalized singular value decomposition (GSVD) +*> of two real upper triangular (or trapezoidal) matrices A and B. +*> +*> On entry, it is assumed that matrices A and B have the following +*> forms, which may be obtained by the preprocessing subroutine DGGSVP +*> from a general M-by-N matrix A and P-by-N matrix B: +*> +*> N-K-L K L +*> A = K ( 0 A12 A13 ) if M-K-L >= 0; +*> L ( 0 0 A23 ) +*> M-K-L ( 0 0 0 ) +*> +*> N-K-L K L +*> A = K ( 0 A12 A13 ) if M-K-L < 0; +*> M-K ( 0 0 A23 ) +*> +*> N-K-L K L +*> B = L ( 0 0 B13 ) +*> P-L ( 0 0 0 ) +*> +*> where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular +*> upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, +*> otherwise A23 is (M-K)-by-L upper trapezoidal. +*> +*> On exit, +*> +*> U**T *A*Q = D1*( 0 R ), V**T *B*Q = D2*( 0 R ), +*> +*> where U, V and Q are orthogonal matrices. +*> R is a nonsingular upper triangular matrix, and D1 and D2 are +*> ``diagonal'' matrices, which are of the following structures: +*> +*> If M-K-L >= 0, +*> +*> K L +*> D1 = K ( I 0 ) +*> L ( 0 C ) +*> M-K-L ( 0 0 ) +*> +*> K L +*> D2 = L ( 0 S ) +*> P-L ( 0 0 ) +*> +*> N-K-L K L +*> ( 0 R ) = K ( 0 R11 R12 ) K +*> L ( 0 0 R22 ) L +*> +*> where +*> +*> C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), +*> S = diag( BETA(K+1), ... , BETA(K+L) ), +*> C**2 + S**2 = I. +*> +*> R is stored in A(1:K+L,N-K-L+1:N) on exit. +*> +*> If M-K-L < 0, +*> +*> K M-K K+L-M +*> D1 = K ( I 0 0 ) +*> M-K ( 0 C 0 ) +*> +*> K M-K K+L-M +*> D2 = M-K ( 0 S 0 ) +*> K+L-M ( 0 0 I ) +*> P-L ( 0 0 0 ) +*> +*> N-K-L K M-K K+L-M +*> ( 0 R ) = K ( 0 R11 R12 R13 ) +*> M-K ( 0 0 R22 R23 ) +*> K+L-M ( 0 0 0 R33 ) +*> +*> where +*> C = diag( ALPHA(K+1), ... , ALPHA(M) ), +*> S = diag( BETA(K+1), ... , BETA(M) ), +*> C**2 + S**2 = I. +*> +*> R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored +*> ( 0 R22 R23 ) +*> in B(M-K+1:L,N+M-K-L+1:N) on exit. +*> +*> The computation of the orthogonal transformation matrices U, V or Q +*> is optional. These matrices may either be formed explicitly, or they +*> may be postmultiplied into input matrices U1, V1, or Q1. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> = 'U': U must contain an orthogonal matrix U1 on entry, and +*> the product U1*U is returned; +*> = 'I': U is initialized to the unit matrix, and the +*> orthogonal matrix U is returned; +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> = 'V': V must contain an orthogonal matrix V1 on entry, and +*> the product V1*V is returned; +*> = 'I': V is initialized to the unit matrix, and the +*> orthogonal matrix V is returned; +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBQ +*> \verbatim +*> JOBQ is CHARACTER*1 +*> = 'Q': Q must contain an orthogonal matrix Q1 on entry, and +*> the product Q1*Q is returned; +*> = 'I': Q is initialized to the unit matrix, and the +*> orthogonal matrix Q is returned; +*> = 'N': Q is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> +*> K and L specify the subblocks in the input matrices A and B: +*> A23 = A(K+1:MIN(K+L,M),N-L+1:N) and B13 = B(1:L,N-L+1:N) +*> of A and B, whose GSVD is going to be computed by DTGSJA. +*> See Further Details. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A(N-K+1:N,1:MIN(K+L,M) ) contains the triangular +*> matrix R or part of R. See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, if necessary, B(M-K+1:L,N+M-K-L+1:N) contains +*> a part of R. See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[in] TOLA +*> \verbatim +*> TOLA is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] TOLB +*> \verbatim +*> TOLB is DOUBLE PRECISION +*> +*> TOLA and TOLB are the convergence criteria for the Jacobi- +*> Kogbetliantz iteration procedure. Generally, they are the +*> same as used in the preprocessing step, say +*> TOLA = max(M,N)*norm(A)*MAZHEPS, +*> TOLB = max(P,N)*norm(B)*MAZHEPS. +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION array, dimension (N) +*> +*> On exit, ALPHA and BETA contain the generalized singular +*> value pairs of A and B; +*> ALPHA(1:K) = 1, +*> BETA(1:K) = 0, +*> and if M-K-L >= 0, +*> ALPHA(K+1:K+L) = diag(C), +*> BETA(K+1:K+L) = diag(S), +*> or if M-K-L < 0, +*> ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0 +*> BETA(K+1:M) = S, BETA(M+1:K+L) = 1. +*> Furthermore, if K+L < N, +*> ALPHA(K+L+1:N) = 0 and +*> BETA(K+L+1:N) = 0. +*> \endverbatim +*> +*> \param[in,out] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension (LDU,M) +*> On entry, if JOBU = 'U', U must contain a matrix U1 (usually +*> the orthogonal matrix returned by DGGSVP). +*> On exit, +*> if JOBU = 'I', U contains the orthogonal matrix U; +*> if JOBU = 'U', U contains the product U1*U. +*> If JOBU = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,M) if +*> JOBU = 'U'; LDU >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (LDV,P) +*> On entry, if JOBV = 'V', V must contain a matrix V1 (usually +*> the orthogonal matrix returned by DGGSVP). +*> On exit, +*> if JOBV = 'I', V contains the orthogonal matrix V; +*> if JOBV = 'V', V contains the product V1*V. +*> If JOBV = 'N', V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,P) if +*> JOBV = 'V'; LDV >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> On entry, if JOBQ = 'Q', Q must contain a matrix Q1 (usually +*> the orthogonal matrix returned by DGGSVP). +*> On exit, +*> if JOBQ = 'I', Q contains the orthogonal matrix Q; +*> if JOBQ = 'Q', Q contains the product Q1*Q. +*> If JOBQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N) if +*> JOBQ = 'Q'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] NCYCLE +*> \verbatim +*> NCYCLE is INTEGER +*> The number of cycles required for convergence. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1: the procedure does not converge after MAXIT cycles. +*> \endverbatim +*> +*> \verbatim +*> Internal Parameters +*> =================== +*> +*> MAXIT INTEGER +*> MAXIT specifies the total loops that the iterative procedure +*> may take. If after MAXIT cycles, the routine fails to +*> converge, we return INFO = 1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> DTGSJA essentially uses a variant of Kogbetliantz algorithm to reduce +*> min(L,M-K)-by-L triangular (or trapezoidal) matrix A23 and L-by-L +*> matrix B13 to the form: +*> +*> U1**T *A13*Q1 = C1*R1; V1**T *B13*Q1 = S1*R1, +*> +*> where U1, V1 and Q1 are orthogonal matrix, and Z**T is the transpose +*> of Z. C1 and S1 are diagonal matrices satisfying +*> +*> C1**2 + S1**2 = I, +*> +*> and R1 is an L-by-L nonsingular upper triangular matrix. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, + $ LDB, TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, + $ Q, LDQ, WORK, NCYCLE, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, + $ NCYCLE, P + DOUBLE PRECISION TOLA, TOLB +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), Q( LDQ, * ), U( LDU, * ), + $ V( LDV, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 40 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. +* + LOGICAL INITQ, INITU, INITV, UPPER, WANTQ, WANTU, WANTV + INTEGER I, J, KCYCLE + DOUBLE PRECISION A1, A2, A3, B1, B2, B3, CSQ, CSU, CSV, ERROR, + $ GAMMA, RWK, SNQ, SNU, SNV, SSMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAGS2, DLAPLL, DLARTG, DLASET, DROT, + $ DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + INITU = LSAME( JOBU, 'I' ) + WANTU = INITU .OR. LSAME( JOBU, 'U' ) +* + INITV = LSAME( JOBV, 'I' ) + WANTV = INITV .OR. LSAME( JOBV, 'V' ) +* + INITQ = LSAME( JOBQ, 'I' ) + WANTQ = INITQ .OR. LSAME( JOBQ, 'Q' ) +* + INFO = 0 + IF( .NOT.( INITU .OR. WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( INITV .OR. WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( INITQ .OR. WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( P.LT.0 ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -12 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -18 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -20 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -22 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTGSJA', -INFO ) + RETURN + END IF +* +* Initialize U, V and Q, if necessary +* + IF( INITU ) + $ CALL DLASET( 'Full', M, M, ZERO, ONE, U, LDU ) + IF( INITV ) + $ CALL DLASET( 'Full', P, P, ZERO, ONE, V, LDV ) + IF( INITQ ) + $ CALL DLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) +* +* Loop until convergence +* + UPPER = .FALSE. + DO 40 KCYCLE = 1, MAXIT +* + UPPER = .NOT.UPPER +* + DO 20 I = 1, L - 1 + DO 10 J = I + 1, L +* + A1 = ZERO + A2 = ZERO + A3 = ZERO + IF( K+I.LE.M ) + $ A1 = A( K+I, N-L+I ) + IF( K+J.LE.M ) + $ A3 = A( K+J, N-L+J ) +* + B1 = B( I, N-L+I ) + B3 = B( J, N-L+J ) +* + IF( UPPER ) THEN + IF( K+I.LE.M ) + $ A2 = A( K+I, N-L+J ) + B2 = B( I, N-L+J ) + ELSE + IF( K+J.LE.M ) + $ A2 = A( K+J, N-L+I ) + B2 = B( J, N-L+I ) + END IF +* + CALL DLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, + $ CSV, SNV, CSQ, SNQ ) +* +* Update (K+I)-th and (K+J)-th rows of matrix A: U**T *A +* + IF( K+J.LE.M ) + $ CALL DROT( L, A( K+J, N-L+1 ), LDA, A( K+I, N-L+1 ), + $ LDA, CSU, SNU ) +* +* Update I-th and J-th rows of matrix B: V**T *B +* + CALL DROT( L, B( J, N-L+1 ), LDB, B( I, N-L+1 ), LDB, + $ CSV, SNV ) +* +* Update (N-L+I)-th and (N-L+J)-th columns of matrices +* A and B: A*Q and B*Q +* + CALL DROT( MIN( K+L, M ), A( 1, N-L+J ), 1, + $ A( 1, N-L+I ), 1, CSQ, SNQ ) +* + CALL DROT( L, B( 1, N-L+J ), 1, B( 1, N-L+I ), 1, CSQ, + $ SNQ ) +* + IF( UPPER ) THEN + IF( K+I.LE.M ) + $ A( K+I, N-L+J ) = ZERO + B( I, N-L+J ) = ZERO + ELSE + IF( K+J.LE.M ) + $ A( K+J, N-L+I ) = ZERO + B( J, N-L+I ) = ZERO + END IF +* +* Update orthogonal matrices U, V, Q, if desired. +* + IF( WANTU .AND. K+J.LE.M ) + $ CALL DROT( M, U( 1, K+J ), 1, U( 1, K+I ), 1, CSU, + $ SNU ) +* + IF( WANTV ) + $ CALL DROT( P, V( 1, J ), 1, V( 1, I ), 1, CSV, SNV ) +* + IF( WANTQ ) + $ CALL DROT( N, Q( 1, N-L+J ), 1, Q( 1, N-L+I ), 1, CSQ, + $ SNQ ) +* + 10 CONTINUE + 20 CONTINUE +* + IF( .NOT.UPPER ) THEN +* +* The matrices A13 and B13 were lower triangular at the start +* of the cycle, and are now upper triangular. +* +* Convergence test: test the parallelism of the corresponding +* rows of A and B. +* + ERROR = ZERO + DO 30 I = 1, MIN( L, M-K ) + CALL DCOPY( L-I+1, A( K+I, N-L+I ), LDA, WORK, 1 ) + CALL DCOPY( L-I+1, B( I, N-L+I ), LDB, WORK( L+1 ), 1 ) + CALL DLAPLL( L-I+1, WORK, 1, WORK( L+1 ), 1, SSMIN ) + ERROR = MAX( ERROR, SSMIN ) + 30 CONTINUE +* + IF( ABS( ERROR ).LE.MIN( TOLA, TOLB ) ) + $ GO TO 50 + END IF +* +* End of cycle loop +* + 40 CONTINUE +* +* The algorithm has not converged after MAXIT cycles. +* + INFO = 1 + GO TO 100 +* + 50 CONTINUE +* +* If ERROR <= MIN(TOLA,TOLB), then the algorithm has converged. +* Compute the generalized singular value pairs (ALPHA, BETA), and +* set the triangular matrix R to array A. +* + DO 60 I = 1, K + ALPHA( I ) = ONE + BETA( I ) = ZERO + 60 CONTINUE +* + DO 70 I = 1, MIN( L, M-K ) +* + A1 = A( K+I, N-L+I ) + B1 = B( I, N-L+I ) +* + IF( A1.NE.ZERO ) THEN + GAMMA = B1 / A1 +* +* change sign if necessary +* + IF( GAMMA.LT.ZERO ) THEN + CALL DSCAL( L-I+1, -ONE, B( I, N-L+I ), LDB ) + IF( WANTV ) + $ CALL DSCAL( P, -ONE, V( 1, I ), 1 ) + END IF +* + CALL DLARTG( ABS( GAMMA ), ONE, BETA( K+I ), ALPHA( K+I ), + $ RWK ) +* + IF( ALPHA( K+I ).GE.BETA( K+I ) ) THEN + CALL DSCAL( L-I+1, ONE / ALPHA( K+I ), A( K+I, N-L+I ), + $ LDA ) + ELSE + CALL DSCAL( L-I+1, ONE / BETA( K+I ), B( I, N-L+I ), + $ LDB ) + CALL DCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ), + $ LDA ) + END IF +* + ELSE +* + ALPHA( K+I ) = ZERO + BETA( K+I ) = ONE + CALL DCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ), + $ LDA ) +* + END IF +* + 70 CONTINUE +* +* Post-assignment +* + DO 80 I = M + 1, K + L + ALPHA( I ) = ZERO + BETA( I ) = ONE + 80 CONTINUE +* + IF( K+L.LT.N ) THEN + DO 90 I = K + L + 1, N + ALPHA( I ) = ZERO + BETA( I ) = ZERO + 90 CONTINUE + END IF +* + 100 CONTINUE + NCYCLE = KCYCLE + RETURN +* +* End of DTGSJA +* + END diff --git a/dspl/liblapack/SRC/dtgsna.f b/dspl/liblapack/SRC/dtgsna.f new file mode 100644 index 0000000..68a68ca --- /dev/null +++ b/dspl/liblapack/SRC/dtgsna.f @@ -0,0 +1,700 @@ +*> \brief \b DTGSNA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTGSNA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTGSNA( JOB, HOWMNY, SELECT, N, A, LDA, B, LDB, VL, +* LDVL, VR, LDVR, S, DIF, MM, M, WORK, LWORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER HOWMNY, JOB +* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), DIF( * ), S( * ), +* $ VL( LDVL, * ), VR( LDVR, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTGSNA estimates reciprocal condition numbers for specified +*> eigenvalues and/or eigenvectors of a matrix pair (A, B) in +*> generalized real Schur canonical form (or of any matrix pair +*> (Q*A*Z**T, Q*B*Z**T) with orthogonal matrices Q and Z, where +*> Z**T denotes the transpose of Z. +*> +*> (A, B) must be in generalized real Schur form (as returned by DGGES), +*> i.e. A is block upper triangular with 1-by-1 and 2-by-2 diagonal +*> blocks. B is upper triangular. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies whether condition numbers are required for +*> eigenvalues (S) or eigenvectors (DIF): +*> = 'E': for eigenvalues only (S); +*> = 'V': for eigenvectors only (DIF); +*> = 'B': for both eigenvalues and eigenvectors (S and DIF). +*> \endverbatim +*> +*> \param[in] HOWMNY +*> \verbatim +*> HOWMNY is CHARACTER*1 +*> = 'A': compute condition numbers for all eigenpairs; +*> = 'S': compute condition numbers for selected eigenpairs +*> specified by the array SELECT. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> If HOWMNY = 'S', SELECT specifies the eigenpairs for which +*> condition numbers are required. To select condition numbers +*> for the eigenpair corresponding to a real eigenvalue w(j), +*> SELECT(j) must be set to .TRUE.. To select condition numbers +*> corresponding to a complex conjugate pair of eigenvalues w(j) +*> and w(j+1), either SELECT(j) or SELECT(j+1) or both, must be +*> set to .TRUE.. +*> If HOWMNY = 'A', SELECT is not referenced. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the square matrix pair (A, B). N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The upper quasi-triangular matrix A in the pair (A,B). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> The upper triangular matrix B in the pair (A,B). +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION array, dimension (LDVL,M) +*> If JOB = 'E' or 'B', VL must contain left eigenvectors of +*> (A, B), corresponding to the eigenpairs specified by HOWMNY +*> and SELECT. The eigenvectors must be stored in consecutive +*> columns of VL, as returned by DTGEVC. +*> If JOB = 'V', VL is not referenced. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. LDVL >= 1. +*> If JOB = 'E' or 'B', LDVL >= N. +*> \endverbatim +*> +*> \param[in] VR +*> \verbatim +*> VR is DOUBLE PRECISION array, dimension (LDVR,M) +*> If JOB = 'E' or 'B', VR must contain right eigenvectors of +*> (A, B), corresponding to the eigenpairs specified by HOWMNY +*> and SELECT. The eigenvectors must be stored in consecutive +*> columns ov VR, as returned by DTGEVC. +*> If JOB = 'V', VR is not referenced. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. LDVR >= 1. +*> If JOB = 'E' or 'B', LDVR >= N. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (MM) +*> If JOB = 'E' or 'B', the reciprocal condition numbers of the +*> selected eigenvalues, stored in consecutive elements of the +*> array. For a complex conjugate pair of eigenvalues two +*> consecutive elements of S are set to the same value. Thus +*> S(j), DIF(j), and the j-th columns of VL and VR all +*> correspond to the same eigenpair (but not in general the +*> j-th eigenpair, unless all eigenpairs are selected). +*> If JOB = 'V', S is not referenced. +*> \endverbatim +*> +*> \param[out] DIF +*> \verbatim +*> DIF is DOUBLE PRECISION array, dimension (MM) +*> If JOB = 'V' or 'B', the estimated reciprocal condition +*> numbers of the selected eigenvectors, stored in consecutive +*> elements of the array. For a complex eigenvector two +*> consecutive elements of DIF are set to the same value. If +*> the eigenvalues cannot be reordered to compute DIF(j), DIF(j) +*> is set to 0; this can only occur when the true value would be +*> very small anyway. +*> If JOB = 'E', DIF is not referenced. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of elements in the arrays S and DIF. MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of elements of the arrays S and DIF used to store +*> the specified condition numbers; for each selected real +*> eigenvalue one element is used, and for each selected complex +*> conjugate pair of eigenvalues, two elements are used. +*> If HOWMNY = 'A', M is set to N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> If JOB = 'V' or 'B' LWORK >= 2*N*(N+2)+16. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N + 6) +*> If JOB = 'E', IWORK is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> =0: Successful exit +*> <0: If INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The reciprocal of the condition number of a generalized eigenvalue +*> w = (a, b) is defined as +*> +*> S(w) = (|u**TAv|**2 + |u**TBv|**2)**(1/2) / (norm(u)*norm(v)) +*> +*> where u and v are the left and right eigenvectors of (A, B) +*> corresponding to w; |z| denotes the absolute value of the complex +*> number, and norm(u) denotes the 2-norm of the vector u. +*> The pair (a, b) corresponds to an eigenvalue w = a/b (= u**TAv/u**TBv) +*> of the matrix pair (A, B). If both a and b equal zero, then (A B) is +*> singular and S(I) = -1 is returned. +*> +*> An approximate error bound on the chordal distance between the i-th +*> computed generalized eigenvalue w and the corresponding exact +*> eigenvalue lambda is +*> +*> chord(w, lambda) <= EPS * norm(A, B) / S(I) +*> +*> where EPS is the machine precision. +*> +*> The reciprocal of the condition number DIF(i) of right eigenvector u +*> and left eigenvector v corresponding to the generalized eigenvalue w +*> is defined as follows: +*> +*> a) If the i-th eigenvalue w = (a,b) is real +*> +*> Suppose U and V are orthogonal transformations such that +*> +*> U**T*(A, B)*V = (S, T) = ( a * ) ( b * ) 1 +*> ( 0 S22 ),( 0 T22 ) n-1 +*> 1 n-1 1 n-1 +*> +*> Then the reciprocal condition number DIF(i) is +*> +*> Difl((a, b), (S22, T22)) = sigma-min( Zl ), +*> +*> where sigma-min(Zl) denotes the smallest singular value of the +*> 2(n-1)-by-2(n-1) matrix +*> +*> Zl = [ kron(a, In-1) -kron(1, S22) ] +*> [ kron(b, In-1) -kron(1, T22) ] . +*> +*> Here In-1 is the identity matrix of size n-1. kron(X, Y) is the +*> Kronecker product between the matrices X and Y. +*> +*> Note that if the default method for computing DIF(i) is wanted +*> (see DLATDF), then the parameter DIFDRI (see below) should be +*> changed from 3 to 4 (routine DLATDF(IJOB = 2 will be used)). +*> See DTGSYL for more details. +*> +*> b) If the i-th and (i+1)-th eigenvalues are complex conjugate pair, +*> +*> Suppose U and V are orthogonal transformations such that +*> +*> U**T*(A, B)*V = (S, T) = ( S11 * ) ( T11 * ) 2 +*> ( 0 S22 ),( 0 T22) n-2 +*> 2 n-2 2 n-2 +*> +*> and (S11, T11) corresponds to the complex conjugate eigenvalue +*> pair (w, conjg(w)). There exist unitary matrices U1 and V1 such +*> that +*> +*> U1**T*S11*V1 = ( s11 s12 ) and U1**T*T11*V1 = ( t11 t12 ) +*> ( 0 s22 ) ( 0 t22 ) +*> +*> where the generalized eigenvalues w = s11/t11 and +*> conjg(w) = s22/t22. +*> +*> Then the reciprocal condition number DIF(i) is bounded by +*> +*> min( d1, max( 1, |real(s11)/real(s22)| )*d2 ) +*> +*> where, d1 = Difl((s11, t11), (s22, t22)) = sigma-min(Z1), where +*> Z1 is the complex 2-by-2 matrix +*> +*> Z1 = [ s11 -s22 ] +*> [ t11 -t22 ], +*> +*> This is done by computing (using real arithmetic) the +*> roots of the characteristical polynomial det(Z1**T * Z1 - lambda I), +*> where Z1**T denotes the transpose of Z1 and det(X) denotes +*> the determinant of X. +*> +*> and d2 is an upper bound on Difl((S11, T11), (S22, T22)), i.e. an +*> upper bound on sigma-min(Z2), where Z2 is (2n-2)-by-(2n-2) +*> +*> Z2 = [ kron(S11**T, In-2) -kron(I2, S22) ] +*> [ kron(T11**T, In-2) -kron(I2, T22) ] +*> +*> Note that if the default method for computing DIF is wanted (see +*> DLATDF), then the parameter DIFDRI (see below) should be changed +*> from 3 to 4 (routine DLATDF(IJOB = 2 will be used)). See DTGSYL +*> for more details. +*> +*> For each eigenvalue/vector specified by SELECT, DIF stores a +*> Frobenius norm-based estimate of Difl. +*> +*> An approximate error bound for the i-th computed eigenvector VL(i) or +*> VR(i) is given by +*> +*> EPS * norm(A, B) / DIF(i). +*> +*> See ref. [2-3] for more details and further references. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +*> Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +*> M.S. Moonen et al (eds), Linear Algebra for Large Scale and +*> Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +*> +*> [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +*> Eigenvalues of a Regular Matrix Pair (A, B) and Condition +*> Estimation: Theory, Algorithms and Software, +*> Report UMINF - 94.04, Department of Computing Science, Umea +*> University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working +*> Note 87. To appear in Numerical Algorithms, 1996. +*> +*> [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +*> for Solving the Generalized Sylvester Equation and Estimating the +*> Separation between Regular Matrix Pairs, Report UMINF - 93.23, +*> Department of Computing Science, Umea University, S-901 87 Umea, +*> Sweden, December 1993, Revised April 1994, Also as LAPACK Working +*> Note 75. To appear in ACM Trans. on Math. Software, Vol 22, +*> No 1, 1996. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTGSNA( JOB, HOWMNY, SELECT, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, S, DIF, MM, M, WORK, LWORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, JOB + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), DIF( * ), S( * ), + $ VL( LDVL, * ), VR( LDVR, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER DIFDRI + PARAMETER ( DIFDRI = 3 ) + DOUBLE PRECISION ZERO, ONE, TWO, FOUR + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0, + $ FOUR = 4.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, PAIR, SOMCON, WANTBH, WANTDF, WANTS + INTEGER I, IERR, IFST, ILST, IZ, K, KS, LWMIN, N1, N2 + DOUBLE PRECISION ALPHAI, ALPHAR, ALPRQT, BETA, C1, C2, COND, + $ EPS, LNRM, RNRM, ROOT1, ROOT2, SCALE, SMLNUM, + $ TMPII, TMPIR, TMPRI, TMPRR, UHAV, UHAVI, UHBV, + $ UHBVI +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUMMY( 1 ), DUMMY1( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT, DLAMCH, DLAPY2, DNRM2 + EXTERNAL LSAME, DDOT, DLAMCH, DLAPY2, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, DLACPY, DLAG2, DTGEXC, DTGSYL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTDF = LSAME( JOB, 'V' ) .OR. WANTBH +* + SOMCON = LSAME( HOWMNY, 'S' ) +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.WANTS .AND. .NOT.WANTDF ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( WANTS .AND. LDVL.LT.N ) THEN + INFO = -10 + ELSE IF( WANTS .AND. LDVR.LT.N ) THEN + INFO = -12 + ELSE +* +* Set M to the number of eigenpairs for which condition numbers +* are required, and test MM. +* + IF( SOMCON ) THEN + M = 0 + PAIR = .FALSE. + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + IF( K.LT.N ) THEN + IF( A( K+1, K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) + $ M = M + 2 + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE + ELSE + M = N + END IF +* + IF( N.EQ.0 ) THEN + LWMIN = 1 + ELSE IF( LSAME( JOB, 'V' ) .OR. LSAME( JOB, 'B' ) ) THEN + LWMIN = 2*N*( N + 2 ) + 16 + ELSE + LWMIN = N + END IF + WORK( 1 ) = LWMIN +* + IF( MM.LT.M ) THEN + INFO = -15 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTGSNA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + KS = 0 + PAIR = .FALSE. +* + DO 20 K = 1, N +* +* Determine whether A(k,k) begins a 1-by-1 or 2-by-2 block. +* + IF( PAIR ) THEN + PAIR = .FALSE. + GO TO 20 + ELSE + IF( K.LT.N ) + $ PAIR = A( K+1, K ).NE.ZERO + END IF +* +* Determine whether condition numbers are required for the k-th +* eigenpair. +* + IF( SOMCON ) THEN + IF( PAIR ) THEN + IF( .NOT.SELECT( K ) .AND. .NOT.SELECT( K+1 ) ) + $ GO TO 20 + ELSE + IF( .NOT.SELECT( K ) ) + $ GO TO 20 + END IF + END IF +* + KS = KS + 1 +* + IF( WANTS ) THEN +* +* Compute the reciprocal condition number of the k-th +* eigenvalue. +* + IF( PAIR ) THEN +* +* Complex eigenvalue pair. +* + RNRM = DLAPY2( DNRM2( N, VR( 1, KS ), 1 ), + $ DNRM2( N, VR( 1, KS+1 ), 1 ) ) + LNRM = DLAPY2( DNRM2( N, VL( 1, KS ), 1 ), + $ DNRM2( N, VL( 1, KS+1 ), 1 ) ) + CALL DGEMV( 'N', N, N, ONE, A, LDA, VR( 1, KS ), 1, ZERO, + $ WORK, 1 ) + TMPRR = DDOT( N, WORK, 1, VL( 1, KS ), 1 ) + TMPRI = DDOT( N, WORK, 1, VL( 1, KS+1 ), 1 ) + CALL DGEMV( 'N', N, N, ONE, A, LDA, VR( 1, KS+1 ), 1, + $ ZERO, WORK, 1 ) + TMPII = DDOT( N, WORK, 1, VL( 1, KS+1 ), 1 ) + TMPIR = DDOT( N, WORK, 1, VL( 1, KS ), 1 ) + UHAV = TMPRR + TMPII + UHAVI = TMPIR - TMPRI + CALL DGEMV( 'N', N, N, ONE, B, LDB, VR( 1, KS ), 1, ZERO, + $ WORK, 1 ) + TMPRR = DDOT( N, WORK, 1, VL( 1, KS ), 1 ) + TMPRI = DDOT( N, WORK, 1, VL( 1, KS+1 ), 1 ) + CALL DGEMV( 'N', N, N, ONE, B, LDB, VR( 1, KS+1 ), 1, + $ ZERO, WORK, 1 ) + TMPII = DDOT( N, WORK, 1, VL( 1, KS+1 ), 1 ) + TMPIR = DDOT( N, WORK, 1, VL( 1, KS ), 1 ) + UHBV = TMPRR + TMPII + UHBVI = TMPIR - TMPRI + UHAV = DLAPY2( UHAV, UHAVI ) + UHBV = DLAPY2( UHBV, UHBVI ) + COND = DLAPY2( UHAV, UHBV ) + S( KS ) = COND / ( RNRM*LNRM ) + S( KS+1 ) = S( KS ) +* + ELSE +* +* Real eigenvalue. +* + RNRM = DNRM2( N, VR( 1, KS ), 1 ) + LNRM = DNRM2( N, VL( 1, KS ), 1 ) + CALL DGEMV( 'N', N, N, ONE, A, LDA, VR( 1, KS ), 1, ZERO, + $ WORK, 1 ) + UHAV = DDOT( N, WORK, 1, VL( 1, KS ), 1 ) + CALL DGEMV( 'N', N, N, ONE, B, LDB, VR( 1, KS ), 1, ZERO, + $ WORK, 1 ) + UHBV = DDOT( N, WORK, 1, VL( 1, KS ), 1 ) + COND = DLAPY2( UHAV, UHBV ) + IF( COND.EQ.ZERO ) THEN + S( KS ) = -ONE + ELSE + S( KS ) = COND / ( RNRM*LNRM ) + END IF + END IF + END IF +* + IF( WANTDF ) THEN + IF( N.EQ.1 ) THEN + DIF( KS ) = DLAPY2( A( 1, 1 ), B( 1, 1 ) ) + GO TO 20 + END IF +* +* Estimate the reciprocal condition number of the k-th +* eigenvectors. + IF( PAIR ) THEN +* +* Copy the 2-by 2 pencil beginning at (A(k,k), B(k, k)). +* Compute the eigenvalue(s) at position K. +* + WORK( 1 ) = A( K, K ) + WORK( 2 ) = A( K+1, K ) + WORK( 3 ) = A( K, K+1 ) + WORK( 4 ) = A( K+1, K+1 ) + WORK( 5 ) = B( K, K ) + WORK( 6 ) = B( K+1, K ) + WORK( 7 ) = B( K, K+1 ) + WORK( 8 ) = B( K+1, K+1 ) + CALL DLAG2( WORK, 2, WORK( 5 ), 2, SMLNUM*EPS, BETA, + $ DUMMY1( 1 ), ALPHAR, DUMMY( 1 ), ALPHAI ) + ALPRQT = ONE + C1 = TWO*( ALPHAR*ALPHAR+ALPHAI*ALPHAI+BETA*BETA ) + C2 = FOUR*BETA*BETA*ALPHAI*ALPHAI + ROOT1 = C1 + SQRT( C1*C1-4.0D0*C2 ) + ROOT2 = C2 / ROOT1 + ROOT1 = ROOT1 / TWO + COND = MIN( SQRT( ROOT1 ), SQRT( ROOT2 ) ) + END IF +* +* Copy the matrix (A, B) to the array WORK and swap the +* diagonal block beginning at A(k,k) to the (1,1) position. +* + CALL DLACPY( 'Full', N, N, A, LDA, WORK, N ) + CALL DLACPY( 'Full', N, N, B, LDB, WORK( N*N+1 ), N ) + IFST = K + ILST = 1 +* + CALL DTGEXC( .FALSE., .FALSE., N, WORK, N, WORK( N*N+1 ), N, + $ DUMMY, 1, DUMMY1, 1, IFST, ILST, + $ WORK( N*N*2+1 ), LWORK-2*N*N, IERR ) +* + IF( IERR.GT.0 ) THEN +* +* Ill-conditioned problem - swap rejected. +* + DIF( KS ) = ZERO + ELSE +* +* Reordering successful, solve generalized Sylvester +* equation for R and L, +* A22 * R - L * A11 = A12 +* B22 * R - L * B11 = B12, +* and compute estimate of Difl((A11,B11), (A22, B22)). +* + N1 = 1 + IF( WORK( 2 ).NE.ZERO ) + $ N1 = 2 + N2 = N - N1 + IF( N2.EQ.0 ) THEN + DIF( KS ) = COND + ELSE + I = N*N + 1 + IZ = 2*N*N + 1 + CALL DTGSYL( 'N', DIFDRI, N2, N1, WORK( N*N1+N1+1 ), + $ N, WORK, N, WORK( N1+1 ), N, + $ WORK( N*N1+N1+I ), N, WORK( I ), N, + $ WORK( N1+I ), N, SCALE, DIF( KS ), + $ WORK( IZ+1 ), LWORK-2*N*N, IWORK, IERR ) +* + IF( PAIR ) + $ DIF( KS ) = MIN( MAX( ONE, ALPRQT )*DIF( KS ), + $ COND ) + END IF + END IF + IF( PAIR ) + $ DIF( KS+1 ) = DIF( KS ) + END IF + IF( PAIR ) + $ KS = KS + 1 +* + 20 CONTINUE + WORK( 1 ) = LWMIN + RETURN +* +* End of DTGSNA +* + END diff --git a/dspl/liblapack/SRC/dtgsy2.f b/dspl/liblapack/SRC/dtgsy2.f new file mode 100644 index 0000000..1c687b1 --- /dev/null +++ b/dspl/liblapack/SRC/dtgsy2.f @@ -0,0 +1,1075 @@ +*> \brief \b DTGSY2 solves the generalized Sylvester equation (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTGSY2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTGSY2( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, +* LDD, E, LDE, F, LDF, SCALE, RDSUM, RDSCAL, +* IWORK, PQ, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, M, N, +* $ PQ +* DOUBLE PRECISION RDSCAL, RDSUM, SCALE +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ), +* $ D( LDD, * ), E( LDE, * ), F( LDF, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTGSY2 solves the generalized Sylvester equation: +*> +*> A * R - L * B = scale * C (1) +*> D * R - L * E = scale * F, +*> +*> using Level 1 and 2 BLAS. where R and L are unknown M-by-N matrices, +*> (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M, +*> N-by-N and M-by-N, respectively, with real entries. (A, D) and (B, E) +*> must be in generalized Schur canonical form, i.e. A, B are upper +*> quasi triangular and D, E are upper triangular. The solution (R, L) +*> overwrites (C, F). 0 <= SCALE <= 1 is an output scaling factor +*> chosen to avoid overflow. +*> +*> In matrix notation solving equation (1) corresponds to solve +*> Z*x = scale*b, where Z is defined as +*> +*> Z = [ kron(In, A) -kron(B**T, Im) ] (2) +*> [ kron(In, D) -kron(E**T, Im) ], +*> +*> Ik is the identity matrix of size k and X**T is the transpose of X. +*> kron(X, Y) is the Kronecker product between the matrices X and Y. +*> In the process of solving (1), we solve a number of such systems +*> where Dim(In), Dim(In) = 1 or 2. +*> +*> If TRANS = 'T', solve the transposed system Z**T*y = scale*b for y, +*> which is equivalent to solve for R and L in +*> +*> A**T * R + D**T * L = scale * C (3) +*> R * B**T + L * E**T = scale * -F +*> +*> This case is used to compute an estimate of Dif[(A, D), (B, E)] = +*> sigma_min(Z) using reverse communicaton with DLACON. +*> +*> DTGSY2 also (IJOB >= 1) contributes to the computation in DTGSYL +*> of an upper bound on the separation between to matrix pairs. Then +*> the input (A, D), (B, E) are sub-pencils of the matrix pair in +*> DTGSYL. See DTGSYL for details. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N', solve the generalized Sylvester equation (1). +*> = 'T': solve the 'transposed' system (3). +*> \endverbatim +*> +*> \param[in] IJOB +*> \verbatim +*> IJOB is INTEGER +*> Specifies what kind of functionality to be performed. +*> = 0: solve (1) only. +*> = 1: A contribution from this subsystem to a Frobenius +*> norm-based estimate of the separation between two matrix +*> pairs is computed. (look ahead strategy is used). +*> = 2: A contribution from this subsystem to a Frobenius +*> norm-based estimate of the separation between two matrix +*> pairs is computed. (DGECON on sub-systems is used.) +*> Not referenced if TRANS = 'T'. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the order of A and D, and the row +*> dimension of C, F, R and L. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of B and E, and the column +*> dimension of C, F, R and L. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, M) +*> On entry, A contains an upper quasi triangular matrix. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the matrix A. LDA >= max(1, M). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, N) +*> On entry, B contains an upper quasi triangular matrix. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the matrix B. LDB >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC, N) +*> On entry, C contains the right-hand-side of the first matrix +*> equation in (1). +*> On exit, if IJOB = 0, C has been overwritten by the +*> solution R. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the matrix C. LDC >= max(1, M). +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (LDD, M) +*> On entry, D contains an upper triangular matrix. +*> \endverbatim +*> +*> \param[in] LDD +*> \verbatim +*> LDD is INTEGER +*> The leading dimension of the matrix D. LDD >= max(1, M). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (LDE, N) +*> On entry, E contains an upper triangular matrix. +*> \endverbatim +*> +*> \param[in] LDE +*> \verbatim +*> LDE is INTEGER +*> The leading dimension of the matrix E. LDE >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] F +*> \verbatim +*> F is DOUBLE PRECISION array, dimension (LDF, N) +*> On entry, F contains the right-hand-side of the second matrix +*> equation in (1). +*> On exit, if IJOB = 0, F has been overwritten by the +*> solution L. +*> \endverbatim +*> +*> \param[in] LDF +*> \verbatim +*> LDF is INTEGER +*> The leading dimension of the matrix F. LDF >= max(1, M). +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> On exit, 0 <= SCALE <= 1. If 0 < SCALE < 1, the solutions +*> R and L (C and F on entry) will hold the solutions to a +*> slightly perturbed system but the input matrices A, B, D and +*> E have not been changed. If SCALE = 0, R and L will hold the +*> solutions to the homogeneous system with C = F = 0. Normally, +*> SCALE = 1. +*> \endverbatim +*> +*> \param[in,out] RDSUM +*> \verbatim +*> RDSUM is DOUBLE PRECISION +*> On entry, the sum of squares of computed contributions to +*> the Dif-estimate under computation by DTGSYL, where the +*> scaling factor RDSCAL (see below) has been factored out. +*> On exit, the corresponding sum of squares updated with the +*> contributions from the current sub-system. +*> If TRANS = 'T' RDSUM is not touched. +*> NOTE: RDSUM only makes sense when DTGSY2 is called by DTGSYL. +*> \endverbatim +*> +*> \param[in,out] RDSCAL +*> \verbatim +*> RDSCAL is DOUBLE PRECISION +*> On entry, scaling factor used to prevent overflow in RDSUM. +*> On exit, RDSCAL is updated w.r.t. the current contributions +*> in RDSUM. +*> If TRANS = 'T', RDSCAL is not touched. +*> NOTE: RDSCAL only makes sense when DTGSY2 is called by +*> DTGSYL. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (M+N+2) +*> \endverbatim +*> +*> \param[out] PQ +*> \verbatim +*> PQ is INTEGER +*> On exit, the number of subsystems (of size 2-by-2, 4-by-4 and +*> 8-by-8) solved by this routine. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> On exit, if INFO is set to +*> =0: Successful exit +*> <0: If INFO = -i, the i-th argument had an illegal value. +*> >0: The matrix pairs (A, D) and (B, E) have common or very +*> close eigenvalues. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYauxiliary +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== + SUBROUTINE DTGSY2( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, RDSUM, RDSCAL, + $ IWORK, PQ, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, M, N, + $ PQ + DOUBLE PRECISION RDSCAL, RDSUM, SCALE +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ D( LDD, * ), E( LDE, * ), F( LDF, * ) +* .. +* +* ===================================================================== +* Replaced various illegal calls to DCOPY by calls to DLASET. +* Sven Hammarling, 27/5/02. +* +* .. Parameters .. + INTEGER LDZ + PARAMETER ( LDZ = 8 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER I, IE, IERR, II, IS, ISP1, J, JE, JJ, JS, JSP1, + $ K, MB, NB, P, Q, ZDIM + DOUBLE PRECISION ALPHA, SCALOC +* .. +* .. Local Arrays .. + INTEGER IPIV( LDZ ), JPIV( LDZ ) + DOUBLE PRECISION RHS( LDZ ), Z( LDZ, LDZ ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGEMM, DGEMV, DGER, DGESC2, + $ DGETC2, DLASET, DLATDF, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode and test input parameters +* + INFO = 0 + IERR = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -1 + ELSE IF( NOTRAN ) THEN + IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.2 ) ) THEN + INFO = -2 + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( M.LE.0 ) THEN + INFO = -3 + ELSE IF( N.LE.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDD.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDE.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDF.LT.MAX( 1, M ) ) THEN + INFO = -16 + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTGSY2', -INFO ) + RETURN + END IF +* +* Determine block structure of A +* + PQ = 0 + P = 0 + I = 1 + 10 CONTINUE + IF( I.GT.M ) + $ GO TO 20 + P = P + 1 + IWORK( P ) = I + IF( I.EQ.M ) + $ GO TO 20 + IF( A( I+1, I ).NE.ZERO ) THEN + I = I + 2 + ELSE + I = I + 1 + END IF + GO TO 10 + 20 CONTINUE + IWORK( P+1 ) = M + 1 +* +* Determine block structure of B +* + Q = P + 1 + J = 1 + 30 CONTINUE + IF( J.GT.N ) + $ GO TO 40 + Q = Q + 1 + IWORK( Q ) = J + IF( J.EQ.N ) + $ GO TO 40 + IF( B( J+1, J ).NE.ZERO ) THEN + J = J + 2 + ELSE + J = J + 1 + END IF + GO TO 30 + 40 CONTINUE + IWORK( Q+1 ) = N + 1 + PQ = P*( Q-P-1 ) +* + IF( NOTRAN ) THEN +* +* Solve (I, J) - subsystem +* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) +* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) +* for I = P, P - 1, ..., 1; J = 1, 2, ..., Q +* + SCALE = ONE + SCALOC = ONE + DO 120 J = P + 2, Q + JS = IWORK( J ) + JSP1 = JS + 1 + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + DO 110 I = P, 1, -1 +* + IS = IWORK( I ) + ISP1 = IS + 1 + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + ZDIM = MB*NB*2 +* + IF( ( MB.EQ.1 ) .AND. ( NB.EQ.1 ) ) THEN +* +* Build a 2-by-2 system Z * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = D( IS, IS ) + Z( 1, 2 ) = -B( JS, JS ) + Z( 2, 2 ) = -E( JS, JS ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = F( IS, JS ) +* +* Solve Z * x = RHS +* + CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR +* + IF( IJOB.EQ.0 ) THEN + CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, + $ SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 50 K = 1, N + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 50 CONTINUE + SCALE = SCALE*SCALOC + END IF + ELSE + CALL DLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM, + $ RDSCAL, IPIV, JPIV ) + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + F( IS, JS ) = RHS( 2 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( I.GT.1 ) THEN + ALPHA = -RHS( 1 ) + CALL DAXPY( IS-1, ALPHA, A( 1, IS ), 1, C( 1, JS ), + $ 1 ) + CALL DAXPY( IS-1, ALPHA, D( 1, IS ), 1, F( 1, JS ), + $ 1 ) + END IF + IF( J.LT.Q ) THEN + CALL DAXPY( N-JE, RHS( 2 ), B( JS, JE+1 ), LDB, + $ C( IS, JE+1 ), LDC ) + CALL DAXPY( N-JE, RHS( 2 ), E( JS, JE+1 ), LDE, + $ F( IS, JE+1 ), LDF ) + END IF +* + ELSE IF( ( MB.EQ.1 ) .AND. ( NB.EQ.2 ) ) THEN +* +* Build a 4-by-4 system Z * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = ZERO + Z( 3, 1 ) = D( IS, IS ) + Z( 4, 1 ) = ZERO +* + Z( 1, 2 ) = ZERO + Z( 2, 2 ) = A( IS, IS ) + Z( 3, 2 ) = ZERO + Z( 4, 2 ) = D( IS, IS ) +* + Z( 1, 3 ) = -B( JS, JS ) + Z( 2, 3 ) = -B( JS, JSP1 ) + Z( 3, 3 ) = -E( JS, JS ) + Z( 4, 3 ) = -E( JS, JSP1 ) +* + Z( 1, 4 ) = -B( JSP1, JS ) + Z( 2, 4 ) = -B( JSP1, JSP1 ) + Z( 3, 4 ) = ZERO + Z( 4, 4 ) = -E( JSP1, JSP1 ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = C( IS, JSP1 ) + RHS( 3 ) = F( IS, JS ) + RHS( 4 ) = F( IS, JSP1 ) +* +* Solve Z * x = RHS +* + CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR +* + IF( IJOB.EQ.0 ) THEN + CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, + $ SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 60 K = 1, N + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 60 CONTINUE + SCALE = SCALE*SCALOC + END IF + ELSE + CALL DLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM, + $ RDSCAL, IPIV, JPIV ) + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + C( IS, JSP1 ) = RHS( 2 ) + F( IS, JS ) = RHS( 3 ) + F( IS, JSP1 ) = RHS( 4 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( I.GT.1 ) THEN + CALL DGER( IS-1, NB, -ONE, A( 1, IS ), 1, RHS( 1 ), + $ 1, C( 1, JS ), LDC ) + CALL DGER( IS-1, NB, -ONE, D( 1, IS ), 1, RHS( 1 ), + $ 1, F( 1, JS ), LDF ) + END IF + IF( J.LT.Q ) THEN + CALL DAXPY( N-JE, RHS( 3 ), B( JS, JE+1 ), LDB, + $ C( IS, JE+1 ), LDC ) + CALL DAXPY( N-JE, RHS( 3 ), E( JS, JE+1 ), LDE, + $ F( IS, JE+1 ), LDF ) + CALL DAXPY( N-JE, RHS( 4 ), B( JSP1, JE+1 ), LDB, + $ C( IS, JE+1 ), LDC ) + CALL DAXPY( N-JE, RHS( 4 ), E( JSP1, JE+1 ), LDE, + $ F( IS, JE+1 ), LDF ) + END IF +* + ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.1 ) ) THEN +* +* Build a 4-by-4 system Z * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = A( ISP1, IS ) + Z( 3, 1 ) = D( IS, IS ) + Z( 4, 1 ) = ZERO +* + Z( 1, 2 ) = A( IS, ISP1 ) + Z( 2, 2 ) = A( ISP1, ISP1 ) + Z( 3, 2 ) = D( IS, ISP1 ) + Z( 4, 2 ) = D( ISP1, ISP1 ) +* + Z( 1, 3 ) = -B( JS, JS ) + Z( 2, 3 ) = ZERO + Z( 3, 3 ) = -E( JS, JS ) + Z( 4, 3 ) = ZERO +* + Z( 1, 4 ) = ZERO + Z( 2, 4 ) = -B( JS, JS ) + Z( 3, 4 ) = ZERO + Z( 4, 4 ) = -E( JS, JS ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = C( ISP1, JS ) + RHS( 3 ) = F( IS, JS ) + RHS( 4 ) = F( ISP1, JS ) +* +* Solve Z * x = RHS +* + CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR + IF( IJOB.EQ.0 ) THEN + CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, + $ SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 70 K = 1, N + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 70 CONTINUE + SCALE = SCALE*SCALOC + END IF + ELSE + CALL DLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM, + $ RDSCAL, IPIV, JPIV ) + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + C( ISP1, JS ) = RHS( 2 ) + F( IS, JS ) = RHS( 3 ) + F( ISP1, JS ) = RHS( 4 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( I.GT.1 ) THEN + CALL DGEMV( 'N', IS-1, MB, -ONE, A( 1, IS ), LDA, + $ RHS( 1 ), 1, ONE, C( 1, JS ), 1 ) + CALL DGEMV( 'N', IS-1, MB, -ONE, D( 1, IS ), LDD, + $ RHS( 1 ), 1, ONE, F( 1, JS ), 1 ) + END IF + IF( J.LT.Q ) THEN + CALL DGER( MB, N-JE, ONE, RHS( 3 ), 1, + $ B( JS, JE+1 ), LDB, C( IS, JE+1 ), LDC ) + CALL DGER( MB, N-JE, ONE, RHS( 3 ), 1, + $ E( JS, JE+1 ), LDE, F( IS, JE+1 ), LDF ) + END IF +* + ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.2 ) ) THEN +* +* Build an 8-by-8 system Z * x = RHS +* + CALL DLASET( 'F', LDZ, LDZ, ZERO, ZERO, Z, LDZ ) +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = A( ISP1, IS ) + Z( 5, 1 ) = D( IS, IS ) +* + Z( 1, 2 ) = A( IS, ISP1 ) + Z( 2, 2 ) = A( ISP1, ISP1 ) + Z( 5, 2 ) = D( IS, ISP1 ) + Z( 6, 2 ) = D( ISP1, ISP1 ) +* + Z( 3, 3 ) = A( IS, IS ) + Z( 4, 3 ) = A( ISP1, IS ) + Z( 7, 3 ) = D( IS, IS ) +* + Z( 3, 4 ) = A( IS, ISP1 ) + Z( 4, 4 ) = A( ISP1, ISP1 ) + Z( 7, 4 ) = D( IS, ISP1 ) + Z( 8, 4 ) = D( ISP1, ISP1 ) +* + Z( 1, 5 ) = -B( JS, JS ) + Z( 3, 5 ) = -B( JS, JSP1 ) + Z( 5, 5 ) = -E( JS, JS ) + Z( 7, 5 ) = -E( JS, JSP1 ) +* + Z( 2, 6 ) = -B( JS, JS ) + Z( 4, 6 ) = -B( JS, JSP1 ) + Z( 6, 6 ) = -E( JS, JS ) + Z( 8, 6 ) = -E( JS, JSP1 ) +* + Z( 1, 7 ) = -B( JSP1, JS ) + Z( 3, 7 ) = -B( JSP1, JSP1 ) + Z( 7, 7 ) = -E( JSP1, JSP1 ) +* + Z( 2, 8 ) = -B( JSP1, JS ) + Z( 4, 8 ) = -B( JSP1, JSP1 ) + Z( 8, 8 ) = -E( JSP1, JSP1 ) +* +* Set up right hand side(s) +* + K = 1 + II = MB*NB + 1 + DO 80 JJ = 0, NB - 1 + CALL DCOPY( MB, C( IS, JS+JJ ), 1, RHS( K ), 1 ) + CALL DCOPY( MB, F( IS, JS+JJ ), 1, RHS( II ), 1 ) + K = K + MB + II = II + MB + 80 CONTINUE +* +* Solve Z * x = RHS +* + CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR + IF( IJOB.EQ.0 ) THEN + CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, + $ SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 90 K = 1, N + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 90 CONTINUE + SCALE = SCALE*SCALOC + END IF + ELSE + CALL DLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM, + $ RDSCAL, IPIV, JPIV ) + END IF +* +* Unpack solution vector(s) +* + K = 1 + II = MB*NB + 1 + DO 100 JJ = 0, NB - 1 + CALL DCOPY( MB, RHS( K ), 1, C( IS, JS+JJ ), 1 ) + CALL DCOPY( MB, RHS( II ), 1, F( IS, JS+JJ ), 1 ) + K = K + MB + II = II + MB + 100 CONTINUE +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( I.GT.1 ) THEN + CALL DGEMM( 'N', 'N', IS-1, NB, MB, -ONE, + $ A( 1, IS ), LDA, RHS( 1 ), MB, ONE, + $ C( 1, JS ), LDC ) + CALL DGEMM( 'N', 'N', IS-1, NB, MB, -ONE, + $ D( 1, IS ), LDD, RHS( 1 ), MB, ONE, + $ F( 1, JS ), LDF ) + END IF + IF( J.LT.Q ) THEN + K = MB*NB + 1 + CALL DGEMM( 'N', 'N', MB, N-JE, NB, ONE, RHS( K ), + $ MB, B( JS, JE+1 ), LDB, ONE, + $ C( IS, JE+1 ), LDC ) + CALL DGEMM( 'N', 'N', MB, N-JE, NB, ONE, RHS( K ), + $ MB, E( JS, JE+1 ), LDE, ONE, + $ F( IS, JE+1 ), LDF ) + END IF +* + END IF +* + 110 CONTINUE + 120 CONTINUE + ELSE +* +* Solve (I, J) - subsystem +* A(I, I)**T * R(I, J) + D(I, I)**T * L(J, J) = C(I, J) +* R(I, I) * B(J, J) + L(I, J) * E(J, J) = -F(I, J) +* for I = 1, 2, ..., P, J = Q, Q - 1, ..., 1 +* + SCALE = ONE + SCALOC = ONE + DO 200 I = 1, P +* + IS = IWORK( I ) + ISP1 = IS + 1 + IE = IWORK ( I+1 ) - 1 + MB = IE - IS + 1 + DO 190 J = Q, P + 2, -1 +* + JS = IWORK( J ) + JSP1 = JS + 1 + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + ZDIM = MB*NB*2 + IF( ( MB.EQ.1 ) .AND. ( NB.EQ.1 ) ) THEN +* +* Build a 2-by-2 system Z**T * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = -B( JS, JS ) + Z( 1, 2 ) = D( IS, IS ) + Z( 2, 2 ) = -E( JS, JS ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = F( IS, JS ) +* +* Solve Z**T * x = RHS +* + CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR +* + CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 130 K = 1, N + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 130 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + F( IS, JS ) = RHS( 2 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( J.GT.P+2 ) THEN + ALPHA = RHS( 1 ) + CALL DAXPY( JS-1, ALPHA, B( 1, JS ), 1, F( IS, 1 ), + $ LDF ) + ALPHA = RHS( 2 ) + CALL DAXPY( JS-1, ALPHA, E( 1, JS ), 1, F( IS, 1 ), + $ LDF ) + END IF + IF( I.LT.P ) THEN + ALPHA = -RHS( 1 ) + CALL DAXPY( M-IE, ALPHA, A( IS, IE+1 ), LDA, + $ C( IE+1, JS ), 1 ) + ALPHA = -RHS( 2 ) + CALL DAXPY( M-IE, ALPHA, D( IS, IE+1 ), LDD, + $ C( IE+1, JS ), 1 ) + END IF +* + ELSE IF( ( MB.EQ.1 ) .AND. ( NB.EQ.2 ) ) THEN +* +* Build a 4-by-4 system Z**T * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = ZERO + Z( 3, 1 ) = -B( JS, JS ) + Z( 4, 1 ) = -B( JSP1, JS ) +* + Z( 1, 2 ) = ZERO + Z( 2, 2 ) = A( IS, IS ) + Z( 3, 2 ) = -B( JS, JSP1 ) + Z( 4, 2 ) = -B( JSP1, JSP1 ) +* + Z( 1, 3 ) = D( IS, IS ) + Z( 2, 3 ) = ZERO + Z( 3, 3 ) = -E( JS, JS ) + Z( 4, 3 ) = ZERO +* + Z( 1, 4 ) = ZERO + Z( 2, 4 ) = D( IS, IS ) + Z( 3, 4 ) = -E( JS, JSP1 ) + Z( 4, 4 ) = -E( JSP1, JSP1 ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = C( IS, JSP1 ) + RHS( 3 ) = F( IS, JS ) + RHS( 4 ) = F( IS, JSP1 ) +* +* Solve Z**T * x = RHS +* + CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR + CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 140 K = 1, N + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 140 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + C( IS, JSP1 ) = RHS( 2 ) + F( IS, JS ) = RHS( 3 ) + F( IS, JSP1 ) = RHS( 4 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( J.GT.P+2 ) THEN + CALL DAXPY( JS-1, RHS( 1 ), B( 1, JS ), 1, + $ F( IS, 1 ), LDF ) + CALL DAXPY( JS-1, RHS( 2 ), B( 1, JSP1 ), 1, + $ F( IS, 1 ), LDF ) + CALL DAXPY( JS-1, RHS( 3 ), E( 1, JS ), 1, + $ F( IS, 1 ), LDF ) + CALL DAXPY( JS-1, RHS( 4 ), E( 1, JSP1 ), 1, + $ F( IS, 1 ), LDF ) + END IF + IF( I.LT.P ) THEN + CALL DGER( M-IE, NB, -ONE, A( IS, IE+1 ), LDA, + $ RHS( 1 ), 1, C( IE+1, JS ), LDC ) + CALL DGER( M-IE, NB, -ONE, D( IS, IE+1 ), LDD, + $ RHS( 3 ), 1, C( IE+1, JS ), LDC ) + END IF +* + ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.1 ) ) THEN +* +* Build a 4-by-4 system Z**T * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = A( IS, ISP1 ) + Z( 3, 1 ) = -B( JS, JS ) + Z( 4, 1 ) = ZERO +* + Z( 1, 2 ) = A( ISP1, IS ) + Z( 2, 2 ) = A( ISP1, ISP1 ) + Z( 3, 2 ) = ZERO + Z( 4, 2 ) = -B( JS, JS ) +* + Z( 1, 3 ) = D( IS, IS ) + Z( 2, 3 ) = D( IS, ISP1 ) + Z( 3, 3 ) = -E( JS, JS ) + Z( 4, 3 ) = ZERO +* + Z( 1, 4 ) = ZERO + Z( 2, 4 ) = D( ISP1, ISP1 ) + Z( 3, 4 ) = ZERO + Z( 4, 4 ) = -E( JS, JS ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = C( ISP1, JS ) + RHS( 3 ) = F( IS, JS ) + RHS( 4 ) = F( ISP1, JS ) +* +* Solve Z**T * x = RHS +* + CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR +* + CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 150 K = 1, N + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 150 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + C( ISP1, JS ) = RHS( 2 ) + F( IS, JS ) = RHS( 3 ) + F( ISP1, JS ) = RHS( 4 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( J.GT.P+2 ) THEN + CALL DGER( MB, JS-1, ONE, RHS( 1 ), 1, B( 1, JS ), + $ 1, F( IS, 1 ), LDF ) + CALL DGER( MB, JS-1, ONE, RHS( 3 ), 1, E( 1, JS ), + $ 1, F( IS, 1 ), LDF ) + END IF + IF( I.LT.P ) THEN + CALL DGEMV( 'T', MB, M-IE, -ONE, A( IS, IE+1 ), + $ LDA, RHS( 1 ), 1, ONE, C( IE+1, JS ), + $ 1 ) + CALL DGEMV( 'T', MB, M-IE, -ONE, D( IS, IE+1 ), + $ LDD, RHS( 3 ), 1, ONE, C( IE+1, JS ), + $ 1 ) + END IF +* + ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.2 ) ) THEN +* +* Build an 8-by-8 system Z**T * x = RHS +* + CALL DLASET( 'F', LDZ, LDZ, ZERO, ZERO, Z, LDZ ) +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = A( IS, ISP1 ) + Z( 5, 1 ) = -B( JS, JS ) + Z( 7, 1 ) = -B( JSP1, JS ) +* + Z( 1, 2 ) = A( ISP1, IS ) + Z( 2, 2 ) = A( ISP1, ISP1 ) + Z( 6, 2 ) = -B( JS, JS ) + Z( 8, 2 ) = -B( JSP1, JS ) +* + Z( 3, 3 ) = A( IS, IS ) + Z( 4, 3 ) = A( IS, ISP1 ) + Z( 5, 3 ) = -B( JS, JSP1 ) + Z( 7, 3 ) = -B( JSP1, JSP1 ) +* + Z( 3, 4 ) = A( ISP1, IS ) + Z( 4, 4 ) = A( ISP1, ISP1 ) + Z( 6, 4 ) = -B( JS, JSP1 ) + Z( 8, 4 ) = -B( JSP1, JSP1 ) +* + Z( 1, 5 ) = D( IS, IS ) + Z( 2, 5 ) = D( IS, ISP1 ) + Z( 5, 5 ) = -E( JS, JS ) +* + Z( 2, 6 ) = D( ISP1, ISP1 ) + Z( 6, 6 ) = -E( JS, JS ) +* + Z( 3, 7 ) = D( IS, IS ) + Z( 4, 7 ) = D( IS, ISP1 ) + Z( 5, 7 ) = -E( JS, JSP1 ) + Z( 7, 7 ) = -E( JSP1, JSP1 ) +* + Z( 4, 8 ) = D( ISP1, ISP1 ) + Z( 6, 8 ) = -E( JS, JSP1 ) + Z( 8, 8 ) = -E( JSP1, JSP1 ) +* +* Set up right hand side(s) +* + K = 1 + II = MB*NB + 1 + DO 160 JJ = 0, NB - 1 + CALL DCOPY( MB, C( IS, JS+JJ ), 1, RHS( K ), 1 ) + CALL DCOPY( MB, F( IS, JS+JJ ), 1, RHS( II ), 1 ) + K = K + MB + II = II + MB + 160 CONTINUE +* +* +* Solve Z**T * x = RHS +* + CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR +* + CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 170 K = 1, N + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 170 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Unpack solution vector(s) +* + K = 1 + II = MB*NB + 1 + DO 180 JJ = 0, NB - 1 + CALL DCOPY( MB, RHS( K ), 1, C( IS, JS+JJ ), 1 ) + CALL DCOPY( MB, RHS( II ), 1, F( IS, JS+JJ ), 1 ) + K = K + MB + II = II + MB + 180 CONTINUE +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( J.GT.P+2 ) THEN + CALL DGEMM( 'N', 'T', MB, JS-1, NB, ONE, + $ C( IS, JS ), LDC, B( 1, JS ), LDB, ONE, + $ F( IS, 1 ), LDF ) + CALL DGEMM( 'N', 'T', MB, JS-1, NB, ONE, + $ F( IS, JS ), LDF, E( 1, JS ), LDE, ONE, + $ F( IS, 1 ), LDF ) + END IF + IF( I.LT.P ) THEN + CALL DGEMM( 'T', 'N', M-IE, NB, MB, -ONE, + $ A( IS, IE+1 ), LDA, C( IS, JS ), LDC, + $ ONE, C( IE+1, JS ), LDC ) + CALL DGEMM( 'T', 'N', M-IE, NB, MB, -ONE, + $ D( IS, IE+1 ), LDD, F( IS, JS ), LDF, + $ ONE, C( IE+1, JS ), LDC ) + END IF +* + END IF +* + 190 CONTINUE + 200 CONTINUE +* + END IF + RETURN +* +* End of DTGSY2 +* + END diff --git a/dspl/liblapack/SRC/dtgsyl.f b/dspl/liblapack/SRC/dtgsyl.f new file mode 100644 index 0000000..1cc3a1b --- /dev/null +++ b/dspl/liblapack/SRC/dtgsyl.f @@ -0,0 +1,682 @@ +*> \brief \b DTGSYL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTGSYL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTGSYL( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, +* LDD, E, LDE, F, LDF, SCALE, DIF, WORK, LWORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, +* $ LWORK, M, N +* DOUBLE PRECISION DIF, SCALE +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ), +* $ D( LDD, * ), E( LDE, * ), F( LDF, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTGSYL solves the generalized Sylvester equation: +*> +*> A * R - L * B = scale * C (1) +*> D * R - L * E = scale * F +*> +*> where R and L are unknown m-by-n matrices, (A, D), (B, E) and +*> (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n, +*> respectively, with real entries. (A, D) and (B, E) must be in +*> generalized (real) Schur canonical form, i.e. A, B are upper quasi +*> triangular and D, E are upper triangular. +*> +*> The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output +*> scaling factor chosen to avoid overflow. +*> +*> In matrix notation (1) is equivalent to solve Zx = scale b, where +*> Z is defined as +*> +*> Z = [ kron(In, A) -kron(B**T, Im) ] (2) +*> [ kron(In, D) -kron(E**T, Im) ]. +*> +*> Here Ik is the identity matrix of size k and X**T is the transpose of +*> X. kron(X, Y) is the Kronecker product between the matrices X and Y. +*> +*> If TRANS = 'T', DTGSYL solves the transposed system Z**T*y = scale*b, +*> which is equivalent to solve for R and L in +*> +*> A**T * R + D**T * L = scale * C (3) +*> R * B**T + L * E**T = scale * -F +*> +*> This case (TRANS = 'T') is used to compute an one-norm-based estimate +*> of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D) +*> and (B,E), using DLACON. +*> +*> If IJOB >= 1, DTGSYL computes a Frobenius norm-based estimate +*> of Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the +*> reciprocal of the smallest singular value of Z. See [1-2] for more +*> information. +*> +*> This is a level 3 BLAS algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N', solve the generalized Sylvester equation (1). +*> = 'T', solve the 'transposed' system (3). +*> \endverbatim +*> +*> \param[in] IJOB +*> \verbatim +*> IJOB is INTEGER +*> Specifies what kind of functionality to be performed. +*> =0: solve (1) only. +*> =1: The functionality of 0 and 3. +*> =2: The functionality of 0 and 4. +*> =3: Only an estimate of Dif[(A,D), (B,E)] is computed. +*> (look ahead strategy IJOB = 1 is used). +*> =4: Only an estimate of Dif[(A,D), (B,E)] is computed. +*> ( DGECON on sub-systems is used ). +*> Not referenced if TRANS = 'T'. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The order of the matrices A and D, and the row dimension of +*> the matrices C, F, R and L. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices B and E, and the column dimension +*> of the matrices C, F, R and L. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, M) +*> The upper quasi triangular matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1, M). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, N) +*> The upper quasi triangular matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC, N) +*> On entry, C contains the right-hand-side of the first matrix +*> equation in (1) or (3). +*> On exit, if IJOB = 0, 1 or 2, C has been overwritten by +*> the solution R. If IJOB = 3 or 4 and TRANS = 'N', C holds R, +*> the solution achieved during the computation of the +*> Dif-estimate. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1, M). +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (LDD, M) +*> The upper triangular matrix D. +*> \endverbatim +*> +*> \param[in] LDD +*> \verbatim +*> LDD is INTEGER +*> The leading dimension of the array D. LDD >= max(1, M). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (LDE, N) +*> The upper triangular matrix E. +*> \endverbatim +*> +*> \param[in] LDE +*> \verbatim +*> LDE is INTEGER +*> The leading dimension of the array E. LDE >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] F +*> \verbatim +*> F is DOUBLE PRECISION array, dimension (LDF, N) +*> On entry, F contains the right-hand-side of the second matrix +*> equation in (1) or (3). +*> On exit, if IJOB = 0, 1 or 2, F has been overwritten by +*> the solution L. If IJOB = 3 or 4 and TRANS = 'N', F holds L, +*> the solution achieved during the computation of the +*> Dif-estimate. +*> \endverbatim +*> +*> \param[in] LDF +*> \verbatim +*> LDF is INTEGER +*> The leading dimension of the array F. LDF >= max(1, M). +*> \endverbatim +*> +*> \param[out] DIF +*> \verbatim +*> DIF is DOUBLE PRECISION +*> On exit DIF is the reciprocal of a lower bound of the +*> reciprocal of the Dif-function, i.e. DIF is an upper bound of +*> Dif[(A,D), (B,E)] = sigma_min(Z), where Z as in (2). +*> IF IJOB = 0 or TRANS = 'T', DIF is not touched. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> On exit SCALE is the scaling factor in (1) or (3). +*> If 0 < SCALE < 1, C and F hold the solutions R and L, resp., +*> to a slightly perturbed system but the input matrices A, B, D +*> and E have not been changed. If SCALE = 0, C and F hold the +*> solutions R and L, respectively, to the homogeneous system +*> with C = F = 0. Normally, SCALE = 1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK > = 1. +*> If IJOB = 1 or 2 and TRANS = 'N', LWORK >= max(1,2*M*N). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (M+N+6) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> =0: successful exit +*> <0: If INFO = -i, the i-th argument had an illegal value. +*> >0: (A, D) and (B, E) have common or close eigenvalues. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYcomputational +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +*> for Solving the Generalized Sylvester Equation and Estimating the +*> Separation between Regular Matrix Pairs, Report UMINF - 93.23, +*> Department of Computing Science, Umea University, S-901 87 Umea, +*> Sweden, December 1993, Revised April 1994, Also as LAPACK Working +*> Note 75. To appear in ACM Trans. on Math. Software, Vol 22, +*> No 1, 1996. +*> +*> [2] B. Kagstrom, A Perturbation Analysis of the Generalized Sylvester +*> Equation (AR - LB, DR - LE ) = (C, F), SIAM J. Matrix Anal. +*> Appl., 15(4):1045-1060, 1994 +*> +*> [3] B. Kagstrom and L. Westin, Generalized Schur Methods with +*> Condition Estimators for Solving the Generalized Sylvester +*> Equation, IEEE Transactions on Automatic Control, Vol. 34, No. 7, +*> July 1989, pp 745-751. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTGSYL( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, DIF, WORK, LWORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, + $ LWORK, M, N + DOUBLE PRECISION DIF, SCALE +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ D( LDD, * ), E( LDE, * ), F( LDF, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* Replaced various illegal calls to DCOPY by calls to DLASET. +* Sven Hammarling, 1/5/02. +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOTRAN + INTEGER I, IE, IFUNC, IROUND, IS, ISOLVE, J, JE, JS, K, + $ LINFO, LWMIN, MB, NB, P, PPQQ, PQ, Q + DOUBLE PRECISION DSCALE, DSUM, SCALE2, SCALOC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLACPY, DLASET, DSCAL, DTGSY2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test input parameters +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -1 + ELSE IF( NOTRAN ) THEN + IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.4 ) ) THEN + INFO = -2 + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( M.LE.0 ) THEN + INFO = -3 + ELSE IF( N.LE.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDD.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDE.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDF.LT.MAX( 1, M ) ) THEN + INFO = -16 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + IF( NOTRAN ) THEN + IF( IJOB.EQ.1 .OR. IJOB.EQ.2 ) THEN + LWMIN = MAX( 1, 2*M*N ) + ELSE + LWMIN = 1 + END IF + ELSE + LWMIN = 1 + END IF + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTGSYL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + SCALE = 1 + IF( NOTRAN ) THEN + IF( IJOB.NE.0 ) THEN + DIF = 0 + END IF + END IF + RETURN + END IF +* +* Determine optimal block sizes MB and NB +* + MB = ILAENV( 2, 'DTGSYL', TRANS, M, N, -1, -1 ) + NB = ILAENV( 5, 'DTGSYL', TRANS, M, N, -1, -1 ) +* + ISOLVE = 1 + IFUNC = 0 + IF( NOTRAN ) THEN + IF( IJOB.GE.3 ) THEN + IFUNC = IJOB - 2 + CALL DLASET( 'F', M, N, ZERO, ZERO, C, LDC ) + CALL DLASET( 'F', M, N, ZERO, ZERO, F, LDF ) + ELSE IF( IJOB.GE.1 ) THEN + ISOLVE = 2 + END IF + END IF +* + IF( ( MB.LE.1 .AND. NB.LE.1 ) .OR. ( MB.GE.M .AND. NB.GE.N ) ) + $ THEN +* + DO 30 IROUND = 1, ISOLVE +* +* Use unblocked Level 2 solver +* + DSCALE = ZERO + DSUM = ONE + PQ = 0 + CALL DTGSY2( TRANS, IFUNC, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, DSUM, DSCALE, + $ IWORK, PQ, INFO ) + IF( DSCALE.NE.ZERO ) THEN + IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN + DIF = SQRT( DBLE( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) ) + ELSE + DIF = SQRT( DBLE( PQ ) ) / ( DSCALE*SQRT( DSUM ) ) + END IF + END IF +* + IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN + IF( NOTRAN ) THEN + IFUNC = IJOB + END IF + SCALE2 = SCALE + CALL DLACPY( 'F', M, N, C, LDC, WORK, M ) + CALL DLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M ) + CALL DLASET( 'F', M, N, ZERO, ZERO, C, LDC ) + CALL DLASET( 'F', M, N, ZERO, ZERO, F, LDF ) + ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN + CALL DLACPY( 'F', M, N, WORK, M, C, LDC ) + CALL DLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF ) + SCALE = SCALE2 + END IF + 30 CONTINUE +* + RETURN + END IF +* +* Determine block structure of A +* + P = 0 + I = 1 + 40 CONTINUE + IF( I.GT.M ) + $ GO TO 50 + P = P + 1 + IWORK( P ) = I + I = I + MB + IF( I.GE.M ) + $ GO TO 50 + IF( A( I, I-1 ).NE.ZERO ) + $ I = I + 1 + GO TO 40 + 50 CONTINUE +* + IWORK( P+1 ) = M + 1 + IF( IWORK( P ).EQ.IWORK( P+1 ) ) + $ P = P - 1 +* +* Determine block structure of B +* + Q = P + 1 + J = 1 + 60 CONTINUE + IF( J.GT.N ) + $ GO TO 70 + Q = Q + 1 + IWORK( Q ) = J + J = J + NB + IF( J.GE.N ) + $ GO TO 70 + IF( B( J, J-1 ).NE.ZERO ) + $ J = J + 1 + GO TO 60 + 70 CONTINUE +* + IWORK( Q+1 ) = N + 1 + IF( IWORK( Q ).EQ.IWORK( Q+1 ) ) + $ Q = Q - 1 +* + IF( NOTRAN ) THEN +* + DO 150 IROUND = 1, ISOLVE +* +* Solve (I, J)-subsystem +* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) +* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) +* for I = P, P - 1,..., 1; J = 1, 2,..., Q +* + DSCALE = ZERO + DSUM = ONE + PQ = 0 + SCALE = ONE + DO 130 J = P + 2, Q + JS = IWORK( J ) + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + DO 120 I = P, 1, -1 + IS = IWORK( I ) + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + PPQQ = 0 + CALL DTGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA, + $ B( JS, JS ), LDB, C( IS, JS ), LDC, + $ D( IS, IS ), LDD, E( JS, JS ), LDE, + $ F( IS, JS ), LDF, SCALOC, DSUM, DSCALE, + $ IWORK( Q+2 ), PPQQ, LINFO ) + IF( LINFO.GT.0 ) + $ INFO = LINFO +* + PQ = PQ + PPQQ + IF( SCALOC.NE.ONE ) THEN + DO 80 K = 1, JS - 1 + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 80 CONTINUE + DO 90 K = JS, JE + CALL DSCAL( IS-1, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( IS-1, SCALOC, F( 1, K ), 1 ) + 90 CONTINUE + DO 100 K = JS, JE + CALL DSCAL( M-IE, SCALOC, C( IE+1, K ), 1 ) + CALL DSCAL( M-IE, SCALOC, F( IE+1, K ), 1 ) + 100 CONTINUE + DO 110 K = JE + 1, N + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 110 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( I.GT.1 ) THEN + CALL DGEMM( 'N', 'N', IS-1, NB, MB, -ONE, + $ A( 1, IS ), LDA, C( IS, JS ), LDC, ONE, + $ C( 1, JS ), LDC ) + CALL DGEMM( 'N', 'N', IS-1, NB, MB, -ONE, + $ D( 1, IS ), LDD, C( IS, JS ), LDC, ONE, + $ F( 1, JS ), LDF ) + END IF + IF( J.LT.Q ) THEN + CALL DGEMM( 'N', 'N', MB, N-JE, NB, ONE, + $ F( IS, JS ), LDF, B( JS, JE+1 ), LDB, + $ ONE, C( IS, JE+1 ), LDC ) + CALL DGEMM( 'N', 'N', MB, N-JE, NB, ONE, + $ F( IS, JS ), LDF, E( JS, JE+1 ), LDE, + $ ONE, F( IS, JE+1 ), LDF ) + END IF + 120 CONTINUE + 130 CONTINUE + IF( DSCALE.NE.ZERO ) THEN + IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN + DIF = SQRT( DBLE( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) ) + ELSE + DIF = SQRT( DBLE( PQ ) ) / ( DSCALE*SQRT( DSUM ) ) + END IF + END IF + IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN + IF( NOTRAN ) THEN + IFUNC = IJOB + END IF + SCALE2 = SCALE + CALL DLACPY( 'F', M, N, C, LDC, WORK, M ) + CALL DLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M ) + CALL DLASET( 'F', M, N, ZERO, ZERO, C, LDC ) + CALL DLASET( 'F', M, N, ZERO, ZERO, F, LDF ) + ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN + CALL DLACPY( 'F', M, N, WORK, M, C, LDC ) + CALL DLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF ) + SCALE = SCALE2 + END IF + 150 CONTINUE +* + ELSE +* +* Solve transposed (I, J)-subsystem +* A(I, I)**T * R(I, J) + D(I, I)**T * L(I, J) = C(I, J) +* R(I, J) * B(J, J)**T + L(I, J) * E(J, J)**T = -F(I, J) +* for I = 1,2,..., P; J = Q, Q-1,..., 1 +* + SCALE = ONE + DO 210 I = 1, P + IS = IWORK( I ) + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + DO 200 J = Q, P + 2, -1 + JS = IWORK( J ) + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + CALL DTGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA, + $ B( JS, JS ), LDB, C( IS, JS ), LDC, + $ D( IS, IS ), LDD, E( JS, JS ), LDE, + $ F( IS, JS ), LDF, SCALOC, DSUM, DSCALE, + $ IWORK( Q+2 ), PPQQ, LINFO ) + IF( LINFO.GT.0 ) + $ INFO = LINFO + IF( SCALOC.NE.ONE ) THEN + DO 160 K = 1, JS - 1 + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 160 CONTINUE + DO 170 K = JS, JE + CALL DSCAL( IS-1, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( IS-1, SCALOC, F( 1, K ), 1 ) + 170 CONTINUE + DO 180 K = JS, JE + CALL DSCAL( M-IE, SCALOC, C( IE+1, K ), 1 ) + CALL DSCAL( M-IE, SCALOC, F( IE+1, K ), 1 ) + 180 CONTINUE + DO 190 K = JE + 1, N + CALL DSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL DSCAL( M, SCALOC, F( 1, K ), 1 ) + 190 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Substitute R(I, J) and L(I, J) into remaining equation. +* + IF( J.GT.P+2 ) THEN + CALL DGEMM( 'N', 'T', MB, JS-1, NB, ONE, C( IS, JS ), + $ LDC, B( 1, JS ), LDB, ONE, F( IS, 1 ), + $ LDF ) + CALL DGEMM( 'N', 'T', MB, JS-1, NB, ONE, F( IS, JS ), + $ LDF, E( 1, JS ), LDE, ONE, F( IS, 1 ), + $ LDF ) + END IF + IF( I.LT.P ) THEN + CALL DGEMM( 'T', 'N', M-IE, NB, MB, -ONE, + $ A( IS, IE+1 ), LDA, C( IS, JS ), LDC, ONE, + $ C( IE+1, JS ), LDC ) + CALL DGEMM( 'T', 'N', M-IE, NB, MB, -ONE, + $ D( IS, IE+1 ), LDD, F( IS, JS ), LDF, ONE, + $ C( IE+1, JS ), LDC ) + END IF + 200 CONTINUE + 210 CONTINUE +* + END IF +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of DTGSYL +* + END diff --git a/dspl/liblapack/SRC/dtpcon.f b/dspl/liblapack/SRC/dtpcon.f new file mode 100644 index 0000000..9932a76 --- /dev/null +++ b/dspl/liblapack/SRC/dtpcon.f @@ -0,0 +1,267 @@ +*> \brief \b DTPCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTPCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTPCON( NORM, UPLO, DIAG, N, AP, RCOND, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER INFO, N +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTPCON estimates the reciprocal of the condition number of a packed +*> triangular matrix A, in either the 1-norm or the infinity-norm. +*> +*> The norm of A is computed and an estimate is obtained for +*> norm(inv(A)), then the reciprocal of the condition number is +*> computed as +*> RCOND = 1 / ( norm(A) * norm(inv(A)) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> The upper or lower triangular matrix A, packed columnwise in +*> a linear array. The j-th column of A is stored in the array +*> AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> If DIAG = 'U', the diagonal elements of A are not referenced +*> and are assumed to be 1. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(norm(A) * norm(inv(A))). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DTPCON( NORM, UPLO, DIAG, N, AP, RCOND, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, N + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + DOUBLE PRECISION AINVNM, ANORM, SCALE, SMLNUM, XNORM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DLANTP + EXTERNAL LSAME, IDAMAX, DLAMCH, DLANTP +* .. +* .. External Subroutines .. + EXTERNAL DLACN2, DLATPS, DRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = DLAMCH( 'Safe minimum' )*DBLE( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = DLANTP( NORM, UPLO, DIAG, N, AP, WORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL DLATPS( UPLO, 'No transpose', DIAG, NORMIN, N, AP, + $ WORK, SCALE, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(A**T). +* + CALL DLATPS( UPLO, 'Transpose', DIAG, NORMIN, N, AP, + $ WORK, SCALE, WORK( 2*N+1 ), INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = IDAMAX( N, WORK, 1 ) + XNORM = ABS( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL DRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of DTPCON +* + END diff --git a/dspl/liblapack/SRC/dtplqt.f b/dspl/liblapack/SRC/dtplqt.f new file mode 100644 index 0000000..4712950 --- /dev/null +++ b/dspl/liblapack/SRC/dtplqt.f @@ -0,0 +1,270 @@ +*> \brief \b DTPLQT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTPQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTPLQT( M, N, L, MB, A, LDA, B, LDB, T, LDT, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LDT, N, M, L, MB +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTPLQT computes a blocked LQ factorization of a real +*> "triangular-pentagonal" matrix C, which is composed of a +*> triangular block A and pentagonal block B, using the compact +*> WY representation for Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B, and the order of the +*> triangular matrix A. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of rows of the lower trapezoidal part of B. +*> MIN(M,N) >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The block size to be used in the blocked QR. M >= MB >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,M) +*> On entry, the lower triangular M-by-M matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the lower triangular matrix L. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> On entry, the pentagonal M-by-N matrix B. The first N-L columns +*> are rectangular, and the last L columns are lower trapezoidal. +*> On exit, B contains the pentagonal matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,N) +*> The lower triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See Further Details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MB*M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The input matrix C is a M-by-(M+N) matrix +*> +*> C = [ A ] [ B ] +*> +*> +*> where A is an lower triangular M-by-M matrix, and B is M-by-N pentagonal +*> matrix consisting of a M-by-(N-L) rectangular matrix B1 on left of a M-by-L +*> upper trapezoidal matrix B2: +*> [ B ] = [ B1 ] [ B2 ] +*> [ B1 ] <- M-by-(N-L) rectangular +*> [ B2 ] <- M-by-L lower trapezoidal. +*> +*> The lower trapezoidal matrix B2 consists of the first L columns of a +*> M-by-M lower triangular matrix, where 0 <= L <= MIN(M,N). If L=0, +*> B is rectangular M-by-N; if M=L=N, B is lower triangular. +*> +*> The matrix W stores the elementary reflectors H(i) in the i-th row +*> above the diagonal (of A) in the M-by-(M+N) input matrix C +*> [ C ] = [ A ] [ B ] +*> [ A ] <- lower triangular M-by-M +*> [ B ] <- M-by-N pentagonal +*> +*> so that W can be represented as +*> [ W ] = [ I ] [ V ] +*> [ I ] <- identity, M-by-M +*> [ V ] <- M-by-N, same form as B. +*> +*> Thus, all of information needed for W is contained on exit in B, which +*> we call V above. Note that V has the same form as B; that is, +*> [ V ] = [ V1 ] [ V2 ] +*> [ V1 ] <- M-by-(N-L) rectangular +*> [ V2 ] <- M-by-L lower trapezoidal. +*> +*> The rows of V represent the vectors which define the H(i)'s. +*> +*> The number of blocks is B = ceiling(M/MB), where each +*> block is of order MB except for the last block, which is of order +*> IB = M - (M-1)*MB. For each of the B blocks, a upper triangular block +*> reflector factor is computed: T1, T2, ..., TB. The MB-by-MB (and IB-by-IB +*> for the last block) T's are stored in the MB-by-N matrix T as +*> +*> T = [T1 T2 ... TB]. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTPLQT( M, N, L, MB, A, LDA, B, LDB, T, LDT, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LDT, N, M, L, MB +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + INTEGER I, IB, LB, NB, IINFO +* .. +* .. External Subroutines .. + EXTERNAL DTPLQT2, DTPRFB, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( L.LT.0 .OR. (L.GT.MIN(M,N) .AND. MIN(M,N).GE.0)) THEN + INFO = -3 + ELSE IF( MB.LT.1 .OR. (MB.GT.M .AND. M.GT.0)) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -8 + ELSE IF( LDT.LT.MB ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTPLQT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) RETURN +* + DO I = 1, M, MB +* +* Compute the QR factorization of the current block +* + IB = MIN( M-I+1, MB ) + NB = MIN( N-L+I+IB-1, N ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = NB-N+L-I+1 + END IF +* + CALL DTPLQT2( IB, NB, LB, A(I,I), LDA, B( I, 1 ), LDB, + $ T(1, I ), LDT, IINFO ) +* +* Update by applying H**T to B(I+IB:M,:) from the right +* + IF( I+IB.LE.M ) THEN + CALL DTPRFB( 'R', 'N', 'F', 'R', M-I-IB+1, NB, IB, LB, + $ B( I, 1 ), LDB, T( 1, I ), LDT, + $ A( I+IB, I ), LDA, B( I+IB, 1 ), LDB, + $ WORK, M-I-IB+1) + END IF + END DO + RETURN +* +* End of DTPLQT +* + END diff --git a/dspl/liblapack/SRC/dtplqt2.f b/dspl/liblapack/SRC/dtplqt2.f new file mode 100644 index 0000000..e2fefb6 --- /dev/null +++ b/dspl/liblapack/SRC/dtplqt2.f @@ -0,0 +1,312 @@ +*> \brief \b DTPLQT2 computes a LQ factorization of a real or complex "triangular-pentagonal" matrix, which is composed of a triangular block and a pentagonal block, using the compact WY representation for Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTPLQT2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTPLQT2( M, N, L, A, LDA, B, LDB, T, LDT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LDT, N, M, L +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTPLQT2 computes a LQ a factorization of a real "triangular-pentagonal" +*> matrix C, which is composed of a triangular block A and pentagonal block B, +*> using the compact WY representation for Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The total number of rows of the matrix B. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B, and the order of +*> the triangular matrix A. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of rows of the lower trapezoidal part of B. +*> MIN(M,N) >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,M) +*> On entry, the lower triangular M-by-M matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the lower triangular matrix L. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> On entry, the pentagonal M-by-N matrix B. The first N-L columns +*> are rectangular, and the last L columns are lower trapezoidal. +*> On exit, B contains the pentagonal matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,M) +*> The N-by-N upper triangular factor T of the block reflector. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The input matrix C is a M-by-(M+N) matrix +*> +*> C = [ A ][ B ] +*> +*> +*> where A is an lower triangular M-by-M matrix, and B is M-by-N pentagonal +*> matrix consisting of a M-by-(N-L) rectangular matrix B1 left of a M-by-L +*> upper trapezoidal matrix B2: +*> +*> B = [ B1 ][ B2 ] +*> [ B1 ] <- M-by-(N-L) rectangular +*> [ B2 ] <- M-by-L lower trapezoidal. +*> +*> The lower trapezoidal matrix B2 consists of the first L columns of a +*> N-by-N lower triangular matrix, where 0 <= L <= MIN(M,N). If L=0, +*> B is rectangular M-by-N; if M=L=N, B is lower triangular. +*> +*> The matrix W stores the elementary reflectors H(i) in the i-th row +*> above the diagonal (of A) in the M-by-(M+N) input matrix C +*> +*> C = [ A ][ B ] +*> [ A ] <- lower triangular M-by-M +*> [ B ] <- M-by-N pentagonal +*> +*> so that W can be represented as +*> +*> W = [ I ][ V ] +*> [ I ] <- identity, M-by-M +*> [ V ] <- M-by-N, same form as B. +*> +*> Thus, all of information needed for W is contained on exit in B, which +*> we call V above. Note that V has the same form as B; that is, +*> +*> W = [ V1 ][ V2 ] +*> [ V1 ] <- M-by-(N-L) rectangular +*> [ V2 ] <- M-by-L lower trapezoidal. +*> +*> The rows of V represent the vectors which define the H(i)'s. +*> The (M+N)-by-(M+N) block reflector H is then given by +*> +*> H = I - W**T * T * W +*> +*> where W^H is the conjugate transpose of W and T is the upper triangular +*> factor of the block reflector. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTPLQT2( M, N, L, A, LDA, B, LDB, T, LDT, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LDT, N, M, L +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER( ONE = 1.0, ZERO = 0.0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, P, MP, NP + DOUBLE PRECISION ALPHA +* .. +* .. External Subroutines .. + EXTERNAL DLARFG, DGEMV, DGER, DTRMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( L.LT.0 .OR. L.GT.MIN(M,N) ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDT.LT.MAX( 1, M ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTPLQT2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) RETURN +* + DO I = 1, M +* +* Generate elementary reflector H(I) to annihilate B(I,:) +* + P = N-L+MIN( L, I ) + CALL DLARFG( P+1, A( I, I ), B( I, 1 ), LDB, T( 1, I ) ) + IF( I.LT.M ) THEN +* +* W(M-I:1) := C(I+1:M,I:N) * C(I,I:N) [use W = T(M,:)] +* + DO J = 1, M-I + T( M, J ) = (A( I+J, I )) + END DO + CALL DGEMV( 'N', M-I, P, ONE, B( I+1, 1 ), LDB, + $ B( I, 1 ), LDB, ONE, T( M, 1 ), LDT ) +* +* C(I+1:M,I:N) = C(I+1:M,I:N) + alpha * C(I,I:N)*W(M-1:1)^H +* + ALPHA = -(T( 1, I )) + DO J = 1, M-I + A( I+J, I ) = A( I+J, I ) + ALPHA*(T( M, J )) + END DO + CALL DGER( M-I, P, ALPHA, T( M, 1 ), LDT, + $ B( I, 1 ), LDB, B( I+1, 1 ), LDB ) + END IF + END DO +* + DO I = 2, M +* +* T(I,1:I-1) := C(I:I-1,1:N) * (alpha * C(I,I:N)^H) +* + ALPHA = -T( 1, I ) + + DO J = 1, I-1 + T( I, J ) = ZERO + END DO + P = MIN( I-1, L ) + NP = MIN( N-L+1, N ) + MP = MIN( P+1, M ) +* +* Triangular part of B2 +* + DO J = 1, P + T( I, J ) = ALPHA*B( I, N-L+J ) + END DO + CALL DTRMV( 'L', 'N', 'N', P, B( 1, NP ), LDB, + $ T( I, 1 ), LDT ) +* +* Rectangular part of B2 +* + CALL DGEMV( 'N', I-1-P, L, ALPHA, B( MP, NP ), LDB, + $ B( I, NP ), LDB, ZERO, T( I,MP ), LDT ) +* +* B1 +* + CALL DGEMV( 'N', I-1, N-L, ALPHA, B, LDB, B( I, 1 ), LDB, + $ ONE, T( I, 1 ), LDT ) +* +* T(1:I-1,I) := T(1:I-1,1:I-1) * T(I,1:I-1) +* + CALL DTRMV( 'L', 'T', 'N', I-1, T, LDT, T( I, 1 ), LDT ) +* +* T(I,I) = tau(I) +* + T( I, I ) = T( 1, I ) + T( 1, I ) = ZERO + END DO + DO I=1,M + DO J= I+1,M + T(I,J)=T(J,I) + T(J,I)= ZERO + END DO + END DO + +* +* End of DTPLQT2 +* + END diff --git a/dspl/liblapack/SRC/dtpmlqt.f b/dspl/liblapack/SRC/dtpmlqt.f new file mode 100644 index 0000000..3782d0c --- /dev/null +++ b/dspl/liblapack/SRC/dtpmlqt.f @@ -0,0 +1,366 @@ +*> \brief \b DTPMLQT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTPMQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTPMLQT( SIDE, TRANS, M, N, K, L, MB, V, LDV, T, LDT, +* A, LDA, B, LDB, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDV, LDA, LDB, M, N, L, MB, LDT +* .. +* .. Array Arguments .. +* DOUBLE PRECISION V( LDV, * ), A( LDA, * ), B( LDB, * ), +* $ T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTPMQRT applies a real orthogonal matrix Q obtained from a +*> "triangular-pentagonal" real block reflector H to a general +*> real matrix C, which consists of two blocks A and B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The order of the trapezoidal part of V. +*> K >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The block size used for the storage of T. K >= MB >= 1. +*> This must be the same value of MB used to generate T +*> in DTPLQT. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (LDA,K) +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DTPLQT in B. See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If SIDE = 'L', LDV >= max(1,M); +*> if SIDE = 'R', LDV >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,K) +*> The upper triangular factors of the block reflectors +*> as returned by DTPLQT, stored as a MB-by-K matrix. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension +*> (LDA,N) if SIDE = 'L' or +*> (LDA,K) if SIDE = 'R' +*> On entry, the K-by-N or M-by-K matrix A. +*> On exit, A is overwritten by the corresponding block of +*> Q*C or Q**T*C or C*Q or C*Q**T. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDC >= max(1,K); +*> If SIDE = 'R', LDC >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> On entry, the M-by-N matrix B. +*> On exit, B is overwritten by the corresponding block of +*> Q*C or Q**T*C or C*Q or C*Q**T. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. +*> LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array. The dimension of WORK is +*> N*MB if SIDE = 'L', or M*MB if SIDE = 'R'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The columns of the pentagonal matrix V contain the elementary reflectors +*> H(1), H(2), ..., H(K); V is composed of a rectangular block V1 and a +*> trapezoidal block V2: +*> +*> V = [V1] [V2]. +*> +*> +*> The size of the trapezoidal block V2 is determined by the parameter L, +*> where 0 <= L <= K; V2 is lower trapezoidal, consisting of the first L +*> rows of a K-by-K upper triangular matrix. If L=K, V2 is lower triangular; +*> if L=0, there is no trapezoidal block, hence V = V1 is rectangular. +*> +*> If SIDE = 'L': C = [A] where A is K-by-N, B is M-by-N and V is K-by-M. +*> [B] +*> +*> If SIDE = 'R': C = [A B] where A is M-by-K, B is M-by-N and V is K-by-N. +*> +*> The real orthogonal matrix Q is formed from V and T. +*> +*> If TRANS='N' and SIDE='L', C is on exit replaced with Q * C. +*> +*> If TRANS='T' and SIDE='L', C is on exit replaced with Q**T * C. +*> +*> If TRANS='N' and SIDE='R', C is on exit replaced with C * Q. +*> +*> If TRANS='T' and SIDE='R', C is on exit replaced with C * Q**T. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTPMLQT( SIDE, TRANS, M, N, K, L, MB, V, LDV, T, LDT, + $ A, LDA, B, LDB, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDV, LDA, LDB, M, N, L, MB, LDT +* .. +* .. Array Arguments .. + DOUBLE PRECISION V( LDV, * ), A( LDA, * ), B( LDB, * ), + $ T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN + INTEGER I, IB, NB, LB, KF, LDAQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DLARFB, DTPRFB +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* .. Test the input arguments .. +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + TRAN = LSAME( TRANS, 'T' ) + NOTRAN = LSAME( TRANS, 'N' ) +* + IF ( LEFT ) THEN + LDAQ = MAX( 1, K ) + ELSE IF ( RIGHT ) THEN + LDAQ = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. L.GT.K ) THEN + INFO = -6 + ELSE IF( MB.LT.1 .OR. (MB.GT.K .AND. K.GT.0) ) THEN + INFO = -7 + ELSE IF( LDV.LT.K ) THEN + INFO = -9 + ELSE IF( LDT.LT.MB ) THEN + INFO = -11 + ELSE IF( LDA.LT.LDAQ ) THEN + INFO = -13 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -15 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTPMLQT', -INFO ) + RETURN + END IF +* +* .. Quick return if possible .. +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) RETURN +* + IF( LEFT .AND. NOTRAN ) THEN +* + DO I = 1, K, MB + IB = MIN( MB, K-I+1 ) + NB = MIN( M-L+I+IB-1, M ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = 0 + END IF + CALL DTPRFB( 'L', 'T', 'F', 'R', NB, N, IB, LB, + $ V( I, 1 ), LDV, T( 1, I ), LDT, + $ A( I, 1 ), LDA, B, LDB, WORK, IB ) + END DO +* + ELSE IF( RIGHT .AND. TRAN ) THEN +* + DO I = 1, K, MB + IB = MIN( MB, K-I+1 ) + NB = MIN( N-L+I+IB-1, N ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = NB-N+L-I+1 + END IF + CALL DTPRFB( 'R', 'N', 'F', 'R', M, NB, IB, LB, + $ V( I, 1 ), LDV, T( 1, I ), LDT, + $ A( 1, I ), LDA, B, LDB, WORK, M ) + END DO +* + ELSE IF( LEFT .AND. TRAN ) THEN +* + KF = ((K-1)/MB)*MB+1 + DO I = KF, 1, -MB + IB = MIN( MB, K-I+1 ) + NB = MIN( M-L+I+IB-1, M ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = 0 + END IF + CALL DTPRFB( 'L', 'N', 'F', 'R', NB, N, IB, LB, + $ V( I, 1 ), LDV, T( 1, I ), LDT, + $ A( I, 1 ), LDA, B, LDB, WORK, IB ) + END DO +* + ELSE IF( RIGHT .AND. NOTRAN ) THEN +* + KF = ((K-1)/MB)*MB+1 + DO I = KF, 1, -MB + IB = MIN( MB, K-I+1 ) + NB = MIN( N-L+I+IB-1, N ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = NB-N+L-I+1 + END IF + CALL DTPRFB( 'R', 'T', 'F', 'R', M, NB, IB, LB, + $ V( I, 1 ), LDV, T( 1, I ), LDT, + $ A( 1, I ), LDA, B, LDB, WORK, M ) + END DO +* + END IF +* + RETURN +* +* End of DTPMLQT +* + END diff --git a/dspl/liblapack/SRC/dtpmqrt.f b/dspl/liblapack/SRC/dtpmqrt.f new file mode 100644 index 0000000..44985a8 --- /dev/null +++ b/dspl/liblapack/SRC/dtpmqrt.f @@ -0,0 +1,368 @@ +*> \brief \b DTPMQRT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTPMQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTPMQRT( SIDE, TRANS, M, N, K, L, NB, V, LDV, T, LDT, +* A, LDA, B, LDB, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDV, LDA, LDB, M, N, L, NB, LDT +* .. +* .. Array Arguments .. +* DOUBLE PRECISION V( LDV, * ), A( LDA, * ), B( LDB, * ), +* $ T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTPMQRT applies a real orthogonal matrix Q obtained from a +*> "triangular-pentagonal" real block reflector H to a general +*> real matrix C, which consists of two blocks A and B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The order of the trapezoidal part of V. +*> K >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The block size used for the storage of T. K >= NB >= 1. +*> This must be the same value of NB used to generate T +*> in CTPQRT. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> CTPQRT in B. See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If SIDE = 'L', LDV >= max(1,M); +*> if SIDE = 'R', LDV >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,K) +*> The upper triangular factors of the block reflectors +*> as returned by CTPQRT, stored as a NB-by-K matrix. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension +*> (LDA,N) if SIDE = 'L' or +*> (LDA,K) if SIDE = 'R' +*> On entry, the K-by-N or M-by-K matrix A. +*> On exit, A is overwritten by the corresponding block of +*> Q*C or Q**T*C or C*Q or C*Q**T. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDC >= max(1,K); +*> If SIDE = 'R', LDC >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> On entry, the M-by-N matrix B. +*> On exit, B is overwritten by the corresponding block of +*> Q*C or Q**T*C or C*Q or C*Q**T. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. +*> LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array. The dimension of WORK is +*> N*NB if SIDE = 'L', or M*NB if SIDE = 'R'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The columns of the pentagonal matrix V contain the elementary reflectors +*> H(1), H(2), ..., H(K); V is composed of a rectangular block V1 and a +*> trapezoidal block V2: +*> +*> V = [V1] +*> [V2]. +*> +*> The size of the trapezoidal block V2 is determined by the parameter L, +*> where 0 <= L <= K; V2 is upper trapezoidal, consisting of the first L +*> rows of a K-by-K upper triangular matrix. If L=K, V2 is upper triangular; +*> if L=0, there is no trapezoidal block, hence V = V1 is rectangular. +*> +*> If SIDE = 'L': C = [A] where A is K-by-N, B is M-by-N and V is M-by-K. +*> [B] +*> +*> If SIDE = 'R': C = [A B] where A is M-by-K, B is M-by-N and V is N-by-K. +*> +*> The real orthogonal matrix Q is formed from V and T. +*> +*> If TRANS='N' and SIDE='L', C is on exit replaced with Q * C. +*> +*> If TRANS='T' and SIDE='L', C is on exit replaced with Q**T * C. +*> +*> If TRANS='N' and SIDE='R', C is on exit replaced with C * Q. +*> +*> If TRANS='T' and SIDE='R', C is on exit replaced with C * Q**T. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTPMQRT( SIDE, TRANS, M, N, K, L, NB, V, LDV, T, LDT, + $ A, LDA, B, LDB, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDV, LDA, LDB, M, N, L, NB, LDT +* .. +* .. Array Arguments .. + DOUBLE PRECISION V( LDV, * ), A( LDA, * ), B( LDB, * ), + $ T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN + INTEGER I, IB, MB, LB, KF, LDAQ, LDVQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DTPRFB, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* .. Test the input arguments .. +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + TRAN = LSAME( TRANS, 'T' ) + NOTRAN = LSAME( TRANS, 'N' ) +* + IF ( LEFT ) THEN + LDVQ = MAX( 1, M ) + LDAQ = MAX( 1, K ) + ELSE IF ( RIGHT ) THEN + LDVQ = MAX( 1, N ) + LDAQ = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. L.GT.K ) THEN + INFO = -6 + ELSE IF( NB.LT.1 .OR. (NB.GT.K .AND. K.GT.0) ) THEN + INFO = -7 + ELSE IF( LDV.LT.LDVQ ) THEN + INFO = -9 + ELSE IF( LDT.LT.NB ) THEN + INFO = -11 + ELSE IF( LDA.LT.LDAQ ) THEN + INFO = -13 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -15 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTPMQRT', -INFO ) + RETURN + END IF +* +* .. Quick return if possible .. +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) RETURN +* + IF( LEFT .AND. TRAN ) THEN +* + DO I = 1, K, NB + IB = MIN( NB, K-I+1 ) + MB = MIN( M-L+I+IB-1, M ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = MB-M+L-I+1 + END IF + CALL DTPRFB( 'L', 'T', 'F', 'C', MB, N, IB, LB, + $ V( 1, I ), LDV, T( 1, I ), LDT, + $ A( I, 1 ), LDA, B, LDB, WORK, IB ) + END DO +* + ELSE IF( RIGHT .AND. NOTRAN ) THEN +* + DO I = 1, K, NB + IB = MIN( NB, K-I+1 ) + MB = MIN( N-L+I+IB-1, N ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = MB-N+L-I+1 + END IF + CALL DTPRFB( 'R', 'N', 'F', 'C', M, MB, IB, LB, + $ V( 1, I ), LDV, T( 1, I ), LDT, + $ A( 1, I ), LDA, B, LDB, WORK, M ) + END DO +* + ELSE IF( LEFT .AND. NOTRAN ) THEN +* + KF = ((K-1)/NB)*NB+1 + DO I = KF, 1, -NB + IB = MIN( NB, K-I+1 ) + MB = MIN( M-L+I+IB-1, M ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = MB-M+L-I+1 + END IF + CALL DTPRFB( 'L', 'N', 'F', 'C', MB, N, IB, LB, + $ V( 1, I ), LDV, T( 1, I ), LDT, + $ A( I, 1 ), LDA, B, LDB, WORK, IB ) + END DO +* + ELSE IF( RIGHT .AND. TRAN ) THEN +* + KF = ((K-1)/NB)*NB+1 + DO I = KF, 1, -NB + IB = MIN( NB, K-I+1 ) + MB = MIN( N-L+I+IB-1, N ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = MB-N+L-I+1 + END IF + CALL DTPRFB( 'R', 'T', 'F', 'C', M, MB, IB, LB, + $ V( 1, I ), LDV, T( 1, I ), LDT, + $ A( 1, I ), LDA, B, LDB, WORK, M ) + END DO +* + END IF +* + RETURN +* +* End of DTPMQRT +* + END diff --git a/dspl/liblapack/SRC/dtpqrt.f b/dspl/liblapack/SRC/dtpqrt.f new file mode 100644 index 0000000..1a3f954 --- /dev/null +++ b/dspl/liblapack/SRC/dtpqrt.f @@ -0,0 +1,270 @@ +*> \brief \b DTPQRT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTPQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTPQRT( M, N, L, NB, A, LDA, B, LDB, T, LDT, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LDT, N, M, L, NB +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTPQRT computes a blocked QR factorization of a real +*> "triangular-pentagonal" matrix C, which is composed of a +*> triangular block A and pentagonal block B, using the compact +*> WY representation for Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B, and the order of the +*> triangular matrix A. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of rows of the upper trapezoidal part of B. +*> MIN(M,N) >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The block size to be used in the blocked QR. N >= NB >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the upper triangular N-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the upper triangular matrix R. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> On entry, the pentagonal M-by-N matrix B. The first M-L rows +*> are rectangular, and the last L rows are upper trapezoidal. +*> On exit, B contains the pentagonal matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,N) +*> The upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See Further Details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (NB*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The input matrix C is a (N+M)-by-N matrix +*> +*> C = [ A ] +*> [ B ] +*> +*> where A is an upper triangular N-by-N matrix, and B is M-by-N pentagonal +*> matrix consisting of a (M-L)-by-N rectangular matrix B1 on top of a L-by-N +*> upper trapezoidal matrix B2: +*> +*> B = [ B1 ] <- (M-L)-by-N rectangular +*> [ B2 ] <- L-by-N upper trapezoidal. +*> +*> The upper trapezoidal matrix B2 consists of the first L rows of a +*> N-by-N upper triangular matrix, where 0 <= L <= MIN(M,N). If L=0, +*> B is rectangular M-by-N; if M=L=N, B is upper triangular. +*> +*> The matrix W stores the elementary reflectors H(i) in the i-th column +*> below the diagonal (of A) in the (N+M)-by-N input matrix C +*> +*> C = [ A ] <- upper triangular N-by-N +*> [ B ] <- M-by-N pentagonal +*> +*> so that W can be represented as +*> +*> W = [ I ] <- identity, N-by-N +*> [ V ] <- M-by-N, same form as B. +*> +*> Thus, all of information needed for W is contained on exit in B, which +*> we call V above. Note that V has the same form as B; that is, +*> +*> V = [ V1 ] <- (M-L)-by-N rectangular +*> [ V2 ] <- L-by-N upper trapezoidal. +*> +*> The columns of V represent the vectors which define the H(i)'s. +*> +*> The number of blocks is B = ceiling(N/NB), where each +*> block is of order NB except for the last block, which is of order +*> IB = N - (B-1)*NB. For each of the B blocks, a upper triangular block +*> reflector factor is computed: T1, T2, ..., TB. The NB-by-NB (and IB-by-IB +*> for the last block) T's are stored in the NB-by-N matrix T as +*> +*> T = [T1 T2 ... TB]. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTPQRT( M, N, L, NB, A, LDA, B, LDB, T, LDT, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LDT, N, M, L, NB +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + INTEGER I, IB, LB, MB, IINFO +* .. +* .. External Subroutines .. + EXTERNAL DTPQRT2, DTPRFB, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( L.LT.0 .OR. (L.GT.MIN(M,N) .AND. MIN(M,N).GE.0)) THEN + INFO = -3 + ELSE IF( NB.LT.1 .OR. (NB.GT.N .AND. N.GT.0)) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -8 + ELSE IF( LDT.LT.NB ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTPQRT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) RETURN +* + DO I = 1, N, NB +* +* Compute the QR factorization of the current block +* + IB = MIN( N-I+1, NB ) + MB = MIN( M-L+I+IB-1, M ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = MB-M+L-I+1 + END IF +* + CALL DTPQRT2( MB, IB, LB, A(I,I), LDA, B( 1, I ), LDB, + $ T(1, I ), LDT, IINFO ) +* +* Update by applying H**T to B(:,I+IB:N) from the left +* + IF( I+IB.LE.N ) THEN + CALL DTPRFB( 'L', 'T', 'F', 'C', MB, N-I-IB+1, IB, LB, + $ B( 1, I ), LDB, T( 1, I ), LDT, + $ A( I, I+IB ), LDA, B( 1, I+IB ), LDB, + $ WORK, IB ) + END IF + END DO + RETURN +* +* End of DTPQRT +* + END diff --git a/dspl/liblapack/SRC/dtpqrt2.f b/dspl/liblapack/SRC/dtpqrt2.f new file mode 100644 index 0000000..2e18f4e --- /dev/null +++ b/dspl/liblapack/SRC/dtpqrt2.f @@ -0,0 +1,302 @@ +*> \brief \b DTPQRT2 computes a QR factorization of a real or complex "triangular-pentagonal" matrix, which is composed of a triangular block and a pentagonal block, using the compact WY representation for Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTPQRT2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTPQRT2( M, N, L, A, LDA, B, LDB, T, LDT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LDT, N, M, L +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTPQRT2 computes a QR factorization of a real "triangular-pentagonal" +*> matrix C, which is composed of a triangular block A and pentagonal block B, +*> using the compact WY representation for Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The total number of rows of the matrix B. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B, and the order of +*> the triangular matrix A. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of rows of the upper trapezoidal part of B. +*> MIN(M,N) >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the upper triangular N-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the upper triangular matrix R. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> On entry, the pentagonal M-by-N matrix B. The first M-L rows +*> are rectangular, and the last L rows are upper trapezoidal. +*> On exit, B contains the pentagonal matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,N) +*> The N-by-N upper triangular factor T of the block reflector. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The input matrix C is a (N+M)-by-N matrix +*> +*> C = [ A ] +*> [ B ] +*> +*> where A is an upper triangular N-by-N matrix, and B is M-by-N pentagonal +*> matrix consisting of a (M-L)-by-N rectangular matrix B1 on top of a L-by-N +*> upper trapezoidal matrix B2: +*> +*> B = [ B1 ] <- (M-L)-by-N rectangular +*> [ B2 ] <- L-by-N upper trapezoidal. +*> +*> The upper trapezoidal matrix B2 consists of the first L rows of a +*> N-by-N upper triangular matrix, where 0 <= L <= MIN(M,N). If L=0, +*> B is rectangular M-by-N; if M=L=N, B is upper triangular. +*> +*> The matrix W stores the elementary reflectors H(i) in the i-th column +*> below the diagonal (of A) in the (N+M)-by-N input matrix C +*> +*> C = [ A ] <- upper triangular N-by-N +*> [ B ] <- M-by-N pentagonal +*> +*> so that W can be represented as +*> +*> W = [ I ] <- identity, N-by-N +*> [ V ] <- M-by-N, same form as B. +*> +*> Thus, all of information needed for W is contained on exit in B, which +*> we call V above. Note that V has the same form as B; that is, +*> +*> V = [ V1 ] <- (M-L)-by-N rectangular +*> [ V2 ] <- L-by-N upper trapezoidal. +*> +*> The columns of V represent the vectors which define the H(i)'s. +*> The (M+N)-by-(M+N) block reflector H is then given by +*> +*> H = I - W * T * W**T +*> +*> where W^H is the conjugate transpose of W and T is the upper triangular +*> factor of the block reflector. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTPQRT2( M, N, L, A, LDA, B, LDB, T, LDT, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LDT, N, M, L +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER( ONE = 1.0, ZERO = 0.0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, P, MP, NP + DOUBLE PRECISION ALPHA +* .. +* .. External Subroutines .. + EXTERNAL DLARFG, DGEMV, DGER, DTRMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( L.LT.0 .OR. L.GT.MIN(M,N) ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTPQRT2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) RETURN +* + DO I = 1, N +* +* Generate elementary reflector H(I) to annihilate B(:,I) +* + P = M-L+MIN( L, I ) + CALL DLARFG( P+1, A( I, I ), B( 1, I ), 1, T( I, 1 ) ) + IF( I.LT.N ) THEN +* +* W(1:N-I) := C(I:M,I+1:N)^H * C(I:M,I) [use W = T(:,N)] +* + DO J = 1, N-I + T( J, N ) = (A( I, I+J )) + END DO + CALL DGEMV( 'T', P, N-I, ONE, B( 1, I+1 ), LDB, + $ B( 1, I ), 1, ONE, T( 1, N ), 1 ) +* +* C(I:M,I+1:N) = C(I:m,I+1:N) + alpha*C(I:M,I)*W(1:N-1)^H +* + ALPHA = -(T( I, 1 )) + DO J = 1, N-I + A( I, I+J ) = A( I, I+J ) + ALPHA*(T( J, N )) + END DO + CALL DGER( P, N-I, ALPHA, B( 1, I ), 1, + $ T( 1, N ), 1, B( 1, I+1 ), LDB ) + END IF + END DO +* + DO I = 2, N +* +* T(1:I-1,I) := C(I:M,1:I-1)^H * (alpha * C(I:M,I)) +* + ALPHA = -T( I, 1 ) + + DO J = 1, I-1 + T( J, I ) = ZERO + END DO + P = MIN( I-1, L ) + MP = MIN( M-L+1, M ) + NP = MIN( P+1, N ) +* +* Triangular part of B2 +* + DO J = 1, P + T( J, I ) = ALPHA*B( M-L+J, I ) + END DO + CALL DTRMV( 'U', 'T', 'N', P, B( MP, 1 ), LDB, + $ T( 1, I ), 1 ) +* +* Rectangular part of B2 +* + CALL DGEMV( 'T', L, I-1-P, ALPHA, B( MP, NP ), LDB, + $ B( MP, I ), 1, ZERO, T( NP, I ), 1 ) +* +* B1 +* + CALL DGEMV( 'T', M-L, I-1, ALPHA, B, LDB, B( 1, I ), 1, + $ ONE, T( 1, I ), 1 ) +* +* T(1:I-1,I) := T(1:I-1,1:I-1) * T(1:I-1,I) +* + CALL DTRMV( 'U', 'N', 'N', I-1, T, LDT, T( 1, I ), 1 ) +* +* T(I,I) = tau(I) +* + T( I, I ) = T( I, 1 ) + T( I, 1 ) = ZERO + END DO + +* +* End of DTPQRT2 +* + END diff --git a/dspl/liblapack/SRC/dtprfb.f b/dspl/liblapack/SRC/dtprfb.f new file mode 100644 index 0000000..6ae8fad --- /dev/null +++ b/dspl/liblapack/SRC/dtprfb.f @@ -0,0 +1,811 @@ +*> \brief \b DTPRFB applies a real or complex "triangular-pentagonal" blocked reflector to a real or complex matrix, which is composed of two blocks. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTPRFB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTPRFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, +* V, LDV, T, LDT, A, LDA, B, LDB, WORK, LDWORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, SIDE, STOREV, TRANS +* INTEGER K, L, LDA, LDB, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), T( LDT, * ), +* $ V( LDV, * ), WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTPRFB applies a real "triangular-pentagonal" block reflector H or its +*> transpose H**T to a real matrix C, which is composed of two +*> blocks A and B, either from the left or right. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply H or H**T from the Left +*> = 'R': apply H or H**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply H (No transpose) +*> = 'T': apply H**T (Transpose) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Indicates how H is formed from a product of elementary +*> reflectors +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Indicates how the vectors which define the elementary +*> reflectors are stored: +*> = 'C': Columns +*> = 'R': Rows +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the matrix T, i.e. the number of elementary +*> reflectors whose product defines the block reflector. +*> K >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The order of the trapezoidal part of V. +*> K >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is DOUBLE PRECISION array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,M) if STOREV = 'R' and SIDE = 'L' +*> (LDV,N) if STOREV = 'R' and SIDE = 'R' +*> The pentagonal matrix V, which contains the elementary reflectors +*> H(1), H(2), ..., H(K). See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); +*> if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); +*> if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,K) +*> The triangular K-by-K matrix T in the representation of the +*> block reflector. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. +*> LDT >= K. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension +*> (LDA,N) if SIDE = 'L' or (LDA,K) if SIDE = 'R' +*> On entry, the K-by-N or M-by-K matrix A. +*> On exit, A is overwritten by the corresponding block of +*> H*C or H**T*C or C*H or C*H**T. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDC >= max(1,K); +*> If SIDE = 'R', LDC >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> On entry, the M-by-N matrix B. +*> On exit, B is overwritten by the corresponding block of +*> H*C or H**T*C or C*H or C*H**T. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. +*> LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension +*> (LDWORK,N) if SIDE = 'L', +*> (LDWORK,K) if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. +*> If SIDE = 'L', LDWORK >= K; +*> if SIDE = 'R', LDWORK >= M. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix C is a composite matrix formed from blocks A and B. +*> The block B is of size M-by-N; if SIDE = 'R', A is of size M-by-K, +*> and if SIDE = 'L', A is of size K-by-N. +*> +*> If SIDE = 'R' and DIRECT = 'F', C = [A B]. +*> +*> If SIDE = 'L' and DIRECT = 'F', C = [A] +*> [B]. +*> +*> If SIDE = 'R' and DIRECT = 'B', C = [B A]. +*> +*> If SIDE = 'L' and DIRECT = 'B', C = [B] +*> [A]. +*> +*> The pentagonal matrix V is composed of a rectangular block V1 and a +*> trapezoidal block V2. The size of the trapezoidal block is determined by +*> the parameter L, where 0<=L<=K. If L=K, the V2 block of V is triangular; +*> if L=0, there is no trapezoidal block, thus V = V1 is rectangular. +*> +*> If DIRECT = 'F' and STOREV = 'C': V = [V1] +*> [V2] +*> - V2 is upper trapezoidal (first L rows of K-by-K upper triangular) +*> +*> If DIRECT = 'F' and STOREV = 'R': V = [V1 V2] +*> +*> - V2 is lower trapezoidal (first L columns of K-by-K lower triangular) +*> +*> If DIRECT = 'B' and STOREV = 'C': V = [V2] +*> [V1] +*> - V2 is lower trapezoidal (last L rows of K-by-K lower triangular) +*> +*> If DIRECT = 'B' and STOREV = 'R': V = [V2 V1] +*> +*> - V2 is upper trapezoidal (last L columns of K-by-K upper triangular) +*> +*> If STOREV = 'C' and SIDE = 'L', V is M-by-K with V2 L-by-K. +*> +*> If STOREV = 'C' and SIDE = 'R', V is N-by-K with V2 L-by-K. +*> +*> If STOREV = 'R' and SIDE = 'L', V is K-by-M with V2 K-by-L. +*> +*> If STOREV = 'R' and SIDE = 'R', V is K-by-N with V2 K-by-L. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTPRFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, + $ V, LDV, T, LDT, A, LDA, B, LDB, WORK, LDWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, L, LDA, LDB, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), T( LDT, * ), + $ V( LDV, * ), WORK( LDWORK, * ) +* .. +* +* ========================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0, ZERO = 0.0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, MP, NP, KP + LOGICAL LEFT, FORWARD, COLUMN, RIGHT, BACKWARD, ROW +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DTRMM +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 .OR. L.LT.0 ) RETURN +* + IF( LSAME( STOREV, 'C' ) ) THEN + COLUMN = .TRUE. + ROW = .FALSE. + ELSE IF ( LSAME( STOREV, 'R' ) ) THEN + COLUMN = .FALSE. + ROW = .TRUE. + ELSE + COLUMN = .FALSE. + ROW = .FALSE. + END IF +* + IF( LSAME( SIDE, 'L' ) ) THEN + LEFT = .TRUE. + RIGHT = .FALSE. + ELSE IF( LSAME( SIDE, 'R' ) ) THEN + LEFT = .FALSE. + RIGHT = .TRUE. + ELSE + LEFT = .FALSE. + RIGHT = .FALSE. + END IF +* + IF( LSAME( DIRECT, 'F' ) ) THEN + FORWARD = .TRUE. + BACKWARD = .FALSE. + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + FORWARD = .FALSE. + BACKWARD = .TRUE. + ELSE + FORWARD = .FALSE. + BACKWARD = .FALSE. + END IF +* +* --------------------------------------------------------------------------- +* + IF( COLUMN .AND. FORWARD .AND. LEFT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ I ] (K-by-K) +* [ V ] (M-by-K) +* +* Form H C or H**T C where C = [ A ] (K-by-N) +* [ B ] (M-by-N) +* +* H = I - W T W**T or H**T = I - W T**T W**T +* +* A = A - T (A + V**T B) or A = A - T**T (A + V**T B) +* B = B - V T (A + V**T B) or B = B - V T**T (A + V**T B) +* +* --------------------------------------------------------------------------- +* + MP = MIN( M-L+1, M ) + KP = MIN( L+1, K ) +* + DO J = 1, N + DO I = 1, L + WORK( I, J ) = B( M-L+I, J ) + END DO + END DO + CALL DTRMM( 'L', 'U', 'T', 'N', L, N, ONE, V( MP, 1 ), LDV, + $ WORK, LDWORK ) + CALL DGEMM( 'T', 'N', L, N, M-L, ONE, V, LDV, B, LDB, + $ ONE, WORK, LDWORK ) + CALL DGEMM( 'T', 'N', K-L, N, M, ONE, V( 1, KP ), LDV, + $ B, LDB, ZERO, WORK( KP, 1 ), LDWORK ) +* + DO J = 1, N + DO I = 1, K + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL DTRMM( 'L', 'U', TRANS, 'N', K, N, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL DGEMM( 'N', 'N', M-L, N, K, -ONE, V, LDV, WORK, LDWORK, + $ ONE, B, LDB ) + CALL DGEMM( 'N', 'N', L, N, K-L, -ONE, V( MP, KP ), LDV, + $ WORK( KP, 1 ), LDWORK, ONE, B( MP, 1 ), LDB ) + CALL DTRMM( 'L', 'U', 'N', 'N', L, N, ONE, V( MP, 1 ), LDV, + $ WORK, LDWORK ) + DO J = 1, N + DO I = 1, L + B( M-L+I, J ) = B( M-L+I, J ) - WORK( I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( COLUMN .AND. FORWARD .AND. RIGHT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ I ] (K-by-K) +* [ V ] (N-by-K) +* +* Form C H or C H**T where C = [ A B ] (A is M-by-K, B is M-by-N) +* +* H = I - W T W**T or H**T = I - W T**T W**T +* +* A = A - (A + B V) T or A = A - (A + B V) T**T +* B = B - (A + B V) T V**T or B = B - (A + B V) T**T V**T +* +* --------------------------------------------------------------------------- +* + NP = MIN( N-L+1, N ) + KP = MIN( L+1, K ) +* + DO J = 1, L + DO I = 1, M + WORK( I, J ) = B( I, N-L+J ) + END DO + END DO + CALL DTRMM( 'R', 'U', 'N', 'N', M, L, ONE, V( NP, 1 ), LDV, + $ WORK, LDWORK ) + CALL DGEMM( 'N', 'N', M, L, N-L, ONE, B, LDB, + $ V, LDV, ONE, WORK, LDWORK ) + CALL DGEMM( 'N', 'N', M, K-L, N, ONE, B, LDB, + $ V( 1, KP ), LDV, ZERO, WORK( 1, KP ), LDWORK ) +* + DO J = 1, K + DO I = 1, M + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL DTRMM( 'R', 'U', TRANS, 'N', M, K, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL DGEMM( 'N', 'T', M, N-L, K, -ONE, WORK, LDWORK, + $ V, LDV, ONE, B, LDB ) + CALL DGEMM( 'N', 'T', M, L, K-L, -ONE, WORK( 1, KP ), LDWORK, + $ V( NP, KP ), LDV, ONE, B( 1, NP ), LDB ) + CALL DTRMM( 'R', 'U', 'T', 'N', M, L, ONE, V( NP, 1 ), LDV, + $ WORK, LDWORK ) + DO J = 1, L + DO I = 1, M + B( I, N-L+J ) = B( I, N-L+J ) - WORK( I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( COLUMN .AND. BACKWARD .AND. LEFT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ V ] (M-by-K) +* [ I ] (K-by-K) +* +* Form H C or H**T C where C = [ B ] (M-by-N) +* [ A ] (K-by-N) +* +* H = I - W T W**T or H**T = I - W T**T W**T +* +* A = A - T (A + V**T B) or A = A - T**T (A + V**T B) +* B = B - V T (A + V**T B) or B = B - V T**T (A + V**T B) +* +* --------------------------------------------------------------------------- +* + MP = MIN( L+1, M ) + KP = MIN( K-L+1, K ) +* + DO J = 1, N + DO I = 1, L + WORK( K-L+I, J ) = B( I, J ) + END DO + END DO +* + CALL DTRMM( 'L', 'L', 'T', 'N', L, N, ONE, V( 1, KP ), LDV, + $ WORK( KP, 1 ), LDWORK ) + CALL DGEMM( 'T', 'N', L, N, M-L, ONE, V( MP, KP ), LDV, + $ B( MP, 1 ), LDB, ONE, WORK( KP, 1 ), LDWORK ) + CALL DGEMM( 'T', 'N', K-L, N, M, ONE, V, LDV, + $ B, LDB, ZERO, WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL DTRMM( 'L', 'L', TRANS, 'N', K, N, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL DGEMM( 'N', 'N', M-L, N, K, -ONE, V( MP, 1 ), LDV, + $ WORK, LDWORK, ONE, B( MP, 1 ), LDB ) + CALL DGEMM( 'N', 'N', L, N, K-L, -ONE, V, LDV, + $ WORK, LDWORK, ONE, B, LDB ) + CALL DTRMM( 'L', 'L', 'N', 'N', L, N, ONE, V( 1, KP ), LDV, + $ WORK( KP, 1 ), LDWORK ) + DO J = 1, N + DO I = 1, L + B( I, J ) = B( I, J ) - WORK( K-L+I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( COLUMN .AND. BACKWARD .AND. RIGHT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ V ] (N-by-K) +* [ I ] (K-by-K) +* +* Form C H or C H**T where C = [ B A ] (B is M-by-N, A is M-by-K) +* +* H = I - W T W**T or H**T = I - W T**T W**T +* +* A = A - (A + B V) T or A = A - (A + B V) T**T +* B = B - (A + B V) T V**T or B = B - (A + B V) T**T V**T +* +* --------------------------------------------------------------------------- +* + NP = MIN( L+1, N ) + KP = MIN( K-L+1, K ) +* + DO J = 1, L + DO I = 1, M + WORK( I, K-L+J ) = B( I, J ) + END DO + END DO + CALL DTRMM( 'R', 'L', 'N', 'N', M, L, ONE, V( 1, KP ), LDV, + $ WORK( 1, KP ), LDWORK ) + CALL DGEMM( 'N', 'N', M, L, N-L, ONE, B( 1, NP ), LDB, + $ V( NP, KP ), LDV, ONE, WORK( 1, KP ), LDWORK ) + CALL DGEMM( 'N', 'N', M, K-L, N, ONE, B, LDB, + $ V, LDV, ZERO, WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL DTRMM( 'R', 'L', TRANS, 'N', M, K, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL DGEMM( 'N', 'T', M, N-L, K, -ONE, WORK, LDWORK, + $ V( NP, 1 ), LDV, ONE, B( 1, NP ), LDB ) + CALL DGEMM( 'N', 'T', M, L, K-L, -ONE, WORK, LDWORK, + $ V, LDV, ONE, B, LDB ) + CALL DTRMM( 'R', 'L', 'T', 'N', M, L, ONE, V( 1, KP ), LDV, + $ WORK( 1, KP ), LDWORK ) + DO J = 1, L + DO I = 1, M + B( I, J ) = B( I, J ) - WORK( I, K-L+J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( ROW .AND. FORWARD .AND. LEFT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ I V ] ( I is K-by-K, V is K-by-M ) +* +* Form H C or H**T C where C = [ A ] (K-by-N) +* [ B ] (M-by-N) +* +* H = I - W**T T W or H**T = I - W**T T**T W +* +* A = A - T (A + V B) or A = A - T**T (A + V B) +* B = B - V**T T (A + V B) or B = B - V**T T**T (A + V B) +* +* --------------------------------------------------------------------------- +* + MP = MIN( M-L+1, M ) + KP = MIN( L+1, K ) +* + DO J = 1, N + DO I = 1, L + WORK( I, J ) = B( M-L+I, J ) + END DO + END DO + CALL DTRMM( 'L', 'L', 'N', 'N', L, N, ONE, V( 1, MP ), LDV, + $ WORK, LDB ) + CALL DGEMM( 'N', 'N', L, N, M-L, ONE, V, LDV,B, LDB, + $ ONE, WORK, LDWORK ) + CALL DGEMM( 'N', 'N', K-L, N, M, ONE, V( KP, 1 ), LDV, + $ B, LDB, ZERO, WORK( KP, 1 ), LDWORK ) +* + DO J = 1, N + DO I = 1, K + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL DTRMM( 'L', 'U', TRANS, 'N', K, N, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL DGEMM( 'T', 'N', M-L, N, K, -ONE, V, LDV, WORK, LDWORK, + $ ONE, B, LDB ) + CALL DGEMM( 'T', 'N', L, N, K-L, -ONE, V( KP, MP ), LDV, + $ WORK( KP, 1 ), LDWORK, ONE, B( MP, 1 ), LDB ) + CALL DTRMM( 'L', 'L', 'T', 'N', L, N, ONE, V( 1, MP ), LDV, + $ WORK, LDWORK ) + DO J = 1, N + DO I = 1, L + B( M-L+I, J ) = B( M-L+I, J ) - WORK( I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( ROW .AND. FORWARD .AND. RIGHT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ I V ] ( I is K-by-K, V is K-by-N ) +* +* Form C H or C H**T where C = [ A B ] (A is M-by-K, B is M-by-N) +* +* H = I - W**T T W or H**T = I - W**T T**T W +* +* A = A - (A + B V**T) T or A = A - (A + B V**T) T**T +* B = B - (A + B V**T) T V or B = B - (A + B V**T) T**T V +* +* --------------------------------------------------------------------------- +* + NP = MIN( N-L+1, N ) + KP = MIN( L+1, K ) +* + DO J = 1, L + DO I = 1, M + WORK( I, J ) = B( I, N-L+J ) + END DO + END DO + CALL DTRMM( 'R', 'L', 'T', 'N', M, L, ONE, V( 1, NP ), LDV, + $ WORK, LDWORK ) + CALL DGEMM( 'N', 'T', M, L, N-L, ONE, B, LDB, V, LDV, + $ ONE, WORK, LDWORK ) + CALL DGEMM( 'N', 'T', M, K-L, N, ONE, B, LDB, + $ V( KP, 1 ), LDV, ZERO, WORK( 1, KP ), LDWORK ) +* + DO J = 1, K + DO I = 1, M + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL DTRMM( 'R', 'U', TRANS, 'N', M, K, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL DGEMM( 'N', 'N', M, N-L, K, -ONE, WORK, LDWORK, + $ V, LDV, ONE, B, LDB ) + CALL DGEMM( 'N', 'N', M, L, K-L, -ONE, WORK( 1, KP ), LDWORK, + $ V( KP, NP ), LDV, ONE, B( 1, NP ), LDB ) + CALL DTRMM( 'R', 'L', 'N', 'N', M, L, ONE, V( 1, NP ), LDV, + $ WORK, LDWORK ) + DO J = 1, L + DO I = 1, M + B( I, N-L+J ) = B( I, N-L+J ) - WORK( I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( ROW .AND. BACKWARD .AND. LEFT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ V I ] ( I is K-by-K, V is K-by-M ) +* +* Form H C or H**T C where C = [ B ] (M-by-N) +* [ A ] (K-by-N) +* +* H = I - W**T T W or H**T = I - W**T T**T W +* +* A = A - T (A + V B) or A = A - T**T (A + V B) +* B = B - V**T T (A + V B) or B = B - V**T T**T (A + V B) +* +* --------------------------------------------------------------------------- +* + MP = MIN( L+1, M ) + KP = MIN( K-L+1, K ) +* + DO J = 1, N + DO I = 1, L + WORK( K-L+I, J ) = B( I, J ) + END DO + END DO + CALL DTRMM( 'L', 'U', 'N', 'N', L, N, ONE, V( KP, 1 ), LDV, + $ WORK( KP, 1 ), LDWORK ) + CALL DGEMM( 'N', 'N', L, N, M-L, ONE, V( KP, MP ), LDV, + $ B( MP, 1 ), LDB, ONE, WORK( KP, 1 ), LDWORK ) + CALL DGEMM( 'N', 'N', K-L, N, M, ONE, V, LDV, B, LDB, + $ ZERO, WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL DTRMM( 'L', 'L ', TRANS, 'N', K, N, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL DGEMM( 'T', 'N', M-L, N, K, -ONE, V( 1, MP ), LDV, + $ WORK, LDWORK, ONE, B( MP, 1 ), LDB ) + CALL DGEMM( 'T', 'N', L, N, K-L, -ONE, V, LDV, + $ WORK, LDWORK, ONE, B, LDB ) + CALL DTRMM( 'L', 'U', 'T', 'N', L, N, ONE, V( KP, 1 ), LDV, + $ WORK( KP, 1 ), LDWORK ) + DO J = 1, N + DO I = 1, L + B( I, J ) = B( I, J ) - WORK( K-L+I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( ROW .AND. BACKWARD .AND. RIGHT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ V I ] ( I is K-by-K, V is K-by-N ) +* +* Form C H or C H**T where C = [ B A ] (A is M-by-K, B is M-by-N) +* +* H = I - W**T T W or H**T = I - W**T T**T W +* +* A = A - (A + B V**T) T or A = A - (A + B V**T) T**T +* B = B - (A + B V**T) T V or B = B - (A + B V**T) T**T V +* +* --------------------------------------------------------------------------- +* + NP = MIN( L+1, N ) + KP = MIN( K-L+1, K ) +* + DO J = 1, L + DO I = 1, M + WORK( I, K-L+J ) = B( I, J ) + END DO + END DO + CALL DTRMM( 'R', 'U', 'T', 'N', M, L, ONE, V( KP, 1 ), LDV, + $ WORK( 1, KP ), LDWORK ) + CALL DGEMM( 'N', 'T', M, L, N-L, ONE, B( 1, NP ), LDB, + $ V( KP, NP ), LDV, ONE, WORK( 1, KP ), LDWORK ) + CALL DGEMM( 'N', 'T', M, K-L, N, ONE, B, LDB, V, LDV, + $ ZERO, WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL DTRMM( 'R', 'L', TRANS, 'N', M, K, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL DGEMM( 'N', 'N', M, N-L, K, -ONE, WORK, LDWORK, + $ V( 1, NP ), LDV, ONE, B( 1, NP ), LDB ) + CALL DGEMM( 'N', 'N', M, L, K-L , -ONE, WORK, LDWORK, + $ V, LDV, ONE, B, LDB ) + CALL DTRMM( 'R', 'U', 'N', 'N', M, L, ONE, V( KP, 1 ), LDV, + $ WORK( 1, KP ), LDWORK ) + DO J = 1, L + DO I = 1, M + B( I, J ) = B( I, J ) - WORK( I, K-L+J ) + END DO + END DO +* + END IF +* + RETURN +* +* End of DTPRFB +* + END diff --git a/dspl/liblapack/SRC/dtprfs.f b/dspl/liblapack/SRC/dtprfs.f new file mode 100644 index 0000000..2dc427e --- /dev/null +++ b/dspl/liblapack/SRC/dtprfs.f @@ -0,0 +1,473 @@ +*> \brief \b DTPRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTPRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTPRFS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, X, LDX, +* FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION AP( * ), B( LDB, * ), BERR( * ), FERR( * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTPRFS provides error bounds and backward error estimates for the +*> solution to a system of linear equations with a triangular packed +*> coefficient matrix. +*> +*> The solution matrix X must be computed by DTPTRS or some other +*> means before entering this routine. DTPRFS does not do iterative +*> refinement because doing so cannot improve the backward error. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> The upper or lower triangular matrix A, packed columnwise in +*> a linear array. The j-th column of A is stored in the array +*> AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> If DIAG = 'U', the diagonal elements of A are not referenced +*> and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> The solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DTPRFS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, X, LDX, + $ FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION AP( * ), B( LDB, * ), BERR( * ), FERR( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANST + INTEGER I, J, K, KASE, KC, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLACN2, DTPMV, DTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A or A**T, depending on TRANS. +* + CALL DCOPY( N, X( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DTPMV( UPLO, TRANS, DIAG, N, AP, WORK( N+1 ), 1 ) + CALL DAXPY( N, -ONE, B( 1, J ), 1, WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + KC = 1 + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = ABS( X( K, J ) ) + DO 30 I = 1, K + WORK( I ) = WORK( I ) + ABS( AP( KC+I-1 ) )*XK + 30 CONTINUE + KC = KC + K + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = ABS( X( K, J ) ) + DO 50 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( AP( KC+I-1 ) )*XK + 50 CONTINUE + WORK( K ) = WORK( K ) + XK + KC = KC + K + 60 CONTINUE + END IF + ELSE + KC = 1 + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = ABS( X( K, J ) ) + DO 70 I = K, N + WORK( I ) = WORK( I ) + ABS( AP( KC+I-K ) )*XK + 70 CONTINUE + KC = KC + N - K + 1 + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = ABS( X( K, J ) ) + DO 90 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( AP( KC+I-K ) )*XK + 90 CONTINUE + WORK( K ) = WORK( K ) + XK + KC = KC + N - K + 1 + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A**T)*abs(X) + abs(B). +* + IF( UPPER ) THEN + KC = 1 + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = 1, K + S = S + ABS( AP( KC+I-1 ) )*ABS( X( I, J ) ) + 110 CONTINUE + WORK( K ) = WORK( K ) + S + KC = KC + K + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = ABS( X( K, J ) ) + DO 130 I = 1, K - 1 + S = S + ABS( AP( KC+I-1 ) )*ABS( X( I, J ) ) + 130 CONTINUE + WORK( K ) = WORK( K ) + S + KC = KC + K + 140 CONTINUE + END IF + ELSE + KC = 1 + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, N + S = S + ABS( AP( KC+I-K ) )*ABS( X( I, J ) ) + 150 CONTINUE + WORK( K ) = WORK( K ) + S + KC = KC + N - K + 1 + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = ABS( X( K, J ) ) + DO 170 I = K + 1, N + S = S + ABS( AP( KC+I-K ) )*ABS( X( I, J ) ) + 170 CONTINUE + WORK( K ) = WORK( K ) + S + KC = KC + N - K + 1 + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL DLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**T). +* + CALL DTPSV( UPLO, TRANST, DIAG, N, AP, WORK( N+1 ), 1 ) + DO 220 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 230 CONTINUE + CALL DTPSV( UPLO, TRANS, DIAG, N, AP, WORK( N+1 ), 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of DTPRFS +* + END diff --git a/dspl/liblapack/SRC/dtptri.f b/dspl/liblapack/SRC/dtptri.f new file mode 100644 index 0000000..32f3834 --- /dev/null +++ b/dspl/liblapack/SRC/dtptri.f @@ -0,0 +1,241 @@ +*> \brief \b DTPTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTPTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTPTRI( UPLO, DIAG, N, AP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTPTRI computes the inverse of a real upper or lower triangular +*> matrix A stored in packed format. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangular matrix A, stored +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*((2*n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> On exit, the (triangular) inverse of the original matrix, in +*> the same packed storage format. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, A(i,i) is exactly zero. The triangular +*> matrix is singular and its inverse can not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> A triangular matrix A can be transferred to packed storage using one +*> of the following program segments: +*> +*> UPLO = 'U': UPLO = 'L': +*> +*> JC = 1 JC = 1 +*> DO 2 J = 1, N DO 2 J = 1, N +*> DO 1 I = 1, J DO 1 I = J, N +*> AP(JC+I-1) = A(I,J) AP(JC+I-J) = A(I,J) +*> 1 CONTINUE 1 CONTINUE +*> JC = JC + J JC = JC + N - J + 1 +*> 2 CONTINUE 2 CONTINUE +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTPTRI( UPLO, DIAG, N, AP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JC, JCLAST, JJ + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DTPMV, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTPTRI', -INFO ) + RETURN + END IF +* +* Check for singularity if non-unit. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + JJ = 0 + DO 10 INFO = 1, N + JJ = JJ + INFO + IF( AP( JJ ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE + JJ = 1 + DO 20 INFO = 1, N + IF( AP( JJ ).EQ.ZERO ) + $ RETURN + JJ = JJ + N - INFO + 1 + 20 CONTINUE + END IF + INFO = 0 + END IF +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix. +* + JC = 1 + DO 30 J = 1, N + IF( NOUNIT ) THEN + AP( JC+J-1 ) = ONE / AP( JC+J-1 ) + AJJ = -AP( JC+J-1 ) + ELSE + AJJ = -ONE + END IF +* +* Compute elements 1:j-1 of j-th column. +* + CALL DTPMV( 'Upper', 'No transpose', DIAG, J-1, AP, + $ AP( JC ), 1 ) + CALL DSCAL( J-1, AJJ, AP( JC ), 1 ) + JC = JC + J + 30 CONTINUE +* + ELSE +* +* Compute inverse of lower triangular matrix. +* + JC = N*( N+1 ) / 2 + DO 40 J = N, 1, -1 + IF( NOUNIT ) THEN + AP( JC ) = ONE / AP( JC ) + AJJ = -AP( JC ) + ELSE + AJJ = -ONE + END IF + IF( J.LT.N ) THEN +* +* Compute elements j+1:n of j-th column. +* + CALL DTPMV( 'Lower', 'No transpose', DIAG, N-J, + $ AP( JCLAST ), AP( JC+1 ), 1 ) + CALL DSCAL( N-J, AJJ, AP( JC+1 ), 1 ) + END IF + JCLAST = JC + JC = JC - N + J - 2 + 40 CONTINUE + END IF +* + RETURN +* +* End of DTPTRI +* + END diff --git a/dspl/liblapack/SRC/dtptrs.f b/dspl/liblapack/SRC/dtptrs.f new file mode 100644 index 0000000..c627241 --- /dev/null +++ b/dspl/liblapack/SRC/dtptrs.f @@ -0,0 +1,228 @@ +*> \brief \b DTPTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTPTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTPTRS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTPTRS solves a triangular system of the form +*> +*> A * X = B or A**T * X = B, +*> +*> where A is a triangular matrix of order N stored in packed format, +*> and B is an N-by-NRHS matrix. A check is made to verify that A is +*> nonsingular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> The upper or lower triangular matrix A, packed columnwise in +*> a linear array. The j-th column of A is stored in the array +*> AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, if INFO = 0, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of A is zero, +*> indicating that the matrix is singular and the +*> solutions X have not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DTPTRS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JC +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DTPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + JC = 1 + DO 10 INFO = 1, N + IF( AP( JC+INFO-1 ).EQ.ZERO ) + $ RETURN + JC = JC + INFO + 10 CONTINUE + ELSE + JC = 1 + DO 20 INFO = 1, N + IF( AP( JC ).EQ.ZERO ) + $ RETURN + JC = JC + N - INFO + 1 + 20 CONTINUE + END IF + END IF + INFO = 0 +* +* Solve A * x = b or A**T * x = b. +* + DO 30 J = 1, NRHS + CALL DTPSV( UPLO, TRANS, DIAG, N, AP, B( 1, J ), 1 ) + 30 CONTINUE +* + RETURN +* +* End of DTPTRS +* + END diff --git a/dspl/liblapack/SRC/dtpttf.f b/dspl/liblapack/SRC/dtpttf.f new file mode 100644 index 0000000..a37a3e3 --- /dev/null +++ b/dspl/liblapack/SRC/dtpttf.f @@ -0,0 +1,502 @@ +*> \brief \b DTPTTF copies a triangular matrix from the standard packed format (TP) to the rectangular full packed format (TF). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTPTTF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTPTTF( TRANSR, UPLO, N, AP, ARF, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AP( 0: * ), ARF( 0: * ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTPTTF copies a triangular matrix A from standard packed format (TP) +*> to rectangular full packed format (TF). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': ARF in Normal format is wanted; +*> = 'T': ARF in Conjugate-transpose format is wanted. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension ( N*(N+1)/2 ), +*> On entry, the upper or lower triangular matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] ARF +*> \verbatim +*> ARF is DOUBLE PRECISION array, dimension ( N*(N+1)/2 ), +*> On exit, the upper or lower triangular matrix A stored in +*> RFP format. For a further discussion see Notes below. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Rectangular Full Packed (RFP) Format when N is +*> even. We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> the transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> the transpose of the last three columns of AP lower. +*> This covers the case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 03 04 05 33 43 53 +*> 13 14 15 00 44 54 +*> 23 24 25 10 11 55 +*> 33 34 35 20 21 22 +*> 00 44 45 30 31 32 +*> 01 11 55 40 41 42 +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We then consider Rectangular Full Packed (RFP) Format when N is +*> odd. We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> the transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> the transpose of the last two columns of AP lower. +*> This covers the case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 02 03 04 00 33 43 +*> 12 13 14 10 11 44 +*> 22 23 24 20 21 22 +*> 00 33 34 30 31 32 +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> 02 12 22 00 01 00 10 20 30 40 50 +*> 03 13 23 33 11 33 11 21 31 41 51 +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTPTTF( TRANSR, UPLO, N, AP, ARF, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION AP( 0: * ), ARF( 0: * ) +* +* ===================================================================== +* +* .. Parameters .. +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K, NT + INTEGER I, J, IJ + INTEGER IJP, JP, LDA, JS +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'T' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTPTTF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( NORMALTRANSR ) THEN + ARF( 0 ) = AP( 0 ) + ELSE + ARF( 0 ) = AP( 0 ) + END IF + RETURN + END IF +* +* Size of array ARF(0:NT-1) +* + NT = N*( N+1 ) / 2 +* +* Set N1 and N2 depending on LOWER +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* If N is odd, set NISODD = .TRUE. +* If N is even, set K = N/2 and NISODD = .FALSE. +* +* set lda of ARF^C; ARF^C is (0:(N+1)/2-1,0:N-noe) +* where noe = 0 if n is even, noe = 1 if n is odd +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + LDA = N + 1 + ELSE + NISODD = .TRUE. + LDA = N + END IF +* +* ARF^C has lda rows and n+1-noe cols +* + IF( .NOT.NORMALTRANSR ) + $ LDA = ( N+1 ) / 2 +* +* start execution: there are eight cases +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* N is odd, TRANSR = 'N', and UPLO = 'L' +* + IJP = 0 + JP = 0 + DO J = 0, N2 + DO I = J, N - 1 + IJ = I + JP + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JP = JP + LDA + END DO + DO I = 0, N2 - 1 + DO J = 1 + I, N2 + IJ = I + J*LDA + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + END DO +* + ELSE +* +* N is odd, TRANSR = 'N', and UPLO = 'U' +* + IJP = 0 + DO J = 0, N1 - 1 + IJ = N2 + J + DO I = 0, J + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + IJ = IJ + LDA + END DO + END DO + JS = 0 + DO J = N1, N - 1 + IJ = JS + DO IJ = JS, JS + J + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* N is odd, TRANSR = 'T', and UPLO = 'L' +* + IJP = 0 + DO I = 0, N2 + DO IJ = I*( LDA+1 ), N*LDA - 1, LDA + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + END DO + JS = 1 + DO J = 0, N2 - 1 + DO IJ = JS, JS + N2 - J - 1 + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + 1 + END DO +* + ELSE +* +* N is odd, TRANSR = 'T', and UPLO = 'U' +* + IJP = 0 + JS = N2*LDA + DO J = 0, N1 - 1 + DO IJ = JS, JS + J + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO + DO I = 0, N1 + DO IJ = I, I + ( N1+I )*LDA, LDA + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + END DO +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* N is even, TRANSR = 'N', and UPLO = 'L' +* + IJP = 0 + JP = 0 + DO J = 0, K - 1 + DO I = J, N - 1 + IJ = 1 + I + JP + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JP = JP + LDA + END DO + DO I = 0, K - 1 + DO J = I, K - 1 + IJ = I + J*LDA + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + END DO +* + ELSE +* +* N is even, TRANSR = 'N', and UPLO = 'U' +* + IJP = 0 + DO J = 0, K - 1 + IJ = K + 1 + J + DO I = 0, J + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + IJ = IJ + LDA + END DO + END DO + JS = 0 + DO J = K, N - 1 + IJ = JS + DO IJ = JS, JS + J + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO +* + END IF +* + ELSE +* +* N is even and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* N is even, TRANSR = 'T', and UPLO = 'L' +* + IJP = 0 + DO I = 0, K - 1 + DO IJ = I + ( I+1 )*LDA, ( N+1 )*LDA - 1, LDA + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + END DO + JS = 0 + DO J = 0, K - 1 + DO IJ = JS, JS + K - J - 1 + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + 1 + END DO +* + ELSE +* +* N is even, TRANSR = 'T', and UPLO = 'U' +* + IJP = 0 + JS = ( K+1 )*LDA + DO J = 0, K - 1 + DO IJ = JS, JS + J + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO + DO I = 0, K - 1 + DO IJ = I, I + ( K+I )*LDA, LDA + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + END DO +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of DTPTTF +* + END diff --git a/dspl/liblapack/SRC/dtpttr.f b/dspl/liblapack/SRC/dtpttr.f new file mode 100644 index 0000000..6258179 --- /dev/null +++ b/dspl/liblapack/SRC/dtpttr.f @@ -0,0 +1,176 @@ +*> \brief \b DTPTTR copies a triangular matrix from the standard packed format (TP) to the standard full format (TR). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTPTTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTPTTR( UPLO, N, AP, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTPTTR copies a triangular matrix A from standard packed format (TP) +*> to standard full format (TR). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular. +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension ( N*(N+1)/2 ), +*> On entry, the upper or lower triangular matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension ( LDA, N ) +*> On exit, the triangular matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DTPTTR( UPLO, N, AP, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* .. +* .. Local Scalars .. + LOGICAL LOWER + INTEGER I, J, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTPTTR', -INFO ) + RETURN + END IF +* + IF( LOWER ) THEN + K = 0 + DO J = 1, N + DO I = J, N + K = K + 1 + A( I, J ) = AP( K ) + END DO + END DO + ELSE + K = 0 + DO J = 1, N + DO I = 1, J + K = K + 1 + A( I, J ) = AP( K ) + END DO + END DO + END IF +* +* + RETURN +* +* End of DTPTTR +* + END diff --git a/dspl/liblapack/SRC/dtrcon.f b/dspl/liblapack/SRC/dtrcon.f new file mode 100644 index 0000000..ad40d37 --- /dev/null +++ b/dspl/liblapack/SRC/dtrcon.f @@ -0,0 +1,276 @@ +*> \brief \b DTRCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTRCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER INFO, LDA, N +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTRCON estimates the reciprocal of the condition number of a +*> triangular matrix A, in either the 1-norm or the infinity-norm. +*> +*> The norm of A is computed and an estimate is obtained for +*> norm(inv(A)), then the reciprocal of the condition number is +*> computed as +*> RCOND = 1 / ( norm(A) * norm(inv(A)) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The triangular matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of the array A contains the upper +*> triangular matrix, and the strictly lower triangular part of +*> A is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of the array A contains the lower triangular +*> matrix, and the strictly upper triangular part of A is not +*> referenced. If DIAG = 'U', the diagonal elements of A are +*> also not referenced and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(norm(A) * norm(inv(A))). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DTRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + DOUBLE PRECISION AINVNM, ANORM, SCALE, SMLNUM, XNORM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DLANTR + EXTERNAL LSAME, IDAMAX, DLAMCH, DLANTR +* .. +* .. External Subroutines .. + EXTERNAL DLACN2, DLATRS, DRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = DLAMCH( 'Safe minimum' )*DBLE( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = DLANTR( NORM, UPLO, DIAG, N, N, A, LDA, WORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL DLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL DLATRS( UPLO, 'No transpose', DIAG, NORMIN, N, A, + $ LDA, WORK, SCALE, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(A**T). +* + CALL DLATRS( UPLO, 'Transpose', DIAG, NORMIN, N, A, LDA, + $ WORK, SCALE, WORK( 2*N+1 ), INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = IDAMAX( N, WORK, 1 ) + XNORM = ABS( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL DRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of DTRCON +* + END diff --git a/dspl/liblapack/SRC/dtrevc.f b/dspl/liblapack/SRC/dtrevc.f new file mode 100644 index 0000000..2ed3efa --- /dev/null +++ b/dspl/liblapack/SRC/dtrevc.f @@ -0,0 +1,1077 @@ +*> \brief \b DTREVC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTREVC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, +* LDVR, MM, M, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER HOWMNY, SIDE +* INTEGER INFO, LDT, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* DOUBLE PRECISION T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTREVC computes some or all of the right and/or left eigenvectors of +*> a real upper quasi-triangular matrix T. +*> Matrices of this type are produced by the Schur factorization of +*> a real general matrix: A = Q*T*Q**T, as computed by DHSEQR. +*> +*> The right eigenvector x and the left eigenvector y of T corresponding +*> to an eigenvalue w are defined by: +*> +*> T*x = w*x, (y**H)*T = w*(y**H) +*> +*> where y**H denotes the conjugate transpose of y. +*> The eigenvalues are not input to this routine, but are read directly +*> from the diagonal blocks of T. +*> +*> This routine returns the matrices X and/or Y of right and left +*> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an +*> input matrix. If Q is the orthogonal factor that reduces a matrix +*> A to Schur form T, then Q*X and Q*Y are the matrices of right and +*> left eigenvectors of A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': compute right eigenvectors only; +*> = 'L': compute left eigenvectors only; +*> = 'B': compute both right and left eigenvectors. +*> \endverbatim +*> +*> \param[in] HOWMNY +*> \verbatim +*> HOWMNY is CHARACTER*1 +*> = 'A': compute all right and/or left eigenvectors; +*> = 'B': compute all right and/or left eigenvectors, +*> backtransformed by the matrices in VR and/or VL; +*> = 'S': compute selected right and/or left eigenvectors, +*> as indicated by the logical array SELECT. +*> \endverbatim +*> +*> \param[in,out] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> If HOWMNY = 'S', SELECT specifies the eigenvectors to be +*> computed. +*> If w(j) is a real eigenvalue, the corresponding real +*> eigenvector is computed if SELECT(j) is .TRUE.. +*> If w(j) and w(j+1) are the real and imaginary parts of a +*> complex eigenvalue, the corresponding complex eigenvector is +*> computed if either SELECT(j) or SELECT(j+1) is .TRUE., and +*> on exit SELECT(j) is set to .TRUE. and SELECT(j+1) is set to +*> .FALSE.. +*> Not referenced if HOWMNY = 'A' or 'B'. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,N) +*> The upper quasi-triangular matrix T in Schur canonical form. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is DOUBLE PRECISION array, dimension (LDVL,MM) +*> On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +*> contain an N-by-N matrix Q (usually the orthogonal matrix Q +*> of Schur vectors returned by DHSEQR). +*> On exit, if SIDE = 'L' or 'B', VL contains: +*> if HOWMNY = 'A', the matrix Y of left eigenvectors of T; +*> if HOWMNY = 'B', the matrix Q*Y; +*> if HOWMNY = 'S', the left eigenvectors of T specified by +*> SELECT, stored consecutively in the columns +*> of VL, in the same order as their +*> eigenvalues. +*> A complex eigenvector corresponding to a complex eigenvalue +*> is stored in two consecutive columns, the first holding the +*> real part, and the second the imaginary part. +*> Not referenced if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. LDVL >= 1, and if +*> SIDE = 'L' or 'B', LDVL >= N. +*> \endverbatim +*> +*> \param[in,out] VR +*> \verbatim +*> VR is DOUBLE PRECISION array, dimension (LDVR,MM) +*> On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +*> contain an N-by-N matrix Q (usually the orthogonal matrix Q +*> of Schur vectors returned by DHSEQR). +*> On exit, if SIDE = 'R' or 'B', VR contains: +*> if HOWMNY = 'A', the matrix X of right eigenvectors of T; +*> if HOWMNY = 'B', the matrix Q*X; +*> if HOWMNY = 'S', the right eigenvectors of T specified by +*> SELECT, stored consecutively in the columns +*> of VR, in the same order as their +*> eigenvalues. +*> A complex eigenvector corresponding to a complex eigenvalue +*> is stored in two consecutive columns, the first holding the +*> real part and the second the imaginary part. +*> Not referenced if SIDE = 'L'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. LDVR >= 1, and if +*> SIDE = 'R' or 'B', LDVR >= N. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of columns in the arrays VL and/or VR. MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of columns in the arrays VL and/or VR actually +*> used to store the eigenvectors. +*> If HOWMNY = 'A' or 'B', M is set to N. +*> Each selected real eigenvector occupies one column and each +*> selected complex eigenvector occupies two columns. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The algorithm used in this program is basically backward (forward) +*> substitution, with scaling to make the the code robust against +*> possible overflow. +*> +*> Each eigenvector is normalized so that the element of largest +*> magnitude has magnitude 1; here the magnitude of a complex number +*> (x,y) is taken to be |x| + |y|. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, + $ LDVR, MM, M, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDT, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + DOUBLE PRECISION T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLV, BOTHV, LEFTV, OVER, PAIR, RIGHTV, SOMEV + INTEGER I, IERR, II, IP, IS, J, J1, J2, JNXT, K, KI, N2 + DOUBLE PRECISION BETA, BIGNUM, EMAX, OVFL, REC, REMAX, SCALE, + $ SMIN, SMLNUM, ULP, UNFL, VCRIT, VMAX, WI, WR, + $ XNORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DDOT, DLAMCH + EXTERNAL LSAME, IDAMAX, DDOT, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, DAXPY, DCOPY, DGEMV, DLALN2, DSCAL, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Local Arrays .. + DOUBLE PRECISION X( 2, 2 ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + ALLV = LSAME( HOWMNY, 'A' ) + OVER = LSAME( HOWMNY, 'B' ) + SOMEV = LSAME( HOWMNY, 'S' ) +* + INFO = 0 + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE +* +* Set M to the number of columns required to store the selected +* eigenvectors, standardize the array SELECT if necessary, and +* test MM. +* + IF( SOMEV ) THEN + M = 0 + PAIR = .FALSE. + DO 10 J = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + SELECT( J ) = .FALSE. + ELSE + IF( J.LT.N ) THEN + IF( T( J+1, J ).EQ.ZERO ) THEN + IF( SELECT( J ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( J ) .OR. SELECT( J+1 ) ) THEN + SELECT( J ) = .TRUE. + M = M + 2 + END IF + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE + ELSE + M = N + END IF +* + IF( MM.LT.M ) THEN + INFO = -11 + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTREVC', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Set the constants to control overflow. +* + UNFL = DLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL DLABAD( UNFL, OVFL ) + ULP = DLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) + BIGNUM = ( ONE-ULP ) / SMLNUM +* +* Compute 1-norm of each column of strictly upper triangular +* part of T to control overflow in triangular solver. +* + WORK( 1 ) = ZERO + DO 30 J = 2, N + WORK( J ) = ZERO + DO 20 I = 1, J - 1 + WORK( J ) = WORK( J ) + ABS( T( I, J ) ) + 20 CONTINUE + 30 CONTINUE +* +* Index IP is used to specify the real or complex eigenvalue: +* IP = 0, real eigenvalue, +* 1, first of conjugate complex pair: (wr,wi) +* -1, second of conjugate complex pair: (wr,wi) +* + N2 = 2*N +* + IF( RIGHTV ) THEN +* +* Compute right eigenvectors. +* + IP = 0 + IS = M + DO 140 KI = N, 1, -1 +* + IF( IP.EQ.1 ) + $ GO TO 130 + IF( KI.EQ.1 ) + $ GO TO 40 + IF( T( KI, KI-1 ).EQ.ZERO ) + $ GO TO 40 + IP = -1 +* + 40 CONTINUE + IF( SOMEV ) THEN + IF( IP.EQ.0 ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 130 + ELSE + IF( .NOT.SELECT( KI-1 ) ) + $ GO TO 130 + END IF + END IF +* +* Compute the KI-th eigenvalue (WR,WI). +* + WR = T( KI, KI ) + WI = ZERO + IF( IP.NE.0 ) + $ WI = SQRT( ABS( T( KI, KI-1 ) ) )* + $ SQRT( ABS( T( KI-1, KI ) ) ) + SMIN = MAX( ULP*( ABS( WR )+ABS( WI ) ), SMLNUM ) +* + IF( IP.EQ.0 ) THEN +* +* Real right eigenvector +* + WORK( KI+N ) = ONE +* +* Form right-hand side +* + DO 50 K = 1, KI - 1 + WORK( K+N ) = -T( K, KI ) + 50 CONTINUE +* +* Solve the upper quasi-triangular system: +* (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK. +* + JNXT = KI - 1 + DO 60 J = KI - 1, 1, -1 + IF( J.GT.JNXT ) + $ GO TO 60 + J1 = J + J2 = J + JNXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* + CALL DLALN2( .FALSE., 1, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale X(1,1) to avoid overflow when updating +* the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + IF( WORK( J ).GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL DSCAL( KI, SCALE, WORK( 1+N ), 1 ) + WORK( J+N ) = X( 1, 1 ) +* +* Update right-hand side +* + CALL DAXPY( J-1, -X( 1, 1 ), T( 1, J ), 1, + $ WORK( 1+N ), 1 ) +* + ELSE +* +* 2-by-2 diagonal block +* + CALL DLALN2( .FALSE., 2, 1, SMIN, ONE, + $ T( J-1, J-1 ), LDT, ONE, ONE, + $ WORK( J-1+N ), N, WR, ZERO, X, 2, + $ SCALE, XNORM, IERR ) +* +* Scale X(1,1) and X(2,1) to avoid overflow when +* updating the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + BETA = MAX( WORK( J-1 ), WORK( J ) ) + IF( BETA.GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + X( 2, 1 ) = X( 2, 1 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL DSCAL( KI, SCALE, WORK( 1+N ), 1 ) + WORK( J-1+N ) = X( 1, 1 ) + WORK( J+N ) = X( 2, 1 ) +* +* Update right-hand side +* + CALL DAXPY( J-2, -X( 1, 1 ), T( 1, J-1 ), 1, + $ WORK( 1+N ), 1 ) + CALL DAXPY( J-2, -X( 2, 1 ), T( 1, J ), 1, + $ WORK( 1+N ), 1 ) + END IF + 60 CONTINUE +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN + CALL DCOPY( KI, WORK( 1+N ), 1, VR( 1, IS ), 1 ) +* + II = IDAMAX( KI, VR( 1, IS ), 1 ) + REMAX = ONE / ABS( VR( II, IS ) ) + CALL DSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 70 K = KI + 1, N + VR( K, IS ) = ZERO + 70 CONTINUE + ELSE + IF( KI.GT.1 ) + $ CALL DGEMV( 'N', N, KI-1, ONE, VR, LDVR, + $ WORK( 1+N ), 1, WORK( KI+N ), + $ VR( 1, KI ), 1 ) +* + II = IDAMAX( N, VR( 1, KI ), 1 ) + REMAX = ONE / ABS( VR( II, KI ) ) + CALL DSCAL( N, REMAX, VR( 1, KI ), 1 ) + END IF +* + ELSE +* +* Complex right eigenvector. +* +* Initial solve +* [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0. +* [ (T(KI,KI-1) T(KI,KI) ) ] +* + IF( ABS( T( KI-1, KI ) ).GE.ABS( T( KI, KI-1 ) ) ) THEN + WORK( KI-1+N ) = ONE + WORK( KI+N2 ) = WI / T( KI-1, KI ) + ELSE + WORK( KI-1+N ) = -WI / T( KI, KI-1 ) + WORK( KI+N2 ) = ONE + END IF + WORK( KI+N ) = ZERO + WORK( KI-1+N2 ) = ZERO +* +* Form right-hand side +* + DO 80 K = 1, KI - 2 + WORK( K+N ) = -WORK( KI-1+N )*T( K, KI-1 ) + WORK( K+N2 ) = -WORK( KI+N2 )*T( K, KI ) + 80 CONTINUE +* +* Solve upper quasi-triangular system: +* (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2) +* + JNXT = KI - 2 + DO 90 J = KI - 2, 1, -1 + IF( J.GT.JNXT ) + $ GO TO 90 + J1 = J + J2 = J + JNXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* + CALL DLALN2( .FALSE., 1, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, WI, + $ X, 2, SCALE, XNORM, IERR ) +* +* Scale X(1,1) and X(1,2) to avoid overflow when +* updating the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + IF( WORK( J ).GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + X( 1, 2 ) = X( 1, 2 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL DSCAL( KI, SCALE, WORK( 1+N ), 1 ) + CALL DSCAL( KI, SCALE, WORK( 1+N2 ), 1 ) + END IF + WORK( J+N ) = X( 1, 1 ) + WORK( J+N2 ) = X( 1, 2 ) +* +* Update the right-hand side +* + CALL DAXPY( J-1, -X( 1, 1 ), T( 1, J ), 1, + $ WORK( 1+N ), 1 ) + CALL DAXPY( J-1, -X( 1, 2 ), T( 1, J ), 1, + $ WORK( 1+N2 ), 1 ) +* + ELSE +* +* 2-by-2 diagonal block +* + CALL DLALN2( .FALSE., 2, 2, SMIN, ONE, + $ T( J-1, J-1 ), LDT, ONE, ONE, + $ WORK( J-1+N ), N, WR, WI, X, 2, SCALE, + $ XNORM, IERR ) +* +* Scale X to avoid overflow when updating +* the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + BETA = MAX( WORK( J-1 ), WORK( J ) ) + IF( BETA.GT.BIGNUM / XNORM ) THEN + REC = ONE / XNORM + X( 1, 1 ) = X( 1, 1 )*REC + X( 1, 2 ) = X( 1, 2 )*REC + X( 2, 1 ) = X( 2, 1 )*REC + X( 2, 2 ) = X( 2, 2 )*REC + SCALE = SCALE*REC + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL DSCAL( KI, SCALE, WORK( 1+N ), 1 ) + CALL DSCAL( KI, SCALE, WORK( 1+N2 ), 1 ) + END IF + WORK( J-1+N ) = X( 1, 1 ) + WORK( J+N ) = X( 2, 1 ) + WORK( J-1+N2 ) = X( 1, 2 ) + WORK( J+N2 ) = X( 2, 2 ) +* +* Update the right-hand side +* + CALL DAXPY( J-2, -X( 1, 1 ), T( 1, J-1 ), 1, + $ WORK( 1+N ), 1 ) + CALL DAXPY( J-2, -X( 2, 1 ), T( 1, J ), 1, + $ WORK( 1+N ), 1 ) + CALL DAXPY( J-2, -X( 1, 2 ), T( 1, J-1 ), 1, + $ WORK( 1+N2 ), 1 ) + CALL DAXPY( J-2, -X( 2, 2 ), T( 1, J ), 1, + $ WORK( 1+N2 ), 1 ) + END IF + 90 CONTINUE +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN + CALL DCOPY( KI, WORK( 1+N ), 1, VR( 1, IS-1 ), 1 ) + CALL DCOPY( KI, WORK( 1+N2 ), 1, VR( 1, IS ), 1 ) +* + EMAX = ZERO + DO 100 K = 1, KI + EMAX = MAX( EMAX, ABS( VR( K, IS-1 ) )+ + $ ABS( VR( K, IS ) ) ) + 100 CONTINUE +* + REMAX = ONE / EMAX + CALL DSCAL( KI, REMAX, VR( 1, IS-1 ), 1 ) + CALL DSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 110 K = KI + 1, N + VR( K, IS-1 ) = ZERO + VR( K, IS ) = ZERO + 110 CONTINUE +* + ELSE +* + IF( KI.GT.2 ) THEN + CALL DGEMV( 'N', N, KI-2, ONE, VR, LDVR, + $ WORK( 1+N ), 1, WORK( KI-1+N ), + $ VR( 1, KI-1 ), 1 ) + CALL DGEMV( 'N', N, KI-2, ONE, VR, LDVR, + $ WORK( 1+N2 ), 1, WORK( KI+N2 ), + $ VR( 1, KI ), 1 ) + ELSE + CALL DSCAL( N, WORK( KI-1+N ), VR( 1, KI-1 ), 1 ) + CALL DSCAL( N, WORK( KI+N2 ), VR( 1, KI ), 1 ) + END IF +* + EMAX = ZERO + DO 120 K = 1, N + EMAX = MAX( EMAX, ABS( VR( K, KI-1 ) )+ + $ ABS( VR( K, KI ) ) ) + 120 CONTINUE + REMAX = ONE / EMAX + CALL DSCAL( N, REMAX, VR( 1, KI-1 ), 1 ) + CALL DSCAL( N, REMAX, VR( 1, KI ), 1 ) + END IF + END IF +* + IS = IS - 1 + IF( IP.NE.0 ) + $ IS = IS - 1 + 130 CONTINUE + IF( IP.EQ.1 ) + $ IP = 0 + IF( IP.EQ.-1 ) + $ IP = 1 + 140 CONTINUE + END IF +* + IF( LEFTV ) THEN +* +* Compute left eigenvectors. +* + IP = 0 + IS = 1 + DO 260 KI = 1, N +* + IF( IP.EQ.-1 ) + $ GO TO 250 + IF( KI.EQ.N ) + $ GO TO 150 + IF( T( KI+1, KI ).EQ.ZERO ) + $ GO TO 150 + IP = 1 +* + 150 CONTINUE + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 250 + END IF +* +* Compute the KI-th eigenvalue (WR,WI). +* + WR = T( KI, KI ) + WI = ZERO + IF( IP.NE.0 ) + $ WI = SQRT( ABS( T( KI, KI+1 ) ) )* + $ SQRT( ABS( T( KI+1, KI ) ) ) + SMIN = MAX( ULP*( ABS( WR )+ABS( WI ) ), SMLNUM ) +* + IF( IP.EQ.0 ) THEN +* +* Real left eigenvector. +* + WORK( KI+N ) = ONE +* +* Form right-hand side +* + DO 160 K = KI + 1, N + WORK( K+N ) = -T( KI, K ) + 160 CONTINUE +* +* Solve the quasi-triangular system: +* (T(KI+1:N,KI+1:N) - WR)**T*X = SCALE*WORK +* + VMAX = ONE + VCRIT = BIGNUM +* + JNXT = KI + 1 + DO 170 J = KI + 1, N + IF( J.LT.JNXT ) + $ GO TO 170 + J1 = J + J2 = J + JNXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side. +* + IF( WORK( J ).GT.VCRIT ) THEN + REC = ONE / VMAX + CALL DSCAL( N-KI+1, REC, WORK( KI+N ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+N ) = WORK( J+N ) - + $ DDOT( J-KI-1, T( KI+1, J ), 1, + $ WORK( KI+1+N ), 1 ) +* +* Solve (T(J,J)-WR)**T*X = WORK +* + CALL DLALN2( .FALSE., 1, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL DSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 ) + WORK( J+N ) = X( 1, 1 ) + VMAX = MAX( ABS( WORK( J+N ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + ELSE +* +* 2-by-2 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side. +* + BETA = MAX( WORK( J ), WORK( J+1 ) ) + IF( BETA.GT.VCRIT ) THEN + REC = ONE / VMAX + CALL DSCAL( N-KI+1, REC, WORK( KI+N ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+N ) = WORK( J+N ) - + $ DDOT( J-KI-1, T( KI+1, J ), 1, + $ WORK( KI+1+N ), 1 ) +* + WORK( J+1+N ) = WORK( J+1+N ) - + $ DDOT( J-KI-1, T( KI+1, J+1 ), 1, + $ WORK( KI+1+N ), 1 ) +* +* Solve +* [T(J,J)-WR T(J,J+1) ]**T * X = SCALE*( WORK1 ) +* [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 ) +* + CALL DLALN2( .TRUE., 2, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL DSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 ) + WORK( J+N ) = X( 1, 1 ) + WORK( J+1+N ) = X( 2, 1 ) +* + VMAX = MAX( ABS( WORK( J+N ) ), + $ ABS( WORK( J+1+N ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + END IF + 170 CONTINUE +* +* Copy the vector x or Q*x to VL and normalize. +* + IF( .NOT.OVER ) THEN + CALL DCOPY( N-KI+1, WORK( KI+N ), 1, VL( KI, IS ), 1 ) +* + II = IDAMAX( N-KI+1, VL( KI, IS ), 1 ) + KI - 1 + REMAX = ONE / ABS( VL( II, IS ) ) + CALL DSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) +* + DO 180 K = 1, KI - 1 + VL( K, IS ) = ZERO + 180 CONTINUE +* + ELSE +* + IF( KI.LT.N ) + $ CALL DGEMV( 'N', N, N-KI, ONE, VL( 1, KI+1 ), LDVL, + $ WORK( KI+1+N ), 1, WORK( KI+N ), + $ VL( 1, KI ), 1 ) +* + II = IDAMAX( N, VL( 1, KI ), 1 ) + REMAX = ONE / ABS( VL( II, KI ) ) + CALL DSCAL( N, REMAX, VL( 1, KI ), 1 ) +* + END IF +* + ELSE +* +* Complex left eigenvector. +* +* Initial solve: +* ((T(KI,KI) T(KI,KI+1) )**T - (WR - I* WI))*X = 0. +* ((T(KI+1,KI) T(KI+1,KI+1)) ) +* + IF( ABS( T( KI, KI+1 ) ).GE.ABS( T( KI+1, KI ) ) ) THEN + WORK( KI+N ) = WI / T( KI, KI+1 ) + WORK( KI+1+N2 ) = ONE + ELSE + WORK( KI+N ) = ONE + WORK( KI+1+N2 ) = -WI / T( KI+1, KI ) + END IF + WORK( KI+1+N ) = ZERO + WORK( KI+N2 ) = ZERO +* +* Form right-hand side +* + DO 190 K = KI + 2, N + WORK( K+N ) = -WORK( KI+N )*T( KI, K ) + WORK( K+N2 ) = -WORK( KI+1+N2 )*T( KI+1, K ) + 190 CONTINUE +* +* Solve complex quasi-triangular system: +* ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 +* + VMAX = ONE + VCRIT = BIGNUM +* + JNXT = KI + 2 + DO 200 J = KI + 2, N + IF( J.LT.JNXT ) + $ GO TO 200 + J1 = J + J2 = J + JNXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* +* Scale if necessary to avoid overflow when +* forming the right-hand side elements. +* + IF( WORK( J ).GT.VCRIT ) THEN + REC = ONE / VMAX + CALL DSCAL( N-KI+1, REC, WORK( KI+N ), 1 ) + CALL DSCAL( N-KI+1, REC, WORK( KI+N2 ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+N ) = WORK( J+N ) - + $ DDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+N ), 1 ) + WORK( J+N2 ) = WORK( J+N2 ) - + $ DDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+N2 ), 1 ) +* +* Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 +* + CALL DLALN2( .FALSE., 1, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ -WI, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL DSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 ) + CALL DSCAL( N-KI+1, SCALE, WORK( KI+N2 ), 1 ) + END IF + WORK( J+N ) = X( 1, 1 ) + WORK( J+N2 ) = X( 1, 2 ) + VMAX = MAX( ABS( WORK( J+N ) ), + $ ABS( WORK( J+N2 ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + ELSE +* +* 2-by-2 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side elements. +* + BETA = MAX( WORK( J ), WORK( J+1 ) ) + IF( BETA.GT.VCRIT ) THEN + REC = ONE / VMAX + CALL DSCAL( N-KI+1, REC, WORK( KI+N ), 1 ) + CALL DSCAL( N-KI+1, REC, WORK( KI+N2 ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+N ) = WORK( J+N ) - + $ DDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+N ), 1 ) +* + WORK( J+N2 ) = WORK( J+N2 ) - + $ DDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+N2 ), 1 ) +* + WORK( J+1+N ) = WORK( J+1+N ) - + $ DDOT( J-KI-2, T( KI+2, J+1 ), 1, + $ WORK( KI+2+N ), 1 ) +* + WORK( J+1+N2 ) = WORK( J+1+N2 ) - + $ DDOT( J-KI-2, T( KI+2, J+1 ), 1, + $ WORK( KI+2+N2 ), 1 ) +* +* Solve 2-by-2 complex linear equation +* ([T(j,j) T(j,j+1) ]**T-(wr-i*wi)*I)*X = SCALE*B +* ([T(j+1,j) T(j+1,j+1)] ) +* + CALL DLALN2( .TRUE., 2, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ -WI, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL DSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 ) + CALL DSCAL( N-KI+1, SCALE, WORK( KI+N2 ), 1 ) + END IF + WORK( J+N ) = X( 1, 1 ) + WORK( J+N2 ) = X( 1, 2 ) + WORK( J+1+N ) = X( 2, 1 ) + WORK( J+1+N2 ) = X( 2, 2 ) + VMAX = MAX( ABS( X( 1, 1 ) ), ABS( X( 1, 2 ) ), + $ ABS( X( 2, 1 ) ), ABS( X( 2, 2 ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + END IF + 200 CONTINUE +* +* Copy the vector x or Q*x to VL and normalize. +* + IF( .NOT.OVER ) THEN + CALL DCOPY( N-KI+1, WORK( KI+N ), 1, VL( KI, IS ), 1 ) + CALL DCOPY( N-KI+1, WORK( KI+N2 ), 1, VL( KI, IS+1 ), + $ 1 ) +* + EMAX = ZERO + DO 220 K = KI, N + EMAX = MAX( EMAX, ABS( VL( K, IS ) )+ + $ ABS( VL( K, IS+1 ) ) ) + 220 CONTINUE + REMAX = ONE / EMAX + CALL DSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) + CALL DSCAL( N-KI+1, REMAX, VL( KI, IS+1 ), 1 ) +* + DO 230 K = 1, KI - 1 + VL( K, IS ) = ZERO + VL( K, IS+1 ) = ZERO + 230 CONTINUE + ELSE + IF( KI.LT.N-1 ) THEN + CALL DGEMV( 'N', N, N-KI-1, ONE, VL( 1, KI+2 ), + $ LDVL, WORK( KI+2+N ), 1, WORK( KI+N ), + $ VL( 1, KI ), 1 ) + CALL DGEMV( 'N', N, N-KI-1, ONE, VL( 1, KI+2 ), + $ LDVL, WORK( KI+2+N2 ), 1, + $ WORK( KI+1+N2 ), VL( 1, KI+1 ), 1 ) + ELSE + CALL DSCAL( N, WORK( KI+N ), VL( 1, KI ), 1 ) + CALL DSCAL( N, WORK( KI+1+N2 ), VL( 1, KI+1 ), 1 ) + END IF +* + EMAX = ZERO + DO 240 K = 1, N + EMAX = MAX( EMAX, ABS( VL( K, KI ) )+ + $ ABS( VL( K, KI+1 ) ) ) + 240 CONTINUE + REMAX = ONE / EMAX + CALL DSCAL( N, REMAX, VL( 1, KI ), 1 ) + CALL DSCAL( N, REMAX, VL( 1, KI+1 ), 1 ) +* + END IF +* + END IF +* + IS = IS + 1 + IF( IP.NE.0 ) + $ IS = IS + 1 + 250 CONTINUE + IF( IP.EQ.-1 ) + $ IP = 0 + IF( IP.EQ.1 ) + $ IP = -1 +* + 260 CONTINUE +* + END IF +* + RETURN +* +* End of DTREVC +* + END diff --git a/dspl/liblapack/SRC/dtrevc3.f b/dspl/liblapack/SRC/dtrevc3.f new file mode 100644 index 0000000..745f636 --- /dev/null +++ b/dspl/liblapack/SRC/dtrevc3.f @@ -0,0 +1,1304 @@ +*> \brief \b DTREVC3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTREVC3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTREVC3( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, +* VR, LDVR, MM, M, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER HOWMNY, SIDE +* INTEGER INFO, LDT, LDVL, LDVR, LWORK, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* DOUBLE PRECISION T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTREVC3 computes some or all of the right and/or left eigenvectors of +*> a real upper quasi-triangular matrix T. +*> Matrices of this type are produced by the Schur factorization of +*> a real general matrix: A = Q*T*Q**T, as computed by DHSEQR. +*> +*> The right eigenvector x and the left eigenvector y of T corresponding +*> to an eigenvalue w are defined by: +*> +*> T*x = w*x, (y**H)*T = w*(y**H) +*> +*> where y**H denotes the conjugate transpose of y. +*> The eigenvalues are not input to this routine, but are read directly +*> from the diagonal blocks of T. +*> +*> This routine returns the matrices X and/or Y of right and left +*> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an +*> input matrix. If Q is the orthogonal factor that reduces a matrix +*> A to Schur form T, then Q*X and Q*Y are the matrices of right and +*> left eigenvectors of A. +*> +*> This uses a Level 3 BLAS version of the back transformation. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': compute right eigenvectors only; +*> = 'L': compute left eigenvectors only; +*> = 'B': compute both right and left eigenvectors. +*> \endverbatim +*> +*> \param[in] HOWMNY +*> \verbatim +*> HOWMNY is CHARACTER*1 +*> = 'A': compute all right and/or left eigenvectors; +*> = 'B': compute all right and/or left eigenvectors, +*> backtransformed by the matrices in VR and/or VL; +*> = 'S': compute selected right and/or left eigenvectors, +*> as indicated by the logical array SELECT. +*> \endverbatim +*> +*> \param[in,out] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> If HOWMNY = 'S', SELECT specifies the eigenvectors to be +*> computed. +*> If w(j) is a real eigenvalue, the corresponding real +*> eigenvector is computed if SELECT(j) is .TRUE.. +*> If w(j) and w(j+1) are the real and imaginary parts of a +*> complex eigenvalue, the corresponding complex eigenvector is +*> computed if either SELECT(j) or SELECT(j+1) is .TRUE., and +*> on exit SELECT(j) is set to .TRUE. and SELECT(j+1) is set to +*> .FALSE.. +*> Not referenced if HOWMNY = 'A' or 'B'. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,N) +*> The upper quasi-triangular matrix T in Schur canonical form. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is DOUBLE PRECISION array, dimension (LDVL,MM) +*> On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +*> contain an N-by-N matrix Q (usually the orthogonal matrix Q +*> of Schur vectors returned by DHSEQR). +*> On exit, if SIDE = 'L' or 'B', VL contains: +*> if HOWMNY = 'A', the matrix Y of left eigenvectors of T; +*> if HOWMNY = 'B', the matrix Q*Y; +*> if HOWMNY = 'S', the left eigenvectors of T specified by +*> SELECT, stored consecutively in the columns +*> of VL, in the same order as their +*> eigenvalues. +*> A complex eigenvector corresponding to a complex eigenvalue +*> is stored in two consecutive columns, the first holding the +*> real part, and the second the imaginary part. +*> Not referenced if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. +*> LDVL >= 1, and if SIDE = 'L' or 'B', LDVL >= N. +*> \endverbatim +*> +*> \param[in,out] VR +*> \verbatim +*> VR is DOUBLE PRECISION array, dimension (LDVR,MM) +*> On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +*> contain an N-by-N matrix Q (usually the orthogonal matrix Q +*> of Schur vectors returned by DHSEQR). +*> On exit, if SIDE = 'R' or 'B', VR contains: +*> if HOWMNY = 'A', the matrix X of right eigenvectors of T; +*> if HOWMNY = 'B', the matrix Q*X; +*> if HOWMNY = 'S', the right eigenvectors of T specified by +*> SELECT, stored consecutively in the columns +*> of VR, in the same order as their +*> eigenvalues. +*> A complex eigenvector corresponding to a complex eigenvalue +*> is stored in two consecutive columns, the first holding the +*> real part and the second the imaginary part. +*> Not referenced if SIDE = 'L'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. +*> LDVR >= 1, and if SIDE = 'R' or 'B', LDVR >= N. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of columns in the arrays VL and/or VR. MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of columns in the arrays VL and/or VR actually +*> used to store the eigenvectors. +*> If HOWMNY = 'A' or 'B', M is set to N. +*> Each selected real eigenvector occupies one column and each +*> selected complex eigenvector occupies two columns. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of array WORK. LWORK >= max(1,3*N). +*> For optimum performance, LWORK >= N + 2*N*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +* @precisions fortran d -> s +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The algorithm used in this program is basically backward (forward) +*> substitution, with scaling to make the the code robust against +*> possible overflow. +*> +*> Each eigenvector is normalized so that the element of largest +*> magnitude has magnitude 1; here the magnitude of a complex number +*> (x,y) is taken to be |x| + |y|. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTREVC3( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, + $ VR, LDVR, MM, M, WORK, LWORK, INFO ) + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDT, LDVL, LDVR, LWORK, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + DOUBLE PRECISION T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + INTEGER NBMIN, NBMAX + PARAMETER ( NBMIN = 8, NBMAX = 128 ) +* .. +* .. Local Scalars .. + LOGICAL ALLV, BOTHV, LEFTV, LQUERY, OVER, PAIR, + $ RIGHTV, SOMEV + INTEGER I, IERR, II, IP, IS, J, J1, J2, JNXT, K, KI, + $ IV, MAXWRK, NB, KI2 + DOUBLE PRECISION BETA, BIGNUM, EMAX, OVFL, REC, REMAX, SCALE, + $ SMIN, SMLNUM, ULP, UNFL, VCRIT, VMAX, WI, WR, + $ XNORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX, ILAENV + DOUBLE PRECISION DDOT, DLAMCH + EXTERNAL LSAME, IDAMAX, ILAENV, DDOT, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DGEMV, DLALN2, DSCAL, XERBLA, + $ DGEMM, DLASET, DLABAD, DLACPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Local Arrays .. + DOUBLE PRECISION X( 2, 2 ) + INTEGER ISCOMPLEX( NBMAX ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + ALLV = LSAME( HOWMNY, 'A' ) + OVER = LSAME( HOWMNY, 'B' ) + SOMEV = LSAME( HOWMNY, 'S' ) +* + INFO = 0 + NB = ILAENV( 1, 'DTREVC', SIDE // HOWMNY, N, -1, -1, -1 ) + MAXWRK = N + 2*N*NB + WORK(1) = MAXWRK + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, 3*N ) .AND. .NOT.LQUERY ) THEN + INFO = -14 + ELSE +* +* Set M to the number of columns required to store the selected +* eigenvectors, standardize the array SELECT if necessary, and +* test MM. +* + IF( SOMEV ) THEN + M = 0 + PAIR = .FALSE. + DO 10 J = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + SELECT( J ) = .FALSE. + ELSE + IF( J.LT.N ) THEN + IF( T( J+1, J ).EQ.ZERO ) THEN + IF( SELECT( J ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( J ) .OR. SELECT( J+1 ) ) THEN + SELECT( J ) = .TRUE. + M = M + 2 + END IF + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE + ELSE + M = N + END IF +* + IF( MM.LT.M ) THEN + INFO = -11 + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTREVC3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Use blocked version of back-transformation if sufficient workspace. +* Zero-out the workspace to avoid potential NaN propagation. +* + IF( OVER .AND. LWORK .GE. N + 2*N*NBMIN ) THEN + NB = (LWORK - N) / (2*N) + NB = MIN( NB, NBMAX ) + CALL DLASET( 'F', N, 1+2*NB, ZERO, ZERO, WORK, N ) + ELSE + NB = 1 + END IF +* +* Set the constants to control overflow. +* + UNFL = DLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL DLABAD( UNFL, OVFL ) + ULP = DLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) + BIGNUM = ( ONE-ULP ) / SMLNUM +* +* Compute 1-norm of each column of strictly upper triangular +* part of T to control overflow in triangular solver. +* + WORK( 1 ) = ZERO + DO 30 J = 2, N + WORK( J ) = ZERO + DO 20 I = 1, J - 1 + WORK( J ) = WORK( J ) + ABS( T( I, J ) ) + 20 CONTINUE + 30 CONTINUE +* +* Index IP is used to specify the real or complex eigenvalue: +* IP = 0, real eigenvalue, +* 1, first of conjugate complex pair: (wr,wi) +* -1, second of conjugate complex pair: (wr,wi) +* ISCOMPLEX array stores IP for each column in current block. +* + IF( RIGHTV ) THEN +* +* ============================================================ +* Compute right eigenvectors. +* +* IV is index of column in current block. +* For complex right vector, uses IV-1 for real part and IV for complex part. +* Non-blocked version always uses IV=2; +* blocked version starts with IV=NB, goes down to 1 or 2. +* (Note the "0-th" column is used for 1-norms computed above.) + IV = 2 + IF( NB.GT.2 ) THEN + IV = NB + END IF + + IP = 0 + IS = M + DO 140 KI = N, 1, -1 + IF( IP.EQ.-1 ) THEN +* previous iteration (ki+1) was second of conjugate pair, +* so this ki is first of conjugate pair; skip to end of loop + IP = 1 + GO TO 140 + ELSE IF( KI.EQ.1 ) THEN +* last column, so this ki must be real eigenvalue + IP = 0 + ELSE IF( T( KI, KI-1 ).EQ.ZERO ) THEN +* zero on sub-diagonal, so this ki is real eigenvalue + IP = 0 + ELSE +* non-zero on sub-diagonal, so this ki is second of conjugate pair + IP = -1 + END IF + + IF( SOMEV ) THEN + IF( IP.EQ.0 ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 140 + ELSE + IF( .NOT.SELECT( KI-1 ) ) + $ GO TO 140 + END IF + END IF +* +* Compute the KI-th eigenvalue (WR,WI). +* + WR = T( KI, KI ) + WI = ZERO + IF( IP.NE.0 ) + $ WI = SQRT( ABS( T( KI, KI-1 ) ) )* + $ SQRT( ABS( T( KI-1, KI ) ) ) + SMIN = MAX( ULP*( ABS( WR )+ABS( WI ) ), SMLNUM ) +* + IF( IP.EQ.0 ) THEN +* +* -------------------------------------------------------- +* Real right eigenvector +* + WORK( KI + IV*N ) = ONE +* +* Form right-hand side. +* + DO 50 K = 1, KI - 1 + WORK( K + IV*N ) = -T( K, KI ) + 50 CONTINUE +* +* Solve upper quasi-triangular system: +* [ T(1:KI-1,1:KI-1) - WR ]*X = SCALE*WORK. +* + JNXT = KI - 1 + DO 60 J = KI - 1, 1, -1 + IF( J.GT.JNXT ) + $ GO TO 60 + J1 = J + J2 = J + JNXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* + CALL DLALN2( .FALSE., 1, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+IV*N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale X(1,1) to avoid overflow when updating +* the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + IF( WORK( J ).GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL DSCAL( KI, SCALE, WORK( 1+IV*N ), 1 ) + WORK( J+IV*N ) = X( 1, 1 ) +* +* Update right-hand side +* + CALL DAXPY( J-1, -X( 1, 1 ), T( 1, J ), 1, + $ WORK( 1+IV*N ), 1 ) +* + ELSE +* +* 2-by-2 diagonal block +* + CALL DLALN2( .FALSE., 2, 1, SMIN, ONE, + $ T( J-1, J-1 ), LDT, ONE, ONE, + $ WORK( J-1+IV*N ), N, WR, ZERO, X, 2, + $ SCALE, XNORM, IERR ) +* +* Scale X(1,1) and X(2,1) to avoid overflow when +* updating the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + BETA = MAX( WORK( J-1 ), WORK( J ) ) + IF( BETA.GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + X( 2, 1 ) = X( 2, 1 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL DSCAL( KI, SCALE, WORK( 1+IV*N ), 1 ) + WORK( J-1+IV*N ) = X( 1, 1 ) + WORK( J +IV*N ) = X( 2, 1 ) +* +* Update right-hand side +* + CALL DAXPY( J-2, -X( 1, 1 ), T( 1, J-1 ), 1, + $ WORK( 1+IV*N ), 1 ) + CALL DAXPY( J-2, -X( 2, 1 ), T( 1, J ), 1, + $ WORK( 1+IV*N ), 1 ) + END IF + 60 CONTINUE +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN +* ------------------------------ +* no back-transform: copy x to VR and normalize. + CALL DCOPY( KI, WORK( 1 + IV*N ), 1, VR( 1, IS ), 1 ) +* + II = IDAMAX( KI, VR( 1, IS ), 1 ) + REMAX = ONE / ABS( VR( II, IS ) ) + CALL DSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 70 K = KI + 1, N + VR( K, IS ) = ZERO + 70 CONTINUE +* + ELSE IF( NB.EQ.1 ) THEN +* ------------------------------ +* version 1: back-transform each vector with GEMV, Q*x. + IF( KI.GT.1 ) + $ CALL DGEMV( 'N', N, KI-1, ONE, VR, LDVR, + $ WORK( 1 + IV*N ), 1, WORK( KI + IV*N ), + $ VR( 1, KI ), 1 ) +* + II = IDAMAX( N, VR( 1, KI ), 1 ) + REMAX = ONE / ABS( VR( II, KI ) ) + CALL DSCAL( N, REMAX, VR( 1, KI ), 1 ) +* + ELSE +* ------------------------------ +* version 2: back-transform block of vectors with GEMM +* zero out below vector + DO K = KI + 1, N + WORK( K + IV*N ) = ZERO + END DO + ISCOMPLEX( IV ) = IP +* back-transform and normalization is done below + END IF + ELSE +* +* -------------------------------------------------------- +* Complex right eigenvector. +* +* Initial solve +* [ ( T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I*WI) ]*X = 0. +* [ ( T(KI, KI-1) T(KI, KI) ) ] +* + IF( ABS( T( KI-1, KI ) ).GE.ABS( T( KI, KI-1 ) ) ) THEN + WORK( KI-1 + (IV-1)*N ) = ONE + WORK( KI + (IV )*N ) = WI / T( KI-1, KI ) + ELSE + WORK( KI-1 + (IV-1)*N ) = -WI / T( KI, KI-1 ) + WORK( KI + (IV )*N ) = ONE + END IF + WORK( KI + (IV-1)*N ) = ZERO + WORK( KI-1 + (IV )*N ) = ZERO +* +* Form right-hand side. +* + DO 80 K = 1, KI - 2 + WORK( K+(IV-1)*N ) = -WORK( KI-1+(IV-1)*N )*T(K,KI-1) + WORK( K+(IV )*N ) = -WORK( KI +(IV )*N )*T(K,KI ) + 80 CONTINUE +* +* Solve upper quasi-triangular system: +* [ T(1:KI-2,1:KI-2) - (WR+i*WI) ]*X = SCALE*(WORK+i*WORK2) +* + JNXT = KI - 2 + DO 90 J = KI - 2, 1, -1 + IF( J.GT.JNXT ) + $ GO TO 90 + J1 = J + J2 = J + JNXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* + CALL DLALN2( .FALSE., 1, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+(IV-1)*N ), N, + $ WR, WI, X, 2, SCALE, XNORM, IERR ) +* +* Scale X(1,1) and X(1,2) to avoid overflow when +* updating the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + IF( WORK( J ).GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + X( 1, 2 ) = X( 1, 2 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL DSCAL( KI, SCALE, WORK( 1+(IV-1)*N ), 1 ) + CALL DSCAL( KI, SCALE, WORK( 1+(IV )*N ), 1 ) + END IF + WORK( J+(IV-1)*N ) = X( 1, 1 ) + WORK( J+(IV )*N ) = X( 1, 2 ) +* +* Update the right-hand side +* + CALL DAXPY( J-1, -X( 1, 1 ), T( 1, J ), 1, + $ WORK( 1+(IV-1)*N ), 1 ) + CALL DAXPY( J-1, -X( 1, 2 ), T( 1, J ), 1, + $ WORK( 1+(IV )*N ), 1 ) +* + ELSE +* +* 2-by-2 diagonal block +* + CALL DLALN2( .FALSE., 2, 2, SMIN, ONE, + $ T( J-1, J-1 ), LDT, ONE, ONE, + $ WORK( J-1+(IV-1)*N ), N, WR, WI, X, 2, + $ SCALE, XNORM, IERR ) +* +* Scale X to avoid overflow when updating +* the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + BETA = MAX( WORK( J-1 ), WORK( J ) ) + IF( BETA.GT.BIGNUM / XNORM ) THEN + REC = ONE / XNORM + X( 1, 1 ) = X( 1, 1 )*REC + X( 1, 2 ) = X( 1, 2 )*REC + X( 2, 1 ) = X( 2, 1 )*REC + X( 2, 2 ) = X( 2, 2 )*REC + SCALE = SCALE*REC + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL DSCAL( KI, SCALE, WORK( 1+(IV-1)*N ), 1 ) + CALL DSCAL( KI, SCALE, WORK( 1+(IV )*N ), 1 ) + END IF + WORK( J-1+(IV-1)*N ) = X( 1, 1 ) + WORK( J +(IV-1)*N ) = X( 2, 1 ) + WORK( J-1+(IV )*N ) = X( 1, 2 ) + WORK( J +(IV )*N ) = X( 2, 2 ) +* +* Update the right-hand side +* + CALL DAXPY( J-2, -X( 1, 1 ), T( 1, J-1 ), 1, + $ WORK( 1+(IV-1)*N ), 1 ) + CALL DAXPY( J-2, -X( 2, 1 ), T( 1, J ), 1, + $ WORK( 1+(IV-1)*N ), 1 ) + CALL DAXPY( J-2, -X( 1, 2 ), T( 1, J-1 ), 1, + $ WORK( 1+(IV )*N ), 1 ) + CALL DAXPY( J-2, -X( 2, 2 ), T( 1, J ), 1, + $ WORK( 1+(IV )*N ), 1 ) + END IF + 90 CONTINUE +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN +* ------------------------------ +* no back-transform: copy x to VR and normalize. + CALL DCOPY( KI, WORK( 1+(IV-1)*N ), 1, VR(1,IS-1), 1 ) + CALL DCOPY( KI, WORK( 1+(IV )*N ), 1, VR(1,IS ), 1 ) +* + EMAX = ZERO + DO 100 K = 1, KI + EMAX = MAX( EMAX, ABS( VR( K, IS-1 ) )+ + $ ABS( VR( K, IS ) ) ) + 100 CONTINUE + REMAX = ONE / EMAX + CALL DSCAL( KI, REMAX, VR( 1, IS-1 ), 1 ) + CALL DSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 110 K = KI + 1, N + VR( K, IS-1 ) = ZERO + VR( K, IS ) = ZERO + 110 CONTINUE +* + ELSE IF( NB.EQ.1 ) THEN +* ------------------------------ +* version 1: back-transform each vector with GEMV, Q*x. + IF( KI.GT.2 ) THEN + CALL DGEMV( 'N', N, KI-2, ONE, VR, LDVR, + $ WORK( 1 + (IV-1)*N ), 1, + $ WORK( KI-1 + (IV-1)*N ), VR(1,KI-1), 1) + CALL DGEMV( 'N', N, KI-2, ONE, VR, LDVR, + $ WORK( 1 + (IV)*N ), 1, + $ WORK( KI + (IV)*N ), VR( 1, KI ), 1 ) + ELSE + CALL DSCAL( N, WORK(KI-1+(IV-1)*N), VR(1,KI-1), 1) + CALL DSCAL( N, WORK(KI +(IV )*N), VR(1,KI ), 1) + END IF +* + EMAX = ZERO + DO 120 K = 1, N + EMAX = MAX( EMAX, ABS( VR( K, KI-1 ) )+ + $ ABS( VR( K, KI ) ) ) + 120 CONTINUE + REMAX = ONE / EMAX + CALL DSCAL( N, REMAX, VR( 1, KI-1 ), 1 ) + CALL DSCAL( N, REMAX, VR( 1, KI ), 1 ) +* + ELSE +* ------------------------------ +* version 2: back-transform block of vectors with GEMM +* zero out below vector + DO K = KI + 1, N + WORK( K + (IV-1)*N ) = ZERO + WORK( K + (IV )*N ) = ZERO + END DO + ISCOMPLEX( IV-1 ) = -IP + ISCOMPLEX( IV ) = IP + IV = IV - 1 +* back-transform and normalization is done below + END IF + END IF + + IF( NB.GT.1 ) THEN +* -------------------------------------------------------- +* Blocked version of back-transform +* For complex case, KI2 includes both vectors (KI-1 and KI) + IF( IP.EQ.0 ) THEN + KI2 = KI + ELSE + KI2 = KI - 1 + END IF + +* Columns IV:NB of work are valid vectors. +* When the number of vectors stored reaches NB-1 or NB, +* or if this was last vector, do the GEMM + IF( (IV.LE.2) .OR. (KI2.EQ.1) ) THEN + CALL DGEMM( 'N', 'N', N, NB-IV+1, KI2+NB-IV, ONE, + $ VR, LDVR, + $ WORK( 1 + (IV)*N ), N, + $ ZERO, + $ WORK( 1 + (NB+IV)*N ), N ) +* normalize vectors + DO K = IV, NB + IF( ISCOMPLEX(K).EQ.0 ) THEN +* real eigenvector + II = IDAMAX( N, WORK( 1 + (NB+K)*N ), 1 ) + REMAX = ONE / ABS( WORK( II + (NB+K)*N ) ) + ELSE IF( ISCOMPLEX(K).EQ.1 ) THEN +* first eigenvector of conjugate pair + EMAX = ZERO + DO II = 1, N + EMAX = MAX( EMAX, + $ ABS( WORK( II + (NB+K )*N ) )+ + $ ABS( WORK( II + (NB+K+1)*N ) ) ) + END DO + REMAX = ONE / EMAX +* else if ISCOMPLEX(K).EQ.-1 +* second eigenvector of conjugate pair +* reuse same REMAX as previous K + END IF + CALL DSCAL( N, REMAX, WORK( 1 + (NB+K)*N ), 1 ) + END DO + CALL DLACPY( 'F', N, NB-IV+1, + $ WORK( 1 + (NB+IV)*N ), N, + $ VR( 1, KI2 ), LDVR ) + IV = NB + ELSE + IV = IV - 1 + END IF + END IF ! blocked back-transform +* + IS = IS - 1 + IF( IP.NE.0 ) + $ IS = IS - 1 + 140 CONTINUE + END IF + + IF( LEFTV ) THEN +* +* ============================================================ +* Compute left eigenvectors. +* +* IV is index of column in current block. +* For complex left vector, uses IV for real part and IV+1 for complex part. +* Non-blocked version always uses IV=1; +* blocked version starts with IV=1, goes up to NB-1 or NB. +* (Note the "0-th" column is used for 1-norms computed above.) + IV = 1 + IP = 0 + IS = 1 + DO 260 KI = 1, N + IF( IP.EQ.1 ) THEN +* previous iteration (ki-1) was first of conjugate pair, +* so this ki is second of conjugate pair; skip to end of loop + IP = -1 + GO TO 260 + ELSE IF( KI.EQ.N ) THEN +* last column, so this ki must be real eigenvalue + IP = 0 + ELSE IF( T( KI+1, KI ).EQ.ZERO ) THEN +* zero on sub-diagonal, so this ki is real eigenvalue + IP = 0 + ELSE +* non-zero on sub-diagonal, so this ki is first of conjugate pair + IP = 1 + END IF +* + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 260 + END IF +* +* Compute the KI-th eigenvalue (WR,WI). +* + WR = T( KI, KI ) + WI = ZERO + IF( IP.NE.0 ) + $ WI = SQRT( ABS( T( KI, KI+1 ) ) )* + $ SQRT( ABS( T( KI+1, KI ) ) ) + SMIN = MAX( ULP*( ABS( WR )+ABS( WI ) ), SMLNUM ) +* + IF( IP.EQ.0 ) THEN +* +* -------------------------------------------------------- +* Real left eigenvector +* + WORK( KI + IV*N ) = ONE +* +* Form right-hand side. +* + DO 160 K = KI + 1, N + WORK( K + IV*N ) = -T( KI, K ) + 160 CONTINUE +* +* Solve transposed quasi-triangular system: +* [ T(KI+1:N,KI+1:N) - WR ]**T * X = SCALE*WORK +* + VMAX = ONE + VCRIT = BIGNUM +* + JNXT = KI + 1 + DO 170 J = KI + 1, N + IF( J.LT.JNXT ) + $ GO TO 170 + J1 = J + J2 = J + JNXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side. +* + IF( WORK( J ).GT.VCRIT ) THEN + REC = ONE / VMAX + CALL DSCAL( N-KI+1, REC, WORK( KI+IV*N ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+IV*N ) = WORK( J+IV*N ) - + $ DDOT( J-KI-1, T( KI+1, J ), 1, + $ WORK( KI+1+IV*N ), 1 ) +* +* Solve [ T(J,J) - WR ]**T * X = WORK +* + CALL DLALN2( .FALSE., 1, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+IV*N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL DSCAL( N-KI+1, SCALE, WORK( KI+IV*N ), 1 ) + WORK( J+IV*N ) = X( 1, 1 ) + VMAX = MAX( ABS( WORK( J+IV*N ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + ELSE +* +* 2-by-2 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side. +* + BETA = MAX( WORK( J ), WORK( J+1 ) ) + IF( BETA.GT.VCRIT ) THEN + REC = ONE / VMAX + CALL DSCAL( N-KI+1, REC, WORK( KI+IV*N ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+IV*N ) = WORK( J+IV*N ) - + $ DDOT( J-KI-1, T( KI+1, J ), 1, + $ WORK( KI+1+IV*N ), 1 ) +* + WORK( J+1+IV*N ) = WORK( J+1+IV*N ) - + $ DDOT( J-KI-1, T( KI+1, J+1 ), 1, + $ WORK( KI+1+IV*N ), 1 ) +* +* Solve +* [ T(J,J)-WR T(J,J+1) ]**T * X = SCALE*( WORK1 ) +* [ T(J+1,J) T(J+1,J+1)-WR ] ( WORK2 ) +* + CALL DLALN2( .TRUE., 2, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+IV*N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL DSCAL( N-KI+1, SCALE, WORK( KI+IV*N ), 1 ) + WORK( J +IV*N ) = X( 1, 1 ) + WORK( J+1+IV*N ) = X( 2, 1 ) +* + VMAX = MAX( ABS( WORK( J +IV*N ) ), + $ ABS( WORK( J+1+IV*N ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + END IF + 170 CONTINUE +* +* Copy the vector x or Q*x to VL and normalize. +* + IF( .NOT.OVER ) THEN +* ------------------------------ +* no back-transform: copy x to VL and normalize. + CALL DCOPY( N-KI+1, WORK( KI + IV*N ), 1, + $ VL( KI, IS ), 1 ) +* + II = IDAMAX( N-KI+1, VL( KI, IS ), 1 ) + KI - 1 + REMAX = ONE / ABS( VL( II, IS ) ) + CALL DSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) +* + DO 180 K = 1, KI - 1 + VL( K, IS ) = ZERO + 180 CONTINUE +* + ELSE IF( NB.EQ.1 ) THEN +* ------------------------------ +* version 1: back-transform each vector with GEMV, Q*x. + IF( KI.LT.N ) + $ CALL DGEMV( 'N', N, N-KI, ONE, + $ VL( 1, KI+1 ), LDVL, + $ WORK( KI+1 + IV*N ), 1, + $ WORK( KI + IV*N ), VL( 1, KI ), 1 ) +* + II = IDAMAX( N, VL( 1, KI ), 1 ) + REMAX = ONE / ABS( VL( II, KI ) ) + CALL DSCAL( N, REMAX, VL( 1, KI ), 1 ) +* + ELSE +* ------------------------------ +* version 2: back-transform block of vectors with GEMM +* zero out above vector +* could go from KI-NV+1 to KI-1 + DO K = 1, KI - 1 + WORK( K + IV*N ) = ZERO + END DO + ISCOMPLEX( IV ) = IP +* back-transform and normalization is done below + END IF + ELSE +* +* -------------------------------------------------------- +* Complex left eigenvector. +* +* Initial solve: +* [ ( T(KI,KI) T(KI,KI+1) )**T - (WR - I* WI) ]*X = 0. +* [ ( T(KI+1,KI) T(KI+1,KI+1) ) ] +* + IF( ABS( T( KI, KI+1 ) ).GE.ABS( T( KI+1, KI ) ) ) THEN + WORK( KI + (IV )*N ) = WI / T( KI, KI+1 ) + WORK( KI+1 + (IV+1)*N ) = ONE + ELSE + WORK( KI + (IV )*N ) = ONE + WORK( KI+1 + (IV+1)*N ) = -WI / T( KI+1, KI ) + END IF + WORK( KI+1 + (IV )*N ) = ZERO + WORK( KI + (IV+1)*N ) = ZERO +* +* Form right-hand side. +* + DO 190 K = KI + 2, N + WORK( K+(IV )*N ) = -WORK( KI +(IV )*N )*T(KI, K) + WORK( K+(IV+1)*N ) = -WORK( KI+1+(IV+1)*N )*T(KI+1,K) + 190 CONTINUE +* +* Solve transposed quasi-triangular system: +* [ T(KI+2:N,KI+2:N)**T - (WR-i*WI) ]*X = WORK1+i*WORK2 +* + VMAX = ONE + VCRIT = BIGNUM +* + JNXT = KI + 2 + DO 200 J = KI + 2, N + IF( J.LT.JNXT ) + $ GO TO 200 + J1 = J + J2 = J + JNXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* +* Scale if necessary to avoid overflow when +* forming the right-hand side elements. +* + IF( WORK( J ).GT.VCRIT ) THEN + REC = ONE / VMAX + CALL DSCAL( N-KI+1, REC, WORK(KI+(IV )*N), 1 ) + CALL DSCAL( N-KI+1, REC, WORK(KI+(IV+1)*N), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+(IV )*N ) = WORK( J+(IV)*N ) - + $ DDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+(IV)*N ), 1 ) + WORK( J+(IV+1)*N ) = WORK( J+(IV+1)*N ) - + $ DDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+(IV+1)*N ), 1 ) +* +* Solve [ T(J,J)-(WR-i*WI) ]*(X11+i*X12)= WK+I*WK2 +* + CALL DLALN2( .FALSE., 1, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+IV*N ), N, WR, + $ -WI, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL DSCAL( N-KI+1, SCALE, WORK(KI+(IV )*N), 1) + CALL DSCAL( N-KI+1, SCALE, WORK(KI+(IV+1)*N), 1) + END IF + WORK( J+(IV )*N ) = X( 1, 1 ) + WORK( J+(IV+1)*N ) = X( 1, 2 ) + VMAX = MAX( ABS( WORK( J+(IV )*N ) ), + $ ABS( WORK( J+(IV+1)*N ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + ELSE +* +* 2-by-2 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side elements. +* + BETA = MAX( WORK( J ), WORK( J+1 ) ) + IF( BETA.GT.VCRIT ) THEN + REC = ONE / VMAX + CALL DSCAL( N-KI+1, REC, WORK(KI+(IV )*N), 1 ) + CALL DSCAL( N-KI+1, REC, WORK(KI+(IV+1)*N), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J +(IV )*N ) = WORK( J+(IV)*N ) - + $ DDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+(IV)*N ), 1 ) +* + WORK( J +(IV+1)*N ) = WORK( J+(IV+1)*N ) - + $ DDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+(IV+1)*N ), 1 ) +* + WORK( J+1+(IV )*N ) = WORK( J+1+(IV)*N ) - + $ DDOT( J-KI-2, T( KI+2, J+1 ), 1, + $ WORK( KI+2+(IV)*N ), 1 ) +* + WORK( J+1+(IV+1)*N ) = WORK( J+1+(IV+1)*N ) - + $ DDOT( J-KI-2, T( KI+2, J+1 ), 1, + $ WORK( KI+2+(IV+1)*N ), 1 ) +* +* Solve 2-by-2 complex linear equation +* [ (T(j,j) T(j,j+1) )**T - (wr-i*wi)*I ]*X = SCALE*B +* [ (T(j+1,j) T(j+1,j+1)) ] +* + CALL DLALN2( .TRUE., 2, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+IV*N ), N, WR, + $ -WI, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL DSCAL( N-KI+1, SCALE, WORK(KI+(IV )*N), 1) + CALL DSCAL( N-KI+1, SCALE, WORK(KI+(IV+1)*N), 1) + END IF + WORK( J +(IV )*N ) = X( 1, 1 ) + WORK( J +(IV+1)*N ) = X( 1, 2 ) + WORK( J+1+(IV )*N ) = X( 2, 1 ) + WORK( J+1+(IV+1)*N ) = X( 2, 2 ) + VMAX = MAX( ABS( X( 1, 1 ) ), ABS( X( 1, 2 ) ), + $ ABS( X( 2, 1 ) ), ABS( X( 2, 2 ) ), + $ VMAX ) + VCRIT = BIGNUM / VMAX +* + END IF + 200 CONTINUE +* +* Copy the vector x or Q*x to VL and normalize. +* + IF( .NOT.OVER ) THEN +* ------------------------------ +* no back-transform: copy x to VL and normalize. + CALL DCOPY( N-KI+1, WORK( KI + (IV )*N ), 1, + $ VL( KI, IS ), 1 ) + CALL DCOPY( N-KI+1, WORK( KI + (IV+1)*N ), 1, + $ VL( KI, IS+1 ), 1 ) +* + EMAX = ZERO + DO 220 K = KI, N + EMAX = MAX( EMAX, ABS( VL( K, IS ) )+ + $ ABS( VL( K, IS+1 ) ) ) + 220 CONTINUE + REMAX = ONE / EMAX + CALL DSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) + CALL DSCAL( N-KI+1, REMAX, VL( KI, IS+1 ), 1 ) +* + DO 230 K = 1, KI - 1 + VL( K, IS ) = ZERO + VL( K, IS+1 ) = ZERO + 230 CONTINUE +* + ELSE IF( NB.EQ.1 ) THEN +* ------------------------------ +* version 1: back-transform each vector with GEMV, Q*x. + IF( KI.LT.N-1 ) THEN + CALL DGEMV( 'N', N, N-KI-1, ONE, + $ VL( 1, KI+2 ), LDVL, + $ WORK( KI+2 + (IV)*N ), 1, + $ WORK( KI + (IV)*N ), + $ VL( 1, KI ), 1 ) + CALL DGEMV( 'N', N, N-KI-1, ONE, + $ VL( 1, KI+2 ), LDVL, + $ WORK( KI+2 + (IV+1)*N ), 1, + $ WORK( KI+1 + (IV+1)*N ), + $ VL( 1, KI+1 ), 1 ) + ELSE + CALL DSCAL( N, WORK(KI+ (IV )*N), VL(1, KI ), 1) + CALL DSCAL( N, WORK(KI+1+(IV+1)*N), VL(1, KI+1), 1) + END IF +* + EMAX = ZERO + DO 240 K = 1, N + EMAX = MAX( EMAX, ABS( VL( K, KI ) )+ + $ ABS( VL( K, KI+1 ) ) ) + 240 CONTINUE + REMAX = ONE / EMAX + CALL DSCAL( N, REMAX, VL( 1, KI ), 1 ) + CALL DSCAL( N, REMAX, VL( 1, KI+1 ), 1 ) +* + ELSE +* ------------------------------ +* version 2: back-transform block of vectors with GEMM +* zero out above vector +* could go from KI-NV+1 to KI-1 + DO K = 1, KI - 1 + WORK( K + (IV )*N ) = ZERO + WORK( K + (IV+1)*N ) = ZERO + END DO + ISCOMPLEX( IV ) = IP + ISCOMPLEX( IV+1 ) = -IP + IV = IV + 1 +* back-transform and normalization is done below + END IF + END IF + + IF( NB.GT.1 ) THEN +* -------------------------------------------------------- +* Blocked version of back-transform +* For complex case, KI2 includes both vectors (KI and KI+1) + IF( IP.EQ.0 ) THEN + KI2 = KI + ELSE + KI2 = KI + 1 + END IF + +* Columns 1:IV of work are valid vectors. +* When the number of vectors stored reaches NB-1 or NB, +* or if this was last vector, do the GEMM + IF( (IV.GE.NB-1) .OR. (KI2.EQ.N) ) THEN + CALL DGEMM( 'N', 'N', N, IV, N-KI2+IV, ONE, + $ VL( 1, KI2-IV+1 ), LDVL, + $ WORK( KI2-IV+1 + (1)*N ), N, + $ ZERO, + $ WORK( 1 + (NB+1)*N ), N ) +* normalize vectors + DO K = 1, IV + IF( ISCOMPLEX(K).EQ.0) THEN +* real eigenvector + II = IDAMAX( N, WORK( 1 + (NB+K)*N ), 1 ) + REMAX = ONE / ABS( WORK( II + (NB+K)*N ) ) + ELSE IF( ISCOMPLEX(K).EQ.1) THEN +* first eigenvector of conjugate pair + EMAX = ZERO + DO II = 1, N + EMAX = MAX( EMAX, + $ ABS( WORK( II + (NB+K )*N ) )+ + $ ABS( WORK( II + (NB+K+1)*N ) ) ) + END DO + REMAX = ONE / EMAX +* else if ISCOMPLEX(K).EQ.-1 +* second eigenvector of conjugate pair +* reuse same REMAX as previous K + END IF + CALL DSCAL( N, REMAX, WORK( 1 + (NB+K)*N ), 1 ) + END DO + CALL DLACPY( 'F', N, IV, + $ WORK( 1 + (NB+1)*N ), N, + $ VL( 1, KI2-IV+1 ), LDVL ) + IV = 1 + ELSE + IV = IV + 1 + END IF + END IF ! blocked back-transform +* + IS = IS + 1 + IF( IP.NE.0 ) + $ IS = IS + 1 + 260 CONTINUE + END IF +* + RETURN +* +* End of DTREVC3 +* + END diff --git a/dspl/liblapack/SRC/dtrexc.f b/dspl/liblapack/SRC/dtrexc.f new file mode 100644 index 0000000..468ae47 --- /dev/null +++ b/dspl/liblapack/SRC/dtrexc.f @@ -0,0 +1,428 @@ +*> \brief \b DTREXC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTREXC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTREXC( COMPQ, N, T, LDT, Q, LDQ, IFST, ILST, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ +* INTEGER IFST, ILST, INFO, LDQ, LDT, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION Q( LDQ, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTREXC reorders the real Schur factorization of a real matrix +*> A = Q*T*Q**T, so that the diagonal block of T with row index IFST is +*> moved to row ILST. +*> +*> The real Schur form T is reordered by an orthogonal similarity +*> transformation Z**T*T*Z, and optionally the matrix Q of Schur vectors +*> is updated by postmultiplying it with Z. +*> +*> T must be in Schur canonical form (as returned by DHSEQR), that is, +*> block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each +*> 2-by-2 diagonal block has its diagonal elements equal and its +*> off-diagonal elements of opposite sign. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> = 'V': update the matrix Q of Schur vectors; +*> = 'N': do not update Q. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> If N == 0 arguments ILST and IFST may be any value. +*> \endverbatim +*> +*> \param[in,out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,N) +*> On entry, the upper quasi-triangular matrix T, in Schur +*> Schur canonical form. +*> On exit, the reordered upper quasi-triangular matrix, again +*> in Schur canonical form. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> On entry, if COMPQ = 'V', the matrix Q of Schur vectors. +*> On exit, if COMPQ = 'V', Q has been postmultiplied by the +*> orthogonal transformation matrix Z which reorders T. +*> If COMPQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= 1, and if +*> COMPQ = 'V', LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IFST +*> \verbatim +*> IFST is INTEGER +*> \endverbatim +*> +*> \param[in,out] ILST +*> \verbatim +*> ILST is INTEGER +*> +*> Specify the reordering of the diagonal blocks of T. +*> The block with row index IFST is moved to row ILST, by a +*> sequence of transpositions between adjacent blocks. +*> On exit, if IFST pointed on entry to the second row of a +*> 2-by-2 block, it is changed to point to the first row; ILST +*> always points to the first row of the block in its final +*> position (which may differ from its input value by +1 or -1). +*> 1 <= IFST <= N; 1 <= ILST <= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> = 1: two adjacent blocks were too close to swap (the problem +*> is very ill-conditioned); T may have been partially +*> reordered, and ILST points to the first row of the +*> current position of the block being moved. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DTREXC( COMPQ, N, T, LDT, Q, LDQ, IFST, ILST, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPQ + INTEGER IFST, ILST, INFO, LDQ, LDT, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION Q( LDQ, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTQ + INTEGER HERE, NBF, NBL, NBNEXT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLAEXC, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input arguments. +* + INFO = 0 + WANTQ = LSAME( COMPQ, 'V' ) + IF( .NOT.WANTQ .AND. .NOT.LSAME( COMPQ, 'N' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.MAX( 1, N ) ) ) THEN + INFO = -6 + ELSE IF(( IFST.LT.1 .OR. IFST.GT.N ).AND.( N.GT.0 )) THEN + INFO = -7 + ELSE IF(( ILST.LT.1 .OR. ILST.GT.N ).AND.( N.GT.0 )) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTREXC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* +* Determine the first row of specified block +* and find out it is 1 by 1 or 2 by 2. +* + IF( IFST.GT.1 ) THEN + IF( T( IFST, IFST-1 ).NE.ZERO ) + $ IFST = IFST - 1 + END IF + NBF = 1 + IF( IFST.LT.N ) THEN + IF( T( IFST+1, IFST ).NE.ZERO ) + $ NBF = 2 + END IF +* +* Determine the first row of the final block +* and find out it is 1 by 1 or 2 by 2. +* + IF( ILST.GT.1 ) THEN + IF( T( ILST, ILST-1 ).NE.ZERO ) + $ ILST = ILST - 1 + END IF + NBL = 1 + IF( ILST.LT.N ) THEN + IF( T( ILST+1, ILST ).NE.ZERO ) + $ NBL = 2 + END IF +* + IF( IFST.EQ.ILST ) + $ RETURN +* + IF( IFST.LT.ILST ) THEN +* +* Update ILST +* + IF( NBF.EQ.2 .AND. NBL.EQ.1 ) + $ ILST = ILST - 1 + IF( NBF.EQ.1 .AND. NBL.EQ.2 ) + $ ILST = ILST + 1 +* + HERE = IFST +* + 10 CONTINUE +* +* Swap block with next one below +* + IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN +* +* Current block either 1 by 1 or 2 by 2 +* + NBNEXT = 1 + IF( HERE+NBF+1.LE.N ) THEN + IF( T( HERE+NBF+1, HERE+NBF ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, NBF, NBNEXT, + $ WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + NBNEXT +* +* Test if 2 by 2 block breaks into two 1 by 1 blocks +* + IF( NBF.EQ.2 ) THEN + IF( T( HERE+1, HERE ).EQ.ZERO ) + $ NBF = 3 + END IF +* + ELSE +* +* Current block consists of two 1 by 1 blocks each of which +* must be swapped individually +* + NBNEXT = 1 + IF( HERE+3.LE.N ) THEN + IF( T( HERE+3, HERE+2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE+1, 1, NBNEXT, + $ WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + IF( NBNEXT.EQ.1 ) THEN +* +* Swap two 1 by 1 blocks, no problems possible +* + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, NBNEXT, + $ WORK, INFO ) + HERE = HERE + 1 + ELSE +* +* Recompute NBNEXT in case 2 by 2 split +* + IF( T( HERE+2, HERE+1 ).EQ.ZERO ) + $ NBNEXT = 1 + IF( NBNEXT.EQ.2 ) THEN +* +* 2 by 2 Block did not split +* + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, + $ NBNEXT, WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 2 + ELSE +* +* 2 by 2 Block did split +* + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, 1, + $ WORK, INFO ) + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE+1, 1, 1, + $ WORK, INFO ) + HERE = HERE + 2 + END IF + END IF + END IF + IF( HERE.LT.ILST ) + $ GO TO 10 +* + ELSE +* + HERE = IFST + 20 CONTINUE +* +* Swap block with next one above +* + IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN +* +* Current block either 1 by 1 or 2 by 2 +* + NBNEXT = 1 + IF( HERE.GE.3 ) THEN + IF( T( HERE-1, HERE-2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-NBNEXT, NBNEXT, + $ NBF, WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - NBNEXT +* +* Test if 2 by 2 block breaks into two 1 by 1 blocks +* + IF( NBF.EQ.2 ) THEN + IF( T( HERE+1, HERE ).EQ.ZERO ) + $ NBF = 3 + END IF +* + ELSE +* +* Current block consists of two 1 by 1 blocks each of which +* must be swapped individually +* + NBNEXT = 1 + IF( HERE.GE.3 ) THEN + IF( T( HERE-1, HERE-2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-NBNEXT, NBNEXT, + $ 1, WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + IF( NBNEXT.EQ.1 ) THEN +* +* Swap two 1 by 1 blocks, no problems possible +* + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, NBNEXT, 1, + $ WORK, INFO ) + HERE = HERE - 1 + ELSE +* +* Recompute NBNEXT in case 2 by 2 split +* + IF( T( HERE, HERE-1 ).EQ.ZERO ) + $ NBNEXT = 1 + IF( NBNEXT.EQ.2 ) THEN +* +* 2 by 2 Block did not split +* + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-1, 2, 1, + $ WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 2 + ELSE +* +* 2 by 2 Block did split +* + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, 1, + $ WORK, INFO ) + CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-1, 1, 1, + $ WORK, INFO ) + HERE = HERE - 2 + END IF + END IF + END IF + IF( HERE.GT.ILST ) + $ GO TO 20 + END IF + ILST = HERE +* + RETURN +* +* End of DTREXC +* + END diff --git a/dspl/liblapack/SRC/dtrrfs.f b/dspl/liblapack/SRC/dtrrfs.f new file mode 100644 index 0000000..c9fe55c --- /dev/null +++ b/dspl/liblapack/SRC/dtrrfs.f @@ -0,0 +1,472 @@ +*> \brief \b DTRRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTRRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, X, +* LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, LDA, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), BERR( * ), FERR( * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTRRFS provides error bounds and backward error estimates for the +*> solution to a system of linear equations with a triangular +*> coefficient matrix. +*> +*> The solution matrix X must be computed by DTRTRS or some other +*> means before entering this routine. DTRRFS does not do iterative +*> refinement because doing so cannot improve the backward error. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The triangular matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of the array A contains the upper +*> triangular matrix, and the strictly lower triangular part of +*> A is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of the array A contains the lower triangular +*> matrix, and the strictly upper triangular part of A is not +*> referenced. If DIAG = 'U', the diagonal elements of A are +*> also not referenced and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is DOUBLE PRECISION array, dimension (LDX,NRHS) +*> The solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, X, + $ LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDA, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), BERR( * ), FERR( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANST + INTEGER I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DCOPY, DLACN2, DTRMV, DTRSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A or A**T, depending on TRANS. +* + CALL DCOPY( N, X( 1, J ), 1, WORK( N+1 ), 1 ) + CALL DTRMV( UPLO, TRANS, DIAG, N, A, LDA, WORK( N+1 ), 1 ) + CALL DAXPY( N, -ONE, B( 1, J ), 1, WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = ABS( X( K, J ) ) + DO 30 I = 1, K + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = ABS( X( K, J ) ) + DO 50 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + 50 CONTINUE + WORK( K ) = WORK( K ) + XK + 60 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = ABS( X( K, J ) ) + DO 70 I = K, N + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + 70 CONTINUE + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = ABS( X( K, J ) ) + DO 90 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + 90 CONTINUE + WORK( K ) = WORK( K ) + XK + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A**T)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = 1, K + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 110 CONTINUE + WORK( K ) = WORK( K ) + S + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = ABS( X( K, J ) ) + DO 130 I = 1, K - 1 + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 130 CONTINUE + WORK( K ) = WORK( K ) + S + 140 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, N + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 150 CONTINUE + WORK( K ) = WORK( K ) + S + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = ABS( X( K, J ) ) + DO 170 I = K + 1, N + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 170 CONTINUE + WORK( K ) = WORK( K ) + S + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use DLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL DLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**T). +* + CALL DTRSV( UPLO, TRANST, DIAG, N, A, LDA, WORK( N+1 ), + $ 1 ) + DO 220 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 230 CONTINUE + CALL DTRSV( UPLO, TRANS, DIAG, N, A, LDA, WORK( N+1 ), + $ 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of DTRRFS +* + END diff --git a/dspl/liblapack/SRC/dtrsen.f b/dspl/liblapack/SRC/dtrsen.f new file mode 100644 index 0000000..1fa126c --- /dev/null +++ b/dspl/liblapack/SRC/dtrsen.f @@ -0,0 +1,570 @@ +*> \brief \b DTRSEN +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTRSEN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTRSEN( JOB, COMPQ, SELECT, N, T, LDT, Q, LDQ, WR, WI, +* M, S, SEP, WORK, LWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ, JOB +* INTEGER INFO, LDQ, LDT, LIWORK, LWORK, M, N +* DOUBLE PRECISION S, SEP +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* INTEGER IWORK( * ) +* DOUBLE PRECISION Q( LDQ, * ), T( LDT, * ), WI( * ), WORK( * ), +* $ WR( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTRSEN reorders the real Schur factorization of a real matrix +*> A = Q*T*Q**T, so that a selected cluster of eigenvalues appears in +*> the leading diagonal blocks of the upper quasi-triangular matrix T, +*> and the leading columns of Q form an orthonormal basis of the +*> corresponding right invariant subspace. +*> +*> Optionally the routine computes the reciprocal condition numbers of +*> the cluster of eigenvalues and/or the invariant subspace. +*> +*> T must be in Schur canonical form (as returned by DHSEQR), that is, +*> block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each +*> 2-by-2 diagonal block has its diagonal elements equal and its +*> off-diagonal elements of opposite sign. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies whether condition numbers are required for the +*> cluster of eigenvalues (S) or the invariant subspace (SEP): +*> = 'N': none; +*> = 'E': for eigenvalues only (S); +*> = 'V': for invariant subspace only (SEP); +*> = 'B': for both eigenvalues and invariant subspace (S and +*> SEP). +*> \endverbatim +*> +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> = 'V': update the matrix Q of Schur vectors; +*> = 'N': do not update Q. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> SELECT specifies the eigenvalues in the selected cluster. To +*> select a real eigenvalue w(j), SELECT(j) must be set to +*> .TRUE.. To select a complex conjugate pair of eigenvalues +*> w(j) and w(j+1), corresponding to a 2-by-2 diagonal block, +*> either SELECT(j) or SELECT(j+1) or both must be set to +*> .TRUE.; a complex conjugate pair of eigenvalues must be +*> either both included in the cluster or both excluded. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in,out] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,N) +*> On entry, the upper quasi-triangular matrix T, in Schur +*> canonical form. +*> On exit, T is overwritten by the reordered matrix T, again in +*> Schur canonical form, with the selected eigenvalues in the +*> leading diagonal blocks. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is DOUBLE PRECISION array, dimension (LDQ,N) +*> On entry, if COMPQ = 'V', the matrix Q of Schur vectors. +*> On exit, if COMPQ = 'V', Q has been postmultiplied by the +*> orthogonal transformation matrix which reorders T; the +*> leading M columns of Q form an orthonormal basis for the +*> specified invariant subspace. +*> If COMPQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= 1; and if COMPQ = 'V', LDQ >= N. +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> \param[out] WI +*> \verbatim +*> WI is DOUBLE PRECISION array, dimension (N) +*> +*> The real and imaginary parts, respectively, of the reordered +*> eigenvalues of T. The eigenvalues are stored in the same +*> order as on the diagonal of T, with WR(i) = T(i,i) and, if +*> T(i:i+1,i:i+1) is a 2-by-2 diagonal block, WI(i) > 0 and +*> WI(i+1) = -WI(i). Note that if a complex eigenvalue is +*> sufficiently ill-conditioned, then its value may differ +*> significantly from its value before reordering. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The dimension of the specified invariant subspace. +*> 0 < = M <= N. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION +*> If JOB = 'E' or 'B', S is a lower bound on the reciprocal +*> condition number for the selected cluster of eigenvalues. +*> S cannot underestimate the true reciprocal condition number +*> by more than a factor of sqrt(N). If M = 0 or N, S = 1. +*> If JOB = 'N' or 'V', S is not referenced. +*> \endverbatim +*> +*> \param[out] SEP +*> \verbatim +*> SEP is DOUBLE PRECISION +*> If JOB = 'V' or 'B', SEP is the estimated reciprocal +*> condition number of the specified invariant subspace. If +*> M = 0 or N, SEP = norm(T). +*> If JOB = 'N' or 'E', SEP is not referenced. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If JOB = 'N', LWORK >= max(1,N); +*> if JOB = 'E', LWORK >= max(1,M*(N-M)); +*> if JOB = 'V' or 'B', LWORK >= max(1,2*M*(N-M)). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If JOB = 'N' or 'E', LIWORK >= 1; +*> if JOB = 'V' or 'B', LIWORK >= max(1,M*(N-M)). +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> = 1: reordering of T failed because some eigenvalues are too +*> close to separate (the problem is very ill-conditioned); +*> T may have been partially reordered, and WR and WI +*> contain the eigenvalues in the same order as in T; S and +*> SEP (if requested) are set to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> DTRSEN first collects the selected eigenvalues by computing an +*> orthogonal transformation Z to move them to the top left corner of T. +*> In other words, the selected eigenvalues are the eigenvalues of T11 +*> in: +*> +*> Z**T * T * Z = ( T11 T12 ) n1 +*> ( 0 T22 ) n2 +*> n1 n2 +*> +*> where N = n1+n2 and Z**T means the transpose of Z. The first n1 columns +*> of Z span the specified invariant subspace of T. +*> +*> If T has been obtained from the real Schur factorization of a matrix +*> A = Q*T*Q**T, then the reordered real Schur factorization of A is given +*> by A = (Q*Z)*(Z**T*T*Z)*(Q*Z)**T, and the first n1 columns of Q*Z span +*> the corresponding invariant subspace of A. +*> +*> The reciprocal condition number of the average of the eigenvalues of +*> T11 may be returned in S. S lies between 0 (very badly conditioned) +*> and 1 (very well conditioned). It is computed as follows. First we +*> compute R so that +*> +*> P = ( I R ) n1 +*> ( 0 0 ) n2 +*> n1 n2 +*> +*> is the projector on the invariant subspace associated with T11. +*> R is the solution of the Sylvester equation: +*> +*> T11*R - R*T22 = T12. +*> +*> Let F-norm(M) denote the Frobenius-norm of M and 2-norm(M) denote +*> the two-norm of M. Then S is computed as the lower bound +*> +*> (1 + F-norm(R)**2)**(-1/2) +*> +*> on the reciprocal of 2-norm(P), the true reciprocal condition number. +*> S cannot underestimate 1 / 2-norm(P) by more than a factor of +*> sqrt(N). +*> +*> An approximate error bound for the computed average of the +*> eigenvalues of T11 is +*> +*> EPS * norm(T) / S +*> +*> where EPS is the machine precision. +*> +*> The reciprocal condition number of the right invariant subspace +*> spanned by the first n1 columns of Z (or of Q*Z) is returned in SEP. +*> SEP is defined as the separation of T11 and T22: +*> +*> sep( T11, T22 ) = sigma-min( C ) +*> +*> where sigma-min(C) is the smallest singular value of the +*> n1*n2-by-n1*n2 matrix +*> +*> C = kprod( I(n2), T11 ) - kprod( transpose(T22), I(n1) ) +*> +*> I(m) is an m by m identity matrix, and kprod denotes the Kronecker +*> product. We estimate sigma-min(C) by the reciprocal of an estimate of +*> the 1-norm of inverse(C). The true reciprocal 1-norm of inverse(C) +*> cannot differ from sigma-min(C) by more than a factor of sqrt(n1*n2). +*> +*> When SEP is small, small changes in T can cause large changes in +*> the invariant subspace. An approximate bound on the maximum angular +*> error in the computed right invariant subspace is +*> +*> EPS * norm(T) / SEP +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTRSEN( JOB, COMPQ, SELECT, N, T, LDT, Q, LDQ, WR, WI, + $ M, S, SEP, WORK, LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, JOB + INTEGER INFO, LDQ, LDT, LIWORK, LWORK, M, N + DOUBLE PRECISION S, SEP +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION Q( LDQ, * ), T( LDT, * ), WI( * ), WORK( * ), + $ WR( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, PAIR, SWAP, WANTBH, WANTQ, WANTS, + $ WANTSP + INTEGER IERR, K, KASE, KK, KS, LIWMIN, LWMIN, N1, N2, + $ NN + DOUBLE PRECISION EST, RNORM, SCALE +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLANGE + EXTERNAL LSAME, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DLACN2, DLACPY, DTREXC, DTRSYL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH + WANTQ = LSAME( COMPQ, 'V' ) +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.WANTS .AND. .NOT.WANTSP ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -8 + ELSE +* +* Set M to the dimension of the specified invariant subspace, +* and test LWORK and LIWORK. +* + M = 0 + PAIR = .FALSE. + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + IF( K.LT.N ) THEN + IF( T( K+1, K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) + $ M = M + 2 + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE +* + N1 = M + N2 = N - M + NN = N1*N2 +* + IF( WANTSP ) THEN + LWMIN = MAX( 1, 2*NN ) + LIWMIN = MAX( 1, NN ) + ELSE IF( LSAME( JOB, 'N' ) ) THEN + LWMIN = MAX( 1, N ) + LIWMIN = 1 + ELSE IF( LSAME( JOB, 'E' ) ) THEN + LWMIN = MAX( 1, NN ) + LIWMIN = 1 + END IF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -17 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRSEN', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.N .OR. M.EQ.0 ) THEN + IF( WANTS ) + $ S = ONE + IF( WANTSP ) + $ SEP = DLANGE( '1', N, N, T, LDT, WORK ) + GO TO 40 + END IF +* +* Collect the selected blocks at the top-left corner of T. +* + KS = 0 + PAIR = .FALSE. + DO 20 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + SWAP = SELECT( K ) + IF( K.LT.N ) THEN + IF( T( K+1, K ).NE.ZERO ) THEN + PAIR = .TRUE. + SWAP = SWAP .OR. SELECT( K+1 ) + END IF + END IF + IF( SWAP ) THEN + KS = KS + 1 +* +* Swap the K-th block to position KS. +* + IERR = 0 + KK = K + IF( K.NE.KS ) + $ CALL DTREXC( COMPQ, N, T, LDT, Q, LDQ, KK, KS, WORK, + $ IERR ) + IF( IERR.EQ.1 .OR. IERR.EQ.2 ) THEN +* +* Blocks too close to swap: exit. +* + INFO = 1 + IF( WANTS ) + $ S = ZERO + IF( WANTSP ) + $ SEP = ZERO + GO TO 40 + END IF + IF( PAIR ) + $ KS = KS + 1 + END IF + END IF + 20 CONTINUE +* + IF( WANTS ) THEN +* +* Solve Sylvester equation for R: +* +* T11*R - R*T22 = scale*T12 +* + CALL DLACPY( 'F', N1, N2, T( 1, N1+1 ), LDT, WORK, N1 ) + CALL DTRSYL( 'N', 'N', -1, N1, N2, T, LDT, T( N1+1, N1+1 ), + $ LDT, WORK, N1, SCALE, IERR ) +* +* Estimate the reciprocal of the condition number of the cluster +* of eigenvalues. +* + RNORM = DLANGE( 'F', N1, N2, WORK, N1, WORK ) + IF( RNORM.EQ.ZERO ) THEN + S = ONE + ELSE + S = SCALE / ( SQRT( SCALE*SCALE / RNORM+RNORM )* + $ SQRT( RNORM ) ) + END IF + END IF +* + IF( WANTSP ) THEN +* +* Estimate sep(T11,T22). +* + EST = ZERO + KASE = 0 + 30 CONTINUE + CALL DLACN2( NN, WORK( NN+1 ), WORK, IWORK, EST, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve T11*R - R*T22 = scale*X. +* + CALL DTRSYL( 'N', 'N', -1, N1, N2, T, LDT, + $ T( N1+1, N1+1 ), LDT, WORK, N1, SCALE, + $ IERR ) + ELSE +* +* Solve T11**T*R - R*T22**T = scale*X. +* + CALL DTRSYL( 'T', 'T', -1, N1, N2, T, LDT, + $ T( N1+1, N1+1 ), LDT, WORK, N1, SCALE, + $ IERR ) + END IF + GO TO 30 + END IF +* + SEP = SCALE / EST + END IF +* + 40 CONTINUE +* +* Store the output eigenvalues in WR and WI. +* + DO 50 K = 1, N + WR( K ) = T( K, K ) + WI( K ) = ZERO + 50 CONTINUE + DO 60 K = 1, N - 1 + IF( T( K+1, K ).NE.ZERO ) THEN + WI( K ) = SQRT( ABS( T( K, K+1 ) ) )* + $ SQRT( ABS( T( K+1, K ) ) ) + WI( K+1 ) = -WI( K ) + END IF + 60 CONTINUE +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of DTRSEN +* + END diff --git a/dspl/liblapack/SRC/dtrsna.f b/dspl/liblapack/SRC/dtrsna.f new file mode 100644 index 0000000..dd0ad2f --- /dev/null +++ b/dspl/liblapack/SRC/dtrsna.f @@ -0,0 +1,603 @@ +*> \brief \b DTRSNA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTRSNA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTRSNA( JOB, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, +* LDVR, S, SEP, MM, M, WORK, LDWORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER HOWMNY, JOB +* INTEGER INFO, LDT, LDVL, LDVR, LDWORK, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* INTEGER IWORK( * ) +* DOUBLE PRECISION S( * ), SEP( * ), T( LDT, * ), VL( LDVL, * ), +* $ VR( LDVR, * ), WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTRSNA estimates reciprocal condition numbers for specified +*> eigenvalues and/or right eigenvectors of a real upper +*> quasi-triangular matrix T (or of any matrix Q*T*Q**T with Q +*> orthogonal). +*> +*> T must be in Schur canonical form (as returned by DHSEQR), that is, +*> block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each +*> 2-by-2 diagonal block has its diagonal elements equal and its +*> off-diagonal elements of opposite sign. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies whether condition numbers are required for +*> eigenvalues (S) or eigenvectors (SEP): +*> = 'E': for eigenvalues only (S); +*> = 'V': for eigenvectors only (SEP); +*> = 'B': for both eigenvalues and eigenvectors (S and SEP). +*> \endverbatim +*> +*> \param[in] HOWMNY +*> \verbatim +*> HOWMNY is CHARACTER*1 +*> = 'A': compute condition numbers for all eigenpairs; +*> = 'S': compute condition numbers for selected eigenpairs +*> specified by the array SELECT. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> If HOWMNY = 'S', SELECT specifies the eigenpairs for which +*> condition numbers are required. To select condition numbers +*> for the eigenpair corresponding to a real eigenvalue w(j), +*> SELECT(j) must be set to .TRUE.. To select condition numbers +*> corresponding to a complex conjugate pair of eigenvalues w(j) +*> and w(j+1), either SELECT(j) or SELECT(j+1) or both, must be +*> set to .TRUE.. +*> If HOWMNY = 'A', SELECT is not referenced. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is DOUBLE PRECISION array, dimension (LDT,N) +*> The upper quasi-triangular matrix T, in Schur canonical form. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION array, dimension (LDVL,M) +*> If JOB = 'E' or 'B', VL must contain left eigenvectors of T +*> (or of any Q*T*Q**T with Q orthogonal), corresponding to the +*> eigenpairs specified by HOWMNY and SELECT. The eigenvectors +*> must be stored in consecutive columns of VL, as returned by +*> DHSEIN or DTREVC. +*> If JOB = 'V', VL is not referenced. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. +*> LDVL >= 1; and if JOB = 'E' or 'B', LDVL >= N. +*> \endverbatim +*> +*> \param[in] VR +*> \verbatim +*> VR is DOUBLE PRECISION array, dimension (LDVR,M) +*> If JOB = 'E' or 'B', VR must contain right eigenvectors of T +*> (or of any Q*T*Q**T with Q orthogonal), corresponding to the +*> eigenpairs specified by HOWMNY and SELECT. The eigenvectors +*> must be stored in consecutive columns of VR, as returned by +*> DHSEIN or DTREVC. +*> If JOB = 'V', VR is not referenced. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. +*> LDVR >= 1; and if JOB = 'E' or 'B', LDVR >= N. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (MM) +*> If JOB = 'E' or 'B', the reciprocal condition numbers of the +*> selected eigenvalues, stored in consecutive elements of the +*> array. For a complex conjugate pair of eigenvalues two +*> consecutive elements of S are set to the same value. Thus +*> S(j), SEP(j), and the j-th columns of VL and VR all +*> correspond to the same eigenpair (but not in general the +*> j-th eigenpair, unless all eigenpairs are selected). +*> If JOB = 'V', S is not referenced. +*> \endverbatim +*> +*> \param[out] SEP +*> \verbatim +*> SEP is DOUBLE PRECISION array, dimension (MM) +*> If JOB = 'V' or 'B', the estimated reciprocal condition +*> numbers of the selected eigenvectors, stored in consecutive +*> elements of the array. For a complex eigenvector two +*> consecutive elements of SEP are set to the same value. If +*> the eigenvalues cannot be reordered to compute SEP(j), SEP(j) +*> is set to 0; this can only occur when the true value would be +*> very small anyway. +*> If JOB = 'E', SEP is not referenced. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of elements in the arrays S (if JOB = 'E' or 'B') +*> and/or SEP (if JOB = 'V' or 'B'). MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of elements of the arrays S and/or SEP actually +*> used to store the estimated condition numbers. +*> If HOWMNY = 'A', M is set to N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LDWORK,N+6) +*> If JOB = 'E', WORK is not referenced. +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. +*> LDWORK >= 1; and if JOB = 'V' or 'B', LDWORK >= N. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (2*(N-1)) +*> If JOB = 'E', IWORK is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The reciprocal of the condition number of an eigenvalue lambda is +*> defined as +*> +*> S(lambda) = |v**T*u| / (norm(u)*norm(v)) +*> +*> where u and v are the right and left eigenvectors of T corresponding +*> to lambda; v**T denotes the transpose of v, and norm(u) +*> denotes the Euclidean norm. These reciprocal condition numbers always +*> lie between zero (very badly conditioned) and one (very well +*> conditioned). If n = 1, S(lambda) is defined to be 1. +*> +*> An approximate error bound for a computed eigenvalue W(i) is given by +*> +*> EPS * norm(T) / S(i) +*> +*> where EPS is the machine precision. +*> +*> The reciprocal of the condition number of the right eigenvector u +*> corresponding to lambda is defined as follows. Suppose +*> +*> T = ( lambda c ) +*> ( 0 T22 ) +*> +*> Then the reciprocal condition number is +*> +*> SEP( lambda, T22 ) = sigma-min( T22 - lambda*I ) +*> +*> where sigma-min denotes the smallest singular value. We approximate +*> the smallest singular value by the reciprocal of an estimate of the +*> one-norm of the inverse of T22 - lambda*I. If n = 1, SEP(1) is +*> defined to be abs(T(1,1)). +*> +*> An approximate error bound for a computed right eigenvector VR(i) +*> is given by +*> +*> EPS * norm(T) / SEP(i) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTRSNA( JOB, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, + $ LDVR, S, SEP, MM, M, WORK, LDWORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, JOB + INTEGER INFO, LDT, LDVL, LDVR, LDWORK, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION S( * ), SEP( * ), T( LDT, * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL PAIR, SOMCON, WANTBH, WANTS, WANTSP + INTEGER I, IERR, IFST, ILST, J, K, KASE, KS, N2, NN + DOUBLE PRECISION BIGNUM, COND, CS, DELTA, DUMM, EPS, EST, LNRM, + $ MU, PROD, PROD1, PROD2, RNRM, SCALE, SMLNUM, SN +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) + DOUBLE PRECISION DUMMY( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT, DLAMCH, DLAPY2, DNRM2 + EXTERNAL LSAME, DDOT, DLAMCH, DLAPY2, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, DLACN2, DLACPY, DLAQTR, DTREXC, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH +* + SOMCON = LSAME( HOWMNY, 'S' ) +* + INFO = 0 + IF( .NOT.WANTS .AND. .NOT.WANTSP ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( WANTS .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( WANTS .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE +* +* Set M to the number of eigenpairs for which condition numbers +* are required, and test MM. +* + IF( SOMCON ) THEN + M = 0 + PAIR = .FALSE. + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + IF( K.LT.N ) THEN + IF( T( K+1, K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) + $ M = M + 2 + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE + ELSE + M = N + END IF +* + IF( MM.LT.M ) THEN + INFO = -13 + ELSE IF( LDWORK.LT.1 .OR. ( WANTSP .AND. LDWORK.LT.N ) ) THEN + INFO = -16 + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRSNA', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( SOMCON ) THEN + IF( .NOT.SELECT( 1 ) ) + $ RETURN + END IF + IF( WANTS ) + $ S( 1 ) = ONE + IF( WANTSP ) + $ SEP( 1 ) = ABS( T( 1, 1 ) ) + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* + KS = 0 + PAIR = .FALSE. + DO 60 K = 1, N +* +* Determine whether T(k,k) begins a 1-by-1 or 2-by-2 block. +* + IF( PAIR ) THEN + PAIR = .FALSE. + GO TO 60 + ELSE + IF( K.LT.N ) + $ PAIR = T( K+1, K ).NE.ZERO + END IF +* +* Determine whether condition numbers are required for the k-th +* eigenpair. +* + IF( SOMCON ) THEN + IF( PAIR ) THEN + IF( .NOT.SELECT( K ) .AND. .NOT.SELECT( K+1 ) ) + $ GO TO 60 + ELSE + IF( .NOT.SELECT( K ) ) + $ GO TO 60 + END IF + END IF +* + KS = KS + 1 +* + IF( WANTS ) THEN +* +* Compute the reciprocal condition number of the k-th +* eigenvalue. +* + IF( .NOT.PAIR ) THEN +* +* Real eigenvalue. +* + PROD = DDOT( N, VR( 1, KS ), 1, VL( 1, KS ), 1 ) + RNRM = DNRM2( N, VR( 1, KS ), 1 ) + LNRM = DNRM2( N, VL( 1, KS ), 1 ) + S( KS ) = ABS( PROD ) / ( RNRM*LNRM ) + ELSE +* +* Complex eigenvalue. +* + PROD1 = DDOT( N, VR( 1, KS ), 1, VL( 1, KS ), 1 ) + PROD1 = PROD1 + DDOT( N, VR( 1, KS+1 ), 1, VL( 1, KS+1 ), + $ 1 ) + PROD2 = DDOT( N, VL( 1, KS ), 1, VR( 1, KS+1 ), 1 ) + PROD2 = PROD2 - DDOT( N, VL( 1, KS+1 ), 1, VR( 1, KS ), + $ 1 ) + RNRM = DLAPY2( DNRM2( N, VR( 1, KS ), 1 ), + $ DNRM2( N, VR( 1, KS+1 ), 1 ) ) + LNRM = DLAPY2( DNRM2( N, VL( 1, KS ), 1 ), + $ DNRM2( N, VL( 1, KS+1 ), 1 ) ) + COND = DLAPY2( PROD1, PROD2 ) / ( RNRM*LNRM ) + S( KS ) = COND + S( KS+1 ) = COND + END IF + END IF +* + IF( WANTSP ) THEN +* +* Estimate the reciprocal condition number of the k-th +* eigenvector. +* +* Copy the matrix T to the array WORK and swap the diagonal +* block beginning at T(k,k) to the (1,1) position. +* + CALL DLACPY( 'Full', N, N, T, LDT, WORK, LDWORK ) + IFST = K + ILST = 1 + CALL DTREXC( 'No Q', N, WORK, LDWORK, DUMMY, 1, IFST, ILST, + $ WORK( 1, N+1 ), IERR ) +* + IF( IERR.EQ.1 .OR. IERR.EQ.2 ) THEN +* +* Could not swap because blocks not well separated +* + SCALE = ONE + EST = BIGNUM + ELSE +* +* Reordering successful +* + IF( WORK( 2, 1 ).EQ.ZERO ) THEN +* +* Form C = T22 - lambda*I in WORK(2:N,2:N). +* + DO 20 I = 2, N + WORK( I, I ) = WORK( I, I ) - WORK( 1, 1 ) + 20 CONTINUE + N2 = 1 + NN = N - 1 + ELSE +* +* Triangularize the 2 by 2 block by unitary +* transformation U = [ cs i*ss ] +* [ i*ss cs ]. +* such that the (1,1) position of WORK is complex +* eigenvalue lambda with positive imaginary part. (2,2) +* position of WORK is the complex eigenvalue lambda +* with negative imaginary part. +* + MU = SQRT( ABS( WORK( 1, 2 ) ) )* + $ SQRT( ABS( WORK( 2, 1 ) ) ) + DELTA = DLAPY2( MU, WORK( 2, 1 ) ) + CS = MU / DELTA + SN = -WORK( 2, 1 ) / DELTA +* +* Form +* +* C**T = WORK(2:N,2:N) + i*[rwork(1) ..... rwork(n-1) ] +* [ mu ] +* [ .. ] +* [ .. ] +* [ mu ] +* where C**T is transpose of matrix C, +* and RWORK is stored starting in the N+1-st column of +* WORK. +* + DO 30 J = 3, N + WORK( 2, J ) = CS*WORK( 2, J ) + WORK( J, J ) = WORK( J, J ) - WORK( 1, 1 ) + 30 CONTINUE + WORK( 2, 2 ) = ZERO +* + WORK( 1, N+1 ) = TWO*MU + DO 40 I = 2, N - 1 + WORK( I, N+1 ) = SN*WORK( 1, I+1 ) + 40 CONTINUE + N2 = 2 + NN = 2*( N-1 ) + END IF +* +* Estimate norm(inv(C**T)) +* + EST = ZERO + KASE = 0 + 50 CONTINUE + CALL DLACN2( NN, WORK( 1, N+2 ), WORK( 1, N+4 ), IWORK, + $ EST, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN + IF( N2.EQ.1 ) THEN +* +* Real eigenvalue: solve C**T*x = scale*c. +* + CALL DLAQTR( .TRUE., .TRUE., N-1, WORK( 2, 2 ), + $ LDWORK, DUMMY, DUMM, SCALE, + $ WORK( 1, N+4 ), WORK( 1, N+6 ), + $ IERR ) + ELSE +* +* Complex eigenvalue: solve +* C**T*(p+iq) = scale*(c+id) in real arithmetic. +* + CALL DLAQTR( .TRUE., .FALSE., N-1, WORK( 2, 2 ), + $ LDWORK, WORK( 1, N+1 ), MU, SCALE, + $ WORK( 1, N+4 ), WORK( 1, N+6 ), + $ IERR ) + END IF + ELSE + IF( N2.EQ.1 ) THEN +* +* Real eigenvalue: solve C*x = scale*c. +* + CALL DLAQTR( .FALSE., .TRUE., N-1, WORK( 2, 2 ), + $ LDWORK, DUMMY, DUMM, SCALE, + $ WORK( 1, N+4 ), WORK( 1, N+6 ), + $ IERR ) + ELSE +* +* Complex eigenvalue: solve +* C*(p+iq) = scale*(c+id) in real arithmetic. +* + CALL DLAQTR( .FALSE., .FALSE., N-1, + $ WORK( 2, 2 ), LDWORK, + $ WORK( 1, N+1 ), MU, SCALE, + $ WORK( 1, N+4 ), WORK( 1, N+6 ), + $ IERR ) +* + END IF + END IF +* + GO TO 50 + END IF + END IF +* + SEP( KS ) = SCALE / MAX( EST, SMLNUM ) + IF( PAIR ) + $ SEP( KS+1 ) = SEP( KS ) + END IF +* + IF( PAIR ) + $ KS = KS + 1 +* + 60 CONTINUE + RETURN +* +* End of DTRSNA +* + END diff --git a/dspl/liblapack/SRC/dtrsyl.f b/dspl/liblapack/SRC/dtrsyl.f new file mode 100644 index 0000000..105032c --- /dev/null +++ b/dspl/liblapack/SRC/dtrsyl.f @@ -0,0 +1,1002 @@ +*> \brief \b DTRSYL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTRSYL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, +* LDC, SCALE, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANA, TRANB +* INTEGER INFO, ISGN, LDA, LDB, LDC, M, N +* DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTRSYL solves the real Sylvester matrix equation: +*> +*> op(A)*X + X*op(B) = scale*C or +*> op(A)*X - X*op(B) = scale*C, +*> +*> where op(A) = A or A**T, and A and B are both upper quasi- +*> triangular. A is M-by-M and B is N-by-N; the right hand side C and +*> the solution X are M-by-N; and scale is an output scale factor, set +*> <= 1 to avoid overflow in X. +*> +*> A and B must be in Schur canonical form (as returned by DHSEQR), that +*> is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; +*> each 2-by-2 diagonal block has its diagonal elements equal and its +*> off-diagonal elements of opposite sign. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANA +*> \verbatim +*> TRANA is CHARACTER*1 +*> Specifies the option op(A): +*> = 'N': op(A) = A (No transpose) +*> = 'T': op(A) = A**T (Transpose) +*> = 'C': op(A) = A**H (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] TRANB +*> \verbatim +*> TRANB is CHARACTER*1 +*> Specifies the option op(B): +*> = 'N': op(B) = B (No transpose) +*> = 'T': op(B) = B**T (Transpose) +*> = 'C': op(B) = B**H (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] ISGN +*> \verbatim +*> ISGN is INTEGER +*> Specifies the sign in the equation: +*> = +1: solve op(A)*X + X*op(B) = scale*C +*> = -1: solve op(A)*X - X*op(B) = scale*C +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The order of the matrix A, and the number of rows in the +*> matrices X and C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix B, and the number of columns in the +*> matrices X and C. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,M) +*> The upper quasi-triangular matrix A, in Schur canonical form. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,N) +*> The upper quasi-triangular matrix B, in Schur canonical form. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (LDC,N) +*> On entry, the M-by-N right hand side matrix C. +*> On exit, C is overwritten by the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M) +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> The scale factor, scale, set <= 1 to avoid overflow in X. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> = 1: A and B have common or very close eigenvalues; perturbed +*> values were used to solve the equation (but the matrices +*> A and B are unchanged). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleSYcomputational +* +* ===================================================================== + SUBROUTINE DTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, + $ LDC, SCALE, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANA, TRANB + INTEGER INFO, ISGN, LDA, LDB, LDC, M, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRNA, NOTRNB + INTEGER IERR, J, K, K1, K2, KNEXT, L, L1, L2, LNEXT + DOUBLE PRECISION A11, BIGNUM, DA11, DB, EPS, SCALOC, SGN, SMIN, + $ SMLNUM, SUML, SUMR, XNORM +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUM( 1 ), VEC( 2, 2 ), X( 2, 2 ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DDOT, DLAMCH, DLANGE + EXTERNAL LSAME, DDOT, DLAMCH, DLANGE +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, DLALN2, DLASY2, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and Test input parameters +* + NOTRNA = LSAME( TRANA, 'N' ) + NOTRNB = LSAME( TRANB, 'N' ) +* + INFO = 0 + IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'T' ) .AND. .NOT. + $ LSAME( TRANA, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'T' ) .AND. .NOT. + $ LSAME( TRANB, 'C' ) ) THEN + INFO = -2 + ELSE IF( ISGN.NE.1 .AND. ISGN.NE.-1 ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRSYL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + SCALE = ONE + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM*DBLE( M*N ) / EPS + BIGNUM = ONE / SMLNUM +* + SMIN = MAX( SMLNUM, EPS*DLANGE( 'M', M, M, A, LDA, DUM ), + $ EPS*DLANGE( 'M', N, N, B, LDB, DUM ) ) +* + SGN = ISGN +* + IF( NOTRNA .AND. NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-left corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* M L-1 +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)]. +* I=K+1 J=1 +* +* Start column loop (index = L) +* L1 (L2) : column index of the first (first) row of X(K,L). +* + LNEXT = 1 + DO 60 L = 1, N + IF( L.LT.LNEXT ) + $ GO TO 60 + IF( L.EQ.N ) THEN + L1 = L + L2 = L + ELSE + IF( B( L+1, L ).NE.ZERO ) THEN + L1 = L + L2 = L + 1 + LNEXT = L + 2 + ELSE + L1 = L + L2 = L + LNEXT = L + 1 + END IF + END IF +* +* Start row loop (index = K) +* K1 (K2): row index of the first (last) row of X(K,L). +* + KNEXT = M + DO 50 K = M, 1, -1 + IF( K.GT.KNEXT ) + $ GO TO 50 + IF( K.EQ.1 ) THEN + K1 = K + K2 = K + ELSE + IF( A( K, K-1 ).NE.ZERO ) THEN + K1 = K - 1 + K2 = K + KNEXT = K - 2 + ELSE + K1 = K + K2 = K + KNEXT = K - 1 + END IF + END IF +* + IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN + SUML = DDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L1 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) + SCALOC = ONE +* + A11 = A( K1, K1 ) + SGN*B( L1, L1 ) + DA11 = ABS( A11 ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( VEC( 1, 1 ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11 +* + IF( SCALOC.NE.ONE ) THEN + DO 10 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 10 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) +* + ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN +* + SUML = DDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = DDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + CALL DLALN2( .FALSE., 2, 1, SMIN, ONE, A( K1, K1 ), + $ LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 20 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 20 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K2, L1 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN +* + SUML = DDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L1 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) ) +* + SUML = DDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L2 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) ) +* + CALL DLALN2( .TRUE., 2, 1, SMIN, ONE, B( L1, L1 ), + $ LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 30 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 30 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN +* + SUML = DDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L2 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = DDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L2 ), 1 ) + SUMR = DDOT( L1-1, C( K2, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR ) +* + CALL DLASY2( .FALSE., .FALSE., ISGN, 2, 2, + $ A( K1, K1 ), LDA, B( L1, L1 ), LDB, VEC, + $ 2, SCALOC, X, 2, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 40 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 40 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 1, 2 ) + C( K2, L1 ) = X( 2, 1 ) + C( K2, L2 ) = X( 2, 2 ) + END IF +* + 50 CONTINUE +* + 60 CONTINUE +* + ELSE IF( .NOT.NOTRNA .AND. NOTRNB ) THEN +* +* Solve A**T *X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* upper-left corner column by column by +* +* A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* K-1 T L-1 +* R(K,L) = SUM [A(I,K)**T*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)] +* I=1 J=1 +* +* Start column loop (index = L) +* L1 (L2): column index of the first (last) row of X(K,L) +* + LNEXT = 1 + DO 120 L = 1, N + IF( L.LT.LNEXT ) + $ GO TO 120 + IF( L.EQ.N ) THEN + L1 = L + L2 = L + ELSE + IF( B( L+1, L ).NE.ZERO ) THEN + L1 = L + L2 = L + 1 + LNEXT = L + 2 + ELSE + L1 = L + L2 = L + LNEXT = L + 1 + END IF + END IF +* +* Start row loop (index = K) +* K1 (K2): row index of the first (last) row of X(K,L) +* + KNEXT = 1 + DO 110 K = 1, M + IF( K.LT.KNEXT ) + $ GO TO 110 + IF( K.EQ.M ) THEN + K1 = K + K2 = K + ELSE + IF( A( K+1, K ).NE.ZERO ) THEN + K1 = K + K2 = K + 1 + KNEXT = K + 2 + ELSE + K1 = K + K2 = K + KNEXT = K + 1 + END IF + END IF +* + IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) + SCALOC = ONE +* + A11 = A( K1, K1 ) + SGN*B( L1, L1 ) + DA11 = ABS( A11 ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( VEC( 1, 1 ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11 +* + IF( SCALOC.NE.ONE ) THEN + DO 70 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 70 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) +* + ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN +* + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + CALL DLALN2( .TRUE., 2, 1, SMIN, ONE, A( K1, K1 ), + $ LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 80 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 80 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K2, L1 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN +* + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) ) +* + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) ) +* + CALL DLALN2( .TRUE., 2, 1, SMIN, ONE, B( L1, L1 ), + $ LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 90 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 90 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN +* + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 ) + SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( K1-1, A( 1, K2 ), 1, C( 1, L2 ), 1 ) + SUMR = DDOT( L1-1, C( K2, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR ) +* + CALL DLASY2( .TRUE., .FALSE., ISGN, 2, 2, A( K1, K1 ), + $ LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X, + $ 2, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 100 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 100 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 1, 2 ) + C( K2, L1 ) = X( 2, 1 ) + C( K2, L2 ) = X( 2, 2 ) + END IF +* + 110 CONTINUE + 120 CONTINUE +* + ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A**T*X + ISGN*X*B**T = scale*C. +* +* The (K,L)th block of X is determined starting from +* top-right corner column by column by +* +* A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L) +* +* Where +* K-1 N +* R(K,L) = SUM [A(I,K)**T*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T]. +* I=1 J=L+1 +* +* Start column loop (index = L) +* L1 (L2): column index of the first (last) row of X(K,L) +* + LNEXT = N + DO 180 L = N, 1, -1 + IF( L.GT.LNEXT ) + $ GO TO 180 + IF( L.EQ.1 ) THEN + L1 = L + L2 = L + ELSE + IF( B( L, L-1 ).NE.ZERO ) THEN + L1 = L - 1 + L2 = L + LNEXT = L - 2 + ELSE + L1 = L + L2 = L + LNEXT = L - 1 + END IF + END IF +* +* Start row loop (index = K) +* K1 (K2): row index of the first (last) row of X(K,L) +* + KNEXT = 1 + DO 170 K = 1, M + IF( K.LT.KNEXT ) + $ GO TO 170 + IF( K.EQ.M ) THEN + K1 = K + K2 = K + ELSE + IF( A( K+1, K ).NE.ZERO ) THEN + K1 = K + K2 = K + 1 + KNEXT = K + 2 + ELSE + K1 = K + K2 = K + KNEXT = K + 1 + END IF + END IF +* + IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( N-L1, C( K1, MIN( L1+1, N ) ), LDC, + $ B( L1, MIN( L1+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) + SCALOC = ONE +* + A11 = A( K1, K1 ) + SGN*B( L1, L1 ) + DA11 = ABS( A11 ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( VEC( 1, 1 ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11 +* + IF( SCALOC.NE.ONE ) THEN + DO 130 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 130 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) +* + ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN +* + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + CALL DLALN2( .TRUE., 2, 1, SMIN, ONE, A( K1, K1 ), + $ LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 140 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 140 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K2, L1 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN +* + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) ) +* + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 ) + SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) ) +* + CALL DLALN2( .FALSE., 2, 1, SMIN, ONE, B( L1, L1 ), + $ LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 150 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 150 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN +* + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 ) + SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 ) + SUMR = DDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( K1-1, A( 1, K2 ), 1, C( 1, L2 ), 1 ) + SUMR = DDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR ) +* + CALL DLASY2( .TRUE., .TRUE., ISGN, 2, 2, A( K1, K1 ), + $ LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X, + $ 2, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 160 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 160 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 1, 2 ) + C( K2, L1 ) = X( 2, 1 ) + C( K2, L2 ) = X( 2, 2 ) + END IF +* + 170 CONTINUE + 180 CONTINUE +* + ELSE IF( NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B**T = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-right corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L) +* +* Where +* M N +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T]. +* I=K+1 J=L+1 +* +* Start column loop (index = L) +* L1 (L2): column index of the first (last) row of X(K,L) +* + LNEXT = N + DO 240 L = N, 1, -1 + IF( L.GT.LNEXT ) + $ GO TO 240 + IF( L.EQ.1 ) THEN + L1 = L + L2 = L + ELSE + IF( B( L, L-1 ).NE.ZERO ) THEN + L1 = L - 1 + L2 = L + LNEXT = L - 2 + ELSE + L1 = L + L2 = L + LNEXT = L - 1 + END IF + END IF +* +* Start row loop (index = K) +* K1 (K2): row index of the first (last) row of X(K,L) +* + KNEXT = M + DO 230 K = M, 1, -1 + IF( K.GT.KNEXT ) + $ GO TO 230 + IF( K.EQ.1 ) THEN + K1 = K + K2 = K + ELSE + IF( A( K, K-1 ).NE.ZERO ) THEN + K1 = K - 1 + K2 = K + KNEXT = K - 2 + ELSE + K1 = K + K2 = K + KNEXT = K - 1 + END IF + END IF +* + IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN + SUML = DDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L1 ), 1 ) + SUMR = DDOT( N-L1, C( K1, MIN( L1+1, N ) ), LDC, + $ B( L1, MIN( L1+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) + SCALOC = ONE +* + A11 = A( K1, K1 ) + SGN*B( L1, L1 ) + DA11 = ABS( A11 ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( VEC( 1, 1 ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11 +* + IF( SCALOC.NE.ONE ) THEN + DO 190 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 190 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) +* + ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN +* + SUML = DDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = DDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + CALL DLALN2( .FALSE., 2, 1, SMIN, ONE, A( K1, K1 ), + $ LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 200 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 200 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K2, L1 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN +* + SUML = DDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L1 ), 1 ) + SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) ) +* + SUML = DDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L2 ), 1 ) + SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) ) +* + CALL DLALN2( .FALSE., 2, 1, SMIN, ONE, B( L1, L1 ), + $ LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 210 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 210 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN +* + SUML = DDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L2 ), 1 ) + SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = DDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = DDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L2 ), 1 ) + SUMR = DDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR ) +* + CALL DLASY2( .FALSE., .TRUE., ISGN, 2, 2, A( K1, K1 ), + $ LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X, + $ 2, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 220 J = 1, N + CALL DSCAL( M, SCALOC, C( 1, J ), 1 ) + 220 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 1, 2 ) + C( K2, L1 ) = X( 2, 1 ) + C( K2, L2 ) = X( 2, 2 ) + END IF +* + 230 CONTINUE + 240 CONTINUE +* + END IF +* + RETURN +* +* End of DTRSYL +* + END diff --git a/dspl/liblapack/SRC/dtrti2.f b/dspl/liblapack/SRC/dtrti2.f new file mode 100644 index 0000000..0a9d5b6 --- /dev/null +++ b/dspl/liblapack/SRC/dtrti2.f @@ -0,0 +1,212 @@ +*> \brief \b DTRTI2 computes the inverse of a triangular matrix (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTRTI2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTRTI2( UPLO, DIAG, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTRTI2 computes the inverse of a real upper or lower triangular +*> matrix. +*> +*> This is the Level 2 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the triangular matrix A. If UPLO = 'U', the +*> leading n by n upper triangular part of the array A contains +*> the upper triangular matrix, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of the array A contains +*> the lower triangular matrix, and the strictly upper +*> triangular part of A is not referenced. If DIAG = 'U', the +*> diagonal elements of A are also not referenced and are +*> assumed to be 1. +*> +*> On exit, the (triangular) inverse of the original matrix, in +*> the same storage format. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DTRTI2( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DTRMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRTI2', -INFO ) + RETURN + END IF +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix. +* + DO 10 J = 1, N + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF +* +* Compute elements 1:j-1 of j-th column. +* + CALL DTRMV( 'Upper', 'No transpose', DIAG, J-1, A, LDA, + $ A( 1, J ), 1 ) + CALL DSCAL( J-1, AJJ, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix. +* + DO 20 J = N, 1, -1 + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF + IF( J.LT.N ) THEN +* +* Compute elements j+1:n of j-th column. +* + CALL DTRMV( 'Lower', 'No transpose', DIAG, N-J, + $ A( J+1, J+1 ), LDA, A( J+1, J ), 1 ) + CALL DSCAL( N-J, AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of DTRTI2 +* + END diff --git a/dspl/liblapack/SRC/dtrtri.f b/dspl/liblapack/SRC/dtrtri.f new file mode 100644 index 0000000..d34b40b --- /dev/null +++ b/dspl/liblapack/SRC/dtrtri.f @@ -0,0 +1,242 @@ +*> \brief \b DTRTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTRTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTRTRI( UPLO, DIAG, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTRTRI computes the inverse of a real upper or lower triangular +*> matrix A. +*> +*> This is the Level 3 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the triangular matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of the array A contains +*> the upper triangular matrix, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of the array A contains +*> the lower triangular matrix, and the strictly upper +*> triangular part of A is not referenced. If DIAG = 'U', the +*> diagonal elements of A are also not referenced and are +*> assumed to be 1. +*> On exit, the (triangular) inverse of the original matrix, in +*> the same storage format. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, A(i,i) is exactly zero. The triangular +*> matrix is singular and its inverse can not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DTRTRI( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JB, NB, NN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DTRMM, DTRSM, DTRTI2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity if non-unit. +* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + INFO = 0 + END IF +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'DTRTRI', UPLO // DIAG, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL DTRTI2( UPLO, DIAG, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix +* + DO 20 J = 1, N, NB + JB = MIN( NB, N-J+1 ) +* +* Compute rows 1:j-1 of current block column +* + CALL DTRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1, + $ JB, ONE, A, LDA, A( 1, J ), LDA ) + CALL DTRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1, + $ JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA ) +* +* Compute inverse of current diagonal block +* + CALL DTRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO ) + 20 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 30 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) + IF( J+JB.LE.N ) THEN +* +* Compute rows j+jb:n of current block column +* + CALL DTRMM( 'Left', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA, + $ A( J+JB, J ), LDA ) + CALL DTRSM( 'Right', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, -ONE, A( J, J ), LDA, + $ A( J+JB, J ), LDA ) + END IF +* +* Compute inverse of current diagonal block +* + CALL DTRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO ) + 30 CONTINUE + END IF + END IF +* + RETURN +* +* End of DTRTRI +* + END diff --git a/dspl/liblapack/SRC/dtrtrs.f b/dspl/liblapack/SRC/dtrtrs.f new file mode 100644 index 0000000..3e5ff6f --- /dev/null +++ b/dspl/liblapack/SRC/dtrtrs.f @@ -0,0 +1,226 @@ +*> \brief \b DTRTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTRTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTRTRS solves a triangular system of the form +*> +*> A * X = B or A**T * X = B, +*> +*> where A is a triangular matrix of order N, and B is an N-by-NRHS +*> matrix. A check is made to verify that A is nonsingular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The triangular matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of the array A contains the upper +*> triangular matrix, and the strictly lower triangular part of +*> A is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of the array A contains the lower triangular +*> matrix, and the strictly upper triangular part of A is not +*> referenced. If DIAG = 'U', the diagonal elements of A are +*> also not referenced and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, if INFO = 0, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of A is zero, +*> indicating that the matrix is singular and the solutions +*> X have not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + END IF + INFO = 0 +* +* Solve A * x = b or A**T * x = b. +* + CALL DTRSM( 'Left', UPLO, TRANS, DIAG, N, NRHS, ONE, A, LDA, B, + $ LDB ) +* + RETURN +* +* End of DTRTRS +* + END diff --git a/dspl/liblapack/SRC/dtrttf.f b/dspl/liblapack/SRC/dtrttf.f new file mode 100644 index 0000000..8e91c3d --- /dev/null +++ b/dspl/liblapack/SRC/dtrttf.f @@ -0,0 +1,492 @@ +*> \brief \b DTRTTF copies a triangular matrix from the standard full format (TR) to the rectangular full packed format (TF). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTRTTF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTRTTF( TRANSR, UPLO, N, A, LDA, ARF, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( 0: LDA-1, 0: * ), ARF( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTRTTF copies a triangular matrix A from standard full format (TR) +*> to rectangular full packed format (TF) . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': ARF in Normal form is wanted; +*> = 'T': ARF in Transpose form is wanted. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N). +*> On entry, the triangular matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of the array A contains +*> the upper triangular matrix, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of the array A contains +*> the lower triangular matrix, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the matrix A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] ARF +*> \verbatim +*> ARF is DOUBLE PRECISION array, dimension (NT). +*> NT=N*(N+1)/2. On exit, the triangular matrix A in RFP format. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Rectangular Full Packed (RFP) Format when N is +*> even. We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> the transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> the transpose of the last three columns of AP lower. +*> This covers the case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 03 04 05 33 43 53 +*> 13 14 15 00 44 54 +*> 23 24 25 10 11 55 +*> 33 34 35 20 21 22 +*> 00 44 45 30 31 32 +*> 01 11 55 40 41 42 +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We then consider Rectangular Full Packed (RFP) Format when N is +*> odd. We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> the transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> the transpose of the last two columns of AP lower. +*> This covers the case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 02 03 04 00 33 43 +*> 12 13 14 10 11 44 +*> 22 23 24 20 21 22 +*> 00 33 34 30 31 32 +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> 02 12 22 00 01 00 10 20 30 40 50 +*> 03 13 23 33 11 33 11 21 31 41 51 +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +* +* ===================================================================== + SUBROUTINE DTRTTF( TRANSR, UPLO, N, A, LDA, ARF, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( 0: LDA-1, 0: * ), ARF( 0: * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER I, IJ, J, K, L, N1, N2, NT, NX2, NP1X2 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'T' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRTTF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + IF( N.EQ.1 ) THEN + ARF( 0 ) = A( 0, 0 ) + END IF + RETURN + END IF +* +* Size of array ARF(0:nt-1) +* + NT = N*( N+1 ) / 2 +* +* Set N1 and N2 depending on LOWER: for N even N1=N2=K +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* If N is odd, set NISODD = .TRUE., LDA=N+1 and A is (N+1)--by--K2. +* If N is even, set K = N/2 and NISODD = .FALSE., LDA=N and A is +* N--by--(N+1)/2. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + IF( .NOT.LOWER ) + $ NP1X2 = N + N + 2 + ELSE + NISODD = .TRUE. + IF( .NOT.LOWER ) + $ NX2 = N + N + END IF +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* N is odd, TRANSR = 'N', and UPLO = 'L' +* + IJ = 0 + DO J = 0, N2 + DO I = N1, N2 + J + ARF( IJ ) = A( N2+J, I ) + IJ = IJ + 1 + END DO + DO I = J, N - 1 + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* N is odd, TRANSR = 'N', and UPLO = 'U' +* + IJ = NT - N + DO J = N - 1, N1, -1 + DO I = 0, J + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + DO L = J - N1, N1 - 1 + ARF( IJ ) = A( J-N1, L ) + IJ = IJ + 1 + END DO + IJ = IJ - NX2 + END DO +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* N is odd, TRANSR = 'T', and UPLO = 'L' +* + IJ = 0 + DO J = 0, N2 - 1 + DO I = 0, J + ARF( IJ ) = A( J, I ) + IJ = IJ + 1 + END DO + DO I = N1 + J, N - 1 + ARF( IJ ) = A( I, N1+J ) + IJ = IJ + 1 + END DO + END DO + DO J = N2, N - 1 + DO I = 0, N1 - 1 + ARF( IJ ) = A( J, I ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* N is odd, TRANSR = 'T', and UPLO = 'U' +* + IJ = 0 + DO J = 0, N1 + DO I = N1, N - 1 + ARF( IJ ) = A( J, I ) + IJ = IJ + 1 + END DO + END DO + DO J = 0, N1 - 1 + DO I = 0, J + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + DO L = N2 + J, N - 1 + ARF( IJ ) = A( N2+J, L ) + IJ = IJ + 1 + END DO + END DO +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* N is even, TRANSR = 'N', and UPLO = 'L' +* + IJ = 0 + DO J = 0, K - 1 + DO I = K, K + J + ARF( IJ ) = A( K+J, I ) + IJ = IJ + 1 + END DO + DO I = J, N - 1 + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* N is even, TRANSR = 'N', and UPLO = 'U' +* + IJ = NT - N - 1 + DO J = N - 1, K, -1 + DO I = 0, J + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + DO L = J - K, K - 1 + ARF( IJ ) = A( J-K, L ) + IJ = IJ + 1 + END DO + IJ = IJ - NP1X2 + END DO +* + END IF +* + ELSE +* +* N is even and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* N is even, TRANSR = 'T', and UPLO = 'L' +* + IJ = 0 + J = K + DO I = K, N - 1 + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + DO J = 0, K - 2 + DO I = 0, J + ARF( IJ ) = A( J, I ) + IJ = IJ + 1 + END DO + DO I = K + 1 + J, N - 1 + ARF( IJ ) = A( I, K+1+J ) + IJ = IJ + 1 + END DO + END DO + DO J = K - 1, N - 1 + DO I = 0, K - 1 + ARF( IJ ) = A( J, I ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* N is even, TRANSR = 'T', and UPLO = 'U' +* + IJ = 0 + DO J = 0, K + DO I = K, N - 1 + ARF( IJ ) = A( J, I ) + IJ = IJ + 1 + END DO + END DO + DO J = 0, K - 2 + DO I = 0, J + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + DO L = K + 1 + J, N - 1 + ARF( IJ ) = A( K+1+J, L ) + IJ = IJ + 1 + END DO + END DO +* Note that here, on exit of the loop, J = K-1 + DO I = 0, J + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of DTRTTF +* + END diff --git a/dspl/liblapack/SRC/dtrttp.f b/dspl/liblapack/SRC/dtrttp.f new file mode 100644 index 0000000..5ee3dd5 --- /dev/null +++ b/dspl/liblapack/SRC/dtrttp.f @@ -0,0 +1,176 @@ +*> \brief \b DTRTTP copies a triangular matrix from the standard full format (TR) to the standard packed format (TP). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTRTTP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTRTTP( UPLO, N, A, LDA, AP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTRTTP copies a triangular matrix A from full format (TR) to standard +*> packed format (TP). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular. +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices AP and A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On exit, the triangular matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] AP +*> \verbatim +*> AP is DOUBLE PRECISION array, dimension (N*(N+1)/2) +*> On exit, the upper or lower triangular matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DTRTTP( UPLO, N, A, LDA, AP, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* .. +* .. Local Scalars .. + LOGICAL LOWER + INTEGER I, J, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTRTTP', -INFO ) + RETURN + END IF +* + IF( LOWER ) THEN + K = 0 + DO J = 1, N + DO I = J, N + K = K + 1 + AP( K ) = A( I, J ) + END DO + END DO + ELSE + K = 0 + DO J = 1, N + DO I = 1, J + K = K + 1 + AP( K ) = A( I, J ) + END DO + END DO + END IF +* +* + RETURN +* +* End of DTRTTP +* + END diff --git a/dspl/liblapack/SRC/dtzrzf.f b/dspl/liblapack/SRC/dtzrzf.f new file mode 100644 index 0000000..0d4f922 --- /dev/null +++ b/dspl/liblapack/SRC/dtzrzf.f @@ -0,0 +1,313 @@ +*> \brief \b DTZRZF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTZRZF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DTZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTZRZF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A +*> to upper triangular form by means of orthogonal transformations. +*> +*> The upper trapezoidal matrix A is factored as +*> +*> A = ( R 0 ) * Z, +*> +*> where Z is an N-by-N orthogonal matrix and R is an M-by-M upper +*> triangular matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= M. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the leading M-by-N upper trapezoidal part of the +*> array A must contain the matrix to be factorized. +*> On exit, the leading M-by-M upper triangular part of A +*> contains the upper triangular matrix R, and elements M+1 to +*> N of the first M rows of A, with the array TAU, represent the +*> orthogonal matrix Z as a product of M elementary reflectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (M) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The N-by-N matrix Z can be computed by +*> +*> Z = Z(1)*Z(2)* ... *Z(M) +*> +*> where each N-by-N Z(k) is given by +*> +*> Z(k) = I - tau(k)*v(k)*v(k)**T +*> +*> with v(k) is the kth row vector of the M-by-N matrix +*> +*> V = ( I A(:,M+1:N) ) +*> +*> I is the M-by-M identity matrix, A(:,M+1:N) +*> is the output stored in A on exit from DTZRZF, +*> and tau(k) is the kth element of the array TAU. +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE DTZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IWS, KI, KK, LDWORK, LWKMIN, LWKOPT, + $ M1, MU, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DLARZB, DLARZT, DLATRZ +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( M.EQ.0 .OR. M.EQ.N ) THEN + LWKOPT = 1 + LWKMIN = 1 + ELSE +* +* Determine the block size. +* + NB = ILAENV( 1, 'DGERQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + LWKMIN = MAX( 1, M ) + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DTZRZF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 ) THEN + RETURN + ELSE IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = M + IF( NB.GT.1 .AND. NB.LT.M ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DGERQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.M ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DGERQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.M .AND. NX.LT.M ) THEN +* +* Use blocked code initially. +* The last kk rows are handled by the block method. +* + M1 = MIN( M+1, N ) + KI = ( ( M-NX-1 ) / NB )*NB + KK = MIN( M, KI+NB ) +* + DO 20 I = M - KK + KI + 1, M - KK + 1, -NB + IB = MIN( M-I+1, NB ) +* +* Compute the TZ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL DLATRZ( IB, N-I+1, N-M, A( I, I ), LDA, TAU( I ), + $ WORK ) + IF( I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL DLARZT( 'Backward', 'Rowwise', N-M, IB, A( I, M1 ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:i-1,i:n) from the right +* + CALL DLARZB( 'Right', 'No transpose', 'Backward', + $ 'Rowwise', I-1, N-I+1, IB, N-M, A( I, M1 ), + $ LDA, WORK, LDWORK, A( 1, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 20 CONTINUE + MU = I + NB - 1 + ELSE + MU = M + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 ) + $ CALL DLATRZ( MU, N, N-M, A, LDA, TAU, WORK ) +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DTZRZF +* + END diff --git a/dspl/liblapack/SRC/dzsum1.f b/dspl/liblapack/SRC/dzsum1.f new file mode 100644 index 0000000..70a4042 --- /dev/null +++ b/dspl/liblapack/SRC/dzsum1.f @@ -0,0 +1,140 @@ +*> \brief \b DZSUM1 forms the 1-norm of the complex vector using the true absolute value. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DZSUM1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DZSUM1( N, CX, INCX ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* .. +* .. Array Arguments .. +* COMPLEX*16 CX( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DZSUM1 takes the sum of the absolute values of a complex +*> vector and returns a double precision result. +*> +*> Based on DZASUM from the Level 1 BLAS. +*> The change is to use the 'genuine' absolute value. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of elements in the vector CX. +*> \endverbatim +*> +*> \param[in] CX +*> \verbatim +*> CX is COMPLEX*16 array, dimension (N) +*> The vector whose elements will be summed. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The spacing between successive values of CX. INCX > 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Nick Higham for use with ZLACON. +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DZSUM1( N, CX, INCX ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCX, N +* .. +* .. Array Arguments .. + COMPLEX*16 CX( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, NINCX + DOUBLE PRECISION STEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + DZSUM1 = 0.0D0 + STEMP = 0.0D0 + IF( N.LE.0 ) + $ RETURN + IF( INCX.EQ.1 ) + $ GO TO 20 +* +* CODE FOR INCREMENT NOT EQUAL TO 1 +* + NINCX = N*INCX + DO 10 I = 1, NINCX, INCX +* +* NEXT LINE MODIFIED. +* + STEMP = STEMP + ABS( CX( I ) ) + 10 CONTINUE + DZSUM1 = STEMP + RETURN +* +* CODE FOR INCREMENT EQUAL TO 1 +* + 20 CONTINUE + DO 30 I = 1, N +* +* NEXT LINE MODIFIED. +* + STEMP = STEMP + ABS( CX( I ) ) + 30 CONTINUE + DZSUM1 = STEMP + RETURN +* +* End of DZSUM1 +* + END diff --git a/dspl/liblapack/SRC/icmax1.f b/dspl/liblapack/SRC/icmax1.f new file mode 100644 index 0000000..4141473 --- /dev/null +++ b/dspl/liblapack/SRC/icmax1.f @@ -0,0 +1,141 @@ +*> \brief \b ICMAX1 finds the index of the first vector element of maximum absolute value. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ICMAX1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ICMAX1( N, CX, INCX ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* .. +* .. Array Arguments .. +* COMPLEX CX( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ICMAX1 finds the index of the first vector element of maximum absolute value. +*> +*> Based on ICAMAX from Level 1 BLAS. +*> The change is to use the 'genuine' absolute value. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of elements in the vector CX. +*> \endverbatim +*> +*> \param[in] CX +*> \verbatim +*> CX is COMPLEX array, dimension (N) +*> The vector CX. The ICMAX1 function returns the index of its first +*> element of maximum absolute value. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The spacing between successive values of CX. INCX >= 1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date February 2014 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Nick Higham for use with CLACON. +* +* ===================================================================== + INTEGER FUNCTION ICMAX1( N, CX, INCX ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* February 2014 +* +* .. Scalar Arguments .. + INTEGER INCX, N +* .. +* .. Array Arguments .. + COMPLEX CX(*) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + REAL SMAX + INTEGER I, IX +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + ICMAX1 = 0 + IF (N.LT.1 .OR. INCX.LE.0) RETURN + ICMAX1 = 1 + IF (N.EQ.1) RETURN + IF (INCX.EQ.1) THEN +* +* code for increment equal to 1 +* + SMAX = ABS(CX(1)) + DO I = 2,N + IF (ABS(CX(I)).GT.SMAX) THEN + ICMAX1 = I + SMAX = ABS(CX(I)) + END IF + END DO + ELSE +* +* code for increment not equal to 1 +* + IX = 1 + SMAX = ABS(CX(1)) + IX = IX + INCX + DO I = 2,N + IF (ABS(CX(IX)).GT.SMAX) THEN + ICMAX1 = I + SMAX = ABS(CX(IX)) + END IF + IX = IX + INCX + END DO + END IF + RETURN +* +* End of ICMAX1 +* + END diff --git a/dspl/liblapack/SRC/ieeeck.f b/dspl/liblapack/SRC/ieeeck.f new file mode 100644 index 0000000..2655958 --- /dev/null +++ b/dspl/liblapack/SRC/ieeeck.f @@ -0,0 +1,203 @@ +*> \brief \b IEEECK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download IEEECK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION IEEECK( ISPEC, ZERO, ONE ) +* +* .. Scalar Arguments .. +* INTEGER ISPEC +* REAL ONE, ZERO +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> IEEECK is called from the ILAENV to verify that Infinity and +*> possibly NaN arithmetic is safe (i.e. will not trap). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ISPEC +*> \verbatim +*> ISPEC is INTEGER +*> Specifies whether to test just for inifinity arithmetic +*> or whether to test for infinity and NaN arithmetic. +*> = 0: Verify infinity arithmetic only. +*> = 1: Verify infinity and NaN arithmetic. +*> \endverbatim +*> +*> \param[in] ZERO +*> \verbatim +*> ZERO is REAL +*> Must contain the value 0.0 +*> This is passed to prevent the compiler from optimizing +*> away this code. +*> \endverbatim +*> +*> \param[in] ONE +*> \verbatim +*> ONE is REAL +*> Must contain the value 1.0 +*> This is passed to prevent the compiler from optimizing +*> away this code. +*> +*> RETURN VALUE: INTEGER +*> = 0: Arithmetic failed to produce the correct answers +*> = 1: Arithmetic produced the correct answers +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION IEEECK( ISPEC, ZERO, ONE ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER ISPEC + REAL ONE, ZERO +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + REAL NAN1, NAN2, NAN3, NAN4, NAN5, NAN6, NEGINF, + $ NEGZRO, NEWZRO, POSINF +* .. +* .. Executable Statements .. + IEEECK = 1 +* + POSINF = ONE / ZERO + IF( POSINF.LE.ONE ) THEN + IEEECK = 0 + RETURN + END IF +* + NEGINF = -ONE / ZERO + IF( NEGINF.GE.ZERO ) THEN + IEEECK = 0 + RETURN + END IF +* + NEGZRO = ONE / ( NEGINF+ONE ) + IF( NEGZRO.NE.ZERO ) THEN + IEEECK = 0 + RETURN + END IF +* + NEGINF = ONE / NEGZRO + IF( NEGINF.GE.ZERO ) THEN + IEEECK = 0 + RETURN + END IF +* + NEWZRO = NEGZRO + ZERO + IF( NEWZRO.NE.ZERO ) THEN + IEEECK = 0 + RETURN + END IF +* + POSINF = ONE / NEWZRO + IF( POSINF.LE.ONE ) THEN + IEEECK = 0 + RETURN + END IF +* + NEGINF = NEGINF*POSINF + IF( NEGINF.GE.ZERO ) THEN + IEEECK = 0 + RETURN + END IF +* + POSINF = POSINF*POSINF + IF( POSINF.LE.ONE ) THEN + IEEECK = 0 + RETURN + END IF +* +* +* +* +* Return if we were only asked to check infinity arithmetic +* + IF( ISPEC.EQ.0 ) + $ RETURN +* + NAN1 = POSINF + NEGINF +* + NAN2 = POSINF / NEGINF +* + NAN3 = POSINF / POSINF +* + NAN4 = POSINF*ZERO +* + NAN5 = NEGINF*NEGZRO +* + NAN6 = NAN5*ZERO +* + IF( NAN1.EQ.NAN1 ) THEN + IEEECK = 0 + RETURN + END IF +* + IF( NAN2.EQ.NAN2 ) THEN + IEEECK = 0 + RETURN + END IF +* + IF( NAN3.EQ.NAN3 ) THEN + IEEECK = 0 + RETURN + END IF +* + IF( NAN4.EQ.NAN4 ) THEN + IEEECK = 0 + RETURN + END IF +* + IF( NAN5.EQ.NAN5 ) THEN + IEEECK = 0 + RETURN + END IF +* + IF( NAN6.EQ.NAN6 ) THEN + IEEECK = 0 + RETURN + END IF +* + RETURN + END diff --git a/dspl/liblapack/SRC/ilaclc.f b/dspl/liblapack/SRC/ilaclc.f new file mode 100644 index 0000000..35d86d2 --- /dev/null +++ b/dspl/liblapack/SRC/ilaclc.f @@ -0,0 +1,118 @@ +*> \brief \b ILACLC scans a matrix for its last non-zero column. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILACLC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILACLC( M, N, A, LDA ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDA +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILACLC scans A for its last non-zero column. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION ILACLC( M, N, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER M, N, LDA +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = (0.0E+0, 0.0E+0) ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. Executable Statements .. +* +* Quick test for the common case where one corner is non-zero. + IF( N.EQ.0 ) THEN + ILACLC = N + ELSE IF( A(1, N).NE.ZERO .OR. A(M, N).NE.ZERO ) THEN + ILACLC = N + ELSE +* Now scan each column from the end, returning with the first non-zero. + DO ILACLC = N, 1, -1 + DO I = 1, M + IF( A(I, ILACLC).NE.ZERO ) RETURN + END DO + END DO + END IF + RETURN + END diff --git a/dspl/liblapack/SRC/ilaclr.f b/dspl/liblapack/SRC/ilaclr.f new file mode 100644 index 0000000..3aad730 --- /dev/null +++ b/dspl/liblapack/SRC/ilaclr.f @@ -0,0 +1,121 @@ +*> \brief \b ILACLR scans a matrix for its last non-zero row. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILACLR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILACLR( M, N, A, LDA ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDA +* .. +* .. Array Arguments .. +* COMPLEX A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILACLR scans A for its last non-zero row. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION ILACLR( M, N, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER M, N, LDA +* .. +* .. Array Arguments .. + COMPLEX A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX ZERO + PARAMETER ( ZERO = (0.0E+0, 0.0E+0) ) +* .. +* .. Local Scalars .. + INTEGER I, J +* .. +* .. Executable Statements .. +* +* Quick test for the common case where one corner is non-zero. + IF( M.EQ.0 ) THEN + ILACLR = M + ELSE IF( A(M, 1).NE.ZERO .OR. A(M, N).NE.ZERO ) THEN + ILACLR = M + ELSE +* Scan up each column tracking the last zero row seen. + ILACLR = 0 + DO J = 1, N + I=M + DO WHILE((A(MAX(I,1),J).EQ.ZERO).AND.(I.GE.1)) + I=I-1 + ENDDO + ILACLR = MAX( ILACLR, I ) + END DO + END IF + RETURN + END diff --git a/dspl/liblapack/SRC/iladiag.f b/dspl/liblapack/SRC/iladiag.f new file mode 100644 index 0000000..58614d2 --- /dev/null +++ b/dspl/liblapack/SRC/iladiag.f @@ -0,0 +1,92 @@ +*> \brief \b ILADIAG +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILADIAG + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILADIAG( DIAG ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This subroutine translated from a character string specifying if a +*> matrix has unit diagonal or not to the relevant BLAST-specified +*> integer constant. +*> +*> ILADIAG returns an INTEGER. If ILADIAG < 0, then the input is not a +*> character indicating a unit or non-unit diagonal. Otherwise ILADIAG +*> returns the constant value corresponding to DIAG. +*> \endverbatim +* +* Arguments: +* ========== +* +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + INTEGER FUNCTION ILADIAG( DIAG ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER BLAS_NON_UNIT_DIAG, BLAS_UNIT_DIAG + PARAMETER ( BLAS_NON_UNIT_DIAG = 131, BLAS_UNIT_DIAG = 132 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. + IF( LSAME( DIAG, 'N' ) ) THEN + ILADIAG = BLAS_NON_UNIT_DIAG + ELSE IF( LSAME( DIAG, 'U' ) ) THEN + ILADIAG = BLAS_UNIT_DIAG + ELSE + ILADIAG = -1 + END IF + RETURN +* +* End of ILADIAG +* + END diff --git a/dspl/liblapack/SRC/iladlc.f b/dspl/liblapack/SRC/iladlc.f new file mode 100644 index 0000000..c647611 --- /dev/null +++ b/dspl/liblapack/SRC/iladlc.f @@ -0,0 +1,118 @@ +*> \brief \b ILADLC scans a matrix for its last non-zero column. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILADLC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILADLC( M, N, A, LDA ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDA +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILADLC scans A for its last non-zero column. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION ILADLC( M, N, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER M, N, LDA +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. Executable Statements .. +* +* Quick test for the common case where one corner is non-zero. + IF( N.EQ.0 ) THEN + ILADLC = N + ELSE IF( A(1, N).NE.ZERO .OR. A(M, N).NE.ZERO ) THEN + ILADLC = N + ELSE +* Now scan each column from the end, returning with the first non-zero. + DO ILADLC = N, 1, -1 + DO I = 1, M + IF( A(I, ILADLC).NE.ZERO ) RETURN + END DO + END DO + END IF + RETURN + END diff --git a/dspl/liblapack/SRC/iladlr.f b/dspl/liblapack/SRC/iladlr.f new file mode 100644 index 0000000..e8951d8 --- /dev/null +++ b/dspl/liblapack/SRC/iladlr.f @@ -0,0 +1,121 @@ +*> \brief \b ILADLR scans a matrix for its last non-zero row. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILADLR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILADLR( M, N, A, LDA ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDA +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILADLR scans A for its last non-zero row. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION ILADLR( M, N, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER M, N, LDA +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J +* .. +* .. Executable Statements .. +* +* Quick test for the common case where one corner is non-zero. + IF( M.EQ.0 ) THEN + ILADLR = M + ELSE IF( A(M, 1).NE.ZERO .OR. A(M, N).NE.ZERO ) THEN + ILADLR = M + ELSE +* Scan up each column tracking the last zero row seen. + ILADLR = 0 + DO J = 1, N + I=M + DO WHILE((A(MAX(I,1),J).EQ.ZERO).AND.(I.GE.1)) + I=I-1 + ENDDO + ILADLR = MAX( ILADLR, I ) + END DO + END IF + RETURN + END diff --git a/dspl/liblapack/SRC/ilaenv.f b/dspl/liblapack/SRC/ilaenv.f new file mode 100644 index 0000000..a438ada --- /dev/null +++ b/dspl/liblapack/SRC/ilaenv.f @@ -0,0 +1,709 @@ +*> \brief \b ILAENV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILAENV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILAENV( ISPEC, NAME, OPTS, N1, N2, N3, N4 ) +* +* .. Scalar Arguments .. +* CHARACTER*( * ) NAME, OPTS +* INTEGER ISPEC, N1, N2, N3, N4 +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILAENV is called from the LAPACK routines to choose problem-dependent +*> parameters for the local environment. See ISPEC for a description of +*> the parameters. +*> +*> ILAENV returns an INTEGER +*> if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC +*> if ILAENV < 0: if ILAENV = -k, the k-th argument had an illegal value. +*> +*> This version provides a set of parameters which should give good, +*> but not optimal, performance on many of the currently available +*> computers. Users are encouraged to modify this subroutine to set +*> the tuning parameters for their particular machine using the option +*> and problem size information in the arguments. +*> +*> This routine will not function correctly if it is converted to all +*> lower case. Converting it to all upper case is allowed. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ISPEC +*> \verbatim +*> ISPEC is INTEGER +*> Specifies the parameter to be returned as the value of +*> ILAENV. +*> = 1: the optimal blocksize; if this value is 1, an unblocked +*> algorithm will give the best performance. +*> = 2: the minimum block size for which the block routine +*> should be used; if the usable block size is less than +*> this value, an unblocked routine should be used. +*> = 3: the crossover point (in a block routine, for N less +*> than this value, an unblocked routine should be used) +*> = 4: the number of shifts, used in the nonsymmetric +*> eigenvalue routines (DEPRECATED) +*> = 5: the minimum column dimension for blocking to be used; +*> rectangular blocks must have dimension at least k by m, +*> where k is given by ILAENV(2,...) and m by ILAENV(5,...) +*> = 6: the crossover point for the SVD (when reducing an m by n +*> matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds +*> this value, a QR factorization is used first to reduce +*> the matrix to a triangular form.) +*> = 7: the number of processors +*> = 8: the crossover point for the multishift QR method +*> for nonsymmetric eigenvalue problems (DEPRECATED) +*> = 9: maximum size of the subproblems at the bottom of the +*> computation tree in the divide-and-conquer algorithm +*> (used by xGELSD and xGESDD) +*> =10: ieee NaN arithmetic can be trusted not to trap +*> =11: infinity arithmetic can be trusted not to trap +*> 12 <= ISPEC <= 16: +*> xHSEQR or related subroutines, +*> see IPARMQ for detailed explanation +*> \endverbatim +*> +*> \param[in] NAME +*> \verbatim +*> NAME is CHARACTER*(*) +*> The name of the calling subroutine, in either upper case or +*> lower case. +*> \endverbatim +*> +*> \param[in] OPTS +*> \verbatim +*> OPTS is CHARACTER*(*) +*> The character options to the subroutine NAME, concatenated +*> into a single character string. For example, UPLO = 'U', +*> TRANS = 'T', and DIAG = 'N' for a triangular routine would +*> be specified as OPTS = 'UTN'. +*> \endverbatim +*> +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> \endverbatim +*> +*> \param[in] N2 +*> \verbatim +*> N2 is INTEGER +*> \endverbatim +*> +*> \param[in] N3 +*> \verbatim +*> N3 is INTEGER +*> \endverbatim +*> +*> \param[in] N4 +*> \verbatim +*> N4 is INTEGER +*> Problem dimensions for the subroutine NAME; these may not all +*> be required. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The following conventions have been used when calling ILAENV from the +*> LAPACK routines: +*> 1) OPTS is a concatenation of all of the character options to +*> subroutine NAME, in the same order that they appear in the +*> argument list for NAME, even if they are not used in determining +*> the value of the parameter specified by ISPEC. +*> 2) The problem dimensions N1, N2, N3, N4 are specified in the order +*> that they appear in the argument list for NAME. N1 is used +*> first, N2 second, and so on, and unused problem dimensions are +*> passed a value of -1. +*> 3) The parameter value returned by ILAENV is checked for validity in +*> the calling subroutine. For example, ILAENV is used to retrieve +*> the optimal blocksize for STRTRI as follows: +*> +*> NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) +*> IF( NB.LE.1 ) NB = MAX( 1, N ) +*> \endverbatim +*> +* ===================================================================== + INTEGER FUNCTION ILAENV( ISPEC, NAME, OPTS, N1, N2, N3, N4 ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER*( * ) NAME, OPTS + INTEGER ISPEC, N1, N2, N3, N4 +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IC, IZ, NB, NBMIN, NX + LOGICAL CNAME, SNAME, TWOSTAGE + CHARACTER C1*1, C2*2, C4*2, C3*3, SUBNAM*16 +* .. +* .. Intrinsic Functions .. + INTRINSIC CHAR, ICHAR, INT, MIN, REAL +* .. +* .. External Functions .. + INTEGER IEEECK, IPARMQ, IPARAM2STAGE + EXTERNAL IEEECK, IPARMQ, IPARAM2STAGE +* .. +* .. Executable Statements .. +* + GO TO ( 10, 10, 10, 80, 90, 100, 110, 120, + $ 130, 140, 150, 160, 160, 160, 160, 160)ISPEC +* +* Invalid value for ISPEC +* + ILAENV = -1 + RETURN +* + 10 CONTINUE +* +* Convert NAME to upper case if the first character is lower case. +* + ILAENV = 1 + SUBNAM = NAME + IC = ICHAR( SUBNAM( 1: 1 ) ) + IZ = ICHAR( 'Z' ) + IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN +* +* ASCII character set +* + IF( IC.GE.97 .AND. IC.LE.122 ) THEN + SUBNAM( 1: 1 ) = CHAR( IC-32 ) + DO 20 I = 2, 6 + IC = ICHAR( SUBNAM( I: I ) ) + IF( IC.GE.97 .AND. IC.LE.122 ) + $ SUBNAM( I: I ) = CHAR( IC-32 ) + 20 CONTINUE + END IF +* + ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN +* +* EBCDIC character set +* + IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. + $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. + $ ( IC.GE.162 .AND. IC.LE.169 ) ) THEN + SUBNAM( 1: 1 ) = CHAR( IC+64 ) + DO 30 I = 2, 6 + IC = ICHAR( SUBNAM( I: I ) ) + IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. + $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. + $ ( IC.GE.162 .AND. IC.LE.169 ) )SUBNAM( I: + $ I ) = CHAR( IC+64 ) + 30 CONTINUE + END IF +* + ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN +* +* Prime machines: ASCII+128 +* + IF( IC.GE.225 .AND. IC.LE.250 ) THEN + SUBNAM( 1: 1 ) = CHAR( IC-32 ) + DO 40 I = 2, 6 + IC = ICHAR( SUBNAM( I: I ) ) + IF( IC.GE.225 .AND. IC.LE.250 ) + $ SUBNAM( I: I ) = CHAR( IC-32 ) + 40 CONTINUE + END IF + END IF +* + C1 = SUBNAM( 1: 1 ) + SNAME = C1.EQ.'S' .OR. C1.EQ.'D' + CNAME = C1.EQ.'C' .OR. C1.EQ.'Z' + IF( .NOT.( CNAME .OR. SNAME ) ) + $ RETURN + C2 = SUBNAM( 2: 3 ) + C3 = SUBNAM( 4: 6 ) + C4 = C3( 2: 3 ) + TWOSTAGE = LEN( SUBNAM ).GE.11 + $ .AND. SUBNAM( 11: 11 ).EQ.'2' +* + GO TO ( 50, 60, 70 )ISPEC +* + 50 CONTINUE +* +* ISPEC = 1: block size +* +* In these examples, separate code is provided for setting NB for +* real and complex. We assume that NB will take the same value in +* single or double precision. +* + NB = 1 +* + IF( C2.EQ.'GE' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + ELSE IF( C3.EQ.'QRF' .OR. C3.EQ.'RQF' .OR. C3.EQ.'LQF' .OR. + $ C3.EQ.'QLF' ) THEN + IF( SNAME ) THEN + NB = 32 + ELSE + NB = 32 + END IF + ELSE IF( C3.EQ.'QR ') THEN + IF( N3 .EQ. 1) THEN + IF( SNAME ) THEN +* M*N + IF ((N1*N2.LE.131072).OR.(N1.LE.8192)) THEN + NB = N1 + ELSE + NB = 32768/N2 + END IF + ELSE + IF ((N1*N2.LE.131072).OR.(N1.LE.8192)) THEN + NB = N1 + ELSE + NB = 32768/N2 + END IF + END IF + ELSE + IF( SNAME ) THEN + NB = 1 + ELSE + NB = 1 + END IF + END IF + ELSE IF( C3.EQ.'LQ ') THEN + IF( N3 .EQ. 2) THEN + IF( SNAME ) THEN +* M*N + IF ((N1*N2.LE.131072).OR.(N1.LE.8192)) THEN + NB = N1 + ELSE + NB = 32768/N2 + END IF + ELSE + IF ((N1*N2.LE.131072).OR.(N1.LE.8192)) THEN + NB = N1 + ELSE + NB = 32768/N2 + END IF + END IF + ELSE + IF( SNAME ) THEN + NB = 1 + ELSE + NB = 1 + END IF + END IF + ELSE IF( C3.EQ.'HRD' ) THEN + IF( SNAME ) THEN + NB = 32 + ELSE + NB = 32 + END IF + ELSE IF( C3.EQ.'BRD' ) THEN + IF( SNAME ) THEN + NB = 32 + ELSE + NB = 32 + END IF + ELSE IF( C3.EQ.'TRI' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + END IF + ELSE IF( C2.EQ.'PO' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + END IF + ELSE IF( C2.EQ.'SY' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + IF( TWOSTAGE ) THEN + NB = 192 + ELSE + NB = 64 + END IF + ELSE + IF( TWOSTAGE ) THEN + NB = 192 + ELSE + NB = 64 + END IF + END IF + ELSE IF( SNAME .AND. C3.EQ.'TRD' ) THEN + NB = 32 + ELSE IF( SNAME .AND. C3.EQ.'GST' ) THEN + NB = 64 + END IF + ELSE IF( CNAME .AND. C2.EQ.'HE' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( TWOSTAGE ) THEN + NB = 192 + ELSE + NB = 64 + END IF + ELSE IF( C3.EQ.'TRD' ) THEN + NB = 32 + ELSE IF( C3.EQ.'GST' ) THEN + NB = 64 + END IF + ELSE IF( SNAME .AND. C2.EQ.'OR' ) THEN + IF( C3( 1: 1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. C4.EQ. + $ 'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. C4.EQ.'BR' ) + $ THEN + NB = 32 + END IF + ELSE IF( C3( 1: 1 ).EQ.'M' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. C4.EQ. + $ 'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. C4.EQ.'BR' ) + $ THEN + NB = 32 + END IF + END IF + ELSE IF( CNAME .AND. C2.EQ.'UN' ) THEN + IF( C3( 1: 1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. C4.EQ. + $ 'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. C4.EQ.'BR' ) + $ THEN + NB = 32 + END IF + ELSE IF( C3( 1: 1 ).EQ.'M' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. C4.EQ. + $ 'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. C4.EQ.'BR' ) + $ THEN + NB = 32 + END IF + END IF + ELSE IF( C2.EQ.'GB' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + IF( N4.LE.64 ) THEN + NB = 1 + ELSE + NB = 32 + END IF + ELSE + IF( N4.LE.64 ) THEN + NB = 1 + ELSE + NB = 32 + END IF + END IF + END IF + ELSE IF( C2.EQ.'PB' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + IF( N2.LE.64 ) THEN + NB = 1 + ELSE + NB = 32 + END IF + ELSE + IF( N2.LE.64 ) THEN + NB = 1 + ELSE + NB = 32 + END IF + END IF + END IF + ELSE IF( C2.EQ.'TR' ) THEN + IF( C3.EQ.'TRI' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + ELSE IF ( C3.EQ.'EVC' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + END IF + ELSE IF( C2.EQ.'LA' ) THEN + IF( C3.EQ.'UUM' ) THEN + IF( SNAME ) THEN + NB = 64 + ELSE + NB = 64 + END IF + END IF + ELSE IF( SNAME .AND. C2.EQ.'ST' ) THEN + IF( C3.EQ.'EBZ' ) THEN + NB = 1 + END IF + ELSE IF( C2.EQ.'GG' ) THEN + NB = 32 + IF( C3.EQ.'HD3' ) THEN + IF( SNAME ) THEN + NB = 32 + ELSE + NB = 32 + END IF + END IF + END IF + ILAENV = NB + RETURN +* + 60 CONTINUE +* +* ISPEC = 2: minimum block size +* + NBMIN = 2 + IF( C2.EQ.'GE' ) THEN + IF( C3.EQ.'QRF' .OR. C3.EQ.'RQF' .OR. C3.EQ.'LQF' .OR. C3.EQ. + $ 'QLF' ) THEN + IF( SNAME ) THEN + NBMIN = 2 + ELSE + NBMIN = 2 + END IF + ELSE IF( C3.EQ.'HRD' ) THEN + IF( SNAME ) THEN + NBMIN = 2 + ELSE + NBMIN = 2 + END IF + ELSE IF( C3.EQ.'BRD' ) THEN + IF( SNAME ) THEN + NBMIN = 2 + ELSE + NBMIN = 2 + END IF + ELSE IF( C3.EQ.'TRI' ) THEN + IF( SNAME ) THEN + NBMIN = 2 + ELSE + NBMIN = 2 + END IF + END IF + ELSE IF( C2.EQ.'SY' ) THEN + IF( C3.EQ.'TRF' ) THEN + IF( SNAME ) THEN + NBMIN = 8 + ELSE + NBMIN = 8 + END IF + ELSE IF( SNAME .AND. C3.EQ.'TRD' ) THEN + NBMIN = 2 + END IF + ELSE IF( CNAME .AND. C2.EQ.'HE' ) THEN + IF( C3.EQ.'TRD' ) THEN + NBMIN = 2 + END IF + ELSE IF( SNAME .AND. C2.EQ.'OR' ) THEN + IF( C3( 1: 1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. C4.EQ. + $ 'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. C4.EQ.'BR' ) + $ THEN + NBMIN = 2 + END IF + ELSE IF( C3( 1: 1 ).EQ.'M' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. C4.EQ. + $ 'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. C4.EQ.'BR' ) + $ THEN + NBMIN = 2 + END IF + END IF + ELSE IF( CNAME .AND. C2.EQ.'UN' ) THEN + IF( C3( 1: 1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. C4.EQ. + $ 'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. C4.EQ.'BR' ) + $ THEN + NBMIN = 2 + END IF + ELSE IF( C3( 1: 1 ).EQ.'M' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. C4.EQ. + $ 'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. C4.EQ.'BR' ) + $ THEN + NBMIN = 2 + END IF + END IF + ELSE IF( C2.EQ.'GG' ) THEN + NBMIN = 2 + IF( C3.EQ.'HD3' ) THEN + NBMIN = 2 + END IF + END IF + ILAENV = NBMIN + RETURN +* + 70 CONTINUE +* +* ISPEC = 3: crossover point +* + NX = 0 + IF( C2.EQ.'GE' ) THEN + IF( C3.EQ.'QRF' .OR. C3.EQ.'RQF' .OR. C3.EQ.'LQF' .OR. C3.EQ. + $ 'QLF' ) THEN + IF( SNAME ) THEN + NX = 128 + ELSE + NX = 128 + END IF + ELSE IF( C3.EQ.'HRD' ) THEN + IF( SNAME ) THEN + NX = 128 + ELSE + NX = 128 + END IF + ELSE IF( C3.EQ.'BRD' ) THEN + IF( SNAME ) THEN + NX = 128 + ELSE + NX = 128 + END IF + END IF + ELSE IF( C2.EQ.'SY' ) THEN + IF( SNAME .AND. C3.EQ.'TRD' ) THEN + NX = 32 + END IF + ELSE IF( CNAME .AND. C2.EQ.'HE' ) THEN + IF( C3.EQ.'TRD' ) THEN + NX = 32 + END IF + ELSE IF( SNAME .AND. C2.EQ.'OR' ) THEN + IF( C3( 1: 1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. C4.EQ. + $ 'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. C4.EQ.'BR' ) + $ THEN + NX = 128 + END IF + END IF + ELSE IF( CNAME .AND. C2.EQ.'UN' ) THEN + IF( C3( 1: 1 ).EQ.'G' ) THEN + IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR. C4.EQ. + $ 'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR. C4.EQ.'BR' ) + $ THEN + NX = 128 + END IF + END IF + ELSE IF( C2.EQ.'GG' ) THEN + NX = 128 + IF( C3.EQ.'HD3' ) THEN + NX = 128 + END IF + END IF + ILAENV = NX + RETURN +* + 80 CONTINUE +* +* ISPEC = 4: number of shifts (used by xHSEQR) +* + ILAENV = 6 + RETURN +* + 90 CONTINUE +* +* ISPEC = 5: minimum column dimension (not used) +* + ILAENV = 2 + RETURN +* + 100 CONTINUE +* +* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) +* + ILAENV = INT( REAL( MIN( N1, N2 ) )*1.6E0 ) + RETURN +* + 110 CONTINUE +* +* ISPEC = 7: number of processors (not used) +* + ILAENV = 1 + RETURN +* + 120 CONTINUE +* +* ISPEC = 8: crossover point for multishift (used by xHSEQR) +* + ILAENV = 50 + RETURN +* + 130 CONTINUE +* +* ISPEC = 9: maximum size of the subproblems at the bottom of the +* computation tree in the divide-and-conquer algorithm +* (used by xGELSD and xGESDD) +* + ILAENV = 25 + RETURN +* + 140 CONTINUE +* +* ISPEC = 10: ieee NaN arithmetic can be trusted not to trap +* +* ILAENV = 0 + ILAENV = 1 + IF( ILAENV.EQ.1 ) THEN + ILAENV = IEEECK( 1, 0.0, 1.0 ) + END IF + RETURN +* + 150 CONTINUE +* +* ISPEC = 11: infinity arithmetic can be trusted not to trap +* +* ILAENV = 0 + ILAENV = 1 + IF( ILAENV.EQ.1 ) THEN + ILAENV = IEEECK( 0, 0.0, 1.0 ) + END IF + RETURN +* + 160 CONTINUE +* +* 12 <= ISPEC <= 16: xHSEQR or related subroutines. +* + ILAENV = IPARMQ( ISPEC, NAME, OPTS, N1, N2, N3, N4 ) + RETURN +* +* End of ILAENV +* + END diff --git a/dspl/liblapack/SRC/ilaenv2stage.f b/dspl/liblapack/SRC/ilaenv2stage.f new file mode 100644 index 0000000..3c0d34a --- /dev/null +++ b/dspl/liblapack/SRC/ilaenv2stage.f @@ -0,0 +1,191 @@ +*> \brief \b ILAENV2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILAENV2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILAENV2STAGE( ISPEC, NAME, OPTS, N1, N2, N3, N4 ) +* +* .. Scalar Arguments .. +* CHARACTER*( * ) NAME, OPTS +* INTEGER ISPEC, N1, N2, N3, N4 +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILAENV2STAGE is called from the LAPACK routines to choose problem-dependent +*> parameters for the local environment. See ISPEC for a description of +*> the parameters. +*> It sets problem and machine dependent parameters useful for *_2STAGE and +*> related subroutines. +*> +*> ILAENV2STAGE returns an INTEGER +*> if ILAENV2STAGE >= 0: ILAENV2STAGE returns the value of the parameter +* specified by ISPEC +*> if ILAENV2STAGE < 0: if ILAENV2STAGE = -k, the k-th argument had an +* illegal value. +*> +*> This version provides a set of parameters which should give good, +*> but not optimal, performance on many of the currently available +*> computers for the 2-stage solvers. Users are encouraged to modify this +*> subroutine to set the tuning parameters for their particular machine using +*> the option and problem size information in the arguments. +*> +*> This routine will not function correctly if it is converted to all +*> lower case. Converting it to all upper case is allowed. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ISPEC +*> \verbatim +*> ISPEC is INTEGER +*> Specifies the parameter to be returned as the value of +*> ILAENV2STAGE. +*> = 1: the optimal blocksize nb for the reduction to BAND +*> +*> = 2: the optimal blocksize ib for the eigenvectors +*> singular vectors update routine +*> +*> = 3: The length of the array that store the Housholder +*> representation for the second stage +*> Band to Tridiagonal or Bidiagonal +*> +*> = 4: The workspace needed for the routine in input. +*> +*> = 5: For future release. +*> \endverbatim +*> +*> \param[in] NAME +*> \verbatim +*> NAME is CHARACTER*(*) +*> The name of the calling subroutine, in either upper case or +*> lower case. +*> \endverbatim +*> +*> \param[in] OPTS +*> \verbatim +*> OPTS is CHARACTER*(*) +*> The character options to the subroutine NAME, concatenated +*> into a single character string. For example, UPLO = 'U', +*> TRANS = 'T', and DIAG = 'N' for a triangular routine would +*> be specified as OPTS = 'UTN'. +*> \endverbatim +*> +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> \endverbatim +*> +*> \param[in] N2 +*> \verbatim +*> N2 is INTEGER +*> \endverbatim +*> +*> \param[in] N3 +*> \verbatim +*> N3 is INTEGER +*> \endverbatim +*> +*> \param[in] N4 +*> \verbatim +*> N4 is INTEGER +*> Problem dimensions for the subroutine NAME; these may not all +*> be required. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +*> \author Nick R. Papior +* +*> \date July 2017 +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The following conventions have been used when calling ILAENV2STAGE +*> from the LAPACK routines: +*> 1) OPTS is a concatenation of all of the character options to +*> subroutine NAME, in the same order that they appear in the +*> argument list for NAME, even if they are not used in determining +*> the value of the parameter specified by ISPEC. +*> 2) The problem dimensions N1, N2, N3, N4 are specified in the order +*> that they appear in the argument list for NAME. N1 is used +*> first, N2 second, and so on, and unused problem dimensions are +*> passed a value of -1. +*> 3) The parameter value returned by ILAENV2STAGE is checked for validity in +*> the calling subroutine. +*> +*> \endverbatim +*> +* ===================================================================== + INTEGER FUNCTION ILAENV2STAGE( ISPEC, NAME, OPTS, N1, N2, N3, N4 ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2017 +* +* .. Scalar Arguments .. + CHARACTER*( * ) NAME, OPTS + INTEGER ISPEC, N1, N2, N3, N4 +* .. +* +* ===================================================================== +* .. +* .. Local Scalars .. + INTEGER IISPEC +* .. +* .. External Functions .. + INTEGER IPARAM2STAGE + EXTERNAL IPARAM2STAGE +* .. +* .. Executable Statements .. +* + GO TO ( 10, 10, 10, 10, 10 )ISPEC +* +* Invalid value for ISPEC +* + ILAENV2STAGE = -1 + RETURN +* + 10 CONTINUE +* +* 2stage eigenvalues and SVD or related subroutines. +* + IISPEC = 16 + ISPEC + ILAENV2STAGE = IPARAM2STAGE( IISPEC, NAME, OPTS, + $ N1, N2, N3, N4 ) + RETURN +* +* End of ILAENV2STAGE +* + END diff --git a/dspl/liblapack/SRC/ilaprec.f b/dspl/liblapack/SRC/ilaprec.f new file mode 100644 index 0000000..f1f32ac --- /dev/null +++ b/dspl/liblapack/SRC/ilaprec.f @@ -0,0 +1,98 @@ +*> \brief \b ILAPREC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILAPREC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILAPREC( PREC ) +* +* .. Scalar Arguments .. +* CHARACTER PREC +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This subroutine translated from a character string specifying an +*> intermediate precision to the relevant BLAST-specified integer +*> constant. +*> +*> ILAPREC returns an INTEGER. If ILAPREC < 0, then the input is not a +*> character indicating a supported intermediate precision. Otherwise +*> ILAPREC returns the constant value corresponding to PREC. +*> \endverbatim +* +* Arguments: +* ========== +* +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + INTEGER FUNCTION ILAPREC( PREC ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER PREC +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER BLAS_PREC_SINGLE, BLAS_PREC_DOUBLE, BLAS_PREC_INDIGENOUS, + $ BLAS_PREC_EXTRA + PARAMETER ( BLAS_PREC_SINGLE = 211, BLAS_PREC_DOUBLE = 212, + $ BLAS_PREC_INDIGENOUS = 213, BLAS_PREC_EXTRA = 214 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. + IF( LSAME( PREC, 'S' ) ) THEN + ILAPREC = BLAS_PREC_SINGLE + ELSE IF( LSAME( PREC, 'D' ) ) THEN + ILAPREC = BLAS_PREC_DOUBLE + ELSE IF( LSAME( PREC, 'I' ) ) THEN + ILAPREC = BLAS_PREC_INDIGENOUS + ELSE IF( LSAME( PREC, 'X' ) .OR. LSAME( PREC, 'E' ) ) THEN + ILAPREC = BLAS_PREC_EXTRA + ELSE + ILAPREC = -1 + END IF + RETURN +* +* End of ILAPREC +* + END diff --git a/dspl/liblapack/SRC/ilaslc.f b/dspl/liblapack/SRC/ilaslc.f new file mode 100644 index 0000000..e59c575 --- /dev/null +++ b/dspl/liblapack/SRC/ilaslc.f @@ -0,0 +1,118 @@ +*> \brief \b ILASLC scans a matrix for its last non-zero column. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILASLC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILASLC( M, N, A, LDA ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDA +* .. +* .. Array Arguments .. +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILASLC scans A for its last non-zero column. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION ILASLC( M, N, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER M, N, LDA +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. Executable Statements .. +* +* Quick test for the common case where one corner is non-zero. + IF( N.EQ.0 ) THEN + ILASLC = N + ELSE IF( A(1, N).NE.ZERO .OR. A(M, N).NE.ZERO ) THEN + ILASLC = N + ELSE +* Now scan each column from the end, returning with the first non-zero. + DO ILASLC = N, 1, -1 + DO I = 1, M + IF( A(I, ILASLC).NE.ZERO ) RETURN + END DO + END DO + END IF + RETURN + END diff --git a/dspl/liblapack/SRC/ilaslr.f b/dspl/liblapack/SRC/ilaslr.f new file mode 100644 index 0000000..910bc80 --- /dev/null +++ b/dspl/liblapack/SRC/ilaslr.f @@ -0,0 +1,121 @@ +*> \brief \b ILASLR scans a matrix for its last non-zero row. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILASLR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILASLR( M, N, A, LDA ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDA +* .. +* .. Array Arguments .. +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILASLR scans A for its last non-zero row. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION ILASLR( M, N, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER M, N, LDA +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J +* .. +* .. Executable Statements .. +* +* Quick test for the common case where one corner is non-zero. + IF( M.EQ.0 ) THEN + ILASLR = M + ELSEIF( A(M, 1).NE.ZERO .OR. A(M, N).NE.ZERO ) THEN + ILASLR = M + ELSE +* Scan up each column tracking the last zero row seen. + ILASLR = 0 + DO J = 1, N + I=M + DO WHILE((A(MAX(I,1),J).EQ.ZERO).AND.(I.GE.1)) + I=I-1 + ENDDO + ILASLR = MAX( ILASLR, I ) + END DO + END IF + RETURN + END diff --git a/dspl/liblapack/SRC/ilatrans.f b/dspl/liblapack/SRC/ilatrans.f new file mode 100644 index 0000000..6b90bfc --- /dev/null +++ b/dspl/liblapack/SRC/ilatrans.f @@ -0,0 +1,95 @@ +*> \brief \b ILATRANS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILATRANS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILATRANS( TRANS ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This subroutine translates from a character string specifying a +*> transposition operation to the relevant BLAST-specified integer +*> constant. +*> +*> ILATRANS returns an INTEGER. If ILATRANS < 0, then the input is not +*> a character indicating a transposition operator. Otherwise ILATRANS +*> returns the constant value corresponding to TRANS. +*> \endverbatim +* +* Arguments: +* ========== +* +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + INTEGER FUNCTION ILATRANS( TRANS ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER BLAS_NO_TRANS, BLAS_TRANS, BLAS_CONJ_TRANS + PARAMETER ( BLAS_NO_TRANS = 111, BLAS_TRANS = 112, + $ BLAS_CONJ_TRANS = 113 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. + IF( LSAME( TRANS, 'N' ) ) THEN + ILATRANS = BLAS_NO_TRANS + ELSE IF( LSAME( TRANS, 'T' ) ) THEN + ILATRANS = BLAS_TRANS + ELSE IF( LSAME( TRANS, 'C' ) ) THEN + ILATRANS = BLAS_CONJ_TRANS + ELSE + ILATRANS = -1 + END IF + RETURN +* +* End of ILATRANS +* + END diff --git a/dspl/liblapack/SRC/ilauplo.f b/dspl/liblapack/SRC/ilauplo.f new file mode 100644 index 0000000..89bc9b2 --- /dev/null +++ b/dspl/liblapack/SRC/ilauplo.f @@ -0,0 +1,92 @@ +*> \brief \b ILAUPLO +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILAUPLO + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILAUPLO( UPLO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This subroutine translated from a character string specifying a +*> upper- or lower-triangular matrix to the relevant BLAST-specified +*> integer constant. +*> +*> ILAUPLO returns an INTEGER. If ILAUPLO < 0, then the input is not +*> a character indicating an upper- or lower-triangular matrix. +*> Otherwise ILAUPLO returns the constant value corresponding to UPLO. +*> \endverbatim +* +* Arguments: +* ========== +* +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + INTEGER FUNCTION ILAUPLO( UPLO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER BLAS_UPPER, BLAS_LOWER + PARAMETER ( BLAS_UPPER = 121, BLAS_LOWER = 122 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. + IF( LSAME( UPLO, 'U' ) ) THEN + ILAUPLO = BLAS_UPPER + ELSE IF( LSAME( UPLO, 'L' ) ) THEN + ILAUPLO = BLAS_LOWER + ELSE + ILAUPLO = -1 + END IF + RETURN +* +* End of ILAUPLO +* + END diff --git a/dspl/liblapack/SRC/ilazlc.f b/dspl/liblapack/SRC/ilazlc.f new file mode 100644 index 0000000..07dfc93 --- /dev/null +++ b/dspl/liblapack/SRC/ilazlc.f @@ -0,0 +1,118 @@ +*> \brief \b ILAZLC scans a matrix for its last non-zero column. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILAZLC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILAZLC( M, N, A, LDA ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDA +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILAZLC scans A for its last non-zero column. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION ILAZLC( M, N, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER M, N, LDA +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = (0.0D+0, 0.0D+0) ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. Executable Statements .. +* +* Quick test for the common case where one corner is non-zero. + IF( N.EQ.0 ) THEN + ILAZLC = N + ELSE IF( A(1, N).NE.ZERO .OR. A(M, N).NE.ZERO ) THEN + ILAZLC = N + ELSE +* Now scan each column from the end, returning with the first non-zero. + DO ILAZLC = N, 1, -1 + DO I = 1, M + IF( A(I, ILAZLC).NE.ZERO ) RETURN + END DO + END DO + END IF + RETURN + END diff --git a/dspl/liblapack/SRC/ilazlr.f b/dspl/liblapack/SRC/ilazlr.f new file mode 100644 index 0000000..4ca4ed1 --- /dev/null +++ b/dspl/liblapack/SRC/ilazlr.f @@ -0,0 +1,121 @@ +*> \brief \b ILAZLR scans a matrix for its last non-zero row. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILAZLR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILAZLR( M, N, A, LDA ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDA +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILAZLR scans A for its last non-zero row. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION ILAZLR( M, N, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER M, N, LDA +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = (0.0D+0, 0.0D+0) ) +* .. +* .. Local Scalars .. + INTEGER I, J +* .. +* .. Executable Statements .. +* +* Quick test for the common case where one corner is non-zero. + IF( M.EQ.0 ) THEN + ILAZLR = M + ELSE IF( A(M, 1).NE.ZERO .OR. A(M, N).NE.ZERO ) THEN + ILAZLR = M + ELSE +* Scan up each column tracking the last zero row seen. + ILAZLR = 0 + DO J = 1, N + I=M + DO WHILE((A(MAX(I,1),J).EQ.ZERO).AND.(I.GE.1)) + I=I-1 + ENDDO + ILAZLR = MAX( ILAZLR, I ) + END DO + END IF + RETURN + END diff --git a/dspl/liblapack/SRC/iparam2stage.F b/dspl/liblapack/SRC/iparam2stage.F new file mode 100644 index 0000000..836e20e --- /dev/null +++ b/dspl/liblapack/SRC/iparam2stage.F @@ -0,0 +1,388 @@ +*> \brief \b IPARAM2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download IPARAM2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS, +* NI, NBI, IBI, NXI ) +* #if defined(_OPENMP) +* use omp_lib +* #endif +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER*( * ) NAME, OPTS +* INTEGER ISPEC, NI, NBI, IBI, NXI +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This program sets problem and machine dependent parameters +*> useful for xHETRD_2STAGE, xHETRD_H@2HB, xHETRD_HB2ST, +*> xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD +*> and related subroutines for eigenvalue problems. +*> It is called whenever ILAENV is called with 17 <= ISPEC <= 21. +*> It is called whenever ILAENV2STAGE is called with 1 <= ISPEC <= 5 +*> with a direct conversion ISPEC + 16. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ISPEC +*> \verbatim +*> ISPEC is integer scalar +*> ISPEC specifies which tunable parameter IPARAM2STAGE should +*> return. +*> +*> ISPEC=17: the optimal blocksize nb for the reduction to +* BAND +*> +*> ISPEC=18: the optimal blocksize ib for the eigenvectors +*> singular vectors update routine +*> +*> ISPEC=19: The length of the array that store the Housholder +*> representation for the second stage +*> Band to Tridiagonal or Bidiagonal +*> +*> ISPEC=20: The workspace needed for the routine in input. +*> +*> ISPEC=21: For future release. +*> \endverbatim +*> +*> \param[in] NAME +*> \verbatim +*> NAME is character string +*> Name of the calling subroutine +*> \endverbatim +*> +*> \param[in] OPTS +*> \verbatim +*> OPTS is CHARACTER*(*) +*> The character options to the subroutine NAME, concatenated +*> into a single character string. For example, UPLO = 'U', +*> TRANS = 'T', and DIAG = 'N' for a triangular routine would +*> be specified as OPTS = 'UTN'. +*> \endverbatim +*> +*> \param[in] NI +*> \verbatim +*> NI is INTEGER which is the size of the matrix +*> \endverbatim +*> +*> \param[in] NBI +*> \verbatim +*> NBI is INTEGER which is the used in the reduciton, +* (e.g., the size of the band), needed to compute workspace +* and LHOUS2. +*> \endverbatim +*> +*> \param[in] IBI +*> \verbatim +*> IBI is INTEGER which represent the IB of the reduciton, +* needed to compute workspace and LHOUS2. +*> \endverbatim +*> +*> \param[in] NXI +*> \verbatim +*> NXI is INTEGER needed in the future release. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup auxOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All detail are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS, + $ NI, NBI, IBI, NXI ) +#if defined(_OPENMP) + use omp_lib +#endif + IMPLICIT NONE +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER*( * ) NAME, OPTS + INTEGER ISPEC, NI, NBI, IBI, NXI +* +* ================================================================ +* .. +* .. Local Scalars .. + INTEGER I, IC, IZ, KD, IB, LHOUS, LWORK, NTHREADS, + $ FACTOPTNB, QROPTNB, LQOPTNB + LOGICAL RPREC, CPREC + CHARACTER PREC*1, ALGO*3, STAG*5, SUBNAM*12, VECT*1 +* .. +* .. Intrinsic Functions .. + INTRINSIC CHAR, ICHAR, MAX +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Invalid value for ISPEC +* + IF( (ISPEC.LT.17).OR.(ISPEC.GT.21) ) THEN + IPARAM2STAGE = -1 + RETURN + ENDIF +* +* Get the number of threads +* + NTHREADS = 1 +#if defined(_OPENMP) +!$OMP PARALLEL + NTHREADS = OMP_GET_NUM_THREADS() +!$OMP END PARALLEL +#endif +* WRITE(*,*) 'IPARAM VOICI NTHREADS ISPEC ',NTHREADS, ISPEC +* + IF( ISPEC .NE. 19 ) THEN +* +* Convert NAME to upper case if the first character is lower case. +* + IPARAM2STAGE = -1 + SUBNAM = NAME + IC = ICHAR( SUBNAM( 1: 1 ) ) + IZ = ICHAR( 'Z' ) + IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN +* +* ASCII character set +* + IF( IC.GE.97 .AND. IC.LE.122 ) THEN + SUBNAM( 1: 1 ) = CHAR( IC-32 ) + DO 100 I = 2, 12 + IC = ICHAR( SUBNAM( I: I ) ) + IF( IC.GE.97 .AND. IC.LE.122 ) + $ SUBNAM( I: I ) = CHAR( IC-32 ) + 100 CONTINUE + END IF +* + ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN +* +* EBCDIC character set +* + IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. + $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. + $ ( IC.GE.162 .AND. IC.LE.169 ) ) THEN + SUBNAM( 1: 1 ) = CHAR( IC+64 ) + DO 110 I = 2, 12 + IC = ICHAR( SUBNAM( I: I ) ) + IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. + $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. + $ ( IC.GE.162 .AND. IC.LE.169 ) )SUBNAM( I: + $ I ) = CHAR( IC+64 ) + 110 CONTINUE + END IF +* + ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN +* +* Prime machines: ASCII+128 +* + IF( IC.GE.225 .AND. IC.LE.250 ) THEN + SUBNAM( 1: 1 ) = CHAR( IC-32 ) + DO 120 I = 2, 12 + IC = ICHAR( SUBNAM( I: I ) ) + IF( IC.GE.225 .AND. IC.LE.250 ) + $ SUBNAM( I: I ) = CHAR( IC-32 ) + 120 CONTINUE + END IF + END IF +* + PREC = SUBNAM( 1: 1 ) + ALGO = SUBNAM( 4: 6 ) + STAG = SUBNAM( 8:12 ) + RPREC = PREC.EQ.'S' .OR. PREC.EQ.'D' + CPREC = PREC.EQ.'C' .OR. PREC.EQ.'Z' +* +* Invalid value for PRECISION +* + IF( .NOT.( RPREC .OR. CPREC ) ) THEN + IPARAM2STAGE = -1 + RETURN + ENDIF + ENDIF +* WRITE(*,*),'RPREC,CPREC ',RPREC,CPREC, +* $ ' ALGO ',ALGO,' STAGE ',STAG +* +* + IF (( ISPEC .EQ. 17 ) .OR. ( ISPEC .EQ. 18 )) THEN +* +* ISPEC = 17, 18: block size KD, IB +* Could be also dependent from N but for now it +* depend only on sequential or parallel +* + IF( NTHREADS.GT.4 ) THEN + IF( CPREC ) THEN + KD = 128 + IB = 32 + ELSE + KD = 160 + IB = 40 + ENDIF + ELSE IF( NTHREADS.GT.1 ) THEN + IF( CPREC ) THEN + KD = 64 + IB = 32 + ELSE + KD = 64 + IB = 32 + ENDIF + ELSE + IF( CPREC ) THEN + KD = 16 + IB = 16 + ELSE + KD = 32 + IB = 16 + ENDIF + ENDIF + IF( ISPEC.EQ.17 ) IPARAM2STAGE = KD + IF( ISPEC.EQ.18 ) IPARAM2STAGE = IB +* + ELSE IF ( ISPEC .EQ. 19 ) THEN +* +* ISPEC = 19: +* LHOUS length of the Houselholder representation +* matrix (V,T) of the second stage. should be >= 1. +* +* Will add the VECT OPTION HERE next release + VECT = OPTS(1:1) + IF( VECT.EQ.'N' ) THEN + LHOUS = MAX( 1, 4*NI ) + ELSE +* This is not correct, it need to call the ALGO and the stage2 + LHOUS = MAX( 1, 4*NI ) + IBI + ENDIF + IF( LHOUS.GE.0 ) THEN + IPARAM2STAGE = LHOUS + ELSE + IPARAM2STAGE = -1 + ENDIF +* + ELSE IF ( ISPEC .EQ. 20 ) THEN +* +* ISPEC = 20: (21 for future use) +* LWORK length of the workspace for +* either or both stages for TRD and BRD. should be >= 1. +* TRD: +* TRD_stage 1: = LT + LW + LS1 + LS2 +* = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD +* where LDT=LDS2=KD +* = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD +* TRD_stage 2: = (2NB+1)*N + KD*NTHREADS +* TRD_both : = max(stage1,stage2) + AB ( AB=(KD+1)*N ) +* = N*KD + N*max(KD+1,FACTOPTNB) +* + max(2*KD*KD, KD*NTHREADS) +* + (KD+1)*N + LWORK = -1 + SUBNAM(1:1) = PREC + SUBNAM(2:6) = 'GEQRF' + QROPTNB = ILAENV( 1, SUBNAM, ' ', NI, NBI, -1, -1 ) + SUBNAM(2:6) = 'GELQF' + LQOPTNB = ILAENV( 1, SUBNAM, ' ', NBI, NI, -1, -1 ) +* Could be QR or LQ for TRD and the max for BRD + FACTOPTNB = MAX(QROPTNB, LQOPTNB) + IF( ALGO.EQ.'TRD' ) THEN + IF( STAG.EQ.'2STAG' ) THEN + LWORK = NI*NBI + NI*MAX(NBI+1,FACTOPTNB) + $ + MAX(2*NBI*NBI, NBI*NTHREADS) + $ + (NBI+1)*NI + ELSE IF( (STAG.EQ.'HE2HB').OR.(STAG.EQ.'SY2SB') ) THEN + LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI + ELSE IF( (STAG.EQ.'HB2ST').OR.(STAG.EQ.'SB2ST') ) THEN + LWORK = (2*NBI+1)*NI + NBI*NTHREADS + ENDIF + ELSE IF( ALGO.EQ.'BRD' ) THEN + IF( STAG.EQ.'2STAG' ) THEN + LWORK = 2*NI*NBI + NI*MAX(NBI+1,FACTOPTNB) + $ + MAX(2*NBI*NBI, NBI*NTHREADS) + $ + (NBI+1)*NI + ELSE IF( STAG.EQ.'GE2GB' ) THEN + LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI + ELSE IF( STAG.EQ.'GB2BD' ) THEN + LWORK = (3*NBI+1)*NI + NBI*NTHREADS + ENDIF + ENDIF + LWORK = MAX ( 1, LWORK ) + + IF( LWORK.GT.0 ) THEN + IPARAM2STAGE = LWORK + ELSE + IPARAM2STAGE = -1 + ENDIF +* + ELSE IF ( ISPEC .EQ. 21 ) THEN +* +* ISPEC = 21 for future use + IPARAM2STAGE = NXI + ENDIF +* +* ==== End of IPARAM2STAGE ==== +* + END diff --git a/dspl/liblapack/SRC/iparmq.f b/dspl/liblapack/SRC/iparmq.f new file mode 100644 index 0000000..e576e0d --- /dev/null +++ b/dspl/liblapack/SRC/iparmq.f @@ -0,0 +1,395 @@ +*> \brief \b IPARMQ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download IPARMQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION IPARMQ( ISPEC, NAME, OPTS, N, ILO, IHI, LWORK ) +* +* .. Scalar Arguments .. +* INTEGER IHI, ILO, ISPEC, LWORK, N +* CHARACTER NAME*( * ), OPTS*( * ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This program sets problem and machine dependent parameters +*> useful for xHSEQR and related subroutines for eigenvalue +*> problems. It is called whenever +*> IPARMQ is called with 12 <= ISPEC <= 16 +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ISPEC +*> \verbatim +*> ISPEC is INTEGER +*> ISPEC specifies which tunable parameter IPARMQ should +*> return. +*> +*> ISPEC=12: (INMIN) Matrices of order nmin or less +*> are sent directly to xLAHQR, the implicit +*> double shift QR algorithm. NMIN must be +*> at least 11. +*> +*> ISPEC=13: (INWIN) Size of the deflation window. +*> This is best set greater than or equal to +*> the number of simultaneous shifts NS. +*> Larger matrices benefit from larger deflation +*> windows. +*> +*> ISPEC=14: (INIBL) Determines when to stop nibbling and +*> invest in an (expensive) multi-shift QR sweep. +*> If the aggressive early deflation subroutine +*> finds LD converged eigenvalues from an order +*> NW deflation window and LD.GT.(NW*NIBBLE)/100, +*> then the next QR sweep is skipped and early +*> deflation is applied immediately to the +*> remaining active diagonal block. Setting +*> IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a +*> multi-shift QR sweep whenever early deflation +*> finds a converged eigenvalue. Setting +*> IPARMQ(ISPEC=14) greater than or equal to 100 +*> prevents TTQRE from skipping a multi-shift +*> QR sweep. +*> +*> ISPEC=15: (NSHFTS) The number of simultaneous shifts in +*> a multi-shift QR iteration. +*> +*> ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the +*> following meanings. +*> 0: During the multi-shift QR/QZ sweep, +*> blocked eigenvalue reordering, blocked +*> Hessenberg-triangular reduction, +*> reflections and/or rotations are not +*> accumulated when updating the +*> far-from-diagonal matrix entries. +*> 1: During the multi-shift QR/QZ sweep, +*> blocked eigenvalue reordering, blocked +*> Hessenberg-triangular reduction, +*> reflections and/or rotations are +*> accumulated, and matrix-matrix +*> multiplication is used to update the +*> far-from-diagonal matrix entries. +*> 2: During the multi-shift QR/QZ sweep, +*> blocked eigenvalue reordering, blocked +*> Hessenberg-triangular reduction, +*> reflections and/or rotations are +*> accumulated, and 2-by-2 block structure +*> is exploited during matrix-matrix +*> multiplies. +*> (If xTRMM is slower than xGEMM, then +*> IPARMQ(ISPEC=16)=1 may be more efficient than +*> IPARMQ(ISPEC=16)=2 despite the greater level of +*> arithmetic work implied by the latter choice.) +*> \endverbatim +*> +*> \param[in] NAME +*> \verbatim +*> NAME is character string +*> Name of the calling subroutine +*> \endverbatim +*> +*> \param[in] OPTS +*> \verbatim +*> OPTS is character string +*> This is a concatenation of the string arguments to +*> TTQRE. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> N is the order of the Hessenberg matrix H. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> It is assumed that H is already upper triangular +*> in rows and columns 1:ILO-1 and IHI+1:N. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The amount of workspace available. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Little is known about how best to choose these parameters. +*> It is possible to use different values of the parameters +*> for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR. +*> +*> It is probably best to choose different parameters for +*> different matrices and different parameters at different +*> times during the iteration, but this has not been +*> implemented --- yet. +*> +*> +*> The best choices of most of the parameters depend +*> in an ill-understood way on the relative execution +*> rate of xLAQR3 and xLAQR5 and on the nature of each +*> particular eigenvalue problem. Experiment may be the +*> only practical way to determine which choices are most +*> effective. +*> +*> Following is a list of default values supplied by IPARMQ. +*> These defaults may be adjusted in order to attain better +*> performance in any particular computational environment. +*> +*> IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point. +*> Default: 75. (Must be at least 11.) +*> +*> IPARMQ(ISPEC=13) Recommended deflation window size. +*> This depends on ILO, IHI and NS, the +*> number of simultaneous shifts returned +*> by IPARMQ(ISPEC=15). The default for +*> (IHI-ILO+1).LE.500 is NS. The default +*> for (IHI-ILO+1).GT.500 is 3*NS/2. +*> +*> IPARMQ(ISPEC=14) Nibble crossover point. Default: 14. +*> +*> IPARMQ(ISPEC=15) Number of simultaneous shifts, NS. +*> a multi-shift QR iteration. +*> +*> If IHI-ILO+1 is ... +*> +*> greater than ...but less ... the +*> or equal to ... than default is +*> +*> 0 30 NS = 2+ +*> 30 60 NS = 4+ +*> 60 150 NS = 10 +*> 150 590 NS = ** +*> 590 3000 NS = 64 +*> 3000 6000 NS = 128 +*> 6000 infinity NS = 256 +*> +*> (+) By default matrices of this order are +*> passed to the implicit double shift routine +*> xLAHQR. See IPARMQ(ISPEC=12) above. These +*> values of NS are used only in case of a rare +*> xLAHQR failure. +*> +*> (**) The asterisks (**) indicate an ad-hoc +*> function increasing from 10 to 64. +*> +*> IPARMQ(ISPEC=16) Select structured matrix multiply. +*> (See ISPEC=16 above for details.) +*> Default: 3. +*> \endverbatim +*> +* ===================================================================== + INTEGER FUNCTION IPARMQ( ISPEC, NAME, OPTS, N, ILO, IHI, LWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, ISPEC, LWORK, N + CHARACTER NAME*( * ), OPTS*( * ) +* +* ================================================================ +* .. Parameters .. + INTEGER INMIN, INWIN, INIBL, ISHFTS, IACC22 + PARAMETER ( INMIN = 12, INWIN = 13, INIBL = 14, + $ ISHFTS = 15, IACC22 = 16 ) + INTEGER NMIN, K22MIN, KACMIN, NIBBLE, KNWSWP + PARAMETER ( NMIN = 75, K22MIN = 14, KACMIN = 14, + $ NIBBLE = 14, KNWSWP = 500 ) + REAL TWO + PARAMETER ( TWO = 2.0 ) +* .. +* .. Local Scalars .. + INTEGER NH, NS + INTEGER I, IC, IZ + CHARACTER SUBNAM*6 +* .. +* .. Intrinsic Functions .. + INTRINSIC LOG, MAX, MOD, NINT, REAL +* .. +* .. Executable Statements .. + IF( ( ISPEC.EQ.ISHFTS ) .OR. ( ISPEC.EQ.INWIN ) .OR. + $ ( ISPEC.EQ.IACC22 ) ) THEN +* +* ==== Set the number simultaneous shifts ==== +* + NH = IHI - ILO + 1 + NS = 2 + IF( NH.GE.30 ) + $ NS = 4 + IF( NH.GE.60 ) + $ NS = 10 + IF( NH.GE.150 ) + $ NS = MAX( 10, NH / NINT( LOG( REAL( NH ) ) / LOG( TWO ) ) ) + IF( NH.GE.590 ) + $ NS = 64 + IF( NH.GE.3000 ) + $ NS = 128 + IF( NH.GE.6000 ) + $ NS = 256 + NS = MAX( 2, NS-MOD( NS, 2 ) ) + END IF +* + IF( ISPEC.EQ.INMIN ) THEN +* +* +* ===== Matrices of order smaller than NMIN get sent +* . to xLAHQR, the classic double shift algorithm. +* . This must be at least 11. ==== +* + IPARMQ = NMIN +* + ELSE IF( ISPEC.EQ.INIBL ) THEN +* +* ==== INIBL: skip a multi-shift qr iteration and +* . whenever aggressive early deflation finds +* . at least (NIBBLE*(window size)/100) deflations. ==== +* + IPARMQ = NIBBLE +* + ELSE IF( ISPEC.EQ.ISHFTS ) THEN +* +* ==== NSHFTS: The number of simultaneous shifts ===== +* + IPARMQ = NS +* + ELSE IF( ISPEC.EQ.INWIN ) THEN +* +* ==== NW: deflation window size. ==== +* + IF( NH.LE.KNWSWP ) THEN + IPARMQ = NS + ELSE + IPARMQ = 3*NS / 2 + END IF +* + ELSE IF( ISPEC.EQ.IACC22 ) THEN +* +* ==== IACC22: Whether to accumulate reflections +* . before updating the far-from-diagonal elements +* . and whether to use 2-by-2 block structure while +* . doing it. A small amount of work could be saved +* . by making this choice dependent also upon the +* . NH=IHI-ILO+1. +* +* +* Convert NAME to upper case if the first character is lower case. +* + IPARMQ = 0 + SUBNAM = NAME + IC = ICHAR( SUBNAM( 1: 1 ) ) + IZ = ICHAR( 'Z' ) + IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN +* +* ASCII character set +* + IF( IC.GE.97 .AND. IC.LE.122 ) THEN + SUBNAM( 1: 1 ) = CHAR( IC-32 ) + DO I = 2, 6 + IC = ICHAR( SUBNAM( I: I ) ) + IF( IC.GE.97 .AND. IC.LE.122 ) + $ SUBNAM( I: I ) = CHAR( IC-32 ) + END DO + END IF +* + ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN +* +* EBCDIC character set +* + IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. + $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. + $ ( IC.GE.162 .AND. IC.LE.169 ) ) THEN + SUBNAM( 1: 1 ) = CHAR( IC+64 ) + DO I = 2, 6 + IC = ICHAR( SUBNAM( I: I ) ) + IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR. + $ ( IC.GE.145 .AND. IC.LE.153 ) .OR. + $ ( IC.GE.162 .AND. IC.LE.169 ) )SUBNAM( I: + $ I ) = CHAR( IC+64 ) + END DO + END IF +* + ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN +* +* Prime machines: ASCII+128 +* + IF( IC.GE.225 .AND. IC.LE.250 ) THEN + SUBNAM( 1: 1 ) = CHAR( IC-32 ) + DO I = 2, 6 + IC = ICHAR( SUBNAM( I: I ) ) + IF( IC.GE.225 .AND. IC.LE.250 ) + $ SUBNAM( I: I ) = CHAR( IC-32 ) + END DO + END IF + END IF +* + IF( SUBNAM( 2:6 ).EQ.'GGHRD' .OR. + $ SUBNAM( 2:6 ).EQ.'GGHD3' ) THEN + IPARMQ = 1 + IF( NH.GE.K22MIN ) + $ IPARMQ = 2 + ELSE IF ( SUBNAM( 4:6 ).EQ.'EXC' ) THEN + IF( NH.GE.KACMIN ) + $ IPARMQ = 1 + IF( NH.GE.K22MIN ) + $ IPARMQ = 2 + ELSE IF ( SUBNAM( 2:6 ).EQ.'HSEQR' .OR. + $ SUBNAM( 2:5 ).EQ.'LAQR' ) THEN + IF( NS.GE.KACMIN ) + $ IPARMQ = 1 + IF( NS.GE.K22MIN ) + $ IPARMQ = 2 + END IF +* + ELSE +* ===== invalid value of ispec ===== + IPARMQ = -1 +* + END IF +* +* ==== End of IPARMQ ==== +* + END diff --git a/dspl/liblapack/SRC/izmax1.f b/dspl/liblapack/SRC/izmax1.f new file mode 100644 index 0000000..bec5c68 --- /dev/null +++ b/dspl/liblapack/SRC/izmax1.f @@ -0,0 +1,141 @@ +*> \brief \b IZMAX1 finds the index of the first vector element of maximum absolute value. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download IZMAX1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION IZMAX1( N, ZX, INCX ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* .. +* .. Array Arguments .. +* COMPLEX*16 ZX( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> IZMAX1 finds the index of the first vector element of maximum absolute value. +*> +*> Based on IZAMAX from Level 1 BLAS. +*> The change is to use the 'genuine' absolute value. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of elements in the vector ZX. +*> \endverbatim +*> +*> \param[in] ZX +*> \verbatim +*> ZX is COMPLEX*16 array, dimension (N) +*> The vector ZX. The IZMAX1 function returns the index of its first +*> element of maximum absolute value. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The spacing between successive values of ZX. INCX >= 1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date February 2014 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Nick Higham for use with ZLACON. +* +* ===================================================================== + INTEGER FUNCTION IZMAX1( N, ZX, INCX ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* February 2014 +* +* .. Scalar Arguments .. + INTEGER INCX, N +* .. +* .. Array Arguments .. + COMPLEX*16 ZX(*) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + DOUBLE PRECISION DMAX + INTEGER I, IX +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + IZMAX1 = 0 + IF (N.LT.1 .OR. INCX.LE.0) RETURN + IZMAX1 = 1 + IF (N.EQ.1) RETURN + IF (INCX.EQ.1) THEN +* +* code for increment equal to 1 +* + DMAX = ABS(ZX(1)) + DO I = 2,N + IF (ABS(ZX(I)).GT.DMAX) THEN + IZMAX1 = I + DMAX = ABS(ZX(I)) + END IF + END DO + ELSE +* +* code for increment not equal to 1 +* + IX = 1 + DMAX = ABS(ZX(1)) + IX = IX + INCX + DO I = 2,N + IF (ABS(ZX(IX)).GT.DMAX) THEN + IZMAX1 = I + DMAX = ABS(ZX(IX)) + END IF + IX = IX + INCX + END DO + END IF + RETURN +* +* End of IZMAX1 +* + END diff --git a/dspl/liblapack/SRC/lsamen.f b/dspl/liblapack/SRC/lsamen.f new file mode 100644 index 0000000..d25c6f8 --- /dev/null +++ b/dspl/liblapack/SRC/lsamen.f @@ -0,0 +1,122 @@ +*> \brief \b LSAMEN +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download LSAMEN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* LOGICAL FUNCTION LSAMEN( N, CA, CB ) +* +* .. Scalar Arguments .. +* CHARACTER*( * ) CA, CB +* INTEGER N +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> LSAMEN tests if the first N letters of CA are the same as the +*> first N letters of CB, regardless of case. +*> LSAMEN returns .TRUE. if CA and CB are equivalent except for case +*> and .FALSE. otherwise. LSAMEN also returns .FALSE. if LEN( CA ) +*> or LEN( CB ) is less than N. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of characters in CA and CB to be compared. +*> \endverbatim +*> +*> \param[in] CA +*> \verbatim +*> CA is CHARACTER*(*) +*> \endverbatim +*> +*> \param[in] CB +*> \verbatim +*> CB is CHARACTER*(*) +*> CA and CB specify two character strings of length at least N. +*> Only the first N characters of each string will be accessed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + LOGICAL FUNCTION LSAMEN( N, CA, CB ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER*( * ) CA, CB + INTEGER N +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC LEN +* .. +* .. Executable Statements .. +* + LSAMEN = .FALSE. + IF( LEN( CA ).LT.N .OR. LEN( CB ).LT.N ) + $ GO TO 20 +* +* Do for each character in the two strings. +* + DO 10 I = 1, N +* +* Test if the characters are equal using LSAME. +* + IF( .NOT.LSAME( CA( I: I ), CB( I: I ) ) ) + $ GO TO 20 +* + 10 CONTINUE + LSAMEN = .TRUE. +* + 20 CONTINUE + RETURN +* +* End of LSAMEN +* + END diff --git a/dspl/liblapack/SRC/sbbcsd.f b/dspl/liblapack/SRC/sbbcsd.f new file mode 100644 index 0000000..e1f8397 --- /dev/null +++ b/dspl/liblapack/SRC/sbbcsd.f @@ -0,0 +1,1080 @@ +*> \brief \b SBBCSD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SBBCSD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, +* THETA, PHI, U1, LDU1, U2, LDU2, V1T, LDV1T, +* V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E, +* B22D, B22E, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS +* INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LWORK, M, P, Q +* .. +* .. Array Arguments .. +* REAL B11D( * ), B11E( * ), B12D( * ), B12E( * ), +* $ B21D( * ), B21E( * ), B22D( * ), B22E( * ), +* $ PHI( * ), THETA( * ), WORK( * ) +* REAL U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ), +* $ V2T( LDV2T, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SBBCSD computes the CS decomposition of an orthogonal matrix in +*> bidiagonal-block form, +*> +*> +*> [ B11 | B12 0 0 ] +*> [ 0 | 0 -I 0 ] +*> X = [----------------] +*> [ B21 | B22 0 0 ] +*> [ 0 | 0 0 I ] +*> +*> [ C | -S 0 0 ] +*> [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**T +*> = [---------] [---------------] [---------] . +*> [ | U2 ] [ S | C 0 0 ] [ | V2 ] +*> [ 0 | 0 0 I ] +*> +*> X is M-by-M, its top-left block is P-by-Q, and Q must be no larger +*> than P, M-P, or M-Q. (If Q is not the smallest index, then X must be +*> transposed and/or permuted. This can be done in constant time using +*> the TRANS and SIGNS options. See SORCSD for details.) +*> +*> The bidiagonal matrices B11, B12, B21, and B22 are represented +*> implicitly by angles THETA(1:Q) and PHI(1:Q-1). +*> +*> The orthogonal matrices U1, U2, V1T, and V2T are input/output. +*> The input matrices are pre- or post-multiplied by the appropriate +*> singular vector matrices. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU1 +*> \verbatim +*> JOBU1 is CHARACTER +*> = 'Y': U1 is updated; +*> otherwise: U1 is not updated. +*> \endverbatim +*> +*> \param[in] JOBU2 +*> \verbatim +*> JOBU2 is CHARACTER +*> = 'Y': U2 is updated; +*> otherwise: U2 is not updated. +*> \endverbatim +*> +*> \param[in] JOBV1T +*> \verbatim +*> JOBV1T is CHARACTER +*> = 'Y': V1T is updated; +*> otherwise: V1T is not updated. +*> \endverbatim +*> +*> \param[in] JOBV2T +*> \verbatim +*> JOBV2T is CHARACTER +*> = 'Y': V2T is updated; +*> otherwise: V2T is not updated. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER +*> = 'T': X, U1, U2, V1T, and V2T are stored in row-major +*> order; +*> otherwise: X, U1, U2, V1T, and V2T are stored in column- +*> major order. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows and columns in X, the orthogonal matrix in +*> bidiagonal-block form. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in the top-left block of X. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in the top-left block of X. +*> 0 <= Q <= MIN(P,M-P,M-Q). +*> \endverbatim +*> +*> \param[in,out] THETA +*> \verbatim +*> THETA is REAL array, dimension (Q) +*> On entry, the angles THETA(1),...,THETA(Q) that, along with +*> PHI(1), ...,PHI(Q-1), define the matrix in bidiagonal-block +*> form. On exit, the angles whose cosines and sines define the +*> diagonal blocks in the CS decomposition. +*> \endverbatim +*> +*> \param[in,out] PHI +*> \verbatim +*> PHI is REAL array, dimension (Q-1) +*> The angles PHI(1),...,PHI(Q-1) that, along with THETA(1),..., +*> THETA(Q), define the matrix in bidiagonal-block form. +*> \endverbatim +*> +*> \param[in,out] U1 +*> \verbatim +*> U1 is REAL array, dimension (LDU1,P) +*> On entry, a P-by-P matrix. On exit, U1 is postmultiplied +*> by the left singular vector matrix common to [ B11 ; 0 ] and +*> [ B12 0 0 ; 0 -I 0 0 ]. +*> \endverbatim +*> +*> \param[in] LDU1 +*> \verbatim +*> LDU1 is INTEGER +*> The leading dimension of the array U1, LDU1 >= MAX(1,P). +*> \endverbatim +*> +*> \param[in,out] U2 +*> \verbatim +*> U2 is REAL array, dimension (LDU2,M-P) +*> On entry, an (M-P)-by-(M-P) matrix. On exit, U2 is +*> postmultiplied by the left singular vector matrix common to +*> [ B21 ; 0 ] and [ B22 0 0 ; 0 0 I ]. +*> \endverbatim +*> +*> \param[in] LDU2 +*> \verbatim +*> LDU2 is INTEGER +*> The leading dimension of the array U2, LDU2 >= MAX(1,M-P). +*> \endverbatim +*> +*> \param[in,out] V1T +*> \verbatim +*> V1T is REAL array, dimension (LDV1T,Q) +*> On entry, a Q-by-Q matrix. On exit, V1T is premultiplied +*> by the transpose of the right singular vector +*> matrix common to [ B11 ; 0 ] and [ B21 ; 0 ]. +*> \endverbatim +*> +*> \param[in] LDV1T +*> \verbatim +*> LDV1T is INTEGER +*> The leading dimension of the array V1T, LDV1T >= MAX(1,Q). +*> \endverbatim +*> +*> \param[in,out] V2T +*> \verbatim +*> V2T is REAL array, dimension (LDV2T,M-Q) +*> On entry, an (M-Q)-by-(M-Q) matrix. On exit, V2T is +*> premultiplied by the transpose of the right +*> singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and +*> [ B22 0 0 ; 0 0 I ]. +*> \endverbatim +*> +*> \param[in] LDV2T +*> \verbatim +*> LDV2T is INTEGER +*> The leading dimension of the array V2T, LDV2T >= MAX(1,M-Q). +*> \endverbatim +*> +*> \param[out] B11D +*> \verbatim +*> B11D is REAL array, dimension (Q) +*> When SBBCSD converges, B11D contains the cosines of THETA(1), +*> ..., THETA(Q). If SBBCSD fails to converge, then B11D +*> contains the diagonal of the partially reduced top-left +*> block. +*> \endverbatim +*> +*> \param[out] B11E +*> \verbatim +*> B11E is REAL array, dimension (Q-1) +*> When SBBCSD converges, B11E contains zeros. If SBBCSD fails +*> to converge, then B11E contains the superdiagonal of the +*> partially reduced top-left block. +*> \endverbatim +*> +*> \param[out] B12D +*> \verbatim +*> B12D is REAL array, dimension (Q) +*> When SBBCSD converges, B12D contains the negative sines of +*> THETA(1), ..., THETA(Q). If SBBCSD fails to converge, then +*> B12D contains the diagonal of the partially reduced top-right +*> block. +*> \endverbatim +*> +*> \param[out] B12E +*> \verbatim +*> B12E is REAL array, dimension (Q-1) +*> When SBBCSD converges, B12E contains zeros. If SBBCSD fails +*> to converge, then B12E contains the subdiagonal of the +*> partially reduced top-right block. +*> \endverbatim +*> +*> \param[out] B21D +*> \verbatim +*> B21D is REAL array, dimension (Q) +*> When SBBCSD converges, B21D contains the negative sines of +*> THETA(1), ..., THETA(Q). If SBBCSD fails to converge, then +*> B21D contains the diagonal of the partially reduced bottom-left +*> block. +*> \endverbatim +*> +*> \param[out] B21E +*> \verbatim +*> B21E is REAL array, dimension (Q-1) +*> When SBBCSD converges, B21E contains zeros. If SBBCSD fails +*> to converge, then B21E contains the subdiagonal of the +*> partially reduced bottom-left block. +*> \endverbatim +*> +*> \param[out] B22D +*> \verbatim +*> B22D is REAL array, dimension (Q) +*> When SBBCSD converges, B22D contains the negative sines of +*> THETA(1), ..., THETA(Q). If SBBCSD fails to converge, then +*> B22D contains the diagonal of the partially reduced bottom-right +*> block. +*> \endverbatim +*> +*> \param[out] B22E +*> \verbatim +*> B22E is REAL array, dimension (Q-1) +*> When SBBCSD converges, B22E contains zeros. If SBBCSD fails +*> to converge, then B22E contains the subdiagonal of the +*> partially reduced bottom-right block. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= MAX(1,8*Q). +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the work array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if SBBCSD did not converge, INFO specifies the number +*> of nonzero entries in PHI, and B11D, B11E, etc., +*> contain the partially reduced matrix. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> TOLMUL REAL, default = MAX(10,MIN(100,EPS**(-1/8))) +*> TOLMUL controls the convergence criterion of the QR loop. +*> Angles THETA(i), PHI(i) are rounded to 0 or PI/2 when they +*> are within TOLMUL*EPS of either bound. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, + $ THETA, PHI, U1, LDU1, U2, LDU2, V1T, LDV1T, + $ V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E, + $ B22D, B22E, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS + INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LWORK, M, P, Q +* .. +* .. Array Arguments .. + REAL B11D( * ), B11E( * ), B12D( * ), B12E( * ), + $ B21D( * ), B21E( * ), B22D( * ), B22E( * ), + $ PHI( * ), THETA( * ), WORK( * ) + REAL U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ), + $ V2T( LDV2T, * ) +* .. +* +* =================================================================== +* +* .. Parameters .. + INTEGER MAXITR + PARAMETER ( MAXITR = 6 ) + REAL HUNDRED, MEIGHTH, ONE, PIOVER2, TEN, ZERO + PARAMETER ( HUNDRED = 100.0E0, MEIGHTH = -0.125E0, + $ ONE = 1.0E0, PIOVER2 = 1.57079632679489662E0, + $ TEN = 10.0E0, ZERO = 0.0E0 ) + REAL NEGONE + PARAMETER ( NEGONE = -1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL COLMAJOR, LQUERY, RESTART11, RESTART12, + $ RESTART21, RESTART22, WANTU1, WANTU2, WANTV1T, + $ WANTV2T + INTEGER I, IMIN, IMAX, ITER, IU1CS, IU1SN, IU2CS, + $ IU2SN, IV1TCS, IV1TSN, IV2TCS, IV2TSN, J, + $ LWORKMIN, LWORKOPT, MAXIT, MINI + REAL B11BULGE, B12BULGE, B21BULGE, B22BULGE, DUMMY, + $ EPS, MU, NU, R, SIGMA11, SIGMA21, + $ TEMP, THETAMAX, THETAMIN, THRESH, TOL, TOLMUL, + $ UNFL, X1, X2, Y1, Y2 +* +* .. External Subroutines .. + EXTERNAL SLASR, SSCAL, SSWAP, SLARTGP, SLARTGS, SLAS2, + $ XERBLA +* .. +* .. External Functions .. + REAL SLAMCH + LOGICAL LSAME + EXTERNAL LSAME, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, ATAN2, COS, MAX, MIN, SIN, SQRT +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + LQUERY = LWORK .EQ. -1 + WANTU1 = LSAME( JOBU1, 'Y' ) + WANTU2 = LSAME( JOBU2, 'Y' ) + WANTV1T = LSAME( JOBV1T, 'Y' ) + WANTV2T = LSAME( JOBV2T, 'Y' ) + COLMAJOR = .NOT. LSAME( TRANS, 'T' ) +* + IF( M .LT. 0 ) THEN + INFO = -6 + ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN + INFO = -7 + ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN + INFO = -8 + ELSE IF( Q .GT. P .OR. Q .GT. M-P .OR. Q .GT. M-Q ) THEN + INFO = -8 + ELSE IF( WANTU1 .AND. LDU1 .LT. P ) THEN + INFO = -12 + ELSE IF( WANTU2 .AND. LDU2 .LT. M-P ) THEN + INFO = -14 + ELSE IF( WANTV1T .AND. LDV1T .LT. Q ) THEN + INFO = -16 + ELSE IF( WANTV2T .AND. LDV2T .LT. M-Q ) THEN + INFO = -18 + END IF +* +* Quick return if Q = 0 +* + IF( INFO .EQ. 0 .AND. Q .EQ. 0 ) THEN + LWORKMIN = 1 + WORK(1) = LWORKMIN + RETURN + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + IU1CS = 1 + IU1SN = IU1CS + Q + IU2CS = IU1SN + Q + IU2SN = IU2CS + Q + IV1TCS = IU2SN + Q + IV1TSN = IV1TCS + Q + IV2TCS = IV1TSN + Q + IV2TSN = IV2TCS + Q + LWORKOPT = IV2TSN + Q - 1 + LWORKMIN = LWORKOPT + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT. LQUERY ) THEN + INFO = -28 + END IF + END IF +* + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'SBBCSD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'Epsilon' ) + UNFL = SLAMCH( 'Safe minimum' ) + TOLMUL = MAX( TEN, MIN( HUNDRED, EPS**MEIGHTH ) ) + TOL = TOLMUL*EPS + THRESH = MAX( TOL, MAXITR*Q*Q*UNFL ) +* +* Test for negligible sines or cosines +* + DO I = 1, Q + IF( THETA(I) .LT. THRESH ) THEN + THETA(I) = ZERO + ELSE IF( THETA(I) .GT. PIOVER2-THRESH ) THEN + THETA(I) = PIOVER2 + END IF + END DO + DO I = 1, Q-1 + IF( PHI(I) .LT. THRESH ) THEN + PHI(I) = ZERO + ELSE IF( PHI(I) .GT. PIOVER2-THRESH ) THEN + PHI(I) = PIOVER2 + END IF + END DO +* +* Initial deflation +* + IMAX = Q + DO WHILE( IMAX .GT. 1 ) + IF( PHI(IMAX-1) .NE. ZERO ) THEN + EXIT + END IF + IMAX = IMAX - 1 + END DO + IMIN = IMAX - 1 + IF ( IMIN .GT. 1 ) THEN + DO WHILE( PHI(IMIN-1) .NE. ZERO ) + IMIN = IMIN - 1 + IF ( IMIN .LE. 1 ) EXIT + END DO + END IF +* +* Initialize iteration counter +* + MAXIT = MAXITR*Q*Q + ITER = 0 +* +* Begin main iteration loop +* + DO WHILE( IMAX .GT. 1 ) +* +* Compute the matrix entries +* + B11D(IMIN) = COS( THETA(IMIN) ) + B21D(IMIN) = -SIN( THETA(IMIN) ) + DO I = IMIN, IMAX - 1 + B11E(I) = -SIN( THETA(I) ) * SIN( PHI(I) ) + B11D(I+1) = COS( THETA(I+1) ) * COS( PHI(I) ) + B12D(I) = SIN( THETA(I) ) * COS( PHI(I) ) + B12E(I) = COS( THETA(I+1) ) * SIN( PHI(I) ) + B21E(I) = -COS( THETA(I) ) * SIN( PHI(I) ) + B21D(I+1) = -SIN( THETA(I+1) ) * COS( PHI(I) ) + B22D(I) = COS( THETA(I) ) * COS( PHI(I) ) + B22E(I) = -SIN( THETA(I+1) ) * SIN( PHI(I) ) + END DO + B12D(IMAX) = SIN( THETA(IMAX) ) + B22D(IMAX) = COS( THETA(IMAX) ) +* +* Abort if not converging; otherwise, increment ITER +* + IF( ITER .GT. MAXIT ) THEN + INFO = 0 + DO I = 1, Q + IF( PHI(I) .NE. ZERO ) + $ INFO = INFO + 1 + END DO + RETURN + END IF +* + ITER = ITER + IMAX - IMIN +* +* Compute shifts +* + THETAMAX = THETA(IMIN) + THETAMIN = THETA(IMIN) + DO I = IMIN+1, IMAX + IF( THETA(I) > THETAMAX ) + $ THETAMAX = THETA(I) + IF( THETA(I) < THETAMIN ) + $ THETAMIN = THETA(I) + END DO +* + IF( THETAMAX .GT. PIOVER2 - THRESH ) THEN +* +* Zero on diagonals of B11 and B22; induce deflation with a +* zero shift +* + MU = ZERO + NU = ONE +* + ELSE IF( THETAMIN .LT. THRESH ) THEN +* +* Zero on diagonals of B12 and B22; induce deflation with a +* zero shift +* + MU = ONE + NU = ZERO +* + ELSE +* +* Compute shifts for B11 and B21 and use the lesser +* + CALL SLAS2( B11D(IMAX-1), B11E(IMAX-1), B11D(IMAX), SIGMA11, + $ DUMMY ) + CALL SLAS2( B21D(IMAX-1), B21E(IMAX-1), B21D(IMAX), SIGMA21, + $ DUMMY ) +* + IF( SIGMA11 .LE. SIGMA21 ) THEN + MU = SIGMA11 + NU = SQRT( ONE - MU**2 ) + IF( MU .LT. THRESH ) THEN + MU = ZERO + NU = ONE + END IF + ELSE + NU = SIGMA21 + MU = SQRT( 1.0 - NU**2 ) + IF( NU .LT. THRESH ) THEN + MU = ONE + NU = ZERO + END IF + END IF + END IF +* +* Rotate to produce bulges in B11 and B21 +* + IF( MU .LE. NU ) THEN + CALL SLARTGS( B11D(IMIN), B11E(IMIN), MU, + $ WORK(IV1TCS+IMIN-1), WORK(IV1TSN+IMIN-1) ) + ELSE + CALL SLARTGS( B21D(IMIN), B21E(IMIN), NU, + $ WORK(IV1TCS+IMIN-1), WORK(IV1TSN+IMIN-1) ) + END IF +* + TEMP = WORK(IV1TCS+IMIN-1)*B11D(IMIN) + + $ WORK(IV1TSN+IMIN-1)*B11E(IMIN) + B11E(IMIN) = WORK(IV1TCS+IMIN-1)*B11E(IMIN) - + $ WORK(IV1TSN+IMIN-1)*B11D(IMIN) + B11D(IMIN) = TEMP + B11BULGE = WORK(IV1TSN+IMIN-1)*B11D(IMIN+1) + B11D(IMIN+1) = WORK(IV1TCS+IMIN-1)*B11D(IMIN+1) + TEMP = WORK(IV1TCS+IMIN-1)*B21D(IMIN) + + $ WORK(IV1TSN+IMIN-1)*B21E(IMIN) + B21E(IMIN) = WORK(IV1TCS+IMIN-1)*B21E(IMIN) - + $ WORK(IV1TSN+IMIN-1)*B21D(IMIN) + B21D(IMIN) = TEMP + B21BULGE = WORK(IV1TSN+IMIN-1)*B21D(IMIN+1) + B21D(IMIN+1) = WORK(IV1TCS+IMIN-1)*B21D(IMIN+1) +* +* Compute THETA(IMIN) +* + THETA( IMIN ) = ATAN2( SQRT( B21D(IMIN)**2+B21BULGE**2 ), + $ SQRT( B11D(IMIN)**2+B11BULGE**2 ) ) +* +* Chase the bulges in B11(IMIN+1,IMIN) and B21(IMIN+1,IMIN) +* + IF( B11D(IMIN)**2+B11BULGE**2 .GT. THRESH**2 ) THEN + CALL SLARTGP( B11BULGE, B11D(IMIN), WORK(IU1SN+IMIN-1), + $ WORK(IU1CS+IMIN-1), R ) + ELSE IF( MU .LE. NU ) THEN + CALL SLARTGS( B11E( IMIN ), B11D( IMIN + 1 ), MU, + $ WORK(IU1CS+IMIN-1), WORK(IU1SN+IMIN-1) ) + ELSE + CALL SLARTGS( B12D( IMIN ), B12E( IMIN ), NU, + $ WORK(IU1CS+IMIN-1), WORK(IU1SN+IMIN-1) ) + END IF + IF( B21D(IMIN)**2+B21BULGE**2 .GT. THRESH**2 ) THEN + CALL SLARTGP( B21BULGE, B21D(IMIN), WORK(IU2SN+IMIN-1), + $ WORK(IU2CS+IMIN-1), R ) + ELSE IF( NU .LT. MU ) THEN + CALL SLARTGS( B21E( IMIN ), B21D( IMIN + 1 ), NU, + $ WORK(IU2CS+IMIN-1), WORK(IU2SN+IMIN-1) ) + ELSE + CALL SLARTGS( B22D(IMIN), B22E(IMIN), MU, + $ WORK(IU2CS+IMIN-1), WORK(IU2SN+IMIN-1) ) + END IF + WORK(IU2CS+IMIN-1) = -WORK(IU2CS+IMIN-1) + WORK(IU2SN+IMIN-1) = -WORK(IU2SN+IMIN-1) +* + TEMP = WORK(IU1CS+IMIN-1)*B11E(IMIN) + + $ WORK(IU1SN+IMIN-1)*B11D(IMIN+1) + B11D(IMIN+1) = WORK(IU1CS+IMIN-1)*B11D(IMIN+1) - + $ WORK(IU1SN+IMIN-1)*B11E(IMIN) + B11E(IMIN) = TEMP + IF( IMAX .GT. IMIN+1 ) THEN + B11BULGE = WORK(IU1SN+IMIN-1)*B11E(IMIN+1) + B11E(IMIN+1) = WORK(IU1CS+IMIN-1)*B11E(IMIN+1) + END IF + TEMP = WORK(IU1CS+IMIN-1)*B12D(IMIN) + + $ WORK(IU1SN+IMIN-1)*B12E(IMIN) + B12E(IMIN) = WORK(IU1CS+IMIN-1)*B12E(IMIN) - + $ WORK(IU1SN+IMIN-1)*B12D(IMIN) + B12D(IMIN) = TEMP + B12BULGE = WORK(IU1SN+IMIN-1)*B12D(IMIN+1) + B12D(IMIN+1) = WORK(IU1CS+IMIN-1)*B12D(IMIN+1) + TEMP = WORK(IU2CS+IMIN-1)*B21E(IMIN) + + $ WORK(IU2SN+IMIN-1)*B21D(IMIN+1) + B21D(IMIN+1) = WORK(IU2CS+IMIN-1)*B21D(IMIN+1) - + $ WORK(IU2SN+IMIN-1)*B21E(IMIN) + B21E(IMIN) = TEMP + IF( IMAX .GT. IMIN+1 ) THEN + B21BULGE = WORK(IU2SN+IMIN-1)*B21E(IMIN+1) + B21E(IMIN+1) = WORK(IU2CS+IMIN-1)*B21E(IMIN+1) + END IF + TEMP = WORK(IU2CS+IMIN-1)*B22D(IMIN) + + $ WORK(IU2SN+IMIN-1)*B22E(IMIN) + B22E(IMIN) = WORK(IU2CS+IMIN-1)*B22E(IMIN) - + $ WORK(IU2SN+IMIN-1)*B22D(IMIN) + B22D(IMIN) = TEMP + B22BULGE = WORK(IU2SN+IMIN-1)*B22D(IMIN+1) + B22D(IMIN+1) = WORK(IU2CS+IMIN-1)*B22D(IMIN+1) +* +* Inner loop: chase bulges from B11(IMIN,IMIN+2), +* B12(IMIN,IMIN+1), B21(IMIN,IMIN+2), and B22(IMIN,IMIN+1) to +* bottom-right +* + DO I = IMIN+1, IMAX-1 +* +* Compute PHI(I-1) +* + X1 = SIN(THETA(I-1))*B11E(I-1) + COS(THETA(I-1))*B21E(I-1) + X2 = SIN(THETA(I-1))*B11BULGE + COS(THETA(I-1))*B21BULGE + Y1 = SIN(THETA(I-1))*B12D(I-1) + COS(THETA(I-1))*B22D(I-1) + Y2 = SIN(THETA(I-1))*B12BULGE + COS(THETA(I-1))*B22BULGE +* + PHI(I-1) = ATAN2( SQRT(X1**2+X2**2), SQRT(Y1**2+Y2**2) ) +* +* Determine if there are bulges to chase or if a new direct +* summand has been reached +* + RESTART11 = B11E(I-1)**2 + B11BULGE**2 .LE. THRESH**2 + RESTART21 = B21E(I-1)**2 + B21BULGE**2 .LE. THRESH**2 + RESTART12 = B12D(I-1)**2 + B12BULGE**2 .LE. THRESH**2 + RESTART22 = B22D(I-1)**2 + B22BULGE**2 .LE. THRESH**2 +* +* If possible, chase bulges from B11(I-1,I+1), B12(I-1,I), +* B21(I-1,I+1), and B22(I-1,I). If necessary, restart bulge- +* chasing by applying the original shift again. +* + IF( .NOT. RESTART11 .AND. .NOT. RESTART21 ) THEN + CALL SLARTGP( X2, X1, WORK(IV1TSN+I-1), WORK(IV1TCS+I-1), + $ R ) + ELSE IF( .NOT. RESTART11 .AND. RESTART21 ) THEN + CALL SLARTGP( B11BULGE, B11E(I-1), WORK(IV1TSN+I-1), + $ WORK(IV1TCS+I-1), R ) + ELSE IF( RESTART11 .AND. .NOT. RESTART21 ) THEN + CALL SLARTGP( B21BULGE, B21E(I-1), WORK(IV1TSN+I-1), + $ WORK(IV1TCS+I-1), R ) + ELSE IF( MU .LE. NU ) THEN + CALL SLARTGS( B11D(I), B11E(I), MU, WORK(IV1TCS+I-1), + $ WORK(IV1TSN+I-1) ) + ELSE + CALL SLARTGS( B21D(I), B21E(I), NU, WORK(IV1TCS+I-1), + $ WORK(IV1TSN+I-1) ) + END IF + WORK(IV1TCS+I-1) = -WORK(IV1TCS+I-1) + WORK(IV1TSN+I-1) = -WORK(IV1TSN+I-1) + IF( .NOT. RESTART12 .AND. .NOT. RESTART22 ) THEN + CALL SLARTGP( Y2, Y1, WORK(IV2TSN+I-1-1), + $ WORK(IV2TCS+I-1-1), R ) + ELSE IF( .NOT. RESTART12 .AND. RESTART22 ) THEN + CALL SLARTGP( B12BULGE, B12D(I-1), WORK(IV2TSN+I-1-1), + $ WORK(IV2TCS+I-1-1), R ) + ELSE IF( RESTART12 .AND. .NOT. RESTART22 ) THEN + CALL SLARTGP( B22BULGE, B22D(I-1), WORK(IV2TSN+I-1-1), + $ WORK(IV2TCS+I-1-1), R ) + ELSE IF( NU .LT. MU ) THEN + CALL SLARTGS( B12E(I-1), B12D(I), NU, WORK(IV2TCS+I-1-1), + $ WORK(IV2TSN+I-1-1) ) + ELSE + CALL SLARTGS( B22E(I-1), B22D(I), MU, WORK(IV2TCS+I-1-1), + $ WORK(IV2TSN+I-1-1) ) + END IF +* + TEMP = WORK(IV1TCS+I-1)*B11D(I) + WORK(IV1TSN+I-1)*B11E(I) + B11E(I) = WORK(IV1TCS+I-1)*B11E(I) - + $ WORK(IV1TSN+I-1)*B11D(I) + B11D(I) = TEMP + B11BULGE = WORK(IV1TSN+I-1)*B11D(I+1) + B11D(I+1) = WORK(IV1TCS+I-1)*B11D(I+1) + TEMP = WORK(IV1TCS+I-1)*B21D(I) + WORK(IV1TSN+I-1)*B21E(I) + B21E(I) = WORK(IV1TCS+I-1)*B21E(I) - + $ WORK(IV1TSN+I-1)*B21D(I) + B21D(I) = TEMP + B21BULGE = WORK(IV1TSN+I-1)*B21D(I+1) + B21D(I+1) = WORK(IV1TCS+I-1)*B21D(I+1) + TEMP = WORK(IV2TCS+I-1-1)*B12E(I-1) + + $ WORK(IV2TSN+I-1-1)*B12D(I) + B12D(I) = WORK(IV2TCS+I-1-1)*B12D(I) - + $ WORK(IV2TSN+I-1-1)*B12E(I-1) + B12E(I-1) = TEMP + B12BULGE = WORK(IV2TSN+I-1-1)*B12E(I) + B12E(I) = WORK(IV2TCS+I-1-1)*B12E(I) + TEMP = WORK(IV2TCS+I-1-1)*B22E(I-1) + + $ WORK(IV2TSN+I-1-1)*B22D(I) + B22D(I) = WORK(IV2TCS+I-1-1)*B22D(I) - + $ WORK(IV2TSN+I-1-1)*B22E(I-1) + B22E(I-1) = TEMP + B22BULGE = WORK(IV2TSN+I-1-1)*B22E(I) + B22E(I) = WORK(IV2TCS+I-1-1)*B22E(I) +* +* Compute THETA(I) +* + X1 = COS(PHI(I-1))*B11D(I) + SIN(PHI(I-1))*B12E(I-1) + X2 = COS(PHI(I-1))*B11BULGE + SIN(PHI(I-1))*B12BULGE + Y1 = COS(PHI(I-1))*B21D(I) + SIN(PHI(I-1))*B22E(I-1) + Y2 = COS(PHI(I-1))*B21BULGE + SIN(PHI(I-1))*B22BULGE +* + THETA(I) = ATAN2( SQRT(Y1**2+Y2**2), SQRT(X1**2+X2**2) ) +* +* Determine if there are bulges to chase or if a new direct +* summand has been reached +* + RESTART11 = B11D(I)**2 + B11BULGE**2 .LE. THRESH**2 + RESTART12 = B12E(I-1)**2 + B12BULGE**2 .LE. THRESH**2 + RESTART21 = B21D(I)**2 + B21BULGE**2 .LE. THRESH**2 + RESTART22 = B22E(I-1)**2 + B22BULGE**2 .LE. THRESH**2 +* +* If possible, chase bulges from B11(I+1,I), B12(I+1,I-1), +* B21(I+1,I), and B22(I+1,I-1). If necessary, restart bulge- +* chasing by applying the original shift again. +* + IF( .NOT. RESTART11 .AND. .NOT. RESTART12 ) THEN + CALL SLARTGP( X2, X1, WORK(IU1SN+I-1), WORK(IU1CS+I-1), + $ R ) + ELSE IF( .NOT. RESTART11 .AND. RESTART12 ) THEN + CALL SLARTGP( B11BULGE, B11D(I), WORK(IU1SN+I-1), + $ WORK(IU1CS+I-1), R ) + ELSE IF( RESTART11 .AND. .NOT. RESTART12 ) THEN + CALL SLARTGP( B12BULGE, B12E(I-1), WORK(IU1SN+I-1), + $ WORK(IU1CS+I-1), R ) + ELSE IF( MU .LE. NU ) THEN + CALL SLARTGS( B11E(I), B11D(I+1), MU, WORK(IU1CS+I-1), + $ WORK(IU1SN+I-1) ) + ELSE + CALL SLARTGS( B12D(I), B12E(I), NU, WORK(IU1CS+I-1), + $ WORK(IU1SN+I-1) ) + END IF + IF( .NOT. RESTART21 .AND. .NOT. RESTART22 ) THEN + CALL SLARTGP( Y2, Y1, WORK(IU2SN+I-1), WORK(IU2CS+I-1), + $ R ) + ELSE IF( .NOT. RESTART21 .AND. RESTART22 ) THEN + CALL SLARTGP( B21BULGE, B21D(I), WORK(IU2SN+I-1), + $ WORK(IU2CS+I-1), R ) + ELSE IF( RESTART21 .AND. .NOT. RESTART22 ) THEN + CALL SLARTGP( B22BULGE, B22E(I-1), WORK(IU2SN+I-1), + $ WORK(IU2CS+I-1), R ) + ELSE IF( NU .LT. MU ) THEN + CALL SLARTGS( B21E(I), B21E(I+1), NU, WORK(IU2CS+I-1), + $ WORK(IU2SN+I-1) ) + ELSE + CALL SLARTGS( B22D(I), B22E(I), MU, WORK(IU2CS+I-1), + $ WORK(IU2SN+I-1) ) + END IF + WORK(IU2CS+I-1) = -WORK(IU2CS+I-1) + WORK(IU2SN+I-1) = -WORK(IU2SN+I-1) +* + TEMP = WORK(IU1CS+I-1)*B11E(I) + WORK(IU1SN+I-1)*B11D(I+1) + B11D(I+1) = WORK(IU1CS+I-1)*B11D(I+1) - + $ WORK(IU1SN+I-1)*B11E(I) + B11E(I) = TEMP + IF( I .LT. IMAX - 1 ) THEN + B11BULGE = WORK(IU1SN+I-1)*B11E(I+1) + B11E(I+1) = WORK(IU1CS+I-1)*B11E(I+1) + END IF + TEMP = WORK(IU2CS+I-1)*B21E(I) + WORK(IU2SN+I-1)*B21D(I+1) + B21D(I+1) = WORK(IU2CS+I-1)*B21D(I+1) - + $ WORK(IU2SN+I-1)*B21E(I) + B21E(I) = TEMP + IF( I .LT. IMAX - 1 ) THEN + B21BULGE = WORK(IU2SN+I-1)*B21E(I+1) + B21E(I+1) = WORK(IU2CS+I-1)*B21E(I+1) + END IF + TEMP = WORK(IU1CS+I-1)*B12D(I) + WORK(IU1SN+I-1)*B12E(I) + B12E(I) = WORK(IU1CS+I-1)*B12E(I) - WORK(IU1SN+I-1)*B12D(I) + B12D(I) = TEMP + B12BULGE = WORK(IU1SN+I-1)*B12D(I+1) + B12D(I+1) = WORK(IU1CS+I-1)*B12D(I+1) + TEMP = WORK(IU2CS+I-1)*B22D(I) + WORK(IU2SN+I-1)*B22E(I) + B22E(I) = WORK(IU2CS+I-1)*B22E(I) - WORK(IU2SN+I-1)*B22D(I) + B22D(I) = TEMP + B22BULGE = WORK(IU2SN+I-1)*B22D(I+1) + B22D(I+1) = WORK(IU2CS+I-1)*B22D(I+1) +* + END DO +* +* Compute PHI(IMAX-1) +* + X1 = SIN(THETA(IMAX-1))*B11E(IMAX-1) + + $ COS(THETA(IMAX-1))*B21E(IMAX-1) + Y1 = SIN(THETA(IMAX-1))*B12D(IMAX-1) + + $ COS(THETA(IMAX-1))*B22D(IMAX-1) + Y2 = SIN(THETA(IMAX-1))*B12BULGE + COS(THETA(IMAX-1))*B22BULGE +* + PHI(IMAX-1) = ATAN2( ABS(X1), SQRT(Y1**2+Y2**2) ) +* +* Chase bulges from B12(IMAX-1,IMAX) and B22(IMAX-1,IMAX) +* + RESTART12 = B12D(IMAX-1)**2 + B12BULGE**2 .LE. THRESH**2 + RESTART22 = B22D(IMAX-1)**2 + B22BULGE**2 .LE. THRESH**2 +* + IF( .NOT. RESTART12 .AND. .NOT. RESTART22 ) THEN + CALL SLARTGP( Y2, Y1, WORK(IV2TSN+IMAX-1-1), + $ WORK(IV2TCS+IMAX-1-1), R ) + ELSE IF( .NOT. RESTART12 .AND. RESTART22 ) THEN + CALL SLARTGP( B12BULGE, B12D(IMAX-1), WORK(IV2TSN+IMAX-1-1), + $ WORK(IV2TCS+IMAX-1-1), R ) + ELSE IF( RESTART12 .AND. .NOT. RESTART22 ) THEN + CALL SLARTGP( B22BULGE, B22D(IMAX-1), WORK(IV2TSN+IMAX-1-1), + $ WORK(IV2TCS+IMAX-1-1), R ) + ELSE IF( NU .LT. MU ) THEN + CALL SLARTGS( B12E(IMAX-1), B12D(IMAX), NU, + $ WORK(IV2TCS+IMAX-1-1), WORK(IV2TSN+IMAX-1-1) ) + ELSE + CALL SLARTGS( B22E(IMAX-1), B22D(IMAX), MU, + $ WORK(IV2TCS+IMAX-1-1), WORK(IV2TSN+IMAX-1-1) ) + END IF +* + TEMP = WORK(IV2TCS+IMAX-1-1)*B12E(IMAX-1) + + $ WORK(IV2TSN+IMAX-1-1)*B12D(IMAX) + B12D(IMAX) = WORK(IV2TCS+IMAX-1-1)*B12D(IMAX) - + $ WORK(IV2TSN+IMAX-1-1)*B12E(IMAX-1) + B12E(IMAX-1) = TEMP + TEMP = WORK(IV2TCS+IMAX-1-1)*B22E(IMAX-1) + + $ WORK(IV2TSN+IMAX-1-1)*B22D(IMAX) + B22D(IMAX) = WORK(IV2TCS+IMAX-1-1)*B22D(IMAX) - + $ WORK(IV2TSN+IMAX-1-1)*B22E(IMAX-1) + B22E(IMAX-1) = TEMP +* +* Update singular vectors +* + IF( WANTU1 ) THEN + IF( COLMAJOR ) THEN + CALL SLASR( 'R', 'V', 'F', P, IMAX-IMIN+1, + $ WORK(IU1CS+IMIN-1), WORK(IU1SN+IMIN-1), + $ U1(1,IMIN), LDU1 ) + ELSE + CALL SLASR( 'L', 'V', 'F', IMAX-IMIN+1, P, + $ WORK(IU1CS+IMIN-1), WORK(IU1SN+IMIN-1), + $ U1(IMIN,1), LDU1 ) + END IF + END IF + IF( WANTU2 ) THEN + IF( COLMAJOR ) THEN + CALL SLASR( 'R', 'V', 'F', M-P, IMAX-IMIN+1, + $ WORK(IU2CS+IMIN-1), WORK(IU2SN+IMIN-1), + $ U2(1,IMIN), LDU2 ) + ELSE + CALL SLASR( 'L', 'V', 'F', IMAX-IMIN+1, M-P, + $ WORK(IU2CS+IMIN-1), WORK(IU2SN+IMIN-1), + $ U2(IMIN,1), LDU2 ) + END IF + END IF + IF( WANTV1T ) THEN + IF( COLMAJOR ) THEN + CALL SLASR( 'L', 'V', 'F', IMAX-IMIN+1, Q, + $ WORK(IV1TCS+IMIN-1), WORK(IV1TSN+IMIN-1), + $ V1T(IMIN,1), LDV1T ) + ELSE + CALL SLASR( 'R', 'V', 'F', Q, IMAX-IMIN+1, + $ WORK(IV1TCS+IMIN-1), WORK(IV1TSN+IMIN-1), + $ V1T(1,IMIN), LDV1T ) + END IF + END IF + IF( WANTV2T ) THEN + IF( COLMAJOR ) THEN + CALL SLASR( 'L', 'V', 'F', IMAX-IMIN+1, M-Q, + $ WORK(IV2TCS+IMIN-1), WORK(IV2TSN+IMIN-1), + $ V2T(IMIN,1), LDV2T ) + ELSE + CALL SLASR( 'R', 'V', 'F', M-Q, IMAX-IMIN+1, + $ WORK(IV2TCS+IMIN-1), WORK(IV2TSN+IMIN-1), + $ V2T(1,IMIN), LDV2T ) + END IF + END IF +* +* Fix signs on B11(IMAX-1,IMAX) and B21(IMAX-1,IMAX) +* + IF( B11E(IMAX-1)+B21E(IMAX-1) .GT. 0 ) THEN + B11D(IMAX) = -B11D(IMAX) + B21D(IMAX) = -B21D(IMAX) + IF( WANTV1T ) THEN + IF( COLMAJOR ) THEN + CALL SSCAL( Q, NEGONE, V1T(IMAX,1), LDV1T ) + ELSE + CALL SSCAL( Q, NEGONE, V1T(1,IMAX), 1 ) + END IF + END IF + END IF +* +* Compute THETA(IMAX) +* + X1 = COS(PHI(IMAX-1))*B11D(IMAX) + + $ SIN(PHI(IMAX-1))*B12E(IMAX-1) + Y1 = COS(PHI(IMAX-1))*B21D(IMAX) + + $ SIN(PHI(IMAX-1))*B22E(IMAX-1) +* + THETA(IMAX) = ATAN2( ABS(Y1), ABS(X1) ) +* +* Fix signs on B11(IMAX,IMAX), B12(IMAX,IMAX-1), B21(IMAX,IMAX), +* and B22(IMAX,IMAX-1) +* + IF( B11D(IMAX)+B12E(IMAX-1) .LT. 0 ) THEN + B12D(IMAX) = -B12D(IMAX) + IF( WANTU1 ) THEN + IF( COLMAJOR ) THEN + CALL SSCAL( P, NEGONE, U1(1,IMAX), 1 ) + ELSE + CALL SSCAL( P, NEGONE, U1(IMAX,1), LDU1 ) + END IF + END IF + END IF + IF( B21D(IMAX)+B22E(IMAX-1) .GT. 0 ) THEN + B22D(IMAX) = -B22D(IMAX) + IF( WANTU2 ) THEN + IF( COLMAJOR ) THEN + CALL SSCAL( M-P, NEGONE, U2(1,IMAX), 1 ) + ELSE + CALL SSCAL( M-P, NEGONE, U2(IMAX,1), LDU2 ) + END IF + END IF + END IF +* +* Fix signs on B12(IMAX,IMAX) and B22(IMAX,IMAX) +* + IF( B12D(IMAX)+B22D(IMAX) .LT. 0 ) THEN + IF( WANTV2T ) THEN + IF( COLMAJOR ) THEN + CALL SSCAL( M-Q, NEGONE, V2T(IMAX,1), LDV2T ) + ELSE + CALL SSCAL( M-Q, NEGONE, V2T(1,IMAX), 1 ) + END IF + END IF + END IF +* +* Test for negligible sines or cosines +* + DO I = IMIN, IMAX + IF( THETA(I) .LT. THRESH ) THEN + THETA(I) = ZERO + ELSE IF( THETA(I) .GT. PIOVER2-THRESH ) THEN + THETA(I) = PIOVER2 + END IF + END DO + DO I = IMIN, IMAX-1 + IF( PHI(I) .LT. THRESH ) THEN + PHI(I) = ZERO + ELSE IF( PHI(I) .GT. PIOVER2-THRESH ) THEN + PHI(I) = PIOVER2 + END IF + END DO +* +* Deflate +* + IF (IMAX .GT. 1) THEN + DO WHILE( PHI(IMAX-1) .EQ. ZERO ) + IMAX = IMAX - 1 + IF (IMAX .LE. 1) EXIT + END DO + END IF + IF( IMIN .GT. IMAX - 1 ) + $ IMIN = IMAX - 1 + IF (IMIN .GT. 1) THEN + DO WHILE (PHI(IMIN-1) .NE. ZERO) + IMIN = IMIN - 1 + IF (IMIN .LE. 1) EXIT + END DO + END IF +* +* Repeat main iteration loop +* + END DO +* +* Postprocessing: order THETA from least to greatest +* + DO I = 1, Q +* + MINI = I + THETAMIN = THETA(I) + DO J = I+1, Q + IF( THETA(J) .LT. THETAMIN ) THEN + MINI = J + THETAMIN = THETA(J) + END IF + END DO +* + IF( MINI .NE. I ) THEN + THETA(MINI) = THETA(I) + THETA(I) = THETAMIN + IF( COLMAJOR ) THEN + IF( WANTU1 ) + $ CALL SSWAP( P, U1(1,I), 1, U1(1,MINI), 1 ) + IF( WANTU2 ) + $ CALL SSWAP( M-P, U2(1,I), 1, U2(1,MINI), 1 ) + IF( WANTV1T ) + $ CALL SSWAP( Q, V1T(I,1), LDV1T, V1T(MINI,1), LDV1T ) + IF( WANTV2T ) + $ CALL SSWAP( M-Q, V2T(I,1), LDV2T, V2T(MINI,1), + $ LDV2T ) + ELSE + IF( WANTU1 ) + $ CALL SSWAP( P, U1(I,1), LDU1, U1(MINI,1), LDU1 ) + IF( WANTU2 ) + $ CALL SSWAP( M-P, U2(I,1), LDU2, U2(MINI,1), LDU2 ) + IF( WANTV1T ) + $ CALL SSWAP( Q, V1T(1,I), 1, V1T(1,MINI), 1 ) + IF( WANTV2T ) + $ CALL SSWAP( M-Q, V2T(1,I), 1, V2T(1,MINI), 1 ) + END IF + END IF +* + END DO +* + RETURN +* +* End of SBBCSD +* + END + diff --git a/dspl/liblapack/SRC/sbdsdc.f b/dspl/liblapack/SRC/sbdsdc.f new file mode 100644 index 0000000..1c559ce --- /dev/null +++ b/dspl/liblapack/SRC/sbdsdc.f @@ -0,0 +1,524 @@ +*> \brief \b SBDSDC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SBDSDC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SBDSDC( UPLO, COMPQ, N, D, E, U, LDU, VT, LDVT, Q, IQ, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ, UPLO +* INTEGER INFO, LDU, LDVT, N +* .. +* .. Array Arguments .. +* INTEGER IQ( * ), IWORK( * ) +* REAL D( * ), E( * ), Q( * ), U( LDU, * ), +* $ VT( LDVT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SBDSDC computes the singular value decomposition (SVD) of a real +*> N-by-N (upper or lower) bidiagonal matrix B: B = U * S * VT, +*> using a divide and conquer method, where S is a diagonal matrix +*> with non-negative diagonal elements (the singular values of B), and +*> U and VT are orthogonal matrices of left and right singular vectors, +*> respectively. SBDSDC can be used to compute all singular values, +*> and optionally, singular vectors or singular vectors in compact form. +*> +*> This code makes very mild assumptions about floating point +*> arithmetic. It will work on machines with a guard digit in +*> add/subtract, or on those binary machines without guard digits +*> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. +*> It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. See SLASD3 for details. +*> +*> The code currently calls SLASDQ if singular values only are desired. +*> However, it can be slightly modified to compute singular values +*> using the divide and conquer method. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': B is upper bidiagonal. +*> = 'L': B is lower bidiagonal. +*> \endverbatim +*> +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> Specifies whether singular vectors are to be computed +*> as follows: +*> = 'N': Compute singular values only; +*> = 'P': Compute singular values and compute singular +*> vectors in compact form; +*> = 'I': Compute singular values and singular vectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the n diagonal elements of the bidiagonal matrix B. +*> On exit, if INFO=0, the singular values of B. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> On entry, the elements of E contain the offdiagonal +*> elements of the bidiagonal matrix whose SVD is desired. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is REAL array, dimension (LDU,N) +*> If COMPQ = 'I', then: +*> On exit, if INFO = 0, U contains the left singular vectors +*> of the bidiagonal matrix. +*> For other values of COMPQ, U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= 1. +*> If singular vectors are desired, then LDU >= max( 1, N ). +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is REAL array, dimension (LDVT,N) +*> If COMPQ = 'I', then: +*> On exit, if INFO = 0, VT**T contains the right singular +*> vectors of the bidiagonal matrix. +*> For other values of COMPQ, VT is not referenced. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= 1. +*> If singular vectors are desired, then LDVT >= max( 1, N ). +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ) +*> If COMPQ = 'P', then: +*> On exit, if INFO = 0, Q and IQ contain the left +*> and right singular vectors in a compact form, +*> requiring O(N log N) space instead of 2*N**2. +*> In particular, Q contains all the REAL data in +*> LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1)))) +*> words of memory, where SMLSIZ is returned by ILAENV and +*> is equal to the maximum size of the subproblems at the +*> bottom of the computation tree (usually about 25). +*> For other values of COMPQ, Q is not referenced. +*> \endverbatim +*> +*> \param[out] IQ +*> \verbatim +*> IQ is INTEGER array, dimension (LDIQ) +*> If COMPQ = 'P', then: +*> On exit, if INFO = 0, Q and IQ contain the left +*> and right singular vectors in a compact form, +*> requiring O(N log N) space instead of 2*N**2. +*> In particular, IQ contains all INTEGER data in +*> LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1)))) +*> words of memory, where SMLSIZ is returned by ILAENV and +*> is equal to the maximum size of the subproblems at the +*> bottom of the computation tree (usually about 25). +*> For other values of COMPQ, IQ is not referenced. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> If COMPQ = 'N' then LWORK >= (4 * N). +*> If COMPQ = 'P' then LWORK >= (6 * N). +*> If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (8*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: The algorithm failed to compute a singular value. +*> The update process of divide and conquer failed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE SBDSDC( UPLO, COMPQ, N, D, E, U, LDU, VT, LDVT, Q, IQ, + $ WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, UPLO + INTEGER INFO, LDU, LDVT, N +* .. +* .. Array Arguments .. + INTEGER IQ( * ), IWORK( * ) + REAL D( * ), E( * ), Q( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* ===================================================================== +* Changed dimension statement in comment describing E from (N) to +* (N-1). Sven, 17 Feb 05. +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER DIFL, DIFR, GIVCOL, GIVNUM, GIVPTR, I, IC, + $ ICOMPQ, IERR, II, IS, IU, IUPLO, IVT, J, K, KK, + $ MLVL, NM1, NSIZE, PERM, POLES, QSTART, SMLSIZ, + $ SMLSZP, SQRE, START, WSTART, Z + REAL CS, EPS, ORGNRM, P, R, SN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANST + EXTERNAL SLAMCH, SLANST, ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLARTG, SLASCL, SLASD0, SLASDA, SLASDQ, + $ SLASET, SLASR, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, ABS, INT, LOG, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IUPLO = 0 + IF( LSAME( UPLO, 'U' ) ) + $ IUPLO = 1 + IF( LSAME( UPLO, 'L' ) ) + $ IUPLO = 2 + IF( LSAME( COMPQ, 'N' ) ) THEN + ICOMPQ = 0 + ELSE IF( LSAME( COMPQ, 'P' ) ) THEN + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ICOMPQ = 2 + ELSE + ICOMPQ = -1 + END IF + IF( IUPLO.EQ.0 ) THEN + INFO = -1 + ELSE IF( ICOMPQ.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ( LDU.LT.1 ) .OR. ( ( ICOMPQ.EQ.2 ) .AND. ( LDU.LT. + $ N ) ) ) THEN + INFO = -7 + ELSE IF( ( LDVT.LT.1 ) .OR. ( ( ICOMPQ.EQ.2 ) .AND. ( LDVT.LT. + $ N ) ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SBDSDC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + SMLSIZ = ILAENV( 9, 'SBDSDC', ' ', 0, 0, 0, 0 ) + IF( N.EQ.1 ) THEN + IF( ICOMPQ.EQ.1 ) THEN + Q( 1 ) = SIGN( ONE, D( 1 ) ) + Q( 1+SMLSIZ*N ) = ONE + ELSE IF( ICOMPQ.EQ.2 ) THEN + U( 1, 1 ) = SIGN( ONE, D( 1 ) ) + VT( 1, 1 ) = ONE + END IF + D( 1 ) = ABS( D( 1 ) ) + RETURN + END IF + NM1 = N - 1 +* +* If matrix lower bidiagonal, rotate to be upper bidiagonal +* by applying Givens rotations on the left +* + WSTART = 1 + QSTART = 3 + IF( ICOMPQ.EQ.1 ) THEN + CALL SCOPY( N, D, 1, Q( 1 ), 1 ) + CALL SCOPY( N-1, E, 1, Q( N+1 ), 1 ) + END IF + IF( IUPLO.EQ.2 ) THEN + QSTART = 5 + IF( ICOMPQ .EQ. 2 ) WSTART = 2*N - 1 + DO 10 I = 1, N - 1 + CALL SLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( ICOMPQ.EQ.1 ) THEN + Q( I+2*N ) = CS + Q( I+3*N ) = SN + ELSE IF( ICOMPQ.EQ.2 ) THEN + WORK( I ) = CS + WORK( NM1+I ) = -SN + END IF + 10 CONTINUE + END IF +* +* If ICOMPQ = 0, use SLASDQ to compute the singular values. +* + IF( ICOMPQ.EQ.0 ) THEN +* Ignore WSTART, instead using WORK( 1 ), since the two vectors +* for CS and -SN above are added only if ICOMPQ == 2, +* and adding them exceeds documented WORK size of 4*n. + CALL SLASDQ( 'U', 0, N, 0, 0, 0, D, E, VT, LDVT, U, LDU, U, + $ LDU, WORK( 1 ), INFO ) + GO TO 40 + END IF +* +* If N is smaller than the minimum divide size SMLSIZ, then solve +* the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN + IF( ICOMPQ.EQ.2 ) THEN + CALL SLASET( 'A', N, N, ZERO, ONE, U, LDU ) + CALL SLASET( 'A', N, N, ZERO, ONE, VT, LDVT ) + CALL SLASDQ( 'U', 0, N, N, N, 0, D, E, VT, LDVT, U, LDU, U, + $ LDU, WORK( WSTART ), INFO ) + ELSE IF( ICOMPQ.EQ.1 ) THEN + IU = 1 + IVT = IU + N + CALL SLASET( 'A', N, N, ZERO, ONE, Q( IU+( QSTART-1 )*N ), + $ N ) + CALL SLASET( 'A', N, N, ZERO, ONE, Q( IVT+( QSTART-1 )*N ), + $ N ) + CALL SLASDQ( 'U', 0, N, N, N, 0, D, E, + $ Q( IVT+( QSTART-1 )*N ), N, + $ Q( IU+( QSTART-1 )*N ), N, + $ Q( IU+( QSTART-1 )*N ), N, WORK( WSTART ), + $ INFO ) + END IF + GO TO 40 + END IF +* + IF( ICOMPQ.EQ.2 ) THEN + CALL SLASET( 'A', N, N, ZERO, ONE, U, LDU ) + CALL SLASET( 'A', N, N, ZERO, ONE, VT, LDVT ) + END IF +* +* Scale. +* + ORGNRM = SLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) + $ RETURN + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, IERR ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, IERR ) +* + EPS = SLAMCH( 'Epsilon' ) +* + MLVL = INT( LOG( REAL( N ) / REAL( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1 + SMLSZP = SMLSIZ + 1 +* + IF( ICOMPQ.EQ.1 ) THEN + IU = 1 + IVT = 1 + SMLSIZ + DIFL = IVT + SMLSZP + DIFR = DIFL + MLVL + Z = DIFR + MLVL*2 + IC = Z + MLVL + IS = IC + 1 + POLES = IS + 1 + GIVNUM = POLES + 2*MLVL +* + K = 1 + GIVPTR = 2 + PERM = 3 + GIVCOL = PERM + MLVL + END IF +* + DO 20 I = 1, N + IF( ABS( D( I ) ).LT.EPS ) THEN + D( I ) = SIGN( EPS, D( I ) ) + END IF + 20 CONTINUE +* + START = 1 + SQRE = 0 +* + DO 30 I = 1, NM1 + IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN +* +* Subproblem found. First determine its size and then +* apply divide and conquer on it. +* + IF( I.LT.NM1 ) THEN +* +* A subproblem with E(I) small for I < NM1. +* + NSIZE = I - START + 1 + ELSE IF( ABS( E( I ) ).GE.EPS ) THEN +* +* A subproblem with E(NM1) not too small but I = NM1. +* + NSIZE = N - START + 1 + ELSE +* +* A subproblem with E(NM1) small. This implies an +* 1-by-1 subproblem at D(N). Solve this 1-by-1 problem +* first. +* + NSIZE = I - START + 1 + IF( ICOMPQ.EQ.2 ) THEN + U( N, N ) = SIGN( ONE, D( N ) ) + VT( N, N ) = ONE + ELSE IF( ICOMPQ.EQ.1 ) THEN + Q( N+( QSTART-1 )*N ) = SIGN( ONE, D( N ) ) + Q( N+( SMLSIZ+QSTART-1 )*N ) = ONE + END IF + D( N ) = ABS( D( N ) ) + END IF + IF( ICOMPQ.EQ.2 ) THEN + CALL SLASD0( NSIZE, SQRE, D( START ), E( START ), + $ U( START, START ), LDU, VT( START, START ), + $ LDVT, SMLSIZ, IWORK, WORK( WSTART ), INFO ) + ELSE + CALL SLASDA( ICOMPQ, SMLSIZ, NSIZE, SQRE, D( START ), + $ E( START ), Q( START+( IU+QSTART-2 )*N ), N, + $ Q( START+( IVT+QSTART-2 )*N ), + $ IQ( START+K*N ), Q( START+( DIFL+QSTART-2 )* + $ N ), Q( START+( DIFR+QSTART-2 )*N ), + $ Q( START+( Z+QSTART-2 )*N ), + $ Q( START+( POLES+QSTART-2 )*N ), + $ IQ( START+GIVPTR*N ), IQ( START+GIVCOL*N ), + $ N, IQ( START+PERM*N ), + $ Q( START+( GIVNUM+QSTART-2 )*N ), + $ Q( START+( IC+QSTART-2 )*N ), + $ Q( START+( IS+QSTART-2 )*N ), + $ WORK( WSTART ), IWORK, INFO ) + END IF + IF( INFO.NE.0 ) THEN + RETURN + END IF + START = I + 1 + END IF + 30 CONTINUE +* +* Unscale +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, IERR ) + 40 CONTINUE +* +* Use Selection Sort to minimize swaps of singular vectors +* + DO 60 II = 2, N + I = II - 1 + KK = I + P = D( I ) + DO 50 J = II, N + IF( D( J ).GT.P ) THEN + KK = J + P = D( J ) + END IF + 50 CONTINUE + IF( KK.NE.I ) THEN + D( KK ) = D( I ) + D( I ) = P + IF( ICOMPQ.EQ.1 ) THEN + IQ( I ) = KK + ELSE IF( ICOMPQ.EQ.2 ) THEN + CALL SSWAP( N, U( 1, I ), 1, U( 1, KK ), 1 ) + CALL SSWAP( N, VT( I, 1 ), LDVT, VT( KK, 1 ), LDVT ) + END IF + ELSE IF( ICOMPQ.EQ.1 ) THEN + IQ( I ) = I + END IF + 60 CONTINUE +* +* If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO +* + IF( ICOMPQ.EQ.1 ) THEN + IF( IUPLO.EQ.1 ) THEN + IQ( N ) = 1 + ELSE + IQ( N ) = 0 + END IF + END IF +* +* If B is lower bidiagonal, update U by those Givens rotations +* which rotated B to be upper bidiagonal +* + IF( ( IUPLO.EQ.2 ) .AND. ( ICOMPQ.EQ.2 ) ) + $ CALL SLASR( 'L', 'V', 'B', N, N, WORK( 1 ), WORK( N ), U, LDU ) +* + RETURN +* +* End of SBDSDC +* + END diff --git a/dspl/liblapack/SRC/sbdsqr.f b/dspl/liblapack/SRC/sbdsqr.f new file mode 100644 index 0000000..b0067f6 --- /dev/null +++ b/dspl/liblapack/SRC/sbdsqr.f @@ -0,0 +1,866 @@ +*> \brief \b SBDSQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SBDSQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, +* LDU, C, LDC, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU +* .. +* .. Array Arguments .. +* REAL C( LDC, * ), D( * ), E( * ), U( LDU, * ), +* $ VT( LDVT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SBDSQR computes the singular values and, optionally, the right and/or +*> left singular vectors from the singular value decomposition (SVD) of +*> a real N-by-N (upper or lower) bidiagonal matrix B using the implicit +*> zero-shift QR algorithm. The SVD of B has the form +*> +*> B = Q * S * P**T +*> +*> where S is the diagonal matrix of singular values, Q is an orthogonal +*> matrix of left singular vectors, and P is an orthogonal matrix of +*> right singular vectors. If left singular vectors are requested, this +*> subroutine actually returns U*Q instead of Q, and, if right singular +*> vectors are requested, this subroutine returns P**T*VT instead of +*> P**T, for given real input matrices U and VT. When U and VT are the +*> orthogonal matrices that reduce a general matrix A to bidiagonal +*> form: A = U*B*VT, as computed by SGEBRD, then +*> +*> A = (U*Q) * S * (P**T*VT) +*> +*> is the SVD of A. Optionally, the subroutine may also compute Q**T*C +*> for a given real input matrix C. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices With +*> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, +*> LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, +*> no. 5, pp. 873-912, Sept 1990) and +*> "Accurate singular values and differential qd algorithms," by +*> B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics +*> Department, University of California at Berkeley, July 1992 +*> for a detailed description of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': B is upper bidiagonal; +*> = 'L': B is lower bidiagonal. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix B. N >= 0. +*> \endverbatim +*> +*> \param[in] NCVT +*> \verbatim +*> NCVT is INTEGER +*> The number of columns of the matrix VT. NCVT >= 0. +*> \endverbatim +*> +*> \param[in] NRU +*> \verbatim +*> NRU is INTEGER +*> The number of rows of the matrix U. NRU >= 0. +*> \endverbatim +*> +*> \param[in] NCC +*> \verbatim +*> NCC is INTEGER +*> The number of columns of the matrix C. NCC >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the n diagonal elements of the bidiagonal matrix B. +*> On exit, if INFO=0, the singular values of B in decreasing +*> order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> On entry, the N-1 offdiagonal elements of the bidiagonal +*> matrix B. +*> On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E +*> will contain the diagonal and superdiagonal elements of a +*> bidiagonal matrix orthogonally equivalent to the one given +*> as input. +*> \endverbatim +*> +*> \param[in,out] VT +*> \verbatim +*> VT is REAL array, dimension (LDVT, NCVT) +*> On entry, an N-by-NCVT matrix VT. +*> On exit, VT is overwritten by P**T * VT. +*> Not referenced if NCVT = 0. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. +*> LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. +*> \endverbatim +*> +*> \param[in,out] U +*> \verbatim +*> U is REAL array, dimension (LDU, N) +*> On entry, an NRU-by-N matrix U. +*> On exit, U is overwritten by U * Q. +*> Not referenced if NRU = 0. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,NRU). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC, NCC) +*> On entry, an N-by-NCC matrix C. +*> On exit, C is overwritten by Q**T * C. +*> Not referenced if NCC = 0. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. +*> LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0: +*> if NCVT = NRU = NCC = 0, +*> = 1, a split was marked by a positive value in E +*> = 2, current block of Z not diagonalized after 30*N +*> iterations (in inner while loop) +*> = 3, termination criterion of outer while loop not met +*> (program created more than N unreduced blocks) +*> else NCVT = NRU = NCC = 0, +*> the algorithm did not converge; D and E contain the +*> elements of a bidiagonal matrix which is orthogonally +*> similar to the input matrix B; if INFO = i, i +*> elements of E have not converged to zero. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> TOLMUL REAL, default = max(10,min(100,EPS**(-1/8))) +*> TOLMUL controls the convergence criterion of the QR loop. +*> If it is positive, TOLMUL*EPS is the desired relative +*> precision in the computed singular values. +*> If it is negative, abs(TOLMUL*EPS*sigma_max) is the +*> desired absolute accuracy in the computed singular +*> values (corresponds to relative accuracy +*> abs(TOLMUL*EPS) in the largest singular value. +*> abs(TOLMUL) should be between 1 and 1/EPS, and preferably +*> between 10 (for fast convergence) and .1/EPS +*> (for there to be some accuracy in the results). +*> Default is to lose at either one eighth or 2 of the +*> available decimal digits in each computed singular value +*> (whichever is smaller). +*> +*> MAXITR INTEGER, default = 6 +*> MAXITR controls the maximum number of passes of the +*> algorithm through its inner loop. The algorithms stops +*> (and so fails to converge) if the number of passes +*> through the inner loop exceeds MAXITR*N**2. +*> \endverbatim +* +*> \par Note: +* =========== +*> +*> \verbatim +*> Bug report from Cezary Dendek. +*> On March 23rd 2017, the INTEGER variable MAXIT = MAXITR*N**2 is +*> removed since it can overflow pretty easily (for N larger or equal +*> than 18,919). We instead use MAXITDIVN = MAXITR*N. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE SBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, + $ LDU, C, LDC, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU +* .. +* .. Array Arguments .. + REAL C( LDC, * ), D( * ), E( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL NEGONE + PARAMETER ( NEGONE = -1.0E0 ) + REAL HNDRTH + PARAMETER ( HNDRTH = 0.01E0 ) + REAL TEN + PARAMETER ( TEN = 10.0E0 ) + REAL HNDRD + PARAMETER ( HNDRD = 100.0E0 ) + REAL MEIGTH + PARAMETER ( MEIGTH = -0.125E0 ) + INTEGER MAXITR + PARAMETER ( MAXITR = 6 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, ROTATE + INTEGER I, IDIR, ISUB, ITER, ITERDIVN, J, LL, LLL, M, + $ MAXITDIVN, NM1, NM12, NM13, OLDLL, OLDM + REAL ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU, + $ OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL, + $ SINR, SLL, SMAX, SMIN, SMINL, SMINOA, + $ SN, THRESH, TOL, TOLMUL, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLARTG, SLAS2, SLASQ1, SLASR, SLASV2, SROT, + $ SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NCVT.LT.0 ) THEN + INFO = -3 + ELSE IF( NRU.LT.0 ) THEN + INFO = -4 + ELSE IF( NCC.LT.0 ) THEN + INFO = -5 + ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR. + $ ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN + INFO = -9 + ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN + INFO = -11 + ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR. + $ ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SBDSQR', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN + IF( N.EQ.1 ) + $ GO TO 160 +* +* ROTATE is true if any singular vectors desired, false otherwise +* + ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 ) +* +* If no singular vectors desired, use qd algorithm +* + IF( .NOT.ROTATE ) THEN + CALL SLASQ1( N, D, E, WORK, INFO ) +* +* If INFO equals 2, dqds didn't finish, try to finish +* + IF( INFO .NE. 2 ) RETURN + INFO = 0 + END IF +* + NM1 = N - 1 + NM12 = NM1 + NM1 + NM13 = NM12 + NM1 + IDIR = 0 +* +* Get machine constants +* + EPS = SLAMCH( 'Epsilon' ) + UNFL = SLAMCH( 'Safe minimum' ) +* +* If matrix lower bidiagonal, rotate to be upper bidiagonal +* by applying Givens rotations on the left +* + IF( LOWER ) THEN + DO 10 I = 1, N - 1 + CALL SLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + WORK( I ) = CS + WORK( NM1+I ) = SN + 10 CONTINUE +* +* Update singular vectors if desired +* + IF( NRU.GT.0 ) + $ CALL SLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ), WORK( N ), U, + $ LDU ) + IF( NCC.GT.0 ) + $ CALL SLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ), WORK( N ), C, + $ LDC ) + END IF +* +* Compute singular values to relative accuracy TOL +* (By setting TOL to be negative, algorithm will compute +* singular values to absolute accuracy ABS(TOL)*norm(input matrix)) +* + TOLMUL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) ) + TOL = TOLMUL*EPS +* +* Compute approximate maximum, minimum singular values +* + SMAX = ZERO + DO 20 I = 1, N + SMAX = MAX( SMAX, ABS( D( I ) ) ) + 20 CONTINUE + DO 30 I = 1, N - 1 + SMAX = MAX( SMAX, ABS( E( I ) ) ) + 30 CONTINUE + SMINL = ZERO + IF( TOL.GE.ZERO ) THEN +* +* Relative accuracy desired +* + SMINOA = ABS( D( 1 ) ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + MU = SMINOA + DO 40 I = 2, N + MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) ) + SMINOA = MIN( SMINOA, MU ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE + SMINOA = SMINOA / SQRT( REAL( N ) ) + THRESH = MAX( TOL*SMINOA, MAXITR*(N*(N*UNFL)) ) + ELSE +* +* Absolute accuracy desired +* + THRESH = MAX( ABS( TOL )*SMAX, MAXITR*(N*(N*UNFL)) ) + END IF +* +* Prepare for main iteration loop for the singular values +* (MAXIT is the maximum number of passes through the inner +* loop permitted before nonconvergence signalled.) +* + MAXITDIVN = MAXITR*N + ITERDIVN = 0 + ITER = -1 + OLDLL = -1 + OLDM = -1 +* +* M points to last element of unconverged part of matrix +* + M = N +* +* Begin main iteration loop +* + 60 CONTINUE +* +* Check for convergence or exceeding iteration count +* + IF( M.LE.1 ) + $ GO TO 160 +* + IF( ITER.GE.N ) THEN + ITER = ITER - N + ITERDIVN = ITERDIVN + 1 + IF( ITERDIVN.GE.MAXITDIVN ) + $ GO TO 200 + END IF +* +* Find diagonal block of matrix to work on +* + IF( TOL.LT.ZERO .AND. ABS( D( M ) ).LE.THRESH ) + $ D( M ) = ZERO + SMAX = ABS( D( M ) ) + SMIN = SMAX + DO 70 LLL = 1, M - 1 + LL = M - LLL + ABSS = ABS( D( LL ) ) + ABSE = ABS( E( LL ) ) + IF( TOL.LT.ZERO .AND. ABSS.LE.THRESH ) + $ D( LL ) = ZERO + IF( ABSE.LE.THRESH ) + $ GO TO 80 + SMIN = MIN( SMIN, ABSS ) + SMAX = MAX( SMAX, ABSS, ABSE ) + 70 CONTINUE + LL = 0 + GO TO 90 + 80 CONTINUE + E( LL ) = ZERO +* +* Matrix splits since E(LL) = 0 +* + IF( LL.EQ.M-1 ) THEN +* +* Convergence of bottom singular value, return to top of loop +* + M = M - 1 + GO TO 60 + END IF + 90 CONTINUE + LL = LL + 1 +* +* E(LL) through E(M-1) are nonzero, E(LL-1) is zero +* + IF( LL.EQ.M-1 ) THEN +* +* 2 by 2 block, handle separately +* + CALL SLASV2( D( M-1 ), E( M-1 ), D( M ), SIGMN, SIGMX, SINR, + $ COSR, SINL, COSL ) + D( M-1 ) = SIGMX + E( M-1 ) = ZERO + D( M ) = SIGMN +* +* Compute singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL SROT( NCVT, VT( M-1, 1 ), LDVT, VT( M, 1 ), LDVT, COSR, + $ SINR ) + IF( NRU.GT.0 ) + $ CALL SROT( NRU, U( 1, M-1 ), 1, U( 1, M ), 1, COSL, SINL ) + IF( NCC.GT.0 ) + $ CALL SROT( NCC, C( M-1, 1 ), LDC, C( M, 1 ), LDC, COSL, + $ SINL ) + M = M - 2 + GO TO 60 + END IF +* +* If working on new submatrix, choose shift direction +* (from larger end diagonal element towards smaller) +* + IF( LL.GT.OLDM .OR. M.LT.OLDLL ) THEN + IF( ABS( D( LL ) ).GE.ABS( D( M ) ) ) THEN +* +* Chase bulge from top (big end) to bottom (small end) +* + IDIR = 1 + ELSE +* +* Chase bulge from bottom (big end) to top (small end) +* + IDIR = 2 + END IF + END IF +* +* Apply convergence tests +* + IF( IDIR.EQ.1 ) THEN +* +* Run convergence test in forward direction +* First apply standard test to bottom of matrix +* + IF( ABS( E( M-1 ) ).LE.ABS( TOL )*ABS( D( M ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( M-1 ) ).LE.THRESH ) ) THEN + E( M-1 ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion forward +* + MU = ABS( D( LL ) ) + SMINL = MU + DO 100 LLL = LL, M - 1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + MU = ABS( D( LLL+1 ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 100 CONTINUE + END IF +* + ELSE +* +* Run convergence test in backward direction +* First apply standard test to top of matrix +* + IF( ABS( E( LL ) ).LE.ABS( TOL )*ABS( D( LL ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( LL ) ).LE.THRESH ) ) THEN + E( LL ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion backward +* + MU = ABS( D( M ) ) + SMINL = MU + DO 110 LLL = M - 1, LL, -1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + MU = ABS( D( LLL ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 110 CONTINUE + END IF + END IF + OLDLL = LL + OLDM = M +* +* Compute shift. First, test if shifting would ruin relative +* accuracy, and if so set the shift to zero. +* + IF( TOL.GE.ZERO .AND. N*TOL*( SMINL / SMAX ).LE. + $ MAX( EPS, HNDRTH*TOL ) ) THEN +* +* Use a zero shift to avoid loss of relative accuracy +* + SHIFT = ZERO + ELSE +* +* Compute the shift from 2-by-2 block at end of matrix +* + IF( IDIR.EQ.1 ) THEN + SLL = ABS( D( LL ) ) + CALL SLAS2( D( M-1 ), E( M-1 ), D( M ), SHIFT, R ) + ELSE + SLL = ABS( D( M ) ) + CALL SLAS2( D( LL ), E( LL ), D( LL+1 ), SHIFT, R ) + END IF +* +* Test if shift negligible, and if so set to zero +* + IF( SLL.GT.ZERO ) THEN + IF( ( SHIFT / SLL )**2.LT.EPS ) + $ SHIFT = ZERO + END IF + END IF +* +* Increment iteration count +* + ITER = ITER + M - LL +* +* If SHIFT = 0, do simplified QR iteration +* + IF( SHIFT.EQ.ZERO ) THEN + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 120 I = LL, M - 1 + CALL SLARTG( D( I )*CS, E( I ), CS, SN, R ) + IF( I.GT.LL ) + $ E( I-1 ) = OLDSN*R + CALL SLARTG( OLDCS*R, D( I+1 )*SN, OLDCS, OLDSN, D( I ) ) + WORK( I-LL+1 ) = CS + WORK( I-LL+1+NM1 ) = SN + WORK( I-LL+1+NM12 ) = OLDCS + WORK( I-LL+1+NM13 ) = OLDSN + 120 CONTINUE + H = D( M )*CS + D( M ) = H*OLDCS + E( M-1 ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL SLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ), + $ WORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL SLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ), + $ WORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL SLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ), + $ WORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 130 I = M, LL + 1, -1 + CALL SLARTG( D( I )*CS, E( I-1 ), CS, SN, R ) + IF( I.LT.M ) + $ E( I ) = OLDSN*R + CALL SLARTG( OLDCS*R, D( I-1 )*SN, OLDCS, OLDSN, D( I ) ) + WORK( I-LL ) = CS + WORK( I-LL+NM1 ) = -SN + WORK( I-LL+NM12 ) = OLDCS + WORK( I-LL+NM13 ) = -OLDSN + 130 CONTINUE + H = D( LL )*CS + D( LL ) = H*OLDCS + E( LL ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL SLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ), + $ WORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL SLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ), + $ WORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL SLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ), + $ WORK( N ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO + END IF + ELSE +* +* Use nonzero shift +* + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( LL ) )-SHIFT )* + $ ( SIGN( ONE, D( LL ) )+SHIFT / D( LL ) ) + G = E( LL ) + DO 140 I = LL, M - 1 + CALL SLARTG( F, G, COSR, SINR, R ) + IF( I.GT.LL ) + $ E( I-1 ) = R + F = COSR*D( I ) + SINR*E( I ) + E( I ) = COSR*E( I ) - SINR*D( I ) + G = SINR*D( I+1 ) + D( I+1 ) = COSR*D( I+1 ) + CALL SLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I ) + SINL*D( I+1 ) + D( I+1 ) = COSL*D( I+1 ) - SINL*E( I ) + IF( I.LT.M-1 ) THEN + G = SINL*E( I+1 ) + E( I+1 ) = COSL*E( I+1 ) + END IF + WORK( I-LL+1 ) = COSR + WORK( I-LL+1+NM1 ) = SINR + WORK( I-LL+1+NM12 ) = COSL + WORK( I-LL+1+NM13 ) = SINL + 140 CONTINUE + E( M-1 ) = F +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL SLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ), + $ WORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL SLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ), + $ WORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL SLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ), + $ WORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( M ) )-SHIFT )*( SIGN( ONE, D( M ) )+SHIFT / + $ D( M ) ) + G = E( M-1 ) + DO 150 I = M, LL + 1, -1 + CALL SLARTG( F, G, COSR, SINR, R ) + IF( I.LT.M ) + $ E( I ) = R + F = COSR*D( I ) + SINR*E( I-1 ) + E( I-1 ) = COSR*E( I-1 ) - SINR*D( I ) + G = SINR*D( I-1 ) + D( I-1 ) = COSR*D( I-1 ) + CALL SLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I-1 ) + SINL*D( I-1 ) + D( I-1 ) = COSL*D( I-1 ) - SINL*E( I-1 ) + IF( I.GT.LL+1 ) THEN + G = SINL*E( I-2 ) + E( I-2 ) = COSL*E( I-2 ) + END IF + WORK( I-LL ) = COSR + WORK( I-LL+NM1 ) = -SINR + WORK( I-LL+NM12 ) = COSL + WORK( I-LL+NM13 ) = -SINL + 150 CONTINUE + E( LL ) = F +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO +* +* Update singular vectors if desired +* + IF( NCVT.GT.0 ) + $ CALL SLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ), + $ WORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL SLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ), + $ WORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL SLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ), + $ WORK( N ), C( LL, 1 ), LDC ) + END IF + END IF +* +* QR iteration finished, go back and check convergence +* + GO TO 60 +* +* All singular values converged, so make them positive +* + 160 CONTINUE + DO 170 I = 1, N + IF( D( I ).LT.ZERO ) THEN + D( I ) = -D( I ) +* +* Change sign of singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL SSCAL( NCVT, NEGONE, VT( I, 1 ), LDVT ) + END IF + 170 CONTINUE +* +* Sort the singular values into decreasing order (insertion sort on +* singular values, but only one transposition per singular vector) +* + DO 190 I = 1, N - 1 +* +* Scan for smallest D(I) +* + ISUB = 1 + SMIN = D( 1 ) + DO 180 J = 2, N + 1 - I + IF( D( J ).LE.SMIN ) THEN + ISUB = J + SMIN = D( J ) + END IF + 180 CONTINUE + IF( ISUB.NE.N+1-I ) THEN +* +* Swap singular values and vectors +* + D( ISUB ) = D( N+1-I ) + D( N+1-I ) = SMIN + IF( NCVT.GT.0 ) + $ CALL SSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( N+1-I, 1 ), + $ LDVT ) + IF( NRU.GT.0 ) + $ CALL SSWAP( NRU, U( 1, ISUB ), 1, U( 1, N+1-I ), 1 ) + IF( NCC.GT.0 ) + $ CALL SSWAP( NCC, C( ISUB, 1 ), LDC, C( N+1-I, 1 ), LDC ) + END IF + 190 CONTINUE + GO TO 220 +* +* Maximum number of iterations exceeded, failure to converge +* + 200 CONTINUE + INFO = 0 + DO 210 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 210 CONTINUE + 220 CONTINUE + RETURN +* +* End of SBDSQR +* + END diff --git a/dspl/liblapack/SRC/sbdsvdx.f b/dspl/liblapack/SRC/sbdsvdx.f new file mode 100644 index 0000000..a4b1887 --- /dev/null +++ b/dspl/liblapack/SRC/sbdsvdx.f @@ -0,0 +1,792 @@ +*> \brief \b SBDSVDX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SBDSVDX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SBDSVDX( UPLO, JOBZ, RANGE, N, D, E, VL, VU, IL, IU, +* $ NS, S, Z, LDZ, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDZ, N, NS +* REAL VL, VU +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL D( * ), E( * ), S( * ), WORK( * ), +* Z( LDZ, * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SBDSVDX computes the singular value decomposition (SVD) of a real +*> N-by-N (upper or lower) bidiagonal matrix B, B = U * S * VT, +*> where S is a diagonal matrix with non-negative diagonal elements +*> (the singular values of B), and U and VT are orthogonal matrices +*> of left and right singular vectors, respectively. +*> +*> Given an upper bidiagonal B with diagonal D = [ d_1 d_2 ... d_N ] +*> and superdiagonal E = [ e_1 e_2 ... e_N-1 ], SBDSVDX computes the +*> singular value decompositon of B through the eigenvalues and +*> eigenvectors of the N*2-by-N*2 tridiagonal matrix +*> +*> | 0 d_1 | +*> | d_1 0 e_1 | +*> TGK = | e_1 0 d_2 | +*> | d_2 . . | +*> | . . . | +*> +*> If (s,u,v) is a singular triplet of B with ||u|| = ||v|| = 1, then +*> (+/-s,q), ||q|| = 1, are eigenpairs of TGK, with q = P * ( u' +/-v' ) / +*> sqrt(2) = ( v_1 u_1 v_2 u_2 ... v_n u_n ) / sqrt(2), and +*> P = [ e_{n+1} e_{1} e_{n+2} e_{2} ... ]. +*> +*> Given a TGK matrix, one can either a) compute -s,-v and change signs +*> so that the singular values (and corresponding vectors) are already in +*> descending order (as in SGESVD/SGESDD) or b) compute s,v and reorder +*> the values (and corresponding vectors). SBDSVDX implements a) by +*> calling SSTEVX (bisection plus inverse iteration, to be replaced +*> with a version of the Multiple Relative Robust Representation +*> algorithm. (See P. Willems and B. Lang, A framework for the MR^3 +*> algorithm: theory and implementation, SIAM J. Sci. Comput., +*> 35:740-766, 2013.) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': B is upper bidiagonal; +*> = 'L': B is lower bidiagonal. +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute singular values only; +*> = 'V': Compute singular values and singular vectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all singular values will be found. +*> = 'V': all singular values in the half-open interval [VL,VU) +*> will be found. +*> = 'I': the IL-th through IU-th singular values will be found. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the bidiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of the bidiagonal matrix B. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension (max(1,N-1)) +*> The (n-1) superdiagonal elements of the bidiagonal matrix +*> B in elements 1 to N-1. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for singular values. VU > VL. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for singular values. VU > VL. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest singular value to be returned. +*> 1 <= IL <= IU <= min(M,N), if min(M,N) > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest singular value to be returned. +*> 1 <= IL <= IU <= min(M,N), if min(M,N) > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[out] NS +*> \verbatim +*> NS is INTEGER +*> The total number of singular values found. 0 <= NS <= N. +*> If RANGE = 'A', NS = N, and if RANGE = 'I', NS = IU-IL+1. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The first NS elements contain the selected singular values in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (2*N,K) ) +*> If JOBZ = 'V', then if INFO = 0 the first NS columns of Z +*> contain the singular vectors of the matrix B corresponding to +*> the selected singular values, with U in rows 1 to N and V +*> in rows N+1 to N*2, i.e. +*> Z = [ U ] +*> [ V ] +*> If JOBZ = 'N', then Z is not referenced. +*> Note: The user must ensure that at least K = NS+1 columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of +*> NS is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(2,N*2). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (14*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (12*N) +*> If JOBZ = 'V', then if INFO = 0, the first NS elements of +*> IWORK are zero. If INFO > 0, then IWORK contains the indices +*> of the eigenvectors that failed to converge in DSTEVX. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge +*> in SSTEVX. The indices of the eigenvectors +*> (as returned by SSTEVX) are stored in the +*> array IWORK. +*> if INFO = N*2 + 1, an internal error occurred. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHEReigen +* +* ===================================================================== + SUBROUTINE SBDSVDX( UPLO, JOBZ, RANGE, N, D, E, VL, VU, IL, IU, + $ NS, S, Z, LDZ, WORK, IWORK, INFO) +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDZ, N, NS + REAL VL, VU +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL D( * ), E( * ), S( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TEN, HNDRD, MEIGTH + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TEN = 10.0E0, + $ HNDRD = 100.0E0, MEIGTH = -0.1250E0 ) + REAL FUDGE + PARAMETER ( FUDGE = 2.0E0 ) +* .. +* .. Local Scalars .. + CHARACTER RNGVX + LOGICAL ALLSV, INDSV, LOWER, SPLIT, SVEQ0, VALSV, WANTZ + INTEGER I, ICOLZ, IDBEG, IDEND, IDTGK, IDPTR, IEPTR, + $ IETGK, IIFAIL, IIWORK, ILTGK, IROWU, IROWV, + $ IROWZ, ISBEG, ISPLT, ITEMP, IUTGK, J, K, + $ NTGK, NRU, NRV, NSL + REAL ABSTOL, EPS, EMIN, MU, NRMU, NRMV, ORTOL, SMAX, + $ SMIN, SQRT2, THRESH, TOL, ULP, + $ VLTGK, VUTGK, ZJTJI +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SDOT, SLAMCH, SNRM2 + EXTERNAL ISAMAX, LSAME, SAXPY, SDOT, SLAMCH, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLASET, SSCAL, SSWAP, SSTEVX, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + ALLSV = LSAME( RANGE, 'A' ) + VALSV = LSAME( RANGE, 'V' ) + INDSV = LSAME( RANGE, 'I' ) + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLSV .OR. VALSV .OR. INDSV ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( N.GT.0 ) THEN + IF( VALSV ) THEN + IF( VL.LT.ZERO ) THEN + INFO = -7 + ELSE IF( VU.LE.VL ) THEN + INFO = -8 + END IF + ELSE IF( INDSV ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N*2 ) ) INFO = -14 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SBDSVDX', -INFO ) + RETURN + END IF +* +* Quick return if possible (N.LE.1) +* + NS = 0 + IF( N.EQ.0 ) RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLSV .OR. INDSV ) THEN + NS = 1 + S( 1 ) = ABS( D( 1 ) ) + ELSE + IF( VL.LT.ABS( D( 1 ) ) .AND. VU.GE.ABS( D( 1 ) ) ) THEN + NS = 1 + S( 1 ) = ABS( D( 1 ) ) + END IF + END IF + IF( WANTZ ) THEN + Z( 1, 1 ) = SIGN( ONE, D( 1 ) ) + Z( 2, 1 ) = ONE + END IF + RETURN + END IF +* + ABSTOL = 2*SLAMCH( 'Safe Minimum' ) + ULP = SLAMCH( 'Precision' ) + EPS = SLAMCH( 'Epsilon' ) + SQRT2 = SQRT( 2.0E0 ) + ORTOL = SQRT( ULP ) +* +* Criterion for splitting is taken from SBDSQR when singular +* values are computed to relative accuracy TOL. (See J. Demmel and +* W. Kahan, Accurate singular values of bidiagonal matrices, SIAM +* J. Sci. and Stat. Comput., 11:873–912, 1990.) +* + TOL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) )*EPS +* +* Compute approximate maximum, minimum singular values. +* + I = ISAMAX( N, D, 1 ) + SMAX = ABS( D( I ) ) + I = ISAMAX( N-1, E, 1 ) + SMAX = MAX( SMAX, ABS( E( I ) ) ) +* +* Compute threshold for neglecting D's and E's. +* + SMIN = ABS( D( 1 ) ) + IF( SMIN.NE.ZERO ) THEN + MU = SMIN + DO I = 2, N + MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) ) + SMIN = MIN( SMIN, MU ) + IF( SMIN.EQ.ZERO ) EXIT + END DO + END IF + SMIN = SMIN / SQRT( REAL( N ) ) + THRESH = TOL*SMIN +* +* Check for zeros in D and E (splits), i.e. submatrices. +* + DO I = 1, N-1 + IF( ABS( D( I ) ).LE.THRESH ) D( I ) = ZERO + IF( ABS( E( I ) ).LE.THRESH ) E( I ) = ZERO + END DO + IF( ABS( D( N ) ).LE.THRESH ) D( N ) = ZERO +* +* Pointers for arrays used by SSTEVX. +* + IDTGK = 1 + IETGK = IDTGK + N*2 + ITEMP = IETGK + N*2 + IIFAIL = 1 + IIWORK = IIFAIL + N*2 +* +* Set RNGVX, which corresponds to RANGE for SSTEVX in TGK mode. +* VL,VU or IL,IU are redefined to conform to implementation a) +* described in the leading comments. +* + ILTGK = 0 + IUTGK = 0 + VLTGK = ZERO + VUTGK = ZERO +* + IF( ALLSV ) THEN +* +* All singular values will be found. We aim at -s (see +* leading comments) with RNGVX = 'I'. IL and IU are set +* later (as ILTGK and IUTGK) according to the dimension +* of the active submatrix. +* + RNGVX = 'I' + IF( WANTZ ) CALL SLASET( 'F', N*2, N+1, ZERO, ZERO, Z, LDZ ) + ELSE IF( VALSV ) THEN +* +* Find singular values in a half-open interval. We aim +* at -s (see leading comments) and we swap VL and VU +* (as VUTGK and VLTGK), changing their signs. +* + RNGVX = 'V' + VLTGK = -VU + VUTGK = -VL + WORK( IDTGK:IDTGK+2*N-1 ) = ZERO + CALL SCOPY( N, D, 1, WORK( IETGK ), 2 ) + CALL SCOPY( N-1, E, 1, WORK( IETGK+1 ), 2 ) + CALL SSTEVX( 'N', 'V', N*2, WORK( IDTGK ), WORK( IETGK ), + $ VLTGK, VUTGK, ILTGK, ILTGK, ABSTOL, NS, S, + $ Z, LDZ, WORK( ITEMP ), IWORK( IIWORK ), + $ IWORK( IIFAIL ), INFO ) + IF( NS.EQ.0 ) THEN + RETURN + ELSE + IF( WANTZ ) CALL SLASET( 'F', N*2, NS, ZERO, ZERO, Z, LDZ ) + END IF + ELSE IF( INDSV ) THEN +* +* Find the IL-th through the IU-th singular values. We aim +* at -s (see leading comments) and indices are mapped into +* values, therefore mimicking SSTEBZ, where +* +* GL = GL - FUDGE*TNORM*ULP*N - FUDGE*TWO*PIVMIN +* GU = GU + FUDGE*TNORM*ULP*N + FUDGE*PIVMIN +* + ILTGK = IL + IUTGK = IU + RNGVX = 'V' + WORK( IDTGK:IDTGK+2*N-1 ) = ZERO + CALL SCOPY( N, D, 1, WORK( IETGK ), 2 ) + CALL SCOPY( N-1, E, 1, WORK( IETGK+1 ), 2 ) + CALL SSTEVX( 'N', 'I', N*2, WORK( IDTGK ), WORK( IETGK ), + $ VLTGK, VLTGK, ILTGK, ILTGK, ABSTOL, NS, S, + $ Z, LDZ, WORK( ITEMP ), IWORK( IIWORK ), + $ IWORK( IIFAIL ), INFO ) + VLTGK = S( 1 ) - FUDGE*SMAX*ULP*N + WORK( IDTGK:IDTGK+2*N-1 ) = ZERO + CALL SCOPY( N, D, 1, WORK( IETGK ), 2 ) + CALL SCOPY( N-1, E, 1, WORK( IETGK+1 ), 2 ) + CALL SSTEVX( 'N', 'I', N*2, WORK( IDTGK ), WORK( IETGK ), + $ VUTGK, VUTGK, IUTGK, IUTGK, ABSTOL, NS, S, + $ Z, LDZ, WORK( ITEMP ), IWORK( IIWORK ), + $ IWORK( IIFAIL ), INFO ) + VUTGK = S( 1 ) + FUDGE*SMAX*ULP*N + VUTGK = MIN( VUTGK, ZERO ) +* +* If VLTGK=VUTGK, SSTEVX returns an error message, +* so if needed we change VUTGK slightly. +* + IF( VLTGK.EQ.VUTGK ) VLTGK = VLTGK - TOL +* + IF( WANTZ ) CALL SLASET( 'F', N*2, IU-IL+1, ZERO, ZERO, Z, LDZ) + END IF +* +* Initialize variables and pointers for S, Z, and WORK. +* +* NRU, NRV: number of rows in U and V for the active submatrix +* IDBEG, ISBEG: offsets for the entries of D and S +* IROWZ, ICOLZ: offsets for the rows and columns of Z +* IROWU, IROWV: offsets for the rows of U and V +* + NS = 0 + NRU = 0 + NRV = 0 + IDBEG = 1 + ISBEG = 1 + IROWZ = 1 + ICOLZ = 1 + IROWU = 2 + IROWV = 1 + SPLIT = .FALSE. + SVEQ0 = .FALSE. +* +* Form the tridiagonal TGK matrix. +* + S( 1:N ) = ZERO + WORK( IETGK+2*N-1 ) = ZERO + WORK( IDTGK:IDTGK+2*N-1 ) = ZERO + CALL SCOPY( N, D, 1, WORK( IETGK ), 2 ) + CALL SCOPY( N-1, E, 1, WORK( IETGK+1 ), 2 ) +* +* +* Check for splits in two levels, outer level +* in E and inner level in D. +* + DO IEPTR = 2, N*2, 2 + IF( WORK( IETGK+IEPTR-1 ).EQ.ZERO ) THEN +* +* Split in E (this piece of B is square) or bottom +* of the (input bidiagonal) matrix. +* + ISPLT = IDBEG + IDEND = IEPTR - 1 + DO IDPTR = IDBEG, IDEND, 2 + IF( WORK( IETGK+IDPTR-1 ).EQ.ZERO ) THEN +* +* Split in D (rectangular submatrix). Set the number +* of rows in U and V (NRU and NRV) accordingly. +* + IF( IDPTR.EQ.IDBEG ) THEN +* +* D=0 at the top. +* + SVEQ0 = .TRUE. + IF( IDBEG.EQ.IDEND) THEN + NRU = 1 + NRV = 1 + END IF + ELSE IF( IDPTR.EQ.IDEND ) THEN +* +* D=0 at the bottom. +* + SVEQ0 = .TRUE. + NRU = (IDEND-ISPLT)/2 + 1 + NRV = NRU + IF( ISPLT.NE.IDBEG ) THEN + NRU = NRU + 1 + END IF + ELSE + IF( ISPLT.EQ.IDBEG ) THEN +* +* Split: top rectangular submatrix. +* + NRU = (IDPTR-IDBEG)/2 + NRV = NRU + 1 + ELSE +* +* Split: middle square submatrix. +* + NRU = (IDPTR-ISPLT)/2 + 1 + NRV = NRU + END IF + END IF + ELSE IF( IDPTR.EQ.IDEND ) THEN +* +* Last entry of D in the active submatrix. +* + IF( ISPLT.EQ.IDBEG ) THEN +* +* No split (trivial case). +* + NRU = (IDEND-IDBEG)/2 + 1 + NRV = NRU + ELSE +* +* Split: bottom rectangular submatrix. +* + NRV = (IDEND-ISPLT)/2 + 1 + NRU = NRV + 1 + END IF + END IF +* + NTGK = NRU + NRV +* + IF( NTGK.GT.0 ) THEN +* +* Compute eigenvalues/vectors of the active +* submatrix according to RANGE: +* if RANGE='A' (ALLSV) then RNGVX = 'I' +* if RANGE='V' (VALSV) then RNGVX = 'V' +* if RANGE='I' (INDSV) then RNGVX = 'V' +* + ILTGK = 1 + IUTGK = NTGK / 2 + IF( ALLSV .OR. VUTGK.EQ.ZERO ) THEN + IF( SVEQ0 .OR. + $ SMIN.LT.EPS .OR. + $ MOD(NTGK,2).GT.0 ) THEN +* Special case: eigenvalue equal to zero or very +* small, additional eigenvector is needed. + IUTGK = IUTGK + 1 + END IF + END IF +* +* Workspace needed by SSTEVX: +* WORK( ITEMP: ): 2*5*NTGK +* IWORK( 1: ): 2*6*NTGK +* + CALL SSTEVX( JOBZ, RNGVX, NTGK, WORK( IDTGK+ISPLT-1 ), + $ WORK( IETGK+ISPLT-1 ), VLTGK, VUTGK, + $ ILTGK, IUTGK, ABSTOL, NSL, S( ISBEG ), + $ Z( IROWZ,ICOLZ ), LDZ, WORK( ITEMP ), + $ IWORK( IIWORK ), IWORK( IIFAIL ), + $ INFO ) + IF( INFO.NE.0 ) THEN +* Exit with the error code from SSTEVX. + RETURN + END IF + EMIN = ABS( MAXVAL( S( ISBEG:ISBEG+NSL-1 ) ) ) +* + IF( NSL.GT.0 .AND. WANTZ ) THEN +* +* Normalize u=Z([2,4,...],:) and v=Z([1,3,...],:), +* changing the sign of v as discussed in the leading +* comments. The norms of u and v may be (slightly) +* different from 1/sqrt(2) if the corresponding +* eigenvalues are very small or too close. We check +* those norms and, if needed, reorthogonalize the +* vectors. +* + IF( NSL.GT.1 .AND. + $ VUTGK.EQ.ZERO .AND. + $ MOD(NTGK,2).EQ.0 .AND. + $ EMIN.EQ.0 .AND. .NOT.SPLIT ) THEN +* +* D=0 at the top or bottom of the active submatrix: +* one eigenvalue is equal to zero; concatenate the +* eigenvectors corresponding to the two smallest +* eigenvalues. +* + Z( IROWZ:IROWZ+NTGK-1,ICOLZ+NSL-2 ) = + $ Z( IROWZ:IROWZ+NTGK-1,ICOLZ+NSL-2 ) + + $ Z( IROWZ:IROWZ+NTGK-1,ICOLZ+NSL-1 ) + Z( IROWZ:IROWZ+NTGK-1,ICOLZ+NSL-1 ) = + $ ZERO +* IF( IUTGK*2.GT.NTGK ) THEN +* Eigenvalue equal to zero or very small. +* NSL = NSL - 1 +* END IF + END IF +* + DO I = 0, MIN( NSL-1, NRU-1 ) + NRMU = SNRM2( NRU, Z( IROWU, ICOLZ+I ), 2 ) + IF( NRMU.EQ.ZERO ) THEN + INFO = N*2 + 1 + RETURN + END IF + CALL SSCAL( NRU, ONE/NRMU, + $ Z( IROWU,ICOLZ+I ), 2 ) + IF( NRMU.NE.ONE .AND. + $ ABS( NRMU-ORTOL )*SQRT2.GT.ONE ) + $ THEN + DO J = 0, I-1 + ZJTJI = -SDOT( NRU, Z( IROWU, ICOLZ+J ), + $ 2, Z( IROWU, ICOLZ+I ), 2 ) + CALL SAXPY( NRU, ZJTJI, + $ Z( IROWU, ICOLZ+J ), 2, + $ Z( IROWU, ICOLZ+I ), 2 ) + END DO + NRMU = SNRM2( NRU, Z( IROWU, ICOLZ+I ), 2 ) + CALL SSCAL( NRU, ONE/NRMU, + $ Z( IROWU,ICOLZ+I ), 2 ) + END IF + END DO + DO I = 0, MIN( NSL-1, NRV-1 ) + NRMV = SNRM2( NRV, Z( IROWV, ICOLZ+I ), 2 ) + IF( NRMV.EQ.ZERO ) THEN + INFO = N*2 + 1 + RETURN + END IF + CALL SSCAL( NRV, -ONE/NRMV, + $ Z( IROWV,ICOLZ+I ), 2 ) + IF( NRMV.NE.ONE .AND. + $ ABS( NRMV-ORTOL )*SQRT2.GT.ONE ) + $ THEN + DO J = 0, I-1 + ZJTJI = -SDOT( NRV, Z( IROWV, ICOLZ+J ), + $ 2, Z( IROWV, ICOLZ+I ), 2 ) + CALL SAXPY( NRU, ZJTJI, + $ Z( IROWV, ICOLZ+J ), 2, + $ Z( IROWV, ICOLZ+I ), 2 ) + END DO + NRMV = SNRM2( NRV, Z( IROWV, ICOLZ+I ), 2 ) + CALL SSCAL( NRV, ONE/NRMV, + $ Z( IROWV,ICOLZ+I ), 2 ) + END IF + END DO + IF( VUTGK.EQ.ZERO .AND. + $ IDPTR.LT.IDEND .AND. + $ MOD(NTGK,2).GT.0 ) THEN +* +* D=0 in the middle of the active submatrix (one +* eigenvalue is equal to zero): save the corresponding +* eigenvector for later use (when bottom of the +* active submatrix is reached). +* + SPLIT = .TRUE. + Z( IROWZ:IROWZ+NTGK-1,N+1 ) = + $ Z( IROWZ:IROWZ+NTGK-1,NS+NSL ) + Z( IROWZ:IROWZ+NTGK-1,NS+NSL ) = + $ ZERO + END IF + END IF !** WANTZ **! +* + NSL = MIN( NSL, NRU ) + SVEQ0 = .FALSE. +* +* Absolute values of the eigenvalues of TGK. +* + DO I = 0, NSL-1 + S( ISBEG+I ) = ABS( S( ISBEG+I ) ) + END DO +* +* Update pointers for TGK, S and Z. +* + ISBEG = ISBEG + NSL + IROWZ = IROWZ + NTGK + ICOLZ = ICOLZ + NSL + IROWU = IROWZ + IROWV = IROWZ + 1 + ISPLT = IDPTR + 1 + NS = NS + NSL + NRU = 0 + NRV = 0 + END IF !** NTGK.GT.0 **! + IF( IROWZ.LT.N*2 .AND. WANTZ ) THEN + Z( 1:IROWZ-1, ICOLZ ) = ZERO + END IF + END DO !** IDPTR loop **! + IF( SPLIT .AND. WANTZ ) THEN +* +* Bring back eigenvector corresponding +* to eigenvalue equal to zero. +* + Z( IDBEG:IDEND-NTGK+1,ISBEG-1 ) = + $ Z( IDBEG:IDEND-NTGK+1,ISBEG-1 ) + + $ Z( IDBEG:IDEND-NTGK+1,N+1 ) + Z( IDBEG:IDEND-NTGK+1,N+1 ) = 0 + END IF + IROWV = IROWV - 1 + IROWU = IROWU + 1 + IDBEG = IEPTR + 1 + SVEQ0 = .FALSE. + SPLIT = .FALSE. + END IF !** Check for split in E **! + END DO !** IEPTR loop **! +* +* Sort the singular values into decreasing order (insertion sort on +* singular values, but only one transposition per singular vector) +* + DO I = 1, NS-1 + K = 1 + SMIN = S( 1 ) + DO J = 2, NS + 1 - I + IF( S( J ).LE.SMIN ) THEN + K = J + SMIN = S( J ) + END IF + END DO + IF( K.NE.NS+1-I ) THEN + S( K ) = S( NS+1-I ) + S( NS+1-I ) = SMIN + IF( WANTZ ) CALL SSWAP( N*2, Z( 1,K ), 1, Z( 1,NS+1-I ), 1 ) + END IF + END DO +* +* If RANGE=I, check for singular values/vectors to be discarded. +* + IF( INDSV ) THEN + K = IU - IL + 1 + IF( K.LT.NS ) THEN + S( K+1:NS ) = ZERO + IF( WANTZ ) Z( 1:N*2,K+1:NS ) = ZERO + NS = K + END IF + END IF +* +* Reorder Z: U = Z( 1:N,1:NS ), V = Z( N+1:N*2,1:NS ). +* If B is a lower diagonal, swap U and V. +* + IF( WANTZ ) THEN + DO I = 1, NS + CALL SCOPY( N*2, Z( 1,I ), 1, WORK, 1 ) + IF( LOWER ) THEN + CALL SCOPY( N, WORK( 2 ), 2, Z( N+1,I ), 1 ) + CALL SCOPY( N, WORK( 1 ), 2, Z( 1 ,I ), 1 ) + ELSE + CALL SCOPY( N, WORK( 2 ), 2, Z( 1 ,I ), 1 ) + CALL SCOPY( N, WORK( 1 ), 2, Z( N+1,I ), 1 ) + END IF + END DO + END IF +* + RETURN +* +* End of SBDSVDX +* + END diff --git a/dspl/liblapack/SRC/scsum1.f b/dspl/liblapack/SRC/scsum1.f new file mode 100644 index 0000000..7fa0319 --- /dev/null +++ b/dspl/liblapack/SRC/scsum1.f @@ -0,0 +1,140 @@ +*> \brief \b SCSUM1 forms the 1-norm of the complex vector using the true absolute value. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SCSUM1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SCSUM1( N, CX, INCX ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* .. +* .. Array Arguments .. +* COMPLEX CX( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SCSUM1 takes the sum of the absolute values of a complex +*> vector and returns a single precision result. +*> +*> Based on SCASUM from the Level 1 BLAS. +*> The change is to use the 'genuine' absolute value. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of elements in the vector CX. +*> \endverbatim +*> +*> \param[in] CX +*> \verbatim +*> CX is COMPLEX array, dimension (N) +*> The vector whose elements will be summed. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The spacing between successive values of CX. INCX > 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complexOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Nick Higham for use with CLACON. +* +* ===================================================================== + REAL FUNCTION SCSUM1( N, CX, INCX ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCX, N +* .. +* .. Array Arguments .. + COMPLEX CX( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, NINCX + REAL STEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + SCSUM1 = 0.0E0 + STEMP = 0.0E0 + IF( N.LE.0 ) + $ RETURN + IF( INCX.EQ.1 ) + $ GO TO 20 +* +* CODE FOR INCREMENT NOT EQUAL TO 1 +* + NINCX = N*INCX + DO 10 I = 1, NINCX, INCX +* +* NEXT LINE MODIFIED. +* + STEMP = STEMP + ABS( CX( I ) ) + 10 CONTINUE + SCSUM1 = STEMP + RETURN +* +* CODE FOR INCREMENT EQUAL TO 1 +* + 20 CONTINUE + DO 30 I = 1, N +* +* NEXT LINE MODIFIED. +* + STEMP = STEMP + ABS( CX( I ) ) + 30 CONTINUE + SCSUM1 = STEMP + RETURN +* +* End of SCSUM1 +* + END diff --git a/dspl/liblapack/SRC/sdisna.f b/dspl/liblapack/SRC/sdisna.f new file mode 100644 index 0000000..b034033 --- /dev/null +++ b/dspl/liblapack/SRC/sdisna.f @@ -0,0 +1,245 @@ +*> \brief \b SDISNA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SDISNA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SDISNA( JOB, M, N, D, SEP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOB +* INTEGER INFO, M, N +* .. +* .. Array Arguments .. +* REAL D( * ), SEP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SDISNA computes the reciprocal condition numbers for the eigenvectors +*> of a real symmetric or complex Hermitian matrix or for the left or +*> right singular vectors of a general m-by-n matrix. The reciprocal +*> condition number is the 'gap' between the corresponding eigenvalue or +*> singular value and the nearest other one. +*> +*> The bound on the error, measured by angle in radians, in the I-th +*> computed vector is given by +*> +*> SLAMCH( 'E' ) * ( ANORM / SEP( I ) ) +*> +*> where ANORM = 2-norm(A) = max( abs( D(j) ) ). SEP(I) is not allowed +*> to be smaller than SLAMCH( 'E' )*ANORM in order to limit the size of +*> the error bound. +*> +*> SDISNA may also be used to compute error bounds for eigenvectors of +*> the generalized symmetric definite eigenproblem. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies for which problem the reciprocal condition numbers +*> should be computed: +*> = 'E': the eigenvectors of a symmetric/Hermitian matrix; +*> = 'L': the left singular vectors of a general matrix; +*> = 'R': the right singular vectors of a general matrix. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> If JOB = 'L' or 'R', the number of columns of the matrix, +*> in which case N >= 0. Ignored if JOB = 'E'. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (M) if JOB = 'E' +*> dimension (min(M,N)) if JOB = 'L' or 'R' +*> The eigenvalues (if JOB = 'E') or singular values (if JOB = +*> 'L' or 'R') of the matrix, in either increasing or decreasing +*> order. If singular values, they must be non-negative. +*> \endverbatim +*> +*> \param[out] SEP +*> \verbatim +*> SEP is REAL array, dimension (M) if JOB = 'E' +*> dimension (min(M,N)) if JOB = 'L' or 'R' +*> The reciprocal condition numbers of the vectors. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE SDISNA( JOB, M, N, D, SEP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER INFO, M, N +* .. +* .. Array Arguments .. + REAL D( * ), SEP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL DECR, EIGEN, INCR, LEFT, RIGHT, SING + INTEGER I, K + REAL ANORM, EPS, NEWGAP, OLDGAP, SAFMIN, THRESH +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + EIGEN = LSAME( JOB, 'E' ) + LEFT = LSAME( JOB, 'L' ) + RIGHT = LSAME( JOB, 'R' ) + SING = LEFT .OR. RIGHT + IF( EIGEN ) THEN + K = M + ELSE IF( SING ) THEN + K = MIN( M, N ) + END IF + IF( .NOT.EIGEN .AND. .NOT.SING ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( K.LT.0 ) THEN + INFO = -3 + ELSE + INCR = .TRUE. + DECR = .TRUE. + DO 10 I = 1, K - 1 + IF( INCR ) + $ INCR = INCR .AND. D( I ).LE.D( I+1 ) + IF( DECR ) + $ DECR = DECR .AND. D( I ).GE.D( I+1 ) + 10 CONTINUE + IF( SING .AND. K.GT.0 ) THEN + IF( INCR ) + $ INCR = INCR .AND. ZERO.LE.D( 1 ) + IF( DECR ) + $ DECR = DECR .AND. D( K ).GE.ZERO + END IF + IF( .NOT.( INCR .OR. DECR ) ) + $ INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SDISNA', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) + $ RETURN +* +* Compute reciprocal condition numbers +* + IF( K.EQ.1 ) THEN + SEP( 1 ) = SLAMCH( 'O' ) + ELSE + OLDGAP = ABS( D( 2 )-D( 1 ) ) + SEP( 1 ) = OLDGAP + DO 20 I = 2, K - 1 + NEWGAP = ABS( D( I+1 )-D( I ) ) + SEP( I ) = MIN( OLDGAP, NEWGAP ) + OLDGAP = NEWGAP + 20 CONTINUE + SEP( K ) = OLDGAP + END IF + IF( SING ) THEN + IF( ( LEFT .AND. M.GT.N ) .OR. ( RIGHT .AND. M.LT.N ) ) THEN + IF( INCR ) + $ SEP( 1 ) = MIN( SEP( 1 ), D( 1 ) ) + IF( DECR ) + $ SEP( K ) = MIN( SEP( K ), D( K ) ) + END IF + END IF +* +* Ensure that reciprocal condition numbers are not less than +* threshold, in order to limit the size of the error bound +* + EPS = SLAMCH( 'E' ) + SAFMIN = SLAMCH( 'S' ) + ANORM = MAX( ABS( D( 1 ) ), ABS( D( K ) ) ) + IF( ANORM.EQ.ZERO ) THEN + THRESH = EPS + ELSE + THRESH = MAX( EPS*ANORM, SAFMIN ) + END IF + DO 30 I = 1, K + SEP( I ) = MAX( SEP( I ), THRESH ) + 30 CONTINUE +* + RETURN +* +* End of SDISNA +* + END diff --git a/dspl/liblapack/SRC/sgbbrd.f b/dspl/liblapack/SRC/sgbbrd.f new file mode 100644 index 0000000..4711c80 --- /dev/null +++ b/dspl/liblapack/SRC/sgbbrd.f @@ -0,0 +1,547 @@ +*> \brief \b SGBBRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGBBRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGBBRD( VECT, M, N, NCC, KL, KU, AB, LDAB, D, E, Q, +* LDQ, PT, LDPT, C, LDC, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER VECT +* INTEGER INFO, KL, KU, LDAB, LDC, LDPT, LDQ, M, N, NCC +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), C( LDC, * ), D( * ), E( * ), +* $ PT( LDPT, * ), Q( LDQ, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGBBRD reduces a real general m-by-n band matrix A to upper +*> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. +*> +*> The routine computes B, and optionally forms Q or P**T, or computes +*> Q**T*C for a given matrix C. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> Specifies whether or not the matrices Q and P**T are to be +*> formed. +*> = 'N': do not form Q or P**T; +*> = 'Q': form Q only; +*> = 'P': form P**T only; +*> = 'B': form both. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NCC +*> \verbatim +*> NCC is INTEGER +*> The number of columns of the matrix C. NCC >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals of the matrix A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals of the matrix A. KU >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the m-by-n band matrix A, stored in rows 1 to +*> KL+KU+1. The j-th column of A is stored in the j-th column of +*> the array AB as follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). +*> On exit, A is overwritten by values generated during the +*> reduction. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array A. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (min(M,N)) +*> The diagonal elements of the bidiagonal matrix B. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (min(M,N)-1) +*> The superdiagonal elements of the bidiagonal matrix B. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ,M) +*> If VECT = 'Q' or 'B', the m-by-m orthogonal matrix Q. +*> If VECT = 'N' or 'P', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] PT +*> \verbatim +*> PT is REAL array, dimension (LDPT,N) +*> If VECT = 'P' or 'B', the n-by-n orthogonal matrix P'. +*> If VECT = 'N' or 'Q', the array PT is not referenced. +*> \endverbatim +*> +*> \param[in] LDPT +*> \verbatim +*> LDPT is INTEGER +*> The leading dimension of the array PT. +*> LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,NCC) +*> On entry, an m-by-ncc matrix C. +*> On exit, C is overwritten by Q**T*C. +*> C is not referenced if NCC = 0. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. +*> LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*max(M,N)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGBcomputational +* +* ===================================================================== + SUBROUTINE SGBBRD( VECT, M, N, NCC, KL, KU, AB, LDAB, D, E, Q, + $ LDQ, PT, LDPT, C, LDC, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER VECT + INTEGER INFO, KL, KU, LDAB, LDC, LDPT, LDQ, M, N, NCC +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), C( LDC, * ), D( * ), E( * ), + $ PT( LDPT, * ), Q( LDQ, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTB, WANTC, WANTPT, WANTQ + INTEGER I, INCA, J, J1, J2, KB, KB1, KK, KLM, KLU1, + $ KUN, L, MINMN, ML, ML0, MN, MU, MU0, NR, NRT + REAL RA, RB, RC, RS +* .. +* .. External Subroutines .. + EXTERNAL SLARGV, SLARTG, SLARTV, SLASET, SROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTB = LSAME( VECT, 'B' ) + WANTQ = LSAME( VECT, 'Q' ) .OR. WANTB + WANTPT = LSAME( VECT, 'P' ) .OR. WANTB + WANTC = NCC.GT.0 + KLU1 = KL + KU + 1 + INFO = 0 + IF( .NOT.WANTQ .AND. .NOT.WANTPT .AND. .NOT.LSAME( VECT, 'N' ) ) + $ THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NCC.LT.0 ) THEN + INFO = -4 + ELSE IF( KL.LT.0 ) THEN + INFO = -5 + ELSE IF( KU.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KLU1 ) THEN + INFO = -8 + ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. LDQ.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDPT.LT.1 .OR. WANTPT .AND. LDPT.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDC.LT.1 .OR. WANTC .AND. LDC.LT.MAX( 1, M ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBBRD', -INFO ) + RETURN + END IF +* +* Initialize Q and P**T to the unit matrix, if needed +* + IF( WANTQ ) + $ CALL SLASET( 'Full', M, M, ZERO, ONE, Q, LDQ ) + IF( WANTPT ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, PT, LDPT ) +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + MINMN = MIN( M, N ) +* + IF( KL+KU.GT.1 ) THEN +* +* Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce +* first to lower bidiagonal form and then transform to upper +* bidiagonal +* + IF( KU.GT.0 ) THEN + ML0 = 1 + MU0 = 2 + ELSE + ML0 = 2 + MU0 = 1 + END IF +* +* Wherever possible, plane rotations are generated and applied in +* vector operations of length NR over the index set J1:J2:KLU1. +* +* The sines of the plane rotations are stored in WORK(1:max(m,n)) +* and the cosines in WORK(max(m,n)+1:2*max(m,n)). +* + MN = MAX( M, N ) + KLM = MIN( M-1, KL ) + KUN = MIN( N-1, KU ) + KB = KLM + KUN + KB1 = KB + 1 + INCA = KB1*LDAB + NR = 0 + J1 = KLM + 2 + J2 = 1 - KUN +* + DO 90 I = 1, MINMN +* +* Reduce i-th column and i-th row of matrix to bidiagonal form +* + ML = KLM + 1 + MU = KUN + 1 + DO 80 KK = 1, KB + J1 = J1 + KB + J2 = J2 + KB +* +* generate plane rotations to annihilate nonzero elements +* which have been created below the band +* + IF( NR.GT.0 ) + $ CALL SLARGV( NR, AB( KLU1, J1-KLM-1 ), INCA, + $ WORK( J1 ), KB1, WORK( MN+J1 ), KB1 ) +* +* apply plane rotations from the left +* + DO 10 L = 1, KB + IF( J2-KLM+L-1.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( KLU1-L, J1-KLM+L-1 ), INCA, + $ AB( KLU1-L+1, J1-KLM+L-1 ), INCA, + $ WORK( MN+J1 ), WORK( J1 ), KB1 ) + 10 CONTINUE +* + IF( ML.GT.ML0 ) THEN + IF( ML.LE.M-I+1 ) THEN +* +* generate plane rotation to annihilate a(i+ml-1,i) +* within the band, and apply rotation from the left +* + CALL SLARTG( AB( KU+ML-1, I ), AB( KU+ML, I ), + $ WORK( MN+I+ML-1 ), WORK( I+ML-1 ), + $ RA ) + AB( KU+ML-1, I ) = RA + IF( I.LT.N ) + $ CALL SROT( MIN( KU+ML-2, N-I ), + $ AB( KU+ML-2, I+1 ), LDAB-1, + $ AB( KU+ML-1, I+1 ), LDAB-1, + $ WORK( MN+I+ML-1 ), WORK( I+ML-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KB1 + END IF +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + DO 20 J = J1, J2, KB1 + CALL SROT( M, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ WORK( MN+J ), WORK( J ) ) + 20 CONTINUE + END IF +* + IF( WANTC ) THEN +* +* apply plane rotations to C +* + DO 30 J = J1, J2, KB1 + CALL SROT( NCC, C( J-1, 1 ), LDC, C( J, 1 ), LDC, + $ WORK( MN+J ), WORK( J ) ) + 30 CONTINUE + END IF +* + IF( J2+KUN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KB1 + END IF +* + DO 40 J = J1, J2, KB1 +* +* create nonzero element a(j-1,j+ku) above the band +* and store it in WORK(n+1:2*n) +* + WORK( J+KUN ) = WORK( J )*AB( 1, J+KUN ) + AB( 1, J+KUN ) = WORK( MN+J )*AB( 1, J+KUN ) + 40 CONTINUE +* +* generate plane rotations to annihilate nonzero elements +* which have been generated above the band +* + IF( NR.GT.0 ) + $ CALL SLARGV( NR, AB( 1, J1+KUN-1 ), INCA, + $ WORK( J1+KUN ), KB1, WORK( MN+J1+KUN ), + $ KB1 ) +* +* apply plane rotations from the right +* + DO 50 L = 1, KB + IF( J2+L-1.GT.M ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( L+1, J1+KUN-1 ), INCA, + $ AB( L, J1+KUN ), INCA, + $ WORK( MN+J1+KUN ), WORK( J1+KUN ), + $ KB1 ) + 50 CONTINUE +* + IF( ML.EQ.ML0 .AND. MU.GT.MU0 ) THEN + IF( MU.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i,i+mu-1) +* within the band, and apply rotation from the right +* + CALL SLARTG( AB( KU-MU+3, I+MU-2 ), + $ AB( KU-MU+2, I+MU-1 ), + $ WORK( MN+I+MU-1 ), WORK( I+MU-1 ), + $ RA ) + AB( KU-MU+3, I+MU-2 ) = RA + CALL SROT( MIN( KL+MU-2, M-I ), + $ AB( KU-MU+4, I+MU-2 ), 1, + $ AB( KU-MU+3, I+MU-1 ), 1, + $ WORK( MN+I+MU-1 ), WORK( I+MU-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KB1 + END IF +* + IF( WANTPT ) THEN +* +* accumulate product of plane rotations in P**T +* + DO 60 J = J1, J2, KB1 + CALL SROT( N, PT( J+KUN-1, 1 ), LDPT, + $ PT( J+KUN, 1 ), LDPT, WORK( MN+J+KUN ), + $ WORK( J+KUN ) ) + 60 CONTINUE + END IF +* + IF( J2+KB.GT.M ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KB1 + END IF +* + DO 70 J = J1, J2, KB1 +* +* create nonzero element a(j+kl+ku,j+ku-1) below the +* band and store it in WORK(1:n) +* + WORK( J+KB ) = WORK( J+KUN )*AB( KLU1, J+KUN ) + AB( KLU1, J+KUN ) = WORK( MN+J+KUN )*AB( KLU1, J+KUN ) + 70 CONTINUE +* + IF( ML.GT.ML0 ) THEN + ML = ML - 1 + ELSE + MU = MU - 1 + END IF + 80 CONTINUE + 90 CONTINUE + END IF +* + IF( KU.EQ.0 .AND. KL.GT.0 ) THEN +* +* A has been reduced to lower bidiagonal form +* +* Transform lower bidiagonal form to upper bidiagonal by applying +* plane rotations from the left, storing diagonal elements in D +* and off-diagonal elements in E +* + DO 100 I = 1, MIN( M-1, N ) + CALL SLARTG( AB( 1, I ), AB( 2, I ), RC, RS, RA ) + D( I ) = RA + IF( I.LT.N ) THEN + E( I ) = RS*AB( 1, I+1 ) + AB( 1, I+1 ) = RC*AB( 1, I+1 ) + END IF + IF( WANTQ ) + $ CALL SROT( M, Q( 1, I ), 1, Q( 1, I+1 ), 1, RC, RS ) + IF( WANTC ) + $ CALL SROT( NCC, C( I, 1 ), LDC, C( I+1, 1 ), LDC, RC, + $ RS ) + 100 CONTINUE + IF( M.LE.N ) + $ D( M ) = AB( 1, M ) + ELSE IF( KU.GT.0 ) THEN +* +* A has been reduced to upper bidiagonal form +* + IF( M.LT.N ) THEN +* +* Annihilate a(m,m+1) by applying plane rotations from the +* right, storing diagonal elements in D and off-diagonal +* elements in E +* + RB = AB( KU, M+1 ) + DO 110 I = M, 1, -1 + CALL SLARTG( AB( KU+1, I ), RB, RC, RS, RA ) + D( I ) = RA + IF( I.GT.1 ) THEN + RB = -RS*AB( KU, I ) + E( I-1 ) = RC*AB( KU, I ) + END IF + IF( WANTPT ) + $ CALL SROT( N, PT( I, 1 ), LDPT, PT( M+1, 1 ), LDPT, + $ RC, RS ) + 110 CONTINUE + ELSE +* +* Copy off-diagonal elements to E and diagonal elements to D +* + DO 120 I = 1, MINMN - 1 + E( I ) = AB( KU, I+1 ) + 120 CONTINUE + DO 130 I = 1, MINMN + D( I ) = AB( KU+1, I ) + 130 CONTINUE + END IF + ELSE +* +* A is diagonal. Set elements of E to zero and copy diagonal +* elements to D. +* + DO 140 I = 1, MINMN - 1 + E( I ) = ZERO + 140 CONTINUE + DO 150 I = 1, MINMN + D( I ) = AB( 1, I ) + 150 CONTINUE + END IF + RETURN +* +* End of SGBBRD +* + END diff --git a/dspl/liblapack/SRC/sgbcon.f b/dspl/liblapack/SRC/sgbcon.f new file mode 100644 index 0000000..2294bbf --- /dev/null +++ b/dspl/liblapack/SRC/sgbcon.f @@ -0,0 +1,311 @@ +*> \brief \b SGBCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGBCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER INFO, KL, KU, LDAB, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL AB( LDAB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGBCON estimates the reciprocal of the condition number of a real +*> general band matrix A, in either the 1-norm or the infinity-norm, +*> using the LU factorization computed by SGBTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as +*> RCOND = 1 / ( norm(A) * norm(inv(A)) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by SGBTRF. U is stored as an upper triangular band +*> matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +*> the multipliers used during the factorization are stored in +*> rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= N, row i of the matrix was +*> interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> If NORM = '1' or 'O', the 1-norm of the original matrix A. +*> If NORM = 'I', the infinity-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(norm(A) * norm(inv(A))). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGBcomputational +* +* ===================================================================== + SUBROUTINE SGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND, + $ WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, KL, KU, LDAB, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL AB( LDAB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LNOTI, ONENRM + CHARACTER NORMIN + INTEGER IX, J, JP, KASE, KASE1, KD, LM + REAL AINVNM, SCALE, SMLNUM, T +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SDOT, SLAMCH + EXTERNAL LSAME, ISAMAX, SDOT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SLACN2, SLATBS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN + INFO = -6 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KD = KL + KU + 1 + LNOTI = KL.GT.0 + KASE = 0 + 10 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + IF( LNOTI ) THEN + DO 20 J = 1, N - 1 + LM = MIN( KL, N-J ) + JP = IPIV( J ) + T = WORK( JP ) + IF( JP.NE.J ) THEN + WORK( JP ) = WORK( J ) + WORK( J ) = T + END IF + CALL SAXPY( LM, -T, AB( KD+1, J ), 1, WORK( J+1 ), 1 ) + 20 CONTINUE + END IF +* +* Multiply by inv(U). +* + CALL SLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ KL+KU, AB, LDAB, WORK, SCALE, WORK( 2*N+1 ), + $ INFO ) + ELSE +* +* Multiply by inv(U**T). +* + CALL SLATBS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, + $ KL+KU, AB, LDAB, WORK, SCALE, WORK( 2*N+1 ), + $ INFO ) +* +* Multiply by inv(L**T). +* + IF( LNOTI ) THEN + DO 30 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + WORK( J ) = WORK( J ) - SDOT( LM, AB( KD+1, J ), 1, + $ WORK( J+1 ), 1 ) + JP = IPIV( J ) + IF( JP.NE.J ) THEN + T = WORK( JP ) + WORK( JP ) = WORK( J ) + WORK( J ) = T + END IF + 30 CONTINUE + END IF + END IF +* +* Divide X by 1/SCALE if doing so will not cause overflow. +* + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 40 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 40 CONTINUE + RETURN +* +* End of SGBCON +* + END diff --git a/dspl/liblapack/SRC/sgbequ.f b/dspl/liblapack/SRC/sgbequ.f new file mode 100644 index 0000000..1001361 --- /dev/null +++ b/dspl/liblapack/SRC/sgbequ.f @@ -0,0 +1,324 @@ +*> \brief \b SGBEQU +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGBEQU + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGBEQU( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, +* AMAX, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, KL, KU, LDAB, M, N +* REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), C( * ), R( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGBEQU computes row and column scalings intended to equilibrate an +*> M-by-N band matrix A and reduce its condition number. R returns the +*> row scale factors and C the column scale factors, chosen to try to +*> make the largest element in each row and column of the matrix B with +*> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. +*> +*> R(i) and C(j) are restricted to be between SMLNUM = smallest safe +*> number and BIGNUM = largest safe number. Use of these scaling +*> factors is not guaranteed to reduce the condition number of A but +*> works well in practice. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> The band matrix A, stored in rows 1 to KL+KU+1. The j-th +*> column of A is stored in the j-th column of the array AB as +*> follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is REAL array, dimension (M) +*> If INFO = 0, or INFO > M, R contains the row scale factors +*> for A. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is REAL array, dimension (N) +*> If INFO = 0, C contains the column scale factors for A. +*> \endverbatim +*> +*> \param[out] ROWCND +*> \verbatim +*> ROWCND is REAL +*> If INFO = 0 or INFO > M, ROWCND contains the ratio of the +*> smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +*> AMAX is neither too large nor too small, it is not worth +*> scaling by R. +*> \endverbatim +*> +*> \param[out] COLCND +*> \verbatim +*> COLCND is REAL +*> If INFO = 0, COLCND contains the ratio of the smallest +*> C(i) to the largest C(i). If COLCND >= 0.1, it is not +*> worth scaling by C. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= M: the i-th row of A is exactly zero +*> > M: the (i-M)-th column of A is exactly zero +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGBcomputational +* +* ===================================================================== + SUBROUTINE SGBEQU( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N + REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), C( * ), R( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, KD + REAL BIGNUM, RCMAX, RCMIN, SMLNUM +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + KD = KU + 1 + DO 30 J = 1, N + DO 20 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + R( I ) = MAX( R( I ), ABS( AB( KD+I-J, J ) ) ) + 20 CONTINUE + 30 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)) +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + KD = KU + 1 + DO 90 J = 1, N + DO 80 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + C( J ) = MAX( C( J ), ABS( AB( KD+I-J, J ) )*R( I ) ) + 80 CONTINUE + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)) +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of SGBEQU +* + END diff --git a/dspl/liblapack/SRC/sgbequb.f b/dspl/liblapack/SRC/sgbequb.f new file mode 100644 index 0000000..5fa4abf --- /dev/null +++ b/dspl/liblapack/SRC/sgbequb.f @@ -0,0 +1,340 @@ +*> \brief \b SGBEQUB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGBEQUB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGBEQUB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, +* AMAX, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, KL, KU, LDAB, M, N +* REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), C( * ), R( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGBEQUB computes row and column scalings intended to equilibrate an +*> M-by-N matrix A and reduce its condition number. R returns the row +*> scale factors and C the column scale factors, chosen to try to make +*> the largest element in each row and column of the matrix B with +*> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most +*> the radix. +*> +*> R(i) and C(j) are restricted to be a power of the radix between +*> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use +*> of these scaling factors is not guaranteed to reduce the condition +*> number of A but works well in practice. +*> +*> This routine differs from SGEEQU by restricting the scaling factors +*> to a power of the radix. Barring over- and underflow, scaling by +*> these factors introduces no additional rounding errors. However, the +*> scaled entries' magnitudes are no longer approximately 1 but lie +*> between sqrt(radix) and 1/sqrt(radix). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array A. LDAB >= max(1,M). +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is REAL array, dimension (M) +*> If INFO = 0 or INFO > M, R contains the row scale factors +*> for A. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is REAL array, dimension (N) +*> If INFO = 0, C contains the column scale factors for A. +*> \endverbatim +*> +*> \param[out] ROWCND +*> \verbatim +*> ROWCND is REAL +*> If INFO = 0 or INFO > M, ROWCND contains the ratio of the +*> smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +*> AMAX is neither too large nor too small, it is not worth +*> scaling by R. +*> \endverbatim +*> +*> \param[out] COLCND +*> \verbatim +*> COLCND is REAL +*> If INFO = 0, COLCND contains the ratio of the smallest +*> C(i) to the largest C(i). If COLCND >= 0.1, it is not +*> worth scaling by C. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= M: the i-th row of A is exactly zero +*> > M: the (i-M)-th column of A is exactly zero +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realGBcomputational +* +* ===================================================================== + SUBROUTINE SGBEQUB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N + REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), C( * ), R( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, KD + REAL BIGNUM, RCMAX, RCMIN, SMLNUM, RADIX, LOGRDX +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, LOG +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBEQUB', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. Assume SMLNUM is a power of the radix. +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + RADIX = SLAMCH( 'B' ) + LOGRDX = LOG(RADIX) +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + KD = KU + 1 + DO 30 J = 1, N + DO 20 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + R( I ) = MAX( R( I ), ABS( AB( KD+I-J, J ) ) ) + 20 CONTINUE + 30 CONTINUE + DO I = 1, M + IF( R( I ).GT.ZERO ) THEN + R( I ) = RADIX**INT( LOG( R( I ) ) / LOGRDX ) + END IF + END DO +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)). +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors. +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + DO 90 J = 1, N + DO 80 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + C( J ) = MAX( C( J ), ABS( AB( KD+I-J, J ) )*R( I ) ) + 80 CONTINUE + IF( C( J ).GT.ZERO ) THEN + C( J ) = RADIX**INT( LOG( C( J ) ) / LOGRDX ) + END IF + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)). +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of SGBEQUB +* + END diff --git a/dspl/liblapack/SRC/sgbrfs.f b/dspl/liblapack/SRC/sgbrfs.f new file mode 100644 index 0000000..593c158 --- /dev/null +++ b/dspl/liblapack/SRC/sgbrfs.f @@ -0,0 +1,464 @@ +*> \brief \b SGBRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGBRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, +* IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGBRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is banded, and provides +*> error bounds and backward error estimates for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> The original band matrix A, stored in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is REAL array, dimension (LDAFB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by SGBTRF. U is stored as an upper triangular band +*> matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +*> the multipliers used during the factorization are stored in +*> rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL*KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from SGBTRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by SGBTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGBcomputational +* +* ===================================================================== + SUBROUTINE SGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, + $ IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANST + INTEGER COUNT, I, J, K, KASE, KK, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGBMV, SGBTRS, SLACN2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -7 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -9 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = MIN( KL+KU+2, N+1 ) + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL SGBMV( TRANS, N, N, KL, KU, -ONE, AB, LDAB, X( 1, J ), 1, + $ ONE, WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(op(A))*abs(X) + abs(B). +* + IF( NOTRAN ) THEN + DO 50 K = 1, N + KK = KU + 1 - K + XK = ABS( X( K, J ) ) + DO 40 I = MAX( 1, K-KU ), MIN( N, K+KL ) + WORK( I ) = WORK( I ) + ABS( AB( KK+I, K ) )*XK + 40 CONTINUE + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + KK = KU + 1 - K + DO 60 I = MAX( 1, K-KU ), MIN( N, K+KL ) + S = S + ABS( AB( KK+I, K ) )*ABS( X( I, J ) ) + 60 CONTINUE + WORK( K ) = WORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL SGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK( N+1 ), N, INFO ) + CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL SLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**T). +* + CALL SGBTRS( TRANST, N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK( N+1 ), N, INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( N+I )*WORK( I ) + 110 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( N+I )*WORK( I ) + 120 CONTINUE + CALL SGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK( N+1 ), N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of SGBRFS +* + END diff --git a/dspl/liblapack/SRC/sgbrfsx.f b/dspl/liblapack/SRC/sgbrfsx.f new file mode 100644 index 0000000..032b78b --- /dev/null +++ b/dspl/liblapack/SRC/sgbrfsx.f @@ -0,0 +1,765 @@ +*> \brief \b SGBRFSX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGBRFSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGBRFSX( TRANS, EQUED, N, KL, KU, NRHS, AB, LDAB, AFB, +* LDAFB, IPIV, R, C, B, LDB, X, LDX, RCOND, +* BERR, N_ERR_BNDS, ERR_BNDS_NORM, +* ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS, EQUED +* INTEGER INFO, LDAB, LDAFB, LDB, LDX, N, KL, KU, NRHS, +* $ NPARAMS, N_ERR_BNDS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ X( LDX , * ),WORK( * ) +* REAL R( * ), C( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGBRFSX improves the computed solution to a system of linear +*> equations and provides error bounds and backward error estimates +*> for the solution. In addition to normwise error bound, the code +*> provides maximum componentwise error bound if possible. See +*> comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the +*> error bounds. +*> +*> The original system of linear equations may have been equilibrated +*> before calling this routine, as described by arguments EQUED, R +*> and C below. In this case, the solution and error bounds returned +*> are for the original unequilibrated system. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done to A +*> before calling this routine. This is needed to compute +*> the solution and error bounds correctly. +*> = 'N': No equilibration +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> The right hand side B has been changed accordingly. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> The original band matrix A, stored in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is REAL array, dimension (LDAFB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by DGBTRF. U is stored as an upper triangular band +*> matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +*> the multipliers used during the factorization are stored in +*> rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL*KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from SGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is REAL array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. R is an input argument if FACT = 'F'; +*> otherwise, R is an output argument. If FACT = 'F' and +*> EQUED = 'R' or 'B', each element of R must be positive. +*> If R is output, each element of R is a power of the radix. +*> If R is input, each element of R should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. C is an input argument if FACT = 'F'; +*> otherwise, C is an output argument. If FACT = 'F' and +*> EQUED = 'C' or 'B', each element of C must be positive. +*> If C is output, each element of C is a power of the radix. +*> If C is input, each element of C should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by SGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is REAL array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realGBcomputational +* +* ===================================================================== + SUBROUTINE SGBRFSX( TRANS, EQUED, N, KL, KU, NRHS, AB, LDAB, AFB, + $ LDAFB, IPIV, R, C, B, LDB, X, LDX, RCOND, + $ BERR, N_ERR_BNDS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER TRANS, EQUED + INTEGER INFO, LDAB, LDAFB, LDB, LDX, N, KL, KU, NRHS, + $ NPARAMS, N_ERR_BNDS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ X( LDX , * ),WORK( * ) + REAL R( * ), C( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL ITREF_DEFAULT, ITHRESH_DEFAULT, + $ COMPONENTWISE_DEFAULT + REAL RTHRESH_DEFAULT, DZTHRESH_DEFAULT + PARAMETER ( ITREF_DEFAULT = 1.0 ) + PARAMETER ( ITHRESH_DEFAULT = 10.0 ) + PARAMETER ( COMPONENTWISE_DEFAULT = 1.0 ) + PARAMETER ( RTHRESH_DEFAULT = 0.5 ) + PARAMETER ( DZTHRESH_DEFAULT = 0.25 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. Local Scalars .. + CHARACTER(1) NORM + LOGICAL ROWEQU, COLEQU, NOTRAN + INTEGER J, TRANS_TYPE, PREC_TYPE, REF_TYPE + INTEGER N_NORMS + REAL ANORM, RCOND_TMP + REAL ILLRCOND_THRESH, ERR_LBND, CWISE_WRONG + LOGICAL IGNORE_CWISE + INTEGER ITHRESH + REAL RTHRESH, UNSTABLE_THRESH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SGBCON + EXTERNAL SLA_GBRFSX_EXTENDED +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. External Functions .. + EXTERNAL LSAME, ILATRANS, ILAPREC + EXTERNAL SLAMCH, SLANGB, SLA_GBRCOND + REAL SLAMCH, SLANGB, SLA_GBRCOND + LOGICAL LSAME + INTEGER ILATRANS, ILAPREC +* .. +* .. Executable Statements .. +* +* Check the input parameters. +* + INFO = 0 + TRANS_TYPE = ILATRANS( TRANS ) + REF_TYPE = INT( ITREF_DEFAULT ) + IF ( NPARAMS .GE. LA_LINRX_ITREF_I ) THEN + IF ( PARAMS( LA_LINRX_ITREF_I ) .LT. 0.0 ) THEN + PARAMS( LA_LINRX_ITREF_I ) = ITREF_DEFAULT + ELSE + REF_TYPE = PARAMS( LA_LINRX_ITREF_I ) + END IF + END IF +* +* Set default parameters. +* + ILLRCOND_THRESH = REAL( N ) * SLAMCH( 'Epsilon' ) + ITHRESH = INT( ITHRESH_DEFAULT ) + RTHRESH = RTHRESH_DEFAULT + UNSTABLE_THRESH = DZTHRESH_DEFAULT + IGNORE_CWISE = COMPONENTWISE_DEFAULT .EQ. 0.0 +* + IF ( NPARAMS.GE.LA_LINRX_ITHRESH_I ) THEN + IF ( PARAMS( LA_LINRX_ITHRESH_I ).LT.0.0 ) THEN + PARAMS( LA_LINRX_ITHRESH_I ) = ITHRESH + ELSE + ITHRESH = INT( PARAMS( LA_LINRX_ITHRESH_I ) ) + END IF + END IF + IF ( NPARAMS.GE.LA_LINRX_CWISE_I ) THEN + IF ( PARAMS( LA_LINRX_CWISE_I ).LT.0.0 ) THEN + IF ( IGNORE_CWISE ) THEN + PARAMS( LA_LINRX_CWISE_I ) = 0.0 + ELSE + PARAMS( LA_LINRX_CWISE_I ) = 1.0 + END IF + ELSE + IGNORE_CWISE = PARAMS( LA_LINRX_CWISE_I ) .EQ. 0.0 + END IF + END IF + IF ( REF_TYPE .EQ. 0 .OR. N_ERR_BNDS .EQ. 0 ) THEN + N_NORMS = 0 + ELSE IF ( IGNORE_CWISE ) THEN + N_NORMS = 1 + ELSE + N_NORMS = 2 + END IF +* + NOTRAN = LSAME( TRANS, 'N' ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) +* +* Test input parameters. +* + IF( TRANS_TYPE.EQ.-1 ) THEN + INFO = -1 + ELSE IF( .NOT.ROWEQU .AND. .NOT.COLEQU .AND. + $ .NOT.LSAME( EQUED, 'N' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KL.LT.0 ) THEN + INFO = -4 + ELSE IF( KU.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -8 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBRFSX', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + RCOND = 1.0 + DO J = 1, NRHS + BERR( J ) = 0.0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 0.0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 0.0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 1.0 + END IF + END DO + RETURN + END IF +* +* Default to failure. +* + RCOND = 0.0 + DO J = 1, NRHS + BERR( J ) = 1.0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 0.0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 0.0 + END IF + END DO +* +* Compute the norm of A and the reciprocal of the condition +* number of A. +* + IF( NOTRAN ) THEN + NORM = 'I' + ELSE + NORM = '1' + END IF + ANORM = SLANGB( NORM, N, KL, KU, AB, LDAB, WORK ) + CALL SGBCON( NORM, N, KL, KU, AFB, LDAFB, IPIV, ANORM, RCOND, + $ WORK, IWORK, INFO ) +* +* Perform refinement on each right-hand side +* + IF ( REF_TYPE .NE. 0 .AND. INFO .EQ. 0 ) THEN + + PREC_TYPE = ILAPREC( 'D' ) + + IF ( NOTRAN ) THEN + CALL SLA_GBRFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, KL, KU, + $ NRHS, AB, LDAB, AFB, LDAFB, IPIV, COLEQU, C, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, WORK( N+1 ), WORK( 1 ), WORK( 2*N+1 ), + $ WORK( 1 ), RCOND, ITHRESH, RTHRESH, UNSTABLE_THRESH, + $ IGNORE_CWISE, INFO ) + ELSE + CALL SLA_GBRFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, KL, KU, + $ NRHS, AB, LDAB, AFB, LDAFB, IPIV, ROWEQU, R, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, WORK( N+1 ), WORK( 1 ), WORK( 2*N+1 ), + $ WORK( 1 ), RCOND, ITHRESH, RTHRESH, UNSTABLE_THRESH, + $ IGNORE_CWISE, INFO ) + END IF + END IF + + ERR_LBND = MAX( 10.0, SQRT( REAL( N ) ) ) * SLAMCH( 'Epsilon' ) + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 1 ) THEN +* +* Compute scaled normwise condition number cond(A*C). +* + IF ( COLEQU .AND. NOTRAN ) THEN + RCOND_TMP = SLA_GBRCOND( TRANS, N, KL, KU, AB, LDAB, AFB, + $ LDAFB, IPIV, -1, C, INFO, WORK, IWORK ) + ELSE IF ( ROWEQU .AND. .NOT. NOTRAN ) THEN + RCOND_TMP = SLA_GBRCOND( TRANS, N, KL, KU, AB, LDAB, AFB, + $ LDAFB, IPIV, -1, R, INFO, WORK, IWORK ) + ELSE + RCOND_TMP = SLA_GBRCOND( TRANS, N, KL, KU, AB, LDAB, AFB, + $ LDAFB, IPIV, 0, R, INFO, WORK, IWORK ) + END IF + DO J = 1, NRHS +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .GT. 1.0 ) + $ ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 0.0 + IF ( INFO .LE. N ) INFO = N + J + ELSE IF ( ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .LT. ERR_LBND ) + $ THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF + + IF (N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 2) THEN +* +* Compute componentwise condition number cond(A*diag(Y(:,J))) for +* each right-hand side using the current solution as an estimate of +* the true solution. If the componentwise error estimate is too +* large, then the solution is a lousy estimate of truth and the +* estimated RCOND may be too optimistic. To avoid misleading users, +* the inverse condition number is set to 0.0 when the estimated +* cwise error is at least CWISE_WRONG. +* + CWISE_WRONG = SQRT( SLAMCH( 'Epsilon' ) ) + DO J = 1, NRHS + IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .LT. CWISE_WRONG ) + $ THEN + RCOND_TMP = SLA_GBRCOND( TRANS, N, KL, KU, AB, LDAB, AFB, + $ LDAFB, IPIV, 1, X( 1, J ), INFO, WORK, IWORK ) + ELSE + RCOND_TMP = 0.0 + END IF +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .GT. 1.0 ) + $ ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 0.0 + IF ( PARAMS( LA_LINRX_CWISE_I ) .EQ. 1.0 + $ .AND. INFO.LT.N + J ) INFO = N + J + ELSE IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) + $ .LT. ERR_LBND ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF +* + RETURN +* +* End of SGBRFSX +* + END diff --git a/dspl/liblapack/SRC/sgbsv.f b/dspl/liblapack/SRC/sgbsv.f new file mode 100644 index 0000000..9b92296 --- /dev/null +++ b/dspl/liblapack/SRC/sgbsv.f @@ -0,0 +1,223 @@ +*> \brief SGBSV computes the solution to system of linear equations A * X = B for GB matrices (simple driver) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGBSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL AB( LDAB, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGBSV computes the solution to a real system of linear equations +*> A * X = B, where A is a band matrix of order N with KL subdiagonals +*> and KU superdiagonals, and X and B are N-by-NRHS matrices. +*> +*> The LU decomposition with partial pivoting and row interchanges is +*> used to factor A as A = L * U, where L is a product of permutation +*> and unit lower triangular matrices with KL subdiagonals, and U is +*> upper triangular with KL+KU superdiagonals. The factored form of A +*> is then used to solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows KL+1 to +*> 2*KL+KU+1; rows 1 to KL of the array need not be set. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KL+KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+KL) +*> On exit, details of the factorization: U is stored as an +*> upper triangular band matrix with KL+KU superdiagonals in +*> rows 1 to KL+KU+1, and the multipliers used during the +*> factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices that define the permutation matrix P; +*> row i of the matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and the solution has not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGBsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> M = N = 6, KL = 2, KU = 1: +*> +*> On entry: On exit: +*> +*> * * * + + + * * * u14 u25 u36 +*> * * + + + + * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +*> a31 a42 a53 a64 * * m31 m42 m53 m64 * * +*> +*> Array elements marked * are not used by the routine; elements marked +*> + need not be set on entry, but are required by the routine to store +*> elements of U because of fill-in resulting from the row interchanges. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL AB( LDAB, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL SGBTRF, SGBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( KL.LT.0 ) THEN + INFO = -2 + ELSE IF( KU.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBSV ', -INFO ) + RETURN + END IF +* +* Compute the LU factorization of the band matrix A. +* + CALL SGBTRF( N, N, KL, KU, AB, LDAB, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL SGBTRS( 'No transpose', N, KL, KU, NRHS, AB, LDAB, IPIV, + $ B, LDB, INFO ) + END IF + RETURN +* +* End of SGBSV +* + END diff --git a/dspl/liblapack/SRC/sgbsvx.f b/dspl/liblapack/SRC/sgbsvx.f new file mode 100644 index 0000000..acd7db2 --- /dev/null +++ b/dspl/liblapack/SRC/sgbsvx.f @@ -0,0 +1,644 @@ +*> \brief SGBSVX computes the solution to system of linear equations A * X = B for GB matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGBSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGBSVX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, +* LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, +* RCOND, FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, TRANS +* INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ BERR( * ), C( * ), FERR( * ), R( * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGBSVX uses the LU factorization to compute the solution to a real +*> system of linear equations A * X = B, A**T * X = B, or A**H * X = B, +*> where A is a band matrix of order N with KL subdiagonals and KU +*> superdiagonals, and X and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed by this subroutine: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +*> TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +*> TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +*> or diag(C)*B (if TRANS = 'T' or 'C'). +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the +*> matrix A (after equilibration if FACT = 'E') as +*> A = L * U, +*> where L is a product of permutation and unit lower triangular +*> matrices with KL subdiagonals, and U is upper triangular with +*> KL+KU superdiagonals. +*> +*> 3. If some U(i,i)=0, so that U is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +*> that it solves the original system before equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AFB and IPIV contain the factored form of +*> A. If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by R and C. +*> AB, AFB, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AFB and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AFB and factored. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations. +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> +*> If FACT = 'F' and EQUED is not 'N', then A must have been +*> equilibrated by the scaling factors in R and/or C. AB is not +*> modified if FACT = 'F' or 'N', or if FACT = 'E' and +*> EQUED = 'N' on exit. +*> +*> On exit, if EQUED .ne. 'N', A is scaled as follows: +*> EQUED = 'R': A := diag(R) * A +*> EQUED = 'C': A := A * diag(C) +*> EQUED = 'B': A := diag(R) * A * diag(C). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in,out] AFB +*> \verbatim +*> AFB is REAL array, dimension (LDAFB,N) +*> If FACT = 'F', then AFB is an input argument and on entry +*> contains details of the LU factorization of the band matrix +*> A, as computed by SGBTRF. U is stored as an upper triangular +*> band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +*> and the multipliers used during the factorization are stored +*> in rows KL+KU+2 to 2*KL+KU+1. If EQUED .ne. 'N', then AFB is +*> the factored form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AFB is an output argument and on exit +*> returns details of the LU factorization of A. +*> +*> If FACT = 'E', then AFB is an output argument and on exit +*> returns details of the LU factorization of the equilibrated +*> matrix A (see the description of AB for the form of the +*> equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains the pivot indices from the factorization A = L*U +*> as computed by SGBTRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = L*U +*> of the equilibrated matrix A. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is REAL array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. R is an input argument if FACT = 'F'; +*> otherwise, R is an output argument. If FACT = 'F' and +*> EQUED = 'R' or 'B', each element of R must be positive. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. C is an input argument if FACT = 'F'; +*> otherwise, C is an output argument. If FACT = 'F' and +*> EQUED = 'C' or 'B', each element of C must be positive. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +*> diag(R)*B; +*> if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +*> overwritten by diag(C)*B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X +*> to the original system of equations. Note that A and B are +*> modified on exit if EQUED .ne. 'N', and the solution to the +*> equilibrated system is inv(diag(C))*X if TRANS = 'N' and +*> EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' +*> and EQUED = 'R' or 'B'. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The estimate of the reciprocal condition number of the matrix +*> A after equilibration (if done). If RCOND is less than the +*> machine precision (in particular, if RCOND = 0), the matrix +*> is singular to working precision. This condition is +*> indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> On exit, WORK(1) contains the reciprocal pivot growth +*> factor norm(A)/norm(U). The "max absolute element" norm is +*> used. If WORK(1) is much less than 1, then the stability +*> of the LU factorization of the (equilibrated) matrix A +*> could be poor. This also means that the solution X, condition +*> estimator RCOND, and forward error bound FERR could be +*> unreliable. If factorization fails with 0 WORK(1) contains the reciprocal pivot growth factor for the +*> leading INFO columns of A. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, so the solution and error bounds +*> could not be computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realGBsolve +* +* ===================================================================== + SUBROUTINE SGBSVX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, + $ LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, + $ RCOND, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ BERR( * ), C( * ), FERR( * ), R( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* Moved setting of INFO = N+1 so INFO does not subsequently get +* overwritten. Sven, 17 Mar 05. +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + CHARACTER NORM + INTEGER I, INFEQU, J, J1, J2 + REAL AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, RPVGRW, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANGB, SLANTB + EXTERNAL LSAME, SLAMCH, SLANGB, SLANTB +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGBCON, SGBEQU, SGBRFS, SGBTRF, SGBTRS, + $ SLACPY, SLAQGB, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KL.LT.0 ) THEN + INFO = -4 + ELSE IF( KU.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -8 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -10 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -12 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -13 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -14 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -16 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -18 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL SGBEQU( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL SLAQGB( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = R( I )*B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( COLEQU ) THEN + DO 60 J = 1, NRHS + DO 50 I = 1, N + B( I, J ) = C( I )*B( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of the band matrix A. +* + DO 70 J = 1, N + J1 = MAX( J-KU, 1 ) + J2 = MIN( J+KL, N ) + CALL SCOPY( J2-J1+1, AB( KU+1-J+J1, J ), 1, + $ AFB( KL+KU+1-J+J1, J ), 1 ) + 70 CONTINUE +* + CALL SGBTRF( N, N, KL, KU, AFB, LDAFB, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + ANORM = ZERO + DO 90 J = 1, INFO + DO 80 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) + ANORM = MAX( ANORM, ABS( AB( I, J ) ) ) + 80 CONTINUE + 90 CONTINUE + RPVGRW = SLANTB( 'M', 'U', 'N', INFO, MIN( INFO-1, KL+KU ), + $ AFB( MAX( 1, KL+KU+2-INFO ), 1 ), LDAFB, + $ WORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = ANORM / RPVGRW + END IF + WORK( 1 ) = RPVGRW + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A and the +* reciprocal pivot growth factor RPVGRW. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = SLANGB( NORM, N, KL, KU, AB, LDAB, WORK ) + RPVGRW = SLANTB( 'M', 'U', 'N', N, KL+KU, AFB, LDAFB, WORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = SLANGB( 'M', N, KL, KU, AB, LDAB, WORK ) / RPVGRW + END IF +* +* Compute the reciprocal of the condition number of A. +* + CALL SGBCON( NORM, N, KL, KU, AFB, LDAFB, IPIV, ANORM, RCOND, + $ WORK, IWORK, INFO ) +* +* Compute the solution matrix X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SGBTRS( TRANS, N, KL, KU, NRHS, AFB, LDAFB, IPIV, X, LDX, + $ INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL SGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, IPIV, + $ B, LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( NOTRAN ) THEN + IF( COLEQU ) THEN + DO 110 J = 1, NRHS + DO 100 I = 1, N + X( I, J ) = C( I )*X( I, J ) + 100 CONTINUE + 110 CONTINUE + DO 120 J = 1, NRHS + FERR( J ) = FERR( J ) / COLCND + 120 CONTINUE + END IF + ELSE IF( ROWEQU ) THEN + DO 140 J = 1, NRHS + DO 130 I = 1, N + X( I, J ) = R( I )*X( I, J ) + 130 CONTINUE + 140 CONTINUE + DO 150 J = 1, NRHS + FERR( J ) = FERR( J ) / ROWCND + 150 CONTINUE + END IF +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + WORK( 1 ) = RPVGRW + RETURN +* +* End of SGBSVX +* + END diff --git a/dspl/liblapack/SRC/sgbsvxx.f b/dspl/liblapack/SRC/sgbsvxx.f new file mode 100644 index 0000000..b213232 --- /dev/null +++ b/dspl/liblapack/SRC/sgbsvxx.f @@ -0,0 +1,802 @@ +*> \brief SGBSVXX computes the solution to system of linear equations A * X = B for GB matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGBSVXX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGBSVXX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, +* LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, +* RCOND, RPVGRW, BERR, N_ERR_BNDS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, TRANS +* INTEGER INFO, LDAB, LDAFB, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* REAL RCOND, RPVGRW +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ X( LDX , * ),WORK( * ) +* REAL R( * ), C( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGBSVXX uses the LU factorization to compute the solution to a +*> real system of linear equations A * X = B, where A is an +*> N-by-N matrix and X and B are N-by-NRHS matrices. +*> +*> If requested, both normwise and maximum componentwise error bounds +*> are returned. SGBSVXX will return a solution with a tiny +*> guaranteed error (O(eps) where eps is the working machine +*> precision) unless the matrix is very ill-conditioned, in which +*> case a warning is returned. Relevant condition numbers also are +*> calculated and returned. +*> +*> SGBSVXX accepts user-provided factorizations and equilibration +*> factors; see the definitions of the FACT and EQUED options. +*> Solving with refinement and using a factorization from a previous +*> SGBSVXX call will also produce a solution with either O(eps) +*> errors or warnings, but we cannot make that claim for general +*> user-provided factorizations and equilibration factors if they +*> differ from what SGBSVXX would itself produce. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> +*> TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +*> TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +*> TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +*> +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +*> or diag(C)*B (if TRANS = 'T' or 'C'). +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor +*> the matrix A (after equilibration if FACT = 'E') as +*> +*> A = P * L * U, +*> +*> where P is a permutation matrix, L is a unit lower triangular +*> matrix, and U is upper triangular. +*> +*> 3. If some U(i,i)=0, so that U is exactly singular, then the +*> routine returns with INFO = i. Otherwise, the factored form of A +*> is used to estimate the condition number of the matrix A (see +*> argument RCOND). If the reciprocal of the condition number is less +*> than machine precision, the routine still goes on to solve for X +*> and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), +*> the routine will use iterative refinement to try to get a small +*> error and error bounds. Refinement calculates the residual to at +*> least twice the working precision. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +*> that it solves the original system before equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF and IPIV contain the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by R and C. +*> A, AF, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate Transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> +*> If FACT = 'F' and EQUED is not 'N', then AB must have been +*> equilibrated by the scaling factors in R and/or C. AB is not +*> modified if FACT = 'F' or 'N', or if FACT = 'E' and +*> EQUED = 'N' on exit. +*> +*> On exit, if EQUED .ne. 'N', A is scaled as follows: +*> EQUED = 'R': A := diag(R) * A +*> EQUED = 'C': A := A * diag(C) +*> EQUED = 'B': A := diag(R) * A * diag(C). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in,out] AFB +*> \verbatim +*> AFB is REAL array, dimension (LDAFB,N) +*> If FACT = 'F', then AFB is an input argument and on entry +*> contains details of the LU factorization of the band matrix +*> A, as computed by SGBTRF. U is stored as an upper triangular +*> band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +*> and the multipliers used during the factorization are stored +*> in rows KL+KU+2 to 2*KL+KU+1. If EQUED .ne. 'N', then AFB is +*> the factored form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the equilibrated matrix A (see the description of A for +*> the form of the equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains the pivot indices from the factorization A = P*L*U +*> as computed by SGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the equilibrated matrix A. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is REAL array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. R is an input argument if FACT = 'F'; +*> otherwise, R is an output argument. If FACT = 'F' and +*> EQUED = 'R' or 'B', each element of R must be positive. +*> If R is output, each element of R is a power of the radix. +*> If R is input, each element of R should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. C is an input argument if FACT = 'F'; +*> otherwise, C is an output argument. If FACT = 'F' and +*> EQUED = 'C' or 'B', each element of C must be positive. +*> If C is output, each element of C is a power of the radix. +*> If C is input, each element of C should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +*> diag(R)*B; +*> if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +*> overwritten by diag(C)*B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X to the original +*> system of equations. Note that A and B are modified on exit +*> if EQUED .ne. 'N', and the solution to the equilibrated system is +*> inv(diag(C))*X if TRANS = 'N' and EQUED = 'C' or 'B', or +*> inv(diag(R))*X if TRANS = 'T' or 'C' and EQUED = 'R' or 'B'. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] RPVGRW +*> \verbatim +*> RPVGRW is REAL +*> Reciprocal pivot growth. On exit, this contains the reciprocal +*> pivot growth factor norm(A)/norm(U). The "max absolute element" +*> norm is used. If this is much less than 1, then the stability of +*> the LU factorization of the (equilibrated) matrix A could be poor. +*> This also means that the solution X, estimated condition numbers, +*> and error bounds could be unreliable. If factorization fails with +*> 0 for the leading INFO columns of A. In SGESVX, this quantity is +*> returned in WORK(1). +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is REAL array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realGBsolve +* +* ===================================================================== + SUBROUTINE SGBSVXX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, + $ LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, + $ RCOND, RPVGRW, BERR, N_ERR_BNDS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, LDAB, LDAFB, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + REAL RCOND, RPVGRW +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ X( LDX , * ),WORK( * ) + REAL R( * ), C( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + INTEGER INFEQU, I, J, KL, KU + REAL AMAX, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, SMLNUM +* .. +* .. External Functions .. + EXTERNAL LSAME, SLAMCH, SLA_GBRPVGRW + LOGICAL LSAME + REAL SLAMCH, SLA_GBRPVGRW +* .. +* .. External Subroutines .. + EXTERNAL SGBEQUB, SGBTRF, SGBTRS, SLACPY, SLAQGB, + $ XERBLA, SLASCL2, SGBRFSX +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF +* +* Default is failure. If an input parameter is wrong or +* factorization fails, make everything look horrible. Only the +* pivot growth is set here, the rest is initialized in SGBRFSX. +* + RPVGRW = ZERO +* +* Test the input parameters. PARAMS is not tested until SGBRFSX. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT. + $ LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KL.LT.0 ) THEN + INFO = -4 + ELSE IF( KU.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -8 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -10 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -12 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -13 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -14 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -15 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBSVXX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL SGBEQUB( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL SLAQGB( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF +* +* If the scaling factors are not applied, set them to 1.0. +* + IF ( .NOT.ROWEQU ) THEN + DO J = 1, N + R( J ) = 1.0 + END DO + END IF + IF ( .NOT.COLEQU ) THEN + DO J = 1, N + C( J ) = 1.0 + END DO + END IF + END IF +* +* Scale the right hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) CALL SLASCL2(N, NRHS, R, B, LDB) + ELSE + IF( COLEQU ) CALL SLASCL2(N, NRHS, C, B, LDB) + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of A. +* + DO 40, J = 1, N + DO 30, I = KL+1, 2*KL+KU+1 + AFB( I, J ) = AB( I-KL, J ) + 30 CONTINUE + 40 CONTINUE + CALL SGBTRF( N, N, KL, KU, AFB, LDAFB, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Pivot in column INFO is exactly 0 +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + RPVGRW = SLA_GBRPVGRW( N, KL, KU, INFO, AB, LDAB, AFB, + $ LDAFB ) + RETURN + END IF + END IF +* +* Compute the reciprocal pivot growth factor RPVGRW. +* + RPVGRW = SLA_GBRPVGRW( N, KL, KU, N, AB, LDAB, AFB, LDAFB ) +* +* Compute the solution matrix X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SGBTRS( TRANS, N, KL, KU, NRHS, AFB, LDAFB, IPIV, X, LDX, + $ INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL SGBRFSX( TRANS, EQUED, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, + $ IPIV, R, C, B, LDB, X, LDX, RCOND, BERR, + $ N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, IWORK, INFO ) +* +* Scale solutions. +* + IF ( COLEQU .AND. NOTRAN ) THEN + CALL SLASCL2 ( N, NRHS, C, X, LDX ) + ELSE IF ( ROWEQU .AND. .NOT.NOTRAN ) THEN + CALL SLASCL2 ( N, NRHS, R, X, LDX ) + END IF +* + RETURN +* +* End of SGBSVXX +* + END diff --git a/dspl/liblapack/SRC/sgbtf2.f b/dspl/liblapack/SRC/sgbtf2.f new file mode 100644 index 0000000..ecf10e3 --- /dev/null +++ b/dspl/liblapack/SRC/sgbtf2.f @@ -0,0 +1,277 @@ +*> \brief \b SGBTF2 computes the LU factorization of a general band matrix using the unblocked version of the algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGBTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGBTF2 computes an LU factorization of a real m-by-n band matrix A +*> using partial pivoting with row interchanges. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows KL+1 to +*> 2*KL+KU+1; rows 1 to KL of the array need not be set. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +*> +*> On exit, details of the factorization: U is stored as an +*> upper triangular band matrix with KL+KU superdiagonals in +*> rows 1 to KL+KU+1, and the multipliers used during the +*> factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = +i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGBcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> M = N = 6, KL = 2, KU = 1: +*> +*> On entry: On exit: +*> +*> * * * + + + * * * u14 u25 u36 +*> * * + + + + * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +*> a31 a42 a53 a64 * * m31 m42 m53 m64 * * +*> +*> Array elements marked * are not used by the routine; elements marked +*> + need not be set on entry, but are required by the routine to store +*> elements of U, because of fill-in resulting from the row +*> interchanges. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JP, JU, KM, KV +* .. +* .. External Functions .. + INTEGER ISAMAX + EXTERNAL ISAMAX +* .. +* .. External Subroutines .. + EXTERNAL SGER, SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* KV is the number of superdiagonals in the factor U, allowing for +* fill-in. +* + KV = KU + KL +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KV+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Gaussian elimination with partial pivoting +* +* Set fill-in elements in columns KU+2 to KV to zero. +* + DO 20 J = KU + 2, MIN( KV, N ) + DO 10 I = KV - J + 2, KL + AB( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* JU is the index of the last column affected by the current stage +* of the factorization. +* + JU = 1 +* + DO 40 J = 1, MIN( M, N ) +* +* Set fill-in elements in column J+KV to zero. +* + IF( J+KV.LE.N ) THEN + DO 30 I = 1, KL + AB( I, J+KV ) = ZERO + 30 CONTINUE + END IF +* +* Find pivot and test for singularity. KM is the number of +* subdiagonal elements in the current column. +* + KM = MIN( KL, M-J ) + JP = ISAMAX( KM+1, AB( KV+1, J ), 1 ) + IPIV( J ) = JP + J - 1 + IF( AB( KV+JP, J ).NE.ZERO ) THEN + JU = MAX( JU, MIN( J+KU+JP-1, N ) ) +* +* Apply interchange to columns J to JU. +* + IF( JP.NE.1 ) + $ CALL SSWAP( JU-J+1, AB( KV+JP, J ), LDAB-1, + $ AB( KV+1, J ), LDAB-1 ) +* + IF( KM.GT.0 ) THEN +* +* Compute multipliers. +* + CALL SSCAL( KM, ONE / AB( KV+1, J ), AB( KV+2, J ), 1 ) +* +* Update trailing submatrix within the band. +* + IF( JU.GT.J ) + $ CALL SGER( KM, JU-J, -ONE, AB( KV+2, J ), 1, + $ AB( KV, J+1 ), LDAB-1, AB( KV+1, J+1 ), + $ LDAB-1 ) + END IF + ELSE +* +* If pivot is zero, set INFO to the index of the pivot +* unless a zero pivot has already been found. +* + IF( INFO.EQ.0 ) + $ INFO = J + END IF + 40 CONTINUE + RETURN +* +* End of SGBTF2 +* + END diff --git a/dspl/liblapack/SRC/sgbtrf.f b/dspl/liblapack/SRC/sgbtrf.f new file mode 100644 index 0000000..2572f37 --- /dev/null +++ b/dspl/liblapack/SRC/sgbtrf.f @@ -0,0 +1,516 @@ +*> \brief \b SGBTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGBTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGBTRF computes an LU factorization of a real m-by-n band matrix A +*> using partial pivoting with row interchanges. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows KL+1 to +*> 2*KL+KU+1; rows 1 to KL of the array need not be set. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +*> +*> On exit, details of the factorization: U is stored as an +*> upper triangular band matrix with KL+KU superdiagonals in +*> rows 1 to KL+KU+1, and the multipliers used during the +*> factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = +i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGBcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> M = N = 6, KL = 2, KU = 1: +*> +*> On entry: On exit: +*> +*> * * * + + + * * * u14 u25 u36 +*> * * + + + + * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +*> a31 a42 a53 a64 * * m31 m42 m53 m64 * * +*> +*> Array elements marked * are not used by the routine; elements marked +*> + need not be set on entry, but are required by the routine to store +*> elements of U because of fill-in resulting from the row interchanges. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + INTEGER NBMAX, LDWORK + PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 ) +* .. +* .. Local Scalars .. + INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP, + $ JU, K2, KM, KV, NB, NW + REAL TEMP +* .. +* .. Local Arrays .. + REAL WORK13( LDWORK, NBMAX ), + $ WORK31( LDWORK, NBMAX ) +* .. +* .. External Functions .. + INTEGER ILAENV, ISAMAX + EXTERNAL ILAENV, ISAMAX +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGBTF2, SGEMM, SGER, SLASWP, SSCAL, + $ SSWAP, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* KV is the number of superdiagonals in the factor U, allowing for +* fill-in +* + KV = KU + KL +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KV+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment +* + NB = ILAENV( 1, 'SGBTRF', ' ', M, N, KL, KU ) +* +* The block size must not exceed the limit set by the size of the +* local arrays WORK13 and WORK31. +* + NB = MIN( NB, NBMAX ) +* + IF( NB.LE.1 .OR. NB.GT.KL ) THEN +* +* Use unblocked code +* + CALL SGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) + ELSE +* +* Use blocked code +* +* Zero the superdiagonal elements of the work array WORK13 +* + DO 20 J = 1, NB + DO 10 I = 1, J - 1 + WORK13( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Zero the subdiagonal elements of the work array WORK31 +* + DO 40 J = 1, NB + DO 30 I = J + 1, NB + WORK31( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Gaussian elimination with partial pivoting +* +* Set fill-in elements in columns KU+2 to KV to zero +* + DO 60 J = KU + 2, MIN( KV, N ) + DO 50 I = KV - J + 2, KL + AB( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE +* +* JU is the index of the last column affected by the current +* stage of the factorization +* + JU = 1 +* + DO 180 J = 1, MIN( M, N ), NB + JB = MIN( NB, MIN( M, N )-J+1 ) +* +* The active part of the matrix is partitioned +* +* A11 A12 A13 +* A21 A22 A23 +* A31 A32 A33 +* +* Here A11, A21 and A31 denote the current block of JB columns +* which is about to be factorized. The number of rows in the +* partitioning are JB, I2, I3 respectively, and the numbers +* of columns are JB, J2, J3. The superdiagonal elements of A13 +* and the subdiagonal elements of A31 lie outside the band. +* + I2 = MIN( KL-JB, M-J-JB+1 ) + I3 = MIN( JB, M-J-KL+1 ) +* +* J2 and J3 are computed after JU has been updated. +* +* Factorize the current block of JB columns +* + DO 80 JJ = J, J + JB - 1 +* +* Set fill-in elements in column JJ+KV to zero +* + IF( JJ+KV.LE.N ) THEN + DO 70 I = 1, KL + AB( I, JJ+KV ) = ZERO + 70 CONTINUE + END IF +* +* Find pivot and test for singularity. KM is the number of +* subdiagonal elements in the current column. +* + KM = MIN( KL, M-JJ ) + JP = ISAMAX( KM+1, AB( KV+1, JJ ), 1 ) + IPIV( JJ ) = JP + JJ - J + IF( AB( KV+JP, JJ ).NE.ZERO ) THEN + JU = MAX( JU, MIN( JJ+KU+JP-1, N ) ) + IF( JP.NE.1 ) THEN +* +* Apply interchange to columns J to J+JB-1 +* + IF( JP+JJ-1.LT.J+KL ) THEN +* + CALL SSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1, + $ AB( KV+JP+JJ-J, J ), LDAB-1 ) + ELSE +* +* The interchange affects columns J to JJ-1 of A31 +* which are stored in the work array WORK31 +* + CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) + CALL SSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1, + $ AB( KV+JP, JJ ), LDAB-1 ) + END IF + END IF +* +* Compute multipliers +* + CALL SSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ), + $ 1 ) +* +* Update trailing submatrix within the band and within +* the current block. JM is the index of the last column +* which needs to be updated. +* + JM = MIN( JU, J+JB-1 ) + IF( JM.GT.JJ ) + $ CALL SGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1, + $ AB( KV, JJ+1 ), LDAB-1, + $ AB( KV+1, JJ+1 ), LDAB-1 ) + ELSE +* +* If pivot is zero, set INFO to the index of the pivot +* unless a zero pivot has already been found. +* + IF( INFO.EQ.0 ) + $ INFO = JJ + END IF +* +* Copy current column of A31 into the work array WORK31 +* + NW = MIN( JJ-J+1, I3 ) + IF( NW.GT.0 ) + $ CALL SCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1, + $ WORK31( 1, JJ-J+1 ), 1 ) + 80 CONTINUE + IF( J+JB.LE.N ) THEN +* +* Apply the row interchanges to the other blocks. +* + J2 = MIN( JU-J+1, KV ) - JB + J3 = MAX( 0, JU-J-KV+1 ) +* +* Use SLASWP to apply the row interchanges to A12, A22, and +* A32. +* + CALL SLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB, + $ IPIV( J ), 1 ) +* +* Adjust the pivot indices. +* + DO 90 I = J, J + JB - 1 + IPIV( I ) = IPIV( I ) + J - 1 + 90 CONTINUE +* +* Apply the row interchanges to A13, A23, and A33 +* columnwise. +* + K2 = J - 1 + JB + J2 + DO 110 I = 1, J3 + JJ = K2 + I + DO 100 II = J + I - 1, J + JB - 1 + IP = IPIV( II ) + IF( IP.NE.II ) THEN + TEMP = AB( KV+1+II-JJ, JJ ) + AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ ) + AB( KV+1+IP-JJ, JJ ) = TEMP + END IF + 100 CONTINUE + 110 CONTINUE +* +* Update the relevant part of the trailing submatrix +* + IF( J2.GT.0 ) THEN +* +* Update A12 +* + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, J2, ONE, AB( KV+1, J ), LDAB-1, + $ AB( KV+1-JB, J+JB ), LDAB-1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A22 +* + CALL SGEMM( 'No transpose', 'No transpose', I2, J2, + $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, + $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, + $ AB( KV+1, J+JB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Update A32 +* + CALL SGEMM( 'No transpose', 'No transpose', I3, J2, + $ JB, -ONE, WORK31, LDWORK, + $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, + $ AB( KV+KL+1-JB, J+JB ), LDAB-1 ) + END IF + END IF +* + IF( J3.GT.0 ) THEN +* +* Copy the lower triangle of A13 into the work array +* WORK13 +* + DO 130 JJ = 1, J3 + DO 120 II = JJ, JB + WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 ) + 120 CONTINUE + 130 CONTINUE +* +* Update A13 in the work array +* + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, J3, ONE, AB( KV+1, J ), LDAB-1, + $ WORK13, LDWORK ) +* + IF( I2.GT.0 ) THEN +* +* Update A23 +* + CALL SGEMM( 'No transpose', 'No transpose', I2, J3, + $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, + $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ), + $ LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Update A33 +* + CALL SGEMM( 'No transpose', 'No transpose', I3, J3, + $ JB, -ONE, WORK31, LDWORK, WORK13, + $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 ) + END IF +* +* Copy the lower triangle of A13 back into place +* + DO 150 JJ = 1, J3 + DO 140 II = JJ, JB + AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ ) + 140 CONTINUE + 150 CONTINUE + END IF + ELSE +* +* Adjust the pivot indices. +* + DO 160 I = J, J + JB - 1 + IPIV( I ) = IPIV( I ) + J - 1 + 160 CONTINUE + END IF +* +* Partially undo the interchanges in the current block to +* restore the upper triangular form of A31 and copy the upper +* triangle of A31 back into place +* + DO 170 JJ = J + JB - 1, J, -1 + JP = IPIV( JJ ) - JJ + 1 + IF( JP.NE.1 ) THEN +* +* Apply interchange to columns J to JJ-1 +* + IF( JP+JJ-1.LT.J+KL ) THEN +* +* The interchange does not affect A31 +* + CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ AB( KV+JP+JJ-J, J ), LDAB-1 ) + ELSE +* +* The interchange does affect A31 +* + CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) + END IF + END IF +* +* Copy the current column of A31 back into place +* + NW = MIN( I3, JJ-J+1 ) + IF( NW.GT.0 ) + $ CALL SCOPY( NW, WORK31( 1, JJ-J+1 ), 1, + $ AB( KV+KL+1-JJ+J, JJ ), 1 ) + 170 CONTINUE + 180 CONTINUE + END IF +* + RETURN +* +* End of SGBTRF +* + END diff --git a/dspl/liblapack/SRC/sgbtrs.f b/dspl/liblapack/SRC/sgbtrs.f new file mode 100644 index 0000000..477b033 --- /dev/null +++ b/dspl/liblapack/SRC/sgbtrs.f @@ -0,0 +1,269 @@ +*> \brief \b SGBTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGBTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL AB( LDAB, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGBTRS solves a system of linear equations +*> A * X = B or A**T * X = B +*> with a general band matrix A using the LU factorization computed +*> by SGBTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations. +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T* X = B (Transpose) +*> = 'C': A**T* X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by SGBTRF. U is stored as an upper triangular band +*> matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +*> the multipliers used during the factorization are stored in +*> rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= N, row i of the matrix was +*> interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGBcomputational +* +* ===================================================================== + SUBROUTINE SGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL AB( LDAB, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LNOTI, NOTRAN + INTEGER I, J, KD, L, LM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SGER, SSWAP, STBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.( 2*KL+KU+1 ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + KD = KU + KL + 1 + LNOTI = KL.GT.0 +* + IF( NOTRAN ) THEN +* +* Solve A*X = B. +* +* Solve L*X = B, overwriting B with X. +* +* L is represented as a product of permutations and unit lower +* triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1), +* where each transformation L(i) is a rank-one modification of +* the identity matrix. +* + IF( LNOTI ) THEN + DO 10 J = 1, N - 1 + LM = MIN( KL, N-J ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL SSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + CALL SGER( LM, NRHS, -ONE, AB( KD+1, J ), 1, B( J, 1 ), + $ LDB, B( J+1, 1 ), LDB ) + 10 CONTINUE + END IF +* + DO 20 I = 1, NRHS +* +* Solve U*X = B, overwriting B with X. +* + CALL STBSV( 'Upper', 'No transpose', 'Non-unit', N, KL+KU, + $ AB, LDAB, B( 1, I ), 1 ) + 20 CONTINUE +* + ELSE +* +* Solve A**T*X = B. +* + DO 30 I = 1, NRHS +* +* Solve U**T*X = B, overwriting B with X. +* + CALL STBSV( 'Upper', 'Transpose', 'Non-unit', N, KL+KU, AB, + $ LDAB, B( 1, I ), 1 ) + 30 CONTINUE +* +* Solve L**T*X = B, overwriting B with X. +* + IF( LNOTI ) THEN + DO 40 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + CALL SGEMV( 'Transpose', LM, NRHS, -ONE, B( J+1, 1 ), + $ LDB, AB( KD+1, J ), 1, ONE, B( J, 1 ), LDB ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL SSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + 40 CONTINUE + END IF + END IF + RETURN +* +* End of SGBTRS +* + END diff --git a/dspl/liblapack/SRC/sgebak.f b/dspl/liblapack/SRC/sgebak.f new file mode 100644 index 0000000..ec58bf3 --- /dev/null +++ b/dspl/liblapack/SRC/sgebak.f @@ -0,0 +1,268 @@ +*> \brief \b SGEBAK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEBAK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOB, SIDE +* INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. +* REAL V( LDV, * ), SCALE( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEBAK forms the right or left eigenvectors of a real general matrix +*> by backward transformation on the computed eigenvectors of the +*> balanced matrix output by SGEBAL. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies the type of backward transformation required: +*> = 'N', do nothing, return immediately; +*> = 'P', do backward transformation for permutation only; +*> = 'S', do backward transformation for scaling only; +*> = 'B', do backward transformations for both permutation and +*> scaling. +*> JOB must be the same as the argument JOB supplied to SGEBAL. +*> \endverbatim +*> +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': V contains right eigenvectors; +*> = 'L': V contains left eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows of the matrix V. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> The integers ILO and IHI determined by SGEBAL. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in] SCALE +*> \verbatim +*> SCALE is REAL array, dimension (N) +*> Details of the permutation and scaling factors, as returned +*> by SGEBAL. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of columns of the matrix V. M >= 0. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is REAL array, dimension (LDV,M) +*> On entry, the matrix of right or left eigenvectors to be +*> transformed, as returned by SHSEIN or STREVC. +*> On exit, V is overwritten by the transformed eigenvectors. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +* ===================================================================== + SUBROUTINE SGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOB, SIDE + INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. + REAL V( LDV, * ), SCALE( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFTV, RIGHTV + INTEGER I, II, K + REAL S +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and Test the input parameters +* + RIGHTV = LSAME( SIDE, 'R' ) + LEFTV = LSAME( SIDE, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -7 + ELSE IF( LDV.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEBAK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( M.EQ.0 ) + $ RETURN + IF( LSAME( JOB, 'N' ) ) + $ RETURN +* + IF( ILO.EQ.IHI ) + $ GO TO 30 +* +* Backward balance +* + IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN +* + IF( RIGHTV ) THEN + DO 10 I = ILO, IHI + S = SCALE( I ) + CALL SSCAL( M, S, V( I, 1 ), LDV ) + 10 CONTINUE + END IF +* + IF( LEFTV ) THEN + DO 20 I = ILO, IHI + S = ONE / SCALE( I ) + CALL SSCAL( M, S, V( I, 1 ), LDV ) + 20 CONTINUE + END IF +* + END IF +* +* Backward permutation +* +* For I = ILO-1 step -1 until 1, +* IHI+1 step 1 until N do -- +* + 30 CONTINUE + IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN + IF( RIGHTV ) THEN + DO 40 II = 1, N + I = II + IF( I.GE.ILO .AND. I.LE.IHI ) + $ GO TO 40 + IF( I.LT.ILO ) + $ I = ILO - II + K = SCALE( I ) + IF( K.EQ.I ) + $ GO TO 40 + CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 40 CONTINUE + END IF +* + IF( LEFTV ) THEN + DO 50 II = 1, N + I = II + IF( I.GE.ILO .AND. I.LE.IHI ) + $ GO TO 50 + IF( I.LT.ILO ) + $ I = ILO - II + K = SCALE( I ) + IF( K.EQ.I ) + $ GO TO 50 + CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 50 CONTINUE + END IF + END IF +* + RETURN +* +* End of SGEBAK +* + END diff --git a/dspl/liblapack/SRC/sgebal.f b/dspl/liblapack/SRC/sgebal.f new file mode 100644 index 0000000..d34dc3b --- /dev/null +++ b/dspl/liblapack/SRC/sgebal.f @@ -0,0 +1,397 @@ +*> \brief \b SGEBAL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEBAL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOB +* INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), SCALE( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEBAL balances a general real matrix A. This involves, first, +*> permuting A by a similarity transformation to isolate eigenvalues +*> in the first 1 to ILO-1 and last IHI+1 to N elements on the +*> diagonal; and second, applying a diagonal similarity transformation +*> to rows and columns ILO to IHI to make the rows and columns as +*> close in norm as possible. Both steps are optional. +*> +*> Balancing may reduce the 1-norm of the matrix, and improve the +*> accuracy of the computed eigenvalues and/or eigenvectors. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies the operations to be performed on A: +*> = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 +*> for i = 1,...,N; +*> = 'P': permute only; +*> = 'S': scale only; +*> = 'B': both permute and scale. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the input matrix A. +*> On exit, A is overwritten by the balanced matrix. +*> If JOB = 'N', A is not referenced. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> \param[out] IHI +*> \verbatim +*> IHI is INTEGER +*> ILO and IHI are set to integers such that on exit +*> A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. +*> If JOB = 'N' or 'S', ILO = 1 and IHI = N. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL array, dimension (N) +*> Details of the permutations and scaling factors applied to +*> A. If P(j) is the index of the row and column interchanged +*> with row and column j and D(j) is the scaling factor +*> applied to row and column j, then +*> SCALE(j) = P(j) for j = 1,...,ILO-1 +*> = D(j) for j = ILO,...,IHI +*> = P(j) for j = IHI+1,...,N. +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The permutations consist of row and column interchanges which put +*> the matrix in the form +*> +*> ( T1 X Y ) +*> P A P = ( 0 B Z ) +*> ( 0 0 T2 ) +*> +*> where T1 and T2 are upper triangular matrices whose eigenvalues lie +*> along the diagonal. The column indices ILO and IHI mark the starting +*> and ending columns of the submatrix B. Balancing consists of applying +*> a diagonal similarity transformation inv(D) * B * D to make the +*> 1-norms of each row of B and its corresponding column nearly equal. +*> The output matrix is +*> +*> ( T1 X*D Y ) +*> ( 0 inv(D)*B*D inv(D)*Z ). +*> ( 0 0 T2 ) +*> +*> Information about the permutations P and the diagonal matrix D is +*> returned in the vector SCALE. +*> +*> This subroutine is based on the EISPACK routine BALANC. +*> +*> Modified by Tzu-Yi Chen, Computer Science Division, University of +*> California at Berkeley, USA +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), SCALE( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL SCLFAC + PARAMETER ( SCLFAC = 2.0E+0 ) + REAL FACTOR + PARAMETER ( FACTOR = 0.95E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOCONV + INTEGER I, ICA, IEXC, IRA, J, K, L, M + REAL C, CA, F, G, R, RA, S, SFMAX1, SFMAX2, SFMIN1, + $ SFMIN2 +* .. +* .. External Functions .. + LOGICAL SISNAN, LSAME + INTEGER ISAMAX + REAL SLAMCH, SNRM2 + EXTERNAL SISNAN, LSAME, ISAMAX, SLAMCH, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEBAL', -INFO ) + RETURN + END IF +* + K = 1 + L = N +* + IF( N.EQ.0 ) + $ GO TO 210 +* + IF( LSAME( JOB, 'N' ) ) THEN + DO 10 I = 1, N + SCALE( I ) = ONE + 10 CONTINUE + GO TO 210 + END IF +* + IF( LSAME( JOB, 'S' ) ) + $ GO TO 120 +* +* Permutation to isolate eigenvalues if possible +* + GO TO 50 +* +* Row and column exchange. +* + 20 CONTINUE + SCALE( M ) = J + IF( J.EQ.M ) + $ GO TO 30 +* + CALL SSWAP( L, A( 1, J ), 1, A( 1, M ), 1 ) + CALL SSWAP( N-K+1, A( J, K ), LDA, A( M, K ), LDA ) +* + 30 CONTINUE + GO TO ( 40, 80 )IEXC +* +* Search for rows isolating an eigenvalue and push them down. +* + 40 CONTINUE + IF( L.EQ.1 ) + $ GO TO 210 + L = L - 1 +* + 50 CONTINUE + DO 70 J = L, 1, -1 +* + DO 60 I = 1, L + IF( I.EQ.J ) + $ GO TO 60 + IF( A( J, I ).NE.ZERO ) + $ GO TO 70 + 60 CONTINUE +* + M = L + IEXC = 1 + GO TO 20 + 70 CONTINUE +* + GO TO 90 +* +* Search for columns isolating an eigenvalue and push them left. +* + 80 CONTINUE + K = K + 1 +* + 90 CONTINUE + DO 110 J = K, L +* + DO 100 I = K, L + IF( I.EQ.J ) + $ GO TO 100 + IF( A( I, J ).NE.ZERO ) + $ GO TO 110 + 100 CONTINUE +* + M = K + IEXC = 2 + GO TO 20 + 110 CONTINUE +* + 120 CONTINUE + DO 130 I = K, L + SCALE( I ) = ONE + 130 CONTINUE +* + IF( LSAME( JOB, 'P' ) ) + $ GO TO 210 +* +* Balance the submatrix in rows K to L. +* +* Iterative loop for norm reduction +* + SFMIN1 = SLAMCH( 'S' ) / SLAMCH( 'P' ) + SFMAX1 = ONE / SFMIN1 + SFMIN2 = SFMIN1*SCLFAC + SFMAX2 = ONE / SFMIN2 + 140 CONTINUE + NOCONV = .FALSE. +* + DO 200 I = K, L +* + C = SNRM2( L-K+1, A( K, I ), 1 ) + R = SNRM2( L-K+1, A( I, K ), LDA ) + ICA = ISAMAX( L, A( 1, I ), 1 ) + CA = ABS( A( ICA, I ) ) + IRA = ISAMAX( N-K+1, A( I, K ), LDA ) + RA = ABS( A( I, IRA+K-1 ) ) +* +* Guard against zero C or R due to underflow. +* + IF( C.EQ.ZERO .OR. R.EQ.ZERO ) + $ GO TO 200 + G = R / SCLFAC + F = ONE + S = C + R + 160 CONTINUE + IF( C.GE.G .OR. MAX( F, C, CA ).GE.SFMAX2 .OR. + $ MIN( R, G, RA ).LE.SFMIN2 )GO TO 170 + F = F*SCLFAC + C = C*SCLFAC + CA = CA*SCLFAC + R = R / SCLFAC + G = G / SCLFAC + RA = RA / SCLFAC + GO TO 160 +* + 170 CONTINUE + G = C / SCLFAC + 180 CONTINUE + IF( G.LT.R .OR. MAX( R, RA ).GE.SFMAX2 .OR. + $ MIN( F, C, G, CA ).LE.SFMIN2 )GO TO 190 + IF( SISNAN( C+F+CA+R+G+RA ) ) THEN +* +* Exit if NaN to avoid infinite loop +* + INFO = -3 + CALL XERBLA( 'SGEBAL', -INFO ) + RETURN + END IF + F = F / SCLFAC + C = C / SCLFAC + G = G / SCLFAC + CA = CA / SCLFAC + R = R*SCLFAC + RA = RA*SCLFAC + GO TO 180 +* +* Now balance. +* + 190 CONTINUE + IF( ( C+R ).GE.FACTOR*S ) + $ GO TO 200 + IF( F.LT.ONE .AND. SCALE( I ).LT.ONE ) THEN + IF( F*SCALE( I ).LE.SFMIN1 ) + $ GO TO 200 + END IF + IF( F.GT.ONE .AND. SCALE( I ).GT.ONE ) THEN + IF( SCALE( I ).GE.SFMAX1 / F ) + $ GO TO 200 + END IF + G = ONE / F + SCALE( I ) = SCALE( I )*F + NOCONV = .TRUE. +* + CALL SSCAL( N-K+1, G, A( I, K ), LDA ) + CALL SSCAL( L, F, A( 1, I ), 1 ) +* + 200 CONTINUE +* + IF( NOCONV ) + $ GO TO 140 +* + 210 CONTINUE + ILO = K + IHI = L +* + RETURN +* +* End of SGEBAL +* + END diff --git a/dspl/liblapack/SRC/sgebd2.f b/dspl/liblapack/SRC/sgebd2.f new file mode 100644 index 0000000..2123ce2 --- /dev/null +++ b/dspl/liblapack/SRC/sgebd2.f @@ -0,0 +1,320 @@ +*> \brief \b SGEBD2 reduces a general matrix to bidiagonal form using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEBD2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), D( * ), E( * ), TAUP( * ), +* $ TAUQ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEBD2 reduces a real general m by n matrix A to upper or lower +*> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. +*> +*> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows in the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the m by n general matrix to be reduced. +*> On exit, +*> if m >= n, the diagonal and the first superdiagonal are +*> overwritten with the upper bidiagonal matrix B; the +*> elements below the diagonal, with the array TAUQ, represent +*> the orthogonal matrix Q as a product of elementary +*> reflectors, and the elements above the first superdiagonal, +*> with the array TAUP, represent the orthogonal matrix P as +*> a product of elementary reflectors; +*> if m < n, the diagonal and the first subdiagonal are +*> overwritten with the lower bidiagonal matrix B; the +*> elements below the first subdiagonal, with the array TAUQ, +*> represent the orthogonal matrix Q as a product of +*> elementary reflectors, and the elements above the diagonal, +*> with the array TAUP, represent the orthogonal matrix P as +*> a product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (min(M,N)) +*> The diagonal elements of the bidiagonal matrix B: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (min(M,N)-1) +*> The off-diagonal elements of the bidiagonal matrix B: +*> if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +*> if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +*> \endverbatim +*> +*> \param[out] TAUQ +*> \verbatim +*> TAUQ is REAL array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix Q. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP +*> \verbatim +*> TAUP is REAL array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix P. See Further Details. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (max(M,N)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrices Q and P are represented as products of elementary +*> reflectors: +*> +*> If m >= n, +*> +*> Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T +*> +*> where tauq and taup are real scalars, and v and u are real vectors; +*> v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); +*> u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); +*> tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> If m < n, +*> +*> Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T +*> +*> where tauq and taup are real scalars, and v and u are real vectors; +*> v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); +*> u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); +*> tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> The contents of A on exit are illustrated by the following examples: +*> +*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +*> +*> ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +*> ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +*> ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +*> ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +*> ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +*> ( v1 v2 v3 v4 v5 ) +*> +*> where d and e denote diagonal and off-diagonal elements of B, vi +*> denotes an element of the vector defining H(i), and ui an element of +*> the vector defining G(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), D( * ), E( * ), TAUP( * ), + $ TAUQ( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'SGEBD2', -INFO ) + RETURN + END IF +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, N +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL SLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = A( I, I ) + A( I, I ) = ONE +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + IF( I.LT.N ) + $ CALL SLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAUQ( I ), + $ A( I, I+1 ), LDA, WORK ) + A( I, I ) = D( I ) +* + IF( I.LT.N ) THEN +* +* Generate elementary reflector G(i) to annihilate +* A(i,i+2:n) +* + CALL SLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ), + $ LDA, TAUP( I ) ) + E( I ) = A( I, I+1 ) + A( I, I+1 ) = ONE +* +* Apply G(i) to A(i+1:m,i+1:n) from the right +* + CALL SLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA, + $ TAUP( I ), A( I+1, I+1 ), LDA, WORK ) + A( I, I+1 ) = E( I ) + ELSE + TAUP( I ) = ZERO + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, M +* +* Generate elementary reflector G(i) to annihilate A(i,i+1:n) +* + CALL SLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = A( I, I ) + A( I, I ) = ONE +* +* Apply G(i) to A(i+1:m,i:n) from the right +* + IF( I.LT.M ) + $ CALL SLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ TAUP( I ), A( I+1, I ), LDA, WORK ) + A( I, I ) = D( I ) +* + IF( I.LT.M ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:m,i) +* + CALL SLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = A( I+1, I ) + A( I+1, I ) = ONE +* +* Apply H(i) to A(i+1:m,i+1:n) from the left +* + CALL SLARF( 'Left', M-I, N-I, A( I+1, I ), 1, TAUQ( I ), + $ A( I+1, I+1 ), LDA, WORK ) + A( I+1, I ) = E( I ) + ELSE + TAUQ( I ) = ZERO + END IF + 20 CONTINUE + END IF + RETURN +* +* End of SGEBD2 +* + END diff --git a/dspl/liblapack/SRC/sgebrd.f b/dspl/liblapack/SRC/sgebrd.f new file mode 100644 index 0000000..e825b4f --- /dev/null +++ b/dspl/liblapack/SRC/sgebrd.f @@ -0,0 +1,352 @@ +*> \brief \b SGEBRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEBRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), D( * ), E( * ), TAUP( * ), +* $ TAUQ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEBRD reduces a general real M-by-N matrix A to upper or lower +*> bidiagonal form B by an orthogonal transformation: Q**T * A * P = B. +*> +*> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows in the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N general matrix to be reduced. +*> On exit, +*> if m >= n, the diagonal and the first superdiagonal are +*> overwritten with the upper bidiagonal matrix B; the +*> elements below the diagonal, with the array TAUQ, represent +*> the orthogonal matrix Q as a product of elementary +*> reflectors, and the elements above the first superdiagonal, +*> with the array TAUP, represent the orthogonal matrix P as +*> a product of elementary reflectors; +*> if m < n, the diagonal and the first subdiagonal are +*> overwritten with the lower bidiagonal matrix B; the +*> elements below the first subdiagonal, with the array TAUQ, +*> represent the orthogonal matrix Q as a product of +*> elementary reflectors, and the elements above the diagonal, +*> with the array TAUP, represent the orthogonal matrix P as +*> a product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (min(M,N)) +*> The diagonal elements of the bidiagonal matrix B: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (min(M,N)-1) +*> The off-diagonal elements of the bidiagonal matrix B: +*> if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +*> if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +*> \endverbatim +*> +*> \param[out] TAUQ +*> \verbatim +*> TAUQ is REAL array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix Q. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP +*> \verbatim +*> TAUP is REAL array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix P. See Further Details. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,M,N). +*> For optimum performance LWORK >= (M+N)*NB, where NB +*> is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrices Q and P are represented as products of elementary +*> reflectors: +*> +*> If m >= n, +*> +*> Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T +*> +*> where tauq and taup are real scalars, and v and u are real vectors; +*> v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i); +*> u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n); +*> tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> If m < n, +*> +*> Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T +*> +*> where tauq and taup are real scalars, and v and u are real vectors; +*> v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); +*> u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); +*> tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> The contents of A on exit are illustrated by the following examples: +*> +*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +*> +*> ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +*> ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +*> ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +*> ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +*> ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +*> ( v1 v2 v3 v4 v5 ) +*> +*> where d and e denote diagonal and off-diagonal elements of B, vi +*> denotes an element of the vector defining H(i), and ui an element of +*> the vector defining G(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), D( * ), E( * ), TAUP( * ), + $ TAUQ( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB, + $ NBMIN, NX, WS +* .. +* .. External Subroutines .. + EXTERNAL SGEBD2, SGEMM, SLABRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB = MAX( 1, ILAENV( 1, 'SGEBRD', ' ', M, N, -1, -1 ) ) + LWKOPT = ( M+N )*NB + WORK( 1 ) = REAL( LWKOPT ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M, N ) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'SGEBRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + MINMN = MIN( M, N ) + IF( MINMN.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + WS = MAX( M, N ) + LDWRKX = M + LDWRKY = N +* + IF( NB.GT.1 .AND. NB.LT.MINMN ) THEN +* +* Set the crossover point NX. +* + NX = MAX( NB, ILAENV( 3, 'SGEBRD', ' ', M, N, -1, -1 ) ) +* +* Determine when to switch from blocked to unblocked code. +* + IF( NX.LT.MINMN ) THEN + WS = ( M+N )*NB + IF( LWORK.LT.WS ) THEN +* +* Not enough work space for the optimal NB, consider using +* a smaller block size. +* + NBMIN = ILAENV( 2, 'SGEBRD', ' ', M, N, -1, -1 ) + IF( LWORK.GE.( M+N )*NBMIN ) THEN + NB = LWORK / ( M+N ) + ELSE + NB = 1 + NX = MINMN + END IF + END IF + END IF + ELSE + NX = MINMN + END IF +* + DO 30 I = 1, MINMN - NX, NB +* +* Reduce rows and columns i:i+nb-1 to bidiagonal form and return +* the matrices X and Y which are needed to update the unreduced +* part of the matrix +* + CALL SLABRD( M-I+1, N-I+1, NB, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, LDWRKX, + $ WORK( LDWRKX*NB+1 ), LDWRKY ) +* +* Update the trailing submatrix A(i+nb:m,i+nb:n), using an update +* of the form A := A - V*Y**T - X*U**T +* + CALL SGEMM( 'No transpose', 'Transpose', M-I-NB+1, N-I-NB+1, + $ NB, -ONE, A( I+NB, I ), LDA, + $ WORK( LDWRKX*NB+NB+1 ), LDWRKY, ONE, + $ A( I+NB, I+NB ), LDA ) + CALL SGEMM( 'No transpose', 'No transpose', M-I-NB+1, N-I-NB+1, + $ NB, -ONE, WORK( NB+1 ), LDWRKX, A( I, I+NB ), LDA, + $ ONE, A( I+NB, I+NB ), LDA ) +* +* Copy diagonal and off-diagonal elements of B back into A +* + IF( M.GE.N ) THEN + DO 10 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J, J+1 ) = E( J ) + 10 CONTINUE + ELSE + DO 20 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J+1, J ) = E( J ) + 20 CONTINUE + END IF + 30 CONTINUE +* +* Use unblocked code to reduce the remainder of the matrix +* + CALL SGEBD2( M-I+1, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, IINFO ) + WORK( 1 ) = WS + RETURN +* +* End of SGEBRD +* + END diff --git a/dspl/liblapack/SRC/sgecon.f b/dspl/liblapack/SRC/sgecon.f new file mode 100644 index 0000000..d8f087d --- /dev/null +++ b/dspl/liblapack/SRC/sgecon.f @@ -0,0 +1,261 @@ +*> \brief \b SGECON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGECON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER INFO, LDA, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGECON estimates the reciprocal of the condition number of a general +*> real matrix A, in either the 1-norm or the infinity-norm, using +*> the LU factorization computed by SGETRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as +*> RCOND = 1 / ( norm(A) * norm(inv(A)) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The factors L and U from the factorization A = P*L*U +*> as computed by SGETRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> If NORM = '1' or 'O', the 1-norm of the original matrix A. +*> If NORM = 'I', the infinity-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(norm(A) * norm(inv(A))). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +* ===================================================================== + SUBROUTINE SGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + REAL AINVNM, SCALE, SL, SMLNUM, SU +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SLATRS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGECON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + CALL SLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A, + $ LDA, WORK, SL, WORK( 2*N+1 ), INFO ) +* +* Multiply by inv(U). +* + CALL SLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SU, WORK( 3*N+1 ), INFO ) + ELSE +* +* Multiply by inv(U**T). +* + CALL SLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, A, + $ LDA, WORK, SU, WORK( 3*N+1 ), INFO ) +* +* Multiply by inv(L**T). +* + CALL SLATRS( 'Lower', 'Transpose', 'Unit', NORMIN, N, A, + $ LDA, WORK, SL, WORK( 2*N+1 ), INFO ) + END IF +* +* Divide X by 1/(SL*SU) if doing so will not cause overflow. +* + SCALE = SL*SU + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of SGECON +* + END diff --git a/dspl/liblapack/SRC/sgeequ.f b/dspl/liblapack/SRC/sgeequ.f new file mode 100644 index 0000000..bb5592b --- /dev/null +++ b/dspl/liblapack/SRC/sgeequ.f @@ -0,0 +1,304 @@ +*> \brief \b SGEEQU +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEEQU + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), C( * ), R( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEEQU computes row and column scalings intended to equilibrate an +*> M-by-N matrix A and reduce its condition number. R returns the row +*> scale factors and C the column scale factors, chosen to try to make +*> the largest element in each row and column of the matrix B with +*> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. +*> +*> R(i) and C(j) are restricted to be between SMLNUM = smallest safe +*> number and BIGNUM = largest safe number. Use of these scaling +*> factors is not guaranteed to reduce the condition number of A but +*> works well in practice. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The M-by-N matrix whose equilibration factors are +*> to be computed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is REAL array, dimension (M) +*> If INFO = 0 or INFO > M, R contains the row scale factors +*> for A. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is REAL array, dimension (N) +*> If INFO = 0, C contains the column scale factors for A. +*> \endverbatim +*> +*> \param[out] ROWCND +*> \verbatim +*> ROWCND is REAL +*> If INFO = 0 or INFO > M, ROWCND contains the ratio of the +*> smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +*> AMAX is neither too large nor too small, it is not worth +*> scaling by R. +*> \endverbatim +*> +*> \param[out] COLCND +*> \verbatim +*> COLCND is REAL +*> If INFO = 0, COLCND contains the ratio of the smallest +*> C(i) to the largest C(i). If COLCND >= 0.1, it is not +*> worth scaling by C. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= M: the i-th row of A is exactly zero +*> > M: the (i-M)-th column of A is exactly zero +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +* ===================================================================== + SUBROUTINE SGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N + REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( * ), R( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL BIGNUM, RCMAX, RCMIN, SMLNUM +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + DO 30 J = 1, N + DO 20 I = 1, M + R( I ) = MAX( R( I ), ABS( A( I, J ) ) ) + 20 CONTINUE + 30 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)) +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + DO 90 J = 1, N + DO 80 I = 1, M + C( J ) = MAX( C( J ), ABS( A( I, J ) )*R( I ) ) + 80 CONTINUE + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)) +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of SGEEQU +* + END diff --git a/dspl/liblapack/SRC/sgeequb.f b/dspl/liblapack/SRC/sgeequb.f new file mode 100644 index 0000000..e04ee45 --- /dev/null +++ b/dspl/liblapack/SRC/sgeequb.f @@ -0,0 +1,321 @@ +*> \brief \b SGEEQUB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEEQUB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEEQUB( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), C( * ), R( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEEQUB computes row and column scalings intended to equilibrate an +*> M-by-N matrix A and reduce its condition number. R returns the row +*> scale factors and C the column scale factors, chosen to try to make +*> the largest element in each row and column of the matrix B with +*> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most +*> the radix. +*> +*> R(i) and C(j) are restricted to be a power of the radix between +*> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use +*> of these scaling factors is not guaranteed to reduce the condition +*> number of A but works well in practice. +*> +*> This routine differs from SGEEQU by restricting the scaling factors +*> to a power of the radix. Barring over- and underflow, scaling by +*> these factors introduces no additional rounding errors. However, the +*> scaled entries' magnitudes are no longer approximately 1 but lie +*> between sqrt(radix) and 1/sqrt(radix). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The M-by-N matrix whose equilibration factors are +*> to be computed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is REAL array, dimension (M) +*> If INFO = 0 or INFO > M, R contains the row scale factors +*> for A. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is REAL array, dimension (N) +*> If INFO = 0, C contains the column scale factors for A. +*> \endverbatim +*> +*> \param[out] ROWCND +*> \verbatim +*> ROWCND is REAL +*> If INFO = 0 or INFO > M, ROWCND contains the ratio of the +*> smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +*> AMAX is neither too large nor too small, it is not worth +*> scaling by R. +*> \endverbatim +*> +*> \param[out] COLCND +*> \verbatim +*> COLCND is REAL +*> If INFO = 0, COLCND contains the ratio of the smallest +*> C(i) to the largest C(i). If COLCND >= 0.1, it is not +*> worth scaling by C. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= M: the i-th row of A is exactly zero +*> > M: the (i-M)-th column of A is exactly zero +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +* ===================================================================== + SUBROUTINE SGEEQUB( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N + REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( * ), R( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL BIGNUM, RCMAX, RCMIN, SMLNUM, RADIX, LOGRDX +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, LOG +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEEQUB', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. Assume SMLNUM is a power of the radix. +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + RADIX = SLAMCH( 'B' ) + LOGRDX = LOG( RADIX ) +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + DO 30 J = 1, N + DO 20 I = 1, M + R( I ) = MAX( R( I ), ABS( A( I, J ) ) ) + 20 CONTINUE + 30 CONTINUE + DO I = 1, M + IF( R( I ).GT.ZERO ) THEN + R( I ) = RADIX**INT( LOG( R( I ) ) / LOGRDX ) + END IF + END DO +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)). +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + DO 90 J = 1, N + DO 80 I = 1, M + C( J ) = MAX( C( J ), ABS( A( I, J ) )*R( I ) ) + 80 CONTINUE + IF( C( J ).GT.ZERO ) THEN + C( J ) = RADIX**INT( LOG( C( J ) ) / LOGRDX ) + END IF + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)). +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of SGEEQUB +* + END diff --git a/dspl/liblapack/SRC/sgees.f b/dspl/liblapack/SRC/sgees.f new file mode 100644 index 0000000..06c4513 --- /dev/null +++ b/dspl/liblapack/SRC/sgees.f @@ -0,0 +1,535 @@ +*> \brief SGEES computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEES + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, WR, WI, +* VS, LDVS, WORK, LWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVS, SORT +* INTEGER INFO, LDA, LDVS, LWORK, N, SDIM +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* REAL A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), +* $ WR( * ) +* .. +* .. Function Arguments .. +* LOGICAL SELECT +* EXTERNAL SELECT +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEES computes for an N-by-N real nonsymmetric matrix A, the +*> eigenvalues, the real Schur form T, and, optionally, the matrix of +*> Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). +*> +*> Optionally, it also orders the eigenvalues on the diagonal of the +*> real Schur form so that selected eigenvalues are at the top left. +*> The leading columns of Z then form an orthonormal basis for the +*> invariant subspace corresponding to the selected eigenvalues. +*> +*> A matrix is in real Schur form if it is upper quasi-triangular with +*> 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in the +*> form +*> [ a b ] +*> [ c a ] +*> +*> where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVS +*> \verbatim +*> JOBVS is CHARACTER*1 +*> = 'N': Schur vectors are not computed; +*> = 'V': Schur vectors are computed. +*> \endverbatim +*> +*> \param[in] SORT +*> \verbatim +*> SORT is CHARACTER*1 +*> Specifies whether or not to order the eigenvalues on the +*> diagonal of the Schur form. +*> = 'N': Eigenvalues are not ordered; +*> = 'S': Eigenvalues are ordered (see SELECT). +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is a LOGICAL FUNCTION of two REAL arguments +*> SELECT must be declared EXTERNAL in the calling subroutine. +*> If SORT = 'S', SELECT is used to select eigenvalues to sort +*> to the top left of the Schur form. +*> If SORT = 'N', SELECT is not referenced. +*> An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if +*> SELECT(WR(j),WI(j)) is true; i.e., if either one of a complex +*> conjugate pair of eigenvalues is selected, then both complex +*> eigenvalues are selected. +*> Note that a selected complex eigenvalue may no longer +*> satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since +*> ordering may change the value of complex eigenvalues +*> (especially if the eigenvalue is ill-conditioned); in this +*> case INFO is set to N+2 (see INFO below). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> On exit, A has been overwritten by its real Schur form T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] SDIM +*> \verbatim +*> SDIM is INTEGER +*> If SORT = 'N', SDIM = 0. +*> If SORT = 'S', SDIM = number of eigenvalues (after sorting) +*> for which SELECT is true. (Complex conjugate +*> pairs for which SELECT is true for either +*> eigenvalue count as 2.) +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] WI +*> \verbatim +*> WI is REAL array, dimension (N) +*> WR and WI contain the real and imaginary parts, +*> respectively, of the computed eigenvalues in the same order +*> that they appear on the diagonal of the output Schur form T. +*> Complex conjugate pairs of eigenvalues will appear +*> consecutively with the eigenvalue having the positive +*> imaginary part first. +*> \endverbatim +*> +*> \param[out] VS +*> \verbatim +*> VS is REAL array, dimension (LDVS,N) +*> If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur +*> vectors. +*> If JOBVS = 'N', VS is not referenced. +*> \endverbatim +*> +*> \param[in] LDVS +*> \verbatim +*> LDVS is INTEGER +*> The leading dimension of the array VS. LDVS >= 1; if +*> JOBVS = 'V', LDVS >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) contains the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,3*N). +*> For good performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> Not referenced if SORT = 'N'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, and i is +*> <= N: the QR algorithm failed to compute all the +*> eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI +*> contain those eigenvalues which have converged; if +*> JOBVS = 'V', VS contains the matrix which reduces A +*> to its partially converged Schur form. +*> = N+1: the eigenvalues could not be reordered because some +*> eigenvalues were too close to separate (the problem +*> is very ill-conditioned); +*> = N+2: after reordering, roundoff changed values of some +*> complex eigenvalues so that leading eigenvalues in +*> the Schur form no longer satisfy SELECT=.TRUE. This +*> could also be caused by underflow due to scaling. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realGEeigen +* +* ===================================================================== + SUBROUTINE SGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, WR, WI, + $ VS, LDVS, WORK, LWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBVS, SORT + INTEGER INFO, LDA, LDVS, LWORK, N, SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + REAL A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), + $ WR( * ) +* .. +* .. Function Arguments .. + LOGICAL SELECT + EXTERNAL SELECT +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, LASTSL, LQUERY, LST2SL, SCALEA, WANTST, + $ WANTVS + INTEGER HSWORK, I, I1, I2, IBAL, ICOND, IERR, IEVAL, + $ IHI, ILO, INXT, IP, ITAU, IWRK, MAXWRK, MINWRK + REAL ANRM, BIGNUM, CSCALE, EPS, S, SEP, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLABAD, + $ SLACPY, SLASCL, SORGHR, SSWAP, STRSEN, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVS = LSAME( JOBVS, 'V' ) + WANTST = LSAME( SORT, 'S' ) + IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by SHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + ELSE + MAXWRK = 2*N + N*ILAENV( 1, 'SGEHRD', ' ', N, 1, N, 0 ) + MINWRK = 3*N +* + CALL SHSEQR( 'S', JOBVS, N, 1, N, A, LDA, WR, WI, VS, LDVS, + $ WORK, -1, IEVAL ) + HSWORK = WORK( 1 ) +* + IF( .NOT.WANTVS ) THEN + MAXWRK = MAX( MAXWRK, N + HSWORK ) + ELSE + MAXWRK = MAX( MAXWRK, 2*N + ( N - 1 )*ILAENV( 1, + $ 'SORGHR', ' ', N, 1, N, -1 ) ) + MAXWRK = MAX( MAXWRK, N + HSWORK ) + END IF + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEES ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL SLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (Workspace: need N) +* + IBAL = 1 + CALL SGEBAL( 'P', N, A, LDA, ILO, IHI, WORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (Workspace: need 3*N, prefer 2*N+N*NB) +* + ITAU = N + IBAL + IWRK = N + ITAU + CALL SGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVS ) THEN +* +* Copy Householder vectors to VS +* + CALL SLACPY( 'L', N, N, A, LDA, VS, LDVS ) +* +* Generate orthogonal matrix in VS +* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* + CALL SORGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) + END IF +* + SDIM = 0 +* +* Perform QR iteration, accumulating Schur vectors in VS if desired +* (Workspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL SHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, WR, WI, VS, LDVS, + $ WORK( IWRK ), LWORK-IWRK+1, IEVAL ) + IF( IEVAL.GT.0 ) + $ INFO = IEVAL +* +* Sort eigenvalues if desired +* + IF( WANTST .AND. INFO.EQ.0 ) THEN + IF( SCALEA ) THEN + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WR, N, IERR ) + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WI, N, IERR ) + END IF + DO 10 I = 1, N + BWORK( I ) = SELECT( WR( I ), WI( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues and transform Schur vectors +* (Workspace: none needed) +* + CALL STRSEN( 'N', JOBVS, BWORK, N, A, LDA, VS, LDVS, WR, WI, + $ SDIM, S, SEP, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1, + $ ICOND ) + IF( ICOND.GT.0 ) + $ INFO = N + ICOND + END IF +* + IF( WANTVS ) THEN +* +* Undo balancing +* (Workspace: need N) +* + CALL SGEBAK( 'P', 'R', N, ILO, IHI, WORK( IBAL ), N, VS, LDVS, + $ IERR ) + END IF +* + IF( SCALEA ) THEN +* +* Undo scaling for the Schur form of A +* + CALL SLASCL( 'H', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR ) + CALL SCOPY( N, A, LDA+1, WR, 1 ) + IF( CSCALE.EQ.SMLNUM ) THEN +* +* If scaling back towards underflow, adjust WI if an +* offdiagonal element of a 2-by-2 block in the Schur form +* underflows. +* + IF( IEVAL.GT.0 ) THEN + I1 = IEVAL + 1 + I2 = IHI - 1 + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, + $ MAX( ILO-1, 1 ), IERR ) + ELSE IF( WANTST ) THEN + I1 = 1 + I2 = N - 1 + ELSE + I1 = ILO + I2 = IHI - 1 + END IF + INXT = I1 - 1 + DO 20 I = I1, I2 + IF( I.LT.INXT ) + $ GO TO 20 + IF( WI( I ).EQ.ZERO ) THEN + INXT = I + 1 + ELSE + IF( A( I+1, I ).EQ.ZERO ) THEN + WI( I ) = ZERO + WI( I+1 ) = ZERO + ELSE IF( A( I+1, I ).NE.ZERO .AND. A( I, I+1 ).EQ. + $ ZERO ) THEN + WI( I ) = ZERO + WI( I+1 ) = ZERO + IF( I.GT.1 ) + $ CALL SSWAP( I-1, A( 1, I ), 1, A( 1, I+1 ), 1 ) + IF( N.GT.I+1 ) + $ CALL SSWAP( N-I-1, A( I, I+2 ), LDA, + $ A( I+1, I+2 ), LDA ) + IF( WANTVS ) THEN + CALL SSWAP( N, VS( 1, I ), 1, VS( 1, I+1 ), 1 ) + END IF + A( I, I+1 ) = A( I+1, I ) + A( I+1, I ) = ZERO + END IF + INXT = I + 2 + END IF + 20 CONTINUE + END IF +* +* Undo scaling for the imaginary part of the eigenvalues +* + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-IEVAL, 1, + $ WI( IEVAL+1 ), MAX( N-IEVAL, 1 ), IERR ) + END IF +* + IF( WANTST .AND. INFO.EQ.0 ) THEN +* +* Check if reordering successful +* + LASTSL = .TRUE. + LST2SL = .TRUE. + SDIM = 0 + IP = 0 + DO 30 I = 1, N + CURSL = SELECT( WR( I ), WI( I ) ) + IF( WI( I ).EQ.ZERO ) THEN + IF( CURSL ) + $ SDIM = SDIM + 1 + IP = 0 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + ELSE + IF( IP.EQ.1 ) THEN +* +* Last eigenvalue of conjugate pair +* + CURSL = CURSL .OR. LASTSL + LASTSL = CURSL + IF( CURSL ) + $ SDIM = SDIM + 2 + IP = -1 + IF( CURSL .AND. .NOT.LST2SL ) + $ INFO = N + 2 + ELSE +* +* First eigenvalue of conjugate pair +* + IP = 1 + END IF + END IF + LST2SL = LASTSL + LASTSL = CURSL + 30 CONTINUE + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of SGEES +* + END diff --git a/dspl/liblapack/SRC/sgeesx.f b/dspl/liblapack/SRC/sgeesx.f new file mode 100644 index 0000000..c90de9b --- /dev/null +++ b/dspl/liblapack/SRC/sgeesx.f @@ -0,0 +1,649 @@ +*> \brief SGEESX computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEESX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, +* WR, WI, VS, LDVS, RCONDE, RCONDV, WORK, LWORK, +* IWORK, LIWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVS, SENSE, SORT +* INTEGER INFO, LDA, LDVS, LIWORK, LWORK, N, SDIM +* REAL RCONDE, RCONDV +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* INTEGER IWORK( * ) +* REAL A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), +* $ WR( * ) +* .. +* .. Function Arguments .. +* LOGICAL SELECT +* EXTERNAL SELECT +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEESX computes for an N-by-N real nonsymmetric matrix A, the +*> eigenvalues, the real Schur form T, and, optionally, the matrix of +*> Schur vectors Z. This gives the Schur factorization A = Z*T*(Z**T). +*> +*> Optionally, it also orders the eigenvalues on the diagonal of the +*> real Schur form so that selected eigenvalues are at the top left; +*> computes a reciprocal condition number for the average of the +*> selected eigenvalues (RCONDE); and computes a reciprocal condition +*> number for the right invariant subspace corresponding to the +*> selected eigenvalues (RCONDV). The leading columns of Z form an +*> orthonormal basis for this invariant subspace. +*> +*> For further explanation of the reciprocal condition numbers RCONDE +*> and RCONDV, see Section 4.10 of the LAPACK Users' Guide (where +*> these quantities are called s and sep respectively). +*> +*> A real matrix is in real Schur form if it is upper quasi-triangular +*> with 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in +*> the form +*> [ a b ] +*> [ c a ] +*> +*> where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVS +*> \verbatim +*> JOBVS is CHARACTER*1 +*> = 'N': Schur vectors are not computed; +*> = 'V': Schur vectors are computed. +*> \endverbatim +*> +*> \param[in] SORT +*> \verbatim +*> SORT is CHARACTER*1 +*> Specifies whether or not to order the eigenvalues on the +*> diagonal of the Schur form. +*> = 'N': Eigenvalues are not ordered; +*> = 'S': Eigenvalues are ordered (see SELECT). +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is a LOGICAL FUNCTION of two REAL arguments +*> SELECT must be declared EXTERNAL in the calling subroutine. +*> If SORT = 'S', SELECT is used to select eigenvalues to sort +*> to the top left of the Schur form. +*> If SORT = 'N', SELECT is not referenced. +*> An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if +*> SELECT(WR(j),WI(j)) is true; i.e., if either one of a +*> complex conjugate pair of eigenvalues is selected, then both +*> are. Note that a selected complex eigenvalue may no longer +*> satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since +*> ordering may change the value of complex eigenvalues +*> (especially if the eigenvalue is ill-conditioned); in this +*> case INFO may be set to N+3 (see INFO below). +*> \endverbatim +*> +*> \param[in] SENSE +*> \verbatim +*> SENSE is CHARACTER*1 +*> Determines which reciprocal condition numbers are computed. +*> = 'N': None are computed; +*> = 'E': Computed for average of selected eigenvalues only; +*> = 'V': Computed for selected right invariant subspace only; +*> = 'B': Computed for both. +*> If SENSE = 'E', 'V' or 'B', SORT must equal 'S'. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the N-by-N matrix A. +*> On exit, A is overwritten by its real Schur form T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] SDIM +*> \verbatim +*> SDIM is INTEGER +*> If SORT = 'N', SDIM = 0. +*> If SORT = 'S', SDIM = number of eigenvalues (after sorting) +*> for which SELECT is true. (Complex conjugate +*> pairs for which SELECT is true for either +*> eigenvalue count as 2.) +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] WI +*> \verbatim +*> WI is REAL array, dimension (N) +*> WR and WI contain the real and imaginary parts, respectively, +*> of the computed eigenvalues, in the same order that they +*> appear on the diagonal of the output Schur form T. Complex +*> conjugate pairs of eigenvalues appear consecutively with the +*> eigenvalue having the positive imaginary part first. +*> \endverbatim +*> +*> \param[out] VS +*> \verbatim +*> VS is REAL array, dimension (LDVS,N) +*> If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur +*> vectors. +*> If JOBVS = 'N', VS is not referenced. +*> \endverbatim +*> +*> \param[in] LDVS +*> \verbatim +*> LDVS is INTEGER +*> The leading dimension of the array VS. LDVS >= 1, and if +*> JOBVS = 'V', LDVS >= N. +*> \endverbatim +*> +*> \param[out] RCONDE +*> \verbatim +*> RCONDE is REAL +*> If SENSE = 'E' or 'B', RCONDE contains the reciprocal +*> condition number for the average of the selected eigenvalues. +*> Not referenced if SENSE = 'N' or 'V'. +*> \endverbatim +*> +*> \param[out] RCONDV +*> \verbatim +*> RCONDV is REAL +*> If SENSE = 'V' or 'B', RCONDV contains the reciprocal +*> condition number for the selected right invariant subspace. +*> Not referenced if SENSE = 'N' or 'E'. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,3*N). +*> Also, if SENSE = 'E' or 'V' or 'B', +*> LWORK >= N+2*SDIM*(N-SDIM), where SDIM is the number of +*> selected eigenvalues computed by this routine. Note that +*> N+2*SDIM*(N-SDIM) <= N+N*N/2. Note also that an error is only +*> returned if LWORK < max(1,3*N), but if SENSE = 'E' or 'V' or +*> 'B' this may not be large enough. +*> For good performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates upper bounds on the optimal sizes of the +*> arrays WORK and IWORK, returns these values as the first +*> entries of the WORK and IWORK arrays, and no error messages +*> related to LWORK or LIWORK are issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> LIWORK >= 1; if SENSE = 'V' or 'B', LIWORK >= SDIM*(N-SDIM). +*> Note that SDIM*(N-SDIM) <= N*N/4. Note also that an error is +*> only returned if LIWORK < 1, but if SENSE = 'V' or 'B' this +*> may not be large enough. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates upper bounds on the optimal sizes of +*> the arrays WORK and IWORK, returns these values as the first +*> entries of the WORK and IWORK arrays, and no error messages +*> related to LWORK or LIWORK are issued by XERBLA. +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> Not referenced if SORT = 'N'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, and i is +*> <= N: the QR algorithm failed to compute all the +*> eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI +*> contain those eigenvalues which have converged; if +*> JOBVS = 'V', VS contains the transformation which +*> reduces A to its partially converged Schur form. +*> = N+1: the eigenvalues could not be reordered because some +*> eigenvalues were too close to separate (the problem +*> is very ill-conditioned); +*> = N+2: after reordering, roundoff changed values of some +*> complex eigenvalues so that leading eigenvalues in +*> the Schur form no longer satisfy SELECT=.TRUE. This +*> could also be caused by underflow due to scaling. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realGEeigen +* +* ===================================================================== + SUBROUTINE SGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, + $ WR, WI, VS, LDVS, RCONDE, RCONDV, WORK, LWORK, + $ IWORK, LIWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVS, SENSE, SORT + INTEGER INFO, LDA, LDVS, LIWORK, LWORK, N, SDIM + REAL RCONDE, RCONDV +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + REAL A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ), + $ WR( * ) +* .. +* .. Function Arguments .. + LOGICAL SELECT + EXTERNAL SELECT +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, LASTSL, LQUERY, LST2SL, SCALEA, WANTSB, + $ WANTSE, WANTSN, WANTST, WANTSV, WANTVS + INTEGER HSWORK, I, I1, I2, IBAL, ICOND, IERR, IEVAL, + $ IHI, ILO, INXT, IP, ITAU, IWRK, LWRK, LIWRK, + $ MAXWRK, MINWRK + REAL ANRM, BIGNUM, CSCALE, EPS, SMLNUM +* .. +* .. Local Arrays .. + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLABAD, + $ SLACPY, SLASCL, SORGHR, SSWAP, STRSEN, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTVS = LSAME( JOBVS, 'V' ) + WANTST = LSAME( SORT, 'S' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR. + $ ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN + INFO = -12 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "RWorkspace:" describe the +* minimal amount of real workspace needed at that point in the +* code, as well as the preferred amount for good performance. +* IWorkspace refers to integer workspace. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by SHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case. +* If SENSE = 'E', 'V' or 'B', then the amount of workspace needed +* depends on SDIM, which is computed by the routine STRSEN later +* in the code.) +* + IF( INFO.EQ.0 ) THEN + LIWRK = 1 + IF( N.EQ.0 ) THEN + MINWRK = 1 + LWRK = 1 + ELSE + MAXWRK = 2*N + N*ILAENV( 1, 'SGEHRD', ' ', N, 1, N, 0 ) + MINWRK = 3*N +* + CALL SHSEQR( 'S', JOBVS, N, 1, N, A, LDA, WR, WI, VS, LDVS, + $ WORK, -1, IEVAL ) + HSWORK = WORK( 1 ) +* + IF( .NOT.WANTVS ) THEN + MAXWRK = MAX( MAXWRK, N + HSWORK ) + ELSE + MAXWRK = MAX( MAXWRK, 2*N + ( N - 1 )*ILAENV( 1, + $ 'SORGHR', ' ', N, 1, N, -1 ) ) + MAXWRK = MAX( MAXWRK, N + HSWORK ) + END IF + LWRK = MAXWRK + IF( .NOT.WANTSN ) + $ LWRK = MAX( LWRK, N + ( N*N )/2 ) + IF( WANTSV .OR. WANTSB ) + $ LIWRK = ( N*N )/4 + END IF + IWORK( 1 ) = LIWRK + WORK( 1 ) = LWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -16 + ELSE IF( LIWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEESX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL SLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (RWorkspace: need N) +* + IBAL = 1 + CALL SGEBAL( 'P', N, A, LDA, ILO, IHI, WORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (RWorkspace: need 3*N, prefer 2*N+N*NB) +* + ITAU = N + IBAL + IWRK = N + ITAU + CALL SGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVS ) THEN +* +* Copy Householder vectors to VS +* + CALL SLACPY( 'L', N, N, A, LDA, VS, LDVS ) +* +* Generate orthogonal matrix in VS +* (RWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* + CALL SORGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) + END IF +* + SDIM = 0 +* +* Perform QR iteration, accumulating Schur vectors in VS if desired +* (RWorkspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL SHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, WR, WI, VS, LDVS, + $ WORK( IWRK ), LWORK-IWRK+1, IEVAL ) + IF( IEVAL.GT.0 ) + $ INFO = IEVAL +* +* Sort eigenvalues if desired +* + IF( WANTST .AND. INFO.EQ.0 ) THEN + IF( SCALEA ) THEN + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WR, N, IERR ) + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WI, N, IERR ) + END IF + DO 10 I = 1, N + BWORK( I ) = SELECT( WR( I ), WI( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues, transform Schur vectors, and compute +* reciprocal condition numbers +* (RWorkspace: if SENSE is not 'N', need N+2*SDIM*(N-SDIM) +* otherwise, need N ) +* (IWorkspace: if SENSE is 'V' or 'B', need SDIM*(N-SDIM) +* otherwise, need 0 ) +* + CALL STRSEN( SENSE, JOBVS, BWORK, N, A, LDA, VS, LDVS, WR, WI, + $ SDIM, RCONDE, RCONDV, WORK( IWRK ), LWORK-IWRK+1, + $ IWORK, LIWORK, ICOND ) + IF( .NOT.WANTSN ) + $ MAXWRK = MAX( MAXWRK, N+2*SDIM*( N-SDIM ) ) + IF( ICOND.EQ.-15 ) THEN +* +* Not enough real workspace +* + INFO = -16 + ELSE IF( ICOND.EQ.-17 ) THEN +* +* Not enough integer workspace +* + INFO = -18 + ELSE IF( ICOND.GT.0 ) THEN +* +* STRSEN failed to reorder or to restore standard Schur form +* + INFO = ICOND + N + END IF + END IF +* + IF( WANTVS ) THEN +* +* Undo balancing +* (RWorkspace: need N) +* + CALL SGEBAK( 'P', 'R', N, ILO, IHI, WORK( IBAL ), N, VS, LDVS, + $ IERR ) + END IF +* + IF( SCALEA ) THEN +* +* Undo scaling for the Schur form of A +* + CALL SLASCL( 'H', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR ) + CALL SCOPY( N, A, LDA+1, WR, 1 ) + IF( ( WANTSV .OR. WANTSB ) .AND. INFO.EQ.0 ) THEN + DUM( 1 ) = RCONDV + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR ) + RCONDV = DUM( 1 ) + END IF + IF( CSCALE.EQ.SMLNUM ) THEN +* +* If scaling back towards underflow, adjust WI if an +* offdiagonal element of a 2-by-2 block in the Schur form +* underflows. +* + IF( IEVAL.GT.0 ) THEN + I1 = IEVAL + 1 + I2 = IHI - 1 + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, N, + $ IERR ) + ELSE IF( WANTST ) THEN + I1 = 1 + I2 = N - 1 + ELSE + I1 = ILO + I2 = IHI - 1 + END IF + INXT = I1 - 1 + DO 20 I = I1, I2 + IF( I.LT.INXT ) + $ GO TO 20 + IF( WI( I ).EQ.ZERO ) THEN + INXT = I + 1 + ELSE + IF( A( I+1, I ).EQ.ZERO ) THEN + WI( I ) = ZERO + WI( I+1 ) = ZERO + ELSE IF( A( I+1, I ).NE.ZERO .AND. A( I, I+1 ).EQ. + $ ZERO ) THEN + WI( I ) = ZERO + WI( I+1 ) = ZERO + IF( I.GT.1 ) + $ CALL SSWAP( I-1, A( 1, I ), 1, A( 1, I+1 ), 1 ) + IF( N.GT.I+1 ) + $ CALL SSWAP( N-I-1, A( I, I+2 ), LDA, + $ A( I+1, I+2 ), LDA ) + CALL SSWAP( N, VS( 1, I ), 1, VS( 1, I+1 ), 1 ) + A( I, I+1 ) = A( I+1, I ) + A( I+1, I ) = ZERO + END IF + INXT = I + 2 + END IF + 20 CONTINUE + END IF + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-IEVAL, 1, + $ WI( IEVAL+1 ), MAX( N-IEVAL, 1 ), IERR ) + END IF +* + IF( WANTST .AND. INFO.EQ.0 ) THEN +* +* Check if reordering successful +* + LASTSL = .TRUE. + LST2SL = .TRUE. + SDIM = 0 + IP = 0 + DO 30 I = 1, N + CURSL = SELECT( WR( I ), WI( I ) ) + IF( WI( I ).EQ.ZERO ) THEN + IF( CURSL ) + $ SDIM = SDIM + 1 + IP = 0 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + ELSE + IF( IP.EQ.1 ) THEN +* +* Last eigenvalue of conjugate pair +* + CURSL = CURSL .OR. LASTSL + LASTSL = CURSL + IF( CURSL ) + $ SDIM = SDIM + 2 + IP = -1 + IF( CURSL .AND. .NOT.LST2SL ) + $ INFO = N + 2 + ELSE +* +* First eigenvalue of conjugate pair +* + IP = 1 + END IF + END IF + LST2SL = LASTSL + LASTSL = CURSL + 30 CONTINUE + END IF +* + WORK( 1 ) = MAXWRK + IF( WANTSV .OR. WANTSB ) THEN + IWORK( 1 ) = SDIM*(N-SDIM) + ELSE + IWORK( 1 ) = 1 + END IF +* + RETURN +* +* End of SGEESX +* + END diff --git a/dspl/liblapack/SRC/sgeev.f b/dspl/liblapack/SRC/sgeev.f new file mode 100644 index 0000000..08c5a57 --- /dev/null +++ b/dspl/liblapack/SRC/sgeev.f @@ -0,0 +1,529 @@ +*> \brief SGEEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR, +* LDVR, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVL, JOBVR +* INTEGER INFO, LDA, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WI( * ), WORK( * ), WR( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEEV computes for an N-by-N real nonsymmetric matrix A, the +*> eigenvalues and, optionally, the left and/or right eigenvectors. +*> +*> The right eigenvector v(j) of A satisfies +*> A * v(j) = lambda(j) * v(j) +*> where lambda(j) is its eigenvalue. +*> The left eigenvector u(j) of A satisfies +*> u(j)**H * A = lambda(j) * u(j)**H +*> where u(j)**H denotes the conjugate-transpose of u(j). +*> +*> The computed eigenvectors are normalized to have Euclidean norm +*> equal to 1 and largest component real. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': left eigenvectors of A are not computed; +*> = 'V': left eigenvectors of A are computed. +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': right eigenvectors of A are not computed; +*> = 'V': right eigenvectors of A are computed. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> On exit, A has been overwritten. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] WI +*> \verbatim +*> WI is REAL array, dimension (N) +*> WR and WI contain the real and imaginary parts, +*> respectively, of the computed eigenvalues. Complex +*> conjugate pairs of eigenvalues appear consecutively +*> with the eigenvalue having the positive imaginary part +*> first. +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is REAL array, dimension (LDVL,N) +*> If JOBVL = 'V', the left eigenvectors u(j) are stored one +*> after another in the columns of VL, in the same order +*> as their eigenvalues. +*> If JOBVL = 'N', VL is not referenced. +*> If the j-th eigenvalue is real, then u(j) = VL(:,j), +*> the j-th column of VL. +*> If the j-th and (j+1)-st eigenvalues form a complex +*> conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and +*> u(j+1) = VL(:,j) - i*VL(:,j+1). +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. LDVL >= 1; if +*> JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is REAL array, dimension (LDVR,N) +*> If JOBVR = 'V', the right eigenvectors v(j) are stored one +*> after another in the columns of VR, in the same order +*> as their eigenvalues. +*> If JOBVR = 'N', VR is not referenced. +*> If the j-th eigenvalue is real, then v(j) = VR(:,j), +*> the j-th column of VR. +*> If the j-th and (j+1)-st eigenvalues form a complex +*> conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and +*> v(j+1) = VR(:,j) - i*VR(:,j+1). +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. LDVR >= 1; if +*> JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,3*N), and +*> if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N. For good +*> performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the QR algorithm failed to compute all the +*> eigenvalues, and no eigenvectors have been computed; +*> elements i+1:N of WR and WI contain eigenvalues which +*> have converged. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +* @generated from dgeev.f, fortran d -> s, Tue Apr 19 01:47:44 2016 +* +*> \ingroup realGEeigen +* +* ===================================================================== + SUBROUTINE SGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR, + $ LDVR, WORK, LWORK, INFO ) + implicit none +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WI( * ), WORK( * ), WR( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTVL, WANTVR + CHARACTER SIDE + INTEGER HSWORK, I, IBAL, IERR, IHI, ILO, ITAU, IWRK, K, + $ LWORK_TREVC, MAXWRK, MINWRK, NOUT + REAL ANRM, BIGNUM, CS, CSCALE, EPS, R, SCL, SMLNUM, + $ SN +* .. +* .. Local Arrays .. + LOGICAL SELECT( 1 ) + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLABAD, SLACPY, + $ SLARTG, SLASCL, SORGHR, SROT, SSCAL, STREVC3, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX, ILAENV + REAL SLAMCH, SLANGE, SLAPY2, SNRM2 + EXTERNAL LSAME, ISAMAX, ILAENV, SLAMCH, SLANGE, SLAPY2, + $ SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVL = LSAME( JOBVL, 'V' ) + WANTVR = LSAME( JOBVR, 'V' ) + IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN + INFO = -9 + ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by SHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + ELSE + MAXWRK = 2*N + N*ILAENV( 1, 'SGEHRD', ' ', N, 1, N, 0 ) + IF( WANTVL ) THEN + MINWRK = 4*N + MAXWRK = MAX( MAXWRK, 2*N + ( N - 1 )*ILAENV( 1, + $ 'SORGHR', ' ', N, 1, N, -1 ) ) + CALL SHSEQR( 'S', 'V', N, 1, N, A, LDA, WR, WI, VL, LDVL, + $ WORK, -1, INFO ) + HSWORK = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, N + 1, N + HSWORK ) + CALL STREVC3( 'L', 'B', SELECT, N, A, LDA, + $ VL, LDVL, VR, LDVR, N, NOUT, + $ WORK, -1, IERR ) + LWORK_TREVC = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, N + LWORK_TREVC ) + MAXWRK = MAX( MAXWRK, 4*N ) + ELSE IF( WANTVR ) THEN + MINWRK = 4*N + MAXWRK = MAX( MAXWRK, 2*N + ( N - 1 )*ILAENV( 1, + $ 'SORGHR', ' ', N, 1, N, -1 ) ) + CALL SHSEQR( 'S', 'V', N, 1, N, A, LDA, WR, WI, VR, LDVR, + $ WORK, -1, INFO ) + HSWORK = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, N + 1, N + HSWORK ) + CALL STREVC3( 'R', 'B', SELECT, N, A, LDA, + $ VL, LDVL, VR, LDVR, N, NOUT, + $ WORK, -1, IERR ) + LWORK_TREVC = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, N + LWORK_TREVC ) + MAXWRK = MAX( MAXWRK, 4*N ) + ELSE + MINWRK = 3*N + CALL SHSEQR( 'E', 'N', N, 1, N, A, LDA, WR, WI, VR, LDVR, + $ WORK, -1, INFO ) + HSWORK = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, N + 1, N + HSWORK ) + END IF + MAXWRK = MAX( MAXWRK, MINWRK ) + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL SLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Balance the matrix +* (Workspace: need N) +* + IBAL = 1 + CALL SGEBAL( 'B', N, A, LDA, ILO, IHI, WORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (Workspace: need 3*N, prefer 2*N+N*NB) +* + ITAU = IBAL + N + IWRK = ITAU + N + CALL SGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVL ) THEN +* +* Want left eigenvectors +* Copy Householder vectors to VL +* + SIDE = 'L' + CALL SLACPY( 'L', N, N, A, LDA, VL, LDVL ) +* +* Generate orthogonal matrix in VL +* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* + CALL SORGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VL +* (Workspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL SHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VL, LDVL, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + IF( WANTVR ) THEN +* +* Want left and right eigenvectors +* Copy Schur vectors to VR +* + SIDE = 'B' + CALL SLACPY( 'F', N, N, VL, LDVL, VR, LDVR ) + END IF +* + ELSE IF( WANTVR ) THEN +* +* Want right eigenvectors +* Copy Householder vectors to VR +* + SIDE = 'R' + CALL SLACPY( 'L', N, N, A, LDA, VR, LDVR ) +* +* Generate orthogonal matrix in VR +* (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* + CALL SORGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VR +* (Workspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL SHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + ELSE +* +* Compute eigenvalues only +* (Workspace: need N+1, prefer N+HSWORK (see comments) ) +* + IWRK = ITAU + CALL SHSEQR( 'E', 'N', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) + END IF +* +* If INFO .NE. 0 from SHSEQR, then quit +* + IF( INFO.NE.0 ) + $ GO TO 50 +* + IF( WANTVL .OR. WANTVR ) THEN +* +* Compute left and/or right eigenvectors +* (Workspace: need 4*N, prefer N + N + 2*N*NB) +* + CALL STREVC3( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ N, NOUT, WORK( IWRK ), LWORK-IWRK+1, IERR ) + END IF +* + IF( WANTVL ) THEN +* +* Undo balancing of left eigenvectors +* (Workspace: need N) +* + CALL SGEBAK( 'B', 'L', N, ILO, IHI, WORK( IBAL ), N, VL, LDVL, + $ IERR ) +* +* Normalize left eigenvectors and make largest component real +* + DO 20 I = 1, N + IF( WI( I ).EQ.ZERO ) THEN + SCL = ONE / SNRM2( N, VL( 1, I ), 1 ) + CALL SSCAL( N, SCL, VL( 1, I ), 1 ) + ELSE IF( WI( I ).GT.ZERO ) THEN + SCL = ONE / SLAPY2( SNRM2( N, VL( 1, I ), 1 ), + $ SNRM2( N, VL( 1, I+1 ), 1 ) ) + CALL SSCAL( N, SCL, VL( 1, I ), 1 ) + CALL SSCAL( N, SCL, VL( 1, I+1 ), 1 ) + DO 10 K = 1, N + WORK( IWRK+K-1 ) = VL( K, I )**2 + VL( K, I+1 )**2 + 10 CONTINUE + K = ISAMAX( N, WORK( IWRK ), 1 ) + CALL SLARTG( VL( K, I ), VL( K, I+1 ), CS, SN, R ) + CALL SROT( N, VL( 1, I ), 1, VL( 1, I+1 ), 1, CS, SN ) + VL( K, I+1 ) = ZERO + END IF + 20 CONTINUE + END IF +* + IF( WANTVR ) THEN +* +* Undo balancing of right eigenvectors +* (Workspace: need N) +* + CALL SGEBAK( 'B', 'R', N, ILO, IHI, WORK( IBAL ), N, VR, LDVR, + $ IERR ) +* +* Normalize right eigenvectors and make largest component real +* + DO 40 I = 1, N + IF( WI( I ).EQ.ZERO ) THEN + SCL = ONE / SNRM2( N, VR( 1, I ), 1 ) + CALL SSCAL( N, SCL, VR( 1, I ), 1 ) + ELSE IF( WI( I ).GT.ZERO ) THEN + SCL = ONE / SLAPY2( SNRM2( N, VR( 1, I ), 1 ), + $ SNRM2( N, VR( 1, I+1 ), 1 ) ) + CALL SSCAL( N, SCL, VR( 1, I ), 1 ) + CALL SSCAL( N, SCL, VR( 1, I+1 ), 1 ) + DO 30 K = 1, N + WORK( IWRK+K-1 ) = VR( K, I )**2 + VR( K, I+1 )**2 + 30 CONTINUE + K = ISAMAX( N, WORK( IWRK ), 1 ) + CALL SLARTG( VR( K, I ), VR( K, I+1 ), CS, SN, R ) + CALL SROT( N, VR( 1, I ), 1, VR( 1, I+1 ), 1, CS, SN ) + VR( K, I+1 ) = ZERO + END IF + 40 CONTINUE + END IF +* +* Undo scaling if necessary +* + 50 CONTINUE + IF( SCALEA ) THEN + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WR( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WI( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + IF( INFO.GT.0 ) THEN + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WR, N, + $ IERR ) + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, N, + $ IERR ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of SGEEV +* + END diff --git a/dspl/liblapack/SRC/sgeevx.f b/dspl/liblapack/SRC/sgeevx.f new file mode 100644 index 0000000..b11f500 --- /dev/null +++ b/dspl/liblapack/SRC/sgeevx.f @@ -0,0 +1,694 @@ +*> \brief SGEEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, WR, WI, +* VL, LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, +* RCONDE, RCONDV, WORK, LWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER BALANC, JOBVL, JOBVR, SENSE +* INTEGER IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N +* REAL ABNRM +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), RCONDE( * ), RCONDV( * ), +* $ SCALE( * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WI( * ), WORK( * ), WR( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEEVX computes for an N-by-N real nonsymmetric matrix A, the +*> eigenvalues and, optionally, the left and/or right eigenvectors. +*> +*> Optionally also, it computes a balancing transformation to improve +*> the conditioning of the eigenvalues and eigenvectors (ILO, IHI, +*> SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues +*> (RCONDE), and reciprocal condition numbers for the right +*> eigenvectors (RCONDV). +*> +*> The right eigenvector v(j) of A satisfies +*> A * v(j) = lambda(j) * v(j) +*> where lambda(j) is its eigenvalue. +*> The left eigenvector u(j) of A satisfies +*> u(j)**H * A = lambda(j) * u(j)**H +*> where u(j)**H denotes the conjugate-transpose of u(j). +*> +*> The computed eigenvectors are normalized to have Euclidean norm +*> equal to 1 and largest component real. +*> +*> Balancing a matrix means permuting the rows and columns to make it +*> more nearly upper triangular, and applying a diagonal similarity +*> transformation D * A * D**(-1), where D is a diagonal matrix, to +*> make its rows and columns closer in norm and the condition numbers +*> of its eigenvalues and eigenvectors smaller. The computed +*> reciprocal condition numbers correspond to the balanced matrix. +*> Permuting rows and columns will not change the condition numbers +*> (in exact arithmetic) but diagonal scaling will. For further +*> explanation of balancing, see section 4.10.2 of the LAPACK +*> Users' Guide. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] BALANC +*> \verbatim +*> BALANC is CHARACTER*1 +*> Indicates how the input matrix should be diagonally scaled +*> and/or permuted to improve the conditioning of its +*> eigenvalues. +*> = 'N': Do not diagonally scale or permute; +*> = 'P': Perform permutations to make the matrix more nearly +*> upper triangular. Do not diagonally scale; +*> = 'S': Diagonally scale the matrix, i.e. replace A by +*> D*A*D**(-1), where D is a diagonal matrix chosen +*> to make the rows and columns of A more equal in +*> norm. Do not permute; +*> = 'B': Both diagonally scale and permute A. +*> +*> Computed reciprocal condition numbers will be for the matrix +*> after balancing and/or permuting. Permuting does not change +*> condition numbers (in exact arithmetic), but balancing does. +*> \endverbatim +*> +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': left eigenvectors of A are not computed; +*> = 'V': left eigenvectors of A are computed. +*> If SENSE = 'E' or 'B', JOBVL must = 'V'. +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': right eigenvectors of A are not computed; +*> = 'V': right eigenvectors of A are computed. +*> If SENSE = 'E' or 'B', JOBVR must = 'V'. +*> \endverbatim +*> +*> \param[in] SENSE +*> \verbatim +*> SENSE is CHARACTER*1 +*> Determines which reciprocal condition numbers are computed. +*> = 'N': None are computed; +*> = 'E': Computed for eigenvalues only; +*> = 'V': Computed for right eigenvectors only; +*> = 'B': Computed for eigenvalues and right eigenvectors. +*> +*> If SENSE = 'E' or 'B', both left and right eigenvectors +*> must also be computed (JOBVL = 'V' and JOBVR = 'V'). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> On exit, A has been overwritten. If JOBVL = 'V' or +*> JOBVR = 'V', A contains the real Schur form of the balanced +*> version of the input matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] WI +*> \verbatim +*> WI is REAL array, dimension (N) +*> WR and WI contain the real and imaginary parts, +*> respectively, of the computed eigenvalues. Complex +*> conjugate pairs of eigenvalues will appear consecutively +*> with the eigenvalue having the positive imaginary part +*> first. +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is REAL array, dimension (LDVL,N) +*> If JOBVL = 'V', the left eigenvectors u(j) are stored one +*> after another in the columns of VL, in the same order +*> as their eigenvalues. +*> If JOBVL = 'N', VL is not referenced. +*> If the j-th eigenvalue is real, then u(j) = VL(:,j), +*> the j-th column of VL. +*> If the j-th and (j+1)-st eigenvalues form a complex +*> conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and +*> u(j+1) = VL(:,j) - i*VL(:,j+1). +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. LDVL >= 1; if +*> JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is REAL array, dimension (LDVR,N) +*> If JOBVR = 'V', the right eigenvectors v(j) are stored one +*> after another in the columns of VR, in the same order +*> as their eigenvalues. +*> If JOBVR = 'N', VR is not referenced. +*> If the j-th eigenvalue is real, then v(j) = VR(:,j), +*> the j-th column of VR. +*> If the j-th and (j+1)-st eigenvalues form a complex +*> conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and +*> v(j+1) = VR(:,j) - i*VR(:,j+1). +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. LDVR >= 1, and if +*> JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[out] IHI +*> \verbatim +*> IHI is INTEGER +*> ILO and IHI are integer values determined when A was +*> balanced. The balanced A(i,j) = 0 if I > J and +*> J = 1,...,ILO-1 or I = IHI+1,...,N. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL array, dimension (N) +*> Details of the permutations and scaling factors applied +*> when balancing A. If P(j) is the index of the row and column +*> interchanged with row and column j, and D(j) is the scaling +*> factor applied to row and column j, then +*> SCALE(J) = P(J), for J = 1,...,ILO-1 +*> = D(J), for J = ILO,...,IHI +*> = P(J) for J = IHI+1,...,N. +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] ABNRM +*> \verbatim +*> ABNRM is REAL +*> The one-norm of the balanced matrix (the maximum +*> of the sum of absolute values of elements of any column). +*> \endverbatim +*> +*> \param[out] RCONDE +*> \verbatim +*> RCONDE is REAL array, dimension (N) +*> RCONDE(j) is the reciprocal condition number of the j-th +*> eigenvalue. +*> \endverbatim +*> +*> \param[out] RCONDV +*> \verbatim +*> RCONDV is REAL array, dimension (N) +*> RCONDV(j) is the reciprocal condition number of the j-th +*> right eigenvector. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. If SENSE = 'N' or 'E', +*> LWORK >= max(1,2*N), and if JOBVL = 'V' or JOBVR = 'V', +*> LWORK >= 3*N. If SENSE = 'V' or 'B', LWORK >= N*(N+6). +*> For good performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (2*N-2) +*> If SENSE = 'N' or 'E', not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the QR algorithm failed to compute all the +*> eigenvalues, and no eigenvectors or condition numbers +*> have been computed; elements 1:ILO-1 and i+1:N of WR +*> and WI contain eigenvalues which have converged. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +* @generated from dgeevx.f, fortran d -> s, Tue Apr 19 01:47:44 2016 +* +*> \ingroup realGEeigen +* +* ===================================================================== + SUBROUTINE SGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, WR, WI, + $ VL, LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, + $ RCONDE, RCONDV, WORK, LWORK, IWORK, INFO ) + implicit none +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER BALANC, JOBVL, JOBVR, SENSE + INTEGER IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N + REAL ABNRM +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), RCONDE( * ), RCONDV( * ), + $ SCALE( * ), VL( LDVL, * ), VR( LDVR, * ), + $ WI( * ), WORK( * ), WR( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTVL, WANTVR, WNTSNB, WNTSNE, + $ WNTSNN, WNTSNV + CHARACTER JOB, SIDE + INTEGER HSWORK, I, ICOND, IERR, ITAU, IWRK, K, + $ LWORK_TREVC, MAXWRK, MINWRK, NOUT + REAL ANRM, BIGNUM, CS, CSCALE, EPS, R, SCL, SMLNUM, + $ SN +* .. +* .. Local Arrays .. + LOGICAL SELECT( 1 ) + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLABAD, SLACPY, + $ SLARTG, SLASCL, SORGHR, SROT, SSCAL, STREVC3, + $ STRSNA, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX, ILAENV + REAL SLAMCH, SLANGE, SLAPY2, SNRM2 + EXTERNAL LSAME, ISAMAX, ILAENV, SLAMCH, SLANGE, SLAPY2, + $ SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVL = LSAME( JOBVL, 'V' ) + WANTVR = LSAME( JOBVR, 'V' ) + WNTSNN = LSAME( SENSE, 'N' ) + WNTSNE = LSAME( SENSE, 'E' ) + WNTSNV = LSAME( SENSE, 'V' ) + WNTSNB = LSAME( SENSE, 'B' ) + IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC, 'S' ) + $ .OR. LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) ) + $ THEN + INFO = -1 + ELSE IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( .NOT.( WNTSNN .OR. WNTSNE .OR. WNTSNB .OR. WNTSNV ) .OR. + $ ( ( WNTSNE .OR. WNTSNB ) .AND. .NOT.( WANTVL .AND. + $ WANTVR ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN + INFO = -13 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by SHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + ELSE + MAXWRK = N + N*ILAENV( 1, 'SGEHRD', ' ', N, 1, N, 0 ) +* + IF( WANTVL ) THEN + CALL STREVC3( 'L', 'B', SELECT, N, A, LDA, + $ VL, LDVL, VR, LDVR, + $ N, NOUT, WORK, -1, IERR ) + LWORK_TREVC = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, N + LWORK_TREVC ) + CALL SHSEQR( 'S', 'V', N, 1, N, A, LDA, WR, WI, VL, LDVL, + $ WORK, -1, INFO ) + ELSE IF( WANTVR ) THEN + CALL STREVC3( 'R', 'B', SELECT, N, A, LDA, + $ VL, LDVL, VR, LDVR, + $ N, NOUT, WORK, -1, IERR ) + LWORK_TREVC = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, N + LWORK_TREVC ) + CALL SHSEQR( 'S', 'V', N, 1, N, A, LDA, WR, WI, VR, LDVR, + $ WORK, -1, INFO ) + ELSE + IF( WNTSNN ) THEN + CALL SHSEQR( 'E', 'N', N, 1, N, A, LDA, WR, WI, VR, + $ LDVR, WORK, -1, INFO ) + ELSE + CALL SHSEQR( 'S', 'N', N, 1, N, A, LDA, WR, WI, VR, + $ LDVR, WORK, -1, INFO ) + END IF + END IF + HSWORK = INT( WORK(1) ) +* + IF( ( .NOT.WANTVL ) .AND. ( .NOT.WANTVR ) ) THEN + MINWRK = 2*N + IF( .NOT.WNTSNN ) + $ MINWRK = MAX( MINWRK, N*N+6*N ) + MAXWRK = MAX( MAXWRK, HSWORK ) + IF( .NOT.WNTSNN ) + $ MAXWRK = MAX( MAXWRK, N*N + 6*N ) + ELSE + MINWRK = 3*N + IF( ( .NOT.WNTSNN ) .AND. ( .NOT.WNTSNE ) ) + $ MINWRK = MAX( MINWRK, N*N + 6*N ) + MAXWRK = MAX( MAXWRK, HSWORK ) + MAXWRK = MAX( MAXWRK, N + ( N - 1 )*ILAENV( 1, 'SORGHR', + $ ' ', N, 1, N, -1 ) ) + IF( ( .NOT.WNTSNN ) .AND. ( .NOT.WNTSNE ) ) + $ MAXWRK = MAX( MAXWRK, N*N + 6*N ) + MAXWRK = MAX( MAXWRK, 3*N ) + END IF + MAXWRK = MAX( MAXWRK, MINWRK ) + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -21 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ICOND = 0 + ANRM = SLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL SLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Balance the matrix and compute ABNRM +* + CALL SGEBAL( BALANC, N, A, LDA, ILO, IHI, SCALE, IERR ) + ABNRM = SLANGE( '1', N, N, A, LDA, DUM ) + IF( SCALEA ) THEN + DUM( 1 ) = ABNRM + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR ) + ABNRM = DUM( 1 ) + END IF +* +* Reduce to upper Hessenberg form +* (Workspace: need 2*N, prefer N+N*NB) +* + ITAU = 1 + IWRK = ITAU + N + CALL SGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVL ) THEN +* +* Want left eigenvectors +* Copy Householder vectors to VL +* + SIDE = 'L' + CALL SLACPY( 'L', N, N, A, LDA, VL, LDVL ) +* +* Generate orthogonal matrix in VL +* (Workspace: need 2*N-1, prefer N+(N-1)*NB) +* + CALL SORGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VL +* (Workspace: need 1, prefer HSWORK (see comments) ) +* + IWRK = ITAU + CALL SHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VL, LDVL, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + IF( WANTVR ) THEN +* +* Want left and right eigenvectors +* Copy Schur vectors to VR +* + SIDE = 'B' + CALL SLACPY( 'F', N, N, VL, LDVL, VR, LDVR ) + END IF +* + ELSE IF( WANTVR ) THEN +* +* Want right eigenvectors +* Copy Householder vectors to VR +* + SIDE = 'R' + CALL SLACPY( 'L', N, N, A, LDA, VR, LDVR ) +* +* Generate orthogonal matrix in VR +* (Workspace: need 2*N-1, prefer N+(N-1)*NB) +* + CALL SORGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VR +* (Workspace: need 1, prefer HSWORK (see comments) ) +* + IWRK = ITAU + CALL SHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + ELSE +* +* Compute eigenvalues only +* If condition numbers desired, compute Schur form +* + IF( WNTSNN ) THEN + JOB = 'E' + ELSE + JOB = 'S' + END IF +* +* (Workspace: need 1, prefer HSWORK (see comments) ) +* + IWRK = ITAU + CALL SHSEQR( JOB, 'N', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) + END IF +* +* If INFO .NE. 0 from SHSEQR, then quit +* + IF( INFO.NE.0 ) + $ GO TO 50 +* + IF( WANTVL .OR. WANTVR ) THEN +* +* Compute left and/or right eigenvectors +* (Workspace: need 3*N, prefer N + 2*N*NB) +* + CALL STREVC3( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ N, NOUT, WORK( IWRK ), LWORK-IWRK+1, IERR ) + END IF +* +* Compute condition numbers if desired +* (Workspace: need N*N+6*N unless SENSE = 'E') +* + IF( .NOT.WNTSNN ) THEN + CALL STRSNA( SENSE, 'A', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ RCONDE, RCONDV, N, NOUT, WORK( IWRK ), N, IWORK, + $ ICOND ) + END IF +* + IF( WANTVL ) THEN +* +* Undo balancing of left eigenvectors +* + CALL SGEBAK( BALANC, 'L', N, ILO, IHI, SCALE, N, VL, LDVL, + $ IERR ) +* +* Normalize left eigenvectors and make largest component real +* + DO 20 I = 1, N + IF( WI( I ).EQ.ZERO ) THEN + SCL = ONE / SNRM2( N, VL( 1, I ), 1 ) + CALL SSCAL( N, SCL, VL( 1, I ), 1 ) + ELSE IF( WI( I ).GT.ZERO ) THEN + SCL = ONE / SLAPY2( SNRM2( N, VL( 1, I ), 1 ), + $ SNRM2( N, VL( 1, I+1 ), 1 ) ) + CALL SSCAL( N, SCL, VL( 1, I ), 1 ) + CALL SSCAL( N, SCL, VL( 1, I+1 ), 1 ) + DO 10 K = 1, N + WORK( K ) = VL( K, I )**2 + VL( K, I+1 )**2 + 10 CONTINUE + K = ISAMAX( N, WORK, 1 ) + CALL SLARTG( VL( K, I ), VL( K, I+1 ), CS, SN, R ) + CALL SROT( N, VL( 1, I ), 1, VL( 1, I+1 ), 1, CS, SN ) + VL( K, I+1 ) = ZERO + END IF + 20 CONTINUE + END IF +* + IF( WANTVR ) THEN +* +* Undo balancing of right eigenvectors +* + CALL SGEBAK( BALANC, 'R', N, ILO, IHI, SCALE, N, VR, LDVR, + $ IERR ) +* +* Normalize right eigenvectors and make largest component real +* + DO 40 I = 1, N + IF( WI( I ).EQ.ZERO ) THEN + SCL = ONE / SNRM2( N, VR( 1, I ), 1 ) + CALL SSCAL( N, SCL, VR( 1, I ), 1 ) + ELSE IF( WI( I ).GT.ZERO ) THEN + SCL = ONE / SLAPY2( SNRM2( N, VR( 1, I ), 1 ), + $ SNRM2( N, VR( 1, I+1 ), 1 ) ) + CALL SSCAL( N, SCL, VR( 1, I ), 1 ) + CALL SSCAL( N, SCL, VR( 1, I+1 ), 1 ) + DO 30 K = 1, N + WORK( K ) = VR( K, I )**2 + VR( K, I+1 )**2 + 30 CONTINUE + K = ISAMAX( N, WORK, 1 ) + CALL SLARTG( VR( K, I ), VR( K, I+1 ), CS, SN, R ) + CALL SROT( N, VR( 1, I ), 1, VR( 1, I+1 ), 1, CS, SN ) + VR( K, I+1 ) = ZERO + END IF + 40 CONTINUE + END IF +* +* Undo scaling if necessary +* + 50 CONTINUE + IF( SCALEA ) THEN + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WR( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WI( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + IF( INFO.EQ.0 ) THEN + IF( ( WNTSNV .OR. WNTSNB ) .AND. ICOND.EQ.0 ) + $ CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, RCONDV, N, + $ IERR ) + ELSE + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WR, N, + $ IERR ) + CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, N, + $ IERR ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of SGEEVX +* + END diff --git a/dspl/liblapack/SRC/sgehd2.f b/dspl/liblapack/SRC/sgehd2.f new file mode 100644 index 0000000..6a172b7 --- /dev/null +++ b/dspl/liblapack/SRC/sgehd2.f @@ -0,0 +1,225 @@ +*> \brief \b SGEHD2 reduces a general square matrix to upper Hessenberg form using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEHD2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEHD2 reduces a real general matrix A to upper Hessenberg form H by +*> an orthogonal similarity transformation: Q**T * A * Q = H . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> It is assumed that A is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +*> set by a previous call to SGEBAL; otherwise they should be +*> set to 1 and N respectively. See Further Details. +*> 1 <= ILO <= IHI <= max(1,N). +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the n by n general matrix to be reduced. +*> On exit, the upper triangle and the first subdiagonal of A +*> are overwritten with the upper Hessenberg matrix H, and the +*> elements below the first subdiagonal, with the array TAU, +*> represent the orthogonal matrix Q as a product of elementary +*> reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of (ihi-ilo) elementary +*> reflectors +*> +*> Q = H(ilo) H(ilo+1) . . . H(ihi-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +*> exit in A(i+2:ihi,i), and tau in TAU(i). +*> +*> The contents of A are illustrated by the following example, with +*> n = 7, ilo = 2 and ihi = 6: +*> +*> on entry, on exit, +*> +*> ( a a a a a a a ) ( a a h h h h a ) +*> ( a a a a a a ) ( a h h h h a ) +*> ( a a a a a a ) ( h h h h h h ) +*> ( a a a a a a ) ( v2 h h h h h ) +*> ( a a a a a a ) ( v2 v3 h h h h ) +*> ( a a a a a a ) ( v2 v3 v4 h h h ) +*> ( a ) ( a ) +*> +*> where a denotes an element of the original matrix A, h denotes a +*> modified element of the upper Hessenberg matrix H, and vi denotes an +*> element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL AII +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEHD2', -INFO ) + RETURN + END IF +* + DO 10 I = ILO, IHI - 1 +* +* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) +* + CALL SLARFG( IHI-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, + $ TAU( I ) ) + AII = A( I+1, I ) + A( I+1, I ) = ONE +* +* Apply H(i) to A(1:ihi,i+1:ihi) from the right +* + CALL SLARF( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ), + $ A( 1, I+1 ), LDA, WORK ) +* +* Apply H(i) to A(i+1:ihi,i+1:n) from the left +* + CALL SLARF( 'Left', IHI-I, N-I, A( I+1, I ), 1, TAU( I ), + $ A( I+1, I+1 ), LDA, WORK ) +* + A( I+1, I ) = AII + 10 CONTINUE +* + RETURN +* +* End of SGEHD2 +* + END diff --git a/dspl/liblapack/SRC/sgehrd.f b/dspl/liblapack/SRC/sgehrd.f new file mode 100644 index 0000000..75a1bca --- /dev/null +++ b/dspl/liblapack/SRC/sgehrd.f @@ -0,0 +1,356 @@ +*> \brief \b SGEHRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEHRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEHRD reduces a real general matrix A to upper Hessenberg form H by +*> an orthogonal similarity transformation: Q**T * A * Q = H . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> It is assumed that A is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +*> set by a previous call to SGEBAL; otherwise they should be +*> set to 1 and N respectively. See Further Details. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the N-by-N general matrix to be reduced. +*> On exit, the upper triangle and the first subdiagonal of A +*> are overwritten with the upper Hessenberg matrix H, and the +*> elements below the first subdiagonal, with the array TAU, +*> represent the orthogonal matrix Q as a product of elementary +*> reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to +*> zero. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,N). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of (ihi-ilo) elementary +*> reflectors +*> +*> Q = H(ilo) H(ilo+1) . . . H(ihi-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +*> exit in A(i+2:ihi,i), and tau in TAU(i). +*> +*> The contents of A are illustrated by the following example, with +*> n = 7, ilo = 2 and ihi = 6: +*> +*> on entry, on exit, +*> +*> ( a a a a a a a ) ( a a h h h h a ) +*> ( a a a a a a ) ( a h h h h a ) +*> ( a a a a a a ) ( h h h h h h ) +*> ( a a a a a a ) ( v2 h h h h h ) +*> ( a a a a a a ) ( v2 v3 h h h h ) +*> ( a a a a a a ) ( v2 v3 v4 h h h ) +*> ( a ) ( a ) +*> +*> where a denotes an element of the original matrix A, h denotes a +*> modified element of the upper Hessenberg matrix H, and vi denotes an +*> element of the vector defining H(i). +*> +*> This file is a slight modification of LAPACK-3.0's DGEHRD +*> subroutine incorporating improvements proposed by Quintana-Orti and +*> Van de Geijn (2006). (See DLAHR2.) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, + $ ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWT, J, LDWORK, LWKOPT, NB, + $ NBMIN, NH, NX + REAL EI +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SGEHD2, SGEMM, SLAHR2, SLARFB, STRMM, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + NB = MIN( NBMAX, ILAENV( 1, 'SGEHRD', ' ', N, ILO, IHI, -1 ) ) + LWKOPT = N*NB + TSIZE + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEHRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero +* + DO 10 I = 1, ILO - 1 + TAU( I ) = ZERO + 10 CONTINUE + DO 20 I = MAX( 1, IHI ), N - 1 + TAU( I ) = ZERO + 20 CONTINUE +* +* Quick return if possible +* + NH = IHI - ILO + 1 + IF( NH.LE.1 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine the block size +* + NB = MIN( NBMAX, ILAENV( 1, 'SGEHRD', ' ', N, ILO, IHI, -1 ) ) + NBMIN = 2 + IF( NB.GT.1 .AND. NB.LT.NH ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code) +* + NX = MAX( NB, ILAENV( 3, 'SGEHRD', ' ', N, ILO, IHI, -1 ) ) + IF( NX.LT.NH ) THEN +* +* Determine if workspace is large enough for blocked code +* + IF( LWORK.LT.N*NB+TSIZE ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code +* + NBMIN = MAX( 2, ILAENV( 2, 'SGEHRD', ' ', N, ILO, IHI, + $ -1 ) ) + IF( LWORK.GE.(N*NBMIN + TSIZE) ) THEN + NB = (LWORK-TSIZE) / N + ELSE + NB = 1 + END IF + END IF + END IF + END IF + LDWORK = N +* + IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN +* +* Use unblocked code below +* + I = ILO +* + ELSE +* +* Use blocked code +* + IWT = 1 + N*NB + DO 40 I = ILO, IHI - 1 - NX, NB + IB = MIN( NB, IHI-I ) +* +* Reduce columns i:i+ib-1 to Hessenberg form, returning the +* matrices V and T of the block reflector H = I - V*T*V**T +* which performs the reduction, and also the matrix Y = A*V*T +* + CALL SLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), + $ WORK( IWT ), LDT, WORK, LDWORK ) +* +* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the +* right, computing A := A - Y * V**T. V(i+ib,ib-1) must be set +* to 1 +* + EI = A( I+IB, I+IB-1 ) + A( I+IB, I+IB-1 ) = ONE + CALL SGEMM( 'No transpose', 'Transpose', + $ IHI, IHI-I-IB+1, + $ IB, -ONE, WORK, LDWORK, A( I+IB, I ), LDA, ONE, + $ A( 1, I+IB ), LDA ) + A( I+IB, I+IB-1 ) = EI +* +* Apply the block reflector H to A(1:i,i+1:i+ib-1) from the +* right +* + CALL STRMM( 'Right', 'Lower', 'Transpose', + $ 'Unit', I, IB-1, + $ ONE, A( I+1, I ), LDA, WORK, LDWORK ) + DO 30 J = 0, IB-2 + CALL SAXPY( I, -ONE, WORK( LDWORK*J+1 ), 1, + $ A( 1, I+J+1 ), 1 ) + 30 CONTINUE +* +* Apply the block reflector H to A(i+1:ihi,i+ib:n) from the +* left +* + CALL SLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', + $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, + $ WORK( IWT ), LDT, A( I+1, I+IB ), LDA, + $ WORK, LDWORK ) + 40 CONTINUE + END IF +* +* Use unblocked code to reduce the rest of the matrix +* + CALL SGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO ) + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SGEHRD +* + END diff --git a/dspl/liblapack/SRC/sgejsv.f b/dspl/liblapack/SRC/sgejsv.f new file mode 100644 index 0000000..e4cbe8d --- /dev/null +++ b/dspl/liblapack/SRC/sgejsv.f @@ -0,0 +1,1783 @@ +*> \brief \b SGEJSV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEJSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, +* M, N, A, LDA, SVA, U, LDU, V, LDV, +* WORK, LWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* IMPLICIT NONE +* INTEGER INFO, LDA, LDU, LDV, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), SVA( N ), U( LDU, * ), V( LDV, * ), +* $ WORK( LWORK ) +* INTEGER IWORK( * ) +* CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEJSV computes the singular value decomposition (SVD) of a real M-by-N +*> matrix [A], where M >= N. The SVD of [A] is written as +*> +*> [A] = [U] * [SIGMA] * [V]^t, +*> +*> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N +*> diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and +*> [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are +*> the singular values of [A]. The columns of [U] and [V] are the left and +*> the right singular vectors of [A], respectively. The matrices [U] and [V] +*> are computed and stored in the arrays U and V, respectively. The diagonal +*> of [SIGMA] is computed and stored in the array SVA. +*> SGEJSV can sometimes compute tiny singular values and their singular vectors much +*> more accurately than other SVD routines, see below under Further Details. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBA +*> \verbatim +*> JOBA is CHARACTER*1 +*> Specifies the level of accuracy: +*> = 'C': This option works well (high relative accuracy) if A = B * D, +*> with well-conditioned B and arbitrary diagonal matrix D. +*> The accuracy cannot be spoiled by COLUMN scaling. The +*> accuracy of the computed output depends on the condition of +*> B, and the procedure aims at the best theoretical accuracy. +*> The relative error max_{i=1:N}|d sigma_i| / sigma_i is +*> bounded by f(M,N)*epsilon* cond(B), independent of D. +*> The input matrix is preprocessed with the QRF with column +*> pivoting. This initial preprocessing and preconditioning by +*> a rank revealing QR factorization is common for all values of +*> JOBA. Additional actions are specified as follows: +*> = 'E': Computation as with 'C' with an additional estimate of the +*> condition number of B. It provides a realistic error bound. +*> = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings +*> D1, D2, and well-conditioned matrix C, this option gives +*> higher accuracy than the 'C' option. If the structure of the +*> input matrix is not known, and relative accuracy is +*> desirable, then this option is advisable. The input matrix A +*> is preprocessed with QR factorization with FULL (row and +*> column) pivoting. +*> = 'G' Computation as with 'F' with an additional estimate of the +*> condition number of B, where A=D*B. If A has heavily weighted +*> rows, then using this condition number gives too pessimistic +*> error bound. +*> = 'A': Small singular values are the noise and the matrix is treated +*> as numerically rank deficient. The error in the computed +*> singular values is bounded by f(m,n)*epsilon*||A||. +*> The computed SVD A = U * S * V^t restores A up to +*> f(m,n)*epsilon*||A||. +*> This gives the procedure the licence to discard (set to zero) +*> all singular values below N*epsilon*||A||. +*> = 'R': Similar as in 'A'. Rank revealing property of the initial +*> QR factorization is used do reveal (using triangular factor) +*> a gap sigma_{r+1} < epsilon * sigma_r in which case the +*> numerical RANK is declared to be r. The SVD is computed with +*> absolute error bounds, but more accurately than with 'A'. +*> \endverbatim +*> +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> Specifies whether to compute the columns of U: +*> = 'U': N columns of U are returned in the array U. +*> = 'F': full set of M left sing. vectors is returned in the array U. +*> = 'W': U may be used as workspace of length M*N. See the description +*> of U. +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> Specifies whether to compute the matrix V: +*> = 'V': N columns of V are returned in the array V; Jacobi rotations +*> are not explicitly accumulated. +*> = 'J': N columns of V are returned in the array V, but they are +*> computed as the product of Jacobi rotations. This option is +*> allowed only if JOBU .NE. 'N', i.e. in computing the full SVD. +*> = 'W': V may be used as workspace of length N*N. See the description +*> of V. +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBR +*> \verbatim +*> JOBR is CHARACTER*1 +*> Specifies the RANGE for the singular values. Issues the licence to +*> set to zero small positive singular values if they are outside +*> specified range. If A .NE. 0 is scaled so that the largest singular +*> value of c*A is around SQRT(BIG), BIG=SLAMCH('O'), then JOBR issues +*> the licence to kill columns of A whose norm in c*A is less than +*> SQRT(SFMIN) (for JOBR.EQ.'R'), or less than SMALL=SFMIN/EPSLN, +*> where SFMIN=SLAMCH('S'), EPSLN=SLAMCH('E'). +*> = 'N': Do not kill small columns of c*A. This option assumes that +*> BLAS and QR factorizations and triangular solvers are +*> implemented to work in that range. If the condition of A +*> is greater than BIG, use SGESVJ. +*> = 'R': RESTRICTED range for sigma(c*A) is [SQRT(SFMIN), SQRT(BIG)] +*> (roughly, as described above). This option is recommended. +*> =========================== +*> For computing the singular values in the FULL range [SFMIN,BIG] +*> use SGESVJ. +*> \endverbatim +*> +*> \param[in] JOBT +*> \verbatim +*> JOBT is CHARACTER*1 +*> If the matrix is square then the procedure may determine to use +*> transposed A if A^t seems to be better with respect to convergence. +*> If the matrix is not square, JOBT is ignored. This is subject to +*> changes in the future. +*> The decision is based on two values of entropy over the adjoint +*> orbit of A^t * A. See the descriptions of WORK(6) and WORK(7). +*> = 'T': transpose if entropy test indicates possibly faster +*> convergence of Jacobi process if A^t is taken as input. If A is +*> replaced with A^t, then the row pivoting is included automatically. +*> = 'N': do not speculate. +*> This option can be used to compute only the singular values, or the +*> full SVD (U, SIGMA and V). For only one set of singular vectors +*> (U or V), the caller should provide both U and V, as one of the +*> matrices is used as workspace if the matrix A is transposed. +*> The implementer can easily remove this constraint and make the +*> code more complicated. See the descriptions of U and V. +*> \endverbatim +*> +*> \param[in] JOBP +*> \verbatim +*> JOBP is CHARACTER*1 +*> Issues the licence to introduce structured perturbations to drown +*> denormalized numbers. This licence should be active if the +*> denormals are poorly implemented, causing slow computation, +*> especially in cases of fast convergence (!). For details see [1,2]. +*> For the sake of simplicity, this perturbations are included only +*> when the full SVD or only the singular values are requested. The +*> implementer/user can easily add the perturbation for the cases of +*> computing one set of singular vectors. +*> = 'P': introduce perturbation +*> = 'N': do not perturb +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. M >= N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] SVA +*> \verbatim +*> SVA is REAL array, dimension (N) +*> On exit, +*> - For WORK(1)/WORK(2) = ONE: The singular values of A. During the +*> computation SVA contains Euclidean column norms of the +*> iterated matrices in the array A. +*> - For WORK(1) .NE. WORK(2): The singular values of A are +*> (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if +*> sigma_max(A) overflows or if small singular values have been +*> saved from underflow by scaling the input matrix A. +*> - If JOBR='R' then some of the singular values may be returned +*> as exact zeros obtained by "set to zero" because they are +*> below the numerical rank threshold or are denormalized numbers. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is REAL array, dimension ( LDU, N ) +*> If JOBU = 'U', then U contains on exit the M-by-N matrix of +*> the left singular vectors. +*> If JOBU = 'F', then U contains on exit the M-by-M matrix of +*> the left singular vectors, including an ONB +*> of the orthogonal complement of the Range(A). +*> If JOBU = 'W' .AND. (JOBV.EQ.'V' .AND. JOBT.EQ.'T' .AND. M.EQ.N), +*> then U is used as workspace if the procedure +*> replaces A with A^t. In that case, [V] is computed +*> in U as left singular vectors of A^t and then +*> copied back to the V array. This 'W' option is just +*> a reminder to the caller that in this case U is +*> reserved as workspace of length N*N. +*> If JOBU = 'N' U is not referenced, unless JOBT='T'. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U, LDU >= 1. +*> IF JOBU = 'U' or 'F' or 'W', then LDU >= M. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is REAL array, dimension ( LDV, N ) +*> If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of +*> the right singular vectors; +*> If JOBV = 'W', AND (JOBU.EQ.'U' AND JOBT.EQ.'T' AND M.EQ.N), +*> then V is used as workspace if the pprocedure +*> replaces A with A^t. In that case, [U] is computed +*> in V as right singular vectors of A^t and then +*> copied back to the U array. This 'W' option is just +*> a reminder to the caller that in this case V is +*> reserved as workspace of length N*N. +*> If JOBV = 'N' V is not referenced, unless JOBT='T'. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V, LDV >= 1. +*> If JOBV = 'V' or 'J' or 'W', then LDV >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> On exit, +*> WORK(1) = SCALE = WORK(2) / WORK(1) is the scaling factor such +*> that SCALE*SVA(1:N) are the computed singular values +*> of A. (See the description of SVA().) +*> WORK(2) = See the description of WORK(1). +*> WORK(3) = SCONDA is an estimate for the condition number of +*> column equilibrated A. (If JOBA .EQ. 'E' or 'G') +*> SCONDA is an estimate of SQRT(||(R^t * R)^(-1)||_1). +*> It is computed using SPOCON. It holds +*> N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA +*> where R is the triangular factor from the QRF of A. +*> However, if R is truncated and the numerical rank is +*> determined to be strictly smaller than N, SCONDA is +*> returned as -1, thus indicating that the smallest +*> singular values might be lost. +*> +*> If full SVD is needed, the following two condition numbers are +*> useful for the analysis of the algorithm. They are provied for +*> a developer/implementer who is familiar with the details of +*> the method. +*> +*> WORK(4) = an estimate of the scaled condition number of the +*> triangular factor in the first QR factorization. +*> WORK(5) = an estimate of the scaled condition number of the +*> triangular factor in the second QR factorization. +*> The following two parameters are computed if JOBT .EQ. 'T'. +*> They are provided for a developer/implementer who is familiar +*> with the details of the method. +*> +*> WORK(6) = the entropy of A^t*A :: this is the Shannon entropy +*> of diag(A^t*A) / Trace(A^t*A) taken as point in the +*> probability simplex. +*> WORK(7) = the entropy of A*A^t. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> Length of WORK to confirm proper allocation of work space. +*> LWORK depends on the job: +*> +*> If only SIGMA is needed ( JOBU.EQ.'N', JOBV.EQ.'N' ) and +*> -> .. no scaled condition estimate required (JOBE.EQ.'N'): +*> LWORK >= max(2*M+N,4*N+1,7). This is the minimal requirement. +*> ->> For optimal performance (blocked code) the optimal value +*> is LWORK >= max(2*M+N,3*N+(N+1)*NB,7). Here NB is the optimal +*> block size for DGEQP3 and DGEQRF. +*> In general, optimal LWORK is computed as +*> LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), 7). +*> -> .. an estimate of the scaled condition number of A is +*> required (JOBA='E', 'G'). In this case, LWORK is the maximum +*> of the above and N*N+4*N, i.e. LWORK >= max(2*M+N,N*N+4*N,7). +*> ->> For optimal performance (blocked code) the optimal value +*> is LWORK >= max(2*M+N,3*N+(N+1)*NB, N*N+4*N, 7). +*> In general, the optimal length LWORK is computed as +*> LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), +*> N+N*N+LWORK(DPOCON),7). +*> +*> If SIGMA and the right singular vectors are needed (JOBV.EQ.'V'), +*> -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7). +*> -> For optimal performance, LWORK >= max(2*M+N,3*N+(N+1)*NB,7), +*> where NB is the optimal block size for DGEQP3, DGEQRF, DGELQ, +*> DORMLQ. In general, the optimal length LWORK is computed as +*> LWORK >= max(2*M+N,N+LWORK(DGEQP3), N+LWORK(DPOCON), +*> N+LWORK(DGELQ), 2*N+LWORK(DGEQRF), N+LWORK(DORMLQ)). +*> +*> If SIGMA and the left singular vectors are needed +*> -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7). +*> -> For optimal performance: +*> if JOBU.EQ.'U' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,7), +*> if JOBU.EQ.'F' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,N+M*NB,7), +*> where NB is the optimal block size for DGEQP3, DGEQRF, DORMQR. +*> In general, the optimal length LWORK is computed as +*> LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DPOCON), +*> 2*N+LWORK(DGEQRF), N+LWORK(DORMQR)). +*> Here LWORK(DORMQR) equals N*NB (for JOBU.EQ.'U') or +*> M*NB (for JOBU.EQ.'F'). +*> +*> If the full SVD is needed: (JOBU.EQ.'U' or JOBU.EQ.'F') and +*> -> if JOBV.EQ.'V' +*> the minimal requirement is LWORK >= max(2*M+N,6*N+2*N*N). +*> -> if JOBV.EQ.'J' the minimal requirement is +*> LWORK >= max(2*M+N, 4*N+N*N,2*N+N*N+6). +*> -> For optimal performance, LWORK should be additionally +*> larger than N+M*NB, where NB is the optimal block size +*> for DORMQR. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (M+3*N). +*> On exit, +*> IWORK(1) = the numerical rank determined after the initial +*> QR factorization with pivoting. See the descriptions +*> of JOBA and JOBR. +*> IWORK(2) = the number of the computed nonzero singular values +*> IWORK(3) = if nonzero, a warning message: +*> If IWORK(3).EQ.1 then some of the column norms of A +*> were denormalized floats. The requested high accuracy +*> is not warranted by the data. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> < 0 : if INFO = -i, then the i-th argument had an illegal value. +*> = 0 : successful exit; +*> > 0 : SGEJSV did not converge in the maximal allowed number +*> of sweeps. The computed values may be inaccurate. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realGEsing +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> SGEJSV implements a preconditioned Jacobi SVD algorithm. It uses SGEQP3, +*> SGEQRF, and SGELQF as preprocessors and preconditioners. Optionally, an +*> additional row pivoting can be used as a preprocessor, which in some +*> cases results in much higher accuracy. An example is matrix A with the +*> structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned +*> diagonal matrices and C is well-conditioned matrix. In that case, complete +*> pivoting in the first QR factorizations provides accuracy dependent on the +*> condition number of C, and independent of D1, D2. Such higher accuracy is +*> not completely understood theoretically, but it works well in practice. +*> Further, if A can be written as A = B*D, with well-conditioned B and some +*> diagonal D, then the high accuracy is guaranteed, both theoretically and +*> in software, independent of D. For more details see [1], [2]. +*> The computational range for the singular values can be the full range +*> ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS +*> & LAPACK routines called by SGEJSV are implemented to work in that range. +*> If that is not the case, then the restriction for safe computation with +*> the singular values in the range of normalized IEEE numbers is that the +*> spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not +*> overflow. This code (SGEJSV) is best used in this restricted range, +*> meaning that singular values of magnitude below ||A||_2 / SLAMCH('O') are +*> returned as zeros. See JOBR for details on this. +*> Further, this implementation is somewhat slower than the one described +*> in [1,2] due to replacement of some non-LAPACK components, and because +*> the choice of some tuning parameters in the iterative part (SGESVJ) is +*> left to the implementer on a particular machine. +*> The rank revealing QR factorization (in this code: SGEQP3) should be +*> implemented as in [3]. We have a new version of SGEQP3 under development +*> that is more robust than the current one in LAPACK, with a cleaner cut in +*> rank deficient cases. It will be available in the SIGMA library [4]. +*> If M is much larger than N, it is obvious that the initial QRF with +*> column pivoting can be preprocessed by the QRF without pivoting. That +*> well known trick is not used in SGEJSV because in some cases heavy row +*> weighting can be treated with complete pivoting. The overhead in cases +*> M much larger than N is then only due to pivoting, but the benefits in +*> terms of accuracy have prevailed. The implementer/user can incorporate +*> this extra QRF step easily. The implementer can also improve data movement +*> (matrix transpose, matrix copy, matrix transposed copy) - this +*> implementation of SGEJSV uses only the simplest, naive data movement. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. +*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. +*> LAPACK Working note 169. +*> [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. +*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. +*> LAPACK Working note 170. +*> [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR +*> factorization software - a case study. +*> ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28. +*> LAPACK Working note 176. +*> [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, +*> QSVD, (H,K)-SVD computations. +*> Department of Mathematics, University of Zagreb, 2008. +*> \endverbatim +* +*> \par Bugs, examples and comments: +* ================================= +*> +*> Please report all bugs and send interesting examples and/or comments to +*> drmac@math.hr. Thank you. +*> +* ===================================================================== + SUBROUTINE SGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, + $ M, N, A, LDA, SVA, U, LDU, V, LDV, + $ WORK, LWORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + IMPLICIT NONE + INTEGER INFO, LDA, LDU, LDV, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), SVA( N ), U( LDU, * ), V( LDV, * ), + $ WORK( LWORK ) + INTEGER IWORK( * ) + CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV +* .. +* +* =========================================================================== +* +* .. Local Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + REAL AAPP, AAQQ, AATMAX, AATMIN, BIG, BIG1, COND_OK, + $ CONDR1, CONDR2, ENTRA, ENTRAT, EPSLN, MAXPRJ, SCALEM, + $ SCONDA, SFMIN, SMALL, TEMP1, USCAL1, USCAL2, XSC + INTEGER IERR, N1, NR, NUMRANK, p, q, WARNING + LOGICAL ALMORT, DEFR, ERREST, GOSCAL, JRACC, KILL, LSVEC, + $ L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN, + $ NOSCAL, ROWPIV, RSVEC, TRANSP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, ALOG, MAX, MIN, FLOAT, NINT, SIGN, SQRT +* .. +* .. External Functions .. + REAL SLAMCH, SNRM2 + INTEGER ISAMAX + LOGICAL LSAME + EXTERNAL ISAMAX, LSAME, SLAMCH, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGELQF, SGEQP3, SGEQRF, SLACPY, SLASCL, + $ SLASET, SLASSQ, SLASWP, SORGQR, SORMLQ, + $ SORMQR, SPOCON, SSCAL, SSWAP, STRSM, XERBLA +* + EXTERNAL SGESVJ +* .. +* +* Test the input arguments +* + LSVEC = LSAME( JOBU, 'U' ) .OR. LSAME( JOBU, 'F' ) + JRACC = LSAME( JOBV, 'J' ) + RSVEC = LSAME( JOBV, 'V' ) .OR. JRACC + ROWPIV = LSAME( JOBA, 'F' ) .OR. LSAME( JOBA, 'G' ) + L2RANK = LSAME( JOBA, 'R' ) + L2ABER = LSAME( JOBA, 'A' ) + ERREST = LSAME( JOBA, 'E' ) .OR. LSAME( JOBA, 'G' ) + L2TRAN = LSAME( JOBT, 'T' ) + L2KILL = LSAME( JOBR, 'R' ) + DEFR = LSAME( JOBR, 'N' ) + L2PERT = LSAME( JOBP, 'P' ) +* + IF ( .NOT.(ROWPIV .OR. L2RANK .OR. L2ABER .OR. + $ ERREST .OR. LSAME( JOBA, 'C' ) )) THEN + INFO = - 1 + ELSE IF ( .NOT.( LSVEC .OR. LSAME( JOBU, 'N' ) .OR. + $ LSAME( JOBU, 'W' )) ) THEN + INFO = - 2 + ELSE IF ( .NOT.( RSVEC .OR. LSAME( JOBV, 'N' ) .OR. + $ LSAME( JOBV, 'W' )) .OR. ( JRACC .AND. (.NOT.LSVEC) ) ) THEN + INFO = - 3 + ELSE IF ( .NOT. ( L2KILL .OR. DEFR ) ) THEN + INFO = - 4 + ELSE IF ( .NOT. ( L2TRAN .OR. LSAME( JOBT, 'N' ) ) ) THEN + INFO = - 5 + ELSE IF ( .NOT. ( L2PERT .OR. LSAME( JOBP, 'N' ) ) ) THEN + INFO = - 6 + ELSE IF ( M .LT. 0 ) THEN + INFO = - 7 + ELSE IF ( ( N .LT. 0 ) .OR. ( N .GT. M ) ) THEN + INFO = - 8 + ELSE IF ( LDA .LT. M ) THEN + INFO = - 10 + ELSE IF ( LSVEC .AND. ( LDU .LT. M ) ) THEN + INFO = - 13 + ELSE IF ( RSVEC .AND. ( LDV .LT. N ) ) THEN + INFO = - 15 + ELSE IF ( (.NOT.(LSVEC .OR. RSVEC .OR. ERREST).AND. + $ (LWORK .LT. MAX(7,4*N+1,2*M+N))) .OR. + $ (.NOT.(LSVEC .OR. RSVEC) .AND. ERREST .AND. + $ (LWORK .LT. MAX(7,4*N+N*N,2*M+N))) .OR. + $ (LSVEC .AND. (.NOT.RSVEC) .AND. (LWORK .LT. MAX(7,2*M+N,4*N+1))) + $ .OR. + $ (RSVEC .AND. (.NOT.LSVEC) .AND. (LWORK .LT. MAX(7,2*M+N,4*N+1))) + $ .OR. + $ (LSVEC .AND. RSVEC .AND. (.NOT.JRACC) .AND. + $ (LWORK.LT.MAX(2*M+N,6*N+2*N*N))) + $ .OR. (LSVEC .AND. RSVEC .AND. JRACC .AND. + $ LWORK.LT.MAX(2*M+N,4*N+N*N,2*N+N*N+6))) + $ THEN + INFO = - 17 + ELSE +* #:) + INFO = 0 + END IF +* + IF ( INFO .NE. 0 ) THEN +* #:( + CALL XERBLA( 'SGEJSV', - INFO ) + RETURN + END IF +* +* Quick return for void matrix (Y3K safe) +* #:) + IF ( ( M .EQ. 0 ) .OR. ( N .EQ. 0 ) ) THEN + IWORK(1:3) = 0 + WORK(1:7) = 0 + RETURN + ENDIF +* +* Determine whether the matrix U should be M x N or M x M +* + IF ( LSVEC ) THEN + N1 = N + IF ( LSAME( JOBU, 'F' ) ) N1 = M + END IF +* +* Set numerical parameters +* +*! NOTE: Make sure SLAMCH() does not fail on the target architecture. +* + EPSLN = SLAMCH('Epsilon') + SFMIN = SLAMCH('SafeMinimum') + SMALL = SFMIN / EPSLN + BIG = SLAMCH('O') +* BIG = ONE / SFMIN +* +* Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N +* +*(!) If necessary, scale SVA() to protect the largest norm from +* overflow. It is possible that this scaling pushes the smallest +* column norm left from the underflow threshold (extreme case). +* + SCALEM = ONE / SQRT(FLOAT(M)*FLOAT(N)) + NOSCAL = .TRUE. + GOSCAL = .TRUE. + DO 1874 p = 1, N + AAPP = ZERO + AAQQ = ONE + CALL SLASSQ( M, A(1,p), 1, AAPP, AAQQ ) + IF ( AAPP .GT. BIG ) THEN + INFO = - 9 + CALL XERBLA( 'SGEJSV', -INFO ) + RETURN + END IF + AAQQ = SQRT(AAQQ) + IF ( ( AAPP .LT. (BIG / AAQQ) ) .AND. NOSCAL ) THEN + SVA(p) = AAPP * AAQQ + ELSE + NOSCAL = .FALSE. + SVA(p) = AAPP * ( AAQQ * SCALEM ) + IF ( GOSCAL ) THEN + GOSCAL = .FALSE. + CALL SSCAL( p-1, SCALEM, SVA, 1 ) + END IF + END IF + 1874 CONTINUE +* + IF ( NOSCAL ) SCALEM = ONE +* + AAPP = ZERO + AAQQ = BIG + DO 4781 p = 1, N + AAPP = MAX( AAPP, SVA(p) ) + IF ( SVA(p) .NE. ZERO ) AAQQ = MIN( AAQQ, SVA(p) ) + 4781 CONTINUE +* +* Quick return for zero M x N matrix +* #:) + IF ( AAPP .EQ. ZERO ) THEN + IF ( LSVEC ) CALL SLASET( 'G', M, N1, ZERO, ONE, U, LDU ) + IF ( RSVEC ) CALL SLASET( 'G', N, N, ZERO, ONE, V, LDV ) + WORK(1) = ONE + WORK(2) = ONE + IF ( ERREST ) WORK(3) = ONE + IF ( LSVEC .AND. RSVEC ) THEN + WORK(4) = ONE + WORK(5) = ONE + END IF + IF ( L2TRAN ) THEN + WORK(6) = ZERO + WORK(7) = ZERO + END IF + IWORK(1) = 0 + IWORK(2) = 0 + IWORK(3) = 0 + RETURN + END IF +* +* Issue warning if denormalized column norms detected. Override the +* high relative accuracy request. Issue licence to kill columns +* (set them to zero) whose norm is less than sigma_max / BIG (roughly). +* #:( + WARNING = 0 + IF ( AAQQ .LE. SFMIN ) THEN + L2RANK = .TRUE. + L2KILL = .TRUE. + WARNING = 1 + END IF +* +* Quick return for one-column matrix +* #:) + IF ( N .EQ. 1 ) THEN +* + IF ( LSVEC ) THEN + CALL SLASCL( 'G',0,0,SVA(1),SCALEM, M,1,A(1,1),LDA,IERR ) + CALL SLACPY( 'A', M, 1, A, LDA, U, LDU ) +* computing all M left singular vectors of the M x 1 matrix + IF ( N1 .NE. N ) THEN + CALL SGEQRF( M, N, U,LDU, WORK, WORK(N+1),LWORK-N,IERR ) + CALL SORGQR( M,N1,1, U,LDU,WORK,WORK(N+1),LWORK-N,IERR ) + CALL SCOPY( M, A(1,1), 1, U(1,1), 1 ) + END IF + END IF + IF ( RSVEC ) THEN + V(1,1) = ONE + END IF + IF ( SVA(1) .LT. (BIG*SCALEM) ) THEN + SVA(1) = SVA(1) / SCALEM + SCALEM = ONE + END IF + WORK(1) = ONE / SCALEM + WORK(2) = ONE + IF ( SVA(1) .NE. ZERO ) THEN + IWORK(1) = 1 + IF ( ( SVA(1) / SCALEM) .GE. SFMIN ) THEN + IWORK(2) = 1 + ELSE + IWORK(2) = 0 + END IF + ELSE + IWORK(1) = 0 + IWORK(2) = 0 + END IF + IWORK(3) = 0 + IF ( ERREST ) WORK(3) = ONE + IF ( LSVEC .AND. RSVEC ) THEN + WORK(4) = ONE + WORK(5) = ONE + END IF + IF ( L2TRAN ) THEN + WORK(6) = ZERO + WORK(7) = ZERO + END IF + RETURN +* + END IF +* + TRANSP = .FALSE. + L2TRAN = L2TRAN .AND. ( M .EQ. N ) +* + AATMAX = -ONE + AATMIN = BIG + IF ( ROWPIV .OR. L2TRAN ) THEN +* +* Compute the row norms, needed to determine row pivoting sequence +* (in the case of heavily row weighted A, row pivoting is strongly +* advised) and to collect information needed to compare the +* structures of A * A^t and A^t * A (in the case L2TRAN.EQ..TRUE.). +* + IF ( L2TRAN ) THEN + DO 1950 p = 1, M + XSC = ZERO + TEMP1 = ONE + CALL SLASSQ( N, A(p,1), LDA, XSC, TEMP1 ) +* SLASSQ gets both the ell_2 and the ell_infinity norm +* in one pass through the vector + WORK(M+N+p) = XSC * SCALEM + WORK(N+p) = XSC * (SCALEM*SQRT(TEMP1)) + AATMAX = MAX( AATMAX, WORK(N+p) ) + IF (WORK(N+p) .NE. ZERO) AATMIN = MIN(AATMIN,WORK(N+p)) + 1950 CONTINUE + ELSE + DO 1904 p = 1, M + WORK(M+N+p) = SCALEM*ABS( A(p,ISAMAX(N,A(p,1),LDA)) ) + AATMAX = MAX( AATMAX, WORK(M+N+p) ) + AATMIN = MIN( AATMIN, WORK(M+N+p) ) + 1904 CONTINUE + END IF +* + END IF +* +* For square matrix A try to determine whether A^t would be better +* input for the preconditioned Jacobi SVD, with faster convergence. +* The decision is based on an O(N) function of the vector of column +* and row norms of A, based on the Shannon entropy. This should give +* the right choice in most cases when the difference actually matters. +* It may fail and pick the slower converging side. +* + ENTRA = ZERO + ENTRAT = ZERO + IF ( L2TRAN ) THEN +* + XSC = ZERO + TEMP1 = ONE + CALL SLASSQ( N, SVA, 1, XSC, TEMP1 ) + TEMP1 = ONE / TEMP1 +* + ENTRA = ZERO + DO 1113 p = 1, N + BIG1 = ( ( SVA(p) / XSC )**2 ) * TEMP1 + IF ( BIG1 .NE. ZERO ) ENTRA = ENTRA + BIG1 * ALOG(BIG1) + 1113 CONTINUE + ENTRA = - ENTRA / ALOG(FLOAT(N)) +* +* Now, SVA().^2/Trace(A^t * A) is a point in the probability simplex. +* It is derived from the diagonal of A^t * A. Do the same with the +* diagonal of A * A^t, compute the entropy of the corresponding +* probability distribution. Note that A * A^t and A^t * A have the +* same trace. +* + ENTRAT = ZERO + DO 1114 p = N+1, N+M + BIG1 = ( ( WORK(p) / XSC )**2 ) * TEMP1 + IF ( BIG1 .NE. ZERO ) ENTRAT = ENTRAT + BIG1 * ALOG(BIG1) + 1114 CONTINUE + ENTRAT = - ENTRAT / ALOG(FLOAT(M)) +* +* Analyze the entropies and decide A or A^t. Smaller entropy +* usually means better input for the algorithm. +* + TRANSP = ( ENTRAT .LT. ENTRA ) +* +* If A^t is better than A, transpose A. +* + IF ( TRANSP ) THEN +* In an optimal implementation, this trivial transpose +* should be replaced with faster transpose. + DO 1115 p = 1, N - 1 + DO 1116 q = p + 1, N + TEMP1 = A(q,p) + A(q,p) = A(p,q) + A(p,q) = TEMP1 + 1116 CONTINUE + 1115 CONTINUE + DO 1117 p = 1, N + WORK(M+N+p) = SVA(p) + SVA(p) = WORK(N+p) + 1117 CONTINUE + TEMP1 = AAPP + AAPP = AATMAX + AATMAX = TEMP1 + TEMP1 = AAQQ + AAQQ = AATMIN + AATMIN = TEMP1 + KILL = LSVEC + LSVEC = RSVEC + RSVEC = KILL + IF ( LSVEC ) N1 = N +* + ROWPIV = .TRUE. + END IF +* + END IF +* END IF L2TRAN +* +* Scale the matrix so that its maximal singular value remains less +* than SQRT(BIG) -- the matrix is scaled so that its maximal column +* has Euclidean norm equal to SQRT(BIG/N). The only reason to keep +* SQRT(BIG) instead of BIG is the fact that SGEJSV uses LAPACK and +* BLAS routines that, in some implementations, are not capable of +* working in the full interval [SFMIN,BIG] and that they may provoke +* overflows in the intermediate results. If the singular values spread +* from SFMIN to BIG, then SGESVJ will compute them. So, in that case, +* one should use SGESVJ instead of SGEJSV. +* + BIG1 = SQRT( BIG ) + TEMP1 = SQRT( BIG / FLOAT(N) ) +* + CALL SLASCL( 'G', 0, 0, AAPP, TEMP1, N, 1, SVA, N, IERR ) + IF ( AAQQ .GT. (AAPP * SFMIN) ) THEN + AAQQ = ( AAQQ / AAPP ) * TEMP1 + ELSE + AAQQ = ( AAQQ * TEMP1 ) / AAPP + END IF + TEMP1 = TEMP1 * SCALEM + CALL SLASCL( 'G', 0, 0, AAPP, TEMP1, M, N, A, LDA, IERR ) +* +* To undo scaling at the end of this procedure, multiply the +* computed singular values with USCAL2 / USCAL1. +* + USCAL1 = TEMP1 + USCAL2 = AAPP +* + IF ( L2KILL ) THEN +* L2KILL enforces computation of nonzero singular values in +* the restricted range of condition number of the initial A, +* sigma_max(A) / sigma_min(A) approx. SQRT(BIG)/SQRT(SFMIN). + XSC = SQRT( SFMIN ) + ELSE + XSC = SMALL +* +* Now, if the condition number of A is too big, +* sigma_max(A) / sigma_min(A) .GT. SQRT(BIG/N) * EPSLN / SFMIN, +* as a precaution measure, the full SVD is computed using SGESVJ +* with accumulated Jacobi rotations. This provides numerically +* more robust computation, at the cost of slightly increased run +* time. Depending on the concrete implementation of BLAS and LAPACK +* (i.e. how they behave in presence of extreme ill-conditioning) the +* implementor may decide to remove this switch. + IF ( ( AAQQ.LT.SQRT(SFMIN) ) .AND. LSVEC .AND. RSVEC ) THEN + JRACC = .TRUE. + END IF +* + END IF + IF ( AAQQ .LT. XSC ) THEN + DO 700 p = 1, N + IF ( SVA(p) .LT. XSC ) THEN + CALL SLASET( 'A', M, 1, ZERO, ZERO, A(1,p), LDA ) + SVA(p) = ZERO + END IF + 700 CONTINUE + END IF +* +* Preconditioning using QR factorization with pivoting +* + IF ( ROWPIV ) THEN +* Optional row permutation (Bjoerck row pivoting): +* A result by Cox and Higham shows that the Bjoerck's +* row pivoting combined with standard column pivoting +* has similar effect as Powell-Reid complete pivoting. +* The ell-infinity norms of A are made nonincreasing. + DO 1952 p = 1, M - 1 + q = ISAMAX( M-p+1, WORK(M+N+p), 1 ) + p - 1 + IWORK(2*N+p) = q + IF ( p .NE. q ) THEN + TEMP1 = WORK(M+N+p) + WORK(M+N+p) = WORK(M+N+q) + WORK(M+N+q) = TEMP1 + END IF + 1952 CONTINUE + CALL SLASWP( N, A, LDA, 1, M-1, IWORK(2*N+1), 1 ) + END IF +* +* End of the preparation phase (scaling, optional sorting and +* transposing, optional flushing of small columns). +* +* Preconditioning +* +* If the full SVD is needed, the right singular vectors are computed +* from a matrix equation, and for that we need theoretical analysis +* of the Businger-Golub pivoting. So we use SGEQP3 as the first RR QRF. +* In all other cases the first RR QRF can be chosen by other criteria +* (eg speed by replacing global with restricted window pivoting, such +* as in SGEQPX from TOMS # 782). Good results will be obtained using +* SGEQPX with properly (!) chosen numerical parameters. +* Any improvement of SGEQP3 improves overal performance of SGEJSV. +* +* A * P1 = Q1 * [ R1^t 0]^t: + DO 1963 p = 1, N +* .. all columns are free columns + IWORK(p) = 0 + 1963 CONTINUE + CALL SGEQP3( M,N,A,LDA, IWORK,WORK, WORK(N+1),LWORK-N, IERR ) +* +* The upper triangular matrix R1 from the first QRF is inspected for +* rank deficiency and possibilities for deflation, or possible +* ill-conditioning. Depending on the user specified flag L2RANK, +* the procedure explores possibilities to reduce the numerical +* rank by inspecting the computed upper triangular factor. If +* L2RANK or L2ABER are up, then SGEJSV will compute the SVD of +* A + dA, where ||dA|| <= f(M,N)*EPSLN. +* + NR = 1 + IF ( L2ABER ) THEN +* Standard absolute error bound suffices. All sigma_i with +* sigma_i < N*EPSLN*||A|| are flushed to zero. This is an +* agressive enforcement of lower numerical rank by introducing a +* backward error of the order of N*EPSLN*||A||. + TEMP1 = SQRT(FLOAT(N))*EPSLN + DO 3001 p = 2, N + IF ( ABS(A(p,p)) .GE. (TEMP1*ABS(A(1,1))) ) THEN + NR = NR + 1 + ELSE + GO TO 3002 + END IF + 3001 CONTINUE + 3002 CONTINUE + ELSE IF ( L2RANK ) THEN +* .. similarly as above, only slightly more gentle (less agressive). +* Sudden drop on the diagonal of R1 is used as the criterion for +* close-to-rank-deficient. + TEMP1 = SQRT(SFMIN) + DO 3401 p = 2, N + IF ( ( ABS(A(p,p)) .LT. (EPSLN*ABS(A(p-1,p-1))) ) .OR. + $ ( ABS(A(p,p)) .LT. SMALL ) .OR. + $ ( L2KILL .AND. (ABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3402 + NR = NR + 1 + 3401 CONTINUE + 3402 CONTINUE +* + ELSE +* The goal is high relative accuracy. However, if the matrix +* has high scaled condition number the relative accuracy is in +* general not feasible. Later on, a condition number estimator +* will be deployed to estimate the scaled condition number. +* Here we just remove the underflowed part of the triangular +* factor. This prevents the situation in which the code is +* working hard to get the accuracy not warranted by the data. + TEMP1 = SQRT(SFMIN) + DO 3301 p = 2, N + IF ( ( ABS(A(p,p)) .LT. SMALL ) .OR. + $ ( L2KILL .AND. (ABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3302 + NR = NR + 1 + 3301 CONTINUE + 3302 CONTINUE +* + END IF +* + ALMORT = .FALSE. + IF ( NR .EQ. N ) THEN + MAXPRJ = ONE + DO 3051 p = 2, N + TEMP1 = ABS(A(p,p)) / SVA(IWORK(p)) + MAXPRJ = MIN( MAXPRJ, TEMP1 ) + 3051 CONTINUE + IF ( MAXPRJ**2 .GE. ONE - FLOAT(N)*EPSLN ) ALMORT = .TRUE. + END IF +* +* + SCONDA = - ONE + CONDR1 = - ONE + CONDR2 = - ONE +* + IF ( ERREST ) THEN + IF ( N .EQ. NR ) THEN + IF ( RSVEC ) THEN +* .. V is available as workspace + CALL SLACPY( 'U', N, N, A, LDA, V, LDV ) + DO 3053 p = 1, N + TEMP1 = SVA(IWORK(p)) + CALL SSCAL( p, ONE/TEMP1, V(1,p), 1 ) + 3053 CONTINUE + CALL SPOCON( 'U', N, V, LDV, ONE, TEMP1, + $ WORK(N+1), IWORK(2*N+M+1), IERR ) + ELSE IF ( LSVEC ) THEN +* .. U is available as workspace + CALL SLACPY( 'U', N, N, A, LDA, U, LDU ) + DO 3054 p = 1, N + TEMP1 = SVA(IWORK(p)) + CALL SSCAL( p, ONE/TEMP1, U(1,p), 1 ) + 3054 CONTINUE + CALL SPOCON( 'U', N, U, LDU, ONE, TEMP1, + $ WORK(N+1), IWORK(2*N+M+1), IERR ) + ELSE + CALL SLACPY( 'U', N, N, A, LDA, WORK(N+1), N ) + DO 3052 p = 1, N + TEMP1 = SVA(IWORK(p)) + CALL SSCAL( p, ONE/TEMP1, WORK(N+(p-1)*N+1), 1 ) + 3052 CONTINUE +* .. the columns of R are scaled to have unit Euclidean lengths. + CALL SPOCON( 'U', N, WORK(N+1), N, ONE, TEMP1, + $ WORK(N+N*N+1), IWORK(2*N+M+1), IERR ) + END IF + SCONDA = ONE / SQRT(TEMP1) +* SCONDA is an estimate of SQRT(||(R^t * R)^(-1)||_1). +* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA + ELSE + SCONDA = - ONE + END IF + END IF +* + L2PERT = L2PERT .AND. ( ABS( A(1,1)/A(NR,NR) ) .GT. SQRT(BIG1) ) +* If there is no violent scaling, artificial perturbation is not needed. +* +* Phase 3: +* + IF ( .NOT. ( RSVEC .OR. LSVEC ) ) THEN +* +* Singular Values only +* +* .. transpose A(1:NR,1:N) + DO 1946 p = 1, MIN( N-1, NR ) + CALL SCOPY( N-p, A(p,p+1), LDA, A(p+1,p), 1 ) + 1946 CONTINUE +* +* The following two DO-loops introduce small relative perturbation +* into the strict upper triangle of the lower triangular matrix. +* Small entries below the main diagonal are also changed. +* This modification is useful if the computing environment does not +* provide/allow FLUSH TO ZERO underflow, for it prevents many +* annoying denormalized numbers in case of strongly scaled matrices. +* The perturbation is structured so that it does not introduce any +* new perturbation of the singular values, and it does not destroy +* the job done by the preconditioner. +* The licence for this perturbation is in the variable L2PERT, which +* should be .FALSE. if FLUSH TO ZERO underflow is active. +* + IF ( .NOT. ALMORT ) THEN +* + IF ( L2PERT ) THEN +* XSC = SQRT(SMALL) + XSC = EPSLN / FLOAT(N) + DO 4947 q = 1, NR + TEMP1 = XSC*ABS(A(q,q)) + DO 4949 p = 1, N + IF ( ( (p.GT.q) .AND. (ABS(A(p,q)).LE.TEMP1) ) + $ .OR. ( p .LT. q ) ) + $ A(p,q) = SIGN( TEMP1, A(p,q) ) + 4949 CONTINUE + 4947 CONTINUE + ELSE + CALL SLASET( 'U', NR-1,NR-1, ZERO,ZERO, A(1,2),LDA ) + END IF +* +* .. second preconditioning using the QR factorization +* + CALL SGEQRF( N,NR, A,LDA, WORK, WORK(N+1),LWORK-N, IERR ) +* +* .. and transpose upper to lower triangular + DO 1948 p = 1, NR - 1 + CALL SCOPY( NR-p, A(p,p+1), LDA, A(p+1,p), 1 ) + 1948 CONTINUE +* + END IF +* +* Row-cyclic Jacobi SVD algorithm with column pivoting +* +* .. again some perturbation (a "background noise") is added +* to drown denormals + IF ( L2PERT ) THEN +* XSC = SQRT(SMALL) + XSC = EPSLN / FLOAT(N) + DO 1947 q = 1, NR + TEMP1 = XSC*ABS(A(q,q)) + DO 1949 p = 1, NR + IF ( ( (p.GT.q) .AND. (ABS(A(p,q)).LE.TEMP1) ) + $ .OR. ( p .LT. q ) ) + $ A(p,q) = SIGN( TEMP1, A(p,q) ) + 1949 CONTINUE + 1947 CONTINUE + ELSE + CALL SLASET( 'U', NR-1, NR-1, ZERO, ZERO, A(1,2), LDA ) + END IF +* +* .. and one-sided Jacobi rotations are started on a lower +* triangular matrix (plus perturbation which is ignored in +* the part which destroys triangular form (confusing?!)) +* + CALL SGESVJ( 'L', 'NoU', 'NoV', NR, NR, A, LDA, SVA, + $ N, V, LDV, WORK, LWORK, INFO ) +* + SCALEM = WORK(1) + NUMRANK = NINT(WORK(2)) +* +* + ELSE IF ( RSVEC .AND. ( .NOT. LSVEC ) ) THEN +* +* -> Singular Values and Right Singular Vectors <- +* + IF ( ALMORT ) THEN +* +* .. in this case NR equals N + DO 1998 p = 1, NR + CALL SCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) + 1998 CONTINUE + CALL SLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV ) +* + CALL SGESVJ( 'L','U','N', N, NR, V,LDV, SVA, NR, A,LDA, + $ WORK, LWORK, INFO ) + SCALEM = WORK(1) + NUMRANK = NINT(WORK(2)) + + ELSE +* +* .. two more QR factorizations ( one QRF is not enough, two require +* accumulated product of Jacobi rotations, three are perfect ) +* + CALL SLASET( 'Lower', NR-1, NR-1, ZERO, ZERO, A(2,1), LDA ) + CALL SGELQF( NR, N, A, LDA, WORK, WORK(N+1), LWORK-N, IERR) + CALL SLACPY( 'Lower', NR, NR, A, LDA, V, LDV ) + CALL SLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV ) + CALL SGEQRF( NR, NR, V, LDV, WORK(N+1), WORK(2*N+1), + $ LWORK-2*N, IERR ) + DO 8998 p = 1, NR + CALL SCOPY( NR-p+1, V(p,p), LDV, V(p,p), 1 ) + 8998 CONTINUE + CALL SLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV ) +* + CALL SGESVJ( 'Lower', 'U','N', NR, NR, V,LDV, SVA, NR, U, + $ LDU, WORK(N+1), LWORK-N, INFO ) + SCALEM = WORK(N+1) + NUMRANK = NINT(WORK(N+2)) + IF ( NR .LT. N ) THEN + CALL SLASET( 'A',N-NR, NR, ZERO,ZERO, V(NR+1,1), LDV ) + CALL SLASET( 'A',NR, N-NR, ZERO,ZERO, V(1,NR+1), LDV ) + CALL SLASET( 'A',N-NR,N-NR,ZERO,ONE, V(NR+1,NR+1), LDV ) + END IF +* + CALL SORMLQ( 'Left', 'Transpose', N, N, NR, A, LDA, WORK, + $ V, LDV, WORK(N+1), LWORK-N, IERR ) +* + END IF +* + DO 8991 p = 1, N + CALL SCOPY( N, V(p,1), LDV, A(IWORK(p),1), LDA ) + 8991 CONTINUE + CALL SLACPY( 'All', N, N, A, LDA, V, LDV ) +* + IF ( TRANSP ) THEN + CALL SLACPY( 'All', N, N, V, LDV, U, LDU ) + END IF +* + ELSE IF ( LSVEC .AND. ( .NOT. RSVEC ) ) THEN +* +* .. Singular Values and Left Singular Vectors .. +* +* .. second preconditioning step to avoid need to accumulate +* Jacobi rotations in the Jacobi iterations. + DO 1965 p = 1, NR + CALL SCOPY( N-p+1, A(p,p), LDA, U(p,p), 1 ) + 1965 CONTINUE + CALL SLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, U(1,2), LDU ) +* + CALL SGEQRF( N, NR, U, LDU, WORK(N+1), WORK(2*N+1), + $ LWORK-2*N, IERR ) +* + DO 1967 p = 1, NR - 1 + CALL SCOPY( NR-p, U(p,p+1), LDU, U(p+1,p), 1 ) + 1967 CONTINUE + CALL SLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, U(1,2), LDU ) +* + CALL SGESVJ( 'Lower', 'U', 'N', NR,NR, U, LDU, SVA, NR, A, + $ LDA, WORK(N+1), LWORK-N, INFO ) + SCALEM = WORK(N+1) + NUMRANK = NINT(WORK(N+2)) +* + IF ( NR .LT. M ) THEN + CALL SLASET( 'A', M-NR, NR,ZERO, ZERO, U(NR+1,1), LDU ) + IF ( NR .LT. N1 ) THEN + CALL SLASET( 'A',NR, N1-NR, ZERO, ZERO, U(1,NR+1), LDU ) + CALL SLASET( 'A',M-NR,N1-NR,ZERO,ONE,U(NR+1,NR+1), LDU ) + END IF + END IF +* + CALL SORMQR( 'Left', 'No Tr', M, N1, N, A, LDA, WORK, U, + $ LDU, WORK(N+1), LWORK-N, IERR ) +* + IF ( ROWPIV ) + $ CALL SLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 ) +* + DO 1974 p = 1, N1 + XSC = ONE / SNRM2( M, U(1,p), 1 ) + CALL SSCAL( M, XSC, U(1,p), 1 ) + 1974 CONTINUE +* + IF ( TRANSP ) THEN + CALL SLACPY( 'All', N, N, U, LDU, V, LDV ) + END IF +* + ELSE +* +* .. Full SVD .. +* + IF ( .NOT. JRACC ) THEN +* + IF ( .NOT. ALMORT ) THEN +* +* Second Preconditioning Step (QRF [with pivoting]) +* Note that the composition of TRANSPOSE, QRF and TRANSPOSE is +* equivalent to an LQF CALL. Since in many libraries the QRF +* seems to be better optimized than the LQF, we do explicit +* transpose and use the QRF. This is subject to changes in an +* optimized implementation of SGEJSV. +* + DO 1968 p = 1, NR + CALL SCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) + 1968 CONTINUE +* +* .. the following two loops perturb small entries to avoid +* denormals in the second QR factorization, where they are +* as good as zeros. This is done to avoid painfully slow +* computation with denormals. The relative size of the perturbation +* is a parameter that can be changed by the implementer. +* This perturbation device will be obsolete on machines with +* properly implemented arithmetic. +* To switch it off, set L2PERT=.FALSE. To remove it from the +* code, remove the action under L2PERT=.TRUE., leave the ELSE part. +* The following two loops should be blocked and fused with the +* transposed copy above. +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 2969 q = 1, NR + TEMP1 = XSC*ABS( V(q,q) ) + DO 2968 p = 1, N + IF ( ( p .GT. q ) .AND. ( ABS(V(p,q)) .LE. TEMP1 ) + $ .OR. ( p .LT. q ) ) + $ V(p,q) = SIGN( TEMP1, V(p,q) ) + IF ( p .LT. q ) V(p,q) = - V(p,q) + 2968 CONTINUE + 2969 CONTINUE + ELSE + CALL SLASET( 'U', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV ) + END IF +* +* Estimate the row scaled condition number of R1 +* (If R1 is rectangular, N > NR, then the condition number +* of the leading NR x NR submatrix is estimated.) +* + CALL SLACPY( 'L', NR, NR, V, LDV, WORK(2*N+1), NR ) + DO 3950 p = 1, NR + TEMP1 = SNRM2(NR-p+1,WORK(2*N+(p-1)*NR+p),1) + CALL SSCAL(NR-p+1,ONE/TEMP1,WORK(2*N+(p-1)*NR+p),1) + 3950 CONTINUE + CALL SPOCON('Lower',NR,WORK(2*N+1),NR,ONE,TEMP1, + $ WORK(2*N+NR*NR+1),IWORK(M+2*N+1),IERR) + CONDR1 = ONE / SQRT(TEMP1) +* .. here need a second oppinion on the condition number +* .. then assume worst case scenario +* R1 is OK for inverse <=> CONDR1 .LT. FLOAT(N) +* more conservative <=> CONDR1 .LT. SQRT(FLOAT(N)) +* + COND_OK = SQRT(FLOAT(NR)) +*[TP] COND_OK is a tuning parameter. + + IF ( CONDR1 .LT. COND_OK ) THEN +* .. the second QRF without pivoting. Note: in an optimized +* implementation, this QRF should be implemented as the QRF +* of a lower triangular matrix. +* R1^t = Q2 * R2 + CALL SGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1), + $ LWORK-2*N, IERR ) +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL)/EPSLN + DO 3959 p = 2, NR + DO 3958 q = 1, p - 1 + TEMP1 = XSC * MIN(ABS(V(p,p)),ABS(V(q,q))) + IF ( ABS(V(q,p)) .LE. TEMP1 ) + $ V(q,p) = SIGN( TEMP1, V(q,p) ) + 3958 CONTINUE + 3959 CONTINUE + END IF +* + IF ( NR .NE. N ) + $ CALL SLACPY( 'A', N, NR, V, LDV, WORK(2*N+1), N ) +* .. save ... +* +* .. this transposed copy should be better than naive + DO 1969 p = 1, NR - 1 + CALL SCOPY( NR-p, V(p,p+1), LDV, V(p+1,p), 1 ) + 1969 CONTINUE +* + CONDR2 = CONDR1 +* + ELSE +* +* .. ill-conditioned case: second QRF with pivoting +* Note that windowed pivoting would be equaly good +* numerically, and more run-time efficient. So, in +* an optimal implementation, the next call to SGEQP3 +* should be replaced with eg. CALL SGEQPX (ACM TOMS #782) +* with properly (carefully) chosen parameters. +* +* R1^t * P2 = Q2 * R2 + DO 3003 p = 1, NR + IWORK(N+p) = 0 + 3003 CONTINUE + CALL SGEQP3( N, NR, V, LDV, IWORK(N+1), WORK(N+1), + $ WORK(2*N+1), LWORK-2*N, IERR ) +** CALL SGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1), +** $ LWORK-2*N, IERR ) + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 3969 p = 2, NR + DO 3968 q = 1, p - 1 + TEMP1 = XSC * MIN(ABS(V(p,p)),ABS(V(q,q))) + IF ( ABS(V(q,p)) .LE. TEMP1 ) + $ V(q,p) = SIGN( TEMP1, V(q,p) ) + 3968 CONTINUE + 3969 CONTINUE + END IF +* + CALL SLACPY( 'A', N, NR, V, LDV, WORK(2*N+1), N ) +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 8970 p = 2, NR + DO 8971 q = 1, p - 1 + TEMP1 = XSC * MIN(ABS(V(p,p)),ABS(V(q,q))) + V(p,q) = - SIGN( TEMP1, V(q,p) ) + 8971 CONTINUE + 8970 CONTINUE + ELSE + CALL SLASET( 'L',NR-1,NR-1,ZERO,ZERO,V(2,1),LDV ) + END IF +* Now, compute R2 = L3 * Q3, the LQ factorization. + CALL SGELQF( NR, NR, V, LDV, WORK(2*N+N*NR+1), + $ WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, IERR ) +* .. and estimate the condition number + CALL SLACPY( 'L',NR,NR,V,LDV,WORK(2*N+N*NR+NR+1),NR ) + DO 4950 p = 1, NR + TEMP1 = SNRM2( p, WORK(2*N+N*NR+NR+p), NR ) + CALL SSCAL( p, ONE/TEMP1, WORK(2*N+N*NR+NR+p), NR ) + 4950 CONTINUE + CALL SPOCON( 'L',NR,WORK(2*N+N*NR+NR+1),NR,ONE,TEMP1, + $ WORK(2*N+N*NR+NR+NR*NR+1),IWORK(M+2*N+1),IERR ) + CONDR2 = ONE / SQRT(TEMP1) +* + IF ( CONDR2 .GE. COND_OK ) THEN +* .. save the Householder vectors used for Q3 +* (this overwrittes the copy of R2, as it will not be +* needed in this branch, but it does not overwritte the +* Huseholder vectors of Q2.). + CALL SLACPY( 'U', NR, NR, V, LDV, WORK(2*N+1), N ) +* .. and the rest of the information on Q3 is in +* WORK(2*N+N*NR+1:2*N+N*NR+N) + END IF +* + END IF +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 4968 q = 2, NR + TEMP1 = XSC * V(q,q) + DO 4969 p = 1, q - 1 +* V(p,q) = - SIGN( TEMP1, V(q,p) ) + V(p,q) = - SIGN( TEMP1, V(p,q) ) + 4969 CONTINUE + 4968 CONTINUE + ELSE + CALL SLASET( 'U', NR-1,NR-1, ZERO,ZERO, V(1,2), LDV ) + END IF +* +* Second preconditioning finished; continue with Jacobi SVD +* The input matrix is lower trinagular. +* +* Recover the right singular vectors as solution of a well +* conditioned triangular matrix equation. +* + IF ( CONDR1 .LT. COND_OK ) THEN +* + CALL SGESVJ( 'L','U','N',NR,NR,V,LDV,SVA,NR,U, + $ LDU,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,INFO ) + SCALEM = WORK(2*N+N*NR+NR+1) + NUMRANK = NINT(WORK(2*N+N*NR+NR+2)) + DO 3970 p = 1, NR + CALL SCOPY( NR, V(1,p), 1, U(1,p), 1 ) + CALL SSCAL( NR, SVA(p), V(1,p), 1 ) + 3970 CONTINUE + +* .. pick the right matrix equation and solve it +* + IF ( NR .EQ. N ) THEN +* :)) .. best case, R1 is inverted. The solution of this matrix +* equation is Q2*V2 = the product of the Jacobi rotations +* used in SGESVJ, premultiplied with the orthogonal matrix +* from the second QR factorization. + CALL STRSM( 'L','U','N','N', NR,NR,ONE, A,LDA, V,LDV ) + ELSE +* .. R1 is well conditioned, but non-square. Transpose(R2) +* is inverted to get the product of the Jacobi rotations +* used in SGESVJ. The Q-factor from the second QR +* factorization is then built in explicitly. + CALL STRSM('L','U','T','N',NR,NR,ONE,WORK(2*N+1), + $ N,V,LDV) + IF ( NR .LT. N ) THEN + CALL SLASET('A',N-NR,NR,ZERO,ZERO,V(NR+1,1),LDV) + CALL SLASET('A',NR,N-NR,ZERO,ZERO,V(1,NR+1),LDV) + CALL SLASET('A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV) + END IF + CALL SORMQR('L','N',N,N,NR,WORK(2*N+1),N,WORK(N+1), + $ V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR) + END IF +* + ELSE IF ( CONDR2 .LT. COND_OK ) THEN +* +* :) .. the input matrix A is very likely a relative of +* the Kahan matrix :) +* The matrix R2 is inverted. The solution of the matrix equation +* is Q3^T*V3 = the product of the Jacobi rotations (appplied to +* the lower triangular L3 from the LQ factorization of +* R2=L3*Q3), pre-multiplied with the transposed Q3. + CALL SGESVJ( 'L', 'U', 'N', NR, NR, V, LDV, SVA, NR, U, + $ LDU, WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, INFO ) + SCALEM = WORK(2*N+N*NR+NR+1) + NUMRANK = NINT(WORK(2*N+N*NR+NR+2)) + DO 3870 p = 1, NR + CALL SCOPY( NR, V(1,p), 1, U(1,p), 1 ) + CALL SSCAL( NR, SVA(p), U(1,p), 1 ) + 3870 CONTINUE + CALL STRSM('L','U','N','N',NR,NR,ONE,WORK(2*N+1),N,U,LDU) +* .. apply the permutation from the second QR factorization + DO 873 q = 1, NR + DO 872 p = 1, NR + WORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q) + 872 CONTINUE + DO 874 p = 1, NR + U(p,q) = WORK(2*N+N*NR+NR+p) + 874 CONTINUE + 873 CONTINUE + IF ( NR .LT. N ) THEN + CALL SLASET( 'A',N-NR,NR,ZERO,ZERO,V(NR+1,1),LDV ) + CALL SLASET( 'A',NR,N-NR,ZERO,ZERO,V(1,NR+1),LDV ) + CALL SLASET( 'A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV ) + END IF + CALL SORMQR( 'L','N',N,N,NR,WORK(2*N+1),N,WORK(N+1), + $ V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) + ELSE +* Last line of defense. +* #:( This is a rather pathological case: no scaled condition +* improvement after two pivoted QR factorizations. Other +* possibility is that the rank revealing QR factorization +* or the condition estimator has failed, or the COND_OK +* is set very close to ONE (which is unnecessary). Normally, +* this branch should never be executed, but in rare cases of +* failure of the RRQR or condition estimator, the last line of +* defense ensures that SGEJSV completes the task. +* Compute the full SVD of L3 using SGESVJ with explicit +* accumulation of Jacobi rotations. + CALL SGESVJ( 'L', 'U', 'V', NR, NR, V, LDV, SVA, NR, U, + $ LDU, WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, INFO ) + SCALEM = WORK(2*N+N*NR+NR+1) + NUMRANK = NINT(WORK(2*N+N*NR+NR+2)) + IF ( NR .LT. N ) THEN + CALL SLASET( 'A',N-NR,NR,ZERO,ZERO,V(NR+1,1),LDV ) + CALL SLASET( 'A',NR,N-NR,ZERO,ZERO,V(1,NR+1),LDV ) + CALL SLASET( 'A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV ) + END IF + CALL SORMQR( 'L','N',N,N,NR,WORK(2*N+1),N,WORK(N+1), + $ V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) +* + CALL SORMLQ( 'L', 'T', NR, NR, NR, WORK(2*N+1), N, + $ WORK(2*N+N*NR+1), U, LDU, WORK(2*N+N*NR+NR+1), + $ LWORK-2*N-N*NR-NR, IERR ) + DO 773 q = 1, NR + DO 772 p = 1, NR + WORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q) + 772 CONTINUE + DO 774 p = 1, NR + U(p,q) = WORK(2*N+N*NR+NR+p) + 774 CONTINUE + 773 CONTINUE +* + END IF +* +* Permute the rows of V using the (column) permutation from the +* first QRF. Also, scale the columns to make them unit in +* Euclidean norm. This applies to all cases. +* + TEMP1 = SQRT(FLOAT(N)) * EPSLN + DO 1972 q = 1, N + DO 972 p = 1, N + WORK(2*N+N*NR+NR+IWORK(p)) = V(p,q) + 972 CONTINUE + DO 973 p = 1, N + V(p,q) = WORK(2*N+N*NR+NR+p) + 973 CONTINUE + XSC = ONE / SNRM2( N, V(1,q), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL SSCAL( N, XSC, V(1,q), 1 ) + 1972 CONTINUE +* At this moment, V contains the right singular vectors of A. +* Next, assemble the left singular vector matrix U (M x N). + IF ( NR .LT. M ) THEN + CALL SLASET( 'A', M-NR, NR, ZERO, ZERO, U(NR+1,1), LDU ) + IF ( NR .LT. N1 ) THEN + CALL SLASET('A',NR,N1-NR,ZERO,ZERO,U(1,NR+1),LDU) + CALL SLASET('A',M-NR,N1-NR,ZERO,ONE,U(NR+1,NR+1),LDU) + END IF + END IF +* +* The Q matrix from the first QRF is built into the left singular +* matrix U. This applies to all cases. +* + CALL SORMQR( 'Left', 'No_Tr', M, N1, N, A, LDA, WORK, U, + $ LDU, WORK(N+1), LWORK-N, IERR ) + +* The columns of U are normalized. The cost is O(M*N) flops. + TEMP1 = SQRT(FLOAT(M)) * EPSLN + DO 1973 p = 1, NR + XSC = ONE / SNRM2( M, U(1,p), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL SSCAL( M, XSC, U(1,p), 1 ) + 1973 CONTINUE +* +* If the initial QRF is computed with row pivoting, the left +* singular vectors must be adjusted. +* + IF ( ROWPIV ) + $ CALL SLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 ) +* + ELSE +* +* .. the initial matrix A has almost orthogonal columns and +* the second QRF is not needed +* + CALL SLACPY( 'Upper', N, N, A, LDA, WORK(N+1), N ) + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 5970 p = 2, N + TEMP1 = XSC * WORK( N + (p-1)*N + p ) + DO 5971 q = 1, p - 1 + WORK(N+(q-1)*N+p)=-SIGN(TEMP1,WORK(N+(p-1)*N+q)) + 5971 CONTINUE + 5970 CONTINUE + ELSE + CALL SLASET( 'Lower',N-1,N-1,ZERO,ZERO,WORK(N+2),N ) + END IF +* + CALL SGESVJ( 'Upper', 'U', 'N', N, N, WORK(N+1), N, SVA, + $ N, U, LDU, WORK(N+N*N+1), LWORK-N-N*N, INFO ) +* + SCALEM = WORK(N+N*N+1) + NUMRANK = NINT(WORK(N+N*N+2)) + DO 6970 p = 1, N + CALL SCOPY( N, WORK(N+(p-1)*N+1), 1, U(1,p), 1 ) + CALL SSCAL( N, SVA(p), WORK(N+(p-1)*N+1), 1 ) + 6970 CONTINUE +* + CALL STRSM( 'Left', 'Upper', 'NoTrans', 'No UD', N, N, + $ ONE, A, LDA, WORK(N+1), N ) + DO 6972 p = 1, N + CALL SCOPY( N, WORK(N+p), N, V(IWORK(p),1), LDV ) + 6972 CONTINUE + TEMP1 = SQRT(FLOAT(N))*EPSLN + DO 6971 p = 1, N + XSC = ONE / SNRM2( N, V(1,p), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL SSCAL( N, XSC, V(1,p), 1 ) + 6971 CONTINUE +* +* Assemble the left singular vector matrix U (M x N). +* + IF ( N .LT. M ) THEN + CALL SLASET( 'A', M-N, N, ZERO, ZERO, U(N+1,1), LDU ) + IF ( N .LT. N1 ) THEN + CALL SLASET( 'A',N, N1-N, ZERO, ZERO, U(1,N+1),LDU ) + CALL SLASET( 'A',M-N,N1-N, ZERO, ONE,U(N+1,N+1),LDU ) + END IF + END IF + CALL SORMQR( 'Left', 'No Tr', M, N1, N, A, LDA, WORK, U, + $ LDU, WORK(N+1), LWORK-N, IERR ) + TEMP1 = SQRT(FLOAT(M))*EPSLN + DO 6973 p = 1, N1 + XSC = ONE / SNRM2( M, U(1,p), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL SSCAL( M, XSC, U(1,p), 1 ) + 6973 CONTINUE +* + IF ( ROWPIV ) + $ CALL SLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 ) +* + END IF +* +* end of the >> almost orthogonal case << in the full SVD +* + ELSE +* +* This branch deploys a preconditioned Jacobi SVD with explicitly +* accumulated rotations. It is included as optional, mainly for +* experimental purposes. It does perfom well, and can also be used. +* In this implementation, this branch will be automatically activated +* if the condition number sigma_max(A) / sigma_min(A) is predicted +* to be greater than the overflow threshold. This is because the +* a posteriori computation of the singular vectors assumes robust +* implementation of BLAS and some LAPACK procedures, capable of working +* in presence of extreme values. Since that is not always the case, ... +* + DO 7968 p = 1, NR + CALL SCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) + 7968 CONTINUE +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL/EPSLN) + DO 5969 q = 1, NR + TEMP1 = XSC*ABS( V(q,q) ) + DO 5968 p = 1, N + IF ( ( p .GT. q ) .AND. ( ABS(V(p,q)) .LE. TEMP1 ) + $ .OR. ( p .LT. q ) ) + $ V(p,q) = SIGN( TEMP1, V(p,q) ) + IF ( p .LT. q ) V(p,q) = - V(p,q) + 5968 CONTINUE + 5969 CONTINUE + ELSE + CALL SLASET( 'U', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV ) + END IF + + CALL SGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1), + $ LWORK-2*N, IERR ) + CALL SLACPY( 'L', N, NR, V, LDV, WORK(2*N+1), N ) +* + DO 7969 p = 1, NR + CALL SCOPY( NR-p+1, V(p,p), LDV, U(p,p), 1 ) + 7969 CONTINUE + + IF ( L2PERT ) THEN + XSC = SQRT(SMALL/EPSLN) + DO 9970 q = 2, NR + DO 9971 p = 1, q - 1 + TEMP1 = XSC * MIN(ABS(U(p,p)),ABS(U(q,q))) + U(p,q) = - SIGN( TEMP1, U(q,p) ) + 9971 CONTINUE + 9970 CONTINUE + ELSE + CALL SLASET('U', NR-1, NR-1, ZERO, ZERO, U(1,2), LDU ) + END IF + + CALL SGESVJ( 'L', 'U', 'V', NR, NR, U, LDU, SVA, + $ N, V, LDV, WORK(2*N+N*NR+1), LWORK-2*N-N*NR, INFO ) + SCALEM = WORK(2*N+N*NR+1) + NUMRANK = NINT(WORK(2*N+N*NR+2)) + + IF ( NR .LT. N ) THEN + CALL SLASET( 'A',N-NR,NR,ZERO,ZERO,V(NR+1,1),LDV ) + CALL SLASET( 'A',NR,N-NR,ZERO,ZERO,V(1,NR+1),LDV ) + CALL SLASET( 'A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV ) + END IF + + CALL SORMQR( 'L','N',N,N,NR,WORK(2*N+1),N,WORK(N+1), + $ V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) +* +* Permute the rows of V using the (column) permutation from the +* first QRF. Also, scale the columns to make them unit in +* Euclidean norm. This applies to all cases. +* + TEMP1 = SQRT(FLOAT(N)) * EPSLN + DO 7972 q = 1, N + DO 8972 p = 1, N + WORK(2*N+N*NR+NR+IWORK(p)) = V(p,q) + 8972 CONTINUE + DO 8973 p = 1, N + V(p,q) = WORK(2*N+N*NR+NR+p) + 8973 CONTINUE + XSC = ONE / SNRM2( N, V(1,q), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL SSCAL( N, XSC, V(1,q), 1 ) + 7972 CONTINUE +* +* At this moment, V contains the right singular vectors of A. +* Next, assemble the left singular vector matrix U (M x N). +* + IF ( NR .LT. M ) THEN + CALL SLASET( 'A', M-NR, NR, ZERO, ZERO, U(NR+1,1), LDU ) + IF ( NR .LT. N1 ) THEN + CALL SLASET( 'A',NR, N1-NR, ZERO, ZERO, U(1,NR+1),LDU ) + CALL SLASET( 'A',M-NR,N1-NR, ZERO, ONE,U(NR+1,NR+1),LDU ) + END IF + END IF +* + CALL SORMQR( 'Left', 'No Tr', M, N1, N, A, LDA, WORK, U, + $ LDU, WORK(N+1), LWORK-N, IERR ) +* + IF ( ROWPIV ) + $ CALL SLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 ) +* +* + END IF + IF ( TRANSP ) THEN +* .. swap U and V because the procedure worked on A^t + DO 6974 p = 1, N + CALL SSWAP( N, U(1,p), 1, V(1,p), 1 ) + 6974 CONTINUE + END IF +* + END IF +* end of the full SVD +* +* Undo scaling, if necessary (and possible) +* + IF ( USCAL2 .LE. (BIG/SVA(1))*USCAL1 ) THEN + CALL SLASCL( 'G', 0, 0, USCAL1, USCAL2, NR, 1, SVA, N, IERR ) + USCAL1 = ONE + USCAL2 = ONE + END IF +* + IF ( NR .LT. N ) THEN + DO 3004 p = NR+1, N + SVA(p) = ZERO + 3004 CONTINUE + END IF +* + WORK(1) = USCAL2 * SCALEM + WORK(2) = USCAL1 + IF ( ERREST ) WORK(3) = SCONDA + IF ( LSVEC .AND. RSVEC ) THEN + WORK(4) = CONDR1 + WORK(5) = CONDR2 + END IF + IF ( L2TRAN ) THEN + WORK(6) = ENTRA + WORK(7) = ENTRAT + END IF +* + IWORK(1) = NR + IWORK(2) = NUMRANK + IWORK(3) = WARNING +* + RETURN +* .. +* .. END OF SGEJSV +* .. + END +* diff --git a/dspl/liblapack/SRC/sgelq.f b/dspl/liblapack/SRC/sgelq.f new file mode 100644 index 0000000..4fe4d19 --- /dev/null +++ b/dspl/liblapack/SRC/sgelq.f @@ -0,0 +1,305 @@ +* +* Definition: +* =========== +* +* SUBROUTINE SGELQ( M, N, A, LDA, T, TSIZE, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, TSIZE, LWORK +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), T( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> SGELQ computes a LQ factorization of an M-by-N matrix A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the M-by-min(M,N) lower trapezoidal matrix L +*> (L is lower triangular if M <= N); +*> the elements above the diagonal are used to store part of the +*> data structure to represent Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (MAX(5,TSIZE)) +*> On exit, if INFO = 0, T(1) returns optimal (or either minimal +*> or optimal, if query is assumed) TSIZE. See TSIZE for details. +*> Remaining T contains part of the data structure used to represent Q. +*> If one wants to apply or construct Q, then one needs to keep T +*> (in addition to A) and pass it to further subroutines. +*> \endverbatim +*> +*> \param[in] TSIZE +*> \verbatim +*> TSIZE is INTEGER +*> If TSIZE >= 5, the dimension of the array T. +*> If TSIZE = -1 or -2, then a workspace query is assumed. The routine +*> only calculates the sizes of the T and WORK arrays, returns these +*> values as the first entries of the T and WORK arrays, and no error +*> message related to T or WORK is issued by XERBLA. +*> If TSIZE = -1, the routine calculates optimal size of T for the +*> optimum performance and returns this value in T(1). +*> If TSIZE = -2, the routine calculates minimal size of T and +*> returns this value in T(1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) contains optimal (or either minimal +*> or optimal, if query was assumed) LWORK. +*> See LWORK for details. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If LWORK = -1 or -2, then a workspace query is assumed. The routine +*> only calculates the sizes of the T and WORK arrays, returns these +*> values as the first entries of the T and WORK arrays, and no error +*> message related to T or WORK is issued by XERBLA. +*> If LWORK = -1, the routine calculates optimal size of WORK for the +*> optimal performance and returns this value in WORK(1). +*> If LWORK = -2, the routine calculates minimal size of WORK and +*> returns this value in WORK(1). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details +* ==================== +*> +*> \verbatim +*> +*> The goal of the interface is to give maximum freedom to the developers for +*> creating any LQ factorization algorithm they wish. The triangular +*> (trapezoidal) L has to be stored in the lower part of A. The lower part of A +*> and the array T can be used to store any relevant information for applying or +*> constructing the Q factor. The WORK array can safely be discarded after exit. +*> +*> Caution: One should not expect the sizes of T and WORK to be the same from one +*> LAPACK implementation to the other, or even from one execution to the other. +*> A workspace query (for T and WORK) is needed at each execution. However, +*> for a given execution, the size of T and WORK are fixed and will not change +*> from one query to the next. +*> +*> \endverbatim +*> +*> \par Further Details particular to this LAPACK implementation: +* ============================================================== +*> +*> \verbatim +*> +*> These details are particular for this LAPACK implementation. Users should not +*> take them for granted. These details may change in the future, and are unlikely not +*> true for another LAPACK implementation. These details are relevant if one wants +*> to try to understand the code. They are not part of the interface. +*> +*> In this version, +*> +*> T(2): row block size (MB) +*> T(3): column block size (NB) +*> T(6:TSIZE): data structure needed for Q, computed by +*> SLASWLQ or SGELQT +*> +*> Depending on the matrix dimensions M and N, and row and column +*> block sizes MB and NB returned by ILAENV, SGELQ will use either +*> SLASWLQ (if the matrix is short-and-wide) or SGELQT to compute +*> the LQ factorization. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGELQ( M, N, A, LDA, T, TSIZE, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, TSIZE, LWORK +* .. +* .. Array Arguments .. + REAL A( LDA, * ), T( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LQUERY, LMINWS, MINT, MINW + INTEGER MB, NB, MINTSZ, NBLCKS +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGELQT, SLASWLQ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, MOD +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable statements .. +* +* Test the input arguments +* + INFO = 0 +* + LQUERY = ( TSIZE.EQ.-1 .OR. TSIZE.EQ.-2 .OR. + $ LWORK.EQ.-1 .OR. LWORK.EQ.-2 ) +* + MINT = .FALSE. + MINW = .FALSE. + IF( TSIZE.EQ.-2 .OR. LWORK.EQ.-2 ) THEN + IF( TSIZE.NE.-1 ) MINT = .TRUE. + IF( LWORK.NE.-1 ) MINW = .TRUE. + END IF +* +* Determine the block size +* + IF( MIN( M, N ).GT.0 ) THEN + MB = ILAENV( 1, 'SGELQ ', ' ', M, N, 1, -1 ) + NB = ILAENV( 1, 'SGELQ ', ' ', M, N, 2, -1 ) + ELSE + MB = 1 + NB = N + END IF + IF( MB.GT.MIN( M, N ) .OR. MB.LT.1 ) MB = 1 + IF( NB.GT.N .OR. NB.LE.M ) NB = N + MINTSZ = M + 5 + IF ( NB.GT.M .AND. N.GT.M ) THEN + IF( MOD( N - M, NB - M ).EQ.0 ) THEN + NBLCKS = ( N - M ) / ( NB - M ) + ELSE + NBLCKS = ( N - M ) / ( NB - M ) + 1 + END IF + ELSE + NBLCKS = 1 + END IF +* +* Determine if the workspace size satisfies minimal size +* + LMINWS = .FALSE. + IF( ( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) .OR. LWORK.LT.MB*M ) + $ .AND. ( LWORK.GE.M ) .AND. ( TSIZE.GE.MINTSZ ) + $ .AND. ( .NOT.LQUERY ) ) THEN + IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) ) THEN + LMINWS = .TRUE. + MB = 1 + NB = N + END IF + IF( LWORK.LT.MB*M ) THEN + LMINWS = .TRUE. + MB = 1 + END IF + END IF +* + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) + $ .AND. ( .NOT.LQUERY ) .AND. ( .NOT.LMINWS ) ) THEN + INFO = -6 + ELSE IF( ( LWORK.LT.MAX( 1, M*MB ) ) .AND .( .NOT.LQUERY ) + $ .AND. ( .NOT.LMINWS ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( MINT ) THEN + T( 1 ) = MINTSZ + ELSE + T( 1 ) = MB*M*NBLCKS + 5 + END IF + T( 2 ) = MB + T( 3 ) = NB + IF( MINW ) THEN + WORK( 1 ) = MAX( 1, N ) + ELSE + WORK( 1 ) = MAX( 1, MB*M ) + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N ).EQ.0 ) THEN + RETURN + END IF +* +* The LQ Decomposition +* + IF( ( N.LE.M ) .OR. ( NB.LE.M ) .OR. ( NB.GE.N ) ) THEN + CALL SGELQT( M, N, MB, A, LDA, T( 6 ), MB, WORK, INFO ) + ELSE + CALL SLASWLQ( M, N, MB, NB, A, LDA, T( 6 ), MB, WORK, + $ LWORK, INFO ) + END IF +* + WORK( 1 ) = MAX( 1, MB*M ) + RETURN +* +* End of SGELQ +* + END diff --git a/dspl/liblapack/SRC/sgelq2.f b/dspl/liblapack/SRC/sgelq2.f new file mode 100644 index 0000000..5b1ad21 --- /dev/null +++ b/dspl/liblapack/SRC/sgelq2.f @@ -0,0 +1,192 @@ +*> \brief \b SGELQ2 computes the LQ factorization of a general rectangular matrix using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGELQ2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGELQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGELQ2 computes an LQ factorization of a real m by n matrix A: +*> A = L * Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the m by min(m,n) lower trapezoidal matrix L (L is +*> lower triangular if m <= n); the elements above the diagonal, +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), +*> and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGELQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + REAL AII +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELQ2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i,i+1:n) +* + CALL SLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, + $ TAU( I ) ) + IF( I.LT.M ) THEN +* +* Apply H(i) to A(i+1:m,i:n) from the right +* + AII = A( I, I ) + A( I, I ) = ONE + CALL SLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ), + $ A( I+1, I ), LDA, WORK ) + A( I, I ) = AII + END IF + 10 CONTINUE + RETURN +* +* End of SGELQ2 +* + END diff --git a/dspl/liblapack/SRC/sgelqf.f b/dspl/liblapack/SRC/sgelqf.f new file mode 100644 index 0000000..99c03c0 --- /dev/null +++ b/dspl/liblapack/SRC/sgelqf.f @@ -0,0 +1,269 @@ +*> \brief \b SGELQF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGELQF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGELQF computes an LQ factorization of a real M-by-N matrix A: +*> A = L * Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the m-by-min(m,n) lower trapezoidal matrix L (L is +*> lower triangular if m <= n); the elements above the diagonal, +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n), +*> and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SGELQ2, SLARFB, SLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SGELQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SGELQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the LQ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL SGELQ2( IB, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL SLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i+ib:m,i:n) from the right +* + CALL SLARFB( 'Right', 'No transpose', 'Forward', + $ 'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ), + $ LDA, WORK, LDWORK, A( I+IB, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL SGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of SGELQF +* + END diff --git a/dspl/liblapack/SRC/sgelqt.f b/dspl/liblapack/SRC/sgelqt.f new file mode 100644 index 0000000..9a93af3 --- /dev/null +++ b/dspl/liblapack/SRC/sgelqt.f @@ -0,0 +1,193 @@ +* Definition: +* =========== +* +* SUBROUTINE SGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDT, M, N, MB +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGELQT computes a blocked LQ factorization of a real M-by-N matrix A +*> using the compact WY representation of Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The block size to be used in the blocked QR. MIN(M,N) >= MB >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the M-by-MIN(M,N) lower trapezoidal matrix L (L is +*> lower triangular if M <= N); the elements above the diagonal +*> are the rows of V. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (LDT,MIN(M,N)) +*> The upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See below +*> for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MB*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix V stores the elementary reflectors H(i) in the i-th row +*> above the diagonal. For example, if M=5 and N=3, the matrix V is +*> +*> V = ( 1 v1 v1 v1 v1 ) +*> ( 1 v2 v2 v2 ) +*> ( 1 v3 v3 ) +*> +*> +*> where the vi's represent the vectors which define H(i), which are returned +*> in the matrix A. The 1's along the diagonal of V are not stored in A. +*> Let K=MIN(M,N). The number of blocks is B = ceiling(K/MB), where each +*> block is of order MB except for the last block, which is of order +*> IB = K - (B-1)*MB. For each of the B blocks, a upper triangular block +*> reflector factor is computed: T1, T2, ..., TB. The MB-by-MB (and IB-by-IB +*> for the last block) T's are stored in the MB-by-K matrix T as +*> +*> T = (T1 T2 ... TB). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDT, M, N, MB +* .. +* .. Array Arguments .. + REAL A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + INTEGER I, IB, IINFO, K +* .. +* .. External Subroutines .. + EXTERNAL SGEQRT2, SGEQRT3, SGELQT3, SLARFB, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( MB.LT.1 .OR. ( MB.GT.MIN(M,N) .AND. MIN(M,N).GT.0 ) )THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDT.LT.MB ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELQT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) RETURN +* +* Blocked loop of length K +* + DO I = 1, K, MB + IB = MIN( K-I+1, MB ) +* +* Compute the LQ factorization of the current block A(I:M,I:I+IB-1) +* + CALL SGELQT3( IB, N-I+1, A(I,I), LDA, T(1,I), LDT, IINFO ) + IF( I+IB.LE.M ) THEN +* +* Update by applying H**T to A(I:M,I+IB:N) from the right +* + CALL SLARFB( 'R', 'N', 'F', 'R', M-I-IB+1, N-I+1, IB, + $ A( I, I ), LDA, T( 1, I ), LDT, + $ A( I+IB, I ), LDA, WORK , M-I-IB+1 ) + END IF + END DO + RETURN +* +* End of SGELQT +* + END diff --git a/dspl/liblapack/SRC/sgelqt3.f b/dspl/liblapack/SRC/sgelqt3.f new file mode 100644 index 0000000..292ae88 --- /dev/null +++ b/dspl/liblapack/SRC/sgelqt3.f @@ -0,0 +1,242 @@ +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE SGELQT3( M, N, A, LDA, T, LDT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, LDT +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGELQT3 recursively computes a LQ factorization of a real M-by-N +*> matrix A, using the compact WY representation of Q. +*> +*> Based on the algorithm of Elmroth and Gustavson, +*> IBM J. Res. Develop. Vol 44 No. 4 July 2000. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M =< N. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the real M-by-N matrix A. On exit, the elements on and +*> below the diagonal contain the N-by-N lower triangular matrix L; the +*> elements above the diagonal are the rows of V. See below for +*> further details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (LDT,N) +*> The N-by-N upper triangular factor of the block reflector. +*> The elements on and above the diagonal contain the block +*> reflector T; the elements below the diagonal are not used. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix V stores the elementary reflectors H(i) in the i-th row +*> above the diagonal. For example, if M=5 and N=3, the matrix V is +*> +*> V = ( 1 v1 v1 v1 v1 ) +*> ( 1 v2 v2 v2 ) +*> ( 1 v3 v3 v3 ) +*> +*> +*> where the vi's represent the vectors which define H(i), which are returned +*> in the matrix A. The 1's along the diagonal of V are not stored in A. The +*> block reflector H is then given by +*> +*> H = I - V * T * V**T +*> +*> where V**T is the transpose of V. +*> +*> For details of the algorithm, see Elmroth and Gustavson (cited above). +*> \endverbatim +*> +* ===================================================================== + RECURSIVE SUBROUTINE SGELQT3( M, N, A, LDA, T, LDT, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, LDT +* .. +* .. Array Arguments .. + REAL A( LDA, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+00 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, J, J1, M1, M2, IINFO +* .. +* .. External Subroutines .. + EXTERNAL SLARFG, STRMM, SGEMM, XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( M .LT. 0 ) THEN + INFO = -1 + ELSE IF( N .LT. M ) THEN + INFO = -2 + ELSE IF( LDA .LT. MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LDT .LT. MAX( 1, M ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELQT3', -INFO ) + RETURN + END IF +* + IF( M.EQ.1 ) THEN +* +* Compute Householder transform when N=1 +* + CALL SLARFG( N, A, A( 1, MIN( 2, N ) ), LDA, T ) +* + ELSE +* +* Otherwise, split A into blocks... +* + M1 = M/2 + M2 = M-M1 + I1 = MIN( M1+1, M ) + J1 = MIN( M+1, N ) +* +* Compute A(1:M1,1:N) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1^H +* + CALL SGELQT3( M1, N, A, LDA, T, LDT, IINFO ) +* +* Compute A(J1:M,1:N) = Q1^H A(J1:M,1:N) [workspace: T(1:N1,J1:N)] +* + DO I=1,M2 + DO J=1,M1 + T( I+M1, J ) = A( I+M1, J ) + END DO + END DO + CALL STRMM( 'R', 'U', 'T', 'U', M2, M1, ONE, + & A, LDA, T( I1, 1 ), LDT ) +* + CALL SGEMM( 'N', 'T', M2, M1, N-M1, ONE, A( I1, I1 ), LDA, + & A( 1, I1 ), LDA, ONE, T( I1, 1 ), LDT) +* + CALL STRMM( 'R', 'U', 'N', 'N', M2, M1, ONE, + & T, LDT, T( I1, 1 ), LDT ) +* + CALL SGEMM( 'N', 'N', M2, N-M1, M1, -ONE, T( I1, 1 ), LDT, + & A( 1, I1 ), LDA, ONE, A( I1, I1 ), LDA ) +* + CALL STRMM( 'R', 'U', 'N', 'U', M2, M1 , ONE, + & A, LDA, T( I1, 1 ), LDT ) +* + DO I=1,M2 + DO J=1,M1 + A( I+M1, J ) = A( I+M1, J ) - T( I+M1, J ) + T( I+M1, J )=0 + END DO + END DO +* +* Compute A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2^H +* + CALL SGELQT3( M2, N-M1, A( I1, I1 ), LDA, + & T( I1, I1 ), LDT, IINFO ) +* +* Compute T3 = T(J1:N1,1:N) = -T1 Y1^H Y2 T2 +* + DO I=1,M2 + DO J=1,M1 + T( J, I+M1 ) = (A( J, I+M1 )) + END DO + END DO +* + CALL STRMM( 'R', 'U', 'T', 'U', M1, M2, ONE, + & A( I1, I1 ), LDA, T( 1, I1 ), LDT ) +* + CALL SGEMM( 'N', 'T', M1, M2, N-M, ONE, A( 1, J1 ), LDA, + & A( I1, J1 ), LDA, ONE, T( 1, I1 ), LDT ) +* + CALL STRMM( 'L', 'U', 'N', 'N', M1, M2, -ONE, T, LDT, + & T( 1, I1 ), LDT ) +* + CALL STRMM( 'R', 'U', 'N', 'N', M1, M2, ONE, + & T( I1, I1 ), LDT, T( 1, I1 ), LDT ) +* +* +* +* Y = (Y1,Y2); L = [ L1 0 ]; T = [T1 T3] +* [ A(1:N1,J1:N) L2 ] [ 0 T2] +* + END IF +* + RETURN +* +* End of SGELQT3 +* + END diff --git a/dspl/liblapack/SRC/sgels.f b/dspl/liblapack/SRC/sgels.f new file mode 100644 index 0000000..ee50f30 --- /dev/null +++ b/dspl/liblapack/SRC/sgels.f @@ -0,0 +1,504 @@ +*> \brief SGELS solves overdetermined or underdetermined systems for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGELS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGELS solves overdetermined or underdetermined real linear systems +*> involving an M-by-N matrix A, or its transpose, using a QR or LQ +*> factorization of A. It is assumed that A has full rank. +*> +*> The following options are provided: +*> +*> 1. If TRANS = 'N' and m >= n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A*X ||. +*> +*> 2. If TRANS = 'N' and m < n: find the minimum norm solution of +*> an underdetermined system A * X = B. +*> +*> 3. If TRANS = 'T' and m >= n: find the minimum norm solution of +*> an underdetermined system A**T * X = B. +*> +*> 4. If TRANS = 'T' and m < n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A**T * X ||. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': the linear system involves A; +*> = 'T': the linear system involves A**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of the matrices B and X. NRHS >=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if M >= N, A is overwritten by details of its QR +*> factorization as returned by SGEQRF; +*> if M < N, A is overwritten by details of its LQ +*> factorization as returned by SGELQF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the matrix B of right hand side vectors, stored +*> columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +*> if TRANS = 'T'. +*> On exit, if INFO = 0, B is overwritten by the solution +*> vectors, stored columnwise: +*> if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +*> squares solution vectors; the residual sum of squares for the +*> solution in each column is given by the sum of squares of +*> elements N+1 to M in that column; +*> if TRANS = 'N' and m < n, rows 1 to N of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'T' and m >= n, rows 1 to M of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'T' and m < n, rows 1 to M of B contain the +*> least squares solution vectors; the residual sum of squares +*> for the solution in each column is given by the sum of +*> squares of elements M+1 to N in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= MAX(1,M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= max( 1, MN + max( MN, NRHS ) ). +*> For optimal performance, +*> LWORK >= max( 1, MN + max( MN, NRHS )*NB ). +*> where MN = min(M,N) and NB is the optimum block size. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of the +*> triangular factor of A is zero, so that A does not have +*> full rank; the least squares solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEsolve +* +* ===================================================================== + SUBROUTINE SGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TPSD + INTEGER BROW, I, IASCL, IBSCL, J, MN, NB, SCLLEN, WSIZE + REAL ANRM, BIGNUM, BNRM, SMLNUM +* .. +* .. Local Arrays .. + REAL RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SGELQF, SGEQRF, SLABAD, SLASCL, SLASET, SORMLQ, + $ SORMQR, STRTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'T' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, MN + MAX( MN, NRHS ) ) .AND. + $ .NOT.LQUERY ) THEN + INFO = -10 + END IF +* +* Figure out optimal block size +* + IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN +* + TPSD = .TRUE. + IF( LSAME( TRANS, 'N' ) ) + $ TPSD = .FALSE. +* + IF( M.GE.N ) THEN + NB = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + IF( TPSD ) THEN + NB = MAX( NB, ILAENV( 1, 'SORMQR', 'LN', M, NRHS, N, + $ -1 ) ) + ELSE + NB = MAX( NB, ILAENV( 1, 'SORMQR', 'LT', M, NRHS, N, + $ -1 ) ) + END IF + ELSE + NB = ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + IF( TPSD ) THEN + NB = MAX( NB, ILAENV( 1, 'SORMLQ', 'LT', N, NRHS, M, + $ -1 ) ) + ELSE + NB = MAX( NB, ILAENV( 1, 'SORMLQ', 'LN', N, NRHS, M, + $ -1 ) ) + END IF + END IF +* + WSIZE = MAX( 1, MN + MAX( MN, NRHS )*NB ) + WORK( 1 ) = REAL( WSIZE ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELS ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL SLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + GO TO 50 + END IF +* + BROW = M + IF( TPSD ) + $ BROW = N + BNRM = SLANGE( 'M', BROW, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF( M.GE.N ) THEN +* +* compute QR factorization of A +* + CALL SGEQRF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least N, optimally N*NB +* + IF( .NOT.TPSD ) THEN +* +* Least-Squares Problem min || A * X - B || +* +* B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) +* + CALL SORMQR( 'Left', 'Transpose', M, NRHS, N, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* +* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL STRTRS( 'Upper', 'No transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = N +* + ELSE +* +* Underdetermined system of equations A**T * X = B +* +* B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) +* + CALL STRTRS( 'Upper', 'Transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* B(N+1:M,1:NRHS) = ZERO +* + DO 20 J = 1, NRHS + DO 10 I = N + 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) +* + CALL SORMQR( 'Left', 'No transpose', M, NRHS, N, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* + SCLLEN = M +* + END IF +* + ELSE +* +* Compute LQ factorization of A +* + CALL SGELQF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least M, optimally M*NB. +* + IF( .NOT.TPSD ) THEN +* +* underdetermined system of equations A * X = B +* +* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL STRTRS( 'Lower', 'No transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* B(M+1:N,1:NRHS) = 0 +* + DO 40 J = 1, NRHS + DO 30 I = M + 1, N + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS) +* + CALL SORMLQ( 'Left', 'Transpose', N, NRHS, M, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* + SCLLEN = N +* + ELSE +* +* overdetermined system min || A**T * X - B || +* +* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) +* + CALL SORMLQ( 'Left', 'No transpose', N, NRHS, M, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* +* B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) +* + CALL STRTRS( 'Lower', 'Transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + 50 CONTINUE + WORK( 1 ) = REAL( WSIZE ) +* + RETURN +* +* End of SGELS +* + END diff --git a/dspl/liblapack/SRC/sgelsd.f b/dspl/liblapack/SRC/sgelsd.f new file mode 100644 index 0000000..9a18961 --- /dev/null +++ b/dspl/liblapack/SRC/sgelsd.f @@ -0,0 +1,633 @@ +*> \brief SGELSD computes the minimum-norm solution to a linear least squares problem for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGELSD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, +* RANK, WORK, LWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGELSD computes the minimum-norm solution to a real linear least +*> squares problem: +*> minimize 2-norm(| b - A*x |) +*> using the singular value decomposition (SVD) of A. A is an M-by-N +*> matrix which may be rank-deficient. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> +*> The problem is solved in three steps: +*> (1) Reduce the coefficient matrix A to bidiagonal form with +*> Householder transformations, reducing the original problem +*> into a "bidiagonal least squares problem" (BLS) +*> (2) Solve the BLS using a divide and conquer approach. +*> (3) Apply back all the Householder transformations to solve +*> the original least squares problem. +*> +*> The effective rank of A is determined by treating as zero those +*> singular values which are less than RCOND times the largest singular +*> value. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A has been destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the M-by-NRHS right hand side matrix B. +*> On exit, B is overwritten by the N-by-NRHS solution +*> matrix X. If m >= n and RANK = n, the residual +*> sum-of-squares for the solution in the i-th column is given +*> by the sum of squares of elements n+1:m in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,max(M,N)). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (min(M,N)) +*> The singular values of A in decreasing order. +*> The condition number of A in the 2-norm = S(1)/S(min(m,n)). +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is REAL +*> RCOND is used to determine the effective rank of A. +*> Singular values S(i) <= RCOND*S(1) are treated as zero. +*> If RCOND < 0, machine precision is used instead. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The effective rank of A, i.e., the number of singular values +*> which are greater than RCOND*S(1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK must be at least 1. +*> The exact minimum amount of workspace needed depends on M, +*> N and NRHS. As long as LWORK is at least +*> 12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2, +*> if M is greater than or equal to N or +*> 12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2, +*> if M is less than N, the code will execute correctly. +*> SMLSIZ is returned by ILAENV and is equal to the maximum +*> size of the subproblems at the bottom of the computation +*> tree (usually about 25), and +*> NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the array WORK and the +*> minimum size of the array IWORK, and returns these values as +*> the first entries of the WORK and IWORK arrays, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> LIWORK >= max(1, 3*MINMN*NLVL + 11*MINMN), +*> where MINMN = MIN( M,N ). +*> On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: the algorithm for computing the SVD failed to converge; +*> if INFO = i, i off-diagonal elements of an intermediate +*> bidiagonal form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realGEsolve +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Ren-Cang Li, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +* +* ===================================================================== + SUBROUTINE SGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, + $ RANK, WORK, LWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER IASCL, IBSCL, IE, IL, ITAU, ITAUP, ITAUQ, + $ LDWORK, LIWORK, MAXMN, MAXWRK, MINMN, MINWRK, + $ MM, MNTHR, NLVL, NWORK, SMLSIZ, WLALSD + REAL ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM +* .. +* .. External Subroutines .. + EXTERNAL SGEBRD, SGELQF, SGEQRF, SLABAD, SLACPY, SLALSD, + $ SLASCL, SLASET, SORMBR, SORMLQ, SORMQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL SLAMCH, SLANGE, ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, LOG, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN + INFO = -7 + END IF +* +* Compute workspace. +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + LIWORK = 1 + IF( MINMN.GT.0 ) THEN + SMLSIZ = ILAENV( 9, 'SGELSD', ' ', 0, 0, 0, 0 ) + MNTHR = ILAENV( 6, 'SGELSD', ' ', M, N, NRHS, -1 ) + NLVL = MAX( INT( LOG( REAL( MINMN ) / REAL( SMLSIZ + 1 ) ) / + $ LOG( TWO ) ) + 1, 0 ) + LIWORK = 3*MINMN*NLVL + 11*MINMN + MM = M + IF( M.GE.N .AND. M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than +* columns. +* + MM = N + MAXWRK = MAX( MAXWRK, N + N*ILAENV( 1, 'SGEQRF', ' ', M, + $ N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, N + NRHS*ILAENV( 1, 'SORMQR', 'LT', + $ M, NRHS, N, -1 ) ) + END IF + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined. +* + MAXWRK = MAX( MAXWRK, 3*N + ( MM + N )*ILAENV( 1, + $ 'SGEBRD', ' ', MM, N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*N + NRHS*ILAENV( 1, 'SORMBR', + $ 'QLT', MM, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*N + ( N - 1 )*ILAENV( 1, + $ 'SORMBR', 'PLN', N, NRHS, N, -1 ) ) + WLALSD = 9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + + $ ( SMLSIZ + 1 )**2 + MAXWRK = MAX( MAXWRK, 3*N + WLALSD ) + MINWRK = MAX( 3*N + MM, 3*N + NRHS, 3*N + WLALSD ) + END IF + IF( N.GT.M ) THEN + WLALSD = 9*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + + $ ( SMLSIZ + 1 )**2 + IF( N.GE.MNTHR ) THEN +* +* Path 2a - underdetermined, with many more columns +* than rows. +* + MAXWRK = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + 2*M*ILAENV( 1, + $ 'SGEBRD', ' ', M, M, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + NRHS*ILAENV( 1, + $ 'SORMBR', 'QLT', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + ( M - 1 )*ILAENV( 1, + $ 'SORMBR', 'PLN', M, NRHS, M, -1 ) ) + IF( NRHS.GT.1 ) THEN + MAXWRK = MAX( MAXWRK, M*M + M + M*NRHS ) + ELSE + MAXWRK = MAX( MAXWRK, M*M + 2*M ) + END IF + MAXWRK = MAX( MAXWRK, M + NRHS*ILAENV( 1, 'SORMLQ', + $ 'LT', N, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + WLALSD ) +! XXX: Ensure the Path 2a case below is triggered. The workspace +! calculation should use queries for all routines eventually. + MAXWRK = MAX( MAXWRK, + $ 4*M+M*M+MAX( M, 2*M-4, NRHS, N-3*M ) ) + ELSE +* +* Path 2 - remaining underdetermined cases. +* + MAXWRK = 3*M + ( N + M )*ILAENV( 1, 'SGEBRD', ' ', M, + $ N, -1, -1 ) + MAXWRK = MAX( MAXWRK, 3*M + NRHS*ILAENV( 1, 'SORMBR', + $ 'QLT', M, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*M + M*ILAENV( 1, 'SORMBR', + $ 'PLN', N, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 3*M + WLALSD ) + END IF + MINWRK = MAX( 3*M + NRHS, 3*M + M, 3*M + WLALSD ) + END IF + END IF + MINWRK = MIN( MINWRK, MAXWRK ) + WORK( 1 ) = MAXWRK + IWORK( 1 ) = LIWORK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELSD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters. +* + EPS = SLAMCH( 'P' ) + SFMIN = SLAMCH( 'S' ) + SMLNUM = SFMIN / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A if max entry outside range [SMLNUM,BIGNUM]. +* + ANRM = SLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM. +* + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM. +* + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + CALL SLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 ) + RANK = 0 + GO TO 10 + END IF +* +* Scale B if max entry outside range [SMLNUM,BIGNUM]. +* + BNRM = SLANGE( 'M', M, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM. +* + CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM. +* + CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* If M < N make sure certain entries of B are zero. +* + IF( M.LT.N ) + $ CALL SLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB ) +* +* Overdetermined case. +* + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined. +* + MM = M + IF( M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns. +* + MM = N + ITAU = 1 + NWORK = ITAU + N +* +* Compute A=Q*R. +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Multiply B by transpose(Q). +* (Workspace: need N+NRHS, prefer N+NRHS*NB) +* + CALL SORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Zero out below R. +* + IF( N.GT.1 ) THEN + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + END IF + END IF +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A. +* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) +* + CALL SGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of R. +* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) +* + CALL SORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL SLALSD( 'U', SMLSIZ, N, NRHS, S, WORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of R. +* + CALL SORMBR( 'P', 'L', 'N', N, NRHS, N, A, LDA, WORK( ITAUP ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+ + $ MAX( M, 2*M-4, NRHS, N-3*M, WLALSD ) ) THEN +* +* Path 2a - underdetermined, with many more columns than rows +* and sufficient workspace for an efficient algorithm. +* + LDWORK = M + IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ), + $ M*LDA+M+M*NRHS, 4*M+M*LDA+WLALSD ) )LDWORK = LDA + ITAU = 1 + NWORK = M + 1 +* +* Compute A=L*Q. +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) + IL = NWORK +* +* Copy L to WORK(IL), zeroing out above its diagonal. +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ), + $ LDWORK ) + IE = IL + LDWORK*M + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL). +* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of L. +* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +* + CALL SORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUQ ), B, LDB, WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL SLALSD( 'U', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of L. +* + CALL SORMBR( 'P', 'L', 'N', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUP ), B, LDB, WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Zero out below first M rows of B. +* + CALL SLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB ) + NWORK = ITAU + M +* +* Multiply transpose(Q) by B. +* (Workspace: need M+NRHS, prefer M+NRHS*NB) +* + CALL SORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + ELSE +* +* Path 2 - remaining underdetermined cases. +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize A. +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors. +* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) +* + CALL SORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL SLALSD( 'L', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of A. +* + CALL SORMBR( 'P', 'L', 'N', N, NRHS, M, A, LDA, WORK( ITAUP ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + END IF +* +* Undo scaling. +* + IF( IASCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 10 CONTINUE + WORK( 1 ) = MAXWRK + IWORK( 1 ) = LIWORK + RETURN +* +* End of SGELSD +* + END diff --git a/dspl/liblapack/SRC/sgelss.f b/dspl/liblapack/SRC/sgelss.f new file mode 100644 index 0000000..29380d4 --- /dev/null +++ b/dspl/liblapack/SRC/sgelss.f @@ -0,0 +1,743 @@ +*> \brief SGELSS solves overdetermined or underdetermined systems for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGELSS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK +* REAL RCOND +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGELSS computes the minimum norm solution to a real linear least +*> squares problem: +*> +*> Minimize 2-norm(| b - A*x |). +*> +*> using the singular value decomposition (SVD) of A. A is an M-by-N +*> matrix which may be rank-deficient. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix +*> X. +*> +*> The effective rank of A is determined by treating as zero those +*> singular values which are less than RCOND times the largest singular +*> value. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the first min(m,n) rows of A are overwritten with +*> its right singular vectors, stored rowwise. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the M-by-NRHS right hand side matrix B. +*> On exit, B is overwritten by the N-by-NRHS solution +*> matrix X. If m >= n and RANK = n, the residual +*> sum-of-squares for the solution in the i-th column is given +*> by the sum of squares of elements n+1:m in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,max(M,N)). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (min(M,N)) +*> The singular values of A in decreasing order. +*> The condition number of A in the 2-norm = S(1)/S(min(m,n)). +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is REAL +*> RCOND is used to determine the effective rank of A. +*> Singular values S(i) <= RCOND*S(1) are treated as zero. +*> If RCOND < 0, machine precision is used instead. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The effective rank of A, i.e., the number of singular values +*> which are greater than RCOND*S(1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 1, and also: +*> LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS ) +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: the algorithm for computing the SVD failed to converge; +*> if INFO = i, i off-diagonal elements of an intermediate +*> bidiagonal form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEsolve +* +* ===================================================================== + SUBROUTINE SGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, + $ WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), S( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER BDSPAC, BL, CHUNK, I, IASCL, IBSCL, IE, IL, + $ ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN, + $ MAXWRK, MINMN, MINWRK, MM, MNTHR + INTEGER LWORK_SGEQRF, LWORK_SORMQR, LWORK_SGEBRD, + $ LWORK_SORMBR, LWORK_SORGBR, LWORK_SORMLQ + REAL ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR +* .. +* .. Local Arrays .. + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SBDSQR, SCOPY, SGEBRD, SGELQF, SGEMM, SGEMV, + $ SGEQRF, SLABAD, SLACPY, SLASCL, SLASET, SORGBR, + $ SORMBR, SORMLQ, SORMQR, SRSCL, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL ILAENV, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + IF( MINMN.GT.0 ) THEN + MM = M + MNTHR = ILAENV( 6, 'SGELSS', ' ', M, N, NRHS, -1 ) + IF( M.GE.N .AND. M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than +* columns +* +* Compute space needed for SGEQRF + CALL SGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, INFO ) + LWORK_SGEQRF=DUM(1) +* Compute space needed for SORMQR + CALL SORMQR( 'L', 'T', M, NRHS, N, A, LDA, DUM(1), B, + $ LDB, DUM(1), -1, INFO ) + LWORK_SORMQR=DUM(1) + MM = N + MAXWRK = MAX( MAXWRK, N + LWORK_SGEQRF ) + MAXWRK = MAX( MAXWRK, N + LWORK_SORMQR ) + END IF + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined +* +* Compute workspace needed for SBDSQR +* + BDSPAC = MAX( 1, 5*N ) +* Compute space needed for SGEBRD + CALL SGEBRD( MM, N, A, LDA, S, DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, INFO ) + LWORK_SGEBRD=DUM(1) +* Compute space needed for SORMBR + CALL SORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, DUM(1), + $ B, LDB, DUM(1), -1, INFO ) + LWORK_SORMBR=DUM(1) +* Compute space needed for SORGBR + CALL SORGBR( 'P', N, N, N, A, LDA, DUM(1), + $ DUM(1), -1, INFO ) + LWORK_SORGBR=DUM(1) +* Compute total workspace needed + MAXWRK = MAX( MAXWRK, 3*N + LWORK_SGEBRD ) + MAXWRK = MAX( MAXWRK, 3*N + LWORK_SORMBR ) + MAXWRK = MAX( MAXWRK, 3*N + LWORK_SORGBR ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MAXWRK = MAX( MAXWRK, N*NRHS ) + MINWRK = MAX( 3*N + MM, 3*N + NRHS, BDSPAC ) + MAXWRK = MAX( MINWRK, MAXWRK ) + END IF + IF( N.GT.M ) THEN +* +* Compute workspace needed for SBDSQR +* + BDSPAC = MAX( 1, 5*M ) + MINWRK = MAX( 3*M+NRHS, 3*M+N, BDSPAC ) + IF( N.GE.MNTHR ) THEN +* +* Path 2a - underdetermined, with many more columns +* than rows +* +* Compute space needed for SGEBRD + CALL SGEBRD( M, M, A, LDA, S, DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, INFO ) + LWORK_SGEBRD=DUM(1) +* Compute space needed for SORMBR + CALL SORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, + $ DUM(1), B, LDB, DUM(1), -1, INFO ) + LWORK_SORMBR=DUM(1) +* Compute space needed for SORGBR + CALL SORGBR( 'P', M, M, M, A, LDA, DUM(1), + $ DUM(1), -1, INFO ) + LWORK_SORGBR=DUM(1) +* Compute space needed for SORMLQ + CALL SORMLQ( 'L', 'T', N, NRHS, M, A, LDA, DUM(1), + $ B, LDB, DUM(1), -1, INFO ) + LWORK_SORMLQ=DUM(1) +* Compute total workspace needed + MAXWRK = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_SGEBRD ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_SORMBR ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + LWORK_SORGBR ) + MAXWRK = MAX( MAXWRK, M*M + M + BDSPAC ) + IF( NRHS.GT.1 ) THEN + MAXWRK = MAX( MAXWRK, M*M + M + M*NRHS ) + ELSE + MAXWRK = MAX( MAXWRK, M*M + 2*M ) + END IF + MAXWRK = MAX( MAXWRK, M + LWORK_SORMLQ ) + ELSE +* +* Path 2 - underdetermined +* +* Compute space needed for SGEBRD + CALL SGEBRD( M, N, A, LDA, S, DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, INFO ) + LWORK_SGEBRD=DUM(1) +* Compute space needed for SORMBR + CALL SORMBR( 'Q', 'L', 'T', M, NRHS, M, A, LDA, + $ DUM(1), B, LDB, DUM(1), -1, INFO ) + LWORK_SORMBR=DUM(1) +* Compute space needed for SORGBR + CALL SORGBR( 'P', M, N, M, A, LDA, DUM(1), + $ DUM(1), -1, INFO ) + LWORK_SORGBR=DUM(1) + MAXWRK = 3*M + LWORK_SGEBRD + MAXWRK = MAX( MAXWRK, 3*M + LWORK_SORMBR ) + MAXWRK = MAX( MAXWRK, 3*M + LWORK_SORGBR ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MAXWRK = MAX( MAXWRK, N*NRHS ) + END IF + END IF + MAXWRK = MAX( MINWRK, MAXWRK ) + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) + $ INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELSS', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + EPS = SLAMCH( 'P' ) + SFMIN = SLAMCH( 'S' ) + SMLNUM = SFMIN / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + CALL SLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 ) + RANK = 0 + GO TO 70 + END IF +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = SLANGE( 'M', M, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Overdetermined case +* + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined +* + MM = M + IF( M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns +* + MM = N + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Multiply B by transpose(Q) +* (Workspace: need N+NRHS, prefer N+NRHS*NB) +* + CALL SORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Zero out below R +* + IF( N.GT.1 ) + $ CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + END IF +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB) +* + CALL SGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of R +* (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB) +* + CALL SORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors of R in A +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration +* multiply B by transpose of left singular vectors +* compute right singular vectors in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, N, 0, NRHS, S, WORK( IE ), A, LDA, DUM, + $ 1, B, LDB, WORK( IWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 10 I = 1, N + IF( S( I ).GT.THR ) THEN + CALL SRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL SLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) + END IF + 10 CONTINUE +* +* Multiply B by right singular vectors +* (Workspace: need N, prefer N*NRHS) +* + IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN + CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, A, LDA, B, LDB, ZERO, + $ WORK, LDB ) + CALL SLACPY( 'G', N, NRHS, WORK, LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = LWORK / N + DO 20 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL SGEMM( 'T', 'N', N, BL, N, ONE, A, LDA, B( 1, I ), + $ LDB, ZERO, WORK, N ) + CALL SLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB ) + 20 CONTINUE + ELSE + CALL SGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 ) + CALL SCOPY( N, WORK, 1, B, 1 ) + END IF +* + ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+ + $ MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN +* +* Path 2a - underdetermined, with many more columns than rows +* and sufficient workspace for an efficient algorithm +* + LDWORK = M + IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ), + $ M*LDA+M+M*NRHS ) )LDWORK = LDA + ITAU = 1 + IWORK = M + 1 +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) + IL = IWORK +* +* Copy L to WORK(IL), zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ), + $ LDWORK ) + IE = IL + LDWORK*M + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of L +* (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +* + CALL SORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUQ ), B, LDB, WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors of R in WORK(IL) +* (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, +* computing right singular vectors of L in WORK(IL) and +* multiplying B by transpose of left singular vectors +* (Workspace: need M*M+M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, 0, NRHS, S, WORK( IE ), WORK( IL ), + $ LDWORK, A, LDA, B, LDB, WORK( IWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 30 I = 1, M + IF( S( I ).GT.THR ) THEN + CALL SRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL SLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) + END IF + 30 CONTINUE + IWORK = IE +* +* Multiply B by right singular vectors of L in WORK(IL) +* (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS) +* + IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN + CALL SGEMM( 'T', 'N', M, NRHS, M, ONE, WORK( IL ), LDWORK, + $ B, LDB, ZERO, WORK( IWORK ), LDB ) + CALL SLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = ( LWORK-IWORK+1 ) / M + DO 40 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL SGEMM( 'T', 'N', M, BL, M, ONE, WORK( IL ), LDWORK, + $ B( 1, I ), LDB, ZERO, WORK( IWORK ), M ) + CALL SLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ), + $ LDB ) + 40 CONTINUE + ELSE + CALL SGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ), + $ 1, ZERO, WORK( IWORK ), 1 ) + CALL SCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 ) + END IF +* +* Zero out below first M rows of B +* + CALL SLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB ) + IWORK = ITAU + M +* +* Multiply transpose(Q) by B +* (Workspace: need M+NRHS, prefer M+NRHS*NB) +* + CALL SORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* + ELSE +* +* Path 2 - remaining underdetermined cases +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors +* (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB) +* + CALL SORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors in A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, +* computing right singular vectors of A in A and +* multiplying B by transpose of left singular vectors +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'L', M, N, 0, NRHS, S, WORK( IE ), A, LDA, DUM, + $ 1, B, LDB, WORK( IWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 50 I = 1, M + IF( S( I ).GT.THR ) THEN + CALL SRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL SLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) + END IF + 50 CONTINUE +* +* Multiply B by right singular vectors of A +* (Workspace: need N, prefer N*NRHS) +* + IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN + CALL SGEMM( 'T', 'N', N, NRHS, M, ONE, A, LDA, B, LDB, ZERO, + $ WORK, LDB ) + CALL SLACPY( 'F', N, NRHS, WORK, LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = LWORK / N + DO 60 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL SGEMM( 'T', 'N', N, BL, M, ONE, A, LDA, B( 1, I ), + $ LDB, ZERO, WORK, N ) + CALL SLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB ) + 60 CONTINUE + ELSE + CALL SGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 ) + CALL SCOPY( N, WORK, 1, B, 1 ) + END IF + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 70 CONTINUE + WORK( 1 ) = MAXWRK + RETURN +* +* End of SGELSS +* + END diff --git a/dspl/liblapack/SRC/sgelsy.f b/dspl/liblapack/SRC/sgelsy.f new file mode 100644 index 0000000..58bd7c8 --- /dev/null +++ b/dspl/liblapack/SRC/sgelsy.f @@ -0,0 +1,479 @@ +*> \brief SGELSY solves overdetermined or underdetermined systems for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGELSY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGELSY computes the minimum-norm solution to a real linear least +*> squares problem: +*> minimize || A * X - B || +*> using a complete orthogonal factorization of A. A is an M-by-N +*> matrix which may be rank-deficient. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> +*> The routine first computes a QR factorization with column pivoting: +*> A * P = Q * [ R11 R12 ] +*> [ 0 R22 ] +*> with R11 defined as the largest leading submatrix whose estimated +*> condition number is less than 1/RCOND. The order of R11, RANK, +*> is the effective rank of A. +*> +*> Then, R22 is considered to be negligible, and R12 is annihilated +*> by orthogonal transformations from the right, arriving at the +*> complete orthogonal factorization: +*> A * P = Q * [ T11 0 ] * Z +*> [ 0 0 ] +*> The minimum-norm solution is then +*> X = P * Z**T [ inv(T11)*Q1**T*B ] +*> [ 0 ] +*> where Q1 consists of the first RANK columns of Q. +*> +*> This routine is basically identical to the original xGELSX except +*> three differences: +*> o The call to the subroutine xGEQPF has been substituted by the +*> the call to the subroutine xGEQP3. This subroutine is a Blas-3 +*> version of the QR factorization with column pivoting. +*> o Matrix B (the right hand side) is updated with Blas-3. +*> o The permutation of matrix B (the right hand side) is faster and +*> more simple. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A has been overwritten by details of its +*> complete orthogonal factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the M-by-NRHS right hand side matrix B. +*> On exit, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M,N). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +*> to the front of AP, otherwise column i is a free column. +*> On exit, if JPVT(i) = k, then the i-th column of AP +*> was the k-th column of A. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is REAL +*> RCOND is used to determine the effective rank of A, which +*> is defined as the order of the largest leading triangular +*> submatrix R11 in the QR factorization with pivoting of A, +*> whose estimated condition number < 1/RCOND. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The effective rank of A, i.e., the order of the submatrix +*> R11. This is the same as the order of the submatrix T11 +*> in the complete orthogonal factorization of A. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> The unblocked strategy requires that: +*> LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ), +*> where MN = min( M, N ). +*> The block algorithm requires that: +*> LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ), +*> where NB is an upper bound on the blocksize returned +*> by ILAENV for the routines SGEQP3, STZRZF, STZRQF, SORMQR, +*> and SORMRZ. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: If INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEsolve +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA \n +*> E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain \n +*> G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain \n +*> +* ===================================================================== + SUBROUTINE SGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, + $ WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IMAX, IMIN + PARAMETER ( IMAX = 1, IMIN = 2 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IASCL, IBSCL, ISMAX, ISMIN, J, LWKMIN, + $ LWKOPT, MN, NB, NB1, NB2, NB3, NB4 + REAL ANRM, BIGNUM, BNRM, C1, C2, S1, S2, SMAX, + $ SMAXPR, SMIN, SMINPR, SMLNUM, WSIZE +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL ILAENV, SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEQP3, SLABAD, SLAIC1, SLASCL, SLASET, + $ SORMQR, SORMRZ, STRSM, STZRZF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + MN = MIN( M, N ) + ISMIN = MN + 1 + ISMAX = 2*MN + 1 +* +* Test the input arguments. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -7 + END IF +* +* Figure out optimal block size +* + IF( INFO.EQ.0 ) THEN + IF( MN.EQ.0 .OR. NRHS.EQ.0 ) THEN + LWKMIN = 1 + LWKOPT = 1 + ELSE + NB1 = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'SGERQF', ' ', M, N, -1, -1 ) + NB3 = ILAENV( 1, 'SORMQR', ' ', M, N, NRHS, -1 ) + NB4 = ILAENV( 1, 'SORMRQ', ' ', M, N, NRHS, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKMIN = MN + MAX( 2*MN, N + 1, MN + NRHS ) + LWKOPT = MAX( LWKMIN, + $ MN + 2*N + NB*( N + 1 ), 2*MN + NB*NRHS ) + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGELSY', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MN.EQ.0 .OR. NRHS.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max entries outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + RANK = 0 + GO TO 70 + END IF +* + BNRM = SLANGE( 'M', M, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Compute QR factorization with column pivoting of A: +* A * P = Q * R +* + CALL SGEQP3( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), + $ LWORK-MN, INFO ) + WSIZE = MN + WORK( MN+1 ) +* +* workspace: MN+2*N+NB*(N+1). +* Details of Householder rotations stored in WORK(1:MN). +* +* Determine RANK using incremental condition estimation +* + WORK( ISMIN ) = ONE + WORK( ISMAX ) = ONE + SMAX = ABS( A( 1, 1 ) ) + SMIN = SMAX + IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN + RANK = 0 + CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) + GO TO 70 + ELSE + RANK = 1 + END IF +* + 10 CONTINUE + IF( RANK.LT.MN ) THEN + I = RANK + 1 + CALL SLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ), + $ A( I, I ), SMINPR, S1, C1 ) + CALL SLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ), + $ A( I, I ), SMAXPR, S2, C2 ) +* + IF( SMAXPR*RCOND.LE.SMINPR ) THEN + DO 20 I = 1, RANK + WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 ) + WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 ) + 20 CONTINUE + WORK( ISMIN+RANK ) = C1 + WORK( ISMAX+RANK ) = C2 + SMIN = SMINPR + SMAX = SMAXPR + RANK = RANK + 1 + GO TO 10 + END IF + END IF +* +* workspace: 3*MN. +* +* Logically partition R = [ R11 R12 ] +* [ 0 R22 ] +* where R11 = R(1:RANK,1:RANK) +* +* [R11,R12] = [ T11, 0 ] * Y +* + IF( RANK.LT.N ) + $ CALL STZRZF( RANK, N, A, LDA, WORK( MN+1 ), WORK( 2*MN+1 ), + $ LWORK-2*MN, INFO ) +* +* workspace: 2*MN. +* Details of Householder rotations stored in WORK(MN+1:2*MN) +* +* B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) +* + CALL SORMQR( 'Left', 'Transpose', M, NRHS, MN, A, LDA, WORK( 1 ), + $ B, LDB, WORK( 2*MN+1 ), LWORK-2*MN, INFO ) + WSIZE = MAX( WSIZE, 2*MN+WORK( 2*MN+1 ) ) +* +* workspace: 2*MN+NB*NRHS. +* +* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) +* + CALL STRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK, + $ NRHS, ONE, A, LDA, B, LDB ) +* + DO 40 J = 1, NRHS + DO 30 I = RANK + 1, N + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Y**T * B(1:N,1:NRHS) +* + IF( RANK.LT.N ) THEN + CALL SORMRZ( 'Left', 'Transpose', N, NRHS, RANK, N-RANK, A, + $ LDA, WORK( MN+1 ), B, LDB, WORK( 2*MN+1 ), + $ LWORK-2*MN, INFO ) + END IF +* +* workspace: 2*MN+NRHS. +* +* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) +* + DO 60 J = 1, NRHS + DO 50 I = 1, N + WORK( JPVT( I ) ) = B( I, J ) + 50 CONTINUE + CALL SCOPY( N, WORK( 1 ), 1, B( 1, J ), 1 ) + 60 CONTINUE +* +* workspace: N. +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL SLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 70 CONTINUE + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SGELSY +* + END diff --git a/dspl/liblapack/SRC/sgemlq.f b/dspl/liblapack/SRC/sgemlq.f new file mode 100644 index 0000000..dedbe77 --- /dev/null +++ b/dspl/liblapack/SRC/sgemlq.f @@ -0,0 +1,283 @@ +* +* Definition: +* =========== +* +* SUBROUTINE SGEMLQ( SIDE, TRANS, M, N, K, A, LDA, T, +* $ TSIZE, C, LDC, WORK, LWORK, INFO ) +* +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, LDA, M, N, K, LDT, TSIZE, LWORK, LDC +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), T( * ), C(LDC, * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEMLQ overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> where Q is a real orthogonal matrix defined as the product +*> of blocked elementary reflectors computed by short wide LQ +*> factorization (SGELQ) +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >=0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> Part of the data structure to represent Q as returned by DGELQ. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is REAL array, dimension (MAX(5,TSIZE)). +*> Part of the data structure to represent Q as returned by SGELQ. +*> \endverbatim +*> +*> \param[in] TSIZE +*> \verbatim +*> TSIZE is INTEGER +*> The dimension of the array T. TSIZE >= 5. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) REAL array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If LWORK = -1, then a workspace query is assumed. The routine +*> only calculates the size of the WORK array, returns this +*> value as WORK(1), and no error message related to WORK +*> is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details +* ==================== +*> +*> \verbatim +*> +*> These details are particular for this LAPACK implementation. Users should not +*> take them for granted. These details may change in the future, and are unlikely not +*> true for another LAPACK implementation. These details are relevant if one wants +*> to try to understand the code. They are not part of the interface. +*> +*> In this version, +*> +*> T(2): row block size (MB) +*> T(3): column block size (NB) +*> T(6:TSIZE): data structure needed for Q, computed by +*> SLASWLQ or SGELQT +*> +*> Depending on the matrix dimensions M and N, and row and column +*> block sizes MB and NB returned by ILAENV, SGELQ will use either +*> SLASWLQ (if the matrix is wide-and-short) or SGELQT to compute +*> the LQ factorization. +*> This version of SGEMLQ will use either SLAMSWLQ or SGEMLQT to +*> multiply matrix Q by another matrix. +*> Further Details in SLAMSWLQ or SGEMLQT. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGEMLQ( SIDE, TRANS, M, N, K, A, LDA, T, TSIZE, + $ C, LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, LDA, M, N, K, TSIZE, LWORK, LDC +* .. +* .. Array Arguments .. + REAL A( LDA, * ), T( * ), C( LDC, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN, LQUERY + INTEGER MB, NB, LW, NBLCKS, MN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLAMSWLQ, SGEMLQT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN, MOD +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + LQUERY = LWORK.EQ.-1 + NOTRAN = LSAME( TRANS, 'N' ) + TRAN = LSAME( TRANS, 'T' ) + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) +* + MB = INT( T( 2 ) ) + NB = INT( T( 3 ) ) + IF( LEFT ) THEN + LW = N * MB + MN = M + ELSE + LW = M * MB + MN = N + END IF +* + IF( ( NB.GT.K ) .AND. ( MN.GT.K ) ) THEN + IF( MOD( MN - K, NB - K ) .EQ. 0 ) THEN + NBLCKS = ( MN - K ) / ( NB - K ) + ELSE + NBLCKS = ( MN - K ) / ( NB - K ) + 1 + END IF + ELSE + NBLCKS = 1 + END IF +* + INFO = 0 + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.MN ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( TSIZE.LT.5 ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( ( LWORK.LT.MAX( 1, LW ) ) .AND. ( .NOT.LQUERY ) ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = REAL( LW ) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEMLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, K ).EQ.0 ) THEN + RETURN + END IF +* + IF( ( LEFT .AND. M.LE.K ) .OR. ( RIGHT .AND. N.LE.K ) + $ .OR. ( NB.LE.K ) .OR. ( NB.GE.MAX( M, N, K ) ) ) THEN + CALL SGEMLQT( SIDE, TRANS, M, N, K, MB, A, LDA, + $ T( 6 ), MB, C, LDC, WORK, INFO ) + ELSE + CALL SLAMSWLQ( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T( 6 ), + $ MB, C, LDC, WORK, LWORK, INFO ) + END IF +* + WORK( 1 ) = REAL( LW ) +* + RETURN +* +* End of SGEMLQ +* + END diff --git a/dspl/liblapack/SRC/sgemlqt.f b/dspl/liblapack/SRC/sgemlqt.f new file mode 100644 index 0000000..a8f022b --- /dev/null +++ b/dspl/liblapack/SRC/sgemlqt.f @@ -0,0 +1,272 @@ +* Definition: +* =========== +* +* SUBROUTINE SGEMLQT( SIDE, TRANS, M, N, K, MB, V, LDV, T, LDT, +* C, LDC, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDV, LDC, M, N, MB, LDT +* .. +* .. Array Arguments .. +* REAL V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGEMLQT overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q C C Q +*> TRANS = 'T': Q**T C C Q**T +*> +*> where Q is a real orthogonal matrix defined as the product of K +*> elementary reflectors: +*> +*> Q = H(1) H(2) . . . H(K) = I - V T V**T +*> +*> generated using the compact WY representation as returned by DGELQT. +*> +*> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The block size used for the storage of T. K >= MB >= 1. +*> This must be the same value of MB used to generate T +*> in DGELQT. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension +*> (LDV,M) if SIDE = 'L', +*> (LDV,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DGELQT in the first K rows of its array argument A. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,K). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is REAL array, dimension (LDT,K) +*> The upper triangular factors of the block reflectors +*> as returned by DGELQT, stored as a MB-by-K matrix. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q C, Q**T C, C Q**T or C Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array. The dimension of +*> WORK is N*MB if SIDE = 'L', or M*MB if SIDE = 'R'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + SUBROUTINE SGEMLQT( SIDE, TRANS, M, N, K, MB, V, LDV, T, LDT, + $ C, LDC, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDV, LDC, M, N, MB, LDT +* .. +* .. Array Arguments .. + REAL V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN + INTEGER I, IB, LDWORK, KF +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SLARFB +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* .. Test the input arguments .. +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + TRAN = LSAME( TRANS, 'T' ) + NOTRAN = LSAME( TRANS, 'N' ) +* + IF( LEFT ) THEN + LDWORK = MAX( 1, N ) + ELSE IF ( RIGHT ) THEN + LDWORK = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0) THEN + INFO = -5 + ELSE IF( MB.LT.1 .OR. (MB.GT.K .AND. K.GT.0)) THEN + INFO = -6 + ELSE IF( LDV.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDT.LT.MB ) THEN + INFO = -10 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEMLQT', -INFO ) + RETURN + END IF +* +* .. Quick return if possible .. +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) RETURN +* + IF( LEFT .AND. NOTRAN ) THEN +* + DO I = 1, K, MB + IB = MIN( MB, K-I+1 ) + CALL SLARFB( 'L', 'T', 'F', 'R', M-I+1, N, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( I, 1 ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( RIGHT .AND. TRAN ) THEN +* + DO I = 1, K, MB + IB = MIN( MB, K-I+1 ) + CALL SLARFB( 'R', 'N', 'F', 'R', M, N-I+1, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( 1, I ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( LEFT .AND. TRAN ) THEN +* + KF = ((K-1)/MB)*MB+1 + DO I = KF, 1, -MB + IB = MIN( MB, K-I+1 ) + CALL SLARFB( 'L', 'N', 'F', 'R', M-I+1, N, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( I, 1 ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( RIGHT .AND. NOTRAN ) THEN +* + KF = ((K-1)/MB)*MB+1 + DO I = KF, 1, -MB + IB = MIN( MB, K-I+1 ) + CALL SLARFB( 'R', 'T', 'F', 'R', M, N-I+1, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( 1, I ), LDC, WORK, LDWORK ) + END DO +* + END IF +* + RETURN +* +* End of SGEMLQT +* + END diff --git a/dspl/liblapack/SRC/sgemqr.f b/dspl/liblapack/SRC/sgemqr.f new file mode 100644 index 0000000..307fc8c --- /dev/null +++ b/dspl/liblapack/SRC/sgemqr.f @@ -0,0 +1,285 @@ +* +* Definition: +* =========== +* +* SUBROUTINE SGEMQR( SIDE, TRANS, M, N, K, A, LDA, T, +* $ TSIZE, C, LDC, WORK, LWORK, INFO ) +* +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, LDA, M, N, K, LDT, TSIZE, LWORK, LDC +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), T( * ), C( LDC, * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEMQR overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix defined as the product +*> of blocked elementary reflectors computed by tall skinny +*> QR factorization (SGEQR) +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >=0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,K) +*> Part of the data structure to represent Q as returned by SGEQR. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is REAL array, dimension (MAX(5,TSIZE)). +*> Part of the data structure to represent Q as returned by SGEQR. +*> \endverbatim +*> +*> \param[in] TSIZE +*> \verbatim +*> TSIZE is INTEGER +*> The dimension of the array T. TSIZE >= 5. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) REAL array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If LWORK = -1, then a workspace query is assumed. The routine +*> only calculates the size of the WORK array, returns this +*> value as WORK(1), and no error message related to WORK +*> is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details +* ==================== +*> +*> \verbatim +*> +*> These details are particular for this LAPACK implementation. Users should not +*> take them for granted. These details may change in the future, and are unlikely not +*> true for another LAPACK implementation. These details are relevant if one wants +*> to try to understand the code. They are not part of the interface. +*> +*> In this version, +*> +*> T(2): row block size (MB) +*> T(3): column block size (NB) +*> T(6:TSIZE): data structure needed for Q, computed by +*> SLATSQR or SGEQRT +*> +*> Depending on the matrix dimensions M and N, and row and column +*> block sizes MB and NB returned by ILAENV, SGEQR will use either +*> SLATSQR (if the matrix is tall-and-skinny) or SGEQRT to compute +*> the QR factorization. +*> This version of SGEMQR will use either SLAMTSQR or SGEMQRT to +*> multiply matrix Q by another matrix. +*> Further Details in SLAMTSQR or SGEMQRT. +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGEMQR( SIDE, TRANS, M, N, K, A, LDA, T, TSIZE, + $ C, LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, LDA, M, N, K, TSIZE, LWORK, LDC +* .. +* .. Array Arguments .. + REAL A( LDA, * ), T( * ), C( LDC, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN, LQUERY + INTEGER MB, NB, LW, NBLCKS, MN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEMQRT, SLAMTSQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN, MOD +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + LQUERY = LWORK.EQ.-1 + NOTRAN = LSAME( TRANS, 'N' ) + TRAN = LSAME( TRANS, 'T' ) + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) +* + MB = INT( T( 2 ) ) + NB = INT( T( 3 ) ) + IF( LEFT ) THEN + LW = N * NB + MN = M + ELSE + LW = MB * NB + MN = N + END IF +* + IF( ( MB.GT.K ) .AND. ( MN.GT.K ) ) THEN + IF( MOD( MN - K, MB - K ).EQ.0 ) THEN + NBLCKS = ( MN - K ) / ( MB - K ) + ELSE + NBLCKS = ( MN - K ) / ( MB - K ) + 1 + END IF + ELSE + NBLCKS = 1 + END IF +* + INFO = 0 + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.MN ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, MN ) ) THEN + INFO = -7 + ELSE IF( TSIZE.LT.5 ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( ( LWORK.LT.MAX( 1, LW ) ) .AND. ( .NOT.LQUERY ) ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LW + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEMQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, K ).EQ.0 ) THEN + RETURN + END IF +* + IF( ( LEFT .AND. M.LE.K ) .OR. ( RIGHT .AND. N.LE.K ) + $ .OR. ( MB.LE.K ) .OR. ( MB.GE.MAX( M, N, K ) ) ) THEN + CALL SGEMQRT( SIDE, TRANS, M, N, K, NB, A, LDA, T( 6 ), + $ NB, C, LDC, WORK, INFO ) + ELSE + CALL SLAMTSQR( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T( 6 ), + $ NB, C, LDC, WORK, LWORK, INFO ) + END IF +* + WORK( 1 ) = LW +* + RETURN +* +* End of SGEMQR +* + END diff --git a/dspl/liblapack/SRC/sgemqrt.f b/dspl/liblapack/SRC/sgemqrt.f new file mode 100644 index 0000000..843151a --- /dev/null +++ b/dspl/liblapack/SRC/sgemqrt.f @@ -0,0 +1,291 @@ +*> \brief \b SGEMQRT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEMQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEMQRT( SIDE, TRANS, M, N, K, NB, V, LDV, T, LDT, +* C, LDC, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDV, LDC, M, N, NB, LDT +* .. +* .. Array Arguments .. +* REAL V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEMQRT overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q C C Q +*> TRANS = 'T': Q**T C C Q**T +*> +*> where Q is a real orthogonal matrix defined as the product of K +*> elementary reflectors: +*> +*> Q = H(1) H(2) . . . H(K) = I - V T V**T +*> +*> generated using the compact WY representation as returned by SGEQRT. +*> +*> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The block size used for the storage of T. K >= NB >= 1. +*> This must be the same value of NB used to generate T +*> in CGEQRT. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension (LDV,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> CGEQRT in the first K columns of its array argument A. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is REAL array, dimension (LDT,K) +*> The upper triangular factors of the block reflectors +*> as returned by CGEQRT, stored as a NB-by-N matrix. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q C, Q**T C, C Q**T or C Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array. The dimension of WORK is +*> N*NB if SIDE = 'L', or M*NB if SIDE = 'R'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +* ===================================================================== + SUBROUTINE SGEMQRT( SIDE, TRANS, M, N, K, NB, V, LDV, T, LDT, + $ C, LDC, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDV, LDC, M, N, NB, LDT +* .. +* .. Array Arguments .. + REAL V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN + INTEGER I, IB, LDWORK, KF, Q +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SLARFB +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* .. Test the input arguments .. +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + TRAN = LSAME( TRANS, 'T' ) + NOTRAN = LSAME( TRANS, 'N' ) +* + IF( LEFT ) THEN + LDWORK = MAX( 1, N ) + Q = M + ELSE IF ( RIGHT ) THEN + LDWORK = MAX( 1, M ) + Q = N + END IF + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.Q ) THEN + INFO = -5 + ELSE IF( NB.LT.1 .OR. (NB.GT.K .AND. K.GT.0)) THEN + INFO = -6 + ELSE IF( LDV.LT.MAX( 1, Q ) ) THEN + INFO = -8 + ELSE IF( LDT.LT.NB ) THEN + INFO = -10 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEMQRT', -INFO ) + RETURN + END IF +* +* .. Quick return if possible .. +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) RETURN +* + IF( LEFT .AND. TRAN ) THEN +* + DO I = 1, K, NB + IB = MIN( NB, K-I+1 ) + CALL SLARFB( 'L', 'T', 'F', 'C', M-I+1, N, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( I, 1 ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( RIGHT .AND. NOTRAN ) THEN +* + DO I = 1, K, NB + IB = MIN( NB, K-I+1 ) + CALL SLARFB( 'R', 'N', 'F', 'C', M, N-I+1, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( 1, I ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( LEFT .AND. NOTRAN ) THEN +* + KF = ((K-1)/NB)*NB+1 + DO I = KF, 1, -NB + IB = MIN( NB, K-I+1 ) + CALL SLARFB( 'L', 'N', 'F', 'C', M-I+1, N, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( I, 1 ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( RIGHT .AND. TRAN ) THEN +* + KF = ((K-1)/NB)*NB+1 + DO I = KF, 1, -NB + IB = MIN( NB, K-I+1 ) + CALL SLARFB( 'R', 'T', 'F', 'C', M, N-I+1, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( 1, I ), LDC, WORK, LDWORK ) + END DO +* + END IF +* + RETURN +* +* End of SGEMQRT +* + END diff --git a/dspl/liblapack/SRC/sgeql2.f b/dspl/liblapack/SRC/sgeql2.f new file mode 100644 index 0000000..53009b9 --- /dev/null +++ b/dspl/liblapack/SRC/sgeql2.f @@ -0,0 +1,193 @@ +*> \brief \b SGEQL2 computes the QL factorization of a general rectangular matrix using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEQL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEQL2( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEQL2 computes a QL factorization of a real m by n matrix A: +*> A = Q * L. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, if m >= n, the lower triangle of the subarray +*> A(m-n+1:m,1:n) contains the n by n lower triangular matrix L; +*> if m <= n, the elements on and below the (n-m)-th +*> superdiagonal contain the m by n lower trapezoidal matrix L; +*> the remaining elements, with the array TAU, represent the +*> orthogonal matrix Q as a product of elementary reflectors +*> (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in +*> A(1:m-k+i-1,n-k+i), and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGEQL2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + REAL AII +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQL2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = K, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* A(1:m-k+i-1,n-k+i) +* + CALL SLARFG( M-K+I, A( M-K+I, N-K+I ), A( 1, N-K+I ), 1, + $ TAU( I ) ) +* +* Apply H(i) to A(1:m-k+i,1:n-k+i-1) from the left +* + AII = A( M-K+I, N-K+I ) + A( M-K+I, N-K+I ) = ONE + CALL SLARF( 'Left', M-K+I, N-K+I-1, A( 1, N-K+I ), 1, TAU( I ), + $ A, LDA, WORK ) + A( M-K+I, N-K+I ) = AII + 10 CONTINUE + RETURN +* +* End of SGEQL2 +* + END diff --git a/dspl/liblapack/SRC/sgeqlf.f b/dspl/liblapack/SRC/sgeqlf.f new file mode 100644 index 0000000..ccf7a0a --- /dev/null +++ b/dspl/liblapack/SRC/sgeqlf.f @@ -0,0 +1,287 @@ +*> \brief \b SGEQLF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEQLF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEQLF computes a QL factorization of a real M-by-N matrix A: +*> A = Q * L. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if m >= n, the lower triangle of the subarray +*> A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L; +*> if m <= n, the elements on and below the (n-m)-th +*> superdiagonal contain the M-by-N lower trapezoidal matrix L; +*> the remaining elements, with the array TAU, represent the +*> orthogonal matrix Q as a product of elementary reflectors +*> (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in +*> A(1:m-k+i-1,n-k+i), and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT, + $ MU, NB, NBMIN, NU, NX +* .. +* .. External Subroutines .. + EXTERNAL SGEQL2, SLARFB, SLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF +* + IF( INFO.EQ.0 ) THEN + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'SGEQLF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQLF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SGEQLF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SGEQLF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially. +* The last kk columns are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* + DO 10 I = K - KK + KI + 1, K - KK + 1, -NB + IB = MIN( K-I+1, NB ) +* +* Compute the QL factorization of the current block +* A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1) +* + CALL SGEQL2( M-K+I+IB-1, IB, A( 1, N-K+I ), LDA, TAU( I ), + $ WORK, IINFO ) + IF( N-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL SLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**T to A(1:m-k+i+ib-1,1:n-k+i-1) from the left +* + CALL SLARFB( 'Left', 'Transpose', 'Backward', + $ 'Columnwise', M-K+I+IB-1, N-K+I-1, IB, + $ A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + MU = M - K + I + NB - 1 + NU = N - K + I + NB - 1 + ELSE + MU = M + NU = N + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 .AND. NU.GT.0 ) + $ CALL SGEQL2( MU, NU, A, LDA, TAU, WORK, IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of SGEQLF +* + END diff --git a/dspl/liblapack/SRC/sgeqp3.f b/dspl/liblapack/SRC/sgeqp3.f new file mode 100644 index 0000000..303a944 --- /dev/null +++ b/dspl/liblapack/SRC/sgeqp3.f @@ -0,0 +1,358 @@ +*> \brief \b SGEQP3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEQP3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEQP3 computes a QR factorization with column pivoting of a +*> matrix A: A*P = Q*R using Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the upper triangle of the array contains the +*> min(M,N)-by-N upper trapezoidal matrix R; the elements below +*> the diagonal, together with the array TAU, represent the +*> orthogonal matrix Q as a product of min(M,N) elementary +*> reflectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> On entry, if JPVT(J).ne.0, the J-th column of A is permuted +*> to the front of A*P (a leading column); if JPVT(J)=0, +*> the J-th column of A is a free column. +*> On exit, if JPVT(J)=K, then the J-th column of A*P was the +*> the K-th column of A. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO=0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 3*N+1. +*> For optimal performance LWORK >= 2*N+( N+1 )*NB, where NB +*> is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real/complex vector +*> with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in +*> A(i+1:m,i), and tau in TAU(i). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +*> X. Sun, Computer Science Dept., Duke University, USA +*> +* ===================================================================== + SUBROUTINE SGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER INB, INBMIN, IXOVER + PARAMETER ( INB = 1, INBMIN = 2, IXOVER = 3 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER FJB, IWS, J, JB, LWKOPT, MINMN, MINWS, NA, NB, + $ NBMIN, NFXD, NX, SM, SMINMN, SN, TOPBMN +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SLAQP2, SLAQPS, SORMQR, SSWAP, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + REAL SNRM2 + EXTERNAL ILAENV, SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* Test input arguments +* ==================== +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF +* + IF( INFO.EQ.0 ) THEN + MINMN = MIN( M, N ) + IF( MINMN.EQ.0 ) THEN + IWS = 1 + LWKOPT = 1 + ELSE + IWS = 3*N + 1 + NB = ILAENV( INB, 'SGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = 2*N + ( N + 1 )*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( ( LWORK.LT.IWS ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQP3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Move initial columns up front. +* + NFXD = 1 + DO 10 J = 1, N + IF( JPVT( J ).NE.0 ) THEN + IF( J.NE.NFXD ) THEN + CALL SSWAP( M, A( 1, J ), 1, A( 1, NFXD ), 1 ) + JPVT( J ) = JPVT( NFXD ) + JPVT( NFXD ) = J + ELSE + JPVT( J ) = J + END IF + NFXD = NFXD + 1 + ELSE + JPVT( J ) = J + END IF + 10 CONTINUE + NFXD = NFXD - 1 +* +* Factorize fixed columns +* ======================= +* +* Compute the QR factorization of fixed columns and update +* remaining columns. +* + IF( NFXD.GT.0 ) THEN + NA = MIN( M, NFXD ) +*CC CALL SGEQR2( M, NA, A, LDA, TAU, WORK, INFO ) + CALL SGEQRF( M, NA, A, LDA, TAU, WORK, LWORK, INFO ) + IWS = MAX( IWS, INT( WORK( 1 ) ) ) + IF( NA.LT.N ) THEN +*CC CALL SORM2R( 'Left', 'Transpose', M, N-NA, NA, A, LDA, +*CC $ TAU, A( 1, NA+1 ), LDA, WORK, INFO ) + CALL SORMQR( 'Left', 'Transpose', M, N-NA, NA, A, LDA, TAU, + $ A( 1, NA+1 ), LDA, WORK, LWORK, INFO ) + IWS = MAX( IWS, INT( WORK( 1 ) ) ) + END IF + END IF +* +* Factorize free columns +* ====================== +* + IF( NFXD.LT.MINMN ) THEN +* + SM = M - NFXD + SN = N - NFXD + SMINMN = MINMN - NFXD +* +* Determine the block size. +* + NB = ILAENV( INB, 'SGEQRF', ' ', SM, SN, -1, -1 ) + NBMIN = 2 + NX = 0 +* + IF( ( NB.GT.1 ) .AND. ( NB.LT.SMINMN ) ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( IXOVER, 'SGEQRF', ' ', SM, SN, -1, + $ -1 ) ) +* +* + IF( NX.LT.SMINMN ) THEN +* +* Determine if workspace is large enough for blocked code. +* + MINWS = 2*SN + ( SN+1 )*NB + IWS = MAX( IWS, MINWS ) + IF( LWORK.LT.MINWS ) THEN +* +* Not enough workspace to use optimal NB: Reduce NB and +* determine the minimum value of NB. +* + NB = ( LWORK-2*SN ) / ( SN+1 ) + NBMIN = MAX( 2, ILAENV( INBMIN, 'SGEQRF', ' ', SM, SN, + $ -1, -1 ) ) +* +* + END IF + END IF + END IF +* +* Initialize partial column norms. The first N elements of work +* store the exact column norms. +* + DO 20 J = NFXD + 1, N + WORK( J ) = SNRM2( SM, A( NFXD+1, J ), 1 ) + WORK( N+J ) = WORK( J ) + 20 CONTINUE +* + IF( ( NB.GE.NBMIN ) .AND. ( NB.LT.SMINMN ) .AND. + $ ( NX.LT.SMINMN ) ) THEN +* +* Use blocked code initially. +* + J = NFXD + 1 +* +* Compute factorization: while loop. +* +* + TOPBMN = MINMN - NX + 30 CONTINUE + IF( J.LE.TOPBMN ) THEN + JB = MIN( NB, TOPBMN-J+1 ) +* +* Factorize JB columns among columns J:N. +* + CALL SLAQPS( M, N-J+1, J-1, JB, FJB, A( 1, J ), LDA, + $ JPVT( J ), TAU( J ), WORK( J ), WORK( N+J ), + $ WORK( 2*N+1 ), WORK( 2*N+JB+1 ), N-J+1 ) +* + J = J + FJB + GO TO 30 + END IF + ELSE + J = NFXD + 1 + END IF +* +* Use unblocked code to factor the last or only block. +* +* + IF( J.LE.MINMN ) + $ CALL SLAQP2( M, N-J+1, J-1, A( 1, J ), LDA, JPVT( J ), + $ TAU( J ), WORK( J ), WORK( N+J ), + $ WORK( 2*N+1 ) ) +* + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of SGEQP3 +* + END diff --git a/dspl/liblapack/SRC/sgeqr.f b/dspl/liblapack/SRC/sgeqr.f new file mode 100644 index 0000000..f939abd --- /dev/null +++ b/dspl/liblapack/SRC/sgeqr.f @@ -0,0 +1,307 @@ +* +* Definition: +* =========== +* +* SUBROUTINE SGEQR( M, N, A, LDA, T, TSIZE, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, TSIZE, LWORK +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), T( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> SGEQR computes a QR factorization of an M-by-N matrix A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix R +*> (R is upper triangular if M >= N); +*> the elements below the diagonal are used to store part of the +*> data structure to represent Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (MAX(5,TSIZE)) +*> On exit, if INFO = 0, T(1) returns optimal (or either minimal +*> or optimal, if query is assumed) TSIZE. See TSIZE for details. +*> Remaining T contains part of the data structure used to represent Q. +*> If one wants to apply or construct Q, then one needs to keep T +*> (in addition to A) and pass it to further subroutines. +*> \endverbatim +*> +*> \param[in] TSIZE +*> \verbatim +*> TSIZE is INTEGER +*> If TSIZE >= 5, the dimension of the array T. +*> If TSIZE = -1 or -2, then a workspace query is assumed. The routine +*> only calculates the sizes of the T and WORK arrays, returns these +*> values as the first entries of the T and WORK arrays, and no error +*> message related to T or WORK is issued by XERBLA. +*> If TSIZE = -1, the routine calculates optimal size of T for the +*> optimum performance and returns this value in T(1). +*> If TSIZE = -2, the routine calculates minimal size of T and +*> returns this value in T(1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) contains optimal (or either minimal +*> or optimal, if query was assumed) LWORK. +*> See LWORK for details. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If LWORK = -1 or -2, then a workspace query is assumed. The routine +*> only calculates the sizes of the T and WORK arrays, returns these +*> values as the first entries of the T and WORK arrays, and no error +*> message related to T or WORK is issued by XERBLA. +*> If LWORK = -1, the routine calculates optimal size of WORK for the +*> optimal performance and returns this value in WORK(1). +*> If LWORK = -2, the routine calculates minimal size of WORK and +*> returns this value in WORK(1). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details +* ==================== +*> +*> \verbatim +*> +*> The goal of the interface is to give maximum freedom to the developers for +*> creating any QR factorization algorithm they wish. The triangular +*> (trapezoidal) R has to be stored in the upper part of A. The lower part of A +*> and the array T can be used to store any relevant information for applying or +*> constructing the Q factor. The WORK array can safely be discarded after exit. +*> +*> Caution: One should not expect the sizes of T and WORK to be the same from one +*> LAPACK implementation to the other, or even from one execution to the other. +*> A workspace query (for T and WORK) is needed at each execution. However, +*> for a given execution, the size of T and WORK are fixed and will not change +*> from one query to the next. +*> +*> \endverbatim +*> +*> \par Further Details particular to this LAPACK implementation: +* ============================================================== +*> +*> \verbatim +*> +*> These details are particular for this LAPACK implementation. Users should not +*> take them for granted. These details may change in the future, and are unlikely not +*> true for another LAPACK implementation. These details are relevant if one wants +*> to try to understand the code. They are not part of the interface. +*> +*> In this version, +*> +*> T(2): row block size (MB) +*> T(3): column block size (NB) +*> T(6:TSIZE): data structure needed for Q, computed by +*> SLATSQR or SGEQRT +*> +*> Depending on the matrix dimensions M and N, and row and column +*> block sizes MB and NB returned by ILAENV, SGEQR will use either +*> SLATSQR (if the matrix is tall-and-skinny) or SGEQRT to compute +*> the QR factorization. +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGEQR( M, N, A, LDA, T, TSIZE, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, TSIZE, LWORK +* .. +* .. Array Arguments .. + REAL A( LDA, * ), T( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LQUERY, LMINWS, MINT, MINW + INTEGER MB, NB, MINTSZ, NBLCKS +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLATSQR, SGEQRT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, MOD +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable statements .. +* +* Test the input arguments +* + INFO = 0 +* + LQUERY = ( TSIZE.EQ.-1 .OR. TSIZE.EQ.-2 .OR. + $ LWORK.EQ.-1 .OR. LWORK.EQ.-2 ) +* + MINT = .FALSE. + MINW = .FALSE. + IF( TSIZE.EQ.-2 .OR. LWORK.EQ.-2 ) THEN + IF( TSIZE.NE.-1 ) MINT = .TRUE. + IF( LWORK.NE.-1 ) MINW = .TRUE. + END IF +* +* Determine the block size +* + IF( MIN( M, N ).GT.0 ) THEN + MB = ILAENV( 1, 'SGEQR ', ' ', M, N, 1, -1 ) + NB = ILAENV( 1, 'SGEQR ', ' ', M, N, 2, -1 ) + ELSE + MB = M + NB = 1 + END IF + IF( MB.GT.M .OR. MB.LE.N ) MB = M + IF( NB.GT.MIN( M, N ) .OR. NB.LT.1 ) NB = 1 + MINTSZ = N + 5 + IF ( MB.GT.N .AND. M.GT.N ) THEN + IF( MOD( M - N, MB - N ).EQ.0 ) THEN + NBLCKS = ( M - N ) / ( MB - N ) + ELSE + NBLCKS = ( M - N ) / ( MB - N ) + 1 + END IF + ELSE + NBLCKS = 1 + END IF +* +* Determine if the workspace size satisfies minimal size +* + LMINWS = .FALSE. + IF( ( TSIZE.LT.MAX( 1, NB*N*NBLCKS + 5 ) .OR. LWORK.LT.NB*N ) + $ .AND. ( LWORK.GE.N ) .AND. ( TSIZE.GE.MINTSZ ) + $ .AND. ( .NOT.LQUERY ) ) THEN + IF( TSIZE.LT.MAX( 1, NB*N*NBLCKS + 5 ) ) THEN + LMINWS = .TRUE. + NB = 1 + MB = M + END IF + IF( LWORK.LT.NB*N ) THEN + LMINWS = .TRUE. + NB = 1 + END IF + END IF +* + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( TSIZE.LT.MAX( 1, NB*N*NBLCKS + 5 ) + $ .AND. ( .NOT.LQUERY ) .AND. ( .NOT.LMINWS ) ) THEN + INFO = -6 + ELSE IF( ( LWORK.LT.MAX( 1, N*NB ) ) .AND. ( .NOT.LQUERY ) + $ .AND. ( .NOT.LMINWS ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( MINT ) THEN + T( 1 ) = MINTSZ + ELSE + T( 1 ) = NB*N*NBLCKS + 5 + END IF + T( 2 ) = MB + T( 3 ) = NB + IF( MINW ) THEN + WORK( 1 ) = MAX( 1, N ) + ELSE + WORK( 1 ) = MAX( 1, NB*N ) + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N ).EQ.0 ) THEN + RETURN + END IF +* +* The QR Decomposition +* + IF( ( M.LE.N ) .OR. ( MB.LE.N ) .OR. ( MB.GE.M ) ) THEN + CALL SGEQRT( M, N, NB, A, LDA, T( 6 ), NB, WORK, INFO ) + ELSE + CALL SLATSQR( M, N, MB, NB, A, LDA, T( 6 ), NB, WORK, + $ LWORK, INFO ) + END IF +* + WORK( 1 ) = MAX( 1, NB*N ) +* + RETURN +* +* End of SGEQR +* + END diff --git a/dspl/liblapack/SRC/sgeqr2.f b/dspl/liblapack/SRC/sgeqr2.f new file mode 100644 index 0000000..3b990f8 --- /dev/null +++ b/dspl/liblapack/SRC/sgeqr2.f @@ -0,0 +1,192 @@ +*> \brief \b SGEQR2 computes the QR factorization of a general rectangular matrix using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEQR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEQR2( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEQR2 computes a QR factorization of a real m by n matrix A: +*> A = Q * R. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(m,n) by n upper trapezoidal matrix R (R is +*> upper triangular if m >= n); the elements below the diagonal, +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +*> and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGEQR2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + REAL AII +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQR2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL SLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAU( I ) ) + IF( I.LT.N ) THEN +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + AII = A( I, I ) + A( I, I ) = ONE + CALL SLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) + A( I, I ) = AII + END IF + 10 CONTINUE + RETURN +* +* End of SGEQR2 +* + END diff --git a/dspl/liblapack/SRC/sgeqr2p.f b/dspl/liblapack/SRC/sgeqr2p.f new file mode 100644 index 0000000..f48af9d --- /dev/null +++ b/dspl/liblapack/SRC/sgeqr2p.f @@ -0,0 +1,195 @@ +*> \brief \b SGEQR2P computes the QR factorization of a general rectangular matrix with non-negative diagonal elements using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEQR2P + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEQR2P computes a QR factorization of a real m by n matrix A: +*> A = Q * R. The diagonal entries of R are nonnegative. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(m,n) by n upper trapezoidal matrix R (R is +*> upper triangular if m >= n). The diagonal entries of R +*> are nonnegative; the elements below the diagonal, +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +*> and tau in TAU(i). +*> +*> See Lapack Working Note 203 for details +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + REAL AII +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFGP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQR2P', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL SLARFGP( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAU( I ) ) + IF( I.LT.N ) THEN +* +* Apply H(i) to A(i:m,i+1:n) from the left +* + AII = A( I, I ) + A( I, I ) = ONE + CALL SLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) + A( I, I ) = AII + END IF + 10 CONTINUE + RETURN +* +* End of SGEQR2P +* + END diff --git a/dspl/liblapack/SRC/sgeqrf.f b/dspl/liblapack/SRC/sgeqrf.f new file mode 100644 index 0000000..0f79c2c --- /dev/null +++ b/dspl/liblapack/SRC/sgeqrf.f @@ -0,0 +1,270 @@ +*> \brief \b SGEQRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEQRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEQRF computes a QR factorization of a real M-by-N matrix A: +*> A = Q * R. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix R (R is +*> upper triangular if m >= n); the elements below the diagonal, +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of min(m,n) elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +*> and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SGEQR2, SLARFB, SLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SGEQRF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SGEQRF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the QR factorization of the current block +* A(i:m,i:i+ib-1) +* + CALL SGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL SLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**T to A(i:m,i+ib:n) from the left +* + CALL SLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL SGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of SGEQRF +* + END diff --git a/dspl/liblapack/SRC/sgeqrfp.f b/dspl/liblapack/SRC/sgeqrfp.f new file mode 100644 index 0000000..654c0a1 --- /dev/null +++ b/dspl/liblapack/SRC/sgeqrfp.f @@ -0,0 +1,273 @@ +*> \brief \b SGEQRFP +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEQRFP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEQRFP computes a QR factorization of a real M-by-N matrix A: +*> A = Q * R. The diagonal entries of R are nonnegative. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix R (R is +*> upper triangular if m >= n). The diagonal entries of R +*> are nonnegative; the elements below the diagonal, +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of min(m,n) elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +*> and tau in TAU(i). +*> +*> See Lapack Working Note 203 for details +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SGEQR2P, SLARFB, SLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQRFP', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SGEQRF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SGEQRF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the QR factorization of the current block +* A(i:m,i:i+ib-1) +* + CALL SGEQR2P( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL SLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**T to A(i:m,i+ib:n) from the left +* + CALL SLARFB( 'Left', 'Transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL SGEQR2P( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of SGEQRFP +* + END diff --git a/dspl/liblapack/SRC/sgeqrt.f b/dspl/liblapack/SRC/sgeqrt.f new file mode 100644 index 0000000..f7c5817 --- /dev/null +++ b/dspl/liblapack/SRC/sgeqrt.f @@ -0,0 +1,218 @@ +*> \brief \b SGEQRT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDT, M, N, NB +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEQRT computes a blocked QR factorization of a real M-by-N matrix A +*> using the compact WY representation of Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The block size to be used in the blocked QR. MIN(M,N) >= NB >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix R (R is +*> upper triangular if M >= N); the elements below the diagonal +*> are the columns of V. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (LDT,MIN(M,N)) +*> The upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See below +*> for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (NB*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix V stores the elementary reflectors H(i) in the i-th column +*> below the diagonal. For example, if M=5 and N=3, the matrix V is +*> +*> V = ( 1 ) +*> ( v1 1 ) +*> ( v1 v2 1 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> where the vi's represent the vectors which define H(i), which are returned +*> in the matrix A. The 1's along the diagonal of V are not stored in A. +*> +*> Let K=MIN(M,N). The number of blocks is B = ceiling(K/NB), where each +*> block is of order NB except for the last block, which is of order +*> IB = K - (B-1)*NB. For each of the B blocks, a upper triangular block +*> reflector factor is computed: T1, T2, ..., TB. The NB-by-NB (and IB-by-IB +*> for the last block) T's are stored in the NB-by-K matrix T as +*> +*> T = (T1 T2 ... TB). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDT, M, N, NB +* .. +* .. Array Arguments .. + REAL A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + INTEGER I, IB, IINFO, K + LOGICAL USE_RECURSIVE_QR + PARAMETER( USE_RECURSIVE_QR=.TRUE. ) +* .. +* .. External Subroutines .. + EXTERNAL SGEQRT2, SGEQRT3, SLARFB, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NB.LT.1 .OR. ( NB.GT.MIN(M,N) .AND. MIN(M,N).GT.0 ) )THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDT.LT.NB ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQRT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) RETURN +* +* Blocked loop of length K +* + DO I = 1, K, NB + IB = MIN( K-I+1, NB ) +* +* Compute the QR factorization of the current block A(I:M,I:I+IB-1) +* + IF( USE_RECURSIVE_QR ) THEN + CALL SGEQRT3( M-I+1, IB, A(I,I), LDA, T(1,I), LDT, IINFO ) + ELSE + CALL SGEQRT2( M-I+1, IB, A(I,I), LDA, T(1,I), LDT, IINFO ) + END IF + IF( I+IB.LE.N ) THEN +* +* Update by applying H**T to A(I:M,I+IB:N) from the left +* + CALL SLARFB( 'L', 'T', 'F', 'C', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, T( 1, I ), LDT, + $ A( I, I+IB ), LDA, WORK , N-I-IB+1 ) + END IF + END DO + RETURN +* +* End of SGEQRT +* + END diff --git a/dspl/liblapack/SRC/sgeqrt2.f b/dspl/liblapack/SRC/sgeqrt2.f new file mode 100644 index 0000000..349fd4b --- /dev/null +++ b/dspl/liblapack/SRC/sgeqrt2.f @@ -0,0 +1,227 @@ +*> \brief \b SGEQRT2 computes a QR factorization of a general real or complex matrix using the compact WY representation of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEQRT2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGEQRT2( M, N, A, LDA, T, LDT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDT, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEQRT2 computes a QR factorization of a real M-by-N matrix A, +*> using the compact WY representation of Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= N. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the real M-by-N matrix A. On exit, the elements on and +*> above the diagonal contain the N-by-N upper triangular matrix R; the +*> elements below the diagonal are the columns of V. See below for +*> further details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (LDT,N) +*> The N-by-N upper triangular factor of the block reflector. +*> The elements on and above the diagonal contain the block +*> reflector T; the elements below the diagonal are not used. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix V stores the elementary reflectors H(i) in the i-th column +*> below the diagonal. For example, if M=5 and N=3, the matrix V is +*> +*> V = ( 1 ) +*> ( v1 1 ) +*> ( v1 v2 1 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> where the vi's represent the vectors which define H(i), which are returned +*> in the matrix A. The 1's along the diagonal of V are not stored in A. The +*> block reflector H is then given by +*> +*> H = I - V * T * V**T +*> +*> where V**T is the transpose of V. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGEQRT2( M, N, A, LDA, T, LDT, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDT, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER( ONE = 1.0, ZERO = 0.0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + REAL AII, ALPHA +* .. +* .. External Subroutines .. + EXTERNAL SLARFG, SGEMV, SGER, STRMV, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQRT2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO I = 1, K +* +* Generate elem. refl. H(i) to annihilate A(i+1:m,i), tau(I) -> T(I,1) +* + CALL SLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ T( I, 1 ) ) + IF( I.LT.N ) THEN +* +* Apply H(i) to A(I:M,I+1:N) from the left +* + AII = A( I, I ) + A( I, I ) = ONE +* +* W(1:N-I) := A(I:M,I+1:N)^H * A(I:M,I) [W = T(:,N)] +* + CALL SGEMV( 'T',M-I+1, N-I, ONE, A( I, I+1 ), LDA, + $ A( I, I ), 1, ZERO, T( 1, N ), 1 ) +* +* A(I:M,I+1:N) = A(I:m,I+1:N) + alpha*A(I:M,I)*W(1:N-1)^H +* + ALPHA = -(T( I, 1 )) + CALL SGER( M-I+1, N-I, ALPHA, A( I, I ), 1, + $ T( 1, N ), 1, A( I, I+1 ), LDA ) + A( I, I ) = AII + END IF + END DO +* + DO I = 2, N + AII = A( I, I ) + A( I, I ) = ONE +* +* T(1:I-1,I) := alpha * A(I:M,1:I-1)**T * A(I:M,I) +* + ALPHA = -T( I, 1 ) + CALL SGEMV( 'T', M-I+1, I-1, ALPHA, A( I, 1 ), LDA, + $ A( I, I ), 1, ZERO, T( 1, I ), 1 ) + A( I, I ) = AII +* +* T(1:I-1,I) := T(1:I-1,1:I-1) * T(1:I-1,I) +* + CALL STRMV( 'U', 'N', 'N', I-1, T, LDT, T( 1, I ), 1 ) +* +* T(I,I) = tau(I) +* + T( I, I ) = T( I, 1 ) + T( I, 1) = ZERO + END DO + +* +* End of SGEQRT2 +* + END diff --git a/dspl/liblapack/SRC/sgeqrt3.f b/dspl/liblapack/SRC/sgeqrt3.f new file mode 100644 index 0000000..8316c7f --- /dev/null +++ b/dspl/liblapack/SRC/sgeqrt3.f @@ -0,0 +1,257 @@ +*> \brief \b SGEQRT3 recursively computes a QR factorization of a general real or complex matrix using the compact WY representation of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGEQRT3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE SGEQRT3( M, N, A, LDA, T, LDT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, LDT +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGEQRT3 recursively computes a QR factorization of a real M-by-N +*> matrix A, using the compact WY representation of Q. +*> +*> Based on the algorithm of Elmroth and Gustavson, +*> IBM J. Res. Develop. Vol 44 No. 4 July 2000. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= N. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the real M-by-N matrix A. On exit, the elements on and +*> above the diagonal contain the N-by-N upper triangular matrix R; the +*> elements below the diagonal are the columns of V. See below for +*> further details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (LDT,N) +*> The N-by-N upper triangular factor of the block reflector. +*> The elements on and above the diagonal contain the block +*> reflector T; the elements below the diagonal are not used. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix V stores the elementary reflectors H(i) in the i-th column +*> below the diagonal. For example, if M=5 and N=3, the matrix V is +*> +*> V = ( 1 ) +*> ( v1 1 ) +*> ( v1 v2 1 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> where the vi's represent the vectors which define H(i), which are returned +*> in the matrix A. The 1's along the diagonal of V are not stored in A. The +*> block reflector H is then given by +*> +*> H = I - V * T * V**T +*> +*> where V**T is the transpose of V. +*> +*> For details of the algorithm, see Elmroth and Gustavson (cited above). +*> \endverbatim +*> +* ===================================================================== + RECURSIVE SUBROUTINE SGEQRT3( M, N, A, LDA, T, LDT, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, LDT +* .. +* .. Array Arguments .. + REAL A( LDA, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, J, J1, N1, N2, IINFO +* .. +* .. External Subroutines .. + EXTERNAL SLARFG, STRMM, SGEMM, XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N .LT. 0 ) THEN + INFO = -2 + ELSE IF( M .LT. N ) THEN + INFO = -1 + ELSE IF( LDA .LT. MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LDT .LT. MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGEQRT3', -INFO ) + RETURN + END IF +* + IF( N.EQ.1 ) THEN +* +* Compute Householder transform when N=1 +* + CALL SLARFG( M, A(1,1), A( MIN( 2, M ), 1 ), 1, T(1,1) ) +* + ELSE +* +* Otherwise, split A into blocks... +* + N1 = N/2 + N2 = N-N1 + J1 = MIN( N1+1, N ) + I1 = MIN( N+1, M ) +* +* Compute A(1:M,1:N1) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1^H +* + CALL SGEQRT3( M, N1, A, LDA, T, LDT, IINFO ) +* +* Compute A(1:M,J1:N) = Q1^H A(1:M,J1:N) [workspace: T(1:N1,J1:N)] +* + DO J=1,N2 + DO I=1,N1 + T( I, J+N1 ) = A( I, J+N1 ) + END DO + END DO + CALL STRMM( 'L', 'L', 'T', 'U', N1, N2, ONE, + & A, LDA, T( 1, J1 ), LDT ) +* + CALL SGEMM( 'T', 'N', N1, N2, M-N1, ONE, A( J1, 1 ), LDA, + & A( J1, J1 ), LDA, ONE, T( 1, J1 ), LDT) +* + CALL STRMM( 'L', 'U', 'T', 'N', N1, N2, ONE, + & T, LDT, T( 1, J1 ), LDT ) +* + CALL SGEMM( 'N', 'N', M-N1, N2, N1, -ONE, A( J1, 1 ), LDA, + & T( 1, J1 ), LDT, ONE, A( J1, J1 ), LDA ) +* + CALL STRMM( 'L', 'L', 'N', 'U', N1, N2, ONE, + & A, LDA, T( 1, J1 ), LDT ) +* + DO J=1,N2 + DO I=1,N1 + A( I, J+N1 ) = A( I, J+N1 ) - T( I, J+N1 ) + END DO + END DO +* +* Compute A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2^H +* + CALL SGEQRT3( M-N1, N2, A( J1, J1 ), LDA, + & T( J1, J1 ), LDT, IINFO ) +* +* Compute T3 = T(1:N1,J1:N) = -T1 Y1^H Y2 T2 +* + DO I=1,N1 + DO J=1,N2 + T( I, J+N1 ) = (A( J+N1, I )) + END DO + END DO +* + CALL STRMM( 'R', 'L', 'N', 'U', N1, N2, ONE, + & A( J1, J1 ), LDA, T( 1, J1 ), LDT ) +* + CALL SGEMM( 'T', 'N', N1, N2, M-N, ONE, A( I1, 1 ), LDA, + & A( I1, J1 ), LDA, ONE, T( 1, J1 ), LDT ) +* + CALL STRMM( 'L', 'U', 'N', 'N', N1, N2, -ONE, T, LDT, + & T( 1, J1 ), LDT ) +* + CALL STRMM( 'R', 'U', 'N', 'N', N1, N2, ONE, + & T( J1, J1 ), LDT, T( 1, J1 ), LDT ) +* +* Y = (Y1,Y2); R = [ R1 A(1:N1,J1:N) ]; T = [T1 T3] +* [ 0 R2 ] [ 0 T2] +* + END IF +* + RETURN +* +* End of SGEQRT3 +* + END diff --git a/dspl/liblapack/SRC/sgerfs.f b/dspl/liblapack/SRC/sgerfs.f new file mode 100644 index 0000000..aba761d --- /dev/null +++ b/dspl/liblapack/SRC/sgerfs.f @@ -0,0 +1,438 @@ +*> \brief \b SGERFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGERFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, +* X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGERFS improves the computed solution to a system of linear +*> equations and provides error bounds and backward error estimates for +*> the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The original N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> The factors L and U from the factorization A = P*L*U +*> as computed by SGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from SGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by SGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +* ===================================================================== + SUBROUTINE SGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, + $ X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANST + INTEGER COUNT, I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMV, SGETRS, SLACN2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGERFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL SGEMV( TRANS, N, N, -ONE, A, LDA, X( 1, J ), 1, ONE, + $ WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(op(A))*abs(X) + abs(B). +* + IF( NOTRAN ) THEN + DO 50 K = 1, N + XK = ABS( X( K, J ) ) + DO 40 I = 1, N + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + 40 CONTINUE + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + DO 60 I = 1, N + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 60 CONTINUE + WORK( K ) = WORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL SGETRS( TRANS, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N, + $ INFO ) + CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL SLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**T). +* + CALL SGETRS( TRANST, N, 1, AF, LDAF, IPIV, WORK( N+1 ), + $ N, INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 110 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 120 CONTINUE + CALL SGETRS( TRANS, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N, + $ INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of SGERFS +* + END diff --git a/dspl/liblapack/SRC/sgerfsx.f b/dspl/liblapack/SRC/sgerfsx.f new file mode 100644 index 0000000..3f51889 --- /dev/null +++ b/dspl/liblapack/SRC/sgerfsx.f @@ -0,0 +1,731 @@ +*> \brief \b SGERFSX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGERFSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGERFSX( TRANS, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, +* R, C, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS, EQUED +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX , * ), WORK( * ) +* REAL R( * ), C( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGERFSX improves the computed solution to a system of linear +*> equations and provides error bounds and backward error estimates +*> for the solution. In addition to normwise error bound, the code +*> provides maximum componentwise error bound if possible. See +*> comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the +*> error bounds. +*> +*> The original system of linear equations may have been equilibrated +*> before calling this routine, as described by arguments EQUED, R +*> and C below. In this case, the solution and error bounds returned +*> are for the original unequilibrated system. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done to A +*> before calling this routine. This is needed to compute +*> the solution and error bounds correctly. +*> = 'N': No equilibration +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> The right hand side B has been changed accordingly. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The original N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> The factors L and U from the factorization A = P*L*U +*> as computed by SGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from SGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] R +*> \verbatim +*> R is REAL array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. +*> If R is accessed, each element of R should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. +*> If C is accessed, each element of C should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by SGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is REAL array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +* ===================================================================== + SUBROUTINE SGERFSX( TRANS, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ R, C, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS, EQUED + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX , * ), WORK( * ) + REAL R( * ), C( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL ITREF_DEFAULT, ITHRESH_DEFAULT, + $ COMPONENTWISE_DEFAULT + REAL RTHRESH_DEFAULT, DZTHRESH_DEFAULT + PARAMETER ( ITREF_DEFAULT = 1.0 ) + PARAMETER ( ITHRESH_DEFAULT = 10.0 ) + PARAMETER ( COMPONENTWISE_DEFAULT = 1.0 ) + PARAMETER ( RTHRESH_DEFAULT = 0.5 ) + PARAMETER ( DZTHRESH_DEFAULT = 0.25 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. Local Scalars .. + CHARACTER(1) NORM + LOGICAL ROWEQU, COLEQU, NOTRAN + INTEGER J, TRANS_TYPE, PREC_TYPE, REF_TYPE + INTEGER N_NORMS + REAL ANORM, RCOND_TMP + REAL ILLRCOND_THRESH, ERR_LBND, CWISE_WRONG + LOGICAL IGNORE_CWISE + INTEGER ITHRESH + REAL RTHRESH, UNSTABLE_THRESH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SGECON, SLA_GERFSX_EXTENDED +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. External Functions .. + EXTERNAL LSAME, ILATRANS, ILAPREC + EXTERNAL SLAMCH, SLANGE, SLA_GERCOND + REAL SLAMCH, SLANGE, SLA_GERCOND + LOGICAL LSAME + INTEGER ILATRANS, ILAPREC +* .. +* .. Executable Statements .. +* +* Check the input parameters. +* + INFO = 0 + TRANS_TYPE = ILATRANS( TRANS ) + REF_TYPE = INT( ITREF_DEFAULT ) + IF ( NPARAMS .GE. LA_LINRX_ITREF_I ) THEN + IF ( PARAMS( LA_LINRX_ITREF_I ) .LT. 0.0 ) THEN + PARAMS( LA_LINRX_ITREF_I ) = ITREF_DEFAULT + ELSE + REF_TYPE = PARAMS( LA_LINRX_ITREF_I ) + END IF + END IF +* +* Set default parameters. +* + ILLRCOND_THRESH = REAL( N ) * SLAMCH( 'Epsilon' ) + ITHRESH = INT( ITHRESH_DEFAULT ) + RTHRESH = RTHRESH_DEFAULT + UNSTABLE_THRESH = DZTHRESH_DEFAULT + IGNORE_CWISE = COMPONENTWISE_DEFAULT .EQ. 0.0 +* + IF ( NPARAMS.GE.LA_LINRX_ITHRESH_I ) THEN + IF ( PARAMS( LA_LINRX_ITHRESH_I ).LT.0.0 ) THEN + PARAMS( LA_LINRX_ITHRESH_I ) = ITHRESH + ELSE + ITHRESH = INT( PARAMS( LA_LINRX_ITHRESH_I ) ) + END IF + END IF + IF ( NPARAMS.GE.LA_LINRX_CWISE_I ) THEN + IF ( PARAMS( LA_LINRX_CWISE_I ).LT.0.0 ) THEN + IF ( IGNORE_CWISE ) THEN + PARAMS( LA_LINRX_CWISE_I ) = 0.0 + ELSE + PARAMS( LA_LINRX_CWISE_I ) = 1.0 + END IF + ELSE + IGNORE_CWISE = PARAMS( LA_LINRX_CWISE_I ) .EQ. 0.0 + END IF + END IF + IF ( REF_TYPE .EQ. 0 .OR. N_ERR_BNDS .EQ. 0 ) THEN + N_NORMS = 0 + ELSE IF ( IGNORE_CWISE ) THEN + N_NORMS = 1 + ELSE + N_NORMS = 2 + END IF +* + NOTRAN = LSAME( TRANS, 'N' ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) +* +* Test input parameters. +* + IF( TRANS_TYPE.EQ.-1 ) THEN + INFO = -1 + ELSE IF( .NOT.ROWEQU .AND. .NOT.COLEQU .AND. + $ .NOT.LSAME( EQUED, 'N' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGERFSX', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + RCOND = 1.0 + DO J = 1, NRHS + BERR( J ) = 0.0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I) = 0.0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 0.0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 1.0 + END IF + END DO + RETURN + END IF +* +* Default to failure. +* + RCOND = 0.0 + DO J = 1, NRHS + BERR( J ) = 1.0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 0.0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 0.0 + END IF + END DO +* +* Compute the norm of A and the reciprocal of the condition +* number of A. +* + IF( NOTRAN ) THEN + NORM = 'I' + ELSE + NORM = '1' + END IF + ANORM = SLANGE( NORM, N, N, A, LDA, WORK ) + CALL SGECON( NORM, N, AF, LDAF, ANORM, RCOND, WORK, IWORK, INFO ) +* +* Perform refinement on each right-hand side +* + IF ( REF_TYPE .NE. 0 ) THEN + + PREC_TYPE = ILAPREC( 'D' ) + + IF ( NOTRAN ) THEN + CALL SLA_GERFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, + $ NRHS, A, LDA, AF, LDAF, IPIV, COLEQU, C, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, WORK(N+1), WORK(1), WORK(2*N+1), + $ WORK(1), RCOND, ITHRESH, RTHRESH, UNSTABLE_THRESH, + $ IGNORE_CWISE, INFO ) + ELSE + CALL SLA_GERFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, + $ NRHS, A, LDA, AF, LDAF, IPIV, ROWEQU, R, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, WORK(N+1), WORK(1), WORK(2*N+1), + $ WORK(1), RCOND, ITHRESH, RTHRESH, UNSTABLE_THRESH, + $ IGNORE_CWISE, INFO ) + END IF + END IF + + ERR_LBND = MAX( 10.0, SQRT( REAL( N ) ) ) * SLAMCH( 'Epsilon' ) + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 1 ) THEN +* +* Compute scaled normwise condition number cond(A*C). +* + IF ( COLEQU .AND. NOTRAN ) THEN + RCOND_TMP = SLA_GERCOND( TRANS, N, A, LDA, AF, LDAF, IPIV, + $ -1, C, INFO, WORK, IWORK ) + ELSE IF ( ROWEQU .AND. .NOT. NOTRAN ) THEN + RCOND_TMP = SLA_GERCOND( TRANS, N, A, LDA, AF, LDAF, IPIV, + $ -1, R, INFO, WORK, IWORK ) + ELSE + RCOND_TMP = SLA_GERCOND( TRANS, N, A, LDA, AF, LDAF, IPIV, + $ 0, R, INFO, WORK, IWORK ) + END IF + DO J = 1, NRHS +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .GT. 1.0 ) + $ ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 0.0 + IF ( INFO .LE. N ) INFO = N + J + ELSE IF ( ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .LT. ERR_LBND ) + $ THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + END DO + END IF + + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 2 ) THEN +* +* Compute componentwise condition number cond(A*diag(Y(:,J))) for +* each right-hand side using the current solution as an estimate of +* the true solution. If the componentwise error estimate is too +* large, then the solution is a lousy estimate of truth and the +* estimated RCOND may be too optimistic. To avoid misleading users, +* the inverse condition number is set to 0.0 when the estimated +* cwise error is at least CWISE_WRONG. +* + CWISE_WRONG = SQRT( SLAMCH( 'Epsilon' ) ) + DO J = 1, NRHS + IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .LT. CWISE_WRONG ) + $ THEN + RCOND_TMP = SLA_GERCOND( TRANS, N, A, LDA, AF, LDAF, + $ IPIV, 1, X(1,J), INFO, WORK, IWORK ) + ELSE + RCOND_TMP = 0.0 + END IF +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .GT. 1.0 ) + $ ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 0.0 + IF ( PARAMS( LA_LINRX_CWISE_I ) .EQ. 1.0 + $ .AND. INFO.LT.N + J ) INFO = N + J + ELSE IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) + $ .LT. ERR_LBND ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + END DO + END IF +* + RETURN +* +* End of SGERFSX +* + END diff --git a/dspl/liblapack/SRC/sgerq2.f b/dspl/liblapack/SRC/sgerq2.f new file mode 100644 index 0000000..68f78c6 --- /dev/null +++ b/dspl/liblapack/SRC/sgerq2.f @@ -0,0 +1,193 @@ +*> \brief \b SGERQ2 computes the RQ factorization of a general rectangular matrix using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGERQ2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGERQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGERQ2 computes an RQ factorization of a real m by n matrix A: +*> A = R * Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, if m <= n, the upper triangle of the subarray +*> A(1:m,n-m+1:n) contains the m by m upper triangular matrix R; +*> if m >= n, the elements on and above the (m-n)-th subdiagonal +*> contain the m by n upper trapezoidal matrix R; the remaining +*> elements, with the array TAU, represent the orthogonal matrix +*> Q as a product of elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in +*> A(m-k+i,1:n-k+i-1), and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGERQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, K + REAL AII +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGERQ2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = K, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* A(m-k+i,1:n-k+i-1) +* + CALL SLARFG( N-K+I, A( M-K+I, N-K+I ), A( M-K+I, 1 ), LDA, + $ TAU( I ) ) +* +* Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right +* + AII = A( M-K+I, N-K+I ) + A( M-K+I, N-K+I ) = ONE + CALL SLARF( 'Right', M-K+I-1, N-K+I, A( M-K+I, 1 ), LDA, + $ TAU( I ), A, LDA, WORK ) + A( M-K+I, N-K+I ) = AII + 10 CONTINUE + RETURN +* +* End of SGERQ2 +* + END diff --git a/dspl/liblapack/SRC/sgerqf.f b/dspl/liblapack/SRC/sgerqf.f new file mode 100644 index 0000000..8b842cb --- /dev/null +++ b/dspl/liblapack/SRC/sgerqf.f @@ -0,0 +1,290 @@ +*> \brief \b SGERQF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGERQF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGERQF computes an RQ factorization of a real M-by-N matrix A: +*> A = R * Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if m <= n, the upper triangle of the subarray +*> A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R; +*> if m >= n, the elements on and above the (m-n)-th subdiagonal +*> contain the M-by-N upper trapezoidal matrix R; +*> the remaining elements, with the array TAU, represent the +*> orthogonal matrix Q as a product of min(m,n) elementary +*> reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in +*> A(m-k+i,1:n-k+i-1), and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT, + $ MU, NB, NBMIN, NU, NX +* .. +* .. External Subroutines .. + EXTERNAL SGERQ2, SLARFB, SLARFT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'SGERQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + WORK( 1 ) = LWKOPT + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGERQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SGERQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SGERQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially. +* The last kk rows are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* + DO 10 I = K - KK + KI + 1, K - KK + 1, -NB + IB = MIN( K-I+1, NB ) +* +* Compute the RQ factorization of the current block +* A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1) +* + CALL SGERQ2( IB, N-K+I+IB-1, A( M-K+I, 1 ), LDA, TAU( I ), + $ WORK, IINFO ) + IF( M-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL SLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, + $ A( M-K+I, 1 ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right +* + CALL SLARFB( 'Right', 'No transpose', 'Backward', + $ 'Rowwise', M-K+I-1, N-K+I+IB-1, IB, + $ A( M-K+I, 1 ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + MU = M - K + I + NB - 1 + NU = N - K + I + NB - 1 + ELSE + MU = M + NU = N + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 .AND. NU.GT.0 ) + $ CALL SGERQ2( MU, NU, A, LDA, TAU, WORK, IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of SGERQF +* + END diff --git a/dspl/liblapack/SRC/sgesc2.f b/dspl/liblapack/SRC/sgesc2.f new file mode 100644 index 0000000..c78daa3 --- /dev/null +++ b/dspl/liblapack/SRC/sgesc2.f @@ -0,0 +1,201 @@ +*> \brief \b SGESC2 solves a system of linear equations using the LU factorization with complete pivoting computed by sgetc2. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGESC2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) +* +* .. Scalar Arguments .. +* INTEGER LDA, N +* REAL SCALE +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), JPIV( * ) +* REAL A( LDA, * ), RHS( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGESC2 solves a system of linear equations +*> +*> A * X = scale* RHS +*> +*> with a general N-by-N matrix A using the LU factorization with +*> complete pivoting computed by SGETC2. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the LU part of the factorization of the n-by-n +*> matrix A computed by SGETC2: A = P * L * U * Q +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] RHS +*> \verbatim +*> RHS is REAL array, dimension (N). +*> On entry, the right hand side vector b. +*> On exit, the solution vector X. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N). +*> The pivot indices; for 1 <= i <= N, row i of the +*> matrix has been interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] JPIV +*> \verbatim +*> JPIV is INTEGER array, dimension (N). +*> The pivot indices; for 1 <= j <= N, column j of the +*> matrix has been interchanged with column JPIV(j). +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL +*> On exit, SCALE contains the scale factor. SCALE is chosen +*> 0 <= SCALE <= 1 to prevent owerflow in the solution. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEauxiliary +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== + SUBROUTINE SGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER LDA, N + REAL SCALE +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + REAL A( LDA, * ), RHS( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, TWO + PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL BIGNUM, EPS, SMLNUM, TEMP +* .. +* .. External Subroutines .. + EXTERNAL SLABAD, SLASWP, SSCAL +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL ISAMAX, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Set constant to control owerflow +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Apply permutations IPIV to RHS +* + CALL SLASWP( 1, RHS, LDA, 1, N-1, IPIV, 1 ) +* +* Solve for L part +* + DO 20 I = 1, N - 1 + DO 10 J = I + 1, N + RHS( J ) = RHS( J ) - A( J, I )*RHS( I ) + 10 CONTINUE + 20 CONTINUE +* +* Solve for U part +* + SCALE = ONE +* +* Check for scaling +* + I = ISAMAX( N, RHS, 1 ) + IF( TWO*SMLNUM*ABS( RHS( I ) ).GT.ABS( A( N, N ) ) ) THEN + TEMP = ( ONE / TWO ) / ABS( RHS( I ) ) + CALL SSCAL( N, TEMP, RHS( 1 ), 1 ) + SCALE = SCALE*TEMP + END IF +* + DO 40 I = N, 1, -1 + TEMP = ONE / A( I, I ) + RHS( I ) = RHS( I )*TEMP + DO 30 J = I + 1, N + RHS( I ) = RHS( I ) - RHS( J )*( A( I, J )*TEMP ) + 30 CONTINUE + 40 CONTINUE +* +* Apply permutations JPIV to the solution (RHS) +* + CALL SLASWP( 1, RHS, LDA, 1, N-1, JPIV, -1 ) + RETURN +* +* End of SGESC2 +* + END diff --git a/dspl/liblapack/SRC/sgesdd.f b/dspl/liblapack/SRC/sgesdd.f new file mode 100644 index 0000000..0ba2a78 --- /dev/null +++ b/dspl/liblapack/SRC/sgesdd.f @@ -0,0 +1,1548 @@ +*> \brief \b SGESDD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGESDD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, +* WORK, LWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ +* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), S( * ), U( LDU, * ), +* $ VT( LDVT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGESDD computes the singular value decomposition (SVD) of a real +*> M-by-N matrix A, optionally computing the left and right singular +*> vectors. If singular vectors are desired, it uses a +*> divide-and-conquer algorithm. +*> +*> The SVD is written +*> +*> A = U * SIGMA * transpose(V) +*> +*> where SIGMA is an M-by-N matrix which is zero except for its +*> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and +*> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA +*> are the singular values of A; they are real and non-negative, and +*> are returned in descending order. The first min(m,n) columns of +*> U and V are the left and right singular vectors of A. +*> +*> Note that the routine returns VT = V**T, not V. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> Specifies options for computing all or part of the matrix U: +*> = 'A': all M columns of U and all N rows of V**T are +*> returned in the arrays U and VT; +*> = 'S': the first min(M,N) columns of U and the first +*> min(M,N) rows of V**T are returned in the arrays U +*> and VT; +*> = 'O': If M >= N, the first N columns of U are overwritten +*> on the array A and all rows of V**T are returned in +*> the array VT; +*> otherwise, all columns of U are returned in the +*> array U and the first M rows of V**T are overwritten +*> in the array A; +*> = 'N': no columns of U or rows of V**T are computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if JOBZ = 'O', A is overwritten with the first N columns +*> of U (the left singular vectors, stored +*> columnwise) if M >= N; +*> A is overwritten with the first M rows +*> of V**T (the right singular vectors, stored +*> rowwise) otherwise. +*> if JOBZ .ne. 'O', the contents of A are destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (min(M,N)) +*> The singular values of A, sorted so that S(i) >= S(i+1). +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is REAL array, dimension (LDU,UCOL) +*> UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; +*> UCOL = min(M,N) if JOBZ = 'S'. +*> If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M +*> orthogonal matrix U; +*> if JOBZ = 'S', U contains the first min(M,N) columns of U +*> (the left singular vectors, stored columnwise); +*> if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= 1; if +*> JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is REAL array, dimension (LDVT,N) +*> If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the +*> N-by-N orthogonal matrix V**T; +*> if JOBZ = 'S', VT contains the first min(M,N) rows of +*> V**T (the right singular vectors, stored rowwise); +*> if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= 1; +*> if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; +*> if JOBZ = 'S', LDVT >= min(M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK; +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 1. +*> If LWORK = -1, a workspace query is assumed. The optimal +*> size for the WORK array is calculated and stored in WORK(1), +*> and no other work except argument checking is performed. +*> +*> Let mx = max(M,N) and mn = min(M,N). +*> If JOBZ = 'N', LWORK >= 3*mn + max( mx, 7*mn ). +*> If JOBZ = 'O', LWORK >= 3*mn + max( mx, 5*mn*mn + 4*mn ). +*> If JOBZ = 'S', LWORK >= 4*mn*mn + 7*mn. +*> If JOBZ = 'A', LWORK >= 4*mn*mn + 6*mn + mx. +*> These are not tight minimums in all cases; see comments inside code. +*> For good performance, LWORK should generally be larger; +*> a query is recommended. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (8*min(M,N)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: SBDSDC did not converge, updating process failed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realGEsing +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE SGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, + $ WORK, LWORK, IWORK, INFO ) + implicit none +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ + INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), S( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS + INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IL, + $ IR, ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT, + $ LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK, + $ MNTHR, NWORK, WRKBL + INTEGER LWORK_SGEBRD_MN, LWORK_SGEBRD_MM, + $ LWORK_SGEBRD_NN, LWORK_SGELQF_MN, + $ LWORK_SGEQRF_MN, + $ LWORK_SORGBR_P_MM, LWORK_SORGBR_Q_NN, + $ LWORK_SORGLQ_MN, LWORK_SORGLQ_NN, + $ LWORK_SORGQR_MM, LWORK_SORGQR_MN, + $ LWORK_SORMBR_PRT_MM, LWORK_SORMBR_QLN_MM, + $ LWORK_SORMBR_PRT_MN, LWORK_SORMBR_QLN_MN, + $ LWORK_SORMBR_PRT_NN, LWORK_SORMBR_QLN_NN + REAL ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SBDSDC, SGEBRD, SGELQF, SGEMM, SGEQRF, SLACPY, + $ SLASCL, SLASET, SORGBR, SORGLQ, SORGQR, SORMBR, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANGE + EXTERNAL SLAMCH, SLANGE, LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + WNTQA = LSAME( JOBZ, 'A' ) + WNTQS = LSAME( JOBZ, 'S' ) + WNTQAS = WNTQA .OR. WNTQS + WNTQO = LSAME( JOBZ, 'O' ) + WNTQN = LSAME( JOBZ, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.( WNTQA .OR. WNTQS .OR. WNTQO .OR. WNTQN ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDU.LT.1 .OR. ( WNTQAS .AND. LDU.LT.M ) .OR. + $ ( WNTQO .AND. M.LT.N .AND. LDU.LT.M ) ) THEN + INFO = -8 + ELSE IF( LDVT.LT.1 .OR. ( WNTQA .AND. LDVT.LT.N ) .OR. + $ ( WNTQS .AND. LDVT.LT.MINMN ) .OR. + $ ( WNTQO .AND. M.GE.N .AND. LDVT.LT.N ) ) THEN + INFO = -10 + END IF +* +* Compute workspace +* Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace allocated at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + BDSPAC = 0 + MNTHR = INT( MINMN*11.0E0 / 6.0E0 ) + IF( M.GE.N .AND. MINMN.GT.0 ) THEN +* +* Compute space needed for SBDSDC +* + IF( WNTQN ) THEN +* sbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6) +* keep 7*N for backwards compatability. + BDSPAC = 7*N + ELSE + BDSPAC = 3*N*N + 4*N + END IF +* +* Compute space preferred for each routine + CALL SGEBRD( M, N, DUM(1), M, DUM(1), DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, IERR ) + LWORK_SGEBRD_MN = INT( DUM(1) ) +* + CALL SGEBRD( N, N, DUM(1), N, DUM(1), DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, IERR ) + LWORK_SGEBRD_NN = INT( DUM(1) ) +* + CALL SGEQRF( M, N, DUM(1), M, DUM(1), DUM(1), -1, IERR ) + LWORK_SGEQRF_MN = INT( DUM(1) ) +* + CALL SORGBR( 'Q', N, N, N, DUM(1), N, DUM(1), DUM(1), -1, + $ IERR ) + LWORK_SORGBR_Q_NN = INT( DUM(1) ) +* + CALL SORGQR( M, M, N, DUM(1), M, DUM(1), DUM(1), -1, IERR ) + LWORK_SORGQR_MM = INT( DUM(1) ) +* + CALL SORGQR( M, N, N, DUM(1), M, DUM(1), DUM(1), -1, IERR ) + LWORK_SORGQR_MN = INT( DUM(1) ) +* + CALL SORMBR( 'P', 'R', 'T', N, N, N, DUM(1), N, + $ DUM(1), DUM(1), N, DUM(1), -1, IERR ) + LWORK_SORMBR_PRT_NN = INT( DUM(1) ) +* + CALL SORMBR( 'Q', 'L', 'N', N, N, N, DUM(1), N, + $ DUM(1), DUM(1), N, DUM(1), -1, IERR ) + LWORK_SORMBR_QLN_NN = INT( DUM(1) ) +* + CALL SORMBR( 'Q', 'L', 'N', M, N, N, DUM(1), M, + $ DUM(1), DUM(1), M, DUM(1), -1, IERR ) + LWORK_SORMBR_QLN_MN = INT( DUM(1) ) +* + CALL SORMBR( 'Q', 'L', 'N', M, M, N, DUM(1), M, + $ DUM(1), DUM(1), M, DUM(1), -1, IERR ) + LWORK_SORMBR_QLN_MM = INT( DUM(1) ) +* + IF( M.GE.MNTHR ) THEN + IF( WNTQN ) THEN +* +* Path 1 (M >> N, JOBZ='N') +* + WRKBL = N + LWORK_SGEQRF_MN + WRKBL = MAX( WRKBL, 3*N + LWORK_SGEBRD_NN ) + MAXWRK = MAX( WRKBL, BDSPAC + N ) + MINWRK = BDSPAC + N + ELSE IF( WNTQO ) THEN +* +* Path 2 (M >> N, JOBZ='O') +* + WRKBL = N + LWORK_SGEQRF_MN + WRKBL = MAX( WRKBL, N + LWORK_SORGQR_MN ) + WRKBL = MAX( WRKBL, 3*N + LWORK_SGEBRD_NN ) + WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_QLN_NN ) + WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_PRT_NN ) + WRKBL = MAX( WRKBL, 3*N + BDSPAC ) + MAXWRK = WRKBL + 2*N*N + MINWRK = BDSPAC + 2*N*N + 3*N + ELSE IF( WNTQS ) THEN +* +* Path 3 (M >> N, JOBZ='S') +* + WRKBL = N + LWORK_SGEQRF_MN + WRKBL = MAX( WRKBL, N + LWORK_SORGQR_MN ) + WRKBL = MAX( WRKBL, 3*N + LWORK_SGEBRD_NN ) + WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_QLN_NN ) + WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_PRT_NN ) + WRKBL = MAX( WRKBL, 3*N + BDSPAC ) + MAXWRK = WRKBL + N*N + MINWRK = BDSPAC + N*N + 3*N + ELSE IF( WNTQA ) THEN +* +* Path 4 (M >> N, JOBZ='A') +* + WRKBL = N + LWORK_SGEQRF_MN + WRKBL = MAX( WRKBL, N + LWORK_SORGQR_MM ) + WRKBL = MAX( WRKBL, 3*N + LWORK_SGEBRD_NN ) + WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_QLN_NN ) + WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_PRT_NN ) + WRKBL = MAX( WRKBL, 3*N + BDSPAC ) + MAXWRK = WRKBL + N*N + MINWRK = N*N + MAX( 3*N + BDSPAC, N + M ) + END IF + ELSE +* +* Path 5 (M >= N, but not much larger) +* + WRKBL = 3*N + LWORK_SGEBRD_MN + IF( WNTQN ) THEN +* Path 5n (M >= N, jobz='N') + MAXWRK = MAX( WRKBL, 3*N + BDSPAC ) + MINWRK = 3*N + MAX( M, BDSPAC ) + ELSE IF( WNTQO ) THEN +* Path 5o (M >= N, jobz='O') + WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_PRT_NN ) + WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_QLN_MN ) + WRKBL = MAX( WRKBL, 3*N + BDSPAC ) + MAXWRK = WRKBL + M*N + MINWRK = 3*N + MAX( M, N*N + BDSPAC ) + ELSE IF( WNTQS ) THEN +* Path 5s (M >= N, jobz='S') + WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_QLN_MN ) + WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_PRT_NN ) + MAXWRK = MAX( WRKBL, 3*N + BDSPAC ) + MINWRK = 3*N + MAX( M, BDSPAC ) + ELSE IF( WNTQA ) THEN +* Path 5a (M >= N, jobz='A') + WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_QLN_MM ) + WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_PRT_NN ) + MAXWRK = MAX( WRKBL, 3*N + BDSPAC ) + MINWRK = 3*N + MAX( M, BDSPAC ) + END IF + END IF + ELSE IF( MINMN.GT.0 ) THEN +* +* Compute space needed for SBDSDC +* + IF( WNTQN ) THEN +* sbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6) +* keep 7*N for backwards compatability. + BDSPAC = 7*M + ELSE + BDSPAC = 3*M*M + 4*M + END IF +* +* Compute space preferred for each routine + CALL SGEBRD( M, N, DUM(1), M, DUM(1), DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, IERR ) + LWORK_SGEBRD_MN = INT( DUM(1) ) +* + CALL SGEBRD( M, M, A, M, S, DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, IERR ) + LWORK_SGEBRD_MM = INT( DUM(1) ) +* + CALL SGELQF( M, N, A, M, DUM(1), DUM(1), -1, IERR ) + LWORK_SGELQF_MN = INT( DUM(1) ) +* + CALL SORGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR ) + LWORK_SORGLQ_NN = INT( DUM(1) ) +* + CALL SORGLQ( M, N, M, A, M, DUM(1), DUM(1), -1, IERR ) + LWORK_SORGLQ_MN = INT( DUM(1) ) +* + CALL SORGBR( 'P', M, M, M, A, N, DUM(1), DUM(1), -1, IERR ) + LWORK_SORGBR_P_MM = INT( DUM(1) ) +* + CALL SORMBR( 'P', 'R', 'T', M, M, M, DUM(1), M, + $ DUM(1), DUM(1), M, DUM(1), -1, IERR ) + LWORK_SORMBR_PRT_MM = INT( DUM(1) ) +* + CALL SORMBR( 'P', 'R', 'T', M, N, M, DUM(1), M, + $ DUM(1), DUM(1), M, DUM(1), -1, IERR ) + LWORK_SORMBR_PRT_MN = INT( DUM(1) ) +* + CALL SORMBR( 'P', 'R', 'T', N, N, M, DUM(1), N, + $ DUM(1), DUM(1), N, DUM(1), -1, IERR ) + LWORK_SORMBR_PRT_NN = INT( DUM(1) ) +* + CALL SORMBR( 'Q', 'L', 'N', M, M, M, DUM(1), M, + $ DUM(1), DUM(1), M, DUM(1), -1, IERR ) + LWORK_SORMBR_QLN_MM = INT( DUM(1) ) +* + IF( N.GE.MNTHR ) THEN + IF( WNTQN ) THEN +* +* Path 1t (N >> M, JOBZ='N') +* + WRKBL = M + LWORK_SGELQF_MN + WRKBL = MAX( WRKBL, 3*M + LWORK_SGEBRD_MM ) + MAXWRK = MAX( WRKBL, BDSPAC + M ) + MINWRK = BDSPAC + M + ELSE IF( WNTQO ) THEN +* +* Path 2t (N >> M, JOBZ='O') +* + WRKBL = M + LWORK_SGELQF_MN + WRKBL = MAX( WRKBL, M + LWORK_SORGLQ_MN ) + WRKBL = MAX( WRKBL, 3*M + LWORK_SGEBRD_MM ) + WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_QLN_MM ) + WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_PRT_MM ) + WRKBL = MAX( WRKBL, 3*M + BDSPAC ) + MAXWRK = WRKBL + 2*M*M + MINWRK = BDSPAC + 2*M*M + 3*M + ELSE IF( WNTQS ) THEN +* +* Path 3t (N >> M, JOBZ='S') +* + WRKBL = M + LWORK_SGELQF_MN + WRKBL = MAX( WRKBL, M + LWORK_SORGLQ_MN ) + WRKBL = MAX( WRKBL, 3*M + LWORK_SGEBRD_MM ) + WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_QLN_MM ) + WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_PRT_MM ) + WRKBL = MAX( WRKBL, 3*M + BDSPAC ) + MAXWRK = WRKBL + M*M + MINWRK = BDSPAC + M*M + 3*M + ELSE IF( WNTQA ) THEN +* +* Path 4t (N >> M, JOBZ='A') +* + WRKBL = M + LWORK_SGELQF_MN + WRKBL = MAX( WRKBL, M + LWORK_SORGLQ_NN ) + WRKBL = MAX( WRKBL, 3*M + LWORK_SGEBRD_MM ) + WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_QLN_MM ) + WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_PRT_MM ) + WRKBL = MAX( WRKBL, 3*M + BDSPAC ) + MAXWRK = WRKBL + M*M + MINWRK = M*M + MAX( 3*M + BDSPAC, M + N ) + END IF + ELSE +* +* Path 5t (N > M, but not much larger) +* + WRKBL = 3*M + LWORK_SGEBRD_MN + IF( WNTQN ) THEN +* Path 5tn (N > M, jobz='N') + MAXWRK = MAX( WRKBL, 3*M + BDSPAC ) + MINWRK = 3*M + MAX( N, BDSPAC ) + ELSE IF( WNTQO ) THEN +* Path 5to (N > M, jobz='O') + WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_QLN_MM ) + WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_PRT_MN ) + WRKBL = MAX( WRKBL, 3*M + BDSPAC ) + MAXWRK = WRKBL + M*N + MINWRK = 3*M + MAX( N, M*M + BDSPAC ) + ELSE IF( WNTQS ) THEN +* Path 5ts (N > M, jobz='S') + WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_QLN_MM ) + WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_PRT_MN ) + MAXWRK = MAX( WRKBL, 3*M + BDSPAC ) + MINWRK = 3*M + MAX( N, BDSPAC ) + ELSE IF( WNTQA ) THEN +* Path 5ta (N > M, jobz='A') + WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_QLN_MM ) + WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_PRT_NN ) + MAXWRK = MAX( WRKBL, 3*M + BDSPAC ) + MINWRK = 3*M + MAX( N, BDSPAC ) + END IF + END IF + END IF + + MAXWRK = MAX( MAXWRK, MINWRK ) + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGESDD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce using the QR +* decomposition (if sufficient workspace available) +* + IF( M.GE.MNTHR ) THEN +* + IF( WNTQN ) THEN +* +* Path 1 (M >> N, JOBZ='N') +* No singular vectors to be computed +* + ITAU = 1 + NWORK = ITAU + N +* +* Compute A=Q*R +* Workspace: need N [tau] + N [work] +* Workspace: prefer N [tau] + N*NB [work] +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Zero out below R +* + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A +* Workspace: need 3*N [e, tauq, taup] + N [work] +* Workspace: prefer 3*N [e, tauq, taup] + 2*N*NB [work] +* + CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + NWORK = IE + N +* +* Perform bidiagonal SVD, computing singular values only +* Workspace: need N [e] + BDSPAC +* + CALL SBDSDC( 'U', 'N', N, S, WORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, WORK( NWORK ), IWORK, INFO ) +* + ELSE IF( WNTQO ) THEN +* +* Path 2 (M >> N, JOBZ = 'O') +* N left singular vectors to be overwritten on A and +* N right singular vectors to be computed in VT +* + IR = 1 +* +* WORK(IR) is LDWRKR by N +* + IF( LWORK .GE. LDA*N + N*N + 3*N + BDSPAC ) THEN + LDWRKR = LDA + ELSE + LDWRKR = ( LWORK - N*N - 3*N - BDSPAC ) / N + END IF + ITAU = IR + LDWRKR*N + NWORK = ITAU + N +* +* Compute A=Q*R +* Workspace: need N*N [R] + N [tau] + N [work] +* Workspace: prefer N*N [R] + N [tau] + N*NB [work] +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL SLASET( 'L', N - 1, N - 1, ZERO, ZERO, WORK(IR+1), + $ LDWRKR ) +* +* Generate Q in A +* Workspace: need N*N [R] + N [tau] + N [work] +* Workspace: prefer N*N [R] + N [tau] + N*NB [work] +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work] +* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work] +* + CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* WORK(IU) is N by N +* + IU = NWORK + NWORK = IU + N*N +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in WORK(IU) and computing right +* singular vectors of bidiagonal matrix in VT +* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + BDSPAC +* + CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), N, + $ VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite WORK(IU) by left singular vectors of R +* and VT by right singular vectors of R +* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N [work] +* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N*NB [work] +* + CALL SORMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IU ), N, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + CALL SORMBR( 'P', 'R', 'T', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in WORK(IR) and copying to A +* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] +* Workspace: prefer M*N [R] + 3*N [e, tauq, taup] + N*N [U] +* + DO 10 I = 1, M, LDWRKR + CHUNK = MIN( M - I + 1, LDWRKR ) + CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), + $ LDA, WORK( IU ), N, ZERO, WORK( IR ), + $ LDWRKR ) + CALL SLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR, + $ A( I, 1 ), LDA ) + 10 CONTINUE +* + ELSE IF( WNTQS ) THEN +* +* Path 3 (M >> N, JOBZ='S') +* N left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IR = 1 +* +* WORK(IR) is N by N +* + LDWRKR = N + ITAU = IR + LDWRKR*N + NWORK = ITAU + N +* +* Compute A=Q*R +* Workspace: need N*N [R] + N [tau] + N [work] +* Workspace: prefer N*N [R] + N [tau] + N*NB [work] +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL SLASET( 'L', N - 1, N - 1, ZERO, ZERO, WORK(IR+1), + $ LDWRKR ) +* +* Generate Q in A +* Workspace: need N*N [R] + N [tau] + N [work] +* Workspace: prefer N*N [R] + N [tau] + N*NB [work] +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work] +* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work] +* + CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagoal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* Workspace: need N*N [R] + 3*N [e, tauq, taup] + BDSPAC +* + CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite U by left singular vectors of R and VT +* by right singular vectors of R +* Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work] +* Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*NB [work] +* + CALL SORMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* + CALL SORMBR( 'P', 'R', 'T', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in U +* Workspace: need N*N [R] +* + CALL SLACPY( 'F', N, N, U, LDU, WORK( IR ), LDWRKR ) + CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA, WORK( IR ), + $ LDWRKR, ZERO, U, LDU ) +* + ELSE IF( WNTQA ) THEN +* +* Path 4 (M >> N, JOBZ='A') +* M left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IU = 1 +* +* WORK(IU) is N by N +* + LDWRKU = N + ITAU = IU + LDWRKU*N + NWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* Workspace: need N*N [U] + N [tau] + N [work] +* Workspace: prefer N*N [U] + N [tau] + N*NB [work] +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* Workspace: need N*N [U] + N [tau] + M [work] +* Workspace: prefer N*N [U] + N [tau] + M*NB [work] + CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) +* +* Produce R in A, zeroing out other entries +* + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A +* Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work] +* Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + 2*N*NB [work] +* + CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in WORK(IU) and computing right +* singular vectors of bidiagonal matrix in VT +* Workspace: need N*N [U] + 3*N [e, tauq, taup] + BDSPAC +* + CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), N, + $ VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite WORK(IU) by left singular vectors of R and VT +* by right singular vectors of R +* Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work] +* Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + N*NB [work] +* + CALL SORMBR( 'Q', 'L', 'N', N, N, N, A, LDA, + $ WORK( ITAUQ ), WORK( IU ), LDWRKU, + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) + CALL SORMBR( 'P', 'R', 'T', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* Workspace: need N*N [U] +* + CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU, WORK( IU ), + $ LDWRKU, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL SLACPY( 'F', M, N, A, LDA, U, LDU ) +* + END IF +* + ELSE +* +* M .LT. MNTHR +* +* Path 5 (M >= N, but not much larger) +* Reduce to bidiagonal form without QR decomposition +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize A +* Workspace: need 3*N [e, tauq, taup] + M [work] +* Workspace: prefer 3*N [e, tauq, taup] + (M+N)*NB [work] +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IF( WNTQN ) THEN +* +* Path 5n (M >= N, JOBZ='N') +* Perform bidiagonal SVD, only computing singular values +* Workspace: need 3*N [e, tauq, taup] + BDSPAC +* + CALL SBDSDC( 'U', 'N', N, S, WORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, WORK( NWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN +* Path 5o (M >= N, JOBZ='O') + IU = NWORK + IF( LWORK .GE. M*N + 3*N + BDSPAC ) THEN +* +* WORK( IU ) is M by N +* + LDWRKU = M + NWORK = IU + LDWRKU*N + CALL SLASET( 'F', M, N, ZERO, ZERO, WORK( IU ), + $ LDWRKU ) +* IR is unused; silence compile warnings + IR = -1 + ELSE +* +* WORK( IU ) is N by N +* + LDWRKU = N + NWORK = IU + LDWRKU*N +* +* WORK(IR) is LDWRKR by N +* + IR = NWORK + LDWRKR = ( LWORK - N*N - 3*N ) / N + END IF + NWORK = IU + LDWRKU*N +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in WORK(IU) and computing right +* singular vectors of bidiagonal matrix in VT +* Workspace: need 3*N [e, tauq, taup] + N*N [U] + BDSPAC +* + CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), + $ LDWRKU, VT, LDVT, DUM, IDUM, WORK( NWORK ), + $ IWORK, INFO ) +* +* Overwrite VT by right singular vectors of A +* Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work] +* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work] +* + CALL SORMBR( 'P', 'R', 'T', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* + IF( LWORK .GE. M*N + 3*N + BDSPAC ) THEN +* +* Path 5o-fast +* Overwrite WORK(IU) by left singular vectors of A +* Workspace: need 3*N [e, tauq, taup] + M*N [U] + N [work] +* Workspace: prefer 3*N [e, tauq, taup] + M*N [U] + N*NB [work] +* + CALL SORMBR( 'Q', 'L', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), WORK( IU ), LDWRKU, + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) +* +* Copy left singular vectors of A from WORK(IU) to A +* + CALL SLACPY( 'F', M, N, WORK( IU ), LDWRKU, A, LDA ) + ELSE +* +* Path 5o-slow +* Generate Q in A +* Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work] +* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work] +* + CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) +* +* Multiply Q in A by left singular vectors of +* bidiagonal matrix in WORK(IU), storing result in +* WORK(IR) and copying to A +* Workspace: need 3*N [e, tauq, taup] + N*N [U] + NB*N [R] +* Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + M*N [R] +* + DO 20 I = 1, M, LDWRKR + CHUNK = MIN( M - I + 1, LDWRKR ) + CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), + $ LDA, WORK( IU ), LDWRKU, ZERO, + $ WORK( IR ), LDWRKR ) + CALL SLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR, + $ A( I, 1 ), LDA ) + 20 CONTINUE + END IF +* + ELSE IF( WNTQS ) THEN +* +* Path 5s (M >= N, JOBZ='S') +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* Workspace: need 3*N [e, tauq, taup] + BDSPAC +* + CALL SLASET( 'F', M, N, ZERO, ZERO, U, LDU ) + CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite U by left singular vectors of A and VT +* by right singular vectors of A +* Workspace: need 3*N [e, tauq, taup] + N [work] +* Workspace: prefer 3*N [e, tauq, taup] + N*NB [work] +* + CALL SORMBR( 'Q', 'L', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + CALL SORMBR( 'P', 'R', 'T', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + ELSE IF( WNTQA ) THEN +* +* Path 5a (M >= N, JOBZ='A') +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* Workspace: need 3*N [e, tauq, taup] + BDSPAC +* + CALL SLASET( 'F', M, M, ZERO, ZERO, U, LDU ) + CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Set the right corner of U to identity matrix +* + IF( M.GT.N ) THEN + CALL SLASET( 'F', M - N, M - N, ZERO, ONE, U(N+1,N+1), + $ LDU ) + END IF +* +* Overwrite U by left singular vectors of A and VT +* by right singular vectors of A +* Workspace: need 3*N [e, tauq, taup] + M [work] +* Workspace: prefer 3*N [e, tauq, taup] + M*NB [work] +* + CALL SORMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + CALL SORMBR( 'P', 'R', 'T', N, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + END IF +* + END IF +* + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce using the LQ decomposition (if +* sufficient workspace available) +* + IF( N.GE.MNTHR ) THEN +* + IF( WNTQN ) THEN +* +* Path 1t (N >> M, JOBZ='N') +* No singular vectors to be computed +* + ITAU = 1 + NWORK = ITAU + M +* +* Compute A=L*Q +* Workspace: need M [tau] + M [work] +* Workspace: prefer M [tau] + M*NB [work] +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Zero out above L +* + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA ) + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in A +* Workspace: need 3*M [e, tauq, taup] + M [work] +* Workspace: prefer 3*M [e, tauq, taup] + 2*M*NB [work] +* + CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + NWORK = IE + M +* +* Perform bidiagonal SVD, computing singular values only +* Workspace: need M [e] + BDSPAC +* + CALL SBDSDC( 'U', 'N', M, S, WORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, WORK( NWORK ), IWORK, INFO ) +* + ELSE IF( WNTQO ) THEN +* +* Path 2t (N >> M, JOBZ='O') +* M right singular vectors to be overwritten on A and +* M left singular vectors to be computed in U +* + IVT = 1 +* +* WORK(IVT) is M by M +* WORK(IL) is M by M; it is later resized to M by chunk for gemm +* + IL = IVT + M*M + IF( LWORK .GE. M*N + M*M + 3*M + BDSPAC ) THEN + LDWRKL = M + CHUNK = N + ELSE + LDWRKL = M + CHUNK = ( LWORK - M*M ) / M + END IF + ITAU = IL + LDWRKL*M + NWORK = ITAU + M +* +* Compute A=L*Q +* Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work] +* Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work] +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Copy L to WORK(IL), zeroing about above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL ) + CALL SLASET( 'U', M - 1, M - 1, ZERO, ZERO, + $ WORK( IL + LDWRKL ), LDWRKL ) +* +* Generate Q in A +* Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work] +* Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work] +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work] +* Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work] +* + CALL SGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U, and computing right singular +* vectors of bidiagonal matrix in WORK(IVT) +* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + BDSPAC +* + CALL SBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU, + $ WORK( IVT ), M, DUM, IDUM, WORK( NWORK ), + $ IWORK, INFO ) +* +* Overwrite U by left singular vectors of L and WORK(IVT) +* by right singular vectors of L +* Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work] +* Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M*NB [work] +* + CALL SORMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + CALL SORMBR( 'P', 'R', 'T', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUP ), WORK( IVT ), M, + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) +* +* Multiply right singular vectors of L in WORK(IVT) by Q +* in A, storing result in WORK(IL) and copying to A +* Workspace: need M*M [VT] + M*M [L] +* Workspace: prefer M*M [VT] + M*N [L] +* At this point, L is resized as M by chunk. +* + DO 30 I = 1, N, CHUNK + BLK = MIN( N - I + 1, CHUNK ) + CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IVT ), M, + $ A( 1, I ), LDA, ZERO, WORK( IL ), LDWRKL ) + CALL SLACPY( 'F', M, BLK, WORK( IL ), LDWRKL, + $ A( 1, I ), LDA ) + 30 CONTINUE +* + ELSE IF( WNTQS ) THEN +* +* Path 3t (N >> M, JOBZ='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IL = 1 +* +* WORK(IL) is M by M +* + LDWRKL = M + ITAU = IL + LDWRKL*M + NWORK = ITAU + M +* +* Compute A=L*Q +* Workspace: need M*M [L] + M [tau] + M [work] +* Workspace: prefer M*M [L] + M [tau] + M*NB [work] +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Copy L to WORK(IL), zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL ) + CALL SLASET( 'U', M - 1, M - 1, ZERO, ZERO, + $ WORK( IL + LDWRKL ), LDWRKL ) +* +* Generate Q in A +* Workspace: need M*M [L] + M [tau] + M [work] +* Workspace: prefer M*M [L] + M [tau] + M*NB [work] +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU). +* Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work] +* Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work] +* + CALL SGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* Workspace: need M*M [L] + 3*M [e, tauq, taup] + BDSPAC +* + CALL SBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite U by left singular vectors of L and VT +* by right singular vectors of L +* Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work] +* Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + M*NB [work] +* + CALL SORMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + CALL SORMBR( 'P', 'R', 'T', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* +* Multiply right singular vectors of L in WORK(IL) by +* Q in A, storing result in VT +* Workspace: need M*M [L] +* + CALL SLACPY( 'F', M, M, VT, LDVT, WORK( IL ), LDWRKL ) + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IL ), LDWRKL, + $ A, LDA, ZERO, VT, LDVT ) +* + ELSE IF( WNTQA ) THEN +* +* Path 4t (N >> M, JOBZ='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IVT = 1 +* +* WORK(IVT) is M by M +* + LDWKVT = M + ITAU = IVT + LDWKVT*M + NWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* Workspace: need M*M [VT] + M [tau] + M [work] +* Workspace: prefer M*M [VT] + M [tau] + M*NB [work] +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* Workspace: need M*M [VT] + M [tau] + N [work] +* Workspace: prefer M*M [VT] + M [tau] + N*NB [work] +* + CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) +* +* Produce L in A, zeroing out other entries +* + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in A +* Workspace: need M*M [VT] + 3*M [e, tauq, taup] + M [work] +* Workspace: prefer M*M [VT] + 3*M [e, tauq, taup] + 2*M*NB [work] +* + CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in WORK(IVT) +* Workspace: need M*M [VT] + 3*M [e, tauq, taup] + BDSPAC +* + CALL SBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU, + $ WORK( IVT ), LDWKVT, DUM, IDUM, + $ WORK( NWORK ), IWORK, INFO ) +* +* Overwrite U by left singular vectors of L and WORK(IVT) +* by right singular vectors of L +* Workspace: need M*M [VT] + 3*M [e, tauq, taup]+ M [work] +* Workspace: prefer M*M [VT] + 3*M [e, tauq, taup]+ M*NB [work] +* + CALL SORMBR( 'Q', 'L', 'N', M, M, M, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + CALL SORMBR( 'P', 'R', 'T', M, M, M, A, LDA, + $ WORK( ITAUP ), WORK( IVT ), LDWKVT, + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) +* +* Multiply right singular vectors of L in WORK(IVT) by +* Q in VT, storing result in A +* Workspace: need M*M [VT] +* + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IVT ), LDWKVT, + $ VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + END IF +* + ELSE +* +* N .LT. MNTHR +* +* Path 5t (N > M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize A +* Workspace: need 3*M [e, tauq, taup] + N [work] +* Workspace: prefer 3*M [e, tauq, taup] + (M+N)*NB [work] +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IF( WNTQN ) THEN +* +* Path 5tn (N > M, JOBZ='N') +* Perform bidiagonal SVD, only computing singular values +* Workspace: need 3*M [e, tauq, taup] + BDSPAC +* + CALL SBDSDC( 'L', 'N', M, S, WORK( IE ), DUM, 1, DUM, 1, + $ DUM, IDUM, WORK( NWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN +* Path 5to (N > M, JOBZ='O') + LDWKVT = M + IVT = NWORK + IF( LWORK .GE. M*N + 3*M + BDSPAC ) THEN +* +* WORK( IVT ) is M by N +* + CALL SLASET( 'F', M, N, ZERO, ZERO, WORK( IVT ), + $ LDWKVT ) + NWORK = IVT + LDWKVT*N +* IL is unused; silence compile warnings + IL = -1 + ELSE +* +* WORK( IVT ) is M by M +* + NWORK = IVT + LDWKVT*M + IL = NWORK +* +* WORK(IL) is M by CHUNK +* + CHUNK = ( LWORK - M*M - 3*M ) / M + END IF +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in WORK(IVT) +* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + BDSPAC +* + CALL SBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, + $ WORK( IVT ), LDWKVT, DUM, IDUM, + $ WORK( NWORK ), IWORK, INFO ) +* +* Overwrite U by left singular vectors of A +* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work] +* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work] +* + CALL SORMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) +* + IF( LWORK .GE. M*N + 3*M + BDSPAC ) THEN +* +* Path 5to-fast +* Overwrite WORK(IVT) by left singular vectors of A +* Workspace: need 3*M [e, tauq, taup] + M*N [VT] + M [work] +* Workspace: prefer 3*M [e, tauq, taup] + M*N [VT] + M*NB [work] +* + CALL SORMBR( 'P', 'R', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), WORK( IVT ), LDWKVT, + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) +* +* Copy right singular vectors of A from WORK(IVT) to A +* + CALL SLACPY( 'F', M, N, WORK( IVT ), LDWKVT, A, LDA ) + ELSE +* +* Path 5to-slow +* Generate P**T in A +* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work] +* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work] +* + CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( NWORK ), LWORK - NWORK + 1, IERR ) +* +* Multiply Q in A by right singular vectors of +* bidiagonal matrix in WORK(IVT), storing result in +* WORK(IL) and copying to A +* Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M*NB [L] +* Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*N [L] +* + DO 40 I = 1, N, CHUNK + BLK = MIN( N - I + 1, CHUNK ) + CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IVT ), + $ LDWKVT, A( 1, I ), LDA, ZERO, + $ WORK( IL ), M ) + CALL SLACPY( 'F', M, BLK, WORK( IL ), M, A( 1, I ), + $ LDA ) + 40 CONTINUE + END IF + ELSE IF( WNTQS ) THEN +* +* Path 5ts (N > M, JOBZ='S') +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* Workspace: need 3*M [e, tauq, taup] + BDSPAC +* + CALL SLASET( 'F', M, N, ZERO, ZERO, VT, LDVT ) + CALL SBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Overwrite U by left singular vectors of A and VT +* by right singular vectors of A +* Workspace: need 3*M [e, tauq, taup] + M [work] +* Workspace: prefer 3*M [e, tauq, taup] + M*NB [work] +* + CALL SORMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + CALL SORMBR( 'P', 'R', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + ELSE IF( WNTQA ) THEN +* +* Path 5ta (N > M, JOBZ='A') +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in U and computing right singular +* vectors of bidiagonal matrix in VT +* Workspace: need 3*M [e, tauq, taup] + BDSPAC +* + CALL SLASET( 'F', N, N, ZERO, ZERO, VT, LDVT ) + CALL SBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, VT, + $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK, + $ INFO ) +* +* Set the right corner of VT to identity matrix +* + IF( N.GT.M ) THEN + CALL SLASET( 'F', N-M, N-M, ZERO, ONE, VT(M+1,M+1), + $ LDVT ) + END IF +* +* Overwrite U by left singular vectors of A and VT +* by right singular vectors of A +* Workspace: need 3*M [e, tauq, taup] + N [work] +* Workspace: prefer 3*M [e, tauq, taup] + N*NB [work] +* + CALL SORMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + CALL SORMBR( 'P', 'R', 'T', N, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK - NWORK + 1, IERR ) + END IF +* + END IF +* + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( ANRM.LT.SMLNUM ) + $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of SGESDD +* + END diff --git a/dspl/liblapack/SRC/sgesv.f b/dspl/liblapack/SRC/sgesv.f new file mode 100644 index 0000000..6ef2cf9 --- /dev/null +++ b/dspl/liblapack/SRC/sgesv.f @@ -0,0 +1,179 @@ +*> \brief SGESV computes the solution to system of linear equations A * X = B for GE matrices (simple driver) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGESV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGESV computes the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +*> +*> The LU decomposition with partial pivoting and row interchanges is +*> used to factor A as +*> A = P * L * U, +*> where P is a permutation matrix, L is unit lower triangular, and U is +*> upper triangular. The factored form of A is then used to solve the +*> system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the N-by-N coefficient matrix A. +*> On exit, the factors L and U from the factorization +*> A = P*L*U; the unit diagonal elements of L are not stored. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices that define the permutation matrix P; +*> row i of the matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS matrix of right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEsolve +* +* ===================================================================== + SUBROUTINE SGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL SGETRF, SGETRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGESV ', -INFO ) + RETURN + END IF +* +* Compute the LU factorization of A. +* + CALL SGETRF( N, N, A, LDA, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL SGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB, + $ INFO ) + END IF + RETURN +* +* End of SGESV +* + END diff --git a/dspl/liblapack/SRC/sgesvd.f b/dspl/liblapack/SRC/sgesvd.f new file mode 100644 index 0000000..c1756fa --- /dev/null +++ b/dspl/liblapack/SRC/sgesvd.f @@ -0,0 +1,3505 @@ +*> \brief SGESVD computes the singular value decomposition (SVD) for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGESVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBU, JOBVT +* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), S( * ), U( LDU, * ), +* $ VT( LDVT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGESVD computes the singular value decomposition (SVD) of a real +*> M-by-N matrix A, optionally computing the left and/or right singular +*> vectors. The SVD is written +*> +*> A = U * SIGMA * transpose(V) +*> +*> where SIGMA is an M-by-N matrix which is zero except for its +*> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and +*> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA +*> are the singular values of A; they are real and non-negative, and +*> are returned in descending order. The first min(m,n) columns of +*> U and V are the left and right singular vectors of A. +*> +*> Note that the routine returns V**T, not V. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> Specifies options for computing all or part of the matrix U: +*> = 'A': all M columns of U are returned in array U: +*> = 'S': the first min(m,n) columns of U (the left singular +*> vectors) are returned in the array U; +*> = 'O': the first min(m,n) columns of U (the left singular +*> vectors) are overwritten on the array A; +*> = 'N': no columns of U (no left singular vectors) are +*> computed. +*> \endverbatim +*> +*> \param[in] JOBVT +*> \verbatim +*> JOBVT is CHARACTER*1 +*> Specifies options for computing all or part of the matrix +*> V**T: +*> = 'A': all N rows of V**T are returned in the array VT; +*> = 'S': the first min(m,n) rows of V**T (the right singular +*> vectors) are returned in the array VT; +*> = 'O': the first min(m,n) rows of V**T (the right singular +*> vectors) are overwritten on the array A; +*> = 'N': no rows of V**T (no right singular vectors) are +*> computed. +*> +*> JOBVT and JOBU cannot both be 'O'. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if JOBU = 'O', A is overwritten with the first min(m,n) +*> columns of U (the left singular vectors, +*> stored columnwise); +*> if JOBVT = 'O', A is overwritten with the first min(m,n) +*> rows of V**T (the right singular vectors, +*> stored rowwise); +*> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A +*> are destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (min(M,N)) +*> The singular values of A, sorted so that S(i) >= S(i+1). +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is REAL array, dimension (LDU,UCOL) +*> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. +*> If JOBU = 'A', U contains the M-by-M orthogonal matrix U; +*> if JOBU = 'S', U contains the first min(m,n) columns of U +*> (the left singular vectors, stored columnwise); +*> if JOBU = 'N' or 'O', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= 1; if +*> JOBU = 'S' or 'A', LDU >= M. +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is REAL array, dimension (LDVT,N) +*> If JOBVT = 'A', VT contains the N-by-N orthogonal matrix +*> V**T; +*> if JOBVT = 'S', VT contains the first min(m,n) rows of +*> V**T (the right singular vectors, stored rowwise); +*> if JOBVT = 'N' or 'O', VT is not referenced. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= 1; if +*> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK; +*> if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged +*> superdiagonal elements of an upper bidiagonal matrix B +*> whose diagonal is in S (not necessarily sorted). B +*> satisfies A = U * B * VT, so it has the same singular values +*> as A, and singular vectors related by U and VT. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code): +*> - PATH 1 (M much larger than N, JOBU='N') +*> - PATH 1t (N much larger than M, JOBVT='N') +*> LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if SBDSQR did not converge, INFO specifies how many +*> superdiagonals of an intermediate bidiagonal form B +*> did not converge to zero. See the description of WORK +*> above for details. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realGEsing +* +* ===================================================================== + SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, + $ WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER JOBU, JOBVT + INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), S( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS, + $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS + INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL, + $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU, + $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU, + $ NRVT, WRKBL + INTEGER LWORK_SGEQRF, LWORK_SORGQR_N, LWORK_SORGQR_M, + $ LWORK_SGEBRD, LWORK_SORGBR_P, LWORK_SORGBR_Q, + $ LWORK_SGELQF, LWORK_SORGLQ_N, LWORK_SORGLQ_M + REAL ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SBDSQR, SGEBRD, SGELQF, SGEMM, SGEQRF, SLACPY, + $ SLASCL, SLASET, SORGBR, SORGLQ, SORGQR, SORMBR, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + WNTUA = LSAME( JOBU, 'A' ) + WNTUS = LSAME( JOBU, 'S' ) + WNTUAS = WNTUA .OR. WNTUS + WNTUO = LSAME( JOBU, 'O' ) + WNTUN = LSAME( JOBU, 'N' ) + WNTVA = LSAME( JOBVT, 'A' ) + WNTVS = LSAME( JOBVT, 'S' ) + WNTVAS = WNTVA .OR. WNTVS + WNTVO = LSAME( JOBVT, 'O' ) + WNTVN = LSAME( JOBVT, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR. + $ ( WNTVO .AND. WNTUO ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN + INFO = -9 + ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR. + $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + IF( M.GE.N .AND. MINMN.GT.0 ) THEN +* +* Compute space needed for SBDSQR +* + MNTHR = ILAENV( 6, 'SGESVD', JOBU // JOBVT, M, N, 0, 0 ) + BDSPAC = 5*N +* Compute space needed for SGEQRF + CALL SGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) + LWORK_SGEQRF = INT( DUM(1) ) +* Compute space needed for SORGQR + CALL SORGQR( M, N, N, A, LDA, DUM(1), DUM(1), -1, IERR ) + LWORK_SORGQR_N = INT( DUM(1) ) + CALL SORGQR( M, M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) + LWORK_SORGQR_M = INT( DUM(1) ) +* Compute space needed for SGEBRD + CALL SGEBRD( N, N, A, LDA, S, DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, IERR ) + LWORK_SGEBRD = INT( DUM(1) ) +* Compute space needed for SORGBR P + CALL SORGBR( 'P', N, N, N, A, LDA, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_SORGBR_P = INT( DUM(1) ) +* Compute space needed for SORGBR Q + CALL SORGBR( 'Q', N, N, N, A, LDA, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_SORGBR_Q = INT( DUM(1) ) +* + IF( M.GE.MNTHR ) THEN + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* + MAXWRK = N + LWORK_SGEQRF + MAXWRK = MAX( MAXWRK, 3*N+LWORK_SGEBRD ) + IF( WNTVO .OR. WNTVAS ) + $ MAXWRK = MAX( MAXWRK, 3*N+LWORK_SORGBR_P ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 4*N, BDSPAC ) + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* + WRKBL = N + LWORK_SGEQRF + WRKBL = MAX( WRKBL, N+LWORK_SORGQR_N ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N ) + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUO .AND. WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or +* 'A') +* + WRKBL = N + LWORK_SGEQRF + WRKBL = MAX( WRKBL, N+LWORK_SORGQR_N ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N ) + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUS .AND. WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* + WRKBL = N + LWORK_SGEQRF + WRKBL = MAX( WRKBL, N+LWORK_SORGQR_N ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUS .AND. WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* + WRKBL = N + LWORK_SGEQRF + WRKBL = MAX( WRKBL, N+LWORK_SORGQR_N ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUS .AND. WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or +* 'A') +* + WRKBL = N + LWORK_SGEQRF + WRKBL = MAX( WRKBL, N+LWORK_SORGQR_N ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUA .AND. WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* + WRKBL = N + LWORK_SGEQRF + WRKBL = MAX( WRKBL, N+LWORK_SORGQR_M ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUA .AND. WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* + WRKBL = N + LWORK_SGEQRF + WRKBL = MAX( WRKBL, N+LWORK_SORGQR_M ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + ELSE IF( WNTUA .AND. WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or +* 'A') +* + WRKBL = N + LWORK_SGEQRF + WRKBL = MAX( WRKBL, N+LWORK_SORGQR_M ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SGEBRD ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_Q ) + WRKBL = MAX( WRKBL, 3*N+LWORK_SORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = N*N + WRKBL + MINWRK = MAX( 3*N+M, BDSPAC ) + END IF + ELSE +* +* Path 10 (M at least N, but not much larger) +* + CALL SGEBRD( M, N, A, LDA, S, DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, IERR ) + LWORK_SGEBRD = INT( DUM(1) ) + MAXWRK = 3*N + LWORK_SGEBRD + IF( WNTUS .OR. WNTUO ) THEN + CALL SORGBR( 'Q', M, N, N, A, LDA, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_SORGBR_Q = INT( DUM(1) ) + MAXWRK = MAX( MAXWRK, 3*N+LWORK_SORGBR_Q ) + END IF + IF( WNTUA ) THEN + CALL SORGBR( 'Q', M, M, N, A, LDA, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_SORGBR_Q = INT( DUM(1) ) + MAXWRK = MAX( MAXWRK, 3*N+LWORK_SORGBR_Q ) + END IF + IF( .NOT.WNTVN ) THEN + MAXWRK = MAX( MAXWRK, 3*N+LWORK_SORGBR_P ) + END IF + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 3*N+M, BDSPAC ) + END IF + ELSE IF( MINMN.GT.0 ) THEN +* +* Compute space needed for SBDSQR +* + MNTHR = ILAENV( 6, 'SGESVD', JOBU // JOBVT, M, N, 0, 0 ) + BDSPAC = 5*M +* Compute space needed for SGELQF + CALL SGELQF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR ) + LWORK_SGELQF = INT( DUM(1) ) +* Compute space needed for SORGLQ + CALL SORGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR ) + LWORK_SORGLQ_N = INT( DUM(1) ) + CALL SORGLQ( M, N, M, A, LDA, DUM(1), DUM(1), -1, IERR ) + LWORK_SORGLQ_M = INT( DUM(1) ) +* Compute space needed for SGEBRD + CALL SGEBRD( M, M, A, LDA, S, DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, IERR ) + LWORK_SGEBRD = INT( DUM(1) ) +* Compute space needed for SORGBR P + CALL SORGBR( 'P', M, M, M, A, N, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_SORGBR_P = INT( DUM(1) ) +* Compute space needed for SORGBR Q + CALL SORGBR( 'Q', M, M, M, A, N, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_SORGBR_Q = INT( DUM(1) ) + IF( N.GE.MNTHR ) THEN + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* + MAXWRK = M + LWORK_SGELQF + MAXWRK = MAX( MAXWRK, 3*M+LWORK_SGEBRD ) + IF( WNTUO .OR. WNTUAS ) + $ MAXWRK = MAX( MAXWRK, 3*M+LWORK_SORGBR_Q ) + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 4*M, BDSPAC ) + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* + WRKBL = M + LWORK_SGELQF + WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_M ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M ) + MINWRK = MAX( 3*M+N, BDSPAC ) + ELSE IF( WNTVO .AND. WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', +* JOBVT='O') +* + WRKBL = M + LWORK_SGELQF + WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_M ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M ) + MINWRK = MAX( 3*M+N, BDSPAC ) + ELSE IF( WNTVS .AND. WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* + WRKBL = M + LWORK_SGELQF + WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_M ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + ELSE IF( WNTVS .AND. WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* + WRKBL = M + LWORK_SGELQF + WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_M ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + MAXWRK = MAX( MAXWRK, MINWRK ) + ELSE IF( WNTVS .AND. WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* + WRKBL = M + LWORK_SGELQF + WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_M ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + ELSE IF( WNTVA .AND. WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* + WRKBL = M + LWORK_SGELQF + WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_N ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + ELSE IF( WNTVA .AND. WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* + WRKBL = M + LWORK_SGELQF + WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_N ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = 2*M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + ELSE IF( WNTVA .AND. WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* + WRKBL = M + LWORK_SGELQF + WRKBL = MAX( WRKBL, M+LWORK_SORGLQ_N ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SGEBRD ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_P ) + WRKBL = MAX( WRKBL, 3*M+LWORK_SORGBR_Q ) + WRKBL = MAX( WRKBL, BDSPAC ) + MAXWRK = M*M + WRKBL + MINWRK = MAX( 3*M+N, BDSPAC ) + END IF + ELSE +* +* Path 10t(N greater than M, but not much larger) +* + CALL SGEBRD( M, N, A, LDA, S, DUM(1), DUM(1), + $ DUM(1), DUM(1), -1, IERR ) + LWORK_SGEBRD = INT( DUM(1) ) + MAXWRK = 3*M + LWORK_SGEBRD + IF( WNTVS .OR. WNTVO ) THEN +* Compute space needed for SORGBR P + CALL SORGBR( 'P', M, N, M, A, N, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_SORGBR_P = INT( DUM(1) ) + MAXWRK = MAX( MAXWRK, 3*M+LWORK_SORGBR_P ) + END IF + IF( WNTVA ) THEN + CALL SORGBR( 'P', N, N, M, A, N, DUM(1), + $ DUM(1), -1, IERR ) + LWORK_SORGBR_P = INT( DUM(1) ) + MAXWRK = MAX( MAXWRK, 3*M+LWORK_SORGBR_P ) + END IF + IF( .NOT.WNTUN ) THEN + MAXWRK = MAX( MAXWRK, 3*M+LWORK_SORGBR_Q ) + END IF + MAXWRK = MAX( MAXWRK, BDSPAC ) + MINWRK = MAX( 3*M+N, BDSPAC ) + END IF + END IF + MAXWRK = MAX( MAXWRK, MINWRK ) + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGESVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce using the QR +* decomposition (if sufficient workspace available) +* + IF( M.GE.MNTHR ) THEN +* + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* No left singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out below R +* + IF( N .GT. 1 ) THEN + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), + $ LDA ) + END IF + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + NCVT = 0 + IF( WNTVO .OR. WNTVAS ) THEN +* +* If right singular vectors desired, generate P'. +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + NCVT = N + END IF + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A if desired +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA, + $ DUM, 1, DUM, 1, WORK( IWORK ), INFO ) +* +* If right singular vectors desired in VT, copy them there +* + IF( WNTVAS ) + $ CALL SLACPY( 'F', N, N, A, LDA, VT, LDVT ) +* + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* N left singular vectors to be overwritten on A and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N, WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N-N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR) and zero out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ), + $ LDWRKR ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (Workspace: need N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1, + $ WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + N +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (Workspace: need N*N+2*N, prefer N*N+M*N+N) +* + DO 10 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, ZERO, + $ WORK( IU ), LDWRKU ) + CALL SLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 10 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing A +* (Workspace: need 4*N, prefer 3*N+N*NB) +* + CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1, + $ A, LDA, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO .AND. WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') +* N left singular vectors to be overwritten on A and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N and WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N-N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ VT( 2, 1 ), LDVT ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT, copying result to WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) and computing right +* singular vectors of R in VT +* (Workspace: need N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT, + $ WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + N +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (Workspace: need N*N+2*N, prefer N*N+M*N+N) +* + DO 20 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, ZERO, + $ WORK( IU ), LDWRKU ) + CALL SLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 20 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ VT( 2, 1 ), LDVT ) +* +* Generate Q in A +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in A by left vectors bidiagonalizing R +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT, + $ A, LDA, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUS ) THEN +* + IF( WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* N left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (Workspace: need N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, + $ 1, WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in U +* (Workspace: need N*N) +* + CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA, + $ WORK( IR ), LDWRKR, ZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + IF( N .GT. 1 ) THEN + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ A( 2, 1 ), LDA ) + END IF +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, + $ 1, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* N left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*N*N+4*N, +* prefer 2*N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*N*N+4*N-1, +* prefer 2*N*N+3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (Workspace: need 2*N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (Workspace: need N*N) +* + CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA, + $ WORK( IU ), LDWRKU, ZERO, U, LDU ) +* +* Copy right singular vectors of R to A +* (Workspace: need N*N) +* + CALL SLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + IF( N .GT. 1 ) THEN + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ A( 2, 1 ), LDA ) + END IF +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in A +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A, + $ LDA, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' +* or 'A') +* N left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need N*N+4*N-1, +* prefer N*N+3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (Workspace: need N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (Workspace: need N*N) +* + CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA, + $ WORK( IU ), LDWRKU, ZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ VT( 2, 1 ), LDVT ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTUA ) THEN +* + IF( WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* M left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in U +* (Workspace: need N*N+N+M, prefer N*N+N+M*NB) +* + CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (Workspace: need N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, + $ 1, WORK( IR ), LDWRKR, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IR), storing result in A +* (Workspace: need N*N) +* + CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU, + $ WORK( IR ), LDWRKR, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL SLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N+M, prefer N+M*NB) +* + CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + IF( N .GT. 1 ) THEN + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ A( 2, 1 ), LDA ) + END IF +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, + $ 1, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* M left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) +* + CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*N*N+4*N, +* prefer 2*N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*N*N+4*N-1, +* prefer 2*N*N+3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (Workspace: need 2*N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (Workspace: need N*N) +* + CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU, + $ WORK( IU ), LDWRKU, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL SLACPY( 'F', M, N, A, LDA, U, LDU ) +* +* Copy right singular vectors of R from WORK(IR) to A +* + CALL SLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N+M, prefer N+M*NB) +* + CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + IF( N .GT. 1 ) THEN + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ A( 2, 1 ), LDA ) + END IF +* +* Bidiagonalize R in A +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in A +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A, + $ LDA, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' +* or 'A') +* M left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need N*N+2*N, prefer N*N+N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N*N+N+M, prefer N*N+N+M*NB) +* + CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB) +* + CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB) +* + CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need N*N+4*N-1, +* prefer N*N+3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (Workspace: need N*N+BDSPAC) +* + CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (Workspace: need N*N) +* + CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU, + $ WORK( IU ), LDWRKU, ZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL SLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (Workspace: need 2*N, prefer N+N*NB) +* + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (Workspace: need N+M, prefer N+M*NB) +* + CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R from A to VT, zeroing out below it +* + CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, + $ VT( 2, 1 ), LDVT ) + IE = ITAU + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (Workspace: need 4*N, prefer 3*N+2*N*NB) +* + CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (Workspace: need 3*N+M, prefer 3*N+M*NB) +* + CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* M .LT. MNTHR +* +* Path 10 (M at least N, but not much larger) +* Reduce to bidiagonal form without QR decomposition +* + IE = 1 + ITAUQ = IE + N + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB) +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB) +* + CALL SLACPY( 'L', M, N, A, LDA, U, LDU ) + IF( WNTUS ) + $ NCU = N + IF( WNTUA ) + $ NCU = M + CALL SORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (Workspace: need 4*N, prefer 3*N+N*NB) +* + CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB) +* + CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IWORK = IE + N + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA, + $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO ) + END IF +* + END IF +* + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce using the LQ decomposition (if +* sufficient workspace available) +* + IF( N.GE.MNTHR ) THEN +* + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* No right singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out above L +* + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA ) + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUO .OR. WNTUAS ) THEN +* +* If left singular vectors desired, generate Q +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IWORK = IE + M + NRU = 0 + IF( WNTUO .OR. WNTUAS ) + $ NRU = M +* +* Perform bidiagonal QR iteration, computing left singular +* vectors of A in A if desired +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A, + $ LDA, DUM, 1, WORK( IWORK ), INFO ) +* +* If left singular vectors desired in U, copy them there +* + IF( WNTUAS ) + $ CALL SLACPY( 'F', M, M, A, LDA, U, LDU ) +* + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* M right singular vectors to be overwritten on A and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M-M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR) and zero out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L +* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + M +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (Workspace: need M*M+2*M, prefer M*M+M*N+M) +* + DO 30 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, ZERO, + $ WORK( IU ), LDWRKU ) + CALL SLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 30 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA, + $ DUM, 1, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVO .AND. WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') +* M right singular vectors to be overwritten on A and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M-M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing about above it +* + CALL SLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U, copying result to WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR ) +* +* Generate right vectors bidiagonalizing L in WORK(IR) +* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U, and computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, U, LDU, DUM, 1, + $ WORK( IWORK ), INFO ) + IU = IE + M +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (Workspace: need M*M+2*M, prefer M*M+M*N+M)) +* + DO 40 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, ZERO, + $ WORK( IU ), LDWRKU ) + CALL SLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 40 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in A +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU, + $ WORK( ITAUP ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA, + $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVS ) THEN +* + IF( WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* M right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L in +* WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in A, storing result in VT +* (Workspace: need M*M) +* + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ), + $ LDWRKR, A, LDA, ZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy result to VT +* + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT, + $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out below it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*M*M+4*M, +* prefer 2*M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*M*M+4*M-1, +* prefer 2*M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (Workspace: need 2*M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (Workspace: need M*M) +* + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, A, LDA, ZERO, VT, LDVT ) +* +* Copy left singular vectors of L to A +* (Workspace: need M*M) +* + CALL SLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors of L in A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, compute left +* singular vectors of A in A and compute right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is LDA by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need M*M+4*M-1, +* prefer M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (Workspace: need M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (Workspace: need M*M) +* + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, A, LDA, ZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTVA ) THEN +* + IF( WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* N right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in VT +* (Workspace: need M*M+M+N, prefer M*M+M+N*NB) +* + CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (Workspace: need M*M+4*M-1, +* prefer M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (Workspace: need M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ), + $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in VT, storing result in A +* (Workspace: need M*M) +* + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ), + $ LDWRKR, VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M+N, prefer M+N*NB) +* + CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT, + $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) +* + CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (Workspace: need 2*M*M+4*M, +* prefer 2*M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need 2*M*M+4*M-1, +* prefer 2*M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (Workspace: need 2*M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, DUM, 1, WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (Workspace: need M*M) +* + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* +* Copy left singular vectors of A from WORK(IR) to A +* + CALL SLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M+N, prefer M+N*NB) +* + CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), + $ LDA ) +* +* Bidiagonalize L in A +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by M +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is M by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need M*M+2*M, prefer M*M+M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M*M+M+N, prefer M*M+M+N*NB) +* + CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* + CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB) +* + CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (Workspace: need M*M+BDSPAC) +* + CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, DUM, 1, + $ WORK( IWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (Workspace: need M*M) +* + CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ), + $ LDWRKU, VT, LDVT, ZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (Workspace: need 2*M, prefer M+M*NB) +* + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (Workspace: need M+N, prefer M+N*NB) +* + CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL SLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ), + $ LDU ) + IE = ITAU + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (Workspace: need 4*M, prefer 3*M+2*M*NB) +* + CALL SGEBRD( M, M, U, LDU, S, WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (Workspace: need 3*M+N, prefer 3*M+N*NB) +* + CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), + $ INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* N .LT. MNTHR +* +* Path 10t(N greater than M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* + IE = 1 + ITAUQ = IE + M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB) +* + CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) +* + CALL SLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL SORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB) +* + CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT ) + IF( WNTVA ) + $ NRVT = N + IF( WNTVS ) + $ NRVT = M + CALL SORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB) +* + CALL SORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (Workspace: need 4*M, prefer 3*M+M*NB) +* + CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IWORK = IE + M + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA, + $ U, LDU, DUM, 1, WORK( IWORK ), INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (Workspace: need BDSPAC) +* + CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT, + $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO ) + END IF +* + END IF +* + END IF +* +* If SBDSQR failed to converge, copy unconverged superdiagonals +* to WORK( 2:MINMN ) +* + IF( INFO.NE.0 ) THEN + IF( IE.GT.2 ) THEN + DO 50 I = 1, MINMN - 1 + WORK( I+1 ) = WORK( I+IE-1 ) + 50 CONTINUE + END IF + IF( IE.LT.2 ) THEN + DO 60 I = MINMN - 1, 1, -1 + WORK( I+1 ) = WORK( I+IE-1 ) + 60 CONTINUE + END IF + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM ) + $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ), + $ MINMN, IERR ) + IF( ANRM.LT.SMLNUM ) + $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM ) + $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ), + $ MINMN, IERR ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of SGESVD +* + END diff --git a/dspl/liblapack/SRC/sgesvdx.f b/dspl/liblapack/SRC/sgesvdx.f new file mode 100644 index 0000000..d74e98f --- /dev/null +++ b/dspl/liblapack/SRC/sgesvdx.f @@ -0,0 +1,834 @@ +*> \brief SGESVDX computes the singular value decomposition (SVD) for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGESVDX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGESVDX( JOBU, JOBVT, RANGE, M, N, A, LDA, VL, VU, +* $ IL, IU, NS, S, U, LDU, VT, LDVT, WORK, +* $ LWORK, IWORK, INFO ) +* +* +* .. Scalar Arguments .. +* CHARACTER JOBU, JOBVT, RANGE +* INTEGER IL, INFO, IU, LDA, LDU, LDVT, LWORK, M, N, NS +* REAL VL, VU +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), S( * ), U( LDU, * ), +* $ VT( LDVT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGESVDX computes the singular value decomposition (SVD) of a real +*> M-by-N matrix A, optionally computing the left and/or right singular +*> vectors. The SVD is written +*> +*> A = U * SIGMA * transpose(V) +*> +*> where SIGMA is an M-by-N matrix which is zero except for its +*> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and +*> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA +*> are the singular values of A; they are real and non-negative, and +*> are returned in descending order. The first min(m,n) columns of +*> U and V are the left and right singular vectors of A. +*> +*> SGESVDX uses an eigenvalue problem for obtaining the SVD, which +*> allows for the computation of a subset of singular values and +*> vectors. See SBDSVDX for details. +*> +*> Note that the routine returns V**T, not V. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> Specifies options for computing all or part of the matrix U: +*> = 'V': the first min(m,n) columns of U (the left singular +*> vectors) or as specified by RANGE are returned in +*> the array U; +*> = 'N': no columns of U (no left singular vectors) are +*> computed. +*> \endverbatim +*> +*> \param[in] JOBVT +*> \verbatim +*> JOBVT is CHARACTER*1 +*> Specifies options for computing all or part of the matrix +*> V**T: +*> = 'V': the first min(m,n) rows of V**T (the right singular +*> vectors) or as specified by RANGE are returned in +*> the array VT; +*> = 'N': no rows of V**T (no right singular vectors) are +*> computed. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all singular values will be found. +*> = 'V': all singular values in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th singular values will be found. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the contents of A are destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for singular values. VU > VL. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for singular values. VU > VL. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest singular value to be returned. +*> 1 <= IL <= IU <= min(M,N), if min(M,N) > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest singular value to be returned. +*> 1 <= IL <= IU <= min(M,N), if min(M,N) > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[out] NS +*> \verbatim +*> NS is INTEGER +*> The total number of singular values found, +*> 0 <= NS <= min(M,N). +*> If RANGE = 'A', NS = min(M,N); if RANGE = 'I', NS = IU-IL+1. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (min(M,N)) +*> The singular values of A, sorted so that S(i) >= S(i+1). +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is REAL array, dimension (LDU,UCOL) +*> If JOBU = 'V', U contains columns of U (the left singular +*> vectors, stored columnwise) as specified by RANGE; if +*> JOBU = 'N', U is not referenced. +*> Note: The user must ensure that UCOL >= NS; if RANGE = 'V', +*> the exact value of NS is not known in advance and an upper +*> bound must be used. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= 1; if +*> JOBU = 'V', LDU >= M. +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is REAL array, dimension (LDVT,N) +*> If JOBVT = 'V', VT contains the rows of V**T (the right singular +*> vectors, stored rowwise) as specified by RANGE; if JOBVT = 'N', +*> VT is not referenced. +*> Note: The user must ensure that LDVT >= NS; if RANGE = 'V', +*> the exact value of NS is not known in advance and an upper +*> bound must be used. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= 1; if +*> JOBVT = 'V', LDVT >= NS (see above). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK; +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= MAX(1,MIN(M,N)*(MIN(M,N)+4)) for the paths (see +*> comments inside the code): +*> - PATH 1 (M much larger than N) +*> - PATH 1t (N much larger than M) +*> LWORK >= MAX(1,MIN(M,N)*2+MAX(M,N)) for the other paths. +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (12*MIN(M,N)) +*> If INFO = 0, the first NS elements of IWORK are zero. If INFO > 0, +*> then IWORK contains the indices of the eigenvectors that failed +*> to converge in SBDSVDX/SSTEVX. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge +*> in SBDSVDX/SSTEVX. +*> if INFO = N*2 + 1, an internal error occurred in +*> SBDSVDX +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realGEsing +* +* ===================================================================== + SUBROUTINE SGESVDX( JOBU, JOBVT, RANGE, M, N, A, LDA, VL, VU, + $ IL, IU, NS, S, U, LDU, VT, LDVT, WORK, + $ LWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBU, JOBVT, RANGE + INTEGER IL, INFO, IU, LDA, LDU, LDVT, LWORK, M, N, NS + REAL VL, VU +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), S( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + CHARACTER JOBZ, RNGTGK + LOGICAL ALLS, INDS, LQUERY, VALS, WANTU, WANTVT + INTEGER I, ID, IE, IERR, ILQF, ILTGK, IQRF, ISCL, + $ ITAU, ITAUP, ITAUQ, ITEMP, ITGKZ, IUTGK, + $ J, MAXWRK, MINMN, MINWRK, MNTHR + REAL ABSTOL, ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + REAL DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SBDSVDX, SGEBRD, SGELQF, SGEQRF, SLACPY, + $ SLASCL, SLASET, SORMBR, SORMLQ, SORMQR, + $ SCOPY, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + NS = 0 + INFO = 0 + ABSTOL = 2*SLAMCH('S') + LQUERY = ( LWORK.EQ.-1 ) + MINMN = MIN( M, N ) + + WANTU = LSAME( JOBU, 'V' ) + WANTVT = LSAME( JOBVT, 'V' ) + IF( WANTU .OR. WANTVT ) THEN + JOBZ = 'V' + ELSE + JOBZ = 'N' + END IF + ALLS = LSAME( RANGE, 'A' ) + VALS = LSAME( RANGE, 'V' ) + INDS = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.LSAME( JOBU, 'V' ) .AND. + $ .NOT.LSAME( JOBU, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( JOBVT, 'V' ) .AND. + $ .NOT.LSAME( JOBVT, 'N' ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLS .OR. VALS .OR. INDS ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( M.GT.LDA ) THEN + INFO = -7 + ELSE IF( MINMN.GT.0 ) THEN + IF( VALS ) THEN + IF( VL.LT.ZERO ) THEN + INFO = -8 + ELSE IF( VU.LE.VL ) THEN + INFO = -9 + END IF + ELSE IF( INDS ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, MINMN ) ) THEN + INFO = -10 + ELSE IF( IU.LT.MIN( MINMN, IL ) .OR. IU.GT.MINMN ) THEN + INFO = -11 + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( WANTU .AND. LDU.LT.M ) THEN + INFO = -15 + ELSE IF( WANTVT ) THEN + IF( INDS ) THEN + IF( LDVT.LT.IU-IL+1 ) THEN + INFO = -17 + END IF + ELSE IF( LDVT.LT.MINMN ) THEN + INFO = -17 + END IF + END IF + END IF + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + IF( MINMN.GT.0 ) THEN + IF( M.GE.N ) THEN + MNTHR = ILAENV( 6, 'SGESVD', JOBU // JOBVT, M, N, 0, 0 ) + IF( M.GE.MNTHR ) THEN +* +* Path 1 (M much larger than N) +* + MAXWRK = N + + $ N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + MAXWRK = MAX( MAXWRK, N*(N+5) + 2*N* + $ ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) ) + IF (WANTU) THEN + MAXWRK = MAX(MAXWRK,N*(N*3+6)+N* + $ ILAENV( 1, 'SORMQR', ' ', N, N, -1, -1 ) ) + END IF + IF (WANTVT) THEN + MAXWRK = MAX(MAXWRK,N*(N*3+6)+N* + $ ILAENV( 1, 'SORMLQ', ' ', N, N, -1, -1 ) ) + END IF + MINWRK = N*(N*3+20) + ELSE +* +* Path 2 (M at least N, but not much larger) +* + MAXWRK = 4*N + ( M+N )* + $ ILAENV( 1, 'SGEBRD', ' ', M, N, -1, -1 ) + IF (WANTU) THEN + MAXWRK = MAX(MAXWRK,N*(N*2+5)+N* + $ ILAENV( 1, 'SORMQR', ' ', N, N, -1, -1 ) ) + END IF + IF (WANTVT) THEN + MAXWRK = MAX(MAXWRK,N*(N*2+5)+N* + $ ILAENV( 1, 'SORMLQ', ' ', N, N, -1, -1 ) ) + END IF + MINWRK = MAX(N*(N*2+19),4*N+M) + END IF + ELSE + MNTHR = ILAENV( 6, 'SGESVD', JOBU // JOBVT, M, N, 0, 0 ) + IF( N.GE.MNTHR ) THEN +* +* Path 1t (N much larger than M) +* + MAXWRK = M + + $ M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 ) + MAXWRK = MAX( MAXWRK, M*(M+5) + 2*M* + $ ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) ) + IF (WANTU) THEN + MAXWRK = MAX(MAXWRK,M*(M*3+6)+M* + $ ILAENV( 1, 'SORMQR', ' ', M, M, -1, -1 ) ) + END IF + IF (WANTVT) THEN + MAXWRK = MAX(MAXWRK,M*(M*3+6)+M* + $ ILAENV( 1, 'SORMLQ', ' ', M, M, -1, -1 ) ) + END IF + MINWRK = M*(M*3+20) + ELSE +* +* Path 2t (N at least M, but not much larger) +* + MAXWRK = 4*M + ( M+N )* + $ ILAENV( 1, 'SGEBRD', ' ', M, N, -1, -1 ) + IF (WANTU) THEN + MAXWRK = MAX(MAXWRK,M*(M*2+5)+M* + $ ILAENV( 1, 'SORMQR', ' ', M, M, -1, -1 ) ) + END IF + IF (WANTVT) THEN + MAXWRK = MAX(MAXWRK,M*(M*2+5)+M* + $ ILAENV( 1, 'SORMLQ', ' ', M, M, -1, -1 ) ) + END IF + MINWRK = MAX(M*(M*2+19),4*M+N) + END IF + END IF + END IF + MAXWRK = MAX( MAXWRK, MINWRK ) + WORK( 1 ) = REAL( MAXWRK ) +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGESVDX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* +* Set singular values indices accord to RANGE. +* + IF( ALLS ) THEN + RNGTGK = 'I' + ILTGK = 1 + IUTGK = MIN( M, N ) + ELSE IF( INDS ) THEN + RNGTGK = 'I' + ILTGK = IL + IUTGK = IU + ELSE + RNGTGK = 'V' + ILTGK = 0 + IUTGK = 0 + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce A using the QR +* decomposition. +* + IF( M.GE.MNTHR ) THEN +* +* Path 1 (M much larger than N): +* A = Q * R = Q * ( QB * B * PB**T ) +* = Q * ( QB * ( UB * S * VB**T ) * PB**T ) +* U = Q * QB * UB; V**T = VB**T * PB**T +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + ITAU = 1 + ITEMP = ITAU + N + CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) +* +* Copy R into WORK and bidiagonalize it: +* (Workspace: need N*N+5*N, prefer N*N+4*N+2*N*NB) +* + IQRF = ITEMP + ID = IQRF + N*N + IE = ID + N + ITAUQ = IE + N + ITAUP = ITAUQ + N + ITEMP = ITAUP + N + CALL SLACPY( 'U', N, N, A, LDA, WORK( IQRF ), N ) + CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IQRF+1 ), N ) + CALL SGEBRD( N, N, WORK( IQRF ), N, WORK( ID ), WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) +* +* Solve eigenvalue problem TGK*Z=Z*S. +* (Workspace: need 14*N + 2*N*(N+1)) +* + ITGKZ = ITEMP + ITEMP = ITGKZ + N*(N*2+1) + CALL SBDSVDX( 'U', JOBZ, RNGTGK, N, WORK( ID ), WORK( IE ), + $ VL, VU, ILTGK, IUTGK, NS, S, WORK( ITGKZ ), + $ N*2, WORK( ITEMP ), IWORK, INFO) +* +* If needed, compute left singular vectors. +* + IF( WANTU ) THEN + J = ITGKZ + DO I = 1, NS + CALL SCOPY( N, WORK( J ), 1, U( 1,I ), 1 ) + J = J + N*2 + END DO + CALL SLASET( 'A', M-N, NS, ZERO, ZERO, U( N+1,1 ), LDU ) +* +* Call SORMBR to compute QB*UB. +* (Workspace in WORK( ITEMP ): need N, prefer N*NB) +* + CALL SORMBR( 'Q', 'L', 'N', N, NS, N, WORK( IQRF ), N, + $ WORK( ITAUQ ), U, LDU, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) +* +* Call SORMQR to compute Q*(QB*UB). +* (Workspace in WORK( ITEMP ): need N, prefer N*NB) +* + CALL SORMQR( 'L', 'N', M, NS, N, A, LDA, + $ WORK( ITAU ), U, LDU, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF +* +* If needed, compute right singular vectors. +* + IF( WANTVT) THEN + J = ITGKZ + N + DO I = 1, NS + CALL SCOPY( N, WORK( J ), 1, VT( I,1 ), LDVT ) + J = J + N*2 + END DO +* +* Call SORMBR to compute VB**T * PB**T +* (Workspace in WORK( ITEMP ): need N, prefer N*NB) +* + CALL SORMBR( 'P', 'R', 'T', NS, N, N, WORK( IQRF ), N, + $ WORK( ITAUP ), VT, LDVT, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF + ELSE +* +* Path 2 (M at least N, but not much larger) +* Reduce A to bidiagonal form without QR decomposition +* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T +* U = QB * UB; V**T = VB**T * PB**T +* +* Bidiagonalize A +* (Workspace: need 4*N+M, prefer 4*N+(M+N)*NB) +* + ID = 1 + IE = ID + N + ITAUQ = IE + N + ITAUP = ITAUQ + N + ITEMP = ITAUP + N + CALL SGEBRD( M, N, A, LDA, WORK( ID ), WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) +* +* Solve eigenvalue problem TGK*Z=Z*S. +* (Workspace: need 14*N + 2*N*(N+1)) +* + ITGKZ = ITEMP + ITEMP = ITGKZ + N*(N*2+1) + CALL SBDSVDX( 'U', JOBZ, RNGTGK, N, WORK( ID ), WORK( IE ), + $ VL, VU, ILTGK, IUTGK, NS, S, WORK( ITGKZ ), + $ N*2, WORK( ITEMP ), IWORK, INFO) +* +* If needed, compute left singular vectors. +* + IF( WANTU ) THEN + J = ITGKZ + DO I = 1, NS + CALL SCOPY( N, WORK( J ), 1, U( 1,I ), 1 ) + J = J + N*2 + END DO + CALL SLASET( 'A', M-N, NS, ZERO, ZERO, U( N+1,1 ), LDU ) +* +* Call SORMBR to compute QB*UB. +* (Workspace in WORK( ITEMP ): need N, prefer N*NB) +* + CALL SORMBR( 'Q', 'L', 'N', M, NS, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( ITEMP ), + $ LWORK-ITEMP+1, IERR ) + END IF +* +* If needed, compute right singular vectors. +* + IF( WANTVT) THEN + J = ITGKZ + N + DO I = 1, NS + CALL SCOPY( N, WORK( J ), 1, VT( I,1 ), LDVT ) + J = J + N*2 + END DO +* +* Call SORMBR to compute VB**T * PB**T +* (Workspace in WORK( ITEMP ): need N, prefer N*NB) +* + CALL SORMBR( 'P', 'R', 'T', NS, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( ITEMP ), + $ LWORK-ITEMP+1, IERR ) + END IF + END IF + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce A using the LQ decomposition. +* + IF( N.GE.MNTHR ) THEN +* +* Path 1t (N much larger than M): +* A = L * Q = ( QB * B * PB**T ) * Q +* = ( QB * ( UB * S * VB**T ) * PB**T ) * Q +* U = QB * UB ; V**T = VB**T * PB**T * Q +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + ITAU = 1 + ITEMP = ITAU + M + CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + +* Copy L into WORK and bidiagonalize it: +* (Workspace in WORK( ITEMP ): need M*M+5*N, prefer M*M+4*M+2*M*NB) +* + ILQF = ITEMP + ID = ILQF + M*M + IE = ID + M + ITAUQ = IE + M + ITAUP = ITAUQ + M + ITEMP = ITAUP + M + CALL SLACPY( 'L', M, M, A, LDA, WORK( ILQF ), M ) + CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( ILQF+M ), M ) + CALL SGEBRD( M, M, WORK( ILQF ), M, WORK( ID ), WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) +* +* Solve eigenvalue problem TGK*Z=Z*S. +* (Workspace: need 2*M*M+14*M) +* + ITGKZ = ITEMP + ITEMP = ITGKZ + M*(M*2+1) + CALL SBDSVDX( 'U', JOBZ, RNGTGK, M, WORK( ID ), WORK( IE ), + $ VL, VU, ILTGK, IUTGK, NS, S, WORK( ITGKZ ), + $ M*2, WORK( ITEMP ), IWORK, INFO) +* +* If needed, compute left singular vectors. +* + IF( WANTU ) THEN + J = ITGKZ + DO I = 1, NS + CALL SCOPY( M, WORK( J ), 1, U( 1,I ), 1 ) + J = J + M*2 + END DO +* +* Call SORMBR to compute QB*UB. +* (Workspace in WORK( ITEMP ): need M, prefer M*NB) +* + CALL SORMBR( 'Q', 'L', 'N', M, NS, M, WORK( ILQF ), M, + $ WORK( ITAUQ ), U, LDU, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF +* +* If needed, compute right singular vectors. +* + IF( WANTVT) THEN + J = ITGKZ + M + DO I = 1, NS + CALL SCOPY( M, WORK( J ), 1, VT( I,1 ), LDVT ) + J = J + M*2 + END DO + CALL SLASET( 'A', NS, N-M, ZERO, ZERO, VT( 1,M+1 ), LDVT) +* +* Call SORMBR to compute (VB**T)*(PB**T) +* (Workspace in WORK( ITEMP ): need M, prefer M*NB) +* + CALL SORMBR( 'P', 'R', 'T', NS, M, M, WORK( ILQF ), M, + $ WORK( ITAUP ), VT, LDVT, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) +* +* Call SORMLQ to compute ((VB**T)*(PB**T))*Q. +* (Workspace in WORK( ITEMP ): need M, prefer M*NB) +* + CALL SORMLQ( 'R', 'N', NS, N, M, A, LDA, + $ WORK( ITAU ), VT, LDVT, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF + ELSE +* +* Path 2t (N greater than M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T +* U = QB * UB; V**T = VB**T * PB**T +* +* Bidiagonalize A +* (Workspace: need 4*M+N, prefer 4*M+(M+N)*NB) +* + ID = 1 + IE = ID + M + ITAUQ = IE + M + ITAUP = ITAUQ + M + ITEMP = ITAUP + M + CALL SGEBRD( M, N, A, LDA, WORK( ID ), WORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) +* +* Solve eigenvalue problem TGK*Z=Z*S. +* (Workspace: need 2*M*M+14*M) +* + ITGKZ = ITEMP + ITEMP = ITGKZ + M*(M*2+1) + CALL SBDSVDX( 'L', JOBZ, RNGTGK, M, WORK( ID ), WORK( IE ), + $ VL, VU, ILTGK, IUTGK, NS, S, WORK( ITGKZ ), + $ M*2, WORK( ITEMP ), IWORK, INFO) +* +* If needed, compute left singular vectors. +* + IF( WANTU ) THEN + J = ITGKZ + DO I = 1, NS + CALL SCOPY( M, WORK( J ), 1, U( 1,I ), 1 ) + J = J + M*2 + END DO +* +* Call SORMBR to compute QB*UB. +* (Workspace in WORK( ITEMP ): need M, prefer M*NB) +* + CALL SORMBR( 'Q', 'L', 'N', M, NS, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF +* +* If needed, compute right singular vectors. +* + IF( WANTVT) THEN + J = ITGKZ + M + DO I = 1, NS + CALL SCOPY( M, WORK( J ), 1, VT( I,1 ), LDVT ) + J = J + M*2 + END DO + CALL SLASET( 'A', NS, N-M, ZERO, ZERO, VT( 1,M+1 ), LDVT) +* +* Call SORMBR to compute VB**T * PB**T +* (Workspace in WORK( ITEMP ): need M, prefer M*NB) +* + CALL SORMBR( 'P', 'R', 'T', NS, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF + END IF + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, + $ S, MINMN, INFO ) + IF( ANRM.LT.SMLNUM ) + $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, + $ S, MINMN, INFO ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = REAL( MAXWRK ) +* + RETURN +* +* End of SGESVDX +* + END diff --git a/dspl/liblapack/SRC/sgesvj.f b/dspl/liblapack/SRC/sgesvj.f new file mode 100644 index 0000000..7a79011 --- /dev/null +++ b/dspl/liblapack/SRC/sgesvj.f @@ -0,0 +1,1601 @@ +*> \brief \b SGESVJ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGESVJ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, +* LDV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDV, LWORK, M, MV, N +* CHARACTER*1 JOBA, JOBU, JOBV +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), SVA( N ), V( LDV, * ), +* $ WORK( LWORK ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGESVJ computes the singular value decomposition (SVD) of a real +*> M-by-N matrix A, where M >= N. The SVD of A is written as +*> [++] [xx] [x0] [xx] +*> A = U * SIGMA * V^t, [++] = [xx] * [ox] * [xx] +*> [++] [xx] +*> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal +*> matrix, and V is an N-by-N orthogonal matrix. The diagonal elements +*> of SIGMA are the singular values of A. The columns of U and V are the +*> left and the right singular vectors of A, respectively. +*> SGESVJ can sometimes compute tiny singular values and their singular vectors much +*> more accurately than other SVD routines, see below under Further Details. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBA +*> \verbatim +*> JOBA is CHARACTER*1 +*> Specifies the structure of A. +*> = 'L': The input matrix A is lower triangular; +*> = 'U': The input matrix A is upper triangular; +*> = 'G': The input matrix A is general M-by-N matrix, M >= N. +*> \endverbatim +*> +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> Specifies whether to compute the left singular vectors +*> (columns of U): +*> = 'U': The left singular vectors corresponding to the nonzero +*> singular values are computed and returned in the leading +*> columns of A. See more details in the description of A. +*> The default numerical orthogonality threshold is set to +*> approximately TOL=CTOL*EPS, CTOL=SQRT(M), EPS=SLAMCH('E'). +*> = 'C': Analogous to JOBU='U', except that user can control the +*> level of numerical orthogonality of the computed left +*> singular vectors. TOL can be set to TOL = CTOL*EPS, where +*> CTOL is given on input in the array WORK. +*> No CTOL smaller than ONE is allowed. CTOL greater +*> than 1 / EPS is meaningless. The option 'C' +*> can be used if M*EPS is satisfactory orthogonality +*> of the computed left singular vectors, so CTOL=M could +*> save few sweeps of Jacobi rotations. +*> See the descriptions of A and WORK(1). +*> = 'N': The matrix U is not computed. However, see the +*> description of A. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> Specifies whether to compute the right singular vectors, that +*> is, the matrix V: +*> = 'V' : the matrix V is computed and returned in the array V +*> = 'A' : the Jacobi rotations are applied to the MV-by-N +*> array V. In other words, the right singular vector +*> matrix V is not computed explicitly; instead it is +*> applied to an MV-by-N matrix initially stored in the +*> first MV rows of V. +*> = 'N' : the matrix V is not computed and the array V is not +*> referenced +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. 1/SLAMCH('E') > M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. +*> M >= N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> If JOBU .EQ. 'U' .OR. JOBU .EQ. 'C': +*> If INFO .EQ. 0 : +*> RANKA orthonormal columns of U are returned in the +*> leading RANKA columns of the array A. Here RANKA <= N +*> is the number of computed singular values of A that are +*> above the underflow threshold SLAMCH('S'). The singular +*> vectors corresponding to underflowed or zero singular +*> values are not computed. The value of RANKA is returned +*> in the array WORK as RANKA=NINT(WORK(2)). Also see the +*> descriptions of SVA and WORK. The computed columns of U +*> are mutually numerically orthogonal up to approximately +*> TOL=SQRT(M)*EPS (default); or TOL=CTOL*EPS (JOBU.EQ.'C'), +*> see the description of JOBU. +*> If INFO .GT. 0, +*> the procedure SGESVJ did not converge in the given number +*> of iterations (sweeps). In that case, the computed +*> columns of U may not be orthogonal up to TOL. The output +*> U (stored in A), SIGMA (given by the computed singular +*> values in SVA(1:N)) and V is still a decomposition of the +*> input matrix A in the sense that the residual +*> ||A-SCALE*U*SIGMA*V^T||_2 / ||A||_2 is small. +*> If JOBU .EQ. 'N': +*> If INFO .EQ. 0 : +*> Note that the left singular vectors are 'for free' in the +*> one-sided Jacobi SVD algorithm. However, if only the +*> singular values are needed, the level of numerical +*> orthogonality of U is not an issue and iterations are +*> stopped when the columns of the iterated matrix are +*> numerically orthogonal up to approximately M*EPS. Thus, +*> on exit, A contains the columns of U scaled with the +*> corresponding singular values. +*> If INFO .GT. 0 : +*> the procedure SGESVJ did not converge in the given number +*> of iterations (sweeps). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] SVA +*> \verbatim +*> SVA is REAL array, dimension (N) +*> On exit, +*> If INFO .EQ. 0 : +*> depending on the value SCALE = WORK(1), we have: +*> If SCALE .EQ. ONE: +*> SVA(1:N) contains the computed singular values of A. +*> During the computation SVA contains the Euclidean column +*> norms of the iterated matrices in the array A. +*> If SCALE .NE. ONE: +*> The singular values of A are SCALE*SVA(1:N), and this +*> factored representation is due to the fact that some of the +*> singular values of A might underflow or overflow. +*> +*> If INFO .GT. 0 : +*> the procedure SGESVJ did not converge in the given number of +*> iterations (sweeps) and SCALE*SVA(1:N) may not be accurate. +*> \endverbatim +*> +*> \param[in] MV +*> \verbatim +*> MV is INTEGER +*> If JOBV .EQ. 'A', then the product of Jacobi rotations in SGESVJ +*> is applied to the first MV rows of V. See the description of JOBV. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is REAL array, dimension (LDV,N) +*> If JOBV = 'V', then V contains on exit the N-by-N matrix of +*> the right singular vectors; +*> If JOBV = 'A', then V contains the product of the computed right +*> singular vector matrix and the initial matrix in +*> the array V. +*> If JOBV = 'N', then V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V, LDV .GE. 1. +*> If JOBV .EQ. 'V', then LDV .GE. max(1,N). +*> If JOBV .EQ. 'A', then LDV .GE. max(1,MV) . +*> \endverbatim +*> +*> \param[in,out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> On entry, +*> If JOBU .EQ. 'C' : +*> WORK(1) = CTOL, where CTOL defines the threshold for convergence. +*> The process stops if all columns of A are mutually +*> orthogonal up to CTOL*EPS, EPS=SLAMCH('E'). +*> It is required that CTOL >= ONE, i.e. it is not +*> allowed to force the routine to obtain orthogonality +*> below EPSILON. +*> On exit, +*> WORK(1) = SCALE is the scaling factor such that SCALE*SVA(1:N) +*> are the computed singular vcalues of A. +*> (See description of SVA().) +*> WORK(2) = NINT(WORK(2)) is the number of the computed nonzero +*> singular values. +*> WORK(3) = NINT(WORK(3)) is the number of the computed singular +*> values that are larger than the underflow threshold. +*> WORK(4) = NINT(WORK(4)) is the number of sweeps of Jacobi +*> rotations needed for numerical convergence. +*> WORK(5) = max_{i.NE.j} |COS(A(:,i),A(:,j))| in the last sweep. +*> This is useful information in cases when SGESVJ did +*> not converge, as it can be used to estimate whether +*> the output is stil useful and for post festum analysis. +*> WORK(6) = the largest absolute value over all sines of the +*> Jacobi rotation angles in the last sweep. It can be +*> useful for a post festum analysis. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> length of WORK, WORK >= MAX(6,M+N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0 : successful exit. +*> < 0 : if INFO = -i, then the i-th argument had an illegal value +*> > 0 : SGESVJ did not converge in the maximal allowed number (30) +*> of sweeps. The output may still be useful. See the +*> description of WORK. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> The orthogonal N-by-N matrix V is obtained as a product of Jacobi plane +*> rotations. The rotations are implemented as fast scaled rotations of +*> Anda and Park [1]. In the case of underflow of the Jacobi angle, a +*> modified Jacobi transformation of Drmac [4] is used. Pivot strategy uses +*> column interchanges of de Rijk [2]. The relative accuracy of the computed +*> singular values and the accuracy of the computed singular vectors (in +*> angle metric) is as guaranteed by the theory of Demmel and Veselic [3]. +*> The condition number that determines the accuracy in the full rank case +*> is essentially min_{D=diag} kappa(A*D), where kappa(.) is the +*> spectral condition number. The best performance of this Jacobi SVD +*> procedure is achieved if used in an accelerated version of Drmac and +*> Veselic [5,6], and it is the kernel routine in the SIGMA library [7]. +*> Some tunning parameters (marked with [TP]) are available for the +*> implementer. \n +*> The computational range for the nonzero singular values is the machine +*> number interval ( UNDERFLOW , OVERFLOW ). In extreme cases, even +*> denormalized singular values can be computed with the corresponding +*> gradual loss of accurate digits. +*> +*> \par Contributors: +* ================== +*> +*> Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) +*> +*> \par References: +* ================ +*> +*> [1] A. A. Anda and H. Park: Fast plane rotations with dynamic scaling. \n +*> SIAM J. matrix Anal. Appl., Vol. 15 (1994), pp. 162-174. \n\n +*> [2] P. P. M. De Rijk: A one-sided Jacobi algorithm for computing the +*> singular value decomposition on a vector computer. \n +*> SIAM J. Sci. Stat. Comp., Vol. 10 (1998), pp. 359-371. \n\n +*> [3] J. Demmel and K. Veselic: Jacobi method is more accurate than QR. \n +*> [4] Z. Drmac: Implementation of Jacobi rotations for accurate singular +*> value computation in floating point arithmetic. \n +*> SIAM J. Sci. Comp., Vol. 18 (1997), pp. 1200-1222. \n\n +*> [5] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. \n +*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. \n +*> LAPACK Working note 169. \n\n +*> [6] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. \n +*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. \n +*> LAPACK Working note 170. \n\n +*> [7] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, +*> QSVD, (H,K)-SVD computations.\n +*> Department of Mathematics, University of Zagreb, 2008. +*> +*> \par Bugs, Examples and Comments: +* ================================= +*> +*> Please report all bugs and send interesting test examples and comments to +*> drmac@math.hr. Thank you. +* +* ===================================================================== + SUBROUTINE SGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, + $ LDV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDV, LWORK, M, MV, N + CHARACTER*1 JOBA, JOBU, JOBV +* .. +* .. Array Arguments .. + REAL A( LDA, * ), SVA( N ), V( LDV, * ), + $ WORK( LWORK ) +* .. +* +* ===================================================================== +* +* .. Local Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E0, HALF = 0.5E0, ONE = 1.0E0) + INTEGER NSWEEP + PARAMETER ( NSWEEP = 30 ) +* .. +* .. Local Scalars .. + REAL AAPP, AAPP0, AAPQ, AAQQ, APOAQ, AQOAP, BIG, + $ BIGTHETA, CS, CTOL, EPSLN, LARGE, MXAAPQ, + $ MXSINJ, ROOTBIG, ROOTEPS, ROOTSFMIN, ROOTTOL, + $ SKL, SFMIN, SMALL, SN, T, TEMP1, THETA, + $ THSIGN, TOL + INTEGER BLSKIP, EMPTSW, i, ibr, IERR, igl, IJBLSK, ir1, + $ ISWROT, jbc, jgl, KBL, LKAHEAD, MVL, N2, N34, + $ N4, NBL, NOTROT, p, PSKIPPED, q, ROWSKIP, + $ SWBAND + LOGICAL APPLV, GOSCALE, LOWER, LSVEC, NOSCALE, ROTOK, + $ RSVEC, UCTOL, UPPER +* .. +* .. Local Arrays .. + REAL FASTR( 5 ) +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, FLOAT, SIGN, SQRT +* .. +* .. External Functions .. +* .. +* from BLAS + REAL SDOT, SNRM2 + EXTERNAL SDOT, SNRM2 + INTEGER ISAMAX + EXTERNAL ISAMAX +* from LAPACK + REAL SLAMCH + EXTERNAL SLAMCH + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. +* .. +* from BLAS + EXTERNAL SAXPY, SCOPY, SROTM, SSCAL, SSWAP +* from LAPACK + EXTERNAL SLASCL, SLASET, SLASSQ, XERBLA +* + EXTERNAL SGSVJ0, SGSVJ1 +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + LSVEC = LSAME( JOBU, 'U' ) + UCTOL = LSAME( JOBU, 'C' ) + RSVEC = LSAME( JOBV, 'V' ) + APPLV = LSAME( JOBV, 'A' ) + UPPER = LSAME( JOBA, 'U' ) + LOWER = LSAME( JOBA, 'L' ) +* + IF( .NOT.( UPPER .OR. LOWER .OR. LSAME( JOBA, 'G' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LSVEC .OR. UCTOL .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( RSVEC .OR. APPLV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( ( N.LT.0 ) .OR. ( N.GT.M ) ) THEN + INFO = -5 + ELSE IF( LDA.LT.M ) THEN + INFO = -7 + ELSE IF( MV.LT.0 ) THEN + INFO = -9 + ELSE IF( ( RSVEC .AND. ( LDV.LT.N ) ) .OR. + $ ( APPLV .AND. ( LDV.LT.MV ) ) ) THEN + INFO = -11 + ELSE IF( UCTOL .AND. ( WORK( 1 ).LE.ONE ) ) THEN + INFO = -12 + ELSE IF( LWORK.LT.MAX( M+N, 6 ) ) THEN + INFO = -13 + ELSE + INFO = 0 + END IF +* +* #:( + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGESVJ', -INFO ) + RETURN + END IF +* +* #:) Quick return for void matrix +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) )RETURN +* +* Set numerical parameters +* The stopping criterion for Jacobi rotations is +* +* max_{i<>j}|A(:,i)^T * A(:,j)|/(||A(:,i)||*||A(:,j)||) < CTOL*EPS +* +* where EPS is the round-off and CTOL is defined as follows: +* + IF( UCTOL ) THEN +* ... user controlled + CTOL = WORK( 1 ) + ELSE +* ... default + IF( LSVEC .OR. RSVEC .OR. APPLV ) THEN + CTOL = SQRT( FLOAT( M ) ) + ELSE + CTOL = FLOAT( M ) + END IF + END IF +* ... and the machine dependent parameters are +*[!] (Make sure that SLAMCH() works properly on the target machine.) +* + EPSLN = SLAMCH( 'Epsilon' ) + ROOTEPS = SQRT( EPSLN ) + SFMIN = SLAMCH( 'SafeMinimum' ) + ROOTSFMIN = SQRT( SFMIN ) + SMALL = SFMIN / EPSLN + BIG = SLAMCH( 'Overflow' ) +* BIG = ONE / SFMIN + ROOTBIG = ONE / ROOTSFMIN + LARGE = BIG / SQRT( FLOAT( M*N ) ) + BIGTHETA = ONE / ROOTEPS +* + TOL = CTOL*EPSLN + ROOTTOL = SQRT( TOL ) +* + IF( FLOAT( M )*EPSLN.GE.ONE ) THEN + INFO = -4 + CALL XERBLA( 'SGESVJ', -INFO ) + RETURN + END IF +* +* Initialize the right singular vector matrix. +* + IF( RSVEC ) THEN + MVL = N + CALL SLASET( 'A', MVL, N, ZERO, ONE, V, LDV ) + ELSE IF( APPLV ) THEN + MVL = MV + END IF + RSVEC = RSVEC .OR. APPLV +* +* Initialize SVA( 1:N ) = ( ||A e_i||_2, i = 1:N ) +*(!) If necessary, scale A to protect the largest singular value +* from overflow. It is possible that saving the largest singular +* value destroys the information about the small ones. +* This initial scaling is almost minimal in the sense that the +* goal is to make sure that no column norm overflows, and that +* SQRT(N)*max_i SVA(i) does not overflow. If INFinite entries +* in A are detected, the procedure returns with INFO=-6. +* + SKL = ONE / SQRT( FLOAT( M )*FLOAT( N ) ) + NOSCALE = .TRUE. + GOSCALE = .TRUE. +* + IF( LOWER ) THEN +* the input matrix is M-by-N lower triangular (trapezoidal) + DO 1874 p = 1, N + AAPP = ZERO + AAQQ = ONE + CALL SLASSQ( M-p+1, A( p, p ), 1, AAPP, AAQQ ) + IF( AAPP.GT.BIG ) THEN + INFO = -6 + CALL XERBLA( 'SGESVJ', -INFO ) + RETURN + END IF + AAQQ = SQRT( AAQQ ) + IF( ( AAPP.LT.( BIG / AAQQ ) ) .AND. NOSCALE ) THEN + SVA( p ) = AAPP*AAQQ + ELSE + NOSCALE = .FALSE. + SVA( p ) = AAPP*( AAQQ*SKL ) + IF( GOSCALE ) THEN + GOSCALE = .FALSE. + DO 1873 q = 1, p - 1 + SVA( q ) = SVA( q )*SKL + 1873 CONTINUE + END IF + END IF + 1874 CONTINUE + ELSE IF( UPPER ) THEN +* the input matrix is M-by-N upper triangular (trapezoidal) + DO 2874 p = 1, N + AAPP = ZERO + AAQQ = ONE + CALL SLASSQ( p, A( 1, p ), 1, AAPP, AAQQ ) + IF( AAPP.GT.BIG ) THEN + INFO = -6 + CALL XERBLA( 'SGESVJ', -INFO ) + RETURN + END IF + AAQQ = SQRT( AAQQ ) + IF( ( AAPP.LT.( BIG / AAQQ ) ) .AND. NOSCALE ) THEN + SVA( p ) = AAPP*AAQQ + ELSE + NOSCALE = .FALSE. + SVA( p ) = AAPP*( AAQQ*SKL ) + IF( GOSCALE ) THEN + GOSCALE = .FALSE. + DO 2873 q = 1, p - 1 + SVA( q ) = SVA( q )*SKL + 2873 CONTINUE + END IF + END IF + 2874 CONTINUE + ELSE +* the input matrix is M-by-N general dense + DO 3874 p = 1, N + AAPP = ZERO + AAQQ = ONE + CALL SLASSQ( M, A( 1, p ), 1, AAPP, AAQQ ) + IF( AAPP.GT.BIG ) THEN + INFO = -6 + CALL XERBLA( 'SGESVJ', -INFO ) + RETURN + END IF + AAQQ = SQRT( AAQQ ) + IF( ( AAPP.LT.( BIG / AAQQ ) ) .AND. NOSCALE ) THEN + SVA( p ) = AAPP*AAQQ + ELSE + NOSCALE = .FALSE. + SVA( p ) = AAPP*( AAQQ*SKL ) + IF( GOSCALE ) THEN + GOSCALE = .FALSE. + DO 3873 q = 1, p - 1 + SVA( q ) = SVA( q )*SKL + 3873 CONTINUE + END IF + END IF + 3874 CONTINUE + END IF +* + IF( NOSCALE )SKL = ONE +* +* Move the smaller part of the spectrum from the underflow threshold +*(!) Start by determining the position of the nonzero entries of the +* array SVA() relative to ( SFMIN, BIG ). +* + AAPP = ZERO + AAQQ = BIG + DO 4781 p = 1, N + IF( SVA( p ).NE.ZERO )AAQQ = MIN( AAQQ, SVA( p ) ) + AAPP = MAX( AAPP, SVA( p ) ) + 4781 CONTINUE +* +* #:) Quick return for zero matrix +* + IF( AAPP.EQ.ZERO ) THEN + IF( LSVEC )CALL SLASET( 'G', M, N, ZERO, ONE, A, LDA ) + WORK( 1 ) = ONE + WORK( 2 ) = ZERO + WORK( 3 ) = ZERO + WORK( 4 ) = ZERO + WORK( 5 ) = ZERO + WORK( 6 ) = ZERO + RETURN + END IF +* +* #:) Quick return for one-column matrix +* + IF( N.EQ.1 ) THEN + IF( LSVEC )CALL SLASCL( 'G', 0, 0, SVA( 1 ), SKL, M, 1, + $ A( 1, 1 ), LDA, IERR ) + WORK( 1 ) = ONE / SKL + IF( SVA( 1 ).GE.SFMIN ) THEN + WORK( 2 ) = ONE + ELSE + WORK( 2 ) = ZERO + END IF + WORK( 3 ) = ZERO + WORK( 4 ) = ZERO + WORK( 5 ) = ZERO + WORK( 6 ) = ZERO + RETURN + END IF +* +* Protect small singular values from underflow, and try to +* avoid underflows/overflows in computing Jacobi rotations. +* + SN = SQRT( SFMIN / EPSLN ) + TEMP1 = SQRT( BIG / FLOAT( N ) ) + IF( ( AAPP.LE.SN ) .OR. ( AAQQ.GE.TEMP1 ) .OR. + $ ( ( SN.LE.AAQQ ) .AND. ( AAPP.LE.TEMP1 ) ) ) THEN + TEMP1 = MIN( BIG, TEMP1 / AAPP ) +* AAQQ = AAQQ*TEMP1 +* AAPP = AAPP*TEMP1 + ELSE IF( ( AAQQ.LE.SN ) .AND. ( AAPP.LE.TEMP1 ) ) THEN + TEMP1 = MIN( SN / AAQQ, BIG / ( AAPP*SQRT( FLOAT( N ) ) ) ) +* AAQQ = AAQQ*TEMP1 +* AAPP = AAPP*TEMP1 + ELSE IF( ( AAQQ.GE.SN ) .AND. ( AAPP.GE.TEMP1 ) ) THEN + TEMP1 = MAX( SN / AAQQ, TEMP1 / AAPP ) +* AAQQ = AAQQ*TEMP1 +* AAPP = AAPP*TEMP1 + ELSE IF( ( AAQQ.LE.SN ) .AND. ( AAPP.GE.TEMP1 ) ) THEN + TEMP1 = MIN( SN / AAQQ, BIG / ( SQRT( FLOAT( N ) )*AAPP ) ) +* AAQQ = AAQQ*TEMP1 +* AAPP = AAPP*TEMP1 + ELSE + TEMP1 = ONE + END IF +* +* Scale, if necessary +* + IF( TEMP1.NE.ONE ) THEN + CALL SLASCL( 'G', 0, 0, ONE, TEMP1, N, 1, SVA, N, IERR ) + END IF + SKL = TEMP1*SKL + IF( SKL.NE.ONE ) THEN + CALL SLASCL( JOBA, 0, 0, ONE, SKL, M, N, A, LDA, IERR ) + SKL = ONE / SKL + END IF +* +* Row-cyclic Jacobi SVD algorithm with column pivoting +* + EMPTSW = ( N*( N-1 ) ) / 2 + NOTROT = 0 + FASTR( 1 ) = ZERO +* +* A is represented in factored form A = A * diag(WORK), where diag(WORK) +* is initialized to identity. WORK is updated during fast scaled +* rotations. +* + DO 1868 q = 1, N + WORK( q ) = ONE + 1868 CONTINUE +* +* + SWBAND = 3 +*[TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective +* if SGESVJ is used as a computational routine in the preconditioned +* Jacobi SVD algorithm SGESVJ. For sweeps i=1:SWBAND the procedure +* works on pivots inside a band-like region around the diagonal. +* The boundaries are determined dynamically, based on the number of +* pivots above a threshold. +* + KBL = MIN( 8, N ) +*[TP] KBL is a tuning parameter that defines the tile size in the +* tiling of the p-q loops of pivot pairs. In general, an optimal +* value of KBL depends on the matrix dimensions and on the +* parameters of the computer's memory. +* + NBL = N / KBL + IF( ( NBL*KBL ).NE.N )NBL = NBL + 1 +* + BLSKIP = KBL**2 +*[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. +* + ROWSKIP = MIN( 5, KBL ) +*[TP] ROWSKIP is a tuning parameter. +* + LKAHEAD = 1 +*[TP] LKAHEAD is a tuning parameter. +* +* Quasi block transformations, using the lower (upper) triangular +* structure of the input matrix. The quasi-block-cycling usually +* invokes cubic convergence. Big part of this cycle is done inside +* canonical subspaces of dimensions less than M. +* + IF( ( LOWER .OR. UPPER ) .AND. ( N.GT.MAX( 64, 4*KBL ) ) ) THEN +*[TP] The number of partition levels and the actual partition are +* tuning parameters. + N4 = N / 4 + N2 = N / 2 + N34 = 3*N4 + IF( APPLV ) THEN + q = 0 + ELSE + q = 1 + END IF +* + IF( LOWER ) THEN +* +* This works very well on lower triangular matrices, in particular +* in the framework of the preconditioned Jacobi SVD (xGEJSV). +* The idea is simple: +* [+ 0 0 0] Note that Jacobi transformations of [0 0] +* [+ + 0 0] [0 0] +* [+ + x 0] actually work on [x 0] [x 0] +* [+ + x x] [x x]. [x x] +* + CALL SGSVJ0( JOBV, M-N34, N-N34, A( N34+1, N34+1 ), LDA, + $ WORK( N34+1 ), SVA( N34+1 ), MVL, + $ V( N34*q+1, N34+1 ), LDV, EPSLN, SFMIN, TOL, + $ 2, WORK( N+1 ), LWORK-N, IERR ) +* + CALL SGSVJ0( JOBV, M-N2, N34-N2, A( N2+1, N2+1 ), LDA, + $ WORK( N2+1 ), SVA( N2+1 ), MVL, + $ V( N2*q+1, N2+1 ), LDV, EPSLN, SFMIN, TOL, 2, + $ WORK( N+1 ), LWORK-N, IERR ) +* + CALL SGSVJ1( JOBV, M-N2, N-N2, N4, A( N2+1, N2+1 ), LDA, + $ WORK( N2+1 ), SVA( N2+1 ), MVL, + $ V( N2*q+1, N2+1 ), LDV, EPSLN, SFMIN, TOL, 1, + $ WORK( N+1 ), LWORK-N, IERR ) +* + CALL SGSVJ0( JOBV, M-N4, N2-N4, A( N4+1, N4+1 ), LDA, + $ WORK( N4+1 ), SVA( N4+1 ), MVL, + $ V( N4*q+1, N4+1 ), LDV, EPSLN, SFMIN, TOL, 1, + $ WORK( N+1 ), LWORK-N, IERR ) +* + CALL SGSVJ0( JOBV, M, N4, A, LDA, WORK, SVA, MVL, V, LDV, + $ EPSLN, SFMIN, TOL, 1, WORK( N+1 ), LWORK-N, + $ IERR ) +* + CALL SGSVJ1( JOBV, M, N2, N4, A, LDA, WORK, SVA, MVL, V, + $ LDV, EPSLN, SFMIN, TOL, 1, WORK( N+1 ), + $ LWORK-N, IERR ) +* +* + ELSE IF( UPPER ) THEN +* +* + CALL SGSVJ0( JOBV, N4, N4, A, LDA, WORK, SVA, MVL, V, LDV, + $ EPSLN, SFMIN, TOL, 2, WORK( N+1 ), LWORK-N, + $ IERR ) +* + CALL SGSVJ0( JOBV, N2, N4, A( 1, N4+1 ), LDA, WORK( N4+1 ), + $ SVA( N4+1 ), MVL, V( N4*q+1, N4+1 ), LDV, + $ EPSLN, SFMIN, TOL, 1, WORK( N+1 ), LWORK-N, + $ IERR ) +* + CALL SGSVJ1( JOBV, N2, N2, N4, A, LDA, WORK, SVA, MVL, V, + $ LDV, EPSLN, SFMIN, TOL, 1, WORK( N+1 ), + $ LWORK-N, IERR ) +* + CALL SGSVJ0( JOBV, N2+N4, N4, A( 1, N2+1 ), LDA, + $ WORK( N2+1 ), SVA( N2+1 ), MVL, + $ V( N2*q+1, N2+1 ), LDV, EPSLN, SFMIN, TOL, 1, + $ WORK( N+1 ), LWORK-N, IERR ) + + END IF +* + END IF +* +* .. Row-cyclic pivot strategy with de Rijk's pivoting .. +* + DO 1993 i = 1, NSWEEP +* +* .. go go go ... +* + MXAAPQ = ZERO + MXSINJ = ZERO + ISWROT = 0 +* + NOTROT = 0 + PSKIPPED = 0 +* +* Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs +* 1 <= p < q <= N. This is the first step toward a blocked implementation +* of the rotations. New implementation, based on block transformations, +* is under development. +* + DO 2000 ibr = 1, NBL +* + igl = ( ibr-1 )*KBL + 1 +* + DO 1002 ir1 = 0, MIN( LKAHEAD, NBL-ibr ) +* + igl = igl + ir1*KBL +* + DO 2001 p = igl, MIN( igl+KBL-1, N-1 ) +* +* .. de Rijk's pivoting +* + q = ISAMAX( N-p+1, SVA( p ), 1 ) + p - 1 + IF( p.NE.q ) THEN + CALL SSWAP( M, A( 1, p ), 1, A( 1, q ), 1 ) + IF( RSVEC )CALL SSWAP( MVL, V( 1, p ), 1, + $ V( 1, q ), 1 ) + TEMP1 = SVA( p ) + SVA( p ) = SVA( q ) + SVA( q ) = TEMP1 + TEMP1 = WORK( p ) + WORK( p ) = WORK( q ) + WORK( q ) = TEMP1 + END IF +* + IF( ir1.EQ.0 ) THEN +* +* Column norms are periodically updated by explicit +* norm computation. +* Caveat: +* Unfortunately, some BLAS implementations compute SNRM2(M,A(1,p),1) +* as SQRT(SDOT(M,A(1,p),1,A(1,p),1)), which may cause the result to +* overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and to +* underflow for ||A(:,p)||_2 < SQRT(underflow_threshold). +* Hence, SNRM2 cannot be trusted, not even in the case when +* the true norm is far from the under(over)flow boundaries. +* If properly implemented SNRM2 is available, the IF-THEN-ELSE +* below should read "AAPP = SNRM2( M, A(1,p), 1 ) * WORK(p)". +* + IF( ( SVA( p ).LT.ROOTBIG ) .AND. + $ ( SVA( p ).GT.ROOTSFMIN ) ) THEN + SVA( p ) = SNRM2( M, A( 1, p ), 1 )*WORK( p ) + ELSE + TEMP1 = ZERO + AAPP = ONE + CALL SLASSQ( M, A( 1, p ), 1, TEMP1, AAPP ) + SVA( p ) = TEMP1*SQRT( AAPP )*WORK( p ) + END IF + AAPP = SVA( p ) + ELSE + AAPP = SVA( p ) + END IF +* + IF( AAPP.GT.ZERO ) THEN +* + PSKIPPED = 0 +* + DO 2002 q = p + 1, MIN( igl+KBL-1, N ) +* + AAQQ = SVA( q ) +* + IF( AAQQ.GT.ZERO ) THEN +* + AAPP0 = AAPP + IF( AAQQ.GE.ONE ) THEN + ROTOK = ( SMALL*AAPP ).LE.AAQQ + IF( AAPP.LT.( BIG / AAQQ ) ) THEN + AAPQ = ( SDOT( M, A( 1, p ), 1, A( 1, + $ q ), 1 )*WORK( p )*WORK( q ) / + $ AAQQ ) / AAPP + ELSE + CALL SCOPY( M, A( 1, p ), 1, + $ WORK( N+1 ), 1 ) + CALL SLASCL( 'G', 0, 0, AAPP, + $ WORK( p ), M, 1, + $ WORK( N+1 ), LDA, IERR ) + AAPQ = SDOT( M, WORK( N+1 ), 1, + $ A( 1, q ), 1 )*WORK( q ) / AAQQ + END IF + ELSE + ROTOK = AAPP.LE.( AAQQ / SMALL ) + IF( AAPP.GT.( SMALL / AAQQ ) ) THEN + AAPQ = ( SDOT( M, A( 1, p ), 1, A( 1, + $ q ), 1 )*WORK( p )*WORK( q ) / + $ AAQQ ) / AAPP + ELSE + CALL SCOPY( M, A( 1, q ), 1, + $ WORK( N+1 ), 1 ) + CALL SLASCL( 'G', 0, 0, AAQQ, + $ WORK( q ), M, 1, + $ WORK( N+1 ), LDA, IERR ) + AAPQ = SDOT( M, WORK( N+1 ), 1, + $ A( 1, p ), 1 )*WORK( p ) / AAPP + END IF + END IF +* + MXAAPQ = MAX( MXAAPQ, ABS( AAPQ ) ) +* +* TO rotate or NOT to rotate, THAT is the question ... +* + IF( ABS( AAPQ ).GT.TOL ) THEN +* +* .. rotate +*[RTD] ROTATED = ROTATED + ONE +* + IF( ir1.EQ.0 ) THEN + NOTROT = 0 + PSKIPPED = 0 + ISWROT = ISWROT + 1 + END IF +* + IF( ROTOK ) THEN +* + AQOAP = AAQQ / AAPP + APOAQ = AAPP / AAQQ + THETA = -HALF*ABS( AQOAP-APOAQ ) / AAPQ +* + IF( ABS( THETA ).GT.BIGTHETA ) THEN +* + T = HALF / THETA + FASTR( 3 ) = T*WORK( p ) / WORK( q ) + FASTR( 4 ) = -T*WORK( q ) / + $ WORK( p ) + CALL SROTM( M, A( 1, p ), 1, + $ A( 1, q ), 1, FASTR ) + IF( RSVEC )CALL SROTM( MVL, + $ V( 1, p ), 1, + $ V( 1, q ), 1, + $ FASTR ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ ) ) + MXSINJ = MAX( MXSINJ, ABS( T ) ) +* + ELSE +* +* .. choose correct signum for THETA and rotate +* + THSIGN = -SIGN( ONE, AAPQ ) + T = ONE / ( THETA+THSIGN* + $ SQRT( ONE+THETA*THETA ) ) + CS = SQRT( ONE / ( ONE+T*T ) ) + SN = T*CS +* + MXSINJ = MAX( MXSINJ, ABS( SN ) ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ ) ) +* + APOAQ = WORK( p ) / WORK( q ) + AQOAP = WORK( q ) / WORK( p ) + IF( WORK( p ).GE.ONE ) THEN + IF( WORK( q ).GE.ONE ) THEN + FASTR( 3 ) = T*APOAQ + FASTR( 4 ) = -T*AQOAP + WORK( p ) = WORK( p )*CS + WORK( q ) = WORK( q )*CS + CALL SROTM( M, A( 1, p ), 1, + $ A( 1, q ), 1, + $ FASTR ) + IF( RSVEC )CALL SROTM( MVL, + $ V( 1, p ), 1, V( 1, q ), + $ 1, FASTR ) + ELSE + CALL SAXPY( M, -T*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + CALL SAXPY( M, CS*SN*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + WORK( p ) = WORK( p )*CS + WORK( q ) = WORK( q ) / CS + IF( RSVEC ) THEN + CALL SAXPY( MVL, -T*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + CALL SAXPY( MVL, + $ CS*SN*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + END IF + END IF + ELSE + IF( WORK( q ).GE.ONE ) THEN + CALL SAXPY( M, T*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + CALL SAXPY( M, -CS*SN*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + WORK( p ) = WORK( p ) / CS + WORK( q ) = WORK( q )*CS + IF( RSVEC ) THEN + CALL SAXPY( MVL, T*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + CALL SAXPY( MVL, + $ -CS*SN*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + END IF + ELSE + IF( WORK( p ).GE.WORK( q ) ) + $ THEN + CALL SAXPY( M, -T*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + CALL SAXPY( M, CS*SN*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + WORK( p ) = WORK( p )*CS + WORK( q ) = WORK( q ) / CS + IF( RSVEC ) THEN + CALL SAXPY( MVL, + $ -T*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + CALL SAXPY( MVL, + $ CS*SN*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + END IF + ELSE + CALL SAXPY( M, T*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + CALL SAXPY( M, + $ -CS*SN*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + WORK( p ) = WORK( p ) / CS + WORK( q ) = WORK( q )*CS + IF( RSVEC ) THEN + CALL SAXPY( MVL, + $ T*APOAQ, V( 1, p ), + $ 1, V( 1, q ), 1 ) + CALL SAXPY( MVL, + $ -CS*SN*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + END IF + END IF + END IF + END IF + END IF +* + ELSE +* .. have to use modified Gram-Schmidt like transformation + CALL SCOPY( M, A( 1, p ), 1, + $ WORK( N+1 ), 1 ) + CALL SLASCL( 'G', 0, 0, AAPP, ONE, M, + $ 1, WORK( N+1 ), LDA, + $ IERR ) + CALL SLASCL( 'G', 0, 0, AAQQ, ONE, M, + $ 1, A( 1, q ), LDA, IERR ) + TEMP1 = -AAPQ*WORK( p ) / WORK( q ) + CALL SAXPY( M, TEMP1, WORK( N+1 ), 1, + $ A( 1, q ), 1 ) + CALL SLASCL( 'G', 0, 0, ONE, AAQQ, M, + $ 1, A( 1, q ), LDA, IERR ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE-AAPQ*AAPQ ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + END IF +* END IF ROTOK THEN ... ELSE +* +* In the case of cancellation in updating SVA(q), SVA(p) +* recompute SVA(q), SVA(p). +* + IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS ) + $ THEN + IF( ( AAQQ.LT.ROOTBIG ) .AND. + $ ( AAQQ.GT.ROOTSFMIN ) ) THEN + SVA( q ) = SNRM2( M, A( 1, q ), 1 )* + $ WORK( q ) + ELSE + T = ZERO + AAQQ = ONE + CALL SLASSQ( M, A( 1, q ), 1, T, + $ AAQQ ) + SVA( q ) = T*SQRT( AAQQ )*WORK( q ) + END IF + END IF + IF( ( AAPP / AAPP0 ).LE.ROOTEPS ) THEN + IF( ( AAPP.LT.ROOTBIG ) .AND. + $ ( AAPP.GT.ROOTSFMIN ) ) THEN + AAPP = SNRM2( M, A( 1, p ), 1 )* + $ WORK( p ) + ELSE + T = ZERO + AAPP = ONE + CALL SLASSQ( M, A( 1, p ), 1, T, + $ AAPP ) + AAPP = T*SQRT( AAPP )*WORK( p ) + END IF + SVA( p ) = AAPP + END IF +* + ELSE +* A(:,p) and A(:,q) already numerically orthogonal + IF( ir1.EQ.0 )NOTROT = NOTROT + 1 +*[RTD] SKIPPED = SKIPPED + 1 + PSKIPPED = PSKIPPED + 1 + END IF + ELSE +* A(:,q) is zero column + IF( ir1.EQ.0 )NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + END IF +* + IF( ( i.LE.SWBAND ) .AND. + $ ( PSKIPPED.GT.ROWSKIP ) ) THEN + IF( ir1.EQ.0 )AAPP = -AAPP + NOTROT = 0 + GO TO 2103 + END IF +* + 2002 CONTINUE +* END q-LOOP +* + 2103 CONTINUE +* bailed out of q-loop +* + SVA( p ) = AAPP +* + ELSE + SVA( p ) = AAPP + IF( ( ir1.EQ.0 ) .AND. ( AAPP.EQ.ZERO ) ) + $ NOTROT = NOTROT + MIN( igl+KBL-1, N ) - p + END IF +* + 2001 CONTINUE +* end of the p-loop +* end of doing the block ( ibr, ibr ) + 1002 CONTINUE +* end of ir1-loop +* +* ... go to the off diagonal blocks +* + igl = ( ibr-1 )*KBL + 1 +* + DO 2010 jbc = ibr + 1, NBL +* + jgl = ( jbc-1 )*KBL + 1 +* +* doing the block at ( ibr, jbc ) +* + IJBLSK = 0 + DO 2100 p = igl, MIN( igl+KBL-1, N ) +* + AAPP = SVA( p ) + IF( AAPP.GT.ZERO ) THEN +* + PSKIPPED = 0 +* + DO 2200 q = jgl, MIN( jgl+KBL-1, N ) +* + AAQQ = SVA( q ) + IF( AAQQ.GT.ZERO ) THEN + AAPP0 = AAPP +* +* .. M x 2 Jacobi SVD .. +* +* Safe Gram matrix computation +* + IF( AAQQ.GE.ONE ) THEN + IF( AAPP.GE.AAQQ ) THEN + ROTOK = ( SMALL*AAPP ).LE.AAQQ + ELSE + ROTOK = ( SMALL*AAQQ ).LE.AAPP + END IF + IF( AAPP.LT.( BIG / AAQQ ) ) THEN + AAPQ = ( SDOT( M, A( 1, p ), 1, A( 1, + $ q ), 1 )*WORK( p )*WORK( q ) / + $ AAQQ ) / AAPP + ELSE + CALL SCOPY( M, A( 1, p ), 1, + $ WORK( N+1 ), 1 ) + CALL SLASCL( 'G', 0, 0, AAPP, + $ WORK( p ), M, 1, + $ WORK( N+1 ), LDA, IERR ) + AAPQ = SDOT( M, WORK( N+1 ), 1, + $ A( 1, q ), 1 )*WORK( q ) / AAQQ + END IF + ELSE + IF( AAPP.GE.AAQQ ) THEN + ROTOK = AAPP.LE.( AAQQ / SMALL ) + ELSE + ROTOK = AAQQ.LE.( AAPP / SMALL ) + END IF + IF( AAPP.GT.( SMALL / AAQQ ) ) THEN + AAPQ = ( SDOT( M, A( 1, p ), 1, A( 1, + $ q ), 1 )*WORK( p )*WORK( q ) / + $ AAQQ ) / AAPP + ELSE + CALL SCOPY( M, A( 1, q ), 1, + $ WORK( N+1 ), 1 ) + CALL SLASCL( 'G', 0, 0, AAQQ, + $ WORK( q ), M, 1, + $ WORK( N+1 ), LDA, IERR ) + AAPQ = SDOT( M, WORK( N+1 ), 1, + $ A( 1, p ), 1 )*WORK( p ) / AAPP + END IF + END IF +* + MXAAPQ = MAX( MXAAPQ, ABS( AAPQ ) ) +* +* TO rotate or NOT to rotate, THAT is the question ... +* + IF( ABS( AAPQ ).GT.TOL ) THEN + NOTROT = 0 +*[RTD] ROTATED = ROTATED + 1 + PSKIPPED = 0 + ISWROT = ISWROT + 1 +* + IF( ROTOK ) THEN +* + AQOAP = AAQQ / AAPP + APOAQ = AAPP / AAQQ + THETA = -HALF*ABS( AQOAP-APOAQ ) / AAPQ + IF( AAQQ.GT.AAPP0 )THETA = -THETA +* + IF( ABS( THETA ).GT.BIGTHETA ) THEN + T = HALF / THETA + FASTR( 3 ) = T*WORK( p ) / WORK( q ) + FASTR( 4 ) = -T*WORK( q ) / + $ WORK( p ) + CALL SROTM( M, A( 1, p ), 1, + $ A( 1, q ), 1, FASTR ) + IF( RSVEC )CALL SROTM( MVL, + $ V( 1, p ), 1, + $ V( 1, q ), 1, + $ FASTR ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ ) ) + MXSINJ = MAX( MXSINJ, ABS( T ) ) + ELSE +* +* .. choose correct signum for THETA and rotate +* + THSIGN = -SIGN( ONE, AAPQ ) + IF( AAQQ.GT.AAPP0 )THSIGN = -THSIGN + T = ONE / ( THETA+THSIGN* + $ SQRT( ONE+THETA*THETA ) ) + CS = SQRT( ONE / ( ONE+T*T ) ) + SN = T*CS + MXSINJ = MAX( MXSINJ, ABS( SN ) ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ ) ) +* + APOAQ = WORK( p ) / WORK( q ) + AQOAP = WORK( q ) / WORK( p ) + IF( WORK( p ).GE.ONE ) THEN +* + IF( WORK( q ).GE.ONE ) THEN + FASTR( 3 ) = T*APOAQ + FASTR( 4 ) = -T*AQOAP + WORK( p ) = WORK( p )*CS + WORK( q ) = WORK( q )*CS + CALL SROTM( M, A( 1, p ), 1, + $ A( 1, q ), 1, + $ FASTR ) + IF( RSVEC )CALL SROTM( MVL, + $ V( 1, p ), 1, V( 1, q ), + $ 1, FASTR ) + ELSE + CALL SAXPY( M, -T*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + CALL SAXPY( M, CS*SN*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + IF( RSVEC ) THEN + CALL SAXPY( MVL, -T*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + CALL SAXPY( MVL, + $ CS*SN*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + END IF + WORK( p ) = WORK( p )*CS + WORK( q ) = WORK( q ) / CS + END IF + ELSE + IF( WORK( q ).GE.ONE ) THEN + CALL SAXPY( M, T*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + CALL SAXPY( M, -CS*SN*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + IF( RSVEC ) THEN + CALL SAXPY( MVL, T*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + CALL SAXPY( MVL, + $ -CS*SN*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + END IF + WORK( p ) = WORK( p ) / CS + WORK( q ) = WORK( q )*CS + ELSE + IF( WORK( p ).GE.WORK( q ) ) + $ THEN + CALL SAXPY( M, -T*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + CALL SAXPY( M, CS*SN*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + WORK( p ) = WORK( p )*CS + WORK( q ) = WORK( q ) / CS + IF( RSVEC ) THEN + CALL SAXPY( MVL, + $ -T*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + CALL SAXPY( MVL, + $ CS*SN*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + END IF + ELSE + CALL SAXPY( M, T*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + CALL SAXPY( M, + $ -CS*SN*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + WORK( p ) = WORK( p ) / CS + WORK( q ) = WORK( q )*CS + IF( RSVEC ) THEN + CALL SAXPY( MVL, + $ T*APOAQ, V( 1, p ), + $ 1, V( 1, q ), 1 ) + CALL SAXPY( MVL, + $ -CS*SN*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + END IF + END IF + END IF + END IF + END IF +* + ELSE + IF( AAPP.GT.AAQQ ) THEN + CALL SCOPY( M, A( 1, p ), 1, + $ WORK( N+1 ), 1 ) + CALL SLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, WORK( N+1 ), LDA, + $ IERR ) + CALL SLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + TEMP1 = -AAPQ*WORK( p ) / WORK( q ) + CALL SAXPY( M, TEMP1, WORK( N+1 ), + $ 1, A( 1, q ), 1 ) + CALL SLASCL( 'G', 0, 0, ONE, AAQQ, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE-AAPQ*AAPQ ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + ELSE + CALL SCOPY( M, A( 1, q ), 1, + $ WORK( N+1 ), 1 ) + CALL SLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, WORK( N+1 ), LDA, + $ IERR ) + CALL SLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + TEMP1 = -AAPQ*WORK( q ) / WORK( p ) + CALL SAXPY( M, TEMP1, WORK( N+1 ), + $ 1, A( 1, p ), 1 ) + CALL SLASCL( 'G', 0, 0, ONE, AAPP, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + SVA( p ) = AAPP*SQRT( MAX( ZERO, + $ ONE-AAPQ*AAPQ ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + END IF + END IF +* END IF ROTOK THEN ... ELSE +* +* In the case of cancellation in updating SVA(q) +* .. recompute SVA(q) + IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS ) + $ THEN + IF( ( AAQQ.LT.ROOTBIG ) .AND. + $ ( AAQQ.GT.ROOTSFMIN ) ) THEN + SVA( q ) = SNRM2( M, A( 1, q ), 1 )* + $ WORK( q ) + ELSE + T = ZERO + AAQQ = ONE + CALL SLASSQ( M, A( 1, q ), 1, T, + $ AAQQ ) + SVA( q ) = T*SQRT( AAQQ )*WORK( q ) + END IF + END IF + IF( ( AAPP / AAPP0 )**2.LE.ROOTEPS ) THEN + IF( ( AAPP.LT.ROOTBIG ) .AND. + $ ( AAPP.GT.ROOTSFMIN ) ) THEN + AAPP = SNRM2( M, A( 1, p ), 1 )* + $ WORK( p ) + ELSE + T = ZERO + AAPP = ONE + CALL SLASSQ( M, A( 1, p ), 1, T, + $ AAPP ) + AAPP = T*SQRT( AAPP )*WORK( p ) + END IF + SVA( p ) = AAPP + END IF +* end of OK rotation + ELSE + NOTROT = NOTROT + 1 +*[RTD] SKIPPED = SKIPPED + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF + ELSE + NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF +* + IF( ( i.LE.SWBAND ) .AND. ( IJBLSK.GE.BLSKIP ) ) + $ THEN + SVA( p ) = AAPP + NOTROT = 0 + GO TO 2011 + END IF + IF( ( i.LE.SWBAND ) .AND. + $ ( PSKIPPED.GT.ROWSKIP ) ) THEN + AAPP = -AAPP + NOTROT = 0 + GO TO 2203 + END IF +* + 2200 CONTINUE +* end of the q-loop + 2203 CONTINUE +* + SVA( p ) = AAPP +* + ELSE +* + IF( AAPP.EQ.ZERO )NOTROT = NOTROT + + $ MIN( jgl+KBL-1, N ) - jgl + 1 + IF( AAPP.LT.ZERO )NOTROT = 0 +* + END IF +* + 2100 CONTINUE +* end of the p-loop + 2010 CONTINUE +* end of the jbc-loop + 2011 CONTINUE +*2011 bailed out of the jbc-loop + DO 2012 p = igl, MIN( igl+KBL-1, N ) + SVA( p ) = ABS( SVA( p ) ) + 2012 CONTINUE +*** + 2000 CONTINUE +*2000 :: end of the ibr-loop +* +* .. update SVA(N) + IF( ( SVA( N ).LT.ROOTBIG ) .AND. ( SVA( N ).GT.ROOTSFMIN ) ) + $ THEN + SVA( N ) = SNRM2( M, A( 1, N ), 1 )*WORK( N ) + ELSE + T = ZERO + AAPP = ONE + CALL SLASSQ( M, A( 1, N ), 1, T, AAPP ) + SVA( N ) = T*SQRT( AAPP )*WORK( N ) + END IF +* +* Additional steering devices +* + IF( ( i.LT.SWBAND ) .AND. ( ( MXAAPQ.LE.ROOTTOL ) .OR. + $ ( ISWROT.LE.N ) ) )SWBAND = i +* + IF( ( i.GT.SWBAND+1 ) .AND. ( MXAAPQ.LT.SQRT( FLOAT( N ) )* + $ TOL ) .AND. ( FLOAT( N )*MXAAPQ*MXSINJ.LT.TOL ) ) THEN + GO TO 1994 + END IF +* + IF( NOTROT.GE.EMPTSW )GO TO 1994 +* + 1993 CONTINUE +* end i=1:NSWEEP loop +* +* #:( Reaching this point means that the procedure has not converged. + INFO = NSWEEP - 1 + GO TO 1995 +* + 1994 CONTINUE +* #:) Reaching this point means numerical convergence after the i-th +* sweep. +* + INFO = 0 +* #:) INFO = 0 confirms successful iterations. + 1995 CONTINUE +* +* Sort the singular values and find how many are above +* the underflow threshold. +* + N2 = 0 + N4 = 0 + DO 5991 p = 1, N - 1 + q = ISAMAX( N-p+1, SVA( p ), 1 ) + p - 1 + IF( p.NE.q ) THEN + TEMP1 = SVA( p ) + SVA( p ) = SVA( q ) + SVA( q ) = TEMP1 + TEMP1 = WORK( p ) + WORK( p ) = WORK( q ) + WORK( q ) = TEMP1 + CALL SSWAP( M, A( 1, p ), 1, A( 1, q ), 1 ) + IF( RSVEC )CALL SSWAP( MVL, V( 1, p ), 1, V( 1, q ), 1 ) + END IF + IF( SVA( p ).NE.ZERO ) THEN + N4 = N4 + 1 + IF( SVA( p )*SKL.GT.SFMIN )N2 = N2 + 1 + END IF + 5991 CONTINUE + IF( SVA( N ).NE.ZERO ) THEN + N4 = N4 + 1 + IF( SVA( N )*SKL.GT.SFMIN )N2 = N2 + 1 + END IF +* +* Normalize the left singular vectors. +* + IF( LSVEC .OR. UCTOL ) THEN + DO 1998 p = 1, N2 + CALL SSCAL( M, WORK( p ) / SVA( p ), A( 1, p ), 1 ) + 1998 CONTINUE + END IF +* +* Scale the product of Jacobi rotations (assemble the fast rotations). +* + IF( RSVEC ) THEN + IF( APPLV ) THEN + DO 2398 p = 1, N + CALL SSCAL( MVL, WORK( p ), V( 1, p ), 1 ) + 2398 CONTINUE + ELSE + DO 2399 p = 1, N + TEMP1 = ONE / SNRM2( MVL, V( 1, p ), 1 ) + CALL SSCAL( MVL, TEMP1, V( 1, p ), 1 ) + 2399 CONTINUE + END IF + END IF +* +* Undo scaling, if necessary (and possible). + IF( ( ( SKL.GT.ONE ) .AND. ( SVA( 1 ).LT.( BIG / SKL ) ) ) + $ .OR. ( ( SKL.LT.ONE ) .AND. ( SVA( MAX( N2, 1 ) ) .GT. + $ ( SFMIN / SKL ) ) ) ) THEN + DO 2400 p = 1, N + SVA( P ) = SKL*SVA( P ) + 2400 CONTINUE + SKL = ONE + END IF +* + WORK( 1 ) = SKL +* The singular values of A are SKL*SVA(1:N). If SKL.NE.ONE +* then some of the singular values may overflow or underflow and +* the spectrum is given in this factored representation. +* + WORK( 2 ) = FLOAT( N4 ) +* N4 is the number of computed nonzero singular values of A. +* + WORK( 3 ) = FLOAT( N2 ) +* N2 is the number of singular values of A greater than SFMIN. +* If N2 \brief SGESVX computes the solution to system of linear equations A * X = B for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGESVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, +* EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, TRANS +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ BERR( * ), C( * ), FERR( * ), R( * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGESVX uses the LU factorization to compute the solution to a real +*> system of linear equations +*> A * X = B, +*> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +*> TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +*> TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +*> or diag(C)*B (if TRANS = 'T' or 'C'). +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the +*> matrix A (after equilibration if FACT = 'E') as +*> A = P * L * U, +*> where P is a permutation matrix, L is a unit lower triangular +*> matrix, and U is upper triangular. +*> +*> 3. If some U(i,i)=0, so that U is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +*> that it solves the original system before equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF and IPIV contain the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by R and C. +*> A, AF, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is +*> not 'N', then A must have been equilibrated by the scaling +*> factors in R and/or C. A is not modified if FACT = 'F' or +*> 'N', or if FACT = 'E' and EQUED = 'N' on exit. +*> +*> On exit, if EQUED .ne. 'N', A is scaled as follows: +*> EQUED = 'R': A := diag(R) * A +*> EQUED = 'C': A := A * diag(C) +*> EQUED = 'B': A := diag(R) * A * diag(C). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the factors L and U from the factorization +*> A = P*L*U as computed by SGETRF. If EQUED .ne. 'N', then +*> AF is the factored form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the equilibrated matrix A (see the description of A for +*> the form of the equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains the pivot indices from the factorization A = P*L*U +*> as computed by SGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the equilibrated matrix A. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is REAL array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. R is an input argument if FACT = 'F'; +*> otherwise, R is an output argument. If FACT = 'F' and +*> EQUED = 'R' or 'B', each element of R must be positive. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. C is an input argument if FACT = 'F'; +*> otherwise, C is an output argument. If FACT = 'F' and +*> EQUED = 'C' or 'B', each element of C must be positive. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +*> diag(R)*B; +*> if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +*> overwritten by diag(C)*B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X +*> to the original system of equations. Note that A and B are +*> modified on exit if EQUED .ne. 'N', and the solution to the +*> equilibrated system is inv(diag(C))*X if TRANS = 'N' and +*> EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' +*> and EQUED = 'R' or 'B'. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The estimate of the reciprocal condition number of the matrix +*> A after equilibration (if done). If RCOND is less than the +*> machine precision (in particular, if RCOND = 0), the matrix +*> is singular to working precision. This condition is +*> indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (4*N) +*> On exit, WORK(1) contains the reciprocal pivot growth +*> factor norm(A)/norm(U). The "max absolute element" norm is +*> used. If WORK(1) is much less than 1, then the stability +*> of the LU factorization of the (equilibrated) matrix A +*> could be poor. This also means that the solution X, condition +*> estimator RCOND, and forward error bound FERR could be +*> unreliable. If factorization fails with 0 WORK(1) contains the reciprocal pivot growth factor for the +*> leading INFO columns of A. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: U(i,i) is exactly zero. The factorization has +*> been completed, but the factor U is exactly +*> singular, so the solution and error bounds +*> could not be computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realGEsolve +* +* ===================================================================== + SUBROUTINE SGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), C( * ), FERR( * ), R( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + CHARACTER NORM + INTEGER I, INFEQU, J + REAL AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, RPVGRW, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANGE, SLANTR + EXTERNAL LSAME, SLAMCH, SLANGE, SLANTR +* .. +* .. External Subroutines .. + EXTERNAL SGECON, SGEEQU, SGERFS, SGETRF, SGETRS, SLACPY, + $ SLAQGE, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -12 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGESVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL SGEEQU( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL SLAQGE( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = R( I )*B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( COLEQU ) THEN + DO 60 J = 1, NRHS + DO 50 I = 1, N + B( I, J ) = C( I )*B( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of A. +* + CALL SLACPY( 'Full', N, N, A, LDA, AF, LDAF ) + CALL SGETRF( N, N, AF, LDAF, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + RPVGRW = SLANTR( 'M', 'U', 'N', INFO, INFO, AF, LDAF, + $ WORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = SLANGE( 'M', N, INFO, A, LDA, WORK ) / RPVGRW + END IF + WORK( 1 ) = RPVGRW + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A and the +* reciprocal pivot growth factor RPVGRW. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = SLANGE( NORM, N, N, A, LDA, WORK ) + RPVGRW = SLANTR( 'M', 'U', 'N', N, N, AF, LDAF, WORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = SLANGE( 'M', N, N, A, LDA, WORK ) / RPVGRW + END IF +* +* Compute the reciprocal of the condition number of A. +* + CALL SGECON( NORM, N, AF, LDAF, ANORM, RCOND, WORK, IWORK, INFO ) +* +* Compute the solution matrix X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SGETRS( TRANS, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL SGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X, + $ LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( NOTRAN ) THEN + IF( COLEQU ) THEN + DO 80 J = 1, NRHS + DO 70 I = 1, N + X( I, J ) = C( I )*X( I, J ) + 70 CONTINUE + 80 CONTINUE + DO 90 J = 1, NRHS + FERR( J ) = FERR( J ) / COLCND + 90 CONTINUE + END IF + ELSE IF( ROWEQU ) THEN + DO 110 J = 1, NRHS + DO 100 I = 1, N + X( I, J ) = R( I )*X( I, J ) + 100 CONTINUE + 110 CONTINUE + DO 120 J = 1, NRHS + FERR( J ) = FERR( J ) / ROWCND + 120 CONTINUE + END IF +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + WORK( 1 ) = RPVGRW + RETURN +* +* End of SGESVX +* + END diff --git a/dspl/liblapack/SRC/sgesvxx.f b/dspl/liblapack/SRC/sgesvxx.f new file mode 100644 index 0000000..281f198 --- /dev/null +++ b/dspl/liblapack/SRC/sgesvxx.f @@ -0,0 +1,772 @@ +*> \brief SGESVXX computes the solution to system of linear equations A * X = B for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGESVXX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGESVXX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, +* EQUED, R, C, B, LDB, X, LDX, RCOND, RPVGRW, +* BERR, N_ERR_BNDS, ERR_BNDS_NORM, +* ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, TRANS +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* REAL RCOND, RPVGRW +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX , * ),WORK( * ) +* REAL R( * ), C( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGESVXX uses the LU factorization to compute the solution to a +*> real system of linear equations A * X = B, where A is an +*> N-by-N matrix and X and B are N-by-NRHS matrices. +*> +*> If requested, both normwise and maximum componentwise error bounds +*> are returned. SGESVXX will return a solution with a tiny +*> guaranteed error (O(eps) where eps is the working machine +*> precision) unless the matrix is very ill-conditioned, in which +*> case a warning is returned. Relevant condition numbers also are +*> calculated and returned. +*> +*> SGESVXX accepts user-provided factorizations and equilibration +*> factors; see the definitions of the FACT and EQUED options. +*> Solving with refinement and using a factorization from a previous +*> SGESVXX call will also produce a solution with either O(eps) +*> errors or warnings, but we cannot make that claim for general +*> user-provided factorizations and equilibration factors if they +*> differ from what SGESVXX would itself produce. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> +*> TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +*> TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +*> TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +*> +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +*> or diag(C)*B (if TRANS = 'T' or 'C'). +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor +*> the matrix A (after equilibration if FACT = 'E') as +*> +*> A = P * L * U, +*> +*> where P is a permutation matrix, L is a unit lower triangular +*> matrix, and U is upper triangular. +*> +*> 3. If some U(i,i)=0, so that U is exactly singular, then the +*> routine returns with INFO = i. Otherwise, the factored form of A +*> is used to estimate the condition number of the matrix A (see +*> argument RCOND). If the reciprocal of the condition number is less +*> than machine precision, the routine still goes on to solve for X +*> and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), +*> the routine will use iterative refinement to try to get a small +*> error and error bounds. Refinement calculates the residual to at +*> least twice the working precision. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +*> that it solves the original system before equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF and IPIV contain the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by R and C. +*> A, AF, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate Transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is +*> not 'N', then A must have been equilibrated by the scaling +*> factors in R and/or C. A is not modified if FACT = 'F' or +*> 'N', or if FACT = 'E' and EQUED = 'N' on exit. +*> +*> On exit, if EQUED .ne. 'N', A is scaled as follows: +*> EQUED = 'R': A := diag(R) * A +*> EQUED = 'C': A := A * diag(C) +*> EQUED = 'B': A := diag(R) * A * diag(C). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the factors L and U from the factorization +*> A = P*L*U as computed by SGETRF. If EQUED .ne. 'N', then +*> AF is the factored form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the equilibrated matrix A (see the description of A for +*> the form of the equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains the pivot indices from the factorization A = P*L*U +*> as computed by SGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the equilibrated matrix A. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is REAL array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. R is an input argument if FACT = 'F'; +*> otherwise, R is an output argument. If FACT = 'F' and +*> EQUED = 'R' or 'B', each element of R must be positive. +*> If R is output, each element of R is a power of the radix. +*> If R is input, each element of R should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. C is an input argument if FACT = 'F'; +*> otherwise, C is an output argument. If FACT = 'F' and +*> EQUED = 'C' or 'B', each element of C must be positive. +*> If C is output, each element of C is a power of the radix. +*> If C is input, each element of C should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +*> diag(R)*B; +*> if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +*> overwritten by diag(C)*B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X to the original +*> system of equations. Note that A and B are modified on exit +*> if EQUED .ne. 'N', and the solution to the equilibrated system is +*> inv(diag(C))*X if TRANS = 'N' and EQUED = 'C' or 'B', or +*> inv(diag(R))*X if TRANS = 'T' or 'C' and EQUED = 'R' or 'B'. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] RPVGRW +*> \verbatim +*> RPVGRW is REAL +*> Reciprocal pivot growth. On exit, this contains the reciprocal +*> pivot growth factor norm(A)/norm(U). The "max absolute element" +*> norm is used. If this is much less than 1, then the stability of +*> the LU factorization of the (equilibrated) matrix A could be poor. +*> This also means that the solution X, estimated condition numbers, +*> and error bounds could be unreliable. If factorization fails with +*> 0 for the leading INFO columns of A. In SGESVX, this quantity is +*> returned in WORK(1). +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is REAL array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realGEsolve +* +* ===================================================================== + SUBROUTINE SGESVXX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ EQUED, R, C, B, LDB, X, LDX, RCOND, RPVGRW, + $ BERR, N_ERR_BNDS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, IWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + REAL RCOND, RPVGRW +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX , * ),WORK( * ) + REAL R( * ), C( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + INTEGER INFEQU, J + REAL AMAX, BIGNUM, COLCND, RCMAX, RCMIN, ROWCND, + $ SMLNUM +* .. +* .. External Functions .. + EXTERNAL LSAME, SLAMCH, SLA_GERPVGRW + LOGICAL LSAME + REAL SLAMCH, SLA_GERPVGRW +* .. +* .. External Subroutines .. + EXTERNAL SGEEQUB, SGETRF, SGETRS, SLACPY, SLAQGE, + $ XERBLA, SLASCL2, SGERFSX +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF +* +* Default is failure. If an input parameter is wrong or +* factorization fails, make everything look horrible. Only the +* pivot growth is set here, the rest is initialized in SGERFSX. +* + RPVGRW = ZERO +* +* Test the input parameters. PARAMS is not tested until SGERFSX. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT. + $ LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -12 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGESVXX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL SGEEQUB( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL SLAQGE( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF +* +* If the scaling factors are not applied, set them to 1.0. +* + IF ( .NOT.ROWEQU ) THEN + DO J = 1, N + R( J ) = 1.0 + END DO + END IF + IF ( .NOT.COLEQU ) THEN + DO J = 1, N + C( J ) = 1.0 + END DO + END IF + END IF +* +* Scale the right-hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) CALL SLASCL2( N, NRHS, R, B, LDB ) + ELSE + IF( COLEQU ) CALL SLASCL2( N, NRHS, C, B, LDB ) + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of A. +* + CALL SLACPY( 'Full', N, N, A, LDA, AF, LDAF ) + CALL SGETRF( N, N, AF, LDAF, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Pivot in column INFO is exactly 0 +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + RPVGRW = SLA_GERPVGRW( N, INFO, A, LDA, AF, LDAF ) + RETURN + END IF + END IF +* +* Compute the reciprocal pivot growth factor RPVGRW. +* + RPVGRW = SLA_GERPVGRW( N, N, A, LDA, AF, LDAF ) +* +* Compute the solution matrix X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SGETRS( TRANS, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL SGERFSX( TRANS, EQUED, N, NRHS, A, LDA, AF, LDAF, + $ IPIV, R, C, B, LDB, X, LDX, RCOND, BERR, + $ N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, IWORK, INFO ) +* +* Scale solutions. +* + IF ( COLEQU .AND. NOTRAN ) THEN + CALL SLASCL2 ( N, NRHS, C, X, LDX ) + ELSE IF ( ROWEQU .AND. .NOT.NOTRAN ) THEN + CALL SLASCL2 ( N, NRHS, R, X, LDX ) + END IF +* + RETURN +* +* End of SGESVXX + + END diff --git a/dspl/liblapack/SRC/sgetc2.f b/dspl/liblapack/SRC/sgetc2.f new file mode 100644 index 0000000..b0301b9 --- /dev/null +++ b/dspl/liblapack/SRC/sgetc2.f @@ -0,0 +1,234 @@ +*> \brief \b SGETC2 computes the LU factorization with complete pivoting of the general n-by-n matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGETC2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGETC2( N, A, LDA, IPIV, JPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), JPIV( * ) +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGETC2 computes an LU factorization with complete pivoting of the +*> n-by-n matrix A. The factorization has the form A = P * L * U * Q, +*> where P and Q are permutation matrices, L is lower triangular with +*> unit diagonal elements and U is upper triangular. +*> +*> This is the Level 2 BLAS algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the n-by-n matrix A to be factored. +*> On exit, the factors L and U from the factorization +*> A = P*L*U*Q; the unit diagonal elements of L are not stored. +*> If U(k, k) appears to be less than SMIN, U(k, k) is given the +*> value of SMIN, i.e., giving a nonsingular perturbed system. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension(N). +*> The pivot indices; for 1 <= i <= N, row i of the +*> matrix has been interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] JPIV +*> \verbatim +*> JPIV is INTEGER array, dimension(N). +*> The pivot indices; for 1 <= j <= N, column j of the +*> matrix has been interchanged with column JPIV(j). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = k, U(k, k) is likely to produce owerflow if +*> we try to solve for x in Ax = b. So U is perturbed to +*> avoid the overflow. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realGEauxiliary +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== + SUBROUTINE SGETC2( N, A, LDA, IPIV, JPIV, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IP, IPV, J, JP, JPV + REAL BIGNUM, EPS, SMIN, SMLNUM, XMAX +* .. +* .. External Subroutines .. + EXTERNAL SGER, SLABAD, SSWAP +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Handle the case N=1 by itself +* + IF( N.EQ.1 ) THEN + IPIV( 1 ) = 1 + JPIV( 1 ) = 1 + IF( ABS( A( 1, 1 ) ).LT.SMLNUM ) THEN + INFO = 1 + A( 1, 1 ) = SMLNUM + END IF + RETURN + END IF +* +* Factorize A using complete pivoting. +* Set pivots less than SMIN to SMIN. +* + DO 40 I = 1, N - 1 +* +* Find max element in matrix A +* + XMAX = ZERO + DO 20 IP = I, N + DO 10 JP = I, N + IF( ABS( A( IP, JP ) ).GE.XMAX ) THEN + XMAX = ABS( A( IP, JP ) ) + IPV = IP + JPV = JP + END IF + 10 CONTINUE + 20 CONTINUE + IF( I.EQ.1 ) + $ SMIN = MAX( EPS*XMAX, SMLNUM ) +* +* Swap rows +* + IF( IPV.NE.I ) + $ CALL SSWAP( N, A( IPV, 1 ), LDA, A( I, 1 ), LDA ) + IPIV( I ) = IPV +* +* Swap columns +* + IF( JPV.NE.I ) + $ CALL SSWAP( N, A( 1, JPV ), 1, A( 1, I ), 1 ) + JPIV( I ) = JPV +* +* Check for singularity +* + IF( ABS( A( I, I ) ).LT.SMIN ) THEN + INFO = I + A( I, I ) = SMIN + END IF + DO 30 J = I + 1, N + A( J, I ) = A( J, I ) / A( I, I ) + 30 CONTINUE + CALL SGER( N-I, N-I, -ONE, A( I+1, I ), 1, A( I, I+1 ), LDA, + $ A( I+1, I+1 ), LDA ) + 40 CONTINUE +* + IF( ABS( A( N, N ) ).LT.SMIN ) THEN + INFO = N + A( N, N ) = SMIN + END IF +* +* Set last pivots to N +* + IPIV( N ) = N + JPIV( N ) = N +* + RETURN +* +* End of SGETC2 +* + END diff --git a/dspl/liblapack/SRC/sgetf2.f b/dspl/liblapack/SRC/sgetf2.f new file mode 100644 index 0000000..8d28a4a --- /dev/null +++ b/dspl/liblapack/SRC/sgetf2.f @@ -0,0 +1,213 @@ +*> \brief \b SGETF2 computes the LU factorization of a general m-by-n matrix using partial pivoting with row interchanges (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGETF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGETF2( M, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGETF2 computes an LU factorization of a general m-by-n matrix A +*> using partial pivoting with row interchanges. +*> +*> The factorization has the form +*> A = P * L * U +*> where P is a permutation matrix, L is lower triangular with unit +*> diagonal elements (lower trapezoidal if m > n), and U is upper +*> triangular (upper trapezoidal if m < n). +*> +*> This is the right-looking Level 2 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the m by n matrix to be factored. +*> On exit, the factors L and U from the factorization +*> A = P*L*U; the unit diagonal elements of L are not stored. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, U(k,k) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +* ===================================================================== + SUBROUTINE SGETF2( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + REAL SFMIN + INTEGER I, J, JP +* .. +* .. External Functions .. + REAL SLAMCH + INTEGER ISAMAX + EXTERNAL SLAMCH, ISAMAX +* .. +* .. External Subroutines .. + EXTERNAL SGER, SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGETF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Compute machine safe minimum +* + SFMIN = SLAMCH('S') +* + DO 10 J = 1, MIN( M, N ) +* +* Find pivot and test for singularity. +* + JP = J - 1 + ISAMAX( M-J+1, A( J, J ), 1 ) + IPIV( J ) = JP + IF( A( JP, J ).NE.ZERO ) THEN +* +* Apply the interchange to columns 1:N. +* + IF( JP.NE.J ) + $ CALL SSWAP( N, A( J, 1 ), LDA, A( JP, 1 ), LDA ) +* +* Compute elements J+1:M of J-th column. +* + IF( J.LT.M ) THEN + IF( ABS(A( J, J )) .GE. SFMIN ) THEN + CALL SSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 ) + ELSE + DO 20 I = 1, M-J + A( J+I, J ) = A( J+I, J ) / A( J, J ) + 20 CONTINUE + END IF + END IF +* + ELSE IF( INFO.EQ.0 ) THEN +* + INFO = J + END IF +* + IF( J.LT.MIN( M, N ) ) THEN +* +* Update trailing submatrix. +* + CALL SGER( M-J, N-J, -ONE, A( J+1, J ), 1, A( J, J+1 ), LDA, + $ A( J+1, J+1 ), LDA ) + END IF + 10 CONTINUE + RETURN +* +* End of SGETF2 +* + END diff --git a/dspl/liblapack/SRC/sgetrf.f b/dspl/liblapack/SRC/sgetrf.f new file mode 100644 index 0000000..9e13f0f --- /dev/null +++ b/dspl/liblapack/SRC/sgetrf.f @@ -0,0 +1,225 @@ +*> \brief \b SGETRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGETRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGETRF( M, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGETRF computes an LU factorization of a general M-by-N matrix A +*> using partial pivoting with row interchanges. +*> +*> The factorization has the form +*> A = P * L * U +*> where P is a permutation matrix, L is lower triangular with unit +*> diagonal elements (lower trapezoidal if m > n), and U is upper +*> triangular (upper trapezoidal if m < n). +*> +*> This is the right-looking Level 3 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix to be factored. +*> On exit, the factors L and U from the factorization +*> A = P*L*U; the unit diagonal elements of L are not stored. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +* ===================================================================== + SUBROUTINE SGETRF( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO, J, JB, NB +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SGETRF2, SLASWP, STRSM, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'SGETRF', ' ', M, N, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN +* +* Use unblocked code. +* + CALL SGETRF2( M, N, A, LDA, IPIV, INFO ) + ELSE +* +* Use blocked code. +* + DO 20 J = 1, MIN( M, N ), NB + JB = MIN( MIN( M, N )-J+1, NB ) +* +* Factor diagonal and subdiagonal blocks and test for exact +* singularity. +* + CALL SGETRF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO ) +* +* Adjust INFO and the pivot indices. +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + J - 1 + DO 10 I = J, MIN( M, J+JB-1 ) + IPIV( I ) = J - 1 + IPIV( I ) + 10 CONTINUE +* +* Apply interchanges to columns 1:J-1. +* + CALL SLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 ) +* + IF( J+JB.LE.N ) THEN +* +* Apply interchanges to columns J+JB:N. +* + CALL SLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1, + $ IPIV, 1 ) +* +* Compute block row of U. +* + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB, + $ N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ), + $ LDA ) + IF( J+JB.LE.M ) THEN +* +* Update trailing submatrix. +* + CALL SGEMM( 'No transpose', 'No transpose', M-J-JB+1, + $ N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA, + $ A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ), + $ LDA ) + END IF + END IF + 20 CONTINUE + END IF + RETURN +* +* End of SGETRF +* + END diff --git a/dspl/liblapack/SRC/sgetrf2.f b/dspl/liblapack/SRC/sgetrf2.f new file mode 100644 index 0000000..aa826f6 --- /dev/null +++ b/dspl/liblapack/SRC/sgetrf2.f @@ -0,0 +1,272 @@ +*> \brief \b SGETRF2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE SGETRF2( M, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGETRF2 computes an LU factorization of a general M-by-N matrix A +*> using partial pivoting with row interchanges. +*> +*> The factorization has the form +*> A = P * L * U +*> where P is a permutation matrix, L is lower triangular with unit +*> diagonal elements (lower trapezoidal if m > n), and U is upper +*> triangular (upper trapezoidal if m < n). +*> +*> This is the recursive version of the algorithm. It divides +*> the matrix into four submatrices: +*> +*> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 +*> A = [ -----|----- ] with n1 = min(m,n)/2 +*> [ A21 | A22 ] n2 = n-n1 +*> +*> [ A11 ] +*> The subroutine calls itself to factor [ --- ], +*> [ A12 ] +*> [ A12 ] +*> do the swaps on [ --- ], solve A12, update A22, +*> [ A22 ] +*> +*> then calls itself to factor A22 and do the swaps on A21. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix to be factored. +*> On exit, the factors L and U from the factorization +*> A = P*L*U; the unit diagonal elements of L are not stored. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realGEcomputational +* +* ===================================================================== + RECURSIVE SUBROUTINE SGETRF2( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + REAL SFMIN, TEMP + INTEGER I, IINFO, n1, n2 +* .. +* .. External Functions .. + REAL SLAMCH + INTEGER ISAMAX + EXTERNAL SLAMCH, ISAMAX +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SSCAL, SLASWP, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGETRF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN + + IF ( M.EQ.1 ) THEN +* +* Use unblocked code for one row case +* Just need to handle IPIV and INFO +* + IPIV( 1 ) = 1 + IF ( A(1,1).EQ.ZERO ) + $ INFO = 1 +* + ELSE IF( N.EQ.1 ) THEN +* +* Use unblocked code for one column case +* +* +* Compute machine safe minimum +* + SFMIN = SLAMCH('S') +* +* Find pivot and test for singularity +* + I = ISAMAX( M, A( 1, 1 ), 1 ) + IPIV( 1 ) = I + IF( A( I, 1 ).NE.ZERO ) THEN +* +* Apply the interchange +* + IF( I.NE.1 ) THEN + TEMP = A( 1, 1 ) + A( 1, 1 ) = A( I, 1 ) + A( I, 1 ) = TEMP + END IF +* +* Compute elements 2:M of the column +* + IF( ABS(A( 1, 1 )) .GE. SFMIN ) THEN + CALL SSCAL( M-1, ONE / A( 1, 1 ), A( 2, 1 ), 1 ) + ELSE + DO 10 I = 1, M-1 + A( 1+I, 1 ) = A( 1+I, 1 ) / A( 1, 1 ) + 10 CONTINUE + END IF +* + ELSE + INFO = 1 + END IF +* + ELSE +* +* Use recursive code +* + N1 = MIN( M, N ) / 2 + N2 = N-N1 +* +* [ A11 ] +* Factor [ --- ] +* [ A21 ] +* + CALL SGETRF2( m, n1, A, lda, ipiv, iinfo ) + + IF ( info.EQ.0 .AND. iinfo.GT.0 ) + $ info = iinfo +* +* [ A12 ] +* Apply interchanges to [ --- ] +* [ A22 ] +* + CALL SLASWP( N2, A( 1, N1+1 ), LDA, 1, N1, IPIV, 1 ) +* +* Solve A12 +* + CALL STRSM( 'L', 'L', 'N', 'U', N1, N2, ONE, A, LDA, + $ A( 1, N1+1 ), LDA ) +* +* Update A22 +* + CALL SGEMM( 'N', 'N', M-N1, N2, N1, -ONE, A( N1+1, 1 ), LDA, + $ A( 1, N1+1 ), LDA, ONE, A( N1+1, N1+1 ), LDA ) +* +* Factor A22 +* + CALL SGETRF2( M-N1, N2, A( N1+1, N1+1 ), LDA, IPIV( N1+1 ), + $ IINFO ) +* +* Adjust INFO and the pivot indices +* + IF ( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + N1 + DO 20 I = N1+1, MIN( M, N ) + IPIV( I ) = IPIV( I ) + N1 + 20 CONTINUE +* +* Apply interchanges to A21 +* + CALL SLASWP( N1, A( 1, 1 ), LDA, N1+1, MIN( M, N), IPIV, 1 ) +* + END IF + RETURN +* +* End of SGETRF2 +* + END diff --git a/dspl/liblapack/SRC/sgetri.f b/dspl/liblapack/SRC/sgetri.f new file mode 100644 index 0000000..e609247 --- /dev/null +++ b/dspl/liblapack/SRC/sgetri.f @@ -0,0 +1,261 @@ +*> \brief \b SGETRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGETRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGETRI computes the inverse of a matrix using the LU factorization +*> computed by SGETRF. +*> +*> This method inverts U and then computes inv(A) by solving the system +*> inv(A)*L = inv(U) for inv(A). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the factors L and U from the factorization +*> A = P*L*U as computed by SGETRF. +*> On exit, if INFO = 0, the inverse of the original matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from SGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO=0, then WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimal performance LWORK >= N*NB, where NB is +*> the optimal blocksize returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero; the matrix is +*> singular and its inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +* ===================================================================== + SUBROUTINE SGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB, + $ NBMIN, NN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SGEMV, SSWAP, STRSM, STRTRI, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NB = ILAENV( 1, 'SGETRI', ' ', N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGETRI', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form inv(U). If INFO > 0 from STRTRI, then U is singular, +* and the inverse is not computed. +* + CALL STRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = MAX( LDWORK*NB, 1 ) + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SGETRI', ' ', N, -1, -1, -1 ) ) + END IF + ELSE + IWS = N + END IF +* +* Solve the equation inv(A)*L = inv(U) for inv(A). +* + IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + DO 20 J = N, 1, -1 +* +* Copy current column of L to WORK and replace with zeros. +* + DO 10 I = J + 1, N + WORK( I ) = A( I, J ) + A( I, J ) = ZERO + 10 CONTINUE +* +* Compute current column of inv(A). +* + IF( J.LT.N ) + $ CALL SGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ), + $ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 ) + 20 CONTINUE + ELSE +* +* Use blocked code. +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 50 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) +* +* Copy current block column of L to WORK and replace with +* zeros. +* + DO 40 JJ = J, J + JB - 1 + DO 30 I = JJ + 1, N + WORK( I+( JJ-J )*LDWORK ) = A( I, JJ ) + A( I, JJ ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Compute current block column of inv(A). +* + IF( J+JB.LE.N ) + $ CALL SGEMM( 'No transpose', 'No transpose', N, JB, + $ N-J-JB+1, -ONE, A( 1, J+JB ), LDA, + $ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA ) + CALL STRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, + $ ONE, WORK( J ), LDWORK, A( 1, J ), LDA ) + 50 CONTINUE + END IF +* +* Apply column interchanges. +* + DO 60 J = N - 1, 1, -1 + JP = IPIV( J ) + IF( JP.NE.J ) + $ CALL SSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 ) + 60 CONTINUE +* + WORK( 1 ) = IWS + RETURN +* +* End of SGETRI +* + END diff --git a/dspl/liblapack/SRC/sgetrs.f b/dspl/liblapack/SRC/sgetrs.f new file mode 100644 index 0000000..d52f773 --- /dev/null +++ b/dspl/liblapack/SRC/sgetrs.f @@ -0,0 +1,225 @@ +*> \brief \b SGETRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGETRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGETRS solves a system of linear equations +*> A * X = B or A**T * X = B +*> with a general N-by-N matrix A using the LU factorization computed +*> by SGETRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T* X = B (Transpose) +*> = 'C': A**T* X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The factors L and U from the factorization A = P*L*U +*> as computed by SGETRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from SGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +* ===================================================================== + SUBROUTINE SGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLASWP, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGETRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( NOTRAN ) THEN +* +* Solve A * X = B. +* +* Apply row interchanges to the right hand sides. +* + CALL SLASWP( NRHS, B, LDB, 1, N, IPIV, 1 ) +* +* Solve L*X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A**T * X = B. +* +* Solve U**T *X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve L**T *X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Lower', 'Transpose', 'Unit', N, NRHS, ONE, + $ A, LDA, B, LDB ) +* +* Apply row interchanges to the solution vectors. +* + CALL SLASWP( NRHS, B, LDB, 1, N, IPIV, -1 ) + END IF +* + RETURN +* +* End of SGETRS +* + END diff --git a/dspl/liblapack/SRC/sgetsls.f b/dspl/liblapack/SRC/sgetsls.f new file mode 100644 index 0000000..35af66c --- /dev/null +++ b/dspl/liblapack/SRC/sgetsls.f @@ -0,0 +1,494 @@ +* Definition: +* =========== +* +* SUBROUTINE SGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, +* $ WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGETSLS solves overdetermined or underdetermined real linear systems +*> involving an M-by-N matrix A, using a tall skinny QR or short wide LQ +*> factorization of A. It is assumed that A has full rank. +*> +*> +*> +*> The following options are provided: +*> +*> 1. If TRANS = 'N' and m >= n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A*X ||. +*> +*> 2. If TRANS = 'N' and m < n: find the minimum norm solution of +*> an underdetermined system A * X = B. +*> +*> 3. If TRANS = 'T' and m >= n: find the minimum norm solution of +*> an undetermined system A**T * X = B. +*> +*> 4. If TRANS = 'T' and m < n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A**T * X ||. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': the linear system involves A; +*> = 'T': the linear system involves A**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of the matrices B and X. NRHS >=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> A is overwritten by details of its QR or LQ +*> factorization as returned by SGEQR or SGELQ. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the matrix B of right hand side vectors, stored +*> columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +*> if TRANS = 'T'. +*> On exit, if INFO = 0, B is overwritten by the solution +*> vectors, stored columnwise: +*> if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +*> squares solution vectors. +*> if TRANS = 'N' and m < n, rows 1 to N of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'T' and m >= n, rows 1 to M of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'T' and m < n, rows 1 to M of B contain the +*> least squares solution vectors. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= MAX(1,M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) contains optimal (or either minimal +*> or optimal, if query was assumed) LWORK. +*> See LWORK for details. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If LWORK = -1 or -2, then a workspace query is assumed. +*> If LWORK = -1, the routine calculates optimal size of WORK for the +*> optimal performance and returns this value in WORK(1). +*> If LWORK = -2, the routine calculates minimal size of WORK and +*> returns this value in WORK(1). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of the +*> triangular factor of A is zero, so that A does not have +*> full rank; the least squares solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleGEsolve +* +* ===================================================================== + SUBROUTINE SGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, + $ WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TRAN + INTEGER I, IASCL, IBSCL, J, MINMN, MAXMN, BROW, + $ SCLLEN, MNK, TSZO, TSZM, LWO, LWM, LW1, LW2, + $ WSIZEO, WSIZEM, INFO2 + REAL ANRM, BIGNUM, BNRM, SMLNUM, TQ( 5 ), WORKQ( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLABAD, SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SGEQR, SGEMQR, SLASCL, SLASET, + $ STRTRS, XERBLA, SGELQ, SGEMLQ +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, MIN, INT +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + MNK = MAX( MINMN, NRHS ) + TRAN = LSAME( TRANS, 'T' ) +* + LQUERY = ( LWORK.EQ.-1 .OR. LWORK.EQ.-2 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. + $ LSAME( TRANS, 'T' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size and minimum LWORK +* + IF( M.GE.N ) THEN + CALL SGEQR( M, N, A, LDA, TQ, -1, WORKQ, -1, INFO2 ) + TSZO = INT( TQ( 1 ) ) + LWO = INT( WORKQ( 1 ) ) + CALL SGEMQR( 'L', TRANS, M, NRHS, N, A, LDA, TQ, + $ TSZO, B, LDB, WORKQ, -1, INFO2 ) + LWO = MAX( LWO, INT( WORKQ( 1 ) ) ) + CALL SGEQR( M, N, A, LDA, TQ, -2, WORKQ, -2, INFO2 ) + TSZM = INT( TQ( 1 ) ) + LWM = INT( WORKQ( 1 ) ) + CALL SGEMQR( 'L', TRANS, M, NRHS, N, A, LDA, TQ, + $ TSZM, B, LDB, WORKQ, -1, INFO2 ) + LWM = MAX( LWM, INT( WORKQ( 1 ) ) ) + WSIZEO = TSZO + LWO + WSIZEM = TSZM + LWM + ELSE + CALL SGELQ( M, N, A, LDA, TQ, -1, WORKQ, -1, INFO2 ) + TSZO = INT( TQ( 1 ) ) + LWO = INT( WORKQ( 1 ) ) + CALL SGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ, + $ TSZO, B, LDB, WORKQ, -1, INFO2 ) + LWO = MAX( LWO, INT( WORKQ( 1 ) ) ) + CALL SGELQ( M, N, A, LDA, TQ, -2, WORKQ, -2, INFO2 ) + TSZM = INT( TQ( 1 ) ) + LWM = INT( WORKQ( 1 ) ) + CALL SGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ, + $ TSZO, B, LDB, WORKQ, -1, INFO2 ) + LWM = MAX( LWM, INT( WORKQ( 1 ) ) ) + WSIZEO = TSZO + LWO + WSIZEM = TSZM + LWM + END IF +* + IF( ( LWORK.LT.WSIZEM ).AND.( .NOT.LQUERY ) ) THEN + INFO = -10 + END IF +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGETSLS', -INFO ) + WORK( 1 ) = REAL( WSIZEO ) + RETURN + END IF + IF( LQUERY ) THEN + IF( LWORK.EQ.-1 ) WORK( 1 ) = REAL( WSIZEO ) + IF( LWORK.EQ.-2 ) WORK( 1 ) = REAL( WSIZEM ) + RETURN + END IF + IF( LWORK.LT.WSIZEO ) THEN + LW1 = TSZM + LW2 = LWM + ELSE + LW1 = TSZO + LW2 = LWO + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL SLASET( 'FULL', MAX( M, N ), NRHS, ZERO, ZERO, + $ B, LDB ) + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', M, N, A, LDA, WORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL SLASET( 'F', MAXMN, NRHS, ZERO, ZERO, B, LDB ) + GO TO 50 + END IF +* + BROW = M + IF ( TRAN ) THEN + BROW = N + END IF + BNRM = SLANGE( 'M', BROW, NRHS, B, LDB, WORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF ( M.GE.N ) THEN +* +* compute QR factorization of A +* + CALL SGEQR( M, N, A, LDA, WORK( LW2+1 ), LW1, + $ WORK( 1 ), LW2, INFO ) + IF ( .NOT.TRAN ) THEN +* +* Least-Squares Problem min || A * X - B || +* +* B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) +* + CALL SGEMQR( 'L' , 'T', M, NRHS, N, A, LDA, + $ WORK( LW2+1 ), LW1, B, LDB, WORK( 1 ), LW2, + $ INFO ) +* +* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL STRTRS( 'U', 'N', 'N', N, NRHS, + $ A, LDA, B, LDB, INFO ) + IF( INFO.GT.0 ) THEN + RETURN + END IF + SCLLEN = N + ELSE +* +* Overdetermined system of equations A**T * X = B +* +* B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) +* + CALL STRTRS( 'U', 'T', 'N', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* B(N+1:M,1:NRHS) = ZERO +* + DO 20 J = 1, NRHS + DO 10 I = N + 1, M + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) +* + CALL SGEMQR( 'L', 'N', M, NRHS, N, A, LDA, + $ WORK( LW2+1 ), LW1, B, LDB, WORK( 1 ), LW2, + $ INFO ) +* + SCLLEN = M +* + END IF +* + ELSE +* +* Compute LQ factorization of A +* + CALL SGELQ( M, N, A, LDA, WORK( LW2+1 ), LW1, + $ WORK( 1 ), LW2, INFO ) +* +* workspace at least M, optimally M*NB. +* + IF( .NOT.TRAN ) THEN +* +* underdetermined system of equations A * X = B +* +* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL STRTRS( 'L', 'N', 'N', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* B(M+1:N,1:NRHS) = 0 +* + DO 40 J = 1, NRHS + DO 30 I = M + 1, N + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS) +* + CALL SGEMLQ( 'L', 'T', N, NRHS, M, A, LDA, + $ WORK( LW2+1 ), LW1, B, LDB, WORK( 1 ), LW2, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* + SCLLEN = N +* + ELSE +* +* overdetermined system min || A**T * X - B || +* +* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) +* + CALL SGEMLQ( 'L', 'N', N, NRHS, M, A, LDA, + $ WORK( LW2+1 ), LW1, B, LDB, WORK( 1 ), LW2, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* +* B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) +* + CALL STRTRS( 'Lower', 'Transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + 50 CONTINUE + WORK( 1 ) = REAL( TSZO + LWO ) + RETURN +* +* End of SGETSLS +* + END diff --git a/dspl/liblapack/SRC/sggbak.f b/dspl/liblapack/SRC/sggbak.f new file mode 100644 index 0000000..7e5620e --- /dev/null +++ b/dspl/liblapack/SRC/sggbak.f @@ -0,0 +1,306 @@ +*> \brief \b SGGBAK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGGBAK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V, +* LDV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOB, SIDE +* INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. +* REAL LSCALE( * ), RSCALE( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGGBAK forms the right or left eigenvectors of a real generalized +*> eigenvalue problem A*x = lambda*B*x, by backward transformation on +*> the computed eigenvectors of the balanced pair of matrices output by +*> SGGBAL. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies the type of backward transformation required: +*> = 'N': do nothing, return immediately; +*> = 'P': do backward transformation for permutation only; +*> = 'S': do backward transformation for scaling only; +*> = 'B': do backward transformations for both permutation and +*> scaling. +*> JOB must be the same as the argument JOB supplied to SGGBAL. +*> \endverbatim +*> +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': V contains right eigenvectors; +*> = 'L': V contains left eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows of the matrix V. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> The integers ILO and IHI determined by SGGBAL. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in] LSCALE +*> \verbatim +*> LSCALE is REAL array, dimension (N) +*> Details of the permutations and/or scaling factors applied +*> to the left side of A and B, as returned by SGGBAL. +*> \endverbatim +*> +*> \param[in] RSCALE +*> \verbatim +*> RSCALE is REAL array, dimension (N) +*> Details of the permutations and/or scaling factors applied +*> to the right side of A and B, as returned by SGGBAL. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of columns of the matrix V. M >= 0. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is REAL array, dimension (LDV,M) +*> On entry, the matrix of right or left eigenvectors to be +*> transformed, as returned by STGEVC. +*> On exit, V is overwritten by the transformed eigenvectors. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the matrix V. LDV >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGBcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> See R.C. Ward, Balancing the generalized eigenvalue problem, +*> SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V, + $ LDV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOB, SIDE + INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. + REAL LSCALE( * ), RSCALE( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFTV, RIGHTV + INTEGER I, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + RIGHTV = LSAME( SIDE, 'R' ) + LEFTV = LSAME( SIDE, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( N.EQ.0 .AND. IHI.EQ.0 .AND. ILO.NE.1 ) THEN + INFO = -4 + ELSE IF( N.GT.0 .AND. ( IHI.LT.ILO .OR. IHI.GT.MAX( 1, N ) ) ) + $ THEN + INFO = -5 + ELSE IF( N.EQ.0 .AND. ILO.EQ.1 .AND. IHI.NE.0 ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -8 + ELSE IF( LDV.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGBAK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( M.EQ.0 ) + $ RETURN + IF( LSAME( JOB, 'N' ) ) + $ RETURN +* + IF( ILO.EQ.IHI ) + $ GO TO 30 +* +* Backward balance +* + IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN +* +* Backward transformation on right eigenvectors +* + IF( RIGHTV ) THEN + DO 10 I = ILO, IHI + CALL SSCAL( M, RSCALE( I ), V( I, 1 ), LDV ) + 10 CONTINUE + END IF +* +* Backward transformation on left eigenvectors +* + IF( LEFTV ) THEN + DO 20 I = ILO, IHI + CALL SSCAL( M, LSCALE( I ), V( I, 1 ), LDV ) + 20 CONTINUE + END IF + END IF +* +* Backward permutation +* + 30 CONTINUE + IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN +* +* Backward permutation on right eigenvectors +* + IF( RIGHTV ) THEN + IF( ILO.EQ.1 ) + $ GO TO 50 +* + DO 40 I = ILO - 1, 1, -1 + K = RSCALE( I ) + IF( K.EQ.I ) + $ GO TO 40 + CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 40 CONTINUE +* + 50 CONTINUE + IF( IHI.EQ.N ) + $ GO TO 70 + DO 60 I = IHI + 1, N + K = RSCALE( I ) + IF( K.EQ.I ) + $ GO TO 60 + CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 60 CONTINUE + END IF +* +* Backward permutation on left eigenvectors +* + 70 CONTINUE + IF( LEFTV ) THEN + IF( ILO.EQ.1 ) + $ GO TO 90 + DO 80 I = ILO - 1, 1, -1 + K = LSCALE( I ) + IF( K.EQ.I ) + $ GO TO 80 + CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 80 CONTINUE +* + 90 CONTINUE + IF( IHI.EQ.N ) + $ GO TO 110 + DO 100 I = IHI + 1, N + K = LSCALE( I ) + IF( K.EQ.I ) + $ GO TO 100 + CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 100 CONTINUE + END IF + END IF +* + 110 CONTINUE +* + RETURN +* +* End of SGGBAK +* + END diff --git a/dspl/liblapack/SRC/sggbal.f b/dspl/liblapack/SRC/sggbal.f new file mode 100644 index 0000000..3e64a0f --- /dev/null +++ b/dspl/liblapack/SRC/sggbal.f @@ -0,0 +1,559 @@ +*> \brief \b SGGBAL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGGBAL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE, +* RSCALE, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOB +* INTEGER IHI, ILO, INFO, LDA, LDB, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), LSCALE( * ), +* $ RSCALE( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGGBAL balances a pair of general real matrices (A,B). This +*> involves, first, permuting A and B by similarity transformations to +*> isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N +*> elements on the diagonal; and second, applying a diagonal similarity +*> transformation to rows and columns ILO to IHI to make the rows +*> and columns as close in norm as possible. Both steps are optional. +*> +*> Balancing may reduce the 1-norm of the matrices, and improve the +*> accuracy of the computed eigenvalues and/or eigenvectors in the +*> generalized eigenvalue problem A*x = lambda*B*x. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies the operations to be performed on A and B: +*> = 'N': none: simply set ILO = 1, IHI = N, LSCALE(I) = 1.0 +*> and RSCALE(I) = 1.0 for i = 1,...,N. +*> = 'P': permute only; +*> = 'S': scale only; +*> = 'B': both permute and scale. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the input matrix A. +*> On exit, A is overwritten by the balanced matrix. +*> If JOB = 'N', A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> On entry, the input matrix B. +*> On exit, B is overwritten by the balanced matrix. +*> If JOB = 'N', B is not referenced. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[out] IHI +*> \verbatim +*> IHI is INTEGER +*> ILO and IHI are set to integers such that on exit +*> A(i,j) = 0 and B(i,j) = 0 if i > j and +*> j = 1,...,ILO-1 or i = IHI+1,...,N. +*> If JOB = 'N' or 'S', ILO = 1 and IHI = N. +*> \endverbatim +*> +*> \param[out] LSCALE +*> \verbatim +*> LSCALE is REAL array, dimension (N) +*> Details of the permutations and scaling factors applied +*> to the left side of A and B. If P(j) is the index of the +*> row interchanged with row j, and D(j) +*> is the scaling factor applied to row j, then +*> LSCALE(j) = P(j) for J = 1,...,ILO-1 +*> = D(j) for J = ILO,...,IHI +*> = P(j) for J = IHI+1,...,N. +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] RSCALE +*> \verbatim +*> RSCALE is REAL array, dimension (N) +*> Details of the permutations and scaling factors applied +*> to the right side of A and B. If P(j) is the index of the +*> column interchanged with column j, and D(j) +*> is the scaling factor applied to column j, then +*> LSCALE(j) = P(j) for J = 1,...,ILO-1 +*> = D(j) for J = ILO,...,IHI +*> = P(j) for J = IHI+1,...,N. +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (lwork) +*> lwork must be at least max(1,6*N) when JOB = 'S' or 'B', and +*> at least 1 when JOB = 'N' or 'P'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGBcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> See R.C. WARD, Balancing the generalized eigenvalue problem, +*> SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE, + $ RSCALE, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER IHI, ILO, INFO, LDA, LDB, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), LSCALE( * ), + $ RSCALE( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 ) + REAL THREE, SCLFAC + PARAMETER ( THREE = 3.0E+0, SCLFAC = 1.0E+1 ) +* .. +* .. Local Scalars .. + INTEGER I, ICAB, IFLOW, IP1, IR, IRAB, IT, J, JC, JP1, + $ K, KOUNT, L, LCAB, LM1, LRAB, LSFMAX, LSFMIN, + $ M, NR, NRP2 + REAL ALPHA, BASL, BETA, CAB, CMAX, COEF, COEF2, + $ COEF5, COR, EW, EWC, GAMMA, PGAMMA, RAB, SFMAX, + $ SFMIN, SUM, T, TA, TB, TC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SDOT, SLAMCH + EXTERNAL LSAME, ISAMAX, SDOT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG10, MAX, MIN, REAL, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGBAL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + ILO = 1 + IHI = N + RETURN + END IF +* + IF( N.EQ.1 ) THEN + ILO = 1 + IHI = N + LSCALE( 1 ) = ONE + RSCALE( 1 ) = ONE + RETURN + END IF +* + IF( LSAME( JOB, 'N' ) ) THEN + ILO = 1 + IHI = N + DO 10 I = 1, N + LSCALE( I ) = ONE + RSCALE( I ) = ONE + 10 CONTINUE + RETURN + END IF +* + K = 1 + L = N + IF( LSAME( JOB, 'S' ) ) + $ GO TO 190 +* + GO TO 30 +* +* Permute the matrices A and B to isolate the eigenvalues. +* +* Find row with one nonzero in columns 1 through L +* + 20 CONTINUE + L = LM1 + IF( L.NE.1 ) + $ GO TO 30 +* + RSCALE( 1 ) = ONE + LSCALE( 1 ) = ONE + GO TO 190 +* + 30 CONTINUE + LM1 = L - 1 + DO 80 I = L, 1, -1 + DO 40 J = 1, LM1 + JP1 = J + 1 + IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO ) + $ GO TO 50 + 40 CONTINUE + J = L + GO TO 70 +* + 50 CONTINUE + DO 60 J = JP1, L + IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO ) + $ GO TO 80 + 60 CONTINUE + J = JP1 - 1 +* + 70 CONTINUE + M = L + IFLOW = 1 + GO TO 160 + 80 CONTINUE + GO TO 100 +* +* Find column with one nonzero in rows K through N +* + 90 CONTINUE + K = K + 1 +* + 100 CONTINUE + DO 150 J = K, L + DO 110 I = K, LM1 + IP1 = I + 1 + IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO ) + $ GO TO 120 + 110 CONTINUE + I = L + GO TO 140 + 120 CONTINUE + DO 130 I = IP1, L + IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO ) + $ GO TO 150 + 130 CONTINUE + I = IP1 - 1 + 140 CONTINUE + M = K + IFLOW = 2 + GO TO 160 + 150 CONTINUE + GO TO 190 +* +* Permute rows M and I +* + 160 CONTINUE + LSCALE( M ) = I + IF( I.EQ.M ) + $ GO TO 170 + CALL SSWAP( N-K+1, A( I, K ), LDA, A( M, K ), LDA ) + CALL SSWAP( N-K+1, B( I, K ), LDB, B( M, K ), LDB ) +* +* Permute columns M and J +* + 170 CONTINUE + RSCALE( M ) = J + IF( J.EQ.M ) + $ GO TO 180 + CALL SSWAP( L, A( 1, J ), 1, A( 1, M ), 1 ) + CALL SSWAP( L, B( 1, J ), 1, B( 1, M ), 1 ) +* + 180 CONTINUE + GO TO ( 20, 90 )IFLOW +* + 190 CONTINUE + ILO = K + IHI = L +* + IF( LSAME( JOB, 'P' ) ) THEN + DO 195 I = ILO, IHI + LSCALE( I ) = ONE + RSCALE( I ) = ONE + 195 CONTINUE + RETURN + END IF +* + IF( ILO.EQ.IHI ) + $ RETURN +* +* Balance the submatrix in rows ILO to IHI. +* + NR = IHI - ILO + 1 + DO 200 I = ILO, IHI + RSCALE( I ) = ZERO + LSCALE( I ) = ZERO +* + WORK( I ) = ZERO + WORK( I+N ) = ZERO + WORK( I+2*N ) = ZERO + WORK( I+3*N ) = ZERO + WORK( I+4*N ) = ZERO + WORK( I+5*N ) = ZERO + 200 CONTINUE +* +* Compute right side vector in resulting linear equations +* + BASL = LOG10( SCLFAC ) + DO 240 I = ILO, IHI + DO 230 J = ILO, IHI + TB = B( I, J ) + TA = A( I, J ) + IF( TA.EQ.ZERO ) + $ GO TO 210 + TA = LOG10( ABS( TA ) ) / BASL + 210 CONTINUE + IF( TB.EQ.ZERO ) + $ GO TO 220 + TB = LOG10( ABS( TB ) ) / BASL + 220 CONTINUE + WORK( I+4*N ) = WORK( I+4*N ) - TA - TB + WORK( J+5*N ) = WORK( J+5*N ) - TA - TB + 230 CONTINUE + 240 CONTINUE +* + COEF = ONE / REAL( 2*NR ) + COEF2 = COEF*COEF + COEF5 = HALF*COEF2 + NRP2 = NR + 2 + BETA = ZERO + IT = 1 +* +* Start generalized conjugate gradient iteration +* + 250 CONTINUE +* + GAMMA = SDOT( NR, WORK( ILO+4*N ), 1, WORK( ILO+4*N ), 1 ) + + $ SDOT( NR, WORK( ILO+5*N ), 1, WORK( ILO+5*N ), 1 ) +* + EW = ZERO + EWC = ZERO + DO 260 I = ILO, IHI + EW = EW + WORK( I+4*N ) + EWC = EWC + WORK( I+5*N ) + 260 CONTINUE +* + GAMMA = COEF*GAMMA - COEF2*( EW**2+EWC**2 ) - COEF5*( EW-EWC )**2 + IF( GAMMA.EQ.ZERO ) + $ GO TO 350 + IF( IT.NE.1 ) + $ BETA = GAMMA / PGAMMA + T = COEF5*( EWC-THREE*EW ) + TC = COEF5*( EW-THREE*EWC ) +* + CALL SSCAL( NR, BETA, WORK( ILO ), 1 ) + CALL SSCAL( NR, BETA, WORK( ILO+N ), 1 ) +* + CALL SAXPY( NR, COEF, WORK( ILO+4*N ), 1, WORK( ILO+N ), 1 ) + CALL SAXPY( NR, COEF, WORK( ILO+5*N ), 1, WORK( ILO ), 1 ) +* + DO 270 I = ILO, IHI + WORK( I ) = WORK( I ) + TC + WORK( I+N ) = WORK( I+N ) + T + 270 CONTINUE +* +* Apply matrix to vector +* + DO 300 I = ILO, IHI + KOUNT = 0 + SUM = ZERO + DO 290 J = ILO, IHI + IF( A( I, J ).EQ.ZERO ) + $ GO TO 280 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( J ) + 280 CONTINUE + IF( B( I, J ).EQ.ZERO ) + $ GO TO 290 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( J ) + 290 CONTINUE + WORK( I+2*N ) = REAL( KOUNT )*WORK( I+N ) + SUM + 300 CONTINUE +* + DO 330 J = ILO, IHI + KOUNT = 0 + SUM = ZERO + DO 320 I = ILO, IHI + IF( A( I, J ).EQ.ZERO ) + $ GO TO 310 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( I+N ) + 310 CONTINUE + IF( B( I, J ).EQ.ZERO ) + $ GO TO 320 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( I+N ) + 320 CONTINUE + WORK( J+3*N ) = REAL( KOUNT )*WORK( J ) + SUM + 330 CONTINUE +* + SUM = SDOT( NR, WORK( ILO+N ), 1, WORK( ILO+2*N ), 1 ) + + $ SDOT( NR, WORK( ILO ), 1, WORK( ILO+3*N ), 1 ) + ALPHA = GAMMA / SUM +* +* Determine correction to current iteration +* + CMAX = ZERO + DO 340 I = ILO, IHI + COR = ALPHA*WORK( I+N ) + IF( ABS( COR ).GT.CMAX ) + $ CMAX = ABS( COR ) + LSCALE( I ) = LSCALE( I ) + COR + COR = ALPHA*WORK( I ) + IF( ABS( COR ).GT.CMAX ) + $ CMAX = ABS( COR ) + RSCALE( I ) = RSCALE( I ) + COR + 340 CONTINUE + IF( CMAX.LT.HALF ) + $ GO TO 350 +* + CALL SAXPY( NR, -ALPHA, WORK( ILO+2*N ), 1, WORK( ILO+4*N ), 1 ) + CALL SAXPY( NR, -ALPHA, WORK( ILO+3*N ), 1, WORK( ILO+5*N ), 1 ) +* + PGAMMA = GAMMA + IT = IT + 1 + IF( IT.LE.NRP2 ) + $ GO TO 250 +* +* End generalized conjugate gradient iteration +* + 350 CONTINUE + SFMIN = SLAMCH( 'S' ) + SFMAX = ONE / SFMIN + LSFMIN = INT( LOG10( SFMIN ) / BASL+ONE ) + LSFMAX = INT( LOG10( SFMAX ) / BASL ) + DO 360 I = ILO, IHI + IRAB = ISAMAX( N-ILO+1, A( I, ILO ), LDA ) + RAB = ABS( A( I, IRAB+ILO-1 ) ) + IRAB = ISAMAX( N-ILO+1, B( I, ILO ), LDB ) + RAB = MAX( RAB, ABS( B( I, IRAB+ILO-1 ) ) ) + LRAB = INT( LOG10( RAB+SFMIN ) / BASL+ONE ) + IR = LSCALE( I ) + SIGN( HALF, LSCALE( I ) ) + IR = MIN( MAX( IR, LSFMIN ), LSFMAX, LSFMAX-LRAB ) + LSCALE( I ) = SCLFAC**IR + ICAB = ISAMAX( IHI, A( 1, I ), 1 ) + CAB = ABS( A( ICAB, I ) ) + ICAB = ISAMAX( IHI, B( 1, I ), 1 ) + CAB = MAX( CAB, ABS( B( ICAB, I ) ) ) + LCAB = INT( LOG10( CAB+SFMIN ) / BASL+ONE ) + JC = RSCALE( I ) + SIGN( HALF, RSCALE( I ) ) + JC = MIN( MAX( JC, LSFMIN ), LSFMAX, LSFMAX-LCAB ) + RSCALE( I ) = SCLFAC**JC + 360 CONTINUE +* +* Row scaling of matrices A and B +* + DO 370 I = ILO, IHI + CALL SSCAL( N-ILO+1, LSCALE( I ), A( I, ILO ), LDA ) + CALL SSCAL( N-ILO+1, LSCALE( I ), B( I, ILO ), LDB ) + 370 CONTINUE +* +* Column scaling of matrices A and B +* + DO 380 J = ILO, IHI + CALL SSCAL( IHI, RSCALE( J ), A( 1, J ), 1 ) + CALL SSCAL( IHI, RSCALE( J ), B( 1, J ), 1 ) + 380 CONTINUE +* + RETURN +* +* End of SGGBAL +* + END diff --git a/dspl/liblapack/SRC/sgges.f b/dspl/liblapack/SRC/sgges.f new file mode 100644 index 0000000..75f31c7 --- /dev/null +++ b/dspl/liblapack/SRC/sgges.f @@ -0,0 +1,680 @@ +*> \brief SGGES computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGGES + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGGES( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, LDB, +* SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, +* LDVSR, WORK, LWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVSL, JOBVSR, SORT +* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), +* $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), +* $ VSR( LDVSR, * ), WORK( * ) +* .. +* .. Function Arguments .. +* LOGICAL SELCTG +* EXTERNAL SELCTG +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGGES computes for a pair of N-by-N real nonsymmetric matrices (A,B), +*> the generalized eigenvalues, the generalized real Schur form (S,T), +*> optionally, the left and/or right matrices of Schur vectors (VSL and +*> VSR). This gives the generalized Schur factorization +*> +*> (A,B) = ( (VSL)*S*(VSR)**T, (VSL)*T*(VSR)**T ) +*> +*> Optionally, it also orders the eigenvalues so that a selected cluster +*> of eigenvalues appears in the leading diagonal blocks of the upper +*> quasi-triangular matrix S and the upper triangular matrix T.The +*> leading columns of VSL and VSR then form an orthonormal basis for the +*> corresponding left and right eigenspaces (deflating subspaces). +*> +*> (If only the generalized eigenvalues are needed, use the driver +*> SGGEV instead, which is faster.) +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w +*> or a ratio alpha/beta = w, such that A - w*B is singular. It is +*> usually represented as the pair (alpha,beta), as there is a +*> reasonable interpretation for beta=0 or both being zero. +*> +*> A pair of matrices (S,T) is in generalized real Schur form if T is +*> upper triangular with non-negative diagonal and S is block upper +*> triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond +*> to real generalized eigenvalues, while 2-by-2 blocks of S will be +*> "standardized" by making the corresponding elements of T have the +*> form: +*> [ a 0 ] +*> [ 0 b ] +*> +*> and the pair of corresponding 2-by-2 blocks in S and T will have a +*> complex conjugate pair of generalized eigenvalues. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVSL +*> \verbatim +*> JOBVSL is CHARACTER*1 +*> = 'N': do not compute the left Schur vectors; +*> = 'V': compute the left Schur vectors. +*> \endverbatim +*> +*> \param[in] JOBVSR +*> \verbatim +*> JOBVSR is CHARACTER*1 +*> = 'N': do not compute the right Schur vectors; +*> = 'V': compute the right Schur vectors. +*> \endverbatim +*> +*> \param[in] SORT +*> \verbatim +*> SORT is CHARACTER*1 +*> Specifies whether or not to order the eigenvalues on the +*> diagonal of the generalized Schur form. +*> = 'N': Eigenvalues are not ordered; +*> = 'S': Eigenvalues are ordered (see SELCTG); +*> \endverbatim +*> +*> \param[in] SELCTG +*> \verbatim +*> SELCTG is a LOGICAL FUNCTION of three REAL arguments +*> SELCTG must be declared EXTERNAL in the calling subroutine. +*> If SORT = 'N', SELCTG is not referenced. +*> If SORT = 'S', SELCTG is used to select eigenvalues to sort +*> to the top left of the Schur form. +*> An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if +*> SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either +*> one of a complex conjugate pair of eigenvalues is selected, +*> then both complex eigenvalues are selected. +*> +*> Note that in the ill-conditioned case, a selected complex +*> eigenvalue may no longer satisfy SELCTG(ALPHAR(j),ALPHAI(j), +*> BETA(j)) = .TRUE. after ordering. INFO is to be set to N+2 +*> in this case. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VSL, and VSR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the first of the pair of matrices. +*> On exit, A has been overwritten by its generalized Schur +*> form S. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB, N) +*> On entry, the second of the pair of matrices. +*> On exit, B has been overwritten by its generalized Schur +*> form T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] SDIM +*> \verbatim +*> SDIM is INTEGER +*> If SORT = 'N', SDIM = 0. +*> If SORT = 'S', SDIM = number of eigenvalues (after sorting) +*> for which SELCTG is true. (Complex conjugate pairs for which +*> SELCTG is true for either eigenvalue count as 2.) +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is REAL array, dimension (N) +*> On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +*> be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i, +*> and BETA(j),j=1,...,N are the diagonals of the complex Schur +*> form (S,T) that would result if the 2-by-2 diagonal blocks of +*> the real Schur form of (A,B) were further reduced to +*> triangular form using 2-by-2 complex unitary transformations. +*> If ALPHAI(j) is zero, then the j-th eigenvalue is real; if +*> positive, then the j-th and (j+1)-st eigenvalues are a +*> complex conjugate pair, with ALPHAI(j+1) negative. +*> +*> Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +*> may easily over- or underflow, and BETA(j) may even be zero. +*> Thus, the user should avoid naively computing the ratio. +*> However, ALPHAR and ALPHAI will be always less than and +*> usually comparable with norm(A) in magnitude, and BETA always +*> less than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VSL +*> \verbatim +*> VSL is REAL array, dimension (LDVSL,N) +*> If JOBVSL = 'V', VSL will contain the left Schur vectors. +*> Not referenced if JOBVSL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSL +*> \verbatim +*> LDVSL is INTEGER +*> The leading dimension of the matrix VSL. LDVSL >=1, and +*> if JOBVSL = 'V', LDVSL >= N. +*> \endverbatim +*> +*> \param[out] VSR +*> \verbatim +*> VSR is REAL array, dimension (LDVSR,N) +*> If JOBVSR = 'V', VSR will contain the right Schur vectors. +*> Not referenced if JOBVSR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSR +*> \verbatim +*> LDVSR is INTEGER +*> The leading dimension of the matrix VSR. LDVSR >= 1, and +*> if JOBVSR = 'V', LDVSR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N = 0, LWORK >= 1, else LWORK >= max(8*N,6*N+16). +*> For good performance , LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> Not referenced if SORT = 'N'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1,...,N: +*> The QZ iteration failed. (A,B) are not in Schur +*> form, but ALPHAR(j), ALPHAI(j), and BETA(j) should +*> be correct for j=INFO+1,...,N. +*> > N: =N+1: other than QZ iteration failed in SHGEQZ. +*> =N+2: after reordering, roundoff changed values of +*> some complex eigenvalues so that leading +*> eigenvalues in the Generalized Schur form no +*> longer satisfy SELCTG=.TRUE. This could also +*> be caused due to scaling. +*> =N+3: reordering failed in STGSEN. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEeigen +* +* ===================================================================== + SUBROUTINE SGGES( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, LDB, + $ SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, + $ LDVSR, WORK, LWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR, SORT + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), + $ VSR( LDVSR, * ), WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELCTG + EXTERNAL SELCTG +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL, + $ LQUERY, LST2SL, WANTST + INTEGER I, ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, + $ ILO, IP, IRIGHT, IROWS, ITAU, IWRK, MAXWRK, + $ MINWRK + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PVSL, + $ PVSR, SAFMAX, SAFMIN, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + REAL DIF( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLABAD, + $ SLACPY, SLASCL, SLASET, SORGQR, SORMQR, STGSEN, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* + WANTST = LSAME( SORT, 'S' ) +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -15 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -17 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + IF( N.GT.0 )THEN + MINWRK = MAX( 8*N, 6*N + 16 ) + MAXWRK = MINWRK - N + + $ N*ILAENV( 1, 'SGEQRF', ' ', N, 1, N, 0 ) + MAXWRK = MAX( MAXWRK, MINWRK - N + + $ N*ILAENV( 1, 'SORMQR', ' ', N, 1, N, -1 ) ) + IF( ILVSL ) THEN + MAXWRK = MAX( MAXWRK, MINWRK - N + + $ N*ILAENV( 1, 'SORGQR', ' ', N, 1, N, -1 ) ) + END IF + ELSE + MINWRK = 1 + MAXWRK = 1 + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) + $ INFO = -19 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGES ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SAFMIN = SLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + SMLNUM = SQRT( SAFMIN ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL SLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = SLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL SLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (Workspace: need 6*N + 2*N space for storing balancing factors) +* + ILEFT = 1 + IRIGHT = N + 1 + IWRK = IRIGHT + N + CALL SGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = IWRK + IWRK = ITAU + IROWS + CALL SGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* (Workspace: need N, prefer N*NB) +* + CALL SORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VSL +* (Workspace: need N, prefer N*NB) +* + IF( ILVSL ) THEN + CALL SLASET( 'Full', N, N, ZERO, ONE, VSL, LDVSL ) + IF( IROWS.GT.1 ) THEN + CALL SLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + END IF + CALL SORGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VSR +* + IF( ILVSR ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + CALL SGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IERR ) +* +* Perform QZ algorithm, computing Schur vectors if desired +* (Workspace: need N) +* + IWRK = ITAU + CALL SHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 40 + END IF +* +* Sort eigenvalues ALPHA/BETA if desired +* (Workspace: need 4*N+16 ) +* + SDIM = 0 + IF( WANTST ) THEN +* +* Undo scaling on eigenvalues before SELCTGing +* + IF( ILASCL ) THEN + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, + $ IERR ) + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, + $ IERR ) + END IF + IF( ILBSCL ) + $ CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* +* Select eigenvalues +* + DO 10 I = 1, N + BWORK( I ) = SELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) ) + 10 CONTINUE +* + CALL STGSEN( 0, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, ALPHAR, + $ ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PVSL, + $ PVSR, DIF, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1, + $ IERR ) + IF( IERR.EQ.1 ) + $ INFO = N + 3 +* + END IF +* +* Apply back-permutation to VSL and VSR +* (Workspace: none needed) +* + IF( ILVSL ) + $ CALL SGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSL, LDVSL, IERR ) +* + IF( ILVSR ) + $ CALL SGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSR, LDVSR, IERR ) +* +* Check if unscaling would cause over/underflow, if so, rescale +* (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of +* B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I) +* + IF( ILASCL )THEN + DO 50 I = 1, N + IF( ALPHAI( I ).NE.ZERO ) THEN + IF( ( ALPHAR( I )/SAFMAX ).GT.( ANRMTO/ANRM ) .OR. + $ ( SAFMIN/ALPHAR( I ) ).GT.( ANRM/ANRMTO ) ) THEN + WORK( 1 ) = ABS( A( I, I )/ALPHAR( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + ELSE IF( ( ALPHAI( I )/SAFMAX ).GT.( ANRMTO/ANRM ) .OR. + $ ( SAFMIN/ALPHAI( I ) ).GT.( ANRM/ANRMTO ) ) THEN + WORK( 1 ) = ABS( A( I, I+1 )/ALPHAI( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + END IF + END IF + 50 CONTINUE + END IF +* + IF( ILBSCL )THEN + DO 60 I = 1, N + IF( ALPHAI( I ).NE.ZERO ) THEN + IF( ( BETA( I )/SAFMAX ).GT.( BNRMTO/BNRM ) .OR. + $ ( SAFMIN/BETA( I ) ).GT.( BNRM/BNRMTO ) ) THEN + WORK( 1 ) = ABS(B( I, I )/BETA( I )) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + END IF + END IF + 60 CONTINUE + END IF +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL SLASCL( 'H', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR ) + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR ) + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL SLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR ) + CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + IF( WANTST ) THEN +* +* Check if reordering is correct +* + LASTSL = .TRUE. + LST2SL = .TRUE. + SDIM = 0 + IP = 0 + DO 30 I = 1, N + CURSL = SELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) ) + IF( ALPHAI( I ).EQ.ZERO ) THEN + IF( CURSL ) + $ SDIM = SDIM + 1 + IP = 0 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + ELSE + IF( IP.EQ.1 ) THEN +* +* Last eigenvalue of conjugate pair +* + CURSL = CURSL .OR. LASTSL + LASTSL = CURSL + IF( CURSL ) + $ SDIM = SDIM + 2 + IP = -1 + IF( CURSL .AND. .NOT.LST2SL ) + $ INFO = N + 2 + ELSE +* +* First eigenvalue of conjugate pair +* + IP = 1 + END IF + END IF + LST2SL = LASTSL + LASTSL = CURSL + 30 CONTINUE +* + END IF +* + 40 CONTINUE +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of SGGES +* + END diff --git a/dspl/liblapack/SRC/sgges3.f b/dspl/liblapack/SRC/sgges3.f new file mode 100644 index 0000000..81ab96c --- /dev/null +++ b/dspl/liblapack/SRC/sgges3.f @@ -0,0 +1,671 @@ +*> \brief SGGES3 computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors for GE matrices (blocked algorithm) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGGES3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGGES3( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, +* $ LDB, SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, +* $ VSR, LDVSR, WORK, LWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVSL, JOBVSR, SORT +* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), +* $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), +* $ VSR( LDVSR, * ), WORK( * ) +* .. +* .. Function Arguments .. +* LOGICAL SELCTG +* EXTERNAL SELCTG +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGGES3 computes for a pair of N-by-N real nonsymmetric matrices (A,B), +*> the generalized eigenvalues, the generalized real Schur form (S,T), +*> optionally, the left and/or right matrices of Schur vectors (VSL and +*> VSR). This gives the generalized Schur factorization +*> +*> (A,B) = ( (VSL)*S*(VSR)**T, (VSL)*T*(VSR)**T ) +*> +*> Optionally, it also orders the eigenvalues so that a selected cluster +*> of eigenvalues appears in the leading diagonal blocks of the upper +*> quasi-triangular matrix S and the upper triangular matrix T.The +*> leading columns of VSL and VSR then form an orthonormal basis for the +*> corresponding left and right eigenspaces (deflating subspaces). +*> +*> (If only the generalized eigenvalues are needed, use the driver +*> SGGEV instead, which is faster.) +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w +*> or a ratio alpha/beta = w, such that A - w*B is singular. It is +*> usually represented as the pair (alpha,beta), as there is a +*> reasonable interpretation for beta=0 or both being zero. +*> +*> A pair of matrices (S,T) is in generalized real Schur form if T is +*> upper triangular with non-negative diagonal and S is block upper +*> triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond +*> to real generalized eigenvalues, while 2-by-2 blocks of S will be +*> "standardized" by making the corresponding elements of T have the +*> form: +*> [ a 0 ] +*> [ 0 b ] +*> +*> and the pair of corresponding 2-by-2 blocks in S and T will have a +*> complex conjugate pair of generalized eigenvalues. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVSL +*> \verbatim +*> JOBVSL is CHARACTER*1 +*> = 'N': do not compute the left Schur vectors; +*> = 'V': compute the left Schur vectors. +*> \endverbatim +*> +*> \param[in] JOBVSR +*> \verbatim +*> JOBVSR is CHARACTER*1 +*> = 'N': do not compute the right Schur vectors; +*> = 'V': compute the right Schur vectors. +*> \endverbatim +*> +*> \param[in] SORT +*> \verbatim +*> SORT is CHARACTER*1 +*> Specifies whether or not to order the eigenvalues on the +*> diagonal of the generalized Schur form. +*> = 'N': Eigenvalues are not ordered; +*> = 'S': Eigenvalues are ordered (see SELCTG); +*> \endverbatim +*> +*> \param[in] SELCTG +*> \verbatim +*> SELCTG is a LOGICAL FUNCTION of three REAL arguments +*> SELCTG must be declared EXTERNAL in the calling subroutine. +*> If SORT = 'N', SELCTG is not referenced. +*> If SORT = 'S', SELCTG is used to select eigenvalues to sort +*> to the top left of the Schur form. +*> An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if +*> SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either +*> one of a complex conjugate pair of eigenvalues is selected, +*> then both complex eigenvalues are selected. +*> +*> Note that in the ill-conditioned case, a selected complex +*> eigenvalue may no longer satisfy SELCTG(ALPHAR(j),ALPHAI(j), +*> BETA(j)) = .TRUE. after ordering. INFO is to be set to N+2 +*> in this case. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VSL, and VSR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the first of the pair of matrices. +*> On exit, A has been overwritten by its generalized Schur +*> form S. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB, N) +*> On entry, the second of the pair of matrices. +*> On exit, B has been overwritten by its generalized Schur +*> form T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] SDIM +*> \verbatim +*> SDIM is INTEGER +*> If SORT = 'N', SDIM = 0. +*> If SORT = 'S', SDIM = number of eigenvalues (after sorting) +*> for which SELCTG is true. (Complex conjugate pairs for which +*> SELCTG is true for either eigenvalue count as 2.) +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is REAL array, dimension (N) +*> On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +*> be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i, +*> and BETA(j),j=1,...,N are the diagonals of the complex Schur +*> form (S,T) that would result if the 2-by-2 diagonal blocks of +*> the real Schur form of (A,B) were further reduced to +*> triangular form using 2-by-2 complex unitary transformations. +*> If ALPHAI(j) is zero, then the j-th eigenvalue is real; if +*> positive, then the j-th and (j+1)-st eigenvalues are a +*> complex conjugate pair, with ALPHAI(j+1) negative. +*> +*> Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +*> may easily over- or underflow, and BETA(j) may even be zero. +*> Thus, the user should avoid naively computing the ratio. +*> However, ALPHAR and ALPHAI will be always less than and +*> usually comparable with norm(A) in magnitude, and BETA always +*> less than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VSL +*> \verbatim +*> VSL is REAL array, dimension (LDVSL,N) +*> If JOBVSL = 'V', VSL will contain the left Schur vectors. +*> Not referenced if JOBVSL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSL +*> \verbatim +*> LDVSL is INTEGER +*> The leading dimension of the matrix VSL. LDVSL >=1, and +*> if JOBVSL = 'V', LDVSL >= N. +*> \endverbatim +*> +*> \param[out] VSR +*> \verbatim +*> VSR is REAL array, dimension (LDVSR,N) +*> If JOBVSR = 'V', VSR will contain the right Schur vectors. +*> Not referenced if JOBVSR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSR +*> \verbatim +*> LDVSR is INTEGER +*> The leading dimension of the matrix VSR. LDVSR >= 1, and +*> if JOBVSR = 'V', LDVSR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> Not referenced if SORT = 'N'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1,...,N: +*> The QZ iteration failed. (A,B) are not in Schur +*> form, but ALPHAR(j), ALPHAI(j), and BETA(j) should +*> be correct for j=INFO+1,...,N. +*> > N: =N+1: other than QZ iteration failed in SHGEQZ. +*> =N+2: after reordering, roundoff changed values of +*> some complex eigenvalues so that leading +*> eigenvalues in the Generalized Schur form no +*> longer satisfy SELCTG=.TRUE. This could also +*> be caused due to scaling. +*> =N+3: reordering failed in STGSEN. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date January 2015 +* +*> \ingroup realGEeigen +* +* ===================================================================== + SUBROUTINE SGGES3( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, + $ LDB, SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, + $ VSR, LDVSR, WORK, LWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.6.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2015 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR, SORT + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), VSL( LDVSL, * ), + $ VSR( LDVSR, * ), WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELCTG + EXTERNAL SELCTG +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL, + $ LQUERY, LST2SL, WANTST + INTEGER I, ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, + $ ILO, IP, IRIGHT, IROWS, ITAU, IWRK, LWKOPT + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PVSL, + $ PVSR, SAFMAX, SAFMIN, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + REAL DIF( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHD3, SHGEQZ, SLABAD, + $ SLACPY, SLASCL, SLASET, SORGQR, SORMQR, STGSEN, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANGE + EXTERNAL LSAME, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* + WANTST = LSAME( SORT, 'S' ) +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -15 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -17 + ELSE IF( LWORK.LT.6*N+16 .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF +* +* Compute workspace +* + IF( INFO.EQ.0 ) THEN + CALL SGEQRF( N, N, B, LDB, WORK, WORK, -1, IERR ) + LWKOPT = MAX( 6*N+16, 3*N+INT( WORK( 1 ) ) ) + CALL SORMQR( 'L', 'T', N, N, N, B, LDB, WORK, A, LDA, WORK, + $ -1, IERR ) + LWKOPT = MAX( LWKOPT, 3*N+INT( WORK( 1 ) ) ) + IF( ILVSL ) THEN + CALL SORGQR( N, N, N, VSL, LDVSL, WORK, WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, 3*N+INT( WORK( 1 ) ) ) + END IF + CALL SGGHD3( JOBVSL, JOBVSR, N, 1, N, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, 3*N+INT( WORK( 1 ) ) ) + CALL SHGEQZ( 'S', JOBVSL, JOBVSR, N, 1, N, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, 2*N+INT( WORK( 1 ) ) ) + IF( WANTST ) THEN + CALL STGSEN( 0, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ SDIM, PVSL, PVSR, DIF, WORK, -1, IDUM, 1, + $ IERR ) + LWKOPT = MAX( LWKOPT, 2*N+INT( WORK( 1 ) ) ) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGES3 ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SAFMIN = SLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + SMLNUM = SQRT( SAFMIN ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL SLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = SLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL SLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrix to make it more nearly triangular +* + ILEFT = 1 + IRIGHT = N + 1 + IWRK = IRIGHT + N + CALL SGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = IWRK + IWRK = ITAU + IROWS + CALL SGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* + CALL SORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VSL +* + IF( ILVSL ) THEN + CALL SLASET( 'Full', N, N, ZERO, ONE, VSL, LDVSL ) + IF( IROWS.GT.1 ) THEN + CALL SLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + END IF + CALL SORGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VSR +* + IF( ILVSR ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* + CALL SGGHD3( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Perform QZ algorithm, computing Schur vectors if desired +* + IWRK = ITAU + CALL SHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 40 + END IF +* +* Sort eigenvalues ALPHA/BETA if desired +* + SDIM = 0 + IF( WANTST ) THEN +* +* Undo scaling on eigenvalues before SELCTGing +* + IF( ILASCL ) THEN + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, + $ IERR ) + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, + $ IERR ) + END IF + IF( ILBSCL ) + $ CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* +* Select eigenvalues +* + DO 10 I = 1, N + BWORK( I ) = SELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) ) + 10 CONTINUE +* + CALL STGSEN( 0, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, ALPHAR, + $ ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PVSL, + $ PVSR, DIF, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1, + $ IERR ) + IF( IERR.EQ.1 ) + $ INFO = N + 3 +* + END IF +* +* Apply back-permutation to VSL and VSR +* + IF( ILVSL ) + $ CALL SGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSL, LDVSL, IERR ) +* + IF( ILVSR ) + $ CALL SGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSR, LDVSR, IERR ) +* +* Check if unscaling would cause over/underflow, if so, rescale +* (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of +* B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I) +* + IF( ILASCL )THEN + DO 50 I = 1, N + IF( ALPHAI( I ).NE.ZERO ) THEN + IF( ( ALPHAR( I )/SAFMAX ).GT.( ANRMTO/ANRM ) .OR. + $ ( SAFMIN/ALPHAR( I ) ).GT.( ANRM/ANRMTO ) ) THEN + WORK( 1 ) = ABS( A( I, I )/ALPHAR( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + ELSE IF( ( ALPHAI( I )/SAFMAX ).GT.( ANRMTO/ANRM ) .OR. + $ ( SAFMIN/ALPHAI( I ) ).GT.( ANRM/ANRMTO ) ) THEN + WORK( 1 ) = ABS( A( I, I+1 )/ALPHAI( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + END IF + END IF + 50 CONTINUE + END IF +* + IF( ILBSCL )THEN + DO 60 I = 1, N + IF( ALPHAI( I ).NE.ZERO ) THEN + IF( ( BETA( I )/SAFMAX ).GT.( BNRMTO/BNRM ) .OR. + $ ( SAFMIN/BETA( I ) ).GT.( BNRM/BNRMTO ) ) THEN + WORK( 1 ) = ABS(B( I, I )/BETA( I )) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + END IF + END IF + 60 CONTINUE + END IF +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL SLASCL( 'H', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR ) + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR ) + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL SLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR ) + CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + IF( WANTST ) THEN +* +* Check if reordering is correct +* + LASTSL = .TRUE. + LST2SL = .TRUE. + SDIM = 0 + IP = 0 + DO 30 I = 1, N + CURSL = SELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) ) + IF( ALPHAI( I ).EQ.ZERO ) THEN + IF( CURSL ) + $ SDIM = SDIM + 1 + IP = 0 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + ELSE + IF( IP.EQ.1 ) THEN +* +* Last eigenvalue of conjugate pair +* + CURSL = CURSL .OR. LASTSL + LASTSL = CURSL + IF( CURSL ) + $ SDIM = SDIM + 2 + IP = -1 + IF( CURSL .AND. .NOT.LST2SL ) + $ INFO = N + 2 + ELSE +* +* First eigenvalue of conjugate pair +* + IP = 1 + END IF + END IF + LST2SL = LASTSL + LASTSL = CURSL + 30 CONTINUE +* + END IF +* + 40 CONTINUE +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SGGES3 +* + END diff --git a/dspl/liblapack/SRC/sggesx.f b/dspl/liblapack/SRC/sggesx.f new file mode 100644 index 0000000..3c6273d --- /dev/null +++ b/dspl/liblapack/SRC/sggesx.f @@ -0,0 +1,820 @@ +*> \brief SGGESX computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGGESX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGGESX( JOBVSL, JOBVSR, SORT, SELCTG, SENSE, N, A, LDA, +* B, LDB, SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, +* VSR, LDVSR, RCONDE, RCONDV, WORK, LWORK, IWORK, +* LIWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVSL, JOBVSR, SENSE, SORT +* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LIWORK, LWORK, N, +* $ SDIM +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* INTEGER IWORK( * ) +* REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), +* $ B( LDB, * ), BETA( * ), RCONDE( 2 ), +* $ RCONDV( 2 ), VSL( LDVSL, * ), VSR( LDVSR, * ), +* $ WORK( * ) +* .. +* .. Function Arguments .. +* LOGICAL SELCTG +* EXTERNAL SELCTG +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGGESX computes for a pair of N-by-N real nonsymmetric matrices +*> (A,B), the generalized eigenvalues, the real Schur form (S,T), and, +*> optionally, the left and/or right matrices of Schur vectors (VSL and +*> VSR). This gives the generalized Schur factorization +*> +*> (A,B) = ( (VSL) S (VSR)**T, (VSL) T (VSR)**T ) +*> +*> Optionally, it also orders the eigenvalues so that a selected cluster +*> of eigenvalues appears in the leading diagonal blocks of the upper +*> quasi-triangular matrix S and the upper triangular matrix T; computes +*> a reciprocal condition number for the average of the selected +*> eigenvalues (RCONDE); and computes a reciprocal condition number for +*> the right and left deflating subspaces corresponding to the selected +*> eigenvalues (RCONDV). The leading columns of VSL and VSR then form +*> an orthonormal basis for the corresponding left and right eigenspaces +*> (deflating subspaces). +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w +*> or a ratio alpha/beta = w, such that A - w*B is singular. It is +*> usually represented as the pair (alpha,beta), as there is a +*> reasonable interpretation for beta=0 or for both being zero. +*> +*> A pair of matrices (S,T) is in generalized real Schur form if T is +*> upper triangular with non-negative diagonal and S is block upper +*> triangular with 1-by-1 and 2-by-2 blocks. 1-by-1 blocks correspond +*> to real generalized eigenvalues, while 2-by-2 blocks of S will be +*> "standardized" by making the corresponding elements of T have the +*> form: +*> [ a 0 ] +*> [ 0 b ] +*> +*> and the pair of corresponding 2-by-2 blocks in S and T will have a +*> complex conjugate pair of generalized eigenvalues. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVSL +*> \verbatim +*> JOBVSL is CHARACTER*1 +*> = 'N': do not compute the left Schur vectors; +*> = 'V': compute the left Schur vectors. +*> \endverbatim +*> +*> \param[in] JOBVSR +*> \verbatim +*> JOBVSR is CHARACTER*1 +*> = 'N': do not compute the right Schur vectors; +*> = 'V': compute the right Schur vectors. +*> \endverbatim +*> +*> \param[in] SORT +*> \verbatim +*> SORT is CHARACTER*1 +*> Specifies whether or not to order the eigenvalues on the +*> diagonal of the generalized Schur form. +*> = 'N': Eigenvalues are not ordered; +*> = 'S': Eigenvalues are ordered (see SELCTG). +*> \endverbatim +*> +*> \param[in] SELCTG +*> \verbatim +*> SELCTG is a LOGICAL FUNCTION of three REAL arguments +*> SELCTG must be declared EXTERNAL in the calling subroutine. +*> If SORT = 'N', SELCTG is not referenced. +*> If SORT = 'S', SELCTG is used to select eigenvalues to sort +*> to the top left of the Schur form. +*> An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if +*> SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either +*> one of a complex conjugate pair of eigenvalues is selected, +*> then both complex eigenvalues are selected. +*> Note that a selected complex eigenvalue may no longer satisfy +*> SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) = .TRUE. after ordering, +*> since ordering may change the value of complex eigenvalues +*> (especially if the eigenvalue is ill-conditioned), in this +*> case INFO is set to N+3. +*> \endverbatim +*> +*> \param[in] SENSE +*> \verbatim +*> SENSE is CHARACTER*1 +*> Determines which reciprocal condition numbers are computed. +*> = 'N' : None are computed; +*> = 'E' : Computed for average of selected eigenvalues only; +*> = 'V' : Computed for selected deflating subspaces only; +*> = 'B' : Computed for both. +*> If SENSE = 'E', 'V', or 'B', SORT must equal 'S'. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VSL, and VSR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the first of the pair of matrices. +*> On exit, A has been overwritten by its generalized Schur +*> form S. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB, N) +*> On entry, the second of the pair of matrices. +*> On exit, B has been overwritten by its generalized Schur +*> form T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] SDIM +*> \verbatim +*> SDIM is INTEGER +*> If SORT = 'N', SDIM = 0. +*> If SORT = 'S', SDIM = number of eigenvalues (after sorting) +*> for which SELCTG is true. (Complex conjugate pairs for which +*> SELCTG is true for either eigenvalue count as 2.) +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is REAL array, dimension (N) +*> On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +*> be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i +*> and BETA(j),j=1,...,N are the diagonals of the complex Schur +*> form (S,T) that would result if the 2-by-2 diagonal blocks of +*> the real Schur form of (A,B) were further reduced to +*> triangular form using 2-by-2 complex unitary transformations. +*> If ALPHAI(j) is zero, then the j-th eigenvalue is real; if +*> positive, then the j-th and (j+1)-st eigenvalues are a +*> complex conjugate pair, with ALPHAI(j+1) negative. +*> +*> Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +*> may easily over- or underflow, and BETA(j) may even be zero. +*> Thus, the user should avoid naively computing the ratio. +*> However, ALPHAR and ALPHAI will be always less than and +*> usually comparable with norm(A) in magnitude, and BETA always +*> less than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VSL +*> \verbatim +*> VSL is REAL array, dimension (LDVSL,N) +*> If JOBVSL = 'V', VSL will contain the left Schur vectors. +*> Not referenced if JOBVSL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSL +*> \verbatim +*> LDVSL is INTEGER +*> The leading dimension of the matrix VSL. LDVSL >=1, and +*> if JOBVSL = 'V', LDVSL >= N. +*> \endverbatim +*> +*> \param[out] VSR +*> \verbatim +*> VSR is REAL array, dimension (LDVSR,N) +*> If JOBVSR = 'V', VSR will contain the right Schur vectors. +*> Not referenced if JOBVSR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSR +*> \verbatim +*> LDVSR is INTEGER +*> The leading dimension of the matrix VSR. LDVSR >= 1, and +*> if JOBVSR = 'V', LDVSR >= N. +*> \endverbatim +*> +*> \param[out] RCONDE +*> \verbatim +*> RCONDE is REAL array, dimension ( 2 ) +*> If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the +*> reciprocal condition numbers for the average of the selected +*> eigenvalues. +*> Not referenced if SENSE = 'N' or 'V'. +*> \endverbatim +*> +*> \param[out] RCONDV +*> \verbatim +*> RCONDV is REAL array, dimension ( 2 ) +*> If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the +*> reciprocal condition numbers for the selected deflating +*> subspaces. +*> Not referenced if SENSE = 'N' or 'E'. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N = 0, LWORK >= 1, else if SENSE = 'E', 'V', or 'B', +*> LWORK >= max( 8*N, 6*N+16, 2*SDIM*(N-SDIM) ), else +*> LWORK >= max( 8*N, 6*N+16 ). +*> Note that 2*SDIM*(N-SDIM) <= N*N/2. +*> Note also that an error is only returned if +*> LWORK < max( 8*N, 6*N+16), but if SENSE = 'E' or 'V' or 'B' +*> this may not be large enough. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the bound on the optimal size of the WORK +*> array and the minimum size of the IWORK array, returns these +*> values as the first entries of the WORK and IWORK arrays, and +*> no error message related to LWORK or LIWORK is issued by +*> XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If SENSE = 'N' or N = 0, LIWORK >= 1, otherwise +*> LIWORK >= N+6. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the bound on the optimal size of the +*> WORK array and the minimum size of the IWORK array, returns +*> these values as the first entries of the WORK and IWORK +*> arrays, and no error message related to LWORK or LIWORK is +*> issued by XERBLA. +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> Not referenced if SORT = 'N'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1,...,N: +*> The QZ iteration failed. (A,B) are not in Schur +*> form, but ALPHAR(j), ALPHAI(j), and BETA(j) should +*> be correct for j=INFO+1,...,N. +*> > N: =N+1: other than QZ iteration failed in SHGEQZ +*> =N+2: after reordering, roundoff changed values of +*> some complex eigenvalues so that leading +*> eigenvalues in the Generalized Schur form no +*> longer satisfy SELCTG=.TRUE. This could also +*> be caused due to scaling. +*> =N+3: reordering failed in STGSEN. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realGEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> An approximate (asymptotic) bound on the average absolute error of +*> the selected eigenvalues is +*> +*> EPS * norm((A, B)) / RCONDE( 1 ). +*> +*> An approximate (asymptotic) bound on the maximum angular error in +*> the computed deflating subspaces is +*> +*> EPS * norm((A, B)) / RCONDV( 2 ). +*> +*> See LAPACK User's Guide, section 4.11 for more information. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGGESX( JOBVSL, JOBVSR, SORT, SELCTG, SENSE, N, A, LDA, + $ B, LDB, SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, + $ VSR, LDVSR, RCONDE, RCONDV, WORK, LWORK, IWORK, + $ LIWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR, SENSE, SORT + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LIWORK, LWORK, N, + $ SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), RCONDE( 2 ), + $ RCONDV( 2 ), VSL( LDVSL, * ), VSR( LDVSR, * ), + $ WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELCTG + EXTERNAL SELCTG +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL, + $ LQUERY, LST2SL, WANTSB, WANTSE, WANTSN, WANTST, + $ WANTSV + INTEGER I, ICOLS, IERR, IHI, IJOB, IJOBVL, IJOBVR, + $ ILEFT, ILO, IP, IRIGHT, IROWS, ITAU, IWRK, + $ LIWMIN, LWRK, MAXWRK, MINWRK + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PL, + $ PR, SAFMAX, SAFMIN, SMLNUM +* .. +* .. Local Arrays .. + REAL DIF( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLABAD, + $ SLACPY, SLASCL, SLASET, SORGQR, SORMQR, STGSEN, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* + WANTST = LSAME( SORT, 'S' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) + IF( WANTSN ) THEN + IJOB = 0 + ELSE IF( WANTSE ) THEN + IJOB = 1 + ELSE IF( WANTSV ) THEN + IJOB = 2 + ELSE IF( WANTSB ) THEN + IJOB = 4 + END IF +* +* Test the input arguments +* + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR. + $ ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -16 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -18 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + IF( N.GT.0) THEN + MINWRK = MAX( 8*N, 6*N + 16 ) + MAXWRK = MINWRK - N + + $ N*ILAENV( 1, 'SGEQRF', ' ', N, 1, N, 0 ) + MAXWRK = MAX( MAXWRK, MINWRK - N + + $ N*ILAENV( 1, 'SORMQR', ' ', N, 1, N, -1 ) ) + IF( ILVSL ) THEN + MAXWRK = MAX( MAXWRK, MINWRK - N + + $ N*ILAENV( 1, 'SORGQR', ' ', N, 1, N, -1 ) ) + END IF + LWRK = MAXWRK + IF( IJOB.GE.1 ) + $ LWRK = MAX( LWRK, N*N/2 ) + ELSE + MINWRK = 1 + MAXWRK = 1 + LWRK = 1 + END IF + WORK( 1 ) = LWRK + IF( WANTSN .OR. N.EQ.0 ) THEN + LIWMIN = 1 + ELSE + LIWMIN = N + 6 + END IF + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -22 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -24 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGESX', -INFO ) + RETURN + ELSE IF (LQUERY) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SAFMIN = SLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + SMLNUM = SQRT( SAFMIN ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL SLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = SLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL SLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (Workspace: need 6*N + 2*N for permutation parameters) +* + ILEFT = 1 + IRIGHT = N + 1 + IWRK = IRIGHT + N + CALL SGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = IWRK + IWRK = ITAU + IROWS + CALL SGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* (Workspace: need N, prefer N*NB) +* + CALL SORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VSL +* (Workspace: need N, prefer N*NB) +* + IF( ILVSL ) THEN + CALL SLASET( 'Full', N, N, ZERO, ONE, VSL, LDVSL ) + IF( IROWS.GT.1 ) THEN + CALL SLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + END IF + CALL SORGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VSR +* + IF( ILVSR ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + CALL SGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IERR ) +* + SDIM = 0 +* +* Perform QZ algorithm, computing Schur vectors if desired +* (Workspace: need N) +* + IWRK = ITAU + CALL SHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 50 + END IF +* +* Sort eigenvalues ALPHA/BETA and compute the reciprocal of +* condition number(s) +* (Workspace: If IJOB >= 1, need MAX( 8*(N+1), 2*SDIM*(N-SDIM) ) +* otherwise, need 8*(N+1) ) +* + IF( WANTST ) THEN +* +* Undo scaling on eigenvalues before SELCTGing +* + IF( ILASCL ) THEN + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, + $ IERR ) + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, + $ IERR ) + END IF + IF( ILBSCL ) + $ CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* +* Select eigenvalues +* + DO 10 I = 1, N + BWORK( I ) = SELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues, transform Generalized Schur vectors, and +* compute reciprocal condition numbers +* + CALL STGSEN( IJOB, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, + $ SDIM, PL, PR, DIF, WORK( IWRK ), LWORK-IWRK+1, + $ IWORK, LIWORK, IERR ) +* + IF( IJOB.GE.1 ) + $ MAXWRK = MAX( MAXWRK, 2*SDIM*( N-SDIM ) ) + IF( IERR.EQ.-22 ) THEN +* +* not enough real workspace +* + INFO = -22 + ELSE + IF( IJOB.EQ.1 .OR. IJOB.EQ.4 ) THEN + RCONDE( 1 ) = PL + RCONDE( 2 ) = PR + END IF + IF( IJOB.EQ.2 .OR. IJOB.EQ.4 ) THEN + RCONDV( 1 ) = DIF( 1 ) + RCONDV( 2 ) = DIF( 2 ) + END IF + IF( IERR.EQ.1 ) + $ INFO = N + 3 + END IF +* + END IF +* +* Apply permutation to VSL and VSR +* (Workspace: none needed) +* + IF( ILVSL ) + $ CALL SGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSL, LDVSL, IERR ) +* + IF( ILVSR ) + $ CALL SGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VSR, LDVSR, IERR ) +* +* Check if unscaling would cause over/underflow, if so, rescale +* (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of +* B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I) +* + IF( ILASCL ) THEN + DO 20 I = 1, N + IF( ALPHAI( I ).NE.ZERO ) THEN + IF( ( ALPHAR( I ) / SAFMAX ).GT.( ANRMTO / ANRM ) .OR. + $ ( SAFMIN / ALPHAR( I ) ).GT.( ANRM / ANRMTO ) ) + $ THEN + WORK( 1 ) = ABS( A( I, I ) / ALPHAR( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + ELSE IF( ( ALPHAI( I ) / SAFMAX ).GT.( ANRMTO / ANRM ) + $ .OR. ( SAFMIN / ALPHAI( I ) ).GT.( ANRM / ANRMTO ) ) + $ THEN + WORK( 1 ) = ABS( A( I, I+1 ) / ALPHAI( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + END IF + END IF + 20 CONTINUE + END IF +* + IF( ILBSCL ) THEN + DO 25 I = 1, N + IF( ALPHAI( I ).NE.ZERO ) THEN + IF( ( BETA( I ) / SAFMAX ).GT.( BNRMTO / BNRM ) .OR. + $ ( SAFMIN / BETA( I ) ).GT.( BNRM / BNRMTO ) ) THEN + WORK( 1 ) = ABS( B( I, I ) / BETA( I ) ) + BETA( I ) = BETA( I )*WORK( 1 ) + ALPHAR( I ) = ALPHAR( I )*WORK( 1 ) + ALPHAI( I ) = ALPHAI( I )*WORK( 1 ) + END IF + END IF + 25 CONTINUE + END IF +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL SLASCL( 'H', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR ) + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR ) + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL SLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR ) + CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + IF( WANTST ) THEN +* +* Check if reordering is correct +* + LASTSL = .TRUE. + LST2SL = .TRUE. + SDIM = 0 + IP = 0 + DO 40 I = 1, N + CURSL = SELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) ) + IF( ALPHAI( I ).EQ.ZERO ) THEN + IF( CURSL ) + $ SDIM = SDIM + 1 + IP = 0 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + ELSE + IF( IP.EQ.1 ) THEN +* +* Last eigenvalue of conjugate pair +* + CURSL = CURSL .OR. LASTSL + LASTSL = CURSL + IF( CURSL ) + $ SDIM = SDIM + 2 + IP = -1 + IF( CURSL .AND. .NOT.LST2SL ) + $ INFO = N + 2 + ELSE +* +* First eigenvalue of conjugate pair +* + IP = 1 + END IF + END IF + LST2SL = LASTSL + LASTSL = CURSL + 40 CONTINUE +* + END IF +* + 50 CONTINUE +* + WORK( 1 ) = MAXWRK + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of SGGESX +* + END diff --git a/dspl/liblapack/SRC/sggev.f b/dspl/liblapack/SRC/sggev.f new file mode 100644 index 0000000..8551eb0 --- /dev/null +++ b/dspl/liblapack/SRC/sggev.f @@ -0,0 +1,592 @@ +*> \brief SGGEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGGEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI, +* BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVL, JOBVR +* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), +* $ B( LDB, * ), BETA( * ), VL( LDVL, * ), +* $ VR( LDVR, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGGEV computes for a pair of N-by-N real nonsymmetric matrices (A,B) +*> the generalized eigenvalues, and optionally, the left and/or right +*> generalized eigenvectors. +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar +*> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is +*> singular. It is usually represented as the pair (alpha,beta), as +*> there is a reasonable interpretation for beta=0, and even for both +*> being zero. +*> +*> The right eigenvector v(j) corresponding to the eigenvalue lambda(j) +*> of (A,B) satisfies +*> +*> A * v(j) = lambda(j) * B * v(j). +*> +*> The left eigenvector u(j) corresponding to the eigenvalue lambda(j) +*> of (A,B) satisfies +*> +*> u(j)**H * A = lambda(j) * u(j)**H * B . +*> +*> where u(j)**H is the conjugate-transpose of u(j). +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': do not compute the left generalized eigenvectors; +*> = 'V': compute the left generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': do not compute the right generalized eigenvectors; +*> = 'V': compute the right generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VL, and VR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the matrix A in the pair (A,B). +*> On exit, A has been overwritten. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB, N) +*> On entry, the matrix B in the pair (A,B). +*> On exit, B has been overwritten. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is REAL array, dimension (N) +*> On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +*> be the generalized eigenvalues. If ALPHAI(j) is zero, then +*> the j-th eigenvalue is real; if positive, then the j-th and +*> (j+1)-st eigenvalues are a complex conjugate pair, with +*> ALPHAI(j+1) negative. +*> +*> Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +*> may easily over- or underflow, and BETA(j) may even be zero. +*> Thus, the user should avoid naively computing the ratio +*> alpha/beta. However, ALPHAR and ALPHAI will be always less +*> than and usually comparable with norm(A) in magnitude, and +*> BETA always less than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is REAL array, dimension (LDVL,N) +*> If JOBVL = 'V', the left eigenvectors u(j) are stored one +*> after another in the columns of VL, in the same order as +*> their eigenvalues. If the j-th eigenvalue is real, then +*> u(j) = VL(:,j), the j-th column of VL. If the j-th and +*> (j+1)-th eigenvalues form a complex conjugate pair, then +*> u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). +*> Each eigenvector is scaled so the largest component has +*> abs(real part)+abs(imag. part)=1. +*> Not referenced if JOBVL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the matrix VL. LDVL >= 1, and +*> if JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is REAL array, dimension (LDVR,N) +*> If JOBVR = 'V', the right eigenvectors v(j) are stored one +*> after another in the columns of VR, in the same order as +*> their eigenvalues. If the j-th eigenvalue is real, then +*> v(j) = VR(:,j), the j-th column of VR. If the j-th and +*> (j+1)-th eigenvalues form a complex conjugate pair, then +*> v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). +*> Each eigenvector is scaled so the largest component has +*> abs(real part)+abs(imag. part)=1. +*> Not referenced if JOBVR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the matrix VR. LDVR >= 1, and +*> if JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,8*N). +*> For good performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1,...,N: +*> The QZ iteration failed. No eigenvectors have been +*> calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) +*> should be correct for j=INFO+1,...,N. +*> > N: =N+1: other than QZ iteration failed in SHGEQZ. +*> =N+2: error return from STGEVC. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realGEeigen +* +* ===================================================================== + SUBROUTINE SGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI, + $ BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY + CHARACTER CHTEMP + INTEGER ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, ILO, + $ IN, IRIGHT, IROWS, ITAU, IWRK, JC, JR, MAXWRK, + $ MINWRK + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SMLNUM, TEMP +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLABAD, + $ SLACPY, SLASCL, SLASET, SORGQR, SORMQR, STGEVC, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -12 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -14 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. The workspace is +* computed assuming ILO = 1 and IHI = N, the worst case.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = MAX( 1, 8*N ) + MAXWRK = MAX( 1, N*( 7 + + $ ILAENV( 1, 'SGEQRF', ' ', N, 1, N, 0 ) ) ) + MAXWRK = MAX( MAXWRK, N*( 7 + + $ ILAENV( 1, 'SORMQR', ' ', N, 1, N, 0 ) ) ) + IF( ILVL ) THEN + MAXWRK = MAX( MAXWRK, N*( 7 + + $ ILAENV( 1, 'SORGQR', ' ', N, 1, N, -1 ) ) ) + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) + $ INFO = -16 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL SLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = SLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL SLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrices A, B to isolate eigenvalues if possible +* (Workspace: need 6*N) +* + ILEFT = 1 + IRIGHT = N + 1 + IWRK = IRIGHT + N + CALL SGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + IF( ILV ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = IWRK + IWRK = ITAU + IROWS + CALL SGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* (Workspace: need N, prefer N*NB) +* + CALL SORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VL +* (Workspace: need N, prefer N*NB) +* + IF( ILVL ) THEN + CALL SLASET( 'Full', N, N, ZERO, ONE, VL, LDVL ) + IF( IROWS.GT.1 ) THEN + CALL SLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + END IF + CALL SORGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VR +* + IF( ILVR ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + IF( ILV ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL SGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IERR ) + ELSE + CALL SGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR ) + END IF +* +* Perform QZ algorithm (Compute eigenvalues, and optionally, the +* Schur forms and Schur vectors) +* (Workspace: need N) +* + IWRK = ITAU + IF( ILV ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF + CALL SHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 110 + END IF +* +* Compute Eigenvectors +* (Workspace: need 6*N) +* + IF( ILV ) THEN + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF + CALL STGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, N, IN, WORK( IWRK ), IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 110 + END IF +* +* Undo balancing on VL and VR and normalization +* (Workspace: none needed) +* + IF( ILVL ) THEN + CALL SGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VL, LDVL, IERR ) + DO 50 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 50 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 10 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) ) ) + 10 CONTINUE + ELSE + DO 20 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) )+ + $ ABS( VL( JR, JC+1 ) ) ) + 20 CONTINUE + END IF + IF( TEMP.LT.SMLNUM ) + $ GO TO 50 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 30 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 30 CONTINUE + ELSE + DO 40 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + VL( JR, JC+1 ) = VL( JR, JC+1 )*TEMP + 40 CONTINUE + END IF + 50 CONTINUE + END IF + IF( ILVR ) THEN + CALL SGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VR, LDVR, IERR ) + DO 100 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 100 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 60 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) ) ) + 60 CONTINUE + ELSE + DO 70 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) )+ + $ ABS( VR( JR, JC+1 ) ) ) + 70 CONTINUE + END IF + IF( TEMP.LT.SMLNUM ) + $ GO TO 100 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 80 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 80 CONTINUE + ELSE + DO 90 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + VR( JR, JC+1 ) = VR( JR, JC+1 )*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + END IF +* +* End of eigenvector calculation +* + END IF +* +* Undo scaling if necessary +* + 110 CONTINUE +* + IF( ILASCL ) THEN + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR ) + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of SGGEV +* + END diff --git a/dspl/liblapack/SRC/sggev3.f b/dspl/liblapack/SRC/sggev3.f new file mode 100644 index 0000000..7a253ad --- /dev/null +++ b/dspl/liblapack/SRC/sggev3.f @@ -0,0 +1,589 @@ +*> \brief SGGEV3 computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices (blocked algorithm) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGGEV3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, +* $ ALPHAI, BETA, VL, LDVL, VR, LDVR, WORK, LWORK, +* $ INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVL, JOBVR +* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), +* $ B( LDB, * ), BETA( * ), VL( LDVL, * ), +* $ VR( LDVR, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGGEV3 computes for a pair of N-by-N real nonsymmetric matrices (A,B) +*> the generalized eigenvalues, and optionally, the left and/or right +*> generalized eigenvectors. +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar +*> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is +*> singular. It is usually represented as the pair (alpha,beta), as +*> there is a reasonable interpretation for beta=0, and even for both +*> being zero. +*> +*> The right eigenvector v(j) corresponding to the eigenvalue lambda(j) +*> of (A,B) satisfies +*> +*> A * v(j) = lambda(j) * B * v(j). +*> +*> The left eigenvector u(j) corresponding to the eigenvalue lambda(j) +*> of (A,B) satisfies +*> +*> u(j)**H * A = lambda(j) * u(j)**H * B . +*> +*> where u(j)**H is the conjugate-transpose of u(j). +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': do not compute the left generalized eigenvectors; +*> = 'V': compute the left generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': do not compute the right generalized eigenvectors; +*> = 'V': compute the right generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VL, and VR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the matrix A in the pair (A,B). +*> On exit, A has been overwritten. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB, N) +*> On entry, the matrix B in the pair (A,B). +*> On exit, B has been overwritten. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is REAL array, dimension (N) +*> On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +*> be the generalized eigenvalues. If ALPHAI(j) is zero, then +*> the j-th eigenvalue is real; if positive, then the j-th and +*> (j+1)-st eigenvalues are a complex conjugate pair, with +*> ALPHAI(j+1) negative. +*> +*> Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +*> may easily over- or underflow, and BETA(j) may even be zero. +*> Thus, the user should avoid naively computing the ratio +*> alpha/beta. However, ALPHAR and ALPHAI will be always less +*> than and usually comparable with norm(A) in magnitude, and +*> BETA always less than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is REAL array, dimension (LDVL,N) +*> If JOBVL = 'V', the left eigenvectors u(j) are stored one +*> after another in the columns of VL, in the same order as +*> their eigenvalues. If the j-th eigenvalue is real, then +*> u(j) = VL(:,j), the j-th column of VL. If the j-th and +*> (j+1)-th eigenvalues form a complex conjugate pair, then +*> u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). +*> Each eigenvector is scaled so the largest component has +*> abs(real part)+abs(imag. part)=1. +*> Not referenced if JOBVL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the matrix VL. LDVL >= 1, and +*> if JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is REAL array, dimension (LDVR,N) +*> If JOBVR = 'V', the right eigenvectors v(j) are stored one +*> after another in the columns of VR, in the same order as +*> their eigenvalues. If the j-th eigenvalue is real, then +*> v(j) = VR(:,j), the j-th column of VR. If the j-th and +*> (j+1)-th eigenvalues form a complex conjugate pair, then +*> v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). +*> Each eigenvector is scaled so the largest component has +*> abs(real part)+abs(imag. part)=1. +*> Not referenced if JOBVR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the matrix VR. LDVR >= 1, and +*> if JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1,...,N: +*> The QZ iteration failed. No eigenvectors have been +*> calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) +*> should be correct for j=INFO+1,...,N. +*> > N: =N+1: other than QZ iteration failed in SHGEQZ. +*> =N+2: error return from STGEVC. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date January 2015 +* +*> \ingroup realGEeigen +* +* ===================================================================== + SUBROUTINE SGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, + $ ALPHAI, BETA, VL, LDVL, VR, LDVR, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.6.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2015 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY + CHARACTER CHTEMP + INTEGER ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, ILO, + $ IN, IRIGHT, IROWS, ITAU, IWRK, JC, JR, LWKOPT + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SMLNUM, TEMP +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHD3, SHGEQZ, SLABAD, + $ SLACPY, SLASCL, SLASET, SORGQR, SORMQR, STGEVC, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANGE + EXTERNAL LSAME, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -12 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -14 + ELSE IF( LWORK.LT.MAX( 1, 8*N ) .AND. .NOT.LQUERY ) THEN + INFO = -16 + END IF +* +* Compute workspace +* + IF( INFO.EQ.0 ) THEN + CALL SGEQRF( N, N, B, LDB, WORK, WORK, -1, IERR ) + LWKOPT = MAX( 1, 8*N, 3*N+INT ( WORK( 1 ) ) ) + CALL SORMQR( 'L', 'T', N, N, N, B, LDB, WORK, A, LDA, WORK, + $ -1, IERR ) + LWKOPT = MAX( LWKOPT, 3*N+INT ( WORK( 1 ) ) ) + CALL SGGHD3( JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, 3*N+INT ( WORK( 1 ) ) ) + IF( ILVL ) THEN + CALL SORGQR( N, N, N, VL, LDVL, WORK, WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, 3*N+INT ( WORK( 1 ) ) ) + CALL SHGEQZ( 'S', JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, + $ WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, 2*N+INT ( WORK( 1 ) ) ) + ELSE + CALL SHGEQZ( 'E', JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, + $ WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, 2*N+INT ( WORK( 1 ) ) ) + END IF + WORK( 1 ) = REAL( LWKOPT ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGEV3 ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL SLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = SLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL SLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrices A, B to isolate eigenvalues if possible +* + ILEFT = 1 + IRIGHT = N + 1 + IWRK = IRIGHT + N + CALL SGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), WORK( IWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* + IROWS = IHI + 1 - ILO + IF( ILV ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = IWRK + IWRK = ITAU + IROWS + CALL SGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* + CALL SORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VL +* + IF( ILVL ) THEN + CALL SLASET( 'Full', N, N, ZERO, ONE, VL, LDVL ) + IF( IROWS.GT.1 ) THEN + CALL SLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + END IF + CALL SORGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VR +* + IF( ILVR ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* + IF( ILV ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL SGGHD3( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, WORK( IWRK ), LWORK+1-IWRK, IERR ) + ELSE + CALL SGGHD3( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Perform QZ algorithm (Compute eigenvalues, and optionally, the +* Schur forms and Schur vectors) +* + IWRK = ITAU + IF( ILV ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF + CALL SHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 110 + END IF +* +* Compute Eigenvectors +* + IF( ILV ) THEN + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF + CALL STGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, N, IN, WORK( IWRK ), IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 110 + END IF +* +* Undo balancing on VL and VR and normalization +* + IF( ILVL ) THEN + CALL SGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VL, LDVL, IERR ) + DO 50 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 50 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 10 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) ) ) + 10 CONTINUE + ELSE + DO 20 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) )+ + $ ABS( VL( JR, JC+1 ) ) ) + 20 CONTINUE + END IF + IF( TEMP.LT.SMLNUM ) + $ GO TO 50 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 30 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 30 CONTINUE + ELSE + DO 40 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + VL( JR, JC+1 ) = VL( JR, JC+1 )*TEMP + 40 CONTINUE + END IF + 50 CONTINUE + END IF + IF( ILVR ) THEN + CALL SGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ), + $ WORK( IRIGHT ), N, VR, LDVR, IERR ) + DO 100 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 100 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 60 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) ) ) + 60 CONTINUE + ELSE + DO 70 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) )+ + $ ABS( VR( JR, JC+1 ) ) ) + 70 CONTINUE + END IF + IF( TEMP.LT.SMLNUM ) + $ GO TO 100 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 80 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 80 CONTINUE + ELSE + DO 90 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + VR( JR, JC+1 ) = VR( JR, JC+1 )*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + END IF +* +* End of eigenvector calculation +* + END IF +* +* Undo scaling if necessary +* + 110 CONTINUE +* + IF( ILASCL ) THEN + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR ) + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + WORK( 1 ) = REAL( LWKOPT ) + RETURN +* +* End of SGGEV3 +* + END diff --git a/dspl/liblapack/SRC/sggevx.f b/dspl/liblapack/SRC/sggevx.f new file mode 100644 index 0000000..08f0225 --- /dev/null +++ b/dspl/liblapack/SRC/sggevx.f @@ -0,0 +1,866 @@ +*> \brief SGGEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGGEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGGEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, B, LDB, +* ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, ILO, +* IHI, LSCALE, RSCALE, ABNRM, BBNRM, RCONDE, +* RCONDV, WORK, LWORK, IWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER BALANC, JOBVL, JOBVR, SENSE +* INTEGER IHI, ILO, INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* REAL ABNRM, BBNRM +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* INTEGER IWORK( * ) +* REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), +* $ B( LDB, * ), BETA( * ), LSCALE( * ), +* $ RCONDE( * ), RCONDV( * ), RSCALE( * ), +* $ VL( LDVL, * ), VR( LDVR, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGGEVX computes for a pair of N-by-N real nonsymmetric matrices (A,B) +*> the generalized eigenvalues, and optionally, the left and/or right +*> generalized eigenvectors. +*> +*> Optionally also, it computes a balancing transformation to improve +*> the conditioning of the eigenvalues and eigenvectors (ILO, IHI, +*> LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for +*> the eigenvalues (RCONDE), and reciprocal condition numbers for the +*> right eigenvectors (RCONDV). +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar +*> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is +*> singular. It is usually represented as the pair (alpha,beta), as +*> there is a reasonable interpretation for beta=0, and even for both +*> being zero. +*> +*> The right eigenvector v(j) corresponding to the eigenvalue lambda(j) +*> of (A,B) satisfies +*> +*> A * v(j) = lambda(j) * B * v(j) . +*> +*> The left eigenvector u(j) corresponding to the eigenvalue lambda(j) +*> of (A,B) satisfies +*> +*> u(j)**H * A = lambda(j) * u(j)**H * B. +*> +*> where u(j)**H is the conjugate-transpose of u(j). +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] BALANC +*> \verbatim +*> BALANC is CHARACTER*1 +*> Specifies the balance option to be performed. +*> = 'N': do not diagonally scale or permute; +*> = 'P': permute only; +*> = 'S': scale only; +*> = 'B': both permute and scale. +*> Computed reciprocal condition numbers will be for the +*> matrices after permuting and/or balancing. Permuting does +*> not change condition numbers (in exact arithmetic), but +*> balancing does. +*> \endverbatim +*> +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': do not compute the left generalized eigenvectors; +*> = 'V': compute the left generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': do not compute the right generalized eigenvectors; +*> = 'V': compute the right generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] SENSE +*> \verbatim +*> SENSE is CHARACTER*1 +*> Determines which reciprocal condition numbers are computed. +*> = 'N': none are computed; +*> = 'E': computed for eigenvalues only; +*> = 'V': computed for eigenvectors only; +*> = 'B': computed for eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VL, and VR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the matrix A in the pair (A,B). +*> On exit, A has been overwritten. If JOBVL='V' or JOBVR='V' +*> or both, then A contains the first part of the real Schur +*> form of the "balanced" versions of the input A and B. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB, N) +*> On entry, the matrix B in the pair (A,B). +*> On exit, B has been overwritten. If JOBVL='V' or JOBVR='V' +*> or both, then B contains the second part of the real Schur +*> form of the "balanced" versions of the input A and B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is REAL array, dimension (N) +*> On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +*> be the generalized eigenvalues. If ALPHAI(j) is zero, then +*> the j-th eigenvalue is real; if positive, then the j-th and +*> (j+1)-st eigenvalues are a complex conjugate pair, with +*> ALPHAI(j+1) negative. +*> +*> Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j) +*> may easily over- or underflow, and BETA(j) may even be zero. +*> Thus, the user should avoid naively computing the ratio +*> ALPHA/BETA. However, ALPHAR and ALPHAI will be always less +*> than and usually comparable with norm(A) in magnitude, and +*> BETA always less than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is REAL array, dimension (LDVL,N) +*> If JOBVL = 'V', the left eigenvectors u(j) are stored one +*> after another in the columns of VL, in the same order as +*> their eigenvalues. If the j-th eigenvalue is real, then +*> u(j) = VL(:,j), the j-th column of VL. If the j-th and +*> (j+1)-th eigenvalues form a complex conjugate pair, then +*> u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1). +*> Each eigenvector will be scaled so the largest component have +*> abs(real part) + abs(imag. part) = 1. +*> Not referenced if JOBVL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the matrix VL. LDVL >= 1, and +*> if JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is REAL array, dimension (LDVR,N) +*> If JOBVR = 'V', the right eigenvectors v(j) are stored one +*> after another in the columns of VR, in the same order as +*> their eigenvalues. If the j-th eigenvalue is real, then +*> v(j) = VR(:,j), the j-th column of VR. If the j-th and +*> (j+1)-th eigenvalues form a complex conjugate pair, then +*> v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1). +*> Each eigenvector will be scaled so the largest component have +*> abs(real part) + abs(imag. part) = 1. +*> Not referenced if JOBVR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the matrix VR. LDVR >= 1, and +*> if JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[out] IHI +*> \verbatim +*> IHI is INTEGER +*> ILO and IHI are integer values such that on exit +*> A(i,j) = 0 and B(i,j) = 0 if i > j and +*> j = 1,...,ILO-1 or i = IHI+1,...,N. +*> If BALANC = 'N' or 'S', ILO = 1 and IHI = N. +*> \endverbatim +*> +*> \param[out] LSCALE +*> \verbatim +*> LSCALE is REAL array, dimension (N) +*> Details of the permutations and scaling factors applied +*> to the left side of A and B. If PL(j) is the index of the +*> row interchanged with row j, and DL(j) is the scaling +*> factor applied to row j, then +*> LSCALE(j) = PL(j) for j = 1,...,ILO-1 +*> = DL(j) for j = ILO,...,IHI +*> = PL(j) for j = IHI+1,...,N. +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] RSCALE +*> \verbatim +*> RSCALE is REAL array, dimension (N) +*> Details of the permutations and scaling factors applied +*> to the right side of A and B. If PR(j) is the index of the +*> column interchanged with column j, and DR(j) is the scaling +*> factor applied to column j, then +*> RSCALE(j) = PR(j) for j = 1,...,ILO-1 +*> = DR(j) for j = ILO,...,IHI +*> = PR(j) for j = IHI+1,...,N +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] ABNRM +*> \verbatim +*> ABNRM is REAL +*> The one-norm of the balanced matrix A. +*> \endverbatim +*> +*> \param[out] BBNRM +*> \verbatim +*> BBNRM is REAL +*> The one-norm of the balanced matrix B. +*> \endverbatim +*> +*> \param[out] RCONDE +*> \verbatim +*> RCONDE is REAL array, dimension (N) +*> If SENSE = 'E' or 'B', the reciprocal condition numbers of +*> the eigenvalues, stored in consecutive elements of the array. +*> For a complex conjugate pair of eigenvalues two consecutive +*> elements of RCONDE are set to the same value. Thus RCONDE(j), +*> RCONDV(j), and the j-th columns of VL and VR all correspond +*> to the j-th eigenpair. +*> If SENSE = 'N' or 'V', RCONDE is not referenced. +*> \endverbatim +*> +*> \param[out] RCONDV +*> \verbatim +*> RCONDV is REAL array, dimension (N) +*> If SENSE = 'V' or 'B', the estimated reciprocal condition +*> numbers of the eigenvectors, stored in consecutive elements +*> of the array. For a complex eigenvector two consecutive +*> elements of RCONDV are set to the same value. If the +*> eigenvalues cannot be reordered to compute RCONDV(j), +*> RCONDV(j) is set to 0; this can only occur when the true +*> value would be very small anyway. +*> If SENSE = 'N' or 'E', RCONDV is not referenced. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,2*N). +*> If BALANC = 'S' or 'B', or JOBVL = 'V', or JOBVR = 'V', +*> LWORK >= max(1,6*N). +*> If SENSE = 'E', LWORK >= max(1,10*N). +*> If SENSE = 'V' or 'B', LWORK >= 2*N*N+8*N+16. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N+6) +*> If SENSE = 'E', IWORK is not referenced. +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> If SENSE = 'N', BWORK is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1,...,N: +*> The QZ iteration failed. No eigenvectors have been +*> calculated, but ALPHAR(j), ALPHAI(j), and BETA(j) +*> should be correct for j=INFO+1,...,N. +*> > N: =N+1: other than QZ iteration failed in SHGEQZ. +*> =N+2: error return from STGEVC. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realGEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Balancing a matrix pair (A,B) includes, first, permuting rows and +*> columns to isolate eigenvalues, second, applying diagonal similarity +*> transformation to the rows and columns to make the rows and columns +*> as close in norm as possible. The computed reciprocal condition +*> numbers correspond to the balanced matrix. Permuting rows and columns +*> will not change the condition numbers (in exact arithmetic) but +*> diagonal scaling will. For further explanation of balancing, see +*> section 4.11.1.2 of LAPACK Users' Guide. +*> +*> An approximate error bound on the chordal distance between the i-th +*> computed generalized eigenvalue w and the corresponding exact +*> eigenvalue lambda is +*> +*> chord(w, lambda) <= EPS * norm(ABNRM, BBNRM) / RCONDE(I) +*> +*> An approximate error bound for the angle between the i-th computed +*> eigenvector VL(i) or VR(i) is given by +*> +*> EPS * norm(ABNRM, BBNRM) / DIF(i). +*> +*> For further explanation of the reciprocal condition numbers RCONDE +*> and RCONDV, see section 4.11 of LAPACK User's Guide. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGGEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, ILO, + $ IHI, LSCALE, RSCALE, ABNRM, BBNRM, RCONDE, + $ RCONDV, WORK, LWORK, IWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER BALANC, JOBVL, JOBVR, SENSE + INTEGER IHI, ILO, INFO, LDA, LDB, LDVL, LDVR, LWORK, N + REAL ABNRM, BBNRM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), LSCALE( * ), + $ RCONDE( * ), RCONDV( * ), RSCALE( * ), + $ VL( LDVL, * ), VR( LDVR, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY, NOSCL, + $ PAIR, WANTSB, WANTSE, WANTSN, WANTSV + CHARACTER CHTEMP + INTEGER I, ICOLS, IERR, IJOBVL, IJOBVR, IN, IROWS, + $ ITAU, IWRK, IWRK1, J, JC, JR, M, MAXWRK, + $ MINWRK, MM + REAL ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SMLNUM, TEMP +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLABAD, + $ SLACPY, SLASCL, SLASET, SORGQR, SORMQR, STGEVC, + $ STGSNA, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANGE + EXTERNAL LSAME, ILAENV, SLAMCH, SLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* + NOSCL = LSAME( BALANC, 'N' ) .OR. LSAME( BALANC, 'P' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( NOSCL .OR. LSAME( BALANC, 'S' ) .OR. + $ LSAME( BALANC, 'B' ) ) ) THEN + INFO = -1 + ELSE IF( IJOBVL.LE.0 ) THEN + INFO = -2 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -3 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSB .OR. WANTSV ) ) + $ THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -14 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -16 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. The workspace is +* computed assuming ILO = 1 and IHI = N, the worst case.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + ELSE + IF( NOSCL .AND. .NOT.ILV ) THEN + MINWRK = 2*N + ELSE + MINWRK = 6*N + END IF + IF( WANTSE ) THEN + MINWRK = 10*N + ELSE IF( WANTSV .OR. WANTSB ) THEN + MINWRK = 2*N*( N + 4 ) + 16 + END IF + MAXWRK = MINWRK + MAXWRK = MAX( MAXWRK, + $ N + N*ILAENV( 1, 'SGEQRF', ' ', N, 1, N, 0 ) ) + MAXWRK = MAX( MAXWRK, + $ N + N*ILAENV( 1, 'SORMQR', ' ', N, 1, N, 0 ) ) + IF( ILVL ) THEN + MAXWRK = MAX( MAXWRK, N + + $ N*ILAENV( 1, 'SORGQR', ' ', N, 1, N, 0 ) ) + END IF + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -26 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = SLANGE( 'M', N, N, A, LDA, WORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL SLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = SLANGE( 'M', N, N, B, LDB, WORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL SLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute and/or balance the matrix pair (A,B) +* (Workspace: need 6*N if BALANC = 'S' or 'B', 1 otherwise) +* + CALL SGGBAL( BALANC, N, A, LDA, B, LDB, ILO, IHI, LSCALE, RSCALE, + $ WORK, IERR ) +* +* Compute ABNRM and BBNRM +* + ABNRM = SLANGE( '1', N, N, A, LDA, WORK( 1 ) ) + IF( ILASCL ) THEN + WORK( 1 ) = ABNRM + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, 1, 1, WORK( 1 ), 1, + $ IERR ) + ABNRM = WORK( 1 ) + END IF +* + BBNRM = SLANGE( '1', N, N, B, LDB, WORK( 1 ) ) + IF( ILBSCL ) THEN + WORK( 1 ) = BBNRM + CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, 1, 1, WORK( 1 ), 1, + $ IERR ) + BBNRM = WORK( 1 ) + END IF +* +* Reduce B to triangular form (QR decomposition of B) +* (Workspace: need N, prefer N*NB ) +* + IROWS = IHI + 1 - ILO + IF( ILV .OR. .NOT.WANTSN ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = 1 + IWRK = ITAU + IROWS + CALL SGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to A +* (Workspace: need N, prefer N*NB) +* + CALL SORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VL and/or VR +* (Workspace: need N, prefer N*NB) +* + IF( ILVL ) THEN + CALL SLASET( 'Full', N, N, ZERO, ONE, VL, LDVL ) + IF( IROWS.GT.1 ) THEN + CALL SLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + END IF + CALL SORGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* + IF( ILVR ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + IF( ILV .OR. .NOT.WANTSN ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL SGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IERR ) + ELSE + CALL SGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR ) + END IF +* +* Perform QZ algorithm (Compute eigenvalues, and optionally, the +* Schur forms and Schur vectors) +* (Workspace: need N) +* + IF( ILV .OR. .NOT.WANTSN ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF +* + CALL SHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, WORK, + $ LWORK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 130 + END IF +* +* Compute Eigenvectors and estimate condition numbers if desired +* (Workspace: STGEVC: need 6*N +* STGSNA: need 2*N*(N+2)+16 if SENSE = 'V' or 'B', +* need N otherwise ) +* + IF( ILV .OR. .NOT.WANTSN ) THEN + IF( ILV ) THEN + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL STGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, N, IN, WORK, IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 130 + END IF + END IF +* + IF( .NOT.WANTSN ) THEN +* +* compute eigenvectors (STGEVC) and estimate condition +* numbers (STGSNA). Note that the definition of the condition +* number is not invariant under transformation (u,v) to +* (Q*u, Z*v), where (u,v) are eigenvectors of the generalized +* Schur form (S,T), Q and Z are orthogonal matrices. In order +* to avoid using extra 2*N*N workspace, we have to recalculate +* eigenvectors and estimate one condition numbers at a time. +* + PAIR = .FALSE. + DO 20 I = 1, N +* + IF( PAIR ) THEN + PAIR = .FALSE. + GO TO 20 + END IF + MM = 1 + IF( I.LT.N ) THEN + IF( A( I+1, I ).NE.ZERO ) THEN + PAIR = .TRUE. + MM = 2 + END IF + END IF +* + DO 10 J = 1, N + BWORK( J ) = .FALSE. + 10 CONTINUE + IF( MM.EQ.1 ) THEN + BWORK( I ) = .TRUE. + ELSE IF( MM.EQ.2 ) THEN + BWORK( I ) = .TRUE. + BWORK( I+1 ) = .TRUE. + END IF +* + IWRK = MM*N + 1 + IWRK1 = IWRK + MM*N +* +* Compute a pair of left and right eigenvectors. +* (compute workspace: need up to 4*N + 6*N) +* + IF( WANTSE .OR. WANTSB ) THEN + CALL STGEVC( 'B', 'S', BWORK, N, A, LDA, B, LDB, + $ WORK( 1 ), N, WORK( IWRK ), N, MM, M, + $ WORK( IWRK1 ), IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 130 + END IF + END IF +* + CALL STGSNA( SENSE, 'S', BWORK, N, A, LDA, B, LDB, + $ WORK( 1 ), N, WORK( IWRK ), N, RCONDE( I ), + $ RCONDV( I ), MM, M, WORK( IWRK1 ), + $ LWORK-IWRK1+1, IWORK, IERR ) +* + 20 CONTINUE + END IF + END IF +* +* Undo balancing on VL and VR and normalization +* (Workspace: none needed) +* + IF( ILVL ) THEN + CALL SGGBAK( BALANC, 'L', N, ILO, IHI, LSCALE, RSCALE, N, VL, + $ LDVL, IERR ) +* + DO 70 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 70 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 30 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) ) ) + 30 CONTINUE + ELSE + DO 40 JR = 1, N + TEMP = MAX( TEMP, ABS( VL( JR, JC ) )+ + $ ABS( VL( JR, JC+1 ) ) ) + 40 CONTINUE + END IF + IF( TEMP.LT.SMLNUM ) + $ GO TO 70 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 50 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 50 CONTINUE + ELSE + DO 60 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + VL( JR, JC+1 ) = VL( JR, JC+1 )*TEMP + 60 CONTINUE + END IF + 70 CONTINUE + END IF + IF( ILVR ) THEN + CALL SGGBAK( BALANC, 'R', N, ILO, IHI, LSCALE, RSCALE, N, VR, + $ LDVR, IERR ) + DO 120 JC = 1, N + IF( ALPHAI( JC ).LT.ZERO ) + $ GO TO 120 + TEMP = ZERO + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 80 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) ) ) + 80 CONTINUE + ELSE + DO 90 JR = 1, N + TEMP = MAX( TEMP, ABS( VR( JR, JC ) )+ + $ ABS( VR( JR, JC+1 ) ) ) + 90 CONTINUE + END IF + IF( TEMP.LT.SMLNUM ) + $ GO TO 120 + TEMP = ONE / TEMP + IF( ALPHAI( JC ).EQ.ZERO ) THEN + DO 100 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 100 CONTINUE + ELSE + DO 110 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + VR( JR, JC+1 ) = VR( JR, JC+1 )*TEMP + 110 CONTINUE + END IF + 120 CONTINUE + END IF +* +* Undo scaling if necessary +* + 130 CONTINUE +* + IF( ILASCL ) THEN + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR ) + CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of SGGEVX +* + END diff --git a/dspl/liblapack/SRC/sggglm.f b/dspl/liblapack/SRC/sggglm.f new file mode 100644 index 0000000..fe63da5 --- /dev/null +++ b/dspl/liblapack/SRC/sggglm.f @@ -0,0 +1,348 @@ +*> \brief \b SGGGLM +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGGGLM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), D( * ), WORK( * ), +* $ X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGGGLM solves a general Gauss-Markov linear model (GLM) problem: +*> +*> minimize || y ||_2 subject to d = A*x + B*y +*> x +*> +*> where A is an N-by-M matrix, B is an N-by-P matrix, and d is a +*> given N-vector. It is assumed that M <= N <= M+P, and +*> +*> rank(A) = M and rank( A B ) = N. +*> +*> Under these assumptions, the constrained equation is always +*> consistent, and there is a unique solution x and a minimal 2-norm +*> solution y, which is obtained using a generalized QR factorization +*> of the matrices (A, B) given by +*> +*> A = Q*(R), B = Q*T*Z. +*> (0) +*> +*> In particular, if matrix B is square nonsingular, then the problem +*> GLM is equivalent to the following weighted linear least squares +*> problem +*> +*> minimize || inv(B)*(d-A*x) ||_2 +*> x +*> +*> where inv(B) denotes the inverse of B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of columns of the matrix A. 0 <= M <= N. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of columns of the matrix B. P >= N-M. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,M) +*> On entry, the N-by-M matrix A. +*> On exit, the upper triangular part of the array A contains +*> the M-by-M upper triangular matrix R. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,P) +*> On entry, the N-by-P matrix B. +*> On exit, if N <= P, the upper triangle of the subarray +*> B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; +*> if N > P, the elements on and above the (N-P)th subdiagonal +*> contain the N-by-P upper trapezoidal matrix T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, D is the left hand side of the GLM equation. +*> On exit, D is destroyed. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (M) +*> \endverbatim +*> +*> \param[out] Y +*> \verbatim +*> Y is REAL array, dimension (P) +*> +*> On exit, X and Y are the solutions of the GLM problem. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N+M+P). +*> For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB, +*> where NB is an upper bound for the optimal blocksizes for +*> SGEQRF, SGERQF, SORMQR and SORMRQ. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1: the upper triangular factor R associated with A in the +*> generalized QR factorization of the pair (A, B) is +*> singular, so that rank(A) < M; the least squares +*> solution could not be computed. +*> = 2: the bottom (N-M) by (N-M) part of the upper trapezoidal +*> factor T associated with B in the generalized QR +*> factorization of the pair (A, B) is singular, so that +*> rank( A B ) < N; the least squares solution could not +*> be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHEReigen +* +* ===================================================================== + SUBROUTINE SGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), D( * ), WORK( * ), + $ X( * ), Y( * ) +* .. +* +* =================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, LOPT, LWKMIN, LWKOPT, NB, NB1, NB2, NB3, + $ NB4, NP +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMV, SGGQRF, SORMQR, SORMRQ, STRTRS, + $ XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NP = MIN( N, P ) + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 .OR. M.GT.N ) THEN + INFO = -2 + ELSE IF( P.LT.0 .OR. P.LT.N-M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF +* +* Calculate workspace +* + IF( INFO.EQ.0) THEN + IF( N.EQ.0 ) THEN + LWKMIN = 1 + LWKOPT = 1 + ELSE + NB1 = ILAENV( 1, 'SGEQRF', ' ', N, M, -1, -1 ) + NB2 = ILAENV( 1, 'SGERQF', ' ', N, M, -1, -1 ) + NB3 = ILAENV( 1, 'SORMQR', ' ', N, M, P, -1 ) + NB4 = ILAENV( 1, 'SORMRQ', ' ', N, M, P, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKMIN = M + N + P + LWKOPT = M + NP + MAX( N, P )*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGGLM', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the GQR factorization of matrices A and B: +* +* Q**T*A = ( R11 ) M, Q**T*B*Z**T = ( T11 T12 ) M +* ( 0 ) N-M ( 0 T22 ) N-M +* M M+P-N N-M +* +* where R11 and T22 are upper triangular, and Q and Z are +* orthogonal. +* + CALL SGGQRF( N, M, P, A, LDA, WORK, B, LDB, WORK( M+1 ), + $ WORK( M+NP+1 ), LWORK-M-NP, INFO ) + LOPT = WORK( M+NP+1 ) +* +* Update left-hand-side vector d = Q**T*d = ( d1 ) M +* ( d2 ) N-M +* + CALL SORMQR( 'Left', 'Transpose', N, 1, M, A, LDA, WORK, D, + $ MAX( 1, N ), WORK( M+NP+1 ), LWORK-M-NP, INFO ) + LOPT = MAX( LOPT, INT( WORK( M+NP+1 ) ) ) +* +* Solve T22*y2 = d2 for y2 +* + IF( N.GT.M ) THEN + CALL STRTRS( 'Upper', 'No transpose', 'Non unit', N-M, 1, + $ B( M+1, M+P-N+1 ), LDB, D( M+1 ), N-M, INFO ) +* + IF( INFO.GT.0 ) THEN + INFO = 1 + RETURN + END IF +* + CALL SCOPY( N-M, D( M+1 ), 1, Y( M+P-N+1 ), 1 ) + END IF +* +* Set y1 = 0 +* + DO 10 I = 1, M + P - N + Y( I ) = ZERO + 10 CONTINUE +* +* Update d1 = d1 - T12*y2 +* + CALL SGEMV( 'No transpose', M, N-M, -ONE, B( 1, M+P-N+1 ), LDB, + $ Y( M+P-N+1 ), 1, ONE, D, 1 ) +* +* Solve triangular system: R11*x = d1 +* + IF( M.GT.0 ) THEN + CALL STRTRS( 'Upper', 'No Transpose', 'Non unit', M, 1, A, LDA, + $ D, M, INFO ) +* + IF( INFO.GT.0 ) THEN + INFO = 2 + RETURN + END IF +* +* Copy D to X +* + CALL SCOPY( M, D, 1, X, 1 ) + END IF +* +* Backward transformation y = Z**T *y +* + CALL SORMRQ( 'Left', 'Transpose', P, 1, NP, + $ B( MAX( 1, N-P+1 ), 1 ), LDB, WORK( M+1 ), Y, + $ MAX( 1, P ), WORK( M+NP+1 ), LWORK-M-NP, INFO ) + WORK( 1 ) = M + NP + MAX( LOPT, INT( WORK( M+NP+1 ) ) ) +* + RETURN +* +* End of SGGGLM +* + END diff --git a/dspl/liblapack/SRC/sgghd3.f b/dspl/liblapack/SRC/sgghd3.f new file mode 100644 index 0000000..add2161 --- /dev/null +++ b/dspl/liblapack/SRC/sgghd3.f @@ -0,0 +1,897 @@ +*> \brief \b SGGHD3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGGHRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, +* LDQ, Z, LDZ, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ, COMPZ +* INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ Z( LDZ, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGGHD3 reduces a pair of real matrices (A,B) to generalized upper +*> Hessenberg form using orthogonal transformations, where A is a +*> general matrix and B is upper triangular. The form of the +*> generalized eigenvalue problem is +*> A*x = lambda*B*x, +*> and B is typically made upper triangular by computing its QR +*> factorization and moving the orthogonal matrix Q to the left side +*> of the equation. +*> +*> This subroutine simultaneously reduces A to a Hessenberg matrix H: +*> Q**T*A*Z = H +*> and transforms B to another upper triangular matrix T: +*> Q**T*B*Z = T +*> in order to reduce the problem to its standard form +*> H*y = lambda*T*y +*> where y = Z**T*x. +*> +*> The orthogonal matrices Q and Z are determined as products of Givens +*> rotations. They may either be formed explicitly, or they may be +*> postmultiplied into input matrices Q1 and Z1, so that +*> +*> Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T +*> +*> Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T +*> +*> If Q1 is the orthogonal matrix from the QR factorization of B in the +*> original equation A*x = lambda*B*x, then SGGHD3 reduces the original +*> problem to generalized Hessenberg form. +*> +*> This is a blocked variant of SGGHRD, using matrix-matrix +*> multiplications for parts of the computation to enhance performance. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> = 'N': do not compute Q; +*> = 'I': Q is initialized to the unit matrix, and the +*> orthogonal matrix Q is returned; +*> = 'V': Q must contain an orthogonal matrix Q1 on entry, +*> and the product Q1*Q is returned. +*> \endverbatim +*> +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': do not compute Z; +*> = 'I': Z is initialized to the unit matrix, and the +*> orthogonal matrix Z is returned; +*> = 'V': Z must contain an orthogonal matrix Z1 on entry, +*> and the product Z1*Z is returned. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> ILO and IHI mark the rows and columns of A which are to be +*> reduced. It is assumed that A is already upper triangular +*> in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are +*> normally set by a previous call to SGGBAL; otherwise they +*> should be set to 1 and N respectively. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the N-by-N general matrix to be reduced. +*> On exit, the upper triangle and the first subdiagonal of A +*> are overwritten with the upper Hessenberg matrix H, and the +*> rest is set to zero. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB, N) +*> On entry, the N-by-N upper triangular matrix B. +*> On exit, the upper triangular matrix T = Q**T B Z. The +*> elements below the diagonal are set to zero. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ, N) +*> On entry, if COMPQ = 'V', the orthogonal matrix Q1, +*> typically from the QR factorization of B. +*> On exit, if COMPQ='I', the orthogonal matrix Q, and if +*> COMPQ = 'V', the product Q1*Q. +*> Not referenced if COMPQ='N'. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', the orthogonal matrix Z1. +*> On exit, if COMPZ='I', the orthogonal matrix Z, and if +*> COMPZ = 'V', the product Z1*Z. +*> Not referenced if COMPZ='N'. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. +*> LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1. +*> For optimum performance LWORK >= 6*N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date January 2015 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> This routine reduces A to Hessenberg form and maintains B in +*> using a blocked variant of Moler and Stewart's original algorithm, +*> as described by Kagstrom, Kressner, Quintana-Orti, and Quintana-Orti +*> (BIT 2008). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, + $ LDQ, Z, LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2015 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ + INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL BLK22, INITQ, INITZ, LQUERY, WANTQ, WANTZ + CHARACTER*1 COMPQ2, COMPZ2 + INTEGER COLA, I, IERR, J, J0, JCOL, JJ, JROW, K, + $ KACC22, LEN, LWKOPT, N2NB, NB, NBLST, NBMIN, + $ NH, NNB, NX, PPW, PPWO, PW, TOP, TOPQ + REAL C, C1, C2, S, S1, S2, TEMP, TEMP1, TEMP2, TEMP3 +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGGHRD, SLARTG, SLASET, SORM22, SROT, SGEMM, + $ SGEMV, STRMV, SLACPY, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + INFO = 0 + NB = ILAENV( 1, 'SGGHD3', ' ', N, ILO, IHI, -1 ) + LWKOPT = MAX( 6*N*NB, 1 ) + WORK( 1 ) = REAL( LWKOPT ) + INITQ = LSAME( COMPQ, 'I' ) + WANTQ = INITQ .OR. LSAME( COMPQ, 'V' ) + INITZ = LSAME( COMPZ, 'I' ) + WANTZ = INITZ .OR. LSAME( COMPZ, 'V' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPZ, 'N' ) .AND. .NOT.WANTZ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( ( WANTQ .AND. LDQ.LT.N ) .OR. LDQ.LT.1 ) THEN + INFO = -11 + ELSE IF( ( WANTZ .AND. LDZ.LT.N ) .OR. LDZ.LT.1 ) THEN + INFO = -13 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGHD3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Initialize Q and Z if desired. +* + IF( INITQ ) + $ CALL SLASET( 'All', N, N, ZERO, ONE, Q, LDQ ) + IF( INITZ ) + $ CALL SLASET( 'All', N, N, ZERO, ONE, Z, LDZ ) +* +* Zero out lower triangle of B. +* + IF( N.GT.1 ) + $ CALL SLASET( 'Lower', N-1, N-1, ZERO, ZERO, B(2, 1), LDB ) +* +* Quick return if possible +* + NH = IHI - ILO + 1 + IF( NH.LE.1 ) THEN + WORK( 1 ) = ONE + RETURN + END IF +* +* Determine the blocksize. +* + NBMIN = ILAENV( 2, 'SGGHD3', ' ', N, ILO, IHI, -1 ) + IF( NB.GT.1 .AND. NB.LT.NH ) THEN +* +* Determine when to use unblocked instead of blocked code. +* + NX = MAX( NB, ILAENV( 3, 'SGGHD3', ' ', N, ILO, IHI, -1 ) ) + IF( NX.LT.NH ) THEN +* +* Determine if workspace is large enough for blocked code. +* + IF( LWORK.LT.LWKOPT ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code. +* + NBMIN = MAX( 2, ILAENV( 2, 'SGGHD3', ' ', N, ILO, IHI, + $ -1 ) ) + IF( LWORK.GE.6*N*NBMIN ) THEN + NB = LWORK / ( 6*N ) + ELSE + NB = 1 + END IF + END IF + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN +* +* Use unblocked code below +* + JCOL = ILO +* + ELSE +* +* Use blocked code +* + KACC22 = ILAENV( 16, 'SGGHD3', ' ', N, ILO, IHI, -1 ) + BLK22 = KACC22.EQ.2 + DO JCOL = ILO, IHI-2, NB + NNB = MIN( NB, IHI-JCOL-1 ) +* +* Initialize small orthogonal factors that will hold the +* accumulated Givens rotations in workspace. +* N2NB denotes the number of 2*NNB-by-2*NNB factors +* NBLST denotes the (possibly smaller) order of the last +* factor. +* + N2NB = ( IHI-JCOL-1 ) / NNB - 1 + NBLST = IHI - JCOL - N2NB*NNB + CALL SLASET( 'All', NBLST, NBLST, ZERO, ONE, WORK, NBLST ) + PW = NBLST * NBLST + 1 + DO I = 1, N2NB + CALL SLASET( 'All', 2*NNB, 2*NNB, ZERO, ONE, + $ WORK( PW ), 2*NNB ) + PW = PW + 4*NNB*NNB + END DO +* +* Reduce columns JCOL:JCOL+NNB-1 of A to Hessenberg form. +* + DO J = JCOL, JCOL+NNB-1 +* +* Reduce Jth column of A. Store cosines and sines in Jth +* column of A and B, respectively. +* + DO I = IHI, J+2, -1 + TEMP = A( I-1, J ) + CALL SLARTG( TEMP, A( I, J ), C, S, A( I-1, J ) ) + A( I, J ) = C + B( I, J ) = S + END DO +* +* Accumulate Givens rotations into workspace array. +* + PPW = ( NBLST + 1 )*( NBLST - 2 ) - J + JCOL + 1 + LEN = 2 + J - JCOL + JROW = J + N2NB*NNB + 2 + DO I = IHI, JROW, -1 + C = A( I, J ) + S = B( I, J ) + DO JJ = PPW, PPW+LEN-1 + TEMP = WORK( JJ + NBLST ) + WORK( JJ + NBLST ) = C*TEMP - S*WORK( JJ ) + WORK( JJ ) = S*TEMP + C*WORK( JJ ) + END DO + LEN = LEN + 1 + PPW = PPW - NBLST - 1 + END DO +* + PPWO = NBLST*NBLST + ( NNB+J-JCOL-1 )*2*NNB + NNB + J0 = JROW - NNB + DO JROW = J0, J+2, -NNB + PPW = PPWO + LEN = 2 + J - JCOL + DO I = JROW+NNB-1, JROW, -1 + C = A( I, J ) + S = B( I, J ) + DO JJ = PPW, PPW+LEN-1 + TEMP = WORK( JJ + 2*NNB ) + WORK( JJ + 2*NNB ) = C*TEMP - S*WORK( JJ ) + WORK( JJ ) = S*TEMP + C*WORK( JJ ) + END DO + LEN = LEN + 1 + PPW = PPW - 2*NNB - 1 + END DO + PPWO = PPWO + 4*NNB*NNB + END DO +* +* TOP denotes the number of top rows in A and B that will +* not be updated during the next steps. +* + IF( JCOL.LE.2 ) THEN + TOP = 0 + ELSE + TOP = JCOL + END IF +* +* Propagate transformations through B and replace stored +* left sines/cosines by right sines/cosines. +* + DO JJ = N, J+1, -1 +* +* Update JJth column of B. +* + DO I = MIN( JJ+1, IHI ), J+2, -1 + C = A( I, J ) + S = B( I, J ) + TEMP = B( I, JJ ) + B( I, JJ ) = C*TEMP - S*B( I-1, JJ ) + B( I-1, JJ ) = S*TEMP + C*B( I-1, JJ ) + END DO +* +* Annihilate B( JJ+1, JJ ). +* + IF( JJ.LT.IHI ) THEN + TEMP = B( JJ+1, JJ+1 ) + CALL SLARTG( TEMP, B( JJ+1, JJ ), C, S, + $ B( JJ+1, JJ+1 ) ) + B( JJ+1, JJ ) = ZERO + CALL SROT( JJ-TOP, B( TOP+1, JJ+1 ), 1, + $ B( TOP+1, JJ ), 1, C, S ) + A( JJ+1, J ) = C + B( JJ+1, J ) = -S + END IF + END DO +* +* Update A by transformations from right. +* Explicit loop unrolling provides better performance +* compared to SLASR. +* CALL SLASR( 'Right', 'Variable', 'Backward', IHI-TOP, +* $ IHI-J, A( J+2, J ), B( J+2, J ), +* $ A( TOP+1, J+1 ), LDA ) +* + JJ = MOD( IHI-J-1, 3 ) + DO I = IHI-J-3, JJ+1, -3 + C = A( J+1+I, J ) + S = -B( J+1+I, J ) + C1 = A( J+2+I, J ) + S1 = -B( J+2+I, J ) + C2 = A( J+3+I, J ) + S2 = -B( J+3+I, J ) +* + DO K = TOP+1, IHI + TEMP = A( K, J+I ) + TEMP1 = A( K, J+I+1 ) + TEMP2 = A( K, J+I+2 ) + TEMP3 = A( K, J+I+3 ) + A( K, J+I+3 ) = C2*TEMP3 + S2*TEMP2 + TEMP2 = -S2*TEMP3 + C2*TEMP2 + A( K, J+I+2 ) = C1*TEMP2 + S1*TEMP1 + TEMP1 = -S1*TEMP2 + C1*TEMP1 + A( K, J+I+1 ) = C*TEMP1 + S*TEMP + A( K, J+I ) = -S*TEMP1 + C*TEMP + END DO + END DO +* + IF( JJ.GT.0 ) THEN + DO I = JJ, 1, -1 + CALL SROT( IHI-TOP, A( TOP+1, J+I+1 ), 1, + $ A( TOP+1, J+I ), 1, A( J+1+I, J ), + $ -B( J+1+I, J ) ) + END DO + END IF +* +* Update (J+1)th column of A by transformations from left. +* + IF ( J .LT. JCOL + NNB - 1 ) THEN + LEN = 1 + J - JCOL +* +* Multiply with the trailing accumulated orthogonal +* matrix, which takes the form +* +* [ U11 U12 ] +* U = [ ], +* [ U21 U22 ] +* +* where U21 is a LEN-by-LEN matrix and U12 is lower +* triangular. +* + JROW = IHI - NBLST + 1 + CALL SGEMV( 'Transpose', NBLST, LEN, ONE, WORK, + $ NBLST, A( JROW, J+1 ), 1, ZERO, + $ WORK( PW ), 1 ) + PPW = PW + LEN + DO I = JROW, JROW+NBLST-LEN-1 + WORK( PPW ) = A( I, J+1 ) + PPW = PPW + 1 + END DO + CALL STRMV( 'Lower', 'Transpose', 'Non-unit', + $ NBLST-LEN, WORK( LEN*NBLST + 1 ), NBLST, + $ WORK( PW+LEN ), 1 ) + CALL SGEMV( 'Transpose', LEN, NBLST-LEN, ONE, + $ WORK( (LEN+1)*NBLST - LEN + 1 ), NBLST, + $ A( JROW+NBLST-LEN, J+1 ), 1, ONE, + $ WORK( PW+LEN ), 1 ) + PPW = PW + DO I = JROW, JROW+NBLST-1 + A( I, J+1 ) = WORK( PPW ) + PPW = PPW + 1 + END DO +* +* Multiply with the other accumulated orthogonal +* matrices, which take the form +* +* [ U11 U12 0 ] +* [ ] +* U = [ U21 U22 0 ], +* [ ] +* [ 0 0 I ] +* +* where I denotes the (NNB-LEN)-by-(NNB-LEN) identity +* matrix, U21 is a LEN-by-LEN upper triangular matrix +* and U12 is an NNB-by-NNB lower triangular matrix. +* + PPWO = 1 + NBLST*NBLST + J0 = JROW - NNB + DO JROW = J0, JCOL+1, -NNB + PPW = PW + LEN + DO I = JROW, JROW+NNB-1 + WORK( PPW ) = A( I, J+1 ) + PPW = PPW + 1 + END DO + PPW = PW + DO I = JROW+NNB, JROW+NNB+LEN-1 + WORK( PPW ) = A( I, J+1 ) + PPW = PPW + 1 + END DO + CALL STRMV( 'Upper', 'Transpose', 'Non-unit', LEN, + $ WORK( PPWO + NNB ), 2*NNB, WORK( PW ), + $ 1 ) + CALL STRMV( 'Lower', 'Transpose', 'Non-unit', NNB, + $ WORK( PPWO + 2*LEN*NNB ), + $ 2*NNB, WORK( PW + LEN ), 1 ) + CALL SGEMV( 'Transpose', NNB, LEN, ONE, + $ WORK( PPWO ), 2*NNB, A( JROW, J+1 ), 1, + $ ONE, WORK( PW ), 1 ) + CALL SGEMV( 'Transpose', LEN, NNB, ONE, + $ WORK( PPWO + 2*LEN*NNB + NNB ), 2*NNB, + $ A( JROW+NNB, J+1 ), 1, ONE, + $ WORK( PW+LEN ), 1 ) + PPW = PW + DO I = JROW, JROW+LEN+NNB-1 + A( I, J+1 ) = WORK( PPW ) + PPW = PPW + 1 + END DO + PPWO = PPWO + 4*NNB*NNB + END DO + END IF + END DO +* +* Apply accumulated orthogonal matrices to A. +* + COLA = N - JCOL - NNB + 1 + J = IHI - NBLST + 1 + CALL SGEMM( 'Transpose', 'No Transpose', NBLST, + $ COLA, NBLST, ONE, WORK, NBLST, + $ A( J, JCOL+NNB ), LDA, ZERO, WORK( PW ), + $ NBLST ) + CALL SLACPY( 'All', NBLST, COLA, WORK( PW ), NBLST, + $ A( J, JCOL+NNB ), LDA ) + PPWO = NBLST*NBLST + 1 + J0 = J - NNB + DO J = J0, JCOL+1, -NNB + IF ( BLK22 ) THEN +* +* Exploit the structure of +* +* [ U11 U12 ] +* U = [ ] +* [ U21 U22 ], +* +* where all blocks are NNB-by-NNB, U21 is upper +* triangular and U12 is lower triangular. +* + CALL SORM22( 'Left', 'Transpose', 2*NNB, COLA, NNB, + $ NNB, WORK( PPWO ), 2*NNB, + $ A( J, JCOL+NNB ), LDA, WORK( PW ), + $ LWORK-PW+1, IERR ) + ELSE +* +* Ignore the structure of U. +* + CALL SGEMM( 'Transpose', 'No Transpose', 2*NNB, + $ COLA, 2*NNB, ONE, WORK( PPWO ), 2*NNB, + $ A( J, JCOL+NNB ), LDA, ZERO, WORK( PW ), + $ 2*NNB ) + CALL SLACPY( 'All', 2*NNB, COLA, WORK( PW ), 2*NNB, + $ A( J, JCOL+NNB ), LDA ) + END IF + PPWO = PPWO + 4*NNB*NNB + END DO +* +* Apply accumulated orthogonal matrices to Q. +* + IF( WANTQ ) THEN + J = IHI - NBLST + 1 + IF ( INITQ ) THEN + TOPQ = MAX( 2, J - JCOL + 1 ) + NH = IHI - TOPQ + 1 + ELSE + TOPQ = 1 + NH = N + END IF + CALL SGEMM( 'No Transpose', 'No Transpose', NH, + $ NBLST, NBLST, ONE, Q( TOPQ, J ), LDQ, + $ WORK, NBLST, ZERO, WORK( PW ), NH ) + CALL SLACPY( 'All', NH, NBLST, WORK( PW ), NH, + $ Q( TOPQ, J ), LDQ ) + PPWO = NBLST*NBLST + 1 + J0 = J - NNB + DO J = J0, JCOL+1, -NNB + IF ( INITQ ) THEN + TOPQ = MAX( 2, J - JCOL + 1 ) + NH = IHI - TOPQ + 1 + END IF + IF ( BLK22 ) THEN +* +* Exploit the structure of U. +* + CALL SORM22( 'Right', 'No Transpose', NH, 2*NNB, + $ NNB, NNB, WORK( PPWO ), 2*NNB, + $ Q( TOPQ, J ), LDQ, WORK( PW ), + $ LWORK-PW+1, IERR ) + ELSE +* +* Ignore the structure of U. +* + CALL SGEMM( 'No Transpose', 'No Transpose', NH, + $ 2*NNB, 2*NNB, ONE, Q( TOPQ, J ), LDQ, + $ WORK( PPWO ), 2*NNB, ZERO, WORK( PW ), + $ NH ) + CALL SLACPY( 'All', NH, 2*NNB, WORK( PW ), NH, + $ Q( TOPQ, J ), LDQ ) + END IF + PPWO = PPWO + 4*NNB*NNB + END DO + END IF +* +* Accumulate right Givens rotations if required. +* + IF ( WANTZ .OR. TOP.GT.0 ) THEN +* +* Initialize small orthogonal factors that will hold the +* accumulated Givens rotations in workspace. +* + CALL SLASET( 'All', NBLST, NBLST, ZERO, ONE, WORK, + $ NBLST ) + PW = NBLST * NBLST + 1 + DO I = 1, N2NB + CALL SLASET( 'All', 2*NNB, 2*NNB, ZERO, ONE, + $ WORK( PW ), 2*NNB ) + PW = PW + 4*NNB*NNB + END DO +* +* Accumulate Givens rotations into workspace array. +* + DO J = JCOL, JCOL+NNB-1 + PPW = ( NBLST + 1 )*( NBLST - 2 ) - J + JCOL + 1 + LEN = 2 + J - JCOL + JROW = J + N2NB*NNB + 2 + DO I = IHI, JROW, -1 + C = A( I, J ) + A( I, J ) = ZERO + S = B( I, J ) + B( I, J ) = ZERO + DO JJ = PPW, PPW+LEN-1 + TEMP = WORK( JJ + NBLST ) + WORK( JJ + NBLST ) = C*TEMP - S*WORK( JJ ) + WORK( JJ ) = S*TEMP + C*WORK( JJ ) + END DO + LEN = LEN + 1 + PPW = PPW - NBLST - 1 + END DO +* + PPWO = NBLST*NBLST + ( NNB+J-JCOL-1 )*2*NNB + NNB + J0 = JROW - NNB + DO JROW = J0, J+2, -NNB + PPW = PPWO + LEN = 2 + J - JCOL + DO I = JROW+NNB-1, JROW, -1 + C = A( I, J ) + A( I, J ) = ZERO + S = B( I, J ) + B( I, J ) = ZERO + DO JJ = PPW, PPW+LEN-1 + TEMP = WORK( JJ + 2*NNB ) + WORK( JJ + 2*NNB ) = C*TEMP - S*WORK( JJ ) + WORK( JJ ) = S*TEMP + C*WORK( JJ ) + END DO + LEN = LEN + 1 + PPW = PPW - 2*NNB - 1 + END DO + PPWO = PPWO + 4*NNB*NNB + END DO + END DO + ELSE +* + CALL SLASET( 'Lower', IHI - JCOL - 1, NNB, ZERO, ZERO, + $ A( JCOL + 2, JCOL ), LDA ) + CALL SLASET( 'Lower', IHI - JCOL - 1, NNB, ZERO, ZERO, + $ B( JCOL + 2, JCOL ), LDB ) + END IF +* +* Apply accumulated orthogonal matrices to A and B. +* + IF ( TOP.GT.0 ) THEN + J = IHI - NBLST + 1 + CALL SGEMM( 'No Transpose', 'No Transpose', TOP, + $ NBLST, NBLST, ONE, A( 1, J ), LDA, + $ WORK, NBLST, ZERO, WORK( PW ), TOP ) + CALL SLACPY( 'All', TOP, NBLST, WORK( PW ), TOP, + $ A( 1, J ), LDA ) + PPWO = NBLST*NBLST + 1 + J0 = J - NNB + DO J = J0, JCOL+1, -NNB + IF ( BLK22 ) THEN +* +* Exploit the structure of U. +* + CALL SORM22( 'Right', 'No Transpose', TOP, 2*NNB, + $ NNB, NNB, WORK( PPWO ), 2*NNB, + $ A( 1, J ), LDA, WORK( PW ), + $ LWORK-PW+1, IERR ) + ELSE +* +* Ignore the structure of U. +* + CALL SGEMM( 'No Transpose', 'No Transpose', TOP, + $ 2*NNB, 2*NNB, ONE, A( 1, J ), LDA, + $ WORK( PPWO ), 2*NNB, ZERO, + $ WORK( PW ), TOP ) + CALL SLACPY( 'All', TOP, 2*NNB, WORK( PW ), TOP, + $ A( 1, J ), LDA ) + END IF + PPWO = PPWO + 4*NNB*NNB + END DO +* + J = IHI - NBLST + 1 + CALL SGEMM( 'No Transpose', 'No Transpose', TOP, + $ NBLST, NBLST, ONE, B( 1, J ), LDB, + $ WORK, NBLST, ZERO, WORK( PW ), TOP ) + CALL SLACPY( 'All', TOP, NBLST, WORK( PW ), TOP, + $ B( 1, J ), LDB ) + PPWO = NBLST*NBLST + 1 + J0 = J - NNB + DO J = J0, JCOL+1, -NNB + IF ( BLK22 ) THEN +* +* Exploit the structure of U. +* + CALL SORM22( 'Right', 'No Transpose', TOP, 2*NNB, + $ NNB, NNB, WORK( PPWO ), 2*NNB, + $ B( 1, J ), LDB, WORK( PW ), + $ LWORK-PW+1, IERR ) + ELSE +* +* Ignore the structure of U. +* + CALL SGEMM( 'No Transpose', 'No Transpose', TOP, + $ 2*NNB, 2*NNB, ONE, B( 1, J ), LDB, + $ WORK( PPWO ), 2*NNB, ZERO, + $ WORK( PW ), TOP ) + CALL SLACPY( 'All', TOP, 2*NNB, WORK( PW ), TOP, + $ B( 1, J ), LDB ) + END IF + PPWO = PPWO + 4*NNB*NNB + END DO + END IF +* +* Apply accumulated orthogonal matrices to Z. +* + IF( WANTZ ) THEN + J = IHI - NBLST + 1 + IF ( INITQ ) THEN + TOPQ = MAX( 2, J - JCOL + 1 ) + NH = IHI - TOPQ + 1 + ELSE + TOPQ = 1 + NH = N + END IF + CALL SGEMM( 'No Transpose', 'No Transpose', NH, + $ NBLST, NBLST, ONE, Z( TOPQ, J ), LDZ, + $ WORK, NBLST, ZERO, WORK( PW ), NH ) + CALL SLACPY( 'All', NH, NBLST, WORK( PW ), NH, + $ Z( TOPQ, J ), LDZ ) + PPWO = NBLST*NBLST + 1 + J0 = J - NNB + DO J = J0, JCOL+1, -NNB + IF ( INITQ ) THEN + TOPQ = MAX( 2, J - JCOL + 1 ) + NH = IHI - TOPQ + 1 + END IF + IF ( BLK22 ) THEN +* +* Exploit the structure of U. +* + CALL SORM22( 'Right', 'No Transpose', NH, 2*NNB, + $ NNB, NNB, WORK( PPWO ), 2*NNB, + $ Z( TOPQ, J ), LDZ, WORK( PW ), + $ LWORK-PW+1, IERR ) + ELSE +* +* Ignore the structure of U. +* + CALL SGEMM( 'No Transpose', 'No Transpose', NH, + $ 2*NNB, 2*NNB, ONE, Z( TOPQ, J ), LDZ, + $ WORK( PPWO ), 2*NNB, ZERO, WORK( PW ), + $ NH ) + CALL SLACPY( 'All', NH, 2*NNB, WORK( PW ), NH, + $ Z( TOPQ, J ), LDZ ) + END IF + PPWO = PPWO + 4*NNB*NNB + END DO + END IF + END DO + END IF +* +* Use unblocked code to reduce the rest of the matrix +* Avoid re-initialization of modified Q and Z. +* + COMPQ2 = COMPQ + COMPZ2 = COMPZ + IF ( JCOL.NE.ILO ) THEN + IF ( WANTQ ) + $ COMPQ2 = 'V' + IF ( WANTZ ) + $ COMPZ2 = 'V' + END IF +* + IF ( JCOL.LT.IHI ) + $ CALL SGGHRD( COMPQ2, COMPZ2, N, JCOL, IHI, A, LDA, B, LDB, Q, + $ LDQ, Z, LDZ, IERR ) + WORK( 1 ) = REAL( LWKOPT ) +* + RETURN +* +* End of SGGHD3 +* + END diff --git a/dspl/liblapack/SRC/sgghrd.f b/dspl/liblapack/SRC/sgghrd.f new file mode 100644 index 0000000..6580ab7 --- /dev/null +++ b/dspl/liblapack/SRC/sgghrd.f @@ -0,0 +1,361 @@ +*> \brief \b SGGHRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGGHRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, +* LDQ, Z, LDZ, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ, COMPZ +* INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGGHRD reduces a pair of real matrices (A,B) to generalized upper +*> Hessenberg form using orthogonal transformations, where A is a +*> general matrix and B is upper triangular. The form of the +*> generalized eigenvalue problem is +*> A*x = lambda*B*x, +*> and B is typically made upper triangular by computing its QR +*> factorization and moving the orthogonal matrix Q to the left side +*> of the equation. +*> +*> This subroutine simultaneously reduces A to a Hessenberg matrix H: +*> Q**T*A*Z = H +*> and transforms B to another upper triangular matrix T: +*> Q**T*B*Z = T +*> in order to reduce the problem to its standard form +*> H*y = lambda*T*y +*> where y = Z**T*x. +*> +*> The orthogonal matrices Q and Z are determined as products of Givens +*> rotations. They may either be formed explicitly, or they may be +*> postmultiplied into input matrices Q1 and Z1, so that +*> +*> Q1 * A * Z1**T = (Q1*Q) * H * (Z1*Z)**T +*> +*> Q1 * B * Z1**T = (Q1*Q) * T * (Z1*Z)**T +*> +*> If Q1 is the orthogonal matrix from the QR factorization of B in the +*> original equation A*x = lambda*B*x, then SGGHRD reduces the original +*> problem to generalized Hessenberg form. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> = 'N': do not compute Q; +*> = 'I': Q is initialized to the unit matrix, and the +*> orthogonal matrix Q is returned; +*> = 'V': Q must contain an orthogonal matrix Q1 on entry, +*> and the product Q1*Q is returned. +*> \endverbatim +*> +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': do not compute Z; +*> = 'I': Z is initialized to the unit matrix, and the +*> orthogonal matrix Z is returned; +*> = 'V': Z must contain an orthogonal matrix Z1 on entry, +*> and the product Z1*Z is returned. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> ILO and IHI mark the rows and columns of A which are to be +*> reduced. It is assumed that A is already upper triangular +*> in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are +*> normally set by a previous call to SGGBAL; otherwise they +*> should be set to 1 and N respectively. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the N-by-N general matrix to be reduced. +*> On exit, the upper triangle and the first subdiagonal of A +*> are overwritten with the upper Hessenberg matrix H, and the +*> rest is set to zero. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB, N) +*> On entry, the N-by-N upper triangular matrix B. +*> On exit, the upper triangular matrix T = Q**T B Z. The +*> elements below the diagonal are set to zero. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ, N) +*> On entry, if COMPQ = 'V', the orthogonal matrix Q1, +*> typically from the QR factorization of B. +*> On exit, if COMPQ='I', the orthogonal matrix Q, and if +*> COMPQ = 'V', the product Q1*Q. +*> Not referenced if COMPQ='N'. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', the orthogonal matrix Z1. +*> On exit, if COMPZ='I', the orthogonal matrix Z, and if +*> COMPZ = 'V', the product Z1*Z. +*> Not referenced if COMPZ='N'. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. +*> LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> This routine reduces A to Hessenberg and B to triangular form by +*> an unblocked reduction, as described in _Matrix_Computations_, +*> by Golub and Van Loan (Johns Hopkins Press.) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, + $ LDQ, Z, LDZ, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ + INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ILQ, ILZ + INTEGER ICOMPQ, ICOMPZ, JCOL, JROW + REAL C, S, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLARTG, SLASET, SROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode COMPQ +* + IF( LSAME( COMPQ, 'N' ) ) THEN + ILQ = .FALSE. + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'V' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 2 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 3 + ELSE + ICOMPQ = 0 + END IF +* +* Decode COMPZ +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ILZ = .FALSE. + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 2 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 3 + ELSE + ICOMPZ = 0 + END IF +* +* Test the input parameters. +* + INFO = 0 + IF( ICOMPQ.LE.0 ) THEN + INFO = -1 + ELSE IF( ICOMPZ.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( ( ILQ .AND. LDQ.LT.N ) .OR. LDQ.LT.1 ) THEN + INFO = -11 + ELSE IF( ( ILZ .AND. LDZ.LT.N ) .OR. LDZ.LT.1 ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGHRD', -INFO ) + RETURN + END IF +* +* Initialize Q and Z if desired. +* + IF( ICOMPQ.EQ.3 ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) + IF( ICOMPZ.EQ.3 ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* +* Zero out lower triangle of B +* + DO 20 JCOL = 1, N - 1 + DO 10 JROW = JCOL + 1, N + B( JROW, JCOL ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Reduce A and B +* + DO 40 JCOL = ILO, IHI - 2 +* + DO 30 JROW = IHI, JCOL + 2, -1 +* +* Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL) +* + TEMP = A( JROW-1, JCOL ) + CALL SLARTG( TEMP, A( JROW, JCOL ), C, S, + $ A( JROW-1, JCOL ) ) + A( JROW, JCOL ) = ZERO + CALL SROT( N-JCOL, A( JROW-1, JCOL+1 ), LDA, + $ A( JROW, JCOL+1 ), LDA, C, S ) + CALL SROT( N+2-JROW, B( JROW-1, JROW-1 ), LDB, + $ B( JROW, JROW-1 ), LDB, C, S ) + IF( ILQ ) + $ CALL SROT( N, Q( 1, JROW-1 ), 1, Q( 1, JROW ), 1, C, S ) +* +* Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1) +* + TEMP = B( JROW, JROW ) + CALL SLARTG( TEMP, B( JROW, JROW-1 ), C, S, + $ B( JROW, JROW ) ) + B( JROW, JROW-1 ) = ZERO + CALL SROT( IHI, A( 1, JROW ), 1, A( 1, JROW-1 ), 1, C, S ) + CALL SROT( JROW-1, B( 1, JROW ), 1, B( 1, JROW-1 ), 1, C, + $ S ) + IF( ILZ ) + $ CALL SROT( N, Z( 1, JROW ), 1, Z( 1, JROW-1 ), 1, C, S ) + 30 CONTINUE + 40 CONTINUE +* + RETURN +* +* End of SGGHRD +* + END diff --git a/dspl/liblapack/SRC/sgglse.f b/dspl/liblapack/SRC/sgglse.f new file mode 100644 index 0000000..20e319c --- /dev/null +++ b/dspl/liblapack/SRC/sgglse.f @@ -0,0 +1,354 @@ +*> \brief SGGLSE solves overdetermined or underdetermined systems for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGGLSE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), C( * ), D( * ), +* $ WORK( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGGLSE solves the linear equality-constrained least squares (LSE) +*> problem: +*> +*> minimize || c - A*x ||_2 subject to B*x = d +*> +*> where A is an M-by-N matrix, B is a P-by-N matrix, c is a given +*> M-vector, and d is a given P-vector. It is assumed that +*> P <= N <= M+P, and +*> +*> rank(B) = P and rank( (A) ) = N. +*> ( (B) ) +*> +*> These conditions ensure that the LSE problem has a unique solution, +*> which is obtained using a generalized RQ factorization of the +*> matrices (B, A) given by +*> +*> B = (0 R)*Q, A = Z*T*Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. 0 <= P <= N <= M+P. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, the upper triangle of the subarray B(1:P,N-P+1:N) +*> contains the P-by-P upper triangular matrix R. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (M) +*> On entry, C contains the right hand side vector for the +*> least squares part of the LSE problem. +*> On exit, the residual sum of squares for the solution +*> is given by the sum of squares of elements N-P+1 to M of +*> vector C. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (P) +*> On entry, D contains the right hand side vector for the +*> constrained equation. +*> On exit, D is destroyed. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (N) +*> On exit, X is the solution of the LSE problem. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M+N+P). +*> For optimum performance LWORK >= P+min(M,N)+max(M,N)*NB, +*> where NB is an upper bound for the optimal blocksizes for +*> SGEQRF, SGERQF, SORMQR and SORMRQ. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1: the upper triangular factor R associated with B in the +*> generalized RQ factorization of the pair (B, A) is +*> singular, so that rank(B) < P; the least squares +*> solution could not be computed. +*> = 2: the (N-P) by (N-P) part of the upper trapezoidal factor +*> T associated with A in the generalized RQ factorization +*> of the pair (B, A) is singular, so that +*> rank( (A) ) < N; the least squares solution could not +*> ( (B) ) +*> be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERsolve +* +* ===================================================================== + SUBROUTINE SGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), C( * ), D( * ), + $ WORK( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKMIN, LWKOPT, MN, NB, NB1, NB2, NB3, + $ NB4, NR +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMV, SGGRQF, SORMQR, SORMRQ, + $ STRMV, STRTRS, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( P.LT.0 .OR. P.GT.N .OR. P.LT.N-M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -7 + END IF +* +* Calculate workspace +* + IF( INFO.EQ.0) THEN + IF( N.EQ.0 ) THEN + LWKMIN = 1 + LWKOPT = 1 + ELSE + NB1 = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'SGERQF', ' ', M, N, -1, -1 ) + NB3 = ILAENV( 1, 'SORMQR', ' ', M, N, P, -1 ) + NB4 = ILAENV( 1, 'SORMRQ', ' ', M, N, P, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKMIN = M + N + P + LWKOPT = P + MN + MAX( M, N )*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGLSE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the GRQ factorization of matrices B and A: +* +* B*Q**T = ( 0 T12 ) P Z**T*A*Q**T = ( R11 R12 ) N-P +* N-P P ( 0 R22 ) M+P-N +* N-P P +* +* where T12 and R11 are upper triangular, and Q and Z are +* orthogonal. +* + CALL SGGRQF( P, M, N, B, LDB, WORK, A, LDA, WORK( P+1 ), + $ WORK( P+MN+1 ), LWORK-P-MN, INFO ) + LOPT = WORK( P+MN+1 ) +* +* Update c = Z**T *c = ( c1 ) N-P +* ( c2 ) M+P-N +* + CALL SORMQR( 'Left', 'Transpose', M, 1, MN, A, LDA, WORK( P+1 ), + $ C, MAX( 1, M ), WORK( P+MN+1 ), LWORK-P-MN, INFO ) + LOPT = MAX( LOPT, INT( WORK( P+MN+1 ) ) ) +* +* Solve T12*x2 = d for x2 +* + IF( P.GT.0 ) THEN + CALL STRTRS( 'Upper', 'No transpose', 'Non-unit', P, 1, + $ B( 1, N-P+1 ), LDB, D, P, INFO ) +* + IF( INFO.GT.0 ) THEN + INFO = 1 + RETURN + END IF +* +* Put the solution in X +* + CALL SCOPY( P, D, 1, X( N-P+1 ), 1 ) +* +* Update c1 +* + CALL SGEMV( 'No transpose', N-P, P, -ONE, A( 1, N-P+1 ), LDA, + $ D, 1, ONE, C, 1 ) + END IF +* +* Solve R11*x1 = c1 for x1 +* + IF( N.GT.P ) THEN + CALL STRTRS( 'Upper', 'No transpose', 'Non-unit', N-P, 1, + $ A, LDA, C, N-P, INFO ) +* + IF( INFO.GT.0 ) THEN + INFO = 2 + RETURN + END IF +* +* Put the solutions in X +* + CALL SCOPY( N-P, C, 1, X, 1 ) + END IF +* +* Compute the residual vector: +* + IF( M.LT.N ) THEN + NR = M + P - N + IF( NR.GT.0 ) + $ CALL SGEMV( 'No transpose', NR, N-M, -ONE, A( N-P+1, M+1 ), + $ LDA, D( NR+1 ), 1, ONE, C( N-P+1 ), 1 ) + ELSE + NR = P + END IF + IF( NR.GT.0 ) THEN + CALL STRMV( 'Upper', 'No transpose', 'Non unit', NR, + $ A( N-P+1, N-P+1 ), LDA, D, 1 ) + CALL SAXPY( NR, -ONE, D, 1, C( N-P+1 ), 1 ) + END IF +* +* Backward transformation x = Q**T*x +* + CALL SORMRQ( 'Left', 'Transpose', N, 1, P, B, LDB, WORK( 1 ), X, + $ N, WORK( P+MN+1 ), LWORK-P-MN, INFO ) + WORK( 1 ) = P + MN + MAX( LOPT, INT( WORK( P+MN+1 ) ) ) +* + RETURN +* +* End of SGGLSE +* + END diff --git a/dspl/liblapack/SRC/sggqrf.f b/dspl/liblapack/SRC/sggqrf.f new file mode 100644 index 0000000..bce1d5b --- /dev/null +++ b/dspl/liblapack/SRC/sggqrf.f @@ -0,0 +1,299 @@ +*> \brief \b SGGQRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGGQRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGGQRF computes a generalized QR factorization of an N-by-M matrix A +*> and an N-by-P matrix B: +*> +*> A = Q*R, B = Q*T*Z, +*> +*> where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal +*> matrix, and R and T assume one of the forms: +*> +*> if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, +*> ( 0 ) N-M N M-N +*> M +*> +*> where R11 is upper triangular, and +*> +*> if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, +*> P-N N ( T21 ) P +*> P +*> +*> where T12 or T21 is upper triangular. +*> +*> In particular, if B is square and nonsingular, the GQR factorization +*> of A and B implicitly gives the QR factorization of inv(B)*A: +*> +*> inv(B)*A = Z**T*(inv(T)*R) +*> +*> where inv(B) denotes the inverse of the matrix B, and Z**T denotes the +*> transpose of the matrix Z. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of columns of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of columns of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,M) +*> On entry, the N-by-M matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(N,M)-by-M upper trapezoidal matrix R (R is +*> upper triangular if N >= M); the elements below the diagonal, +*> with the array TAUA, represent the orthogonal matrix Q as a +*> product of min(N,M) elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAUA +*> \verbatim +*> TAUA is REAL array, dimension (min(N,M)) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix Q (see Further Details). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,P) +*> On entry, the N-by-P matrix B. +*> On exit, if N <= P, the upper triangle of the subarray +*> B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; +*> if N > P, the elements on and above the (N-P)-th subdiagonal +*> contain the N-by-P upper trapezoidal matrix T; the remaining +*> elements, with the array TAUB, represent the orthogonal +*> matrix Z as a product of elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAUB +*> \verbatim +*> TAUB is REAL array, dimension (min(N,P)) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix Z (see Further Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N,M,P). +*> For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3), +*> where NB1 is the optimal blocksize for the QR factorization +*> of an N-by-M matrix, NB2 is the optimal blocksize for the +*> RQ factorization of an N-by-P matrix, and NB3 is the optimal +*> blocksize for a call of SORMQR. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(n,m). +*> +*> Each H(i) has the form +*> +*> H(i) = I - taua * v * v**T +*> +*> where taua is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), +*> and taua in TAUA(i). +*> To form Q explicitly, use LAPACK subroutine SORGQR. +*> To use Q to update another matrix, use LAPACK subroutine SORMQR. +*> +*> The matrix Z is represented as a product of elementary reflectors +*> +*> Z = H(1) H(2) . . . H(k), where k = min(n,p). +*> +*> Each H(i) has the form +*> +*> H(i) = I - taub * v * v**T +*> +*> where taub is a real scalar, and v is a real vector with +*> v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in +*> B(n-k+i,1:p-k+i-1), and taub in TAUB(i). +*> To form Z explicitly, use LAPACK subroutine SORGRQ. +*> To use Z to update another matrix, use LAPACK subroutine SORMRQ. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKOPT, NB, NB1, NB2, NB3 +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SGERQF, SORMQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB1 = ILAENV( 1, 'SGEQRF', ' ', N, M, -1, -1 ) + NB2 = ILAENV( 1, 'SGERQF', ' ', N, P, -1, -1 ) + NB3 = ILAENV( 1, 'SORMQR', ' ', N, M, P, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LWKOPT = MAX( N, M, P )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( P.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, N, M, P ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* QR factorization of N-by-M matrix A: A = Q*R +* + CALL SGEQRF( N, M, A, LDA, TAUA, WORK, LWORK, INFO ) + LOPT = WORK( 1 ) +* +* Update B := Q**T*B. +* + CALL SORMQR( 'Left', 'Transpose', N, P, MIN( N, M ), A, LDA, TAUA, + $ B, LDB, WORK, LWORK, INFO ) + LOPT = MAX( LOPT, INT( WORK( 1 ) ) ) +* +* RQ factorization of N-by-P matrix B: B = T*Z. +* + CALL SGERQF( N, P, B, LDB, TAUB, WORK, LWORK, INFO ) + WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) ) +* + RETURN +* +* End of SGGQRF +* + END diff --git a/dspl/liblapack/SRC/sggrqf.f b/dspl/liblapack/SRC/sggrqf.f new file mode 100644 index 0000000..82a57b1 --- /dev/null +++ b/dspl/liblapack/SRC/sggrqf.f @@ -0,0 +1,299 @@ +*> \brief \b SGGRQF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGGRQF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGGRQF computes a generalized RQ factorization of an M-by-N matrix A +*> and a P-by-N matrix B: +*> +*> A = R*Q, B = Z*T*Q, +*> +*> where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal +*> matrix, and R and T assume one of the forms: +*> +*> if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, +*> N-M M ( R21 ) N +*> N +*> +*> where R12 or R21 is upper triangular, and +*> +*> if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, +*> ( 0 ) P-N P N-P +*> N +*> +*> where T11 is upper triangular. +*> +*> In particular, if B is square and nonsingular, the GRQ factorization +*> of A and B implicitly gives the RQ factorization of A*inv(B): +*> +*> A*inv(B) = (R*inv(T))*Z**T +*> +*> where inv(B) denotes the inverse of the matrix B, and Z**T denotes the +*> transpose of the matrix Z. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, if M <= N, the upper triangle of the subarray +*> A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R; +*> if M > N, the elements on and above the (M-N)-th subdiagonal +*> contain the M-by-N upper trapezoidal matrix R; the remaining +*> elements, with the array TAUA, represent the orthogonal +*> matrix Q as a product of elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAUA +*> \verbatim +*> TAUA is REAL array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix Q (see Further Details). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(P,N)-by-N upper trapezoidal matrix T (T is +*> upper triangular if P >= N); the elements below the diagonal, +*> with the array TAUB, represent the orthogonal matrix Z as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[out] TAUB +*> \verbatim +*> TAUB is REAL array, dimension (min(P,N)) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix Z (see Further Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N,M,P). +*> For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3), +*> where NB1 is the optimal blocksize for the RQ factorization +*> of an M-by-N matrix, NB2 is the optimal blocksize for the +*> QR factorization of a P-by-N matrix, and NB3 is the optimal +*> blocksize for a call of SORMRQ. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INF0= -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - taua * v * v**T +*> +*> where taua is a real scalar, and v is a real vector with +*> v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in +*> A(m-k+i,1:n-k+i-1), and taua in TAUA(i). +*> To form Q explicitly, use LAPACK subroutine SORGRQ. +*> To use Q to update another matrix, use LAPACK subroutine SORMRQ. +*> +*> The matrix Z is represented as a product of elementary reflectors +*> +*> Z = H(1) H(2) . . . H(k), where k = min(p,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - taub * v * v**T +*> +*> where taub is a real scalar, and v is a real vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i), +*> and taub in TAUB(i). +*> To form Z explicitly, use LAPACK subroutine SORGQR. +*> To use Z to update another matrix, use LAPACK subroutine SORMQR. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKOPT, NB, NB1, NB2, NB3 +* .. +* .. External Subroutines .. + EXTERNAL SGEQRF, SGERQF, SORMRQ, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB1 = ILAENV( 1, 'SGERQF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'SGEQRF', ' ', P, N, -1, -1 ) + NB3 = ILAENV( 1, 'SORMRQ', ' ', M, N, P, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LWKOPT = MAX( N, M, P)*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( P.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, M, P, N ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGRQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* RQ factorization of M-by-N matrix A: A = R*Q +* + CALL SGERQF( M, N, A, LDA, TAUA, WORK, LWORK, INFO ) + LOPT = WORK( 1 ) +* +* Update B := B*Q**T +* + CALL SORMRQ( 'Right', 'Transpose', P, N, MIN( M, N ), + $ A( MAX( 1, M-N+1 ), 1 ), LDA, TAUA, B, LDB, WORK, + $ LWORK, INFO ) + LOPT = MAX( LOPT, INT( WORK( 1 ) ) ) +* +* QR factorization of P-by-N matrix B: B = Z*T +* + CALL SGEQRF( P, N, B, LDB, TAUB, WORK, LWORK, INFO ) + WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) ) +* + RETURN +* +* End of SGGRQF +* + END diff --git a/dspl/liblapack/SRC/sggsvd3.f b/dspl/liblapack/SRC/sggsvd3.f new file mode 100644 index 0000000..4f41fc3 --- /dev/null +++ b/dspl/liblapack/SRC/sggsvd3.f @@ -0,0 +1,503 @@ +*> \brief SGGSVD3 computes the singular value decomposition (SVD) for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGGSVD3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGGSVD3( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, +* LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, +* LWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBQ, JOBU, JOBV +* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, LWORK +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), Q( LDQ, * ), U( LDU, * ), +* $ V( LDV, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGGSVD3 computes the generalized singular value decomposition (GSVD) +*> of an M-by-N real matrix A and P-by-N real matrix B: +*> +*> U**T*A*Q = D1*( 0 R ), V**T*B*Q = D2*( 0 R ) +*> +*> where U, V and Q are orthogonal matrices. +*> Let K+L = the effective numerical rank of the matrix (A**T,B**T)**T, +*> then R is a K+L-by-K+L nonsingular upper triangular matrix, D1 and +*> D2 are M-by-(K+L) and P-by-(K+L) "diagonal" matrices and of the +*> following structures, respectively: +*> +*> If M-K-L >= 0, +*> +*> K L +*> D1 = K ( I 0 ) +*> L ( 0 C ) +*> M-K-L ( 0 0 ) +*> +*> K L +*> D2 = L ( 0 S ) +*> P-L ( 0 0 ) +*> +*> N-K-L K L +*> ( 0 R ) = K ( 0 R11 R12 ) +*> L ( 0 0 R22 ) +*> +*> where +*> +*> C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), +*> S = diag( BETA(K+1), ... , BETA(K+L) ), +*> C**2 + S**2 = I. +*> +*> R is stored in A(1:K+L,N-K-L+1:N) on exit. +*> +*> If M-K-L < 0, +*> +*> K M-K K+L-M +*> D1 = K ( I 0 0 ) +*> M-K ( 0 C 0 ) +*> +*> K M-K K+L-M +*> D2 = M-K ( 0 S 0 ) +*> K+L-M ( 0 0 I ) +*> P-L ( 0 0 0 ) +*> +*> N-K-L K M-K K+L-M +*> ( 0 R ) = K ( 0 R11 R12 R13 ) +*> M-K ( 0 0 R22 R23 ) +*> K+L-M ( 0 0 0 R33 ) +*> +*> where +*> +*> C = diag( ALPHA(K+1), ... , ALPHA(M) ), +*> S = diag( BETA(K+1), ... , BETA(M) ), +*> C**2 + S**2 = I. +*> +*> (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored +*> ( 0 R22 R23 ) +*> in B(M-K+1:L,N+M-K-L+1:N) on exit. +*> +*> The routine computes C, S, R, and optionally the orthogonal +*> transformation matrices U, V and Q. +*> +*> In particular, if B is an N-by-N nonsingular matrix, then the GSVD of +*> A and B implicitly gives the SVD of A*inv(B): +*> A*inv(B) = U*(D1*inv(D2))*V**T. +*> If ( A**T,B**T)**T has orthonormal columns, then the GSVD of A and B is +*> also equal to the CS decomposition of A and B. Furthermore, the GSVD +*> can be used to derive the solution of the eigenvalue problem: +*> A**T*A x = lambda* B**T*B x. +*> In some literature, the GSVD of A and B is presented in the form +*> U**T*A*X = ( 0 D1 ), V**T*B*X = ( 0 D2 ) +*> where U and V are orthogonal and X is nonsingular, D1 and D2 are +*> ``diagonal''. The former GSVD form can be converted to the latter +*> form by taking the nonsingular matrix X as +*> +*> X = Q*( I 0 ) +*> ( 0 inv(R) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> = 'U': Orthogonal matrix U is computed; +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> = 'V': Orthogonal matrix V is computed; +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBQ +*> \verbatim +*> JOBQ is CHARACTER*1 +*> = 'Q': Orthogonal matrix Q is computed; +*> = 'N': Q is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> \endverbatim +*> +*> \param[out] L +*> \verbatim +*> L is INTEGER +*> +*> On exit, K and L specify the dimension of the subblocks +*> described in Purpose. +*> K + L = effective numerical rank of (A**T,B**T)**T. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A contains the triangular matrix R, or part of R. +*> See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, B contains the triangular matrix R if M-K-L < 0. +*> See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is REAL array, dimension (N) +*> +*> On exit, ALPHA and BETA contain the generalized singular +*> value pairs of A and B; +*> ALPHA(1:K) = 1, +*> BETA(1:K) = 0, +*> and if M-K-L >= 0, +*> ALPHA(K+1:K+L) = C, +*> BETA(K+1:K+L) = S, +*> or if M-K-L < 0, +*> ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0 +*> BETA(K+1:M) =S, BETA(M+1:K+L) =1 +*> and +*> ALPHA(K+L+1:N) = 0 +*> BETA(K+L+1:N) = 0 +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is REAL array, dimension (LDU,M) +*> If JOBU = 'U', U contains the M-by-M orthogonal matrix U. +*> If JOBU = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,M) if +*> JOBU = 'U'; LDU >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is REAL array, dimension (LDV,P) +*> If JOBV = 'V', V contains the P-by-P orthogonal matrix V. +*> If JOBV = 'N', V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,P) if +*> JOBV = 'V'; LDV >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ,N) +*> If JOBQ = 'Q', Q contains the N-by-N orthogonal matrix Q. +*> If JOBQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N) if +*> JOBQ = 'Q'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> On exit, IWORK stores the sorting information. More +*> precisely, the following loop will sort ALPHA +*> for I = K+1, min(M,K+L) +*> swap ALPHA(I) and ALPHA(IWORK(I)) +*> endfor +*> such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, the Jacobi-type procedure failed to +*> converge. For further details, see subroutine STGSJA. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> TOLA REAL +*> TOLB REAL +*> TOLA and TOLB are the thresholds to determine the effective +*> rank of (A**T,B**T)**T. Generally, they are set to +*> TOLA = MAX(M,N)*norm(A)*MACHEPS, +*> TOLB = MAX(P,N)*norm(B)*MACHEPS. +*> The size of TOLA and TOLB may affect the size of backward +*> errors of the decomposition. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date August 2015 +* +*> \ingroup realGEsing +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* +*> \par Further Details: +* ===================== +*> +*> SGGSVD3 replaces the deprecated subroutine SGGSVD. +*> +* ===================================================================== + SUBROUTINE SGGSVD3( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, + $ LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, + $ WORK, LWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* August 2015 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, + $ LWORK +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), Q( LDQ, * ), U( LDU, * ), + $ V( LDV, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL WANTQ, WANTU, WANTV, LQUERY + INTEGER I, IBND, ISUB, J, NCYCLE, LWKOPT + REAL ANORM, BNORM, SMAX, TEMP, TOLA, TOLB, ULP, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANGE + EXTERNAL LSAME, SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGGSVP3, STGSJA, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) + LQUERY = ( LWORK.EQ.-1 ) + LWKOPT = 1 +* +* Test the input arguments +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( P.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -12 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -24 + END IF +* +* Compute workspace +* + IF( INFO.EQ.0 ) THEN + CALL SGGSVP3( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, TOLA, + $ TOLB, K, L, U, LDU, V, LDV, Q, LDQ, IWORK, WORK, + $ WORK, -1, INFO ) + LWKOPT = N + INT( WORK( 1 ) ) + LWKOPT = MAX( 2*N, LWKOPT ) + LWKOPT = MAX( 1, LWKOPT ) + WORK( 1 ) = REAL( LWKOPT ) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGSVD3', -INFO ) + RETURN + END IF + IF( LQUERY ) THEN + RETURN + ENDIF +* +* Compute the Frobenius norm of matrices A and B +* + ANORM = SLANGE( '1', M, N, A, LDA, WORK ) + BNORM = SLANGE( '1', P, N, B, LDB, WORK ) +* +* Get machine precision and set up threshold for determining +* the effective numerical rank of the matrices A and B. +* + ULP = SLAMCH( 'Precision' ) + UNFL = SLAMCH( 'Safe Minimum' ) + TOLA = MAX( M, N )*MAX( ANORM, UNFL )*ULP + TOLB = MAX( P, N )*MAX( BNORM, UNFL )*ULP +* +* Preprocessing +* + CALL SGGSVP3( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, TOLA, + $ TOLB, K, L, U, LDU, V, LDV, Q, LDQ, IWORK, WORK, + $ WORK( N+1 ), LWORK-N, INFO ) +* +* Compute the GSVD of two upper "triangular" matrices +* + CALL STGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, LDB, + $ TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, + $ WORK, NCYCLE, INFO ) +* +* Sort the singular values and store the pivot indices in IWORK +* Copy ALPHA to WORK, then sort ALPHA in WORK +* + CALL SCOPY( N, ALPHA, 1, WORK, 1 ) + IBND = MIN( L, M-K ) + DO 20 I = 1, IBND +* +* Scan for largest ALPHA(K+I) +* + ISUB = I + SMAX = WORK( K+I ) + DO 10 J = I + 1, IBND + TEMP = WORK( K+J ) + IF( TEMP.GT.SMAX ) THEN + ISUB = J + SMAX = TEMP + END IF + 10 CONTINUE + IF( ISUB.NE.I ) THEN + WORK( K+ISUB ) = WORK( K+I ) + WORK( K+I ) = SMAX + IWORK( K+I ) = K + ISUB + ELSE + IWORK( K+I ) = K + I + END IF + 20 CONTINUE +* + WORK( 1 ) = REAL( LWKOPT ) + RETURN +* +* End of SGGSVD3 +* + END diff --git a/dspl/liblapack/SRC/sggsvp3.f b/dspl/liblapack/SRC/sggsvp3.f new file mode 100644 index 0000000..ec7229a --- /dev/null +++ b/dspl/liblapack/SRC/sggsvp3.f @@ -0,0 +1,571 @@ +*> \brief \b SGGSVP3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGGSVP3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGGSVP3( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, +* TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, +* IWORK, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBQ, JOBU, JOBV +* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, LWORK +* REAL TOLA, TOLB +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGGSVP3 computes orthogonal matrices U, V and Q such that +*> +*> N-K-L K L +*> U**T*A*Q = K ( 0 A12 A13 ) if M-K-L >= 0; +*> L ( 0 0 A23 ) +*> M-K-L ( 0 0 0 ) +*> +*> N-K-L K L +*> = K ( 0 A12 A13 ) if M-K-L < 0; +*> M-K ( 0 0 A23 ) +*> +*> N-K-L K L +*> V**T*B*Q = L ( 0 0 B13 ) +*> P-L ( 0 0 0 ) +*> +*> where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular +*> upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, +*> otherwise A23 is (M-K)-by-L upper trapezoidal. K+L = the effective +*> numerical rank of the (M+P)-by-N matrix (A**T,B**T)**T. +*> +*> This decomposition is the preprocessing step for computing the +*> Generalized Singular Value Decomposition (GSVD), see subroutine +*> SGGSVD3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> = 'U': Orthogonal matrix U is computed; +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> = 'V': Orthogonal matrix V is computed; +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBQ +*> \verbatim +*> JOBQ is CHARACTER*1 +*> = 'Q': Orthogonal matrix Q is computed; +*> = 'N': Q is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A contains the triangular (or trapezoidal) matrix +*> described in the Purpose section. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, B contains the triangular matrix described in +*> the Purpose section. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[in] TOLA +*> \verbatim +*> TOLA is REAL +*> \endverbatim +*> +*> \param[in] TOLB +*> \verbatim +*> TOLB is REAL +*> +*> TOLA and TOLB are the thresholds to determine the effective +*> numerical rank of matrix B and a subblock of A. Generally, +*> they are set to +*> TOLA = MAX(M,N)*norm(A)*MACHEPS, +*> TOLB = MAX(P,N)*norm(B)*MACHEPS. +*> The size of TOLA and TOLB may affect the size of backward +*> errors of the decomposition. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> \endverbatim +*> +*> \param[out] L +*> \verbatim +*> L is INTEGER +*> +*> On exit, K and L specify the dimension of the subblocks +*> described in Purpose section. +*> K + L = effective numerical rank of (A**T,B**T)**T. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is REAL array, dimension (LDU,M) +*> If JOBU = 'U', U contains the orthogonal matrix U. +*> If JOBU = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,M) if +*> JOBU = 'U'; LDU >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is REAL array, dimension (LDV,P) +*> If JOBV = 'V', V contains the orthogonal matrix V. +*> If JOBV = 'N', V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,P) if +*> JOBV = 'V'; LDV >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ,N) +*> If JOBQ = 'Q', Q contains the orthogonal matrix Q. +*> If JOBQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N) if +*> JOBQ = 'Q'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date August 2015 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The subroutine uses LAPACK subroutine SGEQP3 for the QR factorization +*> with column pivoting to detect the effective numerical rank of the +*> a matrix. It may be replaced by a better rank determination strategy. +*> +*> SGGSVP3 replaces the deprecated subroutine SGGSVP. +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SGGSVP3( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, + $ TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, + $ IWORK, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* August 2015 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, + $ LWORK + REAL TOLA, TOLB +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL FORWRD, WANTQ, WANTU, WANTV, LQUERY + INTEGER I, J, LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEQP3, SGEQR2, SGERQ2, SLACPY, SLAPMT, + $ SLASET, SORG2R, SORM2R, SORMR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) + FORWRD = .TRUE. + LQUERY = ( LWORK.EQ.-1 ) + LWKOPT = 1 +* +* Test the input arguments +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( P.LT.0 ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -10 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -24 + END IF +* +* Compute workspace +* + IF( INFO.EQ.0 ) THEN + CALL SGEQP3( P, N, B, LDB, IWORK, TAU, WORK, -1, INFO ) + LWKOPT = INT( WORK ( 1 ) ) + IF( WANTV ) THEN + LWKOPT = MAX( LWKOPT, P ) + END IF + LWKOPT = MAX( LWKOPT, MIN( N, P ) ) + LWKOPT = MAX( LWKOPT, M ) + IF( WANTQ ) THEN + LWKOPT = MAX( LWKOPT, N ) + END IF + CALL SGEQP3( M, N, A, LDA, IWORK, TAU, WORK, -1, INFO ) + LWKOPT = MAX( LWKOPT, INT( WORK ( 1 ) ) ) + LWKOPT = MAX( 1, LWKOPT ) + WORK( 1 ) = REAL( LWKOPT ) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGGSVP3', -INFO ) + RETURN + END IF + IF( LQUERY ) THEN + RETURN + ENDIF +* +* QR with column pivoting of B: B*P = V*( S11 S12 ) +* ( 0 0 ) +* + DO 10 I = 1, N + IWORK( I ) = 0 + 10 CONTINUE + CALL SGEQP3( P, N, B, LDB, IWORK, TAU, WORK, LWORK, INFO ) +* +* Update A := A*P +* + CALL SLAPMT( FORWRD, M, N, A, LDA, IWORK ) +* +* Determine the effective rank of matrix B. +* + L = 0 + DO 20 I = 1, MIN( P, N ) + IF( ABS( B( I, I ) ).GT.TOLB ) + $ L = L + 1 + 20 CONTINUE +* + IF( WANTV ) THEN +* +* Copy the details of V, and form V. +* + CALL SLASET( 'Full', P, P, ZERO, ZERO, V, LDV ) + IF( P.GT.1 ) + $ CALL SLACPY( 'Lower', P-1, N, B( 2, 1 ), LDB, V( 2, 1 ), + $ LDV ) + CALL SORG2R( P, P, MIN( P, N ), V, LDV, TAU, WORK, INFO ) + END IF +* +* Clean up B +* + DO 40 J = 1, L - 1 + DO 30 I = J + 1, L + B( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + IF( P.GT.L ) + $ CALL SLASET( 'Full', P-L, N, ZERO, ZERO, B( L+1, 1 ), LDB ) +* + IF( WANTQ ) THEN +* +* Set Q = I and Update Q := Q*P +* + CALL SLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) + CALL SLAPMT( FORWRD, N, N, Q, LDQ, IWORK ) + END IF +* + IF( P.GE.L .AND. N.NE.L ) THEN +* +* RQ factorization of (S11 S12): ( S11 S12 ) = ( 0 S12 )*Z +* + CALL SGERQ2( L, N, B, LDB, TAU, WORK, INFO ) +* +* Update A := A*Z**T +* + CALL SORMR2( 'Right', 'Transpose', M, N, L, B, LDB, TAU, A, + $ LDA, WORK, INFO ) +* + IF( WANTQ ) THEN +* +* Update Q := Q*Z**T +* + CALL SORMR2( 'Right', 'Transpose', N, N, L, B, LDB, TAU, Q, + $ LDQ, WORK, INFO ) + END IF +* +* Clean up B +* + CALL SLASET( 'Full', L, N-L, ZERO, ZERO, B, LDB ) + DO 60 J = N - L + 1, N + DO 50 I = J - N + L + 1, L + B( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE +* + END IF +* +* Let N-L L +* A = ( A11 A12 ) M, +* +* then the following does the complete QR decomposition of A11: +* +* A11 = U*( 0 T12 )*P1**T +* ( 0 0 ) +* + DO 70 I = 1, N - L + IWORK( I ) = 0 + 70 CONTINUE + CALL SGEQP3( M, N-L, A, LDA, IWORK, TAU, WORK, LWORK, INFO ) +* +* Determine the effective rank of A11 +* + K = 0 + DO 80 I = 1, MIN( M, N-L ) + IF( ABS( A( I, I ) ).GT.TOLA ) + $ K = K + 1 + 80 CONTINUE +* +* Update A12 := U**T*A12, where A12 = A( 1:M, N-L+1:N ) +* + CALL SORM2R( 'Left', 'Transpose', M, L, MIN( M, N-L ), A, LDA, + $ TAU, A( 1, N-L+1 ), LDA, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Copy the details of U, and form U +* + CALL SLASET( 'Full', M, M, ZERO, ZERO, U, LDU ) + IF( M.GT.1 ) + $ CALL SLACPY( 'Lower', M-1, N-L, A( 2, 1 ), LDA, U( 2, 1 ), + $ LDU ) + CALL SORG2R( M, M, MIN( M, N-L ), U, LDU, TAU, WORK, INFO ) + END IF +* + IF( WANTQ ) THEN +* +* Update Q( 1:N, 1:N-L ) = Q( 1:N, 1:N-L )*P1 +* + CALL SLAPMT( FORWRD, N, N-L, Q, LDQ, IWORK ) + END IF +* +* Clean up A: set the strictly lower triangular part of +* A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. +* + DO 100 J = 1, K - 1 + DO 90 I = J + 1, K + A( I, J ) = ZERO + 90 CONTINUE + 100 CONTINUE + IF( M.GT.K ) + $ CALL SLASET( 'Full', M-K, N-L, ZERO, ZERO, A( K+1, 1 ), LDA ) +* + IF( N-L.GT.K ) THEN +* +* RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 +* + CALL SGERQ2( K, N-L, A, LDA, TAU, WORK, INFO ) +* + IF( WANTQ ) THEN +* +* Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1**T +* + CALL SORMR2( 'Right', 'Transpose', N, N-L, K, A, LDA, TAU, + $ Q, LDQ, WORK, INFO ) + END IF +* +* Clean up A +* + CALL SLASET( 'Full', K, N-L-K, ZERO, ZERO, A, LDA ) + DO 120 J = N - L - K + 1, N - L + DO 110 I = J - N + L + K + 1, K + A( I, J ) = ZERO + 110 CONTINUE + 120 CONTINUE +* + END IF +* + IF( M.GT.K ) THEN +* +* QR factorization of A( K+1:M,N-L+1:N ) +* + CALL SGEQR2( M-K, L, A( K+1, N-L+1 ), LDA, TAU, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Update U(:,K+1:M) := U(:,K+1:M)*U1 +* + CALL SORM2R( 'Right', 'No transpose', M, M-K, MIN( M-K, L ), + $ A( K+1, N-L+1 ), LDA, TAU, U( 1, K+1 ), LDU, + $ WORK, INFO ) + END IF +* +* Clean up +* + DO 140 J = N - L + 1, N + DO 130 I = J - N + K + L + 1, M + A( I, J ) = ZERO + 130 CONTINUE + 140 CONTINUE +* + END IF +* + WORK( 1 ) = REAL( LWKOPT ) + RETURN +* +* End of SGGSVP3 +* + END diff --git a/dspl/liblapack/SRC/sgsvj0.f b/dspl/liblapack/SRC/sgsvj0.f new file mode 100644 index 0000000..e580efc --- /dev/null +++ b/dspl/liblapack/SRC/sgsvj0.f @@ -0,0 +1,1079 @@ +*> \brief \b SGSVJ0 pre-processor for the routine sgesvj. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGSVJ0 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS, +* SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP +* REAL EPS, SFMIN, TOL +* CHARACTER*1 JOBV +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), SVA( N ), D( N ), V( LDV, * ), +* $ WORK( LWORK ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGSVJ0 is called from SGESVJ as a pre-processor and that is its main +*> purpose. It applies Jacobi rotations in the same way as SGESVJ does, but +*> it does not check convergence (stopping criterion). Few tuning +*> parameters (marked by [TP]) are available for the implementer. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> Specifies whether the output from this procedure is used +*> to compute the matrix V: +*> = 'V': the product of the Jacobi rotations is accumulated +*> by postmulyiplying the N-by-N array V. +*> (See the description of V.) +*> = 'A': the product of the Jacobi rotations is accumulated +*> by postmulyiplying the MV-by-N array V. +*> (See the descriptions of MV and V.) +*> = 'N': the Jacobi rotations are not accumulated. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. +*> M >= N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, M-by-N matrix A, such that A*diag(D) represents +*> the input matrix. +*> On exit, +*> A_onexit * D_onexit represents the input matrix A*diag(D) +*> post-multiplied by a sequence of Jacobi rotations, where the +*> rotation threshold and the total number of sweeps are given in +*> TOL and NSWEEP, respectively. +*> (See the descriptions of D, TOL and NSWEEP.) +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The array D accumulates the scaling factors from the fast scaled +*> Jacobi rotations. +*> On entry, A*diag(D) represents the input matrix. +*> On exit, A_onexit*diag(D_onexit) represents the input matrix +*> post-multiplied by a sequence of Jacobi rotations, where the +*> rotation threshold and the total number of sweeps are given in +*> TOL and NSWEEP, respectively. +*> (See the descriptions of A, TOL and NSWEEP.) +*> \endverbatim +*> +*> \param[in,out] SVA +*> \verbatim +*> SVA is REAL array, dimension (N) +*> On entry, SVA contains the Euclidean norms of the columns of +*> the matrix A*diag(D). +*> On exit, SVA contains the Euclidean norms of the columns of +*> the matrix onexit*diag(D_onexit). +*> \endverbatim +*> +*> \param[in] MV +*> \verbatim +*> MV is INTEGER +*> If JOBV .EQ. 'A', then MV rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV = 'N', then MV is not referenced. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is REAL array, dimension (LDV,N) +*> If JOBV .EQ. 'V' then N rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV .EQ. 'A' then MV rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV = 'N', then V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V, LDV >= 1. +*> If JOBV = 'V', LDV .GE. N. +*> If JOBV = 'A', LDV .GE. MV. +*> \endverbatim +*> +*> \param[in] EPS +*> \verbatim +*> EPS is REAL +*> EPS = SLAMCH('Epsilon') +*> \endverbatim +*> +*> \param[in] SFMIN +*> \verbatim +*> SFMIN is REAL +*> SFMIN = SLAMCH('Safe Minimum') +*> \endverbatim +*> +*> \param[in] TOL +*> \verbatim +*> TOL is REAL +*> TOL is the threshold for Jacobi rotations. For a pair +*> A(:,p), A(:,q) of pivot columns, the Jacobi rotation is +*> applied only if ABS(COS(angle(A(:,p),A(:,q)))) .GT. TOL. +*> \endverbatim +*> +*> \param[in] NSWEEP +*> \verbatim +*> NSWEEP is INTEGER +*> NSWEEP is the number of sweeps of Jacobi rotations to be +*> performed. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> LWORK is the dimension of WORK. LWORK .GE. M. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0 : successful exit. +*> < 0 : if INFO = -i, then the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> SGSVJ0 is used just to enable SGESVJ to call a simplified version of +*> itself to work on a submatrix of the original matrix. +*> +*> \par Contributors: +* ================== +*> +*> Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) +*> +*> \par Bugs, Examples and Comments: +* ================================= +*> +*> Please report all bugs and send interesting test examples and comments to +*> drmac@math.hr. Thank you. +* +* ===================================================================== + SUBROUTINE SGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS, + $ SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP + REAL EPS, SFMIN, TOL + CHARACTER*1 JOBV +* .. +* .. Array Arguments .. + REAL A( LDA, * ), SVA( N ), D( N ), V( LDV, * ), + $ WORK( LWORK ) +* .. +* +* ===================================================================== +* +* .. Local Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E0, HALF = 0.5E0, ONE = 1.0E0) +* .. +* .. Local Scalars .. + REAL AAPP, AAPP0, AAPQ, AAQQ, APOAQ, AQOAP, BIG, + $ BIGTHETA, CS, MXAAPQ, MXSINJ, ROOTBIG, ROOTEPS, + $ ROOTSFMIN, ROOTTOL, SMALL, SN, T, TEMP1, THETA, + $ THSIGN + INTEGER BLSKIP, EMPTSW, i, ibr, IERR, igl, IJBLSK, ir1, + $ ISWROT, jbc, jgl, KBL, LKAHEAD, MVL, NBL, + $ NOTROT, p, PSKIPPED, q, ROWSKIP, SWBAND + LOGICAL APPLV, ROTOK, RSVEC +* .. +* .. Local Arrays .. + REAL FASTR( 5 ) +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, FLOAT, MIN, SIGN, SQRT +* .. +* .. External Functions .. + REAL SDOT, SNRM2 + INTEGER ISAMAX + LOGICAL LSAME + EXTERNAL ISAMAX, LSAME, SDOT, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLASCL, SLASSQ, SROTM, SSWAP, + $ XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + APPLV = LSAME( JOBV, 'A' ) + RSVEC = LSAME( JOBV, 'V' ) + IF( .NOT.( RSVEC .OR. APPLV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( ( N.LT.0 ) .OR. ( N.GT.M ) ) THEN + INFO = -3 + ELSE IF( LDA.LT.M ) THEN + INFO = -5 + ELSE IF( ( RSVEC.OR.APPLV ) .AND. ( MV.LT.0 ) ) THEN + INFO = -8 + ELSE IF( ( RSVEC.AND.( LDV.LT.N ) ).OR. + $ ( APPLV.AND.( LDV.LT.MV ) ) ) THEN + INFO = -10 + ELSE IF( TOL.LE.EPS ) THEN + INFO = -13 + ELSE IF( NSWEEP.LT.0 ) THEN + INFO = -14 + ELSE IF( LWORK.LT.M ) THEN + INFO = -16 + ELSE + INFO = 0 + END IF +* +* #:( + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGSVJ0', -INFO ) + RETURN + END IF +* + IF( RSVEC ) THEN + MVL = N + ELSE IF( APPLV ) THEN + MVL = MV + END IF + RSVEC = RSVEC .OR. APPLV + + ROOTEPS = SQRT( EPS ) + ROOTSFMIN = SQRT( SFMIN ) + SMALL = SFMIN / EPS + BIG = ONE / SFMIN + ROOTBIG = ONE / ROOTSFMIN + BIGTHETA = ONE / ROOTEPS + ROOTTOL = SQRT( TOL ) +* +* .. Row-cyclic Jacobi SVD algorithm with column pivoting .. +* + EMPTSW = ( N*( N-1 ) ) / 2 + NOTROT = 0 + FASTR( 1 ) = ZERO +* +* .. Row-cyclic pivot strategy with de Rijk's pivoting .. +* + + SWBAND = 0 +*[TP] SWBAND is a tuning parameter. It is meaningful and effective +* if SGESVJ is used as a computational routine in the preconditioned +* Jacobi SVD algorithm SGESVJ. For sweeps i=1:SWBAND the procedure +* ...... + + KBL = MIN( 8, N ) +*[TP] KBL is a tuning parameter that defines the tile size in the +* tiling of the p-q loops of pivot pairs. In general, an optimal +* value of KBL depends on the matrix dimensions and on the +* parameters of the computer's memory. +* + NBL = N / KBL + IF( ( NBL*KBL ).NE.N )NBL = NBL + 1 + + BLSKIP = ( KBL**2 ) + 1 +*[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. + + ROWSKIP = MIN( 5, KBL ) +*[TP] ROWSKIP is a tuning parameter. + + LKAHEAD = 1 +*[TP] LKAHEAD is a tuning parameter. + SWBAND = 0 + PSKIPPED = 0 +* + DO 1993 i = 1, NSWEEP +* .. go go go ... +* + MXAAPQ = ZERO + MXSINJ = ZERO + ISWROT = 0 +* + NOTROT = 0 + PSKIPPED = 0 +* + DO 2000 ibr = 1, NBL + + igl = ( ibr-1 )*KBL + 1 +* + DO 1002 ir1 = 0, MIN( LKAHEAD, NBL-ibr ) +* + igl = igl + ir1*KBL +* + DO 2001 p = igl, MIN( igl+KBL-1, N-1 ) + +* .. de Rijk's pivoting + q = ISAMAX( N-p+1, SVA( p ), 1 ) + p - 1 + IF( p.NE.q ) THEN + CALL SSWAP( M, A( 1, p ), 1, A( 1, q ), 1 ) + IF( RSVEC )CALL SSWAP( MVL, V( 1, p ), 1, + $ V( 1, q ), 1 ) + TEMP1 = SVA( p ) + SVA( p ) = SVA( q ) + SVA( q ) = TEMP1 + TEMP1 = D( p ) + D( p ) = D( q ) + D( q ) = TEMP1 + END IF +* + IF( ir1.EQ.0 ) THEN +* +* Column norms are periodically updated by explicit +* norm computation. +* Caveat: +* Some BLAS implementations compute SNRM2(M,A(1,p),1) +* as SQRT(SDOT(M,A(1,p),1,A(1,p),1)), which may result in +* overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and +* undeflow for ||A(:,p)||_2 < SQRT(underflow_threshold). +* Hence, SNRM2 cannot be trusted, not even in the case when +* the true norm is far from the under(over)flow boundaries. +* If properly implemented SNRM2 is available, the IF-THEN-ELSE +* below should read "AAPP = SNRM2( M, A(1,p), 1 ) * D(p)". +* + IF( ( SVA( p ).LT.ROOTBIG ) .AND. + $ ( SVA( p ).GT.ROOTSFMIN ) ) THEN + SVA( p ) = SNRM2( M, A( 1, p ), 1 )*D( p ) + ELSE + TEMP1 = ZERO + AAPP = ONE + CALL SLASSQ( M, A( 1, p ), 1, TEMP1, AAPP ) + SVA( p ) = TEMP1*SQRT( AAPP )*D( p ) + END IF + AAPP = SVA( p ) + ELSE + AAPP = SVA( p ) + END IF + +* + IF( AAPP.GT.ZERO ) THEN +* + PSKIPPED = 0 +* + DO 2002 q = p + 1, MIN( igl+KBL-1, N ) +* + AAQQ = SVA( q ) + + IF( AAQQ.GT.ZERO ) THEN +* + AAPP0 = AAPP + IF( AAQQ.GE.ONE ) THEN + ROTOK = ( SMALL*AAPP ).LE.AAQQ + IF( AAPP.LT.( BIG / AAQQ ) ) THEN + AAPQ = ( SDOT( M, A( 1, p ), 1, A( 1, + $ q ), 1 )*D( p )*D( q ) / AAQQ ) + $ / AAPP + ELSE + CALL SCOPY( M, A( 1, p ), 1, WORK, 1 ) + CALL SLASCL( 'G', 0, 0, AAPP, D( p ), + $ M, 1, WORK, LDA, IERR ) + AAPQ = SDOT( M, WORK, 1, A( 1, q ), + $ 1 )*D( q ) / AAQQ + END IF + ELSE + ROTOK = AAPP.LE.( AAQQ / SMALL ) + IF( AAPP.GT.( SMALL / AAQQ ) ) THEN + AAPQ = ( SDOT( M, A( 1, p ), 1, A( 1, + $ q ), 1 )*D( p )*D( q ) / AAQQ ) + $ / AAPP + ELSE + CALL SCOPY( M, A( 1, q ), 1, WORK, 1 ) + CALL SLASCL( 'G', 0, 0, AAQQ, D( q ), + $ M, 1, WORK, LDA, IERR ) + AAPQ = SDOT( M, WORK, 1, A( 1, p ), + $ 1 )*D( p ) / AAPP + END IF + END IF +* + MXAAPQ = MAX( MXAAPQ, ABS( AAPQ ) ) +* +* TO rotate or NOT to rotate, THAT is the question ... +* + IF( ABS( AAPQ ).GT.TOL ) THEN +* +* .. rotate +* ROTATED = ROTATED + ONE +* + IF( ir1.EQ.0 ) THEN + NOTROT = 0 + PSKIPPED = 0 + ISWROT = ISWROT + 1 + END IF +* + IF( ROTOK ) THEN +* + AQOAP = AAQQ / AAPP + APOAQ = AAPP / AAQQ + THETA = -HALF*ABS( AQOAP-APOAQ ) / AAPQ +* + IF( ABS( THETA ).GT.BIGTHETA ) THEN +* + T = HALF / THETA + FASTR( 3 ) = T*D( p ) / D( q ) + FASTR( 4 ) = -T*D( q ) / D( p ) + CALL SROTM( M, A( 1, p ), 1, + $ A( 1, q ), 1, FASTR ) + IF( RSVEC )CALL SROTM( MVL, + $ V( 1, p ), 1, + $ V( 1, q ), 1, + $ FASTR ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ ) ) + MXSINJ = MAX( MXSINJ, ABS( T ) ) +* + ELSE +* +* .. choose correct signum for THETA and rotate +* + THSIGN = -SIGN( ONE, AAPQ ) + T = ONE / ( THETA+THSIGN* + $ SQRT( ONE+THETA*THETA ) ) + CS = SQRT( ONE / ( ONE+T*T ) ) + SN = T*CS +* + MXSINJ = MAX( MXSINJ, ABS( SN ) ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ ) ) +* + APOAQ = D( p ) / D( q ) + AQOAP = D( q ) / D( p ) + IF( D( p ).GE.ONE ) THEN + IF( D( q ).GE.ONE ) THEN + FASTR( 3 ) = T*APOAQ + FASTR( 4 ) = -T*AQOAP + D( p ) = D( p )*CS + D( q ) = D( q )*CS + CALL SROTM( M, A( 1, p ), 1, + $ A( 1, q ), 1, + $ FASTR ) + IF( RSVEC )CALL SROTM( MVL, + $ V( 1, p ), 1, V( 1, q ), + $ 1, FASTR ) + ELSE + CALL SAXPY( M, -T*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + CALL SAXPY( M, CS*SN*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + D( p ) = D( p )*CS + D( q ) = D( q ) / CS + IF( RSVEC ) THEN + CALL SAXPY( MVL, -T*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + CALL SAXPY( MVL, + $ CS*SN*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + END IF + END IF + ELSE + IF( D( q ).GE.ONE ) THEN + CALL SAXPY( M, T*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + CALL SAXPY( M, -CS*SN*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + D( p ) = D( p ) / CS + D( q ) = D( q )*CS + IF( RSVEC ) THEN + CALL SAXPY( MVL, T*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + CALL SAXPY( MVL, + $ -CS*SN*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + END IF + ELSE + IF( D( p ).GE.D( q ) ) THEN + CALL SAXPY( M, -T*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + CALL SAXPY( M, CS*SN*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + D( p ) = D( p )*CS + D( q ) = D( q ) / CS + IF( RSVEC ) THEN + CALL SAXPY( MVL, + $ -T*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + CALL SAXPY( MVL, + $ CS*SN*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + END IF + ELSE + CALL SAXPY( M, T*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + CALL SAXPY( M, + $ -CS*SN*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + D( p ) = D( p ) / CS + D( q ) = D( q )*CS + IF( RSVEC ) THEN + CALL SAXPY( MVL, + $ T*APOAQ, V( 1, p ), + $ 1, V( 1, q ), 1 ) + CALL SAXPY( MVL, + $ -CS*SN*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + END IF + END IF + END IF + END IF + END IF +* + ELSE +* .. have to use modified Gram-Schmidt like transformation + CALL SCOPY( M, A( 1, p ), 1, WORK, 1 ) + CALL SLASCL( 'G', 0, 0, AAPP, ONE, M, + $ 1, WORK, LDA, IERR ) + CALL SLASCL( 'G', 0, 0, AAQQ, ONE, M, + $ 1, A( 1, q ), LDA, IERR ) + TEMP1 = -AAPQ*D( p ) / D( q ) + CALL SAXPY( M, TEMP1, WORK, 1, + $ A( 1, q ), 1 ) + CALL SLASCL( 'G', 0, 0, ONE, AAQQ, M, + $ 1, A( 1, q ), LDA, IERR ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE-AAPQ*AAPQ ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + END IF +* END IF ROTOK THEN ... ELSE +* +* In the case of cancellation in updating SVA(q), SVA(p) +* recompute SVA(q), SVA(p). + IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS ) + $ THEN + IF( ( AAQQ.LT.ROOTBIG ) .AND. + $ ( AAQQ.GT.ROOTSFMIN ) ) THEN + SVA( q ) = SNRM2( M, A( 1, q ), 1 )* + $ D( q ) + ELSE + T = ZERO + AAQQ = ONE + CALL SLASSQ( M, A( 1, q ), 1, T, + $ AAQQ ) + SVA( q ) = T*SQRT( AAQQ )*D( q ) + END IF + END IF + IF( ( AAPP / AAPP0 ).LE.ROOTEPS ) THEN + IF( ( AAPP.LT.ROOTBIG ) .AND. + $ ( AAPP.GT.ROOTSFMIN ) ) THEN + AAPP = SNRM2( M, A( 1, p ), 1 )* + $ D( p ) + ELSE + T = ZERO + AAPP = ONE + CALL SLASSQ( M, A( 1, p ), 1, T, + $ AAPP ) + AAPP = T*SQRT( AAPP )*D( p ) + END IF + SVA( p ) = AAPP + END IF +* + ELSE +* A(:,p) and A(:,q) already numerically orthogonal + IF( ir1.EQ.0 )NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + END IF + ELSE +* A(:,q) is zero column + IF( ir1.EQ.0 )NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + END IF +* + IF( ( i.LE.SWBAND ) .AND. + $ ( PSKIPPED.GT.ROWSKIP ) ) THEN + IF( ir1.EQ.0 )AAPP = -AAPP + NOTROT = 0 + GO TO 2103 + END IF +* + 2002 CONTINUE +* END q-LOOP +* + 2103 CONTINUE +* bailed out of q-loop + + SVA( p ) = AAPP + + ELSE + SVA( p ) = AAPP + IF( ( ir1.EQ.0 ) .AND. ( AAPP.EQ.ZERO ) ) + $ NOTROT = NOTROT + MIN( igl+KBL-1, N ) - p + END IF +* + 2001 CONTINUE +* end of the p-loop +* end of doing the block ( ibr, ibr ) + 1002 CONTINUE +* end of ir1-loop +* +*........................................................ +* ... go to the off diagonal blocks +* + igl = ( ibr-1 )*KBL + 1 +* + DO 2010 jbc = ibr + 1, NBL +* + jgl = ( jbc-1 )*KBL + 1 +* +* doing the block at ( ibr, jbc ) +* + IJBLSK = 0 + DO 2100 p = igl, MIN( igl+KBL-1, N ) +* + AAPP = SVA( p ) +* + IF( AAPP.GT.ZERO ) THEN +* + PSKIPPED = 0 +* + DO 2200 q = jgl, MIN( jgl+KBL-1, N ) +* + AAQQ = SVA( q ) +* + IF( AAQQ.GT.ZERO ) THEN + AAPP0 = AAPP +* +* .. M x 2 Jacobi SVD .. +* +* .. Safe Gram matrix computation .. +* + IF( AAQQ.GE.ONE ) THEN + IF( AAPP.GE.AAQQ ) THEN + ROTOK = ( SMALL*AAPP ).LE.AAQQ + ELSE + ROTOK = ( SMALL*AAQQ ).LE.AAPP + END IF + IF( AAPP.LT.( BIG / AAQQ ) ) THEN + AAPQ = ( SDOT( M, A( 1, p ), 1, A( 1, + $ q ), 1 )*D( p )*D( q ) / AAQQ ) + $ / AAPP + ELSE + CALL SCOPY( M, A( 1, p ), 1, WORK, 1 ) + CALL SLASCL( 'G', 0, 0, AAPP, D( p ), + $ M, 1, WORK, LDA, IERR ) + AAPQ = SDOT( M, WORK, 1, A( 1, q ), + $ 1 )*D( q ) / AAQQ + END IF + ELSE + IF( AAPP.GE.AAQQ ) THEN + ROTOK = AAPP.LE.( AAQQ / SMALL ) + ELSE + ROTOK = AAQQ.LE.( AAPP / SMALL ) + END IF + IF( AAPP.GT.( SMALL / AAQQ ) ) THEN + AAPQ = ( SDOT( M, A( 1, p ), 1, A( 1, + $ q ), 1 )*D( p )*D( q ) / AAQQ ) + $ / AAPP + ELSE + CALL SCOPY( M, A( 1, q ), 1, WORK, 1 ) + CALL SLASCL( 'G', 0, 0, AAQQ, D( q ), + $ M, 1, WORK, LDA, IERR ) + AAPQ = SDOT( M, WORK, 1, A( 1, p ), + $ 1 )*D( p ) / AAPP + END IF + END IF +* + MXAAPQ = MAX( MXAAPQ, ABS( AAPQ ) ) +* +* TO rotate or NOT to rotate, THAT is the question ... +* + IF( ABS( AAPQ ).GT.TOL ) THEN + NOTROT = 0 +* ROTATED = ROTATED + 1 + PSKIPPED = 0 + ISWROT = ISWROT + 1 +* + IF( ROTOK ) THEN +* + AQOAP = AAQQ / AAPP + APOAQ = AAPP / AAQQ + THETA = -HALF*ABS( AQOAP-APOAQ ) / AAPQ + IF( AAQQ.GT.AAPP0 )THETA = -THETA +* + IF( ABS( THETA ).GT.BIGTHETA ) THEN + T = HALF / THETA + FASTR( 3 ) = T*D( p ) / D( q ) + FASTR( 4 ) = -T*D( q ) / D( p ) + CALL SROTM( M, A( 1, p ), 1, + $ A( 1, q ), 1, FASTR ) + IF( RSVEC )CALL SROTM( MVL, + $ V( 1, p ), 1, + $ V( 1, q ), 1, + $ FASTR ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ ) ) + MXSINJ = MAX( MXSINJ, ABS( T ) ) + ELSE +* +* .. choose correct signum for THETA and rotate +* + THSIGN = -SIGN( ONE, AAPQ ) + IF( AAQQ.GT.AAPP0 )THSIGN = -THSIGN + T = ONE / ( THETA+THSIGN* + $ SQRT( ONE+THETA*THETA ) ) + CS = SQRT( ONE / ( ONE+T*T ) ) + SN = T*CS + MXSINJ = MAX( MXSINJ, ABS( SN ) ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ ) ) +* + APOAQ = D( p ) / D( q ) + AQOAP = D( q ) / D( p ) + IF( D( p ).GE.ONE ) THEN +* + IF( D( q ).GE.ONE ) THEN + FASTR( 3 ) = T*APOAQ + FASTR( 4 ) = -T*AQOAP + D( p ) = D( p )*CS + D( q ) = D( q )*CS + CALL SROTM( M, A( 1, p ), 1, + $ A( 1, q ), 1, + $ FASTR ) + IF( RSVEC )CALL SROTM( MVL, + $ V( 1, p ), 1, V( 1, q ), + $ 1, FASTR ) + ELSE + CALL SAXPY( M, -T*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + CALL SAXPY( M, CS*SN*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + IF( RSVEC ) THEN + CALL SAXPY( MVL, -T*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + CALL SAXPY( MVL, + $ CS*SN*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + END IF + D( p ) = D( p )*CS + D( q ) = D( q ) / CS + END IF + ELSE + IF( D( q ).GE.ONE ) THEN + CALL SAXPY( M, T*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + CALL SAXPY( M, -CS*SN*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + IF( RSVEC ) THEN + CALL SAXPY( MVL, T*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + CALL SAXPY( MVL, + $ -CS*SN*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + END IF + D( p ) = D( p ) / CS + D( q ) = D( q )*CS + ELSE + IF( D( p ).GE.D( q ) ) THEN + CALL SAXPY( M, -T*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + CALL SAXPY( M, CS*SN*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + D( p ) = D( p )*CS + D( q ) = D( q ) / CS + IF( RSVEC ) THEN + CALL SAXPY( MVL, + $ -T*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + CALL SAXPY( MVL, + $ CS*SN*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + END IF + ELSE + CALL SAXPY( M, T*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + CALL SAXPY( M, + $ -CS*SN*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + D( p ) = D( p ) / CS + D( q ) = D( q )*CS + IF( RSVEC ) THEN + CALL SAXPY( MVL, + $ T*APOAQ, V( 1, p ), + $ 1, V( 1, q ), 1 ) + CALL SAXPY( MVL, + $ -CS*SN*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + END IF + END IF + END IF + END IF + END IF +* + ELSE + IF( AAPP.GT.AAQQ ) THEN + CALL SCOPY( M, A( 1, p ), 1, WORK, + $ 1 ) + CALL SLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, WORK, LDA, IERR ) + CALL SLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + TEMP1 = -AAPQ*D( p ) / D( q ) + CALL SAXPY( M, TEMP1, WORK, 1, + $ A( 1, q ), 1 ) + CALL SLASCL( 'G', 0, 0, ONE, AAQQ, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE-AAPQ*AAPQ ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + ELSE + CALL SCOPY( M, A( 1, q ), 1, WORK, + $ 1 ) + CALL SLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, WORK, LDA, IERR ) + CALL SLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + TEMP1 = -AAPQ*D( q ) / D( p ) + CALL SAXPY( M, TEMP1, WORK, 1, + $ A( 1, p ), 1 ) + CALL SLASCL( 'G', 0, 0, ONE, AAPP, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + SVA( p ) = AAPP*SQRT( MAX( ZERO, + $ ONE-AAPQ*AAPQ ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + END IF + END IF +* END IF ROTOK THEN ... ELSE +* +* In the case of cancellation in updating SVA(q) +* .. recompute SVA(q) + IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS ) + $ THEN + IF( ( AAQQ.LT.ROOTBIG ) .AND. + $ ( AAQQ.GT.ROOTSFMIN ) ) THEN + SVA( q ) = SNRM2( M, A( 1, q ), 1 )* + $ D( q ) + ELSE + T = ZERO + AAQQ = ONE + CALL SLASSQ( M, A( 1, q ), 1, T, + $ AAQQ ) + SVA( q ) = T*SQRT( AAQQ )*D( q ) + END IF + END IF + IF( ( AAPP / AAPP0 )**2.LE.ROOTEPS ) THEN + IF( ( AAPP.LT.ROOTBIG ) .AND. + $ ( AAPP.GT.ROOTSFMIN ) ) THEN + AAPP = SNRM2( M, A( 1, p ), 1 )* + $ D( p ) + ELSE + T = ZERO + AAPP = ONE + CALL SLASSQ( M, A( 1, p ), 1, T, + $ AAPP ) + AAPP = T*SQRT( AAPP )*D( p ) + END IF + SVA( p ) = AAPP + END IF +* end of OK rotation + ELSE + NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF + ELSE + NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF +* + IF( ( i.LE.SWBAND ) .AND. ( IJBLSK.GE.BLSKIP ) ) + $ THEN + SVA( p ) = AAPP + NOTROT = 0 + GO TO 2011 + END IF + IF( ( i.LE.SWBAND ) .AND. + $ ( PSKIPPED.GT.ROWSKIP ) ) THEN + AAPP = -AAPP + NOTROT = 0 + GO TO 2203 + END IF +* + 2200 CONTINUE +* end of the q-loop + 2203 CONTINUE +* + SVA( p ) = AAPP +* + ELSE + IF( AAPP.EQ.ZERO )NOTROT = NOTROT + + $ MIN( jgl+KBL-1, N ) - jgl + 1 + IF( AAPP.LT.ZERO )NOTROT = 0 + END IF + + 2100 CONTINUE +* end of the p-loop + 2010 CONTINUE +* end of the jbc-loop + 2011 CONTINUE +*2011 bailed out of the jbc-loop + DO 2012 p = igl, MIN( igl+KBL-1, N ) + SVA( p ) = ABS( SVA( p ) ) + 2012 CONTINUE +* + 2000 CONTINUE +*2000 :: end of the ibr-loop +* +* .. update SVA(N) + IF( ( SVA( N ).LT.ROOTBIG ) .AND. ( SVA( N ).GT.ROOTSFMIN ) ) + $ THEN + SVA( N ) = SNRM2( M, A( 1, N ), 1 )*D( N ) + ELSE + T = ZERO + AAPP = ONE + CALL SLASSQ( M, A( 1, N ), 1, T, AAPP ) + SVA( N ) = T*SQRT( AAPP )*D( N ) + END IF +* +* Additional steering devices +* + IF( ( i.LT.SWBAND ) .AND. ( ( MXAAPQ.LE.ROOTTOL ) .OR. + $ ( ISWROT.LE.N ) ) )SWBAND = i +* + IF( ( i.GT.SWBAND+1 ) .AND. ( MXAAPQ.LT.FLOAT( N )*TOL ) .AND. + $ ( FLOAT( N )*MXAAPQ*MXSINJ.LT.TOL ) ) THEN + GO TO 1994 + END IF +* + IF( NOTROT.GE.EMPTSW )GO TO 1994 + + 1993 CONTINUE +* end i=1:NSWEEP loop +* #:) Reaching this point means that the procedure has comleted the given +* number of iterations. + INFO = NSWEEP - 1 + GO TO 1995 + 1994 CONTINUE +* #:) Reaching this point means that during the i-th sweep all pivots were +* below the given tolerance, causing early exit. +* + INFO = 0 +* #:) INFO = 0 confirms successful iterations. + 1995 CONTINUE +* +* Sort the vector D. + DO 5991 p = 1, N - 1 + q = ISAMAX( N-p+1, SVA( p ), 1 ) + p - 1 + IF( p.NE.q ) THEN + TEMP1 = SVA( p ) + SVA( p ) = SVA( q ) + SVA( q ) = TEMP1 + TEMP1 = D( p ) + D( p ) = D( q ) + D( q ) = TEMP1 + CALL SSWAP( M, A( 1, p ), 1, A( 1, q ), 1 ) + IF( RSVEC )CALL SSWAP( MVL, V( 1, p ), 1, V( 1, q ), 1 ) + END IF + 5991 CONTINUE +* + RETURN +* .. +* .. END OF SGSVJ0 +* .. + END diff --git a/dspl/liblapack/SRC/sgsvj1.f b/dspl/liblapack/SRC/sgsvj1.f new file mode 100644 index 0000000..49b81cf --- /dev/null +++ b/dspl/liblapack/SRC/sgsvj1.f @@ -0,0 +1,784 @@ +*> \brief \b SGSVJ1 pre-processor for the routine sgesvj, applies Jacobi rotations targeting only particular pivots. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGSVJ1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV, +* EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* REAL EPS, SFMIN, TOL +* INTEGER INFO, LDA, LDV, LWORK, M, MV, N, N1, NSWEEP +* CHARACTER*1 JOBV +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), D( N ), SVA( N ), V( LDV, * ), +* $ WORK( LWORK ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGSVJ1 is called from SGESVJ as a pre-processor and that is its main +*> purpose. It applies Jacobi rotations in the same way as SGESVJ does, but +*> it targets only particular pivots and it does not check convergence +*> (stopping criterion). Few tunning parameters (marked by [TP]) are +*> available for the implementer. +*> +*> Further Details +*> ~~~~~~~~~~~~~~~ +*> SGSVJ1 applies few sweeps of Jacobi rotations in the column space of +*> the input M-by-N matrix A. The pivot pairs are taken from the (1,2) +*> off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The +*> block-entries (tiles) of the (1,2) off-diagonal block are marked by the +*> [x]'s in the following scheme: +*> +*> | * * * [x] [x] [x]| +*> | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. +*> | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. +*> |[x] [x] [x] * * * | +*> |[x] [x] [x] * * * | +*> |[x] [x] [x] * * * | +*> +*> In terms of the columns of A, the first N1 columns are rotated 'against' +*> the remaining N-N1 columns, trying to increase the angle between the +*> corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is +*> tiled using quadratic tiles of side KBL. Here, KBL is a tunning parmeter. +*> The number of sweeps is given in NSWEEP and the orthogonality threshold +*> is given in TOL. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> Specifies whether the output from this procedure is used +*> to compute the matrix V: +*> = 'V': the product of the Jacobi rotations is accumulated +*> by postmulyiplying the N-by-N array V. +*> (See the description of V.) +*> = 'A': the product of the Jacobi rotations is accumulated +*> by postmulyiplying the MV-by-N array V. +*> (See the descriptions of MV and V.) +*> = 'N': the Jacobi rotations are not accumulated. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. +*> M >= N >= 0. +*> \endverbatim +*> +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> N1 specifies the 2 x 2 block partition, the first N1 columns are +*> rotated 'against' the remaining N-N1 columns of A. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, M-by-N matrix A, such that A*diag(D) represents +*> the input matrix. +*> On exit, +*> A_onexit * D_onexit represents the input matrix A*diag(D) +*> post-multiplied by a sequence of Jacobi rotations, where the +*> rotation threshold and the total number of sweeps are given in +*> TOL and NSWEEP, respectively. +*> (See the descriptions of N1, D, TOL and NSWEEP.) +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The array D accumulates the scaling factors from the fast scaled +*> Jacobi rotations. +*> On entry, A*diag(D) represents the input matrix. +*> On exit, A_onexit*diag(D_onexit) represents the input matrix +*> post-multiplied by a sequence of Jacobi rotations, where the +*> rotation threshold and the total number of sweeps are given in +*> TOL and NSWEEP, respectively. +*> (See the descriptions of N1, A, TOL and NSWEEP.) +*> \endverbatim +*> +*> \param[in,out] SVA +*> \verbatim +*> SVA is REAL array, dimension (N) +*> On entry, SVA contains the Euclidean norms of the columns of +*> the matrix A*diag(D). +*> On exit, SVA contains the Euclidean norms of the columns of +*> the matrix onexit*diag(D_onexit). +*> \endverbatim +*> +*> \param[in] MV +*> \verbatim +*> MV is INTEGER +*> If JOBV .EQ. 'A', then MV rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV = 'N', then MV is not referenced. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is REAL array, dimension (LDV,N) +*> If JOBV .EQ. 'V' then N rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV .EQ. 'A' then MV rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV = 'N', then V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V, LDV >= 1. +*> If JOBV = 'V', LDV .GE. N. +*> If JOBV = 'A', LDV .GE. MV. +*> \endverbatim +*> +*> \param[in] EPS +*> \verbatim +*> EPS is REAL +*> EPS = SLAMCH('Epsilon') +*> \endverbatim +*> +*> \param[in] SFMIN +*> \verbatim +*> SFMIN is REAL +*> SFMIN = SLAMCH('Safe Minimum') +*> \endverbatim +*> +*> \param[in] TOL +*> \verbatim +*> TOL is REAL +*> TOL is the threshold for Jacobi rotations. For a pair +*> A(:,p), A(:,q) of pivot columns, the Jacobi rotation is +*> applied only if ABS(COS(angle(A(:,p),A(:,q)))) .GT. TOL. +*> \endverbatim +*> +*> \param[in] NSWEEP +*> \verbatim +*> NSWEEP is INTEGER +*> NSWEEP is the number of sweeps of Jacobi rotations to be +*> performed. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> LWORK is the dimension of WORK. LWORK .GE. M. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0 : successful exit. +*> < 0 : if INFO = -i, then the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany) +* +* ===================================================================== + SUBROUTINE SGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV, + $ EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + REAL EPS, SFMIN, TOL + INTEGER INFO, LDA, LDV, LWORK, M, MV, N, N1, NSWEEP + CHARACTER*1 JOBV +* .. +* .. Array Arguments .. + REAL A( LDA, * ), D( N ), SVA( N ), V( LDV, * ), + $ WORK( LWORK ) +* .. +* +* ===================================================================== +* +* .. Local Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E0, HALF = 0.5E0, ONE = 1.0E0) +* .. +* .. Local Scalars .. + REAL AAPP, AAPP0, AAPQ, AAQQ, APOAQ, AQOAP, BIG, + $ BIGTHETA, CS, LARGE, MXAAPQ, MXSINJ, ROOTBIG, + $ ROOTEPS, ROOTSFMIN, ROOTTOL, SMALL, SN, T, + $ TEMP1, THETA, THSIGN + INTEGER BLSKIP, EMPTSW, i, ibr, igl, IERR, IJBLSK, + $ ISWROT, jbc, jgl, KBL, MVL, NOTROT, nblc, nblr, + $ p, PSKIPPED, q, ROWSKIP, SWBAND + LOGICAL APPLV, ROTOK, RSVEC +* .. +* .. Local Arrays .. + REAL FASTR( 5 ) +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, FLOAT, MIN, SIGN, SQRT +* .. +* .. External Functions .. + REAL SDOT, SNRM2 + INTEGER ISAMAX + LOGICAL LSAME + EXTERNAL ISAMAX, LSAME, SDOT, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLASCL, SLASSQ, SROTM, SSWAP, + $ XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + APPLV = LSAME( JOBV, 'A' ) + RSVEC = LSAME( JOBV, 'V' ) + IF( .NOT.( RSVEC .OR. APPLV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( ( N.LT.0 ) .OR. ( N.GT.M ) ) THEN + INFO = -3 + ELSE IF( N1.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.M ) THEN + INFO = -6 + ELSE IF( ( RSVEC.OR.APPLV ) .AND. ( MV.LT.0 ) ) THEN + INFO = -9 + ELSE IF( ( RSVEC.AND.( LDV.LT.N ) ).OR. + $ ( APPLV.AND.( LDV.LT.MV ) ) ) THEN + INFO = -11 + ELSE IF( TOL.LE.EPS ) THEN + INFO = -14 + ELSE IF( NSWEEP.LT.0 ) THEN + INFO = -15 + ELSE IF( LWORK.LT.M ) THEN + INFO = -17 + ELSE + INFO = 0 + END IF +* +* #:( + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGSVJ1', -INFO ) + RETURN + END IF +* + IF( RSVEC ) THEN + MVL = N + ELSE IF( APPLV ) THEN + MVL = MV + END IF + RSVEC = RSVEC .OR. APPLV + + ROOTEPS = SQRT( EPS ) + ROOTSFMIN = SQRT( SFMIN ) + SMALL = SFMIN / EPS + BIG = ONE / SFMIN + ROOTBIG = ONE / ROOTSFMIN + LARGE = BIG / SQRT( FLOAT( M*N ) ) + BIGTHETA = ONE / ROOTEPS + ROOTTOL = SQRT( TOL ) +* +* .. Initialize the right singular vector matrix .. +* +* RSVEC = LSAME( JOBV, 'Y' ) +* + EMPTSW = N1*( N-N1 ) + NOTROT = 0 + FASTR( 1 ) = ZERO +* +* .. Row-cyclic pivot strategy with de Rijk's pivoting .. +* + KBL = MIN( 8, N ) + NBLR = N1 / KBL + IF( ( NBLR*KBL ).NE.N1 )NBLR = NBLR + 1 + +* .. the tiling is nblr-by-nblc [tiles] + + NBLC = ( N-N1 ) / KBL + IF( ( NBLC*KBL ).NE.( N-N1 ) )NBLC = NBLC + 1 + BLSKIP = ( KBL**2 ) + 1 +*[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. + + ROWSKIP = MIN( 5, KBL ) +*[TP] ROWSKIP is a tuning parameter. + SWBAND = 0 +*[TP] SWBAND is a tuning parameter. It is meaningful and effective +* if SGESVJ is used as a computational routine in the preconditioned +* Jacobi SVD algorithm SGESVJ. +* +* +* | * * * [x] [x] [x]| +* | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. +* | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. +* |[x] [x] [x] * * * | +* |[x] [x] [x] * * * | +* |[x] [x] [x] * * * | +* +* + DO 1993 i = 1, NSWEEP +* .. go go go ... +* + MXAAPQ = ZERO + MXSINJ = ZERO + ISWROT = 0 +* + NOTROT = 0 + PSKIPPED = 0 +* + DO 2000 ibr = 1, NBLR + + igl = ( ibr-1 )*KBL + 1 +* +* +*........................................................ +* ... go to the off diagonal blocks + + igl = ( ibr-1 )*KBL + 1 + + DO 2010 jbc = 1, NBLC + + jgl = N1 + ( jbc-1 )*KBL + 1 + +* doing the block at ( ibr, jbc ) + + IJBLSK = 0 + DO 2100 p = igl, MIN( igl+KBL-1, N1 ) + + AAPP = SVA( p ) + + IF( AAPP.GT.ZERO ) THEN + + PSKIPPED = 0 + + DO 2200 q = jgl, MIN( jgl+KBL-1, N ) +* + AAQQ = SVA( q ) + + IF( AAQQ.GT.ZERO ) THEN + AAPP0 = AAPP +* +* .. M x 2 Jacobi SVD .. +* +* .. Safe Gram matrix computation .. +* + IF( AAQQ.GE.ONE ) THEN + IF( AAPP.GE.AAQQ ) THEN + ROTOK = ( SMALL*AAPP ).LE.AAQQ + ELSE + ROTOK = ( SMALL*AAQQ ).LE.AAPP + END IF + IF( AAPP.LT.( BIG / AAQQ ) ) THEN + AAPQ = ( SDOT( M, A( 1, p ), 1, A( 1, + $ q ), 1 )*D( p )*D( q ) / AAQQ ) + $ / AAPP + ELSE + CALL SCOPY( M, A( 1, p ), 1, WORK, 1 ) + CALL SLASCL( 'G', 0, 0, AAPP, D( p ), + $ M, 1, WORK, LDA, IERR ) + AAPQ = SDOT( M, WORK, 1, A( 1, q ), + $ 1 )*D( q ) / AAQQ + END IF + ELSE + IF( AAPP.GE.AAQQ ) THEN + ROTOK = AAPP.LE.( AAQQ / SMALL ) + ELSE + ROTOK = AAQQ.LE.( AAPP / SMALL ) + END IF + IF( AAPP.GT.( SMALL / AAQQ ) ) THEN + AAPQ = ( SDOT( M, A( 1, p ), 1, A( 1, + $ q ), 1 )*D( p )*D( q ) / AAQQ ) + $ / AAPP + ELSE + CALL SCOPY( M, A( 1, q ), 1, WORK, 1 ) + CALL SLASCL( 'G', 0, 0, AAQQ, D( q ), + $ M, 1, WORK, LDA, IERR ) + AAPQ = SDOT( M, WORK, 1, A( 1, p ), + $ 1 )*D( p ) / AAPP + END IF + END IF + + MXAAPQ = MAX( MXAAPQ, ABS( AAPQ ) ) + +* TO rotate or NOT to rotate, THAT is the question ... +* + IF( ABS( AAPQ ).GT.TOL ) THEN + NOTROT = 0 +* ROTATED = ROTATED + 1 + PSKIPPED = 0 + ISWROT = ISWROT + 1 +* + IF( ROTOK ) THEN +* + AQOAP = AAQQ / AAPP + APOAQ = AAPP / AAQQ + THETA = -HALF*ABS( AQOAP-APOAQ ) / AAPQ + IF( AAQQ.GT.AAPP0 )THETA = -THETA + + IF( ABS( THETA ).GT.BIGTHETA ) THEN + T = HALF / THETA + FASTR( 3 ) = T*D( p ) / D( q ) + FASTR( 4 ) = -T*D( q ) / D( p ) + CALL SROTM( M, A( 1, p ), 1, + $ A( 1, q ), 1, FASTR ) + IF( RSVEC )CALL SROTM( MVL, + $ V( 1, p ), 1, + $ V( 1, q ), 1, + $ FASTR ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ ) ) + MXSINJ = MAX( MXSINJ, ABS( T ) ) + ELSE +* +* .. choose correct signum for THETA and rotate +* + THSIGN = -SIGN( ONE, AAPQ ) + IF( AAQQ.GT.AAPP0 )THSIGN = -THSIGN + T = ONE / ( THETA+THSIGN* + $ SQRT( ONE+THETA*THETA ) ) + CS = SQRT( ONE / ( ONE+T*T ) ) + SN = T*CS + MXSINJ = MAX( MXSINJ, ABS( SN ) ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ ) ) + + APOAQ = D( p ) / D( q ) + AQOAP = D( q ) / D( p ) + IF( D( p ).GE.ONE ) THEN +* + IF( D( q ).GE.ONE ) THEN + FASTR( 3 ) = T*APOAQ + FASTR( 4 ) = -T*AQOAP + D( p ) = D( p )*CS + D( q ) = D( q )*CS + CALL SROTM( M, A( 1, p ), 1, + $ A( 1, q ), 1, + $ FASTR ) + IF( RSVEC )CALL SROTM( MVL, + $ V( 1, p ), 1, V( 1, q ), + $ 1, FASTR ) + ELSE + CALL SAXPY( M, -T*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + CALL SAXPY( M, CS*SN*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + IF( RSVEC ) THEN + CALL SAXPY( MVL, -T*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + CALL SAXPY( MVL, + $ CS*SN*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + END IF + D( p ) = D( p )*CS + D( q ) = D( q ) / CS + END IF + ELSE + IF( D( q ).GE.ONE ) THEN + CALL SAXPY( M, T*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + CALL SAXPY( M, -CS*SN*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + IF( RSVEC ) THEN + CALL SAXPY( MVL, T*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + CALL SAXPY( MVL, + $ -CS*SN*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + END IF + D( p ) = D( p ) / CS + D( q ) = D( q )*CS + ELSE + IF( D( p ).GE.D( q ) ) THEN + CALL SAXPY( M, -T*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + CALL SAXPY( M, CS*SN*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + D( p ) = D( p )*CS + D( q ) = D( q ) / CS + IF( RSVEC ) THEN + CALL SAXPY( MVL, + $ -T*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + CALL SAXPY( MVL, + $ CS*SN*APOAQ, + $ V( 1, p ), 1, + $ V( 1, q ), 1 ) + END IF + ELSE + CALL SAXPY( M, T*APOAQ, + $ A( 1, p ), 1, + $ A( 1, q ), 1 ) + CALL SAXPY( M, + $ -CS*SN*AQOAP, + $ A( 1, q ), 1, + $ A( 1, p ), 1 ) + D( p ) = D( p ) / CS + D( q ) = D( q )*CS + IF( RSVEC ) THEN + CALL SAXPY( MVL, + $ T*APOAQ, V( 1, p ), + $ 1, V( 1, q ), 1 ) + CALL SAXPY( MVL, + $ -CS*SN*AQOAP, + $ V( 1, q ), 1, + $ V( 1, p ), 1 ) + END IF + END IF + END IF + END IF + END IF + + ELSE + IF( AAPP.GT.AAQQ ) THEN + CALL SCOPY( M, A( 1, p ), 1, WORK, + $ 1 ) + CALL SLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, WORK, LDA, IERR ) + CALL SLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + TEMP1 = -AAPQ*D( p ) / D( q ) + CALL SAXPY( M, TEMP1, WORK, 1, + $ A( 1, q ), 1 ) + CALL SLASCL( 'G', 0, 0, ONE, AAQQ, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE-AAPQ*AAPQ ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + ELSE + CALL SCOPY( M, A( 1, q ), 1, WORK, + $ 1 ) + CALL SLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, WORK, LDA, IERR ) + CALL SLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + TEMP1 = -AAPQ*D( q ) / D( p ) + CALL SAXPY( M, TEMP1, WORK, 1, + $ A( 1, p ), 1 ) + CALL SLASCL( 'G', 0, 0, ONE, AAPP, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + SVA( p ) = AAPP*SQRT( MAX( ZERO, + $ ONE-AAPQ*AAPQ ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + END IF + END IF +* END IF ROTOK THEN ... ELSE +* +* In the case of cancellation in updating SVA(q) +* .. recompute SVA(q) + IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS ) + $ THEN + IF( ( AAQQ.LT.ROOTBIG ) .AND. + $ ( AAQQ.GT.ROOTSFMIN ) ) THEN + SVA( q ) = SNRM2( M, A( 1, q ), 1 )* + $ D( q ) + ELSE + T = ZERO + AAQQ = ONE + CALL SLASSQ( M, A( 1, q ), 1, T, + $ AAQQ ) + SVA( q ) = T*SQRT( AAQQ )*D( q ) + END IF + END IF + IF( ( AAPP / AAPP0 )**2.LE.ROOTEPS ) THEN + IF( ( AAPP.LT.ROOTBIG ) .AND. + $ ( AAPP.GT.ROOTSFMIN ) ) THEN + AAPP = SNRM2( M, A( 1, p ), 1 )* + $ D( p ) + ELSE + T = ZERO + AAPP = ONE + CALL SLASSQ( M, A( 1, p ), 1, T, + $ AAPP ) + AAPP = T*SQRT( AAPP )*D( p ) + END IF + SVA( p ) = AAPP + END IF +* end of OK rotation + ELSE + NOTROT = NOTROT + 1 +* SKIPPED = SKIPPED + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF + ELSE + NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF + +* IF ( NOTROT .GE. EMPTSW ) GO TO 2011 + IF( ( i.LE.SWBAND ) .AND. ( IJBLSK.GE.BLSKIP ) ) + $ THEN + SVA( p ) = AAPP + NOTROT = 0 + GO TO 2011 + END IF + IF( ( i.LE.SWBAND ) .AND. + $ ( PSKIPPED.GT.ROWSKIP ) ) THEN + AAPP = -AAPP + NOTROT = 0 + GO TO 2203 + END IF + +* + 2200 CONTINUE +* end of the q-loop + 2203 CONTINUE + + SVA( p ) = AAPP +* + ELSE + IF( AAPP.EQ.ZERO )NOTROT = NOTROT + + $ MIN( jgl+KBL-1, N ) - jgl + 1 + IF( AAPP.LT.ZERO )NOTROT = 0 +*** IF ( NOTROT .GE. EMPTSW ) GO TO 2011 + END IF + + 2100 CONTINUE +* end of the p-loop + 2010 CONTINUE +* end of the jbc-loop + 2011 CONTINUE +*2011 bailed out of the jbc-loop + DO 2012 p = igl, MIN( igl+KBL-1, N ) + SVA( p ) = ABS( SVA( p ) ) + 2012 CONTINUE +*** IF ( NOTROT .GE. EMPTSW ) GO TO 1994 + 2000 CONTINUE +*2000 :: end of the ibr-loop +* +* .. update SVA(N) + IF( ( SVA( N ).LT.ROOTBIG ) .AND. ( SVA( N ).GT.ROOTSFMIN ) ) + $ THEN + SVA( N ) = SNRM2( M, A( 1, N ), 1 )*D( N ) + ELSE + T = ZERO + AAPP = ONE + CALL SLASSQ( M, A( 1, N ), 1, T, AAPP ) + SVA( N ) = T*SQRT( AAPP )*D( N ) + END IF +* +* Additional steering devices +* + IF( ( i.LT.SWBAND ) .AND. ( ( MXAAPQ.LE.ROOTTOL ) .OR. + $ ( ISWROT.LE.N ) ) )SWBAND = i + + IF( ( i.GT.SWBAND+1 ) .AND. ( MXAAPQ.LT.FLOAT( N )*TOL ) .AND. + $ ( FLOAT( N )*MXAAPQ*MXSINJ.LT.TOL ) ) THEN + GO TO 1994 + END IF + +* + IF( NOTROT.GE.EMPTSW )GO TO 1994 + + 1993 CONTINUE +* end i=1:NSWEEP loop +* #:) Reaching this point means that the procedure has completed the given +* number of sweeps. + INFO = NSWEEP - 1 + GO TO 1995 + 1994 CONTINUE +* #:) Reaching this point means that during the i-th sweep all pivots were +* below the given threshold, causing early exit. + + INFO = 0 +* #:) INFO = 0 confirms successful iterations. + 1995 CONTINUE +* +* Sort the vector D +* + DO 5991 p = 1, N - 1 + q = ISAMAX( N-p+1, SVA( p ), 1 ) + p - 1 + IF( p.NE.q ) THEN + TEMP1 = SVA( p ) + SVA( p ) = SVA( q ) + SVA( q ) = TEMP1 + TEMP1 = D( p ) + D( p ) = D( q ) + D( q ) = TEMP1 + CALL SSWAP( M, A( 1, p ), 1, A( 1, q ), 1 ) + IF( RSVEC )CALL SSWAP( MVL, V( 1, p ), 1, V( 1, q ), 1 ) + END IF + 5991 CONTINUE +* + RETURN +* .. +* .. END OF SGSVJ1 +* .. + END diff --git a/dspl/liblapack/SRC/sgtcon.f b/dspl/liblapack/SRC/sgtcon.f new file mode 100644 index 0000000..e053265 --- /dev/null +++ b/dspl/liblapack/SRC/sgtcon.f @@ -0,0 +1,255 @@ +*> \brief \b SGTCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGTCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER INFO, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL D( * ), DL( * ), DU( * ), DU2( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGTCON estimates the reciprocal of the condition number of a real +*> tridiagonal matrix A using the LU factorization as computed by +*> SGTTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is REAL array, dimension (N-1) +*> The (n-1) multipliers that define the matrix L from the +*> LU factorization of A as computed by SGTTRF. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of the upper triangular matrix U from +*> the LU factorization of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is REAL array, dimension (N-1) +*> The (n-1) elements of the first superdiagonal of U. +*> \endverbatim +*> +*> \param[in] DU2 +*> \verbatim +*> DU2 is REAL array, dimension (N-2) +*> The (n-2) elements of the second superdiagonal of U. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= n, row i of the matrix was +*> interchanged with row IPIV(i). IPIV(i) will always be either +*> i or i+1; IPIV(i) = i indicates a row interchange was not +*> required. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> If NORM = '1' or 'O', the 1-norm of the original matrix A. +*> If NORM = 'I', the infinity-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGTcomputational +* +* ===================================================================== + SUBROUTINE SGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND, + $ WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL D( * ), DL( * ), DU( * ), DU2( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + INTEGER I, KASE, KASE1 + REAL AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGTTRS, SLACN2, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGTCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* +* Check that D(1:N) is non-zero. +* + DO 10 I = 1, N + IF( D( I ).EQ.ZERO ) + $ RETURN + 10 CONTINUE +* + AINVNM = ZERO + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 20 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(U)*inv(L). +* + CALL SGTTRS( 'No transpose', N, 1, DL, D, DU, DU2, IPIV, + $ WORK, N, INFO ) + ELSE +* +* Multiply by inv(L**T)*inv(U**T). +* + CALL SGTTRS( 'Transpose', N, 1, DL, D, DU, DU2, IPIV, WORK, + $ N, INFO ) + END IF + GO TO 20 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of SGTCON +* + END diff --git a/dspl/liblapack/SRC/sgtrfs.f b/dspl/liblapack/SRC/sgtrfs.f new file mode 100644 index 0000000..8d60da8 --- /dev/null +++ b/dspl/liblapack/SRC/sgtrfs.f @@ -0,0 +1,474 @@ +*> \brief \b SGTRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGTRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, +* IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL B( LDB, * ), BERR( * ), D( * ), DF( * ), +* $ DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ), +* $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGTRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is tridiagonal, and provides +*> error bounds and backward error estimates for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is REAL array, dimension (N-1) +*> The (n-1) subdiagonal elements of A. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is REAL array, dimension (N-1) +*> The (n-1) superdiagonal elements of A. +*> \endverbatim +*> +*> \param[in] DLF +*> \verbatim +*> DLF is REAL array, dimension (N-1) +*> The (n-1) multipliers that define the matrix L from the +*> LU factorization of A as computed by SGTTRF. +*> \endverbatim +*> +*> \param[in] DF +*> \verbatim +*> DF is REAL array, dimension (N) +*> The n diagonal elements of the upper triangular matrix U from +*> the LU factorization of A. +*> \endverbatim +*> +*> \param[in] DUF +*> \verbatim +*> DUF is REAL array, dimension (N-1) +*> The (n-1) elements of the first superdiagonal of U. +*> \endverbatim +*> +*> \param[in] DU2 +*> \verbatim +*> DU2 is REAL array, dimension (N-2) +*> The (n-2) elements of the second superdiagonal of U. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= n, row i of the matrix was +*> interchanged with row IPIV(i). IPIV(i) will always be either +*> i or i+1; IPIV(i) = i indicates a row interchange was not +*> required. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by SGTTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGTcomputational +* +* ===================================================================== + SUBROUTINE SGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, + $ IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL B( LDB, * ), BERR( * ), D( * ), DF( * ), + $ DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANSN, TRANST + INTEGER COUNT, I, J, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGTTRS, SLACN2, SLAGTM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGTRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'T' + ELSE + TRANSN = 'T' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = 4 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 110 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL SLAGTM( TRANS, N, 1, -ONE, DL, D, DU, X( 1, J ), LDX, ONE, + $ WORK( N+1 ), N ) +* +* Compute abs(op(A))*abs(x) + abs(b) for use in the backward +* error bound. +* + IF( NOTRAN ) THEN + IF( N.EQ.1 ) THEN + WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) ) + ELSE + WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) ) + + $ ABS( DU( 1 )*X( 2, J ) ) + DO 30 I = 2, N - 1 + WORK( I ) = ABS( B( I, J ) ) + + $ ABS( DL( I-1 )*X( I-1, J ) ) + + $ ABS( D( I )*X( I, J ) ) + + $ ABS( DU( I )*X( I+1, J ) ) + 30 CONTINUE + WORK( N ) = ABS( B( N, J ) ) + + $ ABS( DL( N-1 )*X( N-1, J ) ) + + $ ABS( D( N )*X( N, J ) ) + END IF + ELSE + IF( N.EQ.1 ) THEN + WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) ) + ELSE + WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) ) + + $ ABS( DL( 1 )*X( 2, J ) ) + DO 40 I = 2, N - 1 + WORK( I ) = ABS( B( I, J ) ) + + $ ABS( DU( I-1 )*X( I-1, J ) ) + + $ ABS( D( I )*X( I, J ) ) + + $ ABS( DL( I )*X( I+1, J ) ) + 40 CONTINUE + WORK( N ) = ABS( B( N, J ) ) + + $ ABS( DU( N-1 )*X( N-1, J ) ) + + $ ABS( D( N )*X( N, J ) ) + END IF + END IF +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + S = ZERO + DO 50 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 50 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL SGTTRS( TRANS, N, 1, DLF, DF, DUF, DU2, IPIV, + $ WORK( N+1 ), N, INFO ) + CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 60 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 60 CONTINUE +* + KASE = 0 + 70 CONTINUE + CALL SLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**T). +* + CALL SGTTRS( TRANST, N, 1, DLF, DF, DUF, DU2, IPIV, + $ WORK( N+1 ), N, INFO ) + DO 80 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 80 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 90 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 90 CONTINUE + CALL SGTTRS( TRANSN, N, 1, DLF, DF, DUF, DU2, IPIV, + $ WORK( N+1 ), N, INFO ) + END IF + GO TO 70 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 100 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 100 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 110 CONTINUE +* + RETURN +* +* End of SGTRFS +* + END diff --git a/dspl/liblapack/SRC/sgtsv.f b/dspl/liblapack/SRC/sgtsv.f new file mode 100644 index 0000000..d18f9e5 --- /dev/null +++ b/dspl/liblapack/SRC/sgtsv.f @@ -0,0 +1,333 @@ +*> \brief SGTSV computes the solution to system of linear equations A * X = B for GT matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGTSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* REAL B( LDB, * ), D( * ), DL( * ), DU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGTSV solves the equation +*> +*> A*X = B, +*> +*> where A is an n by n tridiagonal matrix, by Gaussian elimination with +*> partial pivoting. +*> +*> Note that the equation A**T*X = B may be solved by interchanging the +*> order of the arguments DU and DL. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] DL +*> \verbatim +*> DL is REAL array, dimension (N-1) +*> On entry, DL must contain the (n-1) sub-diagonal elements of +*> A. +*> +*> On exit, DL is overwritten by the (n-2) elements of the +*> second super-diagonal of the upper triangular matrix U from +*> the LU factorization of A, in DL(1), ..., DL(n-2). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, D must contain the diagonal elements of A. +*> +*> On exit, D is overwritten by the n diagonal elements of U. +*> \endverbatim +*> +*> \param[in,out] DU +*> \verbatim +*> DU is REAL array, dimension (N-1) +*> On entry, DU must contain the (n-1) super-diagonal elements +*> of A. +*> +*> On exit, DU is overwritten by the (n-1) elements of the first +*> super-diagonal of U. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N by NRHS matrix of right hand side matrix B. +*> On exit, if INFO = 0, the N by NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero, and the solution +*> has not been computed. The factorization has not been +*> completed unless i = N. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGTsolve +* +* ===================================================================== + SUBROUTINE SGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL B( LDB, * ), D( * ), DL( * ), DU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL FACT, TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGTSV ', -INFO ) + RETURN + END IF +* + IF( N.EQ.0 ) + $ RETURN +* + IF( NRHS.EQ.1 ) THEN + DO 10 I = 1, N - 2 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN +* +* No row interchange required +* + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + B( I+1, 1 ) = B( I+1, 1 ) - FACT*B( I, 1 ) + ELSE + INFO = I + RETURN + END IF + DL( I ) = ZERO + ELSE +* +* Interchange rows I and I+1 +* + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + TEMP = D( I+1 ) + D( I+1 ) = DU( I ) - FACT*TEMP + DL( I ) = DU( I+1 ) + DU( I+1 ) = -FACT*DL( I ) + DU( I ) = TEMP + TEMP = B( I, 1 ) + B( I, 1 ) = B( I+1, 1 ) + B( I+1, 1 ) = TEMP - FACT*B( I+1, 1 ) + END IF + 10 CONTINUE + IF( N.GT.1 ) THEN + I = N - 1 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + B( I+1, 1 ) = B( I+1, 1 ) - FACT*B( I, 1 ) + ELSE + INFO = I + RETURN + END IF + ELSE + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + TEMP = D( I+1 ) + D( I+1 ) = DU( I ) - FACT*TEMP + DU( I ) = TEMP + TEMP = B( I, 1 ) + B( I, 1 ) = B( I+1, 1 ) + B( I+1, 1 ) = TEMP - FACT*B( I+1, 1 ) + END IF + END IF + IF( D( N ).EQ.ZERO ) THEN + INFO = N + RETURN + END IF + ELSE + DO 40 I = 1, N - 2 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN +* +* No row interchange required +* + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + DO 20 J = 1, NRHS + B( I+1, J ) = B( I+1, J ) - FACT*B( I, J ) + 20 CONTINUE + ELSE + INFO = I + RETURN + END IF + DL( I ) = ZERO + ELSE +* +* Interchange rows I and I+1 +* + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + TEMP = D( I+1 ) + D( I+1 ) = DU( I ) - FACT*TEMP + DL( I ) = DU( I+1 ) + DU( I+1 ) = -FACT*DL( I ) + DU( I ) = TEMP + DO 30 J = 1, NRHS + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - FACT*B( I+1, J ) + 30 CONTINUE + END IF + 40 CONTINUE + IF( N.GT.1 ) THEN + I = N - 1 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + DO 50 J = 1, NRHS + B( I+1, J ) = B( I+1, J ) - FACT*B( I, J ) + 50 CONTINUE + ELSE + INFO = I + RETURN + END IF + ELSE + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + TEMP = D( I+1 ) + D( I+1 ) = DU( I ) - FACT*TEMP + DU( I ) = TEMP + DO 60 J = 1, NRHS + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - FACT*B( I+1, J ) + 60 CONTINUE + END IF + END IF + IF( D( N ).EQ.ZERO ) THEN + INFO = N + RETURN + END IF + END IF +* +* Back solve with the matrix U from the factorization. +* + IF( NRHS.LE.2 ) THEN + J = 1 + 70 CONTINUE + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / D( N-1 ) + DO 80 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DL( I )* + $ B( I+2, J ) ) / D( I ) + 80 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 70 + END IF + ELSE + DO 100 J = 1, NRHS + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 90 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DL( I )* + $ B( I+2, J ) ) / D( I ) + 90 CONTINUE + 100 CONTINUE + END IF +* + RETURN +* +* End of SGTSV +* + END diff --git a/dspl/liblapack/SRC/sgtsvx.f b/dspl/liblapack/SRC/sgtsvx.f new file mode 100644 index 0000000..4f00934 --- /dev/null +++ b/dspl/liblapack/SRC/sgtsvx.f @@ -0,0 +1,414 @@ +*> \brief SGTSVX computes the solution to system of linear equations A * X = B for GT matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGTSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, +* DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER FACT, TRANS +* INTEGER INFO, LDB, LDX, N, NRHS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL B( LDB, * ), BERR( * ), D( * ), DF( * ), +* $ DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ), +* $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGTSVX uses the LU factorization to compute the solution to a real +*> system of linear equations A * X = B or A**T * X = B, +*> where A is a tridiagonal matrix of order N and X and B are N-by-NRHS +*> matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'N', the LU decomposition is used to factor the matrix A +*> as A = L * U, where L is a product of permutation and unit lower +*> bidiagonal matrices and U is upper triangular with nonzeros in +*> only the main diagonal and first two superdiagonals. +*> +*> 2. If some U(i,i)=0, so that U is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 3. The system of equations is solved for X using the factored form +*> of A. +*> +*> 4. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of A has been +*> supplied on entry. +*> = 'F': DLF, DF, DUF, DU2, and IPIV contain the factored +*> form of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV +*> will not be modified. +*> = 'N': The matrix will be copied to DLF, DF, and DUF +*> and factored. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is REAL array, dimension (N-1) +*> The (n-1) subdiagonal elements of A. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is REAL array, dimension (N-1) +*> The (n-1) superdiagonal elements of A. +*> \endverbatim +*> +*> \param[in,out] DLF +*> \verbatim +*> DLF is REAL array, dimension (N-1) +*> If FACT = 'F', then DLF is an input argument and on entry +*> contains the (n-1) multipliers that define the matrix L from +*> the LU factorization of A as computed by SGTTRF. +*> +*> If FACT = 'N', then DLF is an output argument and on exit +*> contains the (n-1) multipliers that define the matrix L from +*> the LU factorization of A. +*> \endverbatim +*> +*> \param[in,out] DF +*> \verbatim +*> DF is REAL array, dimension (N) +*> If FACT = 'F', then DF is an input argument and on entry +*> contains the n diagonal elements of the upper triangular +*> matrix U from the LU factorization of A. +*> +*> If FACT = 'N', then DF is an output argument and on exit +*> contains the n diagonal elements of the upper triangular +*> matrix U from the LU factorization of A. +*> \endverbatim +*> +*> \param[in,out] DUF +*> \verbatim +*> DUF is REAL array, dimension (N-1) +*> If FACT = 'F', then DUF is an input argument and on entry +*> contains the (n-1) elements of the first superdiagonal of U. +*> +*> If FACT = 'N', then DUF is an output argument and on exit +*> contains the (n-1) elements of the first superdiagonal of U. +*> \endverbatim +*> +*> \param[in,out] DU2 +*> \verbatim +*> DU2 is REAL array, dimension (N-2) +*> If FACT = 'F', then DU2 is an input argument and on entry +*> contains the (n-2) elements of the second superdiagonal of +*> U. +*> +*> If FACT = 'N', then DU2 is an output argument and on exit +*> contains the (n-2) elements of the second superdiagonal of +*> U. +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains the pivot indices from the LU factorization of A as +*> computed by SGTTRF. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains the pivot indices from the LU factorization of A; +*> row i of the matrix was interchanged with row IPIV(i). +*> IPIV(i) will always be either i or i+1; IPIV(i) = i indicates +*> a row interchange was not required. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The estimate of the reciprocal condition number of the matrix +*> A. If RCOND is less than the machine precision (in +*> particular, if RCOND = 0), the matrix is singular to working +*> precision. This condition is indicated by a return code of +*> INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: U(i,i) is exactly zero. The factorization +*> has not been completed unless i = N, but the +*> factor U is exactly singular, so the solution +*> and error bounds could not be computed. +*> RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGTsolve +* +* ===================================================================== + SUBROUTINE SGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, + $ DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER FACT, TRANS + INTEGER INFO, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL B( LDB, * ), BERR( * ), D( * ), DF( * ), + $ DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT, NOTRAN + CHARACTER NORM + REAL ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANGT + EXTERNAL LSAME, SLAMCH, SLANGT +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGTCON, SGTRFS, SGTTRF, SGTTRS, SLACPY, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGTSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the LU factorization of A. +* + CALL SCOPY( N, D, 1, DF, 1 ) + IF( N.GT.1 ) THEN + CALL SCOPY( N-1, DL, 1, DLF, 1 ) + CALL SCOPY( N-1, DU, 1, DUF, 1 ) + END IF + CALL SGTTRF( N, DLF, DF, DUF, DU2, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = SLANGT( NORM, N, DL, D, DU ) +* +* Compute the reciprocal of the condition number of A. +* + CALL SGTCON( NORM, N, DLF, DF, DUF, DU2, IPIV, ANORM, RCOND, WORK, + $ IWORK, INFO ) +* +* Compute the solution vectors X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SGTTRS( TRANS, N, NRHS, DLF, DF, DUF, DU2, IPIV, X, LDX, + $ INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL SGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, IPIV, + $ B, LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of SGTSVX +* + END diff --git a/dspl/liblapack/SRC/sgttrf.f b/dspl/liblapack/SRC/sgttrf.f new file mode 100644 index 0000000..f9846dd --- /dev/null +++ b/dspl/liblapack/SRC/sgttrf.f @@ -0,0 +1,237 @@ +*> \brief \b SGTTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGTTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGTTRF computes an LU factorization of a real tridiagonal matrix A +*> using elimination with partial pivoting and row interchanges. +*> +*> The factorization has the form +*> A = L * U +*> where L is a product of permutation and unit lower bidiagonal +*> matrices and U is upper triangular with nonzeros in only the main +*> diagonal and first two superdiagonals. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in,out] DL +*> \verbatim +*> DL is REAL array, dimension (N-1) +*> On entry, DL must contain the (n-1) sub-diagonal elements of +*> A. +*> +*> On exit, DL is overwritten by the (n-1) multipliers that +*> define the matrix L from the LU factorization of A. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, D must contain the diagonal elements of A. +*> +*> On exit, D is overwritten by the n diagonal elements of the +*> upper triangular matrix U from the LU factorization of A. +*> \endverbatim +*> +*> \param[in,out] DU +*> \verbatim +*> DU is REAL array, dimension (N-1) +*> On entry, DU must contain the (n-1) super-diagonal elements +*> of A. +*> +*> On exit, DU is overwritten by the (n-1) elements of the first +*> super-diagonal of U. +*> \endverbatim +*> +*> \param[out] DU2 +*> \verbatim +*> DU2 is REAL array, dimension (N-2) +*> On exit, DU2 is overwritten by the (n-2) elements of the +*> second super-diagonal of U. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= n, row i of the matrix was +*> interchanged with row IPIV(i). IPIV(i) will always be either +*> i or i+1; IPIV(i) = i indicates a row interchange was not +*> required. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, U(k,k) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGTcomputational +* +* ===================================================================== + SUBROUTINE SGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL FACT, TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'SGTTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize IPIV(i) = i and DU2(I) = 0 +* + DO 10 I = 1, N + IPIV( I ) = I + 10 CONTINUE + DO 20 I = 1, N - 2 + DU2( I ) = ZERO + 20 CONTINUE +* + DO 30 I = 1, N - 2 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN +* +* No row interchange required, eliminate DL(I) +* + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + DL( I ) = FACT + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + END IF + ELSE +* +* Interchange rows I and I+1, eliminate DL(I) +* + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + DL( I ) = FACT + TEMP = DU( I ) + DU( I ) = D( I+1 ) + D( I+1 ) = TEMP - FACT*D( I+1 ) + DU2( I ) = DU( I+1 ) + DU( I+1 ) = -FACT*DU( I+1 ) + IPIV( I ) = I + 1 + END IF + 30 CONTINUE + IF( N.GT.1 ) THEN + I = N - 1 + IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN + IF( D( I ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + DL( I ) = FACT + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + END IF + ELSE + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + DL( I ) = FACT + TEMP = DU( I ) + DU( I ) = D( I+1 ) + D( I+1 ) = TEMP - FACT*D( I+1 ) + IPIV( I ) = I + 1 + END IF + END IF +* +* Check for a zero on the diagonal of U. +* + DO 40 I = 1, N + IF( D( I ).EQ.ZERO ) THEN + INFO = I + GO TO 50 + END IF + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of SGTTRF +* + END diff --git a/dspl/liblapack/SRC/sgttrs.f b/dspl/liblapack/SRC/sgttrs.f new file mode 100644 index 0000000..4f302da --- /dev/null +++ b/dspl/liblapack/SRC/sgttrs.f @@ -0,0 +1,223 @@ +*> \brief \b SGTTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGTTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGTTRS solves one of the systems of equations +*> A*X = B or A**T*X = B, +*> with a tridiagonal matrix A using the LU factorization computed +*> by SGTTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations. +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T* X = B (Transpose) +*> = 'C': A**T* X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is REAL array, dimension (N-1) +*> The (n-1) multipliers that define the matrix L from the +*> LU factorization of A. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of the upper triangular matrix U from +*> the LU factorization of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is REAL array, dimension (N-1) +*> The (n-1) elements of the first super-diagonal of U. +*> \endverbatim +*> +*> \param[in] DU2 +*> \verbatim +*> DU2 is REAL array, dimension (N-2) +*> The (n-2) elements of the second super-diagonal of U. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= n, row i of the matrix was +*> interchanged with row IPIV(i). IPIV(i) will always be either +*> i or i+1; IPIV(i) = i indicates a row interchange was not +*> required. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the matrix of right hand side vectors B. +*> On exit, B is overwritten by the solution vectors X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGTcomputational +* +* ===================================================================== + SUBROUTINE SGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER ITRANS, J, JB, NB +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SGTTS2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOTRAN = ( TRANS.EQ.'N' .OR. TRANS.EQ.'n' ) + IF( .NOT.NOTRAN .AND. .NOT.( TRANS.EQ.'T' .OR. TRANS.EQ. + $ 't' ) .AND. .NOT.( TRANS.EQ.'C' .OR. TRANS.EQ.'c' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SGTTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Decode TRANS +* + IF( NOTRAN ) THEN + ITRANS = 0 + ELSE + ITRANS = 1 + END IF +* +* Determine the number of right-hand sides to solve at a time. +* + IF( NRHS.EQ.1 ) THEN + NB = 1 + ELSE + NB = MAX( 1, ILAENV( 1, 'SGTTRS', TRANS, N, NRHS, -1, -1 ) ) + END IF +* + IF( NB.GE.NRHS ) THEN + CALL SGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) + ELSE + DO 10 J = 1, NRHS, NB + JB = MIN( NRHS-J+1, NB ) + CALL SGTTS2( ITRANS, N, JB, DL, D, DU, DU2, IPIV, B( 1, J ), + $ LDB ) + 10 CONTINUE + END IF +* +* End of SGTTRS +* + END diff --git a/dspl/liblapack/SRC/sgtts2.f b/dspl/liblapack/SRC/sgtts2.f new file mode 100644 index 0000000..0cefb29 --- /dev/null +++ b/dspl/liblapack/SRC/sgtts2.f @@ -0,0 +1,274 @@ +*> \brief \b SGTTS2 solves a system of linear equations with a tridiagonal matrix using the LU factorization computed by sgttrf. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SGTTS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) +* +* .. Scalar Arguments .. +* INTEGER ITRANS, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SGTTS2 solves one of the systems of equations +*> A*X = B or A**T*X = B, +*> with a tridiagonal matrix A using the LU factorization computed +*> by SGTTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITRANS +*> \verbatim +*> ITRANS is INTEGER +*> Specifies the form of the system of equations. +*> = 0: A * X = B (No transpose) +*> = 1: A**T* X = B (Transpose) +*> = 2: A**T* X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is REAL array, dimension (N-1) +*> The (n-1) multipliers that define the matrix L from the +*> LU factorization of A. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of the upper triangular matrix U from +*> the LU factorization of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is REAL array, dimension (N-1) +*> The (n-1) elements of the first super-diagonal of U. +*> \endverbatim +*> +*> \param[in] DU2 +*> \verbatim +*> DU2 is REAL array, dimension (N-2) +*> The (n-2) elements of the second super-diagonal of U. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= n, row i of the matrix was +*> interchanged with row IPIV(i). IPIV(i) will always be either +*> i or i+1; IPIV(i) = i indicates a row interchange was not +*> required. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the matrix of right hand side vectors B. +*> On exit, B is overwritten by the solution vectors X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGTcomputational +* +* ===================================================================== + SUBROUTINE SGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER ITRANS, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IP, J + REAL TEMP +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( ITRANS.EQ.0 ) THEN +* +* Solve A*X = B using the LU factorization of A, +* overwriting each right hand side vector with its solution. +* + IF( NRHS.LE.1 ) THEN + J = 1 + 10 CONTINUE +* +* Solve L*x = b. +* + DO 20 I = 1, N - 1 + IP = IPIV( I ) + TEMP = B( I+1-IP+I, J ) - DL( I )*B( IP, J ) + B( I, J ) = B( IP, J ) + B( I+1, J ) = TEMP + 20 CONTINUE +* +* Solve U*x = b. +* + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 30 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )* + $ B( I+2, J ) ) / D( I ) + 30 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 10 + END IF + ELSE + DO 60 J = 1, NRHS +* +* Solve L*x = b. +* + DO 40 I = 1, N - 1 + IF( IPIV( I ).EQ.I ) THEN + B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J ) + ELSE + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - DL( I )*B( I, J ) + END IF + 40 CONTINUE +* +* Solve U*x = b. +* + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 50 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )* + $ B( I+2, J ) ) / D( I ) + 50 CONTINUE + 60 CONTINUE + END IF + ELSE +* +* Solve A**T * X = B. +* + IF( NRHS.LE.1 ) THEN +* +* Solve U**T*x = b. +* + J = 1 + 70 CONTINUE + B( 1, J ) = B( 1, J ) / D( 1 ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) + DO 80 I = 3, N + B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )-DU2( I-2 )* + $ B( I-2, J ) ) / D( I ) + 80 CONTINUE +* +* Solve L**T*x = b. +* + DO 90 I = N - 1, 1, -1 + IP = IPIV( I ) + TEMP = B( I, J ) - DL( I )*B( I+1, J ) + B( I, J ) = B( IP, J ) + B( IP, J ) = TEMP + 90 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 70 + END IF +* + ELSE + DO 120 J = 1, NRHS +* +* Solve U**T*x = b. +* + B( 1, J ) = B( 1, J ) / D( 1 ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) + DO 100 I = 3, N + B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )- + $ DU2( I-2 )*B( I-2, J ) ) / D( I ) + 100 CONTINUE + DO 110 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - DL( I )*B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - DL( I )*TEMP + B( I, J ) = TEMP + END IF + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* +* End of SGTTS2 +* + END diff --git a/dspl/liblapack/SRC/shgeqz.f b/dspl/liblapack/SRC/shgeqz.f new file mode 100644 index 0000000..34f6a8c --- /dev/null +++ b/dspl/liblapack/SRC/shgeqz.f @@ -0,0 +1,1367 @@ +*> \brief \b SHGEQZ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SHGEQZ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, +* ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ, COMPZ, JOB +* INTEGER IHI, ILO, INFO, LDH, LDQ, LDT, LDZ, LWORK, N +* .. +* .. Array Arguments .. +* REAL ALPHAI( * ), ALPHAR( * ), BETA( * ), +* $ H( LDH, * ), Q( LDQ, * ), T( LDT, * ), +* $ WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SHGEQZ computes the eigenvalues of a real matrix pair (H,T), +*> where H is an upper Hessenberg matrix and T is upper triangular, +*> using the double-shift QZ method. +*> Matrix pairs of this type are produced by the reduction to +*> generalized upper Hessenberg form of a real matrix pair (A,B): +*> +*> A = Q1*H*Z1**T, B = Q1*T*Z1**T, +*> +*> as computed by SGGHRD. +*> +*> If JOB='S', then the Hessenberg-triangular pair (H,T) is +*> also reduced to generalized Schur form, +*> +*> H = Q*S*Z**T, T = Q*P*Z**T, +*> +*> where Q and Z are orthogonal matrices, P is an upper triangular +*> matrix, and S is a quasi-triangular matrix with 1-by-1 and 2-by-2 +*> diagonal blocks. +*> +*> The 1-by-1 blocks correspond to real eigenvalues of the matrix pair +*> (H,T) and the 2-by-2 blocks correspond to complex conjugate pairs of +*> eigenvalues. +*> +*> Additionally, the 2-by-2 upper triangular diagonal blocks of P +*> corresponding to 2-by-2 blocks of S are reduced to positive diagonal +*> form, i.e., if S(j+1,j) is non-zero, then P(j+1,j) = P(j,j+1) = 0, +*> P(j,j) > 0, and P(j+1,j+1) > 0. +*> +*> Optionally, the orthogonal matrix Q from the generalized Schur +*> factorization may be postmultiplied into an input matrix Q1, and the +*> orthogonal matrix Z may be postmultiplied into an input matrix Z1. +*> If Q1 and Z1 are the orthogonal matrices from SGGHRD that reduced +*> the matrix pair (A,B) to generalized upper Hessenberg form, then the +*> output matrices Q1*Q and Z1*Z are the orthogonal factors from the +*> generalized Schur factorization of (A,B): +*> +*> A = (Q1*Q)*S*(Z1*Z)**T, B = (Q1*Q)*P*(Z1*Z)**T. +*> +*> To avoid overflow, eigenvalues of the matrix pair (H,T) (equivalently, +*> of (A,B)) are computed as a pair of values (alpha,beta), where alpha is +*> complex and beta real. +*> If beta is nonzero, lambda = alpha / beta is an eigenvalue of the +*> generalized nonsymmetric eigenvalue problem (GNEP) +*> A*x = lambda*B*x +*> and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the +*> alternate form of the GNEP +*> mu*A*y = B*y. +*> Real eigenvalues can be read directly from the generalized Schur +*> form: +*> alpha = S(i,i), beta = P(i,i). +*> +*> Ref: C.B. Moler & G.W. Stewart, "An Algorithm for Generalized Matrix +*> Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), +*> pp. 241--256. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> = 'E': Compute eigenvalues only; +*> = 'S': Compute eigenvalues and the Schur form. +*> \endverbatim +*> +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> = 'N': Left Schur vectors (Q) are not computed; +*> = 'I': Q is initialized to the unit matrix and the matrix Q +*> of left Schur vectors of (H,T) is returned; +*> = 'V': Q must contain an orthogonal matrix Q1 on entry and +*> the product Q1*Q is returned. +*> \endverbatim +*> +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': Right Schur vectors (Z) are not computed; +*> = 'I': Z is initialized to the unit matrix and the matrix Z +*> of right Schur vectors of (H,T) is returned; +*> = 'V': Z must contain an orthogonal matrix Z1 on entry and +*> the product Z1*Z is returned. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices H, T, Q, and Z. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> ILO and IHI mark the rows and columns of H which are in +*> Hessenberg form. It is assumed that A is already upper +*> triangular in rows and columns 1:ILO-1 and IHI+1:N. +*> If N > 0, 1 <= ILO <= IHI <= N; if N = 0, ILO=1 and IHI=0. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is REAL array, dimension (LDH, N) +*> On entry, the N-by-N upper Hessenberg matrix H. +*> On exit, if JOB = 'S', H contains the upper quasi-triangular +*> matrix S from the generalized Schur factorization. +*> If JOB = 'E', the diagonal blocks of H match those of S, but +*> the rest of H is unspecified. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH >= max( 1, N ). +*> \endverbatim +*> +*> \param[in,out] T +*> \verbatim +*> T is REAL array, dimension (LDT, N) +*> On entry, the N-by-N upper triangular matrix T. +*> On exit, if JOB = 'S', T contains the upper triangular +*> matrix P from the generalized Schur factorization; +*> 2-by-2 diagonal blocks of P corresponding to 2-by-2 blocks of S +*> are reduced to positive diagonal form, i.e., if H(j+1,j) is +*> non-zero, then T(j+1,j) = T(j,j+1) = 0, T(j,j) > 0, and +*> T(j+1,j+1) > 0. +*> If JOB = 'E', the diagonal blocks of T match those of P, but +*> the rest of T is unspecified. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max( 1, N ). +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is REAL array, dimension (N) +*> The real parts of each scalar alpha defining an eigenvalue +*> of GNEP. +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is REAL array, dimension (N) +*> The imaginary parts of each scalar alpha defining an +*> eigenvalue of GNEP. +*> If ALPHAI(j) is zero, then the j-th eigenvalue is real; if +*> positive, then the j-th and (j+1)-st eigenvalues are a +*> complex conjugate pair, with ALPHAI(j+1) = -ALPHAI(j). +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is REAL array, dimension (N) +*> The scalars beta that define the eigenvalues of GNEP. +*> Together, the quantities alpha = (ALPHAR(j),ALPHAI(j)) and +*> beta = BETA(j) represent the j-th eigenvalue of the matrix +*> pair (A,B), in one of the forms lambda = alpha/beta or +*> mu = beta/alpha. Since either lambda or mu may overflow, +*> they should not, in general, be computed. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ, N) +*> On entry, if COMPQ = 'V', the orthogonal matrix Q1 used in +*> the reduction of (A,B) to generalized Hessenberg form. +*> On exit, if COMPQ = 'I', the orthogonal matrix of left Schur +*> vectors of (H,T), and if COMPQ = 'V', the orthogonal matrix +*> of left Schur vectors of (A,B). +*> Not referenced if COMPQ = 'N'. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= 1. +*> If COMPQ='V' or 'I', then LDQ >= N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', the orthogonal matrix Z1 used in +*> the reduction of (A,B) to generalized Hessenberg form. +*> On exit, if COMPZ = 'I', the orthogonal matrix of +*> right Schur vectors of (H,T), and if COMPZ = 'V', the +*> orthogonal matrix of right Schur vectors of (A,B). +*> Not referenced if COMPZ = 'N'. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1. +*> If COMPZ='V' or 'I', then LDZ >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO >= 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> = 1,...,N: the QZ iteration did not converge. (H,T) is not +*> in Schur form, but ALPHAR(i), ALPHAI(i), and +*> BETA(i), i=INFO+1,...,N should be correct. +*> = N+1,...,2*N: the shift calculation failed. (H,T) is not +*> in Schur form, but ALPHAR(i), ALPHAI(i), and +*> BETA(i), i=INFO-N+1,...,N should be correct. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Iteration counters: +*> +*> JITER -- counts iterations. +*> IITER -- counts iterations run since ILAST was last +*> changed. This is therefore reset only when a 1-by-1 or +*> 2-by-2 block deflates off the bottom. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, + $ ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, WORK, + $ LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ, JOB + INTEGER IHI, ILO, INFO, LDH, LDQ, LDT, LDZ, LWORK, N +* .. +* .. Array Arguments .. + REAL ALPHAI( * ), ALPHAR( * ), BETA( * ), + $ H( LDH, * ), Q( LDQ, * ), T( LDT, * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* $ SAFETY = 1.0E+0 ) + REAL HALF, ZERO, ONE, SAFETY + PARAMETER ( HALF = 0.5E+0, ZERO = 0.0E+0, ONE = 1.0E+0, + $ SAFETY = 1.0E+2 ) +* .. +* .. Local Scalars .. + LOGICAL ILAZR2, ILAZRO, ILPIVT, ILQ, ILSCHR, ILZ, + $ LQUERY + INTEGER ICOMPQ, ICOMPZ, IFIRST, IFRSTM, IITER, ILAST, + $ ILASTM, IN, ISCHUR, ISTART, J, JC, JCH, JITER, + $ JR, MAXIT + REAL A11, A12, A1I, A1R, A21, A22, A2I, A2R, AD11, + $ AD11L, AD12, AD12L, AD21, AD21L, AD22, AD22L, + $ AD32L, AN, ANORM, ASCALE, ATOL, B11, B1A, B1I, + $ B1R, B22, B2A, B2I, B2R, BN, BNORM, BSCALE, + $ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL, + $ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX, + $ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1, + $ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L, + $ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR, + $ WR2 +* .. +* .. Local Arrays .. + REAL V( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANHS, SLAPY2, SLAPY3 + EXTERNAL LSAME, SLAMCH, SLANHS, SLAPY2, SLAPY3 +* .. +* .. External Subroutines .. + EXTERNAL SLAG2, SLARFG, SLARTG, SLASET, SLASV2, SROT, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Decode JOB, COMPQ, COMPZ +* + IF( LSAME( JOB, 'E' ) ) THEN + ILSCHR = .FALSE. + ISCHUR = 1 + ELSE IF( LSAME( JOB, 'S' ) ) THEN + ILSCHR = .TRUE. + ISCHUR = 2 + ELSE + ISCHUR = 0 + END IF +* + IF( LSAME( COMPQ, 'N' ) ) THEN + ILQ = .FALSE. + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'V' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 2 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 3 + ELSE + ICOMPQ = 0 + END IF +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ILZ = .FALSE. + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 2 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 3 + ELSE + ICOMPZ = 0 + END IF +* +* Check Argument Values +* + INFO = 0 + WORK( 1 ) = MAX( 1, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( ISCHUR.EQ.0 ) THEN + INFO = -1 + ELSE IF( ICOMPQ.EQ.0 ) THEN + INFO = -2 + ELSE IF( ICOMPZ.EQ.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( ILO.LT.1 ) THEN + INFO = -5 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -6 + ELSE IF( LDH.LT.N ) THEN + INFO = -8 + ELSE IF( LDT.LT.N ) THEN + INFO = -10 + ELSE IF( LDQ.LT.1 .OR. ( ILQ .AND. LDQ.LT.N ) ) THEN + INFO = -15 + ELSE IF( LDZ.LT.1 .OR. ( ILZ .AND. LDZ.LT.N ) ) THEN + INFO = -17 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SHGEQZ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = REAL( 1 ) + RETURN + END IF +* +* Initialize Q and Z +* + IF( ICOMPQ.EQ.3 ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) + IF( ICOMPZ.EQ.3 ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* +* Machine Constants +* + IN = IHI + 1 - ILO + SAFMIN = SLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + ULP = SLAMCH( 'E' )*SLAMCH( 'B' ) + ANORM = SLANHS( 'F', IN, H( ILO, ILO ), LDH, WORK ) + BNORM = SLANHS( 'F', IN, T( ILO, ILO ), LDT, WORK ) + ATOL = MAX( SAFMIN, ULP*ANORM ) + BTOL = MAX( SAFMIN, ULP*BNORM ) + ASCALE = ONE / MAX( SAFMIN, ANORM ) + BSCALE = ONE / MAX( SAFMIN, BNORM ) +* +* Set Eigenvalues IHI+1:N +* + DO 30 J = IHI + 1, N + IF( T( J, J ).LT.ZERO ) THEN + IF( ILSCHR ) THEN + DO 10 JR = 1, J + H( JR, J ) = -H( JR, J ) + T( JR, J ) = -T( JR, J ) + 10 CONTINUE + ELSE + H( J, J ) = -H( J, J ) + T( J, J ) = -T( J, J ) + END IF + IF( ILZ ) THEN + DO 20 JR = 1, N + Z( JR, J ) = -Z( JR, J ) + 20 CONTINUE + END IF + END IF + ALPHAR( J ) = H( J, J ) + ALPHAI( J ) = ZERO + BETA( J ) = T( J, J ) + 30 CONTINUE +* +* If IHI < ILO, skip QZ steps +* + IF( IHI.LT.ILO ) + $ GO TO 380 +* +* MAIN QZ ITERATION LOOP +* +* Initialize dynamic indices +* +* Eigenvalues ILAST+1:N have been found. +* Column operations modify rows IFRSTM:whatever. +* Row operations modify columns whatever:ILASTM. +* +* If only eigenvalues are being computed, then +* IFRSTM is the row of the last splitting row above row ILAST; +* this is always at least ILO. +* IITER counts iterations since the last eigenvalue was found, +* to tell when to use an extraordinary shift. +* MAXIT is the maximum number of QZ sweeps allowed. +* + ILAST = IHI + IF( ILSCHR ) THEN + IFRSTM = 1 + ILASTM = N + ELSE + IFRSTM = ILO + ILASTM = IHI + END IF + IITER = 0 + ESHIFT = ZERO + MAXIT = 30*( IHI-ILO+1 ) +* + DO 360 JITER = 1, MAXIT +* +* Split the matrix if possible. +* +* Two tests: +* 1: H(j,j-1)=0 or j=ILO +* 2: T(j,j)=0 +* + IF( ILAST.EQ.ILO ) THEN +* +* Special case: j=ILAST +* + GO TO 80 + ELSE + IF( ABS( H( ILAST, ILAST-1 ) ).LE.ATOL ) THEN + H( ILAST, ILAST-1 ) = ZERO + GO TO 80 + END IF + END IF +* + IF( ABS( T( ILAST, ILAST ) ).LE.BTOL ) THEN + T( ILAST, ILAST ) = ZERO + GO TO 70 + END IF +* +* General case: j unfl ) +* __ +* (sA - wB) ( CZ -SZ ) +* ( SZ CZ ) +* + C11R = S1*A11 - WR*B11 + C11I = -WI*B11 + C12 = S1*A12 + C21 = S1*A21 + C22R = S1*A22 - WR*B22 + C22I = -WI*B22 +* + IF( ABS( C11R )+ABS( C11I )+ABS( C12 ).GT.ABS( C21 )+ + $ ABS( C22R )+ABS( C22I ) ) THEN + T1 = SLAPY3( C12, C11R, C11I ) + CZ = C12 / T1 + SZR = -C11R / T1 + SZI = -C11I / T1 + ELSE + CZ = SLAPY2( C22R, C22I ) + IF( CZ.LE.SAFMIN ) THEN + CZ = ZERO + SZR = ONE + SZI = ZERO + ELSE + TEMPR = C22R / CZ + TEMPI = C22I / CZ + T1 = SLAPY2( CZ, C21 ) + CZ = CZ / T1 + SZR = -C21*TEMPR / T1 + SZI = C21*TEMPI / T1 + END IF + END IF +* +* Compute Givens rotation on left +* +* ( CQ SQ ) +* ( __ ) A or B +* ( -SQ CQ ) +* + AN = ABS( A11 ) + ABS( A12 ) + ABS( A21 ) + ABS( A22 ) + BN = ABS( B11 ) + ABS( B22 ) + WABS = ABS( WR ) + ABS( WI ) + IF( S1*AN.GT.WABS*BN ) THEN + CQ = CZ*B11 + SQR = SZR*B22 + SQI = -SZI*B22 + ELSE + A1R = CZ*A11 + SZR*A12 + A1I = SZI*A12 + A2R = CZ*A21 + SZR*A22 + A2I = SZI*A22 + CQ = SLAPY2( A1R, A1I ) + IF( CQ.LE.SAFMIN ) THEN + CQ = ZERO + SQR = ONE + SQI = ZERO + ELSE + TEMPR = A1R / CQ + TEMPI = A1I / CQ + SQR = TEMPR*A2R + TEMPI*A2I + SQI = TEMPI*A2R - TEMPR*A2I + END IF + END IF + T1 = SLAPY3( CQ, SQR, SQI ) + CQ = CQ / T1 + SQR = SQR / T1 + SQI = SQI / T1 +* +* Compute diagonal elements of QBZ +* + TEMPR = SQR*SZR - SQI*SZI + TEMPI = SQR*SZI + SQI*SZR + B1R = CQ*CZ*B11 + TEMPR*B22 + B1I = TEMPI*B22 + B1A = SLAPY2( B1R, B1I ) + B2R = CQ*CZ*B22 + TEMPR*B11 + B2I = -TEMPI*B11 + B2A = SLAPY2( B2R, B2I ) +* +* Normalize so beta > 0, and Im( alpha1 ) > 0 +* + BETA( ILAST-1 ) = B1A + BETA( ILAST ) = B2A + ALPHAR( ILAST-1 ) = ( WR*B1A )*S1INV + ALPHAI( ILAST-1 ) = ( WI*B1A )*S1INV + ALPHAR( ILAST ) = ( WR*B2A )*S1INV + ALPHAI( ILAST ) = -( WI*B2A )*S1INV +* +* Step 3: Go to next block -- exit if finished. +* + ILAST = IFIRST - 1 + IF( ILAST.LT.ILO ) + $ GO TO 380 +* +* Reset counters +* + IITER = 0 + ESHIFT = ZERO + IF( .NOT.ILSCHR ) THEN + ILASTM = ILAST + IF( IFRSTM.GT.ILAST ) + $ IFRSTM = ILO + END IF + GO TO 350 + ELSE +* +* Usual case: 3x3 or larger block, using Francis implicit +* double-shift +* +* 2 +* Eigenvalue equation is w - c w + d = 0, +* +* -1 2 -1 +* so compute 1st column of (A B ) - c A B + d +* using the formula in QZIT (from EISPACK) +* +* We assume that the block is at least 3x3 +* + AD11 = ( ASCALE*H( ILAST-1, ILAST-1 ) ) / + $ ( BSCALE*T( ILAST-1, ILAST-1 ) ) + AD21 = ( ASCALE*H( ILAST, ILAST-1 ) ) / + $ ( BSCALE*T( ILAST-1, ILAST-1 ) ) + AD12 = ( ASCALE*H( ILAST-1, ILAST ) ) / + $ ( BSCALE*T( ILAST, ILAST ) ) + AD22 = ( ASCALE*H( ILAST, ILAST ) ) / + $ ( BSCALE*T( ILAST, ILAST ) ) + U12 = T( ILAST-1, ILAST ) / T( ILAST, ILAST ) + AD11L = ( ASCALE*H( IFIRST, IFIRST ) ) / + $ ( BSCALE*T( IFIRST, IFIRST ) ) + AD21L = ( ASCALE*H( IFIRST+1, IFIRST ) ) / + $ ( BSCALE*T( IFIRST, IFIRST ) ) + AD12L = ( ASCALE*H( IFIRST, IFIRST+1 ) ) / + $ ( BSCALE*T( IFIRST+1, IFIRST+1 ) ) + AD22L = ( ASCALE*H( IFIRST+1, IFIRST+1 ) ) / + $ ( BSCALE*T( IFIRST+1, IFIRST+1 ) ) + AD32L = ( ASCALE*H( IFIRST+2, IFIRST+1 ) ) / + $ ( BSCALE*T( IFIRST+1, IFIRST+1 ) ) + U12L = T( IFIRST, IFIRST+1 ) / T( IFIRST+1, IFIRST+1 ) +* + V( 1 ) = ( AD11-AD11L )*( AD22-AD11L ) - AD12*AD21 + + $ AD21*U12*AD11L + ( AD12L-AD11L*U12L )*AD21L + V( 2 ) = ( ( AD22L-AD11L )-AD21L*U12L-( AD11-AD11L )- + $ ( AD22-AD11L )+AD21*U12 )*AD21L + V( 3 ) = AD32L*AD21L +* + ISTART = IFIRST +* + CALL SLARFG( 3, V( 1 ), V( 2 ), 1, TAU ) + V( 1 ) = ONE +* +* Sweep +* + DO 290 J = ISTART, ILAST - 2 +* +* All but last elements: use 3x3 Householder transforms. +* +* Zero (j-1)st column of A +* + IF( J.GT.ISTART ) THEN + V( 1 ) = H( J, J-1 ) + V( 2 ) = H( J+1, J-1 ) + V( 3 ) = H( J+2, J-1 ) +* + CALL SLARFG( 3, H( J, J-1 ), V( 2 ), 1, TAU ) + V( 1 ) = ONE + H( J+1, J-1 ) = ZERO + H( J+2, J-1 ) = ZERO + END IF +* + DO 230 JC = J, ILASTM + TEMP = TAU*( H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )* + $ H( J+2, JC ) ) + H( J, JC ) = H( J, JC ) - TEMP + H( J+1, JC ) = H( J+1, JC ) - TEMP*V( 2 ) + H( J+2, JC ) = H( J+2, JC ) - TEMP*V( 3 ) + TEMP2 = TAU*( T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )* + $ T( J+2, JC ) ) + T( J, JC ) = T( J, JC ) - TEMP2 + T( J+1, JC ) = T( J+1, JC ) - TEMP2*V( 2 ) + T( J+2, JC ) = T( J+2, JC ) - TEMP2*V( 3 ) + 230 CONTINUE + IF( ILQ ) THEN + DO 240 JR = 1, N + TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )* + $ Q( JR, J+2 ) ) + Q( JR, J ) = Q( JR, J ) - TEMP + Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 ) + Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 ) + 240 CONTINUE + END IF +* +* Zero j-th column of B (see SLAGBC for details) +* +* Swap rows to pivot +* + ILPIVT = .FALSE. + TEMP = MAX( ABS( T( J+1, J+1 ) ), ABS( T( J+1, J+2 ) ) ) + TEMP2 = MAX( ABS( T( J+2, J+1 ) ), ABS( T( J+2, J+2 ) ) ) + IF( MAX( TEMP, TEMP2 ).LT.SAFMIN ) THEN + SCALE = ZERO + U1 = ONE + U2 = ZERO + GO TO 250 + ELSE IF( TEMP.GE.TEMP2 ) THEN + W11 = T( J+1, J+1 ) + W21 = T( J+2, J+1 ) + W12 = T( J+1, J+2 ) + W22 = T( J+2, J+2 ) + U1 = T( J+1, J ) + U2 = T( J+2, J ) + ELSE + W21 = T( J+1, J+1 ) + W11 = T( J+2, J+1 ) + W22 = T( J+1, J+2 ) + W12 = T( J+2, J+2 ) + U2 = T( J+1, J ) + U1 = T( J+2, J ) + END IF +* +* Swap columns if nec. +* + IF( ABS( W12 ).GT.ABS( W11 ) ) THEN + ILPIVT = .TRUE. + TEMP = W12 + TEMP2 = W22 + W12 = W11 + W22 = W21 + W11 = TEMP + W21 = TEMP2 + END IF +* +* LU-factor +* + TEMP = W21 / W11 + U2 = U2 - TEMP*U1 + W22 = W22 - TEMP*W12 + W21 = ZERO +* +* Compute SCALE +* + SCALE = ONE + IF( ABS( W22 ).LT.SAFMIN ) THEN + SCALE = ZERO + U2 = ONE + U1 = -W12 / W11 + GO TO 250 + END IF + IF( ABS( W22 ).LT.ABS( U2 ) ) + $ SCALE = ABS( W22 / U2 ) + IF( ABS( W11 ).LT.ABS( U1 ) ) + $ SCALE = MIN( SCALE, ABS( W11 / U1 ) ) +* +* Solve +* + U2 = ( SCALE*U2 ) / W22 + U1 = ( SCALE*U1-W12*U2 ) / W11 +* + 250 CONTINUE + IF( ILPIVT ) THEN + TEMP = U2 + U2 = U1 + U1 = TEMP + END IF +* +* Compute Householder Vector +* + T1 = SQRT( SCALE**2+U1**2+U2**2 ) + TAU = ONE + SCALE / T1 + VS = -ONE / ( SCALE+T1 ) + V( 1 ) = ONE + V( 2 ) = VS*U1 + V( 3 ) = VS*U2 +* +* Apply transformations from the right. +* + DO 260 JR = IFRSTM, MIN( J+3, ILAST ) + TEMP = TAU*( H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )* + $ H( JR, J+2 ) ) + H( JR, J ) = H( JR, J ) - TEMP + H( JR, J+1 ) = H( JR, J+1 ) - TEMP*V( 2 ) + H( JR, J+2 ) = H( JR, J+2 ) - TEMP*V( 3 ) + 260 CONTINUE + DO 270 JR = IFRSTM, J + 2 + TEMP = TAU*( T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )* + $ T( JR, J+2 ) ) + T( JR, J ) = T( JR, J ) - TEMP + T( JR, J+1 ) = T( JR, J+1 ) - TEMP*V( 2 ) + T( JR, J+2 ) = T( JR, J+2 ) - TEMP*V( 3 ) + 270 CONTINUE + IF( ILZ ) THEN + DO 280 JR = 1, N + TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )* + $ Z( JR, J+2 ) ) + Z( JR, J ) = Z( JR, J ) - TEMP + Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 ) + Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 ) + 280 CONTINUE + END IF + T( J+1, J ) = ZERO + T( J+2, J ) = ZERO + 290 CONTINUE +* +* Last elements: Use Givens rotations +* +* Rotations from the left +* + J = ILAST - 1 + TEMP = H( J, J-1 ) + CALL SLARTG( TEMP, H( J+1, J-1 ), C, S, H( J, J-1 ) ) + H( J+1, J-1 ) = ZERO +* + DO 300 JC = J, ILASTM + TEMP = C*H( J, JC ) + S*H( J+1, JC ) + H( J+1, JC ) = -S*H( J, JC ) + C*H( J+1, JC ) + H( J, JC ) = TEMP + TEMP2 = C*T( J, JC ) + S*T( J+1, JC ) + T( J+1, JC ) = -S*T( J, JC ) + C*T( J+1, JC ) + T( J, JC ) = TEMP2 + 300 CONTINUE + IF( ILQ ) THEN + DO 310 JR = 1, N + TEMP = C*Q( JR, J ) + S*Q( JR, J+1 ) + Q( JR, J+1 ) = -S*Q( JR, J ) + C*Q( JR, J+1 ) + Q( JR, J ) = TEMP + 310 CONTINUE + END IF +* +* Rotations from the right. +* + TEMP = T( J+1, J+1 ) + CALL SLARTG( TEMP, T( J+1, J ), C, S, T( J+1, J+1 ) ) + T( J+1, J ) = ZERO +* + DO 320 JR = IFRSTM, ILAST + TEMP = C*H( JR, J+1 ) + S*H( JR, J ) + H( JR, J ) = -S*H( JR, J+1 ) + C*H( JR, J ) + H( JR, J+1 ) = TEMP + 320 CONTINUE + DO 330 JR = IFRSTM, ILAST - 1 + TEMP = C*T( JR, J+1 ) + S*T( JR, J ) + T( JR, J ) = -S*T( JR, J+1 ) + C*T( JR, J ) + T( JR, J+1 ) = TEMP + 330 CONTINUE + IF( ILZ ) THEN + DO 340 JR = 1, N + TEMP = C*Z( JR, J+1 ) + S*Z( JR, J ) + Z( JR, J ) = -S*Z( JR, J+1 ) + C*Z( JR, J ) + Z( JR, J+1 ) = TEMP + 340 CONTINUE + END IF +* +* End of Double-Shift code +* + END IF +* + GO TO 350 +* +* End of iteration loop +* + 350 CONTINUE + 360 CONTINUE +* +* Drop-through = non-convergence +* + INFO = ILAST + GO TO 420 +* +* Successful completion of all QZ steps +* + 380 CONTINUE +* +* Set Eigenvalues 1:ILO-1 +* + DO 410 J = 1, ILO - 1 + IF( T( J, J ).LT.ZERO ) THEN + IF( ILSCHR ) THEN + DO 390 JR = 1, J + H( JR, J ) = -H( JR, J ) + T( JR, J ) = -T( JR, J ) + 390 CONTINUE + ELSE + H( J, J ) = -H( J, J ) + T( J, J ) = -T( J, J ) + END IF + IF( ILZ ) THEN + DO 400 JR = 1, N + Z( JR, J ) = -Z( JR, J ) + 400 CONTINUE + END IF + END IF + ALPHAR( J ) = H( J, J ) + ALPHAI( J ) = ZERO + BETA( J ) = T( J, J ) + 410 CONTINUE +* +* Normal Termination +* + INFO = 0 +* +* Exit (other than argument error) -- return optimal workspace size +* + 420 CONTINUE + WORK( 1 ) = REAL( N ) + RETURN +* +* End of SHGEQZ +* + END diff --git a/dspl/liblapack/SRC/shsein.f b/dspl/liblapack/SRC/shsein.f new file mode 100644 index 0000000..53a3327 --- /dev/null +++ b/dspl/liblapack/SRC/shsein.f @@ -0,0 +1,530 @@ +*> \brief \b SHSEIN +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SHSEIN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SHSEIN( SIDE, EIGSRC, INITV, SELECT, N, H, LDH, WR, WI, +* VL, LDVL, VR, LDVR, MM, M, WORK, IFAILL, +* IFAILR, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EIGSRC, INITV, SIDE +* INTEGER INFO, LDH, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* INTEGER IFAILL( * ), IFAILR( * ) +* REAL H( LDH, * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WI( * ), WORK( * ), WR( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SHSEIN uses inverse iteration to find specified right and/or left +*> eigenvectors of a real upper Hessenberg matrix H. +*> +*> The right eigenvector x and the left eigenvector y of the matrix H +*> corresponding to an eigenvalue w are defined by: +*> +*> H * x = w * x, y**h * H = w * y**h +*> +*> where y**h denotes the conjugate transpose of the vector y. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': compute right eigenvectors only; +*> = 'L': compute left eigenvectors only; +*> = 'B': compute both right and left eigenvectors. +*> \endverbatim +*> +*> \param[in] EIGSRC +*> \verbatim +*> EIGSRC is CHARACTER*1 +*> Specifies the source of eigenvalues supplied in (WR,WI): +*> = 'Q': the eigenvalues were found using SHSEQR; thus, if +*> H has zero subdiagonal elements, and so is +*> block-triangular, then the j-th eigenvalue can be +*> assumed to be an eigenvalue of the block containing +*> the j-th row/column. This property allows SHSEIN to +*> perform inverse iteration on just one diagonal block. +*> = 'N': no assumptions are made on the correspondence +*> between eigenvalues and diagonal blocks. In this +*> case, SHSEIN must always perform inverse iteration +*> using the whole matrix H. +*> \endverbatim +*> +*> \param[in] INITV +*> \verbatim +*> INITV is CHARACTER*1 +*> = 'N': no initial vectors are supplied; +*> = 'U': user-supplied initial vectors are stored in the arrays +*> VL and/or VR. +*> \endverbatim +*> +*> \param[in,out] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> Specifies the eigenvectors to be computed. To select the +*> real eigenvector corresponding to a real eigenvalue WR(j), +*> SELECT(j) must be set to .TRUE.. To select the complex +*> eigenvector corresponding to a complex eigenvalue +*> (WR(j),WI(j)), with complex conjugate (WR(j+1),WI(j+1)), +*> either SELECT(j) or SELECT(j+1) or both must be set to +*> .TRUE.; then on exit SELECT(j) is .TRUE. and SELECT(j+1) is +*> .FALSE.. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N >= 0. +*> \endverbatim +*> +*> \param[in] H +*> \verbatim +*> H is REAL array, dimension (LDH,N) +*> The upper Hessenberg matrix H. +*> If a NaN is detected in H, the routine will return with INFO=-6. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] WR +*> \verbatim +*> WR is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[in] WI +*> \verbatim +*> WI is REAL array, dimension (N) +*> +*> On entry, the real and imaginary parts of the eigenvalues of +*> H; a complex conjugate pair of eigenvalues must be stored in +*> consecutive elements of WR and WI. +*> On exit, WR may have been altered since close eigenvalues +*> are perturbed slightly in searching for independent +*> eigenvectors. +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is REAL array, dimension (LDVL,MM) +*> On entry, if INITV = 'U' and SIDE = 'L' or 'B', VL must +*> contain starting vectors for the inverse iteration for the +*> left eigenvectors; the starting vector for each eigenvector +*> must be in the same column(s) in which the eigenvector will +*> be stored. +*> On exit, if SIDE = 'L' or 'B', the left eigenvectors +*> specified by SELECT will be stored consecutively in the +*> columns of VL, in the same order as their eigenvalues. A +*> complex eigenvector corresponding to a complex eigenvalue is +*> stored in two consecutive columns, the first holding the real +*> part and the second the imaginary part. +*> If SIDE = 'R', VL is not referenced. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. +*> LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] VR +*> \verbatim +*> VR is REAL array, dimension (LDVR,MM) +*> On entry, if INITV = 'U' and SIDE = 'R' or 'B', VR must +*> contain starting vectors for the inverse iteration for the +*> right eigenvectors; the starting vector for each eigenvector +*> must be in the same column(s) in which the eigenvector will +*> be stored. +*> On exit, if SIDE = 'R' or 'B', the right eigenvectors +*> specified by SELECT will be stored consecutively in the +*> columns of VR, in the same order as their eigenvalues. A +*> complex eigenvector corresponding to a complex eigenvalue is +*> stored in two consecutive columns, the first holding the real +*> part and the second the imaginary part. +*> If SIDE = 'L', VR is not referenced. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. +*> LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of columns in the arrays VL and/or VR. MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of columns in the arrays VL and/or VR required to +*> store the eigenvectors; each selected real eigenvector +*> occupies one column and each selected complex eigenvector +*> occupies two columns. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension ((N+2)*N) +*> \endverbatim +*> +*> \param[out] IFAILL +*> \verbatim +*> IFAILL is INTEGER array, dimension (MM) +*> If SIDE = 'L' or 'B', IFAILL(i) = j > 0 if the left +*> eigenvector in the i-th column of VL (corresponding to the +*> eigenvalue w(j)) failed to converge; IFAILL(i) = 0 if the +*> eigenvector converged satisfactorily. If the i-th and (i+1)th +*> columns of VL hold a complex eigenvector, then IFAILL(i) and +*> IFAILL(i+1) are set to the same value. +*> If SIDE = 'R', IFAILL is not referenced. +*> \endverbatim +*> +*> \param[out] IFAILR +*> \verbatim +*> IFAILR is INTEGER array, dimension (MM) +*> If SIDE = 'R' or 'B', IFAILR(i) = j > 0 if the right +*> eigenvector in the i-th column of VR (corresponding to the +*> eigenvalue w(j)) failed to converge; IFAILR(i) = 0 if the +*> eigenvector converged satisfactorily. If the i-th and (i+1)th +*> columns of VR hold a complex eigenvector, then IFAILR(i) and +*> IFAILR(i+1) are set to the same value. +*> If SIDE = 'L', IFAILR is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, i is the number of eigenvectors which +*> failed to converge; see IFAILL and IFAILR for further +*> details. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Each eigenvector is normalized so that the element of largest +*> magnitude has magnitude 1; here the magnitude of a complex number +*> (x,y) is taken to be |x|+|y|. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SHSEIN( SIDE, EIGSRC, INITV, SELECT, N, H, LDH, WR, WI, + $ VL, LDVL, VR, LDVR, MM, M, WORK, IFAILL, + $ IFAILR, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EIGSRC, INITV, SIDE + INTEGER INFO, LDH, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IFAILL( * ), IFAILR( * ) + REAL H( LDH, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WI( * ), WORK( * ), WR( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL BOTHV, FROMQR, LEFTV, NOINIT, PAIR, RIGHTV + INTEGER I, IINFO, K, KL, KLN, KR, KSI, KSR, LDWORK + REAL BIGNUM, EPS3, HNORM, SMLNUM, ULP, UNFL, WKI, + $ WKR +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + REAL SLAMCH, SLANHS + EXTERNAL LSAME, SLAMCH, SLANHS, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL SLAEIN, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + FROMQR = LSAME( EIGSRC, 'Q' ) +* + NOINIT = LSAME( INITV, 'N' ) +* +* Set M to the number of columns required to store the selected +* eigenvectors, and standardize the array SELECT. +* + M = 0 + PAIR = .FALSE. + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + SELECT( K ) = .FALSE. + ELSE + IF( WI( K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) THEN + SELECT( K ) = .TRUE. + M = M + 2 + END IF + END IF + END IF + 10 CONTINUE +* + INFO = 0 + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.FROMQR .AND. .NOT.LSAME( EIGSRC, 'N' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOINIT .AND. .NOT.LSAME( INITV, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDH.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -13 + ELSE IF( MM.LT.M ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SHSEIN', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Set machine-dependent constants. +* + UNFL = SLAMCH( 'Safe minimum' ) + ULP = SLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) + BIGNUM = ( ONE-ULP ) / SMLNUM +* + LDWORK = N + 1 +* + KL = 1 + KLN = 0 + IF( FROMQR ) THEN + KR = 0 + ELSE + KR = N + END IF + KSR = 1 +* + DO 120 K = 1, N + IF( SELECT( K ) ) THEN +* +* Compute eigenvector(s) corresponding to W(K). +* + IF( FROMQR ) THEN +* +* If affiliation of eigenvalues is known, check whether +* the matrix splits. +* +* Determine KL and KR such that 1 <= KL <= K <= KR <= N +* and H(KL,KL-1) and H(KR+1,KR) are zero (or KL = 1 or +* KR = N). +* +* Then inverse iteration can be performed with the +* submatrix H(KL:N,KL:N) for a left eigenvector, and with +* the submatrix H(1:KR,1:KR) for a right eigenvector. +* + DO 20 I = K, KL + 1, -1 + IF( H( I, I-1 ).EQ.ZERO ) + $ GO TO 30 + 20 CONTINUE + 30 CONTINUE + KL = I + IF( K.GT.KR ) THEN + DO 40 I = K, N - 1 + IF( H( I+1, I ).EQ.ZERO ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE + KR = I + END IF + END IF +* + IF( KL.NE.KLN ) THEN + KLN = KL +* +* Compute infinity-norm of submatrix H(KL:KR,KL:KR) if it +* has not ben computed before. +* + HNORM = SLANHS( 'I', KR-KL+1, H( KL, KL ), LDH, WORK ) + IF( SISNAN( HNORM ) ) THEN + INFO = -6 + RETURN + ELSE IF( HNORM.GT.ZERO ) THEN + EPS3 = HNORM*ULP + ELSE + EPS3 = SMLNUM + END IF + END IF +* +* Perturb eigenvalue if it is close to any previous +* selected eigenvalues affiliated to the submatrix +* H(KL:KR,KL:KR). Close roots are modified by EPS3. +* + WKR = WR( K ) + WKI = WI( K ) + 60 CONTINUE + DO 70 I = K - 1, KL, -1 + IF( SELECT( I ) .AND. ABS( WR( I )-WKR )+ + $ ABS( WI( I )-WKI ).LT.EPS3 ) THEN + WKR = WKR + EPS3 + GO TO 60 + END IF + 70 CONTINUE + WR( K ) = WKR +* + PAIR = WKI.NE.ZERO + IF( PAIR ) THEN + KSI = KSR + 1 + ELSE + KSI = KSR + END IF + IF( LEFTV ) THEN +* +* Compute left eigenvector. +* + CALL SLAEIN( .FALSE., NOINIT, N-KL+1, H( KL, KL ), LDH, + $ WKR, WKI, VL( KL, KSR ), VL( KL, KSI ), + $ WORK, LDWORK, WORK( N*N+N+1 ), EPS3, SMLNUM, + $ BIGNUM, IINFO ) + IF( IINFO.GT.0 ) THEN + IF( PAIR ) THEN + INFO = INFO + 2 + ELSE + INFO = INFO + 1 + END IF + IFAILL( KSR ) = K + IFAILL( KSI ) = K + ELSE + IFAILL( KSR ) = 0 + IFAILL( KSI ) = 0 + END IF + DO 80 I = 1, KL - 1 + VL( I, KSR ) = ZERO + 80 CONTINUE + IF( PAIR ) THEN + DO 90 I = 1, KL - 1 + VL( I, KSI ) = ZERO + 90 CONTINUE + END IF + END IF + IF( RIGHTV ) THEN +* +* Compute right eigenvector. +* + CALL SLAEIN( .TRUE., NOINIT, KR, H, LDH, WKR, WKI, + $ VR( 1, KSR ), VR( 1, KSI ), WORK, LDWORK, + $ WORK( N*N+N+1 ), EPS3, SMLNUM, BIGNUM, + $ IINFO ) + IF( IINFO.GT.0 ) THEN + IF( PAIR ) THEN + INFO = INFO + 2 + ELSE + INFO = INFO + 1 + END IF + IFAILR( KSR ) = K + IFAILR( KSI ) = K + ELSE + IFAILR( KSR ) = 0 + IFAILR( KSI ) = 0 + END IF + DO 100 I = KR + 1, N + VR( I, KSR ) = ZERO + 100 CONTINUE + IF( PAIR ) THEN + DO 110 I = KR + 1, N + VR( I, KSI ) = ZERO + 110 CONTINUE + END IF + END IF +* + IF( PAIR ) THEN + KSR = KSR + 2 + ELSE + KSR = KSR + 1 + END IF + END IF + 120 CONTINUE +* + RETURN +* +* End of SHSEIN +* + END diff --git a/dspl/liblapack/SRC/shseqr.f b/dspl/liblapack/SRC/shseqr.f new file mode 100644 index 0000000..5654a46 --- /dev/null +++ b/dspl/liblapack/SRC/shseqr.f @@ -0,0 +1,516 @@ +*> \brief \b SHSEQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SHSEQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, WR, WI, Z, +* LDZ, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, ILO, INFO, LDH, LDZ, LWORK, N +* CHARACTER COMPZ, JOB +* .. +* .. Array Arguments .. +* REAL H( LDH, * ), WI( * ), WORK( * ), WR( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SHSEQR computes the eigenvalues of a Hessenberg matrix H +*> and, optionally, the matrices T and Z from the Schur decomposition +*> H = Z T Z**T, where T is an upper quasi-triangular matrix (the +*> Schur form), and Z is the orthogonal matrix of Schur vectors. +*> +*> Optionally Z may be postmultiplied into an input orthogonal +*> matrix Q so that this routine can give the Schur factorization +*> of a matrix A which has been reduced to the Hessenberg form H +*> by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> = 'E': compute eigenvalues only; +*> = 'S': compute eigenvalues and the Schur form T. +*> \endverbatim +*> +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': no Schur vectors are computed; +*> = 'I': Z is initialized to the unit matrix and the matrix Z +*> of Schur vectors of H is returned; +*> = 'V': Z must contain an orthogonal matrix Q on entry, and +*> the product Q*Z is returned. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N .GE. 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> It is assumed that H is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +*> set by a previous call to SGEBAL, and then passed to ZGEHRD +*> when the matrix output by SGEBAL is reduced to Hessenberg +*> form. Otherwise ILO and IHI should be set to 1 and N +*> respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. +*> If N = 0, then ILO = 1 and IHI = 0. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is REAL array, dimension (LDH,N) +*> On entry, the upper Hessenberg matrix H. +*> On exit, if INFO = 0 and JOB = 'S', then H contains the +*> upper quasi-triangular matrix T from the Schur decomposition +*> (the Schur form); 2-by-2 diagonal blocks (corresponding to +*> complex conjugate pairs of eigenvalues) are returned in +*> standard form, with H(i,i) = H(i+1,i+1) and +*> H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and JOB = 'E', the +*> contents of H are unspecified on exit. (The output value of +*> H when INFO.GT.0 is given under the description of INFO +*> below.) +*> +*> Unlike earlier versions of SHSEQR, this subroutine may +*> explicitly H(i,j) = 0 for i.GT.j and j = 1, 2, ... ILO-1 +*> or j = IHI+1, IHI+2, ... N. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH .GE. max(1,N). +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] WI +*> \verbatim +*> WI is REAL array, dimension (N) +*> +*> The real and imaginary parts, respectively, of the computed +*> eigenvalues. If two eigenvalues are computed as a complex +*> conjugate pair, they are stored in consecutive elements of +*> WR and WI, say the i-th and (i+1)th, with WI(i) .GT. 0 and +*> WI(i+1) .LT. 0. If JOB = 'S', the eigenvalues are stored in +*> the same order as on the diagonal of the Schur form returned +*> in H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 +*> diagonal block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and +*> WI(i+1) = -WI(i). +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ,N) +*> If COMPZ = 'N', Z is not referenced. +*> If COMPZ = 'I', on entry Z need not be set and on exit, +*> if INFO = 0, Z contains the orthogonal matrix Z of the Schur +*> vectors of H. If COMPZ = 'V', on entry Z must contain an +*> N-by-N matrix Q, which is assumed to be equal to the unit +*> matrix except for the submatrix Z(ILO:IHI,ILO:IHI). On exit, +*> if INFO = 0, Z contains Q*Z. +*> Normally Q is the orthogonal matrix generated by SORGHR +*> after the call to SGEHRD which formed the Hessenberg matrix +*> H. (The output value of Z when INFO.GT.0 is given under +*> the description of INFO below.) +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. if COMPZ = 'I' or +*> COMPZ = 'V', then LDZ.GE.MAX(1,N). Otherwize, LDZ.GE.1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns an estimate of +*> the optimal value for LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK .GE. max(1,N) +*> is sufficient and delivers very good and sometimes +*> optimal performance. However, LWORK as large as 11*N +*> may be required for optimal performance. A workspace +*> query is recommended to determine the optimal workspace +*> size. +*> +*> If LWORK = -1, then SHSEQR does a workspace query. +*> In this case, SHSEQR checks the input parameters and +*> estimates the optimal workspace size for the given +*> values of N, ILO and IHI. The estimate is returned +*> in WORK(1). No error message related to LWORK is +*> issued by XERBLA. Neither H nor Z are accessed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> .LT. 0: if INFO = -i, the i-th argument had an illegal +*> value +*> .GT. 0: if INFO = i, SHSEQR failed to compute all of +*> the eigenvalues. Elements 1:ilo-1 and i+1:n of WR +*> and WI contain those eigenvalues which have been +*> successfully computed. (Failures are rare.) +*> +*> If INFO .GT. 0 and JOB = 'E', then on exit, the +*> remaining unconverged eigenvalues are the eigen- +*> values of the upper Hessenberg matrix rows and +*> columns ILO through INFO of the final, output +*> value of H. +*> +*> If INFO .GT. 0 and JOB = 'S', then on exit +*> +*> (*) (initial value of H)*U = U*(final value of H) +*> +*> where U is an orthogonal matrix. The final +*> value of H is upper Hessenberg and quasi-triangular +*> in rows and columns INFO+1 through IHI. +*> +*> If INFO .GT. 0 and COMPZ = 'V', then on exit +*> +*> (final value of Z) = (initial value of Z)*U +*> +*> where U is the orthogonal matrix in (*) (regard- +*> less of the value of JOB.) +*> +*> If INFO .GT. 0 and COMPZ = 'I', then on exit +*> (final value of Z) = U +*> where U is the orthogonal matrix in (*) (regard- +*> less of the value of JOB.) +*> +*> If INFO .GT. 0 and COMPZ = 'N', then Z is not +*> accessed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Default values supplied by +*> ILAENV(ISPEC,'SHSEQR',JOB(:1)//COMPZ(:1),N,ILO,IHI,LWORK). +*> It is suggested that these defaults be adjusted in order +*> to attain best performance in each particular +*> computational environment. +*> +*> ISPEC=12: The SLAHQR vs SLAQR0 crossover point. +*> Default: 75. (Must be at least 11.) +*> +*> ISPEC=13: Recommended deflation window size. +*> This depends on ILO, IHI and NS. NS is the +*> number of simultaneous shifts returned +*> by ILAENV(ISPEC=15). (See ISPEC=15 below.) +*> The default for (IHI-ILO+1).LE.500 is NS. +*> The default for (IHI-ILO+1).GT.500 is 3*NS/2. +*> +*> ISPEC=14: Nibble crossover point. (See IPARMQ for +*> details.) Default: 14% of deflation window +*> size. +*> +*> ISPEC=15: Number of simultaneous shifts in a multishift +*> QR iteration. +*> +*> If IHI-ILO+1 is ... +*> +*> greater than ...but less ... the +*> or equal to ... than default is +*> +*> 1 30 NS = 2(+) +*> 30 60 NS = 4(+) +*> 60 150 NS = 10(+) +*> 150 590 NS = ** +*> 590 3000 NS = 64 +*> 3000 6000 NS = 128 +*> 6000 infinity NS = 256 +*> +*> (+) By default some or all matrices of this order +*> are passed to the implicit double shift routine +*> SLAHQR and this parameter is ignored. See +*> ISPEC=12 above and comments in IPARMQ for +*> details. +*> +*> (**) The asterisks (**) indicate an ad-hoc +*> function of N increasing from 10 to 64. +*> +*> ISPEC=16: Select structured matrix multiply. +*> If the number of simultaneous shifts (specified +*> by ISPEC=15) is less than 14, then the default +*> for ISPEC=16 is 0. Otherwise the default for +*> ISPEC=16 is 2. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages +*> 929--947, 2002. +*> \n +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part II: Aggressive Early Deflation, SIAM Journal +*> of Matrix Analysis, volume 23, pages 948--973, 2002. +* +* ===================================================================== + SUBROUTINE SHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, WR, WI, Z, + $ LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDH, LDZ, LWORK, N + CHARACTER COMPZ, JOB +* .. +* .. Array Arguments .. + REAL H( LDH, * ), WI( * ), WORK( * ), WR( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* +* ==== Matrices of order NTINY or smaller must be processed by +* . SLAHQR because of insufficient subdiagonal scratch space. +* . (This is a hard limit.) ==== + INTEGER NTINY + PARAMETER ( NTINY = 11 ) +* +* ==== NL allocates some local workspace to help small matrices +* . through a rare SLAHQR failure. NL .GT. NTINY = 11 is +* . required and NL .LE. NMIN = ILAENV(ISPEC=12,...) is recom- +* . mended. (The default value of NMIN is 75.) Using NL = 49 +* . allows up to six simultaneous shifts and a 16-by-16 +* . deflation window. ==== + INTEGER NL + PARAMETER ( NL = 49 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0e0, ONE = 1.0e0 ) +* .. +* .. Local Arrays .. + REAL HL( NL, NL ), WORKL( NL ) +* .. +* .. Local Scalars .. + INTEGER I, KBOT, NMIN + LOGICAL INITZ, LQUERY, WANTT, WANTZ +* .. +* .. External Functions .. + INTEGER ILAENV + LOGICAL LSAME + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLACPY, SLAHQR, SLAQR0, SLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* ==== Decode and check the input parameters. ==== +* + WANTT = LSAME( JOB, 'S' ) + INITZ = LSAME( COMPZ, 'I' ) + WANTZ = INITZ .OR. LSAME( COMPZ, 'V' ) + WORK( 1 ) = REAL( MAX( 1, N ) ) + LQUERY = LWORK.EQ.-1 +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'E' ) .AND. .NOT.WANTT ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPZ, 'N' ) .AND. .NOT.WANTZ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -5 + ELSE IF( LDH.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.MAX( 1, N ) ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.NE.0 ) THEN +* +* ==== Quick return in case of invalid argument. ==== +* + CALL XERBLA( 'SHSEQR', -INFO ) + RETURN +* + ELSE IF( N.EQ.0 ) THEN +* +* ==== Quick return in case N = 0; nothing to do. ==== +* + RETURN +* + ELSE IF( LQUERY ) THEN +* +* ==== Quick return in case of a workspace query ==== +* + CALL SLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, ILO, + $ IHI, Z, LDZ, WORK, LWORK, INFO ) +* ==== Ensure reported workspace size is backward-compatible with +* . previous LAPACK versions. ==== + WORK( 1 ) = MAX( REAL( MAX( 1, N ) ), WORK( 1 ) ) + RETURN +* + ELSE +* +* ==== copy eigenvalues isolated by SGEBAL ==== +* + DO 10 I = 1, ILO - 1 + WR( I ) = H( I, I ) + WI( I ) = ZERO + 10 CONTINUE + DO 20 I = IHI + 1, N + WR( I ) = H( I, I ) + WI( I ) = ZERO + 20 CONTINUE +* +* ==== Initialize Z, if requested ==== +* + IF( INITZ ) + $ CALL SLASET( 'A', N, N, ZERO, ONE, Z, LDZ ) +* +* ==== Quick return if possible ==== +* + IF( ILO.EQ.IHI ) THEN + WR( ILO ) = H( ILO, ILO ) + WI( ILO ) = ZERO + RETURN + END IF +* +* ==== SLAHQR/SLAQR0 crossover point ==== +* + NMIN = ILAENV( 12, 'SHSEQR', JOB( : 1 ) // COMPZ( : 1 ), N, + $ ILO, IHI, LWORK ) + NMIN = MAX( NTINY, NMIN ) +* +* ==== SLAQR0 for big matrices; SLAHQR for small ones ==== +* + IF( N.GT.NMIN ) THEN + CALL SLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, ILO, + $ IHI, Z, LDZ, WORK, LWORK, INFO ) + ELSE +* +* ==== Small matrix ==== +* + CALL SLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, ILO, + $ IHI, Z, LDZ, INFO ) +* + IF( INFO.GT.0 ) THEN +* +* ==== A rare SLAHQR failure! SLAQR0 sometimes succeeds +* . when SLAHQR fails. ==== +* + KBOT = INFO +* + IF( N.GE.NL ) THEN +* +* ==== Larger matrices have enough subdiagonal scratch +* . space to call SLAQR0 directly. ==== +* + CALL SLAQR0( WANTT, WANTZ, N, ILO, KBOT, H, LDH, WR, + $ WI, ILO, IHI, Z, LDZ, WORK, LWORK, INFO ) +* + ELSE +* +* ==== Tiny matrices don't have enough subdiagonal +* . scratch space to benefit from SLAQR0. Hence, +* . tiny matrices must be copied into a larger +* . array before calling SLAQR0. ==== +* + CALL SLACPY( 'A', N, N, H, LDH, HL, NL ) + HL( N+1, N ) = ZERO + CALL SLASET( 'A', NL, NL-N, ZERO, ZERO, HL( 1, N+1 ), + $ NL ) + CALL SLAQR0( WANTT, WANTZ, NL, ILO, KBOT, HL, NL, WR, + $ WI, ILO, IHI, Z, LDZ, WORKL, NL, INFO ) + IF( WANTT .OR. INFO.NE.0 ) + $ CALL SLACPY( 'A', N, N, HL, NL, H, LDH ) + END IF + END IF + END IF +* +* ==== Clear out the trash, if necessary. ==== +* + IF( ( WANTT .OR. INFO.NE.0 ) .AND. N.GT.2 ) + $ CALL SLASET( 'L', N-2, N-2, ZERO, ZERO, H( 3, 1 ), LDH ) +* +* ==== Ensure reported workspace size is backward-compatible with +* . previous LAPACK versions. ==== +* + WORK( 1 ) = MAX( REAL( MAX( 1, N ) ), WORK( 1 ) ) + END IF +* +* ==== End of SHSEQR ==== +* + END diff --git a/dspl/liblapack/SRC/sisnan.f b/dspl/liblapack/SRC/sisnan.f new file mode 100644 index 0000000..311bfbf --- /dev/null +++ b/dspl/liblapack/SRC/sisnan.f @@ -0,0 +1,80 @@ +*> \brief \b SISNAN tests input for NaN. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SISNAN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* LOGICAL FUNCTION SISNAN( SIN ) +* +* .. Scalar Arguments .. +* REAL, INTENT(IN) :: SIN +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SISNAN returns .TRUE. if its argument is NaN, and .FALSE. +*> otherwise. To be replaced by the Fortran 2003 intrinsic in the +*> future. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIN +*> \verbatim +*> SIN is REAL +*> Input to test for NaN. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + LOGICAL FUNCTION SISNAN( SIN ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + REAL, INTENT(IN) :: SIN +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL SLAISNAN + EXTERNAL SLAISNAN +* .. +* .. Executable Statements .. + SISNAN = SLAISNAN(SIN,SIN) + RETURN + END diff --git a/dspl/liblapack/SRC/sla_gbamv.f b/dspl/liblapack/SRC/sla_gbamv.f new file mode 100644 index 0000000..b513f24 --- /dev/null +++ b/dspl/liblapack/SRC/sla_gbamv.f @@ -0,0 +1,410 @@ +*> \brief \b SLA_GBAMV performs a matrix-vector operation to calculate error bounds. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLA_GBAMV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLA_GBAMV( TRANS, M, N, KL, KU, ALPHA, AB, LDAB, X, +* INCX, BETA, Y, INCY ) +* +* .. Scalar Arguments .. +* REAL ALPHA, BETA +* INTEGER INCX, INCY, LDAB, M, N, KL, KU, TRANS +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLA_GBAMV performs one of the matrix-vector operations +*> +*> y := alpha*abs(A)*abs(x) + beta*abs(y), +*> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), +*> +*> where alpha and beta are scalars, x and y are vectors and A is an +*> m by n matrix. +*> +*> This function is primarily used in calculating error bounds. +*> To protect against underflow during evaluation, components in +*> the resulting vector are perturbed away from zero by (N+1) +*> times the underflow threshold. To prevent unnecessarily large +*> errors for block-structure embedded in general matrices, +*> "symbolically" zero components are not perturbed. A zero +*> entry is considered "symbolic" if all multiplications involved +*> in computing that entry have at least one zero multiplicand. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is INTEGER +*> On entry, TRANS specifies the operation to be performed as +*> follows: +*> +*> BLAS_NO_TRANS y := alpha*abs(A)*abs(x) + beta*abs(y) +*> BLAS_TRANS y := alpha*abs(A**T)*abs(x) + beta*abs(y) +*> BLAS_CONJ_TRANS y := alpha*abs(A**T)*abs(x) + beta*abs(y) +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the number of rows of the matrix A. +*> M must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is REAL +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension ( LDAB, n ) +*> Before entry, the leading m by n part of the array AB must +*> contain the matrix of coefficients. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> On entry, LDA specifies the first dimension of AB as declared +*> in the calling (sub) program. LDAB must be at least +*> max( 1, m ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is REAL array, dimension +*> ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +*> and at least +*> ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +*> Before entry, the incremented array X must contain the +*> vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is REAL +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is REAL array, dimension +*> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +*> and at least +*> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +*> Before entry with BETA non-zero, the incremented array Y +*> must contain the vector y. On exit, Y is overwritten by the +*> updated vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> Unchanged on exit. +*> +*> Level 2 Blas routine. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realGBcomputational +* +* ===================================================================== + SUBROUTINE SLA_GBAMV( TRANS, M, N, KL, KU, ALPHA, AB, LDAB, X, + $ INCX, BETA, Y, INCY ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + REAL ALPHA, BETA + INTEGER INCX, INCY, LDAB, M, N, KL, KU, TRANS +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), X( * ), Y( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL SYMB_ZERO + REAL TEMP, SAFE1 + INTEGER I, INFO, IY, J, JX, KX, KY, LENX, LENY, KD, KE +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SLAMCH + REAL SLAMCH +* .. +* .. External Functions .. + EXTERNAL ILATRANS + INTEGER ILATRANS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, ABS, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.( ( TRANS.EQ.ILATRANS( 'N' ) ) + $ .OR. ( TRANS.EQ.ILATRANS( 'T' ) ) + $ .OR. ( TRANS.EQ.ILATRANS( 'C' ) ) ) ) THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( KL.LT.0 .OR. KL.GT.M-1 ) THEN + INFO = 4 + ELSE IF( KU.LT.0 .OR. KU.GT.N-1 ) THEN + INFO = 5 + ELSE IF( LDAB.LT.KL+KU+1 )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SLA_GBAMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Set SAFE1 essentially to be the underflow threshold times the +* number of additions in each row. +* + SAFE1 = SLAMCH( 'Safe minimum' ) + SAFE1 = (N+1)*SAFE1 +* +* Form y := alpha*abs(A)*abs(x) + beta*abs(y). +* +* The O(M*N) SYMB_ZERO tests could be replaced by O(N) queries to +* the inexact flag. Still doesn't help change the iteration order +* to per-column. +* + KD = KU + 1 + KE = KL + 1 + IY = KY + IF ( INCX.EQ.1 ) THEN + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + DO I = 1, LENY + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + DO J = MAX( I-KL, 1 ), MIN( I+KU, LENX ) + TEMP = ABS( AB( KD+I-J, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + IY = IY + INCY + END DO + ELSE + DO I = 1, LENY + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + DO J = MAX( I-KL, 1 ), MIN( I+KU, LENX ) + TEMP = ABS( AB( KE-I+J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + IY = IY + INCY + END DO + END IF + ELSE + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + DO I = 1, LENY + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + JX = KX + DO J = MAX( I-KL, 1 ), MIN( I+KU, LENX ) + TEMP = ABS( AB( KD+I-J, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( JX ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, LENY + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + JX = KX + DO J = MAX( I-KL, 1 ), MIN( I+KU, LENX ) + TEMP = ABS( AB( KE-I+J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( JX ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + + END IF +* + RETURN +* +* End of SLA_GBAMV +* + END diff --git a/dspl/liblapack/SRC/sla_gbrcond.f b/dspl/liblapack/SRC/sla_gbrcond.f new file mode 100644 index 0000000..36aa93d --- /dev/null +++ b/dspl/liblapack/SRC/sla_gbrcond.f @@ -0,0 +1,351 @@ +*> \brief \b SLA_GBRCOND estimates the Skeel condition number for a general banded matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLA_GBRCOND + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLA_GBRCOND( TRANS, N, KL, KU, AB, LDAB, AFB, LDAFB, +* IPIV, CMODE, C, INFO, WORK, IWORK ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER N, LDAB, LDAFB, INFO, KL, KU, CMODE +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ), IPIV( * ) +* REAL AB( LDAB, * ), AFB( LDAFB, * ), WORK( * ), +* $ C( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLA_GBRCOND Estimates the Skeel condition number of op(A) * op2(C) +*> where op2 is determined by CMODE as follows +*> CMODE = 1 op2(C) = C +*> CMODE = 0 op2(C) = I +*> CMODE = -1 op2(C) = inv(C) +*> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) +*> is computed by computing scaling factors R such that +*> diag(R)*A*op2(C) is row equilibrated and computing the standard +*> infinity-norm condition number. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate Transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is REAL array, dimension (LDAFB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by SGBTRF. U is stored as an upper triangular +*> band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +*> and the multipliers used during the factorization are stored +*> in rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from the factorization A = P*L*U +*> as computed by SGBTRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> \endverbatim +*> +*> \param[in] CMODE +*> \verbatim +*> CMODE is INTEGER +*> Determines op2(C) in the formula op(A) * op2(C) as follows: +*> CMODE = 1 op2(C) = C +*> CMODE = 0 op2(C) = I +*> CMODE = -1 op2(C) = inv(C) +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The vector C in the formula op(A) * op2(C). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is REAL array, dimension (5*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGBcomputational +* +* ===================================================================== + REAL FUNCTION SLA_GBRCOND( TRANS, N, KL, KU, AB, LDAB, AFB, LDAFB, + $ IPIV, CMODE, C, INFO, WORK, IWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER N, LDAB, LDAFB, INFO, KL, KU, CMODE +* .. +* .. Array Arguments .. + INTEGER IWORK( * ), IPIV( * ) + REAL AB( LDAB, * ), AFB( LDAFB, * ), WORK( * ), + $ C( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRANS + INTEGER KASE, I, J, KD, KE + REAL AINVNM, TMP +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SGBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + SLA_GBRCOND = 0.0 +* + INFO = 0 + NOTRANS = LSAME( TRANS, 'N' ) + IF ( .NOT. NOTRANS .AND. .NOT. LSAME(TRANS, 'T') + $ .AND. .NOT. LSAME(TRANS, 'C') ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 .OR. KL.GT.N-1 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 .OR. KU.GT.N-1 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -6 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLA_GBRCOND', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) THEN + SLA_GBRCOND = 1.0 + RETURN + END IF +* +* Compute the equilibration matrix R such that +* inv(R)*A*C has unit 1-norm. +* + KD = KU + 1 + KE = KL + 1 + IF ( NOTRANS ) THEN + DO I = 1, N + TMP = 0.0 + IF ( CMODE .EQ. 1 ) THEN + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + ABS( AB( KD+I-J, J ) * C( J ) ) + END DO + ELSE IF ( CMODE .EQ. 0 ) THEN + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + ABS( AB( KD+I-J, J ) ) + END DO + ELSE + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + ABS( AB( KD+I-J, J ) / C( J ) ) + END DO + END IF + WORK( 2*N+I ) = TMP + END DO + ELSE + DO I = 1, N + TMP = 0.0 + IF ( CMODE .EQ. 1 ) THEN + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + ABS( AB( KE-I+J, I ) * C( J ) ) + END DO + ELSE IF ( CMODE .EQ. 0 ) THEN + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + ABS( AB( KE-I+J, I ) ) + END DO + ELSE + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + ABS( AB( KE-I+J, I ) / C( J ) ) + END DO + END IF + WORK( 2*N+I ) = TMP + END DO + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0 + + KASE = 0 + 10 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * WORK( 2*N+I ) + END DO + + IF ( NOTRANS ) THEN + CALL SGBTRS( 'No transpose', N, KL, KU, 1, AFB, LDAFB, + $ IPIV, WORK, N, INFO ) + ELSE + CALL SGBTRS( 'Transpose', N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK, N, INFO ) + END IF +* +* Multiply by inv(C). +* + IF ( CMODE .EQ. 1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) / C( I ) + END DO + ELSE IF ( CMODE .EQ. -1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + ELSE +* +* Multiply by inv(C**T). +* + IF ( CMODE .EQ. 1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) / C( I ) + END DO + ELSE IF ( CMODE .EQ. -1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + + IF ( NOTRANS ) THEN + CALL SGBTRS( 'Transpose', N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK, N, INFO ) + ELSE + CALL SGBTRS( 'No transpose', N, KL, KU, 1, AFB, LDAFB, + $ IPIV, WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * WORK( 2*N+I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0 ) + $ SLA_GBRCOND = ( 1.0 / AINVNM ) +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/sla_gbrfsx_extended.f b/dspl/liblapack/SRC/sla_gbrfsx_extended.f new file mode 100644 index 0000000..a81feb4 --- /dev/null +++ b/dspl/liblapack/SRC/sla_gbrfsx_extended.f @@ -0,0 +1,706 @@ +*> \brief \b SLA_GBRFSX_EXTENDED improves the computed solution to a system of linear equations for general banded matrices by performing extra-precise iterative refinement and provides error bounds and backward error estimates for the solution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLA_GBRFSX_EXTENDED + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLA_GBRFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, KL, KU, +* NRHS, AB, LDAB, AFB, LDAFB, IPIV, +* COLEQU, C, B, LDB, Y, LDY, +* BERR_OUT, N_NORMS, ERR_BNDS_NORM, +* ERR_BNDS_COMP, RES, AYB, DY, +* Y_TAIL, RCOND, ITHRESH, RTHRESH, +* DZ_UB, IGNORE_CWISE, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDAB, LDAFB, LDB, LDY, N, KL, KU, NRHS, +* $ PREC_TYPE, TRANS_TYPE, N_NORMS, ITHRESH +* LOGICAL COLEQU, IGNORE_CWISE +* REAL RTHRESH, DZ_UB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ Y( LDY, * ), RES(*), DY(*), Y_TAIL(*) +* REAL C( * ), AYB(*), RCOND, BERR_OUT(*), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLA_GBRFSX_EXTENDED improves the computed solution to a system of +*> linear equations by performing extra-precise iterative refinement +*> and provides error bounds and backward error estimates for the solution. +*> This subroutine is called by SGBRFSX to perform iterative refinement. +*> In addition to normwise error bound, the code provides maximum +*> componentwise error bound if possible. See comments for ERR_BNDS_NORM +*> and ERR_BNDS_COMP for details of the error bounds. Note that this +*> subroutine is only resonsible for setting the second fields of +*> ERR_BNDS_NORM and ERR_BNDS_COMP. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] PREC_TYPE +*> \verbatim +*> PREC_TYPE is INTEGER +*> Specifies the intermediate precision to be used in refinement. +*> The value is defined by ILAPREC(P) where P is a CHARACTER and +*> P = 'S': Single +*> = 'D': Double +*> = 'I': Indigenous +*> = 'X', 'E': Extra +*> \endverbatim +*> +*> \param[in] TRANS_TYPE +*> \verbatim +*> TRANS_TYPE is INTEGER +*> Specifies the transposition operation on A. +*> The value is defined by ILATRANS(T) where T is a CHARACTER and +*> T = 'N': No transpose +*> = 'T': Transpose +*> = 'C': Conjugate transpose +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0 +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand-sides, i.e., the number of columns of the +*> matrix B. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the N-by-N matrix AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= max(1,N). +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is REAL array, dimension (LDAFB,N) +*> The factors L and U from the factorization +*> A = P*L*U as computed by SGBTRF. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AF. LDAFB >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from the factorization A = P*L*U +*> as computed by SGBTRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> \endverbatim +*> +*> \param[in] COLEQU +*> \verbatim +*> COLEQU is LOGICAL +*> If .TRUE. then column equilibration was done to A before calling +*> this routine. This is needed to compute the solution and error +*> bounds correctly. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If COLEQU = .FALSE., C +*> is not accessed. If C is input, each element of C should be a power +*> of the radix to ensure a reliable solution and error estimates. +*> Scaling by powers of the radix does not cause rounding errors unless +*> the result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The right-hand-side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is REAL array, dimension (LDY,NRHS) +*> On entry, the solution matrix X, as computed by SGBTRS. +*> On exit, the improved solution matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +*> +*> \param[out] BERR_OUT +*> \verbatim +*> BERR_OUT is REAL array, dimension (NRHS) +*> On exit, BERR_OUT(j) contains the componentwise relative backward +*> error for right-hand-side j from the formula +*> max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. This is computed by SLA_LIN_BERR. +*> \endverbatim +*> +*> \param[in] N_NORMS +*> \verbatim +*> N_NORMS is INTEGER +*> Determines which error bounds to return (see ERR_BNDS_NORM +*> and ERR_BNDS_COMP). +*> If N_NORMS >= 1 return normwise error bounds. +*> If N_NORMS >= 2 return componentwise error bounds. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is REAL array, dimension (N) +*> Workspace to hold the intermediate residual. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is REAL array, dimension (N) +*> Workspace. This can be the same workspace passed for Y_TAIL. +*> \endverbatim +*> +*> \param[in] DY +*> \verbatim +*> DY is REAL array, dimension (N) +*> Workspace to hold the intermediate solution. +*> \endverbatim +*> +*> \param[in] Y_TAIL +*> \verbatim +*> Y_TAIL is REAL array, dimension (N) +*> Workspace to hold the trailing bits of the intermediate solution. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[in] ITHRESH +*> \verbatim +*> ITHRESH is INTEGER +*> The maximum number of residual computations allowed for +*> refinement. The default is 10. For 'aggressive' set to 100 to +*> permit convergence using approximate factorizations or +*> factorizations other than LU. If the factorization uses a +*> technique other than Gaussian elimination, the guarantees in +*> ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. +*> \endverbatim +*> +*> \param[in] RTHRESH +*> \verbatim +*> RTHRESH is REAL +*> Determines when to stop refinement if the error estimate stops +*> decreasing. Refinement will stop when the next solution no longer +*> satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is +*> the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The +*> default value is 0.5. For 'aggressive' set to 0.9 to permit +*> convergence on extremely ill-conditioned matrices. See LAWN 165 +*> for more details. +*> \endverbatim +*> +*> \param[in] DZ_UB +*> \verbatim +*> DZ_UB is REAL +*> Determines when to start considering componentwise convergence. +*> Componentwise convergence is only considered after each component +*> of the solution Y is stable, which we definte as the relative +*> change in each component being less than DZ_UB. The default value +*> is 0.25, requiring the first bit to be stable. See LAWN 165 for +*> more details. +*> \endverbatim +*> +*> \param[in] IGNORE_CWISE +*> \verbatim +*> IGNORE_CWISE is LOGICAL +*> If .TRUE. then ignore componentwise convergence. Default value +*> is .FALSE.. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> < 0: if INFO = -i, the ith argument to SGBTRS had an illegal +*> value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realGBcomputational +* +* ===================================================================== + SUBROUTINE SLA_GBRFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, KL, KU, + $ NRHS, AB, LDAB, AFB, LDAFB, IPIV, + $ COLEQU, C, B, LDB, Y, LDY, + $ BERR_OUT, N_NORMS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, RES, AYB, DY, + $ Y_TAIL, RCOND, ITHRESH, RTHRESH, + $ DZ_UB, IGNORE_CWISE, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDAB, LDAFB, LDB, LDY, N, KL, KU, NRHS, + $ PREC_TYPE, TRANS_TYPE, N_NORMS, ITHRESH + LOGICAL COLEQU, IGNORE_CWISE + REAL RTHRESH, DZ_UB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ Y( LDY, * ), RES(*), DY(*), Y_TAIL(*) + REAL C( * ), AYB(*), RCOND, BERR_OUT(*), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + CHARACTER TRANS + INTEGER CNT, I, J, M, X_STATE, Z_STATE, Y_PREC_STATE + REAL YK, DYK, YMIN, NORMY, NORMX, NORMDX, DXRAT, + $ DZRAT, PREVNORMDX, PREV_DZ_Z, DXRATMAX, + $ DZRATMAX, DX_X, DZ_Z, FINAL_DX_X, FINAL_DZ_Z, + $ EPS, HUGEVAL, INCR_THRESH + LOGICAL INCR_PREC +* .. +* .. Parameters .. + INTEGER UNSTABLE_STATE, WORKING_STATE, CONV_STATE, + $ NOPROG_STATE, BASE_RESIDUAL, EXTRA_RESIDUAL, + $ EXTRA_Y + PARAMETER ( UNSTABLE_STATE = 0, WORKING_STATE = 1, + $ CONV_STATE = 2, NOPROG_STATE = 3 ) + PARAMETER ( BASE_RESIDUAL = 0, EXTRA_RESIDUAL = 1, + $ EXTRA_Y = 2 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGBTRS, SGBMV, BLAS_SGBMV_X, + $ BLAS_SGBMV2_X, SLA_GBAMV, SLA_WWADDW, SLAMCH, + $ CHLA_TRANSTYPE, SLA_LIN_BERR + REAL SLAMCH + CHARACTER CHLA_TRANSTYPE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + IF (INFO.NE.0) RETURN + TRANS = CHLA_TRANSTYPE(TRANS_TYPE) + EPS = SLAMCH( 'Epsilon' ) + HUGEVAL = SLAMCH( 'Overflow' ) +* Force HUGEVAL to Inf + HUGEVAL = HUGEVAL * HUGEVAL +* Using HUGEVAL may lead to spurious underflows. + INCR_THRESH = REAL( N ) * EPS + M = KL+KU+1 + + DO J = 1, NRHS + Y_PREC_STATE = EXTRA_RESIDUAL + IF ( Y_PREC_STATE .EQ. EXTRA_Y ) THEN + DO I = 1, N + Y_TAIL( I ) = 0.0 + END DO + END IF + + DXRAT = 0.0 + DXRATMAX = 0.0 + DZRAT = 0.0 + DZRATMAX = 0.0 + FINAL_DX_X = HUGEVAL + FINAL_DZ_Z = HUGEVAL + PREVNORMDX = HUGEVAL + PREV_DZ_Z = HUGEVAL + DZ_Z = HUGEVAL + DX_X = HUGEVAL + + X_STATE = WORKING_STATE + Z_STATE = UNSTABLE_STATE + INCR_PREC = .FALSE. + + DO CNT = 1, ITHRESH +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL SCOPY( N, B( 1, J ), 1, RES, 1 ) + IF ( Y_PREC_STATE .EQ. BASE_RESIDUAL ) THEN + CALL SGBMV( TRANS, M, N, KL, KU, -1.0, AB, LDAB, + $ Y( 1, J ), 1, 1.0, RES, 1 ) + ELSE IF ( Y_PREC_STATE .EQ. EXTRA_RESIDUAL ) THEN + CALL BLAS_SGBMV_X( TRANS_TYPE, N, N, KL, KU, + $ -1.0, AB, LDAB, Y( 1, J ), 1, 1.0, RES, 1, + $ PREC_TYPE ) + ELSE + CALL BLAS_SGBMV2_X( TRANS_TYPE, N, N, KL, KU, -1.0, + $ AB, LDAB, Y( 1, J ), Y_TAIL, 1, 1.0, RES, 1, + $ PREC_TYPE ) + END IF + +! XXX: RES is no longer needed. + CALL SCOPY( N, RES, 1, DY, 1 ) + CALL SGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV, DY, N, + $ INFO ) +* +* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. +* + NORMX = 0.0 + NORMY = 0.0 + NORMDX = 0.0 + DZ_Z = 0.0 + YMIN = HUGEVAL + + DO I = 1, N + YK = ABS( Y( I, J ) ) + DYK = ABS( DY( I ) ) + + IF ( YK .NE. 0.0 ) THEN + DZ_Z = MAX( DZ_Z, DYK / YK ) + ELSE IF ( DYK .NE. 0.0 ) THEN + DZ_Z = HUGEVAL + END IF + + YMIN = MIN( YMIN, YK ) + + NORMY = MAX( NORMY, YK ) + + IF ( COLEQU ) THEN + NORMX = MAX( NORMX, YK * C( I ) ) + NORMDX = MAX( NORMDX, DYK * C( I ) ) + ELSE + NORMX = NORMY + NORMDX = MAX( NORMDX, DYK ) + END IF + END DO + + IF ( NORMX .NE. 0.0 ) THEN + DX_X = NORMDX / NORMX + ELSE IF ( NORMDX .EQ. 0.0 ) THEN + DX_X = 0.0 + ELSE + DX_X = HUGEVAL + END IF + + DXRAT = NORMDX / PREVNORMDX + DZRAT = DZ_Z / PREV_DZ_Z +* +* Check termination criteria. +* + IF ( .NOT.IGNORE_CWISE + $ .AND. YMIN*RCOND .LT. INCR_THRESH*NORMY + $ .AND. Y_PREC_STATE .LT. EXTRA_Y ) + $ INCR_PREC = .TRUE. + + IF ( X_STATE .EQ. NOPROG_STATE .AND. DXRAT .LE. RTHRESH ) + $ X_STATE = WORKING_STATE + IF ( X_STATE .EQ. WORKING_STATE ) THEN + IF ( DX_X .LE. EPS ) THEN + X_STATE = CONV_STATE + ELSE IF ( DXRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + X_STATE = NOPROG_STATE + END IF + ELSE + IF ( DXRAT .GT. DXRATMAX ) DXRATMAX = DXRAT + END IF + IF ( X_STATE .GT. WORKING_STATE ) FINAL_DX_X = DX_X + END IF + + IF ( Z_STATE .EQ. UNSTABLE_STATE .AND. DZ_Z .LE. DZ_UB ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. NOPROG_STATE .AND. DZRAT .LE. RTHRESH ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. WORKING_STATE ) THEN + IF ( DZ_Z .LE. EPS ) THEN + Z_STATE = CONV_STATE + ELSE IF ( DZ_Z .GT. DZ_UB ) THEN + Z_STATE = UNSTABLE_STATE + DZRATMAX = 0.0 + FINAL_DZ_Z = HUGEVAL + ELSE IF ( DZRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + Z_STATE = NOPROG_STATE + END IF + ELSE + IF ( DZRAT .GT. DZRATMAX ) DZRATMAX = DZRAT + END IF + IF ( Z_STATE .GT. WORKING_STATE ) FINAL_DZ_Z = DZ_Z + END IF +* +* Exit if both normwise and componentwise stopped working, +* but if componentwise is unstable, let it go at least two +* iterations. +* + IF ( X_STATE.NE.WORKING_STATE ) THEN + IF ( IGNORE_CWISE ) GOTO 666 + IF ( Z_STATE.EQ.NOPROG_STATE .OR. Z_STATE.EQ.CONV_STATE ) + $ GOTO 666 + IF ( Z_STATE.EQ.UNSTABLE_STATE .AND. CNT.GT.1 ) GOTO 666 + END IF + + IF ( INCR_PREC ) THEN + INCR_PREC = .FALSE. + Y_PREC_STATE = Y_PREC_STATE + 1 + DO I = 1, N + Y_TAIL( I ) = 0.0 + END DO + END IF + + PREVNORMDX = NORMDX + PREV_DZ_Z = DZ_Z +* +* Update soluton. +* + IF (Y_PREC_STATE .LT. EXTRA_Y) THEN + CALL SAXPY( N, 1.0, DY, 1, Y(1,J), 1 ) + ELSE + CALL SLA_WWADDW( N, Y(1,J), Y_TAIL, DY ) + END IF + + END DO +* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. + 666 CONTINUE +* +* Set final_* when cnt hits ithresh. +* + IF ( X_STATE .EQ. WORKING_STATE ) FINAL_DX_X = DX_X + IF ( Z_STATE .EQ. WORKING_STATE ) FINAL_DZ_Z = DZ_Z +* +* Compute error bounds. +* + IF ( N_NORMS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = + $ FINAL_DX_X / (1 - DXRATMAX) + END IF + IF (N_NORMS .GE. 2) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = + $ FINAL_DZ_Z / (1 - DZRATMAX) + END IF +* +* Compute componentwise relative backward error from formula +* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL SCOPY( N, B( 1, J ), 1, RES, 1 ) + CALL SGBMV(TRANS, N, N, KL, KU, -1.0, AB, LDAB, Y(1,J), + $ 1, 1.0, RES, 1 ) + + DO I = 1, N + AYB( I ) = ABS( B( I, J ) ) + END DO +* +* Compute abs(op(A_s))*abs(Y) + abs(B_s). +* + CALL SLA_GBAMV( TRANS_TYPE, N, N, KL, KU, 1.0, + $ AB, LDAB, Y(1, J), 1, 1.0, AYB, 1 ) + + CALL SLA_LIN_BERR( N, N, 1, RES, AYB, BERR_OUT( J ) ) +* +* End of loop for each RHS +* + END DO +* + RETURN + END diff --git a/dspl/liblapack/SRC/sla_gbrpvgrw.f b/dspl/liblapack/SRC/sla_gbrpvgrw.f new file mode 100644 index 0000000..77e9f26 --- /dev/null +++ b/dspl/liblapack/SRC/sla_gbrpvgrw.f @@ -0,0 +1,160 @@ +*> \brief \b SLA_GBRPVGRW computes the reciprocal pivot growth factor norm(A)/norm(U) for a general banded matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLA_GBRPVGRW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLA_GBRPVGRW( N, KL, KU, NCOLS, AB, LDAB, AFB, +* LDAFB ) +* +* .. Scalar Arguments .. +* INTEGER N, KL, KU, NCOLS, LDAB, LDAFB +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), AFB( LDAFB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLA_GBRPVGRW computes the reciprocal pivot growth factor +*> norm(A)/norm(U). The "max absolute element" norm is used. If this is +*> much less than 1, the stability of the LU factorization of the +*> (equilibrated) matrix A could be poor. This also means that the +*> solution X, estimated condition numbers, and error bounds could be +*> unreliable. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NCOLS +*> \verbatim +*> NCOLS is INTEGER +*> The number of columns of the matrix A. NCOLS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is REAL array, dimension (LDAFB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by SGBTRF. U is stored as an upper triangular +*> band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +*> and the multipliers used during the factorization are stored +*> in rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGBcomputational +* +* ===================================================================== + REAL FUNCTION SLA_GBRPVGRW( N, KL, KU, NCOLS, AB, LDAB, AFB, + $ LDAFB ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER N, KL, KU, NCOLS, LDAB, LDAFB +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), AFB( LDAFB, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J, KD + REAL AMAX, UMAX, RPVGRW +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + RPVGRW = 1.0 + + KD = KU + 1 + DO J = 1, NCOLS + AMAX = 0.0 + UMAX = 0.0 + DO I = MAX( J-KU, 1 ), MIN( J+KL, N ) + AMAX = MAX( ABS( AB( KD+I-J, J)), AMAX ) + END DO + DO I = MAX( J-KU, 1 ), J + UMAX = MAX( ABS( AFB( KD+I-J, J ) ), UMAX ) + END DO + IF ( UMAX /= 0.0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + SLA_GBRPVGRW = RPVGRW + END diff --git a/dspl/liblapack/SRC/sla_geamv.f b/dspl/liblapack/SRC/sla_geamv.f new file mode 100644 index 0000000..7906d04 --- /dev/null +++ b/dspl/liblapack/SRC/sla_geamv.f @@ -0,0 +1,396 @@ +*> \brief \b SLA_GEAMV computes a matrix-vector product using a general matrix to calculate error bounds. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLA_GEAMV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLA_GEAMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, +* Y, INCY ) +* +* .. Scalar Arguments .. +* REAL ALPHA, BETA +* INTEGER INCX, INCY, LDA, M, N, TRANS +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLA_GEAMV performs one of the matrix-vector operations +*> +*> y := alpha*abs(A)*abs(x) + beta*abs(y), +*> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), +*> +*> where alpha and beta are scalars, x and y are vectors and A is an +*> m by n matrix. +*> +*> This function is primarily used in calculating error bounds. +*> To protect against underflow during evaluation, components in +*> the resulting vector are perturbed away from zero by (N+1) +*> times the underflow threshold. To prevent unnecessarily large +*> errors for block-structure embedded in general matrices, +*> "symbolically" zero components are not perturbed. A zero +*> entry is considered "symbolic" if all multiplications involved +*> in computing that entry have at least one zero multiplicand. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is INTEGER +*> On entry, TRANS specifies the operation to be performed as +*> follows: +*> +*> BLAS_NO_TRANS y := alpha*abs(A)*abs(x) + beta*abs(y) +*> BLAS_TRANS y := alpha*abs(A**T)*abs(x) + beta*abs(y) +*> BLAS_CONJ_TRANS y := alpha*abs(A**T)*abs(x) + beta*abs(y) +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the number of rows of the matrix A. +*> M must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is REAL +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension ( LDA, n ) +*> Before entry, the leading m by n part of the array A must +*> contain the matrix of coefficients. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, m ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is REAL array, dimension +*> ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +*> and at least +*> ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +*> Before entry, the incremented array X must contain the +*> vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is REAL +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is REAL array, +*> dimension at least +*> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +*> and at least +*> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +*> Before entry with BETA non-zero, the incremented array Y +*> must contain the vector y. On exit, Y is overwritten by the +*> updated vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> Unchanged on exit. +*> +*> Level 2 Blas routine. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realGEcomputational +* +* ===================================================================== + SUBROUTINE SLA_GEAMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, + $ Y, INCY ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + REAL ALPHA, BETA + INTEGER INCX, INCY, LDA, M, N, TRANS +* .. +* .. Array Arguments .. + REAL A( LDA, * ), X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL SYMB_ZERO + REAL TEMP, SAFE1 + INTEGER I, INFO, IY, J, JX, KX, KY, LENX, LENY +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SLAMCH + REAL SLAMCH +* .. +* .. External Functions .. + EXTERNAL ILATRANS + INTEGER ILATRANS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, ABS, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.( ( TRANS.EQ.ILATRANS( 'N' ) ) + $ .OR. ( TRANS.EQ.ILATRANS( 'T' ) ) + $ .OR. ( TRANS.EQ.ILATRANS( 'C' )) ) ) THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SLA_GEAMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Set SAFE1 essentially to be the underflow threshold times the +* number of additions in each row. +* + SAFE1 = SLAMCH( 'Safe minimum' ) + SAFE1 = (N+1)*SAFE1 +* +* Form y := alpha*abs(A)*abs(x) + beta*abs(y). +* +* The O(M*N) SYMB_ZERO tests could be replaced by O(N) queries to +* the inexact flag. Still doesn't help change the iteration order +* to per-column. +* + IY = KY + IF ( INCX.EQ.1 ) THEN + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + DO I = 1, LENY + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, LENX + TEMP = ABS( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, LENY + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, LENX + TEMP = ABS( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + ELSE + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + DO I = 1, LENY + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + JX = KX + DO J = 1, LENX + TEMP = ABS( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( JX ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF (.NOT.SYMB_ZERO) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, LENY + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + JX = KX + DO J = 1, LENX + TEMP = ABS( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( JX ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF (.NOT.SYMB_ZERO) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + + END IF +* + RETURN +* +* End of SLA_GEAMV +* + END diff --git a/dspl/liblapack/SRC/sla_gercond.f b/dspl/liblapack/SRC/sla_gercond.f new file mode 100644 index 0000000..349a1b5 --- /dev/null +++ b/dspl/liblapack/SRC/sla_gercond.f @@ -0,0 +1,327 @@ +*> \brief \b SLA_GERCOND estimates the Skeel condition number for a general matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLA_GERCOND + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLA_GERCOND ( TRANS, N, A, LDA, AF, LDAF, IPIV, +* CMODE, C, INFO, WORK, IWORK ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER N, LDA, LDAF, INFO, CMODE +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL A( LDA, * ), AF( LDAF, * ), WORK( * ), +* $ C( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLA_GERCOND estimates the Skeel condition number of op(A) * op2(C) +*> where op2 is determined by CMODE as follows +*> CMODE = 1 op2(C) = C +*> CMODE = 0 op2(C) = I +*> CMODE = -1 op2(C) = inv(C) +*> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) +*> is computed by computing scaling factors R such that +*> diag(R)*A*op2(C) is row equilibrated and computing the standard +*> infinity-norm condition number. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate Transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> The factors L and U from the factorization +*> A = P*L*U as computed by SGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from the factorization A = P*L*U +*> as computed by SGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> \endverbatim +*> +*> \param[in] CMODE +*> \verbatim +*> CMODE is INTEGER +*> Determines op2(C) in the formula op(A) * op2(C) as follows: +*> CMODE = 1 op2(C) = C +*> CMODE = 0 op2(C) = I +*> CMODE = -1 op2(C) = inv(C) +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The vector C in the formula op(A) * op2(C). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N). +*> Workspace.2 +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +* ===================================================================== + REAL FUNCTION SLA_GERCOND ( TRANS, N, A, LDA, AF, LDAF, IPIV, + $ CMODE, C, INFO, WORK, IWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER N, LDA, LDAF, INFO, CMODE +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL A( LDA, * ), AF( LDAF, * ), WORK( * ), + $ C( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRANS + INTEGER KASE, I, J + REAL AINVNM, TMP +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SGETRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + SLA_GERCOND = 0.0 +* + INFO = 0 + NOTRANS = LSAME( TRANS, 'N' ) + IF ( .NOT. NOTRANS .AND. .NOT. LSAME(TRANS, 'T') + $ .AND. .NOT. LSAME(TRANS, 'C') ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLA_GERCOND', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) THEN + SLA_GERCOND = 1.0 + RETURN + END IF +* +* Compute the equilibration matrix R such that +* inv(R)*A*C has unit 1-norm. +* + IF (NOTRANS) THEN + DO I = 1, N + TMP = 0.0 + IF ( CMODE .EQ. 1 ) THEN + DO J = 1, N + TMP = TMP + ABS( A( I, J ) * C( J ) ) + END DO + ELSE IF ( CMODE .EQ. 0 ) THEN + DO J = 1, N + TMP = TMP + ABS( A( I, J ) ) + END DO + ELSE + DO J = 1, N + TMP = TMP + ABS( A( I, J ) / C( J ) ) + END DO + END IF + WORK( 2*N+I ) = TMP + END DO + ELSE + DO I = 1, N + TMP = 0.0 + IF ( CMODE .EQ. 1 ) THEN + DO J = 1, N + TMP = TMP + ABS( A( J, I ) * C( J ) ) + END DO + ELSE IF ( CMODE .EQ. 0 ) THEN + DO J = 1, N + TMP = TMP + ABS( A( J, I ) ) + END DO + ELSE + DO J = 1, N + TMP = TMP + ABS( A( J, I ) / C( J ) ) + END DO + END IF + WORK( 2*N+I ) = TMP + END DO + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0 + + KASE = 0 + 10 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK(I) = WORK(I) * WORK(2*N+I) + END DO + + IF (NOTRANS) THEN + CALL SGETRS( 'No transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL SGETRS( 'Transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + END IF +* +* Multiply by inv(C). +* + IF ( CMODE .EQ. 1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) / C( I ) + END DO + ELSE IF ( CMODE .EQ. -1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + ELSE +* +* Multiply by inv(C**T). +* + IF ( CMODE .EQ. 1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) / C( I ) + END DO + ELSE IF ( CMODE .EQ. -1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + + IF (NOTRANS) THEN + CALL SGETRS( 'Transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL SGETRS( 'No transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * WORK( 2*N+I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0 ) + $ SLA_GERCOND = ( 1.0 / AINVNM ) +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/sla_gerfsx_extended.f b/dspl/liblapack/SRC/sla_gerfsx_extended.f new file mode 100644 index 0000000..1795ea9 --- /dev/null +++ b/dspl/liblapack/SRC/sla_gerfsx_extended.f @@ -0,0 +1,689 @@ +*> \brief \b SLA_GERFSX_EXTENDED improves the computed solution to a system of linear equations for general matrices by performing extra-precise iterative refinement and provides error bounds and backward error estimates for the solution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLA_GERFSX_EXTENDED + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLA_GERFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, NRHS, A, +* LDA, AF, LDAF, IPIV, COLEQU, C, B, +* LDB, Y, LDY, BERR_OUT, N_NORMS, +* ERRS_N, ERRS_C, RES, +* AYB, DY, Y_TAIL, RCOND, ITHRESH, +* RTHRESH, DZ_UB, IGNORE_CWISE, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, +* $ TRANS_TYPE, N_NORMS, ITHRESH +* LOGICAL COLEQU, IGNORE_CWISE +* REAL RTHRESH, DZ_UB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) +* REAL C( * ), AYB( * ), RCOND, BERR_OUT( * ), +* $ ERRS_N( NRHS, * ), +* $ ERRS_C( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLA_GERFSX_EXTENDED improves the computed solution to a system of +*> linear equations by performing extra-precise iterative refinement +*> and provides error bounds and backward error estimates for the solution. +*> This subroutine is called by SGERFSX to perform iterative refinement. +*> In addition to normwise error bound, the code provides maximum +*> componentwise error bound if possible. See comments for ERRS_N +*> and ERRS_C for details of the error bounds. Note that this +*> subroutine is only resonsible for setting the second fields of +*> ERRS_N and ERRS_C. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] PREC_TYPE +*> \verbatim +*> PREC_TYPE is INTEGER +*> Specifies the intermediate precision to be used in refinement. +*> The value is defined by ILAPREC(P) where P is a CHARACTER and +*> P = 'S': Single +*> = 'D': Double +*> = 'I': Indigenous +*> = 'X', 'E': Extra +*> \endverbatim +*> +*> \param[in] TRANS_TYPE +*> \verbatim +*> TRANS_TYPE is INTEGER +*> Specifies the transposition operation on A. +*> The value is defined by ILATRANS(T) where T is a CHARACTER and +*> T = 'N': No transpose +*> = 'T': Transpose +*> = 'C': Conjugate transpose +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand-sides, i.e., the number of columns of the +*> matrix B. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> The factors L and U from the factorization +*> A = P*L*U as computed by SGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from the factorization A = P*L*U +*> as computed by SGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> \endverbatim +*> +*> \param[in] COLEQU +*> \verbatim +*> COLEQU is LOGICAL +*> If .TRUE. then column equilibration was done to A before calling +*> this routine. This is needed to compute the solution and error +*> bounds correctly. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If COLEQU = .FALSE., C +*> is not accessed. If C is input, each element of C should be a power +*> of the radix to ensure a reliable solution and error estimates. +*> Scaling by powers of the radix does not cause rounding errors unless +*> the result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The right-hand-side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is REAL array, dimension (LDY,NRHS) +*> On entry, the solution matrix X, as computed by SGETRS. +*> On exit, the improved solution matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +*> +*> \param[out] BERR_OUT +*> \verbatim +*> BERR_OUT is REAL array, dimension (NRHS) +*> On exit, BERR_OUT(j) contains the componentwise relative backward +*> error for right-hand-side j from the formula +*> max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. This is computed by SLA_LIN_BERR. +*> \endverbatim +*> +*> \param[in] N_NORMS +*> \verbatim +*> N_NORMS is INTEGER +*> Determines which error bounds to return (see ERRS_N +*> and ERRS_C). +*> If N_NORMS >= 1 return normwise error bounds. +*> If N_NORMS >= 2 return componentwise error bounds. +*> \endverbatim +*> +*> \param[in,out] ERRS_N +*> \verbatim +*> ERRS_N is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERRS_N(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERRS_N(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in,out] ERRS_C +*> \verbatim +*> ERRS_C is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERRS_C is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERRS_C(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERRS_C(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is REAL array, dimension (N) +*> Workspace to hold the intermediate residual. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is REAL array, dimension (N) +*> Workspace. This can be the same workspace passed for Y_TAIL. +*> \endverbatim +*> +*> \param[in] DY +*> \verbatim +*> DY is REAL array, dimension (N) +*> Workspace to hold the intermediate solution. +*> \endverbatim +*> +*> \param[in] Y_TAIL +*> \verbatim +*> Y_TAIL is REAL array, dimension (N) +*> Workspace to hold the trailing bits of the intermediate solution. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[in] ITHRESH +*> \verbatim +*> ITHRESH is INTEGER +*> The maximum number of residual computations allowed for +*> refinement. The default is 10. For 'aggressive' set to 100 to +*> permit convergence using approximate factorizations or +*> factorizations other than LU. If the factorization uses a +*> technique other than Gaussian elimination, the guarantees in +*> ERRS_N and ERRS_C may no longer be trustworthy. +*> \endverbatim +*> +*> \param[in] RTHRESH +*> \verbatim +*> RTHRESH is REAL +*> Determines when to stop refinement if the error estimate stops +*> decreasing. Refinement will stop when the next solution no longer +*> satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is +*> the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The +*> default value is 0.5. For 'aggressive' set to 0.9 to permit +*> convergence on extremely ill-conditioned matrices. See LAWN 165 +*> for more details. +*> \endverbatim +*> +*> \param[in] DZ_UB +*> \verbatim +*> DZ_UB is REAL +*> Determines when to start considering componentwise convergence. +*> Componentwise convergence is only considered after each component +*> of the solution Y is stable, which we definte as the relative +*> change in each component being less than DZ_UB. The default value +*> is 0.25, requiring the first bit to be stable. See LAWN 165 for +*> more details. +*> \endverbatim +*> +*> \param[in] IGNORE_CWISE +*> \verbatim +*> IGNORE_CWISE is LOGICAL +*> If .TRUE. then ignore componentwise convergence. Default value +*> is .FALSE.. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> < 0: if INFO = -i, the ith argument to SGETRS had an illegal +*> value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +* ===================================================================== + SUBROUTINE SLA_GERFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, NRHS, A, + $ LDA, AF, LDAF, IPIV, COLEQU, C, B, + $ LDB, Y, LDY, BERR_OUT, N_NORMS, + $ ERRS_N, ERRS_C, RES, + $ AYB, DY, Y_TAIL, RCOND, ITHRESH, + $ RTHRESH, DZ_UB, IGNORE_CWISE, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, + $ TRANS_TYPE, N_NORMS, ITHRESH + LOGICAL COLEQU, IGNORE_CWISE + REAL RTHRESH, DZ_UB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) + REAL C( * ), AYB( * ), RCOND, BERR_OUT( * ), + $ ERRS_N( NRHS, * ), + $ ERRS_C( NRHS, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + CHARACTER TRANS + INTEGER CNT, I, J, X_STATE, Z_STATE, Y_PREC_STATE + REAL YK, DYK, YMIN, NORMY, NORMX, NORMDX, DXRAT, + $ DZRAT, PREVNORMDX, PREV_DZ_Z, DXRATMAX, + $ DZRATMAX, DX_X, DZ_Z, FINAL_DX_X, FINAL_DZ_Z, + $ EPS, HUGEVAL, INCR_THRESH + LOGICAL INCR_PREC +* .. +* .. Parameters .. + INTEGER UNSTABLE_STATE, WORKING_STATE, CONV_STATE, + $ NOPROG_STATE, BASE_RESIDUAL, EXTRA_RESIDUAL, + $ EXTRA_Y + PARAMETER ( UNSTABLE_STATE = 0, WORKING_STATE = 1, + $ CONV_STATE = 2, NOPROG_STATE = 3 ) + PARAMETER ( BASE_RESIDUAL = 0, EXTRA_RESIDUAL = 1, + $ EXTRA_Y = 2 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGETRS, SGEMV, BLAS_SGEMV_X, + $ BLAS_SGEMV2_X, SLA_GEAMV, SLA_WWADDW, SLAMCH, + $ CHLA_TRANSTYPE, SLA_LIN_BERR + REAL SLAMCH + CHARACTER CHLA_TRANSTYPE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + IF ( INFO.NE.0 ) RETURN + TRANS = CHLA_TRANSTYPE(TRANS_TYPE) + EPS = SLAMCH( 'Epsilon' ) + HUGEVAL = SLAMCH( 'Overflow' ) +* Force HUGEVAL to Inf + HUGEVAL = HUGEVAL * HUGEVAL +* Using HUGEVAL may lead to spurious underflows. + INCR_THRESH = REAL( N ) * EPS +* + DO J = 1, NRHS + Y_PREC_STATE = EXTRA_RESIDUAL + IF ( Y_PREC_STATE .EQ. EXTRA_Y ) THEN + DO I = 1, N + Y_TAIL( I ) = 0.0 + END DO + END IF + + DXRAT = 0.0 + DXRATMAX = 0.0 + DZRAT = 0.0 + DZRATMAX = 0.0 + FINAL_DX_X = HUGEVAL + FINAL_DZ_Z = HUGEVAL + PREVNORMDX = HUGEVAL + PREV_DZ_Z = HUGEVAL + DZ_Z = HUGEVAL + DX_X = HUGEVAL + + X_STATE = WORKING_STATE + Z_STATE = UNSTABLE_STATE + INCR_PREC = .FALSE. + + DO CNT = 1, ITHRESH +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL SCOPY( N, B( 1, J ), 1, RES, 1 ) + IF ( Y_PREC_STATE .EQ. BASE_RESIDUAL ) THEN + CALL SGEMV( TRANS, N, N, -1.0, A, LDA, Y( 1, J ), 1, + $ 1.0, RES, 1 ) + ELSE IF ( Y_PREC_STATE .EQ. EXTRA_RESIDUAL ) THEN + CALL BLAS_SGEMV_X( TRANS_TYPE, N, N, -1.0, A, LDA, + $ Y( 1, J ), 1, 1.0, RES, 1, PREC_TYPE ) + ELSE + CALL BLAS_SGEMV2_X( TRANS_TYPE, N, N, -1.0, A, LDA, + $ Y( 1, J ), Y_TAIL, 1, 1.0, RES, 1, PREC_TYPE ) + END IF + +! XXX: RES is no longer needed. + CALL SCOPY( N, RES, 1, DY, 1 ) + CALL SGETRS( TRANS, N, 1, AF, LDAF, IPIV, DY, N, INFO ) +* +* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. +* + NORMX = 0.0 + NORMY = 0.0 + NORMDX = 0.0 + DZ_Z = 0.0 + YMIN = HUGEVAL +* + DO I = 1, N + YK = ABS( Y( I, J ) ) + DYK = ABS( DY( I ) ) + + IF ( YK .NE. 0.0 ) THEN + DZ_Z = MAX( DZ_Z, DYK / YK ) + ELSE IF ( DYK .NE. 0.0 ) THEN + DZ_Z = HUGEVAL + END IF + + YMIN = MIN( YMIN, YK ) + + NORMY = MAX( NORMY, YK ) + + IF ( COLEQU ) THEN + NORMX = MAX( NORMX, YK * C( I ) ) + NORMDX = MAX( NORMDX, DYK * C( I ) ) + ELSE + NORMX = NORMY + NORMDX = MAX( NORMDX, DYK ) + END IF + END DO + + IF ( NORMX .NE. 0.0 ) THEN + DX_X = NORMDX / NORMX + ELSE IF ( NORMDX .EQ. 0.0 ) THEN + DX_X = 0.0 + ELSE + DX_X = HUGEVAL + END IF + + DXRAT = NORMDX / PREVNORMDX + DZRAT = DZ_Z / PREV_DZ_Z +* +* Check termination criteria +* + IF (.NOT.IGNORE_CWISE + $ .AND. YMIN*RCOND .LT. INCR_THRESH*NORMY + $ .AND. Y_PREC_STATE .LT. EXTRA_Y) + $ INCR_PREC = .TRUE. + + IF ( X_STATE .EQ. NOPROG_STATE .AND. DXRAT .LE. RTHRESH ) + $ X_STATE = WORKING_STATE + IF ( X_STATE .EQ. WORKING_STATE ) THEN + IF ( DX_X .LE. EPS ) THEN + X_STATE = CONV_STATE + ELSE IF ( DXRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + X_STATE = NOPROG_STATE + END IF + ELSE + IF ( DXRAT .GT. DXRATMAX ) DXRATMAX = DXRAT + END IF + IF ( X_STATE .GT. WORKING_STATE ) FINAL_DX_X = DX_X + END IF + + IF ( Z_STATE .EQ. UNSTABLE_STATE .AND. DZ_Z .LE. DZ_UB ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. NOPROG_STATE .AND. DZRAT .LE. RTHRESH ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. WORKING_STATE ) THEN + IF ( DZ_Z .LE. EPS ) THEN + Z_STATE = CONV_STATE + ELSE IF ( DZ_Z .GT. DZ_UB ) THEN + Z_STATE = UNSTABLE_STATE + DZRATMAX = 0.0 + FINAL_DZ_Z = HUGEVAL + ELSE IF ( DZRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + Z_STATE = NOPROG_STATE + END IF + ELSE + IF ( DZRAT .GT. DZRATMAX ) DZRATMAX = DZRAT + END IF + IF ( Z_STATE .GT. WORKING_STATE ) FINAL_DZ_Z = DZ_Z + END IF +* +* Exit if both normwise and componentwise stopped working, +* but if componentwise is unstable, let it go at least two +* iterations. +* + IF ( X_STATE.NE.WORKING_STATE ) THEN + IF ( IGNORE_CWISE) GOTO 666 + IF ( Z_STATE.EQ.NOPROG_STATE .OR. Z_STATE.EQ.CONV_STATE ) + $ GOTO 666 + IF ( Z_STATE.EQ.UNSTABLE_STATE .AND. CNT.GT.1 ) GOTO 666 + END IF + + IF ( INCR_PREC ) THEN + INCR_PREC = .FALSE. + Y_PREC_STATE = Y_PREC_STATE + 1 + DO I = 1, N + Y_TAIL( I ) = 0.0 + END DO + END IF + + PREVNORMDX = NORMDX + PREV_DZ_Z = DZ_Z +* +* Update soluton. +* + IF ( Y_PREC_STATE .LT. EXTRA_Y ) THEN + CALL SAXPY( N, 1.0, DY, 1, Y( 1, J ), 1 ) + ELSE + CALL SLA_WWADDW( N, Y( 1, J ), Y_TAIL, DY ) + END IF + + END DO +* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. + 666 CONTINUE +* +* Set final_* when cnt hits ithresh. +* + IF ( X_STATE .EQ. WORKING_STATE ) FINAL_DX_X = DX_X + IF ( Z_STATE .EQ. WORKING_STATE ) FINAL_DZ_Z = DZ_Z +* +* Compute error bounds +* + IF (N_NORMS .GE. 1) THEN + ERRS_N( J, LA_LINRX_ERR_I ) = + $ FINAL_DX_X / (1 - DXRATMAX) + END IF + IF ( N_NORMS .GE. 2 ) THEN + ERRS_C( J, LA_LINRX_ERR_I ) = + $ FINAL_DZ_Z / (1 - DZRATMAX) + END IF +* +* Compute componentwise relative backward error from formula +* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL SCOPY( N, B( 1, J ), 1, RES, 1 ) + CALL SGEMV( TRANS, N, N, -1.0, A, LDA, Y(1,J), 1, 1.0, RES, 1 ) + + DO I = 1, N + AYB( I ) = ABS( B( I, J ) ) + END DO +* +* Compute abs(op(A_s))*abs(Y) + abs(B_s). +* + CALL SLA_GEAMV ( TRANS_TYPE, N, N, 1.0, + $ A, LDA, Y(1, J), 1, 1.0, AYB, 1 ) + + CALL SLA_LIN_BERR ( N, N, 1, RES, AYB, BERR_OUT( J ) ) +* +* End of loop for each RHS. +* + END DO +* + RETURN + END diff --git a/dspl/liblapack/SRC/sla_gerpvgrw.f b/dspl/liblapack/SRC/sla_gerpvgrw.f new file mode 100644 index 0000000..a41a035 --- /dev/null +++ b/dspl/liblapack/SRC/sla_gerpvgrw.f @@ -0,0 +1,139 @@ +*> \brief \b SLA_GERPVGRW +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLA_GERPVGRW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLA_GERPVGRW( N, NCOLS, A, LDA, AF, LDAF ) +* +* .. Scalar Arguments .. +* INTEGER N, NCOLS, LDA, LDAF +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), AF( LDAF, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLA_GERPVGRW computes the reciprocal pivot growth factor +*> norm(A)/norm(U). The "max absolute element" norm is used. If this is +*> much less than 1, the stability of the LU factorization of the +*> (equilibrated) matrix A could be poor. This also means that the +*> solution X, estimated condition numbers, and error bounds could be +*> unreliable. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NCOLS +*> \verbatim +*> NCOLS is INTEGER +*> The number of columns of the matrix A. NCOLS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> The factors L and U from the factorization +*> A = P*L*U as computed by SGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +* ===================================================================== + REAL FUNCTION SLA_GERPVGRW( N, NCOLS, A, LDA, AF, LDAF ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER N, NCOLS, LDA, LDAF +* .. +* .. Array Arguments .. + REAL A( LDA, * ), AF( LDAF, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + REAL AMAX, UMAX, RPVGRW +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + RPVGRW = 1.0 + + DO J = 1, NCOLS + AMAX = 0.0 + UMAX = 0.0 + DO I = 1, N + AMAX = MAX( ABS( A( I, J ) ), AMAX ) + END DO + DO I = 1, J + UMAX = MAX( ABS( AF( I, J ) ), UMAX ) + END DO + IF ( UMAX /= 0.0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + SLA_GERPVGRW = RPVGRW + END diff --git a/dspl/liblapack/SRC/sla_lin_berr.f b/dspl/liblapack/SRC/sla_lin_berr.f new file mode 100644 index 0000000..2da50cb --- /dev/null +++ b/dspl/liblapack/SRC/sla_lin_berr.f @@ -0,0 +1,153 @@ +*> \brief \b SLA_LIN_BERR computes a component-wise relative backward error. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLA_LIN_BERR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLA_LIN_BERR ( N, NZ, NRHS, RES, AYB, BERR ) +* +* .. Scalar Arguments .. +* INTEGER N, NZ, NRHS +* .. +* .. Array Arguments .. +* REAL AYB( N, NRHS ), BERR( NRHS ) +* REAL RES( N, NRHS ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLA_LIN_BERR computes componentwise relative backward error from +*> the formula +*> max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NZ +*> \verbatim +*> NZ is INTEGER +*> We add (NZ+1)*SLAMCH( 'Safe minimum' ) to R(i) in the numerator to +*> guard against spuriously zero residuals. Default value is N. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices AYB, RES, and BERR. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is REAL array, dimension (N,NRHS) +*> The residual matrix, i.e., the matrix R in the relative backward +*> error formula above. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is REAL array, dimension (N, NRHS) +*> The denominator in the relative backward error formula above, i.e., +*> the matrix abs(op(A_s))*abs(Y) + abs(B_s). The matrices A, Y, and B +*> are from iterative refinement (see sla_gerfsx_extended.f). +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error from the formula above. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SLA_LIN_BERR ( N, NZ, NRHS, RES, AYB, BERR ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER N, NZ, NRHS +* .. +* .. Array Arguments .. + REAL AYB( N, NRHS ), BERR( NRHS ) + REAL RES( N, NRHS ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + REAL TMP + INTEGER I, J +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + EXTERNAL SLAMCH + REAL SLAMCH + REAL SAFE1 +* .. +* .. Executable Statements .. +* +* Adding SAFE1 to the numerator guards against spuriously zero +* residuals. A similar safeguard is in the SLA_yyAMV routine used +* to compute AYB. +* + SAFE1 = SLAMCH( 'Safe minimum' ) + SAFE1 = (NZ+1)*SAFE1 + + DO J = 1, NRHS + BERR(J) = 0.0 + DO I = 1, N + IF (AYB(I,J) .NE. 0.0) THEN + TMP = (SAFE1+ABS(RES(I,J)))/AYB(I,J) + BERR(J) = MAX( BERR(J), TMP ) + END IF +* +* If AYB is exactly 0.0 (and if computed by SLA_yyAMV), then we know +* the true residual also must be exactly 0.0. +* + END DO + END DO + END diff --git a/dspl/liblapack/SRC/sla_porcond.f b/dspl/liblapack/SRC/sla_porcond.f new file mode 100644 index 0000000..9dd7c58 --- /dev/null +++ b/dspl/liblapack/SRC/sla_porcond.f @@ -0,0 +1,326 @@ +*> \brief \b SLA_PORCOND estimates the Skeel condition number for a symmetric positive-definite matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLA_PORCOND + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLA_PORCOND( UPLO, N, A, LDA, AF, LDAF, CMODE, C, +* INFO, WORK, IWORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LDAF, INFO, CMODE +* REAL A( LDA, * ), AF( LDAF, * ), WORK( * ), +* $ C( * ) +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLA_PORCOND Estimates the Skeel condition number of op(A) * op2(C) +*> where op2 is determined by CMODE as follows +*> CMODE = 1 op2(C) = C +*> CMODE = 0 op2(C) = I +*> CMODE = -1 op2(C) = inv(C) +*> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) +*> is computed by computing scaling factors R such that +*> diag(R)*A*op2(C) is row equilibrated and computing the standard +*> infinity-norm condition number. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by SPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] CMODE +*> \verbatim +*> CMODE is INTEGER +*> Determines op2(C) in the formula op(A) * op2(C) as follows: +*> CMODE = 1 op2(C) = C +*> CMODE = 0 op2(C) = I +*> CMODE = -1 op2(C) = inv(C) +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The vector C in the formula op(A) * op2(C). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realPOcomputational +* +* ===================================================================== + REAL FUNCTION SLA_PORCOND( UPLO, N, A, LDA, AF, LDAF, CMODE, C, + $ INFO, WORK, IWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LDAF, INFO, CMODE + REAL A( LDA, * ), AF( LDAF, * ), WORK( * ), + $ C( * ) +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER KASE, I, J + REAL AINVNM, TMP + LOGICAL UP +* .. +* .. Array Arguments .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SPOTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + SLA_PORCOND = 0.0 +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLA_PORCOND', -INFO ) + RETURN + END IF + + IF( N.EQ.0 ) THEN + SLA_PORCOND = 1.0 + RETURN + END IF + UP = .FALSE. + IF ( LSAME( UPLO, 'U' ) ) UP = .TRUE. +* +* Compute the equilibration matrix R such that +* inv(R)*A*C has unit 1-norm. +* + IF ( UP ) THEN + DO I = 1, N + TMP = 0.0 + IF ( CMODE .EQ. 1 ) THEN + DO J = 1, I + TMP = TMP + ABS( A( J, I ) * C( J ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( I, J ) * C( J ) ) + END DO + ELSE IF ( CMODE .EQ. 0 ) THEN + DO J = 1, I + TMP = TMP + ABS( A( J, I ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( I, J ) ) + END DO + ELSE + DO J = 1, I + TMP = TMP + ABS( A( J ,I ) / C( J ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( I, J ) / C( J ) ) + END DO + END IF + WORK( 2*N+I ) = TMP + END DO + ELSE + DO I = 1, N + TMP = 0.0 + IF ( CMODE .EQ. 1 ) THEN + DO J = 1, I + TMP = TMP + ABS( A( I, J ) * C( J ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( J, I ) * C( J ) ) + END DO + ELSE IF ( CMODE .EQ. 0 ) THEN + DO J = 1, I + TMP = TMP + ABS( A( I, J ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( J, I ) ) + END DO + ELSE + DO J = 1, I + TMP = TMP + ABS( A( I, J ) / C( J ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( J, I ) / C( J ) ) + END DO + END IF + WORK( 2*N+I ) = TMP + END DO + ENDIF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0 + + KASE = 0 + 10 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * WORK( 2*N+I ) + END DO + + IF (UP) THEN + CALL SPOTRS( 'Upper', N, 1, AF, LDAF, WORK, N, INFO ) + ELSE + CALL SPOTRS( 'Lower', N, 1, AF, LDAF, WORK, N, INFO ) + ENDIF +* +* Multiply by inv(C). +* + IF ( CMODE .EQ. 1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) / C( I ) + END DO + ELSE IF ( CMODE .EQ. -1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + ELSE +* +* Multiply by inv(C**T). +* + IF ( CMODE .EQ. 1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) / C( I ) + END DO + ELSE IF ( CMODE .EQ. -1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + + IF ( UP ) THEN + CALL SPOTRS( 'Upper', N, 1, AF, LDAF, WORK, N, INFO ) + ELSE + CALL SPOTRS( 'Lower', N, 1, AF, LDAF, WORK, N, INFO ) + ENDIF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * WORK( 2*N+I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0 ) + $ SLA_PORCOND = ( 1.0 / AINVNM ) +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/sla_porfsx_extended.f b/dspl/liblapack/SRC/sla_porfsx_extended.f new file mode 100644 index 0000000..27baa20 --- /dev/null +++ b/dspl/liblapack/SRC/sla_porfsx_extended.f @@ -0,0 +1,678 @@ +*> \brief \b SLA_PORFSX_EXTENDED improves the computed solution to a system of linear equations for symmetric or Hermitian positive-definite matrices by performing extra-precise iterative refinement and provides error bounds and backward error estimates for the solution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLA_PORFSX_EXTENDED + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLA_PORFSX_EXTENDED( PREC_TYPE, UPLO, N, NRHS, A, LDA, +* AF, LDAF, COLEQU, C, B, LDB, Y, +* LDY, BERR_OUT, N_NORMS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, RES, +* AYB, DY, Y_TAIL, RCOND, ITHRESH, +* RTHRESH, DZ_UB, IGNORE_CWISE, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, +* $ N_NORMS, ITHRESH +* CHARACTER UPLO +* LOGICAL COLEQU, IGNORE_CWISE +* REAL RTHRESH, DZ_UB +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) +* REAL C( * ), AYB(*), RCOND, BERR_OUT( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLA_PORFSX_EXTENDED improves the computed solution to a system of +*> linear equations by performing extra-precise iterative refinement +*> and provides error bounds and backward error estimates for the solution. +*> This subroutine is called by SPORFSX to perform iterative refinement. +*> In addition to normwise error bound, the code provides maximum +*> componentwise error bound if possible. See comments for ERR_BNDS_NORM +*> and ERR_BNDS_COMP for details of the error bounds. Note that this +*> subroutine is only resonsible for setting the second fields of +*> ERR_BNDS_NORM and ERR_BNDS_COMP. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] PREC_TYPE +*> \verbatim +*> PREC_TYPE is INTEGER +*> Specifies the intermediate precision to be used in refinement. +*> The value is defined by ILAPREC(P) where P is a CHARACTER and +*> P = 'S': Single +*> = 'D': Double +*> = 'I': Indigenous +*> = 'X', 'E': Extra +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand-sides, i.e., the number of columns of the +*> matrix B. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by SPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] COLEQU +*> \verbatim +*> COLEQU is LOGICAL +*> If .TRUE. then column equilibration was done to A before calling +*> this routine. This is needed to compute the solution and error +*> bounds correctly. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If COLEQU = .FALSE., C +*> is not accessed. If C is input, each element of C should be a power +*> of the radix to ensure a reliable solution and error estimates. +*> Scaling by powers of the radix does not cause rounding errors unless +*> the result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The right-hand-side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is REAL array, dimension (LDY,NRHS) +*> On entry, the solution matrix X, as computed by SPOTRS. +*> On exit, the improved solution matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +*> +*> \param[out] BERR_OUT +*> \verbatim +*> BERR_OUT is REAL array, dimension (NRHS) +*> On exit, BERR_OUT(j) contains the componentwise relative backward +*> error for right-hand-side j from the formula +*> max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. This is computed by SLA_LIN_BERR. +*> \endverbatim +*> +*> \param[in] N_NORMS +*> \verbatim +*> N_NORMS is INTEGER +*> Determines which error bounds to return (see ERR_BNDS_NORM +*> and ERR_BNDS_COMP). +*> If N_NORMS >= 1 return normwise error bounds. +*> If N_NORMS >= 2 return componentwise error bounds. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is REAL array, dimension (N) +*> Workspace to hold the intermediate residual. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is REAL array, dimension (N) +*> Workspace. This can be the same workspace passed for Y_TAIL. +*> \endverbatim +*> +*> \param[in] DY +*> \verbatim +*> DY is REAL array, dimension (N) +*> Workspace to hold the intermediate solution. +*> \endverbatim +*> +*> \param[in] Y_TAIL +*> \verbatim +*> Y_TAIL is REAL array, dimension (N) +*> Workspace to hold the trailing bits of the intermediate solution. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[in] ITHRESH +*> \verbatim +*> ITHRESH is INTEGER +*> The maximum number of residual computations allowed for +*> refinement. The default is 10. For 'aggressive' set to 100 to +*> permit convergence using approximate factorizations or +*> factorizations other than LU. If the factorization uses a +*> technique other than Gaussian elimination, the guarantees in +*> ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. +*> \endverbatim +*> +*> \param[in] RTHRESH +*> \verbatim +*> RTHRESH is REAL +*> Determines when to stop refinement if the error estimate stops +*> decreasing. Refinement will stop when the next solution no longer +*> satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is +*> the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The +*> default value is 0.5. For 'aggressive' set to 0.9 to permit +*> convergence on extremely ill-conditioned matrices. See LAWN 165 +*> for more details. +*> \endverbatim +*> +*> \param[in] DZ_UB +*> \verbatim +*> DZ_UB is REAL +*> Determines when to start considering componentwise convergence. +*> Componentwise convergence is only considered after each component +*> of the solution Y is stable, which we definte as the relative +*> change in each component being less than DZ_UB. The default value +*> is 0.25, requiring the first bit to be stable. See LAWN 165 for +*> more details. +*> \endverbatim +*> +*> \param[in] IGNORE_CWISE +*> \verbatim +*> IGNORE_CWISE is LOGICAL +*> If .TRUE. then ignore componentwise convergence. Default value +*> is .FALSE.. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> < 0: if INFO = -i, the ith argument to SPOTRS had an illegal +*> value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realPOcomputational +* +* ===================================================================== + SUBROUTINE SLA_PORFSX_EXTENDED( PREC_TYPE, UPLO, N, NRHS, A, LDA, + $ AF, LDAF, COLEQU, C, B, LDB, Y, + $ LDY, BERR_OUT, N_NORMS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, RES, + $ AYB, DY, Y_TAIL, RCOND, ITHRESH, + $ RTHRESH, DZ_UB, IGNORE_CWISE, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, + $ N_NORMS, ITHRESH + CHARACTER UPLO + LOGICAL COLEQU, IGNORE_CWISE + REAL RTHRESH, DZ_UB +* .. +* .. Array Arguments .. + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) + REAL C( * ), AYB(*), RCOND, BERR_OUT( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER UPLO2, CNT, I, J, X_STATE, Z_STATE + REAL YK, DYK, YMIN, NORMY, NORMX, NORMDX, DXRAT, + $ DZRAT, PREVNORMDX, PREV_DZ_Z, DXRATMAX, + $ DZRATMAX, DX_X, DZ_Z, FINAL_DX_X, FINAL_DZ_Z, + $ EPS, HUGEVAL, INCR_THRESH + LOGICAL INCR_PREC +* .. +* .. Parameters .. + INTEGER UNSTABLE_STATE, WORKING_STATE, CONV_STATE, + $ NOPROG_STATE, Y_PREC_STATE, BASE_RESIDUAL, + $ EXTRA_RESIDUAL, EXTRA_Y + PARAMETER ( UNSTABLE_STATE = 0, WORKING_STATE = 1, + $ CONV_STATE = 2, NOPROG_STATE = 3 ) + PARAMETER ( BASE_RESIDUAL = 0, EXTRA_RESIDUAL = 1, + $ EXTRA_Y = 2 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL ILAUPLO + INTEGER ILAUPLO +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SPOTRS, SSYMV, BLAS_SSYMV_X, + $ BLAS_SSYMV2_X, SLA_SYAMV, SLA_WWADDW, + $ SLA_LIN_BERR + REAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + IF (INFO.NE.0) RETURN + EPS = SLAMCH( 'Epsilon' ) + HUGEVAL = SLAMCH( 'Overflow' ) +* Force HUGEVAL to Inf + HUGEVAL = HUGEVAL * HUGEVAL +* Using HUGEVAL may lead to spurious underflows. + INCR_THRESH = REAL( N ) * EPS + + IF ( LSAME ( UPLO, 'L' ) ) THEN + UPLO2 = ILAUPLO( 'L' ) + ELSE + UPLO2 = ILAUPLO( 'U' ) + ENDIF + + DO J = 1, NRHS + Y_PREC_STATE = EXTRA_RESIDUAL + IF ( Y_PREC_STATE .EQ. EXTRA_Y ) THEN + DO I = 1, N + Y_TAIL( I ) = 0.0 + END DO + END IF + + DXRAT = 0.0 + DXRATMAX = 0.0 + DZRAT = 0.0 + DZRATMAX = 0.0 + FINAL_DX_X = HUGEVAL + FINAL_DZ_Z = HUGEVAL + PREVNORMDX = HUGEVAL + PREV_DZ_Z = HUGEVAL + DZ_Z = HUGEVAL + DX_X = HUGEVAL + + X_STATE = WORKING_STATE + Z_STATE = UNSTABLE_STATE + INCR_PREC = .FALSE. + + DO CNT = 1, ITHRESH +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL SCOPY( N, B( 1, J ), 1, RES, 1 ) + IF ( Y_PREC_STATE .EQ. BASE_RESIDUAL ) THEN + CALL SSYMV( UPLO, N, -1.0, A, LDA, Y(1,J), 1, + $ 1.0, RES, 1 ) + ELSE IF ( Y_PREC_STATE .EQ. EXTRA_RESIDUAL ) THEN + CALL BLAS_SSYMV_X( UPLO2, N, -1.0, A, LDA, + $ Y( 1, J ), 1, 1.0, RES, 1, PREC_TYPE ) + ELSE + CALL BLAS_SSYMV2_X(UPLO2, N, -1.0, A, LDA, + $ Y(1, J), Y_TAIL, 1, 1.0, RES, 1, PREC_TYPE) + END IF + +! XXX: RES is no longer needed. + CALL SCOPY( N, RES, 1, DY, 1 ) + CALL SPOTRS( UPLO, N, 1, AF, LDAF, DY, N, INFO ) +* +* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. +* + NORMX = 0.0 + NORMY = 0.0 + NORMDX = 0.0 + DZ_Z = 0.0 + YMIN = HUGEVAL + + DO I = 1, N + YK = ABS( Y( I, J ) ) + DYK = ABS( DY( I ) ) + + IF ( YK .NE. 0.0 ) THEN + DZ_Z = MAX( DZ_Z, DYK / YK ) + ELSE IF ( DYK .NE. 0.0 ) THEN + DZ_Z = HUGEVAL + END IF + + YMIN = MIN( YMIN, YK ) + + NORMY = MAX( NORMY, YK ) + + IF ( COLEQU ) THEN + NORMX = MAX( NORMX, YK * C( I ) ) + NORMDX = MAX( NORMDX, DYK * C( I ) ) + ELSE + NORMX = NORMY + NORMDX = MAX( NORMDX, DYK ) + END IF + END DO + + IF ( NORMX .NE. 0.0 ) THEN + DX_X = NORMDX / NORMX + ELSE IF ( NORMDX .EQ. 0.0 ) THEN + DX_X = 0.0 + ELSE + DX_X = HUGEVAL + END IF + + DXRAT = NORMDX / PREVNORMDX + DZRAT = DZ_Z / PREV_DZ_Z +* +* Check termination criteria. +* + IF ( YMIN*RCOND .LT. INCR_THRESH*NORMY + $ .AND. Y_PREC_STATE .LT. EXTRA_Y ) + $ INCR_PREC = .TRUE. + + IF ( X_STATE .EQ. NOPROG_STATE .AND. DXRAT .LE. RTHRESH ) + $ X_STATE = WORKING_STATE + IF ( X_STATE .EQ. WORKING_STATE ) THEN + IF ( DX_X .LE. EPS ) THEN + X_STATE = CONV_STATE + ELSE IF ( DXRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + X_STATE = NOPROG_STATE + END IF + ELSE + IF ( DXRAT .GT. DXRATMAX ) DXRATMAX = DXRAT + END IF + IF ( X_STATE .GT. WORKING_STATE ) FINAL_DX_X = DX_X + END IF + + IF ( Z_STATE .EQ. UNSTABLE_STATE .AND. DZ_Z .LE. DZ_UB ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. NOPROG_STATE .AND. DZRAT .LE. RTHRESH ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. WORKING_STATE ) THEN + IF ( DZ_Z .LE. EPS ) THEN + Z_STATE = CONV_STATE + ELSE IF ( DZ_Z .GT. DZ_UB ) THEN + Z_STATE = UNSTABLE_STATE + DZRATMAX = 0.0 + FINAL_DZ_Z = HUGEVAL + ELSE IF ( DZRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + Z_STATE = NOPROG_STATE + END IF + ELSE + IF ( DZRAT .GT. DZRATMAX ) DZRATMAX = DZRAT + END IF + IF ( Z_STATE .GT. WORKING_STATE ) FINAL_DZ_Z = DZ_Z + END IF + + IF ( X_STATE.NE.WORKING_STATE.AND. + $ ( IGNORE_CWISE.OR.Z_STATE.NE.WORKING_STATE ) ) + $ GOTO 666 + + IF ( INCR_PREC ) THEN + INCR_PREC = .FALSE. + Y_PREC_STATE = Y_PREC_STATE + 1 + DO I = 1, N + Y_TAIL( I ) = 0.0 + END DO + END IF + + PREVNORMDX = NORMDX + PREV_DZ_Z = DZ_Z +* +* Update soluton. +* + IF (Y_PREC_STATE .LT. EXTRA_Y) THEN + CALL SAXPY( N, 1.0, DY, 1, Y(1,J), 1 ) + ELSE + CALL SLA_WWADDW( N, Y( 1, J ), Y_TAIL, DY ) + END IF + + END DO +* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. + 666 CONTINUE +* +* Set final_* when cnt hits ithresh. +* + IF ( X_STATE .EQ. WORKING_STATE ) FINAL_DX_X = DX_X + IF ( Z_STATE .EQ. WORKING_STATE ) FINAL_DZ_Z = DZ_Z +* +* Compute error bounds. +* + IF ( N_NORMS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = + $ FINAL_DX_X / (1 - DXRATMAX) + END IF + IF ( N_NORMS .GE. 2 ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = + $ FINAL_DZ_Z / (1 - DZRATMAX) + END IF +* +* Compute componentwise relative backward error from formula +* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL SCOPY( N, B( 1, J ), 1, RES, 1 ) + CALL SSYMV( UPLO, N, -1.0, A, LDA, Y(1,J), 1, 1.0, RES, 1 ) + + DO I = 1, N + AYB( I ) = ABS( B( I, J ) ) + END DO +* +* Compute abs(op(A_s))*abs(Y) + abs(B_s). +* + CALL SLA_SYAMV( UPLO2, N, 1.0, + $ A, LDA, Y(1, J), 1, 1.0, AYB, 1 ) + + CALL SLA_LIN_BERR( N, N, 1, RES, AYB, BERR_OUT( J ) ) +* +* End of loop for each RHS. +* + END DO +* + RETURN + END diff --git a/dspl/liblapack/SRC/sla_porpvgrw.f b/dspl/liblapack/SRC/sla_porpvgrw.f new file mode 100644 index 0000000..c824d77 --- /dev/null +++ b/dspl/liblapack/SRC/sla_porpvgrw.f @@ -0,0 +1,208 @@ +*> \brief \b SLA_PORPVGRW computes the reciprocal pivot growth factor norm(A)/norm(U) for a symmetric or Hermitian positive-definite matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLA_PORPVGRW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLA_PORPVGRW( UPLO, NCOLS, A, LDA, AF, LDAF, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER*1 UPLO +* INTEGER NCOLS, LDA, LDAF +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), AF( LDAF, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> SLA_PORPVGRW computes the reciprocal pivot growth factor +*> norm(A)/norm(U). The "max absolute element" norm is used. If this is +*> much less than 1, the stability of the LU factorization of the +*> (equilibrated) matrix A could be poor. This also means that the +*> solution X, estimated condition numbers, and error bounds could be +*> unreliable. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] NCOLS +*> \verbatim +*> NCOLS is INTEGER +*> The number of columns of the matrix A. NCOLS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by SPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realPOcomputational +* +* ===================================================================== + REAL FUNCTION SLA_PORPVGRW( UPLO, NCOLS, A, LDA, AF, LDAF, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER*1 UPLO + INTEGER NCOLS, LDA, LDAF +* .. +* .. Array Arguments .. + REAL A( LDA, * ), AF( LDAF, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + REAL AMAX, UMAX, RPVGRW + LOGICAL UPPER +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Functions .. + EXTERNAL LSAME + LOGICAL LSAME +* .. +* .. Executable Statements .. +* + UPPER = LSAME( 'Upper', UPLO ) +* +* SPOTRF will have factored only the NCOLSxNCOLS leading minor, so +* we restrict the growth search to that minor and use only the first +* 2*NCOLS workspace entries. +* + RPVGRW = 1.0 + DO I = 1, 2*NCOLS + WORK( I ) = 0.0 + END DO +* +* Find the max magnitude entry of each column. +* + IF ( UPPER ) THEN + DO J = 1, NCOLS + DO I = 1, J + WORK( NCOLS+J ) = + $ MAX( ABS( A( I, J ) ), WORK( NCOLS+J ) ) + END DO + END DO + ELSE + DO J = 1, NCOLS + DO I = J, NCOLS + WORK( NCOLS+J ) = + $ MAX( ABS( A( I, J ) ), WORK( NCOLS+J ) ) + END DO + END DO + END IF +* +* Now find the max magnitude entry of each column of the factor in +* AF. No pivoting, so no permutations. +* + IF ( LSAME( 'Upper', UPLO ) ) THEN + DO J = 1, NCOLS + DO I = 1, J + WORK( J ) = MAX( ABS( AF( I, J ) ), WORK( J ) ) + END DO + END DO + ELSE + DO J = 1, NCOLS + DO I = J, NCOLS + WORK( J ) = MAX( ABS( AF( I, J ) ), WORK( J ) ) + END DO + END DO + END IF +* +* Compute the *inverse* of the max element growth factor. Dividing +* by zero would imply the largest entry of the factor's column is +* zero. Than can happen when either the column of A is zero or +* massive pivots made the factor underflow to zero. Neither counts +* as growth in itself, so simply ignore terms with zero +* denominators. +* + IF ( LSAME( 'Upper', UPLO ) ) THEN + DO I = 1, NCOLS + UMAX = WORK( I ) + AMAX = WORK( NCOLS+I ) + IF ( UMAX /= 0.0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + ELSE + DO I = 1, NCOLS + UMAX = WORK( I ) + AMAX = WORK( NCOLS+I ) + IF ( UMAX /= 0.0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + END IF + + SLA_PORPVGRW = RPVGRW + END diff --git a/dspl/liblapack/SRC/sla_syamv.f b/dspl/liblapack/SRC/sla_syamv.f new file mode 100644 index 0000000..d40e7bd --- /dev/null +++ b/dspl/liblapack/SRC/sla_syamv.f @@ -0,0 +1,417 @@ +*> \brief \b SLA_SYAMV computes a matrix-vector product using a symmetric indefinite matrix to calculate error bounds. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLA_SYAMV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLA_SYAMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, +* INCY ) +* +* .. Scalar Arguments .. +* REAL ALPHA, BETA +* INTEGER INCX, INCY, LDA, N, UPLO +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLA_SYAMV performs the matrix-vector operation +*> +*> y := alpha*abs(A)*abs(x) + beta*abs(y), +*> +*> where alpha and beta are scalars, x and y are vectors and A is an +*> n by n symmetric matrix. +*> +*> This function is primarily used in calculating error bounds. +*> To protect against underflow during evaluation, components in +*> the resulting vector are perturbed away from zero by (N+1) +*> times the underflow threshold. To prevent unnecessarily large +*> errors for block-structure embedded in general matrices, +*> "symbolically" zero components are not perturbed. A zero +*> entry is considered "symbolic" if all multiplications involved +*> in computing that entry have at least one zero multiplicand. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is INTEGER +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array A is to be referenced as +*> follows: +*> +*> UPLO = BLAS_UPPER Only the upper triangular part of A +*> is to be referenced. +*> +*> UPLO = BLAS_LOWER Only the lower triangular part of A +*> is to be referenced. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is REAL . +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension ( LDA, n ). +*> Before entry, the leading m by n part of the array A must +*> contain the matrix of coefficients. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, n ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is REAL array, dimension +*> ( 1 + ( n - 1 )*abs( INCX ) ) +*> Before entry, the incremented array X must contain the +*> vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is REAL . +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is REAL array, dimension +*> ( 1 + ( n - 1 )*abs( INCY ) ) +*> Before entry with BETA non-zero, the incremented array Y +*> must contain the vector y. On exit, Y is overwritten by the +*> updated vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> Unchanged on exit. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 2 Blas routine. +*> +*> -- Written on 22-October-1986. +*> Jack Dongarra, Argonne National Lab. +*> Jeremy Du Croz, Nag Central Office. +*> Sven Hammarling, Nag Central Office. +*> Richard Hanson, Sandia National Labs. +*> -- Modified for the absolute-value product, April 2006 +*> Jason Riedy, UC Berkeley +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLA_SYAMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, + $ INCY ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + REAL ALPHA, BETA + INTEGER INCX, INCY, LDA, N, UPLO +* .. +* .. Array Arguments .. + REAL A( LDA, * ), X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL SYMB_ZERO + REAL TEMP, SAFE1 + INTEGER I, INFO, IY, J, JX, KX, KY +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SLAMCH + REAL SLAMCH +* .. +* .. External Functions .. + EXTERNAL ILAUPLO + INTEGER ILAUPLO +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, ABS, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( UPLO.NE.ILAUPLO( 'U' ) .AND. + $ UPLO.NE.ILAUPLO( 'L' ) ) THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 5 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + ELSE IF( INCY.EQ.0 )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'SSYMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Set SAFE1 essentially to be the underflow threshold times the +* number of additions in each row. +* + SAFE1 = SLAMCH( 'Safe minimum' ) + SAFE1 = (N+1)*SAFE1 +* +* Form y := alpha*abs(A)*abs(x) + beta*abs(y). +* +* The O(N^2) SYMB_ZERO tests could be replaced by O(N) queries to +* the inexact flag. Still doesn't help change the iteration order +* to per-column. +* + IY = KY + IF ( INCX.EQ.1 ) THEN + IF ( UPLO .EQ. ILAUPLO( 'U' ) ) THEN + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = ABS( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( J ) )*TEMP + END DO + DO J = I+1, N + TEMP = ABS( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = ABS( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( J ) )*TEMP + END DO + DO J = I+1, N + TEMP = ABS( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + ELSE + IF ( UPLO .EQ. ILAUPLO( 'U' ) ) THEN + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + JX = KX + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = ABS( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( JX ) )*TEMP + JX = JX + INCX + END DO + DO J = I+1, N + TEMP = ABS( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + JX = KX + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = ABS( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( JX ) )*TEMP + JX = JX + INCX + END DO + DO J = I+1, N + TEMP = ABS( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*ABS( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + + END IF +* + RETURN +* +* End of SLA_SYAMV +* + END diff --git a/dspl/liblapack/SRC/sla_syrcond.f b/dspl/liblapack/SRC/sla_syrcond.f new file mode 100644 index 0000000..c4b204c --- /dev/null +++ b/dspl/liblapack/SRC/sla_syrcond.f @@ -0,0 +1,339 @@ +*> \brief \b SLA_SYRCOND estimates the Skeel condition number for a symmetric indefinite matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLA_SYRCOND + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLA_SYRCOND( UPLO, N, A, LDA, AF, LDAF, IPIV, CMODE, +* C, INFO, WORK, IWORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LDAF, INFO, CMODE +* .. +* .. Array Arguments +* INTEGER IWORK( * ), IPIV( * ) +* REAL A( LDA, * ), AF( LDAF, * ), WORK( * ), C( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLA_SYRCOND estimates the Skeel condition number of op(A) * op2(C) +*> where op2 is determined by CMODE as follows +*> CMODE = 1 op2(C) = C +*> CMODE = 0 op2(C) = I +*> CMODE = -1 op2(C) = inv(C) +*> The Skeel condition number cond(A) = norminf( |inv(A)||A| ) +*> is computed by computing scaling factors R such that +*> diag(R)*A*op2(C) is row equilibrated and computing the standard +*> infinity-norm condition number. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by SSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSYTRF. +*> \endverbatim +*> +*> \param[in] CMODE +*> \verbatim +*> CMODE is INTEGER +*> Determines op2(C) in the formula op(A) * op2(C) as follows: +*> CMODE = 1 op2(C) = C +*> CMODE = 0 op2(C) = I +*> CMODE = -1 op2(C) = inv(C) +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The vector C in the formula op(A) * op2(C). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + REAL FUNCTION SLA_SYRCOND( UPLO, N, A, LDA, AF, LDAF, IPIV, CMODE, + $ C, INFO, WORK, IWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LDAF, INFO, CMODE +* .. +* .. Array Arguments + INTEGER IWORK( * ), IPIV( * ) + REAL A( LDA, * ), AF( LDAF, * ), WORK( * ), C( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + CHARACTER NORMIN + INTEGER KASE, I, J + REAL AINVNM, SMLNUM, TMP + LOGICAL UP +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, XERBLA, SSYTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + SLA_SYRCOND = 0.0 +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLA_SYRCOND', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) THEN + SLA_SYRCOND = 1.0 + RETURN + END IF + UP = .FALSE. + IF ( LSAME( UPLO, 'U' ) ) UP = .TRUE. +* +* Compute the equilibration matrix R such that +* inv(R)*A*C has unit 1-norm. +* + IF ( UP ) THEN + DO I = 1, N + TMP = 0.0 + IF ( CMODE .EQ. 1 ) THEN + DO J = 1, I + TMP = TMP + ABS( A( J, I ) * C( J ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( I, J ) * C( J ) ) + END DO + ELSE IF ( CMODE .EQ. 0 ) THEN + DO J = 1, I + TMP = TMP + ABS( A( J, I ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( I, J ) ) + END DO + ELSE + DO J = 1, I + TMP = TMP + ABS( A( J, I ) / C( J ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( I, J ) / C( J ) ) + END DO + END IF + WORK( 2*N+I ) = TMP + END DO + ELSE + DO I = 1, N + TMP = 0.0 + IF ( CMODE .EQ. 1 ) THEN + DO J = 1, I + TMP = TMP + ABS( A( I, J ) * C( J ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( J, I ) * C( J ) ) + END DO + ELSE IF ( CMODE .EQ. 0 ) THEN + DO J = 1, I + TMP = TMP + ABS( A( I, J ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( J, I ) ) + END DO + ELSE + DO J = 1, I + TMP = TMP + ABS( A( I, J) / C( J ) ) + END DO + DO J = I+1, N + TMP = TMP + ABS( A( J, I) / C( J ) ) + END DO + END IF + WORK( 2*N+I ) = TMP + END DO + ENDIF +* +* Estimate the norm of inv(op(A)). +* + SMLNUM = SLAMCH( 'Safe minimum' ) + AINVNM = 0.0 + NORMIN = 'N' + + KASE = 0 + 10 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * WORK( 2*N+I ) + END DO + + IF ( UP ) THEN + CALL SSYTRS( 'U', N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + ELSE + CALL SSYTRS( 'L', N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + ENDIF +* +* Multiply by inv(C). +* + IF ( CMODE .EQ. 1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) / C( I ) + END DO + ELSE IF ( CMODE .EQ. -1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + ELSE +* +* Multiply by inv(C**T). +* + IF ( CMODE .EQ. 1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) / C( I ) + END DO + ELSE IF ( CMODE .EQ. -1 ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + + IF ( UP ) THEN + CALL SSYTRS( 'U', N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + ELSE + CALL SSYTRS( 'L', N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + ENDIF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * WORK( 2*N+I ) + END DO + END IF +* + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0 ) + $ SLA_SYRCOND = ( 1.0 / AINVNM ) +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/sla_syrfsx_extended.f b/dspl/liblapack/SRC/sla_syrfsx_extended.f new file mode 100644 index 0000000..f7b909a --- /dev/null +++ b/dspl/liblapack/SRC/sla_syrfsx_extended.f @@ -0,0 +1,707 @@ +*> \brief \b SLA_SYRFSX_EXTENDED improves the computed solution to a system of linear equations for symmetric indefinite matrices by performing extra-precise iterative refinement and provides error bounds and backward error estimates for the solution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLA_SYRFSX_EXTENDED + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLA_SYRFSX_EXTENDED( PREC_TYPE, UPLO, N, NRHS, A, LDA, +* AF, LDAF, IPIV, COLEQU, C, B, LDB, +* Y, LDY, BERR_OUT, N_NORMS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, RES, +* AYB, DY, Y_TAIL, RCOND, ITHRESH, +* RTHRESH, DZ_UB, IGNORE_CWISE, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, +* $ N_NORMS, ITHRESH +* CHARACTER UPLO +* LOGICAL COLEQU, IGNORE_CWISE +* REAL RTHRESH, DZ_UB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) +* REAL C( * ), AYB( * ), RCOND, BERR_OUT( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> SLA_SYRFSX_EXTENDED improves the computed solution to a system of +*> linear equations by performing extra-precise iterative refinement +*> and provides error bounds and backward error estimates for the solution. +*> This subroutine is called by SSYRFSX to perform iterative refinement. +*> In addition to normwise error bound, the code provides maximum +*> componentwise error bound if possible. See comments for ERR_BNDS_NORM +*> and ERR_BNDS_COMP for details of the error bounds. Note that this +*> subroutine is only resonsible for setting the second fields of +*> ERR_BNDS_NORM and ERR_BNDS_COMP. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] PREC_TYPE +*> \verbatim +*> PREC_TYPE is INTEGER +*> Specifies the intermediate precision to be used in refinement. +*> The value is defined by ILAPREC(P) where P is a CHARACTER and +*> P = 'S': Single +*> = 'D': Double +*> = 'I': Indigenous +*> = 'X', 'E': Extra +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand-sides, i.e., the number of columns of the +*> matrix B. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by SSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSYTRF. +*> \endverbatim +*> +*> \param[in] COLEQU +*> \verbatim +*> COLEQU is LOGICAL +*> If .TRUE. then column equilibration was done to A before calling +*> this routine. This is needed to compute the solution and error +*> bounds correctly. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. If COLEQU = .FALSE., C +*> is not accessed. If C is input, each element of C should be a power +*> of the radix to ensure a reliable solution and error estimates. +*> Scaling by powers of the radix does not cause rounding errors unless +*> the result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The right-hand-side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is REAL array, dimension (LDY,NRHS) +*> On entry, the solution matrix X, as computed by SSYTRS. +*> On exit, the improved solution matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +*> +*> \param[out] BERR_OUT +*> \verbatim +*> BERR_OUT is REAL array, dimension (NRHS) +*> On exit, BERR_OUT(j) contains the componentwise relative backward +*> error for right-hand-side j from the formula +*> max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. This is computed by SLA_LIN_BERR. +*> \endverbatim +*> +*> \param[in] N_NORMS +*> \verbatim +*> N_NORMS is INTEGER +*> Determines which error bounds to return (see ERR_BNDS_NORM +*> and ERR_BNDS_COMP). +*> If N_NORMS >= 1 return normwise error bounds. +*> If N_NORMS >= 2 return componentwise error bounds. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is REAL array, dimension (N) +*> Workspace to hold the intermediate residual. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is REAL array, dimension (N) +*> Workspace. This can be the same workspace passed for Y_TAIL. +*> \endverbatim +*> +*> \param[in] DY +*> \verbatim +*> DY is REAL array, dimension (N) +*> Workspace to hold the intermediate solution. +*> \endverbatim +*> +*> \param[in] Y_TAIL +*> \verbatim +*> Y_TAIL is REAL array, dimension (N) +*> Workspace to hold the trailing bits of the intermediate solution. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[in] ITHRESH +*> \verbatim +*> ITHRESH is INTEGER +*> The maximum number of residual computations allowed for +*> refinement. The default is 10. For 'aggressive' set to 100 to +*> permit convergence using approximate factorizations or +*> factorizations other than LU. If the factorization uses a +*> technique other than Gaussian elimination, the guarantees in +*> ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. +*> \endverbatim +*> +*> \param[in] RTHRESH +*> \verbatim +*> RTHRESH is REAL +*> Determines when to stop refinement if the error estimate stops +*> decreasing. Refinement will stop when the next solution no longer +*> satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is +*> the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The +*> default value is 0.5. For 'aggressive' set to 0.9 to permit +*> convergence on extremely ill-conditioned matrices. See LAWN 165 +*> for more details. +*> \endverbatim +*> +*> \param[in] DZ_UB +*> \verbatim +*> DZ_UB is REAL +*> Determines when to start considering componentwise convergence. +*> Componentwise convergence is only considered after each component +*> of the solution Y is stable, which we definte as the relative +*> change in each component being less than DZ_UB. The default value +*> is 0.25, requiring the first bit to be stable. See LAWN 165 for +*> more details. +*> \endverbatim +*> +*> \param[in] IGNORE_CWISE +*> \verbatim +*> IGNORE_CWISE is LOGICAL +*> If .TRUE. then ignore componentwise convergence. Default value +*> is .FALSE.. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> < 0: if INFO = -i, the ith argument to SLA_SYRFSX_EXTENDED had an illegal +*> value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + SUBROUTINE SLA_SYRFSX_EXTENDED( PREC_TYPE, UPLO, N, NRHS, A, LDA, + $ AF, LDAF, IPIV, COLEQU, C, B, LDB, + $ Y, LDY, BERR_OUT, N_NORMS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, RES, + $ AYB, DY, Y_TAIL, RCOND, ITHRESH, + $ RTHRESH, DZ_UB, IGNORE_CWISE, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, + $ N_NORMS, ITHRESH + CHARACTER UPLO + LOGICAL COLEQU, IGNORE_CWISE + REAL RTHRESH, DZ_UB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) + REAL C( * ), AYB( * ), RCOND, BERR_OUT( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER UPLO2, CNT, I, J, X_STATE, Z_STATE + REAL YK, DYK, YMIN, NORMY, NORMX, NORMDX, DXRAT, + $ DZRAT, PREVNORMDX, PREV_DZ_Z, DXRATMAX, + $ DZRATMAX, DX_X, DZ_Z, FINAL_DX_X, FINAL_DZ_Z, + $ EPS, HUGEVAL, INCR_THRESH + LOGICAL INCR_PREC, UPPER +* .. +* .. Parameters .. + INTEGER UNSTABLE_STATE, WORKING_STATE, CONV_STATE, + $ NOPROG_STATE, Y_PREC_STATE, BASE_RESIDUAL, + $ EXTRA_RESIDUAL, EXTRA_Y + PARAMETER ( UNSTABLE_STATE = 0, WORKING_STATE = 1, + $ CONV_STATE = 2, NOPROG_STATE = 3 ) + PARAMETER ( BASE_RESIDUAL = 0, EXTRA_RESIDUAL = 1, + $ EXTRA_Y = 2 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL ILAUPLO + INTEGER ILAUPLO +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SSYTRS, SSYMV, BLAS_SSYMV_X, + $ BLAS_SSYMV2_X, SLA_SYAMV, SLA_WWADDW, + $ SLA_LIN_BERR + REAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDY.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLA_SYRFSX_EXTENDED', -INFO ) + RETURN + END IF + EPS = SLAMCH( 'Epsilon' ) + HUGEVAL = SLAMCH( 'Overflow' ) +* Force HUGEVAL to Inf + HUGEVAL = HUGEVAL * HUGEVAL +* Using HUGEVAL may lead to spurious underflows. + INCR_THRESH = REAL( N )*EPS + + IF ( LSAME ( UPLO, 'L' ) ) THEN + UPLO2 = ILAUPLO( 'L' ) + ELSE + UPLO2 = ILAUPLO( 'U' ) + ENDIF + + DO J = 1, NRHS + Y_PREC_STATE = EXTRA_RESIDUAL + IF ( Y_PREC_STATE .EQ. EXTRA_Y ) THEN + DO I = 1, N + Y_TAIL( I ) = 0.0 + END DO + END IF + + DXRAT = 0.0 + DXRATMAX = 0.0 + DZRAT = 0.0 + DZRATMAX = 0.0 + FINAL_DX_X = HUGEVAL + FINAL_DZ_Z = HUGEVAL + PREVNORMDX = HUGEVAL + PREV_DZ_Z = HUGEVAL + DZ_Z = HUGEVAL + DX_X = HUGEVAL + + X_STATE = WORKING_STATE + Z_STATE = UNSTABLE_STATE + INCR_PREC = .FALSE. + + DO CNT = 1, ITHRESH +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL SCOPY( N, B( 1, J ), 1, RES, 1 ) + IF (Y_PREC_STATE .EQ. BASE_RESIDUAL) THEN + CALL SSYMV( UPLO, N, -1.0, A, LDA, Y(1,J), 1, + $ 1.0, RES, 1 ) + ELSE IF (Y_PREC_STATE .EQ. EXTRA_RESIDUAL) THEN + CALL BLAS_SSYMV_X( UPLO2, N, -1.0, A, LDA, + $ Y( 1, J ), 1, 1.0, RES, 1, PREC_TYPE ) + ELSE + CALL BLAS_SSYMV2_X(UPLO2, N, -1.0, A, LDA, + $ Y(1, J), Y_TAIL, 1, 1.0, RES, 1, PREC_TYPE) + END IF + +! XXX: RES is no longer needed. + CALL SCOPY( N, RES, 1, DY, 1 ) + CALL SSYTRS( UPLO, N, 1, AF, LDAF, IPIV, DY, N, INFO ) +* +* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. +* + NORMX = 0.0 + NORMY = 0.0 + NORMDX = 0.0 + DZ_Z = 0.0 + YMIN = HUGEVAL + + DO I = 1, N + YK = ABS( Y( I, J ) ) + DYK = ABS( DY( I ) ) + + IF ( YK .NE. 0.0 ) THEN + DZ_Z = MAX( DZ_Z, DYK / YK ) + ELSE IF ( DYK .NE. 0.0 ) THEN + DZ_Z = HUGEVAL + END IF + + YMIN = MIN( YMIN, YK ) + + NORMY = MAX( NORMY, YK ) + + IF ( COLEQU ) THEN + NORMX = MAX( NORMX, YK * C( I ) ) + NORMDX = MAX( NORMDX, DYK * C( I ) ) + ELSE + NORMX = NORMY + NORMDX = MAX(NORMDX, DYK) + END IF + END DO + + IF ( NORMX .NE. 0.0 ) THEN + DX_X = NORMDX / NORMX + ELSE IF ( NORMDX .EQ. 0.0 ) THEN + DX_X = 0.0 + ELSE + DX_X = HUGEVAL + END IF + + DXRAT = NORMDX / PREVNORMDX + DZRAT = DZ_Z / PREV_DZ_Z +* +* Check termination criteria. +* + IF ( YMIN*RCOND .LT. INCR_THRESH*NORMY + $ .AND. Y_PREC_STATE .LT. EXTRA_Y ) + $ INCR_PREC = .TRUE. + + IF ( X_STATE .EQ. NOPROG_STATE .AND. DXRAT .LE. RTHRESH ) + $ X_STATE = WORKING_STATE + IF ( X_STATE .EQ. WORKING_STATE ) THEN + IF ( DX_X .LE. EPS ) THEN + X_STATE = CONV_STATE + ELSE IF ( DXRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + X_STATE = NOPROG_STATE + END IF + ELSE + IF ( DXRAT .GT. DXRATMAX ) DXRATMAX = DXRAT + END IF + IF ( X_STATE .GT. WORKING_STATE ) FINAL_DX_X = DX_X + END IF + + IF ( Z_STATE .EQ. UNSTABLE_STATE .AND. DZ_Z .LE. DZ_UB ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. NOPROG_STATE .AND. DZRAT .LE. RTHRESH ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. WORKING_STATE ) THEN + IF ( DZ_Z .LE. EPS ) THEN + Z_STATE = CONV_STATE + ELSE IF ( DZ_Z .GT. DZ_UB ) THEN + Z_STATE = UNSTABLE_STATE + DZRATMAX = 0.0 + FINAL_DZ_Z = HUGEVAL + ELSE IF ( DZRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + Z_STATE = NOPROG_STATE + END IF + ELSE + IF ( DZRAT .GT. DZRATMAX ) DZRATMAX = DZRAT + END IF + IF ( Z_STATE .GT. WORKING_STATE ) FINAL_DZ_Z = DZ_Z + END IF + + IF ( X_STATE.NE.WORKING_STATE.AND. + $ ( IGNORE_CWISE.OR.Z_STATE.NE.WORKING_STATE ) ) + $ GOTO 666 + + IF ( INCR_PREC ) THEN + INCR_PREC = .FALSE. + Y_PREC_STATE = Y_PREC_STATE + 1 + DO I = 1, N + Y_TAIL( I ) = 0.0 + END DO + END IF + + PREVNORMDX = NORMDX + PREV_DZ_Z = DZ_Z +* +* Update soluton. +* + IF (Y_PREC_STATE .LT. EXTRA_Y) THEN + CALL SAXPY( N, 1.0, DY, 1, Y(1,J), 1 ) + ELSE + CALL SLA_WWADDW( N, Y(1,J), Y_TAIL, DY ) + END IF + + END DO +* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. + 666 CONTINUE +* +* Set final_* when cnt hits ithresh. +* + IF ( X_STATE .EQ. WORKING_STATE ) FINAL_DX_X = DX_X + IF ( Z_STATE .EQ. WORKING_STATE ) FINAL_DZ_Z = DZ_Z +* +* Compute error bounds. +* + IF ( N_NORMS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = + $ FINAL_DX_X / (1 - DXRATMAX) + END IF + IF ( N_NORMS .GE. 2 ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = + $ FINAL_DZ_Z / (1 - DZRATMAX) + END IF +* +* Compute componentwise relative backward error from formula +* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). + CALL SCOPY( N, B( 1, J ), 1, RES, 1 ) + CALL SSYMV( UPLO, N, -1.0, A, LDA, Y(1,J), 1, 1.0, RES, 1 ) + + DO I = 1, N + AYB( I ) = ABS( B( I, J ) ) + END DO +* +* Compute abs(op(A_s))*abs(Y) + abs(B_s). +* + CALL SLA_SYAMV( UPLO2, N, 1.0, + $ A, LDA, Y(1, J), 1, 1.0, AYB, 1 ) + + CALL SLA_LIN_BERR( N, N, 1, RES, AYB, BERR_OUT( J ) ) +* +* End of loop for each RHS. +* + END DO +* + RETURN + END diff --git a/dspl/liblapack/SRC/sla_syrpvgrw.f b/dspl/liblapack/SRC/sla_syrpvgrw.f new file mode 100644 index 0000000..f5eb81b --- /dev/null +++ b/dspl/liblapack/SRC/sla_syrpvgrw.f @@ -0,0 +1,320 @@ +*> \brief \b SLA_SYRPVGRW computes the reciprocal pivot growth factor norm(A)/norm(U) for a symmetric indefinite matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLA_SYRPVGRW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLA_SYRPVGRW( UPLO, N, INFO, A, LDA, AF, LDAF, IPIV, +* WORK ) +* +* .. Scalar Arguments .. +* CHARACTER*1 UPLO +* INTEGER N, INFO, LDA, LDAF +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), AF( LDAF, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> SLA_SYRPVGRW computes the reciprocal pivot growth factor +*> norm(A)/norm(U). The "max absolute element" norm is used. If this is +*> much less than 1, the stability of the LU factorization of the +*> (equilibrated) matrix A could be poor. This also means that the +*> solution X, estimated condition numbers, and error bounds could be +*> unreliable. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] INFO +*> \verbatim +*> INFO is INTEGER +*> The value of INFO returned from SSYTRF, .i.e., the pivot in +*> column INFO is exactly 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by SSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSYTRF. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + REAL FUNCTION SLA_SYRPVGRW( UPLO, N, INFO, A, LDA, AF, LDAF, IPIV, + $ WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER*1 UPLO + INTEGER N, INFO, LDA, LDAF +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), AF( LDAF, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER NCOLS, I, J, K, KP + REAL AMAX, UMAX, RPVGRW, TMP + LOGICAL UPPER +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Functions .. + EXTERNAL LSAME + LOGICAL LSAME +* .. +* .. Executable Statements .. +* + UPPER = LSAME( 'Upper', UPLO ) + IF ( INFO.EQ.0 ) THEN + IF ( UPPER ) THEN + NCOLS = 1 + ELSE + NCOLS = N + END IF + ELSE + NCOLS = INFO + END IF + + RPVGRW = 1.0 + DO I = 1, 2*N + WORK( I ) = 0.0 + END DO +* +* Find the max magnitude entry of each column of A. Compute the max +* for all N columns so we can apply the pivot permutation while +* looping below. Assume a full factorization is the common case. +* + IF ( UPPER ) THEN + DO J = 1, N + DO I = 1, J + WORK( N+I ) = MAX( ABS( A( I, J ) ), WORK( N+I ) ) + WORK( N+J ) = MAX( ABS( A( I, J ) ), WORK( N+J ) ) + END DO + END DO + ELSE + DO J = 1, N + DO I = J, N + WORK( N+I ) = MAX( ABS( A( I, J ) ), WORK( N+I ) ) + WORK( N+J ) = MAX( ABS( A( I, J ) ), WORK( N+J ) ) + END DO + END DO + END IF +* +* Now find the max magnitude entry of each column of U or L. Also +* permute the magnitudes of A above so they're in the same order as +* the factor. +* +* The iteration orders and permutations were copied from ssytrs. +* Calls to SSWAP would be severe overkill. +* + IF ( UPPER ) THEN + K = N + DO WHILE ( K .LT. NCOLS .AND. K.GT.0 ) + IF ( IPIV( K ).GT.0 ) THEN +! 1x1 pivot + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + DO I = 1, K + WORK( K ) = MAX( ABS( AF( I, K ) ), WORK( K ) ) + END DO + K = K - 1 + ELSE +! 2x2 pivot + KP = -IPIV( K ) + TMP = WORK( N+K-1 ) + WORK( N+K-1 ) = WORK( N+KP ) + WORK( N+KP ) = TMP + DO I = 1, K-1 + WORK( K ) = MAX( ABS( AF( I, K ) ), WORK( K ) ) + WORK( K-1 ) = MAX( ABS( AF( I, K-1 ) ), WORK( K-1 ) ) + END DO + WORK( K ) = MAX( ABS( AF( K, K ) ), WORK( K ) ) + K = K - 2 + END IF + END DO + K = NCOLS + DO WHILE ( K .LE. N ) + IF ( IPIV( K ).GT.0 ) THEN + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + K = K + 1 + ELSE + KP = -IPIV( K ) + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + K = K + 2 + END IF + END DO + ELSE + K = 1 + DO WHILE ( K .LE. NCOLS ) + IF ( IPIV( K ).GT.0 ) THEN +! 1x1 pivot + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + DO I = K, N + WORK( K ) = MAX( ABS( AF( I, K ) ), WORK( K ) ) + END DO + K = K + 1 + ELSE +! 2x2 pivot + KP = -IPIV( K ) + TMP = WORK( N+K+1 ) + WORK( N+K+1 ) = WORK( N+KP ) + WORK( N+KP ) = TMP + DO I = K+1, N + WORK( K ) = MAX( ABS( AF( I, K ) ), WORK( K ) ) + WORK( K+1 ) = MAX( ABS( AF(I, K+1 ) ), WORK( K+1 ) ) + END DO + WORK( K ) = MAX( ABS( AF( K, K ) ), WORK( K ) ) + K = K + 2 + END IF + END DO + K = NCOLS + DO WHILE ( K .GE. 1 ) + IF ( IPIV( K ).GT.0 ) THEN + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + K = K - 1 + ELSE + KP = -IPIV( K ) + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + K = K - 2 + ENDIF + END DO + END IF +* +* Compute the *inverse* of the max element growth factor. Dividing +* by zero would imply the largest entry of the factor's column is +* zero. Than can happen when either the column of A is zero or +* massive pivots made the factor underflow to zero. Neither counts +* as growth in itself, so simply ignore terms with zero +* denominators. +* + IF ( UPPER ) THEN + DO I = NCOLS, N + UMAX = WORK( I ) + AMAX = WORK( N+I ) + IF ( UMAX /= 0.0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + ELSE + DO I = 1, NCOLS + UMAX = WORK( I ) + AMAX = WORK( N+I ) + IF ( UMAX /= 0.0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + END IF + + SLA_SYRPVGRW = RPVGRW + END diff --git a/dspl/liblapack/SRC/sla_wwaddw.f b/dspl/liblapack/SRC/sla_wwaddw.f new file mode 100644 index 0000000..96a7d35 --- /dev/null +++ b/dspl/liblapack/SRC/sla_wwaddw.f @@ -0,0 +1,111 @@ +*> \brief \b SLA_WWADDW adds a vector into a doubled-single vector. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLA_WWADDW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLA_WWADDW( N, X, Y, W ) +* +* .. Scalar Arguments .. +* INTEGER N +* .. +* .. Array Arguments .. +* REAL X( * ), Y( * ), W( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLA_WWADDW adds a vector W into a doubled-single vector (X, Y). +*> +*> This works for all extant IBM's hex and binary floating point +*> arithmetics, but not for decimal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of vectors X, Y, and W. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (N) +*> The first part of the doubled-single accumulation vector. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is REAL array, dimension (N) +*> The second part of the doubled-single accumulation vector. +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The vector to be added. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SLA_WWADDW( N, X, Y, W ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER N +* .. +* .. Array Arguments .. + REAL X( * ), Y( * ), W( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + REAL S + INTEGER I +* .. +* .. Executable Statements .. +* + DO 10 I = 1, N + S = X(I) + W(I) + S = (S + S) - S + Y(I) = ((X(I) - S) + W(I)) + Y(I) + X(I) = S + 10 CONTINUE + RETURN + END diff --git a/dspl/liblapack/SRC/slabad.f b/dspl/liblapack/SRC/slabad.f new file mode 100644 index 0000000..d6502bb --- /dev/null +++ b/dspl/liblapack/SRC/slabad.f @@ -0,0 +1,105 @@ +*> \brief \b SLABAD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLABAD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLABAD( SMALL, LARGE ) +* +* .. Scalar Arguments .. +* REAL LARGE, SMALL +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLABAD takes as input the values computed by SLAMCH for underflow and +*> overflow, and returns the square root of each of these values if the +*> log of LARGE is sufficiently large. This subroutine is intended to +*> identify machines with a large exponent range, such as the Crays, and +*> redefine the underflow and overflow limits to be the square roots of +*> the values computed by SLAMCH. This subroutine is needed because +*> SLAMCH does not compensate for poor arithmetic in the upper half of +*> the exponent range, as is found on a Cray. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in,out] SMALL +*> \verbatim +*> SMALL is REAL +*> On entry, the underflow threshold as computed by SLAMCH. +*> On exit, if LOG10(LARGE) is sufficiently large, the square +*> root of SMALL, otherwise unchanged. +*> \endverbatim +*> +*> \param[in,out] LARGE +*> \verbatim +*> LARGE is REAL +*> On entry, the overflow threshold as computed by SLAMCH. +*> On exit, if LOG10(LARGE) is sufficiently large, the square +*> root of LARGE, otherwise unchanged. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLABAD( SMALL, LARGE ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + REAL LARGE, SMALL +* .. +* +* ===================================================================== +* +* .. Intrinsic Functions .. + INTRINSIC LOG10, SQRT +* .. +* .. Executable Statements .. +* +* If it looks like we're on a Cray, take the square root of +* SMALL and LARGE to avoid overflow and underflow problems. +* + IF( LOG10( LARGE ).GT.2000. ) THEN + SMALL = SQRT( SMALL ) + LARGE = SQRT( LARGE ) + END IF +* + RETURN +* +* End of SLABAD +* + END diff --git a/dspl/liblapack/SRC/slabrd.f b/dspl/liblapack/SRC/slabrd.f new file mode 100644 index 0000000..f4e3d73 --- /dev/null +++ b/dspl/liblapack/SRC/slabrd.f @@ -0,0 +1,381 @@ +*> \brief \b SLABRD reduces the first nb rows and columns of a general matrix to a bidiagonal form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLABRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, +* LDY ) +* +* .. Scalar Arguments .. +* INTEGER LDA, LDX, LDY, M, N, NB +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), D( * ), E( * ), TAUP( * ), +* $ TAUQ( * ), X( LDX, * ), Y( LDY, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLABRD reduces the first NB rows and columns of a real general +*> m by n matrix A to upper or lower bidiagonal form by an orthogonal +*> transformation Q**T * A * P, and returns the matrices X and Y which +*> are needed to apply the transformation to the unreduced part of A. +*> +*> If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower +*> bidiagonal form. +*> +*> This is an auxiliary routine called by SGEBRD +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows in the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in the matrix A. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of leading rows and columns of A to be reduced. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the m by n general matrix to be reduced. +*> On exit, the first NB rows and columns of the matrix are +*> overwritten; the rest of the array is unchanged. +*> If m >= n, elements on and below the diagonal in the first NB +*> columns, with the array TAUQ, represent the orthogonal +*> matrix Q as a product of elementary reflectors; and +*> elements above the diagonal in the first NB rows, with the +*> array TAUP, represent the orthogonal matrix P as a product +*> of elementary reflectors. +*> If m < n, elements below the diagonal in the first NB +*> columns, with the array TAUQ, represent the orthogonal +*> matrix Q as a product of elementary reflectors, and +*> elements on and above the diagonal in the first NB rows, +*> with the array TAUP, represent the orthogonal matrix P as +*> a product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (NB) +*> The diagonal elements of the first NB rows and columns of +*> the reduced matrix. D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (NB) +*> The off-diagonal elements of the first NB rows and columns of +*> the reduced matrix. +*> \endverbatim +*> +*> \param[out] TAUQ +*> \verbatim +*> TAUQ is REAL array, dimension (NB) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix Q. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP +*> \verbatim +*> TAUP is REAL array, dimension (NB) +*> The scalar factors of the elementary reflectors which +*> represent the orthogonal matrix P. See Further Details. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NB) +*> The m-by-nb matrix X required to update the unreduced part +*> of A. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,M). +*> \endverbatim +*> +*> \param[out] Y +*> \verbatim +*> Y is REAL array, dimension (LDY,NB) +*> The n-by-nb matrix Y required to update the unreduced part +*> of A. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrices Q and P are represented as products of elementary +*> reflectors: +*> +*> Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**T and G(i) = I - taup * u * u**T +*> +*> where tauq and taup are real scalars, and v and u are real vectors. +*> +*> If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in +*> A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in +*> A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in +*> A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in +*> A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> The elements of the vectors v and u together form the m-by-nb matrix +*> V and the nb-by-n matrix U**T which are needed, with X and Y, to apply +*> the transformation to the unreduced part of the matrix, using a block +*> update of the form: A := A - V*Y**T - X*U**T. +*> +*> The contents of A on exit are illustrated by the following examples +*> with nb = 2: +*> +*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +*> +*> ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) +*> ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) +*> ( v1 v2 a a a ) ( v1 1 a a a a ) +*> ( v1 v2 a a a ) ( v1 v2 a a a a ) +*> ( v1 v2 a a a ) ( v1 v2 a a a a ) +*> ( v1 v2 a a a ) +*> +*> where a denotes an element of the original matrix which is unchanged, +*> vi denotes an element of the vector defining H(i), and ui an element +*> of the vector defining G(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, + $ LDY ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER LDA, LDX, LDY, M, N, NB +* .. +* .. Array Arguments .. + REAL A( LDA, * ), D( * ), E( * ), TAUP( * ), + $ TAUQ( * ), X( LDX, * ), Y( LDY, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SLARFG, SSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, NB +* +* Update A(i:m,i) +* + CALL SGEMV( 'No transpose', M-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I, I ), 1 ) + CALL SGEMV( 'No transpose', M-I+1, I-1, -ONE, X( I, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+1:m,i) +* + CALL SLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL SGEMV( 'Transpose', M-I+1, N-I, ONE, A( I, I+1 ), + $ LDA, A( I, I ), 1, ZERO, Y( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', M-I+1, I-1, ONE, A( I, 1 ), LDA, + $ A( I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL SGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', M-I+1, I-1, ONE, X( I, 1 ), LDX, + $ A( I, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL SGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ), + $ LDA, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL SSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) +* +* Update A(i,i+1:n) +* + CALL SGEMV( 'No transpose', N-I, I, -ONE, Y( I+1, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I+1 ), LDA ) + CALL SGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ), + $ LDA, X( I, 1 ), LDX, ONE, A( I, I+1 ), LDA ) +* +* Generate reflection P(i) to annihilate A(i,i+2:n) +* + CALL SLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ), + $ LDA, TAUP( I ) ) + E( I ) = A( I, I+1 ) + A( I, I+1 ) = ONE +* +* Compute X(i+1:m,i) +* + CALL SGEMV( 'No transpose', M-I, N-I, ONE, A( I+1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', N-I, I, ONE, Y( I+1, 1 ), LDY, + $ A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 ) + CALL SGEMV( 'No transpose', M-I, I, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL SGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 ) + CALL SGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL SSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, NB +* +* Update A(i,i:n) +* + CALL SGEMV( 'No transpose', N-I+1, I-1, -ONE, Y( I, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I ), LDA ) + CALL SGEMV( 'Transpose', I-1, N-I+1, -ONE, A( 1, I ), LDA, + $ X( I, 1 ), LDX, ONE, A( I, I ), LDA ) +* +* Generate reflection P(i) to annihilate A(i,i+1:n) +* + CALL SLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = A( I, I ) + IF( I.LT.M ) THEN + A( I, I ) = ONE +* +* Compute X(i+1:m,i) +* + CALL SGEMV( 'No transpose', M-I, N-I+1, ONE, A( I+1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', N-I+1, I-1, ONE, Y( I, 1 ), LDY, + $ A( I, I ), LDA, ZERO, X( 1, I ), 1 ) + CALL SGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL SGEMV( 'No transpose', I-1, N-I+1, ONE, A( 1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( 1, I ), 1 ) + CALL SGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL SSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) +* +* Update A(i+1:m,i) +* + CALL SGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I+1, I ), 1 ) + CALL SGEMV( 'No transpose', M-I, I, -ONE, X( I+1, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I+1, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+2:m,i) +* + CALL SLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = A( I+1, I ) + A( I+1, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL SGEMV( 'Transpose', M-I, N-I, ONE, A( I+1, I+1 ), + $ LDA, A( I+1, I ), 1, ZERO, Y( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', M-I, I-1, ONE, A( I+1, 1 ), LDA, + $ A( I+1, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL SGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', M-I, I, ONE, X( I+1, 1 ), LDX, + $ A( I+1, I ), 1, ZERO, Y( 1, I ), 1 ) + CALL SGEMV( 'Transpose', I, N-I, -ONE, A( 1, I+1 ), LDA, + $ Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL SSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) + END IF + 20 CONTINUE + END IF + RETURN +* +* End of SLABRD +* + END diff --git a/dspl/liblapack/SRC/slacn2.f b/dspl/liblapack/SRC/slacn2.f new file mode 100644 index 0000000..7874281 --- /dev/null +++ b/dspl/liblapack/SRC/slacn2.f @@ -0,0 +1,294 @@ +*> \brief \b SLACN2 estimates the 1-norm of a square matrix, using reverse communication for evaluating matrix-vector products. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLACN2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLACN2( N, V, X, ISGN, EST, KASE, ISAVE ) +* +* .. Scalar Arguments .. +* INTEGER KASE, N +* REAL EST +* .. +* .. Array Arguments .. +* INTEGER ISGN( * ), ISAVE( 3 ) +* REAL V( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLACN2 estimates the 1-norm of a square, real matrix A. +*> Reverse communication is used for evaluating matrix-vector products. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 1. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is REAL array, dimension (N) +*> On the final return, V = A*W, where EST = norm(V)/norm(W) +*> (W is not returned). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (N) +*> On an intermediate return, X should be overwritten by +*> A * X, if KASE=1, +*> A**T * X, if KASE=2, +*> and SLACN2 must be re-called with all the other parameters +*> unchanged. +*> \endverbatim +*> +*> \param[out] ISGN +*> \verbatim +*> ISGN is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[in,out] EST +*> \verbatim +*> EST is REAL +*> On entry with KASE = 1 or 2 and ISAVE(1) = 3, EST should be +*> unchanged from the previous call to SLACN2. +*> On exit, EST is an estimate (a lower bound) for norm(A). +*> \endverbatim +*> +*> \param[in,out] KASE +*> \verbatim +*> KASE is INTEGER +*> On the initial call to SLACN2, KASE should be 0. +*> On an intermediate return, KASE will be 1 or 2, indicating +*> whether X should be overwritten by A * X or A**T * X. +*> On the final return from SLACN2, KASE will again be 0. +*> \endverbatim +*> +*> \param[in,out] ISAVE +*> \verbatim +*> ISAVE is INTEGER array, dimension (3) +*> ISAVE is used to save variables between calls to SLACN2 +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Originally named SONEST, dated March 16, 1988. +*> +*> This is a thread safe version of SLACON, which uses the array ISAVE +*> in place of a SAVE statement, as follows: +*> +*> SLACON SLACN2 +*> JUMP ISAVE(1) +*> J ISAVE(2) +*> ITER ISAVE(3) +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Nick Higham, University of Manchester +* +*> \par References: +* ================ +*> +*> N.J. Higham, "FORTRAN codes for estimating the one-norm of +*> a real or complex matrix, with applications to condition estimation", +*> ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. +*> +* ===================================================================== + SUBROUTINE SLACN2( N, V, X, ISGN, EST, KASE, ISAVE ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER KASE, N + REAL EST +* .. +* .. Array Arguments .. + INTEGER ISGN( * ), ISAVE( 3 ) + REAL V( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, JLAST + REAL ALTSGN, ESTOLD, TEMP +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SASUM + EXTERNAL ISAMAX, SASUM +* .. +* .. External Subroutines .. + EXTERNAL SCOPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, NINT, REAL, SIGN +* .. +* .. Executable Statements .. +* + IF( KASE.EQ.0 ) THEN + DO 10 I = 1, N + X( I ) = ONE / REAL( N ) + 10 CONTINUE + KASE = 1 + ISAVE( 1 ) = 1 + RETURN + END IF +* + GO TO ( 20, 40, 70, 110, 140 )ISAVE( 1 ) +* +* ................ ENTRY (ISAVE( 1 ) = 1) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. +* + 20 CONTINUE + IF( N.EQ.1 ) THEN + V( 1 ) = X( 1 ) + EST = ABS( V( 1 ) ) +* ... QUIT + GO TO 150 + END IF + EST = SASUM( N, X, 1 ) +* + DO 30 I = 1, N + X( I ) = SIGN( ONE, X( I ) ) + ISGN( I ) = NINT( X( I ) ) + 30 CONTINUE + KASE = 2 + ISAVE( 1 ) = 2 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 2) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. +* + 40 CONTINUE + ISAVE( 2 ) = ISAMAX( N, X, 1 ) + ISAVE( 3 ) = 2 +* +* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. +* + 50 CONTINUE + DO 60 I = 1, N + X( I ) = ZERO + 60 CONTINUE + X( ISAVE( 2 ) ) = ONE + KASE = 1 + ISAVE( 1 ) = 3 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 3) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 70 CONTINUE + CALL SCOPY( N, X, 1, V, 1 ) + ESTOLD = EST + EST = SASUM( N, V, 1 ) + DO 80 I = 1, N + IF( NINT( SIGN( ONE, X( I ) ) ).NE.ISGN( I ) ) + $ GO TO 90 + 80 CONTINUE +* REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. + GO TO 120 +* + 90 CONTINUE +* TEST FOR CYCLING. + IF( EST.LE.ESTOLD ) + $ GO TO 120 +* + DO 100 I = 1, N + X( I ) = SIGN( ONE, X( I ) ) + ISGN( I ) = NINT( X( I ) ) + 100 CONTINUE + KASE = 2 + ISAVE( 1 ) = 4 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 4) +* X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. +* + 110 CONTINUE + JLAST = ISAVE( 2 ) + ISAVE( 2 ) = ISAMAX( N, X, 1 ) + IF( ( X( JLAST ).NE.ABS( X( ISAVE( 2 ) ) ) ) .AND. + $ ( ISAVE( 3 ).LT.ITMAX ) ) THEN + ISAVE( 3 ) = ISAVE( 3 ) + 1 + GO TO 50 + END IF +* +* ITERATION COMPLETE. FINAL STAGE. +* + 120 CONTINUE + ALTSGN = ONE + DO 130 I = 1, N + X( I ) = ALTSGN*( ONE+REAL( I-1 ) / REAL( N-1 ) ) + ALTSGN = -ALTSGN + 130 CONTINUE + KASE = 1 + ISAVE( 1 ) = 5 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 5) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 140 CONTINUE + TEMP = TWO*( SASUM( N, X, 1 ) / REAL( 3*N ) ) + IF( TEMP.GT.EST ) THEN + CALL SCOPY( N, X, 1, V, 1 ) + EST = TEMP + END IF +* + 150 CONTINUE + KASE = 0 + RETURN +* +* End of SLACN2 +* + END diff --git a/dspl/liblapack/SRC/slacon.f b/dspl/liblapack/SRC/slacon.f new file mode 100644 index 0000000..45c28d5 --- /dev/null +++ b/dspl/liblapack/SRC/slacon.f @@ -0,0 +1,275 @@ +*> \brief \b SLACON estimates the 1-norm of a square matrix, using reverse communication for evaluating matrix-vector products. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLACON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLACON( N, V, X, ISGN, EST, KASE ) +* +* .. Scalar Arguments .. +* INTEGER KASE, N +* REAL EST +* .. +* .. Array Arguments .. +* INTEGER ISGN( * ) +* REAL V( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLACON estimates the 1-norm of a square, real matrix A. +*> Reverse communication is used for evaluating matrix-vector products. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 1. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is REAL array, dimension (N) +*> On the final return, V = A*W, where EST = norm(V)/norm(W) +*> (W is not returned). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (N) +*> On an intermediate return, X should be overwritten by +*> A * X, if KASE=1, +*> A**T * X, if KASE=2, +*> and SLACON must be re-called with all the other parameters +*> unchanged. +*> \endverbatim +*> +*> \param[out] ISGN +*> \verbatim +*> ISGN is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[in,out] EST +*> \verbatim +*> EST is REAL +*> On entry with KASE = 1 or 2 and JUMP = 3, EST should be +*> unchanged from the previous call to SLACON. +*> On exit, EST is an estimate (a lower bound) for norm(A). +*> \endverbatim +*> +*> \param[in,out] KASE +*> \verbatim +*> KASE is INTEGER +*> On the initial call to SLACON, KASE should be 0. +*> On an intermediate return, KASE will be 1 or 2, indicating +*> whether X should be overwritten by A * X or A**T * X. +*> On the final return from SLACON, KASE will again be 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Nick Higham, University of Manchester. \n +*> Originally named SONEST, dated March 16, 1988. +* +*> \par References: +* ================ +*> +*> N.J. Higham, "FORTRAN codes for estimating the one-norm of +*> a real or complex matrix, with applications to condition estimation", +*> ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. +*> +* ===================================================================== + SUBROUTINE SLACON( N, V, X, ISGN, EST, KASE ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER KASE, N + REAL EST +* .. +* .. Array Arguments .. + INTEGER ISGN( * ) + REAL V( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, ITER, J, JLAST, JUMP + REAL ALTSGN, ESTOLD, TEMP +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SASUM + EXTERNAL ISAMAX, SASUM +* .. +* .. External Subroutines .. + EXTERNAL SCOPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, NINT, REAL, SIGN +* .. +* .. Save statement .. + SAVE +* .. +* .. Executable Statements .. +* + IF( KASE.EQ.0 ) THEN + DO 10 I = 1, N + X( I ) = ONE / REAL( N ) + 10 CONTINUE + KASE = 1 + JUMP = 1 + RETURN + END IF +* + GO TO ( 20, 40, 70, 110, 140 )JUMP +* +* ................ ENTRY (JUMP = 1) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. +* + 20 CONTINUE + IF( N.EQ.1 ) THEN + V( 1 ) = X( 1 ) + EST = ABS( V( 1 ) ) +* ... QUIT + GO TO 150 + END IF + EST = SASUM( N, X, 1 ) +* + DO 30 I = 1, N + X( I ) = SIGN( ONE, X( I ) ) + ISGN( I ) = NINT( X( I ) ) + 30 CONTINUE + KASE = 2 + JUMP = 2 + RETURN +* +* ................ ENTRY (JUMP = 2) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. +* + 40 CONTINUE + J = ISAMAX( N, X, 1 ) + ITER = 2 +* +* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. +* + 50 CONTINUE + DO 60 I = 1, N + X( I ) = ZERO + 60 CONTINUE + X( J ) = ONE + KASE = 1 + JUMP = 3 + RETURN +* +* ................ ENTRY (JUMP = 3) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 70 CONTINUE + CALL SCOPY( N, X, 1, V, 1 ) + ESTOLD = EST + EST = SASUM( N, V, 1 ) + DO 80 I = 1, N + IF( NINT( SIGN( ONE, X( I ) ) ).NE.ISGN( I ) ) + $ GO TO 90 + 80 CONTINUE +* REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. + GO TO 120 +* + 90 CONTINUE +* TEST FOR CYCLING. + IF( EST.LE.ESTOLD ) + $ GO TO 120 +* + DO 100 I = 1, N + X( I ) = SIGN( ONE, X( I ) ) + ISGN( I ) = NINT( X( I ) ) + 100 CONTINUE + KASE = 2 + JUMP = 4 + RETURN +* +* ................ ENTRY (JUMP = 4) +* X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. +* + 110 CONTINUE + JLAST = J + J = ISAMAX( N, X, 1 ) + IF( ( X( JLAST ).NE.ABS( X( J ) ) ) .AND. ( ITER.LT.ITMAX ) ) THEN + ITER = ITER + 1 + GO TO 50 + END IF +* +* ITERATION COMPLETE. FINAL STAGE. +* + 120 CONTINUE + ALTSGN = ONE + DO 130 I = 1, N + X( I ) = ALTSGN*( ONE+REAL( I-1 ) / REAL( N-1 ) ) + ALTSGN = -ALTSGN + 130 CONTINUE + KASE = 1 + JUMP = 5 + RETURN +* +* ................ ENTRY (JUMP = 5) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 140 CONTINUE + TEMP = TWO*( SASUM( N, X, 1 ) / REAL( 3*N ) ) + IF( TEMP.GT.EST ) THEN + CALL SCOPY( N, X, 1, V, 1 ) + EST = TEMP + END IF +* + 150 CONTINUE + KASE = 0 + RETURN +* +* End of SLACON +* + END diff --git a/dspl/liblapack/SRC/slacpy.f b/dspl/liblapack/SRC/slacpy.f new file mode 100644 index 0000000..a4bcde3 --- /dev/null +++ b/dspl/liblapack/SRC/slacpy.f @@ -0,0 +1,156 @@ +*> \brief \b SLACPY copies all or part of one two-dimensional array to another. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLACPY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLACPY( UPLO, M, N, A, LDA, B, LDB ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLACPY copies all or part of a two-dimensional matrix A to another +*> matrix B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies the part of the matrix A to be copied to B. +*> = 'U': Upper triangular part +*> = 'L': Lower triangular part +*> Otherwise: All of the matrix A +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The m by n matrix A. If UPLO = 'U', only the upper triangle +*> or trapezoid is accessed; if UPLO = 'L', only the lower +*> triangle or trapezoid is accessed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> On exit, B = A in the locations specified by UPLO. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLACPY( UPLO, M, N, A, LDA, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( J, M ) + B( I, J ) = A( I, J ) + 10 CONTINUE + 20 CONTINUE + ELSE IF( LSAME( UPLO, 'L' ) ) THEN + DO 40 J = 1, N + DO 30 I = J, M + B( I, J ) = A( I, J ) + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 J = 1, N + DO 50 I = 1, M + B( I, J ) = A( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF + RETURN +* +* End of SLACPY +* + END diff --git a/dspl/liblapack/SRC/sladiv.f b/dspl/liblapack/SRC/sladiv.f new file mode 100644 index 0000000..a9a3603 --- /dev/null +++ b/dspl/liblapack/SRC/sladiv.f @@ -0,0 +1,256 @@ +*> \brief \b SLADIV performs complex division in real arithmetic, avoiding unnecessary overflow. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLADIV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLADIV( A, B, C, D, P, Q ) +* +* .. Scalar Arguments .. +* REAL A, B, C, D, P, Q +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLADIV performs complex division in real arithmetic +*> +*> a + i*b +*> p + i*q = --------- +*> c + i*d +*> +*> The algorithm is due to Michael Baudin and Robert L. Smith +*> and can be found in the paper +*> "A Robust Complex Division in Scilab" +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] A +*> \verbatim +*> A is REAL +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL +*> The scalars a, b, c, and d in the above expression. +*> \endverbatim +*> +*> \param[out] P +*> \verbatim +*> P is REAL +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is REAL +*> The scalars p and q in the above expression. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date January 2013 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLADIV( A, B, C, D, P, Q ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2013 +* +* .. Scalar Arguments .. + REAL A, B, C, D, P, Q +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL BS + PARAMETER ( BS = 2.0E0 ) + REAL HALF + PARAMETER ( HALF = 0.5E0 ) + REAL TWO + PARAMETER ( TWO = 2.0E0 ) +* +* .. Local Scalars .. + REAL AA, BB, CC, DD, AB, CD, S, OV, UN, BE, EPS +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLADIV1 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + AA = A + BB = B + CC = C + DD = D + AB = MAX( ABS(A), ABS(B) ) + CD = MAX( ABS(C), ABS(D) ) + S = 1.0E0 + + OV = SLAMCH( 'Overflow threshold' ) + UN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Epsilon' ) + BE = BS / (EPS*EPS) + + IF( AB >= HALF*OV ) THEN + AA = HALF * AA + BB = HALF * BB + S = TWO * S + END IF + IF( CD >= HALF*OV ) THEN + CC = HALF * CC + DD = HALF * DD + S = HALF * S + END IF + IF( AB <= UN*BS/EPS ) THEN + AA = AA * BE + BB = BB * BE + S = S / BE + END IF + IF( CD <= UN*BS/EPS ) THEN + CC = CC * BE + DD = DD * BE + S = S * BE + END IF + IF( ABS( D ).LE.ABS( C ) ) THEN + CALL SLADIV1(AA, BB, CC, DD, P, Q) + ELSE + CALL SLADIV1(BB, AA, DD, CC, P, Q) + Q = -Q + END IF + P = P * S + Q = Q * S +* + RETURN +* +* End of SLADIV +* + END + +*> \ingroup realOTHERauxiliary + + + SUBROUTINE SLADIV1( A, B, C, D, P, Q ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2013 +* +* .. Scalar Arguments .. + REAL A, B, C, D, P, Q +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E0 ) +* +* .. Local Scalars .. + REAL R, T +* .. +* .. External Functions .. + REAL SLADIV2 + EXTERNAL SLADIV2 +* .. +* .. Executable Statements .. +* + R = D / C + T = ONE / (C + D * R) + P = SLADIV2(A, B, C, D, R, T) + A = -A + Q = SLADIV2(B, A, C, D, R, T) +* + RETURN +* +* End of SLADIV1 +* + END + +*> \ingroup realOTHERauxiliary + + REAL FUNCTION SLADIV2( A, B, C, D, R, T ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2013 +* +* .. Scalar Arguments .. + REAL A, B, C, D, R, T +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) +* +* .. Local Scalars .. + REAL BR +* .. +* .. Executable Statements .. +* + IF( R.NE.ZERO ) THEN + BR = B * R + if( BR.NE.ZERO ) THEN + SLADIV2 = (A + BR) * T + ELSE + SLADIV2 = A * T + (B * T) * R + END IF + ELSE + SLADIV2 = (A + D * (B / C)) * T + END IF +* + RETURN +* +* End of SLADIV +* + END diff --git a/dspl/liblapack/SRC/slae2.f b/dspl/liblapack/SRC/slae2.f new file mode 100644 index 0000000..2b7a28c --- /dev/null +++ b/dspl/liblapack/SRC/slae2.f @@ -0,0 +1,185 @@ +*> \brief \b SLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAE2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAE2( A, B, C, RT1, RT2 ) +* +* .. Scalar Arguments .. +* REAL A, B, C, RT1, RT2 +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAE2 computes the eigenvalues of a 2-by-2 symmetric matrix +*> [ A B ] +*> [ B C ]. +*> On return, RT1 is the eigenvalue of larger absolute value, and RT2 +*> is the eigenvalue of smaller absolute value. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] A +*> \verbatim +*> A is REAL +*> The (1,1) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL +*> The (1,2) and (2,1) elements of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL +*> The (2,2) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[out] RT1 +*> \verbatim +*> RT1 is REAL +*> The eigenvalue of larger absolute value. +*> \endverbatim +*> +*> \param[out] RT2 +*> \verbatim +*> RT2 is REAL +*> The eigenvalue of smaller absolute value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> RT1 is accurate to a few ulps barring over/underflow. +*> +*> RT2 may be inaccurate if there is massive cancellation in the +*> determinant A*C-B*B; higher precision or correctly rounded or +*> correctly truncated arithmetic would be needed to compute RT2 +*> accurately in all cases. +*> +*> Overflow is possible only if RT1 is within a factor of 5 of overflow. +*> Underflow is harmless if the input data is 0 or exceeds +*> underflow_threshold / macheps. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLAE2( A, B, C, RT1, RT2 ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + REAL A, B, C, RT1, RT2 +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL TWO + PARAMETER ( TWO = 2.0E0 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL HALF + PARAMETER ( HALF = 0.5E0 ) +* .. +* .. Local Scalars .. + REAL AB, ACMN, ACMX, ADF, DF, RT, SM, TB +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* +* Compute the eigenvalues +* + SM = A + C + DF = A - C + ADF = ABS( DF ) + TB = B + B + AB = ABS( TB ) + IF( ABS( A ).GT.ABS( C ) ) THEN + ACMX = A + ACMN = C + ELSE + ACMX = C + ACMN = A + END IF + IF( ADF.GT.AB ) THEN + RT = ADF*SQRT( ONE+( AB / ADF )**2 ) + ELSE IF( ADF.LT.AB ) THEN + RT = AB*SQRT( ONE+( ADF / AB )**2 ) + ELSE +* +* Includes case AB=ADF=0 +* + RT = AB*SQRT( TWO ) + END IF + IF( SM.LT.ZERO ) THEN + RT1 = HALF*( SM-RT ) +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE IF( SM.GT.ZERO ) THEN + RT1 = HALF*( SM+RT ) +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE +* +* Includes case RT1 = RT2 = 0 +* + RT1 = HALF*RT + RT2 = -HALF*RT + END IF + RETURN +* +* End of SLAE2 +* + END diff --git a/dspl/liblapack/SRC/slaebz.f b/dspl/liblapack/SRC/slaebz.f new file mode 100644 index 0000000..5e23082 --- /dev/null +++ b/dspl/liblapack/SRC/slaebz.f @@ -0,0 +1,649 @@ +*> \brief \b SLAEBZ computes the number of eigenvalues of a real symmetric tridiagonal matrix which are less than or equal to a given value, and performs other tasks required by the routine sstebz. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAEBZ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAEBZ( IJOB, NITMAX, N, MMAX, MINP, NBMIN, ABSTOL, +* RELTOL, PIVMIN, D, E, E2, NVAL, AB, C, MOUT, +* NAB, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IJOB, INFO, MINP, MMAX, MOUT, N, NBMIN, NITMAX +* REAL ABSTOL, PIVMIN, RELTOL +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ), NAB( MMAX, * ), NVAL( * ) +* REAL AB( MMAX, * ), C( * ), D( * ), E( * ), E2( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAEBZ contains the iteration loops which compute and use the +*> function N(w), which is the count of eigenvalues of a symmetric +*> tridiagonal matrix T less than or equal to its argument w. It +*> performs a choice of two types of loops: +*> +*> IJOB=1, followed by +*> IJOB=2: It takes as input a list of intervals and returns a list of +*> sufficiently small intervals whose union contains the same +*> eigenvalues as the union of the original intervals. +*> The input intervals are (AB(j,1),AB(j,2)], j=1,...,MINP. +*> The output interval (AB(j,1),AB(j,2)] will contain +*> eigenvalues NAB(j,1)+1,...,NAB(j,2), where 1 <= j <= MOUT. +*> +*> IJOB=3: It performs a binary search in each input interval +*> (AB(j,1),AB(j,2)] for a point w(j) such that +*> N(w(j))=NVAL(j), and uses C(j) as the starting point of +*> the search. If such a w(j) is found, then on output +*> AB(j,1)=AB(j,2)=w. If no such w(j) is found, then on output +*> (AB(j,1),AB(j,2)] will be a small interval containing the +*> point where N(w) jumps through NVAL(j), unless that point +*> lies outside the initial interval. +*> +*> Note that the intervals are in all cases half-open intervals, +*> i.e., of the form (a,b] , which includes b but not a . +*> +*> To avoid underflow, the matrix should be scaled so that its largest +*> element is no greater than overflow**(1/2) * underflow**(1/4) +*> in absolute value. To assure the most accurate computation +*> of small eigenvalues, the matrix should be scaled to be +*> not much smaller than that, either. +*> +*> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal +*> Matrix", Report CS41, Computer Science Dept., Stanford +*> University, July 21, 1966 +*> +*> Note: the arguments are, in general, *not* checked for unreasonable +*> values. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] IJOB +*> \verbatim +*> IJOB is INTEGER +*> Specifies what is to be done: +*> = 1: Compute NAB for the initial intervals. +*> = 2: Perform bisection iteration to find eigenvalues of T. +*> = 3: Perform bisection iteration to invert N(w), i.e., +*> to find a point which has a specified number of +*> eigenvalues of T to its left. +*> Other values will cause SLAEBZ to return with INFO=-1. +*> \endverbatim +*> +*> \param[in] NITMAX +*> \verbatim +*> NITMAX is INTEGER +*> The maximum number of "levels" of bisection to be +*> performed, i.e., an interval of width W will not be made +*> smaller than 2^(-NITMAX) * W. If not all intervals +*> have converged after NITMAX iterations, then INFO is set +*> to the number of non-converged intervals. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension n of the tridiagonal matrix T. It must be at +*> least 1. +*> \endverbatim +*> +*> \param[in] MMAX +*> \verbatim +*> MMAX is INTEGER +*> The maximum number of intervals. If more than MMAX intervals +*> are generated, then SLAEBZ will quit with INFO=MMAX+1. +*> \endverbatim +*> +*> \param[in] MINP +*> \verbatim +*> MINP is INTEGER +*> The initial number of intervals. It may not be greater than +*> MMAX. +*> \endverbatim +*> +*> \param[in] NBMIN +*> \verbatim +*> NBMIN is INTEGER +*> The smallest number of intervals that should be processed +*> using a vector loop. If zero, then only the scalar loop +*> will be used. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The minimum (absolute) width of an interval. When an +*> interval is narrower than ABSTOL, or than RELTOL times the +*> larger (in magnitude) endpoint, then it is considered to be +*> sufficiently small, i.e., converged. This must be at least +*> zero. +*> \endverbatim +*> +*> \param[in] RELTOL +*> \verbatim +*> RELTOL is REAL +*> The minimum relative width of an interval. When an interval +*> is narrower than ABSTOL, or than RELTOL times the larger (in +*> magnitude) endpoint, then it is considered to be +*> sufficiently small, i.e., converged. Note: this should +*> always be at least radix*machine epsilon. +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is REAL +*> The minimum absolute value of a "pivot" in the Sturm +*> sequence loop. +*> This must be at least max |e(j)**2|*safe_min and at +*> least safe_min, where safe_min is at least +*> the smallest number that can divide one without overflow. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension (N) +*> The offdiagonal elements of the tridiagonal matrix T in +*> positions 1 through N-1. E(N) is arbitrary. +*> \endverbatim +*> +*> \param[in] E2 +*> \verbatim +*> E2 is REAL array, dimension (N) +*> The squares of the offdiagonal elements of the tridiagonal +*> matrix T. E2(N) is ignored. +*> \endverbatim +*> +*> \param[in,out] NVAL +*> \verbatim +*> NVAL is INTEGER array, dimension (MINP) +*> If IJOB=1 or 2, not referenced. +*> If IJOB=3, the desired values of N(w). The elements of NVAL +*> will be reordered to correspond with the intervals in AB. +*> Thus, NVAL(j) on output will not, in general be the same as +*> NVAL(j) on input, but it will correspond with the interval +*> (AB(j,1),AB(j,2)] on output. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (MMAX,2) +*> The endpoints of the intervals. AB(j,1) is a(j), the left +*> endpoint of the j-th interval, and AB(j,2) is b(j), the +*> right endpoint of the j-th interval. The input intervals +*> will, in general, be modified, split, and reordered by the +*> calculation. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (MMAX) +*> If IJOB=1, ignored. +*> If IJOB=2, workspace. +*> If IJOB=3, then on input C(j) should be initialized to the +*> first search point in the binary search. +*> \endverbatim +*> +*> \param[out] MOUT +*> \verbatim +*> MOUT is INTEGER +*> If IJOB=1, the number of eigenvalues in the intervals. +*> If IJOB=2 or 3, the number of intervals output. +*> If IJOB=3, MOUT will equal MINP. +*> \endverbatim +*> +*> \param[in,out] NAB +*> \verbatim +*> NAB is INTEGER array, dimension (MMAX,2) +*> If IJOB=1, then on output NAB(i,j) will be set to N(AB(i,j)). +*> If IJOB=2, then on input, NAB(i,j) should be set. It must +*> satisfy the condition: +*> N(AB(i,1)) <= NAB(i,1) <= NAB(i,2) <= N(AB(i,2)), +*> which means that in interval i only eigenvalues +*> NAB(i,1)+1,...,NAB(i,2) will be considered. Usually, +*> NAB(i,j)=N(AB(i,j)), from a previous call to SLAEBZ with +*> IJOB=1. +*> On output, NAB(i,j) will contain +*> max(na(k),min(nb(k),N(AB(i,j)))), where k is the index of +*> the input interval that the output interval +*> (AB(j,1),AB(j,2)] came from, and na(k) and nb(k) are the +*> the input values of NAB(k,1) and NAB(k,2). +*> If IJOB=3, then on output, NAB(i,j) contains N(AB(i,j)), +*> unless N(w) > NVAL(i) for all search points w , in which +*> case NAB(i,1) will not be modified, i.e., the output +*> value will be the same as the input value (modulo +*> reorderings -- see NVAL and AB), or unless N(w) < NVAL(i) +*> for all search points w , in which case NAB(i,2) will +*> not be modified. Normally, NAB should be set to some +*> distinctive value(s) before SLAEBZ is called. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MMAX) +*> Workspace. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MMAX) +*> Workspace. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: All intervals converged. +*> = 1--MMAX: The last INFO intervals did not converge. +*> = MMAX+1: More than MMAX intervals were generated. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> This routine is intended to be called only by other LAPACK +*> routines, thus the interface is less user-friendly. It is intended +*> for two purposes: +*> +*> (a) finding eigenvalues. In this case, SLAEBZ should have one or +*> more initial intervals set up in AB, and SLAEBZ should be called +*> with IJOB=1. This sets up NAB, and also counts the eigenvalues. +*> Intervals with no eigenvalues would usually be thrown out at +*> this point. Also, if not all the eigenvalues in an interval i +*> are desired, NAB(i,1) can be increased or NAB(i,2) decreased. +*> For example, set NAB(i,1)=NAB(i,2)-1 to get the largest +*> eigenvalue. SLAEBZ is then called with IJOB=2 and MMAX +*> no smaller than the value of MOUT returned by the call with +*> IJOB=1. After this (IJOB=2) call, eigenvalues NAB(i,1)+1 +*> through NAB(i,2) are approximately AB(i,1) (or AB(i,2)) to the +*> tolerance specified by ABSTOL and RELTOL. +*> +*> (b) finding an interval (a',b'] containing eigenvalues w(f),...,w(l). +*> In this case, start with a Gershgorin interval (a,b). Set up +*> AB to contain 2 search intervals, both initially (a,b). One +*> NVAL element should contain f-1 and the other should contain l +*> , while C should contain a and b, resp. NAB(i,1) should be -1 +*> and NAB(i,2) should be N+1, to flag an error if the desired +*> interval does not lie in (a,b). SLAEBZ is then called with +*> IJOB=3. On exit, if w(f-1) < w(f), then one of the intervals -- +*> j -- will have AB(j,1)=AB(j,2) and NAB(j,1)=NAB(j,2)=f-1, while +*> if, to the specified tolerance, w(f-k)=...=w(f+r), k > 0 and r +*> >= 0, then the interval will have N(AB(j,1))=NAB(j,1)=f-k and +*> N(AB(j,2))=NAB(j,2)=f+r. The cases w(l) < w(l+1) and +*> w(l-r)=...=w(l+k) are handled similarly. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLAEBZ( IJOB, NITMAX, N, MMAX, MINP, NBMIN, ABSTOL, + $ RELTOL, PIVMIN, D, E, E2, NVAL, AB, C, MOUT, + $ NAB, WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IJOB, INFO, MINP, MMAX, MOUT, N, NBMIN, NITMAX + REAL ABSTOL, PIVMIN, RELTOL +* .. +* .. Array Arguments .. + INTEGER IWORK( * ), NAB( MMAX, * ), NVAL( * ) + REAL AB( MMAX, * ), C( * ), D( * ), E( * ), E2( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, TWO, HALF + PARAMETER ( ZERO = 0.0E0, TWO = 2.0E0, + $ HALF = 1.0E0 / TWO ) +* .. +* .. Local Scalars .. + INTEGER ITMP1, ITMP2, J, JI, JIT, JP, KF, KFNEW, KL, + $ KLNEW + REAL TMP1, TMP2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Check for Errors +* + INFO = 0 + IF( IJOB.LT.1 .OR. IJOB.GT.3 ) THEN + INFO = -1 + RETURN + END IF +* +* Initialize NAB +* + IF( IJOB.EQ.1 ) THEN +* +* Compute the number of eigenvalues in the initial intervals. +* + MOUT = 0 + DO 30 JI = 1, MINP + DO 20 JP = 1, 2 + TMP1 = D( 1 ) - AB( JI, JP ) + IF( ABS( TMP1 ).LT.PIVMIN ) + $ TMP1 = -PIVMIN + NAB( JI, JP ) = 0 + IF( TMP1.LE.ZERO ) + $ NAB( JI, JP ) = 1 +* + DO 10 J = 2, N + TMP1 = D( J ) - E2( J-1 ) / TMP1 - AB( JI, JP ) + IF( ABS( TMP1 ).LT.PIVMIN ) + $ TMP1 = -PIVMIN + IF( TMP1.LE.ZERO ) + $ NAB( JI, JP ) = NAB( JI, JP ) + 1 + 10 CONTINUE + 20 CONTINUE + MOUT = MOUT + NAB( JI, 2 ) - NAB( JI, 1 ) + 30 CONTINUE + RETURN + END IF +* +* Initialize for loop +* +* KF and KL have the following meaning: +* Intervals 1,...,KF-1 have converged. +* Intervals KF,...,KL still need to be refined. +* + KF = 1 + KL = MINP +* +* If IJOB=2, initialize C. +* If IJOB=3, use the user-supplied starting point. +* + IF( IJOB.EQ.2 ) THEN + DO 40 JI = 1, MINP + C( JI ) = HALF*( AB( JI, 1 )+AB( JI, 2 ) ) + 40 CONTINUE + END IF +* +* Iteration loop +* + DO 130 JIT = 1, NITMAX +* +* Loop over intervals +* + IF( KL-KF+1.GE.NBMIN .AND. NBMIN.GT.0 ) THEN +* +* Begin of Parallel Version of the loop +* + DO 60 JI = KF, KL +* +* Compute N(c), the number of eigenvalues less than c +* + WORK( JI ) = D( 1 ) - C( JI ) + IWORK( JI ) = 0 + IF( WORK( JI ).LE.PIVMIN ) THEN + IWORK( JI ) = 1 + WORK( JI ) = MIN( WORK( JI ), -PIVMIN ) + END IF +* + DO 50 J = 2, N + WORK( JI ) = D( J ) - E2( J-1 ) / WORK( JI ) - C( JI ) + IF( WORK( JI ).LE.PIVMIN ) THEN + IWORK( JI ) = IWORK( JI ) + 1 + WORK( JI ) = MIN( WORK( JI ), -PIVMIN ) + END IF + 50 CONTINUE + 60 CONTINUE +* + IF( IJOB.LE.2 ) THEN +* +* IJOB=2: Choose all intervals containing eigenvalues. +* + KLNEW = KL + DO 70 JI = KF, KL +* +* Insure that N(w) is monotone +* + IWORK( JI ) = MIN( NAB( JI, 2 ), + $ MAX( NAB( JI, 1 ), IWORK( JI ) ) ) +* +* Update the Queue -- add intervals if both halves +* contain eigenvalues. +* + IF( IWORK( JI ).EQ.NAB( JI, 2 ) ) THEN +* +* No eigenvalue in the upper interval: +* just use the lower interval. +* + AB( JI, 2 ) = C( JI ) +* + ELSE IF( IWORK( JI ).EQ.NAB( JI, 1 ) ) THEN +* +* No eigenvalue in the lower interval: +* just use the upper interval. +* + AB( JI, 1 ) = C( JI ) + ELSE + KLNEW = KLNEW + 1 + IF( KLNEW.LE.MMAX ) THEN +* +* Eigenvalue in both intervals -- add upper to +* queue. +* + AB( KLNEW, 2 ) = AB( JI, 2 ) + NAB( KLNEW, 2 ) = NAB( JI, 2 ) + AB( KLNEW, 1 ) = C( JI ) + NAB( KLNEW, 1 ) = IWORK( JI ) + AB( JI, 2 ) = C( JI ) + NAB( JI, 2 ) = IWORK( JI ) + ELSE + INFO = MMAX + 1 + END IF + END IF + 70 CONTINUE + IF( INFO.NE.0 ) + $ RETURN + KL = KLNEW + ELSE +* +* IJOB=3: Binary search. Keep only the interval containing +* w s.t. N(w) = NVAL +* + DO 80 JI = KF, KL + IF( IWORK( JI ).LE.NVAL( JI ) ) THEN + AB( JI, 1 ) = C( JI ) + NAB( JI, 1 ) = IWORK( JI ) + END IF + IF( IWORK( JI ).GE.NVAL( JI ) ) THEN + AB( JI, 2 ) = C( JI ) + NAB( JI, 2 ) = IWORK( JI ) + END IF + 80 CONTINUE + END IF +* + ELSE +* +* End of Parallel Version of the loop +* +* Begin of Serial Version of the loop +* + KLNEW = KL + DO 100 JI = KF, KL +* +* Compute N(w), the number of eigenvalues less than w +* + TMP1 = C( JI ) + TMP2 = D( 1 ) - TMP1 + ITMP1 = 0 + IF( TMP2.LE.PIVMIN ) THEN + ITMP1 = 1 + TMP2 = MIN( TMP2, -PIVMIN ) + END IF +* + DO 90 J = 2, N + TMP2 = D( J ) - E2( J-1 ) / TMP2 - TMP1 + IF( TMP2.LE.PIVMIN ) THEN + ITMP1 = ITMP1 + 1 + TMP2 = MIN( TMP2, -PIVMIN ) + END IF + 90 CONTINUE +* + IF( IJOB.LE.2 ) THEN +* +* IJOB=2: Choose all intervals containing eigenvalues. +* +* Insure that N(w) is monotone +* + ITMP1 = MIN( NAB( JI, 2 ), + $ MAX( NAB( JI, 1 ), ITMP1 ) ) +* +* Update the Queue -- add intervals if both halves +* contain eigenvalues. +* + IF( ITMP1.EQ.NAB( JI, 2 ) ) THEN +* +* No eigenvalue in the upper interval: +* just use the lower interval. +* + AB( JI, 2 ) = TMP1 +* + ELSE IF( ITMP1.EQ.NAB( JI, 1 ) ) THEN +* +* No eigenvalue in the lower interval: +* just use the upper interval. +* + AB( JI, 1 ) = TMP1 + ELSE IF( KLNEW.LT.MMAX ) THEN +* +* Eigenvalue in both intervals -- add upper to queue. +* + KLNEW = KLNEW + 1 + AB( KLNEW, 2 ) = AB( JI, 2 ) + NAB( KLNEW, 2 ) = NAB( JI, 2 ) + AB( KLNEW, 1 ) = TMP1 + NAB( KLNEW, 1 ) = ITMP1 + AB( JI, 2 ) = TMP1 + NAB( JI, 2 ) = ITMP1 + ELSE + INFO = MMAX + 1 + RETURN + END IF + ELSE +* +* IJOB=3: Binary search. Keep only the interval +* containing w s.t. N(w) = NVAL +* + IF( ITMP1.LE.NVAL( JI ) ) THEN + AB( JI, 1 ) = TMP1 + NAB( JI, 1 ) = ITMP1 + END IF + IF( ITMP1.GE.NVAL( JI ) ) THEN + AB( JI, 2 ) = TMP1 + NAB( JI, 2 ) = ITMP1 + END IF + END IF + 100 CONTINUE + KL = KLNEW +* + END IF +* +* Check for convergence +* + KFNEW = KF + DO 110 JI = KF, KL + TMP1 = ABS( AB( JI, 2 )-AB( JI, 1 ) ) + TMP2 = MAX( ABS( AB( JI, 2 ) ), ABS( AB( JI, 1 ) ) ) + IF( TMP1.LT.MAX( ABSTOL, PIVMIN, RELTOL*TMP2 ) .OR. + $ NAB( JI, 1 ).GE.NAB( JI, 2 ) ) THEN +* +* Converged -- Swap with position KFNEW, +* then increment KFNEW +* + IF( JI.GT.KFNEW ) THEN + TMP1 = AB( JI, 1 ) + TMP2 = AB( JI, 2 ) + ITMP1 = NAB( JI, 1 ) + ITMP2 = NAB( JI, 2 ) + AB( JI, 1 ) = AB( KFNEW, 1 ) + AB( JI, 2 ) = AB( KFNEW, 2 ) + NAB( JI, 1 ) = NAB( KFNEW, 1 ) + NAB( JI, 2 ) = NAB( KFNEW, 2 ) + AB( KFNEW, 1 ) = TMP1 + AB( KFNEW, 2 ) = TMP2 + NAB( KFNEW, 1 ) = ITMP1 + NAB( KFNEW, 2 ) = ITMP2 + IF( IJOB.EQ.3 ) THEN + ITMP1 = NVAL( JI ) + NVAL( JI ) = NVAL( KFNEW ) + NVAL( KFNEW ) = ITMP1 + END IF + END IF + KFNEW = KFNEW + 1 + END IF + 110 CONTINUE + KF = KFNEW +* +* Choose Midpoints +* + DO 120 JI = KF, KL + C( JI ) = HALF*( AB( JI, 1 )+AB( JI, 2 ) ) + 120 CONTINUE +* +* If no more intervals to refine, quit. +* + IF( KF.GT.KL ) + $ GO TO 140 + 130 CONTINUE +* +* Converged +* + 140 CONTINUE + INFO = MAX( KL+1-KF, 0 ) + MOUT = KL +* + RETURN +* +* End of SLAEBZ +* + END diff --git a/dspl/liblapack/SRC/slaed0.f b/dspl/liblapack/SRC/slaed0.f new file mode 100644 index 0000000..51aea74 --- /dev/null +++ b/dspl/liblapack/SRC/slaed0.f @@ -0,0 +1,434 @@ +*> \brief \b SLAED0 used by sstedc. Computes all eigenvalues and corresponding eigenvectors of an unreduced symmetric tridiagonal matrix using the divide and conquer method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAED0 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER ICOMPQ, INFO, LDQ, LDQS, N, QSIZ +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL D( * ), E( * ), Q( LDQ, * ), QSTORE( LDQS, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAED0 computes all eigenvalues and corresponding eigenvectors of a +*> symmetric tridiagonal matrix using the divide and conquer method. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> = 0: Compute eigenvalues only. +*> = 1: Compute eigenvectors of original dense symmetric matrix +*> also. On entry, Q contains the orthogonal matrix used +*> to reduce the original matrix to tridiagonal form. +*> = 2: Compute eigenvalues and eigenvectors of tridiagonal +*> matrix. +*> \endverbatim +*> +*> \param[in] QSIZ +*> \verbatim +*> QSIZ is INTEGER +*> The dimension of the orthogonal matrix used to reduce +*> the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the main diagonal of the tridiagonal matrix. +*> On exit, its eigenvalues. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ, N) +*> On entry, Q must contain an N-by-N orthogonal matrix. +*> If ICOMPQ = 0 Q is not referenced. +*> If ICOMPQ = 1 On entry, Q is a subset of the columns of the +*> orthogonal matrix used to reduce the full +*> matrix to tridiagonal form corresponding to +*> the subset of the full matrix which is being +*> decomposed at this time. +*> If ICOMPQ = 2 On entry, Q will be the identity matrix. +*> On exit, Q contains the eigenvectors of the +*> tridiagonal matrix. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. If eigenvectors are +*> desired, then LDQ >= max(1,N). In any case, LDQ >= 1. +*> \endverbatim +*> +*> \param[out] QSTORE +*> \verbatim +*> QSTORE is REAL array, dimension (LDQS, N) +*> Referenced only when ICOMPQ = 1. Used to store parts of +*> the eigenvector matrix when the updating matrix multiplies +*> take place. +*> \endverbatim +*> +*> \param[in] LDQS +*> \verbatim +*> LDQS is INTEGER +*> The leading dimension of the array QSTORE. If ICOMPQ = 1, +*> then LDQS >= max(1,N). In any case, LDQS >= 1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, +*> If ICOMPQ = 0 or 1, the dimension of WORK must be at least +*> 1 + 3*N + 2*N*lg N + 3*N**2 +*> ( lg( N ) = smallest integer k +*> such that 2^k >= N ) +*> If ICOMPQ = 2, the dimension of WORK must be at least +*> 4*N + N**2. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, +*> If ICOMPQ = 0 or 1, the dimension of IWORK must be at least +*> 6 + 6*N + 5*N*lg N. +*> ( lg( N ) = smallest integer k +*> such that 2^k >= N ) +*> If ICOMPQ = 2, the dimension of IWORK must be at least +*> 3 + 5*N. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: The algorithm failed to compute an eigenvalue while +*> working on the submatrix lying in rows and columns +*> INFO/(N+1) through mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +* +* ===================================================================== + SUBROUTINE SLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS, + $ WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, LDQ, LDQS, N, QSIZ +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL D( * ), E( * ), Q( LDQ, * ), QSTORE( LDQS, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.E0, ONE = 1.E0, TWO = 2.E0 ) +* .. +* .. Local Scalars .. + INTEGER CURLVL, CURPRB, CURR, I, IGIVCL, IGIVNM, + $ IGIVPT, INDXQ, IPERM, IPRMPT, IQ, IQPTR, IWREM, + $ J, K, LGN, MATSIZ, MSD2, SMLSIZ, SMM1, SPM1, + $ SPM2, SUBMAT, SUBPBS, TLVLS + REAL TEMP +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, SLACPY, SLAED1, SLAED7, SSTEQR, + $ XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.2 ) THEN + INFO = -1 + ELSE IF( ( ICOMPQ.EQ.1 ) .AND. ( QSIZ.LT.MAX( 0, N ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDQS.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAED0', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + SMLSIZ = ILAENV( 9, 'SLAED0', ' ', 0, 0, 0, 0 ) +* +* Determine the size and placement of the submatrices, and save in +* the leading elements of IWORK. +* + IWORK( 1 ) = N + SUBPBS = 1 + TLVLS = 0 + 10 CONTINUE + IF( IWORK( SUBPBS ).GT.SMLSIZ ) THEN + DO 20 J = SUBPBS, 1, -1 + IWORK( 2*J ) = ( IWORK( J )+1 ) / 2 + IWORK( 2*J-1 ) = IWORK( J ) / 2 + 20 CONTINUE + TLVLS = TLVLS + 1 + SUBPBS = 2*SUBPBS + GO TO 10 + END IF + DO 30 J = 2, SUBPBS + IWORK( J ) = IWORK( J ) + IWORK( J-1 ) + 30 CONTINUE +* +* Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 +* using rank-1 modifications (cuts). +* + SPM1 = SUBPBS - 1 + DO 40 I = 1, SPM1 + SUBMAT = IWORK( I ) + 1 + SMM1 = SUBMAT - 1 + D( SMM1 ) = D( SMM1 ) - ABS( E( SMM1 ) ) + D( SUBMAT ) = D( SUBMAT ) - ABS( E( SMM1 ) ) + 40 CONTINUE +* + INDXQ = 4*N + 3 + IF( ICOMPQ.NE.2 ) THEN +* +* Set up workspaces for eigenvalues only/accumulate new vectors +* routine +* + TEMP = LOG( REAL( N ) ) / LOG( TWO ) + LGN = INT( TEMP ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IPRMPT = INDXQ + N + 1 + IPERM = IPRMPT + N*LGN + IQPTR = IPERM + N*LGN + IGIVPT = IQPTR + N + 2 + IGIVCL = IGIVPT + N*LGN +* + IGIVNM = 1 + IQ = IGIVNM + 2*N*LGN + IWREM = IQ + N**2 + 1 +* +* Initialize pointers +* + DO 50 I = 0, SUBPBS + IWORK( IPRMPT+I ) = 1 + IWORK( IGIVPT+I ) = 1 + 50 CONTINUE + IWORK( IQPTR ) = 1 + END IF +* +* Solve each submatrix eigenproblem at the bottom of the divide and +* conquer tree. +* + CURR = 0 + DO 70 I = 0, SPM1 + IF( I.EQ.0 ) THEN + SUBMAT = 1 + MATSIZ = IWORK( 1 ) + ELSE + SUBMAT = IWORK( I ) + 1 + MATSIZ = IWORK( I+1 ) - IWORK( I ) + END IF + IF( ICOMPQ.EQ.2 ) THEN + CALL SSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ), + $ Q( SUBMAT, SUBMAT ), LDQ, WORK, INFO ) + IF( INFO.NE.0 ) + $ GO TO 130 + ELSE + CALL SSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ), + $ WORK( IQ-1+IWORK( IQPTR+CURR ) ), MATSIZ, WORK, + $ INFO ) + IF( INFO.NE.0 ) + $ GO TO 130 + IF( ICOMPQ.EQ.1 ) THEN + CALL SGEMM( 'N', 'N', QSIZ, MATSIZ, MATSIZ, ONE, + $ Q( 1, SUBMAT ), LDQ, WORK( IQ-1+IWORK( IQPTR+ + $ CURR ) ), MATSIZ, ZERO, QSTORE( 1, SUBMAT ), + $ LDQS ) + END IF + IWORK( IQPTR+CURR+1 ) = IWORK( IQPTR+CURR ) + MATSIZ**2 + CURR = CURR + 1 + END IF + K = 1 + DO 60 J = SUBMAT, IWORK( I+1 ) + IWORK( INDXQ+J ) = K + K = K + 1 + 60 CONTINUE + 70 CONTINUE +* +* Successively merge eigensystems of adjacent submatrices +* into eigensystem for the corresponding larger matrix. +* +* while ( SUBPBS > 1 ) +* + CURLVL = 1 + 80 CONTINUE + IF( SUBPBS.GT.1 ) THEN + SPM2 = SUBPBS - 2 + DO 90 I = 0, SPM2, 2 + IF( I.EQ.0 ) THEN + SUBMAT = 1 + MATSIZ = IWORK( 2 ) + MSD2 = IWORK( 1 ) + CURPRB = 0 + ELSE + SUBMAT = IWORK( I ) + 1 + MATSIZ = IWORK( I+2 ) - IWORK( I ) + MSD2 = MATSIZ / 2 + CURPRB = CURPRB + 1 + END IF +* +* Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) +* into an eigensystem of size MATSIZ. +* SLAED1 is used only for the full eigensystem of a tridiagonal +* matrix. +* SLAED7 handles the cases in which eigenvalues only or eigenvalues +* and eigenvectors of a full symmetric matrix (which was reduced to +* tridiagonal form) are desired. +* + IF( ICOMPQ.EQ.2 ) THEN + CALL SLAED1( MATSIZ, D( SUBMAT ), Q( SUBMAT, SUBMAT ), + $ LDQ, IWORK( INDXQ+SUBMAT ), + $ E( SUBMAT+MSD2-1 ), MSD2, WORK, + $ IWORK( SUBPBS+1 ), INFO ) + ELSE + CALL SLAED7( ICOMPQ, MATSIZ, QSIZ, TLVLS, CURLVL, CURPRB, + $ D( SUBMAT ), QSTORE( 1, SUBMAT ), LDQS, + $ IWORK( INDXQ+SUBMAT ), E( SUBMAT+MSD2-1 ), + $ MSD2, WORK( IQ ), IWORK( IQPTR ), + $ IWORK( IPRMPT ), IWORK( IPERM ), + $ IWORK( IGIVPT ), IWORK( IGIVCL ), + $ WORK( IGIVNM ), WORK( IWREM ), + $ IWORK( SUBPBS+1 ), INFO ) + END IF + IF( INFO.NE.0 ) + $ GO TO 130 + IWORK( I / 2+1 ) = IWORK( I+2 ) + 90 CONTINUE + SUBPBS = SUBPBS / 2 + CURLVL = CURLVL + 1 + GO TO 80 + END IF +* +* end while +* +* Re-merge the eigenvalues/vectors which were deflated at the final +* merge step. +* + IF( ICOMPQ.EQ.1 ) THEN + DO 100 I = 1, N + J = IWORK( INDXQ+I ) + WORK( I ) = D( J ) + CALL SCOPY( QSIZ, QSTORE( 1, J ), 1, Q( 1, I ), 1 ) + 100 CONTINUE + CALL SCOPY( N, WORK, 1, D, 1 ) + ELSE IF( ICOMPQ.EQ.2 ) THEN + DO 110 I = 1, N + J = IWORK( INDXQ+I ) + WORK( I ) = D( J ) + CALL SCOPY( N, Q( 1, J ), 1, WORK( N*I+1 ), 1 ) + 110 CONTINUE + CALL SCOPY( N, WORK, 1, D, 1 ) + CALL SLACPY( 'A', N, N, WORK( N+1 ), N, Q, LDQ ) + ELSE + DO 120 I = 1, N + J = IWORK( INDXQ+I ) + WORK( I ) = D( J ) + 120 CONTINUE + CALL SCOPY( N, WORK, 1, D, 1 ) + END IF + GO TO 140 +* + 130 CONTINUE + INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1 +* + 140 CONTINUE + RETURN +* +* End of SLAED0 +* + END diff --git a/dspl/liblapack/SRC/slaed1.f b/dspl/liblapack/SRC/slaed1.f new file mode 100644 index 0000000..d40cb02 --- /dev/null +++ b/dspl/liblapack/SRC/slaed1.f @@ -0,0 +1,274 @@ +*> \brief \b SLAED1 used by sstedc. Computes the updated eigensystem of a diagonal matrix after modification by a rank-one symmetric matrix. Used when the original matrix is tridiagonal. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAED1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER CUTPNT, INFO, LDQ, N +* REAL RHO +* .. +* .. Array Arguments .. +* INTEGER INDXQ( * ), IWORK( * ) +* REAL D( * ), Q( LDQ, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAED1 computes the updated eigensystem of a diagonal +*> matrix after modification by a rank-one symmetric matrix. This +*> routine is used only for the eigenproblem which requires all +*> eigenvalues and eigenvectors of a tridiagonal matrix. SLAED7 handles +*> the case in which eigenvalues only or eigenvalues and eigenvectors +*> of a full symmetric matrix (which was reduced to tridiagonal form) +*> are desired. +*> +*> T = Q(in) ( D(in) + RHO * Z*Z**T ) Q**T(in) = Q(out) * D(out) * Q**T(out) +*> +*> where Z = Q**T*u, u is a vector of length N with ones in the +*> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. +*> +*> The eigenvectors of the original matrix are stored in Q, and the +*> eigenvalues are in D. The algorithm consists of three stages: +*> +*> The first stage consists of deflating the size of the problem +*> when there are multiple eigenvalues or if there is a zero in +*> the Z vector. For each such occurrence the dimension of the +*> secular equation problem is reduced by one. This stage is +*> performed by the routine SLAED2. +*> +*> The second stage consists of calculating the updated +*> eigenvalues. This is done by finding the roots of the secular +*> equation via the routine SLAED4 (as called by SLAED3). +*> This routine also calculates the eigenvectors of the current +*> problem. +*> +*> The final stage consists of computing the updated eigenvectors +*> directly using the updated eigenvalues. The eigenvectors for +*> the current problem are multiplied with the eigenvectors from +*> the overall problem. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the eigenvalues of the rank-1-perturbed matrix. +*> On exit, the eigenvalues of the repaired matrix. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ,N) +*> On entry, the eigenvectors of the rank-1-perturbed matrix. +*> On exit, the eigenvectors of the repaired tridiagonal matrix. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] INDXQ +*> \verbatim +*> INDXQ is INTEGER array, dimension (N) +*> On entry, the permutation which separately sorts the two +*> subproblems in D into ascending order. +*> On exit, the permutation which will reintegrate the +*> subproblems back into sorted order, +*> i.e. D( INDXQ( I = 1, N ) ) will be in ascending order. +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is REAL +*> The subdiagonal entry used to create the rank-1 modification. +*> \endverbatim +*> +*> \param[in] CUTPNT +*> \verbatim +*> CUTPNT is INTEGER +*> The location of the last eigenvalue in the leading sub-matrix. +*> min(1,N) <= CUTPNT <= N/2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (4*N + N**2) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, an eigenvalue did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA \n +*> Modified by Francoise Tisseur, University of Tennessee +*> +* ===================================================================== + SUBROUTINE SLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER CUTPNT, INFO, LDQ, N + REAL RHO +* .. +* .. Array Arguments .. + INTEGER INDXQ( * ), IWORK( * ) + REAL D( * ), Q( LDQ, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER COLTYP, CPP1, I, IDLMDA, INDX, INDXC, INDXP, + $ IQ2, IS, IW, IZ, K, N1, N2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLAED2, SLAED3, SLAMRG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( MIN( 1, N / 2 ).GT.CUTPNT .OR. ( N / 2 ).LT.CUTPNT ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAED1', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* The following values are integer pointers which indicate +* the portion of the workspace +* used by a particular array in SLAED2 and SLAED3. +* + IZ = 1 + IDLMDA = IZ + N + IW = IDLMDA + N + IQ2 = IW + N +* + INDX = 1 + INDXC = INDX + N + COLTYP = INDXC + N + INDXP = COLTYP + N +* +* +* Form the z-vector which consists of the last row of Q_1 and the +* first row of Q_2. +* + CALL SCOPY( CUTPNT, Q( CUTPNT, 1 ), LDQ, WORK( IZ ), 1 ) + CPP1 = CUTPNT + 1 + CALL SCOPY( N-CUTPNT, Q( CPP1, CPP1 ), LDQ, WORK( IZ+CUTPNT ), 1 ) +* +* Deflate eigenvalues. +* + CALL SLAED2( K, N, CUTPNT, D, Q, LDQ, INDXQ, RHO, WORK( IZ ), + $ WORK( IDLMDA ), WORK( IW ), WORK( IQ2 ), + $ IWORK( INDX ), IWORK( INDXC ), IWORK( INDXP ), + $ IWORK( COLTYP ), INFO ) +* + IF( INFO.NE.0 ) + $ GO TO 20 +* +* Solve Secular Equation. +* + IF( K.NE.0 ) THEN + IS = ( IWORK( COLTYP )+IWORK( COLTYP+1 ) )*CUTPNT + + $ ( IWORK( COLTYP+1 )+IWORK( COLTYP+2 ) )*( N-CUTPNT ) + IQ2 + CALL SLAED3( K, N, CUTPNT, D, Q, LDQ, RHO, WORK( IDLMDA ), + $ WORK( IQ2 ), IWORK( INDXC ), IWORK( COLTYP ), + $ WORK( IW ), WORK( IS ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 20 +* +* Prepare the INDXQ sorting permutation. +* + N1 = K + N2 = N - K + CALL SLAMRG( N1, N2, D, 1, -1, INDXQ ) + ELSE + DO 10 I = 1, N + INDXQ( I ) = I + 10 CONTINUE + END IF +* + 20 CONTINUE + RETURN +* +* End of SLAED1 +* + END diff --git a/dspl/liblapack/SRC/slaed2.f b/dspl/liblapack/SRC/slaed2.f new file mode 100644 index 0000000..b039a31 --- /dev/null +++ b/dspl/liblapack/SRC/slaed2.f @@ -0,0 +1,539 @@ +*> \brief \b SLAED2 used by sstedc. Merges eigenvalues and deflates secular equation. Used when the original matrix is tridiagonal. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAED2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W, +* Q2, INDX, INDXC, INDXP, COLTYP, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDQ, N, N1 +* REAL RHO +* .. +* .. Array Arguments .. +* INTEGER COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ), +* $ INDXQ( * ) +* REAL D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ), +* $ W( * ), Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAED2 merges the two sets of eigenvalues together into a single +*> sorted set. Then it tries to deflate the size of the problem. +*> There are two ways in which deflation can occur: when two or more +*> eigenvalues are close together or if there is a tiny entry in the +*> Z vector. For each such occurrence the order of the related secular +*> equation problem is reduced by one. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> The number of non-deflated eigenvalues, and the order of the +*> related secular equation. 0 <= K <=N. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> The location of the last eigenvalue in the leading sub-matrix. +*> min(1,N) <= N1 <= N/2. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, D contains the eigenvalues of the two submatrices to +*> be combined. +*> On exit, D contains the trailing (N-K) updated eigenvalues +*> (those which were deflated) sorted into increasing order. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ, N) +*> On entry, Q contains the eigenvectors of two submatrices in +*> the two square blocks with corners at (1,1), (N1,N1) +*> and (N1+1, N1+1), (N,N). +*> On exit, Q contains the trailing (N-K) updated eigenvectors +*> (those which were deflated) in its last N-K columns. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] INDXQ +*> \verbatim +*> INDXQ is INTEGER array, dimension (N) +*> The permutation which separately sorts the two sub-problems +*> in D into ascending order. Note that elements in the second +*> half of this permutation must first have N1 added to their +*> values. Destroyed on exit. +*> \endverbatim +*> +*> \param[in,out] RHO +*> \verbatim +*> RHO is REAL +*> On entry, the off-diagonal element associated with the rank-1 +*> cut which originally split the two submatrices which are now +*> being recombined. +*> On exit, RHO has been modified to the value required by +*> SLAED3. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is REAL array, dimension (N) +*> On entry, Z contains the updating vector (the last +*> row of the first sub-eigenvector matrix and the first row of +*> the second sub-eigenvector matrix). +*> On exit, the contents of Z have been destroyed by the updating +*> process. +*> \endverbatim +*> +*> \param[out] DLAMDA +*> \verbatim +*> DLAMDA is REAL array, dimension (N) +*> A copy of the first K eigenvalues which will be used by +*> SLAED3 to form the secular equation. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first k values of the final deflation-altered z-vector +*> which will be passed to SLAED3. +*> \endverbatim +*> +*> \param[out] Q2 +*> \verbatim +*> Q2 is REAL array, dimension (N1**2+(N-N1)**2) +*> A copy of the first K eigenvectors which will be used by +*> SLAED3 in a matrix multiply (SGEMM) to solve for the new +*> eigenvectors. +*> \endverbatim +*> +*> \param[out] INDX +*> \verbatim +*> INDX is INTEGER array, dimension (N) +*> The permutation used to sort the contents of DLAMDA into +*> ascending order. +*> \endverbatim +*> +*> \param[out] INDXC +*> \verbatim +*> INDXC is INTEGER array, dimension (N) +*> The permutation used to arrange the columns of the deflated +*> Q matrix into three groups: the first group contains non-zero +*> elements only at and above N1, the second contains +*> non-zero elements only below N1, and the third is dense. +*> \endverbatim +*> +*> \param[out] INDXP +*> \verbatim +*> INDXP is INTEGER array, dimension (N) +*> The permutation used to place deflated values of D at the end +*> of the array. INDXP(1:K) points to the nondeflated D-values +*> and INDXP(K+1:N) points to the deflated eigenvalues. +*> \endverbatim +*> +*> \param[out] COLTYP +*> \verbatim +*> COLTYP is INTEGER array, dimension (N) +*> During execution, a label which will indicate which of the +*> following types a column in the Q2 matrix is: +*> 1 : non-zero in the upper half only; +*> 2 : dense; +*> 3 : non-zero in the lower half only; +*> 4 : deflated. +*> On exit, COLTYP(i) is the number of columns of type i, +*> for i=1 to 4 only. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA \n +*> Modified by Francoise Tisseur, University of Tennessee +*> +* ===================================================================== + SUBROUTINE SLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W, + $ Q2, INDX, INDXC, INDXP, COLTYP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDQ, N, N1 + REAL RHO +* .. +* .. Array Arguments .. + INTEGER COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ), + $ INDXQ( * ) + REAL D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ), + $ W( * ), Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL MONE, ZERO, ONE, TWO, EIGHT + PARAMETER ( MONE = -1.0E0, ZERO = 0.0E0, ONE = 1.0E0, + $ TWO = 2.0E0, EIGHT = 8.0E0 ) +* .. +* .. Local Arrays .. + INTEGER CTOT( 4 ), PSM( 4 ) +* .. +* .. Local Scalars .. + INTEGER CT, I, IMAX, IQ1, IQ2, J, JMAX, JS, K2, N1P1, + $ N2, NJ, PJ + REAL C, EPS, S, T, TAU, TOL +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH, SLAPY2 + EXTERNAL ISAMAX, SLAMCH, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLACPY, SLAMRG, SROT, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( MIN( 1, ( N / 2 ) ).GT.N1 .OR. ( N / 2 ).LT.N1 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAED2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + N2 = N - N1 + N1P1 = N1 + 1 +* + IF( RHO.LT.ZERO ) THEN + CALL SSCAL( N2, MONE, Z( N1P1 ), 1 ) + END IF +* +* Normalize z so that norm(z) = 1. Since z is the concatenation of +* two normalized vectors, norm2(z) = sqrt(2). +* + T = ONE / SQRT( TWO ) + CALL SSCAL( N, T, Z, 1 ) +* +* RHO = ABS( norm(z)**2 * RHO ) +* + RHO = ABS( TWO*RHO ) +* +* Sort the eigenvalues into increasing order +* + DO 10 I = N1P1, N + INDXQ( I ) = INDXQ( I ) + N1 + 10 CONTINUE +* +* re-integrate the deflated parts from the last pass +* + DO 20 I = 1, N + DLAMDA( I ) = D( INDXQ( I ) ) + 20 CONTINUE + CALL SLAMRG( N1, N2, DLAMDA, 1, 1, INDXC ) + DO 30 I = 1, N + INDX( I ) = INDXQ( INDXC( I ) ) + 30 CONTINUE +* +* Calculate the allowable deflation tolerance +* + IMAX = ISAMAX( N, Z, 1 ) + JMAX = ISAMAX( N, D, 1 ) + EPS = SLAMCH( 'Epsilon' ) + TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) ) +* +* If the rank-1 modifier is small enough, no more needs to be done +* except to reorganize Q so that its columns correspond with the +* elements in D. +* + IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN + K = 0 + IQ2 = 1 + DO 40 J = 1, N + I = INDX( J ) + CALL SCOPY( N, Q( 1, I ), 1, Q2( IQ2 ), 1 ) + DLAMDA( J ) = D( I ) + IQ2 = IQ2 + N + 40 CONTINUE + CALL SLACPY( 'A', N, N, Q2, N, Q, LDQ ) + CALL SCOPY( N, DLAMDA, 1, D, 1 ) + GO TO 190 + END IF +* +* If there are multiple eigenvalues then the problem deflates. Here +* the number of equal eigenvalues are found. As each equal +* eigenvalue is found, an elementary reflector is computed to rotate +* the corresponding eigensubspace so that the corresponding +* components of Z are zero in this new basis. +* + DO 50 I = 1, N1 + COLTYP( I ) = 1 + 50 CONTINUE + DO 60 I = N1P1, N + COLTYP( I ) = 3 + 60 CONTINUE +* +* + K = 0 + K2 = N + 1 + DO 70 J = 1, N + NJ = INDX( J ) + IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + COLTYP( NJ ) = 4 + INDXP( K2 ) = NJ + IF( J.EQ.N ) + $ GO TO 100 + ELSE + PJ = NJ + GO TO 80 + END IF + 70 CONTINUE + 80 CONTINUE + J = J + 1 + NJ = INDX( J ) + IF( J.GT.N ) + $ GO TO 100 + IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + COLTYP( NJ ) = 4 + INDXP( K2 ) = NJ + ELSE +* +* Check if eigenvalues are close enough to allow deflation. +* + S = Z( PJ ) + C = Z( NJ ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = SLAPY2( C, S ) + T = D( NJ ) - D( PJ ) + C = C / TAU + S = -S / TAU + IF( ABS( T*C*S ).LE.TOL ) THEN +* +* Deflation is possible. +* + Z( NJ ) = TAU + Z( PJ ) = ZERO + IF( COLTYP( NJ ).NE.COLTYP( PJ ) ) + $ COLTYP( NJ ) = 2 + COLTYP( PJ ) = 4 + CALL SROT( N, Q( 1, PJ ), 1, Q( 1, NJ ), 1, C, S ) + T = D( PJ )*C**2 + D( NJ )*S**2 + D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2 + D( PJ ) = T + K2 = K2 - 1 + I = 1 + 90 CONTINUE + IF( K2+I.LE.N ) THEN + IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN + INDXP( K2+I-1 ) = INDXP( K2+I ) + INDXP( K2+I ) = PJ + I = I + 1 + GO TO 90 + ELSE + INDXP( K2+I-1 ) = PJ + END IF + ELSE + INDXP( K2+I-1 ) = PJ + END IF + PJ = NJ + ELSE + K = K + 1 + DLAMDA( K ) = D( PJ ) + W( K ) = Z( PJ ) + INDXP( K ) = PJ + PJ = NJ + END IF + END IF + GO TO 80 + 100 CONTINUE +* +* Record the last eigenvalue. +* + K = K + 1 + DLAMDA( K ) = D( PJ ) + W( K ) = Z( PJ ) + INDXP( K ) = PJ +* +* Count up the total number of the various types of columns, then +* form a permutation which positions the four column types into +* four uniform groups (although one or more of these groups may be +* empty). +* + DO 110 J = 1, 4 + CTOT( J ) = 0 + 110 CONTINUE + DO 120 J = 1, N + CT = COLTYP( J ) + CTOT( CT ) = CTOT( CT ) + 1 + 120 CONTINUE +* +* PSM(*) = Position in SubMatrix (of types 1 through 4) +* + PSM( 1 ) = 1 + PSM( 2 ) = 1 + CTOT( 1 ) + PSM( 3 ) = PSM( 2 ) + CTOT( 2 ) + PSM( 4 ) = PSM( 3 ) + CTOT( 3 ) + K = N - CTOT( 4 ) +* +* Fill out the INDXC array so that the permutation which it induces +* will place all type-1 columns first, all type-2 columns next, +* then all type-3's, and finally all type-4's. +* + DO 130 J = 1, N + JS = INDXP( J ) + CT = COLTYP( JS ) + INDX( PSM( CT ) ) = JS + INDXC( PSM( CT ) ) = J + PSM( CT ) = PSM( CT ) + 1 + 130 CONTINUE +* +* Sort the eigenvalues and corresponding eigenvectors into DLAMDA +* and Q2 respectively. The eigenvalues/vectors which were not +* deflated go into the first K slots of DLAMDA and Q2 respectively, +* while those which were deflated go into the last N - K slots. +* + I = 1 + IQ1 = 1 + IQ2 = 1 + ( CTOT( 1 )+CTOT( 2 ) )*N1 + DO 140 J = 1, CTOT( 1 ) + JS = INDX( I ) + CALL SCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 ) + Z( I ) = D( JS ) + I = I + 1 + IQ1 = IQ1 + N1 + 140 CONTINUE +* + DO 150 J = 1, CTOT( 2 ) + JS = INDX( I ) + CALL SCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 ) + CALL SCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 ) + Z( I ) = D( JS ) + I = I + 1 + IQ1 = IQ1 + N1 + IQ2 = IQ2 + N2 + 150 CONTINUE +* + DO 160 J = 1, CTOT( 3 ) + JS = INDX( I ) + CALL SCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 ) + Z( I ) = D( JS ) + I = I + 1 + IQ2 = IQ2 + N2 + 160 CONTINUE +* + IQ1 = IQ2 + DO 170 J = 1, CTOT( 4 ) + JS = INDX( I ) + CALL SCOPY( N, Q( 1, JS ), 1, Q2( IQ2 ), 1 ) + IQ2 = IQ2 + N + Z( I ) = D( JS ) + I = I + 1 + 170 CONTINUE +* +* The deflated eigenvalues and their corresponding vectors go back +* into the last N - K slots of D and Q respectively. +* + IF( K.LT.N ) THEN + CALL SLACPY( 'A', N, CTOT( 4 ), Q2( IQ1 ), N, + $ Q( 1, K+1 ), LDQ ) + CALL SCOPY( N-K, Z( K+1 ), 1, D( K+1 ), 1 ) + END IF +* +* Copy CTOT into COLTYP for referencing in SLAED3. +* + DO 180 J = 1, 4 + COLTYP( J ) = CTOT( J ) + 180 CONTINUE +* + 190 CONTINUE + RETURN +* +* End of SLAED2 +* + END diff --git a/dspl/liblapack/SRC/slaed3.f b/dspl/liblapack/SRC/slaed3.f new file mode 100644 index 0000000..dffd23e --- /dev/null +++ b/dspl/liblapack/SRC/slaed3.f @@ -0,0 +1,353 @@ +*> \brief \b SLAED3 used by sstedc. Finds the roots of the secular equation and updates the eigenvectors. Used when the original matrix is tridiagonal. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAED3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX, +* CTOT, W, S, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDQ, N, N1 +* REAL RHO +* .. +* .. Array Arguments .. +* INTEGER CTOT( * ), INDX( * ) +* REAL D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ), +* $ S( * ), W( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAED3 finds the roots of the secular equation, as defined by the +*> values in D, W, and RHO, between 1 and K. It makes the +*> appropriate calls to SLAED4 and then updates the eigenvectors by +*> multiplying the matrix of eigenvectors of the pair of eigensystems +*> being combined by the matrix of eigenvectors of the K-by-K system +*> which is solved here. +*> +*> This code makes very mild assumptions about floating point +*> arithmetic. It will work on machines with a guard digit in +*> add/subtract, or on those binary machines without guard digits +*> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. +*> It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of terms in the rational function to be solved by +*> SLAED4. K >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns in the Q matrix. +*> N >= K (deflation may result in N>K). +*> \endverbatim +*> +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> The location of the last eigenvalue in the leading submatrix. +*> min(1,N) <= N1 <= N/2. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> D(I) contains the updated eigenvalues for +*> 1 <= I <= K. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ,N) +*> Initially the first K columns are used as workspace. +*> On output the columns 1 to K contain +*> the updated eigenvectors. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is REAL +*> The value of the parameter in the rank one update equation. +*> RHO >= 0 required. +*> \endverbatim +*> +*> \param[in,out] DLAMDA +*> \verbatim +*> DLAMDA is REAL array, dimension (K) +*> The first K elements of this array contain the old roots +*> of the deflated updating problem. These are the poles +*> of the secular equation. May be changed on output by +*> having lowest order bit set to zero on Cray X-MP, Cray Y-MP, +*> Cray-2, or Cray C-90, as described above. +*> \endverbatim +*> +*> \param[in] Q2 +*> \verbatim +*> Q2 is REAL array, dimension (LDQ2*N) +*> The first K columns of this matrix contain the non-deflated +*> eigenvectors for the split problem. +*> \endverbatim +*> +*> \param[in] INDX +*> \verbatim +*> INDX is INTEGER array, dimension (N) +*> The permutation used to arrange the columns of the deflated +*> Q matrix into three groups (see SLAED2). +*> The rows of the eigenvectors found by SLAED4 must be likewise +*> permuted before the matrix multiply can take place. +*> \endverbatim +*> +*> \param[in] CTOT +*> \verbatim +*> CTOT is INTEGER array, dimension (4) +*> A count of the total number of the various types of columns +*> in Q, as described in INDX. The fourth column type is any +*> column which has been deflated. +*> \endverbatim +*> +*> \param[in,out] W +*> \verbatim +*> W is REAL array, dimension (K) +*> The first K elements of this array contain the components +*> of the deflation-adjusted updating vector. Destroyed on +*> output. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (N1 + 1)*K +*> Will contain the eigenvectors of the repaired matrix which +*> will be multiplied by the previously accumulated eigenvectors +*> to update the system. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, an eigenvalue did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA \n +*> Modified by Francoise Tisseur, University of Tennessee +*> +* ===================================================================== + SUBROUTINE SLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX, + $ CTOT, W, S, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDQ, N, N1 + REAL RHO +* .. +* .. Array Arguments .. + INTEGER CTOT( * ), INDX( * ) + REAL D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ), + $ S( * ), W( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, II, IQ2, J, N12, N2, N23 + REAL TEMP +* .. +* .. External Functions .. + REAL SLAMC3, SNRM2 + EXTERNAL SLAMC3, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, SLACPY, SLAED4, SLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( K.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.K ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAED3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) + $ RETURN +* +* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), +* which on any of these machines zeros out the bottommost +* bit of DLAMDA(I) if it is 1; this makes the subsequent +* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DLAMDA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DLAMDA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 10 I = 1, K + DLAMDA( I ) = SLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I ) + 10 CONTINUE +* + DO 20 J = 1, K + CALL SLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO ) +* +* If the zero finder fails, the computation is terminated. +* + IF( INFO.NE.0 ) + $ GO TO 120 + 20 CONTINUE +* + IF( K.EQ.1 ) + $ GO TO 110 + IF( K.EQ.2 ) THEN + DO 30 J = 1, K + W( 1 ) = Q( 1, J ) + W( 2 ) = Q( 2, J ) + II = INDX( 1 ) + Q( 1, J ) = W( II ) + II = INDX( 2 ) + Q( 2, J ) = W( II ) + 30 CONTINUE + GO TO 110 + END IF +* +* Compute updated W. +* + CALL SCOPY( K, W, 1, S, 1 ) +* +* Initialize W(I) = Q(I,I) +* + CALL SCOPY( K, Q, LDQ+1, W, 1 ) + DO 60 J = 1, K + DO 40 I = 1, J - 1 + W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) ) + 40 CONTINUE + DO 50 I = J + 1, K + W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) ) + 50 CONTINUE + 60 CONTINUE + DO 70 I = 1, K + W( I ) = SIGN( SQRT( -W( I ) ), S( I ) ) + 70 CONTINUE +* +* Compute eigenvectors of the modified rank-1 modification. +* + DO 100 J = 1, K + DO 80 I = 1, K + S( I ) = W( I ) / Q( I, J ) + 80 CONTINUE + TEMP = SNRM2( K, S, 1 ) + DO 90 I = 1, K + II = INDX( I ) + Q( I, J ) = S( II ) / TEMP + 90 CONTINUE + 100 CONTINUE +* +* Compute the updated eigenvectors. +* + 110 CONTINUE +* + N2 = N - N1 + N12 = CTOT( 1 ) + CTOT( 2 ) + N23 = CTOT( 2 ) + CTOT( 3 ) +* + CALL SLACPY( 'A', N23, K, Q( CTOT( 1 )+1, 1 ), LDQ, S, N23 ) + IQ2 = N1*N12 + 1 + IF( N23.NE.0 ) THEN + CALL SGEMM( 'N', 'N', N2, K, N23, ONE, Q2( IQ2 ), N2, S, N23, + $ ZERO, Q( N1+1, 1 ), LDQ ) + ELSE + CALL SLASET( 'A', N2, K, ZERO, ZERO, Q( N1+1, 1 ), LDQ ) + END IF +* + CALL SLACPY( 'A', N12, K, Q, LDQ, S, N12 ) + IF( N12.NE.0 ) THEN + CALL SGEMM( 'N', 'N', N1, K, N12, ONE, Q2, N1, S, N12, ZERO, Q, + $ LDQ ) + ELSE + CALL SLASET( 'A', N1, K, ZERO, ZERO, Q( 1, 1 ), LDQ ) + END IF +* +* + 120 CONTINUE + RETURN +* +* End of SLAED3 +* + END diff --git a/dspl/liblapack/SRC/slaed4.f b/dspl/liblapack/SRC/slaed4.f new file mode 100644 index 0000000..c65cba7 --- /dev/null +++ b/dspl/liblapack/SRC/slaed4.f @@ -0,0 +1,917 @@ +*> \brief \b SLAED4 used by sstedc. Finds a single root of the secular equation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAED4 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO ) +* +* .. Scalar Arguments .. +* INTEGER I, INFO, N +* REAL DLAM, RHO +* .. +* .. Array Arguments .. +* REAL D( * ), DELTA( * ), Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This subroutine computes the I-th updated eigenvalue of a symmetric +*> rank-one modification to a diagonal matrix whose elements are +*> given in the array d, and that +*> +*> D(i) < D(j) for i < j +*> +*> and that RHO > 0. This is arranged by the calling routine, and is +*> no loss in generality. The rank-one modified system is thus +*> +*> diag( D ) + RHO * Z * Z_transpose. +*> +*> where we assume the Euclidean norm of Z is 1. +*> +*> The method consists of approximating the rational functions in the +*> secular equation by simpler interpolating rational functions. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of all arrays. +*> \endverbatim +*> +*> \param[in] I +*> \verbatim +*> I is INTEGER +*> The index of the eigenvalue to be computed. 1 <= I <= N. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The original eigenvalues. It is assumed that they are in +*> order, D(I) < D(J) for I < J. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is REAL array, dimension (N) +*> The components of the updating vector. +*> \endverbatim +*> +*> \param[out] DELTA +*> \verbatim +*> DELTA is REAL array, dimension (N) +*> If N .GT. 2, DELTA contains (D(j) - lambda_I) in its j-th +*> component. If N = 1, then DELTA(1) = 1. If N = 2, see SLAED5 +*> for detail. The vector DELTA contains the information necessary +*> to construct the eigenvectors by SLAED3 and SLAED9. +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is REAL +*> The scalar in the symmetric updating formula. +*> \endverbatim +*> +*> \param[out] DLAM +*> \verbatim +*> DLAM is REAL +*> The computed lambda_I, the I-th updated eigenvalue. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = 1, the updating process failed. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> Logical variable ORGATI (origin-at-i?) is used for distinguishing +*> whether D(i) or D(i+1) is treated as the origin. +*> +*> ORGATI = .true. origin at i +*> ORGATI = .false. origin at i+1 +*> +*> Logical variable SWTCH3 (switch-for-3-poles?) is for noting +*> if we are working with THREE poles! +*> +*> MAXIT is the maximum number of iterations allowed for each +*> eigenvalue. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Ren-Cang Li, Computer Science Division, University of California +*> at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE SLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER I, INFO, N + REAL DLAM, RHO +* .. +* .. Array Arguments .. + REAL D( * ), DELTA( * ), Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) + REAL ZERO, ONE, TWO, THREE, FOUR, EIGHT, TEN + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ THREE = 3.0E0, FOUR = 4.0E0, EIGHT = 8.0E0, + $ TEN = 10.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL ORGATI, SWTCH, SWTCH3 + INTEGER II, IIM1, IIP1, IP1, ITER, J, NITER + REAL A, B, C, DEL, DLTLB, DLTUB, DPHI, DPSI, DW, + $ EPS, ERRETM, ETA, MIDPT, PHI, PREW, PSI, + $ RHOINV, TAU, TEMP, TEMP1, W +* .. +* .. Local Arrays .. + REAL ZZ( 3 ) +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLAED5, SLAED6 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Since this routine is called in an inner loop, we do no argument +* checking. +* +* Quick return for N=1 and 2. +* + INFO = 0 + IF( N.EQ.1 ) THEN +* +* Presumably, I=1 upon entry +* + DLAM = D( 1 ) + RHO*Z( 1 )*Z( 1 ) + DELTA( 1 ) = ONE + RETURN + END IF + IF( N.EQ.2 ) THEN + CALL SLAED5( I, D, Z, DELTA, RHO, DLAM ) + RETURN + END IF +* +* Compute machine epsilon +* + EPS = SLAMCH( 'Epsilon' ) + RHOINV = ONE / RHO +* +* The case I = N +* + IF( I.EQ.N ) THEN +* +* Initialize some basic variables +* + II = N - 1 + NITER = 1 +* +* Calculate initial guess +* + MIDPT = RHO / TWO +* +* If ||Z||_2 is not one, then TEMP should be set to +* RHO * ||Z||_2^2 / TWO +* + DO 10 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - MIDPT + 10 CONTINUE +* + PSI = ZERO + DO 20 J = 1, N - 2 + PSI = PSI + Z( J )*Z( J ) / DELTA( J ) + 20 CONTINUE +* + C = RHOINV + PSI + W = C + Z( II )*Z( II ) / DELTA( II ) + + $ Z( N )*Z( N ) / DELTA( N ) +* + IF( W.LE.ZERO ) THEN + TEMP = Z( N-1 )*Z( N-1 ) / ( D( N )-D( N-1 )+RHO ) + + $ Z( N )*Z( N ) / RHO + IF( C.LE.TEMP ) THEN + TAU = RHO + ELSE + DEL = D( N ) - D( N-1 ) + A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N ) + B = Z( N )*Z( N )*DEL + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A ) + ELSE + TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C ) + END IF + END IF +* +* It can be proved that +* D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO +* + DLTLB = MIDPT + DLTUB = RHO + ELSE + DEL = D( N ) - D( N-1 ) + A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N ) + B = Z( N )*Z( N )*DEL + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A ) + ELSE + TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C ) + END IF +* +* It can be proved that +* D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2 +* + DLTLB = ZERO + DLTUB = MIDPT + END IF +* + DO 30 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - TAU + 30 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 40 J = 1, II + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 40 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / DELTA( N ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + DLAM = D( I ) + TAU + GO TO 250 + END IF +* + IF( W.LE.ZERO ) THEN + DLTLB = MAX( DLTLB, TAU ) + ELSE + DLTUB = MIN( DLTUB, TAU ) + END IF +* +* Calculate the new step +* + NITER = NITER + 1 + C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI + A = ( DELTA( N-1 )+DELTA( N ) )*W - + $ DELTA( N-1 )*DELTA( N )*( DPSI+DPHI ) + B = DELTA( N-1 )*DELTA( N )*W + IF( C.LT.ZERO ) + $ C = ABS( C ) + IF( C.EQ.ZERO ) THEN +* ETA = B/A +* ETA = RHO - TAU + ETA = DLTUB - TAU + ELSE IF( A.GE.ZERO ) THEN + ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GT.ZERO ) + $ ETA = -W / ( DPSI+DPHI ) + TEMP = TAU + ETA + IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( DLTUB-TAU ) / TWO + ELSE + ETA = ( DLTLB-TAU ) / TWO + END IF + END IF + DO 50 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + 50 CONTINUE +* + TAU = TAU + ETA +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 60 J = 1, II + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 60 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / DELTA( N ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI +* +* Main loop to update the values of the array DELTA +* + ITER = NITER + 1 +* + DO 90 NITER = ITER, MAXIT +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + DLAM = D( I ) + TAU + GO TO 250 + END IF +* + IF( W.LE.ZERO ) THEN + DLTLB = MAX( DLTLB, TAU ) + ELSE + DLTUB = MIN( DLTUB, TAU ) + END IF +* +* Calculate the new step +* + C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI + A = ( DELTA( N-1 )+DELTA( N ) )*W - + $ DELTA( N-1 )*DELTA( N )*( DPSI+DPHI ) + B = DELTA( N-1 )*DELTA( N )*W + IF( A.GE.ZERO ) THEN + ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GT.ZERO ) + $ ETA = -W / ( DPSI+DPHI ) + TEMP = TAU + ETA + IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( DLTUB-TAU ) / TWO + ELSE + ETA = ( DLTLB-TAU ) / TWO + END IF + END IF + DO 70 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + 70 CONTINUE +* + TAU = TAU + ETA +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 80 J = 1, II + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 80 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / DELTA( N ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV + + $ ABS( TAU )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI + 90 CONTINUE +* +* Return with INFO = 1, NITER = MAXIT and not converged +* + INFO = 1 + DLAM = D( I ) + TAU + GO TO 250 +* +* End for the case I = N +* + ELSE +* +* The case for I < N +* + NITER = 1 + IP1 = I + 1 +* +* Calculate initial guess +* + DEL = D( IP1 ) - D( I ) + MIDPT = DEL / TWO + DO 100 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - MIDPT + 100 CONTINUE +* + PSI = ZERO + DO 110 J = 1, I - 1 + PSI = PSI + Z( J )*Z( J ) / DELTA( J ) + 110 CONTINUE +* + PHI = ZERO + DO 120 J = N, I + 2, -1 + PHI = PHI + Z( J )*Z( J ) / DELTA( J ) + 120 CONTINUE + C = RHOINV + PSI + PHI + W = C + Z( I )*Z( I ) / DELTA( I ) + + $ Z( IP1 )*Z( IP1 ) / DELTA( IP1 ) +* + IF( W.GT.ZERO ) THEN +* +* d(i)< the ith eigenvalue < (d(i)+d(i+1))/2 +* +* We choose d(i) as origin. +* + ORGATI = .TRUE. + A = C*DEL + Z( I )*Z( I ) + Z( IP1 )*Z( IP1 ) + B = Z( I )*Z( I )*DEL + IF( A.GT.ZERO ) THEN + TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + ELSE + TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + END IF + DLTLB = ZERO + DLTUB = MIDPT + ELSE +* +* (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1) +* +* We choose d(i+1) as origin. +* + ORGATI = .FALSE. + A = C*DEL - Z( I )*Z( I ) - Z( IP1 )*Z( IP1 ) + B = Z( IP1 )*Z( IP1 )*DEL + IF( A.LT.ZERO ) THEN + TAU = TWO*B / ( A-SQRT( ABS( A*A+FOUR*B*C ) ) ) + ELSE + TAU = -( A+SQRT( ABS( A*A+FOUR*B*C ) ) ) / ( TWO*C ) + END IF + DLTLB = -MIDPT + DLTUB = ZERO + END IF +* + IF( ORGATI ) THEN + DO 130 J = 1, N + DELTA( J ) = ( D( J )-D( I ) ) - TAU + 130 CONTINUE + ELSE + DO 140 J = 1, N + DELTA( J ) = ( D( J )-D( IP1 ) ) - TAU + 140 CONTINUE + END IF + IF( ORGATI ) THEN + II = I + ELSE + II = I + 1 + END IF + IIM1 = II - 1 + IIP1 = II + 1 +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 150 J = 1, IIM1 + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 150 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 160 J = N, IIP1, -1 + TEMP = Z( J ) / DELTA( J ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 160 CONTINUE +* + W = RHOINV + PHI + PSI +* +* W is the value of the secular function with +* its ii-th element removed. +* + SWTCH3 = .FALSE. + IF( ORGATI ) THEN + IF( W.LT.ZERO ) + $ SWTCH3 = .TRUE. + ELSE + IF( W.GT.ZERO ) + $ SWTCH3 = .TRUE. + END IF + IF( II.EQ.1 .OR. II.EQ.N ) + $ SWTCH3 = .FALSE. +* + TEMP = Z( II ) / DELTA( II ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = W + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU )*DW +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + IF( ORGATI ) THEN + DLAM = D( I ) + TAU + ELSE + DLAM = D( IP1 ) + TAU + END IF + GO TO 250 + END IF +* + IF( W.LE.ZERO ) THEN + DLTLB = MAX( DLTLB, TAU ) + ELSE + DLTUB = MIN( DLTUB, TAU ) + END IF +* +* Calculate the new step +* + NITER = NITER + 1 + IF( .NOT.SWTCH3 ) THEN + IF( ORGATI ) THEN + C = W - DELTA( IP1 )*DW - ( D( I )-D( IP1 ) )* + $ ( Z( I ) / DELTA( I ) )**2 + ELSE + C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )* + $ ( Z( IP1 ) / DELTA( IP1 ) )**2 + END IF + A = ( DELTA( I )+DELTA( IP1 ) )*W - + $ DELTA( I )*DELTA( IP1 )*DW + B = DELTA( I )*DELTA( IP1 )*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DELTA( IP1 )*DELTA( IP1 )* + $ ( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + DELTA( I )*DELTA( I )* + $ ( DPSI+DPHI ) + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + ELSE +* +* Interpolation using THREE most relevant poles +* + TEMP = RHOINV + PSI + PHI + IF( ORGATI ) THEN + TEMP1 = Z( IIM1 ) / DELTA( IIM1 ) + TEMP1 = TEMP1*TEMP1 + C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) - + $ ( D( IIM1 )-D( IIP1 ) )*TEMP1 + ZZ( 1 ) = Z( IIM1 )*Z( IIM1 ) + ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )* + $ ( ( DPSI-TEMP1 )+DPHI ) + ELSE + TEMP1 = Z( IIP1 ) / DELTA( IIP1 ) + TEMP1 = TEMP1*TEMP1 + C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) - + $ ( D( IIP1 )-D( IIM1 ) )*TEMP1 + ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )* + $ ( DPSI+( DPHI-TEMP1 ) ) + ZZ( 3 ) = Z( IIP1 )*Z( IIP1 ) + END IF + ZZ( 2 ) = Z( II )*Z( II ) + CALL SLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA, + $ INFO ) + IF( INFO.NE.0 ) + $ GO TO 250 + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GE.ZERO ) + $ ETA = -W / DW + TEMP = TAU + ETA + IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( DLTUB-TAU ) / TWO + ELSE + ETA = ( DLTLB-TAU ) / TWO + END IF + END IF +* + PREW = W +* + DO 180 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + 180 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 190 J = 1, IIM1 + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 190 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 200 J = N, IIP1, -1 + TEMP = Z( J ) / DELTA( J ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 200 CONTINUE +* + TEMP = Z( II ) / DELTA( II ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = RHOINV + PHI + PSI + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU+ETA )*DW +* + SWTCH = .FALSE. + IF( ORGATI ) THEN + IF( -W.GT.ABS( PREW ) / TEN ) + $ SWTCH = .TRUE. + ELSE + IF( W.GT.ABS( PREW ) / TEN ) + $ SWTCH = .TRUE. + END IF +* + TAU = TAU + ETA +* +* Main loop to update the values of the array DELTA +* + ITER = NITER + 1 +* + DO 240 NITER = ITER, MAXIT +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + IF( ORGATI ) THEN + DLAM = D( I ) + TAU + ELSE + DLAM = D( IP1 ) + TAU + END IF + GO TO 250 + END IF +* + IF( W.LE.ZERO ) THEN + DLTLB = MAX( DLTLB, TAU ) + ELSE + DLTUB = MIN( DLTUB, TAU ) + END IF +* +* Calculate the new step +* + IF( .NOT.SWTCH3 ) THEN + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + C = W - DELTA( IP1 )*DW - + $ ( D( I )-D( IP1 ) )*( Z( I ) / DELTA( I ) )**2 + ELSE + C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )* + $ ( Z( IP1 ) / DELTA( IP1 ) )**2 + END IF + ELSE + TEMP = Z( II ) / DELTA( II ) + IF( ORGATI ) THEN + DPSI = DPSI + TEMP*TEMP + ELSE + DPHI = DPHI + TEMP*TEMP + END IF + C = W - DELTA( I )*DPSI - DELTA( IP1 )*DPHI + END IF + A = ( DELTA( I )+DELTA( IP1 ) )*W - + $ DELTA( I )*DELTA( IP1 )*DW + B = DELTA( I )*DELTA( IP1 )*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DELTA( IP1 )* + $ DELTA( IP1 )*( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + + $ DELTA( I )*DELTA( I )*( DPSI+DPHI ) + END IF + ELSE + A = DELTA( I )*DELTA( I )*DPSI + + $ DELTA( IP1 )*DELTA( IP1 )*DPHI + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + ELSE +* +* Interpolation using THREE most relevant poles +* + TEMP = RHOINV + PSI + PHI + IF( SWTCH ) THEN + C = TEMP - DELTA( IIM1 )*DPSI - DELTA( IIP1 )*DPHI + ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*DPSI + ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*DPHI + ELSE + IF( ORGATI ) THEN + TEMP1 = Z( IIM1 ) / DELTA( IIM1 ) + TEMP1 = TEMP1*TEMP1 + C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) - + $ ( D( IIM1 )-D( IIP1 ) )*TEMP1 + ZZ( 1 ) = Z( IIM1 )*Z( IIM1 ) + ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )* + $ ( ( DPSI-TEMP1 )+DPHI ) + ELSE + TEMP1 = Z( IIP1 ) / DELTA( IIP1 ) + TEMP1 = TEMP1*TEMP1 + C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) - + $ ( D( IIP1 )-D( IIM1 ) )*TEMP1 + ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )* + $ ( DPSI+( DPHI-TEMP1 ) ) + ZZ( 3 ) = Z( IIP1 )*Z( IIP1 ) + END IF + END IF + CALL SLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA, + $ INFO ) + IF( INFO.NE.0 ) + $ GO TO 250 + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GE.ZERO ) + $ ETA = -W / DW + TEMP = TAU + ETA + IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( DLTUB-TAU ) / TWO + ELSE + ETA = ( DLTLB-TAU ) / TWO + END IF + END IF +* + DO 210 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + 210 CONTINUE +* + TAU = TAU + ETA + PREW = W +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 220 J = 1, IIM1 + TEMP = Z( J ) / DELTA( J ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 220 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 230 J = N, IIP1, -1 + TEMP = Z( J ) / DELTA( J ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 230 CONTINUE +* + TEMP = Z( II ) / DELTA( II ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = RHOINV + PHI + PSI + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + + $ THREE*ABS( TEMP ) + ABS( TAU )*DW + IF( W*PREW.GT.ZERO .AND. ABS( W ).GT.ABS( PREW ) / TEN ) + $ SWTCH = .NOT.SWTCH +* + 240 CONTINUE +* +* Return with INFO = 1, NITER = MAXIT and not converged +* + INFO = 1 + IF( ORGATI ) THEN + DLAM = D( I ) + TAU + ELSE + DLAM = D( IP1 ) + TAU + END IF +* + END IF +* + 250 CONTINUE +* + RETURN +* +* End of SLAED4 +* + END diff --git a/dspl/liblapack/SRC/slaed5.f b/dspl/liblapack/SRC/slaed5.f new file mode 100644 index 0000000..29551da --- /dev/null +++ b/dspl/liblapack/SRC/slaed5.f @@ -0,0 +1,189 @@ +*> \brief \b SLAED5 used by sstedc. Solves the 2-by-2 secular equation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAED5 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAED5( I, D, Z, DELTA, RHO, DLAM ) +* +* .. Scalar Arguments .. +* INTEGER I +* REAL DLAM, RHO +* .. +* .. Array Arguments .. +* REAL D( 2 ), DELTA( 2 ), Z( 2 ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This subroutine computes the I-th eigenvalue of a symmetric rank-one +*> modification of a 2-by-2 diagonal matrix +*> +*> diag( D ) + RHO * Z * transpose(Z) . +*> +*> The diagonal elements in the array D are assumed to satisfy +*> +*> D(i) < D(j) for i < j . +*> +*> We also assume RHO > 0 and that the Euclidean norm of the vector +*> Z is one. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] I +*> \verbatim +*> I is INTEGER +*> The index of the eigenvalue to be computed. I = 1 or I = 2. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (2) +*> The original eigenvalues. We assume D(1) < D(2). +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is REAL array, dimension (2) +*> The components of the updating vector. +*> \endverbatim +*> +*> \param[out] DELTA +*> \verbatim +*> DELTA is REAL array, dimension (2) +*> The vector DELTA contains the information necessary +*> to construct the eigenvectors. +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is REAL +*> The scalar in the symmetric updating formula. +*> \endverbatim +*> +*> \param[out] DLAM +*> \verbatim +*> DLAM is REAL +*> The computed lambda_I, the I-th updated eigenvalue. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Ren-Cang Li, Computer Science Division, University of California +*> at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE SLAED5( I, D, Z, DELTA, RHO, DLAM ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER I + REAL DLAM, RHO +* .. +* .. Array Arguments .. + REAL D( 2 ), DELTA( 2 ), Z( 2 ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, FOUR + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ FOUR = 4.0E0 ) +* .. +* .. Local Scalars .. + REAL B, C, DEL, TAU, TEMP, W +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + DEL = D( 2 ) - D( 1 ) + IF( I.EQ.1 ) THEN + W = ONE + TWO*RHO*( Z( 2 )*Z( 2 )-Z( 1 )*Z( 1 ) ) / DEL + IF( W.GT.ZERO ) THEN + B = DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 1 )*Z( 1 )*DEL +* +* B > ZERO, always +* + TAU = TWO*C / ( B+SQRT( ABS( B*B-FOUR*C ) ) ) + DLAM = D( 1 ) + TAU + DELTA( 1 ) = -Z( 1 ) / TAU + DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) + ELSE + B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 2 )*Z( 2 )*DEL + IF( B.GT.ZERO ) THEN + TAU = -TWO*C / ( B+SQRT( B*B+FOUR*C ) ) + ELSE + TAU = ( B-SQRT( B*B+FOUR*C ) ) / TWO + END IF + DLAM = D( 2 ) + TAU + DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) + DELTA( 2 ) = -Z( 2 ) / TAU + END IF + TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) + DELTA( 1 ) = DELTA( 1 ) / TEMP + DELTA( 2 ) = DELTA( 2 ) / TEMP + ELSE +* +* Now I=2 +* + B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 2 )*Z( 2 )*DEL + IF( B.GT.ZERO ) THEN + TAU = ( B+SQRT( B*B+FOUR*C ) ) / TWO + ELSE + TAU = TWO*C / ( -B+SQRT( B*B+FOUR*C ) ) + END IF + DLAM = D( 2 ) + TAU + DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) + DELTA( 2 ) = -Z( 2 ) / TAU + TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) + DELTA( 1 ) = DELTA( 1 ) / TEMP + DELTA( 2 ) = DELTA( 2 ) / TEMP + END IF + RETURN +* +* End OF SLAED5 +* + END diff --git a/dspl/liblapack/SRC/slaed6.f b/dspl/liblapack/SRC/slaed6.f new file mode 100644 index 0000000..69b94d4 --- /dev/null +++ b/dspl/liblapack/SRC/slaed6.f @@ -0,0 +1,410 @@ +*> \brief \b SLAED6 used by sstedc. Computes one Newton step in solution of the secular equation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAED6 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL ORGATI +* INTEGER INFO, KNITER +* REAL FINIT, RHO, TAU +* .. +* .. Array Arguments .. +* REAL D( 3 ), Z( 3 ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAED6 computes the positive or negative root (closest to the origin) +*> of +*> z(1) z(2) z(3) +*> f(x) = rho + --------- + ---------- + --------- +*> d(1)-x d(2)-x d(3)-x +*> +*> It is assumed that +*> +*> if ORGATI = .true. the root is between d(2) and d(3); +*> otherwise it is between d(1) and d(2) +*> +*> This routine will be called by SLAED4 when necessary. In most cases, +*> the root sought is the smallest in magnitude, though it might not be +*> in some extremely rare situations. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] KNITER +*> \verbatim +*> KNITER is INTEGER +*> Refer to SLAED4 for its significance. +*> \endverbatim +*> +*> \param[in] ORGATI +*> \verbatim +*> ORGATI is LOGICAL +*> If ORGATI is true, the needed root is between d(2) and +*> d(3); otherwise it is between d(1) and d(2). See +*> SLAED4 for further details. +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is REAL +*> Refer to the equation f(x) above. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (3) +*> D satisfies d(1) < d(2) < d(3). +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is REAL array, dimension (3) +*> Each of the elements in z must be positive. +*> \endverbatim +*> +*> \param[in] FINIT +*> \verbatim +*> FINIT is REAL +*> The value of f at 0. It is more accurate than the one +*> evaluated inside this routine (if someone wants to do +*> so). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL +*> The root of the equation f(x). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = 1, failure to converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> 10/02/03: This version has a few statements commented out for thread +*> safety (machine parameters are computed on each entry). SJH. +*> +*> 05/10/06: Modified from a new version of Ren-Cang Li, use +*> Gragg-Thornton-Warner cubic convergent scheme for better stability. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Ren-Cang Li, Computer Science Division, University of California +*> at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE SLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL ORGATI + INTEGER INFO, KNITER + REAL FINIT, RHO, TAU +* .. +* .. Array Arguments .. + REAL D( 3 ), Z( 3 ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 40 ) + REAL ZERO, ONE, TWO, THREE, FOUR, EIGHT + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ THREE = 3.0E0, FOUR = 4.0E0, EIGHT = 8.0E0 ) +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Local Arrays .. + REAL DSCALE( 3 ), ZSCALE( 3 ) +* .. +* .. Local Scalars .. + LOGICAL SCALE + INTEGER I, ITER, NITER + REAL A, B, BASE, C, DDF, DF, EPS, ERRETM, ETA, F, + $ FC, SCLFAC, SCLINV, SMALL1, SMALL2, SMINV1, + $ SMINV2, TEMP, TEMP1, TEMP2, TEMP3, TEMP4, + $ LBD, UBD +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* + IF( ORGATI ) THEN + LBD = D(2) + UBD = D(3) + ELSE + LBD = D(1) + UBD = D(2) + END IF + IF( FINIT .LT. ZERO )THEN + LBD = ZERO + ELSE + UBD = ZERO + END IF +* + NITER = 1 + TAU = ZERO + IF( KNITER.EQ.2 ) THEN + IF( ORGATI ) THEN + TEMP = ( D( 3 )-D( 2 ) ) / TWO + C = RHO + Z( 1 ) / ( ( D( 1 )-D( 2 ) )-TEMP ) + A = C*( D( 2 )+D( 3 ) ) + Z( 2 ) + Z( 3 ) + B = C*D( 2 )*D( 3 ) + Z( 2 )*D( 3 ) + Z( 3 )*D( 2 ) + ELSE + TEMP = ( D( 1 )-D( 2 ) ) / TWO + C = RHO + Z( 3 ) / ( ( D( 3 )-D( 2 ) )-TEMP ) + A = C*( D( 1 )+D( 2 ) ) + Z( 1 ) + Z( 2 ) + B = C*D( 1 )*D( 2 ) + Z( 1 )*D( 2 ) + Z( 2 )*D( 1 ) + END IF + TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) ) + A = A / TEMP + B = B / TEMP + C = C / TEMP + IF( C.EQ.ZERO ) THEN + TAU = B / A + ELSE IF( A.LE.ZERO ) THEN + TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + IF( TAU .LT. LBD .OR. TAU .GT. UBD ) + $ TAU = ( LBD+UBD )/TWO + IF( D(1).EQ.TAU .OR. D(2).EQ.TAU .OR. D(3).EQ.TAU ) THEN + TAU = ZERO + ELSE + TEMP = FINIT + TAU*Z(1)/( D(1)*( D( 1 )-TAU ) ) + + $ TAU*Z(2)/( D(2)*( D( 2 )-TAU ) ) + + $ TAU*Z(3)/( D(3)*( D( 3 )-TAU ) ) + IF( TEMP .LE. ZERO )THEN + LBD = TAU + ELSE + UBD = TAU + END IF + IF( ABS( FINIT ).LE.ABS( TEMP ) ) + $ TAU = ZERO + END IF + END IF +* +* get machine parameters for possible scaling to avoid overflow +* +* modified by Sven: parameters SMALL1, SMINV1, SMALL2, +* SMINV2, EPS are not SAVEd anymore between one call to the +* others but recomputed at each call +* + EPS = SLAMCH( 'Epsilon' ) + BASE = SLAMCH( 'Base' ) + SMALL1 = BASE**( INT( LOG( SLAMCH( 'SafMin' ) ) / LOG( BASE ) / + $ THREE ) ) + SMINV1 = ONE / SMALL1 + SMALL2 = SMALL1*SMALL1 + SMINV2 = SMINV1*SMINV1 +* +* Determine if scaling of inputs necessary to avoid overflow +* when computing 1/TEMP**3 +* + IF( ORGATI ) THEN + TEMP = MIN( ABS( D( 2 )-TAU ), ABS( D( 3 )-TAU ) ) + ELSE + TEMP = MIN( ABS( D( 1 )-TAU ), ABS( D( 2 )-TAU ) ) + END IF + SCALE = .FALSE. + IF( TEMP.LE.SMALL1 ) THEN + SCALE = .TRUE. + IF( TEMP.LE.SMALL2 ) THEN +* +* Scale up by power of radix nearest 1/SAFMIN**(2/3) +* + SCLFAC = SMINV2 + SCLINV = SMALL2 + ELSE +* +* Scale up by power of radix nearest 1/SAFMIN**(1/3) +* + SCLFAC = SMINV1 + SCLINV = SMALL1 + END IF +* +* Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) +* + DO 10 I = 1, 3 + DSCALE( I ) = D( I )*SCLFAC + ZSCALE( I ) = Z( I )*SCLFAC + 10 CONTINUE + TAU = TAU*SCLFAC + LBD = LBD*SCLFAC + UBD = UBD*SCLFAC + ELSE +* +* Copy D and Z to DSCALE and ZSCALE +* + DO 20 I = 1, 3 + DSCALE( I ) = D( I ) + ZSCALE( I ) = Z( I ) + 20 CONTINUE + END IF +* + FC = ZERO + DF = ZERO + DDF = ZERO + DO 30 I = 1, 3 + TEMP = ONE / ( DSCALE( I )-TAU ) + TEMP1 = ZSCALE( I )*TEMP + TEMP2 = TEMP1*TEMP + TEMP3 = TEMP2*TEMP + FC = FC + TEMP1 / DSCALE( I ) + DF = DF + TEMP2 + DDF = DDF + TEMP3 + 30 CONTINUE + F = FINIT + TAU*FC +* + IF( ABS( F ).LE.ZERO ) + $ GO TO 60 + IF( F .LE. ZERO )THEN + LBD = TAU + ELSE + UBD = TAU + END IF +* +* Iteration begins -- Use Gragg-Thornton-Warner cubic convergent +* scheme +* +* It is not hard to see that +* +* 1) Iterations will go up monotonically +* if FINIT < 0; +* +* 2) Iterations will go down monotonically +* if FINIT > 0. +* + ITER = NITER + 1 +* + DO 50 NITER = ITER, MAXIT +* + IF( ORGATI ) THEN + TEMP1 = DSCALE( 2 ) - TAU + TEMP2 = DSCALE( 3 ) - TAU + ELSE + TEMP1 = DSCALE( 1 ) - TAU + TEMP2 = DSCALE( 2 ) - TAU + END IF + A = ( TEMP1+TEMP2 )*F - TEMP1*TEMP2*DF + B = TEMP1*TEMP2*F + C = F - ( TEMP1+TEMP2 )*DF + TEMP1*TEMP2*DDF + TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) ) + A = A / TEMP + B = B / TEMP + C = C / TEMP + IF( C.EQ.ZERO ) THEN + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + IF( F*ETA.GE.ZERO ) THEN + ETA = -F / DF + END IF +* + TAU = TAU + ETA + IF( TAU .LT. LBD .OR. TAU .GT. UBD ) + $ TAU = ( LBD + UBD )/TWO +* + FC = ZERO + ERRETM = ZERO + DF = ZERO + DDF = ZERO + DO 40 I = 1, 3 + IF ( ( DSCALE( I )-TAU ).NE.ZERO ) THEN + TEMP = ONE / ( DSCALE( I )-TAU ) + TEMP1 = ZSCALE( I )*TEMP + TEMP2 = TEMP1*TEMP + TEMP3 = TEMP2*TEMP + TEMP4 = TEMP1 / DSCALE( I ) + FC = FC + TEMP4 + ERRETM = ERRETM + ABS( TEMP4 ) + DF = DF + TEMP2 + DDF = DDF + TEMP3 + ELSE + GO TO 60 + END IF + 40 CONTINUE + F = FINIT + TAU*FC + ERRETM = EIGHT*( ABS( FINIT )+ABS( TAU )*ERRETM ) + + $ ABS( TAU )*DF + IF( ( ABS( F ).LE.FOUR*EPS*ERRETM ) .OR. + $ ( (UBD-LBD).LE.FOUR*EPS*ABS(TAU) ) ) + $ GO TO 60 + IF( F .LE. ZERO )THEN + LBD = TAU + ELSE + UBD = TAU + END IF + 50 CONTINUE + INFO = 1 + 60 CONTINUE +* +* Undo scaling +* + IF( SCALE ) + $ TAU = TAU*SCLINV + RETURN +* +* End of SLAED6 +* + END diff --git a/dspl/liblapack/SRC/slaed7.f b/dspl/liblapack/SRC/slaed7.f new file mode 100644 index 0000000..55e6e63 --- /dev/null +++ b/dspl/liblapack/SRC/slaed7.f @@ -0,0 +1,407 @@ +*> \brief \b SLAED7 used by sstedc. Computes the updated eigensystem of a diagonal matrix after modification by a rank-one symmetric matrix. Used when the original matrix is dense. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAED7 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAED7( ICOMPQ, N, QSIZ, TLVLS, CURLVL, CURPBM, D, Q, +* LDQ, INDXQ, RHO, CUTPNT, QSTORE, QPTR, PRMPTR, +* PERM, GIVPTR, GIVCOL, GIVNUM, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER CURLVL, CURPBM, CUTPNT, ICOMPQ, INFO, LDQ, N, +* $ QSIZ, TLVLS +* REAL RHO +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( 2, * ), GIVPTR( * ), INDXQ( * ), +* $ IWORK( * ), PERM( * ), PRMPTR( * ), QPTR( * ) +* REAL D( * ), GIVNUM( 2, * ), Q( LDQ, * ), +* $ QSTORE( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAED7 computes the updated eigensystem of a diagonal +*> matrix after modification by a rank-one symmetric matrix. This +*> routine is used only for the eigenproblem which requires all +*> eigenvalues and optionally eigenvectors of a dense symmetric matrix +*> that has been reduced to tridiagonal form. SLAED1 handles +*> the case in which all eigenvalues and eigenvectors of a symmetric +*> tridiagonal matrix are desired. +*> +*> T = Q(in) ( D(in) + RHO * Z*Z**T ) Q**T(in) = Q(out) * D(out) * Q**T(out) +*> +*> where Z = Q**Tu, u is a vector of length N with ones in the +*> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. +*> +*> The eigenvectors of the original matrix are stored in Q, and the +*> eigenvalues are in D. The algorithm consists of three stages: +*> +*> The first stage consists of deflating the size of the problem +*> when there are multiple eigenvalues or if there is a zero in +*> the Z vector. For each such occurrence the dimension of the +*> secular equation problem is reduced by one. This stage is +*> performed by the routine SLAED8. +*> +*> The second stage consists of calculating the updated +*> eigenvalues. This is done by finding the roots of the secular +*> equation via the routine SLAED4 (as called by SLAED9). +*> This routine also calculates the eigenvectors of the current +*> problem. +*> +*> The final stage consists of computing the updated eigenvectors +*> directly using the updated eigenvalues. The eigenvectors for +*> the current problem are multiplied with the eigenvectors from +*> the overall problem. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> = 0: Compute eigenvalues only. +*> = 1: Compute eigenvectors of original dense symmetric matrix +*> also. On entry, Q contains the orthogonal matrix used +*> to reduce the original matrix to tridiagonal form. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] QSIZ +*> \verbatim +*> QSIZ is INTEGER +*> The dimension of the orthogonal matrix used to reduce +*> the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. +*> \endverbatim +*> +*> \param[in] TLVLS +*> \verbatim +*> TLVLS is INTEGER +*> The total number of merging levels in the overall divide and +*> conquer tree. +*> \endverbatim +*> +*> \param[in] CURLVL +*> \verbatim +*> CURLVL is INTEGER +*> The current level in the overall merge routine, +*> 0 <= CURLVL <= TLVLS. +*> \endverbatim +*> +*> \param[in] CURPBM +*> \verbatim +*> CURPBM is INTEGER +*> The current problem in the current level in the overall +*> merge routine (counting from upper left to lower right). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the eigenvalues of the rank-1-perturbed matrix. +*> On exit, the eigenvalues of the repaired matrix. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ, N) +*> On entry, the eigenvectors of the rank-1-perturbed matrix. +*> On exit, the eigenvectors of the repaired tridiagonal matrix. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[out] INDXQ +*> \verbatim +*> INDXQ is INTEGER array, dimension (N) +*> The permutation which will reintegrate the subproblem just +*> solved back into sorted order, i.e., D( INDXQ( I = 1, N ) ) +*> will be in ascending order. +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is REAL +*> The subdiagonal element used to create the rank-1 +*> modification. +*> \endverbatim +*> +*> \param[in] CUTPNT +*> \verbatim +*> CUTPNT is INTEGER +*> Contains the location of the last eigenvalue in the leading +*> sub-matrix. min(1,N) <= CUTPNT <= N. +*> \endverbatim +*> +*> \param[in,out] QSTORE +*> \verbatim +*> QSTORE is REAL array, dimension (N**2+1) +*> Stores eigenvectors of submatrices encountered during +*> divide and conquer, packed together. QPTR points to +*> beginning of the submatrices. +*> \endverbatim +*> +*> \param[in,out] QPTR +*> \verbatim +*> QPTR is INTEGER array, dimension (N+2) +*> List of indices pointing to beginning of submatrices stored +*> in QSTORE. The submatrices are numbered starting at the +*> bottom left of the divide and conquer tree, from left to +*> right and bottom to top. +*> \endverbatim +*> +*> \param[in] PRMPTR +*> \verbatim +*> PRMPTR is INTEGER array, dimension (N lg N) +*> Contains a list of pointers which indicate where in PERM a +*> level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) +*> indicates the size of the permutation and also the size of +*> the full, non-deflated problem. +*> \endverbatim +*> +*> \param[in] PERM +*> \verbatim +*> PERM is INTEGER array, dimension (N lg N) +*> Contains the permutations (from deflation and sorting) to be +*> applied to each eigenblock. +*> \endverbatim +*> +*> \param[in] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER array, dimension (N lg N) +*> Contains a list of pointers which indicate where in GIVCOL a +*> level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) +*> indicates the number of Givens rotations. +*> \endverbatim +*> +*> \param[in] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension (2, N lg N) +*> Each pair of numbers indicates a pair of columns to take place +*> in a Givens rotation. +*> \endverbatim +*> +*> \param[in] GIVNUM +*> \verbatim +*> GIVNUM is REAL array, dimension (2, N lg N) +*> Each number indicates the S value to be used in the +*> corresponding Givens rotation. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N+2*QSIZ*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, an eigenvalue did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +* +* ===================================================================== + SUBROUTINE SLAED7( ICOMPQ, N, QSIZ, TLVLS, CURLVL, CURPBM, D, Q, + $ LDQ, INDXQ, RHO, CUTPNT, QSTORE, QPTR, PRMPTR, + $ PERM, GIVPTR, GIVCOL, GIVNUM, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER CURLVL, CURPBM, CUTPNT, ICOMPQ, INFO, LDQ, N, + $ QSIZ, TLVLS + REAL RHO +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), GIVPTR( * ), INDXQ( * ), + $ IWORK( * ), PERM( * ), PRMPTR( * ), QPTR( * ) + REAL D( * ), GIVNUM( 2, * ), Q( LDQ, * ), + $ QSTORE( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER COLTYP, CURR, I, IDLMDA, INDX, INDXC, INDXP, + $ IQ2, IS, IW, IZ, K, LDQ2, N1, N2, PTR +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SLAED8, SLAED9, SLAEDA, SLAMRG, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ICOMPQ.EQ.1 .AND. QSIZ.LT.N ) THEN + INFO = -3 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( MIN( 1, N ).GT.CUTPNT .OR. N.LT.CUTPNT ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAED7', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* The following values are for bookkeeping purposes only. They are +* integer pointers which indicate the portion of the workspace +* used by a particular array in SLAED8 and SLAED9. +* + IF( ICOMPQ.EQ.1 ) THEN + LDQ2 = QSIZ + ELSE + LDQ2 = N + END IF +* + IZ = 1 + IDLMDA = IZ + N + IW = IDLMDA + N + IQ2 = IW + N + IS = IQ2 + N*LDQ2 +* + INDX = 1 + INDXC = INDX + N + COLTYP = INDXC + N + INDXP = COLTYP + N +* +* Form the z-vector which consists of the last row of Q_1 and the +* first row of Q_2. +* + PTR = 1 + 2**TLVLS + DO 10 I = 1, CURLVL - 1 + PTR = PTR + 2**( TLVLS-I ) + 10 CONTINUE + CURR = PTR + CURPBM + CALL SLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR, + $ GIVCOL, GIVNUM, QSTORE, QPTR, WORK( IZ ), + $ WORK( IZ+N ), INFO ) +* +* When solving the final problem, we no longer need the stored data, +* so we will overwrite the data from this level onto the previously +* used storage space. +* + IF( CURLVL.EQ.TLVLS ) THEN + QPTR( CURR ) = 1 + PRMPTR( CURR ) = 1 + GIVPTR( CURR ) = 1 + END IF +* +* Sort and Deflate eigenvalues. +* + CALL SLAED8( ICOMPQ, K, N, QSIZ, D, Q, LDQ, INDXQ, RHO, CUTPNT, + $ WORK( IZ ), WORK( IDLMDA ), WORK( IQ2 ), LDQ2, + $ WORK( IW ), PERM( PRMPTR( CURR ) ), GIVPTR( CURR+1 ), + $ GIVCOL( 1, GIVPTR( CURR ) ), + $ GIVNUM( 1, GIVPTR( CURR ) ), IWORK( INDXP ), + $ IWORK( INDX ), INFO ) + PRMPTR( CURR+1 ) = PRMPTR( CURR ) + N + GIVPTR( CURR+1 ) = GIVPTR( CURR+1 ) + GIVPTR( CURR ) +* +* Solve Secular Equation. +* + IF( K.NE.0 ) THEN + CALL SLAED9( K, 1, K, N, D, WORK( IS ), K, RHO, WORK( IDLMDA ), + $ WORK( IW ), QSTORE( QPTR( CURR ) ), K, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( ICOMPQ.EQ.1 ) THEN + CALL SGEMM( 'N', 'N', QSIZ, K, K, ONE, WORK( IQ2 ), LDQ2, + $ QSTORE( QPTR( CURR ) ), K, ZERO, Q, LDQ ) + END IF + QPTR( CURR+1 ) = QPTR( CURR ) + K**2 +* +* Prepare the INDXQ sorting permutation. +* + N1 = K + N2 = N - K + CALL SLAMRG( N1, N2, D, 1, -1, INDXQ ) + ELSE + QPTR( CURR+1 ) = QPTR( CURR ) + DO 20 I = 1, N + INDXQ( I ) = I + 20 CONTINUE + END IF +* + 30 CONTINUE + RETURN +* +* End of SLAED7 +* + END diff --git a/dspl/liblapack/SRC/slaed8.f b/dspl/liblapack/SRC/slaed8.f new file mode 100644 index 0000000..5ec117c --- /dev/null +++ b/dspl/liblapack/SRC/slaed8.f @@ -0,0 +1,524 @@ +*> \brief \b SLAED8 used by sstedc. Merges eigenvalues and deflates secular equation. Used when the original matrix is dense. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAED8 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAED8( ICOMPQ, K, N, QSIZ, D, Q, LDQ, INDXQ, RHO, +* CUTPNT, Z, DLAMDA, Q2, LDQ2, W, PERM, GIVPTR, +* GIVCOL, GIVNUM, INDXP, INDX, INFO ) +* +* .. Scalar Arguments .. +* INTEGER CUTPNT, GIVPTR, ICOMPQ, INFO, K, LDQ, LDQ2, N, +* $ QSIZ +* REAL RHO +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( 2, * ), INDX( * ), INDXP( * ), +* $ INDXQ( * ), PERM( * ) +* REAL D( * ), DLAMDA( * ), GIVNUM( 2, * ), +* $ Q( LDQ, * ), Q2( LDQ2, * ), W( * ), Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAED8 merges the two sets of eigenvalues together into a single +*> sorted set. Then it tries to deflate the size of the problem. +*> There are two ways in which deflation can occur: when two or more +*> eigenvalues are close together or if there is a tiny element in the +*> Z vector. For each such occurrence the order of the related secular +*> equation problem is reduced by one. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> = 0: Compute eigenvalues only. +*> = 1: Compute eigenvectors of original dense symmetric matrix +*> also. On entry, Q contains the orthogonal matrix used +*> to reduce the original matrix to tridiagonal form. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> The number of non-deflated eigenvalues, and the order of the +*> related secular equation. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] QSIZ +*> \verbatim +*> QSIZ is INTEGER +*> The dimension of the orthogonal matrix used to reduce +*> the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the eigenvalues of the two submatrices to be +*> combined. On exit, the trailing (N-K) updated eigenvalues +*> (those which were deflated) sorted into increasing order. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ,N) +*> If ICOMPQ = 0, Q is not referenced. Otherwise, +*> on entry, Q contains the eigenvectors of the partially solved +*> system which has been previously updated in matrix +*> multiplies with other partially solved eigensystems. +*> On exit, Q contains the trailing (N-K) updated eigenvectors +*> (those which were deflated) in its last N-K columns. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] INDXQ +*> \verbatim +*> INDXQ is INTEGER array, dimension (N) +*> The permutation which separately sorts the two sub-problems +*> in D into ascending order. Note that elements in the second +*> half of this permutation must first have CUTPNT added to +*> their values in order to be accurate. +*> \endverbatim +*> +*> \param[in,out] RHO +*> \verbatim +*> RHO is REAL +*> On entry, the off-diagonal element associated with the rank-1 +*> cut which originally split the two submatrices which are now +*> being recombined. +*> On exit, RHO has been modified to the value required by +*> SLAED3. +*> \endverbatim +*> +*> \param[in] CUTPNT +*> \verbatim +*> CUTPNT is INTEGER +*> The location of the last eigenvalue in the leading +*> sub-matrix. min(1,N) <= CUTPNT <= N. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is REAL array, dimension (N) +*> On entry, Z contains the updating vector (the last row of +*> the first sub-eigenvector matrix and the first row of the +*> second sub-eigenvector matrix). +*> On exit, the contents of Z are destroyed by the updating +*> process. +*> \endverbatim +*> +*> \param[out] DLAMDA +*> \verbatim +*> DLAMDA is REAL array, dimension (N) +*> A copy of the first K eigenvalues which will be used by +*> SLAED3 to form the secular equation. +*> \endverbatim +*> +*> \param[out] Q2 +*> \verbatim +*> Q2 is REAL array, dimension (LDQ2,N) +*> If ICOMPQ = 0, Q2 is not referenced. Otherwise, +*> a copy of the first K eigenvectors which will be used by +*> SLAED7 in a matrix multiply (SGEMM) to update the new +*> eigenvectors. +*> \endverbatim +*> +*> \param[in] LDQ2 +*> \verbatim +*> LDQ2 is INTEGER +*> The leading dimension of the array Q2. LDQ2 >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first k values of the final deflation-altered z-vector and +*> will be passed to SLAED3. +*> \endverbatim +*> +*> \param[out] PERM +*> \verbatim +*> PERM is INTEGER array, dimension (N) +*> The permutations (from deflation and sorting) to be applied +*> to each eigenblock. +*> \endverbatim +*> +*> \param[out] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER +*> The number of Givens rotations which took place in this +*> subproblem. +*> \endverbatim +*> +*> \param[out] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension (2, N) +*> Each pair of numbers indicates a pair of columns to take place +*> in a Givens rotation. +*> \endverbatim +*> +*> \param[out] GIVNUM +*> \verbatim +*> GIVNUM is REAL array, dimension (2, N) +*> Each number indicates the S value to be used in the +*> corresponding Givens rotation. +*> \endverbatim +*> +*> \param[out] INDXP +*> \verbatim +*> INDXP is INTEGER array, dimension (N) +*> The permutation used to place deflated values of D at the end +*> of the array. INDXP(1:K) points to the nondeflated D-values +*> and INDXP(K+1:N) points to the deflated eigenvalues. +*> \endverbatim +*> +*> \param[out] INDX +*> \verbatim +*> INDX is INTEGER array, dimension (N) +*> The permutation used to sort the contents of D into ascending +*> order. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +* +* ===================================================================== + SUBROUTINE SLAED8( ICOMPQ, K, N, QSIZ, D, Q, LDQ, INDXQ, RHO, + $ CUTPNT, Z, DLAMDA, Q2, LDQ2, W, PERM, GIVPTR, + $ GIVCOL, GIVNUM, INDXP, INDX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER CUTPNT, GIVPTR, ICOMPQ, INFO, K, LDQ, LDQ2, N, + $ QSIZ + REAL RHO +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), INDX( * ), INDXP( * ), + $ INDXQ( * ), PERM( * ) + REAL D( * ), DLAMDA( * ), GIVNUM( 2, * ), + $ Q( LDQ, * ), Q2( LDQ2, * ), W( * ), Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL MONE, ZERO, ONE, TWO, EIGHT + PARAMETER ( MONE = -1.0E0, ZERO = 0.0E0, ONE = 1.0E0, + $ TWO = 2.0E0, EIGHT = 8.0E0 ) +* .. +* .. Local Scalars .. +* + INTEGER I, IMAX, J, JLAM, JMAX, JP, K2, N1, N1P1, N2 + REAL C, EPS, S, T, TAU, TOL +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH, SLAPY2 + EXTERNAL ISAMAX, SLAMCH, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLACPY, SLAMRG, SROT, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ICOMPQ.EQ.1 .AND. QSIZ.LT.N ) THEN + INFO = -4 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( CUTPNT.LT.MIN( 1, N ) .OR. CUTPNT.GT.N ) THEN + INFO = -10 + ELSE IF( LDQ2.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAED8', -INFO ) + RETURN + END IF +* +* Need to initialize GIVPTR to O here in case of quick exit +* to prevent an unspecified code behavior (usually sigfault) +* when IWORK array on entry to *stedc is not zeroed +* (or at least some IWORK entries which used in *laed7 for GIVPTR). +* + GIVPTR = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + N1 = CUTPNT + N2 = N - N1 + N1P1 = N1 + 1 +* + IF( RHO.LT.ZERO ) THEN + CALL SSCAL( N2, MONE, Z( N1P1 ), 1 ) + END IF +* +* Normalize z so that norm(z) = 1 +* + T = ONE / SQRT( TWO ) + DO 10 J = 1, N + INDX( J ) = J + 10 CONTINUE + CALL SSCAL( N, T, Z, 1 ) + RHO = ABS( TWO*RHO ) +* +* Sort the eigenvalues into increasing order +* + DO 20 I = CUTPNT + 1, N + INDXQ( I ) = INDXQ( I ) + CUTPNT + 20 CONTINUE + DO 30 I = 1, N + DLAMDA( I ) = D( INDXQ( I ) ) + W( I ) = Z( INDXQ( I ) ) + 30 CONTINUE + I = 1 + J = CUTPNT + 1 + CALL SLAMRG( N1, N2, DLAMDA, 1, 1, INDX ) + DO 40 I = 1, N + D( I ) = DLAMDA( INDX( I ) ) + Z( I ) = W( INDX( I ) ) + 40 CONTINUE +* +* Calculate the allowable deflation tolerence +* + IMAX = ISAMAX( N, Z, 1 ) + JMAX = ISAMAX( N, D, 1 ) + EPS = SLAMCH( 'Epsilon' ) + TOL = EIGHT*EPS*ABS( D( JMAX ) ) +* +* If the rank-1 modifier is small enough, no more needs to be done +* except to reorganize Q so that its columns correspond with the +* elements in D. +* + IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN + K = 0 + IF( ICOMPQ.EQ.0 ) THEN + DO 50 J = 1, N + PERM( J ) = INDXQ( INDX( J ) ) + 50 CONTINUE + ELSE + DO 60 J = 1, N + PERM( J ) = INDXQ( INDX( J ) ) + CALL SCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 ) + 60 CONTINUE + CALL SLACPY( 'A', QSIZ, N, Q2( 1, 1 ), LDQ2, Q( 1, 1 ), + $ LDQ ) + END IF + RETURN + END IF +* +* If there are multiple eigenvalues then the problem deflates. Here +* the number of equal eigenvalues are found. As each equal +* eigenvalue is found, an elementary reflector is computed to rotate +* the corresponding eigensubspace so that the corresponding +* components of Z are zero in this new basis. +* + K = 0 + K2 = N + 1 + DO 70 J = 1, N + IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + INDXP( K2 ) = J + IF( J.EQ.N ) + $ GO TO 110 + ELSE + JLAM = J + GO TO 80 + END IF + 70 CONTINUE + 80 CONTINUE + J = J + 1 + IF( J.GT.N ) + $ GO TO 100 + IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + INDXP( K2 ) = J + ELSE +* +* Check if eigenvalues are close enough to allow deflation. +* + S = Z( JLAM ) + C = Z( J ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = SLAPY2( C, S ) + T = D( J ) - D( JLAM ) + C = C / TAU + S = -S / TAU + IF( ABS( T*C*S ).LE.TOL ) THEN +* +* Deflation is possible. +* + Z( J ) = TAU + Z( JLAM ) = ZERO +* +* Record the appropriate Givens rotation +* + GIVPTR = GIVPTR + 1 + GIVCOL( 1, GIVPTR ) = INDXQ( INDX( JLAM ) ) + GIVCOL( 2, GIVPTR ) = INDXQ( INDX( J ) ) + GIVNUM( 1, GIVPTR ) = C + GIVNUM( 2, GIVPTR ) = S + IF( ICOMPQ.EQ.1 ) THEN + CALL SROT( QSIZ, Q( 1, INDXQ( INDX( JLAM ) ) ), 1, + $ Q( 1, INDXQ( INDX( J ) ) ), 1, C, S ) + END IF + T = D( JLAM )*C*C + D( J )*S*S + D( J ) = D( JLAM )*S*S + D( J )*C*C + D( JLAM ) = T + K2 = K2 - 1 + I = 1 + 90 CONTINUE + IF( K2+I.LE.N ) THEN + IF( D( JLAM ).LT.D( INDXP( K2+I ) ) ) THEN + INDXP( K2+I-1 ) = INDXP( K2+I ) + INDXP( K2+I ) = JLAM + I = I + 1 + GO TO 90 + ELSE + INDXP( K2+I-1 ) = JLAM + END IF + ELSE + INDXP( K2+I-1 ) = JLAM + END IF + JLAM = J + ELSE + K = K + 1 + W( K ) = Z( JLAM ) + DLAMDA( K ) = D( JLAM ) + INDXP( K ) = JLAM + JLAM = J + END IF + END IF + GO TO 80 + 100 CONTINUE +* +* Record the last eigenvalue. +* + K = K + 1 + W( K ) = Z( JLAM ) + DLAMDA( K ) = D( JLAM ) + INDXP( K ) = JLAM +* + 110 CONTINUE +* +* Sort the eigenvalues and corresponding eigenvectors into DLAMDA +* and Q2 respectively. The eigenvalues/vectors which were not +* deflated go into the first K slots of DLAMDA and Q2 respectively, +* while those which were deflated go into the last N - K slots. +* + IF( ICOMPQ.EQ.0 ) THEN + DO 120 J = 1, N + JP = INDXP( J ) + DLAMDA( J ) = D( JP ) + PERM( J ) = INDXQ( INDX( JP ) ) + 120 CONTINUE + ELSE + DO 130 J = 1, N + JP = INDXP( J ) + DLAMDA( J ) = D( JP ) + PERM( J ) = INDXQ( INDX( JP ) ) + CALL SCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 ) + 130 CONTINUE + END IF +* +* The deflated eigenvalues and their corresponding vectors go back +* into the last N - K slots of D and Q respectively. +* + IF( K.LT.N ) THEN + IF( ICOMPQ.EQ.0 ) THEN + CALL SCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 ) + ELSE + CALL SCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 ) + CALL SLACPY( 'A', QSIZ, N-K, Q2( 1, K+1 ), LDQ2, + $ Q( 1, K+1 ), LDQ ) + END IF + END IF +* + RETURN +* +* End of SLAED8 +* + END diff --git a/dspl/liblapack/SRC/slaed9.f b/dspl/liblapack/SRC/slaed9.f new file mode 100644 index 0000000..780d265 --- /dev/null +++ b/dspl/liblapack/SRC/slaed9.f @@ -0,0 +1,294 @@ +*> \brief \b SLAED9 used by sstedc. Finds the roots of the secular equation and updates the eigenvectors. Used when the original matrix is dense. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAED9 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAED9( K, KSTART, KSTOP, N, D, Q, LDQ, RHO, DLAMDA, W, +* S, LDS, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, KSTART, KSTOP, LDQ, LDS, N +* REAL RHO +* .. +* .. Array Arguments .. +* REAL D( * ), DLAMDA( * ), Q( LDQ, * ), S( LDS, * ), +* $ W( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAED9 finds the roots of the secular equation, as defined by the +*> values in D, Z, and RHO, between KSTART and KSTOP. It makes the +*> appropriate calls to SLAED4 and then stores the new matrix of +*> eigenvectors for use in calculating the next level of Z vectors. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of terms in the rational function to be solved by +*> SLAED4. K >= 0. +*> \endverbatim +*> +*> \param[in] KSTART +*> \verbatim +*> KSTART is INTEGER +*> \endverbatim +*> +*> \param[in] KSTOP +*> \verbatim +*> KSTOP is INTEGER +*> The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP +*> are to be computed. 1 <= KSTART <= KSTOP <= K. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns in the Q matrix. +*> N >= K (delation may result in N > K). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> D(I) contains the updated eigenvalues +*> for KSTART <= I <= KSTOP. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ,N) +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max( 1, N ). +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is REAL +*> The value of the parameter in the rank one update equation. +*> RHO >= 0 required. +*> \endverbatim +*> +*> \param[in] DLAMDA +*> \verbatim +*> DLAMDA is REAL array, dimension (K) +*> The first K elements of this array contain the old roots +*> of the deflated updating problem. These are the poles +*> of the secular equation. +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is REAL array, dimension (K) +*> The first K elements of this array contain the components +*> of the deflation-adjusted updating vector. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (LDS, K) +*> Will contain the eigenvectors of the repaired matrix which +*> will be stored for subsequent Z vector calculation and +*> multiplied by the previously accumulated eigenvectors +*> to update the system. +*> \endverbatim +*> +*> \param[in] LDS +*> \verbatim +*> LDS is INTEGER +*> The leading dimension of S. LDS >= max( 1, K ). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, an eigenvalue did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +* +* ===================================================================== + SUBROUTINE SLAED9( K, KSTART, KSTOP, N, D, Q, LDQ, RHO, DLAMDA, W, + $ S, LDS, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, KSTART, KSTOP, LDQ, LDS, N + REAL RHO +* .. +* .. Array Arguments .. + REAL D( * ), DLAMDA( * ), Q( LDQ, * ), S( LDS, * ), + $ W( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + REAL TEMP +* .. +* .. External Functions .. + REAL SLAMC3, SNRM2 + EXTERNAL SLAMC3, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLAED4, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( K.LT.0 ) THEN + INFO = -1 + ELSE IF( KSTART.LT.1 .OR. KSTART.GT.MAX( 1, K ) ) THEN + INFO = -2 + ELSE IF( MAX( 1, KSTOP ).LT.KSTART .OR. KSTOP.GT.MAX( 1, K ) ) + $ THEN + INFO = -3 + ELSE IF( N.LT.K ) THEN + INFO = -4 + ELSE IF( LDQ.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDS.LT.MAX( 1, K ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAED9', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) + $ RETURN +* +* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), +* which on any of these machines zeros out the bottommost +* bit of DLAMDA(I) if it is 1; this makes the subsequent +* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DLAMDA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DLAMDA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 10 I = 1, N + DLAMDA( I ) = SLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I ) + 10 CONTINUE +* + DO 20 J = KSTART, KSTOP + CALL SLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO ) +* +* If the zero finder fails, the computation is terminated. +* + IF( INFO.NE.0 ) + $ GO TO 120 + 20 CONTINUE +* + IF( K.EQ.1 .OR. K.EQ.2 ) THEN + DO 40 I = 1, K + DO 30 J = 1, K + S( J, I ) = Q( J, I ) + 30 CONTINUE + 40 CONTINUE + GO TO 120 + END IF +* +* Compute updated W. +* + CALL SCOPY( K, W, 1, S, 1 ) +* +* Initialize W(I) = Q(I,I) +* + CALL SCOPY( K, Q, LDQ+1, W, 1 ) + DO 70 J = 1, K + DO 50 I = 1, J - 1 + W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) ) + 50 CONTINUE + DO 60 I = J + 1, K + W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) ) + 60 CONTINUE + 70 CONTINUE + DO 80 I = 1, K + W( I ) = SIGN( SQRT( -W( I ) ), S( I, 1 ) ) + 80 CONTINUE +* +* Compute eigenvectors of the modified rank-1 modification. +* + DO 110 J = 1, K + DO 90 I = 1, K + Q( I, J ) = W( I ) / Q( I, J ) + 90 CONTINUE + TEMP = SNRM2( K, Q( 1, J ), 1 ) + DO 100 I = 1, K + S( I, J ) = Q( I, J ) / TEMP + 100 CONTINUE + 110 CONTINUE +* + 120 CONTINUE + RETURN +* +* End of SLAED9 +* + END diff --git a/dspl/liblapack/SRC/slaeda.f b/dspl/liblapack/SRC/slaeda.f new file mode 100644 index 0000000..30d7a2b --- /dev/null +++ b/dspl/liblapack/SRC/slaeda.f @@ -0,0 +1,308 @@ +*> \brief \b SLAEDA used by sstedc. Computes the Z vector determining the rank-one modification of the diagonal matrix. Used when the original matrix is dense. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAEDA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR, +* GIVCOL, GIVNUM, Q, QPTR, Z, ZTEMP, INFO ) +* +* .. Scalar Arguments .. +* INTEGER CURLVL, CURPBM, INFO, N, TLVLS +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( 2, * ), GIVPTR( * ), PERM( * ), +* $ PRMPTR( * ), QPTR( * ) +* REAL GIVNUM( 2, * ), Q( * ), Z( * ), ZTEMP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAEDA computes the Z vector corresponding to the merge step in the +*> CURLVLth step of the merge process with TLVLS steps for the CURPBMth +*> problem. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] TLVLS +*> \verbatim +*> TLVLS is INTEGER +*> The total number of merging levels in the overall divide and +*> conquer tree. +*> \endverbatim +*> +*> \param[in] CURLVL +*> \verbatim +*> CURLVL is INTEGER +*> The current level in the overall merge routine, +*> 0 <= curlvl <= tlvls. +*> \endverbatim +*> +*> \param[in] CURPBM +*> \verbatim +*> CURPBM is INTEGER +*> The current problem in the current level in the overall +*> merge routine (counting from upper left to lower right). +*> \endverbatim +*> +*> \param[in] PRMPTR +*> \verbatim +*> PRMPTR is INTEGER array, dimension (N lg N) +*> Contains a list of pointers which indicate where in PERM a +*> level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) +*> indicates the size of the permutation and incidentally the +*> size of the full, non-deflated problem. +*> \endverbatim +*> +*> \param[in] PERM +*> \verbatim +*> PERM is INTEGER array, dimension (N lg N) +*> Contains the permutations (from deflation and sorting) to be +*> applied to each eigenblock. +*> \endverbatim +*> +*> \param[in] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER array, dimension (N lg N) +*> Contains a list of pointers which indicate where in GIVCOL a +*> level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) +*> indicates the number of Givens rotations. +*> \endverbatim +*> +*> \param[in] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension (2, N lg N) +*> Each pair of numbers indicates a pair of columns to take place +*> in a Givens rotation. +*> \endverbatim +*> +*> \param[in] GIVNUM +*> \verbatim +*> GIVNUM is REAL array, dimension (2, N lg N) +*> Each number indicates the S value to be used in the +*> corresponding Givens rotation. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is REAL array, dimension (N**2) +*> Contains the square eigenblocks from previous levels, the +*> starting positions for blocks are given by QPTR. +*> \endverbatim +*> +*> \param[in] QPTR +*> \verbatim +*> QPTR is INTEGER array, dimension (N+2) +*> Contains a list of pointers which indicate where in Q an +*> eigenblock is stored. SQRT( QPTR(i+1) - QPTR(i) ) indicates +*> the size of the block. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (N) +*> On output this vector contains the updating vector (the last +*> row of the first sub-eigenvector matrix and the first row of +*> the second sub-eigenvector matrix). +*> \endverbatim +*> +*> \param[out] ZTEMP +*> \verbatim +*> ZTEMP is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +* +* ===================================================================== + SUBROUTINE SLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR, + $ GIVCOL, GIVNUM, Q, QPTR, Z, ZTEMP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER CURLVL, CURPBM, INFO, N, TLVLS +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), GIVPTR( * ), PERM( * ), + $ PRMPTR( * ), QPTR( * ) + REAL GIVNUM( 2, * ), Q( * ), Z( * ), ZTEMP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E0, HALF = 0.5E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER BSIZ1, BSIZ2, CURR, I, K, MID, PSIZ1, PSIZ2, + $ PTR, ZPTR1 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMV, SROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -1 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAEDA', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine location of first number in second half. +* + MID = N / 2 + 1 +* +* Gather last/first rows of appropriate eigenblocks into center of Z +* + PTR = 1 +* +* Determine location of lowest level subproblem in the full storage +* scheme +* + CURR = PTR + CURPBM*2**CURLVL + 2**( CURLVL-1 ) - 1 +* +* Determine size of these matrices. We add HALF to the value of +* the SQRT in case the machine underestimates one of these square +* roots. +* + BSIZ1 = INT( HALF+SQRT( REAL( QPTR( CURR+1 )-QPTR( CURR ) ) ) ) + BSIZ2 = INT( HALF+SQRT( REAL( QPTR( CURR+2 )-QPTR( CURR+1 ) ) ) ) + DO 10 K = 1, MID - BSIZ1 - 1 + Z( K ) = ZERO + 10 CONTINUE + CALL SCOPY( BSIZ1, Q( QPTR( CURR )+BSIZ1-1 ), BSIZ1, + $ Z( MID-BSIZ1 ), 1 ) + CALL SCOPY( BSIZ2, Q( QPTR( CURR+1 ) ), BSIZ2, Z( MID ), 1 ) + DO 20 K = MID + BSIZ2, N + Z( K ) = ZERO + 20 CONTINUE +* +* Loop through remaining levels 1 -> CURLVL applying the Givens +* rotations and permutation and then multiplying the center matrices +* against the current Z. +* + PTR = 2**TLVLS + 1 + DO 70 K = 1, CURLVL - 1 + CURR = PTR + CURPBM*2**( CURLVL-K ) + 2**( CURLVL-K-1 ) - 1 + PSIZ1 = PRMPTR( CURR+1 ) - PRMPTR( CURR ) + PSIZ2 = PRMPTR( CURR+2 ) - PRMPTR( CURR+1 ) + ZPTR1 = MID - PSIZ1 +* +* Apply Givens at CURR and CURR+1 +* + DO 30 I = GIVPTR( CURR ), GIVPTR( CURR+1 ) - 1 + CALL SROT( 1, Z( ZPTR1+GIVCOL( 1, I )-1 ), 1, + $ Z( ZPTR1+GIVCOL( 2, I )-1 ), 1, GIVNUM( 1, I ), + $ GIVNUM( 2, I ) ) + 30 CONTINUE + DO 40 I = GIVPTR( CURR+1 ), GIVPTR( CURR+2 ) - 1 + CALL SROT( 1, Z( MID-1+GIVCOL( 1, I ) ), 1, + $ Z( MID-1+GIVCOL( 2, I ) ), 1, GIVNUM( 1, I ), + $ GIVNUM( 2, I ) ) + 40 CONTINUE + PSIZ1 = PRMPTR( CURR+1 ) - PRMPTR( CURR ) + PSIZ2 = PRMPTR( CURR+2 ) - PRMPTR( CURR+1 ) + DO 50 I = 0, PSIZ1 - 1 + ZTEMP( I+1 ) = Z( ZPTR1+PERM( PRMPTR( CURR )+I )-1 ) + 50 CONTINUE + DO 60 I = 0, PSIZ2 - 1 + ZTEMP( PSIZ1+I+1 ) = Z( MID+PERM( PRMPTR( CURR+1 )+I )-1 ) + 60 CONTINUE +* +* Multiply Blocks at CURR and CURR+1 +* +* Determine size of these matrices. We add HALF to the value of +* the SQRT in case the machine underestimates one of these +* square roots. +* + BSIZ1 = INT( HALF+SQRT( REAL( QPTR( CURR+1 )-QPTR( CURR ) ) ) ) + BSIZ2 = INT( HALF+SQRT( REAL( QPTR( CURR+2 )-QPTR( CURR+ + $ 1 ) ) ) ) + IF( BSIZ1.GT.0 ) THEN + CALL SGEMV( 'T', BSIZ1, BSIZ1, ONE, Q( QPTR( CURR ) ), + $ BSIZ1, ZTEMP( 1 ), 1, ZERO, Z( ZPTR1 ), 1 ) + END IF + CALL SCOPY( PSIZ1-BSIZ1, ZTEMP( BSIZ1+1 ), 1, Z( ZPTR1+BSIZ1 ), + $ 1 ) + IF( BSIZ2.GT.0 ) THEN + CALL SGEMV( 'T', BSIZ2, BSIZ2, ONE, Q( QPTR( CURR+1 ) ), + $ BSIZ2, ZTEMP( PSIZ1+1 ), 1, ZERO, Z( MID ), 1 ) + END IF + CALL SCOPY( PSIZ2-BSIZ2, ZTEMP( PSIZ1+BSIZ2+1 ), 1, + $ Z( MID+BSIZ2 ), 1 ) +* + PTR = PTR + 2**( TLVLS-K ) + 70 CONTINUE +* + RETURN +* +* End of SLAEDA +* + END diff --git a/dspl/liblapack/SRC/slaein.f b/dspl/liblapack/SRC/slaein.f new file mode 100644 index 0000000..e9defea --- /dev/null +++ b/dspl/liblapack/SRC/slaein.f @@ -0,0 +1,632 @@ +*> \brief \b SLAEIN computes a specified right or left eigenvector of an upper Hessenberg matrix by inverse iteration. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAEIN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAEIN( RIGHTV, NOINIT, N, H, LDH, WR, WI, VR, VI, B, +* LDB, WORK, EPS3, SMLNUM, BIGNUM, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL NOINIT, RIGHTV +* INTEGER INFO, LDB, LDH, N +* REAL BIGNUM, EPS3, SMLNUM, WI, WR +* .. +* .. Array Arguments .. +* REAL B( LDB, * ), H( LDH, * ), VI( * ), VR( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAEIN uses inverse iteration to find a right or left eigenvector +*> corresponding to the eigenvalue (WR,WI) of a real upper Hessenberg +*> matrix H. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] RIGHTV +*> \verbatim +*> RIGHTV is LOGICAL +*> = .TRUE. : compute right eigenvector; +*> = .FALSE.: compute left eigenvector. +*> \endverbatim +*> +*> \param[in] NOINIT +*> \verbatim +*> NOINIT is LOGICAL +*> = .TRUE. : no initial vector supplied in (VR,VI). +*> = .FALSE.: initial vector supplied in (VR,VI). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N >= 0. +*> \endverbatim +*> +*> \param[in] H +*> \verbatim +*> H is REAL array, dimension (LDH,N) +*> The upper Hessenberg matrix H. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH >= max(1,N). +*> \endverbatim +*> +*> \param[in] WR +*> \verbatim +*> WR is REAL +*> \endverbatim +*> +*> \param[in] WI +*> \verbatim +*> WI is REAL +*> The real and imaginary parts of the eigenvalue of H whose +*> corresponding right or left eigenvector is to be computed. +*> \endverbatim +*> +*> \param[in,out] VR +*> \verbatim +*> VR is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[in,out] VI +*> \verbatim +*> VI is REAL array, dimension (N) +*> On entry, if NOINIT = .FALSE. and WI = 0.0, VR must contain +*> a real starting vector for inverse iteration using the real +*> eigenvalue WR; if NOINIT = .FALSE. and WI.ne.0.0, VR and VI +*> must contain the real and imaginary parts of a complex +*> starting vector for inverse iteration using the complex +*> eigenvalue (WR,WI); otherwise VR and VI need not be set. +*> On exit, if WI = 0.0 (real eigenvalue), VR contains the +*> computed real eigenvector; if WI.ne.0.0 (complex eigenvalue), +*> VR and VI contain the real and imaginary parts of the +*> computed complex eigenvector. The eigenvector is normalized +*> so that the component of largest magnitude has magnitude 1; +*> here the magnitude of a complex number (x,y) is taken to be +*> |x| + |y|. +*> VI is not referenced if WI = 0.0. +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= N+1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[in] EPS3 +*> \verbatim +*> EPS3 is REAL +*> A small machine-dependent value which is used to perturb +*> close eigenvalues, and to replace zero pivots. +*> \endverbatim +*> +*> \param[in] SMLNUM +*> \verbatim +*> SMLNUM is REAL +*> A machine-dependent value close to the underflow threshold. +*> \endverbatim +*> +*> \param[in] BIGNUM +*> \verbatim +*> BIGNUM is REAL +*> A machine-dependent value close to the overflow threshold. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> = 1: inverse iteration did not converge; VR is set to the +*> last iterate, and so is VI if WI.ne.0.0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLAEIN( RIGHTV, NOINIT, N, H, LDH, WR, WI, VR, VI, B, + $ LDB, WORK, EPS3, SMLNUM, BIGNUM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL NOINIT, RIGHTV + INTEGER INFO, LDB, LDH, N + REAL BIGNUM, EPS3, SMLNUM, WI, WR +* .. +* .. Array Arguments .. + REAL B( LDB, * ), H( LDH, * ), VI( * ), VR( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TENTH + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TENTH = 1.0E-1 ) +* .. +* .. Local Scalars .. + CHARACTER NORMIN, TRANS + INTEGER I, I1, I2, I3, IERR, ITS, J + REAL ABSBII, ABSBJJ, EI, EJ, GROWTO, NORM, NRMSML, + $ REC, ROOTN, SCALE, TEMP, VCRIT, VMAX, VNORM, W, + $ W1, X, XI, XR, Y +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SASUM, SLAPY2, SNRM2 + EXTERNAL ISAMAX, SASUM, SLAPY2, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SLADIV, SLATRS, SSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* GROWTO is the threshold used in the acceptance test for an +* eigenvector. +* + ROOTN = SQRT( REAL( N ) ) + GROWTO = TENTH / ROOTN + NRMSML = MAX( ONE, EPS3*ROOTN )*SMLNUM +* +* Form B = H - (WR,WI)*I (except that the subdiagonal elements and +* the imaginary parts of the diagonal elements are not stored). +* + DO 20 J = 1, N + DO 10 I = 1, J - 1 + B( I, J ) = H( I, J ) + 10 CONTINUE + B( J, J ) = H( J, J ) - WR + 20 CONTINUE +* + IF( WI.EQ.ZERO ) THEN +* +* Real eigenvalue. +* + IF( NOINIT ) THEN +* +* Set initial vector. +* + DO 30 I = 1, N + VR( I ) = EPS3 + 30 CONTINUE + ELSE +* +* Scale supplied initial vector. +* + VNORM = SNRM2( N, VR, 1 ) + CALL SSCAL( N, ( EPS3*ROOTN ) / MAX( VNORM, NRMSML ), VR, + $ 1 ) + END IF +* + IF( RIGHTV ) THEN +* +* LU decomposition with partial pivoting of B, replacing zero +* pivots by EPS3. +* + DO 60 I = 1, N - 1 + EI = H( I+1, I ) + IF( ABS( B( I, I ) ).LT.ABS( EI ) ) THEN +* +* Interchange rows and eliminate. +* + X = B( I, I ) / EI + B( I, I ) = EI + DO 40 J = I + 1, N + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - X*TEMP + B( I, J ) = TEMP + 40 CONTINUE + ELSE +* +* Eliminate without interchange. +* + IF( B( I, I ).EQ.ZERO ) + $ B( I, I ) = EPS3 + X = EI / B( I, I ) + IF( X.NE.ZERO ) THEN + DO 50 J = I + 1, N + B( I+1, J ) = B( I+1, J ) - X*B( I, J ) + 50 CONTINUE + END IF + END IF + 60 CONTINUE + IF( B( N, N ).EQ.ZERO ) + $ B( N, N ) = EPS3 +* + TRANS = 'N' +* + ELSE +* +* UL decomposition with partial pivoting of B, replacing zero +* pivots by EPS3. +* + DO 90 J = N, 2, -1 + EJ = H( J, J-1 ) + IF( ABS( B( J, J ) ).LT.ABS( EJ ) ) THEN +* +* Interchange columns and eliminate. +* + X = B( J, J ) / EJ + B( J, J ) = EJ + DO 70 I = 1, J - 1 + TEMP = B( I, J-1 ) + B( I, J-1 ) = B( I, J ) - X*TEMP + B( I, J ) = TEMP + 70 CONTINUE + ELSE +* +* Eliminate without interchange. +* + IF( B( J, J ).EQ.ZERO ) + $ B( J, J ) = EPS3 + X = EJ / B( J, J ) + IF( X.NE.ZERO ) THEN + DO 80 I = 1, J - 1 + B( I, J-1 ) = B( I, J-1 ) - X*B( I, J ) + 80 CONTINUE + END IF + END IF + 90 CONTINUE + IF( B( 1, 1 ).EQ.ZERO ) + $ B( 1, 1 ) = EPS3 +* + TRANS = 'T' +* + END IF +* + NORMIN = 'N' + DO 110 ITS = 1, N +* +* Solve U*x = scale*v for a right eigenvector +* or U**T*x = scale*v for a left eigenvector, +* overwriting x on v. +* + CALL SLATRS( 'Upper', TRANS, 'Nonunit', NORMIN, N, B, LDB, + $ VR, SCALE, WORK, IERR ) + NORMIN = 'Y' +* +* Test for sufficient growth in the norm of v. +* + VNORM = SASUM( N, VR, 1 ) + IF( VNORM.GE.GROWTO*SCALE ) + $ GO TO 120 +* +* Choose new orthogonal starting vector and try again. +* + TEMP = EPS3 / ( ROOTN+ONE ) + VR( 1 ) = EPS3 + DO 100 I = 2, N + VR( I ) = TEMP + 100 CONTINUE + VR( N-ITS+1 ) = VR( N-ITS+1 ) - EPS3*ROOTN + 110 CONTINUE +* +* Failure to find eigenvector in N iterations. +* + INFO = 1 +* + 120 CONTINUE +* +* Normalize eigenvector. +* + I = ISAMAX( N, VR, 1 ) + CALL SSCAL( N, ONE / ABS( VR( I ) ), VR, 1 ) + ELSE +* +* Complex eigenvalue. +* + IF( NOINIT ) THEN +* +* Set initial vector. +* + DO 130 I = 1, N + VR( I ) = EPS3 + VI( I ) = ZERO + 130 CONTINUE + ELSE +* +* Scale supplied initial vector. +* + NORM = SLAPY2( SNRM2( N, VR, 1 ), SNRM2( N, VI, 1 ) ) + REC = ( EPS3*ROOTN ) / MAX( NORM, NRMSML ) + CALL SSCAL( N, REC, VR, 1 ) + CALL SSCAL( N, REC, VI, 1 ) + END IF +* + IF( RIGHTV ) THEN +* +* LU decomposition with partial pivoting of B, replacing zero +* pivots by EPS3. +* +* The imaginary part of the (i,j)-th element of U is stored in +* B(j+1,i). +* + B( 2, 1 ) = -WI + DO 140 I = 2, N + B( I+1, 1 ) = ZERO + 140 CONTINUE +* + DO 170 I = 1, N - 1 + ABSBII = SLAPY2( B( I, I ), B( I+1, I ) ) + EI = H( I+1, I ) + IF( ABSBII.LT.ABS( EI ) ) THEN +* +* Interchange rows and eliminate. +* + XR = B( I, I ) / EI + XI = B( I+1, I ) / EI + B( I, I ) = EI + B( I+1, I ) = ZERO + DO 150 J = I + 1, N + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - XR*TEMP + B( J+1, I+1 ) = B( J+1, I ) - XI*TEMP + B( I, J ) = TEMP + B( J+1, I ) = ZERO + 150 CONTINUE + B( I+2, I ) = -WI + B( I+1, I+1 ) = B( I+1, I+1 ) - XI*WI + B( I+2, I+1 ) = B( I+2, I+1 ) + XR*WI + ELSE +* +* Eliminate without interchanging rows. +* + IF( ABSBII.EQ.ZERO ) THEN + B( I, I ) = EPS3 + B( I+1, I ) = ZERO + ABSBII = EPS3 + END IF + EI = ( EI / ABSBII ) / ABSBII + XR = B( I, I )*EI + XI = -B( I+1, I )*EI + DO 160 J = I + 1, N + B( I+1, J ) = B( I+1, J ) - XR*B( I, J ) + + $ XI*B( J+1, I ) + B( J+1, I+1 ) = -XR*B( J+1, I ) - XI*B( I, J ) + 160 CONTINUE + B( I+2, I+1 ) = B( I+2, I+1 ) - WI + END IF +* +* Compute 1-norm of offdiagonal elements of i-th row. +* + WORK( I ) = SASUM( N-I, B( I, I+1 ), LDB ) + + $ SASUM( N-I, B( I+2, I ), 1 ) + 170 CONTINUE + IF( B( N, N ).EQ.ZERO .AND. B( N+1, N ).EQ.ZERO ) + $ B( N, N ) = EPS3 + WORK( N ) = ZERO +* + I1 = N + I2 = 1 + I3 = -1 + ELSE +* +* UL decomposition with partial pivoting of conjg(B), +* replacing zero pivots by EPS3. +* +* The imaginary part of the (i,j)-th element of U is stored in +* B(j+1,i). +* + B( N+1, N ) = WI + DO 180 J = 1, N - 1 + B( N+1, J ) = ZERO + 180 CONTINUE +* + DO 210 J = N, 2, -1 + EJ = H( J, J-1 ) + ABSBJJ = SLAPY2( B( J, J ), B( J+1, J ) ) + IF( ABSBJJ.LT.ABS( EJ ) ) THEN +* +* Interchange columns and eliminate +* + XR = B( J, J ) / EJ + XI = B( J+1, J ) / EJ + B( J, J ) = EJ + B( J+1, J ) = ZERO + DO 190 I = 1, J - 1 + TEMP = B( I, J-1 ) + B( I, J-1 ) = B( I, J ) - XR*TEMP + B( J, I ) = B( J+1, I ) - XI*TEMP + B( I, J ) = TEMP + B( J+1, I ) = ZERO + 190 CONTINUE + B( J+1, J-1 ) = WI + B( J-1, J-1 ) = B( J-1, J-1 ) + XI*WI + B( J, J-1 ) = B( J, J-1 ) - XR*WI + ELSE +* +* Eliminate without interchange. +* + IF( ABSBJJ.EQ.ZERO ) THEN + B( J, J ) = EPS3 + B( J+1, J ) = ZERO + ABSBJJ = EPS3 + END IF + EJ = ( EJ / ABSBJJ ) / ABSBJJ + XR = B( J, J )*EJ + XI = -B( J+1, J )*EJ + DO 200 I = 1, J - 1 + B( I, J-1 ) = B( I, J-1 ) - XR*B( I, J ) + + $ XI*B( J+1, I ) + B( J, I ) = -XR*B( J+1, I ) - XI*B( I, J ) + 200 CONTINUE + B( J, J-1 ) = B( J, J-1 ) + WI + END IF +* +* Compute 1-norm of offdiagonal elements of j-th column. +* + WORK( J ) = SASUM( J-1, B( 1, J ), 1 ) + + $ SASUM( J-1, B( J+1, 1 ), LDB ) + 210 CONTINUE + IF( B( 1, 1 ).EQ.ZERO .AND. B( 2, 1 ).EQ.ZERO ) + $ B( 1, 1 ) = EPS3 + WORK( 1 ) = ZERO +* + I1 = 1 + I2 = N + I3 = 1 + END IF +* + DO 270 ITS = 1, N + SCALE = ONE + VMAX = ONE + VCRIT = BIGNUM +* +* Solve U*(xr,xi) = scale*(vr,vi) for a right eigenvector, +* or U**T*(xr,xi) = scale*(vr,vi) for a left eigenvector, +* overwriting (xr,xi) on (vr,vi). +* + DO 250 I = I1, I2, I3 +* + IF( WORK( I ).GT.VCRIT ) THEN + REC = ONE / VMAX + CALL SSCAL( N, REC, VR, 1 ) + CALL SSCAL( N, REC, VI, 1 ) + SCALE = SCALE*REC + VMAX = ONE + VCRIT = BIGNUM + END IF +* + XR = VR( I ) + XI = VI( I ) + IF( RIGHTV ) THEN + DO 220 J = I + 1, N + XR = XR - B( I, J )*VR( J ) + B( J+1, I )*VI( J ) + XI = XI - B( I, J )*VI( J ) - B( J+1, I )*VR( J ) + 220 CONTINUE + ELSE + DO 230 J = 1, I - 1 + XR = XR - B( J, I )*VR( J ) + B( I+1, J )*VI( J ) + XI = XI - B( J, I )*VI( J ) - B( I+1, J )*VR( J ) + 230 CONTINUE + END IF +* + W = ABS( B( I, I ) ) + ABS( B( I+1, I ) ) + IF( W.GT.SMLNUM ) THEN + IF( W.LT.ONE ) THEN + W1 = ABS( XR ) + ABS( XI ) + IF( W1.GT.W*BIGNUM ) THEN + REC = ONE / W1 + CALL SSCAL( N, REC, VR, 1 ) + CALL SSCAL( N, REC, VI, 1 ) + XR = VR( I ) + XI = VI( I ) + SCALE = SCALE*REC + VMAX = VMAX*REC + END IF + END IF +* +* Divide by diagonal element of B. +* + CALL SLADIV( XR, XI, B( I, I ), B( I+1, I ), VR( I ), + $ VI( I ) ) + VMAX = MAX( ABS( VR( I ) )+ABS( VI( I ) ), VMAX ) + VCRIT = BIGNUM / VMAX + ELSE + DO 240 J = 1, N + VR( J ) = ZERO + VI( J ) = ZERO + 240 CONTINUE + VR( I ) = ONE + VI( I ) = ONE + SCALE = ZERO + VMAX = ONE + VCRIT = BIGNUM + END IF + 250 CONTINUE +* +* Test for sufficient growth in the norm of (VR,VI). +* + VNORM = SASUM( N, VR, 1 ) + SASUM( N, VI, 1 ) + IF( VNORM.GE.GROWTO*SCALE ) + $ GO TO 280 +* +* Choose a new orthogonal starting vector and try again. +* + Y = EPS3 / ( ROOTN+ONE ) + VR( 1 ) = EPS3 + VI( 1 ) = ZERO +* + DO 260 I = 2, N + VR( I ) = Y + VI( I ) = ZERO + 260 CONTINUE + VR( N-ITS+1 ) = VR( N-ITS+1 ) - EPS3*ROOTN + 270 CONTINUE +* +* Failure to find eigenvector in N iterations +* + INFO = 1 +* + 280 CONTINUE +* +* Normalize eigenvector. +* + VNORM = ZERO + DO 290 I = 1, N + VNORM = MAX( VNORM, ABS( VR( I ) )+ABS( VI( I ) ) ) + 290 CONTINUE + CALL SSCAL( N, ONE / VNORM, VR, 1 ) + CALL SSCAL( N, ONE / VNORM, VI, 1 ) +* + END IF +* + RETURN +* +* End of SLAEIN +* + END diff --git a/dspl/liblapack/SRC/slaev2.f b/dspl/liblapack/SRC/slaev2.f new file mode 100644 index 0000000..f4028e1 --- /dev/null +++ b/dspl/liblapack/SRC/slaev2.f @@ -0,0 +1,238 @@ +*> \brief \b SLAEV2 computes the eigenvalues and eigenvectors of a 2-by-2 symmetric/Hermitian matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAEV2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAEV2( A, B, C, RT1, RT2, CS1, SN1 ) +* +* .. Scalar Arguments .. +* REAL A, B, C, CS1, RT1, RT2, SN1 +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix +*> [ A B ] +*> [ B C ]. +*> On return, RT1 is the eigenvalue of larger absolute value, RT2 is the +*> eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right +*> eigenvector for RT1, giving the decomposition +*> +*> [ CS1 SN1 ] [ A B ] [ CS1 -SN1 ] = [ RT1 0 ] +*> [-SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ]. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] A +*> \verbatim +*> A is REAL +*> The (1,1) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL +*> The (1,2) element and the conjugate of the (2,1) element of +*> the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL +*> The (2,2) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[out] RT1 +*> \verbatim +*> RT1 is REAL +*> The eigenvalue of larger absolute value. +*> \endverbatim +*> +*> \param[out] RT2 +*> \verbatim +*> RT2 is REAL +*> The eigenvalue of smaller absolute value. +*> \endverbatim +*> +*> \param[out] CS1 +*> \verbatim +*> CS1 is REAL +*> \endverbatim +*> +*> \param[out] SN1 +*> \verbatim +*> SN1 is REAL +*> The vector (CS1, SN1) is a unit right eigenvector for RT1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> RT1 is accurate to a few ulps barring over/underflow. +*> +*> RT2 may be inaccurate if there is massive cancellation in the +*> determinant A*C-B*B; higher precision or correctly rounded or +*> correctly truncated arithmetic would be needed to compute RT2 +*> accurately in all cases. +*> +*> CS1 and SN1 are accurate to a few ulps barring over/underflow. +*> +*> Overflow is possible only if RT1 is within a factor of 5 of overflow. +*> Underflow is harmless if the input data is 0 or exceeds +*> underflow_threshold / macheps. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLAEV2( A, B, C, RT1, RT2, CS1, SN1 ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + REAL A, B, C, CS1, RT1, RT2, SN1 +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL TWO + PARAMETER ( TWO = 2.0E0 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL HALF + PARAMETER ( HALF = 0.5E0 ) +* .. +* .. Local Scalars .. + INTEGER SGN1, SGN2 + REAL AB, ACMN, ACMX, ACS, ADF, CS, CT, DF, RT, SM, + $ TB, TN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* +* Compute the eigenvalues +* + SM = A + C + DF = A - C + ADF = ABS( DF ) + TB = B + B + AB = ABS( TB ) + IF( ABS( A ).GT.ABS( C ) ) THEN + ACMX = A + ACMN = C + ELSE + ACMX = C + ACMN = A + END IF + IF( ADF.GT.AB ) THEN + RT = ADF*SQRT( ONE+( AB / ADF )**2 ) + ELSE IF( ADF.LT.AB ) THEN + RT = AB*SQRT( ONE+( ADF / AB )**2 ) + ELSE +* +* Includes case AB=ADF=0 +* + RT = AB*SQRT( TWO ) + END IF + IF( SM.LT.ZERO ) THEN + RT1 = HALF*( SM-RT ) + SGN1 = -1 +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE IF( SM.GT.ZERO ) THEN + RT1 = HALF*( SM+RT ) + SGN1 = 1 +* +* Order of execution important. +* To get fully accurate smaller eigenvalue, +* next line needs to be executed in higher precision. +* + RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B + ELSE +* +* Includes case RT1 = RT2 = 0 +* + RT1 = HALF*RT + RT2 = -HALF*RT + SGN1 = 1 + END IF +* +* Compute the eigenvector +* + IF( DF.GE.ZERO ) THEN + CS = DF + RT + SGN2 = 1 + ELSE + CS = DF - RT + SGN2 = -1 + END IF + ACS = ABS( CS ) + IF( ACS.GT.AB ) THEN + CT = -TB / CS + SN1 = ONE / SQRT( ONE+CT*CT ) + CS1 = CT*SN1 + ELSE + IF( AB.EQ.ZERO ) THEN + CS1 = ONE + SN1 = ZERO + ELSE + TN = -CS / TB + CS1 = ONE / SQRT( ONE+TN*TN ) + SN1 = TN*CS1 + END IF + END IF + IF( SGN1.EQ.SGN2 ) THEN + TN = CS1 + CS1 = -SN1 + SN1 = TN + END IF + RETURN +* +* End of SLAEV2 +* + END diff --git a/dspl/liblapack/SRC/slaexc.f b/dspl/liblapack/SRC/slaexc.f new file mode 100644 index 0000000..7e3c6c4 --- /dev/null +++ b/dspl/liblapack/SRC/slaexc.f @@ -0,0 +1,435 @@ +*> \brief \b SLAEXC swaps adjacent diagonal blocks of a real upper quasi-triangular matrix in Schur canonical form, by an orthogonal similarity transformation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAEXC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAEXC( WANTQ, N, T, LDT, Q, LDQ, J1, N1, N2, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* LOGICAL WANTQ +* INTEGER INFO, J1, LDQ, LDT, N, N1, N2 +* .. +* .. Array Arguments .. +* REAL Q( LDQ, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAEXC swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in +*> an upper quasi-triangular matrix T by an orthogonal similarity +*> transformation. +*> +*> T must be in Schur canonical form, that is, block upper triangular +*> with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block +*> has its diagonal elemnts equal and its off-diagonal elements of +*> opposite sign. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTQ +*> \verbatim +*> WANTQ is LOGICAL +*> = .TRUE. : accumulate the transformation in the matrix Q; +*> = .FALSE.: do not accumulate the transformation. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in,out] T +*> \verbatim +*> T is REAL array, dimension (LDT,N) +*> On entry, the upper quasi-triangular matrix T, in Schur +*> canonical form. +*> On exit, the updated matrix T, again in Schur canonical form. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ,N) +*> On entry, if WANTQ is .TRUE., the orthogonal matrix Q. +*> On exit, if WANTQ is .TRUE., the updated matrix Q. +*> If WANTQ is .FALSE., Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= 1; and if WANTQ is .TRUE., LDQ >= N. +*> \endverbatim +*> +*> \param[in] J1 +*> \verbatim +*> J1 is INTEGER +*> The index of the first row of the first block T11. +*> \endverbatim +*> +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> The order of the first block T11. N1 = 0, 1 or 2. +*> \endverbatim +*> +*> \param[in] N2 +*> \verbatim +*> N2 is INTEGER +*> The order of the second block T22. N2 = 0, 1 or 2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> = 1: the transformed matrix T would be too far from Schur +*> form; the blocks are not swapped and T and Q are +*> unchanged. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLAEXC( WANTQ, N, T, LDT, Q, LDQ, J1, N1, N2, WORK, + $ INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL WANTQ + INTEGER INFO, J1, LDQ, LDT, N, N1, N2 +* .. +* .. Array Arguments .. + REAL Q( LDQ, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL TEN + PARAMETER ( TEN = 1.0E+1 ) + INTEGER LDD, LDX + PARAMETER ( LDD = 4, LDX = 2 ) +* .. +* .. Local Scalars .. + INTEGER IERR, J2, J3, J4, K, ND + REAL CS, DNORM, EPS, SCALE, SMLNUM, SN, T11, T22, + $ T33, TAU, TAU1, TAU2, TEMP, THRESH, WI1, WI2, + $ WR1, WR2, XNORM +* .. +* .. Local Arrays .. + REAL D( LDD, 4 ), U( 3 ), U1( 3 ), U2( 3 ), + $ X( LDX, 2 ) +* .. +* .. External Functions .. + REAL SLAMCH, SLANGE + EXTERNAL SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SLACPY, SLANV2, SLARFG, SLARFX, SLARTG, SLASY2, + $ SROT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 .OR. N1.EQ.0 .OR. N2.EQ.0 ) + $ RETURN + IF( J1+N1.GT.N ) + $ RETURN +* + J2 = J1 + 1 + J3 = J1 + 2 + J4 = J1 + 3 +* + IF( N1.EQ.1 .AND. N2.EQ.1 ) THEN +* +* Swap two 1-by-1 blocks. +* + T11 = T( J1, J1 ) + T22 = T( J2, J2 ) +* +* Determine the transformation to perform the interchange. +* + CALL SLARTG( T( J1, J2 ), T22-T11, CS, SN, TEMP ) +* +* Apply transformation to the matrix T. +* + IF( J3.LE.N ) + $ CALL SROT( N-J1-1, T( J1, J3 ), LDT, T( J2, J3 ), LDT, CS, + $ SN ) + CALL SROT( J1-1, T( 1, J1 ), 1, T( 1, J2 ), 1, CS, SN ) +* + T( J1, J1 ) = T22 + T( J2, J2 ) = T11 +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL SROT( N, Q( 1, J1 ), 1, Q( 1, J2 ), 1, CS, SN ) + END IF +* + ELSE +* +* Swapping involves at least one 2-by-2 block. +* +* Copy the diagonal block of order N1+N2 to the local array D +* and compute its norm. +* + ND = N1 + N2 + CALL SLACPY( 'Full', ND, ND, T( J1, J1 ), LDT, D, LDD ) + DNORM = SLANGE( 'Max', ND, ND, D, LDD, WORK ) +* +* Compute machine-dependent threshold for test for accepting +* swap. +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + THRESH = MAX( TEN*EPS*DNORM, SMLNUM ) +* +* Solve T11*X - X*T22 = scale*T12 for X. +* + CALL SLASY2( .FALSE., .FALSE., -1, N1, N2, D, LDD, + $ D( N1+1, N1+1 ), LDD, D( 1, N1+1 ), LDD, SCALE, X, + $ LDX, XNORM, IERR ) +* +* Swap the adjacent diagonal blocks. +* + K = N1 + N1 + N2 - 3 + GO TO ( 10, 20, 30 )K +* + 10 CONTINUE +* +* N1 = 1, N2 = 2: generate elementary reflector H so that: +* +* ( scale, X11, X12 ) H = ( 0, 0, * ) +* + U( 1 ) = SCALE + U( 2 ) = X( 1, 1 ) + U( 3 ) = X( 1, 2 ) + CALL SLARFG( 3, U( 3 ), U, 1, TAU ) + U( 3 ) = ONE + T11 = T( J1, J1 ) +* +* Perform swap provisionally on diagonal block in D. +* + CALL SLARFX( 'L', 3, 3, U, TAU, D, LDD, WORK ) + CALL SLARFX( 'R', 3, 3, U, TAU, D, LDD, WORK ) +* +* Test whether to reject swap. +* + IF( MAX( ABS( D( 3, 1 ) ), ABS( D( 3, 2 ) ), ABS( D( 3, + $ 3 )-T11 ) ).GT.THRESH )GO TO 50 +* +* Accept swap: apply transformation to the entire matrix T. +* + CALL SLARFX( 'L', 3, N-J1+1, U, TAU, T( J1, J1 ), LDT, WORK ) + CALL SLARFX( 'R', J2, 3, U, TAU, T( 1, J1 ), LDT, WORK ) +* + T( J3, J1 ) = ZERO + T( J3, J2 ) = ZERO + T( J3, J3 ) = T11 +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL SLARFX( 'R', N, 3, U, TAU, Q( 1, J1 ), LDQ, WORK ) + END IF + GO TO 40 +* + 20 CONTINUE +* +* N1 = 2, N2 = 1: generate elementary reflector H so that: +* +* H ( -X11 ) = ( * ) +* ( -X21 ) = ( 0 ) +* ( scale ) = ( 0 ) +* + U( 1 ) = -X( 1, 1 ) + U( 2 ) = -X( 2, 1 ) + U( 3 ) = SCALE + CALL SLARFG( 3, U( 1 ), U( 2 ), 1, TAU ) + U( 1 ) = ONE + T33 = T( J3, J3 ) +* +* Perform swap provisionally on diagonal block in D. +* + CALL SLARFX( 'L', 3, 3, U, TAU, D, LDD, WORK ) + CALL SLARFX( 'R', 3, 3, U, TAU, D, LDD, WORK ) +* +* Test whether to reject swap. +* + IF( MAX( ABS( D( 2, 1 ) ), ABS( D( 3, 1 ) ), ABS( D( 1, + $ 1 )-T33 ) ).GT.THRESH )GO TO 50 +* +* Accept swap: apply transformation to the entire matrix T. +* + CALL SLARFX( 'R', J3, 3, U, TAU, T( 1, J1 ), LDT, WORK ) + CALL SLARFX( 'L', 3, N-J1, U, TAU, T( J1, J2 ), LDT, WORK ) +* + T( J1, J1 ) = T33 + T( J2, J1 ) = ZERO + T( J3, J1 ) = ZERO +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL SLARFX( 'R', N, 3, U, TAU, Q( 1, J1 ), LDQ, WORK ) + END IF + GO TO 40 +* + 30 CONTINUE +* +* N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so +* that: +* +* H(2) H(1) ( -X11 -X12 ) = ( * * ) +* ( -X21 -X22 ) ( 0 * ) +* ( scale 0 ) ( 0 0 ) +* ( 0 scale ) ( 0 0 ) +* + U1( 1 ) = -X( 1, 1 ) + U1( 2 ) = -X( 2, 1 ) + U1( 3 ) = SCALE + CALL SLARFG( 3, U1( 1 ), U1( 2 ), 1, TAU1 ) + U1( 1 ) = ONE +* + TEMP = -TAU1*( X( 1, 2 )+U1( 2 )*X( 2, 2 ) ) + U2( 1 ) = -TEMP*U1( 2 ) - X( 2, 2 ) + U2( 2 ) = -TEMP*U1( 3 ) + U2( 3 ) = SCALE + CALL SLARFG( 3, U2( 1 ), U2( 2 ), 1, TAU2 ) + U2( 1 ) = ONE +* +* Perform swap provisionally on diagonal block in D. +* + CALL SLARFX( 'L', 3, 4, U1, TAU1, D, LDD, WORK ) + CALL SLARFX( 'R', 4, 3, U1, TAU1, D, LDD, WORK ) + CALL SLARFX( 'L', 3, 4, U2, TAU2, D( 2, 1 ), LDD, WORK ) + CALL SLARFX( 'R', 4, 3, U2, TAU2, D( 1, 2 ), LDD, WORK ) +* +* Test whether to reject swap. +* + IF( MAX( ABS( D( 3, 1 ) ), ABS( D( 3, 2 ) ), ABS( D( 4, 1 ) ), + $ ABS( D( 4, 2 ) ) ).GT.THRESH )GO TO 50 +* +* Accept swap: apply transformation to the entire matrix T. +* + CALL SLARFX( 'L', 3, N-J1+1, U1, TAU1, T( J1, J1 ), LDT, WORK ) + CALL SLARFX( 'R', J4, 3, U1, TAU1, T( 1, J1 ), LDT, WORK ) + CALL SLARFX( 'L', 3, N-J1+1, U2, TAU2, T( J2, J1 ), LDT, WORK ) + CALL SLARFX( 'R', J4, 3, U2, TAU2, T( 1, J2 ), LDT, WORK ) +* + T( J3, J1 ) = ZERO + T( J3, J2 ) = ZERO + T( J4, J1 ) = ZERO + T( J4, J2 ) = ZERO +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL SLARFX( 'R', N, 3, U1, TAU1, Q( 1, J1 ), LDQ, WORK ) + CALL SLARFX( 'R', N, 3, U2, TAU2, Q( 1, J2 ), LDQ, WORK ) + END IF +* + 40 CONTINUE +* + IF( N2.EQ.2 ) THEN +* +* Standardize new 2-by-2 block T11 +* + CALL SLANV2( T( J1, J1 ), T( J1, J2 ), T( J2, J1 ), + $ T( J2, J2 ), WR1, WI1, WR2, WI2, CS, SN ) + CALL SROT( N-J1-1, T( J1, J1+2 ), LDT, T( J2, J1+2 ), LDT, + $ CS, SN ) + CALL SROT( J1-1, T( 1, J1 ), 1, T( 1, J2 ), 1, CS, SN ) + IF( WANTQ ) + $ CALL SROT( N, Q( 1, J1 ), 1, Q( 1, J2 ), 1, CS, SN ) + END IF +* + IF( N1.EQ.2 ) THEN +* +* Standardize new 2-by-2 block T22 +* + J3 = J1 + N2 + J4 = J3 + 1 + CALL SLANV2( T( J3, J3 ), T( J3, J4 ), T( J4, J3 ), + $ T( J4, J4 ), WR1, WI1, WR2, WI2, CS, SN ) + IF( J3+2.LE.N ) + $ CALL SROT( N-J3-1, T( J3, J3+2 ), LDT, T( J4, J3+2 ), + $ LDT, CS, SN ) + CALL SROT( J3-1, T( 1, J3 ), 1, T( 1, J4 ), 1, CS, SN ) + IF( WANTQ ) + $ CALL SROT( N, Q( 1, J3 ), 1, Q( 1, J4 ), 1, CS, SN ) + END IF +* + END IF + RETURN +* +* Exit with INFO = 1 if swap was rejected. +* + 50 INFO = 1 + RETURN +* +* End of SLAEXC +* + END diff --git a/dspl/liblapack/SRC/slag2.f b/dspl/liblapack/SRC/slag2.f new file mode 100644 index 0000000..ac16fb1 --- /dev/null +++ b/dspl/liblapack/SRC/slag2.f @@ -0,0 +1,379 @@ +*> \brief \b SLAG2 computes the eigenvalues of a 2-by-2 generalized eigenvalue problem, with scaling as necessary to avoid over-/underflow. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAG2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAG2( A, LDA, B, LDB, SAFMIN, SCALE1, SCALE2, WR1, +* WR2, WI ) +* +* .. Scalar Arguments .. +* INTEGER LDA, LDB +* REAL SAFMIN, SCALE1, SCALE2, WI, WR1, WR2 +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAG2 computes the eigenvalues of a 2 x 2 generalized eigenvalue +*> problem A - w B, with scaling as necessary to avoid over-/underflow. +*> +*> The scaling factor "s" results in a modified eigenvalue equation +*> +*> s A - w B +*> +*> where s is a non-negative scaling factor chosen so that w, w B, +*> and s A do not overflow and, if possible, do not underflow, either. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA, 2) +*> On entry, the 2 x 2 matrix A. It is assumed that its 1-norm +*> is less than 1/SAFMIN. Entries less than +*> sqrt(SAFMIN)*norm(A) are subject to being treated as zero. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= 2. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB, 2) +*> On entry, the 2 x 2 upper triangular matrix B. It is +*> assumed that the one-norm of B is less than 1/SAFMIN. The +*> diagonals should be at least sqrt(SAFMIN) times the largest +*> element of B (in absolute value); if a diagonal is smaller +*> than that, then +/- sqrt(SAFMIN) will be used instead of +*> that diagonal. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= 2. +*> \endverbatim +*> +*> \param[in] SAFMIN +*> \verbatim +*> SAFMIN is REAL +*> The smallest positive number s.t. 1/SAFMIN does not +*> overflow. (This should always be SLAMCH('S') -- it is an +*> argument in order to avoid having to call SLAMCH frequently.) +*> \endverbatim +*> +*> \param[out] SCALE1 +*> \verbatim +*> SCALE1 is REAL +*> A scaling factor used to avoid over-/underflow in the +*> eigenvalue equation which defines the first eigenvalue. If +*> the eigenvalues are complex, then the eigenvalues are +*> ( WR1 +/- WI i ) / SCALE1 (which may lie outside the +*> exponent range of the machine), SCALE1=SCALE2, and SCALE1 +*> will always be positive. If the eigenvalues are real, then +*> the first (real) eigenvalue is WR1 / SCALE1 , but this may +*> overflow or underflow, and in fact, SCALE1 may be zero or +*> less than the underflow threshold if the exact eigenvalue +*> is sufficiently large. +*> \endverbatim +*> +*> \param[out] SCALE2 +*> \verbatim +*> SCALE2 is REAL +*> A scaling factor used to avoid over-/underflow in the +*> eigenvalue equation which defines the second eigenvalue. If +*> the eigenvalues are complex, then SCALE2=SCALE1. If the +*> eigenvalues are real, then the second (real) eigenvalue is +*> WR2 / SCALE2 , but this may overflow or underflow, and in +*> fact, SCALE2 may be zero or less than the underflow +*> threshold if the exact eigenvalue is sufficiently large. +*> \endverbatim +*> +*> \param[out] WR1 +*> \verbatim +*> WR1 is REAL +*> If the eigenvalue is real, then WR1 is SCALE1 times the +*> eigenvalue closest to the (2,2) element of A B**(-1). If the +*> eigenvalue is complex, then WR1=WR2 is SCALE1 times the real +*> part of the eigenvalues. +*> \endverbatim +*> +*> \param[out] WR2 +*> \verbatim +*> WR2 is REAL +*> If the eigenvalue is real, then WR2 is SCALE2 times the +*> other eigenvalue. If the eigenvalue is complex, then +*> WR1=WR2 is SCALE1 times the real part of the eigenvalues. +*> \endverbatim +*> +*> \param[out] WI +*> \verbatim +*> WI is REAL +*> If the eigenvalue is real, then WI is zero. If the +*> eigenvalue is complex, then WI is SCALE1 times the imaginary +*> part of the eigenvalues. WI will always be non-negative. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLAG2( A, LDA, B, LDB, SAFMIN, SCALE1, SCALE2, WR1, + $ WR2, WI ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER LDA, LDB + REAL SAFMIN, SCALE1, SCALE2, WI, WR1, WR2 +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) + REAL HALF + PARAMETER ( HALF = ONE / TWO ) + REAL FUZZY1 + PARAMETER ( FUZZY1 = ONE+1.0E-5 ) +* .. +* .. Local Scalars .. + REAL A11, A12, A21, A22, ABI22, ANORM, AS11, AS12, + $ AS22, ASCALE, B11, B12, B22, BINV11, BINV22, + $ BMIN, BNORM, BSCALE, BSIZE, C1, C2, C3, C4, C5, + $ DIFF, DISCR, PP, QQ, R, RTMAX, RTMIN, S1, S2, + $ SAFMAX, SHIFT, SS, SUM, WABS, WBIG, WDET, + $ WSCALE, WSIZE, WSMALL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SIGN, SQRT +* .. +* .. Executable Statements .. +* + RTMIN = SQRT( SAFMIN ) + RTMAX = ONE / RTMIN + SAFMAX = ONE / SAFMIN +* +* Scale A +* + ANORM = MAX( ABS( A( 1, 1 ) )+ABS( A( 2, 1 ) ), + $ ABS( A( 1, 2 ) )+ABS( A( 2, 2 ) ), SAFMIN ) + ASCALE = ONE / ANORM + A11 = ASCALE*A( 1, 1 ) + A21 = ASCALE*A( 2, 1 ) + A12 = ASCALE*A( 1, 2 ) + A22 = ASCALE*A( 2, 2 ) +* +* Perturb B if necessary to insure non-singularity +* + B11 = B( 1, 1 ) + B12 = B( 1, 2 ) + B22 = B( 2, 2 ) + BMIN = RTMIN*MAX( ABS( B11 ), ABS( B12 ), ABS( B22 ), RTMIN ) + IF( ABS( B11 ).LT.BMIN ) + $ B11 = SIGN( BMIN, B11 ) + IF( ABS( B22 ).LT.BMIN ) + $ B22 = SIGN( BMIN, B22 ) +* +* Scale B +* + BNORM = MAX( ABS( B11 ), ABS( B12 )+ABS( B22 ), SAFMIN ) + BSIZE = MAX( ABS( B11 ), ABS( B22 ) ) + BSCALE = ONE / BSIZE + B11 = B11*BSCALE + B12 = B12*BSCALE + B22 = B22*BSCALE +* +* Compute larger eigenvalue by method described by C. van Loan +* +* ( AS is A shifted by -SHIFT*B ) +* + BINV11 = ONE / B11 + BINV22 = ONE / B22 + S1 = A11*BINV11 + S2 = A22*BINV22 + IF( ABS( S1 ).LE.ABS( S2 ) ) THEN + AS12 = A12 - S1*B12 + AS22 = A22 - S1*B22 + SS = A21*( BINV11*BINV22 ) + ABI22 = AS22*BINV22 - SS*B12 + PP = HALF*ABI22 + SHIFT = S1 + ELSE + AS12 = A12 - S2*B12 + AS11 = A11 - S2*B11 + SS = A21*( BINV11*BINV22 ) + ABI22 = -SS*B12 + PP = HALF*( AS11*BINV11+ABI22 ) + SHIFT = S2 + END IF + QQ = SS*AS12 + IF( ABS( PP*RTMIN ).GE.ONE ) THEN + DISCR = ( RTMIN*PP )**2 + QQ*SAFMIN + R = SQRT( ABS( DISCR ) )*RTMAX + ELSE + IF( PP**2+ABS( QQ ).LE.SAFMIN ) THEN + DISCR = ( RTMAX*PP )**2 + QQ*SAFMAX + R = SQRT( ABS( DISCR ) )*RTMIN + ELSE + DISCR = PP**2 + QQ + R = SQRT( ABS( DISCR ) ) + END IF + END IF +* +* Note: the test of R in the following IF is to cover the case when +* DISCR is small and negative and is flushed to zero during +* the calculation of R. On machines which have a consistent +* flush-to-zero threshold and handle numbers above that +* threshold correctly, it would not be necessary. +* + IF( DISCR.GE.ZERO .OR. R.EQ.ZERO ) THEN + SUM = PP + SIGN( R, PP ) + DIFF = PP - SIGN( R, PP ) + WBIG = SHIFT + SUM +* +* Compute smaller eigenvalue +* + WSMALL = SHIFT + DIFF + IF( HALF*ABS( WBIG ).GT.MAX( ABS( WSMALL ), SAFMIN ) ) THEN + WDET = ( A11*A22-A12*A21 )*( BINV11*BINV22 ) + WSMALL = WDET / WBIG + END IF +* +* Choose (real) eigenvalue closest to 2,2 element of A*B**(-1) +* for WR1. +* + IF( PP.GT.ABI22 ) THEN + WR1 = MIN( WBIG, WSMALL ) + WR2 = MAX( WBIG, WSMALL ) + ELSE + WR1 = MAX( WBIG, WSMALL ) + WR2 = MIN( WBIG, WSMALL ) + END IF + WI = ZERO + ELSE +* +* Complex eigenvalues +* + WR1 = SHIFT + PP + WR2 = WR1 + WI = R + END IF +* +* Further scaling to avoid underflow and overflow in computing +* SCALE1 and overflow in computing w*B. +* +* This scale factor (WSCALE) is bounded from above using C1 and C2, +* and from below using C3 and C4. +* C1 implements the condition s A must never overflow. +* C2 implements the condition w B must never overflow. +* C3, with C2, +* implement the condition that s A - w B must never overflow. +* C4 implements the condition s should not underflow. +* C5 implements the condition max(s,|w|) should be at least 2. +* + C1 = BSIZE*( SAFMIN*MAX( ONE, ASCALE ) ) + C2 = SAFMIN*MAX( ONE, BNORM ) + C3 = BSIZE*SAFMIN + IF( ASCALE.LE.ONE .AND. BSIZE.LE.ONE ) THEN + C4 = MIN( ONE, ( ASCALE / SAFMIN )*BSIZE ) + ELSE + C4 = ONE + END IF + IF( ASCALE.LE.ONE .OR. BSIZE.LE.ONE ) THEN + C5 = MIN( ONE, ASCALE*BSIZE ) + ELSE + C5 = ONE + END IF +* +* Scale first eigenvalue +* + WABS = ABS( WR1 ) + ABS( WI ) + WSIZE = MAX( SAFMIN, C1, FUZZY1*( WABS*C2+C3 ), + $ MIN( C4, HALF*MAX( WABS, C5 ) ) ) + IF( WSIZE.NE.ONE ) THEN + WSCALE = ONE / WSIZE + IF( WSIZE.GT.ONE ) THEN + SCALE1 = ( MAX( ASCALE, BSIZE )*WSCALE )* + $ MIN( ASCALE, BSIZE ) + ELSE + SCALE1 = ( MIN( ASCALE, BSIZE )*WSCALE )* + $ MAX( ASCALE, BSIZE ) + END IF + WR1 = WR1*WSCALE + IF( WI.NE.ZERO ) THEN + WI = WI*WSCALE + WR2 = WR1 + SCALE2 = SCALE1 + END IF + ELSE + SCALE1 = ASCALE*BSIZE + SCALE2 = SCALE1 + END IF +* +* Scale second eigenvalue (if real) +* + IF( WI.EQ.ZERO ) THEN + WSIZE = MAX( SAFMIN, C1, FUZZY1*( ABS( WR2 )*C2+C3 ), + $ MIN( C4, HALF*MAX( ABS( WR2 ), C5 ) ) ) + IF( WSIZE.NE.ONE ) THEN + WSCALE = ONE / WSIZE + IF( WSIZE.GT.ONE ) THEN + SCALE2 = ( MAX( ASCALE, BSIZE )*WSCALE )* + $ MIN( ASCALE, BSIZE ) + ELSE + SCALE2 = ( MIN( ASCALE, BSIZE )*WSCALE )* + $ MAX( ASCALE, BSIZE ) + END IF + WR2 = WR2*WSCALE + ELSE + SCALE2 = ASCALE*BSIZE + END IF + END IF +* +* End of SLAG2 +* + RETURN + END diff --git a/dspl/liblapack/SRC/slag2d.f b/dspl/liblapack/SRC/slag2d.f new file mode 100644 index 0000000..01515eb --- /dev/null +++ b/dspl/liblapack/SRC/slag2d.f @@ -0,0 +1,137 @@ +*> \brief \b SLAG2D converts a single precision matrix to a double precision matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAG2D + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAG2D( M, N, SA, LDSA, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDSA, M, N +* .. +* .. Array Arguments .. +* REAL SA( LDSA, * ) +* DOUBLE PRECISION A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAG2D converts a SINGLE PRECISION matrix, SA, to a DOUBLE +*> PRECISION matrix, A. +*> +*> Note that while it is possible to overflow while converting +*> from double to single, it is not possible to overflow when +*> converting from single to double. +*> +*> This is an auxiliary routine so there is no argument checking. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of lines of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] SA +*> \verbatim +*> SA is REAL array, dimension (LDSA,N) +*> On entry, the M-by-N coefficient matrix SA. +*> \endverbatim +*> +*> \param[in] LDSA +*> \verbatim +*> LDSA is INTEGER +*> The leading dimension of the array SA. LDSA >= max(1,M). +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On exit, the M-by-N coefficient matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLAG2D( M, N, SA, LDSA, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDSA, M, N +* .. +* .. Array Arguments .. + REAL SA( LDSA, * ) + DOUBLE PRECISION A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. Executable Statements .. +* + INFO = 0 + DO 20 J = 1, N + DO 10 I = 1, M + A( I, J ) = SA( I, J ) + 10 CONTINUE + 20 CONTINUE + RETURN +* +* End of SLAG2D +* + END diff --git a/dspl/liblapack/SRC/slags2.f b/dspl/liblapack/SRC/slags2.f new file mode 100644 index 0000000..4076f39 --- /dev/null +++ b/dspl/liblapack/SRC/slags2.f @@ -0,0 +1,362 @@ +*> \brief \b SLAGS2 computes 2-by-2 orthogonal matrices U, V, and Q, and applies them to matrices A and B such that the rows of the transformed A and B are parallel. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAGS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, CSV, +* SNV, CSQ, SNQ ) +* +* .. Scalar Arguments .. +* LOGICAL UPPER +* REAL A1, A2, A3, B1, B2, B3, CSQ, CSU, CSV, SNQ, +* $ SNU, SNV +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAGS2 computes 2-by-2 orthogonal matrices U, V and Q, such +*> that if ( UPPER ) then +*> +*> U**T *A*Q = U**T *( A1 A2 )*Q = ( x 0 ) +*> ( 0 A3 ) ( x x ) +*> and +*> V**T*B*Q = V**T *( B1 B2 )*Q = ( x 0 ) +*> ( 0 B3 ) ( x x ) +*> +*> or if ( .NOT.UPPER ) then +*> +*> U**T *A*Q = U**T *( A1 0 )*Q = ( x x ) +*> ( A2 A3 ) ( 0 x ) +*> and +*> V**T*B*Q = V**T*( B1 0 )*Q = ( x x ) +*> ( B2 B3 ) ( 0 x ) +*> +*> The rows of the transformed A and B are parallel, where +*> +*> U = ( CSU SNU ), V = ( CSV SNV ), Q = ( CSQ SNQ ) +*> ( -SNU CSU ) ( -SNV CSV ) ( -SNQ CSQ ) +*> +*> Z**T denotes the transpose of Z. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPPER +*> \verbatim +*> UPPER is LOGICAL +*> = .TRUE.: the input matrices A and B are upper triangular. +*> = .FALSE.: the input matrices A and B are lower triangular. +*> \endverbatim +*> +*> \param[in] A1 +*> \verbatim +*> A1 is REAL +*> \endverbatim +*> +*> \param[in] A2 +*> \verbatim +*> A2 is REAL +*> \endverbatim +*> +*> \param[in] A3 +*> \verbatim +*> A3 is REAL +*> On entry, A1, A2 and A3 are elements of the input 2-by-2 +*> upper (lower) triangular matrix A. +*> \endverbatim +*> +*> \param[in] B1 +*> \verbatim +*> B1 is REAL +*> \endverbatim +*> +*> \param[in] B2 +*> \verbatim +*> B2 is REAL +*> \endverbatim +*> +*> \param[in] B3 +*> \verbatim +*> B3 is REAL +*> On entry, B1, B2 and B3 are elements of the input 2-by-2 +*> upper (lower) triangular matrix B. +*> \endverbatim +*> +*> \param[out] CSU +*> \verbatim +*> CSU is REAL +*> \endverbatim +*> +*> \param[out] SNU +*> \verbatim +*> SNU is REAL +*> The desired orthogonal matrix U. +*> \endverbatim +*> +*> \param[out] CSV +*> \verbatim +*> CSV is REAL +*> \endverbatim +*> +*> \param[out] SNV +*> \verbatim +*> SNV is REAL +*> The desired orthogonal matrix V. +*> \endverbatim +*> +*> \param[out] CSQ +*> \verbatim +*> CSQ is REAL +*> \endverbatim +*> +*> \param[out] SNQ +*> \verbatim +*> SNQ is REAL +*> The desired orthogonal matrix Q. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, CSV, + $ SNV, CSQ, SNQ ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL UPPER + REAL A1, A2, A3, B1, B2, B3, CSQ, CSU, CSV, SNQ, + $ SNU, SNV +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + REAL A, AUA11, AUA12, AUA21, AUA22, AVB11, AVB12, + $ AVB21, AVB22, CSL, CSR, D, S1, S2, SNL, + $ SNR, UA11R, UA22R, VB11R, VB22R, B, C, R, UA11, + $ UA12, UA21, UA22, VB11, VB12, VB21, VB22 +* .. +* .. External Subroutines .. + EXTERNAL SLARTG, SLASV2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + IF( UPPER ) THEN +* +* Input matrices A and B are upper triangular matrices +* +* Form matrix C = A*adj(B) = ( a b ) +* ( 0 d ) +* + A = A1*B3 + D = A3*B1 + B = A2*B1 - A1*B2 +* +* The SVD of real 2-by-2 triangular C +* +* ( CSL -SNL )*( A B )*( CSR SNR ) = ( R 0 ) +* ( SNL CSL ) ( 0 D ) ( -SNR CSR ) ( 0 T ) +* + CALL SLASV2( A, B, D, S1, S2, SNR, CSR, SNL, CSL ) +* + IF( ABS( CSL ).GE.ABS( SNL ) .OR. ABS( CSR ).GE.ABS( SNR ) ) + $ THEN +* +* Compute the (1,1) and (1,2) elements of U**T *A and V**T *B, +* and (1,2) element of |U|**T *|A| and |V|**T *|B|. +* + UA11R = CSL*A1 + UA12 = CSL*A2 + SNL*A3 +* + VB11R = CSR*B1 + VB12 = CSR*B2 + SNR*B3 +* + AUA12 = ABS( CSL )*ABS( A2 ) + ABS( SNL )*ABS( A3 ) + AVB12 = ABS( CSR )*ABS( B2 ) + ABS( SNR )*ABS( B3 ) +* +* zero (1,2) elements of U**T *A and V**T *B +* + IF( ( ABS( UA11R )+ABS( UA12 ) ).NE.ZERO ) THEN + IF( AUA12 / ( ABS( UA11R )+ABS( UA12 ) ).LE.AVB12 / + $ ( ABS( VB11R )+ABS( VB12 ) ) ) THEN + CALL SLARTG( -UA11R, UA12, CSQ, SNQ, R ) + ELSE + CALL SLARTG( -VB11R, VB12, CSQ, SNQ, R ) + END IF + ELSE + CALL SLARTG( -VB11R, VB12, CSQ, SNQ, R ) + END IF +* + CSU = CSL + SNU = -SNL + CSV = CSR + SNV = -SNR +* + ELSE +* +* Compute the (2,1) and (2,2) elements of U**T *A and V**T *B, +* and (2,2) element of |U|**T *|A| and |V|**T *|B|. +* + UA21 = -SNL*A1 + UA22 = -SNL*A2 + CSL*A3 +* + VB21 = -SNR*B1 + VB22 = -SNR*B2 + CSR*B3 +* + AUA22 = ABS( SNL )*ABS( A2 ) + ABS( CSL )*ABS( A3 ) + AVB22 = ABS( SNR )*ABS( B2 ) + ABS( CSR )*ABS( B3 ) +* +* zero (2,2) elements of U**T*A and V**T*B, and then swap. +* + IF( ( ABS( UA21 )+ABS( UA22 ) ).NE.ZERO ) THEN + IF( AUA22 / ( ABS( UA21 )+ABS( UA22 ) ).LE.AVB22 / + $ ( ABS( VB21 )+ABS( VB22 ) ) ) THEN + CALL SLARTG( -UA21, UA22, CSQ, SNQ, R ) + ELSE + CALL SLARTG( -VB21, VB22, CSQ, SNQ, R ) + END IF + ELSE + CALL SLARTG( -VB21, VB22, CSQ, SNQ, R ) + END IF +* + CSU = SNL + SNU = CSL + CSV = SNR + SNV = CSR +* + END IF +* + ELSE +* +* Input matrices A and B are lower triangular matrices +* +* Form matrix C = A*adj(B) = ( a 0 ) +* ( c d ) +* + A = A1*B3 + D = A3*B1 + C = A2*B3 - A3*B2 +* +* The SVD of real 2-by-2 triangular C +* +* ( CSL -SNL )*( A 0 )*( CSR SNR ) = ( R 0 ) +* ( SNL CSL ) ( C D ) ( -SNR CSR ) ( 0 T ) +* + CALL SLASV2( A, C, D, S1, S2, SNR, CSR, SNL, CSL ) +* + IF( ABS( CSR ).GE.ABS( SNR ) .OR. ABS( CSL ).GE.ABS( SNL ) ) + $ THEN +* +* Compute the (2,1) and (2,2) elements of U**T *A and V**T *B, +* and (2,1) element of |U|**T *|A| and |V|**T *|B|. +* + UA21 = -SNR*A1 + CSR*A2 + UA22R = CSR*A3 +* + VB21 = -SNL*B1 + CSL*B2 + VB22R = CSL*B3 +* + AUA21 = ABS( SNR )*ABS( A1 ) + ABS( CSR )*ABS( A2 ) + AVB21 = ABS( SNL )*ABS( B1 ) + ABS( CSL )*ABS( B2 ) +* +* zero (2,1) elements of U**T *A and V**T *B. +* + IF( ( ABS( UA21 )+ABS( UA22R ) ).NE.ZERO ) THEN + IF( AUA21 / ( ABS( UA21 )+ABS( UA22R ) ).LE.AVB21 / + $ ( ABS( VB21 )+ABS( VB22R ) ) ) THEN + CALL SLARTG( UA22R, UA21, CSQ, SNQ, R ) + ELSE + CALL SLARTG( VB22R, VB21, CSQ, SNQ, R ) + END IF + ELSE + CALL SLARTG( VB22R, VB21, CSQ, SNQ, R ) + END IF +* + CSU = CSR + SNU = -SNR + CSV = CSL + SNV = -SNL +* + ELSE +* +* Compute the (1,1) and (1,2) elements of U**T *A and V**T *B, +* and (1,1) element of |U|**T *|A| and |V|**T *|B|. +* + UA11 = CSR*A1 + SNR*A2 + UA12 = SNR*A3 +* + VB11 = CSL*B1 + SNL*B2 + VB12 = SNL*B3 +* + AUA11 = ABS( CSR )*ABS( A1 ) + ABS( SNR )*ABS( A2 ) + AVB11 = ABS( CSL )*ABS( B1 ) + ABS( SNL )*ABS( B2 ) +* +* zero (1,1) elements of U**T*A and V**T*B, and then swap. +* + IF( ( ABS( UA11 )+ABS( UA12 ) ).NE.ZERO ) THEN + IF( AUA11 / ( ABS( UA11 )+ABS( UA12 ) ).LE.AVB11 / + $ ( ABS( VB11 )+ABS( VB12 ) ) ) THEN + CALL SLARTG( UA12, UA11, CSQ, SNQ, R ) + ELSE + CALL SLARTG( VB12, VB11, CSQ, SNQ, R ) + END IF + ELSE + CALL SLARTG( VB12, VB11, CSQ, SNQ, R ) + END IF +* + CSU = SNR + SNU = CSR + CSV = SNL + SNV = CSL +* + END IF +* + END IF +* + RETURN +* +* End of SLAGS2 +* + END diff --git a/dspl/liblapack/SRC/slagtf.f b/dspl/liblapack/SRC/slagtf.f new file mode 100644 index 0000000..d3f0b68 --- /dev/null +++ b/dspl/liblapack/SRC/slagtf.f @@ -0,0 +1,266 @@ +*> \brief \b SLAGTF computes an LU factorization of a matrix T-λI, where T is a general tridiagonal matrix, and λ a scalar, using partial pivoting with row interchanges. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAGTF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAGTF( N, A, LAMBDA, B, C, TOL, D, IN, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* REAL LAMBDA, TOL +* .. +* .. Array Arguments .. +* INTEGER IN( * ) +* REAL A( * ), B( * ), C( * ), D( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAGTF factorizes the matrix (T - lambda*I), where T is an n by n +*> tridiagonal matrix and lambda is a scalar, as +*> +*> T - lambda*I = PLU, +*> +*> where P is a permutation matrix, L is a unit lower tridiagonal matrix +*> with at most one non-zero sub-diagonal elements per column and U is +*> an upper triangular matrix with at most two non-zero super-diagonal +*> elements per column. +*> +*> The factorization is obtained by Gaussian elimination with partial +*> pivoting and implicit row scaling. +*> +*> The parameter LAMBDA is included in the routine so that SLAGTF may +*> be used, in conjunction with SLAGTS, to obtain eigenvectors of T by +*> inverse iteration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (N) +*> On entry, A must contain the diagonal elements of T. +*> +*> On exit, A is overwritten by the n diagonal elements of the +*> upper triangular matrix U of the factorization of T. +*> \endverbatim +*> +*> \param[in] LAMBDA +*> \verbatim +*> LAMBDA is REAL +*> On entry, the scalar lambda. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (N-1) +*> On entry, B must contain the (n-1) super-diagonal elements of +*> T. +*> +*> On exit, B is overwritten by the (n-1) super-diagonal +*> elements of the matrix U of the factorization of T. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (N-1) +*> On entry, C must contain the (n-1) sub-diagonal elements of +*> T. +*> +*> On exit, C is overwritten by the (n-1) sub-diagonal elements +*> of the matrix L of the factorization of T. +*> \endverbatim +*> +*> \param[in] TOL +*> \verbatim +*> TOL is REAL +*> On entry, a relative tolerance used to indicate whether or +*> not the matrix (T - lambda*I) is nearly singular. TOL should +*> normally be chose as approximately the largest relative error +*> in the elements of T. For example, if the elements of T are +*> correct to about 4 significant figures, then TOL should be +*> set to about 5*10**(-4). If TOL is supplied as less than eps, +*> where eps is the relative machine precision, then the value +*> eps is used in place of TOL. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (N-2) +*> On exit, D is overwritten by the (n-2) second super-diagonal +*> elements of the matrix U of the factorization of T. +*> \endverbatim +*> +*> \param[out] IN +*> \verbatim +*> IN is INTEGER array, dimension (N) +*> On exit, IN contains details of the permutation matrix P. If +*> an interchange occurred at the kth step of the elimination, +*> then IN(k) = 1, otherwise IN(k) = 0. The element IN(n) +*> returns the smallest positive integer j such that +*> +*> abs( u(j,j) ).le. norm( (T - lambda*I)(j) )*TOL, +*> +*> where norm( A(j) ) denotes the sum of the absolute values of +*> the jth row of the matrix A. If no such j exists then IN(n) +*> is returned as zero. If IN(n) is returned as positive, then a +*> diagonal element of U is small, indicating that +*> (T - lambda*I) is singular or nearly singular, +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0 : successful exit +*> .lt. 0: if INFO = -k, the kth argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE SLAGTF( N, A, LAMBDA, B, C, TOL, D, IN, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, N + REAL LAMBDA, TOL +* .. +* .. Array Arguments .. + INTEGER IN( * ) + REAL A( * ), B( * ), C( * ), D( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER K + REAL EPS, MULT, PIV1, PIV2, SCALE1, SCALE2, TEMP, TL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'SLAGTF', -INFO ) + RETURN + END IF +* + IF( N.EQ.0 ) + $ RETURN +* + A( 1 ) = A( 1 ) - LAMBDA + IN( N ) = 0 + IF( N.EQ.1 ) THEN + IF( A( 1 ).EQ.ZERO ) + $ IN( 1 ) = 1 + RETURN + END IF +* + EPS = SLAMCH( 'Epsilon' ) +* + TL = MAX( TOL, EPS ) + SCALE1 = ABS( A( 1 ) ) + ABS( B( 1 ) ) + DO 10 K = 1, N - 1 + A( K+1 ) = A( K+1 ) - LAMBDA + SCALE2 = ABS( C( K ) ) + ABS( A( K+1 ) ) + IF( K.LT.( N-1 ) ) + $ SCALE2 = SCALE2 + ABS( B( K+1 ) ) + IF( A( K ).EQ.ZERO ) THEN + PIV1 = ZERO + ELSE + PIV1 = ABS( A( K ) ) / SCALE1 + END IF + IF( C( K ).EQ.ZERO ) THEN + IN( K ) = 0 + PIV2 = ZERO + SCALE1 = SCALE2 + IF( K.LT.( N-1 ) ) + $ D( K ) = ZERO + ELSE + PIV2 = ABS( C( K ) ) / SCALE2 + IF( PIV2.LE.PIV1 ) THEN + IN( K ) = 0 + SCALE1 = SCALE2 + C( K ) = C( K ) / A( K ) + A( K+1 ) = A( K+1 ) - C( K )*B( K ) + IF( K.LT.( N-1 ) ) + $ D( K ) = ZERO + ELSE + IN( K ) = 1 + MULT = A( K ) / C( K ) + A( K ) = C( K ) + TEMP = A( K+1 ) + A( K+1 ) = B( K ) - MULT*TEMP + IF( K.LT.( N-1 ) ) THEN + D( K ) = B( K+1 ) + B( K+1 ) = -MULT*D( K ) + END IF + B( K ) = TEMP + C( K ) = MULT + END IF + END IF + IF( ( MAX( PIV1, PIV2 ).LE.TL ) .AND. ( IN( N ).EQ.0 ) ) + $ IN( N ) = K + 10 CONTINUE + IF( ( ABS( A( N ) ).LE.SCALE1*TL ) .AND. ( IN( N ).EQ.0 ) ) + $ IN( N ) = N +* + RETURN +* +* End of SLAGTF +* + END diff --git a/dspl/liblapack/SRC/slagtm.f b/dspl/liblapack/SRC/slagtm.f new file mode 100644 index 0000000..e423dc9 --- /dev/null +++ b/dspl/liblapack/SRC/slagtm.f @@ -0,0 +1,278 @@ +*> \brief \b SLAGTM performs a matrix-matrix product of the form C = αAB+βC, where A is a tridiagonal matrix, B and C are rectangular matrices, and α and β are scalars, which may be 0, 1, or -1. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAGTM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAGTM( TRANS, N, NRHS, ALPHA, DL, D, DU, X, LDX, BETA, +* B, LDB ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER LDB, LDX, N, NRHS +* REAL ALPHA, BETA +* .. +* .. Array Arguments .. +* REAL B( LDB, * ), D( * ), DL( * ), DU( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAGTM performs a matrix-vector product of the form +*> +*> B := alpha * A * X + beta * B +*> +*> where A is a tridiagonal matrix of order N, B and X are N by NRHS +*> matrices, and alpha and beta are real scalars, each of which may be +*> 0., 1., or -1. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the operation applied to A. +*> = 'N': No transpose, B := alpha * A * X + beta * B +*> = 'T': Transpose, B := alpha * A'* X + beta * B +*> = 'C': Conjugate transpose = Transpose +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices X and B. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is REAL +*> The scalar alpha. ALPHA must be 0., 1., or -1.; otherwise, +*> it is assumed to be 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is REAL array, dimension (N-1) +*> The (n-1) sub-diagonal elements of T. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of T. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is REAL array, dimension (N-1) +*> The (n-1) super-diagonal elements of T. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> The N by NRHS matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(N,1). +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is REAL +*> The scalar beta. BETA must be 0., 1., or -1.; otherwise, +*> it is assumed to be 1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N by NRHS matrix B. +*> On exit, B is overwritten by the matrix expression +*> B := alpha * A * X + beta * B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(N,1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLAGTM( TRANS, N, NRHS, ALPHA, DL, D, DU, X, LDX, BETA, + $ B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER LDB, LDX, N, NRHS + REAL ALPHA, BETA +* .. +* .. Array Arguments .. + REAL B( LDB, * ), D( * ), DL( * ), DU( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) + $ RETURN +* +* Multiply B by BETA if BETA.NE.1. +* + IF( BETA.EQ.ZERO ) THEN + DO 20 J = 1, NRHS + DO 10 I = 1, N + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE IF( BETA.EQ.-ONE ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = -B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF +* + IF( ALPHA.EQ.ONE ) THEN + IF( LSAME( TRANS, 'N' ) ) THEN +* +* Compute B := B + A*X +* + DO 60 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + + $ DU( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) + DL( N-1 )*X( N-1, J ) + + $ D( N )*X( N, J ) + DO 50 I = 2, N - 1 + B( I, J ) = B( I, J ) + DL( I-1 )*X( I-1, J ) + + $ D( I )*X( I, J ) + DU( I )*X( I+1, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE +* +* Compute B := B + A**T*X +* + DO 80 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + + $ DL( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) + DU( N-1 )*X( N-1, J ) + + $ D( N )*X( N, J ) + DO 70 I = 2, N - 1 + B( I, J ) = B( I, J ) + DU( I-1 )*X( I-1, J ) + + $ D( I )*X( I, J ) + DL( I )*X( I+1, J ) + 70 CONTINUE + END IF + 80 CONTINUE + END IF + ELSE IF( ALPHA.EQ.-ONE ) THEN + IF( LSAME( TRANS, 'N' ) ) THEN +* +* Compute B := B - A*X +* + DO 100 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) - + $ DU( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) - DL( N-1 )*X( N-1, J ) - + $ D( N )*X( N, J ) + DO 90 I = 2, N - 1 + B( I, J ) = B( I, J ) - DL( I-1 )*X( I-1, J ) - + $ D( I )*X( I, J ) - DU( I )*X( I+1, J ) + 90 CONTINUE + END IF + 100 CONTINUE + ELSE +* +* Compute B := B - A**T*X +* + DO 120 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) - + $ DL( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) - DU( N-1 )*X( N-1, J ) - + $ D( N )*X( N, J ) + DO 110 I = 2, N - 1 + B( I, J ) = B( I, J ) - DU( I-1 )*X( I-1, J ) - + $ D( I )*X( I, J ) - DL( I )*X( I+1, J ) + 110 CONTINUE + END IF + 120 CONTINUE + END IF + END IF + RETURN +* +* End of SLAGTM +* + END diff --git a/dspl/liblapack/SRC/slagts.f b/dspl/liblapack/SRC/slagts.f new file mode 100644 index 0000000..0c3c523 --- /dev/null +++ b/dspl/liblapack/SRC/slagts.f @@ -0,0 +1,383 @@ +*> \brief \b SLAGTS solves the system of equations (T-λI)x = y or (T-λI)Tx = y,where T is a general tridiagonal matrix and λ a scalar, using the LU factorization computed by slagtf. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAGTS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAGTS( JOB, N, A, B, C, D, IN, Y, TOL, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, JOB, N +* REAL TOL +* .. +* .. Array Arguments .. +* INTEGER IN( * ) +* REAL A( * ), B( * ), C( * ), D( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAGTS may be used to solve one of the systems of equations +*> +*> (T - lambda*I)*x = y or (T - lambda*I)**T*x = y, +*> +*> where T is an n by n tridiagonal matrix, for x, following the +*> factorization of (T - lambda*I) as +*> +*> (T - lambda*I) = P*L*U , +*> +*> by routine SLAGTF. The choice of equation to be solved is +*> controlled by the argument JOB, and in each case there is an option +*> to perturb zero or very small diagonal elements of U, this option +*> being intended for use in applications such as inverse iteration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is INTEGER +*> Specifies the job to be performed by SLAGTS as follows: +*> = 1: The equations (T - lambda*I)x = y are to be solved, +*> but diagonal elements of U are not to be perturbed. +*> = -1: The equations (T - lambda*I)x = y are to be solved +*> and, if overflow would otherwise occur, the diagonal +*> elements of U are to be perturbed. See argument TOL +*> below. +*> = 2: The equations (T - lambda*I)**Tx = y are to be solved, +*> but diagonal elements of U are not to be perturbed. +*> = -2: The equations (T - lambda*I)**Tx = y are to be solved +*> and, if overflow would otherwise occur, the diagonal +*> elements of U are to be perturbed. See argument TOL +*> below. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (N) +*> On entry, A must contain the diagonal elements of U as +*> returned from SLAGTF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (N-1) +*> On entry, B must contain the first super-diagonal elements of +*> U as returned from SLAGTF. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N-1) +*> On entry, C must contain the sub-diagonal elements of L as +*> returned from SLAGTF. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N-2) +*> On entry, D must contain the second super-diagonal elements +*> of U as returned from SLAGTF. +*> \endverbatim +*> +*> \param[in] IN +*> \verbatim +*> IN is INTEGER array, dimension (N) +*> On entry, IN must contain details of the matrix P as returned +*> from SLAGTF. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is REAL array, dimension (N) +*> On entry, the right hand side vector y. +*> On exit, Y is overwritten by the solution vector x. +*> \endverbatim +*> +*> \param[in,out] TOL +*> \verbatim +*> TOL is REAL +*> On entry, with JOB .lt. 0, TOL should be the minimum +*> perturbation to be made to very small diagonal elements of U. +*> TOL should normally be chosen as about eps*norm(U), where eps +*> is the relative machine precision, but if TOL is supplied as +*> non-positive, then it is reset to eps*max( abs( u(i,j) ) ). +*> If JOB .gt. 0 then TOL is not referenced. +*> +*> On exit, TOL is changed as described above, only if TOL is +*> non-positive on entry. Otherwise TOL is unchanged. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0 : successful exit +*> .lt. 0: if INFO = -i, the i-th argument had an illegal value +*> .gt. 0: overflow would occur when computing the INFO(th) +*> element of the solution vector x. This can only occur +*> when JOB is supplied as positive and either means +*> that a diagonal element of U is very small, or that +*> the elements of the right-hand side vector y are very +*> large. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLAGTS( JOB, N, A, B, C, D, IN, Y, TOL, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, JOB, N + REAL TOL +* .. +* .. Array Arguments .. + INTEGER IN( * ) + REAL A( * ), B( * ), C( * ), D( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER K + REAL ABSAK, AK, BIGNUM, EPS, PERT, SFMIN, TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( ( ABS( JOB ).GT.2 ) .OR. ( JOB.EQ.0 ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAGTS', -INFO ) + RETURN + END IF +* + IF( N.EQ.0 ) + $ RETURN +* + EPS = SLAMCH( 'Epsilon' ) + SFMIN = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SFMIN +* + IF( JOB.LT.0 ) THEN + IF( TOL.LE.ZERO ) THEN + TOL = ABS( A( 1 ) ) + IF( N.GT.1 ) + $ TOL = MAX( TOL, ABS( A( 2 ) ), ABS( B( 1 ) ) ) + DO 10 K = 3, N + TOL = MAX( TOL, ABS( A( K ) ), ABS( B( K-1 ) ), + $ ABS( D( K-2 ) ) ) + 10 CONTINUE + TOL = TOL*EPS + IF( TOL.EQ.ZERO ) + $ TOL = EPS + END IF + END IF +* + IF( ABS( JOB ).EQ.1 ) THEN + DO 20 K = 2, N + IF( IN( K-1 ).EQ.0 ) THEN + Y( K ) = Y( K ) - C( K-1 )*Y( K-1 ) + ELSE + TEMP = Y( K-1 ) + Y( K-1 ) = Y( K ) + Y( K ) = TEMP - C( K-1 )*Y( K ) + END IF + 20 CONTINUE + IF( JOB.EQ.1 ) THEN + DO 30 K = N, 1, -1 + IF( K.LE.N-2 ) THEN + TEMP = Y( K ) - B( K )*Y( K+1 ) - D( K )*Y( K+2 ) + ELSE IF( K.EQ.N-1 ) THEN + TEMP = Y( K ) - B( K )*Y( K+1 ) + ELSE + TEMP = Y( K ) + END IF + AK = A( K ) + ABSAK = ABS( AK ) + IF( ABSAK.LT.ONE ) THEN + IF( ABSAK.LT.SFMIN ) THEN + IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK ) + $ THEN + INFO = K + RETURN + ELSE + TEMP = TEMP*BIGNUM + AK = AK*BIGNUM + END IF + ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN + INFO = K + RETURN + END IF + END IF + Y( K ) = TEMP / AK + 30 CONTINUE + ELSE + DO 50 K = N, 1, -1 + IF( K.LE.N-2 ) THEN + TEMP = Y( K ) - B( K )*Y( K+1 ) - D( K )*Y( K+2 ) + ELSE IF( K.EQ.N-1 ) THEN + TEMP = Y( K ) - B( K )*Y( K+1 ) + ELSE + TEMP = Y( K ) + END IF + AK = A( K ) + PERT = SIGN( TOL, AK ) + 40 CONTINUE + ABSAK = ABS( AK ) + IF( ABSAK.LT.ONE ) THEN + IF( ABSAK.LT.SFMIN ) THEN + IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK ) + $ THEN + AK = AK + PERT + PERT = 2*PERT + GO TO 40 + ELSE + TEMP = TEMP*BIGNUM + AK = AK*BIGNUM + END IF + ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN + AK = AK + PERT + PERT = 2*PERT + GO TO 40 + END IF + END IF + Y( K ) = TEMP / AK + 50 CONTINUE + END IF + ELSE +* +* Come to here if JOB = 2 or -2 +* + IF( JOB.EQ.2 ) THEN + DO 60 K = 1, N + IF( K.GE.3 ) THEN + TEMP = Y( K ) - B( K-1 )*Y( K-1 ) - D( K-2 )*Y( K-2 ) + ELSE IF( K.EQ.2 ) THEN + TEMP = Y( K ) - B( K-1 )*Y( K-1 ) + ELSE + TEMP = Y( K ) + END IF + AK = A( K ) + ABSAK = ABS( AK ) + IF( ABSAK.LT.ONE ) THEN + IF( ABSAK.LT.SFMIN ) THEN + IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK ) + $ THEN + INFO = K + RETURN + ELSE + TEMP = TEMP*BIGNUM + AK = AK*BIGNUM + END IF + ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN + INFO = K + RETURN + END IF + END IF + Y( K ) = TEMP / AK + 60 CONTINUE + ELSE + DO 80 K = 1, N + IF( K.GE.3 ) THEN + TEMP = Y( K ) - B( K-1 )*Y( K-1 ) - D( K-2 )*Y( K-2 ) + ELSE IF( K.EQ.2 ) THEN + TEMP = Y( K ) - B( K-1 )*Y( K-1 ) + ELSE + TEMP = Y( K ) + END IF + AK = A( K ) + PERT = SIGN( TOL, AK ) + 70 CONTINUE + ABSAK = ABS( AK ) + IF( ABSAK.LT.ONE ) THEN + IF( ABSAK.LT.SFMIN ) THEN + IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK ) + $ THEN + AK = AK + PERT + PERT = 2*PERT + GO TO 70 + ELSE + TEMP = TEMP*BIGNUM + AK = AK*BIGNUM + END IF + ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN + AK = AK + PERT + PERT = 2*PERT + GO TO 70 + END IF + END IF + Y( K ) = TEMP / AK + 80 CONTINUE + END IF +* + DO 90 K = N, 2, -1 + IF( IN( K-1 ).EQ.0 ) THEN + Y( K-1 ) = Y( K-1 ) - C( K-1 )*Y( K ) + ELSE + TEMP = Y( K-1 ) + Y( K-1 ) = Y( K ) + Y( K ) = TEMP - C( K-1 )*Y( K ) + END IF + 90 CONTINUE + END IF +* +* End of SLAGTS +* + END diff --git a/dspl/liblapack/SRC/slagv2.f b/dspl/liblapack/SRC/slagv2.f new file mode 100644 index 0000000..638c7df --- /dev/null +++ b/dspl/liblapack/SRC/slagv2.f @@ -0,0 +1,374 @@ +*> \brief \b SLAGV2 computes the Generalized Schur factorization of a real 2-by-2 matrix pencil (A,B) where B is upper triangular. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAGV2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAGV2( A, LDA, B, LDB, ALPHAR, ALPHAI, BETA, CSL, SNL, +* CSR, SNR ) +* +* .. Scalar Arguments .. +* INTEGER LDA, LDB +* REAL CSL, CSR, SNL, SNR +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), ALPHAI( 2 ), ALPHAR( 2 ), +* $ B( LDB, * ), BETA( 2 ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAGV2 computes the Generalized Schur factorization of a real 2-by-2 +*> matrix pencil (A,B) where B is upper triangular. This routine +*> computes orthogonal (rotation) matrices given by CSL, SNL and CSR, +*> SNR such that +*> +*> 1) if the pencil (A,B) has two real eigenvalues (include 0/0 or 1/0 +*> types), then +*> +*> [ a11 a12 ] := [ CSL SNL ] [ a11 a12 ] [ CSR -SNR ] +*> [ 0 a22 ] [ -SNL CSL ] [ a21 a22 ] [ SNR CSR ] +*> +*> [ b11 b12 ] := [ CSL SNL ] [ b11 b12 ] [ CSR -SNR ] +*> [ 0 b22 ] [ -SNL CSL ] [ 0 b22 ] [ SNR CSR ], +*> +*> 2) if the pencil (A,B) has a pair of complex conjugate eigenvalues, +*> then +*> +*> [ a11 a12 ] := [ CSL SNL ] [ a11 a12 ] [ CSR -SNR ] +*> [ a21 a22 ] [ -SNL CSL ] [ a21 a22 ] [ SNR CSR ] +*> +*> [ b11 0 ] := [ CSL SNL ] [ b11 b12 ] [ CSR -SNR ] +*> [ 0 b22 ] [ -SNL CSL ] [ 0 b22 ] [ SNR CSR ] +*> +*> where b11 >= b22 > 0. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, 2) +*> On entry, the 2 x 2 matrix A. +*> On exit, A is overwritten by the ``A-part'' of the +*> generalized Schur form. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> THe leading dimension of the array A. LDA >= 2. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB, 2) +*> On entry, the upper triangular 2 x 2 matrix B. +*> On exit, B is overwritten by the ``B-part'' of the +*> generalized Schur form. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> THe leading dimension of the array B. LDB >= 2. +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is REAL array, dimension (2) +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is REAL array, dimension (2) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is REAL array, dimension (2) +*> (ALPHAR(k)+i*ALPHAI(k))/BETA(k) are the eigenvalues of the +*> pencil (A,B), k=1,2, i = sqrt(-1). Note that BETA(k) may +*> be zero. +*> \endverbatim +*> +*> \param[out] CSL +*> \verbatim +*> CSL is REAL +*> The cosine of the left rotation matrix. +*> \endverbatim +*> +*> \param[out] SNL +*> \verbatim +*> SNL is REAL +*> The sine of the left rotation matrix. +*> \endverbatim +*> +*> \param[out] CSR +*> \verbatim +*> CSR is REAL +*> The cosine of the right rotation matrix. +*> \endverbatim +*> +*> \param[out] SNR +*> \verbatim +*> SNR is REAL +*> The sine of the right rotation matrix. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE SLAGV2( A, LDA, B, LDB, ALPHAR, ALPHAI, BETA, CSL, SNL, + $ CSR, SNR ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER LDA, LDB + REAL CSL, CSR, SNL, SNR +* .. +* .. Array Arguments .. + REAL A( LDA, * ), ALPHAI( 2 ), ALPHAR( 2 ), + $ B( LDB, * ), BETA( 2 ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + REAL ANORM, ASCALE, BNORM, BSCALE, H1, H2, H3, QQ, + $ R, RR, SAFMIN, SCALE1, SCALE2, T, ULP, WI, WR1, + $ WR2 +* .. +* .. External Subroutines .. + EXTERNAL SLAG2, SLARTG, SLASV2, SROT +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2 + EXTERNAL SLAMCH, SLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + SAFMIN = SLAMCH( 'S' ) + ULP = SLAMCH( 'P' ) +* +* Scale A +* + ANORM = MAX( ABS( A( 1, 1 ) )+ABS( A( 2, 1 ) ), + $ ABS( A( 1, 2 ) )+ABS( A( 2, 2 ) ), SAFMIN ) + ASCALE = ONE / ANORM + A( 1, 1 ) = ASCALE*A( 1, 1 ) + A( 1, 2 ) = ASCALE*A( 1, 2 ) + A( 2, 1 ) = ASCALE*A( 2, 1 ) + A( 2, 2 ) = ASCALE*A( 2, 2 ) +* +* Scale B +* + BNORM = MAX( ABS( B( 1, 1 ) ), ABS( B( 1, 2 ) )+ABS( B( 2, 2 ) ), + $ SAFMIN ) + BSCALE = ONE / BNORM + B( 1, 1 ) = BSCALE*B( 1, 1 ) + B( 1, 2 ) = BSCALE*B( 1, 2 ) + B( 2, 2 ) = BSCALE*B( 2, 2 ) +* +* Check if A can be deflated +* + IF( ABS( A( 2, 1 ) ).LE.ULP ) THEN + CSL = ONE + SNL = ZERO + CSR = ONE + SNR = ZERO + A( 2, 1 ) = ZERO + B( 2, 1 ) = ZERO + WI = ZERO +* +* Check if B is singular +* + ELSE IF( ABS( B( 1, 1 ) ).LE.ULP ) THEN + CALL SLARTG( A( 1, 1 ), A( 2, 1 ), CSL, SNL, R ) + CSR = ONE + SNR = ZERO + CALL SROT( 2, A( 1, 1 ), LDA, A( 2, 1 ), LDA, CSL, SNL ) + CALL SROT( 2, B( 1, 1 ), LDB, B( 2, 1 ), LDB, CSL, SNL ) + A( 2, 1 ) = ZERO + B( 1, 1 ) = ZERO + B( 2, 1 ) = ZERO + WI = ZERO +* + ELSE IF( ABS( B( 2, 2 ) ).LE.ULP ) THEN + CALL SLARTG( A( 2, 2 ), A( 2, 1 ), CSR, SNR, T ) + SNR = -SNR + CALL SROT( 2, A( 1, 1 ), 1, A( 1, 2 ), 1, CSR, SNR ) + CALL SROT( 2, B( 1, 1 ), 1, B( 1, 2 ), 1, CSR, SNR ) + CSL = ONE + SNL = ZERO + A( 2, 1 ) = ZERO + B( 2, 1 ) = ZERO + B( 2, 2 ) = ZERO + WI = ZERO +* + ELSE +* +* B is nonsingular, first compute the eigenvalues of (A,B) +* + CALL SLAG2( A, LDA, B, LDB, SAFMIN, SCALE1, SCALE2, WR1, WR2, + $ WI ) +* + IF( WI.EQ.ZERO ) THEN +* +* two real eigenvalues, compute s*A-w*B +* + H1 = SCALE1*A( 1, 1 ) - WR1*B( 1, 1 ) + H2 = SCALE1*A( 1, 2 ) - WR1*B( 1, 2 ) + H3 = SCALE1*A( 2, 2 ) - WR1*B( 2, 2 ) +* + RR = SLAPY2( H1, H2 ) + QQ = SLAPY2( SCALE1*A( 2, 1 ), H3 ) +* + IF( RR.GT.QQ ) THEN +* +* find right rotation matrix to zero 1,1 element of +* (sA - wB) +* + CALL SLARTG( H2, H1, CSR, SNR, T ) +* + ELSE +* +* find right rotation matrix to zero 2,1 element of +* (sA - wB) +* + CALL SLARTG( H3, SCALE1*A( 2, 1 ), CSR, SNR, T ) +* + END IF +* + SNR = -SNR + CALL SROT( 2, A( 1, 1 ), 1, A( 1, 2 ), 1, CSR, SNR ) + CALL SROT( 2, B( 1, 1 ), 1, B( 1, 2 ), 1, CSR, SNR ) +* +* compute inf norms of A and B +* + H1 = MAX( ABS( A( 1, 1 ) )+ABS( A( 1, 2 ) ), + $ ABS( A( 2, 1 ) )+ABS( A( 2, 2 ) ) ) + H2 = MAX( ABS( B( 1, 1 ) )+ABS( B( 1, 2 ) ), + $ ABS( B( 2, 1 ) )+ABS( B( 2, 2 ) ) ) +* + IF( ( SCALE1*H1 ).GE.ABS( WR1 )*H2 ) THEN +* +* find left rotation matrix Q to zero out B(2,1) +* + CALL SLARTG( B( 1, 1 ), B( 2, 1 ), CSL, SNL, R ) +* + ELSE +* +* find left rotation matrix Q to zero out A(2,1) +* + CALL SLARTG( A( 1, 1 ), A( 2, 1 ), CSL, SNL, R ) +* + END IF +* + CALL SROT( 2, A( 1, 1 ), LDA, A( 2, 1 ), LDA, CSL, SNL ) + CALL SROT( 2, B( 1, 1 ), LDB, B( 2, 1 ), LDB, CSL, SNL ) +* + A( 2, 1 ) = ZERO + B( 2, 1 ) = ZERO +* + ELSE +* +* a pair of complex conjugate eigenvalues +* first compute the SVD of the matrix B +* + CALL SLASV2( B( 1, 1 ), B( 1, 2 ), B( 2, 2 ), R, T, SNR, + $ CSR, SNL, CSL ) +* +* Form (A,B) := Q(A,B)Z**T where Q is left rotation matrix and +* Z is right rotation matrix computed from SLASV2 +* + CALL SROT( 2, A( 1, 1 ), LDA, A( 2, 1 ), LDA, CSL, SNL ) + CALL SROT( 2, B( 1, 1 ), LDB, B( 2, 1 ), LDB, CSL, SNL ) + CALL SROT( 2, A( 1, 1 ), 1, A( 1, 2 ), 1, CSR, SNR ) + CALL SROT( 2, B( 1, 1 ), 1, B( 1, 2 ), 1, CSR, SNR ) +* + B( 2, 1 ) = ZERO + B( 1, 2 ) = ZERO +* + END IF +* + END IF +* +* Unscaling +* + A( 1, 1 ) = ANORM*A( 1, 1 ) + A( 2, 1 ) = ANORM*A( 2, 1 ) + A( 1, 2 ) = ANORM*A( 1, 2 ) + A( 2, 2 ) = ANORM*A( 2, 2 ) + B( 1, 1 ) = BNORM*B( 1, 1 ) + B( 2, 1 ) = BNORM*B( 2, 1 ) + B( 1, 2 ) = BNORM*B( 1, 2 ) + B( 2, 2 ) = BNORM*B( 2, 2 ) +* + IF( WI.EQ.ZERO ) THEN + ALPHAR( 1 ) = A( 1, 1 ) + ALPHAR( 2 ) = A( 2, 2 ) + ALPHAI( 1 ) = ZERO + ALPHAI( 2 ) = ZERO + BETA( 1 ) = B( 1, 1 ) + BETA( 2 ) = B( 2, 2 ) + ELSE + ALPHAR( 1 ) = ANORM*WR1 / SCALE1 / BNORM + ALPHAI( 1 ) = ANORM*WI / SCALE1 / BNORM + ALPHAR( 2 ) = ALPHAR( 1 ) + ALPHAI( 2 ) = -ALPHAI( 1 ) + BETA( 1 ) = ONE + BETA( 2 ) = ONE + END IF +* + RETURN +* +* End of SLAGV2 +* + END diff --git a/dspl/liblapack/SRC/slahqr.f b/dspl/liblapack/SRC/slahqr.f new file mode 100644 index 0000000..d91826e --- /dev/null +++ b/dspl/liblapack/SRC/slahqr.f @@ -0,0 +1,613 @@ +*> \brief \b SLAHQR computes the eigenvalues and Schur factorization of an upper Hessenberg matrix, using the double-shift/single-shift QR algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAHQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, +* ILOZ, IHIZ, Z, LDZ, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* REAL H( LDH, * ), WI( * ), WR( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAHQR is an auxiliary routine called by SHSEQR to update the +*> eigenvalues and Schur decomposition already computed by SHSEQR, by +*> dealing with the Hessenberg submatrix in rows and columns ILO to +*> IHI. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> = .TRUE. : the full Schur form T is required; +*> = .FALSE.: only eigenvalues are required. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> = .TRUE. : the matrix of Schur vectors Z is required; +*> = .FALSE.: Schur vectors are not required. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> It is assumed that H is already upper quasi-triangular in +*> rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless +*> ILO = 1). SLAHQR works primarily with the Hessenberg +*> submatrix in rows and columns ILO to IHI, but applies +*> transformations to all of H if WANTT is .TRUE.. +*> 1 <= ILO <= max(1,IHI); IHI <= N. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is REAL array, dimension (LDH,N) +*> On entry, the upper Hessenberg matrix H. +*> On exit, if INFO is zero and if WANTT is .TRUE., H is upper +*> quasi-triangular in rows and columns ILO:IHI, with any +*> 2-by-2 diagonal blocks in standard form. If INFO is zero +*> and WANTT is .FALSE., the contents of H are unspecified on +*> exit. The output state of H if INFO is nonzero is given +*> below under the description of INFO. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH >= max(1,N). +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] WI +*> \verbatim +*> WI is REAL array, dimension (N) +*> The real and imaginary parts, respectively, of the computed +*> eigenvalues ILO to IHI are stored in the corresponding +*> elements of WR and WI. If two eigenvalues are computed as a +*> complex conjugate pair, they are stored in consecutive +*> elements of WR and WI, say the i-th and (i+1)th, with +*> WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the +*> eigenvalues are stored in the same order as on the diagonal +*> of the Schur form returned in H, with WR(i) = H(i,i), and, if +*> H(i:i+1,i:i+1) is a 2-by-2 diagonal block, +*> WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. +*> 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ,N) +*> If WANTZ is .TRUE., on entry Z must contain the current +*> matrix Z of transformations accumulated by SHSEQR, and on +*> exit Z has been updated; transformations are applied only to +*> the submatrix Z(ILOZ:IHIZ,ILO:IHI). +*> If WANTZ is .FALSE., Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> .GT. 0: If INFO = i, SLAHQR failed to compute all the +*> eigenvalues ILO to IHI in a total of 30 iterations +*> per eigenvalue; elements i+1:ihi of WR and WI +*> contain those eigenvalues which have been +*> successfully computed. +*> +*> If INFO .GT. 0 and WANTT is .FALSE., then on exit, +*> the remaining unconverged eigenvalues are the +*> eigenvalues of the upper Hessenberg matrix rows +*> and columns ILO thorugh INFO of the final, output +*> value of H. +*> +*> If INFO .GT. 0 and WANTT is .TRUE., then on exit +*> (*) (initial value of H)*U = U*(final value of H) +*> where U is an orthognal matrix. The final +*> value of H is upper Hessenberg and triangular in +*> rows and columns INFO+1 through IHI. +*> +*> If INFO .GT. 0 and WANTZ is .TRUE., then on exit +*> (final value of Z) = (initial value of Z)*U +*> where U is the orthogonal matrix in (*) +*> (regardless of the value of WANTT.) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> 02-96 Based on modifications by +*> David Day, Sandia National Laboratory, USA +*> +*> 12-04 Further modifications by +*> Ralph Byers, University of Kansas, USA +*> This is a modified version of SLAHQR from LAPACK version 3.0. +*> It is (1) more robust against overflow and underflow and +*> (2) adopts the more conservative Ahues & Tisseur stopping +*> criterion (LAWN 122, 1997). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, + $ ILOZ, IHIZ, Z, LDZ, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + REAL H( LDH, * ), WI( * ), WR( * ), Z( LDZ, * ) +* .. +* +* ========================================================= +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0e0, ONE = 1.0e0, TWO = 2.0e0 ) + REAL DAT1, DAT2 + PARAMETER ( DAT1 = 3.0e0 / 4.0e0, DAT2 = -0.4375e0 ) +* .. +* .. Local Scalars .. + REAL AA, AB, BA, BB, CS, DET, H11, H12, H21, H21S, + $ H22, RT1I, RT1R, RT2I, RT2R, RTDISC, S, SAFMAX, + $ SAFMIN, SMLNUM, SN, SUM, T1, T2, T3, TR, TST, + $ ULP, V2, V3 + INTEGER I, I1, I2, ITS, ITMAX, J, K, L, M, NH, NR, NZ +* .. +* .. Local Arrays .. + REAL V( 3 ) +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLABAD, SLANV2, SLARFG, SROT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( ILO.EQ.IHI ) THEN + WR( ILO ) = H( ILO, ILO ) + WI( ILO ) = ZERO + RETURN + END IF +* +* ==== clear out the trash ==== + DO 10 J = ILO, IHI - 3 + H( J+2, J ) = ZERO + H( J+3, J ) = ZERO + 10 CONTINUE + IF( ILO.LE.IHI-2 ) + $ H( IHI, IHI-2 ) = ZERO +* + NH = IHI - ILO + 1 + NZ = IHIZ - ILOZ + 1 +* +* Set machine-dependent constants for the stopping criterion. +* + SAFMIN = SLAMCH( 'SAFE MINIMUM' ) + SAFMAX = ONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + ULP = SLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( REAL( NH ) / ULP ) +* +* I1 and I2 are the indices of the first row and last column of H +* to which transformations must be applied. If eigenvalues only are +* being computed, I1 and I2 are set inside the main loop. +* + IF( WANTT ) THEN + I1 = 1 + I2 = N + END IF +* +* ITMAX is the total number of QR iterations allowed. +* + ITMAX = 30 * MAX( 10, NH ) +* +* The main loop begins here. I is the loop index and decreases from +* IHI to ILO in steps of 1 or 2. Each iteration of the loop works +* with the active submatrix in rows and columns L to I. +* Eigenvalues I+1 to IHI have already converged. Either L = ILO or +* H(L,L-1) is negligible so that the matrix splits. +* + I = IHI + 20 CONTINUE + L = ILO + IF( I.LT.ILO ) + $ GO TO 160 +* +* Perform QR iterations on rows and columns ILO to I until a +* submatrix of order 1 or 2 splits off at the bottom because a +* subdiagonal element has become negligible. +* + DO 140 ITS = 0, ITMAX +* +* Look for a single small subdiagonal element. +* + DO 30 K = I, L + 1, -1 + IF( ABS( H( K, K-1 ) ).LE.SMLNUM ) + $ GO TO 40 + TST = ABS( H( K-1, K-1 ) ) + ABS( H( K, K ) ) + IF( TST.EQ.ZERO ) THEN + IF( K-2.GE.ILO ) + $ TST = TST + ABS( H( K-1, K-2 ) ) + IF( K+1.LE.IHI ) + $ TST = TST + ABS( H( K+1, K ) ) + END IF +* ==== The following is a conservative small subdiagonal +* . deflation criterion due to Ahues & Tisseur (LAWN 122, +* . 1997). It has better mathematical foundation and +* . improves accuracy in some cases. ==== + IF( ABS( H( K, K-1 ) ).LE.ULP*TST ) THEN + AB = MAX( ABS( H( K, K-1 ) ), ABS( H( K-1, K ) ) ) + BA = MIN( ABS( H( K, K-1 ) ), ABS( H( K-1, K ) ) ) + AA = MAX( ABS( H( K, K ) ), + $ ABS( H( K-1, K-1 )-H( K, K ) ) ) + BB = MIN( ABS( H( K, K ) ), + $ ABS( H( K-1, K-1 )-H( K, K ) ) ) + S = AA + AB + IF( BA*( AB / S ).LE.MAX( SMLNUM, + $ ULP*( BB*( AA / S ) ) ) )GO TO 40 + END IF + 30 CONTINUE + 40 CONTINUE + L = K + IF( L.GT.ILO ) THEN +* +* H(L,L-1) is negligible +* + H( L, L-1 ) = ZERO + END IF +* +* Exit from loop if a submatrix of order 1 or 2 has split off. +* + IF( L.GE.I-1 ) + $ GO TO 150 +* +* Now the active submatrix is in rows and columns L to I. If +* eigenvalues only are being computed, only the active submatrix +* need be transformed. +* + IF( .NOT.WANTT ) THEN + I1 = L + I2 = I + END IF +* + IF( ITS.EQ.10 ) THEN +* +* Exceptional shift. +* + S = ABS( H( L+1, L ) ) + ABS( H( L+2, L+1 ) ) + H11 = DAT1*S + H( L, L ) + H12 = DAT2*S + H21 = S + H22 = H11 + ELSE IF( ITS.EQ.20 ) THEN +* +* Exceptional shift. +* + S = ABS( H( I, I-1 ) ) + ABS( H( I-1, I-2 ) ) + H11 = DAT1*S + H( I, I ) + H12 = DAT2*S + H21 = S + H22 = H11 + ELSE +* +* Prepare to use Francis' double shift +* (i.e. 2nd degree generalized Rayleigh quotient) +* + H11 = H( I-1, I-1 ) + H21 = H( I, I-1 ) + H12 = H( I-1, I ) + H22 = H( I, I ) + END IF + S = ABS( H11 ) + ABS( H12 ) + ABS( H21 ) + ABS( H22 ) + IF( S.EQ.ZERO ) THEN + RT1R = ZERO + RT1I = ZERO + RT2R = ZERO + RT2I = ZERO + ELSE + H11 = H11 / S + H21 = H21 / S + H12 = H12 / S + H22 = H22 / S + TR = ( H11+H22 ) / TWO + DET = ( H11-TR )*( H22-TR ) - H12*H21 + RTDISC = SQRT( ABS( DET ) ) + IF( DET.GE.ZERO ) THEN +* +* ==== complex conjugate shifts ==== +* + RT1R = TR*S + RT2R = RT1R + RT1I = RTDISC*S + RT2I = -RT1I + ELSE +* +* ==== real shifts (use only one of them) ==== +* + RT1R = TR + RTDISC + RT2R = TR - RTDISC + IF( ABS( RT1R-H22 ).LE.ABS( RT2R-H22 ) ) THEN + RT1R = RT1R*S + RT2R = RT1R + ELSE + RT2R = RT2R*S + RT1R = RT2R + END IF + RT1I = ZERO + RT2I = ZERO + END IF + END IF +* +* Look for two consecutive small subdiagonal elements. +* + DO 50 M = I - 2, L, -1 +* Determine the effect of starting the double-shift QR +* iteration at row M, and see if this would make H(M,M-1) +* negligible. (The following uses scaling to avoid +* overflows and most underflows.) +* + H21S = H( M+1, M ) + S = ABS( H( M, M )-RT2R ) + ABS( RT2I ) + ABS( H21S ) + H21S = H( M+1, M ) / S + V( 1 ) = H21S*H( M, M+1 ) + ( H( M, M )-RT1R )* + $ ( ( H( M, M )-RT2R ) / S ) - RT1I*( RT2I / S ) + V( 2 ) = H21S*( H( M, M )+H( M+1, M+1 )-RT1R-RT2R ) + V( 3 ) = H21S*H( M+2, M+1 ) + S = ABS( V( 1 ) ) + ABS( V( 2 ) ) + ABS( V( 3 ) ) + V( 1 ) = V( 1 ) / S + V( 2 ) = V( 2 ) / S + V( 3 ) = V( 3 ) / S + IF( M.EQ.L ) + $ GO TO 60 + IF( ABS( H( M, M-1 ) )*( ABS( V( 2 ) )+ABS( V( 3 ) ) ).LE. + $ ULP*ABS( V( 1 ) )*( ABS( H( M-1, M-1 ) )+ABS( H( M, + $ M ) )+ABS( H( M+1, M+1 ) ) ) )GO TO 60 + 50 CONTINUE + 60 CONTINUE +* +* Double-shift QR step +* + DO 130 K = M, I - 1 +* +* The first iteration of this loop determines a reflection G +* from the vector V and applies it from left and right to H, +* thus creating a nonzero bulge below the subdiagonal. +* +* Each subsequent iteration determines a reflection G to +* restore the Hessenberg form in the (K-1)th column, and thus +* chases the bulge one step toward the bottom of the active +* submatrix. NR is the order of G. +* + NR = MIN( 3, I-K+1 ) + IF( K.GT.M ) + $ CALL SCOPY( NR, H( K, K-1 ), 1, V, 1 ) + CALL SLARFG( NR, V( 1 ), V( 2 ), 1, T1 ) + IF( K.GT.M ) THEN + H( K, K-1 ) = V( 1 ) + H( K+1, K-1 ) = ZERO + IF( K.LT.I-1 ) + $ H( K+2, K-1 ) = ZERO + ELSE IF( M.GT.L ) THEN +* ==== Use the following instead of +* . H( K, K-1 ) = -H( K, K-1 ) to +* . avoid a bug when v(2) and v(3) +* . underflow. ==== + H( K, K-1 ) = H( K, K-1 )*( ONE-T1 ) + END IF + V2 = V( 2 ) + T2 = T1*V2 + IF( NR.EQ.3 ) THEN + V3 = V( 3 ) + T3 = T1*V3 +* +* Apply G from the left to transform the rows of the matrix +* in columns K to I2. +* + DO 70 J = K, I2 + SUM = H( K, J ) + V2*H( K+1, J ) + V3*H( K+2, J ) + H( K, J ) = H( K, J ) - SUM*T1 + H( K+1, J ) = H( K+1, J ) - SUM*T2 + H( K+2, J ) = H( K+2, J ) - SUM*T3 + 70 CONTINUE +* +* Apply G from the right to transform the columns of the +* matrix in rows I1 to min(K+3,I). +* + DO 80 J = I1, MIN( K+3, I ) + SUM = H( J, K ) + V2*H( J, K+1 ) + V3*H( J, K+2 ) + H( J, K ) = H( J, K ) - SUM*T1 + H( J, K+1 ) = H( J, K+1 ) - SUM*T2 + H( J, K+2 ) = H( J, K+2 ) - SUM*T3 + 80 CONTINUE +* + IF( WANTZ ) THEN +* +* Accumulate transformations in the matrix Z +* + DO 90 J = ILOZ, IHIZ + SUM = Z( J, K ) + V2*Z( J, K+1 ) + V3*Z( J, K+2 ) + Z( J, K ) = Z( J, K ) - SUM*T1 + Z( J, K+1 ) = Z( J, K+1 ) - SUM*T2 + Z( J, K+2 ) = Z( J, K+2 ) - SUM*T3 + 90 CONTINUE + END IF + ELSE IF( NR.EQ.2 ) THEN +* +* Apply G from the left to transform the rows of the matrix +* in columns K to I2. +* + DO 100 J = K, I2 + SUM = H( K, J ) + V2*H( K+1, J ) + H( K, J ) = H( K, J ) - SUM*T1 + H( K+1, J ) = H( K+1, J ) - SUM*T2 + 100 CONTINUE +* +* Apply G from the right to transform the columns of the +* matrix in rows I1 to min(K+3,I). +* + DO 110 J = I1, I + SUM = H( J, K ) + V2*H( J, K+1 ) + H( J, K ) = H( J, K ) - SUM*T1 + H( J, K+1 ) = H( J, K+1 ) - SUM*T2 + 110 CONTINUE +* + IF( WANTZ ) THEN +* +* Accumulate transformations in the matrix Z +* + DO 120 J = ILOZ, IHIZ + SUM = Z( J, K ) + V2*Z( J, K+1 ) + Z( J, K ) = Z( J, K ) - SUM*T1 + Z( J, K+1 ) = Z( J, K+1 ) - SUM*T2 + 120 CONTINUE + END IF + END IF + 130 CONTINUE +* + 140 CONTINUE +* +* Failure to converge in remaining number of iterations +* + INFO = I + RETURN +* + 150 CONTINUE +* + IF( L.EQ.I ) THEN +* +* H(I,I-1) is negligible: one eigenvalue has converged. +* + WR( I ) = H( I, I ) + WI( I ) = ZERO + ELSE IF( L.EQ.I-1 ) THEN +* +* H(I-1,I-2) is negligible: a pair of eigenvalues have converged. +* +* Transform the 2-by-2 submatrix to standard Schur form, +* and compute and store the eigenvalues. +* + CALL SLANV2( H( I-1, I-1 ), H( I-1, I ), H( I, I-1 ), + $ H( I, I ), WR( I-1 ), WI( I-1 ), WR( I ), WI( I ), + $ CS, SN ) +* + IF( WANTT ) THEN +* +* Apply the transformation to the rest of H. +* + IF( I2.GT.I ) + $ CALL SROT( I2-I, H( I-1, I+1 ), LDH, H( I, I+1 ), LDH, + $ CS, SN ) + CALL SROT( I-I1-1, H( I1, I-1 ), 1, H( I1, I ), 1, CS, SN ) + END IF + IF( WANTZ ) THEN +* +* Apply the transformation to Z. +* + CALL SROT( NZ, Z( ILOZ, I-1 ), 1, Z( ILOZ, I ), 1, CS, SN ) + END IF + END IF +* +* return to start of the main loop with new value of I. +* + I = L - 1 + GO TO 20 +* + 160 CONTINUE + RETURN +* +* End of SLAHQR +* + END diff --git a/dspl/liblapack/SRC/slahr2.f b/dspl/liblapack/SRC/slahr2.f new file mode 100644 index 0000000..656d679 --- /dev/null +++ b/dspl/liblapack/SRC/slahr2.f @@ -0,0 +1,326 @@ +*> \brief \b SLAHR2 reduces the specified number of first columns of a general rectangular matrix A so that elements below the specified subdiagonal are zero, and returns auxiliary matrices which are needed to apply the transformation to the unreduced part of A. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAHR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAHR2( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* .. Scalar Arguments .. +* INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), T( LDT, NB ), TAU( NB ), +* $ Y( LDY, NB ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAHR2 reduces the first NB columns of A real general n-BY-(n-k+1) +*> matrix A so that elements below the k-th subdiagonal are zero. The +*> reduction is performed by an orthogonal similarity transformation +*> Q**T * A * Q. The routine returns the matrices V and T which determine +*> Q as a block reflector I - V*T*V**T, and also the matrix Y = A * V * T. +*> +*> This is an auxiliary routine called by SGEHRD. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The offset for the reduction. Elements below the k-th +*> subdiagonal in the first NB columns are reduced to zero. +*> K < N. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of columns to be reduced. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N-K+1) +*> On entry, the n-by-(n-k+1) general matrix A. +*> On exit, the elements on and above the k-th subdiagonal in +*> the first NB columns are overwritten with the corresponding +*> elements of the reduced matrix; the elements below the k-th +*> subdiagonal, with the array TAU, represent the matrix Q as a +*> product of elementary reflectors. The other columns of A are +*> unchanged. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (NB) +*> The scalar factors of the elementary reflectors. See Further +*> Details. +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (LDT,NB) +*> The upper triangular matrix T. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[out] Y +*> \verbatim +*> Y is REAL array, dimension (LDY,NB) +*> The n-by-nb matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= N. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of nb elementary reflectors +*> +*> Q = H(1) H(2) . . . H(nb). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in +*> A(i+k+1:n,i), and tau in TAU(i). +*> +*> The elements of the vectors v together form the (n-k+1)-by-nb matrix +*> V which is needed, with T and Y, to apply the transformation to the +*> unreduced part of the matrix, using an update of the form: +*> A := (I - V*T*V**T) * (A - Y*V**T). +*> +*> The contents of A on exit are illustrated by the following example +*> with n = 7, k = 3 and nb = 2: +*> +*> ( a a a a a ) +*> ( a a a a a ) +*> ( a a a a a ) +*> ( h h a a a ) +*> ( v1 h a a a ) +*> ( v1 v2 a a a ) +*> ( v1 v2 a a a ) +*> +*> where a denotes an element of the original matrix A, h denotes a +*> modified element of the upper Hessenberg matrix H, and vi denotes an +*> element of the vector defining H(i). +*> +*> This subroutine is a slight modification of LAPACK-3.0's DLAHRD +*> incorporating improvements proposed by Quintana-Orti and Van de +*> Gejin. Note that the entries of A(1:K,2:NB) differ from those +*> returned by the original LAPACK-3.0's DLAHRD routine. (This +*> subroutine is not backward compatible with LAPACK-3.0's DLAHRD.) +*> \endverbatim +* +*> \par References: +* ================ +*> +*> Gregorio Quintana-Orti and Robert van de Geijn, "Improving the +*> performance of reduction to Hessenberg form," ACM Transactions on +*> Mathematical Software, 32(2):180-194, June 2006. +*> +* ===================================================================== + SUBROUTINE SLAHR2( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. + REAL A( LDA, * ), T( LDT, NB ), TAU( NB ), + $ Y( LDY, NB ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, + $ ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL EI +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMM, SGEMV, SLACPY, + $ SLARFG, SSCAL, STRMM, STRMV +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, NB + IF( I.GT.1 ) THEN +* +* Update A(K+1:N,I) +* +* Update I-th column of A - Y * V**T +* + CALL SGEMV( 'NO TRANSPOSE', N-K, I-1, -ONE, Y(K+1,1), LDY, + $ A( K+I-1, 1 ), LDA, ONE, A( K+1, I ), 1 ) +* +* Apply I - V * T**T * V**T to this column (call it b) from the +* left, using the last column of T as workspace +* +* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) +* ( V2 ) ( b2 ) +* +* where V1 is unit lower triangular +* +* w := V1**T * b1 +* + CALL SCOPY( I-1, A( K+1, I ), 1, T( 1, NB ), 1 ) + CALL STRMV( 'Lower', 'Transpose', 'UNIT', + $ I-1, A( K+1, 1 ), + $ LDA, T( 1, NB ), 1 ) +* +* w := w + V2**T * b2 +* + CALL SGEMV( 'Transpose', N-K-I+1, I-1, + $ ONE, A( K+I, 1 ), + $ LDA, A( K+I, I ), 1, ONE, T( 1, NB ), 1 ) +* +* w := T**T * w +* + CALL STRMV( 'Upper', 'Transpose', 'NON-UNIT', + $ I-1, T, LDT, + $ T( 1, NB ), 1 ) +* +* b2 := b2 - V2*w +* + CALL SGEMV( 'NO TRANSPOSE', N-K-I+1, I-1, -ONE, + $ A( K+I, 1 ), + $ LDA, T( 1, NB ), 1, ONE, A( K+I, I ), 1 ) +* +* b1 := b1 - V1*w +* + CALL STRMV( 'Lower', 'NO TRANSPOSE', + $ 'UNIT', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) + CALL SAXPY( I-1, -ONE, T( 1, NB ), 1, A( K+1, I ), 1 ) +* + A( K+I-1, I-1 ) = EI + END IF +* +* Generate the elementary reflector H(I) to annihilate +* A(K+I+1:N,I) +* + CALL SLARFG( N-K-I+1, A( K+I, I ), A( MIN( K+I+1, N ), I ), 1, + $ TAU( I ) ) + EI = A( K+I, I ) + A( K+I, I ) = ONE +* +* Compute Y(K+1:N,I) +* + CALL SGEMV( 'NO TRANSPOSE', N-K, N-K-I+1, + $ ONE, A( K+1, I+1 ), + $ LDA, A( K+I, I ), 1, ZERO, Y( K+1, I ), 1 ) + CALL SGEMV( 'Transpose', N-K-I+1, I-1, + $ ONE, A( K+I, 1 ), LDA, + $ A( K+I, I ), 1, ZERO, T( 1, I ), 1 ) + CALL SGEMV( 'NO TRANSPOSE', N-K, I-1, -ONE, + $ Y( K+1, 1 ), LDY, + $ T( 1, I ), 1, ONE, Y( K+1, I ), 1 ) + CALL SSCAL( N-K, TAU( I ), Y( K+1, I ), 1 ) +* +* Compute T(1:I,I) +* + CALL SSCAL( I-1, -TAU( I ), T( 1, I ), 1 ) + CALL STRMV( 'Upper', 'No Transpose', 'NON-UNIT', + $ I-1, T, LDT, + $ T( 1, I ), 1 ) + T( I, I ) = TAU( I ) +* + 10 CONTINUE + A( K+NB, NB ) = EI +* +* Compute Y(1:K,1:NB) +* + CALL SLACPY( 'ALL', K, NB, A( 1, 2 ), LDA, Y, LDY ) + CALL STRMM( 'RIGHT', 'Lower', 'NO TRANSPOSE', + $ 'UNIT', K, NB, + $ ONE, A( K+1, 1 ), LDA, Y, LDY ) + IF( N.GT.K+NB ) + $ CALL SGEMM( 'NO TRANSPOSE', 'NO TRANSPOSE', K, + $ NB, N-K-NB, ONE, + $ A( 1, 2+NB ), LDA, A( K+1+NB, 1 ), LDA, ONE, Y, + $ LDY ) + CALL STRMM( 'RIGHT', 'Upper', 'NO TRANSPOSE', + $ 'NON-UNIT', K, NB, + $ ONE, T, LDT, Y, LDY ) +* + RETURN +* +* End of SLAHR2 +* + END diff --git a/dspl/liblapack/SRC/slaic1.f b/dspl/liblapack/SRC/slaic1.f new file mode 100644 index 0000000..44ab4b0 --- /dev/null +++ b/dspl/liblapack/SRC/slaic1.f @@ -0,0 +1,367 @@ +*> \brief \b SLAIC1 applies one step of incremental condition estimation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAIC1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C ) +* +* .. Scalar Arguments .. +* INTEGER J, JOB +* REAL C, GAMMA, S, SEST, SESTPR +* .. +* .. Array Arguments .. +* REAL W( J ), X( J ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAIC1 applies one step of incremental condition estimation in +*> its simplest version: +*> +*> Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j +*> lower triangular matrix L, such that +*> twonorm(L*x) = sest +*> Then SLAIC1 computes sestpr, s, c such that +*> the vector +*> [ s*x ] +*> xhat = [ c ] +*> is an approximate singular vector of +*> [ L 0 ] +*> Lhat = [ w**T gamma ] +*> in the sense that +*> twonorm(Lhat*xhat) = sestpr. +*> +*> Depending on JOB, an estimate for the largest or smallest singular +*> value is computed. +*> +*> Note that [s c]**T and sestpr**2 is an eigenpair of the system +*> +*> diag(sest*sest, 0) + [alpha gamma] * [ alpha ] +*> [ gamma ] +*> +*> where alpha = x**T*w. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is INTEGER +*> = 1: an estimate for the largest singular value is computed. +*> = 2: an estimate for the smallest singular value is computed. +*> \endverbatim +*> +*> \param[in] J +*> \verbatim +*> J is INTEGER +*> Length of X and W +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is REAL array, dimension (J) +*> The j-vector x. +*> \endverbatim +*> +*> \param[in] SEST +*> \verbatim +*> SEST is REAL +*> Estimated singular value of j by j matrix L +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is REAL array, dimension (J) +*> The j-vector w. +*> \endverbatim +*> +*> \param[in] GAMMA +*> \verbatim +*> GAMMA is REAL +*> The diagonal element gamma. +*> \endverbatim +*> +*> \param[out] SESTPR +*> \verbatim +*> SESTPR is REAL +*> Estimated singular value of (j+1) by (j+1) matrix Lhat. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL +*> Sine needed in forming xhat. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is REAL +*> Cosine needed in forming xhat. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER J, JOB + REAL C, GAMMA, S, SEST, SESTPR +* .. +* .. Array Arguments .. + REAL W( J ), X( J ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 ) + REAL HALF, FOUR + PARAMETER ( HALF = 0.5E0, FOUR = 4.0E0 ) +* .. +* .. Local Scalars .. + REAL ABSALP, ABSEST, ABSGAM, ALPHA, B, COSINE, EPS, + $ NORMA, S1, S2, SINE, T, TEST, TMP, ZETA1, ZETA2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN, SQRT +* .. +* .. External Functions .. + REAL SDOT, SLAMCH + EXTERNAL SDOT, SLAMCH +* .. +* .. Executable Statements .. +* + EPS = SLAMCH( 'Epsilon' ) + ALPHA = SDOT( J, X, 1, W, 1 ) +* + ABSALP = ABS( ALPHA ) + ABSGAM = ABS( GAMMA ) + ABSEST = ABS( SEST ) +* + IF( JOB.EQ.1 ) THEN +* +* Estimating largest singular value +* +* special cases +* + IF( SEST.EQ.ZERO ) THEN + S1 = MAX( ABSGAM, ABSALP ) + IF( S1.EQ.ZERO ) THEN + S = ZERO + C = ONE + SESTPR = ZERO + ELSE + S = ALPHA / S1 + C = GAMMA / S1 + TMP = SQRT( S*S+C*C ) + S = S / TMP + C = C / TMP + SESTPR = S1*TMP + END IF + RETURN + ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN + S = ONE + C = ZERO + TMP = MAX( ABSEST, ABSALP ) + S1 = ABSEST / TMP + S2 = ABSALP / TMP + SESTPR = TMP*SQRT( S1*S1+S2*S2 ) + RETURN + ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN + S1 = ABSGAM + S2 = ABSEST + IF( S1.LE.S2 ) THEN + S = ONE + C = ZERO + SESTPR = S2 + ELSE + S = ZERO + C = ONE + SESTPR = S1 + END IF + RETURN + ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN + S1 = ABSGAM + S2 = ABSALP + IF( S1.LE.S2 ) THEN + TMP = S1 / S2 + S = SQRT( ONE+TMP*TMP ) + SESTPR = S2*S + C = ( GAMMA / S2 ) / S + S = SIGN( ONE, ALPHA ) / S + ELSE + TMP = S2 / S1 + C = SQRT( ONE+TMP*TMP ) + SESTPR = S1*C + S = ( ALPHA / S1 ) / C + C = SIGN( ONE, GAMMA ) / C + END IF + RETURN + ELSE +* +* normal case +* + ZETA1 = ALPHA / ABSEST + ZETA2 = GAMMA / ABSEST +* + B = ( ONE-ZETA1*ZETA1-ZETA2*ZETA2 )*HALF + C = ZETA1*ZETA1 + IF( B.GT.ZERO ) THEN + T = C / ( B+SQRT( B*B+C ) ) + ELSE + T = SQRT( B*B+C ) - B + END IF +* + SINE = -ZETA1 / T + COSINE = -ZETA2 / ( ONE+T ) + TMP = SQRT( SINE*SINE+COSINE*COSINE ) + S = SINE / TMP + C = COSINE / TMP + SESTPR = SQRT( T+ONE )*ABSEST + RETURN + END IF +* + ELSE IF( JOB.EQ.2 ) THEN +* +* Estimating smallest singular value +* +* special cases +* + IF( SEST.EQ.ZERO ) THEN + SESTPR = ZERO + IF( MAX( ABSGAM, ABSALP ).EQ.ZERO ) THEN + SINE = ONE + COSINE = ZERO + ELSE + SINE = -GAMMA + COSINE = ALPHA + END IF + S1 = MAX( ABS( SINE ), ABS( COSINE ) ) + S = SINE / S1 + C = COSINE / S1 + TMP = SQRT( S*S+C*C ) + S = S / TMP + C = C / TMP + RETURN + ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN + S = ZERO + C = ONE + SESTPR = ABSGAM + RETURN + ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN + S1 = ABSGAM + S2 = ABSEST + IF( S1.LE.S2 ) THEN + S = ZERO + C = ONE + SESTPR = S1 + ELSE + S = ONE + C = ZERO + SESTPR = S2 + END IF + RETURN + ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN + S1 = ABSGAM + S2 = ABSALP + IF( S1.LE.S2 ) THEN + TMP = S1 / S2 + C = SQRT( ONE+TMP*TMP ) + SESTPR = ABSEST*( TMP / C ) + S = -( GAMMA / S2 ) / C + C = SIGN( ONE, ALPHA ) / C + ELSE + TMP = S2 / S1 + S = SQRT( ONE+TMP*TMP ) + SESTPR = ABSEST / S + C = ( ALPHA / S1 ) / S + S = -SIGN( ONE, GAMMA ) / S + END IF + RETURN + ELSE +* +* normal case +* + ZETA1 = ALPHA / ABSEST + ZETA2 = GAMMA / ABSEST +* + NORMA = MAX( ONE+ZETA1*ZETA1+ABS( ZETA1*ZETA2 ), + $ ABS( ZETA1*ZETA2 )+ZETA2*ZETA2 ) +* +* See if root is closer to zero or to ONE +* + TEST = ONE + TWO*( ZETA1-ZETA2 )*( ZETA1+ZETA2 ) + IF( TEST.GE.ZERO ) THEN +* +* root is close to zero, compute directly +* + B = ( ZETA1*ZETA1+ZETA2*ZETA2+ONE )*HALF + C = ZETA2*ZETA2 + T = C / ( B+SQRT( ABS( B*B-C ) ) ) + SINE = ZETA1 / ( ONE-T ) + COSINE = -ZETA2 / T + SESTPR = SQRT( T+FOUR*EPS*EPS*NORMA )*ABSEST + ELSE +* +* root is closer to ONE, shift by that amount +* + B = ( ZETA2*ZETA2+ZETA1*ZETA1-ONE )*HALF + C = ZETA1*ZETA1 + IF( B.GE.ZERO ) THEN + T = -C / ( B+SQRT( B*B+C ) ) + ELSE + T = B - SQRT( B*B+C ) + END IF + SINE = -ZETA1 / T + COSINE = -ZETA2 / ( ONE+T ) + SESTPR = SQRT( ONE+T+FOUR*EPS*EPS*NORMA )*ABSEST + END IF + TMP = SQRT( SINE*SINE+COSINE*COSINE ) + S = SINE / TMP + C = COSINE / TMP + RETURN +* + END IF + END IF + RETURN +* +* End of SLAIC1 +* + END diff --git a/dspl/liblapack/SRC/slaisnan.f b/dspl/liblapack/SRC/slaisnan.f new file mode 100644 index 0000000..568d632 --- /dev/null +++ b/dspl/liblapack/SRC/slaisnan.f @@ -0,0 +1,91 @@ +*> \brief \b SLAISNAN tests input for NaN by comparing two arguments for inequality. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAISNAN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* LOGICAL FUNCTION SLAISNAN( SIN1, SIN2 ) +* +* .. Scalar Arguments .. +* REAL, INTENT(IN) :: SIN1, SIN2 +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This routine is not for general use. It exists solely to avoid +*> over-optimization in SISNAN. +*> +*> SLAISNAN checks for NaNs by comparing its two arguments for +*> inequality. NaN is the only floating-point value where NaN != NaN +*> returns .TRUE. To check for NaNs, pass the same variable as both +*> arguments. +*> +*> A compiler must assume that the two arguments are +*> not the same variable, and the test will not be optimized away. +*> Interprocedural or whole-program optimization may delete this +*> test. The ISNAN functions will be replaced by the correct +*> Fortran 03 intrinsic once the intrinsic is widely available. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIN1 +*> \verbatim +*> SIN1 is REAL +*> \endverbatim +*> +*> \param[in] SIN2 +*> \verbatim +*> SIN2 is REAL +*> Two numbers to compare for inequality. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + LOGICAL FUNCTION SLAISNAN( SIN1, SIN2 ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + REAL, INTENT(IN) :: SIN1, SIN2 +* .. +* +* ===================================================================== +* +* .. Executable Statements .. + SLAISNAN = (SIN1.NE.SIN2) + RETURN + END diff --git a/dspl/liblapack/SRC/slaln2.f b/dspl/liblapack/SRC/slaln2.f new file mode 100644 index 0000000..f9ceee7 --- /dev/null +++ b/dspl/liblapack/SRC/slaln2.f @@ -0,0 +1,611 @@ +*> \brief \b SLALN2 solves a 1-by-1 or 2-by-2 linear system of equations of the specified form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLALN2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLALN2( LTRANS, NA, NW, SMIN, CA, A, LDA, D1, D2, B, +* LDB, WR, WI, X, LDX, SCALE, XNORM, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL LTRANS +* INTEGER INFO, LDA, LDB, LDX, NA, NW +* REAL CA, D1, D2, SCALE, SMIN, WI, WR, XNORM +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLALN2 solves a system of the form (ca A - w D ) X = s B +*> or (ca A**T - w D) X = s B with possible scaling ("s") and +*> perturbation of A. (A**T means A-transpose.) +*> +*> A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA +*> real diagonal matrix, w is a real or complex value, and X and B are +*> NA x 1 matrices -- real if w is real, complex if w is complex. NA +*> may be 1 or 2. +*> +*> If w is complex, X and B are represented as NA x 2 matrices, +*> the first column of each being the real part and the second +*> being the imaginary part. +*> +*> "s" is a scaling factor (.LE. 1), computed by SLALN2, which is +*> so chosen that X can be computed without overflow. X is further +*> scaled if necessary to assure that norm(ca A - w D)*norm(X) is less +*> than overflow. +*> +*> If both singular values of (ca A - w D) are less than SMIN, +*> SMIN*identity will be used instead of (ca A - w D). If only one +*> singular value is less than SMIN, one element of (ca A - w D) will be +*> perturbed enough to make the smallest singular value roughly SMIN. +*> If both singular values are at least SMIN, (ca A - w D) will not be +*> perturbed. In any case, the perturbation will be at most some small +*> multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values +*> are computed by infinity-norm approximations, and thus will only be +*> correct to a factor of 2 or so. +*> +*> Note: all input quantities are assumed to be smaller than overflow +*> by a reasonable factor. (See BIGNUM.) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] LTRANS +*> \verbatim +*> LTRANS is LOGICAL +*> =.TRUE.: A-transpose will be used. +*> =.FALSE.: A will be used (not transposed.) +*> \endverbatim +*> +*> \param[in] NA +*> \verbatim +*> NA is INTEGER +*> The size of the matrix A. It may (only) be 1 or 2. +*> \endverbatim +*> +*> \param[in] NW +*> \verbatim +*> NW is INTEGER +*> 1 if "w" is real, 2 if "w" is complex. It may only be 1 +*> or 2. +*> \endverbatim +*> +*> \param[in] SMIN +*> \verbatim +*> SMIN is REAL +*> The desired lower bound on the singular values of A. This +*> should be a safe distance away from underflow or overflow, +*> say, between (underflow/machine precision) and (machine +*> precision * overflow ). (See BIGNUM and ULP.) +*> \endverbatim +*> +*> \param[in] CA +*> \verbatim +*> CA is REAL +*> The coefficient c, which A is multiplied by. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,NA) +*> The NA x NA matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. It must be at least NA. +*> \endverbatim +*> +*> \param[in] D1 +*> \verbatim +*> D1 is REAL +*> The 1,1 element in the diagonal matrix D. +*> \endverbatim +*> +*> \param[in] D2 +*> \verbatim +*> D2 is REAL +*> The 2,2 element in the diagonal matrix D. Not used if NA=1. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NW) +*> The NA x NW matrix B (right-hand side). If NW=2 ("w" is +*> complex), column 1 contains the real part of B and column 2 +*> contains the imaginary part. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. It must be at least NA. +*> \endverbatim +*> +*> \param[in] WR +*> \verbatim +*> WR is REAL +*> The real part of the scalar "w". +*> \endverbatim +*> +*> \param[in] WI +*> \verbatim +*> WI is REAL +*> The imaginary part of the scalar "w". Not used if NW=1. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NW) +*> The NA x NW matrix X (unknowns), as computed by SLALN2. +*> If NW=2 ("w" is complex), on exit, column 1 will contain +*> the real part of X and column 2 will contain the imaginary +*> part. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of X. It must be at least NA. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL +*> The scale factor that B must be multiplied by to insure +*> that overflow does not occur when computing X. Thus, +*> (ca A - w D) X will be SCALE*B, not B (ignoring +*> perturbations of A.) It will be at most 1. +*> \endverbatim +*> +*> \param[out] XNORM +*> \verbatim +*> XNORM is REAL +*> The infinity-norm of X, when X is regarded as an NA x NW +*> real matrix. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> An error flag. It will be set to zero if no error occurs, +*> a negative number if an argument is in error, or a positive +*> number if ca A - w D had to be perturbed. +*> The possible values are: +*> = 0: No error occurred, and (ca A - w D) did not have to be +*> perturbed. +*> = 1: (ca A - w D) had to be perturbed to make its smallest +*> (or only) singular value greater than SMIN. +*> NOTE: In the interests of speed, this routine does not +*> check the inputs for errors. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLALN2( LTRANS, NA, NW, SMIN, CA, A, LDA, D1, D2, B, + $ LDB, WR, WI, X, LDX, SCALE, XNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL LTRANS + INTEGER INFO, LDA, LDB, LDX, NA, NW + REAL CA, D1, D2, SCALE, SMIN, WI, WR, XNORM +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + REAL TWO + PARAMETER ( TWO = 2.0E0 ) +* .. +* .. Local Scalars .. + INTEGER ICMAX, J + REAL BBND, BI1, BI2, BIGNUM, BNORM, BR1, BR2, CI21, + $ CI22, CMAX, CNORM, CR21, CR22, CSI, CSR, LI21, + $ LR21, SMINI, SMLNUM, TEMP, U22ABS, UI11, UI11R, + $ UI12, UI12S, UI22, UR11, UR11R, UR12, UR12S, + $ UR22, XI1, XI2, XR1, XR2 +* .. +* .. Local Arrays .. + LOGICAL CSWAP( 4 ), RSWAP( 4 ) + INTEGER IPIVOT( 4, 4 ) + REAL CI( 2, 2 ), CIV( 4 ), CR( 2, 2 ), CRV( 4 ) +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Equivalences .. + EQUIVALENCE ( CI( 1, 1 ), CIV( 1 ) ), + $ ( CR( 1, 1 ), CRV( 1 ) ) +* .. +* .. Data statements .. + DATA CSWAP / .FALSE., .FALSE., .TRUE., .TRUE. / + DATA RSWAP / .FALSE., .TRUE., .FALSE., .TRUE. / + DATA IPIVOT / 1, 2, 3, 4, 2, 1, 4, 3, 3, 4, 1, 2, 4, + $ 3, 2, 1 / +* .. +* .. Executable Statements .. +* +* Compute BIGNUM +* + SMLNUM = TWO*SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + SMINI = MAX( SMIN, SMLNUM ) +* +* Don't check for input errors +* + INFO = 0 +* +* Standard Initializations +* + SCALE = ONE +* + IF( NA.EQ.1 ) THEN +* +* 1 x 1 (i.e., scalar) system C X = B +* + IF( NW.EQ.1 ) THEN +* +* Real 1x1 system. +* +* C = ca A - w D +* + CSR = CA*A( 1, 1 ) - WR*D1 + CNORM = ABS( CSR ) +* +* If | C | < SMINI, use C = SMINI +* + IF( CNORM.LT.SMINI ) THEN + CSR = SMINI + CNORM = SMINI + INFO = 1 + END IF +* +* Check scaling for X = B / C +* + BNORM = ABS( B( 1, 1 ) ) + IF( CNORM.LT.ONE .AND. BNORM.GT.ONE ) THEN + IF( BNORM.GT.BIGNUM*CNORM ) + $ SCALE = ONE / BNORM + END IF +* +* Compute X +* + X( 1, 1 ) = ( B( 1, 1 )*SCALE ) / CSR + XNORM = ABS( X( 1, 1 ) ) + ELSE +* +* Complex 1x1 system (w is complex) +* +* C = ca A - w D +* + CSR = CA*A( 1, 1 ) - WR*D1 + CSI = -WI*D1 + CNORM = ABS( CSR ) + ABS( CSI ) +* +* If | C | < SMINI, use C = SMINI +* + IF( CNORM.LT.SMINI ) THEN + CSR = SMINI + CSI = ZERO + CNORM = SMINI + INFO = 1 + END IF +* +* Check scaling for X = B / C +* + BNORM = ABS( B( 1, 1 ) ) + ABS( B( 1, 2 ) ) + IF( CNORM.LT.ONE .AND. BNORM.GT.ONE ) THEN + IF( BNORM.GT.BIGNUM*CNORM ) + $ SCALE = ONE / BNORM + END IF +* +* Compute X +* + CALL SLADIV( SCALE*B( 1, 1 ), SCALE*B( 1, 2 ), CSR, CSI, + $ X( 1, 1 ), X( 1, 2 ) ) + XNORM = ABS( X( 1, 1 ) ) + ABS( X( 1, 2 ) ) + END IF +* + ELSE +* +* 2x2 System +* +* Compute the real part of C = ca A - w D (or ca A**T - w D ) +* + CR( 1, 1 ) = CA*A( 1, 1 ) - WR*D1 + CR( 2, 2 ) = CA*A( 2, 2 ) - WR*D2 + IF( LTRANS ) THEN + CR( 1, 2 ) = CA*A( 2, 1 ) + CR( 2, 1 ) = CA*A( 1, 2 ) + ELSE + CR( 2, 1 ) = CA*A( 2, 1 ) + CR( 1, 2 ) = CA*A( 1, 2 ) + END IF +* + IF( NW.EQ.1 ) THEN +* +* Real 2x2 system (w is real) +* +* Find the largest element in C +* + CMAX = ZERO + ICMAX = 0 +* + DO 10 J = 1, 4 + IF( ABS( CRV( J ) ).GT.CMAX ) THEN + CMAX = ABS( CRV( J ) ) + ICMAX = J + END IF + 10 CONTINUE +* +* If norm(C) < SMINI, use SMINI*identity. +* + IF( CMAX.LT.SMINI ) THEN + BNORM = MAX( ABS( B( 1, 1 ) ), ABS( B( 2, 1 ) ) ) + IF( SMINI.LT.ONE .AND. BNORM.GT.ONE ) THEN + IF( BNORM.GT.BIGNUM*SMINI ) + $ SCALE = ONE / BNORM + END IF + TEMP = SCALE / SMINI + X( 1, 1 ) = TEMP*B( 1, 1 ) + X( 2, 1 ) = TEMP*B( 2, 1 ) + XNORM = TEMP*BNORM + INFO = 1 + RETURN + END IF +* +* Gaussian elimination with complete pivoting. +* + UR11 = CRV( ICMAX ) + CR21 = CRV( IPIVOT( 2, ICMAX ) ) + UR12 = CRV( IPIVOT( 3, ICMAX ) ) + CR22 = CRV( IPIVOT( 4, ICMAX ) ) + UR11R = ONE / UR11 + LR21 = UR11R*CR21 + UR22 = CR22 - UR12*LR21 +* +* If smaller pivot < SMINI, use SMINI +* + IF( ABS( UR22 ).LT.SMINI ) THEN + UR22 = SMINI + INFO = 1 + END IF + IF( RSWAP( ICMAX ) ) THEN + BR1 = B( 2, 1 ) + BR2 = B( 1, 1 ) + ELSE + BR1 = B( 1, 1 ) + BR2 = B( 2, 1 ) + END IF + BR2 = BR2 - LR21*BR1 + BBND = MAX( ABS( BR1*( UR22*UR11R ) ), ABS( BR2 ) ) + IF( BBND.GT.ONE .AND. ABS( UR22 ).LT.ONE ) THEN + IF( BBND.GE.BIGNUM*ABS( UR22 ) ) + $ SCALE = ONE / BBND + END IF +* + XR2 = ( BR2*SCALE ) / UR22 + XR1 = ( SCALE*BR1 )*UR11R - XR2*( UR11R*UR12 ) + IF( CSWAP( ICMAX ) ) THEN + X( 1, 1 ) = XR2 + X( 2, 1 ) = XR1 + ELSE + X( 1, 1 ) = XR1 + X( 2, 1 ) = XR2 + END IF + XNORM = MAX( ABS( XR1 ), ABS( XR2 ) ) +* +* Further scaling if norm(A) norm(X) > overflow +* + IF( XNORM.GT.ONE .AND. CMAX.GT.ONE ) THEN + IF( XNORM.GT.BIGNUM / CMAX ) THEN + TEMP = CMAX / BIGNUM + X( 1, 1 ) = TEMP*X( 1, 1 ) + X( 2, 1 ) = TEMP*X( 2, 1 ) + XNORM = TEMP*XNORM + SCALE = TEMP*SCALE + END IF + END IF + ELSE +* +* Complex 2x2 system (w is complex) +* +* Find the largest element in C +* + CI( 1, 1 ) = -WI*D1 + CI( 2, 1 ) = ZERO + CI( 1, 2 ) = ZERO + CI( 2, 2 ) = -WI*D2 + CMAX = ZERO + ICMAX = 0 +* + DO 20 J = 1, 4 + IF( ABS( CRV( J ) )+ABS( CIV( J ) ).GT.CMAX ) THEN + CMAX = ABS( CRV( J ) ) + ABS( CIV( J ) ) + ICMAX = J + END IF + 20 CONTINUE +* +* If norm(C) < SMINI, use SMINI*identity. +* + IF( CMAX.LT.SMINI ) THEN + BNORM = MAX( ABS( B( 1, 1 ) )+ABS( B( 1, 2 ) ), + $ ABS( B( 2, 1 ) )+ABS( B( 2, 2 ) ) ) + IF( SMINI.LT.ONE .AND. BNORM.GT.ONE ) THEN + IF( BNORM.GT.BIGNUM*SMINI ) + $ SCALE = ONE / BNORM + END IF + TEMP = SCALE / SMINI + X( 1, 1 ) = TEMP*B( 1, 1 ) + X( 2, 1 ) = TEMP*B( 2, 1 ) + X( 1, 2 ) = TEMP*B( 1, 2 ) + X( 2, 2 ) = TEMP*B( 2, 2 ) + XNORM = TEMP*BNORM + INFO = 1 + RETURN + END IF +* +* Gaussian elimination with complete pivoting. +* + UR11 = CRV( ICMAX ) + UI11 = CIV( ICMAX ) + CR21 = CRV( IPIVOT( 2, ICMAX ) ) + CI21 = CIV( IPIVOT( 2, ICMAX ) ) + UR12 = CRV( IPIVOT( 3, ICMAX ) ) + UI12 = CIV( IPIVOT( 3, ICMAX ) ) + CR22 = CRV( IPIVOT( 4, ICMAX ) ) + CI22 = CIV( IPIVOT( 4, ICMAX ) ) + IF( ICMAX.EQ.1 .OR. ICMAX.EQ.4 ) THEN +* +* Code when off-diagonals of pivoted C are real +* + IF( ABS( UR11 ).GT.ABS( UI11 ) ) THEN + TEMP = UI11 / UR11 + UR11R = ONE / ( UR11*( ONE+TEMP**2 ) ) + UI11R = -TEMP*UR11R + ELSE + TEMP = UR11 / UI11 + UI11R = -ONE / ( UI11*( ONE+TEMP**2 ) ) + UR11R = -TEMP*UI11R + END IF + LR21 = CR21*UR11R + LI21 = CR21*UI11R + UR12S = UR12*UR11R + UI12S = UR12*UI11R + UR22 = CR22 - UR12*LR21 + UI22 = CI22 - UR12*LI21 + ELSE +* +* Code when diagonals of pivoted C are real +* + UR11R = ONE / UR11 + UI11R = ZERO + LR21 = CR21*UR11R + LI21 = CI21*UR11R + UR12S = UR12*UR11R + UI12S = UI12*UR11R + UR22 = CR22 - UR12*LR21 + UI12*LI21 + UI22 = -UR12*LI21 - UI12*LR21 + END IF + U22ABS = ABS( UR22 ) + ABS( UI22 ) +* +* If smaller pivot < SMINI, use SMINI +* + IF( U22ABS.LT.SMINI ) THEN + UR22 = SMINI + UI22 = ZERO + INFO = 1 + END IF + IF( RSWAP( ICMAX ) ) THEN + BR2 = B( 1, 1 ) + BR1 = B( 2, 1 ) + BI2 = B( 1, 2 ) + BI1 = B( 2, 2 ) + ELSE + BR1 = B( 1, 1 ) + BR2 = B( 2, 1 ) + BI1 = B( 1, 2 ) + BI2 = B( 2, 2 ) + END IF + BR2 = BR2 - LR21*BR1 + LI21*BI1 + BI2 = BI2 - LI21*BR1 - LR21*BI1 + BBND = MAX( ( ABS( BR1 )+ABS( BI1 ) )* + $ ( U22ABS*( ABS( UR11R )+ABS( UI11R ) ) ), + $ ABS( BR2 )+ABS( BI2 ) ) + IF( BBND.GT.ONE .AND. U22ABS.LT.ONE ) THEN + IF( BBND.GE.BIGNUM*U22ABS ) THEN + SCALE = ONE / BBND + BR1 = SCALE*BR1 + BI1 = SCALE*BI1 + BR2 = SCALE*BR2 + BI2 = SCALE*BI2 + END IF + END IF +* + CALL SLADIV( BR2, BI2, UR22, UI22, XR2, XI2 ) + XR1 = UR11R*BR1 - UI11R*BI1 - UR12S*XR2 + UI12S*XI2 + XI1 = UI11R*BR1 + UR11R*BI1 - UI12S*XR2 - UR12S*XI2 + IF( CSWAP( ICMAX ) ) THEN + X( 1, 1 ) = XR2 + X( 2, 1 ) = XR1 + X( 1, 2 ) = XI2 + X( 2, 2 ) = XI1 + ELSE + X( 1, 1 ) = XR1 + X( 2, 1 ) = XR2 + X( 1, 2 ) = XI1 + X( 2, 2 ) = XI2 + END IF + XNORM = MAX( ABS( XR1 )+ABS( XI1 ), ABS( XR2 )+ABS( XI2 ) ) +* +* Further scaling if norm(A) norm(X) > overflow +* + IF( XNORM.GT.ONE .AND. CMAX.GT.ONE ) THEN + IF( XNORM.GT.BIGNUM / CMAX ) THEN + TEMP = CMAX / BIGNUM + X( 1, 1 ) = TEMP*X( 1, 1 ) + X( 2, 1 ) = TEMP*X( 2, 1 ) + X( 1, 2 ) = TEMP*X( 1, 2 ) + X( 2, 2 ) = TEMP*X( 2, 2 ) + XNORM = TEMP*XNORM + SCALE = TEMP*SCALE + END IF + END IF + END IF + END IF +* + RETURN +* +* End of SLALN2 +* + END diff --git a/dspl/liblapack/SRC/slals0.f b/dspl/liblapack/SRC/slals0.f new file mode 100644 index 0000000..bb1c2d7 --- /dev/null +++ b/dspl/liblapack/SRC/slals0.f @@ -0,0 +1,499 @@ +*> \brief \b SLALS0 applies back multiplying factors in solving the least squares problem using divide and conquer SVD approach. Used by sgelsd. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLALS0 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX, +* PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, +* POLES, DIFL, DIFR, Z, K, C, S, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL, +* $ LDGNUM, NL, NR, NRHS, SQRE +* REAL C, S +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( LDGCOL, * ), PERM( * ) +* REAL B( LDB, * ), BX( LDBX, * ), DIFL( * ), +* $ DIFR( LDGNUM, * ), GIVNUM( LDGNUM, * ), +* $ POLES( LDGNUM, * ), WORK( * ), Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLALS0 applies back the multiplying factors of either the left or the +*> right singular vector matrix of a diagonal matrix appended by a row +*> to the right hand side matrix B in solving the least squares problem +*> using the divide-and-conquer SVD approach. +*> +*> For the left singular vector matrix, three types of orthogonal +*> matrices are involved: +*> +*> (1L) Givens rotations: the number of such rotations is GIVPTR; the +*> pairs of columns/rows they were applied to are stored in GIVCOL; +*> and the C- and S-values of these rotations are stored in GIVNUM. +*> +*> (2L) Permutation. The (NL+1)-st row of B is to be moved to the first +*> row, and for J=2:N, PERM(J)-th row of B is to be moved to the +*> J-th row. +*> +*> (3L) The left singular vector matrix of the remaining matrix. +*> +*> For the right singular vector matrix, four types of orthogonal +*> matrices are involved: +*> +*> (1R) The right singular vector matrix of the remaining matrix. +*> +*> (2R) If SQRE = 1, one extra Givens rotation to generate the right +*> null space. +*> +*> (3R) The inverse transformation of (2L). +*> +*> (4R) The inverse transformation of (1L). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> Specifies whether singular vectors are to be computed in +*> factored form: +*> = 0: Left singular vector matrix. +*> = 1: Right singular vector matrix. +*> \endverbatim +*> +*> \param[in] NL +*> \verbatim +*> NL is INTEGER +*> The row dimension of the upper block. NL >= 1. +*> \endverbatim +*> +*> \param[in] NR +*> \verbatim +*> NR is INTEGER +*> The row dimension of the lower block. NR >= 1. +*> \endverbatim +*> +*> \param[in] SQRE +*> \verbatim +*> SQRE is INTEGER +*> = 0: the lower block is an NR-by-NR square matrix. +*> = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +*> +*> The bidiagonal matrix has row dimension N = NL + NR + 1, +*> and column dimension M = N + SQRE. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of columns of B and BX. NRHS must be at least 1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension ( LDB, NRHS ) +*> On input, B contains the right hand sides of the least +*> squares problem in rows 1 through M. On output, B contains +*> the solution X in rows 1 through N. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB must be at least +*> max(1,MAX( M, N ) ). +*> \endverbatim +*> +*> \param[out] BX +*> \verbatim +*> BX is REAL array, dimension ( LDBX, NRHS ) +*> \endverbatim +*> +*> \param[in] LDBX +*> \verbatim +*> LDBX is INTEGER +*> The leading dimension of BX. +*> \endverbatim +*> +*> \param[in] PERM +*> \verbatim +*> PERM is INTEGER array, dimension ( N ) +*> The permutations (from deflation and sorting) applied +*> to the two blocks. +*> \endverbatim +*> +*> \param[in] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER +*> The number of Givens rotations which took place in this +*> subproblem. +*> \endverbatim +*> +*> \param[in] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension ( LDGCOL, 2 ) +*> Each pair of numbers indicates a pair of rows/columns +*> involved in a Givens rotation. +*> \endverbatim +*> +*> \param[in] LDGCOL +*> \verbatim +*> LDGCOL is INTEGER +*> The leading dimension of GIVCOL, must be at least N. +*> \endverbatim +*> +*> \param[in] GIVNUM +*> \verbatim +*> GIVNUM is REAL array, dimension ( LDGNUM, 2 ) +*> Each number indicates the C or S value used in the +*> corresponding Givens rotation. +*> \endverbatim +*> +*> \param[in] LDGNUM +*> \verbatim +*> LDGNUM is INTEGER +*> The leading dimension of arrays DIFR, POLES and +*> GIVNUM, must be at least K. +*> \endverbatim +*> +*> \param[in] POLES +*> \verbatim +*> POLES is REAL array, dimension ( LDGNUM, 2 ) +*> On entry, POLES(1:K, 1) contains the new singular +*> values obtained from solving the secular equation, and +*> POLES(1:K, 2) is an array containing the poles in the secular +*> equation. +*> \endverbatim +*> +*> \param[in] DIFL +*> \verbatim +*> DIFL is REAL array, dimension ( K ). +*> On entry, DIFL(I) is the distance between I-th updated +*> (undeflated) singular value and the I-th (undeflated) old +*> singular value. +*> \endverbatim +*> +*> \param[in] DIFR +*> \verbatim +*> DIFR is REAL array, dimension ( LDGNUM, 2 ). +*> On entry, DIFR(I, 1) contains the distances between I-th +*> updated (undeflated) singular value and the I+1-th +*> (undeflated) old singular value. And DIFR(I, 2) is the +*> normalizing factor for the I-th right singular vector. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is REAL array, dimension ( K ) +*> Contain the components of the deflation-adjusted updating row +*> vector. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> Contains the dimension of the non-deflated matrix, +*> This is the order of the related secular equation. 1 <= K <=N. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL +*> C contains garbage if SQRE =0 and the C-value of a Givens +*> rotation related to the right null space if SQRE = 1. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is REAL +*> S contains garbage if SQRE =0 and the S-value of a Givens +*> rotation related to the right null space if SQRE = 1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension ( K ) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Ren-Cang Li, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +* +* ===================================================================== + SUBROUTINE SLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX, + $ PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, + $ POLES, DIFL, DIFR, Z, K, C, S, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL, + $ LDGNUM, NL, NR, NRHS, SQRE + REAL C, S +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), PERM( * ) + REAL B( LDB, * ), BX( LDBX, * ), DIFL( * ), + $ DIFR( LDGNUM, * ), GIVNUM( LDGNUM, * ), + $ POLES( LDGNUM, * ), WORK( * ), Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO, NEGONE + PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0, NEGONE = -1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, M, N, NLP1 + REAL DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, TEMP +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMV, SLACPY, SLASCL, SROT, SSCAL, + $ XERBLA +* .. +* .. External Functions .. + REAL SLAMC3, SNRM2 + EXTERNAL SLAMC3, SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + N = NL + NR + 1 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( NL.LT.1 ) THEN + INFO = -2 + ELSE IF( NR.LT.1 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -5 + ELSE IF( LDB.LT.N ) THEN + INFO = -7 + ELSE IF( LDBX.LT.N ) THEN + INFO = -9 + ELSE IF( GIVPTR.LT.0 ) THEN + INFO = -11 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -13 + ELSE IF( LDGNUM.LT.N ) THEN + INFO = -15 + ELSE IF( K.LT.1 ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLALS0', -INFO ) + RETURN + END IF +* + M = N + SQRE + NLP1 = NL + 1 +* + IF( ICOMPQ.EQ.0 ) THEN +* +* Apply back orthogonal transformations from the left. +* +* Step (1L): apply back the Givens rotations performed. +* + DO 10 I = 1, GIVPTR + CALL SROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB, + $ B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ), + $ GIVNUM( I, 1 ) ) + 10 CONTINUE +* +* Step (2L): permute rows of B. +* + CALL SCOPY( NRHS, B( NLP1, 1 ), LDB, BX( 1, 1 ), LDBX ) + DO 20 I = 2, N + CALL SCOPY( NRHS, B( PERM( I ), 1 ), LDB, BX( I, 1 ), LDBX ) + 20 CONTINUE +* +* Step (3L): apply the inverse of the left singular vector +* matrix to BX. +* + IF( K.EQ.1 ) THEN + CALL SCOPY( NRHS, BX, LDBX, B, LDB ) + IF( Z( 1 ).LT.ZERO ) THEN + CALL SSCAL( NRHS, NEGONE, B, LDB ) + END IF + ELSE + DO 50 J = 1, K + DIFLJ = DIFL( J ) + DJ = POLES( J, 1 ) + DSIGJ = -POLES( J, 2 ) + IF( J.LT.K ) THEN + DIFRJ = -DIFR( J, 1 ) + DSIGJP = -POLES( J+1, 2 ) + END IF + IF( ( Z( J ).EQ.ZERO ) .OR. ( POLES( J, 2 ).EQ.ZERO ) ) + $ THEN + WORK( J ) = ZERO + ELSE + WORK( J ) = -POLES( J, 2 )*Z( J ) / DIFLJ / + $ ( POLES( J, 2 )+DJ ) + END IF + DO 30 I = 1, J - 1 + IF( ( Z( I ).EQ.ZERO ) .OR. + $ ( POLES( I, 2 ).EQ.ZERO ) ) THEN + WORK( I ) = ZERO + ELSE + WORK( I ) = POLES( I, 2 )*Z( I ) / + $ ( SLAMC3( POLES( I, 2 ), DSIGJ )- + $ DIFLJ ) / ( POLES( I, 2 )+DJ ) + END IF + 30 CONTINUE + DO 40 I = J + 1, K + IF( ( Z( I ).EQ.ZERO ) .OR. + $ ( POLES( I, 2 ).EQ.ZERO ) ) THEN + WORK( I ) = ZERO + ELSE + WORK( I ) = POLES( I, 2 )*Z( I ) / + $ ( SLAMC3( POLES( I, 2 ), DSIGJP )+ + $ DIFRJ ) / ( POLES( I, 2 )+DJ ) + END IF + 40 CONTINUE + WORK( 1 ) = NEGONE + TEMP = SNRM2( K, WORK, 1 ) + CALL SGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO, + $ B( J, 1 ), LDB ) + CALL SLASCL( 'G', 0, 0, TEMP, ONE, 1, NRHS, B( J, 1 ), + $ LDB, INFO ) + 50 CONTINUE + END IF +* +* Move the deflated rows of BX to B also. +* + IF( K.LT.MAX( M, N ) ) + $ CALL SLACPY( 'A', N-K, NRHS, BX( K+1, 1 ), LDBX, + $ B( K+1, 1 ), LDB ) + ELSE +* +* Apply back the right orthogonal transformations. +* +* Step (1R): apply back the new right singular vector matrix +* to B. +* + IF( K.EQ.1 ) THEN + CALL SCOPY( NRHS, B, LDB, BX, LDBX ) + ELSE + DO 80 J = 1, K + DSIGJ = POLES( J, 2 ) + IF( Z( J ).EQ.ZERO ) THEN + WORK( J ) = ZERO + ELSE + WORK( J ) = -Z( J ) / DIFL( J ) / + $ ( DSIGJ+POLES( J, 1 ) ) / DIFR( J, 2 ) + END IF + DO 60 I = 1, J - 1 + IF( Z( J ).EQ.ZERO ) THEN + WORK( I ) = ZERO + ELSE + WORK( I ) = Z( J ) / ( SLAMC3( DSIGJ, -POLES( I+1, + $ 2 ) )-DIFR( I, 1 ) ) / + $ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 ) + END IF + 60 CONTINUE + DO 70 I = J + 1, K + IF( Z( J ).EQ.ZERO ) THEN + WORK( I ) = ZERO + ELSE + WORK( I ) = Z( J ) / ( SLAMC3( DSIGJ, -POLES( I, + $ 2 ) )-DIFL( I ) ) / + $ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 ) + END IF + 70 CONTINUE + CALL SGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO, + $ BX( J, 1 ), LDBX ) + 80 CONTINUE + END IF +* +* Step (2R): if SQRE = 1, apply back the rotation that is +* related to the right null space of the subproblem. +* + IF( SQRE.EQ.1 ) THEN + CALL SCOPY( NRHS, B( M, 1 ), LDB, BX( M, 1 ), LDBX ) + CALL SROT( NRHS, BX( 1, 1 ), LDBX, BX( M, 1 ), LDBX, C, S ) + END IF + IF( K.LT.MAX( M, N ) ) + $ CALL SLACPY( 'A', N-K, NRHS, B( K+1, 1 ), LDB, BX( K+1, 1 ), + $ LDBX ) +* +* Step (3R): permute rows of B. +* + CALL SCOPY( NRHS, BX( 1, 1 ), LDBX, B( NLP1, 1 ), LDB ) + IF( SQRE.EQ.1 ) THEN + CALL SCOPY( NRHS, BX( M, 1 ), LDBX, B( M, 1 ), LDB ) + END IF + DO 90 I = 2, N + CALL SCOPY( NRHS, BX( I, 1 ), LDBX, B( PERM( I ), 1 ), LDB ) + 90 CONTINUE +* +* Step (4R): apply back the Givens rotations performed. +* + DO 100 I = GIVPTR, 1, -1 + CALL SROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB, + $ B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ), + $ -GIVNUM( I, 1 ) ) + 100 CONTINUE + END IF +* + RETURN +* +* End of SLALS0 +* + END diff --git a/dspl/liblapack/SRC/slalsa.f b/dspl/liblapack/SRC/slalsa.f new file mode 100644 index 0000000..135a673 --- /dev/null +++ b/dspl/liblapack/SRC/slalsa.f @@ -0,0 +1,493 @@ +*> \brief \b SLALSA computes the SVD of the coefficient matrix in compact form. Used by sgelsd. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLALSA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U, +* LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR, +* GIVCOL, LDGCOL, PERM, GIVNUM, C, S, WORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS, +* $ SMLSIZ +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), +* $ K( * ), PERM( LDGCOL, * ) +* REAL B( LDB, * ), BX( LDBX, * ), C( * ), +* $ DIFL( LDU, * ), DIFR( LDU, * ), +* $ GIVNUM( LDU, * ), POLES( LDU, * ), S( * ), +* $ U( LDU, * ), VT( LDU, * ), WORK( * ), +* $ Z( LDU, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLALSA is an itermediate step in solving the least squares problem +*> by computing the SVD of the coefficient matrix in compact form (The +*> singular vectors are computed as products of simple orthorgonal +*> matrices.). +*> +*> If ICOMPQ = 0, SLALSA applies the inverse of the left singular vector +*> matrix of an upper bidiagonal matrix to the right hand side; and if +*> ICOMPQ = 1, SLALSA applies the right singular vector matrix to the +*> right hand side. The singular vector matrices were generated in +*> compact form by SLALSA. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> Specifies whether the left or the right singular vector +*> matrix is involved. +*> = 0: Left singular vector matrix +*> = 1: Right singular vector matrix +*> \endverbatim +*> +*> \param[in] SMLSIZ +*> \verbatim +*> SMLSIZ is INTEGER +*> The maximum size of the subproblems at the bottom of the +*> computation tree. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The row and column dimensions of the upper bidiagonal matrix. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of columns of B and BX. NRHS must be at least 1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension ( LDB, NRHS ) +*> On input, B contains the right hand sides of the least +*> squares problem in rows 1 through M. +*> On output, B contains the solution X in rows 1 through N. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B in the calling subprogram. +*> LDB must be at least max(1,MAX( M, N ) ). +*> \endverbatim +*> +*> \param[out] BX +*> \verbatim +*> BX is REAL array, dimension ( LDBX, NRHS ) +*> On exit, the result of applying the left or right singular +*> vector matrix to B. +*> \endverbatim +*> +*> \param[in] LDBX +*> \verbatim +*> LDBX is INTEGER +*> The leading dimension of BX. +*> \endverbatim +*> +*> \param[in] U +*> \verbatim +*> U is REAL array, dimension ( LDU, SMLSIZ ). +*> On entry, U contains the left singular vector matrices of all +*> subproblems at the bottom level. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER, LDU = > N. +*> The leading dimension of arrays U, VT, DIFL, DIFR, +*> POLES, GIVNUM, and Z. +*> \endverbatim +*> +*> \param[in] VT +*> \verbatim +*> VT is REAL array, dimension ( LDU, SMLSIZ+1 ). +*> On entry, VT**T contains the right singular vector matrices of +*> all subproblems at the bottom level. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER array, dimension ( N ). +*> \endverbatim +*> +*> \param[in] DIFL +*> \verbatim +*> DIFL is REAL array, dimension ( LDU, NLVL ). +*> where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. +*> \endverbatim +*> +*> \param[in] DIFR +*> \verbatim +*> DIFR is REAL array, dimension ( LDU, 2 * NLVL ). +*> On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record +*> distances between singular values on the I-th level and +*> singular values on the (I -1)-th level, and DIFR(*, 2 * I) +*> record the normalizing factors of the right singular vectors +*> matrices of subproblems on I-th level. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is REAL array, dimension ( LDU, NLVL ). +*> On entry, Z(1, I) contains the components of the deflation- +*> adjusted updating row vector for subproblems on the I-th +*> level. +*> \endverbatim +*> +*> \param[in] POLES +*> \verbatim +*> POLES is REAL array, dimension ( LDU, 2 * NLVL ). +*> On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old +*> singular values involved in the secular equations on the I-th +*> level. +*> \endverbatim +*> +*> \param[in] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER array, dimension ( N ). +*> On entry, GIVPTR( I ) records the number of Givens +*> rotations performed on the I-th problem on the computation +*> tree. +*> \endverbatim +*> +*> \param[in] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension ( LDGCOL, 2 * NLVL ). +*> On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the +*> locations of Givens rotations performed on the I-th level on +*> the computation tree. +*> \endverbatim +*> +*> \param[in] LDGCOL +*> \verbatim +*> LDGCOL is INTEGER, LDGCOL = > N. +*> The leading dimension of arrays GIVCOL and PERM. +*> \endverbatim +*> +*> \param[in] PERM +*> \verbatim +*> PERM is INTEGER array, dimension ( LDGCOL, NLVL ). +*> On entry, PERM(*, I) records permutations done on the I-th +*> level of the computation tree. +*> \endverbatim +*> +*> \param[in] GIVNUM +*> \verbatim +*> GIVNUM is REAL array, dimension ( LDU, 2 * NLVL ). +*> On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- +*> values of Givens rotations performed on the I-th level on the +*> computation tree. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension ( N ). +*> On entry, if the I-th subproblem is not square, +*> C( I ) contains the C-value of a Givens rotation related to +*> the right null space of the I-th subproblem. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is REAL array, dimension ( N ). +*> On entry, if the I-th subproblem is not square, +*> S( I ) contains the S-value of a Givens rotation related to +*> the right null space of the I-th subproblem. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Ren-Cang Li, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +* +* ===================================================================== + SUBROUTINE SLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U, + $ LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR, + $ GIVCOL, LDGCOL, PERM, GIVNUM, C, S, WORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS, + $ SMLSIZ +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), + $ K( * ), PERM( LDGCOL, * ) + REAL B( LDB, * ), BX( LDBX, * ), C( * ), + $ DIFL( LDU, * ), DIFR( LDU, * ), + $ GIVNUM( LDU, * ), POLES( LDU, * ), S( * ), + $ U( LDU, * ), VT( LDU, * ), WORK( * ), + $ Z( LDU, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, IC, IM1, INODE, J, LF, LL, LVL, LVL2, + $ ND, NDB1, NDIML, NDIMR, NL, NLF, NLP1, NLVL, + $ NR, NRF, NRP1, SQRE +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, SLALS0, SLASDT, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( SMLSIZ.LT.3 ) THEN + INFO = -2 + ELSE IF( N.LT.SMLSIZ ) THEN + INFO = -3 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -4 + ELSE IF( LDB.LT.N ) THEN + INFO = -6 + ELSE IF( LDBX.LT.N ) THEN + INFO = -8 + ELSE IF( LDU.LT.N ) THEN + INFO = -10 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -19 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLALSA', -INFO ) + RETURN + END IF +* +* Book-keeping and setting up the computation tree. +* + INODE = 1 + NDIML = INODE + N + NDIMR = NDIML + N +* + CALL SLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ), + $ IWORK( NDIMR ), SMLSIZ ) +* +* The following code applies back the left singular vector factors. +* For applying back the right singular vector factors, go to 50. +* + IF( ICOMPQ.EQ.1 ) THEN + GO TO 50 + END IF +* +* The nodes on the bottom level of the tree were solved +* by SLASDQ. The corresponding left and right singular vector +* matrices are in explicit form. First apply back the left +* singular vector matrices. +* + NDB1 = ( ND+1 ) / 2 + DO 10 I = NDB1, ND +* +* IC : center row of each node +* NL : number of rows of left subproblem +* NR : number of rows of right subproblem +* NLF: starting row of the left subproblem +* NRF: starting row of the right subproblem +* + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NR = IWORK( NDIMR+I1 ) + NLF = IC - NL + NRF = IC + 1 + CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, + $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) + CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, + $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) + 10 CONTINUE +* +* Next copy the rows of B that correspond to unchanged rows +* in the bidiagonal matrix to BX. +* + DO 20 I = 1, ND + IC = IWORK( INODE+I-1 ) + CALL SCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX ) + 20 CONTINUE +* +* Finally go through the left singular vector matrices of all +* the other subproblems bottom-up on the tree. +* + J = 2**NLVL + SQRE = 0 +* + DO 40 LVL = NLVL, 1, -1 + LVL2 = 2*LVL - 1 +* +* find the first node LF and last node LL on +* the current level LVL +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 30 I = LF, LL + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + J = J - 1 + CALL SLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX, + $ B( NLF, 1 ), LDB, PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ), + $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ), + $ Z( NLF, LVL ), K( J ), C( J ), S( J ), WORK, + $ INFO ) + 30 CONTINUE + 40 CONTINUE + GO TO 90 +* +* ICOMPQ = 1: applying back the right singular vector factors. +* + 50 CONTINUE +* +* First now go through the right singular vector matrices of all +* the tree nodes top-down. +* + J = 0 + DO 70 LVL = 1, NLVL + LVL2 = 2*LVL - 1 +* +* Find the first node LF and last node LL on +* the current level LVL. +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 60 I = LL, LF, -1 + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + IF( I.EQ.LL ) THEN + SQRE = 0 + ELSE + SQRE = 1 + END IF + J = J + 1 + CALL SLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB, + $ BX( NLF, 1 ), LDBX, PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ), + $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ), + $ Z( NLF, LVL ), K( J ), C( J ), S( J ), WORK, + $ INFO ) + 60 CONTINUE + 70 CONTINUE +* +* The nodes on the bottom level of the tree were solved +* by SLASDQ. The corresponding right singular vector +* matrices are in explicit form. Apply them back. +* + NDB1 = ( ND+1 ) / 2 + DO 80 I = NDB1, ND + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NR = IWORK( NDIMR+I1 ) + NLP1 = NL + 1 + IF( I.EQ.ND ) THEN + NRP1 = NR + ELSE + NRP1 = NR + 1 + END IF + NLF = IC - NL + NRF = IC + 1 + CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, + $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) + CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, + $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) + 80 CONTINUE +* + 90 CONTINUE +* + RETURN +* +* End of SLALSA +* + END diff --git a/dspl/liblapack/SRC/slalsd.f b/dspl/liblapack/SRC/slalsd.f new file mode 100644 index 0000000..a669660 --- /dev/null +++ b/dspl/liblapack/SRC/slalsd.f @@ -0,0 +1,523 @@ +*> \brief \b SLALSD uses the singular value decomposition of A to solve the least squares problem. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLALSD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND, +* RANK, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS, RANK, SMLSIZ +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL B( LDB, * ), D( * ), E( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLALSD uses the singular value decomposition of A to solve the least +*> squares problem of finding X to minimize the Euclidean norm of each +*> column of A*X-B, where A is N-by-N upper bidiagonal, and X and B +*> are N-by-NRHS. The solution X overwrites B. +*> +*> The singular values of A smaller than RCOND times the largest +*> singular value are treated as zero in solving the least squares +*> problem; in this case a minimum norm solution is returned. +*> The actual singular values are returned in D in ascending order. +*> +*> This code makes very mild assumptions about floating point +*> arithmetic. It will work on machines with a guard digit in +*> add/subtract, or on those binary machines without guard digits +*> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. +*> It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': D and E define an upper bidiagonal matrix. +*> = 'L': D and E define a lower bidiagonal matrix. +*> \endverbatim +*> +*> \param[in] SMLSIZ +*> \verbatim +*> SMLSIZ is INTEGER +*> The maximum size of the subproblems at the bottom of the +*> computation tree. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the bidiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of columns of B. NRHS must be at least 1. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry D contains the main diagonal of the bidiagonal +*> matrix. On exit, if INFO = 0, D contains its singular values. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> Contains the super-diagonal entries of the bidiagonal matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On input, B contains the right hand sides of the least +*> squares problem. On output, B contains the solution X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B in the calling subprogram. +*> LDB must be at least max(1,N). +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is REAL +*> The singular values of A less than or equal to RCOND times +*> the largest singular value are treated as zero in solving +*> the least squares problem. If RCOND is negative, +*> machine precision is used instead. +*> For example, if diag(S)*X=B were the least squares problem, +*> where diag(S) is a diagonal matrix of singular values, the +*> solution would be X(i) = B(i) / S(i) if S(i) is greater than +*> RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to +*> RCOND*max(S). +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The number of singular values of A greater than RCOND times +*> the largest singular value. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension at least +*> (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2), +*> where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1). +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension at least +*> (3*N*NLVL + 11*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: The algorithm failed to compute a singular value while +*> working on the submatrix lying in rows and columns +*> INFO/(N+1) through MOD(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Ren-Cang Li, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +* +* ===================================================================== + SUBROUTINE SLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND, + $ RANK, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS, RANK, SMLSIZ + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL B( LDB, * ), D( * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 ) +* .. +* .. Local Scalars .. + INTEGER BX, BXST, C, DIFL, DIFR, GIVCOL, GIVNUM, + $ GIVPTR, I, ICMPQ1, ICMPQ2, IWK, J, K, NLVL, + $ NM1, NSIZE, NSUB, NWORK, PERM, POLES, S, SIZEI, + $ SMLSZP, SQRE, ST, ST1, U, VT, Z + REAL CS, EPS, ORGNRM, R, RCND, SN, TOL +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH, SLANST + EXTERNAL ISAMAX, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, SLACPY, SLALSA, SLARTG, SLASCL, + $ SLASDA, SLASDQ, SLASET, SLASRT, SROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, REAL, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -4 + ELSE IF( ( LDB.LT.1 ) .OR. ( LDB.LT.N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLALSD', -INFO ) + RETURN + END IF +* + EPS = SLAMCH( 'Epsilon' ) +* +* Set up the tolerance. +* + IF( ( RCOND.LE.ZERO ) .OR. ( RCOND.GE.ONE ) ) THEN + RCND = EPS + ELSE + RCND = RCOND + END IF +* + RANK = 0 +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + IF( D( 1 ).EQ.ZERO ) THEN + CALL SLASET( 'A', 1, NRHS, ZERO, ZERO, B, LDB ) + ELSE + RANK = 1 + CALL SLASCL( 'G', 0, 0, D( 1 ), ONE, 1, NRHS, B, LDB, INFO ) + D( 1 ) = ABS( D( 1 ) ) + END IF + RETURN + END IF +* +* Rotate the matrix if it is lower bidiagonal. +* + IF( UPLO.EQ.'L' ) THEN + DO 10 I = 1, N - 1 + CALL SLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( NRHS.EQ.1 ) THEN + CALL SROT( 1, B( I, 1 ), 1, B( I+1, 1 ), 1, CS, SN ) + ELSE + WORK( I*2-1 ) = CS + WORK( I*2 ) = SN + END IF + 10 CONTINUE + IF( NRHS.GT.1 ) THEN + DO 30 I = 1, NRHS + DO 20 J = 1, N - 1 + CS = WORK( J*2-1 ) + SN = WORK( J*2 ) + CALL SROT( 1, B( J, I ), 1, B( J+1, I ), 1, CS, SN ) + 20 CONTINUE + 30 CONTINUE + END IF + END IF +* +* Scale. +* + NM1 = N - 1 + ORGNRM = SLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) THEN + CALL SLASET( 'A', N, NRHS, ZERO, ZERO, B, LDB ) + RETURN + END IF +* + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, INFO ) +* +* If N is smaller than the minimum divide size SMLSIZ, then solve +* the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN + NWORK = 1 + N*N + CALL SLASET( 'A', N, N, ZERO, ONE, WORK, N ) + CALL SLASDQ( 'U', 0, N, N, 0, NRHS, D, E, WORK, N, WORK, N, B, + $ LDB, WORK( NWORK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + TOL = RCND*ABS( D( ISAMAX( N, D, 1 ) ) ) + DO 40 I = 1, N + IF( D( I ).LE.TOL ) THEN + CALL SLASET( 'A', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB ) + ELSE + CALL SLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, B( I, 1 ), + $ LDB, INFO ) + RANK = RANK + 1 + END IF + 40 CONTINUE + CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO, + $ WORK( NWORK ), N ) + CALL SLACPY( 'A', N, NRHS, WORK( NWORK ), N, B, LDB ) +* +* Unscale. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) + CALL SLASRT( 'D', N, D, INFO ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO ) +* + RETURN + END IF +* +* Book-keeping and setting up some constants. +* + NLVL = INT( LOG( REAL( N ) / REAL( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1 +* + SMLSZP = SMLSIZ + 1 +* + U = 1 + VT = 1 + SMLSIZ*N + DIFL = VT + SMLSZP*N + DIFR = DIFL + NLVL*N + Z = DIFR + NLVL*N*2 + C = Z + NLVL*N + S = C + N + POLES = S + N + GIVNUM = POLES + 2*NLVL*N + BX = GIVNUM + 2*NLVL*N + NWORK = BX + N*NRHS +* + SIZEI = 1 + N + K = SIZEI + N + GIVPTR = K + N + PERM = GIVPTR + N + GIVCOL = PERM + NLVL*N + IWK = GIVCOL + NLVL*N*2 +* + ST = 1 + SQRE = 0 + ICMPQ1 = 1 + ICMPQ2 = 0 + NSUB = 0 +* + DO 50 I = 1, N + IF( ABS( D( I ) ).LT.EPS ) THEN + D( I ) = SIGN( EPS, D( I ) ) + END IF + 50 CONTINUE +* + DO 60 I = 1, NM1 + IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN + NSUB = NSUB + 1 + IWORK( NSUB ) = ST +* +* Subproblem found. First determine its size and then +* apply divide and conquer on it. +* + IF( I.LT.NM1 ) THEN +* +* A subproblem with E(I) small for I < NM1. +* + NSIZE = I - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + ELSE IF( ABS( E( I ) ).GE.EPS ) THEN +* +* A subproblem with E(NM1) not too small but I = NM1. +* + NSIZE = N - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + ELSE +* +* A subproblem with E(NM1) small. This implies an +* 1-by-1 subproblem at D(N), which is not solved +* explicitly. +* + NSIZE = I - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + NSUB = NSUB + 1 + IWORK( NSUB ) = N + IWORK( SIZEI+NSUB-1 ) = 1 + CALL SCOPY( NRHS, B( N, 1 ), LDB, WORK( BX+NM1 ), N ) + END IF + ST1 = ST - 1 + IF( NSIZE.EQ.1 ) THEN +* +* This is a 1-by-1 subproblem and is not solved +* explicitly. +* + CALL SCOPY( NRHS, B( ST, 1 ), LDB, WORK( BX+ST1 ), N ) + ELSE IF( NSIZE.LE.SMLSIZ ) THEN +* +* This is a small subproblem and is solved by SLASDQ. +* + CALL SLASET( 'A', NSIZE, NSIZE, ZERO, ONE, + $ WORK( VT+ST1 ), N ) + CALL SLASDQ( 'U', 0, NSIZE, NSIZE, 0, NRHS, D( ST ), + $ E( ST ), WORK( VT+ST1 ), N, WORK( NWORK ), + $ N, B( ST, 1 ), LDB, WORK( NWORK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + CALL SLACPY( 'A', NSIZE, NRHS, B( ST, 1 ), LDB, + $ WORK( BX+ST1 ), N ) + ELSE +* +* A large problem. Solve it using divide and conquer. +* + CALL SLASDA( ICMPQ1, SMLSIZ, NSIZE, SQRE, D( ST ), + $ E( ST ), WORK( U+ST1 ), N, WORK( VT+ST1 ), + $ IWORK( K+ST1 ), WORK( DIFL+ST1 ), + $ WORK( DIFR+ST1 ), WORK( Z+ST1 ), + $ WORK( POLES+ST1 ), IWORK( GIVPTR+ST1 ), + $ IWORK( GIVCOL+ST1 ), N, IWORK( PERM+ST1 ), + $ WORK( GIVNUM+ST1 ), WORK( C+ST1 ), + $ WORK( S+ST1 ), WORK( NWORK ), IWORK( IWK ), + $ INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + BXST = BX + ST1 + CALL SLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, B( ST, 1 ), + $ LDB, WORK( BXST ), N, WORK( U+ST1 ), N, + $ WORK( VT+ST1 ), IWORK( K+ST1 ), + $ WORK( DIFL+ST1 ), WORK( DIFR+ST1 ), + $ WORK( Z+ST1 ), WORK( POLES+ST1 ), + $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N, + $ IWORK( PERM+ST1 ), WORK( GIVNUM+ST1 ), + $ WORK( C+ST1 ), WORK( S+ST1 ), WORK( NWORK ), + $ IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + ST = I + 1 + END IF + 60 CONTINUE +* +* Apply the singular values and treat the tiny ones as zero. +* + TOL = RCND*ABS( D( ISAMAX( N, D, 1 ) ) ) +* + DO 70 I = 1, N +* +* Some of the elements in D can be negative because 1-by-1 +* subproblems were not solved explicitly. +* + IF( ABS( D( I ) ).LE.TOL ) THEN + CALL SLASET( 'A', 1, NRHS, ZERO, ZERO, WORK( BX+I-1 ), N ) + ELSE + RANK = RANK + 1 + CALL SLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, + $ WORK( BX+I-1 ), N, INFO ) + END IF + D( I ) = ABS( D( I ) ) + 70 CONTINUE +* +* Now apply back the right singular vectors. +* + ICMPQ2 = 1 + DO 80 I = 1, NSUB + ST = IWORK( I ) + ST1 = ST - 1 + NSIZE = IWORK( SIZEI+I-1 ) + BXST = BX + ST1 + IF( NSIZE.EQ.1 ) THEN + CALL SCOPY( NRHS, WORK( BXST ), N, B( ST, 1 ), LDB ) + ELSE IF( NSIZE.LE.SMLSIZ ) THEN + CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ WORK( VT+ST1 ), N, WORK( BXST ), N, ZERO, + $ B( ST, 1 ), LDB ) + ELSE + CALL SLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, WORK( BXST ), N, + $ B( ST, 1 ), LDB, WORK( U+ST1 ), N, + $ WORK( VT+ST1 ), IWORK( K+ST1 ), + $ WORK( DIFL+ST1 ), WORK( DIFR+ST1 ), + $ WORK( Z+ST1 ), WORK( POLES+ST1 ), + $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N, + $ IWORK( PERM+ST1 ), WORK( GIVNUM+ST1 ), + $ WORK( C+ST1 ), WORK( S+ST1 ), WORK( NWORK ), + $ IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + 80 CONTINUE +* +* Unscale and sort the singular values. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) + CALL SLASRT( 'D', N, D, INFO ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO ) +* + RETURN +* +* End of SLALSD +* + END diff --git a/dspl/liblapack/SRC/slamrg.f b/dspl/liblapack/SRC/slamrg.f new file mode 100644 index 0000000..649d2c5 --- /dev/null +++ b/dspl/liblapack/SRC/slamrg.f @@ -0,0 +1,171 @@ +*> \brief \b SLAMRG creates a permutation list to merge the entries of two independently sorted sets into a single set sorted in ascending order. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAMRG + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAMRG( N1, N2, A, STRD1, STRD2, INDEX ) +* +* .. Scalar Arguments .. +* INTEGER N1, N2, STRD1, STRD2 +* .. +* .. Array Arguments .. +* INTEGER INDEX( * ) +* REAL A( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAMRG will create a permutation list which will merge the elements +*> of A (which is composed of two independently sorted sets) into a +*> single set which is sorted in ascending order. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> \endverbatim +*> +*> \param[in] N2 +*> \verbatim +*> N2 is INTEGER +*> These arguments contain the respective lengths of the two +*> sorted lists to be merged. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (N1+N2) +*> The first N1 elements of A contain a list of numbers which +*> are sorted in either ascending or descending order. Likewise +*> for the final N2 elements. +*> \endverbatim +*> +*> \param[in] STRD1 +*> \verbatim +*> STRD1 is INTEGER +*> \endverbatim +*> +*> \param[in] STRD2 +*> \verbatim +*> STRD2 is INTEGER +*> These are the strides to be taken through the array A. +*> Allowable strides are 1 and -1. They indicate whether a +*> subset of A is sorted in ascending (STRDx = 1) or descending +*> (STRDx = -1) order. +*> \endverbatim +*> +*> \param[out] INDEX +*> \verbatim +*> INDEX is INTEGER array, dimension (N1+N2) +*> On exit this array will contain a permutation such that +*> if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be +*> sorted in ascending order. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE SLAMRG( N1, N2, A, STRD1, STRD2, INDEX ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER N1, N2, STRD1, STRD2 +* .. +* .. Array Arguments .. + INTEGER INDEX( * ) + REAL A( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IND1, IND2, N1SV, N2SV +* .. +* .. Executable Statements .. +* + N1SV = N1 + N2SV = N2 + IF( STRD1.GT.0 ) THEN + IND1 = 1 + ELSE + IND1 = N1 + END IF + IF( STRD2.GT.0 ) THEN + IND2 = 1 + N1 + ELSE + IND2 = N1 + N2 + END IF + I = 1 +* while ( (N1SV > 0) & (N2SV > 0) ) + 10 CONTINUE + IF( N1SV.GT.0 .AND. N2SV.GT.0 ) THEN + IF( A( IND1 ).LE.A( IND2 ) ) THEN + INDEX( I ) = IND1 + I = I + 1 + IND1 = IND1 + STRD1 + N1SV = N1SV - 1 + ELSE + INDEX( I ) = IND2 + I = I + 1 + IND2 = IND2 + STRD2 + N2SV = N2SV - 1 + END IF + GO TO 10 + END IF +* end while + IF( N1SV.EQ.0 ) THEN + DO 20 N1SV = 1, N2SV + INDEX( I ) = IND2 + I = I + 1 + IND2 = IND2 + STRD2 + 20 CONTINUE + ELSE +* N2SV .EQ. 0 + DO 30 N2SV = 1, N1SV + INDEX( I ) = IND1 + I = I + 1 + IND1 = IND1 + STRD1 + 30 CONTINUE + END IF +* + RETURN +* +* End of SLAMRG +* + END diff --git a/dspl/liblapack/SRC/slamswlq.f b/dspl/liblapack/SRC/slamswlq.f new file mode 100644 index 0000000..b13d02b --- /dev/null +++ b/dspl/liblapack/SRC/slamswlq.f @@ -0,0 +1,418 @@ +* +* Definition: +* =========== +* +* SUBROUTINE SLAMSWLQ( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, +* $ LDT, C, LDC, WORK, LWORK, INFO ) +* +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC +* .. +* .. Array Arguments .. +* DOUBLE A( LDA, * ), WORK( * ), C(LDC, * ), +* $ T( LDT, * ) +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAMSWLQ overwrites the general real M-by-N matrix C with +*> +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> where Q is a real orthogonal matrix defined as the product of blocked +*> elementary reflectors computed by short wide LQ +*> factorization (SLASWLQ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >=0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> M >= K >= 0; +*> +*> \endverbatim +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The row block size to be used in the blocked QR. +*> M >= MB >= 1 +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The column block size to be used in the blocked QR. +*> NB > M. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The block size to be used in the blocked QR. +*> MB > M. +*> +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the blocked +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> SLASWLQ in the first k rows of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is REAL array, dimension +*> ( M * Number of blocks(CEIL(N-K/NB-K)), +*> The blocked upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See below +*> for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) REAL array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,NB) * MB; +*> if SIDE = 'R', LWORK >= max(1,M) * MB. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> Short-Wide LQ (SWLQ) performs LQ by a sequence of orthogonal transformations, +*> representing Q as a product of other orthogonal matrices +*> Q = Q(1) * Q(2) * . . . * Q(k) +*> where each Q(i) zeros out upper diagonal entries of a block of NB rows of A: +*> Q(1) zeros out the upper diagonal entries of rows 1:NB of A +*> Q(2) zeros out the bottom MB-N rows of rows [1:M,NB+1:2*NB-M] of A +*> Q(3) zeros out the bottom MB-N rows of rows [1:M,2*NB-M+1:3*NB-2*M] of A +*> . . . +*> +*> Q(1) is computed by GELQT, which represents Q(1) by Householder vectors +*> stored under the diagonal of rows 1:MB of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,1:N). +*> For more information see Further Details in GELQT. +*> +*> Q(i) for i>1 is computed by TPLQT, which represents Q(i) by Householder vectors +*> stored in columns [(i-1)*(NB-M)+M+1:i*(NB-M)+M] of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,(i-1)*M+1:i*M). +*> The last Q(k) may use fewer rows. +*> For more information see Further Details in TPQRT. +*> +*> For more details of the overall algorithm, see the description of +*> Sequential TSQR in Section 2.2 of [1]. +*> +*> [1] “Communication-Optimal Parallel and Sequential QR and LU Factorizations,” +*> J. Demmel, L. Grigori, M. Hoemmen, J. Langou, +*> SIAM J. Sci. Comput, vol. 34, no. 1, 2012 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLAMSWLQ( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, + $ LDT, C, LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC +* .. +* .. Array Arguments .. + REAL A( LDA, * ), WORK( * ), C(LDC, * ), + $ T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN, LQUERY + INTEGER I, II, KK, LW, CTR +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL STPMLQT, SGEMLQT, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + LQUERY = LWORK.LT.0 + NOTRAN = LSAME( TRANS, 'N' ) + TRAN = LSAME( TRANS, 'T' ) + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + IF (LEFT) THEN + LW = N * MB + ELSE + LW = M * MB + END IF +* + INFO = 0 + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -9 + ELSE IF( LDT.LT.MAX( 1, MB) ) THEN + INFO = -11 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -13 + ELSE IF(( LWORK.LT.MAX(1,LW)).AND.(.NOT.LQUERY)) THEN + INFO = -15 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAMSWLQ', -INFO ) + WORK(1) = LW + RETURN + ELSE IF (LQUERY) THEN + WORK(1) = LW + RETURN + END IF +* +* Quick return if possible +* + IF( MIN(M,N,K).EQ.0 ) THEN + RETURN + END IF +* + IF((NB.LE.K).OR.(NB.GE.MAX(M,N,K))) THEN + CALL SGEMLQT( SIDE, TRANS, M, N, K, MB, A, LDA, + $ T, LDT, C, LDC, WORK, INFO) + RETURN + END IF +* + IF(LEFT.AND.TRAN) THEN +* +* Multiply Q to the last block of C +* + KK = MOD((M-K),(NB-K)) + CTR = (M-K)/(NB-K) +* + IF (KK.GT.0) THEN + II=M-KK+1 + CALL STPMLQT('L','T',KK , N, K, 0, MB, A(1,II), LDA, + $ T(1,CTR*K+1), LDT, C(1,1), LDC, + $ C(II,1), LDC, WORK, INFO ) + ELSE + II=M+1 + END IF +* + DO I=II-(NB-K),NB+1,-(NB-K) +* +* Multiply Q to the current block of C (1:M,I:I+NB) +* + CTR = CTR - 1 + CALL STPMLQT('L','T',NB-K , N, K, 0,MB, A(1,I), LDA, + $ T(1,CTR*K+1),LDT, C(1,1), LDC, + $ C(I,1), LDC, WORK, INFO ) + END DO +* +* Multiply Q to the first block of C (1:M,1:NB) +* + CALL SGEMLQT('L','T',NB , N, K, MB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + ELSE IF (LEFT.AND.NOTRAN) THEN +* +* Multiply Q to the first block of C +* + KK = MOD((M-K),(NB-K)) + II=M-KK+1 + CTR = 1 + CALL SGEMLQT('L','N',NB , N, K, MB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + DO I=NB+1,II-NB+K,(NB-K) +* +* Multiply Q to the current block of C (I:I+NB,1:N) +* + CALL STPMLQT('L','N',NB-K , N, K, 0,MB, A(1,I), LDA, + $ T(1,CTR * K+1), LDT, C(1,1), LDC, + $ C(I,1), LDC, WORK, INFO ) + CTR = CTR + 1 +* + END DO + IF(II.LE.M) THEN +* +* Multiply Q to the last block of C +* + CALL STPMLQT('L','N',KK , N, K, 0, MB, A(1,II), LDA, + $ T(1,CTR*K+1), LDT, C(1,1), LDC, + $ C(II,1), LDC, WORK, INFO ) +* + END IF +* + ELSE IF(RIGHT.AND.NOTRAN) THEN +* +* Multiply Q to the last block of C +* + KK = MOD((N-K),(NB-K)) + CTR = (N-K)/(NB-K) + IF (KK.GT.0) THEN + II=N-KK+1 + CALL STPMLQT('R','N',M , KK, K, 0, MB, A(1, II), LDA, + $ T(1,CTR*K+1), LDT, C(1,1), LDC, + $ C(1,II), LDC, WORK, INFO ) + ELSE + II=N+1 + END IF +* + DO I=II-(NB-K),NB+1,-(NB-K) +* +* Multiply Q to the current block of C (1:M,I:I+MB) +* + CTR = CTR - 1 + CALL STPMLQT('R','N', M, NB-K, K, 0, MB, A(1, I), LDA, + $ T(1,CTR*K+1), LDT, C(1,1), LDC, + $ C(1,I), LDC, WORK, INFO ) + + END DO +* +* Multiply Q to the first block of C (1:M,1:MB) +* + CALL SGEMLQT('R','N',M , NB, K, MB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + ELSE IF (RIGHT.AND.TRAN) THEN +* +* Multiply Q to the first block of C +* + KK = MOD((N-K),(NB-K)) + II=N-KK+1 + CTR = 1 + CALL SGEMLQT('R','T',M , NB, K, MB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + DO I=NB+1,II-NB+K,(NB-K) +* +* Multiply Q to the current block of C (1:M,I:I+MB) +* + CALL STPMLQT('R','T',M , NB-K, K, 0,MB, A(1,I), LDA, + $ T(1, CTR*K+1), LDT, C(1,1), LDC, + $ C(1,I), LDC, WORK, INFO ) + CTR = CTR + 1 +* + END DO + IF(II.LE.N) THEN +* +* Multiply Q to the last block of C +* + CALL STPMLQT('R','T',M , KK, K, 0,MB, A(1,II), LDA, + $ T(1,CTR*K+1),LDT, C(1,1), LDC, + $ C(1,II), LDC, WORK, INFO ) +* + END IF +* + END IF +* + WORK(1) = LW + RETURN +* +* End of SLAMSWLQ +* + END diff --git a/dspl/liblapack/SRC/slamtsqr.f b/dspl/liblapack/SRC/slamtsqr.f new file mode 100644 index 0000000..84ac86e --- /dev/null +++ b/dspl/liblapack/SRC/slamtsqr.f @@ -0,0 +1,415 @@ +* +* Definition: +* =========== +* +* SUBROUTINE SLAMTSQR( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, +* $ LDT, C, LDC, WORK, LWORK, INFO ) +* +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC +* .. +* .. Array Arguments .. +* DOUBLE A( LDA, * ), WORK( * ), C(LDC, * ), +* $ T( LDT, * ) +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAMTSQR overwrites the general real M-by-N matrix C with +*> +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> where Q is a real orthogonal matrix defined as the product +*> of blocked elementary reflectors computed by tall skinny +*> QR factorization (DLATSQR) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >=0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> N >= K >= 0; +*> +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The block size to be used in the blocked QR. +*> MB > N. (must be the same as DLATSQR) +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The column block size to be used in the blocked QR. +*> N >= NB >= 1. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> blockedelementary reflector H(i), for i = 1,2,...,k, as +*> returned by DLATSQR in the first k columns of +*> its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is REAL array, dimension +*> ( N * Number of blocks(CEIL(M-K/MB-K)), +*> The blocked upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See below +*> for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) REAL array, dimension (MAX(1,LWORK)) +*> +*> \endverbatim +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If SIDE = 'L', LWORK >= max(1,N)*NB; +*> if SIDE = 'R', LWORK >= max(1,MB)*NB. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> +*> \endverbatim +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> Tall-Skinny QR (TSQR) performs QR by a sequence of orthogonal transformations, +*> representing Q as a product of other orthogonal matrices +*> Q = Q(1) * Q(2) * . . . * Q(k) +*> where each Q(i) zeros out subdiagonal entries of a block of MB rows of A: +*> Q(1) zeros out the subdiagonal entries of rows 1:MB of A +*> Q(2) zeros out the bottom MB-N rows of rows [1:N,MB+1:2*MB-N] of A +*> Q(3) zeros out the bottom MB-N rows of rows [1:N,2*MB-N+1:3*MB-2*N] of A +*> . . . +*> +*> Q(1) is computed by GEQRT, which represents Q(1) by Householder vectors +*> stored under the diagonal of rows 1:MB of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,1:N). +*> For more information see Further Details in GEQRT. +*> +*> Q(i) for i>1 is computed by TPQRT, which represents Q(i) by Householder vectors +*> stored in rows [(i-1)*(MB-N)+N+1:i*(MB-N)+N] of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,(i-1)*N+1:i*N). +*> The last Q(k) may use fewer rows. +*> For more information see Further Details in TPQRT. +*> +*> For more details of the overall algorithm, see the description of +*> Sequential TSQR in Section 2.2 of [1]. +*> +*> [1] “Communication-Optimal Parallel and Sequential QR and LU Factorizations,” +*> J. Demmel, L. Grigori, M. Hoemmen, J. Langou, +*> SIAM J. Sci. Comput, vol. 34, no. 1, 2012 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLAMTSQR( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, + $ LDT, C, LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC +* .. +* .. Array Arguments .. + REAL A( LDA, * ), WORK( * ), C(LDC, * ), + $ T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN, LQUERY + INTEGER I, II, KK, LW, CTR +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL SGEMQRT, STPMQRT, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + LQUERY = LWORK.LT.0 + NOTRAN = LSAME( TRANS, 'N' ) + TRAN = LSAME( TRANS, 'T' ) + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + IF (LEFT) THEN + LW = N * NB + ELSE + LW = MB * NB + END IF +* + INFO = 0 + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -9 + ELSE IF( LDT.LT.MAX( 1, NB) ) THEN + INFO = -11 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -13 + ELSE IF(( LWORK.LT.MAX(1,LW)).AND.(.NOT.LQUERY)) THEN + INFO = -15 + END IF +* +* Determine the block size if it is tall skinny or short and wide +* + IF( INFO.EQ.0) THEN + WORK(1) = LW + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAMTSQR', -INFO ) + RETURN + ELSE IF (LQUERY) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN(M,N,K).EQ.0 ) THEN + RETURN + END IF +* + IF((MB.LE.K).OR.(MB.GE.MAX(M,N,K))) THEN + CALL SGEMQRT( SIDE, TRANS, M, N, K, NB, A, LDA, + $ T, LDT, C, LDC, WORK, INFO) + RETURN + END IF +* + IF(LEFT.AND.NOTRAN) THEN +* +* Multiply Q to the last block of C +* + KK = MOD((M-K),(MB-K)) + CTR = (M-K)/(MB-K) + IF (KK.GT.0) THEN + II=M-KK+1 + CALL STPMQRT('L','N',KK , N, K, 0, NB, A(II,1), LDA, + $ T(1,CTR*K+1),LDT , C(1,1), LDC, + $ C(II,1), LDC, WORK, INFO ) + ELSE + II=M+1 + END IF +* + DO I=II-(MB-K),MB+1,-(MB-K) +* +* Multiply Q to the current block of C (I:I+MB,1:N) +* + CTR = CTR - 1 + CALL STPMQRT('L','N',MB-K , N, K, 0,NB, A(I,1), LDA, + $ T(1, CTR * K + 1), LDT, C(1,1), LDC, + $ C(I,1), LDC, WORK, INFO ) +* + END DO +* +* Multiply Q to the first block of C (1:MB,1:N) +* + CALL SGEMQRT('L','N',MB , N, K, NB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + ELSE IF (LEFT.AND.TRAN) THEN +* +* Multiply Q to the first block of C +* + KK = MOD((M-K),(MB-K)) + II=M-KK+1 + CTR = 1 + CALL SGEMQRT('L','T',MB , N, K, NB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + DO I=MB+1,II-MB+K,(MB-K) +* +* Multiply Q to the current block of C (I:I+MB,1:N) +* + CALL STPMQRT('L','T',MB-K , N, K, 0,NB, A(I,1), LDA, + $ T(1,CTR * K + 1),LDT, C(1,1), LDC, + $ C(I,1), LDC, WORK, INFO ) + CTR = CTR + 1 +* + END DO + IF(II.LE.M) THEN +* +* Multiply Q to the last block of C +* + CALL STPMQRT('L','T',KK , N, K, 0,NB, A(II,1), LDA, + $ T(1, CTR * K + 1), LDT, C(1,1), LDC, + $ C(II,1), LDC, WORK, INFO ) +* + END IF +* + ELSE IF(RIGHT.AND.TRAN) THEN +* +* Multiply Q to the last block of C +* + KK = MOD((N-K),(MB-K)) + CTR = (N-K)/(MB-K) + IF (KK.GT.0) THEN + II=N-KK+1 + CALL STPMQRT('R','T',M , KK, K, 0, NB, A(II,1), LDA, + $ T(1, CTR * K + 1), LDT, C(1,1), LDC, + $ C(1,II), LDC, WORK, INFO ) + ELSE + II=N+1 + END IF +* + DO I=II-(MB-K),MB+1,-(MB-K) +* +* Multiply Q to the current block of C (1:M,I:I+MB) +* + CTR = CTR - 1 + CALL STPMQRT('R','T',M , MB-K, K, 0,NB, A(I,1), LDA, + $ T(1, CTR * K + 1), LDT, C(1,1), LDC, + $ C(1,I), LDC, WORK, INFO ) +* + END DO +* +* Multiply Q to the first block of C (1:M,1:MB) +* + CALL SGEMQRT('R','T',M , MB, K, NB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + ELSE IF (RIGHT.AND.NOTRAN) THEN +* +* Multiply Q to the first block of C +* + KK = MOD((N-K),(MB-K)) + II=N-KK+1 + CTR = 1 + CALL SGEMQRT('R','N', M, MB , K, NB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + DO I=MB+1,II-MB+K,(MB-K) +* +* Multiply Q to the current block of C (1:M,I:I+MB) +* + CALL STPMQRT('R','N', M, MB-K, K, 0,NB, A(I,1), LDA, + $ T(1, CTR * K + 1),LDT, C(1,1), LDC, + $ C(1,I), LDC, WORK, INFO ) + CTR = CTR + 1 +* + END DO + IF(II.LE.N) THEN +* +* Multiply Q to the last block of C +* + CALL STPMQRT('R','N', M, KK , K, 0,NB, A(II,1), LDA, + $ T(1, CTR * K + 1),LDT, C(1,1), LDC, + $ C(1,II), LDC, WORK, INFO ) +* + END IF +* + END IF +* + WORK(1) = LW + RETURN +* +* End of SLAMTSQR +* + END diff --git a/dspl/liblapack/SRC/slaneg.f b/dspl/liblapack/SRC/slaneg.f new file mode 100644 index 0000000..dcb11df --- /dev/null +++ b/dspl/liblapack/SRC/slaneg.f @@ -0,0 +1,227 @@ +*> \brief \b SLANEG computes the Sturm count. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLANEG + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION SLANEG( N, D, LLD, SIGMA, PIVMIN, R ) +* +* .. Scalar Arguments .. +* INTEGER N, R +* REAL PIVMIN, SIGMA +* .. +* .. Array Arguments .. +* REAL D( * ), LLD( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLANEG computes the Sturm count, the number of negative pivots +*> encountered while factoring tridiagonal T - sigma I = L D L^T. +*> This implementation works directly on the factors without forming +*> the tridiagonal matrix T. The Sturm count is also the number of +*> eigenvalues of T less than sigma. +*> +*> This routine is called from SLARRB. +*> +*> The current routine does not use the PIVMIN parameter but rather +*> requires IEEE-754 propagation of Infinities and NaNs. This +*> routine also has no input range restrictions but does require +*> default exception handling such that x/0 produces Inf when x is +*> non-zero, and Inf/Inf produces NaN. For more information, see: +*> +*> Marques, Riedy, and Voemel, "Benefits of IEEE-754 Features in +*> Modern Symmetric Tridiagonal Eigensolvers," SIAM Journal on +*> Scientific Computing, v28, n5, 2006. DOI 10.1137/050641624 +*> (Tech report version in LAWN 172 with the same title.) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The N diagonal elements of the diagonal matrix D. +*> \endverbatim +*> +*> \param[in] LLD +*> \verbatim +*> LLD is REAL array, dimension (N-1) +*> The (N-1) elements L(i)*L(i)*D(i). +*> \endverbatim +*> +*> \param[in] SIGMA +*> \verbatim +*> SIGMA is REAL +*> Shift amount in T - sigma I = L D L^T. +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is REAL +*> The minimum pivot in the Sturm sequence. May be used +*> when zero pivots are encountered on non-IEEE-754 +*> architectures. +*> \endverbatim +*> +*> \param[in] R +*> \verbatim +*> R is INTEGER +*> The twist index for the twisted factorization that is used +*> for the negcount. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA \n +*> Jason Riedy, University of California, Berkeley, USA \n +*> +* ===================================================================== + INTEGER FUNCTION SLANEG( N, D, LLD, SIGMA, PIVMIN, R ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER N, R + REAL PIVMIN, SIGMA +* .. +* .. Array Arguments .. + REAL D( * ), LLD( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* Some architectures propagate Infinities and NaNs very slowly, so +* the code computes counts in BLKLEN chunks. Then a NaN can +* propagate at most BLKLEN columns before being detected. This is +* not a general tuning parameter; it needs only to be just large +* enough that the overhead is tiny in common cases. + INTEGER BLKLEN + PARAMETER ( BLKLEN = 128 ) +* .. +* .. Local Scalars .. + INTEGER BJ, J, NEG1, NEG2, NEGCNT + REAL BSAV, DMINUS, DPLUS, GAMMA, P, T, TMP + LOGICAL SAWNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX +* .. +* .. External Functions .. + LOGICAL SISNAN + EXTERNAL SISNAN +* .. +* .. Executable Statements .. + + NEGCNT = 0 + +* I) upper part: L D L^T - SIGMA I = L+ D+ L+^T + T = -SIGMA + DO 210 BJ = 1, R-1, BLKLEN + NEG1 = 0 + BSAV = T + DO 21 J = BJ, MIN(BJ+BLKLEN-1, R-1) + DPLUS = D( J ) + T + IF( DPLUS.LT.ZERO ) NEG1 = NEG1 + 1 + TMP = T / DPLUS + T = TMP * LLD( J ) - SIGMA + 21 CONTINUE + SAWNAN = SISNAN( T ) +* Run a slower version of the above loop if a NaN is detected. +* A NaN should occur only with a zero pivot after an infinite +* pivot. In that case, substituting 1 for T/DPLUS is the +* correct limit. + IF( SAWNAN ) THEN + NEG1 = 0 + T = BSAV + DO 22 J = BJ, MIN(BJ+BLKLEN-1, R-1) + DPLUS = D( J ) + T + IF( DPLUS.LT.ZERO ) NEG1 = NEG1 + 1 + TMP = T / DPLUS + IF (SISNAN(TMP)) TMP = ONE + T = TMP * LLD(J) - SIGMA + 22 CONTINUE + END IF + NEGCNT = NEGCNT + NEG1 + 210 CONTINUE +* +* II) lower part: L D L^T - SIGMA I = U- D- U-^T + P = D( N ) - SIGMA + DO 230 BJ = N-1, R, -BLKLEN + NEG2 = 0 + BSAV = P + DO 23 J = BJ, MAX(BJ-BLKLEN+1, R), -1 + DMINUS = LLD( J ) + P + IF( DMINUS.LT.ZERO ) NEG2 = NEG2 + 1 + TMP = P / DMINUS + P = TMP * D( J ) - SIGMA + 23 CONTINUE + SAWNAN = SISNAN( P ) +* As above, run a slower version that substitutes 1 for Inf/Inf. +* + IF( SAWNAN ) THEN + NEG2 = 0 + P = BSAV + DO 24 J = BJ, MAX(BJ-BLKLEN+1, R), -1 + DMINUS = LLD( J ) + P + IF( DMINUS.LT.ZERO ) NEG2 = NEG2 + 1 + TMP = P / DMINUS + IF (SISNAN(TMP)) TMP = ONE + P = TMP * D(J) - SIGMA + 24 CONTINUE + END IF + NEGCNT = NEGCNT + NEG2 + 230 CONTINUE +* +* III) Twist index +* T was shifted by SIGMA initially. + GAMMA = (T + SIGMA) + P + IF( GAMMA.LT.ZERO ) NEGCNT = NEGCNT+1 + + SLANEG = NEGCNT + END diff --git a/dspl/liblapack/SRC/slangb.f b/dspl/liblapack/SRC/slangb.f new file mode 100644 index 0000000..fd538b1 --- /dev/null +++ b/dspl/liblapack/SRC/slangb.f @@ -0,0 +1,225 @@ +*> \brief \b SLANGB returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of general band matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLANGB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLANGB( NORM, N, KL, KU, AB, LDAB, +* WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER KL, KU, LDAB, N +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLANGB returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of an +*> n by n band matrix A, with kl sub-diagonals and ku super-diagonals. +*> \endverbatim +*> +*> \return SLANGB +*> \verbatim +*> +*> SLANGB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in SLANGB as described +*> above. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, SLANGB is +*> set to zero. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of sub-diagonals of the matrix A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of super-diagonals of the matrix A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> The band matrix A, stored in rows 1 to KL+KU+1. The j-th +*> column of A is stored in the j-th column of the array AB as +*> follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGBauxiliary +* +* ===================================================================== + REAL FUNCTION SLANGB( NORM, N, KL, KU, AB, LDAB, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER KL, KU, LDAB, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, K, L + REAL SCALE, SUM, VALUE, TEMP +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) + TEMP = ABS( AB( I, J ) ) + IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 30 CONTINUE + IF( VALUE.LT.SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, N + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + K = KU + 1 - J + DO 60 I = MAX( 1, J-KU ), MIN( N, J+KL ) + WORK( I ) = WORK( I ) + ABS( AB( K+I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, N + TEMP = WORK( I ) + IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + L = MAX( 1, J-KU ) + K = KU + 1 - J + L + CALL SLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANGB = VALUE + RETURN +* +* End of SLANGB +* + END diff --git a/dspl/liblapack/SRC/slange.f b/dspl/liblapack/SRC/slange.f new file mode 100644 index 0000000..2eb8d7d --- /dev/null +++ b/dspl/liblapack/SRC/slange.f @@ -0,0 +1,211 @@ +*> \brief \b SLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLANGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLANGE( NORM, M, N, A, LDA, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER LDA, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLANGE returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> real matrix A. +*> \endverbatim +*> +*> \return SLANGE +*> \verbatim +*> +*> SLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in SLANGE as described +*> above. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. When M = 0, +*> SLANGE is set to zero. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. When N = 0, +*> SLANGE is set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(M,1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= M when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEauxiliary +* +* ===================================================================== + REAL FUNCTION SLANGE( NORM, M, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL SCALE, SUM, VALUE, TEMP +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, M + TEMP = ABS( A( I, J ) ) + IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, M + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + IF( VALUE.LT.SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, M + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, M + TEMP = WORK( I ) + IF( VALUE.LT.TEMP .OR. SISNAN( TEMP ) ) VALUE = TEMP + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL SLASSQ( M, A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANGE = VALUE + RETURN +* +* End of SLANGE +* + END diff --git a/dspl/liblapack/SRC/slangt.f b/dspl/liblapack/SRC/slangt.f new file mode 100644 index 0000000..6f32021 --- /dev/null +++ b/dspl/liblapack/SRC/slangt.f @@ -0,0 +1,208 @@ +*> \brief \b SLANGT returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of a general tridiagonal matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLANGT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLANGT( NORM, N, DL, D, DU ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER N +* .. +* .. Array Arguments .. +* REAL D( * ), DL( * ), DU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLANGT returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> real tridiagonal matrix A. +*> \endverbatim +*> +*> \return SLANGT +*> \verbatim +*> +*> SLANGT = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in SLANGT as described +*> above. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, SLANGT is +*> set to zero. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is REAL array, dimension (N-1) +*> The (n-1) sub-diagonal elements of A. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is REAL array, dimension (N-1) +*> The (n-1) super-diagonal elements of A. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SLANGT( NORM, N, DL, D, DU ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER N +* .. +* .. Array Arguments .. + REAL D( * ), DL( * ), DU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL ANORM, SCALE, SUM, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + ANORM = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + ANORM = ABS( D( N ) ) + DO 10 I = 1, N - 1 + IF( ANORM.LT.ABS( DL( I ) ) .OR. SISNAN( ABS( DL( I ) ) ) ) + $ ANORM = ABS(DL(I)) + IF( ANORM.LT.ABS( D( I ) ) .OR. SISNAN( ABS( D( I ) ) ) ) + $ ANORM = ABS(D(I)) + IF( ANORM.LT.ABS( DU( I ) ) .OR. SISNAN (ABS( DU( I ) ) ) ) + $ ANORM = ABS(DU(I)) + 10 CONTINUE + ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN +* +* Find norm1(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = ABS( D( 1 ) )+ABS( DL( 1 ) ) + TEMP = ABS( D( N ) )+ABS( DU( N-1 ) ) + IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP + DO 20 I = 2, N - 1 + TEMP = ABS( D( I ) )+ABS( DL( I ) )+ABS( DU( I-1 ) ) + IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP + 20 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = ABS( D( 1 ) )+ABS( DU( 1 ) ) + TEMP = ABS( D( N ) )+ABS( DL( N-1 ) ) + IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP + DO 30 I = 2, N - 1 + TEMP = ABS( D( I ) )+ABS( DU( I ) )+ABS( DL( I-1 ) ) + IF( ANORM .LT. TEMP .OR. SISNAN( TEMP ) ) ANORM = TEMP + 30 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + CALL SLASSQ( N, D, 1, SCALE, SUM ) + IF( N.GT.1 ) THEN + CALL SLASSQ( N-1, DL, 1, SCALE, SUM ) + CALL SLASSQ( N-1, DU, 1, SCALE, SUM ) + END IF + ANORM = SCALE*SQRT( SUM ) + END IF +* + SLANGT = ANORM + RETURN +* +* End of SLANGT +* + END diff --git a/dspl/liblapack/SRC/slanhs.f b/dspl/liblapack/SRC/slanhs.f new file mode 100644 index 0000000..c5a077f --- /dev/null +++ b/dspl/liblapack/SRC/slanhs.f @@ -0,0 +1,205 @@ +*> \brief \b SLANHS returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of an upper Hessenberg matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLANHS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLANHS( NORM, N, A, LDA, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER LDA, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLANHS returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> Hessenberg matrix A. +*> \endverbatim +*> +*> \return SLANHS +*> \verbatim +*> +*> SLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in SLANHS as described +*> above. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, SLANHS is +*> set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The n by n upper Hessenberg matrix A; the part of A below the +*> first sub-diagonal is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(N,1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SLANHS( NORM, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, MIN( N, J+1 ) + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, MIN( N, J+1 ) + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, N + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, MIN( N, J+1 ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL SLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANHS = VALUE + RETURN +* +* End of SLANHS +* + END diff --git a/dspl/liblapack/SRC/slansb.f b/dspl/liblapack/SRC/slansb.f new file mode 100644 index 0000000..8f3fe1e --- /dev/null +++ b/dspl/liblapack/SRC/slansb.f @@ -0,0 +1,258 @@ +*> \brief \b SLANSB returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a symmetric band matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLANSB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLANSB( NORM, UPLO, N, K, AB, LDAB, +* WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, UPLO +* INTEGER K, LDAB, N +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLANSB returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of an +*> n by n symmetric band matrix A, with k super-diagonals. +*> \endverbatim +*> +*> \return SLANSB +*> \verbatim +*> +*> SLANSB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in SLANSB as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> band matrix A is supplied. +*> = 'U': Upper triangular part is supplied +*> = 'L': Lower triangular part is supplied +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, SLANSB is +*> set to zero. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of super-diagonals or sub-diagonals of the +*> band matrix A. K >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> The upper or lower triangle of the symmetric band matrix A, +*> stored in the first K+1 rows of AB. The j-th column of A is +*> stored in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= K+1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SLANSB( NORM, UPLO, N, K, AB, LDAB, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER K, LDAB, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L + REAL ABSA, SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = MAX( K+2-J, 1 ), K + 1 + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = 1, MIN( N+1-J, K+1 ) + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + L = K + 1 - J + DO 50 I = MAX( 1, J-K ), J - 1 + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( AB( K+1, J ) ) + 60 CONTINUE + DO 70 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( AB( 1, J ) ) + L = 1 - J + DO 90 I = J + 1, MIN( N, J+K ) + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( K.GT.0 ) THEN + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL SLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ), + $ 1, SCALE, SUM ) + 110 CONTINUE + L = K + 1 + ELSE + DO 120 J = 1, N - 1 + CALL SLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, + $ SUM ) + 120 CONTINUE + L = 1 + END IF + SUM = 2*SUM + ELSE + L = 1 + END IF + CALL SLASSQ( N, AB( L, 1 ), LDAB, SCALE, SUM ) + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANSB = VALUE + RETURN +* +* End of SLANSB +* + END diff --git a/dspl/liblapack/SRC/slansf.f b/dspl/liblapack/SRC/slansf.f new file mode 100644 index 0000000..eab99a9 --- /dev/null +++ b/dspl/liblapack/SRC/slansf.f @@ -0,0 +1,964 @@ +*> \brief \b SLANSF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLANSF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLANSF( NORM, TRANSR, UPLO, N, A, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, TRANSR, UPLO +* INTEGER N +* .. +* .. Array Arguments .. +* REAL A( 0: * ), WORK( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLANSF returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> real symmetric matrix A in RFP format. +*> \endverbatim +*> +*> \return SLANSF +*> \verbatim +*> +*> SLANSF = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in SLANSF as described +*> above. +*> \endverbatim +*> +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> Specifies whether the RFP format of A is normal or +*> transposed format. +*> = 'N': RFP format is Normal; +*> = 'T': RFP format is Transpose. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the RFP matrix A came from +*> an upper or lower triangular matrix as follows: +*> = 'U': RFP A came from an upper triangular matrix; +*> = 'L': RFP A came from a lower triangular matrix. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, SLANSF is +*> set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension ( N*(N+1)/2 ); +*> On entry, the upper (if UPLO = 'U') or lower (if UPLO = 'L') +*> part of the symmetric matrix A stored in RFP format. See the +*> "Notes" below for more details. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Rectangular Full Packed (RFP) Format when N is +*> even. We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> the transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> the transpose of the last three columns of AP lower. +*> This covers the case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 03 04 05 33 43 53 +*> 13 14 15 00 44 54 +*> 23 24 25 10 11 55 +*> 33 34 35 20 21 22 +*> 00 44 45 30 31 32 +*> 01 11 55 40 41 42 +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We then consider Rectangular Full Packed (RFP) Format when N is +*> odd. We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> the transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> the transpose of the last two columns of AP lower. +*> This covers the case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 02 03 04 00 33 43 +*> 12 13 14 10 11 44 +*> 22 23 24 20 21 22 +*> 00 33 34 30 31 32 +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> 02 12 22 00 01 00 10 20 30 40 50 +*> 03 13 23 33 11 33 11 21 31 41 51 +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +* +* ===================================================================== + REAL FUNCTION SLANSF( NORM, TRANSR, UPLO, N, A, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, TRANSR, UPLO + INTEGER N +* .. +* .. Array Arguments .. + REAL A( 0: * ), WORK( 0: * ) +* .. +* +* ===================================================================== +* +* .. +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, IFM, ILU, NOE, N1, K, L, LDA + REAL SCALE, S, VALUE, AA, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + SLANSF = ZERO + RETURN + ELSE IF( N.EQ.1 ) THEN + SLANSF = ABS( A(0) ) + RETURN + END IF +* +* set noe = 1 if n is odd. if n is even set noe=0 +* + NOE = 1 + IF( MOD( N, 2 ).EQ.0 ) + $ NOE = 0 +* +* set ifm = 0 when form='T or 't' and 1 otherwise +* + IFM = 1 + IF( LSAME( TRANSR, 'T' ) ) + $ IFM = 0 +* +* set ilu = 0 when uplo='U or 'u' and 1 otherwise +* + ILU = 1 + IF( LSAME( UPLO, 'U' ) ) + $ ILU = 0 +* +* set lda = (n+1)/2 when ifm = 0 +* set lda = n when ifm = 1 and noe = 1 +* set lda = n+1 when ifm = 1 and noe = 0 +* + IF( IFM.EQ.1 ) THEN + IF( NOE.EQ.1 ) THEN + LDA = N + ELSE +* noe=0 + LDA = N + 1 + END IF + ELSE +* ifm=0 + LDA = ( N+1 ) / 2 + END IF +* + IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + K = ( N+1 ) / 2 + VALUE = ZERO + IF( NOE.EQ.1 ) THEN +* n is odd + IF( IFM.EQ.1 ) THEN +* A is n by k + DO J = 0, K - 1 + DO I = 0, N - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + ELSE +* xpose case; A is k by n + DO J = 0, N - 1 + DO I = 0, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + END IF + ELSE +* n is even + IF( IFM.EQ.1 ) THEN +* A is n+1 by k + DO J = 0, K - 1 + DO I = 0, N + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + ELSE +* xpose case; A is k by n+1 + DO J = 0, N + DO I = 0, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + END IF + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + IF( IFM.EQ.1 ) THEN + K = N / 2 + IF( NOE.EQ.1 ) THEN +* n is odd + IF( ILU.EQ.0 ) THEN + DO I = 0, K - 1 + WORK( I ) = ZERO + END DO + DO J = 0, K + S = ZERO + DO I = 0, K + J - 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(i,j+k) + S = S + AA + WORK( I ) = WORK( I ) + AA + END DO + AA = ABS( A( I+J*LDA ) ) +* -> A(j+k,j+k) + WORK( J+K ) = S + AA + IF( I.EQ.K+K ) + $ GO TO 10 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(j,j) + WORK( J ) = WORK( J ) + AA + S = ZERO + DO L = J + 1, K - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(l,j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + 10 CONTINUE + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + ELSE +* ilu = 1 + K = K + 1 +* k=(n+1)/2 for n odd and ilu=1 + DO I = K, N - 1 + WORK( I ) = ZERO + END DO + DO J = K - 1, 0, -1 + S = ZERO + DO I = 0, J - 2 + AA = ABS( A( I+J*LDA ) ) +* -> A(j+k,i+k) + S = S + AA + WORK( I+K ) = WORK( I+K ) + AA + END DO + IF( J.GT.0 ) THEN + AA = ABS( A( I+J*LDA ) ) +* -> A(j+k,j+k) + S = S + AA + WORK( I+K ) = WORK( I+K ) + S +* i=j + I = I + 1 + END IF + AA = ABS( A( I+J*LDA ) ) +* -> A(j,j) + WORK( J ) = AA + S = ZERO + DO L = J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(l,j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END IF + ELSE +* n is even + IF( ILU.EQ.0 ) THEN + DO I = 0, K - 1 + WORK( I ) = ZERO + END DO + DO J = 0, K - 1 + S = ZERO + DO I = 0, K + J - 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(i,j+k) + S = S + AA + WORK( I ) = WORK( I ) + AA + END DO + AA = ABS( A( I+J*LDA ) ) +* -> A(j+k,j+k) + WORK( J+K ) = S + AA + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(j,j) + WORK( J ) = WORK( J ) + AA + S = ZERO + DO L = J + 1, K - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(l,j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + ELSE +* ilu = 1 + DO I = K, N - 1 + WORK( I ) = ZERO + END DO + DO J = K - 1, 0, -1 + S = ZERO + DO I = 0, J - 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(j+k,i+k) + S = S + AA + WORK( I+K ) = WORK( I+K ) + AA + END DO + AA = ABS( A( I+J*LDA ) ) +* -> A(j+k,j+k) + S = S + AA + WORK( I+K ) = WORK( I+K ) + S +* i=j + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(j,j) + WORK( J ) = AA + S = ZERO + DO L = J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(l,j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END IF + END IF + ELSE +* ifm=0 + K = N / 2 + IF( NOE.EQ.1 ) THEN +* n is odd + IF( ILU.EQ.0 ) THEN + N1 = K +* n/2 + K = K + 1 +* k is the row size and lda + DO I = N1, N - 1 + WORK( I ) = ZERO + END DO + DO J = 0, N1 - 1 + S = ZERO + DO I = 0, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,n1+i) + WORK( I+N1 ) = WORK( I+N1 ) + AA + S = S + AA + END DO + WORK( J ) = S + END DO +* j=n1=k-1 is special + S = ABS( A( 0+J*LDA ) ) +* A(k-1,k-1) + DO I = 1, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(k-1,i+n1) + WORK( I+N1 ) = WORK( I+N1 ) + AA + S = S + AA + END DO + WORK( J ) = WORK( J ) + S + DO J = K, N - 1 + S = ZERO + DO I = 0, J - K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(i,j-k) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO +* i=j-k + AA = ABS( A( I+J*LDA ) ) +* A(j-k,j-k) + S = S + AA + WORK( J-K ) = WORK( J-K ) + S + I = I + 1 + S = ABS( A( I+J*LDA ) ) +* A(j,j) + DO L = J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,l) + WORK( L ) = WORK( L ) + AA + S = S + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + ELSE +* ilu=1 + K = K + 1 +* k=(n+1)/2 for n odd and ilu=1 + DO I = K, N - 1 + WORK( I ) = ZERO + END DO + DO J = 0, K - 2 +* process + S = ZERO + DO I = 0, J - 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO + AA = ABS( A( I+J*LDA ) ) +* i=j so process of A(j,j) + S = S + AA + WORK( J ) = S +* is initialised here + I = I + 1 +* i=j process A(j+k,j+k) + AA = ABS( A( I+J*LDA ) ) + S = AA + DO L = K + J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* A(l,k+j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( K+J ) = WORK( K+J ) + S + END DO +* j=k-1 is special :process col A(k-1,0:k-1) + S = ZERO + DO I = 0, K - 2 + AA = ABS( A( I+J*LDA ) ) +* A(k,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO +* i=k-1 + AA = ABS( A( I+J*LDA ) ) +* A(k-1,k-1) + S = S + AA + WORK( I ) = S +* done with col j=k+1 + DO J = K, N - 1 +* process col j of A = A(j,0:k-1) + S = ZERO + DO I = 0, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END IF + ELSE +* n is even + IF( ILU.EQ.0 ) THEN + DO I = K, N - 1 + WORK( I ) = ZERO + END DO + DO J = 0, K - 1 + S = ZERO + DO I = 0, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,i+k) + WORK( I+K ) = WORK( I+K ) + AA + S = S + AA + END DO + WORK( J ) = S + END DO +* j=k + AA = ABS( A( 0+J*LDA ) ) +* A(k,k) + S = AA + DO I = 1, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(k,k+i) + WORK( I+K ) = WORK( I+K ) + AA + S = S + AA + END DO + WORK( J ) = WORK( J ) + S + DO J = K + 1, N - 1 + S = ZERO + DO I = 0, J - 2 - K + AA = ABS( A( I+J*LDA ) ) +* A(i,j-k-1) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO +* i=j-1-k + AA = ABS( A( I+J*LDA ) ) +* A(j-k-1,j-k-1) + S = S + AA + WORK( J-K-1 ) = WORK( J-K-1 ) + S + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,j) + S = AA + DO L = J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,l) + WORK( L ) = WORK( L ) + AA + S = S + AA + END DO + WORK( J ) = WORK( J ) + S + END DO +* j=n + S = ZERO + DO I = 0, K - 2 + AA = ABS( A( I+J*LDA ) ) +* A(i,k-1) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO +* i=k-1 + AA = ABS( A( I+J*LDA ) ) +* A(k-1,k-1) + S = S + AA + WORK( I ) = WORK( I ) + S + VALUE = WORK ( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + ELSE +* ilu=1 + DO I = K, N - 1 + WORK( I ) = ZERO + END DO +* j=0 is special :process col A(k:n-1,k) + S = ABS( A( 0 ) ) +* A(k,k) + DO I = 1, K - 1 + AA = ABS( A( I ) ) +* A(k+i,k) + WORK( I+K ) = WORK( I+K ) + AA + S = S + AA + END DO + WORK( K ) = WORK( K ) + S + DO J = 1, K - 1 +* process + S = ZERO + DO I = 0, J - 2 + AA = ABS( A( I+J*LDA ) ) +* A(j-1,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO + AA = ABS( A( I+J*LDA ) ) +* i=j-1 so process of A(j-1,j-1) + S = S + AA + WORK( J-1 ) = S +* is initialised here + I = I + 1 +* i=j process A(j+k,j+k) + AA = ABS( A( I+J*LDA ) ) + S = AA + DO L = K + J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* A(l,k+j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( K+J ) = WORK( K+J ) + S + END DO +* j=k is special :process col A(k,0:k-1) + S = ZERO + DO I = 0, K - 2 + AA = ABS( A( I+J*LDA ) ) +* A(k,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO +* i=k-1 + AA = ABS( A( I+J*LDA ) ) +* A(k-1,k-1) + S = S + AA + WORK( I ) = S +* done with col j=k+1 + DO J = K + 1, N +* process col j-1 of A = A(j-1,0:k-1) + S = ZERO + DO I = 0, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(j-1,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO + WORK( J-1 ) = WORK( J-1 ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. SISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END IF + END IF + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + K = ( N+1 ) / 2 + SCALE = ZERO + S = ONE + IF( NOE.EQ.1 ) THEN +* n is odd + IF( IFM.EQ.1 ) THEN +* A is normal + IF( ILU.EQ.0 ) THEN +* A is upper + DO J = 0, K - 3 + CALL SLASSQ( K-J-2, A( K+J+1+J*LDA ), 1, SCALE, S ) +* L at A(k,0) + END DO + DO J = 0, K - 1 + CALL SLASSQ( K+J-1, A( 0+J*LDA ), 1, SCALE, S ) +* trap U at A(0,0) + END DO + S = S + S +* double s for the off diagonal elements + CALL SLASSQ( K-1, A( K ), LDA+1, SCALE, S ) +* tri L at A(k,0) + CALL SLASSQ( K, A( K-1 ), LDA+1, SCALE, S ) +* tri U at A(k-1,0) + ELSE +* ilu=1 & A is lower + DO J = 0, K - 1 + CALL SLASSQ( N-J-1, A( J+1+J*LDA ), 1, SCALE, S ) +* trap L at A(0,0) + END DO + DO J = 0, K - 2 + CALL SLASSQ( J, A( 0+( 1+J )*LDA ), 1, SCALE, S ) +* U at A(0,1) + END DO + S = S + S +* double s for the off diagonal elements + CALL SLASSQ( K, A( 0 ), LDA+1, SCALE, S ) +* tri L at A(0,0) + CALL SLASSQ( K-1, A( 0+LDA ), LDA+1, SCALE, S ) +* tri U at A(0,1) + END IF + ELSE +* A is xpose + IF( ILU.EQ.0 ) THEN +* A**T is upper + DO J = 1, K - 2 + CALL SLASSQ( J, A( 0+( K+J )*LDA ), 1, SCALE, S ) +* U at A(0,k) + END DO + DO J = 0, K - 2 + CALL SLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) +* k by k-1 rect. at A(0,0) + END DO + DO J = 0, K - 2 + CALL SLASSQ( K-J-1, A( J+1+( J+K-1 )*LDA ), 1, + $ SCALE, S ) +* L at A(0,k-1) + END DO + S = S + S +* double s for the off diagonal elements + CALL SLASSQ( K-1, A( 0+K*LDA ), LDA+1, SCALE, S ) +* tri U at A(0,k) + CALL SLASSQ( K, A( 0+( K-1 )*LDA ), LDA+1, SCALE, S ) +* tri L at A(0,k-1) + ELSE +* A**T is lower + DO J = 1, K - 1 + CALL SLASSQ( J, A( 0+J*LDA ), 1, SCALE, S ) +* U at A(0,0) + END DO + DO J = K, N - 1 + CALL SLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) +* k by k-1 rect. at A(0,k) + END DO + DO J = 0, K - 3 + CALL SLASSQ( K-J-2, A( J+2+J*LDA ), 1, SCALE, S ) +* L at A(1,0) + END DO + S = S + S +* double s for the off diagonal elements + CALL SLASSQ( K, A( 0 ), LDA+1, SCALE, S ) +* tri U at A(0,0) + CALL SLASSQ( K-1, A( 1 ), LDA+1, SCALE, S ) +* tri L at A(1,0) + END IF + END IF + ELSE +* n is even + IF( IFM.EQ.1 ) THEN +* A is normal + IF( ILU.EQ.0 ) THEN +* A is upper + DO J = 0, K - 2 + CALL SLASSQ( K-J-1, A( K+J+2+J*LDA ), 1, SCALE, S ) +* L at A(k+1,0) + END DO + DO J = 0, K - 1 + CALL SLASSQ( K+J, A( 0+J*LDA ), 1, SCALE, S ) +* trap U at A(0,0) + END DO + S = S + S +* double s for the off diagonal elements + CALL SLASSQ( K, A( K+1 ), LDA+1, SCALE, S ) +* tri L at A(k+1,0) + CALL SLASSQ( K, A( K ), LDA+1, SCALE, S ) +* tri U at A(k,0) + ELSE +* ilu=1 & A is lower + DO J = 0, K - 1 + CALL SLASSQ( N-J-1, A( J+2+J*LDA ), 1, SCALE, S ) +* trap L at A(1,0) + END DO + DO J = 1, K - 1 + CALL SLASSQ( J, A( 0+J*LDA ), 1, SCALE, S ) +* U at A(0,0) + END DO + S = S + S +* double s for the off diagonal elements + CALL SLASSQ( K, A( 1 ), LDA+1, SCALE, S ) +* tri L at A(1,0) + CALL SLASSQ( K, A( 0 ), LDA+1, SCALE, S ) +* tri U at A(0,0) + END IF + ELSE +* A is xpose + IF( ILU.EQ.0 ) THEN +* A**T is upper + DO J = 1, K - 1 + CALL SLASSQ( J, A( 0+( K+1+J )*LDA ), 1, SCALE, S ) +* U at A(0,k+1) + END DO + DO J = 0, K - 1 + CALL SLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) +* k by k rect. at A(0,0) + END DO + DO J = 0, K - 2 + CALL SLASSQ( K-J-1, A( J+1+( J+K )*LDA ), 1, SCALE, + $ S ) +* L at A(0,k) + END DO + S = S + S +* double s for the off diagonal elements + CALL SLASSQ( K, A( 0+( K+1 )*LDA ), LDA+1, SCALE, S ) +* tri U at A(0,k+1) + CALL SLASSQ( K, A( 0+K*LDA ), LDA+1, SCALE, S ) +* tri L at A(0,k) + ELSE +* A**T is lower + DO J = 1, K - 1 + CALL SLASSQ( J, A( 0+( J+1 )*LDA ), 1, SCALE, S ) +* U at A(0,1) + END DO + DO J = K + 1, N + CALL SLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) +* k by k rect. at A(0,k+1) + END DO + DO J = 0, K - 2 + CALL SLASSQ( K-J-1, A( J+1+J*LDA ), 1, SCALE, S ) +* L at A(0,0) + END DO + S = S + S +* double s for the off diagonal elements + CALL SLASSQ( K, A( LDA ), LDA+1, SCALE, S ) +* tri L at A(0,1) + CALL SLASSQ( K, A( 0 ), LDA+1, SCALE, S ) +* tri U at A(0,0) + END IF + END IF + END IF + VALUE = SCALE*SQRT( S ) + END IF +* + SLANSF = VALUE + RETURN +* +* End of SLANSF +* + END diff --git a/dspl/liblapack/SRC/slansp.f b/dspl/liblapack/SRC/slansp.f new file mode 100644 index 0000000..35390cd --- /dev/null +++ b/dspl/liblapack/SRC/slansp.f @@ -0,0 +1,261 @@ +*> \brief \b SLANSP returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a symmetric matrix supplied in packed form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLANSP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLANSP( NORM, UPLO, N, AP, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, UPLO +* INTEGER N +* .. +* .. Array Arguments .. +* REAL AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLANSP returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> real symmetric matrix A, supplied in packed form. +*> \endverbatim +*> +*> \return SLANSP +*> \verbatim +*> +*> SLANSP = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in SLANSP as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is supplied. +*> = 'U': Upper triangular part of A is supplied +*> = 'L': Lower triangular part of A is supplied +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, SLANSP is +*> set to zero. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the symmetric matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SLANSP( NORM, UPLO, N, AP, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER N +* .. +* .. Array Arguments .. + REAL AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, K + REAL ABSA, SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + K = 1 + DO 20 J = 1, N + DO 10 I = K, K + J - 1 + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + K = K + J + 20 CONTINUE + ELSE + K = 1 + DO 40 J = 1, N + DO 30 I = K, K + N - J + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + K = K + N - J + 1 + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + K = 1 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 50 CONTINUE + WORK( J ) = SUM + ABS( AP( K ) ) + K = K + 1 + 60 CONTINUE + DO 70 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( AP( K ) ) + K = K + 1 + DO 90 I = J + 1, N + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 90 CONTINUE + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + K = 2 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL SLASSQ( J-1, AP( K ), 1, SCALE, SUM ) + K = K + J + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL SLASSQ( N-J, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 120 CONTINUE + END IF + SUM = 2*SUM + K = 1 + DO 130 I = 1, N + IF( AP( K ).NE.ZERO ) THEN + ABSA = ABS( AP( K ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + IF( LSAME( UPLO, 'U' ) ) THEN + K = K + I + 1 + ELSE + K = K + N - I + 1 + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANSP = VALUE + RETURN +* +* End of SLANSP +* + END diff --git a/dspl/liblapack/SRC/slanst.f b/dspl/liblapack/SRC/slanst.f new file mode 100644 index 0000000..4b589ba --- /dev/null +++ b/dspl/liblapack/SRC/slanst.f @@ -0,0 +1,186 @@ +*> \brief \b SLANST returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a real symmetric tridiagonal matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLANST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLANST( NORM, N, D, E ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER N +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLANST returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> real symmetric tridiagonal matrix A. +*> \endverbatim +*> +*> \return SLANST +*> \verbatim +*> +*> SLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in SLANST as described +*> above. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, SLANST is +*> set to zero. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of A. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The (n-1) sub-diagonal or super-diagonal elements of A. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SLANST( NORM, N, D, E ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL ANORM, SCALE, SUM +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + ANORM = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + ANORM = ABS( D( N ) ) + DO 10 I = 1, N - 1 + SUM = ABS( D( I ) ) + IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM + SUM = ABS( E( I ) ) + IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM + 10 CONTINUE + ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' .OR. + $ LSAME( NORM, 'I' ) ) THEN +* +* Find norm1(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = ABS( D( 1 ) )+ABS( E( 1 ) ) + SUM = ABS( E( N-1 ) )+ABS( D( N ) ) + IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM + DO 20 I = 2, N - 1 + SUM = ABS( D( I ) )+ABS( E( I ) )+ABS( E( I-1 ) ) + IF( ANORM .LT. SUM .OR. SISNAN( SUM ) ) ANORM = SUM + 20 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( N.GT.1 ) THEN + CALL SLASSQ( N-1, E, 1, SCALE, SUM ) + SUM = 2*SUM + END IF + CALL SLASSQ( N, D, 1, SCALE, SUM ) + ANORM = SCALE*SQRT( SUM ) + END IF +* + SLANST = ANORM + RETURN +* +* End of SLANST +* + END diff --git a/dspl/liblapack/SRC/slansy.f b/dspl/liblapack/SRC/slansy.f new file mode 100644 index 0000000..c8400e5 --- /dev/null +++ b/dspl/liblapack/SRC/slansy.f @@ -0,0 +1,241 @@ +*> \brief \b SLANSY returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a real symmetric matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLANSY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLANSY( NORM, UPLO, N, A, LDA, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, UPLO +* INTEGER LDA, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLANSY returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> real symmetric matrix A. +*> \endverbatim +*> +*> \return SLANSY +*> \verbatim +*> +*> SLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in SLANSY as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is to be referenced. +*> = 'U': Upper triangular part of A is referenced +*> = 'L': Lower triangular part of A is referenced +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, SLANSY is +*> set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading n by n +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading n by n lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(N,1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYauxiliary +* +* ===================================================================== + REAL FUNCTION SLANSY( NORM, UPLO, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL ABSA, SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, J + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J, N + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( A( J, J ) ) + 60 CONTINUE + DO 70 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( A( J, J ) ) + DO 90 I = J + 1, N + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL SLASSQ( J-1, A( 1, J ), 1, SCALE, SUM ) + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL SLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM ) + 120 CONTINUE + END IF + SUM = 2*SUM + CALL SLASSQ( N, A, LDA+1, SCALE, SUM ) + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANSY = VALUE + RETURN +* +* End of SLANSY +* + END diff --git a/dspl/liblapack/SRC/slantb.f b/dspl/liblapack/SRC/slantb.f new file mode 100644 index 0000000..3588779 --- /dev/null +++ b/dspl/liblapack/SRC/slantb.f @@ -0,0 +1,361 @@ +*> \brief \b SLANTB returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a triangular band matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLANTB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLANTB( NORM, UPLO, DIAG, N, K, AB, +* LDAB, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER K, LDAB, N +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLANTB returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of an +*> n by n triangular band matrix A, with ( k + 1 ) diagonals. +*> \endverbatim +*> +*> \return SLANTB +*> \verbatim +*> +*> SLANTB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in SLANTB as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, SLANTB is +*> set to zero. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of super-diagonals of the matrix A if UPLO = 'U', +*> or the number of sub-diagonals of the matrix A if UPLO = 'L'. +*> K >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> The upper or lower triangular band matrix A, stored in the +*> first k+1 rows of AB. The j-th column of A is stored +*> in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). +*> Note that when DIAG = 'U', the elements of the array AB +*> corresponding to the diagonal elements of the matrix A are +*> not referenced, but are assumed to be one. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= K+1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SLANTB( NORM, UPLO, DIAG, N, K, AB, + $ LDAB, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER K, LDAB, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J, L + REAL SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = MAX( K+2-J, 1 ), K + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = 2, MIN( N+1-J, K+1 ) + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = MAX( K+2-J, 1 ), K + 1 + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = 1, MIN( N+1-J, K+1 ) + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 90 I = MAX( K+2-J, 1 ), K + SUM = SUM + ABS( AB( I, J ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = MAX( K+2-J, 1 ), K + 1 + SUM = SUM + ABS( AB( I, J ) ) + 100 CONTINUE + END IF + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = 2, MIN( N+1-J, K+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = 1, MIN( N+1-J, K+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 130 CONTINUE + END IF + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, N + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + L = K + 1 - J + DO 160 I = MAX( 1, J-K ), J - 1 + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 180 I = 1, N + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + L = K + 1 - J + DO 190 I = MAX( 1, J-K ), J + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 230 J = 1, N + L = 1 - J + DO 220 I = J + 1, MIN( N, J+K ) + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 220 CONTINUE + 230 CONTINUE + ELSE + DO 240 I = 1, N + WORK( I ) = ZERO + 240 CONTINUE + DO 260 J = 1, N + L = 1 - J + DO 250 I = J, MIN( N, J+K ) + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 250 CONTINUE + 260 CONTINUE + END IF + END IF + DO 270 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 270 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + IF( K.GT.0 ) THEN + DO 280 J = 2, N + CALL SLASSQ( MIN( J-1, K ), + $ AB( MAX( K+2-J, 1 ), J ), 1, SCALE, + $ SUM ) + 280 CONTINUE + END IF + ELSE + SCALE = ZERO + SUM = ONE + DO 290 J = 1, N + CALL SLASSQ( MIN( J, K+1 ), AB( MAX( K+2-J, 1 ), J ), + $ 1, SCALE, SUM ) + 290 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + IF( K.GT.0 ) THEN + DO 300 J = 1, N - 1 + CALL SLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, + $ SUM ) + 300 CONTINUE + END IF + ELSE + SCALE = ZERO + SUM = ONE + DO 310 J = 1, N + CALL SLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1, SCALE, + $ SUM ) + 310 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANTB = VALUE + RETURN +* +* End of SLANTB +* + END diff --git a/dspl/liblapack/SRC/slantp.f b/dspl/liblapack/SRC/slantp.f new file mode 100644 index 0000000..1423f5c --- /dev/null +++ b/dspl/liblapack/SRC/slantp.f @@ -0,0 +1,355 @@ +*> \brief \b SLANTP returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a triangular matrix supplied in packed form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLANTP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLANTP( NORM, UPLO, DIAG, N, AP, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER N +* .. +* .. Array Arguments .. +* REAL AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLANTP returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> triangular matrix A, supplied in packed form. +*> \endverbatim +*> +*> \return SLANTP +*> \verbatim +*> +*> SLANTP = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in SLANTP as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, SLANTP is +*> set to zero. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> The upper or lower triangular matrix A, packed columnwise in +*> a linear array. The j-th column of A is stored in the array +*> AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> Note that when DIAG = 'U', the elements of the array AP +*> corresponding to the diagonal elements of the matrix A are +*> not referenced, but are assumed to be one. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SLANTP( NORM, UPLO, DIAG, N, AP, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER N +* .. +* .. Array Arguments .. + REAL AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J, K + REAL SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + K = 1 + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = K, K + J - 2 + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + K = K + J + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = K + 1, K + N - J + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + K = K + N - J + 1 + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = K, K + J - 1 + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 50 CONTINUE + K = K + J + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = K, K + N - J + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + K = K + N - J + 1 + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + K = 1 + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 90 I = K, K + J - 2 + SUM = SUM + ABS( AP( I ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = K, K + J - 1 + SUM = SUM + ABS( AP( I ) ) + 100 CONTINUE + END IF + K = K + J + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = K + 1, K + N - J + SUM = SUM + ABS( AP( I ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = K, K + N - J + SUM = SUM + ABS( AP( I ) ) + 130 CONTINUE + END IF + K = K + N - J + 1 + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + K = 1 + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, N + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + DO 160 I = 1, J - 1 + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 160 CONTINUE + K = K + 1 + 170 CONTINUE + ELSE + DO 180 I = 1, N + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + DO 190 I = 1, J + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 230 J = 1, N + K = K + 1 + DO 220 I = J + 1, N + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 220 CONTINUE + 230 CONTINUE + ELSE + DO 240 I = 1, N + WORK( I ) = ZERO + 240 CONTINUE + DO 260 J = 1, N + DO 250 I = J, N + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 250 CONTINUE + 260 CONTINUE + END IF + END IF + VALUE = ZERO + DO 270 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 270 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + K = 2 + DO 280 J = 2, N + CALL SLASSQ( J-1, AP( K ), 1, SCALE, SUM ) + K = K + J + 280 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + K = 1 + DO 290 J = 1, N + CALL SLASSQ( J, AP( K ), 1, SCALE, SUM ) + K = K + J + 290 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + K = 2 + DO 300 J = 1, N - 1 + CALL SLASSQ( N-J, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 300 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + K = 1 + DO 310 J = 1, N + CALL SLASSQ( N-J+1, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 310 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANTP = VALUE + RETURN +* +* End of SLANTP +* + END diff --git a/dspl/liblapack/SRC/slantr.f b/dspl/liblapack/SRC/slantr.f new file mode 100644 index 0000000..63b8558 --- /dev/null +++ b/dspl/liblapack/SRC/slantr.f @@ -0,0 +1,353 @@ +*> \brief \b SLANTR returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a trapezoidal or triangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLANTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLANTR( NORM, UPLO, DIAG, M, N, A, LDA, +* WORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER LDA, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLANTR returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> trapezoidal or triangular matrix A. +*> \endverbatim +*> +*> \return SLANTR +*> \verbatim +*> +*> SLANTR = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in SLANTR as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower trapezoidal. +*> = 'U': Upper trapezoidal +*> = 'L': Lower trapezoidal +*> Note that A is triangular instead of trapezoidal if M = N. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A has unit diagonal. +*> = 'N': Non-unit diagonal +*> = 'U': Unit diagonal +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0, and if +*> UPLO = 'U', M <= N. When M = 0, SLANTR is set to zero. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0, and if +*> UPLO = 'L', N <= M. When N = 0, SLANTR is set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The trapezoidal matrix A (A is triangular if M = N). +*> If UPLO = 'U', the leading m by n upper trapezoidal part of +*> the array A contains the upper trapezoidal matrix, and the +*> strictly lower triangular part of A is not referenced. +*> If UPLO = 'L', the leading m by n lower trapezoidal part of +*> the array A contains the lower trapezoidal matrix, and the +*> strictly upper triangular part of A is not referenced. Note +*> that when DIAG = 'U', the diagonal elements of A are not +*> referenced and are assumed to be one. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(M,1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)), +*> where LWORK >= M when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SLANTR( NORM, UPLO, DIAG, M, N, A, LDA, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J + REAL SCALE, SUM, VALUE +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( M, J-1 ) + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J + 1, M + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = 1, MIN( M, J ) + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = J, M + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( ( UDIAG ) .AND. ( J.LE.M ) ) THEN + SUM = ONE + DO 90 I = 1, J - 1 + SUM = SUM + ABS( A( I, J ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = 1, MIN( M, J ) + SUM = SUM + ABS( A( I, J ) ) + 100 CONTINUE + END IF + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = J + 1, M + SUM = SUM + ABS( A( I, J ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = J, M + SUM = SUM + ABS( A( I, J ) ) + 130 CONTINUE + END IF + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, M + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + DO 160 I = 1, MIN( M, J-1 ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 180 I = 1, M + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + DO 190 I = 1, MIN( M, J ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 220 I = N + 1, M + WORK( I ) = ZERO + 220 CONTINUE + DO 240 J = 1, N + DO 230 I = J + 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 230 CONTINUE + 240 CONTINUE + ELSE + DO 250 I = 1, M + WORK( I ) = ZERO + 250 CONTINUE + DO 270 J = 1, N + DO 260 I = J, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 260 CONTINUE + 270 CONTINUE + END IF + END IF + VALUE = ZERO + DO 280 I = 1, M + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. SISNAN( SUM ) ) VALUE = SUM + 280 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = MIN( M, N ) + DO 290 J = 2, N + CALL SLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM ) + 290 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + DO 300 J = 1, N + CALL SLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM ) + 300 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = MIN( M, N ) + DO 310 J = 1, N + CALL SLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE, + $ SUM ) + 310 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + DO 320 J = 1, N + CALL SLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM ) + 320 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + SLANTR = VALUE + RETURN +* +* End of SLANTR +* + END diff --git a/dspl/liblapack/SRC/slanv2.f b/dspl/liblapack/SRC/slanv2.f new file mode 100644 index 0000000..e73e545 --- /dev/null +++ b/dspl/liblapack/SRC/slanv2.f @@ -0,0 +1,289 @@ +*> \brief \b SLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric matrix in standard form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLANV2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLANV2( A, B, C, D, RT1R, RT1I, RT2R, RT2I, CS, SN ) +* +* .. Scalar Arguments .. +* REAL A, B, C, CS, D, RT1I, RT1R, RT2I, RT2R, SN +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric +*> matrix in standard form: +*> +*> [ A B ] = [ CS -SN ] [ AA BB ] [ CS SN ] +*> [ C D ] [ SN CS ] [ CC DD ] [-SN CS ] +*> +*> where either +*> 1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or +*> 2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex +*> conjugate eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in,out] A +*> \verbatim +*> A is REAL +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL +*> On entry, the elements of the input matrix. +*> On exit, they are overwritten by the elements of the +*> standardised Schur form. +*> \endverbatim +*> +*> \param[out] RT1R +*> \verbatim +*> RT1R is REAL +*> \endverbatim +*> +*> \param[out] RT1I +*> \verbatim +*> RT1I is REAL +*> \endverbatim +*> +*> \param[out] RT2R +*> \verbatim +*> RT2R is REAL +*> \endverbatim +*> +*> \param[out] RT2I +*> \verbatim +*> RT2I is REAL +*> The real and imaginary parts of the eigenvalues. If the +*> eigenvalues are a complex conjugate pair, RT1I > 0. +*> \endverbatim +*> +*> \param[out] CS +*> \verbatim +*> CS is REAL +*> \endverbatim +*> +*> \param[out] SN +*> \verbatim +*> SN is REAL +*> Parameters of the rotation matrix. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Modified by V. Sima, Research Institute for Informatics, Bucharest, +*> Romania, to reduce the risk of cancellation errors, +*> when computing real eigenvalues, and to ensure, if possible, that +*> abs(RT1R) >= abs(RT2R). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLANV2( A, B, C, D, RT1R, RT1I, RT2R, RT2I, CS, SN ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + REAL A, B, C, CS, D, RT1I, RT1R, RT2I, RT2R, SN +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 ) + REAL MULTPL + PARAMETER ( MULTPL = 4.0E+0 ) +* .. +* .. Local Scalars .. + REAL AA, BB, BCMAX, BCMIS, CC, CS1, DD, EPS, P, SAB, + $ SAC, SCALE, SIGMA, SN1, TAU, TEMP, Z +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2 + EXTERNAL SLAMCH, SLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SIGN, SQRT +* .. +* .. Executable Statements .. +* + EPS = SLAMCH( 'P' ) + IF( C.EQ.ZERO ) THEN + CS = ONE + SN = ZERO + GO TO 10 +* + ELSE IF( B.EQ.ZERO ) THEN +* +* Swap rows and columns +* + CS = ZERO + SN = ONE + TEMP = D + D = A + A = TEMP + B = -C + C = ZERO + GO TO 10 + ELSE IF( (A-D).EQ.ZERO .AND. SIGN( ONE, B ).NE. + $ SIGN( ONE, C ) ) THEN + CS = ONE + SN = ZERO + GO TO 10 + ELSE +* + TEMP = A - D + P = HALF*TEMP + BCMAX = MAX( ABS( B ), ABS( C ) ) + BCMIS = MIN( ABS( B ), ABS( C ) )*SIGN( ONE, B )*SIGN( ONE, C ) + SCALE = MAX( ABS( P ), BCMAX ) + Z = ( P / SCALE )*P + ( BCMAX / SCALE )*BCMIS +* +* If Z is of the order of the machine accuracy, postpone the +* decision on the nature of eigenvalues +* + IF( Z.GE.MULTPL*EPS ) THEN +* +* Real eigenvalues. Compute A and D. +* + Z = P + SIGN( SQRT( SCALE )*SQRT( Z ), P ) + A = D + Z + D = D - ( BCMAX / Z )*BCMIS +* +* Compute B and the rotation matrix +* + TAU = SLAPY2( C, Z ) + CS = Z / TAU + SN = C / TAU + B = B - C + C = ZERO + ELSE +* +* Complex eigenvalues, or real (almost) equal eigenvalues. +* Make diagonal elements equal. +* + SIGMA = B + C + TAU = SLAPY2( SIGMA, TEMP ) + CS = SQRT( HALF*( ONE+ABS( SIGMA ) / TAU ) ) + SN = -( P / ( TAU*CS ) )*SIGN( ONE, SIGMA ) +* +* Compute [ AA BB ] = [ A B ] [ CS -SN ] +* [ CC DD ] [ C D ] [ SN CS ] +* + AA = A*CS + B*SN + BB = -A*SN + B*CS + CC = C*CS + D*SN + DD = -C*SN + D*CS +* +* Compute [ A B ] = [ CS SN ] [ AA BB ] +* [ C D ] [-SN CS ] [ CC DD ] +* + A = AA*CS + CC*SN + B = BB*CS + DD*SN + C = -AA*SN + CC*CS + D = -BB*SN + DD*CS +* + TEMP = HALF*( A+D ) + A = TEMP + D = TEMP +* + IF( C.NE.ZERO ) THEN + IF( B.NE.ZERO ) THEN + IF( SIGN( ONE, B ).EQ.SIGN( ONE, C ) ) THEN +* +* Real eigenvalues: reduce to upper triangular form +* + SAB = SQRT( ABS( B ) ) + SAC = SQRT( ABS( C ) ) + P = SIGN( SAB*SAC, C ) + TAU = ONE / SQRT( ABS( B+C ) ) + A = TEMP + P + D = TEMP - P + B = B - C + C = ZERO + CS1 = SAB*TAU + SN1 = SAC*TAU + TEMP = CS*CS1 - SN*SN1 + SN = CS*SN1 + SN*CS1 + CS = TEMP + END IF + ELSE + B = -C + C = ZERO + TEMP = CS + CS = -SN + SN = TEMP + END IF + END IF + END IF +* + END IF +* + 10 CONTINUE +* +* Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I). +* + RT1R = A + RT2R = D + IF( C.EQ.ZERO ) THEN + RT1I = ZERO + RT2I = ZERO + ELSE + RT1I = SQRT( ABS( B ) )*SQRT( ABS( C ) ) + RT2I = -RT1I + END IF + RETURN +* +* End of SLANV2 +* + END diff --git a/dspl/liblapack/SRC/slapll.f b/dspl/liblapack/SRC/slapll.f new file mode 100644 index 0000000..498fe85 --- /dev/null +++ b/dspl/liblapack/SRC/slapll.f @@ -0,0 +1,165 @@ +*> \brief \b SLAPLL measures the linear dependence of two vectors. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAPLL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAPLL( N, X, INCX, Y, INCY, SSMIN ) +* +* .. Scalar Arguments .. +* INTEGER INCX, INCY, N +* REAL SSMIN +* .. +* .. Array Arguments .. +* REAL X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Given two column vectors X and Y, let +*> +*> A = ( X Y ). +*> +*> The subroutine first computes the QR factorization of A = Q*R, +*> and then computes the SVD of the 2-by-2 upper triangular matrix R. +*> The smaller singular value of R is returned in SSMIN, which is used +*> as the measurement of the linear dependency of the vectors X and Y. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of the vectors X and Y. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, +*> dimension (1+(N-1)*INCX) +*> On entry, X contains the N-vector X. +*> On exit, X is overwritten. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is REAL array, +*> dimension (1+(N-1)*INCY) +*> On entry, Y contains the N-vector Y. +*> On exit, Y is overwritten. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> The increment between successive elements of Y. INCY > 0. +*> \endverbatim +*> +*> \param[out] SSMIN +*> \verbatim +*> SSMIN is REAL +*> The smallest singular value of the N-by-2 matrix A = ( X Y ). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLAPLL( N, X, INCX, Y, INCY, SSMIN ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCX, INCY, N + REAL SSMIN +* .. +* .. Array Arguments .. + REAL X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + REAL A11, A12, A22, C, SSMAX, TAU +* .. +* .. External Functions .. + REAL SDOT + EXTERNAL SDOT +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SLARFG, SLAS2 +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + SSMIN = ZERO + RETURN + END IF +* +* Compute the QR factorization of the N-by-2 matrix ( X Y ) +* + CALL SLARFG( N, X( 1 ), X( 1+INCX ), INCX, TAU ) + A11 = X( 1 ) + X( 1 ) = ONE +* + C = -TAU*SDOT( N, X, INCX, Y, INCY ) + CALL SAXPY( N, C, X, INCX, Y, INCY ) +* + CALL SLARFG( N-1, Y( 1+INCY ), Y( 1+2*INCY ), INCY, TAU ) +* + A12 = Y( 1 ) + A22 = Y( 1+INCY ) +* +* Compute the SVD of 2-by-2 Upper triangular matrix. +* + CALL SLAS2( A11, A12, A22, SSMIN, SSMAX ) +* + RETURN +* +* End of SLAPLL +* + END diff --git a/dspl/liblapack/SRC/slapmr.f b/dspl/liblapack/SRC/slapmr.f new file mode 100644 index 0000000..0473cd3 --- /dev/null +++ b/dspl/liblapack/SRC/slapmr.f @@ -0,0 +1,204 @@ +*> \brief \b SLAPMR rearranges rows of a matrix as specified by a permutation vector. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAPMR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAPMR( FORWRD, M, N, X, LDX, K ) +* +* .. Scalar Arguments .. +* LOGICAL FORWRD +* INTEGER LDX, M, N +* .. +* .. Array Arguments .. +* INTEGER K( * ) +* REAL X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAPMR rearranges the rows of the M by N matrix X as specified +*> by the permutation K(1),K(2),...,K(M) of the integers 1,...,M. +*> If FORWRD = .TRUE., forward permutation: +*> +*> X(K(I),*) is moved X(I,*) for I = 1,2,...,M. +*> +*> If FORWRD = .FALSE., backward permutation: +*> +*> X(I,*) is moved to X(K(I),*) for I = 1,2,...,M. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FORWRD +*> \verbatim +*> FORWRD is LOGICAL +*> = .TRUE., forward permutation +*> = .FALSE., backward permutation +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix X. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix X. N >= 0. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (LDX,N) +*> On entry, the M by N matrix X. +*> On exit, X contains the permuted matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X, LDX >= MAX(1,M). +*> \endverbatim +*> +*> \param[in,out] K +*> \verbatim +*> K is INTEGER array, dimension (M) +*> On entry, K contains the permutation vector. K is used as +*> internal workspace, but reset to its original value on +*> output. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLAPMR( FORWRD, M, N, X, LDX, K ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL FORWRD + INTEGER LDX, M, N +* .. +* .. Array Arguments .. + INTEGER K( * ) + REAL X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IN, J, JJ + REAL TEMP +* .. +* .. Executable Statements .. +* + IF( M.LE.1 ) + $ RETURN +* + DO 10 I = 1, M + K( I ) = -K( I ) + 10 CONTINUE +* + IF( FORWRD ) THEN +* +* Forward permutation +* + DO 50 I = 1, M +* + IF( K( I ).GT.0 ) + $ GO TO 40 +* + J = I + K( J ) = -K( J ) + IN = K( J ) +* + 20 CONTINUE + IF( K( IN ).GT.0 ) + $ GO TO 40 +* + DO 30 JJ = 1, N + TEMP = X( J, JJ ) + X( J, JJ ) = X( IN, JJ ) + X( IN, JJ ) = TEMP + 30 CONTINUE +* + K( IN ) = -K( IN ) + J = IN + IN = K( IN ) + GO TO 20 +* + 40 CONTINUE +* + 50 CONTINUE +* + ELSE +* +* Backward permutation +* + DO 90 I = 1, M +* + IF( K( I ).GT.0 ) + $ GO TO 80 +* + K( I ) = -K( I ) + J = K( I ) + 60 CONTINUE + IF( J.EQ.I ) + $ GO TO 80 +* + DO 70 JJ = 1, N + TEMP = X( I, JJ ) + X( I, JJ ) = X( J, JJ ) + X( J, JJ ) = TEMP + 70 CONTINUE +* + K( J ) = -K( J ) + J = K( J ) + GO TO 60 +* + 80 CONTINUE +* + 90 CONTINUE +* + END IF +* + RETURN +* +* End of ZLAPMT +* + END + diff --git a/dspl/liblapack/SRC/slapmt.f b/dspl/liblapack/SRC/slapmt.f new file mode 100644 index 0000000..67e8aab --- /dev/null +++ b/dspl/liblapack/SRC/slapmt.f @@ -0,0 +1,203 @@ +*> \brief \b SLAPMT performs a forward or backward permutation of the columns of a matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAPMT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAPMT( FORWRD, M, N, X, LDX, K ) +* +* .. Scalar Arguments .. +* LOGICAL FORWRD +* INTEGER LDX, M, N +* .. +* .. Array Arguments .. +* INTEGER K( * ) +* REAL X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAPMT rearranges the columns of the M by N matrix X as specified +*> by the permutation K(1),K(2),...,K(N) of the integers 1,...,N. +*> If FORWRD = .TRUE., forward permutation: +*> +*> X(*,K(J)) is moved X(*,J) for J = 1,2,...,N. +*> +*> If FORWRD = .FALSE., backward permutation: +*> +*> X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FORWRD +*> \verbatim +*> FORWRD is LOGICAL +*> = .TRUE., forward permutation +*> = .FALSE., backward permutation +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix X. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix X. N >= 0. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (LDX,N) +*> On entry, the M by N matrix X. +*> On exit, X contains the permuted matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X, LDX >= MAX(1,M). +*> \endverbatim +*> +*> \param[in,out] K +*> \verbatim +*> K is INTEGER array, dimension (N) +*> On entry, K contains the permutation vector. K is used as +*> internal workspace, but reset to its original value on +*> output. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLAPMT( FORWRD, M, N, X, LDX, K ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL FORWRD + INTEGER LDX, M, N +* .. +* .. Array Arguments .. + INTEGER K( * ) + REAL X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, II, J, IN + REAL TEMP +* .. +* .. Executable Statements .. +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, N + K( I ) = -K( I ) + 10 CONTINUE +* + IF( FORWRD ) THEN +* +* Forward permutation +* + DO 60 I = 1, N +* + IF( K( I ).GT.0 ) + $ GO TO 40 +* + J = I + K( J ) = -K( J ) + IN = K( J ) +* + 20 CONTINUE + IF( K( IN ).GT.0 ) + $ GO TO 40 +* + DO 30 II = 1, M + TEMP = X( II, J ) + X( II, J ) = X( II, IN ) + X( II, IN ) = TEMP + 30 CONTINUE +* + K( IN ) = -K( IN ) + J = IN + IN = K( IN ) + GO TO 20 +* + 40 CONTINUE +* + 60 CONTINUE +* + ELSE +* +* Backward permutation +* + DO 110 I = 1, N +* + IF( K( I ).GT.0 ) + $ GO TO 100 +* + K( I ) = -K( I ) + J = K( I ) + 80 CONTINUE + IF( J.EQ.I ) + $ GO TO 100 +* + DO 90 II = 1, M + TEMP = X( II, I ) + X( II, I ) = X( II, J ) + X( II, J ) = TEMP + 90 CONTINUE +* + K( J ) = -K( J ) + J = K( J ) + GO TO 80 +* + 100 CONTINUE + + 110 CONTINUE +* + END IF +* + RETURN +* +* End of SLAPMT +* + END diff --git a/dspl/liblapack/SRC/slapy2.f b/dspl/liblapack/SRC/slapy2.f new file mode 100644 index 0000000..3b1b7e4 --- /dev/null +++ b/dspl/liblapack/SRC/slapy2.f @@ -0,0 +1,119 @@ +*> \brief \b SLAPY2 returns sqrt(x2+y2). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAPY2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLAPY2( X, Y ) +* +* .. Scalar Arguments .. +* REAL X, Y +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary +*> overflow. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] X +*> \verbatim +*> X is REAL +*> \endverbatim +*> +*> \param[in] Y +*> \verbatim +*> Y is REAL +*> X and Y specify the values x and y. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SLAPY2( X, Y ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + REAL X, Y +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + REAL W, XABS, YABS, Z + LOGICAL X_IS_NAN, Y_IS_NAN +* .. +* .. External Functions .. + LOGICAL SISNAN + EXTERNAL SISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* .. +* .. Executable Statements .. +* + X_IS_NAN = SISNAN( X ) + Y_IS_NAN = SISNAN( Y ) + IF ( X_IS_NAN ) SLAPY2 = X + IF ( Y_IS_NAN ) SLAPY2 = Y +* + IF ( .NOT.( X_IS_NAN.OR.Y_IS_NAN ) ) THEN + XABS = ABS( X ) + YABS = ABS( Y ) + W = MAX( XABS, YABS ) + Z = MIN( XABS, YABS ) + IF( Z.EQ.ZERO ) THEN + SLAPY2 = W + ELSE + SLAPY2 = W*SQRT( ONE+( Z / W )**2 ) + END IF + END IF + RETURN +* +* End of SLAPY2 +* + END diff --git a/dspl/liblapack/SRC/slapy3.f b/dspl/liblapack/SRC/slapy3.f new file mode 100644 index 0000000..04a847f --- /dev/null +++ b/dspl/liblapack/SRC/slapy3.f @@ -0,0 +1,111 @@ +*> \brief \b SLAPY3 returns sqrt(x2+y2+z2). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAPY3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* REAL FUNCTION SLAPY3( X, Y, Z ) +* +* .. Scalar Arguments .. +* REAL X, Y, Z +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause +*> unnecessary overflow. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] X +*> \verbatim +*> X is REAL +*> \endverbatim +*> +*> \param[in] Y +*> \verbatim +*> Y is REAL +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is REAL +*> X, Y and Z specify the values x, y and z. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + REAL FUNCTION SLAPY3( X, Y, Z ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + REAL X, Y, Z +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + REAL W, XABS, YABS, ZABS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + XABS = ABS( X ) + YABS = ABS( Y ) + ZABS = ABS( Z ) + W = MAX( XABS, YABS, ZABS ) + IF( W.EQ.ZERO ) THEN +* W can be zero for max(0,nan,0) +* adding all three entries together will make sure +* NaN will not disappear. + SLAPY3 = XABS + YABS + ZABS + ELSE + SLAPY3 = W*SQRT( ( XABS / W )**2+( YABS / W )**2+ + $ ( ZABS / W )**2 ) + END IF + RETURN +* +* End of SLAPY3 +* + END diff --git a/dspl/liblapack/SRC/slaqgb.f b/dspl/liblapack/SRC/slaqgb.f new file mode 100644 index 0000000..633befa --- /dev/null +++ b/dspl/liblapack/SRC/slaqgb.f @@ -0,0 +1,256 @@ +*> \brief \b SLAQGB scales a general band matrix, using row and column scaling factors computed by sgbequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAQGB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAQGB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, +* AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED +* INTEGER KL, KU, LDAB, M, N +* REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), C( * ), R( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAQGB equilibrates a general M by N band matrix A with KL +*> subdiagonals and KU superdiagonals using the row and scaling factors +*> in the vectors R and C. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +*> +*> On exit, the equilibrated matrix, in the same storage format +*> as A. See EQUED for the form of the equilibrated matrix. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDA >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] R +*> \verbatim +*> R is REAL array, dimension (M) +*> The row scale factors for A. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. +*> \endverbatim +*> +*> \param[in] ROWCND +*> \verbatim +*> ROWCND is REAL +*> Ratio of the smallest R(i) to the largest R(i). +*> \endverbatim +*> +*> \param[in] COLCND +*> \verbatim +*> COLCND is REAL +*> Ratio of the smallest C(i) to the largest C(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if row or column scaling +*> should be done based on the ratio of the row or column scaling +*> factors. If ROWCND < THRESH, row scaling is done, and if +*> COLCND < THRESH, column scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if row scaling +*> should be done based on the absolute size of the largest matrix +*> element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGBauxiliary +* +* ===================================================================== + SUBROUTINE SLAQGB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED + INTEGER KL, KU, LDAB, M, N + REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), C( * ), R( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) + $ THEN +* +* No row scaling +* + IF( COLCND.GE.THRESH ) THEN +* +* No column scaling +* + EQUED = 'N' + ELSE +* +* Column scaling +* + DO 20 J = 1, N + CJ = C( J ) + DO 10 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = CJ*AB( KU+1+I-J, J ) + 10 CONTINUE + 20 CONTINUE + EQUED = 'C' + END IF + ELSE IF( COLCND.GE.THRESH ) THEN +* +* Row scaling, no column scaling +* + DO 40 J = 1, N + DO 30 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = R( I )*AB( KU+1+I-J, J ) + 30 CONTINUE + 40 CONTINUE + EQUED = 'R' + ELSE +* +* Row and column scaling +* + DO 60 J = 1, N + CJ = C( J ) + DO 50 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = CJ*R( I )*AB( KU+1+I-J, J ) + 50 CONTINUE + 60 CONTINUE + EQUED = 'B' + END IF +* + RETURN +* +* End of SLAQGB +* + END diff --git a/dspl/liblapack/SRC/slaqge.f b/dspl/liblapack/SRC/slaqge.f new file mode 100644 index 0000000..0622ff6 --- /dev/null +++ b/dspl/liblapack/SRC/slaqge.f @@ -0,0 +1,236 @@ +*> \brief \b SLAQGE scales a general rectangular matrix, using row and column scaling factors computed by sgeequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAQGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAQGE( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, +* EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED +* INTEGER LDA, M, N +* REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), C( * ), R( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAQGE equilibrates a general M by N matrix A using the row and +*> column scaling factors in the vectors R and C. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M by N matrix A. +*> On exit, the equilibrated matrix. See EQUED for the form of +*> the equilibrated matrix. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(M,1). +*> \endverbatim +*> +*> \param[in] R +*> \verbatim +*> R is REAL array, dimension (M) +*> The row scale factors for A. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (N) +*> The column scale factors for A. +*> \endverbatim +*> +*> \param[in] ROWCND +*> \verbatim +*> ROWCND is REAL +*> Ratio of the smallest R(i) to the largest R(i). +*> \endverbatim +*> +*> \param[in] COLCND +*> \verbatim +*> COLCND is REAL +*> Ratio of the smallest C(i) to the largest C(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if row or column scaling +*> should be done based on the ratio of the row or column scaling +*> factors. If ROWCND < THRESH, row scaling is done, and if +*> COLCND < THRESH, column scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if row scaling +*> should be done based on the absolute size of the largest matrix +*> element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEauxiliary +* +* ===================================================================== + SUBROUTINE SLAQGE( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED + INTEGER LDA, M, N + REAL AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( * ), R( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) + $ THEN +* +* No row scaling +* + IF( COLCND.GE.THRESH ) THEN +* +* No column scaling +* + EQUED = 'N' + ELSE +* +* Column scaling +* + DO 20 J = 1, N + CJ = C( J ) + DO 10 I = 1, M + A( I, J ) = CJ*A( I, J ) + 10 CONTINUE + 20 CONTINUE + EQUED = 'C' + END IF + ELSE IF( COLCND.GE.THRESH ) THEN +* +* Row scaling, no column scaling +* + DO 40 J = 1, N + DO 30 I = 1, M + A( I, J ) = R( I )*A( I, J ) + 30 CONTINUE + 40 CONTINUE + EQUED = 'R' + ELSE +* +* Row and column scaling +* + DO 60 J = 1, N + CJ = C( J ) + DO 50 I = 1, M + A( I, J ) = CJ*R( I )*A( I, J ) + 50 CONTINUE + 60 CONTINUE + EQUED = 'B' + END IF +* + RETURN +* +* End of SLAQGE +* + END diff --git a/dspl/liblapack/SRC/slaqp2.f b/dspl/liblapack/SRC/slaqp2.f new file mode 100644 index 0000000..e99324b --- /dev/null +++ b/dspl/liblapack/SRC/slaqp2.f @@ -0,0 +1,262 @@ +*> \brief \b SLAQP2 computes a QR factorization with column pivoting of the matrix block. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAQP2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, +* WORK ) +* +* .. Scalar Arguments .. +* INTEGER LDA, M, N, OFFSET +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* REAL A( LDA, * ), TAU( * ), VN1( * ), VN2( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAQP2 computes a QR factorization with column pivoting of +*> the block A(OFFSET+1:M,1:N). +*> The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] OFFSET +*> \verbatim +*> OFFSET is INTEGER +*> The number of rows of the matrix A that must be pivoted +*> but no factorized. OFFSET >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the upper triangle of block A(OFFSET+1:M,1:N) is +*> the triangular factor obtained; the elements in block +*> A(OFFSET+1:M,1:N) below the diagonal, together with the +*> array TAU, represent the orthogonal matrix Q as a product of +*> elementary reflectors. Block A(1:OFFSET,1:N) has been +*> accordingly pivoted, but no factorized. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +*> to the front of A*P (a leading column); if JPVT(i) = 0, +*> the i-th column of A is a free column. +*> On exit, if JPVT(i) = k, then the i-th column of A*P +*> was the k-th column of A. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[in,out] VN1 +*> \verbatim +*> VN1 is REAL array, dimension (N) +*> The vector with the partial column norms. +*> \endverbatim +*> +*> \param[in,out] VN2 +*> \verbatim +*> VN2 is REAL array, dimension (N) +*> The vector with the exact column norms. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +*> X. Sun, Computer Science Dept., Duke University, USA +*> \n +*> Partial column norm updating strategy modified on April 2011 +*> Z. Drmac and Z. Bujanovic, Dept. of Mathematics, +*> University of Zagreb, Croatia. +* +*> \par References: +* ================ +*> +*> LAPACK Working Note 176 +* +*> \htmlonly +*> [PDF] +*> \endhtmlonly +* +* ===================================================================== + SUBROUTINE SLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER LDA, M, N, OFFSET +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL A( LDA, * ), TAU( * ), VN1( * ), VN2( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, ITEMP, J, MN, OFFPI, PVT + REAL AII, TEMP, TEMP2, TOL3Z +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFG, SSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH, SNRM2 + EXTERNAL ISAMAX, SLAMCH, SNRM2 +* .. +* .. Executable Statements .. +* + MN = MIN( M-OFFSET, N ) + TOL3Z = SQRT(SLAMCH('Epsilon')) +* +* Compute factorization. +* + DO 20 I = 1, MN +* + OFFPI = OFFSET + I +* +* Determine ith pivot column and swap if necessary. +* + PVT = ( I-1 ) + ISAMAX( N-I+1, VN1( I ), 1 ) +* + IF( PVT.NE.I ) THEN + CALL SSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( I ) + JPVT( I ) = ITEMP + VN1( PVT ) = VN1( I ) + VN2( PVT ) = VN2( I ) + END IF +* +* Generate elementary reflector H(i). +* + IF( OFFPI.LT.M ) THEN + CALL SLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), 1, + $ TAU( I ) ) + ELSE + CALL SLARFG( 1, A( M, I ), A( M, I ), 1, TAU( I ) ) + END IF +* + IF( I.LT.N ) THEN +* +* Apply H(i)**T to A(offset+i:m,i+1:n) from the left. +* + AII = A( OFFPI, I ) + A( OFFPI, I ) = ONE + CALL SLARF( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1, + $ TAU( I ), A( OFFPI, I+1 ), LDA, WORK( 1 ) ) + A( OFFPI, I ) = AII + END IF +* +* Update partial column norms. +* + DO 10 J = I + 1, N + IF( VN1( J ).NE.ZERO ) THEN +* +* NOTE: The following 4 lines follow from the analysis in +* Lapack Working Note 176. +* + TEMP = ONE - ( ABS( A( OFFPI, J ) ) / VN1( J ) )**2 + TEMP = MAX( TEMP, ZERO ) + TEMP2 = TEMP*( VN1( J ) / VN2( J ) )**2 + IF( TEMP2 .LE. TOL3Z ) THEN + IF( OFFPI.LT.M ) THEN + VN1( J ) = SNRM2( M-OFFPI, A( OFFPI+1, J ), 1 ) + VN2( J ) = VN1( J ) + ELSE + VN1( J ) = ZERO + VN2( J ) = ZERO + END IF + ELSE + VN1( J ) = VN1( J )*SQRT( TEMP ) + END IF + END IF + 10 CONTINUE +* + 20 CONTINUE +* + RETURN +* +* End of SLAQP2 +* + END diff --git a/dspl/liblapack/SRC/slaqps.f b/dspl/liblapack/SRC/slaqps.f new file mode 100644 index 0000000..9c62ec8 --- /dev/null +++ b/dspl/liblapack/SRC/slaqps.f @@ -0,0 +1,359 @@ +*> \brief \b SLAQPS computes a step of QR factorization with column pivoting of a real m-by-n matrix A by using BLAS level 3. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAQPS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1, +* VN2, AUXV, F, LDF ) +* +* .. Scalar Arguments .. +* INTEGER KB, LDA, LDF, M, N, NB, OFFSET +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* REAL A( LDA, * ), AUXV( * ), F( LDF, * ), TAU( * ), +* $ VN1( * ), VN2( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAQPS computes a step of QR factorization with column pivoting +*> of a real M-by-N matrix A by using Blas-3. It tries to factorize +*> NB columns from A starting from the row OFFSET+1, and updates all +*> of the matrix with Blas-3 xGEMM. +*> +*> In some cases, due to catastrophic cancellations, it cannot +*> factorize NB columns. Hence, the actual number of factorized +*> columns is returned in KB. +*> +*> Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0 +*> \endverbatim +*> +*> \param[in] OFFSET +*> \verbatim +*> OFFSET is INTEGER +*> The number of rows of A that have been factorized in +*> previous steps. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of columns to factorize. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns actually factorized. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, block A(OFFSET+1:M,1:KB) is the triangular +*> factor obtained and block A(1:OFFSET,1:N) has been +*> accordingly pivoted, but no factorized. +*> The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has +*> been updated. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> JPVT(I) = K <==> Column K of the full matrix A has been +*> permuted into position I in AP. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (KB) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[in,out] VN1 +*> \verbatim +*> VN1 is REAL array, dimension (N) +*> The vector with the partial column norms. +*> \endverbatim +*> +*> \param[in,out] VN2 +*> \verbatim +*> VN2 is REAL array, dimension (N) +*> The vector with the exact column norms. +*> \endverbatim +*> +*> \param[in,out] AUXV +*> \verbatim +*> AUXV is REAL array, dimension (NB) +*> Auxiliar vector. +*> \endverbatim +*> +*> \param[in,out] F +*> \verbatim +*> F is REAL array, dimension (LDF,NB) +*> Matrix F**T = L*Y**T*A. +*> \endverbatim +*> +*> \param[in] LDF +*> \verbatim +*> LDF is INTEGER +*> The leading dimension of the array F. LDF >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +*> X. Sun, Computer Science Dept., Duke University, USA +*> +*> \n +*> Partial column norm updating strategy modified on April 2011 +*> Z. Drmac and Z. Bujanovic, Dept. of Mathematics, +*> University of Zagreb, Croatia. +* +*> \par References: +* ================ +*> +*> LAPACK Working Note 176 +* +*> \htmlonly +*> [PDF] +*> \endhtmlonly +* +* ===================================================================== + SUBROUTINE SLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1, + $ VN2, AUXV, F, LDF ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER KB, LDA, LDF, M, N, NB, OFFSET +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + REAL A( LDA, * ), AUXV( * ), F( LDF, * ), TAU( * ), + $ VN1( * ), VN2( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER ITEMP, J, K, LASTRK, LSTICC, PVT, RK + REAL AKK, TEMP, TEMP2, TOL3Z +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SGEMV, SLARFG, SSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, NINT, REAL, SQRT +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH, SNRM2 + EXTERNAL ISAMAX, SLAMCH, SNRM2 +* .. +* .. Executable Statements .. +* + LASTRK = MIN( M, N+OFFSET ) + LSTICC = 0 + K = 0 + TOL3Z = SQRT(SLAMCH('Epsilon')) +* +* Beginning of while loop. +* + 10 CONTINUE + IF( ( K.LT.NB ) .AND. ( LSTICC.EQ.0 ) ) THEN + K = K + 1 + RK = OFFSET + K +* +* Determine ith pivot column and swap if necessary +* + PVT = ( K-1 ) + ISAMAX( N-K+1, VN1( K ), 1 ) + IF( PVT.NE.K ) THEN + CALL SSWAP( M, A( 1, PVT ), 1, A( 1, K ), 1 ) + CALL SSWAP( K-1, F( PVT, 1 ), LDF, F( K, 1 ), LDF ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( K ) + JPVT( K ) = ITEMP + VN1( PVT ) = VN1( K ) + VN2( PVT ) = VN2( K ) + END IF +* +* Apply previous Householder reflectors to column K: +* A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)**T. +* + IF( K.GT.1 ) THEN + CALL SGEMV( 'No transpose', M-RK+1, K-1, -ONE, A( RK, 1 ), + $ LDA, F( K, 1 ), LDF, ONE, A( RK, K ), 1 ) + END IF +* +* Generate elementary reflector H(k). +* + IF( RK.LT.M ) THEN + CALL SLARFG( M-RK+1, A( RK, K ), A( RK+1, K ), 1, TAU( K ) ) + ELSE + CALL SLARFG( 1, A( RK, K ), A( RK, K ), 1, TAU( K ) ) + END IF +* + AKK = A( RK, K ) + A( RK, K ) = ONE +* +* Compute Kth column of F: +* +* Compute F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)**T*A(RK:M,K). +* + IF( K.LT.N ) THEN + CALL SGEMV( 'Transpose', M-RK+1, N-K, TAU( K ), + $ A( RK, K+1 ), LDA, A( RK, K ), 1, ZERO, + $ F( K+1, K ), 1 ) + END IF +* +* Padding F(1:K,K) with zeros. +* + DO 20 J = 1, K + F( J, K ) = ZERO + 20 CONTINUE +* +* Incremental updating of F: +* F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)**T +* *A(RK:M,K). +* + IF( K.GT.1 ) THEN + CALL SGEMV( 'Transpose', M-RK+1, K-1, -TAU( K ), A( RK, 1 ), + $ LDA, A( RK, K ), 1, ZERO, AUXV( 1 ), 1 ) +* + CALL SGEMV( 'No transpose', N, K-1, ONE, F( 1, 1 ), LDF, + $ AUXV( 1 ), 1, ONE, F( 1, K ), 1 ) + END IF +* +* Update the current row of A: +* A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)**T. +* + IF( K.LT.N ) THEN + CALL SGEMV( 'No transpose', N-K, K, -ONE, F( K+1, 1 ), LDF, + $ A( RK, 1 ), LDA, ONE, A( RK, K+1 ), LDA ) + END IF +* +* Update partial column norms. +* + IF( RK.LT.LASTRK ) THEN + DO 30 J = K + 1, N + IF( VN1( J ).NE.ZERO ) THEN +* +* NOTE: The following 4 lines follow from the analysis in +* Lapack Working Note 176. +* + TEMP = ABS( A( RK, J ) ) / VN1( J ) + TEMP = MAX( ZERO, ( ONE+TEMP )*( ONE-TEMP ) ) + TEMP2 = TEMP*( VN1( J ) / VN2( J ) )**2 + IF( TEMP2 .LE. TOL3Z ) THEN + VN2( J ) = REAL( LSTICC ) + LSTICC = J + ELSE + VN1( J ) = VN1( J )*SQRT( TEMP ) + END IF + END IF + 30 CONTINUE + END IF +* + A( RK, K ) = AKK +* +* End of while loop. +* + GO TO 10 + END IF + KB = K + RK = OFFSET + KB +* +* Apply the block reflector to the rest of the matrix: +* A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) - +* A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)**T. +* + IF( KB.LT.MIN( N, M-OFFSET ) ) THEN + CALL SGEMM( 'No transpose', 'Transpose', M-RK, N-KB, KB, -ONE, + $ A( RK+1, 1 ), LDA, F( KB+1, 1 ), LDF, ONE, + $ A( RK+1, KB+1 ), LDA ) + END IF +* +* Recomputation of difficult columns. +* + 40 CONTINUE + IF( LSTICC.GT.0 ) THEN + ITEMP = NINT( VN2( LSTICC ) ) + VN1( LSTICC ) = SNRM2( M-RK, A( RK+1, LSTICC ), 1 ) +* +* NOTE: The computation of VN1( LSTICC ) relies on the fact that +* SNRM2 does not fail on vectors with norm below the value of +* SQRT(DLAMCH('S')) +* + VN2( LSTICC ) = VN1( LSTICC ) + LSTICC = ITEMP + GO TO 40 + END IF +* + RETURN +* +* End of SLAQPS +* + END diff --git a/dspl/liblapack/SRC/slaqr0.f b/dspl/liblapack/SRC/slaqr0.f new file mode 100644 index 0000000..1dcd3d1 --- /dev/null +++ b/dspl/liblapack/SRC/slaqr0.f @@ -0,0 +1,739 @@ +*> \brief \b SLAQR0 computes the eigenvalues of a Hessenberg matrix, and optionally the matrices from the Schur decomposition. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAQR0 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, +* ILOZ, IHIZ, Z, LDZ, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* REAL H( LDH, * ), WI( * ), WORK( * ), WR( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAQR0 computes the eigenvalues of a Hessenberg matrix H +*> and, optionally, the matrices T and Z from the Schur decomposition +*> H = Z T Z**T, where T is an upper quasi-triangular matrix (the +*> Schur form), and Z is the orthogonal matrix of Schur vectors. +*> +*> Optionally Z may be postmultiplied into an input orthogonal +*> matrix Q so that this routine can give the Schur factorization +*> of a matrix A which has been reduced to the Hessenberg form H +*> by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> = .TRUE. : the full Schur form T is required; +*> = .FALSE.: only eigenvalues are required. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> = .TRUE. : the matrix of Schur vectors Z is required; +*> = .FALSE.: Schur vectors are not required. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N .GE. 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> It is assumed that H is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, +*> H(ILO,ILO-1) is zero. ILO and IHI are normally set by a +*> previous call to SGEBAL, and then passed to SGEHRD when the +*> matrix output by SGEBAL is reduced to Hessenberg form. +*> Otherwise, ILO and IHI should be set to 1 and N, +*> respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. +*> If N = 0, then ILO = 1 and IHI = 0. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is REAL array, dimension (LDH,N) +*> On entry, the upper Hessenberg matrix H. +*> On exit, if INFO = 0 and WANTT is .TRUE., then H contains +*> the upper quasi-triangular matrix T from the Schur +*> decomposition (the Schur form); 2-by-2 diagonal blocks +*> (corresponding to complex conjugate pairs of eigenvalues) +*> are returned in standard form, with H(i,i) = H(i+1,i+1) +*> and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is +*> .FALSE., then the contents of H are unspecified on exit. +*> (The output value of H when INFO.GT.0 is given under the +*> description of INFO below.) +*> +*> This subroutine may explicitly set H(i,j) = 0 for i.GT.j and +*> j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH .GE. max(1,N). +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is REAL array, dimension (IHI) +*> \endverbatim +*> +*> \param[out] WI +*> \verbatim +*> WI is REAL array, dimension (IHI) +*> The real and imaginary parts, respectively, of the computed +*> eigenvalues of H(ILO:IHI,ILO:IHI) are stored in WR(ILO:IHI) +*> and WI(ILO:IHI). If two eigenvalues are computed as a +*> complex conjugate pair, they are stored in consecutive +*> elements of WR and WI, say the i-th and (i+1)th, with +*> WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then +*> the eigenvalues are stored in the same order as on the +*> diagonal of the Schur form returned in H, with +*> WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal +*> block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and +*> WI(i+1) = -WI(i). +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. +*> 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ,IHI) +*> If WANTZ is .FALSE., then Z is not referenced. +*> If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is +*> replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the +*> orthogonal Schur factor of H(ILO:IHI,ILO:IHI). +*> (The output value of Z when INFO.GT.0 is given under +*> the description of INFO below.) +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. if WANTZ is .TRUE. +*> then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension LWORK +*> On exit, if LWORK = -1, WORK(1) returns an estimate of +*> the optimal value for LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK .GE. max(1,N) +*> is sufficient, but LWORK typically as large as 6*N may +*> be required for optimal performance. A workspace query +*> to determine the optimal workspace size is recommended. +*> +*> If LWORK = -1, then SLAQR0 does a workspace query. +*> In this case, SLAQR0 checks the input parameters and +*> estimates the optimal workspace size for the given +*> values of N, ILO and IHI. The estimate is returned +*> in WORK(1). No error message related to LWORK is +*> issued by XERBLA. Neither H nor Z are accessed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> .GT. 0: if INFO = i, SLAQR0 failed to compute all of +*> the eigenvalues. Elements 1:ilo-1 and i+1:n of WR +*> and WI contain those eigenvalues which have been +*> successfully computed. (Failures are rare.) +*> +*> If INFO .GT. 0 and WANT is .FALSE., then on exit, +*> the remaining unconverged eigenvalues are the eigen- +*> values of the upper Hessenberg matrix rows and +*> columns ILO through INFO of the final, output +*> value of H. +*> +*> If INFO .GT. 0 and WANTT is .TRUE., then on exit +*> +*> (*) (initial value of H)*U = U*(final value of H) +*> +*> where U is an orthogonal matrix. The final +*> value of H is upper Hessenberg and quasi-triangular +*> in rows and columns INFO+1 through IHI. +*> +*> If INFO .GT. 0 and WANTZ is .TRUE., then on exit +*> +*> (final value of Z(ILO:IHI,ILOZ:IHIZ) +*> = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U +*> +*> where U is the orthogonal matrix in (*) (regard- +*> less of the value of WANTT.) +*> +*> If INFO .GT. 0 and WANTZ is .FALSE., then Z is not +*> accessed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +* +*> \par References: +* ================ +*> +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages +*> 929--947, 2002. +*> \n +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part II: Aggressive Early Deflation, SIAM Journal +*> of Matrix Analysis, volume 23, pages 948--973, 2002. +*> +* ===================================================================== + SUBROUTINE SLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, + $ ILOZ, IHIZ, Z, LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + REAL H( LDH, * ), WI( * ), WORK( * ), WR( * ), + $ Z( LDZ, * ) +* .. +* +* ================================================================ +* .. Parameters .. +* +* ==== Matrices of order NTINY or smaller must be processed by +* . SLAHQR because of insufficient subdiagonal scratch space. +* . (This is a hard limit.) ==== + INTEGER NTINY + PARAMETER ( NTINY = 11 ) +* +* ==== Exceptional deflation windows: try to cure rare +* . slow convergence by varying the size of the +* . deflation window after KEXNW iterations. ==== + INTEGER KEXNW + PARAMETER ( KEXNW = 5 ) +* +* ==== Exceptional shifts: try to cure rare slow convergence +* . with ad-hoc exceptional shifts every KEXSH iterations. +* . ==== + INTEGER KEXSH + PARAMETER ( KEXSH = 6 ) +* +* ==== The constants WILK1 and WILK2 are used to form the +* . exceptional shifts. ==== + REAL WILK1, WILK2 + PARAMETER ( WILK1 = 0.75e0, WILK2 = -0.4375e0 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0e0, ONE = 1.0e0 ) +* .. +* .. Local Scalars .. + REAL AA, BB, CC, CS, DD, SN, SS, SWAP + INTEGER I, INF, IT, ITMAX, K, KACC22, KBOT, KDU, KS, + $ KT, KTOP, KU, KV, KWH, KWTOP, KWV, LD, LS, + $ LWKOPT, NDEC, NDFL, NH, NHO, NIBBLE, NMIN, NS, + $ NSMAX, NSR, NVE, NW, NWMAX, NWR, NWUPBD + LOGICAL SORTED + CHARACTER JBCMPZ*2 +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Local Arrays .. + REAL ZDUM( 1, 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SLACPY, SLAHQR, SLANV2, SLAQR3, SLAQR4, SLAQR5 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, MAX, MIN, MOD, REAL +* .. +* .. Executable Statements .. + INFO = 0 +* +* ==== Quick return for N = 0: nothing to do. ==== +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = ONE + RETURN + END IF +* + IF( N.LE.NTINY ) THEN +* +* ==== Tiny matrices must use SLAHQR. ==== +* + LWKOPT = 1 + IF( LWORK.NE.-1 ) + $ CALL SLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, + $ ILOZ, IHIZ, Z, LDZ, INFO ) + ELSE +* +* ==== Use small bulge multi-shift QR with aggressive early +* . deflation on larger-than-tiny matrices. ==== +* +* ==== Hope for the best. ==== +* + INFO = 0 +* +* ==== Set up job flags for ILAENV. ==== +* + IF( WANTT ) THEN + JBCMPZ( 1: 1 ) = 'S' + ELSE + JBCMPZ( 1: 1 ) = 'E' + END IF + IF( WANTZ ) THEN + JBCMPZ( 2: 2 ) = 'V' + ELSE + JBCMPZ( 2: 2 ) = 'N' + END IF +* +* ==== NWR = recommended deflation window size. At this +* . point, N .GT. NTINY = 11, so there is enough +* . subdiagonal workspace for NWR.GE.2 as required. +* . (In fact, there is enough subdiagonal space for +* . NWR.GE.3.) ==== +* + NWR = ILAENV( 13, 'SLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NWR = MAX( 2, NWR ) + NWR = MIN( IHI-ILO+1, ( N-1 ) / 3, NWR ) +* +* ==== NSR = recommended number of simultaneous shifts. +* . At this point N .GT. NTINY = 11, so there is at +* . enough subdiagonal workspace for NSR to be even +* . and greater than or equal to two as required. ==== +* + NSR = ILAENV( 15, 'SLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NSR = MIN( NSR, ( N+6 ) / 9, IHI-ILO ) + NSR = MAX( 2, NSR-MOD( NSR, 2 ) ) +* +* ==== Estimate optimal workspace ==== +* +* ==== Workspace query call to SLAQR3 ==== +* + CALL SLAQR3( WANTT, WANTZ, N, ILO, IHI, NWR+1, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, WR, WI, H, LDH, N, H, LDH, + $ N, H, LDH, WORK, -1 ) +* +* ==== Optimal workspace = MAX(SLAQR5, SLAQR3) ==== +* + LWKOPT = MAX( 3*NSR / 2, INT( WORK( 1 ) ) ) +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = REAL( LWKOPT ) + RETURN + END IF +* +* ==== SLAHQR/SLAQR0 crossover point ==== +* + NMIN = ILAENV( 12, 'SLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NMIN = MAX( NTINY, NMIN ) +* +* ==== Nibble crossover point ==== +* + NIBBLE = ILAENV( 14, 'SLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NIBBLE = MAX( 0, NIBBLE ) +* +* ==== Accumulate reflections during ttswp? Use block +* . 2-by-2 structure during matrix-matrix multiply? ==== +* + KACC22 = ILAENV( 16, 'SLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + KACC22 = MAX( 0, KACC22 ) + KACC22 = MIN( 2, KACC22 ) +* +* ==== NWMAX = the largest possible deflation window for +* . which there is sufficient workspace. ==== +* + NWMAX = MIN( ( N-1 ) / 3, LWORK / 2 ) + NW = NWMAX +* +* ==== NSMAX = the Largest number of simultaneous shifts +* . for which there is sufficient workspace. ==== +* + NSMAX = MIN( ( N+6 ) / 9, 2*LWORK / 3 ) + NSMAX = NSMAX - MOD( NSMAX, 2 ) +* +* ==== NDFL: an iteration count restarted at deflation. ==== +* + NDFL = 1 +* +* ==== ITMAX = iteration limit ==== +* + ITMAX = MAX( 30, 2*KEXSH )*MAX( 10, ( IHI-ILO+1 ) ) +* +* ==== Last row and column in the active block ==== +* + KBOT = IHI +* +* ==== Main Loop ==== +* + DO 80 IT = 1, ITMAX +* +* ==== Done when KBOT falls below ILO ==== +* + IF( KBOT.LT.ILO ) + $ GO TO 90 +* +* ==== Locate active block ==== +* + DO 10 K = KBOT, ILO + 1, -1 + IF( H( K, K-1 ).EQ.ZERO ) + $ GO TO 20 + 10 CONTINUE + K = ILO + 20 CONTINUE + KTOP = K +* +* ==== Select deflation window size: +* . Typical Case: +* . If possible and advisable, nibble the entire +* . active block. If not, use size MIN(NWR,NWMAX) +* . or MIN(NWR+1,NWMAX) depending upon which has +* . the smaller corresponding subdiagonal entry +* . (a heuristic). +* . +* . Exceptional Case: +* . If there have been no deflations in KEXNW or +* . more iterations, then vary the deflation window +* . size. At first, because, larger windows are, +* . in general, more powerful than smaller ones, +* . rapidly increase the window to the maximum possible. +* . Then, gradually reduce the window size. ==== +* + NH = KBOT - KTOP + 1 + NWUPBD = MIN( NH, NWMAX ) + IF( NDFL.LT.KEXNW ) THEN + NW = MIN( NWUPBD, NWR ) + ELSE + NW = MIN( NWUPBD, 2*NW ) + END IF + IF( NW.LT.NWMAX ) THEN + IF( NW.GE.NH-1 ) THEN + NW = NH + ELSE + KWTOP = KBOT - NW + 1 + IF( ABS( H( KWTOP, KWTOP-1 ) ).GT. + $ ABS( H( KWTOP-1, KWTOP-2 ) ) )NW = NW + 1 + END IF + END IF + IF( NDFL.LT.KEXNW ) THEN + NDEC = -1 + ELSE IF( NDEC.GE.0 .OR. NW.GE.NWUPBD ) THEN + NDEC = NDEC + 1 + IF( NW-NDEC.LT.2 ) + $ NDEC = 0 + NW = NW - NDEC + END IF +* +* ==== Aggressive early deflation: +* . split workspace under the subdiagonal into +* . - an nw-by-nw work array V in the lower +* . left-hand-corner, +* . - an NW-by-at-least-NW-but-more-is-better +* . (NW-by-NHO) horizontal work array along +* . the bottom edge, +* . - an at-least-NW-but-more-is-better (NHV-by-NW) +* . vertical work array along the left-hand-edge. +* . ==== +* + KV = N - NW + 1 + KT = NW + 1 + NHO = ( N-NW-1 ) - KT + 1 + KWV = NW + 2 + NVE = ( N-NW ) - KWV + 1 +* +* ==== Aggressive early deflation ==== +* + CALL SLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, WR, WI, H( KV, 1 ), LDH, + $ NHO, H( KV, KT ), LDH, NVE, H( KWV, 1 ), LDH, + $ WORK, LWORK ) +* +* ==== Adjust KBOT accounting for new deflations. ==== +* + KBOT = KBOT - LD +* +* ==== KS points to the shifts. ==== +* + KS = KBOT - LS + 1 +* +* ==== Skip an expensive QR sweep if there is a (partly +* . heuristic) reason to expect that many eigenvalues +* . will deflate without it. Here, the QR sweep is +* . skipped if many eigenvalues have just been deflated +* . or if the remaining active block is small. +* + IF( ( LD.EQ.0 ) .OR. ( ( 100*LD.LE.NW*NIBBLE ) .AND. ( KBOT- + $ KTOP+1.GT.MIN( NMIN, NWMAX ) ) ) ) THEN +* +* ==== NS = nominal number of simultaneous shifts. +* . This may be lowered (slightly) if SLAQR3 +* . did not provide that many shifts. ==== +* + NS = MIN( NSMAX, NSR, MAX( 2, KBOT-KTOP ) ) + NS = NS - MOD( NS, 2 ) +* +* ==== If there have been no deflations +* . in a multiple of KEXSH iterations, +* . then try exceptional shifts. +* . Otherwise use shifts provided by +* . SLAQR3 above or from the eigenvalues +* . of a trailing principal submatrix. ==== +* + IF( MOD( NDFL, KEXSH ).EQ.0 ) THEN + KS = KBOT - NS + 1 + DO 30 I = KBOT, MAX( KS+1, KTOP+2 ), -2 + SS = ABS( H( I, I-1 ) ) + ABS( H( I-1, I-2 ) ) + AA = WILK1*SS + H( I, I ) + BB = SS + CC = WILK2*SS + DD = AA + CALL SLANV2( AA, BB, CC, DD, WR( I-1 ), WI( I-1 ), + $ WR( I ), WI( I ), CS, SN ) + 30 CONTINUE + IF( KS.EQ.KTOP ) THEN + WR( KS+1 ) = H( KS+1, KS+1 ) + WI( KS+1 ) = ZERO + WR( KS ) = WR( KS+1 ) + WI( KS ) = WI( KS+1 ) + END IF + ELSE +* +* ==== Got NS/2 or fewer shifts? Use SLAQR4 or +* . SLAHQR on a trailing principal submatrix to +* . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, +* . there is enough space below the subdiagonal +* . to fit an NS-by-NS scratch array.) ==== +* + IF( KBOT-KS+1.LE.NS / 2 ) THEN + KS = KBOT - NS + 1 + KT = N - NS + 1 + CALL SLACPY( 'A', NS, NS, H( KS, KS ), LDH, + $ H( KT, 1 ), LDH ) + IF( NS.GT.NMIN ) THEN + CALL SLAQR4( .false., .false., NS, 1, NS, + $ H( KT, 1 ), LDH, WR( KS ), + $ WI( KS ), 1, 1, ZDUM, 1, WORK, + $ LWORK, INF ) + ELSE + CALL SLAHQR( .false., .false., NS, 1, NS, + $ H( KT, 1 ), LDH, WR( KS ), + $ WI( KS ), 1, 1, ZDUM, 1, INF ) + END IF + KS = KS + INF +* +* ==== In case of a rare QR failure use +* . eigenvalues of the trailing 2-by-2 +* . principal submatrix. ==== +* + IF( KS.GE.KBOT ) THEN + AA = H( KBOT-1, KBOT-1 ) + CC = H( KBOT, KBOT-1 ) + BB = H( KBOT-1, KBOT ) + DD = H( KBOT, KBOT ) + CALL SLANV2( AA, BB, CC, DD, WR( KBOT-1 ), + $ WI( KBOT-1 ), WR( KBOT ), + $ WI( KBOT ), CS, SN ) + KS = KBOT - 1 + END IF + END IF +* + IF( KBOT-KS+1.GT.NS ) THEN +* +* ==== Sort the shifts (Helps a little) +* . Bubble sort keeps complex conjugate +* . pairs together. ==== +* + SORTED = .false. + DO 50 K = KBOT, KS + 1, -1 + IF( SORTED ) + $ GO TO 60 + SORTED = .true. + DO 40 I = KS, K - 1 + IF( ABS( WR( I ) )+ABS( WI( I ) ).LT. + $ ABS( WR( I+1 ) )+ABS( WI( I+1 ) ) ) THEN + SORTED = .false. +* + SWAP = WR( I ) + WR( I ) = WR( I+1 ) + WR( I+1 ) = SWAP +* + SWAP = WI( I ) + WI( I ) = WI( I+1 ) + WI( I+1 ) = SWAP + END IF + 40 CONTINUE + 50 CONTINUE + 60 CONTINUE + END IF +* +* ==== Shuffle shifts into pairs of real shifts +* . and pairs of complex conjugate shifts +* . assuming complex conjugate shifts are +* . already adjacent to one another. (Yes, +* . they are.) ==== +* + DO 70 I = KBOT, KS + 2, -2 + IF( WI( I ).NE.-WI( I-1 ) ) THEN +* + SWAP = WR( I ) + WR( I ) = WR( I-1 ) + WR( I-1 ) = WR( I-2 ) + WR( I-2 ) = SWAP +* + SWAP = WI( I ) + WI( I ) = WI( I-1 ) + WI( I-1 ) = WI( I-2 ) + WI( I-2 ) = SWAP + END IF + 70 CONTINUE + END IF +* +* ==== If there are only two shifts and both are +* . real, then use only one. ==== +* + IF( KBOT-KS+1.EQ.2 ) THEN + IF( WI( KBOT ).EQ.ZERO ) THEN + IF( ABS( WR( KBOT )-H( KBOT, KBOT ) ).LT. + $ ABS( WR( KBOT-1 )-H( KBOT, KBOT ) ) ) THEN + WR( KBOT-1 ) = WR( KBOT ) + ELSE + WR( KBOT ) = WR( KBOT-1 ) + END IF + END IF + END IF +* +* ==== Use up to NS of the the smallest magnatiude +* . shifts. If there aren't NS shifts available, +* . then use them all, possibly dropping one to +* . make the number of shifts even. ==== +* + NS = MIN( NS, KBOT-KS+1 ) + NS = NS - MOD( NS, 2 ) + KS = KBOT - NS + 1 +* +* ==== Small-bulge multi-shift QR sweep: +* . split workspace under the subdiagonal into +* . - a KDU-by-KDU work array U in the lower +* . left-hand-corner, +* . - a KDU-by-at-least-KDU-but-more-is-better +* . (KDU-by-NHo) horizontal work array WH along +* . the bottom edge, +* . - and an at-least-KDU-but-more-is-better-by-KDU +* . (NVE-by-KDU) vertical work WV arrow along +* . the left-hand-edge. ==== +* + KDU = 3*NS - 3 + KU = N - KDU + 1 + KWH = KDU + 1 + NHO = ( N-KDU+1-4 ) - ( KDU+1 ) + 1 + KWV = KDU + 4 + NVE = N - KDU - KWV + 1 +* +* ==== Small-bulge multi-shift QR sweep ==== +* + CALL SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NS, + $ WR( KS ), WI( KS ), H, LDH, ILOZ, IHIZ, Z, + $ LDZ, WORK, 3, H( KU, 1 ), LDH, NVE, + $ H( KWV, 1 ), LDH, NHO, H( KU, KWH ), LDH ) + END IF +* +* ==== Note progress (or the lack of it). ==== +* + IF( LD.GT.0 ) THEN + NDFL = 1 + ELSE + NDFL = NDFL + 1 + END IF +* +* ==== End of main loop ==== + 80 CONTINUE +* +* ==== Iteration limit exceeded. Set INFO to show where +* . the problem occurred and exit. ==== +* + INFO = KBOT + 90 CONTINUE + END IF +* +* ==== Return the optimal value of LWORK. ==== +* + WORK( 1 ) = REAL( LWKOPT ) +* +* ==== End of SLAQR0 ==== +* + END diff --git a/dspl/liblapack/SRC/slaqr1.f b/dspl/liblapack/SRC/slaqr1.f new file mode 100644 index 0000000..7d7d851 --- /dev/null +++ b/dspl/liblapack/SRC/slaqr1.f @@ -0,0 +1,179 @@ +*> \brief \b SLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and specified shifts. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAQR1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAQR1( N, H, LDH, SR1, SI1, SR2, SI2, V ) +* +* .. Scalar Arguments .. +* REAL SI1, SI2, SR1, SR2 +* INTEGER LDH, N +* .. +* .. Array Arguments .. +* REAL H( LDH, * ), V( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Given a 2-by-2 or 3-by-3 matrix H, SLAQR1 sets v to a +*> scalar multiple of the first column of the product +*> +*> (*) K = (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I) +*> +*> scaling to avoid overflows and most underflows. It +*> is assumed that either +*> +*> 1) sr1 = sr2 and si1 = -si2 +*> or +*> 2) si1 = si2 = 0. +*> +*> This is useful for starting double implicit shift bulges +*> in the QR algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> Order of the matrix H. N must be either 2 or 3. +*> \endverbatim +*> +*> \param[in] H +*> \verbatim +*> H is REAL array, dimension (LDH,N) +*> The 2-by-2 or 3-by-3 matrix H in (*). +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of H as declared in +*> the calling procedure. LDH.GE.N +*> \endverbatim +*> +*> \param[in] SR1 +*> \verbatim +*> SR1 is REAL +*> \endverbatim +*> +*> \param[in] SI1 +*> \verbatim +*> SI1 is REAL +*> \endverbatim +*> +*> \param[in] SR2 +*> \verbatim +*> SR2 is REAL +*> \endverbatim +*> +*> \param[in] SI2 +*> \verbatim +*> SI2 is REAL +*> The shifts in (*). +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is REAL array, dimension (N) +*> A scalar multiple of the first column of the +*> matrix K in (*). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +*> +* ===================================================================== + SUBROUTINE SLAQR1( N, H, LDH, SR1, SI1, SR2, SI2, V ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + REAL SI1, SI2, SR1, SR2 + INTEGER LDH, N +* .. +* .. Array Arguments .. + REAL H( LDH, * ), V( * ) +* .. +* +* ================================================================ +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0e0 ) +* .. +* .. Local Scalars .. + REAL H21S, H31S, S +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. + IF( N.EQ.2 ) THEN + S = ABS( H( 1, 1 )-SR2 ) + ABS( SI2 ) + ABS( H( 2, 1 ) ) + IF( S.EQ.ZERO ) THEN + V( 1 ) = ZERO + V( 2 ) = ZERO + ELSE + H21S = H( 2, 1 ) / S + V( 1 ) = H21S*H( 1, 2 ) + ( H( 1, 1 )-SR1 )* + $ ( ( H( 1, 1 )-SR2 ) / S ) - SI1*( SI2 / S ) + V( 2 ) = H21S*( H( 1, 1 )+H( 2, 2 )-SR1-SR2 ) + END IF + ELSE + S = ABS( H( 1, 1 )-SR2 ) + ABS( SI2 ) + ABS( H( 2, 1 ) ) + + $ ABS( H( 3, 1 ) ) + IF( S.EQ.ZERO ) THEN + V( 1 ) = ZERO + V( 2 ) = ZERO + V( 3 ) = ZERO + ELSE + H21S = H( 2, 1 ) / S + H31S = H( 3, 1 ) / S + V( 1 ) = ( H( 1, 1 )-SR1 )*( ( H( 1, 1 )-SR2 ) / S ) - + $ SI1*( SI2 / S ) + H( 1, 2 )*H21S + H( 1, 3 )*H31S + V( 2 ) = H21S*( H( 1, 1 )+H( 2, 2 )-SR1-SR2 ) + + $ H( 2, 3 )*H31S + V( 3 ) = H31S*( H( 1, 1 )+H( 3, 3 )-SR1-SR2 ) + + $ H21S*H( 3, 2 ) + END IF + END IF + END diff --git a/dspl/liblapack/SRC/slaqr2.f b/dspl/liblapack/SRC/slaqr2.f new file mode 100644 index 0000000..8e1f349 --- /dev/null +++ b/dspl/liblapack/SRC/slaqr2.f @@ -0,0 +1,684 @@ +*> \brief \b SLAQR2 performs the orthogonal similarity transformation of a Hessenberg matrix to detect and deflate fully converged eigenvalues from a trailing principal submatrix (aggressive early deflation). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAQR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, +* IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T, +* LDT, NV, WV, LDWV, WORK, LWORK ) +* +* .. Scalar Arguments .. +* INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, +* $ LDZ, LWORK, N, ND, NH, NS, NV, NW +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* REAL H( LDH, * ), SI( * ), SR( * ), T( LDT, * ), +* $ V( LDV, * ), WORK( * ), WV( LDWV, * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAQR2 is identical to SLAQR3 except that it avoids +*> recursion by calling SLAHQR instead of SLAQR4. +*> +*> Aggressive early deflation: +*> +*> This subroutine accepts as input an upper Hessenberg matrix +*> H and performs an orthogonal similarity transformation +*> designed to detect and deflate fully converged eigenvalues from +*> a trailing principal submatrix. On output H has been over- +*> written by a new Hessenberg matrix that is a perturbation of +*> an orthogonal similarity transformation of H. It is to be +*> hoped that the final version of H has many zero subdiagonal +*> entries. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> If .TRUE., then the Hessenberg matrix H is fully updated +*> so that the quasi-triangular Schur factor may be +*> computed (in cooperation with the calling subroutine). +*> If .FALSE., then only enough of H is updated to preserve +*> the eigenvalues. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> If .TRUE., then the orthogonal matrix Z is updated so +*> so that the orthogonal Schur factor may be computed +*> (in cooperation with the calling subroutine). +*> If .FALSE., then Z is not referenced. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H and (if WANTZ is .TRUE.) the +*> order of the orthogonal matrix Z. +*> \endverbatim +*> +*> \param[in] KTOP +*> \verbatim +*> KTOP is INTEGER +*> It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. +*> KBOT and KTOP together determine an isolated block +*> along the diagonal of the Hessenberg matrix. +*> \endverbatim +*> +*> \param[in] KBOT +*> \verbatim +*> KBOT is INTEGER +*> It is assumed without a check that either +*> KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together +*> determine an isolated block along the diagonal of the +*> Hessenberg matrix. +*> \endverbatim +*> +*> \param[in] NW +*> \verbatim +*> NW is INTEGER +*> Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is REAL array, dimension (LDH,N) +*> On input the initial N-by-N section of H stores the +*> Hessenberg matrix undergoing aggressive early deflation. +*> On output H has been transformed by an orthogonal +*> similarity transformation, perturbed, and the returned +*> to Hessenberg form that (it is to be hoped) has some +*> zero subdiagonal entries. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> Leading dimension of H just as declared in the calling +*> subroutine. N .LE. LDH +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ,N) +*> IF WANTZ is .TRUE., then on output, the orthogonal +*> similarity transformation mentioned above has been +*> accumulated into Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right. +*> If WANTZ is .FALSE., then Z is unreferenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of Z just as declared in the +*> calling subroutine. 1 .LE. LDZ. +*> \endverbatim +*> +*> \param[out] NS +*> \verbatim +*> NS is INTEGER +*> The number of unconverged (ie approximate) eigenvalues +*> returned in SR and SI that may be used as shifts by the +*> calling subroutine. +*> \endverbatim +*> +*> \param[out] ND +*> \verbatim +*> ND is INTEGER +*> The number of converged eigenvalues uncovered by this +*> subroutine. +*> \endverbatim +*> +*> \param[out] SR +*> \verbatim +*> SR is REAL array, dimension (KBOT) +*> \endverbatim +*> +*> \param[out] SI +*> \verbatim +*> SI is REAL array, dimension (KBOT) +*> On output, the real and imaginary parts of approximate +*> eigenvalues that may be used for shifts are stored in +*> SR(KBOT-ND-NS+1) through SR(KBOT-ND) and +*> SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively. +*> The real and imaginary parts of converged eigenvalues +*> are stored in SR(KBOT-ND+1) through SR(KBOT) and +*> SI(KBOT-ND+1) through SI(KBOT), respectively. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is REAL array, dimension (LDV,NW) +*> An NW-by-NW work array. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of V just as declared in the +*> calling subroutine. NW .LE. LDV +*> \endverbatim +*> +*> \param[in] NH +*> \verbatim +*> NH is INTEGER +*> The number of columns of T. NH.GE.NW. +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (LDT,NW) +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of T just as declared in the +*> calling subroutine. NW .LE. LDT +*> \endverbatim +*> +*> \param[in] NV +*> \verbatim +*> NV is INTEGER +*> The number of rows of work array WV available for +*> workspace. NV.GE.NW. +*> \endverbatim +*> +*> \param[out] WV +*> \verbatim +*> WV is REAL array, dimension (LDWV,NW) +*> \endverbatim +*> +*> \param[in] LDWV +*> \verbatim +*> LDWV is INTEGER +*> The leading dimension of W just as declared in the +*> calling subroutine. NW .LE. LDV +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> On exit, WORK(1) is set to an estimate of the optimal value +*> of LWORK for the given values of N, NW, KTOP and KBOT. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the work array WORK. LWORK = 2*NW +*> suffices, but greater efficiency may result from larger +*> values of LWORK. +*> +*> If LWORK = -1, then a workspace query is assumed; SLAQR2 +*> only estimates the optimal workspace size for the given +*> values of N, NW, KTOP and KBOT. The estimate is returned +*> in WORK(1). No error message related to LWORK is issued +*> by XERBLA. Neither H nor Z are accessed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +*> +* ===================================================================== + SUBROUTINE SLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T, + $ LDT, NV, WV, LDWV, WORK, LWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, + $ LDZ, LWORK, N, ND, NH, NS, NV, NW + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + REAL H( LDH, * ), SI( * ), SR( * ), T( LDT, * ), + $ V( LDV, * ), WORK( * ), WV( LDWV, * ), + $ Z( LDZ, * ) +* .. +* +* ================================================================ +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0e0, ONE = 1.0e0 ) +* .. +* .. Local Scalars .. + REAL AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S, + $ SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP + INTEGER I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL, + $ KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, + $ LWKOPT + LOGICAL BULGE, SORTED +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEHRD, SGEMM, SLABAD, SLACPY, SLAHQR, + $ SLANV2, SLARF, SLARFG, SLASET, SORMHR, STREXC +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* ==== Estimate optimal workspace. ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + IF( JW.LE.2 ) THEN + LWKOPT = 1 + ELSE +* +* ==== Workspace query call to SGEHRD ==== +* + CALL SGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO ) + LWK1 = INT( WORK( 1 ) ) +* +* ==== Workspace query call to SORMHR ==== +* + CALL SORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV, + $ WORK, -1, INFO ) + LWK2 = INT( WORK( 1 ) ) +* +* ==== Optimal workspace ==== +* + LWKOPT = JW + MAX( LWK1, LWK2 ) + END IF +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = REAL( LWKOPT ) + RETURN + END IF +* +* ==== Nothing to do ... +* ... for an empty active block ... ==== + NS = 0 + ND = 0 + WORK( 1 ) = ONE + IF( KTOP.GT.KBOT ) + $ RETURN +* ... nor for an empty deflation window. ==== + IF( NW.LT.1 ) + $ RETURN +* +* ==== Machine constants ==== +* + SAFMIN = SLAMCH( 'SAFE MINIMUM' ) + SAFMAX = ONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + ULP = SLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( REAL( N ) / ULP ) +* +* ==== Setup deflation window ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + KWTOP = KBOT - JW + 1 + IF( KWTOP.EQ.KTOP ) THEN + S = ZERO + ELSE + S = H( KWTOP, KWTOP-1 ) + END IF +* + IF( KBOT.EQ.KWTOP ) THEN +* +* ==== 1-by-1 deflation window: not much to do ==== +* + SR( KWTOP ) = H( KWTOP, KWTOP ) + SI( KWTOP ) = ZERO + NS = 1 + ND = 0 + IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) ) + $ THEN + NS = 0 + ND = 1 + IF( KWTOP.GT.KTOP ) + $ H( KWTOP, KWTOP-1 ) = ZERO + END IF + WORK( 1 ) = ONE + RETURN + END IF +* +* ==== Convert to spike-triangular form. (In case of a +* . rare QR failure, this routine continues to do +* . aggressive early deflation using that part of +* . the deflation window that converged using INFQR +* . here and there to keep track.) ==== +* + CALL SLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) + CALL SCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) +* + CALL SLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) + CALL SLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ), + $ SI( KWTOP ), 1, JW, V, LDV, INFQR ) +* +* ==== STREXC needs a clean margin near the diagonal ==== +* + DO 10 J = 1, JW - 3 + T( J+2, J ) = ZERO + T( J+3, J ) = ZERO + 10 CONTINUE + IF( JW.GT.2 ) + $ T( JW, JW-2 ) = ZERO +* +* ==== Deflation detection loop ==== +* + NS = JW + ILST = INFQR + 1 + 20 CONTINUE + IF( ILST.LE.NS ) THEN + IF( NS.EQ.1 ) THEN + BULGE = .FALSE. + ELSE + BULGE = T( NS, NS-1 ).NE.ZERO + END IF +* +* ==== Small spike tip test for deflation ==== +* + IF( .NOT.BULGE ) THEN +* +* ==== Real eigenvalue ==== +* + FOO = ABS( T( NS, NS ) ) + IF( FOO.EQ.ZERO ) + $ FOO = ABS( S ) + IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN +* +* ==== Deflatable ==== +* + NS = NS - 1 + ELSE +* +* ==== Undeflatable. Move it up out of the way. +* . (STREXC can not fail in this case.) ==== +* + IFST = NS + CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + $ INFO ) + ILST = ILST + 1 + END IF + ELSE +* +* ==== Complex conjugate pair ==== +* + FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )* + $ SQRT( ABS( T( NS-1, NS ) ) ) + IF( FOO.EQ.ZERO ) + $ FOO = ABS( S ) + IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE. + $ MAX( SMLNUM, ULP*FOO ) ) THEN +* +* ==== Deflatable ==== +* + NS = NS - 2 + ELSE +* +* ==== Undeflatable. Move them up out of the way. +* . Fortunately, STREXC does the right thing with +* . ILST in case of a rare exchange failure. ==== +* + IFST = NS + CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + $ INFO ) + ILST = ILST + 2 + END IF + END IF +* +* ==== End deflation detection loop ==== +* + GO TO 20 + END IF +* +* ==== Return to Hessenberg form ==== +* + IF( NS.EQ.0 ) + $ S = ZERO +* + IF( NS.LT.JW ) THEN +* +* ==== sorting diagonal blocks of T improves accuracy for +* . graded matrices. Bubble sort deals well with +* . exchange failures. ==== +* + SORTED = .false. + I = NS + 1 + 30 CONTINUE + IF( SORTED ) + $ GO TO 50 + SORTED = .true. +* + KEND = I - 1 + I = INFQR + 1 + IF( I.EQ.NS ) THEN + K = I + 1 + ELSE IF( T( I+1, I ).EQ.ZERO ) THEN + K = I + 1 + ELSE + K = I + 2 + END IF + 40 CONTINUE + IF( K.LE.KEND ) THEN + IF( K.EQ.I+1 ) THEN + EVI = ABS( T( I, I ) ) + ELSE + EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )* + $ SQRT( ABS( T( I, I+1 ) ) ) + END IF +* + IF( K.EQ.KEND ) THEN + EVK = ABS( T( K, K ) ) + ELSE IF( T( K+1, K ).EQ.ZERO ) THEN + EVK = ABS( T( K, K ) ) + ELSE + EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )* + $ SQRT( ABS( T( K, K+1 ) ) ) + END IF +* + IF( EVI.GE.EVK ) THEN + I = K + ELSE + SORTED = .false. + IFST = I + ILST = K + CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + $ INFO ) + IF( INFO.EQ.0 ) THEN + I = ILST + ELSE + I = K + END IF + END IF + IF( I.EQ.KEND ) THEN + K = I + 1 + ELSE IF( T( I+1, I ).EQ.ZERO ) THEN + K = I + 1 + ELSE + K = I + 2 + END IF + GO TO 40 + END IF + GO TO 30 + 50 CONTINUE + END IF +* +* ==== Restore shift/eigenvalue array from T ==== +* + I = JW + 60 CONTINUE + IF( I.GE.INFQR+1 ) THEN + IF( I.EQ.INFQR+1 ) THEN + SR( KWTOP+I-1 ) = T( I, I ) + SI( KWTOP+I-1 ) = ZERO + I = I - 1 + ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN + SR( KWTOP+I-1 ) = T( I, I ) + SI( KWTOP+I-1 ) = ZERO + I = I - 1 + ELSE + AA = T( I-1, I-1 ) + CC = T( I, I-1 ) + BB = T( I-1, I ) + DD = T( I, I ) + CALL SLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ), + $ SI( KWTOP+I-2 ), SR( KWTOP+I-1 ), + $ SI( KWTOP+I-1 ), CS, SN ) + I = I - 2 + END IF + GO TO 60 + END IF +* + IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN + IF( NS.GT.1 .AND. S.NE.ZERO ) THEN +* +* ==== Reflect spike back into lower triangle ==== +* + CALL SCOPY( NS, V, LDV, WORK, 1 ) + BETA = WORK( 1 ) + CALL SLARFG( NS, BETA, WORK( 2 ), 1, TAU ) + WORK( 1 ) = ONE +* + CALL SLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) +* + CALL SLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL SLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL SLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, + $ WORK( JW+1 ) ) +* + CALL SGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), + $ LWORK-JW, INFO ) + END IF +* +* ==== Copy updated reduced window into place ==== +* + IF( KWTOP.GT.1 ) + $ H( KWTOP, KWTOP-1 ) = S*V( 1, 1 ) + CALL SLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH ) + CALL SCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ), + $ LDH+1 ) +* +* ==== Accumulate orthogonal matrix in order update +* . H and Z, if requested. ==== +* + IF( NS.GT.1 .AND. S.NE.ZERO ) + $ CALL SORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV, + $ WORK( JW+1 ), LWORK-JW, INFO ) +* +* ==== Update vertical slab in H ==== +* + IF( WANTT ) THEN + LTOP = 1 + ELSE + LTOP = KTOP + END IF + DO 70 KROW = LTOP, KWTOP - 1, NV + KLN = MIN( NV, KWTOP-KROW ) + CALL SGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), + $ LDH, V, LDV, ZERO, WV, LDWV ) + CALL SLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) + 70 CONTINUE +* +* ==== Update horizontal slab in H ==== +* + IF( WANTT ) THEN + DO 80 KCOL = KBOT + 1, N, NH + KLN = MIN( NH, N-KCOL+1 ) + CALL SGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV, + $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT ) + CALL SLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ), + $ LDH ) + 80 CONTINUE + END IF +* +* ==== Update vertical slab in Z ==== +* + IF( WANTZ ) THEN + DO 90 KROW = ILOZ, IHIZ, NV + KLN = MIN( NV, IHIZ-KROW+1 ) + CALL SGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), + $ LDZ, V, LDV, ZERO, WV, LDWV ) + CALL SLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), + $ LDZ ) + 90 CONTINUE + END IF + END IF +* +* ==== Return the number of deflations ... ==== +* + ND = JW - NS +* +* ==== ... and the number of shifts. (Subtracting +* . INFQR from the spike length takes care +* . of the case of a rare QR failure while +* . calculating eigenvalues of the deflation +* . window.) ==== +* + NS = NS - INFQR +* +* ==== Return optimal workspace. ==== +* + WORK( 1 ) = REAL( LWKOPT ) +* +* ==== End of SLAQR2 ==== +* + END diff --git a/dspl/liblapack/SRC/slaqr3.f b/dspl/liblapack/SRC/slaqr3.f new file mode 100644 index 0000000..534e2c4 --- /dev/null +++ b/dspl/liblapack/SRC/slaqr3.f @@ -0,0 +1,695 @@ +*> \brief \b SLAQR3 performs the orthogonal similarity transformation of a Hessenberg matrix to detect and deflate fully converged eigenvalues from a trailing principal submatrix (aggressive early deflation). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAQR3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, +* IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T, +* LDT, NV, WV, LDWV, WORK, LWORK ) +* +* .. Scalar Arguments .. +* INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, +* $ LDZ, LWORK, N, ND, NH, NS, NV, NW +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* REAL H( LDH, * ), SI( * ), SR( * ), T( LDT, * ), +* $ V( LDV, * ), WORK( * ), WV( LDWV, * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Aggressive early deflation: +*> +*> SLAQR3 accepts as input an upper Hessenberg matrix +*> H and performs an orthogonal similarity transformation +*> designed to detect and deflate fully converged eigenvalues from +*> a trailing principal submatrix. On output H has been over- +*> written by a new Hessenberg matrix that is a perturbation of +*> an orthogonal similarity transformation of H. It is to be +*> hoped that the final version of H has many zero subdiagonal +*> entries. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> If .TRUE., then the Hessenberg matrix H is fully updated +*> so that the quasi-triangular Schur factor may be +*> computed (in cooperation with the calling subroutine). +*> If .FALSE., then only enough of H is updated to preserve +*> the eigenvalues. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> If .TRUE., then the orthogonal matrix Z is updated so +*> so that the orthogonal Schur factor may be computed +*> (in cooperation with the calling subroutine). +*> If .FALSE., then Z is not referenced. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H and (if WANTZ is .TRUE.) the +*> order of the orthogonal matrix Z. +*> \endverbatim +*> +*> \param[in] KTOP +*> \verbatim +*> KTOP is INTEGER +*> It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. +*> KBOT and KTOP together determine an isolated block +*> along the diagonal of the Hessenberg matrix. +*> \endverbatim +*> +*> \param[in] KBOT +*> \verbatim +*> KBOT is INTEGER +*> It is assumed without a check that either +*> KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together +*> determine an isolated block along the diagonal of the +*> Hessenberg matrix. +*> \endverbatim +*> +*> \param[in] NW +*> \verbatim +*> NW is INTEGER +*> Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is REAL array, dimension (LDH,N) +*> On input the initial N-by-N section of H stores the +*> Hessenberg matrix undergoing aggressive early deflation. +*> On output H has been transformed by an orthogonal +*> similarity transformation, perturbed, and the returned +*> to Hessenberg form that (it is to be hoped) has some +*> zero subdiagonal entries. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> Leading dimension of H just as declared in the calling +*> subroutine. N .LE. LDH +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ,N) +*> IF WANTZ is .TRUE., then on output, the orthogonal +*> similarity transformation mentioned above has been +*> accumulated into Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right. +*> If WANTZ is .FALSE., then Z is unreferenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of Z just as declared in the +*> calling subroutine. 1 .LE. LDZ. +*> \endverbatim +*> +*> \param[out] NS +*> \verbatim +*> NS is INTEGER +*> The number of unconverged (ie approximate) eigenvalues +*> returned in SR and SI that may be used as shifts by the +*> calling subroutine. +*> \endverbatim +*> +*> \param[out] ND +*> \verbatim +*> ND is INTEGER +*> The number of converged eigenvalues uncovered by this +*> subroutine. +*> \endverbatim +*> +*> \param[out] SR +*> \verbatim +*> SR is REAL array, dimension (KBOT) +*> \endverbatim +*> +*> \param[out] SI +*> \verbatim +*> SI is REAL array, dimension (KBOT) +*> On output, the real and imaginary parts of approximate +*> eigenvalues that may be used for shifts are stored in +*> SR(KBOT-ND-NS+1) through SR(KBOT-ND) and +*> SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively. +*> The real and imaginary parts of converged eigenvalues +*> are stored in SR(KBOT-ND+1) through SR(KBOT) and +*> SI(KBOT-ND+1) through SI(KBOT), respectively. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is REAL array, dimension (LDV,NW) +*> An NW-by-NW work array. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of V just as declared in the +*> calling subroutine. NW .LE. LDV +*> \endverbatim +*> +*> \param[in] NH +*> \verbatim +*> NH is INTEGER +*> The number of columns of T. NH.GE.NW. +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (LDT,NW) +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of T just as declared in the +*> calling subroutine. NW .LE. LDT +*> \endverbatim +*> +*> \param[in] NV +*> \verbatim +*> NV is INTEGER +*> The number of rows of work array WV available for +*> workspace. NV.GE.NW. +*> \endverbatim +*> +*> \param[out] WV +*> \verbatim +*> WV is REAL array, dimension (LDWV,NW) +*> \endverbatim +*> +*> \param[in] LDWV +*> \verbatim +*> LDWV is INTEGER +*> The leading dimension of W just as declared in the +*> calling subroutine. NW .LE. LDV +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> On exit, WORK(1) is set to an estimate of the optimal value +*> of LWORK for the given values of N, NW, KTOP and KBOT. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the work array WORK. LWORK = 2*NW +*> suffices, but greater efficiency may result from larger +*> values of LWORK. +*> +*> If LWORK = -1, then a workspace query is assumed; SLAQR3 +*> only estimates the optimal workspace size for the given +*> values of N, NW, KTOP and KBOT. The estimate is returned +*> in WORK(1). No error message related to LWORK is issued +*> by XERBLA. Neither H nor Z are accessed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +*> +* ===================================================================== + SUBROUTINE SLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T, + $ LDT, NV, WV, LDWV, WORK, LWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, + $ LDZ, LWORK, N, ND, NH, NS, NV, NW + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + REAL H( LDH, * ), SI( * ), SR( * ), T( LDT, * ), + $ V( LDV, * ), WORK( * ), WV( LDWV, * ), + $ Z( LDZ, * ) +* .. +* +* ================================================================ +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0e0, ONE = 1.0e0 ) +* .. +* .. Local Scalars .. + REAL AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S, + $ SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP + INTEGER I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL, + $ KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3, + $ LWKOPT, NMIN + LOGICAL BULGE, SORTED +* .. +* .. External Functions .. + REAL SLAMCH + INTEGER ILAENV + EXTERNAL SLAMCH, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEHRD, SGEMM, SLABAD, SLACPY, SLAHQR, + $ SLANV2, SLAQR4, SLARF, SLARFG, SLASET, SORMHR, + $ STREXC +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* ==== Estimate optimal workspace. ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + IF( JW.LE.2 ) THEN + LWKOPT = 1 + ELSE +* +* ==== Workspace query call to SGEHRD ==== +* + CALL SGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO ) + LWK1 = INT( WORK( 1 ) ) +* +* ==== Workspace query call to SORMHR ==== +* + CALL SORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV, + $ WORK, -1, INFO ) + LWK2 = INT( WORK( 1 ) ) +* +* ==== Workspace query call to SLAQR4 ==== +* + CALL SLAQR4( .true., .true., JW, 1, JW, T, LDT, SR, SI, 1, JW, + $ V, LDV, WORK, -1, INFQR ) + LWK3 = INT( WORK( 1 ) ) +* +* ==== Optimal workspace ==== +* + LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 ) + END IF +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = REAL( LWKOPT ) + RETURN + END IF +* +* ==== Nothing to do ... +* ... for an empty active block ... ==== + NS = 0 + ND = 0 + WORK( 1 ) = ONE + IF( KTOP.GT.KBOT ) + $ RETURN +* ... nor for an empty deflation window. ==== + IF( NW.LT.1 ) + $ RETURN +* +* ==== Machine constants ==== +* + SAFMIN = SLAMCH( 'SAFE MINIMUM' ) + SAFMAX = ONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + ULP = SLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( REAL( N ) / ULP ) +* +* ==== Setup deflation window ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + KWTOP = KBOT - JW + 1 + IF( KWTOP.EQ.KTOP ) THEN + S = ZERO + ELSE + S = H( KWTOP, KWTOP-1 ) + END IF +* + IF( KBOT.EQ.KWTOP ) THEN +* +* ==== 1-by-1 deflation window: not much to do ==== +* + SR( KWTOP ) = H( KWTOP, KWTOP ) + SI( KWTOP ) = ZERO + NS = 1 + ND = 0 + IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) ) + $ THEN + NS = 0 + ND = 1 + IF( KWTOP.GT.KTOP ) + $ H( KWTOP, KWTOP-1 ) = ZERO + END IF + WORK( 1 ) = ONE + RETURN + END IF +* +* ==== Convert to spike-triangular form. (In case of a +* . rare QR failure, this routine continues to do +* . aggressive early deflation using that part of +* . the deflation window that converged using INFQR +* . here and there to keep track.) ==== +* + CALL SLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) + CALL SCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) +* + CALL SLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) + NMIN = ILAENV( 12, 'SLAQR3', 'SV', JW, 1, JW, LWORK ) + IF( JW.GT.NMIN ) THEN + CALL SLAQR4( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ), + $ SI( KWTOP ), 1, JW, V, LDV, WORK, LWORK, INFQR ) + ELSE + CALL SLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ), + $ SI( KWTOP ), 1, JW, V, LDV, INFQR ) + END IF +* +* ==== STREXC needs a clean margin near the diagonal ==== +* + DO 10 J = 1, JW - 3 + T( J+2, J ) = ZERO + T( J+3, J ) = ZERO + 10 CONTINUE + IF( JW.GT.2 ) + $ T( JW, JW-2 ) = ZERO +* +* ==== Deflation detection loop ==== +* + NS = JW + ILST = INFQR + 1 + 20 CONTINUE + IF( ILST.LE.NS ) THEN + IF( NS.EQ.1 ) THEN + BULGE = .FALSE. + ELSE + BULGE = T( NS, NS-1 ).NE.ZERO + END IF +* +* ==== Small spike tip test for deflation ==== +* + IF( .NOT. BULGE ) THEN +* +* ==== Real eigenvalue ==== +* + FOO = ABS( T( NS, NS ) ) + IF( FOO.EQ.ZERO ) + $ FOO = ABS( S ) + IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN +* +* ==== Deflatable ==== +* + NS = NS - 1 + ELSE +* +* ==== Undeflatable. Move it up out of the way. +* . (STREXC can not fail in this case.) ==== +* + IFST = NS + CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + $ INFO ) + ILST = ILST + 1 + END IF + ELSE +* +* ==== Complex conjugate pair ==== +* + FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )* + $ SQRT( ABS( T( NS-1, NS ) ) ) + IF( FOO.EQ.ZERO ) + $ FOO = ABS( S ) + IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE. + $ MAX( SMLNUM, ULP*FOO ) ) THEN +* +* ==== Deflatable ==== +* + NS = NS - 2 + ELSE +* +* ==== Undeflatable. Move them up out of the way. +* . Fortunately, STREXC does the right thing with +* . ILST in case of a rare exchange failure. ==== +* + IFST = NS + CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + $ INFO ) + ILST = ILST + 2 + END IF + END IF +* +* ==== End deflation detection loop ==== +* + GO TO 20 + END IF +* +* ==== Return to Hessenberg form ==== +* + IF( NS.EQ.0 ) + $ S = ZERO +* + IF( NS.LT.JW ) THEN +* +* ==== sorting diagonal blocks of T improves accuracy for +* . graded matrices. Bubble sort deals well with +* . exchange failures. ==== +* + SORTED = .false. + I = NS + 1 + 30 CONTINUE + IF( SORTED ) + $ GO TO 50 + SORTED = .true. +* + KEND = I - 1 + I = INFQR + 1 + IF( I.EQ.NS ) THEN + K = I + 1 + ELSE IF( T( I+1, I ).EQ.ZERO ) THEN + K = I + 1 + ELSE + K = I + 2 + END IF + 40 CONTINUE + IF( K.LE.KEND ) THEN + IF( K.EQ.I+1 ) THEN + EVI = ABS( T( I, I ) ) + ELSE + EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )* + $ SQRT( ABS( T( I, I+1 ) ) ) + END IF +* + IF( K.EQ.KEND ) THEN + EVK = ABS( T( K, K ) ) + ELSE IF( T( K+1, K ).EQ.ZERO ) THEN + EVK = ABS( T( K, K ) ) + ELSE + EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )* + $ SQRT( ABS( T( K, K+1 ) ) ) + END IF +* + IF( EVI.GE.EVK ) THEN + I = K + ELSE + SORTED = .false. + IFST = I + ILST = K + CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, + $ INFO ) + IF( INFO.EQ.0 ) THEN + I = ILST + ELSE + I = K + END IF + END IF + IF( I.EQ.KEND ) THEN + K = I + 1 + ELSE IF( T( I+1, I ).EQ.ZERO ) THEN + K = I + 1 + ELSE + K = I + 2 + END IF + GO TO 40 + END IF + GO TO 30 + 50 CONTINUE + END IF +* +* ==== Restore shift/eigenvalue array from T ==== +* + I = JW + 60 CONTINUE + IF( I.GE.INFQR+1 ) THEN + IF( I.EQ.INFQR+1 ) THEN + SR( KWTOP+I-1 ) = T( I, I ) + SI( KWTOP+I-1 ) = ZERO + I = I - 1 + ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN + SR( KWTOP+I-1 ) = T( I, I ) + SI( KWTOP+I-1 ) = ZERO + I = I - 1 + ELSE + AA = T( I-1, I-1 ) + CC = T( I, I-1 ) + BB = T( I-1, I ) + DD = T( I, I ) + CALL SLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ), + $ SI( KWTOP+I-2 ), SR( KWTOP+I-1 ), + $ SI( KWTOP+I-1 ), CS, SN ) + I = I - 2 + END IF + GO TO 60 + END IF +* + IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN + IF( NS.GT.1 .AND. S.NE.ZERO ) THEN +* +* ==== Reflect spike back into lower triangle ==== +* + CALL SCOPY( NS, V, LDV, WORK, 1 ) + BETA = WORK( 1 ) + CALL SLARFG( NS, BETA, WORK( 2 ), 1, TAU ) + WORK( 1 ) = ONE +* + CALL SLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) +* + CALL SLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL SLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL SLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, + $ WORK( JW+1 ) ) +* + CALL SGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), + $ LWORK-JW, INFO ) + END IF +* +* ==== Copy updated reduced window into place ==== +* + IF( KWTOP.GT.1 ) + $ H( KWTOP, KWTOP-1 ) = S*V( 1, 1 ) + CALL SLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH ) + CALL SCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ), + $ LDH+1 ) +* +* ==== Accumulate orthogonal matrix in order update +* . H and Z, if requested. ==== +* + IF( NS.GT.1 .AND. S.NE.ZERO ) + $ CALL SORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV, + $ WORK( JW+1 ), LWORK-JW, INFO ) +* +* ==== Update vertical slab in H ==== +* + IF( WANTT ) THEN + LTOP = 1 + ELSE + LTOP = KTOP + END IF + DO 70 KROW = LTOP, KWTOP - 1, NV + KLN = MIN( NV, KWTOP-KROW ) + CALL SGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), + $ LDH, V, LDV, ZERO, WV, LDWV ) + CALL SLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) + 70 CONTINUE +* +* ==== Update horizontal slab in H ==== +* + IF( WANTT ) THEN + DO 80 KCOL = KBOT + 1, N, NH + KLN = MIN( NH, N-KCOL+1 ) + CALL SGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV, + $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT ) + CALL SLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ), + $ LDH ) + 80 CONTINUE + END IF +* +* ==== Update vertical slab in Z ==== +* + IF( WANTZ ) THEN + DO 90 KROW = ILOZ, IHIZ, NV + KLN = MIN( NV, IHIZ-KROW+1 ) + CALL SGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), + $ LDZ, V, LDV, ZERO, WV, LDWV ) + CALL SLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), + $ LDZ ) + 90 CONTINUE + END IF + END IF +* +* ==== Return the number of deflations ... ==== +* + ND = JW - NS +* +* ==== ... and the number of shifts. (Subtracting +* . INFQR from the spike length takes care +* . of the case of a rare QR failure while +* . calculating eigenvalues of the deflation +* . window.) ==== +* + NS = NS - INFQR +* +* ==== Return optimal workspace. ==== +* + WORK( 1 ) = REAL( LWKOPT ) +* +* ==== End of SLAQR3 ==== +* + END diff --git a/dspl/liblapack/SRC/slaqr4.f b/dspl/liblapack/SRC/slaqr4.f new file mode 100644 index 0000000..12b6b2f --- /dev/null +++ b/dspl/liblapack/SRC/slaqr4.f @@ -0,0 +1,742 @@ +*> \brief \b SLAQR4 computes the eigenvalues of a Hessenberg matrix, and optionally the matrices from the Schur decomposition. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAQR4 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAQR4( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, +* ILOZ, IHIZ, Z, LDZ, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* REAL H( LDH, * ), WI( * ), WORK( * ), WR( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAQR4 implements one level of recursion for SLAQR0. +*> It is a complete implementation of the small bulge multi-shift +*> QR algorithm. It may be called by SLAQR0 and, for large enough +*> deflation window size, it may be called by SLAQR3. This +*> subroutine is identical to SLAQR0 except that it calls SLAQR2 +*> instead of SLAQR3. +*> +*> SLAQR4 computes the eigenvalues of a Hessenberg matrix H +*> and, optionally, the matrices T and Z from the Schur decomposition +*> H = Z T Z**T, where T is an upper quasi-triangular matrix (the +*> Schur form), and Z is the orthogonal matrix of Schur vectors. +*> +*> Optionally Z may be postmultiplied into an input orthogonal +*> matrix Q so that this routine can give the Schur factorization +*> of a matrix A which has been reduced to the Hessenberg form H +*> by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> = .TRUE. : the full Schur form T is required; +*> = .FALSE.: only eigenvalues are required. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> = .TRUE. : the matrix of Schur vectors Z is required; +*> = .FALSE.: Schur vectors are not required. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N .GE. 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> It is assumed that H is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, +*> H(ILO,ILO-1) is zero. ILO and IHI are normally set by a +*> previous call to SGEBAL, and then passed to SGEHRD when the +*> matrix output by SGEBAL is reduced to Hessenberg form. +*> Otherwise, ILO and IHI should be set to 1 and N, +*> respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. +*> If N = 0, then ILO = 1 and IHI = 0. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is REAL array, dimension (LDH,N) +*> On entry, the upper Hessenberg matrix H. +*> On exit, if INFO = 0 and WANTT is .TRUE., then H contains +*> the upper quasi-triangular matrix T from the Schur +*> decomposition (the Schur form); 2-by-2 diagonal blocks +*> (corresponding to complex conjugate pairs of eigenvalues) +*> are returned in standard form, with H(i,i) = H(i+1,i+1) +*> and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is +*> .FALSE., then the contents of H are unspecified on exit. +*> (The output value of H when INFO.GT.0 is given under the +*> description of INFO below.) +*> +*> This subroutine may explicitly set H(i,j) = 0 for i.GT.j and +*> j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH .GE. max(1,N). +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is REAL array, dimension (IHI) +*> \endverbatim +*> +*> \param[out] WI +*> \verbatim +*> WI is REAL array, dimension (IHI) +*> The real and imaginary parts, respectively, of the computed +*> eigenvalues of H(ILO:IHI,ILO:IHI) are stored in WR(ILO:IHI) +*> and WI(ILO:IHI). If two eigenvalues are computed as a +*> complex conjugate pair, they are stored in consecutive +*> elements of WR and WI, say the i-th and (i+1)th, with +*> WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then +*> the eigenvalues are stored in the same order as on the +*> diagonal of the Schur form returned in H, with +*> WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal +*> block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and +*> WI(i+1) = -WI(i). +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. +*> 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ,IHI) +*> If WANTZ is .FALSE., then Z is not referenced. +*> If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is +*> replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the +*> orthogonal Schur factor of H(ILO:IHI,ILO:IHI). +*> (The output value of Z when INFO.GT.0 is given under +*> the description of INFO below.) +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. if WANTZ is .TRUE. +*> then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension LWORK +*> On exit, if LWORK = -1, WORK(1) returns an estimate of +*> the optimal value for LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK .GE. max(1,N) +*> is sufficient, but LWORK typically as large as 6*N may +*> be required for optimal performance. A workspace query +*> to determine the optimal workspace size is recommended. +*> +*> If LWORK = -1, then SLAQR4 does a workspace query. +*> In this case, SLAQR4 checks the input parameters and +*> estimates the optimal workspace size for the given +*> values of N, ILO and IHI. The estimate is returned +*> in WORK(1). No error message related to LWORK is +*> issued by XERBLA. Neither H nor Z are accessed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> .GT. 0: if INFO = i, SLAQR4 failed to compute all of +*> the eigenvalues. Elements 1:ilo-1 and i+1:n of WR +*> and WI contain those eigenvalues which have been +*> successfully computed. (Failures are rare.) +*> +*> If INFO .GT. 0 and WANT is .FALSE., then on exit, +*> the remaining unconverged eigenvalues are the eigen- +*> values of the upper Hessenberg matrix rows and +*> columns ILO through INFO of the final, output +*> value of H. +*> +*> If INFO .GT. 0 and WANTT is .TRUE., then on exit +*> +*> (*) (initial value of H)*U = U*(final value of H) +*> +*> where U is a orthogonal matrix. The final +*> value of H is upper Hessenberg and triangular in +*> rows and columns INFO+1 through IHI. +*> +*> If INFO .GT. 0 and WANTZ is .TRUE., then on exit +*> +*> (final value of Z(ILO:IHI,ILOZ:IHIZ) +*> = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U +*> +*> where U is the orthogonal matrix in (*) (regard- +*> less of the value of WANTT.) +*> +*> If INFO .GT. 0 and WANTZ is .FALSE., then Z is not +*> accessed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +* +*> \par References: +* ================ +*> +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages +*> 929--947, 2002. +*> \n +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part II: Aggressive Early Deflation, SIAM Journal +*> of Matrix Analysis, volume 23, pages 948--973, 2002. +*> +* ===================================================================== + SUBROUTINE SLAQR4( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, + $ ILOZ, IHIZ, Z, LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + REAL H( LDH, * ), WI( * ), WORK( * ), WR( * ), + $ Z( LDZ, * ) +* .. +* +* ================================================================ +* +* .. Parameters .. +* +* ==== Matrices of order NTINY or smaller must be processed by +* . SLAHQR because of insufficient subdiagonal scratch space. +* . (This is a hard limit.) ==== + INTEGER NTINY + PARAMETER ( NTINY = 11 ) +* +* ==== Exceptional deflation windows: try to cure rare +* . slow convergence by varying the size of the +* . deflation window after KEXNW iterations. ==== + INTEGER KEXNW + PARAMETER ( KEXNW = 5 ) +* +* ==== Exceptional shifts: try to cure rare slow convergence +* . with ad-hoc exceptional shifts every KEXSH iterations. +* . ==== + INTEGER KEXSH + PARAMETER ( KEXSH = 6 ) +* +* ==== The constants WILK1 and WILK2 are used to form the +* . exceptional shifts. ==== + REAL WILK1, WILK2 + PARAMETER ( WILK1 = 0.75e0, WILK2 = -0.4375e0 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0e0, ONE = 1.0e0 ) +* .. +* .. Local Scalars .. + REAL AA, BB, CC, CS, DD, SN, SS, SWAP + INTEGER I, INF, IT, ITMAX, K, KACC22, KBOT, KDU, KS, + $ KT, KTOP, KU, KV, KWH, KWTOP, KWV, LD, LS, + $ LWKOPT, NDEC, NDFL, NH, NHO, NIBBLE, NMIN, NS, + $ NSMAX, NSR, NVE, NW, NWMAX, NWR, NWUPBD + LOGICAL SORTED + CHARACTER JBCMPZ*2 +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Local Arrays .. + REAL ZDUM( 1, 1 ) +* .. +* .. External Subroutines .. + EXTERNAL SLACPY, SLAHQR, SLANV2, SLAQR2, SLAQR5 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, MAX, MIN, MOD, REAL +* .. +* .. Executable Statements .. + INFO = 0 +* +* ==== Quick return for N = 0: nothing to do. ==== +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = ONE + RETURN + END IF +* + IF( N.LE.NTINY ) THEN +* +* ==== Tiny matrices must use SLAHQR. ==== +* + LWKOPT = 1 + IF( LWORK.NE.-1 ) + $ CALL SLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, + $ ILOZ, IHIZ, Z, LDZ, INFO ) + ELSE +* +* ==== Use small bulge multi-shift QR with aggressive early +* . deflation on larger-than-tiny matrices. ==== +* +* ==== Hope for the best. ==== +* + INFO = 0 +* +* ==== Set up job flags for ILAENV. ==== +* + IF( WANTT ) THEN + JBCMPZ( 1: 1 ) = 'S' + ELSE + JBCMPZ( 1: 1 ) = 'E' + END IF + IF( WANTZ ) THEN + JBCMPZ( 2: 2 ) = 'V' + ELSE + JBCMPZ( 2: 2 ) = 'N' + END IF +* +* ==== NWR = recommended deflation window size. At this +* . point, N .GT. NTINY = 11, so there is enough +* . subdiagonal workspace for NWR.GE.2 as required. +* . (In fact, there is enough subdiagonal space for +* . NWR.GE.3.) ==== +* + NWR = ILAENV( 13, 'SLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NWR = MAX( 2, NWR ) + NWR = MIN( IHI-ILO+1, ( N-1 ) / 3, NWR ) +* +* ==== NSR = recommended number of simultaneous shifts. +* . At this point N .GT. NTINY = 11, so there is at +* . enough subdiagonal workspace for NSR to be even +* . and greater than or equal to two as required. ==== +* + NSR = ILAENV( 15, 'SLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NSR = MIN( NSR, ( N+6 ) / 9, IHI-ILO ) + NSR = MAX( 2, NSR-MOD( NSR, 2 ) ) +* +* ==== Estimate optimal workspace ==== +* +* ==== Workspace query call to SLAQR2 ==== +* + CALL SLAQR2( WANTT, WANTZ, N, ILO, IHI, NWR+1, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, WR, WI, H, LDH, N, H, LDH, + $ N, H, LDH, WORK, -1 ) +* +* ==== Optimal workspace = MAX(SLAQR5, SLAQR2) ==== +* + LWKOPT = MAX( 3*NSR / 2, INT( WORK( 1 ) ) ) +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = REAL( LWKOPT ) + RETURN + END IF +* +* ==== SLAHQR/SLAQR0 crossover point ==== +* + NMIN = ILAENV( 12, 'SLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NMIN = MAX( NTINY, NMIN ) +* +* ==== Nibble crossover point ==== +* + NIBBLE = ILAENV( 14, 'SLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NIBBLE = MAX( 0, NIBBLE ) +* +* ==== Accumulate reflections during ttswp? Use block +* . 2-by-2 structure during matrix-matrix multiply? ==== +* + KACC22 = ILAENV( 16, 'SLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + KACC22 = MAX( 0, KACC22 ) + KACC22 = MIN( 2, KACC22 ) +* +* ==== NWMAX = the largest possible deflation window for +* . which there is sufficient workspace. ==== +* + NWMAX = MIN( ( N-1 ) / 3, LWORK / 2 ) + NW = NWMAX +* +* ==== NSMAX = the Largest number of simultaneous shifts +* . for which there is sufficient workspace. ==== +* + NSMAX = MIN( ( N+6 ) / 9, 2*LWORK / 3 ) + NSMAX = NSMAX - MOD( NSMAX, 2 ) +* +* ==== NDFL: an iteration count restarted at deflation. ==== +* + NDFL = 1 +* +* ==== ITMAX = iteration limit ==== +* + ITMAX = MAX( 30, 2*KEXSH )*MAX( 10, ( IHI-ILO+1 ) ) +* +* ==== Last row and column in the active block ==== +* + KBOT = IHI +* +* ==== Main Loop ==== +* + DO 80 IT = 1, ITMAX +* +* ==== Done when KBOT falls below ILO ==== +* + IF( KBOT.LT.ILO ) + $ GO TO 90 +* +* ==== Locate active block ==== +* + DO 10 K = KBOT, ILO + 1, -1 + IF( H( K, K-1 ).EQ.ZERO ) + $ GO TO 20 + 10 CONTINUE + K = ILO + 20 CONTINUE + KTOP = K +* +* ==== Select deflation window size: +* . Typical Case: +* . If possible and advisable, nibble the entire +* . active block. If not, use size MIN(NWR,NWMAX) +* . or MIN(NWR+1,NWMAX) depending upon which has +* . the smaller corresponding subdiagonal entry +* . (a heuristic). +* . +* . Exceptional Case: +* . If there have been no deflations in KEXNW or +* . more iterations, then vary the deflation window +* . size. At first, because, larger windows are, +* . in general, more powerful than smaller ones, +* . rapidly increase the window to the maximum possible. +* . Then, gradually reduce the window size. ==== +* + NH = KBOT - KTOP + 1 + NWUPBD = MIN( NH, NWMAX ) + IF( NDFL.LT.KEXNW ) THEN + NW = MIN( NWUPBD, NWR ) + ELSE + NW = MIN( NWUPBD, 2*NW ) + END IF + IF( NW.LT.NWMAX ) THEN + IF( NW.GE.NH-1 ) THEN + NW = NH + ELSE + KWTOP = KBOT - NW + 1 + IF( ABS( H( KWTOP, KWTOP-1 ) ).GT. + $ ABS( H( KWTOP-1, KWTOP-2 ) ) )NW = NW + 1 + END IF + END IF + IF( NDFL.LT.KEXNW ) THEN + NDEC = -1 + ELSE IF( NDEC.GE.0 .OR. NW.GE.NWUPBD ) THEN + NDEC = NDEC + 1 + IF( NW-NDEC.LT.2 ) + $ NDEC = 0 + NW = NW - NDEC + END IF +* +* ==== Aggressive early deflation: +* . split workspace under the subdiagonal into +* . - an nw-by-nw work array V in the lower +* . left-hand-corner, +* . - an NW-by-at-least-NW-but-more-is-better +* . (NW-by-NHO) horizontal work array along +* . the bottom edge, +* . - an at-least-NW-but-more-is-better (NHV-by-NW) +* . vertical work array along the left-hand-edge. +* . ==== +* + KV = N - NW + 1 + KT = NW + 1 + NHO = ( N-NW-1 ) - KT + 1 + KWV = NW + 2 + NVE = ( N-NW ) - KWV + 1 +* +* ==== Aggressive early deflation ==== +* + CALL SLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, WR, WI, H( KV, 1 ), LDH, + $ NHO, H( KV, KT ), LDH, NVE, H( KWV, 1 ), LDH, + $ WORK, LWORK ) +* +* ==== Adjust KBOT accounting for new deflations. ==== +* + KBOT = KBOT - LD +* +* ==== KS points to the shifts. ==== +* + KS = KBOT - LS + 1 +* +* ==== Skip an expensive QR sweep if there is a (partly +* . heuristic) reason to expect that many eigenvalues +* . will deflate without it. Here, the QR sweep is +* . skipped if many eigenvalues have just been deflated +* . or if the remaining active block is small. +* + IF( ( LD.EQ.0 ) .OR. ( ( 100*LD.LE.NW*NIBBLE ) .AND. ( KBOT- + $ KTOP+1.GT.MIN( NMIN, NWMAX ) ) ) ) THEN +* +* ==== NS = nominal number of simultaneous shifts. +* . This may be lowered (slightly) if SLAQR2 +* . did not provide that many shifts. ==== +* + NS = MIN( NSMAX, NSR, MAX( 2, KBOT-KTOP ) ) + NS = NS - MOD( NS, 2 ) +* +* ==== If there have been no deflations +* . in a multiple of KEXSH iterations, +* . then try exceptional shifts. +* . Otherwise use shifts provided by +* . SLAQR2 above or from the eigenvalues +* . of a trailing principal submatrix. ==== +* + IF( MOD( NDFL, KEXSH ).EQ.0 ) THEN + KS = KBOT - NS + 1 + DO 30 I = KBOT, MAX( KS+1, KTOP+2 ), -2 + SS = ABS( H( I, I-1 ) ) + ABS( H( I-1, I-2 ) ) + AA = WILK1*SS + H( I, I ) + BB = SS + CC = WILK2*SS + DD = AA + CALL SLANV2( AA, BB, CC, DD, WR( I-1 ), WI( I-1 ), + $ WR( I ), WI( I ), CS, SN ) + 30 CONTINUE + IF( KS.EQ.KTOP ) THEN + WR( KS+1 ) = H( KS+1, KS+1 ) + WI( KS+1 ) = ZERO + WR( KS ) = WR( KS+1 ) + WI( KS ) = WI( KS+1 ) + END IF + ELSE +* +* ==== Got NS/2 or fewer shifts? Use SLAHQR +* . on a trailing principal submatrix to +* . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, +* . there is enough space below the subdiagonal +* . to fit an NS-by-NS scratch array.) ==== +* + IF( KBOT-KS+1.LE.NS / 2 ) THEN + KS = KBOT - NS + 1 + KT = N - NS + 1 + CALL SLACPY( 'A', NS, NS, H( KS, KS ), LDH, + $ H( KT, 1 ), LDH ) + CALL SLAHQR( .false., .false., NS, 1, NS, + $ H( KT, 1 ), LDH, WR( KS ), WI( KS ), + $ 1, 1, ZDUM, 1, INF ) + KS = KS + INF +* +* ==== In case of a rare QR failure use +* . eigenvalues of the trailing 2-by-2 +* . principal submatrix. ==== +* + IF( KS.GE.KBOT ) THEN + AA = H( KBOT-1, KBOT-1 ) + CC = H( KBOT, KBOT-1 ) + BB = H( KBOT-1, KBOT ) + DD = H( KBOT, KBOT ) + CALL SLANV2( AA, BB, CC, DD, WR( KBOT-1 ), + $ WI( KBOT-1 ), WR( KBOT ), + $ WI( KBOT ), CS, SN ) + KS = KBOT - 1 + END IF + END IF +* + IF( KBOT-KS+1.GT.NS ) THEN +* +* ==== Sort the shifts (Helps a little) +* . Bubble sort keeps complex conjugate +* . pairs together. ==== +* + SORTED = .false. + DO 50 K = KBOT, KS + 1, -1 + IF( SORTED ) + $ GO TO 60 + SORTED = .true. + DO 40 I = KS, K - 1 + IF( ABS( WR( I ) )+ABS( WI( I ) ).LT. + $ ABS( WR( I+1 ) )+ABS( WI( I+1 ) ) ) THEN + SORTED = .false. +* + SWAP = WR( I ) + WR( I ) = WR( I+1 ) + WR( I+1 ) = SWAP +* + SWAP = WI( I ) + WI( I ) = WI( I+1 ) + WI( I+1 ) = SWAP + END IF + 40 CONTINUE + 50 CONTINUE + 60 CONTINUE + END IF +* +* ==== Shuffle shifts into pairs of real shifts +* . and pairs of complex conjugate shifts +* . assuming complex conjugate shifts are +* . already adjacent to one another. (Yes, +* . they are.) ==== +* + DO 70 I = KBOT, KS + 2, -2 + IF( WI( I ).NE.-WI( I-1 ) ) THEN +* + SWAP = WR( I ) + WR( I ) = WR( I-1 ) + WR( I-1 ) = WR( I-2 ) + WR( I-2 ) = SWAP +* + SWAP = WI( I ) + WI( I ) = WI( I-1 ) + WI( I-1 ) = WI( I-2 ) + WI( I-2 ) = SWAP + END IF + 70 CONTINUE + END IF +* +* ==== If there are only two shifts and both are +* . real, then use only one. ==== +* + IF( KBOT-KS+1.EQ.2 ) THEN + IF( WI( KBOT ).EQ.ZERO ) THEN + IF( ABS( WR( KBOT )-H( KBOT, KBOT ) ).LT. + $ ABS( WR( KBOT-1 )-H( KBOT, KBOT ) ) ) THEN + WR( KBOT-1 ) = WR( KBOT ) + ELSE + WR( KBOT ) = WR( KBOT-1 ) + END IF + END IF + END IF +* +* ==== Use up to NS of the the smallest magnatiude +* . shifts. If there aren't NS shifts available, +* . then use them all, possibly dropping one to +* . make the number of shifts even. ==== +* + NS = MIN( NS, KBOT-KS+1 ) + NS = NS - MOD( NS, 2 ) + KS = KBOT - NS + 1 +* +* ==== Small-bulge multi-shift QR sweep: +* . split workspace under the subdiagonal into +* . - a KDU-by-KDU work array U in the lower +* . left-hand-corner, +* . - a KDU-by-at-least-KDU-but-more-is-better +* . (KDU-by-NHo) horizontal work array WH along +* . the bottom edge, +* . - and an at-least-KDU-but-more-is-better-by-KDU +* . (NVE-by-KDU) vertical work WV arrow along +* . the left-hand-edge. ==== +* + KDU = 3*NS - 3 + KU = N - KDU + 1 + KWH = KDU + 1 + NHO = ( N-KDU+1-4 ) - ( KDU+1 ) + 1 + KWV = KDU + 4 + NVE = N - KDU - KWV + 1 +* +* ==== Small-bulge multi-shift QR sweep ==== +* + CALL SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NS, + $ WR( KS ), WI( KS ), H, LDH, ILOZ, IHIZ, Z, + $ LDZ, WORK, 3, H( KU, 1 ), LDH, NVE, + $ H( KWV, 1 ), LDH, NHO, H( KU, KWH ), LDH ) + END IF +* +* ==== Note progress (or the lack of it). ==== +* + IF( LD.GT.0 ) THEN + NDFL = 1 + ELSE + NDFL = NDFL + 1 + END IF +* +* ==== End of main loop ==== + 80 CONTINUE +* +* ==== Iteration limit exceeded. Set INFO to show where +* . the problem occurred and exit. ==== +* + INFO = KBOT + 90 CONTINUE + END IF +* +* ==== Return the optimal value of LWORK. ==== +* + WORK( 1 ) = REAL( LWKOPT ) +* +* ==== End of SLAQR4 ==== +* + END diff --git a/dspl/liblapack/SRC/slaqr5.f b/dspl/liblapack/SRC/slaqr5.f new file mode 100644 index 0000000..65278e3 --- /dev/null +++ b/dspl/liblapack/SRC/slaqr5.f @@ -0,0 +1,919 @@ +*> \brief \b SLAQR5 performs a single small-bulge multi-shift QR sweep. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAQR5 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, +* SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, +* LDU, NV, WV, LDWV, NH, WH, LDWH ) +* +* .. Scalar Arguments .. +* INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV, +* $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ), +* $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAQR5, called by SLAQR0, performs a +*> single small-bulge multi-shift QR sweep. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> WANTT = .true. if the quasi-triangular Schur factor +*> is being computed. WANTT is set to .false. otherwise. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> WANTZ = .true. if the orthogonal Schur factor is being +*> computed. WANTZ is set to .false. otherwise. +*> \endverbatim +*> +*> \param[in] KACC22 +*> \verbatim +*> KACC22 is INTEGER with value 0, 1, or 2. +*> Specifies the computation mode of far-from-diagonal +*> orthogonal updates. +*> = 0: SLAQR5 does not accumulate reflections and does not +*> use matrix-matrix multiply to update far-from-diagonal +*> matrix entries. +*> = 1: SLAQR5 accumulates reflections and uses matrix-matrix +*> multiply to update the far-from-diagonal matrix entries. +*> = 2: SLAQR5 accumulates reflections, uses matrix-matrix +*> multiply to update the far-from-diagonal matrix entries, +*> and takes advantage of 2-by-2 block structure during +*> matrix multiplies. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> N is the order of the Hessenberg matrix H upon which this +*> subroutine operates. +*> \endverbatim +*> +*> \param[in] KTOP +*> \verbatim +*> KTOP is INTEGER +*> \endverbatim +*> +*> \param[in] KBOT +*> \verbatim +*> KBOT is INTEGER +*> These are the first and last rows and columns of an +*> isolated diagonal block upon which the QR sweep is to be +*> applied. It is assumed without a check that +*> either KTOP = 1 or H(KTOP,KTOP-1) = 0 +*> and +*> either KBOT = N or H(KBOT+1,KBOT) = 0. +*> \endverbatim +*> +*> \param[in] NSHFTS +*> \verbatim +*> NSHFTS is INTEGER +*> NSHFTS gives the number of simultaneous shifts. NSHFTS +*> must be positive and even. +*> \endverbatim +*> +*> \param[in,out] SR +*> \verbatim +*> SR is REAL array, dimension (NSHFTS) +*> \endverbatim +*> +*> \param[in,out] SI +*> \verbatim +*> SI is REAL array, dimension (NSHFTS) +*> SR contains the real parts and SI contains the imaginary +*> parts of the NSHFTS shifts of origin that define the +*> multi-shift QR sweep. On output SR and SI may be +*> reordered. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is REAL array, dimension (LDH,N) +*> On input H contains a Hessenberg matrix. On output a +*> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied +*> to the isolated diagonal block in rows and columns KTOP +*> through KBOT. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> LDH is the leading dimension of H just as declared in the +*> calling procedure. LDH.GE.MAX(1,N). +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ,IHIZ) +*> If WANTZ = .TRUE., then the QR Sweep orthogonal +*> similarity transformation is accumulated into +*> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right. +*> If WANTZ = .FALSE., then Z is unreferenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> LDA is the leading dimension of Z just as declared in +*> the calling procedure. LDZ.GE.N. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is REAL array, dimension (LDV,NSHFTS/2) +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> LDV is the leading dimension of V as declared in the +*> calling procedure. LDV.GE.3. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is REAL array, dimension (LDU,3*NSHFTS-3) +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> LDU is the leading dimension of U just as declared in the +*> in the calling subroutine. LDU.GE.3*NSHFTS-3. +*> \endverbatim +*> +*> \param[in] NH +*> \verbatim +*> NH is INTEGER +*> NH is the number of columns in array WH available for +*> workspace. NH.GE.1. +*> \endverbatim +*> +*> \param[out] WH +*> \verbatim +*> WH is REAL array, dimension (LDWH,NH) +*> \endverbatim +*> +*> \param[in] LDWH +*> \verbatim +*> LDWH is INTEGER +*> Leading dimension of WH just as declared in the +*> calling procedure. LDWH.GE.3*NSHFTS-3. +*> \endverbatim +*> +*> \param[in] NV +*> \verbatim +*> NV is INTEGER +*> NV is the number of rows in WV agailable for workspace. +*> NV.GE.1. +*> \endverbatim +*> +*> \param[out] WV +*> \verbatim +*> WV is REAL array, dimension (LDWV,3*NSHFTS-3) +*> \endverbatim +*> +*> \param[in] LDWV +*> \verbatim +*> LDWV is INTEGER +*> LDWV is the leading dimension of WV as declared in the +*> in the calling subroutine. LDWV.GE.NV. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +* +*> \par References: +* ================ +*> +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages +*> 929--947, 2002. +*> +* ===================================================================== + SUBROUTINE SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, + $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, + $ LDU, NV, WV, LDWV, NH, WH, LDWH ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV, + $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ), + $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ), + $ Z( LDZ, * ) +* .. +* +* ================================================================ +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0e0, ONE = 1.0e0 ) +* .. +* .. Local Scalars .. + REAL ALPHA, BETA, H11, H12, H21, H22, REFSUM, + $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2, + $ ULP + INTEGER I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN, + $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS, + $ M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL, + $ NS, NU + LOGICAL ACCUM, BLK22, BMP22 +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. +* + INTRINSIC ABS, MAX, MIN, MOD, REAL +* .. +* .. Local Arrays .. + REAL VT( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SLABAD, SLACPY, SLAQR1, SLARFG, SLASET, + $ STRMM +* .. +* .. Executable Statements .. +* +* ==== If there are no shifts, then there is nothing to do. ==== +* + IF( NSHFTS.LT.2 ) + $ RETURN +* +* ==== If the active block is empty or 1-by-1, then there +* . is nothing to do. ==== +* + IF( KTOP.GE.KBOT ) + $ RETURN +* +* ==== Shuffle shifts into pairs of real shifts and pairs +* . of complex conjugate shifts assuming complex +* . conjugate shifts are already adjacent to one +* . another. ==== +* + DO 10 I = 1, NSHFTS - 2, 2 + IF( SI( I ).NE.-SI( I+1 ) ) THEN +* + SWAP = SR( I ) + SR( I ) = SR( I+1 ) + SR( I+1 ) = SR( I+2 ) + SR( I+2 ) = SWAP +* + SWAP = SI( I ) + SI( I ) = SI( I+1 ) + SI( I+1 ) = SI( I+2 ) + SI( I+2 ) = SWAP + END IF + 10 CONTINUE +* +* ==== NSHFTS is supposed to be even, but if it is odd, +* . then simply reduce it by one. The shuffle above +* . ensures that the dropped shift is real and that +* . the remaining shifts are paired. ==== +* + NS = NSHFTS - MOD( NSHFTS, 2 ) +* +* ==== Machine constants for deflation ==== +* + SAFMIN = SLAMCH( 'SAFE MINIMUM' ) + SAFMAX = ONE / SAFMIN + CALL SLABAD( SAFMIN, SAFMAX ) + ULP = SLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( REAL( N ) / ULP ) +* +* ==== Use accumulated reflections to update far-from-diagonal +* . entries ? ==== +* + ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 ) +* +* ==== If so, exploit the 2-by-2 block structure? ==== +* + BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 ) +* +* ==== clear trash ==== +* + IF( KTOP+2.LE.KBOT ) + $ H( KTOP+2, KTOP ) = ZERO +* +* ==== NBMPS = number of 2-shift bulges in the chain ==== +* + NBMPS = NS / 2 +* +* ==== KDU = width of slab ==== +* + KDU = 6*NBMPS - 3 +* +* ==== Create and chase chains of NBMPS bulges ==== +* + DO 220 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2 + NDCOL = INCOL + KDU + IF( ACCUM ) + $ CALL SLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU ) +* +* ==== Near-the-diagonal bulge chase. The following loop +* . performs the near-the-diagonal part of a small bulge +* . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal +* . chunk extends from column INCOL to column NDCOL +* . (including both column INCOL and column NDCOL). The +* . following loop chases a 3*NBMPS column long chain of +* . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL +* . may be less than KTOP and and NDCOL may be greater than +* . KBOT indicating phantom columns from which to chase +* . bulges before they are actually introduced or to which +* . to chase bulges beyond column KBOT.) ==== +* + DO 150 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 ) +* +* ==== Bulges number MTOP to MBOT are active double implicit +* . shift bulges. There may or may not also be small +* . 2-by-2 bulge, if there is room. The inactive bulges +* . (if any) must wait until the active bulges have moved +* . down the diagonal to make room. The phantom matrix +* . paradigm described above helps keep track. ==== +* + MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 ) + MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 ) + M22 = MBOT + 1 + BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ. + $ ( KBOT-2 ) +* +* ==== Generate reflections to chase the chain right +* . one column. (The minimum value of K is KTOP-1.) ==== +* + DO 20 M = MTOP, MBOT + K = KRCOL + 3*( M-1 ) + IF( K.EQ.KTOP-1 ) THEN + CALL SLAQR1( 3, H( KTOP, KTOP ), LDH, SR( 2*M-1 ), + $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ), + $ V( 1, M ) ) + ALPHA = V( 1, M ) + CALL SLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) ) + ELSE + BETA = H( K+1, K ) + V( 2, M ) = H( K+2, K ) + V( 3, M ) = H( K+3, K ) + CALL SLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) ) +* +* ==== A Bulge may collapse because of vigilant +* . deflation or destructive underflow. In the +* . underflow case, try the two-small-subdiagonals +* . trick to try to reinflate the bulge. ==== +* + IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE. + $ ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN +* +* ==== Typical case: not collapsed (yet). ==== +* + H( K+1, K ) = BETA + H( K+2, K ) = ZERO + H( K+3, K ) = ZERO + ELSE +* +* ==== Atypical case: collapsed. Attempt to +* . reintroduce ignoring H(K+1,K) and H(K+2,K). +* . If the fill resulting from the new +* . reflector is too large, then abandon it. +* . Otherwise, use the new one. ==== +* + CALL SLAQR1( 3, H( K+1, K+1 ), LDH, SR( 2*M-1 ), + $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ), + $ VT ) + ALPHA = VT( 1 ) + CALL SLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) ) + REFSUM = VT( 1 )*( H( K+1, K )+VT( 2 )* + $ H( K+2, K ) ) +* + IF( ABS( H( K+2, K )-REFSUM*VT( 2 ) )+ + $ ABS( REFSUM*VT( 3 ) ).GT.ULP* + $ ( ABS( H( K, K ) )+ABS( H( K+1, + $ K+1 ) )+ABS( H( K+2, K+2 ) ) ) ) THEN +* +* ==== Starting a new bulge here would +* . create non-negligible fill. Use +* . the old one with trepidation. ==== +* + H( K+1, K ) = BETA + H( K+2, K ) = ZERO + H( K+3, K ) = ZERO + ELSE +* +* ==== Stating a new bulge here would +* . create only negligible fill. +* . Replace the old reflector with +* . the new one. ==== +* + H( K+1, K ) = H( K+1, K ) - REFSUM + H( K+2, K ) = ZERO + H( K+3, K ) = ZERO + V( 1, M ) = VT( 1 ) + V( 2, M ) = VT( 2 ) + V( 3, M ) = VT( 3 ) + END IF + END IF + END IF + 20 CONTINUE +* +* ==== Generate a 2-by-2 reflection, if needed. ==== +* + K = KRCOL + 3*( M22-1 ) + IF( BMP22 ) THEN + IF( K.EQ.KTOP-1 ) THEN + CALL SLAQR1( 2, H( K+1, K+1 ), LDH, SR( 2*M22-1 ), + $ SI( 2*M22-1 ), SR( 2*M22 ), SI( 2*M22 ), + $ V( 1, M22 ) ) + BETA = V( 1, M22 ) + CALL SLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) + ELSE + BETA = H( K+1, K ) + V( 2, M22 ) = H( K+2, K ) + CALL SLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) + H( K+1, K ) = BETA + H( K+2, K ) = ZERO + END IF + END IF +* +* ==== Multiply H by reflections from the left ==== +* + IF( ACCUM ) THEN + JBOT = MIN( NDCOL, KBOT ) + ELSE IF( WANTT ) THEN + JBOT = N + ELSE + JBOT = KBOT + END IF + DO 40 J = MAX( KTOP, KRCOL ), JBOT + MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 ) + DO 30 M = MTOP, MEND + K = KRCOL + 3*( M-1 ) + REFSUM = V( 1, M )*( H( K+1, J )+V( 2, M )* + $ H( K+2, J )+V( 3, M )*H( K+3, J ) ) + H( K+1, J ) = H( K+1, J ) - REFSUM + H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M ) + H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M ) + 30 CONTINUE + 40 CONTINUE + IF( BMP22 ) THEN + K = KRCOL + 3*( M22-1 ) + DO 50 J = MAX( K+1, KTOP ), JBOT + REFSUM = V( 1, M22 )*( H( K+1, J )+V( 2, M22 )* + $ H( K+2, J ) ) + H( K+1, J ) = H( K+1, J ) - REFSUM + H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 ) + 50 CONTINUE + END IF +* +* ==== Multiply H by reflections from the right. +* . Delay filling in the last row until the +* . vigilant deflation check is complete. ==== +* + IF( ACCUM ) THEN + JTOP = MAX( KTOP, INCOL ) + ELSE IF( WANTT ) THEN + JTOP = 1 + ELSE + JTOP = KTOP + END IF + DO 90 M = MTOP, MBOT + IF( V( 1, M ).NE.ZERO ) THEN + K = KRCOL + 3*( M-1 ) + DO 60 J = JTOP, MIN( KBOT, K+3 ) + REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )* + $ H( J, K+2 )+V( 3, M )*H( J, K+3 ) ) + H( J, K+1 ) = H( J, K+1 ) - REFSUM + H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M ) + H( J, K+3 ) = H( J, K+3 ) - REFSUM*V( 3, M ) + 60 CONTINUE +* + IF( ACCUM ) THEN +* +* ==== Accumulate U. (If necessary, update Z later +* . with with an efficient matrix-matrix +* . multiply.) ==== +* + KMS = K - INCOL + DO 70 J = MAX( 1, KTOP-INCOL ), KDU + REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )* + $ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) ) + U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM + U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M ) + U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*V( 3, M ) + 70 CONTINUE + ELSE IF( WANTZ ) THEN +* +* ==== U is not accumulated, so update Z +* . now by multiplying by reflections +* . from the right. ==== +* + DO 80 J = ILOZ, IHIZ + REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )* + $ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) ) + Z( J, K+1 ) = Z( J, K+1 ) - REFSUM + Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M ) + Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*V( 3, M ) + 80 CONTINUE + END IF + END IF + 90 CONTINUE +* +* ==== Special case: 2-by-2 reflection (if needed) ==== +* + K = KRCOL + 3*( M22-1 ) + IF( BMP22 ) THEN + IF ( V( 1, M22 ).NE.ZERO ) THEN + DO 100 J = JTOP, MIN( KBOT, K+3 ) + REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )* + $ H( J, K+2 ) ) + H( J, K+1 ) = H( J, K+1 ) - REFSUM + H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M22 ) + 100 CONTINUE +* + IF( ACCUM ) THEN + KMS = K - INCOL + DO 110 J = MAX( 1, KTOP-INCOL ), KDU + REFSUM = V( 1, M22 )*( U( J, KMS+1 )+ + $ V( 2, M22 )*U( J, KMS+2 ) ) + U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM + U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM* + $ V( 2, M22 ) + 110 CONTINUE + ELSE IF( WANTZ ) THEN + DO 120 J = ILOZ, IHIZ + REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )* + $ Z( J, K+2 ) ) + Z( J, K+1 ) = Z( J, K+1 ) - REFSUM + Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M22 ) + 120 CONTINUE + END IF + END IF + END IF +* +* ==== Vigilant deflation check ==== +* + MSTART = MTOP + IF( KRCOL+3*( MSTART-1 ).LT.KTOP ) + $ MSTART = MSTART + 1 + MEND = MBOT + IF( BMP22 ) + $ MEND = MEND + 1 + IF( KRCOL.EQ.KBOT-2 ) + $ MEND = MEND + 1 + DO 130 M = MSTART, MEND + K = MIN( KBOT-1, KRCOL+3*( M-1 ) ) +* +* ==== The following convergence test requires that +* . the tradition small-compared-to-nearby-diagonals +* . criterion and the Ahues & Tisseur (LAWN 122, 1997) +* . criteria both be satisfied. The latter improves +* . accuracy in some examples. Falling back on an +* . alternate convergence criterion when TST1 or TST2 +* . is zero (as done here) is traditional but probably +* . unnecessary. ==== +* + IF( H( K+1, K ).NE.ZERO ) THEN + TST1 = ABS( H( K, K ) ) + ABS( H( K+1, K+1 ) ) + IF( TST1.EQ.ZERO ) THEN + IF( K.GE.KTOP+1 ) + $ TST1 = TST1 + ABS( H( K, K-1 ) ) + IF( K.GE.KTOP+2 ) + $ TST1 = TST1 + ABS( H( K, K-2 ) ) + IF( K.GE.KTOP+3 ) + $ TST1 = TST1 + ABS( H( K, K-3 ) ) + IF( K.LE.KBOT-2 ) + $ TST1 = TST1 + ABS( H( K+2, K+1 ) ) + IF( K.LE.KBOT-3 ) + $ TST1 = TST1 + ABS( H( K+3, K+1 ) ) + IF( K.LE.KBOT-4 ) + $ TST1 = TST1 + ABS( H( K+4, K+1 ) ) + END IF + IF( ABS( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) ) + $ THEN + H12 = MAX( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) ) + H21 = MIN( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) ) + H11 = MAX( ABS( H( K+1, K+1 ) ), + $ ABS( H( K, K )-H( K+1, K+1 ) ) ) + H22 = MIN( ABS( H( K+1, K+1 ) ), + $ ABS( H( K, K )-H( K+1, K+1 ) ) ) + SCL = H11 + H12 + TST2 = H22*( H11 / SCL ) +* + IF( TST2.EQ.ZERO .OR. H21*( H12 / SCL ).LE. + $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO + END IF + END IF + 130 CONTINUE +* +* ==== Fill in the last row of each bulge. ==== +* + MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 ) + DO 140 M = MTOP, MEND + K = KRCOL + 3*( M-1 ) + REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 ) + H( K+4, K+1 ) = -REFSUM + H( K+4, K+2 ) = -REFSUM*V( 2, M ) + H( K+4, K+3 ) = H( K+4, K+3 ) - REFSUM*V( 3, M ) + 140 CONTINUE +* +* ==== End of near-the-diagonal bulge chase. ==== +* + 150 CONTINUE +* +* ==== Use U (if accumulated) to update far-from-diagonal +* . entries in H. If required, use U to update Z as +* . well. ==== +* + IF( ACCUM ) THEN + IF( WANTT ) THEN + JTOP = 1 + JBOT = N + ELSE + JTOP = KTOP + JBOT = KBOT + END IF + IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR. + $ ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN +* +* ==== Updates not exploiting the 2-by-2 block +* . structure of U. K1 and NU keep track of +* . the location and size of U in the special +* . cases of introducing bulges and chasing +* . bulges off the bottom. In these special +* . cases and in case the number of shifts +* . is NS = 2, there is no 2-by-2 block +* . structure to exploit. ==== +* + K1 = MAX( 1, KTOP-INCOL ) + NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1 +* +* ==== Horizontal Multiply ==== +* + DO 160 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH + JLEN = MIN( NH, JBOT-JCOL+1 ) + CALL SGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ), + $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH, + $ LDWH ) + CALL SLACPY( 'ALL', NU, JLEN, WH, LDWH, + $ H( INCOL+K1, JCOL ), LDH ) + 160 CONTINUE +* +* ==== Vertical multiply ==== +* + DO 170 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV + JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW ) + CALL SGEMM( 'N', 'N', JLEN, NU, NU, ONE, + $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ), + $ LDU, ZERO, WV, LDWV ) + CALL SLACPY( 'ALL', JLEN, NU, WV, LDWV, + $ H( JROW, INCOL+K1 ), LDH ) + 170 CONTINUE +* +* ==== Z multiply (also vertical) ==== +* + IF( WANTZ ) THEN + DO 180 JROW = ILOZ, IHIZ, NV + JLEN = MIN( NV, IHIZ-JROW+1 ) + CALL SGEMM( 'N', 'N', JLEN, NU, NU, ONE, + $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ), + $ LDU, ZERO, WV, LDWV ) + CALL SLACPY( 'ALL', JLEN, NU, WV, LDWV, + $ Z( JROW, INCOL+K1 ), LDZ ) + 180 CONTINUE + END IF + ELSE +* +* ==== Updates exploiting U's 2-by-2 block structure. +* . (I2, I4, J2, J4 are the last rows and columns +* . of the blocks.) ==== +* + I2 = ( KDU+1 ) / 2 + I4 = KDU + J2 = I4 - I2 + J4 = KDU +* +* ==== KZS and KNZ deal with the band of zeros +* . along the diagonal of one of the triangular +* . blocks. ==== +* + KZS = ( J4-J2 ) - ( NS+1 ) + KNZ = NS + 1 +* +* ==== Horizontal multiply ==== +* + DO 190 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH + JLEN = MIN( NH, JBOT-JCOL+1 ) +* +* ==== Copy bottom of H to top+KZS of scratch ==== +* (The first KZS rows get multiplied by zero.) ==== +* + CALL SLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ), + $ LDH, WH( KZS+1, 1 ), LDWH ) +* +* ==== Multiply by U21**T ==== +* + CALL SLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH ) + CALL STRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE, + $ U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ), + $ LDWH ) +* +* ==== Multiply top of H by U11**T ==== +* + CALL SGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU, + $ H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH ) +* +* ==== Copy top of H to bottom of WH ==== +* + CALL SLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH, + $ WH( I2+1, 1 ), LDWH ) +* +* ==== Multiply by U21**T ==== +* + CALL STRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE, + $ U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH ) +* +* ==== Multiply by U22 ==== +* + CALL SGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE, + $ U( J2+1, I2+1 ), LDU, + $ H( INCOL+1+J2, JCOL ), LDH, ONE, + $ WH( I2+1, 1 ), LDWH ) +* +* ==== Copy it back ==== +* + CALL SLACPY( 'ALL', KDU, JLEN, WH, LDWH, + $ H( INCOL+1, JCOL ), LDH ) + 190 CONTINUE +* +* ==== Vertical multiply ==== +* + DO 200 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV + JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW ) +* +* ==== Copy right of H to scratch (the first KZS +* . columns get multiplied by zero) ==== +* + CALL SLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ), + $ LDH, WV( 1, 1+KZS ), LDWV ) +* +* ==== Multiply by U21 ==== +* + CALL SLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV ) + CALL STRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE, + $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ), + $ LDWV ) +* +* ==== Multiply by U11 ==== +* + CALL SGEMM( 'N', 'N', JLEN, I2, J2, ONE, + $ H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV, + $ LDWV ) +* +* ==== Copy left of H to right of scratch ==== +* + CALL SLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH, + $ WV( 1, 1+I2 ), LDWV ) +* +* ==== Multiply by U21 ==== +* + CALL STRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE, + $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV ) +* +* ==== Multiply by U22 ==== +* + CALL SGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE, + $ H( JROW, INCOL+1+J2 ), LDH, + $ U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ), + $ LDWV ) +* +* ==== Copy it back ==== +* + CALL SLACPY( 'ALL', JLEN, KDU, WV, LDWV, + $ H( JROW, INCOL+1 ), LDH ) + 200 CONTINUE +* +* ==== Multiply Z (also vertical) ==== +* + IF( WANTZ ) THEN + DO 210 JROW = ILOZ, IHIZ, NV + JLEN = MIN( NV, IHIZ-JROW+1 ) +* +* ==== Copy right of Z to left of scratch (first +* . KZS columns get multiplied by zero) ==== +* + CALL SLACPY( 'ALL', JLEN, KNZ, + $ Z( JROW, INCOL+1+J2 ), LDZ, + $ WV( 1, 1+KZS ), LDWV ) +* +* ==== Multiply by U12 ==== +* + CALL SLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, + $ LDWV ) + CALL STRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE, + $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ), + $ LDWV ) +* +* ==== Multiply by U11 ==== +* + CALL SGEMM( 'N', 'N', JLEN, I2, J2, ONE, + $ Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE, + $ WV, LDWV ) +* +* ==== Copy left of Z to right of scratch ==== +* + CALL SLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ), + $ LDZ, WV( 1, 1+I2 ), LDWV ) +* +* ==== Multiply by U21 ==== +* + CALL STRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE, + $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), + $ LDWV ) +* +* ==== Multiply by U22 ==== +* + CALL SGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE, + $ Z( JROW, INCOL+1+J2 ), LDZ, + $ U( J2+1, I2+1 ), LDU, ONE, + $ WV( 1, 1+I2 ), LDWV ) +* +* ==== Copy the result back to Z ==== +* + CALL SLACPY( 'ALL', JLEN, KDU, WV, LDWV, + $ Z( JROW, INCOL+1 ), LDZ ) + 210 CONTINUE + END IF + END IF + END IF + 220 CONTINUE +* +* ==== End of SLAQR5 ==== +* + END diff --git a/dspl/liblapack/SRC/slaqsb.f b/dspl/liblapack/SRC/slaqsb.f new file mode 100644 index 0000000..747d32d --- /dev/null +++ b/dspl/liblapack/SRC/slaqsb.f @@ -0,0 +1,226 @@ +*> \brief \b SLAQSB scales a symmetric/Hermitian band matrix, using scaling factors computed by spbequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAQSB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, UPLO +* INTEGER KD, LDAB, N +* REAL AMAX, SCOND +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAQSB equilibrates a symmetric band matrix A using the scaling +*> factors in the vector S. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of super-diagonals of the matrix A if UPLO = 'U', +*> or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**T*U or A = L*L**T of the band +*> matrix A, in the same storage format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A. +*> \endverbatim +*> +*> \param[in] SCOND +*> \verbatim +*> SCOND is REAL +*> Ratio of the smallest S(i) to the largest S(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies whether or not equilibration was done. +*> = 'N': No equilibration. +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if scaling should be done +*> based on the ratio of the scaling factors. If SCOND < THRESH, +*> scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if scaling should +*> be done based on the absolute size of the largest matrix element. +*> If AMAX > LARGE or AMAX < SMALL, scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER KD, LDAB, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored in band format. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = MAX( 1, J-KD ), J + AB( KD+1+I-J, J ) = CJ*S( I )*AB( KD+1+I-J, J ) + 10 CONTINUE + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, MIN( N, J+KD ) + AB( 1+I-J, J ) = CJ*S( I )*AB( 1+I-J, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of SLAQSB +* + END diff --git a/dspl/liblapack/SRC/slaqsp.f b/dspl/liblapack/SRC/slaqsp.f new file mode 100644 index 0000000..a88206d --- /dev/null +++ b/dspl/liblapack/SRC/slaqsp.f @@ -0,0 +1,212 @@ +*> \brief \b SLAQSP scales a symmetric/Hermitian matrix in packed storage, using scaling factors computed by sppequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAQSP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, UPLO +* INTEGER N +* REAL AMAX, SCOND +* .. +* .. Array Arguments .. +* REAL AP( * ), S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAQSP equilibrates a symmetric matrix A using the scaling factors +*> in the vector S. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the equilibrated matrix: diag(S) * A * diag(S), in +*> the same storage format as A. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A. +*> \endverbatim +*> +*> \param[in] SCOND +*> \verbatim +*> SCOND is REAL +*> Ratio of the smallest S(i) to the largest S(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies whether or not equilibration was done. +*> = 'N': No equilibration. +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if scaling should be done +*> based on the ratio of the scaling factors. If SCOND < THRESH, +*> scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if scaling should +*> be done based on the absolute size of the largest matrix element. +*> If AMAX > LARGE or AMAX < SMALL, scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL AP( * ), S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JC + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + JC = 1 + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J + AP( JC+I-1 ) = CJ*S( I )*AP( JC+I-1 ) + 10 CONTINUE + JC = JC + J + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + JC = 1 + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, N + AP( JC+I-J ) = CJ*S( I )*AP( JC+I-J ) + 30 CONTINUE + JC = JC + N - J + 1 + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of SLAQSP +* + END diff --git a/dspl/liblapack/SRC/slaqsy.f b/dspl/liblapack/SRC/slaqsy.f new file mode 100644 index 0000000..fc66aae --- /dev/null +++ b/dspl/liblapack/SRC/slaqsy.f @@ -0,0 +1,216 @@ +*> \brief \b SLAQSY scales a symmetric/Hermitian matrix, using scaling factors computed by spoequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAQSY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, UPLO +* INTEGER LDA, N +* REAL AMAX, SCOND +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAQSY equilibrates a symmetric matrix A using the scaling factors +*> in the vector S. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if EQUED = 'Y', the equilibrated matrix: +*> diag(S) * A * diag(S). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(N,1). +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A. +*> \endverbatim +*> +*> \param[in] SCOND +*> \verbatim +*> SCOND is REAL +*> Ratio of the smallest S(i) to the largest S(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies whether or not equilibration was done. +*> = 'N': No equilibration. +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if scaling should be done +*> based on the ratio of the scaling factors. If SCOND < THRESH, +*> scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if scaling should +*> be done based on the absolute size of the largest matrix element. +*> If AMAX > LARGE or AMAX < SMALL, scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYauxiliary +* +* ===================================================================== + SUBROUTINE SLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER LDA, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL A( LDA, * ), S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, THRESH + PARAMETER ( ONE = 1.0E+0, THRESH = 0.1E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + REAL CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J + A( I, J ) = CJ*S( I )*A( I, J ) + 10 CONTINUE + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, N + A( I, J ) = CJ*S( I )*A( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of SLAQSY +* + END diff --git a/dspl/liblapack/SRC/slaqtr.f b/dspl/liblapack/SRC/slaqtr.f new file mode 100644 index 0000000..9d3e592 --- /dev/null +++ b/dspl/liblapack/SRC/slaqtr.f @@ -0,0 +1,748 @@ +*> \brief \b SLAQTR solves a real quasi-triangular system of equations, or a complex quasi-triangular system of special form, in real arithmetic. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAQTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAQTR( LTRAN, LREAL, N, T, LDT, B, W, SCALE, X, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* LOGICAL LREAL, LTRAN +* INTEGER INFO, LDT, N +* REAL SCALE, W +* .. +* .. Array Arguments .. +* REAL B( * ), T( LDT, * ), WORK( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAQTR solves the real quasi-triangular system +*> +*> op(T)*p = scale*c, if LREAL = .TRUE. +*> +*> or the complex quasi-triangular systems +*> +*> op(T + iB)*(p+iq) = scale*(c+id), if LREAL = .FALSE. +*> +*> in real arithmetic, where T is upper quasi-triangular. +*> If LREAL = .FALSE., then the first diagonal block of T must be +*> 1 by 1, B is the specially structured matrix +*> +*> B = [ b(1) b(2) ... b(n) ] +*> [ w ] +*> [ w ] +*> [ . ] +*> [ w ] +*> +*> op(A) = A or A**T, A**T denotes the transpose of +*> matrix A. +*> +*> On input, X = [ c ]. On output, X = [ p ]. +*> [ d ] [ q ] +*> +*> This subroutine is designed for the condition number estimation +*> in routine STRSNA. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] LTRAN +*> \verbatim +*> LTRAN is LOGICAL +*> On entry, LTRAN specifies the option of conjugate transpose: +*> = .FALSE., op(T+i*B) = T+i*B, +*> = .TRUE., op(T+i*B) = (T+i*B)**T. +*> \endverbatim +*> +*> \param[in] LREAL +*> \verbatim +*> LREAL is LOGICAL +*> On entry, LREAL specifies the input matrix structure: +*> = .FALSE., the input is complex +*> = .TRUE., the input is real +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of T+i*B. N >= 0. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is REAL array, dimension (LDT,N) +*> On entry, T contains a matrix in Schur canonical form. +*> If LREAL = .FALSE., then the first diagonal block of T must +*> be 1 by 1. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the matrix T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (N) +*> On entry, B contains the elements to form the matrix +*> B as described above. +*> If LREAL = .TRUE., B is not referenced. +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is REAL +*> On entry, W is the diagonal element of the matrix B. +*> If LREAL = .TRUE., W is not referenced. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL +*> On exit, SCALE is the scale factor. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (2*N) +*> On entry, X contains the right hand side of the system. +*> On exit, X is overwritten by the solution. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> On exit, INFO is set to +*> 0: successful exit. +*> 1: the some diagonal 1 by 1 block has been perturbed by +*> a small number SMIN to keep nonsingularity. +*> 2: the some diagonal 2 by 2 block has been perturbed by +*> a small number in SLALN2 to keep nonsingularity. +*> NOTE: In the interests of speed, this routine does not +*> check the inputs for errors. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLAQTR( LTRAN, LREAL, N, T, LDT, B, W, SCALE, X, WORK, + $ INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL LREAL, LTRAN + INTEGER INFO, LDT, N + REAL SCALE, W +* .. +* .. Array Arguments .. + REAL B( * ), T( LDT, * ), WORK( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER I, IERR, J, J1, J2, JNEXT, K, N1, N2 + REAL BIGNUM, EPS, REC, SCALOC, SI, SMIN, SMINW, + $ SMLNUM, SR, TJJ, TMP, XJ, XMAX, XNORM, Z +* .. +* .. Local Arrays .. + REAL D( 2, 2 ), V( 2, 2 ) +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SASUM, SDOT, SLAMCH, SLANGE + EXTERNAL ISAMAX, SASUM, SDOT, SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SLADIV, SLALN2, SSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Do not test the input parameters for errors +* + NOTRAN = .NOT.LTRAN + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM +* + XNORM = SLANGE( 'M', N, N, T, LDT, D ) + IF( .NOT.LREAL ) + $ XNORM = MAX( XNORM, ABS( W ), SLANGE( 'M', N, 1, B, N, D ) ) + SMIN = MAX( SMLNUM, EPS*XNORM ) +* +* Compute 1-norm of each column of strictly upper triangular +* part of T to control overflow in triangular solver. +* + WORK( 1 ) = ZERO + DO 10 J = 2, N + WORK( J ) = SASUM( J-1, T( 1, J ), 1 ) + 10 CONTINUE +* + IF( .NOT.LREAL ) THEN + DO 20 I = 2, N + WORK( I ) = WORK( I ) + ABS( B( I ) ) + 20 CONTINUE + END IF +* + N2 = 2*N + N1 = N + IF( .NOT.LREAL ) + $ N1 = N2 + K = ISAMAX( N1, X, 1 ) + XMAX = ABS( X( K ) ) + SCALE = ONE +* + IF( XMAX.GT.BIGNUM ) THEN + SCALE = BIGNUM / XMAX + CALL SSCAL( N1, SCALE, X, 1 ) + XMAX = BIGNUM + END IF +* + IF( LREAL ) THEN +* + IF( NOTRAN ) THEN +* +* Solve T*p = scale*c +* + JNEXT = N + DO 30 J = N, 1, -1 + IF( J.GT.JNEXT ) + $ GO TO 30 + J1 = J + J2 = J + JNEXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNEXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* Meet 1 by 1 diagonal block +* +* Scale to avoid overflow when computing +* x(j) = b(j)/T(j,j) +* + XJ = ABS( X( J1 ) ) + TJJ = ABS( T( J1, J1 ) ) + TMP = T( J1, J1 ) + IF( TJJ.LT.SMIN ) THEN + TMP = SMIN + TJJ = SMIN + INFO = 1 + END IF +* + IF( XJ.EQ.ZERO ) + $ GO TO 30 +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.BIGNUM*TJJ ) THEN + REC = ONE / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J1 ) = X( J1 ) / TMP + XJ = ABS( X( J1 ) ) +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j1 of T. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( WORK( J1 ).GT.( BIGNUM-XMAX )*REC ) THEN + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + END IF + IF( J1.GT.1 ) THEN + CALL SAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 ) + K = ISAMAX( J1-1, X, 1 ) + XMAX = ABS( X( K ) ) + END IF +* + ELSE +* +* Meet 2 by 2 diagonal block +* +* Call 2 by 2 linear system solve, to take +* care of possible overflow by scaling factor. +* + D( 1, 1 ) = X( J1 ) + D( 2, 1 ) = X( J2 ) + CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, T( J1, J1 ), + $ LDT, ONE, ONE, D, 2, ZERO, ZERO, V, 2, + $ SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 2 +* + IF( SCALOC.NE.ONE ) THEN + CALL SSCAL( N, SCALOC, X, 1 ) + SCALE = SCALE*SCALOC + END IF + X( J1 ) = V( 1, 1 ) + X( J2 ) = V( 2, 1 ) +* +* Scale V(1,1) (= X(J1)) and/or V(2,1) (=X(J2)) +* to avoid overflow in updating right-hand side. +* + XJ = MAX( ABS( V( 1, 1 ) ), ABS( V( 2, 1 ) ) ) + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( MAX( WORK( J1 ), WORK( J2 ) ).GT. + $ ( BIGNUM-XMAX )*REC ) THEN + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + END IF +* +* Update right-hand side +* + IF( J1.GT.1 ) THEN + CALL SAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 ) + CALL SAXPY( J1-1, -X( J2 ), T( 1, J2 ), 1, X, 1 ) + K = ISAMAX( J1-1, X, 1 ) + XMAX = ABS( X( K ) ) + END IF +* + END IF +* + 30 CONTINUE +* + ELSE +* +* Solve T**T*p = scale*c +* + JNEXT = 1 + DO 40 J = 1, N + IF( J.LT.JNEXT ) + $ GO TO 40 + J1 = J + J2 = J + JNEXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNEXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1 by 1 diagonal block +* +* Scale if necessary to avoid overflow in forming the +* right-hand side element by inner product. +* + XJ = ABS( X( J1 ) ) + IF( XMAX.GT.ONE ) THEN + REC = ONE / XMAX + IF( WORK( J1 ).GT.( BIGNUM-XJ )*REC ) THEN + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + X( J1 ) = X( J1 ) - SDOT( J1-1, T( 1, J1 ), 1, X, 1 ) +* + XJ = ABS( X( J1 ) ) + TJJ = ABS( T( J1, J1 ) ) + TMP = T( J1, J1 ) + IF( TJJ.LT.SMIN ) THEN + TMP = SMIN + TJJ = SMIN + INFO = 1 + END IF +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.BIGNUM*TJJ ) THEN + REC = ONE / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J1 ) = X( J1 ) / TMP + XMAX = MAX( XMAX, ABS( X( J1 ) ) ) +* + ELSE +* +* 2 by 2 diagonal block +* +* Scale if necessary to avoid overflow in forming the +* right-hand side elements by inner product. +* + XJ = MAX( ABS( X( J1 ) ), ABS( X( J2 ) ) ) + IF( XMAX.GT.ONE ) THEN + REC = ONE / XMAX + IF( MAX( WORK( J2 ), WORK( J1 ) ).GT.( BIGNUM-XJ )* + $ REC ) THEN + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + D( 1, 1 ) = X( J1 ) - SDOT( J1-1, T( 1, J1 ), 1, X, + $ 1 ) + D( 2, 1 ) = X( J2 ) - SDOT( J1-1, T( 1, J2 ), 1, X, + $ 1 ) +* + CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, T( J1, J1 ), + $ LDT, ONE, ONE, D, 2, ZERO, ZERO, V, 2, + $ SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 2 +* + IF( SCALOC.NE.ONE ) THEN + CALL SSCAL( N, SCALOC, X, 1 ) + SCALE = SCALE*SCALOC + END IF + X( J1 ) = V( 1, 1 ) + X( J2 ) = V( 2, 1 ) + XMAX = MAX( ABS( X( J1 ) ), ABS( X( J2 ) ), XMAX ) +* + END IF + 40 CONTINUE + END IF +* + ELSE +* + SMINW = MAX( EPS*ABS( W ), SMIN ) + IF( NOTRAN ) THEN +* +* Solve (T + iB)*(p+iq) = c+id +* + JNEXT = N + DO 70 J = N, 1, -1 + IF( J.GT.JNEXT ) + $ GO TO 70 + J1 = J + J2 = J + JNEXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNEXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1 by 1 diagonal block +* +* Scale if necessary to avoid overflow in division +* + Z = W + IF( J1.EQ.1 ) + $ Z = B( 1 ) + XJ = ABS( X( J1 ) ) + ABS( X( N+J1 ) ) + TJJ = ABS( T( J1, J1 ) ) + ABS( Z ) + TMP = T( J1, J1 ) + IF( TJJ.LT.SMINW ) THEN + TMP = SMINW + TJJ = SMINW + INFO = 1 + END IF +* + IF( XJ.EQ.ZERO ) + $ GO TO 70 +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.BIGNUM*TJJ ) THEN + REC = ONE / XJ + CALL SSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + CALL SLADIV( X( J1 ), X( N+J1 ), TMP, Z, SR, SI ) + X( J1 ) = SR + X( N+J1 ) = SI + XJ = ABS( X( J1 ) ) + ABS( X( N+J1 ) ) +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j1 of T. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( WORK( J1 ).GT.( BIGNUM-XMAX )*REC ) THEN + CALL SSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + END IF + END IF +* + IF( J1.GT.1 ) THEN + CALL SAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 ) + CALL SAXPY( J1-1, -X( N+J1 ), T( 1, J1 ), 1, + $ X( N+1 ), 1 ) +* + X( 1 ) = X( 1 ) + B( J1 )*X( N+J1 ) + X( N+1 ) = X( N+1 ) - B( J1 )*X( J1 ) +* + XMAX = ZERO + DO 50 K = 1, J1 - 1 + XMAX = MAX( XMAX, ABS( X( K ) )+ + $ ABS( X( K+N ) ) ) + 50 CONTINUE + END IF +* + ELSE +* +* Meet 2 by 2 diagonal block +* + D( 1, 1 ) = X( J1 ) + D( 2, 1 ) = X( J2 ) + D( 1, 2 ) = X( N+J1 ) + D( 2, 2 ) = X( N+J2 ) + CALL SLALN2( .FALSE., 2, 2, SMINW, ONE, T( J1, J1 ), + $ LDT, ONE, ONE, D, 2, ZERO, -W, V, 2, + $ SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 2 +* + IF( SCALOC.NE.ONE ) THEN + CALL SSCAL( 2*N, SCALOC, X, 1 ) + SCALE = SCALOC*SCALE + END IF + X( J1 ) = V( 1, 1 ) + X( J2 ) = V( 2, 1 ) + X( N+J1 ) = V( 1, 2 ) + X( N+J2 ) = V( 2, 2 ) +* +* Scale X(J1), .... to avoid overflow in +* updating right hand side. +* + XJ = MAX( ABS( V( 1, 1 ) )+ABS( V( 1, 2 ) ), + $ ABS( V( 2, 1 ) )+ABS( V( 2, 2 ) ) ) + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( MAX( WORK( J1 ), WORK( J2 ) ).GT. + $ ( BIGNUM-XMAX )*REC ) THEN + CALL SSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + END IF + END IF +* +* Update the right-hand side. +* + IF( J1.GT.1 ) THEN + CALL SAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 ) + CALL SAXPY( J1-1, -X( J2 ), T( 1, J2 ), 1, X, 1 ) +* + CALL SAXPY( J1-1, -X( N+J1 ), T( 1, J1 ), 1, + $ X( N+1 ), 1 ) + CALL SAXPY( J1-1, -X( N+J2 ), T( 1, J2 ), 1, + $ X( N+1 ), 1 ) +* + X( 1 ) = X( 1 ) + B( J1 )*X( N+J1 ) + + $ B( J2 )*X( N+J2 ) + X( N+1 ) = X( N+1 ) - B( J1 )*X( J1 ) - + $ B( J2 )*X( J2 ) +* + XMAX = ZERO + DO 60 K = 1, J1 - 1 + XMAX = MAX( ABS( X( K ) )+ABS( X( K+N ) ), + $ XMAX ) + 60 CONTINUE + END IF +* + END IF + 70 CONTINUE +* + ELSE +* +* Solve (T + iB)**T*(p+iq) = c+id +* + JNEXT = 1 + DO 80 J = 1, N + IF( J.LT.JNEXT ) + $ GO TO 80 + J1 = J + J2 = J + JNEXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNEXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1 by 1 diagonal block +* +* Scale if necessary to avoid overflow in forming the +* right-hand side element by inner product. +* + XJ = ABS( X( J1 ) ) + ABS( X( J1+N ) ) + IF( XMAX.GT.ONE ) THEN + REC = ONE / XMAX + IF( WORK( J1 ).GT.( BIGNUM-XJ )*REC ) THEN + CALL SSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + X( J1 ) = X( J1 ) - SDOT( J1-1, T( 1, J1 ), 1, X, 1 ) + X( N+J1 ) = X( N+J1 ) - SDOT( J1-1, T( 1, J1 ), 1, + $ X( N+1 ), 1 ) + IF( J1.GT.1 ) THEN + X( J1 ) = X( J1 ) - B( J1 )*X( N+1 ) + X( N+J1 ) = X( N+J1 ) + B( J1 )*X( 1 ) + END IF + XJ = ABS( X( J1 ) ) + ABS( X( J1+N ) ) +* + Z = W + IF( J1.EQ.1 ) + $ Z = B( 1 ) +* +* Scale if necessary to avoid overflow in +* complex division +* + TJJ = ABS( T( J1, J1 ) ) + ABS( Z ) + TMP = T( J1, J1 ) + IF( TJJ.LT.SMINW ) THEN + TMP = SMINW + TJJ = SMINW + INFO = 1 + END IF +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.BIGNUM*TJJ ) THEN + REC = ONE / XJ + CALL SSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + CALL SLADIV( X( J1 ), X( N+J1 ), TMP, -Z, SR, SI ) + X( J1 ) = SR + X( J1+N ) = SI + XMAX = MAX( ABS( X( J1 ) )+ABS( X( J1+N ) ), XMAX ) +* + ELSE +* +* 2 by 2 diagonal block +* +* Scale if necessary to avoid overflow in forming the +* right-hand side element by inner product. +* + XJ = MAX( ABS( X( J1 ) )+ABS( X( N+J1 ) ), + $ ABS( X( J2 ) )+ABS( X( N+J2 ) ) ) + IF( XMAX.GT.ONE ) THEN + REC = ONE / XMAX + IF( MAX( WORK( J1 ), WORK( J2 ) ).GT. + $ ( BIGNUM-XJ ) / XMAX ) THEN + CALL SSCAL( N2, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + D( 1, 1 ) = X( J1 ) - SDOT( J1-1, T( 1, J1 ), 1, X, + $ 1 ) + D( 2, 1 ) = X( J2 ) - SDOT( J1-1, T( 1, J2 ), 1, X, + $ 1 ) + D( 1, 2 ) = X( N+J1 ) - SDOT( J1-1, T( 1, J1 ), 1, + $ X( N+1 ), 1 ) + D( 2, 2 ) = X( N+J2 ) - SDOT( J1-1, T( 1, J2 ), 1, + $ X( N+1 ), 1 ) + D( 1, 1 ) = D( 1, 1 ) - B( J1 )*X( N+1 ) + D( 2, 1 ) = D( 2, 1 ) - B( J2 )*X( N+1 ) + D( 1, 2 ) = D( 1, 2 ) + B( J1 )*X( 1 ) + D( 2, 2 ) = D( 2, 2 ) + B( J2 )*X( 1 ) +* + CALL SLALN2( .TRUE., 2, 2, SMINW, ONE, T( J1, J1 ), + $ LDT, ONE, ONE, D, 2, ZERO, W, V, 2, + $ SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 2 +* + IF( SCALOC.NE.ONE ) THEN + CALL SSCAL( N2, SCALOC, X, 1 ) + SCALE = SCALOC*SCALE + END IF + X( J1 ) = V( 1, 1 ) + X( J2 ) = V( 2, 1 ) + X( N+J1 ) = V( 1, 2 ) + X( N+J2 ) = V( 2, 2 ) + XMAX = MAX( ABS( X( J1 ) )+ABS( X( N+J1 ) ), + $ ABS( X( J2 ) )+ABS( X( N+J2 ) ), XMAX ) +* + END IF +* + 80 CONTINUE +* + END IF +* + END IF +* + RETURN +* +* End of SLAQTR +* + END diff --git a/dspl/liblapack/SRC/slar1v.f b/dspl/liblapack/SRC/slar1v.f new file mode 100644 index 0000000..d242328 --- /dev/null +++ b/dspl/liblapack/SRC/slar1v.f @@ -0,0 +1,486 @@ +*> \brief \b SLAR1V computes the (scaled) r-th column of the inverse of the submatrix in rows b1 through bn of the tridiagonal matrix LDLT - λI. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAR1V + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAR1V( N, B1, BN, LAMBDA, D, L, LD, LLD, +* PIVMIN, GAPTOL, Z, WANTNC, NEGCNT, ZTZ, MINGMA, +* R, ISUPPZ, NRMINV, RESID, RQCORR, WORK ) +* +* .. Scalar Arguments .. +* LOGICAL WANTNC +* INTEGER B1, BN, N, NEGCNT, R +* REAL GAPTOL, LAMBDA, MINGMA, NRMINV, PIVMIN, RESID, +* $ RQCORR, ZTZ +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ) +* REAL D( * ), L( * ), LD( * ), LLD( * ), +* $ WORK( * ) +* REAL Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAR1V computes the (scaled) r-th column of the inverse of +*> the sumbmatrix in rows B1 through BN of the tridiagonal matrix +*> L D L**T - sigma I. When sigma is close to an eigenvalue, the +*> computed vector is an accurate eigenvector. Usually, r corresponds +*> to the index where the eigenvector is largest in magnitude. +*> The following steps accomplish this computation : +*> (a) Stationary qd transform, L D L**T - sigma I = L(+) D(+) L(+)**T, +*> (b) Progressive qd transform, L D L**T - sigma I = U(-) D(-) U(-)**T, +*> (c) Computation of the diagonal elements of the inverse of +*> L D L**T - sigma I by combining the above transforms, and choosing +*> r as the index where the diagonal of the inverse is (one of the) +*> largest in magnitude. +*> (d) Computation of the (scaled) r-th column of the inverse using the +*> twisted factorization obtained by combining the top part of the +*> the stationary and the bottom part of the progressive transform. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix L D L**T. +*> \endverbatim +*> +*> \param[in] B1 +*> \verbatim +*> B1 is INTEGER +*> First index of the submatrix of L D L**T. +*> \endverbatim +*> +*> \param[in] BN +*> \verbatim +*> BN is INTEGER +*> Last index of the submatrix of L D L**T. +*> \endverbatim +*> +*> \param[in] LAMBDA +*> \verbatim +*> LAMBDA is REAL +*> The shift. In order to compute an accurate eigenvector, +*> LAMBDA should be a good approximation to an eigenvalue +*> of L D L**T. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is REAL array, dimension (N-1) +*> The (n-1) subdiagonal elements of the unit bidiagonal matrix +*> L, in elements 1 to N-1. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of the diagonal matrix D. +*> \endverbatim +*> +*> \param[in] LD +*> \verbatim +*> LD is REAL array, dimension (N-1) +*> The n-1 elements L(i)*D(i). +*> \endverbatim +*> +*> \param[in] LLD +*> \verbatim +*> LLD is REAL array, dimension (N-1) +*> The n-1 elements L(i)*L(i)*D(i). +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is REAL +*> The minimum pivot in the Sturm sequence. +*> \endverbatim +*> +*> \param[in] GAPTOL +*> \verbatim +*> GAPTOL is REAL +*> Tolerance that indicates when eigenvector entries are negligible +*> w.r.t. their contribution to the residual. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension (N) +*> On input, all entries of Z must be set to 0. +*> On output, Z contains the (scaled) r-th column of the +*> inverse. The scaling is such that Z(R) equals 1. +*> \endverbatim +*> +*> \param[in] WANTNC +*> \verbatim +*> WANTNC is LOGICAL +*> Specifies whether NEGCNT has to be computed. +*> \endverbatim +*> +*> \param[out] NEGCNT +*> \verbatim +*> NEGCNT is INTEGER +*> If WANTNC is .TRUE. then NEGCNT = the number of pivots < pivmin +*> in the matrix factorization L D L**T, and NEGCNT = -1 otherwise. +*> \endverbatim +*> +*> \param[out] ZTZ +*> \verbatim +*> ZTZ is REAL +*> The square of the 2-norm of Z. +*> \endverbatim +*> +*> \param[out] MINGMA +*> \verbatim +*> MINGMA is REAL +*> The reciprocal of the largest (in magnitude) diagonal +*> element of the inverse of L D L**T - sigma I. +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is INTEGER +*> The twist index for the twisted factorization used to +*> compute Z. +*> On input, 0 <= R <= N. If R is input as 0, R is set to +*> the index where (L D L**T - sigma I)^{-1} is largest +*> in magnitude. If 1 <= R <= N, R is unchanged. +*> On output, R contains the twist index used to compute Z. +*> Ideally, R designates the position of the maximum entry in the +*> eigenvector. +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension (2) +*> The support of the vector in Z, i.e., the vector Z is +*> nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). +*> \endverbatim +*> +*> \param[out] NRMINV +*> \verbatim +*> NRMINV is REAL +*> NRMINV = 1/SQRT( ZTZ ) +*> \endverbatim +*> +*> \param[out] RESID +*> \verbatim +*> RESID is REAL +*> The residual of the FP vector. +*> RESID = ABS( MINGMA )/SQRT( ZTZ ) +*> \endverbatim +*> +*> \param[out] RQCORR +*> \verbatim +*> RQCORR is REAL +*> The Rayleigh Quotient correction to LAMBDA. +*> RQCORR = MINGMA*TMP +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (4*N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE SLAR1V( N, B1, BN, LAMBDA, D, L, LD, LLD, + $ PIVMIN, GAPTOL, Z, WANTNC, NEGCNT, ZTZ, MINGMA, + $ R, ISUPPZ, NRMINV, RESID, RQCORR, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL WANTNC + INTEGER B1, BN, N, NEGCNT, R + REAL GAPTOL, LAMBDA, MINGMA, NRMINV, PIVMIN, RESID, + $ RQCORR, ZTZ +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ) + REAL D( * ), L( * ), LD( * ), LLD( * ), + $ WORK( * ) + REAL Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) + +* .. +* .. Local Scalars .. + LOGICAL SAWNAN1, SAWNAN2 + INTEGER I, INDLPL, INDP, INDS, INDUMN, NEG1, NEG2, R1, + $ R2 + REAL DMINUS, DPLUS, EPS, S, TMP +* .. +* .. External Functions .. + LOGICAL SISNAN + REAL SLAMCH + EXTERNAL SISNAN, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + EPS = SLAMCH( 'Precision' ) + + + IF( R.EQ.0 ) THEN + R1 = B1 + R2 = BN + ELSE + R1 = R + R2 = R + END IF + +* Storage for LPLUS + INDLPL = 0 +* Storage for UMINUS + INDUMN = N + INDS = 2*N + 1 + INDP = 3*N + 1 + + IF( B1.EQ.1 ) THEN + WORK( INDS ) = ZERO + ELSE + WORK( INDS+B1-1 ) = LLD( B1-1 ) + END IF + +* +* Compute the stationary transform (using the differential form) +* until the index R2. +* + SAWNAN1 = .FALSE. + NEG1 = 0 + S = WORK( INDS+B1-1 ) - LAMBDA + DO 50 I = B1, R1 - 1 + DPLUS = D( I ) + S + WORK( INDLPL+I ) = LD( I ) / DPLUS + IF(DPLUS.LT.ZERO) NEG1 = NEG1 + 1 + WORK( INDS+I ) = S*WORK( INDLPL+I )*L( I ) + S = WORK( INDS+I ) - LAMBDA + 50 CONTINUE + SAWNAN1 = SISNAN( S ) + IF( SAWNAN1 ) GOTO 60 + DO 51 I = R1, R2 - 1 + DPLUS = D( I ) + S + WORK( INDLPL+I ) = LD( I ) / DPLUS + WORK( INDS+I ) = S*WORK( INDLPL+I )*L( I ) + S = WORK( INDS+I ) - LAMBDA + 51 CONTINUE + SAWNAN1 = SISNAN( S ) +* + 60 CONTINUE + IF( SAWNAN1 ) THEN +* Runs a slower version of the above loop if a NaN is detected + NEG1 = 0 + S = WORK( INDS+B1-1 ) - LAMBDA + DO 70 I = B1, R1 - 1 + DPLUS = D( I ) + S + IF(ABS(DPLUS).LT.PIVMIN) DPLUS = -PIVMIN + WORK( INDLPL+I ) = LD( I ) / DPLUS + IF(DPLUS.LT.ZERO) NEG1 = NEG1 + 1 + WORK( INDS+I ) = S*WORK( INDLPL+I )*L( I ) + IF( WORK( INDLPL+I ).EQ.ZERO ) + $ WORK( INDS+I ) = LLD( I ) + S = WORK( INDS+I ) - LAMBDA + 70 CONTINUE + DO 71 I = R1, R2 - 1 + DPLUS = D( I ) + S + IF(ABS(DPLUS).LT.PIVMIN) DPLUS = -PIVMIN + WORK( INDLPL+I ) = LD( I ) / DPLUS + WORK( INDS+I ) = S*WORK( INDLPL+I )*L( I ) + IF( WORK( INDLPL+I ).EQ.ZERO ) + $ WORK( INDS+I ) = LLD( I ) + S = WORK( INDS+I ) - LAMBDA + 71 CONTINUE + END IF +* +* Compute the progressive transform (using the differential form) +* until the index R1 +* + SAWNAN2 = .FALSE. + NEG2 = 0 + WORK( INDP+BN-1 ) = D( BN ) - LAMBDA + DO 80 I = BN - 1, R1, -1 + DMINUS = LLD( I ) + WORK( INDP+I ) + TMP = D( I ) / DMINUS + IF(DMINUS.LT.ZERO) NEG2 = NEG2 + 1 + WORK( INDUMN+I ) = L( I )*TMP + WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - LAMBDA + 80 CONTINUE + TMP = WORK( INDP+R1-1 ) + SAWNAN2 = SISNAN( TMP ) + + IF( SAWNAN2 ) THEN +* Runs a slower version of the above loop if a NaN is detected + NEG2 = 0 + DO 100 I = BN-1, R1, -1 + DMINUS = LLD( I ) + WORK( INDP+I ) + IF(ABS(DMINUS).LT.PIVMIN) DMINUS = -PIVMIN + TMP = D( I ) / DMINUS + IF(DMINUS.LT.ZERO) NEG2 = NEG2 + 1 + WORK( INDUMN+I ) = L( I )*TMP + WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - LAMBDA + IF( TMP.EQ.ZERO ) + $ WORK( INDP+I-1 ) = D( I ) - LAMBDA + 100 CONTINUE + END IF +* +* Find the index (from R1 to R2) of the largest (in magnitude) +* diagonal element of the inverse +* + MINGMA = WORK( INDS+R1-1 ) + WORK( INDP+R1-1 ) + IF( MINGMA.LT.ZERO ) NEG1 = NEG1 + 1 + IF( WANTNC ) THEN + NEGCNT = NEG1 + NEG2 + ELSE + NEGCNT = -1 + ENDIF + IF( ABS(MINGMA).EQ.ZERO ) + $ MINGMA = EPS*WORK( INDS+R1-1 ) + R = R1 + DO 110 I = R1, R2 - 1 + TMP = WORK( INDS+I ) + WORK( INDP+I ) + IF( TMP.EQ.ZERO ) + $ TMP = EPS*WORK( INDS+I ) + IF( ABS( TMP ).LE.ABS( MINGMA ) ) THEN + MINGMA = TMP + R = I + 1 + END IF + 110 CONTINUE +* +* Compute the FP vector: solve N^T v = e_r +* + ISUPPZ( 1 ) = B1 + ISUPPZ( 2 ) = BN + Z( R ) = ONE + ZTZ = ONE +* +* Compute the FP vector upwards from R +* + IF( .NOT.SAWNAN1 .AND. .NOT.SAWNAN2 ) THEN + DO 210 I = R-1, B1, -1 + Z( I ) = -( WORK( INDLPL+I )*Z( I+1 ) ) + IF( (ABS(Z(I))+ABS(Z(I+1)))* ABS(LD(I)).LT.GAPTOL ) + $ THEN + Z( I ) = ZERO + ISUPPZ( 1 ) = I + 1 + GOTO 220 + ENDIF + ZTZ = ZTZ + Z( I )*Z( I ) + 210 CONTINUE + 220 CONTINUE + ELSE +* Run slower loop if NaN occurred. + DO 230 I = R - 1, B1, -1 + IF( Z( I+1 ).EQ.ZERO ) THEN + Z( I ) = -( LD( I+1 ) / LD( I ) )*Z( I+2 ) + ELSE + Z( I ) = -( WORK( INDLPL+I )*Z( I+1 ) ) + END IF + IF( (ABS(Z(I))+ABS(Z(I+1)))* ABS(LD(I)).LT.GAPTOL ) + $ THEN + Z( I ) = ZERO + ISUPPZ( 1 ) = I + 1 + GO TO 240 + END IF + ZTZ = ZTZ + Z( I )*Z( I ) + 230 CONTINUE + 240 CONTINUE + ENDIF + +* Compute the FP vector downwards from R in blocks of size BLKSIZ + IF( .NOT.SAWNAN1 .AND. .NOT.SAWNAN2 ) THEN + DO 250 I = R, BN-1 + Z( I+1 ) = -( WORK( INDUMN+I )*Z( I ) ) + IF( (ABS(Z(I))+ABS(Z(I+1)))* ABS(LD(I)).LT.GAPTOL ) + $ THEN + Z( I+1 ) = ZERO + ISUPPZ( 2 ) = I + GO TO 260 + END IF + ZTZ = ZTZ + Z( I+1 )*Z( I+1 ) + 250 CONTINUE + 260 CONTINUE + ELSE +* Run slower loop if NaN occurred. + DO 270 I = R, BN - 1 + IF( Z( I ).EQ.ZERO ) THEN + Z( I+1 ) = -( LD( I-1 ) / LD( I ) )*Z( I-1 ) + ELSE + Z( I+1 ) = -( WORK( INDUMN+I )*Z( I ) ) + END IF + IF( (ABS(Z(I))+ABS(Z(I+1)))* ABS(LD(I)).LT.GAPTOL ) + $ THEN + Z( I+1 ) = ZERO + ISUPPZ( 2 ) = I + GO TO 280 + END IF + ZTZ = ZTZ + Z( I+1 )*Z( I+1 ) + 270 CONTINUE + 280 CONTINUE + END IF +* +* Compute quantities for convergence test +* + TMP = ONE / ZTZ + NRMINV = SQRT( TMP ) + RESID = ABS( MINGMA )*NRMINV + RQCORR = MINGMA*TMP +* +* + RETURN +* +* End of SLAR1V +* + END diff --git a/dspl/liblapack/SRC/slar2v.f b/dspl/liblapack/SRC/slar2v.f new file mode 100644 index 0000000..583f2e6 --- /dev/null +++ b/dspl/liblapack/SRC/slar2v.f @@ -0,0 +1,157 @@ +*> \brief \b SLAR2V applies a vector of plane rotations with real cosines and real sines from both sides to a sequence of 2-by-2 symmetric/Hermitian matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAR2V + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAR2V( N, X, Y, Z, INCX, C, S, INCC ) +* +* .. Scalar Arguments .. +* INTEGER INCC, INCX, N +* .. +* .. Array Arguments .. +* REAL C( * ), S( * ), X( * ), Y( * ), Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAR2V applies a vector of real plane rotations from both sides to +*> a sequence of 2-by-2 real symmetric matrices, defined by the elements +*> of the vectors x, y and z. For i = 1,2,...,n +*> +*> ( x(i) z(i) ) := ( c(i) s(i) ) ( x(i) z(i) ) ( c(i) -s(i) ) +*> ( z(i) y(i) ) ( -s(i) c(i) ) ( z(i) y(i) ) ( s(i) c(i) ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of plane rotations to be applied. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, +*> dimension (1+(N-1)*INCX) +*> The vector x. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is REAL array, +*> dimension (1+(N-1)*INCX) +*> The vector y. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, +*> dimension (1+(N-1)*INCX) +*> The vector z. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X, Y and Z. INCX > 0. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (1+(N-1)*INCC) +*> The cosines of the plane rotations. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is REAL array, dimension (1+(N-1)*INCC) +*> The sines of the plane rotations. +*> \endverbatim +*> +*> \param[in] INCC +*> \verbatim +*> INCC is INTEGER +*> The increment between elements of C and S. INCC > 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLAR2V( N, X, Y, Z, INCX, C, S, INCC ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, N +* .. +* .. Array Arguments .. + REAL C( * ), S( * ), X( * ), Y( * ), Z( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IC, IX + REAL CI, SI, T1, T2, T3, T4, T5, T6, XI, YI, ZI +* .. +* .. Executable Statements .. +* + IX = 1 + IC = 1 + DO 10 I = 1, N + XI = X( IX ) + YI = Y( IX ) + ZI = Z( IX ) + CI = C( IC ) + SI = S( IC ) + T1 = SI*ZI + T2 = CI*ZI + T3 = T2 - SI*XI + T4 = T2 + SI*YI + T5 = CI*XI + T1 + T6 = CI*YI - T1 + X( IX ) = CI*T5 + SI*T4 + Y( IX ) = CI*T6 - SI*T3 + Z( IX ) = CI*T4 - SI*T5 + IX = IX + INCX + IC = IC + INCC + 10 CONTINUE +* +* End of SLAR2V +* + RETURN + END diff --git a/dspl/liblapack/SRC/slarf.f b/dspl/liblapack/SRC/slarf.f new file mode 100644 index 0000000..d66c7bf --- /dev/null +++ b/dspl/liblapack/SRC/slarf.f @@ -0,0 +1,227 @@ +*> \brief \b SLARF applies an elementary reflector to a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* REAL TAU +* .. +* .. Array Arguments .. +* REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARF applies a real elementary reflector H to a real m by n matrix +*> C, from either the left or the right. H is represented in the form +*> +*> H = I - tau * v * v**T +*> +*> where tau is a real scalar and v is a real vector. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of H. V is not used if +*> TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + REAL TAU +* .. +* .. Array Arguments .. + REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL APPLYLEFT + INTEGER I, LASTV, LASTC +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SGER +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILASLR, ILASLC + EXTERNAL LSAME, ILASLR, ILASLC +* .. +* .. Executable Statements .. +* + APPLYLEFT = LSAME( SIDE, 'L' ) + LASTV = 0 + LASTC = 0 + IF( TAU.NE.ZERO ) THEN +! Set up variables for scanning V. LASTV begins pointing to the end +! of V. + IF( APPLYLEFT ) THEN + LASTV = M + ELSE + LASTV = N + END IF + IF( INCV.GT.0 ) THEN + I = 1 + (LASTV-1) * INCV + ELSE + I = 1 + END IF +! Look for the last non-zero row in V. + DO WHILE( LASTV.GT.0 .AND. V( I ).EQ.ZERO ) + LASTV = LASTV - 1 + I = I - INCV + END DO + IF( APPLYLEFT ) THEN +! Scan for the last non-zero column in C(1:lastv,:). + LASTC = ILASLC(LASTV, N, C, LDC) + ELSE +! Scan for the last non-zero row in C(:,1:lastv). + LASTC = ILASLR(M, LASTV, C, LDC) + END IF + END IF +! Note that lastc.eq.0 renders the BLAS operations null; no special +! case is needed at this level. + IF( APPLYLEFT ) THEN +* +* Form H * C +* + IF( LASTV.GT.0 ) THEN +* +* w(1:lastc,1) := C(1:lastv,1:lastc)**T * v(1:lastv,1) +* + CALL SGEMV( 'Transpose', LASTV, LASTC, ONE, C, LDC, V, INCV, + $ ZERO, WORK, 1 ) +* +* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)**T +* + CALL SGER( LASTV, LASTC, -TAU, V, INCV, WORK, 1, C, LDC ) + END IF + ELSE +* +* Form C * H +* + IF( LASTV.GT.0 ) THEN +* +* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) +* + CALL SGEMV( 'No transpose', LASTC, LASTV, ONE, C, LDC, + $ V, INCV, ZERO, WORK, 1 ) +* +* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)**T +* + CALL SGER( LASTC, LASTV, -TAU, WORK, 1, V, INCV, C, LDC ) + END IF + END IF + RETURN +* +* End of SLARF +* + END diff --git a/dspl/liblapack/SRC/slarfb.f b/dspl/liblapack/SRC/slarfb.f new file mode 100644 index 0000000..c51f695 --- /dev/null +++ b/dspl/liblapack/SRC/slarfb.f @@ -0,0 +1,710 @@ +*> \brief \b SLARFB applies a block reflector or its transpose to a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARFB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, +* T, LDT, C, LDC, WORK, LDWORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, SIDE, STOREV, TRANS +* INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. +* REAL C( LDC, * ), T( LDT, * ), V( LDV, * ), +* $ WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARFB applies a real block reflector H or its transpose H**T to a +*> real m by n matrix C, from either the left or the right. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply H or H**T from the Left +*> = 'R': apply H or H**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply H (No transpose) +*> = 'T': apply H**T (Transpose) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Indicates how H is formed from a product of elementary +*> reflectors +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Indicates how the vectors which define the elementary +*> reflectors are stored: +*> = 'C': Columnwise +*> = 'R': Rowwise +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the matrix T (= the number of elementary +*> reflectors whose product defines the block reflector). +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,M) if STOREV = 'R' and SIDE = 'L' +*> (LDV,N) if STOREV = 'R' and SIDE = 'R' +*> The matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); +*> if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); +*> if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is REAL array, dimension (LDT,K) +*> The triangular k by k matrix T in the representation of the +*> block reflector. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by H*C or H**T*C or C*H or C*H**T. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LDWORK,K) +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. +*> If SIDE = 'L', LDWORK >= max(1,N); +*> if SIDE = 'R', LDWORK >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2013 +* +*> \ingroup realOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored; the corresponding +*> array elements are modified but restored on exit. The rest of the +*> array is not used. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, + $ T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2013 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + REAL C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, STRMM +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( STOREV, 'C' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 ) (first K rows) +* ( V2 ) +* where V1 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C where C = ( C1 ) +* ( C2 ) +* +* W := C**T * V = (C1**T * V1 + C2**T * V2) (stored in WORK) +* +* W := C1**T +* + DO 10 J = 1, K + CALL SCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W := W * V1 +* + CALL STRMM( 'Right', 'Lower', 'No transpose', 'Unit', N, + $ K, ONE, V, LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C2**T * V2 +* + CALL SGEMM( 'Transpose', 'No transpose', N, K, M-K, + $ ONE, C( K+1, 1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T**T or W * T +* + CALL STRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W**T +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2 * W**T +* + CALL SGEMM( 'No transpose', 'Transpose', M-K, N, K, + $ -ONE, V( K+1, 1 ), LDV, WORK, LDWORK, ONE, + $ C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1**T +* + CALL STRMM( 'Right', 'Lower', 'Transpose', 'Unit', N, K, + $ ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W**T +* + DO 30 J = 1, K + DO 20 I = 1, N + C( J, I ) = C( J, I ) - WORK( I, J ) + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**T where C = ( C1 C2 ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C1 +* + DO 40 J = 1, K + CALL SCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W := W * V1 +* + CALL STRMM( 'Right', 'Lower', 'No transpose', 'Unit', M, + $ K, ONE, V, LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C2 * V2 +* + CALL SGEMM( 'No transpose', 'No transpose', M, K, N-K, + $ ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**T +* + CALL STRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V**T +* + IF( N.GT.K ) THEN +* +* C2 := C2 - W * V2**T +* + CALL SGEMM( 'No transpose', 'Transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V( K+1, 1 ), LDV, ONE, + $ C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1**T +* + CALL STRMM( 'Right', 'Lower', 'Transpose', 'Unit', M, K, + $ ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 60 J = 1, K + DO 50 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + ELSE +* +* Let V = ( V1 ) +* ( V2 ) (last K rows) +* where V2 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C where C = ( C1 ) +* ( C2 ) +* +* W := C**T * V = (C1**T * V1 + C2**T * V2) (stored in WORK) +* +* W := C2**T +* + DO 70 J = 1, K + CALL SCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 ) + 70 CONTINUE +* +* W := W * V2 +* + CALL STRMM( 'Right', 'Upper', 'No transpose', 'Unit', N, + $ K, ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1**T * V1 +* + CALL SGEMM( 'Transpose', 'No transpose', N, K, M-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T**T or W * T +* + CALL STRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W**T +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1 * W**T +* + CALL SGEMM( 'No transpose', 'Transpose', M-K, N, K, + $ -ONE, V, LDV, WORK, LDWORK, ONE, C, LDC ) + END IF +* +* W := W * V2**T +* + CALL STRMM( 'Right', 'Upper', 'Transpose', 'Unit', N, K, + $ ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK ) +* +* C2 := C2 - W**T +* + DO 90 J = 1, K + DO 80 I = 1, N + C( M-K+J, I ) = C( M-K+J, I ) - WORK( I, J ) + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H' where C = ( C1 C2 ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C2 +* + DO 100 J = 1, K + CALL SCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 100 CONTINUE +* +* W := W * V2 +* + CALL STRMM( 'Right', 'Upper', 'No transpose', 'Unit', M, + $ K, ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1 +* + CALL SGEMM( 'No transpose', 'No transpose', M, K, N-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**T +* + CALL STRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V**T +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1**T +* + CALL SGEMM( 'No transpose', 'Transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC ) + END IF +* +* W := W * V2**T +* + CALL STRMM( 'Right', 'Upper', 'Transpose', 'Unit', M, K, + $ ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK ) +* +* C2 := C2 - W +* + DO 120 J = 1, K + DO 110 I = 1, M + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* + ELSE IF( LSAME( STOREV, 'R' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 V2 ) (V1: first K columns) +* where V1 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C where C = ( C1 ) +* ( C2 ) +* +* W := C**T * V**T = (C1**T * V1**T + C2**T * V2**T) (stored in WORK) +* +* W := C1**T +* + DO 130 J = 1, K + CALL SCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 130 CONTINUE +* +* W := W * V1**T +* + CALL STRMM( 'Right', 'Upper', 'Transpose', 'Unit', N, K, + $ ONE, V, LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C2**T * V2**T +* + CALL SGEMM( 'Transpose', 'Transpose', N, K, M-K, ONE, + $ C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, ONE, + $ WORK, LDWORK ) + END IF +* +* W := W * T**T or W * T +* + CALL STRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V**T * W**T +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2**T * W**T +* + CALL SGEMM( 'Transpose', 'Transpose', M-K, N, K, -ONE, + $ V( 1, K+1 ), LDV, WORK, LDWORK, ONE, + $ C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL STRMM( 'Right', 'Upper', 'No transpose', 'Unit', N, + $ K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W**T +* + DO 150 J = 1, K + DO 140 I = 1, N + C( J, I ) = C( J, I ) - WORK( I, J ) + 140 CONTINUE + 150 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**T where C = ( C1 C2 ) +* +* W := C * V**T = (C1*V1**T + C2*V2**T) (stored in WORK) +* +* W := C1 +* + DO 160 J = 1, K + CALL SCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 160 CONTINUE +* +* W := W * V1**T +* + CALL STRMM( 'Right', 'Upper', 'Transpose', 'Unit', M, K, + $ ONE, V, LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C2 * V2**T +* + CALL SGEMM( 'No transpose', 'Transpose', M, K, N-K, + $ ONE, C( 1, K+1 ), LDC, V( 1, K+1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**T +* + CALL STRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C2 := C2 - W * V2 +* + CALL SGEMM( 'No transpose', 'No transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, ONE, + $ C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL STRMM( 'Right', 'Upper', 'No transpose', 'Unit', M, + $ K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 180 J = 1, K + DO 170 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 170 CONTINUE + 180 CONTINUE +* + END IF +* + ELSE +* +* Let V = ( V1 V2 ) (V2: last K columns) +* where V2 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C where C = ( C1 ) +* ( C2 ) +* +* W := C**T * V**T = (C1**T * V1**T + C2**T * V2**T) (stored in WORK) +* +* W := C2**T +* + DO 190 J = 1, K + CALL SCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 ) + 190 CONTINUE +* +* W := W * V2**T +* + CALL STRMM( 'Right', 'Lower', 'Transpose', 'Unit', N, K, + $ ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1**T * V1**T +* + CALL SGEMM( 'Transpose', 'Transpose', N, K, M-K, ONE, + $ C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T**T or W * T +* + CALL STRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V**T * W**T +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1**T * W**T +* + CALL SGEMM( 'Transpose', 'Transpose', M-K, N, K, -ONE, + $ V, LDV, WORK, LDWORK, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL STRMM( 'Right', 'Lower', 'No transpose', 'Unit', N, + $ K, ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK ) +* +* C2 := C2 - W**T +* + DO 210 J = 1, K + DO 200 I = 1, N + C( M-K+J, I ) = C( M-K+J, I ) - WORK( I, J ) + 200 CONTINUE + 210 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**T where C = ( C1 C2 ) +* +* W := C * V**T = (C1*V1**T + C2*V2**T) (stored in WORK) +* +* W := C2 +* + DO 220 J = 1, K + CALL SCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 220 CONTINUE +* +* W := W * V2**T +* + CALL STRMM( 'Right', 'Lower', 'Transpose', 'Unit', M, K, + $ ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1**T +* + CALL SGEMM( 'No transpose', 'Transpose', M, K, N-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**T +* + CALL STRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1 +* + CALL SGEMM( 'No transpose', 'No transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL STRMM( 'Right', 'Lower', 'No transpose', 'Unit', M, + $ K, ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 240 J = 1, K + DO 230 I = 1, M + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 230 CONTINUE + 240 CONTINUE +* + END IF +* + END IF + END IF +* + RETURN +* +* End of SLARFB +* + END diff --git a/dspl/liblapack/SRC/slarfg.f b/dspl/liblapack/SRC/slarfg.f new file mode 100644 index 0000000..a528ce3 --- /dev/null +++ b/dspl/liblapack/SRC/slarfg.f @@ -0,0 +1,196 @@ +*> \brief \b SLARFG generates an elementary reflector (Householder matrix). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARFG + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARFG( N, ALPHA, X, INCX, TAU ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* REAL ALPHA, TAU +* .. +* .. Array Arguments .. +* REAL X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARFG generates a real elementary reflector H of order n, such +*> that +*> +*> H * ( alpha ) = ( beta ), H**T * H = I. +*> ( x ) ( 0 ) +*> +*> where alpha and beta are scalars, and x is an (n-1)-element real +*> vector. H is represented in the form +*> +*> H = I - tau * ( 1 ) * ( 1 v**T ) , +*> ( v ) +*> +*> where tau is a real scalar and v is a real (n-1)-element +*> vector. +*> +*> If the elements of x are all zero, then tau = 0 and H is taken to be +*> the unit matrix. +*> +*> Otherwise 1 <= tau <= 2. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the elementary reflector. +*> \endverbatim +*> +*> \param[in,out] ALPHA +*> \verbatim +*> ALPHA is REAL +*> On entry, the value alpha. +*> On exit, it is overwritten with the value beta. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension +*> (1+(N-2)*abs(INCX)) +*> On entry, the vector x. +*> On exit, it is overwritten with the vector v. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL +*> The value tau. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLARFG( N, ALPHA, X, INCX, TAU ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INCX, N + REAL ALPHA, TAU +* .. +* .. Array Arguments .. + REAL X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER J, KNT + REAL BETA, RSAFMN, SAFMIN, XNORM +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2, SNRM2 + EXTERNAL SLAMCH, SLAPY2, SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN +* .. +* .. External Subroutines .. + EXTERNAL SSCAL +* .. +* .. Executable Statements .. +* + IF( N.LE.1 ) THEN + TAU = ZERO + RETURN + END IF +* + XNORM = SNRM2( N-1, X, INCX ) +* + IF( XNORM.EQ.ZERO ) THEN +* +* H = I +* + TAU = ZERO + ELSE +* +* general case +* + BETA = -SIGN( SLAPY2( ALPHA, XNORM ), ALPHA ) + SAFMIN = SLAMCH( 'S' ) / SLAMCH( 'E' ) + KNT = 0 + IF( ABS( BETA ).LT.SAFMIN ) THEN +* +* XNORM, BETA may be inaccurate; scale X and recompute them +* + RSAFMN = ONE / SAFMIN + 10 CONTINUE + KNT = KNT + 1 + CALL SSCAL( N-1, RSAFMN, X, INCX ) + BETA = BETA*RSAFMN + ALPHA = ALPHA*RSAFMN + IF( (ABS( BETA ).LT.SAFMIN) .AND. (KNT .LT. 20) ) + $ GO TO 10 +* +* New BETA is at most 1, at least SAFMIN +* + XNORM = SNRM2( N-1, X, INCX ) + BETA = -SIGN( SLAPY2( ALPHA, XNORM ), ALPHA ) + END IF + TAU = ( BETA-ALPHA ) / BETA + CALL SSCAL( N-1, ONE / ( ALPHA-BETA ), X, INCX ) +* +* If ALPHA is subnormal, it may lose relative accuracy +* + DO 20 J = 1, KNT + BETA = BETA*SAFMIN + 20 CONTINUE + ALPHA = BETA + END IF +* + RETURN +* +* End of SLARFG +* + END diff --git a/dspl/liblapack/SRC/slarfgp.f b/dspl/liblapack/SRC/slarfgp.f new file mode 100644 index 0000000..6a4c084 --- /dev/null +++ b/dspl/liblapack/SRC/slarfgp.f @@ -0,0 +1,242 @@ +*> \brief \b SLARFGP generates an elementary reflector (Householder matrix) with non-negative beta. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARFGP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARFGP( N, ALPHA, X, INCX, TAU ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* REAL ALPHA, TAU +* .. +* .. Array Arguments .. +* REAL X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARFGP generates a real elementary reflector H of order n, such +*> that +*> +*> H * ( alpha ) = ( beta ), H**T * H = I. +*> ( x ) ( 0 ) +*> +*> where alpha and beta are scalars, beta is non-negative, and x is +*> an (n-1)-element real vector. H is represented in the form +*> +*> H = I - tau * ( 1 ) * ( 1 v**T ) , +*> ( v ) +*> +*> where tau is a real scalar and v is a real (n-1)-element +*> vector. +*> +*> If the elements of x are all zero, then tau = 0 and H is taken to be +*> the unit matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the elementary reflector. +*> \endverbatim +*> +*> \param[in,out] ALPHA +*> \verbatim +*> ALPHA is REAL +*> On entry, the value alpha. +*> On exit, it is overwritten with the value beta. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension +*> (1+(N-2)*abs(INCX)) +*> On entry, the vector x. +*> On exit, it is overwritten with the vector v. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL +*> The value tau. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLARFGP( N, ALPHA, X, INCX, TAU ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INCX, N + REAL ALPHA, TAU +* .. +* .. Array Arguments .. + REAL X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL TWO, ONE, ZERO + PARAMETER ( TWO = 2.0E+0, ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER J, KNT + REAL BETA, BIGNUM, SAVEALPHA, SMLNUM, XNORM +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2, SNRM2 + EXTERNAL SLAMCH, SLAPY2, SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN +* .. +* .. External Subroutines .. + EXTERNAL SSCAL +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + TAU = ZERO + RETURN + END IF +* + XNORM = SNRM2( N-1, X, INCX ) +* + IF( XNORM.EQ.ZERO ) THEN +* +* H = [+/-1, 0; I], sign chosen so ALPHA >= 0. +* + IF( ALPHA.GE.ZERO ) THEN +* When TAU.eq.ZERO, the vector is special-cased to be +* all zeros in the application routines. We do not need +* to clear it. + TAU = ZERO + ELSE +* However, the application routines rely on explicit +* zero checks when TAU.ne.ZERO, and we must clear X. + TAU = TWO + DO J = 1, N-1 + X( 1 + (J-1)*INCX ) = 0 + END DO + ALPHA = -ALPHA + END IF + ELSE +* +* general case +* + BETA = SIGN( SLAPY2( ALPHA, XNORM ), ALPHA ) + SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'E' ) + KNT = 0 + IF( ABS( BETA ).LT.SMLNUM ) THEN +* +* XNORM, BETA may be inaccurate; scale X and recompute them +* + BIGNUM = ONE / SMLNUM + 10 CONTINUE + KNT = KNT + 1 + CALL SSCAL( N-1, BIGNUM, X, INCX ) + BETA = BETA*BIGNUM + ALPHA = ALPHA*BIGNUM + IF( (ABS( BETA ).LT.SMLNUM) .AND. (KNT .LT. 20) ) + $ GO TO 10 +* +* New BETA is at most 1, at least SMLNUM +* + XNORM = SNRM2( N-1, X, INCX ) + BETA = SIGN( SLAPY2( ALPHA, XNORM ), ALPHA ) + END IF + SAVEALPHA = ALPHA + ALPHA = ALPHA + BETA + IF( BETA.LT.ZERO ) THEN + BETA = -BETA + TAU = -ALPHA / BETA + ELSE + ALPHA = XNORM * (XNORM/ALPHA) + TAU = ALPHA / BETA + ALPHA = -ALPHA + END IF +* + IF ( ABS(TAU).LE.SMLNUM ) THEN +* +* In the case where the computed TAU ends up being a denormalized number, +* it loses relative accuracy. This is a BIG problem. Solution: flush TAU +* to ZERO. This explains the next IF statement. +* +* (Bug report provided by Pat Quillen from MathWorks on Jul 29, 2009.) +* (Thanks Pat. Thanks MathWorks.) +* + IF( SAVEALPHA.GE.ZERO ) THEN + TAU = ZERO + ELSE + TAU = TWO + DO J = 1, N-1 + X( 1 + (J-1)*INCX ) = 0 + END DO + BETA = -SAVEALPHA + END IF +* + ELSE +* +* This is the general case. +* + CALL SSCAL( N-1, ONE / ALPHA, X, INCX ) +* + END IF +* +* If BETA is subnormal, it may lose relative accuracy +* + DO 20 J = 1, KNT + BETA = BETA*SMLNUM + 20 CONTINUE + ALPHA = BETA + END IF +* + RETURN +* +* End of SLARFGP +* + END diff --git a/dspl/liblapack/SRC/slarft.f b/dspl/liblapack/SRC/slarft.f new file mode 100644 index 0000000..05ee8b2 --- /dev/null +++ b/dspl/liblapack/SRC/slarft.f @@ -0,0 +1,326 @@ +*> \brief \b SLARFT forms the triangular factor T of a block reflector H = I - vtvH +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARFT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* REAL T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARFT forms the triangular factor T of a real block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**T +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**T * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + REAL T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, STRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( I, PREVLASTV ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( I , J ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**T * V(i:j,i) +* + CALL SGEMV( 'Transpose', J-I, I-1, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I+1, I ), 1, ONE, + $ T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**T +* + CALL SGEMV( 'No transpose', I-1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, + $ ONE, T( 1, I ), 1 ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL STRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( N-K+I , J ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**T * V(j:n-k+i,i) +* + CALL SGEMV( 'Transpose', N-K+I-J, K-I, -TAU( I ), + $ V( J, I+1 ), LDV, V( J, I ), 1, ONE, + $ T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**T +* + CALL SGEMV( 'No transpose', K-I, N-K+I-J, + $ -TAU( I ), V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), 1 ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL STRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of SLARFT +* + END diff --git a/dspl/liblapack/SRC/slarfx.f b/dspl/liblapack/SRC/slarfx.f new file mode 100644 index 0000000..590e99e --- /dev/null +++ b/dspl/liblapack/SRC/slarfx.f @@ -0,0 +1,696 @@ +*> \brief \b SLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the reflector has order ≤ 10. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARFX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARFX( SIDE, M, N, V, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER LDC, M, N +* REAL TAU +* .. +* .. Array Arguments .. +* REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARFX applies a real elementary reflector H to a real m by n +*> matrix C, from either the left or the right. H is represented in the +*> form +*> +*> H = I - tau * v * v**T +*> +*> where tau is a real scalar and v is a real vector. +*> +*> If tau = 0, then H is taken to be the unit matrix +*> +*> This version uses inline code if H has order < 11. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension (M) if SIDE = 'L' +*> or (N) if SIDE = 'R' +*> The vector v in the representation of H. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDA >= (1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> WORK is not referenced if H has order < 11. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLARFX( SIDE, M, N, V, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER LDC, M, N + REAL TAU +* .. +* .. Array Arguments .. + REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER J + REAL SUM, T1, T10, T2, T3, T4, T5, T6, T7, T8, T9, + $ V1, V10, V2, V3, V4, V5, V6, V7, V8, V9 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLARF +* .. +* .. Executable Statements .. +* + IF( TAU.EQ.ZERO ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C, where H has order m. +* + GO TO ( 10, 30, 50, 70, 90, 110, 130, 150, + $ 170, 190 )M +* +* Code for general M +* + CALL SLARF( SIDE, M, N, V, 1, TAU, C, LDC, WORK ) + GO TO 410 + 10 CONTINUE +* +* Special code for 1 x 1 Householder +* + T1 = ONE - TAU*V( 1 )*V( 1 ) + DO 20 J = 1, N + C( 1, J ) = T1*C( 1, J ) + 20 CONTINUE + GO TO 410 + 30 CONTINUE +* +* Special code for 2 x 2 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + DO 40 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + 40 CONTINUE + GO TO 410 + 50 CONTINUE +* +* Special code for 3 x 3 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + DO 60 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + 60 CONTINUE + GO TO 410 + 70 CONTINUE +* +* Special code for 4 x 4 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + DO 80 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + 80 CONTINUE + GO TO 410 + 90 CONTINUE +* +* Special code for 5 x 5 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + DO 100 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + 100 CONTINUE + GO TO 410 + 110 CONTINUE +* +* Special code for 6 x 6 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + DO 120 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + 120 CONTINUE + GO TO 410 + 130 CONTINUE +* +* Special code for 7 x 7 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + DO 140 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + 140 CONTINUE + GO TO 410 + 150 CONTINUE +* +* Special code for 8 x 8 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + DO 160 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + 160 CONTINUE + GO TO 410 + 170 CONTINUE +* +* Special code for 9 x 9 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + V9 = V( 9 ) + T9 = TAU*V9 + DO 180 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + C( 9, J ) = C( 9, J ) - SUM*T9 + 180 CONTINUE + GO TO 410 + 190 CONTINUE +* +* Special code for 10 x 10 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + V9 = V( 9 ) + T9 = TAU*V9 + V10 = V( 10 ) + T10 = TAU*V10 + DO 200 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) + + $ V10*C( 10, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + C( 9, J ) = C( 9, J ) - SUM*T9 + C( 10, J ) = C( 10, J ) - SUM*T10 + 200 CONTINUE + GO TO 410 + ELSE +* +* Form C * H, where H has order n. +* + GO TO ( 210, 230, 250, 270, 290, 310, 330, 350, + $ 370, 390 )N +* +* Code for general N +* + CALL SLARF( SIDE, M, N, V, 1, TAU, C, LDC, WORK ) + GO TO 410 + 210 CONTINUE +* +* Special code for 1 x 1 Householder +* + T1 = ONE - TAU*V( 1 )*V( 1 ) + DO 220 J = 1, M + C( J, 1 ) = T1*C( J, 1 ) + 220 CONTINUE + GO TO 410 + 230 CONTINUE +* +* Special code for 2 x 2 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + DO 240 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + 240 CONTINUE + GO TO 410 + 250 CONTINUE +* +* Special code for 3 x 3 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + DO 260 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + 260 CONTINUE + GO TO 410 + 270 CONTINUE +* +* Special code for 4 x 4 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + DO 280 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + 280 CONTINUE + GO TO 410 + 290 CONTINUE +* +* Special code for 5 x 5 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + DO 300 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + 300 CONTINUE + GO TO 410 + 310 CONTINUE +* +* Special code for 6 x 6 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + DO 320 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + 320 CONTINUE + GO TO 410 + 330 CONTINUE +* +* Special code for 7 x 7 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + DO 340 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + 340 CONTINUE + GO TO 410 + 350 CONTINUE +* +* Special code for 8 x 8 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + DO 360 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + 360 CONTINUE + GO TO 410 + 370 CONTINUE +* +* Special code for 9 x 9 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + V9 = V( 9 ) + T9 = TAU*V9 + DO 380 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + C( J, 9 ) = C( J, 9 ) - SUM*T9 + 380 CONTINUE + GO TO 410 + 390 CONTINUE +* +* Special code for 10 x 10 Householder +* + V1 = V( 1 ) + T1 = TAU*V1 + V2 = V( 2 ) + T2 = TAU*V2 + V3 = V( 3 ) + T3 = TAU*V3 + V4 = V( 4 ) + T4 = TAU*V4 + V5 = V( 5 ) + T5 = TAU*V5 + V6 = V( 6 ) + T6 = TAU*V6 + V7 = V( 7 ) + T7 = TAU*V7 + V8 = V( 8 ) + T8 = TAU*V8 + V9 = V( 9 ) + T9 = TAU*V9 + V10 = V( 10 ) + T10 = TAU*V10 + DO 400 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) + + $ V10*C( J, 10 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + C( J, 9 ) = C( J, 9 ) - SUM*T9 + C( J, 10 ) = C( J, 10 ) - SUM*T10 + 400 CONTINUE + GO TO 410 + END IF + 410 RETURN +* +* End of SLARFX +* + END diff --git a/dspl/liblapack/SRC/slarfy.f b/dspl/liblapack/SRC/slarfy.f new file mode 100644 index 0000000..340c544 --- /dev/null +++ b/dspl/liblapack/SRC/slarfy.f @@ -0,0 +1,161 @@ +*> \brief \b SLARFY +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE SLARFY( UPLO, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INCV, LDC, N +* REAL TAU +* .. +* .. Array Arguments .. +* REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARFY applies an elementary reflector, or Householder matrix, H, +*> to an n x n symmetric matrix C, from both the left and the right. +*> +*> H is represented in the form +*> +*> H = I - tau * v * v' +*> +*> where tau is a scalar and v is a vector. +*> +*> If tau is zero, then H is taken to be the unit matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix C is stored. +*> = 'U': Upper triangle +*> = 'L': Lower triangle +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension +*> (1 + (N-1)*abs(INCV)) +*> The vector v as described above. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between successive elements of v. INCV must +*> not be zero. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL +*> The value tau as described above. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC, N) +*> On entry, the matrix C. +*> On exit, C is overwritten by H * C * H'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max( 1, N ). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup single_eig +* +* ===================================================================== + SUBROUTINE SLARFY( UPLO, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCV, LDC, N + REAL TAU +* .. +* .. Array Arguments .. + REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO, HALF + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0, HALF = 0.5E+0 ) +* .. +* .. Local Scalars .. + REAL ALPHA +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SSYMV, SSYR2 +* .. +* .. External Functions .. + REAL SDOT + EXTERNAL SDOT +* .. +* .. Executable Statements .. +* + IF( TAU.EQ.ZERO ) + $ RETURN +* +* Form w:= C * v +* + CALL SSYMV( UPLO, N, ONE, C, LDC, V, INCV, ZERO, WORK, 1 ) +* + ALPHA = -HALF*TAU*SDOT( N, WORK, 1, V, INCV ) + CALL SAXPY( N, ALPHA, V, INCV, WORK, 1 ) +* +* C := C - v * w' - w * v' +* + CALL SSYR2( UPLO, N, -TAU, V, INCV, WORK, 1, C, LDC ) +* + RETURN +* +* End of SLARFY +* + END diff --git a/dspl/liblapack/SRC/slargv.f b/dspl/liblapack/SRC/slargv.f new file mode 100644 index 0000000..694eccc --- /dev/null +++ b/dspl/liblapack/SRC/slargv.f @@ -0,0 +1,167 @@ +*> \brief \b SLARGV generates a vector of plane rotations with real cosines and real sines. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARGV( N, X, INCX, Y, INCY, C, INCC ) +* +* .. Scalar Arguments .. +* INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. +* REAL C( * ), X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARGV generates a vector of real plane rotations, determined by +*> elements of the real vectors x and y. For i = 1,2,...,n +*> +*> ( c(i) s(i) ) ( x(i) ) = ( a(i) ) +*> ( -s(i) c(i) ) ( y(i) ) = ( 0 ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of plane rotations to be generated. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, +*> dimension (1+(N-1)*INCX) +*> On entry, the vector x. +*> On exit, x(i) is overwritten by a(i), for i = 1,...,n. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is REAL array, +*> dimension (1+(N-1)*INCY) +*> On entry, the vector y. +*> On exit, the sines of the plane rotations. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> The increment between elements of Y. INCY > 0. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is REAL array, dimension (1+(N-1)*INCC) +*> The cosines of the plane rotations. +*> \endverbatim +*> +*> \param[in] INCC +*> \verbatim +*> INCC is INTEGER +*> The increment between elements of C. INCC > 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLARGV( N, X, INCX, Y, INCY, C, INCC ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. + REAL C( * ), X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IC, IX, IY + REAL F, G, T, TT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IX = 1 + IY = 1 + IC = 1 + DO 10 I = 1, N + F = X( IX ) + G = Y( IY ) + IF( G.EQ.ZERO ) THEN + C( IC ) = ONE + ELSE IF( F.EQ.ZERO ) THEN + C( IC ) = ZERO + Y( IY ) = ONE + X( IX ) = G + ELSE IF( ABS( F ).GT.ABS( G ) ) THEN + T = G / F + TT = SQRT( ONE+T*T ) + C( IC ) = ONE / TT + Y( IY ) = T*C( IC ) + X( IX ) = F*TT + ELSE + T = F / G + TT = SQRT( ONE+T*T ) + Y( IY ) = ONE / TT + C( IC ) = T*Y( IY ) + X( IX ) = G*TT + END IF + IC = IC + INCC + IY = IY + INCY + IX = IX + INCX + 10 CONTINUE + RETURN +* +* End of SLARGV +* + END diff --git a/dspl/liblapack/SRC/slarnv.f b/dspl/liblapack/SRC/slarnv.f new file mode 100644 index 0000000..44fdeb9 --- /dev/null +++ b/dspl/liblapack/SRC/slarnv.f @@ -0,0 +1,178 @@ +*> \brief \b SLARNV returns a vector of random numbers from a uniform or normal distribution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARNV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARNV( IDIST, ISEED, N, X ) +* +* .. Scalar Arguments .. +* INTEGER IDIST, N +* .. +* .. Array Arguments .. +* INTEGER ISEED( 4 ) +* REAL X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARNV returns a vector of n random real numbers from a uniform or +*> normal distribution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] IDIST +*> \verbatim +*> IDIST is INTEGER +*> Specifies the distribution of the random numbers: +*> = 1: uniform (0,1) +*> = 2: uniform (-1,1) +*> = 3: normal (0,1) +*> \endverbatim +*> +*> \param[in,out] ISEED +*> \verbatim +*> ISEED is INTEGER array, dimension (4) +*> On entry, the seed of the random number generator; the array +*> elements must be between 0 and 4095, and ISEED(4) must be +*> odd. +*> On exit, the seed is updated. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of random numbers to be generated. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (N) +*> The generated random numbers. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> This routine calls the auxiliary routine SLARUV to generate random +*> real numbers from a uniform (0,1) distribution, in batches of up to +*> 128 using vectorisable code. The Box-Muller method is used to +*> transform numbers from a uniform to a normal distribution. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLARNV( IDIST, ISEED, N, X ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IDIST, N +* .. +* .. Array Arguments .. + INTEGER ISEED( 4 ) + REAL X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, TWO + PARAMETER ( ONE = 1.0E+0, TWO = 2.0E+0 ) + INTEGER LV + PARAMETER ( LV = 128 ) + REAL TWOPI + PARAMETER ( TWOPI = 6.2831853071795864769252867663E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IL, IL2, IV +* .. +* .. Local Arrays .. + REAL U( LV ) +* .. +* .. Intrinsic Functions .. + INTRINSIC COS, LOG, MIN, SQRT +* .. +* .. External Subroutines .. + EXTERNAL SLARUV +* .. +* .. Executable Statements .. +* + DO 40 IV = 1, N, LV / 2 + IL = MIN( LV / 2, N-IV+1 ) + IF( IDIST.EQ.3 ) THEN + IL2 = 2*IL + ELSE + IL2 = IL + END IF +* +* Call SLARUV to generate IL2 numbers from a uniform (0,1) +* distribution (IL2 <= LV) +* + CALL SLARUV( ISEED, IL2, U ) +* + IF( IDIST.EQ.1 ) THEN +* +* Copy generated numbers +* + DO 10 I = 1, IL + X( IV+I-1 ) = U( I ) + 10 CONTINUE + ELSE IF( IDIST.EQ.2 ) THEN +* +* Convert generated numbers to uniform (-1,1) distribution +* + DO 20 I = 1, IL + X( IV+I-1 ) = TWO*U( I ) - ONE + 20 CONTINUE + ELSE IF( IDIST.EQ.3 ) THEN +* +* Convert generated numbers to normal (0,1) distribution +* + DO 30 I = 1, IL + X( IV+I-1 ) = SQRT( -TWO*LOG( U( 2*I-1 ) ) )* + $ COS( TWOPI*U( 2*I ) ) + 30 CONTINUE + END IF + 40 CONTINUE + RETURN +* +* End of SLARNV +* + END diff --git a/dspl/liblapack/SRC/slarra.f b/dspl/liblapack/SRC/slarra.f new file mode 100644 index 0000000..0456263 --- /dev/null +++ b/dspl/liblapack/SRC/slarra.f @@ -0,0 +1,210 @@ +*> \brief \b SLARRA computes the splitting points with the specified threshold. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARRA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARRA( N, D, E, E2, SPLTOL, TNRM, +* NSPLIT, ISPLIT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N, NSPLIT +* REAL SPLTOL, TNRM +* .. +* .. Array Arguments .. +* INTEGER ISPLIT( * ) +* REAL D( * ), E( * ), E2( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Compute the splitting points with threshold SPLTOL. +*> SLARRA sets any "small" off-diagonal elements to zero. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N > 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the N diagonal elements of the tridiagonal +*> matrix T. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N) +*> On entry, the first (N-1) entries contain the subdiagonal +*> elements of the tridiagonal matrix T; E(N) need not be set. +*> On exit, the entries E( ISPLIT( I ) ), 1 <= I <= NSPLIT, +*> are set to zero, the other entries of E are untouched. +*> \endverbatim +*> +*> \param[in,out] E2 +*> \verbatim +*> E2 is REAL array, dimension (N) +*> On entry, the first (N-1) entries contain the SQUARES of the +*> subdiagonal elements of the tridiagonal matrix T; +*> E2(N) need not be set. +*> On exit, the entries E2( ISPLIT( I ) ), +*> 1 <= I <= NSPLIT, have been set to zero +*> \endverbatim +*> +*> \param[in] SPLTOL +*> \verbatim +*> SPLTOL is REAL +*> The threshold for splitting. Two criteria can be used: +*> SPLTOL<0 : criterion based on absolute off-diagonal value +*> SPLTOL>0 : criterion that preserves relative accuracy +*> \endverbatim +*> +*> \param[in] TNRM +*> \verbatim +*> TNRM is REAL +*> The norm of the matrix. +*> \endverbatim +*> +*> \param[out] NSPLIT +*> \verbatim +*> NSPLIT is INTEGER +*> The number of blocks T splits into. 1 <= NSPLIT <= N. +*> \endverbatim +*> +*> \param[out] ISPLIT +*> \verbatim +*> ISPLIT is INTEGER array, dimension (N) +*> The splitting points, at which T breaks up into blocks. +*> The first block consists of rows/columns 1 to ISPLIT(1), +*> the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), +*> etc., and the NSPLIT-th consists of rows/columns +*> ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE SLARRA( N, D, E, E2, SPLTOL, TNRM, + $ NSPLIT, ISPLIT, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, N, NSPLIT + REAL SPLTOL, TNRM +* .. +* .. Array Arguments .. + INTEGER ISPLIT( * ) + REAL D( * ), E( * ), E2( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL EABS, TMP1 + +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* +* Compute splitting points + NSPLIT = 1 + IF(SPLTOL.LT.ZERO) THEN +* Criterion based on absolute off-diagonal value + TMP1 = ABS(SPLTOL)* TNRM + DO 9 I = 1, N-1 + EABS = ABS( E(I) ) + IF( EABS .LE. TMP1) THEN + E(I) = ZERO + E2(I) = ZERO + ISPLIT( NSPLIT ) = I + NSPLIT = NSPLIT + 1 + END IF + 9 CONTINUE + ELSE +* Criterion that guarantees relative accuracy + DO 10 I = 1, N-1 + EABS = ABS( E(I) ) + IF( EABS .LE. SPLTOL * SQRT(ABS(D(I)))*SQRT(ABS(D(I+1))) ) + $ THEN + E(I) = ZERO + E2(I) = ZERO + ISPLIT( NSPLIT ) = I + NSPLIT = NSPLIT + 1 + END IF + 10 CONTINUE + ENDIF + ISPLIT( NSPLIT ) = N + + RETURN +* +* End of SLARRA +* + END diff --git a/dspl/liblapack/SRC/slarrb.f b/dspl/liblapack/SRC/slarrb.f new file mode 100644 index 0000000..988e25f --- /dev/null +++ b/dspl/liblapack/SRC/slarrb.f @@ -0,0 +1,407 @@ +*> \brief \b SLARRB provides limited bisection to locate eigenvalues for more accuracy. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARRB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARRB( N, D, LLD, IFIRST, ILAST, RTOL1, +* RTOL2, OFFSET, W, WGAP, WERR, WORK, IWORK, +* PIVMIN, SPDIAM, TWIST, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IFIRST, ILAST, INFO, N, OFFSET, TWIST +* REAL PIVMIN, RTOL1, RTOL2, SPDIAM +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL D( * ), LLD( * ), W( * ), +* $ WERR( * ), WGAP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Given the relatively robust representation(RRR) L D L^T, SLARRB +*> does "limited" bisection to refine the eigenvalues of L D L^T, +*> W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial +*> guesses for these eigenvalues are input in W, the corresponding estimate +*> of the error in these guesses and their gaps are input in WERR +*> and WGAP, respectively. During bisection, intervals +*> [left, right] are maintained by storing their mid-points and +*> semi-widths in the arrays W and WERR respectively. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The N diagonal elements of the diagonal matrix D. +*> \endverbatim +*> +*> \param[in] LLD +*> \verbatim +*> LLD is REAL array, dimension (N-1) +*> The (N-1) elements L(i)*L(i)*D(i). +*> \endverbatim +*> +*> \param[in] IFIRST +*> \verbatim +*> IFIRST is INTEGER +*> The index of the first eigenvalue to be computed. +*> \endverbatim +*> +*> \param[in] ILAST +*> \verbatim +*> ILAST is INTEGER +*> The index of the last eigenvalue to be computed. +*> \endverbatim +*> +*> \param[in] RTOL1 +*> \verbatim +*> RTOL1 is REAL +*> \endverbatim +*> +*> \param[in] RTOL2 +*> \verbatim +*> RTOL2 is REAL +*> Tolerance for the convergence of the bisection intervals. +*> An interval [LEFT,RIGHT] has converged if +*> RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) +*> where GAP is the (estimated) distance to the nearest +*> eigenvalue. +*> \endverbatim +*> +*> \param[in] OFFSET +*> \verbatim +*> OFFSET is INTEGER +*> Offset for the arrays W, WGAP and WERR, i.e., the IFIRST-OFFSET +*> through ILAST-OFFSET elements of these arrays are to be used. +*> \endverbatim +*> +*> \param[in,out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are +*> estimates of the eigenvalues of L D L^T indexed IFIRST through +*> ILAST. +*> On output, these estimates are refined. +*> \endverbatim +*> +*> \param[in,out] WGAP +*> \verbatim +*> WGAP is REAL array, dimension (N-1) +*> On input, the (estimated) gaps between consecutive +*> eigenvalues of L D L^T, i.e., WGAP(I-OFFSET) is the gap between +*> eigenvalues I and I+1. Note that if IFIRST.EQ.ILAST +*> then WGAP(IFIRST-OFFSET) must be set to ZERO. +*> On output, these gaps are refined. +*> \endverbatim +*> +*> \param[in,out] WERR +*> \verbatim +*> WERR is REAL array, dimension (N) +*> On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are +*> the errors in the estimates of the corresponding elements in W. +*> On output, these errors are refined. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> Workspace. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (2*N) +*> Workspace. +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is REAL +*> The minimum pivot in the Sturm sequence. +*> \endverbatim +*> +*> \param[in] SPDIAM +*> \verbatim +*> SPDIAM is REAL +*> The spectral diameter of the matrix. +*> \endverbatim +*> +*> \param[in] TWIST +*> \verbatim +*> TWIST is INTEGER +*> The twist index for the twisted factorization that is used +*> for the negcount. +*> TWIST = N: Compute negcount from L D L^T - LAMBDA I = L+ D+ L+^T +*> TWIST = 1: Compute negcount from L D L^T - LAMBDA I = U- D- U-^T +*> TWIST = R: Compute negcount from L D L^T - LAMBDA I = N(r) D(r) N(r) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> Error flag. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE SLARRB( N, D, LLD, IFIRST, ILAST, RTOL1, + $ RTOL2, OFFSET, W, WGAP, WERR, WORK, IWORK, + $ PIVMIN, SPDIAM, TWIST, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER IFIRST, ILAST, INFO, N, OFFSET, TWIST + REAL PIVMIN, RTOL1, RTOL2, SPDIAM +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL D( * ), LLD( * ), W( * ), + $ WERR( * ), WGAP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, TWO, HALF + PARAMETER ( ZERO = 0.0E0, TWO = 2.0E0, + $ HALF = 0.5E0 ) + INTEGER MAXITR +* .. +* .. Local Scalars .. + INTEGER I, I1, II, IP, ITER, K, NEGCNT, NEXT, NINT, + $ OLNINT, PREV, R + REAL BACK, CVRGD, GAP, LEFT, LGAP, MID, MNWDTH, + $ RGAP, RIGHT, TMP, WIDTH +* .. +* .. External Functions .. + INTEGER SLANEG + EXTERNAL SLANEG +* +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* + MAXITR = INT( ( LOG( SPDIAM+PIVMIN )-LOG( PIVMIN ) ) / + $ LOG( TWO ) ) + 2 + MNWDTH = TWO * PIVMIN +* + R = TWIST + IF((R.LT.1).OR.(R.GT.N)) R = N +* +* Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. +* The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while +* Count( WORK(2*I) ) is stored in IWORK( 2*I ). The integer IWORK( 2*I-1 ) +* for an unconverged interval is set to the index of the next unconverged +* interval, and is -1 or 0 for a converged interval. Thus a linked +* list of unconverged intervals is set up. +* + I1 = IFIRST +* The number of unconverged intervals + NINT = 0 +* The last unconverged interval found + PREV = 0 + + RGAP = WGAP( I1-OFFSET ) + DO 75 I = I1, ILAST + K = 2*I + II = I - OFFSET + LEFT = W( II ) - WERR( II ) + RIGHT = W( II ) + WERR( II ) + LGAP = RGAP + RGAP = WGAP( II ) + GAP = MIN( LGAP, RGAP ) + +* Make sure that [LEFT,RIGHT] contains the desired eigenvalue +* Compute negcount from dstqds facto L+D+L+^T = L D L^T - LEFT +* +* Do while( NEGCNT(LEFT).GT.I-1 ) +* + BACK = WERR( II ) + 20 CONTINUE + NEGCNT = SLANEG( N, D, LLD, LEFT, PIVMIN, R ) + IF( NEGCNT.GT.I-1 ) THEN + LEFT = LEFT - BACK + BACK = TWO*BACK + GO TO 20 + END IF +* +* Do while( NEGCNT(RIGHT).LT.I ) +* Compute negcount from dstqds facto L+D+L+^T = L D L^T - RIGHT +* + BACK = WERR( II ) + 50 CONTINUE + + NEGCNT = SLANEG( N, D, LLD, RIGHT, PIVMIN, R ) + IF( NEGCNT.LT.I ) THEN + RIGHT = RIGHT + BACK + BACK = TWO*BACK + GO TO 50 + END IF + WIDTH = HALF*ABS( LEFT - RIGHT ) + TMP = MAX( ABS( LEFT ), ABS( RIGHT ) ) + CVRGD = MAX(RTOL1*GAP,RTOL2*TMP) + IF( WIDTH.LE.CVRGD .OR. WIDTH.LE.MNWDTH ) THEN +* This interval has already converged and does not need refinement. +* (Note that the gaps might change through refining the +* eigenvalues, however, they can only get bigger.) +* Remove it from the list. + IWORK( K-1 ) = -1 +* Make sure that I1 always points to the first unconverged interval + IF((I.EQ.I1).AND.(I.LT.ILAST)) I1 = I + 1 + IF((PREV.GE.I1).AND.(I.LE.ILAST)) IWORK( 2*PREV-1 ) = I + 1 + ELSE +* unconverged interval found + PREV = I + NINT = NINT + 1 + IWORK( K-1 ) = I + 1 + IWORK( K ) = NEGCNT + END IF + WORK( K-1 ) = LEFT + WORK( K ) = RIGHT + 75 CONTINUE + +* +* Do while( NINT.GT.0 ), i.e. there are still unconverged intervals +* and while (ITER.LT.MAXITR) +* + ITER = 0 + 80 CONTINUE + PREV = I1 - 1 + I = I1 + OLNINT = NINT + + DO 100 IP = 1, OLNINT + K = 2*I + II = I - OFFSET + RGAP = WGAP( II ) + LGAP = RGAP + IF(II.GT.1) LGAP = WGAP( II-1 ) + GAP = MIN( LGAP, RGAP ) + NEXT = IWORK( K-1 ) + LEFT = WORK( K-1 ) + RIGHT = WORK( K ) + MID = HALF*( LEFT + RIGHT ) + +* semiwidth of interval + WIDTH = RIGHT - MID + TMP = MAX( ABS( LEFT ), ABS( RIGHT ) ) + CVRGD = MAX(RTOL1*GAP,RTOL2*TMP) + IF( ( WIDTH.LE.CVRGD ) .OR. ( WIDTH.LE.MNWDTH ).OR. + $ ( ITER.EQ.MAXITR ) )THEN +* reduce number of unconverged intervals + NINT = NINT - 1 +* Mark interval as converged. + IWORK( K-1 ) = 0 + IF( I1.EQ.I ) THEN + I1 = NEXT + ELSE +* Prev holds the last unconverged interval previously examined + IF(PREV.GE.I1) IWORK( 2*PREV-1 ) = NEXT + END IF + I = NEXT + GO TO 100 + END IF + PREV = I +* +* Perform one bisection step +* + NEGCNT = SLANEG( N, D, LLD, MID, PIVMIN, R ) + IF( NEGCNT.LE.I-1 ) THEN + WORK( K-1 ) = MID + ELSE + WORK( K ) = MID + END IF + I = NEXT + 100 CONTINUE + ITER = ITER + 1 +* do another loop if there are still unconverged intervals +* However, in the last iteration, all intervals are accepted +* since this is the best we can do. + IF( ( NINT.GT.0 ).AND.(ITER.LE.MAXITR) ) GO TO 80 +* +* +* At this point, all the intervals have converged + DO 110 I = IFIRST, ILAST + K = 2*I + II = I - OFFSET +* All intervals marked by '0' have been refined. + IF( IWORK( K-1 ).EQ.0 ) THEN + W( II ) = HALF*( WORK( K-1 )+WORK( K ) ) + WERR( II ) = WORK( K ) - W( II ) + END IF + 110 CONTINUE +* + DO 111 I = IFIRST+1, ILAST + K = 2*I + II = I - OFFSET + WGAP( II-1 ) = MAX( ZERO, + $ W(II) - WERR (II) - W( II-1 ) - WERR( II-1 )) + 111 CONTINUE + + RETURN +* +* End of SLARRB +* + END diff --git a/dspl/liblapack/SRC/slarrc.f b/dspl/liblapack/SRC/slarrc.f new file mode 100644 index 0000000..f0c0335 --- /dev/null +++ b/dspl/liblapack/SRC/slarrc.f @@ -0,0 +1,251 @@ +*> \brief \b SLARRC computes the number of eigenvalues of the symmetric tridiagonal matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARRC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARRC( JOBT, N, VL, VU, D, E, PIVMIN, +* EIGCNT, LCNT, RCNT, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBT +* INTEGER EIGCNT, INFO, LCNT, N, RCNT +* REAL PIVMIN, VL, VU +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Find the number of eigenvalues of the symmetric tridiagonal matrix T +*> that are in the interval (VL,VU] if JOBT = 'T', and of L D L^T +*> if JOBT = 'L'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBT +*> \verbatim +*> JOBT is CHARACTER*1 +*> = 'T': Compute Sturm count for matrix T. +*> = 'L': Compute Sturm count for matrix L D L^T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N > 0. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> The lower bound for the eigenvalues. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> The upper bound for the eigenvalues. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> JOBT = 'T': The N diagonal elements of the tridiagonal matrix T. +*> JOBT = 'L': The N diagonal elements of the diagonal matrix D. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension (N) +*> JOBT = 'T': The N-1 offdiagonal elements of the matrix T. +*> JOBT = 'L': The N-1 offdiagonal elements of the matrix L. +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is REAL +*> The minimum pivot in the Sturm sequence for T. +*> \endverbatim +*> +*> \param[out] EIGCNT +*> \verbatim +*> EIGCNT is INTEGER +*> The number of eigenvalues of the symmetric tridiagonal matrix T +*> that are in the interval (VL,VU] +*> \endverbatim +*> +*> \param[out] LCNT +*> \verbatim +*> LCNT is INTEGER +*> \endverbatim +*> +*> \param[out] RCNT +*> \verbatim +*> RCNT is INTEGER +*> The left and right negcounts of the interval. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE SLARRC( JOBT, N, VL, VU, D, E, PIVMIN, + $ EIGCNT, LCNT, RCNT, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBT + INTEGER EIGCNT, INFO, LCNT, N, RCNT + REAL PIVMIN, VL, VU +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I + LOGICAL MATT + REAL LPIVOT, RPIVOT, SL, SU, TMP, TMP2 + +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* + LCNT = 0 + RCNT = 0 + EIGCNT = 0 + MATT = LSAME( JOBT, 'T' ) + + + IF (MATT) THEN +* Sturm sequence count on T + LPIVOT = D( 1 ) - VL + RPIVOT = D( 1 ) - VU + IF( LPIVOT.LE.ZERO ) THEN + LCNT = LCNT + 1 + ENDIF + IF( RPIVOT.LE.ZERO ) THEN + RCNT = RCNT + 1 + ENDIF + DO 10 I = 1, N-1 + TMP = E(I)**2 + LPIVOT = ( D( I+1 )-VL ) - TMP/LPIVOT + RPIVOT = ( D( I+1 )-VU ) - TMP/RPIVOT + IF( LPIVOT.LE.ZERO ) THEN + LCNT = LCNT + 1 + ENDIF + IF( RPIVOT.LE.ZERO ) THEN + RCNT = RCNT + 1 + ENDIF + 10 CONTINUE + ELSE +* Sturm sequence count on L D L^T + SL = -VL + SU = -VU + DO 20 I = 1, N - 1 + LPIVOT = D( I ) + SL + RPIVOT = D( I ) + SU + IF( LPIVOT.LE.ZERO ) THEN + LCNT = LCNT + 1 + ENDIF + IF( RPIVOT.LE.ZERO ) THEN + RCNT = RCNT + 1 + ENDIF + TMP = E(I) * D(I) * E(I) +* + TMP2 = TMP / LPIVOT + IF( TMP2.EQ.ZERO ) THEN + SL = TMP - VL + ELSE + SL = SL*TMP2 - VL + END IF +* + TMP2 = TMP / RPIVOT + IF( TMP2.EQ.ZERO ) THEN + SU = TMP - VU + ELSE + SU = SU*TMP2 - VU + END IF + 20 CONTINUE + LPIVOT = D( N ) + SL + RPIVOT = D( N ) + SU + IF( LPIVOT.LE.ZERO ) THEN + LCNT = LCNT + 1 + ENDIF + IF( RPIVOT.LE.ZERO ) THEN + RCNT = RCNT + 1 + ENDIF + ENDIF + EIGCNT = RCNT - LCNT + + RETURN +* +* end of SLARRC +* + END diff --git a/dspl/liblapack/SRC/slarrd.f b/dspl/liblapack/SRC/slarrd.f new file mode 100644 index 0000000..7a7ec0a --- /dev/null +++ b/dspl/liblapack/SRC/slarrd.f @@ -0,0 +1,869 @@ +*> \brief \b SLARRD computes the eigenvalues of a symmetric tridiagonal matrix to suitable accuracy. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARRD( RANGE, ORDER, N, VL, VU, IL, IU, GERS, +* RELTOL, D, E, E2, PIVMIN, NSPLIT, ISPLIT, +* M, W, WERR, WL, WU, IBLOCK, INDEXW, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER ORDER, RANGE +* INTEGER IL, INFO, IU, M, N, NSPLIT +* REAL PIVMIN, RELTOL, VL, VU, WL, WU +* .. +* .. Array Arguments .. +* INTEGER IBLOCK( * ), INDEXW( * ), +* $ ISPLIT( * ), IWORK( * ) +* REAL D( * ), E( * ), E2( * ), +* $ GERS( * ), W( * ), WERR( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARRD computes the eigenvalues of a symmetric tridiagonal +*> matrix T to suitable accuracy. This is an auxiliary code to be +*> called from SSTEMR. +*> The user may ask for all eigenvalues, all eigenvalues +*> in the half-open interval (VL, VU], or the IL-th through IU-th +*> eigenvalues. +*> +*> To avoid overflow, the matrix must be scaled so that its +*> largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value, and for greatest +*> accuracy, it should not be much smaller than that. +*> +*> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal +*> Matrix", Report CS41, Computer Science Dept., Stanford +*> University, July 21, 1966. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': ("All") all eigenvalues will be found. +*> = 'V': ("Value") all eigenvalues in the half-open interval +*> (VL, VU] will be found. +*> = 'I': ("Index") the IL-th through IU-th eigenvalues (of the +*> entire matrix) will be found. +*> \endverbatim +*> +*> \param[in] ORDER +*> \verbatim +*> ORDER is CHARACTER*1 +*> = 'B': ("By Block") the eigenvalues will be grouped by +*> split-off block (see IBLOCK, ISPLIT) and +*> ordered from smallest to largest within +*> the block. +*> = 'E': ("Entire matrix") +*> the eigenvalues for the entire matrix +*> will be ordered from smallest to +*> largest. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the tridiagonal matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. Eigenvalues less than or equal +*> to VL, or greater than VU, will not be returned. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. Eigenvalues less than or equal +*> to VL, or greater than VU, will not be returned. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] GERS +*> \verbatim +*> GERS is REAL array, dimension (2*N) +*> The N Gerschgorin intervals (the i-th Gerschgorin interval +*> is (GERS(2*i-1), GERS(2*i)). +*> \endverbatim +*> +*> \param[in] RELTOL +*> \verbatim +*> RELTOL is REAL +*> The minimum relative width of an interval. When an interval +*> is narrower than RELTOL times the larger (in +*> magnitude) endpoint, then it is considered to be +*> sufficiently small, i.e., converged. Note: this should +*> always be at least radix*machine epsilon. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The (n-1) off-diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[in] E2 +*> \verbatim +*> E2 is REAL array, dimension (N-1) +*> The (n-1) squared off-diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is REAL +*> The minimum pivot allowed in the Sturm sequence for T. +*> \endverbatim +*> +*> \param[in] NSPLIT +*> \verbatim +*> NSPLIT is INTEGER +*> The number of diagonal blocks in the matrix T. +*> 1 <= NSPLIT <= N. +*> \endverbatim +*> +*> \param[in] ISPLIT +*> \verbatim +*> ISPLIT is INTEGER array, dimension (N) +*> The splitting points, at which T breaks up into submatrices. +*> The first submatrix consists of rows/columns 1 to ISPLIT(1), +*> the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), +*> etc., and the NSPLIT-th consists of rows/columns +*> ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. +*> (Only the first NSPLIT elements will actually be used, but +*> since the user cannot know a priori what value NSPLIT will +*> have, N words must be reserved for ISPLIT.) +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The actual number of eigenvalues found. 0 <= M <= N. +*> (See also the description of INFO=2,3.) +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> On exit, the first M elements of W will contain the +*> eigenvalue approximations. SLARRD computes an interval +*> I_j = (a_j, b_j] that includes eigenvalue j. The eigenvalue +*> approximation is given as the interval midpoint +*> W(j)= ( a_j + b_j)/2. The corresponding error is bounded by +*> WERR(j) = abs( a_j - b_j)/2 +*> \endverbatim +*> +*> \param[out] WERR +*> \verbatim +*> WERR is REAL array, dimension (N) +*> The error bound on the corresponding eigenvalue approximation +*> in W. +*> \endverbatim +*> +*> \param[out] WL +*> \verbatim +*> WL is REAL +*> \endverbatim +*> +*> \param[out] WU +*> \verbatim +*> WU is REAL +*> The interval (WL, WU] contains all the wanted eigenvalues. +*> If RANGE='V', then WL=VL and WU=VU. +*> If RANGE='A', then WL and WU are the global Gerschgorin bounds +*> on the spectrum. +*> If RANGE='I', then WL and WU are computed by SLAEBZ from the +*> index range specified. +*> \endverbatim +*> +*> \param[out] IBLOCK +*> \verbatim +*> IBLOCK is INTEGER array, dimension (N) +*> At each row/column j where E(j) is zero or small, the +*> matrix T is considered to split into a block diagonal +*> matrix. On exit, if INFO = 0, IBLOCK(i) specifies to which +*> block (from 1 to the number of blocks) the eigenvalue W(i) +*> belongs. (SLARRD may use the remaining N-M elements as +*> workspace.) +*> \endverbatim +*> +*> \param[out] INDEXW +*> \verbatim +*> INDEXW is INTEGER array, dimension (N) +*> The indices of the eigenvalues within each block (submatrix); +*> for example, INDEXW(i)= j and IBLOCK(i)=k imply that the +*> i-th eigenvalue W(i) is the j-th eigenvalue in block k. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: some or all of the eigenvalues failed to converge or +*> were not computed: +*> =1 or 3: Bisection failed to converge for some +*> eigenvalues; these eigenvalues are flagged by a +*> negative block number. The effect is that the +*> eigenvalues may not be as accurate as the +*> absolute and relative tolerances. This is +*> generally caused by unexpectedly inaccurate +*> arithmetic. +*> =2 or 3: RANGE='I' only: Not all of the eigenvalues +*> IL:IU were found. +*> Effect: M < IU+1-IL +*> Cause: non-monotonic arithmetic, causing the +*> Sturm sequence to be non-monotonic. +*> Cure: recalculate, using RANGE='A', and pick +*> out eigenvalues IL:IU. In some cases, +*> increasing the PARAMETER "FUDGE" may +*> make things work. +*> = 4: RANGE='I', and the Gershgorin interval +*> initially used was too small. No eigenvalues +*> were computed. +*> Probable cause: your machine has sloppy +*> floating-point arithmetic. +*> Cure: Increase the PARAMETER "FUDGE", +*> recompile, and try again. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> FUDGE REAL, default = 2 +*> A "fudge factor" to widen the Gershgorin intervals. Ideally, +*> a value of 1 should work, but on machines with sloppy +*> arithmetic, this needs to be larger. The default for +*> publicly released versions should be large enough to handle +*> the worst machine around. Note that this has no effect +*> on accuracy of the solution. +*> \endverbatim +*> +*> \par Contributors: +* ================== +*> +*> W. Kahan, University of California, Berkeley, USA \n +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA \n +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLARRD( RANGE, ORDER, N, VL, VU, IL, IU, GERS, + $ RELTOL, D, E, E2, PIVMIN, NSPLIT, ISPLIT, + $ M, W, WERR, WL, WU, IBLOCK, INDEXW, + $ WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER ORDER, RANGE + INTEGER IL, INFO, IU, M, N, NSPLIT + REAL PIVMIN, RELTOL, VL, VU, WL, WU +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), INDEXW( * ), + $ ISPLIT( * ), IWORK( * ) + REAL D( * ), E( * ), E2( * ), + $ GERS( * ), W( * ), WERR( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, HALF, FUDGE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, + $ TWO = 2.0E0, HALF = ONE/TWO, + $ FUDGE = TWO ) + INTEGER ALLRNG, VALRNG, INDRNG + PARAMETER ( ALLRNG = 1, VALRNG = 2, INDRNG = 3 ) +* .. +* .. Local Scalars .. + LOGICAL NCNVRG, TOOFEW + INTEGER I, IB, IBEGIN, IDISCL, IDISCU, IE, IEND, IINFO, + $ IM, IN, IOFF, IOUT, IRANGE, ITMAX, ITMP1, + $ ITMP2, IW, IWOFF, J, JBLK, JDISC, JE, JEE, NB, + $ NWL, NWU + REAL ATOLI, EPS, GL, GU, RTOLI, TMP1, TMP2, + $ TNORM, UFLOW, WKILL, WLU, WUL + +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH + EXTERNAL LSAME, ILAENV, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLAEBZ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* +* Decode RANGE +* + IF( LSAME( RANGE, 'A' ) ) THEN + IRANGE = ALLRNG + ELSE IF( LSAME( RANGE, 'V' ) ) THEN + IRANGE = VALRNG + ELSE IF( LSAME( RANGE, 'I' ) ) THEN + IRANGE = INDRNG + ELSE + IRANGE = 0 + END IF +* +* Check for Errors +* + IF( IRANGE.LE.0 ) THEN + INFO = -1 + ELSE IF( .NOT.(LSAME(ORDER,'B').OR.LSAME(ORDER,'E')) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( IRANGE.EQ.VALRNG ) THEN + IF( VL.GE.VU ) + $ INFO = -5 + ELSE IF( IRANGE.EQ.INDRNG .AND. + $ ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) THEN + INFO = -6 + ELSE IF( IRANGE.EQ.INDRNG .AND. + $ ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + RETURN + END IF + +* Initialize error flags + INFO = 0 + NCNVRG = .FALSE. + TOOFEW = .FALSE. + +* Quick return if possible + M = 0 + IF( N.EQ.0 ) RETURN + +* Simplification: + IF( IRANGE.EQ.INDRNG .AND. IL.EQ.1 .AND. IU.EQ.N ) IRANGE = 1 + +* Get machine constants + EPS = SLAMCH( 'P' ) + UFLOW = SLAMCH( 'U' ) + + +* Special Case when N=1 +* Treat case of 1x1 matrix for quick return + IF( N.EQ.1 ) THEN + IF( (IRANGE.EQ.ALLRNG).OR. + $ ((IRANGE.EQ.VALRNG).AND.(D(1).GT.VL).AND.(D(1).LE.VU)).OR. + $ ((IRANGE.EQ.INDRNG).AND.(IL.EQ.1).AND.(IU.EQ.1)) ) THEN + M = 1 + W(1) = D(1) +* The computation error of the eigenvalue is zero + WERR(1) = ZERO + IBLOCK( 1 ) = 1 + INDEXW( 1 ) = 1 + ENDIF + RETURN + END IF + +* NB is the minimum vector length for vector bisection, or 0 +* if only scalar is to be done. + NB = ILAENV( 1, 'SSTEBZ', ' ', N, -1, -1, -1 ) + IF( NB.LE.1 ) NB = 0 + +* Find global spectral radius + GL = D(1) + GU = D(1) + DO 5 I = 1,N + GL = MIN( GL, GERS( 2*I - 1)) + GU = MAX( GU, GERS(2*I) ) + 5 CONTINUE +* Compute global Gerschgorin bounds and spectral diameter + TNORM = MAX( ABS( GL ), ABS( GU ) ) + GL = GL - FUDGE*TNORM*EPS*N - FUDGE*TWO*PIVMIN + GU = GU + FUDGE*TNORM*EPS*N + FUDGE*TWO*PIVMIN +* [JAN/28/2009] remove the line below since SPDIAM variable not use +* SPDIAM = GU - GL +* Input arguments for SLAEBZ: +* The relative tolerance. An interval (a,b] lies within +* "relative tolerance" if b-a < RELTOL*max(|a|,|b|), + RTOLI = RELTOL +* Set the absolute tolerance for interval convergence to zero to force +* interval convergence based on relative size of the interval. +* This is dangerous because intervals might not converge when RELTOL is +* small. But at least a very small number should be selected so that for +* strongly graded matrices, the code can get relatively accurate +* eigenvalues. + ATOLI = FUDGE*TWO*UFLOW + FUDGE*TWO*PIVMIN + + IF( IRANGE.EQ.INDRNG ) THEN + +* RANGE='I': Compute an interval containing eigenvalues +* IL through IU. The initial interval [GL,GU] from the global +* Gerschgorin bounds GL and GU is refined by SLAEBZ. + ITMAX = INT( ( LOG( TNORM+PIVMIN )-LOG( PIVMIN ) ) / + $ LOG( TWO ) ) + 2 + WORK( N+1 ) = GL + WORK( N+2 ) = GL + WORK( N+3 ) = GU + WORK( N+4 ) = GU + WORK( N+5 ) = GL + WORK( N+6 ) = GU + IWORK( 1 ) = -1 + IWORK( 2 ) = -1 + IWORK( 3 ) = N + 1 + IWORK( 4 ) = N + 1 + IWORK( 5 ) = IL - 1 + IWORK( 6 ) = IU +* + CALL SLAEBZ( 3, ITMAX, N, 2, 2, NB, ATOLI, RTOLI, PIVMIN, + $ D, E, E2, IWORK( 5 ), WORK( N+1 ), WORK( N+5 ), IOUT, + $ IWORK, W, IBLOCK, IINFO ) + IF( IINFO .NE. 0 ) THEN + INFO = IINFO + RETURN + END IF +* On exit, output intervals may not be ordered by ascending negcount + IF( IWORK( 6 ).EQ.IU ) THEN + WL = WORK( N+1 ) + WLU = WORK( N+3 ) + NWL = IWORK( 1 ) + WU = WORK( N+4 ) + WUL = WORK( N+2 ) + NWU = IWORK( 4 ) + ELSE + WL = WORK( N+2 ) + WLU = WORK( N+4 ) + NWL = IWORK( 2 ) + WU = WORK( N+3 ) + WUL = WORK( N+1 ) + NWU = IWORK( 3 ) + END IF +* On exit, the interval [WL, WLU] contains a value with negcount NWL, +* and [WUL, WU] contains a value with negcount NWU. + IF( NWL.LT.0 .OR. NWL.GE.N .OR. NWU.LT.1 .OR. NWU.GT.N ) THEN + INFO = 4 + RETURN + END IF + + ELSEIF( IRANGE.EQ.VALRNG ) THEN + WL = VL + WU = VU + + ELSEIF( IRANGE.EQ.ALLRNG ) THEN + WL = GL + WU = GU + ENDIF + + + +* Find Eigenvalues -- Loop Over blocks and recompute NWL and NWU. +* NWL accumulates the number of eigenvalues .le. WL, +* NWU accumulates the number of eigenvalues .le. WU + M = 0 + IEND = 0 + INFO = 0 + NWL = 0 + NWU = 0 +* + DO 70 JBLK = 1, NSPLIT + IOFF = IEND + IBEGIN = IOFF + 1 + IEND = ISPLIT( JBLK ) + IN = IEND - IOFF +* + IF( IN.EQ.1 ) THEN +* 1x1 block + IF( WL.GE.D( IBEGIN )-PIVMIN ) + $ NWL = NWL + 1 + IF( WU.GE.D( IBEGIN )-PIVMIN ) + $ NWU = NWU + 1 + IF( IRANGE.EQ.ALLRNG .OR. + $ ( WL.LT.D( IBEGIN )-PIVMIN + $ .AND. WU.GE. D( IBEGIN )-PIVMIN ) ) THEN + M = M + 1 + W( M ) = D( IBEGIN ) + WERR(M) = ZERO +* The gap for a single block doesn't matter for the later +* algorithm and is assigned an arbitrary large value + IBLOCK( M ) = JBLK + INDEXW( M ) = 1 + END IF + +* Disabled 2x2 case because of a failure on the following matrix +* RANGE = 'I', IL = IU = 4 +* Original Tridiagonal, d = [ +* -0.150102010615740E+00 +* -0.849897989384260E+00 +* -0.128208148052635E-15 +* 0.128257718286320E-15 +* ]; +* e = [ +* -0.357171383266986E+00 +* -0.180411241501588E-15 +* -0.175152352710251E-15 +* ]; +* +* ELSE IF( IN.EQ.2 ) THEN +** 2x2 block +* DISC = SQRT( (HALF*(D(IBEGIN)-D(IEND)))**2 + E(IBEGIN)**2 ) +* TMP1 = HALF*(D(IBEGIN)+D(IEND)) +* L1 = TMP1 - DISC +* IF( WL.GE. L1-PIVMIN ) +* $ NWL = NWL + 1 +* IF( WU.GE. L1-PIVMIN ) +* $ NWU = NWU + 1 +* IF( IRANGE.EQ.ALLRNG .OR. ( WL.LT.L1-PIVMIN .AND. WU.GE. +* $ L1-PIVMIN ) ) THEN +* M = M + 1 +* W( M ) = L1 +** The uncertainty of eigenvalues of a 2x2 matrix is very small +* WERR( M ) = EPS * ABS( W( M ) ) * TWO +* IBLOCK( M ) = JBLK +* INDEXW( M ) = 1 +* ENDIF +* L2 = TMP1 + DISC +* IF( WL.GE. L2-PIVMIN ) +* $ NWL = NWL + 1 +* IF( WU.GE. L2-PIVMIN ) +* $ NWU = NWU + 1 +* IF( IRANGE.EQ.ALLRNG .OR. ( WL.LT.L2-PIVMIN .AND. WU.GE. +* $ L2-PIVMIN ) ) THEN +* M = M + 1 +* W( M ) = L2 +** The uncertainty of eigenvalues of a 2x2 matrix is very small +* WERR( M ) = EPS * ABS( W( M ) ) * TWO +* IBLOCK( M ) = JBLK +* INDEXW( M ) = 2 +* ENDIF + ELSE +* General Case - block of size IN >= 2 +* Compute local Gerschgorin interval and use it as the initial +* interval for SLAEBZ + GU = D( IBEGIN ) + GL = D( IBEGIN ) + TMP1 = ZERO + + DO 40 J = IBEGIN, IEND + GL = MIN( GL, GERS( 2*J - 1)) + GU = MAX( GU, GERS(2*J) ) + 40 CONTINUE +* [JAN/28/2009] +* change SPDIAM by TNORM in lines 2 and 3 thereafter +* line 1: remove computation of SPDIAM (not useful anymore) +* SPDIAM = GU - GL +* GL = GL - FUDGE*SPDIAM*EPS*IN - FUDGE*PIVMIN +* GU = GU + FUDGE*SPDIAM*EPS*IN + FUDGE*PIVMIN + GL = GL - FUDGE*TNORM*EPS*IN - FUDGE*PIVMIN + GU = GU + FUDGE*TNORM*EPS*IN + FUDGE*PIVMIN +* + IF( IRANGE.GT.1 ) THEN + IF( GU.LT.WL ) THEN +* the local block contains none of the wanted eigenvalues + NWL = NWL + IN + NWU = NWU + IN + GO TO 70 + END IF +* refine search interval if possible, only range (WL,WU] matters + GL = MAX( GL, WL ) + GU = MIN( GU, WU ) + IF( GL.GE.GU ) + $ GO TO 70 + END IF + +* Find negcount of initial interval boundaries GL and GU + WORK( N+1 ) = GL + WORK( N+IN+1 ) = GU + CALL SLAEBZ( 1, 0, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN, + $ D( IBEGIN ), E( IBEGIN ), E2( IBEGIN ), + $ IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IM, + $ IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO ) + IF( IINFO .NE. 0 ) THEN + INFO = IINFO + RETURN + END IF +* + NWL = NWL + IWORK( 1 ) + NWU = NWU + IWORK( IN+1 ) + IWOFF = M - IWORK( 1 ) + +* Compute Eigenvalues + ITMAX = INT( ( LOG( GU-GL+PIVMIN )-LOG( PIVMIN ) ) / + $ LOG( TWO ) ) + 2 + CALL SLAEBZ( 2, ITMAX, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN, + $ D( IBEGIN ), E( IBEGIN ), E2( IBEGIN ), + $ IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IOUT, + $ IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO ) + IF( IINFO .NE. 0 ) THEN + INFO = IINFO + RETURN + END IF +* +* Copy eigenvalues into W and IBLOCK +* Use -JBLK for block number for unconverged eigenvalues. +* Loop over the number of output intervals from SLAEBZ + DO 60 J = 1, IOUT +* eigenvalue approximation is middle point of interval + TMP1 = HALF*( WORK( J+N )+WORK( J+IN+N ) ) +* semi length of error interval + TMP2 = HALF*ABS( WORK( J+N )-WORK( J+IN+N ) ) + IF( J.GT.IOUT-IINFO ) THEN +* Flag non-convergence. + NCNVRG = .TRUE. + IB = -JBLK + ELSE + IB = JBLK + END IF + DO 50 JE = IWORK( J ) + 1 + IWOFF, + $ IWORK( J+IN ) + IWOFF + W( JE ) = TMP1 + WERR( JE ) = TMP2 + INDEXW( JE ) = JE - IWOFF + IBLOCK( JE ) = IB + 50 CONTINUE + 60 CONTINUE +* + M = M + IM + END IF + 70 CONTINUE + +* If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU +* If NWL+1 < IL or NWU > IU, discard extra eigenvalues. + IF( IRANGE.EQ.INDRNG ) THEN + IDISCL = IL - 1 - NWL + IDISCU = NWU - IU +* + IF( IDISCL.GT.0 ) THEN + IM = 0 + DO 80 JE = 1, M +* Remove some of the smallest eigenvalues from the left so that +* at the end IDISCL =0. Move all eigenvalues up to the left. + IF( W( JE ).LE.WLU .AND. IDISCL.GT.0 ) THEN + IDISCL = IDISCL - 1 + ELSE + IM = IM + 1 + W( IM ) = W( JE ) + WERR( IM ) = WERR( JE ) + INDEXW( IM ) = INDEXW( JE ) + IBLOCK( IM ) = IBLOCK( JE ) + END IF + 80 CONTINUE + M = IM + END IF + IF( IDISCU.GT.0 ) THEN +* Remove some of the largest eigenvalues from the right so that +* at the end IDISCU =0. Move all eigenvalues up to the left. + IM=M+1 + DO 81 JE = M, 1, -1 + IF( W( JE ).GE.WUL .AND. IDISCU.GT.0 ) THEN + IDISCU = IDISCU - 1 + ELSE + IM = IM - 1 + W( IM ) = W( JE ) + WERR( IM ) = WERR( JE ) + INDEXW( IM ) = INDEXW( JE ) + IBLOCK( IM ) = IBLOCK( JE ) + END IF + 81 CONTINUE + JEE = 0 + DO 82 JE = IM, M + JEE = JEE + 1 + W( JEE ) = W( JE ) + WERR( JEE ) = WERR( JE ) + INDEXW( JEE ) = INDEXW( JE ) + IBLOCK( JEE ) = IBLOCK( JE ) + 82 CONTINUE + M = M-IM+1 + END IF + + IF( IDISCL.GT.0 .OR. IDISCU.GT.0 ) THEN +* Code to deal with effects of bad arithmetic. (If N(w) is +* monotone non-decreasing, this should never happen.) +* Some low eigenvalues to be discarded are not in (WL,WLU], +* or high eigenvalues to be discarded are not in (WUL,WU] +* so just kill off the smallest IDISCL/largest IDISCU +* eigenvalues, by marking the corresponding IBLOCK = 0 + IF( IDISCL.GT.0 ) THEN + WKILL = WU + DO 100 JDISC = 1, IDISCL + IW = 0 + DO 90 JE = 1, M + IF( IBLOCK( JE ).NE.0 .AND. + $ ( W( JE ).LT.WKILL .OR. IW.EQ.0 ) ) THEN + IW = JE + WKILL = W( JE ) + END IF + 90 CONTINUE + IBLOCK( IW ) = 0 + 100 CONTINUE + END IF + IF( IDISCU.GT.0 ) THEN + WKILL = WL + DO 120 JDISC = 1, IDISCU + IW = 0 + DO 110 JE = 1, M + IF( IBLOCK( JE ).NE.0 .AND. + $ ( W( JE ).GE.WKILL .OR. IW.EQ.0 ) ) THEN + IW = JE + WKILL = W( JE ) + END IF + 110 CONTINUE + IBLOCK( IW ) = 0 + 120 CONTINUE + END IF +* Now erase all eigenvalues with IBLOCK set to zero + IM = 0 + DO 130 JE = 1, M + IF( IBLOCK( JE ).NE.0 ) THEN + IM = IM + 1 + W( IM ) = W( JE ) + WERR( IM ) = WERR( JE ) + INDEXW( IM ) = INDEXW( JE ) + IBLOCK( IM ) = IBLOCK( JE ) + END IF + 130 CONTINUE + M = IM + END IF + IF( IDISCL.LT.0 .OR. IDISCU.LT.0 ) THEN + TOOFEW = .TRUE. + END IF + END IF +* + IF(( IRANGE.EQ.ALLRNG .AND. M.NE.N ).OR. + $ ( IRANGE.EQ.INDRNG .AND. M.NE.IU-IL+1 ) ) THEN + TOOFEW = .TRUE. + END IF + +* If ORDER='B', do nothing the eigenvalues are already sorted by +* block. +* If ORDER='E', sort the eigenvalues from smallest to largest + + IF( LSAME(ORDER,'E') .AND. NSPLIT.GT.1 ) THEN + DO 150 JE = 1, M - 1 + IE = 0 + TMP1 = W( JE ) + DO 140 J = JE + 1, M + IF( W( J ).LT.TMP1 ) THEN + IE = J + TMP1 = W( J ) + END IF + 140 CONTINUE + IF( IE.NE.0 ) THEN + TMP2 = WERR( IE ) + ITMP1 = IBLOCK( IE ) + ITMP2 = INDEXW( IE ) + W( IE ) = W( JE ) + WERR( IE ) = WERR( JE ) + IBLOCK( IE ) = IBLOCK( JE ) + INDEXW( IE ) = INDEXW( JE ) + W( JE ) = TMP1 + WERR( JE ) = TMP2 + IBLOCK( JE ) = ITMP1 + INDEXW( JE ) = ITMP2 + END IF + 150 CONTINUE + END IF +* + INFO = 0 + IF( NCNVRG ) + $ INFO = INFO + 1 + IF( TOOFEW ) + $ INFO = INFO + 2 + RETURN +* +* End of SLARRD +* + END diff --git a/dspl/liblapack/SRC/slarre.f b/dspl/liblapack/SRC/slarre.f new file mode 100644 index 0000000..ea9b8fc --- /dev/null +++ b/dspl/liblapack/SRC/slarre.f @@ -0,0 +1,908 @@ +*> \brief \b SLARRE given the tridiagonal matrix T, sets small off-diagonal elements to zero and for each unreduced block Ti, finds base representations and eigenvalues. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARRE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARRE( RANGE, N, VL, VU, IL, IU, D, E, E2, +* RTOL1, RTOL2, SPLTOL, NSPLIT, ISPLIT, M, +* W, WERR, WGAP, IBLOCK, INDEXW, GERS, PIVMIN, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER RANGE +* INTEGER IL, INFO, IU, M, N, NSPLIT +* REAL PIVMIN, RTOL1, RTOL2, SPLTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IBLOCK( * ), ISPLIT( * ), IWORK( * ), +* $ INDEXW( * ) +* REAL D( * ), E( * ), E2( * ), GERS( * ), +* $ W( * ),WERR( * ), WGAP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> To find the desired eigenvalues of a given real symmetric +*> tridiagonal matrix T, SLARRE sets any "small" off-diagonal +*> elements to zero, and for each unreduced block T_i, it finds +*> (a) a suitable shift at one end of the block's spectrum, +*> (b) the base representation, T_i - sigma_i I = L_i D_i L_i^T, and +*> (c) eigenvalues of each L_i D_i L_i^T. +*> The representations and eigenvalues found are then used by +*> SSTEMR to compute the eigenvectors of T. +*> The accuracy varies depending on whether bisection is used to +*> find a few eigenvalues or the dqds algorithm (subroutine SLASQ2) to +*> conpute all and then discard any unwanted one. +*> As an added benefit, SLARRE also outputs the n +*> Gerschgorin intervals for the matrices L_i D_i L_i^T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': ("All") all eigenvalues will be found. +*> = 'V': ("Value") all eigenvalues in the half-open interval +*> (VL, VU] will be found. +*> = 'I': ("Index") the IL-th through IU-th eigenvalues (of the +*> entire matrix) will be found. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N > 0. +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound for the eigenvalues. +*> Eigenvalues less than or equal to VL, or greater than VU, +*> will not be returned. VL < VU. +*> If RANGE='I' or ='A', SLARRE computes bounds on the desired +*> part of the spectrum. +*> \endverbatim +*> +*> \param[in,out] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound for the eigenvalues. +*> Eigenvalues less than or equal to VL, or greater than VU, +*> will not be returned. VL < VU. +*> If RANGE='I' or ='A', SLARRE computes bounds on the desired +*> part of the spectrum. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the N diagonal elements of the tridiagonal +*> matrix T. +*> On exit, the N diagonal elements of the diagonal +*> matrices D_i. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N) +*> On entry, the first (N-1) entries contain the subdiagonal +*> elements of the tridiagonal matrix T; E(N) need not be set. +*> On exit, E contains the subdiagonal elements of the unit +*> bidiagonal matrices L_i. The entries E( ISPLIT( I ) ), +*> 1 <= I <= NSPLIT, contain the base points sigma_i on output. +*> \endverbatim +*> +*> \param[in,out] E2 +*> \verbatim +*> E2 is REAL array, dimension (N) +*> On entry, the first (N-1) entries contain the SQUARES of the +*> subdiagonal elements of the tridiagonal matrix T; +*> E2(N) need not be set. +*> On exit, the entries E2( ISPLIT( I ) ), +*> 1 <= I <= NSPLIT, have been set to zero +*> \endverbatim +*> +*> \param[in] RTOL1 +*> \verbatim +*> RTOL1 is REAL +*> \endverbatim +*> +*> \param[in] RTOL2 +*> \verbatim +*> RTOL2 is REAL +*> Parameters for bisection. +*> An interval [LEFT,RIGHT] has converged if +*> RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) +*> \endverbatim +*> +*> \param[in] SPLTOL +*> \verbatim +*> SPLTOL is REAL +*> The threshold for splitting. +*> \endverbatim +*> +*> \param[out] NSPLIT +*> \verbatim +*> NSPLIT is INTEGER +*> The number of blocks T splits into. 1 <= NSPLIT <= N. +*> \endverbatim +*> +*> \param[out] ISPLIT +*> \verbatim +*> ISPLIT is INTEGER array, dimension (N) +*> The splitting points, at which T breaks up into blocks. +*> The first block consists of rows/columns 1 to ISPLIT(1), +*> the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), +*> etc., and the NSPLIT-th consists of rows/columns +*> ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues (of all L_i D_i L_i^T) +*> found. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements contain the eigenvalues. The +*> eigenvalues of each of the blocks, L_i D_i L_i^T, are +*> sorted in ascending order ( SLARRE may use the +*> remaining N-M elements as workspace). +*> \endverbatim +*> +*> \param[out] WERR +*> \verbatim +*> WERR is REAL array, dimension (N) +*> The error bound on the corresponding eigenvalue in W. +*> \endverbatim +*> +*> \param[out] WGAP +*> \verbatim +*> WGAP is REAL array, dimension (N) +*> The separation from the right neighbor eigenvalue in W. +*> The gap is only with respect to the eigenvalues of the same block +*> as each block has its own representation tree. +*> Exception: at the right end of a block we store the left gap +*> \endverbatim +*> +*> \param[out] IBLOCK +*> \verbatim +*> IBLOCK is INTEGER array, dimension (N) +*> The indices of the blocks (submatrices) associated with the +*> corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue +*> W(i) belongs to the first block from the top, =2 if W(i) +*> belongs to the second block, etc. +*> \endverbatim +*> +*> \param[out] INDEXW +*> \verbatim +*> INDEXW is INTEGER array, dimension (N) +*> The indices of the eigenvalues within each block (submatrix); +*> for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the +*> i-th eigenvalue W(i) is the 10-th eigenvalue in block 2 +*> \endverbatim +*> +*> \param[out] GERS +*> \verbatim +*> GERS is REAL array, dimension (2*N) +*> The N Gerschgorin intervals (the i-th Gerschgorin interval +*> is (GERS(2*i-1), GERS(2*i)). +*> \endverbatim +*> +*> \param[out] PIVMIN +*> \verbatim +*> PIVMIN is REAL +*> The minimum pivot in the Sturm sequence for T. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (6*N) +*> Workspace. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> Workspace. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: A problem occurred in SLARRE. +*> < 0: One of the called subroutines signaled an internal problem. +*> Needs inspection of the corresponding parameter IINFO +*> for further information. +*> +*> =-1: Problem in SLARRD. +*> = 2: No base representation could be found in MAXTRY iterations. +*> Increasing MAXTRY and recompilation might be a remedy. +*> =-3: Problem in SLARRB when computing the refined root +*> representation for SLASQ2. +*> =-4: Problem in SLARRB when preforming bisection on the +*> desired part of the spectrum. +*> =-5: Problem in SLASQ2. +*> =-6: Problem in SLASQ2. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The base representations are required to suffer very little +*> element growth and consequently define all their eigenvalues to +*> high relative accuracy. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA \n +*> +* ===================================================================== + SUBROUTINE SLARRE( RANGE, N, VL, VU, IL, IU, D, E, E2, + $ RTOL1, RTOL2, SPLTOL, NSPLIT, ISPLIT, M, + $ W, WERR, WGAP, IBLOCK, INDEXW, GERS, PIVMIN, + $ WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER RANGE + INTEGER IL, INFO, IU, M, N, NSPLIT + REAL PIVMIN, RTOL1, RTOL2, SPLTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), ISPLIT( * ), IWORK( * ), + $ INDEXW( * ) + REAL D( * ), E( * ), E2( * ), GERS( * ), + $ W( * ),WERR( * ), WGAP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL FAC, FOUR, FOURTH, FUDGE, HALF, HNDRD, + $ MAXGROWTH, ONE, PERT, TWO, ZERO + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, + $ TWO = 2.0E0, FOUR=4.0E0, + $ HNDRD = 100.0E0, + $ PERT = 4.0E0, + $ HALF = ONE/TWO, FOURTH = ONE/FOUR, FAC= HALF, + $ MAXGROWTH = 64.0E0, FUDGE = 2.0E0 ) + INTEGER MAXTRY, ALLRNG, INDRNG, VALRNG + PARAMETER ( MAXTRY = 6, ALLRNG = 1, INDRNG = 2, + $ VALRNG = 3 ) +* .. +* .. Local Scalars .. + LOGICAL FORCEB, NOREP, USEDQD + INTEGER CNT, CNT1, CNT2, I, IBEGIN, IDUM, IEND, IINFO, + $ IN, INDL, INDU, IRANGE, J, JBLK, MB, MM, + $ WBEGIN, WEND + REAL AVGAP, BSRTOL, CLWDTH, DMAX, DPIVOT, EABS, + $ EMAX, EOLD, EPS, GL, GU, ISLEFT, ISRGHT, RTL, + $ RTOL, S1, S2, SAFMIN, SGNDEF, SIGMA, SPDIAM, + $ TAU, TMP, TMP1 + + +* .. +* .. Local Arrays .. + INTEGER ISEED( 4 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL SLAMCH, LSAME + +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLARNV, SLARRA, SLARRB, SLARRC, SLARRD, + $ SLASQ2, SLARRK +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN + +* .. +* .. Executable Statements .. +* + + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* +* Decode RANGE +* + IF( LSAME( RANGE, 'A' ) ) THEN + IRANGE = ALLRNG + ELSE IF( LSAME( RANGE, 'V' ) ) THEN + IRANGE = VALRNG + ELSE IF( LSAME( RANGE, 'I' ) ) THEN + IRANGE = INDRNG + END IF + + M = 0 + +* Get machine constants + SAFMIN = SLAMCH( 'S' ) + EPS = SLAMCH( 'P' ) + +* Set parameters + RTL = HNDRD*EPS +* If one were ever to ask for less initial precision in BSRTOL, +* one should keep in mind that for the subset case, the extremal +* eigenvalues must be at least as accurate as the current setting +* (eigenvalues in the middle need not as much accuracy) + BSRTOL = SQRT(EPS)*(0.5E-3) + +* Treat case of 1x1 matrix for quick return + IF( N.EQ.1 ) THEN + IF( (IRANGE.EQ.ALLRNG).OR. + $ ((IRANGE.EQ.VALRNG).AND.(D(1).GT.VL).AND.(D(1).LE.VU)).OR. + $ ((IRANGE.EQ.INDRNG).AND.(IL.EQ.1).AND.(IU.EQ.1)) ) THEN + M = 1 + W(1) = D(1) +* The computation error of the eigenvalue is zero + WERR(1) = ZERO + WGAP(1) = ZERO + IBLOCK( 1 ) = 1 + INDEXW( 1 ) = 1 + GERS(1) = D( 1 ) + GERS(2) = D( 1 ) + ENDIF +* store the shift for the initial RRR, which is zero in this case + E(1) = ZERO + RETURN + END IF + +* General case: tridiagonal matrix of order > 1 +* +* Init WERR, WGAP. Compute Gerschgorin intervals and spectral diameter. +* Compute maximum off-diagonal entry and pivmin. + GL = D(1) + GU = D(1) + EOLD = ZERO + EMAX = ZERO + E(N) = ZERO + DO 5 I = 1,N + WERR(I) = ZERO + WGAP(I) = ZERO + EABS = ABS( E(I) ) + IF( EABS .GE. EMAX ) THEN + EMAX = EABS + END IF + TMP1 = EABS + EOLD + GERS( 2*I-1) = D(I) - TMP1 + GL = MIN( GL, GERS( 2*I - 1)) + GERS( 2*I ) = D(I) + TMP1 + GU = MAX( GU, GERS(2*I) ) + EOLD = EABS + 5 CONTINUE +* The minimum pivot allowed in the Sturm sequence for T + PIVMIN = SAFMIN * MAX( ONE, EMAX**2 ) +* Compute spectral diameter. The Gerschgorin bounds give an +* estimate that is wrong by at most a factor of SQRT(2) + SPDIAM = GU - GL + +* Compute splitting points + CALL SLARRA( N, D, E, E2, SPLTOL, SPDIAM, + $ NSPLIT, ISPLIT, IINFO ) + +* Can force use of bisection instead of faster DQDS. +* Option left in the code for future multisection work. + FORCEB = .FALSE. + +* Initialize USEDQD, DQDS should be used for ALLRNG unless someone +* explicitly wants bisection. + USEDQD = (( IRANGE.EQ.ALLRNG ) .AND. (.NOT.FORCEB)) + + IF( (IRANGE.EQ.ALLRNG) .AND. (.NOT. FORCEB) ) THEN +* Set interval [VL,VU] that contains all eigenvalues + VL = GL + VU = GU + ELSE +* We call SLARRD to find crude approximations to the eigenvalues +* in the desired range. In case IRANGE = INDRNG, we also obtain the +* interval (VL,VU] that contains all the wanted eigenvalues. +* An interval [LEFT,RIGHT] has converged if +* RIGHT-LEFT.LT.RTOL*MAX(ABS(LEFT),ABS(RIGHT)) +* SLARRD needs a WORK of size 4*N, IWORK of size 3*N + CALL SLARRD( RANGE, 'B', N, VL, VU, IL, IU, GERS, + $ BSRTOL, D, E, E2, PIVMIN, NSPLIT, ISPLIT, + $ MM, W, WERR, VL, VU, IBLOCK, INDEXW, + $ WORK, IWORK, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = -1 + RETURN + ENDIF +* Make sure that the entries M+1 to N in W, WERR, IBLOCK, INDEXW are 0 + DO 14 I = MM+1,N + W( I ) = ZERO + WERR( I ) = ZERO + IBLOCK( I ) = 0 + INDEXW( I ) = 0 + 14 CONTINUE + END IF + + +*** +* Loop over unreduced blocks + IBEGIN = 1 + WBEGIN = 1 + DO 170 JBLK = 1, NSPLIT + IEND = ISPLIT( JBLK ) + IN = IEND - IBEGIN + 1 + +* 1 X 1 block + IF( IN.EQ.1 ) THEN + IF( (IRANGE.EQ.ALLRNG).OR.( (IRANGE.EQ.VALRNG).AND. + $ ( D( IBEGIN ).GT.VL ).AND.( D( IBEGIN ).LE.VU ) ) + $ .OR. ( (IRANGE.EQ.INDRNG).AND.(IBLOCK(WBEGIN).EQ.JBLK)) + $ ) THEN + M = M + 1 + W( M ) = D( IBEGIN ) + WERR(M) = ZERO +* The gap for a single block doesn't matter for the later +* algorithm and is assigned an arbitrary large value + WGAP(M) = ZERO + IBLOCK( M ) = JBLK + INDEXW( M ) = 1 + WBEGIN = WBEGIN + 1 + ENDIF +* E( IEND ) holds the shift for the initial RRR + E( IEND ) = ZERO + IBEGIN = IEND + 1 + GO TO 170 + END IF +* +* Blocks of size larger than 1x1 +* +* E( IEND ) will hold the shift for the initial RRR, for now set it =0 + E( IEND ) = ZERO +* +* Find local outer bounds GL,GU for the block + GL = D(IBEGIN) + GU = D(IBEGIN) + DO 15 I = IBEGIN , IEND + GL = MIN( GERS( 2*I-1 ), GL ) + GU = MAX( GERS( 2*I ), GU ) + 15 CONTINUE + SPDIAM = GU - GL + + IF(.NOT. ((IRANGE.EQ.ALLRNG).AND.(.NOT.FORCEB)) ) THEN +* Count the number of eigenvalues in the current block. + MB = 0 + DO 20 I = WBEGIN,MM + IF( IBLOCK(I).EQ.JBLK ) THEN + MB = MB+1 + ELSE + GOTO 21 + ENDIF + 20 CONTINUE + 21 CONTINUE + + IF( MB.EQ.0) THEN +* No eigenvalue in the current block lies in the desired range +* E( IEND ) holds the shift for the initial RRR + E( IEND ) = ZERO + IBEGIN = IEND + 1 + GO TO 170 + ELSE + +* Decide whether dqds or bisection is more efficient + USEDQD = ( (MB .GT. FAC*IN) .AND. (.NOT.FORCEB) ) + WEND = WBEGIN + MB - 1 +* Calculate gaps for the current block +* In later stages, when representations for individual +* eigenvalues are different, we use SIGMA = E( IEND ). + SIGMA = ZERO + DO 30 I = WBEGIN, WEND - 1 + WGAP( I ) = MAX( ZERO, + $ W(I+1)-WERR(I+1) - (W(I)+WERR(I)) ) + 30 CONTINUE + WGAP( WEND ) = MAX( ZERO, + $ VU - SIGMA - (W( WEND )+WERR( WEND ))) +* Find local index of the first and last desired evalue. + INDL = INDEXW(WBEGIN) + INDU = INDEXW( WEND ) + ENDIF + ENDIF + IF(( (IRANGE.EQ.ALLRNG) .AND. (.NOT. FORCEB) ).OR.USEDQD) THEN +* Case of DQDS +* Find approximations to the extremal eigenvalues of the block + CALL SLARRK( IN, 1, GL, GU, D(IBEGIN), + $ E2(IBEGIN), PIVMIN, RTL, TMP, TMP1, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = -1 + RETURN + ENDIF + ISLEFT = MAX(GL, TMP - TMP1 + $ - HNDRD * EPS* ABS(TMP - TMP1)) + + CALL SLARRK( IN, IN, GL, GU, D(IBEGIN), + $ E2(IBEGIN), PIVMIN, RTL, TMP, TMP1, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = -1 + RETURN + ENDIF + ISRGHT = MIN(GU, TMP + TMP1 + $ + HNDRD * EPS * ABS(TMP + TMP1)) +* Improve the estimate of the spectral diameter + SPDIAM = ISRGHT - ISLEFT + ELSE +* Case of bisection +* Find approximations to the wanted extremal eigenvalues + ISLEFT = MAX(GL, W(WBEGIN) - WERR(WBEGIN) + $ - HNDRD * EPS*ABS(W(WBEGIN)- WERR(WBEGIN) )) + ISRGHT = MIN(GU,W(WEND) + WERR(WEND) + $ + HNDRD * EPS * ABS(W(WEND)+ WERR(WEND))) + ENDIF + + +* Decide whether the base representation for the current block +* L_JBLK D_JBLK L_JBLK^T = T_JBLK - sigma_JBLK I +* should be on the left or the right end of the current block. +* The strategy is to shift to the end which is "more populated" +* Furthermore, decide whether to use DQDS for the computation of +* the eigenvalue approximations at the end of SLARRE or bisection. +* dqds is chosen if all eigenvalues are desired or the number of +* eigenvalues to be computed is large compared to the blocksize. + IF( ( IRANGE.EQ.ALLRNG ) .AND. (.NOT.FORCEB) ) THEN +* If all the eigenvalues have to be computed, we use dqd + USEDQD = .TRUE. +* INDL is the local index of the first eigenvalue to compute + INDL = 1 + INDU = IN +* MB = number of eigenvalues to compute + MB = IN + WEND = WBEGIN + MB - 1 +* Define 1/4 and 3/4 points of the spectrum + S1 = ISLEFT + FOURTH * SPDIAM + S2 = ISRGHT - FOURTH * SPDIAM + ELSE +* SLARRD has computed IBLOCK and INDEXW for each eigenvalue +* approximation. +* choose sigma + IF( USEDQD ) THEN + S1 = ISLEFT + FOURTH * SPDIAM + S2 = ISRGHT - FOURTH * SPDIAM + ELSE + TMP = MIN(ISRGHT,VU) - MAX(ISLEFT,VL) + S1 = MAX(ISLEFT,VL) + FOURTH * TMP + S2 = MIN(ISRGHT,VU) - FOURTH * TMP + ENDIF + ENDIF + +* Compute the negcount at the 1/4 and 3/4 points + IF(MB.GT.1) THEN + CALL SLARRC( 'T', IN, S1, S2, D(IBEGIN), + $ E(IBEGIN), PIVMIN, CNT, CNT1, CNT2, IINFO) + ENDIF + + IF(MB.EQ.1) THEN + SIGMA = GL + SGNDEF = ONE + ELSEIF( CNT1 - INDL .GE. INDU - CNT2 ) THEN + IF( ( IRANGE.EQ.ALLRNG ) .AND. (.NOT.FORCEB) ) THEN + SIGMA = MAX(ISLEFT,GL) + ELSEIF( USEDQD ) THEN +* use Gerschgorin bound as shift to get pos def matrix +* for dqds + SIGMA = ISLEFT + ELSE +* use approximation of the first desired eigenvalue of the +* block as shift + SIGMA = MAX(ISLEFT,VL) + ENDIF + SGNDEF = ONE + ELSE + IF( ( IRANGE.EQ.ALLRNG ) .AND. (.NOT.FORCEB) ) THEN + SIGMA = MIN(ISRGHT,GU) + ELSEIF( USEDQD ) THEN +* use Gerschgorin bound as shift to get neg def matrix +* for dqds + SIGMA = ISRGHT + ELSE +* use approximation of the first desired eigenvalue of the +* block as shift + SIGMA = MIN(ISRGHT,VU) + ENDIF + SGNDEF = -ONE + ENDIF + + +* An initial SIGMA has been chosen that will be used for computing +* T - SIGMA I = L D L^T +* Define the increment TAU of the shift in case the initial shift +* needs to be refined to obtain a factorization with not too much +* element growth. + IF( USEDQD ) THEN +* The initial SIGMA was to the outer end of the spectrum +* the matrix is definite and we need not retreat. + TAU = SPDIAM*EPS*N + TWO*PIVMIN + TAU = MAX( TAU,TWO*EPS*ABS(SIGMA) ) + ELSE + IF(MB.GT.1) THEN + CLWDTH = W(WEND) + WERR(WEND) - W(WBEGIN) - WERR(WBEGIN) + AVGAP = ABS(CLWDTH / REAL(WEND-WBEGIN)) + IF( SGNDEF.EQ.ONE ) THEN + TAU = HALF*MAX(WGAP(WBEGIN),AVGAP) + TAU = MAX(TAU,WERR(WBEGIN)) + ELSE + TAU = HALF*MAX(WGAP(WEND-1),AVGAP) + TAU = MAX(TAU,WERR(WEND)) + ENDIF + ELSE + TAU = WERR(WBEGIN) + ENDIF + ENDIF +* + DO 80 IDUM = 1, MAXTRY +* Compute L D L^T factorization of tridiagonal matrix T - sigma I. +* Store D in WORK(1:IN), L in WORK(IN+1:2*IN), and reciprocals of +* pivots in WORK(2*IN+1:3*IN) + DPIVOT = D( IBEGIN ) - SIGMA + WORK( 1 ) = DPIVOT + DMAX = ABS( WORK(1) ) + J = IBEGIN + DO 70 I = 1, IN - 1 + WORK( 2*IN+I ) = ONE / WORK( I ) + TMP = E( J )*WORK( 2*IN+I ) + WORK( IN+I ) = TMP + DPIVOT = ( D( J+1 )-SIGMA ) - TMP*E( J ) + WORK( I+1 ) = DPIVOT + DMAX = MAX( DMAX, ABS(DPIVOT) ) + J = J + 1 + 70 CONTINUE +* check for element growth + IF( DMAX .GT. MAXGROWTH*SPDIAM ) THEN + NOREP = .TRUE. + ELSE + NOREP = .FALSE. + ENDIF + IF( USEDQD .AND. .NOT.NOREP ) THEN +* Ensure the definiteness of the representation +* All entries of D (of L D L^T) must have the same sign + DO 71 I = 1, IN + TMP = SGNDEF*WORK( I ) + IF( TMP.LT.ZERO ) NOREP = .TRUE. + 71 CONTINUE + ENDIF + IF(NOREP) THEN +* Note that in the case of IRANGE=ALLRNG, we use the Gerschgorin +* shift which makes the matrix definite. So we should end up +* here really only in the case of IRANGE = VALRNG or INDRNG. + IF( IDUM.EQ.MAXTRY-1 ) THEN + IF( SGNDEF.EQ.ONE ) THEN +* The fudged Gerschgorin shift should succeed + SIGMA = + $ GL - FUDGE*SPDIAM*EPS*N - FUDGE*TWO*PIVMIN + ELSE + SIGMA = + $ GU + FUDGE*SPDIAM*EPS*N + FUDGE*TWO*PIVMIN + END IF + ELSE + SIGMA = SIGMA - SGNDEF * TAU + TAU = TWO * TAU + END IF + ELSE +* an initial RRR is found + GO TO 83 + END IF + 80 CONTINUE +* if the program reaches this point, no base representation could be +* found in MAXTRY iterations. + INFO = 2 + RETURN + + 83 CONTINUE +* At this point, we have found an initial base representation +* T - SIGMA I = L D L^T with not too much element growth. +* Store the shift. + E( IEND ) = SIGMA +* Store D and L. + CALL SCOPY( IN, WORK, 1, D( IBEGIN ), 1 ) + CALL SCOPY( IN-1, WORK( IN+1 ), 1, E( IBEGIN ), 1 ) + + + IF(MB.GT.1 ) THEN +* +* Perturb each entry of the base representation by a small +* (but random) relative amount to overcome difficulties with +* glued matrices. +* + DO 122 I = 1, 4 + ISEED( I ) = 1 + 122 CONTINUE + + CALL SLARNV(2, ISEED, 2*IN-1, WORK(1)) + DO 125 I = 1,IN-1 + D(IBEGIN+I-1) = D(IBEGIN+I-1)*(ONE+EPS*PERT*WORK(I)) + E(IBEGIN+I-1) = E(IBEGIN+I-1)*(ONE+EPS*PERT*WORK(IN+I)) + 125 CONTINUE + D(IEND) = D(IEND)*(ONE+EPS*FOUR*WORK(IN)) +* + ENDIF +* +* Don't update the Gerschgorin intervals because keeping track +* of the updates would be too much work in SLARRV. +* We update W instead and use it to locate the proper Gerschgorin +* intervals. + +* Compute the required eigenvalues of L D L' by bisection or dqds + IF ( .NOT.USEDQD ) THEN +* If SLARRD has been used, shift the eigenvalue approximations +* according to their representation. This is necessary for +* a uniform SLARRV since dqds computes eigenvalues of the +* shifted representation. In SLARRV, W will always hold the +* UNshifted eigenvalue approximation. + DO 134 J=WBEGIN,WEND + W(J) = W(J) - SIGMA + WERR(J) = WERR(J) + ABS(W(J)) * EPS + 134 CONTINUE +* call SLARRB to reduce eigenvalue error of the approximations +* from SLARRD + DO 135 I = IBEGIN, IEND-1 + WORK( I ) = D( I ) * E( I )**2 + 135 CONTINUE +* use bisection to find EV from INDL to INDU + CALL SLARRB(IN, D(IBEGIN), WORK(IBEGIN), + $ INDL, INDU, RTOL1, RTOL2, INDL-1, + $ W(WBEGIN), WGAP(WBEGIN), WERR(WBEGIN), + $ WORK( 2*N+1 ), IWORK, PIVMIN, SPDIAM, + $ IN, IINFO ) + IF( IINFO .NE. 0 ) THEN + INFO = -4 + RETURN + END IF +* SLARRB computes all gaps correctly except for the last one +* Record distance to VU/GU + WGAP( WEND ) = MAX( ZERO, + $ ( VU-SIGMA ) - ( W( WEND ) + WERR( WEND ) ) ) + DO 138 I = INDL, INDU + M = M + 1 + IBLOCK(M) = JBLK + INDEXW(M) = I + 138 CONTINUE + ELSE +* Call dqds to get all eigs (and then possibly delete unwanted +* eigenvalues). +* Note that dqds finds the eigenvalues of the L D L^T representation +* of T to high relative accuracy. High relative accuracy +* might be lost when the shift of the RRR is subtracted to obtain +* the eigenvalues of T. However, T is not guaranteed to define its +* eigenvalues to high relative accuracy anyway. +* Set RTOL to the order of the tolerance used in SLASQ2 +* This is an ESTIMATED error, the worst case bound is 4*N*EPS +* which is usually too large and requires unnecessary work to be +* done by bisection when computing the eigenvectors + RTOL = LOG(REAL(IN)) * FOUR * EPS + J = IBEGIN + DO 140 I = 1, IN - 1 + WORK( 2*I-1 ) = ABS( D( J ) ) + WORK( 2*I ) = E( J )*E( J )*WORK( 2*I-1 ) + J = J + 1 + 140 CONTINUE + WORK( 2*IN-1 ) = ABS( D( IEND ) ) + WORK( 2*IN ) = ZERO + CALL SLASQ2( IN, WORK, IINFO ) + IF( IINFO .NE. 0 ) THEN +* If IINFO = -5 then an index is part of a tight cluster +* and should be changed. The index is in IWORK(1) and the +* gap is in WORK(N+1) + INFO = -5 + RETURN + ELSE +* Test that all eigenvalues are positive as expected + DO 149 I = 1, IN + IF( WORK( I ).LT.ZERO ) THEN + INFO = -6 + RETURN + ENDIF + 149 CONTINUE + END IF + IF( SGNDEF.GT.ZERO ) THEN + DO 150 I = INDL, INDU + M = M + 1 + W( M ) = WORK( IN-I+1 ) + IBLOCK( M ) = JBLK + INDEXW( M ) = I + 150 CONTINUE + ELSE + DO 160 I = INDL, INDU + M = M + 1 + W( M ) = -WORK( I ) + IBLOCK( M ) = JBLK + INDEXW( M ) = I + 160 CONTINUE + END IF + + DO 165 I = M - MB + 1, M +* the value of RTOL below should be the tolerance in SLASQ2 + WERR( I ) = RTOL * ABS( W(I) ) + 165 CONTINUE + DO 166 I = M - MB + 1, M - 1 +* compute the right gap between the intervals + WGAP( I ) = MAX( ZERO, + $ W(I+1)-WERR(I+1) - (W(I)+WERR(I)) ) + 166 CONTINUE + WGAP( M ) = MAX( ZERO, + $ ( VU-SIGMA ) - ( W( M ) + WERR( M ) ) ) + END IF +* proceed with next block + IBEGIN = IEND + 1 + WBEGIN = WEND + 1 + 170 CONTINUE +* + + RETURN +* +* end of SLARRE +* + END diff --git a/dspl/liblapack/SRC/slarrf.f b/dspl/liblapack/SRC/slarrf.f new file mode 100644 index 0000000..8c8de52 --- /dev/null +++ b/dspl/liblapack/SRC/slarrf.f @@ -0,0 +1,495 @@ +*> \brief \b SLARRF finds a new relatively robust representation such that at least one of the eigenvalues is relatively isolated. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARRF( N, D, L, LD, CLSTRT, CLEND, +* W, WGAP, WERR, +* SPDIAM, CLGAPL, CLGAPR, PIVMIN, SIGMA, +* DPLUS, LPLUS, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER CLSTRT, CLEND, INFO, N +* REAL CLGAPL, CLGAPR, PIVMIN, SIGMA, SPDIAM +* .. +* .. Array Arguments .. +* REAL D( * ), DPLUS( * ), L( * ), LD( * ), +* $ LPLUS( * ), W( * ), WGAP( * ), WERR( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Given the initial representation L D L^T and its cluster of close +*> eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ... +*> W( CLEND ), SLARRF finds a new relatively robust representation +*> L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the +*> eigenvalues of L(+) D(+) L(+)^T is relatively isolated. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix (subblock, if the matrix split). +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The N diagonal elements of the diagonal matrix D. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is REAL array, dimension (N-1) +*> The (N-1) subdiagonal elements of the unit bidiagonal +*> matrix L. +*> \endverbatim +*> +*> \param[in] LD +*> \verbatim +*> LD is REAL array, dimension (N-1) +*> The (N-1) elements L(i)*D(i). +*> \endverbatim +*> +*> \param[in] CLSTRT +*> \verbatim +*> CLSTRT is INTEGER +*> The index of the first eigenvalue in the cluster. +*> \endverbatim +*> +*> \param[in] CLEND +*> \verbatim +*> CLEND is INTEGER +*> The index of the last eigenvalue in the cluster. +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is REAL array, dimension +*> dimension is >= (CLEND-CLSTRT+1) +*> The eigenvalue APPROXIMATIONS of L D L^T in ascending order. +*> W( CLSTRT ) through W( CLEND ) form the cluster of relatively +*> close eigenalues. +*> \endverbatim +*> +*> \param[in,out] WGAP +*> \verbatim +*> WGAP is REAL array, dimension +*> dimension is >= (CLEND-CLSTRT+1) +*> The separation from the right neighbor eigenvalue in W. +*> \endverbatim +*> +*> \param[in] WERR +*> \verbatim +*> WERR is REAL array, dimension +*> dimension is >= (CLEND-CLSTRT+1) +*> WERR contain the semiwidth of the uncertainty +*> interval of the corresponding eigenvalue APPROXIMATION in W +*> \endverbatim +*> +*> \param[in] SPDIAM +*> \verbatim +*> SPDIAM is REAL +*> estimate of the spectral diameter obtained from the +*> Gerschgorin intervals +*> \endverbatim +*> +*> \param[in] CLGAPL +*> \verbatim +*> CLGAPL is REAL +*> \endverbatim +*> +*> \param[in] CLGAPR +*> \verbatim +*> CLGAPR is REAL +*> absolute gap on each end of the cluster. +*> Set by the calling routine to protect against shifts too close +*> to eigenvalues outside the cluster. +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is REAL +*> The minimum pivot allowed in the Sturm sequence. +*> \endverbatim +*> +*> \param[out] SIGMA +*> \verbatim +*> SIGMA is REAL +*> The shift used to form L(+) D(+) L(+)^T. +*> \endverbatim +*> +*> \param[out] DPLUS +*> \verbatim +*> DPLUS is REAL array, dimension (N) +*> The N diagonal elements of the diagonal matrix D(+). +*> \endverbatim +*> +*> \param[out] LPLUS +*> \verbatim +*> LPLUS is REAL array, dimension (N-1) +*> The first (N-1) elements of LPLUS contain the subdiagonal +*> elements of the unit bidiagonal matrix L(+). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> Workspace. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> Signals processing OK (=0) or failure (=1) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE SLARRF( N, D, L, LD, CLSTRT, CLEND, + $ W, WGAP, WERR, + $ SPDIAM, CLGAPL, CLGAPR, PIVMIN, SIGMA, + $ DPLUS, LPLUS, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER CLSTRT, CLEND, INFO, N + REAL CLGAPL, CLGAPR, PIVMIN, SIGMA, SPDIAM +* .. +* .. Array Arguments .. + REAL D( * ), DPLUS( * ), L( * ), LD( * ), + $ LPLUS( * ), W( * ), WGAP( * ), WERR( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL MAXGROWTH1, MAXGROWTH2, ONE, QUART, TWO + PARAMETER ( ONE = 1.0E0, TWO = 2.0E0, + $ QUART = 0.25E0, + $ MAXGROWTH1 = 8.E0, + $ MAXGROWTH2 = 8.E0 ) +* .. +* .. Local Scalars .. + LOGICAL DORRR1, FORCER, NOFAIL, SAWNAN1, SAWNAN2, TRYRRR1 + INTEGER I, INDX, KTRY, KTRYMAX, SLEFT, SRIGHT, SHIFT + PARAMETER ( KTRYMAX = 1, SLEFT = 1, SRIGHT = 2 ) + REAL AVGAP, BESTSHIFT, CLWDTH, EPS, FACT, FAIL, + $ FAIL2, GROWTHBOUND, LDELTA, LDMAX, LSIGMA, + $ MAX1, MAX2, MINGAP, OLDP, PROD, RDELTA, RDMAX, + $ RRR1, RRR2, RSIGMA, S, SMLGROWTH, TMP, ZNM2 +* .. +* .. External Functions .. + LOGICAL SISNAN + REAL SLAMCH + EXTERNAL SISNAN, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SCOPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* + FACT = REAL(2**KTRYMAX) + EPS = SLAMCH( 'Precision' ) + SHIFT = 0 + FORCER = .FALSE. + + +* Note that we cannot guarantee that for any of the shifts tried, +* the factorization has a small or even moderate element growth. +* There could be Ritz values at both ends of the cluster and despite +* backing off, there are examples where all factorizations tried +* (in IEEE mode, allowing zero pivots & infinities) have INFINITE +* element growth. +* For this reason, we should use PIVMIN in this subroutine so that at +* least the L D L^T factorization exists. It can be checked afterwards +* whether the element growth caused bad residuals/orthogonality. + +* Decide whether the code should accept the best among all +* representations despite large element growth or signal INFO=1 +* Setting NOFAIL to .FALSE. for quick fix for bug 113 + NOFAIL = .FALSE. +* + +* Compute the average gap length of the cluster + CLWDTH = ABS(W(CLEND)-W(CLSTRT)) + WERR(CLEND) + WERR(CLSTRT) + AVGAP = CLWDTH / REAL(CLEND-CLSTRT) + MINGAP = MIN(CLGAPL, CLGAPR) +* Initial values for shifts to both ends of cluster + LSIGMA = MIN(W( CLSTRT ),W( CLEND )) - WERR( CLSTRT ) + RSIGMA = MAX(W( CLSTRT ),W( CLEND )) + WERR( CLEND ) + +* Use a small fudge to make sure that we really shift to the outside + LSIGMA = LSIGMA - ABS(LSIGMA)* TWO * EPS + RSIGMA = RSIGMA + ABS(RSIGMA)* TWO * EPS + +* Compute upper bounds for how much to back off the initial shifts + LDMAX = QUART * MINGAP + TWO * PIVMIN + RDMAX = QUART * MINGAP + TWO * PIVMIN + + LDELTA = MAX(AVGAP,WGAP( CLSTRT ))/FACT + RDELTA = MAX(AVGAP,WGAP( CLEND-1 ))/FACT +* +* Initialize the record of the best representation found +* + S = SLAMCH( 'S' ) + SMLGROWTH = ONE / S + FAIL = REAL(N-1)*MINGAP/(SPDIAM*EPS) + FAIL2 = REAL(N-1)*MINGAP/(SPDIAM*SQRT(EPS)) + BESTSHIFT = LSIGMA +* +* while (KTRY <= KTRYMAX) + KTRY = 0 + GROWTHBOUND = MAXGROWTH1*SPDIAM + + 5 CONTINUE + SAWNAN1 = .FALSE. + SAWNAN2 = .FALSE. +* Ensure that we do not back off too much of the initial shifts + LDELTA = MIN(LDMAX,LDELTA) + RDELTA = MIN(RDMAX,RDELTA) + +* Compute the element growth when shifting to both ends of the cluster +* accept the shift if there is no element growth at one of the two ends + +* Left end + S = -LSIGMA + DPLUS( 1 ) = D( 1 ) + S + IF(ABS(DPLUS(1)).LT.PIVMIN) THEN + DPLUS(1) = -PIVMIN +* Need to set SAWNAN1 because refined RRR test should not be used +* in this case + SAWNAN1 = .TRUE. + ENDIF + MAX1 = ABS( DPLUS( 1 ) ) + DO 6 I = 1, N - 1 + LPLUS( I ) = LD( I ) / DPLUS( I ) + S = S*LPLUS( I )*L( I ) - LSIGMA + DPLUS( I+1 ) = D( I+1 ) + S + IF(ABS(DPLUS(I+1)).LT.PIVMIN) THEN + DPLUS(I+1) = -PIVMIN +* Need to set SAWNAN1 because refined RRR test should not be used +* in this case + SAWNAN1 = .TRUE. + ENDIF + MAX1 = MAX( MAX1,ABS(DPLUS(I+1)) ) + 6 CONTINUE + SAWNAN1 = SAWNAN1 .OR. SISNAN( MAX1 ) + + IF( FORCER .OR. + $ (MAX1.LE.GROWTHBOUND .AND. .NOT.SAWNAN1 ) ) THEN + SIGMA = LSIGMA + SHIFT = SLEFT + GOTO 100 + ENDIF + +* Right end + S = -RSIGMA + WORK( 1 ) = D( 1 ) + S + IF(ABS(WORK(1)).LT.PIVMIN) THEN + WORK(1) = -PIVMIN +* Need to set SAWNAN2 because refined RRR test should not be used +* in this case + SAWNAN2 = .TRUE. + ENDIF + MAX2 = ABS( WORK( 1 ) ) + DO 7 I = 1, N - 1 + WORK( N+I ) = LD( I ) / WORK( I ) + S = S*WORK( N+I )*L( I ) - RSIGMA + WORK( I+1 ) = D( I+1 ) + S + IF(ABS(WORK(I+1)).LT.PIVMIN) THEN + WORK(I+1) = -PIVMIN +* Need to set SAWNAN2 because refined RRR test should not be used +* in this case + SAWNAN2 = .TRUE. + ENDIF + MAX2 = MAX( MAX2,ABS(WORK(I+1)) ) + 7 CONTINUE + SAWNAN2 = SAWNAN2 .OR. SISNAN( MAX2 ) + + IF( FORCER .OR. + $ (MAX2.LE.GROWTHBOUND .AND. .NOT.SAWNAN2 ) ) THEN + SIGMA = RSIGMA + SHIFT = SRIGHT + GOTO 100 + ENDIF +* If we are at this point, both shifts led to too much element growth + +* Record the better of the two shifts (provided it didn't lead to NaN) + IF(SAWNAN1.AND.SAWNAN2) THEN +* both MAX1 and MAX2 are NaN + GOTO 50 + ELSE + IF( .NOT.SAWNAN1 ) THEN + INDX = 1 + IF(MAX1.LE.SMLGROWTH) THEN + SMLGROWTH = MAX1 + BESTSHIFT = LSIGMA + ENDIF + ENDIF + IF( .NOT.SAWNAN2 ) THEN + IF(SAWNAN1 .OR. MAX2.LE.MAX1) INDX = 2 + IF(MAX2.LE.SMLGROWTH) THEN + SMLGROWTH = MAX2 + BESTSHIFT = RSIGMA + ENDIF + ENDIF + ENDIF + +* If we are here, both the left and the right shift led to +* element growth. If the element growth is moderate, then +* we may still accept the representation, if it passes a +* refined test for RRR. This test supposes that no NaN occurred. +* Moreover, we use the refined RRR test only for isolated clusters. + IF((CLWDTH.LT.MINGAP/REAL(128)) .AND. + $ (MIN(MAX1,MAX2).LT.FAIL2) + $ .AND.(.NOT.SAWNAN1).AND.(.NOT.SAWNAN2)) THEN + DORRR1 = .TRUE. + ELSE + DORRR1 = .FALSE. + ENDIF + TRYRRR1 = .TRUE. + IF( TRYRRR1 .AND. DORRR1 ) THEN + IF(INDX.EQ.1) THEN + TMP = ABS( DPLUS( N ) ) + ZNM2 = ONE + PROD = ONE + OLDP = ONE + DO 15 I = N-1, 1, -1 + IF( PROD .LE. EPS ) THEN + PROD = + $ ((DPLUS(I+1)*WORK(N+I+1))/(DPLUS(I)*WORK(N+I)))*OLDP + ELSE + PROD = PROD*ABS(WORK(N+I)) + END IF + OLDP = PROD + ZNM2 = ZNM2 + PROD**2 + TMP = MAX( TMP, ABS( DPLUS( I ) * PROD )) + 15 CONTINUE + RRR1 = TMP/( SPDIAM * SQRT( ZNM2 ) ) + IF (RRR1.LE.MAXGROWTH2) THEN + SIGMA = LSIGMA + SHIFT = SLEFT + GOTO 100 + ENDIF + ELSE IF(INDX.EQ.2) THEN + TMP = ABS( WORK( N ) ) + ZNM2 = ONE + PROD = ONE + OLDP = ONE + DO 16 I = N-1, 1, -1 + IF( PROD .LE. EPS ) THEN + PROD = ((WORK(I+1)*LPLUS(I+1))/(WORK(I)*LPLUS(I)))*OLDP + ELSE + PROD = PROD*ABS(LPLUS(I)) + END IF + OLDP = PROD + ZNM2 = ZNM2 + PROD**2 + TMP = MAX( TMP, ABS( WORK( I ) * PROD )) + 16 CONTINUE + RRR2 = TMP/( SPDIAM * SQRT( ZNM2 ) ) + IF (RRR2.LE.MAXGROWTH2) THEN + SIGMA = RSIGMA + SHIFT = SRIGHT + GOTO 100 + ENDIF + END IF + ENDIF + + 50 CONTINUE + + IF (KTRY.LT.KTRYMAX) THEN +* If we are here, both shifts failed also the RRR test. +* Back off to the outside + LSIGMA = MAX( LSIGMA - LDELTA, + $ LSIGMA - LDMAX) + RSIGMA = MIN( RSIGMA + RDELTA, + $ RSIGMA + RDMAX ) + LDELTA = TWO * LDELTA + RDELTA = TWO * RDELTA + KTRY = KTRY + 1 + GOTO 5 + ELSE +* None of the representations investigated satisfied our +* criteria. Take the best one we found. + IF((SMLGROWTH.LT.FAIL).OR.NOFAIL) THEN + LSIGMA = BESTSHIFT + RSIGMA = BESTSHIFT + FORCER = .TRUE. + GOTO 5 + ELSE + INFO = 1 + RETURN + ENDIF + END IF + + 100 CONTINUE + IF (SHIFT.EQ.SLEFT) THEN + ELSEIF (SHIFT.EQ.SRIGHT) THEN +* store new L and D back into DPLUS, LPLUS + CALL SCOPY( N, WORK, 1, DPLUS, 1 ) + CALL SCOPY( N-1, WORK(N+1), 1, LPLUS, 1 ) + ENDIF + + RETURN +* +* End of SLARRF +* + END diff --git a/dspl/liblapack/SRC/slarrj.f b/dspl/liblapack/SRC/slarrj.f new file mode 100644 index 0000000..a721d07 --- /dev/null +++ b/dspl/liblapack/SRC/slarrj.f @@ -0,0 +1,379 @@ +*> \brief \b SLARRJ performs refinement of the initial estimates of the eigenvalues of the matrix T. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARRJ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARRJ( N, D, E2, IFIRST, ILAST, +* RTOL, OFFSET, W, WERR, WORK, IWORK, +* PIVMIN, SPDIAM, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IFIRST, ILAST, INFO, N, OFFSET +* REAL PIVMIN, RTOL, SPDIAM +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL D( * ), E2( * ), W( * ), +* $ WERR( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Given the initial eigenvalue approximations of T, SLARRJ +*> does bisection to refine the eigenvalues of T, +*> W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial +*> guesses for these eigenvalues are input in W, the corresponding estimate +*> of the error in these guesses in WERR. During bisection, intervals +*> [left, right] are maintained by storing their mid-points and +*> semi-widths in the arrays W and WERR respectively. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The N diagonal elements of T. +*> \endverbatim +*> +*> \param[in] E2 +*> \verbatim +*> E2 is REAL array, dimension (N-1) +*> The Squares of the (N-1) subdiagonal elements of T. +*> \endverbatim +*> +*> \param[in] IFIRST +*> \verbatim +*> IFIRST is INTEGER +*> The index of the first eigenvalue to be computed. +*> \endverbatim +*> +*> \param[in] ILAST +*> \verbatim +*> ILAST is INTEGER +*> The index of the last eigenvalue to be computed. +*> \endverbatim +*> +*> \param[in] RTOL +*> \verbatim +*> RTOL is REAL +*> Tolerance for the convergence of the bisection intervals. +*> An interval [LEFT,RIGHT] has converged if +*> RIGHT-LEFT.LT.RTOL*MAX(|LEFT|,|RIGHT|). +*> \endverbatim +*> +*> \param[in] OFFSET +*> \verbatim +*> OFFSET is INTEGER +*> Offset for the arrays W and WERR, i.e., the IFIRST-OFFSET +*> through ILAST-OFFSET elements of these arrays are to be used. +*> \endverbatim +*> +*> \param[in,out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are +*> estimates of the eigenvalues of L D L^T indexed IFIRST through +*> ILAST. +*> On output, these estimates are refined. +*> \endverbatim +*> +*> \param[in,out] WERR +*> \verbatim +*> WERR is REAL array, dimension (N) +*> On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are +*> the errors in the estimates of the corresponding elements in W. +*> On output, these errors are refined. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> Workspace. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (2*N) +*> Workspace. +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is REAL +*> The minimum pivot in the Sturm sequence for T. +*> \endverbatim +*> +*> \param[in] SPDIAM +*> \verbatim +*> SPDIAM is REAL +*> The spectral diameter of T. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> Error flag. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE SLARRJ( N, D, E2, IFIRST, ILAST, + $ RTOL, OFFSET, W, WERR, WORK, IWORK, + $ PIVMIN, SPDIAM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER IFIRST, ILAST, INFO, N, OFFSET + REAL PIVMIN, RTOL, SPDIAM +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL D( * ), E2( * ), W( * ), + $ WERR( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, HALF + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ HALF = 0.5E0 ) + INTEGER MAXITR +* .. +* .. Local Scalars .. + INTEGER CNT, I, I1, I2, II, ITER, J, K, NEXT, NINT, + $ OLNINT, P, PREV, SAVI1 + REAL DPLUS, FAC, LEFT, MID, RIGHT, S, TMP, WIDTH +* +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* + MAXITR = INT( ( LOG( SPDIAM+PIVMIN )-LOG( PIVMIN ) ) / + $ LOG( TWO ) ) + 2 +* +* Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. +* The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while +* Count( WORK(2*I) ) is stored in IWORK( 2*I ). The integer IWORK( 2*I-1 ) +* for an unconverged interval is set to the index of the next unconverged +* interval, and is -1 or 0 for a converged interval. Thus a linked +* list of unconverged intervals is set up. +* + + I1 = IFIRST + I2 = ILAST +* The number of unconverged intervals + NINT = 0 +* The last unconverged interval found + PREV = 0 + DO 75 I = I1, I2 + K = 2*I + II = I - OFFSET + LEFT = W( II ) - WERR( II ) + MID = W(II) + RIGHT = W( II ) + WERR( II ) + WIDTH = RIGHT - MID + TMP = MAX( ABS( LEFT ), ABS( RIGHT ) ) + +* The following test prevents the test of converged intervals + IF( WIDTH.LT.RTOL*TMP ) THEN +* This interval has already converged and does not need refinement. +* (Note that the gaps might change through refining the +* eigenvalues, however, they can only get bigger.) +* Remove it from the list. + IWORK( K-1 ) = -1 +* Make sure that I1 always points to the first unconverged interval + IF((I.EQ.I1).AND.(I.LT.I2)) I1 = I + 1 + IF((PREV.GE.I1).AND.(I.LE.I2)) IWORK( 2*PREV-1 ) = I + 1 + ELSE +* unconverged interval found + PREV = I +* Make sure that [LEFT,RIGHT] contains the desired eigenvalue +* +* Do while( CNT(LEFT).GT.I-1 ) +* + FAC = ONE + 20 CONTINUE + CNT = 0 + S = LEFT + DPLUS = D( 1 ) - S + IF( DPLUS.LT.ZERO ) CNT = CNT + 1 + DO 30 J = 2, N + DPLUS = D( J ) - S - E2( J-1 )/DPLUS + IF( DPLUS.LT.ZERO ) CNT = CNT + 1 + 30 CONTINUE + IF( CNT.GT.I-1 ) THEN + LEFT = LEFT - WERR( II )*FAC + FAC = TWO*FAC + GO TO 20 + END IF +* +* Do while( CNT(RIGHT).LT.I ) +* + FAC = ONE + 50 CONTINUE + CNT = 0 + S = RIGHT + DPLUS = D( 1 ) - S + IF( DPLUS.LT.ZERO ) CNT = CNT + 1 + DO 60 J = 2, N + DPLUS = D( J ) - S - E2( J-1 )/DPLUS + IF( DPLUS.LT.ZERO ) CNT = CNT + 1 + 60 CONTINUE + IF( CNT.LT.I ) THEN + RIGHT = RIGHT + WERR( II )*FAC + FAC = TWO*FAC + GO TO 50 + END IF + NINT = NINT + 1 + IWORK( K-1 ) = I + 1 + IWORK( K ) = CNT + END IF + WORK( K-1 ) = LEFT + WORK( K ) = RIGHT + 75 CONTINUE + + + SAVI1 = I1 +* +* Do while( NINT.GT.0 ), i.e. there are still unconverged intervals +* and while (ITER.LT.MAXITR) +* + ITER = 0 + 80 CONTINUE + PREV = I1 - 1 + I = I1 + OLNINT = NINT + + DO 100 P = 1, OLNINT + K = 2*I + II = I - OFFSET + NEXT = IWORK( K-1 ) + LEFT = WORK( K-1 ) + RIGHT = WORK( K ) + MID = HALF*( LEFT + RIGHT ) + +* semiwidth of interval + WIDTH = RIGHT - MID + TMP = MAX( ABS( LEFT ), ABS( RIGHT ) ) + + IF( ( WIDTH.LT.RTOL*TMP ) .OR. + $ (ITER.EQ.MAXITR) )THEN +* reduce number of unconverged intervals + NINT = NINT - 1 +* Mark interval as converged. + IWORK( K-1 ) = 0 + IF( I1.EQ.I ) THEN + I1 = NEXT + ELSE +* Prev holds the last unconverged interval previously examined + IF(PREV.GE.I1) IWORK( 2*PREV-1 ) = NEXT + END IF + I = NEXT + GO TO 100 + END IF + PREV = I +* +* Perform one bisection step +* + CNT = 0 + S = MID + DPLUS = D( 1 ) - S + IF( DPLUS.LT.ZERO ) CNT = CNT + 1 + DO 90 J = 2, N + DPLUS = D( J ) - S - E2( J-1 )/DPLUS + IF( DPLUS.LT.ZERO ) CNT = CNT + 1 + 90 CONTINUE + IF( CNT.LE.I-1 ) THEN + WORK( K-1 ) = MID + ELSE + WORK( K ) = MID + END IF + I = NEXT + + 100 CONTINUE + ITER = ITER + 1 +* do another loop if there are still unconverged intervals +* However, in the last iteration, all intervals are accepted +* since this is the best we can do. + IF( ( NINT.GT.0 ).AND.(ITER.LE.MAXITR) ) GO TO 80 +* +* +* At this point, all the intervals have converged + DO 110 I = SAVI1, ILAST + K = 2*I + II = I - OFFSET +* All intervals marked by '0' have been refined. + IF( IWORK( K-1 ).EQ.0 ) THEN + W( II ) = HALF*( WORK( K-1 )+WORK( K ) ) + WERR( II ) = WORK( K ) - W( II ) + END IF + 110 CONTINUE +* + + RETURN +* +* End of SLARRJ +* + END diff --git a/dspl/liblapack/SRC/slarrk.f b/dspl/liblapack/SRC/slarrk.f new file mode 100644 index 0000000..f39f074 --- /dev/null +++ b/dspl/liblapack/SRC/slarrk.f @@ -0,0 +1,256 @@ +*> \brief \b SLARRK computes one eigenvalue of a symmetric tridiagonal matrix T to suitable accuracy. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARRK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARRK( N, IW, GL, GU, +* D, E2, PIVMIN, RELTOL, W, WERR, INFO) +* +* .. Scalar Arguments .. +* INTEGER INFO, IW, N +* REAL PIVMIN, RELTOL, GL, GU, W, WERR +* .. +* .. Array Arguments .. +* REAL D( * ), E2( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARRK computes one eigenvalue of a symmetric tridiagonal +*> matrix T to suitable accuracy. This is an auxiliary code to be +*> called from SSTEMR. +*> +*> To avoid overflow, the matrix must be scaled so that its +*> largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value, and for greatest +*> accuracy, it should not be much smaller than that. +*> +*> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal +*> Matrix", Report CS41, Computer Science Dept., Stanford +*> University, July 21, 1966. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the tridiagonal matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in] IW +*> \verbatim +*> IW is INTEGER +*> The index of the eigenvalues to be returned. +*> \endverbatim +*> +*> \param[in] GL +*> \verbatim +*> GL is REAL +*> \endverbatim +*> +*> \param[in] GU +*> \verbatim +*> GU is REAL +*> An upper and a lower bound on the eigenvalue. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[in] E2 +*> \verbatim +*> E2 is REAL array, dimension (N-1) +*> The (n-1) squared off-diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is REAL +*> The minimum pivot allowed in the Sturm sequence for T. +*> \endverbatim +*> +*> \param[in] RELTOL +*> \verbatim +*> RELTOL is REAL +*> The minimum relative width of an interval. When an interval +*> is narrower than RELTOL times the larger (in +*> magnitude) endpoint, then it is considered to be +*> sufficiently small, i.e., converged. Note: this should +*> always be at least radix*machine epsilon. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL +*> \endverbatim +*> +*> \param[out] WERR +*> \verbatim +*> WERR is REAL +*> The error bound on the corresponding eigenvalue approximation +*> in W. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Eigenvalue converged +*> = -1: Eigenvalue did NOT converge +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> FUDGE REAL , default = 2 +*> A "fudge factor" to widen the Gershgorin intervals. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLARRK( N, IW, GL, GU, + $ D, E2, PIVMIN, RELTOL, W, WERR, INFO) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, IW, N + REAL PIVMIN, RELTOL, GL, GU, W, WERR +* .. +* .. Array Arguments .. + REAL D( * ), E2( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL FUDGE, HALF, TWO, ZERO + PARAMETER ( HALF = 0.5E0, TWO = 2.0E0, + $ FUDGE = TWO, ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, IT, ITMAX, NEGCNT + REAL ATOLI, EPS, LEFT, MID, RIGHT, RTOLI, TMP1, + $ TMP2, TNORM +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + INFO = 0 + RETURN + END IF +* +* Get machine constants + EPS = SLAMCH( 'P' ) + + TNORM = MAX( ABS( GL ), ABS( GU ) ) + RTOLI = RELTOL + ATOLI = FUDGE*TWO*PIVMIN + + ITMAX = INT( ( LOG( TNORM+PIVMIN )-LOG( PIVMIN ) ) / + $ LOG( TWO ) ) + 2 + + INFO = -1 + + LEFT = GL - FUDGE*TNORM*EPS*N - FUDGE*TWO*PIVMIN + RIGHT = GU + FUDGE*TNORM*EPS*N + FUDGE*TWO*PIVMIN + IT = 0 + + 10 CONTINUE +* +* Check if interval converged or maximum number of iterations reached +* + TMP1 = ABS( RIGHT - LEFT ) + TMP2 = MAX( ABS(RIGHT), ABS(LEFT) ) + IF( TMP1.LT.MAX( ATOLI, PIVMIN, RTOLI*TMP2 ) ) THEN + INFO = 0 + GOTO 30 + ENDIF + IF(IT.GT.ITMAX) + $ GOTO 30 + +* +* Count number of negative pivots for mid-point +* + IT = IT + 1 + MID = HALF * (LEFT + RIGHT) + NEGCNT = 0 + TMP1 = D( 1 ) - MID + IF( ABS( TMP1 ).LT.PIVMIN ) + $ TMP1 = -PIVMIN + IF( TMP1.LE.ZERO ) + $ NEGCNT = NEGCNT + 1 +* + DO 20 I = 2, N + TMP1 = D( I ) - E2( I-1 ) / TMP1 - MID + IF( ABS( TMP1 ).LT.PIVMIN ) + $ TMP1 = -PIVMIN + IF( TMP1.LE.ZERO ) + $ NEGCNT = NEGCNT + 1 + 20 CONTINUE + + IF(NEGCNT.GE.IW) THEN + RIGHT = MID + ELSE + LEFT = MID + ENDIF + GOTO 10 + + 30 CONTINUE +* +* Converged or maximum number of iterations reached +* + W = HALF * (LEFT + RIGHT) + WERR = HALF * ABS( RIGHT - LEFT ) + + RETURN +* +* End of SLARRK +* + END diff --git a/dspl/liblapack/SRC/slarrr.f b/dspl/liblapack/SRC/slarrr.f new file mode 100644 index 0000000..3f77d38 --- /dev/null +++ b/dspl/liblapack/SRC/slarrr.f @@ -0,0 +1,211 @@ +*> \brief \b SLARRR performs tests to decide whether the symmetric tridiagonal matrix T warrants expensive computations which guarantee high relative accuracy in the eigenvalues. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARRR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARRR( N, D, E, INFO ) +* +* .. Scalar Arguments .. +* INTEGER N, INFO +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ) +* .. +* +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Perform tests to decide whether the symmetric tridiagonal matrix T +*> warrants expensive computations which guarantee high relative accuracy +*> in the eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N > 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The N diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N) +*> On entry, the first (N-1) entries contain the subdiagonal +*> elements of the tridiagonal matrix T; E(N) is set to ZERO. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> INFO = 0(default) : the matrix warrants computations preserving +*> relative accuracy. +*> INFO = 1 : the matrix warrants computations guaranteeing +*> only absolute accuracy. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE SLARRR( N, D, E, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER N, INFO +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) +* .. +* +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, RELCOND + PARAMETER ( ZERO = 0.0E0, + $ RELCOND = 0.999E0 ) +* .. +* .. Local Scalars .. + INTEGER I + LOGICAL YESREL + REAL EPS, SAFMIN, SMLNUM, RMIN, TMP, TMP2, + $ OFFDIG, OFFDIG2 + +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + INFO = 0 + RETURN + END IF +* +* As a default, do NOT go for relative-accuracy preserving computations. + INFO = 1 + + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + RMIN = SQRT( SMLNUM ) + +* Tests for relative accuracy +* +* Test for scaled diagonal dominance +* Scale the diagonal entries to one and check whether the sum of the +* off-diagonals is less than one +* +* The sdd relative error bounds have a 1/(1- 2*x) factor in them, +* x = max(OFFDIG + OFFDIG2), so when x is close to 1/2, no relative +* accuracy is promised. In the notation of the code fragment below, +* 1/(1 - (OFFDIG + OFFDIG2)) is the condition number. +* We don't think it is worth going into "sdd mode" unless the relative +* condition number is reasonable, not 1/macheps. +* The threshold should be compatible with other thresholds used in the +* code. We set OFFDIG + OFFDIG2 <= .999 =: RELCOND, it corresponds +* to losing at most 3 decimal digits: 1 / (1 - (OFFDIG + OFFDIG2)) <= 1000 +* instead of the current OFFDIG + OFFDIG2 < 1 +* + YESREL = .TRUE. + OFFDIG = ZERO + TMP = SQRT(ABS(D(1))) + IF (TMP.LT.RMIN) YESREL = .FALSE. + IF(.NOT.YESREL) GOTO 11 + DO 10 I = 2, N + TMP2 = SQRT(ABS(D(I))) + IF (TMP2.LT.RMIN) YESREL = .FALSE. + IF(.NOT.YESREL) GOTO 11 + OFFDIG2 = ABS(E(I-1))/(TMP*TMP2) + IF(OFFDIG+OFFDIG2.GE.RELCOND) YESREL = .FALSE. + IF(.NOT.YESREL) GOTO 11 + TMP = TMP2 + OFFDIG = OFFDIG2 + 10 CONTINUE + 11 CONTINUE + + IF( YESREL ) THEN + INFO = 0 + RETURN + ELSE + ENDIF +* + +* +* *** MORE TO BE IMPLEMENTED *** +* + +* +* Test if the lower bidiagonal matrix L from T = L D L^T +* (zero shift facto) is well conditioned +* + +* +* Test if the upper bidiagonal matrix U from T = U D U^T +* (zero shift facto) is well conditioned. +* In this case, the matrix needs to be flipped and, at the end +* of the eigenvector computation, the flip needs to be applied +* to the computed eigenvectors (and the support) +* + +* + RETURN +* +* END OF SLARRR +* + END diff --git a/dspl/liblapack/SRC/slarrv.f b/dspl/liblapack/SRC/slarrv.f new file mode 100644 index 0000000..f9e3cf2 --- /dev/null +++ b/dspl/liblapack/SRC/slarrv.f @@ -0,0 +1,1045 @@ +*> \brief \b SLARRV computes the eigenvectors of the tridiagonal matrix T = L D LT given L, D and the eigenvalues of L D LT. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARRV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARRV( N, VL, VU, D, L, PIVMIN, +* ISPLIT, M, DOL, DOU, MINRGP, +* RTOL1, RTOL2, W, WERR, WGAP, +* IBLOCK, INDEXW, GERS, Z, LDZ, ISUPPZ, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER DOL, DOU, INFO, LDZ, M, N +* REAL MINRGP, PIVMIN, RTOL1, RTOL2, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IBLOCK( * ), INDEXW( * ), ISPLIT( * ), +* $ ISUPPZ( * ), IWORK( * ) +* REAL D( * ), GERS( * ), L( * ), W( * ), WERR( * ), +* $ WGAP( * ), WORK( * ) +* REAL Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARRV computes the eigenvectors of the tridiagonal matrix +*> T = L D L**T given L, D and APPROXIMATIONS to the eigenvalues of L D L**T. +*> The input eigenvalues should have been computed by SLARRE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> Lower bound of the interval that contains the desired +*> eigenvalues. VL < VU. Needed to compute gaps on the left or right +*> end of the extremal eigenvalues in the desired RANGE. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> Upper bound of the interval that contains the desired +*> eigenvalues. VL < VU. +*> Note: VU is currently not used by this implementation of SLARRV, VU is +*> passed to SLARRV because it could be used compute gaps on the right end +*> of the extremal eigenvalues. However, with not much initial accuracy in +*> LAMBDA and VU, the formula can lead to an overestimation of the right gap +*> and thus to inadequately early RQI 'convergence'. This is currently +*> prevented this by forcing a small right gap. And so it turns out that VU +*> is currently not used by this implementation of SLARRV. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the N diagonal elements of the diagonal matrix D. +*> On exit, D may be overwritten. +*> \endverbatim +*> +*> \param[in,out] L +*> \verbatim +*> L is REAL array, dimension (N) +*> On entry, the (N-1) subdiagonal elements of the unit +*> bidiagonal matrix L are in elements 1 to N-1 of L +*> (if the matrix is not split.) At the end of each block +*> is stored the corresponding shift as given by SLARRE. +*> On exit, L is overwritten. +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is REAL +*> The minimum pivot allowed in the Sturm sequence. +*> \endverbatim +*> +*> \param[in] ISPLIT +*> \verbatim +*> ISPLIT is INTEGER array, dimension (N) +*> The splitting points, at which T breaks up into blocks. +*> The first block consists of rows/columns 1 to +*> ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 +*> through ISPLIT( 2 ), etc. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The total number of input eigenvalues. 0 <= M <= N. +*> \endverbatim +*> +*> \param[in] DOL +*> \verbatim +*> DOL is INTEGER +*> \endverbatim +*> +*> \param[in] DOU +*> \verbatim +*> DOU is INTEGER +*> If the user wants to compute only selected eigenvectors from all +*> the eigenvalues supplied, he can specify an index range DOL:DOU. +*> Or else the setting DOL=1, DOU=M should be applied. +*> Note that DOL and DOU refer to the order in which the eigenvalues +*> are stored in W. +*> If the user wants to compute only selected eigenpairs, then +*> the columns DOL-1 to DOU+1 of the eigenvector space Z contain the +*> computed eigenvectors. All other columns of Z are set to zero. +*> \endverbatim +*> +*> \param[in] MINRGP +*> \verbatim +*> MINRGP is REAL +*> \endverbatim +*> +*> \param[in] RTOL1 +*> \verbatim +*> RTOL1 is REAL +*> \endverbatim +*> +*> \param[in] RTOL2 +*> \verbatim +*> RTOL2 is REAL +*> Parameters for bisection. +*> An interval [LEFT,RIGHT] has converged if +*> RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) +*> \endverbatim +*> +*> \param[in,out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements of W contain the APPROXIMATE eigenvalues for +*> which eigenvectors are to be computed. The eigenvalues +*> should be grouped by split-off block and ordered from +*> smallest to largest within the block ( The output array +*> W from SLARRE is expected here ). Furthermore, they are with +*> respect to the shift of the corresponding root representation +*> for their block. On exit, W holds the eigenvalues of the +*> UNshifted matrix. +*> \endverbatim +*> +*> \param[in,out] WERR +*> \verbatim +*> WERR is REAL array, dimension (N) +*> The first M elements contain the semiwidth of the uncertainty +*> interval of the corresponding eigenvalue in W +*> \endverbatim +*> +*> \param[in,out] WGAP +*> \verbatim +*> WGAP is REAL array, dimension (N) +*> The separation from the right neighbor eigenvalue in W. +*> \endverbatim +*> +*> \param[in] IBLOCK +*> \verbatim +*> IBLOCK is INTEGER array, dimension (N) +*> The indices of the blocks (submatrices) associated with the +*> corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue +*> W(i) belongs to the first block from the top, =2 if W(i) +*> belongs to the second block, etc. +*> \endverbatim +*> +*> \param[in] INDEXW +*> \verbatim +*> INDEXW is INTEGER array, dimension (N) +*> The indices of the eigenvalues within each block (submatrix); +*> for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the +*> i-th eigenvalue W(i) is the 10-th eigenvalue in the second block. +*> \endverbatim +*> +*> \param[in] GERS +*> \verbatim +*> GERS is REAL array, dimension (2*N) +*> The N Gerschgorin intervals (the i-th Gerschgorin interval +*> is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should +*> be computed from the original UNshifted matrix. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, max(1,M) ) +*> If INFO = 0, the first M columns of Z contain the +*> orthonormal eigenvectors of the matrix T +*> corresponding to the input eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The I-th eigenvector +*> is nonzero only in elements ISUPPZ( 2*I-1 ) through +*> ISUPPZ( 2*I ). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (12*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> > 0: A problem occurred in SLARRV. +*> < 0: One of the called subroutines signaled an internal problem. +*> Needs inspection of the corresponding parameter IINFO +*> for further information. +*> +*> =-1: Problem in SLARRB when refining a child's eigenvalues. +*> =-2: Problem in SLARRF when computing the RRR of a child. +*> When a child is inside a tight cluster, it can be difficult +*> to find an RRR. A partial remedy from the user's point of +*> view is to make the parameter MINRGP smaller and recompile. +*> However, as the orthogonality of the computed vectors is +*> proportional to 1/MINRGP, the user should be aware that +*> he might be trading in precision when he decreases MINRGP. +*> =-3: Problem in SLARRB when refining a single eigenvalue +*> after the Rayleigh correction was rejected. +*> = 5: The Rayleigh Quotient Iteration failed to converge to +*> full accuracy in MAXITR steps. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE SLARRV( N, VL, VU, D, L, PIVMIN, + $ ISPLIT, M, DOL, DOU, MINRGP, + $ RTOL1, RTOL2, W, WERR, WGAP, + $ IBLOCK, INDEXW, GERS, Z, LDZ, ISUPPZ, + $ WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER DOL, DOU, INFO, LDZ, M, N + REAL MINRGP, PIVMIN, RTOL1, RTOL2, VL, VU +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), INDEXW( * ), ISPLIT( * ), + $ ISUPPZ( * ), IWORK( * ) + REAL D( * ), GERS( * ), L( * ), W( * ), WERR( * ), + $ WGAP( * ), WORK( * ) + REAL Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXITR + PARAMETER ( MAXITR = 10 ) + REAL ZERO, ONE, TWO, THREE, FOUR, HALF + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, + $ TWO = 2.0E0, THREE = 3.0E0, + $ FOUR = 4.0E0, HALF = 0.5E0) +* .. +* .. Local Scalars .. + LOGICAL ESKIP, NEEDBS, STP2II, TRYRQC, USEDBS, USEDRQ + INTEGER DONE, I, IBEGIN, IDONE, IEND, II, IINDC1, + $ IINDC2, IINDR, IINDWK, IINFO, IM, IN, INDEIG, + $ INDLD, INDLLD, INDWRK, ISUPMN, ISUPMX, ITER, + $ ITMP1, J, JBLK, K, MINIWSIZE, MINWSIZE, NCLUS, + $ NDEPTH, NEGCNT, NEWCLS, NEWFST, NEWFTT, NEWLST, + $ NEWSIZ, OFFSET, OLDCLS, OLDFST, OLDIEN, OLDLST, + $ OLDNCL, P, PARITY, Q, WBEGIN, WEND, WINDEX, + $ WINDMN, WINDPL, ZFROM, ZTO, ZUSEDL, ZUSEDU, + $ ZUSEDW + REAL BSTRES, BSTW, EPS, FUDGE, GAP, GAPTOL, GL, GU, + $ LAMBDA, LEFT, LGAP, MINGMA, NRMINV, RESID, + $ RGAP, RIGHT, RQCORR, RQTOL, SAVGAP, SGNDEF, + $ SIGMA, SPDIAM, SSIGMA, TAU, TMP, TOL, ZTZ +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLAR1V, SLARRB, SLARRF, SLASET, + $ SSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, MAX, MIN +* .. +* .. Executable Statements .. +* .. + + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* +* The first N entries of WORK are reserved for the eigenvalues + INDLD = N+1 + INDLLD= 2*N+1 + INDWRK= 3*N+1 + MINWSIZE = 12 * N + + DO 5 I= 1,MINWSIZE + WORK( I ) = ZERO + 5 CONTINUE + +* IWORK(IINDR+1:IINDR+N) hold the twist indices R for the +* factorization used to compute the FP vector + IINDR = 0 +* IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current +* layer and the one above. + IINDC1 = N + IINDC2 = 2*N + IINDWK = 3*N + 1 + + MINIWSIZE = 7 * N + DO 10 I= 1,MINIWSIZE + IWORK( I ) = 0 + 10 CONTINUE + + ZUSEDL = 1 + IF(DOL.GT.1) THEN +* Set lower bound for use of Z + ZUSEDL = DOL-1 + ENDIF + ZUSEDU = M + IF(DOU.LT.M) THEN +* Set lower bound for use of Z + ZUSEDU = DOU+1 + ENDIF +* The width of the part of Z that is used + ZUSEDW = ZUSEDU - ZUSEDL + 1 + + + CALL SLASET( 'Full', N, ZUSEDW, ZERO, ZERO, + $ Z(1,ZUSEDL), LDZ ) + + EPS = SLAMCH( 'Precision' ) + RQTOL = TWO * EPS +* +* Set expert flags for standard code. + TRYRQC = .TRUE. + + IF((DOL.EQ.1).AND.(DOU.EQ.M)) THEN + ELSE +* Only selected eigenpairs are computed. Since the other evalues +* are not refined by RQ iteration, bisection has to compute to full +* accuracy. + RTOL1 = FOUR * EPS + RTOL2 = FOUR * EPS + ENDIF + +* The entries WBEGIN:WEND in W, WERR, WGAP correspond to the +* desired eigenvalues. The support of the nonzero eigenvector +* entries is contained in the interval IBEGIN:IEND. +* Remark that if k eigenpairs are desired, then the eigenvectors +* are stored in k contiguous columns of Z. + +* DONE is the number of eigenvectors already computed + DONE = 0 + IBEGIN = 1 + WBEGIN = 1 + DO 170 JBLK = 1, IBLOCK( M ) + IEND = ISPLIT( JBLK ) + SIGMA = L( IEND ) +* Find the eigenvectors of the submatrix indexed IBEGIN +* through IEND. + WEND = WBEGIN - 1 + 15 CONTINUE + IF( WEND.LT.M ) THEN + IF( IBLOCK( WEND+1 ).EQ.JBLK ) THEN + WEND = WEND + 1 + GO TO 15 + END IF + END IF + IF( WEND.LT.WBEGIN ) THEN + IBEGIN = IEND + 1 + GO TO 170 + ELSEIF( (WEND.LT.DOL).OR.(WBEGIN.GT.DOU) ) THEN + IBEGIN = IEND + 1 + WBEGIN = WEND + 1 + GO TO 170 + END IF + +* Find local spectral diameter of the block + GL = GERS( 2*IBEGIN-1 ) + GU = GERS( 2*IBEGIN ) + DO 20 I = IBEGIN+1 , IEND + GL = MIN( GERS( 2*I-1 ), GL ) + GU = MAX( GERS( 2*I ), GU ) + 20 CONTINUE + SPDIAM = GU - GL + +* OLDIEN is the last index of the previous block + OLDIEN = IBEGIN - 1 +* Calculate the size of the current block + IN = IEND - IBEGIN + 1 +* The number of eigenvalues in the current block + IM = WEND - WBEGIN + 1 + +* This is for a 1x1 block + IF( IBEGIN.EQ.IEND ) THEN + DONE = DONE+1 + Z( IBEGIN, WBEGIN ) = ONE + ISUPPZ( 2*WBEGIN-1 ) = IBEGIN + ISUPPZ( 2*WBEGIN ) = IBEGIN + W( WBEGIN ) = W( WBEGIN ) + SIGMA + WORK( WBEGIN ) = W( WBEGIN ) + IBEGIN = IEND + 1 + WBEGIN = WBEGIN + 1 + GO TO 170 + END IF + +* The desired (shifted) eigenvalues are stored in W(WBEGIN:WEND) +* Note that these can be approximations, in this case, the corresp. +* entries of WERR give the size of the uncertainty interval. +* The eigenvalue approximations will be refined when necessary as +* high relative accuracy is required for the computation of the +* corresponding eigenvectors. + CALL SCOPY( IM, W( WBEGIN ), 1, + $ WORK( WBEGIN ), 1 ) + +* We store in W the eigenvalue approximations w.r.t. the original +* matrix T. + DO 30 I=1,IM + W(WBEGIN+I-1) = W(WBEGIN+I-1)+SIGMA + 30 CONTINUE + + +* NDEPTH is the current depth of the representation tree + NDEPTH = 0 +* PARITY is either 1 or 0 + PARITY = 1 +* NCLUS is the number of clusters for the next level of the +* representation tree, we start with NCLUS = 1 for the root + NCLUS = 1 + IWORK( IINDC1+1 ) = 1 + IWORK( IINDC1+2 ) = IM + +* IDONE is the number of eigenvectors already computed in the current +* block + IDONE = 0 +* loop while( IDONE.LT.IM ) +* generate the representation tree for the current block and +* compute the eigenvectors + 40 CONTINUE + IF( IDONE.LT.IM ) THEN +* This is a crude protection against infinitely deep trees + IF( NDEPTH.GT.M ) THEN + INFO = -2 + RETURN + ENDIF +* breadth first processing of the current level of the representation +* tree: OLDNCL = number of clusters on current level + OLDNCL = NCLUS +* reset NCLUS to count the number of child clusters + NCLUS = 0 +* + PARITY = 1 - PARITY + IF( PARITY.EQ.0 ) THEN + OLDCLS = IINDC1 + NEWCLS = IINDC2 + ELSE + OLDCLS = IINDC2 + NEWCLS = IINDC1 + END IF +* Process the clusters on the current level + DO 150 I = 1, OLDNCL + J = OLDCLS + 2*I +* OLDFST, OLDLST = first, last index of current cluster. +* cluster indices start with 1 and are relative +* to WBEGIN when accessing W, WGAP, WERR, Z + OLDFST = IWORK( J-1 ) + OLDLST = IWORK( J ) + IF( NDEPTH.GT.0 ) THEN +* Retrieve relatively robust representation (RRR) of cluster +* that has been computed at the previous level +* The RRR is stored in Z and overwritten once the eigenvectors +* have been computed or when the cluster is refined + + IF((DOL.EQ.1).AND.(DOU.EQ.M)) THEN +* Get representation from location of the leftmost evalue +* of the cluster + J = WBEGIN + OLDFST - 1 + ELSE + IF(WBEGIN+OLDFST-1.LT.DOL) THEN +* Get representation from the left end of Z array + J = DOL - 1 + ELSEIF(WBEGIN+OLDFST-1.GT.DOU) THEN +* Get representation from the right end of Z array + J = DOU + ELSE + J = WBEGIN + OLDFST - 1 + ENDIF + ENDIF + CALL SCOPY( IN, Z( IBEGIN, J ), 1, D( IBEGIN ), 1 ) + CALL SCOPY( IN-1, Z( IBEGIN, J+1 ), 1, L( IBEGIN ), + $ 1 ) + SIGMA = Z( IEND, J+1 ) + +* Set the corresponding entries in Z to zero + CALL SLASET( 'Full', IN, 2, ZERO, ZERO, + $ Z( IBEGIN, J), LDZ ) + END IF + +* Compute DL and DLL of current RRR + DO 50 J = IBEGIN, IEND-1 + TMP = D( J )*L( J ) + WORK( INDLD-1+J ) = TMP + WORK( INDLLD-1+J ) = TMP*L( J ) + 50 CONTINUE + + IF( NDEPTH.GT.0 ) THEN +* P and Q are index of the first and last eigenvalue to compute +* within the current block + P = INDEXW( WBEGIN-1+OLDFST ) + Q = INDEXW( WBEGIN-1+OLDLST ) +* Offset for the arrays WORK, WGAP and WERR, i.e., the P-OFFSET +* through the Q-OFFSET elements of these arrays are to be used. +* OFFSET = P-OLDFST + OFFSET = INDEXW( WBEGIN ) - 1 +* perform limited bisection (if necessary) to get approximate +* eigenvalues to the precision needed. + CALL SLARRB( IN, D( IBEGIN ), + $ WORK(INDLLD+IBEGIN-1), + $ P, Q, RTOL1, RTOL2, OFFSET, + $ WORK(WBEGIN),WGAP(WBEGIN),WERR(WBEGIN), + $ WORK( INDWRK ), IWORK( IINDWK ), + $ PIVMIN, SPDIAM, IN, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = -1 + RETURN + ENDIF +* We also recompute the extremal gaps. W holds all eigenvalues +* of the unshifted matrix and must be used for computation +* of WGAP, the entries of WORK might stem from RRRs with +* different shifts. The gaps from WBEGIN-1+OLDFST to +* WBEGIN-1+OLDLST are correctly computed in SLARRB. +* However, we only allow the gaps to become greater since +* this is what should happen when we decrease WERR + IF( OLDFST.GT.1) THEN + WGAP( WBEGIN+OLDFST-2 ) = + $ MAX(WGAP(WBEGIN+OLDFST-2), + $ W(WBEGIN+OLDFST-1)-WERR(WBEGIN+OLDFST-1) + $ - W(WBEGIN+OLDFST-2)-WERR(WBEGIN+OLDFST-2) ) + ENDIF + IF( WBEGIN + OLDLST -1 .LT. WEND ) THEN + WGAP( WBEGIN+OLDLST-1 ) = + $ MAX(WGAP(WBEGIN+OLDLST-1), + $ W(WBEGIN+OLDLST)-WERR(WBEGIN+OLDLST) + $ - W(WBEGIN+OLDLST-1)-WERR(WBEGIN+OLDLST-1) ) + ENDIF +* Each time the eigenvalues in WORK get refined, we store +* the newly found approximation with all shifts applied in W + DO 53 J=OLDFST,OLDLST + W(WBEGIN+J-1) = WORK(WBEGIN+J-1)+SIGMA + 53 CONTINUE + END IF + +* Process the current node. + NEWFST = OLDFST + DO 140 J = OLDFST, OLDLST + IF( J.EQ.OLDLST ) THEN +* we are at the right end of the cluster, this is also the +* boundary of the child cluster + NEWLST = J + ELSE IF ( WGAP( WBEGIN + J -1).GE. + $ MINRGP* ABS( WORK(WBEGIN + J -1) ) ) THEN +* the right relative gap is big enough, the child cluster +* (NEWFST,..,NEWLST) is well separated from the following + NEWLST = J + ELSE +* inside a child cluster, the relative gap is not +* big enough. + GOTO 140 + END IF + +* Compute size of child cluster found + NEWSIZ = NEWLST - NEWFST + 1 + +* NEWFTT is the place in Z where the new RRR or the computed +* eigenvector is to be stored + IF((DOL.EQ.1).AND.(DOU.EQ.M)) THEN +* Store representation at location of the leftmost evalue +* of the cluster + NEWFTT = WBEGIN + NEWFST - 1 + ELSE + IF(WBEGIN+NEWFST-1.LT.DOL) THEN +* Store representation at the left end of Z array + NEWFTT = DOL - 1 + ELSEIF(WBEGIN+NEWFST-1.GT.DOU) THEN +* Store representation at the right end of Z array + NEWFTT = DOU + ELSE + NEWFTT = WBEGIN + NEWFST - 1 + ENDIF + ENDIF + + IF( NEWSIZ.GT.1) THEN +* +* Current child is not a singleton but a cluster. +* Compute and store new representation of child. +* +* +* Compute left and right cluster gap. +* +* LGAP and RGAP are not computed from WORK because +* the eigenvalue approximations may stem from RRRs +* different shifts. However, W hold all eigenvalues +* of the unshifted matrix. Still, the entries in WGAP +* have to be computed from WORK since the entries +* in W might be of the same order so that gaps are not +* exhibited correctly for very close eigenvalues. + IF( NEWFST.EQ.1 ) THEN + LGAP = MAX( ZERO, + $ W(WBEGIN)-WERR(WBEGIN) - VL ) + ELSE + LGAP = WGAP( WBEGIN+NEWFST-2 ) + ENDIF + RGAP = WGAP( WBEGIN+NEWLST-1 ) +* +* Compute left- and rightmost eigenvalue of child +* to high precision in order to shift as close +* as possible and obtain as large relative gaps +* as possible +* + DO 55 K =1,2 + IF(K.EQ.1) THEN + P = INDEXW( WBEGIN-1+NEWFST ) + ELSE + P = INDEXW( WBEGIN-1+NEWLST ) + ENDIF + OFFSET = INDEXW( WBEGIN ) - 1 + CALL SLARRB( IN, D(IBEGIN), + $ WORK( INDLLD+IBEGIN-1 ),P,P, + $ RQTOL, RQTOL, OFFSET, + $ WORK(WBEGIN),WGAP(WBEGIN), + $ WERR(WBEGIN),WORK( INDWRK ), + $ IWORK( IINDWK ), PIVMIN, SPDIAM, + $ IN, IINFO ) + 55 CONTINUE +* + IF((WBEGIN+NEWLST-1.LT.DOL).OR. + $ (WBEGIN+NEWFST-1.GT.DOU)) THEN +* if the cluster contains no desired eigenvalues +* skip the computation of that branch of the rep. tree +* +* We could skip before the refinement of the extremal +* eigenvalues of the child, but then the representation +* tree could be different from the one when nothing is +* skipped. For this reason we skip at this place. + IDONE = IDONE + NEWLST - NEWFST + 1 + GOTO 139 + ENDIF +* +* Compute RRR of child cluster. +* Note that the new RRR is stored in Z +* +* SLARRF needs LWORK = 2*N + CALL SLARRF( IN, D( IBEGIN ), L( IBEGIN ), + $ WORK(INDLD+IBEGIN-1), + $ NEWFST, NEWLST, WORK(WBEGIN), + $ WGAP(WBEGIN), WERR(WBEGIN), + $ SPDIAM, LGAP, RGAP, PIVMIN, TAU, + $ Z(IBEGIN, NEWFTT),Z(IBEGIN, NEWFTT+1), + $ WORK( INDWRK ), IINFO ) + IF( IINFO.EQ.0 ) THEN +* a new RRR for the cluster was found by SLARRF +* update shift and store it + SSIGMA = SIGMA + TAU + Z( IEND, NEWFTT+1 ) = SSIGMA +* WORK() are the midpoints and WERR() the semi-width +* Note that the entries in W are unchanged. + DO 116 K = NEWFST, NEWLST + FUDGE = + $ THREE*EPS*ABS(WORK(WBEGIN+K-1)) + WORK( WBEGIN + K - 1 ) = + $ WORK( WBEGIN + K - 1) - TAU + FUDGE = FUDGE + + $ FOUR*EPS*ABS(WORK(WBEGIN+K-1)) +* Fudge errors + WERR( WBEGIN + K - 1 ) = + $ WERR( WBEGIN + K - 1 ) + FUDGE +* Gaps are not fudged. Provided that WERR is small +* when eigenvalues are close, a zero gap indicates +* that a new representation is needed for resolving +* the cluster. A fudge could lead to a wrong decision +* of judging eigenvalues 'separated' which in +* reality are not. This could have a negative impact +* on the orthogonality of the computed eigenvectors. + 116 CONTINUE + + NCLUS = NCLUS + 1 + K = NEWCLS + 2*NCLUS + IWORK( K-1 ) = NEWFST + IWORK( K ) = NEWLST + ELSE + INFO = -2 + RETURN + ENDIF + ELSE +* +* Compute eigenvector of singleton +* + ITER = 0 +* + TOL = FOUR * LOG(REAL(IN)) * EPS +* + K = NEWFST + WINDEX = WBEGIN + K - 1 + WINDMN = MAX(WINDEX - 1,1) + WINDPL = MIN(WINDEX + 1,M) + LAMBDA = WORK( WINDEX ) + DONE = DONE + 1 +* Check if eigenvector computation is to be skipped + IF((WINDEX.LT.DOL).OR. + $ (WINDEX.GT.DOU)) THEN + ESKIP = .TRUE. + GOTO 125 + ELSE + ESKIP = .FALSE. + ENDIF + LEFT = WORK( WINDEX ) - WERR( WINDEX ) + RIGHT = WORK( WINDEX ) + WERR( WINDEX ) + INDEIG = INDEXW( WINDEX ) +* Note that since we compute the eigenpairs for a child, +* all eigenvalue approximations are w.r.t the same shift. +* In this case, the entries in WORK should be used for +* computing the gaps since they exhibit even very small +* differences in the eigenvalues, as opposed to the +* entries in W which might "look" the same. + + IF( K .EQ. 1) THEN +* In the case RANGE='I' and with not much initial +* accuracy in LAMBDA and VL, the formula +* LGAP = MAX( ZERO, (SIGMA - VL) + LAMBDA ) +* can lead to an overestimation of the left gap and +* thus to inadequately early RQI 'convergence'. +* Prevent this by forcing a small left gap. + LGAP = EPS*MAX(ABS(LEFT),ABS(RIGHT)) + ELSE + LGAP = WGAP(WINDMN) + ENDIF + IF( K .EQ. IM) THEN +* In the case RANGE='I' and with not much initial +* accuracy in LAMBDA and VU, the formula +* can lead to an overestimation of the right gap and +* thus to inadequately early RQI 'convergence'. +* Prevent this by forcing a small right gap. + RGAP = EPS*MAX(ABS(LEFT),ABS(RIGHT)) + ELSE + RGAP = WGAP(WINDEX) + ENDIF + GAP = MIN( LGAP, RGAP ) + IF(( K .EQ. 1).OR.(K .EQ. IM)) THEN +* The eigenvector support can become wrong +* because significant entries could be cut off due to a +* large GAPTOL parameter in LAR1V. Prevent this. + GAPTOL = ZERO + ELSE + GAPTOL = GAP * EPS + ENDIF + ISUPMN = IN + ISUPMX = 1 +* Update WGAP so that it holds the minimum gap +* to the left or the right. This is crucial in the +* case where bisection is used to ensure that the +* eigenvalue is refined up to the required precision. +* The correct value is restored afterwards. + SAVGAP = WGAP(WINDEX) + WGAP(WINDEX) = GAP +* We want to use the Rayleigh Quotient Correction +* as often as possible since it converges quadratically +* when we are close enough to the desired eigenvalue. +* However, the Rayleigh Quotient can have the wrong sign +* and lead us away from the desired eigenvalue. In this +* case, the best we can do is to use bisection. + USEDBS = .FALSE. + USEDRQ = .FALSE. +* Bisection is initially turned off unless it is forced + NEEDBS = .NOT.TRYRQC + 120 CONTINUE +* Check if bisection should be used to refine eigenvalue + IF(NEEDBS) THEN +* Take the bisection as new iterate + USEDBS = .TRUE. + ITMP1 = IWORK( IINDR+WINDEX ) + OFFSET = INDEXW( WBEGIN ) - 1 + CALL SLARRB( IN, D(IBEGIN), + $ WORK(INDLLD+IBEGIN-1),INDEIG,INDEIG, + $ ZERO, TWO*EPS, OFFSET, + $ WORK(WBEGIN),WGAP(WBEGIN), + $ WERR(WBEGIN),WORK( INDWRK ), + $ IWORK( IINDWK ), PIVMIN, SPDIAM, + $ ITMP1, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = -3 + RETURN + ENDIF + LAMBDA = WORK( WINDEX ) +* Reset twist index from inaccurate LAMBDA to +* force computation of true MINGMA + IWORK( IINDR+WINDEX ) = 0 + ENDIF +* Given LAMBDA, compute the eigenvector. + CALL SLAR1V( IN, 1, IN, LAMBDA, D( IBEGIN ), + $ L( IBEGIN ), WORK(INDLD+IBEGIN-1), + $ WORK(INDLLD+IBEGIN-1), + $ PIVMIN, GAPTOL, Z( IBEGIN, WINDEX ), + $ .NOT.USEDBS, NEGCNT, ZTZ, MINGMA, + $ IWORK( IINDR+WINDEX ), ISUPPZ( 2*WINDEX-1 ), + $ NRMINV, RESID, RQCORR, WORK( INDWRK ) ) + IF(ITER .EQ. 0) THEN + BSTRES = RESID + BSTW = LAMBDA + ELSEIF(RESID.LT.BSTRES) THEN + BSTRES = RESID + BSTW = LAMBDA + ENDIF + ISUPMN = MIN(ISUPMN,ISUPPZ( 2*WINDEX-1 )) + ISUPMX = MAX(ISUPMX,ISUPPZ( 2*WINDEX )) + ITER = ITER + 1 + +* sin alpha <= |resid|/gap +* Note that both the residual and the gap are +* proportional to the matrix, so ||T|| doesn't play +* a role in the quotient + +* +* Convergence test for Rayleigh-Quotient iteration +* (omitted when Bisection has been used) +* + IF( RESID.GT.TOL*GAP .AND. ABS( RQCORR ).GT. + $ RQTOL*ABS( LAMBDA ) .AND. .NOT. USEDBS) + $ THEN +* We need to check that the RQCORR update doesn't +* move the eigenvalue away from the desired one and +* towards a neighbor. -> protection with bisection + IF(INDEIG.LE.NEGCNT) THEN +* The wanted eigenvalue lies to the left + SGNDEF = -ONE + ELSE +* The wanted eigenvalue lies to the right + SGNDEF = ONE + ENDIF +* We only use the RQCORR if it improves the +* the iterate reasonably. + IF( ( RQCORR*SGNDEF.GE.ZERO ) + $ .AND.( LAMBDA + RQCORR.LE. RIGHT) + $ .AND.( LAMBDA + RQCORR.GE. LEFT) + $ ) THEN + USEDRQ = .TRUE. +* Store new midpoint of bisection interval in WORK + IF(SGNDEF.EQ.ONE) THEN +* The current LAMBDA is on the left of the true +* eigenvalue + LEFT = LAMBDA +* We prefer to assume that the error estimate +* is correct. We could make the interval not +* as a bracket but to be modified if the RQCORR +* chooses to. In this case, the RIGHT side should +* be modified as follows: +* RIGHT = MAX(RIGHT, LAMBDA + RQCORR) + ELSE +* The current LAMBDA is on the right of the true +* eigenvalue + RIGHT = LAMBDA +* See comment about assuming the error estimate is +* correct above. +* LEFT = MIN(LEFT, LAMBDA + RQCORR) + ENDIF + WORK( WINDEX ) = + $ HALF * (RIGHT + LEFT) +* Take RQCORR since it has the correct sign and +* improves the iterate reasonably + LAMBDA = LAMBDA + RQCORR +* Update width of error interval + WERR( WINDEX ) = + $ HALF * (RIGHT-LEFT) + ELSE + NEEDBS = .TRUE. + ENDIF + IF(RIGHT-LEFT.LT.RQTOL*ABS(LAMBDA)) THEN +* The eigenvalue is computed to bisection accuracy +* compute eigenvector and stop + USEDBS = .TRUE. + GOTO 120 + ELSEIF( ITER.LT.MAXITR ) THEN + GOTO 120 + ELSEIF( ITER.EQ.MAXITR ) THEN + NEEDBS = .TRUE. + GOTO 120 + ELSE + INFO = 5 + RETURN + END IF + ELSE + STP2II = .FALSE. + IF(USEDRQ .AND. USEDBS .AND. + $ BSTRES.LE.RESID) THEN + LAMBDA = BSTW + STP2II = .TRUE. + ENDIF + IF (STP2II) THEN +* improve error angle by second step + CALL SLAR1V( IN, 1, IN, LAMBDA, + $ D( IBEGIN ), L( IBEGIN ), + $ WORK(INDLD+IBEGIN-1), + $ WORK(INDLLD+IBEGIN-1), + $ PIVMIN, GAPTOL, Z( IBEGIN, WINDEX ), + $ .NOT.USEDBS, NEGCNT, ZTZ, MINGMA, + $ IWORK( IINDR+WINDEX ), + $ ISUPPZ( 2*WINDEX-1 ), + $ NRMINV, RESID, RQCORR, WORK( INDWRK ) ) + ENDIF + WORK( WINDEX ) = LAMBDA + END IF +* +* Compute FP-vector support w.r.t. whole matrix +* + ISUPPZ( 2*WINDEX-1 ) = ISUPPZ( 2*WINDEX-1 )+OLDIEN + ISUPPZ( 2*WINDEX ) = ISUPPZ( 2*WINDEX )+OLDIEN + ZFROM = ISUPPZ( 2*WINDEX-1 ) + ZTO = ISUPPZ( 2*WINDEX ) + ISUPMN = ISUPMN + OLDIEN + ISUPMX = ISUPMX + OLDIEN +* Ensure vector is ok if support in the RQI has changed + IF(ISUPMN.LT.ZFROM) THEN + DO 122 II = ISUPMN,ZFROM-1 + Z( II, WINDEX ) = ZERO + 122 CONTINUE + ENDIF + IF(ISUPMX.GT.ZTO) THEN + DO 123 II = ZTO+1,ISUPMX + Z( II, WINDEX ) = ZERO + 123 CONTINUE + ENDIF + CALL SSCAL( ZTO-ZFROM+1, NRMINV, + $ Z( ZFROM, WINDEX ), 1 ) + 125 CONTINUE +* Update W + W( WINDEX ) = LAMBDA+SIGMA +* Recompute the gaps on the left and right +* But only allow them to become larger and not +* smaller (which can only happen through "bad" +* cancellation and doesn't reflect the theory +* where the initial gaps are underestimated due +* to WERR being too crude.) + IF(.NOT.ESKIP) THEN + IF( K.GT.1) THEN + WGAP( WINDMN ) = MAX( WGAP(WINDMN), + $ W(WINDEX)-WERR(WINDEX) + $ - W(WINDMN)-WERR(WINDMN) ) + ENDIF + IF( WINDEX.LT.WEND ) THEN + WGAP( WINDEX ) = MAX( SAVGAP, + $ W( WINDPL )-WERR( WINDPL ) + $ - W( WINDEX )-WERR( WINDEX) ) + ENDIF + ENDIF + IDONE = IDONE + 1 + ENDIF +* here ends the code for the current child +* + 139 CONTINUE +* Proceed to any remaining child nodes + NEWFST = J + 1 + 140 CONTINUE + 150 CONTINUE + NDEPTH = NDEPTH + 1 + GO TO 40 + END IF + IBEGIN = IEND + 1 + WBEGIN = WEND + 1 + 170 CONTINUE +* + + RETURN +* +* End of SLARRV +* + END diff --git a/dspl/liblapack/SRC/slarscl2.f b/dspl/liblapack/SRC/slarscl2.f new file mode 100644 index 0000000..5f57d33 --- /dev/null +++ b/dspl/liblapack/SRC/slarscl2.f @@ -0,0 +1,119 @@ +*> \brief \b SLARSCL2 performs reciprocal diagonal scaling on a vector. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARSCL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARSCL2 ( M, N, D, X, LDX ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDX +* .. +* .. Array Arguments .. +* REAL D( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARSCL2 performs a reciprocal diagonal scaling on an vector: +*> x <-- inv(D) * x +*> where the diagonal matrix D is stored as a vector. +*> +*> Eventually to be replaced by BLAS_sge_diag_scale in the new BLAS +*> standard. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of D and X. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of X. N >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, length M +*> Diagonal matrix D, stored as a vector of length M. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (LDX,N) +*> On entry, the vector X to be scaled by D. +*> On exit, the scaled vector. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the vector X. LDX >= M. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SLARSCL2 ( M, N, D, X, LDX ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER M, N, LDX +* .. +* .. Array Arguments .. + REAL D( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. Executable Statements .. +* + DO J = 1, N + DO I = 1, M + X( I, J ) = X( I, J ) / D( I ) + END DO + END DO + + RETURN + END diff --git a/dspl/liblapack/SRC/slartg.f b/dspl/liblapack/SRC/slartg.f new file mode 100644 index 0000000..784d4bc --- /dev/null +++ b/dspl/liblapack/SRC/slartg.f @@ -0,0 +1,204 @@ +*> \brief \b SLARTG generates a plane rotation with real cosine and real sine. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARTG + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARTG( F, G, CS, SN, R ) +* +* .. Scalar Arguments .. +* REAL CS, F, G, R, SN +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARTG generate a plane rotation so that +*> +*> [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. +*> [ -SN CS ] [ G ] [ 0 ] +*> +*> This is a slower, more accurate version of the BLAS1 routine SROTG, +*> with the following other differences: +*> F and G are unchanged on return. +*> If G=0, then CS=1 and SN=0. +*> If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any +*> floating point operations (saves work in SBDSQR when +*> there are zeros on the diagonal). +*> +*> If F exceeds G in magnitude, CS will be positive. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] F +*> \verbatim +*> F is REAL +*> The first component of vector to be rotated. +*> \endverbatim +*> +*> \param[in] G +*> \verbatim +*> G is REAL +*> The second component of vector to be rotated. +*> \endverbatim +*> +*> \param[out] CS +*> \verbatim +*> CS is REAL +*> The cosine of the rotation. +*> \endverbatim +*> +*> \param[out] SN +*> \verbatim +*> SN is REAL +*> The sine of the rotation. +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is REAL +*> The nonzero component of the rotated vector. +*> +*> This version has a few statements commented out for thread safety +*> (machine parameters are computed on each entry). 10 feb 03, SJH. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLARTG( F, G, CS, SN, R ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + REAL CS, F, G, R, SN +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL TWO + PARAMETER ( TWO = 2.0E0 ) +* .. +* .. Local Scalars .. +* LOGICAL FIRST + INTEGER COUNT, I + REAL EPS, F1, G1, SAFMIN, SAFMN2, SAFMX2, SCALE +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, SQRT +* .. +* .. Save statement .. +* SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 +* .. +* .. Data statements .. +* DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* +* IF( FIRST ) THEN + SAFMIN = SLAMCH( 'S' ) + EPS = SLAMCH( 'E' ) + SAFMN2 = SLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) / + $ LOG( SLAMCH( 'B' ) ) / TWO ) + SAFMX2 = ONE / SAFMN2 +* FIRST = .FALSE. +* END IF + IF( G.EQ.ZERO ) THEN + CS = ONE + SN = ZERO + R = F + ELSE IF( F.EQ.ZERO ) THEN + CS = ZERO + SN = ONE + R = G + ELSE + F1 = F + G1 = G + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.GE.SAFMX2 ) THEN + COUNT = 0 + 10 CONTINUE + COUNT = COUNT + 1 + F1 = F1*SAFMN2 + G1 = G1*SAFMN2 + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.GE.SAFMX2 ) + $ GO TO 10 + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + DO 20 I = 1, COUNT + R = R*SAFMX2 + 20 CONTINUE + ELSE IF( SCALE.LE.SAFMN2 ) THEN + COUNT = 0 + 30 CONTINUE + COUNT = COUNT + 1 + F1 = F1*SAFMX2 + G1 = G1*SAFMX2 + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.LE.SAFMN2 ) + $ GO TO 30 + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + DO 40 I = 1, COUNT + R = R*SAFMN2 + 40 CONTINUE + ELSE + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + END IF + IF( ABS( F ).GT.ABS( G ) .AND. CS.LT.ZERO ) THEN + CS = -CS + SN = -SN + R = -R + END IF + END IF + RETURN +* +* End of SLARTG +* + END diff --git a/dspl/liblapack/SRC/slartgp.f b/dspl/liblapack/SRC/slartgp.f new file mode 100644 index 0000000..ad76c94 --- /dev/null +++ b/dspl/liblapack/SRC/slartgp.f @@ -0,0 +1,202 @@ +*> \brief \b SLARTGP generates a plane rotation so that the diagonal is nonnegative. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARTGP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARTGP( F, G, CS, SN, R ) +* +* .. Scalar Arguments .. +* REAL CS, F, G, R, SN +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARTGP generates a plane rotation so that +*> +*> [ CS SN ] . [ F ] = [ R ] where CS**2 + SN**2 = 1. +*> [ -SN CS ] [ G ] [ 0 ] +*> +*> This is a slower, more accurate version of the Level 1 BLAS routine SROTG, +*> with the following other differences: +*> F and G are unchanged on return. +*> If G=0, then CS=(+/-)1 and SN=0. +*> If F=0 and (G .ne. 0), then CS=0 and SN=(+/-)1. +*> +*> The sign is chosen so that R >= 0. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] F +*> \verbatim +*> F is REAL +*> The first component of vector to be rotated. +*> \endverbatim +*> +*> \param[in] G +*> \verbatim +*> G is REAL +*> The second component of vector to be rotated. +*> \endverbatim +*> +*> \param[out] CS +*> \verbatim +*> CS is REAL +*> The cosine of the rotation. +*> \endverbatim +*> +*> \param[out] SN +*> \verbatim +*> SN is REAL +*> The sine of the rotation. +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is REAL +*> The nonzero component of the rotated vector. +*> +*> This version has a few statements commented out for thread safety +*> (machine parameters are computed on each entry). 10 feb 03, SJH. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLARTGP( F, G, CS, SN, R ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + REAL CS, F, G, R, SN +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL TWO + PARAMETER ( TWO = 2.0E0 ) +* .. +* .. Local Scalars .. +* LOGICAL FIRST + INTEGER COUNT, I + REAL EPS, F1, G1, SAFMIN, SAFMN2, SAFMX2, SCALE +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, SIGN, SQRT +* .. +* .. Save statement .. +* SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 +* .. +* .. Data statements .. +* DATA FIRST / .TRUE. / +* .. +* .. Executable Statements .. +* +* IF( FIRST ) THEN + SAFMIN = SLAMCH( 'S' ) + EPS = SLAMCH( 'E' ) + SAFMN2 = SLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) / + $ LOG( SLAMCH( 'B' ) ) / TWO ) + SAFMX2 = ONE / SAFMN2 +* FIRST = .FALSE. +* END IF + IF( G.EQ.ZERO ) THEN + CS = SIGN( ONE, F ) + SN = ZERO + R = ABS( F ) + ELSE IF( F.EQ.ZERO ) THEN + CS = ZERO + SN = SIGN( ONE, G ) + R = ABS( G ) + ELSE + F1 = F + G1 = G + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.GE.SAFMX2 ) THEN + COUNT = 0 + 10 CONTINUE + COUNT = COUNT + 1 + F1 = F1*SAFMN2 + G1 = G1*SAFMN2 + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.GE.SAFMX2 ) + $ GO TO 10 + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + DO 20 I = 1, COUNT + R = R*SAFMX2 + 20 CONTINUE + ELSE IF( SCALE.LE.SAFMN2 ) THEN + COUNT = 0 + 30 CONTINUE + COUNT = COUNT + 1 + F1 = F1*SAFMX2 + G1 = G1*SAFMX2 + SCALE = MAX( ABS( F1 ), ABS( G1 ) ) + IF( SCALE.LE.SAFMN2 ) + $ GO TO 30 + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + DO 40 I = 1, COUNT + R = R*SAFMN2 + 40 CONTINUE + ELSE + R = SQRT( F1**2+G1**2 ) + CS = F1 / R + SN = G1 / R + END IF + IF( R.LT.ZERO ) THEN + CS = -CS + SN = -SN + R = -R + END IF + END IF + RETURN +* +* End of SLARTG +* + END diff --git a/dspl/liblapack/SRC/slartgs.f b/dspl/liblapack/SRC/slartgs.f new file mode 100644 index 0000000..59a2182 --- /dev/null +++ b/dspl/liblapack/SRC/slartgs.f @@ -0,0 +1,161 @@ +*> \brief \b SLARTGS generates a plane rotation designed to introduce a bulge in implicit QR iteration for the bidiagonal SVD problem. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARTGS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARTGS( X, Y, SIGMA, CS, SN ) +* +* .. Scalar Arguments .. +* REAL CS, SIGMA, SN, X, Y +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARTGS generates a plane rotation designed to introduce a bulge in +*> Golub-Reinsch-style implicit QR iteration for the bidiagonal SVD +*> problem. X and Y are the top-row entries, and SIGMA is the shift. +*> The computed CS and SN define a plane rotation satisfying +*> +*> [ CS SN ] . [ X^2 - SIGMA ] = [ R ], +*> [ -SN CS ] [ X * Y ] [ 0 ] +*> +*> with R nonnegative. If X^2 - SIGMA and X * Y are 0, then the +*> rotation is by PI/2. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] X +*> \verbatim +*> X is REAL +*> The (1,1) entry of an upper bidiagonal matrix. +*> \endverbatim +*> +*> \param[in] Y +*> \verbatim +*> Y is REAL +*> The (1,2) entry of an upper bidiagonal matrix. +*> \endverbatim +*> +*> \param[in] SIGMA +*> \verbatim +*> SIGMA is REAL +*> The shift. +*> \endverbatim +*> +*> \param[out] CS +*> \verbatim +*> CS is REAL +*> The cosine of the rotation. +*> \endverbatim +*> +*> \param[out] SN +*> \verbatim +*> SN is REAL +*> The sine of the rotation. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE SLARTGS( X, Y, SIGMA, CS, SN ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + REAL CS, SIGMA, SN, X, Y +* .. +* +* =================================================================== +* +* .. Parameters .. + REAL NEGONE, ONE, ZERO + PARAMETER ( NEGONE = -1.0E0, ONE = 1.0E0, ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + REAL R, S, THRESH, W, Z +* .. +* .. External Subroutines .. + EXTERNAL SLARTGP +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. Executable Statements .. +* + THRESH = SLAMCH('E') +* +* Compute the first column of B**T*B - SIGMA^2*I, up to a scale +* factor. +* + IF( (SIGMA .EQ. ZERO .AND. ABS(X) .LT. THRESH) .OR. + $ (ABS(X) .EQ. SIGMA .AND. Y .EQ. ZERO) ) THEN + Z = ZERO + W = ZERO + ELSE IF( SIGMA .EQ. ZERO ) THEN + IF( X .GE. ZERO ) THEN + Z = X + W = Y + ELSE + Z = -X + W = -Y + END IF + ELSE IF( ABS(X) .LT. THRESH ) THEN + Z = -SIGMA*SIGMA + W = ZERO + ELSE + IF( X .GE. ZERO ) THEN + S = ONE + ELSE + S = NEGONE + END IF + Z = S * (ABS(X)-SIGMA) * (S+SIGMA/X) + W = S * Y + END IF +* +* Generate the rotation. +* CALL SLARTGP( Z, W, CS, SN, R ) might seem more natural; +* reordering the arguments ensures that if Z = 0 then the rotation +* is by PI/2. +* + CALL SLARTGP( W, Z, SN, CS, R ) +* + RETURN +* +* End SLARTGS +* + END + diff --git a/dspl/liblapack/SRC/slartv.f b/dspl/liblapack/SRC/slartv.f new file mode 100644 index 0000000..4cdb93d --- /dev/null +++ b/dspl/liblapack/SRC/slartv.f @@ -0,0 +1,147 @@ +*> \brief \b SLARTV applies a vector of plane rotations with real cosines and real sines to the elements of a pair of vectors. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARTV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARTV( N, X, INCX, Y, INCY, C, S, INCC ) +* +* .. Scalar Arguments .. +* INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. +* REAL C( * ), S( * ), X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARTV applies a vector of real plane rotations to elements of the +*> real vectors x and y. For i = 1,2,...,n +*> +*> ( x(i) ) := ( c(i) s(i) ) ( x(i) ) +*> ( y(i) ) ( -s(i) c(i) ) ( y(i) ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of plane rotations to be applied. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, +*> dimension (1+(N-1)*INCX) +*> The vector x. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is REAL array, +*> dimension (1+(N-1)*INCY) +*> The vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> The increment between elements of Y. INCY > 0. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension (1+(N-1)*INCC) +*> The cosines of the plane rotations. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is REAL array, dimension (1+(N-1)*INCC) +*> The sines of the plane rotations. +*> \endverbatim +*> +*> \param[in] INCC +*> \verbatim +*> INCC is INTEGER +*> The increment between elements of C and S. INCC > 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLARTV( N, X, INCX, Y, INCY, C, S, INCC ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. + REAL C( * ), S( * ), X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IC, IX, IY + REAL XI, YI +* .. +* .. Executable Statements .. +* + IX = 1 + IY = 1 + IC = 1 + DO 10 I = 1, N + XI = X( IX ) + YI = Y( IY ) + X( IX ) = C( IC )*XI + S( IC )*YI + Y( IY ) = C( IC )*YI - S( IC )*XI + IX = IX + INCX + IY = IY + INCY + IC = IC + INCC + 10 CONTINUE + RETURN +* +* End of SLARTV +* + END diff --git a/dspl/liblapack/SRC/slaruv.f b/dspl/liblapack/SRC/slaruv.f new file mode 100644 index 0000000..ed84200 --- /dev/null +++ b/dspl/liblapack/SRC/slaruv.f @@ -0,0 +1,447 @@ +*> \brief \b SLARUV returns a vector of n random real numbers from a uniform distribution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARUV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARUV( ISEED, N, X ) +* +* .. Scalar Arguments .. +* INTEGER N +* .. +* .. Array Arguments .. +* INTEGER ISEED( 4 ) +* REAL X( N ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARUV returns a vector of n random real numbers from a uniform (0,1) +*> distribution (n <= 128). +*> +*> This is an auxiliary routine called by SLARNV and CLARNV. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in,out] ISEED +*> \verbatim +*> ISEED is INTEGER array, dimension (4) +*> On entry, the seed of the random number generator; the array +*> elements must be between 0 and 4095, and ISEED(4) must be +*> odd. +*> On exit, the seed is updated. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of random numbers to be generated. N <= 128. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (N) +*> The generated random numbers. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> This routine uses a multiplicative congruential method with modulus +*> 2**48 and multiplier 33952834046453 (see G.S.Fishman, +*> 'Multiplicative congruential random number generators with modulus +*> 2**b: an exhaustive analysis for b = 32 and a partial analysis for +*> b = 48', Math. Comp. 189, pp 331-344, 1990). +*> +*> 48-bit integers are stored in 4 integer array elements with 12 bits +*> per element. Hence the routine is portable across machines with +*> integers of 32 bits or more. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLARUV( ISEED, N, X ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER N +* .. +* .. Array Arguments .. + INTEGER ISEED( 4 ) + REAL X( N ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + INTEGER LV, IPW2 + REAL R + PARAMETER ( LV = 128, IPW2 = 4096, R = ONE / IPW2 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, I2, I3, I4, IT1, IT2, IT3, IT4, J +* .. +* .. Local Arrays .. + INTEGER MM( LV, 4 ) +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MOD, REAL +* .. +* .. Data statements .. + DATA ( MM( 1, J ), J = 1, 4 ) / 494, 322, 2508, + $ 2549 / + DATA ( MM( 2, J ), J = 1, 4 ) / 2637, 789, 3754, + $ 1145 / + DATA ( MM( 3, J ), J = 1, 4 ) / 255, 1440, 1766, + $ 2253 / + DATA ( MM( 4, J ), J = 1, 4 ) / 2008, 752, 3572, + $ 305 / + DATA ( MM( 5, J ), J = 1, 4 ) / 1253, 2859, 2893, + $ 3301 / + DATA ( MM( 6, J ), J = 1, 4 ) / 3344, 123, 307, + $ 1065 / + DATA ( MM( 7, J ), J = 1, 4 ) / 4084, 1848, 1297, + $ 3133 / + DATA ( MM( 8, J ), J = 1, 4 ) / 1739, 643, 3966, + $ 2913 / + DATA ( MM( 9, J ), J = 1, 4 ) / 3143, 2405, 758, + $ 3285 / + DATA ( MM( 10, J ), J = 1, 4 ) / 3468, 2638, 2598, + $ 1241 / + DATA ( MM( 11, J ), J = 1, 4 ) / 688, 2344, 3406, + $ 1197 / + DATA ( MM( 12, J ), J = 1, 4 ) / 1657, 46, 2922, + $ 3729 / + DATA ( MM( 13, J ), J = 1, 4 ) / 1238, 3814, 1038, + $ 2501 / + DATA ( MM( 14, J ), J = 1, 4 ) / 3166, 913, 2934, + $ 1673 / + DATA ( MM( 15, J ), J = 1, 4 ) / 1292, 3649, 2091, + $ 541 / + DATA ( MM( 16, J ), J = 1, 4 ) / 3422, 339, 2451, + $ 2753 / + DATA ( MM( 17, J ), J = 1, 4 ) / 1270, 3808, 1580, + $ 949 / + DATA ( MM( 18, J ), J = 1, 4 ) / 2016, 822, 1958, + $ 2361 / + DATA ( MM( 19, J ), J = 1, 4 ) / 154, 2832, 2055, + $ 1165 / + DATA ( MM( 20, J ), J = 1, 4 ) / 2862, 3078, 1507, + $ 4081 / + DATA ( MM( 21, J ), J = 1, 4 ) / 697, 3633, 1078, + $ 2725 / + DATA ( MM( 22, J ), J = 1, 4 ) / 1706, 2970, 3273, + $ 3305 / + DATA ( MM( 23, J ), J = 1, 4 ) / 491, 637, 17, + $ 3069 / + DATA ( MM( 24, J ), J = 1, 4 ) / 931, 2249, 854, + $ 3617 / + DATA ( MM( 25, J ), J = 1, 4 ) / 1444, 2081, 2916, + $ 3733 / + DATA ( MM( 26, J ), J = 1, 4 ) / 444, 4019, 3971, + $ 409 / + DATA ( MM( 27, J ), J = 1, 4 ) / 3577, 1478, 2889, + $ 2157 / + DATA ( MM( 28, J ), J = 1, 4 ) / 3944, 242, 3831, + $ 1361 / + DATA ( MM( 29, J ), J = 1, 4 ) / 2184, 481, 2621, + $ 3973 / + DATA ( MM( 30, J ), J = 1, 4 ) / 1661, 2075, 1541, + $ 1865 / + DATA ( MM( 31, J ), J = 1, 4 ) / 3482, 4058, 893, + $ 2525 / + DATA ( MM( 32, J ), J = 1, 4 ) / 657, 622, 736, + $ 1409 / + DATA ( MM( 33, J ), J = 1, 4 ) / 3023, 3376, 3992, + $ 3445 / + DATA ( MM( 34, J ), J = 1, 4 ) / 3618, 812, 787, + $ 3577 / + DATA ( MM( 35, J ), J = 1, 4 ) / 1267, 234, 2125, + $ 77 / + DATA ( MM( 36, J ), J = 1, 4 ) / 1828, 641, 2364, + $ 3761 / + DATA ( MM( 37, J ), J = 1, 4 ) / 164, 4005, 2460, + $ 2149 / + DATA ( MM( 38, J ), J = 1, 4 ) / 3798, 1122, 257, + $ 1449 / + DATA ( MM( 39, J ), J = 1, 4 ) / 3087, 3135, 1574, + $ 3005 / + DATA ( MM( 40, J ), J = 1, 4 ) / 2400, 2640, 3912, + $ 225 / + DATA ( MM( 41, J ), J = 1, 4 ) / 2870, 2302, 1216, + $ 85 / + DATA ( MM( 42, J ), J = 1, 4 ) / 3876, 40, 3248, + $ 3673 / + DATA ( MM( 43, J ), J = 1, 4 ) / 1905, 1832, 3401, + $ 3117 / + DATA ( MM( 44, J ), J = 1, 4 ) / 1593, 2247, 2124, + $ 3089 / + DATA ( MM( 45, J ), J = 1, 4 ) / 1797, 2034, 2762, + $ 1349 / + DATA ( MM( 46, J ), J = 1, 4 ) / 1234, 2637, 149, + $ 2057 / + DATA ( MM( 47, J ), J = 1, 4 ) / 3460, 1287, 2245, + $ 413 / + DATA ( MM( 48, J ), J = 1, 4 ) / 328, 1691, 166, + $ 65 / + DATA ( MM( 49, J ), J = 1, 4 ) / 2861, 496, 466, + $ 1845 / + DATA ( MM( 50, J ), J = 1, 4 ) / 1950, 1597, 4018, + $ 697 / + DATA ( MM( 51, J ), J = 1, 4 ) / 617, 2394, 1399, + $ 3085 / + DATA ( MM( 52, J ), J = 1, 4 ) / 2070, 2584, 190, + $ 3441 / + DATA ( MM( 53, J ), J = 1, 4 ) / 3331, 1843, 2879, + $ 1573 / + DATA ( MM( 54, J ), J = 1, 4 ) / 769, 336, 153, + $ 3689 / + DATA ( MM( 55, J ), J = 1, 4 ) / 1558, 1472, 2320, + $ 2941 / + DATA ( MM( 56, J ), J = 1, 4 ) / 2412, 2407, 18, + $ 929 / + DATA ( MM( 57, J ), J = 1, 4 ) / 2800, 433, 712, + $ 533 / + DATA ( MM( 58, J ), J = 1, 4 ) / 189, 2096, 2159, + $ 2841 / + DATA ( MM( 59, J ), J = 1, 4 ) / 287, 1761, 2318, + $ 4077 / + DATA ( MM( 60, J ), J = 1, 4 ) / 2045, 2810, 2091, + $ 721 / + DATA ( MM( 61, J ), J = 1, 4 ) / 1227, 566, 3443, + $ 2821 / + DATA ( MM( 62, J ), J = 1, 4 ) / 2838, 442, 1510, + $ 2249 / + DATA ( MM( 63, J ), J = 1, 4 ) / 209, 41, 449, + $ 2397 / + DATA ( MM( 64, J ), J = 1, 4 ) / 2770, 1238, 1956, + $ 2817 / + DATA ( MM( 65, J ), J = 1, 4 ) / 3654, 1086, 2201, + $ 245 / + DATA ( MM( 66, J ), J = 1, 4 ) / 3993, 603, 3137, + $ 1913 / + DATA ( MM( 67, J ), J = 1, 4 ) / 192, 840, 3399, + $ 1997 / + DATA ( MM( 68, J ), J = 1, 4 ) / 2253, 3168, 1321, + $ 3121 / + DATA ( MM( 69, J ), J = 1, 4 ) / 3491, 1499, 2271, + $ 997 / + DATA ( MM( 70, J ), J = 1, 4 ) / 2889, 1084, 3667, + $ 1833 / + DATA ( MM( 71, J ), J = 1, 4 ) / 2857, 3438, 2703, + $ 2877 / + DATA ( MM( 72, J ), J = 1, 4 ) / 2094, 2408, 629, + $ 1633 / + DATA ( MM( 73, J ), J = 1, 4 ) / 1818, 1589, 2365, + $ 981 / + DATA ( MM( 74, J ), J = 1, 4 ) / 688, 2391, 2431, + $ 2009 / + DATA ( MM( 75, J ), J = 1, 4 ) / 1407, 288, 1113, + $ 941 / + DATA ( MM( 76, J ), J = 1, 4 ) / 634, 26, 3922, + $ 2449 / + DATA ( MM( 77, J ), J = 1, 4 ) / 3231, 512, 2554, + $ 197 / + DATA ( MM( 78, J ), J = 1, 4 ) / 815, 1456, 184, + $ 2441 / + DATA ( MM( 79, J ), J = 1, 4 ) / 3524, 171, 2099, + $ 285 / + DATA ( MM( 80, J ), J = 1, 4 ) / 1914, 1677, 3228, + $ 1473 / + DATA ( MM( 81, J ), J = 1, 4 ) / 516, 2657, 4012, + $ 2741 / + DATA ( MM( 82, J ), J = 1, 4 ) / 164, 2270, 1921, + $ 3129 / + DATA ( MM( 83, J ), J = 1, 4 ) / 303, 2587, 3452, + $ 909 / + DATA ( MM( 84, J ), J = 1, 4 ) / 2144, 2961, 3901, + $ 2801 / + DATA ( MM( 85, J ), J = 1, 4 ) / 3480, 1970, 572, + $ 421 / + DATA ( MM( 86, J ), J = 1, 4 ) / 119, 1817, 3309, + $ 4073 / + DATA ( MM( 87, J ), J = 1, 4 ) / 3357, 676, 3171, + $ 2813 / + DATA ( MM( 88, J ), J = 1, 4 ) / 837, 1410, 817, + $ 2337 / + DATA ( MM( 89, J ), J = 1, 4 ) / 2826, 3723, 3039, + $ 1429 / + DATA ( MM( 90, J ), J = 1, 4 ) / 2332, 2803, 1696, + $ 1177 / + DATA ( MM( 91, J ), J = 1, 4 ) / 2089, 3185, 1256, + $ 1901 / + DATA ( MM( 92, J ), J = 1, 4 ) / 3780, 184, 3715, + $ 81 / + DATA ( MM( 93, J ), J = 1, 4 ) / 1700, 663, 2077, + $ 1669 / + DATA ( MM( 94, J ), J = 1, 4 ) / 3712, 499, 3019, + $ 2633 / + DATA ( MM( 95, J ), J = 1, 4 ) / 150, 3784, 1497, + $ 2269 / + DATA ( MM( 96, J ), J = 1, 4 ) / 2000, 1631, 1101, + $ 129 / + DATA ( MM( 97, J ), J = 1, 4 ) / 3375, 1925, 717, + $ 1141 / + DATA ( MM( 98, J ), J = 1, 4 ) / 1621, 3912, 51, + $ 249 / + DATA ( MM( 99, J ), J = 1, 4 ) / 3090, 1398, 981, + $ 3917 / + DATA ( MM( 100, J ), J = 1, 4 ) / 3765, 1349, 1978, + $ 2481 / + DATA ( MM( 101, J ), J = 1, 4 ) / 1149, 1441, 1813, + $ 3941 / + DATA ( MM( 102, J ), J = 1, 4 ) / 3146, 2224, 3881, + $ 2217 / + DATA ( MM( 103, J ), J = 1, 4 ) / 33, 2411, 76, + $ 2749 / + DATA ( MM( 104, J ), J = 1, 4 ) / 3082, 1907, 3846, + $ 3041 / + DATA ( MM( 105, J ), J = 1, 4 ) / 2741, 3192, 3694, + $ 1877 / + DATA ( MM( 106, J ), J = 1, 4 ) / 359, 2786, 1682, + $ 345 / + DATA ( MM( 107, J ), J = 1, 4 ) / 3316, 382, 124, + $ 2861 / + DATA ( MM( 108, J ), J = 1, 4 ) / 1749, 37, 1660, + $ 1809 / + DATA ( MM( 109, J ), J = 1, 4 ) / 185, 759, 3997, + $ 3141 / + DATA ( MM( 110, J ), J = 1, 4 ) / 2784, 2948, 479, + $ 2825 / + DATA ( MM( 111, J ), J = 1, 4 ) / 2202, 1862, 1141, + $ 157 / + DATA ( MM( 112, J ), J = 1, 4 ) / 2199, 3802, 886, + $ 2881 / + DATA ( MM( 113, J ), J = 1, 4 ) / 1364, 2423, 3514, + $ 3637 / + DATA ( MM( 114, J ), J = 1, 4 ) / 1244, 2051, 1301, + $ 1465 / + DATA ( MM( 115, J ), J = 1, 4 ) / 2020, 2295, 3604, + $ 2829 / + DATA ( MM( 116, J ), J = 1, 4 ) / 3160, 1332, 1888, + $ 2161 / + DATA ( MM( 117, J ), J = 1, 4 ) / 2785, 1832, 1836, + $ 3365 / + DATA ( MM( 118, J ), J = 1, 4 ) / 2772, 2405, 1990, + $ 361 / + DATA ( MM( 119, J ), J = 1, 4 ) / 1217, 3638, 2058, + $ 2685 / + DATA ( MM( 120, J ), J = 1, 4 ) / 1822, 3661, 692, + $ 3745 / + DATA ( MM( 121, J ), J = 1, 4 ) / 1245, 327, 1194, + $ 2325 / + DATA ( MM( 122, J ), J = 1, 4 ) / 2252, 3660, 20, + $ 3609 / + DATA ( MM( 123, J ), J = 1, 4 ) / 3904, 716, 3285, + $ 3821 / + DATA ( MM( 124, J ), J = 1, 4 ) / 2774, 1842, 2046, + $ 3537 / + DATA ( MM( 125, J ), J = 1, 4 ) / 997, 3987, 2107, + $ 517 / + DATA ( MM( 126, J ), J = 1, 4 ) / 2573, 1368, 3508, + $ 3017 / + DATA ( MM( 127, J ), J = 1, 4 ) / 1148, 1848, 3525, + $ 2141 / + DATA ( MM( 128, J ), J = 1, 4 ) / 545, 2366, 3801, + $ 1537 / +* .. +* .. Executable Statements .. +* + I1 = ISEED( 1 ) + I2 = ISEED( 2 ) + I3 = ISEED( 3 ) + I4 = ISEED( 4 ) +* + DO 10 I = 1, MIN( N, LV ) +* + 20 CONTINUE +* +* Multiply the seed by i-th power of the multiplier modulo 2**48 +* + IT4 = I4*MM( I, 4 ) + IT3 = IT4 / IPW2 + IT4 = IT4 - IPW2*IT3 + IT3 = IT3 + I3*MM( I, 4 ) + I4*MM( I, 3 ) + IT2 = IT3 / IPW2 + IT3 = IT3 - IPW2*IT2 + IT2 = IT2 + I2*MM( I, 4 ) + I3*MM( I, 3 ) + I4*MM( I, 2 ) + IT1 = IT2 / IPW2 + IT2 = IT2 - IPW2*IT1 + IT1 = IT1 + I1*MM( I, 4 ) + I2*MM( I, 3 ) + I3*MM( I, 2 ) + + $ I4*MM( I, 1 ) + IT1 = MOD( IT1, IPW2 ) +* +* Convert 48-bit integer to a real number in the interval (0,1) +* + X( I ) = R*( REAL( IT1 )+R*( REAL( IT2 )+R*( REAL( IT3 )+R* + $ REAL( IT4 ) ) ) ) +* + IF (X( I ).EQ.1.0) THEN +* If a real number has n bits of precision, and the first +* n bits of the 48-bit integer above happen to be all 1 (which +* will occur about once every 2**n calls), then X( I ) will +* be rounded to exactly 1.0. In IEEE single precision arithmetic, +* this will happen relatively often since n = 24. +* Since X( I ) is not supposed to return exactly 0.0 or 1.0, +* the statistically correct thing to do in this situation is +* simply to iterate again. +* N.B. the case X( I ) = 0.0 should not be possible. + I1 = I1 + 2 + I2 = I2 + 2 + I3 = I3 + 2 + I4 = I4 + 2 + GOTO 20 + END IF +* + 10 CONTINUE +* +* Return final value of seed +* + ISEED( 1 ) = IT1 + ISEED( 2 ) = IT2 + ISEED( 3 ) = IT3 + ISEED( 4 ) = IT4 + RETURN +* +* End of SLARUV +* + END diff --git a/dspl/liblapack/SRC/slarz.f b/dspl/liblapack/SRC/slarz.f new file mode 100644 index 0000000..9c9d831 --- /dev/null +++ b/dspl/liblapack/SRC/slarz.f @@ -0,0 +1,236 @@ +*> \brief \b SLARZ applies an elementary reflector (as returned by stzrzf) to a general matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARZ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARZ( SIDE, M, N, L, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, L, LDC, M, N +* REAL TAU +* .. +* .. Array Arguments .. +* REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARZ applies a real elementary reflector H to a real M-by-N +*> matrix C, from either the left or the right. H is represented in the +*> form +*> +*> H = I - tau * v * v**T +*> +*> where tau is a real scalar and v is a real vector. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> +*> +*> H is a product of k elementary reflectors as returned by STZRZF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of entries of the vector V containing +*> the meaningful part of the Householder vectors. +*> If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension (1+(L-1)*abs(INCV)) +*> The vector v in the representation of H as returned by +*> STZRZF. V is not used if TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLARZ( SIDE, M, N, L, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, L, LDC, M, N + REAL TAU +* .. +* .. Array Arguments .. + REAL C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMV, SGER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C +* + IF( TAU.NE.ZERO ) THEN +* +* w( 1:n ) = C( 1, 1:n ) +* + CALL SCOPY( N, C, LDC, WORK, 1 ) +* +* w( 1:n ) = w( 1:n ) + C( m-l+1:m, 1:n )**T * v( 1:l ) +* + CALL SGEMV( 'Transpose', L, N, ONE, C( M-L+1, 1 ), LDC, V, + $ INCV, ONE, WORK, 1 ) +* +* C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n ) +* + CALL SAXPY( N, -TAU, WORK, 1, C, LDC ) +* +* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... +* tau * v( 1:l ) * w( 1:n )**T +* + CALL SGER( L, N, -TAU, V, INCV, WORK, 1, C( M-L+1, 1 ), + $ LDC ) + END IF +* + ELSE +* +* Form C * H +* + IF( TAU.NE.ZERO ) THEN +* +* w( 1:m ) = C( 1:m, 1 ) +* + CALL SCOPY( M, C, 1, WORK, 1 ) +* +* w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n, 1:n ) * v( 1:l ) +* + CALL SGEMV( 'No transpose', M, L, ONE, C( 1, N-L+1 ), LDC, + $ V, INCV, ONE, WORK, 1 ) +* +* C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m ) +* + CALL SAXPY( M, -TAU, WORK, 1, C, 1 ) +* +* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... +* tau * w( 1:m ) * v( 1:l )**T +* + CALL SGER( M, L, -TAU, WORK, 1, V, INCV, C( 1, N-L+1 ), + $ LDC ) +* + END IF +* + END IF +* + RETURN +* +* End of SLARZ +* + END diff --git a/dspl/liblapack/SRC/slarzb.f b/dspl/liblapack/SRC/slarzb.f new file mode 100644 index 0000000..3e5aab6 --- /dev/null +++ b/dspl/liblapack/SRC/slarzb.f @@ -0,0 +1,323 @@ +*> \brief \b SLARZB applies a block reflector or its transpose to a general matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARZB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, +* LDV, T, LDT, C, LDC, WORK, LDWORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, SIDE, STOREV, TRANS +* INTEGER K, L, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. +* REAL C( LDC, * ), T( LDT, * ), V( LDV, * ), +* $ WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARZB applies a real block reflector H or its transpose H**T to +*> a real distributed M-by-N C from the left or the right. +*> +*> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply H or H**T from the Left +*> = 'R': apply H or H**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply H (No transpose) +*> = 'C': apply H**T (Transpose) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Indicates how H is formed from a product of elementary +*> reflectors +*> = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Indicates how the vectors which define the elementary +*> reflectors are stored: +*> = 'C': Columnwise (not supported yet) +*> = 'R': Rowwise +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the matrix T (= the number of elementary +*> reflectors whose product defines the block reflector). +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of columns of the matrix V containing the +*> meaningful part of the Householder reflectors. +*> If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension (LDV,NV). +*> If STOREV = 'C', NV = K; if STOREV = 'R', NV = L. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= L; if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is REAL array, dimension (LDT,K) +*> The triangular K-by-K matrix T in the representation of the +*> block reflector. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by H*C or H**T*C or C*H or C*H**T. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LDWORK,K) +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. +*> If SIDE = 'L', LDWORK >= max(1,N); +*> if SIDE = 'R', LDWORK >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, + $ LDV, T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, L, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + REAL C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, INFO, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, STRMM, XERBLA +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* +* Check for currently supported options +* + INFO = 0 + IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLARZB', -INFO ) + RETURN + END IF +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**T * C +* +* W( 1:n, 1:k ) = C( 1:k, 1:n )**T +* + DO 10 J = 1, K + CALL SCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W( 1:n, 1:k ) = W( 1:n, 1:k ) + ... +* C( m-l+1:m, 1:n )**T * V( 1:k, 1:l )**T +* + IF( L.GT.0 ) + $ CALL SGEMM( 'Transpose', 'Transpose', N, K, L, ONE, + $ C( M-L+1, 1 ), LDC, V, LDV, ONE, WORK, LDWORK ) +* +* W( 1:n, 1:k ) = W( 1:n, 1:k ) * T**T or W( 1:m, 1:k ) * T +* + CALL STRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, ONE, T, + $ LDT, WORK, LDWORK ) +* +* C( 1:k, 1:n ) = C( 1:k, 1:n ) - W( 1:n, 1:k )**T +* + DO 30 J = 1, N + DO 20 I = 1, K + C( I, J ) = C( I, J ) - WORK( J, I ) + 20 CONTINUE + 30 CONTINUE +* +* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... +* V( 1:k, 1:l )**T * W( 1:n, 1:k )**T +* + IF( L.GT.0 ) + $ CALL SGEMM( 'Transpose', 'Transpose', L, N, K, -ONE, V, LDV, + $ WORK, LDWORK, ONE, C( M-L+1, 1 ), LDC ) +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**T +* +* W( 1:m, 1:k ) = C( 1:m, 1:k ) +* + DO 40 J = 1, K + CALL SCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W( 1:m, 1:k ) = W( 1:m, 1:k ) + ... +* C( 1:m, n-l+1:n ) * V( 1:k, 1:l )**T +* + IF( L.GT.0 ) + $ CALL SGEMM( 'No transpose', 'Transpose', M, K, L, ONE, + $ C( 1, N-L+1 ), LDC, V, LDV, ONE, WORK, LDWORK ) +* +* W( 1:m, 1:k ) = W( 1:m, 1:k ) * T or W( 1:m, 1:k ) * T**T +* + CALL STRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, ONE, T, + $ LDT, WORK, LDWORK ) +* +* C( 1:m, 1:k ) = C( 1:m, 1:k ) - W( 1:m, 1:k ) +* + DO 60 J = 1, K + DO 50 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE +* +* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... +* W( 1:m, 1:k ) * V( 1:k, 1:l ) +* + IF( L.GT.0 ) + $ CALL SGEMM( 'No transpose', 'No transpose', M, L, K, -ONE, + $ WORK, LDWORK, V, LDV, ONE, C( 1, N-L+1 ), LDC ) +* + END IF +* + RETURN +* +* End of SLARZB +* + END diff --git a/dspl/liblapack/SRC/slarzt.f b/dspl/liblapack/SRC/slarzt.f new file mode 100644 index 0000000..d6428d0 --- /dev/null +++ b/dspl/liblapack/SRC/slarzt.f @@ -0,0 +1,264 @@ +*> \brief \b SLARZT forms the triangular factor T of a block reflector H = I - vtvH. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLARZT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLARZT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* REAL T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLARZT forms the triangular factor T of a real block reflector +*> H of order > n, which is defined as a product of k elementary +*> reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**T +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**T * T * V +*> +*> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise (not supported yet) +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is REAL array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored; the corresponding +*> array elements are modified but restored on exit. The rest of the +*> array is not used. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> ______V_____ +*> ( v1 v2 v3 ) / \ +*> ( v1 v2 v3 ) ( v1 v1 v1 v1 v1 . . . . 1 ) +*> V = ( v1 v2 v3 ) ( v2 v2 v2 v2 v2 . . . 1 ) +*> ( v1 v2 v3 ) ( v3 v3 v3 v3 v3 . . 1 ) +*> ( v1 v2 v3 ) +*> . . . +*> . . . +*> 1 . . +*> 1 . +*> 1 +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> ______V_____ +*> 1 / \ +*> . 1 ( 1 . . . . v1 v1 v1 v1 v1 ) +*> . . 1 ( . 1 . . . v2 v2 v2 v2 v2 ) +*> . . . ( . . 1 . . v3 v3 v3 v3 v3 ) +*> . . . +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> V = ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLARZT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + REAL T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, STRMV, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Check for currently supported options +* + INFO = 0 + IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLARZT', -INFO ) + RETURN + END IF +* + DO 20 I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 10 J = I, K + T( J, I ) = ZERO + 10 CONTINUE + ELSE +* +* general case +* + IF( I.LT.K ) THEN +* +* T(i+1:k,i) = - tau(i) * V(i+1:k,1:n) * V(i,1:n)**T +* + CALL SGEMV( 'No transpose', K-I, N, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO, + $ T( I+1, I ), 1 ) +* +* T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL STRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + END IF + T( I, I ) = TAU( I ) + END IF + 20 CONTINUE + RETURN +* +* End of SLARZT +* + END diff --git a/dspl/liblapack/SRC/slas2.f b/dspl/liblapack/SRC/slas2.f new file mode 100644 index 0000000..906c8bd --- /dev/null +++ b/dspl/liblapack/SRC/slas2.f @@ -0,0 +1,183 @@ +*> \brief \b SLAS2 computes singular values of a 2-by-2 triangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAS2( F, G, H, SSMIN, SSMAX ) +* +* .. Scalar Arguments .. +* REAL F, G, H, SSMAX, SSMIN +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAS2 computes the singular values of the 2-by-2 matrix +*> [ F G ] +*> [ 0 H ]. +*> On return, SSMIN is the smaller singular value and SSMAX is the +*> larger singular value. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] F +*> \verbatim +*> F is REAL +*> The (1,1) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] G +*> \verbatim +*> G is REAL +*> The (1,2) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] H +*> \verbatim +*> H is REAL +*> The (2,2) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[out] SSMIN +*> \verbatim +*> SSMIN is REAL +*> The smaller singular value. +*> \endverbatim +*> +*> \param[out] SSMAX +*> \verbatim +*> SSMAX is REAL +*> The larger singular value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Barring over/underflow, all output quantities are correct to within +*> a few units in the last place (ulps), even in the absence of a guard +*> digit in addition/subtraction. +*> +*> In IEEE arithmetic, the code works correctly if one matrix element is +*> infinite. +*> +*> Overflow will not occur unless the largest singular value itself +*> overflows, or is within a few ulps of overflow. (On machines with +*> partial overflow, like the Cray, overflow may occur if the largest +*> singular value is within a factor of 2 of overflow.) +*> +*> Underflow is harmless if underflow is gradual. Otherwise, results +*> may correspond to a matrix modified by perturbations of size near +*> the underflow threshold. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLAS2( F, G, H, SSMIN, SSMAX ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + REAL F, G, H, SSMAX, SSMIN +* .. +* +* ==================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL TWO + PARAMETER ( TWO = 2.0E0 ) +* .. +* .. Local Scalars .. + REAL AS, AT, AU, C, FA, FHMN, FHMX, GA, HA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + FA = ABS( F ) + GA = ABS( G ) + HA = ABS( H ) + FHMN = MIN( FA, HA ) + FHMX = MAX( FA, HA ) + IF( FHMN.EQ.ZERO ) THEN + SSMIN = ZERO + IF( FHMX.EQ.ZERO ) THEN + SSMAX = GA + ELSE + SSMAX = MAX( FHMX, GA )*SQRT( ONE+ + $ ( MIN( FHMX, GA ) / MAX( FHMX, GA ) )**2 ) + END IF + ELSE + IF( GA.LT.FHMX ) THEN + AS = ONE + FHMN / FHMX + AT = ( FHMX-FHMN ) / FHMX + AU = ( GA / FHMX )**2 + C = TWO / ( SQRT( AS*AS+AU )+SQRT( AT*AT+AU ) ) + SSMIN = FHMN*C + SSMAX = FHMX / C + ELSE + AU = FHMX / GA + IF( AU.EQ.ZERO ) THEN +* +* Avoid possible harmful underflow if exponent range +* asymmetric (true SSMIN may not underflow even if +* AU underflows) +* + SSMIN = ( FHMN*FHMX ) / GA + SSMAX = GA + ELSE + AS = ONE + FHMN / FHMX + AT = ( FHMX-FHMN ) / FHMX + C = ONE / ( SQRT( ONE+( AS*AU )**2 )+ + $ SQRT( ONE+( AT*AU )**2 ) ) + SSMIN = ( FHMN*C )*AU + SSMIN = SSMIN + SSMIN + SSMAX = GA / ( C+C ) + END IF + END IF + END IF + RETURN +* +* End of SLAS2 +* + END diff --git a/dspl/liblapack/SRC/slascl.f b/dspl/liblapack/SRC/slascl.f new file mode 100644 index 0000000..d325714 --- /dev/null +++ b/dspl/liblapack/SRC/slascl.f @@ -0,0 +1,368 @@ +*> \brief \b SLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASCL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TYPE +* INTEGER INFO, KL, KU, LDA, M, N +* REAL CFROM, CTO +* .. +* .. Array Arguments .. +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASCL multiplies the M by N real matrix A by the real scalar +*> CTO/CFROM. This is done without over/underflow as long as the final +*> result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that +*> A may be full, upper triangular, lower triangular, upper Hessenberg, +*> or banded. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TYPE +*> \verbatim +*> TYPE is CHARACTER*1 +*> TYPE indices the storage type of the input matrix. +*> = 'G': A is a full matrix. +*> = 'L': A is a lower triangular matrix. +*> = 'U': A is an upper triangular matrix. +*> = 'H': A is an upper Hessenberg matrix. +*> = 'B': A is a symmetric band matrix with lower bandwidth KL +*> and upper bandwidth KU and with the only the lower +*> half stored. +*> = 'Q': A is a symmetric band matrix with lower bandwidth KL +*> and upper bandwidth KU and with the only the upper +*> half stored. +*> = 'Z': A is a band matrix with lower bandwidth KL and upper +*> bandwidth KU. See SGBTRF for storage details. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The lower bandwidth of A. Referenced only if TYPE = 'B', +*> 'Q' or 'Z'. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The upper bandwidth of A. Referenced only if TYPE = 'B', +*> 'Q' or 'Z'. +*> \endverbatim +*> +*> \param[in] CFROM +*> \verbatim +*> CFROM is REAL +*> \endverbatim +*> +*> \param[in] CTO +*> \verbatim +*> CTO is REAL +*> +*> The matrix A is multiplied by CTO/CFROM. A(I,J) is computed +*> without over/underflow if the final result CTO*A(I,J)/CFROM +*> can be represented without over/underflow. CFROM must be +*> nonzero. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The matrix to be multiplied by CTO/CFROM. See TYPE for the +*> storage type. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If TYPE = 'G', 'L', 'U', 'H', LDA >= max(1,M); +*> TYPE = 'B', LDA >= KL+1; +*> TYPE = 'Q', LDA >= KU+1; +*> TYPE = 'Z', LDA >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> 0 - successful exit +*> <0 - if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER TYPE + INTEGER INFO, KL, KU, LDA, M, N + REAL CFROM, CTO +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER I, ITYPE, J, K1, K2, K3, K4 + REAL BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + REAL SLAMCH + EXTERNAL LSAME, SLAMCH, SISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 +* + IF( LSAME( TYPE, 'G' ) ) THEN + ITYPE = 0 + ELSE IF( LSAME( TYPE, 'L' ) ) THEN + ITYPE = 1 + ELSE IF( LSAME( TYPE, 'U' ) ) THEN + ITYPE = 2 + ELSE IF( LSAME( TYPE, 'H' ) ) THEN + ITYPE = 3 + ELSE IF( LSAME( TYPE, 'B' ) ) THEN + ITYPE = 4 + ELSE IF( LSAME( TYPE, 'Q' ) ) THEN + ITYPE = 5 + ELSE IF( LSAME( TYPE, 'Z' ) ) THEN + ITYPE = 6 + ELSE + ITYPE = -1 + END IF +* + IF( ITYPE.EQ.-1 ) THEN + INFO = -1 + ELSE IF( CFROM.EQ.ZERO .OR. SISNAN(CFROM) ) THEN + INFO = -4 + ELSE IF( SISNAN(CTO) ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -6 + ELSE IF( N.LT.0 .OR. ( ITYPE.EQ.4 .AND. N.NE.M ) .OR. + $ ( ITYPE.EQ.5 .AND. N.NE.M ) ) THEN + INFO = -7 + ELSE IF( ITYPE.LE.3 .AND. LDA.LT.MAX( 1, M ) ) THEN + INFO = -9 + ELSE IF( ITYPE.GE.4 ) THEN + IF( KL.LT.0 .OR. KL.GT.MAX( M-1, 0 ) ) THEN + INFO = -2 + ELSE IF( KU.LT.0 .OR. KU.GT.MAX( N-1, 0 ) .OR. + $ ( ( ITYPE.EQ.4 .OR. ITYPE.EQ.5 ) .AND. KL.NE.KU ) ) + $ THEN + INFO = -3 + ELSE IF( ( ITYPE.EQ.4 .AND. LDA.LT.KL+1 ) .OR. + $ ( ITYPE.EQ.5 .AND. LDA.LT.KU+1 ) .OR. + $ ( ITYPE.EQ.6 .AND. LDA.LT.2*KL+KU+1 ) ) THEN + INFO = -9 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASCL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* + CFROMC = CFROM + CTOC = CTO +* + 10 CONTINUE + CFROM1 = CFROMC*SMLNUM + IF( CFROM1.EQ.CFROMC ) THEN +! CFROMC is an inf. Multiply by a correctly signed zero for +! finite CTOC, or a NaN if CTOC is infinite. + MUL = CTOC / CFROMC + DONE = .TRUE. + CTO1 = CTOC + ELSE + CTO1 = CTOC / BIGNUM + IF( CTO1.EQ.CTOC ) THEN +! CTOC is either 0 or an inf. In both cases, CTOC itself +! serves as the correct multiplication factor. + MUL = CTOC + DONE = .TRUE. + CFROMC = ONE + ELSE IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN + MUL = SMLNUM + DONE = .FALSE. + CFROMC = CFROM1 + ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN + MUL = BIGNUM + DONE = .FALSE. + CTOC = CTO1 + ELSE + MUL = CTOC / CFROMC + DONE = .TRUE. + END IF + END IF +* + IF( ITYPE.EQ.0 ) THEN +* +* Full matrix +* + DO 30 J = 1, N + DO 20 I = 1, M + A( I, J ) = A( I, J )*MUL + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( ITYPE.EQ.1 ) THEN +* +* Lower triangular matrix +* + DO 50 J = 1, N + DO 40 I = J, M + A( I, J ) = A( I, J )*MUL + 40 CONTINUE + 50 CONTINUE +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Upper triangular matrix +* + DO 70 J = 1, N + DO 60 I = 1, MIN( J, M ) + A( I, J ) = A( I, J )*MUL + 60 CONTINUE + 70 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* Upper Hessenberg matrix +* + DO 90 J = 1, N + DO 80 I = 1, MIN( J+1, M ) + A( I, J ) = A( I, J )*MUL + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Lower half of a symmetric band matrix +* + K3 = KL + 1 + K4 = N + 1 + DO 110 J = 1, N + DO 100 I = 1, MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 100 CONTINUE + 110 CONTINUE +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Upper half of a symmetric band matrix +* + K1 = KU + 2 + K3 = KU + 1 + DO 130 J = 1, N + DO 120 I = MAX( K1-J, 1 ), K3 + A( I, J ) = A( I, J )*MUL + 120 CONTINUE + 130 CONTINUE +* + ELSE IF( ITYPE.EQ.6 ) THEN +* +* Band matrix +* + K1 = KL + KU + 2 + K2 = KL + 1 + K3 = 2*KL + KU + 1 + K4 = KL + KU + 1 + M + DO 150 J = 1, N + DO 140 I = MAX( K1-J, K2 ), MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 140 CONTINUE + 150 CONTINUE +* + END IF +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of SLASCL +* + END diff --git a/dspl/liblapack/SRC/slascl2.f b/dspl/liblapack/SRC/slascl2.f new file mode 100644 index 0000000..dbe7783 --- /dev/null +++ b/dspl/liblapack/SRC/slascl2.f @@ -0,0 +1,119 @@ +*> \brief \b SLASCL2 performs diagonal scaling on a vector. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASCL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASCL2 ( M, N, D, X, LDX ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDX +* .. +* .. Array Arguments .. +* REAL D( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASCL2 performs a diagonal scaling on a vector: +*> x <-- D * x +*> where the diagonal matrix D is stored as a vector. +*> +*> Eventually to be replaced by BLAS_sge_diag_scale in the new BLAS +*> standard. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of D and X. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of X. N >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, length M +*> Diagonal matrix D, stored as a vector of length M. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (LDX,N) +*> On entry, the vector X to be scaled by D. +*> On exit, the scaled vector. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the vector X. LDX >= M. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SLASCL2 ( M, N, D, X, LDX ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER M, N, LDX +* .. +* .. Array Arguments .. + REAL D( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. Executable Statements .. +* + DO J = 1, N + DO I = 1, M + X( I, J ) = X( I, J ) * D( I ) + END DO + END DO + + RETURN + END diff --git a/dspl/liblapack/SRC/slasd0.f b/dspl/liblapack/SRC/slasd0.f new file mode 100644 index 0000000..3b87544 --- /dev/null +++ b/dspl/liblapack/SRC/slasd0.f @@ -0,0 +1,316 @@ +*> \brief \b SLASD0 computes the singular values of a real upper bidiagonal n-by-m matrix B with diagonal d and off-diagonal e. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASD0 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASD0( N, SQRE, D, E, U, LDU, VT, LDVT, SMLSIZ, IWORK, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDU, LDVT, N, SMLSIZ, SQRE +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL D( * ), E( * ), U( LDU, * ), VT( LDVT, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Using a divide and conquer approach, SLASD0 computes the singular +*> value decomposition (SVD) of a real upper bidiagonal N-by-M +*> matrix B with diagonal D and offdiagonal E, where M = N + SQRE. +*> The algorithm computes orthogonal matrices U and VT such that +*> B = U * S * VT. The singular values S are overwritten on D. +*> +*> A related subroutine, SLASDA, computes only the singular values, +*> and optionally, the singular vectors in compact form. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, the row dimension of the upper bidiagonal matrix. +*> This is also the dimension of the main diagonal array D. +*> \endverbatim +*> +*> \param[in] SQRE +*> \verbatim +*> SQRE is INTEGER +*> Specifies the column dimension of the bidiagonal matrix. +*> = 0: The bidiagonal matrix has column dimension M = N; +*> = 1: The bidiagonal matrix has column dimension M = N+1; +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry D contains the main diagonal of the bidiagonal +*> matrix. +*> On exit D, if INFO = 0, contains its singular values. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (M-1) +*> Contains the subdiagonal entries of the bidiagonal matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is REAL array, dimension (LDU, N) +*> On exit, U contains the left singular vectors. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> On entry, leading dimension of U. +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is REAL array, dimension (LDVT, M) +*> On exit, VT**T contains the right singular vectors. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> On entry, leading dimension of VT. +*> \endverbatim +*> +*> \param[in] SMLSIZ +*> \verbatim +*> SMLSIZ is INTEGER +*> On entry, maximum size of the subproblems at the +*> bottom of the computation tree. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (8*N) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*M**2+2*M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, a singular value did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE SLASD0( N, SQRE, D, E, U, LDU, VT, LDVT, SMLSIZ, IWORK, + $ WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDU, LDVT, N, SMLSIZ, SQRE +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL D( * ), E( * ), U( LDU, * ), VT( LDVT, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, I1, IC, IDXQ, IDXQC, IM1, INODE, ITEMP, IWK, + $ J, LF, LL, LVL, M, NCC, ND, NDB1, NDIML, NDIMR, + $ NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQREI + REAL ALPHA, BETA +* .. +* .. External Subroutines .. + EXTERNAL SLASD1, SLASDQ, SLASDT, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -2 + END IF +* + M = N + SQRE +* + IF( LDU.LT.N ) THEN + INFO = -6 + ELSE IF( LDVT.LT.M ) THEN + INFO = -8 + ELSE IF( SMLSIZ.LT.3 ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD0', -INFO ) + RETURN + END IF +* +* If the input matrix is too small, call SLASDQ to find the SVD. +* + IF( N.LE.SMLSIZ ) THEN + CALL SLASDQ( 'U', SQRE, N, M, N, 0, D, E, VT, LDVT, U, LDU, U, + $ LDU, WORK, INFO ) + RETURN + END IF +* +* Set up the computation tree. +* + INODE = 1 + NDIML = INODE + N + NDIMR = NDIML + N + IDXQ = NDIMR + N + IWK = IDXQ + N + CALL SLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ), + $ IWORK( NDIMR ), SMLSIZ ) +* +* For the nodes on bottom level of the tree, solve +* their subproblems by SLASDQ. +* + NDB1 = ( ND+1 ) / 2 + NCC = 0 + DO 30 I = NDB1, ND +* +* IC : center row of each node +* NL : number of rows of left subproblem +* NR : number of rows of right subproblem +* NLF: starting row of the left subproblem +* NRF: starting row of the right subproblem +* + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NLP1 = NL + 1 + NR = IWORK( NDIMR+I1 ) + NRP1 = NR + 1 + NLF = IC - NL + NRF = IC + 1 + SQREI = 1 + CALL SLASDQ( 'U', SQREI, NL, NLP1, NL, NCC, D( NLF ), E( NLF ), + $ VT( NLF, NLF ), LDVT, U( NLF, NLF ), LDU, + $ U( NLF, NLF ), LDU, WORK, INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + ITEMP = IDXQ + NLF - 2 + DO 10 J = 1, NL + IWORK( ITEMP+J ) = J + 10 CONTINUE + IF( I.EQ.ND ) THEN + SQREI = SQRE + ELSE + SQREI = 1 + END IF + NRP1 = NR + SQREI + CALL SLASDQ( 'U', SQREI, NR, NRP1, NR, NCC, D( NRF ), E( NRF ), + $ VT( NRF, NRF ), LDVT, U( NRF, NRF ), LDU, + $ U( NRF, NRF ), LDU, WORK, INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + ITEMP = IDXQ + IC + DO 20 J = 1, NR + IWORK( ITEMP+J-1 ) = J + 20 CONTINUE + 30 CONTINUE +* +* Now conquer each subproblem bottom-up. +* + DO 50 LVL = NLVL, 1, -1 +* +* Find the first node LF and last node LL on the +* current level LVL. +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 40 I = LF, LL + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + IF( ( SQRE.EQ.0 ) .AND. ( I.EQ.LL ) ) THEN + SQREI = SQRE + ELSE + SQREI = 1 + END IF + IDXQC = IDXQ + NLF - 1 + ALPHA = D( IC ) + BETA = E( IC ) + CALL SLASD1( NL, NR, SQREI, D( NLF ), ALPHA, BETA, + $ U( NLF, NLF ), LDU, VT( NLF, NLF ), LDVT, + $ IWORK( IDXQC ), IWORK( IWK ), WORK, INFO ) +* +* Report the possible convergence failure. +* + IF( INFO.NE.0 ) THEN + RETURN + END IF + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of SLASD0 +* + END diff --git a/dspl/liblapack/SRC/slasd1.f b/dspl/liblapack/SRC/slasd1.f new file mode 100644 index 0000000..be93870 --- /dev/null +++ b/dspl/liblapack/SRC/slasd1.f @@ -0,0 +1,326 @@ +*> \brief \b SLASD1 computes the SVD of an upper bidiagonal matrix B of the specified size. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASD1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASD1( NL, NR, SQRE, D, ALPHA, BETA, U, LDU, VT, LDVT, +* IDXQ, IWORK, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDU, LDVT, NL, NR, SQRE +* REAL ALPHA, BETA +* .. +* .. Array Arguments .. +* INTEGER IDXQ( * ), IWORK( * ) +* REAL D( * ), U( LDU, * ), VT( LDVT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B, +*> where N = NL + NR + 1 and M = N + SQRE. SLASD1 is called from SLASD0. +*> +*> A related subroutine SLASD7 handles the case in which the singular +*> values (and the singular vectors in factored form) are desired. +*> +*> SLASD1 computes the SVD as follows: +*> +*> ( D1(in) 0 0 0 ) +*> B = U(in) * ( Z1**T a Z2**T b ) * VT(in) +*> ( 0 0 D2(in) 0 ) +*> +*> = U(out) * ( D(out) 0) * VT(out) +*> +*> where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M +*> with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros +*> elsewhere; and the entry b is empty if SQRE = 0. +*> +*> The left singular vectors of the original matrix are stored in U, and +*> the transpose of the right singular vectors are stored in VT, and the +*> singular values are in D. The algorithm consists of three stages: +*> +*> The first stage consists of deflating the size of the problem +*> when there are multiple singular values or when there are zeros in +*> the Z vector. For each such occurrence the dimension of the +*> secular equation problem is reduced by one. This stage is +*> performed by the routine SLASD2. +*> +*> The second stage consists of calculating the updated +*> singular values. This is done by finding the square roots of the +*> roots of the secular equation via the routine SLASD4 (as called +*> by SLASD3). This routine also calculates the singular vectors of +*> the current problem. +*> +*> The final stage consists of computing the updated singular vectors +*> directly using the updated singular values. The singular vectors +*> for the current problem are multiplied with the singular vectors +*> from the overall problem. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NL +*> \verbatim +*> NL is INTEGER +*> The row dimension of the upper block. NL >= 1. +*> \endverbatim +*> +*> \param[in] NR +*> \verbatim +*> NR is INTEGER +*> The row dimension of the lower block. NR >= 1. +*> \endverbatim +*> +*> \param[in] SQRE +*> \verbatim +*> SQRE is INTEGER +*> = 0: the lower block is an NR-by-NR square matrix. +*> = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +*> +*> The bidiagonal matrix has row dimension N = NL + NR + 1, +*> and column dimension M = N + SQRE. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (NL+NR+1). +*> N = NL+NR+1 +*> On entry D(1:NL,1:NL) contains the singular values of the +*> upper block; and D(NL+2:N) contains the singular values of +*> the lower block. On exit D(1:N) contains the singular values +*> of the modified matrix. +*> \endverbatim +*> +*> \param[in,out] ALPHA +*> \verbatim +*> ALPHA is REAL +*> Contains the diagonal element associated with the added row. +*> \endverbatim +*> +*> \param[in,out] BETA +*> \verbatim +*> BETA is REAL +*> Contains the off-diagonal element associated with the added +*> row. +*> \endverbatim +*> +*> \param[in,out] U +*> \verbatim +*> U is REAL array, dimension (LDU,N) +*> On entry U(1:NL, 1:NL) contains the left singular vectors of +*> the upper block; U(NL+2:N, NL+2:N) contains the left singular +*> vectors of the lower block. On exit U contains the left +*> singular vectors of the bidiagonal matrix. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max( 1, N ). +*> \endverbatim +*> +*> \param[in,out] VT +*> \verbatim +*> VT is REAL array, dimension (LDVT,M) +*> where M = N + SQRE. +*> On entry VT(1:NL+1, 1:NL+1)**T contains the right singular +*> vectors of the upper block; VT(NL+2:M, NL+2:M)**T contains +*> the right singular vectors of the lower block. On exit +*> VT**T contains the right singular vectors of the +*> bidiagonal matrix. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= max( 1, M ). +*> \endverbatim +*> +*> \param[in,out] IDXQ +*> \verbatim +*> IDXQ is INTEGER array, dimension (N) +*> This contains the permutation which will reintegrate the +*> subproblem just solved back into sorted order, i.e. +*> D( IDXQ( I = 1, N ) ) will be in ascending order. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*M**2+2*M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, a singular value did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE SLASD1( NL, NR, SQRE, D, ALPHA, BETA, U, LDU, VT, LDVT, + $ IDXQ, IWORK, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDU, LDVT, NL, NR, SQRE + REAL ALPHA, BETA +* .. +* .. Array Arguments .. + INTEGER IDXQ( * ), IWORK( * ) + REAL D( * ), U( LDU, * ), VT( LDVT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER COLTYP, I, IDX, IDXC, IDXP, IQ, ISIGMA, IU2, + $ IVT2, IZ, K, LDQ, LDU2, LDVT2, M, N, N1, N2 + REAL ORGNRM +* .. +* .. External Subroutines .. + EXTERNAL SLAMRG, SLASCL, SLASD2, SLASD3, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( NL.LT.1 ) THEN + INFO = -1 + ELSE IF( NR.LT.1 ) THEN + INFO = -2 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD1', -INFO ) + RETURN + END IF +* + N = NL + NR + 1 + M = N + SQRE +* +* The following values are for bookkeeping purposes only. They are +* integer pointers which indicate the portion of the workspace +* used by a particular array in SLASD2 and SLASD3. +* + LDU2 = N + LDVT2 = M +* + IZ = 1 + ISIGMA = IZ + M + IU2 = ISIGMA + N + IVT2 = IU2 + LDU2*N + IQ = IVT2 + LDVT2*M +* + IDX = 1 + IDXC = IDX + N + COLTYP = IDXC + N + IDXP = COLTYP + N +* +* Scale. +* + ORGNRM = MAX( ABS( ALPHA ), ABS( BETA ) ) + D( NL+1 ) = ZERO + DO 10 I = 1, N + IF( ABS( D( I ) ).GT.ORGNRM ) THEN + ORGNRM = ABS( D( I ) ) + END IF + 10 CONTINUE + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) + ALPHA = ALPHA / ORGNRM + BETA = BETA / ORGNRM +* +* Deflate singular values. +* + CALL SLASD2( NL, NR, SQRE, K, D, WORK( IZ ), ALPHA, BETA, U, LDU, + $ VT, LDVT, WORK( ISIGMA ), WORK( IU2 ), LDU2, + $ WORK( IVT2 ), LDVT2, IWORK( IDXP ), IWORK( IDX ), + $ IWORK( IDXC ), IDXQ, IWORK( COLTYP ), INFO ) +* +* Solve Secular Equation and update singular vectors. +* + LDQ = K + CALL SLASD3( NL, NR, SQRE, K, D, WORK( IQ ), LDQ, WORK( ISIGMA ), + $ U, LDU, WORK( IU2 ), LDU2, VT, LDVT, WORK( IVT2 ), + $ LDVT2, IWORK( IDXC ), IWORK( COLTYP ), WORK( IZ ), + $ INFO ) +* +* Report the possible convergence failure. +* + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* Unscale. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) +* +* Prepare the IDXQ sorting permutation. +* + N1 = K + N2 = N - K + CALL SLAMRG( N1, N2, D, 1, -1, IDXQ ) +* + RETURN +* +* End of SLASD1 +* + END diff --git a/dspl/liblapack/SRC/slasd2.f b/dspl/liblapack/SRC/slasd2.f new file mode 100644 index 0000000..5f414f1 --- /dev/null +++ b/dspl/liblapack/SRC/slasd2.f @@ -0,0 +1,634 @@ +*> \brief \b SLASD2 merges the two sets of singular values together into a single sorted set. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASD2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT, +* LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX, +* IDXC, IDXQ, COLTYP, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE +* REAL ALPHA, BETA +* .. +* .. Array Arguments .. +* INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ), +* $ IDXQ( * ) +* REAL D( * ), DSIGMA( * ), U( LDU, * ), +* $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ), +* $ Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASD2 merges the two sets of singular values together into a single +*> sorted set. Then it tries to deflate the size of the problem. +*> There are two ways in which deflation can occur: when two or more +*> singular values are close together or if there is a tiny entry in the +*> Z vector. For each such occurrence the order of the related secular +*> equation problem is reduced by one. +*> +*> SLASD2 is called from SLASD1. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NL +*> \verbatim +*> NL is INTEGER +*> The row dimension of the upper block. NL >= 1. +*> \endverbatim +*> +*> \param[in] NR +*> \verbatim +*> NR is INTEGER +*> The row dimension of the lower block. NR >= 1. +*> \endverbatim +*> +*> \param[in] SQRE +*> \verbatim +*> SQRE is INTEGER +*> = 0: the lower block is an NR-by-NR square matrix. +*> = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +*> +*> The bidiagonal matrix has N = NL + NR + 1 rows and +*> M = N + SQRE >= N columns. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> Contains the dimension of the non-deflated matrix, +*> This is the order of the related secular equation. 1 <= K <=N. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry D contains the singular values of the two submatrices +*> to be combined. On exit D contains the trailing (N-K) updated +*> singular values (those which were deflated) sorted into +*> increasing order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (N) +*> On exit Z contains the updating row vector in the secular +*> equation. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is REAL +*> Contains the diagonal element associated with the added row. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is REAL +*> Contains the off-diagonal element associated with the added +*> row. +*> \endverbatim +*> +*> \param[in,out] U +*> \verbatim +*> U is REAL array, dimension (LDU,N) +*> On entry U contains the left singular vectors of two +*> submatrices in the two square blocks with corners at (1,1), +*> (NL, NL), and (NL+2, NL+2), (N,N). +*> On exit U contains the trailing (N-K) updated left singular +*> vectors (those which were deflated) in its last N-K columns. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= N. +*> \endverbatim +*> +*> \param[in,out] VT +*> \verbatim +*> VT is REAL array, dimension (LDVT,M) +*> On entry VT**T contains the right singular vectors of two +*> submatrices in the two square blocks with corners at (1,1), +*> (NL+1, NL+1), and (NL+2, NL+2), (M,M). +*> On exit VT**T contains the trailing (N-K) updated right singular +*> vectors (those which were deflated) in its last N-K columns. +*> In case SQRE =1, the last row of VT spans the right null +*> space. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= M. +*> \endverbatim +*> +*> \param[out] DSIGMA +*> \verbatim +*> DSIGMA is REAL array, dimension (N) +*> Contains a copy of the diagonal elements (K-1 singular values +*> and one zero) in the secular equation. +*> \endverbatim +*> +*> \param[out] U2 +*> \verbatim +*> U2 is REAL array, dimension (LDU2,N) +*> Contains a copy of the first K-1 left singular vectors which +*> will be used by SLASD3 in a matrix multiply (SGEMM) to solve +*> for the new left singular vectors. U2 is arranged into four +*> blocks. The first block contains a column with 1 at NL+1 and +*> zero everywhere else; the second block contains non-zero +*> entries only at and above NL; the third contains non-zero +*> entries only below NL+1; and the fourth is dense. +*> \endverbatim +*> +*> \param[in] LDU2 +*> \verbatim +*> LDU2 is INTEGER +*> The leading dimension of the array U2. LDU2 >= N. +*> \endverbatim +*> +*> \param[out] VT2 +*> \verbatim +*> VT2 is REAL array, dimension (LDVT2,N) +*> VT2**T contains a copy of the first K right singular vectors +*> which will be used by SLASD3 in a matrix multiply (SGEMM) to +*> solve for the new right singular vectors. VT2 is arranged into +*> three blocks. The first block contains a row that corresponds +*> to the special 0 diagonal element in SIGMA; the second block +*> contains non-zeros only at and before NL +1; the third block +*> contains non-zeros only at and after NL +2. +*> \endverbatim +*> +*> \param[in] LDVT2 +*> \verbatim +*> LDVT2 is INTEGER +*> The leading dimension of the array VT2. LDVT2 >= M. +*> \endverbatim +*> +*> \param[out] IDXP +*> \verbatim +*> IDXP is INTEGER array, dimension (N) +*> This will contain the permutation used to place deflated +*> values of D at the end of the array. On output IDXP(2:K) +*> points to the nondeflated D-values and IDXP(K+1:N) +*> points to the deflated singular values. +*> \endverbatim +*> +*> \param[out] IDX +*> \verbatim +*> IDX is INTEGER array, dimension (N) +*> This will contain the permutation used to sort the contents of +*> D into ascending order. +*> \endverbatim +*> +*> \param[out] IDXC +*> \verbatim +*> IDXC is INTEGER array, dimension (N) +*> This will contain the permutation used to arrange the columns +*> of the deflated U matrix into three groups: the first group +*> contains non-zero entries only at and above NL, the second +*> contains non-zero entries only below NL+2, and the third is +*> dense. +*> \endverbatim +*> +*> \param[in,out] IDXQ +*> \verbatim +*> IDXQ is INTEGER array, dimension (N) +*> This contains the permutation which separately sorts the two +*> sub-problems in D into ascending order. Note that entries in +*> the first hlaf of this permutation must first be moved one +*> position backward; and entries in the second half +*> must first have NL+1 added to their values. +*> \endverbatim +*> +*> \param[out] COLTYP +*> \verbatim +*> COLTYP is INTEGER array, dimension (N) +*> As workspace, this will contain a label which will indicate +*> which of the following types a column in the U2 matrix or a +*> row in the VT2 matrix is: +*> 1 : non-zero in the upper half only +*> 2 : non-zero in the lower half only +*> 3 : dense +*> 4 : deflated +*> +*> On exit, it is an array of dimension 4, with COLTYP(I) being +*> the dimension of the I-th type columns. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE SLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT, + $ LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX, + $ IDXC, IDXQ, COLTYP, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE + REAL ALPHA, BETA +* .. +* .. Array Arguments .. + INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ), + $ IDXQ( * ) + REAL D( * ), DSIGMA( * ), U( LDU, * ), + $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ), + $ Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, EIGHT + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0, + $ EIGHT = 8.0E+0 ) +* .. +* .. Local Arrays .. + INTEGER CTOT( 4 ), PSM( 4 ) +* .. +* .. Local Scalars .. + INTEGER CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M, + $ N, NLP1, NLP2 + REAL C, EPS, HLFTOL, S, TAU, TOL, Z1 +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2 + EXTERNAL SLAMCH, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLACPY, SLAMRG, SLASET, SROT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( NL.LT.1 ) THEN + INFO = -1 + ELSE IF( NR.LT.1 ) THEN + INFO = -2 + ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN + INFO = -3 + END IF +* + N = NL + NR + 1 + M = N + SQRE +* + IF( LDU.LT.N ) THEN + INFO = -10 + ELSE IF( LDVT.LT.M ) THEN + INFO = -12 + ELSE IF( LDU2.LT.N ) THEN + INFO = -15 + ELSE IF( LDVT2.LT.M ) THEN + INFO = -17 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD2', -INFO ) + RETURN + END IF +* + NLP1 = NL + 1 + NLP2 = NL + 2 +* +* Generate the first part of the vector Z; and move the singular +* values in the first part of D one position backward. +* + Z1 = ALPHA*VT( NLP1, NLP1 ) + Z( 1 ) = Z1 + DO 10 I = NL, 1, -1 + Z( I+1 ) = ALPHA*VT( I, NLP1 ) + D( I+1 ) = D( I ) + IDXQ( I+1 ) = IDXQ( I ) + 1 + 10 CONTINUE +* +* Generate the second part of the vector Z. +* + DO 20 I = NLP2, M + Z( I ) = BETA*VT( I, NLP2 ) + 20 CONTINUE +* +* Initialize some reference arrays. +* + DO 30 I = 2, NLP1 + COLTYP( I ) = 1 + 30 CONTINUE + DO 40 I = NLP2, N + COLTYP( I ) = 2 + 40 CONTINUE +* +* Sort the singular values into increasing order +* + DO 50 I = NLP2, N + IDXQ( I ) = IDXQ( I ) + NLP1 + 50 CONTINUE +* +* DSIGMA, IDXC, IDXC, and the first column of U2 +* are used as storage space. +* + DO 60 I = 2, N + DSIGMA( I ) = D( IDXQ( I ) ) + U2( I, 1 ) = Z( IDXQ( I ) ) + IDXC( I ) = COLTYP( IDXQ( I ) ) + 60 CONTINUE +* + CALL SLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) ) +* + DO 70 I = 2, N + IDXI = 1 + IDX( I ) + D( I ) = DSIGMA( IDXI ) + Z( I ) = U2( IDXI, 1 ) + COLTYP( I ) = IDXC( IDXI ) + 70 CONTINUE +* +* Calculate the allowable deflation tolerance +* + EPS = SLAMCH( 'Epsilon' ) + TOL = MAX( ABS( ALPHA ), ABS( BETA ) ) + TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL ) +* +* There are 2 kinds of deflation -- first a value in the z-vector +* is small, second two (or more) singular values are very close +* together (their difference is small). +* +* If the value in the z-vector is small, we simply permute the +* array so that the corresponding singular value is moved to the +* end. +* +* If two values in the D-vector are close, we perform a two-sided +* rotation designed to make one of the corresponding z-vector +* entries zero, and then permute the array so that the deflated +* singular value is moved to the end. +* +* If there are multiple singular values then the problem deflates. +* Here the number of equal singular values are found. As each equal +* singular value is found, an elementary reflector is computed to +* rotate the corresponding singular subspace so that the +* corresponding components of Z are zero in this new basis. +* + K = 1 + K2 = N + 1 + DO 80 J = 2, N + IF( ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + IDXP( K2 ) = J + COLTYP( J ) = 4 + IF( J.EQ.N ) + $ GO TO 120 + ELSE + JPREV = J + GO TO 90 + END IF + 80 CONTINUE + 90 CONTINUE + J = JPREV + 100 CONTINUE + J = J + 1 + IF( J.GT.N ) + $ GO TO 110 + IF( ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + IDXP( K2 ) = J + COLTYP( J ) = 4 + ELSE +* +* Check if singular values are close enough to allow deflation. +* + IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN +* +* Deflation is possible. +* + S = Z( JPREV ) + C = Z( J ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = SLAPY2( C, S ) + C = C / TAU + S = -S / TAU + Z( J ) = TAU + Z( JPREV ) = ZERO +* +* Apply back the Givens rotation to the left and right +* singular vector matrices. +* + IDXJP = IDXQ( IDX( JPREV )+1 ) + IDXJ = IDXQ( IDX( J )+1 ) + IF( IDXJP.LE.NLP1 ) THEN + IDXJP = IDXJP - 1 + END IF + IF( IDXJ.LE.NLP1 ) THEN + IDXJ = IDXJ - 1 + END IF + CALL SROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S ) + CALL SROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C, + $ S ) + IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN + COLTYP( J ) = 3 + END IF + COLTYP( JPREV ) = 4 + K2 = K2 - 1 + IDXP( K2 ) = JPREV + JPREV = J + ELSE + K = K + 1 + U2( K, 1 ) = Z( JPREV ) + DSIGMA( K ) = D( JPREV ) + IDXP( K ) = JPREV + JPREV = J + END IF + END IF + GO TO 100 + 110 CONTINUE +* +* Record the last singular value. +* + K = K + 1 + U2( K, 1 ) = Z( JPREV ) + DSIGMA( K ) = D( JPREV ) + IDXP( K ) = JPREV +* + 120 CONTINUE +* +* Count up the total number of the various types of columns, then +* form a permutation which positions the four column types into +* four groups of uniform structure (although one or more of these +* groups may be empty). +* + DO 130 J = 1, 4 + CTOT( J ) = 0 + 130 CONTINUE + DO 140 J = 2, N + CT = COLTYP( J ) + CTOT( CT ) = CTOT( CT ) + 1 + 140 CONTINUE +* +* PSM(*) = Position in SubMatrix (of types 1 through 4) +* + PSM( 1 ) = 2 + PSM( 2 ) = 2 + CTOT( 1 ) + PSM( 3 ) = PSM( 2 ) + CTOT( 2 ) + PSM( 4 ) = PSM( 3 ) + CTOT( 3 ) +* +* Fill out the IDXC array so that the permutation which it induces +* will place all type-1 columns first, all type-2 columns next, +* then all type-3's, and finally all type-4's, starting from the +* second column. This applies similarly to the rows of VT. +* + DO 150 J = 2, N + JP = IDXP( J ) + CT = COLTYP( JP ) + IDXC( PSM( CT ) ) = J + PSM( CT ) = PSM( CT ) + 1 + 150 CONTINUE +* +* Sort the singular values and corresponding singular vectors into +* DSIGMA, U2, and VT2 respectively. The singular values/vectors +* which were not deflated go into the first K slots of DSIGMA, U2, +* and VT2 respectively, while those which were deflated go into the +* last N - K slots, except that the first column/row will be treated +* separately. +* + DO 160 J = 2, N + JP = IDXP( J ) + DSIGMA( J ) = D( JP ) + IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 ) + IF( IDXJ.LE.NLP1 ) THEN + IDXJ = IDXJ - 1 + END IF + CALL SCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 ) + CALL SCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 ) + 160 CONTINUE +* +* Determine DSIGMA(1), DSIGMA(2) and Z(1) +* + DSIGMA( 1 ) = ZERO + HLFTOL = TOL / TWO + IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL ) + $ DSIGMA( 2 ) = HLFTOL + IF( M.GT.N ) THEN + Z( 1 ) = SLAPY2( Z1, Z( M ) ) + IF( Z( 1 ).LE.TOL ) THEN + C = ONE + S = ZERO + Z( 1 ) = TOL + ELSE + C = Z1 / Z( 1 ) + S = Z( M ) / Z( 1 ) + END IF + ELSE + IF( ABS( Z1 ).LE.TOL ) THEN + Z( 1 ) = TOL + ELSE + Z( 1 ) = Z1 + END IF + END IF +* +* Move the rest of the updating row to Z. +* + CALL SCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 ) +* +* Determine the first column of U2, the first row of VT2 and the +* last row of VT. +* + CALL SLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 ) + U2( NLP1, 1 ) = ONE + IF( M.GT.N ) THEN + DO 170 I = 1, NLP1 + VT( M, I ) = -S*VT( NLP1, I ) + VT2( 1, I ) = C*VT( NLP1, I ) + 170 CONTINUE + DO 180 I = NLP2, M + VT2( 1, I ) = S*VT( M, I ) + VT( M, I ) = C*VT( M, I ) + 180 CONTINUE + ELSE + CALL SCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 ) + END IF + IF( M.GT.N ) THEN + CALL SCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 ) + END IF +* +* The deflated singular values and their corresponding vectors go +* into the back of D, U, and V respectively. +* + IF( N.GT.K ) THEN + CALL SCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 ) + CALL SLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ), + $ LDU ) + CALL SLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ), + $ LDVT ) + END IF +* +* Copy CTOT into COLTYP for referencing in SLASD3. +* + DO 190 J = 1, 4 + COLTYP( J ) = CTOT( J ) + 190 CONTINUE +* + RETURN +* +* End of SLASD2 +* + END diff --git a/dspl/liblapack/SRC/slasd3.f b/dspl/liblapack/SRC/slasd3.f new file mode 100644 index 0000000..add92f3 --- /dev/null +++ b/dspl/liblapack/SRC/slasd3.f @@ -0,0 +1,469 @@ +*> \brief \b SLASD3 finds all square roots of the roots of the secular equation, as defined by the values in D and Z, and then updates the singular vectors by matrix multiplication. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASD3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASD3( NL, NR, SQRE, K, D, Q, LDQ, DSIGMA, U, LDU, U2, +* LDU2, VT, LDVT, VT2, LDVT2, IDXC, CTOT, Z, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDQ, LDU, LDU2, LDVT, LDVT2, NL, NR, +* $ SQRE +* .. +* .. Array Arguments .. +* INTEGER CTOT( * ), IDXC( * ) +* REAL D( * ), DSIGMA( * ), Q( LDQ, * ), U( LDU, * ), +* $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ), +* $ Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASD3 finds all the square roots of the roots of the secular +*> equation, as defined by the values in D and Z. It makes the +*> appropriate calls to SLASD4 and then updates the singular +*> vectors by matrix multiplication. +*> +*> This code makes very mild assumptions about floating point +*> arithmetic. It will work on machines with a guard digit in +*> add/subtract, or on those binary machines without guard digits +*> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. +*> It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> +*> SLASD3 is called from SLASD1. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NL +*> \verbatim +*> NL is INTEGER +*> The row dimension of the upper block. NL >= 1. +*> \endverbatim +*> +*> \param[in] NR +*> \verbatim +*> NR is INTEGER +*> The row dimension of the lower block. NR >= 1. +*> \endverbatim +*> +*> \param[in] SQRE +*> \verbatim +*> SQRE is INTEGER +*> = 0: the lower block is an NR-by-NR square matrix. +*> = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +*> +*> The bidiagonal matrix has N = NL + NR + 1 rows and +*> M = N + SQRE >= N columns. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The size of the secular equation, 1 =< K = < N. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension(K) +*> On exit the square roots of the roots of the secular equation, +*> in ascending order. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ,K) +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= K. +*> \endverbatim +*> +*> \param[in,out] DSIGMA +*> \verbatim +*> DSIGMA is REAL array, dimension(K) +*> The first K elements of this array contain the old roots +*> of the deflated updating problem. These are the poles +*> of the secular equation. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is REAL array, dimension (LDU, N) +*> The last N - K columns of this matrix contain the deflated +*> left singular vectors. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= N. +*> \endverbatim +*> +*> \param[in] U2 +*> \verbatim +*> U2 is REAL array, dimension (LDU2, N) +*> The first K columns of this matrix contain the non-deflated +*> left singular vectors for the split problem. +*> \endverbatim +*> +*> \param[in] LDU2 +*> \verbatim +*> LDU2 is INTEGER +*> The leading dimension of the array U2. LDU2 >= N. +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is REAL array, dimension (LDVT, M) +*> The last M - K columns of VT**T contain the deflated +*> right singular vectors. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= N. +*> \endverbatim +*> +*> \param[in,out] VT2 +*> \verbatim +*> VT2 is REAL array, dimension (LDVT2, N) +*> The first K columns of VT2**T contain the non-deflated +*> right singular vectors for the split problem. +*> \endverbatim +*> +*> \param[in] LDVT2 +*> \verbatim +*> LDVT2 is INTEGER +*> The leading dimension of the array VT2. LDVT2 >= N. +*> \endverbatim +*> +*> \param[in] IDXC +*> \verbatim +*> IDXC is INTEGER array, dimension (N) +*> The permutation used to arrange the columns of U (and rows of +*> VT) into three groups: the first group contains non-zero +*> entries only at and above (or before) NL +1; the second +*> contains non-zero entries only at and below (or after) NL+2; +*> and the third is dense. The first column of U and the row of +*> VT are treated separately, however. +*> +*> The rows of the singular vectors found by SLASD4 +*> must be likewise permuted before the matrix multiplies can +*> take place. +*> \endverbatim +*> +*> \param[in] CTOT +*> \verbatim +*> CTOT is INTEGER array, dimension (4) +*> A count of the total number of the various types of columns +*> in U (or rows in VT), as described in IDXC. The fourth column +*> type is any column which has been deflated. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension (K) +*> The first K elements of this array contain the components +*> of the deflation-adjusted updating row vector. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, a singular value did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE SLASD3( NL, NR, SQRE, K, D, Q, LDQ, DSIGMA, U, LDU, U2, + $ LDU2, VT, LDVT, VT2, LDVT2, IDXC, CTOT, Z, + $ INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDQ, LDU, LDU2, LDVT, LDVT2, NL, NR, + $ SQRE +* .. +* .. Array Arguments .. + INTEGER CTOT( * ), IDXC( * ) + REAL D( * ), DSIGMA( * ), Q( LDQ, * ), U( LDU, * ), + $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ), + $ Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO, NEGONE + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0, + $ NEGONE = -1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER CTEMP, I, J, JC, KTEMP, M, N, NLP1, NLP2, NRP1 + REAL RHO, TEMP +* .. +* .. External Functions .. + REAL SLAMC3, SNRM2 + EXTERNAL SLAMC3, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, SLACPY, SLASCL, SLASD4, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( NL.LT.1 ) THEN + INFO = -1 + ELSE IF( NR.LT.1 ) THEN + INFO = -2 + ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN + INFO = -3 + END IF +* + N = NL + NR + 1 + M = N + SQRE + NLP1 = NL + 1 + NLP2 = NL + 2 +* + IF( ( K.LT.1 ) .OR. ( K.GT.N ) ) THEN + INFO = -4 + ELSE IF( LDQ.LT.K ) THEN + INFO = -7 + ELSE IF( LDU.LT.N ) THEN + INFO = -10 + ELSE IF( LDU2.LT.N ) THEN + INFO = -12 + ELSE IF( LDVT.LT.M ) THEN + INFO = -14 + ELSE IF( LDVT2.LT.M ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.1 ) THEN + D( 1 ) = ABS( Z( 1 ) ) + CALL SCOPY( M, VT2( 1, 1 ), LDVT2, VT( 1, 1 ), LDVT ) + IF( Z( 1 ).GT.ZERO ) THEN + CALL SCOPY( N, U2( 1, 1 ), 1, U( 1, 1 ), 1 ) + ELSE + DO 10 I = 1, N + U( I, 1 ) = -U2( I, 1 ) + 10 CONTINUE + END IF + RETURN + END IF +* +* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), +* which on any of these machines zeros out the bottommost +* bit of DSIGMA(I) if it is 1; this makes the subsequent +* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DSIGMA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DSIGMA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DSIGMA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 20 I = 1, K + DSIGMA( I ) = SLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I ) + 20 CONTINUE +* +* Keep a copy of Z. +* + CALL SCOPY( K, Z, 1, Q, 1 ) +* +* Normalize Z. +* + RHO = SNRM2( K, Z, 1 ) + CALL SLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO ) + RHO = RHO*RHO +* +* Find the new singular values. +* + DO 30 J = 1, K + CALL SLASD4( K, J, DSIGMA, Z, U( 1, J ), RHO, D( J ), + $ VT( 1, J ), INFO ) +* +* If the zero finder fails, report the convergence failure. +* + IF( INFO.NE.0 ) THEN + RETURN + END IF + 30 CONTINUE +* +* Compute updated Z. +* + DO 60 I = 1, K + Z( I ) = U( I, K )*VT( I, K ) + DO 40 J = 1, I - 1 + Z( I ) = Z( I )*( U( I, J )*VT( I, J ) / + $ ( DSIGMA( I )-DSIGMA( J ) ) / + $ ( DSIGMA( I )+DSIGMA( J ) ) ) + 40 CONTINUE + DO 50 J = I, K - 1 + Z( I ) = Z( I )*( U( I, J )*VT( I, J ) / + $ ( DSIGMA( I )-DSIGMA( J+1 ) ) / + $ ( DSIGMA( I )+DSIGMA( J+1 ) ) ) + 50 CONTINUE + Z( I ) = SIGN( SQRT( ABS( Z( I ) ) ), Q( I, 1 ) ) + 60 CONTINUE +* +* Compute left singular vectors of the modified diagonal matrix, +* and store related information for the right singular vectors. +* + DO 90 I = 1, K + VT( 1, I ) = Z( 1 ) / U( 1, I ) / VT( 1, I ) + U( 1, I ) = NEGONE + DO 70 J = 2, K + VT( J, I ) = Z( J ) / U( J, I ) / VT( J, I ) + U( J, I ) = DSIGMA( J )*VT( J, I ) + 70 CONTINUE + TEMP = SNRM2( K, U( 1, I ), 1 ) + Q( 1, I ) = U( 1, I ) / TEMP + DO 80 J = 2, K + JC = IDXC( J ) + Q( J, I ) = U( JC, I ) / TEMP + 80 CONTINUE + 90 CONTINUE +* +* Update the left singular vector matrix. +* + IF( K.EQ.2 ) THEN + CALL SGEMM( 'N', 'N', N, K, K, ONE, U2, LDU2, Q, LDQ, ZERO, U, + $ LDU ) + GO TO 100 + END IF + IF( CTOT( 1 ).GT.0 ) THEN + CALL SGEMM( 'N', 'N', NL, K, CTOT( 1 ), ONE, U2( 1, 2 ), LDU2, + $ Q( 2, 1 ), LDQ, ZERO, U( 1, 1 ), LDU ) + IF( CTOT( 3 ).GT.0 ) THEN + KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) + CALL SGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE, U2( 1, KTEMP ), + $ LDU2, Q( KTEMP, 1 ), LDQ, ONE, U( 1, 1 ), LDU ) + END IF + ELSE IF( CTOT( 3 ).GT.0 ) THEN + KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) + CALL SGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE, U2( 1, KTEMP ), + $ LDU2, Q( KTEMP, 1 ), LDQ, ZERO, U( 1, 1 ), LDU ) + ELSE + CALL SLACPY( 'F', NL, K, U2, LDU2, U, LDU ) + END IF + CALL SCOPY( K, Q( 1, 1 ), LDQ, U( NLP1, 1 ), LDU ) + KTEMP = 2 + CTOT( 1 ) + CTEMP = CTOT( 2 ) + CTOT( 3 ) + CALL SGEMM( 'N', 'N', NR, K, CTEMP, ONE, U2( NLP2, KTEMP ), LDU2, + $ Q( KTEMP, 1 ), LDQ, ZERO, U( NLP2, 1 ), LDU ) +* +* Generate the right singular vectors. +* + 100 CONTINUE + DO 120 I = 1, K + TEMP = SNRM2( K, VT( 1, I ), 1 ) + Q( I, 1 ) = VT( 1, I ) / TEMP + DO 110 J = 2, K + JC = IDXC( J ) + Q( I, J ) = VT( JC, I ) / TEMP + 110 CONTINUE + 120 CONTINUE +* +* Update the right singular vector matrix. +* + IF( K.EQ.2 ) THEN + CALL SGEMM( 'N', 'N', K, M, K, ONE, Q, LDQ, VT2, LDVT2, ZERO, + $ VT, LDVT ) + RETURN + END IF + KTEMP = 1 + CTOT( 1 ) + CALL SGEMM( 'N', 'N', K, NLP1, KTEMP, ONE, Q( 1, 1 ), LDQ, + $ VT2( 1, 1 ), LDVT2, ZERO, VT( 1, 1 ), LDVT ) + KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) + IF( KTEMP.LE.LDVT2 ) + $ CALL SGEMM( 'N', 'N', K, NLP1, CTOT( 3 ), ONE, Q( 1, KTEMP ), + $ LDQ, VT2( KTEMP, 1 ), LDVT2, ONE, VT( 1, 1 ), + $ LDVT ) +* + KTEMP = CTOT( 1 ) + 1 + NRP1 = NR + SQRE + IF( KTEMP.GT.1 ) THEN + DO 130 I = 1, K + Q( I, KTEMP ) = Q( I, 1 ) + 130 CONTINUE + DO 140 I = NLP2, M + VT2( KTEMP, I ) = VT2( 1, I ) + 140 CONTINUE + END IF + CTEMP = 1 + CTOT( 2 ) + CTOT( 3 ) + CALL SGEMM( 'N', 'N', K, NRP1, CTEMP, ONE, Q( 1, KTEMP ), LDQ, + $ VT2( KTEMP, NLP2 ), LDVT2, ZERO, VT( 1, NLP2 ), LDVT ) +* + RETURN +* +* End of SLASD3 +* + END diff --git a/dspl/liblapack/SRC/slasd4.f b/dspl/liblapack/SRC/slasd4.f new file mode 100644 index 0000000..31d5f7f --- /dev/null +++ b/dspl/liblapack/SRC/slasd4.f @@ -0,0 +1,1061 @@ +*> \brief \b SLASD4 computes the square root of the i-th updated eigenvalue of a positive symmetric rank-one modification to a positive diagonal matrix. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASD4 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASD4( N, I, D, Z, DELTA, RHO, SIGMA, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER I, INFO, N +* REAL RHO, SIGMA +* .. +* .. Array Arguments .. +* REAL D( * ), DELTA( * ), WORK( * ), Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This subroutine computes the square root of the I-th updated +*> eigenvalue of a positive symmetric rank-one modification to +*> a positive diagonal matrix whose entries are given as the squares +*> of the corresponding entries in the array d, and that +*> +*> 0 <= D(i) < D(j) for i < j +*> +*> and that RHO > 0. This is arranged by the calling routine, and is +*> no loss in generality. The rank-one modified system is thus +*> +*> diag( D ) * diag( D ) + RHO * Z * Z_transpose. +*> +*> where we assume the Euclidean norm of Z is 1. +*> +*> The method consists of approximating the rational functions in the +*> secular equation by simpler interpolating rational functions. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of all arrays. +*> \endverbatim +*> +*> \param[in] I +*> \verbatim +*> I is INTEGER +*> The index of the eigenvalue to be computed. 1 <= I <= N. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension ( N ) +*> The original eigenvalues. It is assumed that they are in +*> order, 0 <= D(I) < D(J) for I < J. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is REAL array, dimension ( N ) +*> The components of the updating vector. +*> \endverbatim +*> +*> \param[out] DELTA +*> \verbatim +*> DELTA is REAL array, dimension ( N ) +*> If N .ne. 1, DELTA contains (D(j) - sigma_I) in its j-th +*> component. If N = 1, then DELTA(1) = 1. The vector DELTA +*> contains the information necessary to construct the +*> (singular) eigenvectors. +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is REAL +*> The scalar in the symmetric updating formula. +*> \endverbatim +*> +*> \param[out] SIGMA +*> \verbatim +*> SIGMA is REAL +*> The computed sigma_I, the I-th updated eigenvalue. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension ( N ) +*> If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th +*> component. If N = 1, then WORK( 1 ) = 1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = 1, the updating process failed. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> Logical variable ORGATI (origin-at-i?) is used for distinguishing +*> whether D(i) or D(i+1) is treated as the origin. +*> +*> ORGATI = .true. origin at i +*> ORGATI = .false. origin at i+1 +*> +*> Logical variable SWTCH3 (switch-for-3-poles?) is for noting +*> if we are working with THREE poles! +*> +*> MAXIT is the maximum number of iterations allowed for each +*> eigenvalue. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ren-Cang Li, Computer Science Division, University of California +*> at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE SLASD4( N, I, D, Z, DELTA, RHO, SIGMA, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER I, INFO, N + REAL RHO, SIGMA +* .. +* .. Array Arguments .. + REAL D( * ), DELTA( * ), WORK( * ), Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 400 ) + REAL ZERO, ONE, TWO, THREE, FOUR, EIGHT, TEN + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0, + $ THREE = 3.0E+0, FOUR = 4.0E+0, EIGHT = 8.0E+0, + $ TEN = 10.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ORGATI, SWTCH, SWTCH3, GEOMAVG + INTEGER II, IIM1, IIP1, IP1, ITER, J, NITER + REAL A, B, C, DELSQ, DELSQ2, SQ2, DPHI, DPSI, DTIIM, + $ DTIIP, DTIPSQ, DTISQ, DTNSQ, DTNSQ1, DW, EPS, + $ ERRETM, ETA, PHI, PREW, PSI, RHOINV, SGLB, + $ SGUB, TAU, TAU2, TEMP, TEMP1, TEMP2, W +* .. +* .. Local Arrays .. + REAL DD( 3 ), ZZ( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL SLAED6, SLASD5 +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Since this routine is called in an inner loop, we do no argument +* checking. +* +* Quick return for N=1 and 2. +* + INFO = 0 + IF( N.EQ.1 ) THEN +* +* Presumably, I=1 upon entry +* + SIGMA = SQRT( D( 1 )*D( 1 )+RHO*Z( 1 )*Z( 1 ) ) + DELTA( 1 ) = ONE + WORK( 1 ) = ONE + RETURN + END IF + IF( N.EQ.2 ) THEN + CALL SLASD5( I, D, Z, DELTA, RHO, SIGMA, WORK ) + RETURN + END IF +* +* Compute machine epsilon +* + EPS = SLAMCH( 'Epsilon' ) + RHOINV = ONE / RHO + TAU2= ZERO +* +* The case I = N +* + IF( I.EQ.N ) THEN +* +* Initialize some basic variables +* + II = N - 1 + NITER = 1 +* +* Calculate initial guess +* + TEMP = RHO / TWO +* +* If ||Z||_2 is not one, then TEMP should be set to +* RHO * ||Z||_2^2 / TWO +* + TEMP1 = TEMP / ( D( N )+SQRT( D( N )*D( N )+TEMP ) ) + DO 10 J = 1, N + WORK( J ) = D( J ) + D( N ) + TEMP1 + DELTA( J ) = ( D( J )-D( N ) ) - TEMP1 + 10 CONTINUE +* + PSI = ZERO + DO 20 J = 1, N - 2 + PSI = PSI + Z( J )*Z( J ) / ( DELTA( J )*WORK( J ) ) + 20 CONTINUE +* + C = RHOINV + PSI + W = C + Z( II )*Z( II ) / ( DELTA( II )*WORK( II ) ) + + $ Z( N )*Z( N ) / ( DELTA( N )*WORK( N ) ) +* + IF( W.LE.ZERO ) THEN + TEMP1 = SQRT( D( N )*D( N )+RHO ) + TEMP = Z( N-1 )*Z( N-1 ) / ( ( D( N-1 )+TEMP1 )* + $ ( D( N )-D( N-1 )+RHO / ( D( N )+TEMP1 ) ) ) + + $ Z( N )*Z( N ) / RHO +* +* The following TAU2 is to approximate +* SIGMA_n^2 - D( N )*D( N ) +* + IF( C.LE.TEMP ) THEN + TAU = RHO + ELSE + DELSQ = ( D( N )-D( N-1 ) )*( D( N )+D( N-1 ) ) + A = -C*DELSQ + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N ) + B = Z( N )*Z( N )*DELSQ + IF( A.LT.ZERO ) THEN + TAU2 = TWO*B / ( SQRT( A*A+FOUR*B*C )-A ) + ELSE + TAU2 = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C ) + END IF + TAU = TAU2 / ( D( N )+SQRT( D( N )*D( N )+TAU2 ) ) + END IF +* +* It can be proved that +* D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU2 <= D(N)^2+RHO +* + ELSE + DELSQ = ( D( N )-D( N-1 ) )*( D( N )+D( N-1 ) ) + A = -C*DELSQ + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N ) + B = Z( N )*Z( N )*DELSQ +* +* The following TAU2 is to approximate +* SIGMA_n^2 - D( N )*D( N ) +* + IF( A.LT.ZERO ) THEN + TAU2 = TWO*B / ( SQRT( A*A+FOUR*B*C )-A ) + ELSE + TAU2 = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C ) + END IF + TAU = TAU2 / ( D( N )+SQRT( D( N )*D( N )+TAU2 ) ) + +* +* It can be proved that +* D(N)^2 < D(N)^2+TAU2 < SIGMA(N)^2 < D(N)^2+RHO/2 +* + END IF +* +* The following TAU is to approximate SIGMA_n - D( N ) +* +* TAU = TAU2 / ( D( N )+SQRT( D( N )*D( N )+TAU2 ) ) +* + SIGMA = D( N ) + TAU + DO 30 J = 1, N + DELTA( J ) = ( D( J )-D( N ) ) - TAU + WORK( J ) = D( J ) + D( N ) + TAU + 30 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 40 J = 1, II + TEMP = Z( J ) / ( DELTA( J )*WORK( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 40 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TEMP = Z( N ) / ( DELTA( N )*WORK( N ) ) + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +* $ + ABS( TAU2 )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + GO TO 240 + END IF +* +* Calculate the new step +* + NITER = NITER + 1 + DTNSQ1 = WORK( N-1 )*DELTA( N-1 ) + DTNSQ = WORK( N )*DELTA( N ) + C = W - DTNSQ1*DPSI - DTNSQ*DPHI + A = ( DTNSQ+DTNSQ1 )*W - DTNSQ*DTNSQ1*( DPSI+DPHI ) + B = DTNSQ*DTNSQ1*W + IF( C.LT.ZERO ) + $ C = ABS( C ) + IF( C.EQ.ZERO ) THEN + ETA = RHO - SIGMA*SIGMA + ELSE IF( A.GE.ZERO ) THEN + ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GT.ZERO ) + $ ETA = -W / ( DPSI+DPHI ) + TEMP = ETA - DTNSQ + IF( TEMP.GT.RHO ) + $ ETA = RHO + DTNSQ +* + ETA = ETA / ( SIGMA+SQRT( ETA+SIGMA*SIGMA ) ) + TAU = TAU + ETA + SIGMA = SIGMA + ETA +* + DO 50 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + WORK( J ) = WORK( J ) + ETA + 50 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 60 J = 1, II + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 60 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TAU2 = WORK( N )*DELTA( N ) + TEMP = Z( N ) / TAU2 + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +* $ + ABS( TAU2 )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI +* +* Main loop to update the values of the array DELTA +* + ITER = NITER + 1 +* + DO 90 NITER = ITER, MAXIT +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + GO TO 240 + END IF +* +* Calculate the new step +* + DTNSQ1 = WORK( N-1 )*DELTA( N-1 ) + DTNSQ = WORK( N )*DELTA( N ) + C = W - DTNSQ1*DPSI - DTNSQ*DPHI + A = ( DTNSQ+DTNSQ1 )*W - DTNSQ1*DTNSQ*( DPSI+DPHI ) + B = DTNSQ1*DTNSQ*W + IF( A.GE.ZERO ) THEN + ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GT.ZERO ) + $ ETA = -W / ( DPSI+DPHI ) + TEMP = ETA - DTNSQ + IF( TEMP.LE.ZERO ) + $ ETA = ETA / TWO +* + ETA = ETA / ( SIGMA+SQRT( ETA+SIGMA*SIGMA ) ) + TAU = TAU + ETA + SIGMA = SIGMA + ETA +* + DO 70 J = 1, N + DELTA( J ) = DELTA( J ) - ETA + WORK( J ) = WORK( J ) + ETA + 70 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 80 J = 1, II + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 80 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + TAU2 = WORK( N )*DELTA( N ) + TEMP = Z( N ) / TAU2 + PHI = Z( N )*TEMP + DPHI = TEMP*TEMP + ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +* $ + ABS( TAU2 )*( DPSI+DPHI ) +* + W = RHOINV + PHI + PSI + 90 CONTINUE +* +* Return with INFO = 1, NITER = MAXIT and not converged +* + INFO = 1 + GO TO 240 +* +* End for the case I = N +* + ELSE +* +* The case for I < N +* + NITER = 1 + IP1 = I + 1 +* +* Calculate initial guess +* + DELSQ = ( D( IP1 )-D( I ) )*( D( IP1 )+D( I ) ) + DELSQ2 = DELSQ / TWO + SQ2=SQRT( ( D( I )*D( I )+D( IP1 )*D( IP1 ) ) / TWO ) + TEMP = DELSQ2 / ( D( I )+SQ2 ) + DO 100 J = 1, N + WORK( J ) = D( J ) + D( I ) + TEMP + DELTA( J ) = ( D( J )-D( I ) ) - TEMP + 100 CONTINUE +* + PSI = ZERO + DO 110 J = 1, I - 1 + PSI = PSI + Z( J )*Z( J ) / ( WORK( J )*DELTA( J ) ) + 110 CONTINUE +* + PHI = ZERO + DO 120 J = N, I + 2, -1 + PHI = PHI + Z( J )*Z( J ) / ( WORK( J )*DELTA( J ) ) + 120 CONTINUE + C = RHOINV + PSI + PHI + W = C + Z( I )*Z( I ) / ( WORK( I )*DELTA( I ) ) + + $ Z( IP1 )*Z( IP1 ) / ( WORK( IP1 )*DELTA( IP1 ) ) +* + GEOMAVG = .FALSE. + IF( W.GT.ZERO ) THEN +* +* d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2 +* +* We choose d(i) as origin. +* + ORGATI = .TRUE. + II = I + SGLB = ZERO + SGUB = DELSQ2 / ( D( I )+SQ2 ) + A = C*DELSQ + Z( I )*Z( I ) + Z( IP1 )*Z( IP1 ) + B = Z( I )*Z( I )*DELSQ + IF( A.GT.ZERO ) THEN + TAU2 = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + ELSE + TAU2 = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + END IF +* +* TAU2 now is an estimation of SIGMA^2 - D( I )^2. The +* following, however, is the corresponding estimation of +* SIGMA - D( I ). +* + TAU = TAU2 / ( D( I )+SQRT( D( I )*D( I )+TAU2 ) ) + TEMP = SQRT(EPS) + IF( (D(I).LE.TEMP*D(IP1)).AND.(ABS(Z(I)).LE.TEMP) + $ .AND.(D(I).GT.ZERO) ) THEN + TAU = MIN( TEN*D(I), SGUB ) + GEOMAVG = .TRUE. + END IF + ELSE +* +* (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2 +* +* We choose d(i+1) as origin. +* + ORGATI = .FALSE. + II = IP1 + SGLB = -DELSQ2 / ( D( II )+SQ2 ) + SGUB = ZERO + A = C*DELSQ - Z( I )*Z( I ) - Z( IP1 )*Z( IP1 ) + B = Z( IP1 )*Z( IP1 )*DELSQ + IF( A.LT.ZERO ) THEN + TAU2 = TWO*B / ( A-SQRT( ABS( A*A+FOUR*B*C ) ) ) + ELSE + TAU2 = -( A+SQRT( ABS( A*A+FOUR*B*C ) ) ) / ( TWO*C ) + END IF +* +* TAU2 now is an estimation of SIGMA^2 - D( IP1 )^2. The +* following, however, is the corresponding estimation of +* SIGMA - D( IP1 ). +* + TAU = TAU2 / ( D( IP1 )+SQRT( ABS( D( IP1 )*D( IP1 )+ + $ TAU2 ) ) ) + END IF +* + SIGMA = D( II ) + TAU + DO 130 J = 1, N + WORK( J ) = D( J ) + D( II ) + TAU + DELTA( J ) = ( D( J )-D( II ) ) - TAU + 130 CONTINUE + IIM1 = II - 1 + IIP1 = II + 1 +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 150 J = 1, IIM1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 150 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 160 J = N, IIP1, -1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 160 CONTINUE +* + W = RHOINV + PHI + PSI +* +* W is the value of the secular function with +* its ii-th element removed. +* + SWTCH3 = .FALSE. + IF( ORGATI ) THEN + IF( W.LT.ZERO ) + $ SWTCH3 = .TRUE. + ELSE + IF( W.GT.ZERO ) + $ SWTCH3 = .TRUE. + END IF + IF( II.EQ.1 .OR. II.EQ.N ) + $ SWTCH3 = .FALSE. +* + TEMP = Z( II ) / ( WORK( II )*DELTA( II ) ) + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = W + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + $ + THREE*ABS( TEMP ) +* $ + ABS( TAU2 )*DW +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN + GO TO 240 + END IF +* + IF( W.LE.ZERO ) THEN + SGLB = MAX( SGLB, TAU ) + ELSE + SGUB = MIN( SGUB, TAU ) + END IF +* +* Calculate the new step +* + NITER = NITER + 1 + IF( .NOT.SWTCH3 ) THEN + DTIPSQ = WORK( IP1 )*DELTA( IP1 ) + DTISQ = WORK( I )*DELTA( I ) + IF( ORGATI ) THEN + C = W - DTIPSQ*DW + DELSQ*( Z( I ) / DTISQ )**2 + ELSE + C = W - DTISQ*DW - DELSQ*( Z( IP1 ) / DTIPSQ )**2 + END IF + A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW + B = DTIPSQ*DTISQ*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ*( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + DTISQ*DTISQ*( DPSI+DPHI ) + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + ELSE +* +* Interpolation using THREE most relevant poles +* + DTIIM = WORK( IIM1 )*DELTA( IIM1 ) + DTIIP = WORK( IIP1 )*DELTA( IIP1 ) + TEMP = RHOINV + PSI + PHI + IF( ORGATI ) THEN + TEMP1 = Z( IIM1 ) / DTIIM + TEMP1 = TEMP1*TEMP1 + C = ( TEMP - DTIIP*( DPSI+DPHI ) ) - + $ ( D( IIM1 )-D( IIP1 ) )*( D( IIM1 )+D( IIP1 ) )*TEMP1 + ZZ( 1 ) = Z( IIM1 )*Z( IIM1 ) + IF( DPSI.LT.TEMP1 ) THEN + ZZ( 3 ) = DTIIP*DTIIP*DPHI + ELSE + ZZ( 3 ) = DTIIP*DTIIP*( ( DPSI-TEMP1 )+DPHI ) + END IF + ELSE + TEMP1 = Z( IIP1 ) / DTIIP + TEMP1 = TEMP1*TEMP1 + C = ( TEMP - DTIIM*( DPSI+DPHI ) ) - + $ ( D( IIP1 )-D( IIM1 ) )*( D( IIM1 )+D( IIP1 ) )*TEMP1 + IF( DPHI.LT.TEMP1 ) THEN + ZZ( 1 ) = DTIIM*DTIIM*DPSI + ELSE + ZZ( 1 ) = DTIIM*DTIIM*( DPSI+( DPHI-TEMP1 ) ) + END IF + ZZ( 3 ) = Z( IIP1 )*Z( IIP1 ) + END IF + ZZ( 2 ) = Z( II )*Z( II ) + DD( 1 ) = DTIIM + DD( 2 ) = DELTA( II )*WORK( II ) + DD( 3 ) = DTIIP + CALL SLAED6( NITER, ORGATI, C, DD, ZZ, W, ETA, INFO ) +* + IF( INFO.NE.0 ) THEN +* +* If INFO is not 0, i.e., SLAED6 failed, switch back +* to 2 pole interpolation. +* + SWTCH3 = .FALSE. + INFO = 0 + DTIPSQ = WORK( IP1 )*DELTA( IP1 ) + DTISQ = WORK( I )*DELTA( I ) + IF( ORGATI ) THEN + C = W - DTIPSQ*DW + DELSQ*( Z( I ) / DTISQ )**2 + ELSE + C = W - DTISQ*DW - DELSQ*( Z( IP1 ) / DTIPSQ )**2 + END IF + A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW + B = DTIPSQ*DTISQ*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ*( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + DTISQ*DTISQ*( DPSI+DPHI) + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + END IF + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GE.ZERO ) + $ ETA = -W / DW +* + ETA = ETA / ( SIGMA+SQRT( SIGMA*SIGMA+ETA ) ) + TEMP = TAU + ETA + IF( TEMP.GT.SGUB .OR. TEMP.LT.SGLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( SGUB-TAU ) / TWO + ELSE + ETA = ( SGLB-TAU ) / TWO + END IF + IF( GEOMAVG ) THEN + IF( W .LT. ZERO ) THEN + IF( TAU .GT. ZERO ) THEN + ETA = SQRT(SGUB*TAU)-TAU + END IF + ELSE + IF( SGLB .GT. ZERO ) THEN + ETA = SQRT(SGLB*TAU)-TAU + END IF + END IF + END IF + END IF +* + PREW = W +* + TAU = TAU + ETA + SIGMA = SIGMA + ETA +* + DO 170 J = 1, N + WORK( J ) = WORK( J ) + ETA + DELTA( J ) = DELTA( J ) - ETA + 170 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 180 J = 1, IIM1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 180 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 190 J = N, IIP1, -1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 190 CONTINUE +* + TAU2 = WORK( II )*DELTA( II ) + TEMP = Z( II ) / TAU2 + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = RHOINV + PHI + PSI + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + $ + THREE*ABS( TEMP ) +* $ + ABS( TAU2 )*DW +* + SWTCH = .FALSE. + IF( ORGATI ) THEN + IF( -W.GT.ABS( PREW ) / TEN ) + $ SWTCH = .TRUE. + ELSE + IF( W.GT.ABS( PREW ) / TEN ) + $ SWTCH = .TRUE. + END IF +* +* Main loop to update the values of the array DELTA and WORK +* + ITER = NITER + 1 +* + DO 230 NITER = ITER, MAXIT +* +* Test for convergence +* + IF( ABS( W ).LE.EPS*ERRETM ) THEN +* $ .OR. (SGUB-SGLB).LE.EIGHT*ABS(SGUB+SGLB) ) THEN + GO TO 240 + END IF +* + IF( W.LE.ZERO ) THEN + SGLB = MAX( SGLB, TAU ) + ELSE + SGUB = MIN( SGUB, TAU ) + END IF +* +* Calculate the new step +* + IF( .NOT.SWTCH3 ) THEN + DTIPSQ = WORK( IP1 )*DELTA( IP1 ) + DTISQ = WORK( I )*DELTA( I ) + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + C = W - DTIPSQ*DW + DELSQ*( Z( I ) / DTISQ )**2 + ELSE + C = W - DTISQ*DW - DELSQ*( Z( IP1 ) / DTIPSQ )**2 + END IF + ELSE + TEMP = Z( II ) / ( WORK( II )*DELTA( II ) ) + IF( ORGATI ) THEN + DPSI = DPSI + TEMP*TEMP + ELSE + DPHI = DPHI + TEMP*TEMP + END IF + C = W - DTISQ*DPSI - DTIPSQ*DPHI + END IF + A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW + B = DTIPSQ*DTISQ*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ* + $ ( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + + $ DTISQ*DTISQ*( DPSI+DPHI ) + END IF + ELSE + A = DTISQ*DTISQ*DPSI + DTIPSQ*DTIPSQ*DPHI + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + ELSE +* +* Interpolation using THREE most relevant poles +* + DTIIM = WORK( IIM1 )*DELTA( IIM1 ) + DTIIP = WORK( IIP1 )*DELTA( IIP1 ) + TEMP = RHOINV + PSI + PHI + IF( SWTCH ) THEN + C = TEMP - DTIIM*DPSI - DTIIP*DPHI + ZZ( 1 ) = DTIIM*DTIIM*DPSI + ZZ( 3 ) = DTIIP*DTIIP*DPHI + ELSE + IF( ORGATI ) THEN + TEMP1 = Z( IIM1 ) / DTIIM + TEMP1 = TEMP1*TEMP1 + TEMP2 = ( D( IIM1 )-D( IIP1 ) )* + $ ( D( IIM1 )+D( IIP1 ) )*TEMP1 + C = TEMP - DTIIP*( DPSI+DPHI ) - TEMP2 + ZZ( 1 ) = Z( IIM1 )*Z( IIM1 ) + IF( DPSI.LT.TEMP1 ) THEN + ZZ( 3 ) = DTIIP*DTIIP*DPHI + ELSE + ZZ( 3 ) = DTIIP*DTIIP*( ( DPSI-TEMP1 )+DPHI ) + END IF + ELSE + TEMP1 = Z( IIP1 ) / DTIIP + TEMP1 = TEMP1*TEMP1 + TEMP2 = ( D( IIP1 )-D( IIM1 ) )* + $ ( D( IIM1 )+D( IIP1 ) )*TEMP1 + C = TEMP - DTIIM*( DPSI+DPHI ) - TEMP2 + IF( DPHI.LT.TEMP1 ) THEN + ZZ( 1 ) = DTIIM*DTIIM*DPSI + ELSE + ZZ( 1 ) = DTIIM*DTIIM*( DPSI+( DPHI-TEMP1 ) ) + END IF + ZZ( 3 ) = Z( IIP1 )*Z( IIP1 ) + END IF + END IF + DD( 1 ) = DTIIM + DD( 2 ) = DELTA( II )*WORK( II ) + DD( 3 ) = DTIIP + CALL SLAED6( NITER, ORGATI, C, DD, ZZ, W, ETA, INFO ) +* + IF( INFO.NE.0 ) THEN +* +* If INFO is not 0, i.e., SLAED6 failed, switch +* back to two pole interpolation +* + SWTCH3 = .FALSE. + INFO = 0 + DTIPSQ = WORK( IP1 )*DELTA( IP1 ) + DTISQ = WORK( I )*DELTA( I ) + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + C = W - DTIPSQ*DW + DELSQ*( Z( I )/DTISQ )**2 + ELSE + C = W - DTISQ*DW - DELSQ*( Z( IP1 )/DTIPSQ )**2 + END IF + ELSE + TEMP = Z( II ) / ( WORK( II )*DELTA( II ) ) + IF( ORGATI ) THEN + DPSI = DPSI + TEMP*TEMP + ELSE + DPHI = DPHI + TEMP*TEMP + END IF + C = W - DTISQ*DPSI - DTIPSQ*DPHI + END IF + A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW + B = DTIPSQ*DTISQ*W + IF( C.EQ.ZERO ) THEN + IF( A.EQ.ZERO ) THEN + IF( .NOT.SWTCH ) THEN + IF( ORGATI ) THEN + A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ* + $ ( DPSI+DPHI ) + ELSE + A = Z( IP1 )*Z( IP1 ) + + $ DTISQ*DTISQ*( DPSI+DPHI ) + END IF + ELSE + A = DTISQ*DTISQ*DPSI + DTIPSQ*DTIPSQ*DPHI + END IF + END IF + ETA = B / A + ELSE IF( A.LE.ZERO ) THEN + ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C ) + ELSE + ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) + END IF + END IF + END IF +* +* Note, eta should be positive if w is negative, and +* eta should be negative otherwise. However, +* if for some reason caused by roundoff, eta*w > 0, +* we simply use one Newton step instead. This way +* will guarantee eta*w < 0. +* + IF( W*ETA.GE.ZERO ) + $ ETA = -W / DW +* + ETA = ETA / ( SIGMA+SQRT( SIGMA*SIGMA+ETA ) ) + TEMP=TAU+ETA + IF( TEMP.GT.SGUB .OR. TEMP.LT.SGLB ) THEN + IF( W.LT.ZERO ) THEN + ETA = ( SGUB-TAU ) / TWO + ELSE + ETA = ( SGLB-TAU ) / TWO + END IF + IF( GEOMAVG ) THEN + IF( W .LT. ZERO ) THEN + IF( TAU .GT. ZERO ) THEN + ETA = SQRT(SGUB*TAU)-TAU + END IF + ELSE + IF( SGLB .GT. ZERO ) THEN + ETA = SQRT(SGLB*TAU)-TAU + END IF + END IF + END IF + END IF +* + PREW = W +* + TAU = TAU + ETA + SIGMA = SIGMA + ETA +* + DO 200 J = 1, N + WORK( J ) = WORK( J ) + ETA + DELTA( J ) = DELTA( J ) - ETA + 200 CONTINUE +* +* Evaluate PSI and the derivative DPSI +* + DPSI = ZERO + PSI = ZERO + ERRETM = ZERO + DO 210 J = 1, IIM1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PSI = PSI + Z( J )*TEMP + DPSI = DPSI + TEMP*TEMP + ERRETM = ERRETM + PSI + 210 CONTINUE + ERRETM = ABS( ERRETM ) +* +* Evaluate PHI and the derivative DPHI +* + DPHI = ZERO + PHI = ZERO + DO 220 J = N, IIP1, -1 + TEMP = Z( J ) / ( WORK( J )*DELTA( J ) ) + PHI = PHI + Z( J )*TEMP + DPHI = DPHI + TEMP*TEMP + ERRETM = ERRETM + PHI + 220 CONTINUE +* + TAU2 = WORK( II )*DELTA( II ) + TEMP = Z( II ) / TAU2 + DW = DPSI + DPHI + TEMP*TEMP + TEMP = Z( II )*TEMP + W = RHOINV + PHI + PSI + TEMP + ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV + $ + THREE*ABS( TEMP ) +* $ + ABS( TAU2 )*DW +* + IF( W*PREW.GT.ZERO .AND. ABS( W ).GT.ABS( PREW ) / TEN ) + $ SWTCH = .NOT.SWTCH +* + 230 CONTINUE +* +* Return with INFO = 1, NITER = MAXIT and not converged +* + INFO = 1 +* + END IF +* + 240 CONTINUE + RETURN +* +* End of SLASD4 +* + END diff --git a/dspl/liblapack/SRC/slasd5.f b/dspl/liblapack/SRC/slasd5.f new file mode 100644 index 0000000..a362b86 --- /dev/null +++ b/dspl/liblapack/SRC/slasd5.f @@ -0,0 +1,231 @@ +*> \brief \b SLASD5 computes the square root of the i-th eigenvalue of a positive symmetric rank-one modification of a 2-by-2 diagonal matrix. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASD5 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASD5( I, D, Z, DELTA, RHO, DSIGMA, WORK ) +* +* .. Scalar Arguments .. +* INTEGER I +* REAL DSIGMA, RHO +* .. +* .. Array Arguments .. +* REAL D( 2 ), DELTA( 2 ), WORK( 2 ), Z( 2 ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> This subroutine computes the square root of the I-th eigenvalue +*> of a positive symmetric rank-one modification of a 2-by-2 diagonal +*> matrix +*> +*> diag( D ) * diag( D ) + RHO * Z * transpose(Z) . +*> +*> The diagonal entries in the array D are assumed to satisfy +*> +*> 0 <= D(i) < D(j) for i < j . +*> +*> We also assume RHO > 0 and that the Euclidean norm of the vector +*> Z is one. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] I +*> \verbatim +*> I is INTEGER +*> The index of the eigenvalue to be computed. I = 1 or I = 2. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (2) +*> The original eigenvalues. We assume 0 <= D(1) < D(2). +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is REAL array, dimension (2) +*> The components of the updating vector. +*> \endverbatim +*> +*> \param[out] DELTA +*> \verbatim +*> DELTA is REAL array, dimension (2) +*> Contains (D(j) - sigma_I) in its j-th component. +*> The vector DELTA contains the information necessary +*> to construct the eigenvectors. +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is REAL +*> The scalar in the symmetric updating formula. +*> \endverbatim +*> +*> \param[out] DSIGMA +*> \verbatim +*> DSIGMA is REAL +*> The computed sigma_I, the I-th updated eigenvalue. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (2) +*> WORK contains (D(j) + sigma_I) in its j-th component. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ren-Cang Li, Computer Science Division, University of California +*> at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE SLASD5( I, D, Z, DELTA, RHO, DSIGMA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER I + REAL DSIGMA, RHO +* .. +* .. Array Arguments .. + REAL D( 2 ), DELTA( 2 ), WORK( 2 ), Z( 2 ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, THREE, FOUR + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0, + $ THREE = 3.0E+0, FOUR = 4.0E+0 ) +* .. +* .. Local Scalars .. + REAL B, C, DEL, DELSQ, TAU, W +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + DEL = D( 2 ) - D( 1 ) + DELSQ = DEL*( D( 2 )+D( 1 ) ) + IF( I.EQ.1 ) THEN + W = ONE + FOUR*RHO*( Z( 2 )*Z( 2 ) / ( D( 1 )+THREE*D( 2 ) )- + $ Z( 1 )*Z( 1 ) / ( THREE*D( 1 )+D( 2 ) ) ) / DEL + IF( W.GT.ZERO ) THEN + B = DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 1 )*Z( 1 )*DELSQ +* +* B > ZERO, always +* +* The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 ) +* + TAU = TWO*C / ( B+SQRT( ABS( B*B-FOUR*C ) ) ) +* +* The following TAU is DSIGMA - D( 1 ) +* + TAU = TAU / ( D( 1 )+SQRT( D( 1 )*D( 1 )+TAU ) ) + DSIGMA = D( 1 ) + TAU + DELTA( 1 ) = -TAU + DELTA( 2 ) = DEL - TAU + WORK( 1 ) = TWO*D( 1 ) + TAU + WORK( 2 ) = ( D( 1 )+TAU ) + D( 2 ) +* DELTA( 1 ) = -Z( 1 ) / TAU +* DELTA( 2 ) = Z( 2 ) / ( DEL-TAU ) + ELSE + B = -DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 2 )*Z( 2 )*DELSQ +* +* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) +* + IF( B.GT.ZERO ) THEN + TAU = -TWO*C / ( B+SQRT( B*B+FOUR*C ) ) + ELSE + TAU = ( B-SQRT( B*B+FOUR*C ) ) / TWO + END IF +* +* The following TAU is DSIGMA - D( 2 ) +* + TAU = TAU / ( D( 2 )+SQRT( ABS( D( 2 )*D( 2 )+TAU ) ) ) + DSIGMA = D( 2 ) + TAU + DELTA( 1 ) = -( DEL+TAU ) + DELTA( 2 ) = -TAU + WORK( 1 ) = D( 1 ) + TAU + D( 2 ) + WORK( 2 ) = TWO*D( 2 ) + TAU +* DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) +* DELTA( 2 ) = -Z( 2 ) / TAU + END IF +* TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) +* DELTA( 1 ) = DELTA( 1 ) / TEMP +* DELTA( 2 ) = DELTA( 2 ) / TEMP + ELSE +* +* Now I=2 +* + B = -DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) ) + C = RHO*Z( 2 )*Z( 2 )*DELSQ +* +* The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) +* + IF( B.GT.ZERO ) THEN + TAU = ( B+SQRT( B*B+FOUR*C ) ) / TWO + ELSE + TAU = TWO*C / ( -B+SQRT( B*B+FOUR*C ) ) + END IF +* +* The following TAU is DSIGMA - D( 2 ) +* + TAU = TAU / ( D( 2 )+SQRT( D( 2 )*D( 2 )+TAU ) ) + DSIGMA = D( 2 ) + TAU + DELTA( 1 ) = -( DEL+TAU ) + DELTA( 2 ) = -TAU + WORK( 1 ) = D( 1 ) + TAU + D( 2 ) + WORK( 2 ) = TWO*D( 2 ) + TAU +* DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU ) +* DELTA( 2 ) = -Z( 2 ) / TAU +* TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) ) +* DELTA( 1 ) = DELTA( 1 ) / TEMP +* DELTA( 2 ) = DELTA( 2 ) / TEMP + END IF + RETURN +* +* End of SLASD5 +* + END diff --git a/dspl/liblapack/SRC/slasd6.f b/dspl/liblapack/SRC/slasd6.f new file mode 100644 index 0000000..ae37de0 --- /dev/null +++ b/dspl/liblapack/SRC/slasd6.f @@ -0,0 +1,443 @@ +*> \brief \b SLASD6 computes the SVD of an updated upper bidiagonal matrix obtained by merging two smaller ones by appending a row. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASD6 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASD6( ICOMPQ, NL, NR, SQRE, D, VF, VL, ALPHA, BETA, +* IDXQ, PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, +* LDGNUM, POLES, DIFL, DIFR, Z, K, C, S, WORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL, +* $ NR, SQRE +* REAL ALPHA, BETA, C, S +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( LDGCOL, * ), IDXQ( * ), IWORK( * ), +* $ PERM( * ) +* REAL D( * ), DIFL( * ), DIFR( * ), +* $ GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ), +* $ VF( * ), VL( * ), WORK( * ), Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASD6 computes the SVD of an updated upper bidiagonal matrix B +*> obtained by merging two smaller ones by appending a row. This +*> routine is used only for the problem which requires all singular +*> values and optionally singular vector matrices in factored form. +*> B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE. +*> A related subroutine, SLASD1, handles the case in which all singular +*> values and singular vectors of the bidiagonal matrix are desired. +*> +*> SLASD6 computes the SVD as follows: +*> +*> ( D1(in) 0 0 0 ) +*> B = U(in) * ( Z1**T a Z2**T b ) * VT(in) +*> ( 0 0 D2(in) 0 ) +*> +*> = U(out) * ( D(out) 0) * VT(out) +*> +*> where Z**T = (Z1**T a Z2**T b) = u**T VT**T, and u is a vector of dimension M +*> with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros +*> elsewhere; and the entry b is empty if SQRE = 0. +*> +*> The singular values of B can be computed using D1, D2, the first +*> components of all the right singular vectors of the lower block, and +*> the last components of all the right singular vectors of the upper +*> block. These components are stored and updated in VF and VL, +*> respectively, in SLASD6. Hence U and VT are not explicitly +*> referenced. +*> +*> The singular values are stored in D. The algorithm consists of two +*> stages: +*> +*> The first stage consists of deflating the size of the problem +*> when there are multiple singular values or if there is a zero +*> in the Z vector. For each such occurrence the dimension of the +*> secular equation problem is reduced by one. This stage is +*> performed by the routine SLASD7. +*> +*> The second stage consists of calculating the updated +*> singular values. This is done by finding the roots of the +*> secular equation via the routine SLASD4 (as called by SLASD8). +*> This routine also updates VF and VL and computes the distances +*> between the updated singular values and the old singular +*> values. +*> +*> SLASD6 is called from SLASDA. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> Specifies whether singular vectors are to be computed in +*> factored form: +*> = 0: Compute singular values only. +*> = 1: Compute singular vectors in factored form as well. +*> \endverbatim +*> +*> \param[in] NL +*> \verbatim +*> NL is INTEGER +*> The row dimension of the upper block. NL >= 1. +*> \endverbatim +*> +*> \param[in] NR +*> \verbatim +*> NR is INTEGER +*> The row dimension of the lower block. NR >= 1. +*> \endverbatim +*> +*> \param[in] SQRE +*> \verbatim +*> SQRE is INTEGER +*> = 0: the lower block is an NR-by-NR square matrix. +*> = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +*> +*> The bidiagonal matrix has row dimension N = NL + NR + 1, +*> and column dimension M = N + SQRE. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (NL+NR+1). +*> On entry D(1:NL,1:NL) contains the singular values of the +*> upper block, and D(NL+2:N) contains the singular values +*> of the lower block. On exit D(1:N) contains the singular +*> values of the modified matrix. +*> \endverbatim +*> +*> \param[in,out] VF +*> \verbatim +*> VF is REAL array, dimension (M) +*> On entry, VF(1:NL+1) contains the first components of all +*> right singular vectors of the upper block; and VF(NL+2:M) +*> contains the first components of all right singular vectors +*> of the lower block. On exit, VF contains the first components +*> of all right singular vectors of the bidiagonal matrix. +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is REAL array, dimension (M) +*> On entry, VL(1:NL+1) contains the last components of all +*> right singular vectors of the upper block; and VL(NL+2:M) +*> contains the last components of all right singular vectors of +*> the lower block. On exit, VL contains the last components of +*> all right singular vectors of the bidiagonal matrix. +*> \endverbatim +*> +*> \param[in,out] ALPHA +*> \verbatim +*> ALPHA is REAL +*> Contains the diagonal element associated with the added row. +*> \endverbatim +*> +*> \param[in,out] BETA +*> \verbatim +*> BETA is REAL +*> Contains the off-diagonal element associated with the added +*> row. +*> \endverbatim +*> +*> \param[in,out] IDXQ +*> \verbatim +*> IDXQ is INTEGER array, dimension (N) +*> This contains the permutation which will reintegrate the +*> subproblem just solved back into sorted order, i.e. +*> D( IDXQ( I = 1, N ) ) will be in ascending order. +*> \endverbatim +*> +*> \param[out] PERM +*> \verbatim +*> PERM is INTEGER array, dimension ( N ) +*> The permutations (from deflation and sorting) to be applied +*> to each block. Not referenced if ICOMPQ = 0. +*> \endverbatim +*> +*> \param[out] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER +*> The number of Givens rotations which took place in this +*> subproblem. Not referenced if ICOMPQ = 0. +*> \endverbatim +*> +*> \param[out] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension ( LDGCOL, 2 ) +*> Each pair of numbers indicates a pair of columns to take place +*> in a Givens rotation. Not referenced if ICOMPQ = 0. +*> \endverbatim +*> +*> \param[in] LDGCOL +*> \verbatim +*> LDGCOL is INTEGER +*> leading dimension of GIVCOL, must be at least N. +*> \endverbatim +*> +*> \param[out] GIVNUM +*> \verbatim +*> GIVNUM is REAL array, dimension ( LDGNUM, 2 ) +*> Each number indicates the C or S value to be used in the +*> corresponding Givens rotation. Not referenced if ICOMPQ = 0. +*> \endverbatim +*> +*> \param[in] LDGNUM +*> \verbatim +*> LDGNUM is INTEGER +*> The leading dimension of GIVNUM and POLES, must be at least N. +*> \endverbatim +*> +*> \param[out] POLES +*> \verbatim +*> POLES is REAL array, dimension ( LDGNUM, 2 ) +*> On exit, POLES(1,*) is an array containing the new singular +*> values obtained from solving the secular equation, and +*> POLES(2,*) is an array containing the poles in the secular +*> equation. Not referenced if ICOMPQ = 0. +*> \endverbatim +*> +*> \param[out] DIFL +*> \verbatim +*> DIFL is REAL array, dimension ( N ) +*> On exit, DIFL(I) is the distance between I-th updated +*> (undeflated) singular value and the I-th (undeflated) old +*> singular value. +*> \endverbatim +*> +*> \param[out] DIFR +*> \verbatim +*> DIFR is REAL array, +*> dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and +*> dimension ( K ) if ICOMPQ = 0. +*> On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not +*> defined and will not be referenced. +*> +*> If ICOMPQ = 1, DIFR(1:K,2) is an array containing the +*> normalizing factors for the right singular vector matrix. +*> +*> See SLASD8 for details on DIFL and DIFR. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension ( M ) +*> The first elements of this array contain the components +*> of the deflation-adjusted updating row vector. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> Contains the dimension of the non-deflated matrix, +*> This is the order of the related secular equation. 1 <= K <=N. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is REAL +*> C contains garbage if SQRE =0 and the C-value of a Givens +*> rotation related to the right null space if SQRE = 1. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL +*> S contains garbage if SQRE =0 and the S-value of a Givens +*> rotation related to the right null space if SQRE = 1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension ( 4 * M ) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension ( 3 * N ) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, a singular value did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE SLASD6( ICOMPQ, NL, NR, SQRE, D, VF, VL, ALPHA, BETA, + $ IDXQ, PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, + $ LDGNUM, POLES, DIFL, DIFR, Z, K, C, S, WORK, + $ IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL, + $ NR, SQRE + REAL ALPHA, BETA, C, S +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), IDXQ( * ), IWORK( * ), + $ PERM( * ) + REAL D( * ), DIFL( * ), DIFR( * ), + $ GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ), + $ VF( * ), VL( * ), WORK( * ), Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IDX, IDXC, IDXP, ISIGMA, IVFW, IVLW, IW, M, + $ N, N1, N2 + REAL ORGNRM +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLAMRG, SLASCL, SLASD7, SLASD8, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + N = NL + NR + 1 + M = N + SQRE +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( NL.LT.1 ) THEN + INFO = -2 + ELSE IF( NR.LT.1 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -14 + ELSE IF( LDGNUM.LT.N ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD6', -INFO ) + RETURN + END IF +* +* The following values are for bookkeeping purposes only. They are +* integer pointers which indicate the portion of the workspace +* used by a particular array in SLASD7 and SLASD8. +* + ISIGMA = 1 + IW = ISIGMA + N + IVFW = IW + M + IVLW = IVFW + M +* + IDX = 1 + IDXC = IDX + N + IDXP = IDXC + N +* +* Scale. +* + ORGNRM = MAX( ABS( ALPHA ), ABS( BETA ) ) + D( NL+1 ) = ZERO + DO 10 I = 1, N + IF( ABS( D( I ) ).GT.ORGNRM ) THEN + ORGNRM = ABS( D( I ) ) + END IF + 10 CONTINUE + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) + ALPHA = ALPHA / ORGNRM + BETA = BETA / ORGNRM +* +* Sort and Deflate singular values. +* + CALL SLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, WORK( IW ), VF, + $ WORK( IVFW ), VL, WORK( IVLW ), ALPHA, BETA, + $ WORK( ISIGMA ), IWORK( IDX ), IWORK( IDXP ), IDXQ, + $ PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, C, S, + $ INFO ) +* +* Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. +* + CALL SLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDGNUM, + $ WORK( ISIGMA ), WORK( IW ), INFO ) +* +* Report the possible convergence failure. +* + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* Save the poles if ICOMPQ = 1. +* + IF( ICOMPQ.EQ.1 ) THEN + CALL SCOPY( K, D, 1, POLES( 1, 1 ), 1 ) + CALL SCOPY( K, WORK( ISIGMA ), 1, POLES( 1, 2 ), 1 ) + END IF +* +* Unscale. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) +* +* Prepare the IDXQ sorting permutation. +* + N1 = K + N2 = N - K + CALL SLAMRG( N1, N2, D, 1, -1, IDXQ ) +* + RETURN +* +* End of SLASD6 +* + END diff --git a/dspl/liblapack/SRC/slasd7.f b/dspl/liblapack/SRC/slasd7.f new file mode 100644 index 0000000..2adaa5e --- /dev/null +++ b/dspl/liblapack/SRC/slasd7.f @@ -0,0 +1,580 @@ +*> \brief \b SLASD7 merges the two sets of singular values together into a single sorted set. Then it tries to deflate the size of the problem. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASD7 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, ZW, VF, VFW, VL, +* VLW, ALPHA, BETA, DSIGMA, IDX, IDXP, IDXQ, +* PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, +* C, S, INFO ) +* +* .. Scalar Arguments .. +* INTEGER GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL, +* $ NR, SQRE +* REAL ALPHA, BETA, C, S +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( LDGCOL, * ), IDX( * ), IDXP( * ), +* $ IDXQ( * ), PERM( * ) +* REAL D( * ), DSIGMA( * ), GIVNUM( LDGNUM, * ), +* $ VF( * ), VFW( * ), VL( * ), VLW( * ), Z( * ), +* $ ZW( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASD7 merges the two sets of singular values together into a single +*> sorted set. Then it tries to deflate the size of the problem. There +*> are two ways in which deflation can occur: when two or more singular +*> values are close together or if there is a tiny entry in the Z +*> vector. For each such occurrence the order of the related +*> secular equation problem is reduced by one. +*> +*> SLASD7 is called from SLASD6. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> Specifies whether singular vectors are to be computed +*> in compact form, as follows: +*> = 0: Compute singular values only. +*> = 1: Compute singular vectors of upper +*> bidiagonal matrix in compact form. +*> \endverbatim +*> +*> \param[in] NL +*> \verbatim +*> NL is INTEGER +*> The row dimension of the upper block. NL >= 1. +*> \endverbatim +*> +*> \param[in] NR +*> \verbatim +*> NR is INTEGER +*> The row dimension of the lower block. NR >= 1. +*> \endverbatim +*> +*> \param[in] SQRE +*> \verbatim +*> SQRE is INTEGER +*> = 0: the lower block is an NR-by-NR square matrix. +*> = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +*> +*> The bidiagonal matrix has +*> N = NL + NR + 1 rows and +*> M = N + SQRE >= N columns. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> Contains the dimension of the non-deflated matrix, this is +*> the order of the related secular equation. 1 <= K <=N. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension ( N ) +*> On entry D contains the singular values of the two submatrices +*> to be combined. On exit D contains the trailing (N-K) updated +*> singular values (those which were deflated) sorted into +*> increasing order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension ( M ) +*> On exit Z contains the updating row vector in the secular +*> equation. +*> \endverbatim +*> +*> \param[out] ZW +*> \verbatim +*> ZW is REAL array, dimension ( M ) +*> Workspace for Z. +*> \endverbatim +*> +*> \param[in,out] VF +*> \verbatim +*> VF is REAL array, dimension ( M ) +*> On entry, VF(1:NL+1) contains the first components of all +*> right singular vectors of the upper block; and VF(NL+2:M) +*> contains the first components of all right singular vectors +*> of the lower block. On exit, VF contains the first components +*> of all right singular vectors of the bidiagonal matrix. +*> \endverbatim +*> +*> \param[out] VFW +*> \verbatim +*> VFW is REAL array, dimension ( M ) +*> Workspace for VF. +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is REAL array, dimension ( M ) +*> On entry, VL(1:NL+1) contains the last components of all +*> right singular vectors of the upper block; and VL(NL+2:M) +*> contains the last components of all right singular vectors +*> of the lower block. On exit, VL contains the last components +*> of all right singular vectors of the bidiagonal matrix. +*> \endverbatim +*> +*> \param[out] VLW +*> \verbatim +*> VLW is REAL array, dimension ( M ) +*> Workspace for VL. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is REAL +*> Contains the diagonal element associated with the added row. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is REAL +*> Contains the off-diagonal element associated with the added +*> row. +*> \endverbatim +*> +*> \param[out] DSIGMA +*> \verbatim +*> DSIGMA is REAL array, dimension ( N ) +*> Contains a copy of the diagonal elements (K-1 singular values +*> and one zero) in the secular equation. +*> \endverbatim +*> +*> \param[out] IDX +*> \verbatim +*> IDX is INTEGER array, dimension ( N ) +*> This will contain the permutation used to sort the contents of +*> D into ascending order. +*> \endverbatim +*> +*> \param[out] IDXP +*> \verbatim +*> IDXP is INTEGER array, dimension ( N ) +*> This will contain the permutation used to place deflated +*> values of D at the end of the array. On output IDXP(2:K) +*> points to the nondeflated D-values and IDXP(K+1:N) +*> points to the deflated singular values. +*> \endverbatim +*> +*> \param[in] IDXQ +*> \verbatim +*> IDXQ is INTEGER array, dimension ( N ) +*> This contains the permutation which separately sorts the two +*> sub-problems in D into ascending order. Note that entries in +*> the first half of this permutation must first be moved one +*> position backward; and entries in the second half +*> must first have NL+1 added to their values. +*> \endverbatim +*> +*> \param[out] PERM +*> \verbatim +*> PERM is INTEGER array, dimension ( N ) +*> The permutations (from deflation and sorting) to be applied +*> to each singular block. Not referenced if ICOMPQ = 0. +*> \endverbatim +*> +*> \param[out] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER +*> The number of Givens rotations which took place in this +*> subproblem. Not referenced if ICOMPQ = 0. +*> \endverbatim +*> +*> \param[out] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension ( LDGCOL, 2 ) +*> Each pair of numbers indicates a pair of columns to take place +*> in a Givens rotation. Not referenced if ICOMPQ = 0. +*> \endverbatim +*> +*> \param[in] LDGCOL +*> \verbatim +*> LDGCOL is INTEGER +*> The leading dimension of GIVCOL, must be at least N. +*> \endverbatim +*> +*> \param[out] GIVNUM +*> \verbatim +*> GIVNUM is REAL array, dimension ( LDGNUM, 2 ) +*> Each number indicates the C or S value to be used in the +*> corresponding Givens rotation. Not referenced if ICOMPQ = 0. +*> \endverbatim +*> +*> \param[in] LDGNUM +*> \verbatim +*> LDGNUM is INTEGER +*> The leading dimension of GIVNUM, must be at least N. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is REAL +*> C contains garbage if SQRE =0 and the C-value of a Givens +*> rotation related to the right null space if SQRE = 1. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL +*> S contains garbage if SQRE =0 and the S-value of a Givens +*> rotation related to the right null space if SQRE = 1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE SLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, ZW, VF, VFW, VL, + $ VLW, ALPHA, BETA, DSIGMA, IDX, IDXP, IDXQ, + $ PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, + $ C, S, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL, + $ NR, SQRE + REAL ALPHA, BETA, C, S +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), IDX( * ), IDXP( * ), + $ IDXQ( * ), PERM( * ) + REAL D( * ), DSIGMA( * ), GIVNUM( LDGNUM, * ), + $ VF( * ), VFW( * ), VL( * ), VLW( * ), Z( * ), + $ ZW( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, EIGHT + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0, + $ EIGHT = 8.0E+0 ) +* .. +* .. Local Scalars .. +* + INTEGER I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M, N, + $ NLP1, NLP2 + REAL EPS, HLFTOL, TAU, TOL, Z1 +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLAMRG, SROT, XERBLA +* .. +* .. External Functions .. + REAL SLAMCH, SLAPY2 + EXTERNAL SLAMCH, SLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + N = NL + NR + 1 + M = N + SQRE +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( NL.LT.1 ) THEN + INFO = -2 + ELSE IF( NR.LT.1 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -22 + ELSE IF( LDGNUM.LT.N ) THEN + INFO = -24 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD7', -INFO ) + RETURN + END IF +* + NLP1 = NL + 1 + NLP2 = NL + 2 + IF( ICOMPQ.EQ.1 ) THEN + GIVPTR = 0 + END IF +* +* Generate the first part of the vector Z and move the singular +* values in the first part of D one position backward. +* + Z1 = ALPHA*VL( NLP1 ) + VL( NLP1 ) = ZERO + TAU = VF( NLP1 ) + DO 10 I = NL, 1, -1 + Z( I+1 ) = ALPHA*VL( I ) + VL( I ) = ZERO + VF( I+1 ) = VF( I ) + D( I+1 ) = D( I ) + IDXQ( I+1 ) = IDXQ( I ) + 1 + 10 CONTINUE + VF( 1 ) = TAU +* +* Generate the second part of the vector Z. +* + DO 20 I = NLP2, M + Z( I ) = BETA*VF( I ) + VF( I ) = ZERO + 20 CONTINUE +* +* Sort the singular values into increasing order +* + DO 30 I = NLP2, N + IDXQ( I ) = IDXQ( I ) + NLP1 + 30 CONTINUE +* +* DSIGMA, IDXC, IDXC, and ZW are used as storage space. +* + DO 40 I = 2, N + DSIGMA( I ) = D( IDXQ( I ) ) + ZW( I ) = Z( IDXQ( I ) ) + VFW( I ) = VF( IDXQ( I ) ) + VLW( I ) = VL( IDXQ( I ) ) + 40 CONTINUE +* + CALL SLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) ) +* + DO 50 I = 2, N + IDXI = 1 + IDX( I ) + D( I ) = DSIGMA( IDXI ) + Z( I ) = ZW( IDXI ) + VF( I ) = VFW( IDXI ) + VL( I ) = VLW( IDXI ) + 50 CONTINUE +* +* Calculate the allowable deflation tolerence +* + EPS = SLAMCH( 'Epsilon' ) + TOL = MAX( ABS( ALPHA ), ABS( BETA ) ) + TOL = EIGHT*EIGHT*EPS*MAX( ABS( D( N ) ), TOL ) +* +* There are 2 kinds of deflation -- first a value in the z-vector +* is small, second two (or more) singular values are very close +* together (their difference is small). +* +* If the value in the z-vector is small, we simply permute the +* array so that the corresponding singular value is moved to the +* end. +* +* If two values in the D-vector are close, we perform a two-sided +* rotation designed to make one of the corresponding z-vector +* entries zero, and then permute the array so that the deflated +* singular value is moved to the end. +* +* If there are multiple singular values then the problem deflates. +* Here the number of equal singular values are found. As each equal +* singular value is found, an elementary reflector is computed to +* rotate the corresponding singular subspace so that the +* corresponding components of Z are zero in this new basis. +* + K = 1 + K2 = N + 1 + DO 60 J = 2, N + IF( ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + IDXP( K2 ) = J + IF( J.EQ.N ) + $ GO TO 100 + ELSE + JPREV = J + GO TO 70 + END IF + 60 CONTINUE + 70 CONTINUE + J = JPREV + 80 CONTINUE + J = J + 1 + IF( J.GT.N ) + $ GO TO 90 + IF( ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + IDXP( K2 ) = J + ELSE +* +* Check if singular values are close enough to allow deflation. +* + IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN +* +* Deflation is possible. +* + S = Z( JPREV ) + C = Z( J ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = SLAPY2( C, S ) + Z( J ) = TAU + Z( JPREV ) = ZERO + C = C / TAU + S = -S / TAU +* +* Record the appropriate Givens rotation +* + IF( ICOMPQ.EQ.1 ) THEN + GIVPTR = GIVPTR + 1 + IDXJP = IDXQ( IDX( JPREV )+1 ) + IDXJ = IDXQ( IDX( J )+1 ) + IF( IDXJP.LE.NLP1 ) THEN + IDXJP = IDXJP - 1 + END IF + IF( IDXJ.LE.NLP1 ) THEN + IDXJ = IDXJ - 1 + END IF + GIVCOL( GIVPTR, 2 ) = IDXJP + GIVCOL( GIVPTR, 1 ) = IDXJ + GIVNUM( GIVPTR, 2 ) = C + GIVNUM( GIVPTR, 1 ) = S + END IF + CALL SROT( 1, VF( JPREV ), 1, VF( J ), 1, C, S ) + CALL SROT( 1, VL( JPREV ), 1, VL( J ), 1, C, S ) + K2 = K2 - 1 + IDXP( K2 ) = JPREV + JPREV = J + ELSE + K = K + 1 + ZW( K ) = Z( JPREV ) + DSIGMA( K ) = D( JPREV ) + IDXP( K ) = JPREV + JPREV = J + END IF + END IF + GO TO 80 + 90 CONTINUE +* +* Record the last singular value. +* + K = K + 1 + ZW( K ) = Z( JPREV ) + DSIGMA( K ) = D( JPREV ) + IDXP( K ) = JPREV +* + 100 CONTINUE +* +* Sort the singular values into DSIGMA. The singular values which +* were not deflated go into the first K slots of DSIGMA, except +* that DSIGMA(1) is treated separately. +* + DO 110 J = 2, N + JP = IDXP( J ) + DSIGMA( J ) = D( JP ) + VFW( J ) = VF( JP ) + VLW( J ) = VL( JP ) + 110 CONTINUE + IF( ICOMPQ.EQ.1 ) THEN + DO 120 J = 2, N + JP = IDXP( J ) + PERM( J ) = IDXQ( IDX( JP )+1 ) + IF( PERM( J ).LE.NLP1 ) THEN + PERM( J ) = PERM( J ) - 1 + END IF + 120 CONTINUE + END IF +* +* The deflated singular values go back into the last N - K slots of +* D. +* + CALL SCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 ) +* +* Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and +* VL(M). +* + DSIGMA( 1 ) = ZERO + HLFTOL = TOL / TWO + IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL ) + $ DSIGMA( 2 ) = HLFTOL + IF( M.GT.N ) THEN + Z( 1 ) = SLAPY2( Z1, Z( M ) ) + IF( Z( 1 ).LE.TOL ) THEN + C = ONE + S = ZERO + Z( 1 ) = TOL + ELSE + C = Z1 / Z( 1 ) + S = -Z( M ) / Z( 1 ) + END IF + CALL SROT( 1, VF( M ), 1, VF( 1 ), 1, C, S ) + CALL SROT( 1, VL( M ), 1, VL( 1 ), 1, C, S ) + ELSE + IF( ABS( Z1 ).LE.TOL ) THEN + Z( 1 ) = TOL + ELSE + Z( 1 ) = Z1 + END IF + END IF +* +* Restore Z, VF, and VL. +* + CALL SCOPY( K-1, ZW( 2 ), 1, Z( 2 ), 1 ) + CALL SCOPY( N-1, VFW( 2 ), 1, VF( 2 ), 1 ) + CALL SCOPY( N-1, VLW( 2 ), 1, VL( 2 ), 1 ) +* + RETURN +* +* End of SLASD7 +* + END diff --git a/dspl/liblapack/SRC/slasd8.f b/dspl/liblapack/SRC/slasd8.f new file mode 100644 index 0000000..1d1d5cd --- /dev/null +++ b/dspl/liblapack/SRC/slasd8.f @@ -0,0 +1,342 @@ +*> \brief \b SLASD8 finds the square roots of the roots of the secular equation, and stores, for each element in D, the distance to its two nearest poles. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASD8 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDDIFR, +* DSIGMA, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER ICOMPQ, INFO, K, LDDIFR +* .. +* .. Array Arguments .. +* REAL D( * ), DIFL( * ), DIFR( LDDIFR, * ), +* $ DSIGMA( * ), VF( * ), VL( * ), WORK( * ), +* $ Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASD8 finds the square roots of the roots of the secular equation, +*> as defined by the values in DSIGMA and Z. It makes the appropriate +*> calls to SLASD4, and stores, for each element in D, the distance +*> to its two nearest poles (elements in DSIGMA). It also updates +*> the arrays VF and VL, the first and last components of all the +*> right singular vectors of the original bidiagonal matrix. +*> +*> SLASD8 is called from SLASD6. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> Specifies whether singular vectors are to be computed in +*> factored form in the calling routine: +*> = 0: Compute singular values only. +*> = 1: Compute singular vectors in factored form as well. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of terms in the rational function to be solved +*> by SLASD4. K >= 1. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension ( K ) +*> On output, D contains the updated singular values. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension ( K ) +*> On entry, the first K elements of this array contain the +*> components of the deflation-adjusted updating row vector. +*> On exit, Z is updated. +*> \endverbatim +*> +*> \param[in,out] VF +*> \verbatim +*> VF is REAL array, dimension ( K ) +*> On entry, VF contains information passed through DBEDE8. +*> On exit, VF contains the first K components of the first +*> components of all right singular vectors of the bidiagonal +*> matrix. +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is REAL array, dimension ( K ) +*> On entry, VL contains information passed through DBEDE8. +*> On exit, VL contains the first K components of the last +*> components of all right singular vectors of the bidiagonal +*> matrix. +*> \endverbatim +*> +*> \param[out] DIFL +*> \verbatim +*> DIFL is REAL array, dimension ( K ) +*> On exit, DIFL(I) = D(I) - DSIGMA(I). +*> \endverbatim +*> +*> \param[out] DIFR +*> \verbatim +*> DIFR is REAL array, +*> dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and +*> dimension ( K ) if ICOMPQ = 0. +*> On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not +*> defined and will not be referenced. +*> +*> If ICOMPQ = 1, DIFR(1:K,2) is an array containing the +*> normalizing factors for the right singular vector matrix. +*> \endverbatim +*> +*> \param[in] LDDIFR +*> \verbatim +*> LDDIFR is INTEGER +*> The leading dimension of DIFR, must be at least K. +*> \endverbatim +*> +*> \param[in,out] DSIGMA +*> \verbatim +*> DSIGMA is REAL array, dimension ( K ) +*> On entry, the first K elements of this array contain the old +*> roots of the deflated updating problem. These are the poles +*> of the secular equation. +*> On exit, the elements of DSIGMA may be very slightly altered +*> in value. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*K) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, a singular value did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE SLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDDIFR, + $ DSIGMA, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, K, LDDIFR +* .. +* .. Array Arguments .. + REAL D( * ), DIFL( * ), DIFR( LDDIFR, * ), + $ DSIGMA( * ), VF( * ), VL( * ), WORK( * ), + $ Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IWK1, IWK2, IWK2I, IWK3, IWK3I, J + REAL DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, RHO, TEMP +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLASCL, SLASD4, SLASET, XERBLA +* .. +* .. External Functions .. + REAL SDOT, SLAMC3, SNRM2 + EXTERNAL SDOT, SLAMC3, SNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( K.LT.1 ) THEN + INFO = -2 + ELSE IF( LDDIFR.LT.K ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASD8', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.1 ) THEN + D( 1 ) = ABS( Z( 1 ) ) + DIFL( 1 ) = D( 1 ) + IF( ICOMPQ.EQ.1 ) THEN + DIFL( 2 ) = ONE + DIFR( 1, 2 ) = ONE + END IF + RETURN + END IF +* +* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can +* be computed with high relative accuracy (barring over/underflow). +* This is a problem on machines without a guard digit in +* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). +* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), +* which on any of these machines zeros out the bottommost +* bit of DSIGMA(I) if it is 1; this makes the subsequent +* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation +* occurs. On binary machines with a guard digit (almost all +* machines) it does not change DSIGMA(I) at all. On hexadecimal +* and decimal machines with a guard digit, it slightly +* changes the bottommost bits of DSIGMA(I). It does not account +* for hexadecimal or decimal machines without guard digits +* (we know of none). We use a subroutine call to compute +* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating +* this code. +* + DO 10 I = 1, K + DSIGMA( I ) = SLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I ) + 10 CONTINUE +* +* Book keeping. +* + IWK1 = 1 + IWK2 = IWK1 + K + IWK3 = IWK2 + K + IWK2I = IWK2 - 1 + IWK3I = IWK3 - 1 +* +* Normalize Z. +* + RHO = SNRM2( K, Z, 1 ) + CALL SLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO ) + RHO = RHO*RHO +* +* Initialize WORK(IWK3). +* + CALL SLASET( 'A', K, 1, ONE, ONE, WORK( IWK3 ), K ) +* +* Compute the updated singular values, the arrays DIFL, DIFR, +* and the updated Z. +* + DO 40 J = 1, K + CALL SLASD4( K, J, DSIGMA, Z, WORK( IWK1 ), RHO, D( J ), + $ WORK( IWK2 ), INFO ) +* +* If the root finder fails, report the convergence failure. +* + IF( INFO.NE.0 ) THEN + RETURN + END IF + WORK( IWK3I+J ) = WORK( IWK3I+J )*WORK( J )*WORK( IWK2I+J ) + DIFL( J ) = -WORK( J ) + DIFR( J, 1 ) = -WORK( J+1 ) + DO 20 I = 1, J - 1 + WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )* + $ WORK( IWK2I+I ) / ( DSIGMA( I )- + $ DSIGMA( J ) ) / ( DSIGMA( I )+ + $ DSIGMA( J ) ) + 20 CONTINUE + DO 30 I = J + 1, K + WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )* + $ WORK( IWK2I+I ) / ( DSIGMA( I )- + $ DSIGMA( J ) ) / ( DSIGMA( I )+ + $ DSIGMA( J ) ) + 30 CONTINUE + 40 CONTINUE +* +* Compute updated Z. +* + DO 50 I = 1, K + Z( I ) = SIGN( SQRT( ABS( WORK( IWK3I+I ) ) ), Z( I ) ) + 50 CONTINUE +* +* Update VF and VL. +* + DO 80 J = 1, K + DIFLJ = DIFL( J ) + DJ = D( J ) + DSIGJ = -DSIGMA( J ) + IF( J.LT.K ) THEN + DIFRJ = -DIFR( J, 1 ) + DSIGJP = -DSIGMA( J+1 ) + END IF + WORK( J ) = -Z( J ) / DIFLJ / ( DSIGMA( J )+DJ ) + DO 60 I = 1, J - 1 + WORK( I ) = Z( I ) / ( SLAMC3( DSIGMA( I ), DSIGJ )-DIFLJ ) + $ / ( DSIGMA( I )+DJ ) + 60 CONTINUE + DO 70 I = J + 1, K + WORK( I ) = Z( I ) / ( SLAMC3( DSIGMA( I ), DSIGJP )+DIFRJ ) + $ / ( DSIGMA( I )+DJ ) + 70 CONTINUE + TEMP = SNRM2( K, WORK, 1 ) + WORK( IWK2I+J ) = SDOT( K, WORK, 1, VF, 1 ) / TEMP + WORK( IWK3I+J ) = SDOT( K, WORK, 1, VL, 1 ) / TEMP + IF( ICOMPQ.EQ.1 ) THEN + DIFR( J, 2 ) = TEMP + END IF + 80 CONTINUE +* + CALL SCOPY( K, WORK( IWK2 ), 1, VF, 1 ) + CALL SCOPY( K, WORK( IWK3 ), 1, VL, 1 ) +* + RETURN +* +* End of SLASD8 +* + END + diff --git a/dspl/liblapack/SRC/slasda.f b/dspl/liblapack/SRC/slasda.f new file mode 100644 index 0000000..6e02260 --- /dev/null +++ b/dspl/liblapack/SRC/slasda.f @@ -0,0 +1,514 @@ +*> \brief \b SLASDA computes the singular value decomposition (SVD) of a real upper bidiagonal matrix with diagonal d and off-diagonal e. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASDA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASDA( ICOMPQ, SMLSIZ, N, SQRE, D, E, U, LDU, VT, K, +* DIFL, DIFR, Z, POLES, GIVPTR, GIVCOL, LDGCOL, +* PERM, GIVNUM, C, S, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER ICOMPQ, INFO, LDGCOL, LDU, N, SMLSIZ, SQRE +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), +* $ K( * ), PERM( LDGCOL, * ) +* REAL C( * ), D( * ), DIFL( LDU, * ), DIFR( LDU, * ), +* $ E( * ), GIVNUM( LDU, * ), POLES( LDU, * ), +* $ S( * ), U( LDU, * ), VT( LDU, * ), WORK( * ), +* $ Z( LDU, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Using a divide and conquer approach, SLASDA computes the singular +*> value decomposition (SVD) of a real upper bidiagonal N-by-M matrix +*> B with diagonal D and offdiagonal E, where M = N + SQRE. The +*> algorithm computes the singular values in the SVD B = U * S * VT. +*> The orthogonal matrices U and VT are optionally computed in +*> compact form. +*> +*> A related subroutine, SLASD0, computes the singular values and +*> the singular vectors in explicit form. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> Specifies whether singular vectors are to be computed +*> in compact form, as follows +*> = 0: Compute singular values only. +*> = 1: Compute singular vectors of upper bidiagonal +*> matrix in compact form. +*> \endverbatim +*> +*> \param[in] SMLSIZ +*> \verbatim +*> SMLSIZ is INTEGER +*> The maximum size of the subproblems at the bottom of the +*> computation tree. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The row dimension of the upper bidiagonal matrix. This is +*> also the dimension of the main diagonal array D. +*> \endverbatim +*> +*> \param[in] SQRE +*> \verbatim +*> SQRE is INTEGER +*> Specifies the column dimension of the bidiagonal matrix. +*> = 0: The bidiagonal matrix has column dimension M = N; +*> = 1: The bidiagonal matrix has column dimension M = N + 1. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension ( N ) +*> On entry D contains the main diagonal of the bidiagonal +*> matrix. On exit D, if INFO = 0, contains its singular values. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension ( M-1 ) +*> Contains the subdiagonal entries of the bidiagonal matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is REAL array, +*> dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced +*> if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left +*> singular vector matrices of all subproblems at the bottom +*> level. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER, LDU = > N. +*> The leading dimension of arrays U, VT, DIFL, DIFR, POLES, +*> GIVNUM, and Z. +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is REAL array, +*> dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced +*> if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT**T contains the right +*> singular vector matrices of all subproblems at the bottom +*> level. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER array, dimension ( N ) +*> if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. +*> If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th +*> secular equation on the computation tree. +*> \endverbatim +*> +*> \param[out] DIFL +*> \verbatim +*> DIFL is REAL array, dimension ( LDU, NLVL ), +*> where NLVL = floor(log_2 (N/SMLSIZ))). +*> \endverbatim +*> +*> \param[out] DIFR +*> \verbatim +*> DIFR is REAL array, +*> dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and +*> dimension ( N ) if ICOMPQ = 0. +*> If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1) +*> record distances between singular values on the I-th +*> level and singular values on the (I -1)-th level, and +*> DIFR(1:N, 2 * I ) contains the normalizing factors for +*> the right singular vector matrix. See SLASD8 for details. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, +*> dimension ( LDU, NLVL ) if ICOMPQ = 1 and +*> dimension ( N ) if ICOMPQ = 0. +*> The first K elements of Z(1, I) contain the components of +*> the deflation-adjusted updating row vector for subproblems +*> on the I-th level. +*> \endverbatim +*> +*> \param[out] POLES +*> \verbatim +*> POLES is REAL array, +*> dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced +*> if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and +*> POLES(1, 2*I) contain the new and old singular values +*> involved in the secular equations on the I-th level. +*> \endverbatim +*> +*> \param[out] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER array, +*> dimension ( N ) if ICOMPQ = 1, and not referenced if +*> ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records +*> the number of Givens rotations performed on the I-th +*> problem on the computation tree. +*> \endverbatim +*> +*> \param[out] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, +*> dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not +*> referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, +*> GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations +*> of Givens rotations performed on the I-th level on the +*> computation tree. +*> \endverbatim +*> +*> \param[in] LDGCOL +*> \verbatim +*> LDGCOL is INTEGER, LDGCOL = > N. +*> The leading dimension of arrays GIVCOL and PERM. +*> \endverbatim +*> +*> \param[out] PERM +*> \verbatim +*> PERM is INTEGER array, dimension ( LDGCOL, NLVL ) +*> if ICOMPQ = 1, and not referenced +*> if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records +*> permutations done on the I-th level of the computation tree. +*> \endverbatim +*> +*> \param[out] GIVNUM +*> \verbatim +*> GIVNUM is REAL array, +*> dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not +*> referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I, +*> GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S- +*> values of Givens rotations performed on the I-th level on +*> the computation tree. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is REAL array, +*> dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. +*> If ICOMPQ = 1 and the I-th subproblem is not square, on exit, +*> C( I ) contains the C-value of a Givens rotation related to +*> the right null space of the I-th subproblem. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension ( N ) if +*> ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1 +*> and the I-th subproblem is not square, on exit, S( I ) +*> contains the S-value of a Givens rotation related to +*> the right null space of the I-th subproblem. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension +*> (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (7*N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, a singular value did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE SLASDA( ICOMPQ, SMLSIZ, N, SQRE, D, E, U, LDU, VT, K, + $ DIFL, DIFR, Z, POLES, GIVPTR, GIVCOL, LDGCOL, + $ PERM, GIVNUM, C, S, WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, LDGCOL, LDU, N, SMLSIZ, SQRE +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), + $ K( * ), PERM( LDGCOL, * ) + REAL C( * ), D( * ), DIFL( LDU, * ), DIFR( LDU, * ), + $ E( * ), GIVNUM( LDU, * ), POLES( LDU, * ), + $ S( * ), U( LDU, * ), VT( LDU, * ), WORK( * ), + $ Z( LDU, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, IC, IDXQ, IDXQI, IM1, INODE, ITEMP, IWK, + $ J, LF, LL, LVL, LVL2, M, NCC, ND, NDB1, NDIML, + $ NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, NRU, + $ NWORK1, NWORK2, SMLSZP, SQREI, VF, VFI, VL, VLI + REAL ALPHA, BETA +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLASD6, SLASDQ, SLASDT, SLASET, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( SMLSIZ.LT.3 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + ELSE IF( LDU.LT.( N+SQRE ) ) THEN + INFO = -8 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -17 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASDA', -INFO ) + RETURN + END IF +* + M = N + SQRE +* +* If the input matrix is too small, call SLASDQ to find the SVD. +* + IF( N.LE.SMLSIZ ) THEN + IF( ICOMPQ.EQ.0 ) THEN + CALL SLASDQ( 'U', SQRE, N, 0, 0, 0, D, E, VT, LDU, U, LDU, + $ U, LDU, WORK, INFO ) + ELSE + CALL SLASDQ( 'U', SQRE, N, M, N, 0, D, E, VT, LDU, U, LDU, + $ U, LDU, WORK, INFO ) + END IF + RETURN + END IF +* +* Book-keeping and set up the computation tree. +* + INODE = 1 + NDIML = INODE + N + NDIMR = NDIML + N + IDXQ = NDIMR + N + IWK = IDXQ + N +* + NCC = 0 + NRU = 0 +* + SMLSZP = SMLSIZ + 1 + VF = 1 + VL = VF + M + NWORK1 = VL + M + NWORK2 = NWORK1 + SMLSZP*SMLSZP +* + CALL SLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ), + $ IWORK( NDIMR ), SMLSIZ ) +* +* for the nodes on bottom level of the tree, solve +* their subproblems by SLASDQ. +* + NDB1 = ( ND+1 ) / 2 + DO 30 I = NDB1, ND +* +* IC : center row of each node +* NL : number of rows of left subproblem +* NR : number of rows of right subproblem +* NLF: starting row of the left subproblem +* NRF: starting row of the right subproblem +* + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NLP1 = NL + 1 + NR = IWORK( NDIMR+I1 ) + NLF = IC - NL + NRF = IC + 1 + IDXQI = IDXQ + NLF - 2 + VFI = VF + NLF - 1 + VLI = VL + NLF - 1 + SQREI = 1 + IF( ICOMPQ.EQ.0 ) THEN + CALL SLASET( 'A', NLP1, NLP1, ZERO, ONE, WORK( NWORK1 ), + $ SMLSZP ) + CALL SLASDQ( 'U', SQREI, NL, NLP1, NRU, NCC, D( NLF ), + $ E( NLF ), WORK( NWORK1 ), SMLSZP, + $ WORK( NWORK2 ), NL, WORK( NWORK2 ), NL, + $ WORK( NWORK2 ), INFO ) + ITEMP = NWORK1 + NL*SMLSZP + CALL SCOPY( NLP1, WORK( NWORK1 ), 1, WORK( VFI ), 1 ) + CALL SCOPY( NLP1, WORK( ITEMP ), 1, WORK( VLI ), 1 ) + ELSE + CALL SLASET( 'A', NL, NL, ZERO, ONE, U( NLF, 1 ), LDU ) + CALL SLASET( 'A', NLP1, NLP1, ZERO, ONE, VT( NLF, 1 ), LDU ) + CALL SLASDQ( 'U', SQREI, NL, NLP1, NL, NCC, D( NLF ), + $ E( NLF ), VT( NLF, 1 ), LDU, U( NLF, 1 ), LDU, + $ U( NLF, 1 ), LDU, WORK( NWORK1 ), INFO ) + CALL SCOPY( NLP1, VT( NLF, 1 ), 1, WORK( VFI ), 1 ) + CALL SCOPY( NLP1, VT( NLF, NLP1 ), 1, WORK( VLI ), 1 ) + END IF + IF( INFO.NE.0 ) THEN + RETURN + END IF + DO 10 J = 1, NL + IWORK( IDXQI+J ) = J + 10 CONTINUE + IF( ( I.EQ.ND ) .AND. ( SQRE.EQ.0 ) ) THEN + SQREI = 0 + ELSE + SQREI = 1 + END IF + IDXQI = IDXQI + NLP1 + VFI = VFI + NLP1 + VLI = VLI + NLP1 + NRP1 = NR + SQREI + IF( ICOMPQ.EQ.0 ) THEN + CALL SLASET( 'A', NRP1, NRP1, ZERO, ONE, WORK( NWORK1 ), + $ SMLSZP ) + CALL SLASDQ( 'U', SQREI, NR, NRP1, NRU, NCC, D( NRF ), + $ E( NRF ), WORK( NWORK1 ), SMLSZP, + $ WORK( NWORK2 ), NR, WORK( NWORK2 ), NR, + $ WORK( NWORK2 ), INFO ) + ITEMP = NWORK1 + ( NRP1-1 )*SMLSZP + CALL SCOPY( NRP1, WORK( NWORK1 ), 1, WORK( VFI ), 1 ) + CALL SCOPY( NRP1, WORK( ITEMP ), 1, WORK( VLI ), 1 ) + ELSE + CALL SLASET( 'A', NR, NR, ZERO, ONE, U( NRF, 1 ), LDU ) + CALL SLASET( 'A', NRP1, NRP1, ZERO, ONE, VT( NRF, 1 ), LDU ) + CALL SLASDQ( 'U', SQREI, NR, NRP1, NR, NCC, D( NRF ), + $ E( NRF ), VT( NRF, 1 ), LDU, U( NRF, 1 ), LDU, + $ U( NRF, 1 ), LDU, WORK( NWORK1 ), INFO ) + CALL SCOPY( NRP1, VT( NRF, 1 ), 1, WORK( VFI ), 1 ) + CALL SCOPY( NRP1, VT( NRF, NRP1 ), 1, WORK( VLI ), 1 ) + END IF + IF( INFO.NE.0 ) THEN + RETURN + END IF + DO 20 J = 1, NR + IWORK( IDXQI+J ) = J + 20 CONTINUE + 30 CONTINUE +* +* Now conquer each subproblem bottom-up. +* + J = 2**NLVL + DO 50 LVL = NLVL, 1, -1 + LVL2 = LVL*2 - 1 +* +* Find the first node LF and last node LL on +* the current level LVL. +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 40 I = LF, LL + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + IF( I.EQ.LL ) THEN + SQREI = SQRE + ELSE + SQREI = 1 + END IF + VFI = VF + NLF - 1 + VLI = VL + NLF - 1 + IDXQI = IDXQ + NLF - 1 + ALPHA = D( IC ) + BETA = E( IC ) + IF( ICOMPQ.EQ.0 ) THEN + CALL SLASD6( ICOMPQ, NL, NR, SQREI, D( NLF ), + $ WORK( VFI ), WORK( VLI ), ALPHA, BETA, + $ IWORK( IDXQI ), PERM, GIVPTR( 1 ), GIVCOL, + $ LDGCOL, GIVNUM, LDU, POLES, DIFL, DIFR, Z, + $ K( 1 ), C( 1 ), S( 1 ), WORK( NWORK1 ), + $ IWORK( IWK ), INFO ) + ELSE + J = J - 1 + CALL SLASD6( ICOMPQ, NL, NR, SQREI, D( NLF ), + $ WORK( VFI ), WORK( VLI ), ALPHA, BETA, + $ IWORK( IDXQI ), PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, + $ POLES( NLF, LVL2 ), DIFL( NLF, LVL ), + $ DIFR( NLF, LVL2 ), Z( NLF, LVL ), K( J ), + $ C( J ), S( J ), WORK( NWORK1 ), + $ IWORK( IWK ), INFO ) + END IF + IF( INFO.NE.0 ) THEN + RETURN + END IF + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of SLASDA +* + END diff --git a/dspl/liblapack/SRC/slasdq.f b/dspl/liblapack/SRC/slasdq.f new file mode 100644 index 0000000..434d373 --- /dev/null +++ b/dspl/liblapack/SRC/slasdq.f @@ -0,0 +1,413 @@ +*> \brief \b SLASDQ computes the SVD of a real bidiagonal matrix with diagonal d and off-diagonal e. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASDQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASDQ( UPLO, SQRE, N, NCVT, NRU, NCC, D, E, VT, LDVT, +* U, LDU, C, LDC, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU, SQRE +* .. +* .. Array Arguments .. +* REAL C( LDC, * ), D( * ), E( * ), U( LDU, * ), +* $ VT( LDVT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASDQ computes the singular value decomposition (SVD) of a real +*> (upper or lower) bidiagonal matrix with diagonal D and offdiagonal +*> E, accumulating the transformations if desired. Letting B denote +*> the input bidiagonal matrix, the algorithm computes orthogonal +*> matrices Q and P such that B = Q * S * P**T (P**T denotes the transpose +*> of P). The singular values S are overwritten on D. +*> +*> The input matrix U is changed to U * Q if desired. +*> The input matrix VT is changed to P**T * VT if desired. +*> The input matrix C is changed to Q**T * C if desired. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices With +*> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, +*> LAPACK Working Note #3, for a detailed description of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the input bidiagonal matrix +*> is upper or lower bidiagonal, and whether it is square are +*> not. +*> UPLO = 'U' or 'u' B is upper bidiagonal. +*> UPLO = 'L' or 'l' B is lower bidiagonal. +*> \endverbatim +*> +*> \param[in] SQRE +*> \verbatim +*> SQRE is INTEGER +*> = 0: then the input matrix is N-by-N. +*> = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and +*> (N+1)-by-N if UPLU = 'L'. +*> +*> The bidiagonal matrix has +*> N = NL + NR + 1 rows and +*> M = N + SQRE >= N columns. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of rows and columns +*> in the matrix. N must be at least 0. +*> \endverbatim +*> +*> \param[in] NCVT +*> \verbatim +*> NCVT is INTEGER +*> On entry, NCVT specifies the number of columns of +*> the matrix VT. NCVT must be at least 0. +*> \endverbatim +*> +*> \param[in] NRU +*> \verbatim +*> NRU is INTEGER +*> On entry, NRU specifies the number of rows of +*> the matrix U. NRU must be at least 0. +*> \endverbatim +*> +*> \param[in] NCC +*> \verbatim +*> NCC is INTEGER +*> On entry, NCC specifies the number of columns of +*> the matrix C. NCC must be at least 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, D contains the diagonal entries of the +*> bidiagonal matrix whose SVD is desired. On normal exit, +*> D contains the singular values in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array. +*> dimension is (N-1) if SQRE = 0 and N if SQRE = 1. +*> On entry, the entries of E contain the offdiagonal entries +*> of the bidiagonal matrix whose SVD is desired. On normal +*> exit, E will contain 0. If the algorithm does not converge, +*> D and E will contain the diagonal and superdiagonal entries +*> of a bidiagonal matrix orthogonally equivalent to the one +*> given as input. +*> \endverbatim +*> +*> \param[in,out] VT +*> \verbatim +*> VT is REAL array, dimension (LDVT, NCVT) +*> On entry, contains a matrix which on exit has been +*> premultiplied by P**T, dimension N-by-NCVT if SQRE = 0 +*> and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0). +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> On entry, LDVT specifies the leading dimension of VT as +*> declared in the calling (sub) program. LDVT must be at +*> least 1. If NCVT is nonzero LDVT must also be at least N. +*> \endverbatim +*> +*> \param[in,out] U +*> \verbatim +*> U is REAL array, dimension (LDU, N) +*> On entry, contains a matrix which on exit has been +*> postmultiplied by Q, dimension NRU-by-N if SQRE = 0 +*> and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0). +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> On entry, LDU specifies the leading dimension of U as +*> declared in the calling (sub) program. LDU must be at +*> least max( 1, NRU ) . +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC, NCC) +*> On entry, contains an N-by-NCC matrix which on exit +*> has been premultiplied by Q**T dimension N-by-NCC if SQRE = 0 +*> and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0). +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> On entry, LDC specifies the leading dimension of C as +*> declared in the calling (sub) program. LDC must be at +*> least 1. If NCC is nonzero, LDC must also be at least N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (4*N) +*> Workspace. Only referenced if one of NCVT, NRU, or NCC is +*> nonzero, and if N is at least 2. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> On exit, a value of 0 indicates a successful exit. +*> If INFO < 0, argument number -INFO is illegal. +*> If INFO > 0, the algorithm did not converge, and INFO +*> specifies how many superdiagonals did not converge. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE SLASDQ( UPLO, SQRE, N, NCVT, NRU, NCC, D, E, VT, LDVT, + $ U, LDU, C, LDC, WORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU, SQRE +* .. +* .. Array Arguments .. + REAL C( LDC, * ), D( * ), E( * ), U( LDU, * ), + $ VT( LDVT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ROTATE + INTEGER I, ISUB, IUPLO, J, NP1, SQRE1 + REAL CS, R, SMIN, SN +* .. +* .. External Subroutines .. + EXTERNAL SBDSQR, SLARTG, SLASR, SSWAP, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IUPLO = 0 + IF( LSAME( UPLO, 'U' ) ) + $ IUPLO = 1 + IF( LSAME( UPLO, 'L' ) ) + $ IUPLO = 2 + IF( IUPLO.EQ.0 ) THEN + INFO = -1 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NCVT.LT.0 ) THEN + INFO = -4 + ELSE IF( NRU.LT.0 ) THEN + INFO = -5 + ELSE IF( NCC.LT.0 ) THEN + INFO = -6 + ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR. + $ ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN + INFO = -10 + ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN + INFO = -12 + ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR. + $ ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASDQ', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN +* +* ROTATE is true if any singular vectors desired, false otherwise +* + ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 ) + NP1 = N + 1 + SQRE1 = SQRE +* +* If matrix non-square upper bidiagonal, rotate to be lower +* bidiagonal. The rotations are on the right. +* + IF( ( IUPLO.EQ.1 ) .AND. ( SQRE1.EQ.1 ) ) THEN + DO 10 I = 1, N - 1 + CALL SLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( ROTATE ) THEN + WORK( I ) = CS + WORK( N+I ) = SN + END IF + 10 CONTINUE + CALL SLARTG( D( N ), E( N ), CS, SN, R ) + D( N ) = R + E( N ) = ZERO + IF( ROTATE ) THEN + WORK( N ) = CS + WORK( N+N ) = SN + END IF + IUPLO = 2 + SQRE1 = 0 +* +* Update singular vectors if desired. +* + IF( NCVT.GT.0 ) + $ CALL SLASR( 'L', 'V', 'F', NP1, NCVT, WORK( 1 ), + $ WORK( NP1 ), VT, LDVT ) + END IF +* +* If matrix lower bidiagonal, rotate to be upper bidiagonal +* by applying Givens rotations on the left. +* + IF( IUPLO.EQ.2 ) THEN + DO 20 I = 1, N - 1 + CALL SLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( ROTATE ) THEN + WORK( I ) = CS + WORK( N+I ) = SN + END IF + 20 CONTINUE +* +* If matrix (N+1)-by-N lower bidiagonal, one additional +* rotation is needed. +* + IF( SQRE1.EQ.1 ) THEN + CALL SLARTG( D( N ), E( N ), CS, SN, R ) + D( N ) = R + IF( ROTATE ) THEN + WORK( N ) = CS + WORK( N+N ) = SN + END IF + END IF +* +* Update singular vectors if desired. +* + IF( NRU.GT.0 ) THEN + IF( SQRE1.EQ.0 ) THEN + CALL SLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ), + $ WORK( NP1 ), U, LDU ) + ELSE + CALL SLASR( 'R', 'V', 'F', NRU, NP1, WORK( 1 ), + $ WORK( NP1 ), U, LDU ) + END IF + END IF + IF( NCC.GT.0 ) THEN + IF( SQRE1.EQ.0 ) THEN + CALL SLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ), + $ WORK( NP1 ), C, LDC ) + ELSE + CALL SLASR( 'L', 'V', 'F', NP1, NCC, WORK( 1 ), + $ WORK( NP1 ), C, LDC ) + END IF + END IF + END IF +* +* Call SBDSQR to compute the SVD of the reduced real +* N-by-N upper bidiagonal matrix. +* + CALL SBDSQR( 'U', N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C, + $ LDC, WORK, INFO ) +* +* Sort the singular values into ascending order (insertion sort on +* singular values, but only one transposition per singular vector) +* + DO 40 I = 1, N +* +* Scan for smallest D(I). +* + ISUB = I + SMIN = D( I ) + DO 30 J = I + 1, N + IF( D( J ).LT.SMIN ) THEN + ISUB = J + SMIN = D( J ) + END IF + 30 CONTINUE + IF( ISUB.NE.I ) THEN +* +* Swap singular values and vectors. +* + D( ISUB ) = D( I ) + D( I ) = SMIN + IF( NCVT.GT.0 ) + $ CALL SSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( I, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL SSWAP( NRU, U( 1, ISUB ), 1, U( 1, I ), 1 ) + IF( NCC.GT.0 ) + $ CALL SSWAP( NCC, C( ISUB, 1 ), LDC, C( I, 1 ), LDC ) + END IF + 40 CONTINUE +* + RETURN +* +* End of SLASDQ +* + END diff --git a/dspl/liblapack/SRC/slasdt.f b/dspl/liblapack/SRC/slasdt.f new file mode 100644 index 0000000..fe5aaa6 --- /dev/null +++ b/dspl/liblapack/SRC/slasdt.f @@ -0,0 +1,172 @@ +*> \brief \b SLASDT creates a tree of subproblems for bidiagonal divide and conquer. Used by sbdsdc. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASDT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASDT( N, LVL, ND, INODE, NDIML, NDIMR, MSUB ) +* +* .. Scalar Arguments .. +* INTEGER LVL, MSUB, N, ND +* .. +* .. Array Arguments .. +* INTEGER INODE( * ), NDIML( * ), NDIMR( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASDT creates a tree of subproblems for bidiagonal divide and +*> conquer. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, the number of diagonal elements of the +*> bidiagonal matrix. +*> \endverbatim +*> +*> \param[out] LVL +*> \verbatim +*> LVL is INTEGER +*> On exit, the number of levels on the computation tree. +*> \endverbatim +*> +*> \param[out] ND +*> \verbatim +*> ND is INTEGER +*> On exit, the number of nodes on the tree. +*> \endverbatim +*> +*> \param[out] INODE +*> \verbatim +*> INODE is INTEGER array, dimension ( N ) +*> On exit, centers of subproblems. +*> \endverbatim +*> +*> \param[out] NDIML +*> \verbatim +*> NDIML is INTEGER array, dimension ( N ) +*> On exit, row dimensions of left children. +*> \endverbatim +*> +*> \param[out] NDIMR +*> \verbatim +*> NDIMR is INTEGER array, dimension ( N ) +*> On exit, row dimensions of right children. +*> \endverbatim +*> +*> \param[in] MSUB +*> \verbatim +*> MSUB is INTEGER +*> On entry, the maximum row dimension each subproblem at the +*> bottom of the tree can be of. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE SLASDT( N, LVL, ND, INODE, NDIML, NDIMR, MSUB ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER LVL, MSUB, N, ND +* .. +* .. Array Arguments .. + INTEGER INODE( * ), NDIML( * ), NDIMR( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IL, IR, LLST, MAXN, NCRNT, NLVL + REAL TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, LOG, MAX, REAL +* .. +* .. Executable Statements .. +* +* Find the number of levels on the tree. +* + MAXN = MAX( 1, N ) + TEMP = LOG( REAL( MAXN ) / REAL( MSUB+1 ) ) / LOG( TWO ) + LVL = INT( TEMP ) + 1 +* + I = N / 2 + INODE( 1 ) = I + 1 + NDIML( 1 ) = I + NDIMR( 1 ) = N - I - 1 + IL = 0 + IR = 1 + LLST = 1 + DO 20 NLVL = 1, LVL - 1 +* +* Constructing the tree at (NLVL+1)-st level. The number of +* nodes created on this level is LLST * 2. +* + DO 10 I = 0, LLST - 1 + IL = IL + 2 + IR = IR + 2 + NCRNT = LLST + I + NDIML( IL ) = NDIML( NCRNT ) / 2 + NDIMR( IL ) = NDIML( NCRNT ) - NDIML( IL ) - 1 + INODE( IL ) = INODE( NCRNT ) - NDIMR( IL ) - 1 + NDIML( IR ) = NDIMR( NCRNT ) / 2 + NDIMR( IR ) = NDIMR( NCRNT ) - NDIML( IR ) - 1 + INODE( IR ) = INODE( NCRNT ) + NDIML( IR ) + 1 + 10 CONTINUE + LLST = LLST*2 + 20 CONTINUE + ND = LLST*2 - 1 +* + RETURN +* +* End of SLASDT +* + END diff --git a/dspl/liblapack/SRC/slaset.f b/dspl/liblapack/SRC/slaset.f new file mode 100644 index 0000000..3a87e08 --- /dev/null +++ b/dspl/liblapack/SRC/slaset.f @@ -0,0 +1,184 @@ +*> \brief \b SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASET + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASET( UPLO, M, N, ALPHA, BETA, A, LDA ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER LDA, M, N +* REAL ALPHA, BETA +* .. +* .. Array Arguments .. +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASET initializes an m-by-n matrix A to BETA on the diagonal and +*> ALPHA on the offdiagonals. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies the part of the matrix A to be set. +*> = 'U': Upper triangular part is set; the strictly lower +*> triangular part of A is not changed. +*> = 'L': Lower triangular part is set; the strictly upper +*> triangular part of A is not changed. +*> Otherwise: All of the matrix A is set. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is REAL +*> The constant to which the offdiagonal elements are to be set. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is REAL +*> The constant to which the diagonal elements are to be set. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On exit, the leading m-by-n submatrix of A is set as follows: +*> +*> if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n, +*> if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n, +*> otherwise, A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j, +*> +*> and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLASET( UPLO, M, N, ALPHA, BETA, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, M, N + REAL ALPHA, BETA +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Set the strictly upper triangular or trapezoidal part of the +* array to ALPHA. +* + DO 20 J = 2, N + DO 10 I = 1, MIN( J-1, M ) + A( I, J ) = ALPHA + 10 CONTINUE + 20 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN +* +* Set the strictly lower triangular or trapezoidal part of the +* array to ALPHA. +* + DO 40 J = 1, MIN( M, N ) + DO 30 I = J + 1, M + A( I, J ) = ALPHA + 30 CONTINUE + 40 CONTINUE +* + ELSE +* +* Set the leading m-by-n submatrix to ALPHA. +* + DO 60 J = 1, N + DO 50 I = 1, M + A( I, J ) = ALPHA + 50 CONTINUE + 60 CONTINUE + END IF +* +* Set the first min(M,N) diagonal elements to BETA. +* + DO 70 I = 1, MIN( M, N ) + A( I, I ) = BETA + 70 CONTINUE +* + RETURN +* +* End of SLASET +* + END diff --git a/dspl/liblapack/SRC/slasq1.f b/dspl/liblapack/SRC/slasq1.f new file mode 100644 index 0000000..458aece --- /dev/null +++ b/dspl/liblapack/SRC/slasq1.f @@ -0,0 +1,224 @@ +*> \brief \b SLASQ1 computes the singular values of a real square bidiagonal matrix. Used by sbdsqr. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASQ1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASQ1( N, D, E, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASQ1 computes the singular values of a real N-by-N bidiagonal +*> matrix with diagonal D and off-diagonal E. The singular values +*> are computed to high relative accuracy, in the absence of +*> denormalization, underflow and overflow. The algorithm was first +*> presented in +*> +*> "Accurate singular values and differential qd algorithms" by K. V. +*> Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230, +*> 1994, +*> +*> and the present implementation is described in "An implementation of +*> the dqds Algorithm (Positive Case)", LAPACK Working Note. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns in the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, D contains the diagonal elements of the +*> bidiagonal matrix whose SVD is desired. On normal exit, +*> D contains the singular values in decreasing order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N) +*> On entry, elements E(1:N-1) contain the off-diagonal elements +*> of the bidiagonal matrix whose SVD is desired. +*> On exit, E is overwritten. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: the algorithm failed +*> = 1, a split was marked by a positive value in E +*> = 2, current block of Z not diagonalized after 100*N +*> iterations (in inner while loop) On exit D and E +*> represent a matrix with the same singular values +*> which the calling subroutine could use to finish the +*> computation, or even feed back into SLASQ1 +*> = 3, termination criterion of outer while loop not met +*> (program created more than N unreduced blocks) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE SLASQ1( N, D, E, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO + REAL EPS, SCALE, SAFMIN, SIGMN, SIGMX +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLAS2, SLASCL, SLASQ2, SLASRT, XERBLA +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'SLASQ1', -INFO ) + RETURN + ELSE IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + D( 1 ) = ABS( D( 1 ) ) + RETURN + ELSE IF( N.EQ.2 ) THEN + CALL SLAS2( D( 1 ), E( 1 ), D( 2 ), SIGMN, SIGMX ) + D( 1 ) = SIGMX + D( 2 ) = SIGMN + RETURN + END IF +* +* Estimate the largest singular value. +* + SIGMX = ZERO + DO 10 I = 1, N - 1 + D( I ) = ABS( D( I ) ) + SIGMX = MAX( SIGMX, ABS( E( I ) ) ) + 10 CONTINUE + D( N ) = ABS( D( N ) ) +* +* Early return if SIGMX is zero (matrix is already diagonal). +* + IF( SIGMX.EQ.ZERO ) THEN + CALL SLASRT( 'D', N, D, IINFO ) + RETURN + END IF +* + DO 20 I = 1, N + SIGMX = MAX( SIGMX, D( I ) ) + 20 CONTINUE +* +* Copy D and E into WORK (in the Z format) and scale (squaring the +* input data makes scaling by a power of the radix pointless). +* + EPS = SLAMCH( 'Precision' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SCALE = SQRT( EPS / SAFMIN ) + CALL SCOPY( N, D, 1, WORK( 1 ), 2 ) + CALL SCOPY( N-1, E, 1, WORK( 2 ), 2 ) + CALL SLASCL( 'G', 0, 0, SIGMX, SCALE, 2*N-1, 1, WORK, 2*N-1, + $ IINFO ) +* +* Compute the q's and e's. +* + DO 30 I = 1, 2*N - 1 + WORK( I ) = WORK( I )**2 + 30 CONTINUE + WORK( 2*N ) = ZERO +* + CALL SLASQ2( N, WORK, INFO ) +* + IF( INFO.EQ.0 ) THEN + DO 40 I = 1, N + D( I ) = SQRT( WORK( I ) ) + 40 CONTINUE + CALL SLASCL( 'G', 0, 0, SCALE, SIGMX, N, 1, D, N, IINFO ) + ELSE IF( INFO.EQ.2 ) THEN +* +* Maximum number of iterations exceeded. Move data from WORK +* into D and E so the calling subroutine can try to finish +* + DO I = 1, N + D( I ) = SQRT( WORK( 2*I-1 ) ) + E( I ) = SQRT( WORK( 2*I ) ) + END DO + CALL SLASCL( 'G', 0, 0, SCALE, SIGMX, N, 1, D, N, IINFO ) + CALL SLASCL( 'G', 0, 0, SCALE, SIGMX, N, 1, E, N, IINFO ) + END IF +* + RETURN +* +* End of SLASQ1 +* + END diff --git a/dspl/liblapack/SRC/slasq2.f b/dspl/liblapack/SRC/slasq2.f new file mode 100644 index 0000000..6e5f864 --- /dev/null +++ b/dspl/liblapack/SRC/slasq2.f @@ -0,0 +1,588 @@ +*> \brief \b SLASQ2 computes all the eigenvalues of the symmetric positive definite tridiagonal matrix associated with the qd Array Z to high relative accuracy. Used by sbdsqr and sstegr. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASQ2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASQ2( N, Z, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* REAL Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASQ2 computes all the eigenvalues of the symmetric positive +*> definite tridiagonal matrix associated with the qd array Z to high +*> relative accuracy are computed to high relative accuracy, in the +*> absence of denormalization, underflow and overflow. +*> +*> To see the relation of Z to the tridiagonal matrix, let L be a +*> unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and +*> let U be an upper bidiagonal matrix with 1's above and diagonal +*> Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the +*> symmetric tridiagonal to which it is similar. +*> +*> Note : SLASQ2 defines a logical variable, IEEE, which is true +*> on machines which follow ieee-754 floating-point standard in their +*> handling of infinities and NaNs, and false otherwise. This variable +*> is passed to SLASQ3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns in the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension ( 4*N ) +*> On entry Z holds the qd array. On exit, entries 1 to N hold +*> the eigenvalues in decreasing order, Z( 2*N+1 ) holds the +*> trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If +*> N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 ) +*> holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of +*> shifts that failed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if the i-th argument is a scalar and had an illegal +*> value, then INFO = -i, if the i-th argument is an +*> array and the j-entry had an illegal value, then +*> INFO = -(i*100+j) +*> > 0: the algorithm failed +*> = 1, a split was marked by a positive value in E +*> = 2, current block of Z not diagonalized after 100*N +*> iterations (in inner while loop). On exit Z holds +*> a qd array with the same eigenvalues as the given Z. +*> = 3, termination criterion of outer while loop not met +*> (program created more than N unreduced blocks) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Local Variables: I0:N0 defines a current unreduced segment of Z. +*> The shifts are accumulated in SIGMA. Iteration count is in ITER. +*> Ping-pong is controlled by PP (alternates between 0 and 1). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLASQ2( N, Z, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL CBIAS + PARAMETER ( CBIAS = 1.50E0 ) + REAL ZERO, HALF, ONE, TWO, FOUR, HUNDRD + PARAMETER ( ZERO = 0.0E0, HALF = 0.5E0, ONE = 1.0E0, + $ TWO = 2.0E0, FOUR = 4.0E0, HUNDRD = 100.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL IEEE + INTEGER I0, I4, IINFO, IPN4, ITER, IWHILA, IWHILB, K, + $ KMIN, N0, NBIG, NDIV, NFAIL, PP, SPLT, TTYPE, + $ I1, N1 + REAL D, DEE, DEEMIN, DESIG, DMIN, DMIN1, DMIN2, DN, + $ DN1, DN2, E, EMAX, EMIN, EPS, G, OLDEMN, QMAX, + $ QMIN, S, SAFMIN, SIGMA, T, TAU, TEMP, TOL, + $ TOL2, TRACE, ZMAX, TEMPE, TEMPQ +* .. +* .. External Subroutines .. + EXTERNAL SLASQ3, SLASRT, XERBLA +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* (in case SLASQ2 is not called by SLASQ1) +* + INFO = 0 + EPS = SLAMCH( 'Precision' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + TOL = EPS*HUNDRD + TOL2 = TOL**2 +* + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'SLASQ2', 1 ) + RETURN + ELSE IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN +* +* 1-by-1 case. +* + IF( Z( 1 ).LT.ZERO ) THEN + INFO = -201 + CALL XERBLA( 'SLASQ2', 2 ) + END IF + RETURN + ELSE IF( N.EQ.2 ) THEN +* +* 2-by-2 case. +* + IF( Z( 2 ).LT.ZERO .OR. Z( 3 ).LT.ZERO ) THEN + INFO = -2 + CALL XERBLA( 'SLASQ2', 2 ) + RETURN + ELSE IF( Z( 3 ).GT.Z( 1 ) ) THEN + D = Z( 3 ) + Z( 3 ) = Z( 1 ) + Z( 1 ) = D + END IF + Z( 5 ) = Z( 1 ) + Z( 2 ) + Z( 3 ) + IF( Z( 2 ).GT.Z( 3 )*TOL2 ) THEN + T = HALF*( ( Z( 1 )-Z( 3 ) )+Z( 2 ) ) + S = Z( 3 )*( Z( 2 ) / T ) + IF( S.LE.T ) THEN + S = Z( 3 )*( Z( 2 ) / ( T*( ONE+SQRT( ONE+S / T ) ) ) ) + ELSE + S = Z( 3 )*( Z( 2 ) / ( T+SQRT( T )*SQRT( T+S ) ) ) + END IF + T = Z( 1 ) + ( S+Z( 2 ) ) + Z( 3 ) = Z( 3 )*( Z( 1 ) / T ) + Z( 1 ) = T + END IF + Z( 2 ) = Z( 3 ) + Z( 6 ) = Z( 2 ) + Z( 1 ) + RETURN + END IF +* +* Check for negative data and compute sums of q's and e's. +* + Z( 2*N ) = ZERO + EMIN = Z( 2 ) + QMAX = ZERO + ZMAX = ZERO + D = ZERO + E = ZERO +* + DO 10 K = 1, 2*( N-1 ), 2 + IF( Z( K ).LT.ZERO ) THEN + INFO = -( 200+K ) + CALL XERBLA( 'SLASQ2', 2 ) + RETURN + ELSE IF( Z( K+1 ).LT.ZERO ) THEN + INFO = -( 200+K+1 ) + CALL XERBLA( 'SLASQ2', 2 ) + RETURN + END IF + D = D + Z( K ) + E = E + Z( K+1 ) + QMAX = MAX( QMAX, Z( K ) ) + EMIN = MIN( EMIN, Z( K+1 ) ) + ZMAX = MAX( QMAX, ZMAX, Z( K+1 ) ) + 10 CONTINUE + IF( Z( 2*N-1 ).LT.ZERO ) THEN + INFO = -( 200+2*N-1 ) + CALL XERBLA( 'SLASQ2', 2 ) + RETURN + END IF + D = D + Z( 2*N-1 ) + QMAX = MAX( QMAX, Z( 2*N-1 ) ) + ZMAX = MAX( QMAX, ZMAX ) +* +* Check for diagonality. +* + IF( E.EQ.ZERO ) THEN + DO 20 K = 2, N + Z( K ) = Z( 2*K-1 ) + 20 CONTINUE + CALL SLASRT( 'D', N, Z, IINFO ) + Z( 2*N-1 ) = D + RETURN + END IF +* + TRACE = D + E +* +* Check for zero data. +* + IF( TRACE.EQ.ZERO ) THEN + Z( 2*N-1 ) = ZERO + RETURN + END IF +* +* Check whether the machine is IEEE conformable. +* +* IEEE = ILAENV( 10, 'SLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 .AND. +* $ ILAENV( 11, 'SLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 +* +* [11/15/2008] The case IEEE=.TRUE. has a problem in single precision with +* some the test matrices of type 16. The double precision code is fine. +* + IEEE = .FALSE. +* +* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). +* + DO 30 K = 2*N, 2, -2 + Z( 2*K ) = ZERO + Z( 2*K-1 ) = Z( K ) + Z( 2*K-2 ) = ZERO + Z( 2*K-3 ) = Z( K-1 ) + 30 CONTINUE +* + I0 = 1 + N0 = N +* +* Reverse the qd-array, if warranted. +* + IF( CBIAS*Z( 4*I0-3 ).LT.Z( 4*N0-3 ) ) THEN + IPN4 = 4*( I0+N0 ) + DO 40 I4 = 4*I0, 2*( I0+N0-1 ), 4 + TEMP = Z( I4-3 ) + Z( I4-3 ) = Z( IPN4-I4-3 ) + Z( IPN4-I4-3 ) = TEMP + TEMP = Z( I4-1 ) + Z( I4-1 ) = Z( IPN4-I4-5 ) + Z( IPN4-I4-5 ) = TEMP + 40 CONTINUE + END IF +* +* Initial split checking via dqd and Li's test. +* + PP = 0 +* + DO 80 K = 1, 2 +* + D = Z( 4*N0+PP-3 ) + DO 50 I4 = 4*( N0-1 ) + PP, 4*I0 + PP, -4 + IF( Z( I4-1 ).LE.TOL2*D ) THEN + Z( I4-1 ) = -ZERO + D = Z( I4-3 ) + ELSE + D = Z( I4-3 )*( D / ( D+Z( I4-1 ) ) ) + END IF + 50 CONTINUE +* +* dqd maps Z to ZZ plus Li's test. +* + EMIN = Z( 4*I0+PP+1 ) + D = Z( 4*I0+PP-3 ) + DO 60 I4 = 4*I0 + PP, 4*( N0-1 ) + PP, 4 + Z( I4-2*PP-2 ) = D + Z( I4-1 ) + IF( Z( I4-1 ).LE.TOL2*D ) THEN + Z( I4-1 ) = -ZERO + Z( I4-2*PP-2 ) = D + Z( I4-2*PP ) = ZERO + D = Z( I4+1 ) + ELSE IF( SAFMIN*Z( I4+1 ).LT.Z( I4-2*PP-2 ) .AND. + $ SAFMIN*Z( I4-2*PP-2 ).LT.Z( I4+1 ) ) THEN + TEMP = Z( I4+1 ) / Z( I4-2*PP-2 ) + Z( I4-2*PP ) = Z( I4-1 )*TEMP + D = D*TEMP + ELSE + Z( I4-2*PP ) = Z( I4+1 )*( Z( I4-1 ) / Z( I4-2*PP-2 ) ) + D = Z( I4+1 )*( D / Z( I4-2*PP-2 ) ) + END IF + EMIN = MIN( EMIN, Z( I4-2*PP ) ) + 60 CONTINUE + Z( 4*N0-PP-2 ) = D +* +* Now find qmax. +* + QMAX = Z( 4*I0-PP-2 ) + DO 70 I4 = 4*I0 - PP + 2, 4*N0 - PP - 2, 4 + QMAX = MAX( QMAX, Z( I4 ) ) + 70 CONTINUE +* +* Prepare for the next iteration on K. +* + PP = 1 - PP + 80 CONTINUE +* +* Initialise variables to pass to SLASQ3. +* + TTYPE = 0 + DMIN1 = ZERO + DMIN2 = ZERO + DN = ZERO + DN1 = ZERO + DN2 = ZERO + G = ZERO + TAU = ZERO +* + ITER = 2 + NFAIL = 0 + NDIV = 2*( N0-I0 ) +* + DO 160 IWHILA = 1, N + 1 + IF( N0.LT.1 ) + $ GO TO 170 +* +* While array unfinished do +* +* E(N0) holds the value of SIGMA when submatrix in I0:N0 +* splits from the rest of the array, but is negated. +* + DESIG = ZERO + IF( N0.EQ.N ) THEN + SIGMA = ZERO + ELSE + SIGMA = -Z( 4*N0-1 ) + END IF + IF( SIGMA.LT.ZERO ) THEN + INFO = 1 + RETURN + END IF +* +* Find last unreduced submatrix's top index I0, find QMAX and +* EMIN. Find Gershgorin-type bound if Q's much greater than E's. +* + EMAX = ZERO + IF( N0.GT.I0 ) THEN + EMIN = ABS( Z( 4*N0-5 ) ) + ELSE + EMIN = ZERO + END IF + QMIN = Z( 4*N0-3 ) + QMAX = QMIN + DO 90 I4 = 4*N0, 8, -4 + IF( Z( I4-5 ).LE.ZERO ) + $ GO TO 100 + IF( QMIN.GE.FOUR*EMAX ) THEN + QMIN = MIN( QMIN, Z( I4-3 ) ) + EMAX = MAX( EMAX, Z( I4-5 ) ) + END IF + QMAX = MAX( QMAX, Z( I4-7 )+Z( I4-5 ) ) + EMIN = MIN( EMIN, Z( I4-5 ) ) + 90 CONTINUE + I4 = 4 +* + 100 CONTINUE + I0 = I4 / 4 + PP = 0 +* + IF( N0-I0.GT.1 ) THEN + DEE = Z( 4*I0-3 ) + DEEMIN = DEE + KMIN = I0 + DO 110 I4 = 4*I0+1, 4*N0-3, 4 + DEE = Z( I4 )*( DEE /( DEE+Z( I4-2 ) ) ) + IF( DEE.LE.DEEMIN ) THEN + DEEMIN = DEE + KMIN = ( I4+3 )/4 + END IF + 110 CONTINUE + IF( (KMIN-I0)*2.LT.N0-KMIN .AND. + $ DEEMIN.LE.HALF*Z(4*N0-3) ) THEN + IPN4 = 4*( I0+N0 ) + PP = 2 + DO 120 I4 = 4*I0, 2*( I0+N0-1 ), 4 + TEMP = Z( I4-3 ) + Z( I4-3 ) = Z( IPN4-I4-3 ) + Z( IPN4-I4-3 ) = TEMP + TEMP = Z( I4-2 ) + Z( I4-2 ) = Z( IPN4-I4-2 ) + Z( IPN4-I4-2 ) = TEMP + TEMP = Z( I4-1 ) + Z( I4-1 ) = Z( IPN4-I4-5 ) + Z( IPN4-I4-5 ) = TEMP + TEMP = Z( I4 ) + Z( I4 ) = Z( IPN4-I4-4 ) + Z( IPN4-I4-4 ) = TEMP + 120 CONTINUE + END IF + END IF +* +* Put -(initial shift) into DMIN. +* + DMIN = -MAX( ZERO, QMIN-TWO*SQRT( QMIN )*SQRT( EMAX ) ) +* +* Now I0:N0 is unreduced. +* PP = 0 for ping, PP = 1 for pong. +* PP = 2 indicates that flipping was applied to the Z array and +* and that the tests for deflation upon entry in SLASQ3 +* should not be performed. +* + NBIG = 100*( N0-I0+1 ) + DO 140 IWHILB = 1, NBIG + IF( I0.GT.N0 ) + $ GO TO 150 +* +* While submatrix unfinished take a good dqds step. +* + CALL SLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL, + $ ITER, NDIV, IEEE, TTYPE, DMIN1, DMIN2, DN, DN1, + $ DN2, G, TAU ) +* + PP = 1 - PP +* +* When EMIN is very small check for splits. +* + IF( PP.EQ.0 .AND. N0-I0.GE.3 ) THEN + IF( Z( 4*N0 ).LE.TOL2*QMAX .OR. + $ Z( 4*N0-1 ).LE.TOL2*SIGMA ) THEN + SPLT = I0 - 1 + QMAX = Z( 4*I0-3 ) + EMIN = Z( 4*I0-1 ) + OLDEMN = Z( 4*I0 ) + DO 130 I4 = 4*I0, 4*( N0-3 ), 4 + IF( Z( I4 ).LE.TOL2*Z( I4-3 ) .OR. + $ Z( I4-1 ).LE.TOL2*SIGMA ) THEN + Z( I4-1 ) = -SIGMA + SPLT = I4 / 4 + QMAX = ZERO + EMIN = Z( I4+3 ) + OLDEMN = Z( I4+4 ) + ELSE + QMAX = MAX( QMAX, Z( I4+1 ) ) + EMIN = MIN( EMIN, Z( I4-1 ) ) + OLDEMN = MIN( OLDEMN, Z( I4 ) ) + END IF + 130 CONTINUE + Z( 4*N0-1 ) = EMIN + Z( 4*N0 ) = OLDEMN + I0 = SPLT + 1 + END IF + END IF +* + 140 CONTINUE +* + INFO = 2 +* +* Maximum number of iterations exceeded, restore the shift +* SIGMA and place the new d's and e's in a qd array. +* This might need to be done for several blocks +* + I1 = I0 + N1 = N0 + 145 CONTINUE + TEMPQ = Z( 4*I0-3 ) + Z( 4*I0-3 ) = Z( 4*I0-3 ) + SIGMA + DO K = I0+1, N0 + TEMPE = Z( 4*K-5 ) + Z( 4*K-5 ) = Z( 4*K-5 ) * (TEMPQ / Z( 4*K-7 )) + TEMPQ = Z( 4*K-3 ) + Z( 4*K-3 ) = Z( 4*K-3 ) + SIGMA + TEMPE - Z( 4*K-5 ) + END DO +* +* Prepare to do this on the previous block if there is one +* + IF( I1.GT.1 ) THEN + N1 = I1-1 + DO WHILE( ( I1.GE.2 ) .AND. ( Z(4*I1-5).GE.ZERO ) ) + I1 = I1 - 1 + END DO + IF( I1.GE.1 ) THEN + SIGMA = -Z(4*N1-1) + GO TO 145 + END IF + END IF + + DO K = 1, N + Z( 2*K-1 ) = Z( 4*K-3 ) +* +* Only the block 1..N0 is unfinished. The rest of the e's +* must be essentially zero, although sometimes other data +* has been stored in them. +* + IF( K.LT.N0 ) THEN + Z( 2*K ) = Z( 4*K-1 ) + ELSE + Z( 2*K ) = 0 + END IF + END DO + RETURN +* +* end IWHILB +* + 150 CONTINUE +* + 160 CONTINUE +* + INFO = 3 + RETURN +* +* end IWHILA +* + 170 CONTINUE +* +* Move q's to the front. +* + DO 180 K = 2, N + Z( K ) = Z( 4*K-3 ) + 180 CONTINUE +* +* Sort and compute sum of eigenvalues. +* + CALL SLASRT( 'D', N, Z, IINFO ) +* + E = ZERO + DO 190 K = N, 1, -1 + E = E + Z( K ) + 190 CONTINUE +* +* Store trace, sum(eigenvalues) and information on performance. +* + Z( 2*N+1 ) = TRACE + Z( 2*N+2 ) = E + Z( 2*N+3 ) = REAL( ITER ) + Z( 2*N+4 ) = REAL( NDIV ) / REAL( N**2 ) + Z( 2*N+5 ) = HUNDRD*NFAIL / REAL( ITER ) + RETURN +* +* End of SLASQ2 +* + END diff --git a/dspl/liblapack/SRC/slasq3.f b/dspl/liblapack/SRC/slasq3.f new file mode 100644 index 0000000..4bc0647 --- /dev/null +++ b/dspl/liblapack/SRC/slasq3.f @@ -0,0 +1,421 @@ +*> \brief \b SLASQ3 checks for deflation, computes a shift and calls dqds. Used by sbdsqr. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASQ3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL, +* ITER, NDIV, IEEE, TTYPE, DMIN1, DMIN2, DN, DN1, +* DN2, G, TAU ) +* +* .. Scalar Arguments .. +* LOGICAL IEEE +* INTEGER I0, ITER, N0, NDIV, NFAIL, PP +* REAL DESIG, DMIN, DMIN1, DMIN2, DN, DN1, DN2, G, +* $ QMAX, SIGMA, TAU +* .. +* .. Array Arguments .. +* REAL Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. +*> In case of failure it changes shifts, and tries again until output +*> is positive. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] I0 +*> \verbatim +*> I0 is INTEGER +*> First index. +*> \endverbatim +*> +*> \param[in,out] N0 +*> \verbatim +*> N0 is INTEGER +*> Last index. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension ( 4*N0 ) +*> Z holds the qd array. +*> \endverbatim +*> +*> \param[in,out] PP +*> \verbatim +*> PP is INTEGER +*> PP=0 for ping, PP=1 for pong. +*> PP=2 indicates that flipping was applied to the Z array +*> and that the initial tests for deflation should not be +*> performed. +*> \endverbatim +*> +*> \param[out] DMIN +*> \verbatim +*> DMIN is REAL +*> Minimum value of d. +*> \endverbatim +*> +*> \param[out] SIGMA +*> \verbatim +*> SIGMA is REAL +*> Sum of shifts used in current segment. +*> \endverbatim +*> +*> \param[in,out] DESIG +*> \verbatim +*> DESIG is REAL +*> Lower order part of SIGMA +*> \endverbatim +*> +*> \param[in] QMAX +*> \verbatim +*> QMAX is REAL +*> Maximum value of q. +*> \endverbatim +*> +*> \param[in,out] NFAIL +*> \verbatim +*> NFAIL is INTEGER +*> Increment NFAIL by 1 each time the shift was too big. +*> \endverbatim +*> +*> \param[in,out] ITER +*> \verbatim +*> ITER is INTEGER +*> Increment ITER by 1 for each iteration. +*> \endverbatim +*> +*> \param[in,out] NDIV +*> \verbatim +*> NDIV is INTEGER +*> Increment NDIV by 1 for each division. +*> \endverbatim +*> +*> \param[in] IEEE +*> \verbatim +*> IEEE is LOGICAL +*> Flag for IEEE or non IEEE arithmetic (passed to SLASQ5). +*> \endverbatim +*> +*> \param[in,out] TTYPE +*> \verbatim +*> TTYPE is INTEGER +*> Shift type. +*> \endverbatim +*> +*> \param[in,out] DMIN1 +*> \verbatim +*> DMIN1 is REAL +*> \endverbatim +*> +*> \param[in,out] DMIN2 +*> \verbatim +*> DMIN2 is REAL +*> \endverbatim +*> +*> \param[in,out] DN +*> \verbatim +*> DN is REAL +*> \endverbatim +*> +*> \param[in,out] DN1 +*> \verbatim +*> DN1 is REAL +*> \endverbatim +*> +*> \param[in,out] DN2 +*> \verbatim +*> DN2 is REAL +*> \endverbatim +*> +*> \param[in,out] G +*> \verbatim +*> G is REAL +*> \endverbatim +*> +*> \param[in,out] TAU +*> \verbatim +*> TAU is REAL +*> +*> These are passed as arguments in order to save their values +*> between calls to SLASQ3. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE SLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL, + $ ITER, NDIV, IEEE, TTYPE, DMIN1, DMIN2, DN, DN1, + $ DN2, G, TAU ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + LOGICAL IEEE + INTEGER I0, ITER, N0, NDIV, NFAIL, PP + REAL DESIG, DMIN, DMIN1, DMIN2, DN, DN1, DN2, G, + $ QMAX, SIGMA, TAU +* .. +* .. Array Arguments .. + REAL Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL CBIAS + PARAMETER ( CBIAS = 1.50E0 ) + REAL ZERO, QURTR, HALF, ONE, TWO, HUNDRD + PARAMETER ( ZERO = 0.0E0, QURTR = 0.250E0, HALF = 0.5E0, + $ ONE = 1.0E0, TWO = 2.0E0, HUNDRD = 100.0E0 ) +* .. +* .. Local Scalars .. + INTEGER IPN4, J4, N0IN, NN, TTYPE + REAL EPS, S, T, TEMP, TOL, TOL2 +* .. +* .. External Subroutines .. + EXTERNAL SLASQ4, SLASQ5, SLASQ6 +* .. +* .. External Function .. + REAL SLAMCH + LOGICAL SISNAN + EXTERNAL SISNAN, SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + N0IN = N0 + EPS = SLAMCH( 'Precision' ) + TOL = EPS*HUNDRD + TOL2 = TOL**2 +* +* Check for deflation. +* + 10 CONTINUE +* + IF( N0.LT.I0 ) + $ RETURN + IF( N0.EQ.I0 ) + $ GO TO 20 + NN = 4*N0 + PP + IF( N0.EQ.( I0+1 ) ) + $ GO TO 40 +* +* Check whether E(N0-1) is negligible, 1 eigenvalue. +* + IF( Z( NN-5 ).GT.TOL2*( SIGMA+Z( NN-3 ) ) .AND. + $ Z( NN-2*PP-4 ).GT.TOL2*Z( NN-7 ) ) + $ GO TO 30 +* + 20 CONTINUE +* + Z( 4*N0-3 ) = Z( 4*N0+PP-3 ) + SIGMA + N0 = N0 - 1 + GO TO 10 +* +* Check whether E(N0-2) is negligible, 2 eigenvalues. +* + 30 CONTINUE +* + IF( Z( NN-9 ).GT.TOL2*SIGMA .AND. + $ Z( NN-2*PP-8 ).GT.TOL2*Z( NN-11 ) ) + $ GO TO 50 +* + 40 CONTINUE +* + IF( Z( NN-3 ).GT.Z( NN-7 ) ) THEN + S = Z( NN-3 ) + Z( NN-3 ) = Z( NN-7 ) + Z( NN-7 ) = S + END IF + T = HALF*( ( Z( NN-7 )-Z( NN-3 ) )+Z( NN-5 ) ) + IF( Z( NN-5 ).GT.Z( NN-3 )*TOL2.AND.T.NE.ZERO ) THEN + S = Z( NN-3 )*( Z( NN-5 ) / T ) + IF( S.LE.T ) THEN + S = Z( NN-3 )*( Z( NN-5 ) / + $ ( T*( ONE+SQRT( ONE+S / T ) ) ) ) + ELSE + S = Z( NN-3 )*( Z( NN-5 ) / ( T+SQRT( T )*SQRT( T+S ) ) ) + END IF + T = Z( NN-7 ) + ( S+Z( NN-5 ) ) + Z( NN-3 ) = Z( NN-3 )*( Z( NN-7 ) / T ) + Z( NN-7 ) = T + END IF + Z( 4*N0-7 ) = Z( NN-7 ) + SIGMA + Z( 4*N0-3 ) = Z( NN-3 ) + SIGMA + N0 = N0 - 2 + GO TO 10 +* + 50 CONTINUE + IF( PP.EQ.2 ) + $ PP = 0 +* +* Reverse the qd-array, if warranted. +* + IF( DMIN.LE.ZERO .OR. N0.LT.N0IN ) THEN + IF( CBIAS*Z( 4*I0+PP-3 ).LT.Z( 4*N0+PP-3 ) ) THEN + IPN4 = 4*( I0+N0 ) + DO 60 J4 = 4*I0, 2*( I0+N0-1 ), 4 + TEMP = Z( J4-3 ) + Z( J4-3 ) = Z( IPN4-J4-3 ) + Z( IPN4-J4-3 ) = TEMP + TEMP = Z( J4-2 ) + Z( J4-2 ) = Z( IPN4-J4-2 ) + Z( IPN4-J4-2 ) = TEMP + TEMP = Z( J4-1 ) + Z( J4-1 ) = Z( IPN4-J4-5 ) + Z( IPN4-J4-5 ) = TEMP + TEMP = Z( J4 ) + Z( J4 ) = Z( IPN4-J4-4 ) + Z( IPN4-J4-4 ) = TEMP + 60 CONTINUE + IF( N0-I0.LE.4 ) THEN + Z( 4*N0+PP-1 ) = Z( 4*I0+PP-1 ) + Z( 4*N0-PP ) = Z( 4*I0-PP ) + END IF + DMIN2 = MIN( DMIN2, Z( 4*N0+PP-1 ) ) + Z( 4*N0+PP-1 ) = MIN( Z( 4*N0+PP-1 ), Z( 4*I0+PP-1 ), + $ Z( 4*I0+PP+3 ) ) + Z( 4*N0-PP ) = MIN( Z( 4*N0-PP ), Z( 4*I0-PP ), + $ Z( 4*I0-PP+4 ) ) + QMAX = MAX( QMAX, Z( 4*I0+PP-3 ), Z( 4*I0+PP+1 ) ) + DMIN = -ZERO + END IF + END IF +* +* Choose a shift. +* + CALL SLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, DN1, + $ DN2, TAU, TTYPE, G ) +* +* Call dqds until DMIN > 0. +* + 70 CONTINUE +* + CALL SLASQ5( I0, N0, Z, PP, TAU, SIGMA, DMIN, DMIN1, DMIN2, DN, + $ DN1, DN2, IEEE, EPS ) +* + NDIV = NDIV + ( N0-I0+2 ) + ITER = ITER + 1 +* +* Check status. +* + IF( DMIN.GE.ZERO .AND. DMIN1.GE.ZERO ) THEN +* +* Success. +* + GO TO 90 +* + ELSE IF( DMIN.LT.ZERO .AND. DMIN1.GT.ZERO .AND. + $ Z( 4*( N0-1 )-PP ).LT.TOL*( SIGMA+DN1 ) .AND. + $ ABS( DN ).LT.TOL*SIGMA ) THEN +* +* Convergence hidden by negative DN. +* + Z( 4*( N0-1 )-PP+2 ) = ZERO + DMIN = ZERO + GO TO 90 + ELSE IF( DMIN.LT.ZERO ) THEN +* +* TAU too big. Select new TAU and try again. +* + NFAIL = NFAIL + 1 + IF( TTYPE.LT.-22 ) THEN +* +* Failed twice. Play it safe. +* + TAU = ZERO + ELSE IF( DMIN1.GT.ZERO ) THEN +* +* Late failure. Gives excellent shift. +* + TAU = ( TAU+DMIN )*( ONE-TWO*EPS ) + TTYPE = TTYPE - 11 + ELSE +* +* Early failure. Divide by 4. +* + TAU = QURTR*TAU + TTYPE = TTYPE - 12 + END IF + GO TO 70 + ELSE IF( SISNAN( DMIN ) ) THEN +* +* NaN. +* + IF( TAU.EQ.ZERO ) THEN + GO TO 80 + ELSE + TAU = ZERO + GO TO 70 + END IF + ELSE +* +* Possible underflow. Play it safe. +* + GO TO 80 + END IF +* +* Risk of underflow. +* + 80 CONTINUE + CALL SLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN, DN1, DN2 ) + NDIV = NDIV + ( N0-I0+2 ) + ITER = ITER + 1 + TAU = ZERO +* + 90 CONTINUE + IF( TAU.LT.SIGMA ) THEN + DESIG = DESIG + TAU + T = SIGMA + DESIG + DESIG = DESIG - ( T-SIGMA ) + ELSE + T = SIGMA + TAU + DESIG = SIGMA - ( T-TAU ) + DESIG + END IF + SIGMA = T +* + RETURN +* +* End of SLASQ3 +* + END diff --git a/dspl/liblapack/SRC/slasq4.f b/dspl/liblapack/SRC/slasq4.f new file mode 100644 index 0000000..9931783 --- /dev/null +++ b/dspl/liblapack/SRC/slasq4.f @@ -0,0 +1,424 @@ +*> \brief \b SLASQ4 computes an approximation to the smallest eigenvalue using values of d from the previous transform. Used by sbdsqr. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASQ4 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, +* DN1, DN2, TAU, TTYPE, G ) +* +* .. Scalar Arguments .. +* INTEGER I0, N0, N0IN, PP, TTYPE +* REAL DMIN, DMIN1, DMIN2, DN, DN1, DN2, G, TAU +* .. +* .. Array Arguments .. +* REAL Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASQ4 computes an approximation TAU to the smallest eigenvalue +*> using values of d from the previous transform. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] I0 +*> \verbatim +*> I0 is INTEGER +*> First index. +*> \endverbatim +*> +*> \param[in] N0 +*> \verbatim +*> N0 is INTEGER +*> Last index. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is REAL array, dimension ( 4*N0 ) +*> Z holds the qd array. +*> \endverbatim +*> +*> \param[in] PP +*> \verbatim +*> PP is INTEGER +*> PP=0 for ping, PP=1 for pong. +*> \endverbatim +*> +*> \param[in] N0IN +*> \verbatim +*> N0IN is INTEGER +*> The value of N0 at start of EIGTEST. +*> \endverbatim +*> +*> \param[in] DMIN +*> \verbatim +*> DMIN is REAL +*> Minimum value of d. +*> \endverbatim +*> +*> \param[in] DMIN1 +*> \verbatim +*> DMIN1 is REAL +*> Minimum value of d, excluding D( N0 ). +*> \endverbatim +*> +*> \param[in] DMIN2 +*> \verbatim +*> DMIN2 is REAL +*> Minimum value of d, excluding D( N0 ) and D( N0-1 ). +*> \endverbatim +*> +*> \param[in] DN +*> \verbatim +*> DN is REAL +*> d(N) +*> \endverbatim +*> +*> \param[in] DN1 +*> \verbatim +*> DN1 is REAL +*> d(N-1) +*> \endverbatim +*> +*> \param[in] DN2 +*> \verbatim +*> DN2 is REAL +*> d(N-2) +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL +*> This is the shift. +*> \endverbatim +*> +*> \param[out] TTYPE +*> \verbatim +*> TTYPE is INTEGER +*> Shift type. +*> \endverbatim +*> +*> \param[in,out] G +*> \verbatim +*> G is REAL +*> G is passed as an argument in order to save its value between +*> calls to SLASQ4. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> CNST1 = 9/16 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, + $ DN1, DN2, TAU, TTYPE, G ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER I0, N0, N0IN, PP, TTYPE + REAL DMIN, DMIN1, DMIN2, DN, DN1, DN2, G, TAU +* .. +* .. Array Arguments .. + REAL Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL CNST1, CNST2, CNST3 + PARAMETER ( CNST1 = 0.5630E0, CNST2 = 1.010E0, + $ CNST3 = 1.050E0 ) + REAL QURTR, THIRD, HALF, ZERO, ONE, TWO, HUNDRD + PARAMETER ( QURTR = 0.250E0, THIRD = 0.3330E0, + $ HALF = 0.50E0, ZERO = 0.0E0, ONE = 1.0E0, + $ TWO = 2.0E0, HUNDRD = 100.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I4, NN, NP + REAL A2, B1, B2, GAM, GAP1, GAP2, S +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* A negative DMIN forces the shift to take that absolute value +* TTYPE records the type of shift. +* + IF( DMIN.LE.ZERO ) THEN + TAU = -DMIN + TTYPE = -1 + RETURN + END IF +* + NN = 4*N0 + PP + IF( N0IN.EQ.N0 ) THEN +* +* No eigenvalues deflated. +* + IF( DMIN.EQ.DN .OR. DMIN.EQ.DN1 ) THEN +* + B1 = SQRT( Z( NN-3 ) )*SQRT( Z( NN-5 ) ) + B2 = SQRT( Z( NN-7 ) )*SQRT( Z( NN-9 ) ) + A2 = Z( NN-7 ) + Z( NN-5 ) +* +* Cases 2 and 3. +* + IF( DMIN.EQ.DN .AND. DMIN1.EQ.DN1 ) THEN + GAP2 = DMIN2 - A2 - DMIN2*QURTR + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2 ) THEN + GAP1 = A2 - DN - ( B2 / GAP2 )*B2 + ELSE + GAP1 = A2 - DN - ( B1+B2 ) + END IF + IF( GAP1.GT.ZERO .AND. GAP1.GT.B1 ) THEN + S = MAX( DN-( B1 / GAP1 )*B1, HALF*DMIN ) + TTYPE = -2 + ELSE + S = ZERO + IF( DN.GT.B1 ) + $ S = DN - B1 + IF( A2.GT.( B1+B2 ) ) + $ S = MIN( S, A2-( B1+B2 ) ) + S = MAX( S, THIRD*DMIN ) + TTYPE = -3 + END IF + ELSE +* +* Case 4. +* + TTYPE = -4 + S = QURTR*DMIN + IF( DMIN.EQ.DN ) THEN + GAM = DN + A2 = ZERO + IF( Z( NN-5 ) .GT. Z( NN-7 ) ) + $ RETURN + B2 = Z( NN-5 ) / Z( NN-7 ) + NP = NN - 9 + ELSE + NP = NN - 2*PP + GAM = DN1 + IF( Z( NP-4 ) .GT. Z( NP-2 ) ) + $ RETURN + A2 = Z( NP-4 ) / Z( NP-2 ) + IF( Z( NN-9 ) .GT. Z( NN-11 ) ) + $ RETURN + B2 = Z( NN-9 ) / Z( NN-11 ) + NP = NN - 13 + END IF +* +* Approximate contribution to norm squared from I < NN-1. +* + A2 = A2 + B2 + DO 10 I4 = NP, 4*I0 - 1 + PP, -4 + IF( B2.EQ.ZERO ) + $ GO TO 20 + B1 = B2 + IF( Z( I4 ) .GT. Z( I4-2 ) ) + $ RETURN + B2 = B2*( Z( I4 ) / Z( I4-2 ) ) + A2 = A2 + B2 + IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 ) + $ GO TO 20 + 10 CONTINUE + 20 CONTINUE + A2 = CNST3*A2 +* +* Rayleigh quotient residual bound. +* + IF( A2.LT.CNST1 ) + $ S = GAM*( ONE-SQRT( A2 ) ) / ( ONE+A2 ) + END IF + ELSE IF( DMIN.EQ.DN2 ) THEN +* +* Case 5. +* + TTYPE = -5 + S = QURTR*DMIN +* +* Compute contribution to norm squared from I > NN-2. +* + NP = NN - 2*PP + B1 = Z( NP-2 ) + B2 = Z( NP-6 ) + GAM = DN2 + IF( Z( NP-8 ).GT.B2 .OR. Z( NP-4 ).GT.B1 ) + $ RETURN + A2 = ( Z( NP-8 ) / B2 )*( ONE+Z( NP-4 ) / B1 ) +* +* Approximate contribution to norm squared from I < NN-2. +* + IF( N0-I0.GT.2 ) THEN + B2 = Z( NN-13 ) / Z( NN-15 ) + A2 = A2 + B2 + DO 30 I4 = NN - 17, 4*I0 - 1 + PP, -4 + IF( B2.EQ.ZERO ) + $ GO TO 40 + B1 = B2 + IF( Z( I4 ) .GT. Z( I4-2 ) ) + $ RETURN + B2 = B2*( Z( I4 ) / Z( I4-2 ) ) + A2 = A2 + B2 + IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 ) + $ GO TO 40 + 30 CONTINUE + 40 CONTINUE + A2 = CNST3*A2 + END IF +* + IF( A2.LT.CNST1 ) + $ S = GAM*( ONE-SQRT( A2 ) ) / ( ONE+A2 ) + ELSE +* +* Case 6, no information to guide us. +* + IF( TTYPE.EQ.-6 ) THEN + G = G + THIRD*( ONE-G ) + ELSE IF( TTYPE.EQ.-18 ) THEN + G = QURTR*THIRD + ELSE + G = QURTR + END IF + S = G*DMIN + TTYPE = -6 + END IF +* + ELSE IF( N0IN.EQ.( N0+1 ) ) THEN +* +* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. +* + IF( DMIN1.EQ.DN1 .AND. DMIN2.EQ.DN2 ) THEN +* +* Cases 7 and 8. +* + TTYPE = -7 + S = THIRD*DMIN1 + IF( Z( NN-5 ).GT.Z( NN-7 ) ) + $ RETURN + B1 = Z( NN-5 ) / Z( NN-7 ) + B2 = B1 + IF( B2.EQ.ZERO ) + $ GO TO 60 + DO 50 I4 = 4*N0 - 9 + PP, 4*I0 - 1 + PP, -4 + A2 = B1 + IF( Z( I4 ).GT.Z( I4-2 ) ) + $ RETURN + B1 = B1*( Z( I4 ) / Z( I4-2 ) ) + B2 = B2 + B1 + IF( HUNDRD*MAX( B1, A2 ).LT.B2 ) + $ GO TO 60 + 50 CONTINUE + 60 CONTINUE + B2 = SQRT( CNST3*B2 ) + A2 = DMIN1 / ( ONE+B2**2 ) + GAP2 = HALF*DMIN2 - A2 + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2*A2 ) THEN + S = MAX( S, A2*( ONE-CNST2*A2*( B2 / GAP2 )*B2 ) ) + ELSE + S = MAX( S, A2*( ONE-CNST2*B2 ) ) + TTYPE = -8 + END IF + ELSE +* +* Case 9. +* + S = QURTR*DMIN1 + IF( DMIN1.EQ.DN1 ) + $ S = HALF*DMIN1 + TTYPE = -9 + END IF +* + ELSE IF( N0IN.EQ.( N0+2 ) ) THEN +* +* Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. +* +* Cases 10 and 11. +* + IF( DMIN2.EQ.DN2 .AND. TWO*Z( NN-5 ).LT.Z( NN-7 ) ) THEN + TTYPE = -10 + S = THIRD*DMIN2 + IF( Z( NN-5 ).GT.Z( NN-7 ) ) + $ RETURN + B1 = Z( NN-5 ) / Z( NN-7 ) + B2 = B1 + IF( B2.EQ.ZERO ) + $ GO TO 80 + DO 70 I4 = 4*N0 - 9 + PP, 4*I0 - 1 + PP, -4 + IF( Z( I4 ).GT.Z( I4-2 ) ) + $ RETURN + B1 = B1*( Z( I4 ) / Z( I4-2 ) ) + B2 = B2 + B1 + IF( HUNDRD*B1.LT.B2 ) + $ GO TO 80 + 70 CONTINUE + 80 CONTINUE + B2 = SQRT( CNST3*B2 ) + A2 = DMIN2 / ( ONE+B2**2 ) + GAP2 = Z( NN-7 ) + Z( NN-9 ) - + $ SQRT( Z( NN-11 ) )*SQRT( Z( NN-9 ) ) - A2 + IF( GAP2.GT.ZERO .AND. GAP2.GT.B2*A2 ) THEN + S = MAX( S, A2*( ONE-CNST2*A2*( B2 / GAP2 )*B2 ) ) + ELSE + S = MAX( S, A2*( ONE-CNST2*B2 ) ) + END IF + ELSE + S = QURTR*DMIN2 + TTYPE = -11 + END IF + ELSE IF( N0IN.GT.( N0+2 ) ) THEN +* +* Case 12, more than two eigenvalues deflated. No information. +* + S = ZERO + TTYPE = -12 + END IF +* + TAU = S + RETURN +* +* End of SLASQ4 +* + END diff --git a/dspl/liblapack/SRC/slasq5.f b/dspl/liblapack/SRC/slasq5.f new file mode 100644 index 0000000..967e0a8 --- /dev/null +++ b/dspl/liblapack/SRC/slasq5.f @@ -0,0 +1,411 @@ +*> \brief SLASQ5 computes one dqds transform in ping-pong form. Used by sbdsqr and sstegr. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASQ5 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASQ5( I0, N0, Z, PP, TAU, SIGMA, DMIN, DMIN1, DMIN2, DN, +* DNM1, DNM2, IEEE, EPS ) +* +* .. Scalar Arguments .. +* LOGICAL IEEE +* INTEGER I0, N0, PP +* REAL EPS, DMIN, DMIN1, DMIN2, DN, DNM1, DNM2, SIGMA, TAU +* .. +* .. Array Arguments .. +* REAL Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASQ5 computes one dqds transform in ping-pong form, one +*> version for IEEE machines another for non IEEE machines. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] I0 +*> \verbatim +*> I0 is INTEGER +*> First index. +*> \endverbatim +*> +*> \param[in] N0 +*> \verbatim +*> N0 is INTEGER +*> Last index. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is REAL array, dimension ( 4*N ) +*> Z holds the qd array. EMIN is stored in Z(4*N0) to avoid +*> an extra argument. +*> \endverbatim +*> +*> \param[in] PP +*> \verbatim +*> PP is INTEGER +*> PP=0 for ping, PP=1 for pong. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL +*> This is the shift. +*> \endverbatim +*> +*> \param[in] SIGMA +*> \verbatim +*> SIGMA is REAL +*> This is the accumulated shift up to this step. +*> \endverbatim +*> +*> \param[out] DMIN +*> \verbatim +*> DMIN is REAL +*> Minimum value of d. +*> \endverbatim +*> +*> \param[out] DMIN1 +*> \verbatim +*> DMIN1 is REAL +*> Minimum value of d, excluding D( N0 ). +*> \endverbatim +*> +*> \param[out] DMIN2 +*> \verbatim +*> DMIN2 is REAL +*> Minimum value of d, excluding D( N0 ) and D( N0-1 ). +*> \endverbatim +*> +*> \param[out] DN +*> \verbatim +*> DN is REAL +*> d(N0), the last value of d. +*> \endverbatim +*> +*> \param[out] DNM1 +*> \verbatim +*> DNM1 is REAL +*> d(N0-1). +*> \endverbatim +*> +*> \param[out] DNM2 +*> \verbatim +*> DNM2 is REAL +*> d(N0-2). +*> \endverbatim +*> +*> \param[in] IEEE +*> \verbatim +*> IEEE is LOGICAL +*> Flag for IEEE or non IEEE arithmetic. +*> \endverbatim +*> +*> \param[in] EPS +*> \verbatim +*> EPS is REAL +*> This is the value of epsilon used. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE SLASQ5( I0, N0, Z, PP, TAU, SIGMA, DMIN, DMIN1, DMIN2, + $ DN, DNM1, DNM2, IEEE, EPS ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL IEEE + INTEGER I0, N0, PP + REAL DMIN, DMIN1, DMIN2, DN, DNM1, DNM2, TAU, + $ SIGMA, EPS +* .. +* .. Array Arguments .. + REAL Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameter .. + REAL ZERO, HALF + PARAMETER ( ZERO = 0.0E0, HALF = 0.5 ) +* .. +* .. Local Scalars .. + INTEGER J4, J4P2 + REAL D, EMIN, TEMP, DTHRESH +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( ( N0-I0-1 ).LE.0 ) + $ RETURN +* + DTHRESH = EPS*(SIGMA+TAU) + IF( TAU.LT.DTHRESH*HALF ) TAU = ZERO + IF( TAU.NE.ZERO ) THEN + J4 = 4*I0 + PP - 3 + EMIN = Z( J4+4 ) + D = Z( J4 ) - TAU + DMIN = D + DMIN1 = -Z( J4 ) +* + IF( IEEE ) THEN +* +* Code for IEEE arithmetic. +* + IF( PP.EQ.0 ) THEN + DO 10 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + TEMP = Z( J4+1 ) / Z( J4-2 ) + D = D*TEMP - TAU + DMIN = MIN( DMIN, D ) + Z( J4 ) = Z( J4-1 )*TEMP + EMIN = MIN( Z( J4 ), EMIN ) + 10 CONTINUE + ELSE + DO 20 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + TEMP = Z( J4+2 ) / Z( J4-3 ) + D = D*TEMP - TAU + DMIN = MIN( DMIN, D ) + Z( J4-1 ) = Z( J4 )*TEMP + EMIN = MIN( Z( J4-1 ), EMIN ) + 20 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU + DMIN = MIN( DMIN, DN ) +* + ELSE +* +* Code for non IEEE arithmetic. +* + IF( PP.EQ.0 ) THEN + DO 30 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + IF( D.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) ) + D = Z( J4+1 )*( D / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4 ) ) + 30 CONTINUE + ELSE + DO 40 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + IF( D.LT.ZERO ) THEN + RETURN + ELSE + Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) ) + D = Z( J4+2 )*( D / Z( J4-3 ) ) - TAU + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4-1 ) ) + 40 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + IF( DNM2.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + IF( DNM1.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, DN ) +* + END IF +* + ELSE +* This is the version that sets d's to zero if they are small enough + J4 = 4*I0 + PP - 3 + EMIN = Z( J4+4 ) + D = Z( J4 ) - TAU + DMIN = D + DMIN1 = -Z( J4 ) + IF( IEEE ) THEN +* +* Code for IEEE arithmetic. +* + IF( PP.EQ.0 ) THEN + DO 50 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + TEMP = Z( J4+1 ) / Z( J4-2 ) + D = D*TEMP - TAU + IF( D.LT.DTHRESH ) D = ZERO + DMIN = MIN( DMIN, D ) + Z( J4 ) = Z( J4-1 )*TEMP + EMIN = MIN( Z( J4 ), EMIN ) + 50 CONTINUE + ELSE + DO 60 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + TEMP = Z( J4+2 ) / Z( J4-3 ) + D = D*TEMP - TAU + IF( D.LT.DTHRESH ) D = ZERO + DMIN = MIN( DMIN, D ) + Z( J4-1 ) = Z( J4 )*TEMP + EMIN = MIN( Z( J4-1 ), EMIN ) + 60 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU + DMIN = MIN( DMIN, DN ) +* + ELSE +* +* Code for non IEEE arithmetic. +* + IF( PP.EQ.0 ) THEN + DO 70 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + IF( D.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) ) + D = Z( J4+1 )*( D / Z( J4-2 ) ) - TAU + END IF + IF( D.LT.DTHRESH ) D = ZERO + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4 ) ) + 70 CONTINUE + ELSE + DO 80 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + IF( D.LT.ZERO ) THEN + RETURN + ELSE + Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) ) + D = Z( J4+2 )*( D / Z( J4-3 ) ) - TAU + END IF + IF( D.LT.DTHRESH ) D = ZERO + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4-1 ) ) + 80 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + IF( DNM2.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + IF( DNM1.LT.ZERO ) THEN + RETURN + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU + END IF + DMIN = MIN( DMIN, DN ) +* + END IF +* + END IF + Z( J4+2 ) = DN + Z( 4*N0-PP ) = EMIN + RETURN +* +* End of SLASQ5 +* + END diff --git a/dspl/liblapack/SRC/slasq6.f b/dspl/liblapack/SRC/slasq6.f new file mode 100644 index 0000000..afb9b81 --- /dev/null +++ b/dspl/liblapack/SRC/slasq6.f @@ -0,0 +1,254 @@ +*> \brief \b SLASQ6 computes one dqd transform in ping-pong form. Used by sbdsqr and sstegr. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASQ6 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN, +* DNM1, DNM2 ) +* +* .. Scalar Arguments .. +* INTEGER I0, N0, PP +* REAL DMIN, DMIN1, DMIN2, DN, DNM1, DNM2 +* .. +* .. Array Arguments .. +* REAL Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASQ6 computes one dqd (shift equal to zero) transform in +*> ping-pong form, with protection against underflow and overflow. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] I0 +*> \verbatim +*> I0 is INTEGER +*> First index. +*> \endverbatim +*> +*> \param[in] N0 +*> \verbatim +*> N0 is INTEGER +*> Last index. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is REAL array, dimension ( 4*N ) +*> Z holds the qd array. EMIN is stored in Z(4*N0) to avoid +*> an extra argument. +*> \endverbatim +*> +*> \param[in] PP +*> \verbatim +*> PP is INTEGER +*> PP=0 for ping, PP=1 for pong. +*> \endverbatim +*> +*> \param[out] DMIN +*> \verbatim +*> DMIN is REAL +*> Minimum value of d. +*> \endverbatim +*> +*> \param[out] DMIN1 +*> \verbatim +*> DMIN1 is REAL +*> Minimum value of d, excluding D( N0 ). +*> \endverbatim +*> +*> \param[out] DMIN2 +*> \verbatim +*> DMIN2 is REAL +*> Minimum value of d, excluding D( N0 ) and D( N0-1 ). +*> \endverbatim +*> +*> \param[out] DN +*> \verbatim +*> DN is REAL +*> d(N0), the last value of d. +*> \endverbatim +*> +*> \param[out] DNM1 +*> \verbatim +*> DNM1 is REAL +*> d(N0-1). +*> \endverbatim +*> +*> \param[out] DNM2 +*> \verbatim +*> DNM2 is REAL +*> d(N0-2). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE SLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN, + $ DNM1, DNM2 ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER I0, N0, PP + REAL DMIN, DMIN1, DMIN2, DN, DNM1, DNM2 +* .. +* .. Array Arguments .. + REAL Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameter .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER J4, J4P2 + REAL D, EMIN, SAFMIN, TEMP +* .. +* .. External Function .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( ( N0-I0-1 ).LE.0 ) + $ RETURN +* + SAFMIN = SLAMCH( 'Safe minimum' ) + J4 = 4*I0 + PP - 3 + EMIN = Z( J4+4 ) + D = Z( J4 ) + DMIN = D +* + IF( PP.EQ.0 ) THEN + DO 10 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-2 ) = D + Z( J4-1 ) + IF( Z( J4-2 ).EQ.ZERO ) THEN + Z( J4 ) = ZERO + D = Z( J4+1 ) + DMIN = D + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4+1 ).LT.Z( J4-2 ) .AND. + $ SAFMIN*Z( J4-2 ).LT.Z( J4+1 ) ) THEN + TEMP = Z( J4+1 ) / Z( J4-2 ) + Z( J4 ) = Z( J4-1 )*TEMP + D = D*TEMP + ELSE + Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) ) + D = Z( J4+1 )*( D / Z( J4-2 ) ) + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4 ) ) + 10 CONTINUE + ELSE + DO 20 J4 = 4*I0, 4*( N0-3 ), 4 + Z( J4-3 ) = D + Z( J4 ) + IF( Z( J4-3 ).EQ.ZERO ) THEN + Z( J4-1 ) = ZERO + D = Z( J4+2 ) + DMIN = D + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4+2 ).LT.Z( J4-3 ) .AND. + $ SAFMIN*Z( J4-3 ).LT.Z( J4+2 ) ) THEN + TEMP = Z( J4+2 ) / Z( J4-3 ) + Z( J4-1 ) = Z( J4 )*TEMP + D = D*TEMP + ELSE + Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) ) + D = Z( J4+2 )*( D / Z( J4-3 ) ) + END IF + DMIN = MIN( DMIN, D ) + EMIN = MIN( EMIN, Z( J4-1 ) ) + 20 CONTINUE + END IF +* +* Unroll last two steps. +* + DNM2 = D + DMIN2 = DMIN + J4 = 4*( N0-2 ) - PP + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM2 + Z( J4P2 ) + IF( Z( J4-2 ).EQ.ZERO ) THEN + Z( J4 ) = ZERO + DNM1 = Z( J4P2+2 ) + DMIN = DNM1 + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4P2+2 ).LT.Z( J4-2 ) .AND. + $ SAFMIN*Z( J4-2 ).LT.Z( J4P2+2 ) ) THEN + TEMP = Z( J4P2+2 ) / Z( J4-2 ) + Z( J4 ) = Z( J4P2 )*TEMP + DNM1 = DNM2*TEMP + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) + END IF + DMIN = MIN( DMIN, DNM1 ) +* + DMIN1 = DMIN + J4 = J4 + 4 + J4P2 = J4 + 2*PP - 1 + Z( J4-2 ) = DNM1 + Z( J4P2 ) + IF( Z( J4-2 ).EQ.ZERO ) THEN + Z( J4 ) = ZERO + DN = Z( J4P2+2 ) + DMIN = DN + EMIN = ZERO + ELSE IF( SAFMIN*Z( J4P2+2 ).LT.Z( J4-2 ) .AND. + $ SAFMIN*Z( J4-2 ).LT.Z( J4P2+2 ) ) THEN + TEMP = Z( J4P2+2 ) / Z( J4-2 ) + Z( J4 ) = Z( J4P2 )*TEMP + DN = DNM1*TEMP + ELSE + Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) ) + DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) + END IF + DMIN = MIN( DMIN, DN ) +* + Z( J4+2 ) = DN + Z( 4*N0-PP ) = EMIN + RETURN +* +* End of SLASQ6 +* + END diff --git a/dspl/liblapack/SRC/slasr.f b/dspl/liblapack/SRC/slasr.f new file mode 100644 index 0000000..6e18337 --- /dev/null +++ b/dspl/liblapack/SRC/slasr.f @@ -0,0 +1,436 @@ +*> \brief \b SLASR applies a sequence of plane rotations to a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, PIVOT, SIDE +* INTEGER LDA, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), C( * ), S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASR applies a sequence of plane rotations to a real matrix A, +*> from either the left or the right. +*> +*> When SIDE = 'L', the transformation takes the form +*> +*> A := P*A +*> +*> and when SIDE = 'R', the transformation takes the form +*> +*> A := A*P**T +*> +*> where P is an orthogonal matrix consisting of a sequence of z plane +*> rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', +*> and P**T is the transpose of P. +*> +*> When DIRECT = 'F' (Forward sequence), then +*> +*> P = P(z-1) * ... * P(2) * P(1) +*> +*> and when DIRECT = 'B' (Backward sequence), then +*> +*> P = P(1) * P(2) * ... * P(z-1) +*> +*> where P(k) is a plane rotation matrix defined by the 2-by-2 rotation +*> +*> R(k) = ( c(k) s(k) ) +*> = ( -s(k) c(k) ). +*> +*> When PIVOT = 'V' (Variable pivot), the rotation is performed +*> for the plane (k,k+1), i.e., P(k) has the form +*> +*> P(k) = ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( c(k) s(k) ) +*> ( -s(k) c(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> +*> where R(k) appears as a rank-2 modification to the identity matrix in +*> rows and columns k and k+1. +*> +*> When PIVOT = 'T' (Top pivot), the rotation is performed for the +*> plane (1,k+1), so P(k) has the form +*> +*> P(k) = ( c(k) s(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( -s(k) c(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> +*> where R(k) appears in rows and columns 1 and k+1. +*> +*> Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is +*> performed for the plane (k,z), giving P(k) the form +*> +*> P(k) = ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( c(k) s(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( -s(k) c(k) ) +*> +*> where R(k) appears in rows and columns k and z. The rotations are +*> performed without ever forming P(k) explicitly. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> Specifies whether the plane rotation matrix P is applied to +*> A on the left or the right. +*> = 'L': Left, compute A := P*A +*> = 'R': Right, compute A:= A*P**T +*> \endverbatim +*> +*> \param[in] PIVOT +*> \verbatim +*> PIVOT is CHARACTER*1 +*> Specifies the plane for which P(k) is a plane rotation +*> matrix. +*> = 'V': Variable pivot, the plane (k,k+1) +*> = 'T': Top pivot, the plane (1,k+1) +*> = 'B': Bottom pivot, the plane (k,z) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies whether P is a forward or backward sequence of +*> plane rotations. +*> = 'F': Forward, P = P(z-1)*...*P(2)*P(1) +*> = 'B': Backward, P = P(1)*P(2)*...*P(z-1) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. If m <= 1, an immediate +*> return is effected. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. If n <= 1, an +*> immediate return is effected. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is REAL array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> The cosines c(k) of the plane rotations. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is REAL array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> The sines s(k) of the plane rotations. The 2-by-2 plane +*> rotation part of the matrix P(k), R(k), has the form +*> R(k) = ( c(k) s(k) ) +*> ( -s(k) c(k) ). +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The M-by-N matrix A. On exit, A is overwritten by P*A if +*> SIDE = 'R' or by A*P**T if SIDE = 'L'. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, PIVOT, SIDE + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( * ), S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J + REAL CTEMP, STEMP, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.( LSAME( SIDE, 'L' ) .OR. LSAME( SIDE, 'R' ) ) ) THEN + INFO = 1 + ELSE IF( .NOT.( LSAME( PIVOT, 'V' ) .OR. LSAME( PIVOT, + $ 'T' ) .OR. LSAME( PIVOT, 'B' ) ) ) THEN + INFO = 2 + ELSE IF( .NOT.( LSAME( DIRECT, 'F' ) .OR. LSAME( DIRECT, 'B' ) ) ) + $ THEN + INFO = 3 + ELSE IF( M.LT.0 ) THEN + INFO = 4 + ELSE IF( N.LT.0 ) THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = 9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASR ', INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form P * A +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 20 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 10 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 40 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 30 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 30 CONTINUE + END IF + 40 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 60 J = 2, M + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 50 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 80 J = M, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 70 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 70 CONTINUE + END IF + 80 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 100 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 90 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 120 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 110 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 110 CONTINUE + END IF + 120 CONTINUE + END IF + END IF + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form A * P**T +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 140 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 130 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 130 CONTINUE + END IF + 140 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 160 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 150 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 150 CONTINUE + END IF + 160 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 180 J = 2, N + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 170 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 170 CONTINUE + END IF + 180 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 200 J = N, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 190 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 190 CONTINUE + END IF + 200 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 220 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 210 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 210 CONTINUE + END IF + 220 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 240 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 230 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 230 CONTINUE + END IF + 240 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of SLASR +* + END diff --git a/dspl/liblapack/SRC/slasrt.f b/dspl/liblapack/SRC/slasrt.f new file mode 100644 index 0000000..ef6aa4d --- /dev/null +++ b/dspl/liblapack/SRC/slasrt.f @@ -0,0 +1,303 @@ +*> \brief \b SLASRT sorts numbers in increasing or decreasing order. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASRT( ID, N, D, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER ID +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* REAL D( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Sort the numbers in D in increasing order (if ID = 'I') or +*> in decreasing order (if ID = 'D' ). +*> +*> Use Quick Sort, reverting to Insertion sort on arrays of +*> size <= 20. Dimension of STACK limits N to about 2**32. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ID +*> \verbatim +*> ID is CHARACTER*1 +*> = 'I': sort D in increasing order; +*> = 'D': sort D in decreasing order. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of the array D. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the array to be sorted. +*> On exit, D has been sorted into increasing order +*> (D(1) <= ... <= D(N) ) or into decreasing order +*> (D(1) >= ... >= D(N) ), depending on ID. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE SLASRT( ID, N, D, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER ID + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL D( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER SELECT + PARAMETER ( SELECT = 20 ) +* .. +* .. Local Scalars .. + INTEGER DIR, ENDD, I, J, START, STKPNT + REAL D1, D2, D3, DMNMX, TMP +* .. +* .. Local Arrays .. + INTEGER STACK( 2, 32 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + DIR = -1 + IF( LSAME( ID, 'D' ) ) THEN + DIR = 0 + ELSE IF( LSAME( ID, 'I' ) ) THEN + DIR = 1 + END IF + IF( DIR.EQ.-1 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASRT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + STKPNT = 1 + STACK( 1, 1 ) = 1 + STACK( 2, 1 ) = N + 10 CONTINUE + START = STACK( 1, STKPNT ) + ENDD = STACK( 2, STKPNT ) + STKPNT = STKPNT - 1 + IF( ENDD-START.LE.SELECT .AND. ENDD-START.GT.0 ) THEN +* +* Do Insertion sort on D( START:ENDD ) +* + IF( DIR.EQ.0 ) THEN +* +* Sort into decreasing order +* + DO 30 I = START + 1, ENDD + DO 20 J = I, START + 1, -1 + IF( D( J ).GT.D( J-1 ) ) THEN + DMNMX = D( J ) + D( J ) = D( J-1 ) + D( J-1 ) = DMNMX + ELSE + GO TO 30 + END IF + 20 CONTINUE + 30 CONTINUE +* + ELSE +* +* Sort into increasing order +* + DO 50 I = START + 1, ENDD + DO 40 J = I, START + 1, -1 + IF( D( J ).LT.D( J-1 ) ) THEN + DMNMX = D( J ) + D( J ) = D( J-1 ) + D( J-1 ) = DMNMX + ELSE + GO TO 50 + END IF + 40 CONTINUE + 50 CONTINUE +* + END IF +* + ELSE IF( ENDD-START.GT.SELECT ) THEN +* +* Partition D( START:ENDD ) and stack parts, largest one first +* +* Choose partition entry as median of 3 +* + D1 = D( START ) + D2 = D( ENDD ) + I = ( START+ENDD ) / 2 + D3 = D( I ) + IF( D1.LT.D2 ) THEN + IF( D3.LT.D1 ) THEN + DMNMX = D1 + ELSE IF( D3.LT.D2 ) THEN + DMNMX = D3 + ELSE + DMNMX = D2 + END IF + ELSE + IF( D3.LT.D2 ) THEN + DMNMX = D2 + ELSE IF( D3.LT.D1 ) THEN + DMNMX = D3 + ELSE + DMNMX = D1 + END IF + END IF +* + IF( DIR.EQ.0 ) THEN +* +* Sort into decreasing order +* + I = START - 1 + J = ENDD + 1 + 60 CONTINUE + 70 CONTINUE + J = J - 1 + IF( D( J ).LT.DMNMX ) + $ GO TO 70 + 80 CONTINUE + I = I + 1 + IF( D( I ).GT.DMNMX ) + $ GO TO 80 + IF( I.LT.J ) THEN + TMP = D( I ) + D( I ) = D( J ) + D( J ) = TMP + GO TO 60 + END IF + IF( J-START.GT.ENDD-J-1 ) THEN + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + ELSE + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + END IF + ELSE +* +* Sort into increasing order +* + I = START - 1 + J = ENDD + 1 + 90 CONTINUE + 100 CONTINUE + J = J - 1 + IF( D( J ).GT.DMNMX ) + $ GO TO 100 + 110 CONTINUE + I = I + 1 + IF( D( I ).LT.DMNMX ) + $ GO TO 110 + IF( I.LT.J ) THEN + TMP = D( I ) + D( I ) = D( J ) + D( J ) = TMP + GO TO 90 + END IF + IF( J-START.GT.ENDD-J-1 ) THEN + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + ELSE + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = J + 1 + STACK( 2, STKPNT ) = ENDD + STKPNT = STKPNT + 1 + STACK( 1, STKPNT ) = START + STACK( 2, STKPNT ) = J + END IF + END IF + END IF + IF( STKPNT.GT.0 ) + $ GO TO 10 + RETURN +* +* End of SLASRT +* + END diff --git a/dspl/liblapack/SRC/slassq.f b/dspl/liblapack/SRC/slassq.f new file mode 100644 index 0000000..35b40f0 --- /dev/null +++ b/dspl/liblapack/SRC/slassq.f @@ -0,0 +1,155 @@ +*> \brief \b SLASSQ updates a sum of squares represented in scaled form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASSQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASSQ( N, X, INCX, SCALE, SUMSQ ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* REAL SCALE, SUMSQ +* .. +* .. Array Arguments .. +* REAL X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASSQ returns the values scl and smsq such that +*> +*> ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, +*> +*> where x( i ) = X( 1 + ( i - 1 )*INCX ). The value of sumsq is +*> assumed to be non-negative and scl returns the value +*> +*> scl = max( scale, abs( x( i ) ) ). +*> +*> scale and sumsq must be supplied in SCALE and SUMSQ and +*> scl and smsq are overwritten on SCALE and SUMSQ respectively. +*> +*> The routine makes only one pass through the vector x. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of elements to be used from the vector X. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is REAL array, dimension (N) +*> The vector for which a scaled sum of squares is computed. +*> x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive values of the vector X. +*> INCX > 0. +*> \endverbatim +*> +*> \param[in,out] SCALE +*> \verbatim +*> SCALE is REAL +*> On entry, the value scale in the equation above. +*> On exit, SCALE is overwritten with scl , the scaling factor +*> for the sum of squares. +*> \endverbatim +*> +*> \param[in,out] SUMSQ +*> \verbatim +*> SUMSQ is REAL +*> On entry, the value sumsq in the equation above. +*> On exit, SUMSQ is overwritten with smsq , the basic sum of +*> squares from which scl has been factored out. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLASSQ( N, X, INCX, SCALE, SUMSQ ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCX, N + REAL SCALE, SUMSQ +* .. +* .. Array Arguments .. + REAL X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER IX + REAL ABSXI +* .. +* .. External Functions .. + LOGICAL SISNAN + EXTERNAL SISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + IF( N.GT.0 ) THEN + DO 10 IX = 1, 1 + ( N-1 )*INCX, INCX + ABSXI = ABS( X( IX ) ) + IF( ABSXI.GT.ZERO.OR.SISNAN( ABSXI ) ) THEN + IF( SCALE.LT.ABSXI ) THEN + SUMSQ = 1 + SUMSQ*( SCALE / ABSXI )**2 + SCALE = ABSXI + ELSE + SUMSQ = SUMSQ + ( ABSXI / SCALE )**2 + END IF + END IF + 10 CONTINUE + END IF + RETURN +* +* End of SLASSQ +* + END diff --git a/dspl/liblapack/SRC/slasv2.f b/dspl/liblapack/SRC/slasv2.f new file mode 100644 index 0000000..3cd556e --- /dev/null +++ b/dspl/liblapack/SRC/slasv2.f @@ -0,0 +1,325 @@ +*> \brief \b SLASV2 computes the singular value decomposition of a 2-by-2 triangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASV2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASV2( F, G, H, SSMIN, SSMAX, SNR, CSR, SNL, CSL ) +* +* .. Scalar Arguments .. +* REAL CSL, CSR, F, G, H, SNL, SNR, SSMAX, SSMIN +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASV2 computes the singular value decomposition of a 2-by-2 +*> triangular matrix +*> [ F G ] +*> [ 0 H ]. +*> On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the +*> smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and +*> right singular vectors for abs(SSMAX), giving the decomposition +*> +*> [ CSL SNL ] [ F G ] [ CSR -SNR ] = [ SSMAX 0 ] +*> [-SNL CSL ] [ 0 H ] [ SNR CSR ] [ 0 SSMIN ]. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] F +*> \verbatim +*> F is REAL +*> The (1,1) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] G +*> \verbatim +*> G is REAL +*> The (1,2) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] H +*> \verbatim +*> H is REAL +*> The (2,2) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[out] SSMIN +*> \verbatim +*> SSMIN is REAL +*> abs(SSMIN) is the smaller singular value. +*> \endverbatim +*> +*> \param[out] SSMAX +*> \verbatim +*> SSMAX is REAL +*> abs(SSMAX) is the larger singular value. +*> \endverbatim +*> +*> \param[out] SNL +*> \verbatim +*> SNL is REAL +*> \endverbatim +*> +*> \param[out] CSL +*> \verbatim +*> CSL is REAL +*> The vector (CSL, SNL) is a unit left singular vector for the +*> singular value abs(SSMAX). +*> \endverbatim +*> +*> \param[out] SNR +*> \verbatim +*> SNR is REAL +*> \endverbatim +*> +*> \param[out] CSR +*> \verbatim +*> CSR is REAL +*> The vector (CSR, SNR) is a unit right singular vector for the +*> singular value abs(SSMAX). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Any input parameter may be aliased with any output parameter. +*> +*> Barring over/underflow and assuming a guard digit in subtraction, all +*> output quantities are correct to within a few units in the last +*> place (ulps). +*> +*> In IEEE arithmetic, the code works correctly if one matrix element is +*> infinite. +*> +*> Overflow will not occur unless the largest singular value itself +*> overflows or is within a few ulps of overflow. (On machines with +*> partial overflow, like the Cray, overflow may occur if the largest +*> singular value is within a factor of 2 of overflow.) +*> +*> Underflow is harmless if underflow is gradual. Otherwise, results +*> may correspond to a matrix modified by perturbations of size near +*> the underflow threshold. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLASV2( F, G, H, SSMIN, SSMAX, SNR, CSR, SNL, CSL ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + REAL CSL, CSR, F, G, H, SNL, SNR, SSMAX, SSMIN +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E0 ) + REAL HALF + PARAMETER ( HALF = 0.5E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) + REAL TWO + PARAMETER ( TWO = 2.0E0 ) + REAL FOUR + PARAMETER ( FOUR = 4.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL GASMAL, SWAP + INTEGER PMAX + REAL A, CLT, CRT, D, FA, FT, GA, GT, HA, HT, L, M, + $ MM, R, S, SLT, SRT, T, TEMP, TSIGN, TT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Executable Statements .. +* + FT = F + FA = ABS( FT ) + HT = H + HA = ABS( H ) +* +* PMAX points to the maximum absolute element of matrix +* PMAX = 1 if F largest in absolute values +* PMAX = 2 if G largest in absolute values +* PMAX = 3 if H largest in absolute values +* + PMAX = 1 + SWAP = ( HA.GT.FA ) + IF( SWAP ) THEN + PMAX = 3 + TEMP = FT + FT = HT + HT = TEMP + TEMP = FA + FA = HA + HA = TEMP +* +* Now FA .ge. HA +* + END IF + GT = G + GA = ABS( GT ) + IF( GA.EQ.ZERO ) THEN +* +* Diagonal matrix +* + SSMIN = HA + SSMAX = FA + CLT = ONE + CRT = ONE + SLT = ZERO + SRT = ZERO + ELSE + GASMAL = .TRUE. + IF( GA.GT.FA ) THEN + PMAX = 2 + IF( ( FA / GA ).LT.SLAMCH( 'EPS' ) ) THEN +* +* Case of very large GA +* + GASMAL = .FALSE. + SSMAX = GA + IF( HA.GT.ONE ) THEN + SSMIN = FA / ( GA / HA ) + ELSE + SSMIN = ( FA / GA )*HA + END IF + CLT = ONE + SLT = HT / GT + SRT = ONE + CRT = FT / GT + END IF + END IF + IF( GASMAL ) THEN +* +* Normal case +* + D = FA - HA + IF( D.EQ.FA ) THEN +* +* Copes with infinite F or H +* + L = ONE + ELSE + L = D / FA + END IF +* +* Note that 0 .le. L .le. 1 +* + M = GT / FT +* +* Note that abs(M) .le. 1/macheps +* + T = TWO - L +* +* Note that T .ge. 1 +* + MM = M*M + TT = T*T + S = SQRT( TT+MM ) +* +* Note that 1 .le. S .le. 1 + 1/macheps +* + IF( L.EQ.ZERO ) THEN + R = ABS( M ) + ELSE + R = SQRT( L*L+MM ) + END IF +* +* Note that 0 .le. R .le. 1 + 1/macheps +* + A = HALF*( S+R ) +* +* Note that 1 .le. A .le. 1 + abs(M) +* + SSMIN = HA / A + SSMAX = FA*A + IF( MM.EQ.ZERO ) THEN +* +* Note that M is very tiny +* + IF( L.EQ.ZERO ) THEN + T = SIGN( TWO, FT )*SIGN( ONE, GT ) + ELSE + T = GT / SIGN( D, FT ) + M / T + END IF + ELSE + T = ( M / ( S+T )+M / ( R+L ) )*( ONE+A ) + END IF + L = SQRT( T*T+FOUR ) + CRT = TWO / L + SRT = T / L + CLT = ( CRT+SRT*M ) / A + SLT = ( HT / FT )*SRT / A + END IF + END IF + IF( SWAP ) THEN + CSL = SRT + SNL = CRT + CSR = SLT + SNR = CLT + ELSE + CSL = CLT + SNL = SLT + CSR = CRT + SNR = SRT + END IF +* +* Correct signs of SSMAX and SSMIN +* + IF( PMAX.EQ.1 ) + $ TSIGN = SIGN( ONE, CSR )*SIGN( ONE, CSL )*SIGN( ONE, F ) + IF( PMAX.EQ.2 ) + $ TSIGN = SIGN( ONE, SNR )*SIGN( ONE, CSL )*SIGN( ONE, G ) + IF( PMAX.EQ.3 ) + $ TSIGN = SIGN( ONE, SNR )*SIGN( ONE, SNL )*SIGN( ONE, H ) + SSMAX = SIGN( SSMAX, TSIGN ) + SSMIN = SIGN( SSMIN, TSIGN*SIGN( ONE, F )*SIGN( ONE, H ) ) + RETURN +* +* End of SLASV2 +* + END diff --git a/dspl/liblapack/SRC/slaswlq.f b/dspl/liblapack/SRC/slaswlq.f new file mode 100644 index 0000000..27b5b80 --- /dev/null +++ b/dspl/liblapack/SRC/slaswlq.f @@ -0,0 +1,258 @@ +* +* Definition: +* =========== +* +* SUBROUTINE SLASWLQ( M, N, MB, NB, A, LDA, T, LDT, WORK, +* LWORK, INFO) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, MB, NB, LDT, LWORK +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASWLQ computes a blocked Short-Wide LQ factorization of a +*> M-by-N matrix A, where N >= M: +*> A = L * Q +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= M >= 0. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The row block size to be used in the blocked QR. +*> M >= MB >= 1 +*> \endverbatim +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The column block size to be used in the blocked QR. +*> NB > M. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and below the diagonal +*> of the array contain the N-by-N lower triangular matrix L; +*> the elements above the diagonal represent Q by the rows +*> of blocked V (see Further Details). +*> +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, +*> dimension (LDT, N * Number_of_row_blocks) +*> where Number_of_row_blocks = CEIL((N-M)/(NB-M)) +*> The blocked upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. +*> See Further Details below. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) REAL array, dimension (MAX(1,LWORK)) +*> +*> \endverbatim +*> \param[in] LWORK +*> \verbatim +*> The dimension of the array WORK. LWORK >= MB * M. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> +*> \endverbatim +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> Short-Wide LQ (SWLQ) performs LQ by a sequence of orthogonal transformations, +*> representing Q as a product of other orthogonal matrices +*> Q = Q(1) * Q(2) * . . . * Q(k) +*> where each Q(i) zeros out upper diagonal entries of a block of NB rows of A: +*> Q(1) zeros out the upper diagonal entries of rows 1:NB of A +*> Q(2) zeros out the bottom MB-N rows of rows [1:M,NB+1:2*NB-M] of A +*> Q(3) zeros out the bottom MB-N rows of rows [1:M,2*NB-M+1:3*NB-2*M] of A +*> . . . +*> +*> Q(1) is computed by GELQT, which represents Q(1) by Householder vectors +*> stored under the diagonal of rows 1:MB of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,1:N). +*> For more information see Further Details in GELQT. +*> +*> Q(i) for i>1 is computed by TPLQT, which represents Q(i) by Householder vectors +*> stored in columns [(i-1)*(NB-M)+M+1:i*(NB-M)+M] of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,(i-1)*M+1:i*M). +*> The last Q(k) may use fewer rows. +*> For more information see Further Details in TPQRT. +*> +*> For more details of the overall algorithm, see the description of +*> Sequential TSQR in Section 2.2 of [1]. +*> +*> [1] “Communication-Optimal Parallel and Sequential QR and LU Factorizations,” +*> J. Demmel, L. Grigori, M. Hoemmen, J. Langou, +*> SIAM J. Sci. Comput, vol. 34, no. 1, 2012 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLASWLQ( M, N, MB, NB, A, LDA, T, LDT, WORK, LWORK, + $ INFO) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, MB, NB, LWORK, LDT +* .. +* .. Array Arguments .. + REAL A( LDA, * ), WORK( * ), T( LDT, *) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, II, KK, CTR +* .. +* .. EXTERNAL FUNCTIONS .. + LOGICAL LSAME + EXTERNAL LSAME +* .. EXTERNAL SUBROUTINES .. + EXTERNAL SGELQT, SGEQRT, STPLQT, STPQRT, XERBLA +* .. INTRINSIC FUNCTIONS .. + INTRINSIC MAX, MIN, MOD +* .. +* .. EXECUTABLE STATEMENTS .. +* +* TEST THE INPUT ARGUMENTS +* + INFO = 0 +* + LQUERY = ( LWORK.EQ.-1 ) +* + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.LT.M ) THEN + INFO = -2 + ELSE IF( MB.LT.1 .OR. ( MB.GT.M .AND. M.GT.0 )) THEN + INFO = -3 + ELSE IF( NB.LE.M ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDT.LT.MB ) THEN + INFO = -8 + ELSE IF( ( LWORK.LT.M*MB) .AND. (.NOT.LQUERY) ) THEN + INFO = -10 + END IF + IF( INFO.EQ.0) THEN + WORK(1) = MB*M + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLASWLQ', -INFO ) + RETURN + ELSE IF (LQUERY) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN(M,N).EQ.0 ) THEN + RETURN + END IF +* +* The LQ Decomposition +* + IF((M.GE.N).OR.(NB.LE.M).OR.(NB.GE.N)) THEN + CALL SGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO) + RETURN + END IF +* + KK = MOD((N-M),(NB-M)) + II=N-KK+1 +* +* Compute the LQ factorization of the first block A(1:M,1:NB) +* + CALL SGELQT( M, NB, MB, A(1,1), LDA, T, LDT, WORK, INFO) + CTR = 1 +* + DO I = NB+1, II-NB+M , (NB-M) +* +* Compute the QR factorization of the current block A(1:M,I:I+NB-M) +* + CALL STPLQT( M, NB-M, 0, MB, A(1,1), LDA, A( 1, I ), + $ LDA, T(1, CTR * M + 1), + $ LDT, WORK, INFO ) + CTR = CTR + 1 + END DO +* +* Compute the QR factorization of the last block A(1:M,II:N) +* + IF (II.LE.N) THEN + CALL STPLQT( M, KK, 0, MB, A(1,1), LDA, A( 1, II ), + $ LDA, T(1, CTR * M + 1), LDT, + $ WORK, INFO ) + END IF +* + WORK( 1 ) = M * MB + RETURN +* +* End of SLASWLQ +* + END diff --git a/dspl/liblapack/SRC/slaswp.f b/dspl/liblapack/SRC/slaswp.f new file mode 100644 index 0000000..4fcef5b --- /dev/null +++ b/dspl/liblapack/SRC/slaswp.f @@ -0,0 +1,193 @@ +*> \brief \b SLASWP performs a series of row interchanges on a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASWP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASWP( N, A, LDA, K1, K2, IPIV, INCX ) +* +* .. Scalar Arguments .. +* INTEGER INCX, K1, K2, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASWP performs a series of row interchanges on the matrix A. +*> One row interchange is initiated for each of rows K1 through K2 of A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the matrix of column dimension N to which the row +*> interchanges will be applied. +*> On exit, the permuted matrix. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> \endverbatim +*> +*> \param[in] K1 +*> \verbatim +*> K1 is INTEGER +*> The first element of IPIV for which a row interchange will +*> be done. +*> \endverbatim +*> +*> \param[in] K2 +*> \verbatim +*> K2 is INTEGER +*> (K2-K1+1) is the number of elements of IPIV for which a row +*> interchange will be done. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (K1+(K2-K1)*abs(INCX)) +*> The vector of pivot indices. Only the elements in positions +*> K1 through K1+(K2-K1)*abs(INCX) of IPIV are accessed. +*> IPIV(K1+(K-K1)*abs(INCX)) = L implies rows K and L are to be +*> interchanged. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive values of IPIV. If INCX +*> is negative, the pivots are applied in reverse order. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Modified by +*> R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLASWP( N, A, LDA, K1, K2, IPIV, INCX ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INCX, K1, K2, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, I1, I2, INC, IP, IX, IX0, J, K, N32 + REAL TEMP +* .. +* .. Executable Statements .. +* +* Interchange row I with row IPIV(K1+(I-K1)*abs(INCX)) for each of rows +* K1 through K2. +* + IF( INCX.GT.0 ) THEN + IX0 = K1 + I1 = K1 + I2 = K2 + INC = 1 + ELSE IF( INCX.LT.0 ) THEN + IX0 = K1 + ( K1-K2 )*INCX + I1 = K2 + I2 = K1 + INC = -1 + ELSE + RETURN + END IF +* + N32 = ( N / 32 )*32 + IF( N32.NE.0 ) THEN + DO 30 J = 1, N32, 32 + IX = IX0 + DO 20 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 10 K = J, J + 31 + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 10 CONTINUE + END IF + IX = IX + INCX + 20 CONTINUE + 30 CONTINUE + END IF + IF( N32.NE.N ) THEN + N32 = N32 + 1 + IX = IX0 + DO 50 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 40 K = N32, N + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 40 CONTINUE + END IF + IX = IX + INCX + 50 CONTINUE + END IF +* + RETURN +* +* End of SLASWP +* + END diff --git a/dspl/liblapack/SRC/slasy2.f b/dspl/liblapack/SRC/slasy2.f new file mode 100644 index 0000000..72b835e --- /dev/null +++ b/dspl/liblapack/SRC/slasy2.f @@ -0,0 +1,482 @@ +*> \brief \b SLASY2 solves the Sylvester matrix equation where the matrices are of order 1 or 2. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASY2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASY2( LTRANL, LTRANR, ISGN, N1, N2, TL, LDTL, TR, +* LDTR, B, LDB, SCALE, X, LDX, XNORM, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL LTRANL, LTRANR +* INTEGER INFO, ISGN, LDB, LDTL, LDTR, LDX, N1, N2 +* REAL SCALE, XNORM +* .. +* .. Array Arguments .. +* REAL B( LDB, * ), TL( LDTL, * ), TR( LDTR, * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASY2 solves for the N1 by N2 matrix X, 1 <= N1,N2 <= 2, in +*> +*> op(TL)*X + ISGN*X*op(TR) = SCALE*B, +*> +*> where TL is N1 by N1, TR is N2 by N2, B is N1 by N2, and ISGN = 1 or +*> -1. op(T) = T or T**T, where T**T denotes the transpose of T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] LTRANL +*> \verbatim +*> LTRANL is LOGICAL +*> On entry, LTRANL specifies the op(TL): +*> = .FALSE., op(TL) = TL, +*> = .TRUE., op(TL) = TL**T. +*> \endverbatim +*> +*> \param[in] LTRANR +*> \verbatim +*> LTRANR is LOGICAL +*> On entry, LTRANR specifies the op(TR): +*> = .FALSE., op(TR) = TR, +*> = .TRUE., op(TR) = TR**T. +*> \endverbatim +*> +*> \param[in] ISGN +*> \verbatim +*> ISGN is INTEGER +*> On entry, ISGN specifies the sign of the equation +*> as described before. ISGN may only be 1 or -1. +*> \endverbatim +*> +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> On entry, N1 specifies the order of matrix TL. +*> N1 may only be 0, 1 or 2. +*> \endverbatim +*> +*> \param[in] N2 +*> \verbatim +*> N2 is INTEGER +*> On entry, N2 specifies the order of matrix TR. +*> N2 may only be 0, 1 or 2. +*> \endverbatim +*> +*> \param[in] TL +*> \verbatim +*> TL is REAL array, dimension (LDTL,2) +*> On entry, TL contains an N1 by N1 matrix. +*> \endverbatim +*> +*> \param[in] LDTL +*> \verbatim +*> LDTL is INTEGER +*> The leading dimension of the matrix TL. LDTL >= max(1,N1). +*> \endverbatim +*> +*> \param[in] TR +*> \verbatim +*> TR is REAL array, dimension (LDTR,2) +*> On entry, TR contains an N2 by N2 matrix. +*> \endverbatim +*> +*> \param[in] LDTR +*> \verbatim +*> LDTR is INTEGER +*> The leading dimension of the matrix TR. LDTR >= max(1,N2). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,2) +*> On entry, the N1 by N2 matrix B contains the right-hand +*> side of the equation. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the matrix B. LDB >= max(1,N1). +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL +*> On exit, SCALE contains the scale factor. SCALE is chosen +*> less than or equal to 1 to prevent the solution overflowing. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (LDX,2) +*> On exit, X contains the N1 by N2 solution. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the matrix X. LDX >= max(1,N1). +*> \endverbatim +*> +*> \param[out] XNORM +*> \verbatim +*> XNORM is REAL +*> On exit, XNORM is the infinity-norm of the solution. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> On exit, INFO is set to +*> 0: successful exit. +*> 1: TL and TR have too close eigenvalues, so TL or +*> TR is perturbed to get a nonsingular equation. +*> NOTE: In the interests of speed, this routine does not +*> check the inputs for errors. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realSYauxiliary +* +* ===================================================================== + SUBROUTINE SLASY2( LTRANL, LTRANR, ISGN, N1, N2, TL, LDTL, TR, + $ LDTR, B, LDB, SCALE, X, LDX, XNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + LOGICAL LTRANL, LTRANR + INTEGER INFO, ISGN, LDB, LDTL, LDTR, LDX, N1, N2 + REAL SCALE, XNORM +* .. +* .. Array Arguments .. + REAL B( LDB, * ), TL( LDTL, * ), TR( LDTR, * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL TWO, HALF, EIGHT + PARAMETER ( TWO = 2.0E+0, HALF = 0.5E+0, EIGHT = 8.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL BSWAP, XSWAP + INTEGER I, IP, IPIV, IPSV, J, JP, JPSV, K + REAL BET, EPS, GAM, L21, SGN, SMIN, SMLNUM, TAU1, + $ TEMP, U11, U12, U22, XMAX +* .. +* .. Local Arrays .. + LOGICAL BSWPIV( 4 ), XSWPIV( 4 ) + INTEGER JPIV( 4 ), LOCL21( 4 ), LOCU12( 4 ), + $ LOCU22( 4 ) + REAL BTMP( 4 ), T16( 4, 4 ), TMP( 4 ), X2( 2 ) +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Data statements .. + DATA LOCU12 / 3, 4, 1, 2 / , LOCL21 / 2, 1, 4, 3 / , + $ LOCU22 / 4, 3, 2, 1 / + DATA XSWPIV / .FALSE., .FALSE., .TRUE., .TRUE. / + DATA BSWPIV / .FALSE., .TRUE., .FALSE., .TRUE. / +* .. +* .. Executable Statements .. +* +* Do not check the input parameters for errors +* + INFO = 0 +* +* Quick return if possible +* + IF( N1.EQ.0 .OR. N2.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + SGN = ISGN +* + K = N1 + N1 + N2 - 2 + GO TO ( 10, 20, 30, 50 )K +* +* 1 by 1: TL11*X + SGN*X*TR11 = B11 +* + 10 CONTINUE + TAU1 = TL( 1, 1 ) + SGN*TR( 1, 1 ) + BET = ABS( TAU1 ) + IF( BET.LE.SMLNUM ) THEN + TAU1 = SMLNUM + BET = SMLNUM + INFO = 1 + END IF +* + SCALE = ONE + GAM = ABS( B( 1, 1 ) ) + IF( SMLNUM*GAM.GT.BET ) + $ SCALE = ONE / GAM +* + X( 1, 1 ) = ( B( 1, 1 )*SCALE ) / TAU1 + XNORM = ABS( X( 1, 1 ) ) + RETURN +* +* 1 by 2: +* TL11*[X11 X12] + ISGN*[X11 X12]*op[TR11 TR12] = [B11 B12] +* [TR21 TR22] +* + 20 CONTINUE +* + SMIN = MAX( EPS*MAX( ABS( TL( 1, 1 ) ), ABS( TR( 1, 1 ) ), + $ ABS( TR( 1, 2 ) ), ABS( TR( 2, 1 ) ), ABS( TR( 2, 2 ) ) ), + $ SMLNUM ) + TMP( 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 ) + TMP( 4 ) = TL( 1, 1 ) + SGN*TR( 2, 2 ) + IF( LTRANR ) THEN + TMP( 2 ) = SGN*TR( 2, 1 ) + TMP( 3 ) = SGN*TR( 1, 2 ) + ELSE + TMP( 2 ) = SGN*TR( 1, 2 ) + TMP( 3 ) = SGN*TR( 2, 1 ) + END IF + BTMP( 1 ) = B( 1, 1 ) + BTMP( 2 ) = B( 1, 2 ) + GO TO 40 +* +* 2 by 1: +* op[TL11 TL12]*[X11] + ISGN* [X11]*TR11 = [B11] +* [TL21 TL22] [X21] [X21] [B21] +* + 30 CONTINUE + SMIN = MAX( EPS*MAX( ABS( TR( 1, 1 ) ), ABS( TL( 1, 1 ) ), + $ ABS( TL( 1, 2 ) ), ABS( TL( 2, 1 ) ), ABS( TL( 2, 2 ) ) ), + $ SMLNUM ) + TMP( 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 ) + TMP( 4 ) = TL( 2, 2 ) + SGN*TR( 1, 1 ) + IF( LTRANL ) THEN + TMP( 2 ) = TL( 1, 2 ) + TMP( 3 ) = TL( 2, 1 ) + ELSE + TMP( 2 ) = TL( 2, 1 ) + TMP( 3 ) = TL( 1, 2 ) + END IF + BTMP( 1 ) = B( 1, 1 ) + BTMP( 2 ) = B( 2, 1 ) + 40 CONTINUE +* +* Solve 2 by 2 system using complete pivoting. +* Set pivots less than SMIN to SMIN. +* + IPIV = ISAMAX( 4, TMP, 1 ) + U11 = TMP( IPIV ) + IF( ABS( U11 ).LE.SMIN ) THEN + INFO = 1 + U11 = SMIN + END IF + U12 = TMP( LOCU12( IPIV ) ) + L21 = TMP( LOCL21( IPIV ) ) / U11 + U22 = TMP( LOCU22( IPIV ) ) - U12*L21 + XSWAP = XSWPIV( IPIV ) + BSWAP = BSWPIV( IPIV ) + IF( ABS( U22 ).LE.SMIN ) THEN + INFO = 1 + U22 = SMIN + END IF + IF( BSWAP ) THEN + TEMP = BTMP( 2 ) + BTMP( 2 ) = BTMP( 1 ) - L21*TEMP + BTMP( 1 ) = TEMP + ELSE + BTMP( 2 ) = BTMP( 2 ) - L21*BTMP( 1 ) + END IF + SCALE = ONE + IF( ( TWO*SMLNUM )*ABS( BTMP( 2 ) ).GT.ABS( U22 ) .OR. + $ ( TWO*SMLNUM )*ABS( BTMP( 1 ) ).GT.ABS( U11 ) ) THEN + SCALE = HALF / MAX( ABS( BTMP( 1 ) ), ABS( BTMP( 2 ) ) ) + BTMP( 1 ) = BTMP( 1 )*SCALE + BTMP( 2 ) = BTMP( 2 )*SCALE + END IF + X2( 2 ) = BTMP( 2 ) / U22 + X2( 1 ) = BTMP( 1 ) / U11 - ( U12 / U11 )*X2( 2 ) + IF( XSWAP ) THEN + TEMP = X2( 2 ) + X2( 2 ) = X2( 1 ) + X2( 1 ) = TEMP + END IF + X( 1, 1 ) = X2( 1 ) + IF( N1.EQ.1 ) THEN + X( 1, 2 ) = X2( 2 ) + XNORM = ABS( X( 1, 1 ) ) + ABS( X( 1, 2 ) ) + ELSE + X( 2, 1 ) = X2( 2 ) + XNORM = MAX( ABS( X( 1, 1 ) ), ABS( X( 2, 1 ) ) ) + END IF + RETURN +* +* 2 by 2: +* op[TL11 TL12]*[X11 X12] +ISGN* [X11 X12]*op[TR11 TR12] = [B11 B12] +* [TL21 TL22] [X21 X22] [X21 X22] [TR21 TR22] [B21 B22] +* +* Solve equivalent 4 by 4 system using complete pivoting. +* Set pivots less than SMIN to SMIN. +* + 50 CONTINUE + SMIN = MAX( ABS( TR( 1, 1 ) ), ABS( TR( 1, 2 ) ), + $ ABS( TR( 2, 1 ) ), ABS( TR( 2, 2 ) ) ) + SMIN = MAX( SMIN, ABS( TL( 1, 1 ) ), ABS( TL( 1, 2 ) ), + $ ABS( TL( 2, 1 ) ), ABS( TL( 2, 2 ) ) ) + SMIN = MAX( EPS*SMIN, SMLNUM ) + BTMP( 1 ) = ZERO + CALL SCOPY( 16, BTMP, 0, T16, 1 ) + T16( 1, 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 ) + T16( 2, 2 ) = TL( 2, 2 ) + SGN*TR( 1, 1 ) + T16( 3, 3 ) = TL( 1, 1 ) + SGN*TR( 2, 2 ) + T16( 4, 4 ) = TL( 2, 2 ) + SGN*TR( 2, 2 ) + IF( LTRANL ) THEN + T16( 1, 2 ) = TL( 2, 1 ) + T16( 2, 1 ) = TL( 1, 2 ) + T16( 3, 4 ) = TL( 2, 1 ) + T16( 4, 3 ) = TL( 1, 2 ) + ELSE + T16( 1, 2 ) = TL( 1, 2 ) + T16( 2, 1 ) = TL( 2, 1 ) + T16( 3, 4 ) = TL( 1, 2 ) + T16( 4, 3 ) = TL( 2, 1 ) + END IF + IF( LTRANR ) THEN + T16( 1, 3 ) = SGN*TR( 1, 2 ) + T16( 2, 4 ) = SGN*TR( 1, 2 ) + T16( 3, 1 ) = SGN*TR( 2, 1 ) + T16( 4, 2 ) = SGN*TR( 2, 1 ) + ELSE + T16( 1, 3 ) = SGN*TR( 2, 1 ) + T16( 2, 4 ) = SGN*TR( 2, 1 ) + T16( 3, 1 ) = SGN*TR( 1, 2 ) + T16( 4, 2 ) = SGN*TR( 1, 2 ) + END IF + BTMP( 1 ) = B( 1, 1 ) + BTMP( 2 ) = B( 2, 1 ) + BTMP( 3 ) = B( 1, 2 ) + BTMP( 4 ) = B( 2, 2 ) +* +* Perform elimination +* + DO 100 I = 1, 3 + XMAX = ZERO + DO 70 IP = I, 4 + DO 60 JP = I, 4 + IF( ABS( T16( IP, JP ) ).GE.XMAX ) THEN + XMAX = ABS( T16( IP, JP ) ) + IPSV = IP + JPSV = JP + END IF + 60 CONTINUE + 70 CONTINUE + IF( IPSV.NE.I ) THEN + CALL SSWAP( 4, T16( IPSV, 1 ), 4, T16( I, 1 ), 4 ) + TEMP = BTMP( I ) + BTMP( I ) = BTMP( IPSV ) + BTMP( IPSV ) = TEMP + END IF + IF( JPSV.NE.I ) + $ CALL SSWAP( 4, T16( 1, JPSV ), 1, T16( 1, I ), 1 ) + JPIV( I ) = JPSV + IF( ABS( T16( I, I ) ).LT.SMIN ) THEN + INFO = 1 + T16( I, I ) = SMIN + END IF + DO 90 J = I + 1, 4 + T16( J, I ) = T16( J, I ) / T16( I, I ) + BTMP( J ) = BTMP( J ) - T16( J, I )*BTMP( I ) + DO 80 K = I + 1, 4 + T16( J, K ) = T16( J, K ) - T16( J, I )*T16( I, K ) + 80 CONTINUE + 90 CONTINUE + 100 CONTINUE + IF( ABS( T16( 4, 4 ) ).LT.SMIN ) THEN + INFO = 1 + T16( 4, 4 ) = SMIN + END IF + SCALE = ONE + IF( ( EIGHT*SMLNUM )*ABS( BTMP( 1 ) ).GT.ABS( T16( 1, 1 ) ) .OR. + $ ( EIGHT*SMLNUM )*ABS( BTMP( 2 ) ).GT.ABS( T16( 2, 2 ) ) .OR. + $ ( EIGHT*SMLNUM )*ABS( BTMP( 3 ) ).GT.ABS( T16( 3, 3 ) ) .OR. + $ ( EIGHT*SMLNUM )*ABS( BTMP( 4 ) ).GT.ABS( T16( 4, 4 ) ) ) THEN + SCALE = ( ONE / EIGHT ) / MAX( ABS( BTMP( 1 ) ), + $ ABS( BTMP( 2 ) ), ABS( BTMP( 3 ) ), ABS( BTMP( 4 ) ) ) + BTMP( 1 ) = BTMP( 1 )*SCALE + BTMP( 2 ) = BTMP( 2 )*SCALE + BTMP( 3 ) = BTMP( 3 )*SCALE + BTMP( 4 ) = BTMP( 4 )*SCALE + END IF + DO 120 I = 1, 4 + K = 5 - I + TEMP = ONE / T16( K, K ) + TMP( K ) = BTMP( K )*TEMP + DO 110 J = K + 1, 4 + TMP( K ) = TMP( K ) - ( TEMP*T16( K, J ) )*TMP( J ) + 110 CONTINUE + 120 CONTINUE + DO 130 I = 1, 3 + IF( JPIV( 4-I ).NE.4-I ) THEN + TEMP = TMP( 4-I ) + TMP( 4-I ) = TMP( JPIV( 4-I ) ) + TMP( JPIV( 4-I ) ) = TEMP + END IF + 130 CONTINUE + X( 1, 1 ) = TMP( 1 ) + X( 2, 1 ) = TMP( 2 ) + X( 1, 2 ) = TMP( 3 ) + X( 2, 2 ) = TMP( 4 ) + XNORM = MAX( ABS( TMP( 1 ) )+ABS( TMP( 3 ) ), + $ ABS( TMP( 2 ) )+ABS( TMP( 4 ) ) ) + RETURN +* +* End of SLASY2 +* + END diff --git a/dspl/liblapack/SRC/slasyf.f b/dspl/liblapack/SRC/slasyf.f new file mode 100644 index 0000000..58dace2 --- /dev/null +++ b/dspl/liblapack/SRC/slasyf.f @@ -0,0 +1,822 @@ +*> \brief \b SLASYF computes a partial factorization of a real symmetric matrix using the Bunch-Kaufman diagonal pivoting method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASYF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASYF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASYF computes a partial factorization of a real symmetric matrix A +*> using the Bunch-Kaufman diagonal pivoting method. The partial +*> factorization has the form: +*> +*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +*> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) +*> +*> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L' +*> ( L21 I ) ( 0 A22 ) ( 0 I ) +*> +*> where the order of D is at most NB. The actual order is returned in +*> the argument KB, and is either NB or NB-1, or N if N <= NB. +*> +*> SLASYF is an auxiliary routine called by SSYTRF. It uses blocked code +*> (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or +*> A22 (if UPLO = 'L'). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The maximum number of columns of the matrix A that should be +*> factored. NB should be at least 2 to allow for 2-by-2 pivot +*> blocks. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns of A that were actually factored. +*> KB is either NB-1 or NB, or N if N <= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, A contains details of the partial factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> Only the last KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k-1) < 0, then rows and columns +*> k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> Only the first KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k+1) < 0, then rows and columns +*> k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1) +*> is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (LDW,NB) +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup realSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SLASYF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER IMAX, J, JB, JJ, JMAX, JP, K, KK, KKW, KP, + $ KSTEP, KW + REAL ABSAKK, ALPHA, COLMAX, D11, D21, D22, R1, + $ ROWMAX, T +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + EXTERNAL LSAME, ISAMAX +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, SGEMV, SSCAL, SSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* +* KW is the column of W which corresponds to column K of A +* + K = N + 10 CONTINUE + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* +* Copy column K of A to column KW of W and update it +* + CALL SCOPY( K, A( 1, K ), 1, W( 1, KW ), 1 ) + IF( K.LT.N ) + $ CALL SGEMV( 'No transpose', K, N-K, -ONE, A( 1, K+1 ), LDA, + $ W( K, KW+1 ), LDW, ONE, W( 1, KW ), 1 ) +* + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( W( K, KW ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = ISAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = ABS( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* Copy column IMAX to column KW-1 of W and update it +* + CALL SCOPY( IMAX, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 ) + CALL SCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) + IF( K.LT.N ) + $ CALL SGEMV( 'No transpose', K, N-K, -ONE, A( 1, K+1 ), + $ LDA, W( IMAX, KW+1 ), LDW, ONE, + $ W( 1, KW-1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + ISAMAX( K-IMAX, W( IMAX+1, KW-1 ), 1 ) + ROWMAX = ABS( W( JMAX, KW-1 ) ) + IF( IMAX.GT.1 ) THEN + JMAX = ISAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( W( JMAX, KW-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( W( IMAX, KW-1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW of W +* + CALL SCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K - KSTEP + 1 +* +* KKW is the column of W which corresponds to column KK of A +* + KKW = NB + KK - N +* +* Interchange rows and columns KP and KK. +* Updated column KP is already stored in column KKW of W. +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP of submatrix A +* at step K. No need to copy element into column K +* (or K and K-1 for 2-by-2 pivot) of A, since these columns +* will be later overwritten. +* + A( KP, KP ) = A( KK, KK ) + CALL SCOPY( KK-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + IF( KP.GT.1 ) + $ CALL SCOPY( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last K+1 to N columns of A +* (columns K (or K and K-1 for 2-by-2 pivot) of A will be +* later overwritten). Interchange rows KK and KP +* in last KKW to NB columns of W. +* + IF( K.LT.N ) + $ CALL SSWAP( N-K, A( KK, K+1 ), LDA, A( KP, K+1 ), + $ LDA ) + CALL SSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column kw of W now holds +* +* W(kw) = U(k)*D(k), +* +* where U(k) is the k-th column of U +* +* Store subdiag. elements of column U(k) +* and 1-by-1 block D(k) in column k of A. +* NOTE: Diagonal element U(k,k) is a UNIT element +* and not stored. +* A(k,k) := D(k,k) = W(k,kw) +* A(1:k-1,k) := U(1:k-1,k) = W(1:k-1,kw)/D(k,k) +* + CALL SCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + R1 = ONE / A( K, K ) + CALL SSCAL( K-1, R1, A( 1, K ), 1 ) +* + ELSE +* +* 2-by-2 pivot block D(k): columns kw and kw-1 of W now hold +* +* ( W(kw-1) W(kw) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Store U(1:k-2,k-1) and U(1:k-2,k) and 2-by-2 +* block D(k-1:k,k-1:k) in columns k-1 and k of A. +* NOTE: 2-by-2 diagonal block U(k-1:k,k-1:k) is a UNIT +* block and not stored. +* A(k-1:k,k-1:k) := D(k-1:k,k-1:k) = W(k-1:k,kw-1:kw) +* A(1:k-2,k-1:k) := U(1:k-2,k:k-1:k) = +* = W(1:k-2,kw-1:kw) * ( D(k-1:k,k-1:k)**(-1) ) +* + IF( K.GT.2 ) THEN +* +* Compose the columns of the inverse of 2-by-2 pivot +* block D in the following way to reduce the number +* of FLOPS when we myltiply panel ( W(kw-1) W(kw) ) by +* this inverse +* +* D**(-1) = ( d11 d21 )**(-1) = +* ( d21 d22 ) +* +* = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) = +* ( (-d21 ) ( d11 ) ) +* +* = 1/d21 * 1/((d11/d21)*(d22/d21)-1) * +* +* * ( ( d22/d21 ) ( -1 ) ) = +* ( ( -1 ) ( d11/d21 ) ) +* +* = 1/d21 * 1/(D22*D11-1) * ( ( D11 ) ( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = 1/d21 * T * ( ( D11 ) ( -1 ) ) +* ( ( -1 ) ( D22 ) ) +* +* = D21 * ( ( D11 ) ( -1 ) ) +* ( ( -1 ) ( D22 ) ) +* + D21 = W( K-1, KW ) + D11 = W( K, KW ) / D21 + D22 = W( K-1, KW-1 ) / D21 + T = ONE / ( D11*D22-ONE ) + D21 = T / D21 +* +* Update elements in columns A(k-1) and A(k) as +* dot products of rows of ( W(kw-1) W(kw) ) and columns +* of D**(-1) +* + DO 20 J = 1, K - 2 + A( J, K-1 ) = D21*( D11*W( J, KW-1 )-W( J, KW ) ) + A( J, K ) = D21*( D22*W( J, KW )-W( J, KW-1 ) ) + 20 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = W( K-1, KW ) + A( K, K ) = W( K, KW ) +* + END IF +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12**T = A11 - U12*W**T +* +* computing blocks of NB columns at a time +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + CALL SGEMV( 'No transpose', JJ-J+1, N-K, -ONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, ONE, + $ A( J, JJ ), 1 ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + CALL SGEMM( 'No transpose', 'Transpose', J-1, JB, N-K, -ONE, + $ A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, ONE, + $ A( 1, J ), LDA ) + 50 CONTINUE +* +* Put U12 in standard form by partially undoing the interchanges +* in columns k+1:n looping backwards from k+1 to n +* + J = K + 1 + 60 CONTINUE +* +* Undo the interchanges (if any) of rows JJ and JP at each +* step J +* +* (Here, J is a diagonal index) + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP +* (Here, J is a diagonal index) + J = J + 1 + END IF +* (NOTE: Here, J is used to determine row length. Length N-J+1 +* of the rows to swap back doesn't include diagonal element) + J = J + 1 + IF( JP.NE.JJ .AND. J.LE.N ) + $ CALL SSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA ) + IF( J.LT.N ) + $ GO TO 60 +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* +* Copy column K of A to column K of W and update it +* + CALL SCOPY( N-K+1, A( K, K ), 1, W( K, K ), 1 ) + CALL SGEMV( 'No transpose', N-K+1, K-1, -ONE, A( K, 1 ), LDA, + $ W( K, 1 ), LDW, ONE, W( K, K ), 1 ) +* + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( W( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + ISAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = ABS( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* Copy column IMAX to column K+1 of W and update it +* + CALL SCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1 ) + CALL SCOPY( N-IMAX+1, A( IMAX, IMAX ), 1, W( IMAX, K+1 ), + $ 1 ) + CALL SGEMV( 'No transpose', N-K+1, K-1, -ONE, A( K, 1 ), + $ LDA, W( IMAX, 1 ), LDW, ONE, W( K, K+1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + ISAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = ABS( W( JMAX, K+1 ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + ISAMAX( N-IMAX, W( IMAX+1, K+1 ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( W( JMAX, K+1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( W( IMAX, K+1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K of W +* + CALL SCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K + KSTEP - 1 +* +* Interchange rows and columns KP and KK. +* Updated column KP is already stored in column KK of W. +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP of submatrix A +* at step K. No need to copy element into column K +* (or K and K+1 for 2-by-2 pivot) of A, since these columns +* will be later overwritten. +* + A( KP, KP ) = A( KK, KK ) + CALL SCOPY( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + IF( KP.LT.N ) + $ CALL SCOPY( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) +* +* Interchange rows KK and KP in first K-1 columns of A +* (columns K (or K and K+1 for 2-by-2 pivot) of A will be +* later overwritten). Interchange rows KK and KP +* in first KK columns of W. +* + IF( K.GT.1 ) + $ CALL SSWAP( K-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL SSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k), +* +* where L(k) is the k-th column of L +* +* Store subdiag. elements of column L(k) +* and 1-by-1 block D(k) in column k of A. +* (NOTE: Diagonal element L(k,k) is a UNIT element +* and not stored) +* A(k,k) := D(k,k) = W(k,k) +* A(k+1:N,k) := L(k+1:N,k) = W(k+1:N,k)/D(k,k) +* + CALL SCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN + R1 = ONE / A( K, K ) + CALL SSCAL( N-K, R1, A( K+1, K ), 1 ) + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* Store L(k+2:N,k) and L(k+2:N,k+1) and 2-by-2 +* block D(k:k+1,k:k+1) in columns k and k+1 of A. +* (NOTE: 2-by-2 diagonal block L(k:k+1,k:k+1) is a UNIT +* block and not stored) +* A(k:k+1,k:k+1) := D(k:k+1,k:k+1) = W(k:k+1,k:k+1) +* A(k+2:N,k:k+1) := L(k+2:N,k:k+1) = +* = W(k+2:N,k:k+1) * ( D(k:k+1,k:k+1)**(-1) ) +* + IF( K.LT.N-1 ) THEN +* +* Compose the columns of the inverse of 2-by-2 pivot +* block D in the following way to reduce the number +* of FLOPS when we myltiply panel ( W(k) W(k+1) ) by +* this inverse +* +* D**(-1) = ( d11 d21 )**(-1) = +* ( d21 d22 ) +* +* = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) = +* ( (-d21 ) ( d11 ) ) +* +* = 1/d21 * 1/((d11/d21)*(d22/d21)-1) * +* +* * ( ( d22/d21 ) ( -1 ) ) = +* ( ( -1 ) ( d11/d21 ) ) +* +* = 1/d21 * 1/(D22*D11-1) * ( ( D11 ) ( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = 1/d21 * T * ( ( D11 ) ( -1 ) ) +* ( ( -1 ) ( D22 ) ) +* +* = D21 * ( ( D11 ) ( -1 ) ) +* ( ( -1 ) ( D22 ) ) +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / D21 + T = ONE / ( D11*D22-ONE ) + D21 = T / D21 +* +* Update elements in columns A(k) and A(k+1) as +* dot products of rows of ( W(k) W(k+1) ) and columns +* of D**(-1) +* + DO 80 J = K + 2, N + A( J, K ) = D21*( D11*W( J, K )-W( J, K+1 ) ) + A( J, K+1 ) = D21*( D22*W( J, K+1 )-W( J, K ) ) + 80 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = W( K+1, K ) + A( K+1, K+1 ) = W( K+1, K+1 ) +* + END IF +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21**T = A22 - L21*W**T +* +* computing blocks of NB columns at a time +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + CALL SGEMV( 'No transpose', J+JB-JJ, K-1, -ONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, ONE, + $ A( JJ, JJ ), 1 ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL SGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -ONE, A( J+JB, 1 ), LDA, W( J, 1 ), LDW, + $ ONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Put L21 in standard form by partially undoing the interchanges +* of rows in columns 1:k-1 looping backwards from k-1 to 1 +* + J = K - 1 + 120 CONTINUE +* +* Undo the interchanges (if any) of rows JJ and JP at each +* step J +* +* (Here, J is a diagonal index) + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP +* (Here, J is a diagonal index) + J = J - 1 + END IF +* (NOTE: Here, J is used to determine row length. Length J +* of the rows to swap back doesn't include diagonal element) + J = J - 1 + IF( JP.NE.JJ .AND. J.GE.1 ) + $ CALL SSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA ) + IF( J.GT.1 ) + $ GO TO 120 +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF + RETURN +* +* End of SLASYF +* + END diff --git a/dspl/liblapack/SRC/slasyf_aa.f b/dspl/liblapack/SRC/slasyf_aa.f new file mode 100644 index 0000000..ed4ef62 --- /dev/null +++ b/dspl/liblapack/SRC/slasyf_aa.f @@ -0,0 +1,493 @@ +*> \brief \b SLASYF_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASYF_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASYF_AA( UPLO, J1, M, NB, A, LDA, IPIV, +* H, LDH, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER J1, M, NB, LDA, LDH +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), H( LDH, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLATRF_AA factorizes a panel of a real symmetric matrix A using +*> the Aasen's algorithm. The panel consists of a set of NB rows of A +*> when UPLO is U, or a set of NB columns when UPLO is L. +*> +*> In order to factorize the panel, the Aasen's algorithm requires the +*> last row, or column, of the previous panel. The first row, or column, +*> of A is set to be the first row, or column, of an identity matrix, +*> which is used to factorize the first panel. +*> +*> The resulting J-th row of U, or J-th column of L, is stored in the +*> (J-1)-th row, or column, of A (without the unit diagonals), while +*> the diagonal and subdiagonal of A are overwritten by those of T. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] J1 +*> \verbatim +*> J1 is INTEGER +*> The location of the first row, or column, of the panel +*> within the submatrix of A, passed to this routine, e.g., +*> when called by SSYTRF_AA, for the first panel, J1 is 1, +*> while for the remaining panels, J1 is 2. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The dimension of the submatrix. M >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The dimension of the panel to be facotorized. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,M) for +*> the first panel, while dimension (LDA,M+1) for the +*> remaining panels. +*> +*> On entry, A contains the last row, or column, of +*> the previous panel, and the trailing submatrix of A +*> to be factorized, except for the first panel, only +*> the panel is passed. +*> +*> On exit, the leading panel is factorized. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (M) +*> Details of the row and column interchanges, +*> the row and column k were interchanged with the row and +*> column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is REAL workspace, dimension (LDH,NB). +*> +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the workspace H. LDH >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL workspace, dimension (M). +*> \endverbatim +*> +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + SUBROUTINE SLASYF_AA( UPLO, J1, M, NB, A, LDA, IPIV, + $ H, LDH, WORK ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER M, NB, J1, LDA, LDH +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), H( LDH, * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* +* .. Local Scalars .. + INTEGER J, K, K1, I1, I2, MJ + REAL PIV, ALPHA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX, ILAENV + EXTERNAL LSAME, ILAENV, ISAMAX +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SGEMV, SSCAL, SCOPY, SSWAP, SLASET, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + J = 1 +* +* K1 is the first column of the panel to be factorized +* i.e., K1 is 2 for the first block column, and 1 for the rest of the blocks +* + K1 = (2-J1)+1 +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* ..................................................... +* Factorize A as U**T*D*U using the upper triangle of A +* ..................................................... +* + 10 CONTINUE + IF ( J.GT.MIN(M, NB) ) + $ GO TO 20 +* +* K is the column to be factorized +* when being called from SSYTRF_AA, +* > for the first block column, J1 is 1, hence J1+J-1 is J, +* > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, +* + K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF +* +* H(J:M, J) := A(J, J:M) - H(J:M, 1:(J-1)) * L(J1:(J-1), J), +* where H(J:M, J) has been initialized to be A(J, J:M) +* + IF( K.GT.2 ) THEN +* +* K is the column to be factorized +* > for the first block column, K is J, skipping the first two +* columns +* > for the rest of the columns, K is J+1, skipping only the +* first column +* + CALL SGEMV( 'No transpose', MJ, J-K1, + $ -ONE, H( J, K1 ), LDH, + $ A( 1, J ), 1, + $ ONE, H( J, J ), 1 ) + END IF +* +* Copy H(i:M, i) into WORK +* + CALL SCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) +* + IF( J.GT.K1 ) THEN +* +* Compute WORK := WORK - L(J-1, J:M) * T(J-1,J), +* where A(J-1, J) stores T(J-1, J) and A(J-2, J:M) stores U(J-1, J:M) +* + ALPHA = -A( K-1, J ) + CALL SAXPY( MJ, ALPHA, A( K-2, J ), LDA, WORK( 1 ), 1 ) + END IF +* +* Set A(J, J) = T(J, J) +* + A( K, J ) = WORK( 1 ) +* + IF( J.LT.M ) THEN +* +* Compute WORK(2:M) = T(J, J) L(J, (J+1):M) +* where A(J, J) stores T(J, J) and A(J-1, (J+1):M) stores U(J, (J+1):M) +* + IF( K.GT.1 ) THEN + ALPHA = -A( K, J ) + CALL SAXPY( M-J, ALPHA, A( K-1, J+1 ), LDA, + $ WORK( 2 ), 1 ) + ENDIF +* +* Find max(|WORK(2:M)|) +* + I2 = ISAMAX( M-J, WORK( 2 ), 1 ) + 1 + PIV = WORK( I2 ) +* +* Apply symmetric pivot +* + IF( (I2.NE.2) .AND. (PIV.NE.0) ) THEN +* +* Swap WORK(I1) and WORK(I2) +* + I1 = 2 + WORK( I2 ) = WORK( I1 ) + WORK( I1 ) = PIV +* +* Swap A(I1, I1+1:M) with A(I1+1:M, I2) +* + I1 = I1+J-1 + I2 = I2+J-1 + CALL SSWAP( I2-I1-1, A( J1+I1-1, I1+1 ), LDA, + $ A( J1+I1, I2 ), 1 ) +* +* Swap A(I1, I2+1:M) with A(I2, I2+1:M) +* + CALL SSWAP( M-I2, A( J1+I1-1, I2+1 ), LDA, + $ A( J1+I2-1, I2+1 ), LDA ) +* +* Swap A(I1, I1) with A(I2,I2) +* + PIV = A( I1+J1-1, I1 ) + A( J1+I1-1, I1 ) = A( J1+I2-1, I2 ) + A( J1+I2-1, I2 ) = PIV +* +* Swap H(I1, 1:J1) with H(I2, 1:J1) +* + CALL SSWAP( I1-1, H( I1, 1 ), LDH, H( I2, 1 ), LDH ) + IPIV( I1 ) = I2 +* + IF( I1.GT.(K1-1) ) THEN +* +* Swap L(1:I1-1, I1) with L(1:I1-1, I2), +* skipping the first column +* + CALL SSWAP( I1-K1+1, A( 1, I1 ), 1, + $ A( 1, I2 ), 1 ) + END IF + ELSE + IPIV( J+1 ) = J+1 + ENDIF +* +* Set A(J, J+1) = T(J, J+1) +* + A( K, J+1 ) = WORK( 2 ) +* + IF( J.LT.NB ) THEN +* +* Copy A(J+1:M, J+1) into H(J:M, J), +* + CALL SCOPY( M-J, A( K+1, J+1 ), LDA, + $ H( J+1, J+1 ), 1 ) + END IF +* +* Compute L(J+2, J+1) = WORK( 3:M ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:M, J) = L(J+2:M, J+1) +* + IF( A( K, J+1 ).NE.ZERO ) THEN + ALPHA = ONE / A( K, J+1 ) + CALL SCOPY( M-J-1, WORK( 3 ), 1, A( K, J+2 ), LDA ) + CALL SSCAL( M-J-1, ALPHA, A( K, J+2 ), LDA ) + ELSE + CALL SLASET( 'Full', 1, M-J-1, ZERO, ZERO, + $ A( K, J+2 ), LDA) + END IF + END IF + J = J + 1 + GO TO 10 + 20 CONTINUE +* + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* + 30 CONTINUE + IF( J.GT.MIN( M, NB ) ) + $ GO TO 40 +* +* K is the column to be factorized +* when being called from SSYTRF_AA, +* > for the first block column, J1 is 1, hence J1+J-1 is J, +* > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, +* + K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF +* +* H(J:M, J) := A(J:M, J) - H(J:M, 1:(J-1)) * L(J, J1:(J-1))^T, +* where H(J:M, J) has been initialized to be A(J:M, J) +* + IF( K.GT.2 ) THEN +* +* K is the column to be factorized +* > for the first block column, K is J, skipping the first two +* columns +* > for the rest of the columns, K is J+1, skipping only the +* first column +* + CALL SGEMV( 'No transpose', MJ, J-K1, + $ -ONE, H( J, K1 ), LDH, + $ A( J, 1 ), LDA, + $ ONE, H( J, J ), 1 ) + END IF +* +* Copy H(J:M, J) into WORK +* + CALL SCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) +* + IF( J.GT.K1 ) THEN +* +* Compute WORK := WORK - L(J:M, J-1) * T(J-1,J), +* where A(J-1, J) = T(J-1, J) and A(J, J-2) = L(J, J-1) +* + ALPHA = -A( J, K-1 ) + CALL SAXPY( MJ, ALPHA, A( J, K-2 ), 1, WORK( 1 ), 1 ) + END IF +* +* Set A(J, J) = T(J, J) +* + A( J, K ) = WORK( 1 ) +* + IF( J.LT.M ) THEN +* +* Compute WORK(2:M) = T(J, J) L((J+1):M, J) +* where A(J, J) = T(J, J) and A((J+1):M, J-1) = L((J+1):M, J) +* + IF( K.GT.1 ) THEN + ALPHA = -A( J, K ) + CALL SAXPY( M-J, ALPHA, A( J+1, K-1 ), 1, + $ WORK( 2 ), 1 ) + ENDIF +* +* Find max(|WORK(2:M)|) +* + I2 = ISAMAX( M-J, WORK( 2 ), 1 ) + 1 + PIV = WORK( I2 ) +* +* Apply symmetric pivot +* + IF( (I2.NE.2) .AND. (PIV.NE.0) ) THEN +* +* Swap WORK(I1) and WORK(I2) +* + I1 = 2 + WORK( I2 ) = WORK( I1 ) + WORK( I1 ) = PIV +* +* Swap A(I1+1:M, I1) with A(I2, I1+1:M) +* + I1 = I1+J-1 + I2 = I2+J-1 + CALL SSWAP( I2-I1-1, A( I1+1, J1+I1-1 ), 1, + $ A( I2, J1+I1 ), LDA ) +* +* Swap A(I2+1:M, I1) with A(I2+1:M, I2) +* + CALL SSWAP( M-I2, A( I2+1, J1+I1-1 ), 1, + $ A( I2+1, J1+I2-1 ), 1 ) +* +* Swap A(I1, I1) with A(I2, I2) +* + PIV = A( I1, J1+I1-1 ) + A( I1, J1+I1-1 ) = A( I2, J1+I2-1 ) + A( I2, J1+I2-1 ) = PIV +* +* Swap H(I1, I1:J1) with H(I2, I2:J1) +* + CALL SSWAP( I1-1, H( I1, 1 ), LDH, H( I2, 1 ), LDH ) + IPIV( I1 ) = I2 +* + IF( I1.GT.(K1-1) ) THEN +* +* Swap L(1:I1-1, I1) with L(1:I1-1, I2), +* skipping the first column +* + CALL SSWAP( I1-K1+1, A( I1, 1 ), LDA, + $ A( I2, 1 ), LDA ) + END IF + ELSE + IPIV( J+1 ) = J+1 + ENDIF +* +* Set A(J+1, J) = T(J+1, J) +* + A( J+1, K ) = WORK( 2 ) +* + IF( J.LT.NB ) THEN +* +* Copy A(J+1:M, J+1) into H(J+1:M, J), +* + CALL SCOPY( M-J, A( J+1, K+1 ), 1, + $ H( J+1, J+1 ), 1 ) + END IF +* +* Compute L(J+2, J+1) = WORK( 3:M ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:M, J) = L(J+2:M, J+1) +* + IF( A( J+1, K ).NE.ZERO ) THEN + ALPHA = ONE / A( J+1, K ) + CALL SCOPY( M-J-1, WORK( 3 ), 1, A( J+2, K ), 1 ) + CALL SSCAL( M-J-1, ALPHA, A( J+2, K ), 1 ) + ELSE + CALL SLASET( 'Full', M-J-1, 1, ZERO, ZERO, + $ A( J+2, K ), LDA ) + END IF + END IF + J = J + 1 + GO TO 30 + 40 CONTINUE + END IF + RETURN +* +* End of SLASYF_AA +* + END diff --git a/dspl/liblapack/SRC/slasyf_rk.f b/dspl/liblapack/SRC/slasyf_rk.f new file mode 100644 index 0000000..b1b3717 --- /dev/null +++ b/dspl/liblapack/SRC/slasyf_rk.f @@ -0,0 +1,965 @@ +*> \brief \b SLASYF_RK computes a partial factorization of a real symmetric indefinite matrix using bounded Bunch-Kaufman (rook) diagonal pivoting method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASYF_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASYF_RK( UPLO, N, NB, KB, A, LDA, E, IPIV, W, LDW, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), E( * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> SLASYF_RK computes a partial factorization of a real symmetric +*> matrix A using the bounded Bunch-Kaufman (rook) diagonal +*> pivoting method. The partial factorization has the form: +*> +*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +*> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) +*> +*> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L', +*> ( L21 I ) ( 0 A22 ) ( 0 I ) +*> +*> where the order of D is at most NB. The actual order is returned in +*> the argument KB, and is either NB or NB-1, or N if N <= NB. +*> +*> SLASYF_RK is an auxiliary routine called by SSYTRF_RK. It uses +*> blocked code (calling Level 3 BLAS) to update the submatrix +*> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The maximum number of columns of the matrix A that should be +*> factored. NB should be at least 2 to allow for 2-by-2 pivot +*> blocks. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns of A that were actually factored. +*> KB is either NB-1 or NB, or N if N <= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, contains: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N) +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> IPIV describes the permutation matrix P in the factorization +*> of matrix A as follows. The absolute value of IPIV(k) +*> represents the index of row and column that were +*> interchanged with the k-th row and column. The value of UPLO +*> describes the order in which the interchanges were applied. +*> Also, the sign of IPIV represents the block structure of +*> the symmetric block diagonal matrix D with 1-by-1 or 2-by-2 +*> diagonal blocks which correspond to 1 or 2 interchanges +*> at each factorization step. +*> +*> If UPLO = 'U', +*> ( in factorization order, k decreases from N to 1 ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the submatrix A(1:N,N-KB+1:N); +*> If IPIV(k) = k, no interchange occurred. +*> +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k-1) < 0 means: +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,N-KB+1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k-1) != k-1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the submatrix A(1:N,N-KB+1:N). +*> If -IPIV(k-1) = k-1, no interchange occurred. +*> +*> c) In both cases a) and b) is always ABS( IPIV(k) ) <= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> +*> If UPLO = 'L', +*> ( in factorization order, k increases from 1 to N ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the submatrix A(1:N,1:KB). +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k+1) < 0 means: +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the submatrix A(1:N,1:KB). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k+1) != k+1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the submatrix A(1:N,1:KB). +*> If -IPIV(k+1) = k+1, no interchange occurred. +*> +*> c) In both cases a) and b) is always ABS( IPIV(k) ) >= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (LDW,NB) +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup singleSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SLASYF_RK( UPLO, N, NB, KB, A, LDA, E, IPIV, W, LDW, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), E( * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER IMAX, ITEMP, J, JB, JJ, JMAX, K, KK, KW, KKW, + $ KP, KSTEP, P, II + REAL ABSAKK, ALPHA, COLMAX, D11, D12, D21, D22, + $ STEMP, R1, ROWMAX, T, SFMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, SGEMV, SSCAL, SSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = SLAMCH( 'S' ) +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 +* +* Initilize the first entry of array E, where superdiagonal +* elements of D are stored +* + E( 1 ) = ZERO +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* + K = N + 10 CONTINUE +* +* KW is the column of W which corresponds to column K of A +* + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column KW of W and update it +* + CALL SCOPY( K, A( 1, K ), 1, W( 1, KW ), 1 ) + IF( K.LT.N ) + $ CALL SGEMV( 'No transpose', K, N-K, -ONE, A( 1, K+1 ), + $ LDA, W( K, KW+1 ), LDW, ONE, W( 1, KW ), 1 ) +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( W( K, KW ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = ISAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = ABS( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + CALL SCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) +* +* Set E( K ) to zero +* + IF( K.GT.1 ) + $ E( K ) = ZERO +* + ELSE +* +* ============================================================ +* +* Test for interchange +* +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 12 CONTINUE +* +* Begin pivot search loop body +* +* +* Copy column IMAX to column KW-1 of W and update it +* + CALL SCOPY( IMAX, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 ) + CALL SCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) +* + IF( K.LT.N ) + $ CALL SGEMV( 'No transpose', K, N-K, -ONE, + $ A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW, + $ ONE, W( 1, KW-1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + ISAMAX( K-IMAX, W( IMAX+1, KW-1 ), + $ 1 ) + ROWMAX = ABS( W( JMAX, KW-1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = ISAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + STEMP = ABS( W( ITEMP, KW-1 ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for +* ABS( W( IMAX, KW-1 ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.(ABS( W( IMAX, KW-1 ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW of W +* + CALL SCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K-1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL SCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 12 +* + END IF +* +* ============================================================ +* + KK = K - KSTEP + 1 +* +* KKW is the column of W which corresponds to column KK of A +* + KKW = NB + KK - N +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column K to column P +* + CALL SCOPY( K-P, A( P+1, K ), 1, A( P, P+1 ), LDA ) + CALL SCOPY( P, A( 1, K ), 1, A( 1, P ), 1 ) +* +* Interchange rows K and P in last N-K+1 columns of A +* and last N-K+2 columns of W +* + CALL SSWAP( N-K+1, A( K, K ), LDA, A( P, K ), LDA ) + CALL SSWAP( N-KK+1, W( K, KKW ), LDW, W( P, KKW ), LDW ) + END IF +* +* Updated column KP is already stored in column KKW of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL SCOPY( K-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + CALL SCOPY( KP, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last N-KK+1 columns +* of A and W +* + CALL SSWAP( N-KK+1, A( KK, KK ), LDA, A( KP, KK ), LDA ) + CALL SSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column KW of W now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Store U(k) in column k of A +* + CALL SCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + IF( K.GT.1 ) THEN + IF( ABS( A( K, K ) ).GE.SFMIN ) THEN + R1 = ONE / A( K, K ) + CALL SSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE IF( A( K, K ).NE.ZERO ) THEN + DO 14 II = 1, K - 1 + A( II, K ) = A( II, K ) / A( K, K ) + 14 CONTINUE + END IF +* +* Store the superdiagonal element of D in array E +* + E( K ) = ZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns KW and KW-1 of W now +* hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* + IF( K.GT.2 ) THEN +* +* Store U(k) and U(k-1) in columns k and k-1 of A +* + D12 = W( K-1, KW ) + D11 = W( K, KW ) / D12 + D22 = W( K-1, KW-1 ) / D12 + T = ONE / ( D11*D22-ONE ) + DO 20 J = 1, K - 2 + A( J, K-1 ) = T*( (D11*W( J, KW-1 )-W( J, KW ) ) / + $ D12 ) + A( J, K ) = T*( ( D22*W( J, KW )-W( J, KW-1 ) ) / + $ D12 ) + 20 CONTINUE + END IF +* +* Copy diagonal elements of D(K) to A, +* copy superdiagonal element of D(K) to E(K) and +* ZERO out superdiagonal entry of A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = ZERO + A( K, K ) = W( K, KW ) + E( K ) = W( K-1, KW ) + E( K-1 ) = ZERO +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12**T = A11 - U12*W**T +* +* computing blocks of NB columns at a time +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + CALL SGEMV( 'No transpose', JJ-J+1, N-K, -ONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, ONE, + $ A( J, JJ ), 1 ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + IF( J.GE.2 ) + $ CALL SGEMM( 'No transpose', 'Transpose', J-1, JB, + $ N-K, -ONE, A( 1, K+1 ), LDA, W( J, KW+1 ), + $ LDW, ONE, A( 1, J ), LDA ) + 50 CONTINUE +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 +* +* Initilize the unused last entry of the subdiagonal array E. +* + E( N ) = ZERO +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column K of W and update it +* + CALL SCOPY( N-K+1, A( K, K ), 1, W( K, K ), 1 ) + IF( K.GT.1 ) + $ CALL SGEMV( 'No transpose', N-K+1, K-1, -ONE, A( K, 1 ), + $ LDA, W( K, 1 ), LDW, ONE, W( K, K ), 1 ) +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( W( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + ISAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = ABS( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + CALL SCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) +* +* Set E( K ) to zero +* + IF( K.LT.N ) + $ E( K ) = ZERO +* + ELSE +* +* ============================================================ +* +* Test for interchange +* +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 72 CONTINUE +* +* Begin pivot search loop body +* +* +* Copy column IMAX to column K+1 of W and update it +* + CALL SCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1) + CALL SCOPY( N-IMAX+1, A( IMAX, IMAX ), 1, + $ W( IMAX, K+1 ), 1 ) + IF( K.GT.1 ) + $ CALL SGEMV( 'No transpose', N-K+1, K-1, -ONE, + $ A( K, 1 ), LDA, W( IMAX, 1 ), LDW, + $ ONE, W( K, K+1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + ISAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = ABS( W( JMAX, K+1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + ISAMAX( N-IMAX, W( IMAX+1, K+1 ), 1) + STEMP = ABS( W( ITEMP, K+1 ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for +* ABS( W( IMAX, K+1 ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABS( W( IMAX, K+1 ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K of W +* + CALL SCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL SCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 72 +* + END IF +* +* ============================================================ +* + KK = K + KSTEP - 1 +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column K to column P +* + CALL SCOPY( P-K, A( K, K ), 1, A( P, K ), LDA ) + CALL SCOPY( N-P+1, A( P, K ), 1, A( P, P ), 1 ) +* +* Interchange rows K and P in first K columns of A +* and first K+1 columns of W +* + CALL SSWAP( K, A( K, 1 ), LDA, A( P, 1 ), LDA ) + CALL SSWAP( KK, W( K, 1 ), LDW, W( P, 1 ), LDW ) + END IF +* +* Updated column KP is already stored in column KK of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL SCOPY( KP-K-1, A( K+1, KK ), 1, A( KP, K+1 ), LDA ) + CALL SCOPY( N-KP+1, A( KP, KK ), 1, A( KP, KP ), 1 ) +* +* Interchange rows KK and KP in first KK columns of A and W +* + CALL SSWAP( KK, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL SSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* +* Store L(k) in column k of A +* + CALL SCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN + IF( ABS( A( K, K ) ).GE.SFMIN ) THEN + R1 = ONE / A( K, K ) + CALL SSCAL( N-K, R1, A( K+1, K ), 1 ) + ELSE IF( A( K, K ).NE.ZERO ) THEN + DO 74 II = K + 1, N + A( II, K ) = A( II, K ) / A( K, K ) + 74 CONTINUE + END IF +* +* Store the subdiagonal element of D in array E +* + E( K ) = ZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Store L(k) and L(k+1) in columns k and k+1 of A +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / D21 + T = ONE / ( D11*D22-ONE ) + DO 80 J = K + 2, N + A( J, K ) = T*( ( D11*W( J, K )-W( J, K+1 ) ) / + $ D21 ) + A( J, K+1 ) = T*( ( D22*W( J, K+1 )-W( J, K ) ) / + $ D21 ) + 80 CONTINUE + END IF +* +* Copy diagonal elements of D(K) to A, +* copy subdiagonal element of D(K) to E(K) and +* ZERO out subdiagonal entry of A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = ZERO + A( K+1, K+1 ) = W( K+1, K+1 ) + E( K ) = W( K+1, K ) + E( K+1 ) = ZERO +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21**T = A22 - L21*W**T +* +* computing blocks of NB columns at a time +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + CALL SGEMV( 'No transpose', J+JB-JJ, K-1, -ONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, ONE, + $ A( JJ, JJ ), 1 ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL SGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -ONE, A( J+JB, 1 ), LDA, W( J, 1 ), + $ LDW, ONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF +* + RETURN +* +* End of SLASYF_RK +* + END diff --git a/dspl/liblapack/SRC/slasyf_rook.f b/dspl/liblapack/SRC/slasyf_rook.f new file mode 100644 index 0000000..65bb2ad --- /dev/null +++ b/dspl/liblapack/SRC/slasyf_rook.f @@ -0,0 +1,892 @@ +*> \brief \b SLASYF_ROOK computes a partial factorization of a real symmetric matrix using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLASYF_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLASYF_ROOK( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLASYF_ROOK computes a partial factorization of a real symmetric +*> matrix A using the bounded Bunch-Kaufman ("rook") diagonal +*> pivoting method. The partial factorization has the form: +*> +*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +*> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) +*> +*> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L' +*> ( L21 I ) ( 0 A22 ) ( 0 I ) +*> +*> where the order of D is at most NB. The actual order is returned in +*> the argument KB, and is either NB or NB-1, or N if N <= NB. +*> +*> SLASYF_ROOK is an auxiliary routine called by SSYTRF_ROOK. It uses +*> blocked code (calling Level 3 BLAS) to update the submatrix +*> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The maximum number of columns of the matrix A that should be +*> factored. NB should be at least 2 to allow for 2-by-2 pivot +*> blocks. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns of A that were actually factored. +*> KB is either NB-1 or NB, or N if N <= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, A contains details of the partial factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> Only the last KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> Only the first KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (LDW,NB) +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup realSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SLASYF_ROOK( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, + $ INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER IMAX, ITEMP, J, JB, JJ, JMAX, JP1, JP2, K, KK, + $ KW, KKW, KP, KSTEP, P, II + + REAL ABSAKK, ALPHA, COLMAX, D11, D12, D21, D22, + $ STEMP, R1, ROWMAX, T, SFMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMM, SGEMV, SSCAL, SSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = SLAMCH( 'S' ) +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* + K = N + 10 CONTINUE +* +* KW is the column of W which corresponds to column K of A +* + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column KW of W and update it +* + CALL SCOPY( K, A( 1, K ), 1, W( 1, KW ), 1 ) + IF( K.LT.N ) + $ CALL SGEMV( 'No transpose', K, N-K, -ONE, A( 1, K+1 ), + $ LDA, W( K, KW+1 ), LDW, ONE, W( 1, KW ), 1 ) +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( W( K, KW ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = ISAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = ABS( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + CALL SCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + ELSE +* +* ============================================================ +* +* Test for interchange +* +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 12 CONTINUE +* +* Begin pivot search loop body +* +* +* Copy column IMAX to column KW-1 of W and update it +* + CALL SCOPY( IMAX, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 ) + CALL SCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) +* + IF( K.LT.N ) + $ CALL SGEMV( 'No transpose', K, N-K, -ONE, + $ A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW, + $ ONE, W( 1, KW-1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + ISAMAX( K-IMAX, W( IMAX+1, KW-1 ), + $ 1 ) + ROWMAX = ABS( W( JMAX, KW-1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = ISAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + STEMP = ABS( W( ITEMP, KW-1 ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for +* ABS( W( IMAX, KW-1 ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.(ABS( W( IMAX, KW-1 ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW of W +* + CALL SCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K-1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL SCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 12 +* + END IF +* +* ============================================================ +* + KK = K - KSTEP + 1 +* +* KKW is the column of W which corresponds to column KK of A +* + KKW = NB + KK - N +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column K to column P +* + CALL SCOPY( K-P, A( P+1, K ), 1, A( P, P+1 ), LDA ) + CALL SCOPY( P, A( 1, K ), 1, A( 1, P ), 1 ) +* +* Interchange rows K and P in last N-K+1 columns of A +* and last N-K+2 columns of W +* + CALL SSWAP( N-K+1, A( K, K ), LDA, A( P, K ), LDA ) + CALL SSWAP( N-KK+1, W( K, KKW ), LDW, W( P, KKW ), LDW ) + END IF +* +* Updated column KP is already stored in column KKW of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL SCOPY( K-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + CALL SCOPY( KP, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last N-KK+1 columns +* of A and W +* + CALL SSWAP( N-KK+1, A( KK, KK ), LDA, A( KP, KK ), LDA ) + CALL SSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column KW of W now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Store U(k) in column k of A +* + CALL SCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + IF( K.GT.1 ) THEN + IF( ABS( A( K, K ) ).GE.SFMIN ) THEN + R1 = ONE / A( K, K ) + CALL SSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE IF( A( K, K ).NE.ZERO ) THEN + DO 14 II = 1, K - 1 + A( II, K ) = A( II, K ) / A( K, K ) + 14 CONTINUE + END IF + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns KW and KW-1 of W now +* hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* + IF( K.GT.2 ) THEN +* +* Store U(k) and U(k-1) in columns k and k-1 of A +* + D12 = W( K-1, KW ) + D11 = W( K, KW ) / D12 + D22 = W( K-1, KW-1 ) / D12 + T = ONE / ( D11*D22-ONE ) + DO 20 J = 1, K - 2 + A( J, K-1 ) = T*( (D11*W( J, KW-1 )-W( J, KW ) ) / + $ D12 ) + A( J, K ) = T*( ( D22*W( J, KW )-W( J, KW-1 ) ) / + $ D12 ) + 20 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = W( K-1, KW ) + A( K, K ) = W( K, KW ) + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12**T = A11 - U12*W**T +* +* computing blocks of NB columns at a time +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + CALL SGEMV( 'No transpose', JJ-J+1, N-K, -ONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, ONE, + $ A( J, JJ ), 1 ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + IF( J.GE.2 ) + $ CALL SGEMM( 'No transpose', 'Transpose', J-1, JB, + $ N-K, -ONE, A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, + $ ONE, A( 1, J ), LDA ) + 50 CONTINUE +* +* Put U12 in standard form by partially undoing the interchanges +* in columns k+1:n +* + J = K + 1 + 60 CONTINUE +* + KSTEP = 1 + JP1 = 1 + JJ = J + JP2 = IPIV( J ) + IF( JP2.LT.0 ) THEN + JP2 = -JP2 + J = J + 1 + JP1 = -IPIV( J ) + KSTEP = 2 + END IF +* + J = J + 1 + IF( JP2.NE.JJ .AND. J.LE.N ) + $ CALL SSWAP( N-J+1, A( JP2, J ), LDA, A( JJ, J ), LDA ) + JJ = J - 1 + IF( JP1.NE.JJ .AND. KSTEP.EQ.2 ) + $ CALL SSWAP( N-J+1, A( JP1, J ), LDA, A( JJ, J ), LDA ) + IF( J.LE.N ) + $ GO TO 60 +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column K of W and update it +* + CALL SCOPY( N-K+1, A( K, K ), 1, W( K, K ), 1 ) + IF( K.GT.1 ) + $ CALL SGEMV( 'No transpose', N-K+1, K-1, -ONE, A( K, 1 ), + $ LDA, W( K, 1 ), LDW, ONE, W( K, K ), 1 ) +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( W( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + ISAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = ABS( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + CALL SCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + ELSE +* +* ============================================================ +* +* Test for interchange +* +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 72 CONTINUE +* +* Begin pivot search loop body +* +* +* Copy column IMAX to column K+1 of W and update it +* + CALL SCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1) + CALL SCOPY( N-IMAX+1, A( IMAX, IMAX ), 1, + $ W( IMAX, K+1 ), 1 ) + IF( K.GT.1 ) + $ CALL SGEMV( 'No transpose', N-K+1, K-1, -ONE, + $ A( K, 1 ), LDA, W( IMAX, 1 ), LDW, + $ ONE, W( K, K+1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + ISAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = ABS( W( JMAX, K+1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + ISAMAX( N-IMAX, W( IMAX+1, K+1 ), 1) + STEMP = ABS( W( ITEMP, K+1 ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for +* ABS( W( IMAX, K+1 ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABS( W( IMAX, K+1 ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K of W +* + CALL SCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL SCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 72 +* + END IF +* +* ============================================================ +* + KK = K + KSTEP - 1 +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column K to column P +* + CALL SCOPY( P-K, A( K, K ), 1, A( P, K ), LDA ) + CALL SCOPY( N-P+1, A( P, K ), 1, A( P, P ), 1 ) +* +* Interchange rows K and P in first K columns of A +* and first K+1 columns of W +* + CALL SSWAP( K, A( K, 1 ), LDA, A( P, 1 ), LDA ) + CALL SSWAP( KK, W( K, 1 ), LDW, W( P, 1 ), LDW ) + END IF +* +* Updated column KP is already stored in column KK of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL SCOPY( KP-K-1, A( K+1, KK ), 1, A( KP, K+1 ), LDA ) + CALL SCOPY( N-KP+1, A( KP, KK ), 1, A( KP, KP ), 1 ) +* +* Interchange rows KK and KP in first KK columns of A and W +* + CALL SSWAP( KK, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL SSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* +* Store L(k) in column k of A +* + CALL SCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN + IF( ABS( A( K, K ) ).GE.SFMIN ) THEN + R1 = ONE / A( K, K ) + CALL SSCAL( N-K, R1, A( K+1, K ), 1 ) + ELSE IF( A( K, K ).NE.ZERO ) THEN + DO 74 II = K + 1, N + A( II, K ) = A( II, K ) / A( K, K ) + 74 CONTINUE + END IF + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Store L(k) and L(k+1) in columns k and k+1 of A +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / D21 + T = ONE / ( D11*D22-ONE ) + DO 80 J = K + 2, N + A( J, K ) = T*( ( D11*W( J, K )-W( J, K+1 ) ) / + $ D21 ) + A( J, K+1 ) = T*( ( D22*W( J, K+1 )-W( J, K ) ) / + $ D21 ) + 80 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = W( K+1, K ) + A( K+1, K+1 ) = W( K+1, K+1 ) + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21**T = A22 - L21*W**T +* +* computing blocks of NB columns at a time +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + CALL SGEMV( 'No transpose', J+JB-JJ, K-1, -ONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, ONE, + $ A( JJ, JJ ), 1 ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL SGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -ONE, A( J+JB, 1 ), LDA, W( J, 1 ), LDW, + $ ONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Put L21 in standard form by partially undoing the interchanges +* in columns 1:k-1 +* + J = K - 1 + 120 CONTINUE +* + KSTEP = 1 + JP1 = 1 + JJ = J + JP2 = IPIV( J ) + IF( JP2.LT.0 ) THEN + JP2 = -JP2 + J = J - 1 + JP1 = -IPIV( J ) + KSTEP = 2 + END IF +* + J = J - 1 + IF( JP2.NE.JJ .AND. J.GE.1 ) + $ CALL SSWAP( J, A( JP2, 1 ), LDA, A( JJ, 1 ), LDA ) + JJ = J + 1 + IF( JP1.NE.JJ .AND. KSTEP.EQ.2 ) + $ CALL SSWAP( J, A( JP1, 1 ), LDA, A( JJ, 1 ), LDA ) + IF( J.GE.1 ) + $ GO TO 120 +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF + RETURN +* +* End of SLASYF_ROOK +* + END diff --git a/dspl/liblapack/SRC/slatbs.f b/dspl/liblapack/SRC/slatbs.f new file mode 100644 index 0000000..a06838c --- /dev/null +++ b/dspl/liblapack/SRC/slatbs.f @@ -0,0 +1,812 @@ +*> \brief \b SLATBS solves a triangular banded system of equations. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLATBS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLATBS( UPLO, TRANS, DIAG, NORMIN, N, KD, AB, LDAB, X, +* SCALE, CNORM, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORMIN, TRANS, UPLO +* INTEGER INFO, KD, LDAB, N +* REAL SCALE +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), CNORM( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLATBS solves one of the triangular systems +*> +*> A *x = s*b or A**T*x = s*b +*> +*> with scaling to prevent overflow, where A is an upper or lower +*> triangular band matrix. Here A**T denotes the transpose of A, x and b +*> are n-element vectors, and s is a scaling factor, usually less than +*> or equal to 1, chosen so that the components of x will be less than +*> the overflow threshold. If the unscaled problem will not cause +*> overflow, the Level 2 BLAS routine STBSV is called. If the matrix A +*> is singular (A(j,j) = 0 for some j), then s is set to 0 and a +*> non-trivial solution to A*x = 0 is returned. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the operation applied to A. +*> = 'N': Solve A * x = s*b (No transpose) +*> = 'T': Solve A**T* x = s*b (Transpose) +*> = 'C': Solve A**T* x = s*b (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] NORMIN +*> \verbatim +*> NORMIN is CHARACTER*1 +*> Specifies whether CNORM has been set or not. +*> = 'Y': CNORM contains the column norms on entry +*> = 'N': CNORM is not set on entry. On exit, the norms will +*> be computed and stored in CNORM. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of subdiagonals or superdiagonals in the +*> triangular matrix A. KD >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> The upper or lower triangular band matrix A, stored in the +*> first KD+1 rows of the array. The j-th column of A is stored +*> in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (N) +*> On entry, the right hand side b of the triangular system. +*> On exit, X is overwritten by the solution vector x. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL +*> The scaling factor s for the triangular system +*> A * x = s*b or A**T* x = s*b. +*> If SCALE = 0, the matrix A is singular or badly scaled, and +*> the vector x is an exact or approximate solution to A*x = 0. +*> \endverbatim +*> +*> \param[in,out] CNORM +*> \verbatim +*> CNORM is REAL array, dimension (N) +*> +*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +*> contains the norm of the off-diagonal part of the j-th column +*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal +*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +*> must be greater than or equal to the 1-norm. +*> +*> If NORMIN = 'N', CNORM is an output argument and CNORM(j) +*> returns the 1-norm of the offdiagonal part of the j-th column +*> of A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> A rough bound on x is computed; if that is less than overflow, STBSV +*> is called, otherwise, specific code is used which checks for possible +*> overflow or divide-by-zero at every operation. +*> +*> A columnwise scheme is used for solving A*x = b. The basic algorithm +*> if A is lower triangular is +*> +*> x[1:n] := b[1:n] +*> for j = 1, ..., n +*> x(j) := x(j) / A(j,j) +*> x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +*> end +*> +*> Define bounds on the components of x after j iterations of the loop: +*> M(j) = bound on x[1:j] +*> G(j) = bound on x[j+1:n] +*> Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +*> +*> Then for iteration j+1 we have +*> M(j+1) <= G(j) / | A(j+1,j+1) | +*> G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +*> <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +*> +*> where CNORM(j+1) is greater than or equal to the infinity-norm of +*> column j+1 of A, not counting the diagonal. Hence +*> +*> G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +*> 1<=i<=j +*> and +*> +*> |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +*> 1<=i< j +*> +*> Since |x(j)| <= M(j), we use the Level 2 BLAS routine STBSV if the +*> reciprocal of the largest M(j), j=1,..,n, is larger than +*> max(underflow, 1/overflow). +*> +*> The bound on x(j) is also used to determine when a step in the +*> columnwise method can be performed without fear of overflow. If +*> the computed bound is greater than a large constant, x is scaled to +*> prevent overflow, but if the bound overflows, x is set to 0, x(j) to +*> 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +*> +*> Similarly, a row-wise scheme is used to solve A**T*x = b. The basic +*> algorithm for A upper triangular is +*> +*> for j = 1, ..., n +*> x(j) := ( b(j) - A[1:j-1,j]**T * x[1:j-1] ) / A(j,j) +*> end +*> +*> We simultaneously compute two bounds +*> G(j) = bound on ( b(i) - A[1:i-1,i]**T * x[1:i-1] ), 1<=i<=j +*> M(j) = bound on x(i), 1<=i<=j +*> +*> The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +*> add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +*> Then the bound on x(j) is +*> +*> M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +*> +*> <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +*> 1<=i<=j +*> +*> and we can safely call STBSV if 1/M(n) and 1/G(n) are both greater +*> than max(underflow, 1/overflow). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLATBS( UPLO, TRANS, DIAG, NORMIN, N, KD, AB, LDAB, X, + $ SCALE, CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, KD, LDAB, N + REAL SCALE +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), CNORM( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST, JLEN, MAIND + REAL BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS, + $ TMAX, TSCAL, USCAL, XBND, XJ, XMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SASUM, SDOT, SLAMCH + EXTERNAL LSAME, ISAMAX, SASUM, SDOT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SSCAL, STBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( KD.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLATBS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + DO 10 J = 1, N + JLEN = MIN( KD, J-1 ) + CNORM( J ) = SASUM( JLEN, AB( KD+1-JLEN, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) THEN + CNORM( J ) = SASUM( JLEN, AB( 2, J ), 1 ) + ELSE + CNORM( J ) = ZERO + END IF + 20 CONTINUE + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM. +* + IMAX = ISAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM ) THEN + TSCAL = ONE + ELSE + TSCAL = ONE / ( SMLNUM*TMAX ) + CALL SSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine STBSV can be used. +* + J = ISAMAX( N, X, 1 ) + XMAX = ABS( X( J ) ) + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + MAIND = KD + 1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + MAIND = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 50 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 30 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* M(j) = G(j-1) / abs(A(j,j)) +* + TJJ = ABS( AB( MAIND, J ) ) + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 30 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 40 CONTINUE + END IF + 50 CONTINUE +* + ELSE +* +* Compute the growth in A**T * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + MAIND = KD + 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + MAIND = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 80 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 60 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + TJJ = ABS( AB( MAIND, J ) ) + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + 60 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 70 CONTINUE + END IF + 80 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL STBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = BIGNUM / XMAX + CALL SSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 100 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 95 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 90 I = 1, N + X( I ) = ZERO + 90 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 95 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL SSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - +* x(j)* A(max(1,j-kd):j-1,j) +* + JLEN = MIN( KD, J-1 ) + CALL SAXPY( JLEN, -X( J )*TSCAL, + $ AB( KD+1-JLEN, J ), 1, X( J-JLEN ), 1 ) + I = ISAMAX( J-1, X, 1 ) + XMAX = ABS( X( I ) ) + END IF + ELSE IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) - +* x(j) * A(j+1:min(j+kd,n),j) +* + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) + $ CALL SAXPY( JLEN, -X( J )*TSCAL, AB( 2, J ), 1, + $ X( J+1 ), 1 ) + I = J + ISAMAX( N-J, X( J+1 ), 1 ) + XMAX = ABS( X( I ) ) + END IF + 100 CONTINUE +* + ELSE +* +* Solve A**T * x = b +* + DO 140 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = ABS( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = USCAL / TJJS + END IF + IF( REC.LT.ONE ) THEN + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + SUMJ = ZERO + IF( USCAL.EQ.ONE ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call SDOT to perform the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + SUMJ = SDOT( JLEN, AB( KD+1-JLEN, J ), 1, + $ X( J-JLEN ), 1 ) + ELSE + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) + $ SUMJ = SDOT( JLEN, AB( 2, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + DO 110 I = 1, JLEN + SUMJ = SUMJ + ( AB( KD+I-JLEN, J )*USCAL )* + $ X( J-JLEN-1+I ) + 110 CONTINUE + ELSE + JLEN = MIN( KD, N-J ) + DO 120 I = 1, JLEN + SUMJ = SUMJ + ( AB( I+1, J )*USCAL )*X( J+I ) + 120 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.TSCAL ) THEN +* +* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - SUMJ + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 135 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A**T*x = 0. +* + DO 130 I = 1, N + X( I ) = ZERO + 130 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 135 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - sumj if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = X( J ) / TJJS - SUMJ + END IF + XMAX = MAX( XMAX, ABS( X( J ) ) ) + 140 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL SSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of SLATBS +* + END diff --git a/dspl/liblapack/SRC/slatdf.f b/dspl/liblapack/SRC/slatdf.f new file mode 100644 index 0000000..5496f9d --- /dev/null +++ b/dspl/liblapack/SRC/slatdf.f @@ -0,0 +1,323 @@ +*> \brief \b SLATDF uses the LU factorization of the n-by-n matrix computed by sgetc2 and computes a contribution to the reciprocal Dif-estimate. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLATDF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLATDF( IJOB, N, Z, LDZ, RHS, RDSUM, RDSCAL, IPIV, +* JPIV ) +* +* .. Scalar Arguments .. +* INTEGER IJOB, LDZ, N +* REAL RDSCAL, RDSUM +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), JPIV( * ) +* REAL RHS( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLATDF uses the LU factorization of the n-by-n matrix Z computed by +*> SGETC2 and computes a contribution to the reciprocal Dif-estimate +*> by solving Z * x = b for x, and choosing the r.h.s. b such that +*> the norm of x is as large as possible. On entry RHS = b holds the +*> contribution from earlier solved sub-systems, and on return RHS = x. +*> +*> The factorization of Z returned by SGETC2 has the form Z = P*L*U*Q, +*> where P and Q are permutation matrices. L is lower triangular with +*> unit diagonal elements and U is upper triangular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] IJOB +*> \verbatim +*> IJOB is INTEGER +*> IJOB = 2: First compute an approximative null-vector e +*> of Z using SGECON, e is normalized and solve for +*> Zx = +-e - f with the sign giving the greater value +*> of 2-norm(x). About 5 times as expensive as Default. +*> IJOB .ne. 2: Local look ahead strategy where all entries of +*> the r.h.s. b is chosen as either +1 or -1 (Default). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Z. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> On entry, the LU part of the factorization of the n-by-n +*> matrix Z computed by SGETC2: Z = P * L * U * Q +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDA >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] RHS +*> \verbatim +*> RHS is REAL array, dimension N. +*> On entry, RHS contains contributions from other subsystems. +*> On exit, RHS contains the solution of the subsystem with +*> entries acoording to the value of IJOB (see above). +*> \endverbatim +*> +*> \param[in,out] RDSUM +*> \verbatim +*> RDSUM is REAL +*> On entry, the sum of squares of computed contributions to +*> the Dif-estimate under computation by STGSYL, where the +*> scaling factor RDSCAL (see below) has been factored out. +*> On exit, the corresponding sum of squares updated with the +*> contributions from the current sub-system. +*> If TRANS = 'T' RDSUM is not touched. +*> NOTE: RDSUM only makes sense when STGSY2 is called by STGSYL. +*> \endverbatim +*> +*> \param[in,out] RDSCAL +*> \verbatim +*> RDSCAL is REAL +*> On entry, scaling factor used to prevent overflow in RDSUM. +*> On exit, RDSCAL is updated w.r.t. the current contributions +*> in RDSUM. +*> If TRANS = 'T', RDSCAL is not touched. +*> NOTE: RDSCAL only makes sense when STGSY2 is called by +*> STGSYL. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N). +*> The pivot indices; for 1 <= i <= N, row i of the +*> matrix has been interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] JPIV +*> \verbatim +*> JPIV is INTEGER array, dimension (N). +*> The pivot indices; for 1 <= j <= N, column j of the +*> matrix has been interchanged with column JPIV(j). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> This routine is a further developed implementation of algorithm +*> BSOLVE in [1] using complete pivoting in the LU factorization. +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> +*> [1] Bo Kagstrom and Lars Westin, +*> Generalized Schur Methods with Condition Estimators for +*> Solving the Generalized Sylvester Equation, IEEE Transactions +*> on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751. +*> +*> [2] Peter Poromaa, +*> On Efficient and Robust Estimators for the Separation +*> between two Regular Matrix Pairs with Applications in +*> Condition Estimation. Report IMINF-95.05, Departement of +*> Computing Science, Umea University, S-901 87 Umea, Sweden, 1995. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLATDF( IJOB, N, Z, LDZ, RHS, RDSUM, RDSCAL, IPIV, + $ JPIV ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER IJOB, LDZ, N + REAL RDSCAL, RDSUM +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + REAL RHS( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXDIM + PARAMETER ( MAXDIM = 8 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J, K + REAL BM, BP, PMONE, SMINU, SPLUS, TEMP +* .. +* .. Local Arrays .. + INTEGER IWORK( MAXDIM ) + REAL WORK( 4*MAXDIM ), XM( MAXDIM ), XP( MAXDIM ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGECON, SGESC2, SLASSQ, SLASWP, + $ SSCAL +* .. +* .. External Functions .. + REAL SASUM, SDOT + EXTERNAL SASUM, SDOT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( IJOB.NE.2 ) THEN +* +* Apply permutations IPIV to RHS +* + CALL SLASWP( 1, RHS, LDZ, 1, N-1, IPIV, 1 ) +* +* Solve for L-part choosing RHS either to +1 or -1. +* + PMONE = -ONE +* + DO 10 J = 1, N - 1 + BP = RHS( J ) + ONE + BM = RHS( J ) - ONE + SPLUS = ONE +* +* Look-ahead for L-part RHS(1:N-1) = + or -1, SPLUS and +* SMIN computed more efficiently than in BSOLVE [1]. +* + SPLUS = SPLUS + SDOT( N-J, Z( J+1, J ), 1, Z( J+1, J ), 1 ) + SMINU = SDOT( N-J, Z( J+1, J ), 1, RHS( J+1 ), 1 ) + SPLUS = SPLUS*RHS( J ) + IF( SPLUS.GT.SMINU ) THEN + RHS( J ) = BP + ELSE IF( SMINU.GT.SPLUS ) THEN + RHS( J ) = BM + ELSE +* +* In this case the updating sums are equal and we can +* choose RHS(J) +1 or -1. The first time this happens +* we choose -1, thereafter +1. This is a simple way to +* get good estimates of matrices like Byers well-known +* example (see [1]). (Not done in BSOLVE.) +* + RHS( J ) = RHS( J ) + PMONE + PMONE = ONE + END IF +* +* Compute the remaining r.h.s. +* + TEMP = -RHS( J ) + CALL SAXPY( N-J, TEMP, Z( J+1, J ), 1, RHS( J+1 ), 1 ) +* + 10 CONTINUE +* +* Solve for U-part, look-ahead for RHS(N) = +-1. This is not done +* in BSOLVE and will hopefully give us a better estimate because +* any ill-conditioning of the original matrix is transfered to U +* and not to L. U(N, N) is an approximation to sigma_min(LU). +* + CALL SCOPY( N-1, RHS, 1, XP, 1 ) + XP( N ) = RHS( N ) + ONE + RHS( N ) = RHS( N ) - ONE + SPLUS = ZERO + SMINU = ZERO + DO 30 I = N, 1, -1 + TEMP = ONE / Z( I, I ) + XP( I ) = XP( I )*TEMP + RHS( I ) = RHS( I )*TEMP + DO 20 K = I + 1, N + XP( I ) = XP( I ) - XP( K )*( Z( I, K )*TEMP ) + RHS( I ) = RHS( I ) - RHS( K )*( Z( I, K )*TEMP ) + 20 CONTINUE + SPLUS = SPLUS + ABS( XP( I ) ) + SMINU = SMINU + ABS( RHS( I ) ) + 30 CONTINUE + IF( SPLUS.GT.SMINU ) + $ CALL SCOPY( N, XP, 1, RHS, 1 ) +* +* Apply the permutations JPIV to the computed solution (RHS) +* + CALL SLASWP( 1, RHS, LDZ, 1, N-1, JPIV, -1 ) +* +* Compute the sum of squares +* + CALL SLASSQ( N, RHS, 1, RDSCAL, RDSUM ) +* + ELSE +* +* IJOB = 2, Compute approximate nullvector XM of Z +* + CALL SGECON( 'I', N, Z, LDZ, ONE, TEMP, WORK, IWORK, INFO ) + CALL SCOPY( N, WORK( N+1 ), 1, XM, 1 ) +* +* Compute RHS +* + CALL SLASWP( 1, XM, LDZ, 1, N-1, IPIV, -1 ) + TEMP = ONE / SQRT( SDOT( N, XM, 1, XM, 1 ) ) + CALL SSCAL( N, TEMP, XM, 1 ) + CALL SCOPY( N, XM, 1, XP, 1 ) + CALL SAXPY( N, ONE, RHS, 1, XP, 1 ) + CALL SAXPY( N, -ONE, XM, 1, RHS, 1 ) + CALL SGESC2( N, Z, LDZ, RHS, IPIV, JPIV, TEMP ) + CALL SGESC2( N, Z, LDZ, XP, IPIV, JPIV, TEMP ) + IF( SASUM( N, XP, 1 ).GT.SASUM( N, RHS, 1 ) ) + $ CALL SCOPY( N, XP, 1, RHS, 1 ) +* +* Compute the sum of squares +* + CALL SLASSQ( N, RHS, 1, RDSCAL, RDSUM ) +* + END IF +* + RETURN +* +* End of SLATDF +* + END diff --git a/dspl/liblapack/SRC/slatps.f b/dspl/liblapack/SRC/slatps.f new file mode 100644 index 0000000..83151c1 --- /dev/null +++ b/dspl/liblapack/SRC/slatps.f @@ -0,0 +1,795 @@ +*> \brief \b SLATPS solves a triangular system of equations with the matrix held in packed storage. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLATPS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLATPS( UPLO, TRANS, DIAG, NORMIN, N, AP, X, SCALE, +* CNORM, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORMIN, TRANS, UPLO +* INTEGER INFO, N +* REAL SCALE +* .. +* .. Array Arguments .. +* REAL AP( * ), CNORM( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLATPS solves one of the triangular systems +*> +*> A *x = s*b or A**T*x = s*b +*> +*> with scaling to prevent overflow, where A is an upper or lower +*> triangular matrix stored in packed form. Here A**T denotes the +*> transpose of A, x and b are n-element vectors, and s is a scaling +*> factor, usually less than or equal to 1, chosen so that the +*> components of x will be less than the overflow threshold. If the +*> unscaled problem will not cause overflow, the Level 2 BLAS routine +*> STPSV is called. If the matrix A is singular (A(j,j) = 0 for some j), +*> then s is set to 0 and a non-trivial solution to A*x = 0 is returned. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the operation applied to A. +*> = 'N': Solve A * x = s*b (No transpose) +*> = 'T': Solve A**T* x = s*b (Transpose) +*> = 'C': Solve A**T* x = s*b (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] NORMIN +*> \verbatim +*> NORMIN is CHARACTER*1 +*> Specifies whether CNORM has been set or not. +*> = 'Y': CNORM contains the column norms on entry +*> = 'N': CNORM is not set on entry. On exit, the norms will +*> be computed and stored in CNORM. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> The upper or lower triangular matrix A, packed columnwise in +*> a linear array. The j-th column of A is stored in the array +*> AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (N) +*> On entry, the right hand side b of the triangular system. +*> On exit, X is overwritten by the solution vector x. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL +*> The scaling factor s for the triangular system +*> A * x = s*b or A**T* x = s*b. +*> If SCALE = 0, the matrix A is singular or badly scaled, and +*> the vector x is an exact or approximate solution to A*x = 0. +*> \endverbatim +*> +*> \param[in,out] CNORM +*> \verbatim +*> CNORM is REAL array, dimension (N) +*> +*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +*> contains the norm of the off-diagonal part of the j-th column +*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal +*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +*> must be greater than or equal to the 1-norm. +*> +*> If NORMIN = 'N', CNORM is an output argument and CNORM(j) +*> returns the 1-norm of the offdiagonal part of the j-th column +*> of A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> A rough bound on x is computed; if that is less than overflow, STPSV +*> is called, otherwise, specific code is used which checks for possible +*> overflow or divide-by-zero at every operation. +*> +*> A columnwise scheme is used for solving A*x = b. The basic algorithm +*> if A is lower triangular is +*> +*> x[1:n] := b[1:n] +*> for j = 1, ..., n +*> x(j) := x(j) / A(j,j) +*> x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +*> end +*> +*> Define bounds on the components of x after j iterations of the loop: +*> M(j) = bound on x[1:j] +*> G(j) = bound on x[j+1:n] +*> Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +*> +*> Then for iteration j+1 we have +*> M(j+1) <= G(j) / | A(j+1,j+1) | +*> G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +*> <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +*> +*> where CNORM(j+1) is greater than or equal to the infinity-norm of +*> column j+1 of A, not counting the diagonal. Hence +*> +*> G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +*> 1<=i<=j +*> and +*> +*> |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +*> 1<=i< j +*> +*> Since |x(j)| <= M(j), we use the Level 2 BLAS routine STPSV if the +*> reciprocal of the largest M(j), j=1,..,n, is larger than +*> max(underflow, 1/overflow). +*> +*> The bound on x(j) is also used to determine when a step in the +*> columnwise method can be performed without fear of overflow. If +*> the computed bound is greater than a large constant, x is scaled to +*> prevent overflow, but if the bound overflows, x is set to 0, x(j) to +*> 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +*> +*> Similarly, a row-wise scheme is used to solve A**T*x = b. The basic +*> algorithm for A upper triangular is +*> +*> for j = 1, ..., n +*> x(j) := ( b(j) - A[1:j-1,j]**T * x[1:j-1] ) / A(j,j) +*> end +*> +*> We simultaneously compute two bounds +*> G(j) = bound on ( b(i) - A[1:i-1,i]**T * x[1:i-1] ), 1<=i<=j +*> M(j) = bound on x(i), 1<=i<=j +*> +*> The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +*> add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +*> Then the bound on x(j) is +*> +*> M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +*> +*> <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +*> 1<=i<=j +*> +*> and we can safely call STPSV if 1/M(n) and 1/G(n) are both greater +*> than max(underflow, 1/overflow). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLATPS( UPLO, TRANS, DIAG, NORMIN, N, AP, X, SCALE, + $ CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, N + REAL SCALE +* .. +* .. Array Arguments .. + REAL AP( * ), CNORM( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, IP, J, JFIRST, JINC, JLAST, JLEN + REAL BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS, + $ TMAX, TSCAL, USCAL, XBND, XJ, XMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SASUM, SDOT, SLAMCH + EXTERNAL LSAME, ISAMAX, SASUM, SDOT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SSCAL, STPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLATPS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + IP = 1 + DO 10 J = 1, N + CNORM( J ) = SASUM( J-1, AP( IP ), 1 ) + IP = IP + J + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + IP = 1 + DO 20 J = 1, N - 1 + CNORM( J ) = SASUM( N-J, AP( IP+1 ), 1 ) + IP = IP + N - J + 1 + 20 CONTINUE + CNORM( N ) = ZERO + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM. +* + IMAX = ISAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM ) THEN + TSCAL = ONE + ELSE + TSCAL = ONE / ( SMLNUM*TMAX ) + CALL SSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine STPSV can be used. +* + J = ISAMAX( N, X, 1 ) + XMAX = ABS( X( J ) ) + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 50 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = N + DO 30 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* M(j) = G(j-1) / abs(A(j,j)) +* + TJJ = ABS( AP( IP ) ) + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + IP = IP + JINC*JLEN + JLEN = JLEN - 1 + 30 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 40 CONTINUE + END IF + 50 CONTINUE +* + ELSE +* +* Compute the growth in A**T * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 80 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = 1 + DO 60 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + TJJ = ABS( AP( IP ) ) + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + JLEN = JLEN + 1 + IP = IP + JINC*JLEN + 60 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 70 CONTINUE + END IF + 80 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL STPSV( UPLO, TRANS, DIAG, N, AP, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = BIGNUM / XMAX + CALL SSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + IP = JFIRST*( JFIRST+1 ) / 2 + DO 100 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 95 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 90 I = 1, N + X( I ) = ZERO + 90 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 95 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL SSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +* + CALL SAXPY( J-1, -X( J )*TSCAL, AP( IP-J+1 ), 1, X, + $ 1 ) + I = ISAMAX( J-1, X, 1 ) + XMAX = ABS( X( I ) ) + END IF + IP = IP - J + ELSE + IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +* + CALL SAXPY( N-J, -X( J )*TSCAL, AP( IP+1 ), 1, + $ X( J+1 ), 1 ) + I = J + ISAMAX( N-J, X( J+1 ), 1 ) + XMAX = ABS( X( I ) ) + END IF + IP = IP + N - J + 1 + END IF + 100 CONTINUE +* + ELSE +* +* Solve A**T * x = b +* + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = 1 + DO 140 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = ABS( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = USCAL / TJJS + END IF + IF( REC.LT.ONE ) THEN + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + SUMJ = ZERO + IF( USCAL.EQ.ONE ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call SDOT to perform the dot product. +* + IF( UPPER ) THEN + SUMJ = SDOT( J-1, AP( IP-J+1 ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + SUMJ = SDOT( N-J, AP( IP+1 ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 110 I = 1, J - 1 + SUMJ = SUMJ + ( AP( IP-J+I )*USCAL )*X( I ) + 110 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 120 I = 1, N - J + SUMJ = SUMJ + ( AP( IP+I )*USCAL )*X( J+I ) + 120 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.TSCAL ) THEN +* +* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - SUMJ + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 135 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A**T*x = 0. +* + DO 130 I = 1, N + X( I ) = ZERO + 130 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 135 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - sumj if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = X( J ) / TJJS - SUMJ + END IF + XMAX = MAX( XMAX, ABS( X( J ) ) ) + JLEN = JLEN + 1 + IP = IP + JINC*JLEN + 140 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL SSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of SLATPS +* + END diff --git a/dspl/liblapack/SRC/slatrd.f b/dspl/liblapack/SRC/slatrd.f new file mode 100644 index 0000000..f562ce3 --- /dev/null +++ b/dspl/liblapack/SRC/slatrd.f @@ -0,0 +1,336 @@ +*> \brief \b SLATRD reduces the first nb rows and columns of a symmetric/Hermitian matrix A to real tridiagonal form by an orthogonal similarity transformation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLATRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), E( * ), TAU( * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLATRD reduces NB rows and columns of a real symmetric matrix A to +*> symmetric tridiagonal form by an orthogonal similarity +*> transformation Q**T * A * Q, and returns the matrices V and W which are +*> needed to apply the transformation to the unreduced part of A. +*> +*> If UPLO = 'U', SLATRD reduces the last NB rows and columns of a +*> matrix, of which the upper triangle is supplied; +*> if UPLO = 'L', SLATRD reduces the first NB rows and columns of a +*> matrix, of which the lower triangle is supplied. +*> +*> This is an auxiliary routine called by SSYTRD. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of rows and columns to be reduced. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit: +*> if UPLO = 'U', the last NB columns have been reduced to +*> tridiagonal form, with the diagonal elements overwriting +*> the diagonal elements of A; the elements above the diagonal +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of elementary reflectors; +*> if UPLO = 'L', the first NB columns have been reduced to +*> tridiagonal form, with the diagonal elements overwriting +*> the diagonal elements of A; the elements below the diagonal +*> with the array TAU, represent the orthogonal matrix Q as a +*> product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= (1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal +*> elements of the last NB columns of the reduced matrix; +*> if UPLO = 'L', E(1:nb) contains the subdiagonal elements of +*> the first NB columns of the reduced matrix. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (N-1) +*> The scalar factors of the elementary reflectors, stored in +*> TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. +*> See Further Details. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (LDW,NB) +*> The n-by-nb matrix W required to update the unreduced part +*> of A. +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup doubleOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n) H(n-1) . . . H(n-nb+1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), +*> and tau in TAU(i-1). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(nb). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), +*> and tau in TAU(i). +*> +*> The elements of the vectors v together form the n-by-nb matrix V +*> which is needed, with W, to apply the transformation to the unreduced +*> part of the matrix, using a symmetric rank-2k update of the form: +*> A := A - V*W**T - W*V**T. +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5 and nb = 2: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( a a a v4 v5 ) ( d ) +*> ( a a v4 v5 ) ( 1 d ) +*> ( a 1 v5 ) ( v1 1 a ) +*> ( d 1 ) ( v1 v2 a a ) +*> ( d ) ( v1 v2 a a a ) +*> +*> where d denotes a diagonal element of the reduced matrix, a denotes +*> an element of the original matrix that is unchanged, and vi denotes +*> an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDW, N, NB +* .. +* .. Array Arguments .. + REAL A( LDA, * ), E( * ), TAU( * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, HALF + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, HALF = 0.5E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IW + REAL ALPHA +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SGEMV, SLARFG, SSCAL, SSYMV +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Reduce last NB columns of upper triangle +* + DO 10 I = N, N - NB + 1, -1 + IW = I - N + NB + IF( I.LT.N ) THEN +* +* Update A(1:i,i) +* + CALL SGEMV( 'No transpose', I, N-I, -ONE, A( 1, I+1 ), + $ LDA, W( I, IW+1 ), LDW, ONE, A( 1, I ), 1 ) + CALL SGEMV( 'No transpose', I, N-I, -ONE, W( 1, IW+1 ), + $ LDW, A( I, I+1 ), LDA, ONE, A( 1, I ), 1 ) + END IF + IF( I.GT.1 ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(1:i-2,i) +* + CALL SLARFG( I-1, A( I-1, I ), A( 1, I ), 1, TAU( I-1 ) ) + E( I-1 ) = A( I-1, I ) + A( I-1, I ) = ONE +* +* Compute W(1:i-1,i) +* + CALL SSYMV( 'Upper', I-1, ONE, A, LDA, A( 1, I ), 1, + $ ZERO, W( 1, IW ), 1 ) + IF( I.LT.N ) THEN + CALL SGEMV( 'Transpose', I-1, N-I, ONE, W( 1, IW+1 ), + $ LDW, A( 1, I ), 1, ZERO, W( I+1, IW ), 1 ) + CALL SGEMV( 'No transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + CALL SGEMV( 'Transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( 1, I ), 1, ZERO, W( I+1, IW ), 1 ) + CALL SGEMV( 'No transpose', I-1, N-I, -ONE, + $ W( 1, IW+1 ), LDW, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + END IF + CALL SSCAL( I-1, TAU( I-1 ), W( 1, IW ), 1 ) + ALPHA = -HALF*TAU( I-1 )*SDOT( I-1, W( 1, IW ), 1, + $ A( 1, I ), 1 ) + CALL SAXPY( I-1, ALPHA, A( 1, I ), 1, W( 1, IW ), 1 ) + END IF +* + 10 CONTINUE + ELSE +* +* Reduce first NB columns of lower triangle +* + DO 20 I = 1, NB +* +* Update A(i:n,i) +* + CALL SGEMV( 'No transpose', N-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, W( I, 1 ), LDW, ONE, A( I, I ), 1 ) + CALL SGEMV( 'No transpose', N-I+1, I-1, -ONE, W( I, 1 ), + $ LDW, A( I, 1 ), LDA, ONE, A( I, I ), 1 ) + IF( I.LT.N ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:n,i) +* + CALL SLARFG( N-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, + $ TAU( I ) ) + E( I ) = A( I+1, I ) + A( I+1, I ) = ONE +* +* Compute W(i+1:n,i) +* + CALL SSYMV( 'Lower', N-I, ONE, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, W( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', N-I, I-1, ONE, W( I+1, 1 ), LDW, + $ A( I+1, I ), 1, ZERO, W( 1, I ), 1 ) + CALL SGEMV( 'No transpose', N-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL SGEMV( 'Transpose', N-I, I-1, ONE, A( I+1, 1 ), LDA, + $ A( I+1, I ), 1, ZERO, W( 1, I ), 1 ) + CALL SGEMV( 'No transpose', N-I, I-1, -ONE, W( I+1, 1 ), + $ LDW, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL SSCAL( N-I, TAU( I ), W( I+1, I ), 1 ) + ALPHA = -HALF*TAU( I )*SDOT( N-I, W( I+1, I ), 1, + $ A( I+1, I ), 1 ) + CALL SAXPY( N-I, ALPHA, A( I+1, I ), 1, W( I+1, I ), 1 ) + END IF +* + 20 CONTINUE + END IF +* + RETURN +* +* End of SLATRD +* + END diff --git a/dspl/liblapack/SRC/slatrs.f b/dspl/liblapack/SRC/slatrs.f new file mode 100644 index 0000000..d62debc --- /dev/null +++ b/dspl/liblapack/SRC/slatrs.f @@ -0,0 +1,787 @@ +*> \brief \b SLATRS solves a triangular system of equations with the scale factor set to prevent overflow. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLATRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE, +* CNORM, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORMIN, TRANS, UPLO +* INTEGER INFO, LDA, N +* REAL SCALE +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), CNORM( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLATRS solves one of the triangular systems +*> +*> A *x = s*b or A**T*x = s*b +*> +*> with scaling to prevent overflow. Here A is an upper or lower +*> triangular matrix, A**T denotes the transpose of A, x and b are +*> n-element vectors, and s is a scaling factor, usually less than +*> or equal to 1, chosen so that the components of x will be less than +*> the overflow threshold. If the unscaled problem will not cause +*> overflow, the Level 2 BLAS routine STRSV is called. If the matrix A +*> is singular (A(j,j) = 0 for some j), then s is set to 0 and a +*> non-trivial solution to A*x = 0 is returned. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the operation applied to A. +*> = 'N': Solve A * x = s*b (No transpose) +*> = 'T': Solve A**T* x = s*b (Transpose) +*> = 'C': Solve A**T* x = s*b (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] NORMIN +*> \verbatim +*> NORMIN is CHARACTER*1 +*> Specifies whether CNORM has been set or not. +*> = 'Y': CNORM contains the column norms on entry +*> = 'N': CNORM is not set on entry. On exit, the norms will +*> be computed and stored in CNORM. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The triangular matrix A. If UPLO = 'U', the leading n by n +*> upper triangular part of the array A contains the upper +*> triangular matrix, and the strictly lower triangular part of +*> A is not referenced. If UPLO = 'L', the leading n by n lower +*> triangular part of the array A contains the lower triangular +*> matrix, and the strictly upper triangular part of A is not +*> referenced. If DIAG = 'U', the diagonal elements of A are +*> also not referenced and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max (1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (N) +*> On entry, the right hand side b of the triangular system. +*> On exit, X is overwritten by the solution vector x. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL +*> The scaling factor s for the triangular system +*> A * x = s*b or A**T* x = s*b. +*> If SCALE = 0, the matrix A is singular or badly scaled, and +*> the vector x is an exact or approximate solution to A*x = 0. +*> \endverbatim +*> +*> \param[in,out] CNORM +*> \verbatim +*> CNORM is REAL array, dimension (N) +*> +*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +*> contains the norm of the off-diagonal part of the j-th column +*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal +*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +*> must be greater than or equal to the 1-norm. +*> +*> If NORMIN = 'N', CNORM is an output argument and CNORM(j) +*> returns the 1-norm of the offdiagonal part of the j-th column +*> of A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> A rough bound on x is computed; if that is less than overflow, STRSV +*> is called, otherwise, specific code is used which checks for possible +*> overflow or divide-by-zero at every operation. +*> +*> A columnwise scheme is used for solving A*x = b. The basic algorithm +*> if A is lower triangular is +*> +*> x[1:n] := b[1:n] +*> for j = 1, ..., n +*> x(j) := x(j) / A(j,j) +*> x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +*> end +*> +*> Define bounds on the components of x after j iterations of the loop: +*> M(j) = bound on x[1:j] +*> G(j) = bound on x[j+1:n] +*> Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +*> +*> Then for iteration j+1 we have +*> M(j+1) <= G(j) / | A(j+1,j+1) | +*> G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +*> <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +*> +*> where CNORM(j+1) is greater than or equal to the infinity-norm of +*> column j+1 of A, not counting the diagonal. Hence +*> +*> G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +*> 1<=i<=j +*> and +*> +*> |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +*> 1<=i< j +*> +*> Since |x(j)| <= M(j), we use the Level 2 BLAS routine STRSV if the +*> reciprocal of the largest M(j), j=1,..,n, is larger than +*> max(underflow, 1/overflow). +*> +*> The bound on x(j) is also used to determine when a step in the +*> columnwise method can be performed without fear of overflow. If +*> the computed bound is greater than a large constant, x is scaled to +*> prevent overflow, but if the bound overflows, x is set to 0, x(j) to +*> 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +*> +*> Similarly, a row-wise scheme is used to solve A**T*x = b. The basic +*> algorithm for A upper triangular is +*> +*> for j = 1, ..., n +*> x(j) := ( b(j) - A[1:j-1,j]**T * x[1:j-1] ) / A(j,j) +*> end +*> +*> We simultaneously compute two bounds +*> G(j) = bound on ( b(i) - A[1:i-1,i]**T * x[1:i-1] ), 1<=i<=j +*> M(j) = bound on x(i), 1<=i<=j +*> +*> The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +*> add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +*> Then the bound on x(j) is +*> +*> M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +*> +*> <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +*> 1<=i<=j +*> +*> and we can safely call STRSV if 1/M(n) and 1/G(n) are both greater +*> than max(underflow, 1/overflow). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE, + $ CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, LDA, N + REAL SCALE +* .. +* .. Array Arguments .. + REAL A( LDA, * ), CNORM( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST + REAL BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS, + $ TMAX, TSCAL, USCAL, XBND, XJ, XMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SASUM, SDOT, SLAMCH + EXTERNAL LSAME, ISAMAX, SASUM, SDOT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SSCAL, STRSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLATRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + DO 10 J = 1, N + CNORM( J ) = SASUM( J-1, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N - 1 + CNORM( J ) = SASUM( N-J, A( J+1, J ), 1 ) + 20 CONTINUE + CNORM( N ) = ZERO + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM. +* + IMAX = ISAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM ) THEN + TSCAL = ONE + ELSE + TSCAL = ONE / ( SMLNUM*TMAX ) + CALL SSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine STRSV can be used. +* + J = ISAMAX( N, X, 1 ) + XMAX = ABS( X( J ) ) + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 50 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 30 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* M(j) = G(j-1) / abs(A(j,j)) +* + TJJ = ABS( A( J, J ) ) + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 30 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 50 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 40 CONTINUE + END IF + 50 CONTINUE +* + ELSE +* +* Compute the growth in A**T * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 80 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = ONE / MAX( XBND, SMLNUM ) + XBND = GROW + DO 60 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + TJJ = ABS( A( J, J ) ) + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + 60 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) ) + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 80 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 70 CONTINUE + END IF + 80 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL STRSV( UPLO, TRANS, DIAG, N, A, LDA, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = BIGNUM / XMAX + CALL SSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 100 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 95 + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + XJ = ABS( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 90 I = 1, N + X( I ) = ZERO + 90 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 95 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL SSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +* + CALL SAXPY( J-1, -X( J )*TSCAL, A( 1, J ), 1, X, + $ 1 ) + I = ISAMAX( J-1, X, 1 ) + XMAX = ABS( X( I ) ) + END IF + ELSE + IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +* + CALL SAXPY( N-J, -X( J )*TSCAL, A( J+1, J ), 1, + $ X( J+1 ), 1 ) + I = J + ISAMAX( N-J, X( J+1 ), 1 ) + XMAX = ABS( X( I ) ) + END IF + END IF + 100 CONTINUE +* + ELSE +* +* Solve A**T * x = b +* + DO 140 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = ABS( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = ABS( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = USCAL / TJJS + END IF + IF( REC.LT.ONE ) THEN + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + SUMJ = ZERO + IF( USCAL.EQ.ONE ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call SDOT to perform the dot product. +* + IF( UPPER ) THEN + SUMJ = SDOT( J-1, A( 1, J ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + SUMJ = SDOT( N-J, A( J+1, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 110 I = 1, J - 1 + SUMJ = SUMJ + ( A( I, J )*USCAL )*X( I ) + 110 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 120 I = J + 1, N + SUMJ = SUMJ + ( A( I, J )*USCAL )*X( I ) + 120 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.TSCAL ) THEN +* +* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - SUMJ + XJ = ABS( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 135 + END IF +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJ = ABS( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = X( J ) / TJJS + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL SSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = X( J ) / TJJS + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A**T*x = 0. +* + DO 130 I = 1, N + X( I ) = ZERO + 130 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 135 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - sumj if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = X( J ) / TJJS - SUMJ + END IF + XMAX = MAX( XMAX, ABS( X( J ) ) ) + 140 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL SSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of SLATRS +* + END diff --git a/dspl/liblapack/SRC/slatrz.f b/dspl/liblapack/SRC/slatrz.f new file mode 100644 index 0000000..16e9732 --- /dev/null +++ b/dspl/liblapack/SRC/slatrz.f @@ -0,0 +1,200 @@ +*> \brief \b SLATRZ factors an upper trapezoidal matrix by means of orthogonal transformations. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLATRZ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLATRZ( M, N, L, A, LDA, TAU, WORK ) +* +* .. Scalar Arguments .. +* INTEGER L, LDA, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLATRZ factors the M-by-(M+L) real upper trapezoidal matrix +*> [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R 0 ) * Z, by means +*> of orthogonal transformations. Z is an (M+L)-by-(M+L) orthogonal +*> matrix and, R and A1 are M-by-M upper triangular matrices. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of columns of the matrix A containing the +*> meaningful part of the Householder vectors. N-M >= L >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the leading M-by-N upper trapezoidal part of the +*> array A must contain the matrix to be factorized. +*> On exit, the leading M-by-M upper triangular part of A +*> contains the upper triangular matrix R, and elements N-L+1 to +*> N of the first M rows of A, with the array TAU, represent the +*> orthogonal matrix Z as a product of M elementary reflectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (M) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (M) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The factorization is obtained by Householder's method. The kth +*> transformation matrix, Z( k ), which is used to introduce zeros into +*> the ( m - k + 1 )th row of A, is given in the form +*> +*> Z( k ) = ( I 0 ), +*> ( 0 T( k ) ) +*> +*> where +*> +*> T( k ) = I - tau*u( k )*u( k )**T, u( k ) = ( 1 ), +*> ( 0 ) +*> ( z( k ) ) +*> +*> tau is a scalar and z( k ) is an l element vector. tau and z( k ) +*> are chosen to annihilate the elements of the kth row of A2. +*> +*> The scalar tau is returned in the kth element of TAU and the vector +*> u( k ) in the kth row of A2, such that the elements of z( k ) are +*> in a( k, l + 1 ), ..., a( k, n ). The elements of R are returned in +*> the upper triangular part of A1. +*> +*> Z is given by +*> +*> Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLATRZ( M, N, L, A, LDA, TAU, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER L, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. External Subroutines .. + EXTERNAL SLARFG, SLARZ +* .. +* .. Executable Statements .. +* +* Test the input arguments +* +* Quick return if possible +* + IF( M.EQ.0 ) THEN + RETURN + ELSE IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + RETURN + END IF +* + DO 20 I = M, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* [ A(i,i) A(i,n-l+1:n) ] +* + CALL SLARFG( L+1, A( I, I ), A( I, N-L+1 ), LDA, TAU( I ) ) +* +* Apply H(i) to A(1:i-1,i:n) from the right +* + CALL SLARZ( 'Right', I-1, N-I+1, L, A( I, N-L+1 ), LDA, + $ TAU( I ), A( 1, I ), LDA, WORK ) +* + 20 CONTINUE +* + RETURN +* +* End of SLATRZ +* + END diff --git a/dspl/liblapack/SRC/slatsqr.f b/dspl/liblapack/SRC/slatsqr.f new file mode 100644 index 0000000..d6d6827 --- /dev/null +++ b/dspl/liblapack/SRC/slatsqr.f @@ -0,0 +1,255 @@ +* +* Definition: +* =========== +* +* SUBROUTINE SLATSQR( M, N, MB, NB, A, LDA, T, LDT, WORK, +* LWORK, INFO) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, MB, NB, LDT, LWORK +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLATSQR computes a blocked Tall-Skinny QR factorization of +*> an M-by-N matrix A, where M >= N: +*> A = Q * R . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The row block size to be used in the blocked QR. +*> MB > N. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The column block size to be used in the blocked QR. +*> N >= NB >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal +*> of the array contain the N-by-N upper triangular matrix R; +*> the elements below the diagonal represent Q by the columns +*> of blocked V (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, +*> dimension (LDT, N * Number_of_row_blocks) +*> where Number_of_row_blocks = CEIL((M-N)/(MB-N)) +*> The blocked upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. +*> See Further Details below. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) REAL array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The dimension of the array WORK. LWORK >= NB*N. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> Tall-Skinny QR (TSQR) performs QR by a sequence of orthogonal transformations, +*> representing Q as a product of other orthogonal matrices +*> Q = Q(1) * Q(2) * . . . * Q(k) +*> where each Q(i) zeros out subdiagonal entries of a block of MB rows of A: +*> Q(1) zeros out the subdiagonal entries of rows 1:MB of A +*> Q(2) zeros out the bottom MB-N rows of rows [1:N,MB+1:2*MB-N] of A +*> Q(3) zeros out the bottom MB-N rows of rows [1:N,2*MB-N+1:3*MB-2*N] of A +*> . . . +*> +*> Q(1) is computed by GEQRT, which represents Q(1) by Householder vectors +*> stored under the diagonal of rows 1:MB of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,1:N). +*> For more information see Further Details in GEQRT. +*> +*> Q(i) for i>1 is computed by TPQRT, which represents Q(i) by Householder vectors +*> stored in rows [(i-1)*(MB-N)+N+1:i*(MB-N)+N] of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,(i-1)*N+1:i*N). +*> The last Q(k) may use fewer rows. +*> For more information see Further Details in TPQRT. +*> +*> For more details of the overall algorithm, see the description of +*> Sequential TSQR in Section 2.2 of [1]. +*> +*> [1] “Communication-Optimal Parallel and Sequential QR and LU Factorizations,” +*> J. Demmel, L. Grigori, M. Hoemmen, J. Langou, +*> SIAM J. Sci. Comput, vol. 34, no. 1, 2012 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SLATSQR( M, N, MB, NB, A, LDA, T, LDT, WORK, + $ LWORK, INFO) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, MB, NB, LDT, LWORK +* .. +* .. Array Arguments .. + REAL A( LDA, * ), WORK( * ), T(LDT, *) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, II, KK, CTR +* .. +* .. EXTERNAL FUNCTIONS .. + LOGICAL LSAME + EXTERNAL LSAME +* .. EXTERNAL SUBROUTINES .. + EXTERNAL SGEQRT, STPQRT, XERBLA +* .. INTRINSIC FUNCTIONS .. + INTRINSIC MAX, MIN, MOD +* .. +* .. EXECUTABLE STATEMENTS .. +* +* TEST THE INPUT ARGUMENTS +* + INFO = 0 +* + LQUERY = ( LWORK.EQ.-1 ) +* + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. M.LT.N ) THEN + INFO = -2 + ELSE IF( MB.LE.N ) THEN + INFO = -3 + ELSE IF( NB.LT.1 .OR. ( NB.GT.N .AND. N.GT.0 )) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDT.LT.NB ) THEN + INFO = -8 + ELSE IF( LWORK.LT.(N*NB) .AND. (.NOT.LQUERY) ) THEN + INFO = -10 + END IF + IF( INFO.EQ.0) THEN + WORK(1) = NB*N + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLATSQR', -INFO ) + RETURN + ELSE IF (LQUERY) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN(M,N).EQ.0 ) THEN + RETURN + END IF +* +* The QR Decomposition +* + IF ((MB.LE.N).OR.(MB.GE.M)) THEN + CALL SGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO) + RETURN + END IF + KK = MOD((M-N),(MB-N)) + II=M-KK+1 +* +* Compute the QR factorization of the first block A(1:MB,1:N) +* + CALL SGEQRT( MB, N, NB, A(1,1), LDA, T, LDT, WORK, INFO ) +* + CTR = 1 + DO I = MB+1, II-MB+N , (MB-N) +* +* Compute the QR factorization of the current block A(I:I+MB-N,1:N) +* + CALL STPQRT( MB-N, N, 0, NB, A(1,1), LDA, A( I, 1 ), LDA, + $ T(1, CTR * N + 1), + $ LDT, WORK, INFO ) + CTR = CTR + 1 + END DO +* +* Compute the QR factorization of the last block A(II:M,1:N) +* + IF (II.LE.M) THEN + CALL STPQRT( KK, N, 0, NB, A(1,1), LDA, A( II, 1 ), LDA, + $ T(1, CTR * N + 1), LDT, + $ WORK, INFO ) + END IF +* + work( 1 ) = N*NB + RETURN +* +* End of SLATSQR +* + END diff --git a/dspl/liblapack/SRC/slauu2.f b/dspl/liblapack/SRC/slauu2.f new file mode 100644 index 0000000..c9f774c --- /dev/null +++ b/dspl/liblapack/SRC/slauu2.f @@ -0,0 +1,198 @@ +*> \brief \b SLAUU2 computes the product UUH or LHL, where U and L are upper or lower triangular matrices (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAUU2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAUU2( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAUU2 computes the product U * U**T or L**T * L, where the triangular +*> factor U or L is stored in the upper or lower triangular part of +*> the array A. +*> +*> If UPLO = 'U' or 'u' then the upper triangle of the result is stored, +*> overwriting the factor U in A. +*> If UPLO = 'L' or 'l' then the lower triangle of the result is stored, +*> overwriting the factor L in A. +*> +*> This is the unblocked form of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the triangular factor stored in the array A +*> is upper or lower triangular: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the triangular factor U or L. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the triangular factor U or L. +*> On exit, if UPLO = 'U', the upper triangle of A is +*> overwritten with the upper triangle of the product U * U**T; +*> if UPLO = 'L', the lower triangle of A is overwritten with +*> the lower triangle of the product L**T * L. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLAUU2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + REAL AII +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAUU2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the product U * U**T. +* + DO 10 I = 1, N + AII = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = SDOT( N-I+1, A( I, I ), LDA, A( I, I ), LDA ) + CALL SGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, AII, A( 1, I ), 1 ) + ELSE + CALL SSCAL( I, AII, A( 1, I ), 1 ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the product L**T * L. +* + DO 20 I = 1, N + AII = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = SDOT( N-I+1, A( I, I ), 1, A( I, I ), 1 ) + CALL SGEMV( 'Transpose', N-I, I-1, ONE, A( I+1, 1 ), LDA, + $ A( I+1, I ), 1, AII, A( I, 1 ), LDA ) + ELSE + CALL SSCAL( I, AII, A( I, 1 ), LDA ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of SLAUU2 +* + END diff --git a/dspl/liblapack/SRC/slauum.f b/dspl/liblapack/SRC/slauum.f new file mode 100644 index 0000000..c23c4b3 --- /dev/null +++ b/dspl/liblapack/SRC/slauum.f @@ -0,0 +1,218 @@ +*> \brief \b SLAUUM computes the product UUH or LHL, where U and L are upper or lower triangular matrices (blocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SLAUUM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SLAUUM( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLAUUM computes the product U * U**T or L**T * L, where the triangular +*> factor U or L is stored in the upper or lower triangular part of +*> the array A. +*> +*> If UPLO = 'U' or 'u' then the upper triangle of the result is stored, +*> overwriting the factor U in A. +*> If UPLO = 'L' or 'l' then the lower triangle of the result is stored, +*> overwriting the factor L in A. +*> +*> This is the blocked form of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the triangular factor stored in the array A +*> is upper or lower triangular: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the triangular factor U or L. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the triangular factor U or L. +*> On exit, if UPLO = 'U', the upper triangle of A is +*> overwritten with the upper triangle of the product U * U**T; +*> if UPLO = 'L', the lower triangle of A is overwritten with +*> the lower triangle of the product L**T * L. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SLAUUM( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SLAUU2, SSYRK, STRMM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SLAUUM', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'SLAUUM', UPLO, N, -1, -1, -1 ) +* + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL SLAUU2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute the product U * U**T. +* + DO 10 I = 1, N, NB + IB = MIN( NB, N-I+1 ) + CALL STRMM( 'Right', 'Upper', 'Transpose', 'Non-unit', + $ I-1, IB, ONE, A( I, I ), LDA, A( 1, I ), + $ LDA ) + CALL SLAUU2( 'Upper', IB, A( I, I ), LDA, INFO ) + IF( I+IB.LE.N ) THEN + CALL SGEMM( 'No transpose', 'Transpose', I-1, IB, + $ N-I-IB+1, ONE, A( 1, I+IB ), LDA, + $ A( I, I+IB ), LDA, ONE, A( 1, I ), LDA ) + CALL SSYRK( 'Upper', 'No transpose', IB, N-I-IB+1, + $ ONE, A( I, I+IB ), LDA, ONE, A( I, I ), + $ LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute the product L**T * L. +* + DO 20 I = 1, N, NB + IB = MIN( NB, N-I+1 ) + CALL STRMM( 'Left', 'Lower', 'Transpose', 'Non-unit', IB, + $ I-1, ONE, A( I, I ), LDA, A( I, 1 ), LDA ) + CALL SLAUU2( 'Lower', IB, A( I, I ), LDA, INFO ) + IF( I+IB.LE.N ) THEN + CALL SGEMM( 'Transpose', 'No transpose', IB, I-1, + $ N-I-IB+1, ONE, A( I+IB, I ), LDA, + $ A( I+IB, 1 ), LDA, ONE, A( I, 1 ), LDA ) + CALL SSYRK( 'Lower', 'Transpose', IB, N-I-IB+1, ONE, + $ A( I+IB, I ), LDA, ONE, A( I, I ), LDA ) + END IF + 20 CONTINUE + END IF + END IF +* + RETURN +* +* End of SLAUUM +* + END diff --git a/dspl/liblapack/SRC/sopgtr.f b/dspl/liblapack/SRC/sopgtr.f new file mode 100644 index 0000000..567f76a --- /dev/null +++ b/dspl/liblapack/SRC/sopgtr.f @@ -0,0 +1,232 @@ +*> \brief \b SOPGTR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SOPGTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SOPGTR( UPLO, N, AP, TAU, Q, LDQ, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDQ, N +* .. +* .. Array Arguments .. +* REAL AP( * ), Q( LDQ, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SOPGTR generates a real orthogonal matrix Q which is defined as the +*> product of n-1 elementary reflectors H(i) of order n, as returned by +*> SSPTRD using packed storage: +*> +*> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), +*> +*> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular packed storage used in previous +*> call to SSPTRD; +*> = 'L': Lower triangular packed storage used in previous +*> call to SSPTRD. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix Q. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> The vectors which define the elementary reflectors, as +*> returned by SSPTRD. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (N-1) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SSPTRD. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ,N) +*> The N-by-N orthogonal matrix Q. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N-1) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SOPGTR( UPLO, N, AP, TAU, Q, LDQ, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDQ, N +* .. +* .. Array Arguments .. + REAL AP( * ), Q( LDQ, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IINFO, IJ, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SORG2L, SORG2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SOPGTR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Q was determined by a call to SSPTRD with UPLO = 'U' +* +* Unpack the vectors which define the elementary reflectors and +* set the last row and column of Q equal to those of the unit +* matrix +* + IJ = 2 + DO 20 J = 1, N - 1 + DO 10 I = 1, J - 1 + Q( I, J ) = AP( IJ ) + IJ = IJ + 1 + 10 CONTINUE + IJ = IJ + 2 + Q( N, J ) = ZERO + 20 CONTINUE + DO 30 I = 1, N - 1 + Q( I, N ) = ZERO + 30 CONTINUE + Q( N, N ) = ONE +* +* Generate Q(1:n-1,1:n-1) +* + CALL SORG2L( N-1, N-1, N-1, Q, LDQ, TAU, WORK, IINFO ) +* + ELSE +* +* Q was determined by a call to SSPTRD with UPLO = 'L'. +* +* Unpack the vectors which define the elementary reflectors and +* set the first row and column of Q equal to those of the unit +* matrix +* + Q( 1, 1 ) = ONE + DO 40 I = 2, N + Q( I, 1 ) = ZERO + 40 CONTINUE + IJ = 3 + DO 60 J = 2, N + Q( 1, J ) = ZERO + DO 50 I = J + 1, N + Q( I, J ) = AP( IJ ) + IJ = IJ + 1 + 50 CONTINUE + IJ = IJ + 2 + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Generate Q(2:n,2:n) +* + CALL SORG2R( N-1, N-1, N-1, Q( 2, 2 ), LDQ, TAU, WORK, + $ IINFO ) + END IF + END IF + RETURN +* +* End of SOPGTR +* + END diff --git a/dspl/liblapack/SRC/sopmtr.f b/dspl/liblapack/SRC/sopmtr.f new file mode 100644 index 0000000..809f8b2 --- /dev/null +++ b/dspl/liblapack/SRC/sopmtr.f @@ -0,0 +1,339 @@ +*> \brief \b SOPMTR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SOPMTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SOPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS, UPLO +* INTEGER INFO, LDC, M, N +* .. +* .. Array Arguments .. +* REAL AP( * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SOPMTR overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix of order nq, with nq = m if +*> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +*> nq-1 elementary reflectors, as returned by SSPTRD using packed +*> storage: +*> +*> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); +*> +*> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular packed storage used in previous +*> call to SSPTRD; +*> = 'L': Lower triangular packed storage used in previous +*> call to SSPTRD. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is REAL array, dimension +*> (M*(M+1)/2) if SIDE = 'L' +*> (N*(N+1)/2) if SIDE = 'R' +*> The vectors which define the elementary reflectors, as +*> returned by SSPTRD. AP is modified by the routine but +*> restored on exit. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (M-1) if SIDE = 'L' +*> or (N-1) if SIDE = 'R' +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SSPTRD. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension +*> (N) if SIDE = 'L' +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SOPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, UPLO + INTEGER INFO, LDC, M, N +* .. +* .. Array Arguments .. + REAL AP( * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL FORWRD, LEFT, NOTRAN, UPPER + INTEGER I, I1, I2, I3, IC, II, JC, MI, NI, NQ + REAL AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + UPPER = LSAME( UPLO, 'U' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SOPMTR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Q was determined by a call to SSPTRD with UPLO = 'U' +* + FORWRD = ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) +* + IF( FORWRD ) THEN + I1 = 1 + I2 = NQ - 1 + I3 = 1 + II = 2 + ELSE + I1 = NQ - 1 + I2 = 1 + I3 = -1 + II = NQ*( NQ+1 ) / 2 - 1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(1:i,1:n) +* + MI = I + ELSE +* +* H(i) is applied to C(1:m,1:i) +* + NI = I + END IF +* +* Apply H(i) +* + AII = AP( II ) + AP( II ) = ONE + CALL SLARF( SIDE, MI, NI, AP( II-I+1 ), 1, TAU( I ), C, LDC, + $ WORK ) + AP( II ) = AII +* + IF( FORWRD ) THEN + II = II + I + 2 + ELSE + II = II - I - 1 + END IF + 10 CONTINUE + ELSE +* +* Q was determined by a call to SSPTRD with UPLO = 'L'. +* + FORWRD = ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) +* + IF( FORWRD ) THEN + I1 = 1 + I2 = NQ - 1 + I3 = 1 + II = 2 + ELSE + I1 = NQ - 1 + I2 = 1 + I3 = -1 + II = NQ*( NQ+1 ) / 2 - 1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 20 I = I1, I2, I3 + AII = AP( II ) + AP( II ) = ONE + IF( LEFT ) THEN +* +* H(i) is applied to C(i+1:m,1:n) +* + MI = M - I + IC = I + 1 + ELSE +* +* H(i) is applied to C(1:m,i+1:n) +* + NI = N - I + JC = I + 1 + END IF +* +* Apply H(i) +* + CALL SLARF( SIDE, MI, NI, AP( II ), 1, TAU( I ), + $ C( IC, JC ), LDC, WORK ) + AP( II ) = AII +* + IF( FORWRD ) THEN + II = II + NQ - I + 1 + ELSE + II = II - NQ + I - 2 + END IF + 20 CONTINUE + END IF + RETURN +* +* End of SOPMTR +* + END diff --git a/dspl/liblapack/SRC/sorbdb.f b/dspl/liblapack/SRC/sorbdb.f new file mode 100644 index 0000000..2e460aa --- /dev/null +++ b/dspl/liblapack/SRC/sorbdb.f @@ -0,0 +1,689 @@ +*> \brief \b SORBDB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORBDB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, +* X21, LDX21, X22, LDX22, THETA, PHI, TAUP1, +* TAUP2, TAUQ1, TAUQ2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIGNS, TRANS +* INTEGER INFO, LDX11, LDX12, LDX21, LDX22, LWORK, M, P, +* $ Q +* .. +* .. Array Arguments .. +* REAL PHI( * ), THETA( * ) +* REAL TAUP1( * ), TAUP2( * ), TAUQ1( * ), TAUQ2( * ), +* $ WORK( * ), X11( LDX11, * ), X12( LDX12, * ), +* $ X21( LDX21, * ), X22( LDX22, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORBDB simultaneously bidiagonalizes the blocks of an M-by-M +*> partitioned orthogonal matrix X: +*> +*> [ B11 | B12 0 0 ] +*> [ X11 | X12 ] [ P1 | ] [ 0 | 0 -I 0 ] [ Q1 | ]**T +*> X = [-----------] = [---------] [----------------] [---------] . +*> [ X21 | X22 ] [ | P2 ] [ B21 | B22 0 0 ] [ | Q2 ] +*> [ 0 | 0 0 I ] +*> +*> X11 is P-by-Q. Q must be no larger than P, M-P, or M-Q. (If this is +*> not the case, then X must be transposed and/or permuted. This can be +*> done in constant time using the TRANS and SIGNS options. See SORCSD +*> for details.) +*> +*> The orthogonal matrices P1, P2, Q1, and Q2 are P-by-P, (M-P)-by- +*> (M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. They are +*> represented implicitly by Householder vectors. +*> +*> B11, B12, B21, and B22 are Q-by-Q bidiagonal matrices represented +*> implicitly by angles THETA, PHI. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER +*> = 'T': X, U1, U2, V1T, and V2T are stored in row-major +*> order; +*> otherwise: X, U1, U2, V1T, and V2T are stored in column- +*> major order. +*> \endverbatim +*> +*> \param[in] SIGNS +*> \verbatim +*> SIGNS is CHARACTER +*> = 'O': The lower-left block is made nonpositive (the +*> "other" convention); +*> otherwise: The upper-right block is made nonpositive (the +*> "default" convention). +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows and columns in X. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11 and X12. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= +*> MIN(P,M-P,M-Q). +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is REAL array, dimension (LDX11,Q) +*> On entry, the top-left block of the orthogonal matrix to be +*> reduced. On exit, the form depends on TRANS: +*> If TRANS = 'N', then +*> the columns of tril(X11) specify reflectors for P1, +*> the rows of triu(X11,1) specify reflectors for Q1; +*> else TRANS = 'T', and +*> the rows of triu(X11) specify reflectors for P1, +*> the columns of tril(X11,-1) specify reflectors for Q1. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. If TRANS = 'N', then LDX11 >= +*> P; else LDX11 >= Q. +*> \endverbatim +*> +*> \param[in,out] X12 +*> \verbatim +*> X12 is REAL array, dimension (LDX12,M-Q) +*> On entry, the top-right block of the orthogonal matrix to +*> be reduced. On exit, the form depends on TRANS: +*> If TRANS = 'N', then +*> the rows of triu(X12) specify the first P reflectors for +*> Q2; +*> else TRANS = 'T', and +*> the columns of tril(X12) specify the first P reflectors +*> for Q2. +*> \endverbatim +*> +*> \param[in] LDX12 +*> \verbatim +*> LDX12 is INTEGER +*> The leading dimension of X12. If TRANS = 'N', then LDX12 >= +*> P; else LDX11 >= M-Q. +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is REAL array, dimension (LDX21,Q) +*> On entry, the bottom-left block of the orthogonal matrix to +*> be reduced. On exit, the form depends on TRANS: +*> If TRANS = 'N', then +*> the columns of tril(X21) specify reflectors for P2; +*> else TRANS = 'T', and +*> the rows of triu(X21) specify reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. If TRANS = 'N', then LDX21 >= +*> M-P; else LDX21 >= Q. +*> \endverbatim +*> +*> \param[in,out] X22 +*> \verbatim +*> X22 is REAL array, dimension (LDX22,M-Q) +*> On entry, the bottom-right block of the orthogonal matrix to +*> be reduced. On exit, the form depends on TRANS: +*> If TRANS = 'N', then +*> the rows of triu(X22(Q+1:M-P,P+1:M-Q)) specify the last +*> M-P-Q reflectors for Q2, +*> else TRANS = 'T', and +*> the columns of tril(X22(P+1:M-Q,Q+1:M-P)) specify the last +*> M-P-Q reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX22 +*> \verbatim +*> LDX22 is INTEGER +*> The leading dimension of X22. If TRANS = 'N', then LDX22 >= +*> M-P; else LDX22 >= M-Q. +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is REAL array, dimension (Q) +*> The entries of the bidiagonal blocks B11, B12, B21, B22 can +*> be computed from the angles THETA and PHI. See Further +*> Details. +*> \endverbatim +*> +*> \param[out] PHI +*> \verbatim +*> PHI is REAL array, dimension (Q-1) +*> The entries of the bidiagonal blocks B11, B12, B21, B22 can +*> be computed from the angles THETA and PHI. See Further +*> Details. +*> \endverbatim +*> +*> \param[out] TAUP1 +*> \verbatim +*> TAUP1 is REAL array, dimension (P) +*> The scalar factors of the elementary reflectors that define +*> P1. +*> \endverbatim +*> +*> \param[out] TAUP2 +*> \verbatim +*> TAUP2 is REAL array, dimension (M-P) +*> The scalar factors of the elementary reflectors that define +*> P2. +*> \endverbatim +*> +*> \param[out] TAUQ1 +*> \verbatim +*> TAUQ1 is REAL array, dimension (Q) +*> The scalar factors of the elementary reflectors that define +*> Q1. +*> \endverbatim +*> +*> \param[out] TAUQ2 +*> \verbatim +*> TAUQ2 is REAL array, dimension (M-Q) +*> The scalar factors of the elementary reflectors that define +*> Q2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= M-Q. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The bidiagonal blocks B11, B12, B21, and B22 are represented +*> implicitly by angles THETA(1), ..., THETA(Q) and PHI(1), ..., +*> PHI(Q-1). B11 and B21 are upper bidiagonal, while B21 and B22 are +*> lower bidiagonal. Every entry in each bidiagonal band is a product +*> of a sine or cosine of a THETA with a sine or cosine of a PHI. See +*> [1] or SORCSD for details. +*> +*> P1, P2, Q1, and Q2 are represented as products of elementary +*> reflectors. See SORCSD for details on generating P1, P2, Q1, and Q2 +*> using SORGQR and SORGLQ. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +*> +* ===================================================================== + SUBROUTINE SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, + $ X21, LDX21, X22, LDX22, THETA, PHI, TAUP1, + $ TAUP2, TAUQ1, TAUQ2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIGNS, TRANS + INTEGER INFO, LDX11, LDX12, LDX21, LDX22, LWORK, M, P, + $ Q +* .. +* .. Array Arguments .. + REAL PHI( * ), THETA( * ) + REAL TAUP1( * ), TAUP2( * ), TAUQ1( * ), TAUQ2( * ), + $ WORK( * ), X11( LDX11, * ), X12( LDX12, * ), + $ X21( LDX21, * ), X22( LDX22, * ) +* .. +* +* ==================================================================== +* +* .. Parameters .. + REAL REALONE + PARAMETER ( REALONE = 1.0E0 ) + REAL ONE + PARAMETER ( ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL COLMAJOR, LQUERY + INTEGER I, LWORKMIN, LWORKOPT + REAL Z1, Z2, Z3, Z4 +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SLARF, SLARFGP, SSCAL, XERBLA +* .. +* .. External Functions .. + REAL SNRM2 + LOGICAL LSAME + EXTERNAL SNRM2, LSAME +* .. +* .. Intrinsic Functions + INTRINSIC ATAN2, COS, MAX, SIN +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + COLMAJOR = .NOT. LSAME( TRANS, 'T' ) + IF( .NOT. LSAME( SIGNS, 'O' ) ) THEN + Z1 = REALONE + Z2 = REALONE + Z3 = REALONE + Z4 = REALONE + ELSE + Z1 = REALONE + Z2 = -REALONE + Z3 = REALONE + Z4 = -REALONE + END IF + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -3 + ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN + INFO = -4 + ELSE IF( Q .LT. 0 .OR. Q .GT. P .OR. Q .GT. M-P .OR. + $ Q .GT. M-Q ) THEN + INFO = -5 + ELSE IF( COLMAJOR .AND. LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -7 + ELSE IF( .NOT.COLMAJOR .AND. LDX11 .LT. MAX( 1, Q ) ) THEN + INFO = -7 + ELSE IF( COLMAJOR .AND. LDX12 .LT. MAX( 1, P ) ) THEN + INFO = -9 + ELSE IF( .NOT.COLMAJOR .AND. LDX12 .LT. MAX( 1, M-Q ) ) THEN + INFO = -9 + ELSE IF( COLMAJOR .AND. LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -11 + ELSE IF( .NOT.COLMAJOR .AND. LDX21 .LT. MAX( 1, Q ) ) THEN + INFO = -11 + ELSE IF( COLMAJOR .AND. LDX22 .LT. MAX( 1, M-P ) ) THEN + INFO = -13 + ELSE IF( .NOT.COLMAJOR .AND. LDX22 .LT. MAX( 1, M-Q ) ) THEN + INFO = -13 + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + LWORKOPT = M - Q + LWORKMIN = M - Q + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT. LQUERY ) THEN + INFO = -21 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'xORBDB', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Handle column-major and row-major separately +* + IF( COLMAJOR ) THEN +* +* Reduce columns 1, ..., Q of X11, X12, X21, and X22 +* + DO I = 1, Q +* + IF( I .EQ. 1 ) THEN + CALL SSCAL( P-I+1, Z1, X11(I,I), 1 ) + ELSE + CALL SSCAL( P-I+1, Z1*COS(PHI(I-1)), X11(I,I), 1 ) + CALL SAXPY( P-I+1, -Z1*Z3*Z4*SIN(PHI(I-1)), X12(I,I-1), + $ 1, X11(I,I), 1 ) + END IF + IF( I .EQ. 1 ) THEN + CALL SSCAL( M-P-I+1, Z2, X21(I,I), 1 ) + ELSE + CALL SSCAL( M-P-I+1, Z2*COS(PHI(I-1)), X21(I,I), 1 ) + CALL SAXPY( M-P-I+1, -Z2*Z3*Z4*SIN(PHI(I-1)), X22(I,I-1), + $ 1, X21(I,I), 1 ) + END IF +* + THETA(I) = ATAN2( SNRM2( M-P-I+1, X21(I,I), 1 ), + $ SNRM2( P-I+1, X11(I,I), 1 ) ) +* + IF( P .GT. I ) THEN + CALL SLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) + ELSE IF( P .EQ. I ) THEN + CALL SLARFGP( P-I+1, X11(I,I), X11(I,I), 1, TAUP1(I) ) + END IF + X11(I,I) = ONE + IF ( M-P .GT. I ) THEN + CALL SLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, + $ TAUP2(I) ) + ELSE IF ( M-P .EQ. I ) THEN + CALL SLARFGP( M-P-I+1, X21(I,I), X21(I,I), 1, TAUP2(I) ) + END IF + X21(I,I) = ONE +* + IF ( Q .GT. I ) THEN + CALL SLARF( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), + $ X11(I,I+1), LDX11, WORK ) + END IF + IF ( M-Q+1 .GT. I ) THEN + CALL SLARF( 'L', P-I+1, M-Q-I+1, X11(I,I), 1, TAUP1(I), + $ X12(I,I), LDX12, WORK ) + END IF + IF ( Q .GT. I ) THEN + CALL SLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), + $ X21(I,I+1), LDX21, WORK ) + END IF + IF ( M-Q+1 .GT. I ) THEN + CALL SLARF( 'L', M-P-I+1, M-Q-I+1, X21(I,I), 1, TAUP2(I), + $ X22(I,I), LDX22, WORK ) + END IF +* + IF( I .LT. Q ) THEN + CALL SSCAL( Q-I, -Z1*Z3*SIN(THETA(I)), X11(I,I+1), + $ LDX11 ) + CALL SAXPY( Q-I, Z2*Z3*COS(THETA(I)), X21(I,I+1), LDX21, + $ X11(I,I+1), LDX11 ) + END IF + CALL SSCAL( M-Q-I+1, -Z1*Z4*SIN(THETA(I)), X12(I,I), LDX12 ) + CALL SAXPY( M-Q-I+1, Z2*Z4*COS(THETA(I)), X22(I,I), LDX22, + $ X12(I,I), LDX12 ) +* + IF( I .LT. Q ) + $ PHI(I) = ATAN2( SNRM2( Q-I, X11(I,I+1), LDX11 ), + $ SNRM2( M-Q-I+1, X12(I,I), LDX12 ) ) +* + IF( I .LT. Q ) THEN + IF ( Q-I .EQ. 1 ) THEN + CALL SLARFGP( Q-I, X11(I,I+1), X11(I,I+1), LDX11, + $ TAUQ1(I) ) + ELSE + CALL SLARFGP( Q-I, X11(I,I+1), X11(I,I+2), LDX11, + $ TAUQ1(I) ) + END IF + X11(I,I+1) = ONE + END IF + IF ( Q+I-1 .LT. M ) THEN + IF ( M-Q .EQ. I ) THEN + CALL SLARFGP( M-Q-I+1, X12(I,I), X12(I,I), LDX12, + $ TAUQ2(I) ) + ELSE + CALL SLARFGP( M-Q-I+1, X12(I,I), X12(I,I+1), LDX12, + $ TAUQ2(I) ) + END IF + END IF + X12(I,I) = ONE +* + IF( I .LT. Q ) THEN + CALL SLARF( 'R', P-I, Q-I, X11(I,I+1), LDX11, TAUQ1(I), + $ X11(I+1,I+1), LDX11, WORK ) + CALL SLARF( 'R', M-P-I, Q-I, X11(I,I+1), LDX11, TAUQ1(I), + $ X21(I+1,I+1), LDX21, WORK ) + END IF + IF ( P .GT. I ) THEN + CALL SLARF( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, TAUQ2(I), + $ X12(I+1,I), LDX12, WORK ) + END IF + IF ( M-P .GT. I ) THEN + CALL SLARF( 'R', M-P-I, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), X22(I+1,I), LDX22, WORK ) + END IF +* + END DO +* +* Reduce columns Q + 1, ..., P of X12, X22 +* + DO I = Q + 1, P +* + CALL SSCAL( M-Q-I+1, -Z1*Z4, X12(I,I), LDX12 ) + IF ( I .GE. M-Q ) THEN + CALL SLARFGP( M-Q-I+1, X12(I,I), X12(I,I), LDX12, + $ TAUQ2(I) ) + ELSE + CALL SLARFGP( M-Q-I+1, X12(I,I), X12(I,I+1), LDX12, + $ TAUQ2(I) ) + END IF + X12(I,I) = ONE +* + IF ( P .GT. I ) THEN + CALL SLARF( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, TAUQ2(I), + $ X12(I+1,I), LDX12, WORK ) + END IF + IF( M-P-Q .GE. 1 ) + $ CALL SLARF( 'R', M-P-Q, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), X22(Q+1,I), LDX22, WORK ) +* + END DO +* +* Reduce columns P + 1, ..., M - Q of X12, X22 +* + DO I = 1, M - P - Q +* + CALL SSCAL( M-P-Q-I+1, Z2*Z4, X22(Q+I,P+I), LDX22 ) + IF ( I .EQ. M-P-Q ) THEN + CALL SLARFGP( M-P-Q-I+1, X22(Q+I,P+I), X22(Q+I,P+I), + $ LDX22, TAUQ2(P+I) ) + ELSE + CALL SLARFGP( M-P-Q-I+1, X22(Q+I,P+I), X22(Q+I,P+I+1), + $ LDX22, TAUQ2(P+I) ) + END IF + X22(Q+I,P+I) = ONE + IF ( I .LT. M-P-Q ) THEN + CALL SLARF( 'R', M-P-Q-I, M-P-Q-I+1, X22(Q+I,P+I), LDX22, + $ TAUQ2(P+I), X22(Q+I+1,P+I), LDX22, WORK ) + END IF +* + END DO +* + ELSE +* +* Reduce columns 1, ..., Q of X11, X12, X21, X22 +* + DO I = 1, Q +* + IF( I .EQ. 1 ) THEN + CALL SSCAL( P-I+1, Z1, X11(I,I), LDX11 ) + ELSE + CALL SSCAL( P-I+1, Z1*COS(PHI(I-1)), X11(I,I), LDX11 ) + CALL SAXPY( P-I+1, -Z1*Z3*Z4*SIN(PHI(I-1)), X12(I-1,I), + $ LDX12, X11(I,I), LDX11 ) + END IF + IF( I .EQ. 1 ) THEN + CALL SSCAL( M-P-I+1, Z2, X21(I,I), LDX21 ) + ELSE + CALL SSCAL( M-P-I+1, Z2*COS(PHI(I-1)), X21(I,I), LDX21 ) + CALL SAXPY( M-P-I+1, -Z2*Z3*Z4*SIN(PHI(I-1)), X22(I-1,I), + $ LDX22, X21(I,I), LDX21 ) + END IF +* + THETA(I) = ATAN2( SNRM2( M-P-I+1, X21(I,I), LDX21 ), + $ SNRM2( P-I+1, X11(I,I), LDX11 ) ) +* + CALL SLARFGP( P-I+1, X11(I,I), X11(I,I+1), LDX11, TAUP1(I) ) + X11(I,I) = ONE + IF ( I .EQ. M-P ) THEN + CALL SLARFGP( M-P-I+1, X21(I,I), X21(I,I), LDX21, + $ TAUP2(I) ) + ELSE + CALL SLARFGP( M-P-I+1, X21(I,I), X21(I,I+1), LDX21, + $ TAUP2(I) ) + END IF + X21(I,I) = ONE +* + IF ( Q .GT. I ) THEN + CALL SLARF( 'R', Q-I, P-I+1, X11(I,I), LDX11, TAUP1(I), + $ X11(I+1,I), LDX11, WORK ) + END IF + IF ( M-Q+1 .GT. I ) THEN + CALL SLARF( 'R', M-Q-I+1, P-I+1, X11(I,I), LDX11, + $ TAUP1(I), X12(I,I), LDX12, WORK ) + END IF + IF ( Q .GT. I ) THEN + CALL SLARF( 'R', Q-I, M-P-I+1, X21(I,I), LDX21, TAUP2(I), + $ X21(I+1,I), LDX21, WORK ) + END IF + IF ( M-Q+1 .GT. I ) THEN + CALL SLARF( 'R', M-Q-I+1, M-P-I+1, X21(I,I), LDX21, + $ TAUP2(I), X22(I,I), LDX22, WORK ) + END IF +* + IF( I .LT. Q ) THEN + CALL SSCAL( Q-I, -Z1*Z3*SIN(THETA(I)), X11(I+1,I), 1 ) + CALL SAXPY( Q-I, Z2*Z3*COS(THETA(I)), X21(I+1,I), 1, + $ X11(I+1,I), 1 ) + END IF + CALL SSCAL( M-Q-I+1, -Z1*Z4*SIN(THETA(I)), X12(I,I), 1 ) + CALL SAXPY( M-Q-I+1, Z2*Z4*COS(THETA(I)), X22(I,I), 1, + $ X12(I,I), 1 ) +* + IF( I .LT. Q ) + $ PHI(I) = ATAN2( SNRM2( Q-I, X11(I+1,I), 1 ), + $ SNRM2( M-Q-I+1, X12(I,I), 1 ) ) +* + IF( I .LT. Q ) THEN + IF ( Q-I .EQ. 1) THEN + CALL SLARFGP( Q-I, X11(I+1,I), X11(I+1,I), 1, + $ TAUQ1(I) ) + ELSE + CALL SLARFGP( Q-I, X11(I+1,I), X11(I+2,I), 1, + $ TAUQ1(I) ) + END IF + X11(I+1,I) = ONE + END IF + IF ( M-Q .GT. I ) THEN + CALL SLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, + $ TAUQ2(I) ) + ELSE + CALL SLARFGP( M-Q-I+1, X12(I,I), X12(I,I), 1, + $ TAUQ2(I) ) + END IF + X12(I,I) = ONE +* + IF( I .LT. Q ) THEN + CALL SLARF( 'L', Q-I, P-I, X11(I+1,I), 1, TAUQ1(I), + $ X11(I+1,I+1), LDX11, WORK ) + CALL SLARF( 'L', Q-I, M-P-I, X11(I+1,I), 1, TAUQ1(I), + $ X21(I+1,I+1), LDX21, WORK ) + END IF + CALL SLARF( 'L', M-Q-I+1, P-I, X12(I,I), 1, TAUQ2(I), + $ X12(I,I+1), LDX12, WORK ) + IF ( M-P-I .GT. 0 ) THEN + CALL SLARF( 'L', M-Q-I+1, M-P-I, X12(I,I), 1, TAUQ2(I), + $ X22(I,I+1), LDX22, WORK ) + END IF +* + END DO +* +* Reduce columns Q + 1, ..., P of X12, X22 +* + DO I = Q + 1, P +* + CALL SSCAL( M-Q-I+1, -Z1*Z4, X12(I,I), 1 ) + CALL SLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, TAUQ2(I) ) + X12(I,I) = ONE +* + IF ( P .GT. I ) THEN + CALL SLARF( 'L', M-Q-I+1, P-I, X12(I,I), 1, TAUQ2(I), + $ X12(I,I+1), LDX12, WORK ) + END IF + IF( M-P-Q .GE. 1 ) + $ CALL SLARF( 'L', M-Q-I+1, M-P-Q, X12(I,I), 1, TAUQ2(I), + $ X22(I,Q+1), LDX22, WORK ) +* + END DO +* +* Reduce columns P + 1, ..., M - Q of X12, X22 +* + DO I = 1, M - P - Q +* + CALL SSCAL( M-P-Q-I+1, Z2*Z4, X22(P+I,Q+I), 1 ) + IF ( M-P-Q .EQ. I ) THEN + CALL SLARFGP( M-P-Q-I+1, X22(P+I,Q+I), X22(P+I,Q+I), 1, + $ TAUQ2(P+I) ) + X22(P+I,Q+I) = ONE + ELSE + CALL SLARFGP( M-P-Q-I+1, X22(P+I,Q+I), X22(P+I+1,Q+I), 1, + $ TAUQ2(P+I) ) + X22(P+I,Q+I) = ONE + CALL SLARF( 'L', M-P-Q-I+1, M-P-Q-I, X22(P+I,Q+I), 1, + $ TAUQ2(P+I), X22(P+I,Q+I+1), LDX22, WORK ) + END IF +* +* + END DO +* + END IF +* + RETURN +* +* End of SORBDB +* + END + diff --git a/dspl/liblapack/SRC/sorbdb1.f b/dspl/liblapack/SRC/sorbdb1.f new file mode 100644 index 0000000..98bc8e7 --- /dev/null +++ b/dspl/liblapack/SRC/sorbdb1.f @@ -0,0 +1,323 @@ +*> \brief \b SORBDB1 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORBDB1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, +* TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. +* REAL PHI(*), THETA(*) +* REAL TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), +* $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> SORBDB1 simultaneously bidiagonalizes the blocks of a tall and skinny +*> matrix X with orthonomal columns: +*> +*> [ B11 ] +*> [ X11 ] [ P1 | ] [ 0 ] +*> [-----] = [---------] [-----] Q1**T . +*> [ X21 ] [ | P2 ] [ B21 ] +*> [ 0 ] +*> +*> X11 is P-by-Q, and X21 is (M-P)-by-Q. Q must be no larger than P, +*> M-P, or M-Q. Routines SORBDB2, SORBDB3, and SORBDB4 handle cases in +*> which Q is not the minimum dimension. +*> +*> The orthogonal matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), +*> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by +*> Householder vectors. +*> +*> B11 and B12 are Q-by-Q bidiagonal matrices represented implicitly by +*> angles THETA, PHI. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows X11 plus the number of rows in X21. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= +*> MIN(P,M-P,M-Q). +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is REAL array, dimension (LDX11,Q) +*> On entry, the top block of the matrix X to be reduced. On +*> exit, the columns of tril(X11) specify reflectors for P1 and +*> the rows of triu(X11,1) specify reflectors for Q1. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= P. +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is REAL array, dimension (LDX21,Q) +*> On entry, the bottom block of the matrix X to be reduced. On +*> exit, the columns of tril(X21) specify reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. LDX21 >= M-P. +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is REAL array, dimension (Q) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] PHI +*> \verbatim +*> PHI is REAL array, dimension (Q-1) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP1 +*> \verbatim +*> TAUP1 is REAL array, dimension (P) +*> The scalar factors of the elementary reflectors that define +*> P1. +*> \endverbatim +*> +*> \param[out] TAUP2 +*> \verbatim +*> TAUP2 is REAL array, dimension (M-P) +*> The scalar factors of the elementary reflectors that define +*> P2. +*> \endverbatim +*> +*> \param[out] TAUQ1 +*> \verbatim +*> TAUQ1 is REAL array, dimension (Q) +*> The scalar factors of the elementary reflectors that define +*> Q1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= M-Q. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +*> +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The upper-bidiagonal blocks B11, B21 are represented implicitly by +*> angles THETA(1), ..., THETA(Q) and PHI(1), ..., PHI(Q-1). Every entry +*> in each bidiagonal band is a product of a sine or cosine of a THETA +*> with a sine or cosine of a PHI. See [1] or SORCSD for details. +*> +*> P1, P2, and Q1 are represented as products of elementary reflectors. +*> See SORCSD2BY1 for details on generating P1, P2, and Q1 using SORGQR +*> and SORGLQ. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +*> +* ===================================================================== + SUBROUTINE SORBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. + REAL PHI(*), THETA(*) + REAL TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), + $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* ==================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + REAL C, S + INTEGER CHILDINFO, I, ILARF, IORBDB5, LLARF, LORBDB5, + $ LWORKMIN, LWORKOPT + LOGICAL LQUERY +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFGP, SORBDB5, SROT, XERBLA +* .. +* .. External Functions .. + REAL SNRM2 + EXTERNAL SNRM2 +* .. +* .. Intrinsic Function .. + INTRINSIC ATAN2, COS, MAX, SIN, SQRT +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -1 + ELSE IF( P .LT. Q .OR. M-P .LT. Q ) THEN + INFO = -2 + ELSE IF( Q .LT. 0 .OR. M-Q .LT. Q ) THEN + INFO = -3 + ELSE IF( LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -5 + ELSE IF( LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + ILARF = 2 + LLARF = MAX( P-1, M-P-1, Q-1 ) + IORBDB5 = 2 + LORBDB5 = Q-2 + LWORKOPT = MAX( ILARF+LLARF-1, IORBDB5+LORBDB5-1 ) + LWORKMIN = LWORKOPT + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'SORBDB1', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Reduce columns 1, ..., Q of X11 and X21 +* + DO I = 1, Q +* + CALL SLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) + CALL SLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, TAUP2(I) ) + THETA(I) = ATAN2( X21(I,I), X11(I,I) ) + C = COS( THETA(I) ) + S = SIN( THETA(I) ) + X11(I,I) = ONE + X21(I,I) = ONE + CALL SLARF( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), X11(I,I+1), + $ LDX11, WORK(ILARF) ) + CALL SLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), + $ X21(I,I+1), LDX21, WORK(ILARF) ) +* + IF( I .LT. Q ) THEN + CALL SROT( Q-I, X11(I,I+1), LDX11, X21(I,I+1), LDX21, C, S ) + CALL SLARFGP( Q-I, X21(I,I+1), X21(I,I+2), LDX21, TAUQ1(I) ) + S = X21(I,I+1) + X21(I,I+1) = ONE + CALL SLARF( 'R', P-I, Q-I, X21(I,I+1), LDX21, TAUQ1(I), + $ X11(I+1,I+1), LDX11, WORK(ILARF) ) + CALL SLARF( 'R', M-P-I, Q-I, X21(I,I+1), LDX21, TAUQ1(I), + $ X21(I+1,I+1), LDX21, WORK(ILARF) ) + C = SQRT( SNRM2( P-I, X11(I+1,I+1), 1 )**2 + $ + SNRM2( M-P-I, X21(I+1,I+1), 1 )**2 ) + PHI(I) = ATAN2( S, C ) + CALL SORBDB5( P-I, M-P-I, Q-I-1, X11(I+1,I+1), 1, + $ X21(I+1,I+1), 1, X11(I+1,I+2), LDX11, + $ X21(I+1,I+2), LDX21, WORK(IORBDB5), LORBDB5, + $ CHILDINFO ) + END IF +* + END DO +* + RETURN +* +* End of SORBDB1 +* + END + diff --git a/dspl/liblapack/SRC/sorbdb2.f b/dspl/liblapack/SRC/sorbdb2.f new file mode 100644 index 0000000..684778e --- /dev/null +++ b/dspl/liblapack/SRC/sorbdb2.f @@ -0,0 +1,332 @@ +*> \brief \b SORBDB2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORBDB2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, +* TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. +* REAL PHI(*), THETA(*) +* REAL TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), +* $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> SORBDB2 simultaneously bidiagonalizes the blocks of a tall and skinny +*> matrix X with orthonomal columns: +*> +*> [ B11 ] +*> [ X11 ] [ P1 | ] [ 0 ] +*> [-----] = [---------] [-----] Q1**T . +*> [ X21 ] [ | P2 ] [ B21 ] +*> [ 0 ] +*> +*> X11 is P-by-Q, and X21 is (M-P)-by-Q. P must be no larger than M-P, +*> Q, or M-Q. Routines SORBDB1, SORBDB3, and SORBDB4 handle cases in +*> which P is not the minimum dimension. +*> +*> The orthogonal matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), +*> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by +*> Householder vectors. +*> +*> B11 and B12 are P-by-P bidiagonal matrices represented implicitly by +*> angles THETA, PHI. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows X11 plus the number of rows in X21. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11. 0 <= P <= min(M-P,Q,M-Q). +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= M. +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is REAL array, dimension (LDX11,Q) +*> On entry, the top block of the matrix X to be reduced. On +*> exit, the columns of tril(X11) specify reflectors for P1 and +*> the rows of triu(X11,1) specify reflectors for Q1. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= P. +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is REAL array, dimension (LDX21,Q) +*> On entry, the bottom block of the matrix X to be reduced. On +*> exit, the columns of tril(X21) specify reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. LDX21 >= M-P. +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is REAL array, dimension (Q) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] PHI +*> \verbatim +*> PHI is REAL array, dimension (Q-1) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP1 +*> \verbatim +*> TAUP1 is REAL array, dimension (P) +*> The scalar factors of the elementary reflectors that define +*> P1. +*> \endverbatim +*> +*> \param[out] TAUP2 +*> \verbatim +*> TAUP2 is REAL array, dimension (M-P) +*> The scalar factors of the elementary reflectors that define +*> P2. +*> \endverbatim +*> +*> \param[out] TAUQ1 +*> \verbatim +*> TAUQ1 is REAL array, dimension (Q) +*> The scalar factors of the elementary reflectors that define +*> Q1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= M-Q. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The upper-bidiagonal blocks B11, B21 are represented implicitly by +*> angles THETA(1), ..., THETA(Q) and PHI(1), ..., PHI(Q-1). Every entry +*> in each bidiagonal band is a product of a sine or cosine of a THETA +*> with a sine or cosine of a PHI. See [1] or SORCSD for details. +*> +*> P1, P2, and Q1 are represented as products of elementary reflectors. +*> See SORCSD2BY1 for details on generating P1, P2, and Q1 using SORGQR +*> and SORGLQ. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +*> +* ===================================================================== + SUBROUTINE SORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. + REAL PHI(*), THETA(*) + REAL TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), + $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* ==================================================================== +* +* .. Parameters .. + REAL NEGONE, ONE + PARAMETER ( NEGONE = -1.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + REAL C, S + INTEGER CHILDINFO, I, ILARF, IORBDB5, LLARF, LORBDB5, + $ LWORKMIN, LWORKOPT + LOGICAL LQUERY +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFGP, SORBDB5, SROT, SSCAL, XERBLA +* .. +* .. External Functions .. + REAL SNRM2 + EXTERNAL SNRM2 +* .. +* .. Intrinsic Function .. + INTRINSIC ATAN2, COS, MAX, SIN, SQRT +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -1 + ELSE IF( P .LT. 0 .OR. P .GT. M-P ) THEN + INFO = -2 + ELSE IF( Q .LT. 0 .OR. Q .LT. P .OR. M-Q .LT. P ) THEN + INFO = -3 + ELSE IF( LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -5 + ELSE IF( LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + ILARF = 2 + LLARF = MAX( P-1, M-P, Q-1 ) + IORBDB5 = 2 + LORBDB5 = Q-1 + LWORKOPT = MAX( ILARF+LLARF-1, IORBDB5+LORBDB5-1 ) + LWORKMIN = LWORKOPT + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'SORBDB2', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Reduce rows 1, ..., P of X11 and X21 +* + DO I = 1, P +* + IF( I .GT. 1 ) THEN + CALL SROT( Q-I+1, X11(I,I), LDX11, X21(I-1,I), LDX21, C, S ) + END IF + CALL SLARFGP( Q-I+1, X11(I,I), X11(I,I+1), LDX11, TAUQ1(I) ) + C = X11(I,I) + X11(I,I) = ONE + CALL SLARF( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL SLARF( 'R', M-P-I+1, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X21(I,I), LDX21, WORK(ILARF) ) + S = SQRT( SNRM2( P-I, X11(I+1,I), 1 )**2 + $ + SNRM2( M-P-I+1, X21(I,I), 1 )**2 ) + THETA(I) = ATAN2( S, C ) +* + CALL SORBDB5( P-I, M-P-I+1, Q-I, X11(I+1,I), 1, X21(I,I), 1, + $ X11(I+1,I+1), LDX11, X21(I,I+1), LDX21, + $ WORK(IORBDB5), LORBDB5, CHILDINFO ) + CALL SSCAL( P-I, NEGONE, X11(I+1,I), 1 ) + CALL SLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, TAUP2(I) ) + IF( I .LT. P ) THEN + CALL SLARFGP( P-I, X11(I+1,I), X11(I+2,I), 1, TAUP1(I) ) + PHI(I) = ATAN2( X11(I+1,I), X21(I,I) ) + C = COS( PHI(I) ) + S = SIN( PHI(I) ) + X11(I+1,I) = ONE + CALL SLARF( 'L', P-I, Q-I, X11(I+1,I), 1, TAUP1(I), + $ X11(I+1,I+1), LDX11, WORK(ILARF) ) + END IF + X21(I,I) = ONE + CALL SLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), + $ X21(I,I+1), LDX21, WORK(ILARF) ) +* + END DO +* +* Reduce the bottom-right portion of X21 to the identity matrix +* + DO I = P + 1, Q + CALL SLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, TAUP2(I) ) + X21(I,I) = ONE + CALL SLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, TAUP2(I), + $ X21(I,I+1), LDX21, WORK(ILARF) ) + END DO +* + RETURN +* +* End of SORBDB2 +* + END + diff --git a/dspl/liblapack/SRC/sorbdb3.f b/dspl/liblapack/SRC/sorbdb3.f new file mode 100644 index 0000000..d849613 --- /dev/null +++ b/dspl/liblapack/SRC/sorbdb3.f @@ -0,0 +1,333 @@ +*> \brief \b SORBDB3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORBDB3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, +* TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. +* REAL PHI(*), THETA(*) +* REAL TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), +* $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> SORBDB3 simultaneously bidiagonalizes the blocks of a tall and skinny +*> matrix X with orthonomal columns: +*> +*> [ B11 ] +*> [ X11 ] [ P1 | ] [ 0 ] +*> [-----] = [---------] [-----] Q1**T . +*> [ X21 ] [ | P2 ] [ B21 ] +*> [ 0 ] +*> +*> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-P must be no larger than P, +*> Q, or M-Q. Routines SORBDB1, SORBDB2, and SORBDB4 handle cases in +*> which M-P is not the minimum dimension. +*> +*> The orthogonal matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), +*> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by +*> Householder vectors. +*> +*> B11 and B12 are (M-P)-by-(M-P) bidiagonal matrices represented +*> implicitly by angles THETA, PHI. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows X11 plus the number of rows in X21. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11. 0 <= P <= M. M-P <= min(P,Q,M-Q). +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= M. +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is REAL array, dimension (LDX11,Q) +*> On entry, the top block of the matrix X to be reduced. On +*> exit, the columns of tril(X11) specify reflectors for P1 and +*> the rows of triu(X11,1) specify reflectors for Q1. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= P. +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is REAL array, dimension (LDX21,Q) +*> On entry, the bottom block of the matrix X to be reduced. On +*> exit, the columns of tril(X21) specify reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. LDX21 >= M-P. +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is REAL array, dimension (Q) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] PHI +*> \verbatim +*> PHI is REAL array, dimension (Q-1) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP1 +*> \verbatim +*> TAUP1 is REAL array, dimension (P) +*> The scalar factors of the elementary reflectors that define +*> P1. +*> \endverbatim +*> +*> \param[out] TAUP2 +*> \verbatim +*> TAUP2 is REAL array, dimension (M-P) +*> The scalar factors of the elementary reflectors that define +*> P2. +*> \endverbatim +*> +*> \param[out] TAUQ1 +*> \verbatim +*> TAUQ1 is REAL array, dimension (Q) +*> The scalar factors of the elementary reflectors that define +*> Q1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= M-Q. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +*> +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The upper-bidiagonal blocks B11, B21 are represented implicitly by +*> angles THETA(1), ..., THETA(Q) and PHI(1), ..., PHI(Q-1). Every entry +*> in each bidiagonal band is a product of a sine or cosine of a THETA +*> with a sine or cosine of a PHI. See [1] or SORCSD for details. +*> +*> P1, P2, and Q1 are represented as products of elementary reflectors. +*> See SORCSD2BY1 for details on generating P1, P2, and Q1 using SORGQR +*> and SORGLQ. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +*> +* ===================================================================== + SUBROUTINE SORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. + REAL PHI(*), THETA(*) + REAL TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), + $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* ==================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + REAL C, S + INTEGER CHILDINFO, I, ILARF, IORBDB5, LLARF, LORBDB5, + $ LWORKMIN, LWORKOPT + LOGICAL LQUERY +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFGP, SORBDB5, SROT, XERBLA +* .. +* .. External Functions .. + REAL SNRM2 + EXTERNAL SNRM2 +* .. +* .. Intrinsic Function .. + INTRINSIC ATAN2, COS, MAX, SIN, SQRT +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -1 + ELSE IF( 2*P .LT. M .OR. P .GT. M ) THEN + INFO = -2 + ELSE IF( Q .LT. M-P .OR. M-Q .LT. M-P ) THEN + INFO = -3 + ELSE IF( LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -5 + ELSE IF( LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + ILARF = 2 + LLARF = MAX( P, M-P-1, Q-1 ) + IORBDB5 = 2 + LORBDB5 = Q-1 + LWORKOPT = MAX( ILARF+LLARF-1, IORBDB5+LORBDB5-1 ) + LWORKMIN = LWORKOPT + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'SORBDB3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Reduce rows 1, ..., M-P of X11 and X21 +* + DO I = 1, M-P +* + IF( I .GT. 1 ) THEN + CALL SROT( Q-I+1, X11(I-1,I), LDX11, X21(I,I), LDX11, C, S ) + END IF +* + CALL SLARFGP( Q-I+1, X21(I,I), X21(I,I+1), LDX21, TAUQ1(I) ) + S = X21(I,I) + X21(I,I) = ONE + CALL SLARF( 'R', P-I+1, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X11(I,I), LDX11, WORK(ILARF) ) + CALL SLARF( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X21(I+1,I), LDX21, WORK(ILARF) ) + C = SQRT( SNRM2( P-I+1, X11(I,I), 1 )**2 + $ + SNRM2( M-P-I, X21(I+1,I), 1 )**2 ) + THETA(I) = ATAN2( S, C ) +* + CALL SORBDB5( P-I+1, M-P-I, Q-I, X11(I,I), 1, X21(I+1,I), 1, + $ X11(I,I+1), LDX11, X21(I+1,I+1), LDX21, + $ WORK(IORBDB5), LORBDB5, CHILDINFO ) + CALL SLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) + IF( I .LT. M-P ) THEN + CALL SLARFGP( M-P-I, X21(I+1,I), X21(I+2,I), 1, TAUP2(I) ) + PHI(I) = ATAN2( X21(I+1,I), X11(I,I) ) + C = COS( PHI(I) ) + S = SIN( PHI(I) ) + X21(I+1,I) = ONE + CALL SLARF( 'L', M-P-I, Q-I, X21(I+1,I), 1, TAUP2(I), + $ X21(I+1,I+1), LDX21, WORK(ILARF) ) + END IF + X11(I,I) = ONE + CALL SLARF( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), X11(I,I+1), + $ LDX11, WORK(ILARF) ) +* + END DO +* +* Reduce the bottom-right portion of X11 to the identity matrix +* + DO I = M-P + 1, Q + CALL SLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) + X11(I,I) = ONE + CALL SLARF( 'L', P-I+1, Q-I, X11(I,I), 1, TAUP1(I), X11(I,I+1), + $ LDX11, WORK(ILARF) ) + END DO +* + RETURN +* +* End of SORBDB3 +* + END + diff --git a/dspl/liblapack/SRC/sorbdb4.f b/dspl/liblapack/SRC/sorbdb4.f new file mode 100644 index 0000000..42bbcbd --- /dev/null +++ b/dspl/liblapack/SRC/sorbdb4.f @@ -0,0 +1,378 @@ +*> \brief \b SORBDB4 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORBDB4 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, +* TAUP1, TAUP2, TAUQ1, PHANTOM, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. +* REAL PHI(*), THETA(*) +* REAL PHANTOM(*), TAUP1(*), TAUP2(*), TAUQ1(*), +* $ WORK(*), X11(LDX11,*), X21(LDX21,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> SORBDB4 simultaneously bidiagonalizes the blocks of a tall and skinny +*> matrix X with orthonomal columns: +*> +*> [ B11 ] +*> [ X11 ] [ P1 | ] [ 0 ] +*> [-----] = [---------] [-----] Q1**T . +*> [ X21 ] [ | P2 ] [ B21 ] +*> [ 0 ] +*> +*> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-Q must be no larger than P, +*> M-P, or Q. Routines SORBDB1, SORBDB2, and SORBDB3 handle cases in +*> which M-Q is not the minimum dimension. +*> +*> The orthogonal matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), +*> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by +*> Householder vectors. +*> +*> B11 and B12 are (M-Q)-by-(M-Q) bidiagonal matrices represented +*> implicitly by angles THETA, PHI. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows X11 plus the number of rows in X21. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= M and +*> M-Q <= min(P,M-P,Q). +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is REAL array, dimension (LDX11,Q) +*> On entry, the top block of the matrix X to be reduced. On +*> exit, the columns of tril(X11) specify reflectors for P1 and +*> the rows of triu(X11,1) specify reflectors for Q1. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= P. +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is REAL array, dimension (LDX21,Q) +*> On entry, the bottom block of the matrix X to be reduced. On +*> exit, the columns of tril(X21) specify reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. LDX21 >= M-P. +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is REAL array, dimension (Q) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] PHI +*> \verbatim +*> PHI is REAL array, dimension (Q-1) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP1 +*> \verbatim +*> TAUP1 is REAL array, dimension (P) +*> The scalar factors of the elementary reflectors that define +*> P1. +*> \endverbatim +*> +*> \param[out] TAUP2 +*> \verbatim +*> TAUP2 is REAL array, dimension (M-P) +*> The scalar factors of the elementary reflectors that define +*> P2. +*> \endverbatim +*> +*> \param[out] TAUQ1 +*> \verbatim +*> TAUQ1 is REAL array, dimension (Q) +*> The scalar factors of the elementary reflectors that define +*> Q1. +*> \endverbatim +*> +*> \param[out] PHANTOM +*> \verbatim +*> PHANTOM is REAL array, dimension (M) +*> The routine computes an M-by-1 column vector Y that is +*> orthogonal to the columns of [ X11; X21 ]. PHANTOM(1:P) and +*> PHANTOM(P+1:M) contain Householder vectors for Y(1:P) and +*> Y(P+1:M), respectively. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= M-Q. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +*> +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The upper-bidiagonal blocks B11, B21 are represented implicitly by +*> angles THETA(1), ..., THETA(Q) and PHI(1), ..., PHI(Q-1). Every entry +*> in each bidiagonal band is a product of a sine or cosine of a THETA +*> with a sine or cosine of a PHI. See [1] or SORCSD for details. +*> +*> P1, P2, and Q1 are represented as products of elementary reflectors. +*> See SORCSD2BY1 for details on generating P1, P2, and Q1 using SORGQR +*> and SORGLQ. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +*> +* ===================================================================== + SUBROUTINE SORBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + $ TAUP1, TAUP2, TAUQ1, PHANTOM, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. + REAL PHI(*), THETA(*) + REAL PHANTOM(*), TAUP1(*), TAUP2(*), TAUQ1(*), + $ WORK(*), X11(LDX11,*), X21(LDX21,*) +* .. +* +* ==================================================================== +* +* .. Parameters .. + REAL NEGONE, ONE, ZERO + PARAMETER ( NEGONE = -1.0E0, ONE = 1.0E0, ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + REAL C, S + INTEGER CHILDINFO, I, ILARF, IORBDB5, J, LLARF, + $ LORBDB5, LWORKMIN, LWORKOPT + LOGICAL LQUERY +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SLARFGP, SORBDB5, SROT, SSCAL, XERBLA +* .. +* .. External Functions .. + REAL SNRM2 + EXTERNAL SNRM2 +* .. +* .. Intrinsic Function .. + INTRINSIC ATAN2, COS, MAX, SIN, SQRT +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -1 + ELSE IF( P .LT. M-Q .OR. M-P .LT. M-Q ) THEN + INFO = -2 + ELSE IF( Q .LT. M-Q .OR. Q .GT. M ) THEN + INFO = -3 + ELSE IF( LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -5 + ELSE IF( LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + ILARF = 2 + LLARF = MAX( Q-1, P-1, M-P-1 ) + IORBDB5 = 2 + LORBDB5 = Q + LWORKOPT = ILARF + LLARF - 1 + LWORKOPT = MAX( LWORKOPT, IORBDB5 + LORBDB5 - 1 ) + LWORKMIN = LWORKOPT + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'SORBDB4', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Reduce columns 1, ..., M-Q of X11 and X21 +* + DO I = 1, M-Q +* + IF( I .EQ. 1 ) THEN + DO J = 1, M + PHANTOM(J) = ZERO + END DO + CALL SORBDB5( P, M-P, Q, PHANTOM(1), 1, PHANTOM(P+1), 1, + $ X11, LDX11, X21, LDX21, WORK(IORBDB5), + $ LORBDB5, CHILDINFO ) + CALL SSCAL( P, NEGONE, PHANTOM(1), 1 ) + CALL SLARFGP( P, PHANTOM(1), PHANTOM(2), 1, TAUP1(1) ) + CALL SLARFGP( M-P, PHANTOM(P+1), PHANTOM(P+2), 1, TAUP2(1) ) + THETA(I) = ATAN2( PHANTOM(1), PHANTOM(P+1) ) + C = COS( THETA(I) ) + S = SIN( THETA(I) ) + PHANTOM(1) = ONE + PHANTOM(P+1) = ONE + CALL SLARF( 'L', P, Q, PHANTOM(1), 1, TAUP1(1), X11, LDX11, + $ WORK(ILARF) ) + CALL SLARF( 'L', M-P, Q, PHANTOM(P+1), 1, TAUP2(1), X21, + $ LDX21, WORK(ILARF) ) + ELSE + CALL SORBDB5( P-I+1, M-P-I+1, Q-I+1, X11(I,I-1), 1, + $ X21(I,I-1), 1, X11(I,I), LDX11, X21(I,I), + $ LDX21, WORK(IORBDB5), LORBDB5, CHILDINFO ) + CALL SSCAL( P-I+1, NEGONE, X11(I,I-1), 1 ) + CALL SLARFGP( P-I+1, X11(I,I-1), X11(I+1,I-1), 1, TAUP1(I) ) + CALL SLARFGP( M-P-I+1, X21(I,I-1), X21(I+1,I-1), 1, + $ TAUP2(I) ) + THETA(I) = ATAN2( X11(I,I-1), X21(I,I-1) ) + C = COS( THETA(I) ) + S = SIN( THETA(I) ) + X11(I,I-1) = ONE + X21(I,I-1) = ONE + CALL SLARF( 'L', P-I+1, Q-I+1, X11(I,I-1), 1, TAUP1(I), + $ X11(I,I), LDX11, WORK(ILARF) ) + CALL SLARF( 'L', M-P-I+1, Q-I+1, X21(I,I-1), 1, TAUP2(I), + $ X21(I,I), LDX21, WORK(ILARF) ) + END IF +* + CALL SROT( Q-I+1, X11(I,I), LDX11, X21(I,I), LDX21, S, -C ) + CALL SLARFGP( Q-I+1, X21(I,I), X21(I,I+1), LDX21, TAUQ1(I) ) + C = X21(I,I) + X21(I,I) = ONE + CALL SLARF( 'R', P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL SLARF( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X21(I+1,I), LDX21, WORK(ILARF) ) + IF( I .LT. M-Q ) THEN + S = SQRT( SNRM2( P-I, X11(I+1,I), 1 )**2 + $ + SNRM2( M-P-I, X21(I+1,I), 1 )**2 ) + PHI(I) = ATAN2( S, C ) + END IF +* + END DO +* +* Reduce the bottom-right portion of X11 to [ I 0 ] +* + DO I = M - Q + 1, P + CALL SLARFGP( Q-I+1, X11(I,I), X11(I,I+1), LDX11, TAUQ1(I) ) + X11(I,I) = ONE + CALL SLARF( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL SLARF( 'R', Q-P, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X21(M-Q+1,I), LDX21, WORK(ILARF) ) + END DO +* +* Reduce the bottom-right portion of X21 to [ 0 I ] +* + DO I = P + 1, Q + CALL SLARFGP( Q-I+1, X21(M-Q+I-P,I), X21(M-Q+I-P,I+1), LDX21, + $ TAUQ1(I) ) + X21(M-Q+I-P,I) = ONE + CALL SLARF( 'R', Q-I, Q-I+1, X21(M-Q+I-P,I), LDX21, TAUQ1(I), + $ X21(M-Q+I-P+1,I), LDX21, WORK(ILARF) ) + END DO +* + RETURN +* +* End of SORBDB4 +* + END + diff --git a/dspl/liblapack/SRC/sorbdb5.f b/dspl/liblapack/SRC/sorbdb5.f new file mode 100644 index 0000000..f5af1db --- /dev/null +++ b/dspl/liblapack/SRC/sorbdb5.f @@ -0,0 +1,274 @@ +*> \brief \b SORBDB5 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORBDB5 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, +* LDQ2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INCX1, INCX2, INFO, LDQ1, LDQ2, LWORK, M1, M2, +* $ N +* .. +* .. Array Arguments .. +* REAL Q1(LDQ1,*), Q2(LDQ2,*), WORK(*), X1(*), X2(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> SORBDB5 orthogonalizes the column vector +*> X = [ X1 ] +*> [ X2 ] +*> with respect to the columns of +*> Q = [ Q1 ] . +*> [ Q2 ] +*> The columns of Q must be orthonormal. +*> +*> If the projection is zero according to Kahan's "twice is enough" +*> criterion, then some other vector from the orthogonal complement +*> is returned. This vector is chosen in an arbitrary but deterministic +*> way. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M1 +*> \verbatim +*> M1 is INTEGER +*> The dimension of X1 and the number of rows in Q1. 0 <= M1. +*> \endverbatim +*> +*> \param[in] M2 +*> \verbatim +*> M2 is INTEGER +*> The dimension of X2 and the number of rows in Q2. 0 <= M2. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in Q1 and Q2. 0 <= N. +*> \endverbatim +*> +*> \param[in,out] X1 +*> \verbatim +*> X1 is REAL array, dimension (M1) +*> On entry, the top part of the vector to be orthogonalized. +*> On exit, the top part of the projected vector. +*> \endverbatim +*> +*> \param[in] INCX1 +*> \verbatim +*> INCX1 is INTEGER +*> Increment for entries of X1. +*> \endverbatim +*> +*> \param[in,out] X2 +*> \verbatim +*> X2 is REAL array, dimension (M2) +*> On entry, the bottom part of the vector to be +*> orthogonalized. On exit, the bottom part of the projected +*> vector. +*> \endverbatim +*> +*> \param[in] INCX2 +*> \verbatim +*> INCX2 is INTEGER +*> Increment for entries of X2. +*> \endverbatim +*> +*> \param[in] Q1 +*> \verbatim +*> Q1 is REAL array, dimension (LDQ1, N) +*> The top part of the orthonormal basis matrix. +*> \endverbatim +*> +*> \param[in] LDQ1 +*> \verbatim +*> LDQ1 is INTEGER +*> The leading dimension of Q1. LDQ1 >= M1. +*> \endverbatim +*> +*> \param[in] Q2 +*> \verbatim +*> Q2 is REAL array, dimension (LDQ2, N) +*> The bottom part of the orthonormal basis matrix. +*> \endverbatim +*> +*> \param[in] LDQ2 +*> \verbatim +*> LDQ2 is INTEGER +*> The leading dimension of Q2. LDQ2 >= M2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= N. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INCX1, INCX2, INFO, LDQ1, LDQ2, LWORK, M1, M2, + $ N +* .. +* .. Array Arguments .. + REAL Q1(LDQ1,*), Q2(LDQ2,*), WORK(*), X1(*), X2(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER CHILDINFO, I, J +* .. +* .. External Subroutines .. + EXTERNAL SORBDB6, XERBLA +* .. +* .. External Functions .. + REAL SNRM2 + EXTERNAL SNRM2 +* .. +* .. Intrinsic Function .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + IF( M1 .LT. 0 ) THEN + INFO = -1 + ELSE IF( M2 .LT. 0 ) THEN + INFO = -2 + ELSE IF( N .LT. 0 ) THEN + INFO = -3 + ELSE IF( INCX1 .LT. 1 ) THEN + INFO = -5 + ELSE IF( INCX2 .LT. 1 ) THEN + INFO = -7 + ELSE IF( LDQ1 .LT. MAX( 1, M1 ) ) THEN + INFO = -9 + ELSE IF( LDQ2 .LT. MAX( 1, M2 ) ) THEN + INFO = -11 + ELSE IF( LWORK .LT. N ) THEN + INFO = -13 + END IF +* + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'SORBDB5', -INFO ) + RETURN + END IF +* +* Project X onto the orthogonal complement of Q +* + CALL SORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, LDQ2, + $ WORK, LWORK, CHILDINFO ) +* +* If the projection is nonzero, then return +* + IF( SNRM2(M1,X1,INCX1) .NE. ZERO + $ .OR. SNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + RETURN + END IF +* +* Project each standard basis vector e_1,...,e_M1 in turn, stopping +* when a nonzero projection is found +* + DO I = 1, M1 + DO J = 1, M1 + X1(J) = ZERO + END DO + X1(I) = ONE + DO J = 1, M2 + X2(J) = ZERO + END DO + CALL SORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, CHILDINFO ) + IF( SNRM2(M1,X1,INCX1) .NE. ZERO + $ .OR. SNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + RETURN + END IF + END DO +* +* Project each standard basis vector e_(M1+1),...,e_(M1+M2) in turn, +* stopping when a nonzero projection is found +* + DO I = 1, M2 + DO J = 1, M1 + X1(J) = ZERO + END DO + DO J = 1, M2 + X2(J) = ZERO + END DO + X2(I) = ONE + CALL SORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, CHILDINFO ) + IF( SNRM2(M1,X1,INCX1) .NE. ZERO + $ .OR. SNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + RETURN + END IF + END DO +* + RETURN +* +* End of SORBDB5 +* + END + diff --git a/dspl/liblapack/SRC/sorbdb6.f b/dspl/liblapack/SRC/sorbdb6.f new file mode 100644 index 0000000..beedbfc --- /dev/null +++ b/dspl/liblapack/SRC/sorbdb6.f @@ -0,0 +1,312 @@ +*> \brief \b SORBDB6 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORBDB6 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, +* LDQ2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INCX1, INCX2, INFO, LDQ1, LDQ2, LWORK, M1, M2, +* $ N +* .. +* .. Array Arguments .. +* REAL Q1(LDQ1,*), Q2(LDQ2,*), WORK(*), X1(*), X2(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> SORBDB6 orthogonalizes the column vector +*> X = [ X1 ] +*> [ X2 ] +*> with respect to the columns of +*> Q = [ Q1 ] . +*> [ Q2 ] +*> The columns of Q must be orthonormal. +*> +*> If the projection is zero according to Kahan's "twice is enough" +*> criterion, then the zero vector is returned. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M1 +*> \verbatim +*> M1 is INTEGER +*> The dimension of X1 and the number of rows in Q1. 0 <= M1. +*> \endverbatim +*> +*> \param[in] M2 +*> \verbatim +*> M2 is INTEGER +*> The dimension of X2 and the number of rows in Q2. 0 <= M2. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in Q1 and Q2. 0 <= N. +*> \endverbatim +*> +*> \param[in,out] X1 +*> \verbatim +*> X1 is REAL array, dimension (M1) +*> On entry, the top part of the vector to be orthogonalized. +*> On exit, the top part of the projected vector. +*> \endverbatim +*> +*> \param[in] INCX1 +*> \verbatim +*> INCX1 is INTEGER +*> Increment for entries of X1. +*> \endverbatim +*> +*> \param[in,out] X2 +*> \verbatim +*> X2 is REAL array, dimension (M2) +*> On entry, the bottom part of the vector to be +*> orthogonalized. On exit, the bottom part of the projected +*> vector. +*> \endverbatim +*> +*> \param[in] INCX2 +*> \verbatim +*> INCX2 is INTEGER +*> Increment for entries of X2. +*> \endverbatim +*> +*> \param[in] Q1 +*> \verbatim +*> Q1 is REAL array, dimension (LDQ1, N) +*> The top part of the orthonormal basis matrix. +*> \endverbatim +*> +*> \param[in] LDQ1 +*> \verbatim +*> LDQ1 is INTEGER +*> The leading dimension of Q1. LDQ1 >= M1. +*> \endverbatim +*> +*> \param[in] Q2 +*> \verbatim +*> Q2 is REAL array, dimension (LDQ2, N) +*> The bottom part of the orthonormal basis matrix. +*> \endverbatim +*> +*> \param[in] LDQ2 +*> \verbatim +*> LDQ2 is INTEGER +*> The leading dimension of Q2. LDQ2 >= M2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= N. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INCX1, INCX2, INFO, LDQ1, LDQ2, LWORK, M1, M2, + $ N +* .. +* .. Array Arguments .. + REAL Q1(LDQ1,*), Q2(LDQ2,*), WORK(*), X1(*), X2(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ALPHASQ, REALONE, REALZERO + PARAMETER ( ALPHASQ = 0.01E0, REALONE = 1.0E0, + $ REALZERO = 0.0E0 ) + REAL NEGONE, ONE, ZERO + PARAMETER ( NEGONE = -1.0E0, ONE = 1.0E0, ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL NORMSQ1, NORMSQ2, SCL1, SCL2, SSQ1, SSQ2 +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SLASSQ, XERBLA +* .. +* .. Intrinsic Function .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + IF( M1 .LT. 0 ) THEN + INFO = -1 + ELSE IF( M2 .LT. 0 ) THEN + INFO = -2 + ELSE IF( N .LT. 0 ) THEN + INFO = -3 + ELSE IF( INCX1 .LT. 1 ) THEN + INFO = -5 + ELSE IF( INCX2 .LT. 1 ) THEN + INFO = -7 + ELSE IF( LDQ1 .LT. MAX( 1, M1 ) ) THEN + INFO = -9 + ELSE IF( LDQ2 .LT. MAX( 1, M2 ) ) THEN + INFO = -11 + ELSE IF( LWORK .LT. N ) THEN + INFO = -13 + END IF +* + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'SORBDB6', -INFO ) + RETURN + END IF +* +* First, project X onto the orthogonal complement of Q's column +* space +* + SCL1 = REALZERO + SSQ1 = REALONE + CALL SLASSQ( M1, X1, INCX1, SCL1, SSQ1 ) + SCL2 = REALZERO + SSQ2 = REALONE + CALL SLASSQ( M2, X2, INCX2, SCL2, SSQ2 ) + NORMSQ1 = SCL1**2*SSQ1 + SCL2**2*SSQ2 +* + IF( M1 .EQ. 0 ) THEN + DO I = 1, N + WORK(I) = ZERO + END DO + ELSE + CALL SGEMV( 'C', M1, N, ONE, Q1, LDQ1, X1, INCX1, ZERO, WORK, + $ 1 ) + END IF +* + CALL SGEMV( 'C', M2, N, ONE, Q2, LDQ2, X2, INCX2, ONE, WORK, 1 ) +* + CALL SGEMV( 'N', M1, N, NEGONE, Q1, LDQ1, WORK, 1, ONE, X1, + $ INCX1 ) + CALL SGEMV( 'N', M2, N, NEGONE, Q2, LDQ2, WORK, 1, ONE, X2, + $ INCX2 ) +* + SCL1 = REALZERO + SSQ1 = REALONE + CALL SLASSQ( M1, X1, INCX1, SCL1, SSQ1 ) + SCL2 = REALZERO + SSQ2 = REALONE + CALL SLASSQ( M2, X2, INCX2, SCL2, SSQ2 ) + NORMSQ2 = SCL1**2*SSQ1 + SCL2**2*SSQ2 +* +* If projection is sufficiently large in norm, then stop. +* If projection is zero, then stop. +* Otherwise, project again. +* + IF( NORMSQ2 .GE. ALPHASQ*NORMSQ1 ) THEN + RETURN + END IF +* + IF( NORMSQ2 .EQ. ZERO ) THEN + RETURN + END IF +* + NORMSQ1 = NORMSQ2 +* + DO I = 1, N + WORK(I) = ZERO + END DO +* + IF( M1 .EQ. 0 ) THEN + DO I = 1, N + WORK(I) = ZERO + END DO + ELSE + CALL SGEMV( 'C', M1, N, ONE, Q1, LDQ1, X1, INCX1, ZERO, WORK, + $ 1 ) + END IF +* + CALL SGEMV( 'C', M2, N, ONE, Q2, LDQ2, X2, INCX2, ONE, WORK, 1 ) +* + CALL SGEMV( 'N', M1, N, NEGONE, Q1, LDQ1, WORK, 1, ONE, X1, + $ INCX1 ) + CALL SGEMV( 'N', M2, N, NEGONE, Q2, LDQ2, WORK, 1, ONE, X2, + $ INCX2 ) +* + SCL1 = REALZERO + SSQ1 = REALONE + CALL SLASSQ( M1, X1, INCX1, SCL1, SSQ1 ) + SCL2 = REALZERO + SSQ2 = REALONE + CALL SLASSQ( M1, X1, INCX1, SCL1, SSQ1 ) + NORMSQ2 = SCL1**2*SSQ1 + SCL2**2*SSQ2 +* +* If second projection is sufficiently large in norm, then do +* nothing more. Alternatively, if it shrunk significantly, then +* truncate it to zero. +* + IF( NORMSQ2 .LT. ALPHASQ*NORMSQ1 ) THEN + DO I = 1, M1 + X1(I) = ZERO + END DO + DO I = 1, M2 + X2(I) = ZERO + END DO + END IF +* + RETURN +* +* End of SORBDB6 +* + END + diff --git a/dspl/liblapack/SRC/sorcsd.f b/dspl/liblapack/SRC/sorcsd.f new file mode 100644 index 0000000..06c77d8 --- /dev/null +++ b/dspl/liblapack/SRC/sorcsd.f @@ -0,0 +1,615 @@ +*> \brief \b SORCSD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORCSD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE SORCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, +* SIGNS, M, P, Q, X11, LDX11, X12, +* LDX12, X21, LDX21, X22, LDX22, THETA, +* U1, LDU1, U2, LDU2, V1T, LDV1T, V2T, +* LDV2T, WORK, LWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, SIGNS, TRANS +* INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LDX11, LDX12, +* $ LDX21, LDX22, LWORK, M, P, Q +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL THETA( * ) +* REAL U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ), +* $ V2T( LDV2T, * ), WORK( * ), X11( LDX11, * ), +* $ X12( LDX12, * ), X21( LDX21, * ), X22( LDX22, +* $ * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORCSD computes the CS decomposition of an M-by-M partitioned +*> orthogonal matrix X: +*> +*> [ I 0 0 | 0 0 0 ] +*> [ 0 C 0 | 0 -S 0 ] +*> [ X11 | X12 ] [ U1 | ] [ 0 0 0 | 0 0 -I ] [ V1 | ]**T +*> X = [-----------] = [---------] [---------------------] [---------] . +*> [ X21 | X22 ] [ | U2 ] [ 0 0 0 | I 0 0 ] [ | V2 ] +*> [ 0 S 0 | 0 C 0 ] +*> [ 0 0 I | 0 0 0 ] +*> +*> X11 is P-by-Q. The orthogonal matrices U1, U2, V1, and V2 are P-by-P, +*> (M-P)-by-(M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. C and S are +*> R-by-R nonnegative diagonal matrices satisfying C^2 + S^2 = I, in +*> which R = MIN(P,M-P,Q,M-Q). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU1 +*> \verbatim +*> JOBU1 is CHARACTER +*> = 'Y': U1 is computed; +*> otherwise: U1 is not computed. +*> \endverbatim +*> +*> \param[in] JOBU2 +*> \verbatim +*> JOBU2 is CHARACTER +*> = 'Y': U2 is computed; +*> otherwise: U2 is not computed. +*> \endverbatim +*> +*> \param[in] JOBV1T +*> \verbatim +*> JOBV1T is CHARACTER +*> = 'Y': V1T is computed; +*> otherwise: V1T is not computed. +*> \endverbatim +*> +*> \param[in] JOBV2T +*> \verbatim +*> JOBV2T is CHARACTER +*> = 'Y': V2T is computed; +*> otherwise: V2T is not computed. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER +*> = 'T': X, U1, U2, V1T, and V2T are stored in row-major +*> order; +*> otherwise: X, U1, U2, V1T, and V2T are stored in column- +*> major order. +*> \endverbatim +*> +*> \param[in] SIGNS +*> \verbatim +*> SIGNS is CHARACTER +*> = 'O': The lower-left block is made nonpositive (the +*> "other" convention); +*> otherwise: The upper-right block is made nonpositive (the +*> "default" convention). +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows and columns in X. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11 and X12. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= M. +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is REAL array, dimension (LDX11,Q) +*> On entry, part of the orthogonal matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= MAX(1,P). +*> \endverbatim +*> +*> \param[in,out] X12 +*> \verbatim +*> X12 is REAL array, dimension (LDX12,M-Q) +*> On entry, part of the orthogonal matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX12 +*> \verbatim +*> LDX12 is INTEGER +*> The leading dimension of X12. LDX12 >= MAX(1,P). +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is REAL array, dimension (LDX21,Q) +*> On entry, part of the orthogonal matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X11. LDX21 >= MAX(1,M-P). +*> \endverbatim +*> +*> \param[in,out] X22 +*> \verbatim +*> X22 is REAL array, dimension (LDX22,M-Q) +*> On entry, part of the orthogonal matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX22 +*> \verbatim +*> LDX22 is INTEGER +*> The leading dimension of X11. LDX22 >= MAX(1,M-P). +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is REAL array, dimension (R), in which R = +*> MIN(P,M-P,Q,M-Q). +*> C = DIAG( COS(THETA(1)), ... , COS(THETA(R)) ) and +*> S = DIAG( SIN(THETA(1)), ... , SIN(THETA(R)) ). +*> \endverbatim +*> +*> \param[out] U1 +*> \verbatim +*> U1 is REAL array, dimension (LDU1,P) +*> If JOBU1 = 'Y', U1 contains the P-by-P orthogonal matrix U1. +*> \endverbatim +*> +*> \param[in] LDU1 +*> \verbatim +*> LDU1 is INTEGER +*> The leading dimension of U1. If JOBU1 = 'Y', LDU1 >= +*> MAX(1,P). +*> \endverbatim +*> +*> \param[out] U2 +*> \verbatim +*> U2 is REAL array, dimension (LDU2,M-P) +*> If JOBU2 = 'Y', U2 contains the (M-P)-by-(M-P) orthogonal +*> matrix U2. +*> \endverbatim +*> +*> \param[in] LDU2 +*> \verbatim +*> LDU2 is INTEGER +*> The leading dimension of U2. If JOBU2 = 'Y', LDU2 >= +*> MAX(1,M-P). +*> \endverbatim +*> +*> \param[out] V1T +*> \verbatim +*> V1T is REAL array, dimension (LDV1T,Q) +*> If JOBV1T = 'Y', V1T contains the Q-by-Q matrix orthogonal +*> matrix V1**T. +*> \endverbatim +*> +*> \param[in] LDV1T +*> \verbatim +*> LDV1T is INTEGER +*> The leading dimension of V1T. If JOBV1T = 'Y', LDV1T >= +*> MAX(1,Q). +*> \endverbatim +*> +*> \param[out] V2T +*> \verbatim +*> V2T is REAL array, dimension (LDV2T,M-Q) +*> If JOBV2T = 'Y', V2T contains the (M-Q)-by-(M-Q) orthogonal +*> matrix V2**T. +*> \endverbatim +*> +*> \param[in] LDV2T +*> \verbatim +*> LDV2T is INTEGER +*> The leading dimension of V2T. If JOBV2T = 'Y', LDV2T >= +*> MAX(1,M-Q). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> If INFO > 0 on exit, WORK(2:R) contains the values PHI(1), +*> ..., PHI(R-1) that, together with THETA(1), ..., THETA(R), +*> define the matrix in intermediate bidiagonal-block form +*> remaining after nonconvergence. INFO specifies the number +*> of nonzero PHI's. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the work array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (M-MIN(P, M-P, Q, M-Q)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: SBBCSD did not converge. See the description of WORK +*> above for details. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + RECURSIVE SUBROUTINE SORCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, + $ SIGNS, M, P, Q, X11, LDX11, X12, + $ LDX12, X21, LDX21, X22, LDX22, THETA, + $ U1, LDU1, U2, LDU2, V1T, LDV1T, V2T, + $ LDV2T, WORK, LWORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, SIGNS, TRANS + INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LDX11, LDX12, + $ LDX21, LDX22, LWORK, M, P, Q +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL THETA( * ) + REAL U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ), + $ V2T( LDV2T, * ), WORK( * ), X11( LDX11, * ), + $ X12( LDX12, * ), X21( LDX21, * ), X22( LDX22, + $ * ) +* .. +* +* =================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, + $ ZERO = 0.0E+0 ) +* .. +* .. Local Arrays .. + REAL DUMMY(1) +* .. +* .. Local Scalars .. + CHARACTER TRANST, SIGNST + INTEGER CHILDINFO, I, IB11D, IB11E, IB12D, IB12E, + $ IB21D, IB21E, IB22D, IB22E, IBBCSD, IORBDB, + $ IORGLQ, IORGQR, IPHI, ITAUP1, ITAUP2, ITAUQ1, + $ ITAUQ2, J, LBBCSDWORK, LBBCSDWORKMIN, + $ LBBCSDWORKOPT, LORBDBWORK, LORBDBWORKMIN, + $ LORBDBWORKOPT, LORGLQWORK, LORGLQWORKMIN, + $ LORGLQWORKOPT, LORGQRWORK, LORGQRWORKMIN, + $ LORGQRWORKOPT, LWORKMIN, LWORKOPT + LOGICAL COLMAJOR, DEFAULTSIGNS, LQUERY, WANTU1, WANTU2, + $ WANTV1T, WANTV2T +* .. +* .. External Subroutines .. + EXTERNAL SBBCSD, SLACPY, SLAPMR, SLAPMT, + $ SORBDB, SORGLQ, SORGQR, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + WANTU1 = LSAME( JOBU1, 'Y' ) + WANTU2 = LSAME( JOBU2, 'Y' ) + WANTV1T = LSAME( JOBV1T, 'Y' ) + WANTV2T = LSAME( JOBV2T, 'Y' ) + COLMAJOR = .NOT. LSAME( TRANS, 'T' ) + DEFAULTSIGNS = .NOT. LSAME( SIGNS, 'O' ) + LQUERY = LWORK .EQ. -1 + IF( M .LT. 0 ) THEN + INFO = -7 + ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN + INFO = -8 + ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN + INFO = -9 + ELSE IF ( COLMAJOR .AND. LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -11 + ELSE IF (.NOT. COLMAJOR .AND. LDX11 .LT. MAX( 1, Q ) ) THEN + INFO = -11 + ELSE IF (COLMAJOR .AND. LDX12 .LT. MAX( 1, P ) ) THEN + INFO = -13 + ELSE IF (.NOT. COLMAJOR .AND. LDX12 .LT. MAX( 1, M-Q ) ) THEN + INFO = -13 + ELSE IF (COLMAJOR .AND. LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -15 + ELSE IF (.NOT. COLMAJOR .AND. LDX21 .LT. MAX( 1, Q ) ) THEN + INFO = -15 + ELSE IF (COLMAJOR .AND. LDX22 .LT. MAX( 1, M-P ) ) THEN + INFO = -17 + ELSE IF (.NOT. COLMAJOR .AND. LDX22 .LT. MAX( 1, M-Q ) ) THEN + INFO = -17 + ELSE IF( WANTU1 .AND. LDU1 .LT. P ) THEN + INFO = -20 + ELSE IF( WANTU2 .AND. LDU2 .LT. M-P ) THEN + INFO = -22 + ELSE IF( WANTV1T .AND. LDV1T .LT. Q ) THEN + INFO = -24 + ELSE IF( WANTV2T .AND. LDV2T .LT. M-Q ) THEN + INFO = -26 + END IF +* +* Work with transpose if convenient +* + IF( INFO .EQ. 0 .AND. MIN( P, M-P ) .LT. MIN( Q, M-Q ) ) THEN + IF( COLMAJOR ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF + IF( DEFAULTSIGNS ) THEN + SIGNST = 'O' + ELSE + SIGNST = 'D' + END IF + CALL SORCSD( JOBV1T, JOBV2T, JOBU1, JOBU2, TRANST, SIGNST, M, + $ Q, P, X11, LDX11, X21, LDX21, X12, LDX12, X22, + $ LDX22, THETA, V1T, LDV1T, V2T, LDV2T, U1, LDU1, + $ U2, LDU2, WORK, LWORK, IWORK, INFO ) + RETURN + END IF +* +* Work with permutation [ 0 I; I 0 ] * X * [ 0 I; I 0 ] if +* convenient +* + IF( INFO .EQ. 0 .AND. M-Q .LT. Q ) THEN + IF( DEFAULTSIGNS ) THEN + SIGNST = 'O' + ELSE + SIGNST = 'D' + END IF + CALL SORCSD( JOBU2, JOBU1, JOBV2T, JOBV1T, TRANS, SIGNST, M, + $ M-P, M-Q, X22, LDX22, X21, LDX21, X12, LDX12, X11, + $ LDX11, THETA, U2, LDU2, U1, LDU1, V2T, LDV2T, V1T, + $ LDV1T, WORK, LWORK, IWORK, INFO ) + RETURN + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN +* + IPHI = 2 + ITAUP1 = IPHI + MAX( 1, Q - 1 ) + ITAUP2 = ITAUP1 + MAX( 1, P ) + ITAUQ1 = ITAUP2 + MAX( 1, M - P ) + ITAUQ2 = ITAUQ1 + MAX( 1, Q ) + IORGQR = ITAUQ2 + MAX( 1, M - Q ) + CALL SORGQR( M-Q, M-Q, M-Q, DUMMY, MAX(1,M-Q), DUMMY, WORK, -1, + $ CHILDINFO ) + LORGQRWORKOPT = INT( WORK(1) ) + LORGQRWORKMIN = MAX( 1, M - Q ) + IORGLQ = ITAUQ2 + MAX( 1, M - Q ) + CALL SORGLQ( M-Q, M-Q, M-Q, DUMMY, MAX(1,M-Q), DUMMY, WORK, -1, + $ CHILDINFO ) + LORGLQWORKOPT = INT( WORK(1) ) + LORGLQWORKMIN = MAX( 1, M - Q ) + IORBDB = ITAUQ2 + MAX( 1, M - Q ) + CALL SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, + $ X21, LDX21, X22, LDX22, DUMMY, DUMMY, DUMMY, DUMMY, DUMMY, + $ DUMMY,WORK,-1,CHILDINFO ) + LORBDBWORKOPT = INT( WORK(1) ) + LORBDBWORKMIN = LORBDBWORKOPT + IB11D = ITAUQ2 + MAX( 1, M - Q ) + IB11E = IB11D + MAX( 1, Q ) + IB12D = IB11E + MAX( 1, Q - 1 ) + IB12E = IB12D + MAX( 1, Q ) + IB21D = IB12E + MAX( 1, Q - 1 ) + IB21E = IB21D + MAX( 1, Q ) + IB22D = IB21E + MAX( 1, Q - 1 ) + IB22E = IB22D + MAX( 1, Q ) + IBBCSD = IB22E + MAX( 1, Q - 1 ) + CALL SBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, + $ DUMMY, DUMMY, U1, LDU1, U2, LDU2, V1T, LDV1T, V2T, + $ LDV2T, DUMMY, DUMMY, DUMMY, DUMMY, DUMMY, DUMMY, + $ DUMMY, DUMMY, WORK, -1, CHILDINFO ) + LBBCSDWORKOPT = INT( WORK(1) ) + LBBCSDWORKMIN = LBBCSDWORKOPT + LWORKOPT = MAX( IORGQR + LORGQRWORKOPT, IORGLQ + LORGLQWORKOPT, + $ IORBDB + LORBDBWORKOPT, IBBCSD + LBBCSDWORKOPT ) - 1 + LWORKMIN = MAX( IORGQR + LORGQRWORKMIN, IORGLQ + LORGLQWORKMIN, + $ IORBDB + LORBDBWORKOPT, IBBCSD + LBBCSDWORKMIN ) - 1 + WORK(1) = MAX(LWORKOPT,LWORKMIN) +* + IF( LWORK .LT. LWORKMIN .AND. .NOT. LQUERY ) THEN + INFO = -22 + ELSE + LORGQRWORK = LWORK - IORGQR + 1 + LORGLQWORK = LWORK - IORGLQ + 1 + LORBDBWORK = LWORK - IORBDB + 1 + LBBCSDWORK = LWORK - IBBCSD + 1 + END IF + END IF +* +* Abort if any illegal arguments +* + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'SORCSD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Transform to bidiagonal block form +* + CALL SORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, X21, + $ LDX21, X22, LDX22, THETA, WORK(IPHI), WORK(ITAUP1), + $ WORK(ITAUP2), WORK(ITAUQ1), WORK(ITAUQ2), + $ WORK(IORBDB), LORBDBWORK, CHILDINFO ) +* +* Accumulate Householder reflectors +* + IF( COLMAJOR ) THEN + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL SLACPY( 'L', P, Q, X11, LDX11, U1, LDU1 ) + CALL SORGQR( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGQR), + $ LORGQRWORK, INFO) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL SLACPY( 'L', M-P, Q, X21, LDX21, U2, LDU2 ) + CALL SORGQR( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2), + $ WORK(IORGQR), LORGQRWORK, INFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL SLACPY( 'U', Q-1, Q-1, X11(1,2), LDX11, V1T(2,2), + $ LDV1T ) + V1T(1, 1) = ONE + DO J = 2, Q + V1T(1,J) = ZERO + V1T(J,1) = ZERO + END DO + CALL SORGLQ( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1), + $ WORK(IORGLQ), LORGLQWORK, INFO ) + END IF + IF( WANTV2T .AND. M-Q .GT. 0 ) THEN + CALL SLACPY( 'U', P, M-Q, X12, LDX12, V2T, LDV2T ) + CALL SLACPY( 'U', M-P-Q, M-P-Q, X22(Q+1,P+1), LDX22, + $ V2T(P+1,P+1), LDV2T ) + CALL SORGLQ( M-Q, M-Q, M-Q, V2T, LDV2T, WORK(ITAUQ2), + $ WORK(IORGLQ), LORGLQWORK, INFO ) + END IF + ELSE + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL SLACPY( 'U', Q, P, X11, LDX11, U1, LDU1 ) + CALL SORGLQ( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGLQ), + $ LORGLQWORK, INFO) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL SLACPY( 'U', Q, M-P, X21, LDX21, U2, LDU2 ) + CALL SORGLQ( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2), + $ WORK(IORGLQ), LORGLQWORK, INFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL SLACPY( 'L', Q-1, Q-1, X11(2,1), LDX11, V1T(2,2), + $ LDV1T ) + V1T(1, 1) = ONE + DO J = 2, Q + V1T(1,J) = ZERO + V1T(J,1) = ZERO + END DO + CALL SORGQR( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1), + $ WORK(IORGQR), LORGQRWORK, INFO ) + END IF + IF( WANTV2T .AND. M-Q .GT. 0 ) THEN + CALL SLACPY( 'L', M-Q, P, X12, LDX12, V2T, LDV2T ) + CALL SLACPY( 'L', M-P-Q, M-P-Q, X22(P+1,Q+1), LDX22, + $ V2T(P+1,P+1), LDV2T ) + CALL SORGQR( M-Q, M-Q, M-Q, V2T, LDV2T, WORK(ITAUQ2), + $ WORK(IORGQR), LORGQRWORK, INFO ) + END IF + END IF +* +* Compute the CSD of the matrix in bidiagonal-block form +* + CALL SBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, THETA, + $ WORK(IPHI), U1, LDU1, U2, LDU2, V1T, LDV1T, V2T, + $ LDV2T, WORK(IB11D), WORK(IB11E), WORK(IB12D), + $ WORK(IB12E), WORK(IB21D), WORK(IB21E), WORK(IB22D), + $ WORK(IB22E), WORK(IBBCSD), LBBCSDWORK, INFO ) +* +* Permute rows and columns to place identity submatrices in top- +* left corner of (1,1)-block and/or bottom-right corner of (1,2)- +* block and/or bottom-right corner of (2,1)-block and/or top-left +* corner of (2,2)-block +* + IF( Q .GT. 0 .AND. WANTU2 ) THEN + DO I = 1, Q + IWORK(I) = M - P - Q + I + END DO + DO I = Q + 1, M - P + IWORK(I) = I - Q + END DO + IF( COLMAJOR ) THEN + CALL SLAPMT( .FALSE., M-P, M-P, U2, LDU2, IWORK ) + ELSE + CALL SLAPMR( .FALSE., M-P, M-P, U2, LDU2, IWORK ) + END IF + END IF + IF( M .GT. 0 .AND. WANTV2T ) THEN + DO I = 1, P + IWORK(I) = M - P - Q + I + END DO + DO I = P + 1, M - Q + IWORK(I) = I - P + END DO + IF( .NOT. COLMAJOR ) THEN + CALL SLAPMT( .FALSE., M-Q, M-Q, V2T, LDV2T, IWORK ) + ELSE + CALL SLAPMR( .FALSE., M-Q, M-Q, V2T, LDV2T, IWORK ) + END IF + END IF +* + RETURN +* +* End SORCSD +* + END + diff --git a/dspl/liblapack/SRC/sorcsd2by1.f b/dspl/liblapack/SRC/sorcsd2by1.f new file mode 100644 index 0000000..18a2a79 --- /dev/null +++ b/dspl/liblapack/SRC/sorcsd2by1.f @@ -0,0 +1,740 @@ +*> \brief \b SORCSD2BY1 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORCSD2BY1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORCSD2BY1( JOBU1, JOBU2, JOBV1T, M, P, Q, X11, LDX11, +* X21, LDX21, THETA, U1, LDU1, U2, LDU2, V1T, +* LDV1T, WORK, LWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBU1, JOBU2, JOBV1T +* INTEGER INFO, LDU1, LDU2, LDV1T, LWORK, LDX11, LDX21, +* $ M, P, Q +* .. +* .. Array Arguments .. +* REAL THETA(*) +* REAL U1(LDU1,*), U2(LDU2,*), V1T(LDV1T,*), WORK(*), +* $ X11(LDX11,*), X21(LDX21,*) +* INTEGER IWORK(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> SORCSD2BY1 computes the CS decomposition of an M-by-Q matrix X with +*> orthonormal columns that has been partitioned into a 2-by-1 block +*> structure: +*> +*> [ I1 0 0 ] +*> [ 0 C 0 ] +*> [ X11 ] [ U1 | ] [ 0 0 0 ] +*> X = [-----] = [---------] [----------] V1**T . +*> [ X21 ] [ | U2 ] [ 0 0 0 ] +*> [ 0 S 0 ] +*> [ 0 0 I2] +*> +*> X11 is P-by-Q. The orthogonal matrices U1, U2, and V1 are P-by-P, +*> (M-P)-by-(M-P), and Q-by-Q, respectively. C and S are R-by-R +*> nonnegative diagonal matrices satisfying C^2 + S^2 = I, in which +*> R = MIN(P,M-P,Q,M-Q). I1 is a K1-by-K1 identity matrix and I2 is a +*> K2-by-K2 identity matrix, where K1 = MAX(Q+P-M,0), K2 = MAX(Q-P,0). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU1 +*> \verbatim +*> JOBU1 is CHARACTER +*> = 'Y': U1 is computed; +*> otherwise: U1 is not computed. +*> \endverbatim +*> +*> \param[in] JOBU2 +*> \verbatim +*> JOBU2 is CHARACTER +*> = 'Y': U2 is computed; +*> otherwise: U2 is not computed. +*> \endverbatim +*> +*> \param[in] JOBV1T +*> \verbatim +*> JOBV1T is CHARACTER +*> = 'Y': V1T is computed; +*> otherwise: V1T is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows in X. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= M. +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is REAL array, dimension (LDX11,Q) +*> On entry, part of the orthogonal matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= MAX(1,P). +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is REAL array, dimension (LDX21,Q) +*> On entry, part of the orthogonal matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. LDX21 >= MAX(1,M-P). +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is REAL array, dimension (R), in which R = +*> MIN(P,M-P,Q,M-Q). +*> C = DIAG( COS(THETA(1)), ... , COS(THETA(R)) ) and +*> S = DIAG( SIN(THETA(1)), ... , SIN(THETA(R)) ). +*> \endverbatim +*> +*> \param[out] U1 +*> \verbatim +*> U1 is REAL array, dimension (P) +*> If JOBU1 = 'Y', U1 contains the P-by-P orthogonal matrix U1. +*> \endverbatim +*> +*> \param[in] LDU1 +*> \verbatim +*> LDU1 is INTEGER +*> The leading dimension of U1. If JOBU1 = 'Y', LDU1 >= +*> MAX(1,P). +*> \endverbatim +*> +*> \param[out] U2 +*> \verbatim +*> U2 is REAL array, dimension (M-P) +*> If JOBU2 = 'Y', U2 contains the (M-P)-by-(M-P) orthogonal +*> matrix U2. +*> \endverbatim +*> +*> \param[in] LDU2 +*> \verbatim +*> LDU2 is INTEGER +*> The leading dimension of U2. If JOBU2 = 'Y', LDU2 >= +*> MAX(1,M-P). +*> \endverbatim +*> +*> \param[out] V1T +*> \verbatim +*> V1T is REAL array, dimension (Q) +*> If JOBV1T = 'Y', V1T contains the Q-by-Q matrix orthogonal +*> matrix V1**T. +*> \endverbatim +*> +*> \param[in] LDV1T +*> \verbatim +*> LDV1T is INTEGER +*> The leading dimension of V1T. If JOBV1T = 'Y', LDV1T >= +*> MAX(1,Q). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> If INFO > 0 on exit, WORK(2:R) contains the values PHI(1), +*> ..., PHI(R-1) that, together with THETA(1), ..., THETA(R), +*> define the matrix in intermediate bidiagonal-block form +*> remaining after nonconvergence. INFO specifies the number +*> of nonzero PHI's. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the work array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (M-MIN(P,M-P,Q,M-Q)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: SBBCSD did not converge. See the description of WORK +*> above for details. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORCSD2BY1( JOBU1, JOBU2, JOBV1T, M, P, Q, X11, LDX11, + $ X21, LDX21, THETA, U1, LDU1, U2, LDU2, V1T, + $ LDV1T, WORK, LWORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + CHARACTER JOBU1, JOBU2, JOBV1T + INTEGER INFO, LDU1, LDU2, LDV1T, LWORK, LDX11, LDX21, + $ M, P, Q +* .. +* .. Array Arguments .. + REAL THETA(*) + REAL U1(LDU1,*), U2(LDU2,*), V1T(LDV1T,*), WORK(*), + $ X11(LDX11,*), X21(LDX21,*) + INTEGER IWORK(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0 ) +* .. +* .. Local Scalars .. + INTEGER CHILDINFO, I, IB11D, IB11E, IB12D, IB12E, + $ IB21D, IB21E, IB22D, IB22E, IBBCSD, IORBDB, + $ IORGLQ, IORGQR, IPHI, ITAUP1, ITAUP2, ITAUQ1, + $ J, LBBCSD, LORBDB, LORGLQ, LORGLQMIN, + $ LORGLQOPT, LORGQR, LORGQRMIN, LORGQROPT, + $ LWORKMIN, LWORKOPT, R + LOGICAL LQUERY, WANTU1, WANTU2, WANTV1T +* .. +* .. Local Arrays .. + REAL DUM1(1), DUM2(1,1) +* .. +* .. External Subroutines .. + EXTERNAL SBBCSD, SCOPY, SLACPY, SLAPMR, SLAPMT, SORBDB1, + $ SORBDB2, SORBDB3, SORBDB4, SORGLQ, SORGQR, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Function .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + WANTU1 = LSAME( JOBU1, 'Y' ) + WANTU2 = LSAME( JOBU2, 'Y' ) + WANTV1T = LSAME( JOBV1T, 'Y' ) + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -4 + ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN + INFO = -5 + ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN + INFO = -6 + ELSE IF( LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -8 + ELSE IF( LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -10 + ELSE IF( WANTU1 .AND. LDU1 .LT. MAX( 1, P ) ) THEN + INFO = -13 + ELSE IF( WANTU2 .AND. LDU2 .LT. MAX( 1, M - P ) ) THEN + INFO = -15 + ELSE IF( WANTV1T .AND. LDV1T .LT. MAX( 1, Q ) ) THEN + INFO = -17 + END IF +* + R = MIN( P, M-P, Q, M-Q ) +* +* Compute workspace +* +* WORK layout: +* |-------------------------------------------------------| +* | LWORKOPT (1) | +* |-------------------------------------------------------| +* | PHI (MAX(1,R-1)) | +* |-------------------------------------------------------| +* | TAUP1 (MAX(1,P)) | B11D (R) | +* | TAUP2 (MAX(1,M-P)) | B11E (R-1) | +* | TAUQ1 (MAX(1,Q)) | B12D (R) | +* |-----------------------------------------| B12E (R-1) | +* | SORBDB WORK | SORGQR WORK | SORGLQ WORK | B21D (R) | +* | | | | B21E (R-1) | +* | | | | B22D (R) | +* | | | | B22E (R-1) | +* | | | | SBBCSD WORK | +* |-------------------------------------------------------| +* + IF( INFO .EQ. 0 ) THEN + IPHI = 2 + IB11D = IPHI + MAX( 1, R-1 ) + IB11E = IB11D + MAX( 1, R ) + IB12D = IB11E + MAX( 1, R - 1 ) + IB12E = IB12D + MAX( 1, R ) + IB21D = IB12E + MAX( 1, R - 1 ) + IB21E = IB21D + MAX( 1, R ) + IB22D = IB21E + MAX( 1, R - 1 ) + IB22E = IB22D + MAX( 1, R ) + IBBCSD = IB22E + MAX( 1, R - 1 ) + ITAUP1 = IPHI + MAX( 1, R-1 ) + ITAUP2 = ITAUP1 + MAX( 1, P ) + ITAUQ1 = ITAUP2 + MAX( 1, M-P ) + IORBDB = ITAUQ1 + MAX( 1, Q ) + IORGQR = ITAUQ1 + MAX( 1, Q ) + IORGLQ = ITAUQ1 + MAX( 1, Q ) + LORGQRMIN = 1 + LORGQROPT = 1 + LORGLQMIN = 1 + LORGLQOPT = 1 + IF( R .EQ. Q ) THEN + CALL SORBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ DUM1, DUM1, DUM1, DUM1, WORK, -1, + $ CHILDINFO ) + LORBDB = INT( WORK(1) ) + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL SORGQR( P, P, Q, U1, LDU1, DUM1, WORK(1), -1, + $ CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + ENDIF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL SORGQR( M-P, M-P, Q, U2, LDU2, DUM1, WORK(1), -1, + $ CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, M-P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL SORGLQ( Q-1, Q-1, Q-1, V1T, LDV1T, + $ DUM1, WORK(1), -1, CHILDINFO ) + LORGLQMIN = MAX( LORGLQMIN, Q-1 ) + LORGLQOPT = MAX( LORGLQOPT, INT( WORK(1) ) ) + END IF + CALL SBBCSD( JOBU1, JOBU2, JOBV1T, 'N', 'N', M, P, Q, THETA, + $ DUM1, U1, LDU1, U2, LDU2, V1T, LDV1T, DUM2, + $ 1, DUM1, DUM1, DUM1, DUM1, DUM1, + $ DUM1, DUM1, DUM1, WORK(1), -1, CHILDINFO + $ ) + LBBCSD = INT( WORK(1) ) + ELSE IF( R .EQ. P ) THEN + CALL SORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ DUM1, DUM1, DUM1, DUM1, WORK(1), -1, + $ CHILDINFO ) + LORBDB = INT( WORK(1) ) + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL SORGQR( P-1, P-1, P-1, U1(2,2), LDU1, DUM1, + $ WORK(1), -1, CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, P-1 ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL SORGQR( M-P, M-P, Q, U2, LDU2, DUM1, WORK(1), -1, + $ CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, M-P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL SORGLQ( Q, Q, R, V1T, LDV1T, DUM1, WORK(1), -1, + $ CHILDINFO ) + LORGLQMIN = MAX( LORGLQMIN, Q ) + LORGLQOPT = MAX( LORGLQOPT, INT( WORK(1) ) ) + END IF + CALL SBBCSD( JOBV1T, 'N', JOBU1, JOBU2, 'T', M, Q, P, THETA, + $ DUM1, V1T, LDV1T, DUM2, 1, U1, LDU1, U2, + $ LDU2, DUM1, DUM1, DUM1, DUM1, DUM1, + $ DUM1, DUM1, DUM1, WORK(1), -1, CHILDINFO + $ ) + LBBCSD = INT( WORK(1) ) + ELSE IF( R .EQ. M-P ) THEN + CALL SORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ DUM1, DUM1, DUM1, DUM1, WORK(1), -1, + $ CHILDINFO ) + LORBDB = INT( WORK(1) ) + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL SORGQR( P, P, Q, U1, LDU1, DUM1, WORK(1), -1, + $ CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL SORGQR( M-P-1, M-P-1, M-P-1, U2(2,2), LDU2, DUM1, + $ WORK(1), -1, CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, M-P-1 ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL SORGLQ( Q, Q, R, V1T, LDV1T, DUM1, WORK(1), -1, + $ CHILDINFO ) + LORGLQMIN = MAX( LORGLQMIN, Q ) + LORGLQOPT = MAX( LORGLQOPT, INT( WORK(1) ) ) + END IF + CALL SBBCSD( 'N', JOBV1T, JOBU2, JOBU1, 'T', M, M-Q, M-P, + $ THETA, DUM1, DUM2, 1, V1T, LDV1T, U2, LDU2, + $ U1, LDU1, DUM1, DUM1, DUM1, DUM1, + $ DUM1, DUM1, DUM1, DUM1, WORK(1), -1, + $ CHILDINFO ) + LBBCSD = INT( WORK(1) ) + ELSE + CALL SORBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ DUM1, DUM1, DUM1, DUM1, DUM1, + $ WORK(1), -1, CHILDINFO ) + LORBDB = M + INT( WORK(1) ) + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL SORGQR( P, P, M-Q, U1, LDU1, DUM1, WORK(1), -1, + $ CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL SORGQR( M-P, M-P, M-Q, U2, LDU2, DUM1, WORK(1), + $ -1, CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, M-P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL SORGLQ( Q, Q, Q, V1T, LDV1T, DUM1, WORK(1), -1, + $ CHILDINFO ) + LORGLQMIN = MAX( LORGLQMIN, Q ) + LORGLQOPT = MAX( LORGLQOPT, INT( WORK(1) ) ) + END IF + CALL SBBCSD( JOBU2, JOBU1, 'N', JOBV1T, 'N', M, M-P, M-Q, + $ THETA, DUM1, U2, LDU2, U1, LDU1, DUM2, 1, + $ V1T, LDV1T, DUM1, DUM1, DUM1, DUM1, + $ DUM1, DUM1, DUM1, DUM1, WORK(1), -1, + $ CHILDINFO ) + LBBCSD = INT( WORK(1) ) + END IF + LWORKMIN = MAX( IORBDB+LORBDB-1, + $ IORGQR+LORGQRMIN-1, + $ IORGLQ+LORGLQMIN-1, + $ IBBCSD+LBBCSD-1 ) + LWORKOPT = MAX( IORBDB+LORBDB-1, + $ IORGQR+LORGQROPT-1, + $ IORGLQ+LORGLQOPT-1, + $ IBBCSD+LBBCSD-1 ) + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'SORCSD2BY1', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF + LORGQR = LWORK-IORGQR+1 + LORGLQ = LWORK-IORGLQ+1 +* +* Handle four cases separately: R = Q, R = P, R = M-P, and R = M-Q, +* in which R = MIN(P,M-P,Q,M-Q) +* + IF( R .EQ. Q ) THEN +* +* Case 1: R = Q +* +* Simultaneously bidiagonalize X11 and X21 +* + CALL SORBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ WORK(IPHI), WORK(ITAUP1), WORK(ITAUP2), + $ WORK(ITAUQ1), WORK(IORBDB), LORBDB, CHILDINFO ) +* +* Accumulate Householder reflectors +* + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL SLACPY( 'L', P, Q, X11, LDX11, U1, LDU1 ) + CALL SORGQR( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGQR), + $ LORGQR, CHILDINFO ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL SLACPY( 'L', M-P, Q, X21, LDX21, U2, LDU2 ) + CALL SORGQR( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2), + $ WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + V1T(1,1) = ONE + DO J = 2, Q + V1T(1,J) = ZERO + V1T(J,1) = ZERO + END DO + CALL SLACPY( 'U', Q-1, Q-1, X21(1,2), LDX21, V1T(2,2), + $ LDV1T ) + CALL SORGLQ( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1), + $ WORK(IORGLQ), LORGLQ, CHILDINFO ) + END IF +* +* Simultaneously diagonalize X11 and X21. +* + CALL SBBCSD( JOBU1, JOBU2, JOBV1T, 'N', 'N', M, P, Q, THETA, + $ WORK(IPHI), U1, LDU1, U2, LDU2, V1T, LDV1T, + $ DUM2, 1, WORK(IB11D), WORK(IB11E), WORK(IB12D), + $ WORK(IB12E), WORK(IB21D), WORK(IB21E), + $ WORK(IB22D), WORK(IB22E), WORK(IBBCSD), LBBCSD, + $ CHILDINFO ) +* +* Permute rows and columns to place zero submatrices in +* preferred positions +* + IF( Q .GT. 0 .AND. WANTU2 ) THEN + DO I = 1, Q + IWORK(I) = M - P - Q + I + END DO + DO I = Q + 1, M - P + IWORK(I) = I - Q + END DO + CALL SLAPMT( .FALSE., M-P, M-P, U2, LDU2, IWORK ) + END IF + ELSE IF( R .EQ. P ) THEN +* +* Case 2: R = P +* +* Simultaneously bidiagonalize X11 and X21 +* + CALL SORBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ WORK(IPHI), WORK(ITAUP1), WORK(ITAUP2), + $ WORK(ITAUQ1), WORK(IORBDB), LORBDB, CHILDINFO ) +* +* Accumulate Householder reflectors +* + IF( WANTU1 .AND. P .GT. 0 ) THEN + U1(1,1) = ONE + DO J = 2, P + U1(1,J) = ZERO + U1(J,1) = ZERO + END DO + CALL SLACPY( 'L', P-1, P-1, X11(2,1), LDX11, U1(2,2), LDU1 ) + CALL SORGQR( P-1, P-1, P-1, U1(2,2), LDU1, WORK(ITAUP1), + $ WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL SLACPY( 'L', M-P, Q, X21, LDX21, U2, LDU2 ) + CALL SORGQR( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2), + $ WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL SLACPY( 'U', P, Q, X11, LDX11, V1T, LDV1T ) + CALL SORGLQ( Q, Q, R, V1T, LDV1T, WORK(ITAUQ1), + $ WORK(IORGLQ), LORGLQ, CHILDINFO ) + END IF +* +* Simultaneously diagonalize X11 and X21. +* + CALL SBBCSD( JOBV1T, 'N', JOBU1, JOBU2, 'T', M, Q, P, THETA, + $ WORK(IPHI), V1T, LDV1T, DUM1, 1, U1, LDU1, U2, + $ LDU2, WORK(IB11D), WORK(IB11E), WORK(IB12D), + $ WORK(IB12E), WORK(IB21D), WORK(IB21E), + $ WORK(IB22D), WORK(IB22E), WORK(IBBCSD), LBBCSD, + $ CHILDINFO ) +* +* Permute rows and columns to place identity submatrices in +* preferred positions +* + IF( Q .GT. 0 .AND. WANTU2 ) THEN + DO I = 1, Q + IWORK(I) = M - P - Q + I + END DO + DO I = Q + 1, M - P + IWORK(I) = I - Q + END DO + CALL SLAPMT( .FALSE., M-P, M-P, U2, LDU2, IWORK ) + END IF + ELSE IF( R .EQ. M-P ) THEN +* +* Case 3: R = M-P +* +* Simultaneously bidiagonalize X11 and X21 +* + CALL SORBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ WORK(IPHI), WORK(ITAUP1), WORK(ITAUP2), + $ WORK(ITAUQ1), WORK(IORBDB), LORBDB, CHILDINFO ) +* +* Accumulate Householder reflectors +* + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL SLACPY( 'L', P, Q, X11, LDX11, U1, LDU1 ) + CALL SORGQR( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGQR), + $ LORGQR, CHILDINFO ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + U2(1,1) = ONE + DO J = 2, M-P + U2(1,J) = ZERO + U2(J,1) = ZERO + END DO + CALL SLACPY( 'L', M-P-1, M-P-1, X21(2,1), LDX21, U2(2,2), + $ LDU2 ) + CALL SORGQR( M-P-1, M-P-1, M-P-1, U2(2,2), LDU2, + $ WORK(ITAUP2), WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL SLACPY( 'U', M-P, Q, X21, LDX21, V1T, LDV1T ) + CALL SORGLQ( Q, Q, R, V1T, LDV1T, WORK(ITAUQ1), + $ WORK(IORGLQ), LORGLQ, CHILDINFO ) + END IF +* +* Simultaneously diagonalize X11 and X21. +* + CALL SBBCSD( 'N', JOBV1T, JOBU2, JOBU1, 'T', M, M-Q, M-P, + $ THETA, WORK(IPHI), DUM1, 1, V1T, LDV1T, U2, + $ LDU2, U1, LDU1, WORK(IB11D), WORK(IB11E), + $ WORK(IB12D), WORK(IB12E), WORK(IB21D), + $ WORK(IB21E), WORK(IB22D), WORK(IB22E), + $ WORK(IBBCSD), LBBCSD, CHILDINFO ) +* +* Permute rows and columns to place identity submatrices in +* preferred positions +* + IF( Q .GT. R ) THEN + DO I = 1, R + IWORK(I) = Q - R + I + END DO + DO I = R + 1, Q + IWORK(I) = I - R + END DO + IF( WANTU1 ) THEN + CALL SLAPMT( .FALSE., P, Q, U1, LDU1, IWORK ) + END IF + IF( WANTV1T ) THEN + CALL SLAPMR( .FALSE., Q, Q, V1T, LDV1T, IWORK ) + END IF + END IF + ELSE +* +* Case 4: R = M-Q +* +* Simultaneously bidiagonalize X11 and X21 +* + CALL SORBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ WORK(IPHI), WORK(ITAUP1), WORK(ITAUP2), + $ WORK(ITAUQ1), WORK(IORBDB), WORK(IORBDB+M), + $ LORBDB-M, CHILDINFO ) +* +* Accumulate Householder reflectors +* + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL SCOPY( P, WORK(IORBDB), 1, U1, 1 ) + DO J = 2, P + U1(1,J) = ZERO + END DO + CALL SLACPY( 'L', P-1, M-Q-1, X11(2,1), LDX11, U1(2,2), + $ LDU1 ) + CALL SORGQR( P, P, M-Q, U1, LDU1, WORK(ITAUP1), + $ WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL SCOPY( M-P, WORK(IORBDB+P), 1, U2, 1 ) + DO J = 2, M-P + U2(1,J) = ZERO + END DO + CALL SLACPY( 'L', M-P-1, M-Q-1, X21(2,1), LDX21, U2(2,2), + $ LDU2 ) + CALL SORGQR( M-P, M-P, M-Q, U2, LDU2, WORK(ITAUP2), + $ WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL SLACPY( 'U', M-Q, Q, X21, LDX21, V1T, LDV1T ) + CALL SLACPY( 'U', P-(M-Q), Q-(M-Q), X11(M-Q+1,M-Q+1), LDX11, + $ V1T(M-Q+1,M-Q+1), LDV1T ) + CALL SLACPY( 'U', -P+Q, Q-P, X21(M-Q+1,P+1), LDX21, + $ V1T(P+1,P+1), LDV1T ) + CALL SORGLQ( Q, Q, Q, V1T, LDV1T, WORK(ITAUQ1), + $ WORK(IORGLQ), LORGLQ, CHILDINFO ) + END IF +* +* Simultaneously diagonalize X11 and X21. +* + CALL SBBCSD( JOBU2, JOBU1, 'N', JOBV1T, 'N', M, M-P, M-Q, + $ THETA, WORK(IPHI), U2, LDU2, U1, LDU1, DUM1, 1, + $ V1T, LDV1T, WORK(IB11D), WORK(IB11E), WORK(IB12D), + $ WORK(IB12E), WORK(IB21D), WORK(IB21E), + $ WORK(IB22D), WORK(IB22E), WORK(IBBCSD), LBBCSD, + $ CHILDINFO ) +* +* Permute rows and columns to place identity submatrices in +* preferred positions +* + IF( P .GT. R ) THEN + DO I = 1, R + IWORK(I) = P - R + I + END DO + DO I = R + 1, P + IWORK(I) = I - R + END DO + IF( WANTU1 ) THEN + CALL SLAPMT( .FALSE., P, P, U1, LDU1, IWORK ) + END IF + IF( WANTV1T ) THEN + CALL SLAPMR( .FALSE., P, Q, V1T, LDV1T, IWORK ) + END IF + END IF + END IF +* + RETURN +* +* End of SORCSD2BY1 +* + END + diff --git a/dspl/liblapack/SRC/sorg2l.f b/dspl/liblapack/SRC/sorg2l.f new file mode 100644 index 0000000..e26c70d --- /dev/null +++ b/dspl/liblapack/SRC/sorg2l.f @@ -0,0 +1,198 @@ +*> \brief \b SORG2L generates all or part of the orthogonal matrix Q from a QL factorization determined by sgeqlf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORG2L + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORG2L( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORG2L generates an m by n real matrix Q with orthonormal columns, +*> which is defined as the last n columns of a product of k elementary +*> reflectors of order m +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by SGEQLF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the (n-k+i)-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by SGEQLF in the last k columns of its array +*> argument A. +*> On exit, the m by n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SGEQLF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORG2L( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, II, J, L +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORG2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns 1:n-k to columns of the unit matrix +* + DO 20 J = 1, N - K + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( M-N+J, J ) = ONE + 20 CONTINUE +* + DO 40 I = 1, K + II = N - K + I +* +* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left +* + A( M-N+II, II ) = ONE + CALL SLARF( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), A, + $ LDA, WORK ) + CALL SSCAL( M-N+II-1, -TAU( I ), A( 1, II ), 1 ) + A( M-N+II, II ) = ONE - TAU( I ) +* +* Set A(m-k+i+1:m,n-k+i) to zero +* + DO 30 L = M - N + II + 1, M + A( L, II ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of SORG2L +* + END diff --git a/dspl/liblapack/SRC/sorg2r.f b/dspl/liblapack/SRC/sorg2r.f new file mode 100644 index 0000000..a301f60 --- /dev/null +++ b/dspl/liblapack/SRC/sorg2r.f @@ -0,0 +1,200 @@ +*> \brief \b SORG2R generates all or part of the orthogonal matrix Q from a QR factorization determined by sgeqrf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORG2R + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORG2R( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORG2R generates an m by n real matrix Q with orthonormal columns, +*> which is defined as the first n columns of a product of k elementary +*> reflectors of order m +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by SGEQRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the i-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by SGEQRF in the first k columns of its array +*> argument A. +*> On exit, the m-by-n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SGEQRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORG2R( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORG2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns k+1:n to columns of the unit matrix +* + DO 20 J = K + 1, N + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( J, J ) = ONE + 20 CONTINUE +* + DO 40 I = K, 1, -1 +* +* Apply H(i) to A(i:m,i:n) from the left +* + IF( I.LT.N ) THEN + A( I, I ) = ONE + CALL SLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) + END IF + IF( I.LT.M ) + $ CALL SSCAL( M-I, -TAU( I ), A( I+1, I ), 1 ) + A( I, I ) = ONE - TAU( I ) +* +* Set A(1:i-1,i) to zero +* + DO 30 L = 1, I - 1 + A( L, I ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of SORG2R +* + END diff --git a/dspl/liblapack/SRC/sorgbr.f b/dspl/liblapack/SRC/sorgbr.f new file mode 100644 index 0000000..dccdbb5 --- /dev/null +++ b/dspl/liblapack/SRC/sorgbr.f @@ -0,0 +1,337 @@ +*> \brief \b SORGBR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORGBR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER VECT +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORGBR generates one of the real orthogonal matrices Q or P**T +*> determined by SGEBRD when reducing a real matrix A to bidiagonal +*> form: A = Q * B * P**T. Q and P**T are defined as products of +*> elementary reflectors H(i) or G(i) respectively. +*> +*> If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q +*> is of order M: +*> if m >= k, Q = H(1) H(2) . . . H(k) and SORGBR returns the first n +*> columns of Q, where m >= n >= k; +*> if m < k, Q = H(1) H(2) . . . H(m-1) and SORGBR returns Q as an +*> M-by-M matrix. +*> +*> If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T +*> is of order N: +*> if k < n, P**T = G(k) . . . G(2) G(1) and SORGBR returns the first m +*> rows of P**T, where n >= m >= k; +*> if k >= n, P**T = G(n-1) . . . G(2) G(1) and SORGBR returns P**T as +*> an N-by-N matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> Specifies whether the matrix Q or the matrix P**T is +*> required, as defined in the transformation applied by SGEBRD: +*> = 'Q': generate Q; +*> = 'P': generate P**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q or P**T to be returned. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q or P**T to be returned. +*> N >= 0. +*> If VECT = 'Q', M >= N >= min(M,K); +*> if VECT = 'P', N >= M >= min(N,K). +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> If VECT = 'Q', the number of columns in the original M-by-K +*> matrix reduced by SGEBRD. +*> If VECT = 'P', the number of rows in the original K-by-N +*> matrix reduced by SGEBRD. +*> K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the vectors which define the elementary reflectors, +*> as returned by SGEBRD. +*> On exit, the M-by-N matrix Q or P**T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension +*> (min(M,K)) if VECT = 'Q' +*> (min(N,K)) if VECT = 'P' +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i) or G(i), which determines Q or P**T, as +*> returned by SGEBRD in its array argument TAUQ or TAUP. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,min(M,N)). +*> For optimum performance LWORK >= min(M,N)*NB, where NB +*> is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realGBcomputational +* +* ===================================================================== + SUBROUTINE SORGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER VECT + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTQ + INTEGER I, IINFO, J, LWKOPT, MN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SORGLQ, SORGQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTQ = LSAME( VECT, 'Q' ) + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 .OR. ( WANTQ .AND. ( N.GT.M .OR. N.LT.MIN( M, + $ K ) ) ) .OR. ( .NOT.WANTQ .AND. ( M.GT.N .OR. M.LT. + $ MIN( N, K ) ) ) ) THEN + INFO = -3 + ELSE IF( K.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LWORK.LT.MAX( 1, MN ) .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = 1 + IF( WANTQ ) THEN + IF( M.GE.K ) THEN + CALL SORGQR( M, N, K, A, LDA, TAU, WORK, -1, IINFO ) + ELSE + IF( M.GT.1 ) THEN + CALL SORGQR( M-1, M-1, M-1, A( 2, 2 ), LDA, TAU, WORK, + $ -1, IINFO ) + END IF + END IF + ELSE + IF( K.LT.N ) THEN + CALL SORGLQ( M, N, K, A, LDA, TAU, WORK, -1, IINFO ) + ELSE + IF( N.GT.1 ) THEN + CALL SORGLQ( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ -1, IINFO ) + END IF + END IF + END IF + LWKOPT = WORK( 1 ) + LWKOPT = MAX (LWKOPT, MN) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK( 1 ) = LWKOPT + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( WANTQ ) THEN +* +* Form Q, determined by a call to SGEBRD to reduce an m-by-k +* matrix +* + IF( M.GE.K ) THEN +* +* If m >= k, assume m >= n >= k +* + CALL SORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If m < k, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q +* to those of the unit matrix +* + DO 20 J = M, 2, -1 + A( 1, J ) = ZERO + DO 10 I = J + 1, M + A( I, J ) = A( I, J-1 ) + 10 CONTINUE + 20 CONTINUE + A( 1, 1 ) = ONE + DO 30 I = 2, M + A( I, 1 ) = ZERO + 30 CONTINUE + IF( M.GT.1 ) THEN +* +* Form Q(2:m,2:m) +* + CALL SORGQR( M-1, M-1, M-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + ELSE +* +* Form P**T, determined by a call to SGEBRD to reduce a k-by-n +* matrix +* + IF( K.LT.N ) THEN +* +* If k < n, assume k <= m <= n +* + CALL SORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If k >= n, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* row downward, and set the first row and column of P**T to +* those of the unit matrix +* + A( 1, 1 ) = ONE + DO 40 I = 2, N + A( I, 1 ) = ZERO + 40 CONTINUE + DO 60 J = 2, N + DO 50 I = J - 1, 2, -1 + A( I, J ) = A( I-1, J ) + 50 CONTINUE + A( 1, J ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Form P**T(2:n,2:n) +* + CALL SORGLQ( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORGBR +* + END diff --git a/dspl/liblapack/SRC/sorghr.f b/dspl/liblapack/SRC/sorghr.f new file mode 100644 index 0000000..3efea72 --- /dev/null +++ b/dspl/liblapack/SRC/sorghr.f @@ -0,0 +1,240 @@ +*> \brief \b SORGHR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORGHR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORGHR( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORGHR generates a real orthogonal matrix Q which is defined as the +*> product of IHI-ILO elementary reflectors of order N, as returned by +*> SGEHRD: +*> +*> Q = H(ilo) H(ilo+1) . . . H(ihi-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix Q. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> ILO and IHI must have the same values as in the previous call +*> of SGEHRD. Q is equal to the unit matrix except in the +*> submatrix Q(ilo+1:ihi,ilo+1:ihi). +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the vectors which define the elementary reflectors, +*> as returned by SGEHRD. +*> On exit, the N-by-N orthogonal matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (N-1) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SGEHRD. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= IHI-ILO. +*> For optimum performance LWORK >= (IHI-ILO)*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORGHR( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LWKOPT, NB, NH +* .. +* .. External Subroutines .. + EXTERNAL SORGQR, XERBLA +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NH = IHI - ILO + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, NH ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'SORGQR', ' ', NH, NH, NH, -1 ) + LWKOPT = MAX( 1, NH )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGHR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first ilo and the last n-ihi +* rows and columns to those of the unit matrix +* + DO 40 J = IHI, ILO + 1, -1 + DO 10 I = 1, J - 1 + A( I, J ) = ZERO + 10 CONTINUE + DO 20 I = J + 1, IHI + A( I, J ) = A( I, J-1 ) + 20 CONTINUE + DO 30 I = IHI + 1, N + A( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + DO 60 J = 1, ILO + DO 50 I = 1, N + A( I, J ) = ZERO + 50 CONTINUE + A( J, J ) = ONE + 60 CONTINUE + DO 80 J = IHI + 1, N + DO 70 I = 1, N + A( I, J ) = ZERO + 70 CONTINUE + A( J, J ) = ONE + 80 CONTINUE +* + IF( NH.GT.0 ) THEN +* +* Generate Q(ilo+1:ihi,ilo+1:ihi) +* + CALL SORGQR( NH, NH, NH, A( ILO+1, ILO+1 ), LDA, TAU( ILO ), + $ WORK, LWORK, IINFO ) + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORGHR +* + END diff --git a/dspl/liblapack/SRC/sorgl2.f b/dspl/liblapack/SRC/sorgl2.f new file mode 100644 index 0000000..276a01e --- /dev/null +++ b/dspl/liblapack/SRC/sorgl2.f @@ -0,0 +1,204 @@ +*> \brief \b SORGL2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORGL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORGL2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORGL2 generates an m by n real matrix Q with orthonormal rows, +*> which is defined as the first m rows of a product of k elementary +*> reflectors of order n +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by SGELQF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. M >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the i-th row must contain the vector which defines +*> the elementary reflector H(i), for i = 1,2,...,k, as returned +*> by SGELQF in the first k rows of its array argument A. +*> On exit, the m-by-n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SGELQF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORGL2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGL2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) + $ RETURN +* + IF( K.LT.M ) THEN +* +* Initialise rows k+1:m to rows of the unit matrix +* + DO 20 J = 1, N + DO 10 L = K + 1, M + A( L, J ) = ZERO + 10 CONTINUE + IF( J.GT.K .AND. J.LE.M ) + $ A( J, J ) = ONE + 20 CONTINUE + END IF +* + DO 40 I = K, 1, -1 +* +* Apply H(i) to A(i:m,i:n) from the right +* + IF( I.LT.N ) THEN + IF( I.LT.M ) THEN + A( I, I ) = ONE + CALL SLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ TAU( I ), A( I+1, I ), LDA, WORK ) + END IF + CALL SSCAL( N-I, -TAU( I ), A( I, I+1 ), LDA ) + END IF + A( I, I ) = ONE - TAU( I ) +* +* Set A(i,1:i-1) to zero +* + DO 30 L = 1, I - 1 + A( I, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of SORGL2 +* + END diff --git a/dspl/liblapack/SRC/sorglq.f b/dspl/liblapack/SRC/sorglq.f new file mode 100644 index 0000000..90806fc --- /dev/null +++ b/dspl/liblapack/SRC/sorglq.f @@ -0,0 +1,289 @@ +*> \brief \b SORGLQ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORGLQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORGLQ generates an M-by-N real matrix Q with orthonormal rows, +*> which is defined as the first M rows of a product of K elementary +*> reflectors of order N +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by SGELQF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. M >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the i-th row must contain the vector which defines +*> the elementary reflector H(i), for i = 1,2,...,k, as returned +*> by SGELQF in the first k rows of its array argument A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SGELQF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORGL2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'SORGLQ', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, M )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SORGLQ', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORGLQ', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk rows are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(kk+1:m,1:kk) to zero. +* + DO 20 J = 1, KK + DO 10 I = KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.M ) + $ CALL SORGL2( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL SLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**T to A(i+ib:m,i:n) from the right +* + CALL SLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise', + $ M-I-IB+1, N-I+1, IB, A( I, I ), LDA, WORK, + $ LDWORK, A( I+IB, I ), LDA, WORK( IB+1 ), + $ LDWORK ) + END IF +* +* Apply H**T to columns i:n of current block +* + CALL SORGL2( IB, N-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set columns 1:i-1 of current block to zero +* + DO 40 J = 1, I - 1 + DO 30 L = I, I + IB - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of SORGLQ +* + END diff --git a/dspl/liblapack/SRC/sorgql.f b/dspl/liblapack/SRC/sorgql.f new file mode 100644 index 0000000..b46b00c --- /dev/null +++ b/dspl/liblapack/SRC/sorgql.f @@ -0,0 +1,296 @@ +*> \brief \b SORGQL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORGQL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORGQL generates an M-by-N real matrix Q with orthonormal columns, +*> which is defined as the last N columns of a product of K elementary +*> reflectors of order M +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by SGEQLF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the (n-k+i)-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by SGEQLF in the last k columns of its array +*> argument A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SGEQLF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KK, L, LDWORK, LWKOPT, + $ NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORG2L, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'SORGQL', ' ', M, N, K, -1 ) + LWKOPT = N*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SORGQL', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORGQL', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the first block. +* The last kk columns are handled by the block method. +* + KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB ) +* +* Set A(m-kk+1:m,1:n-kk) to zero. +* + DO 20 J = 1, N - KK + DO 10 I = M - KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the first or only block. +* + CALL SORG2L( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = K - KK + 1, K, NB + IB = MIN( NB, K-I+1 ) + IF( N-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL SLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left +* + CALL SLARFB( 'Left', 'No transpose', 'Backward', + $ 'Columnwise', M-K+I+IB-1, N-K+I-1, IB, + $ A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows 1:m-k+i+ib-1 of current block +* + CALL SORG2L( M-K+I+IB-1, IB, IB, A( 1, N-K+I ), LDA, + $ TAU( I ), WORK, IINFO ) +* +* Set rows m-k+i+ib:m of current block to zero +* + DO 40 J = N - K + I, N - K + I + IB - 1 + DO 30 L = M - K + I + IB, M + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of SORGQL +* + END diff --git a/dspl/liblapack/SRC/sorgqr.f b/dspl/liblapack/SRC/sorgqr.f new file mode 100644 index 0000000..dbc9faf --- /dev/null +++ b/dspl/liblapack/SRC/sorgqr.f @@ -0,0 +1,290 @@ +*> \brief \b SORGQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORGQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORGQR generates an M-by-N real matrix Q with orthonormal columns, +*> which is defined as the first N columns of a product of K elementary +*> reflectors of order M +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by SGEQRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the i-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by SGEQRF in the first k columns of its array +*> argument A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SGEQRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORG2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'SORGQR', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, N )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SORGQR', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORGQR', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk columns are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(1:kk,kk+1:n) to zero. +* + DO 20 J = KK + 1, N + DO 10 I = 1, KK + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.N ) + $ CALL SORG2R( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL SLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i:m,i+ib:n) from the left +* + CALL SLARFB( 'Left', 'No transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows i:m of current block +* + CALL SORG2R( M-I+1, IB, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set rows 1:i-1 of current block to zero +* + DO 40 J = I, I + IB - 1 + DO 30 L = 1, I - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of SORGQR +* + END diff --git a/dspl/liblapack/SRC/sorgr2.f b/dspl/liblapack/SRC/sorgr2.f new file mode 100644 index 0000000..5a03e7d --- /dev/null +++ b/dspl/liblapack/SRC/sorgr2.f @@ -0,0 +1,202 @@ +*> \brief \b SORGR2 generates all or part of the orthogonal matrix Q from an RQ factorization determined by sgerqf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORGR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORGR2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORGR2 generates an m by n real matrix Q with orthonormal rows, +*> which is defined as the last m rows of a product of k elementary +*> reflectors of order n +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by SGERQF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. M >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the (m-k+i)-th row must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by SGERQF in the last k rows of its array argument +*> A. +*> On exit, the m by n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SGERQF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORGR2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, II, J, L +* .. +* .. External Subroutines .. + EXTERNAL SLARF, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGR2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) + $ RETURN +* + IF( K.LT.M ) THEN +* +* Initialise rows 1:m-k to rows of the unit matrix +* + DO 20 J = 1, N + DO 10 L = 1, M - K + A( L, J ) = ZERO + 10 CONTINUE + IF( J.GT.N-M .AND. J.LE.N-K ) + $ A( M-N+J, J ) = ONE + 20 CONTINUE + END IF +* + DO 40 I = 1, K + II = M - K + I +* +* Apply H(i) to A(1:m-k+i,1:n-k+i) from the right +* + A( II, N-M+II ) = ONE + CALL SLARF( 'Right', II-1, N-M+II, A( II, 1 ), LDA, TAU( I ), + $ A, LDA, WORK ) + CALL SSCAL( N-M+II-1, -TAU( I ), A( II, 1 ), LDA ) + A( II, N-M+II ) = ONE - TAU( I ) +* +* Set A(m-k+i,n-k+i+1:n) to zero +* + DO 30 L = N - M + II + 1, N + A( II, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of SORGR2 +* + END diff --git a/dspl/liblapack/SRC/sorgrq.f b/dspl/liblapack/SRC/sorgrq.f new file mode 100644 index 0000000..b5ecdea --- /dev/null +++ b/dspl/liblapack/SRC/sorgrq.f @@ -0,0 +1,296 @@ +*> \brief \b SORGRQ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORGRQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORGRQ generates an M-by-N real matrix Q with orthonormal rows, +*> which is defined as the last M rows of a product of K elementary +*> reflectors of order N +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by SGERQF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. M >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the (m-k+i)-th row must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by SGERQF in the last k rows of its array argument +*> A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SGERQF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, II, IINFO, IWS, J, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORGR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( M.LE.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'SORGRQ', ' ', M, N, K, -1 ) + LWKOPT = M*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGRQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SORGRQ', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORGRQ', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the first block. +* The last kk rows are handled by the block method. +* + KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB ) +* +* Set A(1:m-kk,n-kk+1:n) to zero. +* + DO 20 J = N - KK + 1, N + DO 10 I = 1, M - KK + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the first or only block. +* + CALL SORGR2( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = K - KK + 1, K, NB + IB = MIN( NB, K-I+1 ) + II = M - K + I + IF( II.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL SLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, + $ A( II, 1 ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**T to A(1:m-k+i-1,1:n-k+i+ib-1) from the right +* + CALL SLARFB( 'Right', 'Transpose', 'Backward', 'Rowwise', + $ II-1, N-K+I+IB-1, IB, A( II, 1 ), LDA, WORK, + $ LDWORK, A, LDA, WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H**T to columns 1:n-k+i+ib-1 of current block +* + CALL SORGR2( IB, N-K+I+IB-1, IB, A( II, 1 ), LDA, TAU( I ), + $ WORK, IINFO ) +* +* Set columns n-k+i+ib:n of current block to zero +* + DO 40 L = N - K + I + IB, N + DO 30 J = II, II + IB - 1 + A( J, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of SORGRQ +* + END diff --git a/dspl/liblapack/SRC/sorgtr.f b/dspl/liblapack/SRC/sorgtr.f new file mode 100644 index 0000000..da9a655 --- /dev/null +++ b/dspl/liblapack/SRC/sorgtr.f @@ -0,0 +1,255 @@ +*> \brief \b SORGTR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORGTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORGTR generates a real orthogonal matrix Q which is defined as the +*> product of n-1 elementary reflectors of order N, as returned by +*> SSYTRD: +*> +*> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), +*> +*> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A contains elementary reflectors +*> from SSYTRD; +*> = 'L': Lower triangle of A contains elementary reflectors +*> from SSYTRD. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix Q. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the vectors which define the elementary reflectors, +*> as returned by SSYTRD. +*> On exit, the N-by-N orthogonal matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (N-1) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SSYTRD. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N-1). +*> For optimum performance LWORK >= (N-1)*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, J, LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SORGQL, SORGQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + IF ( UPPER ) THEN + NB = ILAENV( 1, 'SORGQL', ' ', N-1, N-1, N-1, -1 ) + ELSE + NB = ILAENV( 1, 'SORGQR', ' ', N-1, N-1, N-1, -1 ) + END IF + LWKOPT = MAX( 1, N-1 )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORGTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to SSYTRD with UPLO = 'U' +* +* Shift the vectors which define the elementary reflectors one +* column to the left, and set the last row and column of Q to +* those of the unit matrix +* + DO 20 J = 1, N - 1 + DO 10 I = 1, J - 1 + A( I, J ) = A( I, J+1 ) + 10 CONTINUE + A( N, J ) = ZERO + 20 CONTINUE + DO 30 I = 1, N - 1 + A( I, N ) = ZERO + 30 CONTINUE + A( N, N ) = ONE +* +* Generate Q(1:n-1,1:n-1) +* + CALL SORGQL( N-1, N-1, N-1, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* Q was determined by a call to SSYTRD with UPLO = 'L'. +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q to +* those of the unit matrix +* + DO 50 J = N, 2, -1 + A( 1, J ) = ZERO + DO 40 I = J + 1, N + A( I, J ) = A( I, J-1 ) + 40 CONTINUE + 50 CONTINUE + A( 1, 1 ) = ONE + DO 60 I = 2, N + A( I, 1 ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Generate Q(2:n,2:n) +* + CALL SORGQR( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORGTR +* + END diff --git a/dspl/liblapack/SRC/sorm22.f b/dspl/liblapack/SRC/sorm22.f new file mode 100644 index 0000000..265e976 --- /dev/null +++ b/dspl/liblapack/SRC/sorm22.f @@ -0,0 +1,441 @@ +*> \brief \b SORM22 multiplies a general matrix by a banded orthogonal matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORM22 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORM22( SIDE, TRANS, M, N, N1, N2, Q, LDQ, C, LDC, +* $ WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER M, N, N1, N2, LDQ, LDC, LWORK, INFO +* .. +* .. Array Arguments .. +* REAL Q( LDQ, * ), C( LDC, * ), WORK( * ) +* .. +* +*> \par Purpose +* ============ +*> +*> \verbatim +*> +*> +*> SORM22 overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix of order NQ, with NQ = M if +*> SIDE = 'L' and NQ = N if SIDE = 'R'. +*> The orthogonal matrix Q processes a 2-by-2 block structure +*> +*> [ Q11 Q12 ] +*> Q = [ ] +*> [ Q21 Q22 ], +*> +*> where Q12 is an N1-by-N1 lower triangular matrix and Q21 is an +*> N2-by-N2 upper triangular matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose); +*> = 'C': apply Q**T (Conjugate transpose). +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] N1 +*> \param[in] N2 +*> \verbatim +*> N1 is INTEGER +*> N2 is INTEGER +*> The dimension of Q12 and Q21, respectively. N1, N2 >= 0. +*> The following requirement must be satisfied: +*> N1 + N2 = M if SIDE = 'L' and N1 + N2 = N if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is REAL array, dimension +*> (LDQ,M) if SIDE = 'L' +*> (LDQ,N) if SIDE = 'R' +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= max(1,M) if SIDE = 'L'; LDQ >= max(1,N) if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For optimum performance LWORK >= M*N. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date January 2015 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORM22( SIDE, TRANS, M, N, N1, N2, Q, LDQ, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2015 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER M, N, N1, N2, LDQ, LDC, LWORK, INFO +* .. +* .. Array Arguments .. + REAL Q( LDQ, * ), C( LDC, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, LDWORK, LEN, LWKOPT, NB, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SLACPY, STRMM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC REAL, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q; +* NW is the minimum dimension of WORK. +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + NW = NQ + IF( N1.EQ.0 .OR. N2.EQ.0 ) NW = 1 + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'T' ) ) + $ THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( N1.LT.0 .OR. N1+N2.NE.NQ ) THEN + INFO = -5 + ELSE IF( N2.LT.0 ) THEN + INFO = -6 + ELSE IF( LDQ.LT.MAX( 1, NQ ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKOPT = M*N + WORK( 1 ) = REAL( LWKOPT ) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORM22', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Degenerate cases (N1 = 0 or N2 = 0) are handled using STRMM. +* + IF( N1.EQ.0 ) THEN + CALL STRMM( SIDE, 'Upper', TRANS, 'Non-Unit', M, N, ONE, + $ Q, LDQ, C, LDC ) + WORK( 1 ) = ONE + RETURN + ELSE IF( N2.EQ.0 ) THEN + CALL STRMM( SIDE, 'Lower', TRANS, 'Non-Unit', M, N, ONE, + $ Q, LDQ, C, LDC ) + WORK( 1 ) = ONE + RETURN + END IF +* +* Compute the largest chunk size available from the workspace. +* + NB = MAX( 1, MIN( LWORK, LWKOPT ) / NQ ) +* + IF( LEFT ) THEN + IF( NOTRAN ) THEN + DO I = 1, N, NB + LEN = MIN( NB, N-I+1 ) + LDWORK = M +* +* Multiply bottom part of C by Q12. +* + CALL SLACPY( 'All', N1, LEN, C( N2+1, I ), LDC, WORK, + $ LDWORK ) + CALL STRMM( 'Left', 'Lower', 'No Transpose', 'Non-Unit', + $ N1, LEN, ONE, Q( 1, N2+1 ), LDQ, WORK, + $ LDWORK ) +* +* Multiply top part of C by Q11. +* + CALL SGEMM( 'No Transpose', 'No Transpose', N1, LEN, N2, + $ ONE, Q, LDQ, C( 1, I ), LDC, ONE, WORK, + $ LDWORK ) +* +* Multiply top part of C by Q21. +* + CALL SLACPY( 'All', N2, LEN, C( 1, I ), LDC, + $ WORK( N1+1 ), LDWORK ) + CALL STRMM( 'Left', 'Upper', 'No Transpose', 'Non-Unit', + $ N2, LEN, ONE, Q( N1+1, 1 ), LDQ, + $ WORK( N1+1 ), LDWORK ) +* +* Multiply bottom part of C by Q22. +* + CALL SGEMM( 'No Transpose', 'No Transpose', N2, LEN, N1, + $ ONE, Q( N1+1, N2+1 ), LDQ, C( N2+1, I ), LDC, + $ ONE, WORK( N1+1 ), LDWORK ) +* +* Copy everything back. +* + CALL SLACPY( 'All', M, LEN, WORK, LDWORK, C( 1, I ), + $ LDC ) + END DO + ELSE + DO I = 1, N, NB + LEN = MIN( NB, N-I+1 ) + LDWORK = M +* +* Multiply bottom part of C by Q21**T. +* + CALL SLACPY( 'All', N2, LEN, C( N1+1, I ), LDC, WORK, + $ LDWORK ) + CALL STRMM( 'Left', 'Upper', 'Transpose', 'Non-Unit', + $ N2, LEN, ONE, Q( N1+1, 1 ), LDQ, WORK, + $ LDWORK ) +* +* Multiply top part of C by Q11**T. +* + CALL SGEMM( 'Transpose', 'No Transpose', N2, LEN, N1, + $ ONE, Q, LDQ, C( 1, I ), LDC, ONE, WORK, + $ LDWORK ) +* +* Multiply top part of C by Q12**T. +* + CALL SLACPY( 'All', N1, LEN, C( 1, I ), LDC, + $ WORK( N2+1 ), LDWORK ) + CALL STRMM( 'Left', 'Lower', 'Transpose', 'Non-Unit', + $ N1, LEN, ONE, Q( 1, N2+1 ), LDQ, + $ WORK( N2+1 ), LDWORK ) +* +* Multiply bottom part of C by Q22**T. +* + CALL SGEMM( 'Transpose', 'No Transpose', N1, LEN, N2, + $ ONE, Q( N1+1, N2+1 ), LDQ, C( N1+1, I ), LDC, + $ ONE, WORK( N2+1 ), LDWORK ) +* +* Copy everything back. +* + CALL SLACPY( 'All', M, LEN, WORK, LDWORK, C( 1, I ), + $ LDC ) + END DO + END IF + ELSE + IF( NOTRAN ) THEN + DO I = 1, M, NB + LEN = MIN( NB, M-I+1 ) + LDWORK = LEN +* +* Multiply right part of C by Q21. +* + CALL SLACPY( 'All', LEN, N2, C( I, N1+1 ), LDC, WORK, + $ LDWORK ) + CALL STRMM( 'Right', 'Upper', 'No Transpose', 'Non-Unit', + $ LEN, N2, ONE, Q( N1+1, 1 ), LDQ, WORK, + $ LDWORK ) +* +* Multiply left part of C by Q11. +* + CALL SGEMM( 'No Transpose', 'No Transpose', LEN, N2, N1, + $ ONE, C( I, 1 ), LDC, Q, LDQ, ONE, WORK, + $ LDWORK ) +* +* Multiply left part of C by Q12. +* + CALL SLACPY( 'All', LEN, N1, C( I, 1 ), LDC, + $ WORK( 1 + N2*LDWORK ), LDWORK ) + CALL STRMM( 'Right', 'Lower', 'No Transpose', 'Non-Unit', + $ LEN, N1, ONE, Q( 1, N2+1 ), LDQ, + $ WORK( 1 + N2*LDWORK ), LDWORK ) +* +* Multiply right part of C by Q22. +* + CALL SGEMM( 'No Transpose', 'No Transpose', LEN, N1, N2, + $ ONE, C( I, N1+1 ), LDC, Q( N1+1, N2+1 ), LDQ, + $ ONE, WORK( 1 + N2*LDWORK ), LDWORK ) +* +* Copy everything back. +* + CALL SLACPY( 'All', LEN, N, WORK, LDWORK, C( I, 1 ), + $ LDC ) + END DO + ELSE + DO I = 1, M, NB + LEN = MIN( NB, M-I+1 ) + LDWORK = LEN +* +* Multiply right part of C by Q12**T. +* + CALL SLACPY( 'All', LEN, N1, C( I, N2+1 ), LDC, WORK, + $ LDWORK ) + CALL STRMM( 'Right', 'Lower', 'Transpose', 'Non-Unit', + $ LEN, N1, ONE, Q( 1, N2+1 ), LDQ, WORK, + $ LDWORK ) +* +* Multiply left part of C by Q11**T. +* + CALL SGEMM( 'No Transpose', 'Transpose', LEN, N1, N2, + $ ONE, C( I, 1 ), LDC, Q, LDQ, ONE, WORK, + $ LDWORK ) +* +* Multiply left part of C by Q21**T. +* + CALL SLACPY( 'All', LEN, N2, C( I, 1 ), LDC, + $ WORK( 1 + N1*LDWORK ), LDWORK ) + CALL STRMM( 'Right', 'Upper', 'Transpose', 'Non-Unit', + $ LEN, N2, ONE, Q( N1+1, 1 ), LDQ, + $ WORK( 1 + N1*LDWORK ), LDWORK ) +* +* Multiply right part of C by Q22**T. +* + CALL SGEMM( 'No Transpose', 'Transpose', LEN, N2, N1, + $ ONE, C( I, N2+1 ), LDC, Q( N1+1, N2+1 ), LDQ, + $ ONE, WORK( 1 + N1*LDWORK ), LDWORK ) +* +* Copy everything back. +* + CALL SLACPY( 'All', LEN, N, WORK, LDWORK, C( I, 1 ), + $ LDC ) + END DO + END IF + END IF +* + WORK( 1 ) = REAL( LWKOPT ) + RETURN +* +* End of SORM22 +* + END diff --git a/dspl/liblapack/SRC/sorm2l.f b/dspl/liblapack/SRC/sorm2l.f new file mode 100644 index 0000000..0542ae2 --- /dev/null +++ b/dspl/liblapack/SRC/sorm2l.f @@ -0,0 +1,278 @@ +*> \brief \b SORM2L multiplies a general matrix by the orthogonal matrix from a QL factorization determined by sgeqlf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORM2L + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORM2L overwrites the general real m by n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**T * C if SIDE = 'L' and TRANS = 'T', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**T if SIDE = 'R' and TRANS = 'T', +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by SGEQLF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left +*> = 'R': apply Q or Q**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'T': apply Q**T (Transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> SGEQLF in the last k columns of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SGEQLF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, MI, NI, NQ + REAL AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORM2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(1:m-k+i,1:n) +* + MI = M - K + I + ELSE +* +* H(i) is applied to C(1:m,1:n-k+i) +* + NI = N - K + I + END IF +* +* Apply H(i) +* + AII = A( NQ-K+I, I ) + A( NQ-K+I, I ) = ONE + CALL SLARF( SIDE, MI, NI, A( 1, I ), 1, TAU( I ), C, LDC, + $ WORK ) + A( NQ-K+I, I ) = AII + 10 CONTINUE + RETURN +* +* End of SORM2L +* + END diff --git a/dspl/liblapack/SRC/sorm2r.f b/dspl/liblapack/SRC/sorm2r.f new file mode 100644 index 0000000..61560d6 --- /dev/null +++ b/dspl/liblapack/SRC/sorm2r.f @@ -0,0 +1,282 @@ +*> \brief \b SORM2R multiplies a general matrix by the orthogonal matrix from a QR factorization determined by sgeqrf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORM2R + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORM2R overwrites the general real m by n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**T* C if SIDE = 'L' and TRANS = 'T', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**T if SIDE = 'R' and TRANS = 'T', +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by SGEQRF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left +*> = 'R': apply Q or Q**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'T': apply Q**T (Transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> SGEQRF in the first k columns of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SGEQRF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + REAL AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORM2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) +* + AII = A( I, I ) + A( I, I ) = ONE + CALL SLARF( SIDE, MI, NI, A( I, I ), 1, TAU( I ), C( IC, JC ), + $ LDC, WORK ) + A( I, I ) = AII + 10 CONTINUE + RETURN +* +* End of SORM2R +* + END diff --git a/dspl/liblapack/SRC/sormbr.f b/dspl/liblapack/SRC/sormbr.f new file mode 100644 index 0000000..425e04c --- /dev/null +++ b/dspl/liblapack/SRC/sormbr.f @@ -0,0 +1,374 @@ +*> \brief \b SORMBR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORMBR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, +* LDC, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS, VECT +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), C( LDC, * ), TAU( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> If VECT = 'Q', SORMBR overwrites the general real M-by-N matrix C +*> with +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> If VECT = 'P', SORMBR overwrites the general real M-by-N matrix C +*> with +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': P * C C * P +*> TRANS = 'T': P**T * C C * P**T +*> +*> Here Q and P**T are the orthogonal matrices determined by SGEBRD when +*> reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and +*> P**T are defined as products of elementary reflectors H(i) and G(i) +*> respectively. +*> +*> Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the +*> order of the orthogonal matrix Q or P**T that is applied. +*> +*> If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: +*> if nq >= k, Q = H(1) H(2) . . . H(k); +*> if nq < k, Q = H(1) H(2) . . . H(nq-1). +*> +*> If VECT = 'P', A is assumed to have been a K-by-NQ matrix: +*> if k < nq, P = G(1) G(2) . . . G(k); +*> if k >= nq, P = G(1) G(2) . . . G(nq-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'Q': apply Q or Q**T; +*> = 'P': apply P or P**T. +*> \endverbatim +*> +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q, Q**T, P or P**T from the Left; +*> = 'R': apply Q, Q**T, P or P**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q or P; +*> = 'T': Transpose, apply Q**T or P**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> If VECT = 'Q', the number of columns in the original +*> matrix reduced by SGEBRD. +*> If VECT = 'P', the number of rows in the original +*> matrix reduced by SGEBRD. +*> K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension +*> (LDA,min(nq,K)) if VECT = 'Q' +*> (LDA,nq) if VECT = 'P' +*> The vectors which define the elementary reflectors H(i) and +*> G(i), whose products determine the matrices Q and P, as +*> returned by SGEBRD. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If VECT = 'Q', LDA >= max(1,nq); +*> if VECT = 'P', LDA >= max(1,min(nq,K)). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (min(nq,K)) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i) or G(i) which determines Q or P, as returned +*> by SGEBRD in the array argument TAUQ or TAUP. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q +*> or P*C or P**T*C or C*P or C*P**T. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For optimum performance LWORK >= N*NB if SIDE = 'L', and +*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal +*> blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, + $ LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, VECT + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL APPLYQ, LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SORMLQ, SORMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + APPLYQ = LSAME( VECT, 'Q' ) + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q or P and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.APPLYQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( K.LT.0 ) THEN + INFO = -6 + ELSE IF( ( APPLYQ .AND. LDA.LT.MAX( 1, NQ ) ) .OR. + $ ( .NOT.APPLYQ .AND. LDA.LT.MAX( 1, MIN( NQ, K ) ) ) ) + $ THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( APPLYQ ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + ELSE + IF( LEFT ) THEN + NB = ILAENV( 1, 'SORMLQ', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'SORMLQ', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + WORK( 1 ) = 1 + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + IF( APPLYQ ) THEN +* +* Apply Q +* + IF( NQ.GE.K ) THEN +* +* Q was determined by a call to SGEBRD with nq >= k +* + CALL SORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* Q was determined by a call to SGEBRD with nq < k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL SORMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, + $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + ELSE +* +* Apply P +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF + IF( NQ.GT.K ) THEN +* +* P was determined by a call to SGEBRD with nq > k +* + CALL SORMLQ( SIDE, TRANST, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* P was determined by a call to SGEBRD with nq <= k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL SORMLQ( SIDE, TRANST, MI, NI, NQ-1, A( 1, 2 ), LDA, + $ TAU, C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORMBR +* + END diff --git a/dspl/liblapack/SRC/sormhr.f b/dspl/liblapack/SRC/sormhr.f new file mode 100644 index 0000000..10e0b3c --- /dev/null +++ b/dspl/liblapack/SRC/sormhr.f @@ -0,0 +1,296 @@ +*> \brief \b SORMHR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORMHR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORMHR( SIDE, TRANS, M, N, ILO, IHI, A, LDA, TAU, C, +* LDC, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER IHI, ILO, INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), C( LDC, * ), TAU( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORMHR overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix of order nq, with nq = m if +*> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +*> IHI-ILO elementary reflectors, as returned by SGEHRD: +*> +*> Q = H(ilo) H(ilo+1) . . . H(ihi-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> ILO and IHI must have the same values as in the previous call +*> of SGEHRD. Q is equal to the unit matrix except in the +*> submatrix Q(ilo+1:ihi,ilo+1:ihi). +*> If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and +*> ILO = 1 and IHI = 0, if M = 0; +*> if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and +*> ILO = 1 and IHI = 0, if N = 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension +*> (LDA,M) if SIDE = 'L' +*> (LDA,N) if SIDE = 'R' +*> The vectors which define the elementary reflectors, as +*> returned by SGEHRD. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SGEHRD. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For optimum performance LWORK >= N*NB if SIDE = 'L', and +*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal +*> blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORMHR( SIDE, TRANS, M, N, ILO, IHI, A, LDA, TAU, C, + $ LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER IHI, ILO, INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NH, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SORMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NH = IHI - ILO + LEFT = LSAME( SIDE, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'T' ) ) + $ THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, NQ ) ) THEN + INFO = -5 + ELSE IF( IHI.LT.MIN( ILO, NQ ) .OR. IHI.GT.NQ ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, NH, N, NH, -1 ) + ELSE + NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, M, NH, NH, -1 ) + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMHR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. NH.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( LEFT ) THEN + MI = NH + NI = N + I1 = ILO + 1 + I2 = 1 + ELSE + MI = M + NI = NH + I1 = 1 + I2 = ILO + 1 + END IF +* + CALL SORMQR( SIDE, TRANS, MI, NI, NH, A( ILO+1, ILO ), LDA, + $ TAU( ILO ), C( I1, I2 ), LDC, WORK, LWORK, IINFO ) +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORMHR +* + END diff --git a/dspl/liblapack/SRC/sorml2.f b/dspl/liblapack/SRC/sorml2.f new file mode 100644 index 0000000..b9242ce --- /dev/null +++ b/dspl/liblapack/SRC/sorml2.f @@ -0,0 +1,282 @@ +*> \brief \b SORML2 multiplies a general matrix by the orthogonal matrix from a LQ factorization determined by sgelqf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORML2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORML2 overwrites the general real m by n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**T* C if SIDE = 'L' and TRANS = 'T', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**T if SIDE = 'R' and TRANS = 'T', +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by SGELQF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left +*> = 'R': apply Q or Q**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'T': apply Q**T (Transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> SGELQF in the first k rows of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SGELQF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + REAL AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORML2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) +* + AII = A( I, I ) + A( I, I ) = ONE + CALL SLARF( SIDE, MI, NI, A( I, I ), LDA, TAU( I ), + $ C( IC, JC ), LDC, WORK ) + A( I, I ) = AII + 10 CONTINUE + RETURN +* +* End of SORML2 +* + END diff --git a/dspl/liblapack/SRC/sormlq.f b/dspl/liblapack/SRC/sormlq.f new file mode 100644 index 0000000..5cc4b87 --- /dev/null +++ b/dspl/liblapack/SRC/sormlq.f @@ -0,0 +1,349 @@ +*> \brief \b SORMLQ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORMLQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), C( LDC, * ), TAU( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORMLQ overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by SGELQF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> SGELQF in the first k rows of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SGELQF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORML2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + NB = MIN( NBMAX, ILAENV( 1, 'SORMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + TSIZE + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL SORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IWT = 1 + NW*NB + IF( ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL SLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK( IWT ), LDT ) + IF( LEFT ) THEN +* +* H or H**T is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H**T is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H**T +* + CALL SLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB, + $ A( I, I ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORMLQ +* + END diff --git a/dspl/liblapack/SRC/sormql.f b/dspl/liblapack/SRC/sormql.f new file mode 100644 index 0000000..5162569 --- /dev/null +++ b/dspl/liblapack/SRC/sormql.f @@ -0,0 +1,341 @@ +*> \brief \b SORMQL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORMQL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), C( LDC, * ), TAU( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORMQL overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by SGEQLF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> SGEQLF in the last k columns of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SGEQLF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT, + $ MI, NB, NBMIN, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORM2L, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = MAX( 1, N ) + ELSE + NQ = N + NW = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = MIN( NBMAX, ILAENV( 1, 'SORMQL', SIDE // TRANS, M, N, + $ K, -1 ) ) + LWKOPT = NW*NB + TSIZE + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORMQL', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL SORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IWT = 1 + NW*NB + IF( ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL SLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB, + $ A( 1, I ), LDA, TAU( I ), WORK( IWT ), LDT ) + IF( LEFT ) THEN +* +* H or H**T is applied to C(1:m-k+i+ib-1,1:n) +* + MI = M - K + I + IB - 1 + ELSE +* +* H or H**T is applied to C(1:m,1:n-k+i+ib-1) +* + NI = N - K + I + IB - 1 + END IF +* +* Apply H or H**T +* + CALL SLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI, + $ IB, A( 1, I ), LDA, WORK( IWT ), LDT, C, LDC, + $ WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORMQL +* + END diff --git a/dspl/liblapack/SRC/sormqr.f b/dspl/liblapack/SRC/sormqr.f new file mode 100644 index 0000000..5132f1a --- /dev/null +++ b/dspl/liblapack/SRC/sormqr.f @@ -0,0 +1,342 @@ +*> \brief \b SORMQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORMQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), C( LDC, * ), TAU( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORMQR overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by SGEQRF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> SGEQRF in the first k columns of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SGEQRF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORM2R, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + NB = MIN( NBMAX, ILAENV( 1, 'SORMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + TSIZE + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL SORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IWT = 1 + NW*NB + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL SLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK( IWT ), LDT ) + IF( LEFT ) THEN +* +* H or H**T is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H**T is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H**T +* + CALL SLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI, + $ IB, A( I, I ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORMQR +* + END diff --git a/dspl/liblapack/SRC/sormr2.f b/dspl/liblapack/SRC/sormr2.f new file mode 100644 index 0000000..8062115 --- /dev/null +++ b/dspl/liblapack/SRC/sormr2.f @@ -0,0 +1,278 @@ +*> \brief \b SORMR2 multiplies a general matrix by the orthogonal matrix from a RQ factorization determined by sgerqf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORMR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORMR2 overwrites the general real m by n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**T* C if SIDE = 'L' and TRANS = 'T', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**T if SIDE = 'R' and TRANS = 'T', +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by SGERQF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left +*> = 'R': apply Q or Q**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'T': apply Q' (Transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> SGERQF in the last k rows of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SGERQF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, MI, NI, NQ + REAL AII +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLARF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMR2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) ) + $ THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) is applied to C(1:m-k+i,1:n) +* + MI = M - K + I + ELSE +* +* H(i) is applied to C(1:m,1:n-k+i) +* + NI = N - K + I + END IF +* +* Apply H(i) +* + AII = A( I, NQ-K+I ) + A( I, NQ-K+I ) = ONE + CALL SLARF( SIDE, MI, NI, A( I, 1 ), LDA, TAU( I ), C, LDC, + $ WORK ) + A( I, NQ-K+I ) = AII + 10 CONTINUE + RETURN +* +* End of SORMR2 +* + END diff --git a/dspl/liblapack/SRC/sormr3.f b/dspl/liblapack/SRC/sormr3.f new file mode 100644 index 0000000..461957a --- /dev/null +++ b/dspl/liblapack/SRC/sormr3.f @@ -0,0 +1,299 @@ +*> \brief \b SORMR3 multiplies a general matrix by the orthogonal matrix from a RZ factorization determined by stzrzf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORMR3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, L, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORMR3 overwrites the general real m by n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**T* C if SIDE = 'L' and TRANS = 'C', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**T if SIDE = 'R' and TRANS = 'C', +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by STZRZF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left +*> = 'R': apply Q or Q**T from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'T': apply Q**T (Transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of columns of the matrix A containing +*> the meaningful part of the Householder reflectors. +*> If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> STZRZF in the last k rows of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by STZRZF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the m-by-n matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SORMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, L, LDA, LDC, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JA, JC, MI, NI, NQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLARZ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR. + $ ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMR3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JA = M - L + 1 + JC = 1 + ELSE + MI = M + JA = N - L + 1 + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)**T is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) or H(i)**T is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) or H(i)**T +* + CALL SLARZ( SIDE, MI, NI, L, A( I, JA ), LDA, TAU( I ), + $ C( IC, JC ), LDC, WORK ) +* + 10 CONTINUE +* + RETURN +* +* End of SORMR3 +* + END diff --git a/dspl/liblapack/SRC/sormrq.f b/dspl/liblapack/SRC/sormrq.f new file mode 100644 index 0000000..ec7775b --- /dev/null +++ b/dspl/liblapack/SRC/sormrq.f @@ -0,0 +1,348 @@ +*> \brief \b SORMRQ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORMRQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), C( LDC, * ), TAU( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORMRQ overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by SGERQF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> SGERQF in the last k rows of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SGERQF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT, + $ MI, NB, NBMIN, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, SLARFT, SORMR2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = MAX( 1, N ) + ELSE + NQ = N + NW = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = MIN( NBMAX, ILAENV( 1, 'SORMRQ', SIDE // TRANS, M, N, + $ K, -1 ) ) + LWKOPT = NW*NB + TSIZE + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMRQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL SORMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IWT = 1 + NW*NB + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL SLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB, + $ A( I, 1 ), LDA, TAU( I ), WORK( IWT ), LDT ) + IF( LEFT ) THEN +* +* H or H**T is applied to C(1:m-k+i+ib-1,1:n) +* + MI = M - K + I + IB - 1 + ELSE +* +* H or H**T is applied to C(1:m,1:n-k+i+ib-1) +* + NI = N - K + I + IB - 1 + END IF +* +* Apply H or H**T +* + CALL SLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, + $ IB, A( I, 1 ), LDA, WORK( IWT ), LDT, C, LDC, + $ WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORMRQ +* + END diff --git a/dspl/liblapack/SRC/sormrz.f b/dspl/liblapack/SRC/sormrz.f new file mode 100644 index 0000000..e86d1aa --- /dev/null +++ b/dspl/liblapack/SRC/sormrz.f @@ -0,0 +1,379 @@ +*> \brief \b SORMRZ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORMRZ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORMRZ( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, L, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORMRZ overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by STZRZF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of columns of the matrix A containing +*> the meaningful part of the Householder reflectors. +*> If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> STZRZF in the last k rows of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by STZRZF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SORMRZ( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, L, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JA, JC, + $ LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SLARZB, SLARZT, SORMR3, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = MAX( 1, N ) + ELSE + NQ = N + NW = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR. + $ ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = MIN( NBMAX, ILAENV( 1, 'SORMRQ', SIDE // TRANS, M, N, + $ K, -1 ) ) + LWKOPT = NW*NB + TSIZE + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMRZ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SORMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL SORMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, IINFO ) + ELSE +* +* Use blocked code +* + IWT = 1 + NW*NB + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + JA = M - L + 1 + ELSE + MI = M + IC = 1 + JA = N - L + 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL SLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA, + $ TAU( I ), WORK( IWT ), LDT ) +* + IF( LEFT ) THEN +* +* H or H**T is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H**T is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H**T +* + CALL SLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, + $ IB, L, A( I, JA ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) + 10 CONTINUE +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SORMRZ +* + END diff --git a/dspl/liblapack/SRC/sormtr.f b/dspl/liblapack/SRC/sormtr.f new file mode 100644 index 0000000..5d46ff9 --- /dev/null +++ b/dspl/liblapack/SRC/sormtr.f @@ -0,0 +1,312 @@ +*> \brief \b SORMTR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SORMTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SORMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS, UPLO +* INTEGER INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), C( LDC, * ), TAU( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SORMTR overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**T * C C * Q**T +*> +*> where Q is a real orthogonal matrix of order nq, with nq = m if +*> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +*> nq-1 elementary reflectors, as returned by SSYTRD: +*> +*> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); +*> +*> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A contains elementary reflectors +*> from SSYTRD; +*> = 'L': Lower triangle of A contains elementary reflectors +*> from SSYTRD. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension +*> (LDA,M) if SIDE = 'L' +*> (LDA,N) if SIDE = 'R' +*> The vectors which define the elementary reflectors, as +*> returned by SSYTRD. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is REAL array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by SSYTRD. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For optimum performance LWORK >= N*NB if SIDE = 'L', and +*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal +*> blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SORMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, UPLO + INTEGER INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( LDC, * ), TAU( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, UPPER + INTEGER I1, I2, IINFO, LWKOPT, MI, NI, NB, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SORMQL, SORMQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'T' ) ) + $ THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( UPPER ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'SORMQL', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'SORMQL', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + ELSE + IF( LEFT ) THEN + NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SORMTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + ELSE + MI = M + NI = N - 1 + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to SSYTRD with UPLO = 'U' +* + CALL SORMQL( SIDE, TRANS, MI, NI, NQ-1, A( 1, 2 ), LDA, TAU, C, + $ LDC, WORK, LWORK, IINFO ) + ELSE +* +* Q was determined by a call to SSYTRD with UPLO = 'L' +* + IF( LEFT ) THEN + I1 = 2 + I2 = 1 + ELSE + I1 = 1 + I2 = 2 + END IF + CALL SORMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, + $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of SORMTR +* + END diff --git a/dspl/liblapack/SRC/spbcon.f b/dspl/liblapack/SRC/spbcon.f new file mode 100644 index 0000000..a32e605 --- /dev/null +++ b/dspl/liblapack/SRC/spbcon.f @@ -0,0 +1,271 @@ +*> \brief \b SPBCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPBCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPBCON( UPLO, N, KD, AB, LDAB, ANORM, RCOND, WORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL AB( LDAB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPBCON estimates the reciprocal of the condition number (in the +*> 1-norm) of a real symmetric positive definite band matrix using the +*> Cholesky factorization A = U**T*U or A = L*L**T computed by SPBTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular factor stored in AB; +*> = 'L': Lower triangular factor stored in AB. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T of the band matrix A, stored in the +*> first KD+1 rows of the array. The j-th column of U or L is +*> stored in the j-th column of the array AB as follows: +*> if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The 1-norm (or infinity-norm) of the symmetric band matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SPBCON( UPLO, N, KD, AB, LDAB, ANORM, RCOND, WORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AB( LDAB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + REAL AINVNM, SCALE, SCALEL, SCALEU, SMLNUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SLATBS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U**T). +* + CALL SLATBS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEL, WORK( 2*N+1 ), + $ INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL SLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEU, WORK( 2*N+1 ), + $ INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL SLATBS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEL, WORK( 2*N+1 ), + $ INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L**T). +* + CALL SLATBS( 'Lower', 'Transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEU, WORK( 2*N+1 ), + $ INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE +* + RETURN +* +* End of SPBCON +* + END diff --git a/dspl/liblapack/SRC/spbequ.f b/dspl/liblapack/SRC/spbequ.f new file mode 100644 index 0000000..6379831 --- /dev/null +++ b/dspl/liblapack/SRC/spbequ.f @@ -0,0 +1,242 @@ +*> \brief \b SPBEQU +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPBEQU + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, N +* REAL AMAX, SCOND +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPBEQU computes row and column scalings intended to equilibrate a +*> symmetric positive definite band matrix A and reduce its condition +*> number (with respect to the two-norm). S contains the scale factors, +*> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with +*> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This +*> choice of S puts the condition number of B within a factor N of the +*> smallest possible condition number over all possible diagonal +*> scalings. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular of A is stored; +*> = 'L': Lower triangular of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> The upper or lower triangle of the symmetric band matrix A, +*> stored in the first KD+1 rows of the array. The j-th column +*> of A is stored in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array A. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is REAL +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J + REAL SMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* + IF( UPPER ) THEN + J = KD + 1 + ELSE + J = 1 + END IF +* +* Initialize SMIN and AMAX. +* + S( 1 ) = AB( J, 1 ) + SMIN = S( 1 ) + AMAX = S( 1 ) +* +* Find the minimum and maximum diagonal elements. +* + DO 10 I = 2, N + S( I ) = AB( J, I ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 20 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 20 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 30 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 30 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of SPBEQU +* + END diff --git a/dspl/liblapack/SRC/spbrfs.f b/dspl/liblapack/SRC/spbrfs.f new file mode 100644 index 0000000..1f83244 --- /dev/null +++ b/dspl/liblapack/SRC/spbrfs.f @@ -0,0 +1,443 @@ +*> \brief \b SPBRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPBRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, +* LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPBRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric positive definite +*> and banded, and provides error bounds and backward error estimates +*> for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> The upper or lower triangle of the symmetric band matrix A, +*> stored in the first KD+1 rows of the array. The j-th column +*> of A is stored in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is REAL array, dimension (LDAFB,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T of the band matrix A as computed by +*> SPBTRF, in the same storage format as A (see AB). +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= KD+1. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by SPBTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, + $ LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, L, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLACN2, SPBTRS, SSBMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDAFB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = MIN( N+1, 2*KD+2 ) + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL SSBMV( UPLO, N, KD, -ONE, AB, LDAB, X( 1, J ), 1, ONE, + $ WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + L = KD + 1 - K + DO 40 I = MAX( 1, K-KD ), K - 1 + WORK( I ) = WORK( I ) + ABS( AB( L+I, K ) )*XK + S = S + ABS( AB( L+I, K ) )*ABS( X( I, J ) ) + 40 CONTINUE + WORK( K ) = WORK( K ) + ABS( AB( KD+1, K ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + WORK( K ) = WORK( K ) + ABS( AB( 1, K ) )*XK + L = 1 - K + DO 60 I = K + 1, MIN( N, K+KD ) + WORK( I ) = WORK( I ) + ABS( AB( L+I, K ) )*XK + S = S + ABS( AB( L+I, K ) )*ABS( X( I, J ) ) + 60 CONTINUE + WORK( K ) = WORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL SPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK( N+1 ), N, + $ INFO ) + CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL SLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**T). +* + CALL SPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK( N+1 ), N, + $ INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( N+I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( N+I )*WORK( I ) + 120 CONTINUE + CALL SPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK( N+1 ), N, + $ INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of SPBRFS +* + END diff --git a/dspl/liblapack/SRC/spbstf.f b/dspl/liblapack/SRC/spbstf.f new file mode 100644 index 0000000..dba3d70 --- /dev/null +++ b/dspl/liblapack/SRC/spbstf.f @@ -0,0 +1,319 @@ +*> \brief \b SPBSTF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPBSTF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPBSTF( UPLO, N, KD, AB, LDAB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPBSTF computes a split Cholesky factorization of a real +*> symmetric positive definite band matrix A. +*> +*> This routine is designed to be used in conjunction with SSBGST. +*> +*> The factorization has the form A = S**T*S where S is a band matrix +*> of the same bandwidth as A and the following structure: +*> +*> S = ( U ) +*> ( M L ) +*> +*> where U is upper triangular of order m = (n+kd)/2, and L is lower +*> triangular of order n-m. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first kd+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, if INFO = 0, the factor S from the split Cholesky +*> factorization A = S**T*S. See Further Details. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the factorization could not be completed, +*> because the updated element a(i,i) was negative; the +*> matrix A is not positive definite. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> N = 7, KD = 2: +*> +*> S = ( s11 s12 s13 ) +*> ( s22 s23 s24 ) +*> ( s33 s34 ) +*> ( s44 ) +*> ( s53 s54 s55 ) +*> ( s64 s65 s66 ) +*> ( s75 s76 s77 ) +*> +*> If UPLO = 'U', the array AB holds: +*> +*> on entry: on exit: +*> +*> * * a13 a24 a35 a46 a57 * * s13 s24 s53 s64 s75 +*> * a12 a23 a34 a45 a56 a67 * s12 s23 s34 s54 s65 s76 +*> a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 +*> +*> If UPLO = 'L', the array AB holds: +*> +*> on entry: on exit: +*> +*> a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 +*> a21 a32 a43 a54 a65 a76 * s12 s23 s34 s54 s65 s76 * +*> a31 a42 a53 a64 a64 * * s13 s24 s53 s64 s75 * * +*> +*> Array elements marked * are not used by the routine. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SPBSTF( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, KLD, KM, M + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBSTF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + KLD = MAX( 1, LDAB-1 ) +* +* Set the splitting point m. +* + M = ( N+KD ) / 2 +* + IF( UPPER ) THEN +* +* Factorize A(m+1:n,m+1:n) as L**T*L, and update A(1:m,1:m). +* + DO 10 J = N, M + 1, -1 +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = AB( KD+1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 50 + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ + KM = MIN( J-1, KD ) +* +* Compute elements j-km:j-1 of the j-th column and update the +* the leading submatrix within the band. +* + CALL SSCAL( KM, ONE / AJJ, AB( KD+1-KM, J ), 1 ) + CALL SSYR( 'Upper', KM, -ONE, AB( KD+1-KM, J ), 1, + $ AB( KD+1, J-KM ), KLD ) + 10 CONTINUE +* +* Factorize the updated submatrix A(1:m,1:m) as U**T*U. +* + DO 20 J = 1, M +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = AB( KD+1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 50 + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ + KM = MIN( KD, M-J ) +* +* Compute elements j+1:j+km of the j-th row and update the +* trailing submatrix within the band. +* + IF( KM.GT.0 ) THEN + CALL SSCAL( KM, ONE / AJJ, AB( KD, J+1 ), KLD ) + CALL SSYR( 'Upper', KM, -ONE, AB( KD, J+1 ), KLD, + $ AB( KD+1, J+1 ), KLD ) + END IF + 20 CONTINUE + ELSE +* +* Factorize A(m+1:n,m+1:n) as L**T*L, and update A(1:m,1:m). +* + DO 30 J = N, M + 1, -1 +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = AB( 1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 50 + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ + KM = MIN( J-1, KD ) +* +* Compute elements j-km:j-1 of the j-th row and update the +* trailing submatrix within the band. +* + CALL SSCAL( KM, ONE / AJJ, AB( KM+1, J-KM ), KLD ) + CALL SSYR( 'Lower', KM, -ONE, AB( KM+1, J-KM ), KLD, + $ AB( 1, J-KM ), KLD ) + 30 CONTINUE +* +* Factorize the updated submatrix A(1:m,1:m) as U**T*U. +* + DO 40 J = 1, M +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = AB( 1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 50 + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ + KM = MIN( KD, M-J ) +* +* Compute elements j+1:j+km of the j-th column and update the +* trailing submatrix within the band. +* + IF( KM.GT.0 ) THEN + CALL SSCAL( KM, ONE / AJJ, AB( 2, J ), 1 ) + CALL SSYR( 'Lower', KM, -ONE, AB( 2, J ), 1, + $ AB( 1, J+1 ), KLD ) + END IF + 40 CONTINUE + END IF + RETURN +* + 50 CONTINUE + INFO = J + RETURN +* +* End of SPBSTF +* + END diff --git a/dspl/liblapack/SRC/spbsv.f b/dspl/liblapack/SRC/spbsv.f new file mode 100644 index 0000000..aab1c74 --- /dev/null +++ b/dspl/liblapack/SRC/spbsv.f @@ -0,0 +1,229 @@ +*> \brief SPBSV computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPBSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPBSV( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPBSV computes the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric positive definite band matrix and X +*> and B are N-by-NRHS matrices. +*> +*> The Cholesky decomposition is used to factor A as +*> A = U**T * U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular band matrix, and L is a lower +*> triangular band matrix, with the same number of superdiagonals or +*> subdiagonals as A. The factored form of A is then used to solve the +*> system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(N,j+KD). +*> See below for further details. +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**T*U or A = L*L**T of the band +*> matrix A, in the same storage format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i of A is not +*> positive definite, so the factorization could not be +*> completed, and the solution has not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> N = 6, KD = 2, and UPLO = 'U': +*> +*> On entry: On exit: +*> +*> * * a13 a24 a35 a46 * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> +*> Similarly, if UPLO = 'L' the format of A is as follows: +*> +*> On entry: On exit: +*> +*> a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +*> a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +*> a31 a42 a53 a64 * * l31 l42 l53 l64 * * +*> +*> Array elements marked * are not used by the routine. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SPBSV( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SPBTRF, SPBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U**T*U or A = L*L**T. +* + CALL SPBTRF( UPLO, N, KD, AB, LDAB, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL SPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* + END IF + RETURN +* +* End of SPBSV +* + END diff --git a/dspl/liblapack/SRC/spbsvx.f b/dspl/liblapack/SRC/spbsvx.f new file mode 100644 index 0000000..c43a079 --- /dev/null +++ b/dspl/liblapack/SRC/spbsvx.f @@ -0,0 +1,545 @@ +*> \brief SPBSVX computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPBSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPBSVX( FACT, UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, +* EQUED, S, B, LDB, X, LDX, RCOND, FERR, BERR, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ BERR( * ), FERR( * ), S( * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPBSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to +*> compute the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric positive definite band matrix and X +*> and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +*> factor the matrix A (after equilibration if FACT = 'E') as +*> A = U**T * U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular band matrix, and L is a lower +*> triangular band matrix. +*> +*> 3. If the leading i-by-i principal minor is not positive definite, +*> then the routine returns with INFO = i. Otherwise, the factored +*> form of A is used to estimate the condition number of the matrix +*> A. If the reciprocal of the condition number is less than machine +*> precision, INFO = N+1 is returned as a warning, but the routine +*> still goes on to solve for X and compute error bounds as +*> described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(S) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AFB contains the factored form of A. +*> If EQUED = 'Y', the matrix A has been equilibrated +*> with scaling factors given by S. AB and AFB will not +*> be modified. +*> = 'N': The matrix A will be copied to AFB and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AFB and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array, except +*> if FACT = 'F' and EQUED = 'Y', then A must contain the +*> equilibrated matrix diag(S)*A*diag(S). The j-th column of A +*> is stored in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(N,j+KD). +*> See below for further details. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array A. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] AFB +*> \verbatim +*> AFB is REAL array, dimension (LDAFB,N) +*> If FACT = 'F', then AFB is an input argument and on entry +*> contains the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T of the band matrix +*> A, in the same storage format as A (see AB). If EQUED = 'Y', +*> then AFB is the factored form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AFB is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T. +*> +*> If FACT = 'E', then AFB is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T of the equilibrated +*> matrix A (see the description of A for the form of the +*> equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A; not accessed if EQUED = 'N'. S is +*> an input argument if FACT = 'F'; otherwise, S is an output +*> argument. If FACT = 'F' and EQUED = 'Y', each element of S +*> must be positive. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +*> B is overwritten by diag(S) * B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +*> the original system of equations. Note that if EQUED = 'Y', +*> A and B are modified on exit, and the solution to the +*> equilibrated system is inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The estimate of the reciprocal condition number of the matrix +*> A after equilibration (if done). If RCOND is less than the +*> machine precision (in particular, if RCOND = 0), the matrix +*> is singular to working precision. This condition is +*> indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: the leading minor of order i of A is +*> not positive definite, so the factorization +*> could not be completed, and the solution has not +*> been computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realOTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> N = 6, KD = 2, and UPLO = 'U': +*> +*> Two-dimensional storage of the symmetric matrix A: +*> +*> a11 a12 a13 +*> a22 a23 a24 +*> a33 a34 a35 +*> a44 a45 a46 +*> a55 a56 +*> (aij=conjg(aji)) a66 +*> +*> Band storage of the upper triangle of A: +*> +*> * * a13 a24 a35 a46 +*> * a12 a23 a34 a45 a56 +*> a11 a22 a33 a44 a55 a66 +*> +*> Similarly, if UPLO = 'L' the format of A is as follows: +*> +*> a11 a22 a33 a44 a55 a66 +*> a21 a32 a43 a54 a65 * +*> a31 a42 a53 a64 * * +*> +*> Array elements marked * are not used by the routine. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SPBSVX( FACT, UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, + $ EQUED, S, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), S( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU, UPPER + INTEGER I, INFEQU, J, J1, J2 + REAL AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSB + EXTERNAL LSAME, SLAMCH, SLANSB +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLACPY, SLAQSB, SPBCON, SPBEQU, SPBRFS, + $ SPBTRF, SPBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + UPPER = LSAME( UPLO, 'U' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( LDAFB.LT.KD+1 ) THEN + INFO = -9 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL SPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL SLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U**T *U or A = L*L**T. +* + IF( UPPER ) THEN + DO 40 J = 1, N + J1 = MAX( J-KD, 1 ) + CALL SCOPY( J-J1+1, AB( KD+1-J+J1, J ), 1, + $ AFB( KD+1-J+J1, J ), 1 ) + 40 CONTINUE + ELSE + DO 50 J = 1, N + J2 = MIN( J+KD, N ) + CALL SCOPY( J2-J+1, AB( 1, J ), 1, AFB( 1, J ), 1 ) + 50 CONTINUE + END IF +* + CALL SPBTRF( UPLO, N, KD, AFB, LDAFB, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = SLANSB( '1', UPLO, N, KD, AB, LDAB, WORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL SPBCON( UPLO, N, KD, AFB, LDAFB, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* Compute the solution matrix X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SPBTRS( UPLO, N, KD, NRHS, AFB, LDAFB, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL SPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, LDB, X, + $ LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 70 J = 1, NRHS + DO 60 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 60 CONTINUE + 70 CONTINUE + DO 80 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 80 CONTINUE + END IF +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of SPBSVX +* + END diff --git a/dspl/liblapack/SRC/spbtf2.f b/dspl/liblapack/SRC/spbtf2.f new file mode 100644 index 0000000..6da0108 --- /dev/null +++ b/dspl/liblapack/SRC/spbtf2.f @@ -0,0 +1,263 @@ +*> \brief \b SPBTF2 computes the Cholesky factorization of a symmetric/Hermitian positive definite band matrix (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPBTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPBTF2( UPLO, N, KD, AB, LDAB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPBTF2 computes the Cholesky factorization of a real symmetric +*> positive definite band matrix A. +*> +*> The factorization has the form +*> A = U**T * U , if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix, U**T is the transpose of U, and +*> L is lower triangular. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of super-diagonals of the matrix A if UPLO = 'U', +*> or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**T*U or A = L*L**T of the band +*> matrix A, in the same storage format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, the leading minor of order k is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> N = 6, KD = 2, and UPLO = 'U': +*> +*> On entry: On exit: +*> +*> * * a13 a24 a35 a46 * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> +*> Similarly, if UPLO = 'L' the format of A is as follows: +*> +*> On entry: On exit: +*> +*> a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +*> a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +*> a31 a42 a53 a64 * * l31 l42 l53 l64 * * +*> +*> Array elements marked * are not used by the routine. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SPBTF2( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, KLD, KN + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + KLD = MAX( 1, LDAB-1 ) +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U**T*U. +* + DO 10 J = 1, N +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = AB( KD+1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 30 + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ +* +* Compute elements J+1:J+KN of row J and update the +* trailing submatrix within the band. +* + KN = MIN( KD, N-J ) + IF( KN.GT.0 ) THEN + CALL SSCAL( KN, ONE / AJJ, AB( KD, J+1 ), KLD ) + CALL SSYR( 'Upper', KN, -ONE, AB( KD, J+1 ), KLD, + $ AB( KD+1, J+1 ), KLD ) + END IF + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L**T. +* + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = AB( 1, J ) + IF( AJJ.LE.ZERO ) + $ GO TO 30 + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ +* +* Compute elements J+1:J+KN of column J and update the +* trailing submatrix within the band. +* + KN = MIN( KD, N-J ) + IF( KN.GT.0 ) THEN + CALL SSCAL( KN, ONE / AJJ, AB( 2, J ), 1 ) + CALL SSYR( 'Lower', KN, -ONE, AB( 2, J ), 1, + $ AB( 1, J+1 ), KLD ) + END IF + 20 CONTINUE + END IF + RETURN +* + 30 CONTINUE + INFO = J + RETURN +* +* End of SPBTF2 +* + END diff --git a/dspl/liblapack/SRC/spbtrf.f b/dspl/liblapack/SRC/spbtrf.f new file mode 100644 index 0000000..3cd39fa --- /dev/null +++ b/dspl/liblapack/SRC/spbtrf.f @@ -0,0 +1,435 @@ +*> \brief \b SPBTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPBTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPBTRF( UPLO, N, KD, AB, LDAB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPBTRF computes the Cholesky factorization of a real symmetric +*> positive definite band matrix A. +*> +*> The factorization has the form +*> A = U**T * U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**T*U or A = L*L**T of the band +*> matrix A, in the same storage format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> N = 6, KD = 2, and UPLO = 'U': +*> +*> On entry: On exit: +*> +*> * * a13 a24 a35 a46 * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> +*> Similarly, if UPLO = 'L' the format of A is as follows: +*> +*> On entry: On exit: +*> +*> a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +*> a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +*> a31 a42 a53 a64 * * l31 l42 l53 l64 * * +*> +*> Array elements marked * are not used by the routine. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989 +* +* ===================================================================== + SUBROUTINE SPBTRF( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) + INTEGER NBMAX, LDWORK + PARAMETER ( NBMAX = 32, LDWORK = NBMAX+1 ) +* .. +* .. Local Scalars .. + INTEGER I, I2, I3, IB, II, J, JJ, NB +* .. +* .. Local Arrays .. + REAL WORK( LDWORK, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SPBTF2, SPOTF2, SSYRK, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( UPLO, 'U' ) ) .AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment +* + NB = ILAENV( 1, 'SPBTRF', UPLO, N, KD, -1, -1 ) +* +* The block size must not exceed the semi-bandwidth KD, and must not +* exceed the limit set by the size of the local array WORK. +* + NB = MIN( NB, NBMAX ) +* + IF( NB.LE.1 .OR. NB.GT.KD ) THEN +* +* Use unblocked code +* + CALL SPBTF2( UPLO, N, KD, AB, LDAB, INFO ) + ELSE +* +* Use blocked code +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Compute the Cholesky factorization of a symmetric band +* matrix, given the upper triangle of the matrix in band +* storage. +* +* Zero the upper triangle of the work array. +* + DO 20 J = 1, NB + DO 10 I = 1, J - 1 + WORK( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Process the band matrix one diagonal block at a time. +* + DO 70 I = 1, N, NB + IB = MIN( NB, N-I+1 ) +* +* Factorize the diagonal block +* + CALL SPOTF2( UPLO, IB, AB( KD+1, I ), LDAB-1, II ) + IF( II.NE.0 ) THEN + INFO = I + II - 1 + GO TO 150 + END IF + IF( I+IB.LE.N ) THEN +* +* Update the relevant part of the trailing submatrix. +* If A11 denotes the diagonal block which has just been +* factorized, then we need to update the remaining +* blocks in the diagram: +* +* A11 A12 A13 +* A22 A23 +* A33 +* +* The numbers of rows and columns in the partitioning +* are IB, I2, I3 respectively. The blocks A12, A22 and +* A23 are empty if IB = KD. The upper triangle of A13 +* lies outside the band. +* + I2 = MIN( KD-IB, N-I-IB+1 ) + I3 = MIN( IB, N-I-KD+1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A12 +* + CALL STRSM( 'Left', 'Upper', 'Transpose', + $ 'Non-unit', IB, I2, ONE, AB( KD+1, I ), + $ LDAB-1, AB( KD+1-IB, I+IB ), LDAB-1 ) +* +* Update A22 +* + CALL SSYRK( 'Upper', 'Transpose', I2, IB, -ONE, + $ AB( KD+1-IB, I+IB ), LDAB-1, ONE, + $ AB( KD+1, I+IB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Copy the lower triangle of A13 into the work array. +* + DO 40 JJ = 1, I3 + DO 30 II = JJ, IB + WORK( II, JJ ) = AB( II-JJ+1, JJ+I+KD-1 ) + 30 CONTINUE + 40 CONTINUE +* +* Update A13 (in the work array). +* + CALL STRSM( 'Left', 'Upper', 'Transpose', + $ 'Non-unit', IB, I3, ONE, AB( KD+1, I ), + $ LDAB-1, WORK, LDWORK ) +* +* Update A23 +* + IF( I2.GT.0 ) + $ CALL SGEMM( 'Transpose', 'No Transpose', I2, I3, + $ IB, -ONE, AB( KD+1-IB, I+IB ), + $ LDAB-1, WORK, LDWORK, ONE, + $ AB( 1+IB, I+KD ), LDAB-1 ) +* +* Update A33 +* + CALL SSYRK( 'Upper', 'Transpose', I3, IB, -ONE, + $ WORK, LDWORK, ONE, AB( KD+1, I+KD ), + $ LDAB-1 ) +* +* Copy the lower triangle of A13 back into place. +* + DO 60 JJ = 1, I3 + DO 50 II = JJ, IB + AB( II-JJ+1, JJ+I+KD-1 ) = WORK( II, JJ ) + 50 CONTINUE + 60 CONTINUE + END IF + END IF + 70 CONTINUE + ELSE +* +* Compute the Cholesky factorization of a symmetric band +* matrix, given the lower triangle of the matrix in band +* storage. +* +* Zero the lower triangle of the work array. +* + DO 90 J = 1, NB + DO 80 I = J + 1, NB + WORK( I, J ) = ZERO + 80 CONTINUE + 90 CONTINUE +* +* Process the band matrix one diagonal block at a time. +* + DO 140 I = 1, N, NB + IB = MIN( NB, N-I+1 ) +* +* Factorize the diagonal block +* + CALL SPOTF2( UPLO, IB, AB( 1, I ), LDAB-1, II ) + IF( II.NE.0 ) THEN + INFO = I + II - 1 + GO TO 150 + END IF + IF( I+IB.LE.N ) THEN +* +* Update the relevant part of the trailing submatrix. +* If A11 denotes the diagonal block which has just been +* factorized, then we need to update the remaining +* blocks in the diagram: +* +* A11 +* A21 A22 +* A31 A32 A33 +* +* The numbers of rows and columns in the partitioning +* are IB, I2, I3 respectively. The blocks A21, A22 and +* A32 are empty if IB = KD. The lower triangle of A31 +* lies outside the band. +* + I2 = MIN( KD-IB, N-I-IB+1 ) + I3 = MIN( IB, N-I-KD+1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A21 +* + CALL STRSM( 'Right', 'Lower', 'Transpose', + $ 'Non-unit', I2, IB, ONE, AB( 1, I ), + $ LDAB-1, AB( 1+IB, I ), LDAB-1 ) +* +* Update A22 +* + CALL SSYRK( 'Lower', 'No Transpose', I2, IB, -ONE, + $ AB( 1+IB, I ), LDAB-1, ONE, + $ AB( 1, I+IB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Copy the upper triangle of A31 into the work array. +* + DO 110 JJ = 1, IB + DO 100 II = 1, MIN( JJ, I3 ) + WORK( II, JJ ) = AB( KD+1-JJ+II, JJ+I-1 ) + 100 CONTINUE + 110 CONTINUE +* +* Update A31 (in the work array). +* + CALL STRSM( 'Right', 'Lower', 'Transpose', + $ 'Non-unit', I3, IB, ONE, AB( 1, I ), + $ LDAB-1, WORK, LDWORK ) +* +* Update A32 +* + IF( I2.GT.0 ) + $ CALL SGEMM( 'No transpose', 'Transpose', I3, I2, + $ IB, -ONE, WORK, LDWORK, + $ AB( 1+IB, I ), LDAB-1, ONE, + $ AB( 1+KD-IB, I+IB ), LDAB-1 ) +* +* Update A33 +* + CALL SSYRK( 'Lower', 'No Transpose', I3, IB, -ONE, + $ WORK, LDWORK, ONE, AB( 1, I+KD ), + $ LDAB-1 ) +* +* Copy the upper triangle of A31 back into place. +* + DO 130 JJ = 1, IB + DO 120 II = 1, MIN( JJ, I3 ) + AB( KD+1-JJ+II, JJ+I-1 ) = WORK( II, JJ ) + 120 CONTINUE + 130 CONTINUE + END IF + END IF + 140 CONTINUE + END IF + END IF + RETURN +* + 150 CONTINUE + RETURN +* +* End of SPBTRF +* + END diff --git a/dspl/liblapack/SRC/spbtrs.f b/dspl/liblapack/SRC/spbtrs.f new file mode 100644 index 0000000..81ddaab --- /dev/null +++ b/dspl/liblapack/SRC/spbtrs.f @@ -0,0 +1,220 @@ +*> \brief \b SPBTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPBTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPBTRS solves a system of linear equations A*X = B with a symmetric +*> positive definite band matrix A using the Cholesky factorization +*> A = U**T*U or A = L*L**T computed by SPBTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular factor stored in AB; +*> = 'L': Lower triangular factor stored in AB. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T of the band matrix A, stored in the +*> first KD+1 rows of the array. The j-th column of U or L is +*> stored in the j-th column of the array AB as follows: +*> if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL STBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U**T *U. +* + DO 10 J = 1, NRHS +* +* Solve U**T *X = B, overwriting B with X. +* + CALL STBSV( 'Upper', 'Transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) +* +* Solve U*X = B, overwriting B with X. +* + CALL STBSV( 'Upper', 'No transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Solve A*X = B where A = L*L**T. +* + DO 20 J = 1, NRHS +* +* Solve L*X = B, overwriting B with X. +* + CALL STBSV( 'Lower', 'No transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) +* +* Solve L**T *X = B, overwriting B with X. +* + CALL STBSV( 'Lower', 'Transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) + 20 CONTINUE + END IF +* + RETURN +* +* End of SPBTRS +* + END diff --git a/dspl/liblapack/SRC/spftrf.f b/dspl/liblapack/SRC/spftrf.f new file mode 100644 index 0000000..6f48c00 --- /dev/null +++ b/dspl/liblapack/SRC/spftrf.f @@ -0,0 +1,457 @@ +*> \brief \b SPFTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPFTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPFTRF( TRANSR, UPLO, N, A, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER N, INFO +* .. +* .. Array Arguments .. +* REAL A( 0: * ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPFTRF computes the Cholesky factorization of a real symmetric +*> positive definite matrix A. +*> +*> The factorization has the form +*> A = U**T * U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> +*> This is the block version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal TRANSR of RFP A is stored; +*> = 'T': The Transpose TRANSR of RFP A is stored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of RFP A is stored; +*> = 'L': Lower triangle of RFP A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension ( N*(N+1)/2 ); +*> On entry, the symmetric matrix A in RFP format. RFP format is +*> described by TRANSR, UPLO, and N as follows: If TRANSR = 'N' +*> then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is +*> (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'T' then RFP is +*> the transpose of RFP A as defined when +*> TRANSR = 'N'. The contents of RFP A are defined by UPLO as +*> follows: If UPLO = 'U' the RFP A contains the NT elements of +*> upper packed A. If UPLO = 'L' the RFP A contains the elements +*> of lower packed A. The LDA of RFP A is (N+1)/2 when TRANSR = +*> 'T'. When TRANSR is 'N' the LDA is N+1 when N is even and N +*> is odd. See the Note below for more details. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization RFP A = U**T*U or RFP A = L*L**T. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Rectangular Full Packed (RFP) Format when N is +*> even. We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> the transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> the transpose of the last three columns of AP lower. +*> This covers the case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 03 04 05 33 43 53 +*> 13 14 15 00 44 54 +*> 23 24 25 10 11 55 +*> 33 34 35 20 21 22 +*> 00 44 45 30 31 32 +*> 01 11 55 40 41 42 +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We then consider Rectangular Full Packed (RFP) Format when N is +*> odd. We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> the transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> the transpose of the last two columns of AP lower. +*> This covers the case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 02 03 04 00 33 43 +*> 12 13 14 10 11 44 +*> 22 23 24 20 21 22 +*> 00 33 34 30 31 32 +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> 02 12 22 00 01 00 10 20 30 40 50 +*> 03 13 23 33 11 33 11 21 31 41 51 +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SPFTRF( TRANSR, UPLO, N, A, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER N, INFO +* .. +* .. Array Arguments .. + REAL A( 0: * ) +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SSYRK, SPOTRF, STRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'T' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPFTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* If N is odd, set NISODD = .TRUE. +* If N is even, set K = N/2 and NISODD = .FALSE. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + ELSE + NISODD = .TRUE. + END IF +* +* Set N1 and N2 depending on LOWER +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* start execution: there are eight cases +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) +* T1 -> a(0), T2 -> a(n), S -> a(n1) +* + CALL SPOTRF( 'L', N1, A( 0 ), N, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL STRSM( 'R', 'L', 'T', 'N', N2, N1, ONE, A( 0 ), N, + $ A( N1 ), N ) + CALL SSYRK( 'U', 'N', N2, N1, -ONE, A( N1 ), N, ONE, + $ A( N ), N ) + CALL SPOTRF( 'U', N2, A( N ), N, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) +* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) +* T1 -> a(n2), T2 -> a(n1), S -> a(0) +* + CALL SPOTRF( 'L', N1, A( N2 ), N, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL STRSM( 'L', 'L', 'N', 'N', N1, N2, ONE, A( N2 ), N, + $ A( 0 ), N ) + CALL SSYRK( 'U', 'T', N2, N1, -ONE, A( 0 ), N, ONE, + $ A( N1 ), N ) + CALL SPOTRF( 'U', N2, A( N1 ), N, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is odd +* T1 -> A(0,0) , T2 -> A(1,0) , S -> A(0,n1) +* T1 -> a(0+0) , T2 -> a(1+0) , S -> a(0+n1*n1); lda=n1 +* + CALL SPOTRF( 'U', N1, A( 0 ), N1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL STRSM( 'L', 'U', 'T', 'N', N1, N2, ONE, A( 0 ), N1, + $ A( N1*N1 ), N1 ) + CALL SSYRK( 'L', 'T', N2, N1, -ONE, A( N1*N1 ), N1, ONE, + $ A( 1 ), N1 ) + CALL SPOTRF( 'L', N2, A( 1 ), N1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is odd +* T1 -> A(0,n1+1), T2 -> A(0,n1), S -> A(0,0) +* T1 -> a(n2*n2), T2 -> a(n1*n2), S -> a(0); lda = n2 +* + CALL SPOTRF( 'U', N1, A( N2*N2 ), N2, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL STRSM( 'R', 'U', 'N', 'N', N2, N1, ONE, A( N2*N2 ), + $ N2, A( 0 ), N2 ) + CALL SSYRK( 'L', 'N', N2, N1, -ONE, A( 0 ), N2, ONE, + $ A( N1*N2 ), N2 ) + CALL SPOTRF( 'L', N2, A( N1*N2 ), N2, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1) +* + CALL SPOTRF( 'L', K, A( 1 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL STRSM( 'R', 'L', 'T', 'N', K, K, ONE, A( 1 ), N+1, + $ A( K+1 ), N+1 ) + CALL SSYRK( 'U', 'N', K, K, -ONE, A( K+1 ), N+1, ONE, + $ A( 0 ), N+1 ) + CALL SPOTRF( 'U', K, A( 0 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0) +* + CALL SPOTRF( 'L', K, A( K+1 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL STRSM( 'L', 'L', 'N', 'N', K, K, ONE, A( K+1 ), + $ N+1, A( 0 ), N+1 ) + CALL SSYRK( 'U', 'T', K, K, -ONE, A( 0 ), N+1, ONE, + $ A( K ), N+1 ) + CALL SPOTRF( 'U', K, A( K ), N+1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K +* + END IF +* + ELSE +* +* N is even and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) +* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k +* + CALL SPOTRF( 'U', K, A( 0+K ), K, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL STRSM( 'L', 'U', 'T', 'N', K, K, ONE, A( K ), N1, + $ A( K*( K+1 ) ), K ) + CALL SSYRK( 'L', 'T', K, K, -ONE, A( K*( K+1 ) ), K, ONE, + $ A( 0 ), K ) + CALL SPOTRF( 'L', K, A( 0 ), K, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0) +* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k +* + CALL SPOTRF( 'U', K, A( K*( K+1 ) ), K, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL STRSM( 'R', 'U', 'N', 'N', K, K, ONE, + $ A( K*( K+1 ) ), K, A( 0 ), K ) + CALL SSYRK( 'L', 'N', K, K, -ONE, A( 0 ), K, ONE, + $ A( K*K ), K ) + CALL SPOTRF( 'L', K, A( K*K ), K, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of SPFTRF +* + END diff --git a/dspl/liblapack/SRC/spftri.f b/dspl/liblapack/SRC/spftri.f new file mode 100644 index 0000000..96c99a0 --- /dev/null +++ b/dspl/liblapack/SRC/spftri.f @@ -0,0 +1,423 @@ +*> \brief \b SPFTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPFTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPFTRI( TRANSR, UPLO, N, A, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, N +* .. Array Arguments .. +* REAL A( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPFTRI computes the inverse of a real (symmetric) positive definite +*> matrix A using the Cholesky factorization A = U**T*U or A = L*L**T +*> computed by SPFTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal TRANSR of RFP A is stored; +*> = 'T': The Transpose TRANSR of RFP A is stored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension ( N*(N+1)/2 ) +*> On entry, the symmetric matrix A in RFP format. RFP format is +*> described by TRANSR, UPLO, and N as follows: If TRANSR = 'N' +*> then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is +*> (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'T' then RFP is +*> the transpose of RFP A as defined when +*> TRANSR = 'N'. The contents of RFP A are defined by UPLO as +*> follows: If UPLO = 'U' the RFP A contains the nt elements of +*> upper packed A. If UPLO = 'L' the RFP A contains the elements +*> of lower packed A. The LDA of RFP A is (N+1)/2 when TRANSR = +*> 'T'. When TRANSR is 'N' the LDA is N+1 when N is even and N +*> is odd. See the Note below for more details. +*> +*> On exit, the symmetric inverse of the original matrix, in the +*> same storage format. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the (i,i) element of the factor U or L is +*> zero, and the inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Rectangular Full Packed (RFP) Format when N is +*> even. We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> the transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> the transpose of the last three columns of AP lower. +*> This covers the case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 03 04 05 33 43 53 +*> 13 14 15 00 44 54 +*> 23 24 25 10 11 55 +*> 33 34 35 20 21 22 +*> 00 44 45 30 31 32 +*> 01 11 55 40 41 42 +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We then consider Rectangular Full Packed (RFP) Format when N is +*> odd. We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> the transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> the transpose of the last two columns of AP lower. +*> This covers the case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 02 03 04 00 33 43 +*> 12 13 14 10 11 44 +*> 22 23 24 20 21 22 +*> 00 33 34 30 31 32 +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> 02 12 22 00 01 00 10 20 30 40 50 +*> 03 13 23 33 11 33 11 21 31 41 51 +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SPFTRI( TRANSR, UPLO, N, A, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, N +* .. Array Arguments .. + REAL A( 0: * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, STFTRI, SLAUUM, STRMM, SSYRK +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'T' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPFTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL STFTRI( TRANSR, UPLO, 'N', N, A, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* +* If N is odd, set NISODD = .TRUE. +* If N is even, set K = N/2 and NISODD = .FALSE. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + ELSE + NISODD = .TRUE. + END IF +* +* Set N1 and N2 depending on LOWER +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* Start execution of triangular matrix multiply: inv(U)*inv(U)^C or +* inv(L)^C*inv(L). There are eight cases. +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:N1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(N1,0) +* T1 -> a(0), T2 -> a(n), S -> a(N1) +* + CALL SLAUUM( 'L', N1, A( 0 ), N, INFO ) + CALL SSYRK( 'L', 'T', N1, N2, ONE, A( N1 ), N, ONE, + $ A( 0 ), N ) + CALL STRMM( 'L', 'U', 'N', 'N', N2, N1, ONE, A( N ), N, + $ A( N1 ), N ) + CALL SLAUUM( 'U', N2, A( N ), N, INFO ) +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:N2-1) +* T1 -> a(N1+1,0), T2 -> a(N1,0), S -> a(0,0) +* T1 -> a(N2), T2 -> a(N1), S -> a(0) +* + CALL SLAUUM( 'L', N1, A( N2 ), N, INFO ) + CALL SSYRK( 'L', 'N', N1, N2, ONE, A( 0 ), N, ONE, + $ A( N2 ), N ) + CALL STRMM( 'R', 'U', 'T', 'N', N1, N2, ONE, A( N1 ), N, + $ A( 0 ), N ) + CALL SLAUUM( 'U', N2, A( N1 ), N, INFO ) +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE, and N is odd +* T1 -> a(0), T2 -> a(1), S -> a(0+N1*N1) +* + CALL SLAUUM( 'U', N1, A( 0 ), N1, INFO ) + CALL SSYRK( 'U', 'N', N1, N2, ONE, A( N1*N1 ), N1, ONE, + $ A( 0 ), N1 ) + CALL STRMM( 'R', 'L', 'N', 'N', N1, N2, ONE, A( 1 ), N1, + $ A( N1*N1 ), N1 ) + CALL SLAUUM( 'L', N2, A( 1 ), N1, INFO ) +* + ELSE +* +* SRPA for UPPER, TRANSPOSE, and N is odd +* T1 -> a(0+N2*N2), T2 -> a(0+N1*N2), S -> a(0) +* + CALL SLAUUM( 'U', N1, A( N2*N2 ), N2, INFO ) + CALL SSYRK( 'U', 'T', N1, N2, ONE, A( 0 ), N2, ONE, + $ A( N2*N2 ), N2 ) + CALL STRMM( 'L', 'L', 'T', 'N', N2, N1, ONE, A( N1*N2 ), + $ N2, A( 0 ), N2 ) + CALL SLAUUM( 'L', N2, A( N1*N2 ), N2, INFO ) +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1) +* + CALL SLAUUM( 'L', K, A( 1 ), N+1, INFO ) + CALL SSYRK( 'L', 'T', K, K, ONE, A( K+1 ), N+1, ONE, + $ A( 1 ), N+1 ) + CALL STRMM( 'L', 'U', 'N', 'N', K, K, ONE, A( 0 ), N+1, + $ A( K+1 ), N+1 ) + CALL SLAUUM( 'U', K, A( 0 ), N+1, INFO ) +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0) +* + CALL SLAUUM( 'L', K, A( K+1 ), N+1, INFO ) + CALL SSYRK( 'L', 'N', K, K, ONE, A( 0 ), N+1, ONE, + $ A( K+1 ), N+1 ) + CALL STRMM( 'R', 'U', 'T', 'N', K, K, ONE, A( K ), N+1, + $ A( 0 ), N+1 ) + CALL SLAUUM( 'U', K, A( K ), N+1, INFO ) +* + END IF +* + ELSE +* +* N is even and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE, and N is even (see paper) +* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1), +* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k +* + CALL SLAUUM( 'U', K, A( K ), K, INFO ) + CALL SSYRK( 'U', 'N', K, K, ONE, A( K*( K+1 ) ), K, ONE, + $ A( K ), K ) + CALL STRMM( 'R', 'L', 'N', 'N', K, K, ONE, A( 0 ), K, + $ A( K*( K+1 ) ), K ) + CALL SLAUUM( 'L', K, A( 0 ), K, INFO ) +* + ELSE +* +* SRPA for UPPER, TRANSPOSE, and N is even (see paper) +* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0), +* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k +* + CALL SLAUUM( 'U', K, A( K*( K+1 ) ), K, INFO ) + CALL SSYRK( 'U', 'T', K, K, ONE, A( 0 ), K, ONE, + $ A( K*( K+1 ) ), K ) + CALL STRMM( 'L', 'L', 'T', 'N', K, K, ONE, A( K*K ), K, + $ A( 0 ), K ) + CALL SLAUUM( 'L', K, A( K*K ), K, INFO ) +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of SPFTRI +* + END diff --git a/dspl/liblapack/SRC/spftrs.f b/dspl/liblapack/SRC/spftrs.f new file mode 100644 index 0000000..e6710e2 --- /dev/null +++ b/dspl/liblapack/SRC/spftrs.f @@ -0,0 +1,280 @@ +*> \brief \b SPFTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPFTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPFTRS( TRANSR, UPLO, N, NRHS, A, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* REAL A( 0: * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPFTRS solves a system of linear equations A*X = B with a symmetric +*> positive definite matrix A using the Cholesky factorization +*> A = U**T*U or A = L*L**T computed by SPFTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal TRANSR of RFP A is stored; +*> = 'T': The Transpose TRANSR of RFP A is stored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of RFP A is stored; +*> = 'L': Lower triangle of RFP A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension ( N*(N+1)/2 ) +*> The triangular factor U or L from the Cholesky factorization +*> of RFP A = U**H*U or RFP A = L*L**T, as computed by SPFTRF. +*> See note below for more details about RFP A. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Rectangular Full Packed (RFP) Format when N is +*> even. We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> the transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> the transpose of the last three columns of AP lower. +*> This covers the case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 03 04 05 33 43 53 +*> 13 14 15 00 44 54 +*> 23 24 25 10 11 55 +*> 33 34 35 20 21 22 +*> 00 44 45 30 31 32 +*> 01 11 55 40 41 42 +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We then consider Rectangular Full Packed (RFP) Format when N is +*> odd. We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> the transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> the transpose of the last two columns of AP lower. +*> This covers the case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 02 03 04 00 33 43 +*> 12 13 14 10 11 44 +*> 22 23 24 20 21 22 +*> 00 33 34 30 31 32 +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> 02 12 22 00 01 00 10 20 30 40 50 +*> 03 13 23 33 11 33 11 21 31 41 51 +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SPFTRS( TRANSR, UPLO, N, NRHS, A, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL A( 0: * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, NORMALTRANSR +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, STFSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'T' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPFTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* start execution: there are two triangular solves +* + IF( LOWER ) THEN + CALL STFSM( TRANSR, 'L', UPLO, 'N', 'N', N, NRHS, ONE, A, B, + $ LDB ) + CALL STFSM( TRANSR, 'L', UPLO, 'T', 'N', N, NRHS, ONE, A, B, + $ LDB ) + ELSE + CALL STFSM( TRANSR, 'L', UPLO, 'T', 'N', N, NRHS, ONE, A, B, + $ LDB ) + CALL STFSM( TRANSR, 'L', UPLO, 'N', 'N', N, NRHS, ONE, A, B, + $ LDB ) + END IF +* + RETURN +* +* End of SPFTRS +* + END diff --git a/dspl/liblapack/SRC/spocon.f b/dspl/liblapack/SRC/spocon.f new file mode 100644 index 0000000..c249c3e --- /dev/null +++ b/dspl/liblapack/SRC/spocon.f @@ -0,0 +1,253 @@ +*> \brief \b SPOCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPOCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPOCON( UPLO, N, A, LDA, ANORM, RCOND, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPOCON estimates the reciprocal of the condition number (in the +*> 1-norm) of a real symmetric positive definite matrix using the +*> Cholesky factorization A = U**T*U or A = L*L**T computed by SPOTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by SPOTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The 1-norm (or infinity-norm) of the symmetric matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realPOcomputational +* +* ===================================================================== + SUBROUTINE SPOCON( UPLO, N, A, LDA, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + REAL AINVNM, SCALE, SCALEL, SCALEU, SMLNUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SLATRS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of inv(A). +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U**T). +* + CALL SLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, A, + $ LDA, WORK, SCALEL, WORK( 2*N+1 ), INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL SLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SCALEU, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL SLATRS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SCALEL, WORK( 2*N+1 ), INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L**T). +* + CALL SLATRS( 'Lower', 'Transpose', 'Non-unit', NORMIN, N, A, + $ LDA, WORK, SCALEU, WORK( 2*N+1 ), INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of SPOCON +* + END diff --git a/dspl/liblapack/SRC/spoequ.f b/dspl/liblapack/SRC/spoequ.f new file mode 100644 index 0000000..f2de64d --- /dev/null +++ b/dspl/liblapack/SRC/spoequ.f @@ -0,0 +1,205 @@ +*> \brief \b SPOEQU +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPOEQU + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPOEQU( N, A, LDA, S, SCOND, AMAX, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, N +* REAL AMAX, SCOND +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPOEQU computes row and column scalings intended to equilibrate a +*> symmetric positive definite matrix A and reduce its condition number +*> (with respect to the two-norm). S contains the scale factors, +*> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with +*> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This +*> choice of S puts the condition number of B within a factor N of the +*> smallest possible condition number over all possible diagonal +*> scalings. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The N-by-N symmetric positive definite matrix whose scaling +*> factors are to be computed. Only the diagonal elements of A +*> are referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is REAL +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realPOcomputational +* +* ===================================================================== + SUBROUTINE SPOEQU( N, A, LDA, S, SCOND, AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL A( LDA, * ), S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL SMIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* +* Find the minimum and maximum diagonal elements. +* + S( 1 ) = A( 1, 1 ) + SMIN = S( 1 ) + AMAX = S( 1 ) + DO 10 I = 2, N + S( I ) = A( I, I ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 20 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 20 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 30 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 30 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of SPOEQU +* + END diff --git a/dspl/liblapack/SRC/spoequb.f b/dspl/liblapack/SRC/spoequb.f new file mode 100644 index 0000000..e74eefa --- /dev/null +++ b/dspl/liblapack/SRC/spoequb.f @@ -0,0 +1,221 @@ +*> \brief \b SPOEQUB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPOEQUB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPOEQUB( N, A, LDA, S, SCOND, AMAX, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, N +* REAL AMAX, SCOND +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPOEQUB computes row and column scalings intended to equilibrate a +*> symmetric positive definite matrix A and reduce its condition number +*> (with respect to the two-norm). S contains the scale factors, +*> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with +*> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This +*> choice of S puts the condition number of B within a factor N of the +*> smallest possible condition number over all possible diagonal +*> scalings. +*> +*> This routine differs from SPOEQU by restricting the scaling factors +*> to a power of the radix. Barring over- and underflow, scaling by +*> these factors introduces no additional rounding errors. However, the +*> scaled diagonal entries are no longer approximately 1 but lie +*> between sqrt(radix) and 1/sqrt(radix). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The N-by-N symmetric positive definite matrix whose scaling +*> factors are to be computed. Only the diagonal elements of A +*> are referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is REAL +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realPOcomputational +* +* ===================================================================== + SUBROUTINE SPOEQUB( N, A, LDA, S, SCOND, AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL A( LDA, * ), S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I + REAL SMIN, BASE, TMP +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT, LOG, INT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* +* Positive definite only performs 1 pass of equilibration. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOEQUB', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF + + BASE = SLAMCH( 'B' ) + TMP = -0.5 / LOG ( BASE ) +* +* Find the minimum and maximum diagonal elements. +* + S( 1 ) = A( 1, 1 ) + SMIN = S( 1 ) + AMAX = S( 1 ) + DO 10 I = 2, N + S( I ) = A( I, I ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 20 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 20 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 30 I = 1, N + S( I ) = BASE ** INT( TMP * LOG( S( I ) ) ) + 30 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)). +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF +* + RETURN +* +* End of SPOEQUB +* + END diff --git a/dspl/liblapack/SRC/sporfs.f b/dspl/liblapack/SRC/sporfs.f new file mode 100644 index 0000000..32a69b0 --- /dev/null +++ b/dspl/liblapack/SRC/sporfs.f @@ -0,0 +1,430 @@ +*> \brief \b SPORFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPORFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, +* LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPORFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric positive definite, +*> and provides error bounds and backward error estimates for the +*> solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by SPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by SPOTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realPOcomputational +* +* ===================================================================== + SUBROUTINE SPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, + $ LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLACN2, SPOTRS, SSYMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPORFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL SSYMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, + $ WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + DO 40 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 40 CONTINUE + WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK + DO 60 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 60 CONTINUE + WORK( K ) = WORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL SPOTRS( UPLO, N, 1, AF, LDAF, WORK( N+1 ), N, INFO ) + CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL SLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**T). +* + CALL SPOTRS( UPLO, N, 1, AF, LDAF, WORK( N+1 ), N, INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 120 CONTINUE + CALL SPOTRS( UPLO, N, 1, AF, LDAF, WORK( N+1 ), N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of SPORFS +* + END diff --git a/dspl/liblapack/SRC/sporfsx.f b/dspl/liblapack/SRC/sporfsx.f new file mode 100644 index 0000000..52fab69 --- /dev/null +++ b/dspl/liblapack/SRC/sporfsx.f @@ -0,0 +1,693 @@ +*> \brief \b SPORFSX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPORFSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPORFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, S, B, +* LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, EQUED +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX, * ), WORK( * ) +* REAL S( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPORFSX improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric positive +*> definite, and provides error bounds and backward error estimates +*> for the solution. In addition to normwise error bound, the code +*> provides maximum componentwise error bound if possible. See +*> comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the +*> error bounds. +*> +*> The original system of linear equations may have been equilibrated +*> before calling this routine, as described by arguments EQUED and S +*> below. In this case, the solution and error bounds returned are +*> for the original unequilibrated system. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done to A +*> before calling this routine. This is needed to compute +*> the solution and error bounds correctly. +*> = 'N': No equilibration +*> = 'Y': Both row and column equilibration, i.e., A has been +*> replaced by diag(S) * A * diag(S). +*> The right hand side B has been changed accordingly. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by SPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The row scale factors for A. If EQUED = 'Y', A is multiplied on +*> the left and right by diag(S). S is an input argument if FACT = +*> 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED +*> = 'Y', each element of S must be positive. If S is output, each +*> element of S is a power of the radix. If S is input, each element +*> of S should be a power of the radix to ensure a reliable solution +*> and error estimates. Scaling by powers of the radix does not cause +*> rounding errors unless the result underflows or overflows. +*> Rounding errors during scaling lead to refining with a matrix that +*> is not equivalent to the input matrix, producing error estimates +*> that may not be reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by SGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is REAL array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realPOcomputational +* +* ===================================================================== + SUBROUTINE SPORFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, S, B, + $ LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO, EQUED + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX, * ), WORK( * ) + REAL S( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL ITREF_DEFAULT, ITHRESH_DEFAULT, + $ COMPONENTWISE_DEFAULT + REAL RTHRESH_DEFAULT, DZTHRESH_DEFAULT + PARAMETER ( ITREF_DEFAULT = 1.0 ) + PARAMETER ( ITHRESH_DEFAULT = 10.0 ) + PARAMETER ( COMPONENTWISE_DEFAULT = 1.0 ) + PARAMETER ( RTHRESH_DEFAULT = 0.5 ) + PARAMETER ( DZTHRESH_DEFAULT = 0.25 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. Local Scalars .. + CHARACTER(1) NORM + LOGICAL RCEQU + INTEGER J, PREC_TYPE, REF_TYPE + INTEGER N_NORMS + REAL ANORM, RCOND_TMP + REAL ILLRCOND_THRESH, ERR_LBND, CWISE_WRONG + LOGICAL IGNORE_CWISE + INTEGER ITHRESH + REAL RTHRESH, UNSTABLE_THRESH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SPOCON, SLA_PORFSX_EXTENDED +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. External Functions .. + EXTERNAL LSAME, ILAPREC + EXTERNAL SLAMCH, SLANSY, SLA_PORCOND + REAL SLAMCH, SLANSY, SLA_PORCOND + LOGICAL LSAME + INTEGER ILAPREC +* .. +* .. Executable Statements .. +* +* Check the input parameters. +* + INFO = 0 + REF_TYPE = INT( ITREF_DEFAULT ) + IF ( NPARAMS .GE. LA_LINRX_ITREF_I ) THEN + IF ( PARAMS( LA_LINRX_ITREF_I ) .LT. 0.0 ) THEN + PARAMS( LA_LINRX_ITREF_I ) = ITREF_DEFAULT + ELSE + REF_TYPE = PARAMS( LA_LINRX_ITREF_I ) + END IF + END IF +* +* Set default parameters. +* + ILLRCOND_THRESH = REAL( N ) * SLAMCH( 'Epsilon' ) + ITHRESH = INT( ITHRESH_DEFAULT ) + RTHRESH = RTHRESH_DEFAULT + UNSTABLE_THRESH = DZTHRESH_DEFAULT + IGNORE_CWISE = COMPONENTWISE_DEFAULT .EQ. 0.0 +* + IF ( NPARAMS.GE.LA_LINRX_ITHRESH_I ) THEN + IF ( PARAMS( LA_LINRX_ITHRESH_I ).LT.0.0 ) THEN + PARAMS( LA_LINRX_ITHRESH_I ) = ITHRESH + ELSE + ITHRESH = INT( PARAMS( LA_LINRX_ITHRESH_I ) ) + END IF + END IF + IF ( NPARAMS.GE.LA_LINRX_CWISE_I ) THEN + IF ( PARAMS( LA_LINRX_CWISE_I ).LT.0.0 ) THEN + IF ( IGNORE_CWISE ) THEN + PARAMS( LA_LINRX_CWISE_I ) = 0.0 + ELSE + PARAMS( LA_LINRX_CWISE_I ) = 1.0 + END IF + ELSE + IGNORE_CWISE = PARAMS( LA_LINRX_CWISE_I ) .EQ. 0.0 + END IF + END IF + IF ( REF_TYPE .EQ. 0 .OR. N_ERR_BNDS .EQ. 0 ) THEN + N_NORMS = 0 + ELSE IF ( IGNORE_CWISE ) THEN + N_NORMS = 1 + ELSE + N_NORMS = 2 + END IF +* + RCEQU = LSAME( EQUED, 'Y' ) +* +* Test input parameters. +* + IF (.NOT.LSAME(UPLO, 'U') .AND. .NOT.LSAME(UPLO, 'L')) THEN + INFO = -1 + ELSE IF( .NOT.RCEQU .AND. .NOT.LSAME( EQUED, 'N' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPORFSX', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + RCOND = 1.0 + DO J = 1, NRHS + BERR( J ) = 0.0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 0.0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 0.0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 1.0 + END IF + END DO + RETURN + END IF +* +* Default to failure. +* + RCOND = 0.0 + DO J = 1, NRHS + BERR( J ) = 1.0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 0.0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 0.0 + END IF + END DO +* +* Compute the norm of A and the reciprocal of the condition +* number of A. +* + NORM = 'I' + ANORM = SLANSY( NORM, UPLO, N, A, LDA, WORK ) + CALL SPOCON( UPLO, N, AF, LDAF, ANORM, RCOND, WORK, + $ IWORK, INFO ) +* +* Perform refinement on each right-hand side +* + IF ( REF_TYPE .NE. 0 ) THEN + + PREC_TYPE = ILAPREC( 'D' ) + + CALL SLA_PORFSX_EXTENDED( PREC_TYPE, UPLO, N, + $ NRHS, A, LDA, AF, LDAF, RCEQU, S, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, ERR_BNDS_COMP, + $ WORK( N+1 ), WORK( 1 ), WORK( 2*N+1 ), WORK( 1 ), RCOND, + $ ITHRESH, RTHRESH, UNSTABLE_THRESH, IGNORE_CWISE, + $ INFO ) + END IF + + ERR_LBND = MAX( 10.0, SQRT( REAL( N ) ) ) * SLAMCH( 'Epsilon' ) + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 1 ) THEN +* +* Compute scaled normwise condition number cond(A*C). +* + IF ( RCEQU ) THEN + RCOND_TMP = SLA_PORCOND( UPLO, N, A, LDA, AF, LDAF, + $ -1, S, INFO, WORK, IWORK ) + ELSE + RCOND_TMP = SLA_PORCOND( UPLO, N, A, LDA, AF, LDAF, + $ 0, S, INFO, WORK, IWORK ) + END IF + DO J = 1, NRHS +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .GT. 1.0 ) + $ ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 0.0 + IF ( INFO .LE. N ) INFO = N + J + ELSE IF ( ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .LT. ERR_LBND ) + $ THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + END IF +* +* Save the condition number. +* + IF (N_ERR_BNDS .GE. LA_LINRX_RCOND_I) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + END DO + END IF + + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 2 ) THEN +* +* Compute componentwise condition number cond(A*diag(Y(:,J))) for +* each right-hand side using the current solution as an estimate of +* the true solution. If the componentwise error estimate is too +* large, then the solution is a lousy estimate of truth and the +* estimated RCOND may be too optimistic. To avoid misleading users, +* the inverse condition number is set to 0.0 when the estimated +* cwise error is at least CWISE_WRONG. +* + CWISE_WRONG = SQRT( SLAMCH( 'Epsilon' ) ) + DO J = 1, NRHS + IF (ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .LT. CWISE_WRONG ) + $ THEN + RCOND_TMP = SLA_PORCOND( UPLO, N, A, LDA, AF, LDAF, 1, + $ X( 1, J ), INFO, WORK, IWORK ) + ELSE + RCOND_TMP = 0.0 + END IF +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .GT. 1.0 ) + $ ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 0.0 + IF ( PARAMS( LA_LINRX_CWISE_I ) .EQ. 1.0 + $ .AND. INFO.LT.N + J ) INFO = N + J + ELSE IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) + $ .LT. ERR_LBND ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF +* + RETURN +* +* End of SPORFSX +* + END diff --git a/dspl/liblapack/SRC/sposv.f b/dspl/liblapack/SRC/sposv.f new file mode 100644 index 0000000..2a9565d --- /dev/null +++ b/dspl/liblapack/SRC/sposv.f @@ -0,0 +1,193 @@ +*> \brief SPOSV computes the solution to system of linear equations A * X = B for PO matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPOSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPOSV( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPOSV computes the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric positive definite matrix and X and B +*> are N-by-NRHS matrices. +*> +*> The Cholesky decomposition is used to factor A as +*> A = U**T* U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is a lower triangular +*> matrix. The factored form of A is then used to solve the system of +*> equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i of A is not +*> positive definite, so the factorization could not be +*> completed, and the solution has not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realPOsolve +* +* ===================================================================== + SUBROUTINE SPOSV( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SPOTRF, SPOTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U**T*U or A = L*L**T. +* + CALL SPOTRF( UPLO, N, A, LDA, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL SPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* + END IF + RETURN +* +* End of SPOSV +* + END diff --git a/dspl/liblapack/SRC/sposvx.f b/dspl/liblapack/SRC/sposvx.f new file mode 100644 index 0000000..6051068 --- /dev/null +++ b/dspl/liblapack/SRC/sposvx.f @@ -0,0 +1,494 @@ +*> \brief SPOSVX computes the solution to system of linear equations A * X = B for PO matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPOSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPOSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED, +* S, B, LDB, X, LDX, RCOND, FERR, BERR, WORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ BERR( * ), FERR( * ), S( * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPOSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to +*> compute the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric positive definite matrix and X and B +*> are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +*> factor the matrix A (after equilibration if FACT = 'E') as +*> A = U**T* U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is a lower triangular +*> matrix. +*> +*> 3. If the leading i-by-i principal minor is not positive definite, +*> then the routine returns with INFO = i. Otherwise, the factored +*> form of A is used to estimate the condition number of the matrix +*> A. If the reciprocal of the condition number is less than machine +*> precision, INFO = N+1 is returned as a warning, but the routine +*> still goes on to solve for X and compute error bounds as +*> described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(S) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF contains the factored form of A. +*> If EQUED = 'Y', the matrix A has been equilibrated +*> with scaling factors given by S. A and AF will not +*> be modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A, except if FACT = 'F' and +*> EQUED = 'Y', then A must contain the equilibrated matrix +*> diag(S)*A*diag(S). If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. A is not modified if +*> FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T, in the same storage +*> format as A. If EQUED .ne. 'N', then AF is the factored form +*> of the equilibrated matrix diag(S)*A*diag(S). +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T of the original +*> matrix A. +*> +*> If FACT = 'E', then AF is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T of the equilibrated +*> matrix A (see the description of A for the form of the +*> equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A; not accessed if EQUED = 'N'. S is +*> an input argument if FACT = 'F'; otherwise, S is an output +*> argument. If FACT = 'F' and EQUED = 'Y', each element of S +*> must be positive. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +*> B is overwritten by diag(S) * B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +*> the original system of equations. Note that if EQUED = 'Y', +*> A and B are modified on exit, and the solution to the +*> equilibrated system is inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The estimate of the reciprocal condition number of the matrix +*> A after equilibration (if done). If RCOND is less than the +*> machine precision (in particular, if RCOND = 0), the matrix +*> is singular to working precision. This condition is +*> indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: the leading minor of order i of A is +*> not positive definite, so the factorization +*> could not be completed, and the solution has not +*> been computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realPOsolve +* +* ===================================================================== + SUBROUTINE SPOSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED, + $ S, B, LDB, X, LDX, RCOND, FERR, BERR, WORK, + $ IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), S( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER I, INFEQU, J + REAL AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSY + EXTERNAL LSAME, SLAMCH, SLANSY +* .. +* .. External Subroutines .. + EXTERNAL SLACPY, SLAQSY, SPOCON, SPOEQU, SPORFS, SPOTRF, + $ SPOTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -9 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -10 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL SPOEQU( N, A, LDA, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL SLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U**T *U or A = L*L**T. +* + CALL SLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL SPOTRF( UPLO, N, AF, LDAF, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = SLANSY( '1', UPLO, N, A, LDA, WORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL SPOCON( UPLO, N, AF, LDAF, ANORM, RCOND, WORK, IWORK, INFO ) +* +* Compute the solution matrix X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SPOTRS( UPLO, N, NRHS, AF, LDAF, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL SPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, LDX, + $ FERR, BERR, WORK, IWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 50 J = 1, NRHS + DO 40 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 40 CONTINUE + 50 CONTINUE + DO 60 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 60 CONTINUE + END IF +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of SPOSVX +* + END diff --git a/dspl/liblapack/SRC/sposvxx.f b/dspl/liblapack/SRC/sposvxx.f new file mode 100644 index 0000000..3cdfa74 --- /dev/null +++ b/dspl/liblapack/SRC/sposvxx.f @@ -0,0 +1,686 @@ +*> \brief SPOSVXX computes the solution to system of linear equations A * X = B for PO matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPOSVXX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPOSVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED, +* S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, +* N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, +* NPARAMS, PARAMS, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* REAL RCOND, RPVGRW +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX, * ), WORK( * ) +* REAL S( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPOSVXX uses the Cholesky factorization A = U**T*U or A = L*L**T +*> to compute the solution to a real system of linear equations +*> A * X = B, where A is an N-by-N symmetric positive definite matrix +*> and X and B are N-by-NRHS matrices. +*> +*> If requested, both normwise and maximum componentwise error bounds +*> are returned. SPOSVXX will return a solution with a tiny +*> guaranteed error (O(eps) where eps is the working machine +*> precision) unless the matrix is very ill-conditioned, in which +*> case a warning is returned. Relevant condition numbers also are +*> calculated and returned. +*> +*> SPOSVXX accepts user-provided factorizations and equilibration +*> factors; see the definitions of the FACT and EQUED options. +*> Solving with refinement and using a factorization from a previous +*> SPOSVXX call will also produce a solution with either O(eps) +*> errors or warnings, but we cannot make that claim for general +*> user-provided factorizations and equilibration factors if they +*> differ from what SPOSVXX would itself produce. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> +*> diag(S)*A*diag(S) *inv(diag(S))*X = diag(S)*B +*> +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +*> factor the matrix A (after equilibration if FACT = 'E') as +*> A = U**T* U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is a lower triangular +*> matrix. +*> +*> 3. If the leading i-by-i principal minor is not positive definite, +*> then the routine returns with INFO = i. Otherwise, the factored +*> form of A is used to estimate the condition number of the matrix +*> A (see argument RCOND). If the reciprocal of the condition number +*> is less than machine precision, the routine still goes on to solve +*> for X and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), +*> the routine will use iterative refinement to try to get a small +*> error and error bounds. Refinement calculates the residual to at +*> least twice the working precision. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(S) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF contains the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by S. +*> A and AF are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A, except if FACT = 'F' and EQUED = +*> 'Y', then A must contain the equilibrated matrix +*> diag(S)*A*diag(S). If UPLO = 'U', the leading N-by-N upper +*> triangular part of A contains the upper triangular part of the +*> matrix A, and the strictly lower triangular part of A is not +*> referenced. If UPLO = 'L', the leading N-by-N lower triangular +*> part of A contains the lower triangular part of the matrix A, and +*> the strictly upper triangular part of A is not referenced. A is +*> not modified if FACT = 'F' or 'N', or if FACT = 'E' and EQUED = +*> 'N' on exit. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T, in the same storage +*> format as A. If EQUED .ne. 'N', then AF is the factored +*> form of the equilibrated matrix diag(S)*A*diag(S). +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T of the original +*> matrix A. +*> +*> If FACT = 'E', then AF is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T of the equilibrated +*> matrix A (see the description of A for the form of the +*> equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Both row and column equilibration, i.e., A has been +*> replaced by diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The row scale factors for A. If EQUED = 'Y', A is multiplied on +*> the left and right by diag(S). S is an input argument if FACT = +*> 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED +*> = 'Y', each element of S must be positive. If S is output, each +*> element of S is a power of the radix. If S is input, each element +*> of S should be a power of the radix to ensure a reliable solution +*> and error estimates. Scaling by powers of the radix does not cause +*> rounding errors unless the result underflows or overflows. +*> Rounding errors during scaling lead to refining with a matrix that +*> is not equivalent to the input matrix, producing error estimates +*> that may not be reliable. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if EQUED = 'Y', B is overwritten by diag(S)*B; +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X to the original +*> system of equations. Note that A and B are modified on exit if +*> EQUED .ne. 'N', and the solution to the equilibrated system is +*> inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] RPVGRW +*> \verbatim +*> RPVGRW is REAL +*> Reciprocal pivot growth. On exit, this contains the reciprocal +*> pivot growth factor norm(A)/norm(U). The "max absolute element" +*> norm is used. If this is much less than 1, then the stability of +*> the LU factorization of the (equilibrated) matrix A could be poor. +*> This also means that the solution X, estimated condition numbers, +*> and error bounds could be unreliable. If factorization fails with +*> 0 for the leading INFO columns of A. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is REAL array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realPOsolve +* +* ===================================================================== + SUBROUTINE SPOSVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED, + $ S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, + $ N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, + $ NPARAMS, PARAMS, WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + REAL RCOND, RPVGRW +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX, * ), WORK( * ) + REAL S( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER INFEQU, J + REAL AMAX, BIGNUM, SMIN, SMAX, + $ SCOND, SMLNUM +* .. +* .. External Functions .. + EXTERNAL LSAME, SLAMCH, SLA_PORPVGRW + LOGICAL LSAME + REAL SLAMCH, SLA_PORPVGRW +* .. +* .. External Subroutines .. + EXTERNAL SPOEQUB, SPOTRF, SPOTRS, SLACPY, SLAQSY, + $ XERBLA, SLASCL2, SPORFSX +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + ENDIF +* +* Default is failure. If an input parameter is wrong or +* factorization fails, make everything look horrible. Only the +* pivot growth is set here, the rest is initialized in SPORFSX. +* + RPVGRW = ZERO +* +* Test the input parameters. PARAMS is not tested until SPORFSX. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT. + $ LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. + $ .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -9 + ELSE + IF ( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -10 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOSVXX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL SPOEQUB( N, A, LDA, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL SLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) CALL SLASCL2( N, NRHS, S, B, LDB ) +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization of A. +* + CALL SLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL SPOTRF( UPLO, N, AF, LDAF, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.NE.0 ) THEN +* +* Pivot in column INFO is exactly 0 +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + RPVGRW = SLA_PORPVGRW( UPLO, INFO, A, LDA, AF, LDAF, WORK ) + RETURN + ENDIF + END IF +* +* Compute the reciprocal growth factor RPVGRW. +* + RPVGRW = SLA_PORPVGRW( UPLO, N, A, LDA, AF, LDAF, WORK ) +* +* Compute the solution matrix X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SPOTRS( UPLO, N, NRHS, AF, LDAF, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL SPORFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, + $ S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, IWORK, INFO ) + +* +* Scale solutions. +* + IF ( RCEQU ) THEN + CALL SLASCL2 ( N, NRHS, S, X, LDX ) + END IF +* + RETURN +* +* End of SPOSVXX +* + END diff --git a/dspl/liblapack/SRC/spotf2.f b/dspl/liblapack/SRC/spotf2.f new file mode 100644 index 0000000..4af4999 --- /dev/null +++ b/dspl/liblapack/SRC/spotf2.f @@ -0,0 +1,230 @@ +*> \brief \b SPOTF2 computes the Cholesky factorization of a symmetric/Hermitian positive definite matrix (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPOTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPOTF2( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPOTF2 computes the Cholesky factorization of a real symmetric +*> positive definite matrix A. +*> +*> The factorization has the form +*> A = U**T * U , if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**T *U or A = L*L**T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, the leading minor of order k is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realPOcomputational +* +* ===================================================================== + SUBROUTINE SPOTF2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + REAL SDOT + EXTERNAL LSAME, SDOT, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U**T *U. +* + DO 10 J = 1, N +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = A( J, J ) - SDOT( J-1, A( 1, J ), 1, A( 1, J ), 1 ) + IF( AJJ.LE.ZERO.OR.SISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of row J. +* + IF( J.LT.N ) THEN + CALL SGEMV( 'Transpose', J-1, N-J, -ONE, A( 1, J+1 ), + $ LDA, A( 1, J ), 1, ONE, A( J, J+1 ), LDA ) + CALL SSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L**T. +* + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = A( J, J ) - SDOT( J-1, A( J, 1 ), LDA, A( J, 1 ), + $ LDA ) + IF( AJJ.LE.ZERO.OR.SISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of column J. +* + IF( J.LT.N ) THEN + CALL SGEMV( 'No transpose', N-J, J-1, -ONE, A( J+1, 1 ), + $ LDA, A( J, 1 ), LDA, ONE, A( J+1, J ), 1 ) + CALL SSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF + GO TO 40 +* + 30 CONTINUE + INFO = J +* + 40 CONTINUE + RETURN +* +* End of SPOTF2 +* + END diff --git a/dspl/liblapack/SRC/spotrf.f b/dspl/liblapack/SRC/spotrf.f new file mode 100644 index 0000000..968365c --- /dev/null +++ b/dspl/liblapack/SRC/spotrf.f @@ -0,0 +1,246 @@ +*> \brief \b SPOTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPOTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPOTRF( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPOTRF computes the Cholesky factorization of a real symmetric +*> positive definite matrix A. +*> +*> The factorization has the form +*> A = U**T * U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> +*> This is the block version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realPOcomputational +* +* ===================================================================== + SUBROUTINE SPOTRF( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SPOTRF2, SSYRK, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'SPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + CALL SPOTRF2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code. +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U**T*U. +* + DO 10 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + CALL SSYRK( 'Upper', 'Transpose', JB, J-1, -ONE, + $ A( 1, J ), LDA, ONE, A( J, J ), LDA ) + CALL SPOTRF2( 'Upper', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( J+JB.LE.N ) THEN +* +* Compute the current block row. +* + CALL SGEMM( 'Transpose', 'No transpose', JB, N-J-JB+1, + $ J-1, -ONE, A( 1, J ), LDA, A( 1, J+JB ), + $ LDA, ONE, A( J, J+JB ), LDA ) + CALL STRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', + $ JB, N-J-JB+1, ONE, A( J, J ), LDA, + $ A( J, J+JB ), LDA ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization A = L*L**T. +* + DO 20 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + CALL SSYRK( 'Lower', 'No transpose', JB, J-1, -ONE, + $ A( J, 1 ), LDA, ONE, A( J, J ), LDA ) + CALL SPOTRF2( 'Lower', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( J+JB.LE.N ) THEN +* +* Compute the current block column. +* + CALL SGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ J-1, -ONE, A( J+JB, 1 ), LDA, A( J, 1 ), + $ LDA, ONE, A( J+JB, J ), LDA ) + CALL STRSM( 'Right', 'Lower', 'Transpose', 'Non-unit', + $ N-J-JB+1, JB, ONE, A( J, J ), LDA, + $ A( J+JB, J ), LDA ) + END IF + 20 CONTINUE + END IF + END IF + GO TO 40 +* + 30 CONTINUE + INFO = INFO + J - 1 +* + 40 CONTINUE + RETURN +* +* End of SPOTRF +* + END diff --git a/dspl/liblapack/SRC/spotrf2.f b/dspl/liblapack/SRC/spotrf2.f new file mode 100644 index 0000000..474bd39 --- /dev/null +++ b/dspl/liblapack/SRC/spotrf2.f @@ -0,0 +1,237 @@ +*> \brief \b SPOTRF2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE SPOTRF2( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPOTRF2 computes the Cholesky factorization of a real symmetric +*> positive definite matrix A using the recursive algorithm. +*> +*> The factorization has the form +*> A = U**T * U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> +*> This is the recursive version of the algorithm. It divides +*> the matrix into four submatrices: +*> +*> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 +*> A = [ -----|----- ] with n1 = n/2 +*> [ A21 | A22 ] n2 = n-n1 +*> +*> The subroutine calls itself to factor A11. Update and scale A21 +*> or A12, update A22 then call itself to factor A22. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realPOcomputational +* +* ===================================================================== + RECURSIVE SUBROUTINE SPOTRF2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO=0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER N1, N2, IINFO +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + EXTERNAL LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL SSYRK, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOTRF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* N=1 case +* + IF( N.EQ.1 ) THEN +* +* Test for non-positive-definiteness +* + IF( A( 1, 1 ).LE.ZERO.OR.SISNAN( A( 1, 1 ) ) ) THEN + INFO = 1 + RETURN + END IF +* +* Factor +* + A( 1, 1 ) = SQRT( A( 1, 1 ) ) +* +* Use recursive code +* + ELSE + N1 = N/2 + N2 = N-N1 +* +* Factor A11 +* + CALL SPOTRF2( UPLO, N1, A( 1, 1 ), LDA, IINFO ) + IF ( IINFO.NE.0 ) THEN + INFO = IINFO + RETURN + END IF +* +* Compute the Cholesky factorization A = U**T*U +* + IF( UPPER ) THEN +* +* Update and scale A12 +* + CALL STRSM( 'L', 'U', 'T', 'N', N1, N2, ONE, + $ A( 1, 1 ), LDA, A( 1, N1+1 ), LDA ) +* +* Update and factor A22 +* + CALL SSYRK( UPLO, 'T', N2, N1, -ONE, A( 1, N1+1 ), LDA, + $ ONE, A( N1+1, N1+1 ), LDA ) + CALL SPOTRF2( UPLO, N2, A( N1+1, N1+1 ), LDA, IINFO ) + IF ( IINFO.NE.0 ) THEN + INFO = IINFO + N1 + RETURN + END IF +* +* Compute the Cholesky factorization A = L*L**T +* + ELSE +* +* Update and scale A21 +* + CALL STRSM( 'R', 'L', 'T', 'N', N2, N1, ONE, + $ A( 1, 1 ), LDA, A( N1+1, 1 ), LDA ) +* +* Update and factor A22 +* + CALL SSYRK( UPLO, 'N', N2, N1, -ONE, A( N1+1, 1 ), LDA, + $ ONE, A( N1+1, N1+1 ), LDA ) + CALL SPOTRF2( UPLO, N2, A( N1+1, N1+1 ), LDA, IINFO ) + IF ( IINFO.NE.0 ) THEN + INFO = IINFO + N1 + RETURN + END IF + END IF + END IF + RETURN +* +* End of SPOTRF2 +* + END diff --git a/dspl/liblapack/SRC/spotri.f b/dspl/liblapack/SRC/spotri.f new file mode 100644 index 0000000..bbd504c --- /dev/null +++ b/dspl/liblapack/SRC/spotri.f @@ -0,0 +1,159 @@ +*> \brief \b SPOTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPOTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPOTRI( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPOTRI computes the inverse of a real symmetric positive definite +*> matrix A using the Cholesky factorization A = U**T*U or A = L*L**T +*> computed by SPOTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T, as computed by +*> SPOTRF. +*> On exit, the upper or lower triangle of the (symmetric) +*> inverse of A, overwriting the input factor U or L. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the (i,i) element of the factor U or L is +*> zero, and the inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realPOcomputational +* +* ===================================================================== + SUBROUTINE SPOTRI( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLAUUM, STRTRI, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL STRTRI( UPLO, 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* +* Form inv(U) * inv(U)**T or inv(L)**T * inv(L). +* + CALL SLAUUM( UPLO, N, A, LDA, INFO ) +* + RETURN +* +* End of SPOTRI +* + END diff --git a/dspl/liblapack/SRC/spotrs.f b/dspl/liblapack/SRC/spotrs.f new file mode 100644 index 0000000..7b21755 --- /dev/null +++ b/dspl/liblapack/SRC/spotrs.f @@ -0,0 +1,204 @@ +*> \brief \b SPOTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPOTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPOTRS solves a system of linear equations A*X = B with a symmetric +*> positive definite matrix A using the Cholesky factorization +*> A = U**T*U or A = L*L**T computed by SPOTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by SPOTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realPOcomputational +* +* ===================================================================== + SUBROUTINE SPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPOTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U**T *U. +* +* Solve U**T *X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A*X = B where A = L*L**T. +* +* Solve L*X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) +* +* Solve L**T *X = B, overwriting B with X. +* + CALL STRSM( 'Left', 'Lower', 'Transpose', 'Non-unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) + END IF +* + RETURN +* +* End of SPOTRS +* + END diff --git a/dspl/liblapack/SRC/sppcon.f b/dspl/liblapack/SRC/sppcon.f new file mode 100644 index 0000000..565b6ea --- /dev/null +++ b/dspl/liblapack/SRC/sppcon.f @@ -0,0 +1,248 @@ +*> \brief \b SPPCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPPCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPPCON( UPLO, N, AP, ANORM, RCOND, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPPCON estimates the reciprocal of the condition number (in the +*> 1-norm) of a real symmetric positive definite packed matrix using +*> the Cholesky factorization A = U**T*U or A = L*L**T computed by +*> SPPTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, packed columnwise in a linear +*> array. The j-th column of U or L is stored in the array AP +*> as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The 1-norm (or infinity-norm) of the symmetric matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SPPCON( UPLO, N, AP, ANORM, RCOND, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + REAL AINVNM, SCALE, SCALEL, SCALEU, SMLNUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SLATPS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = SLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U**T). +* + CALL SLATPS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEL, WORK( 2*N+1 ), INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL SLATPS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEU, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL SLATPS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEL, WORK( 2*N+1 ), INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L**T). +* + CALL SLATPS( 'Lower', 'Transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEU, WORK( 2*N+1 ), INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of SPPCON +* + END diff --git a/dspl/liblapack/SRC/sppequ.f b/dspl/liblapack/SRC/sppequ.f new file mode 100644 index 0000000..2b30d33 --- /dev/null +++ b/dspl/liblapack/SRC/sppequ.f @@ -0,0 +1,238 @@ +*> \brief \b SPPEQU +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPPEQU + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* REAL AMAX, SCOND +* .. +* .. Array Arguments .. +* REAL AP( * ), S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPPEQU computes row and column scalings intended to equilibrate a +*> symmetric positive definite matrix A in packed storage and reduce +*> its condition number (with respect to the two-norm). S contains the +*> scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix +*> B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal. +*> This choice of S puts the condition number of B within a factor N of +*> the smallest possible condition number over all possible diagonal +*> scalings. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the symmetric matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is REAL +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is REAL +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + REAL AMAX, SCOND +* .. +* .. Array Arguments .. + REAL AP( * ), S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, JJ + REAL SMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPPEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* +* Initialize SMIN and AMAX. +* + S( 1 ) = AP( 1 ) + SMIN = S( 1 ) + AMAX = S( 1 ) +* + IF( UPPER ) THEN +* +* UPLO = 'U': Upper triangle of A is stored. +* Find the minimum and maximum diagonal elements. +* + JJ = 1 + DO 10 I = 2, N + JJ = JJ + I + S( I ) = AP( JJ ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + ELSE +* +* UPLO = 'L': Lower triangle of A is stored. +* Find the minimum and maximum diagonal elements. +* + JJ = 1 + DO 20 I = 2, N + JJ = JJ + N - I + 2 + S( I ) = AP( JJ ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 20 CONTINUE + END IF +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 30 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 30 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 40 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 40 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of SPPEQU +* + END diff --git a/dspl/liblapack/SRC/spprfs.f b/dspl/liblapack/SRC/spprfs.f new file mode 100644 index 0000000..760620e --- /dev/null +++ b/dspl/liblapack/SRC/spprfs.f @@ -0,0 +1,421 @@ +*> \brief \b SPPRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPPRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, +* BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL AFP( * ), AP( * ), B( LDB, * ), BERR( * ), +* $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPPRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric positive definite +*> and packed, and provides error bounds and backward error estimates +*> for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the symmetric matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in] AFP +*> \verbatim +*> AFP is REAL array, dimension (N*(N+1)/2) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by SPPTRF/CPPTRF, +*> packed columnwise in a linear array in the same format as A +*> (see AP). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by SPPTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, + $ BERR, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AFP( * ), AP( * ), B( LDB, * ), BERR( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IK, J, K, KASE, KK, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLACN2, SPPTRS, SSPMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL SSPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK( N+1 ), + $ 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + KK = 1 + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + IK = KK + DO 40 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK + S = S + ABS( AP( IK ) )*ABS( X( I, J ) ) + IK = IK + 1 + 40 CONTINUE + WORK( K ) = WORK( K ) + ABS( AP( KK+K-1 ) )*XK + S + KK = KK + K + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + WORK( K ) = WORK( K ) + ABS( AP( KK ) )*XK + IK = KK + 1 + DO 60 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK + S = S + ABS( AP( IK ) )*ABS( X( I, J ) ) + IK = IK + 1 + 60 CONTINUE + WORK( K ) = WORK( K ) + S + KK = KK + ( N-K+1 ) + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL SPPTRS( UPLO, N, 1, AFP, WORK( N+1 ), N, INFO ) + CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL SLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**T). +* + CALL SPPTRS( UPLO, N, 1, AFP, WORK( N+1 ), N, INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 120 CONTINUE + CALL SPPTRS( UPLO, N, 1, AFP, WORK( N+1 ), N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of SPPRFS +* + END diff --git a/dspl/liblapack/SRC/sppsv.f b/dspl/liblapack/SRC/sppsv.f new file mode 100644 index 0000000..d92c25f --- /dev/null +++ b/dspl/liblapack/SRC/sppsv.f @@ -0,0 +1,205 @@ +*> \brief SPPSV computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPPSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPPSV( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* REAL AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPPSV computes the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric positive definite matrix stored in +*> packed format and X and B are N-by-NRHS matrices. +*> +*> The Cholesky decomposition is used to factor A as +*> A = U**T* U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is a lower triangular +*> matrix. The factored form of A is then used to solve the system of +*> equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T, in the same storage +*> format as A. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i of A is not +*> positive definite, so the factorization could not be +*> completed, and the solution has not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the symmetric matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = conjg(aji)) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SPPSV( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SPPTRF, SPPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPPSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U**T*U or A = L*L**T. +* + CALL SPPTRF( UPLO, N, AP, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL SPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO ) +* + END IF + RETURN +* +* End of SPPSV +* + END diff --git a/dspl/liblapack/SRC/sppsvx.f b/dspl/liblapack/SRC/sppsvx.f new file mode 100644 index 0000000..a4ac149 --- /dev/null +++ b/dspl/liblapack/SRC/sppsvx.f @@ -0,0 +1,493 @@ +*> \brief SPPSVX computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPPSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPPSVX( FACT, UPLO, N, NRHS, AP, AFP, EQUED, S, B, LDB, +* X, LDX, RCOND, FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL AFP( * ), AP( * ), B( LDB, * ), BERR( * ), +* $ FERR( * ), S( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPPSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to +*> compute the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric positive definite matrix stored in +*> packed format and X and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +*> factor the matrix A (after equilibration if FACT = 'E') as +*> A = U**T* U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is a lower triangular +*> matrix. +*> +*> 3. If the leading i-by-i principal minor is not positive definite, +*> then the routine returns with INFO = i. Otherwise, the factored +*> form of A is used to estimate the condition number of the matrix +*> A. If the reciprocal of the condition number is less than machine +*> precision, INFO = N+1 is returned as a warning, but the routine +*> still goes on to solve for X and compute error bounds as +*> described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(S) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AFP contains the factored form of A. +*> If EQUED = 'Y', the matrix A has been equilibrated +*> with scaling factors given by S. AP and AFP will not +*> be modified. +*> = 'N': The matrix A will be copied to AFP and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AFP and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array, except if FACT = 'F' +*> and EQUED = 'Y', then A must contain the equilibrated matrix +*> diag(S)*A*diag(S). The j-th column of A is stored in the +*> array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. A is not modified if +*> FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in,out] AFP +*> \verbatim +*> AFP is REAL array, dimension (N*(N+1)/2) +*> If FACT = 'F', then AFP is an input argument and on entry +*> contains the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T, in the same storage +*> format as A. If EQUED .ne. 'N', then AFP is the factored +*> form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AFP is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**T * U or A = L * L**T of the original +*> matrix A. +*> +*> If FACT = 'E', then AFP is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**T * U or A = L * L**T of the equilibrated +*> matrix A (see the description of AP for the form of the +*> equilibrated matrix). +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A; not accessed if EQUED = 'N'. S is +*> an input argument if FACT = 'F'; otherwise, S is an output +*> argument. If FACT = 'F' and EQUED = 'Y', each element of S +*> must be positive. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +*> B is overwritten by diag(S) * B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +*> the original system of equations. Note that if EQUED = 'Y', +*> A and B are modified on exit, and the solution to the +*> equilibrated system is inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The estimate of the reciprocal condition number of the matrix +*> A after equilibration (if done). If RCOND is less than the +*> machine precision (in particular, if RCOND = 0), the matrix +*> is singular to working precision. This condition is +*> indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: the leading minor of order i of A is +*> not positive definite, so the factorization +*> could not be completed, and the solution has not +*> been computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realOTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the symmetric matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = conjg(aji)) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SPPSVX( FACT, UPLO, N, NRHS, AP, AFP, EQUED, S, B, LDB, + $ X, LDX, RCOND, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AFP( * ), AP( * ), B( LDB, * ), BERR( * ), + $ FERR( * ), S( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER I, INFEQU, J + REAL AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSP + EXTERNAL LSAME, SLAMCH, SLANSP +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLACPY, SLAQSP, SPPCON, SPPEQU, SPPRFS, + $ SPPTRF, SPPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -7 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -8 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPPSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL SPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL SLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U**T * U or A = L * L**T. +* + CALL SCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 ) + CALL SPPTRF( UPLO, N, AFP, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = SLANSP( 'I', UPLO, N, AP, WORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL SPPCON( UPLO, N, AFP, ANORM, RCOND, WORK, IWORK, INFO ) +* +* Compute the solution matrix X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SPPTRS( UPLO, N, NRHS, AFP, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL SPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, BERR, + $ WORK, IWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 50 J = 1, NRHS + DO 40 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 40 CONTINUE + 50 CONTINUE + DO 60 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 60 CONTINUE + END IF +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of SPPSVX +* + END diff --git a/dspl/liblapack/SRC/spptrf.f b/dspl/liblapack/SRC/spptrf.f new file mode 100644 index 0000000..1134a06 --- /dev/null +++ b/dspl/liblapack/SRC/spptrf.f @@ -0,0 +1,240 @@ +*> \brief \b SPPTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPPTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPPTRF( UPLO, N, AP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* REAL AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPPTRF computes the Cholesky factorization of a real symmetric +*> positive definite matrix A stored in packed format. +*> +*> The factorization has the form +*> A = U**T * U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**T*U or A = L*L**T, in the same +*> storage format as A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the symmetric matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = aji) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SPPTRF( UPLO, N, AP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JC, JJ + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSPR, STPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPPTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U**T*U. +* + JJ = 0 + DO 10 J = 1, N + JC = JJ + 1 + JJ = JJ + J +* +* Compute elements 1:J-1 of column J. +* + IF( J.GT.1 ) + $ CALL STPSV( 'Upper', 'Transpose', 'Non-unit', J-1, AP, + $ AP( JC ), 1 ) +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = AP( JJ ) - SDOT( J-1, AP( JC ), 1, AP( JC ), 1 ) + IF( AJJ.LE.ZERO ) THEN + AP( JJ ) = AJJ + GO TO 30 + END IF + AP( JJ ) = SQRT( AJJ ) + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L**T. +* + JJ = 1 + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = AP( JJ ) + IF( AJJ.LE.ZERO ) THEN + AP( JJ ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + AP( JJ ) = AJJ +* +* Compute elements J+1:N of column J and update the trailing +* submatrix. +* + IF( J.LT.N ) THEN + CALL SSCAL( N-J, ONE / AJJ, AP( JJ+1 ), 1 ) + CALL SSPR( 'Lower', N-J, -ONE, AP( JJ+1 ), 1, + $ AP( JJ+N-J+1 ) ) + JJ = JJ + N - J + 1 + END IF + 20 CONTINUE + END IF + GO TO 40 +* + 30 CONTINUE + INFO = J +* + 40 CONTINUE + RETURN +* +* End of SPPTRF +* + END diff --git a/dspl/liblapack/SRC/spptri.f b/dspl/liblapack/SRC/spptri.f new file mode 100644 index 0000000..9824078 --- /dev/null +++ b/dspl/liblapack/SRC/spptri.f @@ -0,0 +1,188 @@ +*> \brief \b SPPTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPPTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPPTRI( UPLO, N, AP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* REAL AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPPTRI computes the inverse of a real symmetric positive definite +*> matrix A using the Cholesky factorization A = U**T*U or A = L*L**T +*> computed by SPPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular factor is stored in AP; +*> = 'L': Lower triangular factor is stored in AP. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> On entry, the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T, packed columnwise as +*> a linear array. The j-th column of U or L is stored in the +*> array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +*> +*> On exit, the upper or lower triangle of the (symmetric) +*> inverse of A, overwriting the input factor U or L. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the (i,i) element of the factor U or L is +*> zero, and the inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SPPTRI( UPLO, N, AP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JC, JJ, JJN + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSPR, STPMV, STPTRI, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPPTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL STPTRI( UPLO, 'Non-unit', N, AP, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the product inv(U) * inv(U)**T. +* + JJ = 0 + DO 10 J = 1, N + JC = JJ + 1 + JJ = JJ + J + IF( J.GT.1 ) + $ CALL SSPR( 'Upper', J-1, ONE, AP( JC ), 1, AP ) + AJJ = AP( JJ ) + CALL SSCAL( J, AJJ, AP( JC ), 1 ) + 10 CONTINUE +* + ELSE +* +* Compute the product inv(L)**T * inv(L). +* + JJ = 1 + DO 20 J = 1, N + JJN = JJ + N - J + 1 + AP( JJ ) = SDOT( N-J+1, AP( JJ ), 1, AP( JJ ), 1 ) + IF( J.LT.N ) + $ CALL STPMV( 'Lower', 'Transpose', 'Non-unit', N-J, + $ AP( JJN ), AP( JJ+1 ), 1 ) + JJ = JJN + 20 CONTINUE + END IF +* + RETURN +* +* End of SPPTRI +* + END diff --git a/dspl/liblapack/SRC/spptrs.f b/dspl/liblapack/SRC/spptrs.f new file mode 100644 index 0000000..7eb48ee --- /dev/null +++ b/dspl/liblapack/SRC/spptrs.f @@ -0,0 +1,203 @@ +*> \brief \b SPPTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPPTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* REAL AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPPTRS solves a system of linear equations A*X = B with a symmetric +*> positive definite matrix A in packed storage using the Cholesky +*> factorization A = U**T*U or A = L*L**T computed by SPPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, packed columnwise in a linear +*> array. The j-th column of U or L is stored in the array AP +*> as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL STPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U**T * U. +* + DO 10 I = 1, NRHS +* +* Solve U**T *X = B, overwriting B with X. +* + CALL STPSV( 'Upper', 'Transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) +* +* Solve U*X = B, overwriting B with X. +* + CALL STPSV( 'Upper', 'No transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) + 10 CONTINUE + ELSE +* +* Solve A*X = B where A = L * L**T. +* + DO 20 I = 1, NRHS +* +* Solve L*Y = B, overwriting B with X. +* + CALL STPSV( 'Lower', 'No transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) +* +* Solve L**T *X = Y, overwriting B with X. +* + CALL STPSV( 'Lower', 'Transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) + 20 CONTINUE + END IF +* + RETURN +* +* End of SPPTRS +* + END diff --git a/dspl/liblapack/SRC/spstf2.f b/dspl/liblapack/SRC/spstf2.f new file mode 100644 index 0000000..4d15065 --- /dev/null +++ b/dspl/liblapack/SRC/spstf2.f @@ -0,0 +1,386 @@ +*> \brief \b SPSTF2 computes the Cholesky factorization with complete pivoting of a real symmetric positive semidefinite matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPSTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPSTF2( UPLO, N, A, LDA, PIV, RANK, TOL, WORK, INFO ) +* +* .. Scalar Arguments .. +* REAL TOL +* INTEGER INFO, LDA, N, RANK +* CHARACTER UPLO +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), WORK( 2*N ) +* INTEGER PIV( N ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPSTF2 computes the Cholesky factorization with complete +*> pivoting of a real symmetric positive semidefinite matrix A. +*> +*> The factorization has the form +*> P**T * A * P = U**T * U , if UPLO = 'U', +*> P**T * A * P = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular, and +*> P is stored as vector PIV. +*> +*> This algorithm does not attempt to check that A is positive +*> semidefinite. This version of the algorithm calls level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization as above. +*> \endverbatim +*> +*> \param[out] PIV +*> \verbatim +*> PIV is INTEGER array, dimension (N) +*> PIV is such that the nonzero entries are P( PIV(K), K ) = 1. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The rank of A given by the number of steps the algorithm +*> completed. +*> \endverbatim +*> +*> \param[in] TOL +*> \verbatim +*> TOL is REAL +*> User defined tolerance. If TOL < 0, then N*U*MAX( A( K,K ) ) +*> will be used. The algorithm terminates at the (K-1)st step +*> if the pivot <= TOL. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> Work space. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> < 0: If INFO = -K, the K-th argument had an illegal value, +*> = 0: algorithm completed successfully, and +*> > 0: the matrix A is either rank deficient with computed rank +*> as returned in RANK, or is not positive semidefinite. See +*> Section 7 of LAPACK Working Note #161 for further +*> information. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SPSTF2( UPLO, N, A, LDA, PIV, RANK, TOL, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + REAL TOL + INTEGER INFO, LDA, N, RANK + CHARACTER UPLO +* .. +* .. Array Arguments .. + REAL A( LDA, * ), WORK( 2*N ) + INTEGER PIV( N ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + REAL AJJ, SSTOP, STEMP + INTEGER I, ITEMP, J, PVT + LOGICAL UPPER +* .. +* .. External Functions .. + REAL SLAMCH + LOGICAL LSAME, SISNAN + EXTERNAL SLAMCH, LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT, MAXLOC +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPSTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize PIV +* + DO 100 I = 1, N + PIV( I ) = I + 100 CONTINUE +* +* Compute stopping value +* + PVT = 1 + AJJ = A( PVT, PVT ) + DO I = 2, N + IF( A( I, I ).GT.AJJ ) THEN + PVT = I + AJJ = A( PVT, PVT ) + END IF + END DO + IF( AJJ.LE.ZERO.OR.SISNAN( AJJ ) ) THEN + RANK = 0 + INFO = 1 + GO TO 170 + END IF +* +* Compute stopping value if not supplied +* + IF( TOL.LT.ZERO ) THEN + SSTOP = N * SLAMCH( 'Epsilon' ) * AJJ + ELSE + SSTOP = TOL + END IF +* +* Set first half of WORK to zero, holds dot products +* + DO 110 I = 1, N + WORK( I ) = 0 + 110 CONTINUE +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization P**T * A * P = U**T * U +* + DO 130 J = 1, N +* +* Find pivot, test for exit, else swap rows and columns +* Update dot products, compute possible pivots which are +* stored in the second half of WORK +* + DO 120 I = J, N +* + IF( J.GT.1 ) THEN + WORK( I ) = WORK( I ) + A( J-1, I )**2 + END IF + WORK( N+I ) = A( I, I ) - WORK( I ) +* + 120 CONTINUE +* + IF( J.GT.1 ) THEN + ITEMP = MAXLOC( WORK( (N+J):(2*N) ), 1 ) + PVT = ITEMP + J - 1 + AJJ = WORK( N+PVT ) + IF( AJJ.LE.SSTOP.OR.SISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 160 + END IF + END IF +* + IF( J.NE.PVT ) THEN +* +* Pivot OK, so can now swap pivot rows and columns +* + A( PVT, PVT ) = A( J, J ) + CALL SSWAP( J-1, A( 1, J ), 1, A( 1, PVT ), 1 ) + IF( PVT.LT.N ) + $ CALL SSWAP( N-PVT, A( J, PVT+1 ), LDA, + $ A( PVT, PVT+1 ), LDA ) + CALL SSWAP( PVT-J-1, A( J, J+1 ), LDA, A( J+1, PVT ), 1 ) +* +* Swap dot products and PIV +* + STEMP = WORK( J ) + WORK( J ) = WORK( PVT ) + WORK( PVT ) = STEMP + ITEMP = PIV( PVT ) + PIV( PVT ) = PIV( J ) + PIV( J ) = ITEMP + END IF +* + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of row J +* + IF( J.LT.N ) THEN + CALL SGEMV( 'Trans', J-1, N-J, -ONE, A( 1, J+1 ), LDA, + $ A( 1, J ), 1, ONE, A( J, J+1 ), LDA ) + CALL SSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA ) + END IF +* + 130 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization P**T * A * P = L * L**T +* + DO 150 J = 1, N +* +* Find pivot, test for exit, else swap rows and columns +* Update dot products, compute possible pivots which are +* stored in the second half of WORK +* + DO 140 I = J, N +* + IF( J.GT.1 ) THEN + WORK( I ) = WORK( I ) + A( I, J-1 )**2 + END IF + WORK( N+I ) = A( I, I ) - WORK( I ) +* + 140 CONTINUE +* + IF( J.GT.1 ) THEN + ITEMP = MAXLOC( WORK( (N+J):(2*N) ), 1 ) + PVT = ITEMP + J - 1 + AJJ = WORK( N+PVT ) + IF( AJJ.LE.SSTOP.OR.SISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 160 + END IF + END IF +* + IF( J.NE.PVT ) THEN +* +* Pivot OK, so can now swap pivot rows and columns +* + A( PVT, PVT ) = A( J, J ) + CALL SSWAP( J-1, A( J, 1 ), LDA, A( PVT, 1 ), LDA ) + IF( PVT.LT.N ) + $ CALL SSWAP( N-PVT, A( PVT+1, J ), 1, A( PVT+1, PVT ), + $ 1 ) + CALL SSWAP( PVT-J-1, A( J+1, J ), 1, A( PVT, J+1 ), LDA ) +* +* Swap dot products and PIV +* + STEMP = WORK( J ) + WORK( J ) = WORK( PVT ) + WORK( PVT ) = STEMP + ITEMP = PIV( PVT ) + PIV( PVT ) = PIV( J ) + PIV( J ) = ITEMP + END IF +* + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of column J +* + IF( J.LT.N ) THEN + CALL SGEMV( 'No Trans', N-J, J-1, -ONE, A( J+1, 1 ), LDA, + $ A( J, 1 ), LDA, ONE, A( J+1, J ), 1 ) + CALL SSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 ) + END IF +* + 150 CONTINUE +* + END IF +* +* Ran to completion, A has full rank +* + RANK = N +* + GO TO 170 + 160 CONTINUE +* +* Rank is number of steps completed. Set INFO = 1 to signal +* that the factorization cannot be used to solve a system. +* + RANK = J - 1 + INFO = 1 +* + 170 CONTINUE + RETURN +* +* End of SPSTF2 +* + END diff --git a/dspl/liblapack/SRC/spstrf.f b/dspl/liblapack/SRC/spstrf.f new file mode 100644 index 0000000..f9bc4de --- /dev/null +++ b/dspl/liblapack/SRC/spstrf.f @@ -0,0 +1,444 @@ +*> \brief \b SPSTRF computes the Cholesky factorization with complete pivoting of a real symmetric positive semidefinite matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPSTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPSTRF( UPLO, N, A, LDA, PIV, RANK, TOL, WORK, INFO ) +* +* .. Scalar Arguments .. +* REAL TOL +* INTEGER INFO, LDA, N, RANK +* CHARACTER UPLO +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), WORK( 2*N ) +* INTEGER PIV( N ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPSTRF computes the Cholesky factorization with complete +*> pivoting of a real symmetric positive semidefinite matrix A. +*> +*> The factorization has the form +*> P**T * A * P = U**T * U , if UPLO = 'U', +*> P**T * A * P = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular, and +*> P is stored as vector PIV. +*> +*> This algorithm does not attempt to check that A is positive +*> semidefinite. This version of the algorithm calls level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization as above. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] PIV +*> \verbatim +*> PIV is INTEGER array, dimension (N) +*> PIV is such that the nonzero entries are P( PIV(K), K ) = 1. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The rank of A given by the number of steps the algorithm +*> completed. +*> \endverbatim +*> +*> \param[in] TOL +*> \verbatim +*> TOL is REAL +*> User defined tolerance. If TOL < 0, then N*U*MAX( A(K,K) ) +*> will be used. The algorithm terminates at the (K-1)st step +*> if the pivot <= TOL. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> Work space. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> < 0: If INFO = -K, the K-th argument had an illegal value, +*> = 0: algorithm completed successfully, and +*> > 0: the matrix A is either rank deficient with computed rank +*> as returned in RANK, or is not positive semidefinite. See +*> Section 7 of LAPACK Working Note #161 for further +*> information. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SPSTRF( UPLO, N, A, LDA, PIV, RANK, TOL, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + REAL TOL + INTEGER INFO, LDA, N, RANK + CHARACTER UPLO +* .. +* .. Array Arguments .. + REAL A( LDA, * ), WORK( 2*N ) + INTEGER PIV( N ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + REAL AJJ, SSTOP, STEMP + INTEGER I, ITEMP, J, JB, K, NB, PVT + LOGICAL UPPER +* .. +* .. External Functions .. + REAL SLAMCH + INTEGER ILAENV + LOGICAL LSAME, SISNAN + EXTERNAL SLAMCH, ILAENV, LSAME, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SPSTF2, SSCAL, SSWAP, SSYRK, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT, MAXLOC +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPSTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get block size +* + NB = ILAENV( 1, 'SPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL SPSTF2( UPLO, N, A( 1, 1 ), LDA, PIV, RANK, TOL, WORK, + $ INFO ) + GO TO 200 +* + ELSE +* +* Initialize PIV +* + DO 100 I = 1, N + PIV( I ) = I + 100 CONTINUE +* +* Compute stopping value +* + PVT = 1 + AJJ = A( PVT, PVT ) + DO I = 2, N + IF( A( I, I ).GT.AJJ ) THEN + PVT = I + AJJ = A( PVT, PVT ) + END IF + END DO + IF( AJJ.LE.ZERO.OR.SISNAN( AJJ ) ) THEN + RANK = 0 + INFO = 1 + GO TO 200 + END IF +* +* Compute stopping value if not supplied +* + IF( TOL.LT.ZERO ) THEN + SSTOP = N * SLAMCH( 'Epsilon' ) * AJJ + ELSE + SSTOP = TOL + END IF +* +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization P**T * A * P = U**T * U +* + DO 140 K = 1, N, NB +* +* Account for last block not being NB wide +* + JB = MIN( NB, N-K+1 ) +* +* Set relevant part of first half of WORK to zero, +* holds dot products +* + DO 110 I = K, N + WORK( I ) = 0 + 110 CONTINUE +* + DO 130 J = K, K + JB - 1 +* +* Find pivot, test for exit, else swap rows and columns +* Update dot products, compute possible pivots which are +* stored in the second half of WORK +* + DO 120 I = J, N +* + IF( J.GT.K ) THEN + WORK( I ) = WORK( I ) + A( J-1, I )**2 + END IF + WORK( N+I ) = A( I, I ) - WORK( I ) +* + 120 CONTINUE +* + IF( J.GT.1 ) THEN + ITEMP = MAXLOC( WORK( (N+J):(2*N) ), 1 ) + PVT = ITEMP + J - 1 + AJJ = WORK( N+PVT ) + IF( AJJ.LE.SSTOP.OR.SISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 190 + END IF + END IF +* + IF( J.NE.PVT ) THEN +* +* Pivot OK, so can now swap pivot rows and columns +* + A( PVT, PVT ) = A( J, J ) + CALL SSWAP( J-1, A( 1, J ), 1, A( 1, PVT ), 1 ) + IF( PVT.LT.N ) + $ CALL SSWAP( N-PVT, A( J, PVT+1 ), LDA, + $ A( PVT, PVT+1 ), LDA ) + CALL SSWAP( PVT-J-1, A( J, J+1 ), LDA, + $ A( J+1, PVT ), 1 ) +* +* Swap dot products and PIV +* + STEMP = WORK( J ) + WORK( J ) = WORK( PVT ) + WORK( PVT ) = STEMP + ITEMP = PIV( PVT ) + PIV( PVT ) = PIV( J ) + PIV( J ) = ITEMP + END IF +* + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of row J. +* + IF( J.LT.N ) THEN + CALL SGEMV( 'Trans', J-K, N-J, -ONE, A( K, J+1 ), + $ LDA, A( K, J ), 1, ONE, A( J, J+1 ), + $ LDA ) + CALL SSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA ) + END IF +* + 130 CONTINUE +* +* Update trailing matrix, J already incremented +* + IF( K+JB.LE.N ) THEN + CALL SSYRK( 'Upper', 'Trans', N-J+1, JB, -ONE, + $ A( K, J ), LDA, ONE, A( J, J ), LDA ) + END IF +* + 140 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization P**T * A * P = L * L**T +* + DO 180 K = 1, N, NB +* +* Account for last block not being NB wide +* + JB = MIN( NB, N-K+1 ) +* +* Set relevant part of first half of WORK to zero, +* holds dot products +* + DO 150 I = K, N + WORK( I ) = 0 + 150 CONTINUE +* + DO 170 J = K, K + JB - 1 +* +* Find pivot, test for exit, else swap rows and columns +* Update dot products, compute possible pivots which are +* stored in the second half of WORK +* + DO 160 I = J, N +* + IF( J.GT.K ) THEN + WORK( I ) = WORK( I ) + A( I, J-1 )**2 + END IF + WORK( N+I ) = A( I, I ) - WORK( I ) +* + 160 CONTINUE +* + IF( J.GT.1 ) THEN + ITEMP = MAXLOC( WORK( (N+J):(2*N) ), 1 ) + PVT = ITEMP + J - 1 + AJJ = WORK( N+PVT ) + IF( AJJ.LE.SSTOP.OR.SISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 190 + END IF + END IF +* + IF( J.NE.PVT ) THEN +* +* Pivot OK, so can now swap pivot rows and columns +* + A( PVT, PVT ) = A( J, J ) + CALL SSWAP( J-1, A( J, 1 ), LDA, A( PVT, 1 ), LDA ) + IF( PVT.LT.N ) + $ CALL SSWAP( N-PVT, A( PVT+1, J ), 1, + $ A( PVT+1, PVT ), 1 ) + CALL SSWAP( PVT-J-1, A( J+1, J ), 1, A( PVT, J+1 ), + $ LDA ) +* +* Swap dot products and PIV +* + STEMP = WORK( J ) + WORK( J ) = WORK( PVT ) + WORK( PVT ) = STEMP + ITEMP = PIV( PVT ) + PIV( PVT ) = PIV( J ) + PIV( J ) = ITEMP + END IF +* + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of column J. +* + IF( J.LT.N ) THEN + CALL SGEMV( 'No Trans', N-J, J-K, -ONE, + $ A( J+1, K ), LDA, A( J, K ), LDA, ONE, + $ A( J+1, J ), 1 ) + CALL SSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 ) + END IF +* + 170 CONTINUE +* +* Update trailing matrix, J already incremented +* + IF( K+JB.LE.N ) THEN + CALL SSYRK( 'Lower', 'No Trans', N-J+1, JB, -ONE, + $ A( J, K ), LDA, ONE, A( J, J ), LDA ) + END IF +* + 180 CONTINUE +* + END IF + END IF +* +* Ran to completion, A has full rank +* + RANK = N +* + GO TO 200 + 190 CONTINUE +* +* Rank is the number of steps completed. Set INFO = 1 to signal +* that the factorization cannot be used to solve a system. +* + RANK = J - 1 + INFO = 1 +* + 200 CONTINUE + RETURN +* +* End of SPSTRF +* + END diff --git a/dspl/liblapack/SRC/sptcon.f b/dspl/liblapack/SRC/sptcon.f new file mode 100644 index 0000000..fd11c1d --- /dev/null +++ b/dspl/liblapack/SRC/sptcon.f @@ -0,0 +1,221 @@ +*> \brief \b SPTCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPTCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPTCON( N, D, E, ANORM, RCOND, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPTCON computes the reciprocal of the condition number (in the +*> 1-norm) of a real symmetric positive definite tridiagonal matrix +*> using the factorization A = L*D*L**T or A = U**T*D*U computed by +*> SPTTRF. +*> +*> Norm(inv(A)) is computed by a direct method, and the reciprocal of +*> the condition number is computed as +*> RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of the diagonal matrix D from the +*> factorization of A, as computed by SPTTRF. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The (n-1) off-diagonal elements of the unit bidiagonal factor +*> U or L from the factorization of A, as computed by SPTTRF. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is the +*> 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realPTcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The method used is described in Nicholas J. Higham, "Efficient +*> Algorithms for Computing the Condition Number of a Tridiagonal +*> Matrix", SIAM J. Sci. Stat. Comput., Vol. 7, No. 1, January 1986. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SPTCON( N, D, E, ANORM, RCOND, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IX + REAL AINVNM +* .. +* .. External Functions .. + INTEGER ISAMAX + EXTERNAL ISAMAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPTCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* +* Check that D(1:N) is positive. +* + DO 10 I = 1, N + IF( D( I ).LE.ZERO ) + $ RETURN + 10 CONTINUE +* +* Solve M(A) * x = e, where M(A) = (m(i,j)) is given by +* +* m(i,j) = abs(A(i,j)), i = j, +* m(i,j) = -abs(A(i,j)), i .ne. j, +* +* and e = [ 1, 1, ..., 1 ]**T. Note M(A) = M(L)*D*M(L)**T. +* +* Solve M(L) * x = e. +* + WORK( 1 ) = ONE + DO 20 I = 2, N + WORK( I ) = ONE + WORK( I-1 )*ABS( E( I-1 ) ) + 20 CONTINUE +* +* Solve D * M(L)**T * x = b. +* + WORK( N ) = WORK( N ) / D( N ) + DO 30 I = N - 1, 1, -1 + WORK( I ) = WORK( I ) / D( I ) + WORK( I+1 )*ABS( E( I ) ) + 30 CONTINUE +* +* Compute AINVNM = max(x(i)), 1<=i<=n. +* + IX = ISAMAX( N, WORK, 1 ) + AINVNM = ABS( WORK( IX ) ) +* +* Compute the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of SPTCON +* + END diff --git a/dspl/liblapack/SRC/spteqr.f b/dspl/liblapack/SRC/spteqr.f new file mode 100644 index 0000000..be35475 --- /dev/null +++ b/dspl/liblapack/SRC/spteqr.f @@ -0,0 +1,261 @@ +*> \brief \b SPTEQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPTEQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPZ +* INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPTEQR computes all eigenvalues and, optionally, eigenvectors of a +*> symmetric positive definite tridiagonal matrix by first factoring the +*> matrix using SPTTRF, and then calling SBDSQR to compute the singular +*> values of the bidiagonal factor. +*> +*> This routine computes the eigenvalues of the positive definite +*> tridiagonal matrix to high relative accuracy. This means that if the +*> eigenvalues range over many orders of magnitude in size, then the +*> small eigenvalues and corresponding eigenvectors will be computed +*> more accurately than, for example, with the standard QR method. +*> +*> The eigenvectors of a full or band symmetric positive definite matrix +*> can also be found if SSYTRD, SSPTRD, or SSBTRD has been used to +*> reduce this matrix to tridiagonal form. (The reduction to tridiagonal +*> form, however, may preclude the possibility of obtaining high +*> relative accuracy in the small eigenvalues of the original matrix, if +*> these eigenvalues range over many orders of magnitude.) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only. +*> = 'V': Compute eigenvectors of original symmetric +*> matrix also. Array Z contains the orthogonal +*> matrix used to reduce the original matrix to +*> tridiagonal form. +*> = 'I': Compute eigenvectors of tridiagonal matrix also. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal +*> matrix. +*> On normal exit, D contains the eigenvalues, in descending +*> order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', the orthogonal matrix used in the +*> reduction to tridiagonal form. +*> On exit, if COMPZ = 'V', the orthonormal eigenvectors of the +*> original symmetric matrix; +*> if COMPZ = 'I', the orthonormal eigenvectors of the +*> tridiagonal matrix. +*> If INFO > 0 on exit, Z contains the eigenvectors associated +*> with only the stored eigenvalues. +*> If COMPZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> COMPZ = 'V' or 'I', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, and i is: +*> <= N the Cholesky factorization of the matrix could +*> not be performed because the i-th principal minor +*> was not positive definite. +*> > N the SVD algorithm failed to converge; +*> if INFO = N+i, i off-diagonal elements of the +*> bidiagonal factor did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realPTcomputational +* +* ===================================================================== + SUBROUTINE SPTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SBDSQR, SLASET, SPTTRF, XERBLA +* .. +* .. Local Arrays .. + REAL C( 1, 1 ), VT( 1, 1 ) +* .. +* .. Local Scalars .. + INTEGER I, ICOMPZ, NRU +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPTEQR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ICOMPZ.GT.0 ) + $ Z( 1, 1 ) = ONE + RETURN + END IF + IF( ICOMPZ.EQ.2 ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* +* Call SPTTRF to factor the matrix. +* + CALL SPTTRF( N, D, E, INFO ) + IF( INFO.NE.0 ) + $ RETURN + DO 10 I = 1, N + D( I ) = SQRT( D( I ) ) + 10 CONTINUE + DO 20 I = 1, N - 1 + E( I ) = E( I )*D( I ) + 20 CONTINUE +* +* Call SBDSQR to compute the singular values/vectors of the +* bidiagonal factor. +* + IF( ICOMPZ.GT.0 ) THEN + NRU = N + ELSE + NRU = 0 + END IF + CALL SBDSQR( 'Lower', N, 0, NRU, 0, D, E, VT, 1, Z, LDZ, C, 1, + $ WORK, INFO ) +* +* Square the singular values. +* + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + D( I ) = D( I )*D( I ) + 30 CONTINUE + ELSE + INFO = N + INFO + END IF +* + RETURN +* +* End of SPTEQR +* + END diff --git a/dspl/liblapack/SRC/sptrfs.f b/dspl/liblapack/SRC/sptrfs.f new file mode 100644 index 0000000..c7789c8 --- /dev/null +++ b/dspl/liblapack/SRC/sptrfs.f @@ -0,0 +1,395 @@ +*> \brief \b SPTRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPTRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPTRFS( N, NRHS, D, E, DF, EF, B, LDB, X, LDX, FERR, +* BERR, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* REAL B( LDB, * ), BERR( * ), D( * ), DF( * ), +* $ E( * ), EF( * ), FERR( * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPTRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric positive definite +*> and tridiagonal, and provides error bounds and backward error +*> estimates for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of the tridiagonal matrix A. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The (n-1) subdiagonal elements of the tridiagonal matrix A. +*> \endverbatim +*> +*> \param[in] DF +*> \verbatim +*> DF is REAL array, dimension (N) +*> The n diagonal elements of the diagonal matrix D from the +*> factorization computed by SPTTRF. +*> \endverbatim +*> +*> \param[in] EF +*> \verbatim +*> EF is REAL array, dimension (N-1) +*> The (n-1) subdiagonal elements of the unit bidiagonal factor +*> L from the factorization computed by SPTTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by SPTTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realPTcomputational +* +* ===================================================================== + SUBROUTINE SPTRFS( N, NRHS, D, E, DF, EF, B, LDB, X, LDX, FERR, + $ BERR, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + REAL B( LDB, * ), BERR( * ), D( * ), DF( * ), + $ E( * ), EF( * ), FERR( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER COUNT, I, IX, J, NZ + REAL BI, CX, DX, EPS, EX, LSTRES, S, SAFE1, SAFE2, + $ SAFMIN +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SPTTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL ISAMAX, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPTRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = 4 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 90 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X. Also compute +* abs(A)*abs(x) + abs(b) for use in the backward error bound. +* + IF( N.EQ.1 ) THEN + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + WORK( N+1 ) = BI - DX + WORK( 1 ) = ABS( BI ) + ABS( DX ) + ELSE + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + EX = E( 1 )*X( 2, J ) + WORK( N+1 ) = BI - DX - EX + WORK( 1 ) = ABS( BI ) + ABS( DX ) + ABS( EX ) + DO 30 I = 2, N - 1 + BI = B( I, J ) + CX = E( I-1 )*X( I-1, J ) + DX = D( I )*X( I, J ) + EX = E( I )*X( I+1, J ) + WORK( N+I ) = BI - CX - DX - EX + WORK( I ) = ABS( BI ) + ABS( CX ) + ABS( DX ) + ABS( EX ) + 30 CONTINUE + BI = B( N, J ) + CX = E( N-1 )*X( N-1, J ) + DX = D( N )*X( N, J ) + WORK( N+N ) = BI - CX - DX + WORK( N ) = ABS( BI ) + ABS( CX ) + ABS( DX ) + END IF +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + S = ZERO + DO 40 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 40 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL SPTTRS( N, 1, DF, EF, WORK( N+1 ), N, INFO ) + CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* + DO 50 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 50 CONTINUE + IX = ISAMAX( N, WORK, 1 ) + FERR( J ) = WORK( IX ) +* +* Estimate the norm of inv(A). +* +* Solve M(A) * x = e, where M(A) = (m(i,j)) is given by +* +* m(i,j) = abs(A(i,j)), i = j, +* m(i,j) = -abs(A(i,j)), i .ne. j, +* +* and e = [ 1, 1, ..., 1 ]**T. Note M(A) = M(L)*D*M(L)**T. +* +* Solve M(L) * x = e. +* + WORK( 1 ) = ONE + DO 60 I = 2, N + WORK( I ) = ONE + WORK( I-1 )*ABS( EF( I-1 ) ) + 60 CONTINUE +* +* Solve D * M(L)**T * x = b. +* + WORK( N ) = WORK( N ) / DF( N ) + DO 70 I = N - 1, 1, -1 + WORK( I ) = WORK( I ) / DF( I ) + WORK( I+1 )*ABS( EF( I ) ) + 70 CONTINUE +* +* Compute norm(inv(A)) = max(x(i)), 1<=i<=n. +* + IX = ISAMAX( N, WORK, 1 ) + FERR( J ) = FERR( J )*ABS( WORK( IX ) ) +* +* Normalize error. +* + LSTRES = ZERO + DO 80 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 80 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 90 CONTINUE +* + RETURN +* +* End of SPTRFS +* + END diff --git a/dspl/liblapack/SRC/sptsv.f b/dspl/liblapack/SRC/sptsv.f new file mode 100644 index 0000000..657548e --- /dev/null +++ b/dspl/liblapack/SRC/sptsv.f @@ -0,0 +1,167 @@ +*> \brief SPTSV computes the solution to system of linear equations A * X = B for PT matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPTSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPTSV( N, NRHS, D, E, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* REAL B( LDB, * ), D( * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPTSV computes the solution to a real system of linear equations +*> A*X = B, where A is an N-by-N symmetric positive definite tridiagonal +*> matrix, and X and B are N-by-NRHS matrices. +*> +*> A is factored as A = L*D*L**T, and the factored form of A is then +*> used to solve the system of equations. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix +*> A. On exit, the n diagonal elements of the diagonal matrix +*> D from the factorization A = L*D*L**T. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix A. On exit, the (n-1) subdiagonal elements of the +*> unit bidiagonal factor L from the L*D*L**T factorization of +*> A. (E can also be regarded as the superdiagonal of the unit +*> bidiagonal factor U from the U**T*D*U factorization of A.) +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the solution has not been +*> computed. The factorization has not been completed +*> unless i = N. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realPTsolve +* +* ===================================================================== + SUBROUTINE SPTSV( N, NRHS, D, E, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL B( LDB, * ), D( * ), E( * ) +* .. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL SPTTRF, SPTTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPTSV ', -INFO ) + RETURN + END IF +* +* Compute the L*D*L**T (or U**T*D*U) factorization of A. +* + CALL SPTTRF( N, D, E, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL SPTTRS( N, NRHS, D, E, B, LDB, INFO ) + END IF + RETURN +* +* End of SPTSV +* + END diff --git a/dspl/liblapack/SRC/sptsvx.f b/dspl/liblapack/SRC/sptsvx.f new file mode 100644 index 0000000..c293e54 --- /dev/null +++ b/dspl/liblapack/SRC/sptsvx.f @@ -0,0 +1,336 @@ +*> \brief SPTSVX computes the solution to system of linear equations A * X = B for PT matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPTSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPTSVX( FACT, N, NRHS, D, E, DF, EF, B, LDB, X, LDX, +* RCOND, FERR, BERR, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER FACT +* INTEGER INFO, LDB, LDX, N, NRHS +* REAL RCOND +* .. +* .. Array Arguments .. +* REAL B( LDB, * ), BERR( * ), D( * ), DF( * ), +* $ E( * ), EF( * ), FERR( * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPTSVX uses the factorization A = L*D*L**T to compute the solution +*> to a real system of linear equations A*X = B, where A is an N-by-N +*> symmetric positive definite tridiagonal matrix and X and B are +*> N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'N', the matrix A is factored as A = L*D*L**T, where L +*> is a unit lower bidiagonal matrix and D is diagonal. The +*> factorization can also be regarded as having the form +*> A = U**T*D*U. +*> +*> 2. If the leading i-by-i principal minor is not positive definite, +*> then the routine returns with INFO = i. Otherwise, the factored +*> form of A is used to estimate the condition number of the matrix +*> A. If the reciprocal of the condition number is less than machine +*> precision, INFO = N+1 is returned as a warning, but the routine +*> still goes on to solve for X and compute error bounds as +*> described below. +*> +*> 3. The system of equations is solved for X using the factored form +*> of A. +*> +*> 4. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of A has been +*> supplied on entry. +*> = 'F': On entry, DF and EF contain the factored form of A. +*> D, E, DF, and EF will not be modified. +*> = 'N': The matrix A will be copied to DF and EF and +*> factored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of the tridiagonal matrix A. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The (n-1) subdiagonal elements of the tridiagonal matrix A. +*> \endverbatim +*> +*> \param[in,out] DF +*> \verbatim +*> DF is REAL array, dimension (N) +*> If FACT = 'F', then DF is an input argument and on entry +*> contains the n diagonal elements of the diagonal matrix D +*> from the L*D*L**T factorization of A. +*> If FACT = 'N', then DF is an output argument and on exit +*> contains the n diagonal elements of the diagonal matrix D +*> from the L*D*L**T factorization of A. +*> \endverbatim +*> +*> \param[in,out] EF +*> \verbatim +*> EF is REAL array, dimension (N-1) +*> If FACT = 'F', then EF is an input argument and on entry +*> contains the (n-1) subdiagonal elements of the unit +*> bidiagonal factor L from the L*D*L**T factorization of A. +*> If FACT = 'N', then EF is an output argument and on exit +*> contains the (n-1) subdiagonal elements of the unit +*> bidiagonal factor L from the L*D*L**T factorization of A. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> If INFO = 0 of INFO = N+1, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal condition number of the matrix A. If RCOND +*> is less than the machine precision (in particular, if +*> RCOND = 0), the matrix is singular to working precision. +*> This condition is indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in any +*> element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: the leading minor of order i of A is +*> not positive definite, so the factorization +*> could not be completed, and the solution has not +*> been computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realPTsolve +* +* ===================================================================== + SUBROUTINE SPTSVX( FACT, N, NRHS, D, E, DF, EF, B, LDB, X, LDX, + $ RCOND, FERR, BERR, WORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER FACT + INTEGER INFO, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + REAL B( LDB, * ), BERR( * ), D( * ), DF( * ), + $ E( * ), EF( * ), FERR( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT + REAL ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANST + EXTERNAL LSAME, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLACPY, SPTCON, SPTRFS, SPTTRF, SPTTRS, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPTSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the L*D*L**T (or U**T*D*U) factorization of A. +* + CALL SCOPY( N, D, 1, DF, 1 ) + IF( N.GT.1 ) + $ CALL SCOPY( N-1, E, 1, EF, 1 ) + CALL SPTTRF( N, DF, EF, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = SLANST( '1', N, D, E ) +* +* Compute the reciprocal of the condition number of A. +* + CALL SPTCON( N, DF, EF, ANORM, RCOND, WORK, INFO ) +* +* Compute the solution vectors X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SPTTRS( N, NRHS, DF, EF, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL SPTRFS( N, NRHS, D, E, DF, EF, B, LDB, X, LDX, FERR, BERR, + $ WORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of SPTSVX +* + END diff --git a/dspl/liblapack/SRC/spttrf.f b/dspl/liblapack/SRC/spttrf.f new file mode 100644 index 0000000..a3b4c1b --- /dev/null +++ b/dspl/liblapack/SRC/spttrf.f @@ -0,0 +1,211 @@ +*> \brief \b SPTTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPTTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPTTRF( N, D, E, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPTTRF computes the L*D*L**T factorization of a real symmetric +*> positive definite tridiagonal matrix A. The factorization may also +*> be regarded as having the form A = U**T*D*U. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix +*> A. On exit, the n diagonal elements of the diagonal matrix +*> D from the L*D*L**T factorization of A. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix A. On exit, the (n-1) subdiagonal elements of the +*> unit bidiagonal factor L from the L*D*L**T factorization of A. +*> E can also be regarded as the superdiagonal of the unit +*> bidiagonal factor U from the U**T*D*U factorization of A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, the leading minor of order k is not +*> positive definite; if k < N, the factorization could not +*> be completed, while if k = N, the factorization was +*> completed, but D(N) <= 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE SPTTRF( N, D, E, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER I, I4 + REAL EI +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'SPTTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the L*D*L**T (or U**T*D*U) factorization of A. +* + I4 = MOD( N-1, 4 ) + DO 10 I = 1, I4 + IF( D( I ).LE.ZERO ) THEN + INFO = I + GO TO 30 + END IF + EI = E( I ) + E( I ) = EI / D( I ) + D( I+1 ) = D( I+1 ) - E( I )*EI + 10 CONTINUE +* + DO 20 I = I4 + 1, N - 4, 4 +* +* Drop out of the loop if d(i) <= 0: the matrix is not positive +* definite. +* + IF( D( I ).LE.ZERO ) THEN + INFO = I + GO TO 30 + END IF +* +* Solve for e(i) and d(i+1). +* + EI = E( I ) + E( I ) = EI / D( I ) + D( I+1 ) = D( I+1 ) - E( I )*EI +* + IF( D( I+1 ).LE.ZERO ) THEN + INFO = I + 1 + GO TO 30 + END IF +* +* Solve for e(i+1) and d(i+2). +* + EI = E( I+1 ) + E( I+1 ) = EI / D( I+1 ) + D( I+2 ) = D( I+2 ) - E( I+1 )*EI +* + IF( D( I+2 ).LE.ZERO ) THEN + INFO = I + 2 + GO TO 30 + END IF +* +* Solve for e(i+2) and d(i+3). +* + EI = E( I+2 ) + E( I+2 ) = EI / D( I+2 ) + D( I+3 ) = D( I+3 ) - E( I+2 )*EI +* + IF( D( I+3 ).LE.ZERO ) THEN + INFO = I + 3 + GO TO 30 + END IF +* +* Solve for e(i+3) and d(i+4). +* + EI = E( I+3 ) + E( I+3 ) = EI / D( I+3 ) + D( I+4 ) = D( I+4 ) - E( I+3 )*EI + 20 CONTINUE +* +* Check d(n) for positive definiteness. +* + IF( D( N ).LE.ZERO ) + $ INFO = N +* + 30 CONTINUE + RETURN +* +* End of SPTTRF +* + END diff --git a/dspl/liblapack/SRC/spttrs.f b/dspl/liblapack/SRC/spttrs.f new file mode 100644 index 0000000..2a7f475 --- /dev/null +++ b/dspl/liblapack/SRC/spttrs.f @@ -0,0 +1,182 @@ +*> \brief \b SPTTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPTTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPTTRS( N, NRHS, D, E, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* REAL B( LDB, * ), D( * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPTTRS solves a tridiagonal system of the form +*> A * X = B +*> using the L*D*L**T factorization of A computed by SPTTRF. D is a +*> diagonal matrix specified in the vector D, L is a unit bidiagonal +*> matrix whose subdiagonal is specified in the vector E, and X and B +*> are N by NRHS matrices. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the tridiagonal matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of the diagonal matrix D from the +*> L*D*L**T factorization of A. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The (n-1) subdiagonal elements of the unit bidiagonal factor +*> L from the L*D*L**T factorization of A. E can also be regarded +*> as the superdiagonal of the unit bidiagonal factor U from the +*> factorization A = U**T*D*U. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side vectors B for the system of +*> linear equations. +*> On exit, the solution vectors, X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realPTcomputational +* +* ===================================================================== + SUBROUTINE SPTTRS( N, NRHS, D, E, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL B( LDB, * ), D( * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER J, JB, NB +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SPTTS2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SPTTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Determine the number of right-hand sides to solve at a time. +* + IF( NRHS.EQ.1 ) THEN + NB = 1 + ELSE + NB = MAX( 1, ILAENV( 1, 'SPTTRS', ' ', N, NRHS, -1, -1 ) ) + END IF +* + IF( NB.GE.NRHS ) THEN + CALL SPTTS2( N, NRHS, D, E, B, LDB ) + ELSE + DO 10 J = 1, NRHS, NB + JB = MIN( NRHS-J+1, NB ) + CALL SPTTS2( N, JB, D, E, B( 1, J ), LDB ) + 10 CONTINUE + END IF +* + RETURN +* +* End of SPTTRS +* + END diff --git a/dspl/liblapack/SRC/sptts2.f b/dspl/liblapack/SRC/sptts2.f new file mode 100644 index 0000000..ffd73b1 --- /dev/null +++ b/dspl/liblapack/SRC/sptts2.f @@ -0,0 +1,158 @@ +*> \brief \b SPTTS2 solves a tridiagonal system of the form AX=B using the L D LH factorization computed by spttrf. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SPTTS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SPTTS2( N, NRHS, D, E, B, LDB ) +* +* .. Scalar Arguments .. +* INTEGER LDB, N, NRHS +* .. +* .. Array Arguments .. +* REAL B( LDB, * ), D( * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SPTTS2 solves a tridiagonal system of the form +*> A * X = B +*> using the L*D*L**T factorization of A computed by SPTTRF. D is a +*> diagonal matrix specified in the vector D, L is a unit bidiagonal +*> matrix whose subdiagonal is specified in the vector E, and X and B +*> are N by NRHS matrices. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the tridiagonal matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of the diagonal matrix D from the +*> L*D*L**T factorization of A. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The (n-1) subdiagonal elements of the unit bidiagonal factor +*> L from the L*D*L**T factorization of A. E can also be regarded +*> as the superdiagonal of the unit bidiagonal factor U from the +*> factorization A = U**T*D*U. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side vectors B for the system of +*> linear equations. +*> On exit, the solution vectors, X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realPTcomputational +* +* ===================================================================== + SUBROUTINE SPTTS2( N, NRHS, D, E, B, LDB ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL B( LDB, * ), D( * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Subroutines .. + EXTERNAL SSCAL +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + IF( N.EQ.1 ) + $ CALL SSCAL( NRHS, 1. / D( 1 ), B, LDB ) + RETURN + END IF +* +* Solve A * X = B using the factorization A = L*D*L**T, +* overwriting each right hand side vector with its solution. +* + DO 30 J = 1, NRHS +* +* Solve L * x = b. +* + DO 10 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 ) + 10 CONTINUE +* +* Solve D * L**T * x = b. +* + B( N, J ) = B( N, J ) / D( N ) + DO 20 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) / D( I ) - B( I+1, J )*E( I ) + 20 CONTINUE + 30 CONTINUE +* + RETURN +* +* End of SPTTS2 +* + END diff --git a/dspl/liblapack/SRC/srscl.f b/dspl/liblapack/SRC/srscl.f new file mode 100644 index 0000000..b5168dd --- /dev/null +++ b/dspl/liblapack/SRC/srscl.f @@ -0,0 +1,174 @@ +*> \brief \b SRSCL multiplies a vector by the reciprocal of a real scalar. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SRSCL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SRSCL( N, SA, SX, INCX ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* REAL SA +* .. +* .. Array Arguments .. +* REAL SX( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SRSCL multiplies an n-element real vector x by the real scalar 1/a. +*> This is done without overflow or underflow as long as +*> the final result x/a does not overflow or underflow. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of components of the vector x. +*> \endverbatim +*> +*> \param[in] SA +*> \verbatim +*> SA is REAL +*> The scalar a which is used to divide each component of x. +*> SA must be >= 0, or the subroutine will divide by zero. +*> \endverbatim +*> +*> \param[in,out] SX +*> \verbatim +*> SX is REAL array, dimension +*> (1+(N-1)*abs(INCX)) +*> The n-element vector x. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive values of the vector SX. +*> > 0: SX(1) = X(1) and SX(1+(i-1)*INCX) = x(i), 1< i<= n +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +* ===================================================================== + SUBROUTINE SRSCL( N, SA, SX, INCX ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCX, N + REAL SA +* .. +* .. Array Arguments .. + REAL SX( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + REAL BIGNUM, CDEN, CDEN1, CNUM, CNUM1, MUL, SMLNUM +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLABAD, SSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* +* Initialize the denominator to SA and the numerator to 1. +* + CDEN = SA + CNUM = ONE +* + 10 CONTINUE + CDEN1 = CDEN*SMLNUM + CNUM1 = CNUM / BIGNUM + IF( ABS( CDEN1 ).GT.ABS( CNUM ) .AND. CNUM.NE.ZERO ) THEN +* +* Pre-multiply X by SMLNUM if CDEN is large compared to CNUM. +* + MUL = SMLNUM + DONE = .FALSE. + CDEN = CDEN1 + ELSE IF( ABS( CNUM1 ).GT.ABS( CDEN ) ) THEN +* +* Pre-multiply X by BIGNUM if CDEN is small compared to CNUM. +* + MUL = BIGNUM + DONE = .FALSE. + CNUM = CNUM1 + ELSE +* +* Multiply X by CNUM / CDEN and return. +* + MUL = CNUM / CDEN + DONE = .TRUE. + END IF +* +* Scale the vector X by MUL +* + CALL SSCAL( N, MUL, SX, INCX ) +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of SRSCL +* + END diff --git a/dspl/liblapack/SRC/ssb2st_kernels.f b/dspl/liblapack/SRC/ssb2st_kernels.f new file mode 100644 index 0000000..54479f8 --- /dev/null +++ b/dspl/liblapack/SRC/ssb2st_kernels.f @@ -0,0 +1,380 @@ +*> \brief \b SSB2ST_KERNELS +* +* @generated from zhb2st_kernels.f, fortran z -> s, Wed Dec 7 08:22:40 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSB2ST_KERNELS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSB2ST_KERNELS( UPLO, WANTZ, TTYPE, +* ST, ED, SWEEP, N, NB, IB, +* A, LDA, V, TAU, LDVT, WORK) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* LOGICAL WANTZ +* INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), V( * ), +* TAU( * ), WORK( * ) +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSB2ST_KERNELS is an internal routine used by the SSYTRD_SB2ST +*> subroutine. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL which indicate if Eigenvalue are requested or both +*> Eigenvalue/Eigenvectors. +*> \endverbatim +*> +*> \param[in] TTYPE +*> \verbatim +*> TTYPE is INTEGER +*> \endverbatim +*> +*> \param[in] ST +*> \verbatim +*> ST is INTEGER +*> internal parameter for indices. +*> \endverbatim +*> +*> \param[in] ED +*> \verbatim +*> ED is INTEGER +*> internal parameter for indices. +*> \endverbatim +*> +*> \param[in] SWEEP +*> \verbatim +*> SWEEP is INTEGER +*> internal parameter for indices. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER. The order of the matrix A. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER. The size of the band. +*> \endverbatim +*> +*> \param[in] IB +*> \verbatim +*> IB is INTEGER. +*> \endverbatim +*> +*> \param[in, out] A +*> \verbatim +*> A is REAL array. A pointer to the matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER. The leading dimension of the matrix A. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is REAL array, dimension 2*n if eigenvalues only are +*> requested or to be queried for vectors. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (2*n). +*> The scalar factors of the Householder reflectors are stored +*> in this array. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is REAL array. Workspace of size nb. +*> \endverbatim +*> @param[in] n +*> The order of the matrix A. +*> +*> +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SSB2ST_KERNELS( UPLO, WANTZ, TTYPE, + $ ST, ED, SWEEP, N, NB, IB, + $ A, LDA, V, TAU, LDVT, WORK) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + LOGICAL WANTZ + INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT +* .. +* .. Array Arguments .. + REAL A( LDA, * ), V( * ), + $ TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, + $ ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS, + $ DPOS, OFDPOS, AJETER + REAL CTMP +* .. +* .. External Subroutines .. + EXTERNAL SLARFG, SLARFX, SLARFY +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. +* .. Executable Statements .. +* + AJETER = IB + LDVT + UPPER = LSAME( UPLO, 'U' ) + + IF( UPPER ) THEN + DPOS = 2 * NB + 1 + OFDPOS = 2 * NB + ELSE + DPOS = 1 + OFDPOS = 2 + ENDIF + +* +* Upper case +* + IF( UPPER ) THEN +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ENDIF +* + IF( TTYPE.EQ.1 ) THEN + LM = ED - ST + 1 +* + V( VPOS ) = ONE + DO 10 I = 1, LM-1 + V( VPOS+I ) = ( A( OFDPOS-I, ST+I ) ) + A( OFDPOS-I, ST+I ) = ZERO + 10 CONTINUE + CTMP = ( A( OFDPOS, ST ) ) + CALL SLARFG( LM, CTMP, V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) + A( OFDPOS, ST ) = CTMP +* + LM = ED - ST + 1 + CALL SLARFY( UPLO, LM, V( VPOS ), 1, + $ ( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + ENDIF +* + IF( TTYPE.EQ.3 ) THEN +* + LM = ED - ST + 1 + CALL SLARFY( UPLO, LM, V( VPOS ), 1, + $ ( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + ENDIF +* + IF( TTYPE.EQ.2 ) THEN + J1 = ED+1 + J2 = MIN( ED+NB, N ) + LN = ED-ST+1 + LM = J2-J1+1 + IF( LM.GT.0) THEN + CALL SLARFX( 'Left', LN, LM, V( VPOS ), + $ ( TAU( TAUPOS ) ), + $ A( DPOS-NB, J1 ), LDA-1, WORK) +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ENDIF +* + V( VPOS ) = ONE + DO 30 I = 1, LM-1 + V( VPOS+I ) = + $ ( A( DPOS-NB-I, J1+I ) ) + A( DPOS-NB-I, J1+I ) = ZERO + 30 CONTINUE + CTMP = ( A( DPOS-NB, J1 ) ) + CALL SLARFG( LM, CTMP, V( VPOS+1 ), 1, TAU( TAUPOS ) ) + A( DPOS-NB, J1 ) = CTMP +* + CALL SLARFX( 'Right', LN-1, LM, V( VPOS ), + $ TAU( TAUPOS ), + $ A( DPOS-NB+1, J1 ), LDA-1, WORK) + ENDIF + ENDIF +* +* Lower case +* + ELSE +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ENDIF +* + IF( TTYPE.EQ.1 ) THEN + LM = ED - ST + 1 +* + V( VPOS ) = ONE + DO 20 I = 1, LM-1 + V( VPOS+I ) = A( OFDPOS+I, ST-1 ) + A( OFDPOS+I, ST-1 ) = ZERO + 20 CONTINUE + CALL SLARFG( LM, A( OFDPOS, ST-1 ), V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) +* + LM = ED - ST + 1 +* + CALL SLARFY( UPLO, LM, V( VPOS ), 1, + $ ( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + + ENDIF +* + IF( TTYPE.EQ.3 ) THEN + LM = ED - ST + 1 +* + CALL SLARFY( UPLO, LM, V( VPOS ), 1, + $ ( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + + ENDIF +* + IF( TTYPE.EQ.2 ) THEN + J1 = ED+1 + J2 = MIN( ED+NB, N ) + LN = ED-ST+1 + LM = J2-J1+1 +* + IF( LM.GT.0) THEN + CALL SLARFX( 'Right', LM, LN, V( VPOS ), + $ TAU( TAUPOS ), A( DPOS+NB, ST ), + $ LDA-1, WORK) +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ENDIF +* + V( VPOS ) = ONE + DO 40 I = 1, LM-1 + V( VPOS+I ) = A( DPOS+NB+I, ST ) + A( DPOS+NB+I, ST ) = ZERO + 40 CONTINUE + CALL SLARFG( LM, A( DPOS+NB, ST ), V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) +* + CALL SLARFX( 'Left', LM, LN-1, V( VPOS ), + $ ( TAU( TAUPOS ) ), + $ A( DPOS+NB-1, ST+1 ), LDA-1, WORK) + + ENDIF + ENDIF + ENDIF +* + RETURN +* +* END OF SSB2ST_KERNELS +* + END diff --git a/dspl/liblapack/SRC/ssbev.f b/dspl/liblapack/SRC/ssbev.f new file mode 100644 index 0000000..78fa2cc --- /dev/null +++ b/dspl/liblapack/SRC/ssbev.f @@ -0,0 +1,287 @@ +*> \brief SSBEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSBEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSBEV( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, N +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSBEV computes all the eigenvalues and, optionally, eigenvectors of +*> a real symmetric band matrix A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (max(1,3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHEReigen +* +* ===================================================================== + SUBROUTINE SSBEV( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, WANTZ + INTEGER IINFO, IMAX, INDE, INDWRK, ISCALE + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSB + EXTERNAL LSAME, SLAMCH, SLANSB +* .. +* .. External Subroutines .. + EXTERNAL SLASCL, SSBTRD, SSCAL, SSTEQR, SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBEV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( LOWER ) THEN + W( 1 ) = AB( 1, 1 ) + ELSE + W( 1 ) = AB( KD+1, 1 ) + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL SLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL SLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call SSBTRD to reduce symmetric band matrix to tridiagonal form. +* + INDE = 1 + INDWRK = INDE + N + CALL SSBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, WORK( INDE ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL SSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + RETURN +* +* End of SSBEV +* + END diff --git a/dspl/liblapack/SRC/ssbev_2stage.f b/dspl/liblapack/SRC/ssbev_2stage.f new file mode 100644 index 0000000..542fa8d --- /dev/null +++ b/dspl/liblapack/SRC/ssbev_2stage.f @@ -0,0 +1,380 @@ +*> \brief SSBEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* @generated from dsbev_2stage.f, fortran d -> s, Sat Nov 5 23:58:09 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSBEV_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSBEV_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, +* WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, N, LWORK +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSBEV_2STAGE computes all the eigenvalues and, optionally, eigenvectors of +*> a real symmetric band matrix A using the 2stage technique for +*> the reduction to tridiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension LWORK +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS + N +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realOTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSBEV_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, + $ WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, N, LWORK +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, WANTZ, LQUERY + INTEGER IINFO, IMAX, INDE, INDWRK, ISCALE, + $ LLWORK, LWMIN, LHTRD, LWTRD, IB, INDHOUS + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + REAL SLAMCH, SLANSB + EXTERNAL LSAME, SLAMCH, SLANSB, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL SLASCL, SSCAL, SSTEQR, SSTERF, XERBLA, + $ SSYTRD_SB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + IB = ILAENV2STAGE( 2, 'SSYTRD_SB2ST', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'SSYTRD_SB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'SSYTRD_SB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWMIN = N + LHTRD + LWTRD + WORK( 1 ) = LWMIN + ENDIF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -11 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBEV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( LOWER ) THEN + W( 1 ) = AB( 1, 1 ) + ELSE + W( 1 ) = AB( KD+1, 1 ) + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL SLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL SLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call SSYTRD_SB2ST to reduce symmetric band matrix to tridiagonal form. +* + INDE = 1 + INDHOUS = INDE + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL SSYTRD_SB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, + $ WORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL SSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of SSBEV_2STAGE +* + END diff --git a/dspl/liblapack/SRC/ssbevd.f b/dspl/liblapack/SRC/ssbevd.f new file mode 100644 index 0000000..21fd78e --- /dev/null +++ b/dspl/liblapack/SRC/ssbevd.f @@ -0,0 +1,360 @@ +*> \brief SSBEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSBEVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, +* LWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSBEVD computes all the eigenvalues and, optionally, eigenvectors of +*> a real symmetric band matrix A. If eigenvectors are desired, it uses +*> a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, +*> dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> IF N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 2, LWORK must be at least 2*N. +*> If JOBZ = 'V' and N > 2, LWORK must be at least +*> ( 1 + 5*N + 2*N**2 ). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 2, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHEReigen +* +* ===================================================================== + SUBROUTINE SSBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, + $ LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, INDE, INDWK2, INDWRK, ISCALE, LIWMIN, + $ LLWRK2, LWMIN + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSB + EXTERNAL LSAME, SLAMCH, SLANSB +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SLACPY, SLASCL, SSBTRD, SSCAL, SSTEDC, + $ SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 5*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + END IF + END IF + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AB( 1, 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL SLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL SLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call SSBTRD to reduce symmetric band matrix to tridiagonal form. +* + INDE = 1 + INDWRK = INDE + N + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 + CALL SSBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, WORK( INDE ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call SSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL SSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL SGEMM( 'N', 'N', N, N, N, ONE, Z, LDZ, WORK( INDWRK ), N, + $ ZERO, WORK( INDWK2 ), N ) + CALL SLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL SSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of SSBEVD +* + END diff --git a/dspl/liblapack/SRC/ssbevd_2stage.f b/dspl/liblapack/SRC/ssbevd_2stage.f new file mode 100644 index 0000000..84add84 --- /dev/null +++ b/dspl/liblapack/SRC/ssbevd_2stage.f @@ -0,0 +1,412 @@ +*> \brief SSBEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* @generated from dsbevd_2stage.f, fortran d -> s, Sat Nov 5 23:58:03 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSBEVD_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, +* WORK, LWORK, IWORK, LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSBEVD_2STAGE computes all the eigenvalues and, optionally, eigenvectors of +*> a real symmetric band matrix A using the 2stage technique for +*> the reduction to tridiagonal. If eigenvectors are desired, it uses +*> a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension LWORK +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS + N +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 2, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realOTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, + $ WORK, LWORK, IWORK, LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, INDE, INDWK2, INDWRK, ISCALE, LIWMIN, + $ LLWORK, LWMIN, LHTRD, LWTRD, IB, INDHOUS, + $ LLWRK2 + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + REAL SLAMCH, SLANSB + EXTERNAL LSAME, SLAMCH, SLANSB, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SLACPY, SLASCL, SSCAL, SSTEDC, + $ SSTERF, XERBLA, SSYTRD_SB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + IB = ILAENV2STAGE( 2, 'SSYTRD_SB2ST', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'SSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'SSYTRD_SB2ST', JOBZ, N, KD, IB, -1 ) + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 5*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = MAX( 2*N, N+LHTRD+LWTRD ) + END IF + END IF + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBEVD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AB( 1, 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL SLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL SLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call SSYTRD_SB2ST to reduce band symmetric matrix to tridiagonal form. +* + INDE = 1 + INDHOUS = INDE + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 +* + CALL SSYTRD_SB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, + $ WORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call SSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL SSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL SGEMM( 'N', 'N', N, N, N, ONE, Z, LDZ, WORK( INDWRK ), N, + $ ZERO, WORK( INDWK2 ), N ) + CALL SLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL SSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of SSBEVD_2STAGE +* + END diff --git a/dspl/liblapack/SRC/ssbevx.f b/dspl/liblapack/SRC/ssbevx.f new file mode 100644 index 0000000..7eccf61 --- /dev/null +++ b/dspl/liblapack/SRC/ssbevx.f @@ -0,0 +1,543 @@ +*> \brief SSBEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSBEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSBEVX( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, LDQ, VL, +* VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, IWORK, +* IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL AB( LDAB, * ), Q( LDQ, * ), W( * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSBEVX computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric band matrix A. Eigenvalues and eigenvectors can +*> be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found; +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found; +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ, N) +*> If JOBZ = 'V', the N-by-N orthogonal matrix used in the +*> reduction to tridiagonal form. +*> If JOBZ = 'N', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. If JOBZ = 'V', then +*> LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing AB to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHEReigen +* +* ===================================================================== + SUBROUTINE SSBEVX( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, LDQ, VL, + $ VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, IWORK, + $ IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL AB( LDAB, * ), Q( LDQ, * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, TEST, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWO, INDWRK, ISCALE, ITMP1, J, JJ, + $ NSPLIT + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSB + EXTERNAL LSAME, SLAMCH, SLANSB +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMV, SLACPY, SLASCL, SSBTRD, SSCAL, + $ SSTEBZ, SSTEIN, SSTEQR, SSTERF, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LOWER = LSAME( UPLO, 'L' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( WANTZ .AND. LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -13 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -18 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBEVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + M = 1 + IF( LOWER ) THEN + TMP1 = AB( 1, 1 ) + ELSE + TMP1 = AB( KD+1, 1 ) + END IF + IF( VALEIG ) THEN + IF( .NOT.( VL.LT.TMP1 .AND. VU.GE.TMP1 ) ) + $ M = 0 + END IF + IF( M.EQ.1 ) THEN + W( 1 ) = TMP1 + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF ( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + ENDIF + ANRM = SLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL SLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL SLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call SSBTRD to reduce symmetric band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDWRK = INDE + N + CALL SSBTRD( JOBZ, UPLO, N, KD, AB, LDAB, WORK( INDD ), + $ WORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call SSTERF or SSTEQR. If this fails for some +* eigenvalue, then try SSTEBZ. +* + TEST = .FALSE. + IF (INDEIG) THEN + IF (IL.EQ.1 .AND. IU.EQ.N) THEN + TEST = .TRUE. + END IF + END IF + IF ((ALLEIG .OR. TEST) .AND. (ABSTOL.LE.ZERO)) THEN + CALL SCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL SLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by SSTEIN. +* + DO 20 J = 1, M + CALL SCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL SGEMV( 'N', N, N, ONE, Q, LDQ, WORK, 1, ZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* + RETURN +* +* End of SSBEVX +* + END diff --git a/dspl/liblapack/SRC/ssbevx_2stage.f b/dspl/liblapack/SRC/ssbevx_2stage.f new file mode 100644 index 0000000..5962e78 --- /dev/null +++ b/dspl/liblapack/SRC/ssbevx_2stage.f @@ -0,0 +1,636 @@ +*> \brief SSBEVX_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* @generated from dsbevx_2stage.f, fortran d -> s, Sat Nov 5 23:58:06 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSBEVX_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSBEVX_2STAGE( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, +* LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, +* LDZ, WORK, LWORK, IWORK, IFAIL, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N, LWORK +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL AB( LDAB, * ), Q( LDQ, * ), W( * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSBEVX_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric band matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors can +*> be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found; +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found; +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ, N) +*> If JOBZ = 'V', the N-by-N orthogonal matrix used in the +*> reduction to tridiagonal form. +*> If JOBZ = 'N', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. If JOBZ = 'V', then +*> LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing AB to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, 7*N, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS + 2*N +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSBEVX_2STAGE( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, + $ LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, + $ LDZ, WORK, LWORK, IWORK, IFAIL, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N, LWORK + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL AB( LDAB, * ), Q( LDQ, * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, TEST, VALEIG, WANTZ, + $ LQUERY + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWO, INDWRK, ISCALE, ITMP1, J, JJ, + $ LLWORK, LWMIN, LHTRD, LWTRD, IB, INDHOUS, + $ NSPLIT + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + REAL SLAMCH, SLANSB + EXTERNAL LSAME, SLAMCH, SLANSB, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMV, SLACPY, SLASCL, SSCAL, + $ SSTEBZ, SSTEIN, SSTEQR, SSTERF, SSWAP, XERBLA, + $ SSYTRD_SB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( WANTZ .AND. LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -13 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + IB = ILAENV2STAGE( 2, 'SSYTRD_SB2ST', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'SSYTRD_SB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'SSYTRD_SB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWMIN = 2*N + LHTRD + LWTRD + WORK( 1 ) = LWMIN + ENDIF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -20 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBEVX_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + M = 1 + IF( LOWER ) THEN + TMP1 = AB( 1, 1 ) + ELSE + TMP1 = AB( KD+1, 1 ) + END IF + IF( VALEIG ) THEN + IF( .NOT.( VL.LT.TMP1 .AND. VU.GE.TMP1 ) ) + $ M = 0 + END IF + IF( M.EQ.1 ) THEN + W( 1 ) = TMP1 + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + END IF + ANRM = SLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL SLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL SLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call SSYTRD_SB2ST to reduce symmetric band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDHOUS = INDE + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL SSYTRD_SB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, WORK( INDD ), + $ WORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call SSTERF or SSTEQR. If this fails for some +* eigenvalue, then try SSTEBZ. +* + TEST = .FALSE. + IF (INDEIG) THEN + IF (IL.EQ.1 .AND. IU.EQ.N) THEN + TEST = .TRUE. + END IF + END IF + IF ((ALLEIG .OR. TEST) .AND. (ABSTOL.LE.ZERO)) THEN + CALL SCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL SLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by SSTEIN. +* + DO 20 J = 1, M + CALL SCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL SGEMV( 'N', N, N, ONE, Q, LDQ, WORK, 1, ZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of SSBEVX_2STAGE +* + END diff --git a/dspl/liblapack/SRC/ssbgst.f b/dspl/liblapack/SRC/ssbgst.f new file mode 100644 index 0000000..cf5d512 --- /dev/null +++ b/dspl/liblapack/SRC/ssbgst.f @@ -0,0 +1,1434 @@ +*> \brief \b SSBGST +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSBGST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSBGST( VECT, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, X, +* LDX, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, VECT +* INTEGER INFO, KA, KB, LDAB, LDBB, LDX, N +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), BB( LDBB, * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSBGST reduces a real symmetric-definite banded generalized +*> eigenproblem A*x = lambda*B*x to standard form C*y = lambda*y, +*> such that C has the same bandwidth as A. +*> +*> B must have been previously factorized as S**T*S by SPBSTF, using a +*> split Cholesky factorization. A is overwritten by C = X**T*A*X, where +*> X = S**(-1)*Q and Q is an orthogonal matrix chosen to preserve the +*> bandwidth of A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': do not form the transformation matrix X; +*> = 'V': form X. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] KA +*> \verbatim +*> KA is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KA >= 0. +*> \endverbatim +*> +*> \param[in] KB +*> \verbatim +*> KB is INTEGER +*> The number of superdiagonals of the matrix B if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KA >= KB >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first ka+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +*> +*> On exit, the transformed matrix X**T*A*X, stored in the same +*> format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KA+1. +*> \endverbatim +*> +*> \param[in] BB +*> \verbatim +*> BB is REAL array, dimension (LDBB,N) +*> The banded factor S from the split Cholesky factorization of +*> B, as returned by SPBSTF, stored in the first KB+1 rows of +*> the array. +*> \endverbatim +*> +*> \param[in] LDBB +*> \verbatim +*> LDBB is INTEGER +*> The leading dimension of the array BB. LDBB >= KB+1. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (LDX,N) +*> If VECT = 'V', the n-by-n matrix X. +*> If VECT = 'N', the array X is not referenced. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. +*> LDX >= max(1,N) if VECT = 'V'; LDX >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SSBGST( VECT, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, X, + $ LDX, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO, VECT + INTEGER INFO, KA, KB, LDAB, LDBB, LDX, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), BB( LDBB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPDATE, UPPER, WANTX + INTEGER I, I0, I1, I2, INCA, J, J1, J1T, J2, J2T, K, + $ KA1, KB1, KBT, L, M, NR, NRT, NX + REAL BII, RA, RA1, T +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGER, SLAR2V, SLARGV, SLARTG, SLARTV, SLASET, + $ SROT, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTX = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + KA1 = KA + 1 + KB1 = KB + 1 + INFO = 0 + IF( .NOT.WANTX .AND. .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDX.LT.1 .OR. WANTX .AND. LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBGST', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + INCA = LDAB*KA1 +* +* Initialize X to the unit matrix, if needed +* + IF( WANTX ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, X, LDX ) +* +* Set M to the splitting point m. It must be the same value as is +* used in SPBSTF. The chosen value allows the arrays WORK and RWORK +* to be of dimension (N). +* + M = ( N+KB ) / 2 +* +* The routine works in two phases, corresponding to the two halves +* of the split Cholesky factorization of B as S**T*S where +* +* S = ( U ) +* ( M L ) +* +* with U upper triangular of order m, and L lower triangular of +* order n-m. S has the same bandwidth as B. +* +* S is treated as a product of elementary matrices: +* +* S = S(m)*S(m-1)*...*S(2)*S(1)*S(m+1)*S(m+2)*...*S(n-1)*S(n) +* +* where S(i) is determined by the i-th row of S. +* +* In phase 1, the index i takes the values n, n-1, ... , m+1; +* in phase 2, it takes the values 1, 2, ... , m. +* +* For each value of i, the current matrix A is updated by forming +* inv(S(i))**T*A*inv(S(i)). This creates a triangular bulge outside +* the band of A. The bulge is then pushed down toward the bottom of +* A in phase 1, and up toward the top of A in phase 2, by applying +* plane rotations. +* +* There are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1 +* of them are linearly independent, so annihilating a bulge requires +* only 2*kb-1 plane rotations. The rotations are divided into a 1st +* set of kb-1 rotations, and a 2nd set of kb rotations. +* +* Wherever possible, rotations are generated and applied in vector +* operations of length NR between the indices J1 and J2 (sometimes +* replaced by modified values NRT, J1T or J2T). +* +* The cosines and sines of the rotations are stored in the array +* WORK. The cosines of the 1st set of rotations are stored in +* elements n+2:n+m-kb-1 and the sines of the 1st set in elements +* 2:m-kb-1; the cosines of the 2nd set are stored in elements +* n+m-kb+1:2*n and the sines of the second set in elements m-kb+1:n. +* +* The bulges are not formed explicitly; nonzero elements outside the +* band are created only when they are required for generating new +* rotations; they are stored in the array WORK, in positions where +* they are later overwritten by the sines of the rotations which +* annihilate them. +* +* **************************** Phase 1 ***************************** +* +* The logical structure of this phase is: +* +* UPDATE = .TRUE. +* DO I = N, M + 1, -1 +* use S(i) to update A and create a new bulge +* apply rotations to push all bulges KA positions downward +* END DO +* UPDATE = .FALSE. +* DO I = M + KA + 1, N - 1 +* apply rotations to push all bulges KA positions downward +* END DO +* +* To avoid duplicating code, the two loops are merged. +* + UPDATE = .TRUE. + I = N + 1 + 10 CONTINUE + IF( UPDATE ) THEN + I = I - 1 + KBT = MIN( KB, I-1 ) + I0 = I - 1 + I1 = MIN( N, I+KA ) + I2 = I - KBT + KA1 + IF( I.LT.M+1 ) THEN + UPDATE = .FALSE. + I = I + 1 + I0 = M + IF( KA.EQ.0 ) + $ GO TO 480 + GO TO 10 + END IF + ELSE + I = I + KA + IF( I.GT.N-1 ) + $ GO TO 480 + END IF +* + IF( UPPER ) THEN +* +* Transform A, working with the upper triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**T * A * inv(S(i)) +* + BII = BB( KB1, I ) + DO 20 J = I, I1 + AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII + 20 CONTINUE + DO 30 J = MAX( 1, I-KA ), I + AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII + 30 CONTINUE + DO 60 K = I - KBT, I - 1 + DO 40 J = I - KBT, K + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ BB( J-I+KB1, I )*AB( K-I+KA1, I ) - + $ BB( K-I+KB1, I )*AB( J-I+KA1, I ) + + $ AB( KA1, I )*BB( J-I+KB1, I )* + $ BB( K-I+KB1, I ) + 40 CONTINUE + DO 50 J = MAX( 1, I-KA ), I - KBT - 1 + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ BB( K-I+KB1, I )*AB( J-I+KA1, I ) + 50 CONTINUE + 60 CONTINUE + DO 80 J = I, I1 + DO 70 K = MAX( J-KA, I-KBT ), I - 1 + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ BB( K-I+KB1, I )*AB( I-J+KA1, J ) + 70 CONTINUE + 80 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL SSCAL( N-M, ONE / BII, X( M+1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL SGER( N-M, KBT, -ONE, X( M+1, I ), 1, + $ BB( KB1-KBT, I ), 1, X( M+1, I-KBT ), LDX ) + END IF +* +* store a(i,i1) in RA1 for use in next loop over K +* + RA1 = AB( I-I1+KA1, I1 ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions down toward the bottom of the +* band +* + DO 130 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN +* +* generate rotation to annihilate a(i,i-k+ka+1) +* + CALL SLARTG( AB( K+1, I-K+KA ), RA1, + $ WORK( N+I-K+KA-M ), WORK( I-K+KA-M ), + $ RA ) +* +* create nonzero element a(i-k,i-k+ka+1) outside the +* band and store it in WORK(i-k) +* + T = -BB( KB1-K, I )*RA1 + WORK( I-K ) = WORK( N+I-K+KA-M )*T - + $ WORK( I-K+KA-M )*AB( 1, I-K+KA ) + AB( 1, I-K+KA ) = WORK( I-K+KA-M )*T + + $ WORK( N+I-K+KA-M )*AB( 1, I-K+KA ) + RA1 = RA + END IF + END IF + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MAX( J2, I+2*KA-K+1 ) + ELSE + J2T = J2 + END IF + NRT = ( N-J2T+KA ) / KA1 + DO 90 J = J2T, J1, KA1 +* +* create nonzero element a(j-ka,j+1) outside the band +* and store it in WORK(j-m) +* + WORK( J-M ) = WORK( J-M )*AB( 1, J+1 ) + AB( 1, J+1 ) = WORK( N+J-M )*AB( 1, J+1 ) + 90 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL SLARGV( NRT, AB( 1, J2T ), INCA, WORK( J2T-M ), KA1, + $ WORK( N+J2T-M ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the right +* + DO 100 L = 1, KA - 1 + CALL SLARTV( NR, AB( KA1-L, J2 ), INCA, + $ AB( KA-L, J2+1 ), INCA, WORK( N+J2-M ), + $ WORK( J2-M ), KA1 ) + 100 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL SLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ), + $ AB( KA, J2+1 ), INCA, WORK( N+J2-M ), + $ WORK( J2-M ), KA1 ) +* + END IF +* +* start applying rotations in 1st set from the left +* + DO 110 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, + $ WORK( N+J2-M ), WORK( J2-M ), KA1 ) + 110 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 120 J = J2, J1, KA1 + CALL SROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ WORK( N+J-M ), WORK( J-M ) ) + 120 CONTINUE + END IF + 130 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.LE.N .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i-kbt,i-kbt+ka+1) outside the +* band and store it in WORK(i-kbt) +* + WORK( I-KBT ) = -BB( KB1-KBT, I )*RA1 + END IF + END IF +* + DO 170 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1 + ELSE + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + END IF +* +* finish applying rotations in 2nd set from the left +* + DO 140 L = KB - K, 1, -1 + NRT = ( N-J2+KA+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( L, J2-L+1 ), INCA, + $ AB( L+1, J2-L+1 ), INCA, WORK( N+J2-KA ), + $ WORK( J2-KA ), KA1 ) + 140 CONTINUE + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + DO 150 J = J1, J2, -KA1 + WORK( J ) = WORK( J-KA ) + WORK( N+J ) = WORK( N+J-KA ) + 150 CONTINUE + DO 160 J = J2, J1, KA1 +* +* create nonzero element a(j-ka,j+1) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( 1, J+1 ) + AB( 1, J+1 ) = WORK( N+J )*AB( 1, J+1 ) + 160 CONTINUE + IF( UPDATE ) THEN + IF( I-K.LT.N-KA .AND. K.LE.KBT ) + $ WORK( I-K+KA ) = WORK( I-K ) + END IF + 170 CONTINUE +* + DO 210 K = KB, 1, -1 + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL SLARGV( NR, AB( 1, J2 ), INCA, WORK( J2 ), KA1, + $ WORK( N+J2 ), KA1 ) +* +* apply rotations in 2nd set from the right +* + DO 180 L = 1, KA - 1 + CALL SLARTV( NR, AB( KA1-L, J2 ), INCA, + $ AB( KA-L, J2+1 ), INCA, WORK( N+J2 ), + $ WORK( J2 ), KA1 ) + 180 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL SLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ), + $ AB( KA, J2+1 ), INCA, WORK( N+J2 ), + $ WORK( J2 ), KA1 ) +* + END IF +* +* start applying rotations in 2nd set from the left +* + DO 190 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, WORK( N+J2 ), + $ WORK( J2 ), KA1 ) + 190 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 200 J = J2, J1, KA1 + CALL SROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ WORK( N+J ), WORK( J ) ) + 200 CONTINUE + END IF + 210 CONTINUE +* + DO 230 K = 1, KB - 1 + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 +* +* finish applying rotations in 1st set from the left +* + DO 220 L = KB - K, 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, + $ WORK( N+J2-M ), WORK( J2-M ), KA1 ) + 220 CONTINUE + 230 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 240 J = N - 1, I - KB + 2*KA + 1, -1 + WORK( N+J-M ) = WORK( N+J-KA-M ) + WORK( J-M ) = WORK( J-KA-M ) + 240 CONTINUE + END IF +* + ELSE +* +* Transform A, working with the lower triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**T * A * inv(S(i)) +* + BII = BB( 1, I ) + DO 250 J = I, I1 + AB( J-I+1, I ) = AB( J-I+1, I ) / BII + 250 CONTINUE + DO 260 J = MAX( 1, I-KA ), I + AB( I-J+1, J ) = AB( I-J+1, J ) / BII + 260 CONTINUE + DO 290 K = I - KBT, I - 1 + DO 270 J = I - KBT, K + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ BB( I-J+1, J )*AB( I-K+1, K ) - + $ BB( I-K+1, K )*AB( I-J+1, J ) + + $ AB( 1, I )*BB( I-J+1, J )* + $ BB( I-K+1, K ) + 270 CONTINUE + DO 280 J = MAX( 1, I-KA ), I - KBT - 1 + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ BB( I-K+1, K )*AB( I-J+1, J ) + 280 CONTINUE + 290 CONTINUE + DO 310 J = I, I1 + DO 300 K = MAX( J-KA, I-KBT ), I - 1 + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ BB( I-K+1, K )*AB( J-I+1, I ) + 300 CONTINUE + 310 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL SSCAL( N-M, ONE / BII, X( M+1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL SGER( N-M, KBT, -ONE, X( M+1, I ), 1, + $ BB( KBT+1, I-KBT ), LDBB-1, + $ X( M+1, I-KBT ), LDX ) + END IF +* +* store a(i1,i) in RA1 for use in next loop over K +* + RA1 = AB( I1-I+1, I ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions down toward the bottom of the +* band +* + DO 360 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN +* +* generate rotation to annihilate a(i-k+ka+1,i) +* + CALL SLARTG( AB( KA1-K, I ), RA1, WORK( N+I-K+KA-M ), + $ WORK( I-K+KA-M ), RA ) +* +* create nonzero element a(i-k+ka+1,i-k) outside the +* band and store it in WORK(i-k) +* + T = -BB( K+1, I-K )*RA1 + WORK( I-K ) = WORK( N+I-K+KA-M )*T - + $ WORK( I-K+KA-M )*AB( KA1, I-K ) + AB( KA1, I-K ) = WORK( I-K+KA-M )*T + + $ WORK( N+I-K+KA-M )*AB( KA1, I-K ) + RA1 = RA + END IF + END IF + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MAX( J2, I+2*KA-K+1 ) + ELSE + J2T = J2 + END IF + NRT = ( N-J2T+KA ) / KA1 + DO 320 J = J2T, J1, KA1 +* +* create nonzero element a(j+1,j-ka) outside the band +* and store it in WORK(j-m) +* + WORK( J-M ) = WORK( J-M )*AB( KA1, J-KA+1 ) + AB( KA1, J-KA+1 ) = WORK( N+J-M )*AB( KA1, J-KA+1 ) + 320 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL SLARGV( NRT, AB( KA1, J2T-KA ), INCA, WORK( J2T-M ), + $ KA1, WORK( N+J2T-M ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the left +* + DO 330 L = 1, KA - 1 + CALL SLARTV( NR, AB( L+1, J2-L ), INCA, + $ AB( L+2, J2-L ), INCA, WORK( N+J2-M ), + $ WORK( J2-M ), KA1 ) + 330 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL SLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ), + $ INCA, WORK( N+J2-M ), WORK( J2-M ), KA1 ) +* + END IF +* +* start applying rotations in 1st set from the right +* + DO 340 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, WORK( N+J2-M ), + $ WORK( J2-M ), KA1 ) + 340 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 350 J = J2, J1, KA1 + CALL SROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ WORK( N+J-M ), WORK( J-M ) ) + 350 CONTINUE + END IF + 360 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.LE.N .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i-kbt+ka+1,i-kbt) outside the +* band and store it in WORK(i-kbt) +* + WORK( I-KBT ) = -BB( KBT+1, I-KBT )*RA1 + END IF + END IF +* + DO 400 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1 + ELSE + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + END IF +* +* finish applying rotations in 2nd set from the right +* + DO 370 L = KB - K, 1, -1 + NRT = ( N-J2+KA+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( KA1-L+1, J2-KA ), INCA, + $ AB( KA1-L, J2-KA+1 ), INCA, + $ WORK( N+J2-KA ), WORK( J2-KA ), KA1 ) + 370 CONTINUE + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + DO 380 J = J1, J2, -KA1 + WORK( J ) = WORK( J-KA ) + WORK( N+J ) = WORK( N+J-KA ) + 380 CONTINUE + DO 390 J = J2, J1, KA1 +* +* create nonzero element a(j+1,j-ka) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( KA1, J-KA+1 ) + AB( KA1, J-KA+1 ) = WORK( N+J )*AB( KA1, J-KA+1 ) + 390 CONTINUE + IF( UPDATE ) THEN + IF( I-K.LT.N-KA .AND. K.LE.KBT ) + $ WORK( I-K+KA ) = WORK( I-K ) + END IF + 400 CONTINUE +* + DO 440 K = KB, 1, -1 + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL SLARGV( NR, AB( KA1, J2-KA ), INCA, WORK( J2 ), KA1, + $ WORK( N+J2 ), KA1 ) +* +* apply rotations in 2nd set from the left +* + DO 410 L = 1, KA - 1 + CALL SLARTV( NR, AB( L+1, J2-L ), INCA, + $ AB( L+2, J2-L ), INCA, WORK( N+J2 ), + $ WORK( J2 ), KA1 ) + 410 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL SLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ), + $ INCA, WORK( N+J2 ), WORK( J2 ), KA1 ) +* + END IF +* +* start applying rotations in 2nd set from the right +* + DO 420 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, WORK( N+J2 ), + $ WORK( J2 ), KA1 ) + 420 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 430 J = J2, J1, KA1 + CALL SROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ WORK( N+J ), WORK( J ) ) + 430 CONTINUE + END IF + 440 CONTINUE +* + DO 460 K = 1, KB - 1 + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 +* +* finish applying rotations in 1st set from the right +* + DO 450 L = KB - K, 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, WORK( N+J2-M ), + $ WORK( J2-M ), KA1 ) + 450 CONTINUE + 460 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 470 J = N - 1, I - KB + 2*KA + 1, -1 + WORK( N+J-M ) = WORK( N+J-KA-M ) + WORK( J-M ) = WORK( J-KA-M ) + 470 CONTINUE + END IF +* + END IF +* + GO TO 10 +* + 480 CONTINUE +* +* **************************** Phase 2 ***************************** +* +* The logical structure of this phase is: +* +* UPDATE = .TRUE. +* DO I = 1, M +* use S(i) to update A and create a new bulge +* apply rotations to push all bulges KA positions upward +* END DO +* UPDATE = .FALSE. +* DO I = M - KA - 1, 2, -1 +* apply rotations to push all bulges KA positions upward +* END DO +* +* To avoid duplicating code, the two loops are merged. +* + UPDATE = .TRUE. + I = 0 + 490 CONTINUE + IF( UPDATE ) THEN + I = I + 1 + KBT = MIN( KB, M-I ) + I0 = I + 1 + I1 = MAX( 1, I-KA ) + I2 = I + KBT - KA1 + IF( I.GT.M ) THEN + UPDATE = .FALSE. + I = I - 1 + I0 = M + 1 + IF( KA.EQ.0 ) + $ RETURN + GO TO 490 + END IF + ELSE + I = I - KA + IF( I.LT.2 ) + $ RETURN + END IF +* + IF( I.LT.M-KBT ) THEN + NX = M + ELSE + NX = N + END IF +* + IF( UPPER ) THEN +* +* Transform A, working with the upper triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**T * A * inv(S(i)) +* + BII = BB( KB1, I ) + DO 500 J = I1, I + AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII + 500 CONTINUE + DO 510 J = I, MIN( N, I+KA ) + AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII + 510 CONTINUE + DO 540 K = I + 1, I + KBT + DO 520 J = K, I + KBT + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ BB( I-J+KB1, J )*AB( I-K+KA1, K ) - + $ BB( I-K+KB1, K )*AB( I-J+KA1, J ) + + $ AB( KA1, I )*BB( I-J+KB1, J )* + $ BB( I-K+KB1, K ) + 520 CONTINUE + DO 530 J = I + KBT + 1, MIN( N, I+KA ) + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ BB( I-K+KB1, K )*AB( I-J+KA1, J ) + 530 CONTINUE + 540 CONTINUE + DO 560 J = I1, I + DO 550 K = I + 1, MIN( J+KA, I+KBT ) + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ BB( I-K+KB1, K )*AB( J-I+KA1, I ) + 550 CONTINUE + 560 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL SSCAL( NX, ONE / BII, X( 1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL SGER( NX, KBT, -ONE, X( 1, I ), 1, BB( KB, I+1 ), + $ LDBB-1, X( 1, I+1 ), LDX ) + END IF +* +* store a(i1,i) in RA1 for use in next loop over K +* + RA1 = AB( I1-I+KA1, I ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions up toward the top of the band +* + DO 610 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN +* +* generate rotation to annihilate a(i+k-ka-1,i) +* + CALL SLARTG( AB( K+1, I ), RA1, WORK( N+I+K-KA ), + $ WORK( I+K-KA ), RA ) +* +* create nonzero element a(i+k-ka-1,i+k) outside the +* band and store it in WORK(m-kb+i+k) +* + T = -BB( KB1-K, I+K )*RA1 + WORK( M-KB+I+K ) = WORK( N+I+K-KA )*T - + $ WORK( I+K-KA )*AB( 1, I+K ) + AB( 1, I+K ) = WORK( I+K-KA )*T + + $ WORK( N+I+K-KA )*AB( 1, I+K ) + RA1 = RA + END IF + END IF + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MIN( J2, I-2*KA+K-1 ) + ELSE + J2T = J2 + END IF + NRT = ( J2T+KA-1 ) / KA1 + DO 570 J = J1, J2T, KA1 +* +* create nonzero element a(j-1,j+ka) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( 1, J+KA-1 ) + AB( 1, J+KA-1 ) = WORK( N+J )*AB( 1, J+KA-1 ) + 570 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL SLARGV( NRT, AB( 1, J1+KA ), INCA, WORK( J1 ), KA1, + $ WORK( N+J1 ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the left +* + DO 580 L = 1, KA - 1 + CALL SLARTV( NR, AB( KA1-L, J1+L ), INCA, + $ AB( KA-L, J1+L ), INCA, WORK( N+J1 ), + $ WORK( J1 ), KA1 ) + 580 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL SLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ), + $ AB( KA, J1 ), INCA, WORK( N+J1 ), + $ WORK( J1 ), KA1 ) +* + END IF +* +* start applying rotations in 1st set from the right +* + DO 590 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, WORK( N+J1T ), + $ WORK( J1T ), KA1 ) + 590 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 600 J = J1, J2, KA1 + CALL SROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ WORK( N+J ), WORK( J ) ) + 600 CONTINUE + END IF + 610 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.GT.0 .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i+kbt-ka-1,i+kbt) outside the +* band and store it in WORK(m-kb+i+kbt) +* + WORK( M-KB+I+KBT ) = -BB( KB1-KBT, I+KBT )*RA1 + END IF + END IF +* + DO 650 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1 + ELSE + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + END IF +* +* finish applying rotations in 2nd set from the right +* + DO 620 L = KB - K, 1, -1 + NRT = ( J2+KA+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( L, J1T+KA ), INCA, + $ AB( L+1, J1T+KA-1 ), INCA, + $ WORK( N+M-KB+J1T+KA ), + $ WORK( M-KB+J1T+KA ), KA1 ) + 620 CONTINUE + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + DO 630 J = J1, J2, KA1 + WORK( M-KB+J ) = WORK( M-KB+J+KA ) + WORK( N+M-KB+J ) = WORK( N+M-KB+J+KA ) + 630 CONTINUE + DO 640 J = J1, J2, KA1 +* +* create nonzero element a(j-1,j+ka) outside the band +* and store it in WORK(m-kb+j) +* + WORK( M-KB+J ) = WORK( M-KB+J )*AB( 1, J+KA-1 ) + AB( 1, J+KA-1 ) = WORK( N+M-KB+J )*AB( 1, J+KA-1 ) + 640 CONTINUE + IF( UPDATE ) THEN + IF( I+K.GT.KA1 .AND. K.LE.KBT ) + $ WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K ) + END IF + 650 CONTINUE +* + DO 690 K = KB, 1, -1 + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL SLARGV( NR, AB( 1, J1+KA ), INCA, WORK( M-KB+J1 ), + $ KA1, WORK( N+M-KB+J1 ), KA1 ) +* +* apply rotations in 2nd set from the left +* + DO 660 L = 1, KA - 1 + CALL SLARTV( NR, AB( KA1-L, J1+L ), INCA, + $ AB( KA-L, J1+L ), INCA, + $ WORK( N+M-KB+J1 ), WORK( M-KB+J1 ), KA1 ) + 660 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL SLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ), + $ AB( KA, J1 ), INCA, WORK( N+M-KB+J1 ), + $ WORK( M-KB+J1 ), KA1 ) +* + END IF +* +* start applying rotations in 2nd set from the right +* + DO 670 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, + $ WORK( N+M-KB+J1T ), WORK( M-KB+J1T ), + $ KA1 ) + 670 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 680 J = J1, J2, KA1 + CALL SROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ WORK( N+M-KB+J ), WORK( M-KB+J ) ) + 680 CONTINUE + END IF + 690 CONTINUE +* + DO 710 K = 1, KB - 1 + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 +* +* finish applying rotations in 1st set from the right +* + DO 700 L = KB - K, 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, WORK( N+J1T ), + $ WORK( J1T ), KA1 ) + 700 CONTINUE + 710 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 720 J = 2, MIN( I+KB, M ) - 2*KA - 1 + WORK( N+J ) = WORK( N+J+KA ) + WORK( J ) = WORK( J+KA ) + 720 CONTINUE + END IF +* + ELSE +* +* Transform A, working with the lower triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**T * A * inv(S(i)) +* + BII = BB( 1, I ) + DO 730 J = I1, I + AB( I-J+1, J ) = AB( I-J+1, J ) / BII + 730 CONTINUE + DO 740 J = I, MIN( N, I+KA ) + AB( J-I+1, I ) = AB( J-I+1, I ) / BII + 740 CONTINUE + DO 770 K = I + 1, I + KBT + DO 750 J = K, I + KBT + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ BB( J-I+1, I )*AB( K-I+1, I ) - + $ BB( K-I+1, I )*AB( J-I+1, I ) + + $ AB( 1, I )*BB( J-I+1, I )* + $ BB( K-I+1, I ) + 750 CONTINUE + DO 760 J = I + KBT + 1, MIN( N, I+KA ) + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ BB( K-I+1, I )*AB( J-I+1, I ) + 760 CONTINUE + 770 CONTINUE + DO 790 J = I1, I + DO 780 K = I + 1, MIN( J+KA, I+KBT ) + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ BB( K-I+1, I )*AB( I-J+1, J ) + 780 CONTINUE + 790 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL SSCAL( NX, ONE / BII, X( 1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL SGER( NX, KBT, -ONE, X( 1, I ), 1, BB( 2, I ), 1, + $ X( 1, I+1 ), LDX ) + END IF +* +* store a(i,i1) in RA1 for use in next loop over K +* + RA1 = AB( I-I1+1, I1 ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions up toward the top of the band +* + DO 840 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN +* +* generate rotation to annihilate a(i,i+k-ka-1) +* + CALL SLARTG( AB( KA1-K, I+K-KA ), RA1, + $ WORK( N+I+K-KA ), WORK( I+K-KA ), RA ) +* +* create nonzero element a(i+k,i+k-ka-1) outside the +* band and store it in WORK(m-kb+i+k) +* + T = -BB( K+1, I )*RA1 + WORK( M-KB+I+K ) = WORK( N+I+K-KA )*T - + $ WORK( I+K-KA )*AB( KA1, I+K-KA ) + AB( KA1, I+K-KA ) = WORK( I+K-KA )*T + + $ WORK( N+I+K-KA )*AB( KA1, I+K-KA ) + RA1 = RA + END IF + END IF + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MIN( J2, I-2*KA+K-1 ) + ELSE + J2T = J2 + END IF + NRT = ( J2T+KA-1 ) / KA1 + DO 800 J = J1, J2T, KA1 +* +* create nonzero element a(j+ka,j-1) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( KA1, J-1 ) + AB( KA1, J-1 ) = WORK( N+J )*AB( KA1, J-1 ) + 800 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL SLARGV( NRT, AB( KA1, J1 ), INCA, WORK( J1 ), KA1, + $ WORK( N+J1 ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the right +* + DO 810 L = 1, KA - 1 + CALL SLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ), + $ INCA, WORK( N+J1 ), WORK( J1 ), KA1 ) + 810 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL SLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ), + $ AB( 2, J1-1 ), INCA, WORK( N+J1 ), + $ WORK( J1 ), KA1 ) +* + END IF +* +* start applying rotations in 1st set from the left +* + DO 820 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ WORK( N+J1T ), WORK( J1T ), KA1 ) + 820 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 830 J = J1, J2, KA1 + CALL SROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ WORK( N+J ), WORK( J ) ) + 830 CONTINUE + END IF + 840 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.GT.0 .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i+kbt,i+kbt-ka-1) outside the +* band and store it in WORK(m-kb+i+kbt) +* + WORK( M-KB+I+KBT ) = -BB( KBT+1, I )*RA1 + END IF + END IF +* + DO 880 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1 + ELSE + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + END IF +* +* finish applying rotations in 2nd set from the left +* + DO 850 L = KB - K, 1, -1 + NRT = ( J2+KA+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( KA1-L+1, J1T+L-1 ), INCA, + $ AB( KA1-L, J1T+L-1 ), INCA, + $ WORK( N+M-KB+J1T+KA ), + $ WORK( M-KB+J1T+KA ), KA1 ) + 850 CONTINUE + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + DO 860 J = J1, J2, KA1 + WORK( M-KB+J ) = WORK( M-KB+J+KA ) + WORK( N+M-KB+J ) = WORK( N+M-KB+J+KA ) + 860 CONTINUE + DO 870 J = J1, J2, KA1 +* +* create nonzero element a(j+ka,j-1) outside the band +* and store it in WORK(m-kb+j) +* + WORK( M-KB+J ) = WORK( M-KB+J )*AB( KA1, J-1 ) + AB( KA1, J-1 ) = WORK( N+M-KB+J )*AB( KA1, J-1 ) + 870 CONTINUE + IF( UPDATE ) THEN + IF( I+K.GT.KA1 .AND. K.LE.KBT ) + $ WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K ) + END IF + 880 CONTINUE +* + DO 920 K = KB, 1, -1 + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL SLARGV( NR, AB( KA1, J1 ), INCA, WORK( M-KB+J1 ), + $ KA1, WORK( N+M-KB+J1 ), KA1 ) +* +* apply rotations in 2nd set from the right +* + DO 890 L = 1, KA - 1 + CALL SLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ), + $ INCA, WORK( N+M-KB+J1 ), WORK( M-KB+J1 ), + $ KA1 ) + 890 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL SLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ), + $ AB( 2, J1-1 ), INCA, WORK( N+M-KB+J1 ), + $ WORK( M-KB+J1 ), KA1 ) +* + END IF +* +* start applying rotations in 2nd set from the left +* + DO 900 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ WORK( N+M-KB+J1T ), WORK( M-KB+J1T ), + $ KA1 ) + 900 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 910 J = J1, J2, KA1 + CALL SROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ WORK( N+M-KB+J ), WORK( M-KB+J ) ) + 910 CONTINUE + END IF + 920 CONTINUE +* + DO 940 K = 1, KB - 1 + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 +* +* finish applying rotations in 1st set from the left +* + DO 930 L = KB - K, 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ WORK( N+J1T ), WORK( J1T ), KA1 ) + 930 CONTINUE + 940 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 950 J = 2, MIN( I+KB, M ) - 2*KA - 1 + WORK( N+J ) = WORK( N+J+KA ) + WORK( J ) = WORK( J+KA ) + 950 CONTINUE + END IF +* + END IF +* + GO TO 490 +* +* End of SSBGST +* + END diff --git a/dspl/liblapack/SRC/ssbgv.f b/dspl/liblapack/SRC/ssbgv.f new file mode 100644 index 0000000..078fb2a --- /dev/null +++ b/dspl/liblapack/SRC/ssbgv.f @@ -0,0 +1,280 @@ +*> \brief \b SSBGV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSBGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, +* LDZ, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, N +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), BB( LDBB, * ), W( * ), +* $ WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSBGV computes all the eigenvalues, and optionally, the eigenvectors +*> of a real generalized symmetric-definite banded eigenproblem, of +*> the form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric +*> and banded, and B is also positive definite. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] KA +*> \verbatim +*> KA is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KA >= 0. +*> \endverbatim +*> +*> \param[in] KB +*> \verbatim +*> KB is INTEGER +*> The number of superdiagonals of the matrix B if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KB >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first ka+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +*> +*> On exit, the contents of AB are destroyed. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KA+1. +*> \endverbatim +*> +*> \param[in,out] BB +*> \verbatim +*> BB is REAL array, dimension (LDBB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix B, stored in the first kb+1 rows of the array. The +*> j-th column of B is stored in the j-th column of the array BB +*> as follows: +*> if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +*> if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +*> +*> On exit, the factor S from the split Cholesky factorization +*> B = S**T*S, as returned by SPBSTF. +*> \endverbatim +*> +*> \param[in] LDBB +*> \verbatim +*> LDBB is INTEGER +*> The leading dimension of the array BB. LDBB >= KB+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +*> eigenvectors, with the i-th column of Z holding the +*> eigenvector associated with W(i). The eigenvectors are +*> normalized so that Z**T*B*Z = I. +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is: +*> <= N: the algorithm failed to converge: +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then SPBSTF +*> returned INFO = i: B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHEReigen +* +* ===================================================================== + SUBROUTINE SSBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, + $ LDZ, WORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), BB( LDBB, * ), W( * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, WANTZ + CHARACTER VECT + INTEGER IINFO, INDE, INDWRK +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SPBSTF, SSBGST, SSBTRD, SSTEQR, SSTERF, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBGV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL SPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + INDE = 1 + INDWRK = INDE + N + CALL SSBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* Reduce to tridiagonal form. +* + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL SSBTRD( VECT, UPLO, N, KA, AB, LDAB, W, WORK( INDE ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL SSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ), + $ INFO ) + END IF + RETURN +* +* End of SSBGV +* + END diff --git a/dspl/liblapack/SRC/ssbgvd.f b/dspl/liblapack/SRC/ssbgvd.f new file mode 100644 index 0000000..0b58760 --- /dev/null +++ b/dspl/liblapack/SRC/ssbgvd.f @@ -0,0 +1,372 @@ +*> \brief \b SSBGVD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSBGVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, +* Z, LDZ, WORK, LWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL AB( LDAB, * ), BB( LDBB, * ), W( * ), +* $ WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSBGVD computes all the eigenvalues, and optionally, the eigenvectors +*> of a real generalized symmetric-definite banded eigenproblem, of the +*> form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric and +*> banded, and B is also positive definite. If eigenvectors are +*> desired, it uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] KA +*> \verbatim +*> KA is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KA >= 0. +*> \endverbatim +*> +*> \param[in] KB +*> \verbatim +*> KB is INTEGER +*> The number of superdiagonals of the matrix B if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KB >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first ka+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +*> +*> On exit, the contents of AB are destroyed. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KA+1. +*> \endverbatim +*> +*> \param[in,out] BB +*> \verbatim +*> BB is REAL array, dimension (LDBB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix B, stored in the first kb+1 rows of the array. The +*> j-th column of B is stored in the j-th column of the array BB +*> as follows: +*> if UPLO = 'U', BB(ka+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +*> if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +*> +*> On exit, the factor S from the split Cholesky factorization +*> B = S**T*S, as returned by SPBSTF. +*> \endverbatim +*> +*> \param[in] LDBB +*> \verbatim +*> LDBB is INTEGER +*> The leading dimension of the array BB. LDBB >= KB+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +*> eigenvectors, with the i-th column of Z holding the +*> eigenvector associated with W(i). The eigenvectors are +*> normalized so Z**T*B*Z = I. +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK >= 1. +*> If JOBZ = 'N' and N > 1, LWORK >= 3*N. +*> If JOBZ = 'V' and N > 1, LWORK >= 1 + 5*N + 2*N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if LIWORK > 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK >= 1. +*> If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is: +*> <= N: the algorithm failed to converge: +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then SPBSTF +*> returned INFO = i: B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHEReigen +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE SSBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, + $ Z, LDZ, WORK, LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AB( LDAB, * ), BB( LDBB, * ), W( * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER VECT + INTEGER IINFO, INDE, INDWK2, INDWRK, LIWMIN, LLWRK2, + $ LWMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SLACPY, SPBSTF, SSBGST, SSBTRD, SSTEDC, + $ SSTERF, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 5*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + END IF +* + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -16 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL SPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + INDE = 1 + INDWRK = INDE + N + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 + CALL SSBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ, + $ WORK, IINFO ) +* +* Reduce to tridiagonal form. +* + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL SSBTRD( VECT, UPLO, N, KA, AB, LDAB, W, WORK( INDE ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, call SSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL SSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL SGEMM( 'N', 'N', N, N, N, ONE, Z, LDZ, WORK( INDWRK ), N, + $ ZERO, WORK( INDWK2 ), N ) + CALL SLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of SSBGVD +* + END diff --git a/dspl/liblapack/SRC/ssbgvx.f b/dspl/liblapack/SRC/ssbgvx.f new file mode 100644 index 0000000..3408810 --- /dev/null +++ b/dspl/liblapack/SRC/ssbgvx.f @@ -0,0 +1,522 @@ +*> \brief \b SSBGVX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSBGVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSBGVX( JOBZ, RANGE, UPLO, N, KA, KB, AB, LDAB, BB, +* LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, +* LDZ, WORK, IWORK, IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, KA, KB, LDAB, LDBB, LDQ, LDZ, M, +* $ N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL AB( LDAB, * ), BB( LDBB, * ), Q( LDQ, * ), +* $ W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSBGVX computes selected eigenvalues, and optionally, eigenvectors +*> of a real generalized symmetric-definite banded eigenproblem, of +*> the form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric +*> and banded, and B is also positive definite. Eigenvalues and +*> eigenvectors can be selected by specifying either all eigenvalues, +*> a range of values or a range of indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] KA +*> \verbatim +*> KA is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KA >= 0. +*> \endverbatim +*> +*> \param[in] KB +*> \verbatim +*> KB is INTEGER +*> The number of superdiagonals of the matrix B if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KB >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first ka+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +*> +*> On exit, the contents of AB are destroyed. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KA+1. +*> \endverbatim +*> +*> \param[in,out] BB +*> \verbatim +*> BB is REAL array, dimension (LDBB, N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix B, stored in the first kb+1 rows of the array. The +*> j-th column of B is stored in the j-th column of the array BB +*> as follows: +*> if UPLO = 'U', BB(ka+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +*> if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +*> +*> On exit, the factor S from the split Cholesky factorization +*> B = S**T*S, as returned by SPBSTF. +*> \endverbatim +*> +*> \param[in] LDBB +*> \verbatim +*> LDBB is INTEGER +*> The leading dimension of the array BB. LDBB >= KB+1. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ, N) +*> If JOBZ = 'V', the n-by-n matrix used in the reduction of +*> A*x = (lambda)*B*x to standard form, i.e. C*x = (lambda)*x, +*> and consequently C to tridiagonal form. +*> If JOBZ = 'N', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. If JOBZ = 'N', +*> LDQ >= 1. If JOBZ = 'V', LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +*> eigenvectors, with the i-th column of Z holding the +*> eigenvector associated with W(i). The eigenvectors are +*> normalized so Z**T*B*Z = I. +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (M) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvalues that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0 : successful exit +*> < 0 : if INFO = -i, the i-th argument had an illegal value +*> <= N: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in IFAIL. +*> > N : SPBSTF returned an error code; i.e., +*> if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHEReigen +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE SSBGVX( JOBZ, RANGE, UPLO, N, KA, KB, AB, LDAB, BB, + $ LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, + $ LDZ, WORK, IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KA, KB, LDAB, LDBB, LDQ, LDZ, M, + $ N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL AB( LDAB, * ), BB( LDBB, * ), Q( LDQ, * ), + $ W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, TEST, UPPER, VALEIG, WANTZ + CHARACTER ORDER, VECT + INTEGER I, IINFO, INDD, INDE, INDEE, INDIBL, INDISP, + $ INDIWO, INDWRK, ITMP1, J, JJ, NSPLIT + REAL TMP1 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SGEMV, SLACPY, SPBSTF, SSBGST, SSBTRD, + $ SSTEBZ, SSTEIN, SSTEQR, SSTERF, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KA.LT.0 ) THEN + INFO = -5 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -8 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -10 + ELSE IF( LDQ.LT.1 .OR. ( WANTZ .AND. LDQ.LT.N ) ) THEN + INFO = -12 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -14 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -15 + ELSE IF ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -16 + END IF + END IF + END IF + IF( INFO.EQ.0) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -21 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBGVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL SPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + CALL SSBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Q, LDQ, + $ WORK, IINFO ) +* +* Reduce symmetric band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDWRK = INDE + N + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL SSBTRD( VECT, UPLO, N, KA, AB, LDAB, WORK( INDD ), + $ WORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call SSTERF or SSTEQR. If this fails for some +* eigenvalue, then try SSTEBZ. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG .OR. TEST ) .AND. ( ABSTOL.LE.ZERO ) ) THEN + CALL SCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL SLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL SSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, +* call SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply transformation matrix used in reduction to tridiagonal +* form to eigenvectors returned by SSTEIN. +* + DO 20 J = 1, M + CALL SCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL SGEMV( 'N', N, N, ONE, Q, LDQ, WORK, 1, ZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* + 30 CONTINUE +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* + RETURN +* +* End of SSBGVX +* + END diff --git a/dspl/liblapack/SRC/ssbtrd.f b/dspl/liblapack/SRC/ssbtrd.f new file mode 100644 index 0000000..1a016f6 --- /dev/null +++ b/dspl/liblapack/SRC/ssbtrd.f @@ -0,0 +1,641 @@ +*> \brief \b SSBTRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSBTRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSBTRD( VECT, UPLO, N, KD, AB, LDAB, D, E, Q, LDQ, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, VECT +* INTEGER INFO, KD, LDAB, LDQ, N +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), D( * ), E( * ), Q( LDQ, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSBTRD reduces a real symmetric band matrix A to symmetric +*> tridiagonal form T by an orthogonal similarity transformation: +*> Q**T * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': do not form Q; +*> = 'V': form Q; +*> = 'U': update a matrix X, by forming X*Q. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> On exit, the diagonal elements of AB are overwritten by the +*> diagonal elements of the tridiagonal matrix T; if KD > 0, the +*> elements on the first superdiagonal (if UPLO = 'U') or the +*> first subdiagonal (if UPLO = 'L') are overwritten by the +*> off-diagonal elements of T; the rest of AB is overwritten by +*> values generated during the reduction. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ,N) +*> On entry, if VECT = 'U', then Q must contain an N-by-N +*> matrix X; if VECT = 'N' or 'V', then Q need not be set. +*> +*> On exit: +*> if VECT = 'V', Q contains the N-by-N orthogonal matrix Q; +*> if VECT = 'U', Q contains the product X*Q; +*> if VECT = 'N', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= 1, and LDQ >= N if VECT = 'V' or 'U'. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Modified by Linda Kaufman, Bell Labs. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SSBTRD( VECT, UPLO, N, KD, AB, LDAB, D, E, Q, LDQ, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO, VECT + INTEGER INFO, KD, LDAB, LDQ, N +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), D( * ), E( * ), Q( LDQ, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL INITQ, UPPER, WANTQ + INTEGER I, I2, IBL, INCA, INCX, IQAEND, IQB, IQEND, J, + $ J1, J1END, J1INC, J2, JEND, JIN, JINC, K, KD1, + $ KDM1, KDN, L, LAST, LEND, NQ, NR, NRT + REAL TEMP +* .. +* .. External Subroutines .. + EXTERNAL SLAR2V, SLARGV, SLARTG, SLARTV, SLASET, SROT, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INITQ = LSAME( VECT, 'V' ) + WANTQ = INITQ .OR. LSAME( VECT, 'U' ) + UPPER = LSAME( UPLO, 'U' ) + KD1 = KD + 1 + KDM1 = KD - 1 + INCX = LDAB - 1 + IQEND = 1 +* + INFO = 0 + IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD1 ) THEN + INFO = -6 + ELSE IF( LDQ.LT.MAX( 1, N ) .AND. WANTQ ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSBTRD', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize Q to the unit matrix, if needed +* + IF( INITQ ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) +* +* Wherever possible, plane rotations are generated and applied in +* vector operations of length NR over the index set J1:J2:KD1. +* +* The cosines and sines of the plane rotations are stored in the +* arrays D and WORK. +* + INCA = KD1*LDAB + KDN = MIN( N-1, KD ) + IF( UPPER ) THEN +* + IF( KD.GT.1 ) THEN +* +* Reduce to tridiagonal form, working with upper triangle +* + NR = 0 + J1 = KDN + 2 + J2 = 1 +* + DO 90 I = 1, N - 2 +* +* Reduce i-th row of matrix to tridiagonal form +* + DO 80 K = KDN + 1, 2, -1 + J1 = J1 + KDN + J2 = J2 + KDN +* + IF( NR.GT.0 ) THEN +* +* generate plane rotations to annihilate nonzero +* elements which have been created outside the band +* + CALL SLARGV( NR, AB( 1, J1-1 ), INCA, WORK( J1 ), + $ KD1, D( J1 ), KD1 ) +* +* apply rotations from the right +* +* +* Dependent on the the number of diagonals either +* SLARTV or SROT is used +* + IF( NR.GE.2*KD-1 ) THEN + DO 10 L = 1, KD - 1 + CALL SLARTV( NR, AB( L+1, J1-1 ), INCA, + $ AB( L, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) + 10 CONTINUE +* + ELSE + JEND = J1 + ( NR-1 )*KD1 + DO 20 JINC = J1, JEND, KD1 + CALL SROT( KDM1, AB( 2, JINC-1 ), 1, + $ AB( 1, JINC ), 1, D( JINC ), + $ WORK( JINC ) ) + 20 CONTINUE + END IF + END IF +* +* + IF( K.GT.2 ) THEN + IF( K.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i,i+k-1) +* within the band +* + CALL SLARTG( AB( KD-K+3, I+K-2 ), + $ AB( KD-K+2, I+K-1 ), D( I+K-1 ), + $ WORK( I+K-1 ), TEMP ) + AB( KD-K+3, I+K-2 ) = TEMP +* +* apply rotation from the right +* + CALL SROT( K-3, AB( KD-K+4, I+K-2 ), 1, + $ AB( KD-K+3, I+K-1 ), 1, D( I+K-1 ), + $ WORK( I+K-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KDN - 1 + END IF +* +* apply plane rotations from both sides to diagonal +* blocks +* + IF( NR.GT.0 ) + $ CALL SLAR2V( NR, AB( KD1, J1-1 ), AB( KD1, J1 ), + $ AB( KD, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) +* +* apply plane rotations from the left +* + IF( NR.GT.0 ) THEN + IF( 2*KD-1.LT.NR ) THEN +* +* Dependent on the the number of diagonals either +* SLARTV or SROT is used +* + DO 30 L = 1, KD - 1 + IF( J2+L.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( KD-L, J1+L ), INCA, + $ AB( KD-L+1, J1+L ), INCA, + $ D( J1 ), WORK( J1 ), KD1 ) + 30 CONTINUE + ELSE + J1END = J1 + KD1*( NR-2 ) + IF( J1END.GE.J1 ) THEN + DO 40 JIN = J1, J1END, KD1 + CALL SROT( KD-1, AB( KD-1, JIN+1 ), INCX, + $ AB( KD, JIN+1 ), INCX, + $ D( JIN ), WORK( JIN ) ) + 40 CONTINUE + END IF + LEND = MIN( KDM1, N-J2 ) + LAST = J1END + KD1 + IF( LEND.GT.0 ) + $ CALL SROT( LEND, AB( KD-1, LAST+1 ), INCX, + $ AB( KD, LAST+1 ), INCX, D( LAST ), + $ WORK( LAST ) ) + END IF + END IF +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + IF( INITQ ) THEN +* +* take advantage of the fact that Q was +* initially the Identity matrix +* + IQEND = MAX( IQEND, J2 ) + I2 = MAX( 0, K-3 ) + IQAEND = 1 + I*KD + IF( K.EQ.2 ) + $ IQAEND = IQAEND + KD + IQAEND = MIN( IQAEND, IQEND ) + DO 50 J = J1, J2, KD1 + IBL = I - I2 / KDM1 + I2 = I2 + 1 + IQB = MAX( 1, J-IBL ) + NQ = 1 + IQAEND - IQB + IQAEND = MIN( IQAEND+KD, IQEND ) + CALL SROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ), + $ 1, D( J ), WORK( J ) ) + 50 CONTINUE + ELSE +* + DO 60 J = J1, J2, KD1 + CALL SROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ D( J ), WORK( J ) ) + 60 CONTINUE + END IF +* + END IF +* + IF( J2+KDN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KDN - 1 + END IF +* + DO 70 J = J1, J2, KD1 +* +* create nonzero element a(j-1,j+kd) outside the band +* and store it in WORK +* + WORK( J+KD ) = WORK( J )*AB( 1, J+KD ) + AB( 1, J+KD ) = D( J )*AB( 1, J+KD ) + 70 CONTINUE + 80 CONTINUE + 90 CONTINUE + END IF +* + IF( KD.GT.0 ) THEN +* +* copy off-diagonal elements to E +* + DO 100 I = 1, N - 1 + E( I ) = AB( KD, I+1 ) + 100 CONTINUE + ELSE +* +* set E to zero if original matrix was diagonal +* + DO 110 I = 1, N - 1 + E( I ) = ZERO + 110 CONTINUE + END IF +* +* copy diagonal elements to D +* + DO 120 I = 1, N + D( I ) = AB( KD1, I ) + 120 CONTINUE +* + ELSE +* + IF( KD.GT.1 ) THEN +* +* Reduce to tridiagonal form, working with lower triangle +* + NR = 0 + J1 = KDN + 2 + J2 = 1 +* + DO 210 I = 1, N - 2 +* +* Reduce i-th column of matrix to tridiagonal form +* + DO 200 K = KDN + 1, 2, -1 + J1 = J1 + KDN + J2 = J2 + KDN +* + IF( NR.GT.0 ) THEN +* +* generate plane rotations to annihilate nonzero +* elements which have been created outside the band +* + CALL SLARGV( NR, AB( KD1, J1-KD1 ), INCA, + $ WORK( J1 ), KD1, D( J1 ), KD1 ) +* +* apply plane rotations from one side +* +* +* Dependent on the the number of diagonals either +* SLARTV or SROT is used +* + IF( NR.GT.2*KD-1 ) THEN + DO 130 L = 1, KD - 1 + CALL SLARTV( NR, AB( KD1-L, J1-KD1+L ), INCA, + $ AB( KD1-L+1, J1-KD1+L ), INCA, + $ D( J1 ), WORK( J1 ), KD1 ) + 130 CONTINUE + ELSE + JEND = J1 + KD1*( NR-1 ) + DO 140 JINC = J1, JEND, KD1 + CALL SROT( KDM1, AB( KD, JINC-KD ), INCX, + $ AB( KD1, JINC-KD ), INCX, + $ D( JINC ), WORK( JINC ) ) + 140 CONTINUE + END IF +* + END IF +* + IF( K.GT.2 ) THEN + IF( K.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i+k-1,i) +* within the band +* + CALL SLARTG( AB( K-1, I ), AB( K, I ), + $ D( I+K-1 ), WORK( I+K-1 ), TEMP ) + AB( K-1, I ) = TEMP +* +* apply rotation from the left +* + CALL SROT( K-3, AB( K-2, I+1 ), LDAB-1, + $ AB( K-1, I+1 ), LDAB-1, D( I+K-1 ), + $ WORK( I+K-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KDN - 1 + END IF +* +* apply plane rotations from both sides to diagonal +* blocks +* + IF( NR.GT.0 ) + $ CALL SLAR2V( NR, AB( 1, J1-1 ), AB( 1, J1 ), + $ AB( 2, J1-1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) +* +* apply plane rotations from the right +* +* +* Dependent on the the number of diagonals either +* SLARTV or SROT is used +* + IF( NR.GT.0 ) THEN + IF( NR.GT.2*KD-1 ) THEN + DO 150 L = 1, KD - 1 + IF( J2+L.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL SLARTV( NRT, AB( L+2, J1-1 ), INCA, + $ AB( L+1, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) + 150 CONTINUE + ELSE + J1END = J1 + KD1*( NR-2 ) + IF( J1END.GE.J1 ) THEN + DO 160 J1INC = J1, J1END, KD1 + CALL SROT( KDM1, AB( 3, J1INC-1 ), 1, + $ AB( 2, J1INC ), 1, D( J1INC ), + $ WORK( J1INC ) ) + 160 CONTINUE + END IF + LEND = MIN( KDM1, N-J2 ) + LAST = J1END + KD1 + IF( LEND.GT.0 ) + $ CALL SROT( LEND, AB( 3, LAST-1 ), 1, + $ AB( 2, LAST ), 1, D( LAST ), + $ WORK( LAST ) ) + END IF + END IF +* +* +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + IF( INITQ ) THEN +* +* take advantage of the fact that Q was +* initially the Identity matrix +* + IQEND = MAX( IQEND, J2 ) + I2 = MAX( 0, K-3 ) + IQAEND = 1 + I*KD + IF( K.EQ.2 ) + $ IQAEND = IQAEND + KD + IQAEND = MIN( IQAEND, IQEND ) + DO 170 J = J1, J2, KD1 + IBL = I - I2 / KDM1 + I2 = I2 + 1 + IQB = MAX( 1, J-IBL ) + NQ = 1 + IQAEND - IQB + IQAEND = MIN( IQAEND+KD, IQEND ) + CALL SROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ), + $ 1, D( J ), WORK( J ) ) + 170 CONTINUE + ELSE +* + DO 180 J = J1, J2, KD1 + CALL SROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ D( J ), WORK( J ) ) + 180 CONTINUE + END IF + END IF +* + IF( J2+KDN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KDN - 1 + END IF +* + DO 190 J = J1, J2, KD1 +* +* create nonzero element a(j+kd,j-1) outside the +* band and store it in WORK +* + WORK( J+KD ) = WORK( J )*AB( KD1, J ) + AB( KD1, J ) = D( J )*AB( KD1, J ) + 190 CONTINUE + 200 CONTINUE + 210 CONTINUE + END IF +* + IF( KD.GT.0 ) THEN +* +* copy off-diagonal elements to E +* + DO 220 I = 1, N - 1 + E( I ) = AB( 2, I ) + 220 CONTINUE + ELSE +* +* set E to zero if original matrix was diagonal +* + DO 230 I = 1, N - 1 + E( I ) = ZERO + 230 CONTINUE + END IF +* +* copy diagonal elements to D +* + DO 240 I = 1, N + D( I ) = AB( 1, I ) + 240 CONTINUE + END IF +* + RETURN +* +* End of SSBTRD +* + END diff --git a/dspl/liblapack/SRC/ssfrk.f b/dspl/liblapack/SRC/ssfrk.f new file mode 100644 index 0000000..6dc50fe --- /dev/null +++ b/dspl/liblapack/SRC/ssfrk.f @@ -0,0 +1,543 @@ +*> \brief \b SSFRK performs a symmetric rank-k operation for matrix in RFP format. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSFRK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSFRK( TRANSR, UPLO, TRANS, N, K, ALPHA, A, LDA, BETA, +* C ) +* +* .. Scalar Arguments .. +* REAL ALPHA, BETA +* INTEGER K, LDA, N +* CHARACTER TRANS, TRANSR, UPLO +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), C( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Level 3 BLAS like routine for C in RFP Format. +*> +*> SSFRK performs one of the symmetric rank--k operations +*> +*> C := alpha*A*A**T + beta*C, +*> +*> or +*> +*> C := alpha*A**T*A + beta*C, +*> +*> where alpha and beta are real scalars, C is an n--by--n symmetric +*> matrix and A is an n--by--k matrix in the first case and a k--by--n +*> matrix in the second case. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal Form of RFP A is stored; +*> = 'T': The Transpose Form of RFP A is stored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array C is to be referenced as +*> follows: +*> +*> UPLO = 'U' or 'u' Only the upper triangular part of C +*> is to be referenced. +*> +*> UPLO = 'L' or 'l' Only the lower triangular part of C +*> is to be referenced. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> On entry, TRANS specifies the operation to be performed as +*> follows: +*> +*> TRANS = 'N' or 'n' C := alpha*A*A**T + beta*C. +*> +*> TRANS = 'T' or 't' C := alpha*A**T*A + beta*C. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of the matrix C. N must be +*> at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> On entry with TRANS = 'N' or 'n', K specifies the number +*> of columns of the matrix A, and on entry with TRANS = 'T' +*> or 't', K specifies the number of rows of the matrix A. K +*> must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is REAL +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,ka) +*> where KA +*> is K when TRANS = 'N' or 'n', and is N otherwise. Before +*> entry with TRANS = 'N' or 'n', the leading N--by--K part of +*> the array A must contain the matrix A, otherwise the leading +*> K--by--N part of the array A must contain the matrix A. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. When TRANS = 'N' or 'n' +*> then LDA must be at least max( 1, n ), otherwise LDA must +*> be at least max( 1, k ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is REAL +*> On entry, BETA specifies the scalar beta. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (NT) +*> NT = N*(N+1)/2. On entry, the symmetric matrix C in RFP +*> Format. RFP Format is described by TRANSR, UPLO and N. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SSFRK( TRANSR, UPLO, TRANS, N, K, ALPHA, A, LDA, BETA, + $ C ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + REAL ALPHA, BETA + INTEGER K, LDA, N + CHARACTER TRANS, TRANSR, UPLO +* .. +* .. Array Arguments .. + REAL A( LDA, * ), C( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, NORMALTRANSR, NISODD, NOTRANS + INTEGER INFO, NROWA, J, NK, N1, N2 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SSYRK, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + NOTRANS = LSAME( TRANS, 'N' ) +* + IF( NOTRANS ) THEN + NROWA = N + ELSE + NROWA = K + END IF +* + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'T' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRANS .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NROWA ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSFRK ', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* +* The quick return case: ((ALPHA.EQ.0).AND.(BETA.NE.ZERO)) is not +* done (it is in SSYRK for example) and left in the general case. +* + IF( ( N.EQ.0 ) .OR. ( ( ( ALPHA.EQ.ZERO ) .OR. ( K.EQ.0 ) ) .AND. + $ ( BETA.EQ.ONE ) ) )RETURN +* + IF( ( ALPHA.EQ.ZERO ) .AND. ( BETA.EQ.ZERO ) ) THEN + DO J = 1, ( ( N*( N+1 ) ) / 2 ) + C( J ) = ZERO + END DO + RETURN + END IF +* +* C is N-by-N. +* If N is odd, set NISODD = .TRUE., and N1 and N2. +* If N is even, NISODD = .FALSE., and NK. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + NISODD = .FALSE. + NK = N / 2 + ELSE + NISODD = .TRUE. + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF + END IF +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* N is odd, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* N is odd, TRANSR = 'N', UPLO = 'L', and TRANS = 'N' +* + CALL SSYRK( 'L', 'N', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 1 ), N ) + CALL SSYRK( 'U', 'N', N2, K, ALPHA, A( N1+1, 1 ), LDA, + $ BETA, C( N+1 ), N ) + CALL SGEMM( 'N', 'T', N2, N1, K, ALPHA, A( N1+1, 1 ), + $ LDA, A( 1, 1 ), LDA, BETA, C( N1+1 ), N ) +* + ELSE +* +* N is odd, TRANSR = 'N', UPLO = 'L', and TRANS = 'T' +* + CALL SSYRK( 'L', 'T', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 1 ), N ) + CALL SSYRK( 'U', 'T', N2, K, ALPHA, A( 1, N1+1 ), LDA, + $ BETA, C( N+1 ), N ) + CALL SGEMM( 'T', 'N', N2, N1, K, ALPHA, A( 1, N1+1 ), + $ LDA, A( 1, 1 ), LDA, BETA, C( N1+1 ), N ) +* + END IF +* + ELSE +* +* N is odd, TRANSR = 'N', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* N is odd, TRANSR = 'N', UPLO = 'U', and TRANS = 'N' +* + CALL SSYRK( 'L', 'N', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( N2+1 ), N ) + CALL SSYRK( 'U', 'N', N2, K, ALPHA, A( N2, 1 ), LDA, + $ BETA, C( N1+1 ), N ) + CALL SGEMM( 'N', 'T', N1, N2, K, ALPHA, A( 1, 1 ), + $ LDA, A( N2, 1 ), LDA, BETA, C( 1 ), N ) +* + ELSE +* +* N is odd, TRANSR = 'N', UPLO = 'U', and TRANS = 'T' +* + CALL SSYRK( 'L', 'T', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( N2+1 ), N ) + CALL SSYRK( 'U', 'T', N2, K, ALPHA, A( 1, N2 ), LDA, + $ BETA, C( N1+1 ), N ) + CALL SGEMM( 'T', 'N', N1, N2, K, ALPHA, A( 1, 1 ), + $ LDA, A( 1, N2 ), LDA, BETA, C( 1 ), N ) +* + END IF +* + END IF +* + ELSE +* +* N is odd, and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* N is odd, TRANSR = 'T', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* N is odd, TRANSR = 'T', UPLO = 'L', and TRANS = 'N' +* + CALL SSYRK( 'U', 'N', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 1 ), N1 ) + CALL SSYRK( 'L', 'N', N2, K, ALPHA, A( N1+1, 1 ), LDA, + $ BETA, C( 2 ), N1 ) + CALL SGEMM( 'N', 'T', N1, N2, K, ALPHA, A( 1, 1 ), + $ LDA, A( N1+1, 1 ), LDA, BETA, + $ C( N1*N1+1 ), N1 ) +* + ELSE +* +* N is odd, TRANSR = 'T', UPLO = 'L', and TRANS = 'T' +* + CALL SSYRK( 'U', 'T', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 1 ), N1 ) + CALL SSYRK( 'L', 'T', N2, K, ALPHA, A( 1, N1+1 ), LDA, + $ BETA, C( 2 ), N1 ) + CALL SGEMM( 'T', 'N', N1, N2, K, ALPHA, A( 1, 1 ), + $ LDA, A( 1, N1+1 ), LDA, BETA, + $ C( N1*N1+1 ), N1 ) +* + END IF +* + ELSE +* +* N is odd, TRANSR = 'T', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* N is odd, TRANSR = 'T', UPLO = 'U', and TRANS = 'N' +* + CALL SSYRK( 'U', 'N', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( N2*N2+1 ), N2 ) + CALL SSYRK( 'L', 'N', N2, K, ALPHA, A( N1+1, 1 ), LDA, + $ BETA, C( N1*N2+1 ), N2 ) + CALL SGEMM( 'N', 'T', N2, N1, K, ALPHA, A( N1+1, 1 ), + $ LDA, A( 1, 1 ), LDA, BETA, C( 1 ), N2 ) +* + ELSE +* +* N is odd, TRANSR = 'T', UPLO = 'U', and TRANS = 'T' +* + CALL SSYRK( 'U', 'T', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( N2*N2+1 ), N2 ) + CALL SSYRK( 'L', 'T', N2, K, ALPHA, A( 1, N1+1 ), LDA, + $ BETA, C( N1*N2+1 ), N2 ) + CALL SGEMM( 'T', 'N', N2, N1, K, ALPHA, A( 1, N1+1 ), + $ LDA, A( 1, 1 ), LDA, BETA, C( 1 ), N2 ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* N is even, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* N is even, TRANSR = 'N', UPLO = 'L', and TRANS = 'N' +* + CALL SSYRK( 'L', 'N', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 2 ), N+1 ) + CALL SSYRK( 'U', 'N', NK, K, ALPHA, A( NK+1, 1 ), LDA, + $ BETA, C( 1 ), N+1 ) + CALL SGEMM( 'N', 'T', NK, NK, K, ALPHA, A( NK+1, 1 ), + $ LDA, A( 1, 1 ), LDA, BETA, C( NK+2 ), + $ N+1 ) +* + ELSE +* +* N is even, TRANSR = 'N', UPLO = 'L', and TRANS = 'T' +* + CALL SSYRK( 'L', 'T', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 2 ), N+1 ) + CALL SSYRK( 'U', 'T', NK, K, ALPHA, A( 1, NK+1 ), LDA, + $ BETA, C( 1 ), N+1 ) + CALL SGEMM( 'T', 'N', NK, NK, K, ALPHA, A( 1, NK+1 ), + $ LDA, A( 1, 1 ), LDA, BETA, C( NK+2 ), + $ N+1 ) +* + END IF +* + ELSE +* +* N is even, TRANSR = 'N', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* N is even, TRANSR = 'N', UPLO = 'U', and TRANS = 'N' +* + CALL SSYRK( 'L', 'N', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK+2 ), N+1 ) + CALL SSYRK( 'U', 'N', NK, K, ALPHA, A( NK+1, 1 ), LDA, + $ BETA, C( NK+1 ), N+1 ) + CALL SGEMM( 'N', 'T', NK, NK, K, ALPHA, A( 1, 1 ), + $ LDA, A( NK+1, 1 ), LDA, BETA, C( 1 ), + $ N+1 ) +* + ELSE +* +* N is even, TRANSR = 'N', UPLO = 'U', and TRANS = 'T' +* + CALL SSYRK( 'L', 'T', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK+2 ), N+1 ) + CALL SSYRK( 'U', 'T', NK, K, ALPHA, A( 1, NK+1 ), LDA, + $ BETA, C( NK+1 ), N+1 ) + CALL SGEMM( 'T', 'N', NK, NK, K, ALPHA, A( 1, 1 ), + $ LDA, A( 1, NK+1 ), LDA, BETA, C( 1 ), + $ N+1 ) +* + END IF +* + END IF +* + ELSE +* +* N is even, and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* N is even, TRANSR = 'T', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* N is even, TRANSR = 'T', UPLO = 'L', and TRANS = 'N' +* + CALL SSYRK( 'U', 'N', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK+1 ), NK ) + CALL SSYRK( 'L', 'N', NK, K, ALPHA, A( NK+1, 1 ), LDA, + $ BETA, C( 1 ), NK ) + CALL SGEMM( 'N', 'T', NK, NK, K, ALPHA, A( 1, 1 ), + $ LDA, A( NK+1, 1 ), LDA, BETA, + $ C( ( ( NK+1 )*NK )+1 ), NK ) +* + ELSE +* +* N is even, TRANSR = 'T', UPLO = 'L', and TRANS = 'T' +* + CALL SSYRK( 'U', 'T', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK+1 ), NK ) + CALL SSYRK( 'L', 'T', NK, K, ALPHA, A( 1, NK+1 ), LDA, + $ BETA, C( 1 ), NK ) + CALL SGEMM( 'T', 'N', NK, NK, K, ALPHA, A( 1, 1 ), + $ LDA, A( 1, NK+1 ), LDA, BETA, + $ C( ( ( NK+1 )*NK )+1 ), NK ) +* + END IF +* + ELSE +* +* N is even, TRANSR = 'T', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* N is even, TRANSR = 'T', UPLO = 'U', and TRANS = 'N' +* + CALL SSYRK( 'U', 'N', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK*( NK+1 )+1 ), NK ) + CALL SSYRK( 'L', 'N', NK, K, ALPHA, A( NK+1, 1 ), LDA, + $ BETA, C( NK*NK+1 ), NK ) + CALL SGEMM( 'N', 'T', NK, NK, K, ALPHA, A( NK+1, 1 ), + $ LDA, A( 1, 1 ), LDA, BETA, C( 1 ), NK ) +* + ELSE +* +* N is even, TRANSR = 'T', UPLO = 'U', and TRANS = 'T' +* + CALL SSYRK( 'U', 'T', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK*( NK+1 )+1 ), NK ) + CALL SSYRK( 'L', 'T', NK, K, ALPHA, A( 1, NK+1 ), LDA, + $ BETA, C( NK*NK+1 ), NK ) + CALL SGEMM( 'T', 'N', NK, NK, K, ALPHA, A( 1, NK+1 ), + $ LDA, A( 1, 1 ), LDA, BETA, C( 1 ), NK ) +* + END IF +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of SSFRK +* + END diff --git a/dspl/liblapack/SRC/sspcon.f b/dspl/liblapack/SRC/sspcon.f new file mode 100644 index 0000000..9f79b01 --- /dev/null +++ b/dspl/liblapack/SRC/sspcon.f @@ -0,0 +1,238 @@ +*> \brief \b SSPCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSPCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSPCON estimates the reciprocal of the condition number (in the +*> 1-norm) of a real symmetric packed matrix A using the factorization +*> A = U*D*U**T or A = L*D*L**T computed by SSPTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by SSPTRF, stored as a +*> packed triangular matrix. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSPTRF. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SSPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IP, KASE + REAL AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SSPTRS, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + IP = N*( N+1 ) / 2 + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP - I + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + IP = 1 + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP + N - I + 1 + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**T) or inv(U*D*U**T). +* + CALL SSPTRS( UPLO, N, 1, AP, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of SSPCON +* + END diff --git a/dspl/liblapack/SRC/sspev.f b/dspl/liblapack/SRC/sspev.f new file mode 100644 index 0000000..ff9ac1d --- /dev/null +++ b/dspl/liblapack/SRC/sspev.f @@ -0,0 +1,262 @@ +*> \brief SSPEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSPEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. +* REAL AP( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSPEV computes all the eigenvalues and, optionally, eigenvectors of a +*> real symmetric matrix A in packed storage. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, AP is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the diagonal +*> and first superdiagonal of the tridiagonal matrix T overwrite +*> the corresponding elements of A, and if UPLO = 'L', the +*> diagonal and first subdiagonal of T overwrite the +*> corresponding elements of A. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHEReigen +* +* ===================================================================== + SUBROUTINE SSPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + REAL AP( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSP + EXTERNAL LSAME, SLAMCH, SLANSP +* .. +* .. External Subroutines .. + EXTERNAL SOPGTR, SSCAL, SSPTRD, SSTEQR, SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( UPLO, 'U' ) .OR. LSAME( UPLO, 'L' ) ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPEV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AP( 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSP( 'M', UPLO, N, AP, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL SSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + END IF +* +* Call SSPTRD to reduce symmetric packed matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + CALL SSPTRD( UPLO, N, AP, W, WORK( INDE ), WORK( INDTAU ), IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* SOPGTR to generate the orthogonal matrix, then call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + INDWRK = INDTAU + N + CALL SOPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + CALL SSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDTAU ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + RETURN +* +* End of SSPEV +* + END diff --git a/dspl/liblapack/SRC/sspevd.f b/dspl/liblapack/SRC/sspevd.f new file mode 100644 index 0000000..1f407c3 --- /dev/null +++ b/dspl/liblapack/SRC/sspevd.f @@ -0,0 +1,337 @@ +*> \brief SSPEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSPEVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, +* IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL AP( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSPEVD computes all the eigenvalues and, optionally, eigenvectors +*> of a real symmetric matrix A in packed storage. If eigenvectors are +*> desired, it uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, AP is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the diagonal +*> and first superdiagonal of the tridiagonal matrix T overwrite +*> the corresponding elements of A, and if UPLO = 'L', the +*> diagonal and first subdiagonal of T overwrite the +*> corresponding elements of A. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the required LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LWORK must be at least 2*N. +*> If JOBZ = 'V' and N > 1, LWORK must be at least +*> 1 + 6*N + N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the required sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the required LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the required sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHEReigen +* +* ===================================================================== + SUBROUTINE SSPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, + $ IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AP( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTZ + INTEGER IINFO, INDE, INDTAU, INDWRK, ISCALE, LIWMIN, + $ LLWORK, LWMIN + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSP + EXTERNAL LSAME, SLAMCH, SLANSP +* .. +* .. External Subroutines .. + EXTERNAL SOPMTR, SSCAL, SSPTRD, SSTEDC, SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( UPLO, 'U' ) .OR. LSAME( UPLO, 'L' ) ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 6*N + N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + END IF + END IF + IWORK( 1 ) = LIWMIN + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -9 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AP( 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSP( 'M', UPLO, N, AP, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL SSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + END IF +* +* Call SSPTRD to reduce symmetric packed matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + CALL SSPTRD( UPLO, N, AP, W, WORK( INDE ), WORK( INDTAU ), IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* SSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call SOPMTR to multiply it by the +* Householder transformations represented in AP. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + CALL SSTEDC( 'I', N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ), + $ LLWORK, IWORK, LIWORK, INFO ) + CALL SOPMTR( 'L', UPLO, 'N', N, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL SSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of SSPEVD +* + END diff --git a/dspl/liblapack/SRC/sspevx.f b/dspl/liblapack/SRC/sspevx.f new file mode 100644 index 0000000..51d4dda --- /dev/null +++ b/dspl/liblapack/SRC/sspevx.f @@ -0,0 +1,496 @@ +*> \brief SSPEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSPEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, +* ABSTOL, M, W, Z, LDZ, WORK, IWORK, IFAIL, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDZ, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL AP( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSPEVX computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric matrix A in packed storage. Eigenvalues/vectors +*> can be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found; +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found; +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, AP is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the diagonal +*> and first superdiagonal of the tridiagonal matrix T overwrite +*> the corresponding elements of A, and if UPLO = 'L', the +*> diagonal and first subdiagonal of T overwrite the +*> corresponding elements of A. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing AP to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the selected eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (8*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHEReigen +* +* ===================================================================== + SUBROUTINE SSPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, WORK, IWORK, IFAIL, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDZ, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL AP( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, TEST, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWO, INDTAU, INDWRK, ISCALE, ITMP1, + $ J, JJ, NSPLIT + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSP + EXTERNAL LSAME, SLAMCH, SLANSP +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SOPGTR, SOPMTR, SSCAL, SSPTRD, SSTEBZ, + $ SSTEIN, SSTEQR, SSTERF, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) ) + $ THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -7 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -9 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -14 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPEVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = AP( 1 ) + ELSE + IF( VL.LT.AP( 1 ) .AND. VU.GE.AP( 1 ) ) THEN + M = 1 + W( 1 ) = AP( 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF ( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + ENDIF + ANRM = SLANSP( 'M', UPLO, N, AP, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL SSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call SSPTRD to reduce symmetric packed matrix to tridiagonal form. +* + INDTAU = 1 + INDE = INDTAU + N + INDD = INDE + N + INDWRK = INDD + N + CALL SSPTRD( UPLO, N, AP, WORK( INDD ), WORK( INDE ), + $ WORK( INDTAU ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call SSTERF or SOPGTR and SSTEQR. If this fails +* for some eigenvalue, then try SSTEBZ. +* + TEST = .FALSE. + IF (INDEIG) THEN + IF (IL.EQ.1 .AND. IU.EQ.N) THEN + TEST = .TRUE. + END IF + END IF + IF ((ALLEIG .OR. TEST) .AND. (ABSTOL.LE.ZERO)) THEN + CALL SCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL SOPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 20 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by SSTEIN. +* + CALL SOPMTR( 'L', UPLO, 'N', N, M, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 20 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 40 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 30 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 30 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 40 CONTINUE + END IF +* + RETURN +* +* End of SSPEVX +* + END diff --git a/dspl/liblapack/SRC/sspgst.f b/dspl/liblapack/SRC/sspgst.f new file mode 100644 index 0000000..1e539e9 --- /dev/null +++ b/dspl/liblapack/SRC/sspgst.f @@ -0,0 +1,274 @@ +*> \brief \b SSPGST +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSPGST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSPGST( ITYPE, UPLO, N, AP, BP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, ITYPE, N +* .. +* .. Array Arguments .. +* REAL AP( * ), BP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSPGST reduces a real symmetric-definite generalized eigenproblem +*> to standard form, using packed storage. +*> +*> If ITYPE = 1, the problem is A*x = lambda*B*x, +*> and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) +*> +*> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +*> B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. +*> +*> B must have been previously factorized as U**T*U or L*L**T by SPPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); +*> = 2 or 3: compute U*A*U**T or L**T*A*L. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored and B is factored as +*> U**T*U; +*> = 'L': Lower triangle of A is stored and B is factored as +*> L*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, if INFO = 0, the transformed matrix, stored in the +*> same format as A. +*> \endverbatim +*> +*> \param[in] BP +*> \verbatim +*> BP is REAL array, dimension (N*(N+1)/2) +*> The triangular factor from the Cholesky factorization of B, +*> stored in the same format as A, as returned by SPPTRF. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SSPGST( ITYPE, UPLO, N, AP, BP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, N +* .. +* .. Array Arguments .. + REAL AP( * ), BP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, HALF + PARAMETER ( ONE = 1.0, HALF = 0.5 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, J1, J1J1, JJ, K, K1, K1K1, KK + REAL AJJ, AKK, BJJ, BKK, CT +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SSCAL, SSPMV, SSPR2, STPMV, STPSV, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPGST', -INFO ) + RETURN + END IF +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U**T)*A*inv(U) +* +* J1 and JJ are the indices of A(1,j) and A(j,j) +* + JJ = 0 + DO 10 J = 1, N + J1 = JJ + 1 + JJ = JJ + J +* +* Compute the j-th column of the upper triangle of A +* + BJJ = BP( JJ ) + CALL STPSV( UPLO, 'Transpose', 'Nonunit', J, BP, + $ AP( J1 ), 1 ) + CALL SSPMV( UPLO, J-1, -ONE, AP, BP( J1 ), 1, ONE, + $ AP( J1 ), 1 ) + CALL SSCAL( J-1, ONE / BJJ, AP( J1 ), 1 ) + AP( JJ ) = ( AP( JJ )-SDOT( J-1, AP( J1 ), 1, BP( J1 ), + $ 1 ) ) / BJJ + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L**T) +* +* KK and K1K1 are the indices of A(k,k) and A(k+1,k+1) +* + KK = 1 + DO 20 K = 1, N + K1K1 = KK + N - K + 1 +* +* Update the lower triangle of A(k:n,k:n) +* + AKK = AP( KK ) + BKK = BP( KK ) + AKK = AKK / BKK**2 + AP( KK ) = AKK + IF( K.LT.N ) THEN + CALL SSCAL( N-K, ONE / BKK, AP( KK+1 ), 1 ) + CT = -HALF*AKK + CALL SAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 ) + CALL SSPR2( UPLO, N-K, -ONE, AP( KK+1 ), 1, + $ BP( KK+1 ), 1, AP( K1K1 ) ) + CALL SAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 ) + CALL STPSV( UPLO, 'No transpose', 'Non-unit', N-K, + $ BP( K1K1 ), AP( KK+1 ), 1 ) + END IF + KK = K1K1 + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U**T +* +* K1 and KK are the indices of A(1,k) and A(k,k) +* + KK = 0 + DO 30 K = 1, N + K1 = KK + 1 + KK = KK + K +* +* Update the upper triangle of A(1:k,1:k) +* + AKK = AP( KK ) + BKK = BP( KK ) + CALL STPMV( UPLO, 'No transpose', 'Non-unit', K-1, BP, + $ AP( K1 ), 1 ) + CT = HALF*AKK + CALL SAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 ) + CALL SSPR2( UPLO, K-1, ONE, AP( K1 ), 1, BP( K1 ), 1, + $ AP ) + CALL SAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 ) + CALL SSCAL( K-1, BKK, AP( K1 ), 1 ) + AP( KK ) = AKK*BKK**2 + 30 CONTINUE + ELSE +* +* Compute L**T *A*L +* +* JJ and J1J1 are the indices of A(j,j) and A(j+1,j+1) +* + JJ = 1 + DO 40 J = 1, N + J1J1 = JJ + N - J + 1 +* +* Compute the j-th column of the lower triangle of A +* + AJJ = AP( JJ ) + BJJ = BP( JJ ) + AP( JJ ) = AJJ*BJJ + SDOT( N-J, AP( JJ+1 ), 1, + $ BP( JJ+1 ), 1 ) + CALL SSCAL( N-J, BJJ, AP( JJ+1 ), 1 ) + CALL SSPMV( UPLO, N-J, ONE, AP( J1J1 ), BP( JJ+1 ), 1, + $ ONE, AP( JJ+1 ), 1 ) + CALL STPMV( UPLO, 'Transpose', 'Non-unit', N-J+1, + $ BP( JJ ), AP( JJ ), 1 ) + JJ = J1J1 + 40 CONTINUE + END IF + END IF + RETURN +* +* End of SSPGST +* + END diff --git a/dspl/liblapack/SRC/sspgv.f b/dspl/liblapack/SRC/sspgv.f new file mode 100644 index 0000000..3f20f62 --- /dev/null +++ b/dspl/liblapack/SRC/sspgv.f @@ -0,0 +1,277 @@ +*> \brief \b SSPGV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSPGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSPGV( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDZ, N +* .. +* .. Array Arguments .. +* REAL AP( * ), BP( * ), W( * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSPGV computes all the eigenvalues and, optionally, the eigenvectors +*> of a real generalized symmetric-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +*> Here A and B are assumed to be symmetric, stored in packed format, +*> and B is also positive definite. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the contents of AP are destroyed. +*> \endverbatim +*> +*> \param[in,out] BP +*> \verbatim +*> BP is REAL array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> B, packed columnwise in a linear array. The j-th column of B +*> is stored in the array BP as follows: +*> if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +*> if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +*> +*> On exit, the triangular factor U or L from the Cholesky +*> factorization B = U**T*U or B = L*L**T, in the same storage +*> format as B. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +*> eigenvectors. The eigenvectors are normalized as follows: +*> if ITYPE = 1 or 2, Z**T*B*Z = I; +*> if ITYPE = 3, Z**T*inv(B)*Z = I. +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: SPPTRF or SSPEV returned an error code: +*> <= N: if INFO = i, SSPEV failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero. +*> > N: if INFO = n + i, for 1 <= i <= n, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realOTHEReigen +* +* ===================================================================== + SUBROUTINE SSPGV( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDZ, N +* .. +* .. Array Arguments .. + REAL AP( * ), BP( * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, WANTZ + CHARACTER TRANS + INTEGER J, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SPPTRF, SSPEV, SSPGST, STPMV, STPSV, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPGV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL SPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL SSPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL SSPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**T*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + DO 10 J = 1, NEIG + CALL STPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**T*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, NEIG + CALL STPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF + RETURN +* +* End of SSPGV +* + END diff --git a/dspl/liblapack/SRC/sspgvd.f b/dspl/liblapack/SRC/sspgvd.f new file mode 100644 index 0000000..f840d78 --- /dev/null +++ b/dspl/liblapack/SRC/sspgvd.f @@ -0,0 +1,364 @@ +*> \brief \b SSPGVD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSPGVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, +* LWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL AP( * ), BP( * ), W( * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSPGVD computes all the eigenvalues, and optionally, the eigenvectors +*> of a real generalized symmetric-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +*> B are assumed to be symmetric, stored in packed format, and B is also +*> positive definite. +*> If eigenvectors are desired, it uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the contents of AP are destroyed. +*> \endverbatim +*> +*> \param[in,out] BP +*> \verbatim +*> BP is REAL array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> B, packed columnwise in a linear array. The j-th column of B +*> is stored in the array BP as follows: +*> if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +*> if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +*> +*> On exit, the triangular factor U or L from the Cholesky +*> factorization B = U**T*U or B = L*L**T, in the same storage +*> format as B. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +*> eigenvectors. The eigenvectors are normalized as follows: +*> if ITYPE = 1 or 2, Z**T*B*Z = I; +*> if ITYPE = 3, Z**T*inv(B)*Z = I. +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the required LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK >= 1. +*> If JOBZ = 'N' and N > 1, LWORK >= 2*N. +*> If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the required sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the required LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK >= 1. +*> If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the required sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: SPPTRF or SSPEVD returned an error code: +*> <= N: if INFO = i, SSPEVD failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHEReigen +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE SSPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, + $ LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AP( * ), BP( * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER J, LIWMIN, LWMIN, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SPPTRF, SSPEVD, SSPGST, STPMV, STPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 6*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + END IF + END IF + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of BP. +* + CALL SPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL SSPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL SSPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) + LWMIN = MAX( REAL( LWMIN ), REAL( WORK( 1 ) ) ) + LIWMIN = MAX( REAL( LIWMIN ), REAL( IWORK( 1 ) ) ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**T *y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + DO 10 J = 1, NEIG + CALL STPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**T *y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, NEIG + CALL STPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of SSPGVD +* + END diff --git a/dspl/liblapack/SRC/sspgvx.f b/dspl/liblapack/SRC/sspgvx.f new file mode 100644 index 0000000..ce14b96 --- /dev/null +++ b/dspl/liblapack/SRC/sspgvx.f @@ -0,0 +1,417 @@ +*> \brief \b SSPGVX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSPGVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSPGVX( ITYPE, JOBZ, RANGE, UPLO, N, AP, BP, VL, VU, +* IL, IU, ABSTOL, M, W, Z, LDZ, WORK, IWORK, +* IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, ITYPE, IU, LDZ, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL AP( * ), BP( * ), W( * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSPGVX computes selected eigenvalues, and optionally, eigenvectors +*> of a real generalized symmetric-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A +*> and B are assumed to be symmetric, stored in packed storage, and B +*> is also positive definite. Eigenvalues and eigenvectors can be +*> selected by specifying either a range of values or a range of indices +*> for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A and B are stored; +*> = 'L': Lower triangle of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix pencil (A,B). N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the contents of AP are destroyed. +*> \endverbatim +*> +*> \param[in,out] BP +*> \verbatim +*> BP is REAL array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> B, packed columnwise in a linear array. The j-th column of B +*> is stored in the array BP as follows: +*> if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +*> if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +*> +*> On exit, the triangular factor U or L from the Cholesky +*> factorization B = U**T*U or B = L*L**T, in the same storage +*> format as B. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> On normal exit, the first M elements contain the selected +*> eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'N', then Z is not referenced. +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> The eigenvectors are normalized as follows: +*> if ITYPE = 1 or 2, Z**T*B*Z = I; +*> if ITYPE = 3, Z**T*inv(B)*Z = I. +*> +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (8*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: SPPTRF or SSPEVX returned an error code: +*> <= N: if INFO = i, SSPEVX failed to converge; +*> i eigenvectors failed to converge. Their indices +*> are stored in array IFAIL. +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHEReigen +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE SSPGVX( ITYPE, JOBZ, RANGE, UPLO, N, AP, BP, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, WORK, IWORK, + $ IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, ITYPE, IU, LDZ, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL AP( * ), BP( * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, UPPER, VALEIG, WANTZ + CHARACTER TRANS + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SPPTRF, SSPEVX, SSPGST, STPMV, STPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + UPPER = LSAME( UPLO, 'U' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -3 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) THEN + INFO = -9 + END IF + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 ) THEN + INFO = -10 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -11 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -16 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPGVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL SPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL SSPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL SSPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, ABSTOL, M, + $ W, Z, LDZ, WORK, IWORK, IFAIL, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( INFO.GT.0 ) + $ M = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**T*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + DO 10 J = 1, M + CALL STPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**T*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, M + CALL STPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF +* + RETURN +* +* End of SSPGVX +* + END diff --git a/dspl/liblapack/SRC/ssprfs.f b/dspl/liblapack/SRC/ssprfs.f new file mode 100644 index 0000000..862dbf3 --- /dev/null +++ b/dspl/liblapack/SRC/ssprfs.f @@ -0,0 +1,431 @@ +*> \brief \b SSPRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSPRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, +* FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL AFP( * ), AP( * ), B( LDB, * ), BERR( * ), +* $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSPRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric indefinite +*> and packed, and provides error bounds and backward error estimates +*> for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the symmetric matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in] AFP +*> \verbatim +*> AFP is REAL array, dimension (N*(N+1)/2) +*> The factored form of the matrix A. AFP contains the block +*> diagonal matrix D and the multipliers used to obtain the +*> factor U or L from the factorization A = U*D*U**T or +*> A = L*D*L**T as computed by SSPTRF, stored as a packed +*> triangular matrix. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSPTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by SSPTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, + $ FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL AFP( * ), AP( * ), B( LDB, * ), BERR( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IK, J, K, KASE, KK, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLACN2, SSPMV, SSPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL SSPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK( N+1 ), + $ 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + KK = 1 + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + IK = KK + DO 40 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK + S = S + ABS( AP( IK ) )*ABS( X( I, J ) ) + IK = IK + 1 + 40 CONTINUE + WORK( K ) = WORK( K ) + ABS( AP( KK+K-1 ) )*XK + S + KK = KK + K + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + WORK( K ) = WORK( K ) + ABS( AP( KK ) )*XK + IK = KK + 1 + DO 60 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK + S = S + ABS( AP( IK ) )*ABS( X( I, J ) ) + IK = IK + 1 + 60 CONTINUE + WORK( K ) = WORK( K ) + S + KK = KK + ( N-K+1 ) + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL SSPTRS( UPLO, N, 1, AFP, IPIV, WORK( N+1 ), N, INFO ) + CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL SLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**T). +* + CALL SSPTRS( UPLO, N, 1, AFP, IPIV, WORK( N+1 ), N, + $ INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 120 CONTINUE + CALL SSPTRS( UPLO, N, 1, AFP, IPIV, WORK( N+1 ), N, + $ INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of SSPRFS +* + END diff --git a/dspl/liblapack/SRC/sspsv.f b/dspl/liblapack/SRC/sspsv.f new file mode 100644 index 0000000..1685af7 --- /dev/null +++ b/dspl/liblapack/SRC/sspsv.f @@ -0,0 +1,224 @@ +*> \brief SSPSV computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSPSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSPSV computes the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix stored in packed format and X +*> and B are N-by-NRHS matrices. +*> +*> The diagonal pivoting method is used to factor A as +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, D is symmetric and block diagonal with 1-by-1 +*> and 2-by-2 diagonal blocks. The factored form of A is then used to +*> solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**T or A = L*D*L**T as computed by SSPTRF, stored as +*> a packed triangular matrix in the same storage format as A. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, as +*> determined by SSPTRF. If IPIV(k) > 0, then rows and columns +*> k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +*> diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +*> then rows and columns k-1 and -IPIV(k) were interchanged and +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +*> IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +*> -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +*> diagonal block. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the symmetric matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = aji) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SSPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSPTRF, SSPTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPSV ', -INFO ) + RETURN + END IF +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL SSPTRF( UPLO, N, AP, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL SSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* + END IF + RETURN +* +* End of SSPSV +* + END diff --git a/dspl/liblapack/SRC/sspsvx.f b/dspl/liblapack/SRC/sspsvx.f new file mode 100644 index 0000000..52819b1 --- /dev/null +++ b/dspl/liblapack/SRC/sspsvx.f @@ -0,0 +1,385 @@ +*> \brief SSPSVX computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSPSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, +* LDX, RCOND, FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER FACT, UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL AFP( * ), AP( * ), B( LDB, * ), BERR( * ), +* $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSPSVX uses the diagonal pivoting factorization A = U*D*U**T or +*> A = L*D*L**T to compute the solution to a real system of linear +*> equations A * X = B, where A is an N-by-N symmetric matrix stored +*> in packed format and X and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'N', the diagonal pivoting method is used to factor A as +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> 2. If some D(i,i)=0, so that D is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 3. The system of equations is solved for X using the factored form +*> of A. +*> +*> 4. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of A has been +*> supplied on entry. +*> = 'F': On entry, AFP and IPIV contain the factored form of +*> A. AP, AFP and IPIV will not be modified. +*> = 'N': The matrix A will be copied to AFP and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the symmetric matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> \endverbatim +*> +*> \param[in,out] AFP +*> \verbatim +*> AFP is REAL array, dimension (N*(N+1)/2) +*> If FACT = 'F', then AFP is an input argument and on entry +*> contains the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**T or A = L*D*L**T as computed by SSPTRF, stored as +*> a packed triangular matrix in the same storage format as A. +*> +*> If FACT = 'N', then AFP is an output argument and on exit +*> contains the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**T or A = L*D*L**T as computed by SSPTRF, stored as +*> a packed triangular matrix in the same storage format as A. +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains details of the interchanges and the block structure +*> of D, as determined by SSPTRF. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains details of the interchanges and the block structure +*> of D, as determined by SSPTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The estimate of the reciprocal condition number of the matrix +*> A. If RCOND is less than the machine precision (in +*> particular, if RCOND = 0), the matrix is singular to working +*> precision. This condition is indicated by a return code of +*> INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: D(i,i) is exactly zero. The factorization +*> has been completed but the factor D is exactly +*> singular, so the solution and error bounds could +*> not be computed. RCOND = 0 is returned. +*> = N+1: D is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realOTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the symmetric matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = aji) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SSPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, + $ LDX, RCOND, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDB, LDX, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL AFP( * ), AP( * ), B( LDB, * ), BERR( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT + REAL ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANSP + EXTERNAL LSAME, SLAMCH, SLANSP +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLACPY, SSPCON, SSPRFS, SSPTRF, SSPTRS, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL SCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 ) + CALL SSPTRF( UPLO, N, AFP, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = SLANSP( 'I', UPLO, N, AP, WORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL SSPCON( UPLO, N, AFP, IPIV, ANORM, RCOND, WORK, IWORK, INFO ) +* +* Compute the solution vectors X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SSPTRS( UPLO, N, NRHS, AFP, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL SSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, FERR, + $ BERR, WORK, IWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of SSPSVX +* + END diff --git a/dspl/liblapack/SRC/ssptrd.f b/dspl/liblapack/SRC/ssptrd.f new file mode 100644 index 0000000..eebc376 --- /dev/null +++ b/dspl/liblapack/SRC/ssptrd.f @@ -0,0 +1,299 @@ +*> \brief \b SSPTRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSPTRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSPTRD( UPLO, N, AP, D, E, TAU, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* REAL AP( * ), D( * ), E( * ), TAU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSPTRD reduces a real symmetric matrix A stored in packed form to +*> symmetric tridiagonal form T by an orthogonal similarity +*> transformation: Q**T * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the orthogonal +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the orthogonal matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n-1) . . . H(2) H(1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in AP, +*> overwriting A(1:i-1,i+1), and tau is stored in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(n-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in AP, +*> overwriting A(i+2:n,i), and tau is stored in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SSPTRD( UPLO, N, AP, D, E, TAU, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL AP( * ), D( * ), E( * ), TAU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO, HALF + PARAMETER ( ONE = 1.0, ZERO = 0.0, HALF = 1.0 / 2.0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, I1, I1I1, II + REAL ALPHA, TAUI +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SLARFG, SSPMV, SSPR2, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPTRD', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A. +* I1 is the index in AP of A(1,I+1). +* + I1 = N*( N-1 ) / 2 + 1 + DO 10 I = N - 1, 1, -1 +* +* Generate elementary reflector H(i) = I - tau * v * v**T +* to annihilate A(1:i-1,i+1) +* + CALL SLARFG( I, AP( I1+I-1 ), AP( I1 ), 1, TAUI ) + E( I ) = AP( I1+I-1 ) +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(1:i,1:i) +* + AP( I1+I-1 ) = ONE +* +* Compute y := tau * A * v storing y in TAU(1:i) +* + CALL SSPMV( UPLO, I, TAUI, AP, AP( I1 ), 1, ZERO, TAU, + $ 1 ) +* +* Compute w := y - 1/2 * tau * (y**T *v) * v +* + ALPHA = -HALF*TAUI*SDOT( I, TAU, 1, AP( I1 ), 1 ) + CALL SAXPY( I, ALPHA, AP( I1 ), 1, TAU, 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w**T - w * v**T +* + CALL SSPR2( UPLO, I, -ONE, AP( I1 ), 1, TAU, 1, AP ) +* + AP( I1+I-1 ) = E( I ) + END IF + D( I+1 ) = AP( I1+I ) + TAU( I ) = TAUI + I1 = I1 - I + 10 CONTINUE + D( 1 ) = AP( 1 ) + ELSE +* +* Reduce the lower triangle of A. II is the index in AP of +* A(i,i) and I1I1 is the index of A(i+1,i+1). +* + II = 1 + DO 20 I = 1, N - 1 + I1I1 = II + N - I + 1 +* +* Generate elementary reflector H(i) = I - tau * v * v**T +* to annihilate A(i+2:n,i) +* + CALL SLARFG( N-I, AP( II+1 ), AP( II+2 ), 1, TAUI ) + E( I ) = AP( II+1 ) +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(i+1:n,i+1:n) +* + AP( II+1 ) = ONE +* +* Compute y := tau * A * v storing y in TAU(i:n-1) +* + CALL SSPMV( UPLO, N-I, TAUI, AP( I1I1 ), AP( II+1 ), 1, + $ ZERO, TAU( I ), 1 ) +* +* Compute w := y - 1/2 * tau * (y**T *v) * v +* + ALPHA = -HALF*TAUI*SDOT( N-I, TAU( I ), 1, AP( II+1 ), + $ 1 ) + CALL SAXPY( N-I, ALPHA, AP( II+1 ), 1, TAU( I ), 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w**T - w * v**T +* + CALL SSPR2( UPLO, N-I, -ONE, AP( II+1 ), 1, TAU( I ), 1, + $ AP( I1I1 ) ) +* + AP( II+1 ) = E( I ) + END IF + D( I ) = AP( II ) + TAU( I ) = TAUI + II = I1I1 + 20 CONTINUE + D( N ) = AP( II ) + END IF +* + RETURN +* +* End of SSPTRD +* + END diff --git a/dspl/liblapack/SRC/ssptrf.f b/dspl/liblapack/SRC/ssptrf.f new file mode 100644 index 0000000..dc3b855 --- /dev/null +++ b/dspl/liblapack/SRC/ssptrf.f @@ -0,0 +1,614 @@ +*> \brief \b SSPTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSPTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSPTRF( UPLO, N, AP, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSPTRF computes the factorization of a real symmetric matrix A stored +*> in packed format using the Bunch-Kaufman diagonal pivoting method: +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L, stored as a packed triangular +*> matrix overwriting A (see below for further details). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> 5-96 - Based on modifications by J. Lewis, Boeing Computer Services +*> Company +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SSPTRF( UPLO, N, AP, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KC, KK, KNC, KP, KPC, + $ KSTEP, KX, NPP + REAL ABSAKK, ALPHA, COLMAX, D11, D12, D21, D22, R1, + $ ROWMAX, T, WK, WKM1, WKP1 +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + EXTERNAL LSAME, ISAMAX +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSPR, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPTRF', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + KC = ( N-1 )*N / 2 + 1 + 10 CONTINUE + KNC = KC +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( AP( KC+K-1 ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = ISAMAX( K-1, AP( KC ), 1 ) + COLMAX = ABS( AP( KC+IMAX-1 ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* + ROWMAX = ZERO + JMAX = IMAX + KX = IMAX*( IMAX+1 ) / 2 + IMAX + DO 20 J = IMAX + 1, K + IF( ABS( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = ABS( AP( KX ) ) + JMAX = J + END IF + KX = KX + J + 20 CONTINUE + KPC = ( IMAX-1 )*IMAX / 2 + 1 + IF( IMAX.GT.1 ) THEN + JMAX = ISAMAX( IMAX-1, AP( KPC ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( AP( KPC+JMAX-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( AP( KPC+IMAX-1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL SSWAP( KP-1, AP( KNC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 30 J = KP + 1, KK - 1 + KX = KX + J - 1 + T = AP( KNC+J-1 ) + AP( KNC+J-1 ) = AP( KX ) + AP( KX ) = T + 30 CONTINUE + T = AP( KNC+KK-1 ) + AP( KNC+KK-1 ) = AP( KPC+KP-1 ) + AP( KPC+KP-1 ) = T + IF( KSTEP.EQ.2 ) THEN + T = AP( KC+K-2 ) + AP( KC+K-2 ) = AP( KC+KP-1 ) + AP( KC+KP-1 ) = T + END IF + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)**T = A - W(k)*1/D(k)*W(k)**T +* + R1 = ONE / AP( KC+K-1 ) + CALL SSPR( UPLO, K-1, -R1, AP( KC ), 1, AP ) +* +* Store U(k) in column k +* + CALL SSCAL( K-1, R1, AP( KC ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )**T +* + IF( K.GT.2 ) THEN +* + D12 = AP( K-1+( K-1 )*K / 2 ) + D22 = AP( K-1+( K-2 )*( K-1 ) / 2 ) / D12 + D11 = AP( K+( K-1 )*K / 2 ) / D12 + T = ONE / ( D11*D22-ONE ) + D12 = T / D12 +* + DO 50 J = K - 2, 1, -1 + WKM1 = D12*( D11*AP( J+( K-2 )*( K-1 ) / 2 )- + $ AP( J+( K-1 )*K / 2 ) ) + WK = D12*( D22*AP( J+( K-1 )*K / 2 )- + $ AP( J+( K-2 )*( K-1 ) / 2 ) ) + DO 40 I = J, 1, -1 + AP( I+( J-1 )*J / 2 ) = AP( I+( J-1 )*J / 2 ) - + $ AP( I+( K-1 )*K / 2 )*WK - + $ AP( I+( K-2 )*( K-1 ) / 2 )*WKM1 + 40 CONTINUE + AP( J+( K-1 )*K / 2 ) = WK + AP( J+( K-2 )*( K-1 ) / 2 ) = WKM1 + 50 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + KC = KNC - K + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + KC = 1 + NPP = N*( N+1 ) / 2 + 60 CONTINUE + KNC = KC +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( AP( KC ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + ISAMAX( N-K, AP( KC+1 ), 1 ) + COLMAX = ABS( AP( KC+IMAX-K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + ROWMAX = ZERO + KX = KC + IMAX - K + DO 70 J = K, IMAX - 1 + IF( ABS( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = ABS( AP( KX ) ) + JMAX = J + END IF + KX = KX + N - J + 70 CONTINUE + KPC = NPP - ( N-IMAX+1 )*( N-IMAX+2 ) / 2 + 1 + IF( IMAX.LT.N ) THEN + JMAX = IMAX + ISAMAX( N-IMAX, AP( KPC+1 ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( AP( KPC+JMAX-IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( AP( KPC ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC + N - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL SSWAP( N-KP, AP( KNC+KP-KK+1 ), 1, AP( KPC+1 ), + $ 1 ) + KX = KNC + KP - KK + DO 80 J = KK + 1, KP - 1 + KX = KX + N - J + 1 + T = AP( KNC+J-KK ) + AP( KNC+J-KK ) = AP( KX ) + AP( KX ) = T + 80 CONTINUE + T = AP( KNC ) + AP( KNC ) = AP( KPC ) + AP( KPC ) = T + IF( KSTEP.EQ.2 ) THEN + T = AP( KC+1 ) + AP( KC+1 ) = AP( KC+KP-K ) + AP( KC+KP-K ) = T + END IF + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)**T = A - W(k)*(1/D(k))*W(k)**T +* + R1 = ONE / AP( KC ) + CALL SSPR( UPLO, N-K, -R1, AP( KC+1 ), 1, + $ AP( KC+N-K+1 ) ) +* +* Store L(k) in column K +* + CALL SSCAL( N-K, R1, AP( KC+1 ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k): columns K and K+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )**T +* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )**T +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D21 = AP( K+1+( K-1 )*( 2*N-K ) / 2 ) + D11 = AP( K+1+K*( 2*N-K-1 ) / 2 ) / D21 + D22 = AP( K+( K-1 )*( 2*N-K ) / 2 ) / D21 + T = ONE / ( D11*D22-ONE ) + D21 = T / D21 +* + DO 100 J = K + 2, N + WK = D21*( D11*AP( J+( K-1 )*( 2*N-K ) / 2 )- + $ AP( J+K*( 2*N-K-1 ) / 2 ) ) + WKP1 = D21*( D22*AP( J+K*( 2*N-K-1 ) / 2 )- + $ AP( J+( K-1 )*( 2*N-K ) / 2 ) ) +* + DO 90 I = J, N + AP( I+( J-1 )*( 2*N-J ) / 2 ) = AP( I+( J-1 )* + $ ( 2*N-J ) / 2 ) - AP( I+( K-1 )*( 2*N-K ) / + $ 2 )*WK - AP( I+K*( 2*N-K-1 ) / 2 )*WKP1 + 90 CONTINUE +* + AP( J+( K-1 )*( 2*N-K ) / 2 ) = WK + AP( J+K*( 2*N-K-1 ) / 2 ) = WKP1 +* + 100 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + KC = KNC + N - K + 2 + GO TO 60 +* + END IF +* + 110 CONTINUE + RETURN +* +* End of SSPTRF +* + END diff --git a/dspl/liblapack/SRC/ssptri.f b/dspl/liblapack/SRC/ssptri.f new file mode 100644 index 0000000..d62937b --- /dev/null +++ b/dspl/liblapack/SRC/ssptri.f @@ -0,0 +1,401 @@ +*> \brief \b SSPTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSPTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSPTRI( UPLO, N, AP, IPIV, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSPTRI computes the inverse of a real symmetric indefinite matrix +*> A in packed storage using the factorization A = U*D*U**T or +*> A = L*D*L**T computed by SSPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> On entry, the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by SSPTRF, +*> stored as a packed triangular matrix. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix, stored as a packed triangular matrix. The j-th column +*> of inv(A) is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j; +*> if UPLO = 'L', +*> AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSPTRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SSPTRI( UPLO, N, AP, IPIV, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KCNEXT, KP, KPC, KSTEP, KX, NPP + REAL AK, AKKP1, AKP1, D, T, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SSPMV, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + KP = N*( N+1 ) / 2 + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP - INFO + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + KP = 1 + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP + N - INFO + 1 + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + KCNEXT = KC + K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC+K-1 ) = ONE / AP( KC+K-1 ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL SCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL SSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ), + $ 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ SDOT( K-1, WORK, 1, AP( KC ), 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( AP( KCNEXT+K-1 ) ) + AK = AP( KC+K-1 ) / T + AKP1 = AP( KCNEXT+K ) / T + AKKP1 = AP( KCNEXT+K-1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KC+K-1 ) = AKP1 / D + AP( KCNEXT+K ) = AK / D + AP( KCNEXT+K-1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL SCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL SSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ), + $ 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ SDOT( K-1, WORK, 1, AP( KC ), 1 ) + AP( KCNEXT+K-1 ) = AP( KCNEXT+K-1 ) - + $ SDOT( K-1, AP( KC ), 1, AP( KCNEXT ), + $ 1 ) + CALL SCOPY( K-1, AP( KCNEXT ), 1, WORK, 1 ) + CALL SSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, + $ AP( KCNEXT ), 1 ) + AP( KCNEXT+K ) = AP( KCNEXT+K ) - + $ SDOT( K-1, WORK, 1, AP( KCNEXT ), 1 ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT + K + 1 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + KPC = ( KP-1 )*KP / 2 + 1 + CALL SSWAP( KP-1, AP( KC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 40 J = KP + 1, K - 1 + KX = KX + J - 1 + TEMP = AP( KC+J-1 ) + AP( KC+J-1 ) = AP( KX ) + AP( KX ) = TEMP + 40 CONTINUE + TEMP = AP( KC+K-1 ) + AP( KC+K-1 ) = AP( KPC+KP-1 ) + AP( KPC+KP-1 ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC+K+K-1 ) + AP( KC+K+K-1 ) = AP( KC+K+KP-1 ) + AP( KC+K+KP-1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + KC = KCNEXT + GO TO 30 + 50 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + NPP = N*( N+1 ) / 2 + K = N + KC = NPP + 60 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 80 +* + KCNEXT = KC - ( N-K+2 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC ) = ONE / AP( KC ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL SCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL SSPMV( UPLO, N-K, -ONE, AP( KC+N-K+1 ), WORK, 1, + $ ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - SDOT( N-K, WORK, 1, AP( KC+1 ), 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( AP( KCNEXT+1 ) ) + AK = AP( KCNEXT ) / T + AKP1 = AP( KC ) / T + AKKP1 = AP( KCNEXT+1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KCNEXT ) = AKP1 / D + AP( KC ) = AK / D + AP( KCNEXT+1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL SCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL SSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1, + $ ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - SDOT( N-K, WORK, 1, AP( KC+1 ), 1 ) + AP( KCNEXT+1 ) = AP( KCNEXT+1 ) - + $ SDOT( N-K, AP( KC+1 ), 1, + $ AP( KCNEXT+2 ), 1 ) + CALL SCOPY( N-K, AP( KCNEXT+2 ), 1, WORK, 1 ) + CALL SSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1, + $ ZERO, AP( KCNEXT+2 ), 1 ) + AP( KCNEXT ) = AP( KCNEXT ) - + $ SDOT( N-K, WORK, 1, AP( KCNEXT+2 ), 1 ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT - ( N-K+3 ) + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + KPC = NPP - ( N-KP+1 )*( N-KP+2 ) / 2 + 1 + IF( KP.LT.N ) + $ CALL SSWAP( N-KP, AP( KC+KP-K+1 ), 1, AP( KPC+1 ), 1 ) + KX = KC + KP - K + DO 70 J = K + 1, KP - 1 + KX = KX + N - J + 1 + TEMP = AP( KC+J-K ) + AP( KC+J-K ) = AP( KX ) + AP( KX ) = TEMP + 70 CONTINUE + TEMP = AP( KC ) + AP( KC ) = AP( KPC ) + AP( KPC ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC-N+K-1 ) + AP( KC-N+K-1 ) = AP( KC-N+KP-1 ) + AP( KC-N+KP-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + KC = KCNEXT + GO TO 60 + 80 CONTINUE + END IF +* + RETURN +* +* End of SSPTRI +* + END diff --git a/dspl/liblapack/SRC/ssptrs.f b/dspl/liblapack/SRC/ssptrs.f new file mode 100644 index 0000000..d1ff03c --- /dev/null +++ b/dspl/liblapack/SRC/ssptrs.f @@ -0,0 +1,450 @@ +*> \brief \b SSPTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSPTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSPTRS solves a system of linear equations A*X = B with a real +*> symmetric matrix A stored in packed format using the factorization +*> A = U*D*U**T or A = L*D*L**T computed by SSPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by SSPTRF, stored as a +*> packed triangular matrix. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSPTRF. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KP + REAL AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SGER, SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**T. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + KC = KC - K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL SGER( K-1, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL SSCAL( NRHS, ONE / AP( KC+K-1 ), B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL SSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL SGER( K-2, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL SGER( K-2, NRHS, -ONE, AP( KC-( K-1 ) ), 1, + $ B( K-1, 1 ), LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+K-2 ) + AKM1 = AP( KC-1 ) / AKM1K + AK = AP( KC+K-1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / AKM1K + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + KC = KC - K + 1 + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U**T*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U**T(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ), + $ 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + K + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U**T(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ), + $ 1, ONE, B( K, 1 ), LDB ) + CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, + $ AP( KC+K ), 1, ONE, B( K+1, 1 ), LDB ) +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + 2*K + 1 + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L**T. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL SGER( N-K, NRHS, -ONE, AP( KC+1 ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL SSCAL( NRHS, ONE / AP( KC ), B( K, 1 ), LDB ) + KC = KC + N - K + 1 + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL SSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL SGER( N-K-1, NRHS, -ONE, AP( KC+2 ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL SGER( N-K-1, NRHS, -ONE, AP( KC+N-K+2 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+1 ) + AKM1 = AP( KC ) / AKM1K + AK = AP( KC+N-K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / AKM1K + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + KC = KC + 2*( N-K ) + 1 + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L**T*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + KC = KC - ( N-K+1 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L**T(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L**T(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB ) + CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC-( N-K ) ), 1, ONE, B( K-1, 1 ), + $ LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC - ( N-K+2 ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of SSPTRS +* + END diff --git a/dspl/liblapack/SRC/sstebz.f b/dspl/liblapack/SRC/sstebz.f new file mode 100644 index 0000000..bcac56a --- /dev/null +++ b/dspl/liblapack/SRC/sstebz.f @@ -0,0 +1,770 @@ +*> \brief \b SSTEBZ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSTEBZ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSTEBZ( RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL, D, E, +* M, NSPLIT, W, IBLOCK, ISPLIT, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER ORDER, RANGE +* INTEGER IL, INFO, IU, M, N, NSPLIT +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IBLOCK( * ), ISPLIT( * ), IWORK( * ) +* REAL D( * ), E( * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSTEBZ computes the eigenvalues of a symmetric tridiagonal +*> matrix T. The user may ask for all eigenvalues, all eigenvalues +*> in the half-open interval (VL, VU], or the IL-th through IU-th +*> eigenvalues. +*> +*> To avoid overflow, the matrix must be scaled so that its +*> largest element is no greater than overflow**(1/2) * underflow**(1/4) in absolute value, and for greatest +*> accuracy, it should not be much smaller than that. +*> +*> See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal +*> Matrix", Report CS41, Computer Science Dept., Stanford +*> University, July 21, 1966. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': ("All") all eigenvalues will be found. +*> = 'V': ("Value") all eigenvalues in the half-open interval +*> (VL, VU] will be found. +*> = 'I': ("Index") the IL-th through IU-th eigenvalues (of the +*> entire matrix) will be found. +*> \endverbatim +*> +*> \param[in] ORDER +*> \verbatim +*> ORDER is CHARACTER*1 +*> = 'B': ("By Block") the eigenvalues will be grouped by +*> split-off block (see IBLOCK, ISPLIT) and +*> ordered from smallest to largest within +*> the block. +*> = 'E': ("Entire matrix") +*> the eigenvalues for the entire matrix +*> will be ordered from smallest to +*> largest. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the tridiagonal matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. Eigenvalues less than or equal +*> to VL, or greater than VU, will not be returned. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. Eigenvalues less than or equal +*> to VL, or greater than VU, will not be returned. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute tolerance for the eigenvalues. An eigenvalue +*> (or cluster) is considered to be located if it has been +*> determined to lie in an interval whose width is ABSTOL or +*> less. If ABSTOL is less than or equal to zero, then ULP*|T| +*> will be used, where |T| means the 1-norm of T. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The (n-1) off-diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The actual number of eigenvalues found. 0 <= M <= N. +*> (See also the description of INFO=2,3.) +*> \endverbatim +*> +*> \param[out] NSPLIT +*> \verbatim +*> NSPLIT is INTEGER +*> The number of diagonal blocks in the matrix T. +*> 1 <= NSPLIT <= N. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> On exit, the first M elements of W will contain the +*> eigenvalues. (SSTEBZ may use the remaining N-M elements as +*> workspace.) +*> \endverbatim +*> +*> \param[out] IBLOCK +*> \verbatim +*> IBLOCK is INTEGER array, dimension (N) +*> At each row/column j where E(j) is zero or small, the +*> matrix T is considered to split into a block diagonal +*> matrix. On exit, if INFO = 0, IBLOCK(i) specifies to which +*> block (from 1 to the number of blocks) the eigenvalue W(i) +*> belongs. (SSTEBZ may use the remaining N-M elements as +*> workspace.) +*> \endverbatim +*> +*> \param[out] ISPLIT +*> \verbatim +*> ISPLIT is INTEGER array, dimension (N) +*> The splitting points, at which T breaks up into submatrices. +*> The first submatrix consists of rows/columns 1 to ISPLIT(1), +*> the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), +*> etc., and the NSPLIT-th consists of rows/columns +*> ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. +*> (Only the first NSPLIT elements will actually be used, but +*> since the user cannot know a priori what value NSPLIT will +*> have, N words must be reserved for ISPLIT.) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: some or all of the eigenvalues failed to converge or +*> were not computed: +*> =1 or 3: Bisection failed to converge for some +*> eigenvalues; these eigenvalues are flagged by a +*> negative block number. The effect is that the +*> eigenvalues may not be as accurate as the +*> absolute and relative tolerances. This is +*> generally caused by unexpectedly inaccurate +*> arithmetic. +*> =2 or 3: RANGE='I' only: Not all of the eigenvalues +*> IL:IU were found. +*> Effect: M < IU+1-IL +*> Cause: non-monotonic arithmetic, causing the +*> Sturm sequence to be non-monotonic. +*> Cure: recalculate, using RANGE='A', and pick +*> out eigenvalues IL:IU. In some cases, +*> increasing the PARAMETER "FUDGE" may +*> make things work. +*> = 4: RANGE='I', and the Gershgorin interval +*> initially used was too small. No eigenvalues +*> were computed. +*> Probable cause: your machine has sloppy +*> floating-point arithmetic. +*> Cure: Increase the PARAMETER "FUDGE", +*> recompile, and try again. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> RELFAC REAL, default = 2.0e0 +*> The relative tolerance. An interval (a,b] lies within +*> "relative tolerance" if b-a < RELFAC*ulp*max(|a|,|b|), +*> where "ulp" is the machine precision (distance from 1 to +*> the next larger floating point number.) +*> +*> FUDGE REAL, default = 2 +*> A "fudge factor" to widen the Gershgorin intervals. Ideally, +*> a value of 1 should work, but on machines with sloppy +*> arithmetic, this needs to be larger. The default for +*> publicly released versions should be large enough to handle +*> the worst machine around. Note that this has no effect +*> on accuracy of the solution. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE SSTEBZ( RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL, D, E, + $ M, NSPLIT, W, IBLOCK, ISPLIT, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER ORDER, RANGE + INTEGER IL, INFO, IU, M, N, NSPLIT + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), ISPLIT( * ), IWORK( * ) + REAL D( * ), E( * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, HALF + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ HALF = 1.0E0 / TWO ) + REAL FUDGE, RELFAC + PARAMETER ( FUDGE = 2.1E0, RELFAC = 2.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL NCNVRG, TOOFEW + INTEGER IB, IBEGIN, IDISCL, IDISCU, IE, IEND, IINFO, + $ IM, IN, IOFF, IORDER, IOUT, IRANGE, ITMAX, + $ ITMP1, IW, IWOFF, J, JB, JDISC, JE, NB, NWL, + $ NWU + REAL ATOLI, BNORM, GL, GU, PIVMIN, RTOLI, SAFEMN, + $ TMP1, TMP2, TNORM, ULP, WKILL, WL, WLU, WU, WUL +* .. +* .. Local Arrays .. + INTEGER IDUMMA( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH + EXTERNAL LSAME, ILAENV, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLAEBZ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Decode RANGE +* + IF( LSAME( RANGE, 'A' ) ) THEN + IRANGE = 1 + ELSE IF( LSAME( RANGE, 'V' ) ) THEN + IRANGE = 2 + ELSE IF( LSAME( RANGE, 'I' ) ) THEN + IRANGE = 3 + ELSE + IRANGE = 0 + END IF +* +* Decode ORDER +* + IF( LSAME( ORDER, 'B' ) ) THEN + IORDER = 2 + ELSE IF( LSAME( ORDER, 'E' ) ) THEN + IORDER = 1 + ELSE + IORDER = 0 + END IF +* +* Check for Errors +* + IF( IRANGE.LE.0 ) THEN + INFO = -1 + ELSE IF( IORDER.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( IRANGE.EQ.2 ) THEN + IF( VL.GE.VU ) INFO = -5 + ELSE IF( IRANGE.EQ.3 .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) ) + $ THEN + INFO = -6 + ELSE IF( IRANGE.EQ.3 .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) + $ THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSTEBZ', -INFO ) + RETURN + END IF +* +* Initialize error flags +* + INFO = 0 + NCNVRG = .FALSE. + TOOFEW = .FALSE. +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* +* Simplifications: +* + IF( IRANGE.EQ.3 .AND. IL.EQ.1 .AND. IU.EQ.N ) + $ IRANGE = 1 +* +* Get machine constants +* NB is the minimum vector length for vector bisection, or 0 +* if only scalar is to be done. +* + SAFEMN = SLAMCH( 'S' ) + ULP = SLAMCH( 'P' ) + RTOLI = ULP*RELFAC + NB = ILAENV( 1, 'SSTEBZ', ' ', N, -1, -1, -1 ) + IF( NB.LE.1 ) + $ NB = 0 +* +* Special Case when N=1 +* + IF( N.EQ.1 ) THEN + NSPLIT = 1 + ISPLIT( 1 ) = 1 + IF( IRANGE.EQ.2 .AND. ( VL.GE.D( 1 ) .OR. VU.LT.D( 1 ) ) ) THEN + M = 0 + ELSE + W( 1 ) = D( 1 ) + IBLOCK( 1 ) = 1 + M = 1 + END IF + RETURN + END IF +* +* Compute Splitting Points +* + NSPLIT = 1 + WORK( N ) = ZERO + PIVMIN = ONE +* + DO 10 J = 2, N + TMP1 = E( J-1 )**2 + IF( ABS( D( J )*D( J-1 ) )*ULP**2+SAFEMN.GT.TMP1 ) THEN + ISPLIT( NSPLIT ) = J - 1 + NSPLIT = NSPLIT + 1 + WORK( J-1 ) = ZERO + ELSE + WORK( J-1 ) = TMP1 + PIVMIN = MAX( PIVMIN, TMP1 ) + END IF + 10 CONTINUE + ISPLIT( NSPLIT ) = N + PIVMIN = PIVMIN*SAFEMN +* +* Compute Interval and ATOLI +* + IF( IRANGE.EQ.3 ) THEN +* +* RANGE='I': Compute the interval containing eigenvalues +* IL through IU. +* +* Compute Gershgorin interval for entire (split) matrix +* and use it as the initial interval +* + GU = D( 1 ) + GL = D( 1 ) + TMP1 = ZERO +* + DO 20 J = 1, N - 1 + TMP2 = SQRT( WORK( J ) ) + GU = MAX( GU, D( J )+TMP1+TMP2 ) + GL = MIN( GL, D( J )-TMP1-TMP2 ) + TMP1 = TMP2 + 20 CONTINUE +* + GU = MAX( GU, D( N )+TMP1 ) + GL = MIN( GL, D( N )-TMP1 ) + TNORM = MAX( ABS( GL ), ABS( GU ) ) + GL = GL - FUDGE*TNORM*ULP*N - FUDGE*TWO*PIVMIN + GU = GU + FUDGE*TNORM*ULP*N + FUDGE*PIVMIN +* +* Compute Iteration parameters +* + ITMAX = INT( ( LOG( TNORM+PIVMIN )-LOG( PIVMIN ) ) / + $ LOG( TWO ) ) + 2 + IF( ABSTOL.LE.ZERO ) THEN + ATOLI = ULP*TNORM + ELSE + ATOLI = ABSTOL + END IF +* + WORK( N+1 ) = GL + WORK( N+2 ) = GL + WORK( N+3 ) = GU + WORK( N+4 ) = GU + WORK( N+5 ) = GL + WORK( N+6 ) = GU + IWORK( 1 ) = -1 + IWORK( 2 ) = -1 + IWORK( 3 ) = N + 1 + IWORK( 4 ) = N + 1 + IWORK( 5 ) = IL - 1 + IWORK( 6 ) = IU +* + CALL SLAEBZ( 3, ITMAX, N, 2, 2, NB, ATOLI, RTOLI, PIVMIN, D, E, + $ WORK, IWORK( 5 ), WORK( N+1 ), WORK( N+5 ), IOUT, + $ IWORK, W, IBLOCK, IINFO ) +* + IF( IWORK( 6 ).EQ.IU ) THEN + WL = WORK( N+1 ) + WLU = WORK( N+3 ) + NWL = IWORK( 1 ) + WU = WORK( N+4 ) + WUL = WORK( N+2 ) + NWU = IWORK( 4 ) + ELSE + WL = WORK( N+2 ) + WLU = WORK( N+4 ) + NWL = IWORK( 2 ) + WU = WORK( N+3 ) + WUL = WORK( N+1 ) + NWU = IWORK( 3 ) + END IF +* + IF( NWL.LT.0 .OR. NWL.GE.N .OR. NWU.LT.1 .OR. NWU.GT.N ) THEN + INFO = 4 + RETURN + END IF + ELSE +* +* RANGE='A' or 'V' -- Set ATOLI +* + TNORM = MAX( ABS( D( 1 ) )+ABS( E( 1 ) ), + $ ABS( D( N ) )+ABS( E( N-1 ) ) ) +* + DO 30 J = 2, N - 1 + TNORM = MAX( TNORM, ABS( D( J ) )+ABS( E( J-1 ) )+ + $ ABS( E( J ) ) ) + 30 CONTINUE +* + IF( ABSTOL.LE.ZERO ) THEN + ATOLI = ULP*TNORM + ELSE + ATOLI = ABSTOL + END IF +* + IF( IRANGE.EQ.2 ) THEN + WL = VL + WU = VU + ELSE + WL = ZERO + WU = ZERO + END IF + END IF +* +* Find Eigenvalues -- Loop Over Blocks and recompute NWL and NWU. +* NWL accumulates the number of eigenvalues .le. WL, +* NWU accumulates the number of eigenvalues .le. WU +* + M = 0 + IEND = 0 + INFO = 0 + NWL = 0 + NWU = 0 +* + DO 70 JB = 1, NSPLIT + IOFF = IEND + IBEGIN = IOFF + 1 + IEND = ISPLIT( JB ) + IN = IEND - IOFF +* + IF( IN.EQ.1 ) THEN +* +* Special Case -- IN=1 +* + IF( IRANGE.EQ.1 .OR. WL.GE.D( IBEGIN )-PIVMIN ) + $ NWL = NWL + 1 + IF( IRANGE.EQ.1 .OR. WU.GE.D( IBEGIN )-PIVMIN ) + $ NWU = NWU + 1 + IF( IRANGE.EQ.1 .OR. ( WL.LT.D( IBEGIN )-PIVMIN .AND. WU.GE. + $ D( IBEGIN )-PIVMIN ) ) THEN + M = M + 1 + W( M ) = D( IBEGIN ) + IBLOCK( M ) = JB + END IF + ELSE +* +* General Case -- IN > 1 +* +* Compute Gershgorin Interval +* and use it as the initial interval +* + GU = D( IBEGIN ) + GL = D( IBEGIN ) + TMP1 = ZERO +* + DO 40 J = IBEGIN, IEND - 1 + TMP2 = ABS( E( J ) ) + GU = MAX( GU, D( J )+TMP1+TMP2 ) + GL = MIN( GL, D( J )-TMP1-TMP2 ) + TMP1 = TMP2 + 40 CONTINUE +* + GU = MAX( GU, D( IEND )+TMP1 ) + GL = MIN( GL, D( IEND )-TMP1 ) + BNORM = MAX( ABS( GL ), ABS( GU ) ) + GL = GL - FUDGE*BNORM*ULP*IN - FUDGE*PIVMIN + GU = GU + FUDGE*BNORM*ULP*IN + FUDGE*PIVMIN +* +* Compute ATOLI for the current submatrix +* + IF( ABSTOL.LE.ZERO ) THEN + ATOLI = ULP*MAX( ABS( GL ), ABS( GU ) ) + ELSE + ATOLI = ABSTOL + END IF +* + IF( IRANGE.GT.1 ) THEN + IF( GU.LT.WL ) THEN + NWL = NWL + IN + NWU = NWU + IN + GO TO 70 + END IF + GL = MAX( GL, WL ) + GU = MIN( GU, WU ) + IF( GL.GE.GU ) + $ GO TO 70 + END IF +* +* Set Up Initial Interval +* + WORK( N+1 ) = GL + WORK( N+IN+1 ) = GU + CALL SLAEBZ( 1, 0, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN, + $ D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ), + $ IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IM, + $ IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO ) +* + NWL = NWL + IWORK( 1 ) + NWU = NWU + IWORK( IN+1 ) + IWOFF = M - IWORK( 1 ) +* +* Compute Eigenvalues +* + ITMAX = INT( ( LOG( GU-GL+PIVMIN )-LOG( PIVMIN ) ) / + $ LOG( TWO ) ) + 2 + CALL SLAEBZ( 2, ITMAX, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN, + $ D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ), + $ IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IOUT, + $ IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO ) +* +* Copy Eigenvalues Into W and IBLOCK +* Use -JB for block number for unconverged eigenvalues. +* + DO 60 J = 1, IOUT + TMP1 = HALF*( WORK( J+N )+WORK( J+IN+N ) ) +* +* Flag non-convergence. +* + IF( J.GT.IOUT-IINFO ) THEN + NCNVRG = .TRUE. + IB = -JB + ELSE + IB = JB + END IF + DO 50 JE = IWORK( J ) + 1 + IWOFF, + $ IWORK( J+IN ) + IWOFF + W( JE ) = TMP1 + IBLOCK( JE ) = IB + 50 CONTINUE + 60 CONTINUE +* + M = M + IM + END IF + 70 CONTINUE +* +* If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU +* If NWL+1 < IL or NWU > IU, discard extra eigenvalues. +* + IF( IRANGE.EQ.3 ) THEN + IM = 0 + IDISCL = IL - 1 - NWL + IDISCU = NWU - IU +* + IF( IDISCL.GT.0 .OR. IDISCU.GT.0 ) THEN + DO 80 JE = 1, M + IF( W( JE ).LE.WLU .AND. IDISCL.GT.0 ) THEN + IDISCL = IDISCL - 1 + ELSE IF( W( JE ).GE.WUL .AND. IDISCU.GT.0 ) THEN + IDISCU = IDISCU - 1 + ELSE + IM = IM + 1 + W( IM ) = W( JE ) + IBLOCK( IM ) = IBLOCK( JE ) + END IF + 80 CONTINUE + M = IM + END IF + IF( IDISCL.GT.0 .OR. IDISCU.GT.0 ) THEN +* +* Code to deal with effects of bad arithmetic: +* Some low eigenvalues to be discarded are not in (WL,WLU], +* or high eigenvalues to be discarded are not in (WUL,WU] +* so just kill off the smallest IDISCL/largest IDISCU +* eigenvalues, by simply finding the smallest/largest +* eigenvalue(s). +* +* (If N(w) is monotone non-decreasing, this should never +* happen.) +* + IF( IDISCL.GT.0 ) THEN + WKILL = WU + DO 100 JDISC = 1, IDISCL + IW = 0 + DO 90 JE = 1, M + IF( IBLOCK( JE ).NE.0 .AND. + $ ( W( JE ).LT.WKILL .OR. IW.EQ.0 ) ) THEN + IW = JE + WKILL = W( JE ) + END IF + 90 CONTINUE + IBLOCK( IW ) = 0 + 100 CONTINUE + END IF + IF( IDISCU.GT.0 ) THEN +* + WKILL = WL + DO 120 JDISC = 1, IDISCU + IW = 0 + DO 110 JE = 1, M + IF( IBLOCK( JE ).NE.0 .AND. + $ ( W( JE ).GT.WKILL .OR. IW.EQ.0 ) ) THEN + IW = JE + WKILL = W( JE ) + END IF + 110 CONTINUE + IBLOCK( IW ) = 0 + 120 CONTINUE + END IF + IM = 0 + DO 130 JE = 1, M + IF( IBLOCK( JE ).NE.0 ) THEN + IM = IM + 1 + W( IM ) = W( JE ) + IBLOCK( IM ) = IBLOCK( JE ) + END IF + 130 CONTINUE + M = IM + END IF + IF( IDISCL.LT.0 .OR. IDISCU.LT.0 ) THEN + TOOFEW = .TRUE. + END IF + END IF +* +* If ORDER='B', do nothing -- the eigenvalues are already sorted +* by block. +* If ORDER='E', sort the eigenvalues from smallest to largest +* + IF( IORDER.EQ.1 .AND. NSPLIT.GT.1 ) THEN + DO 150 JE = 1, M - 1 + IE = 0 + TMP1 = W( JE ) + DO 140 J = JE + 1, M + IF( W( J ).LT.TMP1 ) THEN + IE = J + TMP1 = W( J ) + END IF + 140 CONTINUE +* + IF( IE.NE.0 ) THEN + ITMP1 = IBLOCK( IE ) + W( IE ) = W( JE ) + IBLOCK( IE ) = IBLOCK( JE ) + W( JE ) = TMP1 + IBLOCK( JE ) = ITMP1 + END IF + 150 CONTINUE + END IF +* + INFO = 0 + IF( NCNVRG ) + $ INFO = INFO + 1 + IF( TOOFEW ) + $ INFO = INFO + 2 + RETURN +* +* End of SSTEBZ +* + END diff --git a/dspl/liblapack/SRC/sstedc.f b/dspl/liblapack/SRC/sstedc.f new file mode 100644 index 0000000..8eb43f5 --- /dev/null +++ b/dspl/liblapack/SRC/sstedc.f @@ -0,0 +1,482 @@ +*> \brief \b SSTEDC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSTEDC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK, +* LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPZ +* INTEGER INFO, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSTEDC computes all eigenvalues and, optionally, eigenvectors of a +*> symmetric tridiagonal matrix using the divide and conquer method. +*> The eigenvectors of a full or band real symmetric matrix can also be +*> found if SSYTRD or SSPTRD or SSBTRD has been used to reduce this +*> matrix to tridiagonal form. +*> +*> This code makes very mild assumptions about floating point +*> arithmetic. It will work on machines with a guard digit in +*> add/subtract, or on those binary machines without guard digits +*> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. +*> It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. See SLAED3 for details. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only. +*> = 'I': Compute eigenvectors of tridiagonal matrix also. +*> = 'V': Compute eigenvectors of original dense symmetric +*> matrix also. On entry, Z contains the orthogonal +*> matrix used to reduce the original matrix to +*> tridiagonal form. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the diagonal elements of the tridiagonal matrix. +*> On exit, if INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> On entry, the subdiagonal elements of the tridiagonal matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ,N) +*> On entry, if COMPZ = 'V', then Z contains the orthogonal +*> matrix used in the reduction to tridiagonal form. +*> On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +*> orthonormal eigenvectors of the original symmetric matrix, +*> and if COMPZ = 'I', Z contains the orthonormal eigenvectors +*> of the symmetric tridiagonal matrix. +*> If COMPZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1. +*> If eigenvectors are desired, then LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If COMPZ = 'N' or N <= 1 then LWORK must be at least 1. +*> If COMPZ = 'V' and N > 1 then LWORK must be at least +*> ( 1 + 3*N + 2*N*lg N + 4*N**2 ), +*> where lg( N ) = smallest integer k such +*> that 2**k >= N. +*> If COMPZ = 'I' and N > 1 then LWORK must be at least +*> ( 1 + 4*N + N**2 ). +*> Note that for COMPZ = 'I' or 'V', then if N is less than or +*> equal to the minimum divide size, usually 25, then LWORK need +*> only be max(1,2*(N-1)). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1. +*> If COMPZ = 'V' and N > 1 then LIWORK must be at least +*> ( 6 + 6*N + 5*N*lg N ). +*> If COMPZ = 'I' and N > 1 then LIWORK must be at least +*> ( 3 + 5*N ). +*> Note that for COMPZ = 'I' or 'V', then if N is less than or +*> equal to the minimum divide size, usually 25, then LIWORK +*> need only be 1. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: The algorithm failed to compute an eigenvalue while +*> working on the submatrix lying in rows and columns +*> INFO/(N+1) through mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA \n +*> Modified by Francoise Tisseur, University of Tennessee +*> +* ===================================================================== + SUBROUTINE SSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER FINISH, I, ICOMPZ, II, J, K, LGN, LIWMIN, + $ LWMIN, M, SMLSIZ, START, STOREZ, STRTRW + REAL EPS, ORGNRM, P, TINY +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANST + EXTERNAL ILAENV, LSAME, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SLACPY, SLAED0, SLASCL, SLASET, SLASRT, + $ SSTEQR, SSTERF, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, MOD, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. + $ ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, N ) ) ) THEN + INFO = -6 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + SMLSIZ = ILAENV( 9, 'SSTEDC', ' ', 0, 0, 0, 0 ) + IF( N.LE.1 .OR. ICOMPZ.EQ.0 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE IF( N.LE.SMLSIZ ) THEN + LIWMIN = 1 + LWMIN = 2*( N - 1 ) + ELSE + LGN = INT( LOG( REAL( N ) )/LOG( TWO ) ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( ICOMPZ.EQ.1 ) THEN + LWMIN = 1 + 3*N + 2*N*LGN + 4*N**2 + LIWMIN = 6 + 6*N + 5*N*LGN + ELSE IF( ICOMPZ.EQ.2 ) THEN + LWMIN = 1 + 4*N + N**2 + LIWMIN = 3 + 5*N + END IF + END IF + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT. LQUERY ) THEN + INFO = -8 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT. LQUERY ) THEN + INFO = -10 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSTEDC', -INFO ) + RETURN + ELSE IF (LQUERY) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( N.EQ.1 ) THEN + IF( ICOMPZ.NE.0 ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* If the following conditional clause is removed, then the routine +* will use the Divide and Conquer routine to compute only the +* eigenvalues, which requires (3N + 3N**2) real workspace and +* (2 + 5N + 2N lg(N)) integer workspace. +* Since on many architectures SSTERF is much faster than any other +* algorithm for finding eigenvalues only, it is used here +* as the default. If the conditional clause is removed, then +* information on the size of workspace needs to be changed. +* +* If COMPZ = 'N', use SSTERF to compute the eigenvalues. +* + IF( ICOMPZ.EQ.0 ) THEN + CALL SSTERF( N, D, E, INFO ) + GO TO 50 + END IF +* +* If N is smaller than the minimum divide size (SMLSIZ+1), then +* solve the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN +* + CALL SSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* + ELSE +* +* If COMPZ = 'V', the Z matrix must be stored elsewhere for later +* use. +* + IF( ICOMPZ.EQ.1 ) THEN + STOREZ = 1 + N*N + ELSE + STOREZ = 1 + END IF +* + IF( ICOMPZ.EQ.2 ) THEN + CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) + END IF +* +* Scale. +* + ORGNRM = SLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) + $ GO TO 50 +* + EPS = SLAMCH( 'Epsilon' ) +* + START = 1 +* +* while ( START <= N ) +* + 10 CONTINUE + IF( START.LE.N ) THEN +* +* Let FINISH be the position of the next subdiagonal entry +* such that E( FINISH ) <= TINY or FINISH = N if no such +* subdiagonal exists. The matrix identified by the elements +* between START and FINISH constitutes an independent +* sub-problem. +* + FINISH = START + 20 CONTINUE + IF( FINISH.LT.N ) THEN + TINY = EPS*SQRT( ABS( D( FINISH ) ) )* + $ SQRT( ABS( D( FINISH+1 ) ) ) + IF( ABS( E( FINISH ) ).GT.TINY ) THEN + FINISH = FINISH + 1 + GO TO 20 + END IF + END IF +* +* (Sub) Problem determined. Compute its size and solve it. +* + M = FINISH - START + 1 + IF( M.EQ.1 ) THEN + START = FINISH + 1 + GO TO 10 + END IF + IF( M.GT.SMLSIZ ) THEN +* +* Scale. +* + ORGNRM = SLANST( 'M', M, D( START ), E( START ) ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, M, 1, D( START ), M, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, M-1, 1, E( START ), + $ M-1, INFO ) +* + IF( ICOMPZ.EQ.1 ) THEN + STRTRW = 1 + ELSE + STRTRW = START + END IF + CALL SLAED0( ICOMPZ, N, M, D( START ), E( START ), + $ Z( STRTRW, START ), LDZ, WORK( 1 ), N, + $ WORK( STOREZ ), IWORK, INFO ) + IF( INFO.NE.0 ) THEN + INFO = ( INFO / ( M+1 )+START-1 )*( N+1 ) + + $ MOD( INFO, ( M+1 ) ) + START - 1 + GO TO 50 + END IF +* +* Scale back. +* + CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, M, 1, D( START ), M, + $ INFO ) +* + ELSE + IF( ICOMPZ.EQ.1 ) THEN +* +* Since QR won't update a Z matrix which is larger than +* the length of D, we must solve the sub-problem in a +* workspace and then multiply back into Z. +* + CALL SSTEQR( 'I', M, D( START ), E( START ), WORK, M, + $ WORK( M*M+1 ), INFO ) + CALL SLACPY( 'A', N, M, Z( 1, START ), LDZ, + $ WORK( STOREZ ), N ) + CALL SGEMM( 'N', 'N', N, M, M, ONE, + $ WORK( STOREZ ), N, WORK, M, ZERO, + $ Z( 1, START ), LDZ ) + ELSE IF( ICOMPZ.EQ.2 ) THEN + CALL SSTEQR( 'I', M, D( START ), E( START ), + $ Z( START, START ), LDZ, WORK, INFO ) + ELSE + CALL SSTERF( M, D( START ), E( START ), INFO ) + END IF + IF( INFO.NE.0 ) THEN + INFO = START*( N+1 ) + FINISH + GO TO 50 + END IF + END IF +* + START = FINISH + 1 + GO TO 10 + END IF +* +* endwhile +* + IF( ICOMPZ.EQ.0 ) THEN +* +* Use Quick Sort +* + CALL SLASRT( 'I', N, D, INFO ) +* + ELSE +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 40 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 30 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 30 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 40 CONTINUE + END IF + END IF +* + 50 CONTINUE + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of SSTEDC +* + END diff --git a/dspl/liblapack/SRC/sstegr.f b/dspl/liblapack/SRC/sstegr.f new file mode 100644 index 0000000..37ce8cd --- /dev/null +++ b/dspl/liblapack/SRC/sstegr.f @@ -0,0 +1,302 @@ +*> \brief \b SSTEGR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSTEGR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSTEGR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, +* ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, +* LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE +* INTEGER IL, INFO, IU, LDZ, LIWORK, LWORK, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* REAL D( * ), E( * ), W( * ), WORK( * ) +* REAL Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSTEGR computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has +*> a well defined set of pairwise different real eigenvalues, the corresponding +*> real eigenvectors are pairwise orthogonal. +*> +*> The spectrum may be computed either completely or partially by specifying +*> either an interval (VL,VU] or a range of indices IL:IU for the desired +*> eigenvalues. +*> +*> SSTEGR is a compatibility wrapper around the improved SSTEMR routine. +*> See SSTEMR for further details. +*> +*> One important change is that the ABSTOL parameter no longer provides any +*> benefit and hence is no longer used. +*> +*> Note : SSTEGR and SSTEMR work only on machines which follow +*> IEEE-754 floating-point standard in their handling of infinities and +*> NaNs. Normal execution may create these exceptiona values and hence +*> may abort due to a floating point exception in environments which +*> do not conform to the IEEE-754 standard. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the N diagonal elements of the tridiagonal matrix +*> T. On exit, D is overwritten. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N) +*> On entry, the (N-1) subdiagonal elements of the tridiagonal +*> matrix T in elements 1 to N-1 of E. E(N) need not be set on +*> input, but is used internally as workspace. +*> On exit, E is overwritten. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> Unused. Was the absolute error tolerance for the +*> eigenvalues/eigenvectors in previous versions. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, max(1,M) ) +*> If JOBZ = 'V', and if INFO = 0, then the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix T +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> Supplying N columns is always safe. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', then LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th computed eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). This is relevant in the case when the matrix +*> is split. ISUPPZ is only accessed when JOBZ is 'V' and N > 0. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal +*> (and minimal) LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,18*N) +*> if JOBZ = 'V', and LWORK >= max(1,12*N) if JOBZ = 'N'. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (LIWORK) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N) +*> if the eigenvectors are desired, and LIWORK >= max(1,8*N) +*> if only the eigenvalues are to be computed. +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> On exit, INFO +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = 1X, internal error in SLARRE, +*> if INFO = 2X, internal error in SLARRV. +*> Here, the digit X = ABS( IINFO ) < 10, where IINFO is +*> the nonzero error code returned by SLARRE or +*> SLARRV, respectively. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Inderjit Dhillon, IBM Almaden, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, LBNL/NERSC, USA \n +* +* ===================================================================== + SUBROUTINE SSTEGR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE + INTEGER IL, INFO, IU, LDZ, LIWORK, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + REAL D( * ), E( * ), W( * ), WORK( * ) + REAL Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL TRYRAC +* .. +* .. External Subroutines .. + EXTERNAL SSTEMR +* .. +* .. Executable Statements .. + INFO = 0 + TRYRAC = .FALSE. + + CALL SSTEMR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, + $ M, W, Z, LDZ, N, ISUPPZ, TRYRAC, WORK, LWORK, + $ IWORK, LIWORK, INFO ) +* +* End of SSTEGR +* + END diff --git a/dspl/liblapack/SRC/sstein.f b/dspl/liblapack/SRC/sstein.f new file mode 100644 index 0000000..a04b3ae --- /dev/null +++ b/dspl/liblapack/SRC/sstein.f @@ -0,0 +1,453 @@ +*> \brief \b SSTEIN +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSTEIN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, Z, LDZ, WORK, +* IWORK, IFAIL, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDZ, M, N +* .. +* .. Array Arguments .. +* INTEGER IBLOCK( * ), IFAIL( * ), ISPLIT( * ), +* $ IWORK( * ) +* REAL D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSTEIN computes the eigenvectors of a real symmetric tridiagonal +*> matrix T corresponding to specified eigenvalues, using inverse +*> iteration. +*> +*> The maximum number of iterations allowed for each eigenvector is +*> specified by an internal parameter MAXITS (currently set to 5). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The n diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The (n-1) subdiagonal elements of the tridiagonal matrix +*> T, in elements 1 to N-1. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of eigenvectors to be found. 0 <= M <= N. +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements of W contain the eigenvalues for +*> which eigenvectors are to be computed. The eigenvalues +*> should be grouped by split-off block and ordered from +*> smallest to largest within the block. ( The output array +*> W from SSTEBZ with ORDER = 'B' is expected here. ) +*> \endverbatim +*> +*> \param[in] IBLOCK +*> \verbatim +*> IBLOCK is INTEGER array, dimension (N) +*> The submatrix indices associated with the corresponding +*> eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to +*> the first submatrix from the top, =2 if W(i) belongs to +*> the second submatrix, etc. ( The output array IBLOCK +*> from SSTEBZ is expected here. ) +*> \endverbatim +*> +*> \param[in] ISPLIT +*> \verbatim +*> ISPLIT is INTEGER array, dimension (N) +*> The splitting points, at which T breaks up into submatrices. +*> The first submatrix consists of rows/columns 1 to +*> ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 +*> through ISPLIT( 2 ), etc. +*> ( The output array ISPLIT from SSTEBZ is expected here. ) +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, M) +*> The computed eigenvectors. The eigenvector associated +*> with the eigenvalue W(i) is stored in the i-th column of +*> Z. Any vector which fails to converge is set to its current +*> iterate after MAXITS iterations. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (M) +*> On normal exit, all elements of IFAIL are zero. +*> If one or more eigenvectors fail to converge after +*> MAXITS iterations, then their indices are stored in +*> array IFAIL. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge +*> in MAXITS iterations. Their indices are stored in +*> array IFAIL. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> MAXITS INTEGER, default = 5 +*> The maximum number of iterations performed. +*> +*> EXTRA INTEGER, default = 2 +*> The number of iterations performed after norm growth +*> criterion is satisfied, should be at least 1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE SSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, Z, LDZ, WORK, + $ IWORK, IFAIL, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDZ, M, N +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), IFAIL( * ), ISPLIT( * ), + $ IWORK( * ) + REAL D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TEN, ODM3, ODM1 + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TEN = 1.0E+1, + $ ODM3 = 1.0E-3, ODM1 = 1.0E-1 ) + INTEGER MAXITS, EXTRA + PARAMETER ( MAXITS = 5, EXTRA = 2 ) +* .. +* .. Local Scalars .. + INTEGER B1, BLKSIZ, BN, GPIND, I, IINFO, INDRV1, + $ INDRV2, INDRV3, INDRV4, INDRV5, ITS, J, J1, + $ JBLK, JMAX, NBLK, NRMCHK + REAL CTR, EPS, EPS1, NRM, ONENRM, ORTOL, PERTOL, + $ SCL, SEP, STPCRT, TOL, XJ, XJM +* .. +* .. Local Arrays .. + INTEGER ISEED( 4 ) +* .. +* .. External Functions .. + INTEGER ISAMAX + REAL SDOT, SLAMCH, SNRM2 + EXTERNAL ISAMAX, SDOT, SLAMCH, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLAGTF, SLAGTS, SLARNV, SSCAL, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + DO 10 I = 1, M + IFAIL( I ) = 0 + 10 CONTINUE +* + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 .OR. M.GT.N ) THEN + INFO = -4 + ELSE IF( LDZ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + DO 20 J = 2, M + IF( IBLOCK( J ).LT.IBLOCK( J-1 ) ) THEN + INFO = -6 + GO TO 30 + END IF + IF( IBLOCK( J ).EQ.IBLOCK( J-1 ) .AND. W( J ).LT.W( J-1 ) ) + $ THEN + INFO = -5 + GO TO 30 + END IF + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSTEIN', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + EPS = SLAMCH( 'Precision' ) +* +* Initialize seed for random number generator SLARNV. +* + DO 40 I = 1, 4 + ISEED( I ) = 1 + 40 CONTINUE +* +* Initialize pointers. +* + INDRV1 = 0 + INDRV2 = INDRV1 + N + INDRV3 = INDRV2 + N + INDRV4 = INDRV3 + N + INDRV5 = INDRV4 + N +* +* Compute eigenvectors of matrix blocks. +* + J1 = 1 + DO 160 NBLK = 1, IBLOCK( M ) +* +* Find starting and ending indices of block nblk. +* + IF( NBLK.EQ.1 ) THEN + B1 = 1 + ELSE + B1 = ISPLIT( NBLK-1 ) + 1 + END IF + BN = ISPLIT( NBLK ) + BLKSIZ = BN - B1 + 1 + IF( BLKSIZ.EQ.1 ) + $ GO TO 60 + GPIND = J1 +* +* Compute reorthogonalization criterion and stopping criterion. +* + ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) ) + ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) ) + DO 50 I = B1 + 1, BN - 1 + ONENRM = MAX( ONENRM, ABS( D( I ) )+ABS( E( I-1 ) )+ + $ ABS( E( I ) ) ) + 50 CONTINUE + ORTOL = ODM3*ONENRM +* + STPCRT = SQRT( ODM1 / BLKSIZ ) +* +* Loop through eigenvalues of block nblk. +* + 60 CONTINUE + JBLK = 0 + DO 150 J = J1, M + IF( IBLOCK( J ).NE.NBLK ) THEN + J1 = J + GO TO 160 + END IF + JBLK = JBLK + 1 + XJ = W( J ) +* +* Skip all the work if the block size is one. +* + IF( BLKSIZ.EQ.1 ) THEN + WORK( INDRV1+1 ) = ONE + GO TO 120 + END IF +* +* If eigenvalues j and j-1 are too close, add a relatively +* small perturbation. +* + IF( JBLK.GT.1 ) THEN + EPS1 = ABS( EPS*XJ ) + PERTOL = TEN*EPS1 + SEP = XJ - XJM + IF( SEP.LT.PERTOL ) + $ XJ = XJM + PERTOL + END IF +* + ITS = 0 + NRMCHK = 0 +* +* Get random starting vector. +* + CALL SLARNV( 2, ISEED, BLKSIZ, WORK( INDRV1+1 ) ) +* +* Copy the matrix T so it won't be destroyed in factorization. +* + CALL SCOPY( BLKSIZ, D( B1 ), 1, WORK( INDRV4+1 ), 1 ) + CALL SCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV2+2 ), 1 ) + CALL SCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV3+1 ), 1 ) +* +* Compute LU factors with partial pivoting ( PT = LU ) +* + TOL = ZERO + CALL SLAGTF( BLKSIZ, WORK( INDRV4+1 ), XJ, WORK( INDRV2+2 ), + $ WORK( INDRV3+1 ), TOL, WORK( INDRV5+1 ), IWORK, + $ IINFO ) +* +* Update iteration count. +* + 70 CONTINUE + ITS = ITS + 1 + IF( ITS.GT.MAXITS ) + $ GO TO 100 +* +* Normalize and scale the righthand side vector Pb. +* + JMAX = ISAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) + SCL = BLKSIZ*ONENRM*MAX( EPS, + $ ABS( WORK( INDRV4+BLKSIZ ) ) ) / + $ ABS( WORK( INDRV1+JMAX ) ) + CALL SSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 ) +* +* Solve the system LU = Pb. +* + CALL SLAGTS( -1, BLKSIZ, WORK( INDRV4+1 ), WORK( INDRV2+2 ), + $ WORK( INDRV3+1 ), WORK( INDRV5+1 ), IWORK, + $ WORK( INDRV1+1 ), TOL, IINFO ) +* +* Reorthogonalize by modified Gram-Schmidt if eigenvalues are +* close enough. +* + IF( JBLK.EQ.1 ) + $ GO TO 90 + IF( ABS( XJ-XJM ).GT.ORTOL ) + $ GPIND = J + IF( GPIND.NE.J ) THEN + DO 80 I = GPIND, J - 1 + CTR = -SDOT( BLKSIZ, WORK( INDRV1+1 ), 1, Z( B1, I ), + $ 1 ) + CALL SAXPY( BLKSIZ, CTR, Z( B1, I ), 1, + $ WORK( INDRV1+1 ), 1 ) + 80 CONTINUE + END IF +* +* Check the infinity norm of the iterate. +* + 90 CONTINUE + JMAX = ISAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) + NRM = ABS( WORK( INDRV1+JMAX ) ) +* +* Continue for additional iterations after norm reaches +* stopping criterion. +* + IF( NRM.LT.STPCRT ) + $ GO TO 70 + NRMCHK = NRMCHK + 1 + IF( NRMCHK.LT.EXTRA+1 ) + $ GO TO 70 +* + GO TO 110 +* +* If stopping criterion was not satisfied, update info and +* store eigenvector number in array ifail. +* + 100 CONTINUE + INFO = INFO + 1 + IFAIL( INFO ) = J +* +* Accept iterate as jth eigenvector. +* + 110 CONTINUE + SCL = ONE / SNRM2( BLKSIZ, WORK( INDRV1+1 ), 1 ) + JMAX = ISAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) + IF( WORK( INDRV1+JMAX ).LT.ZERO ) + $ SCL = -SCL + CALL SSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 ) + 120 CONTINUE + DO 130 I = 1, N + Z( I, J ) = ZERO + 130 CONTINUE + DO 140 I = 1, BLKSIZ + Z( B1+I-1, J ) = WORK( INDRV1+I ) + 140 CONTINUE +* +* Save the shift to check eigenvalue spacing at next +* iteration. +* + XJM = XJ +* + 150 CONTINUE + 160 CONTINUE +* + RETURN +* +* End of SSTEIN +* + END diff --git a/dspl/liblapack/SRC/sstemr.f b/dspl/liblapack/SRC/sstemr.f new file mode 100644 index 0000000..2285381 --- /dev/null +++ b/dspl/liblapack/SRC/sstemr.f @@ -0,0 +1,772 @@ +*> \brief \b SSTEMR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSTEMR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSTEMR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, +* M, W, Z, LDZ, NZC, ISUPPZ, TRYRAC, WORK, LWORK, +* IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE +* LOGICAL TRYRAC +* INTEGER IL, INFO, IU, LDZ, NZC, LIWORK, LWORK, M, N +* REAL VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* REAL D( * ), E( * ), W( * ), WORK( * ) +* REAL Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSTEMR computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has +*> a well defined set of pairwise different real eigenvalues, the corresponding +*> real eigenvectors are pairwise orthogonal. +*> +*> The spectrum may be computed either completely or partially by specifying +*> either an interval (VL,VU] or a range of indices IL:IU for the desired +*> eigenvalues. +*> +*> Depending on the number of desired eigenvalues, these are computed either +*> by bisection or the dqds algorithm. Numerically orthogonal eigenvectors are +*> computed by the use of various suitable L D L^T factorizations near clusters +*> of close eigenvalues (referred to as RRRs, Relatively Robust +*> Representations). An informal sketch of the algorithm follows. +*> +*> For each unreduced block (submatrix) of T, +*> (a) Compute T - sigma I = L D L^T, so that L and D +*> define all the wanted eigenvalues to high relative accuracy. +*> This means that small relative changes in the entries of D and L +*> cause only small relative changes in the eigenvalues and +*> eigenvectors. The standard (unfactored) representation of the +*> tridiagonal matrix T does not have this property in general. +*> (b) Compute the eigenvalues to suitable accuracy. +*> If the eigenvectors are desired, the algorithm attains full +*> accuracy of the computed eigenvalues only right before +*> the corresponding vectors have to be computed, see steps c) and d). +*> (c) For each cluster of close eigenvalues, select a new +*> shift close to the cluster, find a new factorization, and refine +*> the shifted eigenvalues to suitable accuracy. +*> (d) For each eigenvalue with a large enough relative separation compute +*> the corresponding eigenvector by forming a rank revealing twisted +*> factorization. Go back to (c) for any clusters that remain. +*> +*> For more details, see: +*> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations +*> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," +*> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. +*> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and +*> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, +*> 2004. Also LAPACK Working Note 154. +*> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric +*> tridiagonal eigenvalue/eigenvector problem", +*> Computer Science Division Technical Report No. UCB/CSD-97-971, +*> UC Berkeley, May 1997. +*> +*> Further Details +*> 1.SSTEMR works only on machines which follow IEEE-754 +*> floating-point standard in their handling of infinities and NaNs. +*> This permits the use of efficient inner loops avoiding a check for +*> zero divisors. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the N diagonal elements of the tridiagonal matrix +*> T. On exit, D is overwritten. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N) +*> On entry, the (N-1) subdiagonal elements of the tridiagonal +*> matrix T in elements 1 to N-1 of E. E(N) need not be set on +*> input, but is used internally as workspace. +*> On exit, E is overwritten. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, max(1,M) ) +*> If JOBZ = 'V', and if INFO = 0, then the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix T +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and can be computed with a workspace +*> query by setting NZC = -1, see below. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', then LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[in] NZC +*> \verbatim +*> NZC is INTEGER +*> The number of eigenvectors to be held in the array Z. +*> If RANGE = 'A', then NZC >= max(1,N). +*> If RANGE = 'V', then NZC >= the number of eigenvalues in (VL,VU]. +*> If RANGE = 'I', then NZC >= IU-IL+1. +*> If NZC = -1, then a workspace query is assumed; the +*> routine calculates the number of columns of the array Z that +*> are needed to hold the eigenvectors. +*> This value is returned as the first entry of the Z array, and +*> no error message related to NZC is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th computed eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). This is relevant in the case when the matrix +*> is split. ISUPPZ is only accessed when JOBZ is 'V' and N > 0. +*> \endverbatim +*> +*> \param[in,out] TRYRAC +*> \verbatim +*> TRYRAC is LOGICAL +*> If TRYRAC.EQ..TRUE., indicates that the code should check whether +*> the tridiagonal matrix defines its eigenvalues to high relative +*> accuracy. If so, the code uses relative-accuracy preserving +*> algorithms that might be (a bit) slower depending on the matrix. +*> If the matrix does not define its eigenvalues to high relative +*> accuracy, the code can uses possibly faster algorithms. +*> If TRYRAC.EQ..FALSE., the code is not required to guarantee +*> relatively accurate eigenvalues and can use the fastest possible +*> techniques. +*> On exit, a .TRUE. TRYRAC will be set to .FALSE. if the matrix +*> does not define its eigenvalues to high relative accuracy. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal +*> (and minimal) LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,18*N) +*> if JOBZ = 'V', and LWORK >= max(1,12*N) if JOBZ = 'N'. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (LIWORK) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N) +*> if the eigenvectors are desired, and LIWORK >= max(1,8*N) +*> if only the eigenvalues are to be computed. +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> On exit, INFO +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = 1X, internal error in SLARRE, +*> if INFO = 2X, internal error in SLARRV. +*> Here, the digit X = ABS( IINFO ) < 10, where IINFO is +*> the nonzero error code returned by SLARRE or +*> SLARRV, respectively. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE SSTEMR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, + $ M, W, Z, LDZ, NZC, ISUPPZ, TRYRAC, WORK, LWORK, + $ IWORK, LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE + LOGICAL TRYRAC + INTEGER IL, INFO, IU, LDZ, NZC, LIWORK, LWORK, M, N + REAL VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + REAL D( * ), E( * ), W( * ), WORK( * ) + REAL Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, FOUR, MINRGP + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, + $ FOUR = 4.0E0, + $ MINRGP = 3.0E-3 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ, ZQUERY + INTEGER I, IBEGIN, IEND, IFIRST, IIL, IINDBL, IINDW, + $ IINDWK, IINFO, IINSPL, IIU, ILAST, IN, INDD, + $ INDE2, INDERR, INDGP, INDGRS, INDWRK, ITMP, + $ ITMP2, J, JBLK, JJ, LIWMIN, LWMIN, NSPLIT, + $ NZCMIN, OFFSET, WBEGIN, WEND + REAL BIGNUM, CS, EPS, PIVMIN, R1, R2, RMAX, RMIN, + $ RTOL1, RTOL2, SAFMIN, SCALE, SMLNUM, SN, + $ THRESH, TMP, TNRM, WL, WU +* .. +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANST + EXTERNAL LSAME, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLAE2, SLAEV2, SLARRC, SLARRE, SLARRJ, + $ SLARRR, SLARRV, SLASRT, SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ).OR.( LIWORK.EQ.-1 ) ) + ZQUERY = ( NZC.EQ.-1 ) + +* SSTEMR needs WORK of size 6*N, IWORK of size 3*N. +* In addition, SLARRE needs WORK of size 6*N, IWORK of size 5*N. +* Furthermore, SLARRV needs WORK of size 12*N, IWORK of size 7*N. + IF( WANTZ ) THEN + LWMIN = 18*N + LIWMIN = 10*N + ELSE +* need less workspace if only the eigenvalues are wanted + LWMIN = 12*N + LIWMIN = 8*N + ENDIF + + WL = ZERO + WU = ZERO + IIL = 0 + IIU = 0 + NSPLIT = 0 + + IF( VALEIG ) THEN +* We do not reference VL, VU in the cases RANGE = 'I','A' +* The interval (WL, WU] contains all the wanted eigenvalues. +* It is either given by the user or computed in SLARRE. + WL = VL + WU = VU + ELSEIF( INDEIG ) THEN +* We do not reference IL, IU in the cases RANGE = 'V','A' + IIL = IL + IIU = IU + ENDIF +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( VALEIG .AND. N.GT.0 .AND. WU.LE.WL ) THEN + INFO = -7 + ELSE IF( INDEIG .AND. ( IIL.LT.1 .OR. IIL.GT.N ) ) THEN + INFO = -8 + ELSE IF( INDEIG .AND. ( IIU.LT.IIL .OR. IIU.GT.N ) ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -17 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( WANTZ .AND. ALLEIG ) THEN + NZCMIN = N + ELSE IF( WANTZ .AND. VALEIG ) THEN + CALL SLARRC( 'T', N, VL, VU, D, E, SAFMIN, + $ NZCMIN, ITMP, ITMP2, INFO ) + ELSE IF( WANTZ .AND. INDEIG ) THEN + NZCMIN = IIU-IIL+1 + ELSE +* WANTZ .EQ. FALSE. + NZCMIN = 0 + ENDIF + IF( ZQUERY .AND. INFO.EQ.0 ) THEN + Z( 1,1 ) = NZCMIN + ELSE IF( NZC.LT.NZCMIN .AND. .NOT.ZQUERY ) THEN + INFO = -14 + END IF + END IF + + IF( INFO.NE.0 ) THEN +* + CALL XERBLA( 'SSTEMR', -INFO ) +* + RETURN + ELSE IF( LQUERY .OR. ZQUERY ) THEN + RETURN + END IF +* +* Handle N = 0, 1, and 2 cases immediately +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = D( 1 ) + ELSE + IF( WL.LT.D( 1 ) .AND. WU.GE.D( 1 ) ) THEN + M = 1 + W( 1 ) = D( 1 ) + END IF + END IF + IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN + Z( 1, 1 ) = ONE + ISUPPZ(1) = 1 + ISUPPZ(2) = 1 + END IF + RETURN + END IF +* + IF( N.EQ.2 ) THEN + IF( .NOT.WANTZ ) THEN + CALL SLAE2( D(1), E(1), D(2), R1, R2 ) + ELSE IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN + CALL SLAEV2( D(1), E(1), D(2), R1, R2, CS, SN ) + END IF + IF( ALLEIG.OR. + $ (VALEIG.AND.(R2.GT.WL).AND. + $ (R2.LE.WU)).OR. + $ (INDEIG.AND.(IIL.EQ.1)) ) THEN + M = M+1 + W( M ) = R2 + IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN + Z( 1, M ) = -SN + Z( 2, M ) = CS +* Note: At most one of SN and CS can be zero. + IF (SN.NE.ZERO) THEN + IF (CS.NE.ZERO) THEN + ISUPPZ(2*M-1) = 1 + ISUPPZ(2*M) = 2 + ELSE + ISUPPZ(2*M-1) = 1 + ISUPPZ(2*M) = 1 + END IF + ELSE + ISUPPZ(2*M-1) = 2 + ISUPPZ(2*M) = 2 + END IF + ENDIF + ENDIF + IF( ALLEIG.OR. + $ (VALEIG.AND.(R1.GT.WL).AND. + $ (R1.LE.WU)).OR. + $ (INDEIG.AND.(IIU.EQ.2)) ) THEN + M = M+1 + W( M ) = R1 + IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN + Z( 1, M ) = CS + Z( 2, M ) = SN +* Note: At most one of SN and CS can be zero. + IF (SN.NE.ZERO) THEN + IF (CS.NE.ZERO) THEN + ISUPPZ(2*M-1) = 1 + ISUPPZ(2*M) = 2 + ELSE + ISUPPZ(2*M-1) = 1 + ISUPPZ(2*M) = 1 + END IF + ELSE + ISUPPZ(2*M-1) = 2 + ISUPPZ(2*M) = 2 + END IF + ENDIF + ENDIF + ELSE + +* Continue with general N + + INDGRS = 1 + INDERR = 2*N + 1 + INDGP = 3*N + 1 + INDD = 4*N + 1 + INDE2 = 5*N + 1 + INDWRK = 6*N + 1 +* + IINSPL = 1 + IINDBL = N + 1 + IINDW = 2*N + 1 + IINDWK = 3*N + 1 +* +* Scale matrix to allowable range, if necessary. +* The allowable range is related to the PIVMIN parameter; see the +* comments in SLARRD. The preference for scaling small values +* up is heuristic; we expect users' matrices not to be close to the +* RMAX threshold. +* + SCALE = ONE + TNRM = SLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + SCALE = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + SCALE = RMAX / TNRM + END IF + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( N, SCALE, D, 1 ) + CALL SSCAL( N-1, SCALE, E, 1 ) + TNRM = TNRM*SCALE + IF( VALEIG ) THEN +* If eigenvalues in interval have to be found, +* scale (WL, WU] accordingly + WL = WL*SCALE + WU = WU*SCALE + ENDIF + END IF +* +* Compute the desired eigenvalues of the tridiagonal after splitting +* into smaller subblocks if the corresponding off-diagonal elements +* are small +* THRESH is the splitting parameter for SLARRE +* A negative THRESH forces the old splitting criterion based on the +* size of the off-diagonal. A positive THRESH switches to splitting +* which preserves relative accuracy. +* + IF( TRYRAC ) THEN +* Test whether the matrix warrants the more expensive relative approach. + CALL SLARRR( N, D, E, IINFO ) + ELSE +* The user does not care about relative accurately eigenvalues + IINFO = -1 + ENDIF +* Set the splitting criterion + IF (IINFO.EQ.0) THEN + THRESH = EPS + ELSE + THRESH = -EPS +* relative accuracy is desired but T does not guarantee it + TRYRAC = .FALSE. + ENDIF +* + IF( TRYRAC ) THEN +* Copy original diagonal, needed to guarantee relative accuracy + CALL SCOPY(N,D,1,WORK(INDD),1) + ENDIF +* Store the squares of the offdiagonal values of T + DO 5 J = 1, N-1 + WORK( INDE2+J-1 ) = E(J)**2 + 5 CONTINUE + +* Set the tolerance parameters for bisection + IF( .NOT.WANTZ ) THEN +* SLARRE computes the eigenvalues to full precision. + RTOL1 = FOUR * EPS + RTOL2 = FOUR * EPS + ELSE +* SLARRE computes the eigenvalues to less than full precision. +* SLARRV will refine the eigenvalue approximations, and we can +* need less accurate initial bisection in SLARRE. +* Note: these settings do only affect the subset case and SLARRE + RTOL1 = MAX( SQRT(EPS)*5.0E-2, FOUR * EPS ) + RTOL2 = MAX( SQRT(EPS)*5.0E-3, FOUR * EPS ) + ENDIF + CALL SLARRE( RANGE, N, WL, WU, IIL, IIU, D, E, + $ WORK(INDE2), RTOL1, RTOL2, THRESH, NSPLIT, + $ IWORK( IINSPL ), M, W, WORK( INDERR ), + $ WORK( INDGP ), IWORK( IINDBL ), + $ IWORK( IINDW ), WORK( INDGRS ), PIVMIN, + $ WORK( INDWRK ), IWORK( IINDWK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 10 + ABS( IINFO ) + RETURN + END IF +* Note that if RANGE .NE. 'V', SLARRE computes bounds on the desired +* part of the spectrum. All desired eigenvalues are contained in +* (WL,WU] + + + IF( WANTZ ) THEN +* +* Compute the desired eigenvectors corresponding to the computed +* eigenvalues +* + CALL SLARRV( N, WL, WU, D, E, + $ PIVMIN, IWORK( IINSPL ), M, + $ 1, M, MINRGP, RTOL1, RTOL2, + $ W, WORK( INDERR ), WORK( INDGP ), IWORK( IINDBL ), + $ IWORK( IINDW ), WORK( INDGRS ), Z, LDZ, + $ ISUPPZ, WORK( INDWRK ), IWORK( IINDWK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 20 + ABS( IINFO ) + RETURN + END IF + ELSE +* SLARRE computes eigenvalues of the (shifted) root representation +* SLARRV returns the eigenvalues of the unshifted matrix. +* However, if the eigenvectors are not desired by the user, we need +* to apply the corresponding shifts from SLARRE to obtain the +* eigenvalues of the original matrix. + DO 20 J = 1, M + ITMP = IWORK( IINDBL+J-1 ) + W( J ) = W( J ) + E( IWORK( IINSPL+ITMP-1 ) ) + 20 CONTINUE + END IF +* + + IF ( TRYRAC ) THEN +* Refine computed eigenvalues so that they are relatively accurate +* with respect to the original matrix T. + IBEGIN = 1 + WBEGIN = 1 + DO 39 JBLK = 1, IWORK( IINDBL+M-1 ) + IEND = IWORK( IINSPL+JBLK-1 ) + IN = IEND - IBEGIN + 1 + WEND = WBEGIN - 1 +* check if any eigenvalues have to be refined in this block + 36 CONTINUE + IF( WEND.LT.M ) THEN + IF( IWORK( IINDBL+WEND ).EQ.JBLK ) THEN + WEND = WEND + 1 + GO TO 36 + END IF + END IF + IF( WEND.LT.WBEGIN ) THEN + IBEGIN = IEND + 1 + GO TO 39 + END IF + + OFFSET = IWORK(IINDW+WBEGIN-1)-1 + IFIRST = IWORK(IINDW+WBEGIN-1) + ILAST = IWORK(IINDW+WEND-1) + RTOL2 = FOUR * EPS + CALL SLARRJ( IN, + $ WORK(INDD+IBEGIN-1), WORK(INDE2+IBEGIN-1), + $ IFIRST, ILAST, RTOL2, OFFSET, W(WBEGIN), + $ WORK( INDERR+WBEGIN-1 ), + $ WORK( INDWRK ), IWORK( IINDWK ), PIVMIN, + $ TNRM, IINFO ) + IBEGIN = IEND + 1 + WBEGIN = WEND + 1 + 39 CONTINUE + ENDIF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( M, ONE / SCALE, W, 1 ) + END IF + END IF +* +* If eigenvalues are not in increasing order, then sort them, +* possibly along with eigenvectors. +* + IF( NSPLIT.GT.1 .OR. N.EQ.2 ) THEN + IF( .NOT. WANTZ ) THEN + CALL SLASRT( 'I', M, W, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 3 + RETURN + END IF + ELSE + DO 60 J = 1, M - 1 + I = 0 + TMP = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP ) THEN + I = JJ + TMP = W( JJ ) + END IF + 50 CONTINUE + IF( I.NE.0 ) THEN + W( I ) = W( J ) + W( J ) = TMP + IF( WANTZ ) THEN + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + ITMP = ISUPPZ( 2*I-1 ) + ISUPPZ( 2*I-1 ) = ISUPPZ( 2*J-1 ) + ISUPPZ( 2*J-1 ) = ITMP + ITMP = ISUPPZ( 2*I ) + ISUPPZ( 2*I ) = ISUPPZ( 2*J ) + ISUPPZ( 2*J ) = ITMP + END IF + END IF + 60 CONTINUE + END IF + ENDIF +* +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of SSTEMR +* + END diff --git a/dspl/liblapack/SRC/ssteqr.f b/dspl/liblapack/SRC/ssteqr.f new file mode 100644 index 0000000..f776c24 --- /dev/null +++ b/dspl/liblapack/SRC/ssteqr.f @@ -0,0 +1,572 @@ +*> \brief \b SSTEQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSTEQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPZ +* INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSTEQR computes all eigenvalues and, optionally, eigenvectors of a +*> symmetric tridiagonal matrix using the implicit QL or QR method. +*> The eigenvectors of a full or band symmetric matrix can also be found +*> if SSYTRD or SSPTRD or SSBTRD has been used to reduce this matrix to +*> tridiagonal form. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only. +*> = 'V': Compute eigenvalues and eigenvectors of the original +*> symmetric matrix. On entry, Z must contain the +*> orthogonal matrix used to reduce the original matrix +*> to tridiagonal form. +*> = 'I': Compute eigenvalues and eigenvectors of the +*> tridiagonal matrix. Z is initialized to the identity +*> matrix. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the diagonal elements of the tridiagonal matrix. +*> On exit, if INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', then Z contains the orthogonal +*> matrix used in the reduction to tridiagonal form. +*> On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +*> orthonormal eigenvectors of the original symmetric matrix, +*> and if COMPZ = 'I', Z contains the orthonormal eigenvectors +*> of the symmetric tridiagonal matrix. +*> If COMPZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> eigenvectors are desired, then LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (max(1,2*N-2)) +*> If COMPZ = 'N', then WORK is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: the algorithm has failed to find all the eigenvalues in +*> a total of 30*N iterations; if INFO = i, then i +*> elements of E have not converged to zero; on exit, D +*> and E contain the elements of a symmetric tridiagonal +*> matrix which is orthogonally similar to the original +*> matrix. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE SSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, THREE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ THREE = 3.0E0 ) + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) +* .. +* .. Local Scalars .. + INTEGER I, ICOMPZ, II, ISCALE, J, JTOT, K, L, L1, LEND, + $ LENDM1, LENDP1, LENDSV, LM1, LSV, M, MM, MM1, + $ NM1, NMAXIT + REAL ANORM, B, C, EPS, EPS2, F, G, P, R, RT1, RT2, + $ S, SAFMAX, SAFMIN, SSFMAX, SSFMIN, TST +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANST, SLAPY2 + EXTERNAL LSAME, SLAMCH, SLANST, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL SLAE2, SLAEV2, SLARTG, SLASCL, SLASET, SLASR, + $ SLASRT, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSTEQR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ICOMPZ.EQ.2 ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Determine the unit roundoff and over/underflow thresholds. +* + EPS = SLAMCH( 'E' ) + EPS2 = EPS**2 + SAFMIN = SLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + SSFMAX = SQRT( SAFMAX ) / THREE + SSFMIN = SQRT( SAFMIN ) / EPS2 +* +* Compute the eigenvalues and eigenvectors of the tridiagonal +* matrix. +* + IF( ICOMPZ.EQ.2 ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDZ ) +* + NMAXIT = N*MAXIT + JTOT = 0 +* +* Determine where the matrix splits and choose QL or QR iteration +* for each block, according to whether top or bottom diagonal +* element is smaller. +* + L1 = 1 + NM1 = N - 1 +* + 10 CONTINUE + IF( L1.GT.N ) + $ GO TO 160 + IF( L1.GT.1 ) + $ E( L1-1 ) = ZERO + IF( L1.LE.NM1 ) THEN + DO 20 M = L1, NM1 + TST = ABS( E( M ) ) + IF( TST.EQ.ZERO ) + $ GO TO 30 + IF( TST.LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+ + $ 1 ) ) ) )*EPS ) THEN + E( M ) = ZERO + GO TO 30 + END IF + 20 CONTINUE + END IF + M = N +* + 30 CONTINUE + L = L1 + LSV = L + LEND = M + LENDSV = LEND + L1 = M + 1 + IF( LEND.EQ.L ) + $ GO TO 10 +* +* Scale submatrix in rows and columns L to LEND +* + ANORM = SLANST( 'M', LEND-L+1, D( L ), E( L ) ) + ISCALE = 0 + IF( ANORM.EQ.ZERO ) + $ GO TO 10 + IF( ANORM.GT.SSFMAX ) THEN + ISCALE = 1 + CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, + $ INFO ) + ELSE IF( ANORM.LT.SSFMIN ) THEN + ISCALE = 2 + CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, + $ INFO ) + END IF +* +* Choose between QL and QR iteration +* + IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN + LEND = LSV + L = LENDSV + END IF +* + IF( LEND.GT.L ) THEN +* +* QL Iteration +* +* Look for small subdiagonal element. +* + 40 CONTINUE + IF( L.NE.LEND ) THEN + LENDM1 = LEND - 1 + DO 50 M = L, LENDM1 + TST = ABS( E( M ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M+1 ) )+ + $ SAFMIN )GO TO 60 + 50 CONTINUE + END IF +* + M = LEND +* + 60 CONTINUE + IF( M.LT.LEND ) + $ E( M ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 80 +* +* If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 +* to compute its eigensystem. +* + IF( M.EQ.L+1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL SLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S ) + WORK( L ) = C + WORK( N-1+L ) = S + CALL SLASR( 'R', 'V', 'B', N, 2, WORK( L ), + $ WORK( N-1+L ), Z( 1, L ), LDZ ) + ELSE + CALL SLAE2( D( L ), E( L ), D( L+1 ), RT1, RT2 ) + END IF + D( L ) = RT1 + D( L+1 ) = RT2 + E( L ) = ZERO + L = L + 2 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L+1 )-P ) / ( TWO*E( L ) ) + R = SLAPY2( G, ONE ) + G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + MM1 = M - 1 + DO 70 I = MM1, L, -1 + F = S*E( I ) + B = C*E( I ) + CALL SLARTG( G, F, C, S, R ) + IF( I.NE.M-1 ) + $ E( I+1 ) = R + G = D( I+1 ) - P + R = ( D( I )-G )*S + TWO*C*B + P = S*R + D( I+1 ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = -S + END IF +* + 70 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = M - L + 1 + CALL SLASR( 'R', 'V', 'B', N, MM, WORK( L ), WORK( N-1+L ), + $ Z( 1, L ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( L ) = G + GO TO 40 +* +* Eigenvalue found. +* + 80 CONTINUE + D( L ) = P +* + L = L + 1 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 +* + ELSE +* +* QR Iteration +* +* Look for small superdiagonal element. +* + 90 CONTINUE + IF( L.NE.LEND ) THEN + LENDP1 = LEND + 1 + DO 100 M = L, LENDP1, -1 + TST = ABS( E( M-1 ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M-1 ) )+ + $ SAFMIN )GO TO 110 + 100 CONTINUE + END IF +* + M = LEND +* + 110 CONTINUE + IF( M.GT.LEND ) + $ E( M-1 ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 130 +* +* If remaining matrix is 2-by-2, use SLAE2 or SLAEV2 +* to compute its eigensystem. +* + IF( M.EQ.L-1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL SLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S ) + WORK( M ) = C + WORK( N-1+M ) = S + CALL SLASR( 'R', 'V', 'F', N, 2, WORK( M ), + $ WORK( N-1+M ), Z( 1, L-1 ), LDZ ) + ELSE + CALL SLAE2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2 ) + END IF + D( L-1 ) = RT1 + D( L ) = RT2 + E( L-1 ) = ZERO + L = L - 2 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) ) + R = SLAPY2( G, ONE ) + G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + LM1 = L - 1 + DO 120 I = M, LM1 + F = S*E( I ) + B = C*E( I ) + CALL SLARTG( G, F, C, S, R ) + IF( I.NE.M ) + $ E( I-1 ) = R + G = D( I ) - P + R = ( D( I+1 )-G )*S + TWO*C*B + P = S*R + D( I ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = S + END IF +* + 120 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = L - M + 1 + CALL SLASR( 'R', 'V', 'F', N, MM, WORK( M ), WORK( N-1+M ), + $ Z( 1, M ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( LM1 ) = G + GO TO 90 +* +* Eigenvalue found. +* + 130 CONTINUE + D( L ) = P +* + L = L - 1 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 +* + END IF +* +* Undo scaling if necessary +* + 140 CONTINUE + IF( ISCALE.EQ.1 ) THEN + CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + ELSE IF( ISCALE.EQ.2 ) THEN + CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + END IF +* +* Check for no convergence to an eigenvalue after a total +* of N*MAXIT iterations. +* + IF( JTOT.LT.NMAXIT ) + $ GO TO 10 + DO 150 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 150 CONTINUE + GO TO 190 +* +* Order eigenvalues and eigenvectors. +* + 160 CONTINUE + IF( ICOMPZ.EQ.0 ) THEN +* +* Use Quick Sort +* + CALL SLASRT( 'I', N, D, INFO ) +* + ELSE +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 180 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 170 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 170 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 180 CONTINUE + END IF +* + 190 CONTINUE + RETURN +* +* End of SSTEQR +* + END diff --git a/dspl/liblapack/SRC/ssterf.f b/dspl/liblapack/SRC/ssterf.f new file mode 100644 index 0000000..02bf5b9 --- /dev/null +++ b/dspl/liblapack/SRC/ssterf.f @@ -0,0 +1,425 @@ +*> \brief \b SSTERF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSTERF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSTERF( N, D, E, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSTERF computes all eigenvalues of a symmetric tridiagonal matrix +*> using the Pal-Walker-Kahan variant of the QL or QR algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix. +*> On exit, if INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: the algorithm failed to find all of the eigenvalues in +*> a total of 30*N iterations; if INFO = i, then i +*> elements of E have not converged to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup auxOTHERcomputational +* +* ===================================================================== + SUBROUTINE SSTERF( N, D, E, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO, THREE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0, + $ THREE = 3.0E0 ) + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) +* .. +* .. Local Scalars .. + INTEGER I, ISCALE, JTOT, L, L1, LEND, LENDSV, LSV, M, + $ NMAXIT + REAL ALPHA, ANORM, BB, C, EPS, EPS2, GAMMA, OLDC, + $ OLDGAM, P, R, RT1, RT2, RTE, S, SAFMAX, SAFMIN, + $ SIGMA, SSFMAX, SSFMIN +* .. +* .. External Functions .. + REAL SLAMCH, SLANST, SLAPY2 + EXTERNAL SLAMCH, SLANST, SLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL SLAE2, SLASCL, SLASRT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'SSTERF', -INFO ) + RETURN + END IF + IF( N.LE.1 ) + $ RETURN +* +* Determine the unit roundoff for this environment. +* + EPS = SLAMCH( 'E' ) + EPS2 = EPS**2 + SAFMIN = SLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + SSFMAX = SQRT( SAFMAX ) / THREE + SSFMIN = SQRT( SAFMIN ) / EPS2 +* +* Compute the eigenvalues of the tridiagonal matrix. +* + NMAXIT = N*MAXIT + SIGMA = ZERO + JTOT = 0 +* +* Determine where the matrix splits and choose QL or QR iteration +* for each block, according to whether top or bottom diagonal +* element is smaller. +* + L1 = 1 +* + 10 CONTINUE + IF( L1.GT.N ) + $ GO TO 170 + IF( L1.GT.1 ) + $ E( L1-1 ) = ZERO + DO 20 M = L1, N - 1 + IF( ABS( E( M ) ).LE.( SQRT( ABS( D( M ) ) )* + $ SQRT( ABS( D( M+1 ) ) ) )*EPS ) THEN + E( M ) = ZERO + GO TO 30 + END IF + 20 CONTINUE + M = N +* + 30 CONTINUE + L = L1 + LSV = L + LEND = M + LENDSV = LEND + L1 = M + 1 + IF( LEND.EQ.L ) + $ GO TO 10 +* +* Scale submatrix in rows and columns L to LEND +* + ANORM = SLANST( 'M', LEND-L+1, D( L ), E( L ) ) + ISCALE = 0 + IF( ANORM.EQ.ZERO ) + $ GO TO 10 + IF( ANORM.GT.SSFMAX ) THEN + ISCALE = 1 + CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, + $ INFO ) + ELSE IF( ANORM.LT.SSFMIN ) THEN + ISCALE = 2 + CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, + $ INFO ) + END IF +* + DO 40 I = L, LEND - 1 + E( I ) = E( I )**2 + 40 CONTINUE +* +* Choose between QL and QR iteration +* + IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN + LEND = LSV + L = LENDSV + END IF +* + IF( LEND.GE.L ) THEN +* +* QL Iteration +* +* Look for small subdiagonal element. +* + 50 CONTINUE + IF( L.NE.LEND ) THEN + DO 60 M = L, LEND - 1 + IF( ABS( E( M ) ).LE.EPS2*ABS( D( M )*D( M+1 ) ) ) + $ GO TO 70 + 60 CONTINUE + END IF + M = LEND +* + 70 CONTINUE + IF( M.LT.LEND ) + $ E( M ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 90 +* +* If remaining matrix is 2 by 2, use SLAE2 to compute its +* eigenvalues. +* + IF( M.EQ.L+1 ) THEN + RTE = SQRT( E( L ) ) + CALL SLAE2( D( L ), RTE, D( L+1 ), RT1, RT2 ) + D( L ) = RT1 + D( L+1 ) = RT2 + E( L ) = ZERO + L = L + 2 + IF( L.LE.LEND ) + $ GO TO 50 + GO TO 150 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 150 + JTOT = JTOT + 1 +* +* Form shift. +* + RTE = SQRT( E( L ) ) + SIGMA = ( D( L+1 )-P ) / ( TWO*RTE ) + R = SLAPY2( SIGMA, ONE ) + SIGMA = P - ( RTE / ( SIGMA+SIGN( R, SIGMA ) ) ) +* + C = ONE + S = ZERO + GAMMA = D( M ) - SIGMA + P = GAMMA*GAMMA +* +* Inner loop +* + DO 80 I = M - 1, L, -1 + BB = E( I ) + R = P + BB + IF( I.NE.M-1 ) + $ E( I+1 ) = S*R + OLDC = C + C = P / R + S = BB / R + OLDGAM = GAMMA + ALPHA = D( I ) + GAMMA = C*( ALPHA-SIGMA ) - S*OLDGAM + D( I+1 ) = OLDGAM + ( ALPHA-GAMMA ) + IF( C.NE.ZERO ) THEN + P = ( GAMMA*GAMMA ) / C + ELSE + P = OLDC*BB + END IF + 80 CONTINUE +* + E( L ) = S*P + D( L ) = SIGMA + GAMMA + GO TO 50 +* +* Eigenvalue found. +* + 90 CONTINUE + D( L ) = P +* + L = L + 1 + IF( L.LE.LEND ) + $ GO TO 50 + GO TO 150 +* + ELSE +* +* QR Iteration +* +* Look for small superdiagonal element. +* + 100 CONTINUE + DO 110 M = L, LEND + 1, -1 + IF( ABS( E( M-1 ) ).LE.EPS2*ABS( D( M )*D( M-1 ) ) ) + $ GO TO 120 + 110 CONTINUE + M = LEND +* + 120 CONTINUE + IF( M.GT.LEND ) + $ E( M-1 ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 140 +* +* If remaining matrix is 2 by 2, use SLAE2 to compute its +* eigenvalues. +* + IF( M.EQ.L-1 ) THEN + RTE = SQRT( E( L-1 ) ) + CALL SLAE2( D( L ), RTE, D( L-1 ), RT1, RT2 ) + D( L ) = RT1 + D( L-1 ) = RT2 + E( L-1 ) = ZERO + L = L - 2 + IF( L.GE.LEND ) + $ GO TO 100 + GO TO 150 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 150 + JTOT = JTOT + 1 +* +* Form shift. +* + RTE = SQRT( E( L-1 ) ) + SIGMA = ( D( L-1 )-P ) / ( TWO*RTE ) + R = SLAPY2( SIGMA, ONE ) + SIGMA = P - ( RTE / ( SIGMA+SIGN( R, SIGMA ) ) ) +* + C = ONE + S = ZERO + GAMMA = D( M ) - SIGMA + P = GAMMA*GAMMA +* +* Inner loop +* + DO 130 I = M, L - 1 + BB = E( I ) + R = P + BB + IF( I.NE.M ) + $ E( I-1 ) = S*R + OLDC = C + C = P / R + S = BB / R + OLDGAM = GAMMA + ALPHA = D( I+1 ) + GAMMA = C*( ALPHA-SIGMA ) - S*OLDGAM + D( I ) = OLDGAM + ( ALPHA-GAMMA ) + IF( C.NE.ZERO ) THEN + P = ( GAMMA*GAMMA ) / C + ELSE + P = OLDC*BB + END IF + 130 CONTINUE +* + E( L-1 ) = S*P + D( L ) = SIGMA + GAMMA + GO TO 100 +* +* Eigenvalue found. +* + 140 CONTINUE + D( L ) = P +* + L = L - 1 + IF( L.GE.LEND ) + $ GO TO 100 + GO TO 150 +* + END IF +* +* Undo scaling if necessary +* + 150 CONTINUE + IF( ISCALE.EQ.1 ) + $ CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + IF( ISCALE.EQ.2 ) + $ CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) +* +* Check for no convergence to an eigenvalue after a total +* of N*MAXIT iterations. +* + IF( JTOT.LT.NMAXIT ) + $ GO TO 10 + DO 160 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 160 CONTINUE + GO TO 180 +* +* Sort eigenvalues in increasing order. +* + 170 CONTINUE + CALL SLASRT( 'I', N, D, INFO ) +* + 180 CONTINUE + RETURN +* +* End of SSTERF +* + END diff --git a/dspl/liblapack/SRC/sstev.f b/dspl/liblapack/SRC/sstev.f new file mode 100644 index 0000000..bd62b98 --- /dev/null +++ b/dspl/liblapack/SRC/sstev.f @@ -0,0 +1,235 @@ +*> \brief SSTEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSTEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSTEV( JOBZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ +* INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSTEV computes all eigenvalues and, optionally, eigenvectors of a +*> real symmetric tridiagonal matrix A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix +*> A. +*> On exit, if INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix A, stored in elements 1 to N-1 of E. +*> On exit, the contents of E are destroyed. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with D(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (max(1,2*N-2)) +*> If JOBZ = 'N', WORK is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of E did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHEReigen +* +* ===================================================================== + SUBROUTINE SSTEV( JOBZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + REAL D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTZ + INTEGER IMAX, ISCALE + REAL BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM, + $ TNRM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANST + EXTERNAL LSAME, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSTEQR, SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -6 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSTEV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + TNRM = SLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / TNRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL SSCAL( N, SIGMA, D, 1 ) + CALL SSCAL( N-1, SIGMA, E( 1 ), 1 ) + END IF +* +* For eigenvalues only, call SSTERF. For eigenvalues and +* eigenvectors, call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, D, E, INFO ) + ELSE + CALL SSTEQR( 'I', N, D, E, Z, LDZ, WORK, INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, D, 1 ) + END IF +* + RETURN +* +* End of SSTEV +* + END diff --git a/dspl/liblapack/SRC/sstevd.f b/dspl/liblapack/SRC/sstevd.f new file mode 100644 index 0000000..7e239a3 --- /dev/null +++ b/dspl/liblapack/SRC/sstevd.f @@ -0,0 +1,302 @@ +*> \brief SSTEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSTEVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSTEVD( JOBZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK, +* LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ +* INTEGER INFO, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSTEVD computes all eigenvalues and, optionally, eigenvectors of a +*> real symmetric tridiagonal matrix. If eigenvectors are desired, it +*> uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix +*> A. +*> On exit, if INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix A, stored in elements 1 to N-1 of E. +*> On exit, the contents of E are destroyed. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with D(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, +*> dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If JOBZ = 'N' or N <= 1 then LWORK must be at least 1. +*> If JOBZ = 'V' and N > 1 then LWORK must be at least +*> ( 1 + 4*N + N**2 ). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If JOBZ = 'N' or N <= 1 then LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1 then LIWORK must be at least 3+5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of E did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHEReigen +* +* ===================================================================== + SUBROUTINE SSTEVD( JOBZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ + INTEGER INFO, LDZ, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL D( * ), E( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTZ + INTEGER ISCALE, LIWMIN, LWMIN + REAL BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM, + $ TNRM +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANST + EXTERNAL LSAME, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSTEDC, SSTERF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + LIWMIN = 1 + LWMIN = 1 + IF( N.GT.1 .AND. WANTZ ) THEN + LWMIN = 1 + 4*N + N**2 + LIWMIN = 3 + 5*N + END IF +* + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -6 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSTEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + TNRM = SLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / TNRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL SSCAL( N, SIGMA, D, 1 ) + CALL SSCAL( N-1, SIGMA, E( 1 ), 1 ) + END IF +* +* For eigenvalues only, call SSTERF. For eigenvalues and +* eigenvectors, call SSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, D, E, INFO ) + ELSE + CALL SSTEDC( 'I', N, D, E, Z, LDZ, WORK, LWORK, IWORK, LIWORK, + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL SSCAL( N, ONE / SIGMA, D, 1 ) +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of SSTEVD +* + END diff --git a/dspl/liblapack/SRC/sstevr.f b/dspl/liblapack/SRC/sstevr.f new file mode 100644 index 0000000..16635c8 --- /dev/null +++ b/dspl/liblapack/SRC/sstevr.f @@ -0,0 +1,582 @@ +*> \brief SSTEVR computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSTEVR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSTEVR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL, +* M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, +* LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE +* INTEGER IL, INFO, IU, LDZ, LIWORK, LWORK, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* REAL D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSTEVR computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric tridiagonal matrix T. Eigenvalues and +*> eigenvectors can be selected by specifying either a range of values +*> or a range of indices for the desired eigenvalues. +*> +*> Whenever possible, SSTEVR calls SSTEMR to compute the +*> eigenspectrum using Relatively Robust Representations. SSTEMR +*> computes eigenvalues by the dqds algorithm, while orthogonal +*> eigenvectors are computed from various "good" L D L^T representations +*> (also known as Relatively Robust Representations). Gram-Schmidt +*> orthogonalization is avoided as far as possible. More specifically, +*> the various steps of the algorithm are as follows. For the i-th +*> unreduced block of T, +*> (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T +*> is a relatively robust representation, +*> (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high +*> relative accuracy by the dqds algorithm, +*> (c) If there is a cluster of close eigenvalues, "choose" sigma_i +*> close to the cluster, and go to step (a), +*> (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, +*> compute the corresponding eigenvector by forming a +*> rank-revealing twisted factorization. +*> The desired accuracy of the output can be specified by the input +*> parameter ABSTOL. +*> +*> For more details, see "A new O(n^2) algorithm for the symmetric +*> tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, +*> Computer Science Division Technical Report No. UCB//CSD-97-971, +*> UC Berkeley, May 1997. +*> +*> +*> Note 1 : SSTEVR calls SSTEMR when the full spectrum is requested +*> on machines which conform to the ieee-754 floating point standard. +*> SSTEVR calls SSTEBZ and SSTEIN on non-ieee machines and +*> when partial spectrum requests are made. +*> +*> Normal execution of SSTEMR may create NaNs and infinities and +*> hence may abort due to a floating point exception in environments +*> which do not handle NaNs and infinities in the ieee standard default +*> manner. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> For RANGE = 'V' or 'I' and IU - IL < N - 1, SSTEBZ and +*> SSTEIN are called +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix +*> A. +*> On exit, D may be multiplied by a constant factor chosen +*> to avoid over/underflow in computing the eigenvalues. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (max(1,N-1)) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix A in elements 1 to N-1 of E. +*> On exit, E may be multiplied by a constant factor chosen +*> to avoid over/underflow in computing the eigenvalues. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> +*> If high relative accuracy is important, set ABSTOL to +*> SLAMCH( 'Safe minimum' ). Doing so will guarantee that +*> eigenvalues are computed to high relative accuracy when +*> possible in future releases. The current code does not +*> make any guarantees about high relative accuracy, but +*> future releases will. See J. Barlow and J. Demmel, +*> "Computing Accurate Eigensystems of Scaled Diagonally +*> Dominant Matrices", LAPACK Working Note #7, for a discussion +*> of which matrices define their eigenvalues to high relative +*> accuracy. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, max(1,M) ) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). +*> Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal (and +*> minimal) LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 20*N. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal (and +*> minimal) LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= 10*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: Internal error +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHEReigen +* +*> \par Contributors: +* ================== +*> +*> Inderjit Dhillon, IBM Almaden, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Ken Stanley, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Jason Riedy, Computer Science Division, University of +*> California at Berkeley, USA \n +*> +* ===================================================================== + SUBROUTINE SSTEVR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL, + $ M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE + INTEGER IL, INFO, IU, LDZ, LIWORK, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + REAL D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, TEST, LQUERY, VALEIG, WANTZ, + $ TRYRAC + CHARACTER ORDER + INTEGER I, IEEEOK, IMAX, INDIBL, INDIFL, INDISP, + $ INDIWO, ISCALE, J, JJ, LIWMIN, LWMIN, NSPLIT + REAL BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM, + $ TMP1, TNRM, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANST + EXTERNAL LSAME, ILAENV, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SSCAL, SSTEBZ, SSTEMR, SSTEIN, SSTERF, + $ SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* +* Test the input parameters. +* + IEEEOK = ILAENV( 10, 'SSTEVR', 'N', 1, 2, 3, 4 ) +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) ) + LWMIN = MAX( 1, 20*N ) + LIWMIN = MAX(1, 10*N ) +* +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -7 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -9 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -14 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -17 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSTEVR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = D( 1 ) + ELSE + IF( VL.LT.D( 1 ) .AND. VU.GE.D( 1 ) ) THEN + M = 1 + W( 1 ) = D( 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + IF( VALEIG ) THEN + VLL = VL + VUU = VU + END IF +* + TNRM = SLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / TNRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL SSCAL( N, SIGMA, D, 1 ) + CALL SSCAL( N-1, SIGMA, E( 1 ), 1 ) + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF + +* Initialize indices into workspaces. Note: These indices are used only +* if SSTERF or SSTEMR fail. + +* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in SSTEBZ and +* stores the block indices of each of the M<=N eigenvalues. + INDIBL = 1 +* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in SSTEBZ and +* stores the starting and finishing indices of each block. + INDISP = INDIBL + N +* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors +* that corresponding to eigenvectors that fail to converge in +* SSTEIN. This information is discarded; if any fail, the driver +* returns INFO > 0. + INDIFL = INDISP + N +* INDIWO is the offset of the remaining integer workspace. + INDIWO = INDISP + N +* +* If all eigenvalues are desired, then +* call SSTERF or SSTEMR. If this fails for some eigenvalue, then +* try SSTEBZ. +* +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG .OR. TEST ) .AND. IEEEOK.EQ.1 ) THEN + CALL SCOPY( N-1, E( 1 ), 1, WORK( 1 ), 1 ) + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N, D, 1, W, 1 ) + CALL SSTERF( N, W, WORK, INFO ) + ELSE + CALL SCOPY( N, D, 1, WORK( N+1 ), 1 ) + IF (ABSTOL .LE. TWO*N*EPS) THEN + TRYRAC = .TRUE. + ELSE + TRYRAC = .FALSE. + END IF + CALL SSTEMR( JOBZ, 'A', N, WORK( N+1 ), WORK, VL, VU, IL, + $ IU, M, W, Z, LDZ, N, ISUPPZ, TRYRAC, + $ WORK( 2*N+1 ), LWORK-2*N, IWORK, LIWORK, INFO ) +* + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 10 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTOL, D, E, M, + $ NSPLIT, W, IWORK( INDIBL ), IWORK( INDISP ), WORK, + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL SSTEIN( N, D, E, M, W, IWORK( INDIBL ), IWORK( INDISP ), + $ Z, LDZ, WORK, IWORK( INDIWO ), IWORK( INDIFL ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 10 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 30 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 20 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 20 CONTINUE +* + IF( I.NE.0 ) THEN + W( I ) = W( J ) + W( J ) = TMP1 + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + END IF + 30 CONTINUE + END IF +* +* Causes problems with tests 19 & 20: +* IF (wantz .and. INDEIG ) Z( 1,1) = Z(1,1) / 1.002 + .002 +* +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of SSTEVR +* + END diff --git a/dspl/liblapack/SRC/sstevx.f b/dspl/liblapack/SRC/sstevx.f new file mode 100644 index 0000000..bb7b3e5 --- /dev/null +++ b/dspl/liblapack/SRC/sstevx.f @@ -0,0 +1,464 @@ +*> \brief SSTEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSTEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSTEVX( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL, +* M, W, Z, LDZ, WORK, IWORK, IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE +* INTEGER IL, INFO, IU, LDZ, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSTEVX computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric tridiagonal matrix A. Eigenvalues and +*> eigenvectors can be selected by specifying either a range of values +*> or a range of indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix +*> A. +*> On exit, D may be multiplied by a constant factor chosen +*> to avoid over/underflow in computing the eigenvalues. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (max(1,N-1)) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix A in elements 1 to N-1 of E. +*> On exit, E may be multiplied by a constant factor chosen +*> to avoid over/underflow in computing the eigenvalues. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less +*> than or equal to zero, then EPS*|T| will be used in +*> its place, where |T| is the 1-norm of the tridiagonal +*> matrix. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, max(1,M) ) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge (INFO > 0), then that +*> column of Z contains the latest approximation to the +*> eigenvector, and the index of the eigenvector is returned +*> in IFAIL. If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHEReigen +* +* ===================================================================== + SUBROUTINE SSTEVX( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL, + $ M, W, Z, LDZ, WORK, IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE + INTEGER IL, INFO, IU, LDZ, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, TEST, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IMAX, INDIBL, INDISP, INDIWO, INDWRK, + $ ISCALE, ITMP1, J, JJ, NSPLIT + REAL BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM, + $ TMP1, TNRM, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH, SLANST + EXTERNAL LSAME, SLAMCH, SLANST +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SSCAL, SSTEBZ, SSTEIN, SSTEQR, SSTERF, + $ SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -7 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -9 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -14 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSTEVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = D( 1 ) + ELSE + IF( VL.LT.D( 1 ) .AND. VU.GE.D( 1 ) ) THEN + M = 1 + W( 1 ) = D( 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + IF ( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + ENDIF + TNRM = SLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / TNRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL SSCAL( N, SIGMA, D, 1 ) + CALL SSCAL( N-1, SIGMA, E( 1 ), 1 ) + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* If all eigenvalues are desired and ABSTOL is less than zero, then +* call SSTERF or SSTEQR. If this fails for some eigenvalue, then +* try SSTEBZ. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG .OR. TEST ) .AND. ( ABSTOL.LE.ZERO ) ) THEN + CALL SCOPY( N, D, 1, W, 1 ) + CALL SCOPY( N-1, E( 1 ), 1, WORK( 1 ), 1 ) + INDWRK = N + 1 + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK, INFO ) + ELSE + CALL SSTEQR( 'I', N, W, WORK, Z, LDZ, WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 20 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDWRK = 1 + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTOL, D, E, M, + $ NSPLIT, W, IWORK( INDIBL ), IWORK( INDISP ), + $ WORK( INDWRK ), IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL SSTEIN( N, D, E, M, W, IWORK( INDIBL ), IWORK( INDISP ), + $ Z, LDZ, WORK( INDWRK ), IWORK( INDIWO ), IFAIL, + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 20 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 40 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 30 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 30 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 40 CONTINUE + END IF +* + RETURN +* +* End of SSTEVX +* + END diff --git a/dspl/liblapack/SRC/ssycon.f b/dspl/liblapack/SRC/ssycon.f new file mode 100644 index 0000000..a0a999c --- /dev/null +++ b/dspl/liblapack/SRC/ssycon.f @@ -0,0 +1,244 @@ +*> \brief \b SSYCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYCON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYCON estimates the reciprocal of the condition number (in the +*> 1-norm) of a real symmetric matrix A using the factorization +*> A = U*D*U**T or A = L*D*L**T computed by SSYTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by SSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSYTRF. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + SUBROUTINE SSYCON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + REAL AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**T) or inv(U*D*U**T). +* + CALL SSYTRS( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of SSYCON +* + END diff --git a/dspl/liblapack/SRC/ssycon_3.f b/dspl/liblapack/SRC/ssycon_3.f new file mode 100644 index 0000000..f91a527 --- /dev/null +++ b/dspl/liblapack/SRC/ssycon_3.f @@ -0,0 +1,285 @@ +*> \brief \b SSYCON_3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYCON_3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYCON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL A( LDA, * ), E ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> SSYCON_3 estimates the reciprocal of the condition number (in the +*> 1-norm) of a real symmetric matrix A using the factorization +*> computed by DSYTRF_RK or DSYTRF_BK: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> This routine uses BLAS3 solver SSYTRS_3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix: +*> = 'U': Upper triangular, form is A = P*U*D*(U**T)*(P**T); +*> = 'L': Lower triangular, form is A = P*L*D*(L**T)*(P**T). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> Diagonal of the block diagonal matrix D and factors U or L +*> as computed by SSYTRF_RK and SSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSYTRF_RK or SSYTRF_BK. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup singleSYcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> June 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYCON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, + $ WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL A( LDA, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + REAL AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SSYTRS_3, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYCON_3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + END DO + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + END DO + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**T) or inv(U*D*U**T). +* + CALL SSYTRS_3( UPLO, N, 1, A, LDA, E, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of DSYCON_3 +* + END diff --git a/dspl/liblapack/SRC/ssycon_rook.f b/dspl/liblapack/SRC/ssycon_rook.f new file mode 100644 index 0000000..bd2557a --- /dev/null +++ b/dspl/liblapack/SRC/ssycon_rook.f @@ -0,0 +1,258 @@ +*> \brief SSYCON_ROOK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYCON_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYCON_ROOK( UPLO, N, A, LDA, IPIV, ANORM, RCOND, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* REAL ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYCON_ROOK estimates the reciprocal of the condition number (in the +*> 1-norm) of a real symmetric matrix A using the factorization +*> A = U*D*U**T or A = L*D*L**T computed by SSYTRF_ROOK. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by SSYTRF_ROOK. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSYTRF_ROOK. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is REAL +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYCON_ROOK( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + REAL ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + REAL AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SSYTRS_ROOK, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYCON_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**T) or inv(U*D*U**T). +* + CALL SSYTRS_ROOK( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of SSYCON_ROOK +* + END diff --git a/dspl/liblapack/SRC/ssyconv.f b/dspl/liblapack/SRC/ssyconv.f new file mode 100644 index 0000000..187528a --- /dev/null +++ b/dspl/liblapack/SRC/ssyconv.f @@ -0,0 +1,366 @@ +*> \brief \b SSYCONV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYCONV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYCONV( UPLO, WAY, N, A, LDA, IPIV, E, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, WAY +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYCONV convert A given by TRF into L and D and vice-versa. +*> Get Non-diag elements of D (returned in workspace) and +*> apply or reverse permutation done in TRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] WAY +*> \verbatim +*> WAY is CHARACTER*1 +*> = 'C': Convert +*> = 'R': Revert +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by SSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSYTRF. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N) +*> E stores the supdiagonal/subdiagonal of the symmetric 1-by-1 +*> or 2-by-2 block diagonal matrix D in LDLT. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + SUBROUTINE SSYCONV( UPLO, WAY, N, A, LDA, IPIV, E, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO, WAY + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Local Scalars .. + LOGICAL UPPER, CONVERT + INTEGER I, IP, J + REAL TEMP +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + CONVERT = LSAME( WAY, 'C' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.CONVERT .AND. .NOT.LSAME( WAY, 'R' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYCONV', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* A is UPPER +* +* Convert A (A is upper) +* +* Convert VALUE +* + IF ( CONVERT ) THEN + I=N + E(1)=ZERO + DO WHILE ( I .GT. 1 ) + IF( IPIV(I) .LT. 0 ) THEN + E(I)=A(I-1,I) + E(I-1)=ZERO + A(I-1,I)=ZERO + I=I-1 + ELSE + E(I)=ZERO + ENDIF + I=I-1 + END DO +* +* Convert PERMUTATIONS +* + I=N + DO WHILE ( I .GE. 1 ) + IF( IPIV(I) .GT. 0) THEN + IP=IPIV(I) + IF( I .LT. N) THEN + DO 12 J= I+1,N + TEMP=A(IP,J) + A(IP,J)=A(I,J) + A(I,J)=TEMP + 12 CONTINUE + ENDIF + ELSE + IP=-IPIV(I) + IF( I .LT. N) THEN + DO 13 J= I+1,N + TEMP=A(IP,J) + A(IP,J)=A(I-1,J) + A(I-1,J)=TEMP + 13 CONTINUE + ENDIF + I=I-1 + ENDIF + I=I-1 + END DO + + ELSE +* +* Revert A (A is upper) +* +* +* Revert PERMUTATIONS +* + I=1 + DO WHILE ( I .LE. N ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF( I .LT. N) THEN + DO J= I+1,N + TEMP=A(IP,J) + A(IP,J)=A(I,J) + A(I,J)=TEMP + END DO + ENDIF + ELSE + IP=-IPIV(I) + I=I+1 + IF( I .LT. N) THEN + DO J= I+1,N + TEMP=A(IP,J) + A(IP,J)=A(I-1,J) + A(I-1,J)=TEMP + END DO + ENDIF + ENDIF + I=I+1 + END DO +* +* Revert VALUE +* + I=N + DO WHILE ( I .GT. 1 ) + IF( IPIV(I) .LT. 0 ) THEN + A(I-1,I)=E(I) + I=I-1 + ENDIF + I=I-1 + END DO + END IF + ELSE +* +* A is LOWER +* + IF ( CONVERT ) THEN +* +* Convert A (A is lower) +* +* +* Convert VALUE +* + I=1 + E(N)=ZERO + DO WHILE ( I .LE. N ) + IF( I.LT.N .AND. IPIV(I) .LT. 0 ) THEN + E(I)=A(I+1,I) + E(I+1)=ZERO + A(I+1,I)=ZERO + I=I+1 + ELSE + E(I)=ZERO + ENDIF + I=I+1 + END DO +* +* Convert PERMUTATIONS +* + I=1 + DO WHILE ( I .LE. N ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF (I .GT. 1) THEN + DO 22 J= 1,I-1 + TEMP=A(IP,J) + A(IP,J)=A(I,J) + A(I,J)=TEMP + 22 CONTINUE + ENDIF + ELSE + IP=-IPIV(I) + IF (I .GT. 1) THEN + DO 23 J= 1,I-1 + TEMP=A(IP,J) + A(IP,J)=A(I+1,J) + A(I+1,J)=TEMP + 23 CONTINUE + ENDIF + I=I+1 + ENDIF + I=I+1 + END DO + ELSE +* +* Revert A (A is lower) +* +* +* Revert PERMUTATIONS +* + I=N + DO WHILE ( I .GE. 1 ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF (I .GT. 1) THEN + DO J= 1,I-1 + TEMP=A(I,J) + A(I,J)=A(IP,J) + A(IP,J)=TEMP + END DO + ENDIF + ELSE + IP=-IPIV(I) + I=I-1 + IF (I .GT. 1) THEN + DO J= 1,I-1 + TEMP=A(I+1,J) + A(I+1,J)=A(IP,J) + A(IP,J)=TEMP + END DO + ENDIF + ENDIF + I=I-1 + END DO +* +* Revert VALUE +* + I=1 + DO WHILE ( I .LE. N-1 ) + IF( IPIV(I) .LT. 0 ) THEN + A(I+1,I)=E(I) + I=I+1 + ENDIF + I=I+1 + END DO + END IF + END IF + + RETURN +* +* End of SSYCONV +* + END diff --git a/dspl/liblapack/SRC/ssyconvf.f b/dspl/liblapack/SRC/ssyconvf.f new file mode 100644 index 0000000..d43b947 --- /dev/null +++ b/dspl/liblapack/SRC/ssyconvf.f @@ -0,0 +1,559 @@ +*> \brief \b SSYCONVF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYCONVF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYCONVF( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, WAY +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> If parameter WAY = 'C': +*> SSYCONVF converts the factorization output format used in +*> SSYTRF provided on entry in parameter A into the factorization +*> output format used in SSYTRF_RK (or SSYTRF_BK) that is stored +*> on exit in parameters A and E. It also coverts in place details of +*> the intechanges stored in IPIV from the format used in SSYTRF into +*> the format used in SSYTRF_RK (or SSYTRF_BK). +*> +*> If parameter WAY = 'R': +*> SSYCONVF performs the conversion in reverse direction, i.e. +*> converts the factorization output format used in SSYTRF_RK +*> (or SSYTRF_BK) provided on entry in parameters A and E into +*> the factorization output format used in SSYTRF that is stored +*> on exit in parameter A. It also coverts in place details of +*> the intechanges stored in IPIV from the format used in SSYTRF_RK +*> (or SSYTRF_BK) into the format used in SSYTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix A. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] WAY +*> \verbatim +*> WAY is CHARACTER*1 +*> = 'C': Convert +*> = 'R': Revert +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> +*> 1) If WAY ='C': +*> +*> On entry, contains factorization details in format used in +*> SSYTRF: +*> a) all elements of the symmetric block diagonal +*> matrix D on the diagonal of A and on superdiagonal +*> (or subdiagonal) of A, and +*> b) If UPLO = 'U': multipliers used to obtain factor U +*> in the superdiagonal part of A. +*> If UPLO = 'L': multipliers used to obtain factor L +*> in the superdiagonal part of A. +*> +*> On exit, contains factorization details in format used in +*> SSYTRF_RK or SSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> 2) If WAY = 'R': +*> +*> On entry, contains factorization details in format used in +*> SSYTRF_RK or SSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> On exit, contains factorization details in format used in +*> SSYTRF: +*> a) all elements of the symmetric block diagonal +*> matrix D on the diagonal of A and on superdiagonal +*> (or subdiagonal) of A, and +*> b) If UPLO = 'U': multipliers used to obtain factor U +*> in the superdiagonal part of A. +*> If UPLO = 'L': multipliers used to obtain factor L +*> in the superdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N) +*> +*> 1) If WAY ='C': +*> +*> On entry, just a workspace. +*> +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> 2) If WAY = 'R': +*> +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> On exit, is not changed +*> \endverbatim +*. +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> +*> 1) If WAY ='C': +*> On entry, details of the interchanges and the block +*> structure of D in the format used in SSYTRF. +*> On exit, details of the interchanges and the block +*> structure of D in the format used in SSYTRF_RK +*> ( or SSYTRF_BK). +*> +*> 1) If WAY ='R': +*> On entry, details of the interchanges and the block +*> structure of D in the format used in SSYTRF_RK +*> ( or SSYTRF_BK). +*> On exit, details of the interchanges and the block +*> structure of D in the format used in SSYTRF. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup singleSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* ===================================================================== + SUBROUTINE SSYCONVF( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO, WAY + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* +* .. External Subroutines .. + EXTERNAL SSWAP, XERBLA +* .. Local Scalars .. + LOGICAL UPPER, CONVERT + INTEGER I, IP +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + CONVERT = LSAME( WAY, 'C' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.CONVERT .AND. .NOT.LSAME( WAY, 'R' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYCONVF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Begin A is UPPER +* + IF ( CONVERT ) THEN +* +* Convert A (A is upper) +* +* +* Convert VALUE +* +* Assign superdiagonal entries of D to array E and zero out +* corresponding entries in input storage A +* + I = N + E( 1 ) = ZERO + DO WHILE ( I.GT.1 ) + IF( IPIV( I ).LT.0 ) THEN + E( I ) = A( I-1, I ) + E( I-1 ) = ZERO + A( I-1, I ) = ZERO + I = I - 1 + ELSE + E( I ) = ZERO + END IF + I = I - 1 + END DO +* +* Convert PERMUTATIONS and IPIV +* +* Apply permutaions to submatrices of upper part of A +* in factorization order where i decreases from N to 1 +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(1:i,N-i:N) +* + IP = IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.I ) THEN + CALL SSWAP( N-I, A( I, I+1 ), LDA, + $ A( IP, I+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i-1 and IPIV(i) in A(1:i,N-i:N) +* + IP = -IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.(I-1) ) THEN + CALL SSWAP( N-I, A( I-1, I+1 ), LDA, + $ A( IP, I+1 ), LDA ) + END IF + END IF +* +* Convert IPIV +* There is no interchnge of rows i and and IPIV(i), +* so this should be reflected in IPIV format for +* *SYTRF_RK ( or *SYTRF_BK) +* + IPIV( I ) = I +* + I = I - 1 +* + END IF + I = I - 1 + END DO +* + ELSE +* +* Revert A (A is upper) +* +* +* Revert PERMUTATIONS and IPIV +* +* Apply permutaions to submatrices of upper part of A +* in reverse factorization order where i increases from 1 to N +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(1:i,N-i:N) +* + IP = IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.I ) THEN + CALL SSWAP( N-I, A( IP, I+1 ), LDA, + $ A( I, I+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i-1 and IPIV(i) in A(1:i,N-i:N) +* + I = I + 1 + IP = -IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.(I-1) ) THEN + CALL SSWAP( N-I, A( IP, I+1 ), LDA, + $ A( I-1, I+1 ), LDA ) + END IF + END IF +* +* Convert IPIV +* There is one interchange of rows i-1 and IPIV(i-1), +* so this should be recorded in two consecutive entries +* in IPIV format for *SYTRF +* + IPIV( I ) = IPIV( I-1 ) +* + END IF + I = I + 1 + END DO +* +* Revert VALUE +* Assign superdiagonal entries of D from array E to +* superdiagonal entries of A. +* + I = N + DO WHILE ( I.GT.1 ) + IF( IPIV( I ).LT.0 ) THEN + A( I-1, I ) = E( I ) + I = I - 1 + END IF + I = I - 1 + END DO +* +* End A is UPPER +* + END IF +* + ELSE +* +* Begin A is LOWER +* + IF ( CONVERT ) THEN +* +* Convert A (A is lower) +* +* +* Convert VALUE +* Assign subdiagonal entries of D to array E and zero out +* corresponding entries in input storage A +* + I = 1 + E( N ) = ZERO + DO WHILE ( I.LE.N ) + IF( I.LT.N .AND. IPIV(I).LT.0 ) THEN + E( I ) = A( I+1, I ) + E( I+1 ) = ZERO + A( I+1, I ) = ZERO + I = I + 1 + ELSE + E( I ) = ZERO + END IF + I = I + 1 + END DO +* +* Convert PERMUTATIONS and IPIV +* +* Apply permutaions to submatrices of lower part of A +* in factorization order where k increases from 1 to N +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(i:N,1:i-1) +* + IP = IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.I ) THEN + CALL SSWAP( I-1, A( I, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i+1 and IPIV(i) in A(i:N,1:i-1) +* + IP = -IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.(I+1) ) THEN + CALL SSWAP( I-1, A( I+1, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + END IF +* +* Convert IPIV +* There is no interchnge of rows i and and IPIV(i), +* so this should be reflected in IPIV format for +* *SYTRF_RK ( or *SYTRF_BK) +* + IPIV( I ) = I +* + I = I + 1 +* + END IF + I = I + 1 + END DO +* + ELSE +* +* Revert A (A is lower) +* +* +* Revert PERMUTATIONS and IPIV +* +* Apply permutaions to submatrices of lower part of A +* in reverse factorization order where i decreases from N to 1 +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(i:N,1:i-1) +* + IP = IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.I ) THEN + CALL SSWAP( I-1, A( IP, 1 ), LDA, + $ A( I, 1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i+1 and IPIV(i) in A(i:N,1:i-1) +* + I = I - 1 + IP = -IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.(I+1) ) THEN + CALL SSWAP( I-1, A( IP, 1 ), LDA, + $ A( I+1, 1 ), LDA ) + END IF + END IF +* +* Convert IPIV +* There is one interchange of rows i+1 and IPIV(i+1), +* so this should be recorded in consecutive entries +* in IPIV format for *SYTRF +* + IPIV( I ) = IPIV( I+1 ) +* + END IF + I = I - 1 + END DO +* +* Revert VALUE +* Assign subdiagonal entries of D from array E to +* subgiagonal entries of A. +* + I = 1 + DO WHILE ( I.LE.N-1 ) + IF( IPIV( I ).LT.0 ) THEN + A( I + 1, I ) = E( I ) + I = I + 1 + END IF + I = I + 1 + END DO +* + END IF +* +* End A is LOWER +* + END IF + + RETURN +* +* End of SSYCONVF +* + END diff --git a/dspl/liblapack/SRC/ssyconvf_rook.f b/dspl/liblapack/SRC/ssyconvf_rook.f new file mode 100644 index 0000000..833b9c6 --- /dev/null +++ b/dspl/liblapack/SRC/ssyconvf_rook.f @@ -0,0 +1,544 @@ +*> \brief \b SSYCONVF_ROOK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYCONVF_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYCONVF_ROOK( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, WAY +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> If parameter WAY = 'C': +*> SSYCONVF_ROOK converts the factorization output format used in +*> SSYTRF_ROOK provided on entry in parameter A into the factorization +*> output format used in SSYTRF_RK (or SSYTRF_BK) that is stored +*> on exit in parameters A and E. IPIV format for SSYTRF_ROOK and +*> SSYTRF_RK (or SSYTRF_BK) is the same and is not converted. +*> +*> If parameter WAY = 'R': +*> SSYCONVF_ROOK performs the conversion in reverse direction, i.e. +*> converts the factorization output format used in SSYTRF_RK +*> (or SSYTRF_BK) provided on entry in parameters A and E into +*> the factorization output format used in SSYTRF_ROOK that is stored +*> on exit in parameter A. IPIV format for SSYTRF_ROOK and +*> SSYTRF_RK (or SSYTRF_BK) is the same and is not converted. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix A. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] WAY +*> \verbatim +*> WAY is CHARACTER*1 +*> = 'C': Convert +*> = 'R': Revert +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> +*> 1) If WAY ='C': +*> +*> On entry, contains factorization details in format used in +*> SSYTRF_ROOK: +*> a) all elements of the symmetric block diagonal +*> matrix D on the diagonal of A and on superdiagonal +*> (or subdiagonal) of A, and +*> b) If UPLO = 'U': multipliers used to obtain factor U +*> in the superdiagonal part of A. +*> If UPLO = 'L': multipliers used to obtain factor L +*> in the superdiagonal part of A. +*> +*> On exit, contains factorization details in format used in +*> SSYTRF_RK or SSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> 2) If WAY = 'R': +*> +*> On entry, contains factorization details in format used in +*> SSYTRF_RK or SSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> On exit, contains factorization details in format used in +*> SSYTRF_ROOK: +*> a) all elements of the symmetric block diagonal +*> matrix D on the diagonal of A and on superdiagonal +*> (or subdiagonal) of A, and +*> b) If UPLO = 'U': multipliers used to obtain factor U +*> in the superdiagonal part of A. +*> If UPLO = 'L': multipliers used to obtain factor L +*> in the superdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is REAL array, dimension (N) +*> +*> 1) If WAY ='C': +*> +*> On entry, just a workspace. +*> +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> 2) If WAY = 'R': +*> +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> On exit, is not changed +*> \endverbatim +*. +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On entry, details of the interchanges and the block +*> structure of D as determined: +*> 1) by SSYTRF_ROOK, if WAY ='C'; +*> 2) by SSYTRF_RK (or SSYTRF_BK), if WAY ='R'. +*> The IPIV format is the same for all these routines. +*> +*> On exit, is not changed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup singleSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* ===================================================================== + SUBROUTINE SSYCONVF_ROOK( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO, WAY + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* +* .. External Subroutines .. + EXTERNAL SSWAP, XERBLA +* .. Local Scalars .. + LOGICAL UPPER, CONVERT + INTEGER I, IP, IP2 +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + CONVERT = LSAME( WAY, 'C' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.CONVERT .AND. .NOT.LSAME( WAY, 'R' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYCONVF_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Begin A is UPPER +* + IF ( CONVERT ) THEN +* +* Convert A (A is upper) +* +* +* Convert VALUE +* +* Assign superdiagonal entries of D to array E and zero out +* corresponding entries in input storage A +* + I = N + E( 1 ) = ZERO + DO WHILE ( I.GT.1 ) + IF( IPIV( I ).LT.0 ) THEN + E( I ) = A( I-1, I ) + E( I-1 ) = ZERO + A( I-1, I ) = ZERO + I = I - 1 + ELSE + E( I ) = ZERO + END IF + I = I - 1 + END DO +* +* Convert PERMUTATIONS +* +* Apply permutaions to submatrices of upper part of A +* in factorization order where i decreases from N to 1 +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(1:i,N-i:N) +* + IP = IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.I ) THEN + CALL SSWAP( N-I, A( I, I+1 ), LDA, + $ A( IP, I+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i and IPIV(i) and i-1 and IPIV(i-1) +* in A(1:i,N-i:N) +* + IP = -IPIV( I ) + IP2 = -IPIV( I-1 ) + IF( I.LT.N ) THEN + IF( IP.NE.I ) THEN + CALL SSWAP( N-I, A( I, I+1 ), LDA, + $ A( IP, I+1 ), LDA ) + END IF + IF( IP2.NE.(I-1) ) THEN + CALL SSWAP( N-I, A( I-1, I+1 ), LDA, + $ A( IP2, I+1 ), LDA ) + END IF + END IF + I = I - 1 +* + END IF + I = I - 1 + END DO +* + ELSE +* +* Revert A (A is upper) +* +* +* Revert PERMUTATIONS +* +* Apply permutaions to submatrices of upper part of A +* in reverse factorization order where i increases from 1 to N +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(1:i,N-i:N) +* + IP = IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.I ) THEN + CALL SSWAP( N-I, A( IP, I+1 ), LDA, + $ A( I, I+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i-1 and IPIV(i-1) and i and IPIV(i) +* in A(1:i,N-i:N) +* + I = I + 1 + IP = -IPIV( I ) + IP2 = -IPIV( I-1 ) + IF( I.LT.N ) THEN + IF( IP2.NE.(I-1) ) THEN + CALL SSWAP( N-I, A( IP2, I+1 ), LDA, + $ A( I-1, I+1 ), LDA ) + END IF + IF( IP.NE.I ) THEN + CALL SSWAP( N-I, A( IP, I+1 ), LDA, + $ A( I, I+1 ), LDA ) + END IF + END IF +* + END IF + I = I + 1 + END DO +* +* Revert VALUE +* Assign superdiagonal entries of D from array E to +* superdiagonal entries of A. +* + I = N + DO WHILE ( I.GT.1 ) + IF( IPIV( I ).LT.0 ) THEN + A( I-1, I ) = E( I ) + I = I - 1 + END IF + I = I - 1 + END DO +* +* End A is UPPER +* + END IF +* + ELSE +* +* Begin A is LOWER +* + IF ( CONVERT ) THEN +* +* Convert A (A is lower) +* +* +* Convert VALUE +* Assign subdiagonal entries of D to array E and zero out +* corresponding entries in input storage A +* + I = 1 + E( N ) = ZERO + DO WHILE ( I.LE.N ) + IF( I.LT.N .AND. IPIV(I).LT.0 ) THEN + E( I ) = A( I+1, I ) + E( I+1 ) = ZERO + A( I+1, I ) = ZERO + I = I + 1 + ELSE + E( I ) = ZERO + END IF + I = I + 1 + END DO +* +* Convert PERMUTATIONS +* +* Apply permutaions to submatrices of lower part of A +* in factorization order where i increases from 1 to N +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(i:N,1:i-1) +* + IP = IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.I ) THEN + CALL SSWAP( I-1, A( I, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i and IPIV(i) and i+1 and IPIV(i+1) +* in A(i:N,1:i-1) +* + IP = -IPIV( I ) + IP2 = -IPIV( I+1 ) + IF ( I.GT.1 ) THEN + IF( IP.NE.I ) THEN + CALL SSWAP( I-1, A( I, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + IF( IP2.NE.(I+1) ) THEN + CALL SSWAP( I-1, A( I+1, 1 ), LDA, + $ A( IP2, 1 ), LDA ) + END IF + END IF + I = I + 1 +* + END IF + I = I + 1 + END DO +* + ELSE +* +* Revert A (A is lower) +* +* +* Revert PERMUTATIONS +* +* Apply permutaions to submatrices of lower part of A +* in reverse factorization order where i decreases from N to 1 +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(i:N,1:i-1) +* + IP = IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.I ) THEN + CALL SSWAP( I-1, A( IP, 1 ), LDA, + $ A( I, 1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i+1 and IPIV(i+1) and i and IPIV(i) +* in A(i:N,1:i-1) +* + I = I - 1 + IP = -IPIV( I ) + IP2 = -IPIV( I+1 ) + IF ( I.GT.1 ) THEN + IF( IP2.NE.(I+1) ) THEN + CALL SSWAP( I-1, A( IP2, 1 ), LDA, + $ A( I+1, 1 ), LDA ) + END IF + IF( IP.NE.I ) THEN + CALL SSWAP( I-1, A( IP, 1 ), LDA, + $ A( I, 1 ), LDA ) + END IF + END IF +* + END IF + I = I - 1 + END DO +* +* Revert VALUE +* Assign subdiagonal entries of D from array E to +* subgiagonal entries of A. +* + I = 1 + DO WHILE ( I.LE.N-1 ) + IF( IPIV( I ).LT.0 ) THEN + A( I + 1, I ) = E( I ) + I = I + 1 + END IF + I = I + 1 + END DO +* + END IF +* +* End A is LOWER +* + END IF + + RETURN +* +* End of SSYCONVF_ROOK +* + END diff --git a/dspl/liblapack/SRC/ssyequb.f b/dspl/liblapack/SRC/ssyequb.f new file mode 100644 index 0000000..deb5510 --- /dev/null +++ b/dspl/liblapack/SRC/ssyequb.f @@ -0,0 +1,334 @@ +*> \brief \b SSYEQUB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYEQUB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, N +* REAL AMAX, SCOND +* CHARACTER UPLO +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), S( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYEQUB computes row and column scalings intended to equilibrate a +*> symmetric matrix A (with respect to the Euclidean norm) and reduce +*> its condition number. The scale factors S are computed by the BIN +*> algorithm (see references) so that the scaled matrix B with elements +*> B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of +*> the smallest possible condition number over all possible diagonal +*> scalings. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The N-by-N symmetric matrix whose scaling factors are to be +*> computed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is REAL +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is REAL +*> Largest absolute value of any matrix element. If AMAX is +*> very close to overflow or very close to underflow, the +*> matrix should be scaled. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realSYcomputational +* +*> \par References: +* ================ +*> +*> Livne, O.E. and Golub, G.H., "Scaling by Binormalization", \n +*> Numerical Algorithms, vol. 35, no. 1, pp. 97-120, January 2004. \n +*> DOI 10.1023/B:NUMA.0000016606.32820.69 \n +*> Tech report version: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.3.1679 +*> +* ===================================================================== + SUBROUTINE SSYEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N + REAL AMAX, SCOND + CHARACTER UPLO +* .. +* .. Array Arguments .. + REAL A( LDA, * ), S( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E0, ZERO = 0.0E0 ) + INTEGER MAX_ITER + PARAMETER ( MAX_ITER = 100 ) +* .. +* .. Local Scalars .. + INTEGER I, J, ITER + REAL AVG, STD, TOL, C0, C1, C2, T, U, SI, D, BASE, + $ SMIN, SMAX, SMLNUM, BIGNUM, SCALE, SUMSQ + LOGICAL UP +* .. +* .. External Functions .. + REAL SLAMCH + LOGICAL LSAME + EXTERNAL LSAME, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SLASSQ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, INT, LOG, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT. ( LSAME( UPLO, 'U' ) .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -1 + ELSE IF ( N .LT. 0 ) THEN + INFO = -2 + ELSE IF ( LDA .LT. MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF ( INFO .NE. 0 ) THEN + CALL XERBLA( 'SSYEQUB', -INFO ) + RETURN + END IF + + UP = LSAME( UPLO, 'U' ) + AMAX = ZERO +* +* Quick return if possible. +* + IF ( N .EQ. 0 ) THEN + SCOND = ONE + RETURN + END IF + + DO I = 1, N + S( I ) = ZERO + END DO + + AMAX = ZERO + IF ( UP ) THEN + DO J = 1, N + DO I = 1, J-1 + S( I ) = MAX( S( I ), ABS( A( I, J ) ) ) + S( J ) = MAX( S( J ), ABS( A( I, J ) ) ) + AMAX = MAX( AMAX, ABS( A( I, J ) ) ) + END DO + S( J ) = MAX( S( J ), ABS( A( J, J ) ) ) + AMAX = MAX( AMAX, ABS( A( J, J ) ) ) + END DO + ELSE + DO J = 1, N + S( J ) = MAX( S( J ), ABS( A( J, J ) ) ) + AMAX = MAX( AMAX, ABS( A( J, J ) ) ) + DO I = J+1, N + S( I ) = MAX( S( I ), ABS( A( I, J ) ) ) + S( J ) = MAX( S( J ), ABS( A( I, J ) ) ) + AMAX = MAX( AMAX, ABS( A( I, J ) ) ) + END DO + END DO + END IF + DO J = 1, N + S( J ) = 1.0E0 / S( J ) + END DO + + TOL = ONE / SQRT( 2.0E0 * N ) + + DO ITER = 1, MAX_ITER + SCALE = 0.0E0 + SUMSQ = 0.0E0 +* beta = |A|s + DO I = 1, N + WORK( I ) = ZERO + END DO + IF ( UP ) THEN + DO J = 1, N + DO I = 1, J-1 + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) * S( J ) + WORK( J ) = WORK( J ) + ABS( A( I, J ) ) * S( I ) + END DO + WORK( J ) = WORK( J ) + ABS( A( J, J ) ) * S( J ) + END DO + ELSE + DO J = 1, N + WORK( J ) = WORK( J ) + ABS( A( J, J ) ) * S( J ) + DO I = J+1, N + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) * S( J ) + WORK( J ) = WORK( J ) + ABS( A( I, J ) ) * S( I ) + END DO + END DO + END IF + +* avg = s^T beta / n + AVG = 0.0E0 + DO I = 1, N + AVG = AVG + S( I )*WORK( I ) + END DO + AVG = AVG / N + + STD = 0.0E0 + DO I = N+1, 2*N + WORK( I ) = S( I-N ) * WORK( I-N ) - AVG + END DO + CALL SLASSQ( N, WORK( N+1 ), 1, SCALE, SUMSQ ) + STD = SCALE * SQRT( SUMSQ / N ) + + IF ( STD .LT. TOL * AVG ) GOTO 999 + + DO I = 1, N + T = ABS( A( I, I ) ) + SI = S( I ) + C2 = ( N-1 ) * T + C1 = ( N-2 ) * ( WORK( I ) - T*SI ) + C0 = -(T*SI)*SI + 2*WORK( I )*SI - N*AVG + D = C1*C1 - 4*C0*C2 + + IF ( D .LE. 0 ) THEN + INFO = -1 + RETURN + END IF + SI = -2*C0 / ( C1 + SQRT( D ) ) + + D = SI - S( I ) + U = ZERO + IF ( UP ) THEN + DO J = 1, I + T = ABS( A( J, I ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + DO J = I+1,N + T = ABS( A( I, J ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + ELSE + DO J = 1, I + T = ABS( A( I, J ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + DO J = I+1,N + T = ABS( A( J, I ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + END IF + + AVG = AVG + ( U + WORK( I ) ) * D / N + S( I ) = SI + END DO + END DO + + 999 CONTINUE + + SMLNUM = SLAMCH( 'SAFEMIN' ) + BIGNUM = ONE / SMLNUM + SMIN = BIGNUM + SMAX = ZERO + T = ONE / SQRT( AVG ) + BASE = SLAMCH( 'B' ) + U = ONE / LOG( BASE ) + DO I = 1, N + S( I ) = BASE ** INT( U * LOG( S( I ) * T ) ) + SMIN = MIN( SMIN, S( I ) ) + SMAX = MAX( SMAX, S( I ) ) + END DO + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) +* + END diff --git a/dspl/liblapack/SRC/ssyev.f b/dspl/liblapack/SRC/ssyev.f new file mode 100644 index 0000000..f3cfd5a --- /dev/null +++ b/dspl/liblapack/SRC/ssyev.f @@ -0,0 +1,286 @@ +*> \brief SSYEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYEV computes all eigenvalues and, optionally, eigenvectors of a +*> real symmetric matrix A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,3*N-1). +*> For optimal efficiency, LWORK >= (NB+2)*N, +*> where NB is the blocksize for SSYTRD returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYeigen +* +* ===================================================================== + SUBROUTINE SSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE, + $ LLWORK, LWKOPT, NB + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANSY + EXTERNAL ILAENV, LSAME, SLAMCH, SLANSY +* .. +* .. External Subroutines .. + EXTERNAL SLASCL, SORGTR, SSCAL, SSTEQR, SSTERF, SSYTRD, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, ( NB+2 )*N ) + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, 3*N-1 ) .AND. .NOT.LQUERY ) + $ INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + WORK( 1 ) = 2 + IF( WANTZ ) + $ A( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL SLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call SSYTRD to reduce symmetric matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + CALL SSYTRD( UPLO, N, A, LDA, W, WORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* SORGTR to generate the orthogonal matrix, then call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL SORGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ), + $ LLWORK, IINFO ) + CALL SSTEQR( JOBZ, N, W, WORK( INDE ), A, LDA, WORK( INDTAU ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SSYEV +* + END diff --git a/dspl/liblapack/SRC/ssyev_2stage.f b/dspl/liblapack/SRC/ssyev_2stage.f new file mode 100644 index 0000000..1667669 --- /dev/null +++ b/dspl/liblapack/SRC/ssyev_2stage.f @@ -0,0 +1,348 @@ +*> \brief SSYEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices +* +* @generated from dsyev_2stage.f, fortran d -> s, Sat Nov 5 23:55:51 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYEV_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, +* INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYEV_2STAGE computes all eigenvalues and, optionally, eigenvectors of a +*> real symmetric matrix A using the 2stage technique for +*> the reduction to tridiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension LWORK +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + 2*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + 2*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realSYeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, + $ INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE, + $ LLWORK, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + REAL SLAMCH, SLANSY + EXTERNAL LSAME, SLAMCH, SLANSY, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL SLASCL, SORGTR, SSCAL, SSTEQR, SSTERF, + $ XERBLA, SSYTRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + KD = ILAENV2STAGE( 1, 'SSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'SSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = 2*N + LHTRD + LWTRD + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYEV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + WORK( 1 ) = 2 + IF( WANTZ ) + $ A( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL SLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call SSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + INDHOUS = INDTAU + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL SSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK( INDE ), + $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* SORGTR to generate the orthogonal matrix, then call SSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE +* Not available in this release, and agrument checking should not +* let it getting here + RETURN + CALL SORGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ), + $ LLWORK, IINFO ) + CALL SSTEQR( JOBZ, N, W, WORK( INDE ), A, LDA, WORK( INDTAU ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of SSYEV_2STAGE +* + END diff --git a/dspl/liblapack/SRC/ssyevd.f b/dspl/liblapack/SRC/ssyevd.f new file mode 100644 index 0000000..3ba95ca --- /dev/null +++ b/dspl/liblapack/SRC/ssyevd.f @@ -0,0 +1,355 @@ +*> \brief SSYEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYEVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK, +* LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYEVD computes all eigenvalues and, optionally, eigenvectors of a +*> real symmetric matrix A. If eigenvectors are desired, it uses a +*> divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> +*> Because of large use of BLAS of level 3, SSYEVD needs N**2 more +*> workspace than SSYEVX. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, +*> dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LWORK must be at least 2*N+1. +*> If JOBZ = 'V' and N > 1, LWORK must be at least +*> 1 + 6*N + 2*N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i and JOBZ = 'N', then the algorithm failed +*> to converge; i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> if INFO = i and JOBZ = 'V', then the algorithm failed +*> to compute an eigenvalue while working on the submatrix +*> lying in rows and columns INFO/(N+1) through +*> mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYeigen +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA \n +*> Modified by Francoise Tisseur, University of Tennessee \n +*> Modified description of INFO. Sven, 16 Feb 05. \n +*> +* ===================================================================== + SUBROUTINE SSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. +* + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, INDE, INDTAU, INDWK2, INDWRK, ISCALE, + $ LIOPT, LIWMIN, LLWORK, LLWRK2, LOPT, LWMIN + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANSY + EXTERNAL ILAENV, LSAME, SLAMCH, SLANSY +* .. +* .. External Subroutines .. + EXTERNAL SLACPY, SLASCL, SORMTR, SSCAL, SSTEDC, SSTERF, + $ SSYTRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + LOPT = LWMIN + LIOPT = LIWMIN + ELSE + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 6*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + 1 + END IF + LOPT = MAX( LWMIN, 2*N + + $ ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) ) + LIOPT = LIWMIN + END IF + WORK( 1 ) = LOPT + IWORK( 1 ) = LIOPT +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + IF( WANTZ ) + $ A( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL SLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call SSYTRD to reduce symmetric matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 +* + CALL SSYTRD( UPLO, N, A, LDA, W, WORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* SSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call SORMTR to multiply it by the +* Householder transformations stored in A. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL SSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL SORMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ), + $ WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO ) + CALL SLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL SSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LOPT + IWORK( 1 ) = LIOPT +* + RETURN +* +* End of SSYEVD +* + END diff --git a/dspl/liblapack/SRC/ssyevd_2stage.f b/dspl/liblapack/SRC/ssyevd_2stage.f new file mode 100644 index 0000000..8ab90b6 --- /dev/null +++ b/dspl/liblapack/SRC/ssyevd_2stage.f @@ -0,0 +1,410 @@ +*> \brief SSYEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices +* +* @generated from dsyevd_2stage.f, fortran d -> s, Sat Nov 5 23:55:54 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYEVD_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, +* IWORK, LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a +*> real symmetric matrix A using the 2stage technique for +*> the reduction to tridiagonal. If eigenvectors are desired, it uses a +*> divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, +*> dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + 2*N+1 +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + 2*N+1 +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be at least +*> 1 + 6*N + 2*N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i and JOBZ = 'N', then the algorithm failed +*> to converge; i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> if INFO = i and JOBZ = 'V', then the algorithm failed +*> to compute an eigenvalue while working on the submatrix +*> lying in rows and columns INFO/(N+1) through +*> mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realSYeigen +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA \n +*> Modified by Francoise Tisseur, University of Tennessee \n +*> Modified description of INFO. Sven, 16 Feb 05. \n +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, + $ IWORK, LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. +* + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, INDE, INDTAU, INDWK2, INDWRK, ISCALE, + $ LIWMIN, LLWORK, LLWRK2, LWMIN, + $ LHTRD, LWTRD, KD, IB, INDHOUS + REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + REAL SLAMCH, SLANSY + EXTERNAL LSAME, SLAMCH, SLANSY, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL SLACPY, SLASCL, SORMTR, SSCAL, SSTEDC, SSTERF, + $ SSYTRD_2STAGE, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE + KD = ILAENV2STAGE( 1, 'SSYTRD_2STAGE', JOBZ, + $ N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'SSYTRD_2STAGE', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'SSYTRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'SSYTRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 6*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + 1 + LHTRD + LWTRD + END IF + END IF + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYEVD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + IF( WANTZ ) + $ A( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL SLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call SSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + INDHOUS = INDTAU + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 +* + CALL SSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK( INDE ), + $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call SSTERF. For eigenvectors, first call +* SSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call SORMTR to multiply it by the +* Householder transformations stored in A. +* + IF( .NOT.WANTZ ) THEN + CALL SSTERF( N, W, WORK( INDE ), INFO ) + ELSE +* Not available in this release, and agrument checking should not +* let it getting here + RETURN + CALL SSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO ) + CALL SORMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ), + $ WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO ) + CALL SLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) + $ CALL SSCAL( N, ONE / SIGMA, W, 1 ) +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of SSYEVD_2STAGE +* + END diff --git a/dspl/liblapack/SRC/ssyevr.f b/dspl/liblapack/SRC/ssyevr.f new file mode 100644 index 0000000..f24091e --- /dev/null +++ b/dspl/liblapack/SRC/ssyevr.f @@ -0,0 +1,690 @@ +*> \brief SSYEVR computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYEVR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, +* ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, +* IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LWORK, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* REAL A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYEVR computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric matrix A. Eigenvalues and eigenvectors can be +*> selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> +*> SSYEVR first reduces the matrix A to tridiagonal form T with a call +*> to SSYTRD. Then, whenever possible, SSYEVR calls SSTEMR to compute +*> the eigenspectrum using Relatively Robust Representations. SSTEMR +*> computes eigenvalues by the dqds algorithm, while orthogonal +*> eigenvectors are computed from various "good" L D L^T representations +*> (also known as Relatively Robust Representations). Gram-Schmidt +*> orthogonalization is avoided as far as possible. More specifically, +*> the various steps of the algorithm are as follows. +*> +*> For each unreduced block (submatrix) of T, +*> (a) Compute T - sigma I = L D L^T, so that L and D +*> define all the wanted eigenvalues to high relative accuracy. +*> This means that small relative changes in the entries of D and L +*> cause only small relative changes in the eigenvalues and +*> eigenvectors. The standard (unfactored) representation of the +*> tridiagonal matrix T does not have this property in general. +*> (b) Compute the eigenvalues to suitable accuracy. +*> If the eigenvectors are desired, the algorithm attains full +*> accuracy of the computed eigenvalues only right before +*> the corresponding vectors have to be computed, see steps c) and d). +*> (c) For each cluster of close eigenvalues, select a new +*> shift close to the cluster, find a new factorization, and refine +*> the shifted eigenvalues to suitable accuracy. +*> (d) For each eigenvalue with a large enough relative separation compute +*> the corresponding eigenvector by forming a rank revealing twisted +*> factorization. Go back to (c) for any clusters that remain. +*> +*> The desired accuracy of the output can be specified by the input +*> parameter ABSTOL. +*> +*> For more details, see SSTEMR's documentation and: +*> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations +*> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," +*> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. +*> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and +*> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, +*> 2004. Also LAPACK Working Note 154. +*> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric +*> tridiagonal eigenvalue/eigenvector problem", +*> Computer Science Division Technical Report No. UCB/CSD-97-971, +*> UC Berkeley, May 1997. +*> +*> +*> Note 1 : SSYEVR calls SSTEMR when the full spectrum is requested +*> on machines which conform to the ieee-754 floating point standard. +*> SSYEVR calls SSTEBZ and SSTEIN on non-ieee machines and +*> when partial spectrum requests are made. +*> +*> Normal execution of SSTEMR may create NaNs and infinities and +*> hence may abort due to a floating point exception in environments +*> which do not handle NaNs and infinities in the ieee standard default +*> manner. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> For RANGE = 'V' or 'I' and IU - IL < N - 1, SSTEBZ and +*> SSTEIN are called +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> +*> If high relative accuracy is important, set ABSTOL to +*> SLAMCH( 'Safe minimum' ). Doing so will guarantee that +*> eigenvalues are computed to high relative accuracy when +*> possible in future releases. The current code does not +*> make any guarantees about high relative accuracy, but +*> future releases will. See J. Barlow and J. Demmel, +*> "Computing Accurate Eigensystems of Scaled Diagonally +*> Dominant Matrices", LAPACK Working Note #7, for a discussion +*> of which matrices define their eigenvalues to high relative +*> accuracy. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> Supplying N columns is always safe. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). This is an output of SSTEMR (tridiagonal +*> matrix). The support of the eigenvectors of A is typically +*> 1:N because of the orthogonal transformations applied by SORMTR. +*> Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,26*N). +*> For optimal efficiency, LWORK >= (NB+6)*N, +*> where NB is the max of the blocksize for SSYTRD and SORMTR +*> returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N). +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: Internal error +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realSYeigen +* +*> \par Contributors: +* ================== +*> +*> Inderjit Dhillon, IBM Almaden, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Ken Stanley, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Jason Riedy, Computer Science Division, University of +*> California at Berkeley, USA \n +*> +* ===================================================================== + SUBROUTINE SSYEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, + $ IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + REAL A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, TEST, VALEIG, + $ WANTZ, TRYRAC + CHARACTER ORDER + INTEGER I, IEEEOK, IINFO, IMAX, INDD, INDDD, INDE, + $ INDEE, INDIBL, INDIFL, INDISP, INDIWO, INDTAU, + $ INDWK, INDWKN, ISCALE, J, JJ, LIWMIN, + $ LLWORK, LLWRKN, LWKOPT, LWMIN, NB, NSPLIT + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANSY + EXTERNAL LSAME, ILAENV, SLAMCH, SLANSY +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SORMTR, SSCAL, SSTEBZ, SSTEMR, SSTEIN, + $ SSTERF, SSWAP, SSYTRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IEEEOK = ILAENV( 10, 'SSYEVR', 'N', 1, 2, 3, 4 ) +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) ) +* + LWMIN = MAX( 1, 26*N ) + LIWMIN = MAX( 1, 10*N ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) + NB = MAX( NB, ILAENV( 1, 'SORMTR', UPLO, N, -1, -1, -1 ) ) + LWKOPT = MAX( ( NB+1 )*N, LWMIN ) + WORK( 1 ) = LWKOPT + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYEVR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + WORK( 1 ) = 26 + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + ELSE + IF( VL.LT.A( 1, 1 ) .AND. VU.GE.A( 1, 1 ) ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + END IF + END IF + IF( WANTZ ) THEN + Z( 1, 1 ) = ONE + ISUPPZ( 1 ) = 1 + ISUPPZ( 2 ) = 1 + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF (VALEIG) THEN + VLL = VL + VUU = VU + END IF + ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL SSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL SSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF + +* Initialize indices into workspaces. Note: The IWORK indices are +* used only if SSTERF or SSTEMR fail. + +* WORK(INDTAU:INDTAU+N-1) stores the scalar factors of the +* elementary reflectors used in SSYTRD. + INDTAU = 1 +* WORK(INDD:INDD+N-1) stores the tridiagonal's diagonal entries. + INDD = INDTAU + N +* WORK(INDE:INDE+N-1) stores the off-diagonal entries of the +* tridiagonal matrix from SSYTRD. + INDE = INDD + N +* WORK(INDDD:INDDD+N-1) is a copy of the diagonal entries over +* -written by SSTEMR (the SSTERF path copies the diagonal to W). + INDDD = INDE + N +* WORK(INDEE:INDEE+N-1) is a copy of the off-diagonal entries over +* -written while computing the eigenvalues in SSTERF and SSTEMR. + INDEE = INDDD + N +* INDWK is the starting offset of the left-over workspace, and +* LLWORK is the remaining workspace size. + INDWK = INDEE + N + LLWORK = LWORK - INDWK + 1 + +* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in SSTEBZ and +* stores the block indices of each of the M<=N eigenvalues. + INDIBL = 1 +* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in SSTEBZ and +* stores the starting and finishing indices of each block. + INDISP = INDIBL + N +* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors +* that corresponding to eigenvectors that fail to converge in +* SSTEIN. This information is discarded; if any fail, the driver +* returns INFO > 0. + INDIFL = INDISP + N +* INDIWO is the offset of the remaining integer workspace. + INDIWO = INDIFL + N + +* +* Call SSYTRD to reduce symmetric matrix to tridiagonal form. +* + CALL SSYTRD( UPLO, N, A, LDA, WORK( INDD ), WORK( INDE ), + $ WORK( INDTAU ), WORK( INDWK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired +* then call SSTERF or SSTEMR and SORMTR. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG.OR.TEST ) .AND. ( IEEEOK.EQ.1 ) ) THEN + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N, WORK( INDD ), 1, W, 1 ) + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SCOPY( N, WORK( INDD ), 1, WORK( INDDD ), 1 ) +* + IF (ABSTOL .LE. TWO*N*EPS) THEN + TRYRAC = .TRUE. + ELSE + TRYRAC = .FALSE. + END IF + CALL SSTEMR( JOBZ, 'A', N, WORK( INDDD ), WORK( INDEE ), + $ VL, VU, IL, IU, M, W, Z, LDZ, N, ISUPPZ, + $ TRYRAC, WORK( INDWK ), LWORK, IWORK, LIWORK, + $ INFO ) +* +* +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by SSTEMR. +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL SORMTR( 'L', UPLO, 'N', N, M, A, LDA, + $ WORK( INDTAU ), Z, LDZ, WORK( INDWKN ), + $ LLWRKN, IINFO ) + END IF + END IF +* +* + IF( INFO.EQ.0 ) THEN +* Everything worked. Skip SSTEBZ/SSTEIN. IWORK(:) are +* undefined. + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN. +* Also call SSTEBZ and SSTEIN if SSTEMR fails. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWK ), IWORK( INDIWO ), IWORK( INDIFL ), + $ INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by SSTEIN. +* + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL SORMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* +* Jump here if SSTEMR/SSTEIN succeeded. + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. Note: We do not sort the IFAIL portion of IWORK. +* It may not be initialized (if SSTEMR/SSTEIN succeeded), and we do +* not return this detailed information to the user. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + W( I ) = W( J ) + W( J ) = TMP1 + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of SSYEVR +* + END diff --git a/dspl/liblapack/SRC/ssyevr_2stage.f b/dspl/liblapack/SRC/ssyevr_2stage.f new file mode 100644 index 0000000..60339cd --- /dev/null +++ b/dspl/liblapack/SRC/ssyevr_2stage.f @@ -0,0 +1,745 @@ +*> \brief SSYEVR_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices +* +* @generated from dsyevr_2stage.f, fortran d -> s, Sat Nov 5 23:50:10 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYEVR_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, +* IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, +* LWORK, IWORK, LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LWORK, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* REAL A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYEVR_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors can be +*> selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> +*> SSYEVR_2STAGE first reduces the matrix A to tridiagonal form T with a call +*> to SSYTRD. Then, whenever possible, SSYEVR_2STAGE calls SSTEMR to compute +*> the eigenspectrum using Relatively Robust Representations. SSTEMR +*> computes eigenvalues by the dqds algorithm, while orthogonal +*> eigenvectors are computed from various "good" L D L^T representations +*> (also known as Relatively Robust Representations). Gram-Schmidt +*> orthogonalization is avoided as far as possible. More specifically, +*> the various steps of the algorithm are as follows. +*> +*> For each unreduced block (submatrix) of T, +*> (a) Compute T - sigma I = L D L^T, so that L and D +*> define all the wanted eigenvalues to high relative accuracy. +*> This means that small relative changes in the entries of D and L +*> cause only small relative changes in the eigenvalues and +*> eigenvectors. The standard (unfactored) representation of the +*> tridiagonal matrix T does not have this property in general. +*> (b) Compute the eigenvalues to suitable accuracy. +*> If the eigenvectors are desired, the algorithm attains full +*> accuracy of the computed eigenvalues only right before +*> the corresponding vectors have to be computed, see steps c) and d). +*> (c) For each cluster of close eigenvalues, select a new +*> shift close to the cluster, find a new factorization, and refine +*> the shifted eigenvalues to suitable accuracy. +*> (d) For each eigenvalue with a large enough relative separation compute +*> the corresponding eigenvector by forming a rank revealing twisted +*> factorization. Go back to (c) for any clusters that remain. +*> +*> The desired accuracy of the output can be specified by the input +*> parameter ABSTOL. +*> +*> For more details, see SSTEMR's documentation and: +*> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations +*> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," +*> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. +*> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and +*> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, +*> 2004. Also LAPACK Working Note 154. +*> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric +*> tridiagonal eigenvalue/eigenvector problem", +*> Computer Science Division Technical Report No. UCB/CSD-97-971, +*> UC Berkeley, May 1997. +*> +*> +*> Note 1 : SSYEVR_2STAGE calls SSTEMR when the full spectrum is requested +*> on machines which conform to the ieee-754 floating point standard. +*> SSYEVR_2STAGE calls SSTEBZ and SSTEIN on non-ieee machines and +*> when partial spectrum requests are made. +*> +*> Normal execution of SSTEMR may create NaNs and infinities and +*> hence may abort due to a floating point exception in environments +*> which do not handle NaNs and infinities in the ieee standard default +*> manner. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> For RANGE = 'V' or 'I' and IU - IL < N - 1, SSTEBZ and +*> SSTEIN are called +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> +*> If high relative accuracy is important, set ABSTOL to +*> SLAMCH( 'Safe minimum' ). Doing so will guarantee that +*> eigenvalues are computed to high relative accuracy when +*> possible in future releases. The current code does not +*> make any guarantees about high relative accuracy, but +*> future releases will. See J. Barlow and J. Demmel, +*> "Computing Accurate Eigensystems of Scaled Diagonally +*> Dominant Matrices", LAPACK Working Note #7, for a discussion +*> of which matrices define their eigenvalues to high relative +*> accuracy. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> Supplying N columns is always safe. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). This is an output of SSTEMR (tridiagonal +*> matrix). The support of the eigenvectors of A is typically +*> 1:N because of the orthogonal transformations applied by SORMTR. +*> Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, 26*N, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + 5*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + 5*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N). +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: Internal error +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realSYeigen +* +*> \par Contributors: +* ================== +*> +*> Inderjit Dhillon, IBM Almaden, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Ken Stanley, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Jason Riedy, Computer Science Division, University of +*> California at Berkeley, USA \n +*> +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, + $ LWORK, IWORK, LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + REAL A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ, + $ TRYRAC, TEST + CHARACTER ORDER + INTEGER I, IEEEOK, IINFO, IMAX, INDD, INDDD, INDE, + $ INDEE, INDIBL, INDIFL, INDISP, INDIWO, INDTAU, + $ INDWK, INDWKN, ISCALE, J, JJ, LIWMIN, + $ LLWORK, LLWRKN, LWMIN, NSPLIT, + $ LHTRD, LWTRD, KD, IB, INDHOUS + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV, ILAENV2STAGE + REAL SLAMCH, SLANSY + EXTERNAL LSAME, SLAMCH, SLANSY, ILAENV, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SORMTR, SSCAL, SSTEBZ, SSTEMR, SSTEIN, + $ SSTERF, SSWAP, SSYTRD_2STAGE, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IEEEOK = ILAENV( 10, 'SSYEVR', 'N', 1, 2, 3, 4 ) +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) ) +* + KD = ILAENV2STAGE( 1, 'SSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'SSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = MAX( 26*N, 5*N + LHTRD + LWTRD ) + LIWMIN = MAX( 1, 10*N ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN +* NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) +* NB = MAX( NB, ILAENV( 1, 'SORMTR', UPLO, N, -1, -1, -1 ) ) +* LWKOPT = MAX( ( NB+1 )*N, LWMIN ) + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYEVR_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + WORK( 1 ) = 26 + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + ELSE + IF( VL.LT.A( 1, 1 ) .AND. VU.GE.A( 1, 1 ) ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + END IF + END IF + IF( WANTZ ) THEN + Z( 1, 1 ) = ONE + ISUPPZ( 1 ) = 1 + ISUPPZ( 2 ) = 1 + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF (VALEIG) THEN + VLL = VL + VUU = VU + END IF + ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL SSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL SSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF + +* Initialize indices into workspaces. Note: The IWORK indices are +* used only if SSTERF or SSTEMR fail. + +* WORK(INDTAU:INDTAU+N-1) stores the scalar factors of the +* elementary reflectors used in SSYTRD. + INDTAU = 1 +* WORK(INDD:INDD+N-1) stores the tridiagonal's diagonal entries. + INDD = INDTAU + N +* WORK(INDE:INDE+N-1) stores the off-diagonal entries of the +* tridiagonal matrix from SSYTRD. + INDE = INDD + N +* WORK(INDDD:INDDD+N-1) is a copy of the diagonal entries over +* -written by SSTEMR (the SSTERF path copies the diagonal to W). + INDDD = INDE + N +* WORK(INDEE:INDEE+N-1) is a copy of the off-diagonal entries over +* -written while computing the eigenvalues in SSTERF and SSTEMR. + INDEE = INDDD + N +* INDHOUS is the starting offset Householder storage of stage 2 + INDHOUS = INDEE + N +* INDWK is the starting offset of the left-over workspace, and +* LLWORK is the remaining workspace size. + INDWK = INDHOUS + LHTRD + LLWORK = LWORK - INDWK + 1 + + +* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in SSTEBZ and +* stores the block indices of each of the M<=N eigenvalues. + INDIBL = 1 +* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in SSTEBZ and +* stores the starting and finishing indices of each block. + INDISP = INDIBL + N +* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors +* that corresponding to eigenvectors that fail to converge in +* SSTEIN. This information is discarded; if any fail, the driver +* returns INFO > 0. + INDIFL = INDISP + N +* INDIWO is the offset of the remaining integer workspace. + INDIWO = INDIFL + N + +* +* Call SSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form. +* +* + CALL SSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, WORK( INDD ), + $ WORK( INDE ), WORK( INDTAU ), WORK( INDHOUS ), + $ LHTRD, WORK( INDWK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired +* then call SSTERF or SSTEMR and SORMTR. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG.OR.TEST ) .AND. ( IEEEOK.EQ.1 ) ) THEN + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N, WORK( INDD ), 1, W, 1 ) + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SCOPY( N, WORK( INDD ), 1, WORK( INDDD ), 1 ) +* + IF (ABSTOL .LE. TWO*N*EPS) THEN + TRYRAC = .TRUE. + ELSE + TRYRAC = .FALSE. + END IF + CALL SSTEMR( JOBZ, 'A', N, WORK( INDDD ), WORK( INDEE ), + $ VL, VU, IL, IU, M, W, Z, LDZ, N, ISUPPZ, + $ TRYRAC, WORK( INDWK ), LWORK, IWORK, LIWORK, + $ INFO ) +* +* +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by SSTEMR. +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL SORMTR( 'L', UPLO, 'N', N, M, A, LDA, + $ WORK( INDTAU ), Z, LDZ, WORK( INDWKN ), + $ LLWRKN, IINFO ) + END IF + END IF +* +* + IF( INFO.EQ.0 ) THEN +* Everything worked. Skip SSTEBZ/SSTEIN. IWORK(:) are +* undefined. + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN. +* Also call SSTEBZ and SSTEIN if SSTEMR fails. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWK ), IWORK( INDIWO ), IWORK( INDIFL ), + $ INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by SSTEIN. +* + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL SORMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* +* Jump here if SSTEMR/SSTEIN succeeded. + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. Note: We do not sort the IFAIL portion of IWORK. +* It may not be initialized (if SSTEMR/SSTEIN succeeded), and we do +* not return this detailed information to the user. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + W( I ) = W( J ) + W( J ) = TMP1 + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of SSYEVR_2STAGE +* + END diff --git a/dspl/liblapack/SRC/ssyevx.f b/dspl/liblapack/SRC/ssyevx.f new file mode 100644 index 0000000..7a7fac9 --- /dev/null +++ b/dspl/liblapack/SRC/ssyevx.f @@ -0,0 +1,554 @@ +*> \brief SSYEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, +* ABSTOL, M, W, Z, LDZ, WORK, LWORK, IWORK, +* IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYEVX computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric matrix A. Eigenvalues and eigenvectors can be +*> selected by specifying either a range of values or a range of indices +*> for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> On normal exit, the first M elements contain the selected +*> eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise 8*N. +*> For optimal efficiency, LWORK >= (NB+3)*N, +*> where NB is the max of the blocksize for SSYTRD and SORMTR +*> returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realSYeigen +* +* ===================================================================== + SUBROUTINE SSYEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, WORK, LWORK, IWORK, + $ IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, TEST, VALEIG, + $ WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWO, INDTAU, INDWKN, INDWRK, ISCALE, + $ ITMP1, J, JJ, LLWORK, LLWRKN, LWKMIN, + $ LWKOPT, NB, NSPLIT + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANSY + EXTERNAL LSAME, ILAENV, SLAMCH, SLANSY +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLACPY, SORGTR, SORMTR, SSCAL, SSTEBZ, + $ SSTEIN, SSTEQR, SSTERF, SSWAP, SSYTRD, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWKMIN = 1 + WORK( 1 ) = LWKMIN + ELSE + LWKMIN = 8*N + NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) + NB = MAX( NB, ILAENV( 1, 'SORMTR', UPLO, N, -1, -1, -1 ) ) + LWKOPT = MAX( LWKMIN, ( NB + 3 )*N ) + WORK( 1 ) = LWKOPT + END IF +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) + $ INFO = -17 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + ELSE + IF( VL.LT.A( 1, 1 ) .AND. VU.GE.A( 1, 1 ) ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + END IF + ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL SSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL SSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call SSYTRD to reduce symmetric matrix to tridiagonal form. +* + INDTAU = 1 + INDE = INDTAU + N + INDD = INDE + N + INDWRK = INDD + N + LLWORK = LWORK - INDWRK + 1 + CALL SSYTRD( UPLO, N, A, LDA, WORK( INDD ), WORK( INDE ), + $ WORK( INDTAU ), WORK( INDWRK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal to +* zero, then call SSTERF or SORGTR and SSTEQR. If this fails for +* some eigenvalue, then try SSTEBZ. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG .OR. TEST ) .AND. ( ABSTOL.LE.ZERO ) ) THEN + CALL SCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL SLACPY( 'A', N, N, A, LDA, Z, LDZ ) + CALL SORGTR( UPLO, N, Z, LDZ, WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + IFAIL( I ) = 0 + 30 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 40 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by SSTEIN. +* + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL SORMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 40 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 60 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 50 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 60 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SSYEVX +* + END diff --git a/dspl/liblapack/SRC/ssyevx_2stage.f b/dspl/liblapack/SRC/ssyevx_2stage.f new file mode 100644 index 0000000..227a70b --- /dev/null +++ b/dspl/liblapack/SRC/ssyevx_2stage.f @@ -0,0 +1,612 @@ +*> \brief SSYEVX_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices +* +* @generated from dsyevx_2stage.f, fortran d -> s, Sat Nov 5 23:55:46 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYEVX_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYEVX_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, +* IL, IU, ABSTOL, M, W, Z, LDZ, WORK, +* LWORK, IWORK, IFAIL, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYEVX_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors can be +*> selected by specifying either a range of values or a range of indices +*> for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*SLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> On normal exit, the first M elements contain the selected +*> eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, 8*N, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + 3*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + 3*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realSYeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYEVX_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, WORK, + $ LWORK, IWORK, IFAIL, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, TEST, VALEIG, + $ WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWO, INDTAU, INDWKN, INDWRK, ISCALE, + $ ITMP1, J, JJ, LLWORK, LLWRKN, + $ NSPLIT, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS + REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + REAL SLAMCH, SLANSY + EXTERNAL LSAME, SLAMCH, SLANSY, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLACPY, SORGTR, SORMTR, SSCAL, SSTEBZ, + $ SSTEIN, SSTEQR, SSTERF, SSWAP, XERBLA, + $ SSYTRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + KD = ILAENV2STAGE( 1, 'SSYTRD_2STAGE', JOBZ, + $ N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'SSYTRD_2STAGE', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'SSYTRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'SSYTRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + LWMIN = MAX( 8*N, 3*N + LHTRD + LWTRD ) + WORK( 1 ) = LWMIN + END IF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -17 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYEVX_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + ELSE + IF( VL.LT.A( 1, 1 ) .AND. VU.GE.A( 1, 1 ) ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = SLAMCH( 'Safe minimum' ) + EPS = SLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + END IF + ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL SSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL SSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call SSYTRD_2STAGE to reduce symmetric matrix to tridiagonal form. +* + INDTAU = 1 + INDE = INDTAU + N + INDD = INDE + N + INDHOUS = INDD + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL SSYTRD_2STAGE( JOBZ, UPLO, N, A, LDA, WORK( INDD ), + $ WORK( INDE ), WORK( INDTAU ), WORK( INDHOUS ), + $ LHTRD, WORK( INDWRK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal to +* zero, then call SSTERF or SORGTR and SSTEQR. If this fails for +* some eigenvalue, then try SSTEBZ. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG .OR. TEST ) .AND. ( ABSTOL.LE.ZERO ) ) THEN + CALL SCOPY( N, WORK( INDD ), 1, W, 1 ) + INDEE = INDWRK + 2*N + IF( .NOT.WANTZ ) THEN + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTERF( N, W, WORK( INDEE ), INFO ) + ELSE + CALL SLACPY( 'A', N, N, A, LDA, Z, LDZ ) + CALL SORGTR( UPLO, N, Z, LDZ, WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 ) + CALL SSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ, + $ WORK( INDWRK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + IFAIL( I ) = 0 + 30 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 40 + END IF + INFO = 0 + END IF +* +* Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWO = INDISP + N + CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ WORK( INDD ), WORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO ) +* +* Apply orthogonal matrix used in reduction to tridiagonal +* form to eigenvectors returned by SSTEIN. +* + INDWKN = INDE + LLWRKN = LWORK - INDWKN + 1 + CALL SORMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 40 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL SSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 60 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 50 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 60 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of SSYEVX_2STAGE +* + END diff --git a/dspl/liblapack/SRC/ssygs2.f b/dspl/liblapack/SRC/ssygs2.f new file mode 100644 index 0000000..addf681 --- /dev/null +++ b/dspl/liblapack/SRC/ssygs2.f @@ -0,0 +1,283 @@ +*> \brief \b SSYGS2 reduces a symmetric definite generalized eigenproblem to standard form, using the factorization results obtained from spotrf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYGS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYGS2 reduces a real symmetric-definite generalized eigenproblem +*> to standard form. +*> +*> If ITYPE = 1, the problem is A*x = lambda*B*x, +*> and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) +*> +*> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +*> B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T *A*L. +*> +*> B must have been previously factorized as U**T *U or L*L**T by SPOTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); +*> = 2 or 3: compute U*A*U**T or L**T *A*L. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored, and how B has been factorized. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the transformed matrix, stored in the +*> same format as A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> The triangular factor from the Cholesky factorization of B, +*> as returned by SPOTRF. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + SUBROUTINE SSYGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, HALF + PARAMETER ( ONE = 1.0, HALF = 0.5 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K + REAL AKK, BKK, CT +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SSCAL, SSYR2, STRMV, STRSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYGS2', -INFO ) + RETURN + END IF +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U**T)*A*inv(U) +* + DO 10 K = 1, N +* +* Update the upper triangle of A(k:n,k:n) +* + AKK = A( K, K ) + BKK = B( K, K ) + AKK = AKK / BKK**2 + A( K, K ) = AKK + IF( K.LT.N ) THEN + CALL SSCAL( N-K, ONE / BKK, A( K, K+1 ), LDA ) + CT = -HALF*AKK + CALL SAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL SSYR2( UPLO, N-K, -ONE, A( K, K+1 ), LDA, + $ B( K, K+1 ), LDB, A( K+1, K+1 ), LDA ) + CALL SAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL STRSV( UPLO, 'Transpose', 'Non-unit', N-K, + $ B( K+1, K+1 ), LDB, A( K, K+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L**T) +* + DO 20 K = 1, N +* +* Update the lower triangle of A(k:n,k:n) +* + AKK = A( K, K ) + BKK = B( K, K ) + AKK = AKK / BKK**2 + A( K, K ) = AKK + IF( K.LT.N ) THEN + CALL SSCAL( N-K, ONE / BKK, A( K+1, K ), 1 ) + CT = -HALF*AKK + CALL SAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 ) + CALL SSYR2( UPLO, N-K, -ONE, A( K+1, K ), 1, + $ B( K+1, K ), 1, A( K+1, K+1 ), LDA ) + CALL SAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 ) + CALL STRSV( UPLO, 'No transpose', 'Non-unit', N-K, + $ B( K+1, K+1 ), LDB, A( K+1, K ), 1 ) + END IF + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U**T +* + DO 30 K = 1, N +* +* Update the upper triangle of A(1:k,1:k) +* + AKK = A( K, K ) + BKK = B( K, K ) + CALL STRMV( UPLO, 'No transpose', 'Non-unit', K-1, B, + $ LDB, A( 1, K ), 1 ) + CT = HALF*AKK + CALL SAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 ) + CALL SSYR2( UPLO, K-1, ONE, A( 1, K ), 1, B( 1, K ), 1, + $ A, LDA ) + CALL SAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 ) + CALL SSCAL( K-1, BKK, A( 1, K ), 1 ) + A( K, K ) = AKK*BKK**2 + 30 CONTINUE + ELSE +* +* Compute L**T *A*L +* + DO 40 K = 1, N +* +* Update the lower triangle of A(1:k,1:k) +* + AKK = A( K, K ) + BKK = B( K, K ) + CALL STRMV( UPLO, 'Transpose', 'Non-unit', K-1, B, LDB, + $ A( K, 1 ), LDA ) + CT = HALF*AKK + CALL SAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA ) + CALL SSYR2( UPLO, K-1, ONE, A( K, 1 ), LDA, B( K, 1 ), + $ LDB, A, LDA ) + CALL SAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA ) + CALL SSCAL( K-1, BKK, A( K, 1 ), LDA ) + A( K, K ) = AKK*BKK**2 + 40 CONTINUE + END IF + END IF + RETURN +* +* End of SSYGS2 +* + END diff --git a/dspl/liblapack/SRC/ssygst.f b/dspl/liblapack/SRC/ssygst.f new file mode 100644 index 0000000..422475a --- /dev/null +++ b/dspl/liblapack/SRC/ssygst.f @@ -0,0 +1,321 @@ +*> \brief \b SSYGST +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYGST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYGST reduces a real symmetric-definite generalized eigenproblem +*> to standard form. +*> +*> If ITYPE = 1, the problem is A*x = lambda*B*x, +*> and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) +*> +*> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +*> B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. +*> +*> B must have been previously factorized as U**T*U or L*L**T by SPOTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); +*> = 2 or 3: compute U*A*U**T or L**T*A*L. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored and B is factored as +*> U**T*U; +*> = 'L': Lower triangle of A is stored and B is factored as +*> L*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the transformed matrix, stored in the +*> same format as A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> The triangular factor from the Cholesky factorization of B, +*> as returned by SPOTRF. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + SUBROUTINE SSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, HALF + PARAMETER ( ONE = 1.0, HALF = 0.5 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KB, NB +* .. +* .. External Subroutines .. + EXTERNAL SSYGS2, SSYMM, SSYR2K, STRMM, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYGST', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'SSYGST', UPLO, N, -1, -1, -1 ) +* + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL SSYGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + ELSE +* +* Use blocked code +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U**T)*A*inv(U) +* + DO 10 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the upper triangle of A(k:n,k:n) +* + CALL SSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + IF( K+KB.LE.N ) THEN + CALL STRSM( 'Left', UPLO, 'Transpose', 'Non-unit', + $ KB, N-K-KB+1, ONE, B( K, K ), LDB, + $ A( K, K+KB ), LDA ) + CALL SSYMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, + $ A( K, K ), LDA, B( K, K+KB ), LDB, ONE, + $ A( K, K+KB ), LDA ) + CALL SSYR2K( UPLO, 'Transpose', N-K-KB+1, KB, -ONE, + $ A( K, K+KB ), LDA, B( K, K+KB ), LDB, + $ ONE, A( K+KB, K+KB ), LDA ) + CALL SSYMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, + $ A( K, K ), LDA, B( K, K+KB ), LDB, ONE, + $ A( K, K+KB ), LDA ) + CALL STRSM( 'Right', UPLO, 'No transpose', + $ 'Non-unit', KB, N-K-KB+1, ONE, + $ B( K+KB, K+KB ), LDB, A( K, K+KB ), + $ LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L**T) +* + DO 20 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the lower triangle of A(k:n,k:n) +* + CALL SSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + IF( K+KB.LE.N ) THEN + CALL STRSM( 'Right', UPLO, 'Transpose', 'Non-unit', + $ N-K-KB+1, KB, ONE, B( K, K ), LDB, + $ A( K+KB, K ), LDA ) + CALL SSYMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, + $ A( K, K ), LDA, B( K+KB, K ), LDB, ONE, + $ A( K+KB, K ), LDA ) + CALL SSYR2K( UPLO, 'No transpose', N-K-KB+1, KB, + $ -ONE, A( K+KB, K ), LDA, B( K+KB, K ), + $ LDB, ONE, A( K+KB, K+KB ), LDA ) + CALL SSYMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, + $ A( K, K ), LDA, B( K+KB, K ), LDB, ONE, + $ A( K+KB, K ), LDA ) + CALL STRSM( 'Left', UPLO, 'No transpose', + $ 'Non-unit', N-K-KB+1, KB, ONE, + $ B( K+KB, K+KB ), LDB, A( K+KB, K ), + $ LDA ) + END IF + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U**T +* + DO 30 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) +* + CALL STRMM( 'Left', UPLO, 'No transpose', 'Non-unit', + $ K-1, KB, ONE, B, LDB, A( 1, K ), LDA ) + CALL SSYMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ), + $ LDA, B( 1, K ), LDB, ONE, A( 1, K ), LDA ) + CALL SSYR2K( UPLO, 'No transpose', K-1, KB, ONE, + $ A( 1, K ), LDA, B( 1, K ), LDB, ONE, A, + $ LDA ) + CALL SSYMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ), + $ LDA, B( 1, K ), LDB, ONE, A( 1, K ), LDA ) + CALL STRMM( 'Right', UPLO, 'Transpose', 'Non-unit', + $ K-1, KB, ONE, B( K, K ), LDB, A( 1, K ), + $ LDA ) + CALL SSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + 30 CONTINUE + ELSE +* +* Compute L**T*A*L +* + DO 40 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) +* + CALL STRMM( 'Right', UPLO, 'No transpose', 'Non-unit', + $ KB, K-1, ONE, B, LDB, A( K, 1 ), LDA ) + CALL SSYMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ), + $ LDA, B( K, 1 ), LDB, ONE, A( K, 1 ), LDA ) + CALL SSYR2K( UPLO, 'Transpose', K-1, KB, ONE, + $ A( K, 1 ), LDA, B( K, 1 ), LDB, ONE, A, + $ LDA ) + CALL SSYMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ), + $ LDA, B( K, 1 ), LDB, ONE, A( K, 1 ), LDA ) + CALL STRMM( 'Left', UPLO, 'Transpose', 'Non-unit', KB, + $ K-1, ONE, B( K, K ), LDB, A( K, 1 ), LDA ) + CALL SSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + 40 CONTINUE + END IF + END IF + END IF + RETURN +* +* End of SSYGST +* + END diff --git a/dspl/liblapack/SRC/ssygv.f b/dspl/liblapack/SRC/ssygv.f new file mode 100644 index 0000000..4010424 --- /dev/null +++ b/dspl/liblapack/SRC/ssygv.f @@ -0,0 +1,314 @@ +*> \brief \b SSYGV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYGV computes all the eigenvalues, and optionally, the eigenvectors +*> of a real generalized symmetric-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +*> Here A and B are assumed to be symmetric and B is also +*> positive definite. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> matrix Z of eigenvectors. The eigenvectors are normalized +*> as follows: +*> if ITYPE = 1 or 2, Z**T*B*Z = I; +*> if ITYPE = 3, Z**T*inv(B)*Z = I. +*> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +*> or the lower triangle (if UPLO='L') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB, N) +*> On entry, the symmetric positive definite matrix B. +*> If UPLO = 'U', the leading N-by-N upper triangular part of B +*> contains the upper triangular part of the matrix B. +*> If UPLO = 'L', the leading N-by-N lower triangular part of B +*> contains the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**T*U or B = L*L**T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,3*N-1). +*> For optimal efficiency, LWORK >= (NB+2)*N, +*> where NB is the blocksize for SSYTRD returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: SPOTRF or SSYEV returned an error code: +*> <= N: if INFO = i, SSYEV failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYeigen +* +* ===================================================================== + SUBROUTINE SSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER LWKMIN, LWKOPT, NB, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SPOTRF, SSYEV, SSYGST, STRMM, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKMIN = MAX( 1, 3*N - 1 ) + NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( LWKMIN, ( NB + 2 )*N ) + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYGV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL SPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL SSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL SSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**T*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + CALL STRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**T*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + CALL STRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of SSYGV +* + END diff --git a/dspl/liblapack/SRC/ssygv_2stage.f b/dspl/liblapack/SRC/ssygv_2stage.f new file mode 100644 index 0000000..7dfbbaa --- /dev/null +++ b/dspl/liblapack/SRC/ssygv_2stage.f @@ -0,0 +1,370 @@ +*> \brief \b SSYGV_2STAGE +* +* @generated from dsygv_2stage.f, fortran d -> s, Sun Nov 6 12:54:29 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYGV_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, +* WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYGV_2STAGE computes all the eigenvalues, and optionally, the eigenvectors +*> of a real generalized symmetric-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +*> Here A and B are assumed to be symmetric and B is also +*> positive definite. +*> This routine use the 2stage technique for the reduction to tridiagonal +*> which showed higher performance on recent architecture and for large +*> sizes N>2000. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> matrix Z of eigenvectors. The eigenvectors are normalized +*> as follows: +*> if ITYPE = 1 or 2, Z**T*B*Z = I; +*> if ITYPE = 3, Z**T*inv(B)*Z = I. +*> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +*> or the lower triangle (if UPLO='L') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB, N) +*> On entry, the symmetric positive definite matrix B. +*> If UPLO = 'U', the leading N-by-N upper triangular part of B +*> contains the upper triangular part of the matrix B. +*> If UPLO = 'L', the leading N-by-N lower triangular part of B +*> contains the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**T*U or B = L*L**T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + 2*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + 2*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: SPOTRF or SSYEV returned an error code: +*> <= N: if INFO = i, SSYEV failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realSYeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, + $ WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER NEIG, LWMIN, LHTRD, LWTRD, KD, IB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + EXTERNAL LSAME, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL SPOTRF, SSYGST, STRMM, STRSM, XERBLA, + $ SSYEV_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + KD = ILAENV2STAGE( 1, 'SSYTRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'SSYTRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'SSYTRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = 2*N + LHTRD + LWTRD + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYGV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL SPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL SSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL SSYEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**T*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + CALL STRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**T*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + CALL STRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LWMIN + RETURN +* +* End of SSYGV_2STAGE +* + END diff --git a/dspl/liblapack/SRC/ssygvd.f b/dspl/liblapack/SRC/ssygvd.f new file mode 100644 index 0000000..7e28b0a --- /dev/null +++ b/dspl/liblapack/SRC/ssygvd.f @@ -0,0 +1,380 @@ +*> \brief \b SSYGVD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYGVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, +* LWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDA, LDB, LIWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYGVD computes all the eigenvalues, and optionally, the eigenvectors +*> of a real generalized symmetric-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +*> B are assumed to be symmetric and B is also positive definite. +*> If eigenvectors are desired, it uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> matrix Z of eigenvectors. The eigenvectors are normalized +*> as follows: +*> if ITYPE = 1 or 2, Z**T*B*Z = I; +*> if ITYPE = 3, Z**T*inv(B)*Z = I. +*> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +*> or the lower triangle (if UPLO='L') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB, N) +*> On entry, the symmetric matrix B. If UPLO = 'U', the +*> leading N-by-N upper triangular part of B contains the +*> upper triangular part of the matrix B. If UPLO = 'L', +*> the leading N-by-N lower triangular part of B contains +*> the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**T*U or B = L*L**T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK >= 1. +*> If JOBZ = 'N' and N > 1, LWORK >= 2*N+1. +*> If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK and IWORK +*> arrays, returns these values as the first entries of the WORK +*> and IWORK arrays, and no error message related to LWORK or +*> LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If N <= 1, LIWORK >= 1. +*> If JOBZ = 'N' and N > 1, LIWORK >= 1. +*> If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK and IWORK arrays, and no error message related to +*> LWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: SPOTRF or SSYEVD returned an error code: +*> <= N: if INFO = i and JOBZ = 'N', then the algorithm +*> failed to converge; i off-diagonal elements of an +*> intermediate tridiagonal form did not converge to +*> zero; +*> if INFO = i and JOBZ = 'V', then the algorithm +*> failed to compute an eigenvalue while working on +*> the submatrix lying in rows and columns INFO/(N+1) +*> through mod(INFO,N+1); +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Modified so that no backsubstitution is performed if SSYEVD fails to +*> converge (NEIG in old code could be greater than N causing out of +*> bounds reference to A - reported by Ralf Meyer). Also corrected the +*> description of INFO and the test on ITYPE. Sven, 16 Feb 05. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +*> +* ===================================================================== + SUBROUTINE SSYGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, + $ LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LIWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER LIOPT, LIWMIN, LOPT, LWMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SPOTRF, SSYEVD, SSYGST, STRMM, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LIWMIN = 1 + LWMIN = 1 + ELSE IF( WANTZ ) THEN + LIWMIN = 3 + 5*N + LWMIN = 1 + 6*N + 2*N**2 + ELSE + LIWMIN = 1 + LWMIN = 2*N + 1 + END IF + LOPT = LWMIN + LIOPT = LIWMIN + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LOPT + IWORK( 1 ) = LIOPT +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL SPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL SSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL SSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK, LIWORK, + $ INFO ) + LOPT = MAX( REAL( LOPT ), REAL( WORK( 1 ) ) ) + LIOPT = MAX( REAL( LIOPT ), REAL( IWORK( 1 ) ) ) +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**T*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + CALL STRSM( 'Left', UPLO, TRANS, 'Non-unit', N, N, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**T*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + CALL STRMM( 'Left', UPLO, TRANS, 'Non-unit', N, N, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LOPT + IWORK( 1 ) = LIOPT +* + RETURN +* +* End of SSYGVD +* + END diff --git a/dspl/liblapack/SRC/ssygvx.f b/dspl/liblapack/SRC/ssygvx.f new file mode 100644 index 0000000..8a99c93 --- /dev/null +++ b/dspl/liblapack/SRC/ssygvx.f @@ -0,0 +1,465 @@ +*> \brief \b SSYGVX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYGVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYGVX( ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB, +* VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, +* LWORK, IWORK, IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, ITYPE, IU, LDA, LDB, LDZ, LWORK, M, N +* REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* REAL A( LDA, * ), B( LDB, * ), W( * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYGVX computes selected eigenvalues, and optionally, eigenvectors +*> of a real generalized symmetric-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A +*> and B are assumed to be symmetric and B is also positive definite. +*> Eigenvalues and eigenvectors can be selected by specifying either a +*> range of values or a range of indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A and B are stored; +*> = 'L': Lower triangle of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix pencil (A,B). N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB, N) +*> On entry, the symmetric matrix B. If UPLO = 'U', the +*> leading N-by-N upper triangular part of B contains the +*> upper triangular part of the matrix B. If UPLO = 'L', +*> the leading N-by-N lower triangular part of B contains +*> the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**T*U or B = L*L**T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is REAL +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is REAL +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing C to tridiagonal form, where C is the symmetric +*> matrix of the standard symmetric problem to which the +*> generalized problem is transformed. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*SLAMCH('S'). +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is REAL array, dimension (N) +*> On normal exit, the first M elements contain the selected +*> eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'N', then Z is not referenced. +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> The eigenvectors are normalized as follows: +*> if ITYPE = 1 or 2, Z**T*B*Z = I; +*> if ITYPE = 3, Z**T*inv(B)*Z = I. +*> +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,8*N). +*> For optimal efficiency, LWORK >= (NB+3)*N, +*> where NB is the blocksize for SSYTRD returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: SPOTRF or SSYEVX returned an error code: +*> <= N: if INFO = i, SSYEVX failed to converge; +*> i eigenvectors failed to converge. Their indices +*> are stored in array IFAIL. +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realSYeigen +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE SSYGVX( ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB, + $ VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, + $ LWORK, IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, ITYPE, IU, LDA, LDB, LDZ, LWORK, M, N + REAL ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + REAL A( LDA, * ), B( LDB, * ), W( * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ + CHARACTER TRANS + INTEGER LWKMIN, LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL SPOTRF, SSYEVX, SSYGST, STRMM, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + UPPER = LSAME( UPLO, 'U' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -3 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -13 + END IF + END IF + END IF + IF (INFO.EQ.0) THEN + IF (LDZ.LT.1 .OR. (WANTZ .AND. LDZ.LT.N)) THEN + INFO = -18 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + LWKMIN = MAX( 1, 8*N ) + NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( LWKMIN, ( NB + 3 )*N ) + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYGVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + RETURN + END IF +* +* Form a Cholesky factorization of B. +* + CALL SPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL SSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL SSYEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, ABSTOL, + $ M, W, Z, LDZ, WORK, LWORK, IWORK, IFAIL, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( INFO.GT.0 ) + $ M = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**T*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + CALL STRSM( 'Left', UPLO, TRANS, 'Non-unit', N, M, ONE, B, + $ LDB, Z, LDZ ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**T*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + CALL STRMM( 'Left', UPLO, TRANS, 'Non-unit', N, M, ONE, B, + $ LDB, Z, LDZ ) + END IF + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SSYGVX +* + END diff --git a/dspl/liblapack/SRC/ssyrfs.f b/dspl/liblapack/SRC/ssyrfs.f new file mode 100644 index 0000000..5745b72 --- /dev/null +++ b/dspl/liblapack/SRC/ssyrfs.f @@ -0,0 +1,441 @@ +*> \brief \b SSYRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, +* X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric indefinite, and +*> provides error bounds and backward error estimates for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> The factored form of the matrix A. AF contains the block +*> diagonal matrix D and the multipliers used to obtain the +*> factor U or L from the factorization A = U*D*U**T or +*> A = L*D*L**T as computed by SSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSYTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by SSYTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + SUBROUTINE SSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, + $ X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) + REAL TWO + PARAMETER ( TWO = 2.0E+0 ) + REAL THREE + PARAMETER ( THREE = 3.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLACN2, SSYMV, SSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 ) + CALL SSYMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, + $ WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + DO 40 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 40 CONTINUE + WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = ABS( X( K, J ) ) + WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK + DO 60 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 60 CONTINUE + WORK( K ) = WORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL SSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N, + $ INFO ) + CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL SLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**T). +* + CALL SSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N, + $ INFO ) + DO 110 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 120 CONTINUE + CALL SSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N, + $ INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of SSYRFS +* + END diff --git a/dspl/liblapack/SRC/ssyrfsx.f b/dspl/liblapack/SRC/ssyrfsx.f new file mode 100644 index 0000000..b5dd0b2 --- /dev/null +++ b/dspl/liblapack/SRC/ssyrfsx.f @@ -0,0 +1,700 @@ +*> \brief \b SSYRFSX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYRFSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYRFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, +* S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, EQUED +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX, * ), WORK( * ) +* REAL S( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYRFSX improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric indefinite, and +*> provides error bounds and backward error estimates for the +*> solution. In addition to normwise error bound, the code provides +*> maximum componentwise error bound if possible. See comments for +*> ERR_BNDS_NORM and ERR_BNDS_COMP for details of the error bounds. +*> +*> The original system of linear equations may have been equilibrated +*> before calling this routine, as described by arguments EQUED and S +*> below. In this case, the solution and error bounds returned are +*> for the original unequilibrated system. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done to A +*> before calling this routine. This is needed to compute +*> the solution and error bounds correctly. +*> = 'N': No equilibration +*> = 'Y': Both row and column equilibration, i.e., A has been +*> replaced by diag(S) * A * diag(S). +*> The right hand side B has been changed accordingly. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular +*> part of the matrix A, and the strictly lower triangular +*> part of A is not referenced. If UPLO = 'L', the leading +*> N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> The factored form of the matrix A. AF contains the block +*> diagonal matrix D and the multipliers used to obtain the +*> factor U or L from the factorization A = U*D*U**T or A = +*> L*D*L**T as computed by SSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSYTRF. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A. If EQUED = 'Y', A is multiplied on +*> the left and right by diag(S). S is an input argument if FACT = +*> 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED +*> = 'Y', each element of S must be positive. If S is output, each +*> element of S is a power of the radix. If S is input, each element +*> of S should be a power of the radix to ensure a reliable solution +*> and error estimates. Scaling by powers of the radix does not cause +*> rounding errors unless the result underflows or overflows. +*> Rounding errors during scaling lead to refining with a matrix that +*> is not equivalent to the input matrix, producing error estimates +*> that may not be reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by SGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is REAL array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + SUBROUTINE SSYRFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO, EQUED + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX, * ), WORK( * ) + REAL S( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL ITREF_DEFAULT, ITHRESH_DEFAULT, + $ COMPONENTWISE_DEFAULT + REAL RTHRESH_DEFAULT, DZTHRESH_DEFAULT + PARAMETER ( ITREF_DEFAULT = 1.0 ) + PARAMETER ( ITHRESH_DEFAULT = 10.0 ) + PARAMETER ( COMPONENTWISE_DEFAULT = 1.0 ) + PARAMETER ( RTHRESH_DEFAULT = 0.5 ) + PARAMETER ( DZTHRESH_DEFAULT = 0.25 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. Local Scalars .. + CHARACTER(1) NORM + LOGICAL RCEQU + INTEGER J, PREC_TYPE, REF_TYPE, N_NORMS + REAL ANORM, RCOND_TMP + REAL ILLRCOND_THRESH, ERR_LBND, CWISE_WRONG + LOGICAL IGNORE_CWISE + INTEGER ITHRESH + REAL RTHRESH, UNSTABLE_THRESH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SSYCON, SLA_SYRFSX_EXTENDED +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. External Functions .. + EXTERNAL LSAME, ILAPREC + EXTERNAL SLAMCH, SLANSY, SLA_SYRCOND + REAL SLAMCH, SLANSY, SLA_SYRCOND + LOGICAL LSAME + INTEGER ILAPREC +* .. +* .. Executable Statements .. +* +* Check the input parameters. +* + INFO = 0 + REF_TYPE = INT( ITREF_DEFAULT ) + IF ( NPARAMS .GE. LA_LINRX_ITREF_I ) THEN + IF ( PARAMS( LA_LINRX_ITREF_I ) .LT. 0.0 ) THEN + PARAMS( LA_LINRX_ITREF_I ) = ITREF_DEFAULT + ELSE + REF_TYPE = PARAMS( LA_LINRX_ITREF_I ) + END IF + END IF +* +* Set default parameters. +* + ILLRCOND_THRESH = REAL( N )*SLAMCH( 'Epsilon' ) + ITHRESH = INT( ITHRESH_DEFAULT ) + RTHRESH = RTHRESH_DEFAULT + UNSTABLE_THRESH = DZTHRESH_DEFAULT + IGNORE_CWISE = COMPONENTWISE_DEFAULT .EQ. 0.0 +* + IF ( NPARAMS.GE.LA_LINRX_ITHRESH_I ) THEN + IF ( PARAMS( LA_LINRX_ITHRESH_I ).LT.0.0 ) THEN + PARAMS( LA_LINRX_ITHRESH_I ) = ITHRESH + ELSE + ITHRESH = INT( PARAMS( LA_LINRX_ITHRESH_I ) ) + END IF + END IF + IF ( NPARAMS.GE.LA_LINRX_CWISE_I ) THEN + IF ( PARAMS( LA_LINRX_CWISE_I ).LT.0.0 ) THEN + IF ( IGNORE_CWISE ) THEN + PARAMS( LA_LINRX_CWISE_I ) = 0.0 + ELSE + PARAMS( LA_LINRX_CWISE_I ) = 1.0 + END IF + ELSE + IGNORE_CWISE = PARAMS( LA_LINRX_CWISE_I ) .EQ. 0.0 + END IF + END IF + IF ( REF_TYPE .EQ. 0 .OR. N_ERR_BNDS .EQ. 0 ) THEN + N_NORMS = 0 + ELSE IF ( IGNORE_CWISE ) THEN + N_NORMS = 1 + ELSE + N_NORMS = 2 + END IF +* + RCEQU = LSAME( EQUED, 'Y' ) +* +* Test input parameters. +* + IF ( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RCEQU .AND. .NOT.LSAME( EQUED, 'N' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYRFSX', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + RCOND = 1.0 + DO J = 1, NRHS + BERR( J ) = 0.0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 0.0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 0.0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 1.0 + END IF + END DO + RETURN + END IF +* +* Default to failure. +* + RCOND = 0.0 + DO J = 1, NRHS + BERR( J ) = 1.0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 0.0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 0.0 + END IF + END DO +* +* Compute the norm of A and the reciprocal of the condition +* number of A. +* + NORM = 'I' + ANORM = SLANSY( NORM, UPLO, N, A, LDA, WORK ) + CALL SSYCON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, + $ IWORK, INFO ) +* +* Perform refinement on each right-hand side +* + IF ( REF_TYPE .NE. 0 ) THEN + + PREC_TYPE = ILAPREC( 'D' ) + + CALL SLA_SYRFSX_EXTENDED( PREC_TYPE, UPLO, N, + $ NRHS, A, LDA, AF, LDAF, IPIV, RCEQU, S, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, ERR_BNDS_COMP, + $ WORK( N+1 ), WORK( 1 ), WORK( 2*N+1 ), WORK( 1 ), RCOND, + $ ITHRESH, RTHRESH, UNSTABLE_THRESH, IGNORE_CWISE, + $ INFO ) + END IF + + ERR_LBND = MAX( 10.0, SQRT( REAL( N ) ) )*SLAMCH( 'Epsilon' ) + IF (N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 1) THEN +* +* Compute scaled normwise condition number cond(A*C). +* + IF ( RCEQU ) THEN + RCOND_TMP = SLA_SYRCOND( UPLO, N, A, LDA, AF, LDAF, IPIV, + $ -1, S, INFO, WORK, IWORK ) + ELSE + RCOND_TMP = SLA_SYRCOND( UPLO, N, A, LDA, AF, LDAF, IPIV, + $ 0, S, INFO, WORK, IWORK ) + END IF + DO J = 1, NRHS +* +* Cap the error at 1.0. +* + IF (N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .GT. 1.0) + $ ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 0.0 + IF ( INFO .LE. N ) INFO = N + J + ELSE IF (ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .LT. ERR_LBND) + $ THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0 + END IF +* +* Save the condition number. +* + IF (N_ERR_BNDS .GE. LA_LINRX_RCOND_I) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + END DO + END IF + + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 2 ) THEN +* +* Compute componentwise condition number cond(A*diag(Y(:,J))) for +* each right-hand side using the current solution as an estimate of +* the true solution. If the componentwise error estimate is too +* large, then the solution is a lousy estimate of truth and the +* estimated RCOND may be too optimistic. To avoid misleading users, +* the inverse condition number is set to 0.0 when the estimated +* cwise error is at least CWISE_WRONG. +* + CWISE_WRONG = SQRT( SLAMCH( 'Epsilon' ) ) + DO J = 1, NRHS + IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .LT. CWISE_WRONG ) + $ THEN + RCOND_TMP = SLA_SYRCOND( UPLO, N, A, LDA, AF, LDAF, IPIV, + $ 1, X(1,J), INFO, WORK, IWORK ) + ELSE + RCOND_TMP = 0.0 + END IF +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .GT. 1.0 ) + $ ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 0.0 + IF ( .NOT. IGNORE_CWISE + $ .AND. INFO.LT.N + J ) INFO = N + J + ELSE IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) + $ .LT. ERR_LBND ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF +* + RETURN +* +* End of SSYRFSX +* + END diff --git a/dspl/liblapack/SRC/ssysv.f b/dspl/liblapack/SRC/ssysv.f new file mode 100644 index 0000000..e27e5fc --- /dev/null +++ b/dspl/liblapack/SRC/ssysv.f @@ -0,0 +1,270 @@ +*> \brief SSYSV computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYSV computes the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> The diagonal pivoting method is used to factor A as +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then +*> used to solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the block diagonal matrix D and the +*> multipliers used to obtain the factor U or L from the +*> factorization A = U*D*U**T or A = L*D*L**T as computed by +*> SSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, as +*> determined by SSYTRF. If IPIV(k) > 0, then rows and columns +*> k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +*> diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +*> then rows and columns k-1 and -IPIV(k) were interchanged and +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +*> IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +*> -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +*> diagonal block. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= 1, and for best performance +*> LWORK >= max(1,N*NB), where NB is the optimal blocksize for +*> SSYTRF. +*> for LWORK < N, TRS will be done with Level BLAS 2 +*> for LWORK >= N, TRS will be done with Level BLAS 3 +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYsolve +* +* ===================================================================== + SUBROUTINE SSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SSYTRF, SSYTRS, SSYTRS2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + CALL SSYTRF( UPLO, N, A, LDA, IPIV, WORK, -1, INFO ) + LWKOPT = WORK(1) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYSV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL SSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + IF ( LWORK.LT.N ) THEN +* +* Solve with TRS ( Use Level BLAS 2) +* + CALL SSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* + ELSE +* +* Solve with TRS2 ( Use Level BLAS 3) +* + CALL SSYTRS2( UPLO,N,NRHS,A,LDA,IPIV,B,LDB,WORK,INFO ) +* + END IF +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SSYSV +* + END diff --git a/dspl/liblapack/SRC/ssysv_aa.f b/dspl/liblapack/SRC/ssysv_aa.f new file mode 100644 index 0000000..abf52b1 --- /dev/null +++ b/dspl/liblapack/SRC/ssysv_aa.f @@ -0,0 +1,253 @@ +*> \brief SSYSV_AA computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYSV_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYSV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYSV computes the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Aasen's algorithm is used to factor A as +*> A = U * T * U**T, if UPLO = 'U', or +*> A = L * T * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is symmetric tridiagonal. The factored +*> form of A is then used to solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the tridiagonal matrix T and the +*> multipliers used to obtain the factor U or L from the +*> factorization A = U*T*U**T or A = L*T*L**T as computed by +*> SSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= MAX(1,2*N,3*N-2), and for +*> the best performance, LWORK >= MAX(1,N*NB), where NB is +*> the optimal blocksize for SSYTRF_AA. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realSYsolve +* +* ===================================================================== + SUBROUTINE SSYSV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT, LWKOPT_SYTRF, LWKOPT_SYTRS +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SSYTRS_AA, SSYTRF_AA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX(2*N, 3*N-2) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + CALL SSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, -1, INFO ) + LWKOPT_SYTRF = INT( WORK(1) ) + CALL SSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ -1, INFO ) + LWKOPT_SYTRS = INT( WORK(1) ) + LWKOPT = MAX( LWKOPT_SYTRF, LWKOPT_SYTRS ) + WORK( 1 ) = LWKOPT + IF( LWORK.LT.LWKOPT .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYSV_AA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*T*U**T or A = L*T*L**T. +* + CALL SSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL SSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SSYSV_AA +* + END diff --git a/dspl/liblapack/SRC/ssysv_aa_2stage.f b/dspl/liblapack/SRC/ssysv_aa_2stage.f new file mode 100644 index 0000000..a738c74 --- /dev/null +++ b/dspl/liblapack/SRC/ssysv_aa_2stage.f @@ -0,0 +1,279 @@ +*> \brief SSYSV_AA_2STAGE computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYSV_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYSV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, +* IPIV, IPIV2, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* REAL A( LDA, * ), TB( * ), B( LDB, *), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYSV_AA_2STAGE computes the solution to a real system of +*> linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Aasen's 2-stage algorithm is used to factor A as +*> A = U * T * U**T, if UPLO = 'U', or +*> A = L * T * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is symmetric and band. The matrix T is +*> then LU-factored with partial pivoting. The factored form of A +*> is then used to solve the system of equations A * X = B. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is REAL array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realSYsolve +* +* ===================================================================== + SUBROUTINE SSYSV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LDB, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + REAL A( LDA, * ), B( LDB, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSYTRF_AA_2STAGE, SSYTRS_AA_2STAGE, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + CALL SSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, + $ IPIV2, WORK, -1, INFO ) + LWKOPT = INT( WORK(1) ) + IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYSV_AA_2STAGE', -INFO ) + RETURN + ELSE IF( WQUERY .OR. TQUERY ) THEN + RETURN + END IF +* +* +* Compute the factorization A = U*T*U**T or A = L*T*L**T. +* + CALL SSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, IPIV2, + $ WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL SSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, + $ IPIV2, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SSYSV_AA_2STAGE +* + END diff --git a/dspl/liblapack/SRC/ssysv_rk.f b/dspl/liblapack/SRC/ssysv_rk.f new file mode 100644 index 0000000..9503893 --- /dev/null +++ b/dspl/liblapack/SRC/ssysv_rk.f @@ -0,0 +1,317 @@ +*> \brief SSYSV_RK computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYSV_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYSV_RK( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), B( LDB, * ), E( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> SSYSV_RK computes the solution to a real system of linear +*> equations A * X = B, where A is an N-by-N symmetric matrix +*> and X and B are N-by-NRHS matrices. +*> +*> The bounded Bunch-Kaufman (rook) diagonal pivoting method is used +*> to factor A as +*> A = P*U*D*(U**T)*(P**T), if UPLO = 'U', or +*> A = P*L*D*(L**T)*(P**T), if UPLO = 'L', +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> SSYTRF_RK is called to compute the factorization of a real +*> symmetric matrix. The factored form of A is then used to solve +*> the system of equations A * X = B by calling BLAS3 routine SSYTRS_3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, if INFO = 0, diagonal of the block diagonal +*> matrix D and factors U or L as computed by SSYTRF_RK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> For more info see the description of DSYTRF_RK routine. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N) +*> On exit, contains the output computed by the factorization +*> routine DSYTRF_RK, i.e. the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> +*> For more info see the description of DSYTRF_RK routine. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, +*> as determined by SSYTRF_RK. +*> +*> For more info see the description of DSYTRF_RK routine. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension ( MAX(1,LWORK) ). +*> Work array used in the factorization stage. +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= 1. For best performance +*> of factorization stage LWORK >= max(1,N*NB), where NB is +*> the optimal blocksize for DSYTRF_RK. +*> +*> If LWORK = -1, then a workspace query is assumed; +*> the routine only calculates the optimal size of the WORK +*> array for factorization stage, returns this value as +*> the first entry of the WORK array, and no error message +*> related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup singleSYsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYSV_RK( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, + $ WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), B( LDB, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SSYTRF_RK, SSYTRS_3 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + CALL SSYTRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, -1, INFO ) + LWKOPT = WORK(1) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYSV_RK ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = P*U*D*(U**T)*(P**T) or +* A = P*U*D*(U**T)*(P**T). +* + CALL SSYTRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, INFO ) +* + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B with BLAS3 solver, overwriting B with X. +* + CALL SSYTRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SSYSV_RK +* + END diff --git a/dspl/liblapack/SRC/ssysv_rook.f b/dspl/liblapack/SRC/ssysv_rook.f new file mode 100644 index 0000000..2829f59 --- /dev/null +++ b/dspl/liblapack/SRC/ssysv_rook.f @@ -0,0 +1,293 @@ +*> \brief SSYSV_ROOK computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYSV_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYSV_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYSV_ROOK computes the solution to a real system of linear +*> equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> The diagonal pivoting method is used to factor A as +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> SSYTRF_ROOK is called to compute the factorization of a real +*> symmetric matrix A using the bounded Bunch-Kaufman ("rook") diagonal +*> pivoting method. +*> +*> The factored form of A is then used to solve the system +*> of equations A * X = B by calling SSYTRS_ROOK. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the block diagonal matrix D and the +*> multipliers used to obtain the factor U or L from the +*> factorization A = U*D*U**T or A = L*D*L**T as computed by +*> SSYTRF_ROOK. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, +*> as determined by SSYTRF_ROOK. +*> +*> If UPLO = 'U': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= 1, and for best performance +*> LWORK >= max(1,N*NB), where NB is the optimal blocksize for +*> SSYTRF_ROOK. +*> +*> TRS will be done with Level 2 BLAS +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realSYsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> April 2012, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYSV_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SSYTRF_ROOK, SSYTRS_ROOK +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + CALL SSYTRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, -1, INFO ) + LWKOPT = WORK(1) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYSV_ROOK ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL SSYTRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* +* Solve with TRS_ROOK ( Use Level 2 BLAS) +* + CALL SSYTRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SSYSV_ROOK +* + END diff --git a/dspl/liblapack/SRC/ssysvx.f b/dspl/liblapack/SRC/ssysvx.f new file mode 100644 index 0000000..3f09837 --- /dev/null +++ b/dspl/liblapack/SRC/ssysvx.f @@ -0,0 +1,416 @@ +*> \brief SSYSVX computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, +* LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER FACT, UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYSVX uses the diagonal pivoting factorization to compute the +*> solution to a real system of linear equations A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'N', the diagonal pivoting method is used to factor A. +*> The form of the factorization is +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> 2. If some D(i,i)=0, so that D is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 3. The system of equations is solved for X using the factored form +*> of A. +*> +*> 4. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of A has been +*> supplied on entry. +*> = 'F': On entry, AF and IPIV contain the factored form of +*> A. AF and IPIV will not be modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**T or A = L*D*L**T as computed by SSYTRF. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**T or A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains details of the interchanges and the block structure +*> of D, as determined by SSYTRF. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains details of the interchanges and the block structure +*> of D, as determined by SSYTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The estimate of the reciprocal condition number of the matrix +*> A. If RCOND is less than the machine precision (in +*> particular, if RCOND = 0), the matrix is singular to working +*> precision. This condition is indicated by a return code of +*> INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= max(1,3*N), and for best +*> performance, when FACT = 'N', LWORK >= max(1,3*N,N*NB), where +*> NB is the optimal blocksize for SSYTRF. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: D(i,i) is exactly zero. The factorization +*> has been completed but the factor D is exactly +*> singular, so the solution and error bounds could +*> not be computed. RCOND = 0 is returned. +*> = N+1: D is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realSYsolve +* +* ===================================================================== + SUBROUTINE SSYSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, + $ LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, + $ IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ BERR( * ), FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOFACT + INTEGER LWKOPT, NB + REAL ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + REAL SLAMCH, SLANSY + EXTERNAL ILAENV, LSAME, SLAMCH, SLANSY +* .. +* .. External Subroutines .. + EXTERNAL SLACPY, SSYCON, SSYRFS, SSYTRF, SSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.MAX( 1, 3*N ) .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKOPT = MAX( 1, 3*N ) + IF( NOFACT ) THEN + NB = ILAENV( 1, 'SSYTRF', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( LWKOPT, N*NB ) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYSVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL SLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL SSYTRF( UPLO, N, AF, LDAF, IPIV, WORK, LWORK, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = SLANSY( 'I', UPLO, N, A, LDA, WORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL SSYCON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, IWORK, + $ INFO ) +* +* Compute the solution vectors X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SSYTRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL SSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X, + $ LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.SLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SSYSVX +* + END diff --git a/dspl/liblapack/SRC/ssysvxx.f b/dspl/liblapack/SRC/ssysvxx.f new file mode 100644 index 0000000..4762748 --- /dev/null +++ b/dspl/liblapack/SRC/ssysvxx.f @@ -0,0 +1,699 @@ +*> \brief \b SSYSVXX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYSVXX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYSVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, +* EQUED, S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, +* N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, +* NPARAMS, PARAMS, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* REAL RCOND, RPVGRW +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX, * ), WORK( * ) +* REAL S( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYSVXX uses the diagonal pivoting factorization to compute the +*> solution to a real system of linear equations A * X = B, where A +*> is an N-by-N symmetric matrix and X and B are N-by-NRHS matrices. +*> +*> If requested, both normwise and maximum componentwise error bounds +*> are returned. SSYSVXX will return a solution with a tiny +*> guaranteed error (O(eps) where eps is the working machine +*> precision) unless the matrix is very ill-conditioned, in which +*> case a warning is returned. Relevant condition numbers also are +*> calculated and returned. +*> +*> SSYSVXX accepts user-provided factorizations and equilibration +*> factors; see the definitions of the FACT and EQUED options. +*> Solving with refinement and using a factorization from a previous +*> SSYSVXX call will also produce a solution with either O(eps) +*> errors or warnings, but we cannot make that claim for general +*> user-provided factorizations and equilibration factors if they +*> differ from what SSYSVXX would itself produce. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> +*> diag(S)*A*diag(S) *inv(diag(S))*X = diag(S)*B +*> +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor +*> the matrix A (after equilibration if FACT = 'E') as +*> +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> 3. If some D(i,i)=0, so that D is exactly singular, then the +*> routine returns with INFO = i. Otherwise, the factored form of A +*> is used to estimate the condition number of the matrix A (see +*> argument RCOND). If the reciprocal of the condition number is +*> less than machine precision, the routine still goes on to solve +*> for X and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), +*> the routine will use iterative refinement to try to get a small +*> error and error bounds. Refinement calculates the residual to at +*> least twice the working precision. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(R) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF and IPIV contain the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by S. +*> A, AF, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular +*> part of the matrix A, and the strictly lower triangular +*> part of A is not referenced. If UPLO = 'L', the leading +*> N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is REAL array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L from the factorization A = +*> U*D*U**T or A = L*D*L**T as computed by SSYTRF. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L from the factorization A = +*> U*D*U**T or A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains details of the interchanges and the block +*> structure of D, as determined by SSYTRF. If IPIV(k) > 0, +*> then rows and columns k and IPIV(k) were interchanged and +*> D(k,k) is a 1-by-1 diagonal block. If UPLO = 'U' and +*> IPIV(k) = IPIV(k-1) < 0, then rows and columns k-1 and +*> -IPIV(k) were interchanged and D(k-1:k,k-1:k) is a 2-by-2 +*> diagonal block. If UPLO = 'L' and IPIV(k) = IPIV(k+1) < 0, +*> then rows and columns k+1 and -IPIV(k) were interchanged +*> and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains details of the interchanges and the block +*> structure of D, as determined by SSYTRF. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Both row and column equilibration, i.e., A has been +*> replaced by diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is REAL array, dimension (N) +*> The scale factors for A. If EQUED = 'Y', A is multiplied on +*> the left and right by diag(S). S is an input argument if FACT = +*> 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED +*> = 'Y', each element of S must be positive. If S is output, each +*> element of S is a power of the radix. If S is input, each element +*> of S should be a power of the radix to ensure a reliable solution +*> and error estimates. Scaling by powers of the radix does not cause +*> rounding errors unless the result underflows or overflows. +*> Rounding errors during scaling lead to refining with a matrix that +*> is not equivalent to the input matrix, producing error estimates +*> that may not be reliable. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if EQUED = 'Y', B is overwritten by diag(S)*B; +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X to the original +*> system of equations. Note that A and B are modified on exit if +*> EQUED .ne. 'N', and the solution to the equilibrated system is +*> inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] RPVGRW +*> \verbatim +*> RPVGRW is REAL +*> Reciprocal pivot growth. On exit, this contains the reciprocal +*> pivot growth factor norm(A)/norm(U). The "max absolute element" +*> norm is used. If this is much less than 1, then the stability of +*> the LU factorization of the (equilibrated) matrix A could be poor. +*> This also means that the solution X, estimated condition numbers, +*> and error bounds could be unreliable. If factorization fails with +*> 0 for the leading INFO columns of A. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is REAL array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is REAL array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYsolve +* +* ===================================================================== + SUBROUTINE SSYSVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ EQUED, S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, + $ N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, + $ NPARAMS, PARAMS, WORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + REAL RCOND, RPVGRW +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IWORK( * ) + REAL A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX, * ), WORK( * ) + REAL S( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER INFEQU, J + REAL AMAX, BIGNUM, SMIN, SMAX, SCOND, SMLNUM +* .. +* .. External Functions .. + EXTERNAL LSAME, SLAMCH, SLA_SYRPVGRW + LOGICAL LSAME + REAL SLAMCH, SLA_SYRPVGRW +* .. +* .. External Subroutines .. + EXTERNAL SSYEQUB, SSYTRF, SSYTRS, + $ SLACPY, SLAQSY, XERBLA, SLASCL2, SSYRFSX +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + SMLNUM = SLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + ENDIF +* +* Default is failure. If an input parameter is wrong or +* factorization fails, make everything look horrible. Only the +* pivot growth is set here, the rest is initialized in SSYRFSX. +* + RPVGRW = ZERO +* +* Test the input parameters. PARAMS is not tested until SSYRFSX. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT. + $ LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME(UPLO, 'U') .AND. + $ .NOT.LSAME(UPLO, 'L') ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF ( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYSVXX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL SSYEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL SLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) CALL SLASCL2( N, NRHS, S, B, LDB ) +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LDL^T or UDU^T factorization of A. +* + CALL SLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL SSYTRF( UPLO, N, AF, LDAF, IPIV, WORK, 5*MAX(1,N), INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Pivot in column INFO is exactly 0 +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + IF ( N.GT.0 ) + $ RPVGRW = SLA_SYRPVGRW(UPLO, N, INFO, A, LDA, AF, + $ LDAF, IPIV, WORK ) + RETURN + END IF + END IF +* +* Compute the reciprocal pivot growth factor RPVGRW. +* + IF ( N.GT.0 ) + $ RPVGRW = SLA_SYRPVGRW( UPLO, N, INFO, A, LDA, AF, LDAF, + $ IPIV, WORK ) +* +* Compute the solution matrix X. +* + CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL SSYTRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL SSYRFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, IWORK, INFO ) +* +* Scale solutions. +* + IF ( RCEQU ) THEN + CALL SLASCL2 ( N, NRHS, S, X, LDX ) + END IF +* + RETURN +* +* End of SSYSVXX +* + END diff --git a/dspl/liblapack/SRC/ssyswapr.f b/dspl/liblapack/SRC/ssyswapr.f new file mode 100644 index 0000000..17ce4db --- /dev/null +++ b/dspl/liblapack/SRC/ssyswapr.f @@ -0,0 +1,193 @@ +*> \brief \b SSYSWAPR applies an elementary permutation on the rows and columns of a symmetric matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYSWAPR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYSWAPR( UPLO, N, A, LDA, I1, I2) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER I1, I2, LDA, N +* .. +* .. Array Arguments .. +* REAL A( LDA, N ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYSWAPR applies an elementary permutation on the rows and the columns of +*> a symmetric matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the NB diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by SSYTRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] I1 +*> \verbatim +*> I1 is INTEGER +*> Index of the first row to swap +*> \endverbatim +*> +*> \param[in] I2 +*> \verbatim +*> I2 is INTEGER +*> Index of the second row to swap +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYauxiliary +* +* ===================================================================== + SUBROUTINE SSYSWAPR( UPLO, N, A, LDA, I1, I2) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER I1, I2, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, N ) +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + REAL TMP +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSWAP +* .. +* .. Executable Statements .. +* + UPPER = LSAME( UPLO, 'U' ) + IF (UPPER) THEN +* +* UPPER +* first swap +* - swap column I1 and I2 from I1 to I1-1 + CALL SSWAP( I1-1, A(1,I1), 1, A(1,I2), 1 ) +* +* second swap : +* - swap A(I1,I1) and A(I2,I2) +* - swap row I1 from I1+1 to I2-1 with col I2 from I1+1 to I2-1 + TMP=A(I1,I1) + A(I1,I1)=A(I2,I2) + A(I2,I2)=TMP +* + DO I=1,I2-I1-1 + TMP=A(I1,I1+I) + A(I1,I1+I)=A(I1+I,I2) + A(I1+I,I2)=TMP + END DO +* +* third swap +* - swap row I1 and I2 from I2+1 to N + DO I=I2+1,N + TMP=A(I1,I) + A(I1,I)=A(I2,I) + A(I2,I)=TMP + END DO +* + ELSE +* +* LOWER +* first swap +* - swap row I1 and I2 from I1 to I1-1 + CALL SSWAP( I1-1, A(I1,1), LDA, A(I2,1), LDA ) +* +* second swap : +* - swap A(I1,I1) and A(I2,I2) +* - swap col I1 from I1+1 to I2-1 with row I2 from I1+1 to I2-1 + TMP=A(I1,I1) + A(I1,I1)=A(I2,I2) + A(I2,I2)=TMP +* + DO I=1,I2-I1-1 + TMP=A(I1+I,I1) + A(I1+I,I1)=A(I2,I1+I) + A(I2,I1+I)=TMP + END DO +* +* third swap +* - swap col I1 and I2 from I2+1 to N + DO I=I2+1,N + TMP=A(I,I1) + A(I,I1)=A(I,I2) + A(I,I2)=TMP + END DO +* + ENDIF + END SUBROUTINE SSYSWAPR + diff --git a/dspl/liblapack/SRC/ssytd2.f b/dspl/liblapack/SRC/ssytd2.f new file mode 100644 index 0000000..f6d327c --- /dev/null +++ b/dspl/liblapack/SRC/ssytd2.f @@ -0,0 +1,322 @@ +*> \brief \b SSYTD2 reduces a symmetric matrix to real symmetric tridiagonal form by an orthogonal similarity transformation (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTD2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTD2( UPLO, N, A, LDA, D, E, TAU, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), D( * ), E( * ), TAU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal +*> form T by an orthogonal similarity transformation: Q**T * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the orthogonal +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the orthogonal matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n-1) . . . H(2) H(1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +*> A(1:i-1,i+1), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(n-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +*> and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( d e v2 v3 v4 ) ( d ) +*> ( d e v3 v4 ) ( e d ) +*> ( d e v4 ) ( v1 e d ) +*> ( d e ) ( v1 v2 e d ) +*> ( d ) ( v1 v2 v3 e d ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SSYTD2( UPLO, N, A, LDA, D, E, TAU, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), D( * ), E( * ), TAU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO, HALF + PARAMETER ( ONE = 1.0, ZERO = 0.0, HALF = 1.0 / 2.0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + REAL ALPHA, TAUI +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SLARFG, SSYMV, SSYR2, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTD2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A +* + DO 10 I = N - 1, 1, -1 +* +* Generate elementary reflector H(i) = I - tau * v * v**T +* to annihilate A(1:i-1,i+1) +* + CALL SLARFG( I, A( I, I+1 ), A( 1, I+1 ), 1, TAUI ) + E( I ) = A( I, I+1 ) +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(1:i,1:i) +* + A( I, I+1 ) = ONE +* +* Compute x := tau * A * v storing x in TAU(1:i) +* + CALL SSYMV( UPLO, I, TAUI, A, LDA, A( 1, I+1 ), 1, ZERO, + $ TAU, 1 ) +* +* Compute w := x - 1/2 * tau * (x**T * v) * v +* + ALPHA = -HALF*TAUI*SDOT( I, TAU, 1, A( 1, I+1 ), 1 ) + CALL SAXPY( I, ALPHA, A( 1, I+1 ), 1, TAU, 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w**T - w * v**T +* + CALL SSYR2( UPLO, I, -ONE, A( 1, I+1 ), 1, TAU, 1, A, + $ LDA ) +* + A( I, I+1 ) = E( I ) + END IF + D( I+1 ) = A( I+1, I+1 ) + TAU( I ) = TAUI + 10 CONTINUE + D( 1 ) = A( 1, 1 ) + ELSE +* +* Reduce the lower triangle of A +* + DO 20 I = 1, N - 1 +* +* Generate elementary reflector H(i) = I - tau * v * v**T +* to annihilate A(i+2:n,i) +* + CALL SLARFG( N-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1, + $ TAUI ) + E( I ) = A( I+1, I ) +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(i+1:n,i+1:n) +* + A( I+1, I ) = ONE +* +* Compute x := tau * A * v storing y in TAU(i:n-1) +* + CALL SSYMV( UPLO, N-I, TAUI, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, TAU( I ), 1 ) +* +* Compute w := x - 1/2 * tau * (x**T * v) * v +* + ALPHA = -HALF*TAUI*SDOT( N-I, TAU( I ), 1, A( I+1, I ), + $ 1 ) + CALL SAXPY( N-I, ALPHA, A( I+1, I ), 1, TAU( I ), 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w**T - w * v**T +* + CALL SSYR2( UPLO, N-I, -ONE, A( I+1, I ), 1, TAU( I ), 1, + $ A( I+1, I+1 ), LDA ) +* + A( I+1, I ) = E( I ) + END IF + D( I ) = A( I, I ) + TAU( I ) = TAUI + 20 CONTINUE + D( N ) = A( N, N ) + END IF +* + RETURN +* +* End of SSYTD2 +* + END diff --git a/dspl/liblapack/SRC/ssytf2.f b/dspl/liblapack/SRC/ssytf2.f new file mode 100644 index 0000000..9e31cbb --- /dev/null +++ b/dspl/liblapack/SRC/ssytf2.f @@ -0,0 +1,611 @@ +*> \brief \b SSYTF2 computes the factorization of a real symmetric indefinite matrix, using the diagonal pivoting method (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTF2( UPLO, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTF2 computes the factorization of a real symmetric matrix A using +*> the Bunch-Kaufman diagonal pivoting method: +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, U**T is the transpose of U, and D is symmetric and +*> block diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k-1) < 0, then rows and columns +*> k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k+1) < 0, then rows and columns +*> k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1) +*> is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> 09-29-06 - patch from +*> Bobby Cheng, MathWorks +*> +*> Replace l.204 and l.372 +*> IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +*> by +*> IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) .OR. SISNAN(ABSAKK) ) THEN +*> +*> 01-01-96 - Based on modifications by +*> J. Lewis, Boeing Computer Services Company +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +*> 1-96 - Based on modifications by J. Lewis, Boeing Computer Services +*> Company +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYTF2( UPLO, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KK, KP, KSTEP + REAL ABSAKK, ALPHA, COLMAX, D11, D12, D21, D22, R1, + $ ROWMAX, T, WK, WKM1, WKP1 +* .. +* .. External Functions .. + LOGICAL LSAME, SISNAN + INTEGER ISAMAX + EXTERNAL LSAME, ISAMAX, SISNAN +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSWAP, SSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTF2', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 70 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = ISAMAX( K-1, A( 1, K ), 1 ) + COLMAX = ABS( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) .OR. SISNAN(ABSAKK) ) THEN +* +* Column K is zero or underflow, or contains a NaN: +* set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + ISAMAX( K-IMAX, A( IMAX, IMAX+1 ), LDA ) + ROWMAX = ABS( A( IMAX, JMAX ) ) + IF( IMAX.GT.1 ) THEN + JMAX = ISAMAX( IMAX-1, A( 1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL SSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) + CALL SSWAP( KK-KP-1, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)**T = A - W(k)*1/D(k)*W(k)**T +* + R1 = ONE / A( K, K ) + CALL SSYR( UPLO, K-1, -R1, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL SSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )**T +* + IF( K.GT.2 ) THEN +* + D12 = A( K-1, K ) + D22 = A( K-1, K-1 ) / D12 + D11 = A( K, K ) / D12 + T = ONE / ( D11*D22-ONE ) + D12 = T / D12 +* + DO 30 J = K - 2, 1, -1 + WKM1 = D12*( D11*A( J, K-1 )-A( J, K ) ) + WK = D12*( D22*A( J, K )-A( J, K-1 ) ) + DO 20 I = J, 1, -1 + A( I, J ) = A( I, J ) - A( I, K )*WK - + $ A( I, K-1 )*WKM1 + 20 CONTINUE + A( J, K ) = WK + A( J, K-1 ) = WKM1 + 30 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 70 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + ISAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = ABS( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) .OR. SISNAN(ABSAKK) ) THEN +* +* Column K is zero or underflow, or contains a NaN: +* set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + ISAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = ABS( A( IMAX, JMAX ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + ISAMAX( N-IMAX, A( IMAX+1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, ABS( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL SSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) + CALL SSWAP( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)**T = A - W(k)*(1/D(k))*W(k)**T +* + D11 = ONE / A( K, K ) + CALL SSYR( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column K +* + CALL SSCAL( N-K, D11, A( K+1, K ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k) +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( (A(k) A(k+1))*D(k)**(-1) ) * (A(k) A(k+1))**T +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D21 = A( K+1, K ) + D11 = A( K+1, K+1 ) / D21 + D22 = A( K, K ) / D21 + T = ONE / ( D11*D22-ONE ) + D21 = T / D21 +* + DO 60 J = K + 2, N +* + WK = D21*( D11*A( J, K )-A( J, K+1 ) ) + WKP1 = D21*( D22*A( J, K+1 )-A( J, K ) ) +* + DO 50 I = J, N + A( I, J ) = A( I, J ) - A( I, K )*WK - + $ A( I, K+1 )*WKP1 + 50 CONTINUE +* + A( J, K ) = WK + A( J, K+1 ) = WKP1 +* + 60 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 40 +* + END IF +* + 70 CONTINUE +* + RETURN +* +* End of SSYTF2 +* + END diff --git a/dspl/liblapack/SRC/ssytf2_rk.f b/dspl/liblapack/SRC/ssytf2_rk.f new file mode 100644 index 0000000..bf113d1 --- /dev/null +++ b/dspl/liblapack/SRC/ssytf2_rk.f @@ -0,0 +1,943 @@ +*> \brief \b SSYTF2_RK computes the factorization of a real symmetric indefinite matrix using the bounded Bunch-Kaufman (rook) diagonal pivoting method (BLAS2 unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTF2_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTF2_RK( UPLO, N, A, LDA, E, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), E ( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> SSYTF2_RK computes the factorization of a real symmetric matrix A +*> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> For more information see Further Details section. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, contains: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N) +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> IPIV describes the permutation matrix P in the factorization +*> of matrix A as follows. The absolute value of IPIV(k) +*> represents the index of row and column that were +*> interchanged with the k-th row and column. The value of UPLO +*> describes the order in which the interchanges were applied. +*> Also, the sign of IPIV represents the block structure of +*> the symmetric block diagonal matrix D with 1-by-1 or 2-by-2 +*> diagonal blocks which correspond to 1 or 2 interchanges +*> at each factorization step. For more info see Further +*> Details section. +*> +*> If UPLO = 'U', +*> ( in factorization order, k decreases from N to 1 ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N); +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k-1) < 0 means: +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k-1) != k-1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k-1) = k-1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) <= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> +*> If UPLO = 'L', +*> ( in factorization order, k increases from 1 to N ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N). +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k+1) < 0 means: +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k+1) != k+1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k+1) = k+1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) >= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup singleSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> TODO: put further details +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> 01-01-96 - Based on modifications by +*> J. Lewis, Boeing Computer Services Company +*> A. Petitet, Computer Science Dept., +*> Univ. of Tenn., Knoxville abd , USA +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYTF2_RK( UPLO, N, A, LDA, E, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER, DONE + INTEGER I, IMAX, J, JMAX, ITEMP, K, KK, KP, KSTEP, + $ P, II + REAL ABSAKK, ALPHA, COLMAX, D11, D12, D21, D22, + $ ROWMAX, STEMP, T, WK, WKM1, WKP1, SFMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSWAP, SSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTF2_RK', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = SLAMCH( 'S' ) +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* Initilize the first entry of array E, where superdiagonal +* elements of D are stored +* + E( 1 ) = ZERO +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 34 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = ISAMAX( K-1, A( 1, K ), 1 ) + COLMAX = ABS( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K +* +* Set E( K ) to zero +* + IF( K.GT.1 ) + $ E( K ) = ZERO +* + ELSE +* +* Test for interchange +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABSAKK.GE.ALPHA*COLMAX +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, +* use 1-by-1 pivot block +* + KP = K + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 12 CONTINUE +* +* Begin pivot search loop body +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + ISAMAX( K-IMAX, A( IMAX, IMAX+1 ), + $ LDA ) + ROWMAX = ABS( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = ISAMAX( IMAX-1, A( 1, IMAX ), 1 ) + STEMP = ABS( A( ITEMP, IMAX ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX +* + IF( .NOT.( ABS( A( IMAX, IMAX ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX .EQ. COLMAX, +* used to handle NaN and Inf +* + ELSE IF( ( P.EQ.JMAX ).OR.( ROWMAX.LE.COLMAX ) ) THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot NOT found, set variables and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 12 +* + END IF +* +* Swap TWO rows and TWO columns +* +* First swap +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Interchange rows and column K and P in the leading +* submatrix A(1:k,1:k) if we have a 2-by-2 pivot +* + IF( P.GT.1 ) + $ CALL SSWAP( P-1, A( 1, K ), 1, A( 1, P ), 1 ) + IF( P.LT.(K-1) ) + $ CALL SSWAP( K-P-1, A( P+1, K ), 1, A( P, P+1 ), + $ LDA ) + T = A( K, K ) + A( K, K ) = A( P, P ) + A( P, P ) = T +* +* Convert upper triangle of A into U form by applying +* the interchanges in columns k+1:N. +* + IF( K.LT.N ) + $ CALL SSWAP( N-K, A( K, K+1 ), LDA, A( P, K+1 ), LDA ) +* + END IF +* +* Second swap +* + KK = K - KSTEP + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + IF( KP.GT.1 ) + $ CALL SSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) + IF( ( KK.GT.1 ) .AND. ( KP.LT.(KK-1) ) ) + $ CALL SSWAP( KK-KP-1, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF +* +* Convert upper triangle of A into U form by applying +* the interchanges in columns k+1:N. +* + IF( K.LT.N ) + $ CALL SSWAP( N-K, A( KK, K+1 ), LDA, A( KP, K+1 ), + $ LDA ) +* + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* + IF( K.GT.1 ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) and +* store U(k) in column k +* + IF( ABS( A( K, K ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*1/D(k)*W(k)**T +* + D11 = ONE / A( K, K ) + CALL SSYR( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL SSCAL( K-1, D11, A( 1, K ), 1 ) + ELSE +* +* Store L(k) in column K +* + D11 = A( K, K ) + DO 16 II = 1, K - 1 + A( II, K ) = A( II, K ) / D11 + 16 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL SSYR( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) + END IF +* +* Store the superdiagonal element of D in array E +* + E( K ) = ZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T +* = A - ( ( A(k-1)A(k) )*inv(D(k)) ) * ( A(k-1)A(k) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.GT.2 ) THEN +* + D12 = A( K-1, K ) + D22 = A( K-1, K-1 ) / D12 + D11 = A( K, K ) / D12 + T = ONE / ( D11*D22-ONE ) +* + DO 30 J = K - 2, 1, -1 +* + WKM1 = T*( D11*A( J, K-1 )-A( J, K ) ) + WK = T*( D22*A( J, K )-A( J, K-1 ) ) +* + DO 20 I = J, 1, -1 + A( I, J ) = A( I, J ) - (A( I, K ) / D12 )*WK - + $ ( A( I, K-1 ) / D12 )*WKM1 + 20 CONTINUE +* +* Store U(k) and U(k-1) in cols k and k-1 for row J +* + A( J, K ) = WK / D12 + A( J, K-1 ) = WKM1 / D12 +* + 30 CONTINUE +* + END IF +* +* Copy superdiagonal elements of D(K) to E(K) and +* ZERO out superdiagonal entry of A +* + E( K ) = A( K-1, K ) + E( K-1 ) = ZERO + A( K-1, K ) = ZERO +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 34 CONTINUE +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* Initilize the unused last entry of the subdiagonal array E. +* + E( N ) = ZERO +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 64 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + ISAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = ABS( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( ( MAX( ABSAKK, COLMAX ).EQ.ZERO ) ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K +* +* Set E( K ) to zero +* + IF( K.LT.N ) + $ E( K ) = ZERO +* + ELSE +* +* Test for interchange +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABSAKK.GE.ALPHA*COLMAX +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 42 CONTINUE +* +* Begin pivot search loop body +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + ISAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = ABS( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + ISAMAX( N-IMAX, A( IMAX+1, IMAX ), + $ 1 ) + STEMP = ABS( A( ITEMP, IMAX ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX +* + IF( .NOT.( ABS( A( IMAX, IMAX ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX .EQ. COLMAX, +* used to handle NaN and Inf +* + ELSE IF( ( P.EQ.JMAX ).OR.( ROWMAX.LE.COLMAX ) ) THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot NOT found, set variables and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 42 +* + END IF +* +* Swap TWO rows and TWO columns +* +* First swap +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Interchange rows and column K and P in the trailing +* submatrix A(k:n,k:n) if we have a 2-by-2 pivot +* + IF( P.LT.N ) + $ CALL SSWAP( N-P, A( P+1, K ), 1, A( P+1, P ), 1 ) + IF( P.GT.(K+1) ) + $ CALL SSWAP( P-K-1, A( K+1, K ), 1, A( P, K+1 ), LDA ) + T = A( K, K ) + A( K, K ) = A( P, P ) + A( P, P ) = T +* +* Convert lower triangle of A into L form by applying +* the interchanges in columns 1:k-1. +* + IF ( K.GT.1 ) + $ CALL SSWAP( K-1, A( K, 1 ), LDA, A( P, 1 ), LDA ) +* + END IF +* +* Second swap +* + KK = K + KSTEP - 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL SSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) + IF( ( KK.LT.N ) .AND. ( KP.GT.(KK+1) ) ) + $ CALL SSWAP( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF +* +* Convert lower triangle of A into L form by applying +* the interchanges in columns 1:k-1. +* + IF ( K.GT.1 ) + $ CALL SSWAP( K-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) +* + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) and +* store L(k) in column k +* + IF( ABS( A( K, K ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* + D11 = ONE / A( K, K ) + CALL SSYR( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column k +* + CALL SSCAL( N-K, D11, A( K+1, K ), 1 ) + ELSE +* +* Store L(k) in column k +* + D11 = A( K, K ) + DO 46 II = K + 1, N + A( II, K ) = A( II, K ) / D11 + 46 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL SSYR( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) + END IF +* +* Store the subdiagonal element of D in array E +* + E( K ) = ZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) ) * D(k) * ( L(k) L(k+1) )**T +* = A - ( ( A(k)A(k+1) )*inv(D(k) ) * ( A(k)A(k+1) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.LT.N-1 ) THEN +* + D21 = A( K+1, K ) + D11 = A( K+1, K+1 ) / D21 + D22 = A( K, K ) / D21 + T = ONE / ( D11*D22-ONE ) +* + DO 60 J = K + 2, N +* +* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J +* + WK = T*( D11*A( J, K )-A( J, K+1 ) ) + WKP1 = T*( D22*A( J, K+1 )-A( J, K ) ) +* +* Perform a rank-2 update of A(k+2:n,k+2:n) +* + DO 50 I = J, N + A( I, J ) = A( I, J ) - ( A( I, K ) / D21 )*WK - + $ ( A( I, K+1 ) / D21 )*WKP1 + 50 CONTINUE +* +* Store L(k) and L(k+1) in cols k and k+1 for row J +* + A( J, K ) = WK / D21 + A( J, K+1 ) = WKP1 / D21 +* + 60 CONTINUE +* + END IF +* +* Copy subdiagonal elements of D(K) to E(K) and +* ZERO out subdiagonal entry of A +* + E( K ) = A( K+1, K ) + E( K+1 ) = ZERO + A( K+1, K ) = ZERO +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 40 +* + 64 CONTINUE +* + END IF +* + RETURN +* +* End of SSYTF2_RK +* + END diff --git a/dspl/liblapack/SRC/ssytf2_rook.f b/dspl/liblapack/SRC/ssytf2_rook.f new file mode 100644 index 0000000..270530d --- /dev/null +++ b/dspl/liblapack/SRC/ssytf2_rook.f @@ -0,0 +1,813 @@ +*> \brief \b SSYTF2_ROOK computes the factorization of a real symmetric indefinite matrix using the bounded Bunch-Kaufman ("rook") diagonal pivoting method (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTF2_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTF2_ROOK( UPLO, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTF2_ROOK computes the factorization of a real symmetric matrix A +*> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method: +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, U**T is the transpose of U, and D is symmetric and +*> block diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup realSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> 01-01-96 - Based on modifications by +*> J. Lewis, Boeing Computer Services Company +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville abd , USA +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYTF2_ROOK( UPLO, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER, DONE + INTEGER I, IMAX, J, JMAX, ITEMP, K, KK, KP, KSTEP, + $ P, II + REAL ABSAKK, ALPHA, COLMAX, D11, D12, D21, D22, + $ ROWMAX, STEMP, T, WK, WKM1, WKP1, SFMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH + EXTERNAL LSAME, ISAMAX, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSWAP, SSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTF2_ROOK', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = SLAMCH( 'S' ) +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 70 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = ISAMAX( K-1, A( 1, K ), 1 ) + COLMAX = ABS( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE +* +* Test for interchange +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABSAKK.GE.ALPHA*COLMAX +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, +* use 1-by-1 pivot block +* + KP = K + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 12 CONTINUE +* +* Begin pivot search loop body +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + ISAMAX( K-IMAX, A( IMAX, IMAX+1 ), + $ LDA ) + ROWMAX = ABS( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = ISAMAX( IMAX-1, A( 1, IMAX ), 1 ) + STEMP = ABS( A( ITEMP, IMAX ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX +* + IF( .NOT.( ABS( A( IMAX, IMAX ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX .EQ. COLMAX, +* used to handle NaN and Inf +* + ELSE IF( ( P.EQ.JMAX ).OR.( ROWMAX.LE.COLMAX ) ) THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot NOT found, set variables and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 12 +* + END IF +* +* Swap TWO rows and TWO columns +* +* First swap +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Interchange rows and column K and P in the leading +* submatrix A(1:k,1:k) if we have a 2-by-2 pivot +* + IF( P.GT.1 ) + $ CALL SSWAP( P-1, A( 1, K ), 1, A( 1, P ), 1 ) + IF( P.LT.(K-1) ) + $ CALL SSWAP( K-P-1, A( P+1, K ), 1, A( P, P+1 ), + $ LDA ) + T = A( K, K ) + A( K, K ) = A( P, P ) + A( P, P ) = T + END IF +* +* Second swap +* + KK = K - KSTEP + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + IF( KP.GT.1 ) + $ CALL SSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) + IF( ( KK.GT.1 ) .AND. ( KP.LT.(KK-1) ) ) + $ CALL SSWAP( KK-KP-1, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* + IF( K.GT.1 ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) and +* store U(k) in column k +* + IF( ABS( A( K, K ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*1/D(k)*W(k)**T +* + D11 = ONE / A( K, K ) + CALL SSYR( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL SSCAL( K-1, D11, A( 1, K ), 1 ) + ELSE +* +* Store L(k) in column K +* + D11 = A( K, K ) + DO 16 II = 1, K - 1 + A( II, K ) = A( II, K ) / D11 + 16 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL SSYR( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T +* = A - ( ( A(k-1)A(k) )*inv(D(k)) ) * ( A(k-1)A(k) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.GT.2 ) THEN +* + D12 = A( K-1, K ) + D22 = A( K-1, K-1 ) / D12 + D11 = A( K, K ) / D12 + T = ONE / ( D11*D22-ONE ) +* + DO 30 J = K - 2, 1, -1 +* + WKM1 = T*( D11*A( J, K-1 )-A( J, K ) ) + WK = T*( D22*A( J, K )-A( J, K-1 ) ) +* + DO 20 I = J, 1, -1 + A( I, J ) = A( I, J ) - (A( I, K ) / D12 )*WK - + $ ( A( I, K-1 ) / D12 )*WKM1 + 20 CONTINUE +* +* Store U(k) and U(k-1) in cols k and k-1 for row J +* + A( J, K ) = WK / D12 + A( J, K-1 ) = WKM1 / D12 +* + 30 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 70 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + ISAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = ABS( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( ( MAX( ABSAKK, COLMAX ).EQ.ZERO ) ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE +* +* Test for interchange +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABSAKK.GE.ALPHA*COLMAX +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 42 CONTINUE +* +* Begin pivot search loop body +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + ISAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = ABS( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + ISAMAX( N-IMAX, A( IMAX+1, IMAX ), + $ 1 ) + STEMP = ABS( A( ITEMP, IMAX ) ) + IF( STEMP.GT.ROWMAX ) THEN + ROWMAX = STEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX +* + IF( .NOT.( ABS( A( IMAX, IMAX ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX .EQ. COLMAX, +* used to handle NaN and Inf +* + ELSE IF( ( P.EQ.JMAX ).OR.( ROWMAX.LE.COLMAX ) ) THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot NOT found, set variables and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 42 +* + END IF +* +* Swap TWO rows and TWO columns +* +* First swap +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Interchange rows and column K and P in the trailing +* submatrix A(k:n,k:n) if we have a 2-by-2 pivot +* + IF( P.LT.N ) + $ CALL SSWAP( N-P, A( P+1, K ), 1, A( P+1, P ), 1 ) + IF( P.GT.(K+1) ) + $ CALL SSWAP( P-K-1, A( K+1, K ), 1, A( P, K+1 ), LDA ) + T = A( K, K ) + A( K, K ) = A( P, P ) + A( P, P ) = T + END IF +* +* Second swap +* + KK = K + KSTEP - 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL SSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) + IF( ( KK.LT.N ) .AND. ( KP.GT.(KK+1) ) ) + $ CALL SSWAP( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) and +* store L(k) in column k +* + IF( ABS( A( K, K ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* + D11 = ONE / A( K, K ) + CALL SSYR( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column k +* + CALL SSCAL( N-K, D11, A( K+1, K ), 1 ) + ELSE +* +* Store L(k) in column k +* + D11 = A( K, K ) + DO 46 II = K + 1, N + A( II, K ) = A( II, K ) / D11 + 46 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL SSYR( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) ) * D(k) * ( L(k) L(k+1) )**T +* = A - ( ( A(k)A(k+1) )*inv(D(k) ) * ( A(k)A(k+1) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.LT.N-1 ) THEN +* + D21 = A( K+1, K ) + D11 = A( K+1, K+1 ) / D21 + D22 = A( K, K ) / D21 + T = ONE / ( D11*D22-ONE ) +* + DO 60 J = K + 2, N +* +* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J +* + WK = T*( D11*A( J, K )-A( J, K+1 ) ) + WKP1 = T*( D22*A( J, K+1 )-A( J, K ) ) +* +* Perform a rank-2 update of A(k+2:n,k+2:n) +* + DO 50 I = J, N + A( I, J ) = A( I, J ) - ( A( I, K ) / D21 )*WK - + $ ( A( I, K+1 ) / D21 )*WKP1 + 50 CONTINUE +* +* Store L(k) and L(k+1) in cols k and k+1 for row J +* + A( J, K ) = WK / D21 + A( J, K+1 ) = WKP1 / D21 +* + 60 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 40 +* + END IF +* + 70 CONTINUE +* + RETURN +* +* End of SSYTF2_ROOK +* + END diff --git a/dspl/liblapack/SRC/ssytrd.f b/dspl/liblapack/SRC/ssytrd.f new file mode 100644 index 0000000..ebfaba7 --- /dev/null +++ b/dspl/liblapack/SRC/ssytrd.f @@ -0,0 +1,376 @@ +*> \brief \b SSYTRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), D( * ), E( * ), TAU( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRD reduces a real symmetric matrix A to real symmetric +*> tridiagonal form T by an orthogonal similarity transformation: +*> Q**T * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the orthogonal +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the orthogonal matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 1. +*> For optimum performance LWORK >= N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n-1) . . . H(2) H(1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +*> A(1:i-1,i+1), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(n-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +*> and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( d e v2 v3 v4 ) ( d ) +*> ( d e v3 v4 ) ( e d ) +*> ( d e v4 ) ( v1 e d ) +*> ( d e ) ( v1 v2 e d ) +*> ( d ) ( v1 v2 v3 e d ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SSYTRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), D( * ), E( * ), TAU( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, IWS, J, KK, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL SLATRD, SSYR2K, SSYTD2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. +* + NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NX = N + IWS = 1 + IF( NB.GT.1 .AND. NB.LT.N ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code). +* + NX = MAX( NB, ILAENV( 3, 'SSYTRD', UPLO, N, -1, -1, -1 ) ) + IF( NX.LT.N ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code by setting NX = N. +* + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = ILAENV( 2, 'SSYTRD', UPLO, N, -1, -1, -1 ) + IF( NB.LT.NBMIN ) + $ NX = N + END IF + ELSE + NX = N + END IF + ELSE + NB = 1 + END IF +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A. +* Columns 1:kk are handled by the unblocked method. +* + KK = N - ( ( N-NX+NB-1 ) / NB )*NB + DO 20 I = N - NB + 1, KK + 1, -NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL SLATRD( UPLO, I+NB-1, NB, A, LDA, E, TAU, WORK, + $ LDWORK ) +* +* Update the unreduced submatrix A(1:i-1,1:i-1), using an +* update of the form: A := A - V*W**T - W*V**T +* + CALL SSYR2K( UPLO, 'No transpose', I-1, NB, -ONE, A( 1, I ), + $ LDA, WORK, LDWORK, ONE, A, LDA ) +* +* Copy superdiagonal elements back into A, and diagonal +* elements into D +* + DO 10 J = I, I + NB - 1 + A( J-1, J ) = E( J-1 ) + D( J ) = A( J, J ) + 10 CONTINUE + 20 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL SSYTD2( UPLO, KK, A, LDA, D, E, TAU, IINFO ) + ELSE +* +* Reduce the lower triangle of A +* + DO 40 I = 1, N - NX, NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL SLATRD( UPLO, N-I+1, NB, A( I, I ), LDA, E( I ), + $ TAU( I ), WORK, LDWORK ) +* +* Update the unreduced submatrix A(i+ib:n,i+ib:n), using +* an update of the form: A := A - V*W**T - W*V**T +* + CALL SSYR2K( UPLO, 'No transpose', N-I-NB+1, NB, -ONE, + $ A( I+NB, I ), LDA, WORK( NB+1 ), LDWORK, ONE, + $ A( I+NB, I+NB ), LDA ) +* +* Copy subdiagonal elements back into A, and diagonal +* elements into D +* + DO 30 J = I, I + NB - 1 + A( J+1, J ) = E( J ) + D( J ) = A( J, J ) + 30 CONTINUE + 40 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL SSYTD2( UPLO, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAU( I ), IINFO ) + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of SSYTRD +* + END diff --git a/dspl/liblapack/SRC/ssytrd_2stage.f b/dspl/liblapack/SRC/ssytrd_2stage.f new file mode 100644 index 0000000..7ddc022 --- /dev/null +++ b/dspl/liblapack/SRC/ssytrd_2stage.f @@ -0,0 +1,337 @@ +*> \brief \b SSYTRD_2STAGE +* +* @generated from zhetrd_2stage.f, fortran z -> s, Sun Nov 6 19:34:06 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRD_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU, +* HOUS2, LHOUS2, WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER VECT, UPLO +* INTEGER N, LDA, LWORK, LHOUS2, INFO +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ) +* REAL A( LDA, * ), TAU( * ), +* HOUS2( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRD_2STAGE reduces a real symmetric matrix A to real symmetric +*> tridiagonal form T by a orthogonal similarity transformation: +*> Q1**T Q2**T* A * Q2 * Q1 = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': No need for the Housholder representation, +*> in particular for the second stage (Band to +*> tridiagonal) and thus LHOUS2 is of size max(1, 4*N); +*> = 'V': the Householder representation is needed to +*> either generate Q1 Q2 or to apply Q1 Q2, +*> then LHOUS2 is to be queried and computed. +*> (NOT AVAILABLE IN THIS RELEASE). +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the band superdiagonal +*> of A are overwritten by the corresponding elements of the +*> internal band-diagonal matrix AB, and the elements above +*> the KD superdiagonal, with the array TAU, represent the orthogonal +*> matrix Q1 as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and band subdiagonal of A are over- +*> written by the corresponding elements of the internal band-diagonal +*> matrix AB, and the elements below the KD subdiagonal, with +*> the array TAU, represent the orthogonal matrix Q1 as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (N-KD) +*> The scalar factors of the elementary reflectors of +*> the first stage (see Further Details). +*> \endverbatim +*> +*> \param[out] HOUS2 +*> \verbatim +*> HOUS2 is REAL array, dimension LHOUS2, that +*> store the Householder representation of the stage2 +*> band to tridiagonal. +*> \endverbatim +*> +*> \param[in] LHOUS2 +*> \verbatim +*> LHOUS2 is INTEGER +*> The dimension of the array HOUS2. LHOUS2 = MAX(1, dimension) +*> If LWORK = -1, or LHOUS2=-1, +*> then a query is assumed; the routine +*> only calculates the optimal size of the HOUS2 array, returns +*> this value as the first entry of the HOUS2 array, and no error +*> message related to LHOUS2 is issued by XERBLA. +*> LHOUS2 = MAX(1, dimension) where +*> dimension = 4*N if VECT='N' +*> not available now if VECT='H' +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK = MAX(1, dimension) +*> If LWORK = -1, or LHOUS2=-1, +*> then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SSYTRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU, + $ HOUS2, LHOUS2, WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER VECT, UPLO + INTEGER N, LDA, LWORK, LHOUS2, INFO +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + REAL A( LDA, * ), TAU( * ), + $ HOUS2( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTQ + INTEGER KD, IB, LWMIN, LHMIN, LWRK, LDAB, WPOS, ABPOS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SSYTRD_SY2SB, SSYTRD_SB2ST +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + EXTERNAL LSAME, ILAENV2STAGE +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + WANTQ = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) .OR. ( LHOUS2.EQ.-1 ) +* +* Determine the block size, the workspace size and the hous size. +* + KD = ILAENV2STAGE( 1, 'SSYTRD_2STAGE', VECT, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'SSYTRD_2STAGE', VECT, N, KD, -1, -1 ) + LHMIN = ILAENV2STAGE( 3, 'SSYTRD_2STAGE', VECT, N, KD, IB, -1 ) + LWMIN = ILAENV2STAGE( 4, 'SSYTRD_2STAGE', VECT, N, KD, IB, -1 ) +* WRITE(*,*),'SSYTRD_2STAGE N KD UPLO LHMIN LWMIN ',N, KD, UPLO, +* $ LHMIN, LWMIN +* + IF( .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LHOUS2.LT.LHMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + HOUS2( 1 ) = LHMIN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine pointer position +* + LDAB = KD+1 + LWRK = LWORK-LDAB*N + ABPOS = 1 + WPOS = ABPOS + LDAB*N + CALL SSYTRD_SY2SB( UPLO, N, KD, A, LDA, WORK( ABPOS ), LDAB, + $ TAU, WORK( WPOS ), LWRK, INFO ) + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRD_SY2SB', -INFO ) + RETURN + END IF + CALL SSYTRD_SB2ST( 'Y', VECT, UPLO, N, KD, + $ WORK( ABPOS ), LDAB, D, E, + $ HOUS2, LHOUS2, WORK( WPOS ), LWRK, INFO ) + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRD_SB2ST', -INFO ) + RETURN + END IF +* +* + HOUS2( 1 ) = LHMIN + WORK( 1 ) = LWMIN + RETURN +* +* End of SSYTRD_2STAGE +* + END diff --git a/dspl/liblapack/SRC/ssytrd_sb2st.F b/dspl/liblapack/SRC/ssytrd_sb2st.F new file mode 100644 index 0000000..891ec9b --- /dev/null +++ b/dspl/liblapack/SRC/ssytrd_sb2st.F @@ -0,0 +1,556 @@ +*> \brief \b SSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric tridiagonal form T +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRD_SB2ST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRD_SB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, +* D, E, HOUS, LHOUS, WORK, LWORK, INFO ) +* +* #if defined(_OPENMP) +* use omp_lib +* #endif +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER STAGE1, UPLO, VECT +* INTEGER N, KD, IB, LDAB, LHOUS, LWORK, INFO +* .. +* .. Array Arguments .. +* REAL D( * ), E( * ) +* REAL AB( LDAB, * ), HOUS( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric +*> tridiagonal form T by a orthogonal similarity transformation: +*> Q**T * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] STAGE +*> \verbatim +*> STAGE is CHARACTER*1 +*> = 'N': "No": to mention that the stage 1 of the reduction +*> from dense to band using the ssytrd_sy2sb routine +*> was not called before this routine to reproduce AB. +*> In other term this routine is called as standalone. +*> = 'Y': "Yes": to mention that the stage 1 of the +*> reduction from dense to band using the ssytrd_sy2sb +*> routine has been called to produce AB (e.g., AB is +*> the output of ssytrd_sy2sb. +*> \endverbatim +*> +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': No need for the Housholder representation, +*> and thus LHOUS is of size max(1, 4*N); +*> = 'V': the Householder representation is needed to +*> either generate or to apply Q later on, +*> then LHOUS is to be queried and computed. +*> (NOT AVAILABLE IN THIS RELEASE). +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> On exit, the diagonal elements of AB are overwritten by the +*> diagonal elements of the tridiagonal matrix T; if KD > 0, the +*> elements on the first superdiagonal (if UPLO = 'U') or the +*> first subdiagonal (if UPLO = 'L') are overwritten by the +*> off-diagonal elements of T; the rest of AB is overwritten by +*> values generated during the reduction. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is REAL array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] HOUS +*> \verbatim +*> HOUS is REAL array, dimension LHOUS, that +*> store the Householder representation. +*> \endverbatim +*> +*> \param[in] LHOUS +*> \verbatim +*> LHOUS is INTEGER +*> The dimension of the array HOUS. LHOUS = MAX(1, dimension) +*> If LWORK = -1, or LHOUS=-1, +*> then a query is assumed; the routine +*> only calculates the optimal size of the HOUS array, returns +*> this value as the first entry of the HOUS array, and no error +*> message related to LHOUS is issued by XERBLA. +*> LHOUS = MAX(1, dimension) where +*> dimension = 4*N if VECT='N' +*> not available now if VECT='H' +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK = MAX(1, dimension) +*> If LWORK = -1, or LHOUS=-1, +*> then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup real16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SSYTRD_SB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, + $ D, E, HOUS, LHOUS, WORK, LWORK, INFO ) +* +#if defined(_OPENMP) + use omp_lib +#endif +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER STAGE1, UPLO, VECT + INTEGER N, KD, LDAB, LHOUS, LWORK, INFO +* .. +* .. Array Arguments .. + REAL D( * ), E( * ) + REAL AB( LDAB, * ), HOUS( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL RZERO + REAL ZERO, ONE + PARAMETER ( RZERO = 0.0E+0, + $ ZERO = 0.0E+0, + $ ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTQ, UPPER, AFTERS1 + INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST, + $ ED, STIND, EDIND, BLKLASTIND, COLPT, THED, + $ STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID, + $ NBTILES, TTYPE, TID, NTHREADS, DEBUG, + $ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS, + $ INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU, + $ SISEV, SIZETAU, LDV, LHMIN, LWMIN +* .. +* .. External Subroutines .. + EXTERNAL SSB2ST_KERNELS, SLACPY, SLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX, CEILING, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Determine the minimal workspace size required. +* Test the input parameters +* + DEBUG = 0 + INFO = 0 + AFTERS1 = LSAME( STAGE1, 'Y' ) + WANTQ = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) .OR. ( LHOUS.EQ.-1 ) +* +* Determine the block size, the workspace size and the hous size. +* + IB = ILAENV( 18, 'SSYTRD_SB2ST', VECT, N, KD, -1, -1 ) + LHMIN = ILAENV( 19, 'SSYTRD_SB2ST', VECT, N, KD, IB, -1 ) + LWMIN = ILAENV( 20, 'SSYTRD_SB2ST', VECT, N, KD, IB, -1 ) +* + IF( .NOT.AFTERS1 .AND. .NOT.LSAME( STAGE1, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -2 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.(KD+1) ) THEN + INFO = -7 + ELSE IF( LHOUS.LT.LHMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + HOUS( 1 ) = LHMIN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRD_SB2ST', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + HOUS( 1 ) = 1 + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine pointer position +* + LDV = KD + IB + SIZETAU = 2 * N + SISEV = 2 * N + INDTAU = 1 + INDV = INDTAU + SIZETAU + LDA = 2 * KD + 1 + SIZEA = LDA * N + INDA = 1 + INDW = INDA + SIZEA + NTHREADS = 1 + TID = 0 +* + IF( UPPER ) THEN + APOS = INDA + KD + AWPOS = INDA + DPOS = APOS + KD + OFDPOS = DPOS - 1 + ABDPOS = KD + 1 + ABOFDPOS = KD + ELSE + APOS = INDA + AWPOS = INDA + KD + 1 + DPOS = APOS + OFDPOS = DPOS + 1 + ABDPOS = 1 + ABOFDPOS = 2 + + ENDIF +* +* Case KD=0: +* The matrix is diagonal. We just copy it (convert to "real" for +* real because D is double and the imaginary part should be 0) +* and store it in D. A sequential code here is better or +* in a parallel environment it might need two cores for D and E +* + IF( KD.EQ.0 ) THEN + DO 30 I = 1, N + D( I ) = ( AB( ABDPOS, I ) ) + 30 CONTINUE + DO 40 I = 1, N-1 + E( I ) = RZERO + 40 CONTINUE +* + HOUS( 1 ) = 1 + WORK( 1 ) = 1 + RETURN + END IF +* +* Case KD=1: +* The matrix is already Tridiagonal. We have to make diagonal +* and offdiagonal elements real, and store them in D and E. +* For that, for real precision just copy the diag and offdiag +* to D and E while for the COMPLEX case the bulge chasing is +* performed to convert the hermetian tridiagonal to symmetric +* tridiagonal. A simpler coversion formula might be used, but then +* updating the Q matrix will be required and based if Q is generated +* or not this might complicate the story. +* + IF( KD.EQ.1 ) THEN + DO 50 I = 1, N + D( I ) = ( AB( ABDPOS, I ) ) + 50 CONTINUE +* + IF( UPPER ) THEN + DO 60 I = 1, N-1 + E( I ) = ( AB( ABOFDPOS, I+1 ) ) + 60 CONTINUE + ELSE + DO 70 I = 1, N-1 + E( I ) = ( AB( ABOFDPOS, I ) ) + 70 CONTINUE + ENDIF +* + HOUS( 1 ) = 1 + WORK( 1 ) = 1 + RETURN + END IF +* +* Main code start here. +* Reduce the symmetric band of A to a tridiagonal matrix. +* + THGRSIZ = N + GRSIZ = 1 + SHIFT = 3 + NBTILES = CEILING( REAL(N)/REAL(KD) ) + STEPERCOL = CEILING( REAL(SHIFT)/REAL(GRSIZ) ) + THGRNB = CEILING( REAL(N-1)/REAL(THGRSIZ) ) +* + CALL SLACPY( "A", KD+1, N, AB, LDAB, WORK( APOS ), LDA ) + CALL SLASET( "A", KD, N, ZERO, ZERO, WORK( AWPOS ), LDA ) +* +* +* openMP parallelisation start here +* +#if defined(_OPENMP) +!$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND ) +!$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID ) +!$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND ) +!$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK) +!$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA ) +!$OMP$ SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT ) +!$OMP MASTER +#endif +* +* main bulge chasing loop +* + DO 100 THGRID = 1, THGRNB + STT = (THGRID-1)*THGRSIZ+1 + THED = MIN( (STT + THGRSIZ -1), (N-1)) + DO 110 I = STT, N-1 + ED = MIN( I, THED ) + IF( STT.GT.ED ) EXIT + DO 120 M = 1, STEPERCOL + ST = STT + DO 130 SWEEPID = ST, ED + DO 140 K = 1, GRSIZ + MYID = (I-SWEEPID)*(STEPERCOL*GRSIZ) + $ + (M-1)*GRSIZ + K + IF ( MYID.EQ.1 ) THEN + TTYPE = 1 + ELSE + TTYPE = MOD( MYID, 2 ) + 2 + ENDIF + + IF( TTYPE.EQ.2 ) THEN + COLPT = (MYID/2)*KD + SWEEPID + STIND = COLPT-KD+1 + EDIND = MIN(COLPT,N) + BLKLASTIND = COLPT + ELSE + COLPT = ((MYID+1)/2)*KD + SWEEPID + STIND = COLPT-KD+1 + EDIND = MIN(COLPT,N) + IF( ( STIND.GE.EDIND-1 ).AND. + $ ( EDIND.EQ.N ) ) THEN + BLKLASTIND = N + ELSE + BLKLASTIND = 0 + ENDIF + ENDIF +* +* Call the kernel +* +#if defined(_OPENMP) + IF( TTYPE.NE.1 ) THEN +!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) +!$OMP$ DEPEND(in:WORK(MYID-1)) +!$OMP$ DEPEND(out:WORK(MYID)) + TID = OMP_GET_THREAD_NUM() + CALL SSB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +!$OMP END TASK + ELSE +!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) +!$OMP$ DEPEND(out:WORK(MYID)) + TID = OMP_GET_THREAD_NUM() + CALL SSB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +!$OMP END TASK + ENDIF +#else + CALL SSB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +#endif + IF ( BLKLASTIND.GE.(N-1) ) THEN + STT = STT + 1 + EXIT + ENDIF + 140 CONTINUE + 130 CONTINUE + 120 CONTINUE + 110 CONTINUE + 100 CONTINUE +* +#if defined(_OPENMP) +!$OMP END MASTER +!$OMP END PARALLEL +#endif +* +* Copy the diagonal from A to D. Note that D is REAL thus only +* the Real part is needed, the imaginary part should be zero. +* + DO 150 I = 1, N + D( I ) = ( WORK( DPOS+(I-1)*LDA ) ) + 150 CONTINUE +* +* Copy the off diagonal from A to E. Note that E is REAL thus only +* the Real part is needed, the imaginary part should be zero. +* + IF( UPPER ) THEN + DO 160 I = 1, N-1 + E( I ) = ( WORK( OFDPOS+I*LDA ) ) + 160 CONTINUE + ELSE + DO 170 I = 1, N-1 + E( I ) = ( WORK( OFDPOS+(I-1)*LDA ) ) + 170 CONTINUE + ENDIF +* + HOUS( 1 ) = LHMIN + WORK( 1 ) = LWMIN + RETURN +* +* End of SSYTRD_SB2ST +* + END + diff --git a/dspl/liblapack/SRC/ssytrd_sy2sb.f b/dspl/liblapack/SRC/ssytrd_sy2sb.f new file mode 100644 index 0000000..c01fe35 --- /dev/null +++ b/dspl/liblapack/SRC/ssytrd_sy2sb.f @@ -0,0 +1,517 @@ +*> \brief \b SSYTRD_SY2SB +* +* @generated from zhetrd_he2hb.f, fortran z -> s, Wed Dec 7 08:22:40 2016 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRD_SY2SB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRD_SY2SB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, +* WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDAB, LWORK, N, KD +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), AB( LDAB, * ), +* TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRD_SY2SB reduces a real symmetric matrix A to real symmetric +*> band-diagonal form AB by a orthogonal similarity transformation: +*> Q**T * A * Q = AB. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the reduced matrix if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> The reduced matrix is stored in the array AB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the orthogonal +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the orthogonal matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> On exit, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (N-KD) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LWORK) +*> On exit, if INFO = 0, or if LWORK=-1, +*> WORK(1) returns the size of LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK which should be calculated +*> by a workspace query. LWORK = MAX(1, LWORK_QUERY) +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD +*> where FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice otherwise +*> putting LWORK=-1 will provide the size of WORK. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(k)**T . . . H(2)**T H(1)**T, where k = n-kd. +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in +*> A(i,i+kd+1:n), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = n-kd. +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**T +*> +*> where tau is a real scalar, and v is a real vector with +*> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in +*> A(i+kd+2:n,i), and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( ab ab/v1 v1 v1 v1 ) ( ab ) +*> ( ab ab/v2 v2 v2 ) ( ab/v1 ab ) +*> ( ab ab/v3 v3 ) ( v1 ab/v2 ab ) +*> ( ab ab/v4 ) ( v1 v2 ab/v3 ab ) +*> ( ab ) ( v1 v2 v3 ab/v4 ab ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SSYTRD_SY2SB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, + $ WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAB, LWORK, N, KD +* .. +* .. Array Arguments .. + REAL A( LDA, * ), AB( LDAB, * ), + $ TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL RONE + REAL ZERO, ONE, HALF + PARAMETER ( RONE = 1.0E+0, + $ ZERO = 0.0E+0, + $ ONE = 1.0E+0, + $ HALF = 0.5E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, J, IINFO, LWMIN, PN, PK, LK, + $ LDT, LDW, LDS2, LDS1, + $ LS2, LS1, LW, LT, + $ TPOS, WPOS, S2POS, S1POS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SSYR2K, SSYMM, SGEMM, SCOPY, + $ SLARFT, SGELQF, SGEQRF, SLASET +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Determine the minimal workspace size required +* and test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + LWMIN = ILAENV( 20, 'SSYTRD_SY2SB', '', N, KD, -1, -1 ) + + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAB.LT.MAX( 1, KD+1 ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRD_SY2SB', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK( 1 ) = LWMIN + RETURN + END IF +* +* Quick return if possible +* Copy the upper/lower portion of A into AB +* + IF( N.LE.KD+1 ) THEN + IF( UPPER ) THEN + DO 100 I = 1, N + LK = MIN( KD+1, I ) + CALL SCOPY( LK, A( I-LK+1, I ), 1, + $ AB( KD+1-LK+1, I ), 1 ) + 100 CONTINUE + ELSE + DO 110 I = 1, N + LK = MIN( KD+1, N-I+1 ) + CALL SCOPY( LK, A( I, I ), 1, AB( 1, I ), 1 ) + 110 CONTINUE + ENDIF + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine the pointer position for the workspace +* + LDT = KD + LDS1 = KD + LT = LDT*KD + LW = N*KD + LS1 = LDS1*KD + LS2 = LWMIN - LT - LW - LS1 +* LS2 = N*MAX(KD,FACTOPTNB) + TPOS = 1 + WPOS = TPOS + LT + S1POS = WPOS + LW + S2POS = S1POS + LS1 + IF( UPPER ) THEN + LDW = KD + LDS2 = KD + ELSE + LDW = N + LDS2 = N + ENDIF +* +* +* Set the workspace of the triangular matrix T to zero once such a +* way everytime T is generated the upper/lower portion will be always zero +* + CALL SLASET( "A", LDT, KD, ZERO, ZERO, WORK( TPOS ), LDT ) +* + IF( UPPER ) THEN + DO 10 I = 1, N - KD, KD + PN = N-I-KD+1 + PK = MIN( N-I-KD+1, KD ) +* +* Compute the LQ factorization of the current block +* + CALL SGELQF( KD, PN, A( I, I+KD ), LDA, + $ TAU( I ), WORK( S2POS ), LS2, IINFO ) +* +* Copy the upper portion of A into AB +* + DO 20 J = I, I+PK-1 + LK = MIN( KD, N-J ) + 1 + CALL SCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 ) + 20 CONTINUE +* + CALL SLASET( 'Lower', PK, PK, ZERO, ONE, + $ A( I, I+KD ), LDA ) +* +* Form the matrix T +* + CALL SLARFT( 'Forward', 'Rowwise', PN, PK, + $ A( I, I+KD ), LDA, TAU( I ), + $ WORK( TPOS ), LDT ) +* +* Compute W: +* + CALL SGEMM( 'Conjugate', 'No transpose', PK, PN, PK, + $ ONE, WORK( TPOS ), LDT, + $ A( I, I+KD ), LDA, + $ ZERO, WORK( S2POS ), LDS2 ) +* + CALL SSYMM( 'Right', UPLO, PK, PN, + $ ONE, A( I+KD, I+KD ), LDA, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( WPOS ), LDW ) +* + CALL SGEMM( 'No transpose', 'Conjugate', PK, PK, PN, + $ ONE, WORK( WPOS ), LDW, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( S1POS ), LDS1 ) +* + CALL SGEMM( 'No transpose', 'No transpose', PK, PN, PK, + $ -HALF, WORK( S1POS ), LDS1, + $ A( I, I+KD ), LDA, + $ ONE, WORK( WPOS ), LDW ) +* +* +* Update the unreduced submatrix A(i+kd:n,i+kd:n), using +* an update of the form: A := A - V'*W - W'*V +* + CALL SSYR2K( UPLO, 'Conjugate', PN, PK, + $ -ONE, A( I, I+KD ), LDA, + $ WORK( WPOS ), LDW, + $ RONE, A( I+KD, I+KD ), LDA ) + 10 CONTINUE +* +* Copy the upper band to AB which is the band storage matrix +* + DO 30 J = N-KD+1, N + LK = MIN(KD, N-J) + 1 + CALL SCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 ) + 30 CONTINUE +* + ELSE +* +* Reduce the lower triangle of A to lower band matrix +* + DO 40 I = 1, N - KD, KD + PN = N-I-KD+1 + PK = MIN( N-I-KD+1, KD ) +* +* Compute the QR factorization of the current block +* + CALL SGEQRF( PN, KD, A( I+KD, I ), LDA, + $ TAU( I ), WORK( S2POS ), LS2, IINFO ) +* +* Copy the upper portion of A into AB +* + DO 50 J = I, I+PK-1 + LK = MIN( KD, N-J ) + 1 + CALL SCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 ) + 50 CONTINUE +* + CALL SLASET( 'Upper', PK, PK, ZERO, ONE, + $ A( I+KD, I ), LDA ) +* +* Form the matrix T +* + CALL SLARFT( 'Forward', 'Columnwise', PN, PK, + $ A( I+KD, I ), LDA, TAU( I ), + $ WORK( TPOS ), LDT ) +* +* Compute W: +* + CALL SGEMM( 'No transpose', 'No transpose', PN, PK, PK, + $ ONE, A( I+KD, I ), LDA, + $ WORK( TPOS ), LDT, + $ ZERO, WORK( S2POS ), LDS2 ) +* + CALL SSYMM( 'Left', UPLO, PN, PK, + $ ONE, A( I+KD, I+KD ), LDA, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( WPOS ), LDW ) +* + CALL SGEMM( 'Conjugate', 'No transpose', PK, PK, PN, + $ ONE, WORK( S2POS ), LDS2, + $ WORK( WPOS ), LDW, + $ ZERO, WORK( S1POS ), LDS1 ) +* + CALL SGEMM( 'No transpose', 'No transpose', PN, PK, PK, + $ -HALF, A( I+KD, I ), LDA, + $ WORK( S1POS ), LDS1, + $ ONE, WORK( WPOS ), LDW ) +* +* +* Update the unreduced submatrix A(i+kd:n,i+kd:n), using +* an update of the form: A := A - V*W' - W*V' +* + CALL SSYR2K( UPLO, 'No transpose', PN, PK, + $ -ONE, A( I+KD, I ), LDA, + $ WORK( WPOS ), LDW, + $ RONE, A( I+KD, I+KD ), LDA ) +* ================================================================== +* RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED +* DO 45 J = I, I+PK-1 +* LK = MIN( KD, N-J ) + 1 +* CALL SCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 ) +* 45 CONTINUE +* ================================================================== + 40 CONTINUE +* +* Copy the lower band to AB which is the band storage matrix +* + DO 60 J = N-KD+1, N + LK = MIN(KD, N-J) + 1 + CALL SCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 ) + 60 CONTINUE + + END IF +* + WORK( 1 ) = LWMIN + RETURN +* +* End of SSYTRD_SY2SB +* + END diff --git a/dspl/liblapack/SRC/ssytrf.f b/dspl/liblapack/SRC/ssytrf.f new file mode 100644 index 0000000..2c29475 --- /dev/null +++ b/dspl/liblapack/SRC/ssytrf.f @@ -0,0 +1,363 @@ +*> \brief \b SSYTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRF computes the factorization of a real symmetric matrix A using +*> the Bunch-Kaufman diagonal pivoting method. The form of the +*> factorization is +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=1. For best performance +*> LWORK >= N*NB, where NB is the block size returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE SSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SLASYF, SSYTF2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'SSYTRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'SSYTRF', UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by SLASYF; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 40 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL SLASYF( UPLO, K, NB, KB, A, LDA, IPIV, WORK, LDWORK, + $ IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL SSYTF2( UPLO, K, A, LDA, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by SLASYF; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL SLASYF( UPLO, N-K+1, NB, KB, A( K, K ), LDA, IPIV( K ), + $ WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL SSYTF2( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), IINFO ) + KB = N - K + 1 + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO 30 J = K, K + KB - 1 + IF( IPIV( J ).GT.0 ) THEN + IPIV( J ) = IPIV( J ) + K - 1 + ELSE + IPIV( J ) = IPIV( J ) - K + 1 + END IF + 30 CONTINUE +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* + END IF +* + 40 CONTINUE + WORK( 1 ) = LWKOPT + RETURN +* +* End of SSYTRF +* + END diff --git a/dspl/liblapack/SRC/ssytrf_aa.f b/dspl/liblapack/SRC/ssytrf_aa.f new file mode 100644 index 0000000..4aaa978 --- /dev/null +++ b/dspl/liblapack/SRC/ssytrf_aa.f @@ -0,0 +1,467 @@ +*> \brief \b SSYTRF_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRF_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRF_AA computes the factorization of a real symmetric matrix A +*> using the Aasen's algorithm. The form of the factorization is +*> +*> A = U*T*U**T or A = L*T*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is a symmetric tridiagonal matrix. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the tridiagonal matrix is stored in the diagonals +*> and the subdiagonals of A just below (or above) the diagonals, +*> and L is stored below (or above) the subdiaonals, when UPLO +*> is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= MAX(1,2*N). For optimum performance +*> LWORK >= N*(1+NB), where NB is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + SUBROUTINE SSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER J, LWKOPT + INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB + REAL ALPHA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SLASYF_AA, SGEMV, SSCAL, SCOPY, SSWAP, SGEMM, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Determine the block size +* + NB = ILAENV( 1, 'SSYTRF_AA', UPLO, N, -1, -1, -1 ) +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKOPT = (NB+1)*N + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRF_AA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return +* + IF ( N.EQ.0 ) THEN + RETURN + ENDIF + IPIV( 1 ) = 1 + IF ( N.EQ.1 ) THEN + RETURN + END IF +* +* Adjust block size based on the workspace size +* + IF( LWORK.LT.((1+NB)*N) ) THEN + NB = ( LWORK-N ) / N + END IF +* + IF( UPPER ) THEN +* +* ..................................................... +* Factorize A as L*D*L**T using the upper triangle of A +* ..................................................... +* +* Copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N)) +* + CALL SCOPY( N, A( 1, 1 ), LDA, WORK( 1 ), 1 ) +* +* J is the main loop index, increasing from 1 to N in steps of +* JB, where JB is the number of columns factorized by SLASYF; +* JB is either NB, or N-J+1 for the last block +* + J = 0 + 10 CONTINUE + IF( J.GE.N ) + $ GO TO 20 +* +* each step of the main loop +* J is the last column of the previous panel +* J1 is the first column of the current panel +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 for the first panel, and +* K1=0 for the rest +* + J1 = J + 1 + JB = MIN( N-J1+1, NB ) + K1 = MAX(1, J)-J +* +* Panel factorization +* + CALL SLASYF_AA( UPLO, 2-K1, N-J, JB, + $ A( MAX(1, J), J+1 ), LDA, + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) +* +* Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) +* + DO J2 = J+2, MIN(N, J+JB+1) + IPIV( J2 ) = IPIV( J2 ) + J + IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN + CALL SSWAP( J1-K1-2, A( 1, J2 ), 1, + $ A( 1, IPIV(J2) ), 1 ) + END IF + END DO + J = J + JB +* +* Trailing submatrix update, where +* the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and +* WORK stores the current block of the auxiriarly matrix H +* + IF( J.LT.N ) THEN +* +* If first panel and JB=1 (NB=1), then nothing to do +* + IF( J1.GT.1 .OR. JB.GT.1 ) THEN +* +* Merge rank-1 update with BLAS-3 update +* + ALPHA = A( J, J+1 ) + A( J, J+1 ) = ONE + CALL SCOPY( N-J, A( J-1, J+1 ), LDA, + $ WORK( (J+1-J1+1)+JB*N ), 1 ) + CALL SSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 ) +* +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 and K2= 0 for the first panel, +* while K1=0 and K2=1 for the rest +* + IF( J1.GT.1 ) THEN +* +* Not first panel +* + K2 = 1 + ELSE +* +* First panel +* + K2 = 0 +* +* First update skips the first column +* + JB = JB - 1 + END IF +* + DO J2 = J+1, N, NB + NJ = MIN( NB, N-J2+1 ) +* +* Update (J2, J2) diagonal block with SGEMV +* + J3 = J2 + DO MJ = NJ-1, 1, -1 + CALL SGEMV( 'No transpose', MJ, JB+1, + $ -ONE, WORK( J3-J1+1+K1*N ), N, + $ A( J1-K2, J3 ), 1, + $ ONE, A( J3, J3 ), LDA ) + J3 = J3 + 1 + END DO +* +* Update off-diagonal block of J2-th block row with SGEMM +* + CALL SGEMM( 'Transpose', 'Transpose', + $ NJ, N-J3+1, JB+1, + $ -ONE, A( J1-K2, J2 ), LDA, + $ WORK( J3-J1+1+K1*N ), N, + $ ONE, A( J2, J3 ), LDA ) + END DO +* +* Recover T( J, J+1 ) +* + A( J, J+1 ) = ALPHA + END IF +* +* WORK(J+1, 1) stores H(J+1, 1) +* + CALL SCOPY( N-J, A( J+1, J+1 ), LDA, WORK( 1 ), 1 ) + END IF + GO TO 10 + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* +* copy first column A(1:N, 1) into H(1:N, 1) +* (stored in WORK(1:N)) +* + CALL SCOPY( N, A( 1, 1 ), 1, WORK( 1 ), 1 ) +* +* J is the main loop index, increasing from 1 to N in steps of +* JB, where JB is the number of columns factorized by SLASYF; +* JB is either NB, or N-J+1 for the last block +* + J = 0 + 11 CONTINUE + IF( J.GE.N ) + $ GO TO 20 +* +* each step of the main loop +* J is the last column of the previous panel +* J1 is the first column of the current panel +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 for the first panel, and +* K1=0 for the rest +* + J1 = J+1 + JB = MIN( N-J1+1, NB ) + K1 = MAX(1, J)-J +* +* Panel factorization +* + CALL SLASYF_AA( UPLO, 2-K1, N-J, JB, + $ A( J+1, MAX(1, J) ), LDA, + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) +* +* Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) +* + DO J2 = J+2, MIN(N, J+JB+1) + IPIV( J2 ) = IPIV( J2 ) + J + IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN + CALL SSWAP( J1-K1-2, A( J2, 1 ), LDA, + $ A( IPIV(J2), 1 ), LDA ) + END IF + END DO + J = J + JB +* +* Trailing submatrix update, where +* A(J2+1, J1-1) stores L(J2+1, J1) and +* WORK(J2+1, 1) stores H(J2+1, 1) +* + IF( J.LT.N ) THEN +* +* if first panel and JB=1 (NB=1), then nothing to do +* + IF( J1.GT.1 .OR. JB.GT.1 ) THEN +* +* Merge rank-1 update with BLAS-3 update +* + ALPHA = A( J+1, J ) + A( J+1, J ) = ONE + CALL SCOPY( N-J, A( J+1, J-1 ), 1, + $ WORK( (J+1-J1+1)+JB*N ), 1 ) + CALL SSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 ) +* +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 and K2= 0 for the first panel, +* while K1=0 and K2=1 for the rest +* + IF( J1.GT.1 ) THEN +* +* Not first panel +* + K2 = 1 + ELSE +* +* First panel +* + K2 = 0 +* +* First update skips the first column +* + JB = JB - 1 + END IF +* + DO J2 = J+1, N, NB + NJ = MIN( NB, N-J2+1 ) +* +* Update (J2, J2) diagonal block with SGEMV +* + J3 = J2 + DO MJ = NJ-1, 1, -1 + CALL SGEMV( 'No transpose', MJ, JB+1, + $ -ONE, WORK( J3-J1+1+K1*N ), N, + $ A( J3, J1-K2 ), LDA, + $ ONE, A( J3, J3 ), 1 ) + J3 = J3 + 1 + END DO +* +* Update off-diagonal block in J2-th block column with SGEMM +* + CALL SGEMM( 'No transpose', 'Transpose', + $ N-J3+1, NJ, JB+1, + $ -ONE, WORK( J3-J1+1+K1*N ), N, + $ A( J2, J1-K2 ), LDA, + $ ONE, A( J3, J2 ), LDA ) + END DO +* +* Recover T( J+1, J ) +* + A( J+1, J ) = ALPHA + END IF +* +* WORK(J+1, 1) stores H(J+1, 1) +* + CALL SCOPY( N-J, A( J+1, J+1 ), 1, WORK( 1 ), 1 ) + END IF + GO TO 11 + END IF +* + 20 CONTINUE + RETURN +* +* End of SSYTRF_AA +* + END diff --git a/dspl/liblapack/SRC/ssytrf_aa_2stage.f b/dspl/liblapack/SRC/ssytrf_aa_2stage.f new file mode 100644 index 0000000..be6809d --- /dev/null +++ b/dspl/liblapack/SRC/ssytrf_aa_2stage.f @@ -0,0 +1,647 @@ +*> \brief \b SSYTRF_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRF_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, +* IPIV2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* REAL A( LDA, * ), TB( * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRF_AA_2STAGE computes the factorization of a real symmetric matrix A +*> using the Aasen's algorithm. The form of the factorization is +*> +*> A = U*T*U**T or A = L*T*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is a symmetric band matrix with the +*> bandwidth of NB (NB is internally selected and stored in TB( 1 ), and T is +*> LU factorized with partial pivoting). +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is REAL array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + SUBROUTINE SSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, + $ IPIV2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + REAL A( LDA, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER I, J, K, I1, I2, TD + INTEGER LDTB, NB, KB, JB, NT, IINFO + REAL PIV +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SCOPY, SLACGV, SLACPY, + $ SLASET, SGBTRF, SGEMM, SGETRF, + $ SSYGST, SSWAP, STRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF ( LTB .LT. 4*N .AND. .NOT.TQUERY ) THEN + INFO = -6 + ELSE IF ( LWORK .LT. N .AND. .NOT.WQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRF_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Answer the query +* + NB = ILAENV( 1, 'SSYTRF_AA_2STAGE', UPLO, N, -1, -1, -1 ) + IF( INFO.EQ.0 ) THEN + IF( TQUERY ) THEN + TB( 1 ) = (3*NB+1)*N + END IF + IF( WQUERY ) THEN + WORK( 1 ) = N*NB + END IF + END IF + IF( TQUERY .OR. WQUERY ) THEN + RETURN + END IF +* +* Quick return +* + IF ( N.EQ.0 ) THEN + RETURN + ENDIF +* +* Determine the number of the block size +* + LDTB = LTB/N + IF( LDTB .LT. 3*NB+1 ) THEN + NB = (LDTB-1)/3 + END IF + IF( LWORK .LT. NB*N ) THEN + NB = LWORK/N + END IF +* +* Determine the number of the block columns +* + NT = (N+NB-1)/NB + TD = 2*NB + KB = MIN(NB, N) +* +* Initialize vectors/matrices +* + DO J = 1, KB + IPIV( J ) = J + END DO +* +* Save NB +* + TB( 1 ) = NB +* + IF( UPPER ) THEN +* +* ..................................................... +* Factorize A as L*D*L**T using the upper triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*U(I,J) + T(I+1,I)*U(I+1,J) + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL SGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( (I-1)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J) + IF( I .EQ. J-1) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL SGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (I-2)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL SLACPY( 'Upper', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = U(1:J,J)'*H(1:J) + CALL SGEMM( 'Transpose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -ONE, A( 1, J*NB+1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += U(J,J)'*T(J,J-1)*U(J-1,J) + CALL SGEMM( 'Transpose', 'NoTranspose', + $ KB, NB, KB, + $ ONE, A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ ZERO, WORK( 1 ), N ) + CALL SGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB, + $ -ONE, WORK( 1 ), N, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF + IF( J.GT.0 ) THEN + CALL SSYGST( 1, 'Upper', KB, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, IINFO ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + DO K = I+1, KB + TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + $ = TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + END DO + END DO +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL SGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, KB, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + ELSE + CALL SGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB+KB, + $ ONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL SGEMM( 'Transpose', 'NoTranspose', + $ NB, N-(J+1)*NB, J*NB, + $ -ONE, WORK( NB+1 ), N, + $ A( 1, (J+1)*NB+1 ), LDA, + $ ONE, A( J*NB+1, (J+1)*NB+1 ), LDA ) + END IF +* +* Copy panel to workspace to call SGETRF +* + DO K = 1, NB + CALL SCOPY( N-(J+1)*NB, + $ A( J*NB+K, (J+1)*NB+1 ), LDA, + $ WORK( 1+(K-1)*N ), 1 ) + END DO +* +* Factorize panel +* + CALL SGETRF( N-(J+1)*NB, NB, + $ WORK, N, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Copy panel back +* + DO K = 1, NB + CALL SCOPY( N-(J+1)*NB, + $ WORK( 1+(K-1)*N ), 1, + $ A( J*NB+K, (J+1)*NB+1 ), LDA ) + END DO +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL SLASET( 'Full', KB, NB, ZERO, ZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 ) + CALL SLACPY( 'Upper', KB, NB, + $ WORK, N, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL STRSM( 'R', 'U', 'N', 'U', KB, NB, ONE, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J,J+1) into T(J+1, J), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) + $ = TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) + END DO + END DO + CALL SLASET( 'Lower', KB, NB, ZERO, ONE, + $ A( J*NB+1, (J+1)*NB+1), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL SSWAP( K-1, A( (J+1)*NB+1, I1 ), 1, + $ A( (J+1)*NB+1, I2 ), 1 ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL SSWAP( I2-I1-1, A( I1, I1+1 ), LDA, + $ A( I1+1, I2 ), 1 ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL SSWAP( N-I2, A( I1, I2+1 ), LDA, + $ A( I2, I2+1 ), LDA ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL SSWAP( J*NB, A( 1, I1 ), 1, + $ A( 1, I2 ), 1 ) + END IF + ENDIF + END DO + END IF + END DO + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*L(J,I)' + T(I+1,I)'*L(J,I+1)' + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL SGEMM( 'NoTranspose', 'Transpose', + $ NB, KB, JB, + $ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (I-1)*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)' + IF( I .EQ. J-1) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL SGEMM( 'NoTranspose', 'Transpose', + $ NB, KB, JB, + $ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (I-2)*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL SLACPY( 'Lower', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = L(J,1:J)*H(1:J) + CALL SGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -ONE, A( J*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += L(J,J)*T(J,J-1)*L(J,J-1)' + CALL SGEMM( 'NoTranspose', 'NoTranspose', + $ KB, NB, KB, + $ ONE, A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ ZERO, WORK( 1 ), N ) + CALL SGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, NB, + $ -ONE, WORK( 1 ), N, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF + IF( J.GT.0 ) THEN + CALL SSYGST( 1, 'Lower', KB, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, IINFO ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + DO K = I+1, KB + TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + $ = TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + END DO + END DO +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL SGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, KB, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + ELSE + CALL SGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, NB+KB, + $ ONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL SGEMM( 'NoTranspose', 'NoTranspose', + $ N-(J+1)*NB, NB, J*NB, + $ -ONE, A( (J+1)*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, A( (J+1)*NB+1, J*NB+1 ), LDA ) + END IF +* +* Factorize panel +* + CALL SGETRF( N-(J+1)*NB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL SLASET( 'Full', KB, NB, ZERO, ZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 ) + CALL SLACPY( 'Upper', KB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL STRSM( 'R', 'L', 'T', 'U', KB, NB, ONE, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J+1,J) into T(J, J+1), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) = + $ TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) + END DO + END DO + CALL SLASET( 'Upper', KB, NB, ZERO, ONE, + $ A( (J+1)*NB+1, J*NB+1), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL SSWAP( K-1, A( I1, (J+1)*NB+1 ), LDA, + $ A( I2, (J+1)*NB+1 ), LDA ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL SSWAP( I2-I1-1, A( I1+1, I1 ), 1, + $ A( I2, I1+1 ), LDA ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL SSWAP( N-I2, A( I2+1, I1 ), 1, + $ A( I2+1, I2 ), 1 ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL SSWAP( J*NB, A( I1, 1 ), LDA, + $ A( I2, 1 ), LDA ) + END IF + ENDIF + END DO +* +* Apply pivots to previous columns of L +* +c CALL SLASWP( J*NB, A( 1, 1 ), LDA, +c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) + END IF + END DO + END IF +* +* Factor the band matrix + CALL SGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) +* +* End of SSYTRF_AA_2STAGE +* + END diff --git a/dspl/liblapack/SRC/ssytrf_rk.f b/dspl/liblapack/SRC/ssytrf_rk.f new file mode 100644 index 0000000..f4221e1 --- /dev/null +++ b/dspl/liblapack/SRC/ssytrf_rk.f @@ -0,0 +1,498 @@ +*> \brief \b SSYTRF_RK computes the factorization of a real symmetric indefinite matrix using the bounded Bunch-Kaufman (rook) diagonal pivoting method (BLAS3 blocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRF_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), E ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> SSYTRF_RK computes the factorization of a real symmetric matrix A +*> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> For more information see Further Details section. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, contains: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is REAL array, dimension (N) +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> IPIV describes the permutation matrix P in the factorization +*> of matrix A as follows. The absolute value of IPIV(k) +*> represents the index of row and column that were +*> interchanged with the k-th row and column. The value of UPLO +*> describes the order in which the interchanges were applied. +*> Also, the sign of IPIV represents the block structure of +*> the symmetric block diagonal matrix D with 1-by-1 or 2-by-2 +*> diagonal blocks which correspond to 1 or 2 interchanges +*> at each factorization step. For more info see Further +*> Details section. +*> +*> If UPLO = 'U', +*> ( in factorization order, k decreases from N to 1 ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N); +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k-1) < 0 means: +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k-1) != k-1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k-1) = k-1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) <= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> +*> If UPLO = 'L', +*> ( in factorization order, k increases from 1 to N ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N). +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k+1) < 0 means: +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k+1) != k+1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k+1) = k+1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) >= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension ( MAX(1,LWORK) ). +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=1. For best performance +*> LWORK >= N*NB, where NB is the block size returned +*> by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; +*> the routine only calculates the optimal size of the WORK +*> array, returns this value as the first entry of the WORK +*> array, and no error message related to LWORK is issued +*> by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup singleSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> TODO: put correct description +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYTRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, IP, IWS, K, KB, LDWORK, LWKOPT, + $ NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SLASYF_RK, SSYTF2_RK, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'SSYTRF_RK', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRF_RK', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'SSYTRF_RK', + $ UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by SLASYF_RK; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 15 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL SLASYF_RK( UPLO, K, NB, KB, A, LDA, E, + $ IPIV, WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL SSYTF2_RK( UPLO, K, A, LDA, E, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* No need to adjust IPIV +* +* +* Apply permutations to the leading panel 1:k-1 +* +* Read IPIV from the last block factored, i.e. +* indices k-kb+1:k and apply row permutations to the +* last k+1 colunms k+1:N after that block +* (We can do the simple loop over IPIV with decrement -1, +* since the ABS value of IPIV( I ) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + IF( K.LT.N ) THEN + DO I = K, ( K - KB + 1 ), -1 + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + CALL SSWAP( N-K, A( I, K+1 ), LDA, + $ A( IP, K+1 ), LDA ) + END IF + END DO + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* +* This label is the exit from main loop over K decreasing +* from N to 1 in steps of KB +* + 15 CONTINUE +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by SLASYF_RK; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 35 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL SLASYF_RK( UPLO, N-K+1, NB, KB, A( K, K ), LDA, E( K ), + $ IPIV( K ), WORK, LDWORK, IINFO ) + + + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL SSYTF2_RK( UPLO, N-K+1, A( K, K ), LDA, E( K ), + $ IPIV( K ), IINFO ) + KB = N - K + 1 +* + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO I = K, K + KB - 1 + IF( IPIV( I ).GT.0 ) THEN + IPIV( I ) = IPIV( I ) + K - 1 + ELSE + IPIV( I ) = IPIV( I ) - K + 1 + END IF + END DO +* +* Apply permutations to the leading panel 1:k-1 +* +* Read IPIV from the last block factored, i.e. +* indices k:k+kb-1 and apply row permutations to the +* first k-1 colunms 1:k-1 before that block +* (We can do the simple loop over IPIV with increment 1, +* since the ABS value of IPIV( I ) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + IF( K.GT.1 ) THEN + DO I = K, ( K + KB - 1 ), 1 + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + CALL SSWAP( K-1, A( I, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + END DO + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* +* This label is the exit from main loop over K increasing +* from 1 to N in steps of KB +* + 35 CONTINUE +* +* End Lower +* + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of SSYTRF_RK +* + END diff --git a/dspl/liblapack/SRC/ssytrf_rook.f b/dspl/liblapack/SRC/ssytrf_rook.f new file mode 100644 index 0000000..534a48e --- /dev/null +++ b/dspl/liblapack/SRC/ssytrf_rook.f @@ -0,0 +1,393 @@ +*> \brief \b SSYTRF_ROOK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRF_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRF_ROOK computes the factorization of a real symmetric matrix A +*> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. +*> The form of the factorization is +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)). +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=1. For best performance +*> LWORK >= N*NB, where NB is the block size returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realSYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> June 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYTRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SLASYF_ROOK, SSYTF2_ROOK, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'SSYTRF_ROOK', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, N*NB ) + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRF_ROOK', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'SSYTRF_ROOK', + $ UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by SLASYF_ROOK; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 40 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL SLASYF_ROOK( UPLO, K, NB, KB, A, LDA, + $ IPIV, WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL SSYTF2_ROOK( UPLO, K, A, LDA, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* No need to adjust IPIV +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by SLASYF_ROOK; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL SLASYF_ROOK( UPLO, N-K+1, NB, KB, A( K, K ), LDA, + $ IPIV( K ), WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL SSYTF2_ROOK( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), + $ IINFO ) + KB = N - K + 1 + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO 30 J = K, K + KB - 1 + IF( IPIV( J ).GT.0 ) THEN + IPIV( J ) = IPIV( J ) + K - 1 + ELSE + IPIV( J ) = IPIV( J ) - K + 1 + END IF + 30 CONTINUE +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* + END IF +* + 40 CONTINUE + WORK( 1 ) = LWKOPT + RETURN +* +* End of SSYTRF_ROOK +* + END diff --git a/dspl/liblapack/SRC/ssytri.f b/dspl/liblapack/SRC/ssytri.f new file mode 100644 index 0000000..31aa2dd --- /dev/null +++ b/dspl/liblapack/SRC/ssytri.f @@ -0,0 +1,382 @@ +*> \brief \b SSYTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRI computes the inverse of a real symmetric indefinite matrix +*> A using the factorization A = U*D*U**T or A = L*D*L**T computed by +*> SSYTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by SSYTRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSYTRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + SUBROUTINE SSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KP, KSTEP + REAL AK, AKKP1, AKP1, D, T, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SSWAP, SSYMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL SCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL SSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - SDOT( K-1, WORK, 1, A( 1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K+1 ) ) + AK = A( K, K ) / T + AKP1 = A( K+1, K+1 ) / T + AKKP1 = A( K, K+1 ) / T + D = T*( AK*AKP1-ONE ) + A( K, K ) = AKP1 / D + A( K+1, K+1 ) = AK / D + A( K, K+1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL SCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL SSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - SDOT( K-1, WORK, 1, A( 1, K ), + $ 1 ) + A( K, K+1 ) = A( K, K+1 ) - + $ SDOT( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 ) + CALL SCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 ) + CALL SSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K+1 ), 1 ) + A( K+1, K+1 ) = A( K+1, K+1 ) - + $ SDOT( K-1, WORK, 1, A( 1, K+1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + CALL SSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + CALL SSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K+1 ) + A( K, K+1 ) = A( KP, K+1 ) + A( KP, K+1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + GO TO 30 + 40 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 50 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 60 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL SCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL SSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - SDOT( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K-1 ) ) + AK = A( K-1, K-1 ) / T + AKP1 = A( K, K ) / T + AKKP1 = A( K, K-1 ) / T + D = T*( AK*AKP1-ONE ) + A( K-1, K-1 ) = AKP1 / D + A( K, K ) = AK / D + A( K, K-1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL SCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL SSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - SDOT( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + A( K, K-1 ) = A( K, K-1 ) - + $ SDOT( N-K, A( K+1, K ), 1, A( K+1, K-1 ), + $ 1 ) + CALL SCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 ) + CALL SSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K-1 ), 1 ) + A( K-1, K-1 ) = A( K-1, K-1 ) - + $ SDOT( N-K, WORK, 1, A( K+1, K-1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + IF( KP.LT.N ) + $ CALL SSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + CALL SSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K-1 ) + A( K, K-1 ) = A( KP, K-1 ) + A( KP, K-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + GO TO 50 + 60 CONTINUE + END IF +* + RETURN +* +* End of SSYTRI +* + END diff --git a/dspl/liblapack/SRC/ssytri2.f b/dspl/liblapack/SRC/ssytri2.f new file mode 100644 index 0000000..97b5390 --- /dev/null +++ b/dspl/liblapack/SRC/ssytri2.f @@ -0,0 +1,205 @@ +*> \brief \b SSYTRI2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRI2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRI2 computes the inverse of a REAL symmetric indefinite matrix +*> A using the factorization A = U*D*U**T or A = L*D*L**T computed by +*> SSYTRF. SSYTRI2 sets the LEADING DIMENSION of the workspace +*> before calling SSYTRI2X that actually computes the inverse. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the NB diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by SSYTRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the NB structure of D +*> as determined by SSYTRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N+NB+1)*(NB+3) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> WORK is size >= (N+NB+1)*(NB+3) +*> If LDWORK = -1, then a workspace query is assumed; the routine +*> calculates: +*> - the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, +*> - and no error message related to LDWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + SUBROUTINE SSYTRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, LQUERY + INTEGER MINSIZE, NBMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SSYTRI, SSYTRI2X, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* Get blocksize + NBMAX = ILAENV( 1, 'SSYTRF', UPLO, N, -1, -1, -1 ) + IF ( NBMAX .GE. N ) THEN + MINSIZE = N + ELSE + MINSIZE = (N+NBMAX+1)*(NBMAX+3) + END IF +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF (LWORK .LT. MINSIZE .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* +* Quick return if possible +* +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRI2', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK(1)=MINSIZE + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN + + IF( NBMAX .GE. N ) THEN + CALL SSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) + ELSE + CALL SSYTRI2X( UPLO, N, A, LDA, IPIV, WORK, NBMAX, INFO ) + END IF + RETURN +* +* End of SSYTRI2 +* + END diff --git a/dspl/liblapack/SRC/ssytri2x.f b/dspl/liblapack/SRC/ssytri2x.f new file mode 100644 index 0000000..9aa567f --- /dev/null +++ b/dspl/liblapack/SRC/ssytri2x.f @@ -0,0 +1,590 @@ +*> \brief \b SSYTRI2X +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRI2X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRI2X( UPLO, N, A, LDA, IPIV, WORK, NB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), WORK( N+NB+1,* ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRI2X computes the inverse of a real symmetric indefinite matrix +*> A using the factorization A = U*D*U**T or A = L*D*L**T computed by +*> SSYTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the NNB diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by SSYTRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the NNB structure of D +*> as determined by SSYTRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N+NB+1,NB+3) +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> Block size +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + SUBROUTINE SSYTRI2X( UPLO, N, A, LDA, IPIV, WORK, NB, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), WORK( N+NB+1,* ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IINFO, IP, K, CUT, NNB + INTEGER COUNT + INTEGER J, U11, INVD + + REAL AK, AKKP1, AKP1, D, T + REAL U01_I_J, U01_IP1_J + REAL U11_I_J, U11_IP1_J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSYCONV, XERBLA, STRTRI + EXTERNAL SGEMM, STRMM, SSYSWAPR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF +* +* Quick return if possible +* +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRI2X', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN +* +* Convert A +* Workspace got Non-diag elements of D +* + CALL SSYCONV( UPLO, 'C', N, A, LDA, IPIV, WORK, IINFO ) +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + END DO + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + END DO + END IF + INFO = 0 +* +* Splitting Workspace +* U01 is a block (N,NB+1) +* The first element of U01 is in WORK(1,1) +* U11 is a block (NB+1,NB+1) +* The first element of U11 is in WORK(N+1,1) + U11 = N +* INVD is a block (N,2) +* The first element of INVD is in WORK(1,INVD) + INVD = NB+2 + + IF( UPPER ) THEN +* +* invA = P * inv(U**T)*inv(D)*inv(U)*P**T. +* + CALL STRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D)*inv(U) +* + K=1 + DO WHILE ( K .LE. N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK(K,INVD) = ONE / A( K, K ) + WORK(K,INVD+1) = 0 + K=K+1 + ELSE +* 2 x 2 diagonal NNB + T = WORK(K+1,1) + AK = A( K, K ) / T + AKP1 = A( K+1, K+1 ) / T + AKKP1 = WORK(K+1,1) / T + D = T*( AK*AKP1-ONE ) + WORK(K,INVD) = AKP1 / D + WORK(K+1,INVD+1) = AK / D + WORK(K,INVD+1) = -AKKP1 / D + WORK(K+1,INVD) = -AKKP1 / D + K=K+2 + END IF + END DO +* +* inv(U**T) = (inv(U))**T +* +* inv(U**T)*inv(D)*inv(U) +* + CUT=N + DO WHILE (CUT .GT. 0) + NNB=NB + IF (CUT .LE. NNB) THEN + NNB=CUT + ELSE + COUNT = 0 +* count negative elements, + DO I=CUT+1-NNB,CUT + IF (IPIV(I) .LT. 0) COUNT=COUNT+1 + END DO +* need a even number for a clear cut + IF (MOD(COUNT,2) .EQ. 1) NNB=NNB+1 + END IF + + CUT=CUT-NNB +* +* U01 Block +* + DO I=1,CUT + DO J=1,NNB + WORK(I,J)=A(I,CUT+J) + END DO + END DO +* +* U11 Block +* + DO I=1,NNB + WORK(U11+I,I)=ONE + DO J=1,I-1 + WORK(U11+I,J)=ZERO + END DO + DO J=I+1,NNB + WORK(U11+I,J)=A(CUT+I,CUT+J) + END DO + END DO +* +* invD*U01 +* + I=1 + DO WHILE (I .LE. CUT) + IF (IPIV(I) > 0) THEN + DO J=1,NNB + WORK(I,J)=WORK(I,INVD)*WORK(I,J) + END DO + I=I+1 + ELSE + DO J=1,NNB + U01_I_J = WORK(I,J) + U01_IP1_J = WORK(I+1,J) + WORK(I,J)=WORK(I,INVD)*U01_I_J+ + $ WORK(I,INVD+1)*U01_IP1_J + WORK(I+1,J)=WORK(I+1,INVD)*U01_I_J+ + $ WORK(I+1,INVD+1)*U01_IP1_J + END DO + I=I+2 + END IF + END DO +* +* invD1*U11 +* + I=1 + DO WHILE (I .LE. NNB) + IF (IPIV(CUT+I) > 0) THEN + DO J=I,NNB + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + END DO + I=I+1 + ELSE + DO J=I,NNB + U11_I_J = WORK(U11+I,J) + U11_IP1_J = WORK(U11+I+1,J) + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + + $ WORK(CUT+I,INVD+1)*WORK(U11+I+1,J) + WORK(U11+I+1,J)=WORK(CUT+I+1,INVD)*U11_I_J+ + $ WORK(CUT+I+1,INVD+1)*U11_IP1_J + END DO + I=I+2 + END IF + END DO +* +* U11**T*invD1*U11->U11 +* + CALL STRMM('L','U','T','U',NNB, NNB, + $ ONE,A(CUT+1,CUT+1),LDA,WORK(U11+1,1),N+NB+1) +* + DO I=1,NNB + DO J=I,NNB + A(CUT+I,CUT+J)=WORK(U11+I,J) + END DO + END DO +* +* U01**T*invD*U01->A(CUT+I,CUT+J) +* + CALL SGEMM('T','N',NNB,NNB,CUT,ONE,A(1,CUT+1),LDA, + $ WORK,N+NB+1, ZERO, WORK(U11+1,1), N+NB+1) +* +* U11 = U11**T*invD1*U11 + U01**T*invD*U01 +* + DO I=1,NNB + DO J=I,NNB + A(CUT+I,CUT+J)=A(CUT+I,CUT+J)+WORK(U11+I,J) + END DO + END DO +* +* U01 = U00**T*invD0*U01 +* + CALL STRMM('L',UPLO,'T','U',CUT, NNB, + $ ONE,A,LDA,WORK,N+NB+1) + +* +* Update U01 +* + DO I=1,CUT + DO J=1,NNB + A(I,CUT+J)=WORK(I,J) + END DO + END DO +* +* Next Block +* + END DO +* +* Apply PERMUTATIONS P and P**T: P * inv(U**T)*inv(D)*inv(U) *P**T +* + I=1 + DO WHILE ( I .LE. N ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF (I .LT. IP) CALL SSYSWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL SSYSWAPR( UPLO, N, A, LDA, IP ,I ) + ELSE + IP=-IPIV(I) + I=I+1 + IF ( (I-1) .LT. IP) + $ CALL SSYSWAPR( UPLO, N, A, LDA, I-1 ,IP ) + IF ( (I-1) .GT. IP) + $ CALL SSYSWAPR( UPLO, N, A, LDA, IP ,I-1 ) + ENDIF + I=I+1 + END DO + ELSE +* +* LOWER... +* +* invA = P * inv(U**T)*inv(D)*inv(U)*P**T. +* + CALL STRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D)*inv(U) +* + K=N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK(K,INVD) = ONE / A( K, K ) + WORK(K,INVD+1) = 0 + K=K-1 + ELSE +* 2 x 2 diagonal NNB + T = WORK(K-1,1) + AK = A( K-1, K-1 ) / T + AKP1 = A( K, K ) / T + AKKP1 = WORK(K-1,1) / T + D = T*( AK*AKP1-ONE ) + WORK(K-1,INVD) = AKP1 / D + WORK(K,INVD) = AK / D + WORK(K,INVD+1) = -AKKP1 / D + WORK(K-1,INVD+1) = -AKKP1 / D + K=K-2 + END IF + END DO +* +* inv(U**T) = (inv(U))**T +* +* inv(U**T)*inv(D)*inv(U) +* + CUT=0 + DO WHILE (CUT .LT. N) + NNB=NB + IF (CUT + NNB .GT. N) THEN + NNB=N-CUT + ELSE + COUNT = 0 +* count negative elements, + DO I=CUT+1,CUT+NNB + IF (IPIV(I) .LT. 0) COUNT=COUNT+1 + END DO +* need a even number for a clear cut + IF (MOD(COUNT,2) .EQ. 1) NNB=NNB+1 + END IF +* L21 Block + DO I=1,N-CUT-NNB + DO J=1,NNB + WORK(I,J)=A(CUT+NNB+I,CUT+J) + END DO + END DO +* L11 Block + DO I=1,NNB + WORK(U11+I,I)=ONE + DO J=I+1,NNB + WORK(U11+I,J)=ZERO + END DO + DO J=1,I-1 + WORK(U11+I,J)=A(CUT+I,CUT+J) + END DO + END DO +* +* invD*L21 +* + I=N-CUT-NNB + DO WHILE (I .GE. 1) + IF (IPIV(CUT+NNB+I) > 0) THEN + DO J=1,NNB + WORK(I,J)=WORK(CUT+NNB+I,INVD)*WORK(I,J) + END DO + I=I-1 + ELSE + DO J=1,NNB + U01_I_J = WORK(I,J) + U01_IP1_J = WORK(I-1,J) + WORK(I,J)=WORK(CUT+NNB+I,INVD)*U01_I_J+ + $ WORK(CUT+NNB+I,INVD+1)*U01_IP1_J + WORK(I-1,J)=WORK(CUT+NNB+I-1,INVD+1)*U01_I_J+ + $ WORK(CUT+NNB+I-1,INVD)*U01_IP1_J + END DO + I=I-2 + END IF + END DO +* +* invD1*L11 +* + I=NNB + DO WHILE (I .GE. 1) + IF (IPIV(CUT+I) > 0) THEN + DO J=1,NNB + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + END DO + I=I-1 + ELSE + DO J=1,NNB + U11_I_J = WORK(U11+I,J) + U11_IP1_J = WORK(U11+I-1,J) + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + + $ WORK(CUT+I,INVD+1)*U11_IP1_J + WORK(U11+I-1,J)=WORK(CUT+I-1,INVD+1)*U11_I_J+ + $ WORK(CUT+I-1,INVD)*U11_IP1_J + END DO + I=I-2 + END IF + END DO +* +* L11**T*invD1*L11->L11 +* + CALL STRMM('L',UPLO,'T','U',NNB, NNB, + $ ONE,A(CUT+1,CUT+1),LDA,WORK(U11+1,1),N+NB+1) + +* + DO I=1,NNB + DO J=1,I + A(CUT+I,CUT+J)=WORK(U11+I,J) + END DO + END DO +* + IF ( (CUT+NNB) .LT. N ) THEN +* +* L21**T*invD2*L21->A(CUT+I,CUT+J) +* + CALL SGEMM('T','N',NNB,NNB,N-NNB-CUT,ONE,A(CUT+NNB+1,CUT+1) + $ ,LDA,WORK,N+NB+1, ZERO, WORK(U11+1,1), N+NB+1) + +* +* L11 = L11**T*invD1*L11 + U01**T*invD*U01 +* + DO I=1,NNB + DO J=1,I + A(CUT+I,CUT+J)=A(CUT+I,CUT+J)+WORK(U11+I,J) + END DO + END DO +* +* L01 = L22**T*invD2*L21 +* + CALL STRMM('L',UPLO,'T','U', N-NNB-CUT, NNB, + $ ONE,A(CUT+NNB+1,CUT+NNB+1),LDA,WORK,N+NB+1) +* +* Update L21 +* + DO I=1,N-CUT-NNB + DO J=1,NNB + A(CUT+NNB+I,CUT+J)=WORK(I,J) + END DO + END DO + + ELSE +* +* L11 = L11**T*invD1*L11 +* + DO I=1,NNB + DO J=1,I + A(CUT+I,CUT+J)=WORK(U11+I,J) + END DO + END DO + END IF +* +* Next Block +* + CUT=CUT+NNB + END DO +* +* Apply PERMUTATIONS P and P**T: P * inv(U**T)*inv(D)*inv(U) *P**T +* + I=N + DO WHILE ( I .GE. 1 ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF (I .LT. IP) CALL SSYSWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL SSYSWAPR( UPLO, N, A, LDA, IP ,I ) + ELSE + IP=-IPIV(I) + IF ( I .LT. IP) CALL SSYSWAPR( UPLO, N, A, LDA, I ,IP ) + IF ( I .GT. IP) CALL SSYSWAPR( UPLO, N, A, LDA, IP ,I ) + I=I-1 + ENDIF + I=I-1 + END DO + END IF +* + RETURN +* +* End of SSYTRI2X +* + END + diff --git a/dspl/liblapack/SRC/ssytri_3.f b/dspl/liblapack/SRC/ssytri_3.f new file mode 100644 index 0000000..a7d8fe7 --- /dev/null +++ b/dspl/liblapack/SRC/ssytri_3.f @@ -0,0 +1,248 @@ +*> \brief \b SSYTRI_3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRI_3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRI_3( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), E( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> SSYTRI_3 computes the inverse of a real symmetric indefinite +*> matrix A using the factorization computed by SSYTRF_RK or SSYTRF_BK: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> SSYTRI_3 sets the leading dimension of the workspace before calling +*> SSYTRI_3X that actually computes the inverse. This is the blocked +*> version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix. +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, diagonal of the block diagonal matrix D and +*> factors U or L as computed by SSYTRF_RK and SSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> On exit, if INFO = 0, the symmetric inverse of the original +*> matrix. +*> If UPLO = 'U': the upper triangular part of the inverse +*> is formed and the part of A below the diagonal is not +*> referenced; +*> If UPLO = 'L': the lower triangular part of the inverse +*> is formed and the part of A above the diagonal is not +*> referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSYTRF_RK or SSYTRF_BK. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N+NB+1)*(NB+3). +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= (N+NB+1)*(NB+3). +*> +*> If LDWORK = -1, then a workspace query is assumed; +*> the routine only calculates the optimal size of the optimal +*> size of the WORK array, returns this value as the first +*> entry of the WORK array, and no error message related to +*> LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup singleSYcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> November 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYTRI_3( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, LQUERY + INTEGER LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SSYTRI_3X, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* Determine the block size +* + NB = MAX( 1, ILAENV( 1, 'SSYTRI_3', UPLO, N, -1, -1, -1 ) ) + LWKOPT = ( N+NB+1 ) * ( NB+3 ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF ( LWORK .LT. LWKOPT .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRI_3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK( 1 ) = LWKOPT + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + CALL SSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of SSYTRI_3 +* + END diff --git a/dspl/liblapack/SRC/ssytri_3x.f b/dspl/liblapack/SRC/ssytri_3x.f new file mode 100644 index 0000000..a2b011f --- /dev/null +++ b/dspl/liblapack/SRC/ssytri_3x.f @@ -0,0 +1,645 @@ +*> \brief \b SSYTRI_3X +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRI_3X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), E( * ), WORK( N+NB+1, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> SSYTRI_3X computes the inverse of a real symmetric indefinite +*> matrix A using the factorization computed by SSYTRF_RK or SSYTRF_BK: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix. +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, diagonal of the block diagonal matrix D and +*> factors U or L as computed by SYTRF_RK and SSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> On exit, if INFO = 0, the symmetric inverse of the original +*> matrix. +*> If UPLO = 'U': the upper triangular part of the inverse +*> is formed and the part of A below the diagonal is not +*> referenced; +*> If UPLO = 'L': the lower triangular part of the inverse +*> is formed and the part of A above the diagonal is not +*> referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSYTRF_RK or SSYTRF_BK. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N+NB+1,NB+3). +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> Block size. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup singleSYcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> June 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), E( * ), WORK( N+NB+1, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11 + REAL AK, AKKP1, AKP1, D, T, U01_I_J, U01_IP1_J, + $ U11_I_J, U11_IP1_J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SSYSWAPR, STRTRI, STRMM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF +* +* Quick return if possible +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRI_3X', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN +* +* Workspace got Non-diag elements of D +* + DO K = 1, N + WORK( K, 1 ) = E( K ) + END DO +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + END DO + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + END DO + END IF +* + INFO = 0 +* +* Splitting Workspace +* U01 is a block ( N, NB+1 ) +* The first element of U01 is in WORK( 1, 1 ) +* U11 is a block ( NB+1, NB+1 ) +* The first element of U11 is in WORK( N+1, 1 ) +* + U11 = N +* +* INVD is a block ( N, 2 ) +* The first element of INVD is in WORK( 1, INVD ) +* + INVD = NB + 2 + + IF( UPPER ) THEN +* +* Begin Upper +* +* invA = P * inv(U**T) * inv(D) * inv(U) * P**T. +* + CALL STRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D) * inv(U) +* + K = 1 + DO WHILE( K.LE.N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK( K, INVD ) = ONE / A( K, K ) + WORK( K, INVD+1 ) = ZERO + ELSE +* 2 x 2 diagonal NNB + T = WORK( K+1, 1 ) + AK = A( K, K ) / T + AKP1 = A( K+1, K+1 ) / T + AKKP1 = WORK( K+1, 1 ) / T + D = T*( AK*AKP1-ONE ) + WORK( K, INVD ) = AKP1 / D + WORK( K+1, INVD+1 ) = AK / D + WORK( K, INVD+1 ) = -AKKP1 / D + WORK( K+1, INVD ) = WORK( K, INVD+1 ) + K = K + 1 + END IF + K = K + 1 + END DO +* +* inv(U**T) = (inv(U))**T +* +* inv(U**T) * inv(D) * inv(U) +* + CUT = N + DO WHILE( CUT.GT.0 ) + NNB = NB + IF( CUT.LE.NNB ) THEN + NNB = CUT + ELSE + ICOUNT = 0 +* count negative elements, + DO I = CUT+1-NNB, CUT + IF( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1 + END DO +* need a even number for a clear cut + IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1 + END IF + + CUT = CUT - NNB +* +* U01 Block +* + DO I = 1, CUT + DO J = 1, NNB + WORK( I, J ) = A( I, CUT+J ) + END DO + END DO +* +* U11 Block +* + DO I = 1, NNB + WORK( U11+I, I ) = ONE + DO J = 1, I-1 + WORK( U11+I, J ) = ZERO + END DO + DO J = I+1, NNB + WORK( U11+I, J ) = A( CUT+I, CUT+J ) + END DO + END DO +* +* invD * U01 +* + I = 1 + DO WHILE( I.LE.CUT ) + IF( IPIV( I ).GT.0 ) THEN + DO J = 1, NNB + WORK( I, J ) = WORK( I, INVD ) * WORK( I, J ) + END DO + ELSE + DO J = 1, NNB + U01_I_J = WORK( I, J ) + U01_IP1_J = WORK( I+1, J ) + WORK( I, J ) = WORK( I, INVD ) * U01_I_J + $ + WORK( I, INVD+1 ) * U01_IP1_J + WORK( I+1, J ) = WORK( I+1, INVD ) * U01_I_J + $ + WORK( I+1, INVD+1 ) * U01_IP1_J + END DO + I = I + 1 + END IF + I = I + 1 + END DO +* +* invD1 * U11 +* + I = 1 + DO WHILE ( I.LE.NNB ) + IF( IPIV( CUT+I ).GT.0 ) THEN + DO J = I, NNB + WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) + END DO + ELSE + DO J = I, NNB + U11_I_J = WORK(U11+I,J) + U11_IP1_J = WORK(U11+I+1,J) + WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) + $ + WORK(CUT+I,INVD+1) * WORK(U11+I+1,J) + WORK( U11+I+1, J ) = WORK(CUT+I+1,INVD) * U11_I_J + $ + WORK(CUT+I+1,INVD+1) * U11_IP1_J + END DO + I = I + 1 + END IF + I = I + 1 + END DO +* +* U11**T * invD1 * U11 -> U11 +* + CALL STRMM( 'L', 'U', 'T', 'U', NNB, NNB, + $ ONE, A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ), + $ N+NB+1 ) +* + DO I = 1, NNB + DO J = I, NNB + A( CUT+I, CUT+J ) = WORK( U11+I, J ) + END DO + END DO +* +* U01**T * invD * U01 -> A( CUT+I, CUT+J ) +* + CALL SGEMM( 'T', 'N', NNB, NNB, CUT, ONE, A( 1, CUT+1 ), + $ LDA, WORK, N+NB+1, ZERO, WORK(U11+1,1), N+NB+1 ) + +* +* U11 = U11**T * invD1 * U11 + U01**T * invD * U01 +* + DO I = 1, NNB + DO J = I, NNB + A( CUT+I, CUT+J ) = A( CUT+I, CUT+J ) + WORK(U11+I,J) + END DO + END DO +* +* U01 = U00**T * invD0 * U01 +* + CALL STRMM( 'L', UPLO, 'T', 'U', CUT, NNB, + $ ONE, A, LDA, WORK, N+NB+1 ) + +* +* Update U01 +* + DO I = 1, CUT + DO J = 1, NNB + A( I, CUT+J ) = WORK( I, J ) + END DO + END DO +* +* Next Block +* + END DO +* +* Apply PERMUTATIONS P and P**T: +* P * inv(U**T) * inv(D) * inv(U) * P**T. +* Interchange rows and columns I and IPIV(I) in reverse order +* from the formation order of IPIV vector for Upper case. +* +* ( We can use a loop over IPIV with increment 1, +* since the ABS value of IPIV(I) represents the row (column) +* index of the interchange with row (column) i in both 1x1 +* and 2x2 pivot cases, i.e. we don't need separate code branches +* for 1x1 and 2x2 pivot cases ) +* + DO I = 1, N + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + IF (I .LT. IP) CALL SSYSWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL SSYSWAPR( UPLO, N, A, LDA, IP ,I ) + END IF + END DO +* + ELSE +* +* Begin Lower +* +* inv A = P * inv(L**T) * inv(D) * inv(L) * P**T. +* + CALL STRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D) * inv(L) +* + K = N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK( K, INVD ) = ONE / A( K, K ) + WORK( K, INVD+1 ) = ZERO + ELSE +* 2 x 2 diagonal NNB + T = WORK( K-1, 1 ) + AK = A( K-1, K-1 ) / T + AKP1 = A( K, K ) / T + AKKP1 = WORK( K-1, 1 ) / T + D = T*( AK*AKP1-ONE ) + WORK( K-1, INVD ) = AKP1 / D + WORK( K, INVD ) = AK / D + WORK( K, INVD+1 ) = -AKKP1 / D + WORK( K-1, INVD+1 ) = WORK( K, INVD+1 ) + K = K - 1 + END IF + K = K - 1 + END DO +* +* inv(L**T) = (inv(L))**T +* +* inv(L**T) * inv(D) * inv(L) +* + CUT = 0 + DO WHILE( CUT.LT.N ) + NNB = NB + IF( (CUT + NNB).GT.N ) THEN + NNB = N - CUT + ELSE + ICOUNT = 0 +* count negative elements, + DO I = CUT + 1, CUT+NNB + IF ( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1 + END DO +* need a even number for a clear cut + IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1 + END IF +* +* L21 Block +* + DO I = 1, N-CUT-NNB + DO J = 1, NNB + WORK( I, J ) = A( CUT+NNB+I, CUT+J ) + END DO + END DO +* +* L11 Block +* + DO I = 1, NNB + WORK( U11+I, I) = ONE + DO J = I+1, NNB + WORK( U11+I, J ) = ZERO + END DO + DO J = 1, I-1 + WORK( U11+I, J ) = A( CUT+I, CUT+J ) + END DO + END DO +* +* invD*L21 +* + I = N-CUT-NNB + DO WHILE( I.GE.1 ) + IF( IPIV( CUT+NNB+I ).GT.0 ) THEN + DO J = 1, NNB + WORK( I, J ) = WORK( CUT+NNB+I, INVD) * WORK( I, J) + END DO + ELSE + DO J = 1, NNB + U01_I_J = WORK(I,J) + U01_IP1_J = WORK(I-1,J) + WORK(I,J)=WORK(CUT+NNB+I,INVD)*U01_I_J+ + $ WORK(CUT+NNB+I,INVD+1)*U01_IP1_J + WORK(I-1,J)=WORK(CUT+NNB+I-1,INVD+1)*U01_I_J+ + $ WORK(CUT+NNB+I-1,INVD)*U01_IP1_J + END DO + I = I - 1 + END IF + I = I - 1 + END DO +* +* invD1*L11 +* + I = NNB + DO WHILE( I.GE.1 ) + IF( IPIV( CUT+I ).GT.0 ) THEN + DO J = 1, NNB + WORK( U11+I, J ) = WORK( CUT+I, INVD)*WORK(U11+I,J) + END DO + + ELSE + DO J = 1, NNB + U11_I_J = WORK( U11+I, J ) + U11_IP1_J = WORK( U11+I-1, J ) + WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) + $ + WORK(CUT+I,INVD+1) * U11_IP1_J + WORK( U11+I-1, J ) = WORK(CUT+I-1,INVD+1) * U11_I_J + $ + WORK(CUT+I-1,INVD) * U11_IP1_J + END DO + I = I - 1 + END IF + I = I - 1 + END DO +* +* L11**T * invD1 * L11 -> L11 +* + CALL STRMM( 'L', UPLO, 'T', 'U', NNB, NNB, ONE, + $ A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ), + $ N+NB+1 ) + +* + DO I = 1, NNB + DO J = 1, I + A( CUT+I, CUT+J ) = WORK( U11+I, J ) + END DO + END DO +* + IF( (CUT+NNB).LT.N ) THEN +* +* L21**T * invD2*L21 -> A( CUT+I, CUT+J ) +* + CALL SGEMM( 'T', 'N', NNB, NNB, N-NNB-CUT, ONE, + $ A( CUT+NNB+1, CUT+1 ), LDA, WORK, N+NB+1, + $ ZERO, WORK( U11+1, 1 ), N+NB+1 ) + +* +* L11 = L11**T * invD1 * L11 + U01**T * invD * U01 +* + DO I = 1, NNB + DO J = 1, I + A( CUT+I, CUT+J ) = A( CUT+I, CUT+J )+WORK(U11+I,J) + END DO + END DO +* +* L01 = L22**T * invD2 * L21 +* + CALL STRMM( 'L', UPLO, 'T', 'U', N-NNB-CUT, NNB, ONE, + $ A( CUT+NNB+1, CUT+NNB+1 ), LDA, WORK, + $ N+NB+1 ) +* +* Update L21 +* + DO I = 1, N-CUT-NNB + DO J = 1, NNB + A( CUT+NNB+I, CUT+J ) = WORK( I, J ) + END DO + END DO +* + ELSE +* +* L11 = L11**T * invD1 * L11 +* + DO I = 1, NNB + DO J = 1, I + A( CUT+I, CUT+J ) = WORK( U11+I, J ) + END DO + END DO + END IF +* +* Next Block +* + CUT = CUT + NNB +* + END DO +* +* Apply PERMUTATIONS P and P**T: +* P * inv(L**T) * inv(D) * inv(L) * P**T. +* Interchange rows and columns I and IPIV(I) in reverse order +* from the formation order of IPIV vector for Lower case. +* +* ( We can use a loop over IPIV with increment -1, +* since the ABS value of IPIV(I) represents the row (column) +* index of the interchange with row (column) i in both 1x1 +* and 2x2 pivot cases, i.e. we don't need separate code branches +* for 1x1 and 2x2 pivot cases ) +* + DO I = N, 1, -1 + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + IF (I .LT. IP) CALL SSYSWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL SSYSWAPR( UPLO, N, A, LDA, IP ,I ) + END IF + END DO +* + END IF +* + RETURN +* +* End of SSYTRI_3X +* + END + diff --git a/dspl/liblapack/SRC/ssytri_rook.f b/dspl/liblapack/SRC/ssytri_rook.f new file mode 100644 index 0000000..01e4ea1 --- /dev/null +++ b/dspl/liblapack/SRC/ssytri_rook.f @@ -0,0 +1,450 @@ +*> \brief \b SSYTRI_ROOK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRI_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRI_ROOK( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRI_ROOK computes the inverse of a real symmetric +*> matrix A using the factorization A = U*D*U**T or A = L*D*L**T +*> computed by SSYTRF_ROOK. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by SSYTRF_ROOK. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSYTRF_ROOK. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> April 2012, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYTRI_ROOK( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KP, KSTEP + REAL AK, AKKP1, AKP1, D, T, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT + EXTERNAL LSAME, SDOT +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SSWAP, SSYMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRI_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL SCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL SSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - SDOT( K-1, WORK, 1, A( 1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K+1 ) ) + AK = A( K, K ) / T + AKP1 = A( K+1, K+1 ) / T + AKKP1 = A( K, K+1 ) / T + D = T*( AK*AKP1-ONE ) + A( K, K ) = AKP1 / D + A( K+1, K+1 ) = AK / D + A( K, K+1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL SCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL SSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - SDOT( K-1, WORK, 1, A( 1, K ), + $ 1 ) + A( K, K+1 ) = A( K, K+1 ) - + $ SDOT( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 ) + CALL SCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 ) + CALL SSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K+1 ), 1 ) + A( K+1, K+1 ) = A( K+1, K+1 ) - + $ SDOT( K-1, WORK, 1, A( 1, K+1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* Interchange rows and columns K and IPIV(K) in the leading +* submatrix A(1:k+1,1:k+1) +* + KP = IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.GT.1 ) + $ CALL SSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + CALL SSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + ELSE +* +* Interchange rows and columns K and K+1 with -IPIV(K) and +* -IPIV(K+1)in the leading submatrix A(1:k+1,1:k+1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.GT.1 ) + $ CALL SSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + CALL SSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA ) +* + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + TEMP = A( K, K+1 ) + A( K, K+1 ) = A( KP, K+1 ) + A( KP, K+1 ) = TEMP + END IF +* + K = K + 1 + KP = -IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.GT.1 ) + $ CALL SSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + CALL SSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + END IF +* + K = K + 1 + GO TO 30 + 40 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 50 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 60 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL SCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL SSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - SDOT( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K-1 ) ) + AK = A( K-1, K-1 ) / T + AKP1 = A( K, K ) / T + AKKP1 = A( K, K-1 ) / T + D = T*( AK*AKP1-ONE ) + A( K-1, K-1 ) = AKP1 / D + A( K, K ) = AK / D + A( K, K-1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL SCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL SSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - SDOT( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + A( K, K-1 ) = A( K, K-1 ) - + $ SDOT( N-K, A( K+1, K ), 1, A( K+1, K-1 ), + $ 1 ) + CALL SCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 ) + CALL SSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K-1 ), 1 ) + A( K-1, K-1 ) = A( K-1, K-1 ) - + $ SDOT( N-K, WORK, 1, A( K+1, K-1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* Interchange rows and columns K and IPIV(K) in the trailing +* submatrix A(k-1:n,k-1:n) +* + KP = IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.LT.N ) + $ CALL SSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + CALL SSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + ELSE +* +* Interchange rows and columns K and K-1 with -IPIV(K) and +* -IPIV(K-1) in the trailing submatrix A(k-1:n,k-1:n) +* + KP = -IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.LT.N ) + $ CALL SSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + CALL SSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA ) +* + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + TEMP = A( K, K-1 ) + A( K, K-1 ) = A( KP, K-1 ) + A( KP, K-1 ) = TEMP + END IF +* + K = K - 1 + KP = -IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.LT.N ) + $ CALL SSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + CALL SSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + END IF +* + K = K - 1 + GO TO 50 + 60 CONTINUE + END IF +* + RETURN +* +* End of SSYTRI_ROOK +* + END diff --git a/dspl/liblapack/SRC/ssytrs.f b/dspl/liblapack/SRC/ssytrs.f new file mode 100644 index 0000000..167851a --- /dev/null +++ b/dspl/liblapack/SRC/ssytrs.f @@ -0,0 +1,445 @@ +*> \brief \b SSYTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRS solves a system of linear equations A*X = B with a real +*> symmetric matrix A using the factorization A = U*D*U**T or +*> A = L*D*L**T computed by SSYTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by SSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSYTRF. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + SUBROUTINE SSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP + REAL AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SGER, SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**T. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL SGER( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL SSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL SSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL SGER( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL SGER( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ), + $ LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K-1, K ) + AKM1 = A( K-1, K-1 ) / AKM1K + AK = A( K, K ) / AKM1K + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / AKM1K + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U**T *X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U**T(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ), + $ 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U**T(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ), + $ 1, ONE, B( K, 1 ), LDB ) + CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, + $ A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB ) +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L**T. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL SGER( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL SSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL SSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL SGER( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL SGER( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K+1, K ) + AKM1 = A( K, K ) / AKM1K + AK = A( K+1, K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / AKM1K + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L**T *X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L**T(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L**T(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K-1 ), 1, ONE, B( K-1, 1 ), + $ LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of SSYTRS +* + END diff --git a/dspl/liblapack/SRC/ssytrs2.f b/dspl/liblapack/SRC/ssytrs2.f new file mode 100644 index 0000000..0a1870b --- /dev/null +++ b/dspl/liblapack/SRC/ssytrs2.f @@ -0,0 +1,361 @@ +*> \brief \b SSYTRS2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRS2( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRS2 solves a system of linear equations A*X = B with a real +*> symmetric matrix A using the factorization A = U*D*U**T or +*> A = L*D*L**T computed by SSYTRF and converted by SSYCONV. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by SSYTRF. +*> Note that A is input / output. This might be counter-intuitive, +*> and one may think that A is input only. A is input / output. This +*> is because, at the start of the subroutine, we permute A in a +*> "better" form and then we permute A back to its original form at +*> the end. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSYTRF. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + SUBROUTINE SSYTRS2( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IINFO, J, K, KP + REAL AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSYCONV, SSWAP, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRS2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Convert A +* + CALL SSYCONV( UPLO, 'C', N, A, LDA, IPIV, WORK, IINFO ) +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**T. +* +* P**T * B + K=N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K-1 and -IPIV(K). + KP = -IPIV( K ) + IF( KP.EQ.-IPIV( K-1 ) ) + $ CALL SSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-2 + END IF + END DO +* +* Compute (U \P**T * B) -> B [ (U \P**T * B) ] +* + CALL STRSM('L','U','N','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* Compute D \ B -> B [ D \ (U \P**T * B) ] +* + I=N + DO WHILE ( I .GE. 1 ) + IF( IPIV(I) .GT. 0 ) THEN + CALL SSCAL( NRHS, ONE / A( I, I ), B( I, 1 ), LDB ) + ELSEIF ( I .GT. 1) THEN + IF ( IPIV(I-1) .EQ. IPIV(I) ) THEN + AKM1K = WORK(I) + AKM1 = A( I-1, I-1 ) / AKM1K + AK = A( I, I ) / AKM1K + DENOM = AKM1*AK - ONE + DO 15 J = 1, NRHS + BKM1 = B( I-1, J ) / AKM1K + BK = B( I, J ) / AKM1K + B( I-1, J ) = ( AK*BKM1-BK ) / DENOM + B( I, J ) = ( AKM1*BK-BKM1 ) / DENOM + 15 CONTINUE + I = I - 1 + ENDIF + ENDIF + I = I - 1 + END DO +* +* Compute (U**T \ B) -> B [ U**T \ (D \ (U \P**T * B) ) ] +* + CALL STRSM('L','U','T','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* P * B [ P * (U**T \ (D \ (U \P**T * B) )) ] +* + K=1 + DO WHILE ( K .LE. N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K-1 and -IPIV(K). + KP = -IPIV( K ) + IF( K .LT. N .AND. KP.EQ.-IPIV( K+1 ) ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+2 + ENDIF + END DO +* + ELSE +* +* Solve A*X = B, where A = L*D*L**T. +* +* P**T * B + K=1 + DO WHILE ( K .LE. N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K and -IPIV(K+1). + KP = -IPIV( K+1 ) + IF( KP.EQ.-IPIV( K ) ) + $ CALL SSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+2 + ENDIF + END DO +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL STRSM('L','L','N','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* Compute D \ B -> B [ D \ (L \P**T * B) ] +* + I=1 + DO WHILE ( I .LE. N ) + IF( IPIV(I) .GT. 0 ) THEN + CALL SSCAL( NRHS, ONE / A( I, I ), B( I, 1 ), LDB ) + ELSE + AKM1K = WORK(I) + AKM1 = A( I, I ) / AKM1K + AK = A( I+1, I+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 25 J = 1, NRHS + BKM1 = B( I, J ) / AKM1K + BK = B( I+1, J ) / AKM1K + B( I, J ) = ( AK*BKM1-BK ) / DENOM + B( I+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 25 CONTINUE + I = I + 1 + ENDIF + I = I + 1 + END DO +* +* Compute (L**T \ B) -> B [ L**T \ (D \ (L \P**T * B) ) ] +* + CALL STRSM('L','L','T','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* P * B [ P * (L**T \ (D \ (L \P**T * B) )) ] +* + K=N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K-1 and -IPIV(K). + KP = -IPIV( K ) + IF( K.GT.1 .AND. KP.EQ.-IPIV( K-1 ) ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-2 + ENDIF + END DO +* + END IF +* +* Revert A +* + CALL SSYCONV( UPLO, 'R', N, A, LDA, IPIV, WORK, IINFO ) +* + RETURN +* +* End of SSYTRS2 +* + END diff --git a/dspl/liblapack/SRC/ssytrs_3.f b/dspl/liblapack/SRC/ssytrs_3.f new file mode 100644 index 0000000..4e98819 --- /dev/null +++ b/dspl/liblapack/SRC/ssytrs_3.f @@ -0,0 +1,371 @@ +*> \brief \b SSYTRS_3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRS_3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), B( LDB, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> SSYTRS_3 solves a system of linear equations A * X = B with a real +*> symmetric matrix A using the factorization computed +*> by SSYTRF_RK or SSYTRF_BK: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This algorithm is using Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix: +*> = 'U': Upper triangular, form is A = P*U*D*(U**T)*(P**T); +*> = 'L': Lower triangular, form is A = P*L*D*(L**T)*(P**T). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> Diagonal of the block diagonal matrix D and factors U or L +*> as computed by SSYTRF_RK and SSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSYTRF_RK or SSYTRF_BK. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup singleSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> June 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ==================================================================== + SUBROUTINE SSYTRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), B( LDB, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J, K, KP + REAL AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, SSWAP, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRS_3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Begin Upper +* +* Solve A*X = B, where A = U*D*U**T. +* +* P**T * B +* +* Interchange rows K and IPIV(K) of matrix B in the same order +* that the formation order of IPIV(I) vector for Upper case. +* +* (We can do the simple loop over IPIV with decrement -1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = N, 1, -1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* +* Compute (U \P**T * B) -> B [ (U \P**T * B) ] +* + CALL STRSM( 'L', 'U', 'N', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* Compute D \ B -> B [ D \ (U \P**T * B) ] +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN + CALL SSCAL( NRHS, ONE / A( I, I ), B( I, 1 ), LDB ) + ELSE IF ( I.GT.1 ) THEN + AKM1K = E( I ) + AKM1 = A( I-1, I-1 ) / AKM1K + AK = A( I, I ) / AKM1K + DENOM = AKM1*AK - ONE + DO J = 1, NRHS + BKM1 = B( I-1, J ) / AKM1K + BK = B( I, J ) / AKM1K + B( I-1, J ) = ( AK*BKM1-BK ) / DENOM + B( I, J ) = ( AKM1*BK-BKM1 ) / DENOM + END DO + I = I - 1 + END IF + I = I - 1 + END DO +* +* Compute (U**T \ B) -> B [ U**T \ (D \ (U \P**T * B) ) ] +* + CALL STRSM( 'L', 'U', 'T', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* P * B [ P * (U**T \ (D \ (U \P**T * B) )) ] +* +* Interchange rows K and IPIV(K) of matrix B in reverse order +* from the formation order of IPIV(I) vector for Upper case. +* +* (We can do the simple loop over IPIV with increment 1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = 1, N, 1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* + ELSE +* +* Begin Lower +* +* Solve A*X = B, where A = L*D*L**T. +* +* P**T * B +* Interchange rows K and IPIV(K) of matrix B in the same order +* that the formation order of IPIV(I) vector for Lower case. +* +* (We can do the simple loop over IPIV with increment 1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = 1, N, 1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL STRSM( 'L', 'L', 'N', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* Compute D \ B -> B [ D \ (L \P**T * B) ] +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN + CALL SSCAL( NRHS, ONE / A( I, I ), B( I, 1 ), LDB ) + ELSE IF( I.LT.N ) THEN + AKM1K = E( I ) + AKM1 = A( I, I ) / AKM1K + AK = A( I+1, I+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO J = 1, NRHS + BKM1 = B( I, J ) / AKM1K + BK = B( I+1, J ) / AKM1K + B( I, J ) = ( AK*BKM1-BK ) / DENOM + B( I+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + END DO + I = I + 1 + END IF + I = I + 1 + END DO +* +* Compute (L**T \ B) -> B [ L**T \ (D \ (L \P**T * B) ) ] +* + CALL STRSM('L', 'L', 'T', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* P * B [ P * (L**T \ (D \ (L \P**T * B) )) ] +* +* Interchange rows K and IPIV(K) of matrix B in reverse order +* from the formation order of IPIV(I) vector for Lower case. +* +* (We can do the simple loop over IPIV with decrement -1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = N, 1, -1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* +* END Lower +* + END IF +* + RETURN +* +* End of SSYTRS_3 +* + END diff --git a/dspl/liblapack/SRC/ssytrs_aa.f b/dspl/liblapack/SRC/ssytrs_aa.f new file mode 100644 index 0000000..b05c9f7 --- /dev/null +++ b/dspl/liblapack/SRC/ssytrs_aa.f @@ -0,0 +1,294 @@ +*> \brief \b SSYTRS_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRS_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRS_AA solves a system of linear equations A*X = B with a real +*> symmetric matrix A using the factorization A = U*T*U**T or +*> A = L*T*L**T computed by SSYTRF_AA. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*T*U**T; +*> = 'L': Lower triangular, form is A = L*T*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> Details of factors computed by SSYTRF_AA. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges as computed by SSYTRF_AA. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is DOUBLE array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER, LWORK >= MAX(1,3*N-2). +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + SUBROUTINE SSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER K, KP, LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGTSV, SSWAP, SLACPY, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, 3*N-2 ) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRS_AA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + LWKOPT = (3*N-2) + WORK( 1 ) = LWKOPT + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*T*U**T. +* +* Pivot, P**T * B +* + K = 1 + DO WHILE ( K.LE.N ) + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + END DO +* +* Compute (U \P**T * B) -> B [ (U \P**T * B) ] +* + CALL STRSM('L', 'U', 'T', 'U', N-1, NRHS, ONE, A( 1, 2 ), LDA, + $ B( 2, 1 ), LDB) +* +* Compute T \ B -> B [ T \ (U \P**T * B) ] +* + CALL SLACPY( 'F', 1, N, A(1, 1), LDA+1, WORK(N), 1) + IF( N.GT.1 ) THEN + CALL SLACPY( 'F', 1, N-1, A(1, 2), LDA+1, WORK(1), 1) + CALL SLACPY( 'F', 1, N-1, A(1, 2), LDA+1, WORK(2*N), 1) + END IF + CALL SGTSV(N, NRHS, WORK(1), WORK(N), WORK(2*N), B, LDB, + $ INFO) +* +* +* Compute (U**T \ B) -> B [ U**T \ (T \ (U \P**T * B) ) ] +* + CALL STRSM( 'L', 'U', 'N', 'U', N-1, NRHS, ONE, A( 1, 2 ), LDA, + $ B(2, 1), LDB) +* +* Pivot, P * B [ P * (U**T \ (T \ (U \P**T * B) )) ] +* + K = N + DO WHILE ( K.GE.1 ) + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + END DO +* + ELSE +* +* Solve A*X = B, where A = L*T*L**T. +* +* Pivot, P**T * B +* + K = 1 + DO WHILE ( K.LE.N ) + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + END DO +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL STRSM( 'L', 'L', 'N', 'U', N-1, NRHS, ONE, A( 2, 1), LDA, + $ B(2, 1), LDB) +* +* Compute T \ B -> B [ T \ (L \P**T * B) ] +* + CALL SLACPY( 'F', 1, N, A(1, 1), LDA+1, WORK(N), 1) + IF( N.GT.1 ) THEN + CALL SLACPY( 'F', 1, N-1, A(2, 1), LDA+1, WORK(1), 1) + CALL SLACPY( 'F', 1, N-1, A(2, 1), LDA+1, WORK(2*N), 1) + END IF + CALL SGTSV(N, NRHS, WORK(1), WORK(N), WORK(2*N), B, LDB, + $ INFO) +* +* Compute (L**T \ B) -> B [ L**T \ (T \ (L \P**T * B) ) ] +* + CALL STRSM( 'L', 'L', 'T', 'U', N-1, NRHS, ONE, A( 2, 1 ), LDA, + $ B( 2, 1 ), LDB) +* +* Pivot, P * B [ P * (L**T \ (T \ (L \P**T * B) )) ] +* + K = N + DO WHILE ( K.GE.1 ) + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + END DO +* + END IF +* + RETURN +* +* End of SSYTRS_AA +* + END diff --git a/dspl/liblapack/SRC/ssytrs_aa_2stage.f b/dspl/liblapack/SRC/ssytrs_aa_2stage.f new file mode 100644 index 0000000..c9c7181 --- /dev/null +++ b/dspl/liblapack/SRC/ssytrs_aa_2stage.f @@ -0,0 +1,281 @@ +*> \brief \b SSYTRS_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRS_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, +* IPIV2, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* REAL A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRS_AA_2STAGE solves a system of linear equations A*X = B with a real +*> symmetric matrix A using the factorization A = U*T*U**T or +*> A = L*T*L**T computed by SSYTRF_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*T*U**T; +*> = 'L': Lower triangular, form is A = L*T*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> Details of factors computed by SSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is REAL array, dimension (LTB) +*> Details of factors computed by SSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> SSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] IPIV2 +*> \verbatim +*> IPIV2 is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> SSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + SUBROUTINE SSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + REAL A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + INTEGER LDTB, NB + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGBTRS, SLASWP, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LTB.LT.( 4*N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRS_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Read NB and compute LDTB +* + NB = INT( TB( 1 ) ) + LDTB = LTB/N +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*T*U**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL SLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (U**T \P**T * B) -> B [ (U**T \P**T * B) ] +* + CALL STRSM( 'L', 'U', 'T', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (U**T \P**T * B) ] +* + CALL SGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (U \ B) -> B [ U \ (T \ (U**T \P**T * B) ) ] +* + CALL STRSM( 'L', 'U', 'N', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (U \ (T \ (U**T \P**T * B) )) ] +* + CALL SLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF +* + ELSE +* +* Solve A*X = B, where A = L*T*L**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL SLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL STRSM( 'L', 'L', 'N', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (L \P**T * B) ] +* + CALL SGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (L**T \ B) -> B [ L**T \ (T \ (L \P**T * B) ) ] +* + CALL STRSM( 'L', 'L', 'T', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (L**T \ (T \ (L \P**T * B) )) ] +* + CALL SLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF + END IF +* + RETURN +* +* End of SSYTRS_AA_2STAGE +* + END diff --git a/dspl/liblapack/SRC/ssytrs_rook.f b/dspl/liblapack/SRC/ssytrs_rook.f new file mode 100644 index 0000000..ba2428b --- /dev/null +++ b/dspl/liblapack/SRC/ssytrs_rook.f @@ -0,0 +1,484 @@ +*> \brief \b SSYTRS_ROOK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download SSYTRS_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE SSYTRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* REAL A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SSYTRS_ROOK solves a system of linear equations A*X = B with +*> a real symmetric matrix A using the factorization A = U*D*U**T or +*> A = L*D*L**T computed by SSYTRF_ROOK. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by SSYTRF_ROOK. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by SSYTRF_ROOK. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realSYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> April 2012, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE SSYTRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP + REAL AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SGER, SSCAL, SSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'SSYTRS_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**T. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL SGER( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL SSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K and -IPIV(K) THEN K-1 and -IPIV(K-1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K-1 ) + IF( KP.NE.K-1 ) + $ CALL SSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.GT.2 ) THEN + CALL SGER( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), + $ LDB, B( 1, 1 ), LDB ) + CALL SGER( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ), + $ LDB, B( 1, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K-1, K ) + AKM1 = A( K-1, K-1 ) / AKM1K + AK = A( K, K ) / AKM1K + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / AKM1K + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U**T *X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U**T(K)), where U(K) is the transformation +* stored in column K of A. +* + IF( K.GT.1 ) + $ CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K ), 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U**T(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.GT.1 ) THEN + CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K) THEN K+1 and -IPIV(K+1). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K+1 ) + IF( KP.NE.K+1 ) + $ CALL SSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L**T. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL SGER( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL SSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K and -IPIV(K) THEN K+1 and -IPIV(K+1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K+1 ) + IF( KP.NE.K+1 ) + $ CALL SSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL SGER( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL SGER( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K+1, K ) + AKM1 = A( K, K ) / AKM1K + AK = A( K+1, K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / AKM1K + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L**T *X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L**T(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L**T(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K-1 ), 1, ONE, B( K-1, 1 ), + $ LDB ) + END IF +* +* Interchange rows K and -IPIV(K) THEN K-1 and -IPIV(K-1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K-1 ) + IF( KP.NE.K-1 ) + $ CALL SSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of SSYTRS_ROOK +* + END diff --git a/dspl/liblapack/SRC/stbcon.f b/dspl/liblapack/SRC/stbcon.f new file mode 100644 index 0000000..e11c895 --- /dev/null +++ b/dspl/liblapack/SRC/stbcon.f @@ -0,0 +1,284 @@ +*> \brief \b STBCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STBCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STBCON( NORM, UPLO, DIAG, N, KD, AB, LDAB, RCOND, WORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER INFO, KD, LDAB, N +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL AB( LDAB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STBCON estimates the reciprocal of the condition number of a +*> triangular band matrix A, in either the 1-norm or the infinity-norm. +*> +*> The norm of A is computed and an estimate is obtained for +*> norm(inv(A)), then the reciprocal of the condition number is +*> computed as +*> RCOND = 1 / ( norm(A) * norm(inv(A)) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals or subdiagonals of the +*> triangular band matrix A. KD >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> The upper or lower triangular band matrix A, stored in the +*> first kd+1 rows of the array. The j-th column of A is stored +*> in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> If DIAG = 'U', the diagonal elements of A are not referenced +*> and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(norm(A) * norm(inv(A))). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE STBCON( NORM, UPLO, DIAG, N, KD, AB, LDAB, RCOND, WORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, KD, LDAB, N + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AB( LDAB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + REAL AINVNM, ANORM, SCALE, SMLNUM, XNORM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH, SLANTB + EXTERNAL LSAME, ISAMAX, SLAMCH, SLANTB +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SLATBS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = SLAMCH( 'Safe minimum' )*REAL( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = SLANTB( NORM, UPLO, DIAG, N, KD, AB, LDAB, WORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL SLATBS( UPLO, 'No transpose', DIAG, NORMIN, N, KD, + $ AB, LDAB, WORK, SCALE, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(A**T). +* + CALL SLATBS( UPLO, 'Transpose', DIAG, NORMIN, N, KD, AB, + $ LDAB, WORK, SCALE, WORK( 2*N+1 ), INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + XNORM = ABS( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of STBCON +* + END diff --git a/dspl/liblapack/SRC/stbrfs.f b/dspl/liblapack/SRC/stbrfs.f new file mode 100644 index 0000000..85cd312 --- /dev/null +++ b/dspl/liblapack/SRC/stbrfs.f @@ -0,0 +1,485 @@ +*> \brief \b STBRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STBRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STBRFS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, +* LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, KD, LDAB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL AB( LDAB, * ), B( LDB, * ), BERR( * ), +* $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STBRFS provides error bounds and backward error estimates for the +*> solution to a system of linear equations with a triangular band +*> coefficient matrix. +*> +*> The solution matrix X must be computed by STBTRS or some other +*> means before entering this routine. STBRFS does not do iterative +*> refinement because doing so cannot improve the backward error. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals or subdiagonals of the +*> triangular band matrix A. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> The upper or lower triangular band matrix A, stored in the +*> first kd+1 rows of the array. The j-th column of A is stored +*> in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> If DIAG = 'U', the diagonal elements of A are not referenced +*> and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> The solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE STBRFS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, + $ LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, KD, LDAB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AB( LDAB, * ), B( LDB, * ), BERR( * ), + $ FERR( * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANST + INTEGER I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLACN2, STBMV, STBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = KD + 2 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A or A**T, depending on TRANS. +* + CALL SCOPY( N, X( 1, J ), 1, WORK( N+1 ), 1 ) + CALL STBMV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, WORK( N+1 ), + $ 1 ) + CALL SAXPY( N, -ONE, B( 1, J ), 1, WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = ABS( X( K, J ) ) + DO 30 I = MAX( 1, K-KD ), K + WORK( I ) = WORK( I ) + + $ ABS( AB( KD+1+I-K, K ) )*XK + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = ABS( X( K, J ) ) + DO 50 I = MAX( 1, K-KD ), K - 1 + WORK( I ) = WORK( I ) + + $ ABS( AB( KD+1+I-K, K ) )*XK + 50 CONTINUE + WORK( K ) = WORK( K ) + XK + 60 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = ABS( X( K, J ) ) + DO 70 I = K, MIN( N, K+KD ) + WORK( I ) = WORK( I ) + ABS( AB( 1+I-K, K ) )*XK + 70 CONTINUE + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = ABS( X( K, J ) ) + DO 90 I = K + 1, MIN( N, K+KD ) + WORK( I ) = WORK( I ) + ABS( AB( 1+I-K, K ) )*XK + 90 CONTINUE + WORK( K ) = WORK( K ) + XK + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A**T)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = MAX( 1, K-KD ), K + S = S + ABS( AB( KD+1+I-K, K ) )* + $ ABS( X( I, J ) ) + 110 CONTINUE + WORK( K ) = WORK( K ) + S + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = ABS( X( K, J ) ) + DO 130 I = MAX( 1, K-KD ), K - 1 + S = S + ABS( AB( KD+1+I-K, K ) )* + $ ABS( X( I, J ) ) + 130 CONTINUE + WORK( K ) = WORK( K ) + S + 140 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, MIN( N, K+KD ) + S = S + ABS( AB( 1+I-K, K ) )*ABS( X( I, J ) ) + 150 CONTINUE + WORK( K ) = WORK( K ) + S + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = ABS( X( K, J ) ) + DO 170 I = K + 1, MIN( N, K+KD ) + S = S + ABS( AB( 1+I-K, K ) )*ABS( X( I, J ) ) + 170 CONTINUE + WORK( K ) = WORK( K ) + S + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL SLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**T). +* + CALL STBSV( UPLO, TRANST, DIAG, N, KD, AB, LDAB, + $ WORK( N+1 ), 1 ) + DO 220 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 230 CONTINUE + CALL STBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, + $ WORK( N+1 ), 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of STBRFS +* + END diff --git a/dspl/liblapack/SRC/stbtrs.f b/dspl/liblapack/SRC/stbtrs.f new file mode 100644 index 0000000..4955b7c --- /dev/null +++ b/dspl/liblapack/SRC/stbtrs.f @@ -0,0 +1,244 @@ +*> \brief \b STBTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STBTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STBTRS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, +* LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. +* REAL AB( LDAB, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STBTRS solves a triangular system of the form +*> +*> A * X = B or A**T * X = B, +*> +*> where A is a triangular band matrix of order N, and B is an +*> N-by NRHS matrix. A check is made to verify that A is nonsingular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals or subdiagonals of the +*> triangular band matrix A. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is REAL array, dimension (LDAB,N) +*> The upper or lower triangular band matrix A, stored in the +*> first kd+1 rows of AB. The j-th column of A is stored +*> in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> If DIAG = 'U', the diagonal elements of A are not referenced +*> and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, if INFO = 0, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of A is zero, +*> indicating that the matrix is singular and the +*> solutions X have not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE STBTRS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, + $ LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL AB( LDAB, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL STBSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOUNIT = LSAME( DIAG, 'N' ) + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + DO 10 INFO = 1, N + IF( AB( KD+1, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE + DO 20 INFO = 1, N + IF( AB( 1, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + END IF + INFO = 0 +* +* Solve A * X = B or A**T * X = B. +* + DO 30 J = 1, NRHS + CALL STBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, B( 1, J ), 1 ) + 30 CONTINUE +* + RETURN +* +* End of STBTRS +* + END diff --git a/dspl/liblapack/SRC/stfsm.f b/dspl/liblapack/SRC/stfsm.f new file mode 100644 index 0000000..a631cc4 --- /dev/null +++ b/dspl/liblapack/SRC/stfsm.f @@ -0,0 +1,1005 @@ +*> \brief \b STFSM solves a matrix equation (one operand is a triangular matrix in RFP format). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STFSM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STFSM( TRANSR, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, +* B, LDB ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, DIAG, SIDE, TRANS, UPLO +* INTEGER LDB, M, N +* REAL ALPHA +* .. +* .. Array Arguments .. +* REAL A( 0: * ), B( 0: LDB-1, 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Level 3 BLAS like routine for A in RFP Format. +*> +*> STFSM solves the matrix equation +*> +*> op( A )*X = alpha*B or X*op( A ) = alpha*B +*> +*> where alpha is a scalar, X and B are m by n matrices, A is a unit, or +*> non-unit, upper or lower triangular matrix and op( A ) is one of +*> +*> op( A ) = A or op( A ) = A**T. +*> +*> A is in Rectangular Full Packed (RFP) Format. +*> +*> The matrix X is overwritten on B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal Form of RFP A is stored; +*> = 'T': The Transpose Form of RFP A is stored. +*> \endverbatim +*> +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> On entry, SIDE specifies whether op( A ) appears on the left +*> or right of X as follows: +*> +*> SIDE = 'L' or 'l' op( A )*X = alpha*B. +*> +*> SIDE = 'R' or 'r' X*op( A ) = alpha*B. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the RFP matrix A came from +*> an upper or lower triangular matrix as follows: +*> UPLO = 'U' or 'u' RFP A came from an upper triangular matrix +*> UPLO = 'L' or 'l' RFP A came from a lower triangular matrix +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> On entry, TRANS specifies the form of op( A ) to be used +*> in the matrix multiplication as follows: +*> +*> TRANS = 'N' or 'n' op( A ) = A. +*> +*> TRANS = 'T' or 't' op( A ) = A'. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> On entry, DIAG specifies whether or not RFP A is unit +*> triangular as follows: +*> +*> DIAG = 'U' or 'u' A is assumed to be unit triangular. +*> +*> DIAG = 'N' or 'n' A is not assumed to be unit +*> triangular. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the number of rows of B. M must be at +*> least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of B. N must be +*> at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is REAL +*> On entry, ALPHA specifies the scalar alpha. When alpha is +*> zero then A is not referenced and B need not be set before +*> entry. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (NT) +*> NT = N*(N+1)/2. On entry, the matrix A in RFP Format. +*> RFP Format is described by TRANSR, UPLO and N as follows: +*> If TRANSR='N' then RFP A is (0:N,0:K-1) when N is even; +*> K=N/2. RFP A is (0:N-1,0:K) when N is odd; K=N/2. If +*> TRANSR = 'T' then RFP is the transpose of RFP A as +*> defined when TRANSR = 'N'. The contents of RFP A are defined +*> by UPLO as follows: If UPLO = 'U' the RFP A contains the NT +*> elements of upper packed A either in normal or +*> transpose Format. If UPLO = 'L' the RFP A contains +*> the NT elements of lower packed A either in normal or +*> transpose Format. The LDA of RFP A is (N+1)/2 when +*> TRANSR = 'T'. When TRANSR is 'N' the LDA is N+1 when N is +*> even and is N when is odd. +*> See the Note below for more details. Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> Before entry, the leading m by n part of the array B must +*> contain the right-hand side matrix B, and on exit is +*> overwritten by the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> On entry, LDB specifies the first dimension of B as declared +*> in the calling (sub) program. LDB must be at least +*> max( 1, m ). +*> Unchanged on exit. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Rectangular Full Packed (RFP) Format when N is +*> even. We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> the transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> the transpose of the last three columns of AP lower. +*> This covers the case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 03 04 05 33 43 53 +*> 13 14 15 00 44 54 +*> 23 24 25 10 11 55 +*> 33 34 35 20 21 22 +*> 00 44 45 30 31 32 +*> 01 11 55 40 41 42 +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We then consider Rectangular Full Packed (RFP) Format when N is +*> odd. We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> the transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> the transpose of the last two columns of AP lower. +*> This covers the case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 02 03 04 00 33 43 +*> 12 13 14 10 11 44 +*> 22 23 24 20 21 22 +*> 00 33 34 30 31 32 +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> 02 12 22 00 01 00 10 20 30 40 50 +*> 03 13 23 33 11 33 11 21 31 41 51 +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +* +* ===================================================================== + SUBROUTINE STFSM( TRANSR, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, + $ B, LDB ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, DIAG, SIDE, TRANS, UPLO + INTEGER LDB, M, N + REAL ALPHA +* .. +* .. Array Arguments .. + REAL A( 0: * ), B( 0: LDB-1, 0: * ) +* .. +* +* ===================================================================== +* +* .. +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LSIDE, MISODD, NISODD, NORMALTRANSR, + $ NOTRANS + INTEGER M1, M2, N1, N2, K, INFO, I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LSIDE = LSAME( SIDE, 'L' ) + LOWER = LSAME( UPLO, 'L' ) + NOTRANS = LSAME( TRANS, 'N' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'T' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSIDE .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -2 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.NOTRANS .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -4 + ELSE IF( .NOT.LSAME( DIAG, 'N' ) .AND. .NOT.LSAME( DIAG, 'U' ) ) + $ THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -6 + ELSE IF( N.LT.0 ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STFSM ', -INFO ) + RETURN + END IF +* +* Quick return when ( (N.EQ.0).OR.(M.EQ.0) ) +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN +* +* Quick return when ALPHA.EQ.(0D+0) +* + IF( ALPHA.EQ.ZERO ) THEN + DO 20 J = 0, N - 1 + DO 10 I = 0, M - 1 + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* + IF( LSIDE ) THEN +* +* SIDE = 'L' +* +* A is M-by-M. +* If M is odd, set NISODD = .TRUE., and M1 and M2. +* If M is even, NISODD = .FALSE., and M. +* + IF( MOD( M, 2 ).EQ.0 ) THEN + MISODD = .FALSE. + K = M / 2 + ELSE + MISODD = .TRUE. + IF( LOWER ) THEN + M2 = M / 2 + M1 = M - M2 + ELSE + M1 = M / 2 + M2 = M - M1 + END IF + END IF +* + IF( MISODD ) THEN +* +* SIDE = 'L' and N is odd +* + IF( NORMALTRANSR ) THEN +* +* SIDE = 'L', N is odd, and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'L', and +* TRANS = 'N' +* + IF( M.EQ.1 ) THEN + CALL STRSM( 'L', 'L', 'N', DIAG, M1, N, ALPHA, + $ A, M, B, LDB ) + ELSE + CALL STRSM( 'L', 'L', 'N', DIAG, M1, N, ALPHA, + $ A( 0 ), M, B, LDB ) + CALL SGEMM( 'N', 'N', M2, N, M1, -ONE, A( M1 ), + $ M, B, LDB, ALPHA, B( M1, 0 ), LDB ) + CALL STRSM( 'L', 'U', 'T', DIAG, M2, N, ONE, + $ A( M ), M, B( M1, 0 ), LDB ) + END IF +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'L', and +* TRANS = 'T' +* + IF( M.EQ.1 ) THEN + CALL STRSM( 'L', 'L', 'T', DIAG, M1, N, ALPHA, + $ A( 0 ), M, B, LDB ) + ELSE + CALL STRSM( 'L', 'U', 'N', DIAG, M2, N, ALPHA, + $ A( M ), M, B( M1, 0 ), LDB ) + CALL SGEMM( 'T', 'N', M1, N, M2, -ONE, A( M1 ), + $ M, B( M1, 0 ), LDB, ALPHA, B, LDB ) + CALL STRSM( 'L', 'L', 'T', DIAG, M1, N, ONE, + $ A( 0 ), M, B, LDB ) + END IF +* + END IF +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'N', and UPLO = 'U' +* + IF( .NOT.NOTRANS ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'U', and +* TRANS = 'N' +* + CALL STRSM( 'L', 'L', 'N', DIAG, M1, N, ALPHA, + $ A( M2 ), M, B, LDB ) + CALL SGEMM( 'T', 'N', M2, N, M1, -ONE, A( 0 ), M, + $ B, LDB, ALPHA, B( M1, 0 ), LDB ) + CALL STRSM( 'L', 'U', 'T', DIAG, M2, N, ONE, + $ A( M1 ), M, B( M1, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'U', and +* TRANS = 'T' +* + CALL STRSM( 'L', 'U', 'N', DIAG, M2, N, ALPHA, + $ A( M1 ), M, B( M1, 0 ), LDB ) + CALL SGEMM( 'N', 'N', M1, N, M2, -ONE, A( 0 ), M, + $ B( M1, 0 ), LDB, ALPHA, B, LDB ) + CALL STRSM( 'L', 'L', 'T', DIAG, M1, N, ONE, + $ A( M2 ), M, B, LDB ) +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'L', N is odd, and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'T', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'L', and +* TRANS = 'N' +* + IF( M.EQ.1 ) THEN + CALL STRSM( 'L', 'U', 'T', DIAG, M1, N, ALPHA, + $ A( 0 ), M1, B, LDB ) + ELSE + CALL STRSM( 'L', 'U', 'T', DIAG, M1, N, ALPHA, + $ A( 0 ), M1, B, LDB ) + CALL SGEMM( 'T', 'N', M2, N, M1, -ONE, + $ A( M1*M1 ), M1, B, LDB, ALPHA, + $ B( M1, 0 ), LDB ) + CALL STRSM( 'L', 'L', 'N', DIAG, M2, N, ONE, + $ A( 1 ), M1, B( M1, 0 ), LDB ) + END IF +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'L', and +* TRANS = 'T' +* + IF( M.EQ.1 ) THEN + CALL STRSM( 'L', 'U', 'N', DIAG, M1, N, ALPHA, + $ A( 0 ), M1, B, LDB ) + ELSE + CALL STRSM( 'L', 'L', 'T', DIAG, M2, N, ALPHA, + $ A( 1 ), M1, B( M1, 0 ), LDB ) + CALL SGEMM( 'N', 'N', M1, N, M2, -ONE, + $ A( M1*M1 ), M1, B( M1, 0 ), LDB, + $ ALPHA, B, LDB ) + CALL STRSM( 'L', 'U', 'N', DIAG, M1, N, ONE, + $ A( 0 ), M1, B, LDB ) + END IF +* + END IF +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'T', and UPLO = 'U' +* + IF( .NOT.NOTRANS ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'U', and +* TRANS = 'N' +* + CALL STRSM( 'L', 'U', 'T', DIAG, M1, N, ALPHA, + $ A( M2*M2 ), M2, B, LDB ) + CALL SGEMM( 'N', 'N', M2, N, M1, -ONE, A( 0 ), M2, + $ B, LDB, ALPHA, B( M1, 0 ), LDB ) + CALL STRSM( 'L', 'L', 'N', DIAG, M2, N, ONE, + $ A( M1*M2 ), M2, B( M1, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'U', and +* TRANS = 'T' +* + CALL STRSM( 'L', 'L', 'T', DIAG, M2, N, ALPHA, + $ A( M1*M2 ), M2, B( M1, 0 ), LDB ) + CALL SGEMM( 'T', 'N', M1, N, M2, -ONE, A( 0 ), M2, + $ B( M1, 0 ), LDB, ALPHA, B, LDB ) + CALL STRSM( 'L', 'U', 'N', DIAG, M1, N, ONE, + $ A( M2*M2 ), M2, B, LDB ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'L' and N is even +* + IF( NORMALTRANSR ) THEN +* +* SIDE = 'L', N is even, and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SIDE ='L', N is even, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'L', +* and TRANS = 'N' +* + CALL STRSM( 'L', 'L', 'N', DIAG, K, N, ALPHA, + $ A( 1 ), M+1, B, LDB ) + CALL SGEMM( 'N', 'N', K, N, K, -ONE, A( K+1 ), + $ M+1, B, LDB, ALPHA, B( K, 0 ), LDB ) + CALL STRSM( 'L', 'U', 'T', DIAG, K, N, ONE, + $ A( 0 ), M+1, B( K, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'L', +* and TRANS = 'T' +* + CALL STRSM( 'L', 'U', 'N', DIAG, K, N, ALPHA, + $ A( 0 ), M+1, B( K, 0 ), LDB ) + CALL SGEMM( 'T', 'N', K, N, K, -ONE, A( K+1 ), + $ M+1, B( K, 0 ), LDB, ALPHA, B, LDB ) + CALL STRSM( 'L', 'L', 'T', DIAG, K, N, ONE, + $ A( 1 ), M+1, B, LDB ) +* + END IF +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'N', and UPLO = 'U' +* + IF( .NOT.NOTRANS ) THEN +* +* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'U', +* and TRANS = 'N' +* + CALL STRSM( 'L', 'L', 'N', DIAG, K, N, ALPHA, + $ A( K+1 ), M+1, B, LDB ) + CALL SGEMM( 'T', 'N', K, N, K, -ONE, A( 0 ), M+1, + $ B, LDB, ALPHA, B( K, 0 ), LDB ) + CALL STRSM( 'L', 'U', 'T', DIAG, K, N, ONE, + $ A( K ), M+1, B( K, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'U', +* and TRANS = 'T' + CALL STRSM( 'L', 'U', 'N', DIAG, K, N, ALPHA, + $ A( K ), M+1, B( K, 0 ), LDB ) + CALL SGEMM( 'N', 'N', K, N, K, -ONE, A( 0 ), M+1, + $ B( K, 0 ), LDB, ALPHA, B, LDB ) + CALL STRSM( 'L', 'L', 'T', DIAG, K, N, ONE, + $ A( K+1 ), M+1, B, LDB ) +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'L', N is even, and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SIDE ='L', N is even, TRANSR = 'T', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'L', +* and TRANS = 'N' +* + CALL STRSM( 'L', 'U', 'T', DIAG, K, N, ALPHA, + $ A( K ), K, B, LDB ) + CALL SGEMM( 'T', 'N', K, N, K, -ONE, + $ A( K*( K+1 ) ), K, B, LDB, ALPHA, + $ B( K, 0 ), LDB ) + CALL STRSM( 'L', 'L', 'N', DIAG, K, N, ONE, + $ A( 0 ), K, B( K, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'L', +* and TRANS = 'T' +* + CALL STRSM( 'L', 'L', 'T', DIAG, K, N, ALPHA, + $ A( 0 ), K, B( K, 0 ), LDB ) + CALL SGEMM( 'N', 'N', K, N, K, -ONE, + $ A( K*( K+1 ) ), K, B( K, 0 ), LDB, + $ ALPHA, B, LDB ) + CALL STRSM( 'L', 'U', 'N', DIAG, K, N, ONE, + $ A( K ), K, B, LDB ) +* + END IF +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'T', and UPLO = 'U' +* + IF( .NOT.NOTRANS ) THEN +* +* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'U', +* and TRANS = 'N' +* + CALL STRSM( 'L', 'U', 'T', DIAG, K, N, ALPHA, + $ A( K*( K+1 ) ), K, B, LDB ) + CALL SGEMM( 'N', 'N', K, N, K, -ONE, A( 0 ), K, B, + $ LDB, ALPHA, B( K, 0 ), LDB ) + CALL STRSM( 'L', 'L', 'N', DIAG, K, N, ONE, + $ A( K*K ), K, B( K, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'U', +* and TRANS = 'T' +* + CALL STRSM( 'L', 'L', 'T', DIAG, K, N, ALPHA, + $ A( K*K ), K, B( K, 0 ), LDB ) + CALL SGEMM( 'T', 'N', K, N, K, -ONE, A( 0 ), K, + $ B( K, 0 ), LDB, ALPHA, B, LDB ) + CALL STRSM( 'L', 'U', 'N', DIAG, K, N, ONE, + $ A( K*( K+1 ) ), K, B, LDB ) +* + END IF +* + END IF +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'R' +* +* A is N-by-N. +* If N is odd, set NISODD = .TRUE., and N1 and N2. +* If N is even, NISODD = .FALSE., and K. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + NISODD = .FALSE. + K = N / 2 + ELSE + NISODD = .TRUE. + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF + END IF +* + IF( NISODD ) THEN +* +* SIDE = 'R' and N is odd +* + IF( NORMALTRANSR ) THEN +* +* SIDE = 'R', N is odd, and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'L', and +* TRANS = 'N' +* + CALL STRSM( 'R', 'U', 'T', DIAG, M, N2, ALPHA, + $ A( N ), N, B( 0, N1 ), LDB ) + CALL SGEMM( 'N', 'N', M, N1, N2, -ONE, B( 0, N1 ), + $ LDB, A( N1 ), N, ALPHA, B( 0, 0 ), + $ LDB ) + CALL STRSM( 'R', 'L', 'N', DIAG, M, N1, ONE, + $ A( 0 ), N, B( 0, 0 ), LDB ) +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'L', and +* TRANS = 'T' +* + CALL STRSM( 'R', 'L', 'T', DIAG, M, N1, ALPHA, + $ A( 0 ), N, B( 0, 0 ), LDB ) + CALL SGEMM( 'N', 'T', M, N2, N1, -ONE, B( 0, 0 ), + $ LDB, A( N1 ), N, ALPHA, B( 0, N1 ), + $ LDB ) + CALL STRSM( 'R', 'U', 'N', DIAG, M, N2, ONE, + $ A( N ), N, B( 0, N1 ), LDB ) +* + END IF +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'N', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'U', and +* TRANS = 'N' +* + CALL STRSM( 'R', 'L', 'T', DIAG, M, N1, ALPHA, + $ A( N2 ), N, B( 0, 0 ), LDB ) + CALL SGEMM( 'N', 'N', M, N2, N1, -ONE, B( 0, 0 ), + $ LDB, A( 0 ), N, ALPHA, B( 0, N1 ), + $ LDB ) + CALL STRSM( 'R', 'U', 'N', DIAG, M, N2, ONE, + $ A( N1 ), N, B( 0, N1 ), LDB ) +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'U', and +* TRANS = 'T' +* + CALL STRSM( 'R', 'U', 'T', DIAG, M, N2, ALPHA, + $ A( N1 ), N, B( 0, N1 ), LDB ) + CALL SGEMM( 'N', 'T', M, N1, N2, -ONE, B( 0, N1 ), + $ LDB, A( 0 ), N, ALPHA, B( 0, 0 ), LDB ) + CALL STRSM( 'R', 'L', 'N', DIAG, M, N1, ONE, + $ A( N2 ), N, B( 0, 0 ), LDB ) +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'R', N is odd, and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'T', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'L', and +* TRANS = 'N' +* + CALL STRSM( 'R', 'L', 'N', DIAG, M, N2, ALPHA, + $ A( 1 ), N1, B( 0, N1 ), LDB ) + CALL SGEMM( 'N', 'T', M, N1, N2, -ONE, B( 0, N1 ), + $ LDB, A( N1*N1 ), N1, ALPHA, B( 0, 0 ), + $ LDB ) + CALL STRSM( 'R', 'U', 'T', DIAG, M, N1, ONE, + $ A( 0 ), N1, B( 0, 0 ), LDB ) +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'L', and +* TRANS = 'T' +* + CALL STRSM( 'R', 'U', 'N', DIAG, M, N1, ALPHA, + $ A( 0 ), N1, B( 0, 0 ), LDB ) + CALL SGEMM( 'N', 'N', M, N2, N1, -ONE, B( 0, 0 ), + $ LDB, A( N1*N1 ), N1, ALPHA, B( 0, N1 ), + $ LDB ) + CALL STRSM( 'R', 'L', 'T', DIAG, M, N2, ONE, + $ A( 1 ), N1, B( 0, N1 ), LDB ) +* + END IF +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'T', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'U', and +* TRANS = 'N' +* + CALL STRSM( 'R', 'U', 'N', DIAG, M, N1, ALPHA, + $ A( N2*N2 ), N2, B( 0, 0 ), LDB ) + CALL SGEMM( 'N', 'T', M, N2, N1, -ONE, B( 0, 0 ), + $ LDB, A( 0 ), N2, ALPHA, B( 0, N1 ), + $ LDB ) + CALL STRSM( 'R', 'L', 'T', DIAG, M, N2, ONE, + $ A( N1*N2 ), N2, B( 0, N1 ), LDB ) +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'U', and +* TRANS = 'T' +* + CALL STRSM( 'R', 'L', 'N', DIAG, M, N2, ALPHA, + $ A( N1*N2 ), N2, B( 0, N1 ), LDB ) + CALL SGEMM( 'N', 'N', M, N1, N2, -ONE, B( 0, N1 ), + $ LDB, A( 0 ), N2, ALPHA, B( 0, 0 ), + $ LDB ) + CALL STRSM( 'R', 'U', 'T', DIAG, M, N1, ONE, + $ A( N2*N2 ), N2, B( 0, 0 ), LDB ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'R' and N is even +* + IF( NORMALTRANSR ) THEN +* +* SIDE = 'R', N is even, and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SIDE ='R', N is even, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'L', +* and TRANS = 'N' +* + CALL STRSM( 'R', 'U', 'T', DIAG, M, K, ALPHA, + $ A( 0 ), N+1, B( 0, K ), LDB ) + CALL SGEMM( 'N', 'N', M, K, K, -ONE, B( 0, K ), + $ LDB, A( K+1 ), N+1, ALPHA, B( 0, 0 ), + $ LDB ) + CALL STRSM( 'R', 'L', 'N', DIAG, M, K, ONE, + $ A( 1 ), N+1, B( 0, 0 ), LDB ) +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'L', +* and TRANS = 'T' +* + CALL STRSM( 'R', 'L', 'T', DIAG, M, K, ALPHA, + $ A( 1 ), N+1, B( 0, 0 ), LDB ) + CALL SGEMM( 'N', 'T', M, K, K, -ONE, B( 0, 0 ), + $ LDB, A( K+1 ), N+1, ALPHA, B( 0, K ), + $ LDB ) + CALL STRSM( 'R', 'U', 'N', DIAG, M, K, ONE, + $ A( 0 ), N+1, B( 0, K ), LDB ) +* + END IF +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'N', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'U', +* and TRANS = 'N' +* + CALL STRSM( 'R', 'L', 'T', DIAG, M, K, ALPHA, + $ A( K+1 ), N+1, B( 0, 0 ), LDB ) + CALL SGEMM( 'N', 'N', M, K, K, -ONE, B( 0, 0 ), + $ LDB, A( 0 ), N+1, ALPHA, B( 0, K ), + $ LDB ) + CALL STRSM( 'R', 'U', 'N', DIAG, M, K, ONE, + $ A( K ), N+1, B( 0, K ), LDB ) +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'U', +* and TRANS = 'T' +* + CALL STRSM( 'R', 'U', 'T', DIAG, M, K, ALPHA, + $ A( K ), N+1, B( 0, K ), LDB ) + CALL SGEMM( 'N', 'T', M, K, K, -ONE, B( 0, K ), + $ LDB, A( 0 ), N+1, ALPHA, B( 0, 0 ), + $ LDB ) + CALL STRSM( 'R', 'L', 'N', DIAG, M, K, ONE, + $ A( K+1 ), N+1, B( 0, 0 ), LDB ) +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'R', N is even, and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SIDE ='R', N is even, TRANSR = 'T', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'L', +* and TRANS = 'N' +* + CALL STRSM( 'R', 'L', 'N', DIAG, M, K, ALPHA, + $ A( 0 ), K, B( 0, K ), LDB ) + CALL SGEMM( 'N', 'T', M, K, K, -ONE, B( 0, K ), + $ LDB, A( ( K+1 )*K ), K, ALPHA, + $ B( 0, 0 ), LDB ) + CALL STRSM( 'R', 'U', 'T', DIAG, M, K, ONE, + $ A( K ), K, B( 0, 0 ), LDB ) +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'L', +* and TRANS = 'T' +* + CALL STRSM( 'R', 'U', 'N', DIAG, M, K, ALPHA, + $ A( K ), K, B( 0, 0 ), LDB ) + CALL SGEMM( 'N', 'N', M, K, K, -ONE, B( 0, 0 ), + $ LDB, A( ( K+1 )*K ), K, ALPHA, + $ B( 0, K ), LDB ) + CALL STRSM( 'R', 'L', 'T', DIAG, M, K, ONE, + $ A( 0 ), K, B( 0, K ), LDB ) +* + END IF +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'T', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'U', +* and TRANS = 'N' +* + CALL STRSM( 'R', 'U', 'N', DIAG, M, K, ALPHA, + $ A( ( K+1 )*K ), K, B( 0, 0 ), LDB ) + CALL SGEMM( 'N', 'T', M, K, K, -ONE, B( 0, 0 ), + $ LDB, A( 0 ), K, ALPHA, B( 0, K ), LDB ) + CALL STRSM( 'R', 'L', 'T', DIAG, M, K, ONE, + $ A( K*K ), K, B( 0, K ), LDB ) +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'U', +* and TRANS = 'T' +* + CALL STRSM( 'R', 'L', 'N', DIAG, M, K, ALPHA, + $ A( K*K ), K, B( 0, K ), LDB ) + CALL SGEMM( 'N', 'N', M, K, K, -ONE, B( 0, K ), + $ LDB, A( 0 ), K, ALPHA, B( 0, 0 ), LDB ) + CALL STRSM( 'R', 'U', 'T', DIAG, M, K, ONE, + $ A( ( K+1 )*K ), K, B( 0, 0 ), LDB ) +* + END IF +* + END IF +* + END IF +* + END IF + END IF +* + RETURN +* +* End of STFSM +* + END diff --git a/dspl/liblapack/SRC/stftri.f b/dspl/liblapack/SRC/stftri.f new file mode 100644 index 0000000..fa3099d --- /dev/null +++ b/dspl/liblapack/SRC/stftri.f @@ -0,0 +1,472 @@ +*> \brief \b STFTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STFTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STFTRI( TRANSR, UPLO, DIAG, N, A, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO, DIAG +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* REAL A( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STFTRI computes the inverse of a triangular matrix A stored in RFP +*> format. +*> +*> This is a Level 3 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal TRANSR of RFP A is stored; +*> = 'T': The Transpose TRANSR of RFP A is stored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (NT); +*> NT=N*(N+1)/2. On entry, the triangular factor of a Hermitian +*> Positive Definite matrix A in RFP format. RFP format is +*> described by TRANSR, UPLO, and N as follows: If TRANSR = 'N' +*> then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is +*> (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'T' then RFP is +*> the transpose of RFP A as defined when +*> TRANSR = 'N'. The contents of RFP A are defined by UPLO as +*> follows: If UPLO = 'U' the RFP A contains the nt elements of +*> upper packed A; If UPLO = 'L' the RFP A contains the nt +*> elements of lower packed A. The LDA of RFP A is (N+1)/2 when +*> TRANSR = 'T'. When TRANSR is 'N' the LDA is N+1 when N is +*> even and N is odd. See the Note below for more details. +*> +*> On exit, the (triangular) inverse of the original matrix, in +*> the same storage format. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, A(i,i) is exactly zero. The triangular +*> matrix is singular and its inverse can not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Rectangular Full Packed (RFP) Format when N is +*> even. We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> the transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> the transpose of the last three columns of AP lower. +*> This covers the case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 03 04 05 33 43 53 +*> 13 14 15 00 44 54 +*> 23 24 25 10 11 55 +*> 33 34 35 20 21 22 +*> 00 44 45 30 31 32 +*> 01 11 55 40 41 42 +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We then consider Rectangular Full Packed (RFP) Format when N is +*> odd. We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> the transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> the transpose of the last two columns of AP lower. +*> This covers the case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 02 03 04 00 33 43 +*> 12 13 14 10 11 44 +*> 22 23 24 20 21 22 +*> 00 33 34 30 31 32 +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> 02 12 22 00 01 00 10 20 30 40 50 +*> 03 13 23 33 11 33 11 21 31 41 51 +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STFTRI( TRANSR, UPLO, DIAG, N, A, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO, DIAG + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL A( 0: * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, STRMM, STRTRI +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'T' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( .NOT.LSAME( DIAG, 'N' ) .AND. .NOT.LSAME( DIAG, 'U' ) ) + $ THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STFTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* If N is odd, set NISODD = .TRUE. +* If N is even, set K = N/2 and NISODD = .FALSE. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + ELSE + NISODD = .TRUE. + END IF +* +* Set N1 and N2 depending on LOWER +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* +* start execution: there are eight cases +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) +* T1 -> a(0), T2 -> a(n), S -> a(n1) +* + CALL STRTRI( 'L', DIAG, N1, A( 0 ), N, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL STRMM( 'R', 'L', 'N', DIAG, N2, N1, -ONE, A( 0 ), + $ N, A( N1 ), N ) + CALL STRTRI( 'U', DIAG, N2, A( N ), N, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 + IF( INFO.GT.0 ) + $ RETURN + CALL STRMM( 'L', 'U', 'T', DIAG, N2, N1, ONE, A( N ), N, + $ A( N1 ), N ) +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) +* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) +* T1 -> a(n2), T2 -> a(n1), S -> a(0) +* + CALL STRTRI( 'L', DIAG, N1, A( N2 ), N, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL STRMM( 'L', 'L', 'T', DIAG, N1, N2, -ONE, A( N2 ), + $ N, A( 0 ), N ) + CALL STRTRI( 'U', DIAG, N2, A( N1 ), N, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 + IF( INFO.GT.0 ) + $ RETURN + CALL STRMM( 'R', 'U', 'N', DIAG, N1, N2, ONE, A( N1 ), + $ N, A( 0 ), N ) +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is odd +* T1 -> a(0), T2 -> a(1), S -> a(0+n1*n1) +* + CALL STRTRI( 'U', DIAG, N1, A( 0 ), N1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL STRMM( 'L', 'U', 'N', DIAG, N1, N2, -ONE, A( 0 ), + $ N1, A( N1*N1 ), N1 ) + CALL STRTRI( 'L', DIAG, N2, A( 1 ), N1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 + IF( INFO.GT.0 ) + $ RETURN + CALL STRMM( 'R', 'L', 'T', DIAG, N1, N2, ONE, A( 1 ), + $ N1, A( N1*N1 ), N1 ) +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is odd +* T1 -> a(0+n2*n2), T2 -> a(0+n1*n2), S -> a(0) +* + CALL STRTRI( 'U', DIAG, N1, A( N2*N2 ), N2, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL STRMM( 'R', 'U', 'T', DIAG, N2, N1, -ONE, + $ A( N2*N2 ), N2, A( 0 ), N2 ) + CALL STRTRI( 'L', DIAG, N2, A( N1*N2 ), N2, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 + IF( INFO.GT.0 ) + $ RETURN + CALL STRMM( 'L', 'L', 'N', DIAG, N2, N1, ONE, + $ A( N1*N2 ), N2, A( 0 ), N2 ) + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1) +* + CALL STRTRI( 'L', DIAG, K, A( 1 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL STRMM( 'R', 'L', 'N', DIAG, K, K, -ONE, A( 1 ), + $ N+1, A( K+1 ), N+1 ) + CALL STRTRI( 'U', DIAG, K, A( 0 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K + IF( INFO.GT.0 ) + $ RETURN + CALL STRMM( 'L', 'U', 'T', DIAG, K, K, ONE, A( 0 ), N+1, + $ A( K+1 ), N+1 ) +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0) +* + CALL STRTRI( 'L', DIAG, K, A( K+1 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL STRMM( 'L', 'L', 'T', DIAG, K, K, -ONE, A( K+1 ), + $ N+1, A( 0 ), N+1 ) + CALL STRTRI( 'U', DIAG, K, A( K ), N+1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K + IF( INFO.GT.0 ) + $ RETURN + CALL STRMM( 'R', 'U', 'N', DIAG, K, K, ONE, A( K ), N+1, + $ A( 0 ), N+1 ) + END IF + ELSE +* +* N is even and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) +* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k +* + CALL STRTRI( 'U', DIAG, K, A( K ), K, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL STRMM( 'L', 'U', 'N', DIAG, K, K, -ONE, A( K ), K, + $ A( K*( K+1 ) ), K ) + CALL STRTRI( 'L', DIAG, K, A( 0 ), K, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K + IF( INFO.GT.0 ) + $ RETURN + CALL STRMM( 'R', 'L', 'T', DIAG, K, K, ONE, A( 0 ), K, + $ A( K*( K+1 ) ), K ) + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0) +* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k +* + CALL STRTRI( 'U', DIAG, K, A( K*( K+1 ) ), K, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL STRMM( 'R', 'U', 'T', DIAG, K, K, -ONE, + $ A( K*( K+1 ) ), K, A( 0 ), K ) + CALL STRTRI( 'L', DIAG, K, A( K*K ), K, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K + IF( INFO.GT.0 ) + $ RETURN + CALL STRMM( 'L', 'L', 'N', DIAG, K, K, ONE, A( K*K ), K, + $ A( 0 ), K ) + END IF + END IF + END IF +* + RETURN +* +* End of STFTRI +* + END diff --git a/dspl/liblapack/SRC/stfttp.f b/dspl/liblapack/SRC/stfttp.f new file mode 100644 index 0000000..52e581d --- /dev/null +++ b/dspl/liblapack/SRC/stfttp.f @@ -0,0 +1,517 @@ +*> \brief \b STFTTP copies a triangular matrix from the rectangular full packed format (TF) to the standard packed format (TP). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STFTTP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STFTTP( TRANSR, UPLO, N, ARF, AP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* REAL AP( 0: * ), ARF( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STFTTP copies a triangular matrix A from rectangular full packed +*> format (TF) to standard packed format (TP). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': ARF is in Normal format; +*> = 'T': ARF is in Transpose format; +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] ARF +*> \verbatim +*> ARF is REAL array, dimension ( N*(N+1)/2 ), +*> On entry, the upper or lower triangular matrix A stored in +*> RFP format. For a further discussion see Notes below. +*> \endverbatim +*> +*> \param[out] AP +*> \verbatim +*> AP is REAL array, dimension ( N*(N+1)/2 ), +*> On exit, the upper or lower triangular matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Rectangular Full Packed (RFP) Format when N is +*> even. We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> the transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> the transpose of the last three columns of AP lower. +*> This covers the case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 03 04 05 33 43 53 +*> 13 14 15 00 44 54 +*> 23 24 25 10 11 55 +*> 33 34 35 20 21 22 +*> 00 44 45 30 31 32 +*> 01 11 55 40 41 42 +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We then consider Rectangular Full Packed (RFP) Format when N is +*> odd. We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> the transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> the transpose of the last two columns of AP lower. +*> This covers the case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 02 03 04 00 33 43 +*> 12 13 14 10 11 44 +*> 22 23 24 20 21 22 +*> 00 33 34 30 31 32 +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> 02 12 22 00 01 00 10 20 30 40 50 +*> 03 13 23 33 11 33 11 21 31 41 51 +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STFTTP( TRANSR, UPLO, N, ARF, AP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL AP( 0: * ), ARF( 0: * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K, NT + INTEGER I, J, IJ + INTEGER IJP, JP, LDA, JS +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'T' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STFTTP', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( NORMALTRANSR ) THEN + AP( 0 ) = ARF( 0 ) + ELSE + AP( 0 ) = ARF( 0 ) + END IF + RETURN + END IF +* +* Size of array ARF(0:NT-1) +* + NT = N*( N+1 ) / 2 +* +* Set N1 and N2 depending on LOWER +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* If N is odd, set NISODD = .TRUE. +* If N is even, set K = N/2 and NISODD = .FALSE. +* +* set lda of ARF^C; ARF^C is (0:(N+1)/2-1,0:N-noe) +* where noe = 0 if n is even, noe = 1 if n is odd +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + LDA = N + 1 + ELSE + NISODD = .TRUE. + LDA = N + END IF +* +* ARF^C has lda rows and n+1-noe cols +* + IF( .NOT.NORMALTRANSR ) + $ LDA = ( N+1 ) / 2 +* +* start execution: there are eight cases +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) +* T1 -> a(0), T2 -> a(n), S -> a(n1); lda = n +* + IJP = 0 + JP = 0 + DO J = 0, N2 + DO I = J, N - 1 + IJ = I + JP + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JP = JP + LDA + END DO + DO I = 0, N2 - 1 + DO J = 1 + I, N2 + IJ = I + J*LDA + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) +* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) +* T1 -> a(n2), T2 -> a(n1), S -> a(0) +* + IJP = 0 + DO J = 0, N1 - 1 + IJ = N2 + J + DO I = 0, J + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + IJ = IJ + LDA + END DO + END DO + JS = 0 + DO J = N1, N - 1 + IJ = JS + DO IJ = JS, JS + J + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is odd +* T1 -> A(0,0) , T2 -> A(1,0) , S -> A(0,n1) +* T1 -> a(0+0) , T2 -> a(1+0) , S -> a(0+n1*n1); lda=n1 +* + IJP = 0 + DO I = 0, N2 + DO IJ = I*( LDA+1 ), N*LDA - 1, LDA + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + END DO + JS = 1 + DO J = 0, N2 - 1 + DO IJ = JS, JS + N2 - J - 1 + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + 1 + END DO +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is odd +* T1 -> A(0,n1+1), T2 -> A(0,n1), S -> A(0,0) +* T1 -> a(n2*n2), T2 -> a(n1*n2), S -> a(0); lda = n2 +* + IJP = 0 + JS = N2*LDA + DO J = 0, N1 - 1 + DO IJ = JS, JS + J + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO + DO I = 0, N1 + DO IJ = I, I + ( N1+I )*LDA, LDA + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + END DO +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1) +* + IJP = 0 + JP = 0 + DO J = 0, K - 1 + DO I = J, N - 1 + IJ = 1 + I + JP + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JP = JP + LDA + END DO + DO I = 0, K - 1 + DO J = I, K - 1 + IJ = I + J*LDA + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0) +* + IJP = 0 + DO J = 0, K - 1 + IJ = K + 1 + J + DO I = 0, J + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + IJ = IJ + LDA + END DO + END DO + JS = 0 + DO J = K, N - 1 + IJ = JS + DO IJ = JS, JS + J + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO +* + END IF +* + ELSE +* +* N is even and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) +* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k +* + IJP = 0 + DO I = 0, K - 1 + DO IJ = I + ( I+1 )*LDA, ( N+1 )*LDA - 1, LDA + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + END DO + JS = 0 + DO J = 0, K - 1 + DO IJ = JS, JS + K - J - 1 + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + 1 + END DO +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0) +* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k +* + IJP = 0 + JS = ( K+1 )*LDA + DO J = 0, K - 1 + DO IJ = JS, JS + J + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO + DO I = 0, K - 1 + DO IJ = I, I + ( K+I )*LDA, LDA + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + END DO +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of STFTTP +* + END diff --git a/dspl/liblapack/SRC/stfttr.f b/dspl/liblapack/SRC/stfttr.f new file mode 100644 index 0000000..c7cf066 --- /dev/null +++ b/dspl/liblapack/SRC/stfttr.f @@ -0,0 +1,495 @@ +*> \brief \b STFTTR copies a triangular matrix from the rectangular full packed format (TF) to the standard full format (TR). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STFTTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STFTTR( TRANSR, UPLO, N, ARF, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. +* REAL A( 0: LDA-1, 0: * ), ARF( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STFTTR copies a triangular matrix A from rectangular full packed +*> format (TF) to standard full format (TR). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': ARF is in Normal format; +*> = 'T': ARF is in Transpose format. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices ARF and A. N >= 0. +*> \endverbatim +*> +*> \param[in] ARF +*> \verbatim +*> ARF is REAL array, dimension (N*(N+1)/2). +*> On entry, the upper (if UPLO = 'U') or lower (if UPLO = 'L') +*> matrix A in RFP format. See the "Notes" below for more +*> details. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On exit, the triangular matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of the array A contains +*> the upper triangular matrix, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of the array A contains +*> the lower triangular matrix, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Rectangular Full Packed (RFP) Format when N is +*> even. We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> the transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> the transpose of the last three columns of AP lower. +*> This covers the case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 03 04 05 33 43 53 +*> 13 14 15 00 44 54 +*> 23 24 25 10 11 55 +*> 33 34 35 20 21 22 +*> 00 44 45 30 31 32 +*> 01 11 55 40 41 42 +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We then consider Rectangular Full Packed (RFP) Format when N is +*> odd. We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> the transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> the transpose of the last two columns of AP lower. +*> This covers the case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 02 03 04 00 33 43 +*> 12 13 14 10 11 44 +*> 22 23 24 20 21 22 +*> 00 33 34 30 31 32 +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> 02 12 22 00 01 00 10 20 30 40 50 +*> 03 13 23 33 11 33 11 21 31 41 51 +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +* +* ===================================================================== + SUBROUTINE STFTTR( TRANSR, UPLO, N, ARF, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. + REAL A( 0: LDA-1, 0: * ), ARF( 0: * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K, NT, NX2, NP1X2 + INTEGER I, J, L, IJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'T' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STFTTR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + IF( N.EQ.1 ) THEN + A( 0, 0 ) = ARF( 0 ) + END IF + RETURN + END IF +* +* Size of array ARF(0:nt-1) +* + NT = N*( N+1 ) / 2 +* +* set N1 and N2 depending on LOWER: for N even N1=N2=K +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* If N is odd, set NISODD = .TRUE., LDA=N+1 and A is (N+1)--by--K2. +* If N is even, set K = N/2 and NISODD = .FALSE., LDA=N and A is +* N--by--(N+1)/2. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + IF( .NOT.LOWER ) + $ NP1X2 = N + N + 2 + ELSE + NISODD = .TRUE. + IF( .NOT.LOWER ) + $ NX2 = N + N + END IF +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* N is odd, TRANSR = 'N', and UPLO = 'L' +* + IJ = 0 + DO J = 0, N2 + DO I = N1, N2 + J + A( N2+J, I ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO I = J, N - 1 + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* N is odd, TRANSR = 'N', and UPLO = 'U' +* + IJ = NT - N + DO J = N - 1, N1, -1 + DO I = 0, J + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO L = J - N1, N1 - 1 + A( J-N1, L ) = ARF( IJ ) + IJ = IJ + 1 + END DO + IJ = IJ - NX2 + END DO +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* N is odd, TRANSR = 'T', and UPLO = 'L' +* + IJ = 0 + DO J = 0, N2 - 1 + DO I = 0, J + A( J, I ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO I = N1 + J, N - 1 + A( I, N1+J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO + DO J = N2, N - 1 + DO I = 0, N1 - 1 + A( J, I ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* N is odd, TRANSR = 'T', and UPLO = 'U' +* + IJ = 0 + DO J = 0, N1 + DO I = N1, N - 1 + A( J, I ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO + DO J = 0, N1 - 1 + DO I = 0, J + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO L = N2 + J, N - 1 + A( N2+J, L ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* N is even, TRANSR = 'N', and UPLO = 'L' +* + IJ = 0 + DO J = 0, K - 1 + DO I = K, K + J + A( K+J, I ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO I = J, N - 1 + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* N is even, TRANSR = 'N', and UPLO = 'U' +* + IJ = NT - N - 1 + DO J = N - 1, K, -1 + DO I = 0, J + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO L = J - K, K - 1 + A( J-K, L ) = ARF( IJ ) + IJ = IJ + 1 + END DO + IJ = IJ - NP1X2 + END DO +* + END IF +* + ELSE +* +* N is even and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* N is even, TRANSR = 'T', and UPLO = 'L' +* + IJ = 0 + J = K + DO I = K, N - 1 + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO J = 0, K - 2 + DO I = 0, J + A( J, I ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO I = K + 1 + J, N - 1 + A( I, K+1+J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO + DO J = K - 1, N - 1 + DO I = 0, K - 1 + A( J, I ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* N is even, TRANSR = 'T', and UPLO = 'U' +* + IJ = 0 + DO J = 0, K + DO I = K, N - 1 + A( J, I ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO + DO J = 0, K - 2 + DO I = 0, J + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO L = K + 1 + J, N - 1 + A( K+1+J, L ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO +* Note that here, on exit of the loop, J = K-1 + DO I = 0, J + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of STFTTR +* + END diff --git a/dspl/liblapack/SRC/stgevc.f b/dspl/liblapack/SRC/stgevc.f new file mode 100644 index 0000000..03d4b71 --- /dev/null +++ b/dspl/liblapack/SRC/stgevc.f @@ -0,0 +1,1211 @@ +*> \brief \b STGEVC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STGEVC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STGEVC( SIDE, HOWMNY, SELECT, N, S, LDS, P, LDP, VL, +* LDVL, VR, LDVR, MM, M, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER HOWMNY, SIDE +* INTEGER INFO, LDP, LDS, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* REAL P( LDP, * ), S( LDS, * ), VL( LDVL, * ), +* $ VR( LDVR, * ), WORK( * ) +* .. +* +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STGEVC computes some or all of the right and/or left eigenvectors of +*> a pair of real matrices (S,P), where S is a quasi-triangular matrix +*> and P is upper triangular. Matrix pairs of this type are produced by +*> the generalized Schur factorization of a matrix pair (A,B): +*> +*> A = Q*S*Z**T, B = Q*P*Z**T +*> +*> as computed by SGGHRD + SHGEQZ. +*> +*> The right eigenvector x and the left eigenvector y of (S,P) +*> corresponding to an eigenvalue w are defined by: +*> +*> S*x = w*P*x, (y**H)*S = w*(y**H)*P, +*> +*> where y**H denotes the conjugate tranpose of y. +*> The eigenvalues are not input to this routine, but are computed +*> directly from the diagonal blocks of S and P. +*> +*> This routine returns the matrices X and/or Y of right and left +*> eigenvectors of (S,P), or the products Z*X and/or Q*Y, +*> where Z and Q are input matrices. +*> If Q and Z are the orthogonal factors from the generalized Schur +*> factorization of a matrix pair (A,B), then Z*X and Q*Y +*> are the matrices of right and left eigenvectors of (A,B). +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': compute right eigenvectors only; +*> = 'L': compute left eigenvectors only; +*> = 'B': compute both right and left eigenvectors. +*> \endverbatim +*> +*> \param[in] HOWMNY +*> \verbatim +*> HOWMNY is CHARACTER*1 +*> = 'A': compute all right and/or left eigenvectors; +*> = 'B': compute all right and/or left eigenvectors, +*> backtransformed by the matrices in VR and/or VL; +*> = 'S': compute selected right and/or left eigenvectors, +*> specified by the logical array SELECT. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> If HOWMNY='S', SELECT specifies the eigenvectors to be +*> computed. If w(j) is a real eigenvalue, the corresponding +*> real eigenvector is computed if SELECT(j) is .TRUE.. +*> If w(j) and w(j+1) are the real and imaginary parts of a +*> complex eigenvalue, the corresponding complex eigenvector +*> is computed if either SELECT(j) or SELECT(j+1) is .TRUE., +*> and on exit SELECT(j) is set to .TRUE. and SELECT(j+1) is +*> set to .FALSE.. +*> Not referenced if HOWMNY = 'A' or 'B'. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices S and P. N >= 0. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is REAL array, dimension (LDS,N) +*> The upper quasi-triangular matrix S from a generalized Schur +*> factorization, as computed by SHGEQZ. +*> \endverbatim +*> +*> \param[in] LDS +*> \verbatim +*> LDS is INTEGER +*> The leading dimension of array S. LDS >= max(1,N). +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is REAL array, dimension (LDP,N) +*> The upper triangular matrix P from a generalized Schur +*> factorization, as computed by SHGEQZ. +*> 2-by-2 diagonal blocks of P corresponding to 2-by-2 blocks +*> of S must be in positive diagonal form. +*> \endverbatim +*> +*> \param[in] LDP +*> \verbatim +*> LDP is INTEGER +*> The leading dimension of array P. LDP >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is REAL array, dimension (LDVL,MM) +*> On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +*> contain an N-by-N matrix Q (usually the orthogonal matrix Q +*> of left Schur vectors returned by SHGEQZ). +*> On exit, if SIDE = 'L' or 'B', VL contains: +*> if HOWMNY = 'A', the matrix Y of left eigenvectors of (S,P); +*> if HOWMNY = 'B', the matrix Q*Y; +*> if HOWMNY = 'S', the left eigenvectors of (S,P) specified by +*> SELECT, stored consecutively in the columns of +*> VL, in the same order as their eigenvalues. +*> +*> A complex eigenvector corresponding to a complex eigenvalue +*> is stored in two consecutive columns, the first holding the +*> real part, and the second the imaginary part. +*> +*> Not referenced if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of array VL. LDVL >= 1, and if +*> SIDE = 'L' or 'B', LDVL >= N. +*> \endverbatim +*> +*> \param[in,out] VR +*> \verbatim +*> VR is REAL array, dimension (LDVR,MM) +*> On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +*> contain an N-by-N matrix Z (usually the orthogonal matrix Z +*> of right Schur vectors returned by SHGEQZ). +*> +*> On exit, if SIDE = 'R' or 'B', VR contains: +*> if HOWMNY = 'A', the matrix X of right eigenvectors of (S,P); +*> if HOWMNY = 'B' or 'b', the matrix Z*X; +*> if HOWMNY = 'S' or 's', the right eigenvectors of (S,P) +*> specified by SELECT, stored consecutively in the +*> columns of VR, in the same order as their +*> eigenvalues. +*> +*> A complex eigenvector corresponding to a complex eigenvalue +*> is stored in two consecutive columns, the first holding the +*> real part and the second the imaginary part. +*> +*> Not referenced if SIDE = 'L'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. LDVR >= 1, and if +*> SIDE = 'R' or 'B', LDVR >= N. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of columns in the arrays VL and/or VR. MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of columns in the arrays VL and/or VR actually +*> used to store the eigenvectors. If HOWMNY = 'A' or 'B', M +*> is set to N. Each selected real eigenvector occupies one +*> column and each selected complex eigenvector occupies two +*> columns. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (6*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: the 2-by-2 block (INFO:INFO+1) does not have a complex +*> eigenvalue. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Allocation of workspace: +*> ---------- -- --------- +*> +*> WORK( j ) = 1-norm of j-th column of A, above the diagonal +*> WORK( N+j ) = 1-norm of j-th column of B, above the diagonal +*> WORK( 2*N+1:3*N ) = real part of eigenvector +*> WORK( 3*N+1:4*N ) = imaginary part of eigenvector +*> WORK( 4*N+1:5*N ) = real part of back-transformed eigenvector +*> WORK( 5*N+1:6*N ) = imaginary part of back-transformed eigenvector +*> +*> Rowwise vs. columnwise solution methods: +*> ------- -- ---------- -------- ------- +*> +*> Finding a generalized eigenvector consists basically of solving the +*> singular triangular system +*> +*> (A - w B) x = 0 (for right) or: (A - w B)**H y = 0 (for left) +*> +*> Consider finding the i-th right eigenvector (assume all eigenvalues +*> are real). The equation to be solved is: +*> n i +*> 0 = sum C(j,k) v(k) = sum C(j,k) v(k) for j = i,. . .,1 +*> k=j k=j +*> +*> where C = (A - w B) (The components v(i+1:n) are 0.) +*> +*> The "rowwise" method is: +*> +*> (1) v(i) := 1 +*> for j = i-1,. . .,1: +*> i +*> (2) compute s = - sum C(j,k) v(k) and +*> k=j+1 +*> +*> (3) v(j) := s / C(j,j) +*> +*> Step 2 is sometimes called the "dot product" step, since it is an +*> inner product between the j-th row and the portion of the eigenvector +*> that has been computed so far. +*> +*> The "columnwise" method consists basically in doing the sums +*> for all the rows in parallel. As each v(j) is computed, the +*> contribution of v(j) times the j-th column of C is added to the +*> partial sums. Since FORTRAN arrays are stored columnwise, this has +*> the advantage that at each step, the elements of C that are accessed +*> are adjacent to one another, whereas with the rowwise method, the +*> elements accessed at a step are spaced LDS (and LDP) words apart. +*> +*> When finding left eigenvectors, the matrix in question is the +*> transpose of the one in storage, so the rowwise method then +*> actually accesses columns of A and B at each step, and so is the +*> preferred method. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STGEVC( SIDE, HOWMNY, SELECT, N, S, LDS, P, LDP, VL, + $ LDVL, VR, LDVR, MM, M, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDP, LDS, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + REAL P( LDP, * ), S( LDS, * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, SAFETY + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, + $ SAFETY = 1.0E+2 ) +* .. +* .. Local Scalars .. + LOGICAL COMPL, COMPR, IL2BY2, ILABAD, ILALL, ILBACK, + $ ILBBAD, ILCOMP, ILCPLX, LSA, LSB + INTEGER I, IBEG, IEIG, IEND, IHWMNY, IINFO, IM, ISIDE, + $ J, JA, JC, JE, JR, JW, NA, NW + REAL ACOEF, ACOEFA, ANORM, ASCALE, BCOEFA, BCOEFI, + $ BCOEFR, BIG, BIGNUM, BNORM, BSCALE, CIM2A, + $ CIM2B, CIMAGA, CIMAGB, CRE2A, CRE2B, CREALA, + $ CREALB, DMIN, SAFMIN, SALFAR, SBETA, SCALE, + $ SMALL, TEMP, TEMP2, TEMP2I, TEMP2R, ULP, XMAX, + $ XSCALE +* .. +* .. Local Arrays .. + REAL BDIAG( 2 ), SUM( 2, 2 ), SUMS( 2, 2 ), + $ SUMP( 2, 2 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SLABAD, SLACPY, SLAG2, SLALN2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and Test the input parameters +* + IF( LSAME( HOWMNY, 'A' ) ) THEN + IHWMNY = 1 + ILALL = .TRUE. + ILBACK = .FALSE. + ELSE IF( LSAME( HOWMNY, 'S' ) ) THEN + IHWMNY = 2 + ILALL = .FALSE. + ILBACK = .FALSE. + ELSE IF( LSAME( HOWMNY, 'B' ) ) THEN + IHWMNY = 3 + ILALL = .TRUE. + ILBACK = .TRUE. + ELSE + IHWMNY = -1 + ILALL = .TRUE. + END IF +* + IF( LSAME( SIDE, 'R' ) ) THEN + ISIDE = 1 + COMPL = .FALSE. + COMPR = .TRUE. + ELSE IF( LSAME( SIDE, 'L' ) ) THEN + ISIDE = 2 + COMPL = .TRUE. + COMPR = .FALSE. + ELSE IF( LSAME( SIDE, 'B' ) ) THEN + ISIDE = 3 + COMPL = .TRUE. + COMPR = .TRUE. + ELSE + ISIDE = -1 + END IF +* + INFO = 0 + IF( ISIDE.LT.0 ) THEN + INFO = -1 + ELSE IF( IHWMNY.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDS.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDP.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STGEVC', -INFO ) + RETURN + END IF +* +* Count the number of eigenvectors to be computed +* + IF( .NOT.ILALL ) THEN + IM = 0 + ILCPLX = .FALSE. + DO 10 J = 1, N + IF( ILCPLX ) THEN + ILCPLX = .FALSE. + GO TO 10 + END IF + IF( J.LT.N ) THEN + IF( S( J+1, J ).NE.ZERO ) + $ ILCPLX = .TRUE. + END IF + IF( ILCPLX ) THEN + IF( SELECT( J ) .OR. SELECT( J+1 ) ) + $ IM = IM + 2 + ELSE + IF( SELECT( J ) ) + $ IM = IM + 1 + END IF + 10 CONTINUE + ELSE + IM = N + END IF +* +* Check 2-by-2 diagonal blocks of A, B +* + ILABAD = .FALSE. + ILBBAD = .FALSE. + DO 20 J = 1, N - 1 + IF( S( J+1, J ).NE.ZERO ) THEN + IF( P( J, J ).EQ.ZERO .OR. P( J+1, J+1 ).EQ.ZERO .OR. + $ P( J, J+1 ).NE.ZERO )ILBBAD = .TRUE. + IF( J.LT.N-1 ) THEN + IF( S( J+2, J+1 ).NE.ZERO ) + $ ILABAD = .TRUE. + END IF + END IF + 20 CONTINUE +* + IF( ILABAD ) THEN + INFO = -5 + ELSE IF( ILBBAD ) THEN + INFO = -7 + ELSE IF( COMPL .AND. LDVL.LT.N .OR. LDVL.LT.1 ) THEN + INFO = -10 + ELSE IF( COMPR .AND. LDVR.LT.N .OR. LDVR.LT.1 ) THEN + INFO = -12 + ELSE IF( MM.LT.IM ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STGEVC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = IM + IF( N.EQ.0 ) + $ RETURN +* +* Machine Constants +* + SAFMIN = SLAMCH( 'Safe minimum' ) + BIG = ONE / SAFMIN + CALL SLABAD( SAFMIN, BIG ) + ULP = SLAMCH( 'Epsilon' )*SLAMCH( 'Base' ) + SMALL = SAFMIN*N / ULP + BIG = ONE / SMALL + BIGNUM = ONE / ( SAFMIN*N ) +* +* Compute the 1-norm of each column of the strictly upper triangular +* part (i.e., excluding all elements belonging to the diagonal +* blocks) of A and B to check for possible overflow in the +* triangular solver. +* + ANORM = ABS( S( 1, 1 ) ) + IF( N.GT.1 ) + $ ANORM = ANORM + ABS( S( 2, 1 ) ) + BNORM = ABS( P( 1, 1 ) ) + WORK( 1 ) = ZERO + WORK( N+1 ) = ZERO +* + DO 50 J = 2, N + TEMP = ZERO + TEMP2 = ZERO + IF( S( J, J-1 ).EQ.ZERO ) THEN + IEND = J - 1 + ELSE + IEND = J - 2 + END IF + DO 30 I = 1, IEND + TEMP = TEMP + ABS( S( I, J ) ) + TEMP2 = TEMP2 + ABS( P( I, J ) ) + 30 CONTINUE + WORK( J ) = TEMP + WORK( N+J ) = TEMP2 + DO 40 I = IEND + 1, MIN( J+1, N ) + TEMP = TEMP + ABS( S( I, J ) ) + TEMP2 = TEMP2 + ABS( P( I, J ) ) + 40 CONTINUE + ANORM = MAX( ANORM, TEMP ) + BNORM = MAX( BNORM, TEMP2 ) + 50 CONTINUE +* + ASCALE = ONE / MAX( ANORM, SAFMIN ) + BSCALE = ONE / MAX( BNORM, SAFMIN ) +* +* Left eigenvectors +* + IF( COMPL ) THEN + IEIG = 0 +* +* Main loop over eigenvalues +* + ILCPLX = .FALSE. + DO 220 JE = 1, N +* +* Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or +* (b) this would be the second of a complex pair. +* Check for complex eigenvalue, so as to be sure of which +* entry(-ies) of SELECT to look at. +* + IF( ILCPLX ) THEN + ILCPLX = .FALSE. + GO TO 220 + END IF + NW = 1 + IF( JE.LT.N ) THEN + IF( S( JE+1, JE ).NE.ZERO ) THEN + ILCPLX = .TRUE. + NW = 2 + END IF + END IF + IF( ILALL ) THEN + ILCOMP = .TRUE. + ELSE IF( ILCPLX ) THEN + ILCOMP = SELECT( JE ) .OR. SELECT( JE+1 ) + ELSE + ILCOMP = SELECT( JE ) + END IF + IF( .NOT.ILCOMP ) + $ GO TO 220 +* +* Decide if (a) singular pencil, (b) real eigenvalue, or +* (c) complex eigenvalue. +* + IF( .NOT.ILCPLX ) THEN + IF( ABS( S( JE, JE ) ).LE.SAFMIN .AND. + $ ABS( P( JE, JE ) ).LE.SAFMIN ) THEN +* +* Singular matrix pencil -- return unit eigenvector +* + IEIG = IEIG + 1 + DO 60 JR = 1, N + VL( JR, IEIG ) = ZERO + 60 CONTINUE + VL( IEIG, IEIG ) = ONE + GO TO 220 + END IF + END IF +* +* Clear vector +* + DO 70 JR = 1, NW*N + WORK( 2*N+JR ) = ZERO + 70 CONTINUE +* T +* Compute coefficients in ( a A - b B ) y = 0 +* a is ACOEF +* b is BCOEFR + i*BCOEFI +* + IF( .NOT.ILCPLX ) THEN +* +* Real eigenvalue +* + TEMP = ONE / MAX( ABS( S( JE, JE ) )*ASCALE, + $ ABS( P( JE, JE ) )*BSCALE, SAFMIN ) + SALFAR = ( TEMP*S( JE, JE ) )*ASCALE + SBETA = ( TEMP*P( JE, JE ) )*BSCALE + ACOEF = SBETA*ASCALE + BCOEFR = SALFAR*BSCALE + BCOEFI = ZERO +* +* Scale to avoid underflow +* + SCALE = ONE + LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEF ).LT.SMALL + LSB = ABS( SALFAR ).GE.SAFMIN .AND. ABS( BCOEFR ).LT. + $ SMALL + IF( LSA ) + $ SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG ) + IF( LSB ) + $ SCALE = MAX( SCALE, ( SMALL / ABS( SALFAR ) )* + $ MIN( BNORM, BIG ) ) + IF( LSA .OR. LSB ) THEN + SCALE = MIN( SCALE, ONE / + $ ( SAFMIN*MAX( ONE, ABS( ACOEF ), + $ ABS( BCOEFR ) ) ) ) + IF( LSA ) THEN + ACOEF = ASCALE*( SCALE*SBETA ) + ELSE + ACOEF = SCALE*ACOEF + END IF + IF( LSB ) THEN + BCOEFR = BSCALE*( SCALE*SALFAR ) + ELSE + BCOEFR = SCALE*BCOEFR + END IF + END IF + ACOEFA = ABS( ACOEF ) + BCOEFA = ABS( BCOEFR ) +* +* First component is 1 +* + WORK( 2*N+JE ) = ONE + XMAX = ONE + ELSE +* +* Complex eigenvalue +* + CALL SLAG2( S( JE, JE ), LDS, P( JE, JE ), LDP, + $ SAFMIN*SAFETY, ACOEF, TEMP, BCOEFR, TEMP2, + $ BCOEFI ) + BCOEFI = -BCOEFI + IF( BCOEFI.EQ.ZERO ) THEN + INFO = JE + RETURN + END IF +* +* Scale to avoid over/underflow +* + ACOEFA = ABS( ACOEF ) + BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI ) + SCALE = ONE + IF( ACOEFA*ULP.LT.SAFMIN .AND. ACOEFA.GE.SAFMIN ) + $ SCALE = ( SAFMIN / ULP ) / ACOEFA + IF( BCOEFA*ULP.LT.SAFMIN .AND. BCOEFA.GE.SAFMIN ) + $ SCALE = MAX( SCALE, ( SAFMIN / ULP ) / BCOEFA ) + IF( SAFMIN*ACOEFA.GT.ASCALE ) + $ SCALE = ASCALE / ( SAFMIN*ACOEFA ) + IF( SAFMIN*BCOEFA.GT.BSCALE ) + $ SCALE = MIN( SCALE, BSCALE / ( SAFMIN*BCOEFA ) ) + IF( SCALE.NE.ONE ) THEN + ACOEF = SCALE*ACOEF + ACOEFA = ABS( ACOEF ) + BCOEFR = SCALE*BCOEFR + BCOEFI = SCALE*BCOEFI + BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI ) + END IF +* +* Compute first two components of eigenvector +* + TEMP = ACOEF*S( JE+1, JE ) + TEMP2R = ACOEF*S( JE, JE ) - BCOEFR*P( JE, JE ) + TEMP2I = -BCOEFI*P( JE, JE ) + IF( ABS( TEMP ).GT.ABS( TEMP2R )+ABS( TEMP2I ) ) THEN + WORK( 2*N+JE ) = ONE + WORK( 3*N+JE ) = ZERO + WORK( 2*N+JE+1 ) = -TEMP2R / TEMP + WORK( 3*N+JE+1 ) = -TEMP2I / TEMP + ELSE + WORK( 2*N+JE+1 ) = ONE + WORK( 3*N+JE+1 ) = ZERO + TEMP = ACOEF*S( JE, JE+1 ) + WORK( 2*N+JE ) = ( BCOEFR*P( JE+1, JE+1 )-ACOEF* + $ S( JE+1, JE+1 ) ) / TEMP + WORK( 3*N+JE ) = BCOEFI*P( JE+1, JE+1 ) / TEMP + END IF + XMAX = MAX( ABS( WORK( 2*N+JE ) )+ABS( WORK( 3*N+JE ) ), + $ ABS( WORK( 2*N+JE+1 ) )+ABS( WORK( 3*N+JE+1 ) ) ) + END IF +* + DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN ) +* +* T +* Triangular solve of (a A - b B) y = 0 +* +* T +* (rowwise in (a A - b B) , or columnwise in (a A - b B) ) +* + IL2BY2 = .FALSE. +* + DO 160 J = JE + NW, N + IF( IL2BY2 ) THEN + IL2BY2 = .FALSE. + GO TO 160 + END IF +* + NA = 1 + BDIAG( 1 ) = P( J, J ) + IF( J.LT.N ) THEN + IF( S( J+1, J ).NE.ZERO ) THEN + IL2BY2 = .TRUE. + BDIAG( 2 ) = P( J+1, J+1 ) + NA = 2 + END IF + END IF +* +* Check whether scaling is necessary for dot products +* + XSCALE = ONE / MAX( ONE, XMAX ) + TEMP = MAX( WORK( J ), WORK( N+J ), + $ ACOEFA*WORK( J )+BCOEFA*WORK( N+J ) ) + IF( IL2BY2 ) + $ TEMP = MAX( TEMP, WORK( J+1 ), WORK( N+J+1 ), + $ ACOEFA*WORK( J+1 )+BCOEFA*WORK( N+J+1 ) ) + IF( TEMP.GT.BIGNUM*XSCALE ) THEN + DO 90 JW = 0, NW - 1 + DO 80 JR = JE, J - 1 + WORK( ( JW+2 )*N+JR ) = XSCALE* + $ WORK( ( JW+2 )*N+JR ) + 80 CONTINUE + 90 CONTINUE + XMAX = XMAX*XSCALE + END IF +* +* Compute dot products +* +* j-1 +* SUM = sum conjg( a*S(k,j) - b*P(k,j) )*x(k) +* k=je +* +* To reduce the op count, this is done as +* +* _ j-1 _ j-1 +* a*conjg( sum S(k,j)*x(k) ) - b*conjg( sum P(k,j)*x(k) ) +* k=je k=je +* +* which may cause underflow problems if A or B are close +* to underflow. (E.g., less than SMALL.) +* +* + DO 120 JW = 1, NW + DO 110 JA = 1, NA + SUMS( JA, JW ) = ZERO + SUMP( JA, JW ) = ZERO +* + DO 100 JR = JE, J - 1 + SUMS( JA, JW ) = SUMS( JA, JW ) + + $ S( JR, J+JA-1 )* + $ WORK( ( JW+1 )*N+JR ) + SUMP( JA, JW ) = SUMP( JA, JW ) + + $ P( JR, J+JA-1 )* + $ WORK( ( JW+1 )*N+JR ) + 100 CONTINUE + 110 CONTINUE + 120 CONTINUE +* + DO 130 JA = 1, NA + IF( ILCPLX ) THEN + SUM( JA, 1 ) = -ACOEF*SUMS( JA, 1 ) + + $ BCOEFR*SUMP( JA, 1 ) - + $ BCOEFI*SUMP( JA, 2 ) + SUM( JA, 2 ) = -ACOEF*SUMS( JA, 2 ) + + $ BCOEFR*SUMP( JA, 2 ) + + $ BCOEFI*SUMP( JA, 1 ) + ELSE + SUM( JA, 1 ) = -ACOEF*SUMS( JA, 1 ) + + $ BCOEFR*SUMP( JA, 1 ) + END IF + 130 CONTINUE +* +* T +* Solve ( a A - b B ) y = SUM(,) +* with scaling and perturbation of the denominator +* + CALL SLALN2( .TRUE., NA, NW, DMIN, ACOEF, S( J, J ), LDS, + $ BDIAG( 1 ), BDIAG( 2 ), SUM, 2, BCOEFR, + $ BCOEFI, WORK( 2*N+J ), N, SCALE, TEMP, + $ IINFO ) + IF( SCALE.LT.ONE ) THEN + DO 150 JW = 0, NW - 1 + DO 140 JR = JE, J - 1 + WORK( ( JW+2 )*N+JR ) = SCALE* + $ WORK( ( JW+2 )*N+JR ) + 140 CONTINUE + 150 CONTINUE + XMAX = SCALE*XMAX + END IF + XMAX = MAX( XMAX, TEMP ) + 160 CONTINUE +* +* Copy eigenvector to VL, back transforming if +* HOWMNY='B'. +* + IEIG = IEIG + 1 + IF( ILBACK ) THEN + DO 170 JW = 0, NW - 1 + CALL SGEMV( 'N', N, N+1-JE, ONE, VL( 1, JE ), LDVL, + $ WORK( ( JW+2 )*N+JE ), 1, ZERO, + $ WORK( ( JW+4 )*N+1 ), 1 ) + 170 CONTINUE + CALL SLACPY( ' ', N, NW, WORK( 4*N+1 ), N, VL( 1, JE ), + $ LDVL ) + IBEG = 1 + ELSE + CALL SLACPY( ' ', N, NW, WORK( 2*N+1 ), N, VL( 1, IEIG ), + $ LDVL ) + IBEG = JE + END IF +* +* Scale eigenvector +* + XMAX = ZERO + IF( ILCPLX ) THEN + DO 180 J = IBEG, N + XMAX = MAX( XMAX, ABS( VL( J, IEIG ) )+ + $ ABS( VL( J, IEIG+1 ) ) ) + 180 CONTINUE + ELSE + DO 190 J = IBEG, N + XMAX = MAX( XMAX, ABS( VL( J, IEIG ) ) ) + 190 CONTINUE + END IF +* + IF( XMAX.GT.SAFMIN ) THEN + XSCALE = ONE / XMAX +* + DO 210 JW = 0, NW - 1 + DO 200 JR = IBEG, N + VL( JR, IEIG+JW ) = XSCALE*VL( JR, IEIG+JW ) + 200 CONTINUE + 210 CONTINUE + END IF + IEIG = IEIG + NW - 1 +* + 220 CONTINUE + END IF +* +* Right eigenvectors +* + IF( COMPR ) THEN + IEIG = IM + 1 +* +* Main loop over eigenvalues +* + ILCPLX = .FALSE. + DO 500 JE = N, 1, -1 +* +* Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or +* (b) this would be the second of a complex pair. +* Check for complex eigenvalue, so as to be sure of which +* entry(-ies) of SELECT to look at -- if complex, SELECT(JE) +* or SELECT(JE-1). +* If this is a complex pair, the 2-by-2 diagonal block +* corresponding to the eigenvalue is in rows/columns JE-1:JE +* + IF( ILCPLX ) THEN + ILCPLX = .FALSE. + GO TO 500 + END IF + NW = 1 + IF( JE.GT.1 ) THEN + IF( S( JE, JE-1 ).NE.ZERO ) THEN + ILCPLX = .TRUE. + NW = 2 + END IF + END IF + IF( ILALL ) THEN + ILCOMP = .TRUE. + ELSE IF( ILCPLX ) THEN + ILCOMP = SELECT( JE ) .OR. SELECT( JE-1 ) + ELSE + ILCOMP = SELECT( JE ) + END IF + IF( .NOT.ILCOMP ) + $ GO TO 500 +* +* Decide if (a) singular pencil, (b) real eigenvalue, or +* (c) complex eigenvalue. +* + IF( .NOT.ILCPLX ) THEN + IF( ABS( S( JE, JE ) ).LE.SAFMIN .AND. + $ ABS( P( JE, JE ) ).LE.SAFMIN ) THEN +* +* Singular matrix pencil -- unit eigenvector +* + IEIG = IEIG - 1 + DO 230 JR = 1, N + VR( JR, IEIG ) = ZERO + 230 CONTINUE + VR( IEIG, IEIG ) = ONE + GO TO 500 + END IF + END IF +* +* Clear vector +* + DO 250 JW = 0, NW - 1 + DO 240 JR = 1, N + WORK( ( JW+2 )*N+JR ) = ZERO + 240 CONTINUE + 250 CONTINUE +* +* Compute coefficients in ( a A - b B ) x = 0 +* a is ACOEF +* b is BCOEFR + i*BCOEFI +* + IF( .NOT.ILCPLX ) THEN +* +* Real eigenvalue +* + TEMP = ONE / MAX( ABS( S( JE, JE ) )*ASCALE, + $ ABS( P( JE, JE ) )*BSCALE, SAFMIN ) + SALFAR = ( TEMP*S( JE, JE ) )*ASCALE + SBETA = ( TEMP*P( JE, JE ) )*BSCALE + ACOEF = SBETA*ASCALE + BCOEFR = SALFAR*BSCALE + BCOEFI = ZERO +* +* Scale to avoid underflow +* + SCALE = ONE + LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEF ).LT.SMALL + LSB = ABS( SALFAR ).GE.SAFMIN .AND. ABS( BCOEFR ).LT. + $ SMALL + IF( LSA ) + $ SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG ) + IF( LSB ) + $ SCALE = MAX( SCALE, ( SMALL / ABS( SALFAR ) )* + $ MIN( BNORM, BIG ) ) + IF( LSA .OR. LSB ) THEN + SCALE = MIN( SCALE, ONE / + $ ( SAFMIN*MAX( ONE, ABS( ACOEF ), + $ ABS( BCOEFR ) ) ) ) + IF( LSA ) THEN + ACOEF = ASCALE*( SCALE*SBETA ) + ELSE + ACOEF = SCALE*ACOEF + END IF + IF( LSB ) THEN + BCOEFR = BSCALE*( SCALE*SALFAR ) + ELSE + BCOEFR = SCALE*BCOEFR + END IF + END IF + ACOEFA = ABS( ACOEF ) + BCOEFA = ABS( BCOEFR ) +* +* First component is 1 +* + WORK( 2*N+JE ) = ONE + XMAX = ONE +* +* Compute contribution from column JE of A and B to sum +* (See "Further Details", above.) +* + DO 260 JR = 1, JE - 1 + WORK( 2*N+JR ) = BCOEFR*P( JR, JE ) - + $ ACOEF*S( JR, JE ) + 260 CONTINUE + ELSE +* +* Complex eigenvalue +* + CALL SLAG2( S( JE-1, JE-1 ), LDS, P( JE-1, JE-1 ), LDP, + $ SAFMIN*SAFETY, ACOEF, TEMP, BCOEFR, TEMP2, + $ BCOEFI ) + IF( BCOEFI.EQ.ZERO ) THEN + INFO = JE - 1 + RETURN + END IF +* +* Scale to avoid over/underflow +* + ACOEFA = ABS( ACOEF ) + BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI ) + SCALE = ONE + IF( ACOEFA*ULP.LT.SAFMIN .AND. ACOEFA.GE.SAFMIN ) + $ SCALE = ( SAFMIN / ULP ) / ACOEFA + IF( BCOEFA*ULP.LT.SAFMIN .AND. BCOEFA.GE.SAFMIN ) + $ SCALE = MAX( SCALE, ( SAFMIN / ULP ) / BCOEFA ) + IF( SAFMIN*ACOEFA.GT.ASCALE ) + $ SCALE = ASCALE / ( SAFMIN*ACOEFA ) + IF( SAFMIN*BCOEFA.GT.BSCALE ) + $ SCALE = MIN( SCALE, BSCALE / ( SAFMIN*BCOEFA ) ) + IF( SCALE.NE.ONE ) THEN + ACOEF = SCALE*ACOEF + ACOEFA = ABS( ACOEF ) + BCOEFR = SCALE*BCOEFR + BCOEFI = SCALE*BCOEFI + BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI ) + END IF +* +* Compute first two components of eigenvector +* and contribution to sums +* + TEMP = ACOEF*S( JE, JE-1 ) + TEMP2R = ACOEF*S( JE, JE ) - BCOEFR*P( JE, JE ) + TEMP2I = -BCOEFI*P( JE, JE ) + IF( ABS( TEMP ).GE.ABS( TEMP2R )+ABS( TEMP2I ) ) THEN + WORK( 2*N+JE ) = ONE + WORK( 3*N+JE ) = ZERO + WORK( 2*N+JE-1 ) = -TEMP2R / TEMP + WORK( 3*N+JE-1 ) = -TEMP2I / TEMP + ELSE + WORK( 2*N+JE-1 ) = ONE + WORK( 3*N+JE-1 ) = ZERO + TEMP = ACOEF*S( JE-1, JE ) + WORK( 2*N+JE ) = ( BCOEFR*P( JE-1, JE-1 )-ACOEF* + $ S( JE-1, JE-1 ) ) / TEMP + WORK( 3*N+JE ) = BCOEFI*P( JE-1, JE-1 ) / TEMP + END IF +* + XMAX = MAX( ABS( WORK( 2*N+JE ) )+ABS( WORK( 3*N+JE ) ), + $ ABS( WORK( 2*N+JE-1 ) )+ABS( WORK( 3*N+JE-1 ) ) ) +* +* Compute contribution from columns JE and JE-1 +* of A and B to the sums. +* + CREALA = ACOEF*WORK( 2*N+JE-1 ) + CIMAGA = ACOEF*WORK( 3*N+JE-1 ) + CREALB = BCOEFR*WORK( 2*N+JE-1 ) - + $ BCOEFI*WORK( 3*N+JE-1 ) + CIMAGB = BCOEFI*WORK( 2*N+JE-1 ) + + $ BCOEFR*WORK( 3*N+JE-1 ) + CRE2A = ACOEF*WORK( 2*N+JE ) + CIM2A = ACOEF*WORK( 3*N+JE ) + CRE2B = BCOEFR*WORK( 2*N+JE ) - BCOEFI*WORK( 3*N+JE ) + CIM2B = BCOEFI*WORK( 2*N+JE ) + BCOEFR*WORK( 3*N+JE ) + DO 270 JR = 1, JE - 2 + WORK( 2*N+JR ) = -CREALA*S( JR, JE-1 ) + + $ CREALB*P( JR, JE-1 ) - + $ CRE2A*S( JR, JE ) + CRE2B*P( JR, JE ) + WORK( 3*N+JR ) = -CIMAGA*S( JR, JE-1 ) + + $ CIMAGB*P( JR, JE-1 ) - + $ CIM2A*S( JR, JE ) + CIM2B*P( JR, JE ) + 270 CONTINUE + END IF +* + DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN ) +* +* Columnwise triangular solve of (a A - b B) x = 0 +* + IL2BY2 = .FALSE. + DO 370 J = JE - NW, 1, -1 +* +* If a 2-by-2 block, is in position j-1:j, wait until +* next iteration to process it (when it will be j:j+1) +* + IF( .NOT.IL2BY2 .AND. J.GT.1 ) THEN + IF( S( J, J-1 ).NE.ZERO ) THEN + IL2BY2 = .TRUE. + GO TO 370 + END IF + END IF + BDIAG( 1 ) = P( J, J ) + IF( IL2BY2 ) THEN + NA = 2 + BDIAG( 2 ) = P( J+1, J+1 ) + ELSE + NA = 1 + END IF +* +* Compute x(j) (and x(j+1), if 2-by-2 block) +* + CALL SLALN2( .FALSE., NA, NW, DMIN, ACOEF, S( J, J ), + $ LDS, BDIAG( 1 ), BDIAG( 2 ), WORK( 2*N+J ), + $ N, BCOEFR, BCOEFI, SUM, 2, SCALE, TEMP, + $ IINFO ) + IF( SCALE.LT.ONE ) THEN +* + DO 290 JW = 0, NW - 1 + DO 280 JR = 1, JE + WORK( ( JW+2 )*N+JR ) = SCALE* + $ WORK( ( JW+2 )*N+JR ) + 280 CONTINUE + 290 CONTINUE + END IF + XMAX = MAX( SCALE*XMAX, TEMP ) +* + DO 310 JW = 1, NW + DO 300 JA = 1, NA + WORK( ( JW+1 )*N+J+JA-1 ) = SUM( JA, JW ) + 300 CONTINUE + 310 CONTINUE +* +* w = w + x(j)*(a S(*,j) - b P(*,j) ) with scaling +* + IF( J.GT.1 ) THEN +* +* Check whether scaling is necessary for sum. +* + XSCALE = ONE / MAX( ONE, XMAX ) + TEMP = ACOEFA*WORK( J ) + BCOEFA*WORK( N+J ) + IF( IL2BY2 ) + $ TEMP = MAX( TEMP, ACOEFA*WORK( J+1 )+BCOEFA* + $ WORK( N+J+1 ) ) + TEMP = MAX( TEMP, ACOEFA, BCOEFA ) + IF( TEMP.GT.BIGNUM*XSCALE ) THEN +* + DO 330 JW = 0, NW - 1 + DO 320 JR = 1, JE + WORK( ( JW+2 )*N+JR ) = XSCALE* + $ WORK( ( JW+2 )*N+JR ) + 320 CONTINUE + 330 CONTINUE + XMAX = XMAX*XSCALE + END IF +* +* Compute the contributions of the off-diagonals of +* column j (and j+1, if 2-by-2 block) of A and B to the +* sums. +* +* + DO 360 JA = 1, NA + IF( ILCPLX ) THEN + CREALA = ACOEF*WORK( 2*N+J+JA-1 ) + CIMAGA = ACOEF*WORK( 3*N+J+JA-1 ) + CREALB = BCOEFR*WORK( 2*N+J+JA-1 ) - + $ BCOEFI*WORK( 3*N+J+JA-1 ) + CIMAGB = BCOEFI*WORK( 2*N+J+JA-1 ) + + $ BCOEFR*WORK( 3*N+J+JA-1 ) + DO 340 JR = 1, J - 1 + WORK( 2*N+JR ) = WORK( 2*N+JR ) - + $ CREALA*S( JR, J+JA-1 ) + + $ CREALB*P( JR, J+JA-1 ) + WORK( 3*N+JR ) = WORK( 3*N+JR ) - + $ CIMAGA*S( JR, J+JA-1 ) + + $ CIMAGB*P( JR, J+JA-1 ) + 340 CONTINUE + ELSE + CREALA = ACOEF*WORK( 2*N+J+JA-1 ) + CREALB = BCOEFR*WORK( 2*N+J+JA-1 ) + DO 350 JR = 1, J - 1 + WORK( 2*N+JR ) = WORK( 2*N+JR ) - + $ CREALA*S( JR, J+JA-1 ) + + $ CREALB*P( JR, J+JA-1 ) + 350 CONTINUE + END IF + 360 CONTINUE + END IF +* + IL2BY2 = .FALSE. + 370 CONTINUE +* +* Copy eigenvector to VR, back transforming if +* HOWMNY='B'. +* + IEIG = IEIG - NW + IF( ILBACK ) THEN +* + DO 410 JW = 0, NW - 1 + DO 380 JR = 1, N + WORK( ( JW+4 )*N+JR ) = WORK( ( JW+2 )*N+1 )* + $ VR( JR, 1 ) + 380 CONTINUE +* +* A series of compiler directives to defeat +* vectorization for the next loop +* +* + DO 400 JC = 2, JE + DO 390 JR = 1, N + WORK( ( JW+4 )*N+JR ) = WORK( ( JW+4 )*N+JR ) + + $ WORK( ( JW+2 )*N+JC )*VR( JR, JC ) + 390 CONTINUE + 400 CONTINUE + 410 CONTINUE +* + DO 430 JW = 0, NW - 1 + DO 420 JR = 1, N + VR( JR, IEIG+JW ) = WORK( ( JW+4 )*N+JR ) + 420 CONTINUE + 430 CONTINUE +* + IEND = N + ELSE + DO 450 JW = 0, NW - 1 + DO 440 JR = 1, N + VR( JR, IEIG+JW ) = WORK( ( JW+2 )*N+JR ) + 440 CONTINUE + 450 CONTINUE +* + IEND = JE + END IF +* +* Scale eigenvector +* + XMAX = ZERO + IF( ILCPLX ) THEN + DO 460 J = 1, IEND + XMAX = MAX( XMAX, ABS( VR( J, IEIG ) )+ + $ ABS( VR( J, IEIG+1 ) ) ) + 460 CONTINUE + ELSE + DO 470 J = 1, IEND + XMAX = MAX( XMAX, ABS( VR( J, IEIG ) ) ) + 470 CONTINUE + END IF +* + IF( XMAX.GT.SAFMIN ) THEN + XSCALE = ONE / XMAX + DO 490 JW = 0, NW - 1 + DO 480 JR = 1, IEND + VR( JR, IEIG+JW ) = XSCALE*VR( JR, IEIG+JW ) + 480 CONTINUE + 490 CONTINUE + END IF + 500 CONTINUE + END IF +* + RETURN +* +* End of STGEVC +* + END diff --git a/dspl/liblapack/SRC/stgex2.f b/dspl/liblapack/SRC/stgex2.f new file mode 100644 index 0000000..1ae9563 --- /dev/null +++ b/dspl/liblapack/SRC/stgex2.f @@ -0,0 +1,697 @@ +*> \brief \b STGEX2 swaps adjacent diagonal blocks in an upper (quasi) triangular matrix pair by an orthogonal equivalence transformation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STGEX2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, +* LDZ, J1, N1, N2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL WANTQ, WANTZ +* INTEGER INFO, J1, LDA, LDB, LDQ, LDZ, LWORK, N, N1, N2 +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STGEX2 swaps adjacent diagonal blocks (A11, B11) and (A22, B22) +*> of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair +*> (A, B) by an orthogonal equivalence transformation. +*> +*> (A, B) must be in generalized real Schur canonical form (as returned +*> by SGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 +*> diagonal blocks. B is upper triangular. +*> +*> Optionally, the matrices Q and Z of generalized Schur vectors are +*> updated. +*> +*> Q(in) * A(in) * Z(in)**T = Q(out) * A(out) * Z(out)**T +*> Q(in) * B(in) * Z(in)**T = Q(out) * B(out) * Z(out)**T +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTQ +*> \verbatim +*> WANTQ is LOGICAL +*> .TRUE. : update the left transformation matrix Q; +*> .FALSE.: do not update Q. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> .TRUE. : update the right transformation matrix Z; +*> .FALSE.: do not update Z. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the matrix A in the pair (A, B). +*> On exit, the updated matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> On entry, the matrix B in the pair (A, B). +*> On exit, the updated matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ,N) +*> On entry, if WANTQ = .TRUE., the orthogonal matrix Q. +*> On exit, the updated matrix Q. +*> Not referenced if WANTQ = .FALSE.. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= 1. +*> If WANTQ = .TRUE., LDQ >= N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ,N) +*> On entry, if WANTZ =.TRUE., the orthogonal matrix Z. +*> On exit, the updated matrix Z. +*> Not referenced if WANTZ = .FALSE.. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1. +*> If WANTZ = .TRUE., LDZ >= N. +*> \endverbatim +*> +*> \param[in] J1 +*> \verbatim +*> J1 is INTEGER +*> The index to the first block (A11, B11). 1 <= J1 <= N. +*> \endverbatim +*> +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> The order of the first block (A11, B11). N1 = 0, 1 or 2. +*> \endverbatim +*> +*> \param[in] N2 +*> \verbatim +*> N2 is INTEGER +*> The order of the second block (A22, B22). N2 = 0, 1 or 2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)). +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= MAX( N*(N2+N1), (N2+N1)*(N2+N1)*2 ) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> =0: Successful exit +*> >0: If INFO = 1, the transformed matrix (A, B) would be +*> too far from generalized Schur form; the blocks are +*> not swapped and (A, B) and (Q, Z) are unchanged. +*> The problem of swapping is too ill-conditioned. +*> <0: If INFO = -16: LWORK is too small. Appropriate value +*> for LWORK is returned in WORK(1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realGEauxiliary +* +*> \par Further Details: +* ===================== +*> +*> In the current code both weak and strong stability tests are +*> performed. The user can omit the strong stability test by changing +*> the internal logical parameter WANDS to .FALSE.. See ref. [2] for +*> details. +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +*> Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +*> M.S. Moonen et al (eds), Linear Algebra for Large Scale and +*> Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +*> +*> [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +*> Eigenvalues of a Regular Matrix Pair (A, B) and Condition +*> Estimation: Theory, Algorithms and Software, +*> Report UMINF - 94.04, Department of Computing Science, Umea +*> University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working +*> Note 87. To appear in Numerical Algorithms, 1996. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, J1, N1, N2, WORK, LWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER INFO, J1, LDA, LDB, LDQ, LDZ, LWORK, N, N1, N2 +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* Replaced various illegal calls to SCOPY by calls to SLASET, or by DO +* loops. Sven Hammarling, 1/5/02. +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + REAL TWENTY + PARAMETER ( TWENTY = 2.0E+01 ) + INTEGER LDST + PARAMETER ( LDST = 4 ) + LOGICAL WANDS + PARAMETER ( WANDS = .TRUE. ) +* .. +* .. Local Scalars .. + LOGICAL STRONG, WEAK + INTEGER I, IDUM, LINFO, M + REAL BQRA21, BRQA21, DDUM, DNORM, DSCALE, DSUM, EPS, + $ F, G, SA, SB, SCALE, SMLNUM, SS, THRESH, WS +* .. +* .. Local Arrays .. + INTEGER IWORK( LDST ) + REAL AI( 2 ), AR( 2 ), BE( 2 ), IR( LDST, LDST ), + $ IRCOP( LDST, LDST ), LI( LDST, LDST ), + $ LICOP( LDST, LDST ), S( LDST, LDST ), + $ SCPY( LDST, LDST ), T( LDST, LDST ), + $ TAUL( LDST ), TAUR( LDST ), TCPY( LDST, LDST ) +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SGEQR2, SGERQ2, SLACPY, SLAGV2, SLARTG, + $ SLASET, SLASSQ, SORG2R, SORGR2, SORM2R, SORMR2, + $ SROT, SSCAL, STGSY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.1 .OR. N1.LE.0 .OR. N2.LE.0 ) + $ RETURN + IF( N1.GT.N .OR. ( J1+N1 ).GT.N ) + $ RETURN + M = N1 + N2 + IF( LWORK.LT.MAX( N*M, M*M*2 ) ) THEN + INFO = -16 + WORK( 1 ) = MAX( N*M, M*M*2 ) + RETURN + END IF +* + WEAK = .FALSE. + STRONG = .FALSE. +* +* Make a local copy of selected block +* + CALL SLASET( 'Full', LDST, LDST, ZERO, ZERO, LI, LDST ) + CALL SLASET( 'Full', LDST, LDST, ZERO, ZERO, IR, LDST ) + CALL SLACPY( 'Full', M, M, A( J1, J1 ), LDA, S, LDST ) + CALL SLACPY( 'Full', M, M, B( J1, J1 ), LDB, T, LDST ) +* +* Compute threshold for testing acceptance of swapping. +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + DSCALE = ZERO + DSUM = ONE + CALL SLACPY( 'Full', M, M, S, LDST, WORK, M ) + CALL SLASSQ( M*M, WORK, 1, DSCALE, DSUM ) + CALL SLACPY( 'Full', M, M, T, LDST, WORK, M ) + CALL SLASSQ( M*M, WORK, 1, DSCALE, DSUM ) + DNORM = DSCALE*SQRT( DSUM ) +* +* THRES has been changed from +* THRESH = MAX( TEN*EPS*SA, SMLNUM ) +* to +* THRESH = MAX( TWENTY*EPS*SA, SMLNUM ) +* on 04/01/10. +* "Bug" reported by Ondra Kamenik, confirmed by Julie Langou, fixed by +* Jim Demmel and Guillaume Revy. See forum post 1783. +* + THRESH = MAX( TWENTY*EPS*DNORM, SMLNUM ) +* + IF( M.EQ.2 ) THEN +* +* CASE 1: Swap 1-by-1 and 1-by-1 blocks. +* +* Compute orthogonal QL and RQ that swap 1-by-1 and 1-by-1 blocks +* using Givens rotations and perform the swap tentatively. +* + F = S( 2, 2 )*T( 1, 1 ) - T( 2, 2 )*S( 1, 1 ) + G = S( 2, 2 )*T( 1, 2 ) - T( 2, 2 )*S( 1, 2 ) + SB = ABS( T( 2, 2 ) ) + SA = ABS( S( 2, 2 ) ) + CALL SLARTG( F, G, IR( 1, 2 ), IR( 1, 1 ), DDUM ) + IR( 2, 1 ) = -IR( 1, 2 ) + IR( 2, 2 ) = IR( 1, 1 ) + CALL SROT( 2, S( 1, 1 ), 1, S( 1, 2 ), 1, IR( 1, 1 ), + $ IR( 2, 1 ) ) + CALL SROT( 2, T( 1, 1 ), 1, T( 1, 2 ), 1, IR( 1, 1 ), + $ IR( 2, 1 ) ) + IF( SA.GE.SB ) THEN + CALL SLARTG( S( 1, 1 ), S( 2, 1 ), LI( 1, 1 ), LI( 2, 1 ), + $ DDUM ) + ELSE + CALL SLARTG( T( 1, 1 ), T( 2, 1 ), LI( 1, 1 ), LI( 2, 1 ), + $ DDUM ) + END IF + CALL SROT( 2, S( 1, 1 ), LDST, S( 2, 1 ), LDST, LI( 1, 1 ), + $ LI( 2, 1 ) ) + CALL SROT( 2, T( 1, 1 ), LDST, T( 2, 1 ), LDST, LI( 1, 1 ), + $ LI( 2, 1 ) ) + LI( 2, 2 ) = LI( 1, 1 ) + LI( 1, 2 ) = -LI( 2, 1 ) +* +* Weak stability test: +* |S21| + |T21| <= O(EPS * F-norm((S, T))) +* + WS = ABS( S( 2, 1 ) ) + ABS( T( 2, 1 ) ) + WEAK = WS.LE.THRESH + IF( .NOT.WEAK ) + $ GO TO 70 +* + IF( WANDS ) THEN +* +* Strong stability test: +* F-norm((A-QL**T*S*QR, B-QL**T*T*QR)) <= O(EPS*F-norm((A, B))) +* + CALL SLACPY( 'Full', M, M, A( J1, J1 ), LDA, WORK( M*M+1 ), + $ M ) + CALL SGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, S, LDST, ZERO, + $ WORK, M ) + CALL SGEMM( 'N', 'T', M, M, M, -ONE, WORK, M, IR, LDST, ONE, + $ WORK( M*M+1 ), M ) + DSCALE = ZERO + DSUM = ONE + CALL SLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM ) +* + CALL SLACPY( 'Full', M, M, B( J1, J1 ), LDB, WORK( M*M+1 ), + $ M ) + CALL SGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, T, LDST, ZERO, + $ WORK, M ) + CALL SGEMM( 'N', 'T', M, M, M, -ONE, WORK, M, IR, LDST, ONE, + $ WORK( M*M+1 ), M ) + CALL SLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM ) + SS = DSCALE*SQRT( DSUM ) + STRONG = SS.LE.THRESH + IF( .NOT.STRONG ) + $ GO TO 70 + END IF +* +* Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and +* (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). +* + CALL SROT( J1+1, A( 1, J1 ), 1, A( 1, J1+1 ), 1, IR( 1, 1 ), + $ IR( 2, 1 ) ) + CALL SROT( J1+1, B( 1, J1 ), 1, B( 1, J1+1 ), 1, IR( 1, 1 ), + $ IR( 2, 1 ) ) + CALL SROT( N-J1+1, A( J1, J1 ), LDA, A( J1+1, J1 ), LDA, + $ LI( 1, 1 ), LI( 2, 1 ) ) + CALL SROT( N-J1+1, B( J1, J1 ), LDB, B( J1+1, J1 ), LDB, + $ LI( 1, 1 ), LI( 2, 1 ) ) +* +* Set N1-by-N2 (2,1) - blocks to ZERO. +* + A( J1+1, J1 ) = ZERO + B( J1+1, J1 ) = ZERO +* +* Accumulate transformations into Q and Z if requested. +* + IF( WANTZ ) + $ CALL SROT( N, Z( 1, J1 ), 1, Z( 1, J1+1 ), 1, IR( 1, 1 ), + $ IR( 2, 1 ) ) + IF( WANTQ ) + $ CALL SROT( N, Q( 1, J1 ), 1, Q( 1, J1+1 ), 1, LI( 1, 1 ), + $ LI( 2, 1 ) ) +* +* Exit with INFO = 0 if swap was successfully performed. +* + RETURN +* + ELSE +* +* CASE 2: Swap 1-by-1 and 2-by-2 blocks, or 2-by-2 +* and 2-by-2 blocks. +* +* Solve the generalized Sylvester equation +* S11 * R - L * S22 = SCALE * S12 +* T11 * R - L * T22 = SCALE * T12 +* for R and L. Solutions in LI and IR. +* + CALL SLACPY( 'Full', N1, N2, T( 1, N1+1 ), LDST, LI, LDST ) + CALL SLACPY( 'Full', N1, N2, S( 1, N1+1 ), LDST, + $ IR( N2+1, N1+1 ), LDST ) + CALL STGSY2( 'N', 0, N1, N2, S, LDST, S( N1+1, N1+1 ), LDST, + $ IR( N2+1, N1+1 ), LDST, T, LDST, T( N1+1, N1+1 ), + $ LDST, LI, LDST, SCALE, DSUM, DSCALE, IWORK, IDUM, + $ LINFO ) +* +* Compute orthogonal matrix QL: +* +* QL**T * LI = [ TL ] +* [ 0 ] +* where +* LI = [ -L ] +* [ SCALE * identity(N2) ] +* + DO 10 I = 1, N2 + CALL SSCAL( N1, -ONE, LI( 1, I ), 1 ) + LI( N1+I, I ) = SCALE + 10 CONTINUE + CALL SGEQR2( M, N2, LI, LDST, TAUL, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 + CALL SORG2R( M, M, N2, LI, LDST, TAUL, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 +* +* Compute orthogonal matrix RQ: +* +* IR * RQ**T = [ 0 TR], +* +* where IR = [ SCALE * identity(N1), R ] +* + DO 20 I = 1, N1 + IR( N2+I, I ) = SCALE + 20 CONTINUE + CALL SGERQ2( N1, M, IR( N2+1, 1 ), LDST, TAUR, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 + CALL SORGR2( M, M, N1, IR, LDST, TAUR, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 +* +* Perform the swapping tentatively: +* + CALL SGEMM( 'T', 'N', M, M, M, ONE, LI, LDST, S, LDST, ZERO, + $ WORK, M ) + CALL SGEMM( 'N', 'T', M, M, M, ONE, WORK, M, IR, LDST, ZERO, S, + $ LDST ) + CALL SGEMM( 'T', 'N', M, M, M, ONE, LI, LDST, T, LDST, ZERO, + $ WORK, M ) + CALL SGEMM( 'N', 'T', M, M, M, ONE, WORK, M, IR, LDST, ZERO, T, + $ LDST ) + CALL SLACPY( 'F', M, M, S, LDST, SCPY, LDST ) + CALL SLACPY( 'F', M, M, T, LDST, TCPY, LDST ) + CALL SLACPY( 'F', M, M, IR, LDST, IRCOP, LDST ) + CALL SLACPY( 'F', M, M, LI, LDST, LICOP, LDST ) +* +* Triangularize the B-part by an RQ factorization. +* Apply transformation (from left) to A-part, giving S. +* + CALL SGERQ2( M, M, T, LDST, TAUR, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 + CALL SORMR2( 'R', 'T', M, M, M, T, LDST, TAUR, S, LDST, WORK, + $ LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 + CALL SORMR2( 'L', 'N', M, M, M, T, LDST, TAUR, IR, LDST, WORK, + $ LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 +* +* Compute F-norm(S21) in BRQA21. (T21 is 0.) +* + DSCALE = ZERO + DSUM = ONE + DO 30 I = 1, N2 + CALL SLASSQ( N1, S( N2+1, I ), 1, DSCALE, DSUM ) + 30 CONTINUE + BRQA21 = DSCALE*SQRT( DSUM ) +* +* Triangularize the B-part by a QR factorization. +* Apply transformation (from right) to A-part, giving S. +* + CALL SGEQR2( M, M, TCPY, LDST, TAUL, WORK, LINFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 + CALL SORM2R( 'L', 'T', M, M, M, TCPY, LDST, TAUL, SCPY, LDST, + $ WORK, INFO ) + CALL SORM2R( 'R', 'N', M, M, M, TCPY, LDST, TAUL, LICOP, LDST, + $ WORK, INFO ) + IF( LINFO.NE.0 ) + $ GO TO 70 +* +* Compute F-norm(S21) in BQRA21. (T21 is 0.) +* + DSCALE = ZERO + DSUM = ONE + DO 40 I = 1, N2 + CALL SLASSQ( N1, SCPY( N2+1, I ), 1, DSCALE, DSUM ) + 40 CONTINUE + BQRA21 = DSCALE*SQRT( DSUM ) +* +* Decide which method to use. +* Weak stability test: +* F-norm(S21) <= O(EPS * F-norm((S, T))) +* + IF( BQRA21.LE.BRQA21 .AND. BQRA21.LE.THRESH ) THEN + CALL SLACPY( 'F', M, M, SCPY, LDST, S, LDST ) + CALL SLACPY( 'F', M, M, TCPY, LDST, T, LDST ) + CALL SLACPY( 'F', M, M, IRCOP, LDST, IR, LDST ) + CALL SLACPY( 'F', M, M, LICOP, LDST, LI, LDST ) + ELSE IF( BRQA21.GE.THRESH ) THEN + GO TO 70 + END IF +* +* Set lower triangle of B-part to zero +* + CALL SLASET( 'Lower', M-1, M-1, ZERO, ZERO, T(2,1), LDST ) +* + IF( WANDS ) THEN +* +* Strong stability test: +* F-norm((A-QL*S*QR**T, B-QL*T*QR**T)) <= O(EPS*F-norm((A,B))) +* + CALL SLACPY( 'Full', M, M, A( J1, J1 ), LDA, WORK( M*M+1 ), + $ M ) + CALL SGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, S, LDST, ZERO, + $ WORK, M ) + CALL SGEMM( 'N', 'N', M, M, M, -ONE, WORK, M, IR, LDST, ONE, + $ WORK( M*M+1 ), M ) + DSCALE = ZERO + DSUM = ONE + CALL SLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM ) +* + CALL SLACPY( 'Full', M, M, B( J1, J1 ), LDB, WORK( M*M+1 ), + $ M ) + CALL SGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, T, LDST, ZERO, + $ WORK, M ) + CALL SGEMM( 'N', 'N', M, M, M, -ONE, WORK, M, IR, LDST, ONE, + $ WORK( M*M+1 ), M ) + CALL SLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM ) + SS = DSCALE*SQRT( DSUM ) + STRONG = ( SS.LE.THRESH ) + IF( .NOT.STRONG ) + $ GO TO 70 +* + END IF +* +* If the swap is accepted ("weakly" and "strongly"), apply the +* transformations and set N1-by-N2 (2,1)-block to zero. +* + CALL SLASET( 'Full', N1, N2, ZERO, ZERO, S(N2+1,1), LDST ) +* +* copy back M-by-M diagonal block starting at index J1 of (A, B) +* + CALL SLACPY( 'F', M, M, S, LDST, A( J1, J1 ), LDA ) + CALL SLACPY( 'F', M, M, T, LDST, B( J1, J1 ), LDB ) + CALL SLASET( 'Full', LDST, LDST, ZERO, ZERO, T, LDST ) +* +* Standardize existing 2-by-2 blocks. +* + CALL SLASET( 'Full', M, M, ZERO, ZERO, WORK, M ) + WORK( 1 ) = ONE + T( 1, 1 ) = ONE + IDUM = LWORK - M*M - 2 + IF( N2.GT.1 ) THEN + CALL SLAGV2( A( J1, J1 ), LDA, B( J1, J1 ), LDB, AR, AI, BE, + $ WORK( 1 ), WORK( 2 ), T( 1, 1 ), T( 2, 1 ) ) + WORK( M+1 ) = -WORK( 2 ) + WORK( M+2 ) = WORK( 1 ) + T( N2, N2 ) = T( 1, 1 ) + T( 1, 2 ) = -T( 2, 1 ) + END IF + WORK( M*M ) = ONE + T( M, M ) = ONE +* + IF( N1.GT.1 ) THEN + CALL SLAGV2( A( J1+N2, J1+N2 ), LDA, B( J1+N2, J1+N2 ), LDB, + $ TAUR, TAUL, WORK( M*M+1 ), WORK( N2*M+N2+1 ), + $ WORK( N2*M+N2+2 ), T( N2+1, N2+1 ), + $ T( M, M-1 ) ) + WORK( M*M ) = WORK( N2*M+N2+1 ) + WORK( M*M-1 ) = -WORK( N2*M+N2+2 ) + T( M, M ) = T( N2+1, N2+1 ) + T( M-1, M ) = -T( M, M-1 ) + END IF + CALL SGEMM( 'T', 'N', N2, N1, N2, ONE, WORK, M, A( J1, J1+N2 ), + $ LDA, ZERO, WORK( M*M+1 ), N2 ) + CALL SLACPY( 'Full', N2, N1, WORK( M*M+1 ), N2, A( J1, J1+N2 ), + $ LDA ) + CALL SGEMM( 'T', 'N', N2, N1, N2, ONE, WORK, M, B( J1, J1+N2 ), + $ LDB, ZERO, WORK( M*M+1 ), N2 ) + CALL SLACPY( 'Full', N2, N1, WORK( M*M+1 ), N2, B( J1, J1+N2 ), + $ LDB ) + CALL SGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, WORK, M, ZERO, + $ WORK( M*M+1 ), M ) + CALL SLACPY( 'Full', M, M, WORK( M*M+1 ), M, LI, LDST ) + CALL SGEMM( 'N', 'N', N2, N1, N1, ONE, A( J1, J1+N2 ), LDA, + $ T( N2+1, N2+1 ), LDST, ZERO, WORK, N2 ) + CALL SLACPY( 'Full', N2, N1, WORK, N2, A( J1, J1+N2 ), LDA ) + CALL SGEMM( 'N', 'N', N2, N1, N1, ONE, B( J1, J1+N2 ), LDB, + $ T( N2+1, N2+1 ), LDST, ZERO, WORK, N2 ) + CALL SLACPY( 'Full', N2, N1, WORK, N2, B( J1, J1+N2 ), LDB ) + CALL SGEMM( 'T', 'N', M, M, M, ONE, IR, LDST, T, LDST, ZERO, + $ WORK, M ) + CALL SLACPY( 'Full', M, M, WORK, M, IR, LDST ) +* +* Accumulate transformations into Q and Z if requested. +* + IF( WANTQ ) THEN + CALL SGEMM( 'N', 'N', N, M, M, ONE, Q( 1, J1 ), LDQ, LI, + $ LDST, ZERO, WORK, N ) + CALL SLACPY( 'Full', N, M, WORK, N, Q( 1, J1 ), LDQ ) +* + END IF +* + IF( WANTZ ) THEN + CALL SGEMM( 'N', 'N', N, M, M, ONE, Z( 1, J1 ), LDZ, IR, + $ LDST, ZERO, WORK, N ) + CALL SLACPY( 'Full', N, M, WORK, N, Z( 1, J1 ), LDZ ) +* + END IF +* +* Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and +* (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)). +* + I = J1 + M + IF( I.LE.N ) THEN + CALL SGEMM( 'T', 'N', M, N-I+1, M, ONE, LI, LDST, + $ A( J1, I ), LDA, ZERO, WORK, M ) + CALL SLACPY( 'Full', M, N-I+1, WORK, M, A( J1, I ), LDA ) + CALL SGEMM( 'T', 'N', M, N-I+1, M, ONE, LI, LDST, + $ B( J1, I ), LDB, ZERO, WORK, M ) + CALL SLACPY( 'Full', M, N-I+1, WORK, M, B( J1, I ), LDB ) + END IF + I = J1 - 1 + IF( I.GT.0 ) THEN + CALL SGEMM( 'N', 'N', I, M, M, ONE, A( 1, J1 ), LDA, IR, + $ LDST, ZERO, WORK, I ) + CALL SLACPY( 'Full', I, M, WORK, I, A( 1, J1 ), LDA ) + CALL SGEMM( 'N', 'N', I, M, M, ONE, B( 1, J1 ), LDB, IR, + $ LDST, ZERO, WORK, I ) + CALL SLACPY( 'Full', I, M, WORK, I, B( 1, J1 ), LDB ) + END IF +* +* Exit with INFO = 0 if swap was successfully performed. +* + RETURN +* + END IF +* +* Exit with INFO = 1 if swap was rejected. +* + 70 CONTINUE +* + INFO = 1 + RETURN +* +* End of STGEX2 +* + END diff --git a/dspl/liblapack/SRC/stgexc.f b/dspl/liblapack/SRC/stgexc.f new file mode 100644 index 0000000..840036d --- /dev/null +++ b/dspl/liblapack/SRC/stgexc.f @@ -0,0 +1,544 @@ +*> \brief \b STGEXC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STGEXC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, +* LDZ, IFST, ILST, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL WANTQ, WANTZ +* INTEGER IFST, ILST, INFO, LDA, LDB, LDQ, LDZ, LWORK, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STGEXC reorders the generalized real Schur decomposition of a real +*> matrix pair (A,B) using an orthogonal equivalence transformation +*> +*> (A, B) = Q * (A, B) * Z**T, +*> +*> so that the diagonal block of (A, B) with row index IFST is moved +*> to row ILST. +*> +*> (A, B) must be in generalized real Schur canonical form (as returned +*> by SGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2 +*> diagonal blocks. B is upper triangular. +*> +*> Optionally, the matrices Q and Z of generalized Schur vectors are +*> updated. +*> +*> Q(in) * A(in) * Z(in)**T = Q(out) * A(out) * Z(out)**T +*> Q(in) * B(in) * Z(in)**T = Q(out) * B(out) * Z(out)**T +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTQ +*> \verbatim +*> WANTQ is LOGICAL +*> .TRUE. : update the left transformation matrix Q; +*> .FALSE.: do not update Q. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> .TRUE. : update the right transformation matrix Z; +*> .FALSE.: do not update Z. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the matrix A in generalized real Schur canonical +*> form. +*> On exit, the updated matrix A, again in generalized +*> real Schur canonical form. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> On entry, the matrix B in generalized real Schur canonical +*> form (A,B). +*> On exit, the updated matrix B, again in generalized +*> real Schur canonical form (A,B). +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ,N) +*> On entry, if WANTQ = .TRUE., the orthogonal matrix Q. +*> On exit, the updated matrix Q. +*> If WANTQ = .FALSE., Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= 1. +*> If WANTQ = .TRUE., LDQ >= N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ,N) +*> On entry, if WANTZ = .TRUE., the orthogonal matrix Z. +*> On exit, the updated matrix Z. +*> If WANTZ = .FALSE., Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1. +*> If WANTZ = .TRUE., LDZ >= N. +*> \endverbatim +*> +*> \param[in,out] IFST +*> \verbatim +*> IFST is INTEGER +*> \endverbatim +*> +*> \param[in,out] ILST +*> \verbatim +*> ILST is INTEGER +*> Specify the reordering of the diagonal blocks of (A, B). +*> The block with row index IFST is moved to row ILST, by a +*> sequence of swapping between adjacent blocks. +*> On exit, if IFST pointed on entry to the second row of +*> a 2-by-2 block, it is changed to point to the first row; +*> ILST always points to the first row of the block in its +*> final position (which may differ from its input value by +*> +1 or -1). 1 <= IFST, ILST <= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= 1 when N <= 1, otherwise LWORK >= 4*N + 16. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> =0: successful exit. +*> <0: if INFO = -i, the i-th argument had an illegal value. +*> =1: The transformed matrix pair (A, B) would be too far +*> from generalized Schur form; the problem is ill- +*> conditioned. (A, B) may have been partially reordered, +*> and ILST points to the first row of the current +*> position of the block being moved. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realGEcomputational +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +*> Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +*> M.S. Moonen et al (eds), Linear Algebra for Large Scale and +*> Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, IFST, ILST, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER IFST, ILST, INFO, LDA, LDB, LDQ, LDZ, LWORK, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER HERE, LWMIN, NBF, NBL, NBNEXT +* .. +* .. External Subroutines .. + EXTERNAL STGEX2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode and test input arguments. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. ( LDQ.LT.MAX( 1, N ) ) ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. WANTZ .AND. ( LDZ.LT.MAX( 1, N ) ) ) THEN + INFO = -11 + ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN + INFO = -12 + ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + ELSE + LWMIN = 4*N + 16 + END IF + WORK(1) = LWMIN +* + IF (LWORK.LT.LWMIN .AND. .NOT.LQUERY) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STGEXC', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* +* Determine the first row of the specified block and find out +* if it is 1-by-1 or 2-by-2. +* + IF( IFST.GT.1 ) THEN + IF( A( IFST, IFST-1 ).NE.ZERO ) + $ IFST = IFST - 1 + END IF + NBF = 1 + IF( IFST.LT.N ) THEN + IF( A( IFST+1, IFST ).NE.ZERO ) + $ NBF = 2 + END IF +* +* Determine the first row of the final block +* and find out if it is 1-by-1 or 2-by-2. +* + IF( ILST.GT.1 ) THEN + IF( A( ILST, ILST-1 ).NE.ZERO ) + $ ILST = ILST - 1 + END IF + NBL = 1 + IF( ILST.LT.N ) THEN + IF( A( ILST+1, ILST ).NE.ZERO ) + $ NBL = 2 + END IF + IF( IFST.EQ.ILST ) + $ RETURN +* + IF( IFST.LT.ILST ) THEN +* +* Update ILST. +* + IF( NBF.EQ.2 .AND. NBL.EQ.1 ) + $ ILST = ILST - 1 + IF( NBF.EQ.1 .AND. NBL.EQ.2 ) + $ ILST = ILST + 1 +* + HERE = IFST +* + 10 CONTINUE +* +* Swap with next one below. +* + IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN +* +* Current block either 1-by-1 or 2-by-2. +* + NBNEXT = 1 + IF( HERE+NBF+1.LE.N ) THEN + IF( A( HERE+NBF+1, HERE+NBF ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE, NBF, NBNEXT, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + NBNEXT +* +* Test if 2-by-2 block breaks into two 1-by-1 blocks. +* + IF( NBF.EQ.2 ) THEN + IF( A( HERE+1, HERE ).EQ.ZERO ) + $ NBF = 3 + END IF +* + ELSE +* +* Current block consists of two 1-by-1 blocks, each of which +* must be swapped individually. +* + NBNEXT = 1 + IF( HERE+3.LE.N ) THEN + IF( A( HERE+3, HERE+2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE+1, 1, NBNEXT, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + IF( NBNEXT.EQ.1 ) THEN +* +* Swap two 1-by-1 blocks. +* + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE, 1, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 1 +* + ELSE +* +* Recompute NBNEXT in case of 2-by-2 split. +* + IF( A( HERE+2, HERE+1 ).EQ.ZERO ) + $ NBNEXT = 1 + IF( NBNEXT.EQ.2 ) THEN +* +* 2-by-2 block did not split. +* + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE, 1, NBNEXT, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 2 + ELSE +* +* 2-by-2 block did split. +* + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 1 + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 1 + END IF +* + END IF + END IF + IF( HERE.LT.ILST ) + $ GO TO 10 + ELSE + HERE = IFST +* + 20 CONTINUE +* +* Swap with next one below. +* + IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN +* +* Current block either 1-by-1 or 2-by-2. +* + NBNEXT = 1 + IF( HERE.GE.3 ) THEN + IF( A( HERE-1, HERE-2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE-NBNEXT, NBNEXT, NBF, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - NBNEXT +* +* Test if 2-by-2 block breaks into two 1-by-1 blocks. +* + IF( NBF.EQ.2 ) THEN + IF( A( HERE+1, HERE ).EQ.ZERO ) + $ NBF = 3 + END IF +* + ELSE +* +* Current block consists of two 1-by-1 blocks, each of which +* must be swapped individually. +* + NBNEXT = 1 + IF( HERE.GE.3 ) THEN + IF( A( HERE-1, HERE-2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE-NBNEXT, NBNEXT, 1, WORK, LWORK, + $ INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + IF( NBNEXT.EQ.1 ) THEN +* +* Swap two 1-by-1 blocks. +* + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, HERE, NBNEXT, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 1 + ELSE +* +* Recompute NBNEXT in case of 2-by-2 split. +* + IF( A( HERE, HERE-1 ).EQ.ZERO ) + $ NBNEXT = 1 + IF( NBNEXT.EQ.2 ) THEN +* +* 2-by-2 block did not split. +* + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE-1, 2, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 2 + ELSE +* +* 2-by-2 block did split. +* + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 1 + CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 1 + END IF + END IF + END IF + IF( HERE.GT.ILST ) + $ GO TO 20 + END IF + ILST = HERE + WORK( 1 ) = LWMIN + RETURN +* +* End of STGEXC +* + END diff --git a/dspl/liblapack/SRC/stgsen.f b/dspl/liblapack/SRC/stgsen.f new file mode 100644 index 0000000..5e63a96 --- /dev/null +++ b/dspl/liblapack/SRC/stgsen.f @@ -0,0 +1,865 @@ +*> \brief \b STGSEN +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STGSEN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, +* ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, M, PL, +* PR, DIF, WORK, LWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL WANTQ, WANTZ +* INTEGER IJOB, INFO, LDA, LDB, LDQ, LDZ, LIWORK, LWORK, +* $ M, N +* REAL PL, PR +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* INTEGER IWORK( * ) +* REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), +* $ B( LDB, * ), BETA( * ), DIF( * ), Q( LDQ, * ), +* $ WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STGSEN reorders the generalized real Schur decomposition of a real +*> matrix pair (A, B) (in terms of an orthonormal equivalence trans- +*> formation Q**T * (A, B) * Z), so that a selected cluster of eigenvalues +*> appears in the leading diagonal blocks of the upper quasi-triangular +*> matrix A and the upper triangular B. The leading columns of Q and +*> Z form orthonormal bases of the corresponding left and right eigen- +*> spaces (deflating subspaces). (A, B) must be in generalized real +*> Schur canonical form (as returned by SGGES), i.e. A is block upper +*> triangular with 1-by-1 and 2-by-2 diagonal blocks. B is upper +*> triangular. +*> +*> STGSEN also computes the generalized eigenvalues +*> +*> w(j) = (ALPHAR(j) + i*ALPHAI(j))/BETA(j) +*> +*> of the reordered matrix pair (A, B). +*> +*> Optionally, STGSEN computes the estimates of reciprocal condition +*> numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11), +*> (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s) +*> between the matrix pairs (A11, B11) and (A22,B22) that correspond to +*> the selected cluster and the eigenvalues outside the cluster, resp., +*> and norms of "projections" onto left and right eigenspaces w.r.t. +*> the selected cluster in the (1,1)-block. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] IJOB +*> \verbatim +*> IJOB is INTEGER +*> Specifies whether condition numbers are required for the +*> cluster of eigenvalues (PL and PR) or the deflating subspaces +*> (Difu and Difl): +*> =0: Only reorder w.r.t. SELECT. No extras. +*> =1: Reciprocal of norms of "projections" onto left and right +*> eigenspaces w.r.t. the selected cluster (PL and PR). +*> =2: Upper bounds on Difu and Difl. F-norm-based estimate +*> (DIF(1:2)). +*> =3: Estimate of Difu and Difl. 1-norm-based estimate +*> (DIF(1:2)). +*> About 5 times as expensive as IJOB = 2. +*> =4: Compute PL, PR and DIF (i.e. 0, 1 and 2 above): Economic +*> version to get it all. +*> =5: Compute PL, PR and DIF (i.e. 0, 1 and 3 above) +*> \endverbatim +*> +*> \param[in] WANTQ +*> \verbatim +*> WANTQ is LOGICAL +*> .TRUE. : update the left transformation matrix Q; +*> .FALSE.: do not update Q. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> .TRUE. : update the right transformation matrix Z; +*> .FALSE.: do not update Z. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> SELECT specifies the eigenvalues in the selected cluster. +*> To select a real eigenvalue w(j), SELECT(j) must be set to +*> .TRUE.. To select a complex conjugate pair of eigenvalues +*> w(j) and w(j+1), corresponding to a 2-by-2 diagonal block, +*> either SELECT(j) or SELECT(j+1) or both must be set to +*> .TRUE.; a complex conjugate pair of eigenvalues must be +*> either both included in the cluster or both excluded. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension(LDA,N) +*> On entry, the upper quasi-triangular matrix A, with (A, B) in +*> generalized real Schur canonical form. +*> On exit, A is overwritten by the reordered matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension(LDB,N) +*> On entry, the upper triangular matrix B, with (A, B) in +*> generalized real Schur canonical form. +*> On exit, B is overwritten by the reordered matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHAR +*> \verbatim +*> ALPHAR is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] ALPHAI +*> \verbatim +*> ALPHAI is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is REAL array, dimension (N) +*> +*> On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will +*> be the generalized eigenvalues. ALPHAR(j) + ALPHAI(j)*i +*> and BETA(j),j=1,...,N are the diagonals of the complex Schur +*> form (S,T) that would result if the 2-by-2 diagonal blocks of +*> the real generalized Schur form of (A,B) were further reduced +*> to triangular form using complex unitary transformations. +*> If ALPHAI(j) is zero, then the j-th eigenvalue is real; if +*> positive, then the j-th and (j+1)-st eigenvalues are a +*> complex conjugate pair, with ALPHAI(j+1) negative. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ,N) +*> On entry, if WANTQ = .TRUE., Q is an N-by-N matrix. +*> On exit, Q has been postmultiplied by the left orthogonal +*> transformation matrix which reorder (A, B); The leading M +*> columns of Q form orthonormal bases for the specified pair of +*> left eigenspaces (deflating subspaces). +*> If WANTQ = .FALSE., Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= 1; +*> and if WANTQ = .TRUE., LDQ >= N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is REAL array, dimension (LDZ,N) +*> On entry, if WANTZ = .TRUE., Z is an N-by-N matrix. +*> On exit, Z has been postmultiplied by the left orthogonal +*> transformation matrix which reorder (A, B); The leading M +*> columns of Z form orthonormal bases for the specified pair of +*> left eigenspaces (deflating subspaces). +*> If WANTZ = .FALSE., Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1; +*> If WANTZ = .TRUE., LDZ >= N. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The dimension of the specified pair of left and right eigen- +*> spaces (deflating subspaces). 0 <= M <= N. +*> \endverbatim +*> +*> \param[out] PL +*> \verbatim +*> PL is REAL +*> \endverbatim +*> +*> \param[out] PR +*> \verbatim +*> PR is REAL +*> +*> If IJOB = 1, 4 or 5, PL, PR are lower bounds on the +*> reciprocal of the norm of "projections" onto left and right +*> eigenspaces with respect to the selected cluster. +*> 0 < PL, PR <= 1. +*> If M = 0 or M = N, PL = PR = 1. +*> If IJOB = 0, 2 or 3, PL and PR are not referenced. +*> \endverbatim +*> +*> \param[out] DIF +*> \verbatim +*> DIF is REAL array, dimension (2). +*> If IJOB >= 2, DIF(1:2) store the estimates of Difu and Difl. +*> If IJOB = 2 or 4, DIF(1:2) are F-norm-based upper bounds on +*> Difu and Difl. If IJOB = 3 or 5, DIF(1:2) are 1-norm-based +*> estimates of Difu and Difl. +*> If M = 0 or N, DIF(1:2) = F-norm([A, B]). +*> If IJOB = 0 or 1, DIF is not referenced. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 4*N+16. +*> If IJOB = 1, 2 or 4, LWORK >= MAX(4*N+16, 2*M*(N-M)). +*> If IJOB = 3 or 5, LWORK >= MAX(4*N+16, 4*M*(N-M)). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= 1. +*> If IJOB = 1, 2 or 4, LIWORK >= N+6. +*> If IJOB = 3 or 5, LIWORK >= MAX(2*M*(N-M), N+6). +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> =0: Successful exit. +*> <0: If INFO = -i, the i-th argument had an illegal value. +*> =1: Reordering of (A, B) failed because the transformed +*> matrix pair (A, B) would be too far from generalized +*> Schur form; the problem is very ill-conditioned. +*> (A, B) may have been partially reordered. +*> If requested, 0 is returned in DIF(*), PL and PR. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> STGSEN first collects the selected eigenvalues by computing +*> orthogonal U and W that move them to the top left corner of (A, B). +*> In other words, the selected eigenvalues are the eigenvalues of +*> (A11, B11) in: +*> +*> U**T*(A, B)*W = (A11 A12) (B11 B12) n1 +*> ( 0 A22),( 0 B22) n2 +*> n1 n2 n1 n2 +*> +*> where N = n1+n2 and U**T means the transpose of U. The first n1 columns +*> of U and W span the specified pair of left and right eigenspaces +*> (deflating subspaces) of (A, B). +*> +*> If (A, B) has been obtained from the generalized real Schur +*> decomposition of a matrix pair (C, D) = Q*(A, B)*Z**T, then the +*> reordered generalized real Schur form of (C, D) is given by +*> +*> (C, D) = (Q*U)*(U**T*(A, B)*W)*(Z*W)**T, +*> +*> and the first n1 columns of Q*U and Z*W span the corresponding +*> deflating subspaces of (C, D) (Q and Z store Q*U and Z*W, resp.). +*> +*> Note that if the selected eigenvalue is sufficiently ill-conditioned, +*> then its value may differ significantly from its value before +*> reordering. +*> +*> The reciprocal condition numbers of the left and right eigenspaces +*> spanned by the first n1 columns of U and W (or Q*U and Z*W) may +*> be returned in DIF(1:2), corresponding to Difu and Difl, resp. +*> +*> The Difu and Difl are defined as: +*> +*> Difu[(A11, B11), (A22, B22)] = sigma-min( Zu ) +*> and +*> Difl[(A11, B11), (A22, B22)] = Difu[(A22, B22), (A11, B11)], +*> +*> where sigma-min(Zu) is the smallest singular value of the +*> (2*n1*n2)-by-(2*n1*n2) matrix +*> +*> Zu = [ kron(In2, A11) -kron(A22**T, In1) ] +*> [ kron(In2, B11) -kron(B22**T, In1) ]. +*> +*> Here, Inx is the identity matrix of size nx and A22**T is the +*> transpose of A22. kron(X, Y) is the Kronecker product between +*> the matrices X and Y. +*> +*> When DIF(2) is small, small changes in (A, B) can cause large changes +*> in the deflating subspace. An approximate (asymptotic) bound on the +*> maximum angular error in the computed deflating subspaces is +*> +*> EPS * norm((A, B)) / DIF(2), +*> +*> where EPS is the machine precision. +*> +*> The reciprocal norm of the projectors on the left and right +*> eigenspaces associated with (A11, B11) may be returned in PL and PR. +*> They are computed as follows. First we compute L and R so that +*> P*(A, B)*Q is block diagonal, where +*> +*> P = ( I -L ) n1 Q = ( I R ) n1 +*> ( 0 I ) n2 and ( 0 I ) n2 +*> n1 n2 n1 n2 +*> +*> and (L, R) is the solution to the generalized Sylvester equation +*> +*> A11*R - L*A22 = -A12 +*> B11*R - L*B22 = -B12 +*> +*> Then PL = (F-norm(L)**2+1)**(-1/2) and PR = (F-norm(R)**2+1)**(-1/2). +*> An approximate (asymptotic) bound on the average absolute error of +*> the selected eigenvalues is +*> +*> EPS * norm((A, B)) / PL. +*> +*> There are also global error bounds which valid for perturbations up +*> to a certain restriction: A lower bound (x) on the smallest +*> F-norm(E,F) for which an eigenvalue of (A11, B11) may move and +*> coalesce with an eigenvalue of (A22, B22) under perturbation (E,F), +*> (i.e. (A + E, B + F), is +*> +*> x = min(Difu,Difl)/((1/(PL*PL)+1/(PR*PR))**(1/2)+2*max(1/PL,1/PR)). +*> +*> An approximate bound on x can be computed from DIF(1:2), PL and PR. +*> +*> If y = ( F-norm(E,F) / x) <= 1, the angles between the perturbed +*> (L', R') and unperturbed (L, R) left and right deflating subspaces +*> associated with the selected cluster in the (1,1)-blocks can be +*> bounded as +*> +*> max-angle(L, L') <= arctan( y * PL / (1 - y * (1 - PL * PL)**(1/2)) +*> max-angle(R, R') <= arctan( y * PR / (1 - y * (1 - PR * PR)**(1/2)) +*> +*> See LAPACK User's Guide section 4.11 or the following references +*> for more information. +*> +*> Note that if the default method for computing the Frobenius-norm- +*> based estimate DIF is not wanted (see SLATDF), then the parameter +*> IDIFJB (see below) should be changed from 3 to 4 (routine SLATDF +*> (IJOB = 2 will be used)). See STGSYL for more details. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +*> Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +*> M.S. Moonen et al (eds), Linear Algebra for Large Scale and +*> Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +*> +*> [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +*> Eigenvalues of a Regular Matrix Pair (A, B) and Condition +*> Estimation: Theory, Algorithms and Software, +*> Report UMINF - 94.04, Department of Computing Science, Umea +*> University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working +*> Note 87. To appear in Numerical Algorithms, 1996. +*> +*> [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +*> for Solving the Generalized Sylvester Equation and Estimating the +*> Separation between Regular Matrix Pairs, Report UMINF - 93.23, +*> Department of Computing Science, Umea University, S-901 87 Umea, +*> Sweden, December 1993, Revised April 1994, Also as LAPACK Working +*> Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1, +*> 1996. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, + $ ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, M, PL, + $ PR, DIF, WORK, LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER IJOB, INFO, LDA, LDB, LDQ, LDZ, LIWORK, LWORK, + $ M, N + REAL PL, PR +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + REAL A( LDA, * ), ALPHAI( * ), ALPHAR( * ), + $ B( LDB, * ), BETA( * ), DIF( * ), Q( LDQ, * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IDIFJB + PARAMETER ( IDIFJB = 3 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, PAIR, SWAP, WANTD, WANTD1, WANTD2, + $ WANTP + INTEGER I, IERR, IJB, K, KASE, KK, KS, LIWMIN, LWMIN, + $ MN2, N1, N2 + REAL DSCALE, DSUM, EPS, RDSCAL, SMLNUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SLACPY, SLAG2, SLASSQ, STGEXC, STGSYL, + $ XERBLA +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + IF( IJOB.LT.0 .OR. IJOB.GT.5 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -14 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -16 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STGSEN', -INFO ) + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + IERR = 0 +* + WANTP = IJOB.EQ.1 .OR. IJOB.GE.4 + WANTD1 = IJOB.EQ.2 .OR. IJOB.EQ.4 + WANTD2 = IJOB.EQ.3 .OR. IJOB.EQ.5 + WANTD = WANTD1 .OR. WANTD2 +* +* Set M to the dimension of the specified pair of deflating +* subspaces. +* + M = 0 + PAIR = .FALSE. + IF( .NOT.LQUERY .OR. IJOB.NE.0 ) THEN + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + IF( K.LT.N ) THEN + IF( A( K+1, K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) + $ M = M + 2 + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE + END IF +* + IF( IJOB.EQ.1 .OR. IJOB.EQ.2 .OR. IJOB.EQ.4 ) THEN + LWMIN = MAX( 1, 4*N+16, 2*M*(N-M) ) + LIWMIN = MAX( 1, N+6 ) + ELSE IF( IJOB.EQ.3 .OR. IJOB.EQ.5 ) THEN + LWMIN = MAX( 1, 4*N+16, 4*M*(N-M) ) + LIWMIN = MAX( 1, 2*M*(N-M), N+6 ) + ELSE + LWMIN = MAX( 1, 4*N+16 ) + LIWMIN = 1 + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -22 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -24 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STGSEN', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.N .OR. M.EQ.0 ) THEN + IF( WANTP ) THEN + PL = ONE + PR = ONE + END IF + IF( WANTD ) THEN + DSCALE = ZERO + DSUM = ONE + DO 20 I = 1, N + CALL SLASSQ( N, A( 1, I ), 1, DSCALE, DSUM ) + CALL SLASSQ( N, B( 1, I ), 1, DSCALE, DSUM ) + 20 CONTINUE + DIF( 1 ) = DSCALE*SQRT( DSUM ) + DIF( 2 ) = DIF( 1 ) + END IF + GO TO 60 + END IF +* +* Collect the selected blocks at the top-left corner of (A, B). +* + KS = 0 + PAIR = .FALSE. + DO 30 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE +* + SWAP = SELECT( K ) + IF( K.LT.N ) THEN + IF( A( K+1, K ).NE.ZERO ) THEN + PAIR = .TRUE. + SWAP = SWAP .OR. SELECT( K+1 ) + END IF + END IF +* + IF( SWAP ) THEN + KS = KS + 1 +* +* Swap the K-th block to position KS. +* Perform the reordering of diagonal blocks in (A, B) +* by orthogonal transformation matrices and update +* Q and Z accordingly (if requested): +* + KK = K + IF( K.NE.KS ) + $ CALL STGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, + $ Z, LDZ, KK, KS, WORK, LWORK, IERR ) +* + IF( IERR.GT.0 ) THEN +* +* Swap is rejected: exit. +* + INFO = 1 + IF( WANTP ) THEN + PL = ZERO + PR = ZERO + END IF + IF( WANTD ) THEN + DIF( 1 ) = ZERO + DIF( 2 ) = ZERO + END IF + GO TO 60 + END IF +* + IF( PAIR ) + $ KS = KS + 1 + END IF + END IF + 30 CONTINUE + IF( WANTP ) THEN +* +* Solve generalized Sylvester equation for R and L +* and compute PL and PR. +* + N1 = M + N2 = N - M + I = N1 + 1 + IJB = 0 + CALL SLACPY( 'Full', N1, N2, A( 1, I ), LDA, WORK, N1 ) + CALL SLACPY( 'Full', N1, N2, B( 1, I ), LDB, WORK( N1*N2+1 ), + $ N1 ) + CALL STGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, + $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), N1, + $ DSCALE, DIF( 1 ), WORK( N1*N2*2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) +* +* Estimate the reciprocal of norms of "projections" onto left +* and right eigenspaces. +* + RDSCAL = ZERO + DSUM = ONE + CALL SLASSQ( N1*N2, WORK, 1, RDSCAL, DSUM ) + PL = RDSCAL*SQRT( DSUM ) + IF( PL.EQ.ZERO ) THEN + PL = ONE + ELSE + PL = DSCALE / ( SQRT( DSCALE*DSCALE / PL+PL )*SQRT( PL ) ) + END IF + RDSCAL = ZERO + DSUM = ONE + CALL SLASSQ( N1*N2, WORK( N1*N2+1 ), 1, RDSCAL, DSUM ) + PR = RDSCAL*SQRT( DSUM ) + IF( PR.EQ.ZERO ) THEN + PR = ONE + ELSE + PR = DSCALE / ( SQRT( DSCALE*DSCALE / PR+PR )*SQRT( PR ) ) + END IF + END IF +* + IF( WANTD ) THEN +* +* Compute estimates of Difu and Difl. +* + IF( WANTD1 ) THEN + N1 = M + N2 = N - M + I = N1 + 1 + IJB = IDIFJB +* +* Frobenius norm-based Difu-estimate. +* + CALL STGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, + $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), + $ N1, DSCALE, DIF( 1 ), WORK( 2*N1*N2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) +* +* Frobenius norm-based Difl-estimate. +* + CALL STGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, WORK, + $ N2, B( I, I ), LDB, B, LDB, WORK( N1*N2+1 ), + $ N2, DSCALE, DIF( 2 ), WORK( 2*N1*N2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) + ELSE +* +* +* Compute 1-norm-based estimates of Difu and Difl using +* reversed communication with SLACN2. In each step a +* generalized Sylvester equation or a transposed variant +* is solved. +* + KASE = 0 + N1 = M + N2 = N - M + I = N1 + 1 + IJB = 0 + MN2 = 2*N1*N2 +* +* 1-norm-based estimate of Difu. +* + 40 CONTINUE + CALL SLACN2( MN2, WORK( MN2+1 ), WORK, IWORK, DIF( 1 ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve generalized Sylvester equation. +* + CALL STGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, + $ WORK, N1, B, LDB, B( I, I ), LDB, + $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), + $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + ELSE +* +* Solve the transposed variant. +* + CALL STGSYL( 'T', IJB, N1, N2, A, LDA, A( I, I ), LDA, + $ WORK, N1, B, LDB, B( I, I ), LDB, + $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), + $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + END IF + GO TO 40 + END IF + DIF( 1 ) = DSCALE / DIF( 1 ) +* +* 1-norm-based estimate of Difl. +* + 50 CONTINUE + CALL SLACN2( MN2, WORK( MN2+1 ), WORK, IWORK, DIF( 2 ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve generalized Sylvester equation. +* + CALL STGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, + $ WORK, N2, B( I, I ), LDB, B, LDB, + $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), + $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + ELSE +* +* Solve the transposed variant. +* + CALL STGSYL( 'T', IJB, N2, N1, A( I, I ), LDA, A, LDA, + $ WORK, N2, B( I, I ), LDB, B, LDB, + $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), + $ WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + END IF + GO TO 50 + END IF + DIF( 2 ) = DSCALE / DIF( 2 ) +* + END IF + END IF +* + 60 CONTINUE +* +* Compute generalized eigenvalues of reordered pair (A, B) and +* normalize the generalized Schur form. +* + PAIR = .FALSE. + DO 70 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE +* + IF( K.LT.N ) THEN + IF( A( K+1, K ).NE.ZERO ) THEN + PAIR = .TRUE. + END IF + END IF +* + IF( PAIR ) THEN +* +* Compute the eigenvalue(s) at position K. +* + WORK( 1 ) = A( K, K ) + WORK( 2 ) = A( K+1, K ) + WORK( 3 ) = A( K, K+1 ) + WORK( 4 ) = A( K+1, K+1 ) + WORK( 5 ) = B( K, K ) + WORK( 6 ) = B( K+1, K ) + WORK( 7 ) = B( K, K+1 ) + WORK( 8 ) = B( K+1, K+1 ) + CALL SLAG2( WORK, 2, WORK( 5 ), 2, SMLNUM*EPS, BETA( K ), + $ BETA( K+1 ), ALPHAR( K ), ALPHAR( K+1 ), + $ ALPHAI( K ) ) + ALPHAI( K+1 ) = -ALPHAI( K ) +* + ELSE +* + IF( SIGN( ONE, B( K, K ) ).LT.ZERO ) THEN +* +* If B(K,K) is negative, make it positive +* + DO 80 I = 1, N + A( K, I ) = -A( K, I ) + B( K, I ) = -B( K, I ) + IF( WANTQ ) Q( I, K ) = -Q( I, K ) + 80 CONTINUE + END IF +* + ALPHAR( K ) = A( K, K ) + ALPHAI( K ) = ZERO + BETA( K ) = B( K, K ) +* + END IF + END IF + 70 CONTINUE +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of STGSEN +* + END diff --git a/dspl/liblapack/SRC/stgsja.f b/dspl/liblapack/SRC/stgsja.f new file mode 100644 index 0000000..2a6fc35 --- /dev/null +++ b/dspl/liblapack/SRC/stgsja.f @@ -0,0 +1,655 @@ +*> \brief \b STGSJA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STGSJA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, +* LDB, TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, +* Q, LDQ, WORK, NCYCLE, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBQ, JOBU, JOBV +* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, +* $ NCYCLE, P +* REAL TOLA, TOLB +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), Q( LDQ, * ), U( LDU, * ), +* $ V( LDV, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STGSJA computes the generalized singular value decomposition (GSVD) +*> of two real upper triangular (or trapezoidal) matrices A and B. +*> +*> On entry, it is assumed that matrices A and B have the following +*> forms, which may be obtained by the preprocessing subroutine SGGSVP +*> from a general M-by-N matrix A and P-by-N matrix B: +*> +*> N-K-L K L +*> A = K ( 0 A12 A13 ) if M-K-L >= 0; +*> L ( 0 0 A23 ) +*> M-K-L ( 0 0 0 ) +*> +*> N-K-L K L +*> A = K ( 0 A12 A13 ) if M-K-L < 0; +*> M-K ( 0 0 A23 ) +*> +*> N-K-L K L +*> B = L ( 0 0 B13 ) +*> P-L ( 0 0 0 ) +*> +*> where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular +*> upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, +*> otherwise A23 is (M-K)-by-L upper trapezoidal. +*> +*> On exit, +*> +*> U**T *A*Q = D1*( 0 R ), V**T *B*Q = D2*( 0 R ), +*> +*> where U, V and Q are orthogonal matrices. +*> R is a nonsingular upper triangular matrix, and D1 and D2 are +*> ``diagonal'' matrices, which are of the following structures: +*> +*> If M-K-L >= 0, +*> +*> K L +*> D1 = K ( I 0 ) +*> L ( 0 C ) +*> M-K-L ( 0 0 ) +*> +*> K L +*> D2 = L ( 0 S ) +*> P-L ( 0 0 ) +*> +*> N-K-L K L +*> ( 0 R ) = K ( 0 R11 R12 ) K +*> L ( 0 0 R22 ) L +*> +*> where +*> +*> C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), +*> S = diag( BETA(K+1), ... , BETA(K+L) ), +*> C**2 + S**2 = I. +*> +*> R is stored in A(1:K+L,N-K-L+1:N) on exit. +*> +*> If M-K-L < 0, +*> +*> K M-K K+L-M +*> D1 = K ( I 0 0 ) +*> M-K ( 0 C 0 ) +*> +*> K M-K K+L-M +*> D2 = M-K ( 0 S 0 ) +*> K+L-M ( 0 0 I ) +*> P-L ( 0 0 0 ) +*> +*> N-K-L K M-K K+L-M +*> ( 0 R ) = K ( 0 R11 R12 R13 ) +*> M-K ( 0 0 R22 R23 ) +*> K+L-M ( 0 0 0 R33 ) +*> +*> where +*> C = diag( ALPHA(K+1), ... , ALPHA(M) ), +*> S = diag( BETA(K+1), ... , BETA(M) ), +*> C**2 + S**2 = I. +*> +*> R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored +*> ( 0 R22 R23 ) +*> in B(M-K+1:L,N+M-K-L+1:N) on exit. +*> +*> The computation of the orthogonal transformation matrices U, V or Q +*> is optional. These matrices may either be formed explicitly, or they +*> may be postmultiplied into input matrices U1, V1, or Q1. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> = 'U': U must contain an orthogonal matrix U1 on entry, and +*> the product U1*U is returned; +*> = 'I': U is initialized to the unit matrix, and the +*> orthogonal matrix U is returned; +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> = 'V': V must contain an orthogonal matrix V1 on entry, and +*> the product V1*V is returned; +*> = 'I': V is initialized to the unit matrix, and the +*> orthogonal matrix V is returned; +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBQ +*> \verbatim +*> JOBQ is CHARACTER*1 +*> = 'Q': Q must contain an orthogonal matrix Q1 on entry, and +*> the product Q1*Q is returned; +*> = 'I': Q is initialized to the unit matrix, and the +*> orthogonal matrix Q is returned; +*> = 'N': Q is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> +*> K and L specify the subblocks in the input matrices A and B: +*> A23 = A(K+1:MIN(K+L,M),N-L+1:N) and B13 = B(1:L,N-L+1:N) +*> of A and B, whose GSVD is going to be computed by STGSJA. +*> See Further Details. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A(N-K+1:N,1:MIN(K+L,M) ) contains the triangular +*> matrix R or part of R. See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, if necessary, B(M-K+1:L,N+M-K-L+1:N) contains +*> a part of R. See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[in] TOLA +*> \verbatim +*> TOLA is REAL +*> \endverbatim +*> +*> \param[in] TOLB +*> \verbatim +*> TOLB is REAL +*> +*> TOLA and TOLB are the convergence criteria for the Jacobi- +*> Kogbetliantz iteration procedure. Generally, they are the +*> same as used in the preprocessing step, say +*> TOLA = max(M,N)*norm(A)*MACHEPS, +*> TOLB = max(P,N)*norm(B)*MACHEPS. +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is REAL array, dimension (N) +*> +*> On exit, ALPHA and BETA contain the generalized singular +*> value pairs of A and B; +*> ALPHA(1:K) = 1, +*> BETA(1:K) = 0, +*> and if M-K-L >= 0, +*> ALPHA(K+1:K+L) = diag(C), +*> BETA(K+1:K+L) = diag(S), +*> or if M-K-L < 0, +*> ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0 +*> BETA(K+1:M) = S, BETA(M+1:K+L) = 1. +*> Furthermore, if K+L < N, +*> ALPHA(K+L+1:N) = 0 and +*> BETA(K+L+1:N) = 0. +*> \endverbatim +*> +*> \param[in,out] U +*> \verbatim +*> U is REAL array, dimension (LDU,M) +*> On entry, if JOBU = 'U', U must contain a matrix U1 (usually +*> the orthogonal matrix returned by SGGSVP). +*> On exit, +*> if JOBU = 'I', U contains the orthogonal matrix U; +*> if JOBU = 'U', U contains the product U1*U. +*> If JOBU = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,M) if +*> JOBU = 'U'; LDU >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is REAL array, dimension (LDV,P) +*> On entry, if JOBV = 'V', V must contain a matrix V1 (usually +*> the orthogonal matrix returned by SGGSVP). +*> On exit, +*> if JOBV = 'I', V contains the orthogonal matrix V; +*> if JOBV = 'V', V contains the product V1*V. +*> If JOBV = 'N', V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,P) if +*> JOBV = 'V'; LDV >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ,N) +*> On entry, if JOBQ = 'Q', Q must contain a matrix Q1 (usually +*> the orthogonal matrix returned by SGGSVP). +*> On exit, +*> if JOBQ = 'I', Q contains the orthogonal matrix Q; +*> if JOBQ = 'Q', Q contains the product Q1*Q. +*> If JOBQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N) if +*> JOBQ = 'Q'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] NCYCLE +*> \verbatim +*> NCYCLE is INTEGER +*> The number of cycles required for convergence. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1: the procedure does not converge after MAXIT cycles. +*> \endverbatim +*> +*> \verbatim +*> Internal Parameters +*> =================== +*> +*> MAXIT INTEGER +*> MAXIT specifies the total loops that the iterative procedure +*> may take. If after MAXIT cycles, the routine fails to +*> converge, we return INFO = 1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> STGSJA essentially uses a variant of Kogbetliantz algorithm to reduce +*> min(L,M-K)-by-L triangular (or trapezoidal) matrix A23 and L-by-L +*> matrix B13 to the form: +*> +*> U1**T *A13*Q1 = C1*R1; V1**T *B13*Q1 = S1*R1, +*> +*> where U1, V1 and Q1 are orthogonal matrix, and Z**T is the transpose +*> of Z. C1 and S1 are diagonal matrices satisfying +*> +*> C1**2 + S1**2 = I, +*> +*> and R1 is an L-by-L nonsingular upper triangular matrix. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, + $ LDB, TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, + $ Q, LDQ, WORK, NCYCLE, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, + $ NCYCLE, P + REAL TOLA, TOLB +* .. +* .. Array Arguments .. + REAL A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), Q( LDQ, * ), U( LDU, * ), + $ V( LDV, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 40 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. +* + LOGICAL INITQ, INITU, INITV, UPPER, WANTQ, WANTU, WANTV + INTEGER I, J, KCYCLE + REAL A1, A2, A3, B1, B2, B3, CSQ, CSU, CSV, ERROR, + $ GAMMA, RWK, SNQ, SNU, SNV, SSMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SCOPY, SLAGS2, SLAPLL, SLARTG, SLASET, SROT, + $ SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + INITU = LSAME( JOBU, 'I' ) + WANTU = INITU .OR. LSAME( JOBU, 'U' ) +* + INITV = LSAME( JOBV, 'I' ) + WANTV = INITV .OR. LSAME( JOBV, 'V' ) +* + INITQ = LSAME( JOBQ, 'I' ) + WANTQ = INITQ .OR. LSAME( JOBQ, 'Q' ) +* + INFO = 0 + IF( .NOT.( INITU .OR. WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( INITV .OR. WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( INITQ .OR. WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( P.LT.0 ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -12 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -18 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -20 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -22 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STGSJA', -INFO ) + RETURN + END IF +* +* Initialize U, V and Q, if necessary +* + IF( INITU ) + $ CALL SLASET( 'Full', M, M, ZERO, ONE, U, LDU ) + IF( INITV ) + $ CALL SLASET( 'Full', P, P, ZERO, ONE, V, LDV ) + IF( INITQ ) + $ CALL SLASET( 'Full', N, N, ZERO, ONE, Q, LDQ ) +* +* Loop until convergence +* + UPPER = .FALSE. + DO 40 KCYCLE = 1, MAXIT +* + UPPER = .NOT.UPPER +* + DO 20 I = 1, L - 1 + DO 10 J = I + 1, L +* + A1 = ZERO + A2 = ZERO + A3 = ZERO + IF( K+I.LE.M ) + $ A1 = A( K+I, N-L+I ) + IF( K+J.LE.M ) + $ A3 = A( K+J, N-L+J ) +* + B1 = B( I, N-L+I ) + B3 = B( J, N-L+J ) +* + IF( UPPER ) THEN + IF( K+I.LE.M ) + $ A2 = A( K+I, N-L+J ) + B2 = B( I, N-L+J ) + ELSE + IF( K+J.LE.M ) + $ A2 = A( K+J, N-L+I ) + B2 = B( J, N-L+I ) + END IF +* + CALL SLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, + $ CSV, SNV, CSQ, SNQ ) +* +* Update (K+I)-th and (K+J)-th rows of matrix A: U**T *A +* + IF( K+J.LE.M ) + $ CALL SROT( L, A( K+J, N-L+1 ), LDA, A( K+I, N-L+1 ), + $ LDA, CSU, SNU ) +* +* Update I-th and J-th rows of matrix B: V**T *B +* + CALL SROT( L, B( J, N-L+1 ), LDB, B( I, N-L+1 ), LDB, + $ CSV, SNV ) +* +* Update (N-L+I)-th and (N-L+J)-th columns of matrices +* A and B: A*Q and B*Q +* + CALL SROT( MIN( K+L, M ), A( 1, N-L+J ), 1, + $ A( 1, N-L+I ), 1, CSQ, SNQ ) +* + CALL SROT( L, B( 1, N-L+J ), 1, B( 1, N-L+I ), 1, CSQ, + $ SNQ ) +* + IF( UPPER ) THEN + IF( K+I.LE.M ) + $ A( K+I, N-L+J ) = ZERO + B( I, N-L+J ) = ZERO + ELSE + IF( K+J.LE.M ) + $ A( K+J, N-L+I ) = ZERO + B( J, N-L+I ) = ZERO + END IF +* +* Update orthogonal matrices U, V, Q, if desired. +* + IF( WANTU .AND. K+J.LE.M ) + $ CALL SROT( M, U( 1, K+J ), 1, U( 1, K+I ), 1, CSU, + $ SNU ) +* + IF( WANTV ) + $ CALL SROT( P, V( 1, J ), 1, V( 1, I ), 1, CSV, SNV ) +* + IF( WANTQ ) + $ CALL SROT( N, Q( 1, N-L+J ), 1, Q( 1, N-L+I ), 1, CSQ, + $ SNQ ) +* + 10 CONTINUE + 20 CONTINUE +* + IF( .NOT.UPPER ) THEN +* +* The matrices A13 and B13 were lower triangular at the start +* of the cycle, and are now upper triangular. +* +* Convergence test: test the parallelism of the corresponding +* rows of A and B. +* + ERROR = ZERO + DO 30 I = 1, MIN( L, M-K ) + CALL SCOPY( L-I+1, A( K+I, N-L+I ), LDA, WORK, 1 ) + CALL SCOPY( L-I+1, B( I, N-L+I ), LDB, WORK( L+1 ), 1 ) + CALL SLAPLL( L-I+1, WORK, 1, WORK( L+1 ), 1, SSMIN ) + ERROR = MAX( ERROR, SSMIN ) + 30 CONTINUE +* + IF( ABS( ERROR ).LE.MIN( TOLA, TOLB ) ) + $ GO TO 50 + END IF +* +* End of cycle loop +* + 40 CONTINUE +* +* The algorithm has not converged after MAXIT cycles. +* + INFO = 1 + GO TO 100 +* + 50 CONTINUE +* +* If ERROR <= MIN(TOLA,TOLB), then the algorithm has converged. +* Compute the generalized singular value pairs (ALPHA, BETA), and +* set the triangular matrix R to array A. +* + DO 60 I = 1, K + ALPHA( I ) = ONE + BETA( I ) = ZERO + 60 CONTINUE +* + DO 70 I = 1, MIN( L, M-K ) +* + A1 = A( K+I, N-L+I ) + B1 = B( I, N-L+I ) +* + IF( A1.NE.ZERO ) THEN + GAMMA = B1 / A1 +* +* change sign if necessary +* + IF( GAMMA.LT.ZERO ) THEN + CALL SSCAL( L-I+1, -ONE, B( I, N-L+I ), LDB ) + IF( WANTV ) + $ CALL SSCAL( P, -ONE, V( 1, I ), 1 ) + END IF +* + CALL SLARTG( ABS( GAMMA ), ONE, BETA( K+I ), ALPHA( K+I ), + $ RWK ) +* + IF( ALPHA( K+I ).GE.BETA( K+I ) ) THEN + CALL SSCAL( L-I+1, ONE / ALPHA( K+I ), A( K+I, N-L+I ), + $ LDA ) + ELSE + CALL SSCAL( L-I+1, ONE / BETA( K+I ), B( I, N-L+I ), + $ LDB ) + CALL SCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ), + $ LDA ) + END IF +* + ELSE +* + ALPHA( K+I ) = ZERO + BETA( K+I ) = ONE + CALL SCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ), + $ LDA ) +* + END IF +* + 70 CONTINUE +* +* Post-assignment +* + DO 80 I = M + 1, K + L + ALPHA( I ) = ZERO + BETA( I ) = ONE + 80 CONTINUE +* + IF( K+L.LT.N ) THEN + DO 90 I = K + L + 1, N + ALPHA( I ) = ZERO + BETA( I ) = ZERO + 90 CONTINUE + END IF +* + 100 CONTINUE + NCYCLE = KCYCLE + RETURN +* +* End of STGSJA +* + END diff --git a/dspl/liblapack/SRC/stgsna.f b/dspl/liblapack/SRC/stgsna.f new file mode 100644 index 0000000..2ff38d1 --- /dev/null +++ b/dspl/liblapack/SRC/stgsna.f @@ -0,0 +1,700 @@ +*> \brief \b STGSNA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STGSNA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STGSNA( JOB, HOWMNY, SELECT, N, A, LDA, B, LDB, VL, +* LDVL, VR, LDVR, S, DIF, MM, M, WORK, LWORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER HOWMNY, JOB +* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* INTEGER IWORK( * ) +* REAL A( LDA, * ), B( LDB, * ), DIF( * ), S( * ), +* $ VL( LDVL, * ), VR( LDVR, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STGSNA estimates reciprocal condition numbers for specified +*> eigenvalues and/or eigenvectors of a matrix pair (A, B) in +*> generalized real Schur canonical form (or of any matrix pair +*> (Q*A*Z**T, Q*B*Z**T) with orthogonal matrices Q and Z, where +*> Z**T denotes the transpose of Z. +*> +*> (A, B) must be in generalized real Schur form (as returned by SGGES), +*> i.e. A is block upper triangular with 1-by-1 and 2-by-2 diagonal +*> blocks. B is upper triangular. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies whether condition numbers are required for +*> eigenvalues (S) or eigenvectors (DIF): +*> = 'E': for eigenvalues only (S); +*> = 'V': for eigenvectors only (DIF); +*> = 'B': for both eigenvalues and eigenvectors (S and DIF). +*> \endverbatim +*> +*> \param[in] HOWMNY +*> \verbatim +*> HOWMNY is CHARACTER*1 +*> = 'A': compute condition numbers for all eigenpairs; +*> = 'S': compute condition numbers for selected eigenpairs +*> specified by the array SELECT. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> If HOWMNY = 'S', SELECT specifies the eigenpairs for which +*> condition numbers are required. To select condition numbers +*> for the eigenpair corresponding to a real eigenvalue w(j), +*> SELECT(j) must be set to .TRUE.. To select condition numbers +*> corresponding to a complex conjugate pair of eigenvalues w(j) +*> and w(j+1), either SELECT(j) or SELECT(j+1) or both, must be +*> set to .TRUE.. +*> If HOWMNY = 'A', SELECT is not referenced. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the square matrix pair (A, B). N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The upper quasi-triangular matrix A in the pair (A,B). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> The upper triangular matrix B in the pair (A,B). +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL array, dimension (LDVL,M) +*> If JOB = 'E' or 'B', VL must contain left eigenvectors of +*> (A, B), corresponding to the eigenpairs specified by HOWMNY +*> and SELECT. The eigenvectors must be stored in consecutive +*> columns of VL, as returned by STGEVC. +*> If JOB = 'V', VL is not referenced. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. LDVL >= 1. +*> If JOB = 'E' or 'B', LDVL >= N. +*> \endverbatim +*> +*> \param[in] VR +*> \verbatim +*> VR is REAL array, dimension (LDVR,M) +*> If JOB = 'E' or 'B', VR must contain right eigenvectors of +*> (A, B), corresponding to the eigenpairs specified by HOWMNY +*> and SELECT. The eigenvectors must be stored in consecutive +*> columns ov VR, as returned by STGEVC. +*> If JOB = 'V', VR is not referenced. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. LDVR >= 1. +*> If JOB = 'E' or 'B', LDVR >= N. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (MM) +*> If JOB = 'E' or 'B', the reciprocal condition numbers of the +*> selected eigenvalues, stored in consecutive elements of the +*> array. For a complex conjugate pair of eigenvalues two +*> consecutive elements of S are set to the same value. Thus +*> S(j), DIF(j), and the j-th columns of VL and VR all +*> correspond to the same eigenpair (but not in general the +*> j-th eigenpair, unless all eigenpairs are selected). +*> If JOB = 'V', S is not referenced. +*> \endverbatim +*> +*> \param[out] DIF +*> \verbatim +*> DIF is REAL array, dimension (MM) +*> If JOB = 'V' or 'B', the estimated reciprocal condition +*> numbers of the selected eigenvectors, stored in consecutive +*> elements of the array. For a complex eigenvector two +*> consecutive elements of DIF are set to the same value. If +*> the eigenvalues cannot be reordered to compute DIF(j), DIF(j) +*> is set to 0; this can only occur when the true value would be +*> very small anyway. +*> If JOB = 'E', DIF is not referenced. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of elements in the arrays S and DIF. MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of elements of the arrays S and DIF used to store +*> the specified condition numbers; for each selected real +*> eigenvalue one element is used, and for each selected complex +*> conjugate pair of eigenvalues, two elements are used. +*> If HOWMNY = 'A', M is set to N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> If JOB = 'V' or 'B' LWORK >= 2*N*(N+2)+16. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N + 6) +*> If JOB = 'E', IWORK is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> =0: Successful exit +*> <0: If INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The reciprocal of the condition number of a generalized eigenvalue +*> w = (a, b) is defined as +*> +*> S(w) = (|u**TAv|**2 + |u**TBv|**2)**(1/2) / (norm(u)*norm(v)) +*> +*> where u and v are the left and right eigenvectors of (A, B) +*> corresponding to w; |z| denotes the absolute value of the complex +*> number, and norm(u) denotes the 2-norm of the vector u. +*> The pair (a, b) corresponds to an eigenvalue w = a/b (= u**TAv/u**TBv) +*> of the matrix pair (A, B). If both a and b equal zero, then (A B) is +*> singular and S(I) = -1 is returned. +*> +*> An approximate error bound on the chordal distance between the i-th +*> computed generalized eigenvalue w and the corresponding exact +*> eigenvalue lambda is +*> +*> chord(w, lambda) <= EPS * norm(A, B) / S(I) +*> +*> where EPS is the machine precision. +*> +*> The reciprocal of the condition number DIF(i) of right eigenvector u +*> and left eigenvector v corresponding to the generalized eigenvalue w +*> is defined as follows: +*> +*> a) If the i-th eigenvalue w = (a,b) is real +*> +*> Suppose U and V are orthogonal transformations such that +*> +*> U**T*(A, B)*V = (S, T) = ( a * ) ( b * ) 1 +*> ( 0 S22 ),( 0 T22 ) n-1 +*> 1 n-1 1 n-1 +*> +*> Then the reciprocal condition number DIF(i) is +*> +*> Difl((a, b), (S22, T22)) = sigma-min( Zl ), +*> +*> where sigma-min(Zl) denotes the smallest singular value of the +*> 2(n-1)-by-2(n-1) matrix +*> +*> Zl = [ kron(a, In-1) -kron(1, S22) ] +*> [ kron(b, In-1) -kron(1, T22) ] . +*> +*> Here In-1 is the identity matrix of size n-1. kron(X, Y) is the +*> Kronecker product between the matrices X and Y. +*> +*> Note that if the default method for computing DIF(i) is wanted +*> (see SLATDF), then the parameter DIFDRI (see below) should be +*> changed from 3 to 4 (routine SLATDF(IJOB = 2 will be used)). +*> See STGSYL for more details. +*> +*> b) If the i-th and (i+1)-th eigenvalues are complex conjugate pair, +*> +*> Suppose U and V are orthogonal transformations such that +*> +*> U**T*(A, B)*V = (S, T) = ( S11 * ) ( T11 * ) 2 +*> ( 0 S22 ),( 0 T22) n-2 +*> 2 n-2 2 n-2 +*> +*> and (S11, T11) corresponds to the complex conjugate eigenvalue +*> pair (w, conjg(w)). There exist unitary matrices U1 and V1 such +*> that +*> +*> U1**T*S11*V1 = ( s11 s12 ) and U1**T*T11*V1 = ( t11 t12 ) +*> ( 0 s22 ) ( 0 t22 ) +*> +*> where the generalized eigenvalues w = s11/t11 and +*> conjg(w) = s22/t22. +*> +*> Then the reciprocal condition number DIF(i) is bounded by +*> +*> min( d1, max( 1, |real(s11)/real(s22)| )*d2 ) +*> +*> where, d1 = Difl((s11, t11), (s22, t22)) = sigma-min(Z1), where +*> Z1 is the complex 2-by-2 matrix +*> +*> Z1 = [ s11 -s22 ] +*> [ t11 -t22 ], +*> +*> This is done by computing (using real arithmetic) the +*> roots of the characteristical polynomial det(Z1**T * Z1 - lambda I), +*> where Z1**T denotes the transpose of Z1 and det(X) denotes +*> the determinant of X. +*> +*> and d2 is an upper bound on Difl((S11, T11), (S22, T22)), i.e. an +*> upper bound on sigma-min(Z2), where Z2 is (2n-2)-by-(2n-2) +*> +*> Z2 = [ kron(S11**T, In-2) -kron(I2, S22) ] +*> [ kron(T11**T, In-2) -kron(I2, T22) ] +*> +*> Note that if the default method for computing DIF is wanted (see +*> SLATDF), then the parameter DIFDRI (see below) should be changed +*> from 3 to 4 (routine SLATDF(IJOB = 2 will be used)). See STGSYL +*> for more details. +*> +*> For each eigenvalue/vector specified by SELECT, DIF stores a +*> Frobenius norm-based estimate of Difl. +*> +*> An approximate error bound for the i-th computed eigenvector VL(i) or +*> VR(i) is given by +*> +*> EPS * norm(A, B) / DIF(i). +*> +*> See ref. [2-3] for more details and further references. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +*> Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +*> M.S. Moonen et al (eds), Linear Algebra for Large Scale and +*> Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +*> +*> [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +*> Eigenvalues of a Regular Matrix Pair (A, B) and Condition +*> Estimation: Theory, Algorithms and Software, +*> Report UMINF - 94.04, Department of Computing Science, Umea +*> University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working +*> Note 87. To appear in Numerical Algorithms, 1996. +*> +*> [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +*> for Solving the Generalized Sylvester Equation and Estimating the +*> Separation between Regular Matrix Pairs, Report UMINF - 93.23, +*> Department of Computing Science, Umea University, S-901 87 Umea, +*> Sweden, December 1993, Revised April 1994, Also as LAPACK Working +*> Note 75. To appear in ACM Trans. on Math. Software, Vol 22, +*> No 1, 1996. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STGSNA( JOB, HOWMNY, SELECT, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, S, DIF, MM, M, WORK, LWORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, JOB + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + REAL A( LDA, * ), B( LDB, * ), DIF( * ), S( * ), + $ VL( LDVL, * ), VR( LDVR, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER DIFDRI + PARAMETER ( DIFDRI = 3 ) + REAL ZERO, ONE, TWO, FOUR + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0, + $ FOUR = 4.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, PAIR, SOMCON, WANTBH, WANTDF, WANTS + INTEGER I, IERR, IFST, ILST, IZ, K, KS, LWMIN, N1, N2 + REAL ALPHAI, ALPHAR, ALPRQT, BETA, C1, C2, COND, + $ EPS, LNRM, RNRM, ROOT1, ROOT2, SCALE, SMLNUM, + $ TMPII, TMPIR, TMPRI, TMPRR, UHAV, UHAVI, UHBV, + $ UHBVI +* .. +* .. Local Arrays .. + REAL DUMMY( 1 ), DUMMY1( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT, SLAMCH, SLAPY2, SNRM2 + EXTERNAL LSAME, SDOT, SLAMCH, SLAPY2, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SGEMV, SLACPY, SLAG2, STGEXC, STGSYL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTDF = LSAME( JOB, 'V' ) .OR. WANTBH +* + SOMCON = LSAME( HOWMNY, 'S' ) +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.WANTS .AND. .NOT.WANTDF ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( WANTS .AND. LDVL.LT.N ) THEN + INFO = -10 + ELSE IF( WANTS .AND. LDVR.LT.N ) THEN + INFO = -12 + ELSE +* +* Set M to the number of eigenpairs for which condition numbers +* are required, and test MM. +* + IF( SOMCON ) THEN + M = 0 + PAIR = .FALSE. + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + IF( K.LT.N ) THEN + IF( A( K+1, K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) + $ M = M + 2 + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE + ELSE + M = N + END IF +* + IF( N.EQ.0 ) THEN + LWMIN = 1 + ELSE IF( LSAME( JOB, 'V' ) .OR. LSAME( JOB, 'B' ) ) THEN + LWMIN = 2*N*( N + 2 ) + 16 + ELSE + LWMIN = N + END IF + WORK( 1 ) = LWMIN +* + IF( MM.LT.M ) THEN + INFO = -15 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STGSNA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + KS = 0 + PAIR = .FALSE. +* + DO 20 K = 1, N +* +* Determine whether A(k,k) begins a 1-by-1 or 2-by-2 block. +* + IF( PAIR ) THEN + PAIR = .FALSE. + GO TO 20 + ELSE + IF( K.LT.N ) + $ PAIR = A( K+1, K ).NE.ZERO + END IF +* +* Determine whether condition numbers are required for the k-th +* eigenpair. +* + IF( SOMCON ) THEN + IF( PAIR ) THEN + IF( .NOT.SELECT( K ) .AND. .NOT.SELECT( K+1 ) ) + $ GO TO 20 + ELSE + IF( .NOT.SELECT( K ) ) + $ GO TO 20 + END IF + END IF +* + KS = KS + 1 +* + IF( WANTS ) THEN +* +* Compute the reciprocal condition number of the k-th +* eigenvalue. +* + IF( PAIR ) THEN +* +* Complex eigenvalue pair. +* + RNRM = SLAPY2( SNRM2( N, VR( 1, KS ), 1 ), + $ SNRM2( N, VR( 1, KS+1 ), 1 ) ) + LNRM = SLAPY2( SNRM2( N, VL( 1, KS ), 1 ), + $ SNRM2( N, VL( 1, KS+1 ), 1 ) ) + CALL SGEMV( 'N', N, N, ONE, A, LDA, VR( 1, KS ), 1, ZERO, + $ WORK, 1 ) + TMPRR = SDOT( N, WORK, 1, VL( 1, KS ), 1 ) + TMPRI = SDOT( N, WORK, 1, VL( 1, KS+1 ), 1 ) + CALL SGEMV( 'N', N, N, ONE, A, LDA, VR( 1, KS+1 ), 1, + $ ZERO, WORK, 1 ) + TMPII = SDOT( N, WORK, 1, VL( 1, KS+1 ), 1 ) + TMPIR = SDOT( N, WORK, 1, VL( 1, KS ), 1 ) + UHAV = TMPRR + TMPII + UHAVI = TMPIR - TMPRI + CALL SGEMV( 'N', N, N, ONE, B, LDB, VR( 1, KS ), 1, ZERO, + $ WORK, 1 ) + TMPRR = SDOT( N, WORK, 1, VL( 1, KS ), 1 ) + TMPRI = SDOT( N, WORK, 1, VL( 1, KS+1 ), 1 ) + CALL SGEMV( 'N', N, N, ONE, B, LDB, VR( 1, KS+1 ), 1, + $ ZERO, WORK, 1 ) + TMPII = SDOT( N, WORK, 1, VL( 1, KS+1 ), 1 ) + TMPIR = SDOT( N, WORK, 1, VL( 1, KS ), 1 ) + UHBV = TMPRR + TMPII + UHBVI = TMPIR - TMPRI + UHAV = SLAPY2( UHAV, UHAVI ) + UHBV = SLAPY2( UHBV, UHBVI ) + COND = SLAPY2( UHAV, UHBV ) + S( KS ) = COND / ( RNRM*LNRM ) + S( KS+1 ) = S( KS ) +* + ELSE +* +* Real eigenvalue. +* + RNRM = SNRM2( N, VR( 1, KS ), 1 ) + LNRM = SNRM2( N, VL( 1, KS ), 1 ) + CALL SGEMV( 'N', N, N, ONE, A, LDA, VR( 1, KS ), 1, ZERO, + $ WORK, 1 ) + UHAV = SDOT( N, WORK, 1, VL( 1, KS ), 1 ) + CALL SGEMV( 'N', N, N, ONE, B, LDB, VR( 1, KS ), 1, ZERO, + $ WORK, 1 ) + UHBV = SDOT( N, WORK, 1, VL( 1, KS ), 1 ) + COND = SLAPY2( UHAV, UHBV ) + IF( COND.EQ.ZERO ) THEN + S( KS ) = -ONE + ELSE + S( KS ) = COND / ( RNRM*LNRM ) + END IF + END IF + END IF +* + IF( WANTDF ) THEN + IF( N.EQ.1 ) THEN + DIF( KS ) = SLAPY2( A( 1, 1 ), B( 1, 1 ) ) + GO TO 20 + END IF +* +* Estimate the reciprocal condition number of the k-th +* eigenvectors. + IF( PAIR ) THEN +* +* Copy the 2-by 2 pencil beginning at (A(k,k), B(k, k)). +* Compute the eigenvalue(s) at position K. +* + WORK( 1 ) = A( K, K ) + WORK( 2 ) = A( K+1, K ) + WORK( 3 ) = A( K, K+1 ) + WORK( 4 ) = A( K+1, K+1 ) + WORK( 5 ) = B( K, K ) + WORK( 6 ) = B( K+1, K ) + WORK( 7 ) = B( K, K+1 ) + WORK( 8 ) = B( K+1, K+1 ) + CALL SLAG2( WORK, 2, WORK( 5 ), 2, SMLNUM*EPS, BETA, + $ DUMMY1( 1 ), ALPHAR, DUMMY( 1 ), ALPHAI ) + ALPRQT = ONE + C1 = TWO*( ALPHAR*ALPHAR+ALPHAI*ALPHAI+BETA*BETA ) + C2 = FOUR*BETA*BETA*ALPHAI*ALPHAI + ROOT1 = C1 + SQRT( C1*C1-4.0*C2 ) + ROOT2 = C2 / ROOT1 + ROOT1 = ROOT1 / TWO + COND = MIN( SQRT( ROOT1 ), SQRT( ROOT2 ) ) + END IF +* +* Copy the matrix (A, B) to the array WORK and swap the +* diagonal block beginning at A(k,k) to the (1,1) position. +* + CALL SLACPY( 'Full', N, N, A, LDA, WORK, N ) + CALL SLACPY( 'Full', N, N, B, LDB, WORK( N*N+1 ), N ) + IFST = K + ILST = 1 +* + CALL STGEXC( .FALSE., .FALSE., N, WORK, N, WORK( N*N+1 ), N, + $ DUMMY, 1, DUMMY1, 1, IFST, ILST, + $ WORK( N*N*2+1 ), LWORK-2*N*N, IERR ) +* + IF( IERR.GT.0 ) THEN +* +* Ill-conditioned problem - swap rejected. +* + DIF( KS ) = ZERO + ELSE +* +* Reordering successful, solve generalized Sylvester +* equation for R and L, +* A22 * R - L * A11 = A12 +* B22 * R - L * B11 = B12, +* and compute estimate of Difl((A11,B11), (A22, B22)). +* + N1 = 1 + IF( WORK( 2 ).NE.ZERO ) + $ N1 = 2 + N2 = N - N1 + IF( N2.EQ.0 ) THEN + DIF( KS ) = COND + ELSE + I = N*N + 1 + IZ = 2*N*N + 1 + CALL STGSYL( 'N', DIFDRI, N2, N1, WORK( N*N1+N1+1 ), + $ N, WORK, N, WORK( N1+1 ), N, + $ WORK( N*N1+N1+I ), N, WORK( I ), N, + $ WORK( N1+I ), N, SCALE, DIF( KS ), + $ WORK( IZ+1 ), LWORK-2*N*N, IWORK, IERR ) +* + IF( PAIR ) + $ DIF( KS ) = MIN( MAX( ONE, ALPRQT )*DIF( KS ), + $ COND ) + END IF + END IF + IF( PAIR ) + $ DIF( KS+1 ) = DIF( KS ) + END IF + IF( PAIR ) + $ KS = KS + 1 +* + 20 CONTINUE + WORK( 1 ) = LWMIN + RETURN +* +* End of STGSNA +* + END diff --git a/dspl/liblapack/SRC/stgsy2.f b/dspl/liblapack/SRC/stgsy2.f new file mode 100644 index 0000000..ca9946a --- /dev/null +++ b/dspl/liblapack/SRC/stgsy2.f @@ -0,0 +1,1075 @@ +*> \brief \b STGSY2 solves the generalized Sylvester equation (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STGSY2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STGSY2( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, +* LDD, E, LDE, F, LDF, SCALE, RDSUM, RDSCAL, +* IWORK, PQ, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, M, N, +* $ PQ +* REAL RDSCAL, RDSUM, SCALE +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), B( LDB, * ), C( LDC, * ), +* $ D( LDD, * ), E( LDE, * ), F( LDF, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STGSY2 solves the generalized Sylvester equation: +*> +*> A * R - L * B = scale * C (1) +*> D * R - L * E = scale * F, +*> +*> using Level 1 and 2 BLAS. where R and L are unknown M-by-N matrices, +*> (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M, +*> N-by-N and M-by-N, respectively, with real entries. (A, D) and (B, E) +*> must be in generalized Schur canonical form, i.e. A, B are upper +*> quasi triangular and D, E are upper triangular. The solution (R, L) +*> overwrites (C, F). 0 <= SCALE <= 1 is an output scaling factor +*> chosen to avoid overflow. +*> +*> In matrix notation solving equation (1) corresponds to solve +*> Z*x = scale*b, where Z is defined as +*> +*> Z = [ kron(In, A) -kron(B**T, Im) ] (2) +*> [ kron(In, D) -kron(E**T, Im) ], +*> +*> Ik is the identity matrix of size k and X**T is the transpose of X. +*> kron(X, Y) is the Kronecker product between the matrices X and Y. +*> In the process of solving (1), we solve a number of such systems +*> where Dim(In), Dim(In) = 1 or 2. +*> +*> If TRANS = 'T', solve the transposed system Z**T*y = scale*b for y, +*> which is equivalent to solve for R and L in +*> +*> A**T * R + D**T * L = scale * C (3) +*> R * B**T + L * E**T = scale * -F +*> +*> This case is used to compute an estimate of Dif[(A, D), (B, E)] = +*> sigma_min(Z) using reverse communicaton with SLACON. +*> +*> STGSY2 also (IJOB >= 1) contributes to the computation in STGSYL +*> of an upper bound on the separation between to matrix pairs. Then +*> the input (A, D), (B, E) are sub-pencils of the matrix pair in +*> STGSYL. See STGSYL for details. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N', solve the generalized Sylvester equation (1). +*> = 'T': solve the 'transposed' system (3). +*> \endverbatim +*> +*> \param[in] IJOB +*> \verbatim +*> IJOB is INTEGER +*> Specifies what kind of functionality to be performed. +*> = 0: solve (1) only. +*> = 1: A contribution from this subsystem to a Frobenius +*> norm-based estimate of the separation between two matrix +*> pairs is computed. (look ahead strategy is used). +*> = 2: A contribution from this subsystem to a Frobenius +*> norm-based estimate of the separation between two matrix +*> pairs is computed. (SGECON on sub-systems is used.) +*> Not referenced if TRANS = 'T'. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the order of A and D, and the row +*> dimension of C, F, R and L. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of B and E, and the column +*> dimension of C, F, R and L. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA, M) +*> On entry, A contains an upper quasi triangular matrix. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the matrix A. LDA >= max(1, M). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB, N) +*> On entry, B contains an upper quasi triangular matrix. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the matrix B. LDB >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC, N) +*> On entry, C contains the right-hand-side of the first matrix +*> equation in (1). +*> On exit, if IJOB = 0, C has been overwritten by the +*> solution R. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the matrix C. LDC >= max(1, M). +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (LDD, M) +*> On entry, D contains an upper triangular matrix. +*> \endverbatim +*> +*> \param[in] LDD +*> \verbatim +*> LDD is INTEGER +*> The leading dimension of the matrix D. LDD >= max(1, M). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension (LDE, N) +*> On entry, E contains an upper triangular matrix. +*> \endverbatim +*> +*> \param[in] LDE +*> \verbatim +*> LDE is INTEGER +*> The leading dimension of the matrix E. LDE >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] F +*> \verbatim +*> F is REAL array, dimension (LDF, N) +*> On entry, F contains the right-hand-side of the second matrix +*> equation in (1). +*> On exit, if IJOB = 0, F has been overwritten by the +*> solution L. +*> \endverbatim +*> +*> \param[in] LDF +*> \verbatim +*> LDF is INTEGER +*> The leading dimension of the matrix F. LDF >= max(1, M). +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL +*> On exit, 0 <= SCALE <= 1. If 0 < SCALE < 1, the solutions +*> R and L (C and F on entry) will hold the solutions to a +*> slightly perturbed system but the input matrices A, B, D and +*> E have not been changed. If SCALE = 0, R and L will hold the +*> solutions to the homogeneous system with C = F = 0. Normally, +*> SCALE = 1. +*> \endverbatim +*> +*> \param[in,out] RDSUM +*> \verbatim +*> RDSUM is REAL +*> On entry, the sum of squares of computed contributions to +*> the Dif-estimate under computation by STGSYL, where the +*> scaling factor RDSCAL (see below) has been factored out. +*> On exit, the corresponding sum of squares updated with the +*> contributions from the current sub-system. +*> If TRANS = 'T' RDSUM is not touched. +*> NOTE: RDSUM only makes sense when STGSY2 is called by STGSYL. +*> \endverbatim +*> +*> \param[in,out] RDSCAL +*> \verbatim +*> RDSCAL is REAL +*> On entry, scaling factor used to prevent overflow in RDSUM. +*> On exit, RDSCAL is updated w.r.t. the current contributions +*> in RDSUM. +*> If TRANS = 'T', RDSCAL is not touched. +*> NOTE: RDSCAL only makes sense when STGSY2 is called by +*> STGSYL. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (M+N+2) +*> \endverbatim +*> +*> \param[out] PQ +*> \verbatim +*> PQ is INTEGER +*> On exit, the number of subsystems (of size 2-by-2, 4-by-4 and +*> 8-by-8) solved by this routine. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> On exit, if INFO is set to +*> =0: Successful exit +*> <0: If INFO = -i, the i-th argument had an illegal value. +*> >0: The matrix pairs (A, D) and (B, E) have common or very +*> close eigenvalues. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYauxiliary +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== + SUBROUTINE STGSY2( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, RDSUM, RDSCAL, + $ IWORK, PQ, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, M, N, + $ PQ + REAL RDSCAL, RDSUM, SCALE +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ D( LDD, * ), E( LDE, * ), F( LDF, * ) +* .. +* +* ===================================================================== +* Replaced various illegal calls to SCOPY by calls to SLASET. +* Sven Hammarling, 27/5/02. +* +* .. Parameters .. + INTEGER LDZ + PARAMETER ( LDZ = 8 ) + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER I, IE, IERR, II, IS, ISP1, J, JE, JJ, JS, JSP1, + $ K, MB, NB, P, Q, ZDIM + REAL ALPHA, SCALOC +* .. +* .. Local Arrays .. + INTEGER IPIV( LDZ ), JPIV( LDZ ) + REAL RHS( LDZ ), Z( LDZ, LDZ ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMM, SGEMV, SGER, SGESC2, + $ SGETC2, SSCAL, SLASET, SLATDF, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode and test input parameters +* + INFO = 0 + IERR = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -1 + ELSE IF( NOTRAN ) THEN + IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.2 ) ) THEN + INFO = -2 + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( M.LE.0 ) THEN + INFO = -3 + ELSE IF( N.LE.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDD.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDE.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDF.LT.MAX( 1, M ) ) THEN + INFO = -16 + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STGSY2', -INFO ) + RETURN + END IF +* +* Determine block structure of A +* + PQ = 0 + P = 0 + I = 1 + 10 CONTINUE + IF( I.GT.M ) + $ GO TO 20 + P = P + 1 + IWORK( P ) = I + IF( I.EQ.M ) + $ GO TO 20 + IF( A( I+1, I ).NE.ZERO ) THEN + I = I + 2 + ELSE + I = I + 1 + END IF + GO TO 10 + 20 CONTINUE + IWORK( P+1 ) = M + 1 +* +* Determine block structure of B +* + Q = P + 1 + J = 1 + 30 CONTINUE + IF( J.GT.N ) + $ GO TO 40 + Q = Q + 1 + IWORK( Q ) = J + IF( J.EQ.N ) + $ GO TO 40 + IF( B( J+1, J ).NE.ZERO ) THEN + J = J + 2 + ELSE + J = J + 1 + END IF + GO TO 30 + 40 CONTINUE + IWORK( Q+1 ) = N + 1 + PQ = P*( Q-P-1 ) +* + IF( NOTRAN ) THEN +* +* Solve (I, J) - subsystem +* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) +* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) +* for I = P, P - 1, ..., 1; J = 1, 2, ..., Q +* + SCALE = ONE + SCALOC = ONE + DO 120 J = P + 2, Q + JS = IWORK( J ) + JSP1 = JS + 1 + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + DO 110 I = P, 1, -1 +* + IS = IWORK( I ) + ISP1 = IS + 1 + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + ZDIM = MB*NB*2 +* + IF( ( MB.EQ.1 ) .AND. ( NB.EQ.1 ) ) THEN +* +* Build a 2-by-2 system Z * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = D( IS, IS ) + Z( 1, 2 ) = -B( JS, JS ) + Z( 2, 2 ) = -E( JS, JS ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = F( IS, JS ) +* +* Solve Z * x = RHS +* + CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR +* + IF( IJOB.EQ.0 ) THEN + CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, + $ SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 50 K = 1, N + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 50 CONTINUE + SCALE = SCALE*SCALOC + END IF + ELSE + CALL SLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM, + $ RDSCAL, IPIV, JPIV ) + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + F( IS, JS ) = RHS( 2 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( I.GT.1 ) THEN + ALPHA = -RHS( 1 ) + CALL SAXPY( IS-1, ALPHA, A( 1, IS ), 1, C( 1, JS ), + $ 1 ) + CALL SAXPY( IS-1, ALPHA, D( 1, IS ), 1, F( 1, JS ), + $ 1 ) + END IF + IF( J.LT.Q ) THEN + CALL SAXPY( N-JE, RHS( 2 ), B( JS, JE+1 ), LDB, + $ C( IS, JE+1 ), LDC ) + CALL SAXPY( N-JE, RHS( 2 ), E( JS, JE+1 ), LDE, + $ F( IS, JE+1 ), LDF ) + END IF +* + ELSE IF( ( MB.EQ.1 ) .AND. ( NB.EQ.2 ) ) THEN +* +* Build a 4-by-4 system Z * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = ZERO + Z( 3, 1 ) = D( IS, IS ) + Z( 4, 1 ) = ZERO +* + Z( 1, 2 ) = ZERO + Z( 2, 2 ) = A( IS, IS ) + Z( 3, 2 ) = ZERO + Z( 4, 2 ) = D( IS, IS ) +* + Z( 1, 3 ) = -B( JS, JS ) + Z( 2, 3 ) = -B( JS, JSP1 ) + Z( 3, 3 ) = -E( JS, JS ) + Z( 4, 3 ) = -E( JS, JSP1 ) +* + Z( 1, 4 ) = -B( JSP1, JS ) + Z( 2, 4 ) = -B( JSP1, JSP1 ) + Z( 3, 4 ) = ZERO + Z( 4, 4 ) = -E( JSP1, JSP1 ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = C( IS, JSP1 ) + RHS( 3 ) = F( IS, JS ) + RHS( 4 ) = F( IS, JSP1 ) +* +* Solve Z * x = RHS +* + CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR +* + IF( IJOB.EQ.0 ) THEN + CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, + $ SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 60 K = 1, N + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 60 CONTINUE + SCALE = SCALE*SCALOC + END IF + ELSE + CALL SLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM, + $ RDSCAL, IPIV, JPIV ) + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + C( IS, JSP1 ) = RHS( 2 ) + F( IS, JS ) = RHS( 3 ) + F( IS, JSP1 ) = RHS( 4 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( I.GT.1 ) THEN + CALL SGER( IS-1, NB, -ONE, A( 1, IS ), 1, RHS( 1 ), + $ 1, C( 1, JS ), LDC ) + CALL SGER( IS-1, NB, -ONE, D( 1, IS ), 1, RHS( 1 ), + $ 1, F( 1, JS ), LDF ) + END IF + IF( J.LT.Q ) THEN + CALL SAXPY( N-JE, RHS( 3 ), B( JS, JE+1 ), LDB, + $ C( IS, JE+1 ), LDC ) + CALL SAXPY( N-JE, RHS( 3 ), E( JS, JE+1 ), LDE, + $ F( IS, JE+1 ), LDF ) + CALL SAXPY( N-JE, RHS( 4 ), B( JSP1, JE+1 ), LDB, + $ C( IS, JE+1 ), LDC ) + CALL SAXPY( N-JE, RHS( 4 ), E( JSP1, JE+1 ), LDE, + $ F( IS, JE+1 ), LDF ) + END IF +* + ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.1 ) ) THEN +* +* Build a 4-by-4 system Z * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = A( ISP1, IS ) + Z( 3, 1 ) = D( IS, IS ) + Z( 4, 1 ) = ZERO +* + Z( 1, 2 ) = A( IS, ISP1 ) + Z( 2, 2 ) = A( ISP1, ISP1 ) + Z( 3, 2 ) = D( IS, ISP1 ) + Z( 4, 2 ) = D( ISP1, ISP1 ) +* + Z( 1, 3 ) = -B( JS, JS ) + Z( 2, 3 ) = ZERO + Z( 3, 3 ) = -E( JS, JS ) + Z( 4, 3 ) = ZERO +* + Z( 1, 4 ) = ZERO + Z( 2, 4 ) = -B( JS, JS ) + Z( 3, 4 ) = ZERO + Z( 4, 4 ) = -E( JS, JS ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = C( ISP1, JS ) + RHS( 3 ) = F( IS, JS ) + RHS( 4 ) = F( ISP1, JS ) +* +* Solve Z * x = RHS +* + CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR + IF( IJOB.EQ.0 ) THEN + CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, + $ SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 70 K = 1, N + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 70 CONTINUE + SCALE = SCALE*SCALOC + END IF + ELSE + CALL SLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM, + $ RDSCAL, IPIV, JPIV ) + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + C( ISP1, JS ) = RHS( 2 ) + F( IS, JS ) = RHS( 3 ) + F( ISP1, JS ) = RHS( 4 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( I.GT.1 ) THEN + CALL SGEMV( 'N', IS-1, MB, -ONE, A( 1, IS ), LDA, + $ RHS( 1 ), 1, ONE, C( 1, JS ), 1 ) + CALL SGEMV( 'N', IS-1, MB, -ONE, D( 1, IS ), LDD, + $ RHS( 1 ), 1, ONE, F( 1, JS ), 1 ) + END IF + IF( J.LT.Q ) THEN + CALL SGER( MB, N-JE, ONE, RHS( 3 ), 1, + $ B( JS, JE+1 ), LDB, C( IS, JE+1 ), LDC ) + CALL SGER( MB, N-JE, ONE, RHS( 3 ), 1, + $ E( JS, JE+1 ), LDE, F( IS, JE+1 ), LDF ) + END IF +* + ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.2 ) ) THEN +* +* Build an 8-by-8 system Z * x = RHS +* + CALL SLASET( 'F', LDZ, LDZ, ZERO, ZERO, Z, LDZ ) +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = A( ISP1, IS ) + Z( 5, 1 ) = D( IS, IS ) +* + Z( 1, 2 ) = A( IS, ISP1 ) + Z( 2, 2 ) = A( ISP1, ISP1 ) + Z( 5, 2 ) = D( IS, ISP1 ) + Z( 6, 2 ) = D( ISP1, ISP1 ) +* + Z( 3, 3 ) = A( IS, IS ) + Z( 4, 3 ) = A( ISP1, IS ) + Z( 7, 3 ) = D( IS, IS ) +* + Z( 3, 4 ) = A( IS, ISP1 ) + Z( 4, 4 ) = A( ISP1, ISP1 ) + Z( 7, 4 ) = D( IS, ISP1 ) + Z( 8, 4 ) = D( ISP1, ISP1 ) +* + Z( 1, 5 ) = -B( JS, JS ) + Z( 3, 5 ) = -B( JS, JSP1 ) + Z( 5, 5 ) = -E( JS, JS ) + Z( 7, 5 ) = -E( JS, JSP1 ) +* + Z( 2, 6 ) = -B( JS, JS ) + Z( 4, 6 ) = -B( JS, JSP1 ) + Z( 6, 6 ) = -E( JS, JS ) + Z( 8, 6 ) = -E( JS, JSP1 ) +* + Z( 1, 7 ) = -B( JSP1, JS ) + Z( 3, 7 ) = -B( JSP1, JSP1 ) + Z( 7, 7 ) = -E( JSP1, JSP1 ) +* + Z( 2, 8 ) = -B( JSP1, JS ) + Z( 4, 8 ) = -B( JSP1, JSP1 ) + Z( 8, 8 ) = -E( JSP1, JSP1 ) +* +* Set up right hand side(s) +* + K = 1 + II = MB*NB + 1 + DO 80 JJ = 0, NB - 1 + CALL SCOPY( MB, C( IS, JS+JJ ), 1, RHS( K ), 1 ) + CALL SCOPY( MB, F( IS, JS+JJ ), 1, RHS( II ), 1 ) + K = K + MB + II = II + MB + 80 CONTINUE +* +* Solve Z * x = RHS +* + CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR + IF( IJOB.EQ.0 ) THEN + CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, + $ SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 90 K = 1, N + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 90 CONTINUE + SCALE = SCALE*SCALOC + END IF + ELSE + CALL SLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM, + $ RDSCAL, IPIV, JPIV ) + END IF +* +* Unpack solution vector(s) +* + K = 1 + II = MB*NB + 1 + DO 100 JJ = 0, NB - 1 + CALL SCOPY( MB, RHS( K ), 1, C( IS, JS+JJ ), 1 ) + CALL SCOPY( MB, RHS( II ), 1, F( IS, JS+JJ ), 1 ) + K = K + MB + II = II + MB + 100 CONTINUE +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( I.GT.1 ) THEN + CALL SGEMM( 'N', 'N', IS-1, NB, MB, -ONE, + $ A( 1, IS ), LDA, RHS( 1 ), MB, ONE, + $ C( 1, JS ), LDC ) + CALL SGEMM( 'N', 'N', IS-1, NB, MB, -ONE, + $ D( 1, IS ), LDD, RHS( 1 ), MB, ONE, + $ F( 1, JS ), LDF ) + END IF + IF( J.LT.Q ) THEN + K = MB*NB + 1 + CALL SGEMM( 'N', 'N', MB, N-JE, NB, ONE, RHS( K ), + $ MB, B( JS, JE+1 ), LDB, ONE, + $ C( IS, JE+1 ), LDC ) + CALL SGEMM( 'N', 'N', MB, N-JE, NB, ONE, RHS( K ), + $ MB, E( JS, JE+1 ), LDE, ONE, + $ F( IS, JE+1 ), LDF ) + END IF +* + END IF +* + 110 CONTINUE + 120 CONTINUE + ELSE +* +* Solve (I, J) - subsystem +* A(I, I)**T * R(I, J) + D(I, I)**T * L(J, J) = C(I, J) +* R(I, I) * B(J, J) + L(I, J) * E(J, J) = -F(I, J) +* for I = 1, 2, ..., P, J = Q, Q - 1, ..., 1 +* + SCALE = ONE + SCALOC = ONE + DO 200 I = 1, P +* + IS = IWORK( I ) + ISP1 = IS + 1 + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + DO 190 J = Q, P + 2, -1 +* + JS = IWORK( J ) + JSP1 = JS + 1 + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + ZDIM = MB*NB*2 + IF( ( MB.EQ.1 ) .AND. ( NB.EQ.1 ) ) THEN +* +* Build a 2-by-2 system Z**T * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = -B( JS, JS ) + Z( 1, 2 ) = D( IS, IS ) + Z( 2, 2 ) = -E( JS, JS ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = F( IS, JS ) +* +* Solve Z**T * x = RHS +* + CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR +* + CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 130 K = 1, N + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 130 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + F( IS, JS ) = RHS( 2 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( J.GT.P+2 ) THEN + ALPHA = RHS( 1 ) + CALL SAXPY( JS-1, ALPHA, B( 1, JS ), 1, F( IS, 1 ), + $ LDF ) + ALPHA = RHS( 2 ) + CALL SAXPY( JS-1, ALPHA, E( 1, JS ), 1, F( IS, 1 ), + $ LDF ) + END IF + IF( I.LT.P ) THEN + ALPHA = -RHS( 1 ) + CALL SAXPY( M-IE, ALPHA, A( IS, IE+1 ), LDA, + $ C( IE+1, JS ), 1 ) + ALPHA = -RHS( 2 ) + CALL SAXPY( M-IE, ALPHA, D( IS, IE+1 ), LDD, + $ C( IE+1, JS ), 1 ) + END IF +* + ELSE IF( ( MB.EQ.1 ) .AND. ( NB.EQ.2 ) ) THEN +* +* Build a 4-by-4 system Z**T * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = ZERO + Z( 3, 1 ) = -B( JS, JS ) + Z( 4, 1 ) = -B( JSP1, JS ) +* + Z( 1, 2 ) = ZERO + Z( 2, 2 ) = A( IS, IS ) + Z( 3, 2 ) = -B( JS, JSP1 ) + Z( 4, 2 ) = -B( JSP1, JSP1 ) +* + Z( 1, 3 ) = D( IS, IS ) + Z( 2, 3 ) = ZERO + Z( 3, 3 ) = -E( JS, JS ) + Z( 4, 3 ) = ZERO +* + Z( 1, 4 ) = ZERO + Z( 2, 4 ) = D( IS, IS ) + Z( 3, 4 ) = -E( JS, JSP1 ) + Z( 4, 4 ) = -E( JSP1, JSP1 ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = C( IS, JSP1 ) + RHS( 3 ) = F( IS, JS ) + RHS( 4 ) = F( IS, JSP1 ) +* +* Solve Z**T * x = RHS +* + CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR + CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 140 K = 1, N + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 140 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + C( IS, JSP1 ) = RHS( 2 ) + F( IS, JS ) = RHS( 3 ) + F( IS, JSP1 ) = RHS( 4 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( J.GT.P+2 ) THEN + CALL SAXPY( JS-1, RHS( 1 ), B( 1, JS ), 1, + $ F( IS, 1 ), LDF ) + CALL SAXPY( JS-1, RHS( 2 ), B( 1, JSP1 ), 1, + $ F( IS, 1 ), LDF ) + CALL SAXPY( JS-1, RHS( 3 ), E( 1, JS ), 1, + $ F( IS, 1 ), LDF ) + CALL SAXPY( JS-1, RHS( 4 ), E( 1, JSP1 ), 1, + $ F( IS, 1 ), LDF ) + END IF + IF( I.LT.P ) THEN + CALL SGER( M-IE, NB, -ONE, A( IS, IE+1 ), LDA, + $ RHS( 1 ), 1, C( IE+1, JS ), LDC ) + CALL SGER( M-IE, NB, -ONE, D( IS, IE+1 ), LDD, + $ RHS( 3 ), 1, C( IE+1, JS ), LDC ) + END IF +* + ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.1 ) ) THEN +* +* Build a 4-by-4 system Z**T * x = RHS +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = A( IS, ISP1 ) + Z( 3, 1 ) = -B( JS, JS ) + Z( 4, 1 ) = ZERO +* + Z( 1, 2 ) = A( ISP1, IS ) + Z( 2, 2 ) = A( ISP1, ISP1 ) + Z( 3, 2 ) = ZERO + Z( 4, 2 ) = -B( JS, JS ) +* + Z( 1, 3 ) = D( IS, IS ) + Z( 2, 3 ) = D( IS, ISP1 ) + Z( 3, 3 ) = -E( JS, JS ) + Z( 4, 3 ) = ZERO +* + Z( 1, 4 ) = ZERO + Z( 2, 4 ) = D( ISP1, ISP1 ) + Z( 3, 4 ) = ZERO + Z( 4, 4 ) = -E( JS, JS ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( IS, JS ) + RHS( 2 ) = C( ISP1, JS ) + RHS( 3 ) = F( IS, JS ) + RHS( 4 ) = F( ISP1, JS ) +* +* Solve Z**T * x = RHS +* + CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR +* + CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 150 K = 1, N + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 150 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Unpack solution vector(s) +* + C( IS, JS ) = RHS( 1 ) + C( ISP1, JS ) = RHS( 2 ) + F( IS, JS ) = RHS( 3 ) + F( ISP1, JS ) = RHS( 4 ) +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( J.GT.P+2 ) THEN + CALL SGER( MB, JS-1, ONE, RHS( 1 ), 1, B( 1, JS ), + $ 1, F( IS, 1 ), LDF ) + CALL SGER( MB, JS-1, ONE, RHS( 3 ), 1, E( 1, JS ), + $ 1, F( IS, 1 ), LDF ) + END IF + IF( I.LT.P ) THEN + CALL SGEMV( 'T', MB, M-IE, -ONE, A( IS, IE+1 ), + $ LDA, RHS( 1 ), 1, ONE, C( IE+1, JS ), + $ 1 ) + CALL SGEMV( 'T', MB, M-IE, -ONE, D( IS, IE+1 ), + $ LDD, RHS( 3 ), 1, ONE, C( IE+1, JS ), + $ 1 ) + END IF +* + ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.2 ) ) THEN +* +* Build an 8-by-8 system Z**T * x = RHS +* + CALL SLASET( 'F', LDZ, LDZ, ZERO, ZERO, Z, LDZ ) +* + Z( 1, 1 ) = A( IS, IS ) + Z( 2, 1 ) = A( IS, ISP1 ) + Z( 5, 1 ) = -B( JS, JS ) + Z( 7, 1 ) = -B( JSP1, JS ) +* + Z( 1, 2 ) = A( ISP1, IS ) + Z( 2, 2 ) = A( ISP1, ISP1 ) + Z( 6, 2 ) = -B( JS, JS ) + Z( 8, 2 ) = -B( JSP1, JS ) +* + Z( 3, 3 ) = A( IS, IS ) + Z( 4, 3 ) = A( IS, ISP1 ) + Z( 5, 3 ) = -B( JS, JSP1 ) + Z( 7, 3 ) = -B( JSP1, JSP1 ) +* + Z( 3, 4 ) = A( ISP1, IS ) + Z( 4, 4 ) = A( ISP1, ISP1 ) + Z( 6, 4 ) = -B( JS, JSP1 ) + Z( 8, 4 ) = -B( JSP1, JSP1 ) +* + Z( 1, 5 ) = D( IS, IS ) + Z( 2, 5 ) = D( IS, ISP1 ) + Z( 5, 5 ) = -E( JS, JS ) +* + Z( 2, 6 ) = D( ISP1, ISP1 ) + Z( 6, 6 ) = -E( JS, JS ) +* + Z( 3, 7 ) = D( IS, IS ) + Z( 4, 7 ) = D( IS, ISP1 ) + Z( 5, 7 ) = -E( JS, JSP1 ) + Z( 7, 7 ) = -E( JSP1, JSP1 ) +* + Z( 4, 8 ) = D( ISP1, ISP1 ) + Z( 6, 8 ) = -E( JS, JSP1 ) + Z( 8, 8 ) = -E( JSP1, JSP1 ) +* +* Set up right hand side(s) +* + K = 1 + II = MB*NB + 1 + DO 160 JJ = 0, NB - 1 + CALL SCOPY( MB, C( IS, JS+JJ ), 1, RHS( K ), 1 ) + CALL SCOPY( MB, F( IS, JS+JJ ), 1, RHS( II ), 1 ) + K = K + MB + II = II + MB + 160 CONTINUE +* +* +* Solve Z**T * x = RHS +* + CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR +* + CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 170 K = 1, N + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 170 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Unpack solution vector(s) +* + K = 1 + II = MB*NB + 1 + DO 180 JJ = 0, NB - 1 + CALL SCOPY( MB, RHS( K ), 1, C( IS, JS+JJ ), 1 ) + CALL SCOPY( MB, RHS( II ), 1, F( IS, JS+JJ ), 1 ) + K = K + MB + II = II + MB + 180 CONTINUE +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( J.GT.P+2 ) THEN + CALL SGEMM( 'N', 'T', MB, JS-1, NB, ONE, + $ C( IS, JS ), LDC, B( 1, JS ), LDB, ONE, + $ F( IS, 1 ), LDF ) + CALL SGEMM( 'N', 'T', MB, JS-1, NB, ONE, + $ F( IS, JS ), LDF, E( 1, JS ), LDE, ONE, + $ F( IS, 1 ), LDF ) + END IF + IF( I.LT.P ) THEN + CALL SGEMM( 'T', 'N', M-IE, NB, MB, -ONE, + $ A( IS, IE+1 ), LDA, C( IS, JS ), LDC, + $ ONE, C( IE+1, JS ), LDC ) + CALL SGEMM( 'T', 'N', M-IE, NB, MB, -ONE, + $ D( IS, IE+1 ), LDD, F( IS, JS ), LDF, + $ ONE, C( IE+1, JS ), LDC ) + END IF +* + END IF +* + 190 CONTINUE + 200 CONTINUE +* + END IF + RETURN +* +* End of STGSY2 +* + END diff --git a/dspl/liblapack/SRC/stgsyl.f b/dspl/liblapack/SRC/stgsyl.f new file mode 100644 index 0000000..cd597f3 --- /dev/null +++ b/dspl/liblapack/SRC/stgsyl.f @@ -0,0 +1,682 @@ +*> \brief \b STGSYL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STGSYL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STGSYL( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, +* LDD, E, LDE, F, LDF, SCALE, DIF, WORK, LWORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, +* $ LWORK, M, N +* REAL DIF, SCALE +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), B( LDB, * ), C( LDC, * ), +* $ D( LDD, * ), E( LDE, * ), F( LDF, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STGSYL solves the generalized Sylvester equation: +*> +*> A * R - L * B = scale * C (1) +*> D * R - L * E = scale * F +*> +*> where R and L are unknown m-by-n matrices, (A, D), (B, E) and +*> (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n, +*> respectively, with real entries. (A, D) and (B, E) must be in +*> generalized (real) Schur canonical form, i.e. A, B are upper quasi +*> triangular and D, E are upper triangular. +*> +*> The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output +*> scaling factor chosen to avoid overflow. +*> +*> In matrix notation (1) is equivalent to solve Zx = scale b, where +*> Z is defined as +*> +*> Z = [ kron(In, A) -kron(B**T, Im) ] (2) +*> [ kron(In, D) -kron(E**T, Im) ]. +*> +*> Here Ik is the identity matrix of size k and X**T is the transpose of +*> X. kron(X, Y) is the Kronecker product between the matrices X and Y. +*> +*> If TRANS = 'T', STGSYL solves the transposed system Z**T*y = scale*b, +*> which is equivalent to solve for R and L in +*> +*> A**T * R + D**T * L = scale * C (3) +*> R * B**T + L * E**T = scale * -F +*> +*> This case (TRANS = 'T') is used to compute an one-norm-based estimate +*> of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D) +*> and (B,E), using SLACON. +*> +*> If IJOB >= 1, STGSYL computes a Frobenius norm-based estimate +*> of Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the +*> reciprocal of the smallest singular value of Z. See [1-2] for more +*> information. +*> +*> This is a level 3 BLAS algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N', solve the generalized Sylvester equation (1). +*> = 'T', solve the 'transposed' system (3). +*> \endverbatim +*> +*> \param[in] IJOB +*> \verbatim +*> IJOB is INTEGER +*> Specifies what kind of functionality to be performed. +*> =0: solve (1) only. +*> =1: The functionality of 0 and 3. +*> =2: The functionality of 0 and 4. +*> =3: Only an estimate of Dif[(A,D), (B,E)] is computed. +*> (look ahead strategy IJOB = 1 is used). +*> =4: Only an estimate of Dif[(A,D), (B,E)] is computed. +*> ( SGECON on sub-systems is used ). +*> Not referenced if TRANS = 'T'. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The order of the matrices A and D, and the row dimension of +*> the matrices C, F, R and L. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices B and E, and the column dimension +*> of the matrices C, F, R and L. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA, M) +*> The upper quasi triangular matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1, M). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB, N) +*> The upper quasi triangular matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC, N) +*> On entry, C contains the right-hand-side of the first matrix +*> equation in (1) or (3). +*> On exit, if IJOB = 0, 1 or 2, C has been overwritten by +*> the solution R. If IJOB = 3 or 4 and TRANS = 'N', C holds R, +*> the solution achieved during the computation of the +*> Dif-estimate. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1, M). +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is REAL array, dimension (LDD, M) +*> The upper triangular matrix D. +*> \endverbatim +*> +*> \param[in] LDD +*> \verbatim +*> LDD is INTEGER +*> The leading dimension of the array D. LDD >= max(1, M). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is REAL array, dimension (LDE, N) +*> The upper triangular matrix E. +*> \endverbatim +*> +*> \param[in] LDE +*> \verbatim +*> LDE is INTEGER +*> The leading dimension of the array E. LDE >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] F +*> \verbatim +*> F is REAL array, dimension (LDF, N) +*> On entry, F contains the right-hand-side of the second matrix +*> equation in (1) or (3). +*> On exit, if IJOB = 0, 1 or 2, F has been overwritten by +*> the solution L. If IJOB = 3 or 4 and TRANS = 'N', F holds L, +*> the solution achieved during the computation of the +*> Dif-estimate. +*> \endverbatim +*> +*> \param[in] LDF +*> \verbatim +*> LDF is INTEGER +*> The leading dimension of the array F. LDF >= max(1, M). +*> \endverbatim +*> +*> \param[out] DIF +*> \verbatim +*> DIF is REAL +*> On exit DIF is the reciprocal of a lower bound of the +*> reciprocal of the Dif-function, i.e. DIF is an upper bound of +*> Dif[(A,D), (B,E)] = sigma_min(Z), where Z as in (2). +*> IF IJOB = 0 or TRANS = 'T', DIF is not touched. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL +*> On exit SCALE is the scaling factor in (1) or (3). +*> If 0 < SCALE < 1, C and F hold the solutions R and L, resp., +*> to a slightly perturbed system but the input matrices A, B, D +*> and E have not been changed. If SCALE = 0, C and F hold the +*> solutions R and L, respectively, to the homogeneous system +*> with C = F = 0. Normally, SCALE = 1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK > = 1. +*> If IJOB = 1 or 2 and TRANS = 'N', LWORK >= max(1,2*M*N). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (M+N+6) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> =0: successful exit +*> <0: If INFO = -i, the i-th argument had an illegal value. +*> >0: (A, D) and (B, E) have common or close eigenvalues. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYcomputational +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +*> for Solving the Generalized Sylvester Equation and Estimating the +*> Separation between Regular Matrix Pairs, Report UMINF - 93.23, +*> Department of Computing Science, Umea University, S-901 87 Umea, +*> Sweden, December 1993, Revised April 1994, Also as LAPACK Working +*> Note 75. To appear in ACM Trans. on Math. Software, Vol 22, +*> No 1, 1996. +*> +*> [2] B. Kagstrom, A Perturbation Analysis of the Generalized Sylvester +*> Equation (AR - LB, DR - LE ) = (C, F), SIAM J. Matrix Anal. +*> Appl., 15(4):1045-1060, 1994 +*> +*> [3] B. Kagstrom and L. Westin, Generalized Schur Methods with +*> Condition Estimators for Solving the Generalized Sylvester +*> Equation, IEEE Transactions on Automatic Control, Vol. 34, No. 7, +*> July 1989, pp 745-751. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STGSYL( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, DIF, WORK, LWORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, + $ LWORK, M, N + REAL DIF, SCALE +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ D( LDD, * ), E( LDE, * ), F( LDF, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* Replaced various illegal calls to SCOPY by calls to SLASET. +* Sven Hammarling, 1/5/02. +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOTRAN + INTEGER I, IE, IFUNC, IROUND, IS, ISOLVE, J, JE, JS, K, + $ LINFO, LWMIN, MB, NB, P, PPQQ, PQ, Q + REAL DSCALE, DSUM, SCALE2, SCALOC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, SLACPY, SLASET, SSCAL, STGSY2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, REAL, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test input parameters +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN + INFO = -1 + ELSE IF( NOTRAN ) THEN + IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.4 ) ) THEN + INFO = -2 + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( M.LE.0 ) THEN + INFO = -3 + ELSE IF( N.LE.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDD.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDE.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDF.LT.MAX( 1, M ) ) THEN + INFO = -16 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + IF( NOTRAN ) THEN + IF( IJOB.EQ.1 .OR. IJOB.EQ.2 ) THEN + LWMIN = MAX( 1, 2*M*N ) + ELSE + LWMIN = 1 + END IF + ELSE + LWMIN = 1 + END IF + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STGSYL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + SCALE = 1 + IF( NOTRAN ) THEN + IF( IJOB.NE.0 ) THEN + DIF = 0 + END IF + END IF + RETURN + END IF +* +* Determine optimal block sizes MB and NB +* + MB = ILAENV( 2, 'STGSYL', TRANS, M, N, -1, -1 ) + NB = ILAENV( 5, 'STGSYL', TRANS, M, N, -1, -1 ) +* + ISOLVE = 1 + IFUNC = 0 + IF( NOTRAN ) THEN + IF( IJOB.GE.3 ) THEN + IFUNC = IJOB - 2 + CALL SLASET( 'F', M, N, ZERO, ZERO, C, LDC ) + CALL SLASET( 'F', M, N, ZERO, ZERO, F, LDF ) + ELSE IF( IJOB.GE.1 .AND. NOTRAN ) THEN + ISOLVE = 2 + END IF + END IF +* + IF( ( MB.LE.1 .AND. NB.LE.1 ) .OR. ( MB.GE.M .AND. NB.GE.N ) ) + $ THEN +* + DO 30 IROUND = 1, ISOLVE +* +* Use unblocked Level 2 solver +* + DSCALE = ZERO + DSUM = ONE + PQ = 0 + CALL STGSY2( TRANS, IFUNC, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, DSUM, DSCALE, + $ IWORK, PQ, INFO ) + IF( DSCALE.NE.ZERO ) THEN + IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN + DIF = SQRT( REAL( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) ) + ELSE + DIF = SQRT( REAL( PQ ) ) / ( DSCALE*SQRT( DSUM ) ) + END IF + END IF +* + IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN + IF( NOTRAN ) THEN + IFUNC = IJOB + END IF + SCALE2 = SCALE + CALL SLACPY( 'F', M, N, C, LDC, WORK, M ) + CALL SLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M ) + CALL SLASET( 'F', M, N, ZERO, ZERO, C, LDC ) + CALL SLASET( 'F', M, N, ZERO, ZERO, F, LDF ) + ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN + CALL SLACPY( 'F', M, N, WORK, M, C, LDC ) + CALL SLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF ) + SCALE = SCALE2 + END IF + 30 CONTINUE +* + RETURN + END IF +* +* Determine block structure of A +* + P = 0 + I = 1 + 40 CONTINUE + IF( I.GT.M ) + $ GO TO 50 + P = P + 1 + IWORK( P ) = I + I = I + MB + IF( I.GE.M ) + $ GO TO 50 + IF( A( I, I-1 ).NE.ZERO ) + $ I = I + 1 + GO TO 40 + 50 CONTINUE +* + IWORK( P+1 ) = M + 1 + IF( IWORK( P ).EQ.IWORK( P+1 ) ) + $ P = P - 1 +* +* Determine block structure of B +* + Q = P + 1 + J = 1 + 60 CONTINUE + IF( J.GT.N ) + $ GO TO 70 + Q = Q + 1 + IWORK( Q ) = J + J = J + NB + IF( J.GE.N ) + $ GO TO 70 + IF( B( J, J-1 ).NE.ZERO ) + $ J = J + 1 + GO TO 60 + 70 CONTINUE +* + IWORK( Q+1 ) = N + 1 + IF( IWORK( Q ).EQ.IWORK( Q+1 ) ) + $ Q = Q - 1 +* + IF( NOTRAN ) THEN +* + DO 150 IROUND = 1, ISOLVE +* +* Solve (I, J)-subsystem +* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) +* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) +* for I = P, P - 1,..., 1; J = 1, 2,..., Q +* + DSCALE = ZERO + DSUM = ONE + PQ = 0 + SCALE = ONE + DO 130 J = P + 2, Q + JS = IWORK( J ) + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + DO 120 I = P, 1, -1 + IS = IWORK( I ) + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + PPQQ = 0 + CALL STGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA, + $ B( JS, JS ), LDB, C( IS, JS ), LDC, + $ D( IS, IS ), LDD, E( JS, JS ), LDE, + $ F( IS, JS ), LDF, SCALOC, DSUM, DSCALE, + $ IWORK( Q+2 ), PPQQ, LINFO ) + IF( LINFO.GT.0 ) + $ INFO = LINFO +* + PQ = PQ + PPQQ + IF( SCALOC.NE.ONE ) THEN + DO 80 K = 1, JS - 1 + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 80 CONTINUE + DO 90 K = JS, JE + CALL SSCAL( IS-1, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( IS-1, SCALOC, F( 1, K ), 1 ) + 90 CONTINUE + DO 100 K = JS, JE + CALL SSCAL( M-IE, SCALOC, C( IE+1, K ), 1 ) + CALL SSCAL( M-IE, SCALOC, F( IE+1, K ), 1 ) + 100 CONTINUE + DO 110 K = JE + 1, N + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 110 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Substitute R(I, J) and L(I, J) into remaining +* equation. +* + IF( I.GT.1 ) THEN + CALL SGEMM( 'N', 'N', IS-1, NB, MB, -ONE, + $ A( 1, IS ), LDA, C( IS, JS ), LDC, ONE, + $ C( 1, JS ), LDC ) + CALL SGEMM( 'N', 'N', IS-1, NB, MB, -ONE, + $ D( 1, IS ), LDD, C( IS, JS ), LDC, ONE, + $ F( 1, JS ), LDF ) + END IF + IF( J.LT.Q ) THEN + CALL SGEMM( 'N', 'N', MB, N-JE, NB, ONE, + $ F( IS, JS ), LDF, B( JS, JE+1 ), LDB, + $ ONE, C( IS, JE+1 ), LDC ) + CALL SGEMM( 'N', 'N', MB, N-JE, NB, ONE, + $ F( IS, JS ), LDF, E( JS, JE+1 ), LDE, + $ ONE, F( IS, JE+1 ), LDF ) + END IF + 120 CONTINUE + 130 CONTINUE + IF( DSCALE.NE.ZERO ) THEN + IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN + DIF = SQRT( REAL( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) ) + ELSE + DIF = SQRT( REAL( PQ ) ) / ( DSCALE*SQRT( DSUM ) ) + END IF + END IF + IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN + IF( NOTRAN ) THEN + IFUNC = IJOB + END IF + SCALE2 = SCALE + CALL SLACPY( 'F', M, N, C, LDC, WORK, M ) + CALL SLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M ) + CALL SLASET( 'F', M, N, ZERO, ZERO, C, LDC ) + CALL SLASET( 'F', M, N, ZERO, ZERO, F, LDF ) + ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN + CALL SLACPY( 'F', M, N, WORK, M, C, LDC ) + CALL SLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF ) + SCALE = SCALE2 + END IF + 150 CONTINUE +* + ELSE +* +* Solve transposed (I, J)-subsystem +* A(I, I)**T * R(I, J) + D(I, I)**T * L(I, J) = C(I, J) +* R(I, J) * B(J, J)**T + L(I, J) * E(J, J)**T = -F(I, J) +* for I = 1,2,..., P; J = Q, Q-1,..., 1 +* + SCALE = ONE + DO 210 I = 1, P + IS = IWORK( I ) + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + DO 200 J = Q, P + 2, -1 + JS = IWORK( J ) + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + CALL STGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA, + $ B( JS, JS ), LDB, C( IS, JS ), LDC, + $ D( IS, IS ), LDD, E( JS, JS ), LDE, + $ F( IS, JS ), LDF, SCALOC, DSUM, DSCALE, + $ IWORK( Q+2 ), PPQQ, LINFO ) + IF( LINFO.GT.0 ) + $ INFO = LINFO + IF( SCALOC.NE.ONE ) THEN + DO 160 K = 1, JS - 1 + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 160 CONTINUE + DO 170 K = JS, JE + CALL SSCAL( IS-1, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( IS-1, SCALOC, F( 1, K ), 1 ) + 170 CONTINUE + DO 180 K = JS, JE + CALL SSCAL( M-IE, SCALOC, C( IE+1, K ), 1 ) + CALL SSCAL( M-IE, SCALOC, F( IE+1, K ), 1 ) + 180 CONTINUE + DO 190 K = JE + 1, N + CALL SSCAL( M, SCALOC, C( 1, K ), 1 ) + CALL SSCAL( M, SCALOC, F( 1, K ), 1 ) + 190 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Substitute R(I, J) and L(I, J) into remaining equation. +* + IF( J.GT.P+2 ) THEN + CALL SGEMM( 'N', 'T', MB, JS-1, NB, ONE, C( IS, JS ), + $ LDC, B( 1, JS ), LDB, ONE, F( IS, 1 ), + $ LDF ) + CALL SGEMM( 'N', 'T', MB, JS-1, NB, ONE, F( IS, JS ), + $ LDF, E( 1, JS ), LDE, ONE, F( IS, 1 ), + $ LDF ) + END IF + IF( I.LT.P ) THEN + CALL SGEMM( 'T', 'N', M-IE, NB, MB, -ONE, + $ A( IS, IE+1 ), LDA, C( IS, JS ), LDC, ONE, + $ C( IE+1, JS ), LDC ) + CALL SGEMM( 'T', 'N', M-IE, NB, MB, -ONE, + $ D( IS, IE+1 ), LDD, F( IS, JS ), LDF, ONE, + $ C( IE+1, JS ), LDC ) + END IF + 200 CONTINUE + 210 CONTINUE +* + END IF +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of STGSYL +* + END diff --git a/dspl/liblapack/SRC/stpcon.f b/dspl/liblapack/SRC/stpcon.f new file mode 100644 index 0000000..2814e69 --- /dev/null +++ b/dspl/liblapack/SRC/stpcon.f @@ -0,0 +1,267 @@ +*> \brief \b STPCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STPCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STPCON( NORM, UPLO, DIAG, N, AP, RCOND, WORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER INFO, N +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STPCON estimates the reciprocal of the condition number of a packed +*> triangular matrix A, in either the 1-norm or the infinity-norm. +*> +*> The norm of A is computed and an estimate is obtained for +*> norm(inv(A)), then the reciprocal of the condition number is +*> computed as +*> RCOND = 1 / ( norm(A) * norm(inv(A)) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> The upper or lower triangular matrix A, packed columnwise in +*> a linear array. The j-th column of A is stored in the array +*> AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> If DIAG = 'U', the diagonal elements of A are not referenced +*> and are assumed to be 1. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(norm(A) * norm(inv(A))). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE STPCON( NORM, UPLO, DIAG, N, AP, RCOND, WORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, N + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + REAL AINVNM, ANORM, SCALE, SMLNUM, XNORM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH, SLANTP + EXTERNAL LSAME, ISAMAX, SLAMCH, SLANTP +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SLATPS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = SLAMCH( 'Safe minimum' )*REAL( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = SLANTP( NORM, UPLO, DIAG, N, AP, WORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL SLATPS( UPLO, 'No transpose', DIAG, NORMIN, N, AP, + $ WORK, SCALE, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(A**T). +* + CALL SLATPS( UPLO, 'Transpose', DIAG, NORMIN, N, AP, + $ WORK, SCALE, WORK( 2*N+1 ), INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + XNORM = ABS( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of STPCON +* + END diff --git a/dspl/liblapack/SRC/stplqt.f b/dspl/liblapack/SRC/stplqt.f new file mode 100644 index 0000000..8077b08 --- /dev/null +++ b/dspl/liblapack/SRC/stplqt.f @@ -0,0 +1,270 @@ +*> \brief \b STPLQT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTPQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STPLQT( M, N, L, MB, A, LDA, B, LDB, T, LDT, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LDT, N, M, L, MB +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTPLQT computes a blocked LQ factorization of a real +*> "triangular-pentagonal" matrix C, which is composed of a +*> triangular block A and pentagonal block B, using the compact +*> WY representation for Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B, and the order of the +*> triangular matrix A. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of rows of the lower trapezoidal part of B. +*> MIN(M,N) >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The block size to be used in the blocked QR. M >= MB >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,M) +*> On entry, the lower triangular M-by-M matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the lower triangular matrix L. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> On entry, the pentagonal M-by-N matrix B. The first N-L columns +*> are rectangular, and the last L columns are lower trapezoidal. +*> On exit, B contains the pentagonal matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (LDT,N) +*> The lower triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See Further Details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MB*M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The input matrix C is a M-by-(M+N) matrix +*> +*> C = [ A ] [ B ] +*> +*> +*> where A is an lower triangular M-by-M matrix, and B is M-by-N pentagonal +*> matrix consisting of a M-by-(N-L) rectangular matrix B1 on left of a M-by-L +*> upper trapezoidal matrix B2: +*> [ B ] = [ B1 ] [ B2 ] +*> [ B1 ] <- M-by-(N-L) rectangular +*> [ B2 ] <- M-by-L lower trapezoidal. +*> +*> The lower trapezoidal matrix B2 consists of the first L columns of a +*> M-by-M lower triangular matrix, where 0 <= L <= MIN(M,N). If L=0, +*> B is rectangular M-by-N; if M=L=N, B is lower triangular. +*> +*> The matrix W stores the elementary reflectors H(i) in the i-th row +*> above the diagonal (of A) in the M-by-(M+N) input matrix C +*> [ C ] = [ A ] [ B ] +*> [ A ] <- lower triangular M-by-M +*> [ B ] <- M-by-N pentagonal +*> +*> so that W can be represented as +*> [ W ] = [ I ] [ V ] +*> [ I ] <- identity, M-by-M +*> [ V ] <- M-by-N, same form as B. +*> +*> Thus, all of information needed for W is contained on exit in B, which +*> we call V above. Note that V has the same form as B; that is, +*> [ V ] = [ V1 ] [ V2 ] +*> [ V1 ] <- M-by-(N-L) rectangular +*> [ V2 ] <- M-by-L lower trapezoidal. +*> +*> The rows of V represent the vectors which define the H(i)'s. +*> +*> The number of blocks is B = ceiling(M/MB), where each +*> block is of order MB except for the last block, which is of order +*> IB = M - (M-1)*MB. For each of the B blocks, a upper triangular block +*> reflector factor is computed: T1, T2, ..., TB. The MB-by-MB (and IB-by-IB +*> for the last block) T's are stored in the MB-by-N matrix T as +*> +*> T = [T1 T2 ... TB]. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STPLQT( M, N, L, MB, A, LDA, B, LDB, T, LDT, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LDT, N, M, L, MB +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + INTEGER I, IB, LB, NB, IINFO +* .. +* .. External Subroutines .. + EXTERNAL STPLQT2, STPRFB, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( L.LT.0 .OR. (L.GT.MIN(M,N) .AND. MIN(M,N).GE.0)) THEN + INFO = -3 + ELSE IF( MB.LT.1 .OR. (MB.GT.M .AND. M.GT.0)) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -8 + ELSE IF( LDT.LT.MB ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STPLQT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) RETURN +* + DO I = 1, M, MB +* +* Compute the QR factorization of the current block +* + IB = MIN( M-I+1, MB ) + NB = MIN( N-L+I+IB-1, N ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = NB-N+L-I+1 + END IF +* + CALL STPLQT2( IB, NB, LB, A(I,I), LDA, B( I, 1 ), LDB, + $ T(1, I ), LDT, IINFO ) +* +* Update by applying H**T to B(I+IB:M,:) from the right +* + IF( I+IB.LE.M ) THEN + CALL STPRFB( 'R', 'N', 'F', 'R', M-I-IB+1, NB, IB, LB, + $ B( I, 1 ), LDB, T( 1, I ), LDT, + $ A( I+IB, I ), LDA, B( I+IB, 1 ), LDB, + $ WORK, M-I-IB+1) + END IF + END DO + RETURN +* +* End of STPLQT +* + END diff --git a/dspl/liblapack/SRC/stplqt2.f b/dspl/liblapack/SRC/stplqt2.f new file mode 100644 index 0000000..ca38a95 --- /dev/null +++ b/dspl/liblapack/SRC/stplqt2.f @@ -0,0 +1,312 @@ +*> \brief \b STPLQT2 computes a LQ factorization of a real or complex "triangular-pentagonal" matrix, which is composed of a triangular block and a pentagonal block, using the compact WY representation for Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STPLQT2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STPLQT2( M, N, L, A, LDA, B, LDB, T, LDT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LDT, N, M, L +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STPLQT2 computes a LQ a factorization of a real "triangular-pentagonal" +*> matrix C, which is composed of a triangular block A and pentagonal block B, +*> using the compact WY representation for Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The total number of rows of the matrix B. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B, and the order of +*> the triangular matrix A. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of rows of the lower trapezoidal part of B. +*> MIN(M,N) >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,M) +*> On entry, the lower triangular M-by-M matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the lower triangular matrix L. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> On entry, the pentagonal M-by-N matrix B. The first N-L columns +*> are rectangular, and the last L columns are lower trapezoidal. +*> On exit, B contains the pentagonal matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (LDT,M) +*> The N-by-N upper triangular factor T of the block reflector. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The input matrix C is a M-by-(M+N) matrix +*> +*> C = [ A ][ B ] +*> +*> +*> where A is an lower triangular M-by-M matrix, and B is M-by-N pentagonal +*> matrix consisting of a M-by-(N-L) rectangular matrix B1 left of a M-by-L +*> upper trapezoidal matrix B2: +*> +*> B = [ B1 ][ B2 ] +*> [ B1 ] <- M-by-(N-L) rectangular +*> [ B2 ] <- M-by-L lower trapezoidal. +*> +*> The lower trapezoidal matrix B2 consists of the first L columns of a +*> N-by-N lower triangular matrix, where 0 <= L <= MIN(M,N). If L=0, +*> B is rectangular M-by-N; if M=L=N, B is lower triangular. +*> +*> The matrix W stores the elementary reflectors H(i) in the i-th row +*> above the diagonal (of A) in the M-by-(M+N) input matrix C +*> +*> C = [ A ][ B ] +*> [ A ] <- lower triangular M-by-M +*> [ B ] <- M-by-N pentagonal +*> +*> so that W can be represented as +*> +*> W = [ I ][ V ] +*> [ I ] <- identity, M-by-M +*> [ V ] <- M-by-N, same form as B. +*> +*> Thus, all of information needed for W is contained on exit in B, which +*> we call V above. Note that V has the same form as B; that is, +*> +*> W = [ V1 ][ V2 ] +*> [ V1 ] <- M-by-(N-L) rectangular +*> [ V2 ] <- M-by-L lower trapezoidal. +*> +*> The rows of V represent the vectors which define the H(i)'s. +*> The (M+N)-by-(M+N) block reflector H is then given by +*> +*> H = I - W**T * T * W +*> +*> where W^H is the conjugate transpose of W and T is the upper triangular +*> factor of the block reflector. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STPLQT2( M, N, L, A, LDA, B, LDB, T, LDT, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LDT, N, M, L +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER( ONE = 1.0, ZERO = 0.0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, P, MP, NP + REAL ALPHA +* .. +* .. External Subroutines .. + EXTERNAL SLARFG, SGEMV, SGER, STRMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( L.LT.0 .OR. L.GT.MIN(M,N) ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDT.LT.MAX( 1, M ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STPLQT2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) RETURN +* + DO I = 1, M +* +* Generate elementary reflector H(I) to annihilate B(I,:) +* + P = N-L+MIN( L, I ) + CALL SLARFG( P+1, A( I, I ), B( I, 1 ), LDB, T( 1, I ) ) + IF( I.LT.M ) THEN +* +* W(M-I:1) := C(I+1:M,I:N) * C(I,I:N) [use W = T(M,:)] +* + DO J = 1, M-I + T( M, J ) = (A( I+J, I )) + END DO + CALL SGEMV( 'N', M-I, P, ONE, B( I+1, 1 ), LDB, + $ B( I, 1 ), LDB, ONE, T( M, 1 ), LDT ) +* +* C(I+1:M,I:N) = C(I+1:M,I:N) + alpha * C(I,I:N)*W(M-1:1)^H +* + ALPHA = -(T( 1, I )) + DO J = 1, M-I + A( I+J, I ) = A( I+J, I ) + ALPHA*(T( M, J )) + END DO + CALL SGER( M-I, P, ALPHA, T( M, 1 ), LDT, + $ B( I, 1 ), LDB, B( I+1, 1 ), LDB ) + END IF + END DO +* + DO I = 2, M +* +* T(I,1:I-1) := C(I:I-1,1:N) * (alpha * C(I,I:N)^H) +* + ALPHA = -T( 1, I ) + + DO J = 1, I-1 + T( I, J ) = ZERO + END DO + P = MIN( I-1, L ) + NP = MIN( N-L+1, N ) + MP = MIN( P+1, M ) +* +* Triangular part of B2 +* + DO J = 1, P + T( I, J ) = ALPHA*B( I, N-L+J ) + END DO + CALL STRMV( 'L', 'N', 'N', P, B( 1, NP ), LDB, + $ T( I, 1 ), LDT ) +* +* Rectangular part of B2 +* + CALL SGEMV( 'N', I-1-P, L, ALPHA, B( MP, NP ), LDB, + $ B( I, NP ), LDB, ZERO, T( I,MP ), LDT ) +* +* B1 +* + CALL SGEMV( 'N', I-1, N-L, ALPHA, B, LDB, B( I, 1 ), LDB, + $ ONE, T( I, 1 ), LDT ) +* +* T(1:I-1,I) := T(1:I-1,1:I-1) * T(I,1:I-1) +* + CALL STRMV( 'L', 'T', 'N', I-1, T, LDT, T( I, 1 ), LDT ) +* +* T(I,I) = tau(I) +* + T( I, I ) = T( 1, I ) + T( 1, I ) = ZERO + END DO + DO I=1,M + DO J= I+1,M + T(I,J)=T(J,I) + T(J,I)= ZERO + END DO + END DO + +* +* End of STPLQT2 +* + END diff --git a/dspl/liblapack/SRC/stpmlqt.f b/dspl/liblapack/SRC/stpmlqt.f new file mode 100644 index 0000000..565dadd --- /dev/null +++ b/dspl/liblapack/SRC/stpmlqt.f @@ -0,0 +1,366 @@ +*> \brief \b DTPMLQT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTPMQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STPMLQT( SIDE, TRANS, M, N, K, L, MB, V, LDV, T, LDT, +* A, LDA, B, LDB, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDV, LDA, LDB, M, N, L, MB, LDT +* .. +* .. Array Arguments .. +* REAL V( LDV, * ), A( LDA, * ), B( LDB, * ), +* $ T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTPMQRT applies a real orthogonal matrix Q obtained from a +*> "triangular-pentagonal" real block reflector H to a general +*> real matrix C, which consists of two blocks A and B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The order of the trapezoidal part of V. +*> K >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The block size used for the storage of T. K >= MB >= 1. +*> This must be the same value of MB used to generate T +*> in DTPLQT. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension (LDA,K) +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DTPLQT in B. See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If SIDE = 'L', LDV >= max(1,M); +*> if SIDE = 'R', LDV >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is REAL array, dimension (LDT,K) +*> The upper triangular factors of the block reflectors +*> as returned by DTPLQT, stored as a MB-by-K matrix. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension +*> (LDA,N) if SIDE = 'L' or +*> (LDA,K) if SIDE = 'R' +*> On entry, the K-by-N or M-by-K matrix A. +*> On exit, A is overwritten by the corresponding block of +*> Q*C or Q**T*C or C*Q or C*Q**T. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDC >= max(1,K); +*> If SIDE = 'R', LDC >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> On entry, the M-by-N matrix B. +*> On exit, B is overwritten by the corresponding block of +*> Q*C or Q**T*C or C*Q or C*Q**T. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. +*> LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array. The dimension of WORK is +*> N*MB if SIDE = 'L', or M*MB if SIDE = 'R'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The columns of the pentagonal matrix V contain the elementary reflectors +*> H(1), H(2), ..., H(K); V is composed of a rectangular block V1 and a +*> trapezoidal block V2: +*> +*> V = [V1] [V2]. +*> +*> +*> The size of the trapezoidal block V2 is determined by the parameter L, +*> where 0 <= L <= K; V2 is lower trapezoidal, consisting of the first L +*> rows of a K-by-K upper triangular matrix. If L=K, V2 is lower triangular; +*> if L=0, there is no trapezoidal block, hence V = V1 is rectangular. +*> +*> If SIDE = 'L': C = [A] where A is K-by-N, B is M-by-N and V is K-by-M. +*> [B] +*> +*> If SIDE = 'R': C = [A B] where A is M-by-K, B is M-by-N and V is K-by-N. +*> +*> The real orthogonal matrix Q is formed from V and T. +*> +*> If TRANS='N' and SIDE='L', C is on exit replaced with Q * C. +*> +*> If TRANS='T' and SIDE='L', C is on exit replaced with Q**T * C. +*> +*> If TRANS='N' and SIDE='R', C is on exit replaced with C * Q. +*> +*> If TRANS='T' and SIDE='R', C is on exit replaced with C * Q**T. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STPMLQT( SIDE, TRANS, M, N, K, L, MB, V, LDV, T, LDT, + $ A, LDA, B, LDB, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDV, LDA, LDB, M, N, L, MB, LDT +* .. +* .. Array Arguments .. + REAL V( LDV, * ), A( LDA, * ), B( LDB, * ), + $ T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN + INTEGER I, IB, NB, LB, KF, LDAQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLARFB, STPRFB, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* .. Test the input arguments .. +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + TRAN = LSAME( TRANS, 'T' ) + NOTRAN = LSAME( TRANS, 'N' ) +* + IF ( LEFT ) THEN + LDAQ = MAX( 1, K ) + ELSE IF ( RIGHT ) THEN + LDAQ = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. L.GT.K ) THEN + INFO = -6 + ELSE IF( MB.LT.1 .OR. (MB.GT.K .AND. K.GT.0) ) THEN + INFO = -7 + ELSE IF( LDV.LT.K ) THEN + INFO = -9 + ELSE IF( LDT.LT.MB ) THEN + INFO = -11 + ELSE IF( LDA.LT.LDAQ ) THEN + INFO = -13 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -15 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STPMLQT', -INFO ) + RETURN + END IF +* +* .. Quick return if possible .. +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) RETURN +* + IF( LEFT .AND. NOTRAN ) THEN +* + DO I = 1, K, MB + IB = MIN( MB, K-I+1 ) + NB = MIN( M-L+I+IB-1, M ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = 0 + END IF + CALL STPRFB( 'L', 'T', 'F', 'R', NB, N, IB, LB, + $ V( I, 1 ), LDV, T( 1, I ), LDT, + $ A( I, 1 ), LDA, B, LDB, WORK, IB ) + END DO +* + ELSE IF( RIGHT .AND. TRAN ) THEN +* + DO I = 1, K, MB + IB = MIN( MB, K-I+1 ) + NB = MIN( N-L+I+IB-1, N ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = NB-N+L-I+1 + END IF + CALL STPRFB( 'R', 'N', 'F', 'R', M, NB, IB, LB, + $ V( I, 1 ), LDV, T( 1, I ), LDT, + $ A( 1, I ), LDA, B, LDB, WORK, M ) + END DO +* + ELSE IF( LEFT .AND. TRAN ) THEN +* + KF = ((K-1)/MB)*MB+1 + DO I = KF, 1, -MB + IB = MIN( MB, K-I+1 ) + NB = MIN( M-L+I+IB-1, M ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = 0 + END IF + CALL STPRFB( 'L', 'N', 'F', 'R', NB, N, IB, LB, + $ V( I, 1 ), LDV, T( 1, I ), LDT, + $ A( I, 1 ), LDA, B, LDB, WORK, IB ) + END DO +* + ELSE IF( RIGHT .AND. NOTRAN ) THEN +* + KF = ((K-1)/MB)*MB+1 + DO I = KF, 1, -MB + IB = MIN( MB, K-I+1 ) + NB = MIN( N-L+I+IB-1, N ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = NB-N+L-I+1 + END IF + CALL STPRFB( 'R', 'T', 'F', 'R', M, NB, IB, LB, + $ V( I, 1 ), LDV, T( 1, I ), LDT, + $ A( 1, I ), LDA, B, LDB, WORK, M ) + END DO +* + END IF +* + RETURN +* +* End of STPMLQT +* + END diff --git a/dspl/liblapack/SRC/stpmqrt.f b/dspl/liblapack/SRC/stpmqrt.f new file mode 100644 index 0000000..b1813b7 --- /dev/null +++ b/dspl/liblapack/SRC/stpmqrt.f @@ -0,0 +1,368 @@ +*> \brief \b STPMQRT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STPMQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STPMQRT( SIDE, TRANS, M, N, K, L, NB, V, LDV, T, LDT, +* A, LDA, B, LDB, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDV, LDA, LDB, M, N, L, NB, LDT +* .. +* .. Array Arguments .. +* REAL V( LDV, * ), A( LDA, * ), B( LDB, * ), T( LDT, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STPMQRT applies a real orthogonal matrix Q obtained from a +*> "triangular-pentagonal" real block reflector H to a general +*> real matrix C, which consists of two blocks A and B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q^T from the Left; +*> = 'R': apply Q or Q^T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q^T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The order of the trapezoidal part of V. +*> K >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The block size used for the storage of T. K >= NB >= 1. +*> This must be the same value of NB used to generate T +*> in CTPQRT. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> CTPQRT in B. See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If SIDE = 'L', LDV >= max(1,M); +*> if SIDE = 'R', LDV >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is REAL array, dimension (LDT,K) +*> The upper triangular factors of the block reflectors +*> as returned by CTPQRT, stored as a NB-by-K matrix. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension +*> (LDA,N) if SIDE = 'L' or +*> (LDA,K) if SIDE = 'R' +*> On entry, the K-by-N or M-by-K matrix A. +*> On exit, A is overwritten by the corresponding block of +*> Q*C or Q^T*C or C*Q or C*Q^T. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDC >= max(1,K); +*> If SIDE = 'R', LDC >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> On entry, the M-by-N matrix B. +*> On exit, B is overwritten by the corresponding block of +*> Q*C or Q^T*C or C*Q or C*Q^T. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. +*> LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array. The dimension of WORK is +*> N*NB if SIDE = 'L', or M*NB if SIDE = 'R'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The columns of the pentagonal matrix V contain the elementary reflectors +*> H(1), H(2), ..., H(K); V is composed of a rectangular block V1 and a +*> trapezoidal block V2: +*> +*> V = [V1] +*> [V2]. +*> +*> The size of the trapezoidal block V2 is determined by the parameter L, +*> where 0 <= L <= K; V2 is upper trapezoidal, consisting of the first L +*> rows of a K-by-K upper triangular matrix. If L=K, V2 is upper triangular; +*> if L=0, there is no trapezoidal block, hence V = V1 is rectangular. +*> +*> If SIDE = 'L': C = [A] where A is K-by-N, B is M-by-N and V is M-by-K. +*> [B] +*> +*> If SIDE = 'R': C = [A B] where A is M-by-K, B is M-by-N and V is N-by-K. +*> +*> The real orthogonal matrix Q is formed from V and T. +*> +*> If TRANS='N' and SIDE='L', C is on exit replaced with Q * C. +*> +*> If TRANS='T' and SIDE='L', C is on exit replaced with Q^T * C. +*> +*> If TRANS='N' and SIDE='R', C is on exit replaced with C * Q. +*> +*> If TRANS='T' and SIDE='R', C is on exit replaced with C * Q^T. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STPMQRT( SIDE, TRANS, M, N, K, L, NB, V, LDV, T, LDT, + $ A, LDA, B, LDB, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDV, LDA, LDB, M, N, L, NB, LDT +* .. +* .. Array Arguments .. + REAL V( LDV, * ), A( LDA, * ), B( LDB, * ), T( LDT, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN + INTEGER I, IB, MB, LB, KF, LDAQ, LDVQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL STPRFB, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* .. Test the input arguments .. +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + TRAN = LSAME( TRANS, 'T' ) + NOTRAN = LSAME( TRANS, 'N' ) +* + IF ( LEFT ) THEN + LDVQ = MAX( 1, M ) + LDAQ = MAX( 1, K ) + ELSE IF ( RIGHT ) THEN + LDVQ = MAX( 1, N ) + LDAQ = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. L.GT.K ) THEN + INFO = -6 + ELSE IF( NB.LT.1 .OR. (NB.GT.K .AND. K.GT.0) ) THEN + INFO = -7 + ELSE IF( LDV.LT.LDVQ ) THEN + INFO = -9 + ELSE IF( LDT.LT.NB ) THEN + INFO = -11 + ELSE IF( LDA.LT.LDAQ ) THEN + INFO = -13 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -15 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STPMQRT', -INFO ) + RETURN + END IF +* +* .. Quick return if possible .. +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) RETURN +* + IF( LEFT .AND. TRAN ) THEN +* + DO I = 1, K, NB + IB = MIN( NB, K-I+1 ) + MB = MIN( M-L+I+IB-1, M ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = MB-M+L-I+1 + END IF + CALL STPRFB( 'L', 'T', 'F', 'C', MB, N, IB, LB, + $ V( 1, I ), LDV, T( 1, I ), LDT, + $ A( I, 1 ), LDA, B, LDB, WORK, IB ) + END DO +* + ELSE IF( RIGHT .AND. NOTRAN ) THEN +* + DO I = 1, K, NB + IB = MIN( NB, K-I+1 ) + MB = MIN( N-L+I+IB-1, N ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = MB-N+L-I+1 + END IF + CALL STPRFB( 'R', 'N', 'F', 'C', M, MB, IB, LB, + $ V( 1, I ), LDV, T( 1, I ), LDT, + $ A( 1, I ), LDA, B, LDB, WORK, M ) + END DO +* + ELSE IF( LEFT .AND. NOTRAN ) THEN +* + KF = ((K-1)/NB)*NB+1 + DO I = KF, 1, -NB + IB = MIN( NB, K-I+1 ) + MB = MIN( M-L+I+IB-1, M ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = MB-M+L-I+1 + END IF + CALL STPRFB( 'L', 'N', 'F', 'C', MB, N, IB, LB, + $ V( 1, I ), LDV, T( 1, I ), LDT, + $ A( I, 1 ), LDA, B, LDB, WORK, IB ) + END DO +* + ELSE IF( RIGHT .AND. TRAN ) THEN +* + KF = ((K-1)/NB)*NB+1 + DO I = KF, 1, -NB + IB = MIN( NB, K-I+1 ) + MB = MIN( N-L+I+IB-1, N ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = MB-N+L-I+1 + END IF + CALL STPRFB( 'R', 'T', 'F', 'C', M, MB, IB, LB, + $ V( 1, I ), LDV, T( 1, I ), LDT, + $ A( 1, I ), LDA, B, LDB, WORK, M ) + END DO +* + END IF +* + RETURN +* +* End of STPMQRT +* + END diff --git a/dspl/liblapack/SRC/stpqrt.f b/dspl/liblapack/SRC/stpqrt.f new file mode 100644 index 0000000..815d83a --- /dev/null +++ b/dspl/liblapack/SRC/stpqrt.f @@ -0,0 +1,270 @@ +*> \brief \b STPQRT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STPQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STPQRT( M, N, L, NB, A, LDA, B, LDB, T, LDT, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LDT, N, M, L, NB +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STPQRT computes a blocked QR factorization of a real +*> "triangular-pentagonal" matrix C, which is composed of a +*> triangular block A and pentagonal block B, using the compact +*> WY representation for Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B, and the order of the +*> triangular matrix A. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of rows of the upper trapezoidal part of B. +*> MIN(M,N) >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The block size to be used in the blocked QR. N >= NB >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the upper triangular N-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the upper triangular matrix R. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> On entry, the pentagonal M-by-N matrix B. The first M-L rows +*> are rectangular, and the last L rows are upper trapezoidal. +*> On exit, B contains the pentagonal matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (LDT,N) +*> The upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See Further Details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (NB*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The input matrix C is a (N+M)-by-N matrix +*> +*> C = [ A ] +*> [ B ] +*> +*> where A is an upper triangular N-by-N matrix, and B is M-by-N pentagonal +*> matrix consisting of a (M-L)-by-N rectangular matrix B1 on top of a L-by-N +*> upper trapezoidal matrix B2: +*> +*> B = [ B1 ] <- (M-L)-by-N rectangular +*> [ B2 ] <- L-by-N upper trapezoidal. +*> +*> The upper trapezoidal matrix B2 consists of the first L rows of a +*> N-by-N upper triangular matrix, where 0 <= L <= MIN(M,N). If L=0, +*> B is rectangular M-by-N; if M=L=N, B is upper triangular. +*> +*> The matrix W stores the elementary reflectors H(i) in the i-th column +*> below the diagonal (of A) in the (N+M)-by-N input matrix C +*> +*> C = [ A ] <- upper triangular N-by-N +*> [ B ] <- M-by-N pentagonal +*> +*> so that W can be represented as +*> +*> W = [ I ] <- identity, N-by-N +*> [ V ] <- M-by-N, same form as B. +*> +*> Thus, all of information needed for W is contained on exit in B, which +*> we call V above. Note that V has the same form as B; that is, +*> +*> V = [ V1 ] <- (M-L)-by-N rectangular +*> [ V2 ] <- L-by-N upper trapezoidal. +*> +*> The columns of V represent the vectors which define the H(i)'s. +*> +*> The number of blocks is B = ceiling(N/NB), where each +*> block is of order NB except for the last block, which is of order +*> IB = N - (B-1)*NB. For each of the B blocks, a upper triangular block +*> reflector factor is computed: T1, T2, ..., TB. The NB-by-NB (and IB-by-IB +*> for the last block) T's are stored in the NB-by-N matrix T as +*> +*> T = [T1 T2 ... TB]. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STPQRT( M, N, L, NB, A, LDA, B, LDB, T, LDT, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LDT, N, M, L, NB +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + INTEGER I, IB, LB, MB, IINFO +* .. +* .. External Subroutines .. + EXTERNAL STPQRT2, STPRFB, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( L.LT.0 .OR. (L.GT.MIN(M,N) .AND. MIN(M,N).GE.0)) THEN + INFO = -3 + ELSE IF( NB.LT.1 .OR. (NB.GT.N .AND. N.GT.0)) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -8 + ELSE IF( LDT.LT.NB ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STPQRT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) RETURN +* + DO I = 1, N, NB +* +* Compute the QR factorization of the current block +* + IB = MIN( N-I+1, NB ) + MB = MIN( M-L+I+IB-1, M ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = MB-M+L-I+1 + END IF +* + CALL STPQRT2( MB, IB, LB, A(I,I), LDA, B( 1, I ), LDB, + $ T(1, I ), LDT, IINFO ) +* +* Update by applying H^H to B(:,I+IB:N) from the left +* + IF( I+IB.LE.N ) THEN + CALL STPRFB( 'L', 'T', 'F', 'C', MB, N-I-IB+1, IB, LB, + $ B( 1, I ), LDB, T( 1, I ), LDT, + $ A( I, I+IB ), LDA, B( 1, I+IB ), LDB, + $ WORK, IB ) + END IF + END DO + RETURN +* +* End of STPQRT +* + END diff --git a/dspl/liblapack/SRC/stpqrt2.f b/dspl/liblapack/SRC/stpqrt2.f new file mode 100644 index 0000000..f235712 --- /dev/null +++ b/dspl/liblapack/SRC/stpqrt2.f @@ -0,0 +1,302 @@ +*> \brief \b STPQRT2 computes a QR factorization of a real or complex "triangular-pentagonal" matrix, which is composed of a triangular block and a pentagonal block, using the compact WY representation for Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STPQRT2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STPQRT2( M, N, L, A, LDA, B, LDB, T, LDT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LDT, N, M, L +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STPQRT2 computes a QR factorization of a real "triangular-pentagonal" +*> matrix C, which is composed of a triangular block A and pentagonal block B, +*> using the compact WY representation for Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The total number of rows of the matrix B. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B, and the order of +*> the triangular matrix A. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of rows of the upper trapezoidal part of B. +*> MIN(M,N) >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the upper triangular N-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the upper triangular matrix R. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> On entry, the pentagonal M-by-N matrix B. The first M-L rows +*> are rectangular, and the last L rows are upper trapezoidal. +*> On exit, B contains the pentagonal matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is REAL array, dimension (LDT,N) +*> The N-by-N upper triangular factor T of the block reflector. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The input matrix C is a (N+M)-by-N matrix +*> +*> C = [ A ] +*> [ B ] +*> +*> where A is an upper triangular N-by-N matrix, and B is M-by-N pentagonal +*> matrix consisting of a (M-L)-by-N rectangular matrix B1 on top of a L-by-N +*> upper trapezoidal matrix B2: +*> +*> B = [ B1 ] <- (M-L)-by-N rectangular +*> [ B2 ] <- L-by-N upper trapezoidal. +*> +*> The upper trapezoidal matrix B2 consists of the first L rows of a +*> N-by-N upper triangular matrix, where 0 <= L <= MIN(M,N). If L=0, +*> B is rectangular M-by-N; if M=L=N, B is upper triangular. +*> +*> The matrix W stores the elementary reflectors H(i) in the i-th column +*> below the diagonal (of A) in the (N+M)-by-N input matrix C +*> +*> C = [ A ] <- upper triangular N-by-N +*> [ B ] <- M-by-N pentagonal +*> +*> so that W can be represented as +*> +*> W = [ I ] <- identity, N-by-N +*> [ V ] <- M-by-N, same form as B. +*> +*> Thus, all of information needed for W is contained on exit in B, which +*> we call V above. Note that V has the same form as B; that is, +*> +*> V = [ V1 ] <- (M-L)-by-N rectangular +*> [ V2 ] <- L-by-N upper trapezoidal. +*> +*> The columns of V represent the vectors which define the H(i)'s. +*> The (M+N)-by-(M+N) block reflector H is then given by +*> +*> H = I - W * T * W^H +*> +*> where W^H is the conjugate transpose of W and T is the upper triangular +*> factor of the block reflector. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STPQRT2( M, N, L, A, LDA, B, LDB, T, LDT, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LDT, N, M, L +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER( ONE = 1.0, ZERO = 0.0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, P, MP, NP + REAL ALPHA +* .. +* .. External Subroutines .. + EXTERNAL SLARFG, SGEMV, SGER, STRMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( L.LT.0 .OR. L.GT.MIN(M,N) ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STPQRT2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) RETURN +* + DO I = 1, N +* +* Generate elementary reflector H(I) to annihilate B(:,I) +* + P = M-L+MIN( L, I ) + CALL SLARFG( P+1, A( I, I ), B( 1, I ), 1, T( I, 1 ) ) + IF( I.LT.N ) THEN +* +* W(1:N-I) := C(I:M,I+1:N)^H * C(I:M,I) [use W = T(:,N)] +* + DO J = 1, N-I + T( J, N ) = (A( I, I+J )) + END DO + CALL SGEMV( 'T', P, N-I, ONE, B( 1, I+1 ), LDB, + $ B( 1, I ), 1, ONE, T( 1, N ), 1 ) +* +* C(I:M,I+1:N) = C(I:m,I+1:N) + alpha*C(I:M,I)*W(1:N-1)^H +* + ALPHA = -(T( I, 1 )) + DO J = 1, N-I + A( I, I+J ) = A( I, I+J ) + ALPHA*(T( J, N )) + END DO + CALL SGER( P, N-I, ALPHA, B( 1, I ), 1, + $ T( 1, N ), 1, B( 1, I+1 ), LDB ) + END IF + END DO +* + DO I = 2, N +* +* T(1:I-1,I) := C(I:M,1:I-1)^H * (alpha * C(I:M,I)) +* + ALPHA = -T( I, 1 ) + + DO J = 1, I-1 + T( J, I ) = ZERO + END DO + P = MIN( I-1, L ) + MP = MIN( M-L+1, M ) + NP = MIN( P+1, N ) +* +* Triangular part of B2 +* + DO J = 1, P + T( J, I ) = ALPHA*B( M-L+J, I ) + END DO + CALL STRMV( 'U', 'T', 'N', P, B( MP, 1 ), LDB, + $ T( 1, I ), 1 ) +* +* Rectangular part of B2 +* + CALL SGEMV( 'T', L, I-1-P, ALPHA, B( MP, NP ), LDB, + $ B( MP, I ), 1, ZERO, T( NP, I ), 1 ) +* +* B1 +* + CALL SGEMV( 'T', M-L, I-1, ALPHA, B, LDB, B( 1, I ), 1, + $ ONE, T( 1, I ), 1 ) +* +* T(1:I-1,I) := T(1:I-1,1:I-1) * T(1:I-1,I) +* + CALL STRMV( 'U', 'N', 'N', I-1, T, LDT, T( 1, I ), 1 ) +* +* T(I,I) = tau(I) +* + T( I, I ) = T( I, 1 ) + T( I, 1 ) = ZERO + END DO + +* +* End of STPQRT2 +* + END diff --git a/dspl/liblapack/SRC/stprfb.f b/dspl/liblapack/SRC/stprfb.f new file mode 100644 index 0000000..66e6725 --- /dev/null +++ b/dspl/liblapack/SRC/stprfb.f @@ -0,0 +1,811 @@ +*> \brief \b STPRFB applies a real or complex "triangular-pentagonal" blocked reflector to a real or complex matrix, which is composed of two blocks. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STPRFB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STPRFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, +* V, LDV, T, LDT, A, LDA, B, LDB, WORK, LDWORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, SIDE, STOREV, TRANS +* INTEGER K, L, LDA, LDB, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), T( LDT, * ), +* $ V( LDV, * ), WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STPRFB applies a real "triangular-pentagonal" block reflector H or its +*> conjugate transpose H^H to a real matrix C, which is composed of two +*> blocks A and B, either from the left or right. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply H or H^H from the Left +*> = 'R': apply H or H^H from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply H (No transpose) +*> = 'C': apply H^H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Indicates how H is formed from a product of elementary +*> reflectors +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Indicates how the vectors which define the elementary +*> reflectors are stored: +*> = 'C': Columns +*> = 'R': Rows +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the matrix T, i.e. the number of elementary +*> reflectors whose product defines the block reflector. +*> K >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The order of the trapezoidal part of V. +*> K >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is REAL array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,M) if STOREV = 'R' and SIDE = 'L' +*> (LDV,N) if STOREV = 'R' and SIDE = 'R' +*> The pentagonal matrix V, which contains the elementary reflectors +*> H(1), H(2), ..., H(K). See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); +*> if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); +*> if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is REAL array, dimension (LDT,K) +*> The triangular K-by-K matrix T in the representation of the +*> block reflector. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. +*> LDT >= K. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension +*> (LDA,N) if SIDE = 'L' or (LDA,K) if SIDE = 'R' +*> On entry, the K-by-N or M-by-K matrix A. +*> On exit, A is overwritten by the corresponding block of +*> H*C or H^H*C or C*H or C*H^H. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDC >= max(1,K); +*> If SIDE = 'R', LDC >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> On entry, the M-by-N matrix B. +*> On exit, B is overwritten by the corresponding block of +*> H*C or H^H*C or C*H or C*H^H. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. +*> LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension +*> (LDWORK,N) if SIDE = 'L', +*> (LDWORK,K) if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. +*> If SIDE = 'L', LDWORK >= K; +*> if SIDE = 'R', LDWORK >= M. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix C is a composite matrix formed from blocks A and B. +*> The block B is of size M-by-N; if SIDE = 'R', A is of size M-by-K, +*> and if SIDE = 'L', A is of size K-by-N. +*> +*> If SIDE = 'R' and DIRECT = 'F', C = [A B]. +*> +*> If SIDE = 'L' and DIRECT = 'F', C = [A] +*> [B]. +*> +*> If SIDE = 'R' and DIRECT = 'B', C = [B A]. +*> +*> If SIDE = 'L' and DIRECT = 'B', C = [B] +*> [A]. +*> +*> The pentagonal matrix V is composed of a rectangular block V1 and a +*> trapezoidal block V2. The size of the trapezoidal block is determined by +*> the parameter L, where 0<=L<=K. If L=K, the V2 block of V is triangular; +*> if L=0, there is no trapezoidal block, thus V = V1 is rectangular. +*> +*> If DIRECT = 'F' and STOREV = 'C': V = [V1] +*> [V2] +*> - V2 is upper trapezoidal (first L rows of K-by-K upper triangular) +*> +*> If DIRECT = 'F' and STOREV = 'R': V = [V1 V2] +*> +*> - V2 is lower trapezoidal (first L columns of K-by-K lower triangular) +*> +*> If DIRECT = 'B' and STOREV = 'C': V = [V2] +*> [V1] +*> - V2 is lower trapezoidal (last L rows of K-by-K lower triangular) +*> +*> If DIRECT = 'B' and STOREV = 'R': V = [V2 V1] +*> +*> - V2 is upper trapezoidal (last L columns of K-by-K upper triangular) +*> +*> If STOREV = 'C' and SIDE = 'L', V is M-by-K with V2 L-by-K. +*> +*> If STOREV = 'C' and SIDE = 'R', V is N-by-K with V2 L-by-K. +*> +*> If STOREV = 'R' and SIDE = 'L', V is K-by-M with V2 K-by-L. +*> +*> If STOREV = 'R' and SIDE = 'R', V is K-by-N with V2 K-by-L. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STPRFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, + $ V, LDV, T, LDT, A, LDA, B, LDB, WORK, LDWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, L, LDA, LDB, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), T( LDT, * ), + $ V( LDV, * ), WORK( LDWORK, * ) +* .. +* +* ========================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0, ZERO = 0.0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, MP, NP, KP + LOGICAL LEFT, FORWARD, COLUMN, RIGHT, BACKWARD, ROW +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SGEMM, STRMM +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 .OR. L.LT.0 ) RETURN +* + IF( LSAME( STOREV, 'C' ) ) THEN + COLUMN = .TRUE. + ROW = .FALSE. + ELSE IF ( LSAME( STOREV, 'R' ) ) THEN + COLUMN = .FALSE. + ROW = .TRUE. + ELSE + COLUMN = .FALSE. + ROW = .FALSE. + END IF +* + IF( LSAME( SIDE, 'L' ) ) THEN + LEFT = .TRUE. + RIGHT = .FALSE. + ELSE IF( LSAME( SIDE, 'R' ) ) THEN + LEFT = .FALSE. + RIGHT = .TRUE. + ELSE + LEFT = .FALSE. + RIGHT = .FALSE. + END IF +* + IF( LSAME( DIRECT, 'F' ) ) THEN + FORWARD = .TRUE. + BACKWARD = .FALSE. + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + FORWARD = .FALSE. + BACKWARD = .TRUE. + ELSE + FORWARD = .FALSE. + BACKWARD = .FALSE. + END IF +* +* --------------------------------------------------------------------------- +* + IF( COLUMN .AND. FORWARD .AND. LEFT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ I ] (K-by-K) +* [ V ] (M-by-K) +* +* Form H C or H^H C where C = [ A ] (K-by-N) +* [ B ] (M-by-N) +* +* H = I - W T W^H or H^H = I - W T^H W^H +* +* A = A - T (A + V^H B) or A = A - T^H (A + V^H B) +* B = B - V T (A + V^H B) or B = B - V T^H (A + V^H B) +* +* --------------------------------------------------------------------------- +* + MP = MIN( M-L+1, M ) + KP = MIN( L+1, K ) +* + DO J = 1, N + DO I = 1, L + WORK( I, J ) = B( M-L+I, J ) + END DO + END DO + CALL STRMM( 'L', 'U', 'T', 'N', L, N, ONE, V( MP, 1 ), LDV, + $ WORK, LDWORK ) + CALL SGEMM( 'T', 'N', L, N, M-L, ONE, V, LDV, B, LDB, + $ ONE, WORK, LDWORK ) + CALL SGEMM( 'T', 'N', K-L, N, M, ONE, V( 1, KP ), LDV, + $ B, LDB, ZERO, WORK( KP, 1 ), LDWORK ) +* + DO J = 1, N + DO I = 1, K + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL STRMM( 'L', 'U', TRANS, 'N', K, N, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL SGEMM( 'N', 'N', M-L, N, K, -ONE, V, LDV, WORK, LDWORK, + $ ONE, B, LDB ) + CALL SGEMM( 'N', 'N', L, N, K-L, -ONE, V( MP, KP ), LDV, + $ WORK( KP, 1 ), LDWORK, ONE, B( MP, 1 ), LDB ) + CALL STRMM( 'L', 'U', 'N', 'N', L, N, ONE, V( MP, 1 ), LDV, + $ WORK, LDWORK ) + DO J = 1, N + DO I = 1, L + B( M-L+I, J ) = B( M-L+I, J ) - WORK( I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( COLUMN .AND. FORWARD .AND. RIGHT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ I ] (K-by-K) +* [ V ] (N-by-K) +* +* Form C H or C H^H where C = [ A B ] (A is M-by-K, B is M-by-N) +* +* H = I - W T W^H or H^H = I - W T^H W^H +* +* A = A - (A + B V) T or A = A - (A + B V) T^H +* B = B - (A + B V) T V^H or B = B - (A + B V) T^H V^H +* +* --------------------------------------------------------------------------- +* + NP = MIN( N-L+1, N ) + KP = MIN( L+1, K ) +* + DO J = 1, L + DO I = 1, M + WORK( I, J ) = B( I, N-L+J ) + END DO + END DO + CALL STRMM( 'R', 'U', 'N', 'N', M, L, ONE, V( NP, 1 ), LDV, + $ WORK, LDWORK ) + CALL SGEMM( 'N', 'N', M, L, N-L, ONE, B, LDB, + $ V, LDV, ONE, WORK, LDWORK ) + CALL SGEMM( 'N', 'N', M, K-L, N, ONE, B, LDB, + $ V( 1, KP ), LDV, ZERO, WORK( 1, KP ), LDWORK ) +* + DO J = 1, K + DO I = 1, M + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL STRMM( 'R', 'U', TRANS, 'N', M, K, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL SGEMM( 'N', 'T', M, N-L, K, -ONE, WORK, LDWORK, + $ V, LDV, ONE, B, LDB ) + CALL SGEMM( 'N', 'T', M, L, K-L, -ONE, WORK( 1, KP ), LDWORK, + $ V( NP, KP ), LDV, ONE, B( 1, NP ), LDB ) + CALL STRMM( 'R', 'U', 'T', 'N', M, L, ONE, V( NP, 1 ), LDV, + $ WORK, LDWORK ) + DO J = 1, L + DO I = 1, M + B( I, N-L+J ) = B( I, N-L+J ) - WORK( I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( COLUMN .AND. BACKWARD .AND. LEFT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ V ] (M-by-K) +* [ I ] (K-by-K) +* +* Form H C or H^H C where C = [ B ] (M-by-N) +* [ A ] (K-by-N) +* +* H = I - W T W^H or H^H = I - W T^H W^H +* +* A = A - T (A + V^H B) or A = A - T^H (A + V^H B) +* B = B - V T (A + V^H B) or B = B - V T^H (A + V^H B) +* +* --------------------------------------------------------------------------- +* + MP = MIN( L+1, M ) + KP = MIN( K-L+1, K ) +* + DO J = 1, N + DO I = 1, L + WORK( K-L+I, J ) = B( I, J ) + END DO + END DO +* + CALL STRMM( 'L', 'L', 'T', 'N', L, N, ONE, V( 1, KP ), LDV, + $ WORK( KP, 1 ), LDWORK ) + CALL SGEMM( 'T', 'N', L, N, M-L, ONE, V( MP, KP ), LDV, + $ B( MP, 1 ), LDB, ONE, WORK( KP, 1 ), LDWORK ) + CALL SGEMM( 'T', 'N', K-L, N, M, ONE, V, LDV, + $ B, LDB, ZERO, WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL STRMM( 'L', 'L', TRANS, 'N', K, N, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL SGEMM( 'N', 'N', M-L, N, K, -ONE, V( MP, 1 ), LDV, + $ WORK, LDWORK, ONE, B( MP, 1 ), LDB ) + CALL SGEMM( 'N', 'N', L, N, K-L, -ONE, V, LDV, + $ WORK, LDWORK, ONE, B, LDB ) + CALL STRMM( 'L', 'L', 'N', 'N', L, N, ONE, V( 1, KP ), LDV, + $ WORK( KP, 1 ), LDWORK ) + DO J = 1, N + DO I = 1, L + B( I, J ) = B( I, J ) - WORK( K-L+I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( COLUMN .AND. BACKWARD .AND. RIGHT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ V ] (N-by-K) +* [ I ] (K-by-K) +* +* Form C H or C H^H where C = [ B A ] (B is M-by-N, A is M-by-K) +* +* H = I - W T W^H or H^H = I - W T^H W^H +* +* A = A - (A + B V) T or A = A - (A + B V) T^H +* B = B - (A + B V) T V^H or B = B - (A + B V) T^H V^H +* +* --------------------------------------------------------------------------- +* + NP = MIN( L+1, N ) + KP = MIN( K-L+1, K ) +* + DO J = 1, L + DO I = 1, M + WORK( I, K-L+J ) = B( I, J ) + END DO + END DO + CALL STRMM( 'R', 'L', 'N', 'N', M, L, ONE, V( 1, KP ), LDV, + $ WORK( 1, KP ), LDWORK ) + CALL SGEMM( 'N', 'N', M, L, N-L, ONE, B( 1, NP ), LDB, + $ V( NP, KP ), LDV, ONE, WORK( 1, KP ), LDWORK ) + CALL SGEMM( 'N', 'N', M, K-L, N, ONE, B, LDB, + $ V, LDV, ZERO, WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL STRMM( 'R', 'L', TRANS, 'N', M, K, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL SGEMM( 'N', 'T', M, N-L, K, -ONE, WORK, LDWORK, + $ V( NP, 1 ), LDV, ONE, B( 1, NP ), LDB ) + CALL SGEMM( 'N', 'T', M, L, K-L, -ONE, WORK, LDWORK, + $ V, LDV, ONE, B, LDB ) + CALL STRMM( 'R', 'L', 'T', 'N', M, L, ONE, V( 1, KP ), LDV, + $ WORK( 1, KP ), LDWORK ) + DO J = 1, L + DO I = 1, M + B( I, J ) = B( I, J ) - WORK( I, K-L+J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( ROW .AND. FORWARD .AND. LEFT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ I V ] ( I is K-by-K, V is K-by-M ) +* +* Form H C or H^H C where C = [ A ] (K-by-N) +* [ B ] (M-by-N) +* +* H = I - W^H T W or H^H = I - W^H T^H W +* +* A = A - T (A + V B) or A = A - T^H (A + V B) +* B = B - V^H T (A + V B) or B = B - V^H T^H (A + V B) +* +* --------------------------------------------------------------------------- +* + MP = MIN( M-L+1, M ) + KP = MIN( L+1, K ) +* + DO J = 1, N + DO I = 1, L + WORK( I, J ) = B( M-L+I, J ) + END DO + END DO + CALL STRMM( 'L', 'L', 'N', 'N', L, N, ONE, V( 1, MP ), LDV, + $ WORK, LDB ) + CALL SGEMM( 'N', 'N', L, N, M-L, ONE, V, LDV,B, LDB, + $ ONE, WORK, LDWORK ) + CALL SGEMM( 'N', 'N', K-L, N, M, ONE, V( KP, 1 ), LDV, + $ B, LDB, ZERO, WORK( KP, 1 ), LDWORK ) +* + DO J = 1, N + DO I = 1, K + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL STRMM( 'L', 'U', TRANS, 'N', K, N, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL SGEMM( 'T', 'N', M-L, N, K, -ONE, V, LDV, WORK, LDWORK, + $ ONE, B, LDB ) + CALL SGEMM( 'T', 'N', L, N, K-L, -ONE, V( KP, MP ), LDV, + $ WORK( KP, 1 ), LDWORK, ONE, B( MP, 1 ), LDB ) + CALL STRMM( 'L', 'L', 'T', 'N', L, N, ONE, V( 1, MP ), LDV, + $ WORK, LDWORK ) + DO J = 1, N + DO I = 1, L + B( M-L+I, J ) = B( M-L+I, J ) - WORK( I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( ROW .AND. FORWARD .AND. RIGHT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ I V ] ( I is K-by-K, V is K-by-N ) +* +* Form C H or C H^H where C = [ A B ] (A is M-by-K, B is M-by-N) +* +* H = I - W^H T W or H^H = I - W^H T^H W +* +* A = A - (A + B V^H) T or A = A - (A + B V^H) T^H +* B = B - (A + B V^H) T V or B = B - (A + B V^H) T^H V +* +* --------------------------------------------------------------------------- +* + NP = MIN( N-L+1, N ) + KP = MIN( L+1, K ) +* + DO J = 1, L + DO I = 1, M + WORK( I, J ) = B( I, N-L+J ) + END DO + END DO + CALL STRMM( 'R', 'L', 'T', 'N', M, L, ONE, V( 1, NP ), LDV, + $ WORK, LDWORK ) + CALL SGEMM( 'N', 'T', M, L, N-L, ONE, B, LDB, V, LDV, + $ ONE, WORK, LDWORK ) + CALL SGEMM( 'N', 'T', M, K-L, N, ONE, B, LDB, + $ V( KP, 1 ), LDV, ZERO, WORK( 1, KP ), LDWORK ) +* + DO J = 1, K + DO I = 1, M + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL STRMM( 'R', 'U', TRANS, 'N', M, K, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL SGEMM( 'N', 'N', M, N-L, K, -ONE, WORK, LDWORK, + $ V, LDV, ONE, B, LDB ) + CALL SGEMM( 'N', 'N', M, L, K-L, -ONE, WORK( 1, KP ), LDWORK, + $ V( KP, NP ), LDV, ONE, B( 1, NP ), LDB ) + CALL STRMM( 'R', 'L', 'N', 'N', M, L, ONE, V( 1, NP ), LDV, + $ WORK, LDWORK ) + DO J = 1, L + DO I = 1, M + B( I, N-L+J ) = B( I, N-L+J ) - WORK( I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( ROW .AND. BACKWARD .AND. LEFT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ V I ] ( I is K-by-K, V is K-by-M ) +* +* Form H C or H^H C where C = [ B ] (M-by-N) +* [ A ] (K-by-N) +* +* H = I - W^H T W or H^H = I - W^H T^H W +* +* A = A - T (A + V B) or A = A - T^H (A + V B) +* B = B - V^H T (A + V B) or B = B - V^H T^H (A + V B) +* +* --------------------------------------------------------------------------- +* + MP = MIN( L+1, M ) + KP = MIN( K-L+1, K ) +* + DO J = 1, N + DO I = 1, L + WORK( K-L+I, J ) = B( I, J ) + END DO + END DO + CALL STRMM( 'L', 'U', 'N', 'N', L, N, ONE, V( KP, 1 ), LDV, + $ WORK( KP, 1 ), LDWORK ) + CALL SGEMM( 'N', 'N', L, N, M-L, ONE, V( KP, MP ), LDV, + $ B( MP, 1 ), LDB, ONE, WORK( KP, 1 ), LDWORK ) + CALL SGEMM( 'N', 'N', K-L, N, M, ONE, V, LDV, B, LDB, + $ ZERO, WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL STRMM( 'L', 'L ', TRANS, 'N', K, N, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL SGEMM( 'T', 'N', M-L, N, K, -ONE, V( 1, MP ), LDV, + $ WORK, LDWORK, ONE, B( MP, 1 ), LDB ) + CALL SGEMM( 'T', 'N', L, N, K-L, -ONE, V, LDV, + $ WORK, LDWORK, ONE, B, LDB ) + CALL STRMM( 'L', 'U', 'T', 'N', L, N, ONE, V( KP, 1 ), LDV, + $ WORK( KP, 1 ), LDWORK ) + DO J = 1, N + DO I = 1, L + B( I, J ) = B( I, J ) - WORK( K-L+I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( ROW .AND. BACKWARD .AND. RIGHT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ V I ] ( I is K-by-K, V is K-by-N ) +* +* Form C H or C H^H where C = [ B A ] (A is M-by-K, B is M-by-N) +* +* H = I - W^H T W or H^H = I - W^H T^H W +* +* A = A - (A + B V^H) T or A = A - (A + B V^H) T^H +* B = B - (A + B V^H) T V or B = B - (A + B V^H) T^H V +* +* --------------------------------------------------------------------------- +* + NP = MIN( L+1, N ) + KP = MIN( K-L+1, K ) +* + DO J = 1, L + DO I = 1, M + WORK( I, K-L+J ) = B( I, J ) + END DO + END DO + CALL STRMM( 'R', 'U', 'T', 'N', M, L, ONE, V( KP, 1 ), LDV, + $ WORK( 1, KP ), LDWORK ) + CALL SGEMM( 'N', 'T', M, L, N-L, ONE, B( 1, NP ), LDB, + $ V( KP, NP ), LDV, ONE, WORK( 1, KP ), LDWORK ) + CALL SGEMM( 'N', 'T', M, K-L, N, ONE, B, LDB, V, LDV, + $ ZERO, WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL STRMM( 'R', 'L', TRANS, 'N', M, K, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL SGEMM( 'N', 'N', M, N-L, K, -ONE, WORK, LDWORK, + $ V( 1, NP ), LDV, ONE, B( 1, NP ), LDB ) + CALL SGEMM( 'N', 'N', M, L, K-L , -ONE, WORK, LDWORK, + $ V, LDV, ONE, B, LDB ) + CALL STRMM( 'R', 'U', 'N', 'N', M, L, ONE, V( KP, 1 ), LDV, + $ WORK( 1, KP ), LDWORK ) + DO J = 1, L + DO I = 1, M + B( I, J ) = B( I, J ) - WORK( I, K-L+J ) + END DO + END DO +* + END IF +* + RETURN +* +* End of STPRFB +* + END diff --git a/dspl/liblapack/SRC/stprfs.f b/dspl/liblapack/SRC/stprfs.f new file mode 100644 index 0000000..76ae10d --- /dev/null +++ b/dspl/liblapack/SRC/stprfs.f @@ -0,0 +1,473 @@ +*> \brief \b STPRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STPRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STPRFS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, X, LDX, +* FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL AP( * ), B( LDB, * ), BERR( * ), FERR( * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STPRFS provides error bounds and backward error estimates for the +*> solution to a system of linear equations with a triangular packed +*> coefficient matrix. +*> +*> The solution matrix X must be computed by STPTRS or some other +*> means before entering this routine. STPRFS does not do iterative +*> refinement because doing so cannot improve the backward error. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> The upper or lower triangular matrix A, packed columnwise in +*> a linear array. The j-th column of A is stored in the array +*> AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> If DIAG = 'U', the diagonal elements of A are not referenced +*> and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> The solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE STPRFS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, X, LDX, + $ FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL AP( * ), B( LDB, * ), BERR( * ), FERR( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANST + INTEGER I, J, K, KASE, KC, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLACN2, STPMV, STPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A or A**T, depending on TRANS. +* + CALL SCOPY( N, X( 1, J ), 1, WORK( N+1 ), 1 ) + CALL STPMV( UPLO, TRANS, DIAG, N, AP, WORK( N+1 ), 1 ) + CALL SAXPY( N, -ONE, B( 1, J ), 1, WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + KC = 1 + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = ABS( X( K, J ) ) + DO 30 I = 1, K + WORK( I ) = WORK( I ) + ABS( AP( KC+I-1 ) )*XK + 30 CONTINUE + KC = KC + K + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = ABS( X( K, J ) ) + DO 50 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( AP( KC+I-1 ) )*XK + 50 CONTINUE + WORK( K ) = WORK( K ) + XK + KC = KC + K + 60 CONTINUE + END IF + ELSE + KC = 1 + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = ABS( X( K, J ) ) + DO 70 I = K, N + WORK( I ) = WORK( I ) + ABS( AP( KC+I-K ) )*XK + 70 CONTINUE + KC = KC + N - K + 1 + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = ABS( X( K, J ) ) + DO 90 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( AP( KC+I-K ) )*XK + 90 CONTINUE + WORK( K ) = WORK( K ) + XK + KC = KC + N - K + 1 + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A**T)*abs(X) + abs(B). +* + IF( UPPER ) THEN + KC = 1 + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = 1, K + S = S + ABS( AP( KC+I-1 ) )*ABS( X( I, J ) ) + 110 CONTINUE + WORK( K ) = WORK( K ) + S + KC = KC + K + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = ABS( X( K, J ) ) + DO 130 I = 1, K - 1 + S = S + ABS( AP( KC+I-1 ) )*ABS( X( I, J ) ) + 130 CONTINUE + WORK( K ) = WORK( K ) + S + KC = KC + K + 140 CONTINUE + END IF + ELSE + KC = 1 + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, N + S = S + ABS( AP( KC+I-K ) )*ABS( X( I, J ) ) + 150 CONTINUE + WORK( K ) = WORK( K ) + S + KC = KC + N - K + 1 + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = ABS( X( K, J ) ) + DO 170 I = K + 1, N + S = S + ABS( AP( KC+I-K ) )*ABS( X( I, J ) ) + 170 CONTINUE + WORK( K ) = WORK( K ) + S + KC = KC + N - K + 1 + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL SLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**T). +* + CALL STPSV( UPLO, TRANST, DIAG, N, AP, WORK( N+1 ), 1 ) + DO 220 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 230 CONTINUE + CALL STPSV( UPLO, TRANS, DIAG, N, AP, WORK( N+1 ), 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of STPRFS +* + END diff --git a/dspl/liblapack/SRC/stptri.f b/dspl/liblapack/SRC/stptri.f new file mode 100644 index 0000000..3acc709 --- /dev/null +++ b/dspl/liblapack/SRC/stptri.f @@ -0,0 +1,241 @@ +*> \brief \b STPTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STPTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STPTRI( UPLO, DIAG, N, AP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* REAL AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STPTRI computes the inverse of a real upper or lower triangular +*> matrix A stored in packed format. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangular matrix A, stored +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*((2*n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> On exit, the (triangular) inverse of the original matrix, in +*> the same packed storage format. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, A(i,i) is exactly zero. The triangular +*> matrix is singular and its inverse can not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> A triangular matrix A can be transferred to packed storage using one +*> of the following program segments: +*> +*> UPLO = 'U': UPLO = 'L': +*> +*> JC = 1 JC = 1 +*> DO 2 J = 1, N DO 2 J = 1, N +*> DO 1 I = 1, J DO 1 I = J, N +*> AP(JC+I-1) = A(I,J) AP(JC+I-J) = A(I,J) +*> 1 CONTINUE 1 CONTINUE +*> JC = JC + J JC = JC + N - J + 1 +*> 2 CONTINUE 2 CONTINUE +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STPTRI( UPLO, DIAG, N, AP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JC, JCLAST, JJ + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, STPMV, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STPTRI', -INFO ) + RETURN + END IF +* +* Check for singularity if non-unit. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + JJ = 0 + DO 10 INFO = 1, N + JJ = JJ + INFO + IF( AP( JJ ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE + JJ = 1 + DO 20 INFO = 1, N + IF( AP( JJ ).EQ.ZERO ) + $ RETURN + JJ = JJ + N - INFO + 1 + 20 CONTINUE + END IF + INFO = 0 + END IF +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix. +* + JC = 1 + DO 30 J = 1, N + IF( NOUNIT ) THEN + AP( JC+J-1 ) = ONE / AP( JC+J-1 ) + AJJ = -AP( JC+J-1 ) + ELSE + AJJ = -ONE + END IF +* +* Compute elements 1:j-1 of j-th column. +* + CALL STPMV( 'Upper', 'No transpose', DIAG, J-1, AP, + $ AP( JC ), 1 ) + CALL SSCAL( J-1, AJJ, AP( JC ), 1 ) + JC = JC + J + 30 CONTINUE +* + ELSE +* +* Compute inverse of lower triangular matrix. +* + JC = N*( N+1 ) / 2 + DO 40 J = N, 1, -1 + IF( NOUNIT ) THEN + AP( JC ) = ONE / AP( JC ) + AJJ = -AP( JC ) + ELSE + AJJ = -ONE + END IF + IF( J.LT.N ) THEN +* +* Compute elements j+1:n of j-th column. +* + CALL STPMV( 'Lower', 'No transpose', DIAG, N-J, + $ AP( JCLAST ), AP( JC+1 ), 1 ) + CALL SSCAL( N-J, AJJ, AP( JC+1 ), 1 ) + END IF + JCLAST = JC + JC = JC - N + J - 2 + 40 CONTINUE + END IF +* + RETURN +* +* End of STPTRI +* + END diff --git a/dspl/liblapack/SRC/stptrs.f b/dspl/liblapack/SRC/stptrs.f new file mode 100644 index 0000000..fc422c5 --- /dev/null +++ b/dspl/liblapack/SRC/stptrs.f @@ -0,0 +1,228 @@ +*> \brief \b STPTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STPTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STPTRS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* REAL AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STPTRS solves a triangular system of the form +*> +*> A * X = B or A**T * X = B, +*> +*> where A is a triangular matrix of order N stored in packed format, +*> and B is an N-by-NRHS matrix. A check is made to verify that A is +*> nonsingular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> The upper or lower triangular matrix A, packed columnwise in +*> a linear array. The j-th column of A is stored in the array +*> AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, if INFO = 0, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of A is zero, +*> indicating that the matrix is singular and the +*> solutions X have not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE STPTRS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JC +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL STPSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + JC = 1 + DO 10 INFO = 1, N + IF( AP( JC+INFO-1 ).EQ.ZERO ) + $ RETURN + JC = JC + INFO + 10 CONTINUE + ELSE + JC = 1 + DO 20 INFO = 1, N + IF( AP( JC ).EQ.ZERO ) + $ RETURN + JC = JC + N - INFO + 1 + 20 CONTINUE + END IF + END IF + INFO = 0 +* +* Solve A * x = b or A**T * x = b. +* + DO 30 J = 1, NRHS + CALL STPSV( UPLO, TRANS, DIAG, N, AP, B( 1, J ), 1 ) + 30 CONTINUE +* + RETURN +* +* End of STPTRS +* + END diff --git a/dspl/liblapack/SRC/stpttf.f b/dspl/liblapack/SRC/stpttf.f new file mode 100644 index 0000000..230be17 --- /dev/null +++ b/dspl/liblapack/SRC/stpttf.f @@ -0,0 +1,502 @@ +*> \brief \b STPTTF copies a triangular matrix from the standard packed format (TP) to the rectangular full packed format (TF). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STPTTF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STPTTF( TRANSR, UPLO, N, AP, ARF, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* REAL AP( 0: * ), ARF( 0: * ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STPTTF copies a triangular matrix A from standard packed format (TP) +*> to rectangular full packed format (TF). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': ARF in Normal format is wanted; +*> = 'T': ARF in Conjugate-transpose format is wanted. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is REAL array, dimension ( N*(N+1)/2 ), +*> On entry, the upper or lower triangular matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] ARF +*> \verbatim +*> ARF is REAL array, dimension ( N*(N+1)/2 ), +*> On exit, the upper or lower triangular matrix A stored in +*> RFP format. For a further discussion see Notes below. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Rectangular Full Packed (RFP) Format when N is +*> even. We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> the transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> the transpose of the last three columns of AP lower. +*> This covers the case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 03 04 05 33 43 53 +*> 13 14 15 00 44 54 +*> 23 24 25 10 11 55 +*> 33 34 35 20 21 22 +*> 00 44 45 30 31 32 +*> 01 11 55 40 41 42 +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We then consider Rectangular Full Packed (RFP) Format when N is +*> odd. We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> the transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> the transpose of the last two columns of AP lower. +*> This covers the case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 02 03 04 00 33 43 +*> 12 13 14 10 11 44 +*> 22 23 24 20 21 22 +*> 00 33 34 30 31 32 +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> 02 12 22 00 01 00 10 20 30 40 50 +*> 03 13 23 33 11 33 11 21 31 41 51 +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STPTTF( TRANSR, UPLO, N, AP, ARF, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + REAL AP( 0: * ), ARF( 0: * ) +* +* ===================================================================== +* +* .. Parameters .. +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K, NT + INTEGER I, J, IJ + INTEGER IJP, JP, LDA, JS +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'T' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STPTTF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( NORMALTRANSR ) THEN + ARF( 0 ) = AP( 0 ) + ELSE + ARF( 0 ) = AP( 0 ) + END IF + RETURN + END IF +* +* Size of array ARF(0:NT-1) +* + NT = N*( N+1 ) / 2 +* +* Set N1 and N2 depending on LOWER +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* If N is odd, set NISODD = .TRUE. +* If N is even, set K = N/2 and NISODD = .FALSE. +* +* set lda of ARF^C; ARF^C is (0:(N+1)/2-1,0:N-noe) +* where noe = 0 if n is even, noe = 1 if n is odd +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + LDA = N + 1 + ELSE + NISODD = .TRUE. + LDA = N + END IF +* +* ARF^C has lda rows and n+1-noe cols +* + IF( .NOT.NORMALTRANSR ) + $ LDA = ( N+1 ) / 2 +* +* start execution: there are eight cases +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* N is odd, TRANSR = 'N', and UPLO = 'L' +* + IJP = 0 + JP = 0 + DO J = 0, N2 + DO I = J, N - 1 + IJ = I + JP + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JP = JP + LDA + END DO + DO I = 0, N2 - 1 + DO J = 1 + I, N2 + IJ = I + J*LDA + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + END DO +* + ELSE +* +* N is odd, TRANSR = 'N', and UPLO = 'U' +* + IJP = 0 + DO J = 0, N1 - 1 + IJ = N2 + J + DO I = 0, J + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + IJ = IJ + LDA + END DO + END DO + JS = 0 + DO J = N1, N - 1 + IJ = JS + DO IJ = JS, JS + J + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* N is odd, TRANSR = 'T', and UPLO = 'L' +* + IJP = 0 + DO I = 0, N2 + DO IJ = I*( LDA+1 ), N*LDA - 1, LDA + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + END DO + JS = 1 + DO J = 0, N2 - 1 + DO IJ = JS, JS + N2 - J - 1 + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + 1 + END DO +* + ELSE +* +* N is odd, TRANSR = 'T', and UPLO = 'U' +* + IJP = 0 + JS = N2*LDA + DO J = 0, N1 - 1 + DO IJ = JS, JS + J + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO + DO I = 0, N1 + DO IJ = I, I + ( N1+I )*LDA, LDA + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + END DO +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* N is even, TRANSR = 'N', and UPLO = 'L' +* + IJP = 0 + JP = 0 + DO J = 0, K - 1 + DO I = J, N - 1 + IJ = 1 + I + JP + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JP = JP + LDA + END DO + DO I = 0, K - 1 + DO J = I, K - 1 + IJ = I + J*LDA + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + END DO +* + ELSE +* +* N is even, TRANSR = 'N', and UPLO = 'U' +* + IJP = 0 + DO J = 0, K - 1 + IJ = K + 1 + J + DO I = 0, J + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + IJ = IJ + LDA + END DO + END DO + JS = 0 + DO J = K, N - 1 + IJ = JS + DO IJ = JS, JS + J + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO +* + END IF +* + ELSE +* +* N is even and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* N is even, TRANSR = 'T', and UPLO = 'L' +* + IJP = 0 + DO I = 0, K - 1 + DO IJ = I + ( I+1 )*LDA, ( N+1 )*LDA - 1, LDA + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + END DO + JS = 0 + DO J = 0, K - 1 + DO IJ = JS, JS + K - J - 1 + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + 1 + END DO +* + ELSE +* +* N is even, TRANSR = 'T', and UPLO = 'U' +* + IJP = 0 + JS = ( K+1 )*LDA + DO J = 0, K - 1 + DO IJ = JS, JS + J + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO + DO I = 0, K - 1 + DO IJ = I, I + ( K+I )*LDA, LDA + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + END DO +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of STPTTF +* + END diff --git a/dspl/liblapack/SRC/stpttr.f b/dspl/liblapack/SRC/stpttr.f new file mode 100644 index 0000000..a58f7f0 --- /dev/null +++ b/dspl/liblapack/SRC/stpttr.f @@ -0,0 +1,176 @@ +*> \brief \b STPTTR copies a triangular matrix from the standard packed format (TP) to the standard full format (TR). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STPTTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STPTTR( UPLO, N, AP, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STPTTR copies a triangular matrix A from standard packed format (TP) +*> to standard full format (TR). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular. +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is REAL array, dimension ( N*(N+1)/2 ), +*> On entry, the upper or lower triangular matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is REAL array, dimension ( LDA, N ) +*> On exit, the triangular matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE STPTTR( UPLO, N, AP, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. + REAL A( LDA, * ), AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* .. +* .. Local Scalars .. + LOGICAL LOWER + INTEGER I, J, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STPTTR', -INFO ) + RETURN + END IF +* + IF( LOWER ) THEN + K = 0 + DO J = 1, N + DO I = J, N + K = K + 1 + A( I, J ) = AP( K ) + END DO + END DO + ELSE + K = 0 + DO J = 1, N + DO I = 1, J + K = K + 1 + A( I, J ) = AP( K ) + END DO + END DO + END IF +* +* + RETURN +* +* End of STPTTR +* + END diff --git a/dspl/liblapack/SRC/strcon.f b/dspl/liblapack/SRC/strcon.f new file mode 100644 index 0000000..ec92b53 --- /dev/null +++ b/dspl/liblapack/SRC/strcon.f @@ -0,0 +1,276 @@ +*> \brief \b STRCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STRCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER INFO, LDA, N +* REAL RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STRCON estimates the reciprocal of the condition number of a +*> triangular matrix A, in either the 1-norm or the infinity-norm. +*> +*> The norm of A is computed and an estimate is obtained for +*> norm(inv(A)), then the reciprocal of the condition number is +*> computed as +*> RCOND = 1 / ( norm(A) * norm(inv(A)) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The triangular matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of the array A contains the upper +*> triangular matrix, and the strictly lower triangular part of +*> A is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of the array A contains the lower triangular +*> matrix, and the strictly upper triangular part of A is not +*> referenced. If DIAG = 'U', the diagonal elements of A are +*> also not referenced and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is REAL +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(norm(A) * norm(inv(A))). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE STRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, LDA, N + REAL RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + REAL AINVNM, ANORM, SCALE, SMLNUM, XNORM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SLAMCH, SLANTR + EXTERNAL LSAME, ISAMAX, SLAMCH, SLANTR +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SLATRS, SRSCL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = SLAMCH( 'Safe minimum' )*REAL( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = SLANTR( NORM, UPLO, DIAG, N, N, A, LDA, WORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL SLACN2( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL SLATRS( UPLO, 'No transpose', DIAG, NORMIN, N, A, + $ LDA, WORK, SCALE, WORK( 2*N+1 ), INFO ) + ELSE +* +* Multiply by inv(A**T). +* + CALL SLATRS( UPLO, 'Transpose', DIAG, NORMIN, N, A, LDA, + $ WORK, SCALE, WORK( 2*N+1 ), INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = ISAMAX( N, WORK, 1 ) + XNORM = ABS( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL SRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of STRCON +* + END diff --git a/dspl/liblapack/SRC/strevc.f b/dspl/liblapack/SRC/strevc.f new file mode 100644 index 0000000..37513a3 --- /dev/null +++ b/dspl/liblapack/SRC/strevc.f @@ -0,0 +1,1077 @@ +*> \brief \b STREVC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STREVC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, +* LDVR, MM, M, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER HOWMNY, SIDE +* INTEGER INFO, LDT, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* REAL T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STREVC computes some or all of the right and/or left eigenvectors of +*> a real upper quasi-triangular matrix T. +*> Matrices of this type are produced by the Schur factorization of +*> a real general matrix: A = Q*T*Q**T, as computed by SHSEQR. +*> +*> The right eigenvector x and the left eigenvector y of T corresponding +*> to an eigenvalue w are defined by: +*> +*> T*x = w*x, (y**H)*T = w*(y**H) +*> +*> where y**H denotes the conjugate transpose of y. +*> The eigenvalues are not input to this routine, but are read directly +*> from the diagonal blocks of T. +*> +*> This routine returns the matrices X and/or Y of right and left +*> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an +*> input matrix. If Q is the orthogonal factor that reduces a matrix +*> A to Schur form T, then Q*X and Q*Y are the matrices of right and +*> left eigenvectors of A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': compute right eigenvectors only; +*> = 'L': compute left eigenvectors only; +*> = 'B': compute both right and left eigenvectors. +*> \endverbatim +*> +*> \param[in] HOWMNY +*> \verbatim +*> HOWMNY is CHARACTER*1 +*> = 'A': compute all right and/or left eigenvectors; +*> = 'B': compute all right and/or left eigenvectors, +*> backtransformed by the matrices in VR and/or VL; +*> = 'S': compute selected right and/or left eigenvectors, +*> as indicated by the logical array SELECT. +*> \endverbatim +*> +*> \param[in,out] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> If HOWMNY = 'S', SELECT specifies the eigenvectors to be +*> computed. +*> If w(j) is a real eigenvalue, the corresponding real +*> eigenvector is computed if SELECT(j) is .TRUE.. +*> If w(j) and w(j+1) are the real and imaginary parts of a +*> complex eigenvalue, the corresponding complex eigenvector is +*> computed if either SELECT(j) or SELECT(j+1) is .TRUE., and +*> on exit SELECT(j) is set to .TRUE. and SELECT(j+1) is set to +*> .FALSE.. +*> Not referenced if HOWMNY = 'A' or 'B'. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is REAL array, dimension (LDT,N) +*> The upper quasi-triangular matrix T in Schur canonical form. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is REAL array, dimension (LDVL,MM) +*> On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +*> contain an N-by-N matrix Q (usually the orthogonal matrix Q +*> of Schur vectors returned by SHSEQR). +*> On exit, if SIDE = 'L' or 'B', VL contains: +*> if HOWMNY = 'A', the matrix Y of left eigenvectors of T; +*> if HOWMNY = 'B', the matrix Q*Y; +*> if HOWMNY = 'S', the left eigenvectors of T specified by +*> SELECT, stored consecutively in the columns +*> of VL, in the same order as their +*> eigenvalues. +*> A complex eigenvector corresponding to a complex eigenvalue +*> is stored in two consecutive columns, the first holding the +*> real part, and the second the imaginary part. +*> Not referenced if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. LDVL >= 1, and if +*> SIDE = 'L' or 'B', LDVL >= N. +*> \endverbatim +*> +*> \param[in,out] VR +*> \verbatim +*> VR is REAL array, dimension (LDVR,MM) +*> On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +*> contain an N-by-N matrix Q (usually the orthogonal matrix Q +*> of Schur vectors returned by SHSEQR). +*> On exit, if SIDE = 'R' or 'B', VR contains: +*> if HOWMNY = 'A', the matrix X of right eigenvectors of T; +*> if HOWMNY = 'B', the matrix Q*X; +*> if HOWMNY = 'S', the right eigenvectors of T specified by +*> SELECT, stored consecutively in the columns +*> of VR, in the same order as their +*> eigenvalues. +*> A complex eigenvector corresponding to a complex eigenvalue +*> is stored in two consecutive columns, the first holding the +*> real part and the second the imaginary part. +*> Not referenced if SIDE = 'L'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. LDVR >= 1, and if +*> SIDE = 'R' or 'B', LDVR >= N. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of columns in the arrays VL and/or VR. MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of columns in the arrays VL and/or VR actually +*> used to store the eigenvectors. +*> If HOWMNY = 'A' or 'B', M is set to N. +*> Each selected real eigenvector occupies one column and each +*> selected complex eigenvector occupies two columns. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The algorithm used in this program is basically backward (forward) +*> substitution, with scaling to make the the code robust against +*> possible overflow. +*> +*> Each eigenvector is normalized so that the element of largest +*> magnitude has magnitude 1; here the magnitude of a complex number +*> (x,y) is taken to be |x| + |y|. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, + $ LDVR, MM, M, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDT, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + REAL T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLV, BOTHV, LEFTV, OVER, PAIR, RIGHTV, SOMEV + INTEGER I, IERR, II, IP, IS, J, J1, J2, JNXT, K, KI, N2 + REAL BETA, BIGNUM, EMAX, OVFL, REC, REMAX, SCALE, + $ SMIN, SMLNUM, ULP, UNFL, VCRIT, VMAX, WI, WR, + $ XNORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX + REAL SDOT, SLAMCH + EXTERNAL LSAME, ISAMAX, SDOT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMV, SLABAD, SLALN2, SSCAL, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Local Arrays .. + REAL X( 2, 2 ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + ALLV = LSAME( HOWMNY, 'A' ) + OVER = LSAME( HOWMNY, 'B' ) + SOMEV = LSAME( HOWMNY, 'S' ) +* + INFO = 0 + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE +* +* Set M to the number of columns required to store the selected +* eigenvectors, standardize the array SELECT if necessary, and +* test MM. +* + IF( SOMEV ) THEN + M = 0 + PAIR = .FALSE. + DO 10 J = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + SELECT( J ) = .FALSE. + ELSE + IF( J.LT.N ) THEN + IF( T( J+1, J ).EQ.ZERO ) THEN + IF( SELECT( J ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( J ) .OR. SELECT( J+1 ) ) THEN + SELECT( J ) = .TRUE. + M = M + 2 + END IF + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE + ELSE + M = N + END IF +* + IF( MM.LT.M ) THEN + INFO = -11 + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STREVC', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Set the constants to control overflow. +* + UNFL = SLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL SLABAD( UNFL, OVFL ) + ULP = SLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) + BIGNUM = ( ONE-ULP ) / SMLNUM +* +* Compute 1-norm of each column of strictly upper triangular +* part of T to control overflow in triangular solver. +* + WORK( 1 ) = ZERO + DO 30 J = 2, N + WORK( J ) = ZERO + DO 20 I = 1, J - 1 + WORK( J ) = WORK( J ) + ABS( T( I, J ) ) + 20 CONTINUE + 30 CONTINUE +* +* Index IP is used to specify the real or complex eigenvalue: +* IP = 0, real eigenvalue, +* 1, first of conjugate complex pair: (wr,wi) +* -1, second of conjugate complex pair: (wr,wi) +* + N2 = 2*N +* + IF( RIGHTV ) THEN +* +* Compute right eigenvectors. +* + IP = 0 + IS = M + DO 140 KI = N, 1, -1 +* + IF( IP.EQ.1 ) + $ GO TO 130 + IF( KI.EQ.1 ) + $ GO TO 40 + IF( T( KI, KI-1 ).EQ.ZERO ) + $ GO TO 40 + IP = -1 +* + 40 CONTINUE + IF( SOMEV ) THEN + IF( IP.EQ.0 ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 130 + ELSE + IF( .NOT.SELECT( KI-1 ) ) + $ GO TO 130 + END IF + END IF +* +* Compute the KI-th eigenvalue (WR,WI). +* + WR = T( KI, KI ) + WI = ZERO + IF( IP.NE.0 ) + $ WI = SQRT( ABS( T( KI, KI-1 ) ) )* + $ SQRT( ABS( T( KI-1, KI ) ) ) + SMIN = MAX( ULP*( ABS( WR )+ABS( WI ) ), SMLNUM ) +* + IF( IP.EQ.0 ) THEN +* +* Real right eigenvector +* + WORK( KI+N ) = ONE +* +* Form right-hand side +* + DO 50 K = 1, KI - 1 + WORK( K+N ) = -T( K, KI ) + 50 CONTINUE +* +* Solve the upper quasi-triangular system: +* (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK. +* + JNXT = KI - 1 + DO 60 J = KI - 1, 1, -1 + IF( J.GT.JNXT ) + $ GO TO 60 + J1 = J + J2 = J + JNXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* + CALL SLALN2( .FALSE., 1, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale X(1,1) to avoid overflow when updating +* the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + IF( WORK( J ).GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL SSCAL( KI, SCALE, WORK( 1+N ), 1 ) + WORK( J+N ) = X( 1, 1 ) +* +* Update right-hand side +* + CALL SAXPY( J-1, -X( 1, 1 ), T( 1, J ), 1, + $ WORK( 1+N ), 1 ) +* + ELSE +* +* 2-by-2 diagonal block +* + CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, + $ T( J-1, J-1 ), LDT, ONE, ONE, + $ WORK( J-1+N ), N, WR, ZERO, X, 2, + $ SCALE, XNORM, IERR ) +* +* Scale X(1,1) and X(2,1) to avoid overflow when +* updating the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + BETA = MAX( WORK( J-1 ), WORK( J ) ) + IF( BETA.GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + X( 2, 1 ) = X( 2, 1 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL SSCAL( KI, SCALE, WORK( 1+N ), 1 ) + WORK( J-1+N ) = X( 1, 1 ) + WORK( J+N ) = X( 2, 1 ) +* +* Update right-hand side +* + CALL SAXPY( J-2, -X( 1, 1 ), T( 1, J-1 ), 1, + $ WORK( 1+N ), 1 ) + CALL SAXPY( J-2, -X( 2, 1 ), T( 1, J ), 1, + $ WORK( 1+N ), 1 ) + END IF + 60 CONTINUE +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN + CALL SCOPY( KI, WORK( 1+N ), 1, VR( 1, IS ), 1 ) +* + II = ISAMAX( KI, VR( 1, IS ), 1 ) + REMAX = ONE / ABS( VR( II, IS ) ) + CALL SSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 70 K = KI + 1, N + VR( K, IS ) = ZERO + 70 CONTINUE + ELSE + IF( KI.GT.1 ) + $ CALL SGEMV( 'N', N, KI-1, ONE, VR, LDVR, + $ WORK( 1+N ), 1, WORK( KI+N ), + $ VR( 1, KI ), 1 ) +* + II = ISAMAX( N, VR( 1, KI ), 1 ) + REMAX = ONE / ABS( VR( II, KI ) ) + CALL SSCAL( N, REMAX, VR( 1, KI ), 1 ) + END IF +* + ELSE +* +* Complex right eigenvector. +* +* Initial solve +* [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0. +* [ (T(KI,KI-1) T(KI,KI) ) ] +* + IF( ABS( T( KI-1, KI ) ).GE.ABS( T( KI, KI-1 ) ) ) THEN + WORK( KI-1+N ) = ONE + WORK( KI+N2 ) = WI / T( KI-1, KI ) + ELSE + WORK( KI-1+N ) = -WI / T( KI, KI-1 ) + WORK( KI+N2 ) = ONE + END IF + WORK( KI+N ) = ZERO + WORK( KI-1+N2 ) = ZERO +* +* Form right-hand side +* + DO 80 K = 1, KI - 2 + WORK( K+N ) = -WORK( KI-1+N )*T( K, KI-1 ) + WORK( K+N2 ) = -WORK( KI+N2 )*T( K, KI ) + 80 CONTINUE +* +* Solve upper quasi-triangular system: +* (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2) +* + JNXT = KI - 2 + DO 90 J = KI - 2, 1, -1 + IF( J.GT.JNXT ) + $ GO TO 90 + J1 = J + J2 = J + JNXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* + CALL SLALN2( .FALSE., 1, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, WI, + $ X, 2, SCALE, XNORM, IERR ) +* +* Scale X(1,1) and X(1,2) to avoid overflow when +* updating the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + IF( WORK( J ).GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + X( 1, 2 ) = X( 1, 2 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( KI, SCALE, WORK( 1+N ), 1 ) + CALL SSCAL( KI, SCALE, WORK( 1+N2 ), 1 ) + END IF + WORK( J+N ) = X( 1, 1 ) + WORK( J+N2 ) = X( 1, 2 ) +* +* Update the right-hand side +* + CALL SAXPY( J-1, -X( 1, 1 ), T( 1, J ), 1, + $ WORK( 1+N ), 1 ) + CALL SAXPY( J-1, -X( 1, 2 ), T( 1, J ), 1, + $ WORK( 1+N2 ), 1 ) +* + ELSE +* +* 2-by-2 diagonal block +* + CALL SLALN2( .FALSE., 2, 2, SMIN, ONE, + $ T( J-1, J-1 ), LDT, ONE, ONE, + $ WORK( J-1+N ), N, WR, WI, X, 2, SCALE, + $ XNORM, IERR ) +* +* Scale X to avoid overflow when updating +* the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + BETA = MAX( WORK( J-1 ), WORK( J ) ) + IF( BETA.GT.BIGNUM / XNORM ) THEN + REC = ONE / XNORM + X( 1, 1 ) = X( 1, 1 )*REC + X( 1, 2 ) = X( 1, 2 )*REC + X( 2, 1 ) = X( 2, 1 )*REC + X( 2, 2 ) = X( 2, 2 )*REC + SCALE = SCALE*REC + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( KI, SCALE, WORK( 1+N ), 1 ) + CALL SSCAL( KI, SCALE, WORK( 1+N2 ), 1 ) + END IF + WORK( J-1+N ) = X( 1, 1 ) + WORK( J+N ) = X( 2, 1 ) + WORK( J-1+N2 ) = X( 1, 2 ) + WORK( J+N2 ) = X( 2, 2 ) +* +* Update the right-hand side +* + CALL SAXPY( J-2, -X( 1, 1 ), T( 1, J-1 ), 1, + $ WORK( 1+N ), 1 ) + CALL SAXPY( J-2, -X( 2, 1 ), T( 1, J ), 1, + $ WORK( 1+N ), 1 ) + CALL SAXPY( J-2, -X( 1, 2 ), T( 1, J-1 ), 1, + $ WORK( 1+N2 ), 1 ) + CALL SAXPY( J-2, -X( 2, 2 ), T( 1, J ), 1, + $ WORK( 1+N2 ), 1 ) + END IF + 90 CONTINUE +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN + CALL SCOPY( KI, WORK( 1+N ), 1, VR( 1, IS-1 ), 1 ) + CALL SCOPY( KI, WORK( 1+N2 ), 1, VR( 1, IS ), 1 ) +* + EMAX = ZERO + DO 100 K = 1, KI + EMAX = MAX( EMAX, ABS( VR( K, IS-1 ) )+ + $ ABS( VR( K, IS ) ) ) + 100 CONTINUE +* + REMAX = ONE / EMAX + CALL SSCAL( KI, REMAX, VR( 1, IS-1 ), 1 ) + CALL SSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 110 K = KI + 1, N + VR( K, IS-1 ) = ZERO + VR( K, IS ) = ZERO + 110 CONTINUE +* + ELSE +* + IF( KI.GT.2 ) THEN + CALL SGEMV( 'N', N, KI-2, ONE, VR, LDVR, + $ WORK( 1+N ), 1, WORK( KI-1+N ), + $ VR( 1, KI-1 ), 1 ) + CALL SGEMV( 'N', N, KI-2, ONE, VR, LDVR, + $ WORK( 1+N2 ), 1, WORK( KI+N2 ), + $ VR( 1, KI ), 1 ) + ELSE + CALL SSCAL( N, WORK( KI-1+N ), VR( 1, KI-1 ), 1 ) + CALL SSCAL( N, WORK( KI+N2 ), VR( 1, KI ), 1 ) + END IF +* + EMAX = ZERO + DO 120 K = 1, N + EMAX = MAX( EMAX, ABS( VR( K, KI-1 ) )+ + $ ABS( VR( K, KI ) ) ) + 120 CONTINUE + REMAX = ONE / EMAX + CALL SSCAL( N, REMAX, VR( 1, KI-1 ), 1 ) + CALL SSCAL( N, REMAX, VR( 1, KI ), 1 ) + END IF + END IF +* + IS = IS - 1 + IF( IP.NE.0 ) + $ IS = IS - 1 + 130 CONTINUE + IF( IP.EQ.1 ) + $ IP = 0 + IF( IP.EQ.-1 ) + $ IP = 1 + 140 CONTINUE + END IF +* + IF( LEFTV ) THEN +* +* Compute left eigenvectors. +* + IP = 0 + IS = 1 + DO 260 KI = 1, N +* + IF( IP.EQ.-1 ) + $ GO TO 250 + IF( KI.EQ.N ) + $ GO TO 150 + IF( T( KI+1, KI ).EQ.ZERO ) + $ GO TO 150 + IP = 1 +* + 150 CONTINUE + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 250 + END IF +* +* Compute the KI-th eigenvalue (WR,WI). +* + WR = T( KI, KI ) + WI = ZERO + IF( IP.NE.0 ) + $ WI = SQRT( ABS( T( KI, KI+1 ) ) )* + $ SQRT( ABS( T( KI+1, KI ) ) ) + SMIN = MAX( ULP*( ABS( WR )+ABS( WI ) ), SMLNUM ) +* + IF( IP.EQ.0 ) THEN +* +* Real left eigenvector. +* + WORK( KI+N ) = ONE +* +* Form right-hand side +* + DO 160 K = KI + 1, N + WORK( K+N ) = -T( KI, K ) + 160 CONTINUE +* +* Solve the quasi-triangular system: +* (T(KI+1:N,KI+1:N) - WR)**T*X = SCALE*WORK +* + VMAX = ONE + VCRIT = BIGNUM +* + JNXT = KI + 1 + DO 170 J = KI + 1, N + IF( J.LT.JNXT ) + $ GO TO 170 + J1 = J + J2 = J + JNXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side. +* + IF( WORK( J ).GT.VCRIT ) THEN + REC = ONE / VMAX + CALL SSCAL( N-KI+1, REC, WORK( KI+N ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+N ) = WORK( J+N ) - + $ SDOT( J-KI-1, T( KI+1, J ), 1, + $ WORK( KI+1+N ), 1 ) +* +* Solve (T(J,J)-WR)**T*X = WORK +* + CALL SLALN2( .FALSE., 1, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL SSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 ) + WORK( J+N ) = X( 1, 1 ) + VMAX = MAX( ABS( WORK( J+N ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + ELSE +* +* 2-by-2 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side. +* + BETA = MAX( WORK( J ), WORK( J+1 ) ) + IF( BETA.GT.VCRIT ) THEN + REC = ONE / VMAX + CALL SSCAL( N-KI+1, REC, WORK( KI+N ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+N ) = WORK( J+N ) - + $ SDOT( J-KI-1, T( KI+1, J ), 1, + $ WORK( KI+1+N ), 1 ) +* + WORK( J+1+N ) = WORK( J+1+N ) - + $ SDOT( J-KI-1, T( KI+1, J+1 ), 1, + $ WORK( KI+1+N ), 1 ) +* +* Solve +* [T(J,J)-WR T(J,J+1) ]**T* X = SCALE*( WORK1 ) +* [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 ) +* + CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL SSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 ) + WORK( J+N ) = X( 1, 1 ) + WORK( J+1+N ) = X( 2, 1 ) +* + VMAX = MAX( ABS( WORK( J+N ) ), + $ ABS( WORK( J+1+N ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + END IF + 170 CONTINUE +* +* Copy the vector x or Q*x to VL and normalize. +* + IF( .NOT.OVER ) THEN + CALL SCOPY( N-KI+1, WORK( KI+N ), 1, VL( KI, IS ), 1 ) +* + II = ISAMAX( N-KI+1, VL( KI, IS ), 1 ) + KI - 1 + REMAX = ONE / ABS( VL( II, IS ) ) + CALL SSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) +* + DO 180 K = 1, KI - 1 + VL( K, IS ) = ZERO + 180 CONTINUE +* + ELSE +* + IF( KI.LT.N ) + $ CALL SGEMV( 'N', N, N-KI, ONE, VL( 1, KI+1 ), LDVL, + $ WORK( KI+1+N ), 1, WORK( KI+N ), + $ VL( 1, KI ), 1 ) +* + II = ISAMAX( N, VL( 1, KI ), 1 ) + REMAX = ONE / ABS( VL( II, KI ) ) + CALL SSCAL( N, REMAX, VL( 1, KI ), 1 ) +* + END IF +* + ELSE +* +* Complex left eigenvector. +* +* Initial solve: +* ((T(KI,KI) T(KI,KI+1) )**T - (WR - I* WI))*X = 0. +* ((T(KI+1,KI) T(KI+1,KI+1)) ) +* + IF( ABS( T( KI, KI+1 ) ).GE.ABS( T( KI+1, KI ) ) ) THEN + WORK( KI+N ) = WI / T( KI, KI+1 ) + WORK( KI+1+N2 ) = ONE + ELSE + WORK( KI+N ) = ONE + WORK( KI+1+N2 ) = -WI / T( KI+1, KI ) + END IF + WORK( KI+1+N ) = ZERO + WORK( KI+N2 ) = ZERO +* +* Form right-hand side +* + DO 190 K = KI + 2, N + WORK( K+N ) = -WORK( KI+N )*T( KI, K ) + WORK( K+N2 ) = -WORK( KI+1+N2 )*T( KI+1, K ) + 190 CONTINUE +* +* Solve complex quasi-triangular system: +* ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2 +* + VMAX = ONE + VCRIT = BIGNUM +* + JNXT = KI + 2 + DO 200 J = KI + 2, N + IF( J.LT.JNXT ) + $ GO TO 200 + J1 = J + J2 = J + JNXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* +* Scale if necessary to avoid overflow when +* forming the right-hand side elements. +* + IF( WORK( J ).GT.VCRIT ) THEN + REC = ONE / VMAX + CALL SSCAL( N-KI+1, REC, WORK( KI+N ), 1 ) + CALL SSCAL( N-KI+1, REC, WORK( KI+N2 ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+N ) = WORK( J+N ) - + $ SDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+N ), 1 ) + WORK( J+N2 ) = WORK( J+N2 ) - + $ SDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+N2 ), 1 ) +* +* Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 +* + CALL SLALN2( .FALSE., 1, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ -WI, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 ) + CALL SSCAL( N-KI+1, SCALE, WORK( KI+N2 ), 1 ) + END IF + WORK( J+N ) = X( 1, 1 ) + WORK( J+N2 ) = X( 1, 2 ) + VMAX = MAX( ABS( WORK( J+N ) ), + $ ABS( WORK( J+N2 ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + ELSE +* +* 2-by-2 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side elements. +* + BETA = MAX( WORK( J ), WORK( J+1 ) ) + IF( BETA.GT.VCRIT ) THEN + REC = ONE / VMAX + CALL SSCAL( N-KI+1, REC, WORK( KI+N ), 1 ) + CALL SSCAL( N-KI+1, REC, WORK( KI+N2 ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+N ) = WORK( J+N ) - + $ SDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+N ), 1 ) +* + WORK( J+N2 ) = WORK( J+N2 ) - + $ SDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+N2 ), 1 ) +* + WORK( J+1+N ) = WORK( J+1+N ) - + $ SDOT( J-KI-2, T( KI+2, J+1 ), 1, + $ WORK( KI+2+N ), 1 ) +* + WORK( J+1+N2 ) = WORK( J+1+N2 ) - + $ SDOT( J-KI-2, T( KI+2, J+1 ), 1, + $ WORK( KI+2+N2 ), 1 ) +* +* Solve 2-by-2 complex linear equation +* ([T(j,j) T(j,j+1) ]**T-(wr-i*wi)*I)*X = SCALE*B +* ([T(j+1,j) T(j+1,j+1)] ) +* + CALL SLALN2( .TRUE., 2, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+N ), N, WR, + $ -WI, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 ) + CALL SSCAL( N-KI+1, SCALE, WORK( KI+N2 ), 1 ) + END IF + WORK( J+N ) = X( 1, 1 ) + WORK( J+N2 ) = X( 1, 2 ) + WORK( J+1+N ) = X( 2, 1 ) + WORK( J+1+N2 ) = X( 2, 2 ) + VMAX = MAX( ABS( X( 1, 1 ) ), ABS( X( 1, 2 ) ), + $ ABS( X( 2, 1 ) ), ABS( X( 2, 2 ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + END IF + 200 CONTINUE +* +* Copy the vector x or Q*x to VL and normalize. +* + IF( .NOT.OVER ) THEN + CALL SCOPY( N-KI+1, WORK( KI+N ), 1, VL( KI, IS ), 1 ) + CALL SCOPY( N-KI+1, WORK( KI+N2 ), 1, VL( KI, IS+1 ), + $ 1 ) +* + EMAX = ZERO + DO 220 K = KI, N + EMAX = MAX( EMAX, ABS( VL( K, IS ) )+ + $ ABS( VL( K, IS+1 ) ) ) + 220 CONTINUE + REMAX = ONE / EMAX + CALL SSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) + CALL SSCAL( N-KI+1, REMAX, VL( KI, IS+1 ), 1 ) +* + DO 230 K = 1, KI - 1 + VL( K, IS ) = ZERO + VL( K, IS+1 ) = ZERO + 230 CONTINUE + ELSE + IF( KI.LT.N-1 ) THEN + CALL SGEMV( 'N', N, N-KI-1, ONE, VL( 1, KI+2 ), + $ LDVL, WORK( KI+2+N ), 1, WORK( KI+N ), + $ VL( 1, KI ), 1 ) + CALL SGEMV( 'N', N, N-KI-1, ONE, VL( 1, KI+2 ), + $ LDVL, WORK( KI+2+N2 ), 1, + $ WORK( KI+1+N2 ), VL( 1, KI+1 ), 1 ) + ELSE + CALL SSCAL( N, WORK( KI+N ), VL( 1, KI ), 1 ) + CALL SSCAL( N, WORK( KI+1+N2 ), VL( 1, KI+1 ), 1 ) + END IF +* + EMAX = ZERO + DO 240 K = 1, N + EMAX = MAX( EMAX, ABS( VL( K, KI ) )+ + $ ABS( VL( K, KI+1 ) ) ) + 240 CONTINUE + REMAX = ONE / EMAX + CALL SSCAL( N, REMAX, VL( 1, KI ), 1 ) + CALL SSCAL( N, REMAX, VL( 1, KI+1 ), 1 ) +* + END IF +* + END IF +* + IS = IS + 1 + IF( IP.NE.0 ) + $ IS = IS + 1 + 250 CONTINUE + IF( IP.EQ.-1 ) + $ IP = 0 + IF( IP.EQ.1 ) + $ IP = -1 +* + 260 CONTINUE +* + END IF +* + RETURN +* +* End of STREVC +* + END diff --git a/dspl/liblapack/SRC/strevc3.f b/dspl/liblapack/SRC/strevc3.f new file mode 100644 index 0000000..0df1189 --- /dev/null +++ b/dspl/liblapack/SRC/strevc3.f @@ -0,0 +1,1304 @@ +*> \brief \b STREVC3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STREVC3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STREVC3( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, +* VR, LDVR, MM, M, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER HOWMNY, SIDE +* INTEGER INFO, LDT, LDVL, LDVR, LWORK, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* REAL T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STREVC3 computes some or all of the right and/or left eigenvectors of +*> a real upper quasi-triangular matrix T. +*> Matrices of this type are produced by the Schur factorization of +*> a real general matrix: A = Q*T*Q**T, as computed by SHSEQR. +*> +*> The right eigenvector x and the left eigenvector y of T corresponding +*> to an eigenvalue w are defined by: +*> +*> T*x = w*x, (y**H)*T = w*(y**H) +*> +*> where y**H denotes the conjugate transpose of y. +*> The eigenvalues are not input to this routine, but are read directly +*> from the diagonal blocks of T. +*> +*> This routine returns the matrices X and/or Y of right and left +*> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an +*> input matrix. If Q is the orthogonal factor that reduces a matrix +*> A to Schur form T, then Q*X and Q*Y are the matrices of right and +*> left eigenvectors of A. +*> +*> This uses a Level 3 BLAS version of the back transformation. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': compute right eigenvectors only; +*> = 'L': compute left eigenvectors only; +*> = 'B': compute both right and left eigenvectors. +*> \endverbatim +*> +*> \param[in] HOWMNY +*> \verbatim +*> HOWMNY is CHARACTER*1 +*> = 'A': compute all right and/or left eigenvectors; +*> = 'B': compute all right and/or left eigenvectors, +*> backtransformed by the matrices in VR and/or VL; +*> = 'S': compute selected right and/or left eigenvectors, +*> as indicated by the logical array SELECT. +*> \endverbatim +*> +*> \param[in,out] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> If HOWMNY = 'S', SELECT specifies the eigenvectors to be +*> computed. +*> If w(j) is a real eigenvalue, the corresponding real +*> eigenvector is computed if SELECT(j) is .TRUE.. +*> If w(j) and w(j+1) are the real and imaginary parts of a +*> complex eigenvalue, the corresponding complex eigenvector is +*> computed if either SELECT(j) or SELECT(j+1) is .TRUE., and +*> on exit SELECT(j) is set to .TRUE. and SELECT(j+1) is set to +*> .FALSE.. +*> Not referenced if HOWMNY = 'A' or 'B'. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is REAL array, dimension (LDT,N) +*> The upper quasi-triangular matrix T in Schur canonical form. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is REAL array, dimension (LDVL,MM) +*> On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +*> contain an N-by-N matrix Q (usually the orthogonal matrix Q +*> of Schur vectors returned by SHSEQR). +*> On exit, if SIDE = 'L' or 'B', VL contains: +*> if HOWMNY = 'A', the matrix Y of left eigenvectors of T; +*> if HOWMNY = 'B', the matrix Q*Y; +*> if HOWMNY = 'S', the left eigenvectors of T specified by +*> SELECT, stored consecutively in the columns +*> of VL, in the same order as their +*> eigenvalues. +*> A complex eigenvector corresponding to a complex eigenvalue +*> is stored in two consecutive columns, the first holding the +*> real part, and the second the imaginary part. +*> Not referenced if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. +*> LDVL >= 1, and if SIDE = 'L' or 'B', LDVL >= N. +*> \endverbatim +*> +*> \param[in,out] VR +*> \verbatim +*> VR is REAL array, dimension (LDVR,MM) +*> On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +*> contain an N-by-N matrix Q (usually the orthogonal matrix Q +*> of Schur vectors returned by SHSEQR). +*> On exit, if SIDE = 'R' or 'B', VR contains: +*> if HOWMNY = 'A', the matrix X of right eigenvectors of T; +*> if HOWMNY = 'B', the matrix Q*X; +*> if HOWMNY = 'S', the right eigenvectors of T specified by +*> SELECT, stored consecutively in the columns +*> of VR, in the same order as their +*> eigenvalues. +*> A complex eigenvector corresponding to a complex eigenvalue +*> is stored in two consecutive columns, the first holding the +*> real part and the second the imaginary part. +*> Not referenced if SIDE = 'L'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. +*> LDVR >= 1, and if SIDE = 'R' or 'B', LDVR >= N. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of columns in the arrays VL and/or VR. MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of columns in the arrays VL and/or VR actually +*> used to store the eigenvectors. +*> If HOWMNY = 'A' or 'B', M is set to N. +*> Each selected real eigenvector occupies one column and each +*> selected complex eigenvector occupies two columns. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of array WORK. LWORK >= max(1,3*N). +*> For optimum performance, LWORK >= N + 2*N*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +* @generated from dtrevc3.f, fortran d -> s, Tue Apr 19 01:47:44 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The algorithm used in this program is basically backward (forward) +*> substitution, with scaling to make the the code robust against +*> possible overflow. +*> +*> Each eigenvector is normalized so that the element of largest +*> magnitude has magnitude 1; here the magnitude of a complex number +*> (x,y) is taken to be |x| + |y|. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STREVC3( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, + $ VR, LDVR, MM, M, WORK, LWORK, INFO ) + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDT, LDVL, LDVR, LWORK, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + REAL T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) + INTEGER NBMIN, NBMAX + PARAMETER ( NBMIN = 8, NBMAX = 128 ) +* .. +* .. Local Scalars .. + LOGICAL ALLV, BOTHV, LEFTV, LQUERY, OVER, PAIR, + $ RIGHTV, SOMEV + INTEGER I, IERR, II, IP, IS, J, J1, J2, JNXT, K, KI, + $ IV, MAXWRK, NB, KI2 + REAL BETA, BIGNUM, EMAX, OVFL, REC, REMAX, SCALE, + $ SMIN, SMLNUM, ULP, UNFL, VCRIT, VMAX, WI, WR, + $ XNORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ISAMAX, ILAENV + REAL SDOT, SLAMCH + EXTERNAL LSAME, ISAMAX, ILAENV, SDOT, SLAMCH +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SGEMV, SLALN2, SSCAL, XERBLA, + $ SLACPY, SGEMM, SLABAD, SLASET +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Local Arrays .. + REAL X( 2, 2 ) + INTEGER ISCOMPLEX( NBMAX ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + ALLV = LSAME( HOWMNY, 'A' ) + OVER = LSAME( HOWMNY, 'B' ) + SOMEV = LSAME( HOWMNY, 'S' ) +* + INFO = 0 + NB = ILAENV( 1, 'STREVC', SIDE // HOWMNY, N, -1, -1, -1 ) + MAXWRK = N + 2*N*NB + WORK(1) = MAXWRK + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, 3*N ) .AND. .NOT.LQUERY ) THEN + INFO = -14 + ELSE +* +* Set M to the number of columns required to store the selected +* eigenvectors, standardize the array SELECT if necessary, and +* test MM. +* + IF( SOMEV ) THEN + M = 0 + PAIR = .FALSE. + DO 10 J = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + SELECT( J ) = .FALSE. + ELSE + IF( J.LT.N ) THEN + IF( T( J+1, J ).EQ.ZERO ) THEN + IF( SELECT( J ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( J ) .OR. SELECT( J+1 ) ) THEN + SELECT( J ) = .TRUE. + M = M + 2 + END IF + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE + ELSE + M = N + END IF +* + IF( MM.LT.M ) THEN + INFO = -11 + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STREVC3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Use blocked version of back-transformation if sufficient workspace. +* Zero-out the workspace to avoid potential NaN propagation. +* + IF( OVER .AND. LWORK .GE. N + 2*N*NBMIN ) THEN + NB = (LWORK - N) / (2*N) + NB = MIN( NB, NBMAX ) + CALL SLASET( 'F', N, 1+2*NB, ZERO, ZERO, WORK, N ) + ELSE + NB = 1 + END IF +* +* Set the constants to control overflow. +* + UNFL = SLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL SLABAD( UNFL, OVFL ) + ULP = SLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) + BIGNUM = ( ONE-ULP ) / SMLNUM +* +* Compute 1-norm of each column of strictly upper triangular +* part of T to control overflow in triangular solver. +* + WORK( 1 ) = ZERO + DO 30 J = 2, N + WORK( J ) = ZERO + DO 20 I = 1, J - 1 + WORK( J ) = WORK( J ) + ABS( T( I, J ) ) + 20 CONTINUE + 30 CONTINUE +* +* Index IP is used to specify the real or complex eigenvalue: +* IP = 0, real eigenvalue, +* 1, first of conjugate complex pair: (wr,wi) +* -1, second of conjugate complex pair: (wr,wi) +* ISCOMPLEX array stores IP for each column in current block. +* + IF( RIGHTV ) THEN +* +* ============================================================ +* Compute right eigenvectors. +* +* IV is index of column in current block. +* For complex right vector, uses IV-1 for real part and IV for complex part. +* Non-blocked version always uses IV=2; +* blocked version starts with IV=NB, goes down to 1 or 2. +* (Note the "0-th" column is used for 1-norms computed above.) + IV = 2 + IF( NB.GT.2 ) THEN + IV = NB + END IF + + IP = 0 + IS = M + DO 140 KI = N, 1, -1 + IF( IP.EQ.-1 ) THEN +* previous iteration (ki+1) was second of conjugate pair, +* so this ki is first of conjugate pair; skip to end of loop + IP = 1 + GO TO 140 + ELSE IF( KI.EQ.1 ) THEN +* last column, so this ki must be real eigenvalue + IP = 0 + ELSE IF( T( KI, KI-1 ).EQ.ZERO ) THEN +* zero on sub-diagonal, so this ki is real eigenvalue + IP = 0 + ELSE +* non-zero on sub-diagonal, so this ki is second of conjugate pair + IP = -1 + END IF + + IF( SOMEV ) THEN + IF( IP.EQ.0 ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 140 + ELSE + IF( .NOT.SELECT( KI-1 ) ) + $ GO TO 140 + END IF + END IF +* +* Compute the KI-th eigenvalue (WR,WI). +* + WR = T( KI, KI ) + WI = ZERO + IF( IP.NE.0 ) + $ WI = SQRT( ABS( T( KI, KI-1 ) ) )* + $ SQRT( ABS( T( KI-1, KI ) ) ) + SMIN = MAX( ULP*( ABS( WR )+ABS( WI ) ), SMLNUM ) +* + IF( IP.EQ.0 ) THEN +* +* -------------------------------------------------------- +* Real right eigenvector +* + WORK( KI + IV*N ) = ONE +* +* Form right-hand side. +* + DO 50 K = 1, KI - 1 + WORK( K + IV*N ) = -T( K, KI ) + 50 CONTINUE +* +* Solve upper quasi-triangular system: +* [ T(1:KI-1,1:KI-1) - WR ]*X = SCALE*WORK. +* + JNXT = KI - 1 + DO 60 J = KI - 1, 1, -1 + IF( J.GT.JNXT ) + $ GO TO 60 + J1 = J + J2 = J + JNXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* + CALL SLALN2( .FALSE., 1, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+IV*N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale X(1,1) to avoid overflow when updating +* the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + IF( WORK( J ).GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL SSCAL( KI, SCALE, WORK( 1+IV*N ), 1 ) + WORK( J+IV*N ) = X( 1, 1 ) +* +* Update right-hand side +* + CALL SAXPY( J-1, -X( 1, 1 ), T( 1, J ), 1, + $ WORK( 1+IV*N ), 1 ) +* + ELSE +* +* 2-by-2 diagonal block +* + CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, + $ T( J-1, J-1 ), LDT, ONE, ONE, + $ WORK( J-1+IV*N ), N, WR, ZERO, X, 2, + $ SCALE, XNORM, IERR ) +* +* Scale X(1,1) and X(2,1) to avoid overflow when +* updating the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + BETA = MAX( WORK( J-1 ), WORK( J ) ) + IF( BETA.GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + X( 2, 1 ) = X( 2, 1 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL SSCAL( KI, SCALE, WORK( 1+IV*N ), 1 ) + WORK( J-1+IV*N ) = X( 1, 1 ) + WORK( J +IV*N ) = X( 2, 1 ) +* +* Update right-hand side +* + CALL SAXPY( J-2, -X( 1, 1 ), T( 1, J-1 ), 1, + $ WORK( 1+IV*N ), 1 ) + CALL SAXPY( J-2, -X( 2, 1 ), T( 1, J ), 1, + $ WORK( 1+IV*N ), 1 ) + END IF + 60 CONTINUE +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN +* ------------------------------ +* no back-transform: copy x to VR and normalize. + CALL SCOPY( KI, WORK( 1 + IV*N ), 1, VR( 1, IS ), 1 ) +* + II = ISAMAX( KI, VR( 1, IS ), 1 ) + REMAX = ONE / ABS( VR( II, IS ) ) + CALL SSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 70 K = KI + 1, N + VR( K, IS ) = ZERO + 70 CONTINUE +* + ELSE IF( NB.EQ.1 ) THEN +* ------------------------------ +* version 1: back-transform each vector with GEMV, Q*x. + IF( KI.GT.1 ) + $ CALL SGEMV( 'N', N, KI-1, ONE, VR, LDVR, + $ WORK( 1 + IV*N ), 1, WORK( KI + IV*N ), + $ VR( 1, KI ), 1 ) +* + II = ISAMAX( N, VR( 1, KI ), 1 ) + REMAX = ONE / ABS( VR( II, KI ) ) + CALL SSCAL( N, REMAX, VR( 1, KI ), 1 ) +* + ELSE +* ------------------------------ +* version 2: back-transform block of vectors with GEMM +* zero out below vector + DO K = KI + 1, N + WORK( K + IV*N ) = ZERO + END DO + ISCOMPLEX( IV ) = IP +* back-transform and normalization is done below + END IF + ELSE +* +* -------------------------------------------------------- +* Complex right eigenvector. +* +* Initial solve +* [ ( T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I*WI) ]*X = 0. +* [ ( T(KI, KI-1) T(KI, KI) ) ] +* + IF( ABS( T( KI-1, KI ) ).GE.ABS( T( KI, KI-1 ) ) ) THEN + WORK( KI-1 + (IV-1)*N ) = ONE + WORK( KI + (IV )*N ) = WI / T( KI-1, KI ) + ELSE + WORK( KI-1 + (IV-1)*N ) = -WI / T( KI, KI-1 ) + WORK( KI + (IV )*N ) = ONE + END IF + WORK( KI + (IV-1)*N ) = ZERO + WORK( KI-1 + (IV )*N ) = ZERO +* +* Form right-hand side. +* + DO 80 K = 1, KI - 2 + WORK( K+(IV-1)*N ) = -WORK( KI-1+(IV-1)*N )*T(K,KI-1) + WORK( K+(IV )*N ) = -WORK( KI +(IV )*N )*T(K,KI ) + 80 CONTINUE +* +* Solve upper quasi-triangular system: +* [ T(1:KI-2,1:KI-2) - (WR+i*WI) ]*X = SCALE*(WORK+i*WORK2) +* + JNXT = KI - 2 + DO 90 J = KI - 2, 1, -1 + IF( J.GT.JNXT ) + $ GO TO 90 + J1 = J + J2 = J + JNXT = J - 1 + IF( J.GT.1 ) THEN + IF( T( J, J-1 ).NE.ZERO ) THEN + J1 = J - 1 + JNXT = J - 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* + CALL SLALN2( .FALSE., 1, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+(IV-1)*N ), N, + $ WR, WI, X, 2, SCALE, XNORM, IERR ) +* +* Scale X(1,1) and X(1,2) to avoid overflow when +* updating the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + IF( WORK( J ).GT.BIGNUM / XNORM ) THEN + X( 1, 1 ) = X( 1, 1 ) / XNORM + X( 1, 2 ) = X( 1, 2 ) / XNORM + SCALE = SCALE / XNORM + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( KI, SCALE, WORK( 1+(IV-1)*N ), 1 ) + CALL SSCAL( KI, SCALE, WORK( 1+(IV )*N ), 1 ) + END IF + WORK( J+(IV-1)*N ) = X( 1, 1 ) + WORK( J+(IV )*N ) = X( 1, 2 ) +* +* Update the right-hand side +* + CALL SAXPY( J-1, -X( 1, 1 ), T( 1, J ), 1, + $ WORK( 1+(IV-1)*N ), 1 ) + CALL SAXPY( J-1, -X( 1, 2 ), T( 1, J ), 1, + $ WORK( 1+(IV )*N ), 1 ) +* + ELSE +* +* 2-by-2 diagonal block +* + CALL SLALN2( .FALSE., 2, 2, SMIN, ONE, + $ T( J-1, J-1 ), LDT, ONE, ONE, + $ WORK( J-1+(IV-1)*N ), N, WR, WI, X, 2, + $ SCALE, XNORM, IERR ) +* +* Scale X to avoid overflow when updating +* the right-hand side. +* + IF( XNORM.GT.ONE ) THEN + BETA = MAX( WORK( J-1 ), WORK( J ) ) + IF( BETA.GT.BIGNUM / XNORM ) THEN + REC = ONE / XNORM + X( 1, 1 ) = X( 1, 1 )*REC + X( 1, 2 ) = X( 1, 2 )*REC + X( 2, 1 ) = X( 2, 1 )*REC + X( 2, 2 ) = X( 2, 2 )*REC + SCALE = SCALE*REC + END IF + END IF +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( KI, SCALE, WORK( 1+(IV-1)*N ), 1 ) + CALL SSCAL( KI, SCALE, WORK( 1+(IV )*N ), 1 ) + END IF + WORK( J-1+(IV-1)*N ) = X( 1, 1 ) + WORK( J +(IV-1)*N ) = X( 2, 1 ) + WORK( J-1+(IV )*N ) = X( 1, 2 ) + WORK( J +(IV )*N ) = X( 2, 2 ) +* +* Update the right-hand side +* + CALL SAXPY( J-2, -X( 1, 1 ), T( 1, J-1 ), 1, + $ WORK( 1+(IV-1)*N ), 1 ) + CALL SAXPY( J-2, -X( 2, 1 ), T( 1, J ), 1, + $ WORK( 1+(IV-1)*N ), 1 ) + CALL SAXPY( J-2, -X( 1, 2 ), T( 1, J-1 ), 1, + $ WORK( 1+(IV )*N ), 1 ) + CALL SAXPY( J-2, -X( 2, 2 ), T( 1, J ), 1, + $ WORK( 1+(IV )*N ), 1 ) + END IF + 90 CONTINUE +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN +* ------------------------------ +* no back-transform: copy x to VR and normalize. + CALL SCOPY( KI, WORK( 1+(IV-1)*N ), 1, VR(1,IS-1), 1 ) + CALL SCOPY( KI, WORK( 1+(IV )*N ), 1, VR(1,IS ), 1 ) +* + EMAX = ZERO + DO 100 K = 1, KI + EMAX = MAX( EMAX, ABS( VR( K, IS-1 ) )+ + $ ABS( VR( K, IS ) ) ) + 100 CONTINUE + REMAX = ONE / EMAX + CALL SSCAL( KI, REMAX, VR( 1, IS-1 ), 1 ) + CALL SSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 110 K = KI + 1, N + VR( K, IS-1 ) = ZERO + VR( K, IS ) = ZERO + 110 CONTINUE +* + ELSE IF( NB.EQ.1 ) THEN +* ------------------------------ +* version 1: back-transform each vector with GEMV, Q*x. + IF( KI.GT.2 ) THEN + CALL SGEMV( 'N', N, KI-2, ONE, VR, LDVR, + $ WORK( 1 + (IV-1)*N ), 1, + $ WORK( KI-1 + (IV-1)*N ), VR(1,KI-1), 1) + CALL SGEMV( 'N', N, KI-2, ONE, VR, LDVR, + $ WORK( 1 + (IV)*N ), 1, + $ WORK( KI + (IV)*N ), VR( 1, KI ), 1 ) + ELSE + CALL SSCAL( N, WORK(KI-1+(IV-1)*N), VR(1,KI-1), 1) + CALL SSCAL( N, WORK(KI +(IV )*N), VR(1,KI ), 1) + END IF +* + EMAX = ZERO + DO 120 K = 1, N + EMAX = MAX( EMAX, ABS( VR( K, KI-1 ) )+ + $ ABS( VR( K, KI ) ) ) + 120 CONTINUE + REMAX = ONE / EMAX + CALL SSCAL( N, REMAX, VR( 1, KI-1 ), 1 ) + CALL SSCAL( N, REMAX, VR( 1, KI ), 1 ) +* + ELSE +* ------------------------------ +* version 2: back-transform block of vectors with GEMM +* zero out below vector + DO K = KI + 1, N + WORK( K + (IV-1)*N ) = ZERO + WORK( K + (IV )*N ) = ZERO + END DO + ISCOMPLEX( IV-1 ) = -IP + ISCOMPLEX( IV ) = IP + IV = IV - 1 +* back-transform and normalization is done below + END IF + END IF + + IF( NB.GT.1 ) THEN +* -------------------------------------------------------- +* Blocked version of back-transform +* For complex case, KI2 includes both vectors (KI-1 and KI) + IF( IP.EQ.0 ) THEN + KI2 = KI + ELSE + KI2 = KI - 1 + END IF + +* Columns IV:NB of work are valid vectors. +* When the number of vectors stored reaches NB-1 or NB, +* or if this was last vector, do the GEMM + IF( (IV.LE.2) .OR. (KI2.EQ.1) ) THEN + CALL SGEMM( 'N', 'N', N, NB-IV+1, KI2+NB-IV, ONE, + $ VR, LDVR, + $ WORK( 1 + (IV)*N ), N, + $ ZERO, + $ WORK( 1 + (NB+IV)*N ), N ) +* normalize vectors + DO K = IV, NB + IF( ISCOMPLEX(K).EQ.0 ) THEN +* real eigenvector + II = ISAMAX( N, WORK( 1 + (NB+K)*N ), 1 ) + REMAX = ONE / ABS( WORK( II + (NB+K)*N ) ) + ELSE IF( ISCOMPLEX(K).EQ.1 ) THEN +* first eigenvector of conjugate pair + EMAX = ZERO + DO II = 1, N + EMAX = MAX( EMAX, + $ ABS( WORK( II + (NB+K )*N ) )+ + $ ABS( WORK( II + (NB+K+1)*N ) ) ) + END DO + REMAX = ONE / EMAX +* else if ISCOMPLEX(K).EQ.-1 +* second eigenvector of conjugate pair +* reuse same REMAX as previous K + END IF + CALL SSCAL( N, REMAX, WORK( 1 + (NB+K)*N ), 1 ) + END DO + CALL SLACPY( 'F', N, NB-IV+1, + $ WORK( 1 + (NB+IV)*N ), N, + $ VR( 1, KI2 ), LDVR ) + IV = NB + ELSE + IV = IV - 1 + END IF + END IF ! blocked back-transform +* + IS = IS - 1 + IF( IP.NE.0 ) + $ IS = IS - 1 + 140 CONTINUE + END IF + + IF( LEFTV ) THEN +* +* ============================================================ +* Compute left eigenvectors. +* +* IV is index of column in current block. +* For complex left vector, uses IV for real part and IV+1 for complex part. +* Non-blocked version always uses IV=1; +* blocked version starts with IV=1, goes up to NB-1 or NB. +* (Note the "0-th" column is used for 1-norms computed above.) + IV = 1 + IP = 0 + IS = 1 + DO 260 KI = 1, N + IF( IP.EQ.1 ) THEN +* previous iteration (ki-1) was first of conjugate pair, +* so this ki is second of conjugate pair; skip to end of loop + IP = -1 + GO TO 260 + ELSE IF( KI.EQ.N ) THEN +* last column, so this ki must be real eigenvalue + IP = 0 + ELSE IF( T( KI+1, KI ).EQ.ZERO ) THEN +* zero on sub-diagonal, so this ki is real eigenvalue + IP = 0 + ELSE +* non-zero on sub-diagonal, so this ki is first of conjugate pair + IP = 1 + END IF +* + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 260 + END IF +* +* Compute the KI-th eigenvalue (WR,WI). +* + WR = T( KI, KI ) + WI = ZERO + IF( IP.NE.0 ) + $ WI = SQRT( ABS( T( KI, KI+1 ) ) )* + $ SQRT( ABS( T( KI+1, KI ) ) ) + SMIN = MAX( ULP*( ABS( WR )+ABS( WI ) ), SMLNUM ) +* + IF( IP.EQ.0 ) THEN +* +* -------------------------------------------------------- +* Real left eigenvector +* + WORK( KI + IV*N ) = ONE +* +* Form right-hand side. +* + DO 160 K = KI + 1, N + WORK( K + IV*N ) = -T( KI, K ) + 160 CONTINUE +* +* Solve transposed quasi-triangular system: +* [ T(KI+1:N,KI+1:N) - WR ]**T * X = SCALE*WORK +* + VMAX = ONE + VCRIT = BIGNUM +* + JNXT = KI + 1 + DO 170 J = KI + 1, N + IF( J.LT.JNXT ) + $ GO TO 170 + J1 = J + J2 = J + JNXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side. +* + IF( WORK( J ).GT.VCRIT ) THEN + REC = ONE / VMAX + CALL SSCAL( N-KI+1, REC, WORK( KI+IV*N ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+IV*N ) = WORK( J+IV*N ) - + $ SDOT( J-KI-1, T( KI+1, J ), 1, + $ WORK( KI+1+IV*N ), 1 ) +* +* Solve [ T(J,J) - WR ]**T * X = WORK +* + CALL SLALN2( .FALSE., 1, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+IV*N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL SSCAL( N-KI+1, SCALE, WORK( KI+IV*N ), 1 ) + WORK( J+IV*N ) = X( 1, 1 ) + VMAX = MAX( ABS( WORK( J+IV*N ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + ELSE +* +* 2-by-2 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side. +* + BETA = MAX( WORK( J ), WORK( J+1 ) ) + IF( BETA.GT.VCRIT ) THEN + REC = ONE / VMAX + CALL SSCAL( N-KI+1, REC, WORK( KI+IV*N ), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+IV*N ) = WORK( J+IV*N ) - + $ SDOT( J-KI-1, T( KI+1, J ), 1, + $ WORK( KI+1+IV*N ), 1 ) +* + WORK( J+1+IV*N ) = WORK( J+1+IV*N ) - + $ SDOT( J-KI-1, T( KI+1, J+1 ), 1, + $ WORK( KI+1+IV*N ), 1 ) +* +* Solve +* [ T(J,J)-WR T(J,J+1) ]**T * X = SCALE*( WORK1 ) +* [ T(J+1,J) T(J+1,J+1)-WR ] ( WORK2 ) +* + CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+IV*N ), N, WR, + $ ZERO, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) + $ CALL SSCAL( N-KI+1, SCALE, WORK( KI+IV*N ), 1 ) + WORK( J +IV*N ) = X( 1, 1 ) + WORK( J+1+IV*N ) = X( 2, 1 ) +* + VMAX = MAX( ABS( WORK( J +IV*N ) ), + $ ABS( WORK( J+1+IV*N ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + END IF + 170 CONTINUE +* +* Copy the vector x or Q*x to VL and normalize. +* + IF( .NOT.OVER ) THEN +* ------------------------------ +* no back-transform: copy x to VL and normalize. + CALL SCOPY( N-KI+1, WORK( KI + IV*N ), 1, + $ VL( KI, IS ), 1 ) +* + II = ISAMAX( N-KI+1, VL( KI, IS ), 1 ) + KI - 1 + REMAX = ONE / ABS( VL( II, IS ) ) + CALL SSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) +* + DO 180 K = 1, KI - 1 + VL( K, IS ) = ZERO + 180 CONTINUE +* + ELSE IF( NB.EQ.1 ) THEN +* ------------------------------ +* version 1: back-transform each vector with GEMV, Q*x. + IF( KI.LT.N ) + $ CALL SGEMV( 'N', N, N-KI, ONE, + $ VL( 1, KI+1 ), LDVL, + $ WORK( KI+1 + IV*N ), 1, + $ WORK( KI + IV*N ), VL( 1, KI ), 1 ) +* + II = ISAMAX( N, VL( 1, KI ), 1 ) + REMAX = ONE / ABS( VL( II, KI ) ) + CALL SSCAL( N, REMAX, VL( 1, KI ), 1 ) +* + ELSE +* ------------------------------ +* version 2: back-transform block of vectors with GEMM +* zero out above vector +* could go from KI-NV+1 to KI-1 + DO K = 1, KI - 1 + WORK( K + IV*N ) = ZERO + END DO + ISCOMPLEX( IV ) = IP +* back-transform and normalization is done below + END IF + ELSE +* +* -------------------------------------------------------- +* Complex left eigenvector. +* +* Initial solve: +* [ ( T(KI,KI) T(KI,KI+1) )**T - (WR - I* WI) ]*X = 0. +* [ ( T(KI+1,KI) T(KI+1,KI+1) ) ] +* + IF( ABS( T( KI, KI+1 ) ).GE.ABS( T( KI+1, KI ) ) ) THEN + WORK( KI + (IV )*N ) = WI / T( KI, KI+1 ) + WORK( KI+1 + (IV+1)*N ) = ONE + ELSE + WORK( KI + (IV )*N ) = ONE + WORK( KI+1 + (IV+1)*N ) = -WI / T( KI+1, KI ) + END IF + WORK( KI+1 + (IV )*N ) = ZERO + WORK( KI + (IV+1)*N ) = ZERO +* +* Form right-hand side. +* + DO 190 K = KI + 2, N + WORK( K+(IV )*N ) = -WORK( KI +(IV )*N )*T(KI, K) + WORK( K+(IV+1)*N ) = -WORK( KI+1+(IV+1)*N )*T(KI+1,K) + 190 CONTINUE +* +* Solve transposed quasi-triangular system: +* [ T(KI+2:N,KI+2:N)**T - (WR-i*WI) ]*X = WORK1+i*WORK2 +* + VMAX = ONE + VCRIT = BIGNUM +* + JNXT = KI + 2 + DO 200 J = KI + 2, N + IF( J.LT.JNXT ) + $ GO TO 200 + J1 = J + J2 = J + JNXT = J + 1 + IF( J.LT.N ) THEN + IF( T( J+1, J ).NE.ZERO ) THEN + J2 = J + 1 + JNXT = J + 2 + END IF + END IF +* + IF( J1.EQ.J2 ) THEN +* +* 1-by-1 diagonal block +* +* Scale if necessary to avoid overflow when +* forming the right-hand side elements. +* + IF( WORK( J ).GT.VCRIT ) THEN + REC = ONE / VMAX + CALL SSCAL( N-KI+1, REC, WORK(KI+(IV )*N), 1 ) + CALL SSCAL( N-KI+1, REC, WORK(KI+(IV+1)*N), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J+(IV )*N ) = WORK( J+(IV)*N ) - + $ SDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+(IV)*N ), 1 ) + WORK( J+(IV+1)*N ) = WORK( J+(IV+1)*N ) - + $ SDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+(IV+1)*N ), 1 ) +* +* Solve [ T(J,J)-(WR-i*WI) ]*(X11+i*X12)= WK+I*WK2 +* + CALL SLALN2( .FALSE., 1, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+IV*N ), N, WR, + $ -WI, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( N-KI+1, SCALE, WORK(KI+(IV )*N), 1) + CALL SSCAL( N-KI+1, SCALE, WORK(KI+(IV+1)*N), 1) + END IF + WORK( J+(IV )*N ) = X( 1, 1 ) + WORK( J+(IV+1)*N ) = X( 1, 2 ) + VMAX = MAX( ABS( WORK( J+(IV )*N ) ), + $ ABS( WORK( J+(IV+1)*N ) ), VMAX ) + VCRIT = BIGNUM / VMAX +* + ELSE +* +* 2-by-2 diagonal block +* +* Scale if necessary to avoid overflow when forming +* the right-hand side elements. +* + BETA = MAX( WORK( J ), WORK( J+1 ) ) + IF( BETA.GT.VCRIT ) THEN + REC = ONE / VMAX + CALL SSCAL( N-KI+1, REC, WORK(KI+(IV )*N), 1 ) + CALL SSCAL( N-KI+1, REC, WORK(KI+(IV+1)*N), 1 ) + VMAX = ONE + VCRIT = BIGNUM + END IF +* + WORK( J +(IV )*N ) = WORK( J+(IV)*N ) - + $ SDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+(IV)*N ), 1 ) +* + WORK( J +(IV+1)*N ) = WORK( J+(IV+1)*N ) - + $ SDOT( J-KI-2, T( KI+2, J ), 1, + $ WORK( KI+2+(IV+1)*N ), 1 ) +* + WORK( J+1+(IV )*N ) = WORK( J+1+(IV)*N ) - + $ SDOT( J-KI-2, T( KI+2, J+1 ), 1, + $ WORK( KI+2+(IV)*N ), 1 ) +* + WORK( J+1+(IV+1)*N ) = WORK( J+1+(IV+1)*N ) - + $ SDOT( J-KI-2, T( KI+2, J+1 ), 1, + $ WORK( KI+2+(IV+1)*N ), 1 ) +* +* Solve 2-by-2 complex linear equation +* [ (T(j,j) T(j,j+1) )**T - (wr-i*wi)*I ]*X = SCALE*B +* [ (T(j+1,j) T(j+1,j+1)) ] +* + CALL SLALN2( .TRUE., 2, 2, SMIN, ONE, T( J, J ), + $ LDT, ONE, ONE, WORK( J+IV*N ), N, WR, + $ -WI, X, 2, SCALE, XNORM, IERR ) +* +* Scale if necessary +* + IF( SCALE.NE.ONE ) THEN + CALL SSCAL( N-KI+1, SCALE, WORK(KI+(IV )*N), 1) + CALL SSCAL( N-KI+1, SCALE, WORK(KI+(IV+1)*N), 1) + END IF + WORK( J +(IV )*N ) = X( 1, 1 ) + WORK( J +(IV+1)*N ) = X( 1, 2 ) + WORK( J+1+(IV )*N ) = X( 2, 1 ) + WORK( J+1+(IV+1)*N ) = X( 2, 2 ) + VMAX = MAX( ABS( X( 1, 1 ) ), ABS( X( 1, 2 ) ), + $ ABS( X( 2, 1 ) ), ABS( X( 2, 2 ) ), + $ VMAX ) + VCRIT = BIGNUM / VMAX +* + END IF + 200 CONTINUE +* +* Copy the vector x or Q*x to VL and normalize. +* + IF( .NOT.OVER ) THEN +* ------------------------------ +* no back-transform: copy x to VL and normalize. + CALL SCOPY( N-KI+1, WORK( KI + (IV )*N ), 1, + $ VL( KI, IS ), 1 ) + CALL SCOPY( N-KI+1, WORK( KI + (IV+1)*N ), 1, + $ VL( KI, IS+1 ), 1 ) +* + EMAX = ZERO + DO 220 K = KI, N + EMAX = MAX( EMAX, ABS( VL( K, IS ) )+ + $ ABS( VL( K, IS+1 ) ) ) + 220 CONTINUE + REMAX = ONE / EMAX + CALL SSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) + CALL SSCAL( N-KI+1, REMAX, VL( KI, IS+1 ), 1 ) +* + DO 230 K = 1, KI - 1 + VL( K, IS ) = ZERO + VL( K, IS+1 ) = ZERO + 230 CONTINUE +* + ELSE IF( NB.EQ.1 ) THEN +* ------------------------------ +* version 1: back-transform each vector with GEMV, Q*x. + IF( KI.LT.N-1 ) THEN + CALL SGEMV( 'N', N, N-KI-1, ONE, + $ VL( 1, KI+2 ), LDVL, + $ WORK( KI+2 + (IV)*N ), 1, + $ WORK( KI + (IV)*N ), + $ VL( 1, KI ), 1 ) + CALL SGEMV( 'N', N, N-KI-1, ONE, + $ VL( 1, KI+2 ), LDVL, + $ WORK( KI+2 + (IV+1)*N ), 1, + $ WORK( KI+1 + (IV+1)*N ), + $ VL( 1, KI+1 ), 1 ) + ELSE + CALL SSCAL( N, WORK(KI+ (IV )*N), VL(1, KI ), 1) + CALL SSCAL( N, WORK(KI+1+(IV+1)*N), VL(1, KI+1), 1) + END IF +* + EMAX = ZERO + DO 240 K = 1, N + EMAX = MAX( EMAX, ABS( VL( K, KI ) )+ + $ ABS( VL( K, KI+1 ) ) ) + 240 CONTINUE + REMAX = ONE / EMAX + CALL SSCAL( N, REMAX, VL( 1, KI ), 1 ) + CALL SSCAL( N, REMAX, VL( 1, KI+1 ), 1 ) +* + ELSE +* ------------------------------ +* version 2: back-transform block of vectors with GEMM +* zero out above vector +* could go from KI-NV+1 to KI-1 + DO K = 1, KI - 1 + WORK( K + (IV )*N ) = ZERO + WORK( K + (IV+1)*N ) = ZERO + END DO + ISCOMPLEX( IV ) = IP + ISCOMPLEX( IV+1 ) = -IP + IV = IV + 1 +* back-transform and normalization is done below + END IF + END IF + + IF( NB.GT.1 ) THEN +* -------------------------------------------------------- +* Blocked version of back-transform +* For complex case, KI2 includes both vectors (KI and KI+1) + IF( IP.EQ.0 ) THEN + KI2 = KI + ELSE + KI2 = KI + 1 + END IF + +* Columns 1:IV of work are valid vectors. +* When the number of vectors stored reaches NB-1 or NB, +* or if this was last vector, do the GEMM + IF( (IV.GE.NB-1) .OR. (KI2.EQ.N) ) THEN + CALL SGEMM( 'N', 'N', N, IV, N-KI2+IV, ONE, + $ VL( 1, KI2-IV+1 ), LDVL, + $ WORK( KI2-IV+1 + (1)*N ), N, + $ ZERO, + $ WORK( 1 + (NB+1)*N ), N ) +* normalize vectors + DO K = 1, IV + IF( ISCOMPLEX(K).EQ.0) THEN +* real eigenvector + II = ISAMAX( N, WORK( 1 + (NB+K)*N ), 1 ) + REMAX = ONE / ABS( WORK( II + (NB+K)*N ) ) + ELSE IF( ISCOMPLEX(K).EQ.1) THEN +* first eigenvector of conjugate pair + EMAX = ZERO + DO II = 1, N + EMAX = MAX( EMAX, + $ ABS( WORK( II + (NB+K )*N ) )+ + $ ABS( WORK( II + (NB+K+1)*N ) ) ) + END DO + REMAX = ONE / EMAX +* else if ISCOMPLEX(K).EQ.-1 +* second eigenvector of conjugate pair +* reuse same REMAX as previous K + END IF + CALL SSCAL( N, REMAX, WORK( 1 + (NB+K)*N ), 1 ) + END DO + CALL SLACPY( 'F', N, IV, + $ WORK( 1 + (NB+1)*N ), N, + $ VL( 1, KI2-IV+1 ), LDVL ) + IV = 1 + ELSE + IV = IV + 1 + END IF + END IF ! blocked back-transform +* + IS = IS + 1 + IF( IP.NE.0 ) + $ IS = IS + 1 + 260 CONTINUE + END IF +* + RETURN +* +* End of STREVC3 +* + END diff --git a/dspl/liblapack/SRC/strexc.f b/dspl/liblapack/SRC/strexc.f new file mode 100644 index 0000000..8aaaccd --- /dev/null +++ b/dspl/liblapack/SRC/strexc.f @@ -0,0 +1,428 @@ +*> \brief \b STREXC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STREXC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STREXC( COMPQ, N, T, LDT, Q, LDQ, IFST, ILST, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ +* INTEGER IFST, ILST, INFO, LDQ, LDT, N +* .. +* .. Array Arguments .. +* REAL Q( LDQ, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STREXC reorders the real Schur factorization of a real matrix +*> A = Q*T*Q**T, so that the diagonal block of T with row index IFST is +*> moved to row ILST. +*> +*> The real Schur form T is reordered by an orthogonal similarity +*> transformation Z**T*T*Z, and optionally the matrix Q of Schur vectors +*> is updated by postmultiplying it with Z. +*> +*> T must be in Schur canonical form (as returned by SHSEQR), that is, +*> block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each +*> 2-by-2 diagonal block has its diagonal elements equal and its +*> off-diagonal elements of opposite sign. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> = 'V': update the matrix Q of Schur vectors; +*> = 'N': do not update Q. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> If N == 0 arguments ILST and IFST may be any value. +*> \endverbatim +*> +*> \param[in,out] T +*> \verbatim +*> T is REAL array, dimension (LDT,N) +*> On entry, the upper quasi-triangular matrix T, in Schur +*> Schur canonical form. +*> On exit, the reordered upper quasi-triangular matrix, again +*> in Schur canonical form. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ,N) +*> On entry, if COMPQ = 'V', the matrix Q of Schur vectors. +*> On exit, if COMPQ = 'V', Q has been postmultiplied by the +*> orthogonal transformation matrix Z which reorders T. +*> If COMPQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= 1, and if +*> COMPQ = 'V', LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IFST +*> \verbatim +*> IFST is INTEGER +*> \endverbatim +*> +*> \param[in,out] ILST +*> \verbatim +*> ILST is INTEGER +*> +*> Specify the reordering of the diagonal blocks of T. +*> The block with row index IFST is moved to row ILST, by a +*> sequence of transpositions between adjacent blocks. +*> On exit, if IFST pointed on entry to the second row of a +*> 2-by-2 block, it is changed to point to the first row; ILST +*> always points to the first row of the block in its final +*> position (which may differ from its input value by +1 or -1). +*> 1 <= IFST <= N; 1 <= ILST <= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> = 1: two adjacent blocks were too close to swap (the problem +*> is very ill-conditioned); T may have been partially +*> reordered, and ILST points to the first row of the +*> current position of the block being moved. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE STREXC( COMPQ, N, T, LDT, Q, LDQ, IFST, ILST, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPQ + INTEGER IFST, ILST, INFO, LDQ, LDT, N +* .. +* .. Array Arguments .. + REAL Q( LDQ, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTQ + INTEGER HERE, NBF, NBL, NBNEXT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SLAEXC, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input arguments. +* + INFO = 0 + WANTQ = LSAME( COMPQ, 'V' ) + IF( .NOT.WANTQ .AND. .NOT.LSAME( COMPQ, 'N' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.MAX( 1, N ) ) ) THEN + INFO = -6 + ELSE IF(( IFST.LT.1 .OR. IFST.GT.N ).AND.( N.GT.0 )) THEN + INFO = -7 + ELSE IF(( ILST.LT.1 .OR. ILST.GT.N ).AND.( N.GT.0 )) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STREXC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* +* Determine the first row of specified block +* and find out it is 1 by 1 or 2 by 2. +* + IF( IFST.GT.1 ) THEN + IF( T( IFST, IFST-1 ).NE.ZERO ) + $ IFST = IFST - 1 + END IF + NBF = 1 + IF( IFST.LT.N ) THEN + IF( T( IFST+1, IFST ).NE.ZERO ) + $ NBF = 2 + END IF +* +* Determine the first row of the final block +* and find out it is 1 by 1 or 2 by 2. +* + IF( ILST.GT.1 ) THEN + IF( T( ILST, ILST-1 ).NE.ZERO ) + $ ILST = ILST - 1 + END IF + NBL = 1 + IF( ILST.LT.N ) THEN + IF( T( ILST+1, ILST ).NE.ZERO ) + $ NBL = 2 + END IF +* + IF( IFST.EQ.ILST ) + $ RETURN +* + IF( IFST.LT.ILST ) THEN +* +* Update ILST +* + IF( NBF.EQ.2 .AND. NBL.EQ.1 ) + $ ILST = ILST - 1 + IF( NBF.EQ.1 .AND. NBL.EQ.2 ) + $ ILST = ILST + 1 +* + HERE = IFST +* + 10 CONTINUE +* +* Swap block with next one below +* + IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN +* +* Current block either 1 by 1 or 2 by 2 +* + NBNEXT = 1 + IF( HERE+NBF+1.LE.N ) THEN + IF( T( HERE+NBF+1, HERE+NBF ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, NBF, NBNEXT, + $ WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + NBNEXT +* +* Test if 2 by 2 block breaks into two 1 by 1 blocks +* + IF( NBF.EQ.2 ) THEN + IF( T( HERE+1, HERE ).EQ.ZERO ) + $ NBF = 3 + END IF +* + ELSE +* +* Current block consists of two 1 by 1 blocks each of which +* must be swapped individually +* + NBNEXT = 1 + IF( HERE+3.LE.N ) THEN + IF( T( HERE+3, HERE+2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE+1, 1, NBNEXT, + $ WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + IF( NBNEXT.EQ.1 ) THEN +* +* Swap two 1 by 1 blocks, no problems possible +* + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, NBNEXT, + $ WORK, INFO ) + HERE = HERE + 1 + ELSE +* +* Recompute NBNEXT in case 2 by 2 split +* + IF( T( HERE+2, HERE+1 ).EQ.ZERO ) + $ NBNEXT = 1 + IF( NBNEXT.EQ.2 ) THEN +* +* 2 by 2 Block did not split +* + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, + $ NBNEXT, WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 2 + ELSE +* +* 2 by 2 Block did split +* + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, 1, + $ WORK, INFO ) + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE+1, 1, 1, + $ WORK, INFO ) + HERE = HERE + 2 + END IF + END IF + END IF + IF( HERE.LT.ILST ) + $ GO TO 10 +* + ELSE +* + HERE = IFST + 20 CONTINUE +* +* Swap block with next one above +* + IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN +* +* Current block either 1 by 1 or 2 by 2 +* + NBNEXT = 1 + IF( HERE.GE.3 ) THEN + IF( T( HERE-1, HERE-2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-NBNEXT, NBNEXT, + $ NBF, WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - NBNEXT +* +* Test if 2 by 2 block breaks into two 1 by 1 blocks +* + IF( NBF.EQ.2 ) THEN + IF( T( HERE+1, HERE ).EQ.ZERO ) + $ NBF = 3 + END IF +* + ELSE +* +* Current block consists of two 1 by 1 blocks each of which +* must be swapped individually +* + NBNEXT = 1 + IF( HERE.GE.3 ) THEN + IF( T( HERE-1, HERE-2 ).NE.ZERO ) + $ NBNEXT = 2 + END IF + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-NBNEXT, NBNEXT, + $ 1, WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + IF( NBNEXT.EQ.1 ) THEN +* +* Swap two 1 by 1 blocks, no problems possible +* + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, NBNEXT, 1, + $ WORK, INFO ) + HERE = HERE - 1 + ELSE +* +* Recompute NBNEXT in case 2 by 2 split +* + IF( T( HERE, HERE-1 ).EQ.ZERO ) + $ NBNEXT = 1 + IF( NBNEXT.EQ.2 ) THEN +* +* 2 by 2 Block did not split +* + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-1, 2, 1, + $ WORK, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 2 + ELSE +* +* 2 by 2 Block did split +* + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, 1, + $ WORK, INFO ) + CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-1, 1, 1, + $ WORK, INFO ) + HERE = HERE - 2 + END IF + END IF + END IF + IF( HERE.GT.ILST ) + $ GO TO 20 + END IF + ILST = HERE +* + RETURN +* +* End of STREXC +* + END diff --git a/dspl/liblapack/SRC/strrfs.f b/dspl/liblapack/SRC/strrfs.f new file mode 100644 index 0000000..9b21910 --- /dev/null +++ b/dspl/liblapack/SRC/strrfs.f @@ -0,0 +1,472 @@ +*> \brief \b STRRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STRRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STRRFS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, X, +* LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, LDA, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* REAL A( LDA, * ), B( LDB, * ), BERR( * ), FERR( * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STRRFS provides error bounds and backward error estimates for the +*> solution to a system of linear equations with a triangular +*> coefficient matrix. +*> +*> The solution matrix X must be computed by STRTRS or some other +*> means before entering this routine. STRRFS does not do iterative +*> refinement because doing so cannot improve the backward error. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The triangular matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of the array A contains the upper +*> triangular matrix, and the strictly lower triangular part of +*> A is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of the array A contains the lower triangular +*> matrix, and the strictly upper triangular part of A is not +*> referenced. If DIAG = 'U', the diagonal elements of A are +*> also not referenced and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is REAL array, dimension (LDX,NRHS) +*> The solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is REAL array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is REAL array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE STRRFS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, X, + $ LDX, FERR, BERR, WORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDA, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + REAL A( LDA, * ), B( LDB, * ), BERR( * ), FERR( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANST + INTEGER I, J, K, KASE, NZ + REAL EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL SAXPY, SCOPY, SLACN2, STRMV, STRSV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLAMCH + EXTERNAL LSAME, SLAMCH +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = SLAMCH( 'Epsilon' ) + SAFMIN = SLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A or A**T, depending on TRANS. +* + CALL SCOPY( N, X( 1, J ), 1, WORK( N+1 ), 1 ) + CALL STRMV( UPLO, TRANS, DIAG, N, A, LDA, WORK( N+1 ), 1 ) + CALL SAXPY( N, -ONE, B( 1, J ), 1, WORK( N+1 ), 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + WORK( I ) = ABS( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = ABS( X( K, J ) ) + DO 30 I = 1, K + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = ABS( X( K, J ) ) + DO 50 I = 1, K - 1 + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + 50 CONTINUE + WORK( K ) = WORK( K ) + XK + 60 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = ABS( X( K, J ) ) + DO 70 I = K, N + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + 70 CONTINUE + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = ABS( X( K, J ) ) + DO 90 I = K + 1, N + WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK + 90 CONTINUE + WORK( K ) = WORK( K ) + XK + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A**T)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = 1, K + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 110 CONTINUE + WORK( K ) = WORK( K ) + S + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = ABS( X( K, J ) ) + DO 130 I = 1, K - 1 + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 130 CONTINUE + WORK( K ) = WORK( K ) + S + 140 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, N + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 150 CONTINUE + WORK( K ) = WORK( K ) + S + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = ABS( X( K, J ) ) + DO 170 I = K + 1, N + S = S + ABS( A( I, K ) )*ABS( X( I, J ) ) + 170 CONTINUE + WORK( K ) = WORK( K ) + S + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) ) + ELSE + S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) / + $ ( WORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use SLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( WORK( I ).GT.SAFE2 ) THEN + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + ELSE + WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL SLACN2( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ), + $ KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**T). +* + CALL STRSV( UPLO, TRANST, DIAG, N, A, LDA, WORK( N+1 ), + $ 1 ) + DO 220 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( N+I ) = WORK( I )*WORK( N+I ) + 230 CONTINUE + CALL STRSV( UPLO, TRANS, DIAG, N, A, LDA, WORK( N+1 ), + $ 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of STRRFS +* + END diff --git a/dspl/liblapack/SRC/strsen.f b/dspl/liblapack/SRC/strsen.f new file mode 100644 index 0000000..a52c627 --- /dev/null +++ b/dspl/liblapack/SRC/strsen.f @@ -0,0 +1,571 @@ +*> \brief \b STRSEN +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STRSEN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STRSEN( JOB, COMPQ, SELECT, N, T, LDT, Q, LDQ, WR, WI, +* M, S, SEP, WORK, LWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ, JOB +* INTEGER INFO, LDQ, LDT, LIWORK, LWORK, M, N +* REAL S, SEP +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* INTEGER IWORK( * ) +* REAL Q( LDQ, * ), T( LDT, * ), WI( * ), WORK( * ), +* $ WR( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STRSEN reorders the real Schur factorization of a real matrix +*> A = Q*T*Q**T, so that a selected cluster of eigenvalues appears in +*> the leading diagonal blocks of the upper quasi-triangular matrix T, +*> and the leading columns of Q form an orthonormal basis of the +*> corresponding right invariant subspace. +*> +*> Optionally the routine computes the reciprocal condition numbers of +*> the cluster of eigenvalues and/or the invariant subspace. +*> +*> T must be in Schur canonical form (as returned by SHSEQR), that is, +*> block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each +*> 2-by-2 diagonal block has its diagonal elements equal and its +*> off-diagonal elements of opposite sign. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies whether condition numbers are required for the +*> cluster of eigenvalues (S) or the invariant subspace (SEP): +*> = 'N': none; +*> = 'E': for eigenvalues only (S); +*> = 'V': for invariant subspace only (SEP); +*> = 'B': for both eigenvalues and invariant subspace (S and +*> SEP). +*> \endverbatim +*> +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> = 'V': update the matrix Q of Schur vectors; +*> = 'N': do not update Q. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> SELECT specifies the eigenvalues in the selected cluster. To +*> select a real eigenvalue w(j), SELECT(j) must be set to +*> .TRUE.. To select a complex conjugate pair of eigenvalues +*> w(j) and w(j+1), corresponding to a 2-by-2 diagonal block, +*> either SELECT(j) or SELECT(j+1) or both must be set to +*> .TRUE.; a complex conjugate pair of eigenvalues must be +*> either both included in the cluster or both excluded. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in,out] T +*> \verbatim +*> T is REAL array, dimension (LDT,N) +*> On entry, the upper quasi-triangular matrix T, in Schur +*> canonical form. +*> On exit, T is overwritten by the reordered matrix T, again in +*> Schur canonical form, with the selected eigenvalues in the +*> leading diagonal blocks. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is REAL array, dimension (LDQ,N) +*> On entry, if COMPQ = 'V', the matrix Q of Schur vectors. +*> On exit, if COMPQ = 'V', Q has been postmultiplied by the +*> orthogonal transformation matrix which reorders T; the +*> leading M columns of Q form an orthonormal basis for the +*> specified invariant subspace. +*> If COMPQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= 1; and if COMPQ = 'V', LDQ >= N. +*> \endverbatim +*> +*> \param[out] WR +*> \verbatim +*> WR is REAL array, dimension (N) +*> \endverbatim +*> +*> \param[out] WI +*> \verbatim +*> WI is REAL array, dimension (N) +*> +*> The real and imaginary parts, respectively, of the reordered +*> eigenvalues of T. The eigenvalues are stored in the same +*> order as on the diagonal of T, with WR(i) = T(i,i) and, if +*> T(i:i+1,i:i+1) is a 2-by-2 diagonal block, WI(i) > 0 and +*> WI(i+1) = -WI(i). Note that if a complex eigenvalue is +*> sufficiently ill-conditioned, then its value may differ +*> significantly from its value before reordering. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The dimension of the specified invariant subspace. +*> 0 < = M <= N. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL +*> If JOB = 'E' or 'B', S is a lower bound on the reciprocal +*> condition number for the selected cluster of eigenvalues. +*> S cannot underestimate the true reciprocal condition number +*> by more than a factor of sqrt(N). If M = 0 or N, S = 1. +*> If JOB = 'N' or 'V', S is not referenced. +*> \endverbatim +*> +*> \param[out] SEP +*> \verbatim +*> SEP is REAL +*> If JOB = 'V' or 'B', SEP is the estimated reciprocal +*> condition number of the specified invariant subspace. If +*> M = 0 or N, SEP = norm(T). +*> If JOB = 'N' or 'E', SEP is not referenced. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If JOB = 'N', LWORK >= max(1,N); +*> if JOB = 'E', LWORK >= max(1,M*(N-M)); +*> if JOB = 'V' or 'B', LWORK >= max(1,2*M*(N-M)). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If JOB = 'N' or 'E', LIWORK >= 1; +*> if JOB = 'V' or 'B', LIWORK >= max(1,M*(N-M)). +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> = 1: reordering of T failed because some eigenvalues are too +*> close to separate (the problem is very ill-conditioned); +*> T may have been partially reordered, and WR and WI +*> contain the eigenvalues in the same order as in T; S and +*> SEP (if requested) are set to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> STRSEN first collects the selected eigenvalues by computing an +*> orthogonal transformation Z to move them to the top left corner of T. +*> In other words, the selected eigenvalues are the eigenvalues of T11 +*> in: +*> +*> Z**T * T * Z = ( T11 T12 ) n1 +*> ( 0 T22 ) n2 +*> n1 n2 +*> +*> where N = n1+n2 and Z**T means the transpose of Z. The first n1 columns +*> of Z span the specified invariant subspace of T. +*> +*> If T has been obtained from the real Schur factorization of a matrix +*> A = Q*T*Q**T, then the reordered real Schur factorization of A is given +*> by A = (Q*Z)*(Z**T*T*Z)*(Q*Z)**T, and the first n1 columns of Q*Z span +*> the corresponding invariant subspace of A. +*> +*> The reciprocal condition number of the average of the eigenvalues of +*> T11 may be returned in S. S lies between 0 (very badly conditioned) +*> and 1 (very well conditioned). It is computed as follows. First we +*> compute R so that +*> +*> P = ( I R ) n1 +*> ( 0 0 ) n2 +*> n1 n2 +*> +*> is the projector on the invariant subspace associated with T11. +*> R is the solution of the Sylvester equation: +*> +*> T11*R - R*T22 = T12. +*> +*> Let F-norm(M) denote the Frobenius-norm of M and 2-norm(M) denote +*> the two-norm of M. Then S is computed as the lower bound +*> +*> (1 + F-norm(R)**2)**(-1/2) +*> +*> on the reciprocal of 2-norm(P), the true reciprocal condition number. +*> S cannot underestimate 1 / 2-norm(P) by more than a factor of +*> sqrt(N). +*> +*> An approximate error bound for the computed average of the +*> eigenvalues of T11 is +*> +*> EPS * norm(T) / S +*> +*> where EPS is the machine precision. +*> +*> The reciprocal condition number of the right invariant subspace +*> spanned by the first n1 columns of Z (or of Q*Z) is returned in SEP. +*> SEP is defined as the separation of T11 and T22: +*> +*> sep( T11, T22 ) = sigma-min( C ) +*> +*> where sigma-min(C) is the smallest singular value of the +*> n1*n2-by-n1*n2 matrix +*> +*> C = kprod( I(n2), T11 ) - kprod( transpose(T22), I(n1) ) +*> +*> I(m) is an m by m identity matrix, and kprod denotes the Kronecker +*> product. We estimate sigma-min(C) by the reciprocal of an estimate of +*> the 1-norm of inverse(C). The true reciprocal 1-norm of inverse(C) +*> cannot differ from sigma-min(C) by more than a factor of sqrt(n1*n2). +*> +*> When SEP is small, small changes in T can cause large changes in +*> the invariant subspace. An approximate bound on the maximum angular +*> error in the computed right invariant subspace is +*> +*> EPS * norm(T) / SEP +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STRSEN( JOB, COMPQ, SELECT, N, T, LDT, Q, LDQ, WR, WI, + $ M, S, SEP, WORK, LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, JOB + INTEGER INFO, LDQ, LDT, LIWORK, LWORK, M, N + REAL S, SEP +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + REAL Q( LDQ, * ), T( LDT, * ), WI( * ), WORK( * ), + $ WR( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, PAIR, SWAP, WANTBH, WANTQ, WANTS, + $ WANTSP + INTEGER IERR, K, KASE, KK, KS, LIWMIN, LWMIN, N1, N2, + $ NN + REAL EST, RNORM, SCALE +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SLANGE + EXTERNAL LSAME, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SLACN2, SLACPY, STREXC, STRSYL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH + WANTQ = LSAME( COMPQ, 'V' ) +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.WANTS .AND. .NOT.WANTSP ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -8 + ELSE +* +* Set M to the dimension of the specified invariant subspace, +* and test LWORK and LIWORK. +* + M = 0 + PAIR = .FALSE. + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + IF( K.LT.N ) THEN + IF( T( K+1, K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) + $ M = M + 2 + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE +* + N1 = M + N2 = N - M + NN = N1*N2 +* + IF( WANTSP ) THEN + LWMIN = MAX( 1, 2*NN ) + LIWMIN = MAX( 1, NN ) + ELSE IF( LSAME( JOB, 'N' ) ) THEN + LWMIN = MAX( 1, N ) + LIWMIN = 1 + ELSE IF( LSAME( JOB, 'E' ) ) THEN + LWMIN = MAX( 1, NN ) + LIWMIN = 1 + END IF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -17 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRSEN', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.N .OR. M.EQ.0 ) THEN + IF( WANTS ) + $ S = ONE + IF( WANTSP ) + $ SEP = SLANGE( '1', N, N, T, LDT, WORK ) + GO TO 40 + END IF +* +* Collect the selected blocks at the top-left corner of T. +* + KS = 0 + PAIR = .FALSE. + DO 20 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + SWAP = SELECT( K ) + IF( K.LT.N ) THEN + IF( T( K+1, K ).NE.ZERO ) THEN + PAIR = .TRUE. + SWAP = SWAP .OR. SELECT( K+1 ) + END IF + END IF + IF( SWAP ) THEN + KS = KS + 1 +* +* Swap the K-th block to position KS. +* + IERR = 0 + KK = K + IF( K.NE.KS ) + $ CALL STREXC( COMPQ, N, T, LDT, Q, LDQ, KK, KS, WORK, + $ IERR ) + IF( IERR.EQ.1 .OR. IERR.EQ.2 ) THEN +* +* Blocks too close to swap: exit. +* + INFO = 1 + IF( WANTS ) + $ S = ZERO + IF( WANTSP ) + $ SEP = ZERO + GO TO 40 + END IF + IF( PAIR ) + $ KS = KS + 1 + END IF + END IF + 20 CONTINUE +* + IF( WANTS ) THEN +* +* Solve Sylvester equation for R: +* +* T11*R - R*T22 = scale*T12 +* + CALL SLACPY( 'F', N1, N2, T( 1, N1+1 ), LDT, WORK, N1 ) + CALL STRSYL( 'N', 'N', -1, N1, N2, T, LDT, T( N1+1, N1+1 ), + $ LDT, WORK, N1, SCALE, IERR ) +* +* Estimate the reciprocal of the condition number of the cluster +* of eigenvalues. +* + RNORM = SLANGE( 'F', N1, N2, WORK, N1, WORK ) + IF( RNORM.EQ.ZERO ) THEN + S = ONE + ELSE + S = SCALE / ( SQRT( SCALE*SCALE / RNORM+RNORM )* + $ SQRT( RNORM ) ) + END IF + END IF +* + IF( WANTSP ) THEN +* +* Estimate sep(T11,T22). +* + EST = ZERO + KASE = 0 + 30 CONTINUE + CALL SLACN2( NN, WORK( NN+1 ), WORK, IWORK, EST, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve T11*R - R*T22 = scale*X. +* + CALL STRSYL( 'N', 'N', -1, N1, N2, T, LDT, + $ T( N1+1, N1+1 ), LDT, WORK, N1, SCALE, + $ IERR ) + ELSE +* +* Solve T11**T*R - R*T22**T = scale*X. +* + CALL STRSYL( 'T', 'T', -1, N1, N2, T, LDT, + $ T( N1+1, N1+1 ), LDT, WORK, N1, SCALE, + $ IERR ) + END IF + GO TO 30 + END IF +* + SEP = SCALE / EST + END IF +* + 40 CONTINUE +* +* Store the output eigenvalues in WR and WI. +* + DO 50 K = 1, N + WR( K ) = T( K, K ) + WI( K ) = ZERO + 50 CONTINUE + DO 60 K = 1, N - 1 + IF( T( K+1, K ).NE.ZERO ) THEN + WI( K ) = SQRT( ABS( T( K, K+1 ) ) )* + $ SQRT( ABS( T( K+1, K ) ) ) + WI( K+1 ) = -WI( K ) + END IF + 60 CONTINUE +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of STRSEN +* + END diff --git a/dspl/liblapack/SRC/strsna.f b/dspl/liblapack/SRC/strsna.f new file mode 100644 index 0000000..1dc7fe7 --- /dev/null +++ b/dspl/liblapack/SRC/strsna.f @@ -0,0 +1,603 @@ +*> \brief \b STRSNA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STRSNA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STRSNA( JOB, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, +* LDVR, S, SEP, MM, M, WORK, LDWORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER HOWMNY, JOB +* INTEGER INFO, LDT, LDVL, LDVR, LDWORK, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* INTEGER IWORK( * ) +* REAL S( * ), SEP( * ), T( LDT, * ), VL( LDVL, * ), +* $ VR( LDVR, * ), WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STRSNA estimates reciprocal condition numbers for specified +*> eigenvalues and/or right eigenvectors of a real upper +*> quasi-triangular matrix T (or of any matrix Q*T*Q**T with Q +*> orthogonal). +*> +*> T must be in Schur canonical form (as returned by SHSEQR), that is, +*> block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each +*> 2-by-2 diagonal block has its diagonal elements equal and its +*> off-diagonal elements of opposite sign. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies whether condition numbers are required for +*> eigenvalues (S) or eigenvectors (SEP): +*> = 'E': for eigenvalues only (S); +*> = 'V': for eigenvectors only (SEP); +*> = 'B': for both eigenvalues and eigenvectors (S and SEP). +*> \endverbatim +*> +*> \param[in] HOWMNY +*> \verbatim +*> HOWMNY is CHARACTER*1 +*> = 'A': compute condition numbers for all eigenpairs; +*> = 'S': compute condition numbers for selected eigenpairs +*> specified by the array SELECT. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> If HOWMNY = 'S', SELECT specifies the eigenpairs for which +*> condition numbers are required. To select condition numbers +*> for the eigenpair corresponding to a real eigenvalue w(j), +*> SELECT(j) must be set to .TRUE.. To select condition numbers +*> corresponding to a complex conjugate pair of eigenvalues w(j) +*> and w(j+1), either SELECT(j) or SELECT(j+1) or both, must be +*> set to .TRUE.. +*> If HOWMNY = 'A', SELECT is not referenced. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is REAL array, dimension (LDT,N) +*> The upper quasi-triangular matrix T, in Schur canonical form. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is REAL array, dimension (LDVL,M) +*> If JOB = 'E' or 'B', VL must contain left eigenvectors of T +*> (or of any Q*T*Q**T with Q orthogonal), corresponding to the +*> eigenpairs specified by HOWMNY and SELECT. The eigenvectors +*> must be stored in consecutive columns of VL, as returned by +*> SHSEIN or STREVC. +*> If JOB = 'V', VL is not referenced. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. +*> LDVL >= 1; and if JOB = 'E' or 'B', LDVL >= N. +*> \endverbatim +*> +*> \param[in] VR +*> \verbatim +*> VR is REAL array, dimension (LDVR,M) +*> If JOB = 'E' or 'B', VR must contain right eigenvectors of T +*> (or of any Q*T*Q**T with Q orthogonal), corresponding to the +*> eigenpairs specified by HOWMNY and SELECT. The eigenvectors +*> must be stored in consecutive columns of VR, as returned by +*> SHSEIN or STREVC. +*> If JOB = 'V', VR is not referenced. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. +*> LDVR >= 1; and if JOB = 'E' or 'B', LDVR >= N. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is REAL array, dimension (MM) +*> If JOB = 'E' or 'B', the reciprocal condition numbers of the +*> selected eigenvalues, stored in consecutive elements of the +*> array. For a complex conjugate pair of eigenvalues two +*> consecutive elements of S are set to the same value. Thus +*> S(j), SEP(j), and the j-th columns of VL and VR all +*> correspond to the same eigenpair (but not in general the +*> j-th eigenpair, unless all eigenpairs are selected). +*> If JOB = 'V', S is not referenced. +*> \endverbatim +*> +*> \param[out] SEP +*> \verbatim +*> SEP is REAL array, dimension (MM) +*> If JOB = 'V' or 'B', the estimated reciprocal condition +*> numbers of the selected eigenvectors, stored in consecutive +*> elements of the array. For a complex eigenvector two +*> consecutive elements of SEP are set to the same value. If +*> the eigenvalues cannot be reordered to compute SEP(j), SEP(j) +*> is set to 0; this can only occur when the true value would be +*> very small anyway. +*> If JOB = 'E', SEP is not referenced. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of elements in the arrays S (if JOB = 'E' or 'B') +*> and/or SEP (if JOB = 'V' or 'B'). MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of elements of the arrays S and/or SEP actually +*> used to store the estimated condition numbers. +*> If HOWMNY = 'A', M is set to N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (LDWORK,N+6) +*> If JOB = 'E', WORK is not referenced. +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. +*> LDWORK >= 1; and if JOB = 'V' or 'B', LDWORK >= N. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (2*(N-1)) +*> If JOB = 'E', IWORK is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The reciprocal of the condition number of an eigenvalue lambda is +*> defined as +*> +*> S(lambda) = |v**T*u| / (norm(u)*norm(v)) +*> +*> where u and v are the right and left eigenvectors of T corresponding +*> to lambda; v**T denotes the transpose of v, and norm(u) +*> denotes the Euclidean norm. These reciprocal condition numbers always +*> lie between zero (very badly conditioned) and one (very well +*> conditioned). If n = 1, S(lambda) is defined to be 1. +*> +*> An approximate error bound for a computed eigenvalue W(i) is given by +*> +*> EPS * norm(T) / S(i) +*> +*> where EPS is the machine precision. +*> +*> The reciprocal of the condition number of the right eigenvector u +*> corresponding to lambda is defined as follows. Suppose +*> +*> T = ( lambda c ) +*> ( 0 T22 ) +*> +*> Then the reciprocal condition number is +*> +*> SEP( lambda, T22 ) = sigma-min( T22 - lambda*I ) +*> +*> where sigma-min denotes the smallest singular value. We approximate +*> the smallest singular value by the reciprocal of an estimate of the +*> one-norm of the inverse of T22 - lambda*I. If n = 1, SEP(1) is +*> defined to be abs(T(1,1)). +*> +*> An approximate error bound for a computed right eigenvector VR(i) +*> is given by +*> +*> EPS * norm(T) / SEP(i) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STRSNA( JOB, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, + $ LDVR, S, SEP, MM, M, WORK, LDWORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, JOB + INTEGER INFO, LDT, LDVL, LDVR, LDWORK, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + REAL S( * ), SEP( * ), T( LDT, * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL PAIR, SOMCON, WANTBH, WANTS, WANTSP + INTEGER I, IERR, IFST, ILST, J, K, KASE, KS, N2, NN + REAL BIGNUM, COND, CS, DELTA, DUMM, EPS, EST, LNRM, + $ MU, PROD, PROD1, PROD2, RNRM, SCALE, SMLNUM, SN +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) + REAL DUMMY( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT, SLAMCH, SLAPY2, SNRM2 + EXTERNAL LSAME, SDOT, SLAMCH, SLAPY2, SNRM2 +* .. +* .. External Subroutines .. + EXTERNAL SLABAD, SLACN2, SLACPY, SLAQTR, STREXC, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH +* + SOMCON = LSAME( HOWMNY, 'S' ) +* + INFO = 0 + IF( .NOT.WANTS .AND. .NOT.WANTSP ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( WANTS .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( WANTS .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE +* +* Set M to the number of eigenpairs for which condition numbers +* are required, and test MM. +* + IF( SOMCON ) THEN + M = 0 + PAIR = .FALSE. + DO 10 K = 1, N + IF( PAIR ) THEN + PAIR = .FALSE. + ELSE + IF( K.LT.N ) THEN + IF( T( K+1, K ).EQ.ZERO ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + PAIR = .TRUE. + IF( SELECT( K ) .OR. SELECT( K+1 ) ) + $ M = M + 2 + END IF + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + END IF + 10 CONTINUE + ELSE + M = N + END IF +* + IF( MM.LT.M ) THEN + INFO = -13 + ELSE IF( LDWORK.LT.1 .OR. ( WANTSP .AND. LDWORK.LT.N ) ) THEN + INFO = -16 + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRSNA', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( SOMCON ) THEN + IF( .NOT.SELECT( 1 ) ) + $ RETURN + END IF + IF( WANTS ) + $ S( 1 ) = ONE + IF( WANTSP ) + $ SEP( 1 ) = ABS( T( 1, 1 ) ) + RETURN + END IF +* +* Get machine constants +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) +* + KS = 0 + PAIR = .FALSE. + DO 60 K = 1, N +* +* Determine whether T(k,k) begins a 1-by-1 or 2-by-2 block. +* + IF( PAIR ) THEN + PAIR = .FALSE. + GO TO 60 + ELSE + IF( K.LT.N ) + $ PAIR = T( K+1, K ).NE.ZERO + END IF +* +* Determine whether condition numbers are required for the k-th +* eigenpair. +* + IF( SOMCON ) THEN + IF( PAIR ) THEN + IF( .NOT.SELECT( K ) .AND. .NOT.SELECT( K+1 ) ) + $ GO TO 60 + ELSE + IF( .NOT.SELECT( K ) ) + $ GO TO 60 + END IF + END IF +* + KS = KS + 1 +* + IF( WANTS ) THEN +* +* Compute the reciprocal condition number of the k-th +* eigenvalue. +* + IF( .NOT.PAIR ) THEN +* +* Real eigenvalue. +* + PROD = SDOT( N, VR( 1, KS ), 1, VL( 1, KS ), 1 ) + RNRM = SNRM2( N, VR( 1, KS ), 1 ) + LNRM = SNRM2( N, VL( 1, KS ), 1 ) + S( KS ) = ABS( PROD ) / ( RNRM*LNRM ) + ELSE +* +* Complex eigenvalue. +* + PROD1 = SDOT( N, VR( 1, KS ), 1, VL( 1, KS ), 1 ) + PROD1 = PROD1 + SDOT( N, VR( 1, KS+1 ), 1, VL( 1, KS+1 ), + $ 1 ) + PROD2 = SDOT( N, VL( 1, KS ), 1, VR( 1, KS+1 ), 1 ) + PROD2 = PROD2 - SDOT( N, VL( 1, KS+1 ), 1, VR( 1, KS ), + $ 1 ) + RNRM = SLAPY2( SNRM2( N, VR( 1, KS ), 1 ), + $ SNRM2( N, VR( 1, KS+1 ), 1 ) ) + LNRM = SLAPY2( SNRM2( N, VL( 1, KS ), 1 ), + $ SNRM2( N, VL( 1, KS+1 ), 1 ) ) + COND = SLAPY2( PROD1, PROD2 ) / ( RNRM*LNRM ) + S( KS ) = COND + S( KS+1 ) = COND + END IF + END IF +* + IF( WANTSP ) THEN +* +* Estimate the reciprocal condition number of the k-th +* eigenvector. +* +* Copy the matrix T to the array WORK and swap the diagonal +* block beginning at T(k,k) to the (1,1) position. +* + CALL SLACPY( 'Full', N, N, T, LDT, WORK, LDWORK ) + IFST = K + ILST = 1 + CALL STREXC( 'No Q', N, WORK, LDWORK, DUMMY, 1, IFST, ILST, + $ WORK( 1, N+1 ), IERR ) +* + IF( IERR.EQ.1 .OR. IERR.EQ.2 ) THEN +* +* Could not swap because blocks not well separated +* + SCALE = ONE + EST = BIGNUM + ELSE +* +* Reordering successful +* + IF( WORK( 2, 1 ).EQ.ZERO ) THEN +* +* Form C = T22 - lambda*I in WORK(2:N,2:N). +* + DO 20 I = 2, N + WORK( I, I ) = WORK( I, I ) - WORK( 1, 1 ) + 20 CONTINUE + N2 = 1 + NN = N - 1 + ELSE +* +* Triangularize the 2 by 2 block by unitary +* transformation U = [ cs i*ss ] +* [ i*ss cs ]. +* such that the (1,1) position of WORK is complex +* eigenvalue lambda with positive imaginary part. (2,2) +* position of WORK is the complex eigenvalue lambda +* with negative imaginary part. +* + MU = SQRT( ABS( WORK( 1, 2 ) ) )* + $ SQRT( ABS( WORK( 2, 1 ) ) ) + DELTA = SLAPY2( MU, WORK( 2, 1 ) ) + CS = MU / DELTA + SN = -WORK( 2, 1 ) / DELTA +* +* Form +* +* C**T = WORK(2:N,2:N) + i*[rwork(1) ..... rwork(n-1) ] +* [ mu ] +* [ .. ] +* [ .. ] +* [ mu ] +* where C**T is transpose of matrix C, +* and RWORK is stored starting in the N+1-st column of +* WORK. +* + DO 30 J = 3, N + WORK( 2, J ) = CS*WORK( 2, J ) + WORK( J, J ) = WORK( J, J ) - WORK( 1, 1 ) + 30 CONTINUE + WORK( 2, 2 ) = ZERO +* + WORK( 1, N+1 ) = TWO*MU + DO 40 I = 2, N - 1 + WORK( I, N+1 ) = SN*WORK( 1, I+1 ) + 40 CONTINUE + N2 = 2 + NN = 2*( N-1 ) + END IF +* +* Estimate norm(inv(C**T)) +* + EST = ZERO + KASE = 0 + 50 CONTINUE + CALL SLACN2( NN, WORK( 1, N+2 ), WORK( 1, N+4 ), IWORK, + $ EST, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN + IF( N2.EQ.1 ) THEN +* +* Real eigenvalue: solve C**T*x = scale*c. +* + CALL SLAQTR( .TRUE., .TRUE., N-1, WORK( 2, 2 ), + $ LDWORK, DUMMY, DUMM, SCALE, + $ WORK( 1, N+4 ), WORK( 1, N+6 ), + $ IERR ) + ELSE +* +* Complex eigenvalue: solve +* C**T*(p+iq) = scale*(c+id) in real arithmetic. +* + CALL SLAQTR( .TRUE., .FALSE., N-1, WORK( 2, 2 ), + $ LDWORK, WORK( 1, N+1 ), MU, SCALE, + $ WORK( 1, N+4 ), WORK( 1, N+6 ), + $ IERR ) + END IF + ELSE + IF( N2.EQ.1 ) THEN +* +* Real eigenvalue: solve C*x = scale*c. +* + CALL SLAQTR( .FALSE., .TRUE., N-1, WORK( 2, 2 ), + $ LDWORK, DUMMY, DUMM, SCALE, + $ WORK( 1, N+4 ), WORK( 1, N+6 ), + $ IERR ) + ELSE +* +* Complex eigenvalue: solve +* C*(p+iq) = scale*(c+id) in real arithmetic. +* + CALL SLAQTR( .FALSE., .FALSE., N-1, + $ WORK( 2, 2 ), LDWORK, + $ WORK( 1, N+1 ), MU, SCALE, + $ WORK( 1, N+4 ), WORK( 1, N+6 ), + $ IERR ) +* + END IF + END IF +* + GO TO 50 + END IF + END IF +* + SEP( KS ) = SCALE / MAX( EST, SMLNUM ) + IF( PAIR ) + $ SEP( KS+1 ) = SEP( KS ) + END IF +* + IF( PAIR ) + $ KS = KS + 1 +* + 60 CONTINUE + RETURN +* +* End of STRSNA +* + END diff --git a/dspl/liblapack/SRC/strsyl.f b/dspl/liblapack/SRC/strsyl.f new file mode 100644 index 0000000..029367c --- /dev/null +++ b/dspl/liblapack/SRC/strsyl.f @@ -0,0 +1,1002 @@ +*> \brief \b STRSYL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STRSYL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, +* LDC, SCALE, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANA, TRANB +* INTEGER INFO, ISGN, LDA, LDB, LDC, M, N +* REAL SCALE +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STRSYL solves the real Sylvester matrix equation: +*> +*> op(A)*X + X*op(B) = scale*C or +*> op(A)*X - X*op(B) = scale*C, +*> +*> where op(A) = A or A**T, and A and B are both upper quasi- +*> triangular. A is M-by-M and B is N-by-N; the right hand side C and +*> the solution X are M-by-N; and scale is an output scale factor, set +*> <= 1 to avoid overflow in X. +*> +*> A and B must be in Schur canonical form (as returned by SHSEQR), that +*> is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; +*> each 2-by-2 diagonal block has its diagonal elements equal and its +*> off-diagonal elements of opposite sign. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANA +*> \verbatim +*> TRANA is CHARACTER*1 +*> Specifies the option op(A): +*> = 'N': op(A) = A (No transpose) +*> = 'T': op(A) = A**T (Transpose) +*> = 'C': op(A) = A**H (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] TRANB +*> \verbatim +*> TRANB is CHARACTER*1 +*> Specifies the option op(B): +*> = 'N': op(B) = B (No transpose) +*> = 'T': op(B) = B**T (Transpose) +*> = 'C': op(B) = B**H (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] ISGN +*> \verbatim +*> ISGN is INTEGER +*> Specifies the sign in the equation: +*> = +1: solve op(A)*X + X*op(B) = scale*C +*> = -1: solve op(A)*X - X*op(B) = scale*C +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The order of the matrix A, and the number of rows in the +*> matrices X and C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix B, and the number of columns in the +*> matrices X and C. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,M) +*> The upper quasi-triangular matrix A, in Schur canonical form. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is REAL array, dimension (LDB,N) +*> The upper quasi-triangular matrix B, in Schur canonical form. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is REAL array, dimension (LDC,N) +*> On entry, the M-by-N right hand side matrix C. +*> On exit, C is overwritten by the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M) +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is REAL +*> The scale factor, scale, set <= 1 to avoid overflow in X. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> = 1: A and B have common or very close eigenvalues; perturbed +*> values were used to solve the equation (but the matrices +*> A and B are unchanged). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realSYcomputational +* +* ===================================================================== + SUBROUTINE STRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, + $ LDC, SCALE, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANA, TRANB + INTEGER INFO, ISGN, LDA, LDB, LDC, M, N + REAL SCALE +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRNA, NOTRNB + INTEGER IERR, J, K, K1, K2, KNEXT, L, L1, L2, LNEXT + REAL A11, BIGNUM, DA11, DB, EPS, SCALOC, SGN, SMIN, + $ SMLNUM, SUML, SUMR, XNORM +* .. +* .. Local Arrays .. + REAL DUM( 1 ), VEC( 2, 2 ), X( 2, 2 ) +* .. +* .. External Functions .. + LOGICAL LSAME + REAL SDOT, SLAMCH, SLANGE + EXTERNAL LSAME, SDOT, SLAMCH, SLANGE +* .. +* .. External Subroutines .. + EXTERNAL SLABAD, SLALN2, SLASY2, SSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL +* .. +* .. Executable Statements .. +* +* Decode and Test input parameters +* + NOTRNA = LSAME( TRANA, 'N' ) + NOTRNB = LSAME( TRANB, 'N' ) +* + INFO = 0 + IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'T' ) .AND. .NOT. + $ LSAME( TRANA, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'T' ) .AND. .NOT. + $ LSAME( TRANB, 'C' ) ) THEN + INFO = -2 + ELSE IF( ISGN.NE.1 .AND. ISGN.NE.-1 ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRSYL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + SCALE = ONE + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = SLAMCH( 'P' ) + SMLNUM = SLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL SLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM*REAL( M*N ) / EPS + BIGNUM = ONE / SMLNUM +* + SMIN = MAX( SMLNUM, EPS*SLANGE( 'M', M, M, A, LDA, DUM ), + $ EPS*SLANGE( 'M', N, N, B, LDB, DUM ) ) +* + SGN = ISGN +* + IF( NOTRNA .AND. NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-left corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* M L-1 +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)]. +* I=K+1 J=1 +* +* Start column loop (index = L) +* L1 (L2) : column index of the first (first) row of X(K,L). +* + LNEXT = 1 + DO 70 L = 1, N + IF( L.LT.LNEXT ) + $ GO TO 70 + IF( L.EQ.N ) THEN + L1 = L + L2 = L + ELSE + IF( B( L+1, L ).NE.ZERO ) THEN + L1 = L + L2 = L + 1 + LNEXT = L + 2 + ELSE + L1 = L + L2 = L + LNEXT = L + 1 + END IF + END IF +* +* Start row loop (index = K) +* K1 (K2): row index of the first (last) row of X(K,L). +* + KNEXT = M + DO 60 K = M, 1, -1 + IF( K.GT.KNEXT ) + $ GO TO 60 + IF( K.EQ.1 ) THEN + K1 = K + K2 = K + ELSE + IF( A( K, K-1 ).NE.ZERO ) THEN + K1 = K - 1 + K2 = K + KNEXT = K - 2 + ELSE + K1 = K + K2 = K + KNEXT = K - 1 + END IF + END IF +* + IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN + SUML = SDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) + SCALOC = ONE +* + A11 = A( K1, K1 ) + SGN*B( L1, L1 ) + DA11 = ABS( A11 ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( VEC( 1, 1 ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11 +* + IF( SCALOC.NE.ONE ) THEN + DO 10 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 10 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) +* + ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN +* + SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, A( K1, K1 ), + $ LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 20 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 20 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K2, L1 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN +* + SUML = SDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) ) +* + SUML = SDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L2 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) ) +* + CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, B( L1, L1 ), + $ LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 40 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 40 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN +* + SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L2 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L2 ), 1 ) + SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR ) +* + CALL SLASY2( .FALSE., .FALSE., ISGN, 2, 2, + $ A( K1, K1 ), LDA, B( L1, L1 ), LDB, VEC, + $ 2, SCALOC, X, 2, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 50 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 50 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 1, 2 ) + C( K2, L1 ) = X( 2, 1 ) + C( K2, L2 ) = X( 2, 2 ) + END IF +* + 60 CONTINUE +* + 70 CONTINUE +* + ELSE IF( .NOT.NOTRNA .AND. NOTRNB ) THEN +* +* Solve A**T *X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* upper-left corner column by column by +* +* A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* K-1 L-1 +* R(K,L) = SUM [A(I,K)**T*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)] +* I=1 J=1 +* +* Start column loop (index = L) +* L1 (L2): column index of the first (last) row of X(K,L) +* + LNEXT = 1 + DO 130 L = 1, N + IF( L.LT.LNEXT ) + $ GO TO 130 + IF( L.EQ.N ) THEN + L1 = L + L2 = L + ELSE + IF( B( L+1, L ).NE.ZERO ) THEN + L1 = L + L2 = L + 1 + LNEXT = L + 2 + ELSE + L1 = L + L2 = L + LNEXT = L + 1 + END IF + END IF +* +* Start row loop (index = K) +* K1 (K2): row index of the first (last) row of X(K,L) +* + KNEXT = 1 + DO 120 K = 1, M + IF( K.LT.KNEXT ) + $ GO TO 120 + IF( K.EQ.M ) THEN + K1 = K + K2 = K + ELSE + IF( A( K+1, K ).NE.ZERO ) THEN + K1 = K + K2 = K + 1 + KNEXT = K + 2 + ELSE + K1 = K + K2 = K + KNEXT = K + 1 + END IF + END IF +* + IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) + SCALOC = ONE +* + A11 = A( K1, K1 ) + SGN*B( L1, L1 ) + DA11 = ABS( A11 ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( VEC( 1, 1 ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11 +* + IF( SCALOC.NE.ONE ) THEN + DO 80 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 80 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) +* + ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, A( K1, K1 ), + $ LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 90 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 90 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K2, L1 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) ) +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) ) +* + CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, B( L1, L1 ), + $ LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 100 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 100 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 ) + SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L2 ), 1 ) + SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L2 ), 1 ) + VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR ) +* + CALL SLASY2( .TRUE., .FALSE., ISGN, 2, 2, A( K1, K1 ), + $ LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X, + $ 2, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 110 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 110 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 1, 2 ) + C( K2, L1 ) = X( 2, 1 ) + C( K2, L2 ) = X( 2, 2 ) + END IF +* + 120 CONTINUE + 130 CONTINUE +* + ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A**T*X + ISGN*X*B**T = scale*C. +* +* The (K,L)th block of X is determined starting from +* top-right corner column by column by +* +* A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L) +* +* Where +* K-1 N +* R(K,L) = SUM [A(I,K)**T*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T]. +* I=1 J=L+1 +* +* Start column loop (index = L) +* L1 (L2): column index of the first (last) row of X(K,L) +* + LNEXT = N + DO 190 L = N, 1, -1 + IF( L.GT.LNEXT ) + $ GO TO 190 + IF( L.EQ.1 ) THEN + L1 = L + L2 = L + ELSE + IF( B( L, L-1 ).NE.ZERO ) THEN + L1 = L - 1 + L2 = L + LNEXT = L - 2 + ELSE + L1 = L + L2 = L + LNEXT = L - 1 + END IF + END IF +* +* Start row loop (index = K) +* K1 (K2): row index of the first (last) row of X(K,L) +* + KNEXT = 1 + DO 180 K = 1, M + IF( K.LT.KNEXT ) + $ GO TO 180 + IF( K.EQ.M ) THEN + K1 = K + K2 = K + ELSE + IF( A( K+1, K ).NE.ZERO ) THEN + K1 = K + K2 = K + 1 + KNEXT = K + 2 + ELSE + K1 = K + K2 = K + KNEXT = K + 1 + END IF + END IF +* + IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( N-L1, C( K1, MIN( L1+1, N ) ), LDC, + $ B( L1, MIN( L1+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) + SCALOC = ONE +* + A11 = A( K1, K1 ) + SGN*B( L1, L1 ) + DA11 = ABS( A11 ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( VEC( 1, 1 ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11 +* + IF( SCALOC.NE.ONE ) THEN + DO 140 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 140 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) +* + ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, A( K1, K1 ), + $ LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 150 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 150 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K2, L1 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) ) +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) ) +* + CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, B( L1, L1 ), + $ LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 160 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 160 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 ) + SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L2 ), 1 ) + SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN(L2+1, N ) ), LDB ) + VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR ) +* + CALL SLASY2( .TRUE., .TRUE., ISGN, 2, 2, A( K1, K1 ), + $ LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X, + $ 2, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 170 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 170 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 1, 2 ) + C( K2, L1 ) = X( 2, 1 ) + C( K2, L2 ) = X( 2, 2 ) + END IF +* + 180 CONTINUE + 190 CONTINUE +* + ELSE IF( NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B**T = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-right corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L) +* +* Where +* M N +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T]. +* I=K+1 J=L+1 +* +* Start column loop (index = L) +* L1 (L2): column index of the first (last) row of X(K,L) +* + LNEXT = N + DO 250 L = N, 1, -1 + IF( L.GT.LNEXT ) + $ GO TO 250 + IF( L.EQ.1 ) THEN + L1 = L + L2 = L + ELSE + IF( B( L, L-1 ).NE.ZERO ) THEN + L1 = L - 1 + L2 = L + LNEXT = L - 2 + ELSE + L1 = L + L2 = L + LNEXT = L - 1 + END IF + END IF +* +* Start row loop (index = K) +* K1 (K2): row index of the first (last) row of X(K,L) +* + KNEXT = M + DO 240 K = M, 1, -1 + IF( K.GT.KNEXT ) + $ GO TO 240 + IF( K.EQ.1 ) THEN + K1 = K + K2 = K + ELSE + IF( A( K, K-1 ).NE.ZERO ) THEN + K1 = K - 1 + K2 = K + KNEXT = K - 2 + ELSE + K1 = K + K2 = K + KNEXT = K - 1 + END IF + END IF +* + IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN + SUML = SDOT( M-K1, A( K1, MIN(K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L1 ), 1 ) + SUMR = SDOT( N-L1, C( K1, MIN( L1+1, N ) ), LDC, + $ B( L1, MIN( L1+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) + SCALOC = ONE +* + A11 = A( K1, K1 ) + SGN*B( L1, L1 ) + DA11 = ABS( A11 ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( VEC( 1, 1 ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11 +* + IF( SCALOC.NE.ONE ) THEN + DO 200 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 200 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) +* + ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN +* + SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, A( K1, K1 ), + $ LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 210 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 210 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K2, L1 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN +* + SUML = SDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L1 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) ) +* + SUML = SDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA, + $ C( MIN( K1+1, M ), L2 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) ) +* + CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, B( L1, L1 ), + $ LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ), + $ ZERO, X, 2, SCALOC, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 220 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 220 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 2, 1 ) +* + ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN +* + SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L2 ), 1 ) + SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L1 ), 1 ) + SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L1, MIN( L2+1, N ) ), LDB ) + VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR ) +* + SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA, + $ C( MIN( K2+1, M ), L2 ), 1 ) + SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC, + $ B( L2, MIN( L2+1, N ) ), LDB ) + VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR ) +* + CALL SLASY2( .FALSE., .TRUE., ISGN, 2, 2, A( K1, K1 ), + $ LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X, + $ 2, XNORM, IERR ) + IF( IERR.NE.0 ) + $ INFO = 1 +* + IF( SCALOC.NE.ONE ) THEN + DO 230 J = 1, N + CALL SSCAL( M, SCALOC, C( 1, J ), 1 ) + 230 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K1, L1 ) = X( 1, 1 ) + C( K1, L2 ) = X( 1, 2 ) + C( K2, L1 ) = X( 2, 1 ) + C( K2, L2 ) = X( 2, 2 ) + END IF +* + 240 CONTINUE + 250 CONTINUE +* + END IF +* + RETURN +* +* End of STRSYL +* + END diff --git a/dspl/liblapack/SRC/strti2.f b/dspl/liblapack/SRC/strti2.f new file mode 100644 index 0000000..e3de4a3 --- /dev/null +++ b/dspl/liblapack/SRC/strti2.f @@ -0,0 +1,212 @@ +*> \brief \b STRTI2 computes the inverse of a triangular matrix (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STRTI2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STRTI2( UPLO, DIAG, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STRTI2 computes the inverse of a real upper or lower triangular +*> matrix. +*> +*> This is the Level 2 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the triangular matrix A. If UPLO = 'U', the +*> leading n by n upper triangular part of the array A contains +*> the upper triangular matrix, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of the array A contains +*> the lower triangular matrix, and the strictly upper +*> triangular part of A is not referenced. If DIAG = 'U', the +*> diagonal elements of A are also not referenced and are +*> assumed to be 1. +*> +*> On exit, the (triangular) inverse of the original matrix, in +*> the same storage format. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE STRTI2( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE + PARAMETER ( ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J + REAL AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL SSCAL, STRMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRTI2', -INFO ) + RETURN + END IF +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix. +* + DO 10 J = 1, N + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF +* +* Compute elements 1:j-1 of j-th column. +* + CALL STRMV( 'Upper', 'No transpose', DIAG, J-1, A, LDA, + $ A( 1, J ), 1 ) + CALL SSCAL( J-1, AJJ, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix. +* + DO 20 J = N, 1, -1 + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF + IF( J.LT.N ) THEN +* +* Compute elements j+1:n of j-th column. +* + CALL STRMV( 'Lower', 'No transpose', DIAG, N-J, + $ A( J+1, J+1 ), LDA, A( J+1, J ), 1 ) + CALL SSCAL( N-J, AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of STRTI2 +* + END diff --git a/dspl/liblapack/SRC/strtri.f b/dspl/liblapack/SRC/strtri.f new file mode 100644 index 0000000..267de1a --- /dev/null +++ b/dspl/liblapack/SRC/strtri.f @@ -0,0 +1,242 @@ +*> \brief \b STRTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STRTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STRTRI( UPLO, DIAG, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STRTRI computes the inverse of a real upper or lower triangular +*> matrix A. +*> +*> This is the Level 3 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the triangular matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of the array A contains +*> the upper triangular matrix, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of the array A contains +*> the lower triangular matrix, and the strictly upper +*> triangular part of A is not referenced. If DIAG = 'U', the +*> diagonal elements of A are also not referenced and are +*> assumed to be 1. +*> On exit, the (triangular) inverse of the original matrix, in +*> the same storage format. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, A(i,i) is exactly zero. The triangular +*> matrix is singular and its inverse can not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE STRTRI( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ONE, ZERO + PARAMETER ( ONE = 1.0E+0, ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JB, NB, NN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL STRMM, STRSM, STRTI2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity if non-unit. +* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + INFO = 0 + END IF +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL STRTI2( UPLO, DIAG, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix +* + DO 20 J = 1, N, NB + JB = MIN( NB, N-J+1 ) +* +* Compute rows 1:j-1 of current block column +* + CALL STRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1, + $ JB, ONE, A, LDA, A( 1, J ), LDA ) + CALL STRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1, + $ JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA ) +* +* Compute inverse of current diagonal block +* + CALL STRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO ) + 20 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 30 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) + IF( J+JB.LE.N ) THEN +* +* Compute rows j+jb:n of current block column +* + CALL STRMM( 'Left', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA, + $ A( J+JB, J ), LDA ) + CALL STRSM( 'Right', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, -ONE, A( J, J ), LDA, + $ A( J+JB, J ), LDA ) + END IF +* +* Compute inverse of current diagonal block +* + CALL STRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO ) + 30 CONTINUE + END IF + END IF +* + RETURN +* +* End of STRTRI +* + END diff --git a/dspl/liblapack/SRC/strtrs.f b/dspl/liblapack/SRC/strtrs.f new file mode 100644 index 0000000..9eb6e36 --- /dev/null +++ b/dspl/liblapack/SRC/strtrs.f @@ -0,0 +1,226 @@ +*> \brief \b STRTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STRTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STRTRS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STRTRS solves a triangular system of the form +*> +*> A * X = B or A**T * X = B, +*> +*> where A is a triangular matrix of order N, and B is an N-by-NRHS +*> matrix. A check is made to verify that A is nonsingular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> The triangular matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of the array A contains the upper +*> triangular matrix, and the strictly lower triangular part of +*> A is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of the array A contains the lower triangular +*> matrix, and the strictly upper triangular part of A is not +*> referenced. If DIAG = 'U', the diagonal elements of A are +*> also not referenced and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is REAL array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, if INFO = 0, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of A is zero, +*> indicating that the matrix is singular and the solutions +*> X have not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE STRTRS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + REAL A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL STRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + END IF + INFO = 0 +* +* Solve A * x = b or A**T * x = b. +* + CALL STRSM( 'Left', UPLO, TRANS, DIAG, N, NRHS, ONE, A, LDA, B, + $ LDB ) +* + RETURN +* +* End of STRTRS +* + END diff --git a/dspl/liblapack/SRC/strttf.f b/dspl/liblapack/SRC/strttf.f new file mode 100644 index 0000000..5853be9 --- /dev/null +++ b/dspl/liblapack/SRC/strttf.f @@ -0,0 +1,492 @@ +*> \brief \b STRTTF copies a triangular matrix from the standard full format (TR) to the rectangular full packed format (TF). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STRTTF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STRTTF( TRANSR, UPLO, N, A, LDA, ARF, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. +* REAL A( 0: LDA-1, 0: * ), ARF( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STRTTF copies a triangular matrix A from standard full format (TR) +*> to rectangular full packed format (TF) . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': ARF in Normal form is wanted; +*> = 'T': ARF in Transpose form is wanted. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N). +*> On entry, the triangular matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of the array A contains +*> the upper triangular matrix, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of the array A contains +*> the lower triangular matrix, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the matrix A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] ARF +*> \verbatim +*> ARF is REAL array, dimension (NT). +*> NT=N*(N+1)/2. On exit, the triangular matrix A in RFP format. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup realOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Rectangular Full Packed (RFP) Format when N is +*> even. We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> the transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> the transpose of the last three columns of AP lower. +*> This covers the case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 03 04 05 33 43 53 +*> 13 14 15 00 44 54 +*> 23 24 25 10 11 55 +*> 33 34 35 20 21 22 +*> 00 44 45 30 31 32 +*> 01 11 55 40 41 42 +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We then consider Rectangular Full Packed (RFP) Format when N is +*> odd. We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> the transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> the transpose of the last two columns of AP lower. +*> This covers the case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> 02 03 04 00 33 43 +*> 12 13 14 10 11 44 +*> 22 23 24 20 21 22 +*> 00 33 34 30 31 32 +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'T'. RFP A in both UPLO cases is just the +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> 02 12 22 00 01 00 10 20 30 40 50 +*> 03 13 23 33 11 33 11 21 31 41 51 +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +* +* ===================================================================== + SUBROUTINE STRTTF( TRANSR, UPLO, N, A, LDA, ARF, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. + REAL A( 0: LDA-1, 0: * ), ARF( 0: * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER I, IJ, J, K, L, N1, N2, NT, NX2, NP1X2 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'T' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRTTF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + IF( N.EQ.1 ) THEN + ARF( 0 ) = A( 0, 0 ) + END IF + RETURN + END IF +* +* Size of array ARF(0:nt-1) +* + NT = N*( N+1 ) / 2 +* +* Set N1 and N2 depending on LOWER: for N even N1=N2=K +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* If N is odd, set NISODD = .TRUE., LDA=N+1 and A is (N+1)--by--K2. +* If N is even, set K = N/2 and NISODD = .FALSE., LDA=N and A is +* N--by--(N+1)/2. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + IF( .NOT.LOWER ) + $ NP1X2 = N + N + 2 + ELSE + NISODD = .TRUE. + IF( .NOT.LOWER ) + $ NX2 = N + N + END IF +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* N is odd, TRANSR = 'N', and UPLO = 'L' +* + IJ = 0 + DO J = 0, N2 + DO I = N1, N2 + J + ARF( IJ ) = A( N2+J, I ) + IJ = IJ + 1 + END DO + DO I = J, N - 1 + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* N is odd, TRANSR = 'N', and UPLO = 'U' +* + IJ = NT - N + DO J = N - 1, N1, -1 + DO I = 0, J + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + DO L = J - N1, N1 - 1 + ARF( IJ ) = A( J-N1, L ) + IJ = IJ + 1 + END DO + IJ = IJ - NX2 + END DO +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* N is odd, TRANSR = 'T', and UPLO = 'L' +* + IJ = 0 + DO J = 0, N2 - 1 + DO I = 0, J + ARF( IJ ) = A( J, I ) + IJ = IJ + 1 + END DO + DO I = N1 + J, N - 1 + ARF( IJ ) = A( I, N1+J ) + IJ = IJ + 1 + END DO + END DO + DO J = N2, N - 1 + DO I = 0, N1 - 1 + ARF( IJ ) = A( J, I ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* N is odd, TRANSR = 'T', and UPLO = 'U' +* + IJ = 0 + DO J = 0, N1 + DO I = N1, N - 1 + ARF( IJ ) = A( J, I ) + IJ = IJ + 1 + END DO + END DO + DO J = 0, N1 - 1 + DO I = 0, J + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + DO L = N2 + J, N - 1 + ARF( IJ ) = A( N2+J, L ) + IJ = IJ + 1 + END DO + END DO +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* N is even, TRANSR = 'N', and UPLO = 'L' +* + IJ = 0 + DO J = 0, K - 1 + DO I = K, K + J + ARF( IJ ) = A( K+J, I ) + IJ = IJ + 1 + END DO + DO I = J, N - 1 + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* N is even, TRANSR = 'N', and UPLO = 'U' +* + IJ = NT - N - 1 + DO J = N - 1, K, -1 + DO I = 0, J + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + DO L = J - K, K - 1 + ARF( IJ ) = A( J-K, L ) + IJ = IJ + 1 + END DO + IJ = IJ - NP1X2 + END DO +* + END IF +* + ELSE +* +* N is even and TRANSR = 'T' +* + IF( LOWER ) THEN +* +* N is even, TRANSR = 'T', and UPLO = 'L' +* + IJ = 0 + J = K + DO I = K, N - 1 + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + DO J = 0, K - 2 + DO I = 0, J + ARF( IJ ) = A( J, I ) + IJ = IJ + 1 + END DO + DO I = K + 1 + J, N - 1 + ARF( IJ ) = A( I, K+1+J ) + IJ = IJ + 1 + END DO + END DO + DO J = K - 1, N - 1 + DO I = 0, K - 1 + ARF( IJ ) = A( J, I ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* N is even, TRANSR = 'T', and UPLO = 'U' +* + IJ = 0 + DO J = 0, K + DO I = K, N - 1 + ARF( IJ ) = A( J, I ) + IJ = IJ + 1 + END DO + END DO + DO J = 0, K - 2 + DO I = 0, J + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + DO L = K + 1 + J, N - 1 + ARF( IJ ) = A( K+1+J, L ) + IJ = IJ + 1 + END DO + END DO +* Note that here, on exit of the loop, J = K-1 + DO I = 0, J + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of STRTTF +* + END diff --git a/dspl/liblapack/SRC/strttp.f b/dspl/liblapack/SRC/strttp.f new file mode 100644 index 0000000..557ccfa --- /dev/null +++ b/dspl/liblapack/SRC/strttp.f @@ -0,0 +1,175 @@ +*> \brief \b STRTTP copies a triangular matrix from the standard full format (TR) to the standard packed format (TP). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STRTTP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STRTTP( UPLO, N, A, LDA, AP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STRTTP copies a triangular matrix A from full format (TR) to standard +*> packed format (TP). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular. +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices AP and A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On exit, the triangular matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] AP +*> \verbatim +*> AP is REAL array, dimension (N*(N+1)/2) +*> On exit, the upper or lower triangular matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup realOTHERcomputational +* +* ===================================================================== + SUBROUTINE STRTTP( UPLO, N, A, LDA, AP, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. + REAL A( LDA, * ), AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* .. +* .. Local Scalars .. + LOGICAL LOWER + INTEGER I, J, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STRTTP', -INFO ) + RETURN + END IF +* + IF( LOWER ) THEN + K = 0 + DO J = 1, N + DO I = J, N + K = K + 1 + AP( K ) = A( I, J ) + END DO + END DO + ELSE + K = 0 + DO J = 1, N + DO I = 1, J + K = K + 1 + AP( K ) = A( I, J ) + END DO + END DO + END IF +* + RETURN +* +* End of STRTTP +* + END diff --git a/dspl/liblapack/SRC/stzrzf.f b/dspl/liblapack/SRC/stzrzf.f new file mode 100644 index 0000000..6da3c4a --- /dev/null +++ b/dspl/liblapack/SRC/stzrzf.f @@ -0,0 +1,313 @@ +*> \brief \b STZRZF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download STZRZF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE STZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> STZRZF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A +*> to upper triangular form by means of orthogonal transformations. +*> +*> The upper trapezoidal matrix A is factored as +*> +*> A = ( R 0 ) * Z, +*> +*> where Z is an N-by-N orthogonal matrix and R is an M-by-M upper +*> triangular matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= M. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is REAL array, dimension (LDA,N) +*> On entry, the leading M-by-N upper trapezoidal part of the +*> array A must contain the matrix to be factorized. +*> On exit, the leading M-by-M upper triangular part of A +*> contains the upper triangular matrix R, and elements M+1 to +*> N of the first M rows of A, with the array TAU, represent the +*> orthogonal matrix Z as a product of M elementary reflectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is REAL array, dimension (M) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is REAL array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup realOTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The N-by-N matrix Z can be computed by +*> +*> Z = Z(1)*Z(2)* ... *Z(M) +*> +*> where each N-by-N Z(k) is given by +*> +*> Z(k) = I - tau(k)*v(k)*v(k)**T +*> +*> with v(k) is the kth row vector of the M-by-N matrix +*> +*> V = ( I A(:,M+1:N) ) +*> +*> I is the M-by-M identity matrix, A(:,M+1:N) +*> is the output stored in A on exit from DTZRZF, +*> and tau(k) is the kth element of the array TAU. +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE STZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + REAL A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO + PARAMETER ( ZERO = 0.0E+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IWS, KI, KK, LDWORK, LWKMIN, LWKOPT, + $ M1, MU, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, SLARZB, SLARZT, SLATRZ +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( M.EQ.0 .OR. M.EQ.N ) THEN + LWKOPT = 1 + LWKMIN = 1 + ELSE +* +* Determine the block size. +* + NB = ILAENV( 1, 'SGERQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + LWKMIN = MAX( 1, M ) + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'STZRZF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 ) THEN + RETURN + ELSE IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = M + IF( NB.GT.1 .AND. NB.LT.M ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'SGERQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.M ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'SGERQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.M .AND. NX.LT.M ) THEN +* +* Use blocked code initially. +* The last kk rows are handled by the block method. +* + M1 = MIN( M+1, N ) + KI = ( ( M-NX-1 ) / NB )*NB + KK = MIN( M, KI+NB ) +* + DO 20 I = M - KK + KI + 1, M - KK + 1, -NB + IB = MIN( M-I+1, NB ) +* +* Compute the TZ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL SLATRZ( IB, N-I+1, N-M, A( I, I ), LDA, TAU( I ), + $ WORK ) + IF( I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL SLARZT( 'Backward', 'Rowwise', N-M, IB, A( I, M1 ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:i-1,i:n) from the right +* + CALL SLARZB( 'Right', 'No transpose', 'Backward', + $ 'Rowwise', I-1, N-I+1, IB, N-M, A( I, M1 ), + $ LDA, WORK, LDWORK, A( 1, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 20 CONTINUE + MU = I + NB - 1 + ELSE + MU = M + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 ) + $ CALL SLATRZ( MU, N, N-M, A, LDA, TAU, WORK ) +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of STZRZF +* + END diff --git a/dspl/liblapack/SRC/xerbla.f b/dspl/liblapack/SRC/xerbla.f new file mode 100644 index 0000000..4a03509 --- /dev/null +++ b/dspl/liblapack/SRC/xerbla.f @@ -0,0 +1,99 @@ +*> \brief \b XERBLA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download XERBLA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE XERBLA( SRNAME, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER*(*) SRNAME +* INTEGER INFO +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> XERBLA is an error handler for the LAPACK routines. +*> It is called by an LAPACK routine if an input parameter has an +*> invalid value. A message is printed and execution stops. +*> +*> Installers may consider modifying the STOP statement in order to +*> call system-specific exception-handling facilities. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SRNAME +*> \verbatim +*> SRNAME is CHARACTER*(*) +*> The name of the routine which called XERBLA. +*> \endverbatim +*> +*> \param[in] INFO +*> \verbatim +*> INFO is INTEGER +*> The position of the invalid parameter in the parameter list +*> of the calling routine. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE XERBLA( SRNAME, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER*(*) SRNAME + INTEGER INFO +* .. +* +* ===================================================================== +* +* .. Intrinsic Functions .. + INTRINSIC LEN_TRIM +* .. +* .. Executable Statements .. +* + WRITE( *, FMT = 9999 )SRNAME( 1:LEN_TRIM( SRNAME ) ), INFO +* + STOP +* + 9999 FORMAT( ' ** On entry to ', A, ' parameter number ', I2, ' had ', + $ 'an illegal value' ) +* +* End of XERBLA +* + END diff --git a/dspl/liblapack/SRC/xerbla_array.f b/dspl/liblapack/SRC/xerbla_array.f new file mode 100644 index 0000000..84fe7de --- /dev/null +++ b/dspl/liblapack/SRC/xerbla_array.f @@ -0,0 +1,129 @@ +*> \brief \b XERBLA_ARRAY +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download XERBLA_ARRAY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE XERBLA_ARRAY( SRNAME_ARRAY, SRNAME_LEN, INFO) +* +* .. Scalar Arguments .. +* INTEGER SRNAME_LEN, INFO +* .. +* .. Array Arguments .. +* CHARACTER(1) SRNAME_ARRAY(SRNAME_LEN) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> XERBLA_ARRAY assists other languages in calling XERBLA, the LAPACK +*> and BLAS error handler. Rather than taking a Fortran string argument +*> as the function's name, XERBLA_ARRAY takes an array of single +*> characters along with the array's length. XERBLA_ARRAY then copies +*> up to 32 characters of that array into a Fortran string and passes +*> that to XERBLA. If called with a non-positive SRNAME_LEN, +*> XERBLA_ARRAY will call XERBLA with a string of all blank characters. +*> +*> Say some macro or other device makes XERBLA_ARRAY available to C99 +*> by a name lapack_xerbla and with a common Fortran calling convention. +*> Then a C99 program could invoke XERBLA via: +*> { +*> int flen = strlen(__func__); +*> lapack_xerbla(__func__, &flen, &info); +*> } +*> +*> Providing XERBLA_ARRAY is not necessary for intercepting LAPACK +*> errors. XERBLA_ARRAY calls XERBLA. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SRNAME_ARRAY +*> \verbatim +*> SRNAME_ARRAY is CHARACTER(1) array, dimension (SRNAME_LEN) +*> The name of the routine which called XERBLA_ARRAY. +*> \endverbatim +*> +*> \param[in] SRNAME_LEN +*> \verbatim +*> SRNAME_LEN is INTEGER +*> The length of the name in SRNAME_ARRAY. +*> \endverbatim +*> +*> \param[in] INFO +*> \verbatim +*> INFO is INTEGER +*> The position of the invalid parameter in the parameter list +*> of the calling routine. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup OTHERauxiliary +* +* ===================================================================== + SUBROUTINE XERBLA_ARRAY( SRNAME_ARRAY, SRNAME_LEN, INFO) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER SRNAME_LEN, INFO +* .. +* .. Array Arguments .. + CHARACTER(1) SRNAME_ARRAY(SRNAME_LEN) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. Local Arrays .. + CHARACTER*32 SRNAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, LEN +* .. +* .. External Functions .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. + SRNAME = '' + DO I = 1, MIN( SRNAME_LEN, LEN( SRNAME ) ) + SRNAME( I:I ) = SRNAME_ARRAY( I ) + END DO + + CALL XERBLA( SRNAME, INFO ) + + RETURN + END diff --git a/dspl/liblapack/SRC/zbbcsd.f b/dspl/liblapack/SRC/zbbcsd.f new file mode 100644 index 0000000..4647181 --- /dev/null +++ b/dspl/liblapack/SRC/zbbcsd.f @@ -0,0 +1,1085 @@ +*> \brief \b ZBBCSD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZBBCSD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, +* THETA, PHI, U1, LDU1, U2, LDU2, V1T, LDV1T, +* V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E, +* B22D, B22E, RWORK, LRWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS +* INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LRWORK, M, P, Q +* .. +* .. Array Arguments .. +* DOUBLE PRECISION B11D( * ), B11E( * ), B12D( * ), B12E( * ), +* $ B21D( * ), B21E( * ), B22D( * ), B22E( * ), +* $ PHI( * ), THETA( * ), RWORK( * ) +* COMPLEX*16 U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ), +* $ V2T( LDV2T, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZBBCSD computes the CS decomposition of a unitary matrix in +*> bidiagonal-block form, +*> +*> +*> [ B11 | B12 0 0 ] +*> [ 0 | 0 -I 0 ] +*> X = [----------------] +*> [ B21 | B22 0 0 ] +*> [ 0 | 0 0 I ] +*> +*> [ C | -S 0 0 ] +*> [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**H +*> = [---------] [---------------] [---------] . +*> [ | U2 ] [ S | C 0 0 ] [ | V2 ] +*> [ 0 | 0 0 I ] +*> +*> X is M-by-M, its top-left block is P-by-Q, and Q must be no larger +*> than P, M-P, or M-Q. (If Q is not the smallest index, then X must be +*> transposed and/or permuted. This can be done in constant time using +*> the TRANS and SIGNS options. See ZUNCSD for details.) +*> +*> The bidiagonal matrices B11, B12, B21, and B22 are represented +*> implicitly by angles THETA(1:Q) and PHI(1:Q-1). +*> +*> The unitary matrices U1, U2, V1T, and V2T are input/output. +*> The input matrices are pre- or post-multiplied by the appropriate +*> singular vector matrices. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU1 +*> \verbatim +*> JOBU1 is CHARACTER +*> = 'Y': U1 is updated; +*> otherwise: U1 is not updated. +*> \endverbatim +*> +*> \param[in] JOBU2 +*> \verbatim +*> JOBU2 is CHARACTER +*> = 'Y': U2 is updated; +*> otherwise: U2 is not updated. +*> \endverbatim +*> +*> \param[in] JOBV1T +*> \verbatim +*> JOBV1T is CHARACTER +*> = 'Y': V1T is updated; +*> otherwise: V1T is not updated. +*> \endverbatim +*> +*> \param[in] JOBV2T +*> \verbatim +*> JOBV2T is CHARACTER +*> = 'Y': V2T is updated; +*> otherwise: V2T is not updated. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER +*> = 'T': X, U1, U2, V1T, and V2T are stored in row-major +*> order; +*> otherwise: X, U1, U2, V1T, and V2T are stored in column- +*> major order. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows and columns in X, the unitary matrix in +*> bidiagonal-block form. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in the top-left block of X. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in the top-left block of X. +*> 0 <= Q <= MIN(P,M-P,M-Q). +*> \endverbatim +*> +*> \param[in,out] THETA +*> \verbatim +*> THETA is DOUBLE PRECISION array, dimension (Q) +*> On entry, the angles THETA(1),...,THETA(Q) that, along with +*> PHI(1), ...,PHI(Q-1), define the matrix in bidiagonal-block +*> form. On exit, the angles whose cosines and sines define the +*> diagonal blocks in the CS decomposition. +*> \endverbatim +*> +*> \param[in,out] PHI +*> \verbatim +*> PHI is DOUBLE PRECISION array, dimension (Q-1) +*> The angles PHI(1),...,PHI(Q-1) that, along with THETA(1),..., +*> THETA(Q), define the matrix in bidiagonal-block form. +*> \endverbatim +*> +*> \param[in,out] U1 +*> \verbatim +*> U1 is COMPLEX*16 array, dimension (LDU1,P) +*> On entry, a P-by-P matrix. On exit, U1 is postmultiplied +*> by the left singular vector matrix common to [ B11 ; 0 ] and +*> [ B12 0 0 ; 0 -I 0 0 ]. +*> \endverbatim +*> +*> \param[in] LDU1 +*> \verbatim +*> LDU1 is INTEGER +*> The leading dimension of the array U1, LDU1 >= MAX(1,P). +*> \endverbatim +*> +*> \param[in,out] U2 +*> \verbatim +*> U2 is COMPLEX*16 array, dimension (LDU2,M-P) +*> On entry, an (M-P)-by-(M-P) matrix. On exit, U2 is +*> postmultiplied by the left singular vector matrix common to +*> [ B21 ; 0 ] and [ B22 0 0 ; 0 0 I ]. +*> \endverbatim +*> +*> \param[in] LDU2 +*> \verbatim +*> LDU2 is INTEGER +*> The leading dimension of the array U2, LDU2 >= MAX(1,M-P). +*> \endverbatim +*> +*> \param[in,out] V1T +*> \verbatim +*> V1T is COMPLEX*16 array, dimension (LDV1T,Q) +*> On entry, a Q-by-Q matrix. On exit, V1T is premultiplied +*> by the conjugate transpose of the right singular vector +*> matrix common to [ B11 ; 0 ] and [ B21 ; 0 ]. +*> \endverbatim +*> +*> \param[in] LDV1T +*> \verbatim +*> LDV1T is INTEGER +*> The leading dimension of the array V1T, LDV1T >= MAX(1,Q). +*> \endverbatim +*> +*> \param[in,out] V2T +*> \verbatim +*> V2T is COMPLEX*16 array, dimension (LDV2T,M-Q) +*> On entry, an (M-Q)-by-(M-Q) matrix. On exit, V2T is +*> premultiplied by the conjugate transpose of the right +*> singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and +*> [ B22 0 0 ; 0 0 I ]. +*> \endverbatim +*> +*> \param[in] LDV2T +*> \verbatim +*> LDV2T is INTEGER +*> The leading dimension of the array V2T, LDV2T >= MAX(1,M-Q). +*> \endverbatim +*> +*> \param[out] B11D +*> \verbatim +*> B11D is DOUBLE PRECISION array, dimension (Q) +*> When ZBBCSD converges, B11D contains the cosines of THETA(1), +*> ..., THETA(Q). If ZBBCSD fails to converge, then B11D +*> contains the diagonal of the partially reduced top-left +*> block. +*> \endverbatim +*> +*> \param[out] B11E +*> \verbatim +*> B11E is DOUBLE PRECISION array, dimension (Q-1) +*> When ZBBCSD converges, B11E contains zeros. If ZBBCSD fails +*> to converge, then B11E contains the superdiagonal of the +*> partially reduced top-left block. +*> \endverbatim +*> +*> \param[out] B12D +*> \verbatim +*> B12D is DOUBLE PRECISION array, dimension (Q) +*> When ZBBCSD converges, B12D contains the negative sines of +*> THETA(1), ..., THETA(Q). If ZBBCSD fails to converge, then +*> B12D contains the diagonal of the partially reduced top-right +*> block. +*> \endverbatim +*> +*> \param[out] B12E +*> \verbatim +*> B12E is DOUBLE PRECISION array, dimension (Q-1) +*> When ZBBCSD converges, B12E contains zeros. If ZBBCSD fails +*> to converge, then B12E contains the subdiagonal of the +*> partially reduced top-right block. +*> \endverbatim +*> +*> \param[out] B21D +*> \verbatim +*> B21D is DOUBLE PRECISION array, dimension (Q) +*> When ZBBCSD converges, B21D contains the negative sines of +*> THETA(1), ..., THETA(Q). If ZBBCSD fails to converge, then +*> B21D contains the diagonal of the partially reduced bottom-left +*> block. +*> \endverbatim +*> +*> \param[out] B21E +*> \verbatim +*> B21E is DOUBLE PRECISION array, dimension (Q-1) +*> When ZBBCSD converges, B21E contains zeros. If ZBBCSD fails +*> to converge, then B21E contains the subdiagonal of the +*> partially reduced bottom-left block. +*> \endverbatim +*> +*> \param[out] B22D +*> \verbatim +*> B22D is DOUBLE PRECISION array, dimension (Q) +*> When ZBBCSD converges, B22D contains the negative sines of +*> THETA(1), ..., THETA(Q). If ZBBCSD fails to converge, then +*> B22D contains the diagonal of the partially reduced bottom-right +*> block. +*> \endverbatim +*> +*> \param[out] B22E +*> \verbatim +*> B22E is DOUBLE PRECISION array, dimension (Q-1) +*> When ZBBCSD converges, B22E contains zeros. If ZBBCSD fails +*> to converge, then B22E contains the subdiagonal of the +*> partially reduced bottom-right block. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of the array RWORK. LRWORK >= MAX(1,8*Q). +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the RWORK array, +*> returns this value as the first entry of the work array, and +*> no error message related to LRWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if ZBBCSD did not converge, INFO specifies the number +*> of nonzero entries in PHI, and B11D, B11E, etc., +*> contain the partially reduced matrix. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> TOLMUL DOUBLE PRECISION, default = MAX(10,MIN(100,EPS**(-1/8))) +*> TOLMUL controls the convergence criterion of the QR loop. +*> Angles THETA(i), PHI(i) are rounded to 0 or PI/2 when they +*> are within TOLMUL*EPS of either bound. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, + $ THETA, PHI, U1, LDU1, U2, LDU2, V1T, LDV1T, + $ V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E, + $ B22D, B22E, RWORK, LRWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS + INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LRWORK, M, P, Q +* .. +* .. Array Arguments .. + DOUBLE PRECISION B11D( * ), B11E( * ), B12D( * ), B12E( * ), + $ B21D( * ), B21E( * ), B22D( * ), B22E( * ), + $ PHI( * ), THETA( * ), RWORK( * ) + COMPLEX*16 U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ), + $ V2T( LDV2T, * ) +* .. +* +* =================================================================== +* +* .. Parameters .. + INTEGER MAXITR + PARAMETER ( MAXITR = 6 ) + DOUBLE PRECISION HUNDRED, MEIGHTH, ONE, PIOVER2, TEN, ZERO + PARAMETER ( HUNDRED = 100.0D0, MEIGHTH = -0.125D0, + $ ONE = 1.0D0, PIOVER2 = 1.57079632679489662D0, + $ TEN = 10.0D0, ZERO = 0.0D0 ) + COMPLEX*16 NEGONECOMPLEX + PARAMETER ( NEGONECOMPLEX = (-1.0D0,0.0D0) ) +* .. +* .. Local Scalars .. + LOGICAL COLMAJOR, LQUERY, RESTART11, RESTART12, + $ RESTART21, RESTART22, WANTU1, WANTU2, WANTV1T, + $ WANTV2T + INTEGER I, IMIN, IMAX, ITER, IU1CS, IU1SN, IU2CS, + $ IU2SN, IV1TCS, IV1TSN, IV2TCS, IV2TSN, J, + $ LRWORKMIN, LRWORKOPT, MAXIT, MINI + DOUBLE PRECISION B11BULGE, B12BULGE, B21BULGE, B22BULGE, DUMMY, + $ EPS, MU, NU, R, SIGMA11, SIGMA21, + $ TEMP, THETAMAX, THETAMIN, THRESH, TOL, TOLMUL, + $ UNFL, X1, X2, Y1, Y2 +* + EXTERNAL DLARTGP, DLARTGS, DLAS2, XERBLA, ZLASR, ZSCAL, + $ ZSWAP +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + LOGICAL LSAME + EXTERNAL LSAME, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, ATAN2, COS, MAX, MIN, SIN, SQRT +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + LQUERY = LRWORK .EQ. -1 + WANTU1 = LSAME( JOBU1, 'Y' ) + WANTU2 = LSAME( JOBU2, 'Y' ) + WANTV1T = LSAME( JOBV1T, 'Y' ) + WANTV2T = LSAME( JOBV2T, 'Y' ) + COLMAJOR = .NOT. LSAME( TRANS, 'T' ) +* + IF( M .LT. 0 ) THEN + INFO = -6 + ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN + INFO = -7 + ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN + INFO = -8 + ELSE IF( Q .GT. P .OR. Q .GT. M-P .OR. Q .GT. M-Q ) THEN + INFO = -8 + ELSE IF( WANTU1 .AND. LDU1 .LT. P ) THEN + INFO = -12 + ELSE IF( WANTU2 .AND. LDU2 .LT. M-P ) THEN + INFO = -14 + ELSE IF( WANTV1T .AND. LDV1T .LT. Q ) THEN + INFO = -16 + ELSE IF( WANTV2T .AND. LDV2T .LT. M-Q ) THEN + INFO = -18 + END IF +* +* Quick return if Q = 0 +* + IF( INFO .EQ. 0 .AND. Q .EQ. 0 ) THEN + LRWORKMIN = 1 + RWORK(1) = LRWORKMIN + RETURN + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + IU1CS = 1 + IU1SN = IU1CS + Q + IU2CS = IU1SN + Q + IU2SN = IU2CS + Q + IV1TCS = IU2SN + Q + IV1TSN = IV1TCS + Q + IV2TCS = IV1TSN + Q + IV2TSN = IV2TCS + Q + LRWORKOPT = IV2TSN + Q - 1 + LRWORKMIN = LRWORKOPT + RWORK(1) = LRWORKOPT + IF( LRWORK .LT. LRWORKMIN .AND. .NOT. LQUERY ) THEN + INFO = -28 + END IF + END IF +* + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'ZBBCSD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'Epsilon' ) + UNFL = DLAMCH( 'Safe minimum' ) + TOLMUL = MAX( TEN, MIN( HUNDRED, EPS**MEIGHTH ) ) + TOL = TOLMUL*EPS + THRESH = MAX( TOL, MAXITR*Q*Q*UNFL ) +* +* Test for negligible sines or cosines +* + DO I = 1, Q + IF( THETA(I) .LT. THRESH ) THEN + THETA(I) = ZERO + ELSE IF( THETA(I) .GT. PIOVER2-THRESH ) THEN + THETA(I) = PIOVER2 + END IF + END DO + DO I = 1, Q-1 + IF( PHI(I) .LT. THRESH ) THEN + PHI(I) = ZERO + ELSE IF( PHI(I) .GT. PIOVER2-THRESH ) THEN + PHI(I) = PIOVER2 + END IF + END DO +* +* Initial deflation +* + IMAX = Q + DO WHILE( IMAX .GT. 1 ) + IF( PHI(IMAX-1) .NE. ZERO ) THEN + EXIT + END IF + IMAX = IMAX - 1 + END DO + IMIN = IMAX - 1 + IF ( IMIN .GT. 1 ) THEN + DO WHILE( PHI(IMIN-1) .NE. ZERO ) + IMIN = IMIN - 1 + IF ( IMIN .LE. 1 ) EXIT + END DO + END IF +* +* Initialize iteration counter +* + MAXIT = MAXITR*Q*Q + ITER = 0 +* +* Begin main iteration loop +* + DO WHILE( IMAX .GT. 1 ) +* +* Compute the matrix entries +* + B11D(IMIN) = COS( THETA(IMIN) ) + B21D(IMIN) = -SIN( THETA(IMIN) ) + DO I = IMIN, IMAX - 1 + B11E(I) = -SIN( THETA(I) ) * SIN( PHI(I) ) + B11D(I+1) = COS( THETA(I+1) ) * COS( PHI(I) ) + B12D(I) = SIN( THETA(I) ) * COS( PHI(I) ) + B12E(I) = COS( THETA(I+1) ) * SIN( PHI(I) ) + B21E(I) = -COS( THETA(I) ) * SIN( PHI(I) ) + B21D(I+1) = -SIN( THETA(I+1) ) * COS( PHI(I) ) + B22D(I) = COS( THETA(I) ) * COS( PHI(I) ) + B22E(I) = -SIN( THETA(I+1) ) * SIN( PHI(I) ) + END DO + B12D(IMAX) = SIN( THETA(IMAX) ) + B22D(IMAX) = COS( THETA(IMAX) ) +* +* Abort if not converging; otherwise, increment ITER +* + IF( ITER .GT. MAXIT ) THEN + INFO = 0 + DO I = 1, Q + IF( PHI(I) .NE. ZERO ) + $ INFO = INFO + 1 + END DO + RETURN + END IF +* + ITER = ITER + IMAX - IMIN +* +* Compute shifts +* + THETAMAX = THETA(IMIN) + THETAMIN = THETA(IMIN) + DO I = IMIN+1, IMAX + IF( THETA(I) > THETAMAX ) + $ THETAMAX = THETA(I) + IF( THETA(I) < THETAMIN ) + $ THETAMIN = THETA(I) + END DO +* + IF( THETAMAX .GT. PIOVER2 - THRESH ) THEN +* +* Zero on diagonals of B11 and B22; induce deflation with a +* zero shift +* + MU = ZERO + NU = ONE +* + ELSE IF( THETAMIN .LT. THRESH ) THEN +* +* Zero on diagonals of B12 and B22; induce deflation with a +* zero shift +* + MU = ONE + NU = ZERO +* + ELSE +* +* Compute shifts for B11 and B21 and use the lesser +* + CALL DLAS2( B11D(IMAX-1), B11E(IMAX-1), B11D(IMAX), SIGMA11, + $ DUMMY ) + CALL DLAS2( B21D(IMAX-1), B21E(IMAX-1), B21D(IMAX), SIGMA21, + $ DUMMY ) +* + IF( SIGMA11 .LE. SIGMA21 ) THEN + MU = SIGMA11 + NU = SQRT( ONE - MU**2 ) + IF( MU .LT. THRESH ) THEN + MU = ZERO + NU = ONE + END IF + ELSE + NU = SIGMA21 + MU = SQRT( 1.0 - NU**2 ) + IF( NU .LT. THRESH ) THEN + MU = ONE + NU = ZERO + END IF + END IF + END IF +* +* Rotate to produce bulges in B11 and B21 +* + IF( MU .LE. NU ) THEN + CALL DLARTGS( B11D(IMIN), B11E(IMIN), MU, + $ RWORK(IV1TCS+IMIN-1), RWORK(IV1TSN+IMIN-1) ) + ELSE + CALL DLARTGS( B21D(IMIN), B21E(IMIN), NU, + $ RWORK(IV1TCS+IMIN-1), RWORK(IV1TSN+IMIN-1) ) + END IF +* + TEMP = RWORK(IV1TCS+IMIN-1)*B11D(IMIN) + + $ RWORK(IV1TSN+IMIN-1)*B11E(IMIN) + B11E(IMIN) = RWORK(IV1TCS+IMIN-1)*B11E(IMIN) - + $ RWORK(IV1TSN+IMIN-1)*B11D(IMIN) + B11D(IMIN) = TEMP + B11BULGE = RWORK(IV1TSN+IMIN-1)*B11D(IMIN+1) + B11D(IMIN+1) = RWORK(IV1TCS+IMIN-1)*B11D(IMIN+1) + TEMP = RWORK(IV1TCS+IMIN-1)*B21D(IMIN) + + $ RWORK(IV1TSN+IMIN-1)*B21E(IMIN) + B21E(IMIN) = RWORK(IV1TCS+IMIN-1)*B21E(IMIN) - + $ RWORK(IV1TSN+IMIN-1)*B21D(IMIN) + B21D(IMIN) = TEMP + B21BULGE = RWORK(IV1TSN+IMIN-1)*B21D(IMIN+1) + B21D(IMIN+1) = RWORK(IV1TCS+IMIN-1)*B21D(IMIN+1) +* +* Compute THETA(IMIN) +* + THETA( IMIN ) = ATAN2( SQRT( B21D(IMIN)**2+B21BULGE**2 ), + $ SQRT( B11D(IMIN)**2+B11BULGE**2 ) ) +* +* Chase the bulges in B11(IMIN+1,IMIN) and B21(IMIN+1,IMIN) +* + IF( B11D(IMIN)**2+B11BULGE**2 .GT. THRESH**2 ) THEN + CALL DLARTGP( B11BULGE, B11D(IMIN), RWORK(IU1SN+IMIN-1), + $ RWORK(IU1CS+IMIN-1), R ) + ELSE IF( MU .LE. NU ) THEN + CALL DLARTGS( B11E( IMIN ), B11D( IMIN + 1 ), MU, + $ RWORK(IU1CS+IMIN-1), RWORK(IU1SN+IMIN-1) ) + ELSE + CALL DLARTGS( B12D( IMIN ), B12E( IMIN ), NU, + $ RWORK(IU1CS+IMIN-1), RWORK(IU1SN+IMIN-1) ) + END IF + IF( B21D(IMIN)**2+B21BULGE**2 .GT. THRESH**2 ) THEN + CALL DLARTGP( B21BULGE, B21D(IMIN), RWORK(IU2SN+IMIN-1), + $ RWORK(IU2CS+IMIN-1), R ) + ELSE IF( NU .LT. MU ) THEN + CALL DLARTGS( B21E( IMIN ), B21D( IMIN + 1 ), NU, + $ RWORK(IU2CS+IMIN-1), RWORK(IU2SN+IMIN-1) ) + ELSE + CALL DLARTGS( B22D(IMIN), B22E(IMIN), MU, + $ RWORK(IU2CS+IMIN-1), RWORK(IU2SN+IMIN-1) ) + END IF + RWORK(IU2CS+IMIN-1) = -RWORK(IU2CS+IMIN-1) + RWORK(IU2SN+IMIN-1) = -RWORK(IU2SN+IMIN-1) +* + TEMP = RWORK(IU1CS+IMIN-1)*B11E(IMIN) + + $ RWORK(IU1SN+IMIN-1)*B11D(IMIN+1) + B11D(IMIN+1) = RWORK(IU1CS+IMIN-1)*B11D(IMIN+1) - + $ RWORK(IU1SN+IMIN-1)*B11E(IMIN) + B11E(IMIN) = TEMP + IF( IMAX .GT. IMIN+1 ) THEN + B11BULGE = RWORK(IU1SN+IMIN-1)*B11E(IMIN+1) + B11E(IMIN+1) = RWORK(IU1CS+IMIN-1)*B11E(IMIN+1) + END IF + TEMP = RWORK(IU1CS+IMIN-1)*B12D(IMIN) + + $ RWORK(IU1SN+IMIN-1)*B12E(IMIN) + B12E(IMIN) = RWORK(IU1CS+IMIN-1)*B12E(IMIN) - + $ RWORK(IU1SN+IMIN-1)*B12D(IMIN) + B12D(IMIN) = TEMP + B12BULGE = RWORK(IU1SN+IMIN-1)*B12D(IMIN+1) + B12D(IMIN+1) = RWORK(IU1CS+IMIN-1)*B12D(IMIN+1) + TEMP = RWORK(IU2CS+IMIN-1)*B21E(IMIN) + + $ RWORK(IU2SN+IMIN-1)*B21D(IMIN+1) + B21D(IMIN+1) = RWORK(IU2CS+IMIN-1)*B21D(IMIN+1) - + $ RWORK(IU2SN+IMIN-1)*B21E(IMIN) + B21E(IMIN) = TEMP + IF( IMAX .GT. IMIN+1 ) THEN + B21BULGE = RWORK(IU2SN+IMIN-1)*B21E(IMIN+1) + B21E(IMIN+1) = RWORK(IU2CS+IMIN-1)*B21E(IMIN+1) + END IF + TEMP = RWORK(IU2CS+IMIN-1)*B22D(IMIN) + + $ RWORK(IU2SN+IMIN-1)*B22E(IMIN) + B22E(IMIN) = RWORK(IU2CS+IMIN-1)*B22E(IMIN) - + $ RWORK(IU2SN+IMIN-1)*B22D(IMIN) + B22D(IMIN) = TEMP + B22BULGE = RWORK(IU2SN+IMIN-1)*B22D(IMIN+1) + B22D(IMIN+1) = RWORK(IU2CS+IMIN-1)*B22D(IMIN+1) +* +* Inner loop: chase bulges from B11(IMIN,IMIN+2), +* B12(IMIN,IMIN+1), B21(IMIN,IMIN+2), and B22(IMIN,IMIN+1) to +* bottom-right +* + DO I = IMIN+1, IMAX-1 +* +* Compute PHI(I-1) +* + X1 = SIN(THETA(I-1))*B11E(I-1) + COS(THETA(I-1))*B21E(I-1) + X2 = SIN(THETA(I-1))*B11BULGE + COS(THETA(I-1))*B21BULGE + Y1 = SIN(THETA(I-1))*B12D(I-1) + COS(THETA(I-1))*B22D(I-1) + Y2 = SIN(THETA(I-1))*B12BULGE + COS(THETA(I-1))*B22BULGE +* + PHI(I-1) = ATAN2( SQRT(X1**2+X2**2), SQRT(Y1**2+Y2**2) ) +* +* Determine if there are bulges to chase or if a new direct +* summand has been reached +* + RESTART11 = B11E(I-1)**2 + B11BULGE**2 .LE. THRESH**2 + RESTART21 = B21E(I-1)**2 + B21BULGE**2 .LE. THRESH**2 + RESTART12 = B12D(I-1)**2 + B12BULGE**2 .LE. THRESH**2 + RESTART22 = B22D(I-1)**2 + B22BULGE**2 .LE. THRESH**2 +* +* If possible, chase bulges from B11(I-1,I+1), B12(I-1,I), +* B21(I-1,I+1), and B22(I-1,I). If necessary, restart bulge- +* chasing by applying the original shift again. +* + IF( .NOT. RESTART11 .AND. .NOT. RESTART21 ) THEN + CALL DLARTGP( X2, X1, RWORK(IV1TSN+I-1), + $ RWORK(IV1TCS+I-1), R ) + ELSE IF( .NOT. RESTART11 .AND. RESTART21 ) THEN + CALL DLARTGP( B11BULGE, B11E(I-1), RWORK(IV1TSN+I-1), + $ RWORK(IV1TCS+I-1), R ) + ELSE IF( RESTART11 .AND. .NOT. RESTART21 ) THEN + CALL DLARTGP( B21BULGE, B21E(I-1), RWORK(IV1TSN+I-1), + $ RWORK(IV1TCS+I-1), R ) + ELSE IF( MU .LE. NU ) THEN + CALL DLARTGS( B11D(I), B11E(I), MU, RWORK(IV1TCS+I-1), + $ RWORK(IV1TSN+I-1) ) + ELSE + CALL DLARTGS( B21D(I), B21E(I), NU, RWORK(IV1TCS+I-1), + $ RWORK(IV1TSN+I-1) ) + END IF + RWORK(IV1TCS+I-1) = -RWORK(IV1TCS+I-1) + RWORK(IV1TSN+I-1) = -RWORK(IV1TSN+I-1) + IF( .NOT. RESTART12 .AND. .NOT. RESTART22 ) THEN + CALL DLARTGP( Y2, Y1, RWORK(IV2TSN+I-1-1), + $ RWORK(IV2TCS+I-1-1), R ) + ELSE IF( .NOT. RESTART12 .AND. RESTART22 ) THEN + CALL DLARTGP( B12BULGE, B12D(I-1), RWORK(IV2TSN+I-1-1), + $ RWORK(IV2TCS+I-1-1), R ) + ELSE IF( RESTART12 .AND. .NOT. RESTART22 ) THEN + CALL DLARTGP( B22BULGE, B22D(I-1), RWORK(IV2TSN+I-1-1), + $ RWORK(IV2TCS+I-1-1), R ) + ELSE IF( NU .LT. MU ) THEN + CALL DLARTGS( B12E(I-1), B12D(I), NU, + $ RWORK(IV2TCS+I-1-1), RWORK(IV2TSN+I-1-1) ) + ELSE + CALL DLARTGS( B22E(I-1), B22D(I), MU, + $ RWORK(IV2TCS+I-1-1), RWORK(IV2TSN+I-1-1) ) + END IF +* + TEMP = RWORK(IV1TCS+I-1)*B11D(I) + RWORK(IV1TSN+I-1)*B11E(I) + B11E(I) = RWORK(IV1TCS+I-1)*B11E(I) - + $ RWORK(IV1TSN+I-1)*B11D(I) + B11D(I) = TEMP + B11BULGE = RWORK(IV1TSN+I-1)*B11D(I+1) + B11D(I+1) = RWORK(IV1TCS+I-1)*B11D(I+1) + TEMP = RWORK(IV1TCS+I-1)*B21D(I) + RWORK(IV1TSN+I-1)*B21E(I) + B21E(I) = RWORK(IV1TCS+I-1)*B21E(I) - + $ RWORK(IV1TSN+I-1)*B21D(I) + B21D(I) = TEMP + B21BULGE = RWORK(IV1TSN+I-1)*B21D(I+1) + B21D(I+1) = RWORK(IV1TCS+I-1)*B21D(I+1) + TEMP = RWORK(IV2TCS+I-1-1)*B12E(I-1) + + $ RWORK(IV2TSN+I-1-1)*B12D(I) + B12D(I) = RWORK(IV2TCS+I-1-1)*B12D(I) - + $ RWORK(IV2TSN+I-1-1)*B12E(I-1) + B12E(I-1) = TEMP + B12BULGE = RWORK(IV2TSN+I-1-1)*B12E(I) + B12E(I) = RWORK(IV2TCS+I-1-1)*B12E(I) + TEMP = RWORK(IV2TCS+I-1-1)*B22E(I-1) + + $ RWORK(IV2TSN+I-1-1)*B22D(I) + B22D(I) = RWORK(IV2TCS+I-1-1)*B22D(I) - + $ RWORK(IV2TSN+I-1-1)*B22E(I-1) + B22E(I-1) = TEMP + B22BULGE = RWORK(IV2TSN+I-1-1)*B22E(I) + B22E(I) = RWORK(IV2TCS+I-1-1)*B22E(I) +* +* Compute THETA(I) +* + X1 = COS(PHI(I-1))*B11D(I) + SIN(PHI(I-1))*B12E(I-1) + X2 = COS(PHI(I-1))*B11BULGE + SIN(PHI(I-1))*B12BULGE + Y1 = COS(PHI(I-1))*B21D(I) + SIN(PHI(I-1))*B22E(I-1) + Y2 = COS(PHI(I-1))*B21BULGE + SIN(PHI(I-1))*B22BULGE +* + THETA(I) = ATAN2( SQRT(Y1**2+Y2**2), SQRT(X1**2+X2**2) ) +* +* Determine if there are bulges to chase or if a new direct +* summand has been reached +* + RESTART11 = B11D(I)**2 + B11BULGE**2 .LE. THRESH**2 + RESTART12 = B12E(I-1)**2 + B12BULGE**2 .LE. THRESH**2 + RESTART21 = B21D(I)**2 + B21BULGE**2 .LE. THRESH**2 + RESTART22 = B22E(I-1)**2 + B22BULGE**2 .LE. THRESH**2 +* +* If possible, chase bulges from B11(I+1,I), B12(I+1,I-1), +* B21(I+1,I), and B22(I+1,I-1). If necessary, restart bulge- +* chasing by applying the original shift again. +* + IF( .NOT. RESTART11 .AND. .NOT. RESTART12 ) THEN + CALL DLARTGP( X2, X1, RWORK(IU1SN+I-1), RWORK(IU1CS+I-1), + $ R ) + ELSE IF( .NOT. RESTART11 .AND. RESTART12 ) THEN + CALL DLARTGP( B11BULGE, B11D(I), RWORK(IU1SN+I-1), + $ RWORK(IU1CS+I-1), R ) + ELSE IF( RESTART11 .AND. .NOT. RESTART12 ) THEN + CALL DLARTGP( B12BULGE, B12E(I-1), RWORK(IU1SN+I-1), + $ RWORK(IU1CS+I-1), R ) + ELSE IF( MU .LE. NU ) THEN + CALL DLARTGS( B11E(I), B11D(I+1), MU, RWORK(IU1CS+I-1), + $ RWORK(IU1SN+I-1) ) + ELSE + CALL DLARTGS( B12D(I), B12E(I), NU, RWORK(IU1CS+I-1), + $ RWORK(IU1SN+I-1) ) + END IF + IF( .NOT. RESTART21 .AND. .NOT. RESTART22 ) THEN + CALL DLARTGP( Y2, Y1, RWORK(IU2SN+I-1), RWORK(IU2CS+I-1), + $ R ) + ELSE IF( .NOT. RESTART21 .AND. RESTART22 ) THEN + CALL DLARTGP( B21BULGE, B21D(I), RWORK(IU2SN+I-1), + $ RWORK(IU2CS+I-1), R ) + ELSE IF( RESTART21 .AND. .NOT. RESTART22 ) THEN + CALL DLARTGP( B22BULGE, B22E(I-1), RWORK(IU2SN+I-1), + $ RWORK(IU2CS+I-1), R ) + ELSE IF( NU .LT. MU ) THEN + CALL DLARTGS( B21E(I), B21E(I+1), NU, RWORK(IU2CS+I-1), + $ RWORK(IU2SN+I-1) ) + ELSE + CALL DLARTGS( B22D(I), B22E(I), MU, RWORK(IU2CS+I-1), + $ RWORK(IU2SN+I-1) ) + END IF + RWORK(IU2CS+I-1) = -RWORK(IU2CS+I-1) + RWORK(IU2SN+I-1) = -RWORK(IU2SN+I-1) +* + TEMP = RWORK(IU1CS+I-1)*B11E(I) + RWORK(IU1SN+I-1)*B11D(I+1) + B11D(I+1) = RWORK(IU1CS+I-1)*B11D(I+1) - + $ RWORK(IU1SN+I-1)*B11E(I) + B11E(I) = TEMP + IF( I .LT. IMAX - 1 ) THEN + B11BULGE = RWORK(IU1SN+I-1)*B11E(I+1) + B11E(I+1) = RWORK(IU1CS+I-1)*B11E(I+1) + END IF + TEMP = RWORK(IU2CS+I-1)*B21E(I) + RWORK(IU2SN+I-1)*B21D(I+1) + B21D(I+1) = RWORK(IU2CS+I-1)*B21D(I+1) - + $ RWORK(IU2SN+I-1)*B21E(I) + B21E(I) = TEMP + IF( I .LT. IMAX - 1 ) THEN + B21BULGE = RWORK(IU2SN+I-1)*B21E(I+1) + B21E(I+1) = RWORK(IU2CS+I-1)*B21E(I+1) + END IF + TEMP = RWORK(IU1CS+I-1)*B12D(I) + RWORK(IU1SN+I-1)*B12E(I) + B12E(I) = RWORK(IU1CS+I-1)*B12E(I) - + $ RWORK(IU1SN+I-1)*B12D(I) + B12D(I) = TEMP + B12BULGE = RWORK(IU1SN+I-1)*B12D(I+1) + B12D(I+1) = RWORK(IU1CS+I-1)*B12D(I+1) + TEMP = RWORK(IU2CS+I-1)*B22D(I) + RWORK(IU2SN+I-1)*B22E(I) + B22E(I) = RWORK(IU2CS+I-1)*B22E(I) - + $ RWORK(IU2SN+I-1)*B22D(I) + B22D(I) = TEMP + B22BULGE = RWORK(IU2SN+I-1)*B22D(I+1) + B22D(I+1) = RWORK(IU2CS+I-1)*B22D(I+1) +* + END DO +* +* Compute PHI(IMAX-1) +* + X1 = SIN(THETA(IMAX-1))*B11E(IMAX-1) + + $ COS(THETA(IMAX-1))*B21E(IMAX-1) + Y1 = SIN(THETA(IMAX-1))*B12D(IMAX-1) + + $ COS(THETA(IMAX-1))*B22D(IMAX-1) + Y2 = SIN(THETA(IMAX-1))*B12BULGE + COS(THETA(IMAX-1))*B22BULGE +* + PHI(IMAX-1) = ATAN2( ABS(X1), SQRT(Y1**2+Y2**2) ) +* +* Chase bulges from B12(IMAX-1,IMAX) and B22(IMAX-1,IMAX) +* + RESTART12 = B12D(IMAX-1)**2 + B12BULGE**2 .LE. THRESH**2 + RESTART22 = B22D(IMAX-1)**2 + B22BULGE**2 .LE. THRESH**2 +* + IF( .NOT. RESTART12 .AND. .NOT. RESTART22 ) THEN + CALL DLARTGP( Y2, Y1, RWORK(IV2TSN+IMAX-1-1), + $ RWORK(IV2TCS+IMAX-1-1), R ) + ELSE IF( .NOT. RESTART12 .AND. RESTART22 ) THEN + CALL DLARTGP( B12BULGE, B12D(IMAX-1), + $ RWORK(IV2TSN+IMAX-1-1), + $ RWORK(IV2TCS+IMAX-1-1), R ) + ELSE IF( RESTART12 .AND. .NOT. RESTART22 ) THEN + CALL DLARTGP( B22BULGE, B22D(IMAX-1), + $ RWORK(IV2TSN+IMAX-1-1), + $ RWORK(IV2TCS+IMAX-1-1), R ) + ELSE IF( NU .LT. MU ) THEN + CALL DLARTGS( B12E(IMAX-1), B12D(IMAX), NU, + $ RWORK(IV2TCS+IMAX-1-1), + $ RWORK(IV2TSN+IMAX-1-1) ) + ELSE + CALL DLARTGS( B22E(IMAX-1), B22D(IMAX), MU, + $ RWORK(IV2TCS+IMAX-1-1), + $ RWORK(IV2TSN+IMAX-1-1) ) + END IF +* + TEMP = RWORK(IV2TCS+IMAX-1-1)*B12E(IMAX-1) + + $ RWORK(IV2TSN+IMAX-1-1)*B12D(IMAX) + B12D(IMAX) = RWORK(IV2TCS+IMAX-1-1)*B12D(IMAX) - + $ RWORK(IV2TSN+IMAX-1-1)*B12E(IMAX-1) + B12E(IMAX-1) = TEMP + TEMP = RWORK(IV2TCS+IMAX-1-1)*B22E(IMAX-1) + + $ RWORK(IV2TSN+IMAX-1-1)*B22D(IMAX) + B22D(IMAX) = RWORK(IV2TCS+IMAX-1-1)*B22D(IMAX) - + $ RWORK(IV2TSN+IMAX-1-1)*B22E(IMAX-1) + B22E(IMAX-1) = TEMP +* +* Update singular vectors +* + IF( WANTU1 ) THEN + IF( COLMAJOR ) THEN + CALL ZLASR( 'R', 'V', 'F', P, IMAX-IMIN+1, + $ RWORK(IU1CS+IMIN-1), RWORK(IU1SN+IMIN-1), + $ U1(1,IMIN), LDU1 ) + ELSE + CALL ZLASR( 'L', 'V', 'F', IMAX-IMIN+1, P, + $ RWORK(IU1CS+IMIN-1), RWORK(IU1SN+IMIN-1), + $ U1(IMIN,1), LDU1 ) + END IF + END IF + IF( WANTU2 ) THEN + IF( COLMAJOR ) THEN + CALL ZLASR( 'R', 'V', 'F', M-P, IMAX-IMIN+1, + $ RWORK(IU2CS+IMIN-1), RWORK(IU2SN+IMIN-1), + $ U2(1,IMIN), LDU2 ) + ELSE + CALL ZLASR( 'L', 'V', 'F', IMAX-IMIN+1, M-P, + $ RWORK(IU2CS+IMIN-1), RWORK(IU2SN+IMIN-1), + $ U2(IMIN,1), LDU2 ) + END IF + END IF + IF( WANTV1T ) THEN + IF( COLMAJOR ) THEN + CALL ZLASR( 'L', 'V', 'F', IMAX-IMIN+1, Q, + $ RWORK(IV1TCS+IMIN-1), RWORK(IV1TSN+IMIN-1), + $ V1T(IMIN,1), LDV1T ) + ELSE + CALL ZLASR( 'R', 'V', 'F', Q, IMAX-IMIN+1, + $ RWORK(IV1TCS+IMIN-1), RWORK(IV1TSN+IMIN-1), + $ V1T(1,IMIN), LDV1T ) + END IF + END IF + IF( WANTV2T ) THEN + IF( COLMAJOR ) THEN + CALL ZLASR( 'L', 'V', 'F', IMAX-IMIN+1, M-Q, + $ RWORK(IV2TCS+IMIN-1), RWORK(IV2TSN+IMIN-1), + $ V2T(IMIN,1), LDV2T ) + ELSE + CALL ZLASR( 'R', 'V', 'F', M-Q, IMAX-IMIN+1, + $ RWORK(IV2TCS+IMIN-1), RWORK(IV2TSN+IMIN-1), + $ V2T(1,IMIN), LDV2T ) + END IF + END IF +* +* Fix signs on B11(IMAX-1,IMAX) and B21(IMAX-1,IMAX) +* + IF( B11E(IMAX-1)+B21E(IMAX-1) .GT. 0 ) THEN + B11D(IMAX) = -B11D(IMAX) + B21D(IMAX) = -B21D(IMAX) + IF( WANTV1T ) THEN + IF( COLMAJOR ) THEN + CALL ZSCAL( Q, NEGONECOMPLEX, V1T(IMAX,1), LDV1T ) + ELSE + CALL ZSCAL( Q, NEGONECOMPLEX, V1T(1,IMAX), 1 ) + END IF + END IF + END IF +* +* Compute THETA(IMAX) +* + X1 = COS(PHI(IMAX-1))*B11D(IMAX) + + $ SIN(PHI(IMAX-1))*B12E(IMAX-1) + Y1 = COS(PHI(IMAX-1))*B21D(IMAX) + + $ SIN(PHI(IMAX-1))*B22E(IMAX-1) +* + THETA(IMAX) = ATAN2( ABS(Y1), ABS(X1) ) +* +* Fix signs on B11(IMAX,IMAX), B12(IMAX,IMAX-1), B21(IMAX,IMAX), +* and B22(IMAX,IMAX-1) +* + IF( B11D(IMAX)+B12E(IMAX-1) .LT. 0 ) THEN + B12D(IMAX) = -B12D(IMAX) + IF( WANTU1 ) THEN + IF( COLMAJOR ) THEN + CALL ZSCAL( P, NEGONECOMPLEX, U1(1,IMAX), 1 ) + ELSE + CALL ZSCAL( P, NEGONECOMPLEX, U1(IMAX,1), LDU1 ) + END IF + END IF + END IF + IF( B21D(IMAX)+B22E(IMAX-1) .GT. 0 ) THEN + B22D(IMAX) = -B22D(IMAX) + IF( WANTU2 ) THEN + IF( COLMAJOR ) THEN + CALL ZSCAL( M-P, NEGONECOMPLEX, U2(1,IMAX), 1 ) + ELSE + CALL ZSCAL( M-P, NEGONECOMPLEX, U2(IMAX,1), LDU2 ) + END IF + END IF + END IF +* +* Fix signs on B12(IMAX,IMAX) and B22(IMAX,IMAX) +* + IF( B12D(IMAX)+B22D(IMAX) .LT. 0 ) THEN + IF( WANTV2T ) THEN + IF( COLMAJOR ) THEN + CALL ZSCAL( M-Q, NEGONECOMPLEX, V2T(IMAX,1), LDV2T ) + ELSE + CALL ZSCAL( M-Q, NEGONECOMPLEX, V2T(1,IMAX), 1 ) + END IF + END IF + END IF +* +* Test for negligible sines or cosines +* + DO I = IMIN, IMAX + IF( THETA(I) .LT. THRESH ) THEN + THETA(I) = ZERO + ELSE IF( THETA(I) .GT. PIOVER2-THRESH ) THEN + THETA(I) = PIOVER2 + END IF + END DO + DO I = IMIN, IMAX-1 + IF( PHI(I) .LT. THRESH ) THEN + PHI(I) = ZERO + ELSE IF( PHI(I) .GT. PIOVER2-THRESH ) THEN + PHI(I) = PIOVER2 + END IF + END DO +* +* Deflate +* + IF (IMAX .GT. 1) THEN + DO WHILE( PHI(IMAX-1) .EQ. ZERO ) + IMAX = IMAX - 1 + IF (IMAX .LE. 1) EXIT + END DO + END IF + IF( IMIN .GT. IMAX - 1 ) + $ IMIN = IMAX - 1 + IF (IMIN .GT. 1) THEN + DO WHILE (PHI(IMIN-1) .NE. ZERO) + IMIN = IMIN - 1 + IF (IMIN .LE. 1) EXIT + END DO + END IF +* +* Repeat main iteration loop +* + END DO +* +* Postprocessing: order THETA from least to greatest +* + DO I = 1, Q +* + MINI = I + THETAMIN = THETA(I) + DO J = I+1, Q + IF( THETA(J) .LT. THETAMIN ) THEN + MINI = J + THETAMIN = THETA(J) + END IF + END DO +* + IF( MINI .NE. I ) THEN + THETA(MINI) = THETA(I) + THETA(I) = THETAMIN + IF( COLMAJOR ) THEN + IF( WANTU1 ) + $ CALL ZSWAP( P, U1(1,I), 1, U1(1,MINI), 1 ) + IF( WANTU2 ) + $ CALL ZSWAP( M-P, U2(1,I), 1, U2(1,MINI), 1 ) + IF( WANTV1T ) + $ CALL ZSWAP( Q, V1T(I,1), LDV1T, V1T(MINI,1), LDV1T ) + IF( WANTV2T ) + $ CALL ZSWAP( M-Q, V2T(I,1), LDV2T, V2T(MINI,1), + $ LDV2T ) + ELSE + IF( WANTU1 ) + $ CALL ZSWAP( P, U1(I,1), LDU1, U1(MINI,1), LDU1 ) + IF( WANTU2 ) + $ CALL ZSWAP( M-P, U2(I,1), LDU2, U2(MINI,1), LDU2 ) + IF( WANTV1T ) + $ CALL ZSWAP( Q, V1T(1,I), 1, V1T(1,MINI), 1 ) + IF( WANTV2T ) + $ CALL ZSWAP( M-Q, V2T(1,I), 1, V2T(1,MINI), 1 ) + END IF + END IF +* + END DO +* + RETURN +* +* End of ZBBCSD +* + END + diff --git a/dspl/liblapack/SRC/zbdsqr.f b/dspl/liblapack/SRC/zbdsqr.f new file mode 100644 index 0000000..66281bb --- /dev/null +++ b/dspl/liblapack/SRC/zbdsqr.f @@ -0,0 +1,842 @@ +*> \brief \b ZBDSQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZBDSQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, +* LDU, C, LDC, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ), RWORK( * ) +* COMPLEX*16 C( LDC, * ), U( LDU, * ), VT( LDVT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZBDSQR computes the singular values and, optionally, the right and/or +*> left singular vectors from the singular value decomposition (SVD) of +*> a real N-by-N (upper or lower) bidiagonal matrix B using the implicit +*> zero-shift QR algorithm. The SVD of B has the form +*> +*> B = Q * S * P**H +*> +*> where S is the diagonal matrix of singular values, Q is an orthogonal +*> matrix of left singular vectors, and P is an orthogonal matrix of +*> right singular vectors. If left singular vectors are requested, this +*> subroutine actually returns U*Q instead of Q, and, if right singular +*> vectors are requested, this subroutine returns P**H*VT instead of +*> P**H, for given complex input matrices U and VT. When U and VT are +*> the unitary matrices that reduce a general matrix A to bidiagonal +*> form: A = U*B*VT, as computed by ZGEBRD, then +*> +*> A = (U*Q) * S * (P**H*VT) +*> +*> is the SVD of A. Optionally, the subroutine may also compute Q**H*C +*> for a given complex input matrix C. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices With +*> Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, +*> LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11, +*> no. 5, pp. 873-912, Sept 1990) and +*> "Accurate singular values and differential qd algorithms," by +*> B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics +*> Department, University of California at Berkeley, July 1992 +*> for a detailed description of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': B is upper bidiagonal; +*> = 'L': B is lower bidiagonal. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix B. N >= 0. +*> \endverbatim +*> +*> \param[in] NCVT +*> \verbatim +*> NCVT is INTEGER +*> The number of columns of the matrix VT. NCVT >= 0. +*> \endverbatim +*> +*> \param[in] NRU +*> \verbatim +*> NRU is INTEGER +*> The number of rows of the matrix U. NRU >= 0. +*> \endverbatim +*> +*> \param[in] NCC +*> \verbatim +*> NCC is INTEGER +*> The number of columns of the matrix C. NCC >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the n diagonal elements of the bidiagonal matrix B. +*> On exit, if INFO=0, the singular values of B in decreasing +*> order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> On entry, the N-1 offdiagonal elements of the bidiagonal +*> matrix B. +*> On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E +*> will contain the diagonal and superdiagonal elements of a +*> bidiagonal matrix orthogonally equivalent to the one given +*> as input. +*> \endverbatim +*> +*> \param[in,out] VT +*> \verbatim +*> VT is COMPLEX*16 array, dimension (LDVT, NCVT) +*> On entry, an N-by-NCVT matrix VT. +*> On exit, VT is overwritten by P**H * VT. +*> Not referenced if NCVT = 0. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. +*> LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0. +*> \endverbatim +*> +*> \param[in,out] U +*> \verbatim +*> U is COMPLEX*16 array, dimension (LDU, N) +*> On entry, an NRU-by-N matrix U. +*> On exit, U is overwritten by U * Q. +*> Not referenced if NRU = 0. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,NRU). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC, NCC) +*> On entry, an N-by-NCC matrix C. +*> On exit, C is overwritten by Q**H * C. +*> Not referenced if NCC = 0. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. +*> LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0: the algorithm did not converge; D and E contain the +*> elements of a bidiagonal matrix which is orthogonally +*> similar to the input matrix B; if INFO = i, i +*> elements of E have not converged to zero. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> TOLMUL DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8))) +*> TOLMUL controls the convergence criterion of the QR loop. +*> If it is positive, TOLMUL*EPS is the desired relative +*> precision in the computed singular values. +*> If it is negative, abs(TOLMUL*EPS*sigma_max) is the +*> desired absolute accuracy in the computed singular +*> values (corresponds to relative accuracy +*> abs(TOLMUL*EPS) in the largest singular value. +*> abs(TOLMUL) should be between 1 and 1/EPS, and preferably +*> between 10 (for fast convergence) and .1/EPS +*> (for there to be some accuracy in the results). +*> Default is to lose at either one eighth or 2 of the +*> available decimal digits in each computed singular value +*> (whichever is smaller). +*> +*> MAXITR INTEGER, default = 6 +*> MAXITR controls the maximum number of passes of the +*> algorithm through its inner loop. The algorithms stops +*> (and so fails to converge) if the number of passes +*> through the inner loop exceeds MAXITR*N**2. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, + $ LDU, C, LDC, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), RWORK( * ) + COMPLEX*16 C( LDC, * ), U( LDU, * ), VT( LDVT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + DOUBLE PRECISION NEGONE + PARAMETER ( NEGONE = -1.0D0 ) + DOUBLE PRECISION HNDRTH + PARAMETER ( HNDRTH = 0.01D0 ) + DOUBLE PRECISION TEN + PARAMETER ( TEN = 10.0D0 ) + DOUBLE PRECISION HNDRD + PARAMETER ( HNDRD = 100.0D0 ) + DOUBLE PRECISION MEIGTH + PARAMETER ( MEIGTH = -0.125D0 ) + INTEGER MAXITR + PARAMETER ( MAXITR = 6 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, ROTATE + INTEGER I, IDIR, ISUB, ITER, J, LL, LLL, M, MAXIT, NM1, + $ NM12, NM13, OLDLL, OLDM + DOUBLE PRECISION ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU, + $ OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL, + $ SINR, SLL, SMAX, SMIN, SMINL, SMINOA, + $ SN, THRESH, TOL, TOLMUL, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLARTG, DLAS2, DLASQ1, DLASV2, XERBLA, ZDROT, + $ ZDSCAL, ZLASR, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NCVT.LT.0 ) THEN + INFO = -3 + ELSE IF( NRU.LT.0 ) THEN + INFO = -4 + ELSE IF( NCC.LT.0 ) THEN + INFO = -5 + ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR. + $ ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN + INFO = -9 + ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN + INFO = -11 + ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR. + $ ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZBDSQR', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN + IF( N.EQ.1 ) + $ GO TO 160 +* +* ROTATE is true if any singular vectors desired, false otherwise +* + ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 ) +* +* If no singular vectors desired, use qd algorithm +* + IF( .NOT.ROTATE ) THEN + CALL DLASQ1( N, D, E, RWORK, INFO ) +* +* If INFO equals 2, dqds didn't finish, try to finish +* + IF( INFO .NE. 2 ) RETURN + INFO = 0 + END IF +* + NM1 = N - 1 + NM12 = NM1 + NM1 + NM13 = NM12 + NM1 + IDIR = 0 +* +* Get machine constants +* + EPS = DLAMCH( 'Epsilon' ) + UNFL = DLAMCH( 'Safe minimum' ) +* +* If matrix lower bidiagonal, rotate to be upper bidiagonal +* by applying Givens rotations on the left +* + IF( LOWER ) THEN + DO 10 I = 1, N - 1 + CALL DLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + RWORK( I ) = CS + RWORK( NM1+I ) = SN + 10 CONTINUE +* +* Update singular vectors if desired +* + IF( NRU.GT.0 ) + $ CALL ZLASR( 'R', 'V', 'F', NRU, N, RWORK( 1 ), RWORK( N ), + $ U, LDU ) + IF( NCC.GT.0 ) + $ CALL ZLASR( 'L', 'V', 'F', N, NCC, RWORK( 1 ), RWORK( N ), + $ C, LDC ) + END IF +* +* Compute singular values to relative accuracy TOL +* (By setting TOL to be negative, algorithm will compute +* singular values to absolute accuracy ABS(TOL)*norm(input matrix)) +* + TOLMUL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) ) + TOL = TOLMUL*EPS +* +* Compute approximate maximum, minimum singular values +* + SMAX = ZERO + DO 20 I = 1, N + SMAX = MAX( SMAX, ABS( D( I ) ) ) + 20 CONTINUE + DO 30 I = 1, N - 1 + SMAX = MAX( SMAX, ABS( E( I ) ) ) + 30 CONTINUE + SMINL = ZERO + IF( TOL.GE.ZERO ) THEN +* +* Relative accuracy desired +* + SMINOA = ABS( D( 1 ) ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + MU = SMINOA + DO 40 I = 2, N + MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) ) + SMINOA = MIN( SMINOA, MU ) + IF( SMINOA.EQ.ZERO ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE + SMINOA = SMINOA / SQRT( DBLE( N ) ) + THRESH = MAX( TOL*SMINOA, MAXITR*N*N*UNFL ) + ELSE +* +* Absolute accuracy desired +* + THRESH = MAX( ABS( TOL )*SMAX, MAXITR*N*N*UNFL ) + END IF +* +* Prepare for main iteration loop for the singular values +* (MAXIT is the maximum number of passes through the inner +* loop permitted before nonconvergence signalled.) +* + MAXIT = MAXITR*N*N + ITER = 0 + OLDLL = -1 + OLDM = -1 +* +* M points to last element of unconverged part of matrix +* + M = N +* +* Begin main iteration loop +* + 60 CONTINUE +* +* Check for convergence or exceeding iteration count +* + IF( M.LE.1 ) + $ GO TO 160 + IF( ITER.GT.MAXIT ) + $ GO TO 200 +* +* Find diagonal block of matrix to work on +* + IF( TOL.LT.ZERO .AND. ABS( D( M ) ).LE.THRESH ) + $ D( M ) = ZERO + SMAX = ABS( D( M ) ) + SMIN = SMAX + DO 70 LLL = 1, M - 1 + LL = M - LLL + ABSS = ABS( D( LL ) ) + ABSE = ABS( E( LL ) ) + IF( TOL.LT.ZERO .AND. ABSS.LE.THRESH ) + $ D( LL ) = ZERO + IF( ABSE.LE.THRESH ) + $ GO TO 80 + SMIN = MIN( SMIN, ABSS ) + SMAX = MAX( SMAX, ABSS, ABSE ) + 70 CONTINUE + LL = 0 + GO TO 90 + 80 CONTINUE + E( LL ) = ZERO +* +* Matrix splits since E(LL) = 0 +* + IF( LL.EQ.M-1 ) THEN +* +* Convergence of bottom singular value, return to top of loop +* + M = M - 1 + GO TO 60 + END IF + 90 CONTINUE + LL = LL + 1 +* +* E(LL) through E(M-1) are nonzero, E(LL-1) is zero +* + IF( LL.EQ.M-1 ) THEN +* +* 2 by 2 block, handle separately +* + CALL DLASV2( D( M-1 ), E( M-1 ), D( M ), SIGMN, SIGMX, SINR, + $ COSR, SINL, COSL ) + D( M-1 ) = SIGMX + E( M-1 ) = ZERO + D( M ) = SIGMN +* +* Compute singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL ZDROT( NCVT, VT( M-1, 1 ), LDVT, VT( M, 1 ), LDVT, + $ COSR, SINR ) + IF( NRU.GT.0 ) + $ CALL ZDROT( NRU, U( 1, M-1 ), 1, U( 1, M ), 1, COSL, SINL ) + IF( NCC.GT.0 ) + $ CALL ZDROT( NCC, C( M-1, 1 ), LDC, C( M, 1 ), LDC, COSL, + $ SINL ) + M = M - 2 + GO TO 60 + END IF +* +* If working on new submatrix, choose shift direction +* (from larger end diagonal element towards smaller) +* + IF( LL.GT.OLDM .OR. M.LT.OLDLL ) THEN + IF( ABS( D( LL ) ).GE.ABS( D( M ) ) ) THEN +* +* Chase bulge from top (big end) to bottom (small end) +* + IDIR = 1 + ELSE +* +* Chase bulge from bottom (big end) to top (small end) +* + IDIR = 2 + END IF + END IF +* +* Apply convergence tests +* + IF( IDIR.EQ.1 ) THEN +* +* Run convergence test in forward direction +* First apply standard test to bottom of matrix +* + IF( ABS( E( M-1 ) ).LE.ABS( TOL )*ABS( D( M ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( M-1 ) ).LE.THRESH ) ) THEN + E( M-1 ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion forward +* + MU = ABS( D( LL ) ) + SMINL = MU + DO 100 LLL = LL, M - 1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + MU = ABS( D( LLL+1 ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 100 CONTINUE + END IF +* + ELSE +* +* Run convergence test in backward direction +* First apply standard test to top of matrix +* + IF( ABS( E( LL ) ).LE.ABS( TOL )*ABS( D( LL ) ) .OR. + $ ( TOL.LT.ZERO .AND. ABS( E( LL ) ).LE.THRESH ) ) THEN + E( LL ) = ZERO + GO TO 60 + END IF +* + IF( TOL.GE.ZERO ) THEN +* +* If relative accuracy desired, +* apply convergence criterion backward +* + MU = ABS( D( M ) ) + SMINL = MU + DO 110 LLL = M - 1, LL, -1 + IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN + E( LLL ) = ZERO + GO TO 60 + END IF + MU = ABS( D( LLL ) )*( MU / ( MU+ABS( E( LLL ) ) ) ) + SMINL = MIN( SMINL, MU ) + 110 CONTINUE + END IF + END IF + OLDLL = LL + OLDM = M +* +* Compute shift. First, test if shifting would ruin relative +* accuracy, and if so set the shift to zero. +* + IF( TOL.GE.ZERO .AND. N*TOL*( SMINL / SMAX ).LE. + $ MAX( EPS, HNDRTH*TOL ) ) THEN +* +* Use a zero shift to avoid loss of relative accuracy +* + SHIFT = ZERO + ELSE +* +* Compute the shift from 2-by-2 block at end of matrix +* + IF( IDIR.EQ.1 ) THEN + SLL = ABS( D( LL ) ) + CALL DLAS2( D( M-1 ), E( M-1 ), D( M ), SHIFT, R ) + ELSE + SLL = ABS( D( M ) ) + CALL DLAS2( D( LL ), E( LL ), D( LL+1 ), SHIFT, R ) + END IF +* +* Test if shift negligible, and if so set to zero +* + IF( SLL.GT.ZERO ) THEN + IF( ( SHIFT / SLL )**2.LT.EPS ) + $ SHIFT = ZERO + END IF + END IF +* +* Increment iteration count +* + ITER = ITER + M - LL +* +* If SHIFT = 0, do simplified QR iteration +* + IF( SHIFT.EQ.ZERO ) THEN + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 120 I = LL, M - 1 + CALL DLARTG( D( I )*CS, E( I ), CS, SN, R ) + IF( I.GT.LL ) + $ E( I-1 ) = OLDSN*R + CALL DLARTG( OLDCS*R, D( I+1 )*SN, OLDCS, OLDSN, D( I ) ) + RWORK( I-LL+1 ) = CS + RWORK( I-LL+1+NM1 ) = SN + RWORK( I-LL+1+NM12 ) = OLDCS + RWORK( I-LL+1+NM13 ) = OLDSN + 120 CONTINUE + H = D( M )*CS + D( M ) = H*OLDCS + E( M-1 ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL ZLASR( 'L', 'V', 'F', M-LL+1, NCVT, RWORK( 1 ), + $ RWORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL ZLASR( 'R', 'V', 'F', NRU, M-LL+1, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL ZLASR( 'L', 'V', 'F', M-LL+1, NCC, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + CS = ONE + OLDCS = ONE + DO 130 I = M, LL + 1, -1 + CALL DLARTG( D( I )*CS, E( I-1 ), CS, SN, R ) + IF( I.LT.M ) + $ E( I ) = OLDSN*R + CALL DLARTG( OLDCS*R, D( I-1 )*SN, OLDCS, OLDSN, D( I ) ) + RWORK( I-LL ) = CS + RWORK( I-LL+NM1 ) = -SN + RWORK( I-LL+NM12 ) = OLDCS + RWORK( I-LL+NM13 ) = -OLDSN + 130 CONTINUE + H = D( LL )*CS + D( LL ) = H*OLDCS + E( LL ) = H*OLDSN +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL ZLASR( 'L', 'V', 'B', M-LL+1, NCVT, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL ZLASR( 'R', 'V', 'B', NRU, M-LL+1, RWORK( 1 ), + $ RWORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL ZLASR( 'L', 'V', 'B', M-LL+1, NCC, RWORK( 1 ), + $ RWORK( N ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO + END IF + ELSE +* +* Use nonzero shift +* + IF( IDIR.EQ.1 ) THEN +* +* Chase bulge from top to bottom +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( LL ) )-SHIFT )* + $ ( SIGN( ONE, D( LL ) )+SHIFT / D( LL ) ) + G = E( LL ) + DO 140 I = LL, M - 1 + CALL DLARTG( F, G, COSR, SINR, R ) + IF( I.GT.LL ) + $ E( I-1 ) = R + F = COSR*D( I ) + SINR*E( I ) + E( I ) = COSR*E( I ) - SINR*D( I ) + G = SINR*D( I+1 ) + D( I+1 ) = COSR*D( I+1 ) + CALL DLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I ) + SINL*D( I+1 ) + D( I+1 ) = COSL*D( I+1 ) - SINL*E( I ) + IF( I.LT.M-1 ) THEN + G = SINL*E( I+1 ) + E( I+1 ) = COSL*E( I+1 ) + END IF + RWORK( I-LL+1 ) = COSR + RWORK( I-LL+1+NM1 ) = SINR + RWORK( I-LL+1+NM12 ) = COSL + RWORK( I-LL+1+NM13 ) = SINL + 140 CONTINUE + E( M-1 ) = F +* +* Update singular vectors +* + IF( NCVT.GT.0 ) + $ CALL ZLASR( 'L', 'V', 'F', M-LL+1, NCVT, RWORK( 1 ), + $ RWORK( N ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL ZLASR( 'R', 'V', 'F', NRU, M-LL+1, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL ZLASR( 'L', 'V', 'F', M-LL+1, NCC, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), C( LL, 1 ), LDC ) +* +* Test convergence +* + IF( ABS( E( M-1 ) ).LE.THRESH ) + $ E( M-1 ) = ZERO +* + ELSE +* +* Chase bulge from bottom to top +* Save cosines and sines for later singular vector updates +* + F = ( ABS( D( M ) )-SHIFT )*( SIGN( ONE, D( M ) )+SHIFT / + $ D( M ) ) + G = E( M-1 ) + DO 150 I = M, LL + 1, -1 + CALL DLARTG( F, G, COSR, SINR, R ) + IF( I.LT.M ) + $ E( I ) = R + F = COSR*D( I ) + SINR*E( I-1 ) + E( I-1 ) = COSR*E( I-1 ) - SINR*D( I ) + G = SINR*D( I-1 ) + D( I-1 ) = COSR*D( I-1 ) + CALL DLARTG( F, G, COSL, SINL, R ) + D( I ) = R + F = COSL*E( I-1 ) + SINL*D( I-1 ) + D( I-1 ) = COSL*D( I-1 ) - SINL*E( I-1 ) + IF( I.GT.LL+1 ) THEN + G = SINL*E( I-2 ) + E( I-2 ) = COSL*E( I-2 ) + END IF + RWORK( I-LL ) = COSR + RWORK( I-LL+NM1 ) = -SINR + RWORK( I-LL+NM12 ) = COSL + RWORK( I-LL+NM13 ) = -SINL + 150 CONTINUE + E( LL ) = F +* +* Test convergence +* + IF( ABS( E( LL ) ).LE.THRESH ) + $ E( LL ) = ZERO +* +* Update singular vectors if desired +* + IF( NCVT.GT.0 ) + $ CALL ZLASR( 'L', 'V', 'B', M-LL+1, NCVT, RWORK( NM12+1 ), + $ RWORK( NM13+1 ), VT( LL, 1 ), LDVT ) + IF( NRU.GT.0 ) + $ CALL ZLASR( 'R', 'V', 'B', NRU, M-LL+1, RWORK( 1 ), + $ RWORK( N ), U( 1, LL ), LDU ) + IF( NCC.GT.0 ) + $ CALL ZLASR( 'L', 'V', 'B', M-LL+1, NCC, RWORK( 1 ), + $ RWORK( N ), C( LL, 1 ), LDC ) + END IF + END IF +* +* QR iteration finished, go back and check convergence +* + GO TO 60 +* +* All singular values converged, so make them positive +* + 160 CONTINUE + DO 170 I = 1, N + IF( D( I ).LT.ZERO ) THEN + D( I ) = -D( I ) +* +* Change sign of singular vectors, if desired +* + IF( NCVT.GT.0 ) + $ CALL ZDSCAL( NCVT, NEGONE, VT( I, 1 ), LDVT ) + END IF + 170 CONTINUE +* +* Sort the singular values into decreasing order (insertion sort on +* singular values, but only one transposition per singular vector) +* + DO 190 I = 1, N - 1 +* +* Scan for smallest D(I) +* + ISUB = 1 + SMIN = D( 1 ) + DO 180 J = 2, N + 1 - I + IF( D( J ).LE.SMIN ) THEN + ISUB = J + SMIN = D( J ) + END IF + 180 CONTINUE + IF( ISUB.NE.N+1-I ) THEN +* +* Swap singular values and vectors +* + D( ISUB ) = D( N+1-I ) + D( N+1-I ) = SMIN + IF( NCVT.GT.0 ) + $ CALL ZSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( N+1-I, 1 ), + $ LDVT ) + IF( NRU.GT.0 ) + $ CALL ZSWAP( NRU, U( 1, ISUB ), 1, U( 1, N+1-I ), 1 ) + IF( NCC.GT.0 ) + $ CALL ZSWAP( NCC, C( ISUB, 1 ), LDC, C( N+1-I, 1 ), LDC ) + END IF + 190 CONTINUE + GO TO 220 +* +* Maximum number of iterations exceeded, failure to converge +* + 200 CONTINUE + INFO = 0 + DO 210 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 210 CONTINUE + 220 CONTINUE + RETURN +* +* End of ZBDSQR +* + END diff --git a/dspl/liblapack/SRC/zcgesv.f b/dspl/liblapack/SRC/zcgesv.f new file mode 100644 index 0000000..bb12d4f --- /dev/null +++ b/dspl/liblapack/SRC/zcgesv.f @@ -0,0 +1,448 @@ +*> \brief ZCGESV computes the solution to system of linear equations A * X = B for GE matrices (mixed precision with iterative refinement) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZCGESV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZCGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK, +* SWORK, RWORK, ITER, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, ITER, LDA, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX SWORK( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( N, * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZCGESV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +*> +*> ZCGESV first attempts to factorize the matrix in COMPLEX and use this +*> factorization within an iterative refinement procedure to produce a +*> solution with COMPLEX*16 normwise backward error quality (see below). +*> If the approach fails the method switches to a COMPLEX*16 +*> factorization and solve. +*> +*> The iterative refinement is not going to be a winning strategy if +*> the ratio COMPLEX performance over COMPLEX*16 performance is too +*> small. A reasonable strategy should take the number of right-hand +*> sides and the size of the matrix into account. This might be done +*> with a call to ILAENV in the future. Up to now, we always try +*> iterative refinement. +*> +*> The iterative refinement process is stopped if +*> ITER > ITERMAX +*> or for all the RHS we have: +*> RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX +*> where +*> o ITER is the number of the current iteration in the iterative +*> refinement process +*> o RNRM is the infinity-norm of the residual +*> o XNRM is the infinity-norm of the solution +*> o ANRM is the infinity-operator-norm of the matrix A +*> o EPS is the machine epsilon returned by DLAMCH('Epsilon') +*> The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 +*> respectively. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, +*> dimension (LDA,N) +*> On entry, the N-by-N coefficient matrix A. +*> On exit, if iterative refinement has been successfully used +*> (INFO.EQ.0 and ITER.GE.0, see description below), then A is +*> unchanged, if double precision factorization has been used +*> (INFO.EQ.0 and ITER.LT.0, see description below), then the +*> array A contains the factors L and U from the factorization +*> A = P*L*U; the unit diagonal elements of L are not stored. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices that define the permutation matrix P; +*> row i of the matrix was interchanged with row IPIV(i). +*> Corresponds either to the single precision factorization +*> (if INFO.EQ.0 and ITER.GE.0) or the double precision +*> factorization (if INFO.EQ.0 and ITER.LT.0). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N,NRHS) +*> This array is used to hold the residual vectors. +*> \endverbatim +*> +*> \param[out] SWORK +*> \verbatim +*> SWORK is COMPLEX array, dimension (N*(N+NRHS)) +*> This array is used to use the single precision matrix and the +*> right-hand sides or solutions in single precision. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] ITER +*> \verbatim +*> ITER is INTEGER +*> < 0: iterative refinement has failed, COMPLEX*16 +*> factorization has been performed +*> -1 : the routine fell back to full precision for +*> implementation- or machine-specific reasons +*> -2 : narrowing the precision induced an overflow, +*> the routine fell back to full precision +*> -3 : failure of CGETRF +*> -31: stop the iterative refinement after the 30th +*> iterations +*> > 0: iterative refinement has been successfully used. +*> Returns the number of iterations +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) computed in COMPLEX*16 is exactly +*> zero. The factorization has been completed, but the +*> factor U is exactly singular, so the solution +*> could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16GEsolve +* +* ===================================================================== + SUBROUTINE ZCGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK, + $ SWORK, RWORK, ITER, INFO ) +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, ITER, LDA, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX SWORK( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( N, * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + LOGICAL DOITREF + PARAMETER ( DOITREF = .TRUE. ) +* + INTEGER ITERMAX + PARAMETER ( ITERMAX = 30 ) +* + DOUBLE PRECISION BWDMAX + PARAMETER ( BWDMAX = 1.0E+00 ) +* + COMPLEX*16 NEGONE, ONE + PARAMETER ( NEGONE = ( -1.0D+00, 0.0D+00 ), + $ ONE = ( 1.0D+00, 0.0D+00 ) ) +* +* .. Local Scalars .. + INTEGER I, IITER, PTSA, PTSX + DOUBLE PRECISION ANRM, CTE, EPS, RNRM, XNRM + COMPLEX*16 ZDUM +* +* .. External Subroutines .. + EXTERNAL CGETRS, CGETRF, CLAG2Z, XERBLA, ZAXPY, ZGEMM, + $ ZLACPY, ZLAG2C, ZGETRF, ZGETRS +* .. +* .. External Functions .. + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL IZAMAX, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 + ITER = 0 +* +* Test the input parameters. +* + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZCGESV', -INFO ) + RETURN + END IF +* +* Quick return if (N.EQ.0). +* + IF( N.EQ.0 ) + $ RETURN +* +* Skip single precision iterative refinement if a priori slower +* than double precision factorization. +* + IF( .NOT.DOITREF ) THEN + ITER = -1 + GO TO 40 + END IF +* +* Compute some constants. +* + ANRM = ZLANGE( 'I', N, N, A, LDA, RWORK ) + EPS = DLAMCH( 'Epsilon' ) + CTE = ANRM*EPS*SQRT( DBLE( N ) )*BWDMAX +* +* Set the indices PTSA, PTSX for referencing SA and SX in SWORK. +* + PTSA = 1 + PTSX = PTSA + N*N +* +* Convert B from double precision to single precision and store the +* result in SX. +* + CALL ZLAG2C( N, NRHS, B, LDB, SWORK( PTSX ), N, INFO ) +* + IF( INFO.NE.0 ) THEN + ITER = -2 + GO TO 40 + END IF +* +* Convert A from double precision to single precision and store the +* result in SA. +* + CALL ZLAG2C( N, N, A, LDA, SWORK( PTSA ), N, INFO ) +* + IF( INFO.NE.0 ) THEN + ITER = -2 + GO TO 40 + END IF +* +* Compute the LU factorization of SA. +* + CALL CGETRF( N, N, SWORK( PTSA ), N, IPIV, INFO ) +* + IF( INFO.NE.0 ) THEN + ITER = -3 + GO TO 40 + END IF +* +* Solve the system SA*SX = SB. +* + CALL CGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV, + $ SWORK( PTSX ), N, INFO ) +* +* Convert SX back to double precision +* + CALL CLAG2Z( N, NRHS, SWORK( PTSX ), N, X, LDX, INFO ) +* +* Compute R = B - AX (R is WORK). +* + CALL ZLACPY( 'All', N, NRHS, B, LDB, WORK, N ) +* + CALL ZGEMM( 'No Transpose', 'No Transpose', N, NRHS, N, NEGONE, A, + $ LDA, X, LDX, ONE, WORK, N ) +* +* Check whether the NRHS normwise backward errors satisfy the +* stopping criterion. If yes, set ITER=0 and return. +* + DO I = 1, NRHS + XNRM = CABS1( X( IZAMAX( N, X( 1, I ), 1 ), I ) ) + RNRM = CABS1( WORK( IZAMAX( N, WORK( 1, I ), 1 ), I ) ) + IF( RNRM.GT.XNRM*CTE ) + $ GO TO 10 + END DO +* +* If we are here, the NRHS normwise backward errors satisfy the +* stopping criterion. We are good to exit. +* + ITER = 0 + RETURN +* + 10 CONTINUE +* + DO 30 IITER = 1, ITERMAX +* +* Convert R (in WORK) from double precision to single precision +* and store the result in SX. +* + CALL ZLAG2C( N, NRHS, WORK, N, SWORK( PTSX ), N, INFO ) +* + IF( INFO.NE.0 ) THEN + ITER = -2 + GO TO 40 + END IF +* +* Solve the system SA*SX = SR. +* + CALL CGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV, + $ SWORK( PTSX ), N, INFO ) +* +* Convert SX back to double precision and update the current +* iterate. +* + CALL CLAG2Z( N, NRHS, SWORK( PTSX ), N, WORK, N, INFO ) +* + DO I = 1, NRHS + CALL ZAXPY( N, ONE, WORK( 1, I ), 1, X( 1, I ), 1 ) + END DO +* +* Compute R = B - AX (R is WORK). +* + CALL ZLACPY( 'All', N, NRHS, B, LDB, WORK, N ) +* + CALL ZGEMM( 'No Transpose', 'No Transpose', N, NRHS, N, NEGONE, + $ A, LDA, X, LDX, ONE, WORK, N ) +* +* Check whether the NRHS normwise backward errors satisfy the +* stopping criterion. If yes, set ITER=IITER>0 and return. +* + DO I = 1, NRHS + XNRM = CABS1( X( IZAMAX( N, X( 1, I ), 1 ), I ) ) + RNRM = CABS1( WORK( IZAMAX( N, WORK( 1, I ), 1 ), I ) ) + IF( RNRM.GT.XNRM*CTE ) + $ GO TO 20 + END DO +* +* If we are here, the NRHS normwise backward errors satisfy the +* stopping criterion, we are good to exit. +* + ITER = IITER +* + RETURN +* + 20 CONTINUE +* + 30 CONTINUE +* +* If we are at this place of the code, this is because we have +* performed ITER=ITERMAX iterations and never satisified the stopping +* criterion, set up the ITER flag accordingly and follow up on double +* precision routine. +* + ITER = -ITERMAX - 1 +* + 40 CONTINUE +* +* Single-precision iterative refinement failed to converge to a +* satisfactory solution, so we resort to double precision. +* + CALL ZGETRF( N, N, A, LDA, IPIV, INFO ) +* + IF( INFO.NE.0 ) + $ RETURN +* + CALL ZLACPY( 'All', N, NRHS, B, LDB, X, LDX ) + CALL ZGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, X, LDX, + $ INFO ) +* + RETURN +* +* End of ZCGESV. +* + END diff --git a/dspl/liblapack/SRC/zcposv.f b/dspl/liblapack/SRC/zcposv.f new file mode 100644 index 0000000..eafcce6 --- /dev/null +++ b/dspl/liblapack/SRC/zcposv.f @@ -0,0 +1,457 @@ +*> \brief ZCPOSV computes the solution to system of linear equations A * X = B for PO matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZCPOSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZCPOSV( UPLO, N, NRHS, A, LDA, B, LDB, X, LDX, WORK, +* SWORK, RWORK, ITER, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, ITER, LDA, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ) +* COMPLEX SWORK( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( N, * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZCPOSV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian positive definite matrix and X and B +*> are N-by-NRHS matrices. +*> +*> ZCPOSV first attempts to factorize the matrix in COMPLEX and use this +*> factorization within an iterative refinement procedure to produce a +*> solution with COMPLEX*16 normwise backward error quality (see below). +*> If the approach fails the method switches to a COMPLEX*16 +*> factorization and solve. +*> +*> The iterative refinement is not going to be a winning strategy if +*> the ratio COMPLEX performance over COMPLEX*16 performance is too +*> small. A reasonable strategy should take the number of right-hand +*> sides and the size of the matrix into account. This might be done +*> with a call to ILAENV in the future. Up to now, we always try +*> iterative refinement. +*> +*> The iterative refinement process is stopped if +*> ITER > ITERMAX +*> or for all the RHS we have: +*> RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX +*> where +*> o ITER is the number of the current iteration in the iterative +*> refinement process +*> o RNRM is the infinity-norm of the residual +*> o XNRM is the infinity-norm of the solution +*> o ANRM is the infinity-operator-norm of the matrix A +*> o EPS is the machine epsilon returned by DLAMCH('Epsilon') +*> The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00 +*> respectively. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, +*> dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> Note that the imaginary parts of the diagonal +*> elements need not be set and are assumed to be zero. +*> +*> On exit, if iterative refinement has been successfully used +*> (INFO.EQ.0 and ITER.GE.0, see description below), then A is +*> unchanged, if double precision factorization has been used +*> (INFO.EQ.0 and ITER.LT.0, see description below), then the +*> array A contains the factor U or L from the Cholesky +*> factorization A = U**H*U or A = L*L**H. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N,NRHS) +*> This array is used to hold the residual vectors. +*> \endverbatim +*> +*> \param[out] SWORK +*> \verbatim +*> SWORK is COMPLEX array, dimension (N*(N+NRHS)) +*> This array is used to use the single precision matrix and the +*> right-hand sides or solutions in single precision. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] ITER +*> \verbatim +*> ITER is INTEGER +*> < 0: iterative refinement has failed, COMPLEX*16 +*> factorization has been performed +*> -1 : the routine fell back to full precision for +*> implementation- or machine-specific reasons +*> -2 : narrowing the precision induced an overflow, +*> the routine fell back to full precision +*> -3 : failure of CPOTRF +*> -31: stop the iterative refinement after the 30th +*> iterations +*> > 0: iterative refinement has been successfully used. +*> Returns the number of iterations +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i of +*> (COMPLEX*16) A is not positive definite, so the +*> factorization could not be completed, and the solution +*> has not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16POsolve +* +* ===================================================================== + SUBROUTINE ZCPOSV( UPLO, N, NRHS, A, LDA, B, LDB, X, LDX, WORK, + $ SWORK, RWORK, ITER, INFO ) +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITER, LDA, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX SWORK( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( N, * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + LOGICAL DOITREF + PARAMETER ( DOITREF = .TRUE. ) +* + INTEGER ITERMAX + PARAMETER ( ITERMAX = 30 ) +* + DOUBLE PRECISION BWDMAX + PARAMETER ( BWDMAX = 1.0E+00 ) +* + COMPLEX*16 NEGONE, ONE + PARAMETER ( NEGONE = ( -1.0D+00, 0.0D+00 ), + $ ONE = ( 1.0D+00, 0.0D+00 ) ) +* +* .. Local Scalars .. + INTEGER I, IITER, PTSA, PTSX + DOUBLE PRECISION ANRM, CTE, EPS, RNRM, XNRM + COMPLEX*16 ZDUM +* +* .. External Subroutines .. + EXTERNAL ZAXPY, ZHEMM, ZLACPY, ZLAT2C, ZLAG2C, CLAG2Z, + $ CPOTRF, CPOTRS, XERBLA, ZPOTRF, ZPOTRS +* .. +* .. External Functions .. + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH, ZLANHE + LOGICAL LSAME + EXTERNAL IZAMAX, DLAMCH, ZLANHE, LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, SQRT +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 + ITER = 0 +* +* Test the input parameters. +* + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZCPOSV', -INFO ) + RETURN + END IF +* +* Quick return if (N.EQ.0). +* + IF( N.EQ.0 ) + $ RETURN +* +* Skip single precision iterative refinement if a priori slower +* than double precision factorization. +* + IF( .NOT.DOITREF ) THEN + ITER = -1 + GO TO 40 + END IF +* +* Compute some constants. +* + ANRM = ZLANHE( 'I', UPLO, N, A, LDA, RWORK ) + EPS = DLAMCH( 'Epsilon' ) + CTE = ANRM*EPS*SQRT( DBLE( N ) )*BWDMAX +* +* Set the indices PTSA, PTSX for referencing SA and SX in SWORK. +* + PTSA = 1 + PTSX = PTSA + N*N +* +* Convert B from double precision to single precision and store the +* result in SX. +* + CALL ZLAG2C( N, NRHS, B, LDB, SWORK( PTSX ), N, INFO ) +* + IF( INFO.NE.0 ) THEN + ITER = -2 + GO TO 40 + END IF +* +* Convert A from double precision to single precision and store the +* result in SA. +* + CALL ZLAT2C( UPLO, N, A, LDA, SWORK( PTSA ), N, INFO ) +* + IF( INFO.NE.0 ) THEN + ITER = -2 + GO TO 40 + END IF +* +* Compute the Cholesky factorization of SA. +* + CALL CPOTRF( UPLO, N, SWORK( PTSA ), N, INFO ) +* + IF( INFO.NE.0 ) THEN + ITER = -3 + GO TO 40 + END IF +* +* Solve the system SA*SX = SB. +* + CALL CPOTRS( UPLO, N, NRHS, SWORK( PTSA ), N, SWORK( PTSX ), N, + $ INFO ) +* +* Convert SX back to COMPLEX*16 +* + CALL CLAG2Z( N, NRHS, SWORK( PTSX ), N, X, LDX, INFO ) +* +* Compute R = B - AX (R is WORK). +* + CALL ZLACPY( 'All', N, NRHS, B, LDB, WORK, N ) +* + CALL ZHEMM( 'Left', UPLO, N, NRHS, NEGONE, A, LDA, X, LDX, ONE, + $ WORK, N ) +* +* Check whether the NRHS normwise backward errors satisfy the +* stopping criterion. If yes, set ITER=0 and return. +* + DO I = 1, NRHS + XNRM = CABS1( X( IZAMAX( N, X( 1, I ), 1 ), I ) ) + RNRM = CABS1( WORK( IZAMAX( N, WORK( 1, I ), 1 ), I ) ) + IF( RNRM.GT.XNRM*CTE ) + $ GO TO 10 + END DO +* +* If we are here, the NRHS normwise backward errors satisfy the +* stopping criterion. We are good to exit. +* + ITER = 0 + RETURN +* + 10 CONTINUE +* + DO 30 IITER = 1, ITERMAX +* +* Convert R (in WORK) from double precision to single precision +* and store the result in SX. +* + CALL ZLAG2C( N, NRHS, WORK, N, SWORK( PTSX ), N, INFO ) +* + IF( INFO.NE.0 ) THEN + ITER = -2 + GO TO 40 + END IF +* +* Solve the system SA*SX = SR. +* + CALL CPOTRS( UPLO, N, NRHS, SWORK( PTSA ), N, SWORK( PTSX ), N, + $ INFO ) +* +* Convert SX back to double precision and update the current +* iterate. +* + CALL CLAG2Z( N, NRHS, SWORK( PTSX ), N, WORK, N, INFO ) +* + DO I = 1, NRHS + CALL ZAXPY( N, ONE, WORK( 1, I ), 1, X( 1, I ), 1 ) + END DO +* +* Compute R = B - AX (R is WORK). +* + CALL ZLACPY( 'All', N, NRHS, B, LDB, WORK, N ) +* + CALL ZHEMM( 'L', UPLO, N, NRHS, NEGONE, A, LDA, X, LDX, ONE, + $ WORK, N ) +* +* Check whether the NRHS normwise backward errors satisfy the +* stopping criterion. If yes, set ITER=IITER>0 and return. +* + DO I = 1, NRHS + XNRM = CABS1( X( IZAMAX( N, X( 1, I ), 1 ), I ) ) + RNRM = CABS1( WORK( IZAMAX( N, WORK( 1, I ), 1 ), I ) ) + IF( RNRM.GT.XNRM*CTE ) + $ GO TO 20 + END DO +* +* If we are here, the NRHS normwise backward errors satisfy the +* stopping criterion, we are good to exit. +* + ITER = IITER +* + RETURN +* + 20 CONTINUE +* + 30 CONTINUE +* +* If we are at this place of the code, this is because we have +* performed ITER=ITERMAX iterations and never satisified the +* stopping criterion, set up the ITER flag accordingly and follow +* up on double precision routine. +* + ITER = -ITERMAX - 1 +* + 40 CONTINUE +* +* Single-precision iterative refinement failed to converge to a +* satisfactory solution, so we resort to double precision. +* + CALL ZPOTRF( UPLO, N, A, LDA, INFO ) +* + IF( INFO.NE.0 ) + $ RETURN +* + CALL ZLACPY( 'All', N, NRHS, B, LDB, X, LDX ) + CALL ZPOTRS( UPLO, N, NRHS, A, LDA, X, LDX, INFO ) +* + RETURN +* +* End of ZCPOSV. +* + END diff --git a/dspl/liblapack/SRC/zdrscl.f b/dspl/liblapack/SRC/zdrscl.f new file mode 100644 index 0000000..03b87d4 --- /dev/null +++ b/dspl/liblapack/SRC/zdrscl.f @@ -0,0 +1,174 @@ +*> \brief \b ZDRSCL multiplies a vector by the reciprocal of a real scalar. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZDRSCL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZDRSCL( N, SA, SX, INCX ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* DOUBLE PRECISION SA +* .. +* .. Array Arguments .. +* COMPLEX*16 SX( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZDRSCL multiplies an n-element complex vector x by the real scalar +*> 1/a. This is done without overflow or underflow as long as +*> the final result x/a does not overflow or underflow. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of components of the vector x. +*> \endverbatim +*> +*> \param[in] SA +*> \verbatim +*> SA is DOUBLE PRECISION +*> The scalar a which is used to divide each component of x. +*> SA must be >= 0, or the subroutine will divide by zero. +*> \endverbatim +*> +*> \param[in,out] SX +*> \verbatim +*> SX is COMPLEX*16 array, dimension +*> (1+(N-1)*abs(INCX)) +*> The n-element vector x. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive values of the vector SX. +*> > 0: SX(1) = X(1) and SX(1+(i-1)*INCX) = x(i), 1< i<= n +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZDRSCL( N, SA, SX, INCX ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCX, N + DOUBLE PRECISION SA +* .. +* .. Array Arguments .. + COMPLEX*16 SX( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + DOUBLE PRECISION BIGNUM, CDEN, CDEN1, CNUM, CNUM1, MUL, SMLNUM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, ZDSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Initialize the denominator to SA and the numerator to 1. +* + CDEN = SA + CNUM = ONE +* + 10 CONTINUE + CDEN1 = CDEN*SMLNUM + CNUM1 = CNUM / BIGNUM + IF( ABS( CDEN1 ).GT.ABS( CNUM ) .AND. CNUM.NE.ZERO ) THEN +* +* Pre-multiply X by SMLNUM if CDEN is large compared to CNUM. +* + MUL = SMLNUM + DONE = .FALSE. + CDEN = CDEN1 + ELSE IF( ABS( CNUM1 ).GT.ABS( CDEN ) ) THEN +* +* Pre-multiply X by BIGNUM if CDEN is small compared to CNUM. +* + MUL = BIGNUM + DONE = .FALSE. + CNUM = CNUM1 + ELSE +* +* Multiply X by CNUM / CDEN and return. +* + MUL = CNUM / CDEN + DONE = .TRUE. + END IF +* +* Scale the vector X by MUL +* + CALL ZDSCAL( N, MUL, SX, INCX ) +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of ZDRSCL +* + END diff --git a/dspl/liblapack/SRC/zgbbrd.f b/dspl/liblapack/SRC/zgbbrd.f new file mode 100644 index 0000000..e299824 --- /dev/null +++ b/dspl/liblapack/SRC/zgbbrd.f @@ -0,0 +1,573 @@ +*> \brief \b ZGBBRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGBBRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGBBRD( VECT, M, N, NCC, KL, KU, AB, LDAB, D, E, Q, +* LDQ, PT, LDPT, C, LDC, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER VECT +* INTEGER INFO, KL, KU, LDAB, LDC, LDPT, LDQ, M, N, NCC +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ), RWORK( * ) +* COMPLEX*16 AB( LDAB, * ), C( LDC, * ), PT( LDPT, * ), +* $ Q( LDQ, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGBBRD reduces a complex general m-by-n band matrix A to real upper +*> bidiagonal form B by a unitary transformation: Q**H * A * P = B. +*> +*> The routine computes B, and optionally forms Q or P**H, or computes +*> Q**H*C for a given matrix C. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> Specifies whether or not the matrices Q and P**H are to be +*> formed. +*> = 'N': do not form Q or P**H; +*> = 'Q': form Q only; +*> = 'P': form P**H only; +*> = 'B': form both. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NCC +*> \verbatim +*> NCC is INTEGER +*> The number of columns of the matrix C. NCC >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals of the matrix A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals of the matrix A. KU >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the m-by-n band matrix A, stored in rows 1 to +*> KL+KU+1. The j-th column of A is stored in the j-th column of +*> the array AB as follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). +*> On exit, A is overwritten by values generated during the +*> reduction. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array A. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (min(M,N)) +*> The diagonal elements of the bidiagonal matrix B. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (min(M,N)-1) +*> The superdiagonal elements of the bidiagonal matrix B. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ,M) +*> If VECT = 'Q' or 'B', the m-by-m unitary matrix Q. +*> If VECT = 'N' or 'P', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] PT +*> \verbatim +*> PT is COMPLEX*16 array, dimension (LDPT,N) +*> If VECT = 'P' or 'B', the n-by-n unitary matrix P'. +*> If VECT = 'N' or 'Q', the array PT is not referenced. +*> \endverbatim +*> +*> \param[in] LDPT +*> \verbatim +*> LDPT is INTEGER +*> The leading dimension of the array PT. +*> LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,NCC) +*> On entry, an m-by-ncc matrix C. +*> On exit, C is overwritten by Q**H*C. +*> C is not referenced if NCC = 0. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. +*> LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (max(M,N)) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (max(M,N)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GBcomputational +* +* ===================================================================== + SUBROUTINE ZGBBRD( VECT, M, N, NCC, KL, KU, AB, LDAB, D, E, Q, + $ LDQ, PT, LDPT, C, LDC, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER VECT + INTEGER INFO, KL, KU, LDAB, LDC, LDPT, LDQ, M, N, NCC +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), RWORK( * ) + COMPLEX*16 AB( LDAB, * ), C( LDC, * ), PT( LDPT, * ), + $ Q( LDQ, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL WANTB, WANTC, WANTPT, WANTQ + INTEGER I, INCA, J, J1, J2, KB, KB1, KK, KLM, KLU1, + $ KUN, L, MINMN, ML, ML0, MU, MU0, NR, NRT + DOUBLE PRECISION ABST, RC + COMPLEX*16 RA, RB, RS, T +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARGV, ZLARTG, ZLARTV, ZLASET, ZROT, + $ ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DCONJG, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTB = LSAME( VECT, 'B' ) + WANTQ = LSAME( VECT, 'Q' ) .OR. WANTB + WANTPT = LSAME( VECT, 'P' ) .OR. WANTB + WANTC = NCC.GT.0 + KLU1 = KL + KU + 1 + INFO = 0 + IF( .NOT.WANTQ .AND. .NOT.WANTPT .AND. .NOT.LSAME( VECT, 'N' ) ) + $ THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NCC.LT.0 ) THEN + INFO = -4 + ELSE IF( KL.LT.0 ) THEN + INFO = -5 + ELSE IF( KU.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KLU1 ) THEN + INFO = -8 + ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. LDQ.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDPT.LT.1 .OR. WANTPT .AND. LDPT.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDC.LT.1 .OR. WANTC .AND. LDC.LT.MAX( 1, M ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBBRD', -INFO ) + RETURN + END IF +* +* Initialize Q and P**H to the unit matrix, if needed +* + IF( WANTQ ) + $ CALL ZLASET( 'Full', M, M, CZERO, CONE, Q, LDQ ) + IF( WANTPT ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, PT, LDPT ) +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + MINMN = MIN( M, N ) +* + IF( KL+KU.GT.1 ) THEN +* +* Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce +* first to lower bidiagonal form and then transform to upper +* bidiagonal +* + IF( KU.GT.0 ) THEN + ML0 = 1 + MU0 = 2 + ELSE + ML0 = 2 + MU0 = 1 + END IF +* +* Wherever possible, plane rotations are generated and applied in +* vector operations of length NR over the index set J1:J2:KLU1. +* +* The complex sines of the plane rotations are stored in WORK, +* and the real cosines in RWORK. +* + KLM = MIN( M-1, KL ) + KUN = MIN( N-1, KU ) + KB = KLM + KUN + KB1 = KB + 1 + INCA = KB1*LDAB + NR = 0 + J1 = KLM + 2 + J2 = 1 - KUN +* + DO 90 I = 1, MINMN +* +* Reduce i-th column and i-th row of matrix to bidiagonal form +* + ML = KLM + 1 + MU = KUN + 1 + DO 80 KK = 1, KB + J1 = J1 + KB + J2 = J2 + KB +* +* generate plane rotations to annihilate nonzero elements +* which have been created below the band +* + IF( NR.GT.0 ) + $ CALL ZLARGV( NR, AB( KLU1, J1-KLM-1 ), INCA, + $ WORK( J1 ), KB1, RWORK( J1 ), KB1 ) +* +* apply plane rotations from the left +* + DO 10 L = 1, KB + IF( J2-KLM+L-1.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( KLU1-L, J1-KLM+L-1 ), INCA, + $ AB( KLU1-L+1, J1-KLM+L-1 ), INCA, + $ RWORK( J1 ), WORK( J1 ), KB1 ) + 10 CONTINUE +* + IF( ML.GT.ML0 ) THEN + IF( ML.LE.M-I+1 ) THEN +* +* generate plane rotation to annihilate a(i+ml-1,i) +* within the band, and apply rotation from the left +* + CALL ZLARTG( AB( KU+ML-1, I ), AB( KU+ML, I ), + $ RWORK( I+ML-1 ), WORK( I+ML-1 ), RA ) + AB( KU+ML-1, I ) = RA + IF( I.LT.N ) + $ CALL ZROT( MIN( KU+ML-2, N-I ), + $ AB( KU+ML-2, I+1 ), LDAB-1, + $ AB( KU+ML-1, I+1 ), LDAB-1, + $ RWORK( I+ML-1 ), WORK( I+ML-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KB1 + END IF +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + DO 20 J = J1, J2, KB1 + CALL ZROT( M, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ RWORK( J ), DCONJG( WORK( J ) ) ) + 20 CONTINUE + END IF +* + IF( WANTC ) THEN +* +* apply plane rotations to C +* + DO 30 J = J1, J2, KB1 + CALL ZROT( NCC, C( J-1, 1 ), LDC, C( J, 1 ), LDC, + $ RWORK( J ), WORK( J ) ) + 30 CONTINUE + END IF +* + IF( J2+KUN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KB1 + END IF +* + DO 40 J = J1, J2, KB1 +* +* create nonzero element a(j-1,j+ku) above the band +* and store it in WORK(n+1:2*n) +* + WORK( J+KUN ) = WORK( J )*AB( 1, J+KUN ) + AB( 1, J+KUN ) = RWORK( J )*AB( 1, J+KUN ) + 40 CONTINUE +* +* generate plane rotations to annihilate nonzero elements +* which have been generated above the band +* + IF( NR.GT.0 ) + $ CALL ZLARGV( NR, AB( 1, J1+KUN-1 ), INCA, + $ WORK( J1+KUN ), KB1, RWORK( J1+KUN ), + $ KB1 ) +* +* apply plane rotations from the right +* + DO 50 L = 1, KB + IF( J2+L-1.GT.M ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( L+1, J1+KUN-1 ), INCA, + $ AB( L, J1+KUN ), INCA, + $ RWORK( J1+KUN ), WORK( J1+KUN ), KB1 ) + 50 CONTINUE +* + IF( ML.EQ.ML0 .AND. MU.GT.MU0 ) THEN + IF( MU.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i,i+mu-1) +* within the band, and apply rotation from the right +* + CALL ZLARTG( AB( KU-MU+3, I+MU-2 ), + $ AB( KU-MU+2, I+MU-1 ), + $ RWORK( I+MU-1 ), WORK( I+MU-1 ), RA ) + AB( KU-MU+3, I+MU-2 ) = RA + CALL ZROT( MIN( KL+MU-2, M-I ), + $ AB( KU-MU+4, I+MU-2 ), 1, + $ AB( KU-MU+3, I+MU-1 ), 1, + $ RWORK( I+MU-1 ), WORK( I+MU-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KB1 + END IF +* + IF( WANTPT ) THEN +* +* accumulate product of plane rotations in P**H +* + DO 60 J = J1, J2, KB1 + CALL ZROT( N, PT( J+KUN-1, 1 ), LDPT, + $ PT( J+KUN, 1 ), LDPT, RWORK( J+KUN ), + $ DCONJG( WORK( J+KUN ) ) ) + 60 CONTINUE + END IF +* + IF( J2+KB.GT.M ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KB1 + END IF +* + DO 70 J = J1, J2, KB1 +* +* create nonzero element a(j+kl+ku,j+ku-1) below the +* band and store it in WORK(1:n) +* + WORK( J+KB ) = WORK( J+KUN )*AB( KLU1, J+KUN ) + AB( KLU1, J+KUN ) = RWORK( J+KUN )*AB( KLU1, J+KUN ) + 70 CONTINUE +* + IF( ML.GT.ML0 ) THEN + ML = ML - 1 + ELSE + MU = MU - 1 + END IF + 80 CONTINUE + 90 CONTINUE + END IF +* + IF( KU.EQ.0 .AND. KL.GT.0 ) THEN +* +* A has been reduced to complex lower bidiagonal form +* +* Transform lower bidiagonal form to upper bidiagonal by applying +* plane rotations from the left, overwriting superdiagonal +* elements on subdiagonal elements +* + DO 100 I = 1, MIN( M-1, N ) + CALL ZLARTG( AB( 1, I ), AB( 2, I ), RC, RS, RA ) + AB( 1, I ) = RA + IF( I.LT.N ) THEN + AB( 2, I ) = RS*AB( 1, I+1 ) + AB( 1, I+1 ) = RC*AB( 1, I+1 ) + END IF + IF( WANTQ ) + $ CALL ZROT( M, Q( 1, I ), 1, Q( 1, I+1 ), 1, RC, + $ DCONJG( RS ) ) + IF( WANTC ) + $ CALL ZROT( NCC, C( I, 1 ), LDC, C( I+1, 1 ), LDC, RC, + $ RS ) + 100 CONTINUE + ELSE +* +* A has been reduced to complex upper bidiagonal form or is +* diagonal +* + IF( KU.GT.0 .AND. M.LT.N ) THEN +* +* Annihilate a(m,m+1) by applying plane rotations from the +* right +* + RB = AB( KU, M+1 ) + DO 110 I = M, 1, -1 + CALL ZLARTG( AB( KU+1, I ), RB, RC, RS, RA ) + AB( KU+1, I ) = RA + IF( I.GT.1 ) THEN + RB = -DCONJG( RS )*AB( KU, I ) + AB( KU, I ) = RC*AB( KU, I ) + END IF + IF( WANTPT ) + $ CALL ZROT( N, PT( I, 1 ), LDPT, PT( M+1, 1 ), LDPT, + $ RC, DCONJG( RS ) ) + 110 CONTINUE + END IF + END IF +* +* Make diagonal and superdiagonal elements real, storing them in D +* and E +* + T = AB( KU+1, 1 ) + DO 120 I = 1, MINMN + ABST = ABS( T ) + D( I ) = ABST + IF( ABST.NE.ZERO ) THEN + T = T / ABST + ELSE + T = CONE + END IF + IF( WANTQ ) + $ CALL ZSCAL( M, T, Q( 1, I ), 1 ) + IF( WANTC ) + $ CALL ZSCAL( NCC, DCONJG( T ), C( I, 1 ), LDC ) + IF( I.LT.MINMN ) THEN + IF( KU.EQ.0 .AND. KL.EQ.0 ) THEN + E( I ) = ZERO + T = AB( 1, I+1 ) + ELSE + IF( KU.EQ.0 ) THEN + T = AB( 2, I )*DCONJG( T ) + ELSE + T = AB( KU, I+1 )*DCONJG( T ) + END IF + ABST = ABS( T ) + E( I ) = ABST + IF( ABST.NE.ZERO ) THEN + T = T / ABST + ELSE + T = CONE + END IF + IF( WANTPT ) + $ CALL ZSCAL( N, T, PT( I+1, 1 ), LDPT ) + T = AB( KU+1, I+1 )*DCONJG( T ) + END IF + END IF + 120 CONTINUE + RETURN +* +* End of ZGBBRD +* + END diff --git a/dspl/liblapack/SRC/zgbcon.f b/dspl/liblapack/SRC/zgbcon.f new file mode 100644 index 0000000..6d7c8d8 --- /dev/null +++ b/dspl/liblapack/SRC/zgbcon.f @@ -0,0 +1,320 @@ +*> \brief \b ZGBCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGBCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND, +* WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER INFO, KL, KU, LDAB, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 AB( LDAB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGBCON estimates the reciprocal of the condition number of a complex +*> general band matrix A, in either the 1-norm or the infinity-norm, +*> using the LU factorization computed by ZGBTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as +*> RCOND = 1 / ( norm(A) * norm(inv(A)) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by ZGBTRF. U is stored as an upper triangular band +*> matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +*> the multipliers used during the factorization are stored in +*> rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= N, row i of the matrix was +*> interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> If NORM = '1' or 'O', the 1-norm of the original matrix A. +*> If NORM = 'I', the infinity-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(norm(A) * norm(inv(A))). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GBcomputational +* +* ===================================================================== + SUBROUTINE ZGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND, + $ WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, KL, KU, LDAB, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 AB( LDAB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LNOTI, ONENRM + CHARACTER NORMIN + INTEGER IX, J, JP, KASE, KASE1, KD, LM + DOUBLE PRECISION AINVNM, SCALE, SMLNUM + COMPLEX*16 T, ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH + COMPLEX*16 ZDOTC + EXTERNAL LSAME, IZAMAX, DLAMCH, ZDOTC +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZDRSCL, ZLACN2, ZLATBS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN + INFO = -6 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KD = KL + KU + 1 + LNOTI = KL.GT.0 + KASE = 0 + 10 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + IF( LNOTI ) THEN + DO 20 J = 1, N - 1 + LM = MIN( KL, N-J ) + JP = IPIV( J ) + T = WORK( JP ) + IF( JP.NE.J ) THEN + WORK( JP ) = WORK( J ) + WORK( J ) = T + END IF + CALL ZAXPY( LM, -T, AB( KD+1, J ), 1, WORK( J+1 ), 1 ) + 20 CONTINUE + END IF +* +* Multiply by inv(U). +* + CALL ZLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ KL+KU, AB, LDAB, WORK, SCALE, RWORK, INFO ) + ELSE +* +* Multiply by inv(U**H). +* + CALL ZLATBS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, KL+KU, AB, LDAB, WORK, SCALE, RWORK, + $ INFO ) +* +* Multiply by inv(L**H). +* + IF( LNOTI ) THEN + DO 30 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + WORK( J ) = WORK( J ) - ZDOTC( LM, AB( KD+1, J ), 1, + $ WORK( J+1 ), 1 ) + JP = IPIV( J ) + IF( JP.NE.J ) THEN + T = WORK( JP ) + WORK( JP ) = WORK( J ) + WORK( J ) = T + END IF + 30 CONTINUE + END IF + END IF +* +* Divide X by 1/SCALE if doing so will not cause overflow. +* + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = IZAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 40 + CALL ZDRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 40 CONTINUE + RETURN +* +* End of ZGBCON +* + END diff --git a/dspl/liblapack/SRC/zgbequ.f b/dspl/liblapack/SRC/zgbequ.f new file mode 100644 index 0000000..e893885 --- /dev/null +++ b/dspl/liblapack/SRC/zgbequ.f @@ -0,0 +1,333 @@ +*> \brief \b ZGBEQU +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGBEQU + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGBEQU( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, +* AMAX, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, KL, KU, LDAB, M, N +* DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( * ), R( * ) +* COMPLEX*16 AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGBEQU computes row and column scalings intended to equilibrate an +*> M-by-N band matrix A and reduce its condition number. R returns the +*> row scale factors and C the column scale factors, chosen to try to +*> make the largest element in each row and column of the matrix B with +*> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. +*> +*> R(i) and C(j) are restricted to be between SMLNUM = smallest safe +*> number and BIGNUM = largest safe number. Use of these scaling +*> factors is not guaranteed to reduce the condition number of A but +*> works well in practice. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> The band matrix A, stored in rows 1 to KL+KU+1. The j-th +*> column of A is stored in the j-th column of the array AB as +*> follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (M) +*> If INFO = 0, or INFO > M, R contains the row scale factors +*> for A. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, C contains the column scale factors for A. +*> \endverbatim +*> +*> \param[out] ROWCND +*> \verbatim +*> ROWCND is DOUBLE PRECISION +*> If INFO = 0 or INFO > M, ROWCND contains the ratio of the +*> smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +*> AMAX is neither too large nor too small, it is not worth +*> scaling by R. +*> \endverbatim +*> +*> \param[out] COLCND +*> \verbatim +*> COLCND is DOUBLE PRECISION +*> If INFO = 0, COLCND contains the ratio of the smallest +*> C(i) to the largest C(i). If COLCND >= 0.1, it is not +*> worth scaling by C. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= M: the i-th row of A is exactly zero +*> > M: the (i-M)-th column of A is exactly zero +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GBcomputational +* +* ===================================================================== + SUBROUTINE ZGBEQU( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N + DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ), R( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, KD + DOUBLE PRECISION BIGNUM, RCMAX, RCMIN, SMLNUM + COMPLEX*16 ZDUM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + KD = KU + 1 + DO 30 J = 1, N + DO 20 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + R( I ) = MAX( R( I ), CABS1( AB( KD+I-J, J ) ) ) + 20 CONTINUE + 30 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)) +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + KD = KU + 1 + DO 90 J = 1, N + DO 80 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + C( J ) = MAX( C( J ), CABS1( AB( KD+I-J, J ) )*R( I ) ) + 80 CONTINUE + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)) +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of ZGBEQU +* + END diff --git a/dspl/liblapack/SRC/zgbequb.f b/dspl/liblapack/SRC/zgbequb.f new file mode 100644 index 0000000..4b08ac1 --- /dev/null +++ b/dspl/liblapack/SRC/zgbequb.f @@ -0,0 +1,350 @@ +*> \brief \b ZGBEQUB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGBEQUB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGBEQUB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, +* AMAX, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, KL, KU, LDAB, M, N +* DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( * ), R( * ) +* COMPLEX*16 AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGBEQUB computes row and column scalings intended to equilibrate an +*> M-by-N matrix A and reduce its condition number. R returns the row +*> scale factors and C the column scale factors, chosen to try to make +*> the largest element in each row and column of the matrix B with +*> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most +*> the radix. +*> +*> R(i) and C(j) are restricted to be a power of the radix between +*> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use +*> of these scaling factors is not guaranteed to reduce the condition +*> number of A but works well in practice. +*> +*> This routine differs from ZGEEQU by restricting the scaling factors +*> to a power of the radix. Barring over- and underflow, scaling by +*> these factors introduces no additional rounding errors. However, the +*> scaled entries' magnitudes are no longer approximately 1 but lie +*> between sqrt(radix) and 1/sqrt(radix). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array A. LDAB >= max(1,M). +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (M) +*> If INFO = 0 or INFO > M, R contains the row scale factors +*> for A. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, C contains the column scale factors for A. +*> \endverbatim +*> +*> \param[out] ROWCND +*> \verbatim +*> ROWCND is DOUBLE PRECISION +*> If INFO = 0 or INFO > M, ROWCND contains the ratio of the +*> smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +*> AMAX is neither too large nor too small, it is not worth +*> scaling by R. +*> \endverbatim +*> +*> \param[out] COLCND +*> \verbatim +*> COLCND is DOUBLE PRECISION +*> If INFO = 0, COLCND contains the ratio of the smallest +*> C(i) to the largest C(i). If COLCND >= 0.1, it is not +*> worth scaling by C. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= M: the i-th row of A is exactly zero +*> > M: the (i-M)-th column of A is exactly zero +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16GBcomputational +* +* ===================================================================== + SUBROUTINE ZGBEQUB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N + DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ), R( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, KD + DOUBLE PRECISION BIGNUM, RCMAX, RCMIN, SMLNUM, RADIX, + $ LOGRDX + COMPLEX*16 ZDUM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, LOG, REAL, DIMAG +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBEQUB', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. Assume SMLNUM is a power of the radix. +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + RADIX = DLAMCH( 'B' ) + LOGRDX = LOG(RADIX) +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + KD = KU + 1 + DO 30 J = 1, N + DO 20 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + R( I ) = MAX( R( I ), CABS1( AB( KD+I-J, J ) ) ) + 20 CONTINUE + 30 CONTINUE + DO I = 1, M + IF( R( I ).GT.ZERO ) THEN + R( I ) = RADIX**INT( LOG( R( I ) ) / LOGRDX ) + END IF + END DO +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)). +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors. +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + DO 90 J = 1, N + DO 80 I = MAX( J-KU, 1 ), MIN( J+KL, M ) + C( J ) = MAX( C( J ), CABS1( AB( KD+I-J, J ) )*R( I ) ) + 80 CONTINUE + IF( C( J ).GT.ZERO ) THEN + C( J ) = RADIX**INT( LOG( C( J ) ) / LOGRDX ) + END IF + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)). +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of ZGBEQUB +* + END diff --git a/dspl/liblapack/SRC/zgbrfs.f b/dspl/liblapack/SRC/zgbrfs.f new file mode 100644 index 0000000..bb7d58e --- /dev/null +++ b/dspl/liblapack/SRC/zgbrfs.f @@ -0,0 +1,475 @@ +*> \brief \b ZGBRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGBRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, +* IPIV, B, LDB, X, LDX, FERR, BERR, WORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGBRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is banded, and provides +*> error bounds and backward error estimates for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> The original band matrix A, stored in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is COMPLEX*16 array, dimension (LDAFB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by ZGBTRF. U is stored as an upper triangular band +*> matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +*> the multipliers used during the factorization are stored in +*> rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL*KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from ZGBTRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by ZGBTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GBcomputational +* +* ===================================================================== + SUBROUTINE ZGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, + $ IPIV, B, LDB, X, LDX, FERR, BERR, WORK, RWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANSN, TRANST + INTEGER COUNT, I, J, K, KASE, KK, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZGBMV, ZGBTRS, ZLACN2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -7 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -9 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = MIN( KL+KU+2, N+1 ) + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL ZGBMV( TRANS, N, N, KL, KU, -CONE, AB, LDAB, X( 1, J ), 1, + $ CONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(op(A))*abs(X) + abs(B). +* + IF( NOTRAN ) THEN + DO 50 K = 1, N + KK = KU + 1 - K + XK = CABS1( X( K, J ) ) + DO 40 I = MAX( 1, K-KU ), MIN( N, K+KL ) + RWORK( I ) = RWORK( I ) + CABS1( AB( KK+I, K ) )*XK + 40 CONTINUE + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + KK = KU + 1 - K + DO 60 I = MAX( 1, K-KU ), MIN( N, K+KL ) + S = S + CABS1( AB( KK+I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV, WORK, N, + $ INFO ) + CALL ZAXPY( N, CONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL ZGBTRS( TRANST, N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL ZGBTRS( TRANSN, N, KL, KU, 1, AFB, LDAFB, IPIV, + $ WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of ZGBRFS +* + END diff --git a/dspl/liblapack/SRC/zgbrfsx.f b/dspl/liblapack/SRC/zgbrfsx.f new file mode 100644 index 0000000..e40d7d2 --- /dev/null +++ b/dspl/liblapack/SRC/zgbrfsx.f @@ -0,0 +1,763 @@ +*> \brief \b ZGBRFSX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGBRFSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGBRFSX( TRANS, EQUED, N, KL, KU, NRHS, AB, LDAB, AFB, +* LDAFB, IPIV, R, C, B, LDB, X, LDX, RCOND, +* BERR, N_ERR_BNDS, ERR_BNDS_NORM, +* ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS, EQUED +* INTEGER INFO, LDAB, LDAFB, LDB, LDX, N, KL, KU, NRHS, +* $ NPARAMS, N_ERR_BNDS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ X( LDX , * ),WORK( * ) +* DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGBRFSX improves the computed solution to a system of linear +*> equations and provides error bounds and backward error estimates +*> for the solution. In addition to normwise error bound, the code +*> provides maximum componentwise error bound if possible. See +*> comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the +*> error bounds. +*> +*> The original system of linear equations may have been equilibrated +*> before calling this routine, as described by arguments EQUED, R +*> and C below. In this case, the solution and error bounds returned +*> are for the original unequilibrated system. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done to A +*> before calling this routine. This is needed to compute +*> the solution and error bounds correctly. +*> = 'N': No equilibration +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> The right hand side B has been changed accordingly. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> The original band matrix A, stored in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is COMPLEX*16 array, dimension (LDAFB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by DGBTRF. U is stored as an upper triangular band +*> matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +*> the multipliers used during the factorization are stored in +*> rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL*KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from DGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. R is an input argument if FACT = 'F'; +*> otherwise, R is an output argument. If FACT = 'F' and +*> EQUED = 'R' or 'B', each element of R must be positive. +*> If R is output, each element of R is a power of the radix. +*> If R is input, each element of R should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. C is an input argument if FACT = 'F'; +*> otherwise, C is an output argument. If FACT = 'F' and +*> EQUED = 'C' or 'B', each element of C must be positive. +*> If C is output, each element of C is a power of the radix. +*> If C is input, each element of C should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by DGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is DOUBLE PRECISION array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0D+0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16GBcomputational +* +* ===================================================================== + SUBROUTINE ZGBRFSX( TRANS, EQUED, N, KL, KU, NRHS, AB, LDAB, AFB, + $ LDAFB, IPIV, R, C, B, LDB, X, LDX, RCOND, + $ BERR, N_ERR_BNDS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, RWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER TRANS, EQUED + INTEGER INFO, LDAB, LDAFB, LDB, LDX, N, KL, KU, NRHS, + $ NPARAMS, N_ERR_BNDS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ X( LDX , * ),WORK( * ) + DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION ITREF_DEFAULT, ITHRESH_DEFAULT + DOUBLE PRECISION COMPONENTWISE_DEFAULT, RTHRESH_DEFAULT + DOUBLE PRECISION DZTHRESH_DEFAULT + PARAMETER ( ITREF_DEFAULT = 1.0D+0 ) + PARAMETER ( ITHRESH_DEFAULT = 10.0D+0 ) + PARAMETER ( COMPONENTWISE_DEFAULT = 1.0D+0 ) + PARAMETER ( RTHRESH_DEFAULT = 0.5D+0 ) + PARAMETER ( DZTHRESH_DEFAULT = 0.25D+0 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. Local Scalars .. + CHARACTER(1) NORM + LOGICAL ROWEQU, COLEQU, NOTRAN, IGNORE_CWISE + INTEGER J, TRANS_TYPE, PREC_TYPE, REF_TYPE, N_NORMS, + $ ITHRESH + DOUBLE PRECISION ANORM, RCOND_TMP, ILLRCOND_THRESH, ERR_LBND, + $ CWISE_WRONG, RTHRESH, UNSTABLE_THRESH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGBCON, ZLA_GBRFSX_EXTENDED +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT, TRANSFER +* .. +* .. External Functions .. + EXTERNAL LSAME, ILAPREC + EXTERNAL DLAMCH, ZLANGB, ZLA_GBRCOND_X, ZLA_GBRCOND_C + DOUBLE PRECISION DLAMCH, ZLANGB, ZLA_GBRCOND_X, ZLA_GBRCOND_C + LOGICAL LSAME + INTEGER ILATRANS, ILAPREC +* .. +* .. Executable Statements .. +* +* Check the input parameters. +* + INFO = 0 + TRANS_TYPE = ILATRANS( TRANS ) + REF_TYPE = INT( ITREF_DEFAULT ) + IF ( NPARAMS .GE. LA_LINRX_ITREF_I ) THEN + IF ( PARAMS( LA_LINRX_ITREF_I ) .LT. 0.0D+0 ) THEN + PARAMS( LA_LINRX_ITREF_I ) = ITREF_DEFAULT + ELSE + REF_TYPE = PARAMS( LA_LINRX_ITREF_I ) + END IF + END IF +* +* Set default parameters. +* + ILLRCOND_THRESH = DBLE( N ) * DLAMCH( 'Epsilon' ) + ITHRESH = INT( ITHRESH_DEFAULT ) + RTHRESH = RTHRESH_DEFAULT + UNSTABLE_THRESH = DZTHRESH_DEFAULT + IGNORE_CWISE = COMPONENTWISE_DEFAULT .EQ. 0.0D+0 +* + IF ( NPARAMS.GE.LA_LINRX_ITHRESH_I ) THEN + IF ( PARAMS( LA_LINRX_ITHRESH_I ).LT.0.0D+0 ) THEN + PARAMS( LA_LINRX_ITHRESH_I ) = ITHRESH + ELSE + ITHRESH = INT( PARAMS( LA_LINRX_ITHRESH_I ) ) + END IF + END IF + IF ( NPARAMS.GE.LA_LINRX_CWISE_I ) THEN + IF ( PARAMS( LA_LINRX_CWISE_I ).LT.0.0D+0 ) THEN + IF ( IGNORE_CWISE ) THEN + PARAMS( LA_LINRX_CWISE_I ) = 0.0D+0 + ELSE + PARAMS( LA_LINRX_CWISE_I ) = 1.0D+0 + END IF + ELSE + IGNORE_CWISE = PARAMS( LA_LINRX_CWISE_I ) .EQ. 0.0D+0 + END IF + END IF + IF ( REF_TYPE .EQ. 0 .OR. N_ERR_BNDS .EQ. 0 ) THEN + N_NORMS = 0 + ELSE IF ( IGNORE_CWISE ) THEN + N_NORMS = 1 + ELSE + N_NORMS = 2 + END IF +* + NOTRAN = LSAME( TRANS, 'N' ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) +* +* Test input parameters. +* + IF( TRANS_TYPE.EQ.-1 ) THEN + INFO = -1 + ELSE IF( .NOT.ROWEQU .AND. .NOT.COLEQU .AND. + $ .NOT.LSAME( EQUED, 'N' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KL.LT.0 ) THEN + INFO = -4 + ELSE IF( KU.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -8 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBRFSX', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + RCOND = 1.0D+0 + DO J = 1, NRHS + BERR( J ) = 0.0D+0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 0.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 0.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 1.0D+0 + END IF + END DO + RETURN + END IF +* +* Default to failure. +* + RCOND = 0.0D+0 + DO J = 1, NRHS + BERR( J ) = 1.0D+0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 0.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 0.0D+0 + END IF + END DO +* +* Compute the norm of A and the reciprocal of the condition +* number of A. +* + IF( NOTRAN ) THEN + NORM = 'I' + ELSE + NORM = '1' + END IF + ANORM = ZLANGB( NORM, N, KL, KU, AB, LDAB, RWORK ) + CALL ZGBCON( NORM, N, KL, KU, AFB, LDAFB, IPIV, ANORM, RCOND, + $ WORK, RWORK, INFO ) +* +* Perform refinement on each right-hand side +* + IF ( REF_TYPE .NE. 0 .AND. INFO .EQ. 0 ) THEN + + PREC_TYPE = ILAPREC( 'E' ) + + IF ( NOTRAN ) THEN + CALL ZLA_GBRFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, KL, KU, + $ NRHS, AB, LDAB, AFB, LDAFB, IPIV, COLEQU, C, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, WORK, RWORK, WORK(N+1), + $ TRANSFER (RWORK(1:2*N), (/ (ZERO, ZERO) /), N), + $ RCOND, ITHRESH, RTHRESH, UNSTABLE_THRESH, IGNORE_CWISE, + $ INFO ) + ELSE + CALL ZLA_GBRFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, KL, KU, + $ NRHS, AB, LDAB, AFB, LDAFB, IPIV, ROWEQU, R, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, WORK, RWORK, WORK(N+1), + $ TRANSFER (RWORK(1:2*N), (/ (ZERO, ZERO) /), N), + $ RCOND, ITHRESH, RTHRESH, UNSTABLE_THRESH, IGNORE_CWISE, + $ INFO ) + END IF + END IF + + ERR_LBND = MAX( 10.0D+0, SQRT( DBLE( N ) ) ) * DLAMCH( 'Epsilon' ) + IF (N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 1) THEN +* +* Compute scaled normwise condition number cond(A*C). +* + IF ( COLEQU .AND. NOTRAN ) THEN + RCOND_TMP = ZLA_GBRCOND_C( TRANS, N, KL, KU, AB, LDAB, AFB, + $ LDAFB, IPIV, C, .TRUE., INFO, WORK, RWORK ) + ELSE IF ( ROWEQU .AND. .NOT. NOTRAN ) THEN + RCOND_TMP = ZLA_GBRCOND_C( TRANS, N, KL, KU, AB, LDAB, AFB, + $ LDAFB, IPIV, R, .TRUE., INFO, WORK, RWORK ) + ELSE + RCOND_TMP = ZLA_GBRCOND_C( TRANS, N, KL, KU, AB, LDAB, AFB, + $ LDAFB, IPIV, C, .FALSE., INFO, WORK, RWORK ) + END IF + DO J = 1, NRHS +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .GT. 1.0D+0) + $ ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 0.0D+0 + IF ( INFO .LE. N ) INFO = N + J + ELSE IF ( ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .LT. ERR_LBND ) + $ THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF + + IF (N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 2) THEN +* +* Compute componentwise condition number cond(A*diag(Y(:,J))) for +* each right-hand side using the current solution as an estimate of +* the true solution. If the componentwise error estimate is too +* large, then the solution is a lousy estimate of truth and the +* estimated RCOND may be too optimistic. To avoid misleading users, +* the inverse condition number is set to 0.0 when the estimated +* cwise error is at least CWISE_WRONG. +* + CWISE_WRONG = SQRT( DLAMCH( 'Epsilon' ) ) + DO J = 1, NRHS + IF (ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .LT. CWISE_WRONG ) + $ THEN + RCOND_TMP = ZLA_GBRCOND_X( TRANS, N, KL, KU, AB, LDAB, + $ AFB, LDAFB, IPIV, X( 1, J ), INFO, WORK, RWORK ) + ELSE + RCOND_TMP = 0.0D+0 + END IF +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .GT. 1.0D+0 ) + $ ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 0.0D+0 + IF ( PARAMS( LA_LINRX_CWISE_I ) .EQ. 1.0D+0 + $ .AND. INFO.LT.N + J ) INFO = N + J + ELSE IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) + $ .LT. ERR_LBND ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF +* + RETURN +* +* End of ZGBRFSX +* + END diff --git a/dspl/liblapack/SRC/zgbsv.f b/dspl/liblapack/SRC/zgbsv.f new file mode 100644 index 0000000..7bc4c44 --- /dev/null +++ b/dspl/liblapack/SRC/zgbsv.f @@ -0,0 +1,223 @@ +*> \brief ZGBSV computes the solution to system of linear equations A * X = B for GB matrices (simple driver) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGBSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 AB( LDAB, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGBSV computes the solution to a complex system of linear equations +*> A * X = B, where A is a band matrix of order N with KL subdiagonals +*> and KU superdiagonals, and X and B are N-by-NRHS matrices. +*> +*> The LU decomposition with partial pivoting and row interchanges is +*> used to factor A as A = L * U, where L is a product of permutation +*> and unit lower triangular matrices with KL subdiagonals, and U is +*> upper triangular with KL+KU superdiagonals. The factored form of A +*> is then used to solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows KL+1 to +*> 2*KL+KU+1; rows 1 to KL of the array need not be set. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KL+KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+KL) +*> On exit, details of the factorization: U is stored as an +*> upper triangular band matrix with KL+KU superdiagonals in +*> rows 1 to KL+KU+1, and the multipliers used during the +*> factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices that define the permutation matrix P; +*> row i of the matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and the solution has not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GBsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> M = N = 6, KL = 2, KU = 1: +*> +*> On entry: On exit: +*> +*> * * * + + + * * * u14 u25 u36 +*> * * + + + + * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +*> a31 a42 a53 a64 * * m31 m42 m53 m64 * * +*> +*> Array elements marked * are not used by the routine; elements marked +*> + need not be set on entry, but are required by the routine to store +*> elements of U because of fill-in resulting from the row interchanges. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AB( LDAB, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL XERBLA, ZGBTRF, ZGBTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( KL.LT.0 ) THEN + INFO = -2 + ELSE IF( KU.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBSV ', -INFO ) + RETURN + END IF +* +* Compute the LU factorization of the band matrix A. +* + CALL ZGBTRF( N, N, KL, KU, AB, LDAB, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZGBTRS( 'No transpose', N, KL, KU, NRHS, AB, LDAB, IPIV, + $ B, LDB, INFO ) + END IF + RETURN +* +* End of ZGBSV +* + END diff --git a/dspl/liblapack/SRC/zgbsvx.f b/dspl/liblapack/SRC/zgbsvx.f new file mode 100644 index 0000000..0e55866 --- /dev/null +++ b/dspl/liblapack/SRC/zgbsvx.f @@ -0,0 +1,647 @@ +*> \brief ZGBSVX computes the solution to system of linear equations A * X = B for GB matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGBSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGBSVX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, +* LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, +* RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, TRANS +* INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION BERR( * ), C( * ), FERR( * ), R( * ), +* $ RWORK( * ) +* COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGBSVX uses the LU factorization to compute the solution to a complex +*> system of linear equations A * X = B, A**T * X = B, or A**H * X = B, +*> where A is a band matrix of order N with KL subdiagonals and KU +*> superdiagonals, and X and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed by this subroutine: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +*> TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +*> TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +*> or diag(C)*B (if TRANS = 'T' or 'C'). +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the +*> matrix A (after equilibration if FACT = 'E') as +*> A = L * U, +*> where L is a product of permutation and unit lower triangular +*> matrices with KL subdiagonals, and U is upper triangular with +*> KL+KU superdiagonals. +*> +*> 3. If some U(i,i)=0, so that U is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +*> that it solves the original system before equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AFB and IPIV contain the factored form of +*> A. If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by R and C. +*> AB, AFB, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AFB and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AFB and factored. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations. +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> +*> If FACT = 'F' and EQUED is not 'N', then A must have been +*> equilibrated by the scaling factors in R and/or C. AB is not +*> modified if FACT = 'F' or 'N', or if FACT = 'E' and +*> EQUED = 'N' on exit. +*> +*> On exit, if EQUED .ne. 'N', A is scaled as follows: +*> EQUED = 'R': A := diag(R) * A +*> EQUED = 'C': A := A * diag(C) +*> EQUED = 'B': A := diag(R) * A * diag(C). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in,out] AFB +*> \verbatim +*> AFB is COMPLEX*16 array, dimension (LDAFB,N) +*> If FACT = 'F', then AFB is an input argument and on entry +*> contains details of the LU factorization of the band matrix +*> A, as computed by ZGBTRF. U is stored as an upper triangular +*> band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +*> and the multipliers used during the factorization are stored +*> in rows KL+KU+2 to 2*KL+KU+1. If EQUED .ne. 'N', then AFB is +*> the factored form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AFB is an output argument and on exit +*> returns details of the LU factorization of A. +*> +*> If FACT = 'E', then AFB is an output argument and on exit +*> returns details of the LU factorization of the equilibrated +*> matrix A (see the description of AB for the form of the +*> equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains the pivot indices from the factorization A = L*U +*> as computed by ZGBTRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = L*U +*> of the equilibrated matrix A. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. R is an input argument if FACT = 'F'; +*> otherwise, R is an output argument. If FACT = 'F' and +*> EQUED = 'R' or 'B', each element of R must be positive. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. C is an input argument if FACT = 'F'; +*> otherwise, C is an output argument. If FACT = 'F' and +*> EQUED = 'C' or 'B', each element of C must be positive. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +*> diag(R)*B; +*> if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +*> overwritten by diag(C)*B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X +*> to the original system of equations. Note that A and B are +*> modified on exit if EQUED .ne. 'N', and the solution to the +*> equilibrated system is inv(diag(C))*X if TRANS = 'N' and +*> EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' +*> and EQUED = 'R' or 'B'. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The estimate of the reciprocal condition number of the matrix +*> A after equilibration (if done). If RCOND is less than the +*> machine precision (in particular, if RCOND = 0), the matrix +*> is singular to working precision. This condition is +*> indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> On exit, RWORK(1) contains the reciprocal pivot growth +*> factor norm(A)/norm(U). The "max absolute element" norm is +*> used. If RWORK(1) is much less than 1, then the stability +*> of the LU factorization of the (equilibrated) matrix A +*> could be poor. This also means that the solution X, condition +*> estimator RCOND, and forward error bound FERR could be +*> unreliable. If factorization fails with 0 RWORK(1) contains the reciprocal pivot growth factor for the +*> leading INFO columns of A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, so the solution and error bounds +*> could not be computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16GBsolve +* +* ===================================================================== + SUBROUTINE ZGBSVX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, + $ LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, + $ RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), C( * ), FERR( * ), R( * ), + $ RWORK( * ) + COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* Moved setting of INFO = N+1 so INFO does not subsequently get +* overwritten. Sven, 17 Mar 05. +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + CHARACTER NORM + INTEGER I, INFEQU, J, J1, J2 + DOUBLE PRECISION AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, RPVGRW, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANGB, ZLANTB + EXTERNAL LSAME, DLAMCH, ZLANGB, ZLANTB +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZGBCON, ZGBEQU, ZGBRFS, ZGBTRF, + $ ZGBTRS, ZLACPY, ZLAQGB +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KL.LT.0 ) THEN + INFO = -4 + ELSE IF( KU.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -8 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -10 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -12 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -13 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -14 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -16 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -18 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL ZGBEQU( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL ZLAQGB( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = R( I )*B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( COLEQU ) THEN + DO 60 J = 1, NRHS + DO 50 I = 1, N + B( I, J ) = C( I )*B( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of the band matrix A. +* + DO 70 J = 1, N + J1 = MAX( J-KU, 1 ) + J2 = MIN( J+KL, N ) + CALL ZCOPY( J2-J1+1, AB( KU+1-J+J1, J ), 1, + $ AFB( KL+KU+1-J+J1, J ), 1 ) + 70 CONTINUE +* + CALL ZGBTRF( N, N, KL, KU, AFB, LDAFB, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + ANORM = ZERO + DO 90 J = 1, INFO + DO 80 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) + ANORM = MAX( ANORM, ABS( AB( I, J ) ) ) + 80 CONTINUE + 90 CONTINUE + RPVGRW = ZLANTB( 'M', 'U', 'N', INFO, MIN( INFO-1, KL+KU ), + $ AFB( MAX( 1, KL+KU+2-INFO ), 1 ), LDAFB, + $ RWORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = ANORM / RPVGRW + END IF + RWORK( 1 ) = RPVGRW + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A and the +* reciprocal pivot growth factor RPVGRW. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = ZLANGB( NORM, N, KL, KU, AB, LDAB, RWORK ) + RPVGRW = ZLANTB( 'M', 'U', 'N', N, KL+KU, AFB, LDAFB, RWORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = ZLANGB( 'M', N, KL, KU, AB, LDAB, RWORK ) / RPVGRW + END IF +* +* Compute the reciprocal of the condition number of A. +* + CALL ZGBCON( NORM, N, KL, KU, AFB, LDAFB, IPIV, ANORM, RCOND, + $ WORK, RWORK, INFO ) +* +* Compute the solution matrix X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZGBTRS( TRANS, N, KL, KU, NRHS, AFB, LDAFB, IPIV, X, LDX, + $ INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL ZGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, IPIV, + $ B, LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( NOTRAN ) THEN + IF( COLEQU ) THEN + DO 110 J = 1, NRHS + DO 100 I = 1, N + X( I, J ) = C( I )*X( I, J ) + 100 CONTINUE + 110 CONTINUE + DO 120 J = 1, NRHS + FERR( J ) = FERR( J ) / COLCND + 120 CONTINUE + END IF + ELSE IF( ROWEQU ) THEN + DO 140 J = 1, NRHS + DO 130 I = 1, N + X( I, J ) = R( I )*X( I, J ) + 130 CONTINUE + 140 CONTINUE + DO 150 J = 1, NRHS + FERR( J ) = FERR( J ) / ROWCND + 150 CONTINUE + END IF +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RWORK( 1 ) = RPVGRW + RETURN +* +* End of ZGBSVX +* + END diff --git a/dspl/liblapack/SRC/zgbsvxx.f b/dspl/liblapack/SRC/zgbsvxx.f new file mode 100644 index 0000000..9ba9c2e --- /dev/null +++ b/dspl/liblapack/SRC/zgbsvxx.f @@ -0,0 +1,800 @@ +*> \brief ZGBSVXX computes the solution to system of linear equations A * X = B for GB matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGBSVXX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGBSVXX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, +* LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, +* RCOND, RPVGRW, BERR, N_ERR_BNDS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, +* WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, TRANS +* INTEGER INFO, LDAB, LDAFB, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* DOUBLE PRECISION RCOND, RPVGRW +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ X( LDX , * ),WORK( * ) +* DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGBSVXX uses the LU factorization to compute the solution to a +*> complex*16 system of linear equations A * X = B, where A is an +*> N-by-N matrix and X and B are N-by-NRHS matrices. +*> +*> If requested, both normwise and maximum componentwise error bounds +*> are returned. ZGBSVXX will return a solution with a tiny +*> guaranteed error (O(eps) where eps is the working machine +*> precision) unless the matrix is very ill-conditioned, in which +*> case a warning is returned. Relevant condition numbers also are +*> calculated and returned. +*> +*> ZGBSVXX accepts user-provided factorizations and equilibration +*> factors; see the definitions of the FACT and EQUED options. +*> Solving with refinement and using a factorization from a previous +*> ZGBSVXX call will also produce a solution with either O(eps) +*> errors or warnings, but we cannot make that claim for general +*> user-provided factorizations and equilibration factors if they +*> differ from what ZGBSVXX would itself produce. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', double precision scaling factors are computed to equilibrate +*> the system: +*> +*> TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +*> TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +*> TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +*> +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +*> or diag(C)*B (if TRANS = 'T' or 'C'). +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor +*> the matrix A (after equilibration if FACT = 'E') as +*> +*> A = P * L * U, +*> +*> where P is a permutation matrix, L is a unit lower triangular +*> matrix, and U is upper triangular. +*> +*> 3. If some U(i,i)=0, so that U is exactly singular, then the +*> routine returns with INFO = i. Otherwise, the factored form of A +*> is used to estimate the condition number of the matrix A (see +*> argument RCOND). If the reciprocal of the condition number is less +*> than machine precision, the routine still goes on to solve for X +*> and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), +*> the routine will use iterative refinement to try to get a small +*> error and error bounds. Refinement calculates the residual to at +*> least twice the working precision. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +*> that it solves the original system before equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF and IPIV contain the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by R and C. +*> A, AF, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate Transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> +*> If FACT = 'F' and EQUED is not 'N', then AB must have been +*> equilibrated by the scaling factors in R and/or C. AB is not +*> modified if FACT = 'F' or 'N', or if FACT = 'E' and +*> EQUED = 'N' on exit. +*> +*> On exit, if EQUED .ne. 'N', A is scaled as follows: +*> EQUED = 'R': A := diag(R) * A +*> EQUED = 'C': A := A * diag(C) +*> EQUED = 'B': A := diag(R) * A * diag(C). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in,out] AFB +*> \verbatim +*> AFB is COMPLEX*16 array, dimension (LDAFB,N) +*> If FACT = 'F', then AFB is an input argument and on entry +*> contains details of the LU factorization of the band matrix +*> A, as computed by ZGBTRF. U is stored as an upper triangular +*> band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +*> and the multipliers used during the factorization are stored +*> in rows KL+KU+2 to 2*KL+KU+1. If EQUED .ne. 'N', then AFB is +*> the factored form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the equilibrated matrix A (see the description of A for +*> the form of the equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains the pivot indices from the factorization A = P*L*U +*> as computed by DGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the equilibrated matrix A. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. R is an input argument if FACT = 'F'; +*> otherwise, R is an output argument. If FACT = 'F' and +*> EQUED = 'R' or 'B', each element of R must be positive. +*> If R is output, each element of R is a power of the radix. +*> If R is input, each element of R should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. C is an input argument if FACT = 'F'; +*> otherwise, C is an output argument. If FACT = 'F' and +*> EQUED = 'C' or 'B', each element of C must be positive. +*> If C is output, each element of C is a power of the radix. +*> If C is input, each element of C should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +*> diag(R)*B; +*> if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +*> overwritten by diag(C)*B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X to the original +*> system of equations. Note that A and B are modified on exit +*> if EQUED .ne. 'N', and the solution to the equilibrated system is +*> inv(diag(C))*X if TRANS = 'N' and EQUED = 'C' or 'B', or +*> inv(diag(R))*X if TRANS = 'T' or 'C' and EQUED = 'R' or 'B'. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] RPVGRW +*> \verbatim +*> RPVGRW is DOUBLE PRECISION +*> Reciprocal pivot growth. On exit, this contains the reciprocal +*> pivot growth factor norm(A)/norm(U). The "max absolute element" +*> norm is used. If this is much less than 1, then the stability of +*> the LU factorization of the (equilibrated) matrix A could be poor. +*> This also means that the solution X, estimated condition numbers, +*> and error bounds could be unreliable. If factorization fails with +*> 0 for the leading INFO columns of A. In DGESVX, this quantity is +*> returned in WORK(1). +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is DOUBLE PRECISION array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0D+0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the extra-precise refinement algorithm. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16GBsolve +* +* ===================================================================== + SUBROUTINE ZGBSVXX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, + $ LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX, + $ RCOND, RPVGRW, BERR, N_ERR_BNDS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, LDAB, LDAFB, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + DOUBLE PRECISION RCOND, RPVGRW +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ X( LDX , * ),WORK( * ) + DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + INTEGER INFEQU, I, J, KL, KU + DOUBLE PRECISION AMAX, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, SMLNUM +* .. +* .. External Functions .. + EXTERNAL LSAME, DLAMCH, ZLA_GBRPVGRW + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLA_GBRPVGRW +* .. +* .. External Subroutines .. + EXTERNAL ZGBEQUB, ZGBTRF, ZGBTRS, ZLACPY, ZLAQGB, + $ XERBLA, ZLASCL2, ZGBRFSX +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF +* +* Default is failure. If an input parameter is wrong or +* factorization fails, make everything look horrible. Only the +* pivot growth is set here, the rest is initialized in ZGBRFSX. +* + RPVGRW = ZERO +* +* Test the input parameters. PARAMS is not tested until DGERFSX. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT. + $ LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KL.LT.0 ) THEN + INFO = -4 + ELSE IF( KU.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -8 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -10 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -12 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -13 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -14 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -15 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBSVXX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL ZGBEQUB( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL ZLAQGB( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF +* +* If the scaling factors are not applied, set them to 1.0. +* + IF ( .NOT.ROWEQU ) THEN + DO J = 1, N + R( J ) = 1.0D+0 + END DO + END IF + IF ( .NOT.COLEQU ) THEN + DO J = 1, N + C( J ) = 1.0D+0 + END DO + END IF + END IF +* +* Scale the right-hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) CALL ZLASCL2( N, NRHS, R, B, LDB ) + ELSE + IF( COLEQU ) CALL ZLASCL2( N, NRHS, C, B, LDB ) + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of A. +* + DO 40, J = 1, N + DO 30, I = KL+1, 2*KL+KU+1 + AFB( I, J ) = AB( I-KL, J ) + 30 CONTINUE + 40 CONTINUE + CALL ZGBTRF( N, N, KL, KU, AFB, LDAFB, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Pivot in column INFO is exactly 0 +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + RPVGRW = ZLA_GBRPVGRW( N, KL, KU, INFO, AB, LDAB, AFB, + $ LDAFB ) + RETURN + END IF + END IF +* +* Compute the reciprocal pivot growth factor RPVGRW. +* + RPVGRW = ZLA_GBRPVGRW( N, KL, KU, N, AB, LDAB, AFB, LDAFB ) +* +* Compute the solution matrix X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZGBTRS( TRANS, N, KL, KU, NRHS, AFB, LDAFB, IPIV, X, LDX, + $ INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL ZGBRFSX( TRANS, EQUED, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, + $ IPIV, R, C, B, LDB, X, LDX, RCOND, BERR, + $ N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, RWORK, INFO ) + +* +* Scale solutions. +* + IF ( COLEQU .AND. NOTRAN ) THEN + CALL ZLASCL2( N, NRHS, C, X, LDX ) + ELSE IF ( ROWEQU .AND. .NOT.NOTRAN ) THEN + CALL ZLASCL2( N, NRHS, R, X, LDX ) + END IF +* + RETURN +* +* End of ZGBSVXX +* + END diff --git a/dspl/liblapack/SRC/zgbtf2.f b/dspl/liblapack/SRC/zgbtf2.f new file mode 100644 index 0000000..cfddec8 --- /dev/null +++ b/dspl/liblapack/SRC/zgbtf2.f @@ -0,0 +1,277 @@ +*> \brief \b ZGBTF2 computes the LU factorization of a general band matrix using the unblocked version of the algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGBTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGBTF2 computes an LU factorization of a complex m-by-n band matrix +*> A using partial pivoting with row interchanges. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows KL+1 to +*> 2*KL+KU+1; rows 1 to KL of the array need not be set. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +*> +*> On exit, details of the factorization: U is stored as an +*> upper triangular band matrix with KL+KU superdiagonals in +*> rows 1 to KL+KU+1, and the multipliers used during the +*> factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = +i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GBcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> M = N = 6, KL = 2, KU = 1: +*> +*> On entry: On exit: +*> +*> * * * + + + * * * u14 u25 u36 +*> * * + + + + * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +*> a31 a42 a53 a64 * * m31 m42 m53 m64 * * +*> +*> Array elements marked * are not used by the routine; elements marked +*> + need not be set on entry, but are required by the routine to store +*> elements of U, because of fill-in resulting from the row +*> interchanges. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, JP, JU, KM, KV +* .. +* .. External Functions .. + INTEGER IZAMAX + EXTERNAL IZAMAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGERU, ZSCAL, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* KV is the number of superdiagonals in the factor U, allowing for +* fill-in. +* + KV = KU + KL +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KV+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Gaussian elimination with partial pivoting +* +* Set fill-in elements in columns KU+2 to KV to zero. +* + DO 20 J = KU + 2, MIN( KV, N ) + DO 10 I = KV - J + 2, KL + AB( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* JU is the index of the last column affected by the current stage +* of the factorization. +* + JU = 1 +* + DO 40 J = 1, MIN( M, N ) +* +* Set fill-in elements in column J+KV to zero. +* + IF( J+KV.LE.N ) THEN + DO 30 I = 1, KL + AB( I, J+KV ) = ZERO + 30 CONTINUE + END IF +* +* Find pivot and test for singularity. KM is the number of +* subdiagonal elements in the current column. +* + KM = MIN( KL, M-J ) + JP = IZAMAX( KM+1, AB( KV+1, J ), 1 ) + IPIV( J ) = JP + J - 1 + IF( AB( KV+JP, J ).NE.ZERO ) THEN + JU = MAX( JU, MIN( J+KU+JP-1, N ) ) +* +* Apply interchange to columns J to JU. +* + IF( JP.NE.1 ) + $ CALL ZSWAP( JU-J+1, AB( KV+JP, J ), LDAB-1, + $ AB( KV+1, J ), LDAB-1 ) + IF( KM.GT.0 ) THEN +* +* Compute multipliers. +* + CALL ZSCAL( KM, ONE / AB( KV+1, J ), AB( KV+2, J ), 1 ) +* +* Update trailing submatrix within the band. +* + IF( JU.GT.J ) + $ CALL ZGERU( KM, JU-J, -ONE, AB( KV+2, J ), 1, + $ AB( KV, J+1 ), LDAB-1, AB( KV+1, J+1 ), + $ LDAB-1 ) + END IF + ELSE +* +* If pivot is zero, set INFO to the index of the pivot +* unless a zero pivot has already been found. +* + IF( INFO.EQ.0 ) + $ INFO = J + END IF + 40 CONTINUE + RETURN +* +* End of ZGBTF2 +* + END diff --git a/dspl/liblapack/SRC/zgbtrf.f b/dspl/liblapack/SRC/zgbtrf.f new file mode 100644 index 0000000..533f5ee --- /dev/null +++ b/dspl/liblapack/SRC/zgbtrf.f @@ -0,0 +1,517 @@ +*> \brief \b ZGBTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGBTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGBTRF computes an LU factorization of a complex m-by-n band matrix A +*> using partial pivoting with row interchanges. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows KL+1 to +*> 2*KL+KU+1; rows 1 to KL of the array need not be set. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +*> +*> On exit, details of the factorization: U is stored as an +*> upper triangular band matrix with KL+KU superdiagonals in +*> rows 1 to KL+KU+1, and the multipliers used during the +*> factorization are stored in rows KL+KU+2 to 2*KL+KU+1. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = +i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GBcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> M = N = 6, KL = 2, KU = 1: +*> +*> On entry: On exit: +*> +*> * * * + + + * * * u14 u25 u36 +*> * * + + + + * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * +*> a31 a42 a53 a64 * * m31 m42 m53 m64 * * +*> +*> Array elements marked * are not used by the routine; elements marked +*> + need not be set on entry, but are required by the routine to store +*> elements of U because of fill-in resulting from the row interchanges. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, KL, KU, LDAB, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) + INTEGER NBMAX, LDWORK + PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 ) +* .. +* .. Local Scalars .. + INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP, + $ JU, K2, KM, KV, NB, NW + COMPLEX*16 TEMP +* .. +* .. Local Arrays .. + COMPLEX*16 WORK13( LDWORK, NBMAX ), + $ WORK31( LDWORK, NBMAX ) +* .. +* .. External Functions .. + INTEGER ILAENV, IZAMAX + EXTERNAL ILAENV, IZAMAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZGBTF2, ZGEMM, ZGERU, ZLASWP, + $ ZSCAL, ZSWAP, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* KV is the number of superdiagonals in the factor U, allowing for +* fill-in +* + KV = KU + KL +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KV+1 ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment +* + NB = ILAENV( 1, 'ZGBTRF', ' ', M, N, KL, KU ) +* +* The block size must not exceed the limit set by the size of the +* local arrays WORK13 and WORK31. +* + NB = MIN( NB, NBMAX ) +* + IF( NB.LE.1 .OR. NB.GT.KL ) THEN +* +* Use unblocked code +* + CALL ZGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO ) + ELSE +* +* Use blocked code +* +* Zero the superdiagonal elements of the work array WORK13 +* + DO 20 J = 1, NB + DO 10 I = 1, J - 1 + WORK13( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Zero the subdiagonal elements of the work array WORK31 +* + DO 40 J = 1, NB + DO 30 I = J + 1, NB + WORK31( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Gaussian elimination with partial pivoting +* +* Set fill-in elements in columns KU+2 to KV to zero +* + DO 60 J = KU + 2, MIN( KV, N ) + DO 50 I = KV - J + 2, KL + AB( I, J ) = ZERO + 50 CONTINUE + 60 CONTINUE +* +* JU is the index of the last column affected by the current +* stage of the factorization +* + JU = 1 +* + DO 180 J = 1, MIN( M, N ), NB + JB = MIN( NB, MIN( M, N )-J+1 ) +* +* The active part of the matrix is partitioned +* +* A11 A12 A13 +* A21 A22 A23 +* A31 A32 A33 +* +* Here A11, A21 and A31 denote the current block of JB columns +* which is about to be factorized. The number of rows in the +* partitioning are JB, I2, I3 respectively, and the numbers +* of columns are JB, J2, J3. The superdiagonal elements of A13 +* and the subdiagonal elements of A31 lie outside the band. +* + I2 = MIN( KL-JB, M-J-JB+1 ) + I3 = MIN( JB, M-J-KL+1 ) +* +* J2 and J3 are computed after JU has been updated. +* +* Factorize the current block of JB columns +* + DO 80 JJ = J, J + JB - 1 +* +* Set fill-in elements in column JJ+KV to zero +* + IF( JJ+KV.LE.N ) THEN + DO 70 I = 1, KL + AB( I, JJ+KV ) = ZERO + 70 CONTINUE + END IF +* +* Find pivot and test for singularity. KM is the number of +* subdiagonal elements in the current column. +* + KM = MIN( KL, M-JJ ) + JP = IZAMAX( KM+1, AB( KV+1, JJ ), 1 ) + IPIV( JJ ) = JP + JJ - J + IF( AB( KV+JP, JJ ).NE.ZERO ) THEN + JU = MAX( JU, MIN( JJ+KU+JP-1, N ) ) + IF( JP.NE.1 ) THEN +* +* Apply interchange to columns J to J+JB-1 +* + IF( JP+JJ-1.LT.J+KL ) THEN +* + CALL ZSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1, + $ AB( KV+JP+JJ-J, J ), LDAB-1 ) + ELSE +* +* The interchange affects columns J to JJ-1 of A31 +* which are stored in the work array WORK31 +* + CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) + CALL ZSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1, + $ AB( KV+JP, JJ ), LDAB-1 ) + END IF + END IF +* +* Compute multipliers +* + CALL ZSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ), + $ 1 ) +* +* Update trailing submatrix within the band and within +* the current block. JM is the index of the last column +* which needs to be updated. +* + JM = MIN( JU, J+JB-1 ) + IF( JM.GT.JJ ) + $ CALL ZGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1, + $ AB( KV, JJ+1 ), LDAB-1, + $ AB( KV+1, JJ+1 ), LDAB-1 ) + ELSE +* +* If pivot is zero, set INFO to the index of the pivot +* unless a zero pivot has already been found. +* + IF( INFO.EQ.0 ) + $ INFO = JJ + END IF +* +* Copy current column of A31 into the work array WORK31 +* + NW = MIN( JJ-J+1, I3 ) + IF( NW.GT.0 ) + $ CALL ZCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1, + $ WORK31( 1, JJ-J+1 ), 1 ) + 80 CONTINUE + IF( J+JB.LE.N ) THEN +* +* Apply the row interchanges to the other blocks. +* + J2 = MIN( JU-J+1, KV ) - JB + J3 = MAX( 0, JU-J-KV+1 ) +* +* Use ZLASWP to apply the row interchanges to A12, A22, and +* A32. +* + CALL ZLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB, + $ IPIV( J ), 1 ) +* +* Adjust the pivot indices. +* + DO 90 I = J, J + JB - 1 + IPIV( I ) = IPIV( I ) + J - 1 + 90 CONTINUE +* +* Apply the row interchanges to A13, A23, and A33 +* columnwise. +* + K2 = J - 1 + JB + J2 + DO 110 I = 1, J3 + JJ = K2 + I + DO 100 II = J + I - 1, J + JB - 1 + IP = IPIV( II ) + IF( IP.NE.II ) THEN + TEMP = AB( KV+1+II-JJ, JJ ) + AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ ) + AB( KV+1+IP-JJ, JJ ) = TEMP + END IF + 100 CONTINUE + 110 CONTINUE +* +* Update the relevant part of the trailing submatrix +* + IF( J2.GT.0 ) THEN +* +* Update A12 +* + CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, J2, ONE, AB( KV+1, J ), LDAB-1, + $ AB( KV+1-JB, J+JB ), LDAB-1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A22 +* + CALL ZGEMM( 'No transpose', 'No transpose', I2, J2, + $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, + $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, + $ AB( KV+1, J+JB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Update A32 +* + CALL ZGEMM( 'No transpose', 'No transpose', I3, J2, + $ JB, -ONE, WORK31, LDWORK, + $ AB( KV+1-JB, J+JB ), LDAB-1, ONE, + $ AB( KV+KL+1-JB, J+JB ), LDAB-1 ) + END IF + END IF +* + IF( J3.GT.0 ) THEN +* +* Copy the lower triangle of A13 into the work array +* WORK13 +* + DO 130 JJ = 1, J3 + DO 120 II = JJ, JB + WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 ) + 120 CONTINUE + 130 CONTINUE +* +* Update A13 in the work array +* + CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', + $ JB, J3, ONE, AB( KV+1, J ), LDAB-1, + $ WORK13, LDWORK ) +* + IF( I2.GT.0 ) THEN +* +* Update A23 +* + CALL ZGEMM( 'No transpose', 'No transpose', I2, J3, + $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1, + $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ), + $ LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Update A33 +* + CALL ZGEMM( 'No transpose', 'No transpose', I3, J3, + $ JB, -ONE, WORK31, LDWORK, WORK13, + $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 ) + END IF +* +* Copy the lower triangle of A13 back into place +* + DO 150 JJ = 1, J3 + DO 140 II = JJ, JB + AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ ) + 140 CONTINUE + 150 CONTINUE + END IF + ELSE +* +* Adjust the pivot indices. +* + DO 160 I = J, J + JB - 1 + IPIV( I ) = IPIV( I ) + J - 1 + 160 CONTINUE + END IF +* +* Partially undo the interchanges in the current block to +* restore the upper triangular form of A31 and copy the upper +* triangle of A31 back into place +* + DO 170 JJ = J + JB - 1, J, -1 + JP = IPIV( JJ ) - JJ + 1 + IF( JP.NE.1 ) THEN +* +* Apply interchange to columns J to JJ-1 +* + IF( JP+JJ-1.LT.J+KL ) THEN +* +* The interchange does not affect A31 +* + CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ AB( KV+JP+JJ-J, J ), LDAB-1 ) + ELSE +* +* The interchange does affect A31 +* + CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1, + $ WORK31( JP+JJ-J-KL, 1 ), LDWORK ) + END IF + END IF +* +* Copy the current column of A31 back into place +* + NW = MIN( I3, JJ-J+1 ) + IF( NW.GT.0 ) + $ CALL ZCOPY( NW, WORK31( 1, JJ-J+1 ), 1, + $ AB( KV+KL+1-JJ+J, JJ ), 1 ) + 170 CONTINUE + 180 CONTINUE + END IF +* + RETURN +* +* End of ZGBTRF +* + END diff --git a/dspl/liblapack/SRC/zgbtrs.f b/dspl/liblapack/SRC/zgbtrs.f new file mode 100644 index 0000000..8dc9746 --- /dev/null +++ b/dspl/liblapack/SRC/zgbtrs.f @@ -0,0 +1,297 @@ +*> \brief \b ZGBTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGBTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 AB( LDAB, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGBTRS solves a system of linear equations +*> A * X = B, A**T * X = B, or A**H * X = B +*> with a general band matrix A using the LU factorization computed +*> by ZGBTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations. +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by ZGBTRF. U is stored as an upper triangular band +*> matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and +*> the multipliers used during the factorization are stored in +*> rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= N, row i of the matrix was +*> interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GBcomputational +* +* ===================================================================== + SUBROUTINE ZGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, KL, KU, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AB( LDAB, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LNOTI, NOTRAN + INTEGER I, J, KD, L, LM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMV, ZGERU, ZLACGV, ZSWAP, ZTBSV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.( 2*KL+KU+1 ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + KD = KU + KL + 1 + LNOTI = KL.GT.0 +* + IF( NOTRAN ) THEN +* +* Solve A*X = B. +* +* Solve L*X = B, overwriting B with X. +* +* L is represented as a product of permutations and unit lower +* triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1), +* where each transformation L(i) is a rank-one modification of +* the identity matrix. +* + IF( LNOTI ) THEN + DO 10 J = 1, N - 1 + LM = MIN( KL, N-J ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL ZSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + CALL ZGERU( LM, NRHS, -ONE, AB( KD+1, J ), 1, B( J, 1 ), + $ LDB, B( J+1, 1 ), LDB ) + 10 CONTINUE + END IF +* + DO 20 I = 1, NRHS +* +* Solve U*X = B, overwriting B with X. +* + CALL ZTBSV( 'Upper', 'No transpose', 'Non-unit', N, KL+KU, + $ AB, LDAB, B( 1, I ), 1 ) + 20 CONTINUE +* + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Solve A**T * X = B. +* + DO 30 I = 1, NRHS +* +* Solve U**T * X = B, overwriting B with X. +* + CALL ZTBSV( 'Upper', 'Transpose', 'Non-unit', N, KL+KU, AB, + $ LDAB, B( 1, I ), 1 ) + 30 CONTINUE +* +* Solve L**T * X = B, overwriting B with X. +* + IF( LNOTI ) THEN + DO 40 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + CALL ZGEMV( 'Transpose', LM, NRHS, -ONE, B( J+1, 1 ), + $ LDB, AB( KD+1, J ), 1, ONE, B( J, 1 ), LDB ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL ZSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + 40 CONTINUE + END IF +* + ELSE +* +* Solve A**H * X = B. +* + DO 50 I = 1, NRHS +* +* Solve U**H * X = B, overwriting B with X. +* + CALL ZTBSV( 'Upper', 'Conjugate transpose', 'Non-unit', N, + $ KL+KU, AB, LDAB, B( 1, I ), 1 ) + 50 CONTINUE +* +* Solve L**H * X = B, overwriting B with X. +* + IF( LNOTI ) THEN + DO 60 J = N - 1, 1, -1 + LM = MIN( KL, N-J ) + CALL ZLACGV( NRHS, B( J, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', LM, NRHS, -ONE, + $ B( J+1, 1 ), LDB, AB( KD+1, J ), 1, ONE, + $ B( J, 1 ), LDB ) + CALL ZLACGV( NRHS, B( J, 1 ), LDB ) + L = IPIV( J ) + IF( L.NE.J ) + $ CALL ZSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB ) + 60 CONTINUE + END IF + END IF + RETURN +* +* End of ZGBTRS +* + END diff --git a/dspl/liblapack/SRC/zgebak.f b/dspl/liblapack/SRC/zgebak.f new file mode 100644 index 0000000..a9761fd --- /dev/null +++ b/dspl/liblapack/SRC/zgebak.f @@ -0,0 +1,270 @@ +*> \brief \b ZGEBAK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEBAK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOB, SIDE +* INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION SCALE( * ) +* COMPLEX*16 V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEBAK forms the right or left eigenvectors of a complex general +*> matrix by backward transformation on the computed eigenvectors of the +*> balanced matrix output by ZGEBAL. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies the type of backward transformation required: +*> = 'N', do nothing, return immediately; +*> = 'P', do backward transformation for permutation only; +*> = 'S', do backward transformation for scaling only; +*> = 'B', do backward transformations for both permutation and +*> scaling. +*> JOB must be the same as the argument JOB supplied to ZGEBAL. +*> \endverbatim +*> +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': V contains right eigenvectors; +*> = 'L': V contains left eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows of the matrix V. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> The integers ILO and IHI determined by ZGEBAL. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION array, dimension (N) +*> Details of the permutation and scaling factors, as returned +*> by ZGEBAL. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of columns of the matrix V. M >= 0. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (LDV,M) +*> On entry, the matrix of right or left eigenvectors to be +*> transformed, as returned by ZHSEIN or ZTREVC. +*> On exit, V is overwritten by the transformed eigenvectors. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +* ===================================================================== + SUBROUTINE ZGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOB, SIDE + INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION SCALE( * ) + COMPLEX*16 V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LEFTV, RIGHTV + INTEGER I, II, K + DOUBLE PRECISION S +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and Test the input parameters +* + RIGHTV = LSAME( SIDE, 'R' ) + LEFTV = LSAME( SIDE, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -7 + ELSE IF( LDV.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEBAK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( M.EQ.0 ) + $ RETURN + IF( LSAME( JOB, 'N' ) ) + $ RETURN +* + IF( ILO.EQ.IHI ) + $ GO TO 30 +* +* Backward balance +* + IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN +* + IF( RIGHTV ) THEN + DO 10 I = ILO, IHI + S = SCALE( I ) + CALL ZDSCAL( M, S, V( I, 1 ), LDV ) + 10 CONTINUE + END IF +* + IF( LEFTV ) THEN + DO 20 I = ILO, IHI + S = ONE / SCALE( I ) + CALL ZDSCAL( M, S, V( I, 1 ), LDV ) + 20 CONTINUE + END IF +* + END IF +* +* Backward permutation +* +* For I = ILO-1 step -1 until 1, +* IHI+1 step 1 until N do -- +* + 30 CONTINUE + IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN + IF( RIGHTV ) THEN + DO 40 II = 1, N + I = II + IF( I.GE.ILO .AND. I.LE.IHI ) + $ GO TO 40 + IF( I.LT.ILO ) + $ I = ILO - II + K = SCALE( I ) + IF( K.EQ.I ) + $ GO TO 40 + CALL ZSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 40 CONTINUE + END IF +* + IF( LEFTV ) THEN + DO 50 II = 1, N + I = II + IF( I.GE.ILO .AND. I.LE.IHI ) + $ GO TO 50 + IF( I.LT.ILO ) + $ I = ILO - II + K = SCALE( I ) + IF( K.EQ.I ) + $ GO TO 50 + CALL ZSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 50 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZGEBAK +* + END diff --git a/dspl/liblapack/SRC/zgebal.f b/dspl/liblapack/SRC/zgebal.f new file mode 100644 index 0000000..68291e2 --- /dev/null +++ b/dspl/liblapack/SRC/zgebal.f @@ -0,0 +1,400 @@ +*> \brief \b ZGEBAL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEBAL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOB +* INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION SCALE( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEBAL balances a general complex matrix A. This involves, first, +*> permuting A by a similarity transformation to isolate eigenvalues +*> in the first 1 to ILO-1 and last IHI+1 to N elements on the +*> diagonal; and second, applying a diagonal similarity transformation +*> to rows and columns ILO to IHI to make the rows and columns as +*> close in norm as possible. Both steps are optional. +*> +*> Balancing may reduce the 1-norm of the matrix, and improve the +*> accuracy of the computed eigenvalues and/or eigenvectors. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies the operations to be performed on A: +*> = 'N': none: simply set ILO = 1, IHI = N, SCALE(I) = 1.0 +*> for i = 1,...,N; +*> = 'P': permute only; +*> = 'S': scale only; +*> = 'B': both permute and scale. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the input matrix A. +*> On exit, A is overwritten by the balanced matrix. +*> If JOB = 'N', A is not referenced. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[out] IHI +*> \verbatim +*> IHI is INTEGER +*> ILO and IHI are set to INTEGER such that on exit +*> A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N. +*> If JOB = 'N' or 'S', ILO = 1 and IHI = N. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION array, dimension (N) +*> Details of the permutations and scaling factors applied to +*> A. If P(j) is the index of the row and column interchanged +*> with row and column j and D(j) is the scaling factor +*> applied to row and column j, then +*> SCALE(j) = P(j) for j = 1,...,ILO-1 +*> = D(j) for j = ILO,...,IHI +*> = P(j) for j = IHI+1,...,N. +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The permutations consist of row and column interchanges which put +*> the matrix in the form +*> +*> ( T1 X Y ) +*> P A P = ( 0 B Z ) +*> ( 0 0 T2 ) +*> +*> where T1 and T2 are upper triangular matrices whose eigenvalues lie +*> along the diagonal. The column indices ILO and IHI mark the starting +*> and ending columns of the submatrix B. Balancing consists of applying +*> a diagonal similarity transformation inv(D) * B * D to make the +*> 1-norms of each row of B and its corresponding column nearly equal. +*> The output matrix is +*> +*> ( T1 X*D Y ) +*> ( 0 inv(D)*B*D inv(D)*Z ). +*> ( 0 0 T2 ) +*> +*> Information about the permutations P and the diagonal matrix D is +*> returned in the vector SCALE. +*> +*> This subroutine is based on the EISPACK routine CBAL. +*> +*> Modified by Tzu-Yi Chen, Computer Science Division, University of +*> California at Berkeley, USA +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION SCALE( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION SCLFAC + PARAMETER ( SCLFAC = 2.0D+0 ) + DOUBLE PRECISION FACTOR + PARAMETER ( FACTOR = 0.95D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOCONV + INTEGER I, ICA, IEXC, IRA, J, K, L, M + DOUBLE PRECISION C, CA, F, G, R, RA, S, SFMAX1, SFMAX2, SFMIN1, + $ SFMIN2 +* .. +* .. External Functions .. + LOGICAL DISNAN, LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH, DZNRM2 + EXTERNAL DISNAN, LSAME, IZAMAX, DLAMCH, DZNRM2 +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, MIN +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEBAL', -INFO ) + RETURN + END IF +* + K = 1 + L = N +* + IF( N.EQ.0 ) + $ GO TO 210 +* + IF( LSAME( JOB, 'N' ) ) THEN + DO 10 I = 1, N + SCALE( I ) = ONE + 10 CONTINUE + GO TO 210 + END IF +* + IF( LSAME( JOB, 'S' ) ) + $ GO TO 120 +* +* Permutation to isolate eigenvalues if possible +* + GO TO 50 +* +* Row and column exchange. +* + 20 CONTINUE + SCALE( M ) = J + IF( J.EQ.M ) + $ GO TO 30 +* + CALL ZSWAP( L, A( 1, J ), 1, A( 1, M ), 1 ) + CALL ZSWAP( N-K+1, A( J, K ), LDA, A( M, K ), LDA ) +* + 30 CONTINUE + GO TO ( 40, 80 )IEXC +* +* Search for rows isolating an eigenvalue and push them down. +* + 40 CONTINUE + IF( L.EQ.1 ) + $ GO TO 210 + L = L - 1 +* + 50 CONTINUE + DO 70 J = L, 1, -1 +* + DO 60 I = 1, L + IF( I.EQ.J ) + $ GO TO 60 + IF( DBLE( A( J, I ) ).NE.ZERO .OR. DIMAG( A( J, I ) ).NE. + $ ZERO )GO TO 70 + 60 CONTINUE +* + M = L + IEXC = 1 + GO TO 20 + 70 CONTINUE +* + GO TO 90 +* +* Search for columns isolating an eigenvalue and push them left. +* + 80 CONTINUE + K = K + 1 +* + 90 CONTINUE + DO 110 J = K, L +* + DO 100 I = K, L + IF( I.EQ.J ) + $ GO TO 100 + IF( DBLE( A( I, J ) ).NE.ZERO .OR. DIMAG( A( I, J ) ).NE. + $ ZERO )GO TO 110 + 100 CONTINUE +* + M = K + IEXC = 2 + GO TO 20 + 110 CONTINUE +* + 120 CONTINUE + DO 130 I = K, L + SCALE( I ) = ONE + 130 CONTINUE +* + IF( LSAME( JOB, 'P' ) ) + $ GO TO 210 +* +* Balance the submatrix in rows K to L. +* +* Iterative loop for norm reduction +* + SFMIN1 = DLAMCH( 'S' ) / DLAMCH( 'P' ) + SFMAX1 = ONE / SFMIN1 + SFMIN2 = SFMIN1*SCLFAC + SFMAX2 = ONE / SFMIN2 + 140 CONTINUE + NOCONV = .FALSE. +* + DO 200 I = K, L +* + C = DZNRM2( L-K+1, A( K, I ), 1 ) + R = DZNRM2( L-K+1, A( I, K ), LDA ) + ICA = IZAMAX( L, A( 1, I ), 1 ) + CA = ABS( A( ICA, I ) ) + IRA = IZAMAX( N-K+1, A( I, K ), LDA ) + RA = ABS( A( I, IRA+K-1 ) ) +* +* Guard against zero C or R due to underflow. +* + IF( C.EQ.ZERO .OR. R.EQ.ZERO ) + $ GO TO 200 + G = R / SCLFAC + F = ONE + S = C + R + 160 CONTINUE + IF( C.GE.G .OR. MAX( F, C, CA ).GE.SFMAX2 .OR. + $ MIN( R, G, RA ).LE.SFMIN2 )GO TO 170 + IF( DISNAN( C+F+CA+R+G+RA ) ) THEN +* +* Exit if NaN to avoid infinite loop +* + INFO = -3 + CALL XERBLA( 'ZGEBAL', -INFO ) + RETURN + END IF + F = F*SCLFAC + C = C*SCLFAC + CA = CA*SCLFAC + R = R / SCLFAC + G = G / SCLFAC + RA = RA / SCLFAC + GO TO 160 +* + 170 CONTINUE + G = C / SCLFAC + 180 CONTINUE + IF( G.LT.R .OR. MAX( R, RA ).GE.SFMAX2 .OR. + $ MIN( F, C, G, CA ).LE.SFMIN2 )GO TO 190 + F = F / SCLFAC + C = C / SCLFAC + G = G / SCLFAC + CA = CA / SCLFAC + R = R*SCLFAC + RA = RA*SCLFAC + GO TO 180 +* +* Now balance. +* + 190 CONTINUE + IF( ( C+R ).GE.FACTOR*S ) + $ GO TO 200 + IF( F.LT.ONE .AND. SCALE( I ).LT.ONE ) THEN + IF( F*SCALE( I ).LE.SFMIN1 ) + $ GO TO 200 + END IF + IF( F.GT.ONE .AND. SCALE( I ).GT.ONE ) THEN + IF( SCALE( I ).GE.SFMAX1 / F ) + $ GO TO 200 + END IF + G = ONE / F + SCALE( I ) = SCALE( I )*F + NOCONV = .TRUE. +* + CALL ZDSCAL( N-K+1, G, A( I, K ), LDA ) + CALL ZDSCAL( L, F, A( 1, I ), 1 ) +* + 200 CONTINUE +* + IF( NOCONV ) + $ GO TO 140 +* + 210 CONTINUE + ILO = K + IHI = L +* + RETURN +* +* End of ZGEBAL +* + END diff --git a/dspl/liblapack/SRC/zgebd2.f b/dspl/liblapack/SRC/zgebd2.f new file mode 100644 index 0000000..3afe79e --- /dev/null +++ b/dspl/liblapack/SRC/zgebd2.f @@ -0,0 +1,331 @@ +*> \brief \b ZGEBD2 reduces a general matrix to bidiagonal form using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEBD2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* COMPLEX*16 A( LDA, * ), TAUP( * ), TAUQ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEBD2 reduces a complex general m by n matrix A to upper or lower +*> real bidiagonal form B by a unitary transformation: Q**H * A * P = B. +*> +*> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows in the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the m by n general matrix to be reduced. +*> On exit, +*> if m >= n, the diagonal and the first superdiagonal are +*> overwritten with the upper bidiagonal matrix B; the +*> elements below the diagonal, with the array TAUQ, represent +*> the unitary matrix Q as a product of elementary +*> reflectors, and the elements above the first superdiagonal, +*> with the array TAUP, represent the unitary matrix P as +*> a product of elementary reflectors; +*> if m < n, the diagonal and the first subdiagonal are +*> overwritten with the lower bidiagonal matrix B; the +*> elements below the first subdiagonal, with the array TAUQ, +*> represent the unitary matrix Q as a product of +*> elementary reflectors, and the elements above the diagonal, +*> with the array TAUP, represent the unitary matrix P as +*> a product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (min(M,N)) +*> The diagonal elements of the bidiagonal matrix B: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (min(M,N)-1) +*> The off-diagonal elements of the bidiagonal matrix B: +*> if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +*> if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +*> \endverbatim +*> +*> \param[out] TAUQ +*> \verbatim +*> TAUQ is COMPLEX*16 array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the unitary matrix Q. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP +*> \verbatim +*> TAUP is COMPLEX*16 array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the unitary matrix P. See Further Details. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (max(M,N)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrices Q and P are represented as products of elementary +*> reflectors: +*> +*> If m >= n, +*> +*> Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**H and G(i) = I - taup * u * u**H +*> +*> where tauq and taup are complex scalars, and v and u are complex +*> vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in +*> A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in +*> A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> If m < n, +*> +*> Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**H and G(i) = I - taup * u * u**H +*> +*> where tauq and taup are complex scalars, v and u are complex vectors; +*> v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i); +*> u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n); +*> tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> The contents of A on exit are illustrated by the following examples: +*> +*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +*> +*> ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +*> ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +*> ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +*> ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +*> ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +*> ( v1 v2 v3 v4 v5 ) +*> +*> where d and e denote diagonal and off-diagonal elements of B, vi +*> denotes an element of the vector defining H(i), and ui an element of +*> the vector defining G(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + COMPLEX*16 A( LDA, * ), TAUP( * ), TAUQ( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACGV, ZLARF, ZLARFG +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'ZGEBD2', -INFO ) + RETURN + END IF +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, N +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + ALPHA = A( I, I ) + CALL ZLARFG( M-I+1, ALPHA, A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = ALPHA + A( I, I ) = ONE +* +* Apply H(i)**H to A(i:m,i+1:n) from the left +* + IF( I.LT.N ) + $ CALL ZLARF( 'Left', M-I+1, N-I, A( I, I ), 1, + $ DCONJG( TAUQ( I ) ), A( I, I+1 ), LDA, WORK ) + A( I, I ) = D( I ) +* + IF( I.LT.N ) THEN +* +* Generate elementary reflector G(i) to annihilate +* A(i,i+2:n) +* + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + ALPHA = A( I, I+1 ) + CALL ZLARFG( N-I, ALPHA, A( I, MIN( I+2, N ) ), LDA, + $ TAUP( I ) ) + E( I ) = ALPHA + A( I, I+1 ) = ONE +* +* Apply G(i) to A(i+1:m,i+1:n) from the right +* + CALL ZLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA, + $ TAUP( I ), A( I+1, I+1 ), LDA, WORK ) + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + A( I, I+1 ) = E( I ) + ELSE + TAUP( I ) = ZERO + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, M +* +* Generate elementary reflector G(i) to annihilate A(i,i+1:n) +* + CALL ZLACGV( N-I+1, A( I, I ), LDA ) + ALPHA = A( I, I ) + CALL ZLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = ALPHA + A( I, I ) = ONE +* +* Apply G(i) to A(i+1:m,i:n) from the right +* + IF( I.LT.M ) + $ CALL ZLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ TAUP( I ), A( I+1, I ), LDA, WORK ) + CALL ZLACGV( N-I+1, A( I, I ), LDA ) + A( I, I ) = D( I ) +* + IF( I.LT.M ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:m,i) +* + ALPHA = A( I+1, I ) + CALL ZLARFG( M-I, ALPHA, A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = ALPHA + A( I+1, I ) = ONE +* +* Apply H(i)**H to A(i+1:m,i+1:n) from the left +* + CALL ZLARF( 'Left', M-I, N-I, A( I+1, I ), 1, + $ DCONJG( TAUQ( I ) ), A( I+1, I+1 ), LDA, + $ WORK ) + A( I+1, I ) = E( I ) + ELSE + TAUQ( I ) = ZERO + END IF + 20 CONTINUE + END IF + RETURN +* +* End of ZGEBD2 +* + END diff --git a/dspl/liblapack/SRC/zgebrd.f b/dspl/liblapack/SRC/zgebrd.f new file mode 100644 index 0000000..bccd6b8 --- /dev/null +++ b/dspl/liblapack/SRC/zgebrd.f @@ -0,0 +1,352 @@ +*> \brief \b ZGEBRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEBRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* COMPLEX*16 A( LDA, * ), TAUP( * ), TAUQ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEBRD reduces a general complex M-by-N matrix A to upper or lower +*> bidiagonal form B by a unitary transformation: Q**H * A * P = B. +*> +*> If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows in the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N general matrix to be reduced. +*> On exit, +*> if m >= n, the diagonal and the first superdiagonal are +*> overwritten with the upper bidiagonal matrix B; the +*> elements below the diagonal, with the array TAUQ, represent +*> the unitary matrix Q as a product of elementary +*> reflectors, and the elements above the first superdiagonal, +*> with the array TAUP, represent the unitary matrix P as +*> a product of elementary reflectors; +*> if m < n, the diagonal and the first subdiagonal are +*> overwritten with the lower bidiagonal matrix B; the +*> elements below the first subdiagonal, with the array TAUQ, +*> represent the unitary matrix Q as a product of +*> elementary reflectors, and the elements above the diagonal, +*> with the array TAUP, represent the unitary matrix P as +*> a product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (min(M,N)) +*> The diagonal elements of the bidiagonal matrix B: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (min(M,N)-1) +*> The off-diagonal elements of the bidiagonal matrix B: +*> if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1; +*> if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1. +*> \endverbatim +*> +*> \param[out] TAUQ +*> \verbatim +*> TAUQ is COMPLEX*16 array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the unitary matrix Q. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP +*> \verbatim +*> TAUP is COMPLEX*16 array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the unitary matrix P. See Further Details. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,M,N). +*> For optimum performance LWORK >= (M+N)*NB, where NB +*> is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrices Q and P are represented as products of elementary +*> reflectors: +*> +*> If m >= n, +*> +*> Q = H(1) H(2) . . . H(n) and P = G(1) G(2) . . . G(n-1) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**H and G(i) = I - taup * u * u**H +*> +*> where tauq and taup are complex scalars, and v and u are complex +*> vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in +*> A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in +*> A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> If m < n, +*> +*> Q = H(1) H(2) . . . H(m-1) and P = G(1) G(2) . . . G(m) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**H and G(i) = I - taup * u * u**H +*> +*> where tauq and taup are complex scalars, and v and u are complex +*> vectors; v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in +*> A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in +*> A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> The contents of A on exit are illustrated by the following examples: +*> +*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +*> +*> ( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 ) +*> ( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 ) +*> ( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 ) +*> ( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 ) +*> ( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 ) +*> ( v1 v2 v3 v4 v5 ) +*> +*> where d and e denote diagonal and off-diagonal elements of B, vi +*> denotes an element of the vector defining H(i), and ui an element of +*> the vector defining G(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + COMPLEX*16 A( LDA, * ), TAUP( * ), TAUQ( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB, + $ NBMIN, NX, WS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEBD2, ZGEMM, ZLABRD +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB = MAX( 1, ILAENV( 1, 'ZGEBRD', ' ', M, N, -1, -1 ) ) + LWKOPT = ( M+N )*NB + WORK( 1 ) = DBLE( LWKOPT ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M, N ) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + IF( INFO.LT.0 ) THEN + CALL XERBLA( 'ZGEBRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + MINMN = MIN( M, N ) + IF( MINMN.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + WS = MAX( M, N ) + LDWRKX = M + LDWRKY = N +* + IF( NB.GT.1 .AND. NB.LT.MINMN ) THEN +* +* Set the crossover point NX. +* + NX = MAX( NB, ILAENV( 3, 'ZGEBRD', ' ', M, N, -1, -1 ) ) +* +* Determine when to switch from blocked to unblocked code. +* + IF( NX.LT.MINMN ) THEN + WS = ( M+N )*NB + IF( LWORK.LT.WS ) THEN +* +* Not enough work space for the optimal NB, consider using +* a smaller block size. +* + NBMIN = ILAENV( 2, 'ZGEBRD', ' ', M, N, -1, -1 ) + IF( LWORK.GE.( M+N )*NBMIN ) THEN + NB = LWORK / ( M+N ) + ELSE + NB = 1 + NX = MINMN + END IF + END IF + END IF + ELSE + NX = MINMN + END IF +* + DO 30 I = 1, MINMN - NX, NB +* +* Reduce rows and columns i:i+ib-1 to bidiagonal form and return +* the matrices X and Y which are needed to update the unreduced +* part of the matrix +* + CALL ZLABRD( M-I+1, N-I+1, NB, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, LDWRKX, + $ WORK( LDWRKX*NB+1 ), LDWRKY ) +* +* Update the trailing submatrix A(i+ib:m,i+ib:n), using +* an update of the form A := A - V*Y**H - X*U**H +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', M-I-NB+1, + $ N-I-NB+1, NB, -ONE, A( I+NB, I ), LDA, + $ WORK( LDWRKX*NB+NB+1 ), LDWRKY, ONE, + $ A( I+NB, I+NB ), LDA ) + CALL ZGEMM( 'No transpose', 'No transpose', M-I-NB+1, N-I-NB+1, + $ NB, -ONE, WORK( NB+1 ), LDWRKX, A( I, I+NB ), LDA, + $ ONE, A( I+NB, I+NB ), LDA ) +* +* Copy diagonal and off-diagonal elements of B back into A +* + IF( M.GE.N ) THEN + DO 10 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J, J+1 ) = E( J ) + 10 CONTINUE + ELSE + DO 20 J = I, I + NB - 1 + A( J, J ) = D( J ) + A( J+1, J ) = E( J ) + 20 CONTINUE + END IF + 30 CONTINUE +* +* Use unblocked code to reduce the remainder of the matrix +* + CALL ZGEBD2( M-I+1, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAUQ( I ), TAUP( I ), WORK, IINFO ) + WORK( 1 ) = WS + RETURN +* +* End of ZGEBRD +* + END diff --git a/dspl/liblapack/SRC/zgecon.f b/dspl/liblapack/SRC/zgecon.f new file mode 100644 index 0000000..91362a9 --- /dev/null +++ b/dspl/liblapack/SRC/zgecon.f @@ -0,0 +1,269 @@ +*> \brief \b ZGECON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGECON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER INFO, LDA, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGECON estimates the reciprocal of the condition number of a general +*> complex matrix A, in either the 1-norm or the infinity-norm, using +*> the LU factorization computed by ZGETRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as +*> RCOND = 1 / ( norm(A) * norm(inv(A)) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The factors L and U from the factorization A = P*L*U +*> as computed by ZGETRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> If NORM = '1' or 'O', the 1-norm of the original matrix A. +*> If NORM = 'I', the infinity-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(norm(A) * norm(inv(A))). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +* ===================================================================== + SUBROUTINE ZGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + DOUBLE PRECISION AINVNM, SCALE, SL, SMLNUM, SU + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IZAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDRSCL, ZLACN2, ZLATRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGECON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the norm of inv(A). +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(L). +* + CALL ZLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A, + $ LDA, WORK, SL, RWORK, INFO ) +* +* Multiply by inv(U). +* + CALL ZLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SU, RWORK( N+1 ), INFO ) + ELSE +* +* Multiply by inv(U**H). +* + CALL ZLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, A, LDA, WORK, SU, RWORK( N+1 ), + $ INFO ) +* +* Multiply by inv(L**H). +* + CALL ZLATRS( 'Lower', 'Conjugate transpose', 'Unit', NORMIN, + $ N, A, LDA, WORK, SL, RWORK, INFO ) + END IF +* +* Divide X by 1/(SL*SU) if doing so will not cause overflow. +* + SCALE = SL*SU + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN + IX = IZAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL ZDRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of ZGECON +* + END diff --git a/dspl/liblapack/SRC/zgeequ.f b/dspl/liblapack/SRC/zgeequ.f new file mode 100644 index 0000000..13fcb2a --- /dev/null +++ b/dspl/liblapack/SRC/zgeequ.f @@ -0,0 +1,313 @@ +*> \brief \b ZGEEQU +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEEQU + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( * ), R( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEEQU computes row and column scalings intended to equilibrate an +*> M-by-N matrix A and reduce its condition number. R returns the row +*> scale factors and C the column scale factors, chosen to try to make +*> the largest element in each row and column of the matrix B with +*> elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1. +*> +*> R(i) and C(j) are restricted to be between SMLNUM = smallest safe +*> number and BIGNUM = largest safe number. Use of these scaling +*> factors is not guaranteed to reduce the condition number of A but +*> works well in practice. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The M-by-N matrix whose equilibration factors are +*> to be computed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (M) +*> If INFO = 0 or INFO > M, R contains the row scale factors +*> for A. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, C contains the column scale factors for A. +*> \endverbatim +*> +*> \param[out] ROWCND +*> \verbatim +*> ROWCND is DOUBLE PRECISION +*> If INFO = 0 or INFO > M, ROWCND contains the ratio of the +*> smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +*> AMAX is neither too large nor too small, it is not worth +*> scaling by R. +*> \endverbatim +*> +*> \param[out] COLCND +*> \verbatim +*> COLCND is DOUBLE PRECISION +*> If INFO = 0, COLCND contains the ratio of the smallest +*> C(i) to the largest C(i). If COLCND >= 0.1, it is not +*> worth scaling by C. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= M: the i-th row of A is exactly zero +*> > M: the (i-M)-th column of A is exactly zero +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +* ===================================================================== + SUBROUTINE ZGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N + DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ), R( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION BIGNUM, RCMAX, RCMIN, SMLNUM + COMPLEX*16 ZDUM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + DO 30 J = 1, N + DO 20 I = 1, M + R( I ) = MAX( R( I ), CABS1( A( I, J ) ) ) + 20 CONTINUE + 30 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)) +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + DO 90 J = 1, N + DO 80 I = 1, M + C( J ) = MAX( C( J ), CABS1( A( I, J ) )*R( I ) ) + 80 CONTINUE + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)) +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of ZGEEQU +* + END diff --git a/dspl/liblapack/SRC/zgeequb.f b/dspl/liblapack/SRC/zgeequb.f new file mode 100644 index 0000000..1b9a3bb --- /dev/null +++ b/dspl/liblapack/SRC/zgeequb.f @@ -0,0 +1,330 @@ +*> \brief \b ZGEEQUB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEEQUB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEEQUB( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( * ), R( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEEQUB computes row and column scalings intended to equilibrate an +*> M-by-N matrix A and reduce its condition number. R returns the row +*> scale factors and C the column scale factors, chosen to try to make +*> the largest element in each row and column of the matrix B with +*> elements B(i,j)=R(i)*A(i,j)*C(j) have an absolute value of at most +*> the radix. +*> +*> R(i) and C(j) are restricted to be a power of the radix between +*> SMLNUM = smallest safe number and BIGNUM = largest safe number. Use +*> of these scaling factors is not guaranteed to reduce the condition +*> number of A but works well in practice. +*> +*> This routine differs from ZGEEQU by restricting the scaling factors +*> to a power of the radix. Barring over- and underflow, scaling by +*> these factors introduces no additional rounding errors. However, the +*> scaled entries' magnitudes are no longer approximately 1 but lie +*> between sqrt(radix) and 1/sqrt(radix). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The M-by-N matrix whose equilibration factors are +*> to be computed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (M) +*> If INFO = 0 or INFO > M, R contains the row scale factors +*> for A. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, C contains the column scale factors for A. +*> \endverbatim +*> +*> \param[out] ROWCND +*> \verbatim +*> ROWCND is DOUBLE PRECISION +*> If INFO = 0 or INFO > M, ROWCND contains the ratio of the +*> smallest R(i) to the largest R(i). If ROWCND >= 0.1 and +*> AMAX is neither too large nor too small, it is not worth +*> scaling by R. +*> \endverbatim +*> +*> \param[out] COLCND +*> \verbatim +*> COLCND is DOUBLE PRECISION +*> If INFO = 0, COLCND contains the ratio of the smallest +*> C(i) to the largest C(i). If COLCND >= 0.1, it is not +*> worth scaling by C. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= M: the i-th row of A is exactly zero +*> > M: the (i-M)-th column of A is exactly zero +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +* ===================================================================== + SUBROUTINE ZGEEQUB( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N + DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ), R( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION BIGNUM, RCMAX, RCMIN, SMLNUM, RADIX, LOGRDX + COMPLEX*16 ZDUM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, LOG, DBLE, DIMAG +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEEQUB', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + ROWCND = ONE + COLCND = ONE + AMAX = ZERO + RETURN + END IF +* +* Get machine constants. Assume SMLNUM is a power of the radix. +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + RADIX = DLAMCH( 'B' ) + LOGRDX = LOG( RADIX ) +* +* Compute row scale factors. +* + DO 10 I = 1, M + R( I ) = ZERO + 10 CONTINUE +* +* Find the maximum element in each row. +* + DO 30 J = 1, N + DO 20 I = 1, M + R( I ) = MAX( R( I ), CABS1( A( I, J ) ) ) + 20 CONTINUE + 30 CONTINUE + DO I = 1, M + IF( R( I ).GT.ZERO ) THEN + R( I ) = RADIX**INT( LOG(R( I ) ) / LOGRDX ) + END IF + END DO +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 40 I = 1, M + RCMAX = MAX( RCMAX, R( I ) ) + RCMIN = MIN( RCMIN, R( I ) ) + 40 CONTINUE + AMAX = RCMAX +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 50 I = 1, M + IF( R( I ).EQ.ZERO ) THEN + INFO = I + RETURN + END IF + 50 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 60 I = 1, M + R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM ) + 60 CONTINUE +* +* Compute ROWCND = min(R(I)) / max(R(I)). +* + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* +* Compute column scale factors. +* + DO 70 J = 1, N + C( J ) = ZERO + 70 CONTINUE +* +* Find the maximum element in each column, +* assuming the row scaling computed above. +* + DO 90 J = 1, N + DO 80 I = 1, M + C( J ) = MAX( C( J ), CABS1( A( I, J ) )*R( I ) ) + 80 CONTINUE + IF( C( J ).GT.ZERO ) THEN + C( J ) = RADIX**INT( LOG( C( J ) ) / LOGRDX ) + END IF + 90 CONTINUE +* +* Find the maximum and minimum scale factors. +* + RCMIN = BIGNUM + RCMAX = ZERO + DO 100 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 100 CONTINUE +* + IF( RCMIN.EQ.ZERO ) THEN +* +* Find the first zero scale factor and return an error code. +* + DO 110 J = 1, N + IF( C( J ).EQ.ZERO ) THEN + INFO = M + J + RETURN + END IF + 110 CONTINUE + ELSE +* +* Invert the scale factors. +* + DO 120 J = 1, N + C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM ) + 120 CONTINUE +* +* Compute COLCND = min(C(J)) / max(C(J)). +* + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + END IF +* + RETURN +* +* End of ZGEEQUB +* + END diff --git a/dspl/liblapack/SRC/zgees.f b/dspl/liblapack/SRC/zgees.f new file mode 100644 index 0000000..a33e0a1 --- /dev/null +++ b/dspl/liblapack/SRC/zgees.f @@ -0,0 +1,424 @@ +*> \brief ZGEES computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEES + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, W, VS, +* LDVS, WORK, LWORK, RWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVS, SORT +* INTEGER INFO, LDA, LDVS, LWORK, N, SDIM +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( LDA, * ), VS( LDVS, * ), W( * ), WORK( * ) +* .. +* .. Function Arguments .. +* LOGICAL SELECT +* EXTERNAL SELECT +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEES computes for an N-by-N complex nonsymmetric matrix A, the +*> eigenvalues, the Schur form T, and, optionally, the matrix of Schur +*> vectors Z. This gives the Schur factorization A = Z*T*(Z**H). +*> +*> Optionally, it also orders the eigenvalues on the diagonal of the +*> Schur form so that selected eigenvalues are at the top left. +*> The leading columns of Z then form an orthonormal basis for the +*> invariant subspace corresponding to the selected eigenvalues. +*> +*> A complex matrix is in Schur form if it is upper triangular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVS +*> \verbatim +*> JOBVS is CHARACTER*1 +*> = 'N': Schur vectors are not computed; +*> = 'V': Schur vectors are computed. +*> \endverbatim +*> +*> \param[in] SORT +*> \verbatim +*> SORT is CHARACTER*1 +*> Specifies whether or not to order the eigenvalues on the +*> diagonal of the Schur form. +*> = 'N': Eigenvalues are not ordered: +*> = 'S': Eigenvalues are ordered (see SELECT). +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is a LOGICAL FUNCTION of one COMPLEX*16 argument +*> SELECT must be declared EXTERNAL in the calling subroutine. +*> If SORT = 'S', SELECT is used to select eigenvalues to order +*> to the top left of the Schur form. +*> IF SORT = 'N', SELECT is not referenced. +*> The eigenvalue W(j) is selected if SELECT(W(j)) is true. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> On exit, A has been overwritten by its Schur form T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] SDIM +*> \verbatim +*> SDIM is INTEGER +*> If SORT = 'N', SDIM = 0. +*> If SORT = 'S', SDIM = number of eigenvalues for which +*> SELECT is true. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX*16 array, dimension (N) +*> W contains the computed eigenvalues, in the same order that +*> they appear on the diagonal of the output Schur form T. +*> \endverbatim +*> +*> \param[out] VS +*> \verbatim +*> VS is COMPLEX*16 array, dimension (LDVS,N) +*> If JOBVS = 'V', VS contains the unitary matrix Z of Schur +*> vectors. +*> If JOBVS = 'N', VS is not referenced. +*> \endverbatim +*> +*> \param[in] LDVS +*> \verbatim +*> LDVS is INTEGER +*> The leading dimension of the array VS. LDVS >= 1; if +*> JOBVS = 'V', LDVS >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,2*N). +*> For good performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> Not referenced if SORT = 'N'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, and i is +*> <= N: the QR algorithm failed to compute all the +*> eigenvalues; elements 1:ILO-1 and i+1:N of W +*> contain those eigenvalues which have converged; +*> if JOBVS = 'V', VS contains the matrix which +*> reduces A to its partially converged Schur form. +*> = N+1: the eigenvalues could not be reordered because +*> some eigenvalues were too close to separate (the +*> problem is very ill-conditioned); +*> = N+2: after reordering, roundoff changed values of +*> some complex eigenvalues so that leading +*> eigenvalues in the Schur form no longer satisfy +*> SELECT = .TRUE.. This could also be caused by +*> underflow due to scaling. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEeigen +* +* ===================================================================== + SUBROUTINE ZGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, W, VS, + $ LDVS, WORK, LWORK, RWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVS, SORT + INTEGER INFO, LDA, LDVS, LWORK, N, SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), VS( LDVS, * ), W( * ), WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELECT + EXTERNAL SELECT +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTST, WANTVS + INTEGER HSWORK, I, IBAL, ICOND, IERR, IEVAL, IHI, ILO, + $ ITAU, IWRK, MAXWRK, MINWRK + DOUBLE PRECISION ANRM, BIGNUM, CSCALE, EPS, S, SEP, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, XERBLA, ZCOPY, ZGEBAK, ZGEBAL, ZGEHRD, + $ ZHSEQR, ZLACPY, ZLASCL, ZTRSEN, ZUNGHR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVS = LSAME( JOBVS, 'V' ) + WANTST = LSAME( SORT, 'S' ) + IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN + INFO = -10 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to real +* workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by ZHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + ELSE + MAXWRK = N + N*ILAENV( 1, 'ZGEHRD', ' ', N, 1, N, 0 ) + MINWRK = 2*N +* + CALL ZHSEQR( 'S', JOBVS, N, 1, N, A, LDA, W, VS, LDVS, + $ WORK, -1, IEVAL ) + HSWORK = WORK( 1 ) +* + IF( .NOT.WANTVS ) THEN + MAXWRK = MAX( MAXWRK, HSWORK ) + ELSE + MAXWRK = MAX( MAXWRK, N + ( N - 1 )*ILAENV( 1, 'ZUNGHR', + $ ' ', N, 1, N, -1 ) ) + MAXWRK = MAX( MAXWRK, HSWORK ) + END IF + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEES ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (CWorkspace: none) +* (RWorkspace: need N) +* + IBAL = 1 + CALL ZGEBAL( 'P', N, A, LDA, ILO, IHI, RWORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + ITAU = 1 + IWRK = N + ITAU + CALL ZGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVS ) THEN +* +* Copy Householder vectors to VS +* + CALL ZLACPY( 'L', N, N, A, LDA, VS, LDVS ) +* +* Generate unitary matrix in VS +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL ZUNGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) + END IF +* + SDIM = 0 +* +* Perform QR iteration, accumulating Schur vectors in VS if desired +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL ZHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, W, VS, LDVS, + $ WORK( IWRK ), LWORK-IWRK+1, IEVAL ) + IF( IEVAL.GT.0 ) + $ INFO = IEVAL +* +* Sort eigenvalues if desired +* + IF( WANTST .AND. INFO.EQ.0 ) THEN + IF( SCALEA ) + $ CALL ZLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, W, N, IERR ) + DO 10 I = 1, N + BWORK( I ) = SELECT( W( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues and transform Schur vectors +* (CWorkspace: none) +* (RWorkspace: none) +* + CALL ZTRSEN( 'N', JOBVS, BWORK, N, A, LDA, VS, LDVS, W, SDIM, + $ S, SEP, WORK( IWRK ), LWORK-IWRK+1, ICOND ) + END IF +* + IF( WANTVS ) THEN +* +* Undo balancing +* (CWorkspace: none) +* (RWorkspace: need N) +* + CALL ZGEBAK( 'P', 'R', N, ILO, IHI, RWORK( IBAL ), N, VS, LDVS, + $ IERR ) + END IF +* + IF( SCALEA ) THEN +* +* Undo scaling for the Schur form of A +* + CALL ZLASCL( 'U', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR ) + CALL ZCOPY( N, A, LDA+1, W, 1 ) + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of ZGEES +* + END diff --git a/dspl/liblapack/SRC/zgeesx.f b/dspl/liblapack/SRC/zgeesx.f new file mode 100644 index 0000000..a5391cb --- /dev/null +++ b/dspl/liblapack/SRC/zgeesx.f @@ -0,0 +1,499 @@ +*> \brief ZGEESX computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEESX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, W, +* VS, LDVS, RCONDE, RCONDV, WORK, LWORK, RWORK, +* BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVS, SENSE, SORT +* INTEGER INFO, LDA, LDVS, LWORK, N, SDIM +* DOUBLE PRECISION RCONDE, RCONDV +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( LDA, * ), VS( LDVS, * ), W( * ), WORK( * ) +* .. +* .. Function Arguments .. +* LOGICAL SELECT +* EXTERNAL SELECT +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEESX computes for an N-by-N complex nonsymmetric matrix A, the +*> eigenvalues, the Schur form T, and, optionally, the matrix of Schur +*> vectors Z. This gives the Schur factorization A = Z*T*(Z**H). +*> +*> Optionally, it also orders the eigenvalues on the diagonal of the +*> Schur form so that selected eigenvalues are at the top left; +*> computes a reciprocal condition number for the average of the +*> selected eigenvalues (RCONDE); and computes a reciprocal condition +*> number for the right invariant subspace corresponding to the +*> selected eigenvalues (RCONDV). The leading columns of Z form an +*> orthonormal basis for this invariant subspace. +*> +*> For further explanation of the reciprocal condition numbers RCONDE +*> and RCONDV, see Section 4.10 of the LAPACK Users' Guide (where +*> these quantities are called s and sep respectively). +*> +*> A complex matrix is in Schur form if it is upper triangular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVS +*> \verbatim +*> JOBVS is CHARACTER*1 +*> = 'N': Schur vectors are not computed; +*> = 'V': Schur vectors are computed. +*> \endverbatim +*> +*> \param[in] SORT +*> \verbatim +*> SORT is CHARACTER*1 +*> Specifies whether or not to order the eigenvalues on the +*> diagonal of the Schur form. +*> = 'N': Eigenvalues are not ordered; +*> = 'S': Eigenvalues are ordered (see SELECT). +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is a LOGICAL FUNCTION of one COMPLEX*16 argument +*> SELECT must be declared EXTERNAL in the calling subroutine. +*> If SORT = 'S', SELECT is used to select eigenvalues to order +*> to the top left of the Schur form. +*> If SORT = 'N', SELECT is not referenced. +*> An eigenvalue W(j) is selected if SELECT(W(j)) is true. +*> \endverbatim +*> +*> \param[in] SENSE +*> \verbatim +*> SENSE is CHARACTER*1 +*> Determines which reciprocal condition numbers are computed. +*> = 'N': None are computed; +*> = 'E': Computed for average of selected eigenvalues only; +*> = 'V': Computed for selected right invariant subspace only; +*> = 'B': Computed for both. +*> If SENSE = 'E', 'V' or 'B', SORT must equal 'S'. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the N-by-N matrix A. +*> On exit, A is overwritten by its Schur form T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] SDIM +*> \verbatim +*> SDIM is INTEGER +*> If SORT = 'N', SDIM = 0. +*> If SORT = 'S', SDIM = number of eigenvalues for which +*> SELECT is true. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX*16 array, dimension (N) +*> W contains the computed eigenvalues, in the same order +*> that they appear on the diagonal of the output Schur form T. +*> \endverbatim +*> +*> \param[out] VS +*> \verbatim +*> VS is COMPLEX*16 array, dimension (LDVS,N) +*> If JOBVS = 'V', VS contains the unitary matrix Z of Schur +*> vectors. +*> If JOBVS = 'N', VS is not referenced. +*> \endverbatim +*> +*> \param[in] LDVS +*> \verbatim +*> LDVS is INTEGER +*> The leading dimension of the array VS. LDVS >= 1, and if +*> JOBVS = 'V', LDVS >= N. +*> \endverbatim +*> +*> \param[out] RCONDE +*> \verbatim +*> RCONDE is DOUBLE PRECISION +*> If SENSE = 'E' or 'B', RCONDE contains the reciprocal +*> condition number for the average of the selected eigenvalues. +*> Not referenced if SENSE = 'N' or 'V'. +*> \endverbatim +*> +*> \param[out] RCONDV +*> \verbatim +*> RCONDV is DOUBLE PRECISION +*> If SENSE = 'V' or 'B', RCONDV contains the reciprocal +*> condition number for the selected right invariant subspace. +*> Not referenced if SENSE = 'N' or 'E'. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,2*N). +*> Also, if SENSE = 'E' or 'V' or 'B', LWORK >= 2*SDIM*(N-SDIM), +*> where SDIM is the number of selected eigenvalues computed by +*> this routine. Note that 2*SDIM*(N-SDIM) <= N*N/2. Note also +*> that an error is only returned if LWORK < max(1,2*N), but if +*> SENSE = 'E' or 'V' or 'B' this may not be large enough. +*> For good performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates upper bound on the optimal size of the +*> array WORK, returns this value as the first entry of the WORK +*> array, and no error message related to LWORK is issued by +*> XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> Not referenced if SORT = 'N'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, and i is +*> <= N: the QR algorithm failed to compute all the +*> eigenvalues; elements 1:ILO-1 and i+1:N of W +*> contain those eigenvalues which have converged; if +*> JOBVS = 'V', VS contains the transformation which +*> reduces A to its partially converged Schur form. +*> = N+1: the eigenvalues could not be reordered because some +*> eigenvalues were too close to separate (the problem +*> is very ill-conditioned); +*> = N+2: after reordering, roundoff changed values of some +*> complex eigenvalues so that leading eigenvalues in +*> the Schur form no longer satisfy SELECT=.TRUE. This +*> could also be caused by underflow due to scaling. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16GEeigen +* +* ===================================================================== + SUBROUTINE ZGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, W, + $ VS, LDVS, RCONDE, RCONDV, WORK, LWORK, RWORK, + $ BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVS, SENSE, SORT + INTEGER INFO, LDA, LDVS, LWORK, N, SDIM + DOUBLE PRECISION RCONDE, RCONDV +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), VS( LDVS, * ), W( * ), WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELECT + EXTERNAL SELECT +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTSB, WANTSE, WANTSN, WANTST, + $ WANTSV, WANTVS + INTEGER HSWORK, I, IBAL, ICOND, IERR, IEVAL, IHI, ILO, + $ ITAU, IWRK, LWRK, MAXWRK, MINWRK + DOUBLE PRECISION ANRM, BIGNUM, CSCALE, EPS, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, DLASCL, XERBLA, ZCOPY, ZGEBAK, ZGEBAL, + $ ZGEHRD, ZHSEQR, ZLACPY, ZLASCL, ZTRSEN, ZUNGHR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTVS = LSAME( JOBVS, 'V' ) + WANTST = LSAME( SORT, 'S' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR. + $ ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of real workspace needed at that point in the +* code, as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to real +* workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by ZHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case. +* If SENSE = 'E', 'V' or 'B', then the amount of workspace needed +* depends on SDIM, which is computed by the routine ZTRSEN later +* in the code.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + LWRK = 1 + ELSE + MAXWRK = N + N*ILAENV( 1, 'ZGEHRD', ' ', N, 1, N, 0 ) + MINWRK = 2*N +* + CALL ZHSEQR( 'S', JOBVS, N, 1, N, A, LDA, W, VS, LDVS, + $ WORK, -1, IEVAL ) + HSWORK = WORK( 1 ) +* + IF( .NOT.WANTVS ) THEN + MAXWRK = MAX( MAXWRK, HSWORK ) + ELSE + MAXWRK = MAX( MAXWRK, N + ( N - 1 )*ILAENV( 1, 'ZUNGHR', + $ ' ', N, 1, N, -1 ) ) + MAXWRK = MAX( MAXWRK, HSWORK ) + END IF + LWRK = MAXWRK + IF( .NOT.WANTSN ) + $ LWRK = MAX( LWRK, ( N*N )/2 ) + END IF + WORK( 1 ) = LWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEESX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* +* Permute the matrix to make it more nearly triangular +* (CWorkspace: none) +* (RWorkspace: need N) +* + IBAL = 1 + CALL ZGEBAL( 'P', N, A, LDA, ILO, IHI, RWORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + ITAU = 1 + IWRK = N + ITAU + CALL ZGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVS ) THEN +* +* Copy Householder vectors to VS +* + CALL ZLACPY( 'L', N, N, A, LDA, VS, LDVS ) +* +* Generate unitary matrix in VS +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL ZUNGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) + END IF +* + SDIM = 0 +* +* Perform QR iteration, accumulating Schur vectors in VS if desired +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL ZHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, W, VS, LDVS, + $ WORK( IWRK ), LWORK-IWRK+1, IEVAL ) + IF( IEVAL.GT.0 ) + $ INFO = IEVAL +* +* Sort eigenvalues if desired +* + IF( WANTST .AND. INFO.EQ.0 ) THEN + IF( SCALEA ) + $ CALL ZLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, W, N, IERR ) + DO 10 I = 1, N + BWORK( I ) = SELECT( W( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues, transform Schur vectors, and compute +* reciprocal condition numbers +* (CWorkspace: if SENSE is not 'N', need 2*SDIM*(N-SDIM) +* otherwise, need none ) +* (RWorkspace: none) +* + CALL ZTRSEN( SENSE, JOBVS, BWORK, N, A, LDA, VS, LDVS, W, SDIM, + $ RCONDE, RCONDV, WORK( IWRK ), LWORK-IWRK+1, + $ ICOND ) + IF( .NOT.WANTSN ) + $ MAXWRK = MAX( MAXWRK, 2*SDIM*( N-SDIM ) ) + IF( ICOND.EQ.-14 ) THEN +* +* Not enough complex workspace +* + INFO = -15 + END IF + END IF +* + IF( WANTVS ) THEN +* +* Undo balancing +* (CWorkspace: none) +* (RWorkspace: need N) +* + CALL ZGEBAK( 'P', 'R', N, ILO, IHI, RWORK( IBAL ), N, VS, LDVS, + $ IERR ) + END IF +* + IF( SCALEA ) THEN +* +* Undo scaling for the Schur form of A +* + CALL ZLASCL( 'U', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR ) + CALL ZCOPY( N, A, LDA+1, W, 1 ) + IF( ( WANTSV .OR. WANTSB ) .AND. INFO.EQ.0 ) THEN + DUM( 1 ) = RCONDV + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR ) + RCONDV = DUM( 1 ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of ZGEESX +* + END diff --git a/dspl/liblapack/SRC/zgeev.f b/dspl/liblapack/SRC/zgeev.f new file mode 100644 index 0000000..22b0446 --- /dev/null +++ b/dspl/liblapack/SRC/zgeev.f @@ -0,0 +1,503 @@ +*> \brief ZGEEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEEV( JOBVL, JOBVR, N, A, LDA, W, VL, LDVL, VR, LDVR, +* WORK, LWORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVL, JOBVR +* INTEGER INFO, LDA, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), +* $ W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEEV computes for an N-by-N complex nonsymmetric matrix A, the +*> eigenvalues and, optionally, the left and/or right eigenvectors. +*> +*> The right eigenvector v(j) of A satisfies +*> A * v(j) = lambda(j) * v(j) +*> where lambda(j) is its eigenvalue. +*> The left eigenvector u(j) of A satisfies +*> u(j)**H * A = lambda(j) * u(j)**H +*> where u(j)**H denotes the conjugate transpose of u(j). +*> +*> The computed eigenvectors are normalized to have Euclidean norm +*> equal to 1 and largest component real. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': left eigenvectors of A are not computed; +*> = 'V': left eigenvectors of are computed. +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': right eigenvectors of A are not computed; +*> = 'V': right eigenvectors of A are computed. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> On exit, A has been overwritten. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX*16 array, dimension (N) +*> W contains the computed eigenvalues. +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is COMPLEX*16 array, dimension (LDVL,N) +*> If JOBVL = 'V', the left eigenvectors u(j) are stored one +*> after another in the columns of VL, in the same order +*> as their eigenvalues. +*> If JOBVL = 'N', VL is not referenced. +*> u(j) = VL(:,j), the j-th column of VL. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. LDVL >= 1; if +*> JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is COMPLEX*16 array, dimension (LDVR,N) +*> If JOBVR = 'V', the right eigenvectors v(j) are stored one +*> after another in the columns of VR, in the same order +*> as their eigenvalues. +*> If JOBVR = 'N', VR is not referenced. +*> v(j) = VR(:,j), the j-th column of VR. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. LDVR >= 1; if +*> JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,2*N). +*> For good performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the QR algorithm failed to compute all the +*> eigenvalues, and no eigenvectors have been computed; +*> elements and i+1:N of W contain eigenvalues which have +*> converged. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +* @precisions fortran z -> c +* +*> \ingroup complex16GEeigen +* +* ===================================================================== + SUBROUTINE ZGEEV( JOBVL, JOBVR, N, A, LDA, W, VL, LDVL, VR, LDVR, + $ WORK, LWORK, RWORK, INFO ) + implicit none +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), + $ W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTVL, WANTVR + CHARACTER SIDE + INTEGER HSWORK, I, IBAL, IERR, IHI, ILO, IRWORK, ITAU, + $ IWRK, K, LWORK_TREVC, MAXWRK, MINWRK, NOUT + DOUBLE PRECISION ANRM, BIGNUM, CSCALE, EPS, SCL, SMLNUM + COMPLEX*16 TMP +* .. +* .. Local Arrays .. + LOGICAL SELECT( 1 ) + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, XERBLA, ZDSCAL, ZGEBAK, ZGEBAL, ZGEHRD, + $ ZHSEQR, ZLACPY, ZLASCL, ZSCAL, ZTREVC3, ZUNGHR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX, ILAENV + DOUBLE PRECISION DLAMCH, DZNRM2, ZLANGE + EXTERNAL LSAME, IDAMAX, ILAENV, DLAMCH, DZNRM2, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, CONJG, AIMAG, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVL = LSAME( JOBVL, 'V' ) + WANTVR = LSAME( JOBVR, 'V' ) + IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN + INFO = -10 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to real +* workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by ZHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + ELSE + MAXWRK = N + N*ILAENV( 1, 'ZGEHRD', ' ', N, 1, N, 0 ) + MINWRK = 2*N + IF( WANTVL ) THEN + MAXWRK = MAX( MAXWRK, N + ( N - 1 )*ILAENV( 1, 'ZUNGHR', + $ ' ', N, 1, N, -1 ) ) + CALL ZTREVC3( 'L', 'B', SELECT, N, A, LDA, + $ VL, LDVL, VR, LDVR, + $ N, NOUT, WORK, -1, RWORK, -1, IERR ) + LWORK_TREVC = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, N + LWORK_TREVC ) + CALL ZHSEQR( 'S', 'V', N, 1, N, A, LDA, W, VL, LDVL, + $ WORK, -1, INFO ) + ELSE IF( WANTVR ) THEN + MAXWRK = MAX( MAXWRK, N + ( N - 1 )*ILAENV( 1, 'ZUNGHR', + $ ' ', N, 1, N, -1 ) ) + CALL ZTREVC3( 'R', 'B', SELECT, N, A, LDA, + $ VL, LDVL, VR, LDVR, + $ N, NOUT, WORK, -1, RWORK, -1, IERR ) + LWORK_TREVC = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, N + LWORK_TREVC ) + CALL ZHSEQR( 'S', 'V', N, 1, N, A, LDA, W, VR, LDVR, + $ WORK, -1, INFO ) + ELSE + CALL ZHSEQR( 'E', 'N', N, 1, N, A, LDA, W, VR, LDVR, + $ WORK, -1, INFO ) + END IF + HSWORK = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, HSWORK, MINWRK ) + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Balance the matrix +* (CWorkspace: none) +* (RWorkspace: need N) +* + IBAL = 1 + CALL ZGEBAL( 'B', N, A, LDA, ILO, IHI, RWORK( IBAL ), IERR ) +* +* Reduce to upper Hessenberg form +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + ITAU = 1 + IWRK = ITAU + N + CALL ZGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVL ) THEN +* +* Want left eigenvectors +* Copy Householder vectors to VL +* + SIDE = 'L' + CALL ZLACPY( 'L', N, N, A, LDA, VL, LDVL ) +* +* Generate unitary matrix in VL +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL ZUNGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VL +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL ZHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VL, LDVL, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + IF( WANTVR ) THEN +* +* Want left and right eigenvectors +* Copy Schur vectors to VR +* + SIDE = 'B' + CALL ZLACPY( 'F', N, N, VL, LDVL, VR, LDVR ) + END IF +* + ELSE IF( WANTVR ) THEN +* +* Want right eigenvectors +* Copy Householder vectors to VR +* + SIDE = 'R' + CALL ZLACPY( 'L', N, N, A, LDA, VR, LDVR ) +* +* Generate unitary matrix in VR +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL ZUNGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VR +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL ZHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + ELSE +* +* Compute eigenvalues only +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL ZHSEQR( 'E', 'N', N, ILO, IHI, A, LDA, W, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) + END IF +* +* If INFO .NE. 0 from ZHSEQR, then quit +* + IF( INFO.NE.0 ) + $ GO TO 50 +* + IF( WANTVL .OR. WANTVR ) THEN +* +* Compute left and/or right eigenvectors +* (CWorkspace: need 2*N, prefer N + 2*N*NB) +* (RWorkspace: need 2*N) +* + IRWORK = IBAL + N + CALL ZTREVC3( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ N, NOUT, WORK( IWRK ), LWORK-IWRK+1, + $ RWORK( IRWORK ), N, IERR ) + END IF +* + IF( WANTVL ) THEN +* +* Undo balancing of left eigenvectors +* (CWorkspace: none) +* (RWorkspace: need N) +* + CALL ZGEBAK( 'B', 'L', N, ILO, IHI, RWORK( IBAL ), N, VL, LDVL, + $ IERR ) +* +* Normalize left eigenvectors and make largest component real +* + DO 20 I = 1, N + SCL = ONE / DZNRM2( N, VL( 1, I ), 1 ) + CALL ZDSCAL( N, SCL, VL( 1, I ), 1 ) + DO 10 K = 1, N + RWORK( IRWORK+K-1 ) = DBLE( VL( K, I ) )**2 + + $ AIMAG( VL( K, I ) )**2 + 10 CONTINUE + K = IDAMAX( N, RWORK( IRWORK ), 1 ) + TMP = CONJG( VL( K, I ) ) / SQRT( RWORK( IRWORK+K-1 ) ) + CALL ZSCAL( N, TMP, VL( 1, I ), 1 ) + VL( K, I ) = DCMPLX( DBLE( VL( K, I ) ), ZERO ) + 20 CONTINUE + END IF +* + IF( WANTVR ) THEN +* +* Undo balancing of right eigenvectors +* (CWorkspace: none) +* (RWorkspace: need N) +* + CALL ZGEBAK( 'B', 'R', N, ILO, IHI, RWORK( IBAL ), N, VR, LDVR, + $ IERR ) +* +* Normalize right eigenvectors and make largest component real +* + DO 40 I = 1, N + SCL = ONE / DZNRM2( N, VR( 1, I ), 1 ) + CALL ZDSCAL( N, SCL, VR( 1, I ), 1 ) + DO 30 K = 1, N + RWORK( IRWORK+K-1 ) = DBLE( VR( K, I ) )**2 + + $ AIMAG( VR( K, I ) )**2 + 30 CONTINUE + K = IDAMAX( N, RWORK( IRWORK ), 1 ) + TMP = CONJG( VR( K, I ) ) / SQRT( RWORK( IRWORK+K-1 ) ) + CALL ZSCAL( N, TMP, VR( 1, I ), 1 ) + VR( K, I ) = DCMPLX( DBLE( VR( K, I ) ), ZERO ) + 40 CONTINUE + END IF +* +* Undo scaling if necessary +* + 50 CONTINUE + IF( SCALEA ) THEN + CALL ZLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, W( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + IF( INFO.GT.0 ) THEN + CALL ZLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, W, N, IERR ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of ZGEEV +* + END diff --git a/dspl/liblapack/SRC/zgeevx.f b/dspl/liblapack/SRC/zgeevx.f new file mode 100644 index 0000000..323782b --- /dev/null +++ b/dspl/liblapack/SRC/zgeevx.f @@ -0,0 +1,667 @@ +*> \brief ZGEEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, W, VL, +* LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, RCONDE, +* RCONDV, WORK, LWORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER BALANC, JOBVL, JOBVR, SENSE +* INTEGER IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N +* DOUBLE PRECISION ABNRM +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RCONDE( * ), RCONDV( * ), RWORK( * ), +* $ SCALE( * ) +* COMPLEX*16 A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), +* $ W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEEVX computes for an N-by-N complex nonsymmetric matrix A, the +*> eigenvalues and, optionally, the left and/or right eigenvectors. +*> +*> Optionally also, it computes a balancing transformation to improve +*> the conditioning of the eigenvalues and eigenvectors (ILO, IHI, +*> SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues +*> (RCONDE), and reciprocal condition numbers for the right +*> eigenvectors (RCONDV). +*> +*> The right eigenvector v(j) of A satisfies +*> A * v(j) = lambda(j) * v(j) +*> where lambda(j) is its eigenvalue. +*> The left eigenvector u(j) of A satisfies +*> u(j)**H * A = lambda(j) * u(j)**H +*> where u(j)**H denotes the conjugate transpose of u(j). +*> +*> The computed eigenvectors are normalized to have Euclidean norm +*> equal to 1 and largest component real. +*> +*> Balancing a matrix means permuting the rows and columns to make it +*> more nearly upper triangular, and applying a diagonal similarity +*> transformation D * A * D**(-1), where D is a diagonal matrix, to +*> make its rows and columns closer in norm and the condition numbers +*> of its eigenvalues and eigenvectors smaller. The computed +*> reciprocal condition numbers correspond to the balanced matrix. +*> Permuting rows and columns will not change the condition numbers +*> (in exact arithmetic) but diagonal scaling will. For further +*> explanation of balancing, see section 4.10.2 of the LAPACK +*> Users' Guide. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] BALANC +*> \verbatim +*> BALANC is CHARACTER*1 +*> Indicates how the input matrix should be diagonally scaled +*> and/or permuted to improve the conditioning of its +*> eigenvalues. +*> = 'N': Do not diagonally scale or permute; +*> = 'P': Perform permutations to make the matrix more nearly +*> upper triangular. Do not diagonally scale; +*> = 'S': Diagonally scale the matrix, ie. replace A by +*> D*A*D**(-1), where D is a diagonal matrix chosen +*> to make the rows and columns of A more equal in +*> norm. Do not permute; +*> = 'B': Both diagonally scale and permute A. +*> +*> Computed reciprocal condition numbers will be for the matrix +*> after balancing and/or permuting. Permuting does not change +*> condition numbers (in exact arithmetic), but balancing does. +*> \endverbatim +*> +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': left eigenvectors of A are not computed; +*> = 'V': left eigenvectors of A are computed. +*> If SENSE = 'E' or 'B', JOBVL must = 'V'. +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': right eigenvectors of A are not computed; +*> = 'V': right eigenvectors of A are computed. +*> If SENSE = 'E' or 'B', JOBVR must = 'V'. +*> \endverbatim +*> +*> \param[in] SENSE +*> \verbatim +*> SENSE is CHARACTER*1 +*> Determines which reciprocal condition numbers are computed. +*> = 'N': None are computed; +*> = 'E': Computed for eigenvalues only; +*> = 'V': Computed for right eigenvectors only; +*> = 'B': Computed for eigenvalues and right eigenvectors. +*> +*> If SENSE = 'E' or 'B', both left and right eigenvectors +*> must also be computed (JOBVL = 'V' and JOBVR = 'V'). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> On exit, A has been overwritten. If JOBVL = 'V' or +*> JOBVR = 'V', A contains the Schur form of the balanced +*> version of the matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX*16 array, dimension (N) +*> W contains the computed eigenvalues. +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is COMPLEX*16 array, dimension (LDVL,N) +*> If JOBVL = 'V', the left eigenvectors u(j) are stored one +*> after another in the columns of VL, in the same order +*> as their eigenvalues. +*> If JOBVL = 'N', VL is not referenced. +*> u(j) = VL(:,j), the j-th column of VL. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. LDVL >= 1; if +*> JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is COMPLEX*16 array, dimension (LDVR,N) +*> If JOBVR = 'V', the right eigenvectors v(j) are stored one +*> after another in the columns of VR, in the same order +*> as their eigenvalues. +*> If JOBVR = 'N', VR is not referenced. +*> v(j) = VR(:,j), the j-th column of VR. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. LDVR >= 1; if +*> JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[out] IHI +*> \verbatim +*> IHI is INTEGER +*> ILO and IHI are integer values determined when A was +*> balanced. The balanced A(i,j) = 0 if I > J and +*> J = 1,...,ILO-1 or I = IHI+1,...,N. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION array, dimension (N) +*> Details of the permutations and scaling factors applied +*> when balancing A. If P(j) is the index of the row and column +*> interchanged with row and column j, and D(j) is the scaling +*> factor applied to row and column j, then +*> SCALE(J) = P(J), for J = 1,...,ILO-1 +*> = D(J), for J = ILO,...,IHI +*> = P(J) for J = IHI+1,...,N. +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] ABNRM +*> \verbatim +*> ABNRM is DOUBLE PRECISION +*> The one-norm of the balanced matrix (the maximum +*> of the sum of absolute values of elements of any column). +*> \endverbatim +*> +*> \param[out] RCONDE +*> \verbatim +*> RCONDE is DOUBLE PRECISION array, dimension (N) +*> RCONDE(j) is the reciprocal condition number of the j-th +*> eigenvalue. +*> \endverbatim +*> +*> \param[out] RCONDV +*> \verbatim +*> RCONDV is DOUBLE PRECISION array, dimension (N) +*> RCONDV(j) is the reciprocal condition number of the j-th +*> right eigenvector. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. If SENSE = 'N' or 'E', +*> LWORK >= max(1,2*N), and if SENSE = 'V' or 'B', +*> LWORK >= N*N+2*N. +*> For good performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the QR algorithm failed to compute all the +*> eigenvalues, and no eigenvectors or condition numbers +*> have been computed; elements 1:ILO-1 and i+1:N of W +*> contain eigenvalues which have converged. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +* @precisions fortran z -> c +* +*> \ingroup complex16GEeigen +* +* ===================================================================== + SUBROUTINE ZGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, W, VL, + $ LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, RCONDE, + $ RCONDV, WORK, LWORK, RWORK, INFO ) + implicit none +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER BALANC, JOBVL, JOBVR, SENSE + INTEGER IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N + DOUBLE PRECISION ABNRM +* .. +* .. Array Arguments .. + DOUBLE PRECISION RCONDE( * ), RCONDV( * ), RWORK( * ), + $ SCALE( * ) + COMPLEX*16 A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ), + $ W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SCALEA, WANTVL, WANTVR, WNTSNB, WNTSNE, + $ WNTSNN, WNTSNV + CHARACTER JOB, SIDE + INTEGER HSWORK, I, ICOND, IERR, ITAU, IWRK, K, + $ LWORK_TREVC, MAXWRK, MINWRK, NOUT + DOUBLE PRECISION ANRM, BIGNUM, CSCALE, EPS, SCL, SMLNUM + COMPLEX*16 TMP +* .. +* .. Local Arrays .. + LOGICAL SELECT( 1 ) + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, DLASCL, XERBLA, ZDSCAL, ZGEBAK, ZGEBAL, + $ ZGEHRD, ZHSEQR, ZLACPY, ZLASCL, ZSCAL, ZTREVC3, + $ ZTRSNA, ZUNGHR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX, ILAENV + DOUBLE PRECISION DLAMCH, DZNRM2, ZLANGE + EXTERNAL LSAME, IDAMAX, ILAENV, DLAMCH, DZNRM2, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, CONJG, AIMAG, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + WANTVL = LSAME( JOBVL, 'V' ) + WANTVR = LSAME( JOBVR, 'V' ) + WNTSNN = LSAME( SENSE, 'N' ) + WNTSNE = LSAME( SENSE, 'E' ) + WNTSNV = LSAME( SENSE, 'V' ) + WNTSNB = LSAME( SENSE, 'B' ) + IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC, 'S' ) .OR. + $ LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) ) THEN + INFO = -1 + ELSE IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( .NOT.( WNTSNN .OR. WNTSNE .OR. WNTSNB .OR. WNTSNV ) .OR. + $ ( ( WNTSNE .OR. WNTSNB ) .AND. .NOT.( WANTVL .AND. + $ WANTVR ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN + INFO = -10 + ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN + INFO = -12 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to real +* workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV. +* HSWORK refers to the workspace preferred by ZHSEQR, as +* calculated below. HSWORK is computed assuming ILO=1 and IHI=N, +* the worst case.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + ELSE + MAXWRK = N + N*ILAENV( 1, 'ZGEHRD', ' ', N, 1, N, 0 ) +* + IF( WANTVL ) THEN + CALL ZTREVC3( 'L', 'B', SELECT, N, A, LDA, + $ VL, LDVL, VR, LDVR, + $ N, NOUT, WORK, -1, RWORK, -1, IERR ) + LWORK_TREVC = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, LWORK_TREVC ) + CALL ZHSEQR( 'S', 'V', N, 1, N, A, LDA, W, VL, LDVL, + $ WORK, -1, INFO ) + ELSE IF( WANTVR ) THEN + CALL ZTREVC3( 'R', 'B', SELECT, N, A, LDA, + $ VL, LDVL, VR, LDVR, + $ N, NOUT, WORK, -1, RWORK, -1, IERR ) + LWORK_TREVC = INT( WORK(1) ) + MAXWRK = MAX( MAXWRK, LWORK_TREVC ) + CALL ZHSEQR( 'S', 'V', N, 1, N, A, LDA, W, VR, LDVR, + $ WORK, -1, INFO ) + ELSE + IF( WNTSNN ) THEN + CALL ZHSEQR( 'E', 'N', N, 1, N, A, LDA, W, VR, LDVR, + $ WORK, -1, INFO ) + ELSE + CALL ZHSEQR( 'S', 'N', N, 1, N, A, LDA, W, VR, LDVR, + $ WORK, -1, INFO ) + END IF + END IF + HSWORK = INT( WORK(1) ) +* + IF( ( .NOT.WANTVL ) .AND. ( .NOT.WANTVR ) ) THEN + MINWRK = 2*N + IF( .NOT.( WNTSNN .OR. WNTSNE ) ) + $ MINWRK = MAX( MINWRK, N*N + 2*N ) + MAXWRK = MAX( MAXWRK, HSWORK ) + IF( .NOT.( WNTSNN .OR. WNTSNE ) ) + $ MAXWRK = MAX( MAXWRK, N*N + 2*N ) + ELSE + MINWRK = 2*N + IF( .NOT.( WNTSNN .OR. WNTSNE ) ) + $ MINWRK = MAX( MINWRK, N*N + 2*N ) + MAXWRK = MAX( MAXWRK, HSWORK ) + MAXWRK = MAX( MAXWRK, N + ( N - 1 )*ILAENV( 1, 'ZUNGHR', + $ ' ', N, 1, N, -1 ) ) + IF( .NOT.( WNTSNN .OR. WNTSNE ) ) + $ MAXWRK = MAX( MAXWRK, N*N + 2*N ) + MAXWRK = MAX( MAXWRK, 2*N ) + END IF + MAXWRK = MAX( MAXWRK, MINWRK ) + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ICOND = 0 + ANRM = ZLANGE( 'M', N, N, A, LDA, DUM ) + SCALEA = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + SCALEA = .TRUE. + CSCALE = SMLNUM + ELSE IF( ANRM.GT.BIGNUM ) THEN + SCALEA = .TRUE. + CSCALE = BIGNUM + END IF + IF( SCALEA ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR ) +* +* Balance the matrix and compute ABNRM +* + CALL ZGEBAL( BALANC, N, A, LDA, ILO, IHI, SCALE, IERR ) + ABNRM = ZLANGE( '1', N, N, A, LDA, DUM ) + IF( SCALEA ) THEN + DUM( 1 ) = ABNRM + CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR ) + ABNRM = DUM( 1 ) + END IF +* +* Reduce to upper Hessenberg form +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + ITAU = 1 + IWRK = ITAU + N + CALL ZGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* + IF( WANTVL ) THEN +* +* Want left eigenvectors +* Copy Householder vectors to VL +* + SIDE = 'L' + CALL ZLACPY( 'L', N, N, A, LDA, VL, LDVL ) +* +* Generate unitary matrix in VL +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL ZUNGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VL +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL ZHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VL, LDVL, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + IF( WANTVR ) THEN +* +* Want left and right eigenvectors +* Copy Schur vectors to VR +* + SIDE = 'B' + CALL ZLACPY( 'F', N, N, VL, LDVL, VR, LDVR ) + END IF +* + ELSE IF( WANTVR ) THEN +* +* Want right eigenvectors +* Copy Householder vectors to VR +* + SIDE = 'R' + CALL ZLACPY( 'L', N, N, A, LDA, VR, LDVR ) +* +* Generate unitary matrix in VR +* (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) +* (RWorkspace: none) +* + CALL ZUNGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ), + $ LWORK-IWRK+1, IERR ) +* +* Perform QR iteration, accumulating Schur vectors in VR +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL ZHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) +* + ELSE +* +* Compute eigenvalues only +* If condition numbers desired, compute Schur form +* + IF( WNTSNN ) THEN + JOB = 'E' + ELSE + JOB = 'S' + END IF +* +* (CWorkspace: need 1, prefer HSWORK (see comments) ) +* (RWorkspace: none) +* + IWRK = ITAU + CALL ZHSEQR( JOB, 'N', N, ILO, IHI, A, LDA, W, VR, LDVR, + $ WORK( IWRK ), LWORK-IWRK+1, INFO ) + END IF +* +* If INFO .NE. 0 from ZHSEQR, then quit +* + IF( INFO.NE.0 ) + $ GO TO 50 +* + IF( WANTVL .OR. WANTVR ) THEN +* +* Compute left and/or right eigenvectors +* (CWorkspace: need 2*N, prefer N + 2*N*NB) +* (RWorkspace: need N) +* + CALL ZTREVC3( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ N, NOUT, WORK( IWRK ), LWORK-IWRK+1, + $ RWORK, N, IERR ) + END IF +* +* Compute condition numbers if desired +* (CWorkspace: need N*N+2*N unless SENSE = 'E') +* (RWorkspace: need 2*N unless SENSE = 'E') +* + IF( .NOT.WNTSNN ) THEN + CALL ZTRSNA( SENSE, 'A', SELECT, N, A, LDA, VL, LDVL, VR, LDVR, + $ RCONDE, RCONDV, N, NOUT, WORK( IWRK ), N, RWORK, + $ ICOND ) + END IF +* + IF( WANTVL ) THEN +* +* Undo balancing of left eigenvectors +* + CALL ZGEBAK( BALANC, 'L', N, ILO, IHI, SCALE, N, VL, LDVL, + $ IERR ) +* +* Normalize left eigenvectors and make largest component real +* + DO 20 I = 1, N + SCL = ONE / DZNRM2( N, VL( 1, I ), 1 ) + CALL ZDSCAL( N, SCL, VL( 1, I ), 1 ) + DO 10 K = 1, N + RWORK( K ) = DBLE( VL( K, I ) )**2 + + $ AIMAG( VL( K, I ) )**2 + 10 CONTINUE + K = IDAMAX( N, RWORK, 1 ) + TMP = CONJG( VL( K, I ) ) / SQRT( RWORK( K ) ) + CALL ZSCAL( N, TMP, VL( 1, I ), 1 ) + VL( K, I ) = DCMPLX( DBLE( VL( K, I ) ), ZERO ) + 20 CONTINUE + END IF +* + IF( WANTVR ) THEN +* +* Undo balancing of right eigenvectors +* + CALL ZGEBAK( BALANC, 'R', N, ILO, IHI, SCALE, N, VR, LDVR, + $ IERR ) +* +* Normalize right eigenvectors and make largest component real +* + DO 40 I = 1, N + SCL = ONE / DZNRM2( N, VR( 1, I ), 1 ) + CALL ZDSCAL( N, SCL, VR( 1, I ), 1 ) + DO 30 K = 1, N + RWORK( K ) = DBLE( VR( K, I ) )**2 + + $ AIMAG( VR( K, I ) )**2 + 30 CONTINUE + K = IDAMAX( N, RWORK, 1 ) + TMP = CONJG( VR( K, I ) ) / SQRT( RWORK( K ) ) + CALL ZSCAL( N, TMP, VR( 1, I ), 1 ) + VR( K, I ) = DCMPLX( DBLE( VR( K, I ) ), ZERO ) + 40 CONTINUE + END IF +* +* Undo scaling if necessary +* + 50 CONTINUE + IF( SCALEA ) THEN + CALL ZLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, W( INFO+1 ), + $ MAX( N-INFO, 1 ), IERR ) + IF( INFO.EQ.0 ) THEN + IF( ( WNTSNV .OR. WNTSNB ) .AND. ICOND.EQ.0 ) + $ CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, RCONDV, N, + $ IERR ) + ELSE + CALL ZLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, W, N, IERR ) + END IF + END IF +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of ZGEEVX +* + END diff --git a/dspl/liblapack/SRC/zgehd2.f b/dspl/liblapack/SRC/zgehd2.f new file mode 100644 index 0000000..12bac62 --- /dev/null +++ b/dspl/liblapack/SRC/zgehd2.f @@ -0,0 +1,224 @@ +*> \brief \b ZGEHD2 reduces a general square matrix to upper Hessenberg form using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEHD2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEHD2 reduces a complex general matrix A to upper Hessenberg form H +*> by a unitary similarity transformation: Q**H * A * Q = H . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> It is assumed that A is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +*> set by a previous call to ZGEBAL; otherwise they should be +*> set to 1 and N respectively. See Further Details. +*> 1 <= ILO <= IHI <= max(1,N). +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the n by n general matrix to be reduced. +*> On exit, the upper triangle and the first subdiagonal of A +*> are overwritten with the upper Hessenberg matrix H, and the +*> elements below the first subdiagonal, with the array TAU, +*> represent the unitary matrix Q as a product of elementary +*> reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of (ihi-ilo) elementary +*> reflectors +*> +*> Q = H(ilo) H(ilo+1) . . . H(ihi-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +*> exit in A(i+2:ihi,i), and tau in TAU(i). +*> +*> The contents of A are illustrated by the following example, with +*> n = 7, ilo = 2 and ihi = 6: +*> +*> on entry, on exit, +*> +*> ( a a a a a a a ) ( a a h h h h a ) +*> ( a a a a a a ) ( a h h h h a ) +*> ( a a a a a a ) ( h h h h h h ) +*> ( a a a a a a ) ( v2 h h h h h ) +*> ( a a a a a a ) ( v2 v3 h h h h ) +*> ( a a a a a a ) ( v2 v3 v4 h h h ) +*> ( a ) ( a ) +*> +*> where a denotes an element of the original matrix A, h denotes a +*> modified element of the upper Hessenberg matrix H, and vi denotes an +*> element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARF, ZLARFG +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEHD2', -INFO ) + RETURN + END IF +* + DO 10 I = ILO, IHI - 1 +* +* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) +* + ALPHA = A( I+1, I ) + CALL ZLARFG( IHI-I, ALPHA, A( MIN( I+2, N ), I ), 1, TAU( I ) ) + A( I+1, I ) = ONE +* +* Apply H(i) to A(1:ihi,i+1:ihi) from the right +* + CALL ZLARF( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ), + $ A( 1, I+1 ), LDA, WORK ) +* +* Apply H(i)**H to A(i+1:ihi,i+1:n) from the left +* + CALL ZLARF( 'Left', IHI-I, N-I, A( I+1, I ), 1, + $ DCONJG( TAU( I ) ), A( I+1, I+1 ), LDA, WORK ) +* + A( I+1, I ) = ALPHA + 10 CONTINUE +* + RETURN +* +* End of ZGEHD2 +* + END diff --git a/dspl/liblapack/SRC/zgehrd.f b/dspl/liblapack/SRC/zgehrd.f new file mode 100644 index 0000000..d8ddf24 --- /dev/null +++ b/dspl/liblapack/SRC/zgehrd.f @@ -0,0 +1,356 @@ +*> \brief \b ZGEHRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEHRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEHRD reduces a complex general matrix A to upper Hessenberg form H by +*> an unitary similarity transformation: Q**H * A * Q = H . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> It is assumed that A is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +*> set by a previous call to ZGEBAL; otherwise they should be +*> set to 1 and N respectively. See Further Details. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N general matrix to be reduced. +*> On exit, the upper triangle and the first subdiagonal of A +*> are overwritten with the upper Hessenberg matrix H, and the +*> elements below the first subdiagonal, with the array TAU, +*> represent the unitary matrix Q as a product of elementary +*> reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to +*> zero. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,N). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of (ihi-ilo) elementary +*> reflectors +*> +*> Q = H(ilo) H(ilo+1) . . . H(ihi-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on +*> exit in A(i+2:ihi,i), and tau in TAU(i). +*> +*> The contents of A are illustrated by the following example, with +*> n = 7, ilo = 2 and ihi = 6: +*> +*> on entry, on exit, +*> +*> ( a a a a a a a ) ( a a h h h h a ) +*> ( a a a a a a ) ( a h h h h a ) +*> ( a a a a a a ) ( h h h h h h ) +*> ( a a a a a a ) ( v2 h h h h h ) +*> ( a a a a a a ) ( v2 v3 h h h h ) +*> ( a a a a a a ) ( v2 v3 v4 h h h ) +*> ( a ) ( a ) +*> +*> where a denotes an element of the original matrix A, h denotes a +*> modified element of the upper Hessenberg matrix H, and vi denotes an +*> element of the vector defining H(i). +*> +*> This file is a slight modification of LAPACK-3.0's DGEHRD +*> subroutine incorporating improvements proposed by Quintana-Orti and +*> Van de Geijn (2006). (See DLAHR2.) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWT, J, LDWORK, LWKOPT, NB, + $ NBMIN, NH, NX + COMPLEX*16 EI +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZGEHD2, ZGEMM, ZLAHR2, ZLARFB, ZTRMM, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + NB = MIN( NBMAX, ILAENV( 1, 'ZGEHRD', ' ', N, ILO, IHI, -1 ) ) + LWKOPT = N*NB + TSIZE + WORK( 1 ) = LWKOPT + ENDIF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEHRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Set elements 1:ILO-1 and IHI:N-1 of TAU to zero +* + DO 10 I = 1, ILO - 1 + TAU( I ) = ZERO + 10 CONTINUE + DO 20 I = MAX( 1, IHI ), N - 1 + TAU( I ) = ZERO + 20 CONTINUE +* +* Quick return if possible +* + NH = IHI - ILO + 1 + IF( NH.LE.1 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine the block size +* + NB = MIN( NBMAX, ILAENV( 1, 'ZGEHRD', ' ', N, ILO, IHI, -1 ) ) + NBMIN = 2 + IF( NB.GT.1 .AND. NB.LT.NH ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code) +* + NX = MAX( NB, ILAENV( 3, 'ZGEHRD', ' ', N, ILO, IHI, -1 ) ) + IF( NX.LT.NH ) THEN +* +* Determine if workspace is large enough for blocked code +* + IF( LWORK.LT.N*NB+TSIZE ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code +* + NBMIN = MAX( 2, ILAENV( 2, 'ZGEHRD', ' ', N, ILO, IHI, + $ -1 ) ) + IF( LWORK.GE.(N*NBMIN + TSIZE) ) THEN + NB = (LWORK-TSIZE) / N + ELSE + NB = 1 + END IF + END IF + END IF + END IF + LDWORK = N +* + IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN +* +* Use unblocked code below +* + I = ILO +* + ELSE +* +* Use blocked code +* + IWT = 1 + N*NB + DO 40 I = ILO, IHI - 1 - NX, NB + IB = MIN( NB, IHI-I ) +* +* Reduce columns i:i+ib-1 to Hessenberg form, returning the +* matrices V and T of the block reflector H = I - V*T*V**H +* which performs the reduction, and also the matrix Y = A*V*T +* + CALL ZLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), + $ WORK( IWT ), LDT, WORK, LDWORK ) +* +* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the +* right, computing A := A - Y * V**H. V(i+ib,ib-1) must be set +* to 1 +* + EI = A( I+IB, I+IB-1 ) + A( I+IB, I+IB-1 ) = ONE + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ IHI, IHI-I-IB+1, + $ IB, -ONE, WORK, LDWORK, A( I+IB, I ), LDA, ONE, + $ A( 1, I+IB ), LDA ) + A( I+IB, I+IB-1 ) = EI +* +* Apply the block reflector H to A(1:i,i+1:i+ib-1) from the +* right +* + CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', I, IB-1, + $ ONE, A( I+1, I ), LDA, WORK, LDWORK ) + DO 30 J = 0, IB-2 + CALL ZAXPY( I, -ONE, WORK( LDWORK*J+1 ), 1, + $ A( 1, I+J+1 ), 1 ) + 30 CONTINUE +* +* Apply the block reflector H to A(i+1:ihi,i+ib:n) from the +* left +* + CALL ZLARFB( 'Left', 'Conjugate transpose', 'Forward', + $ 'Columnwise', + $ IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, + $ WORK( IWT ), LDT, A( I+1, I+IB ), LDA, + $ WORK, LDWORK ) + 40 CONTINUE + END IF +* +* Use unblocked code to reduce the rest of the matrix +* + CALL ZGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO ) + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZGEHRD +* + END diff --git a/dspl/liblapack/SRC/zgejsv.f b/dspl/liblapack/SRC/zgejsv.f new file mode 100644 index 0000000..e8418c6 --- /dev/null +++ b/dspl/liblapack/SRC/zgejsv.f @@ -0,0 +1,2237 @@ +*> \brief \b ZGEJSV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEJSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, +* M, N, A, LDA, SVA, U, LDU, V, LDV, +* CWORK, LWORK, RWORK, LRWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* IMPLICIT NONE +* INTEGER INFO, LDA, LDU, LDV, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), U( LDU, * ), V( LDV, * ), CWORK( LWORK ) +* DOUBLE PRECISION SVA( N ), RWORK( LRWORK ) +* INTEGER IWORK( * ) +* CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEJSV computes the singular value decomposition (SVD) of a complex M-by-N +*> matrix [A], where M >= N. The SVD of [A] is written as +*> +*> [A] = [U] * [SIGMA] * [V]^*, +*> +*> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N +*> diagonal elements, [U] is an M-by-N (or M-by-M) unitary matrix, and +*> [V] is an N-by-N unitary matrix. The diagonal elements of [SIGMA] are +*> the singular values of [A]. The columns of [U] and [V] are the left and +*> the right singular vectors of [A], respectively. The matrices [U] and [V] +*> are computed and stored in the arrays U and V, respectively. The diagonal +*> of [SIGMA] is computed and stored in the array SVA. +*> \endverbatim +*> +*> Arguments: +*> ========== +*> +*> \param[in] JOBA +*> \verbatim +*> JOBA is CHARACTER*1 +*> Specifies the level of accuracy: +*> = 'C': This option works well (high relative accuracy) if A = B * D, +*> with well-conditioned B and arbitrary diagonal matrix D. +*> The accuracy cannot be spoiled by COLUMN scaling. The +*> accuracy of the computed output depends on the condition of +*> B, and the procedure aims at the best theoretical accuracy. +*> The relative error max_{i=1:N}|d sigma_i| / sigma_i is +*> bounded by f(M,N)*epsilon* cond(B), independent of D. +*> The input matrix is preprocessed with the QRF with column +*> pivoting. This initial preprocessing and preconditioning by +*> a rank revealing QR factorization is common for all values of +*> JOBA. Additional actions are specified as follows: +*> = 'E': Computation as with 'C' with an additional estimate of the +*> condition number of B. It provides a realistic error bound. +*> = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings +*> D1, D2, and well-conditioned matrix C, this option gives +*> higher accuracy than the 'C' option. If the structure of the +*> input matrix is not known, and relative accuracy is +*> desirable, then this option is advisable. The input matrix A +*> is preprocessed with QR factorization with FULL (row and +*> column) pivoting. +*> = 'G' Computation as with 'F' with an additional estimate of the +*> condition number of B, where A=B*D. If A has heavily weighted +*> rows, then using this condition number gives too pessimistic +*> error bound. +*> = 'A': Small singular values are not well determined by the data +*> and are considered as noisy; the matrix is treated as +*> numerically rank defficient. The error in the computed +*> singular values is bounded by f(m,n)*epsilon*||A||. +*> The computed SVD A = U * S * V^* restores A up to +*> f(m,n)*epsilon*||A||. +*> This gives the procedure the licence to discard (set to zero) +*> all singular values below N*epsilon*||A||. +*> = 'R': Similar as in 'A'. Rank revealing property of the initial +*> QR factorization is used do reveal (using triangular factor) +*> a gap sigma_{r+1} < epsilon * sigma_r in which case the +*> numerical RANK is declared to be r. The SVD is computed with +*> absolute error bounds, but more accurately than with 'A'. +*> \endverbatim +*> +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> Specifies whether to compute the columns of U: +*> = 'U': N columns of U are returned in the array U. +*> = 'F': full set of M left sing. vectors is returned in the array U. +*> = 'W': U may be used as workspace of length M*N. See the description +*> of U. +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> Specifies whether to compute the matrix V: +*> = 'V': N columns of V are returned in the array V; Jacobi rotations +*> are not explicitly accumulated. +*> = 'J': N columns of V are returned in the array V, but they are +*> computed as the product of Jacobi rotations, if JOBT .EQ. 'N'. +*> = 'W': V may be used as workspace of length N*N. See the description +*> of V. +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBR +*> \verbatim +*> JOBR is CHARACTER*1 +*> Specifies the RANGE for the singular values. Issues the licence to +*> set to zero small positive singular values if they are outside +*> specified range. If A .NE. 0 is scaled so that the largest singular +*> value of c*A is around SQRT(BIG), BIG=DLAMCH('O'), then JOBR issues +*> the licence to kill columns of A whose norm in c*A is less than +*> SQRT(SFMIN) (for JOBR.EQ.'R'), or less than SMALL=SFMIN/EPSLN, +*> where SFMIN=DLAMCH('S'), EPSLN=DLAMCH('E'). +*> = 'N': Do not kill small columns of c*A. This option assumes that +*> BLAS and QR factorizations and triangular solvers are +*> implemented to work in that range. If the condition of A +*> is greater than BIG, use ZGESVJ. +*> = 'R': RESTRICTED range for sigma(c*A) is [SQRT(SFMIN), SQRT(BIG)] +*> (roughly, as described above). This option is recommended. +*> =========================== +*> For computing the singular values in the FULL range [SFMIN,BIG] +*> use ZGESVJ. +*> \endverbatim +*> +*> \param[in] JOBT +*> \verbatim +*> JOBT is CHARACTER*1 +*> If the matrix is square then the procedure may determine to use +*> transposed A if A^* seems to be better with respect to convergence. +*> If the matrix is not square, JOBT is ignored. +*> The decision is based on two values of entropy over the adjoint +*> orbit of A^* * A. See the descriptions of WORK(6) and WORK(7). +*> = 'T': transpose if entropy test indicates possibly faster +*> convergence of Jacobi process if A^* is taken as input. If A is +*> replaced with A^*, then the row pivoting is included automatically. +*> = 'N': do not speculate. +*> The option 'T' can be used to compute only the singular values, or +*> the full SVD (U, SIGMA and V). For only one set of singular vectors +*> (U or V), the caller should provide both U and V, as one of the +*> matrices is used as workspace if the matrix A is transposed. +*> The implementer can easily remove this constraint and make the +*> code more complicated. See the descriptions of U and V. +*> In general, this option is considered experimental, and 'N'; should +*> be preferred. This is subject to changes in the future. +*> \endverbatim +*> +*> \param[in] JOBP +*> \verbatim +*> JOBP is CHARACTER*1 +*> Issues the licence to introduce structured perturbations to drown +*> denormalized numbers. This licence should be active if the +*> denormals are poorly implemented, causing slow computation, +*> especially in cases of fast convergence (!). For details see [1,2]. +*> For the sake of simplicity, this perturbations are included only +*> when the full SVD or only the singular values are requested. The +*> implementer/user can easily add the perturbation for the cases of +*> computing one set of singular vectors. +*> = 'P': introduce perturbation +*> = 'N': do not perturb +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. M >= N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] SVA +*> \verbatim +*> SVA is DOUBLE PRECISION array, dimension (N) +*> On exit, +*> - For WORK(1)/WORK(2) = ONE: The singular values of A. During the +*> computation SVA contains Euclidean column norms of the +*> iterated matrices in the array A. +*> - For WORK(1) .NE. WORK(2): The singular values of A are +*> (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if +*> sigma_max(A) overflows or if small singular values have been +*> saved from underflow by scaling the input matrix A. +*> - If JOBR='R' then some of the singular values may be returned +*> as exact zeros obtained by "set to zero" because they are +*> below the numerical rank threshold or are denormalized numbers. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX*16 array, dimension ( LDU, N ) +*> If JOBU = 'U', then U contains on exit the M-by-N matrix of +*> the left singular vectors. +*> If JOBU = 'F', then U contains on exit the M-by-M matrix of +*> the left singular vectors, including an ONB +*> of the orthogonal complement of the Range(A). +*> If JOBU = 'W' .AND. (JOBV.EQ.'V' .AND. JOBT.EQ.'T' .AND. M.EQ.N), +*> then U is used as workspace if the procedure +*> replaces A with A^*. In that case, [V] is computed +*> in U as left singular vectors of A^* and then +*> copied back to the V array. This 'W' option is just +*> a reminder to the caller that in this case U is +*> reserved as workspace of length N*N. +*> If JOBU = 'N' U is not referenced, unless JOBT='T'. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U, LDU >= 1. +*> IF JOBU = 'U' or 'F' or 'W', then LDU >= M. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension ( LDV, N ) +*> If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of +*> the right singular vectors; +*> If JOBV = 'W', AND (JOBU.EQ.'U' AND JOBT.EQ.'T' AND M.EQ.N), +*> then V is used as workspace if the pprocedure +*> replaces A with A^*. In that case, [U] is computed +*> in V as right singular vectors of A^* and then +*> copied back to the U array. This 'W' option is just +*> a reminder to the caller that in this case V is +*> reserved as workspace of length N*N. +*> If JOBV = 'N' V is not referenced, unless JOBT='T'. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V, LDV >= 1. +*> If JOBV = 'V' or 'J' or 'W', then LDV >= N. +*> \endverbatim +*> +*> \param[out] CWORK +*> \verbatim +*> CWORK is COMPLEX*16 array, dimension (MAX(2,LWORK)) +*> If the call to ZGEJSV is a workspace query (indicated by LWORK=-1 or +*> LRWORK=-1), then on exit CWORK(1) contains the required length of +*> CWORK for the job parameters used in the call. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> Length of CWORK to confirm proper allocation of workspace. +*> LWORK depends on the job: +*> +*> 1. If only SIGMA is needed ( JOBU.EQ.'N', JOBV.EQ.'N' ) and +*> 1.1 .. no scaled condition estimate required (JOBA.NE.'E'.AND.JOBA.NE.'G'): +*> LWORK >= 2*N+1. This is the minimal requirement. +*> ->> For optimal performance (blocked code) the optimal value +*> is LWORK >= N + (N+1)*NB. Here NB is the optimal +*> block size for ZGEQP3 and ZGEQRF. +*> In general, optimal LWORK is computed as +*> LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZGEQRF), LWORK(ZGESVJ)). +*> 1.2. .. an estimate of the scaled condition number of A is +*> required (JOBA='E', or 'G'). In this case, LWORK the minimal +*> requirement is LWORK >= N*N + 2*N. +*> ->> For optimal performance (blocked code) the optimal value +*> is LWORK >= max(N+(N+1)*NB, N*N+2*N)=N**2+2*N. +*> In general, the optimal length LWORK is computed as +*> LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZGEQRF), LWORK(ZGESVJ), +*> N*N+LWORK(ZPOCON)). +*> 2. If SIGMA and the right singular vectors are needed (JOBV.EQ.'V'), +*> (JOBU.EQ.'N') +*> 2.1 .. no scaled condition estimate requested (JOBE.EQ.'N'): +*> -> the minimal requirement is LWORK >= 3*N. +*> -> For optimal performance, +*> LWORK >= max(N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, +*> where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQ, +*> ZUNMLQ. In general, the optimal length LWORK is computed as +*> LWORK >= max(N+LWORK(ZGEQP3), N+LWORK(ZGESVJ), +*> N+LWORK(ZGELQF), 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMLQ)). +*> 2.2 .. an estimate of the scaled condition number of A is +*> required (JOBA='E', or 'G'). +*> -> the minimal requirement is LWORK >= 3*N. +*> -> For optimal performance, +*> LWORK >= max(N+(N+1)*NB, 2*N,2*N+N*NB)=2*N+N*NB, +*> where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQ, +*> ZUNMLQ. In general, the optimal length LWORK is computed as +*> LWORK >= max(N+LWORK(ZGEQP3), LWORK(ZPOCON), N+LWORK(ZGESVJ), +*> N+LWORK(ZGELQF), 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMLQ)). +*> 3. If SIGMA and the left singular vectors are needed +*> 3.1 .. no scaled condition estimate requested (JOBE.EQ.'N'): +*> -> the minimal requirement is LWORK >= 3*N. +*> -> For optimal performance: +*> if JOBU.EQ.'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, +*> where NB is the optimal block size for ZGEQP3, ZGEQRF, ZUNMQR. +*> In general, the optimal length LWORK is computed as +*> LWORK >= max(N+LWORK(ZGEQP3), 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMQR)). +*> 3.2 .. an estimate of the scaled condition number of A is +*> required (JOBA='E', or 'G'). +*> -> the minimal requirement is LWORK >= 3*N. +*> -> For optimal performance: +*> if JOBU.EQ.'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB, +*> where NB is the optimal block size for ZGEQP3, ZGEQRF, ZUNMQR. +*> In general, the optimal length LWORK is computed as +*> LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZPOCON), +*> 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMQR)). +*> 4. If the full SVD is needed: (JOBU.EQ.'U' or JOBU.EQ.'F') and +*> 4.1. if JOBV.EQ.'V' +*> the minimal requirement is LWORK >= 5*N+2*N*N. +*> 4.2. if JOBV.EQ.'J' the minimal requirement is +*> LWORK >= 4*N+N*N. +*> In both cases, the allocated CWORK can accommodate blocked runs +*> of ZGEQP3, ZGEQRF, ZGELQF, SUNMQR, ZUNMLQ. +*> +*> If the call to ZGEJSV is a workspace query (indicated by LWORK=-1 or +*> LRWORK=-1), then on exit CWORK(1) contains the optimal and CWORK(2) contains the +*> minimal length of CWORK for the job parameters used in the call. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (MAX(7,LWORK)) +*> On exit, +*> RWORK(1) = Determines the scaling factor SCALE = RWORK(2) / RWORK(1) +*> such that SCALE*SVA(1:N) are the computed singular values +*> of A. (See the description of SVA().) +*> RWORK(2) = See the description of RWORK(1). +*> RWORK(3) = SCONDA is an estimate for the condition number of +*> column equilibrated A. (If JOBA .EQ. 'E' or 'G') +*> SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1). +*> It is computed using SPOCON. It holds +*> N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA +*> where R is the triangular factor from the QRF of A. +*> However, if R is truncated and the numerical rank is +*> determined to be strictly smaller than N, SCONDA is +*> returned as -1, thus indicating that the smallest +*> singular values might be lost. +*> +*> If full SVD is needed, the following two condition numbers are +*> useful for the analysis of the algorithm. They are provied for +*> a developer/implementer who is familiar with the details of +*> the method. +*> +*> RWORK(4) = an estimate of the scaled condition number of the +*> triangular factor in the first QR factorization. +*> RWORK(5) = an estimate of the scaled condition number of the +*> triangular factor in the second QR factorization. +*> The following two parameters are computed if JOBT .EQ. 'T'. +*> They are provided for a developer/implementer who is familiar +*> with the details of the method. +*> RWORK(6) = the entropy of A^* * A :: this is the Shannon entropy +*> of diag(A^* * A) / Trace(A^* * A) taken as point in the +*> probability simplex. +*> RWORK(7) = the entropy of A * A^*. (See the description of RWORK(6).) +*> If the call to ZGEJSV is a workspace query (indicated by LWORK=-1 or +*> LRWORK=-1), then on exit RWORK(1) contains the required length of +*> RWORK for the job parameters used in the call. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> Length of RWORK to confirm proper allocation of workspace. +*> LRWORK depends on the job: +*> +*> 1. If only the singular values are requested i.e. if +*> LSAME(JOBU,'N') .AND. LSAME(JOBV,'N') +*> then: +*> 1.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), +*> then: LRWORK = max( 7, 2 * M ). +*> 1.2. Otherwise, LRWORK = max( 7, N ). +*> 2. If singular values with the right singular vectors are requested +*> i.e. if +*> (LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) .AND. +*> .NOT.(LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) +*> then: +*> 2.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), +*> then LRWORK = max( 7, 2 * M ). +*> 2.2. Otherwise, LRWORK = max( 7, N ). +*> 3. If singular values with the left singular vectors are requested, i.e. if +*> (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND. +*> .NOT.(LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) +*> then: +*> 3.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), +*> then LRWORK = max( 7, 2 * M ). +*> 3.2. Otherwise, LRWORK = max( 7, N ). +*> 4. If singular values with both the left and the right singular vectors +*> are requested, i.e. if +*> (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND. +*> (LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) +*> then: +*> 4.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'), +*> then LRWORK = max( 7, 2 * M ). +*> 4.2. Otherwise, LRWORK = max( 7, N ). +*> +*> If, on entry, LRWORK = -1 or LWORK=-1, a workspace query is assumed and +*> the length of RWORK is returned in RWORK(1). +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, of dimension at least 4, that further depends +*> on the job: +*> +*> 1. If only the singular values are requested then: +*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) +*> then the length of IWORK is N+M; otherwise the length of IWORK is N. +*> 2. If the singular values and the right singular vectors are requested then: +*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) +*> then the length of IWORK is N+M; otherwise the length of IWORK is N. +*> 3. If the singular values and the left singular vectors are requested then: +*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) +*> then the length of IWORK is N+M; otherwise the length of IWORK is N. +*> 4. If the singular values with both the left and the right singular vectors +*> are requested, then: +*> 4.1. If LSAME(JOBV,'J') the length of IWORK is determined as follows: +*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) +*> then the length of IWORK is N+M; otherwise the length of IWORK is N. +*> 4.2. If LSAME(JOBV,'V') the length of IWORK is determined as follows: +*> If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) +*> then the length of IWORK is 2*N+M; otherwise the length of IWORK is 2*N. +*> +*> On exit, +*> IWORK(1) = the numerical rank determined after the initial +*> QR factorization with pivoting. See the descriptions +*> of JOBA and JOBR. +*> IWORK(2) = the number of the computed nonzero singular values +*> IWORK(3) = if nonzero, a warning message: +*> If IWORK(3).EQ.1 then some of the column norms of A +*> were denormalized floats. The requested high accuracy +*> is not warranted by the data. +*> IWORK(4) = 1 or -1. If IWORK(4) .EQ. 1, then the procedure used A^* to +*> do the job as specified by the JOB parameters. +*> If the call to ZGEJSV is a workspace query (indicated by LWORK .EQ. -1 or +*> LRWORK .EQ. -1), then on exit IWORK(1) contains the required length of +*> IWORK for the job parameters used in the call. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> < 0 : if INFO = -i, then the i-th argument had an illegal value. +*> = 0 : successful exit; +*> > 0 : ZGEJSV did not converge in the maximal allowed number +*> of sweeps. The computed values may be inaccurate. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16GEsing +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> ZGEJSV implements a preconditioned Jacobi SVD algorithm. It uses ZGEQP3, +*> ZGEQRF, and ZGELQF as preprocessors and preconditioners. Optionally, an +*> additional row pivoting can be used as a preprocessor, which in some +*> cases results in much higher accuracy. An example is matrix A with the +*> structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned +*> diagonal matrices and C is well-conditioned matrix. In that case, complete +*> pivoting in the first QR factorizations provides accuracy dependent on the +*> condition number of C, and independent of D1, D2. Such higher accuracy is +*> not completely understood theoretically, but it works well in practice. +*> Further, if A can be written as A = B*D, with well-conditioned B and some +*> diagonal D, then the high accuracy is guaranteed, both theoretically and +*> in software, independent of D. For more details see [1], [2]. +*> The computational range for the singular values can be the full range +*> ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS +*> & LAPACK routines called by ZGEJSV are implemented to work in that range. +*> If that is not the case, then the restriction for safe computation with +*> the singular values in the range of normalized IEEE numbers is that the +*> spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not +*> overflow. This code (ZGEJSV) is best used in this restricted range, +*> meaning that singular values of magnitude below ||A||_2 / DLAMCH('O') are +*> returned as zeros. See JOBR for details on this. +*> Further, this implementation is somewhat slower than the one described +*> in [1,2] due to replacement of some non-LAPACK components, and because +*> the choice of some tuning parameters in the iterative part (ZGESVJ) is +*> left to the implementer on a particular machine. +*> The rank revealing QR factorization (in this code: ZGEQP3) should be +*> implemented as in [3]. We have a new version of ZGEQP3 under development +*> that is more robust than the current one in LAPACK, with a cleaner cut in +*> rank deficient cases. It will be available in the SIGMA library [4]. +*> If M is much larger than N, it is obvious that the initial QRF with +*> column pivoting can be preprocessed by the QRF without pivoting. That +*> well known trick is not used in ZGEJSV because in some cases heavy row +*> weighting can be treated with complete pivoting. The overhead in cases +*> M much larger than N is then only due to pivoting, but the benefits in +*> terms of accuracy have prevailed. The implementer/user can incorporate +*> this extra QRF step easily. The implementer can also improve data movement +*> (matrix transpose, matrix copy, matrix transposed copy) - this +*> implementation of ZGEJSV uses only the simplest, naive data movement. +*> \endverbatim +* +*> \par Contributor: +* ================== +*> +*> Zlatko Drmac, Department of Mathematics, Faculty of Science, +*> University of Zagreb (Zagreb, Croatia); drmac@math.hr +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. +*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. +*> LAPACK Working note 169. +*> [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. +*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. +*> LAPACK Working note 170. +*> [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR +*> factorization software - a case study. +*> ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28. +*> LAPACK Working note 176. +*> [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, +*> QSVD, (H,K)-SVD computations. +*> Department of Mathematics, University of Zagreb, 2008, 2016. +*> \endverbatim +* +*> \par Bugs, examples and comments: +* ================================= +*> +*> Please report all bugs and send interesting examples and/or comments to +*> drmac@math.hr. Thank you. +*> +* ===================================================================== + SUBROUTINE ZGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP, + $ M, N, A, LDA, SVA, U, LDU, V, LDV, + $ CWORK, LWORK, RWORK, LRWORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + IMPLICIT NONE + INTEGER INFO, LDA, LDU, LDV, LWORK, LRWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), U( LDU, * ), V( LDV, * ), + $ CWORK( LWORK ) + DOUBLE PRECISION SVA( N ), RWORK( LRWORK ) + INTEGER IWORK( * ) + CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV +* .. +* +* =========================================================================== +* +* .. Local Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + COMPLEX*16 CTEMP + DOUBLE PRECISION AAPP, AAQQ, AATMAX, AATMIN, BIG, BIG1, + $ COND_OK, CONDR1, CONDR2, ENTRA, ENTRAT, EPSLN, + $ MAXPRJ, SCALEM, SCONDA, SFMIN, SMALL, TEMP1, + $ USCAL1, USCAL2, XSC + INTEGER IERR, N1, NR, NUMRANK, p, q, WARNING + LOGICAL ALMORT, DEFR, ERREST, GOSCAL, JRACC, KILL, LQUERY, + $ LSVEC, L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN, NOSCAL, + $ ROWPIV, RSVEC, TRANSP +* + INTEGER OPTWRK, MINWRK, MINRWRK, MINIWRK + INTEGER LWCON, LWLQF, LWQP3, LWQRF, LWUNMLQ, LWUNMQR, LWUNMQRM, + $ LWSVDJ, LWSVDJV, LRWQP3, LRWCON, LRWSVDJ, IWOFF + INTEGER LWRK_ZGELQF, LWRK_ZGEQP3, LWRK_ZGEQP3N, LWRK_ZGEQRF, + $ LWRK_ZGESVJ, LWRK_ZGESVJV, LWRK_ZGESVJU, LWRK_ZUNMLQ, + $ LWRK_ZUNMQR, LWRK_ZUNMQRM +* .. +* .. Local Arrays + COMPLEX*16 CDUMMY(1) + DOUBLE PRECISION RDUMMY(1) +* +* .. Intrinsic Functions .. + INTRINSIC ABS, DCMPLX, CONJG, DLOG, MAX, MIN, DBLE, NINT, SQRT +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DZNRM2 + INTEGER IDAMAX, IZAMAX + LOGICAL LSAME + EXTERNAL IDAMAX, IZAMAX, LSAME, DLAMCH, DZNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ, ZCOPY, ZGELQF, ZGEQP3, ZGEQRF, ZLACPY, ZLAPMR, + $ ZLASCL, DLASCL, ZLASET, ZLASSQ, ZLASWP, ZUNGQR, ZUNMLQ, + $ ZUNMQR, ZPOCON, DSCAL, ZDSCAL, ZSWAP, ZTRSM, ZLACGV, + $ XERBLA +* + EXTERNAL ZGESVJ +* .. +* +* Test the input arguments +* + LSVEC = LSAME( JOBU, 'U' ) .OR. LSAME( JOBU, 'F' ) + JRACC = LSAME( JOBV, 'J' ) + RSVEC = LSAME( JOBV, 'V' ) .OR. JRACC + ROWPIV = LSAME( JOBA, 'F' ) .OR. LSAME( JOBA, 'G' ) + L2RANK = LSAME( JOBA, 'R' ) + L2ABER = LSAME( JOBA, 'A' ) + ERREST = LSAME( JOBA, 'E' ) .OR. LSAME( JOBA, 'G' ) + L2TRAN = LSAME( JOBT, 'T' ) .AND. ( M .EQ. N ) + L2KILL = LSAME( JOBR, 'R' ) + DEFR = LSAME( JOBR, 'N' ) + L2PERT = LSAME( JOBP, 'P' ) +* + LQUERY = ( LWORK .EQ. -1 ) .OR. ( LRWORK .EQ. -1 ) +* + IF ( .NOT.(ROWPIV .OR. L2RANK .OR. L2ABER .OR. + $ ERREST .OR. LSAME( JOBA, 'C' ) )) THEN + INFO = - 1 + ELSE IF ( .NOT.( LSVEC .OR. LSAME( JOBU, 'N' ) .OR. + $ ( LSAME( JOBU, 'W' ) .AND. RSVEC .AND. L2TRAN ) ) ) THEN + INFO = - 2 + ELSE IF ( .NOT.( RSVEC .OR. LSAME( JOBV, 'N' ) .OR. + $ ( LSAME( JOBV, 'W' ) .AND. LSVEC .AND. L2TRAN ) ) ) THEN + INFO = - 3 + ELSE IF ( .NOT. ( L2KILL .OR. DEFR ) ) THEN + INFO = - 4 + ELSE IF ( .NOT. ( LSAME(JOBT,'T') .OR. LSAME(JOBT,'N') ) ) THEN + INFO = - 5 + ELSE IF ( .NOT. ( L2PERT .OR. LSAME( JOBP, 'N' ) ) ) THEN + INFO = - 6 + ELSE IF ( M .LT. 0 ) THEN + INFO = - 7 + ELSE IF ( ( N .LT. 0 ) .OR. ( N .GT. M ) ) THEN + INFO = - 8 + ELSE IF ( LDA .LT. M ) THEN + INFO = - 10 + ELSE IF ( LSVEC .AND. ( LDU .LT. M ) ) THEN + INFO = - 13 + ELSE IF ( RSVEC .AND. ( LDV .LT. N ) ) THEN + INFO = - 15 + ELSE +* #:) + INFO = 0 + END IF +* + IF ( INFO .EQ. 0 ) THEN +* .. compute the minimal and the optimal workspace lengths +* [[The expressions for computing the minimal and the optimal +* values of LCWORK, LRWORK are written with a lot of redundancy and +* can be simplified. However, this verbose form is useful for +* maintenance and modifications of the code.]] +* +* .. minimal workspace length for ZGEQP3 of an M x N matrix, +* ZGEQRF of an N x N matrix, ZGELQF of an N x N matrix, +* ZUNMLQ for computing N x N matrix, ZUNMQR for computing N x N +* matrix, ZUNMQR for computing M x N matrix, respectively. + LWQP3 = N+1 + LWQRF = MAX( 1, N ) + LWLQF = MAX( 1, N ) + LWUNMLQ = MAX( 1, N ) + LWUNMQR = MAX( 1, N ) + LWUNMQRM = MAX( 1, M ) +* .. minimal workspace length for ZPOCON of an N x N matrix + LWCON = 2 * N +* .. minimal workspace length for ZGESVJ of an N x N matrix, +* without and with explicit accumulation of Jacobi rotations + LWSVDJ = MAX( 2 * N, 1 ) + LWSVDJV = MAX( 2 * N, 1 ) +* .. minimal REAL workspace length for ZGEQP3, ZPOCON, ZGESVJ + LRWQP3 = N + LRWCON = N + LRWSVDJ = N + IF ( LQUERY ) THEN + CALL ZGEQP3( M, N, A, LDA, IWORK, CDUMMY, CDUMMY, -1, + $ RDUMMY, IERR ) + LWRK_ZGEQP3 = CDUMMY(1) + CALL ZGEQRF( N, N, A, LDA, CDUMMY, CDUMMY,-1, IERR ) + LWRK_ZGEQRF = CDUMMY(1) + CALL ZGELQF( N, N, A, LDA, CDUMMY, CDUMMY,-1, IERR ) + LWRK_ZGELQF = CDUMMY(1) + END IF + MINWRK = 2 + OPTWRK = 2 + MINIWRK = N + IF ( .NOT. (LSVEC .OR. RSVEC ) ) THEN +* .. minimal and optimal sizes of the complex workspace if +* only the singular values are requested + IF ( ERREST ) THEN + MINWRK = MAX( N+LWQP3, N**2+LWCON, N+LWQRF, LWSVDJ ) + ELSE + MINWRK = MAX( N+LWQP3, N+LWQRF, LWSVDJ ) + END IF + IF ( LQUERY ) THEN + CALL ZGESVJ( 'L', 'N', 'N', N, N, A, LDA, SVA, N, V, + $ LDV, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_ZGESVJ = CDUMMY(1) + IF ( ERREST ) THEN + OPTWRK = MAX( N+LWRK_ZGEQP3, N**2+LWCON, + $ N+LWRK_ZGEQRF, LWRK_ZGESVJ ) + ELSE + OPTWRK = MAX( N+LWRK_ZGEQP3, N+LWRK_ZGEQRF, + $ LWRK_ZGESVJ ) + END IF + END IF + IF ( L2TRAN .OR. ROWPIV ) THEN + IF ( ERREST ) THEN + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWCON, LRWSVDJ ) + ELSE + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ ) + END IF + ELSE + IF ( ERREST ) THEN + MINRWRK = MAX( 7, LRWQP3, LRWCON, LRWSVDJ ) + ELSE + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ ) + END IF + END IF + IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M + ELSE IF ( RSVEC .AND. (.NOT.LSVEC) ) THEN +* .. minimal and optimal sizes of the complex workspace if the +* singular values and the right singular vectors are requested + IF ( ERREST ) THEN + MINWRK = MAX( N+LWQP3, LWCON, LWSVDJ, N+LWLQF, + $ 2*N+LWQRF, N+LWSVDJ, N+LWUNMLQ ) + ELSE + MINWRK = MAX( N+LWQP3, LWSVDJ, N+LWLQF, 2*N+LWQRF, + $ N+LWSVDJ, N+LWUNMLQ ) + END IF + IF ( LQUERY ) THEN + CALL ZGESVJ( 'L', 'U', 'N', N,N, U, LDU, SVA, N, A, + $ LDA, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_ZGESVJ = CDUMMY(1) + CALL ZUNMLQ( 'L', 'C', N, N, N, A, LDA, CDUMMY, + $ V, LDV, CDUMMY, -1, IERR ) + LWRK_ZUNMLQ = CDUMMY(1) + IF ( ERREST ) THEN + OPTWRK = MAX( N+LWRK_ZGEQP3, LWCON, LWRK_ZGESVJ, + $ N+LWRK_ZGELQF, 2*N+LWRK_ZGEQRF, + $ N+LWRK_ZGESVJ, N+LWRK_ZUNMLQ ) + ELSE + OPTWRK = MAX( N+LWRK_ZGEQP3, LWRK_ZGESVJ,N+LWRK_ZGELQF, + $ 2*N+LWRK_ZGEQRF, N+LWRK_ZGESVJ, + $ N+LWRK_ZUNMLQ ) + END IF + END IF + IF ( L2TRAN .OR. ROWPIV ) THEN + IF ( ERREST ) THEN + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ, LRWCON ) + ELSE + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ ) + END IF + ELSE + IF ( ERREST ) THEN + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ, LRWCON ) + ELSE + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ ) + END IF + END IF + IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M + ELSE IF ( LSVEC .AND. (.NOT.RSVEC) ) THEN +* .. minimal and optimal sizes of the complex workspace if the +* singular values and the left singular vectors are requested + IF ( ERREST ) THEN + MINWRK = N + MAX( LWQP3,LWCON,N+LWQRF,LWSVDJ,LWUNMQRM ) + ELSE + MINWRK = N + MAX( LWQP3, N+LWQRF, LWSVDJ, LWUNMQRM ) + END IF + IF ( LQUERY ) THEN + CALL ZGESVJ( 'L', 'U', 'N', N,N, U, LDU, SVA, N, A, + $ LDA, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_ZGESVJ = CDUMMY(1) + CALL ZUNMQR( 'L', 'N', M, N, N, A, LDA, CDUMMY, U, + $ LDU, CDUMMY, -1, IERR ) + LWRK_ZUNMQRM = CDUMMY(1) + IF ( ERREST ) THEN + OPTWRK = N + MAX( LWRK_ZGEQP3, LWCON, N+LWRK_ZGEQRF, + $ LWRK_ZGESVJ, LWRK_ZUNMQRM ) + ELSE + OPTWRK = N + MAX( LWRK_ZGEQP3, N+LWRK_ZGEQRF, + $ LWRK_ZGESVJ, LWRK_ZUNMQRM ) + END IF + END IF + IF ( L2TRAN .OR. ROWPIV ) THEN + IF ( ERREST ) THEN + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ, LRWCON ) + ELSE + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ ) + END IF + ELSE + IF ( ERREST ) THEN + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ, LRWCON ) + ELSE + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ ) + END IF + END IF + IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M + ELSE +* .. minimal and optimal sizes of the complex workspace if the +* full SVD is requested + IF ( .NOT. JRACC ) THEN + IF ( ERREST ) THEN + MINWRK = MAX( N+LWQP3, N+LWCON, 2*N+N**2+LWCON, + $ 2*N+LWQRF, 2*N+LWQP3, + $ 2*N+N**2+N+LWLQF, 2*N+N**2+N+N**2+LWCON, + $ 2*N+N**2+N+LWSVDJ, 2*N+N**2+N+LWSVDJV, + $ 2*N+N**2+N+LWUNMQR,2*N+N**2+N+LWUNMLQ, + $ N+N**2+LWSVDJ, N+LWUNMQRM ) + ELSE + MINWRK = MAX( N+LWQP3, 2*N+N**2+LWCON, + $ 2*N+LWQRF, 2*N+LWQP3, + $ 2*N+N**2+N+LWLQF, 2*N+N**2+N+N**2+LWCON, + $ 2*N+N**2+N+LWSVDJ, 2*N+N**2+N+LWSVDJV, + $ 2*N+N**2+N+LWUNMQR,2*N+N**2+N+LWUNMLQ, + $ N+N**2+LWSVDJ, N+LWUNMQRM ) + END IF + MINIWRK = MINIWRK + N + IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M + ELSE + IF ( ERREST ) THEN + MINWRK = MAX( N+LWQP3, N+LWCON, 2*N+LWQRF, + $ 2*N+N**2+LWSVDJV, 2*N+N**2+N+LWUNMQR, + $ N+LWUNMQRM ) + ELSE + MINWRK = MAX( N+LWQP3, 2*N+LWQRF, + $ 2*N+N**2+LWSVDJV, 2*N+N**2+N+LWUNMQR, + $ N+LWUNMQRM ) + END IF + IF ( ROWPIV .OR. L2TRAN ) MINIWRK = MINIWRK + M + END IF + IF ( LQUERY ) THEN + CALL ZUNMQR( 'L', 'N', M, N, N, A, LDA, CDUMMY, U, + $ LDU, CDUMMY, -1, IERR ) + LWRK_ZUNMQRM = CDUMMY(1) + CALL ZUNMQR( 'L', 'N', N, N, N, A, LDA, CDUMMY, U, + $ LDU, CDUMMY, -1, IERR ) + LWRK_ZUNMQR = CDUMMY(1) + IF ( .NOT. JRACC ) THEN + CALL ZGEQP3( N,N, A, LDA, IWORK, CDUMMY,CDUMMY, -1, + $ RDUMMY, IERR ) + LWRK_ZGEQP3N = CDUMMY(1) + CALL ZGESVJ( 'L', 'U', 'N', N, N, U, LDU, SVA, + $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_ZGESVJ = CDUMMY(1) + CALL ZGESVJ( 'U', 'U', 'N', N, N, U, LDU, SVA, + $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_ZGESVJU = CDUMMY(1) + CALL ZGESVJ( 'L', 'U', 'V', N, N, U, LDU, SVA, + $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_ZGESVJV = CDUMMY(1) + CALL ZUNMLQ( 'L', 'C', N, N, N, A, LDA, CDUMMY, + $ V, LDV, CDUMMY, -1, IERR ) + LWRK_ZUNMLQ = CDUMMY(1) + IF ( ERREST ) THEN + OPTWRK = MAX( N+LWRK_ZGEQP3, N+LWCON, + $ 2*N+N**2+LWCON, 2*N+LWRK_ZGEQRF, + $ 2*N+LWRK_ZGEQP3N, + $ 2*N+N**2+N+LWRK_ZGELQF, + $ 2*N+N**2+N+N**2+LWCON, + $ 2*N+N**2+N+LWRK_ZGESVJ, + $ 2*N+N**2+N+LWRK_ZGESVJV, + $ 2*N+N**2+N+LWRK_ZUNMQR, + $ 2*N+N**2+N+LWRK_ZUNMLQ, + $ N+N**2+LWRK_ZGESVJU, + $ N+LWRK_ZUNMQRM ) + ELSE + OPTWRK = MAX( N+LWRK_ZGEQP3, + $ 2*N+N**2+LWCON, 2*N+LWRK_ZGEQRF, + $ 2*N+LWRK_ZGEQP3N, + $ 2*N+N**2+N+LWRK_ZGELQF, + $ 2*N+N**2+N+N**2+LWCON, + $ 2*N+N**2+N+LWRK_ZGESVJ, + $ 2*N+N**2+N+LWRK_ZGESVJV, + $ 2*N+N**2+N+LWRK_ZUNMQR, + $ 2*N+N**2+N+LWRK_ZUNMLQ, + $ N+N**2+LWRK_ZGESVJU, + $ N+LWRK_ZUNMQRM ) + END IF + ELSE + CALL ZGESVJ( 'L', 'U', 'V', N, N, U, LDU, SVA, + $ N, V, LDV, CDUMMY, -1, RDUMMY, -1, IERR ) + LWRK_ZGESVJV = CDUMMY(1) + CALL ZUNMQR( 'L', 'N', N, N, N, CDUMMY, N, CDUMMY, + $ V, LDV, CDUMMY, -1, IERR ) + LWRK_ZUNMQR = CDUMMY(1) + CALL ZUNMQR( 'L', 'N', M, N, N, A, LDA, CDUMMY, U, + $ LDU, CDUMMY, -1, IERR ) + LWRK_ZUNMQRM = CDUMMY(1) + IF ( ERREST ) THEN + OPTWRK = MAX( N+LWRK_ZGEQP3, N+LWCON, + $ 2*N+LWRK_ZGEQRF, 2*N+N**2, + $ 2*N+N**2+LWRK_ZGESVJV, + $ 2*N+N**2+N+LWRK_ZUNMQR,N+LWRK_ZUNMQRM ) + ELSE + OPTWRK = MAX( N+LWRK_ZGEQP3, 2*N+LWRK_ZGEQRF, + $ 2*N+N**2, 2*N+N**2+LWRK_ZGESVJV, + $ 2*N+N**2+N+LWRK_ZUNMQR, + $ N+LWRK_ZUNMQRM ) + END IF + END IF + END IF + IF ( L2TRAN .OR. ROWPIV ) THEN + MINRWRK = MAX( 7, 2*M, LRWQP3, LRWSVDJ, LRWCON ) + ELSE + MINRWRK = MAX( 7, LRWQP3, LRWSVDJ, LRWCON ) + END IF + END IF + MINWRK = MAX( 2, MINWRK ) + OPTWRK = MAX( 2, OPTWRK ) + IF ( LWORK .LT. MINWRK .AND. (.NOT.LQUERY) ) INFO = - 17 + IF ( LRWORK .LT. MINRWRK .AND. (.NOT.LQUERY) ) INFO = - 19 + END IF +* + IF ( INFO .NE. 0 ) THEN +* #:( + CALL XERBLA( 'ZGEJSV', - INFO ) + RETURN + ELSE IF ( LQUERY ) THEN + CWORK(1) = OPTWRK + CWORK(2) = MINWRK + RWORK(1) = MINRWRK + IWORK(1) = MAX( 4, MINIWRK ) + RETURN + END IF +* +* Quick return for void matrix (Y3K safe) +* #:) + IF ( ( M .EQ. 0 ) .OR. ( N .EQ. 0 ) ) THEN + IWORK(1:4) = 0 + RWORK(1:7) = 0 + RETURN + ENDIF +* +* Determine whether the matrix U should be M x N or M x M +* + IF ( LSVEC ) THEN + N1 = N + IF ( LSAME( JOBU, 'F' ) ) N1 = M + END IF +* +* Set numerical parameters +* +*! NOTE: Make sure DLAMCH() does not fail on the target architecture. +* + EPSLN = DLAMCH('Epsilon') + SFMIN = DLAMCH('SafeMinimum') + SMALL = SFMIN / EPSLN + BIG = DLAMCH('O') +* BIG = ONE / SFMIN +* +* Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N +* +*(!) If necessary, scale SVA() to protect the largest norm from +* overflow. It is possible that this scaling pushes the smallest +* column norm left from the underflow threshold (extreme case). +* + SCALEM = ONE / SQRT(DBLE(M)*DBLE(N)) + NOSCAL = .TRUE. + GOSCAL = .TRUE. + DO 1874 p = 1, N + AAPP = ZERO + AAQQ = ONE + CALL ZLASSQ( M, A(1,p), 1, AAPP, AAQQ ) + IF ( AAPP .GT. BIG ) THEN + INFO = - 9 + CALL XERBLA( 'ZGEJSV', -INFO ) + RETURN + END IF + AAQQ = SQRT(AAQQ) + IF ( ( AAPP .LT. (BIG / AAQQ) ) .AND. NOSCAL ) THEN + SVA(p) = AAPP * AAQQ + ELSE + NOSCAL = .FALSE. + SVA(p) = AAPP * ( AAQQ * SCALEM ) + IF ( GOSCAL ) THEN + GOSCAL = .FALSE. + CALL DSCAL( p-1, SCALEM, SVA, 1 ) + END IF + END IF + 1874 CONTINUE +* + IF ( NOSCAL ) SCALEM = ONE +* + AAPP = ZERO + AAQQ = BIG + DO 4781 p = 1, N + AAPP = MAX( AAPP, SVA(p) ) + IF ( SVA(p) .NE. ZERO ) AAQQ = MIN( AAQQ, SVA(p) ) + 4781 CONTINUE +* +* Quick return for zero M x N matrix +* #:) + IF ( AAPP .EQ. ZERO ) THEN + IF ( LSVEC ) CALL ZLASET( 'G', M, N1, CZERO, CONE, U, LDU ) + IF ( RSVEC ) CALL ZLASET( 'G', N, N, CZERO, CONE, V, LDV ) + RWORK(1) = ONE + RWORK(2) = ONE + IF ( ERREST ) RWORK(3) = ONE + IF ( LSVEC .AND. RSVEC ) THEN + RWORK(4) = ONE + RWORK(5) = ONE + END IF + IF ( L2TRAN ) THEN + RWORK(6) = ZERO + RWORK(7) = ZERO + END IF + IWORK(1) = 0 + IWORK(2) = 0 + IWORK(3) = 0 + IWORK(4) = -1 + RETURN + END IF +* +* Issue warning if denormalized column norms detected. Override the +* high relative accuracy request. Issue licence to kill nonzero columns +* (set them to zero) whose norm is less than sigma_max / BIG (roughly). +* #:( + WARNING = 0 + IF ( AAQQ .LE. SFMIN ) THEN + L2RANK = .TRUE. + L2KILL = .TRUE. + WARNING = 1 + END IF +* +* Quick return for one-column matrix +* #:) + IF ( N .EQ. 1 ) THEN +* + IF ( LSVEC ) THEN + CALL ZLASCL( 'G',0,0,SVA(1),SCALEM, M,1,A(1,1),LDA,IERR ) + CALL ZLACPY( 'A', M, 1, A, LDA, U, LDU ) +* computing all M left singular vectors of the M x 1 matrix + IF ( N1 .NE. N ) THEN + CALL ZGEQRF( M, N, U,LDU, CWORK, CWORK(N+1),LWORK-N,IERR ) + CALL ZUNGQR( M,N1,1, U,LDU,CWORK,CWORK(N+1),LWORK-N,IERR ) + CALL ZCOPY( M, A(1,1), 1, U(1,1), 1 ) + END IF + END IF + IF ( RSVEC ) THEN + V(1,1) = CONE + END IF + IF ( SVA(1) .LT. (BIG*SCALEM) ) THEN + SVA(1) = SVA(1) / SCALEM + SCALEM = ONE + END IF + RWORK(1) = ONE / SCALEM + RWORK(2) = ONE + IF ( SVA(1) .NE. ZERO ) THEN + IWORK(1) = 1 + IF ( ( SVA(1) / SCALEM) .GE. SFMIN ) THEN + IWORK(2) = 1 + ELSE + IWORK(2) = 0 + END IF + ELSE + IWORK(1) = 0 + IWORK(2) = 0 + END IF + IWORK(3) = 0 + IWORK(4) = -1 + IF ( ERREST ) RWORK(3) = ONE + IF ( LSVEC .AND. RSVEC ) THEN + RWORK(4) = ONE + RWORK(5) = ONE + END IF + IF ( L2TRAN ) THEN + RWORK(6) = ZERO + RWORK(7) = ZERO + END IF + RETURN +* + END IF +* + TRANSP = .FALSE. +* + AATMAX = -ONE + AATMIN = BIG + IF ( ROWPIV .OR. L2TRAN ) THEN +* +* Compute the row norms, needed to determine row pivoting sequence +* (in the case of heavily row weighted A, row pivoting is strongly +* advised) and to collect information needed to compare the +* structures of A * A^* and A^* * A (in the case L2TRAN.EQ..TRUE.). +* + IF ( L2TRAN ) THEN + DO 1950 p = 1, M + XSC = ZERO + TEMP1 = ONE + CALL ZLASSQ( N, A(p,1), LDA, XSC, TEMP1 ) +* ZLASSQ gets both the ell_2 and the ell_infinity norm +* in one pass through the vector + RWORK(M+p) = XSC * SCALEM + RWORK(p) = XSC * (SCALEM*SQRT(TEMP1)) + AATMAX = MAX( AATMAX, RWORK(p) ) + IF (RWORK(p) .NE. ZERO) + $ AATMIN = MIN(AATMIN,RWORK(p)) + 1950 CONTINUE + ELSE + DO 1904 p = 1, M + RWORK(M+p) = SCALEM*ABS( A(p,IZAMAX(N,A(p,1),LDA)) ) + AATMAX = MAX( AATMAX, RWORK(M+p) ) + AATMIN = MIN( AATMIN, RWORK(M+p) ) + 1904 CONTINUE + END IF +* + END IF +* +* For square matrix A try to determine whether A^* would be better +* input for the preconditioned Jacobi SVD, with faster convergence. +* The decision is based on an O(N) function of the vector of column +* and row norms of A, based on the Shannon entropy. This should give +* the right choice in most cases when the difference actually matters. +* It may fail and pick the slower converging side. +* + ENTRA = ZERO + ENTRAT = ZERO + IF ( L2TRAN ) THEN +* + XSC = ZERO + TEMP1 = ONE + CALL DLASSQ( N, SVA, 1, XSC, TEMP1 ) + TEMP1 = ONE / TEMP1 +* + ENTRA = ZERO + DO 1113 p = 1, N + BIG1 = ( ( SVA(p) / XSC )**2 ) * TEMP1 + IF ( BIG1 .NE. ZERO ) ENTRA = ENTRA + BIG1 * DLOG(BIG1) + 1113 CONTINUE + ENTRA = - ENTRA / DLOG(DBLE(N)) +* +* Now, SVA().^2/Trace(A^* * A) is a point in the probability simplex. +* It is derived from the diagonal of A^* * A. Do the same with the +* diagonal of A * A^*, compute the entropy of the corresponding +* probability distribution. Note that A * A^* and A^* * A have the +* same trace. +* + ENTRAT = ZERO + DO 1114 p = 1, M + BIG1 = ( ( RWORK(p) / XSC )**2 ) * TEMP1 + IF ( BIG1 .NE. ZERO ) ENTRAT = ENTRAT + BIG1 * DLOG(BIG1) + 1114 CONTINUE + ENTRAT = - ENTRAT / DLOG(DBLE(M)) +* +* Analyze the entropies and decide A or A^*. Smaller entropy +* usually means better input for the algorithm. +* + TRANSP = ( ENTRAT .LT. ENTRA ) +* +* If A^* is better than A, take the adjoint of A. This is allowed +* only for square matrices, M=N. + IF ( TRANSP ) THEN +* In an optimal implementation, this trivial transpose +* should be replaced with faster transpose. + DO 1115 p = 1, N - 1 + A(p,p) = CONJG(A(p,p)) + DO 1116 q = p + 1, N + CTEMP = CONJG(A(q,p)) + A(q,p) = CONJG(A(p,q)) + A(p,q) = CTEMP + 1116 CONTINUE + 1115 CONTINUE + A(N,N) = CONJG(A(N,N)) + DO 1117 p = 1, N + RWORK(M+p) = SVA(p) + SVA(p) = RWORK(p) +* previously computed row 2-norms are now column 2-norms +* of the transposed matrix + 1117 CONTINUE + TEMP1 = AAPP + AAPP = AATMAX + AATMAX = TEMP1 + TEMP1 = AAQQ + AAQQ = AATMIN + AATMIN = TEMP1 + KILL = LSVEC + LSVEC = RSVEC + RSVEC = KILL + IF ( LSVEC ) N1 = N +* + ROWPIV = .TRUE. + END IF +* + END IF +* END IF L2TRAN +* +* Scale the matrix so that its maximal singular value remains less +* than SQRT(BIG) -- the matrix is scaled so that its maximal column +* has Euclidean norm equal to SQRT(BIG/N). The only reason to keep +* SQRT(BIG) instead of BIG is the fact that ZGEJSV uses LAPACK and +* BLAS routines that, in some implementations, are not capable of +* working in the full interval [SFMIN,BIG] and that they may provoke +* overflows in the intermediate results. If the singular values spread +* from SFMIN to BIG, then ZGESVJ will compute them. So, in that case, +* one should use ZGESVJ instead of ZGEJSV. +* >> change in the April 2016 update: allow bigger range, i.e. the +* largest column is allowed up to BIG/N and ZGESVJ will do the rest. + BIG1 = SQRT( BIG ) + TEMP1 = SQRT( BIG / DBLE(N) ) +* TEMP1 = BIG/DBLE(N) +* + CALL DLASCL( 'G', 0, 0, AAPP, TEMP1, N, 1, SVA, N, IERR ) + IF ( AAQQ .GT. (AAPP * SFMIN) ) THEN + AAQQ = ( AAQQ / AAPP ) * TEMP1 + ELSE + AAQQ = ( AAQQ * TEMP1 ) / AAPP + END IF + TEMP1 = TEMP1 * SCALEM + CALL ZLASCL( 'G', 0, 0, AAPP, TEMP1, M, N, A, LDA, IERR ) +* +* To undo scaling at the end of this procedure, multiply the +* computed singular values with USCAL2 / USCAL1. +* + USCAL1 = TEMP1 + USCAL2 = AAPP +* + IF ( L2KILL ) THEN +* L2KILL enforces computation of nonzero singular values in +* the restricted range of condition number of the initial A, +* sigma_max(A) / sigma_min(A) approx. SQRT(BIG)/SQRT(SFMIN). + XSC = SQRT( SFMIN ) + ELSE + XSC = SMALL +* +* Now, if the condition number of A is too big, +* sigma_max(A) / sigma_min(A) .GT. SQRT(BIG/N) * EPSLN / SFMIN, +* as a precaution measure, the full SVD is computed using ZGESVJ +* with accumulated Jacobi rotations. This provides numerically +* more robust computation, at the cost of slightly increased run +* time. Depending on the concrete implementation of BLAS and LAPACK +* (i.e. how they behave in presence of extreme ill-conditioning) the +* implementor may decide to remove this switch. + IF ( ( AAQQ.LT.SQRT(SFMIN) ) .AND. LSVEC .AND. RSVEC ) THEN + JRACC = .TRUE. + END IF +* + END IF + IF ( AAQQ .LT. XSC ) THEN + DO 700 p = 1, N + IF ( SVA(p) .LT. XSC ) THEN + CALL ZLASET( 'A', M, 1, CZERO, CZERO, A(1,p), LDA ) + SVA(p) = ZERO + END IF + 700 CONTINUE + END IF +* +* Preconditioning using QR factorization with pivoting +* + IF ( ROWPIV ) THEN +* Optional row permutation (Bjoerck row pivoting): +* A result by Cox and Higham shows that the Bjoerck's +* row pivoting combined with standard column pivoting +* has similar effect as Powell-Reid complete pivoting. +* The ell-infinity norms of A are made nonincreasing. + IF ( ( LSVEC .AND. RSVEC ) .AND. .NOT.( JRACC ) ) THEN + IWOFF = 2*N + ELSE + IWOFF = N + END IF + DO 1952 p = 1, M - 1 + q = IDAMAX( M-p+1, RWORK(M+p), 1 ) + p - 1 + IWORK(IWOFF+p) = q + IF ( p .NE. q ) THEN + TEMP1 = RWORK(M+p) + RWORK(M+p) = RWORK(M+q) + RWORK(M+q) = TEMP1 + END IF + 1952 CONTINUE + CALL ZLASWP( N, A, LDA, 1, M-1, IWORK(IWOFF+1), 1 ) + END IF +* +* End of the preparation phase (scaling, optional sorting and +* transposing, optional flushing of small columns). +* +* Preconditioning +* +* If the full SVD is needed, the right singular vectors are computed +* from a matrix equation, and for that we need theoretical analysis +* of the Businger-Golub pivoting. So we use ZGEQP3 as the first RR QRF. +* In all other cases the first RR QRF can be chosen by other criteria +* (eg speed by replacing global with restricted window pivoting, such +* as in xGEQPX from TOMS # 782). Good results will be obtained using +* xGEQPX with properly (!) chosen numerical parameters. +* Any improvement of ZGEQP3 improves overal performance of ZGEJSV. +* +* A * P1 = Q1 * [ R1^* 0]^*: + DO 1963 p = 1, N +* .. all columns are free columns + IWORK(p) = 0 + 1963 CONTINUE + CALL ZGEQP3( M, N, A, LDA, IWORK, CWORK, CWORK(N+1), LWORK-N, + $ RWORK, IERR ) +* +* The upper triangular matrix R1 from the first QRF is inspected for +* rank deficiency and possibilities for deflation, or possible +* ill-conditioning. Depending on the user specified flag L2RANK, +* the procedure explores possibilities to reduce the numerical +* rank by inspecting the computed upper triangular factor. If +* L2RANK or L2ABER are up, then ZGEJSV will compute the SVD of +* A + dA, where ||dA|| <= f(M,N)*EPSLN. +* + NR = 1 + IF ( L2ABER ) THEN +* Standard absolute error bound suffices. All sigma_i with +* sigma_i < N*EPSLN*||A|| are flushed to zero. This is an +* agressive enforcement of lower numerical rank by introducing a +* backward error of the order of N*EPSLN*||A||. + TEMP1 = SQRT(DBLE(N))*EPSLN + DO 3001 p = 2, N + IF ( ABS(A(p,p)) .GE. (TEMP1*ABS(A(1,1))) ) THEN + NR = NR + 1 + ELSE + GO TO 3002 + END IF + 3001 CONTINUE + 3002 CONTINUE + ELSE IF ( L2RANK ) THEN +* .. similarly as above, only slightly more gentle (less agressive). +* Sudden drop on the diagonal of R1 is used as the criterion for +* close-to-rank-deficient. + TEMP1 = SQRT(SFMIN) + DO 3401 p = 2, N + IF ( ( ABS(A(p,p)) .LT. (EPSLN*ABS(A(p-1,p-1))) ) .OR. + $ ( ABS(A(p,p)) .LT. SMALL ) .OR. + $ ( L2KILL .AND. (ABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3402 + NR = NR + 1 + 3401 CONTINUE + 3402 CONTINUE +* + ELSE +* The goal is high relative accuracy. However, if the matrix +* has high scaled condition number the relative accuracy is in +* general not feasible. Later on, a condition number estimator +* will be deployed to estimate the scaled condition number. +* Here we just remove the underflowed part of the triangular +* factor. This prevents the situation in which the code is +* working hard to get the accuracy not warranted by the data. + TEMP1 = SQRT(SFMIN) + DO 3301 p = 2, N + IF ( ( ABS(A(p,p)) .LT. SMALL ) .OR. + $ ( L2KILL .AND. (ABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3302 + NR = NR + 1 + 3301 CONTINUE + 3302 CONTINUE +* + END IF +* + ALMORT = .FALSE. + IF ( NR .EQ. N ) THEN + MAXPRJ = ONE + DO 3051 p = 2, N + TEMP1 = ABS(A(p,p)) / SVA(IWORK(p)) + MAXPRJ = MIN( MAXPRJ, TEMP1 ) + 3051 CONTINUE + IF ( MAXPRJ**2 .GE. ONE - DBLE(N)*EPSLN ) ALMORT = .TRUE. + END IF +* +* + SCONDA = - ONE + CONDR1 = - ONE + CONDR2 = - ONE +* + IF ( ERREST ) THEN + IF ( N .EQ. NR ) THEN + IF ( RSVEC ) THEN +* .. V is available as workspace + CALL ZLACPY( 'U', N, N, A, LDA, V, LDV ) + DO 3053 p = 1, N + TEMP1 = SVA(IWORK(p)) + CALL ZDSCAL( p, ONE/TEMP1, V(1,p), 1 ) + 3053 CONTINUE + IF ( LSVEC )THEN + CALL ZPOCON( 'U', N, V, LDV, ONE, TEMP1, + $ CWORK(N+1), RWORK, IERR ) + ELSE + CALL ZPOCON( 'U', N, V, LDV, ONE, TEMP1, + $ CWORK, RWORK, IERR ) + END IF +* + ELSE IF ( LSVEC ) THEN +* .. U is available as workspace + CALL ZLACPY( 'U', N, N, A, LDA, U, LDU ) + DO 3054 p = 1, N + TEMP1 = SVA(IWORK(p)) + CALL ZDSCAL( p, ONE/TEMP1, U(1,p), 1 ) + 3054 CONTINUE + CALL ZPOCON( 'U', N, U, LDU, ONE, TEMP1, + $ CWORK(N+1), RWORK, IERR ) + ELSE + CALL ZLACPY( 'U', N, N, A, LDA, CWORK, N ) +*[] CALL ZLACPY( 'U', N, N, A, LDA, CWORK(N+1), N ) +* Change: here index shifted by N to the left, CWORK(1:N) +* not needed for SIGMA only computation + DO 3052 p = 1, N + TEMP1 = SVA(IWORK(p)) +*[] CALL ZDSCAL( p, ONE/TEMP1, CWORK(N+(p-1)*N+1), 1 ) + CALL ZDSCAL( p, ONE/TEMP1, CWORK((p-1)*N+1), 1 ) + 3052 CONTINUE +* .. the columns of R are scaled to have unit Euclidean lengths. +*[] CALL ZPOCON( 'U', N, CWORK(N+1), N, ONE, TEMP1, +*[] $ CWORK(N+N*N+1), RWORK, IERR ) + CALL ZPOCON( 'U', N, CWORK, N, ONE, TEMP1, + $ CWORK(N*N+1), RWORK, IERR ) +* + END IF + IF ( TEMP1 .NE. ZERO ) THEN + SCONDA = ONE / SQRT(TEMP1) + ELSE + SCONDA = - ONE + END IF +* SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1). +* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA + ELSE + SCONDA = - ONE + END IF + END IF +* + L2PERT = L2PERT .AND. ( ABS( A(1,1)/A(NR,NR) ) .GT. SQRT(BIG1) ) +* If there is no violent scaling, artificial perturbation is not needed. +* +* Phase 3: +* + IF ( .NOT. ( RSVEC .OR. LSVEC ) ) THEN +* +* Singular Values only +* +* .. transpose A(1:NR,1:N) + DO 1946 p = 1, MIN( N-1, NR ) + CALL ZCOPY( N-p, A(p,p+1), LDA, A(p+1,p), 1 ) + CALL ZLACGV( N-p+1, A(p,p), 1 ) + 1946 CONTINUE + IF ( NR .EQ. N ) A(N,N) = CONJG(A(N,N)) +* +* The following two DO-loops introduce small relative perturbation +* into the strict upper triangle of the lower triangular matrix. +* Small entries below the main diagonal are also changed. +* This modification is useful if the computing environment does not +* provide/allow FLUSH TO ZERO underflow, for it prevents many +* annoying denormalized numbers in case of strongly scaled matrices. +* The perturbation is structured so that it does not introduce any +* new perturbation of the singular values, and it does not destroy +* the job done by the preconditioner. +* The licence for this perturbation is in the variable L2PERT, which +* should be .FALSE. if FLUSH TO ZERO underflow is active. +* + IF ( .NOT. ALMORT ) THEN +* + IF ( L2PERT ) THEN +* XSC = SQRT(SMALL) + XSC = EPSLN / DBLE(N) + DO 4947 q = 1, NR + CTEMP = DCMPLX(XSC*ABS(A(q,q)),ZERO) + DO 4949 p = 1, N + IF ( ( (p.GT.q) .AND. (ABS(A(p,q)).LE.TEMP1) ) + $ .OR. ( p .LT. q ) ) +* $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) ) + $ A(p,q) = CTEMP + 4949 CONTINUE + 4947 CONTINUE + ELSE + CALL ZLASET( 'U', NR-1,NR-1, CZERO,CZERO, A(1,2),LDA ) + END IF +* +* .. second preconditioning using the QR factorization +* + CALL ZGEQRF( N,NR, A,LDA, CWORK, CWORK(N+1),LWORK-N, IERR ) +* +* .. and transpose upper to lower triangular + DO 1948 p = 1, NR - 1 + CALL ZCOPY( NR-p, A(p,p+1), LDA, A(p+1,p), 1 ) + CALL ZLACGV( NR-p+1, A(p,p), 1 ) + 1948 CONTINUE +* + END IF +* +* Row-cyclic Jacobi SVD algorithm with column pivoting +* +* .. again some perturbation (a "background noise") is added +* to drown denormals + IF ( L2PERT ) THEN +* XSC = SQRT(SMALL) + XSC = EPSLN / DBLE(N) + DO 1947 q = 1, NR + CTEMP = DCMPLX(XSC*ABS(A(q,q)),ZERO) + DO 1949 p = 1, NR + IF ( ( (p.GT.q) .AND. (ABS(A(p,q)).LE.TEMP1) ) + $ .OR. ( p .LT. q ) ) +* $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) ) + $ A(p,q) = CTEMP + 1949 CONTINUE + 1947 CONTINUE + ELSE + CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, A(1,2), LDA ) + END IF +* +* .. and one-sided Jacobi rotations are started on a lower +* triangular matrix (plus perturbation which is ignored in +* the part which destroys triangular form (confusing?!)) +* + CALL ZGESVJ( 'L', 'N', 'N', NR, NR, A, LDA, SVA, + $ N, V, LDV, CWORK, LWORK, RWORK, LRWORK, INFO ) +* + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) +* +* + ELSE IF ( ( RSVEC .AND. ( .NOT. LSVEC ) .AND. ( .NOT. JRACC ) ) + $ .OR. + $ ( JRACC .AND. ( .NOT. LSVEC ) .AND. ( NR .NE. N ) ) ) THEN +* +* -> Singular Values and Right Singular Vectors <- +* + IF ( ALMORT ) THEN +* +* .. in this case NR equals N + DO 1998 p = 1, NR + CALL ZCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) + CALL ZLACGV( N-p+1, V(p,p), 1 ) + 1998 CONTINUE + CALL ZLASET( 'U', NR-1,NR-1, CZERO, CZERO, V(1,2), LDV ) +* + CALL ZGESVJ( 'L','U','N', N, NR, V, LDV, SVA, NR, A, LDA, + $ CWORK, LWORK, RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + + ELSE +* +* .. two more QR factorizations ( one QRF is not enough, two require +* accumulated product of Jacobi rotations, three are perfect ) +* + CALL ZLASET( 'L', NR-1,NR-1, CZERO, CZERO, A(2,1), LDA ) + CALL ZGELQF( NR,N, A, LDA, CWORK, CWORK(N+1), LWORK-N, IERR) + CALL ZLACPY( 'L', NR, NR, A, LDA, V, LDV ) + CALL ZLASET( 'U', NR-1,NR-1, CZERO, CZERO, V(1,2), LDV ) + CALL ZGEQRF( NR, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), + $ LWORK-2*N, IERR ) + DO 8998 p = 1, NR + CALL ZCOPY( NR-p+1, V(p,p), LDV, V(p,p), 1 ) + CALL ZLACGV( NR-p+1, V(p,p), 1 ) + 8998 CONTINUE + CALL ZLASET('U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV) +* + CALL ZGESVJ( 'L', 'U','N', NR, NR, V,LDV, SVA, NR, U, + $ LDU, CWORK(N+1), LWORK-N, RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + IF ( NR .LT. N ) THEN + CALL ZLASET( 'A',N-NR, NR, CZERO,CZERO, V(NR+1,1), LDV ) + CALL ZLASET( 'A',NR, N-NR, CZERO,CZERO, V(1,NR+1), LDV ) + CALL ZLASET( 'A',N-NR,N-NR,CZERO,CONE, V(NR+1,NR+1),LDV ) + END IF +* + CALL ZUNMLQ( 'L', 'C', N, N, NR, A, LDA, CWORK, + $ V, LDV, CWORK(N+1), LWORK-N, IERR ) +* + END IF +* .. permute the rows of V +* DO 8991 p = 1, N +* CALL ZCOPY( N, V(p,1), LDV, A(IWORK(p),1), LDA ) +* 8991 CONTINUE +* CALL ZLACPY( 'All', N, N, A, LDA, V, LDV ) + CALL ZLAPMR( .FALSE., N, N, V, LDV, IWORK ) +* + IF ( TRANSP ) THEN + CALL ZLACPY( 'A', N, N, V, LDV, U, LDU ) + END IF +* + ELSE IF ( JRACC .AND. (.NOT. LSVEC) .AND. ( NR.EQ. N ) ) THEN +* + CALL ZLASET( 'L', N-1,N-1, CZERO, CZERO, A(2,1), LDA ) +* + CALL ZGESVJ( 'U','N','V', N, N, A, LDA, SVA, N, V, LDV, + $ CWORK, LWORK, RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + CALL ZLAPMR( .FALSE., N, N, V, LDV, IWORK ) +* + ELSE IF ( LSVEC .AND. ( .NOT. RSVEC ) ) THEN +* +* .. Singular Values and Left Singular Vectors .. +* +* .. second preconditioning step to avoid need to accumulate +* Jacobi rotations in the Jacobi iterations. + DO 1965 p = 1, NR + CALL ZCOPY( N-p+1, A(p,p), LDA, U(p,p), 1 ) + CALL ZLACGV( N-p+1, U(p,p), 1 ) + 1965 CONTINUE + CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU ) +* + CALL ZGEQRF( N, NR, U, LDU, CWORK(N+1), CWORK(2*N+1), + $ LWORK-2*N, IERR ) +* + DO 1967 p = 1, NR - 1 + CALL ZCOPY( NR-p, U(p,p+1), LDU, U(p+1,p), 1 ) + CALL ZLACGV( N-p+1, U(p,p), 1 ) + 1967 CONTINUE + CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU ) +* + CALL ZGESVJ( 'L', 'U', 'N', NR,NR, U, LDU, SVA, NR, A, + $ LDA, CWORK(N+1), LWORK-N, RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) +* + IF ( NR .LT. M ) THEN + CALL ZLASET( 'A', M-NR, NR,CZERO, CZERO, U(NR+1,1), LDU ) + IF ( NR .LT. N1 ) THEN + CALL ZLASET( 'A',NR, N1-NR, CZERO, CZERO, U(1,NR+1),LDU ) + CALL ZLASET( 'A',M-NR,N1-NR,CZERO,CONE,U(NR+1,NR+1),LDU ) + END IF + END IF +* + CALL ZUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, + $ LDU, CWORK(N+1), LWORK-N, IERR ) +* + IF ( ROWPIV ) + $ CALL ZLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) +* + DO 1974 p = 1, N1 + XSC = ONE / DZNRM2( M, U(1,p), 1 ) + CALL ZDSCAL( M, XSC, U(1,p), 1 ) + 1974 CONTINUE +* + IF ( TRANSP ) THEN + CALL ZLACPY( 'A', N, N, U, LDU, V, LDV ) + END IF +* + ELSE +* +* .. Full SVD .. +* + IF ( .NOT. JRACC ) THEN +* + IF ( .NOT. ALMORT ) THEN +* +* Second Preconditioning Step (QRF [with pivoting]) +* Note that the composition of TRANSPOSE, QRF and TRANSPOSE is +* equivalent to an LQF CALL. Since in many libraries the QRF +* seems to be better optimized than the LQF, we do explicit +* transpose and use the QRF. This is subject to changes in an +* optimized implementation of ZGEJSV. +* + DO 1968 p = 1, NR + CALL ZCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) + CALL ZLACGV( N-p+1, V(p,p), 1 ) + 1968 CONTINUE +* +* .. the following two loops perturb small entries to avoid +* denormals in the second QR factorization, where they are +* as good as zeros. This is done to avoid painfully slow +* computation with denormals. The relative size of the perturbation +* is a parameter that can be changed by the implementer. +* This perturbation device will be obsolete on machines with +* properly implemented arithmetic. +* To switch it off, set L2PERT=.FALSE. To remove it from the +* code, remove the action under L2PERT=.TRUE., leave the ELSE part. +* The following two loops should be blocked and fused with the +* transposed copy above. +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 2969 q = 1, NR + CTEMP = DCMPLX(XSC*ABS( V(q,q) ),ZERO) + DO 2968 p = 1, N + IF ( ( p .GT. q ) .AND. ( ABS(V(p,q)) .LE. TEMP1 ) + $ .OR. ( p .LT. q ) ) +* $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) ) + $ V(p,q) = CTEMP + IF ( p .LT. q ) V(p,q) = - V(p,q) + 2968 CONTINUE + 2969 CONTINUE + ELSE + CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV ) + END IF +* +* Estimate the row scaled condition number of R1 +* (If R1 is rectangular, N > NR, then the condition number +* of the leading NR x NR submatrix is estimated.) +* + CALL ZLACPY( 'L', NR, NR, V, LDV, CWORK(2*N+1), NR ) + DO 3950 p = 1, NR + TEMP1 = DZNRM2(NR-p+1,CWORK(2*N+(p-1)*NR+p),1) + CALL ZDSCAL(NR-p+1,ONE/TEMP1,CWORK(2*N+(p-1)*NR+p),1) + 3950 CONTINUE + CALL ZPOCON('L',NR,CWORK(2*N+1),NR,ONE,TEMP1, + $ CWORK(2*N+NR*NR+1),RWORK,IERR) + CONDR1 = ONE / SQRT(TEMP1) +* .. here need a second oppinion on the condition number +* .. then assume worst case scenario +* R1 is OK for inverse <=> CONDR1 .LT. DBLE(N) +* more conservative <=> CONDR1 .LT. SQRT(DBLE(N)) +* + COND_OK = SQRT(SQRT(DBLE(NR))) +*[TP] COND_OK is a tuning parameter. +* + IF ( CONDR1 .LT. COND_OK ) THEN +* .. the second QRF without pivoting. Note: in an optimized +* implementation, this QRF should be implemented as the QRF +* of a lower triangular matrix. +* R1^* = Q2 * R2 + CALL ZGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), + $ LWORK-2*N, IERR ) +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL)/EPSLN + DO 3959 p = 2, NR + DO 3958 q = 1, p - 1 + CTEMP=DCMPLX(XSC*MIN(ABS(V(p,p)),ABS(V(q,q))), + $ ZERO) + IF ( ABS(V(q,p)) .LE. TEMP1 ) +* $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) ) + $ V(q,p) = CTEMP + 3958 CONTINUE + 3959 CONTINUE + END IF +* + IF ( NR .NE. N ) + $ CALL ZLACPY( 'A', N, NR, V, LDV, CWORK(2*N+1), N ) +* .. save ... +* +* .. this transposed copy should be better than naive + DO 1969 p = 1, NR - 1 + CALL ZCOPY( NR-p, V(p,p+1), LDV, V(p+1,p), 1 ) + CALL ZLACGV(NR-p+1, V(p,p), 1 ) + 1969 CONTINUE + V(NR,NR)=CONJG(V(NR,NR)) +* + CONDR2 = CONDR1 +* + ELSE +* +* .. ill-conditioned case: second QRF with pivoting +* Note that windowed pivoting would be equaly good +* numerically, and more run-time efficient. So, in +* an optimal implementation, the next call to ZGEQP3 +* should be replaced with eg. CALL ZGEQPX (ACM TOMS #782) +* with properly (carefully) chosen parameters. +* +* R1^* * P2 = Q2 * R2 + DO 3003 p = 1, NR + IWORK(N+p) = 0 + 3003 CONTINUE + CALL ZGEQP3( N, NR, V, LDV, IWORK(N+1), CWORK(N+1), + $ CWORK(2*N+1), LWORK-2*N, RWORK, IERR ) +** CALL ZGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), +** $ LWORK-2*N, IERR ) + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 3969 p = 2, NR + DO 3968 q = 1, p - 1 + CTEMP=DCMPLX(XSC*MIN(ABS(V(p,p)),ABS(V(q,q))), + $ ZERO) + IF ( ABS(V(q,p)) .LE. TEMP1 ) +* $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) ) + $ V(q,p) = CTEMP + 3968 CONTINUE + 3969 CONTINUE + END IF +* + CALL ZLACPY( 'A', N, NR, V, LDV, CWORK(2*N+1), N ) +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 8970 p = 2, NR + DO 8971 q = 1, p - 1 + CTEMP=DCMPLX(XSC*MIN(ABS(V(p,p)),ABS(V(q,q))), + $ ZERO) +* V(p,q) = - TEMP1*( V(q,p) / ABS(V(q,p)) ) + V(p,q) = - CTEMP + 8971 CONTINUE + 8970 CONTINUE + ELSE + CALL ZLASET( 'L',NR-1,NR-1,CZERO,CZERO,V(2,1),LDV ) + END IF +* Now, compute R2 = L3 * Q3, the LQ factorization. + CALL ZGELQF( NR, NR, V, LDV, CWORK(2*N+N*NR+1), + $ CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, IERR ) +* .. and estimate the condition number + CALL ZLACPY( 'L',NR,NR,V,LDV,CWORK(2*N+N*NR+NR+1),NR ) + DO 4950 p = 1, NR + TEMP1 = DZNRM2( p, CWORK(2*N+N*NR+NR+p), NR ) + CALL ZDSCAL( p, ONE/TEMP1, CWORK(2*N+N*NR+NR+p), NR ) + 4950 CONTINUE + CALL ZPOCON( 'L',NR,CWORK(2*N+N*NR+NR+1),NR,ONE,TEMP1, + $ CWORK(2*N+N*NR+NR+NR*NR+1),RWORK,IERR ) + CONDR2 = ONE / SQRT(TEMP1) +* +* + IF ( CONDR2 .GE. COND_OK ) THEN +* .. save the Householder vectors used for Q3 +* (this overwrittes the copy of R2, as it will not be +* needed in this branch, but it does not overwritte the +* Huseholder vectors of Q2.). + CALL ZLACPY( 'U', NR, NR, V, LDV, CWORK(2*N+1), N ) +* .. and the rest of the information on Q3 is in +* WORK(2*N+N*NR+1:2*N+N*NR+N) + END IF +* + END IF +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 4968 q = 2, NR + CTEMP = XSC * V(q,q) + DO 4969 p = 1, q - 1 +* V(p,q) = - TEMP1*( V(p,q) / ABS(V(p,q)) ) + V(p,q) = - CTEMP + 4969 CONTINUE + 4968 CONTINUE + ELSE + CALL ZLASET( 'U', NR-1,NR-1, CZERO,CZERO, V(1,2), LDV ) + END IF +* +* Second preconditioning finished; continue with Jacobi SVD +* The input matrix is lower trinagular. +* +* Recover the right singular vectors as solution of a well +* conditioned triangular matrix equation. +* + IF ( CONDR1 .LT. COND_OK ) THEN +* + CALL ZGESVJ( 'L','U','N',NR,NR,V,LDV,SVA,NR,U, LDU, + $ CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,RWORK, + $ LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + DO 3970 p = 1, NR + CALL ZCOPY( NR, V(1,p), 1, U(1,p), 1 ) + CALL ZDSCAL( NR, SVA(p), V(1,p), 1 ) + 3970 CONTINUE + +* .. pick the right matrix equation and solve it +* + IF ( NR .EQ. N ) THEN +* :)) .. best case, R1 is inverted. The solution of this matrix +* equation is Q2*V2 = the product of the Jacobi rotations +* used in ZGESVJ, premultiplied with the orthogonal matrix +* from the second QR factorization. + CALL ZTRSM('L','U','N','N', NR,NR,CONE, A,LDA, V,LDV) + ELSE +* .. R1 is well conditioned, but non-square. Adjoint of R2 +* is inverted to get the product of the Jacobi rotations +* used in ZGESVJ. The Q-factor from the second QR +* factorization is then built in explicitly. + CALL ZTRSM('L','U','C','N',NR,NR,CONE,CWORK(2*N+1), + $ N,V,LDV) + IF ( NR .LT. N ) THEN + CALL ZLASET('A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV) + CALL ZLASET('A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV) + CALL ZLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV) + END IF + CALL ZUNMQR('L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), + $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR) + END IF +* + ELSE IF ( CONDR2 .LT. COND_OK ) THEN +* +* The matrix R2 is inverted. The solution of the matrix equation +* is Q3^* * V3 = the product of the Jacobi rotations (appplied to +* the lower triangular L3 from the LQ factorization of +* R2=L3*Q3), pre-multiplied with the transposed Q3. + CALL ZGESVJ( 'L', 'U', 'N', NR, NR, V, LDV, SVA, NR, U, + $ LDU, CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, + $ RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + DO 3870 p = 1, NR + CALL ZCOPY( NR, V(1,p), 1, U(1,p), 1 ) + CALL ZDSCAL( NR, SVA(p), U(1,p), 1 ) + 3870 CONTINUE + CALL ZTRSM('L','U','N','N',NR,NR,CONE,CWORK(2*N+1),N, + $ U,LDU) +* .. apply the permutation from the second QR factorization + DO 873 q = 1, NR + DO 872 p = 1, NR + CWORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q) + 872 CONTINUE + DO 874 p = 1, NR + U(p,q) = CWORK(2*N+N*NR+NR+p) + 874 CONTINUE + 873 CONTINUE + IF ( NR .LT. N ) THEN + CALL ZLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV ) + CALL ZLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV ) + CALL ZLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV) + END IF + CALL ZUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), + $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) + ELSE +* Last line of defense. +* #:( This is a rather pathological case: no scaled condition +* improvement after two pivoted QR factorizations. Other +* possibility is that the rank revealing QR factorization +* or the condition estimator has failed, or the COND_OK +* is set very close to ONE (which is unnecessary). Normally, +* this branch should never be executed, but in rare cases of +* failure of the RRQR or condition estimator, the last line of +* defense ensures that ZGEJSV completes the task. +* Compute the full SVD of L3 using ZGESVJ with explicit +* accumulation of Jacobi rotations. + CALL ZGESVJ( 'L', 'U', 'V', NR, NR, V, LDV, SVA, NR, U, + $ LDU, CWORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, + $ RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + IF ( NR .LT. N ) THEN + CALL ZLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV ) + CALL ZLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV ) + CALL ZLASET('A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV) + END IF + CALL ZUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), + $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) +* + CALL ZUNMLQ( 'L', 'C', NR, NR, NR, CWORK(2*N+1), N, + $ CWORK(2*N+N*NR+1), U, LDU, CWORK(2*N+N*NR+NR+1), + $ LWORK-2*N-N*NR-NR, IERR ) + DO 773 q = 1, NR + DO 772 p = 1, NR + CWORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q) + 772 CONTINUE + DO 774 p = 1, NR + U(p,q) = CWORK(2*N+N*NR+NR+p) + 774 CONTINUE + 773 CONTINUE +* + END IF +* +* Permute the rows of V using the (column) permutation from the +* first QRF. Also, scale the columns to make them unit in +* Euclidean norm. This applies to all cases. +* + TEMP1 = SQRT(DBLE(N)) * EPSLN + DO 1972 q = 1, N + DO 972 p = 1, N + CWORK(2*N+N*NR+NR+IWORK(p)) = V(p,q) + 972 CONTINUE + DO 973 p = 1, N + V(p,q) = CWORK(2*N+N*NR+NR+p) + 973 CONTINUE + XSC = ONE / DZNRM2( N, V(1,q), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL ZDSCAL( N, XSC, V(1,q), 1 ) + 1972 CONTINUE +* At this moment, V contains the right singular vectors of A. +* Next, assemble the left singular vector matrix U (M x N). + IF ( NR .LT. M ) THEN + CALL ZLASET('A', M-NR, NR, CZERO, CZERO, U(NR+1,1), LDU) + IF ( NR .LT. N1 ) THEN + CALL ZLASET('A',NR,N1-NR,CZERO,CZERO,U(1,NR+1),LDU) + CALL ZLASET('A',M-NR,N1-NR,CZERO,CONE, + $ U(NR+1,NR+1),LDU) + END IF + END IF +* +* The Q matrix from the first QRF is built into the left singular +* matrix U. This applies to all cases. +* + CALL ZUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, + $ LDU, CWORK(N+1), LWORK-N, IERR ) + +* The columns of U are normalized. The cost is O(M*N) flops. + TEMP1 = SQRT(DBLE(M)) * EPSLN + DO 1973 p = 1, NR + XSC = ONE / DZNRM2( M, U(1,p), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL ZDSCAL( M, XSC, U(1,p), 1 ) + 1973 CONTINUE +* +* If the initial QRF is computed with row pivoting, the left +* singular vectors must be adjusted. +* + IF ( ROWPIV ) + $ CALL ZLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) +* + ELSE +* +* .. the initial matrix A has almost orthogonal columns and +* the second QRF is not needed +* + CALL ZLACPY( 'U', N, N, A, LDA, CWORK(N+1), N ) + IF ( L2PERT ) THEN + XSC = SQRT(SMALL) + DO 5970 p = 2, N + CTEMP = XSC * CWORK( N + (p-1)*N + p ) + DO 5971 q = 1, p - 1 +* CWORK(N+(q-1)*N+p)=-TEMP1 * ( CWORK(N+(p-1)*N+q) / +* $ ABS(CWORK(N+(p-1)*N+q)) ) + CWORK(N+(q-1)*N+p)=-CTEMP + 5971 CONTINUE + 5970 CONTINUE + ELSE + CALL ZLASET( 'L',N-1,N-1,CZERO,CZERO,CWORK(N+2),N ) + END IF +* + CALL ZGESVJ( 'U', 'U', 'N', N, N, CWORK(N+1), N, SVA, + $ N, U, LDU, CWORK(N+N*N+1), LWORK-N-N*N, RWORK, LRWORK, + $ INFO ) +* + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + DO 6970 p = 1, N + CALL ZCOPY( N, CWORK(N+(p-1)*N+1), 1, U(1,p), 1 ) + CALL ZDSCAL( N, SVA(p), CWORK(N+(p-1)*N+1), 1 ) + 6970 CONTINUE +* + CALL ZTRSM( 'L', 'U', 'N', 'N', N, N, + $ CONE, A, LDA, CWORK(N+1), N ) + DO 6972 p = 1, N + CALL ZCOPY( N, CWORK(N+p), N, V(IWORK(p),1), LDV ) + 6972 CONTINUE + TEMP1 = SQRT(DBLE(N))*EPSLN + DO 6971 p = 1, N + XSC = ONE / DZNRM2( N, V(1,p), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL ZDSCAL( N, XSC, V(1,p), 1 ) + 6971 CONTINUE +* +* Assemble the left singular vector matrix U (M x N). +* + IF ( N .LT. M ) THEN + CALL ZLASET( 'A', M-N, N, CZERO, CZERO, U(N+1,1), LDU ) + IF ( N .LT. N1 ) THEN + CALL ZLASET('A',N, N1-N, CZERO, CZERO, U(1,N+1),LDU) + CALL ZLASET( 'A',M-N,N1-N, CZERO, CONE,U(N+1,N+1),LDU) + END IF + END IF + CALL ZUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, + $ LDU, CWORK(N+1), LWORK-N, IERR ) + TEMP1 = SQRT(DBLE(M))*EPSLN + DO 6973 p = 1, N1 + XSC = ONE / DZNRM2( M, U(1,p), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL ZDSCAL( M, XSC, U(1,p), 1 ) + 6973 CONTINUE +* + IF ( ROWPIV ) + $ CALL ZLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) +* + END IF +* +* end of the >> almost orthogonal case << in the full SVD +* + ELSE +* +* This branch deploys a preconditioned Jacobi SVD with explicitly +* accumulated rotations. It is included as optional, mainly for +* experimental purposes. It does perfom well, and can also be used. +* In this implementation, this branch will be automatically activated +* if the condition number sigma_max(A) / sigma_min(A) is predicted +* to be greater than the overflow threshold. This is because the +* a posteriori computation of the singular vectors assumes robust +* implementation of BLAS and some LAPACK procedures, capable of working +* in presence of extreme values, e.g. when the singular values spread from +* the underflow to the overflow threshold. +* + DO 7968 p = 1, NR + CALL ZCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 ) + CALL ZLACGV( N-p+1, V(p,p), 1 ) + 7968 CONTINUE +* + IF ( L2PERT ) THEN + XSC = SQRT(SMALL/EPSLN) + DO 5969 q = 1, NR + CTEMP = DCMPLX(XSC*ABS( V(q,q) ),ZERO) + DO 5968 p = 1, N + IF ( ( p .GT. q ) .AND. ( ABS(V(p,q)) .LE. TEMP1 ) + $ .OR. ( p .LT. q ) ) +* $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) ) + $ V(p,q) = CTEMP + IF ( p .LT. q ) V(p,q) = - V(p,q) + 5968 CONTINUE + 5969 CONTINUE + ELSE + CALL ZLASET( 'U', NR-1, NR-1, CZERO, CZERO, V(1,2), LDV ) + END IF + + CALL ZGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1), + $ LWORK-2*N, IERR ) + CALL ZLACPY( 'L', N, NR, V, LDV, CWORK(2*N+1), N ) +* + DO 7969 p = 1, NR + CALL ZCOPY( NR-p+1, V(p,p), LDV, U(p,p), 1 ) + CALL ZLACGV( NR-p+1, U(p,p), 1 ) + 7969 CONTINUE + + IF ( L2PERT ) THEN + XSC = SQRT(SMALL/EPSLN) + DO 9970 q = 2, NR + DO 9971 p = 1, q - 1 + CTEMP = DCMPLX(XSC * MIN(ABS(U(p,p)),ABS(U(q,q))), + $ ZERO) +* U(p,q) = - TEMP1 * ( U(q,p) / ABS(U(q,p)) ) + U(p,q) = - CTEMP + 9971 CONTINUE + 9970 CONTINUE + ELSE + CALL ZLASET('U', NR-1, NR-1, CZERO, CZERO, U(1,2), LDU ) + END IF + + CALL ZGESVJ( 'L', 'U', 'V', NR, NR, U, LDU, SVA, + $ N, V, LDV, CWORK(2*N+N*NR+1), LWORK-2*N-N*NR, + $ RWORK, LRWORK, INFO ) + SCALEM = RWORK(1) + NUMRANK = NINT(RWORK(2)) + + IF ( NR .LT. N ) THEN + CALL ZLASET( 'A',N-NR,NR,CZERO,CZERO,V(NR+1,1),LDV ) + CALL ZLASET( 'A',NR,N-NR,CZERO,CZERO,V(1,NR+1),LDV ) + CALL ZLASET( 'A',N-NR,N-NR,CZERO,CONE,V(NR+1,NR+1),LDV ) + END IF + + CALL ZUNMQR( 'L','N',N,N,NR,CWORK(2*N+1),N,CWORK(N+1), + $ V,LDV,CWORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR ) +* +* Permute the rows of V using the (column) permutation from the +* first QRF. Also, scale the columns to make them unit in +* Euclidean norm. This applies to all cases. +* + TEMP1 = SQRT(DBLE(N)) * EPSLN + DO 7972 q = 1, N + DO 8972 p = 1, N + CWORK(2*N+N*NR+NR+IWORK(p)) = V(p,q) + 8972 CONTINUE + DO 8973 p = 1, N + V(p,q) = CWORK(2*N+N*NR+NR+p) + 8973 CONTINUE + XSC = ONE / DZNRM2( N, V(1,q), 1 ) + IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) ) + $ CALL ZDSCAL( N, XSC, V(1,q), 1 ) + 7972 CONTINUE +* +* At this moment, V contains the right singular vectors of A. +* Next, assemble the left singular vector matrix U (M x N). +* + IF ( NR .LT. M ) THEN + CALL ZLASET( 'A', M-NR, NR, CZERO, CZERO, U(NR+1,1), LDU ) + IF ( NR .LT. N1 ) THEN + CALL ZLASET('A',NR, N1-NR, CZERO, CZERO, U(1,NR+1),LDU) + CALL ZLASET('A',M-NR,N1-NR, CZERO, CONE,U(NR+1,NR+1),LDU) + END IF + END IF +* + CALL ZUNMQR( 'L', 'N', M, N1, N, A, LDA, CWORK, U, + $ LDU, CWORK(N+1), LWORK-N, IERR ) +* + IF ( ROWPIV ) + $ CALL ZLASWP( N1, U, LDU, 1, M-1, IWORK(IWOFF+1), -1 ) +* +* + END IF + IF ( TRANSP ) THEN +* .. swap U and V because the procedure worked on A^* + DO 6974 p = 1, N + CALL ZSWAP( N, U(1,p), 1, V(1,p), 1 ) + 6974 CONTINUE + END IF +* + END IF +* end of the full SVD +* +* Undo scaling, if necessary (and possible) +* + IF ( USCAL2 .LE. (BIG/SVA(1))*USCAL1 ) THEN + CALL DLASCL( 'G', 0, 0, USCAL1, USCAL2, NR, 1, SVA, N, IERR ) + USCAL1 = ONE + USCAL2 = ONE + END IF +* + IF ( NR .LT. N ) THEN + DO 3004 p = NR+1, N + SVA(p) = ZERO + 3004 CONTINUE + END IF +* + RWORK(1) = USCAL2 * SCALEM + RWORK(2) = USCAL1 + IF ( ERREST ) RWORK(3) = SCONDA + IF ( LSVEC .AND. RSVEC ) THEN + RWORK(4) = CONDR1 + RWORK(5) = CONDR2 + END IF + IF ( L2TRAN ) THEN + RWORK(6) = ENTRA + RWORK(7) = ENTRAT + END IF +* + IWORK(1) = NR + IWORK(2) = NUMRANK + IWORK(3) = WARNING + IF ( TRANSP ) THEN + IWORK(4) = 1 + ELSE + IWORK(4) = -1 + END IF + +* + RETURN +* .. +* .. END OF ZGEJSV +* .. + END +* diff --git a/dspl/liblapack/SRC/zgelq.f b/dspl/liblapack/SRC/zgelq.f new file mode 100644 index 0000000..6563965 --- /dev/null +++ b/dspl/liblapack/SRC/zgelq.f @@ -0,0 +1,306 @@ +* +* Definition: +* =========== +* +* SUBROUTINE ZGELQ( M, N, A, LDA, T, TSIZE, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, TSIZE, LWORK +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), T( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> ZGELQ computes a LQ factorization of an M-by-N matrix A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the M-by-min(M,N) lower trapezoidal matrix L +*> (L is lower triangular if M <= N); +*> the elements above the diagonal are used to store part of the +*> data structure to represent Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (MAX(5,TSIZE)) +*> On exit, if INFO = 0, T(1) returns optimal (or either minimal +*> or optimal, if query is assumed) TSIZE. See TSIZE for details. +*> Remaining T contains part of the data structure used to represent Q. +*> If one wants to apply or construct Q, then one needs to keep T +*> (in addition to A) and pass it to further subroutines. +*> \endverbatim +*> +*> \param[in] TSIZE +*> \verbatim +*> TSIZE is INTEGER +*> If TSIZE >= 5, the dimension of the array T. +*> If TSIZE = -1 or -2, then a workspace query is assumed. The routine +*> only calculates the sizes of the T and WORK arrays, returns these +*> values as the first entries of the T and WORK arrays, and no error +*> message related to T or WORK is issued by XERBLA. +*> If TSIZE = -1, the routine calculates optimal size of T for the +*> optimum performance and returns this value in T(1). +*> If TSIZE = -2, the routine calculates minimal size of T and +*> returns this value in T(1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) contains optimal (or either minimal +*> or optimal, if query was assumed) LWORK. +*> See LWORK for details. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If LWORK = -1 or -2, then a workspace query is assumed. The routine +*> only calculates the sizes of the T and WORK arrays, returns these +*> values as the first entries of the T and WORK arrays, and no error +*> message related to T or WORK is issued by XERBLA. +*> If LWORK = -1, the routine calculates optimal size of WORK for the +*> optimal performance and returns this value in WORK(1). +*> If LWORK = -2, the routine calculates minimal size of WORK and +*> returns this value in WORK(1). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details +* ==================== +*> +*> \verbatim +*> +*> The goal of the interface is to give maximum freedom to the developers for +*> creating any LQ factorization algorithm they wish. The triangular +*> (trapezoidal) L has to be stored in the lower part of A. The lower part of A +*> and the array T can be used to store any relevant information for applying or +*> constructing the Q factor. The WORK array can safely be discarded after exit. +*> +*> Caution: One should not expect the sizes of T and WORK to be the same from one +*> LAPACK implementation to the other, or even from one execution to the other. +*> A workspace query (for T and WORK) is needed at each execution. However, +*> for a given execution, the size of T and WORK are fixed and will not change +*> from one query to the next. +*> +*> \endverbatim +*> +*> \par Further Details particular to this LAPACK implementation: +* ============================================================== +*> +*> \verbatim +*> +*> These details are particular for this LAPACK implementation. Users should not +*> take them for granted. These details may change in the future, and are unlikely not +*> true for another LAPACK implementation. These details are relevant if one wants +*> to try to understand the code. They are not part of the interface. +*> +*> In this version, +*> +*> T(2): row block size (MB) +*> T(3): column block size (NB) +*> T(6:TSIZE): data structure needed for Q, computed by +*> ZLASWLQ or ZGELQT +*> +*> Depending on the matrix dimensions M and N, and row and column +*> block sizes MB and NB returned by ILAENV, ZGELQ will use either +*> ZLASWLQ (if the matrix is short-and-wide) or ZGELQT to compute +*> the LQ factorization. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGELQ( M, N, A, LDA, T, TSIZE, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, TSIZE, LWORK +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), T( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LQUERY, LMINWS, MINT, MINW + INTEGER MB, NB, MINTSZ, NBLCKS +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZGELQT, ZLASWLQ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, MOD +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 +* + LQUERY = ( TSIZE.EQ.-1 .OR. TSIZE.EQ.-2 .OR. + $ LWORK.EQ.-1 .OR. LWORK.EQ.-2 ) +* + MINT = .FALSE. + MINW = .FALSE. + IF( TSIZE.EQ.-2 .OR. LWORK.EQ.-2 ) THEN + IF( TSIZE.NE.-1 ) MINT = .TRUE. + IF( LWORK.NE.-1 ) MINW = .TRUE. + END IF +* +* Determine the block size +* + IF( MIN( M, N ).GT.0 ) THEN + MB = ILAENV( 1, 'ZGELQ ', ' ', M, N, 1, -1 ) + NB = ILAENV( 1, 'ZGELQ ', ' ', M, N, 2, -1 ) + ELSE + MB = 1 + NB = N + END IF + IF( MB.GT.MIN( M, N ) .OR. MB.LT.1 ) MB = 1 + IF( NB.GT.N .OR. NB.LE.M ) NB = N + MINTSZ = M + 5 + IF ( NB.GT.M .AND. N.GT.M ) THEN + IF( MOD( N - M, NB - M ).EQ.0 ) THEN + NBLCKS = ( N - M ) / ( NB - M ) + ELSE + NBLCKS = ( N - M ) / ( NB - M ) + 1 + END IF + ELSE + NBLCKS = 1 + END IF +* +* Determine if the workspace size satisfies minimal size +* + LMINWS = .FALSE. + IF( ( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) .OR. LWORK.LT.MB*M ) + $ .AND. ( LWORK.GE.M ) .AND. ( TSIZE.GE.MINTSZ ) + $ .AND. ( .NOT.LQUERY ) ) THEN + IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) ) THEN + LMINWS = .TRUE. + MB = 1 + NB = N + END IF + IF( LWORK.LT.MB*M ) THEN + LMINWS = .TRUE. + MB = 1 + END IF + END IF +* + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) + $ .AND. ( .NOT.LQUERY ) .AND. ( .NOT.LMINWS ) ) THEN + INFO = -6 + ELSE IF( ( LWORK.LT.MAX( 1, M*MB ) ) .AND .( .NOT.LQUERY ) + $ .AND. ( .NOT.LMINWS ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( MINT ) THEN + T( 1 ) = MINTSZ + ELSE + T( 1 ) = MB*M*NBLCKS + 5 + END IF + T( 2 ) = MB + T( 3 ) = NB + IF( MINW ) THEN + WORK( 1 ) = MAX( 1, N ) + ELSE + WORK( 1 ) = MAX( 1, MB*M ) + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGELQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N ).EQ.0 ) THEN + RETURN + END IF +* +* The LQ Decomposition +* + IF( ( N.LE.M ) .OR. ( NB.LE.M ) .OR. ( NB.GE.N ) ) THEN + CALL ZGELQT( M, N, MB, A, LDA, T( 6 ), MB, WORK, INFO ) + ELSE + CALL ZLASWLQ( M, N, MB, NB, A, LDA, T( 6 ), MB, WORK, + $ LWORK, INFO ) + END IF +* + WORK( 1 ) = MAX( 1, MB*M ) +* + RETURN +* +* End of ZGELQ +* + END diff --git a/dspl/liblapack/SRC/zgelq2.f b/dspl/liblapack/SRC/zgelq2.f new file mode 100644 index 0000000..188c8f8 --- /dev/null +++ b/dspl/liblapack/SRC/zgelq2.f @@ -0,0 +1,194 @@ +*> \brief \b ZGELQ2 computes the LQ factorization of a general rectangular matrix using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGELQ2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGELQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGELQ2 computes an LQ factorization of a complex m by n matrix A: +*> A = L * Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the m by min(m,n) lower trapezoidal matrix L (L is +*> lower triangular if m <= n); the elements above the diagonal, +*> with the array TAU, represent the unitary matrix Q as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(k)**H . . . H(2)**H H(1)**H, where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in +*> A(i,i+1:n), and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGELQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACGV, ZLARF, ZLARFG +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGELQ2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i,i+1:n) +* + CALL ZLACGV( N-I+1, A( I, I ), LDA ) + ALPHA = A( I, I ) + CALL ZLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA, + $ TAU( I ) ) + IF( I.LT.M ) THEN +* +* Apply H(i) to A(i+1:m,i:n) from the right +* + A( I, I ) = ONE + CALL ZLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ), + $ A( I+1, I ), LDA, WORK ) + END IF + A( I, I ) = ALPHA + CALL ZLACGV( N-I+1, A( I, I ), LDA ) + 10 CONTINUE + RETURN +* +* End of ZGELQ2 +* + END diff --git a/dspl/liblapack/SRC/zgelqf.f b/dspl/liblapack/SRC/zgelqf.f new file mode 100644 index 0000000..8d9341a --- /dev/null +++ b/dspl/liblapack/SRC/zgelqf.f @@ -0,0 +1,269 @@ +*> \brief \b ZGELQF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGELQF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGELQF computes an LQ factorization of a complex M-by-N matrix A: +*> A = L * Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the m-by-min(m,n) lower trapezoidal matrix L (L is +*> lower triangular if m <= n); the elements above the diagonal, +*> with the array TAU, represent the unitary matrix Q as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(k)**H . . . H(2)**H H(1)**H, where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in +*> A(i,i+1:n), and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGELQ2, ZLARFB, ZLARFT +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGELQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZGELQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZGELQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the LQ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL ZGELQ2( IB, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL ZLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i+ib:m,i:n) from the right +* + CALL ZLARFB( 'Right', 'No transpose', 'Forward', + $ 'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ), + $ LDA, WORK, LDWORK, A( I+IB, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL ZGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of ZGELQF +* + END diff --git a/dspl/liblapack/SRC/zgelqt.f b/dspl/liblapack/SRC/zgelqt.f new file mode 100644 index 0000000..015bd3d --- /dev/null +++ b/dspl/liblapack/SRC/zgelqt.f @@ -0,0 +1,210 @@ +*> \brief \b ZGELQT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDT, M, N, MB +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGELQT computes a blocked LQ factorization of a complex M-by-N matrix A +*> using the compact WY representation of Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The block size to be used in the blocked QR. MIN(M,N) >= MB >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the M-by-MIN(M,N) lower trapezoidal matrix L (L is +*> lower triangular if M <= N); the elements above the diagonal +*> are the rows of V. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,MIN(M,N)) +*> The upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See below +*> for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MB*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix V stores the elementary reflectors H(i) in the i-th row +*> above the diagonal. For example, if M=5 and N=3, the matrix V is +*> +*> V = ( 1 v1 v1 v1 v1 ) +*> ( 1 v2 v2 v2 ) +*> ( 1 v3 v3 ) +*> +*> +*> where the vi's represent the vectors which define H(i), which are returned +*> in the matrix A. The 1's along the diagonal of V are not stored in A. +*> Let K=MIN(M,N). The number of blocks is B = ceiling(K/MB), where each +*> block is of order MB except for the last block, which is of order +*> IB = K - (B-1)*MB. For each of the B blocks, a upper triangular block +*> reflector factor is computed: T1, T2, ..., TB. The MB-by-MB (and IB-by-IB +*> for the last block) T's are stored in the MB-by-K matrix T as +*> +*> T = (T1 T2 ... TB). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDT, M, N, MB +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + INTEGER I, IB, IINFO, K +* .. +* .. External Subroutines .. + EXTERNAL ZGELQT3, ZLARFB, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( MB.LT.1 .OR. (MB.GT.MIN(M,N) .AND. MIN(M,N).GT.0 ))THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDT.LT.MB ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGELQT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) RETURN +* +* Blocked loop of length K +* + DO I = 1, K, MB + IB = MIN( K-I+1, MB ) +* +* Compute the LQ factorization of the current block A(I:M,I:I+IB-1) +* + CALL ZGELQT3( IB, N-I+1, A(I,I), LDA, T(1,I), LDT, IINFO ) + IF( I+IB.LE.M ) THEN +* +* Update by applying H**T to A(I:M,I+IB:N) from the right +* + CALL ZLARFB( 'R', 'N', 'F', 'R', M-I-IB+1, N-I+1, IB, + $ A( I, I ), LDA, T( 1, I ), LDT, + $ A( I+IB, I ), LDA, WORK , M-I-IB+1 ) + END IF + END DO + RETURN +* +* End of ZGELQT +* + END diff --git a/dspl/liblapack/SRC/zgelqt3.f b/dspl/liblapack/SRC/zgelqt3.f new file mode 100644 index 0000000..45e74f4 --- /dev/null +++ b/dspl/liblapack/SRC/zgelqt3.f @@ -0,0 +1,261 @@ +*> \brief \b ZGELQT3 recursively computes a LQ factorization of a general real or complex matrix using the compact WY representation of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEQRT3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE ZGELQT3( M, N, A, LDA, T, LDT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, LDT +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGELQT3 recursively computes a LQ factorization of a complex M-by-N +*> matrix A, using the compact WY representation of Q. +*> +*> Based on the algorithm of Elmroth and Gustavson, +*> IBM J. Res. Develop. Vol 44 No. 4 July 2000. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M =< N. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the real M-by-N matrix A. On exit, the elements on and +*> below the diagonal contain the N-by-N lower triangular matrix L; the +*> elements above the diagonal are the rows of V. See below for +*> further details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,N) +*> The N-by-N upper triangular factor of the block reflector. +*> The elements on and above the diagonal contain the block +*> reflector T; the elements below the diagonal are not used. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleGEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix V stores the elementary reflectors H(i) in the i-th row +*> above the diagonal. For example, if M=5 and N=3, the matrix V is +*> +*> V = ( 1 v1 v1 v1 v1 ) +*> ( 1 v2 v2 v2 ) +*> ( 1 v3 v3 v3 ) +*> +*> +*> where the vi's represent the vectors which define H(i), which are returned +*> in the matrix A. The 1's along the diagonal of V are not stored in A. The +*> block reflector H is then given by +*> +*> H = I - V * T * V**T +*> +*> where V**T is the transpose of V. +*> +*> For details of the algorithm, see Elmroth and Gustavson (cited above). +*> \endverbatim +*> +* ===================================================================== + RECURSIVE SUBROUTINE ZGELQT3( M, N, A, LDA, T, LDT, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, LDT +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = (1.0D+00,0.0D+00) ) + PARAMETER ( ZERO = (0.0D+00,0.0D+00)) +* .. +* .. Local Scalars .. + INTEGER I, I1, J, J1, M1, M2, IINFO +* .. +* .. External Subroutines .. + EXTERNAL ZLARFG, ZTRMM, ZGEMM, XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( M .LT. 0 ) THEN + INFO = -1 + ELSE IF( N .LT. M ) THEN + INFO = -2 + ELSE IF( LDA .LT. MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LDT .LT. MAX( 1, M ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGELQT3', -INFO ) + RETURN + END IF +* + IF( M.EQ.1 ) THEN +* +* Compute Householder transform when N=1 +* + CALL ZLARFG( N, A, A( 1, MIN( 2, N ) ), LDA, T ) + T(1,1)=CONJG(T(1,1)) +* + ELSE +* +* Otherwise, split A into blocks... +* + M1 = M/2 + M2 = M-M1 + I1 = MIN( M1+1, M ) + J1 = MIN( M+1, N ) +* +* Compute A(1:M1,1:N) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1^H +* + CALL ZGELQT3( M1, N, A, LDA, T, LDT, IINFO ) +* +* Compute A(J1:M,1:N) = A(J1:M,1:N) Q1^H [workspace: T(1:N1,J1:N)] +* + DO I=1,M2 + DO J=1,M1 + T( I+M1, J ) = A( I+M1, J ) + END DO + END DO + CALL ZTRMM( 'R', 'U', 'C', 'U', M2, M1, ONE, + & A, LDA, T( I1, 1 ), LDT ) +* + CALL ZGEMM( 'N', 'C', M2, M1, N-M1, ONE, A( I1, I1 ), LDA, + & A( 1, I1 ), LDA, ONE, T( I1, 1 ), LDT) +* + CALL ZTRMM( 'R', 'U', 'N', 'N', M2, M1, ONE, + & T, LDT, T( I1, 1 ), LDT ) +* + CALL ZGEMM( 'N', 'N', M2, N-M1, M1, -ONE, T( I1, 1 ), LDT, + & A( 1, I1 ), LDA, ONE, A( I1, I1 ), LDA ) +* + CALL ZTRMM( 'R', 'U', 'N', 'U', M2, M1 , ONE, + & A, LDA, T( I1, 1 ), LDT ) +* + DO I=1,M2 + DO J=1,M1 + A( I+M1, J ) = A( I+M1, J ) - T( I+M1, J ) + T( I+M1, J )= ZERO + END DO + END DO +* +* Compute A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2^H +* + CALL ZGELQT3( M2, N-M1, A( I1, I1 ), LDA, + & T( I1, I1 ), LDT, IINFO ) +* +* Compute T3 = T(J1:N1,1:N) = -T1 Y1^H Y2 T2 +* + DO I=1,M2 + DO J=1,M1 + T( J, I+M1 ) = (A( J, I+M1 )) + END DO + END DO +* + CALL ZTRMM( 'R', 'U', 'C', 'U', M1, M2, ONE, + & A( I1, I1 ), LDA, T( 1, I1 ), LDT ) +* + CALL ZGEMM( 'N', 'C', M1, M2, N-M, ONE, A( 1, J1 ), LDA, + & A( I1, J1 ), LDA, ONE, T( 1, I1 ), LDT ) +* + CALL ZTRMM( 'L', 'U', 'N', 'N', M1, M2, -ONE, T, LDT, + & T( 1, I1 ), LDT ) +* + CALL ZTRMM( 'R', 'U', 'N', 'N', M1, M2, ONE, + & T( I1, I1 ), LDT, T( 1, I1 ), LDT ) +* +* +* +* Y = (Y1,Y2); L = [ L1 0 ]; T = [T1 T3] +* [ A(1:N1,J1:N) L2 ] [ 0 T2] +* + END IF +* + RETURN +* +* End of ZGELQT3 +* + END diff --git a/dspl/liblapack/SRC/zgels.f b/dspl/liblapack/SRC/zgels.f new file mode 100644 index 0000000..8e2794f --- /dev/null +++ b/dspl/liblapack/SRC/zgels.f @@ -0,0 +1,505 @@ +*> \brief ZGELS solves overdetermined or underdetermined systems for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGELS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGELS solves overdetermined or underdetermined complex linear systems +*> involving an M-by-N matrix A, or its conjugate-transpose, using a QR +*> or LQ factorization of A. It is assumed that A has full rank. +*> +*> The following options are provided: +*> +*> 1. If TRANS = 'N' and m >= n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A*X ||. +*> +*> 2. If TRANS = 'N' and m < n: find the minimum norm solution of +*> an underdetermined system A * X = B. +*> +*> 3. If TRANS = 'C' and m >= n: find the minimum norm solution of +*> an underdetermined system A**H * X = B. +*> +*> 4. If TRANS = 'C' and m < n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A**H * X ||. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': the linear system involves A; +*> = 'C': the linear system involves A**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> if M >= N, A is overwritten by details of its QR +*> factorization as returned by ZGEQRF; +*> if M < N, A is overwritten by details of its LQ +*> factorization as returned by ZGELQF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the matrix B of right hand side vectors, stored +*> columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +*> if TRANS = 'C'. +*> On exit, if INFO = 0, B is overwritten by the solution +*> vectors, stored columnwise: +*> if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +*> squares solution vectors; the residual sum of squares for the +*> solution in each column is given by the sum of squares of the +*> modulus of elements N+1 to M in that column; +*> if TRANS = 'N' and m < n, rows 1 to N of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'C' and m >= n, rows 1 to M of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'C' and m < n, rows 1 to M of B contain the +*> least squares solution vectors; the residual sum of squares +*> for the solution in each column is given by the sum of +*> squares of the modulus of elements M+1 to N in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= MAX(1,M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= max( 1, MN + max( MN, NRHS ) ). +*> For optimal performance, +*> LWORK >= max( 1, MN + max( MN, NRHS )*NB ). +*> where MN = min(M,N) and NB is the optimum block size. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of the +*> triangular factor of A is zero, so that A does not have +*> full rank; the least squares solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEsolve +* +* ===================================================================== + SUBROUTINE ZGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TPSD + INTEGER BROW, I, IASCL, IBSCL, J, MN, NB, SCLLEN, WSIZE + DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, XERBLA, ZGELQF, ZGEQRF, ZLASCL, ZLASET, + $ ZTRTRS, ZUNMLQ, ZUNMQR +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'C' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND. .NOT.LQUERY ) + $ THEN + INFO = -10 + END IF +* +* Figure out optimal block size +* + IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN +* + TPSD = .TRUE. + IF( LSAME( TRANS, 'N' ) ) + $ TPSD = .FALSE. +* + IF( M.GE.N ) THEN + NB = ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + IF( TPSD ) THEN + NB = MAX( NB, ILAENV( 1, 'ZUNMQR', 'LN', M, NRHS, N, + $ -1 ) ) + ELSE + NB = MAX( NB, ILAENV( 1, 'ZUNMQR', 'LC', M, NRHS, N, + $ -1 ) ) + END IF + ELSE + NB = ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 ) + IF( TPSD ) THEN + NB = MAX( NB, ILAENV( 1, 'ZUNMLQ', 'LC', N, NRHS, M, + $ -1 ) ) + ELSE + NB = MAX( NB, ILAENV( 1, 'ZUNMLQ', 'LN', N, NRHS, M, + $ -1 ) ) + END IF + END IF +* + WSIZE = MAX( 1, MN+MAX( MN, NRHS )*NB ) + WORK( 1 ) = DBLE( WSIZE ) +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGELS ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL ZLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + GO TO 50 + END IF +* + BROW = M + IF( TPSD ) + $ BROW = N + BNRM = ZLANGE( 'M', BROW, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF( M.GE.N ) THEN +* +* compute QR factorization of A +* + CALL ZGEQRF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least N, optimally N*NB +* + IF( .NOT.TPSD ) THEN +* +* Least-Squares Problem min || A * X - B || +* +* B(1:M,1:NRHS) := Q**H * B(1:M,1:NRHS) +* + CALL ZUNMQR( 'Left', 'Conjugate transpose', M, NRHS, N, A, + $ LDA, WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* +* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL ZTRTRS( 'Upper', 'No transpose', 'Non-unit', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = N +* + ELSE +* +* Underdetermined system of equations A**T * X = B +* +* B(1:N,1:NRHS) := inv(R**H) * B(1:N,1:NRHS) +* + CALL ZTRTRS( 'Upper', 'Conjugate transpose','Non-unit', + $ N, NRHS, A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* B(N+1:M,1:NRHS) = ZERO +* + DO 20 J = 1, NRHS + DO 10 I = N + 1, M + B( I, J ) = CZERO + 10 CONTINUE + 20 CONTINUE +* +* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) +* + CALL ZUNMQR( 'Left', 'No transpose', M, NRHS, N, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* + SCLLEN = M +* + END IF +* + ELSE +* +* Compute LQ factorization of A +* + CALL ZGELQF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least M, optimally M*NB. +* + IF( .NOT.TPSD ) THEN +* +* underdetermined system of equations A * X = B +* +* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL ZTRTRS( 'Lower', 'No transpose', 'Non-unit', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* B(M+1:N,1:NRHS) = 0 +* + DO 40 J = 1, NRHS + DO 30 I = M + 1, N + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Q(1:N,:)**H * B(1:M,1:NRHS) +* + CALL ZUNMLQ( 'Left', 'Conjugate transpose', N, NRHS, M, A, + $ LDA, WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* + SCLLEN = N +* + ELSE +* +* overdetermined system min || A**H * X - B || +* +* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) +* + CALL ZUNMLQ( 'Left', 'No transpose', N, NRHS, M, A, LDA, + $ WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* +* B(1:M,1:NRHS) := inv(L**H) * B(1:M,1:NRHS) +* + CALL ZTRTRS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ M, NRHS, A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + 50 CONTINUE + WORK( 1 ) = DBLE( WSIZE ) +* + RETURN +* +* End of ZGELS +* + END diff --git a/dspl/liblapack/SRC/zgelsd.f b/dspl/liblapack/SRC/zgelsd.f new file mode 100644 index 0000000..9463ccc --- /dev/null +++ b/dspl/liblapack/SRC/zgelsd.f @@ -0,0 +1,665 @@ +*> \brief ZGELSD computes the minimum-norm solution to a linear least squares problem for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGELSD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, +* WORK, LWORK, RWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION RWORK( * ), S( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGELSD computes the minimum-norm solution to a real linear least +*> squares problem: +*> minimize 2-norm(| b - A*x |) +*> using the singular value decomposition (SVD) of A. A is an M-by-N +*> matrix which may be rank-deficient. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> +*> The problem is solved in three steps: +*> (1) Reduce the coefficient matrix A to bidiagonal form with +*> Householder transformations, reducing the original problem +*> into a "bidiagonal least squares problem" (BLS) +*> (2) Solve the BLS using a divide and conquer approach. +*> (3) Apply back all the Householder transformations to solve +*> the original least squares problem. +*> +*> The effective rank of A is determined by treating as zero those +*> singular values which are less than RCOND times the largest singular +*> value. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A has been destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the M-by-NRHS right hand side matrix B. +*> On exit, B is overwritten by the N-by-NRHS solution matrix X. +*> If m >= n and RANK = n, the residual sum-of-squares for +*> the solution in the i-th column is given by the sum of +*> squares of the modulus of elements n+1:m in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M,N). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (min(M,N)) +*> The singular values of A in decreasing order. +*> The condition number of A in the 2-norm = S(1)/S(min(m,n)). +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> RCOND is used to determine the effective rank of A. +*> Singular values S(i) <= RCOND*S(1) are treated as zero. +*> If RCOND < 0, machine precision is used instead. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The effective rank of A, i.e., the number of singular values +*> which are greater than RCOND*S(1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK must be at least 1. +*> The exact minimum amount of workspace needed depends on M, +*> N and NRHS. As long as LWORK is at least +*> 2*N + N*NRHS +*> if M is greater than or equal to N or +*> 2*M + M*NRHS +*> if M is less than N, the code will execute correctly. +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the array WORK and the +*> minimum sizes of the arrays RWORK and IWORK, and returns +*> these values as the first entries of the WORK, RWORK and +*> IWORK arrays, and no error message related to LWORK is issued +*> by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) +*> LRWORK >= +*> 10*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + +*> MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS ) +*> if M is greater than or equal to N or +*> 10*M + 2*M*SMLSIZ + 8*M*NLVL + 3*SMLSIZ*NRHS + +*> MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS ) +*> if M is less than N, the code will execute correctly. +*> SMLSIZ is returned by ILAENV and is equal to the maximum +*> size of the subproblems at the bottom of the computation +*> tree (usually about 25), and +*> NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) +*> On exit, if INFO = 0, RWORK(1) returns the minimum LRWORK. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> LIWORK >= max(1, 3*MINMN*NLVL + 11*MINMN), +*> where MINMN = MIN( M,N ). +*> On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: the algorithm for computing the SVD failed to converge; +*> if INFO = i, i off-diagonal elements of an intermediate +*> bidiagonal form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16GEsolve +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Ren-Cang Li, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +* +* ===================================================================== + SUBROUTINE ZGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, + $ WORK, LWORK, RWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ), S( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER IASCL, IBSCL, IE, IL, ITAU, ITAUP, ITAUQ, + $ LDWORK, LIWORK, LRWORK, MAXMN, MAXWRK, MINMN, + $ MINWRK, MM, MNTHR, NLVL, NRWORK, NWORK, SMLSIZ + DOUBLE PRECISION ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, DLASCL, DLASET, XERBLA, ZGEBRD, ZGELQF, + $ ZGEQRF, ZLACPY, ZLALSD, ZLASCL, ZLASET, ZUNMBR, + $ ZUNMLQ, ZUNMQR +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, LOG, MAX, MIN, DBLE +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN + INFO = -7 + END IF +* +* Compute workspace. +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + LIWORK = 1 + LRWORK = 1 + IF( MINMN.GT.0 ) THEN + SMLSIZ = ILAENV( 9, 'ZGELSD', ' ', 0, 0, 0, 0 ) + MNTHR = ILAENV( 6, 'ZGELSD', ' ', M, N, NRHS, -1 ) + NLVL = MAX( INT( LOG( DBLE( MINMN ) / DBLE( SMLSIZ + 1 ) ) / + $ LOG( TWO ) ) + 1, 0 ) + LIWORK = 3*MINMN*NLVL + 11*MINMN + MM = M + IF( M.GE.N .AND. M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than +* columns. +* + MM = N + MAXWRK = MAX( MAXWRK, N*ILAENV( 1, 'ZGEQRF', ' ', M, N, + $ -1, -1 ) ) + MAXWRK = MAX( MAXWRK, NRHS*ILAENV( 1, 'ZUNMQR', 'LC', M, + $ NRHS, N, -1 ) ) + END IF + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined. +* + LRWORK = 10*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + + $ MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS ) + MAXWRK = MAX( MAXWRK, 2*N + ( MM + N )*ILAENV( 1, + $ 'ZGEBRD', ' ', MM, N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N + NRHS*ILAENV( 1, 'ZUNMBR', + $ 'QLC', MM, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N + ( N - 1 )*ILAENV( 1, + $ 'ZUNMBR', 'PLN', N, NRHS, N, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*N + N*NRHS ) + MINWRK = MAX( 2*N + MM, 2*N + N*NRHS ) + END IF + IF( N.GT.M ) THEN + LRWORK = 10*M + 2*M*SMLSIZ + 8*M*NLVL + 3*SMLSIZ*NRHS + + $ MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS ) + IF( N.GE.MNTHR ) THEN +* +* Path 2a - underdetermined, with many more columns +* than rows. +* + MAXWRK = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, + $ -1 ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + 2*M*ILAENV( 1, + $ 'ZGEBRD', ' ', M, M, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + NRHS*ILAENV( 1, + $ 'ZUNMBR', 'QLC', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, M*M + 4*M + ( M - 1 )*ILAENV( 1, + $ 'ZUNMLQ', 'LC', N, NRHS, M, -1 ) ) + IF( NRHS.GT.1 ) THEN + MAXWRK = MAX( MAXWRK, M*M + M + M*NRHS ) + ELSE + MAXWRK = MAX( MAXWRK, M*M + 2*M ) + END IF + MAXWRK = MAX( MAXWRK, M*M + 4*M + M*NRHS ) +! XXX: Ensure the Path 2a case below is triggered. The workspace +! calculation should use queries for all routines eventually. + MAXWRK = MAX( MAXWRK, + $ 4*M+M*M+MAX( M, 2*M-4, NRHS, N-3*M ) ) + ELSE +* +* Path 2 - underdetermined. +* + MAXWRK = 2*M + ( N + M )*ILAENV( 1, 'ZGEBRD', ' ', M, + $ N, -1, -1 ) + MAXWRK = MAX( MAXWRK, 2*M + NRHS*ILAENV( 1, 'ZUNMBR', + $ 'QLC', M, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M + M*ILAENV( 1, 'ZUNMBR', + $ 'PLN', N, NRHS, M, -1 ) ) + MAXWRK = MAX( MAXWRK, 2*M + M*NRHS ) + END IF + MINWRK = MAX( 2*M + N, 2*M + M*NRHS ) + END IF + END IF + MINWRK = MIN( MINWRK, MAXWRK ) + WORK( 1 ) = MAXWRK + IWORK( 1 ) = LIWORK + RWORK( 1 ) = LRWORK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGELSD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters. +* + EPS = DLAMCH( 'P' ) + SFMIN = DLAMCH( 'S' ) + SMLNUM = SFMIN / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A if max entry outside range [SMLNUM,BIGNUM]. +* + ANRM = ZLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM. +* + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 ) + RANK = 0 + GO TO 10 + END IF +* +* Scale B if max entry outside range [SMLNUM,BIGNUM]. +* + BNRM = ZLANGE( 'M', M, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM. +* + CALL ZLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM. +* + CALL ZLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* If M < N make sure B(M+1:N,:) = 0 +* + IF( M.LT.N ) + $ CALL ZLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB ) +* +* Overdetermined case. +* + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined. +* + MM = M + IF( M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns +* + MM = N + ITAU = 1 + NWORK = ITAU + N +* +* Compute A=Q*R. +* (RWorkspace: need N) +* (CWorkspace: need N, prefer N*NB) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Multiply B by transpose(Q). +* (RWorkspace: need N) +* (CWorkspace: need NRHS, prefer NRHS*NB) +* + CALL ZUNMQR( 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Zero out below R. +* + IF( N.GT.1 ) THEN + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + END IF + END IF +* + ITAUQ = 1 + ITAUP = ITAUQ + N + NWORK = ITAUP + N + IE = 1 + NRWORK = IE + N +* +* Bidiagonalize R in A. +* (RWorkspace: need N) +* (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB) +* + CALL ZGEBRD( MM, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of R. +* (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB) +* + CALL ZUNMBR( 'Q', 'L', 'C', MM, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL ZLALSD( 'U', SMLSIZ, N, NRHS, S, RWORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ), + $ IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of R. +* + CALL ZUNMBR( 'P', 'L', 'N', N, NRHS, N, A, LDA, WORK( ITAUP ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+ + $ MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN +* +* Path 2a - underdetermined, with many more columns than rows +* and sufficient workspace for an efficient algorithm. +* + LDWORK = M + IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ), + $ M*LDA+M+M*NRHS ) )LDWORK = LDA + ITAU = 1 + NWORK = M + 1 +* +* Compute A=L*Q. +* (CWorkspace: need 2*M, prefer M+M*NB) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) + IL = NWORK +* +* Copy L to WORK(IL), zeroing out above its diagonal. +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, WORK( IL+LDWORK ), + $ LDWORK ) + ITAUQ = IL + LDWORK*M + ITAUP = ITAUQ + M + NWORK = ITAUP + M + IE = 1 + NRWORK = IE + M +* +* Bidiagonalize L in WORK(IL). +* (RWorkspace: need M) +* (CWorkspace: need M*M+4*M, prefer M*M+4*M+2*M*NB) +* + CALL ZGEBRD( M, M, WORK( IL ), LDWORK, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of L. +* (CWorkspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB) +* + CALL ZUNMBR( 'Q', 'L', 'C', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUQ ), B, LDB, WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL ZLALSD( 'U', SMLSIZ, M, NRHS, S, RWORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ), + $ IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of L. +* + CALL ZUNMBR( 'P', 'L', 'N', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUP ), B, LDB, WORK( NWORK ), + $ LWORK-NWORK+1, INFO ) +* +* Zero out below first M rows of B. +* + CALL ZLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB ) + NWORK = ITAU + M +* +* Multiply transpose(Q) by B. +* (CWorkspace: need NRHS, prefer NRHS*NB) +* + CALL ZUNMLQ( 'L', 'C', N, NRHS, M, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + ELSE +* +* Path 2 - remaining underdetermined cases. +* + ITAUQ = 1 + ITAUP = ITAUQ + M + NWORK = ITAUP + M + IE = 1 + NRWORK = IE + M +* +* Bidiagonalize A. +* (RWorkspace: need M) +* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* + CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors. +* (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB) +* + CALL ZUNMBR( 'Q', 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* +* Solve the bidiagonal least squares problem. +* + CALL ZLALSD( 'L', SMLSIZ, M, NRHS, S, RWORK( IE ), B, LDB, + $ RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ), + $ IWORK, INFO ) + IF( INFO.NE.0 ) THEN + GO TO 10 + END IF +* +* Multiply B by right bidiagonalizing vectors of A. +* + CALL ZUNMBR( 'P', 'L', 'N', N, NRHS, M, A, LDA, WORK( ITAUP ), + $ B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO ) +* + END IF +* +* Undo scaling. +* + IF( IASCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 10 CONTINUE + WORK( 1 ) = MAXWRK + IWORK( 1 ) = LIWORK + RWORK( 1 ) = LRWORK + RETURN +* +* End of ZGELSD +* + END diff --git a/dspl/liblapack/SRC/zgelss.f b/dspl/liblapack/SRC/zgelss.f new file mode 100644 index 0000000..df436b4 --- /dev/null +++ b/dspl/liblapack/SRC/zgelss.f @@ -0,0 +1,771 @@ +*> \brief ZGELSS solves overdetermined or underdetermined systems for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGELSS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, +* WORK, LWORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ), S( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGELSS computes the minimum norm solution to a complex linear +*> least squares problem: +*> +*> Minimize 2-norm(| b - A*x |). +*> +*> using the singular value decomposition (SVD) of A. A is an M-by-N +*> matrix which may be rank-deficient. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix +*> X. +*> +*> The effective rank of A is determined by treating as zero those +*> singular values which are less than RCOND times the largest singular +*> value. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the first min(m,n) rows of A are overwritten with +*> its right singular vectors, stored rowwise. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the M-by-NRHS right hand side matrix B. +*> On exit, B is overwritten by the N-by-NRHS solution matrix X. +*> If m >= n and RANK = n, the residual sum-of-squares for +*> the solution in the i-th column is given by the sum of +*> squares of the modulus of elements n+1:m in that column. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M,N). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (min(M,N)) +*> The singular values of A in decreasing order. +*> The condition number of A in the 2-norm = S(1)/S(min(m,n)). +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> RCOND is used to determine the effective rank of A. +*> Singular values S(i) <= RCOND*S(1) are treated as zero. +*> If RCOND < 0, machine precision is used instead. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The effective rank of A, i.e., the number of singular values +*> which are greater than RCOND*S(1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 1, and also: +*> LWORK >= 2*min(M,N) + max(M,N,NRHS) +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (5*min(M,N)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: the algorithm for computing the SVD failed to converge; +*> if INFO = i, i off-diagonal elements of an intermediate +*> bidiagonal form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16GEsolve +* +* ===================================================================== + SUBROUTINE ZGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, + $ WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), S( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER BL, CHUNK, I, IASCL, IBSCL, IE, IL, IRWORK, + $ ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN, + $ MAXWRK, MINMN, MINWRK, MM, MNTHR + INTEGER LWORK_ZGEQRF, LWORK_ZUNMQR, LWORK_ZGEBRD, + $ LWORK_ZUNMBR, LWORK_ZUNGBR, LWORK_ZUNMLQ, + $ LWORK_ZGELQF + DOUBLE PRECISION ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR +* .. +* .. Local Arrays .. + COMPLEX*16 DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, DLASCL, DLASET, XERBLA, ZBDSQR, ZCOPY, + $ ZDRSCL, ZGEBRD, ZGELQF, ZGEMM, ZGEMV, ZGEQRF, + $ ZLACPY, ZLASCL, ZLASET, ZUNGBR, ZUNMBR, ZUNMLQ, + $ ZUNMQR +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace refers +* to real workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + IF( MINMN.GT.0 ) THEN + MM = M + MNTHR = ILAENV( 6, 'ZGELSS', ' ', M, N, NRHS, -1 ) + IF( M.GE.N .AND. M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than +* columns +* +* Compute space needed for ZGEQRF + CALL ZGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, INFO ) + LWORK_ZGEQRF=DUM(1) +* Compute space needed for ZUNMQR + CALL ZUNMQR( 'L', 'C', M, NRHS, N, A, LDA, DUM(1), B, + $ LDB, DUM(1), -1, INFO ) + LWORK_ZUNMQR=DUM(1) + MM = N + MAXWRK = MAX( MAXWRK, N + N*ILAENV( 1, 'ZGEQRF', ' ', M, + $ N, -1, -1 ) ) + MAXWRK = MAX( MAXWRK, N + NRHS*ILAENV( 1, 'ZUNMQR', 'LC', + $ M, NRHS, N, -1 ) ) + END IF + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined +* +* Compute space needed for ZGEBRD + CALL ZGEBRD( MM, N, A, LDA, S, S, DUM(1), DUM(1), DUM(1), + $ -1, INFO ) + LWORK_ZGEBRD=DUM(1) +* Compute space needed for ZUNMBR + CALL ZUNMBR( 'Q', 'L', 'C', MM, NRHS, N, A, LDA, DUM(1), + $ B, LDB, DUM(1), -1, INFO ) + LWORK_ZUNMBR=DUM(1) +* Compute space needed for ZUNGBR + CALL ZUNGBR( 'P', N, N, N, A, LDA, DUM(1), + $ DUM(1), -1, INFO ) + LWORK_ZUNGBR=DUM(1) +* Compute total workspace needed + MAXWRK = MAX( MAXWRK, 2*N + LWORK_ZGEBRD ) + MAXWRK = MAX( MAXWRK, 2*N + LWORK_ZUNMBR ) + MAXWRK = MAX( MAXWRK, 2*N + LWORK_ZUNGBR ) + MAXWRK = MAX( MAXWRK, N*NRHS ) + MINWRK = 2*N + MAX( NRHS, M ) + END IF + IF( N.GT.M ) THEN + MINWRK = 2*M + MAX( NRHS, N ) + IF( N.GE.MNTHR ) THEN +* +* Path 2a - underdetermined, with many more columns +* than rows +* +* Compute space needed for ZGELQF + CALL ZGELQF( M, N, A, LDA, DUM(1), DUM(1), + $ -1, INFO ) + LWORK_ZGELQF=DUM(1) +* Compute space needed for ZGEBRD + CALL ZGEBRD( M, M, A, LDA, S, S, DUM(1), DUM(1), + $ DUM(1), -1, INFO ) + LWORK_ZGEBRD=DUM(1) +* Compute space needed for ZUNMBR + CALL ZUNMBR( 'Q', 'L', 'C', M, NRHS, N, A, LDA, + $ DUM(1), B, LDB, DUM(1), -1, INFO ) + LWORK_ZUNMBR=DUM(1) +* Compute space needed for ZUNGBR + CALL ZUNGBR( 'P', M, M, M, A, LDA, DUM(1), + $ DUM(1), -1, INFO ) + LWORK_ZUNGBR=DUM(1) +* Compute space needed for ZUNMLQ + CALL ZUNMLQ( 'L', 'C', N, NRHS, M, A, LDA, DUM(1), + $ B, LDB, DUM(1), -1, INFO ) + LWORK_ZUNMLQ=DUM(1) +* Compute total workspace needed + MAXWRK = M + LWORK_ZGELQF + MAXWRK = MAX( MAXWRK, 3*M + M*M + LWORK_ZGEBRD ) + MAXWRK = MAX( MAXWRK, 3*M + M*M + LWORK_ZUNMBR ) + MAXWRK = MAX( MAXWRK, 3*M + M*M + LWORK_ZUNGBR ) + IF( NRHS.GT.1 ) THEN + MAXWRK = MAX( MAXWRK, M*M + M + M*NRHS ) + ELSE + MAXWRK = MAX( MAXWRK, M*M + 2*M ) + END IF + MAXWRK = MAX( MAXWRK, M + LWORK_ZUNMLQ ) + ELSE +* +* Path 2 - underdetermined +* +* Compute space needed for ZGEBRD + CALL ZGEBRD( M, N, A, LDA, S, S, DUM(1), DUM(1), + $ DUM(1), -1, INFO ) + LWORK_ZGEBRD=DUM(1) +* Compute space needed for ZUNMBR + CALL ZUNMBR( 'Q', 'L', 'C', M, NRHS, M, A, LDA, + $ DUM(1), B, LDB, DUM(1), -1, INFO ) + LWORK_ZUNMBR=DUM(1) +* Compute space needed for ZUNGBR + CALL ZUNGBR( 'P', M, N, M, A, LDA, DUM(1), + $ DUM(1), -1, INFO ) + LWORK_ZUNGBR=DUM(1) + MAXWRK = 2*M + LWORK_ZGEBRD + MAXWRK = MAX( MAXWRK, 2*M + LWORK_ZUNMBR ) + MAXWRK = MAX( MAXWRK, 2*M + LWORK_ZUNGBR ) + MAXWRK = MAX( MAXWRK, N*NRHS ) + END IF + END IF + MAXWRK = MAX( MINWRK, MAXWRK ) + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) + $ INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGELSS', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + EPS = DLAMCH( 'P' ) + SFMIN = DLAMCH( 'S' ) + SMLNUM = SFMIN / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, MINMN ) + RANK = 0 + GO TO 70 + END IF +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = ZLANGE( 'M', M, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Overdetermined case +* + IF( M.GE.N ) THEN +* +* Path 1 - overdetermined or exactly determined +* + MM = M + IF( M.GE.MNTHR ) THEN +* +* Path 1a - overdetermined, with many more rows than columns +* + MM = N + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: none) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Multiply B by transpose(Q) +* (CWorkspace: need N+NRHS, prefer N+NRHS*NB) +* (RWorkspace: none) +* + CALL ZUNMQR( 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Zero out below R +* + IF( N.GT.1 ) + $ CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + END IF +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( MM, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of R +* (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB) +* (RWorkspace: none) +* + CALL ZUNMBR( 'Q', 'L', 'C', MM, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors of R in A +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: none) +* + CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration +* multiply B by transpose of left singular vectors +* compute right singular vectors in A +* (CWorkspace: none) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, 0, NRHS, S, RWORK( IE ), A, LDA, DUM, + $ 1, B, LDB, RWORK( IRWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 10 I = 1, N + IF( S( I ).GT.THR ) THEN + CALL ZDRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL ZLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB ) + END IF + 10 CONTINUE +* +* Multiply B by right singular vectors +* (CWorkspace: need N, prefer N*NRHS) +* (RWorkspace: none) +* + IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN + CALL ZGEMM( 'C', 'N', N, NRHS, N, CONE, A, LDA, B, LDB, + $ CZERO, WORK, LDB ) + CALL ZLACPY( 'G', N, NRHS, WORK, LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = LWORK / N + DO 20 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL ZGEMM( 'C', 'N', N, BL, N, CONE, A, LDA, B( 1, I ), + $ LDB, CZERO, WORK, N ) + CALL ZLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB ) + 20 CONTINUE + ELSE + CALL ZGEMV( 'C', N, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 ) + CALL ZCOPY( N, WORK, 1, B, 1 ) + END IF +* + ELSE IF( N.GE.MNTHR .AND. LWORK.GE.3*M+M*M+MAX( M, NRHS, N-2*M ) ) + $ THEN +* +* Underdetermined case, M much less than N +* +* Path 2a - underdetermined, with many more columns than rows +* and sufficient workspace for an efficient algorithm +* + LDWORK = M + IF( LWORK.GE.3*M+M*LDA+MAX( M, NRHS, N-2*M ) ) + $ LDWORK = LDA + ITAU = 1 + IWORK = M + 1 +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: none) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) + IL = IWORK +* +* Copy L to WORK(IL), zeroing out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, WORK( IL+LDWORK ), + $ LDWORK ) + IE = 1 + ITAUQ = IL + LDWORK*M + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* (CWorkspace: need M*M+4*M, prefer M*M+3*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, WORK( IL ), LDWORK, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors of L +* (CWorkspace: need M*M+3*M+NRHS, prefer M*M+3*M+NRHS*NB) +* (RWorkspace: none) +* + CALL ZUNMBR( 'Q', 'L', 'C', M, NRHS, M, WORK( IL ), LDWORK, + $ WORK( ITAUQ ), B, LDB, WORK( IWORK ), + $ LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors of R in WORK(IL) +* (CWorkspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB) +* (RWorkspace: none) +* + CALL ZUNGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right singular +* vectors of L in WORK(IL) and multiplying B by transpose of +* left singular vectors +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, M, 0, NRHS, S, RWORK( IE ), WORK( IL ), + $ LDWORK, A, LDA, B, LDB, RWORK( IRWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 30 I = 1, M + IF( S( I ).GT.THR ) THEN + CALL ZDRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL ZLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB ) + END IF + 30 CONTINUE + IWORK = IL + M*LDWORK +* +* Multiply B by right singular vectors of L in WORK(IL) +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NRHS) +* (RWorkspace: none) +* + IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN + CALL ZGEMM( 'C', 'N', M, NRHS, M, CONE, WORK( IL ), LDWORK, + $ B, LDB, CZERO, WORK( IWORK ), LDB ) + CALL ZLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = ( LWORK-IWORK+1 ) / M + DO 40 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL ZGEMM( 'C', 'N', M, BL, M, CONE, WORK( IL ), LDWORK, + $ B( 1, I ), LDB, CZERO, WORK( IWORK ), M ) + CALL ZLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ), + $ LDB ) + 40 CONTINUE + ELSE + CALL ZGEMV( 'C', M, M, CONE, WORK( IL ), LDWORK, B( 1, 1 ), + $ 1, CZERO, WORK( IWORK ), 1 ) + CALL ZCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 ) + END IF +* +* Zero out below first M rows of B +* + CALL ZLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB ) + IWORK = ITAU + M +* +* Multiply transpose(Q) by B +* (CWorkspace: need M+NRHS, prefer M+NHRS*NB) +* (RWorkspace: none) +* + CALL ZUNMLQ( 'L', 'C', N, NRHS, M, A, LDA, WORK( ITAU ), B, + $ LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* + ELSE +* +* Path 2 - remaining underdetermined cases +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (CWorkspace: need 3*M, prefer 2*M+(M+N)*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ INFO ) +* +* Multiply B by transpose of left bidiagonalizing vectors +* (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB) +* (RWorkspace: none) +* + CALL ZUNMBR( 'Q', 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAUQ ), + $ B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO ) +* +* Generate right bidiagonalizing vectors in A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: none) +* + CALL ZUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, INFO ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, +* computing right singular vectors of A in A and +* multiplying B by transpose of left singular vectors +* (CWorkspace: none) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'L', M, N, 0, NRHS, S, RWORK( IE ), A, LDA, DUM, + $ 1, B, LDB, RWORK( IRWORK ), INFO ) + IF( INFO.NE.0 ) + $ GO TO 70 +* +* Multiply B by reciprocals of singular values +* + THR = MAX( RCOND*S( 1 ), SFMIN ) + IF( RCOND.LT.ZERO ) + $ THR = MAX( EPS*S( 1 ), SFMIN ) + RANK = 0 + DO 50 I = 1, M + IF( S( I ).GT.THR ) THEN + CALL ZDRSCL( NRHS, S( I ), B( I, 1 ), LDB ) + RANK = RANK + 1 + ELSE + CALL ZLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB ) + END IF + 50 CONTINUE +* +* Multiply B by right singular vectors of A +* (CWorkspace: need N, prefer N*NRHS) +* (RWorkspace: none) +* + IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN + CALL ZGEMM( 'C', 'N', N, NRHS, M, CONE, A, LDA, B, LDB, + $ CZERO, WORK, LDB ) + CALL ZLACPY( 'G', N, NRHS, WORK, LDB, B, LDB ) + ELSE IF( NRHS.GT.1 ) THEN + CHUNK = LWORK / N + DO 60 I = 1, NRHS, CHUNK + BL = MIN( NRHS-I+1, CHUNK ) + CALL ZGEMM( 'C', 'N', N, BL, M, CONE, A, LDA, B( 1, I ), + $ LDB, CZERO, WORK, N ) + CALL ZLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB ) + 60 CONTINUE + ELSE + CALL ZGEMV( 'C', M, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 ) + CALL ZCOPY( N, WORK, 1, B, 1 ) + END IF + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF + 70 CONTINUE + WORK( 1 ) = MAXWRK + RETURN +* +* End of ZGELSS +* + END diff --git a/dspl/liblapack/SRC/zgelsy.f b/dspl/liblapack/SRC/zgelsy.f new file mode 100644 index 0000000..79cb793 --- /dev/null +++ b/dspl/liblapack/SRC/zgelsy.f @@ -0,0 +1,477 @@ +*> \brief ZGELSY solves overdetermined or underdetermined systems for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGELSY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, +* WORK, LWORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGELSY computes the minimum-norm solution to a complex linear least +*> squares problem: +*> minimize || A * X - B || +*> using a complete orthogonal factorization of A. A is an M-by-N +*> matrix which may be rank-deficient. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> +*> The routine first computes a QR factorization with column pivoting: +*> A * P = Q * [ R11 R12 ] +*> [ 0 R22 ] +*> with R11 defined as the largest leading submatrix whose estimated +*> condition number is less than 1/RCOND. The order of R11, RANK, +*> is the effective rank of A. +*> +*> Then, R22 is considered to be negligible, and R12 is annihilated +*> by unitary transformations from the right, arriving at the +*> complete orthogonal factorization: +*> A * P = Q * [ T11 0 ] * Z +*> [ 0 0 ] +*> The minimum-norm solution is then +*> X = P * Z**H [ inv(T11)*Q1**H*B ] +*> [ 0 ] +*> where Q1 consists of the first RANK columns of Q. +*> +*> This routine is basically identical to the original xGELSX except +*> three differences: +*> o The permutation of matrix B (the right hand side) is faster and +*> more simple. +*> o The call to the subroutine xGEQPF has been substituted by the +*> the call to the subroutine xGEQP3. This subroutine is a Blas-3 +*> version of the QR factorization with column pivoting. +*> o Matrix B (the right hand side) is updated with Blas-3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A has been overwritten by details of its +*> complete orthogonal factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the M-by-NRHS right hand side matrix B. +*> On exit, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M,N). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +*> to the front of AP, otherwise column i is a free column. +*> On exit, if JPVT(i) = k, then the i-th column of A*P +*> was the k-th column of A. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> RCOND is used to determine the effective rank of A, which +*> is defined as the order of the largest leading triangular +*> submatrix R11 in the QR factorization with pivoting of A, +*> whose estimated condition number < 1/RCOND. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The effective rank of A, i.e., the order of the submatrix +*> R11. This is the same as the order of the submatrix T11 +*> in the complete orthogonal factorization of A. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> The unblocked strategy requires that: +*> LWORK >= MN + MAX( 2*MN, N+1, MN+NRHS ) +*> where MN = min(M,N). +*> The block algorithm requires that: +*> LWORK >= MN + MAX( 2*MN, NB*(N+1), MN+MN*NB, MN+NB*NRHS ) +*> where NB is an upper bound on the blocksize returned +*> by ILAENV for the routines ZGEQP3, ZTZRZF, CTZRQF, ZUNMQR, +*> and ZUNMRZ. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEsolve +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA \n +*> E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain \n +*> G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain \n +*> +* ===================================================================== + SUBROUTINE ZGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK, + $ WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS, RANK + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IMAX, IMIN + PARAMETER ( IMAX = 1, IMIN = 2 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IASCL, IBSCL, ISMAX, ISMIN, J, LWKOPT, MN, + $ NB, NB1, NB2, NB3, NB4 + DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMAX, SMAXPR, SMIN, SMINPR, + $ SMLNUM, WSIZE + COMPLEX*16 C1, C2, S1, S2 +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, XERBLA, ZCOPY, ZGEQP3, ZLAIC1, ZLASCL, + $ ZLASET, ZTRSM, ZTZRZF, ZUNMQR, ZUNMRZ +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, MAX, MIN +* .. +* .. Executable Statements .. +* + MN = MIN( M, N ) + ISMIN = MN + 1 + ISMAX = 2*MN + 1 +* +* Test the input arguments. +* + INFO = 0 + NB1 = ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'ZGERQF', ' ', M, N, -1, -1 ) + NB3 = ILAENV( 1, 'ZUNMQR', ' ', M, N, NRHS, -1 ) + NB4 = ILAENV( 1, 'ZUNMRQ', ' ', M, N, NRHS, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKOPT = MAX( 1, MN+2*N+NB*( N+1 ), 2*MN+NB*NRHS ) + WORK( 1 ) = DCMPLX( LWKOPT ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.( MN+MAX( 2*MN, N+1, MN+NRHS ) ) .AND. .NOT. + $ LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGELSY', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + RANK = 0 + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max entries outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', M, N, A, LDA, RWORK ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + RANK = 0 + GO TO 70 + END IF +* + BNRM = ZLANGE( 'M', M, NRHS, B, LDB, RWORK ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO ) + IBSCL = 2 + END IF +* +* Compute QR factorization with column pivoting of A: +* A * P = Q * R +* + CALL ZGEQP3( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), + $ LWORK-MN, RWORK, INFO ) + WSIZE = MN + DBLE( WORK( MN+1 ) ) +* +* complex workspace: MN+NB*(N+1). real workspace 2*N. +* Details of Householder rotations stored in WORK(1:MN). +* +* Determine RANK using incremental condition estimation +* + WORK( ISMIN ) = CONE + WORK( ISMAX ) = CONE + SMAX = ABS( A( 1, 1 ) ) + SMIN = SMAX + IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN + RANK = 0 + CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB ) + GO TO 70 + ELSE + RANK = 1 + END IF +* + 10 CONTINUE + IF( RANK.LT.MN ) THEN + I = RANK + 1 + CALL ZLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ), + $ A( I, I ), SMINPR, S1, C1 ) + CALL ZLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ), + $ A( I, I ), SMAXPR, S2, C2 ) +* + IF( SMAXPR*RCOND.LE.SMINPR ) THEN + DO 20 I = 1, RANK + WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 ) + WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 ) + 20 CONTINUE + WORK( ISMIN+RANK ) = C1 + WORK( ISMAX+RANK ) = C2 + SMIN = SMINPR + SMAX = SMAXPR + RANK = RANK + 1 + GO TO 10 + END IF + END IF +* +* complex workspace: 3*MN. +* +* Logically partition R = [ R11 R12 ] +* [ 0 R22 ] +* where R11 = R(1:RANK,1:RANK) +* +* [R11,R12] = [ T11, 0 ] * Y +* + IF( RANK.LT.N ) + $ CALL ZTZRZF( RANK, N, A, LDA, WORK( MN+1 ), WORK( 2*MN+1 ), + $ LWORK-2*MN, INFO ) +* +* complex workspace: 2*MN. +* Details of Householder rotations stored in WORK(MN+1:2*MN) +* +* B(1:M,1:NRHS) := Q**H * B(1:M,1:NRHS) +* + CALL ZUNMQR( 'Left', 'Conjugate transpose', M, NRHS, MN, A, LDA, + $ WORK( 1 ), B, LDB, WORK( 2*MN+1 ), LWORK-2*MN, INFO ) + WSIZE = MAX( WSIZE, 2*MN+DBLE( WORK( 2*MN+1 ) ) ) +* +* complex workspace: 2*MN+NB*NRHS. +* +* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) +* + CALL ZTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK, + $ NRHS, CONE, A, LDA, B, LDB ) +* + DO 40 J = 1, NRHS + DO 30 I = RANK + 1, N + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Y**H * B(1:N,1:NRHS) +* + IF( RANK.LT.N ) THEN + CALL ZUNMRZ( 'Left', 'Conjugate transpose', N, NRHS, RANK, + $ N-RANK, A, LDA, WORK( MN+1 ), B, LDB, + $ WORK( 2*MN+1 ), LWORK-2*MN, INFO ) + END IF +* +* complex workspace: 2*MN+NRHS. +* +* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) +* + DO 60 J = 1, NRHS + DO 50 I = 1, N + WORK( JPVT( I ) ) = B( I, J ) + 50 CONTINUE + CALL ZCOPY( N, WORK( 1 ), 1, B( 1, J ), 1 ) + 60 CONTINUE +* +* complex workspace: N. +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO ) + CALL ZLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO ) + CALL ZLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO ) + END IF +* + 70 CONTINUE + WORK( 1 ) = DCMPLX( LWKOPT ) +* + RETURN +* +* End of ZGELSY +* + END diff --git a/dspl/liblapack/SRC/zgemlq.f b/dspl/liblapack/SRC/zgemlq.f new file mode 100644 index 0000000..aa07e0f --- /dev/null +++ b/dspl/liblapack/SRC/zgemlq.f @@ -0,0 +1,282 @@ +* +* Definition: +* =========== +* +* SUBROUTINE ZGEMLQ( SIDE, TRANS, M, N, K, A, LDA, T, +* $ TSIZE, C, LDC, WORK, LWORK, INFO ) +* +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, LDA, M, N, K, LDT, TSIZE, LWORK, LDC +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), T( * ), C(LDC, * ), WORK( * ) +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEMLQ overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> where Q is a complex unitary matrix defined as the product +*> of blocked elementary reflectors computed by short wide +*> LQ factorization (ZGELQ) +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >=0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> Part of the data structure to represent Q as returned by ZGELQ. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (MAX(5,TSIZE)). +*> Part of the data structure to represent Q as returned by ZGELQ. +*> \endverbatim +*> +*> \param[in] TSIZE +*> \verbatim +*> TSIZE is INTEGER +*> The dimension of the array T. TSIZE >= 5. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If LWORK = -1, then a workspace query is assumed. The routine +*> only calculates the size of the WORK array, returns this +*> value as WORK(1), and no error message related to WORK +*> is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details +* ==================== +*> +*> \verbatim +*> +*> These details are particular for this LAPACK implementation. Users should not +*> take them for granted. These details may change in the future, and are unlikely not +*> true for another LAPACK implementation. These details are relevant if one wants +*> to try to understand the code. They are not part of the interface. +*> +*> In this version, +*> +*> T(2): row block size (MB) +*> T(3): column block size (NB) +*> T(6:TSIZE): data structure needed for Q, computed by +*> ZLASWLQ or ZGELQT +*> +*> Depending on the matrix dimensions M and N, and row and column +*> block sizes MB and NB returned by ILAENV, ZGELQ will use either +*> ZLASWLQ (if the matrix is wide-and-short) or ZGELQT to compute +*> the LQ factorization. +*> This version of ZGEMLQ will use either ZLAMSWLQ or ZGEMLQT to +*> multiply matrix Q by another matrix. +*> Further Details in ZLAMSWLQ or ZGEMLQT. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGEMLQ( SIDE, TRANS, M, N, K, A, LDA, T, TSIZE, + $ C, LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, LDA, M, N, K, TSIZE, LWORK, LDC +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), T( * ), C( LDC, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN, LQUERY + INTEGER MB, NB, LW, NBLCKS, MN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLAMSWLQ, ZGEMLQT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN, MOD +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + LQUERY = LWORK.EQ.-1 + NOTRAN = LSAME( TRANS, 'N' ) + TRAN = LSAME( TRANS, 'C' ) + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) +* + MB = INT( T( 2 ) ) + NB = INT( T( 3 ) ) + IF( LEFT ) THEN + LW = N * MB + MN = M + ELSE + LW = M * MB + MN = N + END IF +* + IF( ( NB.GT.K ) .AND. ( MN.GT.K ) ) THEN + IF( MOD( MN - K, NB - K ) .EQ. 0 ) THEN + NBLCKS = ( MN - K ) / ( NB - K ) + ELSE + NBLCKS = ( MN - K ) / ( NB - K ) + 1 + END IF + ELSE + NBLCKS = 1 + END IF +* + INFO = 0 + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.MN ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( TSIZE.LT.5 ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( ( LWORK.LT.MAX( 1, LW ) ) .AND. ( .NOT.LQUERY ) ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LW + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEMLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, K ).EQ.0 ) THEN + RETURN + END IF +* + IF( ( LEFT .AND. M.LE.K ) .OR. ( RIGHT .AND. N.LE.K ) + $ .OR. ( NB.LE.K ) .OR. ( NB.GE.MAX( M, N, K ) ) ) THEN + CALL ZGEMLQT( SIDE, TRANS, M, N, K, MB, A, LDA, + $ T( 6 ), MB, C, LDC, WORK, INFO ) + ELSE + CALL ZLAMSWLQ( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T( 6 ), + $ MB, C, LDC, WORK, LWORK, INFO ) + END IF +* + WORK( 1 ) = LW +* + RETURN +* +* End of ZGEMLQ +* + END diff --git a/dspl/liblapack/SRC/zgemlqt.f b/dspl/liblapack/SRC/zgemlqt.f new file mode 100644 index 0000000..6a4175e --- /dev/null +++ b/dspl/liblapack/SRC/zgemlqt.f @@ -0,0 +1,289 @@ +*> \brief \b ZGEMLQT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGEMLQT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEMLQT( SIDE, TRANS, M, N, K, MB, V, LDV, T, LDT, +* C, LDC, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDV, LDC, M, N, MB, LDT +* .. +* .. Array Arguments .. +* DOUBLE PRECISION V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEMLQT overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q C C Q +*> TRANS = 'C': Q**H C C Q**H +*> +*> where Q is a complex orthogonal matrix defined as the product of K +*> elementary reflectors: +*> +*> Q = H(1) H(2) . . . H(K) = I - V T V**H +*> +*> generated using the compact WY representation as returned by ZGELQT. +*> +*> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The block size used for the storage of T. K >= MB >= 1. +*> This must be the same value of MB used to generate T +*> in DGELQT. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (LDV,M) if SIDE = 'L', +*> (LDV,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DGELQT in the first K rows of its array argument A. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,K). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,K) +*> The upper triangular factors of the block reflectors +*> as returned by DGELQT, stored as a MB-by-K matrix. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q C, Q**H C, C Q**H or C Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array. The dimension of +*> WORK is N*MB if SIDE = 'L', or M*MB if SIDE = 'R'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + SUBROUTINE ZGEMLQT( SIDE, TRANS, M, N, K, MB, V, LDV, T, LDT, + $ C, LDC, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDV, LDC, M, N, MB, LDT +* .. +* .. Array Arguments .. + COMPLEX*16 V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN + INTEGER I, IB, LDWORK, KF +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARFB +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* .. Test the input arguments .. +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + TRAN = LSAME( TRANS, 'C' ) + NOTRAN = LSAME( TRANS, 'N' ) +* + IF( LEFT ) THEN + LDWORK = MAX( 1, N ) + ELSE IF ( RIGHT ) THEN + LDWORK = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0) THEN + INFO = -5 + ELSE IF( MB.LT.1 .OR. (MB.GT.K .AND. K.GT.0)) THEN + INFO = -6 + ELSE IF( LDV.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDT.LT.MB ) THEN + INFO = -10 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEMLQT', -INFO ) + RETURN + END IF +* +* .. Quick return if possible .. +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) RETURN +* + IF( LEFT .AND. NOTRAN ) THEN +* + DO I = 1, K, MB + IB = MIN( MB, K-I+1 ) + CALL ZLARFB( 'L', 'C', 'F', 'R', M-I+1, N, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( I, 1 ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( RIGHT .AND. TRAN ) THEN +* + DO I = 1, K, MB + IB = MIN( MB, K-I+1 ) + CALL ZLARFB( 'R', 'N', 'F', 'R', M, N-I+1, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( 1, I ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( LEFT .AND. TRAN ) THEN +* + KF = ((K-1)/MB)*MB+1 + DO I = KF, 1, -MB + IB = MIN( MB, K-I+1 ) + CALL ZLARFB( 'L', 'N', 'F', 'R', M-I+1, N, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( I, 1 ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( RIGHT .AND. NOTRAN ) THEN +* + KF = ((K-1)/MB)*MB+1 + DO I = KF, 1, -MB + IB = MIN( MB, K-I+1 ) + CALL ZLARFB( 'R', 'C', 'F', 'R', M, N-I+1, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( 1, I ), LDC, WORK, LDWORK ) + END DO +* + END IF +* + RETURN +* +* End of ZGEMLQT +* + END diff --git a/dspl/liblapack/SRC/zgemqr.f b/dspl/liblapack/SRC/zgemqr.f new file mode 100644 index 0000000..32f1bf4 --- /dev/null +++ b/dspl/liblapack/SRC/zgemqr.f @@ -0,0 +1,285 @@ +* +* Definition: +* =========== +* +* SUBROUTINE ZGEMQR( SIDE, TRANS, M, N, K, A, LDA, T, +* $ TSIZE, C, LDC, WORK, LWORK, INFO ) +* +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, LDA, M, N, K, LDT, TSIZE, LWORK, LDC +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), T( * ), C( LDC, * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEMQR overwrites the general real M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'T': Q**H * C C * Q**H +*> +*> where Q is a complex unitary matrix defined as the product +*> of blocked elementary reflectors computed by tall skinny +*> QR factorization (ZGEQR) +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**T from the Left; +*> = 'R': apply Q or Q**T from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'T': Transpose, apply Q**T. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >=0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,K) +*> Part of the data structure to represent Q as returned by ZGEQR. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (MAX(5,TSIZE)). +*> Part of the data structure to represent Q as returned by ZGEQR. +*> \endverbatim +*> +*> \param[in] TSIZE +*> \verbatim +*> TSIZE is INTEGER +*> The dimension of the array T. TSIZE >= 5. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If LWORK = -1, then a workspace query is assumed. The routine +*> only calculates the size of the WORK array, returns this +*> value as WORK(1), and no error message related to WORK +*> is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details +* ==================== +*> +*> \verbatim +*> +*> These details are particular for this LAPACK implementation. Users should not +*> take them for granted. These details may change in the future, and are unlikely not +*> true for another LAPACK implementation. These details are relevant if one wants +*> to try to understand the code. They are not part of the interface. +*> +*> In this version, +*> +*> T(2): row block size (MB) +*> T(3): column block size (NB) +*> T(6:TSIZE): data structure needed for Q, computed by +*> ZLATSQR or ZGEQRT +*> +*> Depending on the matrix dimensions M and N, and row and column +*> block sizes MB and NB returned by ILAENV, ZGEQR will use either +*> ZLATSQR (if the matrix is tall-and-skinny) or ZGEQRT to compute +*> the QR factorization. +*> This version of ZGEMQR will use either ZLAMTSQR or ZGEMQRT to +*> multiply matrix Q by another matrix. +*> Further Details in ZLAMTSQR or ZGEMQRT. +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGEMQR( SIDE, TRANS, M, N, K, A, LDA, T, TSIZE, + $ C, LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, LDA, M, N, K, TSIZE, LWORK, LDC +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), T( * ), C( LDC, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN, LQUERY + INTEGER MB, NB, LW, NBLCKS, MN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZGEMQRT, ZLAMTSQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN, MOD +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + LQUERY = LWORK.EQ.-1 + NOTRAN = LSAME( TRANS, 'N' ) + TRAN = LSAME( TRANS, 'C' ) + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) +* + MB = INT( T( 2 ) ) + NB = INT( T( 3 ) ) + IF( LEFT ) THEN + LW = N * NB + MN = M + ELSE + LW = MB * NB + MN = N + END IF +* + IF( ( MB.GT.K ) .AND. ( MN.GT.K ) ) THEN + IF( MOD( MN - K, MB - K ).EQ.0 ) THEN + NBLCKS = ( MN - K ) / ( MB - K ) + ELSE + NBLCKS = ( MN - K ) / ( MB - K ) + 1 + END IF + ELSE + NBLCKS = 1 + END IF +* + INFO = 0 + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.MN ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, MN ) ) THEN + INFO = -7 + ELSE IF( TSIZE.LT.5 ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( ( LWORK.LT.MAX( 1, LW ) ) .AND. ( .NOT.LQUERY ) ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LW + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEMQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N, K ).EQ.0 ) THEN + RETURN + END IF +* + IF( ( LEFT .AND. M.LE.K ) .OR. ( RIGHT .AND. N.LE.K ) + $ .OR. ( MB.LE.K ) .OR. ( MB.GE.MAX( M, N, K ) ) ) THEN + CALL ZGEMQRT( SIDE, TRANS, M, N, K, NB, A, LDA, T( 6 ), + $ NB, C, LDC, WORK, INFO ) + ELSE + CALL ZLAMTSQR( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T( 6 ), + $ NB, C, LDC, WORK, LWORK, INFO ) + END IF +* + WORK( 1 ) = LW +* + RETURN +* +* End of ZGEMQR +* + END diff --git a/dspl/liblapack/SRC/zgemqrt.f b/dspl/liblapack/SRC/zgemqrt.f new file mode 100644 index 0000000..7ceb77f --- /dev/null +++ b/dspl/liblapack/SRC/zgemqrt.f @@ -0,0 +1,291 @@ +*> \brief \b ZGEMQRT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEMQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEMQRT( SIDE, TRANS, M, N, K, NB, V, LDV, T, LDT, +* C, LDC, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDV, LDC, M, N, NB, LDT +* .. +* .. Array Arguments .. +* COMPLEX*16 V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEMQRT overwrites the general complex M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q C C Q +*> TRANS = 'C': Q**H C C Q**H +*> +*> where Q is a complex orthogonal matrix defined as the product of K +*> elementary reflectors: +*> +*> Q = H(1) H(2) . . . H(K) = I - V T V**H +*> +*> generated using the compact WY representation as returned by ZGEQRT. +*> +*> Q is of order M if SIDE = 'L' and of order N if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The block size used for the storage of T. K >= NB >= 1. +*> This must be the same value of NB used to generate T +*> in CGEQRT. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (LDV,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> CGEQRT in the first K columns of its array argument A. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,K) +*> The upper triangular factors of the block reflectors +*> as returned by CGEQRT, stored as a NB-by-N matrix. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q C, Q**H C, C Q**H or C Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array. The dimension of WORK is +*> N*NB if SIDE = 'L', or M*NB if SIDE = 'R'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +* ===================================================================== + SUBROUTINE ZGEMQRT( SIDE, TRANS, M, N, K, NB, V, LDV, T, LDT, + $ C, LDC, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDV, LDC, M, N, NB, LDT +* .. +* .. Array Arguments .. + COMPLEX*16 V( LDV, * ), C( LDC, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN + INTEGER I, IB, LDWORK, KF, Q +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARFB +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* .. Test the input arguments .. +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + TRAN = LSAME( TRANS, 'C' ) + NOTRAN = LSAME( TRANS, 'N' ) +* + IF( LEFT ) THEN + LDWORK = MAX( 1, N ) + Q = M + ELSE IF ( RIGHT ) THEN + LDWORK = MAX( 1, M ) + Q = N + END IF + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.Q ) THEN + INFO = -5 + ELSE IF( NB.LT.1 .OR. (NB.GT.K .AND. K.GT.0)) THEN + INFO = -6 + ELSE IF( LDV.LT.MAX( 1, Q ) ) THEN + INFO = -8 + ELSE IF( LDT.LT.NB ) THEN + INFO = -10 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEMQRT', -INFO ) + RETURN + END IF +* +* .. Quick return if possible .. +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) RETURN +* + IF( LEFT .AND. TRAN ) THEN +* + DO I = 1, K, NB + IB = MIN( NB, K-I+1 ) + CALL ZLARFB( 'L', 'C', 'F', 'C', M-I+1, N, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( I, 1 ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( RIGHT .AND. NOTRAN ) THEN +* + DO I = 1, K, NB + IB = MIN( NB, K-I+1 ) + CALL ZLARFB( 'R', 'N', 'F', 'C', M, N-I+1, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( 1, I ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( LEFT .AND. NOTRAN ) THEN +* + KF = ((K-1)/NB)*NB+1 + DO I = KF, 1, -NB + IB = MIN( NB, K-I+1 ) + CALL ZLARFB( 'L', 'N', 'F', 'C', M-I+1, N, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( I, 1 ), LDC, WORK, LDWORK ) + END DO +* + ELSE IF( RIGHT .AND. TRAN ) THEN +* + KF = ((K-1)/NB)*NB+1 + DO I = KF, 1, -NB + IB = MIN( NB, K-I+1 ) + CALL ZLARFB( 'R', 'C', 'F', 'C', M, N-I+1, IB, + $ V( I, I ), LDV, T( 1, I ), LDT, + $ C( 1, I ), LDC, WORK, LDWORK ) + END DO +* + END IF +* + RETURN +* +* End of ZGEMQRT +* + END diff --git a/dspl/liblapack/SRC/zgeql2.f b/dspl/liblapack/SRC/zgeql2.f new file mode 100644 index 0000000..90008c4 --- /dev/null +++ b/dspl/liblapack/SRC/zgeql2.f @@ -0,0 +1,192 @@ +*> \brief \b ZGEQL2 computes the QL factorization of a general rectangular matrix using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEQL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEQL2( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEQL2 computes a QL factorization of a complex m by n matrix A: +*> A = Q * L. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, if m >= n, the lower triangle of the subarray +*> A(m-n+1:m,1:n) contains the n by n lower triangular matrix L; +*> if m <= n, the elements on and below the (n-m)-th +*> superdiagonal contain the m by n lower trapezoidal matrix L; +*> the remaining elements, with the array TAU, represent the +*> unitary matrix Q as a product of elementary reflectors +*> (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in +*> A(1:m-k+i-1,n-k+i), and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGEQL2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARF, ZLARFG +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEQL2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = K, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* A(1:m-k+i-1,n-k+i) +* + ALPHA = A( M-K+I, N-K+I ) + CALL ZLARFG( M-K+I, ALPHA, A( 1, N-K+I ), 1, TAU( I ) ) +* +* Apply H(i)**H to A(1:m-k+i,1:n-k+i-1) from the left +* + A( M-K+I, N-K+I ) = ONE + CALL ZLARF( 'Left', M-K+I, N-K+I-1, A( 1, N-K+I ), 1, + $ DCONJG( TAU( I ) ), A, LDA, WORK ) + A( M-K+I, N-K+I ) = ALPHA + 10 CONTINUE + RETURN +* +* End of ZGEQL2 +* + END diff --git a/dspl/liblapack/SRC/zgeqlf.f b/dspl/liblapack/SRC/zgeqlf.f new file mode 100644 index 0000000..82bb9d1 --- /dev/null +++ b/dspl/liblapack/SRC/zgeqlf.f @@ -0,0 +1,287 @@ +*> \brief \b ZGEQLF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEQLF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEQLF computes a QL factorization of a complex M-by-N matrix A: +*> A = Q * L. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if m >= n, the lower triangle of the subarray +*> A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L; +*> if m <= n, the elements on and below the (n-m)-th +*> superdiagonal contain the M-by-N lower trapezoidal matrix L; +*> the remaining elements, with the array TAU, represent the +*> unitary matrix Q as a product of elementary reflectors +*> (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in +*> A(1:m-k+i-1,n-k+i), and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT, + $ MU, NB, NBMIN, NU, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQL2, ZLARFB, ZLARFT +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF +* + IF( INFO.EQ.0 ) THEN + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'ZGEQLF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEQLF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZGEQLF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZGEQLF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially. +* The last kk columns are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* + DO 10 I = K - KK + KI + 1, K - KK + 1, -NB + IB = MIN( K-I+1, NB ) +* +* Compute the QL factorization of the current block +* A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1) +* + CALL ZGEQL2( M-K+I+IB-1, IB, A( 1, N-K+I ), LDA, TAU( I ), + $ WORK, IINFO ) + IF( N-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL ZLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left +* + CALL ZLARFB( 'Left', 'Conjugate transpose', 'Backward', + $ 'Columnwise', M-K+I+IB-1, N-K+I-1, IB, + $ A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + MU = M - K + I + NB - 1 + NU = N - K + I + NB - 1 + ELSE + MU = M + NU = N + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 .AND. NU.GT.0 ) + $ CALL ZGEQL2( MU, NU, A, LDA, TAU, WORK, IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of ZGEQLF +* + END diff --git a/dspl/liblapack/SRC/zgeqp3.f b/dspl/liblapack/SRC/zgeqp3.f new file mode 100644 index 0000000..ea069ee --- /dev/null +++ b/dspl/liblapack/SRC/zgeqp3.f @@ -0,0 +1,372 @@ +*> \brief \b ZGEQP3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEQP3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEQP3 computes a QR factorization with column pivoting of a +*> matrix A: A*P = Q*R using Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the upper triangle of the array contains the +*> min(M,N)-by-N upper trapezoidal matrix R; the elements below +*> the diagonal, together with the array TAU, represent the +*> unitary matrix Q as a product of min(M,N) elementary +*> reflectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> On entry, if JPVT(J).ne.0, the J-th column of A is permuted +*> to the front of A*P (a leading column); if JPVT(J)=0, +*> the J-th column of A is a free column. +*> On exit, if JPVT(J)=K, then the J-th column of A*P was the +*> the K-th column of A. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO=0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= N+1. +*> For optimal performance LWORK >= ( N+1 )*NB, where NB +*> is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a real/complex vector +*> with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in +*> A(i+1:m,i), and tau in TAU(i). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +*> X. Sun, Computer Science Dept., Duke University, USA +*> +* ===================================================================== + SUBROUTINE ZGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, RWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER INB, INBMIN, IXOVER + PARAMETER ( INB = 1, INBMIN = 2, IXOVER = 3 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER FJB, IWS, J, JB, LWKOPT, MINMN, MINWS, NA, NB, + $ NBMIN, NFXD, NX, SM, SMINMN, SN, TOPBMN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQRF, ZLAQP2, ZLAQPS, ZSWAP, ZUNMQR +* .. +* .. External Functions .. + INTEGER ILAENV + DOUBLE PRECISION DZNRM2 + EXTERNAL ILAENV, DZNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test input arguments +* ==================== +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF +* + IF( INFO.EQ.0 ) THEN + MINMN = MIN( M, N ) + IF( MINMN.EQ.0 ) THEN + IWS = 1 + LWKOPT = 1 + ELSE + IWS = N + 1 + NB = ILAENV( INB, 'ZGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = ( N + 1 )*NB + END IF + WORK( 1 ) = DCMPLX( LWKOPT ) +* + IF( ( LWORK.LT.IWS ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEQP3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Move initial columns up front. +* + NFXD = 1 + DO 10 J = 1, N + IF( JPVT( J ).NE.0 ) THEN + IF( J.NE.NFXD ) THEN + CALL ZSWAP( M, A( 1, J ), 1, A( 1, NFXD ), 1 ) + JPVT( J ) = JPVT( NFXD ) + JPVT( NFXD ) = J + ELSE + JPVT( J ) = J + END IF + NFXD = NFXD + 1 + ELSE + JPVT( J ) = J + END IF + 10 CONTINUE + NFXD = NFXD - 1 +* +* Factorize fixed columns +* ======================= +* +* Compute the QR factorization of fixed columns and update +* remaining columns. +* + IF( NFXD.GT.0 ) THEN + NA = MIN( M, NFXD ) +*CC CALL ZGEQR2( M, NA, A, LDA, TAU, WORK, INFO ) + CALL ZGEQRF( M, NA, A, LDA, TAU, WORK, LWORK, INFO ) + IWS = MAX( IWS, INT( WORK( 1 ) ) ) + IF( NA.LT.N ) THEN +*CC CALL ZUNM2R( 'Left', 'Conjugate Transpose', M, N-NA, +*CC $ NA, A, LDA, TAU, A( 1, NA+1 ), LDA, WORK, +*CC $ INFO ) + CALL ZUNMQR( 'Left', 'Conjugate Transpose', M, N-NA, NA, A, + $ LDA, TAU, A( 1, NA+1 ), LDA, WORK, LWORK, + $ INFO ) + IWS = MAX( IWS, INT( WORK( 1 ) ) ) + END IF + END IF +* +* Factorize free columns +* ====================== +* + IF( NFXD.LT.MINMN ) THEN +* + SM = M - NFXD + SN = N - NFXD + SMINMN = MINMN - NFXD +* +* Determine the block size. +* + NB = ILAENV( INB, 'ZGEQRF', ' ', SM, SN, -1, -1 ) + NBMIN = 2 + NX = 0 +* + IF( ( NB.GT.1 ) .AND. ( NB.LT.SMINMN ) ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( IXOVER, 'ZGEQRF', ' ', SM, SN, -1, + $ -1 ) ) +* +* + IF( NX.LT.SMINMN ) THEN +* +* Determine if workspace is large enough for blocked code. +* + MINWS = ( SN+1 )*NB + IWS = MAX( IWS, MINWS ) + IF( LWORK.LT.MINWS ) THEN +* +* Not enough workspace to use optimal NB: Reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / ( SN+1 ) + NBMIN = MAX( 2, ILAENV( INBMIN, 'ZGEQRF', ' ', SM, SN, + $ -1, -1 ) ) +* +* + END IF + END IF + END IF +* +* Initialize partial column norms. The first N elements of work +* store the exact column norms. +* + DO 20 J = NFXD + 1, N + RWORK( J ) = DZNRM2( SM, A( NFXD+1, J ), 1 ) + RWORK( N+J ) = RWORK( J ) + 20 CONTINUE +* + IF( ( NB.GE.NBMIN ) .AND. ( NB.LT.SMINMN ) .AND. + $ ( NX.LT.SMINMN ) ) THEN +* +* Use blocked code initially. +* + J = NFXD + 1 +* +* Compute factorization: while loop. +* +* + TOPBMN = MINMN - NX + 30 CONTINUE + IF( J.LE.TOPBMN ) THEN + JB = MIN( NB, TOPBMN-J+1 ) +* +* Factorize JB columns among columns J:N. +* + CALL ZLAQPS( M, N-J+1, J-1, JB, FJB, A( 1, J ), LDA, + $ JPVT( J ), TAU( J ), RWORK( J ), + $ RWORK( N+J ), WORK( 1 ), WORK( JB+1 ), + $ N-J+1 ) +* + J = J + FJB + GO TO 30 + END IF + ELSE + J = NFXD + 1 + END IF +* +* Use unblocked code to factor the last or only block. +* +* + IF( J.LE.MINMN ) + $ CALL ZLAQP2( M, N-J+1, J-1, A( 1, J ), LDA, JPVT( J ), + $ TAU( J ), RWORK( J ), RWORK( N+J ), WORK( 1 ) ) +* + END IF +* + WORK( 1 ) = DCMPLX( LWKOPT ) + RETURN +* +* End of ZGEQP3 +* + END diff --git a/dspl/liblapack/SRC/zgeqr.f b/dspl/liblapack/SRC/zgeqr.f new file mode 100644 index 0000000..1aa457f --- /dev/null +++ b/dspl/liblapack/SRC/zgeqr.f @@ -0,0 +1,307 @@ +* +* Definition: +* =========== +* +* SUBROUTINE ZGEQR( M, N, A, LDA, T, TSIZE, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, TSIZE, LWORK +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), T( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> ZGEQR computes a QR factorization of an M-by-N matrix A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix R +*> (R is upper triangular if M >= N); +*> the elements below the diagonal are used to store part of the +*> data structure to represent Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (MAX(5,TSIZE)) +*> On exit, if INFO = 0, T(1) returns optimal (or either minimal +*> or optimal, if query is assumed) TSIZE. See TSIZE for details. +*> Remaining T contains part of the data structure used to represent Q. +*> If one wants to apply or construct Q, then one needs to keep T +*> (in addition to A) and pass it to further subroutines. +*> \endverbatim +*> +*> \param[in] TSIZE +*> \verbatim +*> TSIZE is INTEGER +*> If TSIZE >= 5, the dimension of the array T. +*> If TSIZE = -1 or -2, then a workspace query is assumed. The routine +*> only calculates the sizes of the T and WORK arrays, returns these +*> values as the first entries of the T and WORK arrays, and no error +*> message related to T or WORK is issued by XERBLA. +*> If TSIZE = -1, the routine calculates optimal size of T for the +*> optimum performance and returns this value in T(1). +*> If TSIZE = -2, the routine calculates minimal size of T and +*> returns this value in T(1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) contains optimal (or either minimal +*> or optimal, if query was assumed) LWORK. +*> See LWORK for details. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If LWORK = -1 or -2, then a workspace query is assumed. The routine +*> only calculates the sizes of the T and WORK arrays, returns these +*> values as the first entries of the T and WORK arrays, and no error +*> message related to T or WORK is issued by XERBLA. +*> If LWORK = -1, the routine calculates optimal size of WORK for the +*> optimal performance and returns this value in WORK(1). +*> If LWORK = -2, the routine calculates minimal size of WORK and +*> returns this value in WORK(1). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details +* ==================== +*> +*> \verbatim +*> +*> The goal of the interface is to give maximum freedom to the developers for +*> creating any QR factorization algorithm they wish. The triangular +*> (trapezoidal) R has to be stored in the upper part of A. The lower part of A +*> and the array T can be used to store any relevant information for applying or +*> constructing the Q factor. The WORK array can safely be discarded after exit. +*> +*> Caution: One should not expect the sizes of T and WORK to be the same from one +*> LAPACK implementation to the other, or even from one execution to the other. +*> A workspace query (for T and WORK) is needed at each execution. However, +*> for a given execution, the size of T and WORK are fixed and will not change +*> from one query to the next. +*> +*> \endverbatim +*> +*> \par Further Details particular to this LAPACK implementation: +* ============================================================== +*> +*> \verbatim +*> +*> These details are particular for this LAPACK implementation. Users should not +*> take them for granted. These details may change in the future, and are unlikely not +*> true for another LAPACK implementation. These details are relevant if one wants +*> to try to understand the code. They are not part of the interface. +*> +*> In this version, +*> +*> T(2): row block size (MB) +*> T(3): column block size (NB) +*> T(6:TSIZE): data structure needed for Q, computed by +*> ZLATSQR or ZGEQRT +*> +*> Depending on the matrix dimensions M and N, and row and column +*> block sizes MB and NB returned by ILAENV, ZGEQR will use either +*> ZLATSQR (if the matrix is tall-and-skinny) or ZGEQRT to compute +*> the QR factorization. +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGEQR( M, N, A, LDA, T, TSIZE, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, TSIZE, LWORK +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), T( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LQUERY, LMINWS, MINT, MINW + INTEGER MB, NB, MINTSZ, NBLCKS +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLATSQR, ZGEQRT, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, MOD +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 +* + LQUERY = ( TSIZE.EQ.-1 .OR. TSIZE.EQ.-2 .OR. + $ LWORK.EQ.-1 .OR. LWORK.EQ.-2 ) +* + MINT = .FALSE. + MINW = .FALSE. + IF( TSIZE.EQ.-2 .OR. LWORK.EQ.-2 ) THEN + IF( TSIZE.NE.-1 ) MINT = .TRUE. + IF( LWORK.NE.-1 ) MINW = .TRUE. + END IF +* +* Determine the block size +* + IF( MIN ( M, N ).GT.0 ) THEN + MB = ILAENV( 1, 'ZGEQR ', ' ', M, N, 1, -1 ) + NB = ILAENV( 1, 'ZGEQR ', ' ', M, N, 2, -1 ) + ELSE + MB = M + NB = 1 + END IF + IF( MB.GT.M .OR. MB.LE.N ) MB = M + IF( NB.GT.MIN( M, N ) .OR. NB.LT.1 ) NB = 1 + MINTSZ = N + 5 + IF( MB.GT.N .AND. M.GT.N ) THEN + IF( MOD( M - N, MB - N ).EQ.0 ) THEN + NBLCKS = ( M - N ) / ( MB - N ) + ELSE + NBLCKS = ( M - N ) / ( MB - N ) + 1 + END IF + ELSE + NBLCKS = 1 + END IF +* +* Determine if the workspace size satisfies minimal size +* + LMINWS = .FALSE. + IF( ( TSIZE.LT.MAX( 1, NB*N*NBLCKS + 5 ) .OR. LWORK.LT.NB*N ) + $ .AND. ( LWORK.GE.N ) .AND. ( TSIZE.GE.MINTSZ ) + $ .AND. ( .NOT.LQUERY ) ) THEN + IF( TSIZE.LT.MAX( 1, NB*N*NBLCKS + 5 ) ) THEN + LMINWS = .TRUE. + NB = 1 + MB = M + END IF + IF( LWORK.LT.NB*N ) THEN + LMINWS = .TRUE. + NB = 1 + END IF + END IF +* + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( TSIZE.LT.MAX( 1, NB*N*NBLCKS + 5 ) + $ .AND. ( .NOT.LQUERY ) .AND. ( .NOT.LMINWS ) ) THEN + INFO = -6 + ELSE IF( ( LWORK.LT.MAX( 1, N*NB ) ) .AND. ( .NOT.LQUERY ) + $ .AND. ( .NOT.LMINWS ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( MINT ) THEN + T( 1 ) = MINTSZ + ELSE + T( 1 ) = NB*N*NBLCKS + 5 + END IF + T( 2 ) = MB + T( 3 ) = NB + IF( MINW ) THEN + WORK( 1 ) = MAX( 1, N ) + ELSE + WORK( 1 ) = MAX( 1, NB*N ) + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN( M, N ).EQ.0 ) THEN + RETURN + END IF +* +* The QR Decomposition +* + IF( ( M.LE.N ) .OR. ( MB.LE.N ) .OR. ( MB.GE.M ) ) THEN + CALL ZGEQRT( M, N, NB, A, LDA, T( 6 ), NB, WORK, INFO ) + ELSE + CALL ZLATSQR( M, N, MB, NB, A, LDA, T( 6 ), NB, WORK, + $ LWORK, INFO ) + END IF +* + WORK( 1 ) = MAX( 1, NB*N ) +* + RETURN +* +* End of ZGEQR +* + END diff --git a/dspl/liblapack/SRC/zgeqr2.f b/dspl/liblapack/SRC/zgeqr2.f new file mode 100644 index 0000000..d2774d7 --- /dev/null +++ b/dspl/liblapack/SRC/zgeqr2.f @@ -0,0 +1,192 @@ +*> \brief \b ZGEQR2 computes the QR factorization of a general rectangular matrix using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEQR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEQR2( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEQR2 computes a QR factorization of a complex m by n matrix A: +*> A = Q * R. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(m,n) by n upper trapezoidal matrix R (R is +*> upper triangular if m >= n); the elements below the diagonal, +*> with the array TAU, represent the unitary matrix Q as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +*> and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGEQR2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARF, ZLARFG +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEQR2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL ZLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAU( I ) ) + IF( I.LT.N ) THEN +* +* Apply H(i)**H to A(i:m,i+1:n) from the left +* + ALPHA = A( I, I ) + A( I, I ) = ONE + CALL ZLARF( 'Left', M-I+1, N-I, A( I, I ), 1, + $ DCONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK ) + A( I, I ) = ALPHA + END IF + 10 CONTINUE + RETURN +* +* End of ZGEQR2 +* + END diff --git a/dspl/liblapack/SRC/zgeqr2p.f b/dspl/liblapack/SRC/zgeqr2p.f new file mode 100644 index 0000000..0e5e554 --- /dev/null +++ b/dspl/liblapack/SRC/zgeqr2p.f @@ -0,0 +1,195 @@ +*> \brief \b ZGEQR2P computes the QR factorization of a general rectangular matrix with non-negative diagonal elements using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEQR2P + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEQR2P computes a QR factorization of a complex m by n matrix A: +*> A = Q * R. The diagonal entries of R are real and nonnegative. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(m,n) by n upper trapezoidal matrix R (R is +*> upper triangular if m >= n). The diagonal entries of R +*> are real and nonnegative; the elements below the diagonal, +*> with the array TAU, represent the unitary matrix Q as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +*> and tau in TAU(i). +*> +*> See Lapack Working Note 203 for details +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGEQR2P( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARF, ZLARFGP +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEQR2P', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = 1, K +* +* Generate elementary reflector H(i) to annihilate A(i+1:m,i) +* + CALL ZLARFGP( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ TAU( I ) ) + IF( I.LT.N ) THEN +* +* Apply H(i)**H to A(i:m,i+1:n) from the left +* + ALPHA = A( I, I ) + A( I, I ) = ONE + CALL ZLARF( 'Left', M-I+1, N-I, A( I, I ), 1, + $ DCONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK ) + A( I, I ) = ALPHA + END IF + 10 CONTINUE + RETURN +* +* End of ZGEQR2P +* + END diff --git a/dspl/liblapack/SRC/zgeqrf.f b/dspl/liblapack/SRC/zgeqrf.f new file mode 100644 index 0000000..3ea1e71 --- /dev/null +++ b/dspl/liblapack/SRC/zgeqrf.f @@ -0,0 +1,270 @@ +*> \brief \b ZGEQRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEQRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEQRF computes a QR factorization of a complex M-by-N matrix A: +*> A = Q * R. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix R (R is +*> upper triangular if m >= n); the elements below the diagonal, +*> with the array TAU, represent the unitary matrix Q as a +*> product of min(m,n) elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +*> and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQR2, ZLARFB, ZLARFT +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZGEQRF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZGEQRF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the QR factorization of the current block +* A(i:m,i:i+ib-1) +* + CALL ZGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL ZLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**H to A(i:m,i+ib:n) from the left +* + CALL ZLARFB( 'Left', 'Conjugate transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL ZGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of ZGEQRF +* + END diff --git a/dspl/liblapack/SRC/zgeqrfp.f b/dspl/liblapack/SRC/zgeqrfp.f new file mode 100644 index 0000000..cdc4bfa --- /dev/null +++ b/dspl/liblapack/SRC/zgeqrfp.f @@ -0,0 +1,273 @@ +*> \brief \b ZGEQRFP +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEQRFP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEQRFP computes a QR factorization of a complex M-by-N matrix A: +*> A = Q * R. The diagonal entries of R are real and nonnegative. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix R (R is +*> upper triangular if m >= n). The diagonal entries of R +*> are real and nonnegative; The elements below the diagonal, +*> with the array TAU, represent the unitary matrix Q as a +*> product of min(m,n) elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i), +*> and tau in TAU(i). +*> +*> See Lapack Working Note 203 for details +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGEQRFP( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQR2P, ZLARFB, ZLARFT +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEQRFP', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZGEQRF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZGEQRF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially +* + DO 10 I = 1, K - NX, NB + IB = MIN( K-I+1, NB ) +* +* Compute the QR factorization of the current block +* A(i:m,i:i+ib-1) +* + CALL ZGEQR2P( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL ZLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**H to A(i:m,i+ib:n) from the left +* + CALL ZLARFB( 'Left', 'Conjugate transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + ELSE + I = 1 + END IF +* +* Use unblocked code to factor the last or only block. +* + IF( I.LE.K ) + $ CALL ZGEQR2P( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of ZGEQRFP +* + END diff --git a/dspl/liblapack/SRC/zgeqrt.f b/dspl/liblapack/SRC/zgeqrt.f new file mode 100644 index 0000000..60adfec --- /dev/null +++ b/dspl/liblapack/SRC/zgeqrt.f @@ -0,0 +1,218 @@ +*> \brief \b ZGEQRT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDT, M, N, NB +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEQRT computes a blocked QR factorization of a complex M-by-N matrix A +*> using the compact WY representation of Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The block size to be used in the blocked QR. MIN(M,N) >= NB >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix R (R is +*> upper triangular if M >= N); the elements below the diagonal +*> are the columns of V. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,MIN(M,N)) +*> The upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See below +*> for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (NB*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix V stores the elementary reflectors H(i) in the i-th column +*> below the diagonal. For example, if M=5 and N=3, the matrix V is +*> +*> V = ( 1 ) +*> ( v1 1 ) +*> ( v1 v2 1 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> where the vi's represent the vectors which define H(i), which are returned +*> in the matrix A. The 1's along the diagonal of V are not stored in A. +*> +*> Let K=MIN(M,N). The number of blocks is B = ceiling(K/NB), where each +*> block is of order NB except for the last block, which is of order +*> IB = K - (B-1)*NB. For each of the B blocks, a upper triangular block +*> reflector factor is computed: T1, T2, ..., TB. The NB-by-NB (and IB-by-IB +*> for the last block) T's are stored in the NB-by-K matrix T as +*> +*> T = (T1 T2 ... TB). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDT, M, N, NB +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + INTEGER I, IB, IINFO, K + LOGICAL USE_RECURSIVE_QR + PARAMETER( USE_RECURSIVE_QR=.TRUE. ) +* .. +* .. External Subroutines .. + EXTERNAL ZGEQRT2, ZGEQRT3, ZLARFB, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NB.LT.1 .OR. ( NB.GT.MIN(M,N) .AND. MIN(M,N).GT.0 ) )THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDT.LT.NB ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEQRT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + K = MIN( M, N ) + IF( K.EQ.0 ) RETURN +* +* Blocked loop of length K +* + DO I = 1, K, NB + IB = MIN( K-I+1, NB ) +* +* Compute the QR factorization of the current block A(I:M,I:I+IB-1) +* + IF( USE_RECURSIVE_QR ) THEN + CALL ZGEQRT3( M-I+1, IB, A(I,I), LDA, T(1,I), LDT, IINFO ) + ELSE + CALL ZGEQRT2( M-I+1, IB, A(I,I), LDA, T(1,I), LDT, IINFO ) + END IF + IF( I+IB.LE.N ) THEN +* +* Update by applying H**H to A(I:M,I+IB:N) from the left +* + CALL ZLARFB( 'L', 'C', 'F', 'C', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, T( 1, I ), LDT, + $ A( I, I+IB ), LDA, WORK , N-I-IB+1 ) + END IF + END DO + RETURN +* +* End of ZGEQRT +* + END diff --git a/dspl/liblapack/SRC/zgeqrt2.f b/dspl/liblapack/SRC/zgeqrt2.f new file mode 100644 index 0000000..bad7084 --- /dev/null +++ b/dspl/liblapack/SRC/zgeqrt2.f @@ -0,0 +1,227 @@ +*> \brief \b ZGEQRT2 computes a QR factorization of a general real or complex matrix using the compact WY representation of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEQRT2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGEQRT2( M, N, A, LDA, T, LDT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDT, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEQRT2 computes a QR factorization of a complex M-by-N matrix A, +*> using the compact WY representation of Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= N. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the complex M-by-N matrix A. On exit, the elements on and +*> above the diagonal contain the N-by-N upper triangular matrix R; the +*> elements below the diagonal are the columns of V. See below for +*> further details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,N) +*> The N-by-N upper triangular factor of the block reflector. +*> The elements on and above the diagonal contain the block +*> reflector T; the elements below the diagonal are not used. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix V stores the elementary reflectors H(i) in the i-th column +*> below the diagonal. For example, if M=5 and N=3, the matrix V is +*> +*> V = ( 1 ) +*> ( v1 1 ) +*> ( v1 v2 1 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> where the vi's represent the vectors which define H(i), which are returned +*> in the matrix A. The 1's along the diagonal of V are not stored in A. The +*> block reflector H is then given by +*> +*> H = I - V * T * V**H +*> +*> where V**H is the conjugate transpose of V. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGEQRT2( M, N, A, LDA, T, LDT, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDT, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER( ONE = (1.0D+00,0.0D+00), ZERO = (0.0D+00,0.0D+00) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX*16 AII, ALPHA +* .. +* .. External Subroutines .. + EXTERNAL ZLARFG, ZGEMV, ZGERC, ZTRMV, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEQRT2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO I = 1, K +* +* Generate elem. refl. H(i) to annihilate A(i+1:m,i), tau(I) -> T(I,1) +* + CALL ZLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1, + $ T( I, 1 ) ) + IF( I.LT.N ) THEN +* +* Apply H(i) to A(I:M,I+1:N) from the left +* + AII = A( I, I ) + A( I, I ) = ONE +* +* W(1:N-I) := A(I:M,I+1:N)^H * A(I:M,I) [W = T(:,N)] +* + CALL ZGEMV( 'C',M-I+1, N-I, ONE, A( I, I+1 ), LDA, + $ A( I, I ), 1, ZERO, T( 1, N ), 1 ) +* +* A(I:M,I+1:N) = A(I:m,I+1:N) + alpha*A(I:M,I)*W(1:N-1)^H +* + ALPHA = -CONJG(T( I, 1 )) + CALL ZGERC( M-I+1, N-I, ALPHA, A( I, I ), 1, + $ T( 1, N ), 1, A( I, I+1 ), LDA ) + A( I, I ) = AII + END IF + END DO +* + DO I = 2, N + AII = A( I, I ) + A( I, I ) = ONE +* +* T(1:I-1,I) := alpha * A(I:M,1:I-1)**H * A(I:M,I) +* + ALPHA = -T( I, 1 ) + CALL ZGEMV( 'C', M-I+1, I-1, ALPHA, A( I, 1 ), LDA, + $ A( I, I ), 1, ZERO, T( 1, I ), 1 ) + A( I, I ) = AII +* +* T(1:I-1,I) := T(1:I-1,1:I-1) * T(1:I-1,I) +* + CALL ZTRMV( 'U', 'N', 'N', I-1, T, LDT, T( 1, I ), 1 ) +* +* T(I,I) = tau(I) +* + T( I, I ) = T( I, 1 ) + T( I, 1) = ZERO + END DO + +* +* End of ZGEQRT2 +* + END diff --git a/dspl/liblapack/SRC/zgeqrt3.f b/dspl/liblapack/SRC/zgeqrt3.f new file mode 100644 index 0000000..e0f2281 --- /dev/null +++ b/dspl/liblapack/SRC/zgeqrt3.f @@ -0,0 +1,257 @@ +*> \brief \b ZGEQRT3 recursively computes a QR factorization of a general real or complex matrix using the compact WY representation of Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGEQRT3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE ZGEQRT3( M, N, A, LDA, T, LDT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, LDT +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEQRT3 recursively computes a QR factorization of a complex M-by-N +*> matrix A, using the compact WY representation of Q. +*> +*> Based on the algorithm of Elmroth and Gustavson, +*> IBM J. Res. Develop. Vol 44 No. 4 July 2000. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= N. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the complex M-by-N matrix A. On exit, the elements on +*> and above the diagonal contain the N-by-N upper triangular matrix R; +*> the elements below the diagonal are the columns of V. See below for +*> further details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,N) +*> The N-by-N upper triangular factor of the block reflector. +*> The elements on and above the diagonal contain the block +*> reflector T; the elements below the diagonal are not used. +*> See below for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix V stores the elementary reflectors H(i) in the i-th column +*> below the diagonal. For example, if M=5 and N=3, the matrix V is +*> +*> V = ( 1 ) +*> ( v1 1 ) +*> ( v1 v2 1 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> where the vi's represent the vectors which define H(i), which are returned +*> in the matrix A. The 1's along the diagonal of V are not stored in A. The +*> block reflector H is then given by +*> +*> H = I - V * T * V**H +*> +*> where V**H is the conjugate transpose of V. +*> +*> For details of the algorithm, see Elmroth and Gustavson (cited above). +*> \endverbatim +*> +* ===================================================================== + RECURSIVE SUBROUTINE ZGEQRT3( M, N, A, LDA, T, LDT, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, LDT +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = (1.0D+00,0.0D+00) ) +* .. +* .. Local Scalars .. + INTEGER I, I1, J, J1, N1, N2, IINFO +* .. +* .. External Subroutines .. + EXTERNAL ZLARFG, ZTRMM, ZGEMM, XERBLA +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N .LT. 0 ) THEN + INFO = -2 + ELSE IF( M .LT. N ) THEN + INFO = -1 + ELSE IF( LDA .LT. MAX( 1, M ) ) THEN + INFO = -4 + ELSE IF( LDT .LT. MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGEQRT3', -INFO ) + RETURN + END IF +* + IF( N.EQ.1 ) THEN +* +* Compute Householder transform when N=1 +* + CALL ZLARFG( M, A(1,1), A( MIN( 2, M ), 1 ), 1, T(1,1) ) +* + ELSE +* +* Otherwise, split A into blocks... +* + N1 = N/2 + N2 = N-N1 + J1 = MIN( N1+1, N ) + I1 = MIN( N+1, M ) +* +* Compute A(1:M,1:N1) <- (Y1,R1,T1), where Q1 = I - Y1 T1 Y1^H +* + CALL ZGEQRT3( M, N1, A, LDA, T, LDT, IINFO ) +* +* Compute A(1:M,J1:N) = Q1^H A(1:M,J1:N) [workspace: T(1:N1,J1:N)] +* + DO J=1,N2 + DO I=1,N1 + T( I, J+N1 ) = A( I, J+N1 ) + END DO + END DO + CALL ZTRMM( 'L', 'L', 'C', 'U', N1, N2, ONE, + & A, LDA, T( 1, J1 ), LDT ) +* + CALL ZGEMM( 'C', 'N', N1, N2, M-N1, ONE, A( J1, 1 ), LDA, + & A( J1, J1 ), LDA, ONE, T( 1, J1 ), LDT) +* + CALL ZTRMM( 'L', 'U', 'C', 'N', N1, N2, ONE, + & T, LDT, T( 1, J1 ), LDT ) +* + CALL ZGEMM( 'N', 'N', M-N1, N2, N1, -ONE, A( J1, 1 ), LDA, + & T( 1, J1 ), LDT, ONE, A( J1, J1 ), LDA ) +* + CALL ZTRMM( 'L', 'L', 'N', 'U', N1, N2, ONE, + & A, LDA, T( 1, J1 ), LDT ) +* + DO J=1,N2 + DO I=1,N1 + A( I, J+N1 ) = A( I, J+N1 ) - T( I, J+N1 ) + END DO + END DO +* +* Compute A(J1:M,J1:N) <- (Y2,R2,T2) where Q2 = I - Y2 T2 Y2^H +* + CALL ZGEQRT3( M-N1, N2, A( J1, J1 ), LDA, + & T( J1, J1 ), LDT, IINFO ) +* +* Compute T3 = T(1:N1,J1:N) = -T1 Y1^H Y2 T2 +* + DO I=1,N1 + DO J=1,N2 + T( I, J+N1 ) = CONJG(A( J+N1, I )) + END DO + END DO +* + CALL ZTRMM( 'R', 'L', 'N', 'U', N1, N2, ONE, + & A( J1, J1 ), LDA, T( 1, J1 ), LDT ) +* + CALL ZGEMM( 'C', 'N', N1, N2, M-N, ONE, A( I1, 1 ), LDA, + & A( I1, J1 ), LDA, ONE, T( 1, J1 ), LDT ) +* + CALL ZTRMM( 'L', 'U', 'N', 'N', N1, N2, -ONE, T, LDT, + & T( 1, J1 ), LDT ) +* + CALL ZTRMM( 'R', 'U', 'N', 'N', N1, N2, ONE, + & T( J1, J1 ), LDT, T( 1, J1 ), LDT ) +* +* Y = (Y1,Y2); R = [ R1 A(1:N1,J1:N) ]; T = [T1 T3] +* [ 0 R2 ] [ 0 T2] +* + END IF +* + RETURN +* +* End of ZGEQRT3 +* + END diff --git a/dspl/liblapack/SRC/zgerfs.f b/dspl/liblapack/SRC/zgerfs.f new file mode 100644 index 0000000..4d09922 --- /dev/null +++ b/dspl/liblapack/SRC/zgerfs.f @@ -0,0 +1,448 @@ +*> \brief \b ZGERFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGERFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, +* X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGERFS improves the computed solution to a system of linear +*> equations and provides error bounds and backward error estimates for +*> the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The original N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The factors L and U from the factorization A = P*L*U +*> as computed by ZGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from ZGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by ZGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +* ===================================================================== + SUBROUTINE ZGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, + $ X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANSN, TRANST + INTEGER COUNT, I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZGEMV, ZGETRS, ZLACN2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGERFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL ZGEMV( TRANS, N, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, + $ 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(op(A))*abs(X) + abs(B). +* + IF( NOTRAN ) THEN + DO 50 K = 1, N + XK = CABS1( X( K, J ) ) + DO 40 I = 1, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + 40 CONTINUE + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + DO 60 I = 1, N + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZGETRS( TRANS, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL ZGETRS( TRANST, N, 1, AF, LDAF, IPIV, WORK, N, + $ INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL ZGETRS( TRANSN, N, 1, AF, LDAF, IPIV, WORK, N, + $ INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of ZGERFS +* + END diff --git a/dspl/liblapack/SRC/zgerfsx.f b/dspl/liblapack/SRC/zgerfsx.f new file mode 100644 index 0000000..5aabe50 --- /dev/null +++ b/dspl/liblapack/SRC/zgerfsx.f @@ -0,0 +1,734 @@ +*> \brief \b ZGERFSX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGERFSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGERFSX( TRANS, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, +* R, C, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, +* WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS, EQUED +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX , * ), WORK( * ) +* DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGERFSX improves the computed solution to a system of linear +*> equations and provides error bounds and backward error estimates +*> for the solution. In addition to normwise error bound, the code +*> provides maximum componentwise error bound if possible. See +*> comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the +*> error bounds. +*> +*> The original system of linear equations may have been equilibrated +*> before calling this routine, as described by arguments EQUED, R +*> and C below. In this case, the solution and error bounds returned +*> are for the original unequilibrated system. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done to A +*> before calling this routine. This is needed to compute +*> the solution and error bounds correctly. +*> = 'N': No equilibration +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> The right hand side B has been changed accordingly. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The original N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The factors L and U from the factorization A = P*L*U +*> as computed by ZGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from ZGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. +*> If R is accessed, each element of R should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. +*> If C is accessed, each element of C should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by ZGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is DOUBLE PRECISION array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0D+0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +* ===================================================================== + SUBROUTINE ZGERFSX( TRANS, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ R, C, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS, EQUED + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX , * ), WORK( * ) + DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION ITREF_DEFAULT, ITHRESH_DEFAULT + DOUBLE PRECISION COMPONENTWISE_DEFAULT, RTHRESH_DEFAULT + DOUBLE PRECISION DZTHRESH_DEFAULT + PARAMETER ( ITREF_DEFAULT = 1.0D+0 ) + PARAMETER ( ITHRESH_DEFAULT = 10.0D+0 ) + PARAMETER ( COMPONENTWISE_DEFAULT = 1.0D+0 ) + PARAMETER ( RTHRESH_DEFAULT = 0.5D+0 ) + PARAMETER ( DZTHRESH_DEFAULT = 0.25D+0 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. Local Scalars .. + CHARACTER(1) NORM + LOGICAL ROWEQU, COLEQU, NOTRAN + INTEGER J, TRANS_TYPE, PREC_TYPE, REF_TYPE + INTEGER N_NORMS + DOUBLE PRECISION ANORM, RCOND_TMP + DOUBLE PRECISION ILLRCOND_THRESH, ERR_LBND, CWISE_WRONG + LOGICAL IGNORE_CWISE + INTEGER ITHRESH + DOUBLE PRECISION RTHRESH, UNSTABLE_THRESH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGECON, ZLA_GERFSX_EXTENDED +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT, TRANSFER +* .. +* .. External Functions .. + EXTERNAL LSAME, ILATRANS, ILAPREC + EXTERNAL DLAMCH, ZLANGE, ZLA_GERCOND_X, ZLA_GERCOND_C + DOUBLE PRECISION DLAMCH, ZLANGE, ZLA_GERCOND_X, ZLA_GERCOND_C + LOGICAL LSAME + INTEGER ILATRANS, ILAPREC +* .. +* .. Executable Statements .. +* +* Check the input parameters. +* + INFO = 0 + TRANS_TYPE = ILATRANS( TRANS ) + REF_TYPE = INT( ITREF_DEFAULT ) + IF ( NPARAMS .GE. LA_LINRX_ITREF_I ) THEN + IF ( PARAMS( LA_LINRX_ITREF_I ) .LT. 0.0D+0 ) THEN + PARAMS( LA_LINRX_ITREF_I ) = ITREF_DEFAULT + ELSE + REF_TYPE = PARAMS( LA_LINRX_ITREF_I ) + END IF + END IF +* +* Set default parameters. +* + ILLRCOND_THRESH = DBLE( N ) * DLAMCH( 'Epsilon' ) + ITHRESH = INT( ITHRESH_DEFAULT ) + RTHRESH = RTHRESH_DEFAULT + UNSTABLE_THRESH = DZTHRESH_DEFAULT + IGNORE_CWISE = COMPONENTWISE_DEFAULT .EQ. 0.0D+0 +* + IF ( NPARAMS.GE.LA_LINRX_ITHRESH_I ) THEN + IF ( PARAMS( LA_LINRX_ITHRESH_I ).LT.0.0D+0 ) THEN + PARAMS(LA_LINRX_ITHRESH_I) = ITHRESH + ELSE + ITHRESH = INT( PARAMS( LA_LINRX_ITHRESH_I ) ) + END IF + END IF + IF ( NPARAMS.GE.LA_LINRX_CWISE_I ) THEN + IF ( PARAMS( LA_LINRX_CWISE_I ).LT.0.0D+0 ) THEN + IF ( IGNORE_CWISE ) THEN + PARAMS( LA_LINRX_CWISE_I ) = 0.0D+0 + ELSE + PARAMS( LA_LINRX_CWISE_I ) = 1.0D+0 + END IF + ELSE + IGNORE_CWISE = PARAMS( LA_LINRX_CWISE_I ) .EQ. 0.0D+0 + END IF + END IF + IF ( REF_TYPE .EQ. 0 .OR. N_ERR_BNDS .EQ. 0 ) THEN + N_NORMS = 0 + ELSE IF ( IGNORE_CWISE ) THEN + N_NORMS = 1 + ELSE + N_NORMS = 2 + END IF +* + NOTRAN = LSAME( TRANS, 'N' ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) +* +* Test input parameters. +* + IF( TRANS_TYPE.EQ.-1 ) THEN + INFO = -1 + ELSE IF( .NOT.ROWEQU .AND. .NOT.COLEQU .AND. + $ .NOT.LSAME( EQUED, 'N' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGERFSX', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + RCOND = 1.0D+0 + DO J = 1, NRHS + BERR( J ) = 0.0D+0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 0.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 0.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 1.0D+0 + END IF + END DO + RETURN + END IF +* +* Default to failure. +* + RCOND = 0.0D+0 + DO J = 1, NRHS + BERR( J ) = 1.0D+0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 0.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 0.0D+0 + END IF + END DO +* +* Compute the norm of A and the reciprocal of the condition +* number of A. +* + IF( NOTRAN ) THEN + NORM = 'I' + ELSE + NORM = '1' + END IF + ANORM = ZLANGE( NORM, N, N, A, LDA, RWORK ) + CALL ZGECON( NORM, N, AF, LDAF, ANORM, RCOND, WORK, RWORK, INFO ) +* +* Perform refinement on each right-hand side +* + IF ( REF_TYPE .NE. 0 ) THEN + + PREC_TYPE = ILAPREC( 'E' ) + + IF ( NOTRAN ) THEN + CALL ZLA_GERFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, + $ NRHS, A, LDA, AF, LDAF, IPIV, COLEQU, C, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, WORK, RWORK, WORK(N+1), + $ TRANSFER (RWORK(1:2*N), (/ (ZERO, ZERO) /), N), + $ RCOND, ITHRESH, RTHRESH, UNSTABLE_THRESH, IGNORE_CWISE, + $ INFO ) + ELSE + CALL ZLA_GERFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, + $ NRHS, A, LDA, AF, LDAF, IPIV, ROWEQU, R, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, WORK, RWORK, WORK(N+1), + $ TRANSFER (RWORK(1:2*N), (/ (ZERO, ZERO) /), N), + $ RCOND, ITHRESH, RTHRESH, UNSTABLE_THRESH, IGNORE_CWISE, + $ INFO ) + END IF + END IF + + ERR_LBND = MAX( 10.0D+0, SQRT( DBLE( N ) ) ) * DLAMCH( 'Epsilon' ) + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 1 ) THEN +* +* Compute scaled normwise condition number cond(A*C). +* + IF ( COLEQU .AND. NOTRAN ) THEN + RCOND_TMP = ZLA_GERCOND_C( TRANS, N, A, LDA, AF, LDAF, IPIV, + $ C, .TRUE., INFO, WORK, RWORK ) + ELSE IF ( ROWEQU .AND. .NOT. NOTRAN ) THEN + RCOND_TMP = ZLA_GERCOND_C( TRANS, N, A, LDA, AF, LDAF, IPIV, + $ R, .TRUE., INFO, WORK, RWORK ) + ELSE + RCOND_TMP = ZLA_GERCOND_C( TRANS, N, A, LDA, AF, LDAF, IPIV, + $ C, .FALSE., INFO, WORK, RWORK ) + END IF + DO J = 1, NRHS +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .GT. 1.0D+0 ) + $ ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 0.0D+0 + IF ( INFO .LE. N ) INFO = N + J + ELSE IF (ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .LT. ERR_LBND) + $ THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + END DO + END IF + + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 2 ) THEN +* +* Compute componentwise condition number cond(A*diag(Y(:,J))) for +* each right-hand side using the current solution as an estimate of +* the true solution. If the componentwise error estimate is too +* large, then the solution is a lousy estimate of truth and the +* estimated RCOND may be too optimistic. To avoid misleading users, +* the inverse condition number is set to 0.0 when the estimated +* cwise error is at least CWISE_WRONG. +* + CWISE_WRONG = SQRT( DLAMCH( 'Epsilon' ) ) + DO J = 1, NRHS + IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .LT. CWISE_WRONG ) + $ THEN + RCOND_TMP = ZLA_GERCOND_X( TRANS, N, A, LDA, AF, LDAF, + $ IPIV, X(1,J), INFO, WORK, RWORK ) + ELSE + RCOND_TMP = 0.0D+0 + END IF +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .GT. 1.0D+0 ) + $ ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 0.0D+0 + IF ( PARAMS( LA_LINRX_CWISE_I ) .EQ. 1.0D+0 + $ .AND. INFO.LT.N + J ) INFO = N + J + ELSE IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) + $ .LT. ERR_LBND ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF +* + RETURN +* +* End of ZGERFSX +* + END diff --git a/dspl/liblapack/SRC/zgerq2.f b/dspl/liblapack/SRC/zgerq2.f new file mode 100644 index 0000000..73c1e53 --- /dev/null +++ b/dspl/liblapack/SRC/zgerq2.f @@ -0,0 +1,194 @@ +*> \brief \b ZGERQ2 computes the RQ factorization of a general rectangular matrix using an unblocked algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGERQ2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGERQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGERQ2 computes an RQ factorization of a complex m by n matrix A: +*> A = R * Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, if m <= n, the upper triangle of the subarray +*> A(1:m,n-m+1:n) contains the m by m upper triangular matrix R; +*> if m >= n, the elements on and above the (m-n)-th subdiagonal +*> contain the m by n upper trapezoidal matrix R; the remaining +*> elements, with the array TAU, represent the unitary matrix +*> Q as a product of elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1)**H H(2)**H . . . H(k)**H, where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on +*> exit in A(m-k+i,1:n-k+i-1), and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGERQ2( M, N, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, K + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACGV, ZLARF, ZLARFG +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGERQ2', -INFO ) + RETURN + END IF +* + K = MIN( M, N ) +* + DO 10 I = K, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* A(m-k+i,1:n-k+i-1) +* + CALL ZLACGV( N-K+I, A( M-K+I, 1 ), LDA ) + ALPHA = A( M-K+I, N-K+I ) + CALL ZLARFG( N-K+I, ALPHA, A( M-K+I, 1 ), LDA, TAU( I ) ) +* +* Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right +* + A( M-K+I, N-K+I ) = ONE + CALL ZLARF( 'Right', M-K+I-1, N-K+I, A( M-K+I, 1 ), LDA, + $ TAU( I ), A, LDA, WORK ) + A( M-K+I, N-K+I ) = ALPHA + CALL ZLACGV( N-K+I-1, A( M-K+I, 1 ), LDA ) + 10 CONTINUE + RETURN +* +* End of ZGERQ2 +* + END diff --git a/dspl/liblapack/SRC/zgerqf.f b/dspl/liblapack/SRC/zgerqf.f new file mode 100644 index 0000000..ebc7d38 --- /dev/null +++ b/dspl/liblapack/SRC/zgerqf.f @@ -0,0 +1,287 @@ +*> \brief \b ZGERQF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGERQF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGERQF computes an RQ factorization of a complex M-by-N matrix A: +*> A = R * Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if m <= n, the upper triangle of the subarray +*> A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R; +*> if m >= n, the elements on and above the (m-n)-th subdiagonal +*> contain the M-by-N upper trapezoidal matrix R; +*> the remaining elements, with the array TAU, represent the +*> unitary matrix Q as a product of min(m,n) elementary +*> reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1)**H H(2)**H . . . H(k)**H, where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on +*> exit in A(m-k+i,1:n-k+i-1), and tau in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT, + $ MU, NB, NBMIN, NU, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGERQ2, ZLARFB, ZLARFT +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF +* + IF( INFO.EQ.0 ) THEN + K = MIN( M, N ) + IF( K.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'ZGERQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGERQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( K.EQ.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZGERQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZGERQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code initially. +* The last kk rows are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* + DO 10 I = K - KK + KI + 1, K - KK + 1, -NB + IB = MIN( K-I+1, NB ) +* +* Compute the RQ factorization of the current block +* A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1) +* + CALL ZGERQ2( IB, N-K+I+IB-1, A( M-K+I, 1 ), LDA, TAU( I ), + $ WORK, IINFO ) + IF( M-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL ZLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, + $ A( M-K+I, 1 ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right +* + CALL ZLARFB( 'Right', 'No transpose', 'Backward', + $ 'Rowwise', M-K+I-1, N-K+I+IB-1, IB, + $ A( M-K+I, 1 ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 10 CONTINUE + MU = M - K + I + NB - 1 + NU = N - K + I + NB - 1 + ELSE + MU = M + NU = N + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 .AND. NU.GT.0 ) + $ CALL ZGERQ2( MU, NU, A, LDA, TAU, WORK, IINFO ) +* + WORK( 1 ) = IWS + RETURN +* +* End of ZGERQF +* + END diff --git a/dspl/liblapack/SRC/zgesc2.f b/dspl/liblapack/SRC/zgesc2.f new file mode 100644 index 0000000..72ef99d --- /dev/null +++ b/dspl/liblapack/SRC/zgesc2.f @@ -0,0 +1,202 @@ +*> \brief \b ZGESC2 solves a system of linear equations using the LU factorization with complete pivoting computed by sgetc2. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGESC2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) +* +* .. Scalar Arguments .. +* INTEGER LDA, N +* DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), JPIV( * ) +* COMPLEX*16 A( LDA, * ), RHS( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGESC2 solves a system of linear equations +*> +*> A * X = scale* RHS +*> +*> with a general N-by-N matrix A using the LU factorization with +*> complete pivoting computed by ZGETC2. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the LU part of the factorization of the n-by-n +*> matrix A computed by ZGETC2: A = P * L * U * Q +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] RHS +*> \verbatim +*> RHS is COMPLEX*16 array, dimension N. +*> On entry, the right hand side vector b. +*> On exit, the solution vector X. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N). +*> The pivot indices; for 1 <= i <= N, row i of the +*> matrix has been interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] JPIV +*> \verbatim +*> JPIV is INTEGER array, dimension (N). +*> The pivot indices; for 1 <= j <= N, column j of the +*> matrix has been interchanged with column JPIV(j). +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> On exit, SCALE contains the scale factor. SCALE is chosen +*> 0 <= SCALE <= 1 to prevent owerflow in the solution. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16GEauxiliary +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== + SUBROUTINE ZGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER LDA, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + COMPLEX*16 A( LDA, * ), RHS( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION BIGNUM, EPS, SMLNUM + COMPLEX*16 TEMP +* .. +* .. External Subroutines .. + EXTERNAL ZLASWP, ZSCAL, DLABAD +* .. +* .. External Functions .. + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL IZAMAX, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX +* .. +* .. Executable Statements .. +* +* Set constant to control overflow +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Apply permutations IPIV to RHS +* + CALL ZLASWP( 1, RHS, LDA, 1, N-1, IPIV, 1 ) +* +* Solve for L part +* + DO 20 I = 1, N - 1 + DO 10 J = I + 1, N + RHS( J ) = RHS( J ) - A( J, I )*RHS( I ) + 10 CONTINUE + 20 CONTINUE +* +* Solve for U part +* + SCALE = ONE +* +* Check for scaling +* + I = IZAMAX( N, RHS, 1 ) + IF( TWO*SMLNUM*ABS( RHS( I ) ).GT.ABS( A( N, N ) ) ) THEN + TEMP = DCMPLX( ONE / TWO, ZERO ) / ABS( RHS( I ) ) + CALL ZSCAL( N, TEMP, RHS( 1 ), 1 ) + SCALE = SCALE*DBLE( TEMP ) + END IF + DO 40 I = N, 1, -1 + TEMP = DCMPLX( ONE, ZERO ) / A( I, I ) + RHS( I ) = RHS( I )*TEMP + DO 30 J = I + 1, N + RHS( I ) = RHS( I ) - RHS( J )*( A( I, J )*TEMP ) + 30 CONTINUE + 40 CONTINUE +* +* Apply permutations JPIV to the solution (RHS) +* + CALL ZLASWP( 1, RHS, LDA, 1, N-1, JPIV, -1 ) + RETURN +* +* End of ZGESC2 +* + END diff --git a/dspl/liblapack/SRC/zgesdd.f b/dspl/liblapack/SRC/zgesdd.f new file mode 100644 index 0000000..bb9d2c2 --- /dev/null +++ b/dspl/liblapack/SRC/zgesdd.f @@ -0,0 +1,2220 @@ +*> \brief \b ZGESDD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGESDD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, +* WORK, LWORK, RWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ +* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION RWORK( * ), S( * ) +* COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGESDD computes the singular value decomposition (SVD) of a complex +*> M-by-N matrix A, optionally computing the left and/or right singular +*> vectors, by using divide-and-conquer method. The SVD is written +*> +*> A = U * SIGMA * conjugate-transpose(V) +*> +*> where SIGMA is an M-by-N matrix which is zero except for its +*> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and +*> V is an N-by-N unitary matrix. The diagonal elements of SIGMA +*> are the singular values of A; they are real and non-negative, and +*> are returned in descending order. The first min(m,n) columns of +*> U and V are the left and right singular vectors of A. +*> +*> Note that the routine returns VT = V**H, not V. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> Specifies options for computing all or part of the matrix U: +*> = 'A': all M columns of U and all N rows of V**H are +*> returned in the arrays U and VT; +*> = 'S': the first min(M,N) columns of U and the first +*> min(M,N) rows of V**H are returned in the arrays U +*> and VT; +*> = 'O': If M >= N, the first N columns of U are overwritten +*> in the array A and all rows of V**H are returned in +*> the array VT; +*> otherwise, all columns of U are returned in the +*> array U and the first M rows of V**H are overwritten +*> in the array A; +*> = 'N': no columns of U or rows of V**H are computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if JOBZ = 'O', A is overwritten with the first N columns +*> of U (the left singular vectors, stored +*> columnwise) if M >= N; +*> A is overwritten with the first M rows +*> of V**H (the right singular vectors, stored +*> rowwise) otherwise. +*> if JOBZ .ne. 'O', the contents of A are destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (min(M,N)) +*> The singular values of A, sorted so that S(i) >= S(i+1). +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX*16 array, dimension (LDU,UCOL) +*> UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; +*> UCOL = min(M,N) if JOBZ = 'S'. +*> If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M +*> unitary matrix U; +*> if JOBZ = 'S', U contains the first min(M,N) columns of U +*> (the left singular vectors, stored columnwise); +*> if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= 1; +*> if JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is COMPLEX*16 array, dimension (LDVT,N) +*> If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the +*> N-by-N unitary matrix V**H; +*> if JOBZ = 'S', VT contains the first min(M,N) rows of +*> V**H (the right singular vectors, stored rowwise); +*> if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= 1; +*> if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; +*> if JOBZ = 'S', LDVT >= min(M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 1. +*> If LWORK = -1, a workspace query is assumed. The optimal +*> size for the WORK array is calculated and stored in WORK(1), +*> and no other work except argument checking is performed. +*> +*> Let mx = max(M,N) and mn = min(M,N). +*> If JOBZ = 'N', LWORK >= 2*mn + mx. +*> If JOBZ = 'O', LWORK >= 2*mn*mn + 2*mn + mx. +*> If JOBZ = 'S', LWORK >= mn*mn + 3*mn. +*> If JOBZ = 'A', LWORK >= mn*mn + 2*mn + mx. +*> These are not tight minimums in all cases; see comments inside code. +*> For good performance, LWORK should generally be larger; +*> a query is recommended. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) +*> Let mx = max(M,N) and mn = min(M,N). +*> If JOBZ = 'N', LRWORK >= 5*mn (LAPACK <= 3.6 needs 7*mn); +*> else if mx >> mn, LRWORK >= 5*mn*mn + 5*mn; +*> else LRWORK >= max( 5*mn*mn + 5*mn, +*> 2*mx*mn + 2*mn*mn + mn ). +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (8*min(M,N)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: The updating process of DBDSDC did not converge. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16GEsing +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE ZGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, + $ WORK, LWORK, RWORK, IWORK, INFO ) + implicit none +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ + INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ), S( * ) + COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS + INTEGER BLK, CHUNK, I, IE, IERR, IL, IR, IRU, IRVT, + $ ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT, + $ LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK, + $ MNTHR1, MNTHR2, NRWORK, NWORK, WRKBL + INTEGER LWORK_ZGEBRD_MN, LWORK_ZGEBRD_MM, + $ LWORK_ZGEBRD_NN, LWORK_ZGELQF_MN, + $ LWORK_ZGEQRF_MN, + $ LWORK_ZUNGBR_P_MN, LWORK_ZUNGBR_P_NN, + $ LWORK_ZUNGBR_Q_MN, LWORK_ZUNGBR_Q_MM, + $ LWORK_ZUNGLQ_MN, LWORK_ZUNGLQ_NN, + $ LWORK_ZUNGQR_MM, LWORK_ZUNGQR_MN, + $ LWORK_ZUNMBR_PRC_MM, LWORK_ZUNMBR_QLN_MM, + $ LWORK_ZUNMBR_PRC_MN, LWORK_ZUNMBR_QLN_MN, + $ LWORK_ZUNMBR_PRC_NN, LWORK_ZUNMBR_QLN_NN + DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + DOUBLE PRECISION DUM( 1 ) + COMPLEX*16 CDUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DBDSDC, DLASCL, XERBLA, ZGEBRD, ZGELQF, ZGEMM, + $ ZGEQRF, ZLACP2, ZLACPY, ZLACRM, ZLARCM, ZLASCL, + $ ZLASET, ZUNGBR, ZUNGLQ, ZUNGQR, ZUNMBR +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + MNTHR1 = INT( MINMN*17.0D0 / 9.0D0 ) + MNTHR2 = INT( MINMN*5.0D0 / 3.0D0 ) + WNTQA = LSAME( JOBZ, 'A' ) + WNTQS = LSAME( JOBZ, 'S' ) + WNTQAS = WNTQA .OR. WNTQS + WNTQO = LSAME( JOBZ, 'O' ) + WNTQN = LSAME( JOBZ, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) + MINWRK = 1 + MAXWRK = 1 +* + IF( .NOT.( WNTQA .OR. WNTQS .OR. WNTQO .OR. WNTQN ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDU.LT.1 .OR. ( WNTQAS .AND. LDU.LT.M ) .OR. + $ ( WNTQO .AND. M.LT.N .AND. LDU.LT.M ) ) THEN + INFO = -8 + ELSE IF( LDVT.LT.1 .OR. ( WNTQA .AND. LDVT.LT.N ) .OR. + $ ( WNTQS .AND. LDVT.LT.MINMN ) .OR. + $ ( WNTQO .AND. M.GE.N .AND. LDVT.LT.N ) ) THEN + INFO = -10 + END IF +* +* Compute workspace +* Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace allocated at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to +* real workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + IF( M.GE.N .AND. MINMN.GT.0 ) THEN +* +* There is no complex work space needed for bidiagonal SVD +* The real work space needed for bidiagonal SVD (dbdsdc) is +* BDSPAC = 3*N*N + 4*N for singular values and vectors; +* BDSPAC = 4*N for singular values only; +* not including e, RU, and RVT matrices. +* +* Compute space preferred for each routine + CALL ZGEBRD( M, N, CDUM(1), M, DUM(1), DUM(1), CDUM(1), + $ CDUM(1), CDUM(1), -1, IERR ) + LWORK_ZGEBRD_MN = INT( CDUM(1) ) +* + CALL ZGEBRD( N, N, CDUM(1), N, DUM(1), DUM(1), CDUM(1), + $ CDUM(1), CDUM(1), -1, IERR ) + LWORK_ZGEBRD_NN = INT( CDUM(1) ) +* + CALL ZGEQRF( M, N, CDUM(1), M, CDUM(1), CDUM(1), -1, IERR ) + LWORK_ZGEQRF_MN = INT( CDUM(1) ) +* + CALL ZUNGBR( 'P', N, N, N, CDUM(1), N, CDUM(1), CDUM(1), + $ -1, IERR ) + LWORK_ZUNGBR_P_NN = INT( CDUM(1) ) +* + CALL ZUNGBR( 'Q', M, M, N, CDUM(1), M, CDUM(1), CDUM(1), + $ -1, IERR ) + LWORK_ZUNGBR_Q_MM = INT( CDUM(1) ) +* + CALL ZUNGBR( 'Q', M, N, N, CDUM(1), M, CDUM(1), CDUM(1), + $ -1, IERR ) + LWORK_ZUNGBR_Q_MN = INT( CDUM(1) ) +* + CALL ZUNGQR( M, M, N, CDUM(1), M, CDUM(1), CDUM(1), + $ -1, IERR ) + LWORK_ZUNGQR_MM = INT( CDUM(1) ) +* + CALL ZUNGQR( M, N, N, CDUM(1), M, CDUM(1), CDUM(1), + $ -1, IERR ) + LWORK_ZUNGQR_MN = INT( CDUM(1) ) +* + CALL ZUNMBR( 'P', 'R', 'C', N, N, N, CDUM(1), N, CDUM(1), + $ CDUM(1), N, CDUM(1), -1, IERR ) + LWORK_ZUNMBR_PRC_NN = INT( CDUM(1) ) +* + CALL ZUNMBR( 'Q', 'L', 'N', M, M, N, CDUM(1), M, CDUM(1), + $ CDUM(1), M, CDUM(1), -1, IERR ) + LWORK_ZUNMBR_QLN_MM = INT( CDUM(1) ) +* + CALL ZUNMBR( 'Q', 'L', 'N', M, N, N, CDUM(1), M, CDUM(1), + $ CDUM(1), M, CDUM(1), -1, IERR ) + LWORK_ZUNMBR_QLN_MN = INT( CDUM(1) ) +* + CALL ZUNMBR( 'Q', 'L', 'N', N, N, N, CDUM(1), N, CDUM(1), + $ CDUM(1), N, CDUM(1), -1, IERR ) + LWORK_ZUNMBR_QLN_NN = INT( CDUM(1) ) +* + IF( M.GE.MNTHR1 ) THEN + IF( WNTQN ) THEN +* +* Path 1 (M >> N, JOBZ='N') +* + MAXWRK = N + LWORK_ZGEQRF_MN + MAXWRK = MAX( MAXWRK, 2*N + LWORK_ZGEBRD_NN ) + MINWRK = 3*N + ELSE IF( WNTQO ) THEN +* +* Path 2 (M >> N, JOBZ='O') +* + WRKBL = N + LWORK_ZGEQRF_MN + WRKBL = MAX( WRKBL, N + LWORK_ZUNGQR_MN ) + WRKBL = MAX( WRKBL, 2*N + LWORK_ZGEBRD_NN ) + WRKBL = MAX( WRKBL, 2*N + LWORK_ZUNMBR_QLN_NN ) + WRKBL = MAX( WRKBL, 2*N + LWORK_ZUNMBR_PRC_NN ) + MAXWRK = M*N + N*N + WRKBL + MINWRK = 2*N*N + 3*N + ELSE IF( WNTQS ) THEN +* +* Path 3 (M >> N, JOBZ='S') +* + WRKBL = N + LWORK_ZGEQRF_MN + WRKBL = MAX( WRKBL, N + LWORK_ZUNGQR_MN ) + WRKBL = MAX( WRKBL, 2*N + LWORK_ZGEBRD_NN ) + WRKBL = MAX( WRKBL, 2*N + LWORK_ZUNMBR_QLN_NN ) + WRKBL = MAX( WRKBL, 2*N + LWORK_ZUNMBR_PRC_NN ) + MAXWRK = N*N + WRKBL + MINWRK = N*N + 3*N + ELSE IF( WNTQA ) THEN +* +* Path 4 (M >> N, JOBZ='A') +* + WRKBL = N + LWORK_ZGEQRF_MN + WRKBL = MAX( WRKBL, N + LWORK_ZUNGQR_MM ) + WRKBL = MAX( WRKBL, 2*N + LWORK_ZGEBRD_NN ) + WRKBL = MAX( WRKBL, 2*N + LWORK_ZUNMBR_QLN_NN ) + WRKBL = MAX( WRKBL, 2*N + LWORK_ZUNMBR_PRC_NN ) + MAXWRK = N*N + WRKBL + MINWRK = N*N + MAX( 3*N, N + M ) + END IF + ELSE IF( M.GE.MNTHR2 ) THEN +* +* Path 5 (M >> N, but not as much as MNTHR1) +* + MAXWRK = 2*N + LWORK_ZGEBRD_MN + MINWRK = 2*N + M + IF( WNTQO ) THEN +* Path 5o (M >> N, JOBZ='O') + MAXWRK = MAX( MAXWRK, 2*N + LWORK_ZUNGBR_P_NN ) + MAXWRK = MAX( MAXWRK, 2*N + LWORK_ZUNGBR_Q_MN ) + MAXWRK = MAXWRK + M*N + MINWRK = MINWRK + N*N + ELSE IF( WNTQS ) THEN +* Path 5s (M >> N, JOBZ='S') + MAXWRK = MAX( MAXWRK, 2*N + LWORK_ZUNGBR_P_NN ) + MAXWRK = MAX( MAXWRK, 2*N + LWORK_ZUNGBR_Q_MN ) + ELSE IF( WNTQA ) THEN +* Path 5a (M >> N, JOBZ='A') + MAXWRK = MAX( MAXWRK, 2*N + LWORK_ZUNGBR_P_NN ) + MAXWRK = MAX( MAXWRK, 2*N + LWORK_ZUNGBR_Q_MM ) + END IF + ELSE +* +* Path 6 (M >= N, but not much larger) +* + MAXWRK = 2*N + LWORK_ZGEBRD_MN + MINWRK = 2*N + M + IF( WNTQO ) THEN +* Path 6o (M >= N, JOBZ='O') + MAXWRK = MAX( MAXWRK, 2*N + LWORK_ZUNMBR_PRC_NN ) + MAXWRK = MAX( MAXWRK, 2*N + LWORK_ZUNMBR_QLN_MN ) + MAXWRK = MAXWRK + M*N + MINWRK = MINWRK + N*N + ELSE IF( WNTQS ) THEN +* Path 6s (M >= N, JOBZ='S') + MAXWRK = MAX( MAXWRK, 2*N + LWORK_ZUNMBR_QLN_MN ) + MAXWRK = MAX( MAXWRK, 2*N + LWORK_ZUNMBR_PRC_NN ) + ELSE IF( WNTQA ) THEN +* Path 6a (M >= N, JOBZ='A') + MAXWRK = MAX( MAXWRK, 2*N + LWORK_ZUNMBR_QLN_MM ) + MAXWRK = MAX( MAXWRK, 2*N + LWORK_ZUNMBR_PRC_NN ) + END IF + END IF + ELSE IF( MINMN.GT.0 ) THEN +* +* There is no complex work space needed for bidiagonal SVD +* The real work space needed for bidiagonal SVD (dbdsdc) is +* BDSPAC = 3*M*M + 4*M for singular values and vectors; +* BDSPAC = 4*M for singular values only; +* not including e, RU, and RVT matrices. +* +* Compute space preferred for each routine + CALL ZGEBRD( M, N, CDUM(1), M, DUM(1), DUM(1), CDUM(1), + $ CDUM(1), CDUM(1), -1, IERR ) + LWORK_ZGEBRD_MN = INT( CDUM(1) ) +* + CALL ZGEBRD( M, M, CDUM(1), M, DUM(1), DUM(1), CDUM(1), + $ CDUM(1), CDUM(1), -1, IERR ) + LWORK_ZGEBRD_MM = INT( CDUM(1) ) +* + CALL ZGELQF( M, N, CDUM(1), M, CDUM(1), CDUM(1), -1, IERR ) + LWORK_ZGELQF_MN = INT( CDUM(1) ) +* + CALL ZUNGBR( 'P', M, N, M, CDUM(1), M, CDUM(1), CDUM(1), + $ -1, IERR ) + LWORK_ZUNGBR_P_MN = INT( CDUM(1) ) +* + CALL ZUNGBR( 'P', N, N, M, CDUM(1), N, CDUM(1), CDUM(1), + $ -1, IERR ) + LWORK_ZUNGBR_P_NN = INT( CDUM(1) ) +* + CALL ZUNGBR( 'Q', M, M, N, CDUM(1), M, CDUM(1), CDUM(1), + $ -1, IERR ) + LWORK_ZUNGBR_Q_MM = INT( CDUM(1) ) +* + CALL ZUNGLQ( M, N, M, CDUM(1), M, CDUM(1), CDUM(1), + $ -1, IERR ) + LWORK_ZUNGLQ_MN = INT( CDUM(1) ) +* + CALL ZUNGLQ( N, N, M, CDUM(1), N, CDUM(1), CDUM(1), + $ -1, IERR ) + LWORK_ZUNGLQ_NN = INT( CDUM(1) ) +* + CALL ZUNMBR( 'P', 'R', 'C', M, M, M, CDUM(1), M, CDUM(1), + $ CDUM(1), M, CDUM(1), -1, IERR ) + LWORK_ZUNMBR_PRC_MM = INT( CDUM(1) ) +* + CALL ZUNMBR( 'P', 'R', 'C', M, N, M, CDUM(1), M, CDUM(1), + $ CDUM(1), M, CDUM(1), -1, IERR ) + LWORK_ZUNMBR_PRC_MN = INT( CDUM(1) ) +* + CALL ZUNMBR( 'P', 'R', 'C', N, N, M, CDUM(1), N, CDUM(1), + $ CDUM(1), N, CDUM(1), -1, IERR ) + LWORK_ZUNMBR_PRC_NN = INT( CDUM(1) ) +* + CALL ZUNMBR( 'Q', 'L', 'N', M, M, M, CDUM(1), M, CDUM(1), + $ CDUM(1), M, CDUM(1), -1, IERR ) + LWORK_ZUNMBR_QLN_MM = INT( CDUM(1) ) +* + IF( N.GE.MNTHR1 ) THEN + IF( WNTQN ) THEN +* +* Path 1t (N >> M, JOBZ='N') +* + MAXWRK = M + LWORK_ZGELQF_MN + MAXWRK = MAX( MAXWRK, 2*M + LWORK_ZGEBRD_MM ) + MINWRK = 3*M + ELSE IF( WNTQO ) THEN +* +* Path 2t (N >> M, JOBZ='O') +* + WRKBL = M + LWORK_ZGELQF_MN + WRKBL = MAX( WRKBL, M + LWORK_ZUNGLQ_MN ) + WRKBL = MAX( WRKBL, 2*M + LWORK_ZGEBRD_MM ) + WRKBL = MAX( WRKBL, 2*M + LWORK_ZUNMBR_QLN_MM ) + WRKBL = MAX( WRKBL, 2*M + LWORK_ZUNMBR_PRC_MM ) + MAXWRK = M*N + M*M + WRKBL + MINWRK = 2*M*M + 3*M + ELSE IF( WNTQS ) THEN +* +* Path 3t (N >> M, JOBZ='S') +* + WRKBL = M + LWORK_ZGELQF_MN + WRKBL = MAX( WRKBL, M + LWORK_ZUNGLQ_MN ) + WRKBL = MAX( WRKBL, 2*M + LWORK_ZGEBRD_MM ) + WRKBL = MAX( WRKBL, 2*M + LWORK_ZUNMBR_QLN_MM ) + WRKBL = MAX( WRKBL, 2*M + LWORK_ZUNMBR_PRC_MM ) + MAXWRK = M*M + WRKBL + MINWRK = M*M + 3*M + ELSE IF( WNTQA ) THEN +* +* Path 4t (N >> M, JOBZ='A') +* + WRKBL = M + LWORK_ZGELQF_MN + WRKBL = MAX( WRKBL, M + LWORK_ZUNGLQ_NN ) + WRKBL = MAX( WRKBL, 2*M + LWORK_ZGEBRD_MM ) + WRKBL = MAX( WRKBL, 2*M + LWORK_ZUNMBR_QLN_MM ) + WRKBL = MAX( WRKBL, 2*M + LWORK_ZUNMBR_PRC_MM ) + MAXWRK = M*M + WRKBL + MINWRK = M*M + MAX( 3*M, M + N ) + END IF + ELSE IF( N.GE.MNTHR2 ) THEN +* +* Path 5t (N >> M, but not as much as MNTHR1) +* + MAXWRK = 2*M + LWORK_ZGEBRD_MN + MINWRK = 2*M + N + IF( WNTQO ) THEN +* Path 5to (N >> M, JOBZ='O') + MAXWRK = MAX( MAXWRK, 2*M + LWORK_ZUNGBR_Q_MM ) + MAXWRK = MAX( MAXWRK, 2*M + LWORK_ZUNGBR_P_MN ) + MAXWRK = MAXWRK + M*N + MINWRK = MINWRK + M*M + ELSE IF( WNTQS ) THEN +* Path 5ts (N >> M, JOBZ='S') + MAXWRK = MAX( MAXWRK, 2*M + LWORK_ZUNGBR_Q_MM ) + MAXWRK = MAX( MAXWRK, 2*M + LWORK_ZUNGBR_P_MN ) + ELSE IF( WNTQA ) THEN +* Path 5ta (N >> M, JOBZ='A') + MAXWRK = MAX( MAXWRK, 2*M + LWORK_ZUNGBR_Q_MM ) + MAXWRK = MAX( MAXWRK, 2*M + LWORK_ZUNGBR_P_NN ) + END IF + ELSE +* +* Path 6t (N > M, but not much larger) +* + MAXWRK = 2*M + LWORK_ZGEBRD_MN + MINWRK = 2*M + N + IF( WNTQO ) THEN +* Path 6to (N > M, JOBZ='O') + MAXWRK = MAX( MAXWRK, 2*M + LWORK_ZUNMBR_QLN_MM ) + MAXWRK = MAX( MAXWRK, 2*M + LWORK_ZUNMBR_PRC_MN ) + MAXWRK = MAXWRK + M*N + MINWRK = MINWRK + M*M + ELSE IF( WNTQS ) THEN +* Path 6ts (N > M, JOBZ='S') + MAXWRK = MAX( MAXWRK, 2*M + LWORK_ZUNMBR_QLN_MM ) + MAXWRK = MAX( MAXWRK, 2*M + LWORK_ZUNMBR_PRC_MN ) + ELSE IF( WNTQA ) THEN +* Path 6ta (N > M, JOBZ='A') + MAXWRK = MAX( MAXWRK, 2*M + LWORK_ZUNMBR_QLN_MM ) + MAXWRK = MAX( MAXWRK, 2*M + LWORK_ZUNMBR_PRC_NN ) + END IF + END IF + END IF + MAXWRK = MAX( MAXWRK, MINWRK ) + END IF + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = MAXWRK + IF( LWORK.LT.MINWRK .AND. .NOT. LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGESDD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce using the QR +* decomposition (if sufficient workspace available) +* + IF( M.GE.MNTHR1 ) THEN +* + IF( WNTQN ) THEN +* +* Path 1 (M >> N, JOBZ='N') +* No singular vectors to be computed +* + ITAU = 1 + NWORK = ITAU + N +* +* Compute A=Q*R +* CWorkspace: need N [tau] + N [work] +* CWorkspace: prefer N [tau] + N*NB [work] +* RWorkspace: need 0 +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Zero out below R +* + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A +* CWorkspace: need 2*N [tauq, taup] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + 2*N*NB [work] +* RWorkspace: need N [e] +* + CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + NRWORK = IE + N +* +* Perform bidiagonal SVD, compute singular values only +* CWorkspace: need 0 +* RWorkspace: need N [e] + BDSPAC +* + CALL DBDSDC( 'U', 'N', N, S, RWORK( IE ), DUM,1,DUM,1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) +* + ELSE IF( WNTQO ) THEN +* +* Path 2 (M >> N, JOBZ='O') +* N left singular vectors to be overwritten on A and +* N right singular vectors to be computed in VT +* + IU = 1 +* +* WORK(IU) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + IF( LWORK .GE. M*N + N*N + 3*N ) THEN +* +* WORK(IR) is M by N +* + LDWRKR = M + ELSE + LDWRKR = ( LWORK - N*N - 3*N ) / N + END IF + ITAU = IR + LDWRKR*N + NWORK = ITAU + N +* +* Compute A=Q*R +* CWorkspace: need N*N [U] + N*N [R] + N [tau] + N [work] +* CWorkspace: prefer N*N [U] + N*N [R] + N [tau] + N*NB [work] +* RWorkspace: need 0 +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy R to WORK( IR ), zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, WORK( IR+1 ), + $ LDWRKR ) +* +* Generate Q in A +* CWorkspace: need N*N [U] + N*N [R] + N [tau] + N [work] +* CWorkspace: prefer N*N [U] + N*N [R] + N [tau] + N*NB [work] +* RWorkspace: need 0 +* + CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* CWorkspace: need N*N [U] + N*N [R] + 2*N [tauq, taup] + N [work] +* CWorkspace: prefer N*N [U] + N*N [R] + 2*N [tauq, taup] + 2*N*NB [work] +* RWorkspace: need N [e] +* + CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of R in WORK(IRU) and computing right singular vectors +* of R in WORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC +* + IRU = IE + N + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix WORK(IU) +* Overwrite WORK(IU) by the left singular vectors of R +* CWorkspace: need N*N [U] + N*N [R] + 2*N [tauq, taup] + N [work] +* CWorkspace: prefer N*N [U] + N*N [R] + 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL ZLACP2( 'F', N, N, RWORK( IRU ), N, WORK( IU ), + $ LDWRKU ) + CALL ZUNMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IU ), LDWRKU, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by the right singular vectors of R +* CWorkspace: need N*N [U] + N*N [R] + 2*N [tauq, taup] + N [work] +* CWorkspace: prefer N*N [U] + N*N [R] + 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL ZLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL ZUNMBR( 'P', 'R', 'C', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in WORK(IR) and copying to A +* CWorkspace: need N*N [U] + N*N [R] +* CWorkspace: prefer N*N [U] + M*N [R] +* RWorkspace: need 0 +* + DO 10 I = 1, M, LDWRKR + CHUNK = MIN( M-I+1, LDWRKR ) + CALL ZGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ), + $ LDA, WORK( IU ), LDWRKU, CZERO, + $ WORK( IR ), LDWRKR ) + CALL ZLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR, + $ A( I, 1 ), LDA ) + 10 CONTINUE +* + ELSE IF( WNTQS ) THEN +* +* Path 3 (M >> N, JOBZ='S') +* N left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IR = 1 +* +* WORK(IR) is N by N +* + LDWRKR = N + ITAU = IR + LDWRKR*N + NWORK = ITAU + N +* +* Compute A=Q*R +* CWorkspace: need N*N [R] + N [tau] + N [work] +* CWorkspace: prefer N*N [R] + N [tau] + N*NB [work] +* RWorkspace: need 0 +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, WORK( IR+1 ), + $ LDWRKR ) +* +* Generate Q in A +* CWorkspace: need N*N [R] + N [tau] + N [work] +* CWorkspace: prefer N*N [R] + N [tau] + N*NB [work] +* RWorkspace: need 0 +* + CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* CWorkspace: need N*N [R] + 2*N [tauq, taup] + N [work] +* CWorkspace: prefer N*N [R] + 2*N [tauq, taup] + 2*N*NB [work] +* RWorkspace: need N [e] +* + CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC +* + IRU = IE + N + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of R +* CWorkspace: need N*N [R] + 2*N [tauq, taup] + N [work] +* CWorkspace: prefer N*N [R] + 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL ZLACP2( 'F', N, N, RWORK( IRU ), N, U, LDU ) + CALL ZUNMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of R +* CWorkspace: need N*N [R] + 2*N [tauq, taup] + N [work] +* CWorkspace: prefer N*N [R] + 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL ZLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL ZUNMBR( 'P', 'R', 'C', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in U +* CWorkspace: need N*N [R] +* RWorkspace: need 0 +* + CALL ZLACPY( 'F', N, N, U, LDU, WORK( IR ), LDWRKR ) + CALL ZGEMM( 'N', 'N', M, N, N, CONE, A, LDA, WORK( IR ), + $ LDWRKR, CZERO, U, LDU ) +* + ELSE IF( WNTQA ) THEN +* +* Path 4 (M >> N, JOBZ='A') +* M left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IU = 1 +* +* WORK(IU) is N by N +* + LDWRKU = N + ITAU = IU + LDWRKU*N + NWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* CWorkspace: need N*N [U] + N [tau] + N [work] +* CWorkspace: prefer N*N [U] + N [tau] + N*NB [work] +* RWorkspace: need 0 +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* CWorkspace: need N*N [U] + N [tau] + M [work] +* CWorkspace: prefer N*N [U] + N [tau] + M*NB [work] +* RWorkspace: need 0 +* + CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Produce R in A, zeroing out below it +* + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize R in A +* CWorkspace: need N*N [U] + 2*N [tauq, taup] + N [work] +* CWorkspace: prefer N*N [U] + 2*N [tauq, taup] + 2*N*NB [work] +* RWorkspace: need N [e] +* + CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IRU = IE + N + IRVT = IRU + N*N + NRWORK = IRVT + N*N +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC +* + CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix WORK(IU) +* Overwrite WORK(IU) by left singular vectors of R +* CWorkspace: need N*N [U] + 2*N [tauq, taup] + N [work] +* CWorkspace: prefer N*N [U] + 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL ZLACP2( 'F', N, N, RWORK( IRU ), N, WORK( IU ), + $ LDWRKU ) + CALL ZUNMBR( 'Q', 'L', 'N', N, N, N, A, LDA, + $ WORK( ITAUQ ), WORK( IU ), LDWRKU, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of R +* CWorkspace: need N*N [U] + 2*N [tauq, taup] + N [work] +* CWorkspace: prefer N*N [U] + 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL ZLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL ZUNMBR( 'P', 'R', 'C', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* CWorkspace: need N*N [U] +* RWorkspace: need 0 +* + CALL ZGEMM( 'N', 'N', M, N, N, CONE, U, LDU, WORK( IU ), + $ LDWRKU, CZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL ZLACPY( 'F', M, N, A, LDA, U, LDU ) +* + END IF +* + ELSE IF( M.GE.MNTHR2 ) THEN +* +* MNTHR2 <= M < MNTHR1 +* +* Path 5 (M >> N, but not as much as MNTHR1) +* Reduce to bidiagonal form without QR decomposition, use +* ZUNGBR and matrix multiplication to compute singular vectors +* + IE = 1 + NRWORK = IE + N + ITAUQ = 1 + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize A +* CWorkspace: need 2*N [tauq, taup] + M [work] +* CWorkspace: prefer 2*N [tauq, taup] + (M+N)*NB [work] +* RWorkspace: need N [e] +* + CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IF( WNTQN ) THEN +* +* Path 5n (M >> N, JOBZ='N') +* Compute singular values only +* CWorkspace: need 0 +* RWorkspace: need N [e] + BDSPAC +* + CALL DBDSDC( 'U', 'N', N, S, RWORK( IE ), DUM, 1,DUM,1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN + IU = NWORK + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N +* +* Path 5o (M >> N, JOBZ='O') +* Copy A to VT, generate P**H +* CWorkspace: need 2*N [tauq, taup] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Generate Q in A +* CWorkspace: need 2*N [tauq, taup] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL ZUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* + IF( LWORK .GE. M*N + 3*N ) THEN +* +* WORK( IU ) is M by N +* + LDWRKU = M + ELSE +* +* WORK(IU) is LDWRKU by N +* + LDWRKU = ( LWORK - 3*N ) / N + END IF + NWORK = IU + LDWRKU*N +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC +* + CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply real matrix RWORK(IRVT) by P**H in VT, +* storing the result in WORK(IU), copying to VT +* CWorkspace: need 2*N [tauq, taup] + N*N [U] +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + 2*N*N [rwork] +* + CALL ZLARCM( N, N, RWORK( IRVT ), N, VT, LDVT, + $ WORK( IU ), LDWRKU, RWORK( NRWORK ) ) + CALL ZLACPY( 'F', N, N, WORK( IU ), LDWRKU, VT, LDVT ) +* +* Multiply Q in A by real matrix RWORK(IRU), storing the +* result in WORK(IU), copying to A +* CWorkspace: need 2*N [tauq, taup] + N*N [U] +* CWorkspace: prefer 2*N [tauq, taup] + M*N [U] +* RWorkspace: need N [e] + N*N [RU] + 2*N*N [rwork] +* RWorkspace: prefer N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here +* + NRWORK = IRVT + DO 20 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL ZLACRM( CHUNK, N, A( I, 1 ), LDA, RWORK( IRU ), + $ N, WORK( IU ), LDWRKU, RWORK( NRWORK ) ) + CALL ZLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 20 CONTINUE +* + ELSE IF( WNTQS ) THEN +* +* Path 5s (M >> N, JOBZ='S') +* Copy A to VT, generate P**H +* CWorkspace: need 2*N [tauq, taup] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy A to U, generate Q +* CWorkspace: need 2*N [tauq, taup] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) + CALL ZUNGBR( 'Q', M, N, N, U, LDU, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC +* + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply real matrix RWORK(IRVT) by P**H in VT, +* storing the result in A, copying to VT +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + 2*N*N [rwork] +* + CALL ZLARCM( N, N, RWORK( IRVT ), N, VT, LDVT, A, LDA, + $ RWORK( NRWORK ) ) + CALL ZLACPY( 'F', N, N, A, LDA, VT, LDVT ) +* +* Multiply Q in U by real matrix RWORK(IRU), storing the +* result in A, copying to U +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here +* + NRWORK = IRVT + CALL ZLACRM( M, N, U, LDU, RWORK( IRU ), N, A, LDA, + $ RWORK( NRWORK ) ) + CALL ZLACPY( 'F', M, N, A, LDA, U, LDU ) + ELSE +* +* Path 5a (M >> N, JOBZ='A') +* Copy A to VT, generate P**H +* CWorkspace: need 2*N [tauq, taup] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy A to U, generate Q +* CWorkspace: need 2*N [tauq, taup] + M [work] +* CWorkspace: prefer 2*N [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) + CALL ZUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC +* + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply real matrix RWORK(IRVT) by P**H in VT, +* storing the result in A, copying to VT +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + 2*N*N [rwork] +* + CALL ZLARCM( N, N, RWORK( IRVT ), N, VT, LDVT, A, LDA, + $ RWORK( NRWORK ) ) + CALL ZLACPY( 'F', N, N, A, LDA, VT, LDVT ) +* +* Multiply Q in U by real matrix RWORK(IRU), storing the +* result in A, copying to U +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here +* + NRWORK = IRVT + CALL ZLACRM( M, N, U, LDU, RWORK( IRU ), N, A, LDA, + $ RWORK( NRWORK ) ) + CALL ZLACPY( 'F', M, N, A, LDA, U, LDU ) + END IF +* + ELSE +* +* M .LT. MNTHR2 +* +* Path 6 (M >= N, but not much larger) +* Reduce to bidiagonal form without QR decomposition +* Use ZUNMBR to compute singular vectors +* + IE = 1 + NRWORK = IE + N + ITAUQ = 1 + ITAUP = ITAUQ + N + NWORK = ITAUP + N +* +* Bidiagonalize A +* CWorkspace: need 2*N [tauq, taup] + M [work] +* CWorkspace: prefer 2*N [tauq, taup] + (M+N)*NB [work] +* RWorkspace: need N [e] +* + CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IF( WNTQN ) THEN +* +* Path 6n (M >= N, JOBZ='N') +* Compute singular values only +* CWorkspace: need 0 +* RWorkspace: need N [e] + BDSPAC +* + CALL DBDSDC( 'U', 'N', N, S, RWORK( IE ), DUM,1,DUM,1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN + IU = NWORK + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N + IF( LWORK .GE. M*N + 3*N ) THEN +* +* WORK( IU ) is M by N +* + LDWRKU = M + ELSE +* +* WORK( IU ) is LDWRKU by N +* + LDWRKU = ( LWORK - 3*N ) / N + END IF + NWORK = IU + LDWRKU*N +* +* Path 6o (M >= N, JOBZ='O') +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC +* + CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of A +* CWorkspace: need 2*N [tauq, taup] + N*N [U] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + N*N [U] + N*NB [work] +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] +* + CALL ZLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL ZUNMBR( 'P', 'R', 'C', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* + IF( LWORK .GE. M*N + 3*N ) THEN +* +* Path 6o-fast +* Copy real matrix RWORK(IRU) to complex matrix WORK(IU) +* Overwrite WORK(IU) by left singular vectors of A, copying +* to A +* CWorkspace: need 2*N [tauq, taup] + M*N [U] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + M*N [U] + N*NB [work] +* RWorkspace: need N [e] + N*N [RU] +* + CALL ZLASET( 'F', M, N, CZERO, CZERO, WORK( IU ), + $ LDWRKU ) + CALL ZLACP2( 'F', N, N, RWORK( IRU ), N, WORK( IU ), + $ LDWRKU ) + CALL ZUNMBR( 'Q', 'L', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), WORK( IU ), LDWRKU, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + CALL ZLACPY( 'F', M, N, WORK( IU ), LDWRKU, A, LDA ) + ELSE +* +* Path 6o-slow +* Generate Q in A +* CWorkspace: need 2*N [tauq, taup] + N*N [U] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + N*N [U] + N*NB [work] +* RWorkspace: need 0 +* + CALL ZUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by real matrix RWORK(IRU), storing the +* result in WORK(IU), copying to A +* CWorkspace: need 2*N [tauq, taup] + N*N [U] +* CWorkspace: prefer 2*N [tauq, taup] + M*N [U] +* RWorkspace: need N [e] + N*N [RU] + 2*N*N [rwork] +* RWorkspace: prefer N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here +* + NRWORK = IRVT + DO 30 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL ZLACRM( CHUNK, N, A( I, 1 ), LDA, + $ RWORK( IRU ), N, WORK( IU ), LDWRKU, + $ RWORK( NRWORK ) ) + CALL ZLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 30 CONTINUE + END IF +* + ELSE IF( WNTQS ) THEN +* +* Path 6s (M >= N, JOBZ='S') +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC +* + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of A +* CWorkspace: need 2*N [tauq, taup] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] +* + CALL ZLASET( 'F', M, N, CZERO, CZERO, U, LDU ) + CALL ZLACP2( 'F', N, N, RWORK( IRU ), N, U, LDU ) + CALL ZUNMBR( 'Q', 'L', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of A +* CWorkspace: need 2*N [tauq, taup] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] +* + CALL ZLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL ZUNMBR( 'P', 'R', 'C', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + ELSE +* +* Path 6a (M >= N, JOBZ='A') +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] + BDSPAC +* + IRU = NRWORK + IRVT = IRU + N*N + NRWORK = IRVT + N*N + CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ), + $ N, RWORK( IRVT ), N, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Set the right corner of U to identity matrix +* + CALL ZLASET( 'F', M, M, CZERO, CZERO, U, LDU ) + IF( M.GT.N ) THEN + CALL ZLASET( 'F', M-N, M-N, CZERO, CONE, + $ U( N+1, N+1 ), LDU ) + END IF +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of A +* CWorkspace: need 2*N [tauq, taup] + M [work] +* CWorkspace: prefer 2*N [tauq, taup] + M*NB [work] +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] +* + CALL ZLACP2( 'F', N, N, RWORK( IRU ), N, U, LDU ) + CALL ZUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of A +* CWorkspace: need 2*N [tauq, taup] + N [work] +* CWorkspace: prefer 2*N [tauq, taup] + N*NB [work] +* RWorkspace: need N [e] + N*N [RU] + N*N [RVT] +* + CALL ZLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT ) + CALL ZUNMBR( 'P', 'R', 'C', N, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + END IF +* + END IF +* + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce using the LQ decomposition (if +* sufficient workspace available) +* + IF( N.GE.MNTHR1 ) THEN +* + IF( WNTQN ) THEN +* +* Path 1t (N >> M, JOBZ='N') +* No singular vectors to be computed +* + ITAU = 1 + NWORK = ITAU + M +* +* Compute A=L*Q +* CWorkspace: need M [tau] + M [work] +* CWorkspace: prefer M [tau] + M*NB [work] +* RWorkspace: need 0 +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Zero out above L +* + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ), + $ LDA ) + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in A +* CWorkspace: need 2*M [tauq, taup] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + 2*M*NB [work] +* RWorkspace: need M [e] +* + CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + NRWORK = IE + M +* +* Perform bidiagonal SVD, compute singular values only +* CWorkspace: need 0 +* RWorkspace: need M [e] + BDSPAC +* + CALL DBDSDC( 'U', 'N', M, S, RWORK( IE ), DUM,1,DUM,1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) +* + ELSE IF( WNTQO ) THEN +* +* Path 2t (N >> M, JOBZ='O') +* M right singular vectors to be overwritten on A and +* M left singular vectors to be computed in U +* + IVT = 1 + LDWKVT = M +* +* WORK(IVT) is M by M +* + IL = IVT + LDWKVT*M + IF( LWORK .GE. M*N + M*M + 3*M ) THEN +* +* WORK(IL) M by N +* + LDWRKL = M + CHUNK = N + ELSE +* +* WORK(IL) is M by CHUNK +* + LDWRKL = M + CHUNK = ( LWORK - M*M - 3*M ) / M + END IF + ITAU = IL + LDWRKL*CHUNK + NWORK = ITAU + M +* +* Compute A=L*Q +* CWorkspace: need M*M [VT] + M*M [L] + M [tau] + M [work] +* CWorkspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work] +* RWorkspace: need 0 +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy L to WORK(IL), zeroing about above it +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IL+LDWRKL ), LDWRKL ) +* +* Generate Q in A +* CWorkspace: need M*M [VT] + M*M [L] + M [tau] + M [work] +* CWorkspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work] +* RWorkspace: need 0 +* + CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* CWorkspace: need M*M [VT] + M*M [L] + 2*M [tauq, taup] + M [work] +* CWorkspace: prefer M*M [VT] + M*M [L] + 2*M [tauq, taup] + 2*M*NB [work] +* RWorkspace: need M [e] +* + CALL ZGEBRD( M, M, WORK( IL ), LDWRKL, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RU] + M*M [RVT] + BDSPAC +* + IRU = IE + M + IRVT = IRU + M*M + NRWORK = IRVT + M*M + CALL DBDSDC( 'U', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix WORK(IU) +* Overwrite WORK(IU) by the left singular vectors of L +* CWorkspace: need M*M [VT] + M*M [L] + 2*M [tauq, taup] + M [work] +* CWorkspace: prefer M*M [VT] + M*M [L] + 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL ZLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL ZUNMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) +* Overwrite WORK(IVT) by the right singular vectors of L +* CWorkspace: need M*M [VT] + M*M [L] + 2*M [tauq, taup] + M [work] +* CWorkspace: prefer M*M [VT] + M*M [L] + 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL ZLACP2( 'F', M, M, RWORK( IRVT ), M, WORK( IVT ), + $ LDWKVT ) + CALL ZUNMBR( 'P', 'R', 'C', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUP ), WORK( IVT ), LDWKVT, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply right singular vectors of L in WORK(IL) by Q +* in A, storing result in WORK(IL) and copying to A +* CWorkspace: need M*M [VT] + M*M [L] +* CWorkspace: prefer M*M [VT] + M*N [L] +* RWorkspace: need 0 +* + DO 40 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL ZGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IVT ), M, + $ A( 1, I ), LDA, CZERO, WORK( IL ), + $ LDWRKL ) + CALL ZLACPY( 'F', M, BLK, WORK( IL ), LDWRKL, + $ A( 1, I ), LDA ) + 40 CONTINUE +* + ELSE IF( WNTQS ) THEN +* +* Path 3t (N >> M, JOBZ='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IL = 1 +* +* WORK(IL) is M by M +* + LDWRKL = M + ITAU = IL + LDWRKL*M + NWORK = ITAU + M +* +* Compute A=L*Q +* CWorkspace: need M*M [L] + M [tau] + M [work] +* CWorkspace: prefer M*M [L] + M [tau] + M*NB [work] +* RWorkspace: need 0 +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy L to WORK(IL), zeroing out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IL+LDWRKL ), LDWRKL ) +* +* Generate Q in A +* CWorkspace: need M*M [L] + M [tau] + M [work] +* CWorkspace: prefer M*M [L] + M [tau] + M*NB [work] +* RWorkspace: need 0 +* + CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IL) +* CWorkspace: need M*M [L] + 2*M [tauq, taup] + M [work] +* CWorkspace: prefer M*M [L] + 2*M [tauq, taup] + 2*M*NB [work] +* RWorkspace: need M [e] +* + CALL ZGEBRD( M, M, WORK( IL ), LDWRKL, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RU] + M*M [RVT] + BDSPAC +* + IRU = IE + M + IRVT = IRU + M*M + NRWORK = IRVT + M*M + CALL DBDSDC( 'U', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of L +* CWorkspace: need M*M [L] + 2*M [tauq, taup] + M [work] +* CWorkspace: prefer M*M [L] + 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL ZLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL ZUNMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by left singular vectors of L +* CWorkspace: need M*M [L] + 2*M [tauq, taup] + M [work] +* CWorkspace: prefer M*M [L] + 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL ZLACP2( 'F', M, M, RWORK( IRVT ), M, VT, LDVT ) + CALL ZUNMBR( 'P', 'R', 'C', M, M, M, WORK( IL ), LDWRKL, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy VT to WORK(IL), multiply right singular vectors of L +* in WORK(IL) by Q in A, storing result in VT +* CWorkspace: need M*M [L] +* RWorkspace: need 0 +* + CALL ZLACPY( 'F', M, M, VT, LDVT, WORK( IL ), LDWRKL ) + CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IL ), LDWRKL, + $ A, LDA, CZERO, VT, LDVT ) +* + ELSE IF( WNTQA ) THEN +* +* Path 4t (N >> M, JOBZ='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IVT = 1 +* +* WORK(IVT) is M by M +* + LDWKVT = M + ITAU = IVT + LDWKVT*M + NWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* CWorkspace: need M*M [VT] + M [tau] + M [work] +* CWorkspace: prefer M*M [VT] + M [tau] + M*NB [work] +* RWorkspace: need 0 +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* CWorkspace: need M*M [VT] + M [tau] + N [work] +* CWorkspace: prefer M*M [VT] + M [tau] + N*NB [work] +* RWorkspace: need 0 +* + CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Produce L in A, zeroing out above it +* + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ), + $ LDA ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize L in A +* CWorkspace: need M*M [VT] + 2*M [tauq, taup] + M [work] +* CWorkspace: prefer M*M [VT] + 2*M [tauq, taup] + 2*M*NB [work] +* RWorkspace: need M [e] +* + CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RU] + M*M [RVT] + BDSPAC +* + IRU = IE + M + IRVT = IRU + M*M + NRWORK = IRVT + M*M + CALL DBDSDC( 'U', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of L +* CWorkspace: need M*M [VT] + 2*M [tauq, taup] + M [work] +* CWorkspace: prefer M*M [VT] + 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL ZLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL ZUNMBR( 'Q', 'L', 'N', M, M, M, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) +* Overwrite WORK(IVT) by right singular vectors of L +* CWorkspace: need M*M [VT] + 2*M [tauq, taup] + M [work] +* CWorkspace: prefer M*M [VT] + 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL ZLACP2( 'F', M, M, RWORK( IRVT ), M, WORK( IVT ), + $ LDWKVT ) + CALL ZUNMBR( 'P', 'R', 'C', M, M, M, A, LDA, + $ WORK( ITAUP ), WORK( IVT ), LDWKVT, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply right singular vectors of L in WORK(IVT) by +* Q in VT, storing result in A +* CWorkspace: need M*M [VT] +* RWorkspace: need 0 +* + CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IVT ), LDWKVT, + $ VT, LDVT, CZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + END IF +* + ELSE IF( N.GE.MNTHR2 ) THEN +* +* MNTHR2 <= N < MNTHR1 +* +* Path 5t (N >> M, but not as much as MNTHR1) +* Reduce to bidiagonal form without QR decomposition, use +* ZUNGBR and matrix multiplication to compute singular vectors +* + IE = 1 + NRWORK = IE + M + ITAUQ = 1 + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize A +* CWorkspace: need 2*M [tauq, taup] + N [work] +* CWorkspace: prefer 2*M [tauq, taup] + (M+N)*NB [work] +* RWorkspace: need M [e] +* + CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) +* + IF( WNTQN ) THEN +* +* Path 5tn (N >> M, JOBZ='N') +* Compute singular values only +* CWorkspace: need 0 +* RWorkspace: need M [e] + BDSPAC +* + CALL DBDSDC( 'L', 'N', M, S, RWORK( IE ), DUM,1,DUM,1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M + IVT = NWORK +* +* Path 5to (N >> M, JOBZ='O') +* Copy A to U, generate Q +* CWorkspace: need 2*M [tauq, taup] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL ZLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL ZUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Generate P**H in A +* CWorkspace: need 2*M [tauq, taup] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL ZUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* + LDWKVT = M + IF( LWORK .GE. M*N + 3*M ) THEN +* +* WORK( IVT ) is M by N +* + NWORK = IVT + LDWKVT*N + CHUNK = N + ELSE +* +* WORK( IVT ) is M by CHUNK +* + CHUNK = ( LWORK - 3*M ) / M + NWORK = IVT + LDWKVT*CHUNK + END IF +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC +* + CALL DBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply Q in U by real matrix RWORK(IRVT) +* storing the result in WORK(IVT), copying to U +* CWorkspace: need 2*M [tauq, taup] + M*M [VT] +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + 2*M*M [rwork] +* + CALL ZLACRM( M, M, U, LDU, RWORK( IRU ), M, WORK( IVT ), + $ LDWKVT, RWORK( NRWORK ) ) + CALL ZLACPY( 'F', M, M, WORK( IVT ), LDWKVT, U, LDU ) +* +* Multiply RWORK(IRVT) by P**H in A, storing the +* result in WORK(IVT), copying to A +* CWorkspace: need 2*M [tauq, taup] + M*M [VT] +* CWorkspace: prefer 2*M [tauq, taup] + M*N [VT] +* RWorkspace: need M [e] + M*M [RVT] + 2*M*M [rwork] +* RWorkspace: prefer M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here +* + NRWORK = IRU + DO 50 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL ZLARCM( M, BLK, RWORK( IRVT ), M, A( 1, I ), LDA, + $ WORK( IVT ), LDWKVT, RWORK( NRWORK ) ) + CALL ZLACPY( 'F', M, BLK, WORK( IVT ), LDWKVT, + $ A( 1, I ), LDA ) + 50 CONTINUE + ELSE IF( WNTQS ) THEN +* +* Path 5ts (N >> M, JOBZ='S') +* Copy A to U, generate Q +* CWorkspace: need 2*M [tauq, taup] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL ZLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL ZUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy A to VT, generate P**H +* CWorkspace: need 2*M [tauq, taup] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) + CALL ZUNGBR( 'P', M, N, M, VT, LDVT, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC +* + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M + CALL DBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply Q in U by real matrix RWORK(IRU), storing the +* result in A, copying to U +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + 2*M*M [rwork] +* + CALL ZLACRM( M, M, U, LDU, RWORK( IRU ), M, A, LDA, + $ RWORK( NRWORK ) ) + CALL ZLACPY( 'F', M, M, A, LDA, U, LDU ) +* +* Multiply real matrix RWORK(IRVT) by P**H in VT, +* storing the result in A, copying to VT +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here +* + NRWORK = IRU + CALL ZLARCM( M, N, RWORK( IRVT ), M, VT, LDVT, A, LDA, + $ RWORK( NRWORK ) ) + CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT ) + ELSE +* +* Path 5ta (N >> M, JOBZ='A') +* Copy A to U, generate Q +* CWorkspace: need 2*M [tauq, taup] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need 0 +* + CALL ZLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL ZUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Copy A to VT, generate P**H +* CWorkspace: need 2*M [tauq, taup] + N [work] +* CWorkspace: prefer 2*M [tauq, taup] + N*NB [work] +* RWorkspace: need 0 +* + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) + CALL ZUNGBR( 'P', N, N, M, VT, LDVT, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC +* + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M + CALL DBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Multiply Q in U by real matrix RWORK(IRU), storing the +* result in A, copying to U +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + 2*M*M [rwork] +* + CALL ZLACRM( M, M, U, LDU, RWORK( IRU ), M, A, LDA, + $ RWORK( NRWORK ) ) + CALL ZLACPY( 'F', M, M, A, LDA, U, LDU ) +* +* Multiply real matrix RWORK(IRVT) by P**H in VT, +* storing the result in A, copying to VT +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here +* + NRWORK = IRU + CALL ZLARCM( M, N, RWORK( IRVT ), M, VT, LDVT, A, LDA, + $ RWORK( NRWORK ) ) + CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT ) + END IF +* + ELSE +* +* N .LT. MNTHR2 +* +* Path 6t (N > M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* Use ZUNMBR to compute singular vectors +* + IE = 1 + NRWORK = IE + M + ITAUQ = 1 + ITAUP = ITAUQ + M + NWORK = ITAUP + M +* +* Bidiagonalize A +* CWorkspace: need 2*M [tauq, taup] + N [work] +* CWorkspace: prefer 2*M [tauq, taup] + (M+N)*NB [work] +* RWorkspace: need M [e] +* + CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1, + $ IERR ) + IF( WNTQN ) THEN +* +* Path 6tn (N > M, JOBZ='N') +* Compute singular values only +* CWorkspace: need 0 +* RWorkspace: need M [e] + BDSPAC +* + CALL DBDSDC( 'L', 'N', M, S, RWORK( IE ), DUM,1,DUM,1, + $ DUM, IDUM, RWORK( NRWORK ), IWORK, INFO ) + ELSE IF( WNTQO ) THEN +* Path 6to (N > M, JOBZ='O') + LDWKVT = M + IVT = NWORK + IF( LWORK .GE. M*N + 3*M ) THEN +* +* WORK( IVT ) is M by N +* + CALL ZLASET( 'F', M, N, CZERO, CZERO, WORK( IVT ), + $ LDWKVT ) + NWORK = IVT + LDWKVT*N + ELSE +* +* WORK( IVT ) is M by CHUNK +* + CHUNK = ( LWORK - 3*M ) / M + NWORK = IVT + LDWKVT*CHUNK + END IF +* +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC +* + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M + CALL DBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of A +* CWorkspace: need 2*M [tauq, taup] + M*M [VT] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*M [VT] + M*NB [work] +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] +* + CALL ZLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL ZUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* + IF( LWORK .GE. M*N + 3*M ) THEN +* +* Path 6to-fast +* Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT) +* Overwrite WORK(IVT) by right singular vectors of A, +* copying to A +* CWorkspace: need 2*M [tauq, taup] + M*N [VT] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*N [VT] + M*NB [work] +* RWorkspace: need M [e] + M*M [RVT] +* + CALL ZLACP2( 'F', M, M, RWORK( IRVT ), M, WORK( IVT ), + $ LDWKVT ) + CALL ZUNMBR( 'P', 'R', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), WORK( IVT ), LDWKVT, + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) + CALL ZLACPY( 'F', M, N, WORK( IVT ), LDWKVT, A, LDA ) + ELSE +* +* Path 6to-slow +* Generate P**H in A +* CWorkspace: need 2*M [tauq, taup] + M*M [VT] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*M [VT] + M*NB [work] +* RWorkspace: need 0 +* + CALL ZUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( NWORK ), LWORK-NWORK+1, IERR ) +* +* Multiply Q in A by real matrix RWORK(IRU), storing the +* result in WORK(IU), copying to A +* CWorkspace: need 2*M [tauq, taup] + M*M [VT] +* CWorkspace: prefer 2*M [tauq, taup] + M*N [VT] +* RWorkspace: need M [e] + M*M [RVT] + 2*M*M [rwork] +* RWorkspace: prefer M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here +* + NRWORK = IRU + DO 60 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL ZLARCM( M, BLK, RWORK( IRVT ), M, A( 1, I ), + $ LDA, WORK( IVT ), LDWKVT, + $ RWORK( NRWORK ) ) + CALL ZLACPY( 'F', M, BLK, WORK( IVT ), LDWKVT, + $ A( 1, I ), LDA ) + 60 CONTINUE + END IF + ELSE IF( WNTQS ) THEN +* +* Path 6ts (N > M, JOBZ='S') +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC +* + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M + CALL DBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of A +* CWorkspace: need 2*M [tauq, taup] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] +* + CALL ZLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL ZUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of A +* CWorkspace: need 2*M [tauq, taup] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need M [e] + M*M [RVT] +* + CALL ZLASET( 'F', M, N, CZERO, CZERO, VT, LDVT ) + CALL ZLACP2( 'F', M, M, RWORK( IRVT ), M, VT, LDVT ) + CALL ZUNMBR( 'P', 'R', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + ELSE +* +* Path 6ta (N > M, JOBZ='A') +* Perform bidiagonal SVD, computing left singular vectors +* of bidiagonal matrix in RWORK(IRU) and computing right +* singular vectors of bidiagonal matrix in RWORK(IRVT) +* CWorkspace: need 0 +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] + BDSPAC +* + IRVT = NRWORK + IRU = IRVT + M*M + NRWORK = IRU + M*M +* + CALL DBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ), + $ M, RWORK( IRVT ), M, DUM, IDUM, + $ RWORK( NRWORK ), IWORK, INFO ) +* +* Copy real matrix RWORK(IRU) to complex matrix U +* Overwrite U by left singular vectors of A +* CWorkspace: need 2*M [tauq, taup] + M [work] +* CWorkspace: prefer 2*M [tauq, taup] + M*NB [work] +* RWorkspace: need M [e] + M*M [RVT] + M*M [RU] +* + CALL ZLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU ) + CALL ZUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) +* +* Set all of VT to identity matrix +* + CALL ZLASET( 'F', N, N, CZERO, CONE, VT, LDVT ) +* +* Copy real matrix RWORK(IRVT) to complex matrix VT +* Overwrite VT by right singular vectors of A +* CWorkspace: need 2*M [tauq, taup] + N [work] +* CWorkspace: prefer 2*M [tauq, taup] + N*NB [work] +* RWorkspace: need M [e] + M*M [RVT] +* + CALL ZLACP2( 'F', M, M, RWORK( IRVT ), M, VT, LDVT ) + CALL ZUNMBR( 'P', 'R', 'C', N, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ), + $ LWORK-NWORK+1, IERR ) + END IF +* + END IF +* + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM ) + $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, + $ RWORK( IE ), MINMN, IERR ) + IF( ANRM.LT.SMLNUM ) + $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM ) + $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, + $ RWORK( IE ), MINMN, IERR ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of ZGESDD +* + END diff --git a/dspl/liblapack/SRC/zgesv.f b/dspl/liblapack/SRC/zgesv.f new file mode 100644 index 0000000..40dd14f --- /dev/null +++ b/dspl/liblapack/SRC/zgesv.f @@ -0,0 +1,179 @@ +*> \brief ZGESV computes the solution to system of linear equations A * X = B for GE matrices (simple driver) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGESV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGESV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +*> +*> The LU decomposition with partial pivoting and row interchanges is +*> used to factor A as +*> A = P * L * U, +*> where P is a permutation matrix, L is unit lower triangular, and U is +*> upper triangular. The factored form of A is then used to solve the +*> system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N coefficient matrix A. +*> On exit, the factors L and U from the factorization +*> A = P*L*U; the unit diagonal elements of L are not stored. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices that define the permutation matrix P; +*> row i of the matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS matrix of right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16GEsolve +* +* ===================================================================== + SUBROUTINE ZGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL XERBLA, ZGETRF, ZGETRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGESV ', -INFO ) + RETURN + END IF +* +* Compute the LU factorization of A. +* + CALL ZGETRF( N, N, A, LDA, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB, + $ INFO ) + END IF + RETURN +* +* End of ZGESV +* + END diff --git a/dspl/liblapack/SRC/zgesvd.f b/dspl/liblapack/SRC/zgesvd.f new file mode 100644 index 0000000..b4dbf03 --- /dev/null +++ b/dspl/liblapack/SRC/zgesvd.f @@ -0,0 +1,3705 @@ +*> \brief ZGESVD computes the singular value decomposition (SVD) for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGESVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, +* WORK, LWORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBU, JOBVT +* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ), S( * ) +* COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGESVD computes the singular value decomposition (SVD) of a complex +*> M-by-N matrix A, optionally computing the left and/or right singular +*> vectors. The SVD is written +*> +*> A = U * SIGMA * conjugate-transpose(V) +*> +*> where SIGMA is an M-by-N matrix which is zero except for its +*> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and +*> V is an N-by-N unitary matrix. The diagonal elements of SIGMA +*> are the singular values of A; they are real and non-negative, and +*> are returned in descending order. The first min(m,n) columns of +*> U and V are the left and right singular vectors of A. +*> +*> Note that the routine returns V**H, not V. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> Specifies options for computing all or part of the matrix U: +*> = 'A': all M columns of U are returned in array U: +*> = 'S': the first min(m,n) columns of U (the left singular +*> vectors) are returned in the array U; +*> = 'O': the first min(m,n) columns of U (the left singular +*> vectors) are overwritten on the array A; +*> = 'N': no columns of U (no left singular vectors) are +*> computed. +*> \endverbatim +*> +*> \param[in] JOBVT +*> \verbatim +*> JOBVT is CHARACTER*1 +*> Specifies options for computing all or part of the matrix +*> V**H: +*> = 'A': all N rows of V**H are returned in the array VT; +*> = 'S': the first min(m,n) rows of V**H (the right singular +*> vectors) are returned in the array VT; +*> = 'O': the first min(m,n) rows of V**H (the right singular +*> vectors) are overwritten on the array A; +*> = 'N': no rows of V**H (no right singular vectors) are +*> computed. +*> +*> JOBVT and JOBU cannot both be 'O'. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> if JOBU = 'O', A is overwritten with the first min(m,n) +*> columns of U (the left singular vectors, +*> stored columnwise); +*> if JOBVT = 'O', A is overwritten with the first min(m,n) +*> rows of V**H (the right singular vectors, +*> stored rowwise); +*> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A +*> are destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (min(M,N)) +*> The singular values of A, sorted so that S(i) >= S(i+1). +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX*16 array, dimension (LDU,UCOL) +*> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. +*> If JOBU = 'A', U contains the M-by-M unitary matrix U; +*> if JOBU = 'S', U contains the first min(m,n) columns of U +*> (the left singular vectors, stored columnwise); +*> if JOBU = 'N' or 'O', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= 1; if +*> JOBU = 'S' or 'A', LDU >= M. +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is COMPLEX*16 array, dimension (LDVT,N) +*> If JOBVT = 'A', VT contains the N-by-N unitary matrix +*> V**H; +*> if JOBVT = 'S', VT contains the first min(m,n) rows of +*> V**H (the right singular vectors, stored rowwise); +*> if JOBVT = 'N' or 'O', VT is not referenced. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= 1; if +*> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= MAX(1,2*MIN(M,N)+MAX(M,N)). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (5*min(M,N)) +*> On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the +*> unconverged superdiagonal elements of an upper bidiagonal +*> matrix B whose diagonal is in S (not necessarily sorted). +*> B satisfies A = U * B * VT, so it has the same singular +*> values as A, and singular vectors related by U and VT. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if ZBDSQR did not converge, INFO specifies how many +*> superdiagonals of an intermediate bidiagonal form B +*> did not converge to zero. See the description of RWORK +*> above for details. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16GEsing +* +* ===================================================================== + SUBROUTINE ZGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, + $ VT, LDVT, WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER JOBU, JOBVT + INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), S( * ) + COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS, + $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS + INTEGER BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL, + $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU, + $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU, + $ NRVT, WRKBL + INTEGER LWORK_ZGEQRF, LWORK_ZUNGQR_N, LWORK_ZUNGQR_M, + $ LWORK_ZGEBRD, LWORK_ZUNGBR_P, LWORK_ZUNGBR_Q, + $ LWORK_ZGELQF, LWORK_ZUNGLQ_N, LWORK_ZUNGLQ_M + DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUM( 1 ) + COMPLEX*16 CDUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DLASCL, XERBLA, ZBDSQR, ZGEBRD, ZGELQF, ZGEMM, + $ ZGEQRF, ZLACPY, ZLASCL, ZLASET, ZUNGBR, ZUNGLQ, + $ ZUNGQR, ZUNMBR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + MINMN = MIN( M, N ) + WNTUA = LSAME( JOBU, 'A' ) + WNTUS = LSAME( JOBU, 'S' ) + WNTUAS = WNTUA .OR. WNTUS + WNTUO = LSAME( JOBU, 'O' ) + WNTUN = LSAME( JOBU, 'N' ) + WNTVA = LSAME( JOBVT, 'A' ) + WNTVS = LSAME( JOBVT, 'S' ) + WNTVAS = WNTVA .OR. WNTVS + WNTVO = LSAME( JOBVT, 'O' ) + WNTVN = LSAME( JOBVT, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR. + $ ( WNTVO .AND. WNTUO ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN + INFO = -9 + ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR. + $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN + INFO = -11 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* CWorkspace refers to complex workspace, and RWorkspace to +* real workspace. NB refers to the optimal block size for the +* immediately following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + IF( M.GE.N .AND. MINMN.GT.0 ) THEN +* +* Space needed for ZBDSQR is BDSPAC = 5*N +* + MNTHR = ILAENV( 6, 'ZGESVD', JOBU // JOBVT, M, N, 0, 0 ) +* Compute space needed for ZGEQRF + CALL ZGEQRF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) + LWORK_ZGEQRF = INT( CDUM(1) ) +* Compute space needed for ZUNGQR + CALL ZUNGQR( M, N, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) + LWORK_ZUNGQR_N = INT( CDUM(1) ) + CALL ZUNGQR( M, M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) + LWORK_ZUNGQR_M = INT( CDUM(1) ) +* Compute space needed for ZGEBRD + CALL ZGEBRD( N, N, A, LDA, S, DUM(1), CDUM(1), + $ CDUM(1), CDUM(1), -1, IERR ) + LWORK_ZGEBRD = INT( CDUM(1) ) +* Compute space needed for ZUNGBR + CALL ZUNGBR( 'P', N, N, N, A, LDA, CDUM(1), + $ CDUM(1), -1, IERR ) + LWORK_ZUNGBR_P = INT( CDUM(1) ) + CALL ZUNGBR( 'Q', N, N, N, A, LDA, CDUM(1), + $ CDUM(1), -1, IERR ) + LWORK_ZUNGBR_Q = INT( CDUM(1) ) +* + IF( M.GE.MNTHR ) THEN + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* + MAXWRK = N + LWORK_ZGEQRF + MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZGEBRD ) + IF( WNTVO .OR. WNTVAS ) + $ MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_P ) + MINWRK = 3*N + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* + WRKBL = N + LWORK_ZGEQRF + WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_N ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q ) + MAXWRK = MAX( N*N+WRKBL, N*N+M*N ) + MINWRK = 2*N + M + ELSE IF( WNTUO .AND. WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or +* 'A') +* + WRKBL = N + LWORK_ZGEQRF + WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_N ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_P ) + MAXWRK = MAX( N*N+WRKBL, N*N+M*N ) + MINWRK = 2*N + M + ELSE IF( WNTUS .AND. WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* + WRKBL = N + LWORK_ZGEQRF + WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_N ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q ) + MAXWRK = N*N + WRKBL + MINWRK = 2*N + M + ELSE IF( WNTUS .AND. WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* + WRKBL = N + LWORK_ZGEQRF + WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_N ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_P ) + MAXWRK = 2*N*N + WRKBL + MINWRK = 2*N + M + ELSE IF( WNTUS .AND. WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or +* 'A') +* + WRKBL = N + LWORK_ZGEQRF + WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_N ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_P ) + MAXWRK = N*N + WRKBL + MINWRK = 2*N + M + ELSE IF( WNTUA .AND. WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* + WRKBL = N + LWORK_ZGEQRF + WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_M ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q ) + MAXWRK = N*N + WRKBL + MINWRK = 2*N + M + ELSE IF( WNTUA .AND. WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* + WRKBL = N + LWORK_ZGEQRF + WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_M ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_P ) + MAXWRK = 2*N*N + WRKBL + MINWRK = 2*N + M + ELSE IF( WNTUA .AND. WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or +* 'A') +* + WRKBL = N + LWORK_ZGEQRF + WRKBL = MAX( WRKBL, N+LWORK_ZUNGQR_M ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZGEBRD ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_Q ) + WRKBL = MAX( WRKBL, 2*N+LWORK_ZUNGBR_P ) + MAXWRK = N*N + WRKBL + MINWRK = 2*N + M + END IF + ELSE +* +* Path 10 (M at least N, but not much larger) +* + CALL ZGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1), + $ CDUM(1), CDUM(1), -1, IERR ) + LWORK_ZGEBRD = INT( CDUM(1) ) + MAXWRK = 2*N + LWORK_ZGEBRD + IF( WNTUS .OR. WNTUO ) THEN + CALL ZUNGBR( 'Q', M, N, N, A, LDA, CDUM(1), + $ CDUM(1), -1, IERR ) + LWORK_ZUNGBR_Q = INT( CDUM(1) ) + MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_Q ) + END IF + IF( WNTUA ) THEN + CALL ZUNGBR( 'Q', M, M, N, A, LDA, CDUM(1), + $ CDUM(1), -1, IERR ) + LWORK_ZUNGBR_Q = INT( CDUM(1) ) + MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_Q ) + END IF + IF( .NOT.WNTVN ) THEN + MAXWRK = MAX( MAXWRK, 2*N+LWORK_ZUNGBR_P ) + END IF + MINWRK = 2*N + M + END IF + ELSE IF( MINMN.GT.0 ) THEN +* +* Space needed for ZBDSQR is BDSPAC = 5*M +* + MNTHR = ILAENV( 6, 'ZGESVD', JOBU // JOBVT, M, N, 0, 0 ) +* Compute space needed for ZGELQF + CALL ZGELQF( M, N, A, LDA, CDUM(1), CDUM(1), -1, IERR ) + LWORK_ZGELQF = INT( CDUM(1) ) +* Compute space needed for ZUNGLQ + CALL ZUNGLQ( N, N, M, CDUM(1), N, CDUM(1), CDUM(1), -1, + $ IERR ) + LWORK_ZUNGLQ_N = INT( CDUM(1) ) + CALL ZUNGLQ( M, N, M, A, LDA, CDUM(1), CDUM(1), -1, IERR ) + LWORK_ZUNGLQ_M = INT( CDUM(1) ) +* Compute space needed for ZGEBRD + CALL ZGEBRD( M, M, A, LDA, S, DUM(1), CDUM(1), + $ CDUM(1), CDUM(1), -1, IERR ) + LWORK_ZGEBRD = INT( CDUM(1) ) +* Compute space needed for ZUNGBR P + CALL ZUNGBR( 'P', M, M, M, A, N, CDUM(1), + $ CDUM(1), -1, IERR ) + LWORK_ZUNGBR_P = INT( CDUM(1) ) +* Compute space needed for ZUNGBR Q + CALL ZUNGBR( 'Q', M, M, M, A, N, CDUM(1), + $ CDUM(1), -1, IERR ) + LWORK_ZUNGBR_Q = INT( CDUM(1) ) + IF( N.GE.MNTHR ) THEN + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* + MAXWRK = M + LWORK_ZGELQF + MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZGEBRD ) + IF( WNTUO .OR. WNTUAS ) + $ MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_Q ) + MINWRK = 3*M + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* + WRKBL = M + LWORK_ZGELQF + WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_M ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P ) + MAXWRK = MAX( M*M+WRKBL, M*M+M*N ) + MINWRK = 2*M + N + ELSE IF( WNTVO .AND. WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', +* JOBVT='O') +* + WRKBL = M + LWORK_ZGELQF + WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_M ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_Q ) + MAXWRK = MAX( M*M+WRKBL, M*M+M*N ) + MINWRK = 2*M + N + ELSE IF( WNTVS .AND. WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* + WRKBL = M + LWORK_ZGELQF + WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_M ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P ) + MAXWRK = M*M + WRKBL + MINWRK = 2*M + N + ELSE IF( WNTVS .AND. WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* + WRKBL = M + LWORK_ZGELQF + WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_M ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_Q ) + MAXWRK = 2*M*M + WRKBL + MINWRK = 2*M + N + ELSE IF( WNTVS .AND. WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* + WRKBL = M + LWORK_ZGELQF + WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_M ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_Q ) + MAXWRK = M*M + WRKBL + MINWRK = 2*M + N + ELSE IF( WNTVA .AND. WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* + WRKBL = M + LWORK_ZGELQF + WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_N ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P ) + MAXWRK = M*M + WRKBL + MINWRK = 2*M + N + ELSE IF( WNTVA .AND. WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* + WRKBL = M + LWORK_ZGELQF + WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_N ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_Q ) + MAXWRK = 2*M*M + WRKBL + MINWRK = 2*M + N + ELSE IF( WNTVA .AND. WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* + WRKBL = M + LWORK_ZGELQF + WRKBL = MAX( WRKBL, M+LWORK_ZUNGLQ_N ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZGEBRD ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_P ) + WRKBL = MAX( WRKBL, 2*M+LWORK_ZUNGBR_Q ) + MAXWRK = M*M + WRKBL + MINWRK = 2*M + N + END IF + ELSE +* +* Path 10t(N greater than M, but not much larger) +* + CALL ZGEBRD( M, N, A, LDA, S, DUM(1), CDUM(1), + $ CDUM(1), CDUM(1), -1, IERR ) + LWORK_ZGEBRD = INT( CDUM(1) ) + MAXWRK = 2*M + LWORK_ZGEBRD + IF( WNTVS .OR. WNTVO ) THEN +* Compute space needed for ZUNGBR P + CALL ZUNGBR( 'P', M, N, M, A, N, CDUM(1), + $ CDUM(1), -1, IERR ) + LWORK_ZUNGBR_P = INT( CDUM(1) ) + MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_P ) + END IF + IF( WNTVA ) THEN + CALL ZUNGBR( 'P', N, N, M, A, N, CDUM(1), + $ CDUM(1), -1, IERR ) + LWORK_ZUNGBR_P = INT( CDUM(1) ) + MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_P ) + END IF + IF( .NOT.WNTUN ) THEN + MAXWRK = MAX( MAXWRK, 2*M+LWORK_ZUNGBR_Q ) + END IF + MINWRK = 2*M + N + END IF + END IF + MAXWRK = MAX( MAXWRK, MINWRK ) + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGESVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce using the QR +* decomposition (if sufficient workspace available) +* + IF( M.GE.MNTHR ) THEN +* + IF( WNTUN ) THEN +* +* Path 1 (M much larger than N, JOBU='N') +* No left singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: need 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out below R +* + IF( N .GT. 1 ) THEN + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ), + $ LDA ) + END IF + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + NCVT = 0 + IF( WNTVO .OR. WNTVAS ) THEN +* +* If right singular vectors desired, generate P'. +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + NCVT = N + END IF + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A if desired +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, NCVT, 0, 0, S, RWORK( IE ), A, LDA, + $ CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO ) +* +* If right singular vectors desired in VT, copy them there +* + IF( WNTVAS ) + $ CALL ZLACPY( 'F', N, N, A, LDA, VT, LDVT ) +* + ELSE IF( WNTUO .AND. WNTVN ) THEN +* +* Path 2 (M much larger than N, JOBU='O', JOBVT='N') +* N left singular vectors to be overwritten on A and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN +* +* WORK(IU) is LDA by N, WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N, WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR) and zero out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: need 0) +* + CALL ZUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, 1, + $ WORK( IR ), LDWRKR, CDUM, 1, + $ RWORK( IRWORK ), INFO ) + IU = ITAUQ +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (CWorkspace: need N*N+N, prefer N*N+M*N) +* (RWorkspace: 0) +* + DO 10 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL ZGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, CZERO, + $ WORK( IU ), LDWRKU ) + CALL ZLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 10 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) +* (RWorkspace: N) +* + CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing A +* (CWorkspace: need 3*N, prefer 2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A +* (CWorkspace: need 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, 1, + $ A, LDA, CDUM, 1, RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO .AND. WNTVAS ) THEN +* +* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A') +* N left singular vectors to be overwritten on A and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + LDWRKR = N + ELSE +* +* WORK(IU) is LDWRKU by N and WORK(IR) is N by N +* + LDWRKU = ( LWORK-N*N ) / N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ VT( 2, 1 ), LDVT ) +* +* Generate Q in A +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT, copying result to WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, VT, LDVT, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) and computing right +* singular vectors of R in VT +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT, + $ LDVT, WORK( IR ), LDWRKR, CDUM, 1, + $ RWORK( IRWORK ), INFO ) + IU = ITAUQ +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in WORK(IU) and copying to A +* (CWorkspace: need N*N+N, prefer N*N+M*N) +* (RWorkspace: 0) +* + DO 20 I = 1, M, LDWRKU + CHUNK = MIN( M-I+1, LDWRKU ) + CALL ZGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ), + $ LDA, WORK( IR ), LDWRKR, CZERO, + $ WORK( IU ), LDWRKU ) + CALL ZLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU, + $ A( I, 1 ), LDA ) + 20 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ VT( 2, 1 ), LDVT ) +* +* Generate Q in A +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: N) +* + CALL ZGEBRD( N, N, VT, LDVT, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in A by left vectors bidiagonalizing R +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in VT +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTUS ) THEN +* + IF( WNTVN ) THEN +* +* Path 4 (M much larger than N, JOBU='S', JOBVT='N') +* N left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, + $ 1, WORK( IR ), LDWRKR, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IR), storing result in U +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, N, CONE, A, LDA, + $ WORK( IR ), LDWRKR, CZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + IF( N .GT. 1 ) THEN + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ A( 2, 1 ), LDA ) + END IF +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, + $ 1, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 5 (M much larger than N, JOBU='S', JOBVT='O') +* N left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (CWorkspace: need 2*N*N+3*N, +* prefer 2*N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need 2*N*N+3*N-1, +* prefer 2*N*N+2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (CWorkspace: need 2*N*N) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, N, CONE, A, LDA, + $ WORK( IU ), LDWRKU, CZERO, U, LDU ) +* +* Copy right singular vectors of R to A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL ZLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + IF( N .GT. 1 ) THEN + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ A( 2, 1 ), LDA ) + END IF +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left vectors bidiagonalizing R +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing R in A +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A, + $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 6 (M much larger than N, JOBU='S', JOBVT='S' +* or 'A') +* N left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+3*N ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IU+1 ), LDWRKU ) +* +* Generate Q in A +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (CWorkspace: need N*N+3*N-1, +* prefer N*N+2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply Q in A by left singular vectors of R in +* WORK(IU), storing result in U +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, N, CONE, A, LDA, + $ WORK( IU ), LDWRKU, CZERO, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, N, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to VT, zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ VT( 2, 1 ), LDVT ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, VT, LDVT, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTUA ) THEN +* + IF( WNTVN ) THEN +* +* Path 7 (M much larger than N, JOBU='A', JOBVT='N') +* M left singular vectors to be computed in U and +* no right singular vectors to be computed +* + IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IR) is LDA by N +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is N by N +* + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Copy R to WORK(IR), zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IR+1 ), LDWRKR ) +* +* Generate Q in U +* (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IR) +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, + $ 1, WORK( IR ), LDWRKR, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IR), storing result in A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, N, CONE, U, LDU, + $ WORK( IR ), LDWRKR, CZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL ZLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N+M, prefer N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + IF( N .GT. 1 ) THEN + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ A( 2, 1 ), LDA ) + END IF +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, + $ 1, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVO ) THEN +* +* Path 8 (M much larger than N, JOBU='A', JOBVT='O') +* M left singular vectors to be computed in U and +* N right singular vectors to be overwritten on A +* + IF( LWORK.GE.2*N*N+MAX( N+M, 3*N ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is N by N +* + LDWRKU = LDA + IR = IU + LDWRKU*N + LDWRKR = N + ELSE +* +* WORK(IU) is N by N and WORK(IR) is N by N +* + LDWRKU = N + IR = IU + LDWRKU*N + LDWRKR = N + END IF + ITAU = IR + LDWRKR*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to +* WORK(IR) +* (CWorkspace: need 2*N*N+3*N, +* prefer 2*N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', N, N, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need 2*N*N+3*N-1, +* prefer 2*N*N+2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in WORK(IR) +* (CWorkspace: need 2*N*N) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, WORK( IU ), + $ LDWRKU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, N, CONE, U, LDU, + $ WORK( IU ), LDWRKU, CZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL ZLACPY( 'F', M, N, A, LDA, U, LDU ) +* +* Copy right singular vectors of R from WORK(IR) to A +* + CALL ZLACPY( 'F', N, N, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N+M, prefer N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Zero out below R in A +* + IF( N .GT. 1 ) THEN + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ A( 2, 1 ), LDA ) + END IF +* +* Bidiagonalize R in A +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in A +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in A +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A, + $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* + END IF +* + ELSE IF( WNTVAS ) THEN +* +* Path 9 (M much larger than N, JOBU='A', JOBVT='S' +* or 'A') +* M left singular vectors to be computed in U and +* N right singular vectors to be computed in VT +* + IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*N ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is N by N +* + LDWRKU = N + END IF + ITAU = IU + LDWRKU*N + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R to WORK(IU), zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IU+1 ), LDWRKU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in WORK(IU), copying result to VT +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT, + $ LDVT ) +* +* Generate left bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (CWorkspace: need N*N+3*N-1, +* prefer N*N+2*N+(N-1)*NB) +* (RWorkspace: need 0) +* + CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of R in WORK(IU) and computing +* right singular vectors of R in VT +* (CWorkspace: need N*N) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT, + $ LDVT, WORK( IU ), LDWRKU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply Q in U by left singular vectors of R in +* WORK(IU), storing result in A +* (CWorkspace: need N*N) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, N, CONE, U, LDU, + $ WORK( IU ), LDWRKU, CZERO, A, LDA ) +* +* Copy left singular vectors of A from A to U +* + CALL ZLACPY( 'F', M, N, A, LDA, U, LDU ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + N +* +* Compute A=Q*R, copying result to U +* (CWorkspace: need 2*N, prefer N+N*NB) +* (RWorkspace: 0) +* + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) +* +* Generate Q in U +* (CWorkspace: need N+M, prefer N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy R from A to VT, zeroing out below it +* + CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT ) + IF( N.GT.1 ) + $ CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ VT( 2, 1 ), LDVT ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize R in VT +* (CWorkspace: need 3*N, prefer 2*N+2*N*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( N, N, VT, LDVT, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply Q in U by left bidiagonalizing vectors +* in VT +* (CWorkspace: need 2*N+M, prefer 2*N+M*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT, + $ WORK( ITAUQ ), U, LDU, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in VT +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + N +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* M .LT. MNTHR +* +* Path 10 (M at least N, but not much larger) +* Reduce to bidiagonal form without QR decomposition +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + N + IWORK = ITAUP + N +* +* Bidiagonalize A +* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB) +* (RWorkspace: need N) +* + CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB) +* (RWorkspace: 0) +* + CALL ZLACPY( 'L', M, N, A, LDA, U, LDU ) + IF( WNTUS ) + $ NCU = N + IF( WNTUA ) + $ NCU = M + CALL ZUNGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT ) + CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (CWorkspace: need 3*N, prefer 2*N+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IRWORK = IE + N + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), A, + $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + END IF +* + END IF +* + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce using the LQ decomposition (if +* sufficient workspace available) +* + IF( N.GE.MNTHR ) THEN +* + IF( WNTVN ) THEN +* +* Path 1t(N much larger than M, JOBVT='N') +* No right singular vectors to be computed +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Zero out above L +* + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ), + $ LDA ) + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUO .OR. WNTUAS ) THEN +* +* If left singular vectors desired, generate Q +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IRWORK = IE + M + NRU = 0 + IF( WNTUO .OR. WNTUAS ) + $ NRU = M +* +* Perform bidiagonal QR iteration, computing left singular +* vectors of A in A if desired +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, 0, NRU, 0, S, RWORK( IE ), CDUM, 1, + $ A, LDA, CDUM, 1, RWORK( IRWORK ), INFO ) +* +* If left singular vectors desired in U, copy them there +* + IF( WNTUAS ) + $ CALL ZLACPY( 'F', M, M, A, LDA, U, LDU ) +* + ELSE IF( WNTVO .AND. WNTUN ) THEN +* +* Path 2t(N much larger than M, JOBU='N', JOBVT='O') +* M right singular vectors to be overwritten on A and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR) and zero out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, WORK( IR ), LDWRKR, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L +* (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) + IU = ITAUQ +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (CWorkspace: need M*M+M, prefer M*M+M*N) +* (RWorkspace: 0) +* + DO 30 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL ZGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, CZERO, + $ WORK( IU ), LDWRKU ) + CALL ZLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 30 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'L', M, N, 0, 0, S, RWORK( IE ), A, LDA, + $ CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVO .AND. WNTUAS ) THEN +* +* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O') +* M right singular vectors to be overwritten on A and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is LDA by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = LDA + ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN +* +* WORK(IU) is LDA by N and WORK(IR) is M by M +* + LDWRKU = LDA + CHUNK = N + LDWRKR = M + ELSE +* +* WORK(IU) is M by CHUNK and WORK(IR) is M by M +* + LDWRKU = M + CHUNK = ( LWORK-M*M ) / M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing about above it +* + CALL ZLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U, copying result to WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, U, LDU, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR ) +* +* Generate right vectors bidiagonalizing L in WORK(IR) +* (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U, and computing right +* singular vectors of L in WORK(IR) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) + IU = ITAUQ +* +* Multiply right singular vectors of L in WORK(IR) by Q +* in A, storing result in WORK(IU) and copying to A +* (CWorkspace: need M*M+M, prefer M*M+M*N)) +* (RWorkspace: 0) +* + DO 40 I = 1, N, CHUNK + BLK = MIN( N-I+1, CHUNK ) + CALL ZGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ), + $ LDWRKR, A( 1, I ), LDA, CZERO, + $ WORK( IU ), LDWRKU ) + CALL ZLACPY( 'F', M, BLK, WORK( IU ), LDWRKU, + $ A( 1, I ), LDA ) + 40 CONTINUE +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ), + $ LDU ) +* +* Generate Q in A +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, U, LDU, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in A +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'P', 'L', 'C', M, N, M, U, LDU, + $ WORK( ITAUP ), A, LDA, WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left vectors bidiagonalizing L in U +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), A, LDA, + $ U, LDU, CDUM, 1, RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTVS ) THEN +* + IF( WNTUN ) THEN +* +* Path 4t(N much larger than M, JOBU='N', JOBVT='S') +* M right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right vectors bidiagonalizing L in +* WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in A, storing result in VT +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ), + $ LDWRKR, A, LDA, CZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy result to VT +* + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ A( 1, 2 ), LDA ) +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT, + $ LDVT, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 5t(N much larger than M, JOBU='O', JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out below it +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (CWorkspace: need 2*M*M+3*M, +* prefer 2*M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need 2*M*M+3*M-1, +* prefer 2*M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (CWorkspace: need 2*M*M) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), + $ LDWRKU, A, LDA, CZERO, VT, LDVT ) +* +* Copy left singular vectors of L to A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL ZLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ A( 1, 2 ), LDA ) +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right vectors bidiagonalizing L by Q in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors of L in A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 6t(N much larger than M, JOBU='S' or 'A', +* JOBVT='S') +* M right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+3*M ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by N +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is LDA by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) +* +* Generate Q in A +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need M*M+3*M-1, +* prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in A, storing result in VT +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), + $ LDWRKU, A, LDA, CZERO, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ U( 1, 2 ), LDU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, U, LDU, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'P', 'L', 'C', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + END IF +* + ELSE IF( WNTVA ) THEN +* + IF( WNTUN ) THEN +* +* Path 7t(N much larger than M, JOBU='N', JOBVT='A') +* N right singular vectors to be computed in VT and +* no left singular vectors to be computed +* + IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IR = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IR) is LDA by M +* + LDWRKR = LDA + ELSE +* +* WORK(IR) is M by M +* + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Copy L to WORK(IR), zeroing out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IR ), + $ LDWRKR ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IR+LDWRKR ), LDWRKR ) +* +* Generate Q in VT +* (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IR) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, WORK( IR ), LDWRKR, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate right bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need M*M+3*M-1, +* prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of L in WORK(IR) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ), + $ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IR) by +* Q in VT, storing result in A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ), + $ LDWRKR, VT, LDVT, CZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M+N, prefer M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ A( 1, 2 ), LDA ) +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT, + $ LDVT, CDUM, 1, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUO ) THEN +* +* Path 8t(N much larger than M, JOBU='O', JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be overwritten on A +* + IF( LWORK.GE.2*M*M+MAX( N+M, 3*M ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+2*LDA*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is LDA by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = LDA + ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN +* +* WORK(IU) is LDA by M and WORK(IR) is M by M +* + LDWRKU = LDA + IR = IU + LDWRKU*M + LDWRKR = M + ELSE +* +* WORK(IU) is M by M and WORK(IR) is M by M +* + LDWRKU = M + IR = IU + LDWRKU*M + LDWRKR = M + END IF + ITAU = IR + LDWRKR*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to +* WORK(IR) +* (CWorkspace: need 2*M*M+3*M, +* prefer 2*M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, M, WORK( IU ), LDWRKU, + $ WORK( IR ), LDWRKR ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need 2*M*M+3*M-1, +* prefer 2*M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in WORK(IR) +* (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR, + $ WORK( ITAUQ ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in WORK(IR) and computing +* right singular vectors of L in WORK(IU) +* (CWorkspace: need 2*M*M) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IU ), LDWRKU, WORK( IR ), + $ LDWRKR, CDUM, 1, RWORK( IRWORK ), + $ INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), + $ LDWRKU, VT, LDVT, CZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* +* Copy left singular vectors of A from WORK(IR) to A +* + CALL ZLACPY( 'F', M, M, WORK( IR ), LDWRKR, A, + $ LDA ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M+N, prefer M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Zero out above L in A +* + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ A( 1, 2 ), LDA ) +* +* Bidiagonalize L in A +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in A by Q +* in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'P', 'L', 'C', M, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in A and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + ELSE IF( WNTUAS ) THEN +* +* Path 9t(N much larger than M, JOBU='S' or 'A', +* JOBVT='A') +* N right singular vectors to be computed in VT and +* M left singular vectors to be computed in U +* + IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN +* +* Sufficient workspace for a fast algorithm +* + IU = 1 + IF( LWORK.GE.WRKBL+LDA*M ) THEN +* +* WORK(IU) is LDA by M +* + LDWRKU = LDA + ELSE +* +* WORK(IU) is M by M +* + LDWRKU = M + END IF + ITAU = IU + LDWRKU*M + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to WORK(IU), zeroing out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, WORK( IU ), + $ LDWRKU ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( IU+LDWRKU ), LDWRKU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in WORK(IU), copying result to U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, WORK( IU ), LDWRKU, S, + $ RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'L', M, M, WORK( IU ), LDWRKU, U, + $ LDU ) +* +* Generate right bidiagonalizing vectors in WORK(IU) +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU, + $ WORK( ITAUP ), WORK( IWORK ), + $ LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of L in U and computing right +* singular vectors of L in WORK(IU) +* (CWorkspace: need M*M) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ), + $ WORK( IU ), LDWRKU, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* +* Multiply right singular vectors of L in WORK(IU) by +* Q in VT, storing result in A +* (CWorkspace: need M*M) +* (RWorkspace: 0) +* + CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ), + $ LDWRKU, VT, LDVT, CZERO, A, LDA ) +* +* Copy right singular vectors of A from A to VT +* + CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT ) +* + ELSE +* +* Insufficient workspace for a fast algorithm +* + ITAU = 1 + IWORK = ITAU + M +* +* Compute A=L*Q, copying result to VT +* (CWorkspace: need 2*M, prefer M+M*NB) +* (RWorkspace: 0) +* + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) +* +* Generate Q in VT +* (CWorkspace: need M+N, prefer M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Copy L to U, zeroing out above it +* + CALL ZLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ U( 1, 2 ), LDU ) + IE = 1 + ITAUQ = ITAU + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize L in U +* (CWorkspace: need 3*M, prefer 2*M+2*M*NB) +* (RWorkspace: need M) +* + CALL ZGEBRD( M, M, U, LDU, S, RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Multiply right bidiagonalizing vectors in U by Q +* in VT +* (CWorkspace: need 2*M+N, prefer 2*M+N*NB) +* (RWorkspace: 0) +* + CALL ZUNMBR( 'P', 'L', 'C', M, N, M, U, LDU, + $ WORK( ITAUP ), VT, LDVT, + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) +* +* Generate left bidiagonalizing vectors in U +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + IRWORK = IE + M +* +* Perform bidiagonal QR iteration, computing left +* singular vectors of A in U and computing right +* singular vectors of A in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, + $ RWORK( IRWORK ), INFO ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* N .LT. MNTHR +* +* Path 10t(N greater than M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* + IE = 1 + ITAUQ = 1 + ITAUP = ITAUQ + M + IWORK = ITAUP + M +* +* Bidiagonalize A +* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* (RWorkspace: M) +* + CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ), + $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1, + $ IERR ) + IF( WNTUAS ) THEN +* +* If left singular vectors desired in U, copy result to U +* and generate left bidiagonalizing vectors in U +* (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL ZLACPY( 'L', M, M, A, LDA, U, LDU ) + CALL ZUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVAS ) THEN +* +* If right singular vectors desired in VT, copy result to +* VT and generate right bidiagonalizing vectors in VT +* (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB) +* (RWorkspace: 0) +* + CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT ) + IF( WNTVA ) + $ NRVT = N + IF( WNTVS ) + $ NRVT = M + CALL ZUNGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTUO ) THEN +* +* If left singular vectors desired in A, generate left +* bidiagonalizing vectors in A +* (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IF( WNTVO ) THEN +* +* If right singular vectors desired in A, generate right +* bidiagonalizing vectors in A +* (CWorkspace: need 3*M, prefer 2*M+M*NB) +* (RWorkspace: 0) +* + CALL ZUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ), + $ WORK( IWORK ), LWORK-IWORK+1, IERR ) + END IF + IRWORK = IE + M + IF( WNTUAS .OR. WNTUO ) + $ NRU = M + IF( WNTUN ) + $ NRU = 0 + IF( WNTVAS .OR. WNTVO ) + $ NCVT = N + IF( WNTVN ) + $ NCVT = 0 + IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT, + $ LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in U and computing right singular +* vectors in A +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), A, + $ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + ELSE +* +* Perform bidiagonal QR iteration, if desired, computing +* left singular vectors in A and computing right singular +* vectors in VT +* (CWorkspace: 0) +* (RWorkspace: need BDSPAC) +* + CALL ZBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT, + $ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ), + $ INFO ) + END IF +* + END IF +* + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM ) + $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, + $ RWORK( IE ), MINMN, IERR ) + IF( ANRM.LT.SMLNUM ) + $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN, + $ IERR ) + IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM ) + $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, + $ RWORK( IE ), MINMN, IERR ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = MAXWRK +* + RETURN +* +* End of ZGESVD +* + END diff --git a/dspl/liblapack/SRC/zgesvdx.f b/dspl/liblapack/SRC/zgesvdx.f new file mode 100644 index 0000000..56b5cd4 --- /dev/null +++ b/dspl/liblapack/SRC/zgesvdx.f @@ -0,0 +1,857 @@ +*> \brief ZGESVDX computes the singular value decomposition (SVD) for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGESVDX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CGESVDX( JOBU, JOBVT, RANGE, M, N, A, LDA, VL, VU, +* $ IL, IU, NS, S, U, LDU, VT, LDVT, WORK, +* $ LWORK, RWORK, IWORK, INFO ) +* +* +* .. Scalar Arguments .. +* CHARACTER JOBU, JOBVT, RANGE +* INTEGER IL, INFO, IU, LDA, LDU, LDVT, LWORK, M, N, NS +* DOUBLE PRECISION VL, VU +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION S( * ), RWORK( * ) +* COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGESVDX computes the singular value decomposition (SVD) of a complex +*> M-by-N matrix A, optionally computing the left and/or right singular +*> vectors. The SVD is written +*> +*> A = U * SIGMA * transpose(V) +*> +*> where SIGMA is an M-by-N matrix which is zero except for its +*> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and +*> V is an N-by-N unitary matrix. The diagonal elements of SIGMA +*> are the singular values of A; they are real and non-negative, and +*> are returned in descending order. The first min(m,n) columns of +*> U and V are the left and right singular vectors of A. +*> +*> ZGESVDX uses an eigenvalue problem for obtaining the SVD, which +*> allows for the computation of a subset of singular values and +*> vectors. See DBDSVDX for details. +*> +*> Note that the routine returns V**T, not V. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> Specifies options for computing all or part of the matrix U: +*> = 'V': the first min(m,n) columns of U (the left singular +*> vectors) or as specified by RANGE are returned in +*> the array U; +*> = 'N': no columns of U (no left singular vectors) are +*> computed. +*> \endverbatim +*> +*> \param[in] JOBVT +*> \verbatim +*> JOBVT is CHARACTER*1 +*> Specifies options for computing all or part of the matrix +*> V**T: +*> = 'V': the first min(m,n) rows of V**T (the right singular +*> vectors) or as specified by RANGE are returned in +*> the array VT; +*> = 'N': no rows of V**T (no right singular vectors) are +*> computed. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all singular values will be found. +*> = 'V': all singular values in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th singular values will be found. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the contents of A are destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for singular values. VU > VL. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for singular values. VU > VL. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest singular value to be returned. +*> 1 <= IL <= IU <= min(M,N), if min(M,N) > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest singular value to be returned. +*> 1 <= IL <= IU <= min(M,N), if min(M,N) > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[out] NS +*> \verbatim +*> NS is INTEGER +*> The total number of singular values found, +*> 0 <= NS <= min(M,N). +*> If RANGE = 'A', NS = min(M,N); if RANGE = 'I', NS = IU-IL+1. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (min(M,N)) +*> The singular values of A, sorted so that S(i) >= S(i+1). +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX*16 array, dimension (LDU,UCOL) +*> If JOBU = 'V', U contains columns of U (the left singular +*> vectors, stored columnwise) as specified by RANGE; if +*> JOBU = 'N', U is not referenced. +*> Note: The user must ensure that UCOL >= NS; if RANGE = 'V', +*> the exact value of NS is not known in advance and an upper +*> bound must be used. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= 1; if +*> JOBU = 'V', LDU >= M. +*> \endverbatim +*> +*> \param[out] VT +*> \verbatim +*> VT is COMPLEX*16 array, dimension (LDVT,N) +*> If JOBVT = 'V', VT contains the rows of V**T (the right singular +*> vectors, stored rowwise) as specified by RANGE; if JOBVT = 'N', +*> VT is not referenced. +*> Note: The user must ensure that LDVT >= NS; if RANGE = 'V', +*> the exact value of NS is not known in advance and an upper +*> bound must be used. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER +*> The leading dimension of the array VT. LDVT >= 1; if +*> JOBVT = 'V', LDVT >= NS (see above). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK; +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> LWORK >= MAX(1,MIN(M,N)*(MIN(M,N)+4)) for the paths (see +*> comments inside the code): +*> - PATH 1 (M much larger than N) +*> - PATH 1t (N much larger than M) +*> LWORK >= MAX(1,MIN(M,N)*2+MAX(M,N)) for the other paths. +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) +*> LRWORK >= MIN(M,N)*(MIN(M,N)*2+15*MIN(M,N)). +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (12*MIN(M,N)) +*> If INFO = 0, the first NS elements of IWORK are zero. If INFO > 0, +*> then IWORK contains the indices of the eigenvectors that failed +*> to converge in DBDSVDX/DSTEVX. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge +*> in DBDSVDX/DSTEVX. +*> if INFO = N*2 + 1, an internal error occurred in +*> DBDSVDX +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16GEsing +* +* ===================================================================== + SUBROUTINE ZGESVDX( JOBU, JOBVT, RANGE, M, N, A, LDA, VL, VU, + $ IL, IU, NS, S, U, LDU, VT, LDVT, WORK, + $ LWORK, RWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBU, JOBVT, RANGE + INTEGER IL, INFO, IU, LDA, LDU, LDVT, LWORK, M, N, NS + DOUBLE PRECISION VL, VU +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION S( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + CHARACTER JOBZ, RNGTGK + LOGICAL ALLS, INDS, LQUERY, VALS, WANTU, WANTVT + INTEGER I, ID, IE, IERR, ILQF, ILTGK, IQRF, ISCL, + $ ITAU, ITAUP, ITAUQ, ITEMP, ITEMPR, ITGKZ, + $ IUTGK, J, K, MAXWRK, MINMN, MINWRK, MNTHR + DOUBLE PRECISION ABSTOL, ANRM, BIGNUM, EPS, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL ZGEBRD, ZGELQF, ZGEQRF, ZLASCL, ZLASET, ZLACPY, + $ ZUNMLQ, ZUNMBR, ZUNMQR, DBDSVDX, DLASCL, XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + NS = 0 + INFO = 0 + ABSTOL = 2*DLAMCH('S') + LQUERY = ( LWORK.EQ.-1 ) + MINMN = MIN( M, N ) + + WANTU = LSAME( JOBU, 'V' ) + WANTVT = LSAME( JOBVT, 'V' ) + IF( WANTU .OR. WANTVT ) THEN + JOBZ = 'V' + ELSE + JOBZ = 'N' + END IF + ALLS = LSAME( RANGE, 'A' ) + VALS = LSAME( RANGE, 'V' ) + INDS = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.LSAME( JOBU, 'V' ) .AND. + $ .NOT.LSAME( JOBU, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( JOBVT, 'V' ) .AND. + $ .NOT.LSAME( JOBVT, 'N' ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLS .OR. VALS .OR. INDS ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( M.GT.LDA ) THEN + INFO = -7 + ELSE IF( MINMN.GT.0 ) THEN + IF( VALS ) THEN + IF( VL.LT.ZERO ) THEN + INFO = -8 + ELSE IF( VU.LE.VL ) THEN + INFO = -9 + END IF + ELSE IF( INDS ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, MINMN ) ) THEN + INFO = -10 + ELSE IF( IU.LT.MIN( MINMN, IL ) .OR. IU.GT.MINMN ) THEN + INFO = -11 + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( WANTU .AND. LDU.LT.M ) THEN + INFO = -15 + ELSE IF( WANTVT ) THEN + IF( INDS ) THEN + IF( LDVT.LT.IU-IL+1 ) THEN + INFO = -17 + END IF + ELSE IF( LDVT.LT.MINMN ) THEN + INFO = -17 + END IF + END IF + END IF + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + IF( MINMN.GT.0 ) THEN + IF( M.GE.N ) THEN + MNTHR = ILAENV( 6, 'ZGESVD', JOBU // JOBVT, M, N, 0, 0 ) + IF( M.GE.MNTHR ) THEN +* +* Path 1 (M much larger than N) +* + MINWRK = N*(N+5) + MAXWRK = N + N*ILAENV(1,'ZGEQRF',' ',M,N,-1,-1) + MAXWRK = MAX(MAXWRK, + $ N*N+2*N+2*N*ILAENV(1,'ZGEBRD',' ',N,N,-1,-1)) + IF (WANTU .OR. WANTVT) THEN + MAXWRK = MAX(MAXWRK, + $ N*N+2*N+N*ILAENV(1,'ZUNMQR','LN',N,N,N,-1)) + END IF + ELSE +* +* Path 2 (M at least N, but not much larger) +* + MINWRK = 3*N + M + MAXWRK = 2*N + (M+N)*ILAENV(1,'ZGEBRD',' ',M,N,-1,-1) + IF (WANTU .OR. WANTVT) THEN + MAXWRK = MAX(MAXWRK, + $ 2*N+N*ILAENV(1,'ZUNMQR','LN',N,N,N,-1)) + END IF + END IF + ELSE + MNTHR = ILAENV( 6, 'ZGESVD', JOBU // JOBVT, M, N, 0, 0 ) + IF( N.GE.MNTHR ) THEN +* +* Path 1t (N much larger than M) +* + MINWRK = M*(M+5) + MAXWRK = M + M*ILAENV(1,'ZGELQF',' ',M,N,-1,-1) + MAXWRK = MAX(MAXWRK, + $ M*M+2*M+2*M*ILAENV(1,'ZGEBRD',' ',M,M,-1,-1)) + IF (WANTU .OR. WANTVT) THEN + MAXWRK = MAX(MAXWRK, + $ M*M+2*M+M*ILAENV(1,'ZUNMQR','LN',M,M,M,-1)) + END IF + ELSE +* +* Path 2t (N greater than M, but not much larger) +* +* + MINWRK = 3*M + N + MAXWRK = 2*M + (M+N)*ILAENV(1,'ZGEBRD',' ',M,N,-1,-1) + IF (WANTU .OR. WANTVT) THEN + MAXWRK = MAX(MAXWRK, + $ 2*M+M*ILAENV(1,'ZUNMQR','LN',M,M,M,-1)) + END IF + END IF + END IF + END IF + MAXWRK = MAX( MAXWRK, MINWRK ) + WORK( 1 ) = DCMPLX( DBLE( MAXWRK ), ZERO ) +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGESVDX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* +* Set singular values indices accord to RANGE='A'. +* + IF( ALLS ) THEN + RNGTGK = 'I' + ILTGK = 1 + IUTGK = MIN( M, N ) + ELSE IF( INDS ) THEN + RNGTGK = 'I' + ILTGK = IL + IUTGK = IU + ELSE + RNGTGK = 'V' + ILTGK = 0 + IUTGK = 0 + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', M, N, A, LDA, DUM ) + ISCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ISCL = 1 + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + ELSE IF( ANRM.GT.BIGNUM ) THEN + ISCL = 1 + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + END IF +* + IF( M.GE.N ) THEN +* +* A has at least as many rows as columns. If A has sufficiently +* more rows than columns, first reduce A using the QR +* decomposition. +* + IF( M.GE.MNTHR ) THEN +* +* Path 1 (M much larger than N): +* A = Q * R = Q * ( QB * B * PB**T ) +* = Q * ( QB * ( UB * S * VB**T ) * PB**T ) +* U = Q * QB * UB; V**T = VB**T * PB**T +* +* Compute A=Q*R +* (Workspace: need 2*N, prefer N+N*NB) +* + ITAU = 1 + ITEMP = ITAU + N + CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) +* +* Copy R into WORK and bidiagonalize it: +* (Workspace: need N*N+3*N, prefer N*N+N+2*N*NB) +* + IQRF = ITEMP + ITAUQ = ITEMP + N*N + ITAUP = ITAUQ + N + ITEMP = ITAUP + N + ID = 1 + IE = ID + N + ITGKZ = IE + N + CALL ZLACPY( 'U', N, N, A, LDA, WORK( IQRF ), N ) + CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, + $ WORK( IQRF+1 ), N ) + CALL ZGEBRD( N, N, WORK( IQRF ), N, RWORK( ID ), + $ RWORK( IE ), WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( ITEMP ), LWORK-ITEMP+1, INFO ) + ITEMPR = ITGKZ + N*(N*2+1) +* +* Solve eigenvalue problem TGK*Z=Z*S. +* (Workspace: need 2*N*N+14*N) +* + CALL DBDSVDX( 'U', JOBZ, RNGTGK, N, RWORK( ID ), + $ RWORK( IE ), VL, VU, ILTGK, IUTGK, NS, S, + $ RWORK( ITGKZ ), N*2, RWORK( ITEMPR ), + $ IWORK, INFO) +* +* If needed, compute left singular vectors. +* + IF( WANTU ) THEN + K = ITGKZ + DO I = 1, NS + DO J = 1, N + U( J, I ) = DCMPLX( RWORK( K ), ZERO ) + K = K + 1 + END DO + K = K + N + END DO + CALL ZLASET( 'A', M-N, NS, CZERO, CZERO, U( N+1,1 ), LDU) +* +* Call ZUNMBR to compute QB*UB. +* (Workspace in WORK( ITEMP ): need N, prefer N*NB) +* + CALL ZUNMBR( 'Q', 'L', 'N', N, NS, N, WORK( IQRF ), N, + $ WORK( ITAUQ ), U, LDU, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) +* +* Call ZUNMQR to compute Q*(QB*UB). +* (Workspace in WORK( ITEMP ): need N, prefer N*NB) +* + CALL ZUNMQR( 'L', 'N', M, NS, N, A, LDA, + $ WORK( ITAU ), U, LDU, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF +* +* If needed, compute right singular vectors. +* + IF( WANTVT) THEN + K = ITGKZ + N + DO I = 1, NS + DO J = 1, N + VT( I, J ) = DCMPLX( RWORK( K ), ZERO ) + K = K + 1 + END DO + K = K + N + END DO +* +* Call ZUNMBR to compute VB**T * PB**T +* (Workspace in WORK( ITEMP ): need N, prefer N*NB) +* + CALL ZUNMBR( 'P', 'R', 'C', NS, N, N, WORK( IQRF ), N, + $ WORK( ITAUP ), VT, LDVT, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF + ELSE +* +* Path 2 (M at least N, but not much larger) +* Reduce A to bidiagonal form without QR decomposition +* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T +* U = QB * UB; V**T = VB**T * PB**T +* +* Bidiagonalize A +* (Workspace: need 2*N+M, prefer 2*N+(M+N)*NB) +* + ITAUQ = 1 + ITAUP = ITAUQ + N + ITEMP = ITAUP + N + ID = 1 + IE = ID + N + ITGKZ = IE + N + CALL ZGEBRD( M, N, A, LDA, RWORK( ID ), RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + ITEMPR = ITGKZ + N*(N*2+1) +* +* Solve eigenvalue problem TGK*Z=Z*S. +* (Workspace: need 2*N*N+14*N) +* + CALL DBDSVDX( 'U', JOBZ, RNGTGK, N, RWORK( ID ), + $ RWORK( IE ), VL, VU, ILTGK, IUTGK, NS, S, + $ RWORK( ITGKZ ), N*2, RWORK( ITEMPR ), + $ IWORK, INFO) +* +* If needed, compute left singular vectors. +* + IF( WANTU ) THEN + K = ITGKZ + DO I = 1, NS + DO J = 1, N + U( J, I ) = DCMPLX( RWORK( K ), ZERO ) + K = K + 1 + END DO + K = K + N + END DO + CALL ZLASET( 'A', M-N, NS, CZERO, CZERO, U( N+1,1 ), LDU) +* +* Call ZUNMBR to compute QB*UB. +* (Workspace in WORK( ITEMP ): need N, prefer N*NB) +* + CALL ZUNMBR( 'Q', 'L', 'N', M, NS, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( ITEMP ), + $ LWORK-ITEMP+1, IERR ) + END IF +* +* If needed, compute right singular vectors. +* + IF( WANTVT) THEN + K = ITGKZ + N + DO I = 1, NS + DO J = 1, N + VT( I, J ) = DCMPLX( RWORK( K ), ZERO ) + K = K + 1 + END DO + K = K + N + END DO +* +* Call ZUNMBR to compute VB**T * PB**T +* (Workspace in WORK( ITEMP ): need N, prefer N*NB) +* + CALL ZUNMBR( 'P', 'R', 'C', NS, N, N, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( ITEMP ), + $ LWORK-ITEMP+1, IERR ) + END IF + END IF + ELSE +* +* A has more columns than rows. If A has sufficiently more +* columns than rows, first reduce A using the LQ decomposition. +* + IF( N.GE.MNTHR ) THEN +* +* Path 1t (N much larger than M): +* A = L * Q = ( QB * B * PB**T ) * Q +* = ( QB * ( UB * S * VB**T ) * PB**T ) * Q +* U = QB * UB ; V**T = VB**T * PB**T * Q +* +* Compute A=L*Q +* (Workspace: need 2*M, prefer M+M*NB) +* + ITAU = 1 + ITEMP = ITAU + M + CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + +* Copy L into WORK and bidiagonalize it: +* (Workspace in WORK( ITEMP ): need M*M+3*M, prefer M*M+M+2*M*NB) +* + ILQF = ITEMP + ITAUQ = ILQF + M*M + ITAUP = ITAUQ + M + ITEMP = ITAUP + M + ID = 1 + IE = ID + M + ITGKZ = IE + M + CALL ZLACPY( 'L', M, M, A, LDA, WORK( ILQF ), M ) + CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, + $ WORK( ILQF+M ), M ) + CALL ZGEBRD( M, M, WORK( ILQF ), M, RWORK( ID ), + $ RWORK( IE ), WORK( ITAUQ ), WORK( ITAUP ), + $ WORK( ITEMP ), LWORK-ITEMP+1, INFO ) + ITEMPR = ITGKZ + M*(M*2+1) +* +* Solve eigenvalue problem TGK*Z=Z*S. +* (Workspace: need 2*M*M+14*M) +* + CALL DBDSVDX( 'U', JOBZ, RNGTGK, M, RWORK( ID ), + $ RWORK( IE ), VL, VU, ILTGK, IUTGK, NS, S, + $ RWORK( ITGKZ ), M*2, RWORK( ITEMPR ), + $ IWORK, INFO) +* +* If needed, compute left singular vectors. +* + IF( WANTU ) THEN + K = ITGKZ + DO I = 1, NS + DO J = 1, M + U( J, I ) = DCMPLX( RWORK( K ), ZERO ) + K = K + 1 + END DO + K = K + M + END DO +* +* Call ZUNMBR to compute QB*UB. +* (Workspace in WORK( ITEMP ): need M, prefer M*NB) +* + CALL ZUNMBR( 'Q', 'L', 'N', M, NS, M, WORK( ILQF ), M, + $ WORK( ITAUQ ), U, LDU, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF +* +* If needed, compute right singular vectors. +* + IF( WANTVT) THEN + K = ITGKZ + M + DO I = 1, NS + DO J = 1, M + VT( I, J ) = DCMPLX( RWORK( K ), ZERO ) + K = K + 1 + END DO + K = K + M + END DO + CALL ZLASET( 'A', NS, N-M, CZERO, CZERO, + $ VT( 1,M+1 ), LDVT ) +* +* Call ZUNMBR to compute (VB**T)*(PB**T) +* (Workspace in WORK( ITEMP ): need M, prefer M*NB) +* + CALL ZUNMBR( 'P', 'R', 'C', NS, M, M, WORK( ILQF ), M, + $ WORK( ITAUP ), VT, LDVT, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) +* +* Call ZUNMLQ to compute ((VB**T)*(PB**T))*Q. +* (Workspace in WORK( ITEMP ): need M, prefer M*NB) +* + CALL ZUNMLQ( 'R', 'N', NS, N, M, A, LDA, + $ WORK( ITAU ), VT, LDVT, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF + ELSE +* +* Path 2t (N greater than M, but not much larger) +* Reduce to bidiagonal form without LQ decomposition +* A = QB * B * PB**T = QB * ( UB * S * VB**T ) * PB**T +* U = QB * UB; V**T = VB**T * PB**T +* +* Bidiagonalize A +* (Workspace: need 2*M+N, prefer 2*M+(M+N)*NB) +* + ITAUQ = 1 + ITAUP = ITAUQ + M + ITEMP = ITAUP + M + ID = 1 + IE = ID + M + ITGKZ = IE + M + CALL ZGEBRD( M, N, A, LDA, RWORK( ID ), RWORK( IE ), + $ WORK( ITAUQ ), WORK( ITAUP ), WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + ITEMPR = ITGKZ + M*(M*2+1) +* +* Solve eigenvalue problem TGK*Z=Z*S. +* (Workspace: need 2*M*M+14*M) +* + CALL DBDSVDX( 'L', JOBZ, RNGTGK, M, RWORK( ID ), + $ RWORK( IE ), VL, VU, ILTGK, IUTGK, NS, S, + $ RWORK( ITGKZ ), M*2, RWORK( ITEMPR ), + $ IWORK, INFO) +* +* If needed, compute left singular vectors. +* + IF( WANTU ) THEN + K = ITGKZ + DO I = 1, NS + DO J = 1, M + U( J, I ) = DCMPLX( RWORK( K ), ZERO ) + K = K + 1 + END DO + K = K + M + END DO +* +* Call ZUNMBR to compute QB*UB. +* (Workspace in WORK( ITEMP ): need M, prefer M*NB) +* + CALL ZUNMBR( 'Q', 'L', 'N', M, NS, N, A, LDA, + $ WORK( ITAUQ ), U, LDU, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF +* +* If needed, compute right singular vectors. +* + IF( WANTVT) THEN + K = ITGKZ + M + DO I = 1, NS + DO J = 1, M + VT( I, J ) = DCMPLX( RWORK( K ), ZERO ) + K = K + 1 + END DO + K = K + M + END DO + CALL ZLASET( 'A', NS, N-M, CZERO, CZERO, + $ VT( 1,M+1 ), LDVT ) +* +* Call ZUNMBR to compute VB**T * PB**T +* (Workspace in WORK( ITEMP ): need M, prefer M*NB) +* + CALL ZUNMBR( 'P', 'R', 'C', NS, N, M, A, LDA, + $ WORK( ITAUP ), VT, LDVT, WORK( ITEMP ), + $ LWORK-ITEMP+1, INFO ) + END IF + END IF + END IF +* +* Undo scaling if necessary +* + IF( ISCL.EQ.1 ) THEN + IF( ANRM.GT.BIGNUM ) + $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, + $ S, MINMN, INFO ) + IF( ANRM.LT.SMLNUM ) + $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, + $ S, MINMN, INFO ) + END IF +* +* Return optimal workspace in WORK(1) +* + WORK( 1 ) = DCMPLX( DBLE( MAXWRK ), ZERO ) +* + RETURN +* +* End of ZGESVDX +* + END diff --git a/dspl/liblapack/SRC/zgesvj.f b/dspl/liblapack/SRC/zgesvj.f new file mode 100644 index 0000000..fd32f92 --- /dev/null +++ b/dspl/liblapack/SRC/zgesvj.f @@ -0,0 +1,1443 @@ +*> \brief ZGESVJ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGESVJ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, +* LDV, CWORK, LWORK, RWORK, LRWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDV, LWORK, LRWORK, M, MV, N +* CHARACTER*1 JOBA, JOBU, JOBV +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), V( LDV, * ), CWORK( LWORK ) +* DOUBLE PRECISION RWORK( LRWORK ), SVA( N ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGESVJ computes the singular value decomposition (SVD) of a complex +*> M-by-N matrix A, where M >= N. The SVD of A is written as +*> [++] [xx] [x0] [xx] +*> A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx] +*> [++] [xx] +*> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal +*> matrix, and V is an N-by-N unitary matrix. The diagonal elements +*> of SIGMA are the singular values of A. The columns of U and V are the +*> left and the right singular vectors of A, respectively. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBA +*> \verbatim +*> JOBA is CHARACTER*1 +*> Specifies the structure of A. +*> = 'L': The input matrix A is lower triangular; +*> = 'U': The input matrix A is upper triangular; +*> = 'G': The input matrix A is general M-by-N matrix, M >= N. +*> \endverbatim +*> +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> Specifies whether to compute the left singular vectors +*> (columns of U): +*> = 'U' or 'F': The left singular vectors corresponding to the nonzero +*> singular values are computed and returned in the leading +*> columns of A. See more details in the description of A. +*> The default numerical orthogonality threshold is set to +*> approximately TOL=CTOL*EPS, CTOL=SQRT(M), EPS=DLAMCH('E'). +*> = 'C': Analogous to JOBU='U', except that user can control the +*> level of numerical orthogonality of the computed left +*> singular vectors. TOL can be set to TOL = CTOL*EPS, where +*> CTOL is given on input in the array WORK. +*> No CTOL smaller than ONE is allowed. CTOL greater +*> than 1 / EPS is meaningless. The option 'C' +*> can be used if M*EPS is satisfactory orthogonality +*> of the computed left singular vectors, so CTOL=M could +*> save few sweeps of Jacobi rotations. +*> See the descriptions of A and WORK(1). +*> = 'N': The matrix U is not computed. However, see the +*> description of A. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> Specifies whether to compute the right singular vectors, that +*> is, the matrix V: +*> = 'V' or 'J': the matrix V is computed and returned in the array V +*> = 'A' : the Jacobi rotations are applied to the MV-by-N +*> array V. In other words, the right singular vector +*> matrix V is not computed explicitly; instead it is +*> applied to an MV-by-N matrix initially stored in the +*> first MV rows of V. +*> = 'N' : the matrix V is not computed and the array V is not +*> referenced +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. 1/DLAMCH('E') > M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. +*> M >= N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> If JOBU .EQ. 'U' .OR. JOBU .EQ. 'C': +*> If INFO .EQ. 0 : +*> RANKA orthonormal columns of U are returned in the +*> leading RANKA columns of the array A. Here RANKA <= N +*> is the number of computed singular values of A that are +*> above the underflow threshold DLAMCH('S'). The singular +*> vectors corresponding to underflowed or zero singular +*> values are not computed. The value of RANKA is returned +*> in the array RWORK as RANKA=NINT(RWORK(2)). Also see the +*> descriptions of SVA and RWORK. The computed columns of U +*> are mutually numerically orthogonal up to approximately +*> TOL=SQRT(M)*EPS (default); or TOL=CTOL*EPS (JOBU.EQ.'C'), +*> see the description of JOBU. +*> If INFO .GT. 0, +*> the procedure ZGESVJ did not converge in the given number +*> of iterations (sweeps). In that case, the computed +*> columns of U may not be orthogonal up to TOL. The output +*> U (stored in A), SIGMA (given by the computed singular +*> values in SVA(1:N)) and V is still a decomposition of the +*> input matrix A in the sense that the residual +*> || A - SCALE * U * SIGMA * V^* ||_2 / ||A||_2 is small. +*> If JOBU .EQ. 'N': +*> If INFO .EQ. 0 : +*> Note that the left singular vectors are 'for free' in the +*> one-sided Jacobi SVD algorithm. However, if only the +*> singular values are needed, the level of numerical +*> orthogonality of U is not an issue and iterations are +*> stopped when the columns of the iterated matrix are +*> numerically orthogonal up to approximately M*EPS. Thus, +*> on exit, A contains the columns of U scaled with the +*> corresponding singular values. +*> If INFO .GT. 0 : +*> the procedure ZGESVJ did not converge in the given number +*> of iterations (sweeps). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] SVA +*> \verbatim +*> SVA is DOUBLE PRECISION array, dimension (N) +*> On exit, +*> If INFO .EQ. 0 : +*> depending on the value SCALE = RWORK(1), we have: +*> If SCALE .EQ. ONE: +*> SVA(1:N) contains the computed singular values of A. +*> During the computation SVA contains the Euclidean column +*> norms of the iterated matrices in the array A. +*> If SCALE .NE. ONE: +*> The singular values of A are SCALE*SVA(1:N), and this +*> factored representation is due to the fact that some of the +*> singular values of A might underflow or overflow. +*> +*> If INFO .GT. 0 : +*> the procedure ZGESVJ did not converge in the given number of +*> iterations (sweeps) and SCALE*SVA(1:N) may not be accurate. +*> \endverbatim +*> +*> \param[in] MV +*> \verbatim +*> MV is INTEGER +*> If JOBV .EQ. 'A', then the product of Jacobi rotations in ZGESVJ +*> is applied to the first MV rows of V. See the description of JOBV. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (LDV,N) +*> If JOBV = 'V', then V contains on exit the N-by-N matrix of +*> the right singular vectors; +*> If JOBV = 'A', then V contains the product of the computed right +*> singular vector matrix and the initial matrix in +*> the array V. +*> If JOBV = 'N', then V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V, LDV .GE. 1. +*> If JOBV .EQ. 'V', then LDV .GE. max(1,N). +*> If JOBV .EQ. 'A', then LDV .GE. max(1,MV) . +*> \endverbatim +*> +*> \param[in,out] CWORK +*> \verbatim +*> CWORK is COMPLEX*16 array, dimension (max(1,LWORK)) +*> Used as workspace. +*> If on entry LWORK .EQ. -1, then a workspace query is assumed and +*> no computation is done; CWORK(1) is set to the minial (and optimal) +*> length of CWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER. +*> Length of CWORK, LWORK >= M+N. +*> \endverbatim +*> +*> \param[in,out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (max(6,LRWORK)) +*> On entry, +*> If JOBU .EQ. 'C' : +*> RWORK(1) = CTOL, where CTOL defines the threshold for convergence. +*> The process stops if all columns of A are mutually +*> orthogonal up to CTOL*EPS, EPS=DLAMCH('E'). +*> It is required that CTOL >= ONE, i.e. it is not +*> allowed to force the routine to obtain orthogonality +*> below EPSILON. +*> On exit, +*> RWORK(1) = SCALE is the scaling factor such that SCALE*SVA(1:N) +*> are the computed singular values of A. +*> (See description of SVA().) +*> RWORK(2) = NINT(RWORK(2)) is the number of the computed nonzero +*> singular values. +*> RWORK(3) = NINT(RWORK(3)) is the number of the computed singular +*> values that are larger than the underflow threshold. +*> RWORK(4) = NINT(RWORK(4)) is the number of sweeps of Jacobi +*> rotations needed for numerical convergence. +*> RWORK(5) = max_{i.NE.j} |COS(A(:,i),A(:,j))| in the last sweep. +*> This is useful information in cases when ZGESVJ did +*> not converge, as it can be used to estimate whether +*> the output is stil useful and for post festum analysis. +*> RWORK(6) = the largest absolute value over all sines of the +*> Jacobi rotation angles in the last sweep. It can be +*> useful for a post festum analysis. +*> If on entry LRWORK .EQ. -1, then a workspace query is assumed and +*> no computation is done; RWORK(1) is set to the minial (and optimal) +*> length of RWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> Length of RWORK, LRWORK >= MAX(6,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0 : successful exit. +*> < 0 : if INFO = -i, then the i-th argument had an illegal value +*> > 0 : ZGESVJ did not converge in the maximal allowed number +*> (NSWEEP=30) of sweeps. The output may still be useful. +*> See the description of RWORK. +*> \endverbatim +*> +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The orthogonal N-by-N matrix V is obtained as a product of Jacobi plane +*> rotations. In the case of underflow of the tangent of the Jacobi angle, a +*> modified Jacobi transformation of Drmac [3] is used. Pivot strategy uses +*> column interchanges of de Rijk [1]. The relative accuracy of the computed +*> singular values and the accuracy of the computed singular vectors (in +*> angle metric) is as guaranteed by the theory of Demmel and Veselic [2]. +*> The condition number that determines the accuracy in the full rank case +*> is essentially min_{D=diag} kappa(A*D), where kappa(.) is the +*> spectral condition number. The best performance of this Jacobi SVD +*> procedure is achieved if used in an accelerated version of Drmac and +*> Veselic [4,5], and it is the kernel routine in the SIGMA library [6]. +*> Some tunning parameters (marked with [TP]) are available for the +*> implementer. +*> The computational range for the nonzero singular values is the machine +*> number interval ( UNDERFLOW , OVERFLOW ). In extreme cases, even +*> denormalized singular values can be computed with the corresponding +*> gradual loss of accurate digits. +*> \endverbatim +* +*> \par Contributor: +* ================== +*> +*> \verbatim +*> +*> ============ +*> +*> Zlatko Drmac (Zagreb, Croatia) +*> +*> \endverbatim +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] P. P. M. De Rijk: A one-sided Jacobi algorithm for computing the +*> singular value decomposition on a vector computer. +*> SIAM J. Sci. Stat. Comp., Vol. 10 (1998), pp. 359-371. +*> [2] J. Demmel and K. Veselic: Jacobi method is more accurate than QR. +*> [3] Z. Drmac: Implementation of Jacobi rotations for accurate singular +*> value computation in floating point arithmetic. +*> SIAM J. Sci. Comp., Vol. 18 (1997), pp. 1200-1222. +*> [4] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I. +*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342. +*> LAPACK Working note 169. +*> [5] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II. +*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362. +*> LAPACK Working note 170. +*> [6] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV, +*> QSVD, (H,K)-SVD computations. +*> Department of Mathematics, University of Zagreb, 2008, 2015. +*> \endverbatim +* +*> \par Bugs, examples and comments: +* ================================= +*> +*> \verbatim +*> =========================== +*> Please report all bugs and send interesting test examples and comments to +*> drmac@math.hr. Thank you. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V, + $ LDV, CWORK, LWORK, RWORK, LRWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* + IMPLICIT NONE +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDV, LWORK, LRWORK, M, MV, N + CHARACTER*1 JOBA, JOBU, JOBV +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), V( LDV, * ), CWORK( LWORK ) + DOUBLE PRECISION RWORK( LRWORK ), SVA( N ) +* .. +* +* ===================================================================== +* +* .. Local Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = (0.0D0, 0.0D0), CONE = (1.0D0, 0.0D0) ) + INTEGER NSWEEP + PARAMETER ( NSWEEP = 30 ) +* .. +* .. Local Scalars .. + COMPLEX*16 AAPQ, OMPQ + DOUBLE PRECISION AAPP, AAPP0, AAPQ1, AAQQ, APOAQ, AQOAP, BIG, + $ BIGTHETA, CS, CTOL, EPSLN, MXAAPQ, + $ MXSINJ, ROOTBIG, ROOTEPS, ROOTSFMIN, ROOTTOL, + $ SKL, SFMIN, SMALL, SN, T, TEMP1, THETA, THSIGN, TOL + INTEGER BLSKIP, EMPTSW, i, ibr, IERR, igl, IJBLSK, ir1, + $ ISWROT, jbc, jgl, KBL, LKAHEAD, MVL, N2, N34, + $ N4, NBL, NOTROT, p, PSKIPPED, q, ROWSKIP, SWBAND + LOGICAL APPLV, GOSCALE, LOWER, LQUERY, LSVEC, NOSCALE, ROTOK, + $ RSVEC, UCTOL, UPPER +* .. +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, CONJG, DBLE, SIGN, SQRT +* .. +* .. External Functions .. +* .. +* from BLAS + DOUBLE PRECISION DZNRM2 + COMPLEX*16 ZDOTC + EXTERNAL ZDOTC, DZNRM2 + INTEGER IDAMAX + EXTERNAL IDAMAX +* from LAPACK + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. +* .. +* from BLAS + EXTERNAL ZCOPY, ZROT, ZDSCAL, ZSWAP, ZAXPY +* from LAPACK + EXTERNAL DLASCL, ZLASCL, ZLASET, ZLASSQ, XERBLA + EXTERNAL ZGSVJ0, ZGSVJ1 +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + LSVEC = LSAME( JOBU, 'U' ) .OR. LSAME( JOBU, 'F' ) + UCTOL = LSAME( JOBU, 'C' ) + RSVEC = LSAME( JOBV, 'V' ) .OR. LSAME( JOBV, 'J' ) + APPLV = LSAME( JOBV, 'A' ) + UPPER = LSAME( JOBA, 'U' ) + LOWER = LSAME( JOBA, 'L' ) +* + LQUERY = ( LWORK .EQ. -1 ) .OR. ( LRWORK .EQ. -1 ) + IF( .NOT.( UPPER .OR. LOWER .OR. LSAME( JOBA, 'G' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LSVEC .OR. UCTOL .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( RSVEC .OR. APPLV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( ( N.LT.0 ) .OR. ( N.GT.M ) ) THEN + INFO = -5 + ELSE IF( LDA.LT.M ) THEN + INFO = -7 + ELSE IF( MV.LT.0 ) THEN + INFO = -9 + ELSE IF( ( RSVEC .AND. ( LDV.LT.N ) ) .OR. + $ ( APPLV .AND. ( LDV.LT.MV ) ) ) THEN + INFO = -11 + ELSE IF( UCTOL .AND. ( RWORK( 1 ).LE.ONE ) ) THEN + INFO = -12 + ELSE IF( ( LWORK.LT.( M+N ) ) .AND. ( .NOT.LQUERY ) ) THEN + INFO = -13 + ELSE IF( ( LRWORK.LT.MAX( N, 6 ) ) .AND. ( .NOT.LQUERY ) ) THEN + INFO = -15 + ELSE + INFO = 0 + END IF +* +* #:( + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGESVJ', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN + CWORK(1) = M + N + RWORK(1) = MAX( N, 6 ) + RETURN + END IF +* +* #:) Quick return for void matrix +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) )RETURN +* +* Set numerical parameters +* The stopping criterion for Jacobi rotations is +* +* max_{i<>j}|A(:,i)^* * A(:,j)| / (||A(:,i)||*||A(:,j)||) < CTOL*EPS +* +* where EPS is the round-off and CTOL is defined as follows: +* + IF( UCTOL ) THEN +* ... user controlled + CTOL = RWORK( 1 ) + ELSE +* ... default + IF( LSVEC .OR. RSVEC .OR. APPLV ) THEN + CTOL = SQRT( DBLE( M ) ) + ELSE + CTOL = DBLE( M ) + END IF + END IF +* ... and the machine dependent parameters are +*[!] (Make sure that SLAMCH() works properly on the target machine.) +* + EPSLN = DLAMCH( 'Epsilon' ) + ROOTEPS = SQRT( EPSLN ) + SFMIN = DLAMCH( 'SafeMinimum' ) + ROOTSFMIN = SQRT( SFMIN ) + SMALL = SFMIN / EPSLN + BIG = DLAMCH( 'Overflow' ) +* BIG = ONE / SFMIN + ROOTBIG = ONE / ROOTSFMIN +* LARGE = BIG / SQRT( DBLE( M*N ) ) + BIGTHETA = ONE / ROOTEPS +* + TOL = CTOL*EPSLN + ROOTTOL = SQRT( TOL ) +* + IF( DBLE( M )*EPSLN.GE.ONE ) THEN + INFO = -4 + CALL XERBLA( 'ZGESVJ', -INFO ) + RETURN + END IF +* +* Initialize the right singular vector matrix. +* + IF( RSVEC ) THEN + MVL = N + CALL ZLASET( 'A', MVL, N, CZERO, CONE, V, LDV ) + ELSE IF( APPLV ) THEN + MVL = MV + END IF + RSVEC = RSVEC .OR. APPLV +* +* Initialize SVA( 1:N ) = ( ||A e_i||_2, i = 1:N ) +*(!) If necessary, scale A to protect the largest singular value +* from overflow. It is possible that saving the largest singular +* value destroys the information about the small ones. +* This initial scaling is almost minimal in the sense that the +* goal is to make sure that no column norm overflows, and that +* SQRT(N)*max_i SVA(i) does not overflow. If INFinite entries +* in A are detected, the procedure returns with INFO=-6. +* + SKL = ONE / SQRT( DBLE( M )*DBLE( N ) ) + NOSCALE = .TRUE. + GOSCALE = .TRUE. +* + IF( LOWER ) THEN +* the input matrix is M-by-N lower triangular (trapezoidal) + DO 1874 p = 1, N + AAPP = ZERO + AAQQ = ONE + CALL ZLASSQ( M-p+1, A( p, p ), 1, AAPP, AAQQ ) + IF( AAPP.GT.BIG ) THEN + INFO = -6 + CALL XERBLA( 'ZGESVJ', -INFO ) + RETURN + END IF + AAQQ = SQRT( AAQQ ) + IF( ( AAPP.LT.( BIG / AAQQ ) ) .AND. NOSCALE ) THEN + SVA( p ) = AAPP*AAQQ + ELSE + NOSCALE = .FALSE. + SVA( p ) = AAPP*( AAQQ*SKL ) + IF( GOSCALE ) THEN + GOSCALE = .FALSE. + DO 1873 q = 1, p - 1 + SVA( q ) = SVA( q )*SKL + 1873 CONTINUE + END IF + END IF + 1874 CONTINUE + ELSE IF( UPPER ) THEN +* the input matrix is M-by-N upper triangular (trapezoidal) + DO 2874 p = 1, N + AAPP = ZERO + AAQQ = ONE + CALL ZLASSQ( p, A( 1, p ), 1, AAPP, AAQQ ) + IF( AAPP.GT.BIG ) THEN + INFO = -6 + CALL XERBLA( 'ZGESVJ', -INFO ) + RETURN + END IF + AAQQ = SQRT( AAQQ ) + IF( ( AAPP.LT.( BIG / AAQQ ) ) .AND. NOSCALE ) THEN + SVA( p ) = AAPP*AAQQ + ELSE + NOSCALE = .FALSE. + SVA( p ) = AAPP*( AAQQ*SKL ) + IF( GOSCALE ) THEN + GOSCALE = .FALSE. + DO 2873 q = 1, p - 1 + SVA( q ) = SVA( q )*SKL + 2873 CONTINUE + END IF + END IF + 2874 CONTINUE + ELSE +* the input matrix is M-by-N general dense + DO 3874 p = 1, N + AAPP = ZERO + AAQQ = ONE + CALL ZLASSQ( M, A( 1, p ), 1, AAPP, AAQQ ) + IF( AAPP.GT.BIG ) THEN + INFO = -6 + CALL XERBLA( 'ZGESVJ', -INFO ) + RETURN + END IF + AAQQ = SQRT( AAQQ ) + IF( ( AAPP.LT.( BIG / AAQQ ) ) .AND. NOSCALE ) THEN + SVA( p ) = AAPP*AAQQ + ELSE + NOSCALE = .FALSE. + SVA( p ) = AAPP*( AAQQ*SKL ) + IF( GOSCALE ) THEN + GOSCALE = .FALSE. + DO 3873 q = 1, p - 1 + SVA( q ) = SVA( q )*SKL + 3873 CONTINUE + END IF + END IF + 3874 CONTINUE + END IF +* + IF( NOSCALE )SKL = ONE +* +* Move the smaller part of the spectrum from the underflow threshold +*(!) Start by determining the position of the nonzero entries of the +* array SVA() relative to ( SFMIN, BIG ). +* + AAPP = ZERO + AAQQ = BIG + DO 4781 p = 1, N + IF( SVA( p ).NE.ZERO )AAQQ = MIN( AAQQ, SVA( p ) ) + AAPP = MAX( AAPP, SVA( p ) ) + 4781 CONTINUE +* +* #:) Quick return for zero matrix +* + IF( AAPP.EQ.ZERO ) THEN + IF( LSVEC )CALL ZLASET( 'G', M, N, CZERO, CONE, A, LDA ) + RWORK( 1 ) = ONE + RWORK( 2 ) = ZERO + RWORK( 3 ) = ZERO + RWORK( 4 ) = ZERO + RWORK( 5 ) = ZERO + RWORK( 6 ) = ZERO + RETURN + END IF +* +* #:) Quick return for one-column matrix +* + IF( N.EQ.1 ) THEN + IF( LSVEC )CALL ZLASCL( 'G', 0, 0, SVA( 1 ), SKL, M, 1, + $ A( 1, 1 ), LDA, IERR ) + RWORK( 1 ) = ONE / SKL + IF( SVA( 1 ).GE.SFMIN ) THEN + RWORK( 2 ) = ONE + ELSE + RWORK( 2 ) = ZERO + END IF + RWORK( 3 ) = ZERO + RWORK( 4 ) = ZERO + RWORK( 5 ) = ZERO + RWORK( 6 ) = ZERO + RETURN + END IF +* +* Protect small singular values from underflow, and try to +* avoid underflows/overflows in computing Jacobi rotations. +* + SN = SQRT( SFMIN / EPSLN ) + TEMP1 = SQRT( BIG / DBLE( N ) ) + IF( ( AAPP.LE.SN ) .OR. ( AAQQ.GE.TEMP1 ) .OR. + $ ( ( SN.LE.AAQQ ) .AND. ( AAPP.LE.TEMP1 ) ) ) THEN + TEMP1 = MIN( BIG, TEMP1 / AAPP ) +* AAQQ = AAQQ*TEMP1 +* AAPP = AAPP*TEMP1 + ELSE IF( ( AAQQ.LE.SN ) .AND. ( AAPP.LE.TEMP1 ) ) THEN + TEMP1 = MIN( SN / AAQQ, BIG / (AAPP*SQRT( DBLE(N)) ) ) +* AAQQ = AAQQ*TEMP1 +* AAPP = AAPP*TEMP1 + ELSE IF( ( AAQQ.GE.SN ) .AND. ( AAPP.GE.TEMP1 ) ) THEN + TEMP1 = MAX( SN / AAQQ, TEMP1 / AAPP ) +* AAQQ = AAQQ*TEMP1 +* AAPP = AAPP*TEMP1 + ELSE IF( ( AAQQ.LE.SN ) .AND. ( AAPP.GE.TEMP1 ) ) THEN + TEMP1 = MIN( SN / AAQQ, BIG / ( SQRT( DBLE( N ) )*AAPP ) ) +* AAQQ = AAQQ*TEMP1 +* AAPP = AAPP*TEMP1 + ELSE + TEMP1 = ONE + END IF +* +* Scale, if necessary +* + IF( TEMP1.NE.ONE ) THEN + CALL DLASCL( 'G', 0, 0, ONE, TEMP1, N, 1, SVA, N, IERR ) + END IF + SKL = TEMP1*SKL + IF( SKL.NE.ONE ) THEN + CALL ZLASCL( JOBA, 0, 0, ONE, SKL, M, N, A, LDA, IERR ) + SKL = ONE / SKL + END IF +* +* Row-cyclic Jacobi SVD algorithm with column pivoting +* + EMPTSW = ( N*( N-1 ) ) / 2 + NOTROT = 0 + + DO 1868 q = 1, N + CWORK( q ) = CONE + 1868 CONTINUE +* +* +* + SWBAND = 3 +*[TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective +* if ZGESVJ is used as a computational routine in the preconditioned +* Jacobi SVD algorithm ZGEJSV. For sweeps i=1:SWBAND the procedure +* works on pivots inside a band-like region around the diagonal. +* The boundaries are determined dynamically, based on the number of +* pivots above a threshold. +* + KBL = MIN( 8, N ) +*[TP] KBL is a tuning parameter that defines the tile size in the +* tiling of the p-q loops of pivot pairs. In general, an optimal +* value of KBL depends on the matrix dimensions and on the +* parameters of the computer's memory. +* + NBL = N / KBL + IF( ( NBL*KBL ).NE.N )NBL = NBL + 1 +* + BLSKIP = KBL**2 +*[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. +* + ROWSKIP = MIN( 5, KBL ) +*[TP] ROWSKIP is a tuning parameter. +* + LKAHEAD = 1 +*[TP] LKAHEAD is a tuning parameter. +* +* Quasi block transformations, using the lower (upper) triangular +* structure of the input matrix. The quasi-block-cycling usually +* invokes cubic convergence. Big part of this cycle is done inside +* canonical subspaces of dimensions less than M. +* + IF( ( LOWER .OR. UPPER ) .AND. ( N.GT.MAX( 64, 4*KBL ) ) ) THEN +*[TP] The number of partition levels and the actual partition are +* tuning parameters. + N4 = N / 4 + N2 = N / 2 + N34 = 3*N4 + IF( APPLV ) THEN + q = 0 + ELSE + q = 1 + END IF +* + IF( LOWER ) THEN +* +* This works very well on lower triangular matrices, in particular +* in the framework of the preconditioned Jacobi SVD (xGEJSV). +* The idea is simple: +* [+ 0 0 0] Note that Jacobi transformations of [0 0] +* [+ + 0 0] [0 0] +* [+ + x 0] actually work on [x 0] [x 0] +* [+ + x x] [x x]. [x x] +* + CALL ZGSVJ0( JOBV, M-N34, N-N34, A( N34+1, N34+1 ), LDA, + $ CWORK( N34+1 ), SVA( N34+1 ), MVL, + $ V( N34*q+1, N34+1 ), LDV, EPSLN, SFMIN, TOL, + $ 2, CWORK( N+1 ), LWORK-N, IERR ) + + CALL ZGSVJ0( JOBV, M-N2, N34-N2, A( N2+1, N2+1 ), LDA, + $ CWORK( N2+1 ), SVA( N2+1 ), MVL, + $ V( N2*q+1, N2+1 ), LDV, EPSLN, SFMIN, TOL, 2, + $ CWORK( N+1 ), LWORK-N, IERR ) + + CALL ZGSVJ1( JOBV, M-N2, N-N2, N4, A( N2+1, N2+1 ), LDA, + $ CWORK( N2+1 ), SVA( N2+1 ), MVL, + $ V( N2*q+1, N2+1 ), LDV, EPSLN, SFMIN, TOL, 1, + $ CWORK( N+1 ), LWORK-N, IERR ) + + CALL ZGSVJ0( JOBV, M-N4, N2-N4, A( N4+1, N4+1 ), LDA, + $ CWORK( N4+1 ), SVA( N4+1 ), MVL, + $ V( N4*q+1, N4+1 ), LDV, EPSLN, SFMIN, TOL, 1, + $ CWORK( N+1 ), LWORK-N, IERR ) +* + CALL ZGSVJ0( JOBV, M, N4, A, LDA, CWORK, SVA, MVL, V, LDV, + $ EPSLN, SFMIN, TOL, 1, CWORK( N+1 ), LWORK-N, + $ IERR ) +* + CALL ZGSVJ1( JOBV, M, N2, N4, A, LDA, CWORK, SVA, MVL, V, + $ LDV, EPSLN, SFMIN, TOL, 1, CWORK( N+1 ), + $ LWORK-N, IERR ) +* +* + ELSE IF( UPPER ) THEN +* +* + CALL ZGSVJ0( JOBV, N4, N4, A, LDA, CWORK, SVA, MVL, V, LDV, + $ EPSLN, SFMIN, TOL, 2, CWORK( N+1 ), LWORK-N, + $ IERR ) +* + CALL ZGSVJ0( JOBV, N2, N4, A( 1, N4+1 ), LDA, CWORK( N4+1 ), + $ SVA( N4+1 ), MVL, V( N4*q+1, N4+1 ), LDV, + $ EPSLN, SFMIN, TOL, 1, CWORK( N+1 ), LWORK-N, + $ IERR ) +* + CALL ZGSVJ1( JOBV, N2, N2, N4, A, LDA, CWORK, SVA, MVL, V, + $ LDV, EPSLN, SFMIN, TOL, 1, CWORK( N+1 ), + $ LWORK-N, IERR ) +* + CALL ZGSVJ0( JOBV, N2+N4, N4, A( 1, N2+1 ), LDA, + $ CWORK( N2+1 ), SVA( N2+1 ), MVL, + $ V( N2*q+1, N2+1 ), LDV, EPSLN, SFMIN, TOL, 1, + $ CWORK( N+1 ), LWORK-N, IERR ) + + END IF +* + END IF +* +* .. Row-cyclic pivot strategy with de Rijk's pivoting .. +* + DO 1993 i = 1, NSWEEP +* +* .. go go go ... +* + MXAAPQ = ZERO + MXSINJ = ZERO + ISWROT = 0 +* + NOTROT = 0 + PSKIPPED = 0 +* +* Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs +* 1 <= p < q <= N. This is the first step toward a blocked implementation +* of the rotations. New implementation, based on block transformations, +* is under development. +* + DO 2000 ibr = 1, NBL +* + igl = ( ibr-1 )*KBL + 1 +* + DO 1002 ir1 = 0, MIN( LKAHEAD, NBL-ibr ) +* + igl = igl + ir1*KBL +* + DO 2001 p = igl, MIN( igl+KBL-1, N-1 ) +* +* .. de Rijk's pivoting +* + q = IDAMAX( N-p+1, SVA( p ), 1 ) + p - 1 + IF( p.NE.q ) THEN + CALL ZSWAP( M, A( 1, p ), 1, A( 1, q ), 1 ) + IF( RSVEC )CALL ZSWAP( MVL, V( 1, p ), 1, + $ V( 1, q ), 1 ) + TEMP1 = SVA( p ) + SVA( p ) = SVA( q ) + SVA( q ) = TEMP1 + AAPQ = CWORK(p) + CWORK(p) = CWORK(q) + CWORK(q) = AAPQ + END IF +* + IF( ir1.EQ.0 ) THEN +* +* Column norms are periodically updated by explicit +* norm computation. +*[!] Caveat: +* Unfortunately, some BLAS implementations compute DZNRM2(M,A(1,p),1) +* as SQRT(S=CDOTC(M,A(1,p),1,A(1,p),1)), which may cause the result to +* overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and to +* underflow for ||A(:,p)||_2 < SQRT(underflow_threshold). +* Hence, DZNRM2 cannot be trusted, not even in the case when +* the true norm is far from the under(over)flow boundaries. +* If properly implemented SCNRM2 is available, the IF-THEN-ELSE-END IF +* below should be replaced with "AAPP = DZNRM2( M, A(1,p), 1 )". +* + IF( ( SVA( p ).LT.ROOTBIG ) .AND. + $ ( SVA( p ).GT.ROOTSFMIN ) ) THEN + SVA( p ) = DZNRM2( M, A( 1, p ), 1 ) + ELSE + TEMP1 = ZERO + AAPP = ONE + CALL ZLASSQ( M, A( 1, p ), 1, TEMP1, AAPP ) + SVA( p ) = TEMP1*SQRT( AAPP ) + END IF + AAPP = SVA( p ) + ELSE + AAPP = SVA( p ) + END IF +* + IF( AAPP.GT.ZERO ) THEN +* + PSKIPPED = 0 +* + DO 2002 q = p + 1, MIN( igl+KBL-1, N ) +* + AAQQ = SVA( q ) +* + IF( AAQQ.GT.ZERO ) THEN +* + AAPP0 = AAPP + IF( AAQQ.GE.ONE ) THEN + ROTOK = ( SMALL*AAPP ).LE.AAQQ + IF( AAPP.LT.( BIG / AAQQ ) ) THEN + AAPQ = ( ZDOTC( M, A( 1, p ), 1, + $ A( 1, q ), 1 ) / AAQQ ) / AAPP + ELSE + CALL ZCOPY( M, A( 1, p ), 1, + $ CWORK(N+1), 1 ) + CALL ZLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, CWORK(N+1), LDA, IERR ) + AAPQ = ZDOTC( M, CWORK(N+1), 1, + $ A( 1, q ), 1 ) / AAQQ + END IF + ELSE + ROTOK = AAPP.LE.( AAQQ / SMALL ) + IF( AAPP.GT.( SMALL / AAQQ ) ) THEN + AAPQ = ( ZDOTC( M, A( 1, p ), 1, + $ A( 1, q ), 1 ) / AAPP ) / AAQQ + ELSE + CALL ZCOPY( M, A( 1, q ), 1, + $ CWORK(N+1), 1 ) + CALL ZLASCL( 'G', 0, 0, AAQQ, + $ ONE, M, 1, + $ CWORK(N+1), LDA, IERR ) + AAPQ = ZDOTC( M, A(1, p ), 1, + $ CWORK(N+1), 1 ) / AAPP + END IF + END IF +* + +* AAPQ = AAPQ * CONJG( CWORK(p) ) * CWORK(q) + AAPQ1 = -ABS(AAPQ) + MXAAPQ = MAX( MXAAPQ, -AAPQ1 ) +* +* TO rotate or NOT to rotate, THAT is the question ... +* + IF( ABS( AAPQ1 ).GT.TOL ) THEN + OMPQ = AAPQ / ABS(AAPQ) +* +* .. rotate +*[RTD] ROTATED = ROTATED + ONE +* + IF( ir1.EQ.0 ) THEN + NOTROT = 0 + PSKIPPED = 0 + ISWROT = ISWROT + 1 + END IF +* + IF( ROTOK ) THEN +* + AQOAP = AAQQ / AAPP + APOAQ = AAPP / AAQQ + THETA = -HALF*ABS( AQOAP-APOAQ )/AAPQ1 +* + IF( ABS( THETA ).GT.BIGTHETA ) THEN +* + T = HALF / THETA + CS = ONE + + CALL ZROT( M, A(1,p), 1, A(1,q), 1, + $ CS, CONJG(OMPQ)*T ) + IF ( RSVEC ) THEN + CALL ZROT( MVL, V(1,p), 1, + $ V(1,q), 1, CS, CONJG(OMPQ)*T ) + END IF + + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ1 ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, ABS( T ) ) +* + ELSE +* +* .. choose correct signum for THETA and rotate +* + THSIGN = -SIGN( ONE, AAPQ1 ) + T = ONE / ( THETA+THSIGN* + $ SQRT( ONE+THETA*THETA ) ) + CS = SQRT( ONE / ( ONE+T*T ) ) + SN = T*CS +* + MXSINJ = MAX( MXSINJ, ABS( SN ) ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ1 ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ1 ) ) +* + CALL ZROT( M, A(1,p), 1, A(1,q), 1, + $ CS, CONJG(OMPQ)*SN ) + IF ( RSVEC ) THEN + CALL ZROT( MVL, V(1,p), 1, + $ V(1,q), 1, CS, CONJG(OMPQ)*SN ) + END IF + END IF + CWORK(p) = -CWORK(q) * OMPQ +* + ELSE +* .. have to use modified Gram-Schmidt like transformation + CALL ZCOPY( M, A( 1, p ), 1, + $ CWORK(N+1), 1 ) + CALL ZLASCL( 'G', 0, 0, AAPP, ONE, M, + $ 1, CWORK(N+1), LDA, + $ IERR ) + CALL ZLASCL( 'G', 0, 0, AAQQ, ONE, M, + $ 1, A( 1, q ), LDA, IERR ) + CALL ZAXPY( M, -AAPQ, CWORK(N+1), 1, + $ A( 1, q ), 1 ) + CALL ZLASCL( 'G', 0, 0, ONE, AAQQ, M, + $ 1, A( 1, q ), LDA, IERR ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE-AAPQ1*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + END IF +* END IF ROTOK THEN ... ELSE +* +* In the case of cancellation in updating SVA(q), SVA(p) +* recompute SVA(q), SVA(p). +* + IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS ) + $ THEN + IF( ( AAQQ.LT.ROOTBIG ) .AND. + $ ( AAQQ.GT.ROOTSFMIN ) ) THEN + SVA( q ) = DZNRM2( M, A( 1, q ), 1 ) + ELSE + T = ZERO + AAQQ = ONE + CALL ZLASSQ( M, A( 1, q ), 1, T, + $ AAQQ ) + SVA( q ) = T*SQRT( AAQQ ) + END IF + END IF + IF( ( AAPP / AAPP0 ).LE.ROOTEPS ) THEN + IF( ( AAPP.LT.ROOTBIG ) .AND. + $ ( AAPP.GT.ROOTSFMIN ) ) THEN + AAPP = DZNRM2( M, A( 1, p ), 1 ) + ELSE + T = ZERO + AAPP = ONE + CALL ZLASSQ( M, A( 1, p ), 1, T, + $ AAPP ) + AAPP = T*SQRT( AAPP ) + END IF + SVA( p ) = AAPP + END IF +* + ELSE +* A(:,p) and A(:,q) already numerically orthogonal + IF( ir1.EQ.0 )NOTROT = NOTROT + 1 +*[RTD] SKIPPED = SKIPPED + 1 + PSKIPPED = PSKIPPED + 1 + END IF + ELSE +* A(:,q) is zero column + IF( ir1.EQ.0 )NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + END IF +* + IF( ( i.LE.SWBAND ) .AND. + $ ( PSKIPPED.GT.ROWSKIP ) ) THEN + IF( ir1.EQ.0 )AAPP = -AAPP + NOTROT = 0 + GO TO 2103 + END IF +* + 2002 CONTINUE +* END q-LOOP +* + 2103 CONTINUE +* bailed out of q-loop +* + SVA( p ) = AAPP +* + ELSE + SVA( p ) = AAPP + IF( ( ir1.EQ.0 ) .AND. ( AAPP.EQ.ZERO ) ) + $ NOTROT = NOTROT + MIN( igl+KBL-1, N ) - p + END IF +* + 2001 CONTINUE +* end of the p-loop +* end of doing the block ( ibr, ibr ) + 1002 CONTINUE +* end of ir1-loop +* +* ... go to the off diagonal blocks +* + igl = ( ibr-1 )*KBL + 1 +* + DO 2010 jbc = ibr + 1, NBL +* + jgl = ( jbc-1 )*KBL + 1 +* +* doing the block at ( ibr, jbc ) +* + IJBLSK = 0 + DO 2100 p = igl, MIN( igl+KBL-1, N ) +* + AAPP = SVA( p ) + IF( AAPP.GT.ZERO ) THEN +* + PSKIPPED = 0 +* + DO 2200 q = jgl, MIN( jgl+KBL-1, N ) +* + AAQQ = SVA( q ) + IF( AAQQ.GT.ZERO ) THEN + AAPP0 = AAPP +* +* .. M x 2 Jacobi SVD .. +* +* Safe Gram matrix computation +* + IF( AAQQ.GE.ONE ) THEN + IF( AAPP.GE.AAQQ ) THEN + ROTOK = ( SMALL*AAPP ).LE.AAQQ + ELSE + ROTOK = ( SMALL*AAQQ ).LE.AAPP + END IF + IF( AAPP.LT.( BIG / AAQQ ) ) THEN + AAPQ = ( ZDOTC( M, A( 1, p ), 1, + $ A( 1, q ), 1 ) / AAQQ ) / AAPP + ELSE + CALL ZCOPY( M, A( 1, p ), 1, + $ CWORK(N+1), 1 ) + CALL ZLASCL( 'G', 0, 0, AAPP, + $ ONE, M, 1, + $ CWORK(N+1), LDA, IERR ) + AAPQ = ZDOTC( M, CWORK(N+1), 1, + $ A( 1, q ), 1 ) / AAQQ + END IF + ELSE + IF( AAPP.GE.AAQQ ) THEN + ROTOK = AAPP.LE.( AAQQ / SMALL ) + ELSE + ROTOK = AAQQ.LE.( AAPP / SMALL ) + END IF + IF( AAPP.GT.( SMALL / AAQQ ) ) THEN + AAPQ = ( ZDOTC( M, A( 1, p ), 1, + $ A( 1, q ), 1 ) / MAX(AAQQ,AAPP) ) + $ / MIN(AAQQ,AAPP) + ELSE + CALL ZCOPY( M, A( 1, q ), 1, + $ CWORK(N+1), 1 ) + CALL ZLASCL( 'G', 0, 0, AAQQ, + $ ONE, M, 1, + $ CWORK(N+1), LDA, IERR ) + AAPQ = ZDOTC( M, A( 1, p ), 1, + $ CWORK(N+1), 1 ) / AAPP + END IF + END IF +* + +* AAPQ = AAPQ * CONJG(CWORK(p))*CWORK(q) + AAPQ1 = -ABS(AAPQ) + MXAAPQ = MAX( MXAAPQ, -AAPQ1 ) +* +* TO rotate or NOT to rotate, THAT is the question ... +* + IF( ABS( AAPQ1 ).GT.TOL ) THEN + OMPQ = AAPQ / ABS(AAPQ) + NOTROT = 0 +*[RTD] ROTATED = ROTATED + 1 + PSKIPPED = 0 + ISWROT = ISWROT + 1 +* + IF( ROTOK ) THEN +* + AQOAP = AAQQ / AAPP + APOAQ = AAPP / AAQQ + THETA = -HALF*ABS( AQOAP-APOAQ )/ AAPQ1 + IF( AAQQ.GT.AAPP0 )THETA = -THETA +* + IF( ABS( THETA ).GT.BIGTHETA ) THEN + T = HALF / THETA + CS = ONE + CALL ZROT( M, A(1,p), 1, A(1,q), 1, + $ CS, CONJG(OMPQ)*T ) + IF( RSVEC ) THEN + CALL ZROT( MVL, V(1,p), 1, + $ V(1,q), 1, CS, CONJG(OMPQ)*T ) + END IF + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ1 ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, ABS( T ) ) + ELSE +* +* .. choose correct signum for THETA and rotate +* + THSIGN = -SIGN( ONE, AAPQ1 ) + IF( AAQQ.GT.AAPP0 )THSIGN = -THSIGN + T = ONE / ( THETA+THSIGN* + $ SQRT( ONE+THETA*THETA ) ) + CS = SQRT( ONE / ( ONE+T*T ) ) + SN = T*CS + MXSINJ = MAX( MXSINJ, ABS( SN ) ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ1 ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ1 ) ) +* + CALL ZROT( M, A(1,p), 1, A(1,q), 1, + $ CS, CONJG(OMPQ)*SN ) + IF( RSVEC ) THEN + CALL ZROT( MVL, V(1,p), 1, + $ V(1,q), 1, CS, CONJG(OMPQ)*SN ) + END IF + END IF + CWORK(p) = -CWORK(q) * OMPQ +* + ELSE +* .. have to use modified Gram-Schmidt like transformation + IF( AAPP.GT.AAQQ ) THEN + CALL ZCOPY( M, A( 1, p ), 1, + $ CWORK(N+1), 1 ) + CALL ZLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, CWORK(N+1),LDA, + $ IERR ) + CALL ZLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + CALL ZAXPY( M, -AAPQ, CWORK(N+1), + $ 1, A( 1, q ), 1 ) + CALL ZLASCL( 'G', 0, 0, ONE, AAQQ, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE-AAPQ1*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + ELSE + CALL ZCOPY( M, A( 1, q ), 1, + $ CWORK(N+1), 1 ) + CALL ZLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, CWORK(N+1),LDA, + $ IERR ) + CALL ZLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + CALL ZAXPY( M, -CONJG(AAPQ), + $ CWORK(N+1), 1, A( 1, p ), 1 ) + CALL ZLASCL( 'G', 0, 0, ONE, AAPP, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + SVA( p ) = AAPP*SQRT( MAX( ZERO, + $ ONE-AAPQ1*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + END IF + END IF +* END IF ROTOK THEN ... ELSE +* +* In the case of cancellation in updating SVA(q), SVA(p) +* .. recompute SVA(q), SVA(p) + IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS ) + $ THEN + IF( ( AAQQ.LT.ROOTBIG ) .AND. + $ ( AAQQ.GT.ROOTSFMIN ) ) THEN + SVA( q ) = DZNRM2( M, A( 1, q ), 1) + ELSE + T = ZERO + AAQQ = ONE + CALL ZLASSQ( M, A( 1, q ), 1, T, + $ AAQQ ) + SVA( q ) = T*SQRT( AAQQ ) + END IF + END IF + IF( ( AAPP / AAPP0 )**2.LE.ROOTEPS ) THEN + IF( ( AAPP.LT.ROOTBIG ) .AND. + $ ( AAPP.GT.ROOTSFMIN ) ) THEN + AAPP = DZNRM2( M, A( 1, p ), 1 ) + ELSE + T = ZERO + AAPP = ONE + CALL ZLASSQ( M, A( 1, p ), 1, T, + $ AAPP ) + AAPP = T*SQRT( AAPP ) + END IF + SVA( p ) = AAPP + END IF +* end of OK rotation + ELSE + NOTROT = NOTROT + 1 +*[RTD] SKIPPED = SKIPPED + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF + ELSE + NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF +* + IF( ( i.LE.SWBAND ) .AND. ( IJBLSK.GE.BLSKIP ) ) + $ THEN + SVA( p ) = AAPP + NOTROT = 0 + GO TO 2011 + END IF + IF( ( i.LE.SWBAND ) .AND. + $ ( PSKIPPED.GT.ROWSKIP ) ) THEN + AAPP = -AAPP + NOTROT = 0 + GO TO 2203 + END IF +* + 2200 CONTINUE +* end of the q-loop + 2203 CONTINUE +* + SVA( p ) = AAPP +* + ELSE +* + IF( AAPP.EQ.ZERO )NOTROT = NOTROT + + $ MIN( jgl+KBL-1, N ) - jgl + 1 + IF( AAPP.LT.ZERO )NOTROT = 0 +* + END IF +* + 2100 CONTINUE +* end of the p-loop + 2010 CONTINUE +* end of the jbc-loop + 2011 CONTINUE +*2011 bailed out of the jbc-loop + DO 2012 p = igl, MIN( igl+KBL-1, N ) + SVA( p ) = ABS( SVA( p ) ) + 2012 CONTINUE +*** + 2000 CONTINUE +*2000 :: end of the ibr-loop +* +* .. update SVA(N) + IF( ( SVA( N ).LT.ROOTBIG ) .AND. ( SVA( N ).GT.ROOTSFMIN ) ) + $ THEN + SVA( N ) = DZNRM2( M, A( 1, N ), 1 ) + ELSE + T = ZERO + AAPP = ONE + CALL ZLASSQ( M, A( 1, N ), 1, T, AAPP ) + SVA( N ) = T*SQRT( AAPP ) + END IF +* +* Additional steering devices +* + IF( ( i.LT.SWBAND ) .AND. ( ( MXAAPQ.LE.ROOTTOL ) .OR. + $ ( ISWROT.LE.N ) ) )SWBAND = i +* + IF( ( i.GT.SWBAND+1 ) .AND. ( MXAAPQ.LT.SQRT( DBLE( N ) )* + $ TOL ) .AND. ( DBLE( N )*MXAAPQ*MXSINJ.LT.TOL ) ) THEN + GO TO 1994 + END IF +* + IF( NOTROT.GE.EMPTSW )GO TO 1994 +* + 1993 CONTINUE +* end i=1:NSWEEP loop +* +* #:( Reaching this point means that the procedure has not converged. + INFO = NSWEEP - 1 + GO TO 1995 +* + 1994 CONTINUE +* #:) Reaching this point means numerical convergence after the i-th +* sweep. +* + INFO = 0 +* #:) INFO = 0 confirms successful iterations. + 1995 CONTINUE +* +* Sort the singular values and find how many are above +* the underflow threshold. +* + N2 = 0 + N4 = 0 + DO 5991 p = 1, N - 1 + q = IDAMAX( N-p+1, SVA( p ), 1 ) + p - 1 + IF( p.NE.q ) THEN + TEMP1 = SVA( p ) + SVA( p ) = SVA( q ) + SVA( q ) = TEMP1 + CALL ZSWAP( M, A( 1, p ), 1, A( 1, q ), 1 ) + IF( RSVEC )CALL ZSWAP( MVL, V( 1, p ), 1, V( 1, q ), 1 ) + END IF + IF( SVA( p ).NE.ZERO ) THEN + N4 = N4 + 1 + IF( SVA( p )*SKL.GT.SFMIN )N2 = N2 + 1 + END IF + 5991 CONTINUE + IF( SVA( N ).NE.ZERO ) THEN + N4 = N4 + 1 + IF( SVA( N )*SKL.GT.SFMIN )N2 = N2 + 1 + END IF +* +* Normalize the left singular vectors. +* + IF( LSVEC .OR. UCTOL ) THEN + DO 1998 p = 1, N4 +* CALL ZDSCAL( M, ONE / SVA( p ), A( 1, p ), 1 ) + CALL ZLASCL( 'G',0,0, SVA(p), ONE, M, 1, A(1,p), M, IERR ) + 1998 CONTINUE + END IF +* +* Scale the product of Jacobi rotations. +* + IF( RSVEC ) THEN + DO 2399 p = 1, N + TEMP1 = ONE / DZNRM2( MVL, V( 1, p ), 1 ) + CALL ZDSCAL( MVL, TEMP1, V( 1, p ), 1 ) + 2399 CONTINUE + END IF +* +* Undo scaling, if necessary (and possible). + IF( ( ( SKL.GT.ONE ) .AND. ( SVA( 1 ).LT.( BIG / SKL ) ) ) + $ .OR. ( ( SKL.LT.ONE ) .AND. ( SVA( MAX( N2, 1 ) ) .GT. + $ ( SFMIN / SKL ) ) ) ) THEN + DO 2400 p = 1, N + SVA( p ) = SKL*SVA( p ) + 2400 CONTINUE + SKL = ONE + END IF +* + RWORK( 1 ) = SKL +* The singular values of A are SKL*SVA(1:N). If SKL.NE.ONE +* then some of the singular values may overflow or underflow and +* the spectrum is given in this factored representation. +* + RWORK( 2 ) = DBLE( N4 ) +* N4 is the number of computed nonzero singular values of A. +* + RWORK( 3 ) = DBLE( N2 ) +* N2 is the number of singular values of A greater than SFMIN. +* If N2 \brief ZGESVX computes the solution to system of linear equations A * X = B for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGESVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, +* EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR, +* WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, TRANS +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION BERR( * ), C( * ), FERR( * ), R( * ), +* $ RWORK( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGESVX uses the LU factorization to compute the solution to a complex +*> system of linear equations +*> A * X = B, +*> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +*> TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +*> TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +*> or diag(C)*B (if TRANS = 'T' or 'C'). +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor the +*> matrix A (after equilibration if FACT = 'E') as +*> A = P * L * U, +*> where P is a permutation matrix, L is a unit lower triangular +*> matrix, and U is upper triangular. +*> +*> 3. If some U(i,i)=0, so that U is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +*> that it solves the original system before equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF and IPIV contain the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by R and C. +*> A, AF, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is +*> not 'N', then A must have been equilibrated by the scaling +*> factors in R and/or C. A is not modified if FACT = 'F' or +*> 'N', or if FACT = 'E' and EQUED = 'N' on exit. +*> +*> On exit, if EQUED .ne. 'N', A is scaled as follows: +*> EQUED = 'R': A := diag(R) * A +*> EQUED = 'C': A := A * diag(C) +*> EQUED = 'B': A := diag(R) * A * diag(C). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the factors L and U from the factorization +*> A = P*L*U as computed by ZGETRF. If EQUED .ne. 'N', then +*> AF is the factored form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the equilibrated matrix A (see the description of A for +*> the form of the equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains the pivot indices from the factorization A = P*L*U +*> as computed by ZGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the equilibrated matrix A. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. R is an input argument if FACT = 'F'; +*> otherwise, R is an output argument. If FACT = 'F' and +*> EQUED = 'R' or 'B', each element of R must be positive. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. C is an input argument if FACT = 'F'; +*> otherwise, C is an output argument. If FACT = 'F' and +*> EQUED = 'C' or 'B', each element of C must be positive. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +*> diag(R)*B; +*> if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +*> overwritten by diag(C)*B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X +*> to the original system of equations. Note that A and B are +*> modified on exit if EQUED .ne. 'N', and the solution to the +*> equilibrated system is inv(diag(C))*X if TRANS = 'N' and +*> EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C' +*> and EQUED = 'R' or 'B'. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The estimate of the reciprocal condition number of the matrix +*> A after equilibration (if done). If RCOND is less than the +*> machine precision (in particular, if RCOND = 0), the matrix +*> is singular to working precision. This condition is +*> indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> On exit, RWORK(1) contains the reciprocal pivot growth +*> factor norm(A)/norm(U). The "max absolute element" norm is +*> used. If RWORK(1) is much less than 1, then the stability +*> of the LU factorization of the (equilibrated) matrix A +*> could be poor. This also means that the solution X, condition +*> estimator RCOND, and forward error bound FERR could be +*> unreliable. If factorization fails with 0 RWORK(1) contains the reciprocal pivot growth factor for the +*> leading INFO columns of A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: U(i,i) is exactly zero. The factorization has +*> been completed, but the factor U is exactly +*> singular, so the solution and error bounds +*> could not be computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16GEsolve +* +* ===================================================================== + SUBROUTINE ZGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), C( * ), FERR( * ), R( * ), + $ RWORK( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + CHARACTER NORM + INTEGER I, INFEQU, J + DOUBLE PRECISION AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, RPVGRW, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANGE, ZLANTR + EXTERNAL LSAME, DLAMCH, ZLANGE, ZLANTR +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGECON, ZGEEQU, ZGERFS, ZGETRF, ZGETRS, + $ ZLACPY, ZLAQGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -12 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGESVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL ZGEEQU( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL ZLAQGE( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = R( I )*B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( COLEQU ) THEN + DO 60 J = 1, NRHS + DO 50 I = 1, N + B( I, J ) = C( I )*B( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of A. +* + CALL ZLACPY( 'Full', N, N, A, LDA, AF, LDAF ) + CALL ZGETRF( N, N, AF, LDAF, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + RPVGRW = ZLANTR( 'M', 'U', 'N', INFO, INFO, AF, LDAF, + $ RWORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = ZLANGE( 'M', N, INFO, A, LDA, RWORK ) / + $ RPVGRW + END IF + RWORK( 1 ) = RPVGRW + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A and the +* reciprocal pivot growth factor RPVGRW. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = ZLANGE( NORM, N, N, A, LDA, RWORK ) + RPVGRW = ZLANTR( 'M', 'U', 'N', N, N, AF, LDAF, RWORK ) + IF( RPVGRW.EQ.ZERO ) THEN + RPVGRW = ONE + ELSE + RPVGRW = ZLANGE( 'M', N, N, A, LDA, RWORK ) / RPVGRW + END IF +* +* Compute the reciprocal of the condition number of A. +* + CALL ZGECON( NORM, N, AF, LDAF, ANORM, RCOND, WORK, RWORK, INFO ) +* +* Compute the solution matrix X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZGETRS( TRANS, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL ZGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( NOTRAN ) THEN + IF( COLEQU ) THEN + DO 80 J = 1, NRHS + DO 70 I = 1, N + X( I, J ) = C( I )*X( I, J ) + 70 CONTINUE + 80 CONTINUE + DO 90 J = 1, NRHS + FERR( J ) = FERR( J ) / COLCND + 90 CONTINUE + END IF + ELSE IF( ROWEQU ) THEN + DO 110 J = 1, NRHS + DO 100 I = 1, N + X( I, J ) = R( I )*X( I, J ) + 100 CONTINUE + 110 CONTINUE + DO 120 J = 1, NRHS + FERR( J ) = FERR( J ) / ROWCND + 120 CONTINUE + END IF +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RWORK( 1 ) = RPVGRW + RETURN +* +* End of ZGESVX +* + END diff --git a/dspl/liblapack/SRC/zgesvxx.f b/dspl/liblapack/SRC/zgesvxx.f new file mode 100644 index 0000000..c3727b7 --- /dev/null +++ b/dspl/liblapack/SRC/zgesvxx.f @@ -0,0 +1,769 @@ +*> \brief ZGESVXX computes the solution to system of linear equations A * X = B for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGESVXX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGESVXX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, +* EQUED, R, C, B, LDB, X, LDX, RCOND, RPVGRW, +* BERR, N_ERR_BNDS, ERR_BNDS_NORM, +* ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, TRANS +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* DOUBLE PRECISION RCOND, RPVGRW +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX , * ),WORK( * ) +* DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGESVXX uses the LU factorization to compute the solution to a +*> complex*16 system of linear equations A * X = B, where A is an +*> N-by-N matrix and X and B are N-by-NRHS matrices. +*> +*> If requested, both normwise and maximum componentwise error bounds +*> are returned. ZGESVXX will return a solution with a tiny +*> guaranteed error (O(eps) where eps is the working machine +*> precision) unless the matrix is very ill-conditioned, in which +*> case a warning is returned. Relevant condition numbers also are +*> calculated and returned. +*> +*> ZGESVXX accepts user-provided factorizations and equilibration +*> factors; see the definitions of the FACT and EQUED options. +*> Solving with refinement and using a factorization from a previous +*> ZGESVXX call will also produce a solution with either O(eps) +*> errors or warnings, but we cannot make that claim for general +*> user-provided factorizations and equilibration factors if they +*> differ from what ZGESVXX would itself produce. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', double precision scaling factors are computed to equilibrate +*> the system: +*> +*> TRANS = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B +*> TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B +*> TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B +*> +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N') +*> or diag(C)*B (if TRANS = 'T' or 'C'). +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor +*> the matrix A (after equilibration if FACT = 'E') as +*> +*> A = P * L * U, +*> +*> where P is a permutation matrix, L is a unit lower triangular +*> matrix, and U is upper triangular. +*> +*> 3. If some U(i,i)=0, so that U is exactly singular, then the +*> routine returns with INFO = i. Otherwise, the factored form of A +*> is used to estimate the condition number of the matrix A (see +*> argument RCOND). If the reciprocal of the condition number is less +*> than machine precision, the routine still goes on to solve for X +*> and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), +*> the routine will use iterative refinement to try to get a small +*> error and error bounds. Refinement calculates the residual to at +*> least twice the working precision. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so +*> that it solves the original system before equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF and IPIV contain the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by R and C. +*> A, AF, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. If FACT = 'F' and EQUED is +*> not 'N', then A must have been equilibrated by the scaling +*> factors in R and/or C. A is not modified if FACT = 'F' or +*> 'N', or if FACT = 'E' and EQUED = 'N' on exit. +*> +*> On exit, if EQUED .ne. 'N', A is scaled as follows: +*> EQUED = 'R': A := diag(R) * A +*> EQUED = 'C': A := A * diag(C) +*> EQUED = 'B': A := diag(R) * A * diag(C). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the factors L and U from the factorization +*> A = P*L*U as computed by ZGETRF. If EQUED .ne. 'N', then +*> AF is the factored form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then AF is an output argument and on exit +*> returns the factors L and U from the factorization A = P*L*U +*> of the equilibrated matrix A (see the description of A for +*> the form of the equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains the pivot indices from the factorization A = P*L*U +*> as computed by ZGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the original matrix A. +*> +*> If FACT = 'E', then IPIV is an output argument and on exit +*> contains the pivot indices from the factorization A = P*L*U +*> of the equilibrated matrix A. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (N) +*> The row scale factors for A. If EQUED = 'R' or 'B', A is +*> multiplied on the left by diag(R); if EQUED = 'N' or 'C', R +*> is not accessed. R is an input argument if FACT = 'F'; +*> otherwise, R is an output argument. If FACT = 'F' and +*> EQUED = 'R' or 'B', each element of R must be positive. +*> If R is output, each element of R is a power of the radix. +*> If R is input, each element of R should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If EQUED = 'C' or 'B', A is +*> multiplied on the right by diag(C); if EQUED = 'N' or 'R', C +*> is not accessed. C is an input argument if FACT = 'F'; +*> otherwise, C is an output argument. If FACT = 'F' and +*> EQUED = 'C' or 'B', each element of C must be positive. +*> If C is output, each element of C is a power of the radix. +*> If C is input, each element of C should be a power of the radix +*> to ensure a reliable solution and error estimates. Scaling by +*> powers of the radix does not cause rounding errors unless the +*> result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by +*> diag(R)*B; +*> if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is +*> overwritten by diag(C)*B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X to the original +*> system of equations. Note that A and B are modified on exit +*> if EQUED .ne. 'N', and the solution to the equilibrated system is +*> inv(diag(C))*X if TRANS = 'N' and EQUED = 'C' or 'B', or +*> inv(diag(R))*X if TRANS = 'T' or 'C' and EQUED = 'R' or 'B'. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] RPVGRW +*> \verbatim +*> RPVGRW is DOUBLE PRECISION +*> Reciprocal pivot growth. On exit, this contains the reciprocal +*> pivot growth factor norm(A)/norm(U). The "max absolute element" +*> norm is used. If this is much less than 1, then the stability of +*> the LU factorization of the (equilibrated) matrix A could be poor. +*> This also means that the solution X, estimated condition numbers, +*> and error bounds could be unreliable. If factorization fails with +*> 0 for the leading INFO columns of A. In ZGESVX, this quantity is +*> returned in WORK(1). +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is DOUBLE PRECISION array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0D+0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the extra-precise refinement algorithm. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16GEsolve +* +* ===================================================================== + SUBROUTINE ZGESVXX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ EQUED, R, C, B, LDB, X, LDX, RCOND, RPVGRW, + $ BERR, N_ERR_BNDS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, RWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, TRANS + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + DOUBLE PRECISION RCOND, RPVGRW +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX , * ),WORK( * ) + DOUBLE PRECISION R( * ), C( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) +* .. +* .. Local Scalars .. + LOGICAL COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU + INTEGER INFEQU, J + DOUBLE PRECISION AMAX, BIGNUM, COLCND, RCMAX, RCMIN, + $ ROWCND, SMLNUM +* .. +* .. External Functions .. + EXTERNAL LSAME, DLAMCH, ZLA_GERPVGRW + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLA_GERPVGRW +* .. +* .. External Subroutines .. + EXTERNAL ZGEEQUB, ZGETRF, ZGETRS, ZLACPY, ZLAQGE, + $ XERBLA, ZLASCL2, ZGERFSX +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + NOTRAN = LSAME( TRANS, 'N' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + ROWEQU = .FALSE. + COLEQU = .FALSE. + ELSE + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF +* +* Default is failure. If an input parameter is wrong or +* factorization fails, make everything look horrible. Only the +* pivot growth is set here, the rest is initialized in ZGERFSX. +* + RPVGRW = ZERO +* +* Test the input parameters. PARAMS is not tested until ZGERFSX. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT. + $ LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF( ROWEQU ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 10 J = 1, N + RCMIN = MIN( RCMIN, R( J ) ) + RCMAX = MAX( RCMAX, R( J ) ) + 10 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + ROWCND = ONE + END IF + END IF + IF( COLEQU .AND. INFO.EQ.0 ) THEN + RCMIN = BIGNUM + RCMAX = ZERO + DO 20 J = 1, N + RCMIN = MIN( RCMIN, C( J ) ) + RCMAX = MAX( RCMAX, C( J ) ) + 20 CONTINUE + IF( RCMIN.LE.ZERO ) THEN + INFO = -12 + ELSE IF( N.GT.0 ) THEN + COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM ) + ELSE + COLCND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGESVXX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL ZGEEQUB( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL ZLAQGE( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ EQUED ) + ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' ) + COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' ) + END IF +* +* If the scaling factors are not applied, set them to 1.0. +* + IF ( .NOT.ROWEQU ) THEN + DO J = 1, N + R( J ) = 1.0D+0 + END DO + END IF + IF ( .NOT.COLEQU ) THEN + DO J = 1, N + C( J ) = 1.0D+0 + END DO + END IF + END IF +* +* Scale the right-hand side. +* + IF( NOTRAN ) THEN + IF( ROWEQU ) CALL ZLASCL2( N, NRHS, R, B, LDB ) + ELSE + IF( COLEQU ) CALL ZLASCL2( N, NRHS, C, B, LDB ) + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LU factorization of A. +* + CALL ZLACPY( 'Full', N, N, A, LDA, AF, LDAF ) + CALL ZGETRF( N, N, AF, LDAF, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Pivot in column INFO is exactly 0 +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + RPVGRW = ZLA_GERPVGRW( N, INFO, A, LDA, AF, LDAF ) + RETURN + END IF + END IF +* +* Compute the reciprocal pivot growth factor RPVGRW. +* + RPVGRW = ZLA_GERPVGRW( N, N, A, LDA, AF, LDAF ) +* +* Compute the solution matrix X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZGETRS( TRANS, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL ZGERFSX( TRANS, EQUED, N, NRHS, A, LDA, AF, LDAF, + $ IPIV, R, C, B, LDB, X, LDX, RCOND, BERR, + $ N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, RWORK, INFO ) +* +* Scale solutions. +* + IF ( COLEQU .AND. NOTRAN ) THEN + CALL ZLASCL2 ( N, NRHS, C, X, LDX ) + ELSE IF ( ROWEQU .AND. .NOT.NOTRAN ) THEN + CALL ZLASCL2 ( N, NRHS, R, X, LDX ) + END IF +* + RETURN +* +* End of ZGESVXX +* + END diff --git a/dspl/liblapack/SRC/zgetc2.f b/dspl/liblapack/SRC/zgetc2.f new file mode 100644 index 0000000..b790e34 --- /dev/null +++ b/dspl/liblapack/SRC/zgetc2.f @@ -0,0 +1,234 @@ +*> \brief \b ZGETC2 computes the LU factorization with complete pivoting of the general n-by-n matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGETC2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGETC2( N, A, LDA, IPIV, JPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), JPIV( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGETC2 computes an LU factorization, using complete pivoting, of the +*> n-by-n matrix A. The factorization has the form A = P * L * U * Q, +*> where P and Q are permutation matrices, L is lower triangular with +*> unit diagonal elements and U is upper triangular. +*> +*> This is a level 1 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the n-by-n matrix to be factored. +*> On exit, the factors L and U from the factorization +*> A = P*L*U*Q; the unit diagonal elements of L are not stored. +*> If U(k, k) appears to be less than SMIN, U(k, k) is given the +*> value of SMIN, giving a nonsingular perturbed system. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1, N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N). +*> The pivot indices; for 1 <= i <= N, row i of the +*> matrix has been interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] JPIV +*> \verbatim +*> JPIV is INTEGER array, dimension (N). +*> The pivot indices; for 1 <= j <= N, column j of the +*> matrix has been interchanged with column JPIV(j). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = k, U(k, k) is likely to produce overflow if +*> one tries to solve for x in Ax = b. So U is perturbed +*> to avoid the overflow. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16GEauxiliary +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== + SUBROUTINE ZGETC2( N, A, LDA, IPIV, JPIV, INFO ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IP, IPV, J, JP, JPV + DOUBLE PRECISION BIGNUM, EPS, SMIN, SMLNUM, XMAX +* .. +* .. External Subroutines .. + EXTERNAL ZGERU, ZSWAP, DLABAD +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DCMPLX, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Handle the case N=1 by itself +* + IF( N.EQ.1 ) THEN + IPIV( 1 ) = 1 + JPIV( 1 ) = 1 + IF( ABS( A( 1, 1 ) ).LT.SMLNUM ) THEN + INFO = 1 + A( 1, 1 ) = DCMPLX( SMLNUM, ZERO ) + END IF + RETURN + END IF +* +* Factorize A using complete pivoting. +* Set pivots less than SMIN to SMIN +* + DO 40 I = 1, N - 1 +* +* Find max element in matrix A +* + XMAX = ZERO + DO 20 IP = I, N + DO 10 JP = I, N + IF( ABS( A( IP, JP ) ).GE.XMAX ) THEN + XMAX = ABS( A( IP, JP ) ) + IPV = IP + JPV = JP + END IF + 10 CONTINUE + 20 CONTINUE + IF( I.EQ.1 ) + $ SMIN = MAX( EPS*XMAX, SMLNUM ) +* +* Swap rows +* + IF( IPV.NE.I ) + $ CALL ZSWAP( N, A( IPV, 1 ), LDA, A( I, 1 ), LDA ) + IPIV( I ) = IPV +* +* Swap columns +* + IF( JPV.NE.I ) + $ CALL ZSWAP( N, A( 1, JPV ), 1, A( 1, I ), 1 ) + JPIV( I ) = JPV +* +* Check for singularity +* + IF( ABS( A( I, I ) ).LT.SMIN ) THEN + INFO = I + A( I, I ) = DCMPLX( SMIN, ZERO ) + END IF + DO 30 J = I + 1, N + A( J, I ) = A( J, I ) / A( I, I ) + 30 CONTINUE + CALL ZGERU( N-I, N-I, -DCMPLX( ONE ), A( I+1, I ), 1, + $ A( I, I+1 ), LDA, A( I+1, I+1 ), LDA ) + 40 CONTINUE +* + IF( ABS( A( N, N ) ).LT.SMIN ) THEN + INFO = N + A( N, N ) = DCMPLX( SMIN, ZERO ) + END IF +* +* Set last pivots to N +* + IPIV( N ) = N + JPIV( N ) = N +* + RETURN +* +* End of ZGETC2 +* + END diff --git a/dspl/liblapack/SRC/zgetf2.f b/dspl/liblapack/SRC/zgetf2.f new file mode 100644 index 0000000..a98e36e --- /dev/null +++ b/dspl/liblapack/SRC/zgetf2.f @@ -0,0 +1,214 @@ +*> \brief \b ZGETF2 computes the LU factorization of a general m-by-n matrix using partial pivoting with row interchanges (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGETF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGETF2( M, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGETF2 computes an LU factorization of a general m-by-n matrix A +*> using partial pivoting with row interchanges. +*> +*> The factorization has the form +*> A = P * L * U +*> where P is a permutation matrix, L is lower triangular with unit +*> diagonal elements (lower trapezoidal if m > n), and U is upper +*> triangular (upper trapezoidal if m < n). +*> +*> This is the right-looking Level 2 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the m by n matrix to be factored. +*> On exit, the factors L and U from the factorization +*> A = P*L*U; the unit diagonal elements of L are not stored. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, U(k,k) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +* ===================================================================== + SUBROUTINE ZGETF2( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION SFMIN + INTEGER I, J, JP +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + INTEGER IZAMAX + EXTERNAL DLAMCH, IZAMAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGERU, ZSCAL, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGETF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Compute machine safe minimum +* + SFMIN = DLAMCH('S') +* + DO 10 J = 1, MIN( M, N ) +* +* Find pivot and test for singularity. +* + JP = J - 1 + IZAMAX( M-J+1, A( J, J ), 1 ) + IPIV( J ) = JP + IF( A( JP, J ).NE.ZERO ) THEN +* +* Apply the interchange to columns 1:N. +* + IF( JP.NE.J ) + $ CALL ZSWAP( N, A( J, 1 ), LDA, A( JP, 1 ), LDA ) +* +* Compute elements J+1:M of J-th column. +* + IF( J.LT.M ) THEN + IF( ABS(A( J, J )) .GE. SFMIN ) THEN + CALL ZSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 ) + ELSE + DO 20 I = 1, M-J + A( J+I, J ) = A( J+I, J ) / A( J, J ) + 20 CONTINUE + END IF + END IF +* + ELSE IF( INFO.EQ.0 ) THEN +* + INFO = J + END IF +* + IF( J.LT.MIN( M, N ) ) THEN +* +* Update trailing submatrix. +* + CALL ZGERU( M-J, N-J, -ONE, A( J+1, J ), 1, A( J, J+1 ), + $ LDA, A( J+1, J+1 ), LDA ) + END IF + 10 CONTINUE + RETURN +* +* End of ZGETF2 +* + END diff --git a/dspl/liblapack/SRC/zgetrf.f b/dspl/liblapack/SRC/zgetrf.f new file mode 100644 index 0000000..c7b07b6 --- /dev/null +++ b/dspl/liblapack/SRC/zgetrf.f @@ -0,0 +1,225 @@ +*> \brief \b ZGETRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGETRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGETRF( M, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGETRF computes an LU factorization of a general M-by-N matrix A +*> using partial pivoting with row interchanges. +*> +*> The factorization has the form +*> A = P * L * U +*> where P is a permutation matrix, L is lower triangular with unit +*> diagonal elements (lower trapezoidal if m > n), and U is upper +*> triangular (upper trapezoidal if m < n). +*> +*> This is the right-looking Level 3 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix to be factored. +*> On exit, the factors L and U from the factorization +*> A = P*L*U; the unit diagonal elements of L are not stored. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +* ===================================================================== + SUBROUTINE ZGETRF( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, IINFO, J, JB, NB +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMM, ZGETRF2, ZLASWP, ZTRSM +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGETRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'ZGETRF', ' ', M, N, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN +* +* Use unblocked code. +* + CALL ZGETRF2( M, N, A, LDA, IPIV, INFO ) + ELSE +* +* Use blocked code. +* + DO 20 J = 1, MIN( M, N ), NB + JB = MIN( MIN( M, N )-J+1, NB ) +* +* Factor diagonal and subdiagonal blocks and test for exact +* singularity. +* + CALL ZGETRF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO ) +* +* Adjust INFO and the pivot indices. +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + J - 1 + DO 10 I = J, MIN( M, J+JB-1 ) + IPIV( I ) = J - 1 + IPIV( I ) + 10 CONTINUE +* +* Apply interchanges to columns 1:J-1. +* + CALL ZLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 ) +* + IF( J+JB.LE.N ) THEN +* +* Apply interchanges to columns J+JB:N. +* + CALL ZLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1, + $ IPIV, 1 ) +* +* Compute block row of U. +* + CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB, + $ N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ), + $ LDA ) + IF( J+JB.LE.M ) THEN +* +* Update trailing submatrix. +* + CALL ZGEMM( 'No transpose', 'No transpose', M-J-JB+1, + $ N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA, + $ A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ), + $ LDA ) + END IF + END IF + 20 CONTINUE + END IF + RETURN +* +* End of ZGETRF +* + END diff --git a/dspl/liblapack/SRC/zgetrf2.f b/dspl/liblapack/SRC/zgetrf2.f new file mode 100644 index 0000000..44e2731 --- /dev/null +++ b/dspl/liblapack/SRC/zgetrf2.f @@ -0,0 +1,274 @@ +*> \brief \b ZGETRF2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE ZGETRF2( M, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGETRF2 computes an LU factorization of a general M-by-N matrix A +*> using partial pivoting with row interchanges. +*> +*> The factorization has the form +*> A = P * L * U +*> where P is a permutation matrix, L is lower triangular with unit +*> diagonal elements (lower trapezoidal if m > n), and U is upper +*> triangular (upper trapezoidal if m < n). +*> +*> This is the recursive version of the algorithm. It divides +*> the matrix into four submatrices: +*> +*> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 +*> A = [ -----|----- ] with n1 = min(m,n)/2 +*> [ A21 | A22 ] n2 = n-n1 +*> +*> [ A11 ] +*> The subroutine calls itself to factor [ --- ], +*> [ A12 ] +*> [ A12 ] +*> do the swaps on [ --- ], solve A12, update A22, +*> [ A22 ] +*> +*> then calls itself to factor A22 and do the swaps on A21. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix to be factored. +*> On exit, the factors L and U from the factorization +*> A = P*L*U; the unit diagonal elements of L are not stored. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (min(M,N)) +*> The pivot indices; for 1 <= i <= min(M,N), row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16GEcomputational +* +* ===================================================================== + RECURSIVE SUBROUTINE ZGETRF2( M, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION SFMIN + COMPLEX*16 TEMP + INTEGER I, IINFO, N1, N2 +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + INTEGER IZAMAX + EXTERNAL DLAMCH, IZAMAX +* .. +* .. External Subroutines .. + EXTERNAL ZGEMM, ZSCAL, ZLASWP, ZTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGETRF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN + + IF ( M.EQ.1 ) THEN +* +* Use unblocked code for one row case +* Just need to handle IPIV and INFO +* + IPIV( 1 ) = 1 + IF ( A(1,1).EQ.ZERO ) + $ INFO = 1 +* + ELSE IF( N.EQ.1 ) THEN +* +* Use unblocked code for one column case +* +* +* Compute machine safe minimum +* + SFMIN = DLAMCH('S') +* +* Find pivot and test for singularity +* + I = IZAMAX( M, A( 1, 1 ), 1 ) + IPIV( 1 ) = I + IF( A( I, 1 ).NE.ZERO ) THEN +* +* Apply the interchange +* + IF( I.NE.1 ) THEN + TEMP = A( 1, 1 ) + A( 1, 1 ) = A( I, 1 ) + A( I, 1 ) = TEMP + END IF +* +* Compute elements 2:M of the column +* + IF( ABS(A( 1, 1 )) .GE. SFMIN ) THEN + CALL ZSCAL( M-1, ONE / A( 1, 1 ), A( 2, 1 ), 1 ) + ELSE + DO 10 I = 1, M-1 + A( 1+I, 1 ) = A( 1+I, 1 ) / A( 1, 1 ) + 10 CONTINUE + END IF +* + ELSE + INFO = 1 + END IF + + ELSE +* +* Use recursive code +* + N1 = MIN( M, N ) / 2 + N2 = N-N1 +* +* [ A11 ] +* Factor [ --- ] +* [ A21 ] +* + CALL ZGETRF2( M, N1, A, LDA, IPIV, IINFO ) + + IF ( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* [ A12 ] +* Apply interchanges to [ --- ] +* [ A22 ] +* + CALL ZLASWP( N2, A( 1, N1+1 ), LDA, 1, N1, IPIV, 1 ) +* +* Solve A12 +* + CALL ZTRSM( 'L', 'L', 'N', 'U', N1, N2, ONE, A, LDA, + $ A( 1, N1+1 ), LDA ) +* +* Update A22 +* + CALL ZGEMM( 'N', 'N', M-N1, N2, N1, -ONE, A( N1+1, 1 ), LDA, + $ A( 1, N1+1 ), LDA, ONE, A( N1+1, N1+1 ), LDA ) +* +* Factor A22 +* + CALL ZGETRF2( M-N1, N2, A( N1+1, N1+1 ), LDA, IPIV( N1+1 ), + $ IINFO ) +* +* Adjust INFO and the pivot indices +* + IF ( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + N1 + DO 20 I = N1+1, MIN( M, N ) + IPIV( I ) = IPIV( I ) + N1 + 20 CONTINUE +* +* Apply interchanges to A21 +* + CALL ZLASWP( N1, A( 1, 1 ), LDA, N1+1, MIN( M, N), IPIV, 1 ) +* + END IF + RETURN +* +* End of ZGETRF2 +* + END diff --git a/dspl/liblapack/SRC/zgetri.f b/dspl/liblapack/SRC/zgetri.f new file mode 100644 index 0000000..deff71f --- /dev/null +++ b/dspl/liblapack/SRC/zgetri.f @@ -0,0 +1,262 @@ +*> \brief \b ZGETRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGETRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGETRI computes the inverse of a matrix using the LU factorization +*> computed by ZGETRF. +*> +*> This method inverts U and then computes inv(A) by solving the system +*> inv(A)*L = inv(U) for inv(A). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the factors L and U from the factorization +*> A = P*L*U as computed by ZGETRF. +*> On exit, if INFO = 0, the inverse of the original matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from ZGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO=0, then WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimal performance LWORK >= N*NB, where NB is +*> the optimal blocksize returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero; the matrix is +*> singular and its inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +* ===================================================================== + SUBROUTINE ZGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB, + $ NBMIN, NN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMM, ZGEMV, ZSWAP, ZTRSM, ZTRTRI +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NB = ILAENV( 1, 'ZGETRI', ' ', N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGETRI', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form inv(U). If INFO > 0 from ZTRTRI, then U is singular, +* and the inverse is not computed. +* + CALL ZTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = MAX( LDWORK*NB, 1 ) + IF( LWORK.LT.IWS ) THEN + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZGETRI', ' ', N, -1, -1, -1 ) ) + END IF + ELSE + IWS = N + END IF +* +* Solve the equation inv(A)*L = inv(U) for inv(A). +* + IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + DO 20 J = N, 1, -1 +* +* Copy current column of L to WORK and replace with zeros. +* + DO 10 I = J + 1, N + WORK( I ) = A( I, J ) + A( I, J ) = ZERO + 10 CONTINUE +* +* Compute current column of inv(A). +* + IF( J.LT.N ) + $ CALL ZGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ), + $ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 ) + 20 CONTINUE + ELSE +* +* Use blocked code. +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 50 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) +* +* Copy current block column of L to WORK and replace with +* zeros. +* + DO 40 JJ = J, J + JB - 1 + DO 30 I = JJ + 1, N + WORK( I+( JJ-J )*LDWORK ) = A( I, JJ ) + A( I, JJ ) = ZERO + 30 CONTINUE + 40 CONTINUE +* +* Compute current block column of inv(A). +* + IF( J+JB.LE.N ) + $ CALL ZGEMM( 'No transpose', 'No transpose', N, JB, + $ N-J-JB+1, -ONE, A( 1, J+JB ), LDA, + $ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA ) + CALL ZTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, + $ ONE, WORK( J ), LDWORK, A( 1, J ), LDA ) + 50 CONTINUE + END IF +* +* Apply column interchanges. +* + DO 60 J = N - 1, 1, -1 + JP = IPIV( J ) + IF( JP.NE.J ) + $ CALL ZSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 ) + 60 CONTINUE +* + WORK( 1 ) = IWS + RETURN +* +* End of ZGETRI +* + END diff --git a/dspl/liblapack/SRC/zgetrs.f b/dspl/liblapack/SRC/zgetrs.f new file mode 100644 index 0000000..5c0bd35 --- /dev/null +++ b/dspl/liblapack/SRC/zgetrs.f @@ -0,0 +1,225 @@ +*> \brief \b ZGETRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGETRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGETRS solves a system of linear equations +*> A * X = B, A**T * X = B, or A**H * X = B +*> with a general N-by-N matrix A using the LU factorization computed +*> by ZGETRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The factors L and U from the factorization A = P*L*U +*> as computed by ZGETRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from ZGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +* ===================================================================== + SUBROUTINE ZGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLASWP, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGETRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( NOTRAN ) THEN +* +* Solve A * X = B. +* +* Apply row interchanges to the right hand sides. +* + CALL ZLASWP( NRHS, B, LDB, 1, N, IPIV, 1 ) +* +* Solve L*X = B, overwriting B with X. +* + CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL ZTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A**T * X = B or A**H * X = B. +* +* Solve U**T *X = B or U**H *X = B, overwriting B with X. +* + CALL ZTRSM( 'Left', 'Upper', TRANS, 'Non-unit', N, NRHS, ONE, + $ A, LDA, B, LDB ) +* +* Solve L**T *X = B, or L**H *X = B overwriting B with X. +* + CALL ZTRSM( 'Left', 'Lower', TRANS, 'Unit', N, NRHS, ONE, A, + $ LDA, B, LDB ) +* +* Apply row interchanges to the solution vectors. +* + CALL ZLASWP( NRHS, B, LDB, 1, N, IPIV, -1 ) + END IF +* + RETURN +* +* End of ZGETRS +* + END diff --git a/dspl/liblapack/SRC/zgetsls.f b/dspl/liblapack/SRC/zgetsls.f new file mode 100644 index 0000000..5ce11ef --- /dev/null +++ b/dspl/liblapack/SRC/zgetsls.f @@ -0,0 +1,497 @@ +* Definition: +* =========== +* +* SUBROUTINE ZGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, +* $ WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGETSLS solves overdetermined or underdetermined complex linear systems +*> involving an M-by-N matrix A, using a tall skinny QR or short wide LQ +*> factorization of A. It is assumed that A has full rank. +*> +*> +*> +*> The following options are provided: +*> +*> 1. If TRANS = 'N' and m >= n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A*X ||. +*> +*> 2. If TRANS = 'N' and m < n: find the minimum norm solution of +*> an underdetermined system A * X = B. +*> +*> 3. If TRANS = 'C' and m >= n: find the minimum norm solution of +*> an undetermined system A**T * X = B. +*> +*> 4. If TRANS = 'C' and m < n: find the least squares solution of +*> an overdetermined system, i.e., solve the least squares problem +*> minimize || B - A**T * X ||. +*> +*> Several right hand side vectors b and solution vectors x can be +*> handled in a single call; they are stored as the columns of the +*> M-by-NRHS right hand side matrix B and the N-by-NRHS solution +*> matrix X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': the linear system involves A; +*> = 'C': the linear system involves A**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of +*> columns of the matrices B and X. NRHS >=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, +*> A is overwritten by details of its QR or LQ +*> factorization as returned by ZGEQR or ZGELQ. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the matrix B of right hand side vectors, stored +*> columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS +*> if TRANS = 'C'. +*> On exit, if INFO = 0, B is overwritten by the solution +*> vectors, stored columnwise: +*> if TRANS = 'N' and m >= n, rows 1 to n of B contain the least +*> squares solution vectors. +*> if TRANS = 'N' and m < n, rows 1 to N of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'C' and m >= n, rows 1 to M of B contain the +*> minimum norm solution vectors; +*> if TRANS = 'C' and m < n, rows 1 to M of B contain the +*> least squares solution vectors. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= MAX(1,M,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) contains optimal (or either minimal +*> or optimal, if query was assumed) LWORK. +*> See LWORK for details. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If LWORK = -1 or -2, then a workspace query is assumed. +*> If LWORK = -1, the routine calculates optimal size of WORK for the +*> optimal performance and returns this value in WORK(1). +*> If LWORK = -2, the routine calculates minimal size of WORK and +*> returns this value in WORK(1). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of the +*> triangular factor of A is zero, so that A does not have +*> full rank; the least squares solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16GEsolve +* +* ===================================================================== + SUBROUTINE ZGETSLS( TRANS, M, N, NRHS, A, LDA, B, LDB, + $ WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, LWORK, M, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, TRAN + INTEGER I, IASCL, IBSCL, J, MINMN, MAXMN, BROW, + $ SCLLEN, MNK, TSZO, TSZM, LWO, LWM, LW1, LW2, + $ WSIZEO, WSIZEM, INFO2 + DOUBLE PRECISION ANRM, BIGNUM, BNRM, SMLNUM, DUM( 1 ) + COMPLEX*16 TQ( 5 ), WORKQ( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLABAD, DLAMCH, ZLANGE +* .. +* .. External Subroutines .. + EXTERNAL ZGEQR, ZGEMQR, ZLASCL, ZLASET, + $ ZTRTRS, XERBLA, ZGELQ, ZGEMLQ +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, INT +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + MINMN = MIN( M, N ) + MAXMN = MAX( M, N ) + MNK = MAX( MINMN, NRHS ) + TRAN = LSAME( TRANS, 'C' ) +* + LQUERY = ( LWORK.EQ.-1 .OR. LWORK.EQ.-2 ) + IF( .NOT.( LSAME( TRANS, 'N' ) .OR. + $ LSAME( TRANS, 'C' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size and minimum LWORK +* + IF( M.GE.N ) THEN + CALL ZGEQR( M, N, A, LDA, TQ, -1, WORKQ, -1, INFO2 ) + TSZO = INT( TQ( 1 ) ) + LWO = INT( WORKQ( 1 ) ) + CALL ZGEMQR( 'L', TRANS, M, NRHS, N, A, LDA, TQ, + $ TSZO, B, LDB, WORKQ, -1, INFO2 ) + LWO = MAX( LWO, INT( WORKQ( 1 ) ) ) + CALL ZGEQR( M, N, A, LDA, TQ, -2, WORKQ, -2, INFO2 ) + TSZM = INT( TQ( 1 ) ) + LWM = INT( WORKQ( 1 ) ) + CALL ZGEMQR( 'L', TRANS, M, NRHS, N, A, LDA, TQ, + $ TSZM, B, LDB, WORKQ, -1, INFO2 ) + LWM = MAX( LWM, INT( WORKQ( 1 ) ) ) + WSIZEO = TSZO + LWO + WSIZEM = TSZM + LWM + ELSE + CALL ZGELQ( M, N, A, LDA, TQ, -1, WORKQ, -1, INFO2 ) + TSZO = INT( TQ( 1 ) ) + LWO = INT( WORKQ( 1 ) ) + CALL ZGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ, + $ TSZO, B, LDB, WORKQ, -1, INFO2 ) + LWO = MAX( LWO, INT( WORKQ( 1 ) ) ) + CALL ZGELQ( M, N, A, LDA, TQ, -2, WORKQ, -2, INFO2 ) + TSZM = INT( TQ( 1 ) ) + LWM = INT( WORKQ( 1 ) ) + CALL ZGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ, + $ TSZO, B, LDB, WORKQ, -1, INFO2 ) + LWM = MAX( LWM, INT( WORKQ( 1 ) ) ) + WSIZEO = TSZO + LWO + WSIZEM = TSZM + LWM + END IF +* + IF( ( LWORK.LT.WSIZEM ).AND.( .NOT.LQUERY ) ) THEN + INFO = -10 + END IF +* + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGETSLS', -INFO ) + WORK( 1 ) = DBLE( WSIZEO ) + RETURN + END IF + IF( LQUERY ) THEN + IF( LWORK.EQ.-1 ) WORK( 1 ) = REAL( WSIZEO ) + IF( LWORK.EQ.-2 ) WORK( 1 ) = REAL( WSIZEM ) + RETURN + END IF + IF( LWORK.LT.WSIZEO ) THEN + LW1 = TSZM + LW2 = LWM + ELSE + LW1 = TSZO + LW2 = LWO + END IF +* +* Quick return if possible +* + IF( MIN( M, N, NRHS ).EQ.0 ) THEN + CALL ZLASET( 'FULL', MAX( M, N ), NRHS, CZERO, CZERO, + $ B, LDB ) + RETURN + END IF +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* +* Scale A, B if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', M, N, A, LDA, DUM ) + IASCL = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO ) + IASCL = 1 + ELSE IF( ANRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO ) + IASCL = 2 + ELSE IF( ANRM.EQ.ZERO ) THEN +* +* Matrix all zero. Return zero solution. +* + CALL ZLASET( 'F', MAXMN, NRHS, CZERO, CZERO, B, LDB ) + GO TO 50 + END IF +* + BROW = M + IF ( TRAN ) THEN + BROW = N + END IF + BNRM = ZLANGE( 'M', BROW, NRHS, B, LDB, DUM ) + IBSCL = 0 + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN +* +* Scale matrix norm up to SMLNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 1 + ELSE IF( BNRM.GT.BIGNUM ) THEN +* +* Scale matrix norm down to BIGNUM +* + CALL ZLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB, + $ INFO ) + IBSCL = 2 + END IF +* + IF ( M.GE.N ) THEN +* +* compute QR factorization of A +* + CALL ZGEQR( M, N, A, LDA, WORK( LW2+1 ), LW1, + $ WORK( 1 ), LW2, INFO ) + IF ( .NOT.TRAN ) THEN +* +* Least-Squares Problem min || A * X - B || +* +* B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS) +* + CALL ZGEMQR( 'L' , 'C', M, NRHS, N, A, LDA, + $ WORK( LW2+1 ), LW1, B, LDB, WORK( 1 ), LW2, + $ INFO ) +* +* B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) +* + CALL ZTRTRS( 'U', 'N', 'N', N, NRHS, + $ A, LDA, B, LDB, INFO ) + IF( INFO.GT.0 ) THEN + RETURN + END IF + SCLLEN = N + ELSE +* +* Overdetermined system of equations A**T * X = B +* +* B(1:N,1:NRHS) := inv(R**T) * B(1:N,1:NRHS) +* + CALL ZTRTRS( 'U', 'C', 'N', N, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* B(N+1:M,1:NRHS) = CZERO +* + DO 20 J = 1, NRHS + DO 10 I = N + 1, M + B( I, J ) = CZERO + 10 CONTINUE + 20 CONTINUE +* +* B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) +* + CALL ZGEMQR( 'L', 'N', M, NRHS, N, A, LDA, + $ WORK( LW2+1 ), LW1, B, LDB, WORK( 1 ), LW2, + $ INFO ) +* + SCLLEN = M +* + END IF +* + ELSE +* +* Compute LQ factorization of A +* + CALL ZGELQ( M, N, A, LDA, WORK( LW2+1 ), LW1, + $ WORK( 1 ), LW2, INFO ) +* +* workspace at least M, optimally M*NB. +* + IF( .NOT.TRAN ) THEN +* +* underdetermined system of equations A * X = B +* +* B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) +* + CALL ZTRTRS( 'L', 'N', 'N', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* +* B(M+1:N,1:NRHS) = 0 +* + DO 40 J = 1, NRHS + DO 30 I = M + 1, N + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE +* +* B(1:N,1:NRHS) := Q(1:N,:)**T * B(1:M,1:NRHS) +* + CALL ZGEMLQ( 'L', 'C', N, NRHS, M, A, LDA, + $ WORK( LW2+1 ), LW1, B, LDB, WORK( 1 ), LW2, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* + SCLLEN = N +* + ELSE +* +* overdetermined system min || A**T * X - B || +* +* B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) +* + CALL ZGEMLQ( 'L', 'N', N, NRHS, M, A, LDA, + $ WORK( LW2+1 ), LW1, B, LDB, WORK( 1 ), LW2, + $ INFO ) +* +* workspace at least NRHS, optimally NRHS*NB +* +* B(1:M,1:NRHS) := inv(L**T) * B(1:M,1:NRHS) +* + CALL ZTRTRS( 'L', 'C', 'N', M, NRHS, + $ A, LDA, B, LDB, INFO ) +* + IF( INFO.GT.0 ) THEN + RETURN + END IF +* + SCLLEN = M +* + END IF +* + END IF +* +* Undo scaling +* + IF( IASCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IASCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF + IF( IBSCL.EQ.1 ) THEN + CALL ZLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + ELSE IF( IBSCL.EQ.2 ) THEN + CALL ZLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB, + $ INFO ) + END IF +* + 50 CONTINUE + WORK( 1 ) = DBLE( TSZO + LWO ) + RETURN +* +* End of ZGETSLS +* + END diff --git a/dspl/liblapack/SRC/zggbak.f b/dspl/liblapack/SRC/zggbak.f new file mode 100644 index 0000000..cb10b4f --- /dev/null +++ b/dspl/liblapack/SRC/zggbak.f @@ -0,0 +1,307 @@ +*> \brief \b ZGGBAK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGGBAK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V, +* LDV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOB, SIDE +* INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION LSCALE( * ), RSCALE( * ) +* COMPLEX*16 V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGGBAK forms the right or left eigenvectors of a complex generalized +*> eigenvalue problem A*x = lambda*B*x, by backward transformation on +*> the computed eigenvectors of the balanced pair of matrices output by +*> ZGGBAL. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies the type of backward transformation required: +*> = 'N': do nothing, return immediately; +*> = 'P': do backward transformation for permutation only; +*> = 'S': do backward transformation for scaling only; +*> = 'B': do backward transformations for both permutation and +*> scaling. +*> JOB must be the same as the argument JOB supplied to ZGGBAL. +*> \endverbatim +*> +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': V contains right eigenvectors; +*> = 'L': V contains left eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows of the matrix V. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> The integers ILO and IHI determined by ZGGBAL. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in] LSCALE +*> \verbatim +*> LSCALE is DOUBLE PRECISION array, dimension (N) +*> Details of the permutations and/or scaling factors applied +*> to the left side of A and B, as returned by ZGGBAL. +*> \endverbatim +*> +*> \param[in] RSCALE +*> \verbatim +*> RSCALE is DOUBLE PRECISION array, dimension (N) +*> Details of the permutations and/or scaling factors applied +*> to the right side of A and B, as returned by ZGGBAL. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of columns of the matrix V. M >= 0. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (LDV,M) +*> On entry, the matrix of right or left eigenvectors to be +*> transformed, as returned by ZTGEVC. +*> On exit, V is overwritten by the transformed eigenvectors. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the matrix V. LDV >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GBcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> See R.C. Ward, Balancing the generalized eigenvalue problem, +*> SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V, + $ LDV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOB, SIDE + INTEGER IHI, ILO, INFO, LDV, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION LSCALE( * ), RSCALE( * ) + COMPLEX*16 V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFTV, RIGHTV + INTEGER I, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, INT +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + RIGHTV = LSAME( SIDE, 'R' ) + LEFTV = LSAME( SIDE, 'L' ) +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( N.EQ.0 .AND. IHI.EQ.0 .AND. ILO.NE.1 ) THEN + INFO = -4 + ELSE IF( N.GT.0 .AND. ( IHI.LT.ILO .OR. IHI.GT.MAX( 1, N ) ) ) + $ THEN + INFO = -5 + ELSE IF( N.EQ.0 .AND. ILO.EQ.1 .AND. IHI.NE.0 ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -8 + ELSE IF( LDV.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGBAK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( M.EQ.0 ) + $ RETURN + IF( LSAME( JOB, 'N' ) ) + $ RETURN +* + IF( ILO.EQ.IHI ) + $ GO TO 30 +* +* Backward balance +* + IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN +* +* Backward transformation on right eigenvectors +* + IF( RIGHTV ) THEN + DO 10 I = ILO, IHI + CALL ZDSCAL( M, RSCALE( I ), V( I, 1 ), LDV ) + 10 CONTINUE + END IF +* +* Backward transformation on left eigenvectors +* + IF( LEFTV ) THEN + DO 20 I = ILO, IHI + CALL ZDSCAL( M, LSCALE( I ), V( I, 1 ), LDV ) + 20 CONTINUE + END IF + END IF +* +* Backward permutation +* + 30 CONTINUE + IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN +* +* Backward permutation on right eigenvectors +* + IF( RIGHTV ) THEN + IF( ILO.EQ.1 ) + $ GO TO 50 + DO 40 I = ILO - 1, 1, -1 + K = INT(RSCALE( I )) + IF( K.EQ.I ) + $ GO TO 40 + CALL ZSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 40 CONTINUE +* + 50 CONTINUE + IF( IHI.EQ.N ) + $ GO TO 70 + DO 60 I = IHI + 1, N + K = INT(RSCALE( I )) + IF( K.EQ.I ) + $ GO TO 60 + CALL ZSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 60 CONTINUE + END IF +* +* Backward permutation on left eigenvectors +* + 70 CONTINUE + IF( LEFTV ) THEN + IF( ILO.EQ.1 ) + $ GO TO 90 + DO 80 I = ILO - 1, 1, -1 + K = INT(LSCALE( I )) + IF( K.EQ.I ) + $ GO TO 80 + CALL ZSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 80 CONTINUE +* + 90 CONTINUE + IF( IHI.EQ.N ) + $ GO TO 110 + DO 100 I = IHI + 1, N + K = INT(LSCALE( I )) + IF( K.EQ.I ) + $ GO TO 100 + CALL ZSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV ) + 100 CONTINUE + END IF + END IF +* + 110 CONTINUE +* + RETURN +* +* End of ZGGBAK +* + END diff --git a/dspl/liblapack/SRC/zggbal.f b/dspl/liblapack/SRC/zggbal.f new file mode 100644 index 0000000..beca512 --- /dev/null +++ b/dspl/liblapack/SRC/zggbal.f @@ -0,0 +1,572 @@ +*> \brief \b ZGGBAL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGGBAL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE, +* RSCALE, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOB +* INTEGER IHI, ILO, INFO, LDA, LDB, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION LSCALE( * ), RSCALE( * ), WORK( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGGBAL balances a pair of general complex matrices (A,B). This +*> involves, first, permuting A and B by similarity transformations to +*> isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N +*> elements on the diagonal; and second, applying a diagonal similarity +*> transformation to rows and columns ILO to IHI to make the rows +*> and columns as close in norm as possible. Both steps are optional. +*> +*> Balancing may reduce the 1-norm of the matrices, and improve the +*> accuracy of the computed eigenvalues and/or eigenvectors in the +*> generalized eigenvalue problem A*x = lambda*B*x. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies the operations to be performed on A and B: +*> = 'N': none: simply set ILO = 1, IHI = N, LSCALE(I) = 1.0 +*> and RSCALE(I) = 1.0 for i=1,...,N; +*> = 'P': permute only; +*> = 'S': scale only; +*> = 'B': both permute and scale. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the input matrix A. +*> On exit, A is overwritten by the balanced matrix. +*> If JOB = 'N', A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> On entry, the input matrix B. +*> On exit, B is overwritten by the balanced matrix. +*> If JOB = 'N', B is not referenced. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[out] IHI +*> \verbatim +*> IHI is INTEGER +*> ILO and IHI are set to integers such that on exit +*> A(i,j) = 0 and B(i,j) = 0 if i > j and +*> j = 1,...,ILO-1 or i = IHI+1,...,N. +*> If JOB = 'N' or 'S', ILO = 1 and IHI = N. +*> \endverbatim +*> +*> \param[out] LSCALE +*> \verbatim +*> LSCALE is DOUBLE PRECISION array, dimension (N) +*> Details of the permutations and scaling factors applied +*> to the left side of A and B. If P(j) is the index of the +*> row interchanged with row j, and D(j) is the scaling factor +*> applied to row j, then +*> LSCALE(j) = P(j) for J = 1,...,ILO-1 +*> = D(j) for J = ILO,...,IHI +*> = P(j) for J = IHI+1,...,N. +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] RSCALE +*> \verbatim +*> RSCALE is DOUBLE PRECISION array, dimension (N) +*> Details of the permutations and scaling factors applied +*> to the right side of A and B. If P(j) is the index of the +*> column interchanged with column j, and D(j) is the scaling +*> factor applied to column j, then +*> RSCALE(j) = P(j) for J = 1,...,ILO-1 +*> = D(j) for J = ILO,...,IHI +*> = P(j) for J = IHI+1,...,N. +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (lwork) +*> lwork must be at least max(1,6*N) when JOB = 'S' or 'B', and +*> at least 1 when JOB = 'N' or 'P'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16GBcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> See R.C. WARD, Balancing the generalized eigenvalue problem, +*> SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE, + $ RSCALE, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOB + INTEGER IHI, ILO, INFO, LDA, LDB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION LSCALE( * ), RSCALE( * ), WORK( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION THREE, SCLFAC + PARAMETER ( THREE = 3.0D+0, SCLFAC = 1.0D+1 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, ICAB, IFLOW, IP1, IR, IRAB, IT, J, JC, JP1, + $ K, KOUNT, L, LCAB, LM1, LRAB, LSFMAX, LSFMIN, + $ M, NR, NRP2 + DOUBLE PRECISION ALPHA, BASL, BETA, CAB, CMAX, COEF, COEF2, + $ COEF5, COR, EW, EWC, GAMMA, PGAMMA, RAB, SFMAX, + $ SFMIN, SUM, T, TA, TB, TC + COMPLEX*16 CDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DDOT, DLAMCH + EXTERNAL LSAME, IZAMAX, DDOT, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DAXPY, DSCAL, XERBLA, ZDSCAL, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, INT, LOG10, MAX, MIN, SIGN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND. + $ .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGBAL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + ILO = 1 + IHI = N + RETURN + END IF +* + IF( N.EQ.1 ) THEN + ILO = 1 + IHI = N + LSCALE( 1 ) = ONE + RSCALE( 1 ) = ONE + RETURN + END IF +* + IF( LSAME( JOB, 'N' ) ) THEN + ILO = 1 + IHI = N + DO 10 I = 1, N + LSCALE( I ) = ONE + RSCALE( I ) = ONE + 10 CONTINUE + RETURN + END IF +* + K = 1 + L = N + IF( LSAME( JOB, 'S' ) ) + $ GO TO 190 +* + GO TO 30 +* +* Permute the matrices A and B to isolate the eigenvalues. +* +* Find row with one nonzero in columns 1 through L +* + 20 CONTINUE + L = LM1 + IF( L.NE.1 ) + $ GO TO 30 +* + RSCALE( 1 ) = 1 + LSCALE( 1 ) = 1 + GO TO 190 +* + 30 CONTINUE + LM1 = L - 1 + DO 80 I = L, 1, -1 + DO 40 J = 1, LM1 + JP1 = J + 1 + IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO ) + $ GO TO 50 + 40 CONTINUE + J = L + GO TO 70 +* + 50 CONTINUE + DO 60 J = JP1, L + IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO ) + $ GO TO 80 + 60 CONTINUE + J = JP1 - 1 +* + 70 CONTINUE + M = L + IFLOW = 1 + GO TO 160 + 80 CONTINUE + GO TO 100 +* +* Find column with one nonzero in rows K through N +* + 90 CONTINUE + K = K + 1 +* + 100 CONTINUE + DO 150 J = K, L + DO 110 I = K, LM1 + IP1 = I + 1 + IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO ) + $ GO TO 120 + 110 CONTINUE + I = L + GO TO 140 + 120 CONTINUE + DO 130 I = IP1, L + IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO ) + $ GO TO 150 + 130 CONTINUE + I = IP1 - 1 + 140 CONTINUE + M = K + IFLOW = 2 + GO TO 160 + 150 CONTINUE + GO TO 190 +* +* Permute rows M and I +* + 160 CONTINUE + LSCALE( M ) = I + IF( I.EQ.M ) + $ GO TO 170 + CALL ZSWAP( N-K+1, A( I, K ), LDA, A( M, K ), LDA ) + CALL ZSWAP( N-K+1, B( I, K ), LDB, B( M, K ), LDB ) +* +* Permute columns M and J +* + 170 CONTINUE + RSCALE( M ) = J + IF( J.EQ.M ) + $ GO TO 180 + CALL ZSWAP( L, A( 1, J ), 1, A( 1, M ), 1 ) + CALL ZSWAP( L, B( 1, J ), 1, B( 1, M ), 1 ) +* + 180 CONTINUE + GO TO ( 20, 90 )IFLOW +* + 190 CONTINUE + ILO = K + IHI = L +* + IF( LSAME( JOB, 'P' ) ) THEN + DO 195 I = ILO, IHI + LSCALE( I ) = ONE + RSCALE( I ) = ONE + 195 CONTINUE + RETURN + END IF +* + IF( ILO.EQ.IHI ) + $ RETURN +* +* Balance the submatrix in rows ILO to IHI. +* + NR = IHI - ILO + 1 + DO 200 I = ILO, IHI + RSCALE( I ) = ZERO + LSCALE( I ) = ZERO +* + WORK( I ) = ZERO + WORK( I+N ) = ZERO + WORK( I+2*N ) = ZERO + WORK( I+3*N ) = ZERO + WORK( I+4*N ) = ZERO + WORK( I+5*N ) = ZERO + 200 CONTINUE +* +* Compute right side vector in resulting linear equations +* + BASL = LOG10( SCLFAC ) + DO 240 I = ILO, IHI + DO 230 J = ILO, IHI + IF( A( I, J ).EQ.CZERO ) THEN + TA = ZERO + GO TO 210 + END IF + TA = LOG10( CABS1( A( I, J ) ) ) / BASL +* + 210 CONTINUE + IF( B( I, J ).EQ.CZERO ) THEN + TB = ZERO + GO TO 220 + END IF + TB = LOG10( CABS1( B( I, J ) ) ) / BASL +* + 220 CONTINUE + WORK( I+4*N ) = WORK( I+4*N ) - TA - TB + WORK( J+5*N ) = WORK( J+5*N ) - TA - TB + 230 CONTINUE + 240 CONTINUE +* + COEF = ONE / DBLE( 2*NR ) + COEF2 = COEF*COEF + COEF5 = HALF*COEF2 + NRP2 = NR + 2 + BETA = ZERO + IT = 1 +* +* Start generalized conjugate gradient iteration +* + 250 CONTINUE +* + GAMMA = DDOT( NR, WORK( ILO+4*N ), 1, WORK( ILO+4*N ), 1 ) + + $ DDOT( NR, WORK( ILO+5*N ), 1, WORK( ILO+5*N ), 1 ) +* + EW = ZERO + EWC = ZERO + DO 260 I = ILO, IHI + EW = EW + WORK( I+4*N ) + EWC = EWC + WORK( I+5*N ) + 260 CONTINUE +* + GAMMA = COEF*GAMMA - COEF2*( EW**2+EWC**2 ) - COEF5*( EW-EWC )**2 + IF( GAMMA.EQ.ZERO ) + $ GO TO 350 + IF( IT.NE.1 ) + $ BETA = GAMMA / PGAMMA + T = COEF5*( EWC-THREE*EW ) + TC = COEF5*( EW-THREE*EWC ) +* + CALL DSCAL( NR, BETA, WORK( ILO ), 1 ) + CALL DSCAL( NR, BETA, WORK( ILO+N ), 1 ) +* + CALL DAXPY( NR, COEF, WORK( ILO+4*N ), 1, WORK( ILO+N ), 1 ) + CALL DAXPY( NR, COEF, WORK( ILO+5*N ), 1, WORK( ILO ), 1 ) +* + DO 270 I = ILO, IHI + WORK( I ) = WORK( I ) + TC + WORK( I+N ) = WORK( I+N ) + T + 270 CONTINUE +* +* Apply matrix to vector +* + DO 300 I = ILO, IHI + KOUNT = 0 + SUM = ZERO + DO 290 J = ILO, IHI + IF( A( I, J ).EQ.CZERO ) + $ GO TO 280 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( J ) + 280 CONTINUE + IF( B( I, J ).EQ.CZERO ) + $ GO TO 290 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( J ) + 290 CONTINUE + WORK( I+2*N ) = DBLE( KOUNT )*WORK( I+N ) + SUM + 300 CONTINUE +* + DO 330 J = ILO, IHI + KOUNT = 0 + SUM = ZERO + DO 320 I = ILO, IHI + IF( A( I, J ).EQ.CZERO ) + $ GO TO 310 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( I+N ) + 310 CONTINUE + IF( B( I, J ).EQ.CZERO ) + $ GO TO 320 + KOUNT = KOUNT + 1 + SUM = SUM + WORK( I+N ) + 320 CONTINUE + WORK( J+3*N ) = DBLE( KOUNT )*WORK( J ) + SUM + 330 CONTINUE +* + SUM = DDOT( NR, WORK( ILO+N ), 1, WORK( ILO+2*N ), 1 ) + + $ DDOT( NR, WORK( ILO ), 1, WORK( ILO+3*N ), 1 ) + ALPHA = GAMMA / SUM +* +* Determine correction to current iteration +* + CMAX = ZERO + DO 340 I = ILO, IHI + COR = ALPHA*WORK( I+N ) + IF( ABS( COR ).GT.CMAX ) + $ CMAX = ABS( COR ) + LSCALE( I ) = LSCALE( I ) + COR + COR = ALPHA*WORK( I ) + IF( ABS( COR ).GT.CMAX ) + $ CMAX = ABS( COR ) + RSCALE( I ) = RSCALE( I ) + COR + 340 CONTINUE + IF( CMAX.LT.HALF ) + $ GO TO 350 +* + CALL DAXPY( NR, -ALPHA, WORK( ILO+2*N ), 1, WORK( ILO+4*N ), 1 ) + CALL DAXPY( NR, -ALPHA, WORK( ILO+3*N ), 1, WORK( ILO+5*N ), 1 ) +* + PGAMMA = GAMMA + IT = IT + 1 + IF( IT.LE.NRP2 ) + $ GO TO 250 +* +* End generalized conjugate gradient iteration +* + 350 CONTINUE + SFMIN = DLAMCH( 'S' ) + SFMAX = ONE / SFMIN + LSFMIN = INT( LOG10( SFMIN ) / BASL+ONE ) + LSFMAX = INT( LOG10( SFMAX ) / BASL ) + DO 360 I = ILO, IHI + IRAB = IZAMAX( N-ILO+1, A( I, ILO ), LDA ) + RAB = ABS( A( I, IRAB+ILO-1 ) ) + IRAB = IZAMAX( N-ILO+1, B( I, ILO ), LDB ) + RAB = MAX( RAB, ABS( B( I, IRAB+ILO-1 ) ) ) + LRAB = INT( LOG10( RAB+SFMIN ) / BASL+ONE ) + IR = INT(LSCALE( I ) + SIGN( HALF, LSCALE( I ) )) + IR = MIN( MAX( IR, LSFMIN ), LSFMAX, LSFMAX-LRAB ) + LSCALE( I ) = SCLFAC**IR + ICAB = IZAMAX( IHI, A( 1, I ), 1 ) + CAB = ABS( A( ICAB, I ) ) + ICAB = IZAMAX( IHI, B( 1, I ), 1 ) + CAB = MAX( CAB, ABS( B( ICAB, I ) ) ) + LCAB = INT( LOG10( CAB+SFMIN ) / BASL+ONE ) + JC = INT(RSCALE( I ) + SIGN( HALF, RSCALE( I ) )) + JC = MIN( MAX( JC, LSFMIN ), LSFMAX, LSFMAX-LCAB ) + RSCALE( I ) = SCLFAC**JC + 360 CONTINUE +* +* Row scaling of matrices A and B +* + DO 370 I = ILO, IHI + CALL ZDSCAL( N-ILO+1, LSCALE( I ), A( I, ILO ), LDA ) + CALL ZDSCAL( N-ILO+1, LSCALE( I ), B( I, ILO ), LDB ) + 370 CONTINUE +* +* Column scaling of matrices A and B +* + DO 380 J = ILO, IHI + CALL ZDSCAL( IHI, RSCALE( J ), A( 1, J ), 1 ) + CALL ZDSCAL( IHI, RSCALE( J ), B( 1, J ), 1 ) + 380 CONTINUE +* + RETURN +* +* End of ZGGBAL +* + END diff --git a/dspl/liblapack/SRC/zgges.f b/dspl/liblapack/SRC/zgges.f new file mode 100644 index 0000000..555abcd --- /dev/null +++ b/dspl/liblapack/SRC/zgges.f @@ -0,0 +1,599 @@ +*> \brief ZGGES computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGGES + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGGES( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, LDB, +* SDIM, ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK, +* LWORK, RWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVSL, JOBVSR, SORT +* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), +* $ WORK( * ) +* .. +* .. Function Arguments .. +* LOGICAL SELCTG +* EXTERNAL SELCTG +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGGES computes for a pair of N-by-N complex nonsymmetric matrices +*> (A,B), the generalized eigenvalues, the generalized complex Schur +*> form (S, T), and optionally left and/or right Schur vectors (VSL +*> and VSR). This gives the generalized Schur factorization +*> +*> (A,B) = ( (VSL)*S*(VSR)**H, (VSL)*T*(VSR)**H ) +*> +*> where (VSR)**H is the conjugate-transpose of VSR. +*> +*> Optionally, it also orders the eigenvalues so that a selected cluster +*> of eigenvalues appears in the leading diagonal blocks of the upper +*> triangular matrix S and the upper triangular matrix T. The leading +*> columns of VSL and VSR then form an unitary basis for the +*> corresponding left and right eigenspaces (deflating subspaces). +*> +*> (If only the generalized eigenvalues are needed, use the driver +*> ZGGEV instead, which is faster.) +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w +*> or a ratio alpha/beta = w, such that A - w*B is singular. It is +*> usually represented as the pair (alpha,beta), as there is a +*> reasonable interpretation for beta=0, and even for both being zero. +*> +*> A pair of matrices (S,T) is in generalized complex Schur form if S +*> and T are upper triangular and, in addition, the diagonal elements +*> of T are non-negative real numbers. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVSL +*> \verbatim +*> JOBVSL is CHARACTER*1 +*> = 'N': do not compute the left Schur vectors; +*> = 'V': compute the left Schur vectors. +*> \endverbatim +*> +*> \param[in] JOBVSR +*> \verbatim +*> JOBVSR is CHARACTER*1 +*> = 'N': do not compute the right Schur vectors; +*> = 'V': compute the right Schur vectors. +*> \endverbatim +*> +*> \param[in] SORT +*> \verbatim +*> SORT is CHARACTER*1 +*> Specifies whether or not to order the eigenvalues on the +*> diagonal of the generalized Schur form. +*> = 'N': Eigenvalues are not ordered; +*> = 'S': Eigenvalues are ordered (see SELCTG). +*> \endverbatim +*> +*> \param[in] SELCTG +*> \verbatim +*> SELCTG is a LOGICAL FUNCTION of two COMPLEX*16 arguments +*> SELCTG must be declared EXTERNAL in the calling subroutine. +*> If SORT = 'N', SELCTG is not referenced. +*> If SORT = 'S', SELCTG is used to select eigenvalues to sort +*> to the top left of the Schur form. +*> An eigenvalue ALPHA(j)/BETA(j) is selected if +*> SELCTG(ALPHA(j),BETA(j)) is true. +*> +*> Note that a selected complex eigenvalue may no longer satisfy +*> SELCTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since +*> ordering may change the value of complex eigenvalues +*> (especially if the eigenvalue is ill-conditioned), in this +*> case INFO is set to N+2 (See INFO below). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VSL, and VSR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the first of the pair of matrices. +*> On exit, A has been overwritten by its generalized Schur +*> form S. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB, N) +*> On entry, the second of the pair of matrices. +*> On exit, B has been overwritten by its generalized Schur +*> form T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] SDIM +*> \verbatim +*> SDIM is INTEGER +*> If SORT = 'N', SDIM = 0. +*> If SORT = 'S', SDIM = number of eigenvalues (after sorting) +*> for which SELCTG is true. +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is COMPLEX*16 array, dimension (N) +*> On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the +*> generalized eigenvalues. ALPHA(j), j=1,...,N and BETA(j), +*> j=1,...,N are the diagonals of the complex Schur form (A,B) +*> output by ZGGES. The BETA(j) will be non-negative real. +*> +*> Note: the quotients ALPHA(j)/BETA(j) may easily over- or +*> underflow, and BETA(j) may even be zero. Thus, the user +*> should avoid naively computing the ratio alpha/beta. +*> However, ALPHA will be always less than and usually +*> comparable with norm(A) in magnitude, and BETA always less +*> than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VSL +*> \verbatim +*> VSL is COMPLEX*16 array, dimension (LDVSL,N) +*> If JOBVSL = 'V', VSL will contain the left Schur vectors. +*> Not referenced if JOBVSL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSL +*> \verbatim +*> LDVSL is INTEGER +*> The leading dimension of the matrix VSL. LDVSL >= 1, and +*> if JOBVSL = 'V', LDVSL >= N. +*> \endverbatim +*> +*> \param[out] VSR +*> \verbatim +*> VSR is COMPLEX*16 array, dimension (LDVSR,N) +*> If JOBVSR = 'V', VSR will contain the right Schur vectors. +*> Not referenced if JOBVSR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSR +*> \verbatim +*> LDVSR is INTEGER +*> The leading dimension of the matrix VSR. LDVSR >= 1, and +*> if JOBVSR = 'V', LDVSR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,2*N). +*> For good performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (8*N) +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> Not referenced if SORT = 'N'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> =1,...,N: +*> The QZ iteration failed. (A,B) are not in Schur +*> form, but ALPHA(j) and BETA(j) should be correct for +*> j=INFO+1,...,N. +*> > N: =N+1: other than QZ iteration failed in ZHGEQZ +*> =N+2: after reordering, roundoff changed values of +*> some complex eigenvalues so that leading +*> eigenvalues in the Generalized Schur form no +*> longer satisfy SELCTG=.TRUE. This could also +*> be caused due to scaling. +*> =N+3: reordering failed in ZTGSEN. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEeigen +* +* ===================================================================== + SUBROUTINE ZGGES( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, LDB, + $ SDIM, ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK, + $ LWORK, RWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR, SORT + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), + $ WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELCTG + EXTERNAL SELCTG +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL, + $ LQUERY, WANTST + INTEGER I, ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, + $ ILO, IRIGHT, IROWS, IRWRK, ITAU, IWRK, LWKMIN, + $ LWKOPT + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PVSL, + $ PVSR, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + DOUBLE PRECISION DIF( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, XERBLA, ZGEQRF, ZGGBAK, ZGGBAL, ZGGHRD, + $ ZHGEQZ, ZLACPY, ZLASCL, ZLASET, ZTGSEN, ZUNGQR, + $ ZUNMQR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* + WANTST = LSAME( SORT, 'S' ) +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -14 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -16 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + LWKMIN = MAX( 1, 2*N ) + LWKOPT = MAX( 1, N + N*ILAENV( 1, 'ZGEQRF', ' ', N, 1, N, 0 ) ) + LWKOPT = MAX( LWKOPT, N + + $ N*ILAENV( 1, 'ZUNMQR', ' ', N, 1, N, -1 ) ) + IF( ILVSL ) THEN + LWKOPT = MAX( LWKOPT, N + + $ N*ILAENV( 1, 'ZUNGQR', ' ', N, 1, N, -1 ) ) + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) + $ INFO = -18 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGES ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF +* + IF( ILASCL ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = ZLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF +* + IF( ILBSCL ) + $ CALL ZLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (Real Workspace: need 6*N) +* + ILEFT = 1 + IRIGHT = N + 1 + IRWRK = IRIGHT + N + CALL ZGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Complex Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = 1 + IWRK = ITAU + IROWS + CALL ZGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* (Complex Workspace: need N, prefer N*NB) +* + CALL ZUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VSL +* (Complex Workspace: need N, prefer N*NB) +* + IF( ILVSL ) THEN + CALL ZLASET( 'Full', N, N, CZERO, CONE, VSL, LDVSL ) + IF( IROWS.GT.1 ) THEN + CALL ZLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + END IF + CALL ZUNGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VSR +* + IF( ILVSR ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + CALL ZGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IERR ) +* + SDIM = 0 +* +* Perform QZ algorithm, computing Schur vectors if desired +* (Complex Workspace: need N) +* (Real Workspace: need N) +* + IWRK = ITAU + CALL ZHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK( IWRK ), + $ LWORK+1-IWRK, RWORK( IRWRK ), IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 30 + END IF +* +* Sort eigenvalues ALPHA/BETA if desired +* (Workspace: none needed) +* + IF( WANTST ) THEN +* +* Undo scaling on eigenvalues before selecting +* + IF( ILASCL ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, ANRMTO, N, 1, ALPHA, N, IERR ) + IF( ILBSCL ) + $ CALL ZLASCL( 'G', 0, 0, BNRM, BNRMTO, N, 1, BETA, N, IERR ) +* +* Select eigenvalues +* + DO 10 I = 1, N + BWORK( I ) = SELCTG( ALPHA( I ), BETA( I ) ) + 10 CONTINUE +* + CALL ZTGSEN( 0, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, ALPHA, + $ BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PVSL, PVSR, + $ DIF, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1, IERR ) + IF( IERR.EQ.1 ) + $ INFO = N + 3 +* + END IF +* +* Apply back-permutation to VSL and VSR +* (Workspace: none needed) +* + IF( ILVSL ) + $ CALL ZGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSL, LDVSL, IERR ) + IF( ILVSR ) + $ CALL ZGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSR, LDVSR, IERR ) +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL ZLASCL( 'U', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR ) + CALL ZLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL ZLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR ) + CALL ZLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + IF( WANTST ) THEN +* +* Check if reordering is correct +* + LASTSL = .TRUE. + SDIM = 0 + DO 20 I = 1, N + CURSL = SELCTG( ALPHA( I ), BETA( I ) ) + IF( CURSL ) + $ SDIM = SDIM + 1 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + LASTSL = CURSL + 20 CONTINUE +* + END IF +* + 30 CONTINUE +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZGGES +* + END diff --git a/dspl/liblapack/SRC/zgges3.f b/dspl/liblapack/SRC/zgges3.f new file mode 100644 index 0000000..08557b1 --- /dev/null +++ b/dspl/liblapack/SRC/zgges3.f @@ -0,0 +1,595 @@ +*> \brief ZGGES3 computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors for GE matrices (blocked algorithm) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGGES3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGGES3( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, +* $ LDB, SDIM, ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, +* $ WORK, LWORK, RWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVSL, JOBVSR, SORT +* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), +* $ WORK( * ) +* .. +* .. Function Arguments .. +* LOGICAL SELCTG +* EXTERNAL SELCTG +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGGES3 computes for a pair of N-by-N complex nonsymmetric matrices +*> (A,B), the generalized eigenvalues, the generalized complex Schur +*> form (S, T), and optionally left and/or right Schur vectors (VSL +*> and VSR). This gives the generalized Schur factorization +*> +*> (A,B) = ( (VSL)*S*(VSR)**H, (VSL)*T*(VSR)**H ) +*> +*> where (VSR)**H is the conjugate-transpose of VSR. +*> +*> Optionally, it also orders the eigenvalues so that a selected cluster +*> of eigenvalues appears in the leading diagonal blocks of the upper +*> triangular matrix S and the upper triangular matrix T. The leading +*> columns of VSL and VSR then form an unitary basis for the +*> corresponding left and right eigenspaces (deflating subspaces). +*> +*> (If only the generalized eigenvalues are needed, use the driver +*> ZGGEV instead, which is faster.) +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w +*> or a ratio alpha/beta = w, such that A - w*B is singular. It is +*> usually represented as the pair (alpha,beta), as there is a +*> reasonable interpretation for beta=0, and even for both being zero. +*> +*> A pair of matrices (S,T) is in generalized complex Schur form if S +*> and T are upper triangular and, in addition, the diagonal elements +*> of T are non-negative real numbers. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVSL +*> \verbatim +*> JOBVSL is CHARACTER*1 +*> = 'N': do not compute the left Schur vectors; +*> = 'V': compute the left Schur vectors. +*> \endverbatim +*> +*> \param[in] JOBVSR +*> \verbatim +*> JOBVSR is CHARACTER*1 +*> = 'N': do not compute the right Schur vectors; +*> = 'V': compute the right Schur vectors. +*> \endverbatim +*> +*> \param[in] SORT +*> \verbatim +*> SORT is CHARACTER*1 +*> Specifies whether or not to order the eigenvalues on the +*> diagonal of the generalized Schur form. +*> = 'N': Eigenvalues are not ordered; +*> = 'S': Eigenvalues are ordered (see SELCTG). +*> \endverbatim +*> +*> \param[in] SELCTG +*> \verbatim +*> SELCTG is a LOGICAL FUNCTION of two COMPLEX*16 arguments +*> SELCTG must be declared EXTERNAL in the calling subroutine. +*> If SORT = 'N', SELCTG is not referenced. +*> If SORT = 'S', SELCTG is used to select eigenvalues to sort +*> to the top left of the Schur form. +*> An eigenvalue ALPHA(j)/BETA(j) is selected if +*> SELCTG(ALPHA(j),BETA(j)) is true. +*> +*> Note that a selected complex eigenvalue may no longer satisfy +*> SELCTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since +*> ordering may change the value of complex eigenvalues +*> (especially if the eigenvalue is ill-conditioned), in this +*> case INFO is set to N+2 (See INFO below). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VSL, and VSR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the first of the pair of matrices. +*> On exit, A has been overwritten by its generalized Schur +*> form S. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB, N) +*> On entry, the second of the pair of matrices. +*> On exit, B has been overwritten by its generalized Schur +*> form T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] SDIM +*> \verbatim +*> SDIM is INTEGER +*> If SORT = 'N', SDIM = 0. +*> If SORT = 'S', SDIM = number of eigenvalues (after sorting) +*> for which SELCTG is true. +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is COMPLEX*16 array, dimension (N) +*> On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the +*> generalized eigenvalues. ALPHA(j), j=1,...,N and BETA(j), +*> j=1,...,N are the diagonals of the complex Schur form (A,B) +*> output by ZGGES3. The BETA(j) will be non-negative real. +*> +*> Note: the quotients ALPHA(j)/BETA(j) may easily over- or +*> underflow, and BETA(j) may even be zero. Thus, the user +*> should avoid naively computing the ratio alpha/beta. +*> However, ALPHA will be always less than and usually +*> comparable with norm(A) in magnitude, and BETA always less +*> than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VSL +*> \verbatim +*> VSL is COMPLEX*16 array, dimension (LDVSL,N) +*> If JOBVSL = 'V', VSL will contain the left Schur vectors. +*> Not referenced if JOBVSL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSL +*> \verbatim +*> LDVSL is INTEGER +*> The leading dimension of the matrix VSL. LDVSL >= 1, and +*> if JOBVSL = 'V', LDVSL >= N. +*> \endverbatim +*> +*> \param[out] VSR +*> \verbatim +*> VSR is COMPLEX*16 array, dimension (LDVSR,N) +*> If JOBVSR = 'V', VSR will contain the right Schur vectors. +*> Not referenced if JOBVSR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSR +*> \verbatim +*> LDVSR is INTEGER +*> The leading dimension of the matrix VSR. LDVSR >= 1, and +*> if JOBVSR = 'V', LDVSR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (8*N) +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> Not referenced if SORT = 'N'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> =1,...,N: +*> The QZ iteration failed. (A,B) are not in Schur +*> form, but ALPHA(j) and BETA(j) should be correct for +*> j=INFO+1,...,N. +*> > N: =N+1: other than QZ iteration failed in ZHGEQZ +*> =N+2: after reordering, roundoff changed values of +*> some complex eigenvalues so that leading +*> eigenvalues in the Generalized Schur form no +*> longer satisfy SELCTG=.TRUE. This could also +*> be caused due to scaling. +*> =N+3: reordering failed in ZTGSEN. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date January 2015 +* +*> \ingroup complex16GEeigen +* +* ===================================================================== + SUBROUTINE ZGGES3( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, + $ LDB, SDIM, ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, + $ WORK, LWORK, RWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.6.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2015 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR, SORT + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), + $ WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELCTG + EXTERNAL SELCTG +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL, + $ LQUERY, WANTST + INTEGER I, ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, + $ ILO, IRIGHT, IROWS, IRWRK, ITAU, IWRK, LWKOPT + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PVSL, + $ PVSR, SMLNUM +* .. +* .. Local Arrays .. + INTEGER IDUM( 1 ) + DOUBLE PRECISION DIF( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, XERBLA, ZGEQRF, ZGGBAK, ZGGBAL, ZGGHD3, + $ ZHGEQZ, ZLACPY, ZLASCL, ZLASET, ZTGSEN, ZUNGQR, + $ ZUNMQR +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* + WANTST = LSAME( SORT, 'S' ) +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -14 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -16 + ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF +* +* Compute workspace +* + IF( INFO.EQ.0 ) THEN + CALL ZGEQRF( N, N, B, LDB, WORK, WORK, -1, IERR ) + LWKOPT = MAX( 1, N + INT ( WORK( 1 ) ) ) + CALL ZUNMQR( 'L', 'C', N, N, N, B, LDB, WORK, A, LDA, WORK, + $ -1, IERR ) + LWKOPT = MAX( LWKOPT, N + INT ( WORK( 1 ) ) ) + IF( ILVSL ) THEN + CALL ZUNGQR( N, N, N, VSL, LDVSL, WORK, WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, N + INT ( WORK( 1 ) ) ) + END IF + CALL ZGGHD3( JOBVSL, JOBVSR, N, 1, N, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, N + INT ( WORK( 1 ) ) ) + CALL ZHGEQZ( 'S', JOBVSL, JOBVSR, N, 1, N, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK, -1, + $ RWORK, IERR ) + LWKOPT = MAX( LWKOPT, INT ( WORK( 1 ) ) ) + IF( WANTST ) THEN + CALL ZTGSEN( 0, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, SDIM, + $ PVSL, PVSR, DIF, WORK, -1, IDUM, 1, IERR ) + LWKOPT = MAX( LWKOPT, INT ( WORK( 1 ) ) ) + END IF + WORK( 1 ) = DCMPLX( LWKOPT ) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGES3 ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF +* + IF( ILASCL ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = ZLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF +* + IF( ILBSCL ) + $ CALL ZLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrix to make it more nearly triangular +* + ILEFT = 1 + IRIGHT = N + 1 + IRWRK = IRIGHT + N + CALL ZGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = 1 + IWRK = ITAU + IROWS + CALL ZGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* + CALL ZUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VSL +* + IF( ILVSL ) THEN + CALL ZLASET( 'Full', N, N, CZERO, CONE, VSL, LDVSL ) + IF( IROWS.GT.1 ) THEN + CALL ZLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + END IF + CALL ZUNGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VSR +* + IF( ILVSR ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* + CALL ZGGHD3( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, WORK( IWRK ), LWORK+1-IWRK, IERR ) +* + SDIM = 0 +* +* Perform QZ algorithm, computing Schur vectors if desired +* + IWRK = ITAU + CALL ZHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK( IWRK ), + $ LWORK+1-IWRK, RWORK( IRWRK ), IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 30 + END IF +* +* Sort eigenvalues ALPHA/BETA if desired +* + IF( WANTST ) THEN +* +* Undo scaling on eigenvalues before selecting +* + IF( ILASCL ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, ANRMTO, N, 1, ALPHA, N, IERR ) + IF( ILBSCL ) + $ CALL ZLASCL( 'G', 0, 0, BNRM, BNRMTO, N, 1, BETA, N, IERR ) +* +* Select eigenvalues +* + DO 10 I = 1, N + BWORK( I ) = SELCTG( ALPHA( I ), BETA( I ) ) + 10 CONTINUE +* + CALL ZTGSEN( 0, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, ALPHA, + $ BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PVSL, PVSR, + $ DIF, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1, IERR ) + IF( IERR.EQ.1 ) + $ INFO = N + 3 +* + END IF +* +* Apply back-permutation to VSL and VSR +* + IF( ILVSL ) + $ CALL ZGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSL, LDVSL, IERR ) + IF( ILVSR ) + $ CALL ZGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSR, LDVSR, IERR ) +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL ZLASCL( 'U', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR ) + CALL ZLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL ZLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR ) + CALL ZLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + IF( WANTST ) THEN +* +* Check if reordering is correct +* + LASTSL = .TRUE. + SDIM = 0 + DO 20 I = 1, N + CURSL = SELCTG( ALPHA( I ), BETA( I ) ) + IF( CURSL ) + $ SDIM = SDIM + 1 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + LASTSL = CURSL + 20 CONTINUE +* + END IF +* + 30 CONTINUE +* + WORK( 1 ) = DCMPLX( LWKOPT ) +* + RETURN +* +* End of ZGGES3 +* + END diff --git a/dspl/liblapack/SRC/zggesx.f b/dspl/liblapack/SRC/zggesx.f new file mode 100644 index 0000000..6615234 --- /dev/null +++ b/dspl/liblapack/SRC/zggesx.f @@ -0,0 +1,718 @@ +*> \brief ZGGESX computes the eigenvalues, the Schur form, and, optionally, the matrix of Schur vectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGGESX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGGESX( JOBVSL, JOBVSR, SORT, SELCTG, SENSE, N, A, LDA, +* B, LDB, SDIM, ALPHA, BETA, VSL, LDVSL, VSR, +* LDVSR, RCONDE, RCONDV, WORK, LWORK, RWORK, +* IWORK, LIWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVSL, JOBVSR, SENSE, SORT +* INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LIWORK, LWORK, N, +* $ SDIM +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* INTEGER IWORK( * ) +* DOUBLE PRECISION RCONDE( 2 ), RCONDV( 2 ), RWORK( * ) +* COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), +* $ WORK( * ) +* .. +* .. Function Arguments .. +* LOGICAL SELCTG +* EXTERNAL SELCTG +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGGESX computes for a pair of N-by-N complex nonsymmetric matrices +*> (A,B), the generalized eigenvalues, the complex Schur form (S,T), +*> and, optionally, the left and/or right matrices of Schur vectors (VSL +*> and VSR). This gives the generalized Schur factorization +*> +*> (A,B) = ( (VSL) S (VSR)**H, (VSL) T (VSR)**H ) +*> +*> where (VSR)**H is the conjugate-transpose of VSR. +*> +*> Optionally, it also orders the eigenvalues so that a selected cluster +*> of eigenvalues appears in the leading diagonal blocks of the upper +*> triangular matrix S and the upper triangular matrix T; computes +*> a reciprocal condition number for the average of the selected +*> eigenvalues (RCONDE); and computes a reciprocal condition number for +*> the right and left deflating subspaces corresponding to the selected +*> eigenvalues (RCONDV). The leading columns of VSL and VSR then form +*> an orthonormal basis for the corresponding left and right eigenspaces +*> (deflating subspaces). +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar w +*> or a ratio alpha/beta = w, such that A - w*B is singular. It is +*> usually represented as the pair (alpha,beta), as there is a +*> reasonable interpretation for beta=0 or for both being zero. +*> +*> A pair of matrices (S,T) is in generalized complex Schur form if T is +*> upper triangular with non-negative diagonal and S is upper +*> triangular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVSL +*> \verbatim +*> JOBVSL is CHARACTER*1 +*> = 'N': do not compute the left Schur vectors; +*> = 'V': compute the left Schur vectors. +*> \endverbatim +*> +*> \param[in] JOBVSR +*> \verbatim +*> JOBVSR is CHARACTER*1 +*> = 'N': do not compute the right Schur vectors; +*> = 'V': compute the right Schur vectors. +*> \endverbatim +*> +*> \param[in] SORT +*> \verbatim +*> SORT is CHARACTER*1 +*> Specifies whether or not to order the eigenvalues on the +*> diagonal of the generalized Schur form. +*> = 'N': Eigenvalues are not ordered; +*> = 'S': Eigenvalues are ordered (see SELCTG). +*> \endverbatim +*> +*> \param[in] SELCTG +*> \verbatim +*> SELCTG is a LOGICAL FUNCTION of two COMPLEX*16 arguments +*> SELCTG must be declared EXTERNAL in the calling subroutine. +*> If SORT = 'N', SELCTG is not referenced. +*> If SORT = 'S', SELCTG is used to select eigenvalues to sort +*> to the top left of the Schur form. +*> Note that a selected complex eigenvalue may no longer satisfy +*> SELCTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since +*> ordering may change the value of complex eigenvalues +*> (especially if the eigenvalue is ill-conditioned), in this +*> case INFO is set to N+3 see INFO below). +*> \endverbatim +*> +*> \param[in] SENSE +*> \verbatim +*> SENSE is CHARACTER*1 +*> Determines which reciprocal condition numbers are computed. +*> = 'N' : None are computed; +*> = 'E' : Computed for average of selected eigenvalues only; +*> = 'V' : Computed for selected deflating subspaces only; +*> = 'B' : Computed for both. +*> If SENSE = 'E', 'V', or 'B', SORT must equal 'S'. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VSL, and VSR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the first of the pair of matrices. +*> On exit, A has been overwritten by its generalized Schur +*> form S. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB, N) +*> On entry, the second of the pair of matrices. +*> On exit, B has been overwritten by its generalized Schur +*> form T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] SDIM +*> \verbatim +*> SDIM is INTEGER +*> If SORT = 'N', SDIM = 0. +*> If SORT = 'S', SDIM = number of eigenvalues (after sorting) +*> for which SELCTG is true. +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is COMPLEX*16 array, dimension (N) +*> On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the +*> generalized eigenvalues. ALPHA(j) and BETA(j),j=1,...,N are +*> the diagonals of the complex Schur form (S,T). BETA(j) will +*> be non-negative real. +*> +*> Note: the quotients ALPHA(j)/BETA(j) may easily over- or +*> underflow, and BETA(j) may even be zero. Thus, the user +*> should avoid naively computing the ratio alpha/beta. +*> However, ALPHA will be always less than and usually +*> comparable with norm(A) in magnitude, and BETA always less +*> than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VSL +*> \verbatim +*> VSL is COMPLEX*16 array, dimension (LDVSL,N) +*> If JOBVSL = 'V', VSL will contain the left Schur vectors. +*> Not referenced if JOBVSL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSL +*> \verbatim +*> LDVSL is INTEGER +*> The leading dimension of the matrix VSL. LDVSL >=1, and +*> if JOBVSL = 'V', LDVSL >= N. +*> \endverbatim +*> +*> \param[out] VSR +*> \verbatim +*> VSR is COMPLEX*16 array, dimension (LDVSR,N) +*> If JOBVSR = 'V', VSR will contain the right Schur vectors. +*> Not referenced if JOBVSR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVSR +*> \verbatim +*> LDVSR is INTEGER +*> The leading dimension of the matrix VSR. LDVSR >= 1, and +*> if JOBVSR = 'V', LDVSR >= N. +*> \endverbatim +*> +*> \param[out] RCONDE +*> \verbatim +*> RCONDE is DOUBLE PRECISION array, dimension ( 2 ) +*> If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the +*> reciprocal condition numbers for the average of the selected +*> eigenvalues. +*> Not referenced if SENSE = 'N' or 'V'. +*> \endverbatim +*> +*> \param[out] RCONDV +*> \verbatim +*> RCONDV is DOUBLE PRECISION array, dimension ( 2 ) +*> If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the +*> reciprocal condition number for the selected deflating +*> subspaces. +*> Not referenced if SENSE = 'N' or 'E'. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N = 0, LWORK >= 1, else if SENSE = 'E', 'V', or 'B', +*> LWORK >= MAX(1,2*N,2*SDIM*(N-SDIM)), else +*> LWORK >= MAX(1,2*N). Note that 2*SDIM*(N-SDIM) <= N*N/2. +*> Note also that an error is only returned if +*> LWORK < MAX(1,2*N), but if SENSE = 'E' or 'V' or 'B' this may +*> not be large enough. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the bound on the optimal size of the WORK +*> array and the minimum size of the IWORK array, returns these +*> values as the first entries of the WORK and IWORK arrays, and +*> no error message related to LWORK or LIWORK is issued by +*> XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension ( 8*N ) +*> Real workspace. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If SENSE = 'N' or N = 0, LIWORK >= 1, otherwise +*> LIWORK >= N+2. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the bound on the optimal size of the +*> WORK array and the minimum size of the IWORK array, returns +*> these values as the first entries of the WORK and IWORK +*> arrays, and no error message related to LWORK or LIWORK is +*> issued by XERBLA. +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> Not referenced if SORT = 'N'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1,...,N: +*> The QZ iteration failed. (A,B) are not in Schur +*> form, but ALPHA(j) and BETA(j) should be correct for +*> j=INFO+1,...,N. +*> > N: =N+1: other than QZ iteration failed in ZHGEQZ +*> =N+2: after reordering, roundoff changed values of +*> some complex eigenvalues so that leading +*> eigenvalues in the Generalized Schur form no +*> longer satisfy SELCTG=.TRUE. This could also +*> be caused due to scaling. +*> =N+3: reordering failed in ZTGSEN. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16GEeigen +* +* ===================================================================== + SUBROUTINE ZGGESX( JOBVSL, JOBVSR, SORT, SELCTG, SENSE, N, A, LDA, + $ B, LDB, SDIM, ALPHA, BETA, VSL, LDVSL, VSR, + $ LDVSR, RCONDE, RCONDV, WORK, LWORK, RWORK, + $ IWORK, LIWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBVSL, JOBVSR, SENSE, SORT + INTEGER INFO, LDA, LDB, LDVSL, LDVSR, LIWORK, LWORK, N, + $ SDIM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION RCONDE( 2 ), RCONDV( 2 ), RWORK( * ) + COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ), + $ WORK( * ) +* .. +* .. Function Arguments .. + LOGICAL SELCTG + EXTERNAL SELCTG +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL, + $ LQUERY, WANTSB, WANTSE, WANTSN, WANTST, WANTSV + INTEGER I, ICOLS, IERR, IHI, IJOB, IJOBVL, IJOBVR, + $ ILEFT, ILO, IRIGHT, IROWS, IRWRK, ITAU, IWRK, + $ LIWMIN, LWRK, MAXWRK, MINWRK + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PL, + $ PR, SMLNUM +* .. +* .. Local Arrays .. + DOUBLE PRECISION DIF( 2 ) +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, XERBLA, ZGEQRF, ZGGBAK, ZGGBAL, ZGGHRD, + $ ZHGEQZ, ZLACPY, ZLASCL, ZLASET, ZTGSEN, ZUNGQR, + $ ZUNMQR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVSL, 'N' ) ) THEN + IJOBVL = 1 + ILVSL = .FALSE. + ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN + IJOBVL = 2 + ILVSL = .TRUE. + ELSE + IJOBVL = -1 + ILVSL = .FALSE. + END IF +* + IF( LSAME( JOBVSR, 'N' ) ) THEN + IJOBVR = 1 + ILVSR = .FALSE. + ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN + IJOBVR = 2 + ILVSR = .TRUE. + ELSE + IJOBVR = -1 + ILVSR = .FALSE. + END IF +* + WANTST = LSAME( SORT, 'S' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) + IF( WANTSN ) THEN + IJOB = 0 + ELSE IF( WANTSE ) THEN + IJOB = 1 + ELSE IF( WANTSV ) THEN + IJOB = 2 + ELSE IF( WANTSB ) THEN + IJOB = 4 + END IF +* +* Test the input arguments +* + INFO = 0 + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR. + $ ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN + INFO = -15 + ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN + INFO = -17 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV.) +* + IF( INFO.EQ.0 ) THEN + IF( N.GT.0) THEN + MINWRK = 2*N + MAXWRK = N*(1 + ILAENV( 1, 'ZGEQRF', ' ', N, 1, N, 0 ) ) + MAXWRK = MAX( MAXWRK, N*( 1 + + $ ILAENV( 1, 'ZUNMQR', ' ', N, 1, N, -1 ) ) ) + IF( ILVSL ) THEN + MAXWRK = MAX( MAXWRK, N*( 1 + + $ ILAENV( 1, 'ZUNGQR', ' ', N, 1, N, -1 ) ) ) + END IF + LWRK = MAXWRK + IF( IJOB.GE.1 ) + $ LWRK = MAX( LWRK, N*N/2 ) + ELSE + MINWRK = 1 + MAXWRK = 1 + LWRK = 1 + END IF + WORK( 1 ) = LWRK + IF( WANTSN .OR. N.EQ.0 ) THEN + LIWMIN = 1 + ELSE + LIWMIN = N + 2 + END IF + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -21 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY) THEN + INFO = -24 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGESX', -INFO ) + RETURN + ELSE IF (LQUERY) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SDIM = 0 + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = ZLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL ZLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrix to make it more nearly triangular +* (Real Workspace: need 6*N) +* + ILEFT = 1 + IRIGHT = N + 1 + IRWRK = IRIGHT + N + CALL ZGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Complex Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + ICOLS = N + 1 - ILO + ITAU = 1 + IWRK = ITAU + IROWS + CALL ZGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the unitary transformation to matrix A +* (Complex Workspace: need N, prefer N*NB) +* + CALL ZUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VSL +* (Complex Workspace: need N, prefer N*NB) +* + IF( ILVSL ) THEN + CALL ZLASET( 'Full', N, N, CZERO, CONE, VSL, LDVSL ) + IF( IROWS.GT.1 ) THEN + CALL ZLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VSL( ILO+1, ILO ), LDVSL ) + END IF + CALL ZUNGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VSR +* + IF( ILVSR ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, VSR, LDVSR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + CALL ZGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL, + $ LDVSL, VSR, LDVSR, IERR ) +* + SDIM = 0 +* +* Perform QZ algorithm, computing Schur vectors if desired +* (Complex Workspace: need N) +* (Real Workspace: need N) +* + IWRK = ITAU + CALL ZHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK( IWRK ), + $ LWORK+1-IWRK, RWORK( IRWRK ), IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 40 + END IF +* +* Sort eigenvalues ALPHA/BETA and compute the reciprocal of +* condition number(s) +* + IF( WANTST ) THEN +* +* Undo scaling on eigenvalues before SELCTGing +* + IF( ILASCL ) + $ CALL ZLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) + IF( ILBSCL ) + $ CALL ZLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* +* Select eigenvalues +* + DO 10 I = 1, N + BWORK( I ) = SELCTG( ALPHA( I ), BETA( I ) ) + 10 CONTINUE +* +* Reorder eigenvalues, transform Generalized Schur vectors, and +* compute reciprocal condition numbers +* (Complex Workspace: If IJOB >= 1, need MAX(1, 2*SDIM*(N-SDIM)) +* otherwise, need 1 ) +* + CALL ZTGSEN( IJOB, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, + $ ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PL, PR, + $ DIF, WORK( IWRK ), LWORK-IWRK+1, IWORK, LIWORK, + $ IERR ) +* + IF( IJOB.GE.1 ) + $ MAXWRK = MAX( MAXWRK, 2*SDIM*( N-SDIM ) ) + IF( IERR.EQ.-21 ) THEN +* +* not enough complex workspace +* + INFO = -21 + ELSE + IF( IJOB.EQ.1 .OR. IJOB.EQ.4 ) THEN + RCONDE( 1 ) = PL + RCONDE( 2 ) = PR + END IF + IF( IJOB.EQ.2 .OR. IJOB.EQ.4 ) THEN + RCONDV( 1 ) = DIF( 1 ) + RCONDV( 2 ) = DIF( 2 ) + END IF + IF( IERR.EQ.1 ) + $ INFO = N + 3 + END IF +* + END IF +* +* Apply permutation to VSL and VSR +* (Workspace: none needed) +* + IF( ILVSL ) + $ CALL ZGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSL, LDVSL, IERR ) +* + IF( ILVSR ) + $ CALL ZGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VSR, LDVSR, IERR ) +* +* Undo scaling +* + IF( ILASCL ) THEN + CALL ZLASCL( 'U', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR ) + CALL ZLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) + END IF +* + IF( ILBSCL ) THEN + CALL ZLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR ) + CALL ZLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) + END IF +* + IF( WANTST ) THEN +* +* Check if reordering is correct +* + LASTSL = .TRUE. + SDIM = 0 + DO 30 I = 1, N + CURSL = SELCTG( ALPHA( I ), BETA( I ) ) + IF( CURSL ) + $ SDIM = SDIM + 1 + IF( CURSL .AND. .NOT.LASTSL ) + $ INFO = N + 2 + LASTSL = CURSL + 30 CONTINUE +* + END IF +* + 40 CONTINUE +* + WORK( 1 ) = MAXWRK + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of ZGGESX +* + END diff --git a/dspl/liblapack/SRC/zggev.f b/dspl/liblapack/SRC/zggev.f new file mode 100644 index 0000000..946de6c --- /dev/null +++ b/dspl/liblapack/SRC/zggev.f @@ -0,0 +1,558 @@ +*> \brief ZGGEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGGEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, +* VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVL, JOBVR +* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGGEV computes for a pair of N-by-N complex nonsymmetric matrices +*> (A,B), the generalized eigenvalues, and optionally, the left and/or +*> right generalized eigenvectors. +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar +*> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is +*> singular. It is usually represented as the pair (alpha,beta), as +*> there is a reasonable interpretation for beta=0, and even for both +*> being zero. +*> +*> The right generalized eigenvector v(j) corresponding to the +*> generalized eigenvalue lambda(j) of (A,B) satisfies +*> +*> A * v(j) = lambda(j) * B * v(j). +*> +*> The left generalized eigenvector u(j) corresponding to the +*> generalized eigenvalues lambda(j) of (A,B) satisfies +*> +*> u(j)**H * A = lambda(j) * u(j)**H * B +*> +*> where u(j)**H is the conjugate-transpose of u(j). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': do not compute the left generalized eigenvectors; +*> = 'V': compute the left generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': do not compute the right generalized eigenvectors; +*> = 'V': compute the right generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VL, and VR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the matrix A in the pair (A,B). +*> On exit, A has been overwritten. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB, N) +*> On entry, the matrix B in the pair (A,B). +*> On exit, B has been overwritten. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is COMPLEX*16 array, dimension (N) +*> On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the +*> generalized eigenvalues. +*> +*> Note: the quotients ALPHA(j)/BETA(j) may easily over- or +*> underflow, and BETA(j) may even be zero. Thus, the user +*> should avoid naively computing the ratio alpha/beta. +*> However, ALPHA will be always less than and usually +*> comparable with norm(A) in magnitude, and BETA always less +*> than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is COMPLEX*16 array, dimension (LDVL,N) +*> If JOBVL = 'V', the left generalized eigenvectors u(j) are +*> stored one after another in the columns of VL, in the same +*> order as their eigenvalues. +*> Each eigenvector is scaled so the largest component has +*> abs(real part) + abs(imag. part) = 1. +*> Not referenced if JOBVL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the matrix VL. LDVL >= 1, and +*> if JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is COMPLEX*16 array, dimension (LDVR,N) +*> If JOBVR = 'V', the right generalized eigenvectors v(j) are +*> stored one after another in the columns of VR, in the same +*> order as their eigenvalues. +*> Each eigenvector is scaled so the largest component has +*> abs(real part) + abs(imag. part) = 1. +*> Not referenced if JOBVR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the matrix VR. LDVR >= 1, and +*> if JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,2*N). +*> For good performance, LWORK must generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (8*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> =1,...,N: +*> The QZ iteration failed. No eigenvectors have been +*> calculated, but ALPHA(j) and BETA(j) should be +*> correct for j=INFO+1,...,N. +*> > N: =N+1: other then QZ iteration failed in DHGEQZ, +*> =N+2: error return from DTGEVC. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16GEeigen +* +* ===================================================================== + SUBROUTINE ZGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, + $ VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY + CHARACTER CHTEMP + INTEGER ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, ILO, + $ IN, IRIGHT, IROWS, IRWRK, ITAU, IWRK, JC, JR, + $ LWKMIN, LWKOPT + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SMLNUM, TEMP + COMPLEX*16 X +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, XERBLA, ZGEQRF, ZGGBAK, ZGGBAL, ZGGHRD, + $ ZHGEQZ, ZLACPY, ZLASCL, ZLASET, ZTGEVC, ZUNGQR, + $ ZUNMQR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( DBLE( X ) ) + ABS( DIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -13 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. The workspace is +* computed assuming ILO = 1 and IHI = N, the worst case.) +* + IF( INFO.EQ.0 ) THEN + LWKMIN = MAX( 1, 2*N ) + LWKOPT = MAX( 1, N + N*ILAENV( 1, 'ZGEQRF', ' ', N, 1, N, 0 ) ) + LWKOPT = MAX( LWKOPT, N + + $ N*ILAENV( 1, 'ZUNMQR', ' ', N, 1, N, 0 ) ) + IF( ILVL ) THEN + LWKOPT = MAX( LWKOPT, N + + $ N*ILAENV( 1, 'ZUNGQR', ' ', N, 1, N, -1 ) ) + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) + $ INFO = -15 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'E' )*DLAMCH( 'B' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = ZLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL ZLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrices A, B to isolate eigenvalues if possible +* (Real Workspace: need 6*N) +* + ILEFT = 1 + IRIGHT = N + 1 + IRWRK = IRIGHT + N + CALL ZGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* (Complex Workspace: need N, prefer N*NB) +* + IROWS = IHI + 1 - ILO + IF( ILV ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = 1 + IWRK = ITAU + IROWS + CALL ZGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* (Complex Workspace: need N, prefer N*NB) +* + CALL ZUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VL +* (Complex Workspace: need N, prefer N*NB) +* + IF( ILVL ) THEN + CALL ZLASET( 'Full', N, N, CZERO, CONE, VL, LDVL ) + IF( IROWS.GT.1 ) THEN + CALL ZLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + END IF + CALL ZUNGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VR +* + IF( ILVR ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* + IF( ILV ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL ZGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IERR ) + ELSE + CALL ZGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR ) + END IF +* +* Perform QZ algorithm (Compute eigenvalues, and optionally, the +* Schur form and Schur vectors) +* (Complex Workspace: need N) +* (Real Workspace: need N) +* + IWRK = ITAU + IF( ILV ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF + CALL ZHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK( IWRK ), + $ LWORK+1-IWRK, RWORK( IRWRK ), IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 70 + END IF +* +* Compute Eigenvectors +* (Real Workspace: need 2*N) +* (Complex Workspace: need 2*N) +* + IF( ILV ) THEN + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL ZTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, N, IN, WORK( IWRK ), RWORK( IRWRK ), + $ IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 70 + END IF +* +* Undo balancing on VL and VR and normalization +* (Workspace: none needed) +* + IF( ILVL ) THEN + CALL ZGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VL, LDVL, IERR ) + DO 30 JC = 1, N + TEMP = ZERO + DO 10 JR = 1, N + TEMP = MAX( TEMP, ABS1( VL( JR, JC ) ) ) + 10 CONTINUE + IF( TEMP.LT.SMLNUM ) + $ GO TO 30 + TEMP = ONE / TEMP + DO 20 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 20 CONTINUE + 30 CONTINUE + END IF + IF( ILVR ) THEN + CALL ZGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VR, LDVR, IERR ) + DO 60 JC = 1, N + TEMP = ZERO + DO 40 JR = 1, N + TEMP = MAX( TEMP, ABS1( VR( JR, JC ) ) ) + 40 CONTINUE + IF( TEMP.LT.SMLNUM ) + $ GO TO 60 + TEMP = ONE / TEMP + DO 50 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 50 CONTINUE + 60 CONTINUE + END IF + END IF +* +* Undo scaling if necessary +* + 70 CONTINUE +* + IF( ILASCL ) + $ CALL ZLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) +* + IF( ILBSCL ) + $ CALL ZLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZGGEV +* + END diff --git a/dspl/liblapack/SRC/zggev3.f b/dspl/liblapack/SRC/zggev3.f new file mode 100644 index 0000000..2e88ade --- /dev/null +++ b/dspl/liblapack/SRC/zggev3.f @@ -0,0 +1,559 @@ +*> \brief ZGGEV3 computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices (blocked algorithm) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGGEV3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, +* VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBVL, JOBVR +* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGGEV3 computes for a pair of N-by-N complex nonsymmetric matrices +*> (A,B), the generalized eigenvalues, and optionally, the left and/or +*> right generalized eigenvectors. +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar +*> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is +*> singular. It is usually represented as the pair (alpha,beta), as +*> there is a reasonable interpretation for beta=0, and even for both +*> being zero. +*> +*> The right generalized eigenvector v(j) corresponding to the +*> generalized eigenvalue lambda(j) of (A,B) satisfies +*> +*> A * v(j) = lambda(j) * B * v(j). +*> +*> The left generalized eigenvector u(j) corresponding to the +*> generalized eigenvalues lambda(j) of (A,B) satisfies +*> +*> u(j)**H * A = lambda(j) * u(j)**H * B +*> +*> where u(j)**H is the conjugate-transpose of u(j). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': do not compute the left generalized eigenvectors; +*> = 'V': compute the left generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': do not compute the right generalized eigenvectors; +*> = 'V': compute the right generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VL, and VR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the matrix A in the pair (A,B). +*> On exit, A has been overwritten. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB, N) +*> On entry, the matrix B in the pair (A,B). +*> On exit, B has been overwritten. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is COMPLEX*16 array, dimension (N) +*> On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the +*> generalized eigenvalues. +*> +*> Note: the quotients ALPHA(j)/BETA(j) may easily over- or +*> underflow, and BETA(j) may even be zero. Thus, the user +*> should avoid naively computing the ratio alpha/beta. +*> However, ALPHA will be always less than and usually +*> comparable with norm(A) in magnitude, and BETA always less +*> than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is COMPLEX*16 array, dimension (LDVL,N) +*> If JOBVL = 'V', the left generalized eigenvectors u(j) are +*> stored one after another in the columns of VL, in the same +*> order as their eigenvalues. +*> Each eigenvector is scaled so the largest component has +*> abs(real part) + abs(imag. part) = 1. +*> Not referenced if JOBVL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the matrix VL. LDVL >= 1, and +*> if JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is COMPLEX*16 array, dimension (LDVR,N) +*> If JOBVR = 'V', the right generalized eigenvectors v(j) are +*> stored one after another in the columns of VR, in the same +*> order as their eigenvalues. +*> Each eigenvector is scaled so the largest component has +*> abs(real part) + abs(imag. part) = 1. +*> Not referenced if JOBVR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the matrix VR. LDVR >= 1, and +*> if JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (8*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> =1,...,N: +*> The QZ iteration failed. No eigenvectors have been +*> calculated, but ALPHA(j) and BETA(j) should be +*> correct for j=INFO+1,...,N. +*> > N: =N+1: other then QZ iteration failed in DHGEQZ, +*> =N+2: error return from DTGEVC. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date January 2015 +* +*> \ingroup complex16GEeigen +* +* ===================================================================== + SUBROUTINE ZGGEV3( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA, + $ VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.6.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2015 +* +* .. Scalar Arguments .. + CHARACTER JOBVL, JOBVR + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY + CHARACTER CHTEMP + INTEGER ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, ILO, + $ IN, IRIGHT, IROWS, IRWRK, ITAU, IWRK, JC, JR, + $ LWKOPT + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SMLNUM, TEMP + COMPLEX*16 X +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, XERBLA, ZGEQRF, ZGGBAK, ZGGBAL, ZGGHD3, + $ ZHGEQZ, ZLACPY, ZLASCL, ZLASET, ZTGEVC, ZUNGQR, + $ ZUNMQR +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( DBLE( X ) ) + ABS( DIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( IJOBVL.LE.0 ) THEN + INFO = -1 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -11 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF +* +* Compute workspace +* + IF( INFO.EQ.0 ) THEN + CALL ZGEQRF( N, N, B, LDB, WORK, WORK, -1, IERR ) + LWKOPT = MAX( 1, N+INT( WORK( 1 ) ) ) + CALL ZUNMQR( 'L', 'C', N, N, N, B, LDB, WORK, A, LDA, WORK, + $ -1, IERR ) + LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) ) + IF( ILVL ) THEN + CALL ZUNGQR( N, N, N, VL, LDVL, WORK, WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) ) + END IF + IF( ILV ) THEN + CALL ZGGHD3( JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) ) + CALL ZHGEQZ( 'S', JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK, -1, + $ RWORK, IERR ) + LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) ) + ELSE + CALL ZGGHD3( JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, WORK, -1, IERR ) + LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) ) + CALL ZHGEQZ( 'E', JOBVL, JOBVR, N, 1, N, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK, -1, + $ RWORK, IERR ) + LWKOPT = MAX( LWKOPT, N+INT( WORK( 1 ) ) ) + END IF + WORK( 1 ) = DCMPLX( LWKOPT ) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGEV3 ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'E' )*DLAMCH( 'B' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = ZLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL ZLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute the matrices A, B to isolate eigenvalues if possible +* + ILEFT = 1 + IRIGHT = N + 1 + IRWRK = IRIGHT + N + CALL ZGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), RWORK( IRWRK ), IERR ) +* +* Reduce B to triangular form (QR decomposition of B) +* + IROWS = IHI + 1 - ILO + IF( ILV ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = 1 + IWRK = ITAU + IROWS + CALL ZGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the orthogonal transformation to matrix A +* + CALL ZUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VL +* + IF( ILVL ) THEN + CALL ZLASET( 'Full', N, N, CZERO, CONE, VL, LDVL ) + IF( IROWS.GT.1 ) THEN + CALL ZLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + END IF + CALL ZUNGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Initialize VR +* + IF( ILVR ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* + IF( ILV ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL ZGGHD3( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, WORK( IWRK ), LWORK+1-IWRK, IERR ) + ELSE + CALL ZGGHD3( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* +* Perform QZ algorithm (Compute eigenvalues, and optionally, the +* Schur form and Schur vectors) +* + IWRK = ITAU + IF( ILV ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF + CALL ZHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK( IWRK ), + $ LWORK+1-IWRK, RWORK( IRWRK ), IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 70 + END IF +* +* Compute Eigenvectors +* + IF( ILV ) THEN + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL ZTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL, + $ VR, LDVR, N, IN, WORK( IWRK ), RWORK( IRWRK ), + $ IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 70 + END IF +* +* Undo balancing on VL and VR and normalization +* + IF( ILVL ) THEN + CALL ZGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VL, LDVL, IERR ) + DO 30 JC = 1, N + TEMP = ZERO + DO 10 JR = 1, N + TEMP = MAX( TEMP, ABS1( VL( JR, JC ) ) ) + 10 CONTINUE + IF( TEMP.LT.SMLNUM ) + $ GO TO 30 + TEMP = ONE / TEMP + DO 20 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 20 CONTINUE + 30 CONTINUE + END IF + IF( ILVR ) THEN + CALL ZGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ), + $ RWORK( IRIGHT ), N, VR, LDVR, IERR ) + DO 60 JC = 1, N + TEMP = ZERO + DO 40 JR = 1, N + TEMP = MAX( TEMP, ABS1( VR( JR, JC ) ) ) + 40 CONTINUE + IF( TEMP.LT.SMLNUM ) + $ GO TO 60 + TEMP = ONE / TEMP + DO 50 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 50 CONTINUE + 60 CONTINUE + END IF + END IF +* +* Undo scaling if necessary +* + 70 CONTINUE +* + IF( ILASCL ) + $ CALL ZLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) +* + IF( ILBSCL ) + $ CALL ZLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* + WORK( 1 ) = DCMPLX( LWKOPT ) + RETURN +* +* End of ZGGEV3 +* + END diff --git a/dspl/liblapack/SRC/zggevx.f b/dspl/liblapack/SRC/zggevx.f new file mode 100644 index 0000000..5549feb --- /dev/null +++ b/dspl/liblapack/SRC/zggevx.f @@ -0,0 +1,804 @@ +*> \brief ZGGEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for GE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGGEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGGEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, B, LDB, +* ALPHA, BETA, VL, LDVL, VR, LDVR, ILO, IHI, +* LSCALE, RSCALE, ABNRM, BBNRM, RCONDE, RCONDV, +* WORK, LWORK, RWORK, IWORK, BWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER BALANC, JOBVL, JOBVR, SENSE +* INTEGER IHI, ILO, INFO, LDA, LDB, LDVL, LDVR, LWORK, N +* DOUBLE PRECISION ABNRM, BBNRM +* .. +* .. Array Arguments .. +* LOGICAL BWORK( * ) +* INTEGER IWORK( * ) +* DOUBLE PRECISION LSCALE( * ), RCONDE( * ), RCONDV( * ), +* $ RSCALE( * ), RWORK( * ) +* COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGGEVX computes for a pair of N-by-N complex nonsymmetric matrices +*> (A,B) the generalized eigenvalues, and optionally, the left and/or +*> right generalized eigenvectors. +*> +*> Optionally, it also computes a balancing transformation to improve +*> the conditioning of the eigenvalues and eigenvectors (ILO, IHI, +*> LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for +*> the eigenvalues (RCONDE), and reciprocal condition numbers for the +*> right eigenvectors (RCONDV). +*> +*> A generalized eigenvalue for a pair of matrices (A,B) is a scalar +*> lambda or a ratio alpha/beta = lambda, such that A - lambda*B is +*> singular. It is usually represented as the pair (alpha,beta), as +*> there is a reasonable interpretation for beta=0, and even for both +*> being zero. +*> +*> The right eigenvector v(j) corresponding to the eigenvalue lambda(j) +*> of (A,B) satisfies +*> A * v(j) = lambda(j) * B * v(j) . +*> The left eigenvector u(j) corresponding to the eigenvalue lambda(j) +*> of (A,B) satisfies +*> u(j)**H * A = lambda(j) * u(j)**H * B. +*> where u(j)**H is the conjugate-transpose of u(j). +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] BALANC +*> \verbatim +*> BALANC is CHARACTER*1 +*> Specifies the balance option to be performed: +*> = 'N': do not diagonally scale or permute; +*> = 'P': permute only; +*> = 'S': scale only; +*> = 'B': both permute and scale. +*> Computed reciprocal condition numbers will be for the +*> matrices after permuting and/or balancing. Permuting does +*> not change condition numbers (in exact arithmetic), but +*> balancing does. +*> \endverbatim +*> +*> \param[in] JOBVL +*> \verbatim +*> JOBVL is CHARACTER*1 +*> = 'N': do not compute the left generalized eigenvectors; +*> = 'V': compute the left generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] JOBVR +*> \verbatim +*> JOBVR is CHARACTER*1 +*> = 'N': do not compute the right generalized eigenvectors; +*> = 'V': compute the right generalized eigenvectors. +*> \endverbatim +*> +*> \param[in] SENSE +*> \verbatim +*> SENSE is CHARACTER*1 +*> Determines which reciprocal condition numbers are computed. +*> = 'N': none are computed; +*> = 'E': computed for eigenvalues only; +*> = 'V': computed for eigenvectors only; +*> = 'B': computed for eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A, B, VL, and VR. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the matrix A in the pair (A,B). +*> On exit, A has been overwritten. If JOBVL='V' or JOBVR='V' +*> or both, then A contains the first part of the complex Schur +*> form of the "balanced" versions of the input A and B. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB, N) +*> On entry, the matrix B in the pair (A,B). +*> On exit, B has been overwritten. If JOBVL='V' or JOBVR='V' +*> or both, then B contains the second part of the complex +*> Schur form of the "balanced" versions of the input A and B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is COMPLEX*16 array, dimension (N) +*> On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the generalized +*> eigenvalues. +*> +*> Note: the quotient ALPHA(j)/BETA(j) ) may easily over- or +*> underflow, and BETA(j) may even be zero. Thus, the user +*> should avoid naively computing the ratio ALPHA/BETA. +*> However, ALPHA will be always less than and usually +*> comparable with norm(A) in magnitude, and BETA always less +*> than and usually comparable with norm(B). +*> \endverbatim +*> +*> \param[out] VL +*> \verbatim +*> VL is COMPLEX*16 array, dimension (LDVL,N) +*> If JOBVL = 'V', the left generalized eigenvectors u(j) are +*> stored one after another in the columns of VL, in the same +*> order as their eigenvalues. +*> Each eigenvector will be scaled so the largest component +*> will have abs(real part) + abs(imag. part) = 1. +*> Not referenced if JOBVL = 'N'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the matrix VL. LDVL >= 1, and +*> if JOBVL = 'V', LDVL >= N. +*> \endverbatim +*> +*> \param[out] VR +*> \verbatim +*> VR is COMPLEX*16 array, dimension (LDVR,N) +*> If JOBVR = 'V', the right generalized eigenvectors v(j) are +*> stored one after another in the columns of VR, in the same +*> order as their eigenvalues. +*> Each eigenvector will be scaled so the largest component +*> will have abs(real part) + abs(imag. part) = 1. +*> Not referenced if JOBVR = 'N'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the matrix VR. LDVR >= 1, and +*> if JOBVR = 'V', LDVR >= N. +*> \endverbatim +*> +*> \param[out] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[out] IHI +*> \verbatim +*> IHI is INTEGER +*> ILO and IHI are integer values such that on exit +*> A(i,j) = 0 and B(i,j) = 0 if i > j and +*> j = 1,...,ILO-1 or i = IHI+1,...,N. +*> If BALANC = 'N' or 'S', ILO = 1 and IHI = N. +*> \endverbatim +*> +*> \param[out] LSCALE +*> \verbatim +*> LSCALE is DOUBLE PRECISION array, dimension (N) +*> Details of the permutations and scaling factors applied +*> to the left side of A and B. If PL(j) is the index of the +*> row interchanged with row j, and DL(j) is the scaling +*> factor applied to row j, then +*> LSCALE(j) = PL(j) for j = 1,...,ILO-1 +*> = DL(j) for j = ILO,...,IHI +*> = PL(j) for j = IHI+1,...,N. +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] RSCALE +*> \verbatim +*> RSCALE is DOUBLE PRECISION array, dimension (N) +*> Details of the permutations and scaling factors applied +*> to the right side of A and B. If PR(j) is the index of the +*> column interchanged with column j, and DR(j) is the scaling +*> factor applied to column j, then +*> RSCALE(j) = PR(j) for j = 1,...,ILO-1 +*> = DR(j) for j = ILO,...,IHI +*> = PR(j) for j = IHI+1,...,N +*> The order in which the interchanges are made is N to IHI+1, +*> then 1 to ILO-1. +*> \endverbatim +*> +*> \param[out] ABNRM +*> \verbatim +*> ABNRM is DOUBLE PRECISION +*> The one-norm of the balanced matrix A. +*> \endverbatim +*> +*> \param[out] BBNRM +*> \verbatim +*> BBNRM is DOUBLE PRECISION +*> The one-norm of the balanced matrix B. +*> \endverbatim +*> +*> \param[out] RCONDE +*> \verbatim +*> RCONDE is DOUBLE PRECISION array, dimension (N) +*> If SENSE = 'E' or 'B', the reciprocal condition numbers of +*> the eigenvalues, stored in consecutive elements of the array. +*> If SENSE = 'N' or 'V', RCONDE is not referenced. +*> \endverbatim +*> +*> \param[out] RCONDV +*> \verbatim +*> RCONDV is DOUBLE PRECISION array, dimension (N) +*> If JOB = 'V' or 'B', the estimated reciprocal condition +*> numbers of the eigenvectors, stored in consecutive elements +*> of the array. If the eigenvalues cannot be reordered to +*> compute RCONDV(j), RCONDV(j) is set to 0; this can only occur +*> when the true value would be very small anyway. +*> If SENSE = 'N' or 'E', RCONDV is not referenced. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,2*N). +*> If SENSE = 'E', LWORK >= max(1,4*N). +*> If SENSE = 'V' or 'B', LWORK >= max(1,2*N*N+2*N). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (lrwork) +*> lrwork must be at least max(1,6*N) if BALANC = 'S' or 'B', +*> and at least max(1,2*N) otherwise. +*> Real workspace. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N+2) +*> If SENSE = 'E', IWORK is not referenced. +*> \endverbatim +*> +*> \param[out] BWORK +*> \verbatim +*> BWORK is LOGICAL array, dimension (N) +*> If SENSE = 'N', BWORK is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1,...,N: +*> The QZ iteration failed. No eigenvectors have been +*> calculated, but ALPHA(j) and BETA(j) should be correct +*> for j=INFO+1,...,N. +*> > N: =N+1: other than QZ iteration failed in ZHGEQZ. +*> =N+2: error return from ZTGEVC. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16GEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Balancing a matrix pair (A,B) includes, first, permuting rows and +*> columns to isolate eigenvalues, second, applying diagonal similarity +*> transformation to the rows and columns to make the rows and columns +*> as close in norm as possible. The computed reciprocal condition +*> numbers correspond to the balanced matrix. Permuting rows and columns +*> will not change the condition numbers (in exact arithmetic) but +*> diagonal scaling will. For further explanation of balancing, see +*> section 4.11.1.2 of LAPACK Users' Guide. +*> +*> An approximate error bound on the chordal distance between the i-th +*> computed generalized eigenvalue w and the corresponding exact +*> eigenvalue lambda is +*> +*> chord(w, lambda) <= EPS * norm(ABNRM, BBNRM) / RCONDE(I) +*> +*> An approximate error bound for the angle between the i-th computed +*> eigenvector VL(i) or VR(i) is given by +*> +*> EPS * norm(ABNRM, BBNRM) / DIF(i). +*> +*> For further explanation of the reciprocal condition numbers RCONDE +*> and RCONDV, see section 4.11 of LAPACK User's Guide. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGGEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, ILO, IHI, + $ LSCALE, RSCALE, ABNRM, BBNRM, RCONDE, RCONDV, + $ WORK, LWORK, RWORK, IWORK, BWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER BALANC, JOBVL, JOBVR, SENSE + INTEGER IHI, ILO, INFO, LDA, LDB, LDVL, LDVR, LWORK, N + DOUBLE PRECISION ABNRM, BBNRM +* .. +* .. Array Arguments .. + LOGICAL BWORK( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION LSCALE( * ), RCONDE( * ), RCONDV( * ), + $ RSCALE( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY, NOSCL, + $ WANTSB, WANTSE, WANTSN, WANTSV + CHARACTER CHTEMP + INTEGER I, ICOLS, IERR, IJOBVL, IJOBVR, IN, IROWS, + $ ITAU, IWRK, IWRK1, J, JC, JR, M, MAXWRK, MINWRK + DOUBLE PRECISION ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, + $ SMLNUM, TEMP + COMPLEX*16 X +* .. +* .. Local Arrays .. + LOGICAL LDUMMA( 1 ) +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, DLASCL, XERBLA, ZGEQRF, ZGGBAK, ZGGBAL, + $ ZGGHRD, ZHGEQZ, ZLACPY, ZLASCL, ZLASET, ZTGEVC, + $ ZTGSNA, ZUNGQR, ZUNMQR +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANGE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( DBLE( X ) ) + ABS( DIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode the input arguments +* + IF( LSAME( JOBVL, 'N' ) ) THEN + IJOBVL = 1 + ILVL = .FALSE. + ELSE IF( LSAME( JOBVL, 'V' ) ) THEN + IJOBVL = 2 + ILVL = .TRUE. + ELSE + IJOBVL = -1 + ILVL = .FALSE. + END IF +* + IF( LSAME( JOBVR, 'N' ) ) THEN + IJOBVR = 1 + ILVR = .FALSE. + ELSE IF( LSAME( JOBVR, 'V' ) ) THEN + IJOBVR = 2 + ILVR = .TRUE. + ELSE + IJOBVR = -1 + ILVR = .FALSE. + END IF + ILV = ILVL .OR. ILVR +* + NOSCL = LSAME( BALANC, 'N' ) .OR. LSAME( BALANC, 'P' ) + WANTSN = LSAME( SENSE, 'N' ) + WANTSE = LSAME( SENSE, 'E' ) + WANTSV = LSAME( SENSE, 'V' ) + WANTSB = LSAME( SENSE, 'B' ) +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.( NOSCL .OR. LSAME( BALANC,'S' ) .OR. + $ LSAME( BALANC, 'B' ) ) ) THEN + INFO = -1 + ELSE IF( IJOBVL.LE.0 ) THEN + INFO = -2 + ELSE IF( IJOBVR.LE.0 ) THEN + INFO = -3 + ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSB .OR. WANTSV ) ) + $ THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN + INFO = -13 + ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN + INFO = -15 + END IF +* +* Compute workspace +* (Note: Comments in the code beginning "Workspace:" describe the +* minimal amount of workspace needed at that point in the code, +* as well as the preferred amount for good performance. +* NB refers to the optimal block size for the immediately +* following subroutine, as returned by ILAENV. The workspace is +* computed assuming ILO = 1 and IHI = N, the worst case.) +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + MINWRK = 1 + MAXWRK = 1 + ELSE + MINWRK = 2*N + IF( WANTSE ) THEN + MINWRK = 4*N + ELSE IF( WANTSV .OR. WANTSB ) THEN + MINWRK = 2*N*( N + 1) + END IF + MAXWRK = MINWRK + MAXWRK = MAX( MAXWRK, + $ N + N*ILAENV( 1, 'ZGEQRF', ' ', N, 1, N, 0 ) ) + MAXWRK = MAX( MAXWRK, + $ N + N*ILAENV( 1, 'ZUNMQR', ' ', N, 1, N, 0 ) ) + IF( ILVL ) THEN + MAXWRK = MAX( MAXWRK, N + + $ N*ILAENV( 1, 'ZUNGQR', ' ', N, 1, N, 0 ) ) + END IF + END IF + WORK( 1 ) = MAXWRK +* + IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN + INFO = -25 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SQRT( SMLNUM ) / EPS + BIGNUM = ONE / SMLNUM +* +* Scale A if max element outside range [SMLNUM,BIGNUM] +* + ANRM = ZLANGE( 'M', N, N, A, LDA, RWORK ) + ILASCL = .FALSE. + IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN + ANRMTO = SMLNUM + ILASCL = .TRUE. + ELSE IF( ANRM.GT.BIGNUM ) THEN + ANRMTO = BIGNUM + ILASCL = .TRUE. + END IF + IF( ILASCL ) + $ CALL ZLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR ) +* +* Scale B if max element outside range [SMLNUM,BIGNUM] +* + BNRM = ZLANGE( 'M', N, N, B, LDB, RWORK ) + ILBSCL = .FALSE. + IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN + BNRMTO = SMLNUM + ILBSCL = .TRUE. + ELSE IF( BNRM.GT.BIGNUM ) THEN + BNRMTO = BIGNUM + ILBSCL = .TRUE. + END IF + IF( ILBSCL ) + $ CALL ZLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR ) +* +* Permute and/or balance the matrix pair (A,B) +* (Real Workspace: need 6*N if BALANC = 'S' or 'B', 1 otherwise) +* + CALL ZGGBAL( BALANC, N, A, LDA, B, LDB, ILO, IHI, LSCALE, RSCALE, + $ RWORK, IERR ) +* +* Compute ABNRM and BBNRM +* + ABNRM = ZLANGE( '1', N, N, A, LDA, RWORK( 1 ) ) + IF( ILASCL ) THEN + RWORK( 1 ) = ABNRM + CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, 1, 1, RWORK( 1 ), 1, + $ IERR ) + ABNRM = RWORK( 1 ) + END IF +* + BBNRM = ZLANGE( '1', N, N, B, LDB, RWORK( 1 ) ) + IF( ILBSCL ) THEN + RWORK( 1 ) = BBNRM + CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, 1, 1, RWORK( 1 ), 1, + $ IERR ) + BBNRM = RWORK( 1 ) + END IF +* +* Reduce B to triangular form (QR decomposition of B) +* (Complex Workspace: need N, prefer N*NB ) +* + IROWS = IHI + 1 - ILO + IF( ILV .OR. .NOT.WANTSN ) THEN + ICOLS = N + 1 - ILO + ELSE + ICOLS = IROWS + END IF + ITAU = 1 + IWRK = ITAU + IROWS + CALL ZGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ), + $ WORK( IWRK ), LWORK+1-IWRK, IERR ) +* +* Apply the unitary transformation to A +* (Complex Workspace: need N, prefer N*NB) +* + CALL ZUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB, + $ WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ), + $ LWORK+1-IWRK, IERR ) +* +* Initialize VL and/or VR +* (Workspace: need N, prefer N*NB) +* + IF( ILVL ) THEN + CALL ZLASET( 'Full', N, N, CZERO, CONE, VL, LDVL ) + IF( IROWS.GT.1 ) THEN + CALL ZLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB, + $ VL( ILO+1, ILO ), LDVL ) + END IF + CALL ZUNGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL, + $ WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR ) + END IF +* + IF( ILVR ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, VR, LDVR ) +* +* Reduce to generalized Hessenberg form +* (Workspace: none needed) +* + IF( ILV .OR. .NOT.WANTSN ) THEN +* +* Eigenvectors requested -- work on whole matrix. +* + CALL ZGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, IERR ) + ELSE + CALL ZGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA, + $ B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR ) + END IF +* +* Perform QZ algorithm (Compute eigenvalues, and optionally, the +* Schur forms and Schur vectors) +* (Complex Workspace: need N) +* (Real Workspace: need N) +* + IWRK = ITAU + IF( ILV .OR. .NOT.WANTSN ) THEN + CHTEMP = 'S' + ELSE + CHTEMP = 'E' + END IF +* + CALL ZHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, + $ ALPHA, BETA, VL, LDVL, VR, LDVR, WORK( IWRK ), + $ LWORK+1-IWRK, RWORK, IERR ) + IF( IERR.NE.0 ) THEN + IF( IERR.GT.0 .AND. IERR.LE.N ) THEN + INFO = IERR + ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN + INFO = IERR - N + ELSE + INFO = N + 1 + END IF + GO TO 90 + END IF +* +* Compute Eigenvectors and estimate condition numbers if desired +* ZTGEVC: (Complex Workspace: need 2*N ) +* (Real Workspace: need 2*N ) +* ZTGSNA: (Complex Workspace: need 2*N*N if SENSE='V' or 'B') +* (Integer Workspace: need N+2 ) +* + IF( ILV .OR. .NOT.WANTSN ) THEN + IF( ILV ) THEN + IF( ILVL ) THEN + IF( ILVR ) THEN + CHTEMP = 'B' + ELSE + CHTEMP = 'L' + END IF + ELSE + CHTEMP = 'R' + END IF +* + CALL ZTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, N, IN, WORK( IWRK ), RWORK, + $ IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 90 + END IF + END IF +* + IF( .NOT.WANTSN ) THEN +* +* compute eigenvectors (DTGEVC) and estimate condition +* numbers (DTGSNA). Note that the definition of the condition +* number is not invariant under transformation (u,v) to +* (Q*u, Z*v), where (u,v) are eigenvectors of the generalized +* Schur form (S,T), Q and Z are orthogonal matrices. In order +* to avoid using extra 2*N*N workspace, we have to +* re-calculate eigenvectors and estimate the condition numbers +* one at a time. +* + DO 20 I = 1, N +* + DO 10 J = 1, N + BWORK( J ) = .FALSE. + 10 CONTINUE + BWORK( I ) = .TRUE. +* + IWRK = N + 1 + IWRK1 = IWRK + N +* + IF( WANTSE .OR. WANTSB ) THEN + CALL ZTGEVC( 'B', 'S', BWORK, N, A, LDA, B, LDB, + $ WORK( 1 ), N, WORK( IWRK ), N, 1, M, + $ WORK( IWRK1 ), RWORK, IERR ) + IF( IERR.NE.0 ) THEN + INFO = N + 2 + GO TO 90 + END IF + END IF +* + CALL ZTGSNA( SENSE, 'S', BWORK, N, A, LDA, B, LDB, + $ WORK( 1 ), N, WORK( IWRK ), N, RCONDE( I ), + $ RCONDV( I ), 1, M, WORK( IWRK1 ), + $ LWORK-IWRK1+1, IWORK, IERR ) +* + 20 CONTINUE + END IF + END IF +* +* Undo balancing on VL and VR and normalization +* (Workspace: none needed) +* + IF( ILVL ) THEN + CALL ZGGBAK( BALANC, 'L', N, ILO, IHI, LSCALE, RSCALE, N, VL, + $ LDVL, IERR ) +* + DO 50 JC = 1, N + TEMP = ZERO + DO 30 JR = 1, N + TEMP = MAX( TEMP, ABS1( VL( JR, JC ) ) ) + 30 CONTINUE + IF( TEMP.LT.SMLNUM ) + $ GO TO 50 + TEMP = ONE / TEMP + DO 40 JR = 1, N + VL( JR, JC ) = VL( JR, JC )*TEMP + 40 CONTINUE + 50 CONTINUE + END IF +* + IF( ILVR ) THEN + CALL ZGGBAK( BALANC, 'R', N, ILO, IHI, LSCALE, RSCALE, N, VR, + $ LDVR, IERR ) + DO 80 JC = 1, N + TEMP = ZERO + DO 60 JR = 1, N + TEMP = MAX( TEMP, ABS1( VR( JR, JC ) ) ) + 60 CONTINUE + IF( TEMP.LT.SMLNUM ) + $ GO TO 80 + TEMP = ONE / TEMP + DO 70 JR = 1, N + VR( JR, JC ) = VR( JR, JC )*TEMP + 70 CONTINUE + 80 CONTINUE + END IF +* +* Undo scaling if necessary +* + 90 CONTINUE +* + IF( ILASCL ) + $ CALL ZLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR ) +* + IF( ILBSCL ) + $ CALL ZLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR ) +* + WORK( 1 ) = MAXWRK + RETURN +* +* End of ZGGEVX +* + END diff --git a/dspl/liblapack/SRC/zggglm.f b/dspl/liblapack/SRC/zggglm.f new file mode 100644 index 0000000..d6a30ce --- /dev/null +++ b/dspl/liblapack/SRC/zggglm.f @@ -0,0 +1,349 @@ +*> \brief \b ZGGGLM +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGGGLM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), D( * ), WORK( * ), +* $ X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGGGLM solves a general Gauss-Markov linear model (GLM) problem: +*> +*> minimize || y ||_2 subject to d = A*x + B*y +*> x +*> +*> where A is an N-by-M matrix, B is an N-by-P matrix, and d is a +*> given N-vector. It is assumed that M <= N <= M+P, and +*> +*> rank(A) = M and rank( A B ) = N. +*> +*> Under these assumptions, the constrained equation is always +*> consistent, and there is a unique solution x and a minimal 2-norm +*> solution y, which is obtained using a generalized QR factorization +*> of the matrices (A, B) given by +*> +*> A = Q*(R), B = Q*T*Z. +*> (0) +*> +*> In particular, if matrix B is square nonsingular, then the problem +*> GLM is equivalent to the following weighted linear least squares +*> problem +*> +*> minimize || inv(B)*(d-A*x) ||_2 +*> x +*> +*> where inv(B) denotes the inverse of B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of columns of the matrix A. 0 <= M <= N. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of columns of the matrix B. P >= N-M. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,M) +*> On entry, the N-by-M matrix A. +*> On exit, the upper triangular part of the array A contains +*> the M-by-M upper triangular matrix R. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,P) +*> On entry, the N-by-P matrix B. +*> On exit, if N <= P, the upper triangle of the subarray +*> B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; +*> if N > P, the elements on and above the (N-P)th subdiagonal +*> contain the N-by-P upper trapezoidal matrix T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is COMPLEX*16 array, dimension (N) +*> On entry, D is the left hand side of the GLM equation. +*> On exit, D is destroyed. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (M) +*> \endverbatim +*> +*> \param[out] Y +*> \verbatim +*> Y is COMPLEX*16 array, dimension (P) +*> +*> On exit, X and Y are the solutions of the GLM problem. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N+M+P). +*> For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB, +*> where NB is an upper bound for the optimal blocksizes for +*> ZGEQRF, ZGERQF, ZUNMQR and ZUNMRQ. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1: the upper triangular factor R associated with A in the +*> generalized QR factorization of the pair (A, B) is +*> singular, so that rank(A) < M; the least squares +*> solution could not be computed. +*> = 2: the bottom (N-M) by (N-M) part of the upper trapezoidal +*> factor T associated with B in the generalized QR +*> factorization of the pair (A, B) is singular, so that +*> rank( A B ) < N; the least squares solution could not +*> be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHEReigen +* +* ===================================================================== + SUBROUTINE ZGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), D( * ), WORK( * ), + $ X( * ), Y( * ) +* .. +* +* =================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, LOPT, LWKMIN, LWKOPT, NB, NB1, NB2, NB3, + $ NB4, NP +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZGEMV, ZGGQRF, ZTRTRS, ZUNMQR, + $ ZUNMRQ +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NP = MIN( N, P ) + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 .OR. M.GT.N ) THEN + INFO = -2 + ELSE IF( P.LT.0 .OR. P.LT.N-M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF +* +* Calculate workspace +* + IF( INFO.EQ.0) THEN + IF( N.EQ.0 ) THEN + LWKMIN = 1 + LWKOPT = 1 + ELSE + NB1 = ILAENV( 1, 'ZGEQRF', ' ', N, M, -1, -1 ) + NB2 = ILAENV( 1, 'ZGERQF', ' ', N, M, -1, -1 ) + NB3 = ILAENV( 1, 'ZUNMQR', ' ', N, M, P, -1 ) + NB4 = ILAENV( 1, 'ZUNMRQ', ' ', N, M, P, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKMIN = M + N + P + LWKOPT = M + NP + MAX( N, P )*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGGLM', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the GQR factorization of matrices A and B: +* +* Q**H*A = ( R11 ) M, Q**H*B*Z**H = ( T11 T12 ) M +* ( 0 ) N-M ( 0 T22 ) N-M +* M M+P-N N-M +* +* where R11 and T22 are upper triangular, and Q and Z are +* unitary. +* + CALL ZGGQRF( N, M, P, A, LDA, WORK, B, LDB, WORK( M+1 ), + $ WORK( M+NP+1 ), LWORK-M-NP, INFO ) + LOPT = WORK( M+NP+1 ) +* +* Update left-hand-side vector d = Q**H*d = ( d1 ) M +* ( d2 ) N-M +* + CALL ZUNMQR( 'Left', 'Conjugate transpose', N, 1, M, A, LDA, WORK, + $ D, MAX( 1, N ), WORK( M+NP+1 ), LWORK-M-NP, INFO ) + LOPT = MAX( LOPT, INT( WORK( M+NP+1 ) ) ) +* +* Solve T22*y2 = d2 for y2 +* + IF( N.GT.M ) THEN + CALL ZTRTRS( 'Upper', 'No transpose', 'Non unit', N-M, 1, + $ B( M+1, M+P-N+1 ), LDB, D( M+1 ), N-M, INFO ) +* + IF( INFO.GT.0 ) THEN + INFO = 1 + RETURN + END IF +* + CALL ZCOPY( N-M, D( M+1 ), 1, Y( M+P-N+1 ), 1 ) + END IF +* +* Set y1 = 0 +* + DO 10 I = 1, M + P - N + Y( I ) = CZERO + 10 CONTINUE +* +* Update d1 = d1 - T12*y2 +* + CALL ZGEMV( 'No transpose', M, N-M, -CONE, B( 1, M+P-N+1 ), LDB, + $ Y( M+P-N+1 ), 1, CONE, D, 1 ) +* +* Solve triangular system: R11*x = d1 +* + IF( M.GT.0 ) THEN + CALL ZTRTRS( 'Upper', 'No Transpose', 'Non unit', M, 1, A, LDA, + $ D, M, INFO ) +* + IF( INFO.GT.0 ) THEN + INFO = 2 + RETURN + END IF +* +* Copy D to X +* + CALL ZCOPY( M, D, 1, X, 1 ) + END IF +* +* Backward transformation y = Z**H *y +* + CALL ZUNMRQ( 'Left', 'Conjugate transpose', P, 1, NP, + $ B( MAX( 1, N-P+1 ), 1 ), LDB, WORK( M+1 ), Y, + $ MAX( 1, P ), WORK( M+NP+1 ), LWORK-M-NP, INFO ) + WORK( 1 ) = M + NP + MAX( LOPT, INT( WORK( M+NP+1 ) ) ) +* + RETURN +* +* End of ZGGGLM +* + END diff --git a/dspl/liblapack/SRC/zgghd3.f b/dspl/liblapack/SRC/zgghd3.f new file mode 100644 index 0000000..4b0e782 --- /dev/null +++ b/dspl/liblapack/SRC/zgghd3.f @@ -0,0 +1,895 @@ +*> \brief \b ZGGHD3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGGHD3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, +* LDQ, Z, LDZ, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ, COMPZ +* INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ Z( LDZ, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGGHD3 reduces a pair of complex matrices (A,B) to generalized upper +*> Hessenberg form using unitary transformations, where A is a +*> general matrix and B is upper triangular. The form of the +*> generalized eigenvalue problem is +*> A*x = lambda*B*x, +*> and B is typically made upper triangular by computing its QR +*> factorization and moving the unitary matrix Q to the left side +*> of the equation. +*> +*> This subroutine simultaneously reduces A to a Hessenberg matrix H: +*> Q**H*A*Z = H +*> and transforms B to another upper triangular matrix T: +*> Q**H*B*Z = T +*> in order to reduce the problem to its standard form +*> H*y = lambda*T*y +*> where y = Z**H*x. +*> +*> The unitary matrices Q and Z are determined as products of Givens +*> rotations. They may either be formed explicitly, or they may be +*> postmultiplied into input matrices Q1 and Z1, so that +*> Q1 * A * Z1**H = (Q1*Q) * H * (Z1*Z)**H +*> Q1 * B * Z1**H = (Q1*Q) * T * (Z1*Z)**H +*> If Q1 is the unitary matrix from the QR factorization of B in the +*> original equation A*x = lambda*B*x, then ZGGHD3 reduces the original +*> problem to generalized Hessenberg form. +*> +*> This is a blocked variant of CGGHRD, using matrix-matrix +*> multiplications for parts of the computation to enhance performance. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> = 'N': do not compute Q; +*> = 'I': Q is initialized to the unit matrix, and the +*> unitary matrix Q is returned; +*> = 'V': Q must contain a unitary matrix Q1 on entry, +*> and the product Q1*Q is returned. +*> \endverbatim +*> +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': do not compute Z; +*> = 'I': Z is initialized to the unit matrix, and the +*> unitary matrix Z is returned; +*> = 'V': Z must contain a unitary matrix Z1 on entry, +*> and the product Z1*Z is returned. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> ILO and IHI mark the rows and columns of A which are to be +*> reduced. It is assumed that A is already upper triangular +*> in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are +*> normally set by a previous call to ZGGBAL; otherwise they +*> should be set to 1 and N respectively. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the N-by-N general matrix to be reduced. +*> On exit, the upper triangle and the first subdiagonal of A +*> are overwritten with the upper Hessenberg matrix H, and the +*> rest is set to zero. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB, N) +*> On entry, the N-by-N upper triangular matrix B. +*> On exit, the upper triangular matrix T = Q**H B Z. The +*> elements below the diagonal are set to zero. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ, N) +*> On entry, if COMPQ = 'V', the unitary matrix Q1, typically +*> from the QR factorization of B. +*> On exit, if COMPQ='I', the unitary matrix Q, and if +*> COMPQ = 'V', the product Q1*Q. +*> Not referenced if COMPQ='N'. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', the unitary matrix Z1. +*> On exit, if COMPZ='I', the unitary matrix Z, and if +*> COMPZ = 'V', the product Z1*Z. +*> Not referenced if COMPZ='N'. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. +*> LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1. +*> For optimum performance LWORK >= 6*N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date January 2015 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> This routine reduces A to Hessenberg form and maintains B in +*> using a blocked variant of Moler and Stewart's original algorithm, +*> as described by Kagstrom, Kressner, Quintana-Orti, and Quintana-Orti +*> (BIT 2008). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGGHD3( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, + $ LDQ, Z, LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2015 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ + INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N, LWORK +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE, CZERO + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), + $ CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL BLK22, INITQ, INITZ, LQUERY, WANTQ, WANTZ + CHARACTER*1 COMPQ2, COMPZ2 + INTEGER COLA, I, IERR, J, J0, JCOL, JJ, JROW, K, + $ KACC22, LEN, LWKOPT, N2NB, NB, NBLST, NBMIN, + $ NH, NNB, NX, PPW, PPWO, PW, TOP, TOPQ + DOUBLE PRECISION C + COMPLEX*16 C1, C2, CTEMP, S, S1, S2, TEMP, TEMP1, TEMP2, + $ TEMP3 +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZGGHRD, ZLARTG, ZLASET, ZUNM22, ZROT, ZGEMM, + $ ZGEMV, ZTRMV, ZLACPY, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + INFO = 0 + NB = ILAENV( 1, 'ZGGHD3', ' ', N, ILO, IHI, -1 ) + LWKOPT = MAX( 6*N*NB, 1 ) + WORK( 1 ) = DCMPLX( LWKOPT ) + INITQ = LSAME( COMPQ, 'I' ) + WANTQ = INITQ .OR. LSAME( COMPQ, 'V' ) + INITZ = LSAME( COMPZ, 'I' ) + WANTZ = INITZ .OR. LSAME( COMPZ, 'V' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPZ, 'N' ) .AND. .NOT.WANTZ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( ( WANTQ .AND. LDQ.LT.N ) .OR. LDQ.LT.1 ) THEN + INFO = -11 + ELSE IF( ( WANTZ .AND. LDZ.LT.N ) .OR. LDZ.LT.1 ) THEN + INFO = -13 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGHD3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Initialize Q and Z if desired. +* + IF( INITQ ) + $ CALL ZLASET( 'All', N, N, CZERO, CONE, Q, LDQ ) + IF( INITZ ) + $ CALL ZLASET( 'All', N, N, CZERO, CONE, Z, LDZ ) +* +* Zero out lower triangle of B. +* + IF( N.GT.1 ) + $ CALL ZLASET( 'Lower', N-1, N-1, CZERO, CZERO, B(2, 1), LDB ) +* +* Quick return if possible +* + NH = IHI - ILO + 1 + IF( NH.LE.1 ) THEN + WORK( 1 ) = CONE + RETURN + END IF +* +* Determine the blocksize. +* + NBMIN = ILAENV( 2, 'ZGGHD3', ' ', N, ILO, IHI, -1 ) + IF( NB.GT.1 .AND. NB.LT.NH ) THEN +* +* Determine when to use unblocked instead of blocked code. +* + NX = MAX( NB, ILAENV( 3, 'ZGGHD3', ' ', N, ILO, IHI, -1 ) ) + IF( NX.LT.NH ) THEN +* +* Determine if workspace is large enough for blocked code. +* + IF( LWORK.LT.LWKOPT ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code. +* + NBMIN = MAX( 2, ILAENV( 2, 'ZGGHD3', ' ', N, ILO, IHI, + $ -1 ) ) + IF( LWORK.GE.6*N*NBMIN ) THEN + NB = LWORK / ( 6*N ) + ELSE + NB = 1 + END IF + END IF + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN +* +* Use unblocked code below +* + JCOL = ILO +* + ELSE +* +* Use blocked code +* + KACC22 = ILAENV( 16, 'ZGGHD3', ' ', N, ILO, IHI, -1 ) + BLK22 = KACC22.EQ.2 + DO JCOL = ILO, IHI-2, NB + NNB = MIN( NB, IHI-JCOL-1 ) +* +* Initialize small unitary factors that will hold the +* accumulated Givens rotations in workspace. +* N2NB denotes the number of 2*NNB-by-2*NNB factors +* NBLST denotes the (possibly smaller) order of the last +* factor. +* + N2NB = ( IHI-JCOL-1 ) / NNB - 1 + NBLST = IHI - JCOL - N2NB*NNB + CALL ZLASET( 'All', NBLST, NBLST, CZERO, CONE, WORK, NBLST ) + PW = NBLST * NBLST + 1 + DO I = 1, N2NB + CALL ZLASET( 'All', 2*NNB, 2*NNB, CZERO, CONE, + $ WORK( PW ), 2*NNB ) + PW = PW + 4*NNB*NNB + END DO +* +* Reduce columns JCOL:JCOL+NNB-1 of A to Hessenberg form. +* + DO J = JCOL, JCOL+NNB-1 +* +* Reduce Jth column of A. Store cosines and sines in Jth +* column of A and B, respectively. +* + DO I = IHI, J+2, -1 + TEMP = A( I-1, J ) + CALL ZLARTG( TEMP, A( I, J ), C, S, A( I-1, J ) ) + A( I, J ) = DCMPLX( C ) + B( I, J ) = S + END DO +* +* Accumulate Givens rotations into workspace array. +* + PPW = ( NBLST + 1 )*( NBLST - 2 ) - J + JCOL + 1 + LEN = 2 + J - JCOL + JROW = J + N2NB*NNB + 2 + DO I = IHI, JROW, -1 + CTEMP = A( I, J ) + S = B( I, J ) + DO JJ = PPW, PPW+LEN-1 + TEMP = WORK( JJ + NBLST ) + WORK( JJ + NBLST ) = CTEMP*TEMP - S*WORK( JJ ) + WORK( JJ ) = DCONJG( S )*TEMP + CTEMP*WORK( JJ ) + END DO + LEN = LEN + 1 + PPW = PPW - NBLST - 1 + END DO +* + PPWO = NBLST*NBLST + ( NNB+J-JCOL-1 )*2*NNB + NNB + J0 = JROW - NNB + DO JROW = J0, J+2, -NNB + PPW = PPWO + LEN = 2 + J - JCOL + DO I = JROW+NNB-1, JROW, -1 + CTEMP = A( I, J ) + S = B( I, J ) + DO JJ = PPW, PPW+LEN-1 + TEMP = WORK( JJ + 2*NNB ) + WORK( JJ + 2*NNB ) = CTEMP*TEMP - S*WORK( JJ ) + WORK( JJ ) = DCONJG( S )*TEMP + CTEMP*WORK( JJ ) + END DO + LEN = LEN + 1 + PPW = PPW - 2*NNB - 1 + END DO + PPWO = PPWO + 4*NNB*NNB + END DO +* +* TOP denotes the number of top rows in A and B that will +* not be updated during the next steps. +* + IF( JCOL.LE.2 ) THEN + TOP = 0 + ELSE + TOP = JCOL + END IF +* +* Propagate transformations through B and replace stored +* left sines/cosines by right sines/cosines. +* + DO JJ = N, J+1, -1 +* +* Update JJth column of B. +* + DO I = MIN( JJ+1, IHI ), J+2, -1 + CTEMP = A( I, J ) + S = B( I, J ) + TEMP = B( I, JJ ) + B( I, JJ ) = CTEMP*TEMP - DCONJG( S )*B( I-1, JJ ) + B( I-1, JJ ) = S*TEMP + CTEMP*B( I-1, JJ ) + END DO +* +* Annihilate B( JJ+1, JJ ). +* + IF( JJ.LT.IHI ) THEN + TEMP = B( JJ+1, JJ+1 ) + CALL ZLARTG( TEMP, B( JJ+1, JJ ), C, S, + $ B( JJ+1, JJ+1 ) ) + B( JJ+1, JJ ) = CZERO + CALL ZROT( JJ-TOP, B( TOP+1, JJ+1 ), 1, + $ B( TOP+1, JJ ), 1, C, S ) + A( JJ+1, J ) = DCMPLX( C ) + B( JJ+1, J ) = -DCONJG( S ) + END IF + END DO +* +* Update A by transformations from right. +* + JJ = MOD( IHI-J-1, 3 ) + DO I = IHI-J-3, JJ+1, -3 + CTEMP = A( J+1+I, J ) + S = -B( J+1+I, J ) + C1 = A( J+2+I, J ) + S1 = -B( J+2+I, J ) + C2 = A( J+3+I, J ) + S2 = -B( J+3+I, J ) +* + DO K = TOP+1, IHI + TEMP = A( K, J+I ) + TEMP1 = A( K, J+I+1 ) + TEMP2 = A( K, J+I+2 ) + TEMP3 = A( K, J+I+3 ) + A( K, J+I+3 ) = C2*TEMP3 + DCONJG( S2 )*TEMP2 + TEMP2 = -S2*TEMP3 + C2*TEMP2 + A( K, J+I+2 ) = C1*TEMP2 + DCONJG( S1 )*TEMP1 + TEMP1 = -S1*TEMP2 + C1*TEMP1 + A( K, J+I+1 ) = CTEMP*TEMP1 + DCONJG( S )*TEMP + A( K, J+I ) = -S*TEMP1 + CTEMP*TEMP + END DO + END DO +* + IF( JJ.GT.0 ) THEN + DO I = JJ, 1, -1 + C = DBLE( A( J+1+I, J ) ) + CALL ZROT( IHI-TOP, A( TOP+1, J+I+1 ), 1, + $ A( TOP+1, J+I ), 1, C, + $ -DCONJG( B( J+1+I, J ) ) ) + END DO + END IF +* +* Update (J+1)th column of A by transformations from left. +* + IF ( J .LT. JCOL + NNB - 1 ) THEN + LEN = 1 + J - JCOL +* +* Multiply with the trailing accumulated unitary +* matrix, which takes the form +* +* [ U11 U12 ] +* U = [ ], +* [ U21 U22 ] +* +* where U21 is a LEN-by-LEN matrix and U12 is lower +* triangular. +* + JROW = IHI - NBLST + 1 + CALL ZGEMV( 'Conjugate', NBLST, LEN, CONE, WORK, + $ NBLST, A( JROW, J+1 ), 1, CZERO, + $ WORK( PW ), 1 ) + PPW = PW + LEN + DO I = JROW, JROW+NBLST-LEN-1 + WORK( PPW ) = A( I, J+1 ) + PPW = PPW + 1 + END DO + CALL ZTRMV( 'Lower', 'Conjugate', 'Non-unit', + $ NBLST-LEN, WORK( LEN*NBLST + 1 ), NBLST, + $ WORK( PW+LEN ), 1 ) + CALL ZGEMV( 'Conjugate', LEN, NBLST-LEN, CONE, + $ WORK( (LEN+1)*NBLST - LEN + 1 ), NBLST, + $ A( JROW+NBLST-LEN, J+1 ), 1, CONE, + $ WORK( PW+LEN ), 1 ) + PPW = PW + DO I = JROW, JROW+NBLST-1 + A( I, J+1 ) = WORK( PPW ) + PPW = PPW + 1 + END DO +* +* Multiply with the other accumulated unitary +* matrices, which take the form +* +* [ U11 U12 0 ] +* [ ] +* U = [ U21 U22 0 ], +* [ ] +* [ 0 0 I ] +* +* where I denotes the (NNB-LEN)-by-(NNB-LEN) identity +* matrix, U21 is a LEN-by-LEN upper triangular matrix +* and U12 is an NNB-by-NNB lower triangular matrix. +* + PPWO = 1 + NBLST*NBLST + J0 = JROW - NNB + DO JROW = J0, JCOL+1, -NNB + PPW = PW + LEN + DO I = JROW, JROW+NNB-1 + WORK( PPW ) = A( I, J+1 ) + PPW = PPW + 1 + END DO + PPW = PW + DO I = JROW+NNB, JROW+NNB+LEN-1 + WORK( PPW ) = A( I, J+1 ) + PPW = PPW + 1 + END DO + CALL ZTRMV( 'Upper', 'Conjugate', 'Non-unit', LEN, + $ WORK( PPWO + NNB ), 2*NNB, WORK( PW ), + $ 1 ) + CALL ZTRMV( 'Lower', 'Conjugate', 'Non-unit', NNB, + $ WORK( PPWO + 2*LEN*NNB ), + $ 2*NNB, WORK( PW + LEN ), 1 ) + CALL ZGEMV( 'Conjugate', NNB, LEN, CONE, + $ WORK( PPWO ), 2*NNB, A( JROW, J+1 ), 1, + $ CONE, WORK( PW ), 1 ) + CALL ZGEMV( 'Conjugate', LEN, NNB, CONE, + $ WORK( PPWO + 2*LEN*NNB + NNB ), 2*NNB, + $ A( JROW+NNB, J+1 ), 1, CONE, + $ WORK( PW+LEN ), 1 ) + PPW = PW + DO I = JROW, JROW+LEN+NNB-1 + A( I, J+1 ) = WORK( PPW ) + PPW = PPW + 1 + END DO + PPWO = PPWO + 4*NNB*NNB + END DO + END IF + END DO +* +* Apply accumulated unitary matrices to A. +* + COLA = N - JCOL - NNB + 1 + J = IHI - NBLST + 1 + CALL ZGEMM( 'Conjugate', 'No Transpose', NBLST, + $ COLA, NBLST, CONE, WORK, NBLST, + $ A( J, JCOL+NNB ), LDA, CZERO, WORK( PW ), + $ NBLST ) + CALL ZLACPY( 'All', NBLST, COLA, WORK( PW ), NBLST, + $ A( J, JCOL+NNB ), LDA ) + PPWO = NBLST*NBLST + 1 + J0 = J - NNB + DO J = J0, JCOL+1, -NNB + IF ( BLK22 ) THEN +* +* Exploit the structure of +* +* [ U11 U12 ] +* U = [ ] +* [ U21 U22 ], +* +* where all blocks are NNB-by-NNB, U21 is upper +* triangular and U12 is lower triangular. +* + CALL ZUNM22( 'Left', 'Conjugate', 2*NNB, COLA, NNB, + $ NNB, WORK( PPWO ), 2*NNB, + $ A( J, JCOL+NNB ), LDA, WORK( PW ), + $ LWORK-PW+1, IERR ) + ELSE +* +* Ignore the structure of U. +* + CALL ZGEMM( 'Conjugate', 'No Transpose', 2*NNB, + $ COLA, 2*NNB, CONE, WORK( PPWO ), 2*NNB, + $ A( J, JCOL+NNB ), LDA, CZERO, WORK( PW ), + $ 2*NNB ) + CALL ZLACPY( 'All', 2*NNB, COLA, WORK( PW ), 2*NNB, + $ A( J, JCOL+NNB ), LDA ) + END IF + PPWO = PPWO + 4*NNB*NNB + END DO +* +* Apply accumulated unitary matrices to Q. +* + IF( WANTQ ) THEN + J = IHI - NBLST + 1 + IF ( INITQ ) THEN + TOPQ = MAX( 2, J - JCOL + 1 ) + NH = IHI - TOPQ + 1 + ELSE + TOPQ = 1 + NH = N + END IF + CALL ZGEMM( 'No Transpose', 'No Transpose', NH, + $ NBLST, NBLST, CONE, Q( TOPQ, J ), LDQ, + $ WORK, NBLST, CZERO, WORK( PW ), NH ) + CALL ZLACPY( 'All', NH, NBLST, WORK( PW ), NH, + $ Q( TOPQ, J ), LDQ ) + PPWO = NBLST*NBLST + 1 + J0 = J - NNB + DO J = J0, JCOL+1, -NNB + IF ( INITQ ) THEN + TOPQ = MAX( 2, J - JCOL + 1 ) + NH = IHI - TOPQ + 1 + END IF + IF ( BLK22 ) THEN +* +* Exploit the structure of U. +* + CALL ZUNM22( 'Right', 'No Transpose', NH, 2*NNB, + $ NNB, NNB, WORK( PPWO ), 2*NNB, + $ Q( TOPQ, J ), LDQ, WORK( PW ), + $ LWORK-PW+1, IERR ) + ELSE +* +* Ignore the structure of U. +* + CALL ZGEMM( 'No Transpose', 'No Transpose', NH, + $ 2*NNB, 2*NNB, CONE, Q( TOPQ, J ), LDQ, + $ WORK( PPWO ), 2*NNB, CZERO, WORK( PW ), + $ NH ) + CALL ZLACPY( 'All', NH, 2*NNB, WORK( PW ), NH, + $ Q( TOPQ, J ), LDQ ) + END IF + PPWO = PPWO + 4*NNB*NNB + END DO + END IF +* +* Accumulate right Givens rotations if required. +* + IF ( WANTZ .OR. TOP.GT.0 ) THEN +* +* Initialize small unitary factors that will hold the +* accumulated Givens rotations in workspace. +* + CALL ZLASET( 'All', NBLST, NBLST, CZERO, CONE, WORK, + $ NBLST ) + PW = NBLST * NBLST + 1 + DO I = 1, N2NB + CALL ZLASET( 'All', 2*NNB, 2*NNB, CZERO, CONE, + $ WORK( PW ), 2*NNB ) + PW = PW + 4*NNB*NNB + END DO +* +* Accumulate Givens rotations into workspace array. +* + DO J = JCOL, JCOL+NNB-1 + PPW = ( NBLST + 1 )*( NBLST - 2 ) - J + JCOL + 1 + LEN = 2 + J - JCOL + JROW = J + N2NB*NNB + 2 + DO I = IHI, JROW, -1 + CTEMP = A( I, J ) + A( I, J ) = CZERO + S = B( I, J ) + B( I, J ) = CZERO + DO JJ = PPW, PPW+LEN-1 + TEMP = WORK( JJ + NBLST ) + WORK( JJ + NBLST ) = CTEMP*TEMP - + $ DCONJG( S )*WORK( JJ ) + WORK( JJ ) = S*TEMP + CTEMP*WORK( JJ ) + END DO + LEN = LEN + 1 + PPW = PPW - NBLST - 1 + END DO +* + PPWO = NBLST*NBLST + ( NNB+J-JCOL-1 )*2*NNB + NNB + J0 = JROW - NNB + DO JROW = J0, J+2, -NNB + PPW = PPWO + LEN = 2 + J - JCOL + DO I = JROW+NNB-1, JROW, -1 + CTEMP = A( I, J ) + A( I, J ) = CZERO + S = B( I, J ) + B( I, J ) = CZERO + DO JJ = PPW, PPW+LEN-1 + TEMP = WORK( JJ + 2*NNB ) + WORK( JJ + 2*NNB ) = CTEMP*TEMP - + $ DCONJG( S )*WORK( JJ ) + WORK( JJ ) = S*TEMP + CTEMP*WORK( JJ ) + END DO + LEN = LEN + 1 + PPW = PPW - 2*NNB - 1 + END DO + PPWO = PPWO + 4*NNB*NNB + END DO + END DO + ELSE +* + CALL ZLASET( 'Lower', IHI - JCOL - 1, NNB, CZERO, CZERO, + $ A( JCOL + 2, JCOL ), LDA ) + CALL ZLASET( 'Lower', IHI - JCOL - 1, NNB, CZERO, CZERO, + $ B( JCOL + 2, JCOL ), LDB ) + END IF +* +* Apply accumulated unitary matrices to A and B. +* + IF ( TOP.GT.0 ) THEN + J = IHI - NBLST + 1 + CALL ZGEMM( 'No Transpose', 'No Transpose', TOP, + $ NBLST, NBLST, CONE, A( 1, J ), LDA, + $ WORK, NBLST, CZERO, WORK( PW ), TOP ) + CALL ZLACPY( 'All', TOP, NBLST, WORK( PW ), TOP, + $ A( 1, J ), LDA ) + PPWO = NBLST*NBLST + 1 + J0 = J - NNB + DO J = J0, JCOL+1, -NNB + IF ( BLK22 ) THEN +* +* Exploit the structure of U. +* + CALL ZUNM22( 'Right', 'No Transpose', TOP, 2*NNB, + $ NNB, NNB, WORK( PPWO ), 2*NNB, + $ A( 1, J ), LDA, WORK( PW ), + $ LWORK-PW+1, IERR ) + ELSE +* +* Ignore the structure of U. +* + CALL ZGEMM( 'No Transpose', 'No Transpose', TOP, + $ 2*NNB, 2*NNB, CONE, A( 1, J ), LDA, + $ WORK( PPWO ), 2*NNB, CZERO, + $ WORK( PW ), TOP ) + CALL ZLACPY( 'All', TOP, 2*NNB, WORK( PW ), TOP, + $ A( 1, J ), LDA ) + END IF + PPWO = PPWO + 4*NNB*NNB + END DO +* + J = IHI - NBLST + 1 + CALL ZGEMM( 'No Transpose', 'No Transpose', TOP, + $ NBLST, NBLST, CONE, B( 1, J ), LDB, + $ WORK, NBLST, CZERO, WORK( PW ), TOP ) + CALL ZLACPY( 'All', TOP, NBLST, WORK( PW ), TOP, + $ B( 1, J ), LDB ) + PPWO = NBLST*NBLST + 1 + J0 = J - NNB + DO J = J0, JCOL+1, -NNB + IF ( BLK22 ) THEN +* +* Exploit the structure of U. +* + CALL ZUNM22( 'Right', 'No Transpose', TOP, 2*NNB, + $ NNB, NNB, WORK( PPWO ), 2*NNB, + $ B( 1, J ), LDB, WORK( PW ), + $ LWORK-PW+1, IERR ) + ELSE +* +* Ignore the structure of U. +* + CALL ZGEMM( 'No Transpose', 'No Transpose', TOP, + $ 2*NNB, 2*NNB, CONE, B( 1, J ), LDB, + $ WORK( PPWO ), 2*NNB, CZERO, + $ WORK( PW ), TOP ) + CALL ZLACPY( 'All', TOP, 2*NNB, WORK( PW ), TOP, + $ B( 1, J ), LDB ) + END IF + PPWO = PPWO + 4*NNB*NNB + END DO + END IF +* +* Apply accumulated unitary matrices to Z. +* + IF( WANTZ ) THEN + J = IHI - NBLST + 1 + IF ( INITQ ) THEN + TOPQ = MAX( 2, J - JCOL + 1 ) + NH = IHI - TOPQ + 1 + ELSE + TOPQ = 1 + NH = N + END IF + CALL ZGEMM( 'No Transpose', 'No Transpose', NH, + $ NBLST, NBLST, CONE, Z( TOPQ, J ), LDZ, + $ WORK, NBLST, CZERO, WORK( PW ), NH ) + CALL ZLACPY( 'All', NH, NBLST, WORK( PW ), NH, + $ Z( TOPQ, J ), LDZ ) + PPWO = NBLST*NBLST + 1 + J0 = J - NNB + DO J = J0, JCOL+1, -NNB + IF ( INITQ ) THEN + TOPQ = MAX( 2, J - JCOL + 1 ) + NH = IHI - TOPQ + 1 + END IF + IF ( BLK22 ) THEN +* +* Exploit the structure of U. +* + CALL ZUNM22( 'Right', 'No Transpose', NH, 2*NNB, + $ NNB, NNB, WORK( PPWO ), 2*NNB, + $ Z( TOPQ, J ), LDZ, WORK( PW ), + $ LWORK-PW+1, IERR ) + ELSE +* +* Ignore the structure of U. +* + CALL ZGEMM( 'No Transpose', 'No Transpose', NH, + $ 2*NNB, 2*NNB, CONE, Z( TOPQ, J ), LDZ, + $ WORK( PPWO ), 2*NNB, CZERO, WORK( PW ), + $ NH ) + CALL ZLACPY( 'All', NH, 2*NNB, WORK( PW ), NH, + $ Z( TOPQ, J ), LDZ ) + END IF + PPWO = PPWO + 4*NNB*NNB + END DO + END IF + END DO + END IF +* +* Use unblocked code to reduce the rest of the matrix +* Avoid re-initialization of modified Q and Z. +* + COMPQ2 = COMPQ + COMPZ2 = COMPZ + IF ( JCOL.NE.ILO ) THEN + IF ( WANTQ ) + $ COMPQ2 = 'V' + IF ( WANTZ ) + $ COMPZ2 = 'V' + END IF +* + IF ( JCOL.LT.IHI ) + $ CALL ZGGHRD( COMPQ2, COMPZ2, N, JCOL, IHI, A, LDA, B, LDB, Q, + $ LDQ, Z, LDZ, IERR ) + WORK( 1 ) = DCMPLX( LWKOPT ) +* + RETURN +* +* End of ZGGHD3 +* + END diff --git a/dspl/liblapack/SRC/zgghrd.f b/dspl/liblapack/SRC/zgghrd.f new file mode 100644 index 0000000..e9e8703 --- /dev/null +++ b/dspl/liblapack/SRC/zgghrd.f @@ -0,0 +1,361 @@ +*> \brief \b ZGGHRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGGHRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, +* LDQ, Z, LDZ, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ, COMPZ +* INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGGHRD reduces a pair of complex matrices (A,B) to generalized upper +*> Hessenberg form using unitary transformations, where A is a +*> general matrix and B is upper triangular. The form of the +*> generalized eigenvalue problem is +*> A*x = lambda*B*x, +*> and B is typically made upper triangular by computing its QR +*> factorization and moving the unitary matrix Q to the left side +*> of the equation. +*> +*> This subroutine simultaneously reduces A to a Hessenberg matrix H: +*> Q**H*A*Z = H +*> and transforms B to another upper triangular matrix T: +*> Q**H*B*Z = T +*> in order to reduce the problem to its standard form +*> H*y = lambda*T*y +*> where y = Z**H*x. +*> +*> The unitary matrices Q and Z are determined as products of Givens +*> rotations. They may either be formed explicitly, or they may be +*> postmultiplied into input matrices Q1 and Z1, so that +*> Q1 * A * Z1**H = (Q1*Q) * H * (Z1*Z)**H +*> Q1 * B * Z1**H = (Q1*Q) * T * (Z1*Z)**H +*> If Q1 is the unitary matrix from the QR factorization of B in the +*> original equation A*x = lambda*B*x, then ZGGHRD reduces the original +*> problem to generalized Hessenberg form. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> = 'N': do not compute Q; +*> = 'I': Q is initialized to the unit matrix, and the +*> unitary matrix Q is returned; +*> = 'V': Q must contain a unitary matrix Q1 on entry, +*> and the product Q1*Q is returned. +*> \endverbatim +*> +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': do not compute Z; +*> = 'I': Z is initialized to the unit matrix, and the +*> unitary matrix Z is returned; +*> = 'V': Z must contain a unitary matrix Z1 on entry, +*> and the product Z1*Z is returned. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> ILO and IHI mark the rows and columns of A which are to be +*> reduced. It is assumed that A is already upper triangular +*> in rows and columns 1:ILO-1 and IHI+1:N. ILO and IHI are +*> normally set by a previous call to ZGGBAL; otherwise they +*> should be set to 1 and N respectively. +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the N-by-N general matrix to be reduced. +*> On exit, the upper triangle and the first subdiagonal of A +*> are overwritten with the upper Hessenberg matrix H, and the +*> rest is set to zero. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB, N) +*> On entry, the N-by-N upper triangular matrix B. +*> On exit, the upper triangular matrix T = Q**H B Z. The +*> elements below the diagonal are set to zero. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ, N) +*> On entry, if COMPQ = 'V', the unitary matrix Q1, typically +*> from the QR factorization of B. +*> On exit, if COMPQ='I', the unitary matrix Q, and if +*> COMPQ = 'V', the product Q1*Q. +*> Not referenced if COMPQ='N'. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', the unitary matrix Z1. +*> On exit, if COMPZ='I', the unitary matrix Z, and if +*> COMPZ = 'V', the product Z1*Z. +*> Not referenced if COMPZ='N'. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. +*> LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> This routine reduces A to Hessenberg and B to triangular form by +*> an unblocked reduction, as described in _Matrix_Computations_, +*> by Golub and van Loan (Johns Hopkins Press). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q, + $ LDQ, Z, LDZ, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ + INTEGER IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE, CZERO + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), + $ CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ILQ, ILZ + INTEGER ICOMPQ, ICOMPZ, JCOL, JROW + DOUBLE PRECISION C + COMPLEX*16 CTEMP, S +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARTG, ZLASET, ZROT +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Decode COMPQ +* + IF( LSAME( COMPQ, 'N' ) ) THEN + ILQ = .FALSE. + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'V' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 2 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 3 + ELSE + ICOMPQ = 0 + END IF +* +* Decode COMPZ +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ILZ = .FALSE. + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 2 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 3 + ELSE + ICOMPZ = 0 + END IF +* +* Test the input parameters. +* + INFO = 0 + IF( ICOMPQ.LE.0 ) THEN + INFO = -1 + ELSE IF( ICOMPZ.LE.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 ) THEN + INFO = -4 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( ( ILQ .AND. LDQ.LT.N ) .OR. LDQ.LT.1 ) THEN + INFO = -11 + ELSE IF( ( ILZ .AND. LDZ.LT.N ) .OR. LDZ.LT.1 ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGHRD', -INFO ) + RETURN + END IF +* +* Initialize Q and Z if desired. +* + IF( ICOMPQ.EQ.3 ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) + IF( ICOMPZ.EQ.3 ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, Z, LDZ ) +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* +* Zero out lower triangle of B +* + DO 20 JCOL = 1, N - 1 + DO 10 JROW = JCOL + 1, N + B( JROW, JCOL ) = CZERO + 10 CONTINUE + 20 CONTINUE +* +* Reduce A and B +* + DO 40 JCOL = ILO, IHI - 2 +* + DO 30 JROW = IHI, JCOL + 2, -1 +* +* Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL) +* + CTEMP = A( JROW-1, JCOL ) + CALL ZLARTG( CTEMP, A( JROW, JCOL ), C, S, + $ A( JROW-1, JCOL ) ) + A( JROW, JCOL ) = CZERO + CALL ZROT( N-JCOL, A( JROW-1, JCOL+1 ), LDA, + $ A( JROW, JCOL+1 ), LDA, C, S ) + CALL ZROT( N+2-JROW, B( JROW-1, JROW-1 ), LDB, + $ B( JROW, JROW-1 ), LDB, C, S ) + IF( ILQ ) + $ CALL ZROT( N, Q( 1, JROW-1 ), 1, Q( 1, JROW ), 1, C, + $ DCONJG( S ) ) +* +* Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1) +* + CTEMP = B( JROW, JROW ) + CALL ZLARTG( CTEMP, B( JROW, JROW-1 ), C, S, + $ B( JROW, JROW ) ) + B( JROW, JROW-1 ) = CZERO + CALL ZROT( IHI, A( 1, JROW ), 1, A( 1, JROW-1 ), 1, C, S ) + CALL ZROT( JROW-1, B( 1, JROW ), 1, B( 1, JROW-1 ), 1, C, + $ S ) + IF( ILZ ) + $ CALL ZROT( N, Z( 1, JROW ), 1, Z( 1, JROW-1 ), 1, C, S ) + 30 CONTINUE + 40 CONTINUE +* + RETURN +* +* End of ZGGHRD +* + END diff --git a/dspl/liblapack/SRC/zgglse.f b/dspl/liblapack/SRC/zgglse.f new file mode 100644 index 0000000..1b5f468 --- /dev/null +++ b/dspl/liblapack/SRC/zgglse.f @@ -0,0 +1,355 @@ +*> \brief ZGGLSE solves overdetermined or underdetermined systems for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGGLSE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), C( * ), D( * ), +* $ WORK( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGGLSE solves the linear equality-constrained least squares (LSE) +*> problem: +*> +*> minimize || c - A*x ||_2 subject to B*x = d +*> +*> where A is an M-by-N matrix, B is a P-by-N matrix, c is a given +*> M-vector, and d is a given P-vector. It is assumed that +*> P <= N <= M+P, and +*> +*> rank(B) = P and rank( (A) ) = N. +*> ( (B) ) +*> +*> These conditions ensure that the LSE problem has a unique solution, +*> which is obtained using a generalized RQ factorization of the +*> matrices (B, A) given by +*> +*> B = (0 R)*Q, A = Z*T*Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. 0 <= P <= N <= M+P. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(M,N)-by-N upper trapezoidal matrix T. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, the upper triangle of the subarray B(1:P,N-P+1:N) +*> contains the P-by-P upper triangular matrix R. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (M) +*> On entry, C contains the right hand side vector for the +*> least squares part of the LSE problem. +*> On exit, the residual sum of squares for the solution +*> is given by the sum of squares of elements N-P+1 to M of +*> vector C. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is COMPLEX*16 array, dimension (P) +*> On entry, D contains the right hand side vector for the +*> constrained equation. +*> On exit, D is destroyed. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (N) +*> On exit, X is the solution of the LSE problem. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M+N+P). +*> For optimum performance LWORK >= P+min(M,N)+max(M,N)*NB, +*> where NB is an upper bound for the optimal blocksizes for +*> ZGEQRF, CGERQF, ZUNMQR and CUNMRQ. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1: the upper triangular factor R associated with B in the +*> generalized RQ factorization of the pair (B, A) is +*> singular, so that rank(B) < P; the least squares +*> solution could not be computed. +*> = 2: the (N-P) by (N-P) part of the upper trapezoidal factor +*> T associated with A in the generalized RQ factorization +*> of the pair (B, A) is singular, so that +*> rank( (A) ) < N; the least squares solution could not +*> ( (B) ) +*> be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERsolve +* +* ===================================================================== + SUBROUTINE ZGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), C( * ), D( * ), + $ WORK( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKMIN, LWKOPT, MN, NB, NB1, NB2, NB3, + $ NB4, NR +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZGEMV, ZGGRQF, ZTRMV, + $ ZTRTRS, ZUNMQR, ZUNMRQ +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( P.LT.0 .OR. P.GT.N .OR. P.LT.N-M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -7 + END IF +* +* Calculate workspace +* + IF( INFO.EQ.0) THEN + IF( N.EQ.0 ) THEN + LWKMIN = 1 + LWKOPT = 1 + ELSE + NB1 = ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'ZGERQF', ' ', M, N, -1, -1 ) + NB3 = ILAENV( 1, 'ZUNMQR', ' ', M, N, P, -1 ) + NB4 = ILAENV( 1, 'ZUNMRQ', ' ', M, N, P, -1 ) + NB = MAX( NB1, NB2, NB3, NB4 ) + LWKMIN = M + N + P + LWKOPT = P + MN + MAX( M, N )*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGLSE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the GRQ factorization of matrices B and A: +* +* B*Q**H = ( 0 T12 ) P Z**H*A*Q**H = ( R11 R12 ) N-P +* N-P P ( 0 R22 ) M+P-N +* N-P P +* +* where T12 and R11 are upper triangular, and Q and Z are +* unitary. +* + CALL ZGGRQF( P, M, N, B, LDB, WORK, A, LDA, WORK( P+1 ), + $ WORK( P+MN+1 ), LWORK-P-MN, INFO ) + LOPT = WORK( P+MN+1 ) +* +* Update c = Z**H *c = ( c1 ) N-P +* ( c2 ) M+P-N +* + CALL ZUNMQR( 'Left', 'Conjugate Transpose', M, 1, MN, A, LDA, + $ WORK( P+1 ), C, MAX( 1, M ), WORK( P+MN+1 ), + $ LWORK-P-MN, INFO ) + LOPT = MAX( LOPT, INT( WORK( P+MN+1 ) ) ) +* +* Solve T12*x2 = d for x2 +* + IF( P.GT.0 ) THEN + CALL ZTRTRS( 'Upper', 'No transpose', 'Non-unit', P, 1, + $ B( 1, N-P+1 ), LDB, D, P, INFO ) +* + IF( INFO.GT.0 ) THEN + INFO = 1 + RETURN + END IF +* +* Put the solution in X +* + CALL ZCOPY( P, D, 1, X( N-P+1 ), 1 ) +* +* Update c1 +* + CALL ZGEMV( 'No transpose', N-P, P, -CONE, A( 1, N-P+1 ), LDA, + $ D, 1, CONE, C, 1 ) + END IF +* +* Solve R11*x1 = c1 for x1 +* + IF( N.GT.P ) THEN + CALL ZTRTRS( 'Upper', 'No transpose', 'Non-unit', N-P, 1, + $ A, LDA, C, N-P, INFO ) +* + IF( INFO.GT.0 ) THEN + INFO = 2 + RETURN + END IF +* +* Put the solutions in X +* + CALL ZCOPY( N-P, C, 1, X, 1 ) + END IF +* +* Compute the residual vector: +* + IF( M.LT.N ) THEN + NR = M + P - N + IF( NR.GT.0 ) + $ CALL ZGEMV( 'No transpose', NR, N-M, -CONE, A( N-P+1, M+1 ), + $ LDA, D( NR+1 ), 1, CONE, C( N-P+1 ), 1 ) + ELSE + NR = P + END IF + IF( NR.GT.0 ) THEN + CALL ZTRMV( 'Upper', 'No transpose', 'Non unit', NR, + $ A( N-P+1, N-P+1 ), LDA, D, 1 ) + CALL ZAXPY( NR, -CONE, D, 1, C( N-P+1 ), 1 ) + END IF +* +* Backward transformation x = Q**H*x +* + CALL ZUNMRQ( 'Left', 'Conjugate Transpose', N, 1, P, B, LDB, + $ WORK( 1 ), X, N, WORK( P+MN+1 ), LWORK-P-MN, INFO ) + WORK( 1 ) = P + MN + MAX( LOPT, INT( WORK( P+MN+1 ) ) ) +* + RETURN +* +* End of ZGGLSE +* + END diff --git a/dspl/liblapack/SRC/zggqrf.f b/dspl/liblapack/SRC/zggqrf.f new file mode 100644 index 0000000..ba04d07 --- /dev/null +++ b/dspl/liblapack/SRC/zggqrf.f @@ -0,0 +1,299 @@ +*> \brief \b ZGGQRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGGQRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGGQRF computes a generalized QR factorization of an N-by-M matrix A +*> and an N-by-P matrix B: +*> +*> A = Q*R, B = Q*T*Z, +*> +*> where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix, +*> and R and T assume one of the forms: +*> +*> if N >= M, R = ( R11 ) M , or if N < M, R = ( R11 R12 ) N, +*> ( 0 ) N-M N M-N +*> M +*> +*> where R11 is upper triangular, and +*> +*> if N <= P, T = ( 0 T12 ) N, or if N > P, T = ( T11 ) N-P, +*> P-N N ( T21 ) P +*> P +*> +*> where T12 or T21 is upper triangular. +*> +*> In particular, if B is square and nonsingular, the GQR factorization +*> of A and B implicitly gives the QR factorization of inv(B)*A: +*> +*> inv(B)*A = Z**H * (inv(T)*R) +*> +*> where inv(B) denotes the inverse of the matrix B, and Z**H denotes the +*> conjugate transpose of matrix Z. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of columns of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of columns of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,M) +*> On entry, the N-by-M matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(N,M)-by-M upper trapezoidal matrix R (R is +*> upper triangular if N >= M); the elements below the diagonal, +*> with the array TAUA, represent the unitary matrix Q as a +*> product of min(N,M) elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAUA +*> \verbatim +*> TAUA is COMPLEX*16 array, dimension (min(N,M)) +*> The scalar factors of the elementary reflectors which +*> represent the unitary matrix Q (see Further Details). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,P) +*> On entry, the N-by-P matrix B. +*> On exit, if N <= P, the upper triangle of the subarray +*> B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; +*> if N > P, the elements on and above the (N-P)-th subdiagonal +*> contain the N-by-P upper trapezoidal matrix T; the remaining +*> elements, with the array TAUB, represent the unitary +*> matrix Z as a product of elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAUB +*> \verbatim +*> TAUB is COMPLEX*16 array, dimension (min(N,P)) +*> The scalar factors of the elementary reflectors which +*> represent the unitary matrix Z (see Further Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N,M,P). +*> For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3), +*> where NB1 is the optimal blocksize for the QR factorization +*> of an N-by-M matrix, NB2 is the optimal blocksize for the +*> RQ factorization of an N-by-P matrix, and NB3 is the optimal +*> blocksize for a call of ZUNMQR. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(n,m). +*> +*> Each H(i) has the form +*> +*> H(i) = I - taua * v * v**H +*> +*> where taua is a complex scalar, and v is a complex vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), +*> and taua in TAUA(i). +*> To form Q explicitly, use LAPACK subroutine ZUNGQR. +*> To use Q to update another matrix, use LAPACK subroutine ZUNMQR. +*> +*> The matrix Z is represented as a product of elementary reflectors +*> +*> Z = H(1) H(2) . . . H(k), where k = min(n,p). +*> +*> Each H(i) has the form +*> +*> H(i) = I - taub * v * v**H +*> +*> where taub is a complex scalar, and v is a complex vector with +*> v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in +*> B(n-k+i,1:p-k+i-1), and taub in TAUB(i). +*> To form Z explicitly, use LAPACK subroutine ZUNGRQ. +*> To use Z to update another matrix, use LAPACK subroutine ZUNMRQ. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKOPT, NB, NB1, NB2, NB3 +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQRF, ZGERQF, ZUNMQR +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB1 = ILAENV( 1, 'ZGEQRF', ' ', N, M, -1, -1 ) + NB2 = ILAENV( 1, 'ZGERQF', ' ', N, P, -1, -1 ) + NB3 = ILAENV( 1, 'ZUNMQR', ' ', N, M, P, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LWKOPT = MAX( N, M, P )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( P.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, N, M, P ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGQRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* QR factorization of N-by-M matrix A: A = Q*R +* + CALL ZGEQRF( N, M, A, LDA, TAUA, WORK, LWORK, INFO ) + LOPT = WORK( 1 ) +* +* Update B := Q**H*B. +* + CALL ZUNMQR( 'Left', 'Conjugate Transpose', N, P, MIN( N, M ), A, + $ LDA, TAUA, B, LDB, WORK, LWORK, INFO ) + LOPT = MAX( LOPT, INT( WORK( 1 ) ) ) +* +* RQ factorization of N-by-P matrix B: B = T*Z. +* + CALL ZGERQF( N, P, B, LDB, TAUB, WORK, LWORK, INFO ) + WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) ) +* + RETURN +* +* End of ZGGQRF +* + END diff --git a/dspl/liblapack/SRC/zggrqf.f b/dspl/liblapack/SRC/zggrqf.f new file mode 100644 index 0000000..27970e8 --- /dev/null +++ b/dspl/liblapack/SRC/zggrqf.f @@ -0,0 +1,299 @@ +*> \brief \b ZGGRQF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGGRQF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGGRQF computes a generalized RQ factorization of an M-by-N matrix A +*> and a P-by-N matrix B: +*> +*> A = R*Q, B = Z*T*Q, +*> +*> where Q is an N-by-N unitary matrix, Z is a P-by-P unitary +*> matrix, and R and T assume one of the forms: +*> +*> if M <= N, R = ( 0 R12 ) M, or if M > N, R = ( R11 ) M-N, +*> N-M M ( R21 ) N +*> N +*> +*> where R12 or R21 is upper triangular, and +*> +*> if P >= N, T = ( T11 ) N , or if P < N, T = ( T11 T12 ) P, +*> ( 0 ) P-N P N-P +*> N +*> +*> where T11 is upper triangular. +*> +*> In particular, if B is square and nonsingular, the GRQ factorization +*> of A and B implicitly gives the RQ factorization of A*inv(B): +*> +*> A*inv(B) = (R*inv(T))*Z**H +*> +*> where inv(B) denotes the inverse of the matrix B, and Z**H denotes the +*> conjugate transpose of the matrix Z. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, if M <= N, the upper triangle of the subarray +*> A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R; +*> if M > N, the elements on and above the (M-N)-th subdiagonal +*> contain the M-by-N upper trapezoidal matrix R; the remaining +*> elements, with the array TAUA, represent the unitary +*> matrix Q as a product of elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAUA +*> \verbatim +*> TAUA is COMPLEX*16 array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors which +*> represent the unitary matrix Q (see Further Details). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, the elements on and above the diagonal of the array +*> contain the min(P,N)-by-N upper trapezoidal matrix T (T is +*> upper triangular if P >= N); the elements below the diagonal, +*> with the array TAUB, represent the unitary matrix Z as a +*> product of elementary reflectors (see Further Details). +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[out] TAUB +*> \verbatim +*> TAUB is COMPLEX*16 array, dimension (min(P,N)) +*> The scalar factors of the elementary reflectors which +*> represent the unitary matrix Z (see Further Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N,M,P). +*> For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3), +*> where NB1 is the optimal blocksize for the RQ factorization +*> of an M-by-N matrix, NB2 is the optimal blocksize for the +*> QR factorization of a P-by-N matrix, and NB3 is the optimal +*> blocksize for a call of ZUNMRQ. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO=-i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = min(m,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - taua * v * v**H +*> +*> where taua is a complex scalar, and v is a complex vector with +*> v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in +*> A(m-k+i,1:n-k+i-1), and taua in TAUA(i). +*> To form Q explicitly, use LAPACK subroutine ZUNGRQ. +*> To use Q to update another matrix, use LAPACK subroutine ZUNMRQ. +*> +*> The matrix Z is represented as a product of elementary reflectors +*> +*> Z = H(1) H(2) . . . H(k), where k = min(p,n). +*> +*> Each H(i) has the form +*> +*> H(i) = I - taub * v * v**H +*> +*> where taub is a complex scalar, and v is a complex vector with +*> v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i), +*> and taub in TAUB(i). +*> To form Z explicitly, use LAPACK subroutine ZUNGQR. +*> To use Z to update another matrix, use LAPACK subroutine ZUNMQR. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LWORK, M, N, P +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LOPT, LWKOPT, NB, NB1, NB2, NB3 +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQRF, ZGERQF, ZUNMRQ +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + NB1 = ILAENV( 1, 'ZGERQF', ' ', M, N, -1, -1 ) + NB2 = ILAENV( 1, 'ZGEQRF', ' ', P, N, -1, -1 ) + NB3 = ILAENV( 1, 'ZUNMRQ', ' ', M, N, P, -1 ) + NB = MAX( NB1, NB2, NB3 ) + LWKOPT = MAX( N, M, P )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( P.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, M, P, N ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGRQF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* RQ factorization of M-by-N matrix A: A = R*Q +* + CALL ZGERQF( M, N, A, LDA, TAUA, WORK, LWORK, INFO ) + LOPT = WORK( 1 ) +* +* Update B := B*Q**H +* + CALL ZUNMRQ( 'Right', 'Conjugate Transpose', P, N, MIN( M, N ), + $ A( MAX( 1, M-N+1 ), 1 ), LDA, TAUA, B, LDB, WORK, + $ LWORK, INFO ) + LOPT = MAX( LOPT, INT( WORK( 1 ) ) ) +* +* QR factorization of P-by-N matrix B: B = Z*T +* + CALL ZGEQRF( P, N, B, LDB, TAUB, WORK, LWORK, INFO ) + WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) ) +* + RETURN +* +* End of ZGGRQF +* + END diff --git a/dspl/liblapack/SRC/zggsvd3.f b/dspl/liblapack/SRC/zggsvd3.f new file mode 100644 index 0000000..fd22d4f --- /dev/null +++ b/dspl/liblapack/SRC/zggsvd3.f @@ -0,0 +1,505 @@ +*> \brief ZGGSVD3 computes the singular value decomposition (SVD) for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGGSVD3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGGSVD3( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, +* LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK, +* LWORK, RWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBQ, JOBU, JOBV +* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, LWORK +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION ALPHA( * ), BETA( * ), RWORK( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGGSVD3 computes the generalized singular value decomposition (GSVD) +*> of an M-by-N complex matrix A and P-by-N complex matrix B: +*> +*> U**H*A*Q = D1*( 0 R ), V**H*B*Q = D2*( 0 R ) +*> +*> where U, V and Q are unitary matrices. +*> Let K+L = the effective numerical rank of the +*> matrix (A**H,B**H)**H, then R is a (K+L)-by-(K+L) nonsingular upper +*> triangular matrix, D1 and D2 are M-by-(K+L) and P-by-(K+L) "diagonal" +*> matrices and of the following structures, respectively: +*> +*> If M-K-L >= 0, +*> +*> K L +*> D1 = K ( I 0 ) +*> L ( 0 C ) +*> M-K-L ( 0 0 ) +*> +*> K L +*> D2 = L ( 0 S ) +*> P-L ( 0 0 ) +*> +*> N-K-L K L +*> ( 0 R ) = K ( 0 R11 R12 ) +*> L ( 0 0 R22 ) +*> where +*> +*> C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), +*> S = diag( BETA(K+1), ... , BETA(K+L) ), +*> C**2 + S**2 = I. +*> +*> R is stored in A(1:K+L,N-K-L+1:N) on exit. +*> +*> If M-K-L < 0, +*> +*> K M-K K+L-M +*> D1 = K ( I 0 0 ) +*> M-K ( 0 C 0 ) +*> +*> K M-K K+L-M +*> D2 = M-K ( 0 S 0 ) +*> K+L-M ( 0 0 I ) +*> P-L ( 0 0 0 ) +*> +*> N-K-L K M-K K+L-M +*> ( 0 R ) = K ( 0 R11 R12 R13 ) +*> M-K ( 0 0 R22 R23 ) +*> K+L-M ( 0 0 0 R33 ) +*> +*> where +*> +*> C = diag( ALPHA(K+1), ... , ALPHA(M) ), +*> S = diag( BETA(K+1), ... , BETA(M) ), +*> C**2 + S**2 = I. +*> +*> (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored +*> ( 0 R22 R23 ) +*> in B(M-K+1:L,N+M-K-L+1:N) on exit. +*> +*> The routine computes C, S, R, and optionally the unitary +*> transformation matrices U, V and Q. +*> +*> In particular, if B is an N-by-N nonsingular matrix, then the GSVD of +*> A and B implicitly gives the SVD of A*inv(B): +*> A*inv(B) = U*(D1*inv(D2))*V**H. +*> If ( A**H,B**H)**H has orthonormal columns, then the GSVD of A and B is also +*> equal to the CS decomposition of A and B. Furthermore, the GSVD can +*> be used to derive the solution of the eigenvalue problem: +*> A**H*A x = lambda* B**H*B x. +*> In some literature, the GSVD of A and B is presented in the form +*> U**H*A*X = ( 0 D1 ), V**H*B*X = ( 0 D2 ) +*> where U and V are orthogonal and X is nonsingular, and D1 and D2 are +*> ``diagonal''. The former GSVD form can be converted to the latter +*> form by taking the nonsingular matrix X as +*> +*> X = Q*( I 0 ) +*> ( 0 inv(R) ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> = 'U': Unitary matrix U is computed; +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> = 'V': Unitary matrix V is computed; +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBQ +*> \verbatim +*> JOBQ is CHARACTER*1 +*> = 'Q': Unitary matrix Q is computed; +*> = 'N': Q is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> \endverbatim +*> +*> \param[out] L +*> \verbatim +*> L is INTEGER +*> +*> On exit, K and L specify the dimension of the subblocks +*> described in Purpose. +*> K + L = effective numerical rank of (A**H,B**H)**H. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A contains the triangular matrix R, or part of R. +*> See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, B contains part of the triangular matrix R if +*> M-K-L < 0. See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION array, dimension (N) +*> +*> On exit, ALPHA and BETA contain the generalized singular +*> value pairs of A and B; +*> ALPHA(1:K) = 1, +*> BETA(1:K) = 0, +*> and if M-K-L >= 0, +*> ALPHA(K+1:K+L) = C, +*> BETA(K+1:K+L) = S, +*> or if M-K-L < 0, +*> ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0 +*> BETA(K+1:M) =S, BETA(M+1:K+L) =1 +*> and +*> ALPHA(K+L+1:N) = 0 +*> BETA(K+L+1:N) = 0 +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX*16 array, dimension (LDU,M) +*> If JOBU = 'U', U contains the M-by-M unitary matrix U. +*> If JOBU = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,M) if +*> JOBU = 'U'; LDU >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (LDV,P) +*> If JOBV = 'V', V contains the P-by-P unitary matrix V. +*> If JOBV = 'N', V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,P) if +*> JOBV = 'V'; LDV >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ,N) +*> If JOBQ = 'Q', Q contains the N-by-N unitary matrix Q. +*> If JOBQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N) if +*> JOBQ = 'Q'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> On exit, IWORK stores the sorting information. More +*> precisely, the following loop will sort ALPHA +*> for I = K+1, min(M,K+L) +*> swap ALPHA(I) and ALPHA(IWORK(I)) +*> endfor +*> such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, the Jacobi-type procedure failed to +*> converge. For further details, see subroutine ZTGSJA. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> TOLA DOUBLE PRECISION +*> TOLB DOUBLE PRECISION +*> TOLA and TOLB are the thresholds to determine the effective +*> rank of (A**H,B**H)**H. Generally, they are set to +*> TOLA = MAX(M,N)*norm(A)*MACHEPS, +*> TOLB = MAX(P,N)*norm(B)*MACHEPS. +*> The size of TOLA and TOLB may affect the size of backward +*> errors of the decomposition. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date August 2015 +* +*> \ingroup complex16GEsing +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Huan Ren, Computer Science Division, University of +*> California at Berkeley, USA +*> +* +*> \par Further Details: +* ===================== +*> +*> ZGGSVD3 replaces the deprecated subroutine ZGGSVD. +*> +* ===================================================================== + SUBROUTINE ZGGSVD3( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B, + $ LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, + $ WORK, LWORK, RWORK, IWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* August 2015 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, + $ LWORK +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION ALPHA( * ), BETA( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL WANTQ, WANTU, WANTV, LQUERY + INTEGER I, IBND, ISUB, J, NCYCLE, LWKOPT + DOUBLE PRECISION ANORM, BNORM, SMAX, TEMP, TOLA, TOLB, ULP, UNFL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANGE + EXTERNAL LSAME, DLAMCH, ZLANGE +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, XERBLA, ZGGSVP3, ZTGSJA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) + LQUERY = ( LWORK.EQ.-1 ) + LWKOPT = 1 +* +* Test the input arguments +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( P.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -12 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -24 + END IF +* +* Compute workspace +* + IF( INFO.EQ.0 ) THEN + CALL ZGGSVP3( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, TOLA, + $ TOLB, K, L, U, LDU, V, LDV, Q, LDQ, IWORK, RWORK, + $ WORK, WORK, -1, INFO ) + LWKOPT = N + INT( WORK( 1 ) ) + LWKOPT = MAX( 2*N, LWKOPT ) + LWKOPT = MAX( 1, LWKOPT ) + WORK( 1 ) = DCMPLX( LWKOPT ) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGSVD3', -INFO ) + RETURN + END IF + IF( LQUERY ) THEN + RETURN + ENDIF +* +* Compute the Frobenius norm of matrices A and B +* + ANORM = ZLANGE( '1', M, N, A, LDA, RWORK ) + BNORM = ZLANGE( '1', P, N, B, LDB, RWORK ) +* +* Get machine precision and set up threshold for determining +* the effective numerical rank of the matrices A and B. +* + ULP = DLAMCH( 'Precision' ) + UNFL = DLAMCH( 'Safe Minimum' ) + TOLA = MAX( M, N )*MAX( ANORM, UNFL )*ULP + TOLB = MAX( P, N )*MAX( BNORM, UNFL )*ULP +* + CALL ZGGSVP3( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, TOLA, + $ TOLB, K, L, U, LDU, V, LDV, Q, LDQ, IWORK, RWORK, + $ WORK, WORK( N+1 ), LWORK-N, INFO ) +* +* Compute the GSVD of two upper "triangular" matrices +* + CALL ZTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, LDB, + $ TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, + $ WORK, NCYCLE, INFO ) +* +* Sort the singular values and store the pivot indices in IWORK +* Copy ALPHA to RWORK, then sort ALPHA in RWORK +* + CALL DCOPY( N, ALPHA, 1, RWORK, 1 ) + IBND = MIN( L, M-K ) + DO 20 I = 1, IBND +* +* Scan for largest ALPHA(K+I) +* + ISUB = I + SMAX = RWORK( K+I ) + DO 10 J = I + 1, IBND + TEMP = RWORK( K+J ) + IF( TEMP.GT.SMAX ) THEN + ISUB = J + SMAX = TEMP + END IF + 10 CONTINUE + IF( ISUB.NE.I ) THEN + RWORK( K+ISUB ) = RWORK( K+I ) + RWORK( K+I ) = SMAX + IWORK( K+I ) = K + ISUB + ELSE + IWORK( K+I ) = K + I + END IF + 20 CONTINUE +* + WORK( 1 ) = DCMPLX( LWKOPT ) + RETURN +* +* End of ZGGSVD3 +* + END diff --git a/dspl/liblapack/SRC/zggsvp3.f b/dspl/liblapack/SRC/zggsvp3.f new file mode 100644 index 0000000..986bc47 --- /dev/null +++ b/dspl/liblapack/SRC/zggsvp3.f @@ -0,0 +1,579 @@ +*> \brief \b ZGGSVP3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGGSVP3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGGSVP3( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, +* TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, +* IWORK, RWORK, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBQ, JOBU, JOBV +* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, LWORK +* DOUBLE PRECISION TOLA, TOLB +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGGSVP3 computes unitary matrices U, V and Q such that +*> +*> N-K-L K L +*> U**H*A*Q = K ( 0 A12 A13 ) if M-K-L >= 0; +*> L ( 0 0 A23 ) +*> M-K-L ( 0 0 0 ) +*> +*> N-K-L K L +*> = K ( 0 A12 A13 ) if M-K-L < 0; +*> M-K ( 0 0 A23 ) +*> +*> N-K-L K L +*> V**H*B*Q = L ( 0 0 B13 ) +*> P-L ( 0 0 0 ) +*> +*> where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular +*> upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, +*> otherwise A23 is (M-K)-by-L upper trapezoidal. K+L = the effective +*> numerical rank of the (M+P)-by-N matrix (A**H,B**H)**H. +*> +*> This decomposition is the preprocessing step for computing the +*> Generalized Singular Value Decomposition (GSVD), see subroutine +*> ZGGSVD3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> = 'U': Unitary matrix U is computed; +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> = 'V': Unitary matrix V is computed; +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBQ +*> \verbatim +*> JOBQ is CHARACTER*1 +*> = 'Q': Unitary matrix Q is computed; +*> = 'N': Q is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A contains the triangular (or trapezoidal) matrix +*> described in the Purpose section. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, B contains the triangular matrix described in +*> the Purpose section. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[in] TOLA +*> \verbatim +*> TOLA is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] TOLB +*> \verbatim +*> TOLB is DOUBLE PRECISION +*> +*> TOLA and TOLB are the thresholds to determine the effective +*> numerical rank of matrix B and a subblock of A. Generally, +*> they are set to +*> TOLA = MAX(M,N)*norm(A)*MAZHEPS, +*> TOLB = MAX(P,N)*norm(B)*MAZHEPS. +*> The size of TOLA and TOLB may affect the size of backward +*> errors of the decomposition. +*> \endverbatim +*> +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> \endverbatim +*> +*> \param[out] L +*> \verbatim +*> L is INTEGER +*> +*> On exit, K and L specify the dimension of the subblocks +*> described in Purpose section. +*> K + L = effective numerical rank of (A**H,B**H)**H. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX*16 array, dimension (LDU,M) +*> If JOBU = 'U', U contains the unitary matrix U. +*> If JOBU = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,M) if +*> JOBU = 'U'; LDU >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (LDV,P) +*> If JOBV = 'V', V contains the unitary matrix V. +*> If JOBV = 'N', V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,P) if +*> JOBV = 'V'; LDV >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ,N) +*> If JOBQ = 'Q', Q contains the unitary matrix Q. +*> If JOBQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N) if +*> JOBQ = 'Q'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date August 2015 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +* +*> \verbatim +*> +*> The subroutine uses LAPACK subroutine ZGEQP3 for the QR factorization +*> with column pivoting to detect the effective numerical rank of the +*> a matrix. It may be replaced by a better rank determination strategy. +*> +*> ZGGSVP3 replaces the deprecated subroutine ZGGSVP. +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGGSVP3( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, + $ TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ, + $ IWORK, RWORK, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* August 2015 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P, + $ LWORK + DOUBLE PRECISION TOLA, TOLB +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL FORWRD, WANTQ, WANTU, WANTV, LQUERY + INTEGER I, J, LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEQP3, ZGEQR2, ZGERQ2, ZLACPY, ZLAPMT, + $ ZLASET, ZUNG2R, ZUNM2R, ZUNMR2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTU = LSAME( JOBU, 'U' ) + WANTV = LSAME( JOBV, 'V' ) + WANTQ = LSAME( JOBQ, 'Q' ) + FORWRD = .TRUE. + LQUERY = ( LWORK.EQ.-1 ) + LWKOPT = 1 +* +* Test the input arguments +* + INFO = 0 + IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( P.LT.0 ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -10 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -16 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -18 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -20 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -24 + END IF +* +* Compute workspace +* + IF( INFO.EQ.0 ) THEN + CALL ZGEQP3( P, N, B, LDB, IWORK, TAU, WORK, -1, RWORK, INFO ) + LWKOPT = INT( WORK ( 1 ) ) + IF( WANTV ) THEN + LWKOPT = MAX( LWKOPT, P ) + END IF + LWKOPT = MAX( LWKOPT, MIN( N, P ) ) + LWKOPT = MAX( LWKOPT, M ) + IF( WANTQ ) THEN + LWKOPT = MAX( LWKOPT, N ) + END IF + CALL ZGEQP3( M, N, A, LDA, IWORK, TAU, WORK, -1, RWORK, INFO ) + LWKOPT = MAX( LWKOPT, INT( WORK ( 1 ) ) ) + LWKOPT = MAX( 1, LWKOPT ) + WORK( 1 ) = DCMPLX( LWKOPT ) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGGSVP3', -INFO ) + RETURN + END IF + IF( LQUERY ) THEN + RETURN + ENDIF +* +* QR with column pivoting of B: B*P = V*( S11 S12 ) +* ( 0 0 ) +* + DO 10 I = 1, N + IWORK( I ) = 0 + 10 CONTINUE + CALL ZGEQP3( P, N, B, LDB, IWORK, TAU, WORK, LWORK, RWORK, INFO ) +* +* Update A := A*P +* + CALL ZLAPMT( FORWRD, M, N, A, LDA, IWORK ) +* +* Determine the effective rank of matrix B. +* + L = 0 + DO 20 I = 1, MIN( P, N ) + IF( ABS( B( I, I ) ).GT.TOLB ) + $ L = L + 1 + 20 CONTINUE +* + IF( WANTV ) THEN +* +* Copy the details of V, and form V. +* + CALL ZLASET( 'Full', P, P, CZERO, CZERO, V, LDV ) + IF( P.GT.1 ) + $ CALL ZLACPY( 'Lower', P-1, N, B( 2, 1 ), LDB, V( 2, 1 ), + $ LDV ) + CALL ZUNG2R( P, P, MIN( P, N ), V, LDV, TAU, WORK, INFO ) + END IF +* +* Clean up B +* + DO 40 J = 1, L - 1 + DO 30 I = J + 1, L + B( I, J ) = CZERO + 30 CONTINUE + 40 CONTINUE + IF( P.GT.L ) + $ CALL ZLASET( 'Full', P-L, N, CZERO, CZERO, B( L+1, 1 ), LDB ) +* + IF( WANTQ ) THEN +* +* Set Q = I and Update Q := Q*P +* + CALL ZLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) + CALL ZLAPMT( FORWRD, N, N, Q, LDQ, IWORK ) + END IF +* + IF( P.GE.L .AND. N.NE.L ) THEN +* +* RQ factorization of ( S11 S12 ) = ( 0 S12 )*Z +* + CALL ZGERQ2( L, N, B, LDB, TAU, WORK, INFO ) +* +* Update A := A*Z**H +* + CALL ZUNMR2( 'Right', 'Conjugate transpose', M, N, L, B, LDB, + $ TAU, A, LDA, WORK, INFO ) + IF( WANTQ ) THEN +* +* Update Q := Q*Z**H +* + CALL ZUNMR2( 'Right', 'Conjugate transpose', N, N, L, B, + $ LDB, TAU, Q, LDQ, WORK, INFO ) + END IF +* +* Clean up B +* + CALL ZLASET( 'Full', L, N-L, CZERO, CZERO, B, LDB ) + DO 60 J = N - L + 1, N + DO 50 I = J - N + L + 1, L + B( I, J ) = CZERO + 50 CONTINUE + 60 CONTINUE +* + END IF +* +* Let N-L L +* A = ( A11 A12 ) M, +* +* then the following does the complete QR decomposition of A11: +* +* A11 = U*( 0 T12 )*P1**H +* ( 0 0 ) +* + DO 70 I = 1, N - L + IWORK( I ) = 0 + 70 CONTINUE + CALL ZGEQP3( M, N-L, A, LDA, IWORK, TAU, WORK, LWORK, RWORK, + $ INFO ) +* +* Determine the effective rank of A11 +* + K = 0 + DO 80 I = 1, MIN( M, N-L ) + IF( ABS( A( I, I ) ).GT.TOLA ) + $ K = K + 1 + 80 CONTINUE +* +* Update A12 := U**H*A12, where A12 = A( 1:M, N-L+1:N ) +* + CALL ZUNM2R( 'Left', 'Conjugate transpose', M, L, MIN( M, N-L ), + $ A, LDA, TAU, A( 1, N-L+1 ), LDA, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Copy the details of U, and form U +* + CALL ZLASET( 'Full', M, M, CZERO, CZERO, U, LDU ) + IF( M.GT.1 ) + $ CALL ZLACPY( 'Lower', M-1, N-L, A( 2, 1 ), LDA, U( 2, 1 ), + $ LDU ) + CALL ZUNG2R( M, M, MIN( M, N-L ), U, LDU, TAU, WORK, INFO ) + END IF +* + IF( WANTQ ) THEN +* +* Update Q( 1:N, 1:N-L ) = Q( 1:N, 1:N-L )*P1 +* + CALL ZLAPMT( FORWRD, N, N-L, Q, LDQ, IWORK ) + END IF +* +* Clean up A: set the strictly lower triangular part of +* A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. +* + DO 100 J = 1, K - 1 + DO 90 I = J + 1, K + A( I, J ) = CZERO + 90 CONTINUE + 100 CONTINUE + IF( M.GT.K ) + $ CALL ZLASET( 'Full', M-K, N-L, CZERO, CZERO, A( K+1, 1 ), LDA ) +* + IF( N-L.GT.K ) THEN +* +* RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 +* + CALL ZGERQ2( K, N-L, A, LDA, TAU, WORK, INFO ) +* + IF( WANTQ ) THEN +* +* Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1**H +* + CALL ZUNMR2( 'Right', 'Conjugate transpose', N, N-L, K, A, + $ LDA, TAU, Q, LDQ, WORK, INFO ) + END IF +* +* Clean up A +* + CALL ZLASET( 'Full', K, N-L-K, CZERO, CZERO, A, LDA ) + DO 120 J = N - L - K + 1, N - L + DO 110 I = J - N + L + K + 1, K + A( I, J ) = CZERO + 110 CONTINUE + 120 CONTINUE +* + END IF +* + IF( M.GT.K ) THEN +* +* QR factorization of A( K+1:M,N-L+1:N ) +* + CALL ZGEQR2( M-K, L, A( K+1, N-L+1 ), LDA, TAU, WORK, INFO ) +* + IF( WANTU ) THEN +* +* Update U(:,K+1:M) := U(:,K+1:M)*U1 +* + CALL ZUNM2R( 'Right', 'No transpose', M, M-K, MIN( M-K, L ), + $ A( K+1, N-L+1 ), LDA, TAU, U( 1, K+1 ), LDU, + $ WORK, INFO ) + END IF +* +* Clean up +* + DO 140 J = N - L + 1, N + DO 130 I = J - N + K + L + 1, M + A( I, J ) = CZERO + 130 CONTINUE + 140 CONTINUE +* + END IF +* + WORK( 1 ) = DCMPLX( LWKOPT ) + RETURN +* +* End of ZGGSVP3 +* + END diff --git a/dspl/liblapack/SRC/zgsvj0.f b/dspl/liblapack/SRC/zgsvj0.f new file mode 100644 index 0000000..c4a6bd3 --- /dev/null +++ b/dspl/liblapack/SRC/zgsvj0.f @@ -0,0 +1,935 @@ +*> \brief ZGSVJ0 pre-processor for the routine zgesvj. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGSVJ0 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS, +* SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP +* DOUBLE PRECISION EPS, SFMIN, TOL +* CHARACTER*1 JOBV +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), D( N ), V( LDV, * ), WORK( LWORK ) +* DOUBLE PRECISION SVA( N ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGSVJ0 is called from ZGESVJ as a pre-processor and that is its main +*> purpose. It applies Jacobi rotations in the same way as ZGESVJ does, but +*> it does not check convergence (stopping criterion). Few tuning +*> parameters (marked by [TP]) are available for the implementer. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> Specifies whether the output from this procedure is used +*> to compute the matrix V: +*> = 'V': the product of the Jacobi rotations is accumulated +*> by postmulyiplying the N-by-N array V. +*> (See the description of V.) +*> = 'A': the product of the Jacobi rotations is accumulated +*> by postmulyiplying the MV-by-N array V. +*> (See the descriptions of MV and V.) +*> = 'N': the Jacobi rotations are not accumulated. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. +*> M >= N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, M-by-N matrix A, such that A*diag(D) represents +*> the input matrix. +*> On exit, +*> A_onexit * diag(D_onexit) represents the input matrix A*diag(D) +*> post-multiplied by a sequence of Jacobi rotations, where the +*> rotation threshold and the total number of sweeps are given in +*> TOL and NSWEEP, respectively. +*> (See the descriptions of D, TOL and NSWEEP.) +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is COMPLEX*16 array, dimension (N) +*> The array D accumulates the scaling factors from the complex scaled +*> Jacobi rotations. +*> On entry, A*diag(D) represents the input matrix. +*> On exit, A_onexit*diag(D_onexit) represents the input matrix +*> post-multiplied by a sequence of Jacobi rotations, where the +*> rotation threshold and the total number of sweeps are given in +*> TOL and NSWEEP, respectively. +*> (See the descriptions of A, TOL and NSWEEP.) +*> \endverbatim +*> +*> \param[in,out] SVA +*> \verbatim +*> SVA is DOUBLE PRECISION array, dimension (N) +*> On entry, SVA contains the Euclidean norms of the columns of +*> the matrix A*diag(D). +*> On exit, SVA contains the Euclidean norms of the columns of +*> the matrix A_onexit*diag(D_onexit). +*> \endverbatim +*> +*> \param[in] MV +*> \verbatim +*> MV is INTEGER +*> If JOBV .EQ. 'A', then MV rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV = 'N', then MV is not referenced. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (LDV,N) +*> If JOBV .EQ. 'V' then N rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV .EQ. 'A' then MV rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV = 'N', then V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V, LDV >= 1. +*> If JOBV = 'V', LDV .GE. N. +*> If JOBV = 'A', LDV .GE. MV. +*> \endverbatim +*> +*> \param[in] EPS +*> \verbatim +*> EPS is DOUBLE PRECISION +*> EPS = DLAMCH('Epsilon') +*> \endverbatim +*> +*> \param[in] SFMIN +*> \verbatim +*> SFMIN is DOUBLE PRECISION +*> SFMIN = DLAMCH('Safe Minimum') +*> \endverbatim +*> +*> \param[in] TOL +*> \verbatim +*> TOL is DOUBLE PRECISION +*> TOL is the threshold for Jacobi rotations. For a pair +*> A(:,p), A(:,q) of pivot columns, the Jacobi rotation is +*> applied only if ABS(COS(angle(A(:,p),A(:,q)))) .GT. TOL. +*> \endverbatim +*> +*> \param[in] NSWEEP +*> \verbatim +*> NSWEEP is INTEGER +*> NSWEEP is the number of sweeps of Jacobi rotations to be +*> performed. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> LWORK is the dimension of WORK. LWORK .GE. M. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0 : successful exit. +*> < 0 : if INFO = -i, then the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHERcomputational +*> +*> \par Further Details: +* ===================== +*> +*> ZGSVJ0 is used just to enable ZGESVJ to call a simplified version of +*> itself to work on a submatrix of the original matrix. +*> +*> Contributor: +* ============= +*> +*> Zlatko Drmac (Zagreb, Croatia) +*> +*> \par Bugs, Examples and Comments: +* ============================ +*> +*> Please report all bugs and send interesting test examples and comments to +*> drmac@math.hr. Thank you. +* +* ===================================================================== + SUBROUTINE ZGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS, + $ SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* + IMPLICIT NONE +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP + DOUBLE PRECISION EPS, SFMIN, TOL + CHARACTER*1 JOBV +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), D( N ), V( LDV, * ), WORK( LWORK ) + DOUBLE PRECISION SVA( N ) +* .. +* +* ===================================================================== +* +* .. Local Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = (0.0D0, 0.0D0), CONE = (1.0D0, 0.0D0) ) +* .. +* .. Local Scalars .. + COMPLEX*16 AAPQ, OMPQ + DOUBLE PRECISION AAPP, AAPP0, AAPQ1, AAQQ, APOAQ, AQOAP, BIG, + $ BIGTHETA, CS, MXAAPQ, MXSINJ, ROOTBIG, ROOTEPS, + $ ROOTSFMIN, ROOTTOL, SMALL, SN, T, TEMP1, THETA, + $ THSIGN + INTEGER BLSKIP, EMPTSW, i, ibr, IERR, igl, IJBLSK, ir1, + $ ISWROT, jbc, jgl, KBL, LKAHEAD, MVL, NBL, + $ NOTROT, p, PSKIPPED, q, ROWSKIP, SWBAND + LOGICAL APPLV, ROTOK, RSVEC +* .. +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, CONJG, DBLE, MIN, SIGN, SQRT +* .. +* .. External Functions .. + DOUBLE PRECISION DZNRM2 + COMPLEX*16 ZDOTC + INTEGER IDAMAX + LOGICAL LSAME + EXTERNAL IDAMAX, LSAME, ZDOTC, DZNRM2 +* .. +* .. +* .. External Subroutines .. +* .. +* from BLAS + EXTERNAL ZCOPY, ZROT, ZSWAP, ZAXPY +* from LAPACK + EXTERNAL ZLASCL, ZLASSQ, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + APPLV = LSAME( JOBV, 'A' ) + RSVEC = LSAME( JOBV, 'V' ) + IF( .NOT.( RSVEC .OR. APPLV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( ( N.LT.0 ) .OR. ( N.GT.M ) ) THEN + INFO = -3 + ELSE IF( LDA.LT.M ) THEN + INFO = -5 + ELSE IF( ( RSVEC.OR.APPLV ) .AND. ( MV.LT.0 ) ) THEN + INFO = -8 + ELSE IF( ( RSVEC.AND.( LDV.LT.N ) ).OR. + $ ( APPLV.AND.( LDV.LT.MV ) ) ) THEN + INFO = -10 + ELSE IF( TOL.LE.EPS ) THEN + INFO = -13 + ELSE IF( NSWEEP.LT.0 ) THEN + INFO = -14 + ELSE IF( LWORK.LT.M ) THEN + INFO = -16 + ELSE + INFO = 0 + END IF +* +* #:( + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGSVJ0', -INFO ) + RETURN + END IF +* + IF( RSVEC ) THEN + MVL = N + ELSE IF( APPLV ) THEN + MVL = MV + END IF + RSVEC = RSVEC .OR. APPLV + + ROOTEPS = SQRT( EPS ) + ROOTSFMIN = SQRT( SFMIN ) + SMALL = SFMIN / EPS + BIG = ONE / SFMIN + ROOTBIG = ONE / ROOTSFMIN + BIGTHETA = ONE / ROOTEPS + ROOTTOL = SQRT( TOL ) +* +* .. Row-cyclic Jacobi SVD algorithm with column pivoting .. +* + EMPTSW = ( N*( N-1 ) ) / 2 + NOTROT = 0 +* +* .. Row-cyclic pivot strategy with de Rijk's pivoting .. +* + + SWBAND = 0 +*[TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective +* if ZGESVJ is used as a computational routine in the preconditioned +* Jacobi SVD algorithm ZGEJSV. For sweeps i=1:SWBAND the procedure +* works on pivots inside a band-like region around the diagonal. +* The boundaries are determined dynamically, based on the number of +* pivots above a threshold. +* + KBL = MIN( 8, N ) +*[TP] KBL is a tuning parameter that defines the tile size in the +* tiling of the p-q loops of pivot pairs. In general, an optimal +* value of KBL depends on the matrix dimensions and on the +* parameters of the computer's memory. +* + NBL = N / KBL + IF( ( NBL*KBL ).NE.N )NBL = NBL + 1 +* + BLSKIP = KBL**2 +*[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. +* + ROWSKIP = MIN( 5, KBL ) +*[TP] ROWSKIP is a tuning parameter. +* + LKAHEAD = 1 +*[TP] LKAHEAD is a tuning parameter. +* +* Quasi block transformations, using the lower (upper) triangular +* structure of the input matrix. The quasi-block-cycling usually +* invokes cubic convergence. Big part of this cycle is done inside +* canonical subspaces of dimensions less than M. +* +* +* .. Row-cyclic pivot strategy with de Rijk's pivoting .. +* + DO 1993 i = 1, NSWEEP +* +* .. go go go ... +* + MXAAPQ = ZERO + MXSINJ = ZERO + ISWROT = 0 +* + NOTROT = 0 + PSKIPPED = 0 +* +* Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs +* 1 <= p < q <= N. This is the first step toward a blocked implementation +* of the rotations. New implementation, based on block transformations, +* is under development. +* + DO 2000 ibr = 1, NBL +* + igl = ( ibr-1 )*KBL + 1 +* + DO 1002 ir1 = 0, MIN( LKAHEAD, NBL-ibr ) +* + igl = igl + ir1*KBL +* + DO 2001 p = igl, MIN( igl+KBL-1, N-1 ) +* +* .. de Rijk's pivoting +* + q = IDAMAX( N-p+1, SVA( p ), 1 ) + p - 1 + IF( p.NE.q ) THEN + CALL ZSWAP( M, A( 1, p ), 1, A( 1, q ), 1 ) + IF( RSVEC )CALL ZSWAP( MVL, V( 1, p ), 1, + $ V( 1, q ), 1 ) + TEMP1 = SVA( p ) + SVA( p ) = SVA( q ) + SVA( q ) = TEMP1 + AAPQ = D(p) + D(p) = D(q) + D(q) = AAPQ + END IF +* + IF( ir1.EQ.0 ) THEN +* +* Column norms are periodically updated by explicit +* norm computation. +* Caveat: +* Unfortunately, some BLAS implementations compute SNCRM2(M,A(1,p),1) +* as SQRT(S=ZDOTC(M,A(1,p),1,A(1,p),1)), which may cause the result to +* overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and to +* underflow for ||A(:,p)||_2 < SQRT(underflow_threshold). +* Hence, DZNRM2 cannot be trusted, not even in the case when +* the true norm is far from the under(over)flow boundaries. +* If properly implemented DZNRM2 is available, the IF-THEN-ELSE-END IF +* below should be replaced with "AAPP = DZNRM2( M, A(1,p), 1 )". +* + IF( ( SVA( p ).LT.ROOTBIG ) .AND. + $ ( SVA( p ).GT.ROOTSFMIN ) ) THEN + SVA( p ) = DZNRM2( M, A( 1, p ), 1 ) + ELSE + TEMP1 = ZERO + AAPP = ONE + CALL ZLASSQ( M, A( 1, p ), 1, TEMP1, AAPP ) + SVA( p ) = TEMP1*SQRT( AAPP ) + END IF + AAPP = SVA( p ) + ELSE + AAPP = SVA( p ) + END IF +* + IF( AAPP.GT.ZERO ) THEN +* + PSKIPPED = 0 +* + DO 2002 q = p + 1, MIN( igl+KBL-1, N ) +* + AAQQ = SVA( q ) +* + IF( AAQQ.GT.ZERO ) THEN +* + AAPP0 = AAPP + IF( AAQQ.GE.ONE ) THEN + ROTOK = ( SMALL*AAPP ).LE.AAQQ + IF( AAPP.LT.( BIG / AAQQ ) ) THEN + AAPQ = ( ZDOTC( M, A( 1, p ), 1, + $ A( 1, q ), 1 ) / AAQQ ) / AAPP + ELSE + CALL ZCOPY( M, A( 1, p ), 1, + $ WORK, 1 ) + CALL ZLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, WORK, LDA, IERR ) + AAPQ = ZDOTC( M, WORK, 1, + $ A( 1, q ), 1 ) / AAQQ + END IF + ELSE + ROTOK = AAPP.LE.( AAQQ / SMALL ) + IF( AAPP.GT.( SMALL / AAQQ ) ) THEN + AAPQ = ( ZDOTC( M, A( 1, p ), 1, + $ A( 1, q ), 1 ) / AAPP ) / AAQQ + ELSE + CALL ZCOPY( M, A( 1, q ), 1, + $ WORK, 1 ) + CALL ZLASCL( 'G', 0, 0, AAQQ, + $ ONE, M, 1, + $ WORK, LDA, IERR ) + AAPQ = ZDOTC( M, A( 1, p ), 1, + $ WORK, 1 ) / AAPP + END IF + END IF +* +* AAPQ = AAPQ * CONJG( CWORK(p) ) * CWORK(q) + AAPQ1 = -ABS(AAPQ) + MXAAPQ = MAX( MXAAPQ, -AAPQ1 ) +* +* TO rotate or NOT to rotate, THAT is the question ... +* + IF( ABS( AAPQ1 ).GT.TOL ) THEN + OMPQ = AAPQ / ABS(AAPQ) +* +* .. rotate +*[RTD] ROTATED = ROTATED + ONE +* + IF( ir1.EQ.0 ) THEN + NOTROT = 0 + PSKIPPED = 0 + ISWROT = ISWROT + 1 + END IF +* + IF( ROTOK ) THEN +* + AQOAP = AAQQ / AAPP + APOAQ = AAPP / AAQQ + THETA = -HALF*ABS( AQOAP-APOAQ )/AAPQ1 +* + IF( ABS( THETA ).GT.BIGTHETA ) THEN +* + T = HALF / THETA + CS = ONE + + CALL ZROT( M, A(1,p), 1, A(1,q), 1, + $ CS, CONJG(OMPQ)*T ) + IF ( RSVEC ) THEN + CALL ZROT( MVL, V(1,p), 1, + $ V(1,q), 1, CS, CONJG(OMPQ)*T ) + END IF + + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ1 ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, ABS( T ) ) +* + ELSE +* +* .. choose correct signum for THETA and rotate +* + THSIGN = -SIGN( ONE, AAPQ1 ) + T = ONE / ( THETA+THSIGN* + $ SQRT( ONE+THETA*THETA ) ) + CS = SQRT( ONE / ( ONE+T*T ) ) + SN = T*CS +* + MXSINJ = MAX( MXSINJ, ABS( SN ) ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ1 ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ1 ) ) +* + CALL ZROT( M, A(1,p), 1, A(1,q), 1, + $ CS, CONJG(OMPQ)*SN ) + IF ( RSVEC ) THEN + CALL ZROT( MVL, V(1,p), 1, + $ V(1,q), 1, CS, CONJG(OMPQ)*SN ) + END IF + END IF + D(p) = -D(q) * OMPQ +* + ELSE +* .. have to use modified Gram-Schmidt like transformation + CALL ZCOPY( M, A( 1, p ), 1, + $ WORK, 1 ) + CALL ZLASCL( 'G', 0, 0, AAPP, ONE, M, + $ 1, WORK, LDA, + $ IERR ) + CALL ZLASCL( 'G', 0, 0, AAQQ, ONE, M, + $ 1, A( 1, q ), LDA, IERR ) + CALL ZAXPY( M, -AAPQ, WORK, 1, + $ A( 1, q ), 1 ) + CALL ZLASCL( 'G', 0, 0, ONE, AAQQ, M, + $ 1, A( 1, q ), LDA, IERR ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE-AAPQ1*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + END IF +* END IF ROTOK THEN ... ELSE +* +* In the case of cancellation in updating SVA(q), SVA(p) +* recompute SVA(q), SVA(p). +* + IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS ) + $ THEN + IF( ( AAQQ.LT.ROOTBIG ) .AND. + $ ( AAQQ.GT.ROOTSFMIN ) ) THEN + SVA( q ) = DZNRM2( M, A( 1, q ), 1 ) + ELSE + T = ZERO + AAQQ = ONE + CALL ZLASSQ( M, A( 1, q ), 1, T, + $ AAQQ ) + SVA( q ) = T*SQRT( AAQQ ) + END IF + END IF + IF( ( AAPP / AAPP0 ).LE.ROOTEPS ) THEN + IF( ( AAPP.LT.ROOTBIG ) .AND. + $ ( AAPP.GT.ROOTSFMIN ) ) THEN + AAPP = DZNRM2( M, A( 1, p ), 1 ) + ELSE + T = ZERO + AAPP = ONE + CALL ZLASSQ( M, A( 1, p ), 1, T, + $ AAPP ) + AAPP = T*SQRT( AAPP ) + END IF + SVA( p ) = AAPP + END IF +* + ELSE +* A(:,p) and A(:,q) already numerically orthogonal + IF( ir1.EQ.0 )NOTROT = NOTROT + 1 +*[RTD] SKIPPED = SKIPPED + 1 + PSKIPPED = PSKIPPED + 1 + END IF + ELSE +* A(:,q) is zero column + IF( ir1.EQ.0 )NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + END IF +* + IF( ( i.LE.SWBAND ) .AND. + $ ( PSKIPPED.GT.ROWSKIP ) ) THEN + IF( ir1.EQ.0 )AAPP = -AAPP + NOTROT = 0 + GO TO 2103 + END IF +* + 2002 CONTINUE +* END q-LOOP +* + 2103 CONTINUE +* bailed out of q-loop +* + SVA( p ) = AAPP +* + ELSE + SVA( p ) = AAPP + IF( ( ir1.EQ.0 ) .AND. ( AAPP.EQ.ZERO ) ) + $ NOTROT = NOTROT + MIN( igl+KBL-1, N ) - p + END IF +* + 2001 CONTINUE +* end of the p-loop +* end of doing the block ( ibr, ibr ) + 1002 CONTINUE +* end of ir1-loop +* +* ... go to the off diagonal blocks +* + igl = ( ibr-1 )*KBL + 1 +* + DO 2010 jbc = ibr + 1, NBL +* + jgl = ( jbc-1 )*KBL + 1 +* +* doing the block at ( ibr, jbc ) +* + IJBLSK = 0 + DO 2100 p = igl, MIN( igl+KBL-1, N ) +* + AAPP = SVA( p ) + IF( AAPP.GT.ZERO ) THEN +* + PSKIPPED = 0 +* + DO 2200 q = jgl, MIN( jgl+KBL-1, N ) +* + AAQQ = SVA( q ) + IF( AAQQ.GT.ZERO ) THEN + AAPP0 = AAPP +* +* .. M x 2 Jacobi SVD .. +* +* Safe Gram matrix computation +* + IF( AAQQ.GE.ONE ) THEN + IF( AAPP.GE.AAQQ ) THEN + ROTOK = ( SMALL*AAPP ).LE.AAQQ + ELSE + ROTOK = ( SMALL*AAQQ ).LE.AAPP + END IF + IF( AAPP.LT.( BIG / AAQQ ) ) THEN + AAPQ = ( ZDOTC( M, A( 1, p ), 1, + $ A( 1, q ), 1 ) / AAQQ ) / AAPP + ELSE + CALL ZCOPY( M, A( 1, p ), 1, + $ WORK, 1 ) + CALL ZLASCL( 'G', 0, 0, AAPP, + $ ONE, M, 1, + $ WORK, LDA, IERR ) + AAPQ = ZDOTC( M, WORK, 1, + $ A( 1, q ), 1 ) / AAQQ + END IF + ELSE + IF( AAPP.GE.AAQQ ) THEN + ROTOK = AAPP.LE.( AAQQ / SMALL ) + ELSE + ROTOK = AAQQ.LE.( AAPP / SMALL ) + END IF + IF( AAPP.GT.( SMALL / AAQQ ) ) THEN + AAPQ = ( ZDOTC( M, A( 1, p ), 1, + $ A( 1, q ), 1 ) / MAX(AAQQ,AAPP) ) + $ / MIN(AAQQ,AAPP) + ELSE + CALL ZCOPY( M, A( 1, q ), 1, + $ WORK, 1 ) + CALL ZLASCL( 'G', 0, 0, AAQQ, + $ ONE, M, 1, + $ WORK, LDA, IERR ) + AAPQ = ZDOTC( M, A( 1, p ), 1, + $ WORK, 1 ) / AAPP + END IF + END IF +* +* AAPQ = AAPQ * CONJG(CWORK(p))*CWORK(q) + AAPQ1 = -ABS(AAPQ) + MXAAPQ = MAX( MXAAPQ, -AAPQ1 ) +* +* TO rotate or NOT to rotate, THAT is the question ... +* + IF( ABS( AAPQ1 ).GT.TOL ) THEN + OMPQ = AAPQ / ABS(AAPQ) + NOTROT = 0 +*[RTD] ROTATED = ROTATED + 1 + PSKIPPED = 0 + ISWROT = ISWROT + 1 +* + IF( ROTOK ) THEN +* + AQOAP = AAQQ / AAPP + APOAQ = AAPP / AAQQ + THETA = -HALF*ABS( AQOAP-APOAQ )/ AAPQ1 + IF( AAQQ.GT.AAPP0 )THETA = -THETA +* + IF( ABS( THETA ).GT.BIGTHETA ) THEN + T = HALF / THETA + CS = ONE + CALL ZROT( M, A(1,p), 1, A(1,q), 1, + $ CS, CONJG(OMPQ)*T ) + IF( RSVEC ) THEN + CALL ZROT( MVL, V(1,p), 1, + $ V(1,q), 1, CS, CONJG(OMPQ)*T ) + END IF + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ1 ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, ABS( T ) ) + ELSE +* +* .. choose correct signum for THETA and rotate +* + THSIGN = -SIGN( ONE, AAPQ1 ) + IF( AAQQ.GT.AAPP0 )THSIGN = -THSIGN + T = ONE / ( THETA+THSIGN* + $ SQRT( ONE+THETA*THETA ) ) + CS = SQRT( ONE / ( ONE+T*T ) ) + SN = T*CS + MXSINJ = MAX( MXSINJ, ABS( SN ) ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ1 ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ1 ) ) +* + CALL ZROT( M, A(1,p), 1, A(1,q), 1, + $ CS, CONJG(OMPQ)*SN ) + IF( RSVEC ) THEN + CALL ZROT( MVL, V(1,p), 1, + $ V(1,q), 1, CS, CONJG(OMPQ)*SN ) + END IF + END IF + D(p) = -D(q) * OMPQ +* + ELSE +* .. have to use modified Gram-Schmidt like transformation + IF( AAPP.GT.AAQQ ) THEN + CALL ZCOPY( M, A( 1, p ), 1, + $ WORK, 1 ) + CALL ZLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, WORK,LDA, + $ IERR ) + CALL ZLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + CALL ZAXPY( M, -AAPQ, WORK, + $ 1, A( 1, q ), 1 ) + CALL ZLASCL( 'G', 0, 0, ONE, AAQQ, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE-AAPQ1*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + ELSE + CALL ZCOPY( M, A( 1, q ), 1, + $ WORK, 1 ) + CALL ZLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, WORK,LDA, + $ IERR ) + CALL ZLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + CALL ZAXPY( M, -CONJG(AAPQ), + $ WORK, 1, A( 1, p ), 1 ) + CALL ZLASCL( 'G', 0, 0, ONE, AAPP, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + SVA( p ) = AAPP*SQRT( MAX( ZERO, + $ ONE-AAPQ1*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + END IF + END IF +* END IF ROTOK THEN ... ELSE +* +* In the case of cancellation in updating SVA(q), SVA(p) +* .. recompute SVA(q), SVA(p) + IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS ) + $ THEN + IF( ( AAQQ.LT.ROOTBIG ) .AND. + $ ( AAQQ.GT.ROOTSFMIN ) ) THEN + SVA( q ) = DZNRM2( M, A( 1, q ), 1) + ELSE + T = ZERO + AAQQ = ONE + CALL ZLASSQ( M, A( 1, q ), 1, T, + $ AAQQ ) + SVA( q ) = T*SQRT( AAQQ ) + END IF + END IF + IF( ( AAPP / AAPP0 )**2.LE.ROOTEPS ) THEN + IF( ( AAPP.LT.ROOTBIG ) .AND. + $ ( AAPP.GT.ROOTSFMIN ) ) THEN + AAPP = DZNRM2( M, A( 1, p ), 1 ) + ELSE + T = ZERO + AAPP = ONE + CALL ZLASSQ( M, A( 1, p ), 1, T, + $ AAPP ) + AAPP = T*SQRT( AAPP ) + END IF + SVA( p ) = AAPP + END IF +* end of OK rotation + ELSE + NOTROT = NOTROT + 1 +*[RTD] SKIPPED = SKIPPED + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF + ELSE + NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF +* + IF( ( i.LE.SWBAND ) .AND. ( IJBLSK.GE.BLSKIP ) ) + $ THEN + SVA( p ) = AAPP + NOTROT = 0 + GO TO 2011 + END IF + IF( ( i.LE.SWBAND ) .AND. + $ ( PSKIPPED.GT.ROWSKIP ) ) THEN + AAPP = -AAPP + NOTROT = 0 + GO TO 2203 + END IF +* + 2200 CONTINUE +* end of the q-loop + 2203 CONTINUE +* + SVA( p ) = AAPP +* + ELSE +* + IF( AAPP.EQ.ZERO )NOTROT = NOTROT + + $ MIN( jgl+KBL-1, N ) - jgl + 1 + IF( AAPP.LT.ZERO )NOTROT = 0 +* + END IF +* + 2100 CONTINUE +* end of the p-loop + 2010 CONTINUE +* end of the jbc-loop + 2011 CONTINUE +*2011 bailed out of the jbc-loop + DO 2012 p = igl, MIN( igl+KBL-1, N ) + SVA( p ) = ABS( SVA( p ) ) + 2012 CONTINUE +*** + 2000 CONTINUE +*2000 :: end of the ibr-loop +* +* .. update SVA(N) + IF( ( SVA( N ).LT.ROOTBIG ) .AND. ( SVA( N ).GT.ROOTSFMIN ) ) + $ THEN + SVA( N ) = DZNRM2( M, A( 1, N ), 1 ) + ELSE + T = ZERO + AAPP = ONE + CALL ZLASSQ( M, A( 1, N ), 1, T, AAPP ) + SVA( N ) = T*SQRT( AAPP ) + END IF +* +* Additional steering devices +* + IF( ( i.LT.SWBAND ) .AND. ( ( MXAAPQ.LE.ROOTTOL ) .OR. + $ ( ISWROT.LE.N ) ) )SWBAND = i +* + IF( ( i.GT.SWBAND+1 ) .AND. ( MXAAPQ.LT.SQRT( DBLE( N ) )* + $ TOL ) .AND. ( DBLE( N )*MXAAPQ*MXSINJ.LT.TOL ) ) THEN + GO TO 1994 + END IF +* + IF( NOTROT.GE.EMPTSW )GO TO 1994 +* + 1993 CONTINUE +* end i=1:NSWEEP loop +* +* #:( Reaching this point means that the procedure has not converged. + INFO = NSWEEP - 1 + GO TO 1995 +* + 1994 CONTINUE +* #:) Reaching this point means numerical convergence after the i-th +* sweep. +* + INFO = 0 +* #:) INFO = 0 confirms successful iterations. + 1995 CONTINUE +* +* Sort the vector SVA() of column norms. + DO 5991 p = 1, N - 1 + q = IDAMAX( N-p+1, SVA( p ), 1 ) + p - 1 + IF( p.NE.q ) THEN + TEMP1 = SVA( p ) + SVA( p ) = SVA( q ) + SVA( q ) = TEMP1 + AAPQ = D( p ) + D( p ) = D( q ) + D( q ) = AAPQ + CALL ZSWAP( M, A( 1, p ), 1, A( 1, q ), 1 ) + IF( RSVEC )CALL ZSWAP( MVL, V( 1, p ), 1, V( 1, q ), 1 ) + END IF + 5991 CONTINUE +* + RETURN +* .. +* .. END OF ZGSVJ0 +* .. + END diff --git a/dspl/liblapack/SRC/zgsvj1.f b/dspl/liblapack/SRC/zgsvj1.f new file mode 100644 index 0000000..91e39ca --- /dev/null +++ b/dspl/liblapack/SRC/zgsvj1.f @@ -0,0 +1,706 @@ +*> \brief \b ZGSVJ1 pre-processor for the routine zgesvj, applies Jacobi rotations targeting only particular pivots. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGSVJ1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV, +* EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION EPS, SFMIN, TOL +* INTEGER INFO, LDA, LDV, LWORK, M, MV, N, N1, NSWEEP +* CHARACTER*1 JOBV +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), D( N ), V( LDV, * ), WORK( LWORK ) +* DOUBLE PRECISION SVA( N ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGSVJ1 is called from ZGESVJ as a pre-processor and that is its main +*> purpose. It applies Jacobi rotations in the same way as ZGESVJ does, but +*> it targets only particular pivots and it does not check convergence +*> (stopping criterion). Few tunning parameters (marked by [TP]) are +*> available for the implementer. +*> +*> Further Details +*> ~~~~~~~~~~~~~~~ +*> ZGSVJ1 applies few sweeps of Jacobi rotations in the column space of +*> the input M-by-N matrix A. The pivot pairs are taken from the (1,2) +*> off-diagonal block in the corresponding N-by-N Gram matrix A^T * A. The +*> block-entries (tiles) of the (1,2) off-diagonal block are marked by the +*> [x]'s in the following scheme: +*> +*> | * * * [x] [x] [x]| +*> | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. +*> | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. +*> |[x] [x] [x] * * * | +*> |[x] [x] [x] * * * | +*> |[x] [x] [x] * * * | +*> +*> In terms of the columns of A, the first N1 columns are rotated 'against' +*> the remaining N-N1 columns, trying to increase the angle between the +*> corresponding subspaces. The off-diagonal block is N1-by(N-N1) and it is +*> tiled using quadratic tiles of side KBL. Here, KBL is a tunning parmeter. +*> The number of sweeps is given in NSWEEP and the orthogonality threshold +*> is given in TOL. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> Specifies whether the output from this procedure is used +*> to compute the matrix V: +*> = 'V': the product of the Jacobi rotations is accumulated +*> by postmulyiplying the N-by-N array V. +*> (See the description of V.) +*> = 'A': the product of the Jacobi rotations is accumulated +*> by postmulyiplying the MV-by-N array V. +*> (See the descriptions of MV and V.) +*> = 'N': the Jacobi rotations are not accumulated. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the input matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the input matrix A. +*> M >= N >= 0. +*> \endverbatim +*> +*> \param[in] N1 +*> \verbatim +*> N1 is INTEGER +*> N1 specifies the 2 x 2 block partition, the first N1 columns are +*> rotated 'against' the remaining N-N1 columns of A. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, M-by-N matrix A, such that A*diag(D) represents +*> the input matrix. +*> On exit, +*> A_onexit * D_onexit represents the input matrix A*diag(D) +*> post-multiplied by a sequence of Jacobi rotations, where the +*> rotation threshold and the total number of sweeps are given in +*> TOL and NSWEEP, respectively. +*> (See the descriptions of N1, D, TOL and NSWEEP.) +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is COMPLEX*16 array, dimension (N) +*> The array D accumulates the scaling factors from the fast scaled +*> Jacobi rotations. +*> On entry, A*diag(D) represents the input matrix. +*> On exit, A_onexit*diag(D_onexit) represents the input matrix +*> post-multiplied by a sequence of Jacobi rotations, where the +*> rotation threshold and the total number of sweeps are given in +*> TOL and NSWEEP, respectively. +*> (See the descriptions of N1, A, TOL and NSWEEP.) +*> \endverbatim +*> +*> \param[in,out] SVA +*> \verbatim +*> SVA is DOUBLE PRECISION array, dimension (N) +*> On entry, SVA contains the Euclidean norms of the columns of +*> the matrix A*diag(D). +*> On exit, SVA contains the Euclidean norms of the columns of +*> the matrix onexit*diag(D_onexit). +*> \endverbatim +*> +*> \param[in] MV +*> \verbatim +*> MV is INTEGER +*> If JOBV .EQ. 'A', then MV rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV = 'N', then MV is not referenced. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (LDV,N) +*> If JOBV .EQ. 'V' then N rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV .EQ. 'A' then MV rows of V are post-multipled by a +*> sequence of Jacobi rotations. +*> If JOBV = 'N', then V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V, LDV >= 1. +*> If JOBV = 'V', LDV .GE. N. +*> If JOBV = 'A', LDV .GE. MV. +*> \endverbatim +*> +*> \param[in] EPS +*> \verbatim +*> EPS is DOUBLE PRECISION +*> EPS = DLAMCH('Epsilon') +*> \endverbatim +*> +*> \param[in] SFMIN +*> \verbatim +*> SFMIN is DOUBLE PRECISION +*> SFMIN = DLAMCH('Safe Minimum') +*> \endverbatim +*> +*> \param[in] TOL +*> \verbatim +*> TOL is DOUBLE PRECISION +*> TOL is the threshold for Jacobi rotations. For a pair +*> A(:,p), A(:,q) of pivot columns, the Jacobi rotation is +*> applied only if ABS(COS(angle(A(:,p),A(:,q)))) .GT. TOL. +*> \endverbatim +*> +*> \param[in] NSWEEP +*> \verbatim +*> NSWEEP is INTEGER +*> NSWEEP is the number of sweeps of Jacobi rotations to be +*> performed. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> LWORK is the dimension of WORK. LWORK .GE. M. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0 : successful exit. +*> < 0 : if INFO = -i, then the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Contributor: +* ================== +*> +*> Zlatko Drmac (Zagreb, Croatia) +* +* ===================================================================== + SUBROUTINE ZGSVJ1( JOBV, M, N, N1, A, LDA, D, SVA, MV, V, LDV, + $ EPS, SFMIN, TOL, NSWEEP, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* + IMPLICIT NONE +* .. Scalar Arguments .. + DOUBLE PRECISION EPS, SFMIN, TOL + INTEGER INFO, LDA, LDV, LWORK, M, MV, N, N1, NSWEEP + CHARACTER*1 JOBV +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), D( N ), V( LDV, * ), WORK( LWORK ) + DOUBLE PRECISION SVA( N ) +* .. +* +* ===================================================================== +* +* .. Local Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE + PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0) +* .. +* .. Local Scalars .. + COMPLEX*16 AAPQ, OMPQ + DOUBLE PRECISION AAPP, AAPP0, AAPQ1, AAQQ, APOAQ, AQOAP, BIG, + $ BIGTHETA, CS, MXAAPQ, MXSINJ, ROOTBIG, + $ ROOTEPS, ROOTSFMIN, ROOTTOL, SMALL, SN, T, + $ TEMP1, THETA, THSIGN + INTEGER BLSKIP, EMPTSW, i, ibr, igl, IERR, IJBLSK, + $ ISWROT, jbc, jgl, KBL, MVL, NOTROT, nblc, nblr, + $ p, PSKIPPED, q, ROWSKIP, SWBAND + LOGICAL APPLV, ROTOK, RSVEC +* .. +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, CONJG, MAX, DBLE, MIN, SIGN, SQRT +* .. +* .. External Functions .. + DOUBLE PRECISION DZNRM2 + COMPLEX*16 ZDOTC + INTEGER IDAMAX + LOGICAL LSAME + EXTERNAL IDAMAX, LSAME, ZDOTC, DZNRM2 +* .. +* .. External Subroutines .. +* .. from BLAS + EXTERNAL ZCOPY, ZROT, ZSWAP, ZAXPY +* .. from LAPACK + EXTERNAL ZLASCL, ZLASSQ, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + APPLV = LSAME( JOBV, 'A' ) + RSVEC = LSAME( JOBV, 'V' ) + IF( .NOT.( RSVEC .OR. APPLV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( ( N.LT.0 ) .OR. ( N.GT.M ) ) THEN + INFO = -3 + ELSE IF( N1.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.M ) THEN + INFO = -6 + ELSE IF( ( RSVEC.OR.APPLV ) .AND. ( MV.LT.0 ) ) THEN + INFO = -9 + ELSE IF( ( RSVEC.AND.( LDV.LT.N ) ).OR. + $ ( APPLV.AND.( LDV.LT.MV ) ) ) THEN + INFO = -11 + ELSE IF( TOL.LE.EPS ) THEN + INFO = -14 + ELSE IF( NSWEEP.LT.0 ) THEN + INFO = -15 + ELSE IF( LWORK.LT.M ) THEN + INFO = -17 + ELSE + INFO = 0 + END IF +* +* #:( + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGSVJ1', -INFO ) + RETURN + END IF +* + IF( RSVEC ) THEN + MVL = N + ELSE IF( APPLV ) THEN + MVL = MV + END IF + RSVEC = RSVEC .OR. APPLV + + ROOTEPS = SQRT( EPS ) + ROOTSFMIN = SQRT( SFMIN ) + SMALL = SFMIN / EPS + BIG = ONE / SFMIN + ROOTBIG = ONE / ROOTSFMIN +* LARGE = BIG / SQRT( DBLE( M*N ) ) + BIGTHETA = ONE / ROOTEPS + ROOTTOL = SQRT( TOL ) +* +* .. Initialize the right singular vector matrix .. +* +* RSVEC = LSAME( JOBV, 'Y' ) +* + EMPTSW = N1*( N-N1 ) + NOTROT = 0 +* +* .. Row-cyclic pivot strategy with de Rijk's pivoting .. +* + KBL = MIN( 8, N ) + NBLR = N1 / KBL + IF( ( NBLR*KBL ).NE.N1 )NBLR = NBLR + 1 + +* .. the tiling is nblr-by-nblc [tiles] + + NBLC = ( N-N1 ) / KBL + IF( ( NBLC*KBL ).NE.( N-N1 ) )NBLC = NBLC + 1 + BLSKIP = ( KBL**2 ) + 1 +*[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL. + + ROWSKIP = MIN( 5, KBL ) +*[TP] ROWSKIP is a tuning parameter. + SWBAND = 0 +*[TP] SWBAND is a tuning parameter. It is meaningful and effective +* if ZGESVJ is used as a computational routine in the preconditioned +* Jacobi SVD algorithm ZGEJSV. +* +* +* | * * * [x] [x] [x]| +* | * * * [x] [x] [x]| Row-cycling in the nblr-by-nblc [x] blocks. +* | * * * [x] [x] [x]| Row-cyclic pivoting inside each [x] block. +* |[x] [x] [x] * * * | +* |[x] [x] [x] * * * | +* |[x] [x] [x] * * * | +* +* + DO 1993 i = 1, NSWEEP +* +* .. go go go ... +* + MXAAPQ = ZERO + MXSINJ = ZERO + ISWROT = 0 +* + NOTROT = 0 + PSKIPPED = 0 +* +* Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs +* 1 <= p < q <= N. This is the first step toward a blocked implementation +* of the rotations. New implementation, based on block transformations, +* is under development. +* + DO 2000 ibr = 1, NBLR +* + igl = ( ibr-1 )*KBL + 1 +* + +* +* ... go to the off diagonal blocks +* + igl = ( ibr-1 )*KBL + 1 +* +* DO 2010 jbc = ibr + 1, NBL + DO 2010 jbc = 1, NBLC +* + jgl = ( jbc-1 )*KBL + N1 + 1 +* +* doing the block at ( ibr, jbc ) +* + IJBLSK = 0 + DO 2100 p = igl, MIN( igl+KBL-1, N1 ) +* + AAPP = SVA( p ) + IF( AAPP.GT.ZERO ) THEN +* + PSKIPPED = 0 +* + DO 2200 q = jgl, MIN( jgl+KBL-1, N ) +* + AAQQ = SVA( q ) + IF( AAQQ.GT.ZERO ) THEN + AAPP0 = AAPP +* +* .. M x 2 Jacobi SVD .. +* +* Safe Gram matrix computation +* + IF( AAQQ.GE.ONE ) THEN + IF( AAPP.GE.AAQQ ) THEN + ROTOK = ( SMALL*AAPP ).LE.AAQQ + ELSE + ROTOK = ( SMALL*AAQQ ).LE.AAPP + END IF + IF( AAPP.LT.( BIG / AAQQ ) ) THEN + AAPQ = ( ZDOTC( M, A( 1, p ), 1, + $ A( 1, q ), 1 ) / AAQQ ) / AAPP + ELSE + CALL ZCOPY( M, A( 1, p ), 1, + $ WORK, 1 ) + CALL ZLASCL( 'G', 0, 0, AAPP, + $ ONE, M, 1, + $ WORK, LDA, IERR ) + AAPQ = ZDOTC( M, WORK, 1, + $ A( 1, q ), 1 ) / AAQQ + END IF + ELSE + IF( AAPP.GE.AAQQ ) THEN + ROTOK = AAPP.LE.( AAQQ / SMALL ) + ELSE + ROTOK = AAQQ.LE.( AAPP / SMALL ) + END IF + IF( AAPP.GT.( SMALL / AAQQ ) ) THEN + AAPQ = ( ZDOTC( M, A( 1, p ), 1, + $ A( 1, q ), 1 ) / MAX(AAQQ,AAPP) ) + $ / MIN(AAQQ,AAPP) + ELSE + CALL ZCOPY( M, A( 1, q ), 1, + $ WORK, 1 ) + CALL ZLASCL( 'G', 0, 0, AAQQ, + $ ONE, M, 1, + $ WORK, LDA, IERR ) + AAPQ = ZDOTC( M, A( 1, p ), 1, + $ WORK, 1 ) / AAPP + END IF + END IF +* +* AAPQ = AAPQ * CONJG(CWORK(p))*CWORK(q) + AAPQ1 = -ABS(AAPQ) + MXAAPQ = MAX( MXAAPQ, -AAPQ1 ) +* +* TO rotate or NOT to rotate, THAT is the question ... +* + IF( ABS( AAPQ1 ).GT.TOL ) THEN + OMPQ = AAPQ / ABS(AAPQ) + NOTROT = 0 +*[RTD] ROTATED = ROTATED + 1 + PSKIPPED = 0 + ISWROT = ISWROT + 1 +* + IF( ROTOK ) THEN +* + AQOAP = AAQQ / AAPP + APOAQ = AAPP / AAQQ + THETA = -HALF*ABS( AQOAP-APOAQ )/ AAPQ1 + IF( AAQQ.GT.AAPP0 )THETA = -THETA +* + IF( ABS( THETA ).GT.BIGTHETA ) THEN + T = HALF / THETA + CS = ONE + CALL ZROT( M, A(1,p), 1, A(1,q), 1, + $ CS, CONJG(OMPQ)*T ) + IF( RSVEC ) THEN + CALL ZROT( MVL, V(1,p), 1, + $ V(1,q), 1, CS, CONJG(OMPQ)*T ) + END IF + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ1 ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, ABS( T ) ) + ELSE +* +* .. choose correct signum for THETA and rotate +* + THSIGN = -SIGN( ONE, AAPQ1 ) + IF( AAQQ.GT.AAPP0 )THSIGN = -THSIGN + T = ONE / ( THETA+THSIGN* + $ SQRT( ONE+THETA*THETA ) ) + CS = SQRT( ONE / ( ONE+T*T ) ) + SN = T*CS + MXSINJ = MAX( MXSINJ, ABS( SN ) ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE+T*APOAQ*AAPQ1 ) ) + AAPP = AAPP*SQRT( MAX( ZERO, + $ ONE-T*AQOAP*AAPQ1 ) ) +* + CALL ZROT( M, A(1,p), 1, A(1,q), 1, + $ CS, CONJG(OMPQ)*SN ) + IF( RSVEC ) THEN + CALL ZROT( MVL, V(1,p), 1, + $ V(1,q), 1, CS, CONJG(OMPQ)*SN ) + END IF + END IF + D(p) = -D(q) * OMPQ +* + ELSE +* .. have to use modified Gram-Schmidt like transformation + IF( AAPP.GT.AAQQ ) THEN + CALL ZCOPY( M, A( 1, p ), 1, + $ WORK, 1 ) + CALL ZLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, WORK,LDA, + $ IERR ) + CALL ZLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + CALL ZAXPY( M, -AAPQ, WORK, + $ 1, A( 1, q ), 1 ) + CALL ZLASCL( 'G', 0, 0, ONE, AAQQ, + $ M, 1, A( 1, q ), LDA, + $ IERR ) + SVA( q ) = AAQQ*SQRT( MAX( ZERO, + $ ONE-AAPQ1*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + ELSE + CALL ZCOPY( M, A( 1, q ), 1, + $ WORK, 1 ) + CALL ZLASCL( 'G', 0, 0, AAQQ, ONE, + $ M, 1, WORK,LDA, + $ IERR ) + CALL ZLASCL( 'G', 0, 0, AAPP, ONE, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + CALL ZAXPY( M, -CONJG(AAPQ), + $ WORK, 1, A( 1, p ), 1 ) + CALL ZLASCL( 'G', 0, 0, ONE, AAPP, + $ M, 1, A( 1, p ), LDA, + $ IERR ) + SVA( p ) = AAPP*SQRT( MAX( ZERO, + $ ONE-AAPQ1*AAPQ1 ) ) + MXSINJ = MAX( MXSINJ, SFMIN ) + END IF + END IF +* END IF ROTOK THEN ... ELSE +* +* In the case of cancellation in updating SVA(q), SVA(p) +* .. recompute SVA(q), SVA(p) + IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS ) + $ THEN + IF( ( AAQQ.LT.ROOTBIG ) .AND. + $ ( AAQQ.GT.ROOTSFMIN ) ) THEN + SVA( q ) = DZNRM2( M, A( 1, q ), 1) + ELSE + T = ZERO + AAQQ = ONE + CALL ZLASSQ( M, A( 1, q ), 1, T, + $ AAQQ ) + SVA( q ) = T*SQRT( AAQQ ) + END IF + END IF + IF( ( AAPP / AAPP0 )**2.LE.ROOTEPS ) THEN + IF( ( AAPP.LT.ROOTBIG ) .AND. + $ ( AAPP.GT.ROOTSFMIN ) ) THEN + AAPP = DZNRM2( M, A( 1, p ), 1 ) + ELSE + T = ZERO + AAPP = ONE + CALL ZLASSQ( M, A( 1, p ), 1, T, + $ AAPP ) + AAPP = T*SQRT( AAPP ) + END IF + SVA( p ) = AAPP + END IF +* end of OK rotation + ELSE + NOTROT = NOTROT + 1 +*[RTD] SKIPPED = SKIPPED + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF + ELSE + NOTROT = NOTROT + 1 + PSKIPPED = PSKIPPED + 1 + IJBLSK = IJBLSK + 1 + END IF +* + IF( ( i.LE.SWBAND ) .AND. ( IJBLSK.GE.BLSKIP ) ) + $ THEN + SVA( p ) = AAPP + NOTROT = 0 + GO TO 2011 + END IF + IF( ( i.LE.SWBAND ) .AND. + $ ( PSKIPPED.GT.ROWSKIP ) ) THEN + AAPP = -AAPP + NOTROT = 0 + GO TO 2203 + END IF +* + 2200 CONTINUE +* end of the q-loop + 2203 CONTINUE +* + SVA( p ) = AAPP +* + ELSE +* + IF( AAPP.EQ.ZERO )NOTROT = NOTROT + + $ MIN( jgl+KBL-1, N ) - jgl + 1 + IF( AAPP.LT.ZERO )NOTROT = 0 +* + END IF +* + 2100 CONTINUE +* end of the p-loop + 2010 CONTINUE +* end of the jbc-loop + 2011 CONTINUE +*2011 bailed out of the jbc-loop + DO 2012 p = igl, MIN( igl+KBL-1, N ) + SVA( p ) = ABS( SVA( p ) ) + 2012 CONTINUE +*** + 2000 CONTINUE +*2000 :: end of the ibr-loop +* +* .. update SVA(N) + IF( ( SVA( N ).LT.ROOTBIG ) .AND. ( SVA( N ).GT.ROOTSFMIN ) ) + $ THEN + SVA( N ) = DZNRM2( M, A( 1, N ), 1 ) + ELSE + T = ZERO + AAPP = ONE + CALL ZLASSQ( M, A( 1, N ), 1, T, AAPP ) + SVA( N ) = T*SQRT( AAPP ) + END IF +* +* Additional steering devices +* + IF( ( i.LT.SWBAND ) .AND. ( ( MXAAPQ.LE.ROOTTOL ) .OR. + $ ( ISWROT.LE.N ) ) )SWBAND = i +* + IF( ( i.GT.SWBAND+1 ) .AND. ( MXAAPQ.LT.SQRT( DBLE( N ) )* + $ TOL ) .AND. ( DBLE( N )*MXAAPQ*MXSINJ.LT.TOL ) ) THEN + GO TO 1994 + END IF +* + IF( NOTROT.GE.EMPTSW )GO TO 1994 +* + 1993 CONTINUE +* end i=1:NSWEEP loop +* +* #:( Reaching this point means that the procedure has not converged. + INFO = NSWEEP - 1 + GO TO 1995 +* + 1994 CONTINUE +* #:) Reaching this point means numerical convergence after the i-th +* sweep. +* + INFO = 0 +* #:) INFO = 0 confirms successful iterations. + 1995 CONTINUE +* +* Sort the vector SVA() of column norms. + DO 5991 p = 1, N - 1 + q = IDAMAX( N-p+1, SVA( p ), 1 ) + p - 1 + IF( p.NE.q ) THEN + TEMP1 = SVA( p ) + SVA( p ) = SVA( q ) + SVA( q ) = TEMP1 + AAPQ = D( p ) + D( p ) = D( q ) + D( q ) = AAPQ + CALL ZSWAP( M, A( 1, p ), 1, A( 1, q ), 1 ) + IF( RSVEC )CALL ZSWAP( MVL, V( 1, p ), 1, V( 1, q ), 1 ) + END IF + 5991 CONTINUE +* +* + RETURN +* .. +* .. END OF ZGSVJ1 +* .. + END diff --git a/dspl/liblapack/SRC/zgtcon.f b/dspl/liblapack/SRC/zgtcon.f new file mode 100644 index 0000000..efa6a2c --- /dev/null +++ b/dspl/liblapack/SRC/zgtcon.f @@ -0,0 +1,253 @@ +*> \brief \b ZGTCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGTCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER INFO, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 D( * ), DL( * ), DU( * ), DU2( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGTCON estimates the reciprocal of the condition number of a complex +*> tridiagonal matrix A using the LU factorization as computed by +*> ZGTTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is COMPLEX*16 array, dimension (N-1) +*> The (n-1) multipliers that define the matrix L from the +*> LU factorization of A as computed by ZGTTRF. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is COMPLEX*16 array, dimension (N) +*> The n diagonal elements of the upper triangular matrix U from +*> the LU factorization of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is COMPLEX*16 array, dimension (N-1) +*> The (n-1) elements of the first superdiagonal of U. +*> \endverbatim +*> +*> \param[in] DU2 +*> \verbatim +*> DU2 is COMPLEX*16 array, dimension (N-2) +*> The (n-2) elements of the second superdiagonal of U. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= n, row i of the matrix was +*> interchanged with row IPIV(i). IPIV(i) will always be either +*> i or i+1; IPIV(i) = i indicates a row interchange was not +*> required. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> If NORM = '1' or 'O', the 1-norm of the original matrix A. +*> If NORM = 'I', the infinity-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GTcomputational +* +* ===================================================================== + SUBROUTINE ZGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER INFO, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 D( * ), DL( * ), DU( * ), DU2( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ONENRM + INTEGER I, KASE, KASE1 + DOUBLE PRECISION AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGTTRS, ZLACN2 +* .. +* .. Intrinsic Functions .. + INTRINSIC DCMPLX +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGTCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* +* Check that D(1:N) is non-zero. +* + DO 10 I = 1, N + IF( D( I ).EQ.DCMPLX( ZERO ) ) + $ RETURN + 10 CONTINUE +* + AINVNM = ZERO + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 20 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(U)*inv(L). +* + CALL ZGTTRS( 'No transpose', N, 1, DL, D, DU, DU2, IPIV, + $ WORK, N, INFO ) + ELSE +* +* Multiply by inv(L**H)*inv(U**H). +* + CALL ZGTTRS( 'Conjugate transpose', N, 1, DL, D, DU, DU2, + $ IPIV, WORK, N, INFO ) + END IF + GO TO 20 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of ZGTCON +* + END diff --git a/dspl/liblapack/SRC/zgtrfs.f b/dspl/liblapack/SRC/zgtrfs.f new file mode 100644 index 0000000..fedaaee --- /dev/null +++ b/dspl/liblapack/SRC/zgtrfs.f @@ -0,0 +1,487 @@ +*> \brief \b ZGTRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGTRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, +* IPIV, B, LDB, X, LDX, FERR, BERR, WORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX*16 B( LDB, * ), D( * ), DF( * ), DL( * ), +* $ DLF( * ), DU( * ), DU2( * ), DUF( * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGTRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is tridiagonal, and provides +*> error bounds and backward error estimates for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is COMPLEX*16 array, dimension (N-1) +*> The (n-1) subdiagonal elements of A. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is COMPLEX*16 array, dimension (N) +*> The diagonal elements of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is COMPLEX*16 array, dimension (N-1) +*> The (n-1) superdiagonal elements of A. +*> \endverbatim +*> +*> \param[in] DLF +*> \verbatim +*> DLF is COMPLEX*16 array, dimension (N-1) +*> The (n-1) multipliers that define the matrix L from the +*> LU factorization of A as computed by ZGTTRF. +*> \endverbatim +*> +*> \param[in] DF +*> \verbatim +*> DF is COMPLEX*16 array, dimension (N) +*> The n diagonal elements of the upper triangular matrix U from +*> the LU factorization of A. +*> \endverbatim +*> +*> \param[in] DUF +*> \verbatim +*> DUF is COMPLEX*16 array, dimension (N-1) +*> The (n-1) elements of the first superdiagonal of U. +*> \endverbatim +*> +*> \param[in] DU2 +*> \verbatim +*> DU2 is COMPLEX*16 array, dimension (N-2) +*> The (n-2) elements of the second superdiagonal of U. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= n, row i of the matrix was +*> interchanged with row IPIV(i). IPIV(i) will always be either +*> i or i+1; IPIV(i) = i indicates a row interchange was not +*> required. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by ZGTTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GTcomputational +* +* ===================================================================== + SUBROUTINE ZGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, + $ IPIV, B, LDB, X, LDX, FERR, BERR, WORK, RWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 B( LDB, * ), D( * ), DF( * ), DL( * ), + $ DLF( * ), DU( * ), DU2( * ), DUF( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + CHARACTER TRANSN, TRANST + INTEGER COUNT, I, J, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZGTTRS, ZLACN2, ZLAGTM +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DIMAG, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGTRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = 4 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 110 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL ZLAGTM( TRANS, N, 1, -ONE, DL, D, DU, X( 1, J ), LDX, ONE, + $ WORK, N ) +* +* Compute abs(op(A))*abs(x) + abs(b) for use in the backward +* error bound. +* + IF( NOTRAN ) THEN + IF( N.EQ.1 ) THEN + RWORK( 1 ) = CABS1( B( 1, J ) ) + + $ CABS1( D( 1 ) )*CABS1( X( 1, J ) ) + ELSE + RWORK( 1 ) = CABS1( B( 1, J ) ) + + $ CABS1( D( 1 ) )*CABS1( X( 1, J ) ) + + $ CABS1( DU( 1 ) )*CABS1( X( 2, J ) ) + DO 30 I = 2, N - 1 + RWORK( I ) = CABS1( B( I, J ) ) + + $ CABS1( DL( I-1 ) )*CABS1( X( I-1, J ) ) + + $ CABS1( D( I ) )*CABS1( X( I, J ) ) + + $ CABS1( DU( I ) )*CABS1( X( I+1, J ) ) + 30 CONTINUE + RWORK( N ) = CABS1( B( N, J ) ) + + $ CABS1( DL( N-1 ) )*CABS1( X( N-1, J ) ) + + $ CABS1( D( N ) )*CABS1( X( N, J ) ) + END IF + ELSE + IF( N.EQ.1 ) THEN + RWORK( 1 ) = CABS1( B( 1, J ) ) + + $ CABS1( D( 1 ) )*CABS1( X( 1, J ) ) + ELSE + RWORK( 1 ) = CABS1( B( 1, J ) ) + + $ CABS1( D( 1 ) )*CABS1( X( 1, J ) ) + + $ CABS1( DL( 1 ) )*CABS1( X( 2, J ) ) + DO 40 I = 2, N - 1 + RWORK( I ) = CABS1( B( I, J ) ) + + $ CABS1( DU( I-1 ) )*CABS1( X( I-1, J ) ) + + $ CABS1( D( I ) )*CABS1( X( I, J ) ) + + $ CABS1( DL( I ) )*CABS1( X( I+1, J ) ) + 40 CONTINUE + RWORK( N ) = CABS1( B( N, J ) ) + + $ CABS1( DU( N-1 ) )*CABS1( X( N-1, J ) ) + + $ CABS1( D( N ) )*CABS1( X( N, J ) ) + END IF + END IF +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + S = ZERO + DO 50 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 50 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZGTTRS( TRANS, N, 1, DLF, DF, DUF, DU2, IPIV, WORK, N, + $ INFO ) + CALL ZAXPY( N, DCMPLX( ONE ), WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 60 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 60 CONTINUE +* + KASE = 0 + 70 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL ZGTTRS( TRANST, N, 1, DLF, DF, DUF, DU2, IPIV, WORK, + $ N, INFO ) + DO 80 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 80 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 90 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 90 CONTINUE + CALL ZGTTRS( TRANSN, N, 1, DLF, DF, DUF, DU2, IPIV, WORK, + $ N, INFO ) + END IF + GO TO 70 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 100 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 100 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 110 CONTINUE +* + RETURN +* +* End of ZGTRFS +* + END diff --git a/dspl/liblapack/SRC/zgtsv.f b/dspl/liblapack/SRC/zgtsv.f new file mode 100644 index 0000000..96ef9db --- /dev/null +++ b/dspl/liblapack/SRC/zgtsv.f @@ -0,0 +1,244 @@ +*> \brief ZGTSV computes the solution to system of linear equations A * X = B for GT matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGTSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX*16 B( LDB, * ), D( * ), DL( * ), DU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGTSV solves the equation +*> +*> A*X = B, +*> +*> where A is an N-by-N tridiagonal matrix, by Gaussian elimination with +*> partial pivoting. +*> +*> Note that the equation A**T *X = B may be solved by interchanging the +*> order of the arguments DU and DL. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] DL +*> \verbatim +*> DL is COMPLEX*16 array, dimension (N-1) +*> On entry, DL must contain the (n-1) subdiagonal elements of +*> A. +*> On exit, DL is overwritten by the (n-2) elements of the +*> second superdiagonal of the upper triangular matrix U from +*> the LU factorization of A, in DL(1), ..., DL(n-2). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is COMPLEX*16 array, dimension (N) +*> On entry, D must contain the diagonal elements of A. +*> On exit, D is overwritten by the n diagonal elements of U. +*> \endverbatim +*> +*> \param[in,out] DU +*> \verbatim +*> DU is COMPLEX*16 array, dimension (N-1) +*> On entry, DU must contain the (n-1) superdiagonal elements +*> of A. +*> On exit, DU is overwritten by the (n-1) elements of the first +*> superdiagonal of U. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero, and the solution +*> has not been computed. The factorization has not been +*> completed unless i = N. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GTsolve +* +* ===================================================================== + SUBROUTINE ZGTSV( N, NRHS, DL, D, DU, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 B( LDB, * ), D( * ), DL( * ), DU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER J, K + COMPLEX*16 MULT, TEMP, ZDUM +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGTSV ', -INFO ) + RETURN + END IF +* + IF( N.EQ.0 ) + $ RETURN +* + DO 30 K = 1, N - 1 + IF( DL( K ).EQ.ZERO ) THEN +* +* Subdiagonal is zero, no elimination is required. +* + IF( D( K ).EQ.ZERO ) THEN +* +* Diagonal is zero: set INFO = K and return; a unique +* solution can not be found. +* + INFO = K + RETURN + END IF + ELSE IF( CABS1( D( K ) ).GE.CABS1( DL( K ) ) ) THEN +* +* No row interchange required +* + MULT = DL( K ) / D( K ) + D( K+1 ) = D( K+1 ) - MULT*DU( K ) + DO 10 J = 1, NRHS + B( K+1, J ) = B( K+1, J ) - MULT*B( K, J ) + 10 CONTINUE + IF( K.LT.( N-1 ) ) + $ DL( K ) = ZERO + ELSE +* +* Interchange rows K and K+1 +* + MULT = D( K ) / DL( K ) + D( K ) = DL( K ) + TEMP = D( K+1 ) + D( K+1 ) = DU( K ) - MULT*TEMP + IF( K.LT.( N-1 ) ) THEN + DL( K ) = DU( K+1 ) + DU( K+1 ) = -MULT*DL( K ) + END IF + DU( K ) = TEMP + DO 20 J = 1, NRHS + TEMP = B( K, J ) + B( K, J ) = B( K+1, J ) + B( K+1, J ) = TEMP - MULT*B( K+1, J ) + 20 CONTINUE + END IF + 30 CONTINUE + IF( D( N ).EQ.ZERO ) THEN + INFO = N + RETURN + END IF +* +* Back solve with the matrix U from the factorization. +* + DO 50 J = 1, NRHS + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / D( N-1 ) + DO 40 K = N - 2, 1, -1 + B( K, J ) = ( B( K, J )-DU( K )*B( K+1, J )-DL( K )* + $ B( K+2, J ) ) / D( K ) + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of ZGTSV +* + END diff --git a/dspl/liblapack/SRC/zgtsvx.f b/dspl/liblapack/SRC/zgtsvx.f new file mode 100644 index 0000000..0b24552 --- /dev/null +++ b/dspl/liblapack/SRC/zgtsvx.f @@ -0,0 +1,416 @@ +*> \brief ZGTSVX computes the solution to system of linear equations A * X = B for GT matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGTSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, +* DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR, +* WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER FACT, TRANS +* INTEGER INFO, LDB, LDX, N, NRHS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX*16 B( LDB, * ), D( * ), DF( * ), DL( * ), +* $ DLF( * ), DU( * ), DU2( * ), DUF( * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGTSVX uses the LU factorization to compute the solution to a complex +*> system of linear equations A * X = B, A**T * X = B, or A**H * X = B, +*> where A is a tridiagonal matrix of order N and X and B are N-by-NRHS +*> matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'N', the LU decomposition is used to factor the matrix A +*> as A = L * U, where L is a product of permutation and unit lower +*> bidiagonal matrices and U is upper triangular with nonzeros in +*> only the main diagonal and first two superdiagonals. +*> +*> 2. If some U(i,i)=0, so that U is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 3. The system of equations is solved for X using the factored form +*> of A. +*> +*> 4. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of A has been +*> supplied on entry. +*> = 'F': DLF, DF, DUF, DU2, and IPIV contain the factored form +*> of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV will not +*> be modified. +*> = 'N': The matrix will be copied to DLF, DF, and DUF +*> and factored. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is COMPLEX*16 array, dimension (N-1) +*> The (n-1) subdiagonal elements of A. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is COMPLEX*16 array, dimension (N) +*> The n diagonal elements of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is COMPLEX*16 array, dimension (N-1) +*> The (n-1) superdiagonal elements of A. +*> \endverbatim +*> +*> \param[in,out] DLF +*> \verbatim +*> DLF is COMPLEX*16 array, dimension (N-1) +*> If FACT = 'F', then DLF is an input argument and on entry +*> contains the (n-1) multipliers that define the matrix L from +*> the LU factorization of A as computed by ZGTTRF. +*> +*> If FACT = 'N', then DLF is an output argument and on exit +*> contains the (n-1) multipliers that define the matrix L from +*> the LU factorization of A. +*> \endverbatim +*> +*> \param[in,out] DF +*> \verbatim +*> DF is COMPLEX*16 array, dimension (N) +*> If FACT = 'F', then DF is an input argument and on entry +*> contains the n diagonal elements of the upper triangular +*> matrix U from the LU factorization of A. +*> +*> If FACT = 'N', then DF is an output argument and on exit +*> contains the n diagonal elements of the upper triangular +*> matrix U from the LU factorization of A. +*> \endverbatim +*> +*> \param[in,out] DUF +*> \verbatim +*> DUF is COMPLEX*16 array, dimension (N-1) +*> If FACT = 'F', then DUF is an input argument and on entry +*> contains the (n-1) elements of the first superdiagonal of U. +*> +*> If FACT = 'N', then DUF is an output argument and on exit +*> contains the (n-1) elements of the first superdiagonal of U. +*> \endverbatim +*> +*> \param[in,out] DU2 +*> \verbatim +*> DU2 is COMPLEX*16 array, dimension (N-2) +*> If FACT = 'F', then DU2 is an input argument and on entry +*> contains the (n-2) elements of the second superdiagonal of +*> U. +*> +*> If FACT = 'N', then DU2 is an output argument and on exit +*> contains the (n-2) elements of the second superdiagonal of +*> U. +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains the pivot indices from the LU factorization of A as +*> computed by ZGTTRF. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains the pivot indices from the LU factorization of A; +*> row i of the matrix was interchanged with row IPIV(i). +*> IPIV(i) will always be either i or i+1; IPIV(i) = i indicates +*> a row interchange was not required. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The estimate of the reciprocal condition number of the matrix +*> A. If RCOND is less than the machine precision (in +*> particular, if RCOND = 0), the matrix is singular to working +*> precision. This condition is indicated by a return code of +*> INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: U(i,i) is exactly zero. The factorization +*> has not been completed unless i = N, but the +*> factor U is exactly singular, so the solution +*> and error bounds could not be computed. +*> RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GTsolve +* +* ===================================================================== + SUBROUTINE ZGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, + $ DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER FACT, TRANS + INTEGER INFO, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 B( LDB, * ), D( * ), DF( * ), DL( * ), + $ DLF( * ), DU( * ), DU2( * ), DUF( * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT, NOTRAN + CHARACTER NORM + DOUBLE PRECISION ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANGT + EXTERNAL LSAME, DLAMCH, ZLANGT +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZGTCON, ZGTRFS, ZGTTRF, ZGTTRS, + $ ZLACPY +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGTSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the LU factorization of A. +* + CALL ZCOPY( N, D, 1, DF, 1 ) + IF( N.GT.1 ) THEN + CALL ZCOPY( N-1, DL, 1, DLF, 1 ) + CALL ZCOPY( N-1, DU, 1, DUF, 1 ) + END IF + CALL ZGTTRF( N, DLF, DF, DUF, DU2, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + IF( NOTRAN ) THEN + NORM = '1' + ELSE + NORM = 'I' + END IF + ANORM = ZLANGT( NORM, N, DL, D, DU ) +* +* Compute the reciprocal of the condition number of A. +* + CALL ZGTCON( NORM, N, DLF, DF, DUF, DU2, IPIV, ANORM, RCOND, WORK, + $ INFO ) +* +* Compute the solution vectors X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZGTTRS( TRANS, N, NRHS, DLF, DF, DUF, DU2, IPIV, X, LDX, + $ INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL ZGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, IPIV, + $ B, LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of ZGTSVX +* + END diff --git a/dspl/liblapack/SRC/zgttrf.f b/dspl/liblapack/SRC/zgttrf.f new file mode 100644 index 0000000..0d4c48d --- /dev/null +++ b/dspl/liblapack/SRC/zgttrf.f @@ -0,0 +1,243 @@ +*> \brief \b ZGTTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGTTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGTTRF computes an LU factorization of a complex tridiagonal matrix A +*> using elimination with partial pivoting and row interchanges. +*> +*> The factorization has the form +*> A = L * U +*> where L is a product of permutation and unit lower bidiagonal +*> matrices and U is upper triangular with nonzeros in only the main +*> diagonal and first two superdiagonals. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in,out] DL +*> \verbatim +*> DL is COMPLEX*16 array, dimension (N-1) +*> On entry, DL must contain the (n-1) sub-diagonal elements of +*> A. +*> +*> On exit, DL is overwritten by the (n-1) multipliers that +*> define the matrix L from the LU factorization of A. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is COMPLEX*16 array, dimension (N) +*> On entry, D must contain the diagonal elements of A. +*> +*> On exit, D is overwritten by the n diagonal elements of the +*> upper triangular matrix U from the LU factorization of A. +*> \endverbatim +*> +*> \param[in,out] DU +*> \verbatim +*> DU is COMPLEX*16 array, dimension (N-1) +*> On entry, DU must contain the (n-1) super-diagonal elements +*> of A. +*> +*> On exit, DU is overwritten by the (n-1) elements of the first +*> super-diagonal of U. +*> \endverbatim +*> +*> \param[out] DU2 +*> \verbatim +*> DU2 is COMPLEX*16 array, dimension (N-2) +*> On exit, DU2 is overwritten by the (n-2) elements of the +*> second super-diagonal of U. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= n, row i of the matrix was +*> interchanged with row IPIV(i). IPIV(i) will always be either +*> i or i+1; IPIV(i) = i indicates a row interchange was not +*> required. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, U(k,k) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, and division by zero will occur if it is used +*> to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GTcomputational +* +* ===================================================================== + SUBROUTINE ZGTTRF( N, DL, D, DU, DU2, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX*16 FACT, TEMP, ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'ZGTTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize IPIV(i) = i and DU2(i) = 0 +* + DO 10 I = 1, N + IPIV( I ) = I + 10 CONTINUE + DO 20 I = 1, N - 2 + DU2( I ) = ZERO + 20 CONTINUE +* + DO 30 I = 1, N - 2 + IF( CABS1( D( I ) ).GE.CABS1( DL( I ) ) ) THEN +* +* No row interchange required, eliminate DL(I) +* + IF( CABS1( D( I ) ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + DL( I ) = FACT + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + END IF + ELSE +* +* Interchange rows I and I+1, eliminate DL(I) +* + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + DL( I ) = FACT + TEMP = DU( I ) + DU( I ) = D( I+1 ) + D( I+1 ) = TEMP - FACT*D( I+1 ) + DU2( I ) = DU( I+1 ) + DU( I+1 ) = -FACT*DU( I+1 ) + IPIV( I ) = I + 1 + END IF + 30 CONTINUE + IF( N.GT.1 ) THEN + I = N - 1 + IF( CABS1( D( I ) ).GE.CABS1( DL( I ) ) ) THEN + IF( CABS1( D( I ) ).NE.ZERO ) THEN + FACT = DL( I ) / D( I ) + DL( I ) = FACT + D( I+1 ) = D( I+1 ) - FACT*DU( I ) + END IF + ELSE + FACT = D( I ) / DL( I ) + D( I ) = DL( I ) + DL( I ) = FACT + TEMP = DU( I ) + DU( I ) = D( I+1 ) + D( I+1 ) = TEMP - FACT*D( I+1 ) + IPIV( I ) = I + 1 + END IF + END IF +* +* Check for a zero on the diagonal of U. +* + DO 40 I = 1, N + IF( CABS1( D( I ) ).EQ.ZERO ) THEN + INFO = I + GO TO 50 + END IF + 40 CONTINUE + 50 CONTINUE +* + RETURN +* +* End of ZGTTRF +* + END diff --git a/dspl/liblapack/SRC/zgttrs.f b/dspl/liblapack/SRC/zgttrs.f new file mode 100644 index 0000000..f37c35a --- /dev/null +++ b/dspl/liblapack/SRC/zgttrs.f @@ -0,0 +1,225 @@ +*> \brief \b ZGTTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGTTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGTTRS solves one of the systems of equations +*> A * X = B, A**T * X = B, or A**H * X = B, +*> with a tridiagonal matrix A using the LU factorization computed +*> by ZGTTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations. +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is COMPLEX*16 array, dimension (N-1) +*> The (n-1) multipliers that define the matrix L from the +*> LU factorization of A. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is COMPLEX*16 array, dimension (N) +*> The n diagonal elements of the upper triangular matrix U from +*> the LU factorization of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is COMPLEX*16 array, dimension (N-1) +*> The (n-1) elements of the first super-diagonal of U. +*> \endverbatim +*> +*> \param[in] DU2 +*> \verbatim +*> DU2 is COMPLEX*16 array, dimension (N-2) +*> The (n-2) elements of the second super-diagonal of U. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= n, row i of the matrix was +*> interchanged with row IPIV(i). IPIV(i) will always be either +*> i or i+1; IPIV(i) = i indicates a row interchange was not +*> required. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the matrix of right hand side vectors B. +*> On exit, B is overwritten by the solution vectors X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GTcomputational +* +* ===================================================================== + SUBROUTINE ZGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER ITRANS, J, JB, NB +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGTTS2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOTRAN = ( TRANS.EQ.'N' .OR. TRANS.EQ.'n' ) + IF( .NOT.NOTRAN .AND. .NOT.( TRANS.EQ.'T' .OR. TRANS.EQ. + $ 't' ) .AND. .NOT.( TRANS.EQ.'C' .OR. TRANS.EQ.'c' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZGTTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Decode TRANS +* + IF( NOTRAN ) THEN + ITRANS = 0 + ELSE IF( TRANS.EQ.'T' .OR. TRANS.EQ.'t' ) THEN + ITRANS = 1 + ELSE + ITRANS = 2 + END IF +* +* Determine the number of right-hand sides to solve at a time. +* + IF( NRHS.EQ.1 ) THEN + NB = 1 + ELSE + NB = MAX( 1, ILAENV( 1, 'ZGTTRS', TRANS, N, NRHS, -1, -1 ) ) + END IF +* + IF( NB.GE.NRHS ) THEN + CALL ZGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) + ELSE + DO 10 J = 1, NRHS, NB + JB = MIN( NRHS-J+1, NB ) + CALL ZGTTS2( ITRANS, N, JB, DL, D, DU, DU2, IPIV, B( 1, J ), + $ LDB ) + 10 CONTINUE + END IF +* +* End of ZGTTRS +* + END diff --git a/dspl/liblapack/SRC/zgtts2.f b/dspl/liblapack/SRC/zgtts2.f new file mode 100644 index 0000000..c8c1797 --- /dev/null +++ b/dspl/liblapack/SRC/zgtts2.f @@ -0,0 +1,349 @@ +*> \brief \b ZGTTS2 solves a system of linear equations with a tridiagonal matrix using the LU factorization computed by sgttrf. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZGTTS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) +* +* .. Scalar Arguments .. +* INTEGER ITRANS, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGTTS2 solves one of the systems of equations +*> A * X = B, A**T * X = B, or A**H * X = B, +*> with a tridiagonal matrix A using the LU factorization computed +*> by ZGTTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITRANS +*> \verbatim +*> ITRANS is INTEGER +*> Specifies the form of the system of equations. +*> = 0: A * X = B (No transpose) +*> = 1: A**T * X = B (Transpose) +*> = 2: A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is COMPLEX*16 array, dimension (N-1) +*> The (n-1) multipliers that define the matrix L from the +*> LU factorization of A. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is COMPLEX*16 array, dimension (N) +*> The n diagonal elements of the upper triangular matrix U from +*> the LU factorization of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is COMPLEX*16 array, dimension (N-1) +*> The (n-1) elements of the first super-diagonal of U. +*> \endverbatim +*> +*> \param[in] DU2 +*> \verbatim +*> DU2 is COMPLEX*16 array, dimension (N-2) +*> The (n-2) elements of the second super-diagonal of U. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices; for 1 <= i <= n, row i of the matrix was +*> interchanged with row IPIV(i). IPIV(i) will always be either +*> i or i+1; IPIV(i) = i indicates a row interchange was not +*> required. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the matrix of right hand side vectors B. +*> On exit, B is overwritten by the solution vectors X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GTcomputational +* +* ===================================================================== + SUBROUTINE ZGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER ITRANS, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + COMPLEX*16 TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( ITRANS.EQ.0 ) THEN +* +* Solve A*X = B using the LU factorization of A, +* overwriting each right hand side vector with its solution. +* + IF( NRHS.LE.1 ) THEN + J = 1 + 10 CONTINUE +* +* Solve L*x = b. +* + DO 20 I = 1, N - 1 + IF( IPIV( I ).EQ.I ) THEN + B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J ) + ELSE + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - DL( I )*B( I, J ) + END IF + 20 CONTINUE +* +* Solve U*x = b. +* + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 30 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )* + $ B( I+2, J ) ) / D( I ) + 30 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 10 + END IF + ELSE + DO 60 J = 1, NRHS +* +* Solve L*x = b. +* + DO 40 I = 1, N - 1 + IF( IPIV( I ).EQ.I ) THEN + B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J ) + ELSE + TEMP = B( I, J ) + B( I, J ) = B( I+1, J ) + B( I+1, J ) = TEMP - DL( I )*B( I, J ) + END IF + 40 CONTINUE +* +* Solve U*x = b. +* + B( N, J ) = B( N, J ) / D( N ) + IF( N.GT.1 ) + $ B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / + $ D( N-1 ) + DO 50 I = N - 2, 1, -1 + B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )* + $ B( I+2, J ) ) / D( I ) + 50 CONTINUE + 60 CONTINUE + END IF + ELSE IF( ITRANS.EQ.1 ) THEN +* +* Solve A**T * X = B. +* + IF( NRHS.LE.1 ) THEN + J = 1 + 70 CONTINUE +* +* Solve U**T * x = b. +* + B( 1, J ) = B( 1, J ) / D( 1 ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) + DO 80 I = 3, N + B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )-DU2( I-2 )* + $ B( I-2, J ) ) / D( I ) + 80 CONTINUE +* +* Solve L**T * x = b. +* + DO 90 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - DL( I )*B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - DL( I )*TEMP + B( I, J ) = TEMP + END IF + 90 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 70 + END IF + ELSE + DO 120 J = 1, NRHS +* +* Solve U**T * x = b. +* + B( 1, J ) = B( 1, J ) / D( 1 ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 ) + DO 100 I = 3, N + B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )- + $ DU2( I-2 )*B( I-2, J ) ) / D( I ) + 100 CONTINUE +* +* Solve L**T * x = b. +* + DO 110 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - DL( I )*B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - DL( I )*TEMP + B( I, J ) = TEMP + END IF + 110 CONTINUE + 120 CONTINUE + END IF + ELSE +* +* Solve A**H * X = B. +* + IF( NRHS.LE.1 ) THEN + J = 1 + 130 CONTINUE +* +* Solve U**H * x = b. +* + B( 1, J ) = B( 1, J ) / DCONJG( D( 1 ) ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DCONJG( DU( 1 ) )*B( 1, J ) ) / + $ DCONJG( D( 2 ) ) + DO 140 I = 3, N + B( I, J ) = ( B( I, J )-DCONJG( DU( I-1 ) )*B( I-1, J )- + $ DCONJG( DU2( I-2 ) )*B( I-2, J ) ) / + $ DCONJG( D( I ) ) + 140 CONTINUE +* +* Solve L**H * x = b. +* + DO 150 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - DCONJG( DL( I ) )*B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - DCONJG( DL( I ) )*TEMP + B( I, J ) = TEMP + END IF + 150 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 130 + END IF + ELSE + DO 180 J = 1, NRHS +* +* Solve U**H * x = b. +* + B( 1, J ) = B( 1, J ) / DCONJG( D( 1 ) ) + IF( N.GT.1 ) + $ B( 2, J ) = ( B( 2, J )-DCONJG( DU( 1 ) )*B( 1, J ) ) + $ / DCONJG( D( 2 ) ) + DO 160 I = 3, N + B( I, J ) = ( B( I, J )-DCONJG( DU( I-1 ) )* + $ B( I-1, J )-DCONJG( DU2( I-2 ) )* + $ B( I-2, J ) ) / DCONJG( D( I ) ) + 160 CONTINUE +* +* Solve L**H * x = b. +* + DO 170 I = N - 1, 1, -1 + IF( IPIV( I ).EQ.I ) THEN + B( I, J ) = B( I, J ) - DCONJG( DL( I ) )* + $ B( I+1, J ) + ELSE + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - DCONJG( DL( I ) )*TEMP + B( I, J ) = TEMP + END IF + 170 CONTINUE + 180 CONTINUE + END IF + END IF +* +* End of ZGTTS2 +* + END diff --git a/dspl/liblapack/SRC/zhb2st_kernels.f b/dspl/liblapack/SRC/zhb2st_kernels.f new file mode 100644 index 0000000..a440b5c --- /dev/null +++ b/dspl/liblapack/SRC/zhb2st_kernels.f @@ -0,0 +1,377 @@ +*> \brief \b ZHB2ST_KERNELS +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHB2ST_KERNELS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHB2ST_KERNELS( UPLO, WANTZ, TTYPE, +* ST, ED, SWEEP, N, NB, IB, +* A, LDA, V, TAU, LDVT, WORK) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* LOGICAL WANTZ +* INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), V( * ), +* TAU( * ), WORK( * ) +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHB2ST_KERNELS is an internal routine used by the ZHETRD_HB2ST +*> subroutine. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL which indicate if Eigenvalue are requested or both +*> Eigenvalue/Eigenvectors. +*> \endverbatim +*> +*> \param[in] TTYPE +*> \verbatim +*> TTYPE is INTEGER +*> \endverbatim +*> +*> \param[in] ST +*> \verbatim +*> ST is INTEGER +*> internal parameter for indices. +*> \endverbatim +*> +*> \param[in] ED +*> \verbatim +*> ED is INTEGER +*> internal parameter for indices. +*> \endverbatim +*> +*> \param[in] SWEEP +*> \verbatim +*> SWEEP is INTEGER +*> internal parameter for indices. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER. The order of the matrix A. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER. The size of the band. +*> \endverbatim +*> +*> \param[in] IB +*> \verbatim +*> IB is INTEGER. +*> \endverbatim +*> +*> \param[in, out] A +*> \verbatim +*> A is COMPLEX*16 array. A pointer to the matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER. The leading dimension of the matrix A. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension 2*n if eigenvalues only are +*> requested or to be queried for vectors. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (2*n). +*> The scalar factors of the Householder reflectors are stored +*> in this array. +*> \endverbatim +*> +*> \param[in] LDVT +*> \verbatim +*> LDVT is INTEGER. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX*16 array. Workspace of size nb. +*> \endverbatim +*> +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHB2ST_KERNELS( UPLO, WANTZ, TTYPE, + $ ST, ED, SWEEP, N, NB, IB, + $ A, LDA, V, TAU, LDVT, WORK) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + LOGICAL WANTZ + INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), V( * ), + $ TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS, + $ DPOS, OFDPOS, AJETER + COMPLEX*16 CTMP +* .. +* .. External Subroutines .. + EXTERNAL ZLARFG, ZLARFX, ZLARFY +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MOD +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. +* .. Executable Statements .. +* + AJETER = IB + LDVT + UPPER = LSAME( UPLO, 'U' ) + + IF( UPPER ) THEN + DPOS = 2 * NB + 1 + OFDPOS = 2 * NB + ELSE + DPOS = 1 + OFDPOS = 2 + ENDIF + +* +* Upper case +* + IF( UPPER ) THEN +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ENDIF +* + IF( TTYPE.EQ.1 ) THEN + LM = ED - ST + 1 +* + V( VPOS ) = ONE + DO 10 I = 1, LM-1 + V( VPOS+I ) = DCONJG( A( OFDPOS-I, ST+I ) ) + A( OFDPOS-I, ST+I ) = ZERO + 10 CONTINUE + CTMP = DCONJG( A( OFDPOS, ST ) ) + CALL ZLARFG( LM, CTMP, V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) + A( OFDPOS, ST ) = CTMP +* + LM = ED - ST + 1 + CALL ZLARFY( UPLO, LM, V( VPOS ), 1, + $ DCONJG( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + ENDIF +* + IF( TTYPE.EQ.3 ) THEN +* + LM = ED - ST + 1 + CALL ZLARFY( UPLO, LM, V( VPOS ), 1, + $ DCONJG( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + ENDIF +* + IF( TTYPE.EQ.2 ) THEN + J1 = ED+1 + J2 = MIN( ED+NB, N ) + LN = ED-ST+1 + LM = J2-J1+1 + IF( LM.GT.0) THEN + CALL ZLARFX( 'Left', LN, LM, V( VPOS ), + $ DCONJG( TAU( TAUPOS ) ), + $ A( DPOS-NB, J1 ), LDA-1, WORK) +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ENDIF +* + V( VPOS ) = ONE + DO 30 I = 1, LM-1 + V( VPOS+I ) = + $ DCONJG( A( DPOS-NB-I, J1+I ) ) + A( DPOS-NB-I, J1+I ) = ZERO + 30 CONTINUE + CTMP = DCONJG( A( DPOS-NB, J1 ) ) + CALL ZLARFG( LM, CTMP, V( VPOS+1 ), 1, TAU( TAUPOS ) ) + A( DPOS-NB, J1 ) = CTMP +* + CALL ZLARFX( 'Right', LN-1, LM, V( VPOS ), + $ TAU( TAUPOS ), + $ A( DPOS-NB+1, J1 ), LDA-1, WORK) + ENDIF + ENDIF +* +* Lower case +* + ELSE +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + ST + TAUPOS = MOD( SWEEP-1, 2 ) * N + ST + ENDIF +* + IF( TTYPE.EQ.1 ) THEN + LM = ED - ST + 1 +* + V( VPOS ) = ONE + DO 20 I = 1, LM-1 + V( VPOS+I ) = A( OFDPOS+I, ST-1 ) + A( OFDPOS+I, ST-1 ) = ZERO + 20 CONTINUE + CALL ZLARFG( LM, A( OFDPOS, ST-1 ), V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) +* + LM = ED - ST + 1 +* + CALL ZLARFY( UPLO, LM, V( VPOS ), 1, + $ DCONJG( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + + ENDIF +* + IF( TTYPE.EQ.3 ) THEN + LM = ED - ST + 1 +* + CALL ZLARFY( UPLO, LM, V( VPOS ), 1, + $ DCONJG( TAU( TAUPOS ) ), + $ A( DPOS, ST ), LDA-1, WORK) + + ENDIF +* + IF( TTYPE.EQ.2 ) THEN + J1 = ED+1 + J2 = MIN( ED+NB, N ) + LN = ED-ST+1 + LM = J2-J1+1 +* + IF( LM.GT.0) THEN + CALL ZLARFX( 'Right', LM, LN, V( VPOS ), + $ TAU( TAUPOS ), A( DPOS+NB, ST ), + $ LDA-1, WORK) +* + IF( WANTZ ) THEN + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ELSE + VPOS = MOD( SWEEP-1, 2 ) * N + J1 + TAUPOS = MOD( SWEEP-1, 2 ) * N + J1 + ENDIF +* + V( VPOS ) = ONE + DO 40 I = 1, LM-1 + V( VPOS+I ) = A( DPOS+NB+I, ST ) + A( DPOS+NB+I, ST ) = ZERO + 40 CONTINUE + CALL ZLARFG( LM, A( DPOS+NB, ST ), V( VPOS+1 ), 1, + $ TAU( TAUPOS ) ) +* + CALL ZLARFX( 'Left', LM, LN-1, V( VPOS ), + $ DCONJG( TAU( TAUPOS ) ), + $ A( DPOS+NB-1, ST+1 ), LDA-1, WORK) + + ENDIF + ENDIF + ENDIF +* + RETURN +* +* END OF ZHB2ST_KERNELS +* + END diff --git a/dspl/liblapack/SRC/zhbev.f b/dspl/liblapack/SRC/zhbev.f new file mode 100644 index 0000000..964e684 --- /dev/null +++ b/dspl/liblapack/SRC/zhbev.f @@ -0,0 +1,294 @@ +*> \brief ZHBEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHBEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHBEV( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, +* RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHBEV computes all the eigenvalues and, optionally, eigenvectors of +*> a complex Hermitian band matrix A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (max(1,3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHEReigen +* +* ===================================================================== + SUBROUTINE ZHBEV( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, + $ RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, WANTZ + INTEGER IINFO, IMAX, INDE, INDRWK, ISCALE + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHB + EXTERNAL LSAME, DLAMCH, ZLANHB +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZHBTRD, ZLASCL, ZSTEQR +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBEV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( LOWER ) THEN + W( 1 ) = AB( 1, 1 ) + ELSE + W( 1 ) = AB( KD+1, 1 ) + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call ZHBTRD to reduce Hermitian band matrix to tridiagonal form. +* + INDE = 1 + CALL ZHBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, RWORK( INDE ), Z, + $ LDZ, WORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call ZSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + INDRWK = INDE + N + CALL ZSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + RETURN +* +* End of ZHBEV +* + END diff --git a/dspl/liblapack/SRC/zhbev_2stage.f b/dspl/liblapack/SRC/zhbev_2stage.f new file mode 100644 index 0000000..583e55c --- /dev/null +++ b/dspl/liblapack/SRC/zhbev_2stage.f @@ -0,0 +1,389 @@ +*> \brief ZHBEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHBEV_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHBEV_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, +* WORK, LWORK, RWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, N, LWORK +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHBEV_2STAGE computes all the eigenvalues and, optionally, eigenvectors of +*> a complex Hermitian band matrix A using the 2stage technique for +*> the reduction to tridiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension LWORK +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (max(1,3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16OTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHBEV_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, + $ WORK, LWORK, RWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, N, LWORK +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, WANTZ, LQUERY + INTEGER IINFO, IMAX, INDE, INDWRK, INDRWK, ISCALE, + $ LLWORK, LWMIN, LHTRD, LWTRD, IB, INDHOUS + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + DOUBLE PRECISION DLAMCH, ZLANHB + EXTERNAL LSAME, DLAMCH, ZLANHB, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZLASCL, ZSTEQR, + $ ZHETRD_2STAGE, ZHETRD_HB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + IB = ILAENV2STAGE( 2, 'ZHETRD_HB2ST', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'ZHETRD_HB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'ZHETRD_HB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWMIN = LHTRD + LWTRD + WORK( 1 ) = LWMIN + ENDIF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -11 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBEV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( LOWER ) THEN + W( 1 ) = DBLE( AB( 1, 1 ) ) + ELSE + W( 1 ) = DBLE( AB( KD+1, 1 ) ) + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call ZHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. +* + INDE = 1 + INDHOUS = 1 + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL ZHETRD_HB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, + $ RWORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call ZSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + INDRWK = INDE + N + CALL ZSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of ZHBEV_2STAGE +* + END diff --git a/dspl/liblapack/SRC/zhbevd.f b/dspl/liblapack/SRC/zhbevd.f new file mode 100644 index 0000000..70378b1 --- /dev/null +++ b/dspl/liblapack/SRC/zhbevd.f @@ -0,0 +1,401 @@ +*> \brief ZHBEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHBEVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, +* LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHBEVD computes all the eigenvalues and, optionally, eigenvectors of +*> a complex Hermitian band matrix A. If eigenvectors are desired, it +*> uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LWORK must be at least N. +*> If JOBZ = 'V' and N > 1, LWORK must be at least 2*N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, +*> dimension (LRWORK) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of array RWORK. +*> If N <= 1, LRWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LRWORK must be at least N. +*> If JOBZ = 'V' and N > 1, LRWORK must be at least +*> 1 + 5*N + 2*N**2. +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N . +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHEReigen +* +* ===================================================================== + SUBROUTINE ZHBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK, + $ LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDWK2, INDWRK, ISCALE, + $ LIWMIN, LLRWK, LLWK2, LRWMIN, LWMIN + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHB + EXTERNAL LSAME, DLAMCH, ZLANHB +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZGEMM, ZHBTRD, ZLACPY, + $ ZLASCL, ZSTEDC +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + ELSE + IF( WANTZ ) THEN + LWMIN = 2*N**2 + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + LRWMIN = N + LIWMIN = 1 + END IF + END IF + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AB( 1, 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call ZHBTRD to reduce Hermitian band matrix to tridiagonal form. +* + INDE = 1 + INDWRK = INDE + N + INDWK2 = 1 + N*N + LLWK2 = LWORK - INDWK2 + 1 + LLRWK = LRWORK - INDWRK + 1 + CALL ZHBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, RWORK( INDE ), Z, + $ LDZ, WORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call ZSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ), + $ LLWK2, RWORK( INDWRK ), LLRWK, IWORK, LIWORK, + $ INFO ) + CALL ZGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO, + $ WORK( INDWK2 ), N ) + CALL ZLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of ZHBEVD +* + END diff --git a/dspl/liblapack/SRC/zhbevd_2stage.f b/dspl/liblapack/SRC/zhbevd_2stage.f new file mode 100644 index 0000000..be493a1 --- /dev/null +++ b/dspl/liblapack/SRC/zhbevd_2stage.f @@ -0,0 +1,458 @@ +*> \brief ZHBEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHBEVD_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, +* WORK, LWORK, RWORK, LRWORK, IWORK, +* LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHBEVD_2STAGE computes all the eigenvalues and, optionally, eigenvectors of +*> a complex Hermitian band matrix A using the 2stage technique for +*> the reduction to tridiagonal. If eigenvectors are desired, it +*> uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the first +*> superdiagonal and the diagonal of the tridiagonal matrix T +*> are returned in rows KD and KD+1 of AB, and if UPLO = 'L', +*> the diagonal and first subdiagonal of T are returned in the +*> first two rows of AB. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, +*> dimension (LRWORK) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of array RWORK. +*> If N <= 1, LRWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LRWORK must be at least N. +*> If JOBZ = 'V' and N > 1, LRWORK must be at least +*> 1 + 5*N + 2*N**2. +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N . +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16OTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHBEVD_2STAGE( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, + $ WORK, LWORK, RWORK, LRWORK, IWORK, + $ LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AB( LDAB, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDWK2, INDRWK, ISCALE, + $ LLWORK, INDWK, LHTRD, LWTRD, IB, INDHOUS, + $ LIWMIN, LLRWK, LLWK2, LRWMIN, LWMIN + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + DOUBLE PRECISION DLAMCH, ZLANHB + EXTERNAL LSAME, DLAMCH, ZLANHB, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZGEMM, ZLACPY, + $ ZLASCL, ZSTEDC, ZHETRD_HB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + ELSE + IB = ILAENV2STAGE( 2, 'ZHETRD_HB2ST', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'ZHETRD_HB2ST', JOBZ, N, KD, IB, -1 ) + IF( WANTZ ) THEN + LWMIN = 2*N**2 + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = MAX( N, LHTRD + LWTRD ) + LRWMIN = N + LIWMIN = 1 + END IF + END IF + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBEVD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = DBLE( AB( 1, 1 ) ) + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + END IF +* +* Call ZHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. +* + INDE = 1 + INDRWK = INDE + N + LLRWK = LRWORK - INDRWK + 1 + INDHOUS = 1 + INDWK = INDHOUS + LHTRD + LLWORK = LWORK - INDWK + 1 + INDWK2 = INDWK + N*N + LLWK2 = LWORK - INDWK2 + 1 +* + CALL ZHETRD_HB2ST( "N", JOBZ, UPLO, N, KD, AB, LDAB, W, + $ RWORK( INDE ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call ZSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ), + $ LLWK2, RWORK( INDRWK ), LLRWK, IWORK, LIWORK, + $ INFO ) + CALL ZGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO, + $ WORK( INDWK2 ), N ) + CALL ZLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of ZHBEVD_2STAGE +* + END diff --git a/dspl/liblapack/SRC/zhbevx.f b/dspl/liblapack/SRC/zhbevx.f new file mode 100644 index 0000000..3966cfa --- /dev/null +++ b/dspl/liblapack/SRC/zhbevx.f @@ -0,0 +1,553 @@ +*> \brief ZHBEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHBEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHBEVX( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, LDQ, VL, +* VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, RWORK, +* IWORK, IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 AB( LDAB, * ), Q( LDQ, * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHBEVX computes selected eigenvalues and, optionally, eigenvectors +*> of a complex Hermitian band matrix A. Eigenvalues and eigenvectors +*> can be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found; +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found; +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ, N) +*> If JOBZ = 'V', the N-by-N unitary matrix used in the +*> reduction to tridiagonal form. +*> If JOBZ = 'N', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. If JOBZ = 'V', then +*> LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing AB to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHEReigen +* +* ===================================================================== + SUBROUTINE ZHBEVX( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, LDQ, VL, + $ VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, RWORK, + $ IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AB( LDAB, * ), Q( LDQ, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, TEST, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWK, INDRWK, INDWRK, ISCALE, ITMP1, + $ J, JJ, NSPLIT + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU + COMPLEX*16 CTMP1 +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHB + EXTERNAL LSAME, DLAMCH, ZLANHB +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZCOPY, + $ ZGEMV, ZHBTRD, ZLACPY, ZLASCL, ZSTEIN, ZSTEQR, + $ ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LOWER = LSAME( UPLO, 'L' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( WANTZ .AND. LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -13 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -18 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBEVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + M = 1 + IF( LOWER ) THEN + CTMP1 = AB( 1, 1 ) + ELSE + CTMP1 = AB( KD+1, 1 ) + END IF + TMP1 = DBLE( CTMP1 ) + IF( VALEIG ) THEN + IF( .NOT.( VL.LT.TMP1 .AND. VU.GE.TMP1 ) ) + $ M = 0 + END IF + IF( M.EQ.1 ) THEN + W( 1 ) = CTMP1 + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + END IF + ANRM = ZLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call ZHBTRD to reduce Hermitian band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N + INDWRK = 1 + CALL ZHBTRD( JOBZ, UPLO, N, KD, AB, LDAB, RWORK( INDD ), + $ RWORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call DSTERF or ZSTEQR. If this fails for some +* eigenvalue, then try DSTEBZ. +* + TEST = .FALSE. + IF (INDEIG) THEN + IF (IL.EQ.1 .AND. IU.EQ.N) THEN + TEST = .TRUE. + END IF + END IF + IF ((ALLEIG .OR. TEST) .AND. (ABSTOL.LE.ZERO)) THEN + CALL DCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL DSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL ZLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL ZSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL ZSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by ZSTEIN. +* + DO 20 J = 1, M + CALL ZCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL ZGEMV( 'N', N, N, CONE, Q, LDQ, WORK, 1, CZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* + RETURN +* +* End of ZHBEVX +* + END diff --git a/dspl/liblapack/SRC/zhbevx_2stage.f b/dspl/liblapack/SRC/zhbevx_2stage.f new file mode 100644 index 0000000..9b1f5fe --- /dev/null +++ b/dspl/liblapack/SRC/zhbevx_2stage.f @@ -0,0 +1,649 @@ +*> \brief ZHBEVX_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHBEVX_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHBEVX_2STAGE( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, +* Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, +* Z, LDZ, WORK, LWORK, RWORK, IWORK, +* IFAIL, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N, LWORK +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 AB( LDAB, * ), Q( LDQ, * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHBEVX_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a complex Hermitian band matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors +*> can be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found; +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found; +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, AB is overwritten by values generated during the +*> reduction to tridiagonal form. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD + 1. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ, N) +*> If JOBZ = 'V', the N-by-N unitary matrix used in the +*> reduction to tridiagonal form. +*> If JOBZ = 'N', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. If JOBZ = 'V', then +*> LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing AB to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS +*> where KD is the size of the band. +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHEReigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHBEVX_2STAGE( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, + $ Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, + $ Z, LDZ, WORK, LWORK, RWORK, IWORK, + $ IFAIL, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N, LWORK + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AB( LDAB, * ), Q( LDQ, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, TEST, VALEIG, WANTZ, + $ LQUERY + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWK, INDRWK, INDWRK, ISCALE, ITMP1, + $ LLWORK, LWMIN, LHTRD, LWTRD, IB, INDHOUS, + $ J, JJ, NSPLIT + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU + COMPLEX*16 CTMP1 +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + DOUBLE PRECISION DLAMCH, ZLANHB + EXTERNAL LSAME, DLAMCH, ZLANHB, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZCOPY, + $ ZGEMV, ZLACPY, ZLASCL, ZSTEIN, ZSTEQR, + $ ZSWAP, ZHETRD_HB2ST +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( WANTZ .AND. LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -13 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + IB = ILAENV2STAGE( 2, 'ZHETRD_HB2ST', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'ZHETRD_HB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'ZHETRD_HB2ST', JOBZ, + $ N, KD, IB, -1 ) + LWMIN = LHTRD + LWTRD + WORK( 1 ) = LWMIN + ENDIF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -20 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBEVX_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + M = 1 + IF( LOWER ) THEN + CTMP1 = AB( 1, 1 ) + ELSE + CTMP1 = AB( KD+1, 1 ) + END IF + TMP1 = DBLE( CTMP1 ) + IF( VALEIG ) THEN + IF( .NOT.( VL.LT.TMP1 .AND. VU.GE.TMP1 ) ) + $ M = 0 + END IF + IF( M.EQ.1 ) THEN + W( 1 ) = DBLE( CTMP1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + END IF + ANRM = ZLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + ELSE + CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO ) + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call ZHBTRD_HB2ST to reduce Hermitian band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N +* + INDHOUS = 1 + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL ZHETRD_HB2ST( 'N', JOBZ, UPLO, N, KD, AB, LDAB, + $ RWORK( INDD ), RWORK( INDE ), WORK( INDHOUS ), + $ LHTRD, WORK( INDWRK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call DSTERF or ZSTEQR. If this fails for some +* eigenvalue, then try DSTEBZ. +* + TEST = .FALSE. + IF (INDEIG) THEN + IF (IL.EQ.1 .AND. IU.EQ.N) THEN + TEST = .TRUE. + END IF + END IF + IF ((ALLEIG .OR. TEST) .AND. (ABSTOL.LE.ZERO)) THEN + CALL DCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL DSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL ZLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL ZSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL ZSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by ZSTEIN. +* + DO 20 J = 1, M + CALL ZCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL ZGEMV( 'N', N, N, CONE, Q, LDQ, WORK, 1, CZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of ZHBEVX_2STAGE +* + END diff --git a/dspl/liblapack/SRC/zhbgst.f b/dspl/liblapack/SRC/zhbgst.f new file mode 100644 index 0000000..cb1c43b --- /dev/null +++ b/dspl/liblapack/SRC/zhbgst.f @@ -0,0 +1,1470 @@ +*> \brief \b ZHBGST +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHBGST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHBGST( VECT, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, X, +* LDX, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, VECT +* INTEGER INFO, KA, KB, LDAB, LDBB, LDX, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 AB( LDAB, * ), BB( LDBB, * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHBGST reduces a complex Hermitian-definite banded generalized +*> eigenproblem A*x = lambda*B*x to standard form C*y = lambda*y, +*> such that C has the same bandwidth as A. +*> +*> B must have been previously factorized as S**H*S by ZPBSTF, using a +*> split Cholesky factorization. A is overwritten by C = X**H*A*X, where +*> X = S**(-1)*Q and Q is a unitary matrix chosen to preserve the +*> bandwidth of A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': do not form the transformation matrix X; +*> = 'V': form X. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] KA +*> \verbatim +*> KA is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KA >= 0. +*> \endverbatim +*> +*> \param[in] KB +*> \verbatim +*> KB is INTEGER +*> The number of superdiagonals of the matrix B if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KA >= KB >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first ka+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +*> +*> On exit, the transformed matrix X**H*A*X, stored in the same +*> format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KA+1. +*> \endverbatim +*> +*> \param[in] BB +*> \verbatim +*> BB is COMPLEX*16 array, dimension (LDBB,N) +*> The banded factor S from the split Cholesky factorization of +*> B, as returned by ZPBSTF, stored in the first kb+1 rows of +*> the array. +*> \endverbatim +*> +*> \param[in] LDBB +*> \verbatim +*> LDBB is INTEGER +*> The leading dimension of the array BB. LDBB >= KB+1. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,N) +*> If VECT = 'V', the n-by-n matrix X. +*> If VECT = 'N', the array X is not referenced. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. +*> LDX >= max(1,N) if VECT = 'V'; LDX >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZHBGST( VECT, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, X, + $ LDX, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO, VECT + INTEGER INFO, KA, KB, LDAB, LDBB, LDX, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 AB( LDAB, * ), BB( LDBB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + DOUBLE PRECISION ONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ), ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPDATE, UPPER, WANTX + INTEGER I, I0, I1, I2, INCA, J, J1, J1T, J2, J2T, K, + $ KA1, KB1, KBT, L, M, NR, NRT, NX + DOUBLE PRECISION BII + COMPLEX*16 RA, RA1, T +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZGERC, ZGERU, ZLACGV, ZLAR2V, + $ ZLARGV, ZLARTG, ZLARTV, ZLASET, ZROT +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + WANTX = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + KA1 = KA + 1 + KB1 = KB + 1 + INFO = 0 + IF( .NOT.WANTX .AND. .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDX.LT.1 .OR. WANTX .AND. LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBGST', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + INCA = LDAB*KA1 +* +* Initialize X to the unit matrix, if needed +* + IF( WANTX ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, X, LDX ) +* +* Set M to the splitting point m. It must be the same value as is +* used in ZPBSTF. The chosen value allows the arrays WORK and RWORK +* to be of dimension (N). +* + M = ( N+KB ) / 2 +* +* The routine works in two phases, corresponding to the two halves +* of the split Cholesky factorization of B as S**H*S where +* +* S = ( U ) +* ( M L ) +* +* with U upper triangular of order m, and L lower triangular of +* order n-m. S has the same bandwidth as B. +* +* S is treated as a product of elementary matrices: +* +* S = S(m)*S(m-1)*...*S(2)*S(1)*S(m+1)*S(m+2)*...*S(n-1)*S(n) +* +* where S(i) is determined by the i-th row of S. +* +* In phase 1, the index i takes the values n, n-1, ... , m+1; +* in phase 2, it takes the values 1, 2, ... , m. +* +* For each value of i, the current matrix A is updated by forming +* inv(S(i))**H*A*inv(S(i)). This creates a triangular bulge outside +* the band of A. The bulge is then pushed down toward the bottom of +* A in phase 1, and up toward the top of A in phase 2, by applying +* plane rotations. +* +* There are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1 +* of them are linearly independent, so annihilating a bulge requires +* only 2*kb-1 plane rotations. The rotations are divided into a 1st +* set of kb-1 rotations, and a 2nd set of kb rotations. +* +* Wherever possible, rotations are generated and applied in vector +* operations of length NR between the indices J1 and J2 (sometimes +* replaced by modified values NRT, J1T or J2T). +* +* The real cosines and complex sines of the rotations are stored in +* the arrays RWORK and WORK, those of the 1st set in elements +* 2:m-kb-1, and those of the 2nd set in elements m-kb+1:n. +* +* The bulges are not formed explicitly; nonzero elements outside the +* band are created only when they are required for generating new +* rotations; they are stored in the array WORK, in positions where +* they are later overwritten by the sines of the rotations which +* annihilate them. +* +* **************************** Phase 1 ***************************** +* +* The logical structure of this phase is: +* +* UPDATE = .TRUE. +* DO I = N, M + 1, -1 +* use S(i) to update A and create a new bulge +* apply rotations to push all bulges KA positions downward +* END DO +* UPDATE = .FALSE. +* DO I = M + KA + 1, N - 1 +* apply rotations to push all bulges KA positions downward +* END DO +* +* To avoid duplicating code, the two loops are merged. +* + UPDATE = .TRUE. + I = N + 1 + 10 CONTINUE + IF( UPDATE ) THEN + I = I - 1 + KBT = MIN( KB, I-1 ) + I0 = I - 1 + I1 = MIN( N, I+KA ) + I2 = I - KBT + KA1 + IF( I.LT.M+1 ) THEN + UPDATE = .FALSE. + I = I + 1 + I0 = M + IF( KA.EQ.0 ) + $ GO TO 480 + GO TO 10 + END IF + ELSE + I = I + KA + IF( I.GT.N-1 ) + $ GO TO 480 + END IF +* + IF( UPPER ) THEN +* +* Transform A, working with the upper triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**H * A * inv(S(i)) +* + BII = DBLE( BB( KB1, I ) ) + AB( KA1, I ) = ( DBLE( AB( KA1, I ) ) / BII ) / BII + DO 20 J = I + 1, I1 + AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII + 20 CONTINUE + DO 30 J = MAX( 1, I-KA ), I - 1 + AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII + 30 CONTINUE + DO 60 K = I - KBT, I - 1 + DO 40 J = I - KBT, K + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ BB( J-I+KB1, I )* + $ DCONJG( AB( K-I+KA1, I ) ) - + $ DCONJG( BB( K-I+KB1, I ) )* + $ AB( J-I+KA1, I ) + + $ DBLE( AB( KA1, I ) )* + $ BB( J-I+KB1, I )* + $ DCONJG( BB( K-I+KB1, I ) ) + 40 CONTINUE + DO 50 J = MAX( 1, I-KA ), I - KBT - 1 + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ DCONJG( BB( K-I+KB1, I ) )* + $ AB( J-I+KA1, I ) + 50 CONTINUE + 60 CONTINUE + DO 80 J = I, I1 + DO 70 K = MAX( J-KA, I-KBT ), I - 1 + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ BB( K-I+KB1, I )*AB( I-J+KA1, J ) + 70 CONTINUE + 80 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL ZDSCAL( N-M, ONE / BII, X( M+1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL ZGERC( N-M, KBT, -CONE, X( M+1, I ), 1, + $ BB( KB1-KBT, I ), 1, X( M+1, I-KBT ), + $ LDX ) + END IF +* +* store a(i,i1) in RA1 for use in next loop over K +* + RA1 = AB( I-I1+KA1, I1 ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions down toward the bottom of the +* band +* + DO 130 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN +* +* generate rotation to annihilate a(i,i-k+ka+1) +* + CALL ZLARTG( AB( K+1, I-K+KA ), RA1, + $ RWORK( I-K+KA-M ), WORK( I-K+KA-M ), RA ) +* +* create nonzero element a(i-k,i-k+ka+1) outside the +* band and store it in WORK(i-k) +* + T = -BB( KB1-K, I )*RA1 + WORK( I-K ) = RWORK( I-K+KA-M )*T - + $ DCONJG( WORK( I-K+KA-M ) )* + $ AB( 1, I-K+KA ) + AB( 1, I-K+KA ) = WORK( I-K+KA-M )*T + + $ RWORK( I-K+KA-M )*AB( 1, I-K+KA ) + RA1 = RA + END IF + END IF + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MAX( J2, I+2*KA-K+1 ) + ELSE + J2T = J2 + END IF + NRT = ( N-J2T+KA ) / KA1 + DO 90 J = J2T, J1, KA1 +* +* create nonzero element a(j-ka,j+1) outside the band +* and store it in WORK(j-m) +* + WORK( J-M ) = WORK( J-M )*AB( 1, J+1 ) + AB( 1, J+1 ) = RWORK( J-M )*AB( 1, J+1 ) + 90 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL ZLARGV( NRT, AB( 1, J2T ), INCA, WORK( J2T-M ), KA1, + $ RWORK( J2T-M ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the right +* + DO 100 L = 1, KA - 1 + CALL ZLARTV( NR, AB( KA1-L, J2 ), INCA, + $ AB( KA-L, J2+1 ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 100 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL ZLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ), + $ AB( KA, J2+1 ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) +* + CALL ZLACGV( NR, WORK( J2-M ), KA1 ) + END IF +* +* start applying rotations in 1st set from the left +* + DO 110 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 110 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 120 J = J2, J1, KA1 + CALL ZROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ RWORK( J-M ), DCONJG( WORK( J-M ) ) ) + 120 CONTINUE + END IF + 130 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.LE.N .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i-kbt,i-kbt+ka+1) outside the +* band and store it in WORK(i-kbt) +* + WORK( I-KBT ) = -BB( KB1-KBT, I )*RA1 + END IF + END IF +* + DO 170 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1 + ELSE + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + END IF +* +* finish applying rotations in 2nd set from the left +* + DO 140 L = KB - K, 1, -1 + NRT = ( N-J2+KA+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( L, J2-L+1 ), INCA, + $ AB( L+1, J2-L+1 ), INCA, RWORK( J2-KA ), + $ WORK( J2-KA ), KA1 ) + 140 CONTINUE + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + DO 150 J = J1, J2, -KA1 + WORK( J ) = WORK( J-KA ) + RWORK( J ) = RWORK( J-KA ) + 150 CONTINUE + DO 160 J = J2, J1, KA1 +* +* create nonzero element a(j-ka,j+1) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( 1, J+1 ) + AB( 1, J+1 ) = RWORK( J )*AB( 1, J+1 ) + 160 CONTINUE + IF( UPDATE ) THEN + IF( I-K.LT.N-KA .AND. K.LE.KBT ) + $ WORK( I-K+KA ) = WORK( I-K ) + END IF + 170 CONTINUE +* + DO 210 K = KB, 1, -1 + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL ZLARGV( NR, AB( 1, J2 ), INCA, WORK( J2 ), KA1, + $ RWORK( J2 ), KA1 ) +* +* apply rotations in 2nd set from the right +* + DO 180 L = 1, KA - 1 + CALL ZLARTV( NR, AB( KA1-L, J2 ), INCA, + $ AB( KA-L, J2+1 ), INCA, RWORK( J2 ), + $ WORK( J2 ), KA1 ) + 180 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL ZLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ), + $ AB( KA, J2+1 ), INCA, RWORK( J2 ), + $ WORK( J2 ), KA1 ) +* + CALL ZLACGV( NR, WORK( J2 ), KA1 ) + END IF +* +* start applying rotations in 2nd set from the left +* + DO 190 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, RWORK( J2 ), + $ WORK( J2 ), KA1 ) + 190 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 200 J = J2, J1, KA1 + CALL ZROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ RWORK( J ), DCONJG( WORK( J ) ) ) + 200 CONTINUE + END IF + 210 CONTINUE +* + DO 230 K = 1, KB - 1 + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 +* +* finish applying rotations in 1st set from the left +* + DO 220 L = KB - K, 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( L, J2+KA1-L ), INCA, + $ AB( L+1, J2+KA1-L ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 220 CONTINUE + 230 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 240 J = N - 1, J2 + KA, -1 + RWORK( J-M ) = RWORK( J-KA-M ) + WORK( J-M ) = WORK( J-KA-M ) + 240 CONTINUE + END IF +* + ELSE +* +* Transform A, working with the lower triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**H * A * inv(S(i)) +* + BII = DBLE( BB( 1, I ) ) + AB( 1, I ) = ( DBLE( AB( 1, I ) ) / BII ) / BII + DO 250 J = I + 1, I1 + AB( J-I+1, I ) = AB( J-I+1, I ) / BII + 250 CONTINUE + DO 260 J = MAX( 1, I-KA ), I - 1 + AB( I-J+1, J ) = AB( I-J+1, J ) / BII + 260 CONTINUE + DO 290 K = I - KBT, I - 1 + DO 270 J = I - KBT, K + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ BB( I-J+1, J )*DCONJG( AB( I-K+1, + $ K ) ) - DCONJG( BB( I-K+1, K ) )* + $ AB( I-J+1, J ) + DBLE( AB( 1, I ) )* + $ BB( I-J+1, J )*DCONJG( BB( I-K+1, + $ K ) ) + 270 CONTINUE + DO 280 J = MAX( 1, I-KA ), I - KBT - 1 + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ DCONJG( BB( I-K+1, K ) )* + $ AB( I-J+1, J ) + 280 CONTINUE + 290 CONTINUE + DO 310 J = I, I1 + DO 300 K = MAX( J-KA, I-KBT ), I - 1 + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ BB( I-K+1, K )*AB( J-I+1, I ) + 300 CONTINUE + 310 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL ZDSCAL( N-M, ONE / BII, X( M+1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL ZGERU( N-M, KBT, -CONE, X( M+1, I ), 1, + $ BB( KBT+1, I-KBT ), LDBB-1, + $ X( M+1, I-KBT ), LDX ) + END IF +* +* store a(i1,i) in RA1 for use in next loop over K +* + RA1 = AB( I1-I+1, I ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions down toward the bottom of the +* band +* + DO 360 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN +* +* generate rotation to annihilate a(i-k+ka+1,i) +* + CALL ZLARTG( AB( KA1-K, I ), RA1, RWORK( I-K+KA-M ), + $ WORK( I-K+KA-M ), RA ) +* +* create nonzero element a(i-k+ka+1,i-k) outside the +* band and store it in WORK(i-k) +* + T = -BB( K+1, I-K )*RA1 + WORK( I-K ) = RWORK( I-K+KA-M )*T - + $ DCONJG( WORK( I-K+KA-M ) )* + $ AB( KA1, I-K ) + AB( KA1, I-K ) = WORK( I-K+KA-M )*T + + $ RWORK( I-K+KA-M )*AB( KA1, I-K ) + RA1 = RA + END IF + END IF + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MAX( J2, I+2*KA-K+1 ) + ELSE + J2T = J2 + END IF + NRT = ( N-J2T+KA ) / KA1 + DO 320 J = J2T, J1, KA1 +* +* create nonzero element a(j+1,j-ka) outside the band +* and store it in WORK(j-m) +* + WORK( J-M ) = WORK( J-M )*AB( KA1, J-KA+1 ) + AB( KA1, J-KA+1 ) = RWORK( J-M )*AB( KA1, J-KA+1 ) + 320 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL ZLARGV( NRT, AB( KA1, J2T-KA ), INCA, WORK( J2T-M ), + $ KA1, RWORK( J2T-M ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the left +* + DO 330 L = 1, KA - 1 + CALL ZLARTV( NR, AB( L+1, J2-L ), INCA, + $ AB( L+2, J2-L ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 330 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL ZLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ), + $ INCA, RWORK( J2-M ), WORK( J2-M ), KA1 ) +* + CALL ZLACGV( NR, WORK( J2-M ), KA1 ) + END IF +* +* start applying rotations in 1st set from the right +* + DO 340 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 340 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 350 J = J2, J1, KA1 + CALL ZROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ RWORK( J-M ), WORK( J-M ) ) + 350 CONTINUE + END IF + 360 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.LE.N .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i-kbt+ka+1,i-kbt) outside the +* band and store it in WORK(i-kbt) +* + WORK( I-KBT ) = -BB( KBT+1, I-KBT )*RA1 + END IF + END IF +* + DO 400 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1 + ELSE + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + END IF +* +* finish applying rotations in 2nd set from the right +* + DO 370 L = KB - K, 1, -1 + NRT = ( N-J2+KA+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( KA1-L+1, J2-KA ), INCA, + $ AB( KA1-L, J2-KA+1 ), INCA, + $ RWORK( J2-KA ), WORK( J2-KA ), KA1 ) + 370 CONTINUE + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + DO 380 J = J1, J2, -KA1 + WORK( J ) = WORK( J-KA ) + RWORK( J ) = RWORK( J-KA ) + 380 CONTINUE + DO 390 J = J2, J1, KA1 +* +* create nonzero element a(j+1,j-ka) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( KA1, J-KA+1 ) + AB( KA1, J-KA+1 ) = RWORK( J )*AB( KA1, J-KA+1 ) + 390 CONTINUE + IF( UPDATE ) THEN + IF( I-K.LT.N-KA .AND. K.LE.KBT ) + $ WORK( I-K+KA ) = WORK( I-K ) + END IF + 400 CONTINUE +* + DO 440 K = KB, 1, -1 + J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1 + NR = ( N-J2+KA ) / KA1 + J1 = J2 + ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL ZLARGV( NR, AB( KA1, J2-KA ), INCA, WORK( J2 ), KA1, + $ RWORK( J2 ), KA1 ) +* +* apply rotations in 2nd set from the left +* + DO 410 L = 1, KA - 1 + CALL ZLARTV( NR, AB( L+1, J2-L ), INCA, + $ AB( L+2, J2-L ), INCA, RWORK( J2 ), + $ WORK( J2 ), KA1 ) + 410 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL ZLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ), + $ INCA, RWORK( J2 ), WORK( J2 ), KA1 ) +* + CALL ZLACGV( NR, WORK( J2 ), KA1 ) + END IF +* +* start applying rotations in 2nd set from the right +* + DO 420 L = KA - 1, KB - K + 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, RWORK( J2 ), + $ WORK( J2 ), KA1 ) + 420 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 430 J = J2, J1, KA1 + CALL ZROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1, + $ RWORK( J ), WORK( J ) ) + 430 CONTINUE + END IF + 440 CONTINUE +* + DO 460 K = 1, KB - 1 + J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1 +* +* finish applying rotations in 1st set from the right +* + DO 450 L = KB - K, 1, -1 + NRT = ( N-J2+L ) / KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( KA1-L+1, J2 ), INCA, + $ AB( KA1-L, J2+1 ), INCA, RWORK( J2-M ), + $ WORK( J2-M ), KA1 ) + 450 CONTINUE + 460 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 470 J = N - 1, J2 + KA, -1 + RWORK( J-M ) = RWORK( J-KA-M ) + WORK( J-M ) = WORK( J-KA-M ) + 470 CONTINUE + END IF +* + END IF +* + GO TO 10 +* + 480 CONTINUE +* +* **************************** Phase 2 ***************************** +* +* The logical structure of this phase is: +* +* UPDATE = .TRUE. +* DO I = 1, M +* use S(i) to update A and create a new bulge +* apply rotations to push all bulges KA positions upward +* END DO +* UPDATE = .FALSE. +* DO I = M - KA - 1, 2, -1 +* apply rotations to push all bulges KA positions upward +* END DO +* +* To avoid duplicating code, the two loops are merged. +* + UPDATE = .TRUE. + I = 0 + 490 CONTINUE + IF( UPDATE ) THEN + I = I + 1 + KBT = MIN( KB, M-I ) + I0 = I + 1 + I1 = MAX( 1, I-KA ) + I2 = I + KBT - KA1 + IF( I.GT.M ) THEN + UPDATE = .FALSE. + I = I - 1 + I0 = M + 1 + IF( KA.EQ.0 ) + $ RETURN + GO TO 490 + END IF + ELSE + I = I - KA + IF( I.LT.2 ) + $ RETURN + END IF +* + IF( I.LT.M-KBT ) THEN + NX = M + ELSE + NX = N + END IF +* + IF( UPPER ) THEN +* +* Transform A, working with the upper triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**H * A * inv(S(i)) +* + BII = DBLE( BB( KB1, I ) ) + AB( KA1, I ) = ( DBLE( AB( KA1, I ) ) / BII ) / BII + DO 500 J = I1, I - 1 + AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII + 500 CONTINUE + DO 510 J = I + 1, MIN( N, I+KA ) + AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII + 510 CONTINUE + DO 540 K = I + 1, I + KBT + DO 520 J = K, I + KBT + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ BB( I-J+KB1, J )* + $ DCONJG( AB( I-K+KA1, K ) ) - + $ DCONJG( BB( I-K+KB1, K ) )* + $ AB( I-J+KA1, J ) + + $ DBLE( AB( KA1, I ) )* + $ BB( I-J+KB1, J )* + $ DCONJG( BB( I-K+KB1, K ) ) + 520 CONTINUE + DO 530 J = I + KBT + 1, MIN( N, I+KA ) + AB( K-J+KA1, J ) = AB( K-J+KA1, J ) - + $ DCONJG( BB( I-K+KB1, K ) )* + $ AB( I-J+KA1, J ) + 530 CONTINUE + 540 CONTINUE + DO 560 J = I1, I + DO 550 K = I + 1, MIN( J+KA, I+KBT ) + AB( J-K+KA1, K ) = AB( J-K+KA1, K ) - + $ BB( I-K+KB1, K )*AB( J-I+KA1, I ) + 550 CONTINUE + 560 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL ZDSCAL( NX, ONE / BII, X( 1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL ZGERU( NX, KBT, -CONE, X( 1, I ), 1, + $ BB( KB, I+1 ), LDBB-1, X( 1, I+1 ), LDX ) + END IF +* +* store a(i1,i) in RA1 for use in next loop over K +* + RA1 = AB( I1-I+KA1, I ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions up toward the top of the band +* + DO 610 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN +* +* generate rotation to annihilate a(i+k-ka-1,i) +* + CALL ZLARTG( AB( K+1, I ), RA1, RWORK( I+K-KA ), + $ WORK( I+K-KA ), RA ) +* +* create nonzero element a(i+k-ka-1,i+k) outside the +* band and store it in WORK(m-kb+i+k) +* + T = -BB( KB1-K, I+K )*RA1 + WORK( M-KB+I+K ) = RWORK( I+K-KA )*T - + $ DCONJG( WORK( I+K-KA ) )* + $ AB( 1, I+K ) + AB( 1, I+K ) = WORK( I+K-KA )*T + + $ RWORK( I+K-KA )*AB( 1, I+K ) + RA1 = RA + END IF + END IF + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MIN( J2, I-2*KA+K-1 ) + ELSE + J2T = J2 + END IF + NRT = ( J2T+KA-1 ) / KA1 + DO 570 J = J1, J2T, KA1 +* +* create nonzero element a(j-1,j+ka) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( 1, J+KA-1 ) + AB( 1, J+KA-1 ) = RWORK( J )*AB( 1, J+KA-1 ) + 570 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL ZLARGV( NRT, AB( 1, J1+KA ), INCA, WORK( J1 ), KA1, + $ RWORK( J1 ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the left +* + DO 580 L = 1, KA - 1 + CALL ZLARTV( NR, AB( KA1-L, J1+L ), INCA, + $ AB( KA-L, J1+L ), INCA, RWORK( J1 ), + $ WORK( J1 ), KA1 ) + 580 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL ZLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ), + $ AB( KA, J1 ), INCA, RWORK( J1 ), WORK( J1 ), + $ KA1 ) +* + CALL ZLACGV( NR, WORK( J1 ), KA1 ) + END IF +* +* start applying rotations in 1st set from the right +* + DO 590 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, RWORK( J1T ), + $ WORK( J1T ), KA1 ) + 590 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 600 J = J1, J2, KA1 + CALL ZROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ RWORK( J ), WORK( J ) ) + 600 CONTINUE + END IF + 610 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.GT.0 .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i+kbt-ka-1,i+kbt) outside the +* band and store it in WORK(m-kb+i+kbt) +* + WORK( M-KB+I+KBT ) = -BB( KB1-KBT, I+KBT )*RA1 + END IF + END IF +* + DO 650 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1 + ELSE + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + END IF +* +* finish applying rotations in 2nd set from the right +* + DO 620 L = KB - K, 1, -1 + NRT = ( J2+KA+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( L, J1T+KA ), INCA, + $ AB( L+1, J1T+KA-1 ), INCA, + $ RWORK( M-KB+J1T+KA ), + $ WORK( M-KB+J1T+KA ), KA1 ) + 620 CONTINUE + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + DO 630 J = J1, J2, KA1 + WORK( M-KB+J ) = WORK( M-KB+J+KA ) + RWORK( M-KB+J ) = RWORK( M-KB+J+KA ) + 630 CONTINUE + DO 640 J = J1, J2, KA1 +* +* create nonzero element a(j-1,j+ka) outside the band +* and store it in WORK(m-kb+j) +* + WORK( M-KB+J ) = WORK( M-KB+J )*AB( 1, J+KA-1 ) + AB( 1, J+KA-1 ) = RWORK( M-KB+J )*AB( 1, J+KA-1 ) + 640 CONTINUE + IF( UPDATE ) THEN + IF( I+K.GT.KA1 .AND. K.LE.KBT ) + $ WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K ) + END IF + 650 CONTINUE +* + DO 690 K = KB, 1, -1 + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL ZLARGV( NR, AB( 1, J1+KA ), INCA, WORK( M-KB+J1 ), + $ KA1, RWORK( M-KB+J1 ), KA1 ) +* +* apply rotations in 2nd set from the left +* + DO 660 L = 1, KA - 1 + CALL ZLARTV( NR, AB( KA1-L, J1+L ), INCA, + $ AB( KA-L, J1+L ), INCA, RWORK( M-KB+J1 ), + $ WORK( M-KB+J1 ), KA1 ) + 660 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL ZLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ), + $ AB( KA, J1 ), INCA, RWORK( M-KB+J1 ), + $ WORK( M-KB+J1 ), KA1 ) +* + CALL ZLACGV( NR, WORK( M-KB+J1 ), KA1 ) + END IF +* +* start applying rotations in 2nd set from the right +* + DO 670 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, + $ RWORK( M-KB+J1T ), WORK( M-KB+J1T ), + $ KA1 ) + 670 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 680 J = J1, J2, KA1 + CALL ZROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ RWORK( M-KB+J ), WORK( M-KB+J ) ) + 680 CONTINUE + END IF + 690 CONTINUE +* + DO 710 K = 1, KB - 1 + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 +* +* finish applying rotations in 1st set from the right +* + DO 700 L = KB - K, 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( L, J1T ), INCA, + $ AB( L+1, J1T-1 ), INCA, RWORK( J1T ), + $ WORK( J1T ), KA1 ) + 700 CONTINUE + 710 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 720 J = 2, I2 - KA + RWORK( J ) = RWORK( J+KA ) + WORK( J ) = WORK( J+KA ) + 720 CONTINUE + END IF +* + ELSE +* +* Transform A, working with the lower triangle +* + IF( UPDATE ) THEN +* +* Form inv(S(i))**H * A * inv(S(i)) +* + BII = DBLE( BB( 1, I ) ) + AB( 1, I ) = ( DBLE( AB( 1, I ) ) / BII ) / BII + DO 730 J = I1, I - 1 + AB( I-J+1, J ) = AB( I-J+1, J ) / BII + 730 CONTINUE + DO 740 J = I + 1, MIN( N, I+KA ) + AB( J-I+1, I ) = AB( J-I+1, I ) / BII + 740 CONTINUE + DO 770 K = I + 1, I + KBT + DO 750 J = K, I + KBT + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ BB( J-I+1, I )*DCONJG( AB( K-I+1, + $ I ) ) - DCONJG( BB( K-I+1, I ) )* + $ AB( J-I+1, I ) + DBLE( AB( 1, I ) )* + $ BB( J-I+1, I )*DCONJG( BB( K-I+1, + $ I ) ) + 750 CONTINUE + DO 760 J = I + KBT + 1, MIN( N, I+KA ) + AB( J-K+1, K ) = AB( J-K+1, K ) - + $ DCONJG( BB( K-I+1, I ) )* + $ AB( J-I+1, I ) + 760 CONTINUE + 770 CONTINUE + DO 790 J = I1, I + DO 780 K = I + 1, MIN( J+KA, I+KBT ) + AB( K-J+1, J ) = AB( K-J+1, J ) - + $ BB( K-I+1, I )*AB( I-J+1, J ) + 780 CONTINUE + 790 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by inv(S(i)) +* + CALL ZDSCAL( NX, ONE / BII, X( 1, I ), 1 ) + IF( KBT.GT.0 ) + $ CALL ZGERC( NX, KBT, -CONE, X( 1, I ), 1, BB( 2, I ), + $ 1, X( 1, I+1 ), LDX ) + END IF +* +* store a(i,i1) in RA1 for use in next loop over K +* + RA1 = AB( I-I1+1, I1 ) + END IF +* +* Generate and apply vectors of rotations to chase all the +* existing bulges KA positions up toward the top of the band +* + DO 840 K = 1, KB - 1 + IF( UPDATE ) THEN +* +* Determine the rotations which would annihilate the bulge +* which has in theory just been created +* + IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN +* +* generate rotation to annihilate a(i,i+k-ka-1) +* + CALL ZLARTG( AB( KA1-K, I+K-KA ), RA1, + $ RWORK( I+K-KA ), WORK( I+K-KA ), RA ) +* +* create nonzero element a(i+k,i+k-ka-1) outside the +* band and store it in WORK(m-kb+i+k) +* + T = -BB( K+1, I )*RA1 + WORK( M-KB+I+K ) = RWORK( I+K-KA )*T - + $ DCONJG( WORK( I+K-KA ) )* + $ AB( KA1, I+K-KA ) + AB( KA1, I+K-KA ) = WORK( I+K-KA )*T + + $ RWORK( I+K-KA )*AB( KA1, I+K-KA ) + RA1 = RA + END IF + END IF + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( UPDATE ) THEN + J2T = MIN( J2, I-2*KA+K-1 ) + ELSE + J2T = J2 + END IF + NRT = ( J2T+KA-1 ) / KA1 + DO 800 J = J1, J2T, KA1 +* +* create nonzero element a(j+ka,j-1) outside the band +* and store it in WORK(j) +* + WORK( J ) = WORK( J )*AB( KA1, J-1 ) + AB( KA1, J-1 ) = RWORK( J )*AB( KA1, J-1 ) + 800 CONTINUE +* +* generate rotations in 1st set to annihilate elements which +* have been created outside the band +* + IF( NRT.GT.0 ) + $ CALL ZLARGV( NRT, AB( KA1, J1 ), INCA, WORK( J1 ), KA1, + $ RWORK( J1 ), KA1 ) + IF( NR.GT.0 ) THEN +* +* apply rotations in 1st set from the right +* + DO 810 L = 1, KA - 1 + CALL ZLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ), + $ INCA, RWORK( J1 ), WORK( J1 ), KA1 ) + 810 CONTINUE +* +* apply rotations in 1st set from both sides to diagonal +* blocks +* + CALL ZLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ), + $ AB( 2, J1-1 ), INCA, RWORK( J1 ), + $ WORK( J1 ), KA1 ) +* + CALL ZLACGV( NR, WORK( J1 ), KA1 ) + END IF +* +* start applying rotations in 1st set from the left +* + DO 820 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ RWORK( J1T ), WORK( J1T ), KA1 ) + 820 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 1st set +* + DO 830 J = J1, J2, KA1 + CALL ZROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ RWORK( J ), DCONJG( WORK( J ) ) ) + 830 CONTINUE + END IF + 840 CONTINUE +* + IF( UPDATE ) THEN + IF( I2.GT.0 .AND. KBT.GT.0 ) THEN +* +* create nonzero element a(i+kbt,i+kbt-ka-1) outside the +* band and store it in WORK(m-kb+i+kbt) +* + WORK( M-KB+I+KBT ) = -BB( KBT+1, I )*RA1 + END IF + END IF +* + DO 880 K = KB, 1, -1 + IF( UPDATE ) THEN + J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1 + ELSE + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + END IF +* +* finish applying rotations in 2nd set from the left +* + DO 850 L = KB - K, 1, -1 + NRT = ( J2+KA+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( KA1-L+1, J1T+L-1 ), INCA, + $ AB( KA1-L, J1T+L-1 ), INCA, + $ RWORK( M-KB+J1T+KA ), + $ WORK( M-KB+J1T+KA ), KA1 ) + 850 CONTINUE + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + DO 860 J = J1, J2, KA1 + WORK( M-KB+J ) = WORK( M-KB+J+KA ) + RWORK( M-KB+J ) = RWORK( M-KB+J+KA ) + 860 CONTINUE + DO 870 J = J1, J2, KA1 +* +* create nonzero element a(j+ka,j-1) outside the band +* and store it in WORK(m-kb+j) +* + WORK( M-KB+J ) = WORK( M-KB+J )*AB( KA1, J-1 ) + AB( KA1, J-1 ) = RWORK( M-KB+J )*AB( KA1, J-1 ) + 870 CONTINUE + IF( UPDATE ) THEN + IF( I+K.GT.KA1 .AND. K.LE.KBT ) + $ WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K ) + END IF + 880 CONTINUE +* + DO 920 K = KB, 1, -1 + J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1 + NR = ( J2+KA-1 ) / KA1 + J1 = J2 - ( NR-1 )*KA1 + IF( NR.GT.0 ) THEN +* +* generate rotations in 2nd set to annihilate elements +* which have been created outside the band +* + CALL ZLARGV( NR, AB( KA1, J1 ), INCA, WORK( M-KB+J1 ), + $ KA1, RWORK( M-KB+J1 ), KA1 ) +* +* apply rotations in 2nd set from the right +* + DO 890 L = 1, KA - 1 + CALL ZLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ), + $ INCA, RWORK( M-KB+J1 ), WORK( M-KB+J1 ), + $ KA1 ) + 890 CONTINUE +* +* apply rotations in 2nd set from both sides to diagonal +* blocks +* + CALL ZLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ), + $ AB( 2, J1-1 ), INCA, RWORK( M-KB+J1 ), + $ WORK( M-KB+J1 ), KA1 ) +* + CALL ZLACGV( NR, WORK( M-KB+J1 ), KA1 ) + END IF +* +* start applying rotations in 2nd set from the left +* + DO 900 L = KA - 1, KB - K + 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ RWORK( M-KB+J1T ), WORK( M-KB+J1T ), + $ KA1 ) + 900 CONTINUE +* + IF( WANTX ) THEN +* +* post-multiply X by product of rotations in 2nd set +* + DO 910 J = J1, J2, KA1 + CALL ZROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1, + $ RWORK( M-KB+J ), DCONJG( WORK( M-KB+J ) ) ) + 910 CONTINUE + END IF + 920 CONTINUE +* + DO 940 K = 1, KB - 1 + J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1 +* +* finish applying rotations in 1st set from the left +* + DO 930 L = KB - K, 1, -1 + NRT = ( J2+L-1 ) / KA1 + J1T = J2 - ( NRT-1 )*KA1 + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA, + $ AB( KA1-L, J1T-KA1+L ), INCA, + $ RWORK( J1T ), WORK( J1T ), KA1 ) + 930 CONTINUE + 940 CONTINUE +* + IF( KB.GT.1 ) THEN + DO 950 J = 2, I2 - KA + RWORK( J ) = RWORK( J+KA ) + WORK( J ) = WORK( J+KA ) + 950 CONTINUE + END IF +* + END IF +* + GO TO 490 +* +* End of ZHBGST +* + END diff --git a/dspl/liblapack/SRC/zhbgv.f b/dspl/liblapack/SRC/zhbgv.f new file mode 100644 index 0000000..d7e8978 --- /dev/null +++ b/dspl/liblapack/SRC/zhbgv.f @@ -0,0 +1,287 @@ +*> \brief \b ZHBGV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHBGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, +* LDZ, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 AB( LDAB, * ), BB( LDBB, * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHBGV computes all the eigenvalues, and optionally, the eigenvectors +*> of a complex generalized Hermitian-definite banded eigenproblem, of +*> the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian +*> and banded, and B is also positive definite. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] KA +*> \verbatim +*> KA is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KA >= 0. +*> \endverbatim +*> +*> \param[in] KB +*> \verbatim +*> KB is INTEGER +*> The number of superdiagonals of the matrix B if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KB >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first ka+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +*> +*> On exit, the contents of AB are destroyed. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KA+1. +*> \endverbatim +*> +*> \param[in,out] BB +*> \verbatim +*> BB is COMPLEX*16 array, dimension (LDBB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix B, stored in the first kb+1 rows of the array. The +*> j-th column of B is stored in the j-th column of the array BB +*> as follows: +*> if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +*> if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +*> +*> On exit, the factor S from the split Cholesky factorization +*> B = S**H*S, as returned by ZPBSTF. +*> \endverbatim +*> +*> \param[in] LDBB +*> \verbatim +*> LDBB is INTEGER +*> The leading dimension of the array BB. LDBB >= KB+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +*> eigenvectors, with the i-th column of Z holding the +*> eigenvector associated with W(i). The eigenvectors are +*> normalized so that Z**H*B*Z = I. +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is: +*> <= N: the algorithm failed to converge: +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then ZPBSTF +*> returned INFO = i: B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHEReigen +* +* ===================================================================== + SUBROUTINE ZHBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z, + $ LDZ, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AB( LDAB, * ), BB( LDBB, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, WANTZ + CHARACTER VECT + INTEGER IINFO, INDE, INDWRK +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSTERF, XERBLA, ZHBGST, ZHBTRD, ZPBSTF, ZSTEQR +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBGV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL ZPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + INDE = 1 + INDWRK = INDE + N + CALL ZHBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ, + $ WORK, RWORK( INDWRK ), IINFO ) +* +* Reduce to tridiagonal form. +* + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL ZHBTRD( VECT, UPLO, N, KA, AB, LDAB, W, RWORK( INDE ), Z, + $ LDZ, WORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call ZSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL ZSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ, + $ RWORK( INDWRK ), INFO ) + END IF + RETURN +* +* End of ZHBGV +* + END diff --git a/dspl/liblapack/SRC/zhbgvd.f b/dspl/liblapack/SRC/zhbgvd.f new file mode 100644 index 0000000..b2f7921 --- /dev/null +++ b/dspl/liblapack/SRC/zhbgvd.f @@ -0,0 +1,407 @@ +*> \brief \b ZHBGVD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHBGVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, +* Z, LDZ, WORK, LWORK, RWORK, LRWORK, IWORK, +* LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, LIWORK, LRWORK, +* $ LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 AB( LDAB, * ), BB( LDBB, * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHBGVD computes all the eigenvalues, and optionally, the eigenvectors +*> of a complex generalized Hermitian-definite banded eigenproblem, of +*> the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian +*> and banded, and B is also positive definite. If eigenvectors are +*> desired, it uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] KA +*> \verbatim +*> KA is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KA >= 0. +*> \endverbatim +*> +*> \param[in] KB +*> \verbatim +*> KB is INTEGER +*> The number of superdiagonals of the matrix B if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KB >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first ka+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +*> +*> On exit, the contents of AB are destroyed. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KA+1. +*> \endverbatim +*> +*> \param[in,out] BB +*> \verbatim +*> BB is COMPLEX*16 array, dimension (LDBB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix B, stored in the first kb+1 rows of the array. The +*> j-th column of B is stored in the j-th column of the array BB +*> as follows: +*> if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +*> if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +*> +*> On exit, the factor S from the split Cholesky factorization +*> B = S**H*S, as returned by ZPBSTF. +*> \endverbatim +*> +*> \param[in] LDBB +*> \verbatim +*> LDBB is INTEGER +*> The leading dimension of the array BB. LDBB >= KB+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +*> eigenvectors, with the i-th column of Z holding the +*> eigenvector associated with W(i). The eigenvectors are +*> normalized so that Z**H*B*Z = I. +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO=0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK >= 1. +*> If JOBZ = 'N' and N > 1, LWORK >= N. +*> If JOBZ = 'V' and N > 1, LWORK >= 2*N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) +*> On exit, if INFO=0, RWORK(1) returns the optimal LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of array RWORK. +*> If N <= 1, LRWORK >= 1. +*> If JOBZ = 'N' and N > 1, LRWORK >= N. +*> If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO=0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK >= 1. +*> If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is: +*> <= N: the algorithm failed to converge: +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then ZPBSTF +*> returned INFO = i: B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHEReigen +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE ZHBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, + $ Z, LDZ, WORK, LWORK, RWORK, LRWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, KA, KB, LDAB, LDBB, LDZ, LIWORK, LRWORK, + $ LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AB( LDAB, * ), BB( LDBB, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE, CZERO + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), + $ CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER VECT + INTEGER IINFO, INDE, INDWK2, INDWRK, LIWMIN, LLRWK, + $ LLWK2, LRWMIN, LWMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DSTERF, XERBLA, ZGEMM, ZHBGST, ZHBTRD, ZLACPY, + $ ZPBSTF, ZSTEDC +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1+N + LRWMIN = 1+N + LIWMIN = 1 + ELSE IF( WANTZ ) THEN + LWMIN = 2*N**2 + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + LRWMIN = N + LIWMIN = 1 + END IF + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KA.LT.0 ) THEN + INFO = -4 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -7 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -16 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL ZPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + INDE = 1 + INDWRK = INDE + N + INDWK2 = 1 + N*N + LLWK2 = LWORK - INDWK2 + 2 + LLRWK = LRWORK - INDWRK + 2 + CALL ZHBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ, + $ WORK, RWORK, IINFO ) +* +* Reduce Hermitian band matrix to tridiagonal form. +* + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL ZHBTRD( VECT, UPLO, N, KA, AB, LDAB, W, RWORK( INDE ), Z, + $ LDZ, WORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, call ZSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ), + $ LLWK2, RWORK( INDWRK ), LLRWK, IWORK, LIWORK, + $ INFO ) + CALL ZGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO, + $ WORK( INDWK2 ), N ) + CALL ZLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ ) + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of ZHBGVD +* + END diff --git a/dspl/liblapack/SRC/zhbgvx.f b/dspl/liblapack/SRC/zhbgvx.f new file mode 100644 index 0000000..07526a5 --- /dev/null +++ b/dspl/liblapack/SRC/zhbgvx.f @@ -0,0 +1,535 @@ +*> \brief \b ZHBGVX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHBGVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHBGVX( JOBZ, RANGE, UPLO, N, KA, KB, AB, LDAB, BB, +* LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, +* LDZ, WORK, RWORK, IWORK, IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, KA, KB, LDAB, LDBB, LDQ, LDZ, M, +* $ N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 AB( LDAB, * ), BB( LDBB, * ), Q( LDQ, * ), +* $ WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHBGVX computes all the eigenvalues, and optionally, the eigenvectors +*> of a complex generalized Hermitian-definite banded eigenproblem, of +*> the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian +*> and banded, and B is also positive definite. Eigenvalues and +*> eigenvectors can be selected by specifying either all eigenvalues, +*> a range of values or a range of indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found; +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found; +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] KA +*> \verbatim +*> KA is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KA >= 0. +*> \endverbatim +*> +*> \param[in] KB +*> \verbatim +*> KB is INTEGER +*> The number of superdiagonals of the matrix B if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KB >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first ka+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+ka). +*> +*> On exit, the contents of AB are destroyed. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KA+1. +*> \endverbatim +*> +*> \param[in,out] BB +*> \verbatim +*> BB is COMPLEX*16 array, dimension (LDBB, N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix B, stored in the first kb+1 rows of the array. The +*> j-th column of B is stored in the j-th column of the array BB +*> as follows: +*> if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; +*> if UPLO = 'L', BB(1+i-j,j) = B(i,j) for j<=i<=min(n,j+kb). +*> +*> On exit, the factor S from the split Cholesky factorization +*> B = S**H*S, as returned by ZPBSTF. +*> \endverbatim +*> +*> \param[in] LDBB +*> \verbatim +*> LDBB is INTEGER +*> The leading dimension of the array BB. LDBB >= KB+1. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ, N) +*> If JOBZ = 'V', the n-by-n matrix used in the reduction of +*> A*x = (lambda)*B*x to standard form, i.e. C*x = (lambda)*x, +*> and consequently C to tridiagonal form. +*> If JOBZ = 'N', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. If JOBZ = 'N', +*> LDQ >= 1. If JOBZ = 'V', LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing AP to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +*> eigenvectors, with the i-th column of Z holding the +*> eigenvector associated with W(i). The eigenvectors are +*> normalized so that Z**H*B*Z = I. +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is: +*> <= N: then i eigenvectors failed to converge. Their +*> indices are stored in array IFAIL. +*> > N: if INFO = N + i, for 1 <= i <= N, then ZPBSTF +*> returned INFO = i: B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHEReigen +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE ZHBGVX( JOBZ, RANGE, UPLO, N, KA, KB, AB, LDAB, BB, + $ LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z, + $ LDZ, WORK, RWORK, IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, KA, KB, LDAB, LDBB, LDQ, LDZ, M, + $ N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AB( LDAB, * ), BB( LDBB, * ), Q( LDQ, * ), + $ WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, TEST, UPPER, VALEIG, WANTZ + CHARACTER ORDER, VECT + INTEGER I, IINFO, INDD, INDE, INDEE, INDIBL, INDISP, + $ INDIWK, INDRWK, INDWRK, ITMP1, J, JJ, NSPLIT + DOUBLE PRECISION TMP1 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSTEBZ, DSTERF, XERBLA, ZCOPY, ZGEMV, + $ ZHBGST, ZHBTRD, ZLACPY, ZPBSTF, ZSTEIN, ZSTEQR, + $ ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KA.LT.0 ) THEN + INFO = -5 + ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KA+1 ) THEN + INFO = -8 + ELSE IF( LDBB.LT.KB+1 ) THEN + INFO = -10 + ELSE IF( LDQ.LT.1 .OR. ( WANTZ .AND. LDQ.LT.N ) ) THEN + INFO = -12 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -14 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -15 + ELSE IF ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -16 + END IF + END IF + END IF + IF( INFO.EQ.0) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -21 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBGVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* +* Form a split Cholesky factorization of B. +* + CALL ZPBSTF( UPLO, N, KB, BB, LDBB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem. +* + CALL ZHBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Q, LDQ, + $ WORK, RWORK, IINFO ) +* +* Solve the standard eigenvalue problem. +* Reduce Hermitian band matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N + INDWRK = 1 + IF( WANTZ ) THEN + VECT = 'U' + ELSE + VECT = 'N' + END IF + CALL ZHBTRD( VECT, UPLO, N, KA, AB, LDAB, RWORK( INDD ), + $ RWORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call DSTERF or ZSTEQR. If this fails for some +* eigenvalue, then try DSTEBZ. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG .OR. TEST ) .AND. ( ABSTOL.LE.ZERO ) ) THEN + CALL DCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL ZLACPY( 'A', N, N, Q, LDQ, Z, LDZ ) + CALL ZSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, +* call ZSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL ZSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by ZSTEIN. +* + DO 20 J = 1, M + CALL ZCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 ) + CALL ZGEMV( 'N', N, N, CONE, Q, LDQ, WORK, 1, CZERO, + $ Z( 1, J ), 1 ) + 20 CONTINUE + END IF +* + 30 CONTINUE +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 50 CONTINUE + END IF +* + RETURN +* +* End of ZHBGVX +* + END diff --git a/dspl/liblapack/SRC/zhbtrd.f b/dspl/liblapack/SRC/zhbtrd.f new file mode 100644 index 0000000..7f5af1b --- /dev/null +++ b/dspl/liblapack/SRC/zhbtrd.f @@ -0,0 +1,677 @@ +*> \brief \b ZHBTRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHBTRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHBTRD( VECT, UPLO, N, KD, AB, LDAB, D, E, Q, LDQ, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, VECT +* INTEGER INFO, KD, LDAB, LDQ, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* COMPLEX*16 AB( LDAB, * ), Q( LDQ, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHBTRD reduces a complex Hermitian band matrix A to real symmetric +*> tridiagonal form T by a unitary similarity transformation: +*> Q**H * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': do not form Q; +*> = 'V': form Q; +*> = 'U': update a matrix X, by forming X*Q. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> On exit, the diagonal elements of AB are overwritten by the +*> diagonal elements of the tridiagonal matrix T; if KD > 0, the +*> elements on the first superdiagonal (if UPLO = 'U') or the +*> first subdiagonal (if UPLO = 'L') are overwritten by the +*> off-diagonal elements of T; the rest of AB is overwritten by +*> values generated during the reduction. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ,N) +*> On entry, if VECT = 'U', then Q must contain an N-by-N +*> matrix X; if VECT = 'N' or 'V', then Q need not be set. +*> +*> On exit: +*> if VECT = 'V', Q contains the N-by-N unitary matrix Q; +*> if VECT = 'U', Q contains the product X*Q; +*> if VECT = 'N', the array Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= 1, and LDQ >= N if VECT = 'V' or 'U'. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Modified by Linda Kaufman, Bell Labs. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHBTRD( VECT, UPLO, N, KD, AB, LDAB, D, E, Q, LDQ, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO, VECT + INTEGER INFO, KD, LDAB, LDQ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + COMPLEX*16 AB( LDAB, * ), Q( LDQ, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL INITQ, UPPER, WANTQ + INTEGER I, I2, IBL, INCA, INCX, IQAEND, IQB, IQEND, J, + $ J1, J1END, J1INC, J2, JEND, JIN, JINC, K, KD1, + $ KDM1, KDN, L, LAST, LEND, NQ, NR, NRT + DOUBLE PRECISION ABST + COMPLEX*16 T, TEMP +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACGV, ZLAR2V, ZLARGV, ZLARTG, ZLARTV, + $ ZLASET, ZROT, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INITQ = LSAME( VECT, 'V' ) + WANTQ = INITQ .OR. LSAME( VECT, 'U' ) + UPPER = LSAME( UPLO, 'U' ) + KD1 = KD + 1 + KDM1 = KD - 1 + INCX = LDAB - 1 + IQEND = 1 +* + INFO = 0 + IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD1 ) THEN + INFO = -6 + ELSE IF( LDQ.LT.MAX( 1, N ) .AND. WANTQ ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHBTRD', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize Q to the unit matrix, if needed +* + IF( INITQ ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) +* +* Wherever possible, plane rotations are generated and applied in +* vector operations of length NR over the index set J1:J2:KD1. +* +* The real cosines and complex sines of the plane rotations are +* stored in the arrays D and WORK. +* + INCA = KD1*LDAB + KDN = MIN( N-1, KD ) + IF( UPPER ) THEN +* + IF( KD.GT.1 ) THEN +* +* Reduce to complex Hermitian tridiagonal form, working with +* the upper triangle +* + NR = 0 + J1 = KDN + 2 + J2 = 1 +* + AB( KD1, 1 ) = DBLE( AB( KD1, 1 ) ) + DO 90 I = 1, N - 2 +* +* Reduce i-th row of matrix to tridiagonal form +* + DO 80 K = KDN + 1, 2, -1 + J1 = J1 + KDN + J2 = J2 + KDN +* + IF( NR.GT.0 ) THEN +* +* generate plane rotations to annihilate nonzero +* elements which have been created outside the band +* + CALL ZLARGV( NR, AB( 1, J1-1 ), INCA, WORK( J1 ), + $ KD1, D( J1 ), KD1 ) +* +* apply rotations from the right +* +* +* Dependent on the the number of diagonals either +* ZLARTV or ZROT is used +* + IF( NR.GE.2*KD-1 ) THEN + DO 10 L = 1, KD - 1 + CALL ZLARTV( NR, AB( L+1, J1-1 ), INCA, + $ AB( L, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) + 10 CONTINUE +* + ELSE + JEND = J1 + ( NR-1 )*KD1 + DO 20 JINC = J1, JEND, KD1 + CALL ZROT( KDM1, AB( 2, JINC-1 ), 1, + $ AB( 1, JINC ), 1, D( JINC ), + $ WORK( JINC ) ) + 20 CONTINUE + END IF + END IF +* +* + IF( K.GT.2 ) THEN + IF( K.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i,i+k-1) +* within the band +* + CALL ZLARTG( AB( KD-K+3, I+K-2 ), + $ AB( KD-K+2, I+K-1 ), D( I+K-1 ), + $ WORK( I+K-1 ), TEMP ) + AB( KD-K+3, I+K-2 ) = TEMP +* +* apply rotation from the right +* + CALL ZROT( K-3, AB( KD-K+4, I+K-2 ), 1, + $ AB( KD-K+3, I+K-1 ), 1, D( I+K-1 ), + $ WORK( I+K-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KDN - 1 + END IF +* +* apply plane rotations from both sides to diagonal +* blocks +* + IF( NR.GT.0 ) + $ CALL ZLAR2V( NR, AB( KD1, J1-1 ), AB( KD1, J1 ), + $ AB( KD, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) +* +* apply plane rotations from the left +* + IF( NR.GT.0 ) THEN + CALL ZLACGV( NR, WORK( J1 ), KD1 ) + IF( 2*KD-1.LT.NR ) THEN +* +* Dependent on the the number of diagonals either +* ZLARTV or ZROT is used +* + DO 30 L = 1, KD - 1 + IF( J2+L.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( KD-L, J1+L ), INCA, + $ AB( KD-L+1, J1+L ), INCA, + $ D( J1 ), WORK( J1 ), KD1 ) + 30 CONTINUE + ELSE + J1END = J1 + KD1*( NR-2 ) + IF( J1END.GE.J1 ) THEN + DO 40 JIN = J1, J1END, KD1 + CALL ZROT( KD-1, AB( KD-1, JIN+1 ), INCX, + $ AB( KD, JIN+1 ), INCX, + $ D( JIN ), WORK( JIN ) ) + 40 CONTINUE + END IF + LEND = MIN( KDM1, N-J2 ) + LAST = J1END + KD1 + IF( LEND.GT.0 ) + $ CALL ZROT( LEND, AB( KD-1, LAST+1 ), INCX, + $ AB( KD, LAST+1 ), INCX, D( LAST ), + $ WORK( LAST ) ) + END IF + END IF +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + IF( INITQ ) THEN +* +* take advantage of the fact that Q was +* initially the Identity matrix +* + IQEND = MAX( IQEND, J2 ) + I2 = MAX( 0, K-3 ) + IQAEND = 1 + I*KD + IF( K.EQ.2 ) + $ IQAEND = IQAEND + KD + IQAEND = MIN( IQAEND, IQEND ) + DO 50 J = J1, J2, KD1 + IBL = I - I2 / KDM1 + I2 = I2 + 1 + IQB = MAX( 1, J-IBL ) + NQ = 1 + IQAEND - IQB + IQAEND = MIN( IQAEND+KD, IQEND ) + CALL ZROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ), + $ 1, D( J ), DCONJG( WORK( J ) ) ) + 50 CONTINUE + ELSE +* + DO 60 J = J1, J2, KD1 + CALL ZROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ D( J ), DCONJG( WORK( J ) ) ) + 60 CONTINUE + END IF +* + END IF +* + IF( J2+KDN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KDN - 1 + END IF +* + DO 70 J = J1, J2, KD1 +* +* create nonzero element a(j-1,j+kd) outside the band +* and store it in WORK +* + WORK( J+KD ) = WORK( J )*AB( 1, J+KD ) + AB( 1, J+KD ) = D( J )*AB( 1, J+KD ) + 70 CONTINUE + 80 CONTINUE + 90 CONTINUE + END IF +* + IF( KD.GT.0 ) THEN +* +* make off-diagonal elements real and copy them to E +* + DO 100 I = 1, N - 1 + T = AB( KD, I+1 ) + ABST = ABS( T ) + AB( KD, I+1 ) = ABST + E( I ) = ABST + IF( ABST.NE.ZERO ) THEN + T = T / ABST + ELSE + T = CONE + END IF + IF( I.LT.N-1 ) + $ AB( KD, I+2 ) = AB( KD, I+2 )*T + IF( WANTQ ) THEN + CALL ZSCAL( N, DCONJG( T ), Q( 1, I+1 ), 1 ) + END IF + 100 CONTINUE + ELSE +* +* set E to zero if original matrix was diagonal +* + DO 110 I = 1, N - 1 + E( I ) = ZERO + 110 CONTINUE + END IF +* +* copy diagonal elements to D +* + DO 120 I = 1, N + D( I ) = AB( KD1, I ) + 120 CONTINUE +* + ELSE +* + IF( KD.GT.1 ) THEN +* +* Reduce to complex Hermitian tridiagonal form, working with +* the lower triangle +* + NR = 0 + J1 = KDN + 2 + J2 = 1 +* + AB( 1, 1 ) = DBLE( AB( 1, 1 ) ) + DO 210 I = 1, N - 2 +* +* Reduce i-th column of matrix to tridiagonal form +* + DO 200 K = KDN + 1, 2, -1 + J1 = J1 + KDN + J2 = J2 + KDN +* + IF( NR.GT.0 ) THEN +* +* generate plane rotations to annihilate nonzero +* elements which have been created outside the band +* + CALL ZLARGV( NR, AB( KD1, J1-KD1 ), INCA, + $ WORK( J1 ), KD1, D( J1 ), KD1 ) +* +* apply plane rotations from one side +* +* +* Dependent on the the number of diagonals either +* ZLARTV or ZROT is used +* + IF( NR.GT.2*KD-1 ) THEN + DO 130 L = 1, KD - 1 + CALL ZLARTV( NR, AB( KD1-L, J1-KD1+L ), INCA, + $ AB( KD1-L+1, J1-KD1+L ), INCA, + $ D( J1 ), WORK( J1 ), KD1 ) + 130 CONTINUE + ELSE + JEND = J1 + KD1*( NR-1 ) + DO 140 JINC = J1, JEND, KD1 + CALL ZROT( KDM1, AB( KD, JINC-KD ), INCX, + $ AB( KD1, JINC-KD ), INCX, + $ D( JINC ), WORK( JINC ) ) + 140 CONTINUE + END IF +* + END IF +* + IF( K.GT.2 ) THEN + IF( K.LE.N-I+1 ) THEN +* +* generate plane rotation to annihilate a(i+k-1,i) +* within the band +* + CALL ZLARTG( AB( K-1, I ), AB( K, I ), + $ D( I+K-1 ), WORK( I+K-1 ), TEMP ) + AB( K-1, I ) = TEMP +* +* apply rotation from the left +* + CALL ZROT( K-3, AB( K-2, I+1 ), LDAB-1, + $ AB( K-1, I+1 ), LDAB-1, D( I+K-1 ), + $ WORK( I+K-1 ) ) + END IF + NR = NR + 1 + J1 = J1 - KDN - 1 + END IF +* +* apply plane rotations from both sides to diagonal +* blocks +* + IF( NR.GT.0 ) + $ CALL ZLAR2V( NR, AB( 1, J1-1 ), AB( 1, J1 ), + $ AB( 2, J1-1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) +* +* apply plane rotations from the right +* +* +* Dependent on the the number of diagonals either +* ZLARTV or ZROT is used +* + IF( NR.GT.0 ) THEN + CALL ZLACGV( NR, WORK( J1 ), KD1 ) + IF( NR.GT.2*KD-1 ) THEN + DO 150 L = 1, KD - 1 + IF( J2+L.GT.N ) THEN + NRT = NR - 1 + ELSE + NRT = NR + END IF + IF( NRT.GT.0 ) + $ CALL ZLARTV( NRT, AB( L+2, J1-1 ), INCA, + $ AB( L+1, J1 ), INCA, D( J1 ), + $ WORK( J1 ), KD1 ) + 150 CONTINUE + ELSE + J1END = J1 + KD1*( NR-2 ) + IF( J1END.GE.J1 ) THEN + DO 160 J1INC = J1, J1END, KD1 + CALL ZROT( KDM1, AB( 3, J1INC-1 ), 1, + $ AB( 2, J1INC ), 1, D( J1INC ), + $ WORK( J1INC ) ) + 160 CONTINUE + END IF + LEND = MIN( KDM1, N-J2 ) + LAST = J1END + KD1 + IF( LEND.GT.0 ) + $ CALL ZROT( LEND, AB( 3, LAST-1 ), 1, + $ AB( 2, LAST ), 1, D( LAST ), + $ WORK( LAST ) ) + END IF + END IF +* +* +* + IF( WANTQ ) THEN +* +* accumulate product of plane rotations in Q +* + IF( INITQ ) THEN +* +* take advantage of the fact that Q was +* initially the Identity matrix +* + IQEND = MAX( IQEND, J2 ) + I2 = MAX( 0, K-3 ) + IQAEND = 1 + I*KD + IF( K.EQ.2 ) + $ IQAEND = IQAEND + KD + IQAEND = MIN( IQAEND, IQEND ) + DO 170 J = J1, J2, KD1 + IBL = I - I2 / KDM1 + I2 = I2 + 1 + IQB = MAX( 1, J-IBL ) + NQ = 1 + IQAEND - IQB + IQAEND = MIN( IQAEND+KD, IQEND ) + CALL ZROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ), + $ 1, D( J ), WORK( J ) ) + 170 CONTINUE + ELSE +* + DO 180 J = J1, J2, KD1 + CALL ZROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1, + $ D( J ), WORK( J ) ) + 180 CONTINUE + END IF + END IF +* + IF( J2+KDN.GT.N ) THEN +* +* adjust J2 to keep within the bounds of the matrix +* + NR = NR - 1 + J2 = J2 - KDN - 1 + END IF +* + DO 190 J = J1, J2, KD1 +* +* create nonzero element a(j+kd,j-1) outside the +* band and store it in WORK +* + WORK( J+KD ) = WORK( J )*AB( KD1, J ) + AB( KD1, J ) = D( J )*AB( KD1, J ) + 190 CONTINUE + 200 CONTINUE + 210 CONTINUE + END IF +* + IF( KD.GT.0 ) THEN +* +* make off-diagonal elements real and copy them to E +* + DO 220 I = 1, N - 1 + T = AB( 2, I ) + ABST = ABS( T ) + AB( 2, I ) = ABST + E( I ) = ABST + IF( ABST.NE.ZERO ) THEN + T = T / ABST + ELSE + T = CONE + END IF + IF( I.LT.N-1 ) + $ AB( 2, I+1 ) = AB( 2, I+1 )*T + IF( WANTQ ) THEN + CALL ZSCAL( N, T, Q( 1, I+1 ), 1 ) + END IF + 220 CONTINUE + ELSE +* +* set E to zero if original matrix was diagonal +* + DO 230 I = 1, N - 1 + E( I ) = ZERO + 230 CONTINUE + END IF +* +* copy diagonal elements to D +* + DO 240 I = 1, N + D( I ) = AB( 1, I ) + 240 CONTINUE + END IF +* + RETURN +* +* End of ZHBTRD +* + END diff --git a/dspl/liblapack/SRC/zhecon.f b/dspl/liblapack/SRC/zhecon.f new file mode 100644 index 0000000..bb8ee6d --- /dev/null +++ b/dspl/liblapack/SRC/zhecon.f @@ -0,0 +1,239 @@ +*> \brief \b ZHECON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHECON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHECON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHECON estimates the reciprocal of the condition number of a complex +*> Hermitian matrix A using the factorization A = U*D*U**H or +*> A = L*D*L**H computed by ZHETRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZHETRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZHETRF. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEcomputational +* +* ===================================================================== + SUBROUTINE ZHECON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + DOUBLE PRECISION AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHETRS, ZLACN2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHECON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**H) or inv(U*D*U**H). +* + CALL ZHETRS( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of ZHECON +* + END diff --git a/dspl/liblapack/SRC/zhecon_3.f b/dspl/liblapack/SRC/zhecon_3.f new file mode 100644 index 0000000..8c3a9f3 --- /dev/null +++ b/dspl/liblapack/SRC/zhecon_3.f @@ -0,0 +1,285 @@ +*> \brief \b ZHECON_3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHECON_3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHECON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* COMPLEX*16 A( LDA, * ), E ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> ZHECON_3 estimates the reciprocal of the condition number (in the +*> 1-norm) of a complex Hermitian matrix A using the factorization +*> computed by ZHETRF_RK or ZHETRF_BK: +*> +*> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**H (or L**H) is the conjugate of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is Hermitian and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> This routine uses BLAS3 solver ZHETRS_3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix: +*> = 'U': Upper triangular, form is A = P*U*D*(U**H)*(P**T); +*> = 'L': Lower triangular, form is A = P*L*D*(L**H)*(P**T). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> Diagonal of the block diagonal matrix D and factors U or L +*> as computed by ZHETRF_RK and ZHETRF_BK: +*> a) ONLY diagonal elements of the Hermitian block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the Hermitian block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZHETRF_RK or ZHETRF_BK. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16HEcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> June 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHECON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + DOUBLE PRECISION AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZHETRS_3, ZLACN2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHECON_3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + END DO + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + END DO + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**H) or inv(U*D*U**H). +* + CALL ZHETRS_3( UPLO, N, 1, A, LDA, E, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of ZHECON_3 +* + END diff --git a/dspl/liblapack/SRC/zhecon_rook.f b/dspl/liblapack/SRC/zhecon_rook.f new file mode 100644 index 0000000..6dabdcf --- /dev/null +++ b/dspl/liblapack/SRC/zhecon_rook.f @@ -0,0 +1,253 @@ +*> \brief ZHECON_ROOK estimates the reciprocal of the condition number fort HE matrices using factorization obtained with one of the bounded diagonal pivoting methods (max 2 interchanges) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHECON_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHECON_ROOK( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHECON_ROOK estimates the reciprocal of the condition number of a complex +*> Hermitian matrix A using the factorization A = U*D*U**H or +*> A = L*D*L**H computed by CHETRF_ROOK. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by CHETRF_ROOK. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHETRF_ROOK. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16HEcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> June 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHECON_ROOK( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + DOUBLE PRECISION AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZHETRS_ROOK, ZLACN2, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHECON_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**H) or inv(U*D*U**H). +* + CALL ZHETRS_ROOK( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of ZHECON_ROOK +* + END diff --git a/dspl/liblapack/SRC/zheequb.f b/dspl/liblapack/SRC/zheequb.f new file mode 100644 index 0000000..d698232 --- /dev/null +++ b/dspl/liblapack/SRC/zheequb.f @@ -0,0 +1,343 @@ +*> \brief \b ZHEEQUB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEEQUB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, N +* DOUBLE PRECISION AMAX, SCOND +* CHARACTER UPLO +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), WORK( * ) +* DOUBLE PRECISION S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEEQUB computes row and column scalings intended to equilibrate a +*> Hermitian matrix A (with respect to the Euclidean norm) and reduce +*> its condition number. The scale factors S are computed by the BIN +*> algorithm (see references) so that the scaled matrix B with elements +*> B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of +*> the smallest possible condition number over all possible diagonal +*> scalings. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The N-by-N Hermitian matrix whose scaling factors are to be +*> computed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is DOUBLE PRECISION +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Largest absolute value of any matrix element. If AMAX is +*> very close to overflow or very close to underflow, the +*> matrix should be scaled. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16HEcomputational +* +*> \par References: +* ================ +*> +*> Livne, O.E. and Golub, G.H., "Scaling by Binormalization", \n +*> Numerical Algorithms, vol. 35, no. 1, pp. 97-120, January 2004. \n +*> DOI 10.1023/B:NUMA.0000016606.32820.69 \n +*> Tech report version: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.3.1679 +*> +* ===================================================================== + SUBROUTINE ZHEEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N + DOUBLE PRECISION AMAX, SCOND + CHARACTER UPLO +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), WORK( * ) + DOUBLE PRECISION S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 ) + INTEGER MAX_ITER + PARAMETER ( MAX_ITER = 100 ) +* .. +* .. Local Scalars .. + INTEGER I, J, ITER + DOUBLE PRECISION AVG, STD, TOL, C0, C1, C2, T, U, SI, D, BASE, + $ SMIN, SMAX, SMLNUM, BIGNUM, SCALE, SUMSQ + LOGICAL UP + COMPLEX*16 ZDUM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + LOGICAL LSAME + EXTERNAL DLAMCH, LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, INT, LOG, MAX, MIN, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT. ( LSAME( UPLO, 'U' ) .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -1 + ELSE IF ( N .LT. 0 ) THEN + INFO = -2 + ELSE IF ( LDA .LT. MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF ( INFO .NE. 0 ) THEN + CALL XERBLA( 'ZHEEQUB', -INFO ) + RETURN + END IF + + UP = LSAME( UPLO, 'U' ) + AMAX = ZERO +* +* Quick return if possible. +* + IF ( N .EQ. 0 ) THEN + SCOND = ONE + RETURN + END IF + + DO I = 1, N + S( I ) = ZERO + END DO + + AMAX = ZERO + IF ( UP ) THEN + DO J = 1, N + DO I = 1, J-1 + S( I ) = MAX( S( I ), CABS1( A( I, J ) ) ) + S( J ) = MAX( S( J ), CABS1( A( I, J ) ) ) + AMAX = MAX( AMAX, CABS1( A( I, J ) ) ) + END DO + S( J ) = MAX( S( J ), CABS1( A( J, J ) ) ) + AMAX = MAX( AMAX, CABS1( A( J, J ) ) ) + END DO + ELSE + DO J = 1, N + S( J ) = MAX( S( J ), CABS1( A( J, J ) ) ) + AMAX = MAX( AMAX, CABS1( A( J, J ) ) ) + DO I = J+1, N + S( I ) = MAX( S( I ), CABS1( A( I, J ) ) ) + S( J ) = MAX( S( J ), CABS1( A( I, J ) ) ) + AMAX = MAX( AMAX, CABS1( A( I, J ) ) ) + END DO + END DO + END IF + DO J = 1, N + S( J ) = 1.0D0 / S( J ) + END DO + + TOL = ONE / SQRT( 2.0D0 * N ) + + DO ITER = 1, MAX_ITER + SCALE = 0.0D0 + SUMSQ = 0.0D0 +* beta = |A|s + DO I = 1, N + WORK( I ) = ZERO + END DO + IF ( UP ) THEN + DO J = 1, N + DO I = 1, J-1 + WORK( I ) = WORK( I ) + CABS1( A( I, J ) ) * S( J ) + WORK( J ) = WORK( J ) + CABS1( A( I, J ) ) * S( I ) + END DO + WORK( J ) = WORK( J ) + CABS1( A( J, J ) ) * S( J ) + END DO + ELSE + DO J = 1, N + WORK( J ) = WORK( J ) + CABS1( A( J, J ) ) * S( J ) + DO I = J+1, N + WORK( I ) = WORK( I ) + CABS1( A( I, J ) ) * S( J ) + WORK( J ) = WORK( J ) + CABS1( A( I, J ) ) * S( I ) + END DO + END DO + END IF + +* avg = s^T beta / n + AVG = 0.0D0 + DO I = 1, N + AVG = AVG + S( I )*WORK( I ) + END DO + AVG = AVG / N + + STD = 0.0D0 + DO I = N+1, N + WORK( I ) = S( I-N ) * WORK( I-N ) - AVG + END DO + CALL ZLASSQ( N, WORK( N+1 ), 1, SCALE, SUMSQ ) + STD = SCALE * SQRT( SUMSQ / N ) + + IF ( STD .LT. TOL * AVG ) GOTO 999 + + DO I = 1, N + T = CABS1( A( I, I ) ) + SI = S( I ) + C2 = ( N-1 ) * T + C1 = ( N-2 ) * ( WORK( I ) - T*SI ) + C0 = -(T*SI)*SI + 2*WORK( I )*SI - N*AVG + D = C1*C1 - 4*C0*C2 + + IF ( D .LE. 0 ) THEN + INFO = -1 + RETURN + END IF + SI = -2*C0 / ( C1 + SQRT( D ) ) + + D = SI - S( I ) + U = ZERO + IF ( UP ) THEN + DO J = 1, I + T = CABS1( A( J, I ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + DO J = I+1,N + T = CABS1( A( I, J ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + ELSE + DO J = 1, I + T = CABS1( A( I, J ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + DO J = I+1,N + T = CABS1( A( J, I ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + END IF + + AVG = AVG + ( U + WORK( I ) ) * D / N + S( I ) = SI + END DO + END DO + + 999 CONTINUE + + SMLNUM = DLAMCH( 'SAFEMIN' ) + BIGNUM = ONE / SMLNUM + SMIN = BIGNUM + SMAX = ZERO + T = ONE / SQRT( AVG ) + BASE = DLAMCH( 'B' ) + U = ONE / LOG( BASE ) + DO I = 1, N + S( I ) = BASE ** INT( U * LOG( S( I ) * T ) ) + SMIN = MIN( SMIN, S( I ) ) + SMAX = MAX( SMAX, S( I ) ) + END DO + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) +* + END diff --git a/dspl/liblapack/SRC/zheev.f b/dspl/liblapack/SRC/zheev.f new file mode 100644 index 0000000..3e87778 --- /dev/null +++ b/dspl/liblapack/SRC/zheev.f @@ -0,0 +1,298 @@ +*> \brief ZHEEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEEV computes all eigenvalues and, optionally, eigenvectors of a +*> complex Hermitian matrix A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,2*N-1). +*> For optimal efficiency, LWORK >= (NB+1)*N, +*> where NB is the blocksize for ZHETRD returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (max(1, 3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEeigen +* +* ===================================================================== + SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE, + $ LLWORK, LWKOPT, NB + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANHE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANHE +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZHETRD, ZLASCL, ZSTEQR, + $ ZUNGTR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, ( NB+1 )*N ) + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) + $ INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + WORK( 1 ) = 1 + IF( WANTZ ) + $ A( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHE( 'M', UPLO, N, A, LDA, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL ZLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + CALL ZHETRD( UPLO, N, A, LDA, W, RWORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* ZUNGTR to generate the unitary matrix, then call ZSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL ZUNGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ), + $ LLWORK, IINFO ) + INDWRK = INDE + N + CALL ZSTEQR( JOBZ, N, W, RWORK( INDE ), A, LDA, + $ RWORK( INDWRK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZHEEV +* + END diff --git a/dspl/liblapack/SRC/zheev_2stage.f b/dspl/liblapack/SRC/zheev_2stage.f new file mode 100644 index 0000000..d80a80d --- /dev/null +++ b/dspl/liblapack/SRC/zheev_2stage.f @@ -0,0 +1,355 @@ +*> \brief ZHEEV_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEEV_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, +* RWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEEV_2STAGE computes all eigenvalues and, optionally, eigenvectors of a +*> complex Hermitian matrix A using the 2stage technique for +*> the reduction to tridiagonal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (max(1, 3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16HEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, + $ RWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE, + $ LLWORK, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + DOUBLE PRECISION DLAMCH, ZLANHE + EXTERNAL LSAME, DLAMCH, ZLANHE, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZLASCL, ZSTEQR, + $ ZUNGTR, ZHETRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + KD = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = N + LHTRD + LWTRD + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEEV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + W( 1 ) = DBLE( A( 1, 1 ) ) + WORK( 1 ) = 1 + IF( WANTZ ) + $ A( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHE( 'M', UPLO, N, A, LDA, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL ZLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call ZHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + INDHOUS = INDTAU + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL ZHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, RWORK( INDE ), + $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* ZUNGTR to generate the unitary matrix, then call ZSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL ZUNGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ), + $ LLWORK, IINFO ) + INDWRK = INDE + N + CALL ZSTEQR( JOBZ, N, W, RWORK( INDE ), A, LDA, + $ RWORK( INDWRK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of ZHEEV_2STAGE +* + END diff --git a/dspl/liblapack/SRC/zheevd.f b/dspl/liblapack/SRC/zheevd.f new file mode 100644 index 0000000..cbe9a39 --- /dev/null +++ b/dspl/liblapack/SRC/zheevd.f @@ -0,0 +1,398 @@ +*> \brief ZHEEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEEVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, +* LRWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEEVD computes all eigenvalues and, optionally, eigenvectors of a +*> complex Hermitian matrix A. If eigenvectors are desired, it uses a +*> divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. +*> If N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LWORK must be at least N + 1. +*> If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, +*> dimension (LRWORK) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of the array RWORK. +*> If N <= 1, LRWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LRWORK must be at least N. +*> If JOBZ = 'V' and N > 1, LRWORK must be at least +*> 1 + 5*N + 2*N**2. +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i and JOBZ = 'N', then the algorithm failed +*> to converge; i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> if INFO = i and JOBZ = 'V', then the algorithm failed +*> to compute an eigenvalue while working on the submatrix +*> lying in rows and columns INFO/(N+1) through +*> mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEeigen +* +*> \par Further Details: +* ===================== +*> +*> Modified description of INFO. Sven, 16 Feb 05. +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +*> +* ===================================================================== + SUBROUTINE ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, + $ LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWK2, + $ INDWRK, ISCALE, LIOPT, LIWMIN, LLRWK, LLWORK, + $ LLWRK2, LOPT, LROPT, LRWMIN, LWMIN + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANHE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANHE +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZHETRD, ZLACPY, ZLASCL, + $ ZSTEDC, ZUNMTR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + LOPT = LWMIN + LROPT = LRWMIN + LIOPT = LIWMIN + ELSE + IF( WANTZ ) THEN + LWMIN = 2*N + N*N + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + 1 + LRWMIN = N + LIWMIN = 1 + END IF + LOPT = MAX( LWMIN, N + + $ ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 ) ) + LROPT = LRWMIN + LIOPT = LIWMIN + END IF + WORK( 1 ) = LOPT + RWORK( 1 ) = LROPT + IWORK( 1 ) = LIOPT +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + IF( WANTZ ) + $ A( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHE( 'M', UPLO, N, A, LDA, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL ZLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + INDWRK = INDTAU + N + INDRWK = INDE + N + INDWK2 = INDWRK + N*N + LLWORK = LWORK - INDWRK + 1 + LLWRK2 = LWORK - INDWK2 + 1 + LLRWK = LRWORK - INDRWK + 1 + CALL ZHETRD( UPLO, N, A, LDA, W, RWORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call ZUNMTR to multiply it to the +* Householder transformations represented as Householder vectors in +* A. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, RWORK( INDRWK ), LLRWK, + $ IWORK, LIWORK, INFO ) + CALL ZUNMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ), + $ WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO ) + CALL ZLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + WORK( 1 ) = LOPT + RWORK( 1 ) = LROPT + IWORK( 1 ) = LIOPT +* + RETURN +* +* End of ZHEEVD +* + END diff --git a/dspl/liblapack/SRC/zheevd_2stage.f b/dspl/liblapack/SRC/zheevd_2stage.f new file mode 100644 index 0000000..1bf5fec --- /dev/null +++ b/dspl/liblapack/SRC/zheevd_2stage.f @@ -0,0 +1,455 @@ +*> \brief ZHEEVD_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEEVD_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, +* RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEEVD_2STAGE computes all eigenvalues and, optionally, eigenvectors of a +*> complex Hermitian matrix A using the 2stage technique for +*> the reduction to tridiagonal. If eigenvectors are desired, it uses a +*> divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + N+1 +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + N+1 +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2 +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, +*> dimension (LRWORK) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of the array RWORK. +*> If N <= 1, LRWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LRWORK must be at least N. +*> If JOBZ = 'V' and N > 1, LRWORK must be at least +*> 1 + 5*N + 2*N**2. +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i and JOBZ = 'N', then the algorithm failed +*> to converge; i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> if INFO = i and JOBZ = 'V', then the algorithm failed +*> to compute an eigenvalue while working on the submatrix +*> lying in rows and columns INFO/(N+1) through +*> mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16HEeigen +* +*> \par Further Details: +* ===================== +*> +*> Modified description of INFO. Sven, 16 Feb 05. +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +*> +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHEEVD_2STAGE( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, + $ RWORK, LRWORK, IWORK, LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWK2, + $ INDWRK, ISCALE, LIWMIN, LLRWK, LLWORK, + $ LLWRK2, LRWMIN, LWMIN, + $ LHTRD, LWTRD, KD, IB, INDHOUS + + + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + DOUBLE PRECISION DLAMCH, ZLANHE + EXTERNAL LSAME, DLAMCH, ZLANHE, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZLACPY, ZLASCL, + $ ZSTEDC, ZUNMTR, ZHETRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + ELSE + KD = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', JOBZ, + $ N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + IF( WANTZ ) THEN + LWMIN = 2*N + N*N + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + 1 + LHTRD + LWTRD + LRWMIN = N + LIWMIN = 1 + END IF + END IF + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEEVD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = DBLE( A( 1, 1 ) ) + IF( WANTZ ) + $ A( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHE( 'M', UPLO, N, A, LDA, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL ZLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call ZHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. +* + INDE = 1 + INDRWK = INDE + N + LLRWK = LRWORK - INDRWK + 1 + INDTAU = 1 + INDHOUS = INDTAU + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 + INDWK2 = INDWRK + N*N + LLWRK2 = LWORK - INDWK2 + 1 +* + CALL ZHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, W, RWORK( INDE ), + $ WORK( INDTAU ), WORK( INDHOUS ), LHTRD, + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the +* tridiagonal matrix, then call ZUNMTR to multiply it to the +* Householder transformations represented as Householder vectors in +* A. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK( INDWRK ), N, + $ WORK( INDWK2 ), LLWRK2, RWORK( INDRWK ), LLRWK, + $ IWORK, LIWORK, INFO ) + CALL ZUNMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ), + $ WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO ) + CALL ZLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of ZHEEVD_2STAGE +* + END diff --git a/dspl/liblapack/SRC/zheevr.f b/dspl/liblapack/SRC/zheevr.f new file mode 100644 index 0000000..810373c --- /dev/null +++ b/dspl/liblapack/SRC/zheevr.f @@ -0,0 +1,724 @@ +*> \brief ZHEEVR computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEEVR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, +* ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, +* RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK, +* $ M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEEVR computes selected eigenvalues and, optionally, eigenvectors +*> of a complex Hermitian matrix A. Eigenvalues and eigenvectors can +*> be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> +*> ZHEEVR first reduces the matrix A to tridiagonal form T with a call +*> to ZHETRD. Then, whenever possible, ZHEEVR calls ZSTEMR to compute +*> eigenspectrum using Relatively Robust Representations. ZSTEMR +*> computes eigenvalues by the dqds algorithm, while orthogonal +*> eigenvectors are computed from various "good" L D L^T representations +*> (also known as Relatively Robust Representations). Gram-Schmidt +*> orthogonalization is avoided as far as possible. More specifically, +*> the various steps of the algorithm are as follows. +*> +*> For each unreduced block (submatrix) of T, +*> (a) Compute T - sigma I = L D L^T, so that L and D +*> define all the wanted eigenvalues to high relative accuracy. +*> This means that small relative changes in the entries of D and L +*> cause only small relative changes in the eigenvalues and +*> eigenvectors. The standard (unfactored) representation of the +*> tridiagonal matrix T does not have this property in general. +*> (b) Compute the eigenvalues to suitable accuracy. +*> If the eigenvectors are desired, the algorithm attains full +*> accuracy of the computed eigenvalues only right before +*> the corresponding vectors have to be computed, see steps c) and d). +*> (c) For each cluster of close eigenvalues, select a new +*> shift close to the cluster, find a new factorization, and refine +*> the shifted eigenvalues to suitable accuracy. +*> (d) For each eigenvalue with a large enough relative separation compute +*> the corresponding eigenvector by forming a rank revealing twisted +*> factorization. Go back to (c) for any clusters that remain. +*> +*> The desired accuracy of the output can be specified by the input +*> parameter ABSTOL. +*> +*> For more details, see DSTEMR's documentation and: +*> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations +*> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," +*> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. +*> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and +*> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, +*> 2004. Also LAPACK Working Note 154. +*> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric +*> tridiagonal eigenvalue/eigenvector problem", +*> Computer Science Division Technical Report No. UCB/CSD-97-971, +*> UC Berkeley, May 1997. +*> +*> +*> Note 1 : ZHEEVR calls ZSTEMR when the full spectrum is requested +*> on machines which conform to the ieee-754 floating point standard. +*> ZHEEVR calls DSTEBZ and ZSTEIN on non-ieee machines and +*> when partial spectrum requests are made. +*> +*> Normal execution of ZSTEMR may create NaNs and infinities and +*> hence may abort due to a floating point exception in environments +*> which do not handle NaNs and infinities in the ieee standard default +*> manner. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and +*> ZSTEIN are called +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> +*> If high relative accuracy is important, set ABSTOL to +*> DLAMCH( 'Safe minimum' ). Doing so will guarantee that +*> eigenvalues are computed to high relative accuracy when +*> possible in future releases. The current code does not +*> make any guarantees about high relative accuracy, but +*> furutre releases will. See J. Barlow and J. Demmel, +*> "Computing Accurate Eigensystems of Scaled Diagonally +*> Dominant Matrices", LAPACK Working Note #7, for a discussion +*> of which matrices define their eigenvalues to high relative +*> accuracy. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). This is an output of ZSTEMR (tridiagonal +*> matrix). The support of the eigenvectors of A is typically +*> 1:N because of the unitary transformations applied by ZUNMTR. +*> Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,2*N). +*> For optimal efficiency, LWORK >= (NB+1)*N, +*> where NB is the max of the blocksize for ZHETRD and for +*> ZUNMTR as returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) +*> On exit, if INFO = 0, RWORK(1) returns the optimal +*> (and minimal) LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The length of the array RWORK. LRWORK >= max(1,24*N). +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal +*> (and minimal) LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N). +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: Internal error +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16HEeigen +* +*> \par Contributors: +* ================== +*> +*> Inderjit Dhillon, IBM Almaden, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Ken Stanley, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Jason Riedy, Computer Science Division, University of +*> California at Berkeley, USA \n +*> +* ===================================================================== + SUBROUTINE ZHEEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, + $ RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK, + $ M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, TEST, VALEIG, + $ WANTZ, TRYRAC + CHARACTER ORDER + INTEGER I, IEEEOK, IINFO, IMAX, INDIBL, INDIFL, INDISP, + $ INDIWO, INDRD, INDRDD, INDRE, INDREE, INDRWK, + $ INDTAU, INDWK, INDWKN, ISCALE, ITMP1, J, JJ, + $ LIWMIN, LLWORK, LLRWORK, LLWRKN, LRWMIN, + $ LWKOPT, LWMIN, NB, NSPLIT + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANSY + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANSY +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZDSCAL, + $ ZHETRD, ZSTEMR, ZSTEIN, ZSWAP, ZUNMTR +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IEEEOK = ILAENV( 10, 'ZHEEVR', 'N', 1, 2, 3, 4 ) +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LRWORK.EQ.-1 ) .OR. + $ ( LIWORK.EQ.-1 ) ) +* + LRWMIN = MAX( 1, 24*N ) + LIWMIN = MAX( 1, 10*N ) + LWMIN = MAX( 1, 2*N ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 ) + NB = MAX( NB, ILAENV( 1, 'ZUNMTR', UPLO, N, -1, -1, -1 ) ) + LWKOPT = MAX( ( NB+1 )*N, LWMIN ) + WORK( 1 ) = LWKOPT + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -22 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEEVR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + WORK( 1 ) = 2 + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = DBLE( A( 1, 1 ) ) + ELSE + IF( VL.LT.DBLE( A( 1, 1 ) ) .AND. VU.GE.DBLE( A( 1, 1 ) ) ) + $ THEN + M = 1 + W( 1 ) = DBLE( A( 1, 1 ) ) + END IF + END IF + IF( WANTZ ) THEN + Z( 1, 1 ) = ONE + ISUPPZ( 1 ) = 1 + ISUPPZ( 2 ) = 1 + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF (VALEIG) THEN + VLL = VL + VUU = VU + END IF + ANRM = ZLANSY( 'M', UPLO, N, A, LDA, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL ZDSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL ZDSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF + +* Initialize indices into workspaces. Note: The IWORK indices are +* used only if DSTERF or ZSTEMR fail. + +* WORK(INDTAU:INDTAU+N-1) stores the complex scalar factors of the +* elementary reflectors used in ZHETRD. + INDTAU = 1 +* INDWK is the starting offset of the remaining complex workspace, +* and LLWORK is the remaining complex workspace size. + INDWK = INDTAU + N + LLWORK = LWORK - INDWK + 1 + +* RWORK(INDRD:INDRD+N-1) stores the real tridiagonal's diagonal +* entries. + INDRD = 1 +* RWORK(INDRE:INDRE+N-1) stores the off-diagonal entries of the +* tridiagonal matrix from ZHETRD. + INDRE = INDRD + N +* RWORK(INDRDD:INDRDD+N-1) is a copy of the diagonal entries over +* -written by ZSTEMR (the DSTERF path copies the diagonal to W). + INDRDD = INDRE + N +* RWORK(INDREE:INDREE+N-1) is a copy of the off-diagonal entries over +* -written while computing the eigenvalues in DSTERF and ZSTEMR. + INDREE = INDRDD + N +* INDRWK is the starting offset of the left-over real workspace, and +* LLRWORK is the remaining workspace size. + INDRWK = INDREE + N + LLRWORK = LRWORK - INDRWK + 1 + +* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in DSTEBZ and +* stores the block indices of each of the M<=N eigenvalues. + INDIBL = 1 +* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in DSTEBZ and +* stores the starting and finishing indices of each block. + INDISP = INDIBL + N +* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors +* that corresponding to eigenvectors that fail to converge in +* DSTEIN. This information is discarded; if any fail, the driver +* returns INFO > 0. + INDIFL = INDISP + N +* INDIWO is the offset of the remaining integer workspace. + INDIWO = INDIFL + N + +* +* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. +* + CALL ZHETRD( UPLO, N, A, LDA, RWORK( INDRD ), RWORK( INDRE ), + $ WORK( INDTAU ), WORK( INDWK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired +* then call DSTERF or ZSTEMR and ZUNMTR. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG.OR.TEST ) .AND. ( IEEEOK.EQ.1 ) ) THEN + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N, RWORK( INDRD ), 1, W, 1 ) + CALL DCOPY( N-1, RWORK( INDRE ), 1, RWORK( INDREE ), 1 ) + CALL DSTERF( N, W, RWORK( INDREE ), INFO ) + ELSE + CALL DCOPY( N-1, RWORK( INDRE ), 1, RWORK( INDREE ), 1 ) + CALL DCOPY( N, RWORK( INDRD ), 1, RWORK( INDRDD ), 1 ) +* + IF (ABSTOL .LE. TWO*N*EPS) THEN + TRYRAC = .TRUE. + ELSE + TRYRAC = .FALSE. + END IF + CALL ZSTEMR( JOBZ, 'A', N, RWORK( INDRDD ), + $ RWORK( INDREE ), VL, VU, IL, IU, M, W, + $ Z, LDZ, N, ISUPPZ, TRYRAC, + $ RWORK( INDRWK ), LLRWORK, + $ IWORK, LIWORK, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by ZSTEMR. +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN + INDWKN = INDWK + LLWRKN = LWORK - INDWKN + 1 + CALL ZUNMTR( 'L', UPLO, 'N', N, M, A, LDA, + $ WORK( INDTAU ), Z, LDZ, WORK( INDWKN ), + $ LLWRKN, IINFO ) + END IF + END IF +* +* + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. +* Also call DSTEBZ and ZSTEIN if ZSTEMR fails. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDRD ), RWORK( INDRE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL ZSTEIN( N, RWORK( INDRD ), RWORK( INDRE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWO ), IWORK( INDIFL ), + $ INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by ZSTEIN. +* + INDWKN = INDWK + LLWRKN = LWORK - INDWKN + 1 + CALL ZUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of ZHEEVR +* + END diff --git a/dspl/liblapack/SRC/zheevr_2stage.f b/dspl/liblapack/SRC/zheevr_2stage.f new file mode 100644 index 0000000..ab7f337 --- /dev/null +++ b/dspl/liblapack/SRC/zheevr_2stage.f @@ -0,0 +1,779 @@ +*> \brief ZHEEVR_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEEVR_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, +* IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, +* WORK, LWORK, RWORK, LRWORK, IWORK, +* LIWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK, +* $ M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEEVR_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a complex Hermitian matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors can +*> be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> +*> ZHEEVR_2STAGE first reduces the matrix A to tridiagonal form T with a call +*> to ZHETRD. Then, whenever possible, ZHEEVR_2STAGE calls ZSTEMR to compute +*> eigenspectrum using Relatively Robust Representations. ZSTEMR +*> computes eigenvalues by the dqds algorithm, while orthogonal +*> eigenvectors are computed from various "good" L D L^T representations +*> (also known as Relatively Robust Representations). Gram-Schmidt +*> orthogonalization is avoided as far as possible. More specifically, +*> the various steps of the algorithm are as follows. +*> +*> For each unreduced block (submatrix) of T, +*> (a) Compute T - sigma I = L D L^T, so that L and D +*> define all the wanted eigenvalues to high relative accuracy. +*> This means that small relative changes in the entries of D and L +*> cause only small relative changes in the eigenvalues and +*> eigenvectors. The standard (unfactored) representation of the +*> tridiagonal matrix T does not have this property in general. +*> (b) Compute the eigenvalues to suitable accuracy. +*> If the eigenvectors are desired, the algorithm attains full +*> accuracy of the computed eigenvalues only right before +*> the corresponding vectors have to be computed, see steps c) and d). +*> (c) For each cluster of close eigenvalues, select a new +*> shift close to the cluster, find a new factorization, and refine +*> the shifted eigenvalues to suitable accuracy. +*> (d) For each eigenvalue with a large enough relative separation compute +*> the corresponding eigenvector by forming a rank revealing twisted +*> factorization. Go back to (c) for any clusters that remain. +*> +*> The desired accuracy of the output can be specified by the input +*> parameter ABSTOL. +*> +*> For more details, see DSTEMR's documentation and: +*> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations +*> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," +*> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. +*> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and +*> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, +*> 2004. Also LAPACK Working Note 154. +*> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric +*> tridiagonal eigenvalue/eigenvector problem", +*> Computer Science Division Technical Report No. UCB/CSD-97-971, +*> UC Berkeley, May 1997. +*> +*> +*> Note 1 : ZHEEVR_2STAGE calls ZSTEMR when the full spectrum is requested +*> on machines which conform to the ieee-754 floating point standard. +*> ZHEEVR_2STAGE calls DSTEBZ and ZSTEIN on non-ieee machines and +*> when partial spectrum requests are made. +*> +*> Normal execution of ZSTEMR may create NaNs and infinities and +*> hence may abort due to a floating point exception in environments +*> which do not handle NaNs and infinities in the ieee standard default +*> manner. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and +*> ZSTEIN are called +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> +*> If high relative accuracy is important, set ABSTOL to +*> DLAMCH( 'Safe minimum' ). Doing so will guarantee that +*> eigenvalues are computed to high relative accuracy when +*> possible in future releases. The current code does not +*> make any guarantees about high relative accuracy, but +*> furutre releases will. See J. Barlow and J. Demmel, +*> "Computing Accurate Eigensystems of Scaled Diagonally +*> Dominant Matrices", LAPACK Working Note #7, for a discussion +*> of which matrices define their eigenvalues to high relative +*> accuracy. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). This is an output of ZSTEMR (tridiagonal +*> matrix). The support of the eigenvectors of A is typically +*> 1:N because of the unitary transformations applied by ZUNMTR. +*> Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, 26*N, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) +*> On exit, if INFO = 0, RWORK(1) returns the optimal +*> (and minimal) LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The length of the array RWORK. LRWORK >= max(1,24*N). +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal +*> (and minimal) LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N). +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: Internal error +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16HEeigen +* +*> \par Contributors: +* ================== +*> +*> Inderjit Dhillon, IBM Almaden, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Ken Stanley, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Jason Riedy, Computer Science Division, University of +*> California at Berkeley, USA \n +*> +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHEEVR_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ, + $ WORK, LWORK, RWORK, LRWORK, IWORK, + $ LIWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK, + $ M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, TEST, VALEIG, + $ WANTZ, TRYRAC + CHARACTER ORDER + INTEGER I, IEEEOK, IINFO, IMAX, INDIBL, INDIFL, INDISP, + $ INDIWO, INDRD, INDRDD, INDRE, INDREE, INDRWK, + $ INDTAU, INDWK, INDWKN, ISCALE, ITMP1, J, JJ, + $ LIWMIN, LLWORK, LLRWORK, LLWRKN, LRWMIN, + $ LWMIN, NSPLIT, LHTRD, LWTRD, KD, IB, INDHOUS + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV, ILAENV2STAGE + DOUBLE PRECISION DLAMCH, ZLANSY + EXTERNAL LSAME, DLAMCH, ZLANSY, ILAENV, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZDSCAL, + $ ZHETRD_2STAGE, ZSTEMR, ZSTEIN, ZSWAP, ZUNMTR +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + IEEEOK = ILAENV( 10, 'ZHEEVR', 'N', 1, 2, 3, 4 ) +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LRWORK.EQ.-1 ) .OR. + $ ( LIWORK.EQ.-1 ) ) +* + KD = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = N + LHTRD + LWTRD + LRWMIN = MAX( 1, 24*N ) + LIWMIN = MAX( 1, 10*N ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -22 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEEVR_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( N.EQ.1 ) THEN + WORK( 1 ) = 2 + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = DBLE( A( 1, 1 ) ) + ELSE + IF( VL.LT.DBLE( A( 1, 1 ) ) .AND. VU.GE.DBLE( A( 1, 1 ) ) ) + $ THEN + M = 1 + W( 1 ) = DBLE( A( 1, 1 ) ) + END IF + END IF + IF( WANTZ ) THEN + Z( 1, 1 ) = ONE + ISUPPZ( 1 ) = 1 + ISUPPZ( 2 ) = 1 + END IF + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF (VALEIG) THEN + VLL = VL + VUU = VU + END IF + ANRM = ZLANSY( 'M', UPLO, N, A, LDA, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL ZDSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL ZDSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF + +* Initialize indices into workspaces. Note: The IWORK indices are +* used only if DSTERF or ZSTEMR fail. + +* WORK(INDTAU:INDTAU+N-1) stores the complex scalar factors of the +* elementary reflectors used in ZHETRD. + INDTAU = 1 +* INDWK is the starting offset of the remaining complex workspace, +* and LLWORK is the remaining complex workspace size. + INDHOUS = INDTAU + N + INDWK = INDHOUS + LHTRD + LLWORK = LWORK - INDWK + 1 + +* RWORK(INDRD:INDRD+N-1) stores the real tridiagonal's diagonal +* entries. + INDRD = 1 +* RWORK(INDRE:INDRE+N-1) stores the off-diagonal entries of the +* tridiagonal matrix from ZHETRD. + INDRE = INDRD + N +* RWORK(INDRDD:INDRDD+N-1) is a copy of the diagonal entries over +* -written by ZSTEMR (the DSTERF path copies the diagonal to W). + INDRDD = INDRE + N +* RWORK(INDREE:INDREE+N-1) is a copy of the off-diagonal entries over +* -written while computing the eigenvalues in DSTERF and ZSTEMR. + INDREE = INDRDD + N +* INDRWK is the starting offset of the left-over real workspace, and +* LLRWORK is the remaining workspace size. + INDRWK = INDREE + N + LLRWORK = LRWORK - INDRWK + 1 + +* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in DSTEBZ and +* stores the block indices of each of the M<=N eigenvalues. + INDIBL = 1 +* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in DSTEBZ and +* stores the starting and finishing indices of each block. + INDISP = INDIBL + N +* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors +* that corresponding to eigenvectors that fail to converge in +* ZSTEIN. This information is discarded; if any fail, the driver +* returns INFO > 0. + INDIFL = INDISP + N +* INDIWO is the offset of the remaining integer workspace. + INDIWO = INDIFL + N + +* +* Call ZHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. +* + CALL ZHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, RWORK( INDRD ), + $ RWORK( INDRE ), WORK( INDTAU ), + $ WORK( INDHOUS ), LHTRD, + $ WORK( INDWK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired +* then call DSTERF or ZSTEMR and ZUNMTR. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG.OR.TEST ) .AND. ( IEEEOK.EQ.1 ) ) THEN + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N, RWORK( INDRD ), 1, W, 1 ) + CALL DCOPY( N-1, RWORK( INDRE ), 1, RWORK( INDREE ), 1 ) + CALL DSTERF( N, W, RWORK( INDREE ), INFO ) + ELSE + CALL DCOPY( N-1, RWORK( INDRE ), 1, RWORK( INDREE ), 1 ) + CALL DCOPY( N, RWORK( INDRD ), 1, RWORK( INDRDD ), 1 ) +* + IF (ABSTOL .LE. TWO*N*EPS) THEN + TRYRAC = .TRUE. + ELSE + TRYRAC = .FALSE. + END IF + CALL ZSTEMR( JOBZ, 'A', N, RWORK( INDRDD ), + $ RWORK( INDREE ), VL, VU, IL, IU, M, W, + $ Z, LDZ, N, ISUPPZ, TRYRAC, + $ RWORK( INDRWK ), LLRWORK, + $ IWORK, LIWORK, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by ZSTEMR. +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN + INDWKN = INDWK + LLWRKN = LWORK - INDWKN + 1 + CALL ZUNMTR( 'L', UPLO, 'N', N, M, A, LDA, + $ WORK( INDTAU ), Z, LDZ, WORK( INDWKN ), + $ LLWRKN, IINFO ) + END IF + END IF +* +* + IF( INFO.EQ.0 ) THEN + M = N + GO TO 30 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. +* Also call DSTEBZ and ZSTEIN if ZSTEMR fails. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDRD ), RWORK( INDRE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWO ), INFO ) +* + IF( WANTZ ) THEN + CALL ZSTEIN( N, RWORK( INDRD ), RWORK( INDRE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWO ), IWORK( INDIFL ), + $ INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by ZSTEIN. +* + INDWKN = INDWK + LLWRKN = LWORK - INDWKN + 1 + CALL ZUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWKN ), LLWRKN, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 30 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 50 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 40 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 40 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + END IF + 50 CONTINUE + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of ZHEEVR_2STAGE +* + END diff --git a/dspl/liblapack/SRC/zheevx.f b/dspl/liblapack/SRC/zheevx.f new file mode 100644 index 0000000..3e11875 --- /dev/null +++ b/dspl/liblapack/SRC/zheevx.f @@ -0,0 +1,564 @@ +*> \brief ZHEEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, +* ABSTOL, M, W, Z, LDZ, WORK, LWORK, RWORK, +* IWORK, IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEEVX computes selected eigenvalues and, optionally, eigenvectors +*> of a complex Hermitian matrix A. Eigenvalues and eigenvectors can +*> be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> On normal exit, the first M elements contain the selected +*> eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise 2*N. +*> For optimal efficiency, LWORK >= (NB+1)*N, +*> where NB is the max of the blocksize for ZHETRD and for +*> ZUNMTR as returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16HEeigen +* +* ===================================================================== + SUBROUTINE ZHEEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, WORK, LWORK, RWORK, + $ IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, TEST, VALEIG, + $ WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWK, INDRWK, INDTAU, INDWRK, ISCALE, + $ ITMP1, J, JJ, LLWORK, LWKMIN, LWKOPT, NB, + $ NSPLIT + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANHE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANHE +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZDSCAL, + $ ZHETRD, ZLACPY, ZSTEIN, ZSTEQR, ZSWAP, ZUNGTR, + $ ZUNMTR +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWKMIN = 1 + WORK( 1 ) = LWKMIN + ELSE + LWKMIN = 2*N + NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 ) + NB = MAX( NB, ILAENV( 1, 'ZUNMTR', UPLO, N, -1, -1, -1 ) ) + LWKOPT = MAX( 1, ( NB + 1 )*N ) + WORK( 1 ) = LWKOPT + END IF +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) + $ INFO = -17 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEEVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = A( 1, 1 ) + ELSE IF( VALEIG ) THEN + IF( VL.LT.DBLE( A( 1, 1 ) ) .AND. VU.GE.DBLE( A( 1, 1 ) ) ) + $ THEN + M = 1 + W( 1 ) = A( 1, 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + END IF + ANRM = ZLANHE( 'M', UPLO, N, A, LDA, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL ZDSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL ZDSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N + INDTAU = 1 + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + CALL ZHETRD( UPLO, N, A, LDA, RWORK( INDD ), RWORK( INDE ), + $ WORK( INDTAU ), WORK( INDWRK ), LLWORK, IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal to +* zero, then call DSTERF or ZUNGTR and ZSTEQR. If this fails for +* some eigenvalue, then try DSTEBZ. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG .OR. TEST ) .AND. ( ABSTOL.LE.ZERO ) ) THEN + CALL DCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL DSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL ZLACPY( 'A', N, N, A, LDA, Z, LDZ ) + CALL ZUNGTR( UPLO, N, Z, LDZ, WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL ZSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + IFAIL( I ) = 0 + 30 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 40 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL ZSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by ZSTEIN. +* + CALL ZUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWRK ), LLWORK, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 40 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 60 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 50 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 60 CONTINUE + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZHEEVX +* + END diff --git a/dspl/liblapack/SRC/zheevx_2stage.f b/dspl/liblapack/SRC/zheevx_2stage.f new file mode 100644 index 0000000..a70c870 --- /dev/null +++ b/dspl/liblapack/SRC/zheevx_2stage.f @@ -0,0 +1,622 @@ +*> \brief ZHEEVX_2STAGE computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEEVX_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEEVX_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, +* IL, IU, ABSTOL, M, W, Z, LDZ, WORK, +* LWORK, RWORK, IWORK, IFAIL, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEEVX_2STAGE computes selected eigenvalues and, optionally, eigenvectors +*> of a complex Hermitian matrix A using the 2stage technique for +*> the reduction to tridiagonal. Eigenvalues and eigenvectors can +*> be selected by specifying either a range of values or a range of +*> indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing A to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> On normal exit, the first M elements contain the selected +*> eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, 8*N, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16HEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHEEVX_2STAGE( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, WORK, + $ LWORK, RWORK, IWORK, IFAIL, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDA, LDZ, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, TEST, VALEIG, + $ WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWK, INDRWK, INDTAU, INDWRK, ISCALE, + $ ITMP1, J, JJ, LLWORK, + $ NSPLIT, LWMIN, LHTRD, LWTRD, KD, IB, INDHOUS + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + DOUBLE PRECISION DLAMCH, ZLANHE + EXTERNAL LSAME, DLAMCH, ZLANHE, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZDSCAL, + $ ZLACPY, ZSTEIN, ZSTEQR, ZSWAP, ZUNGTR, ZUNMTR, + $ ZHETRD_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + LOWER = LSAME( UPLO, 'L' ) + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -8 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -10 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + WORK( 1 ) = LWMIN + ELSE + KD = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', JOBZ, + $ N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', JOBZ, + $ N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', JOBZ, + $ N, KD, IB, -1 ) + LWMIN = N + LHTRD + LWTRD + WORK( 1 ) = LWMIN + END IF +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) + $ INFO = -17 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEEVX_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = DBLE( A( 1, 1 ) ) + ELSE IF( VALEIG ) THEN + IF( VL.LT.DBLE( A( 1, 1 ) ) .AND. VU.GE.DBLE( A( 1, 1 ) ) ) + $ THEN + M = 1 + W( 1 ) = DBLE( A( 1, 1 ) ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + END IF + ANRM = ZLANHE( 'M', UPLO, N, A, LDA, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + IF( LOWER ) THEN + DO 10 J = 1, N + CALL ZDSCAL( N-J+1, SIGMA, A( J, J ), 1 ) + 10 CONTINUE + ELSE + DO 20 J = 1, N + CALL ZDSCAL( J, SIGMA, A( 1, J ), 1 ) + 20 CONTINUE + END IF + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call ZHETRD_2STAGE to reduce Hermitian matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N + INDTAU = 1 + INDHOUS = INDTAU + N + INDWRK = INDHOUS + LHTRD + LLWORK = LWORK - INDWRK + 1 +* + CALL ZHETRD_2STAGE( JOBZ, UPLO, N, A, LDA, RWORK( INDD ), + $ RWORK( INDE ), WORK( INDTAU ), + $ WORK( INDHOUS ), LHTRD, WORK( INDWRK ), + $ LLWORK, IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal to +* zero, then call DSTERF or ZUNGTR and ZSTEQR. If this fails for +* some eigenvalue, then try DSTEBZ. +* + TEST = .FALSE. + IF( INDEIG ) THEN + IF( IL.EQ.1 .AND. IU.EQ.N ) THEN + TEST = .TRUE. + END IF + END IF + IF( ( ALLEIG .OR. TEST ) .AND. ( ABSTOL.LE.ZERO ) ) THEN + CALL DCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL DSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL ZLACPY( 'A', N, N, A, LDA, Z, LDZ ) + CALL ZUNGTR( UPLO, N, Z, LDZ, WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL ZSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + IFAIL( I ) = 0 + 30 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 40 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL ZSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by ZSTEIN. +* + CALL ZUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z, + $ LDZ, WORK( INDWRK ), LLWORK, IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 40 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 60 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 50 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 60 CONTINUE + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of ZHEEVX_2STAGE +* + END diff --git a/dspl/liblapack/SRC/zhegs2.f b/dspl/liblapack/SRC/zhegs2.f new file mode 100644 index 0000000..0bdc653 --- /dev/null +++ b/dspl/liblapack/SRC/zhegs2.f @@ -0,0 +1,296 @@ +*> \brief \b ZHEGS2 reduces a Hermitian definite generalized eigenproblem to standard form, using the factorization results obtained from cpotrf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEGS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEGS2 reduces a complex Hermitian-definite generalized +*> eigenproblem to standard form. +*> +*> If ITYPE = 1, the problem is A*x = lambda*B*x, +*> and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) +*> +*> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +*> B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H *A*L. +*> +*> B must have been previously factorized as U**H *U or L*L**H by ZPOTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H); +*> = 2 or 3: compute U*A*U**H or L**H *A*L. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored, and how B has been factorized. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the transformed matrix, stored in the +*> same format as A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> The triangular factor from the Cholesky factorization of B, +*> as returned by ZPOTRF. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEcomputational +* +* ===================================================================== + SUBROUTINE ZHEGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, HALF + PARAMETER ( ONE = 1.0D+0, HALF = 0.5D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K + DOUBLE PRECISION AKK, BKK + COMPLEX*16 CT +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZDSCAL, ZHER2, ZLACGV, ZTRMV, + $ ZTRSV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEGS2', -INFO ) + RETURN + END IF +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U**H)*A*inv(U) +* + DO 10 K = 1, N +* +* Update the upper triangle of A(k:n,k:n) +* + AKK = A( K, K ) + BKK = B( K, K ) + AKK = AKK / BKK**2 + A( K, K ) = AKK + IF( K.LT.N ) THEN + CALL ZDSCAL( N-K, ONE / BKK, A( K, K+1 ), LDA ) + CT = -HALF*AKK + CALL ZLACGV( N-K, A( K, K+1 ), LDA ) + CALL ZLACGV( N-K, B( K, K+1 ), LDB ) + CALL ZAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL ZHER2( UPLO, N-K, -CONE, A( K, K+1 ), LDA, + $ B( K, K+1 ), LDB, A( K+1, K+1 ), LDA ) + CALL ZAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL ZLACGV( N-K, B( K, K+1 ), LDB ) + CALL ZTRSV( UPLO, 'Conjugate transpose', 'Non-unit', + $ N-K, B( K+1, K+1 ), LDB, A( K, K+1 ), + $ LDA ) + CALL ZLACGV( N-K, A( K, K+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L**H) +* + DO 20 K = 1, N +* +* Update the lower triangle of A(k:n,k:n) +* + AKK = A( K, K ) + BKK = B( K, K ) + AKK = AKK / BKK**2 + A( K, K ) = AKK + IF( K.LT.N ) THEN + CALL ZDSCAL( N-K, ONE / BKK, A( K+1, K ), 1 ) + CT = -HALF*AKK + CALL ZAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 ) + CALL ZHER2( UPLO, N-K, -CONE, A( K+1, K ), 1, + $ B( K+1, K ), 1, A( K+1, K+1 ), LDA ) + CALL ZAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 ) + CALL ZTRSV( UPLO, 'No transpose', 'Non-unit', N-K, + $ B( K+1, K+1 ), LDB, A( K+1, K ), 1 ) + END IF + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U**H +* + DO 30 K = 1, N +* +* Update the upper triangle of A(1:k,1:k) +* + AKK = A( K, K ) + BKK = B( K, K ) + CALL ZTRMV( UPLO, 'No transpose', 'Non-unit', K-1, B, + $ LDB, A( 1, K ), 1 ) + CT = HALF*AKK + CALL ZAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 ) + CALL ZHER2( UPLO, K-1, CONE, A( 1, K ), 1, B( 1, K ), 1, + $ A, LDA ) + CALL ZAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 ) + CALL ZDSCAL( K-1, BKK, A( 1, K ), 1 ) + A( K, K ) = AKK*BKK**2 + 30 CONTINUE + ELSE +* +* Compute L**H *A*L +* + DO 40 K = 1, N +* +* Update the lower triangle of A(1:k,1:k) +* + AKK = A( K, K ) + BKK = B( K, K ) + CALL ZLACGV( K-1, A( K, 1 ), LDA ) + CALL ZTRMV( UPLO, 'Conjugate transpose', 'Non-unit', K-1, + $ B, LDB, A( K, 1 ), LDA ) + CT = HALF*AKK + CALL ZLACGV( K-1, B( K, 1 ), LDB ) + CALL ZAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA ) + CALL ZHER2( UPLO, K-1, CONE, A( K, 1 ), LDA, B( K, 1 ), + $ LDB, A, LDA ) + CALL ZAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA ) + CALL ZLACGV( K-1, B( K, 1 ), LDB ) + CALL ZDSCAL( K-1, BKK, A( K, 1 ), LDA ) + CALL ZLACGV( K-1, A( K, 1 ), LDA ) + A( K, K ) = AKK*BKK**2 + 40 CONTINUE + END IF + END IF + RETURN +* +* End of ZHEGS2 +* + END diff --git a/dspl/liblapack/SRC/zhegst.f b/dspl/liblapack/SRC/zhegst.f new file mode 100644 index 0000000..d0c08a8 --- /dev/null +++ b/dspl/liblapack/SRC/zhegst.f @@ -0,0 +1,331 @@ +*> \brief \b ZHEGST +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEGST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEGST reduces a complex Hermitian-definite generalized +*> eigenproblem to standard form. +*> +*> If ITYPE = 1, the problem is A*x = lambda*B*x, +*> and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) +*> +*> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +*> B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. +*> +*> B must have been previously factorized as U**H*U or L*L**H by ZPOTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H); +*> = 2 or 3: compute U*A*U**H or L**H*A*L. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored and B is factored as +*> U**H*U; +*> = 'L': Lower triangle of A is stored and B is factored as +*> L*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the transformed matrix, stored in the +*> same format as A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> The triangular factor from the Cholesky factorization of B, +*> as returned by ZPOTRF. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEcomputational +* +* ===================================================================== + SUBROUTINE ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, LDA, LDB, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + COMPLEX*16 CONE, HALF + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), + $ HALF = ( 0.5D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KB, NB +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHEGS2, ZHEMM, ZHER2K, ZTRMM, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEGST', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'ZHEGST', UPLO, N, -1, -1, -1 ) +* + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL ZHEGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + ELSE +* +* Use blocked code +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U**H)*A*inv(U) +* + DO 10 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the upper triangle of A(k:n,k:n) +* + CALL ZHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + IF( K+KB.LE.N ) THEN + CALL ZTRSM( 'Left', UPLO, 'Conjugate transpose', + $ 'Non-unit', KB, N-K-KB+1, CONE, + $ B( K, K ), LDB, A( K, K+KB ), LDA ) + CALL ZHEMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, + $ A( K, K ), LDA, B( K, K+KB ), LDB, + $ CONE, A( K, K+KB ), LDA ) + CALL ZHER2K( UPLO, 'Conjugate transpose', N-K-KB+1, + $ KB, -CONE, A( K, K+KB ), LDA, + $ B( K, K+KB ), LDB, ONE, + $ A( K+KB, K+KB ), LDA ) + CALL ZHEMM( 'Left', UPLO, KB, N-K-KB+1, -HALF, + $ A( K, K ), LDA, B( K, K+KB ), LDB, + $ CONE, A( K, K+KB ), LDA ) + CALL ZTRSM( 'Right', UPLO, 'No transpose', + $ 'Non-unit', KB, N-K-KB+1, CONE, + $ B( K+KB, K+KB ), LDB, A( K, K+KB ), + $ LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L**H) +* + DO 20 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the lower triangle of A(k:n,k:n) +* + CALL ZHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + IF( K+KB.LE.N ) THEN + CALL ZTRSM( 'Right', UPLO, 'Conjugate transpose', + $ 'Non-unit', N-K-KB+1, KB, CONE, + $ B( K, K ), LDB, A( K+KB, K ), LDA ) + CALL ZHEMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, + $ A( K, K ), LDA, B( K+KB, K ), LDB, + $ CONE, A( K+KB, K ), LDA ) + CALL ZHER2K( UPLO, 'No transpose', N-K-KB+1, KB, + $ -CONE, A( K+KB, K ), LDA, + $ B( K+KB, K ), LDB, ONE, + $ A( K+KB, K+KB ), LDA ) + CALL ZHEMM( 'Right', UPLO, N-K-KB+1, KB, -HALF, + $ A( K, K ), LDA, B( K+KB, K ), LDB, + $ CONE, A( K+KB, K ), LDA ) + CALL ZTRSM( 'Left', UPLO, 'No transpose', + $ 'Non-unit', N-K-KB+1, KB, CONE, + $ B( K+KB, K+KB ), LDB, A( K+KB, K ), + $ LDA ) + END IF + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U**H +* + DO 30 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) +* + CALL ZTRMM( 'Left', UPLO, 'No transpose', 'Non-unit', + $ K-1, KB, CONE, B, LDB, A( 1, K ), LDA ) + CALL ZHEMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ), + $ LDA, B( 1, K ), LDB, CONE, A( 1, K ), + $ LDA ) + CALL ZHER2K( UPLO, 'No transpose', K-1, KB, CONE, + $ A( 1, K ), LDA, B( 1, K ), LDB, ONE, A, + $ LDA ) + CALL ZHEMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ), + $ LDA, B( 1, K ), LDB, CONE, A( 1, K ), + $ LDA ) + CALL ZTRMM( 'Right', UPLO, 'Conjugate transpose', + $ 'Non-unit', K-1, KB, CONE, B( K, K ), LDB, + $ A( 1, K ), LDA ) + CALL ZHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + 30 CONTINUE + ELSE +* +* Compute L**H*A*L +* + DO 40 K = 1, N, NB + KB = MIN( N-K+1, NB ) +* +* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) +* + CALL ZTRMM( 'Right', UPLO, 'No transpose', 'Non-unit', + $ KB, K-1, CONE, B, LDB, A( K, 1 ), LDA ) + CALL ZHEMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ), + $ LDA, B( K, 1 ), LDB, CONE, A( K, 1 ), + $ LDA ) + CALL ZHER2K( UPLO, 'Conjugate transpose', K-1, KB, + $ CONE, A( K, 1 ), LDA, B( K, 1 ), LDB, + $ ONE, A, LDA ) + CALL ZHEMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ), + $ LDA, B( K, 1 ), LDB, CONE, A( K, 1 ), + $ LDA ) + CALL ZTRMM( 'Left', UPLO, 'Conjugate transpose', + $ 'Non-unit', KB, K-1, CONE, B( K, K ), LDB, + $ A( K, 1 ), LDA ) + CALL ZHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA, + $ B( K, K ), LDB, INFO ) + 40 CONTINUE + END IF + END IF + END IF + RETURN +* +* End of ZHEGST +* + END diff --git a/dspl/liblapack/SRC/zhegv.f b/dspl/liblapack/SRC/zhegv.f new file mode 100644 index 0000000..761b5d0 --- /dev/null +++ b/dspl/liblapack/SRC/zhegv.f @@ -0,0 +1,321 @@ +*> \brief \b ZHEGV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, +* LWORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEGV computes all the eigenvalues, and optionally, the eigenvectors +*> of a complex generalized Hermitian-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +*> Here A and B are assumed to be Hermitian and B is also +*> positive definite. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> matrix Z of eigenvectors. The eigenvectors are normalized +*> as follows: +*> if ITYPE = 1 or 2, Z**H*B*Z = I; +*> if ITYPE = 3, Z**H*inv(B)*Z = I. +*> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +*> or the lower triangle (if UPLO='L') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB, N) +*> On entry, the Hermitian positive definite matrix B. +*> If UPLO = 'U', the leading N-by-N upper triangular part of B +*> contains the upper triangular part of the matrix B. +*> If UPLO = 'L', the leading N-by-N lower triangular part of B +*> contains the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**H*U or B = L*L**H. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,2*N-1). +*> For optimal efficiency, LWORK >= (NB+1)*N, +*> where NB is the blocksize for ZHETRD returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (max(1, 3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: ZPOTRF or ZHEEV returned an error code: +*> <= N: if INFO = i, ZHEEV failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEeigen +* +* ===================================================================== + SUBROUTINE ZHEGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, + $ LWORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER LWKOPT, NB, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHEEV, ZHEGST, ZPOTRF, ZTRMM, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, ( NB + 1 )*N ) + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, 2*N - 1 ) .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEGV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL ZPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + CALL ZTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**H *y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + CALL ZTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZHEGV +* + END diff --git a/dspl/liblapack/SRC/zhegv_2stage.f b/dspl/liblapack/SRC/zhegv_2stage.f new file mode 100644 index 0000000..53f1b8d --- /dev/null +++ b/dspl/liblapack/SRC/zhegv_2stage.f @@ -0,0 +1,379 @@ +*> \brief \b ZHEGV_2STAGE +* +* @precisions fortran z -> c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEGV_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, +* WORK, LWORK, RWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEGV_2STAGE computes all the eigenvalues, and optionally, the eigenvectors +*> of a complex generalized Hermitian-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +*> Here A and B are assumed to be Hermitian and B is also +*> positive definite. +*> This routine use the 2stage technique for the reduction to tridiagonal +*> which showed higher performance on recent architecture and for large +*> sizes N>2000. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> Not available in this release. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> matrix Z of eigenvectors. The eigenvectors are normalized +*> as follows: +*> if ITYPE = 1 or 2, Z**H*B*Z = I; +*> if ITYPE = 3, Z**H*inv(B)*Z = I. +*> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +*> or the lower triangle (if UPLO='L') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB, N) +*> On entry, the Hermitian positive definite matrix B. +*> If UPLO = 'U', the leading N-by-N upper triangular part of B +*> contains the upper triangular part of the matrix B. +*> If UPLO = 'L', the leading N-by-N lower triangular part of B +*> contains the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**H*U or B = L*L**H. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= 1, when N <= 1; +*> otherwise +*> If JOBZ = 'N' and N > 1, LWORK must be queried. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N + N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N + N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (max(1, 3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: ZPOTRF or ZHEEV returned an error code: +*> <= N: if INFO = i, ZHEEV failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16HEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> All details about the 2stage techniques are available in: +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHEGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, + $ WORK, LWORK, RWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER NEIG, LWMIN, LHTRD, LWTRD, KD, IB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + EXTERNAL LSAME, ILAENV2STAGE +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHEGST, ZPOTRF, ZTRMM, ZTRSM, + $ ZHEEV_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + KD = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, -1 ) + LHTRD = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWTRD = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 ) + LWMIN = N + LHTRD + LWTRD + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEGV_2STAGE ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL ZPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL ZHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, + $ WORK, LWORK, RWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + CALL ZTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**H *y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + CALL ZTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of ZHEGV_2STAGE +* + END diff --git a/dspl/liblapack/SRC/zhegvd.f b/dspl/liblapack/SRC/zhegvd.f new file mode 100644 index 0000000..b9bb053 --- /dev/null +++ b/dspl/liblapack/SRC/zhegvd.f @@ -0,0 +1,412 @@ +*> \brief \b ZHEGVD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEGVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, +* LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDA, LDB, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEGVD computes all the eigenvalues, and optionally, the eigenvectors +*> of a complex generalized Hermitian-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +*> B are assumed to be Hermitian and B is also positive definite. +*> If eigenvectors are desired, it uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> matrix Z of eigenvectors. The eigenvectors are normalized +*> as follows: +*> if ITYPE = 1 or 2, Z**H*B*Z = I; +*> if ITYPE = 3, Z**H*inv(B)*Z = I. +*> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +*> or the lower triangle (if UPLO='L') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB, N) +*> On entry, the Hermitian matrix B. If UPLO = 'U', the +*> leading N-by-N upper triangular part of B contains the +*> upper triangular part of the matrix B. If UPLO = 'L', +*> the leading N-by-N lower triangular part of B contains +*> the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**H*U or B = L*L**H. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. +*> If N <= 1, LWORK >= 1. +*> If JOBZ = 'N' and N > 1, LWORK >= N + 1. +*> If JOBZ = 'V' and N > 1, LWORK >= 2*N + N**2. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of the array RWORK. +*> If N <= 1, LRWORK >= 1. +*> If JOBZ = 'N' and N > 1, LRWORK >= N. +*> If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If N <= 1, LIWORK >= 1. +*> If JOBZ = 'N' and N > 1, LIWORK >= 1. +*> If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: ZPOTRF or ZHEEVD returned an error code: +*> <= N: if INFO = i and JOBZ = 'N', then the algorithm +*> failed to converge; i off-diagonal elements of an +*> intermediate tridiagonal form did not converge to +*> zero; +*> if INFO = i and JOBZ = 'V', then the algorithm +*> failed to compute an eigenvalue while working on +*> the submatrix lying in rows and columns INFO/(N+1) +*> through mod(INFO,N+1); +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEeigen +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Modified so that no backsubstitution is performed if ZHEEVD fails to +*> converge (NEIG in old code could be greater than N causing out of +*> bounds reference to A - reported by Ralf Meyer). Also corrected the +*> description of INFO and the test on ITYPE. Sven, 16 Feb 05. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +*> +* ===================================================================== + SUBROUTINE ZHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, + $ LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER LIOPT, LIWMIN, LOPT, LROPT, LRWMIN, LWMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHEEVD, ZHEGST, ZPOTRF, ZTRMM, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( N.LE.1 ) THEN + LWMIN = 1 + LRWMIN = 1 + LIWMIN = 1 + ELSE IF( WANTZ ) THEN + LWMIN = 2*N + N*N + LRWMIN = 1 + 5*N + 2*N*N + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + 1 + LRWMIN = N + LIWMIN = 1 + END IF + LOPT = LWMIN + LROPT = LRWMIN + LIOPT = LIWMIN + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LOPT + RWORK( 1 ) = LROPT + IWORK( 1 ) = LIOPT +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL ZPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, LRWORK, + $ IWORK, LIWORK, INFO ) + LOPT = MAX( DBLE( LOPT ), DBLE( WORK( 1 ) ) ) + LROPT = MAX( DBLE( LROPT ), DBLE( RWORK( 1 ) ) ) + LIOPT = MAX( DBLE( LIOPT ), DBLE( IWORK( 1 ) ) ) +* + IF( WANTZ .AND. INFO.EQ.0 ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + CALL ZTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, N, CONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**H *y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + CALL ZTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, N, CONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LOPT + RWORK( 1 ) = LROPT + IWORK( 1 ) = LIOPT +* + RETURN +* +* End of ZHEGVD +* + END diff --git a/dspl/liblapack/SRC/zhegvx.f b/dspl/liblapack/SRC/zhegvx.f new file mode 100644 index 0000000..268a822 --- /dev/null +++ b/dspl/liblapack/SRC/zhegvx.f @@ -0,0 +1,476 @@ +*> \brief \b ZHEGVX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEGVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEGVX( ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB, +* VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, +* LWORK, RWORK, IWORK, IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, ITYPE, IU, LDA, LDB, LDZ, LWORK, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEGVX computes selected eigenvalues, and optionally, eigenvectors +*> of a complex generalized Hermitian-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +*> B are assumed to be Hermitian and B is also positive definite. +*> Eigenvalues and eigenvectors can be selected by specifying either a +*> range of values or a range of indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, the lower triangle (if UPLO='L') or the upper +*> triangle (if UPLO='U') of A, including the diagonal, is +*> destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB, N) +*> On entry, the Hermitian matrix B. If UPLO = 'U', the +*> leading N-by-N upper triangular part of B contains the +*> upper triangular part of the matrix B. If UPLO = 'L', +*> the leading N-by-N lower triangular part of B contains +*> the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**H*U or B = L*L**H. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing C to tridiagonal form, where C is the symmetric +*> matrix of the standard symmetric problem to which the +*> generalized problem is transformed. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the selected +*> eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'N', then Z is not referenced. +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> The eigenvectors are normalized as follows: +*> if ITYPE = 1 or 2, Z**T*B*Z = I; +*> if ITYPE = 3, Z**T*inv(B)*Z = I. +*> +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,2*N). +*> For optimal efficiency, LWORK >= (NB+1)*N, +*> where NB is the blocksize for ZHETRD returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: ZPOTRF or ZHEEVX returned an error code: +*> <= N: if INFO = i, ZHEEVX failed to converge; +*> i eigenvectors failed to converge. Their indices +*> are stored in array IFAIL. +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16HEeigen +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE ZHEGVX( ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB, + $ VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, + $ LWORK, RWORK, IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, ITYPE, IU, LDA, LDB, LDZ, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ + CHARACTER TRANS + INTEGER LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHEEVX, ZHEGST, ZPOTRF, ZTRMM, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -3 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -11 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -13 + END IF + END IF + END IF + IF (INFO.EQ.0) THEN + IF (LDZ.LT.1 .OR. (WANTZ .AND. LDZ.LT.N)) THEN + INFO = -18 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, ( NB + 1 )*N ) + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEGVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) THEN + RETURN + END IF +* +* Form a Cholesky factorization of B. +* + CALL ZPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL ZHEEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, ABSTOL, + $ M, W, Z, LDZ, WORK, LWORK, RWORK, IWORK, IFAIL, + $ INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( INFO.GT.0 ) + $ M = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + CALL ZTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, M, CONE, B, + $ LDB, Z, LDZ ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**H *y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + CALL ZTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, M, CONE, B, + $ LDB, Z, LDZ ) + END IF + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZHEGVX +* + END diff --git a/dspl/liblapack/SRC/zherfs.f b/dspl/liblapack/SRC/zherfs.f new file mode 100644 index 0000000..2b4095b --- /dev/null +++ b/dspl/liblapack/SRC/zherfs.f @@ -0,0 +1,446 @@ +*> \brief \b ZHERFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHERFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHERFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, +* X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHERFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is Hermitian indefinite, and +*> provides error bounds and backward error estimates for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The Hermitian matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The factored form of the matrix A. AF contains the block +*> diagonal matrix D and the multipliers used to obtain the +*> factor U or L from the factorization A = U*D*U**H or +*> A = L*D*L**H as computed by ZHETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZHETRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by ZHETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEcomputational +* +* ===================================================================== + SUBROUTINE ZHERFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, + $ X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZHEMV, ZHETRS, ZLACN2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHERFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 40 CONTINUE + RWORK( K ) = RWORK( K ) + ABS( DBLE( A( K, K ) ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + ABS( DBLE( A( K, K ) ) )*XK + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZHETRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**H). +* + CALL ZHETRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL ZHETRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of ZHERFS +* + END diff --git a/dspl/liblapack/SRC/zherfsx.f b/dspl/liblapack/SRC/zherfsx.f new file mode 100644 index 0000000..d176b10 --- /dev/null +++ b/dspl/liblapack/SRC/zherfsx.f @@ -0,0 +1,700 @@ +*> \brief \b ZHERFSX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHERFSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHERFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, +* S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, +* WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, EQUED +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX, * ), WORK( * ) +* DOUBLE PRECISION S( * ), PARAMS( * ), BERR( * ), RWORK( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHERFSX improves the computed solution to a system of linear +*> equations when the coefficient matrix is Hermitian indefinite, and +*> provides error bounds and backward error estimates for the +*> solution. In addition to normwise error bound, the code provides +*> maximum componentwise error bound if possible. See comments for +*> ERR_BNDS_NORM and ERR_BNDS_COMP for details of the error bounds. +*> +*> The original system of linear equations may have been equilibrated +*> before calling this routine, as described by arguments EQUED and S +*> below. In this case, the solution and error bounds returned are +*> for the original unequilibrated system. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done to A +*> before calling this routine. This is needed to compute +*> the solution and error bounds correctly. +*> = 'N': No equilibration +*> = 'Y': Both row and column equilibration, i.e., A has been +*> replaced by diag(S) * A * diag(S). +*> The right hand side B has been changed accordingly. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular +*> part of the matrix A, and the strictly lower triangular +*> part of A is not referenced. If UPLO = 'L', the leading +*> N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The factored form of the matrix A. AF contains the block +*> diagonal matrix D and the multipliers used to obtain the +*> factor U or L from the factorization A = U*D*U**T or A = +*> L*D*L**T as computed by DSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSYTRF. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A. If EQUED = 'Y', A is multiplied on +*> the left and right by diag(S). S is an input argument if FACT = +*> 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED +*> = 'Y', each element of S must be positive. If S is output, each +*> element of S is a power of the radix. If S is input, each element +*> of S should be a power of the radix to ensure a reliable solution +*> and error estimates. Scaling by powers of the radix does not cause +*> rounding errors unless the result underflows or overflows. +*> Rounding errors during scaling lead to refining with a matrix that +*> is not equivalent to the input matrix, producing error estimates +*> that may not be reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by DGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is DOUBLE PRECISION array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0D+0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16HEcomputational +* +* ===================================================================== + SUBROUTINE ZHERFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO, EQUED + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX, * ), WORK( * ) + DOUBLE PRECISION S( * ), PARAMS( * ), BERR( * ), RWORK( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* +* ================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION ITREF_DEFAULT, ITHRESH_DEFAULT + DOUBLE PRECISION COMPONENTWISE_DEFAULT, RTHRESH_DEFAULT + DOUBLE PRECISION DZTHRESH_DEFAULT + PARAMETER ( ITREF_DEFAULT = 1.0D+0 ) + PARAMETER ( ITHRESH_DEFAULT = 10.0D+0 ) + PARAMETER ( COMPONENTWISE_DEFAULT = 1.0D+0 ) + PARAMETER ( RTHRESH_DEFAULT = 0.5D+0 ) + PARAMETER ( DZTHRESH_DEFAULT = 0.25D+0 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. Local Scalars .. + CHARACTER(1) NORM + LOGICAL RCEQU + INTEGER J, PREC_TYPE, REF_TYPE + INTEGER N_NORMS + DOUBLE PRECISION ANORM, RCOND_TMP + DOUBLE PRECISION ILLRCOND_THRESH, ERR_LBND, CWISE_WRONG + LOGICAL IGNORE_CWISE + INTEGER ITHRESH + DOUBLE PRECISION RTHRESH, UNSTABLE_THRESH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHECON, ZLA_HERFSX_EXTENDED +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT, TRANSFER +* .. +* .. External Functions .. + EXTERNAL LSAME, ILAPREC + EXTERNAL DLAMCH, ZLANHE, ZLA_HERCOND_X, ZLA_HERCOND_C + DOUBLE PRECISION DLAMCH, ZLANHE, ZLA_HERCOND_X, ZLA_HERCOND_C + LOGICAL LSAME + INTEGER ILAPREC +* .. +* .. Executable Statements .. +* +* Check the input parameters. +* + INFO = 0 + REF_TYPE = INT( ITREF_DEFAULT ) + IF ( NPARAMS .GE. LA_LINRX_ITREF_I ) THEN + IF ( PARAMS( LA_LINRX_ITREF_I ) .LT. 0.0D+0 ) THEN + PARAMS( LA_LINRX_ITREF_I ) = ITREF_DEFAULT + ELSE + REF_TYPE = PARAMS( LA_LINRX_ITREF_I ) + END IF + END IF +* +* Set default parameters. +* + ILLRCOND_THRESH = DBLE( N ) * DLAMCH( 'Epsilon' ) + ITHRESH = INT( ITHRESH_DEFAULT ) + RTHRESH = RTHRESH_DEFAULT + UNSTABLE_THRESH = DZTHRESH_DEFAULT + IGNORE_CWISE = COMPONENTWISE_DEFAULT .EQ. 0.0D+0 +* + IF ( NPARAMS.GE.LA_LINRX_ITHRESH_I ) THEN + IF ( PARAMS( LA_LINRX_ITHRESH_I ).LT.0.0D+0 ) THEN + PARAMS( LA_LINRX_ITHRESH_I ) = ITHRESH + ELSE + ITHRESH = INT( PARAMS( LA_LINRX_ITHRESH_I ) ) + END IF + END IF + IF ( NPARAMS.GE.LA_LINRX_CWISE_I ) THEN + IF ( PARAMS(LA_LINRX_CWISE_I ).LT.0.0D+0 ) THEN + IF ( IGNORE_CWISE ) THEN + PARAMS( LA_LINRX_CWISE_I ) = 0.0D+0 + ELSE + PARAMS( LA_LINRX_CWISE_I ) = 1.0D+0 + END IF + ELSE + IGNORE_CWISE = PARAMS( LA_LINRX_CWISE_I ) .EQ. 0.0D+0 + END IF + END IF + IF ( REF_TYPE .EQ. 0 .OR. N_ERR_BNDS .EQ. 0 ) THEN + N_NORMS = 0 + ELSE IF ( IGNORE_CWISE ) THEN + N_NORMS = 1 + ELSE + N_NORMS = 2 + END IF +* + RCEQU = LSAME( EQUED, 'Y' ) +* +* Test input parameters. +* + IF (.NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RCEQU .AND. .NOT.LSAME( EQUED, 'N' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHERFSX', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + RCOND = 1.0D+0 + DO J = 1, NRHS + BERR( J ) = 0.0D+0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 0.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 0.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 1.0D+0 + END IF + END DO + RETURN + END IF +* +* Default to failure. +* + RCOND = 0.0D+0 + DO J = 1, NRHS + BERR( J ) = 1.0D+0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 0.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 0.0D+0 + END IF + END DO +* +* Compute the norm of A and the reciprocal of the condition +* number of A. +* + NORM = 'I' + ANORM = ZLANHE( NORM, UPLO, N, A, LDA, RWORK ) + CALL ZHECON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, + $ INFO ) +* +* Perform refinement on each right-hand side +* + IF ( REF_TYPE .NE. 0 ) THEN + + PREC_TYPE = ILAPREC( 'E' ) + + CALL ZLA_HERFSX_EXTENDED( PREC_TYPE, UPLO, N, + $ NRHS, A, LDA, AF, LDAF, IPIV, RCEQU, S, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, ERR_BNDS_COMP, + $ WORK, RWORK, WORK(N+1), + $ TRANSFER (RWORK(1:2*N), (/ (ZERO, ZERO) /), N), RCOND, + $ ITHRESH, RTHRESH, UNSTABLE_THRESH, IGNORE_CWISE, + $ INFO ) + END IF + + ERR_LBND = MAX( 10.0D+0, SQRT( DBLE( N ) ) ) * DLAMCH( 'Epsilon' ) + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 1 ) THEN +* +* Compute scaled normwise condition number cond(A*C). +* + IF ( RCEQU ) THEN + RCOND_TMP = ZLA_HERCOND_C( UPLO, N, A, LDA, AF, LDAF, IPIV, + $ S, .TRUE., INFO, WORK, RWORK ) + ELSE + RCOND_TMP = ZLA_HERCOND_C( UPLO, N, A, LDA, AF, LDAF, IPIV, + $ S, .FALSE., INFO, WORK, RWORK ) + END IF + DO J = 1, NRHS +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .GT. 1.0D+0 ) + $ ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 +* +* Threshold the error (see LAWN). +* + IF (RCOND_TMP .LT. ILLRCOND_THRESH) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 0.0D+0 + IF ( INFO .LE. N ) INFO = N + J + ELSE IF ( ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .LT. ERR_LBND ) + $ THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + END DO + END IF + + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 2 ) THEN +* +* Compute componentwise condition number cond(A*diag(Y(:,J))) for +* each right-hand side using the current solution as an estimate of +* the true solution. If the componentwise error estimate is too +* large, then the solution is a lousy estimate of truth and the +* estimated RCOND may be too optimistic. To avoid misleading users, +* the inverse condition number is set to 0.0 when the estimated +* cwise error is at least CWISE_WRONG. +* + CWISE_WRONG = SQRT( DLAMCH( 'Epsilon' ) ) + DO J = 1, NRHS + IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .LT. CWISE_WRONG ) + $ THEN + RCOND_TMP = ZLA_HERCOND_X( UPLO, N, A, LDA, AF, LDAF, + $ IPIV, X( 1, J ), INFO, WORK, RWORK ) + ELSE + RCOND_TMP = 0.0D+0 + END IF +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .GT. 1.0D+0 ) + $ ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 0.0D+0 + IF ( .NOT. IGNORE_CWISE + $ .AND. INFO.LT.N + J ) INFO = N + J + ELSE IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) + $ .LT. ERR_LBND ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF +* + RETURN +* +* End of ZHERFSX +* + END diff --git a/dspl/liblapack/SRC/zhesv.f b/dspl/liblapack/SRC/zhesv.f new file mode 100644 index 0000000..e9f60bf --- /dev/null +++ b/dspl/liblapack/SRC/zhesv.f @@ -0,0 +1,271 @@ +*> \brief ZHESV computes the solution to system of linear equations A * X = B for HE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHESV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHESV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHESV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS +*> matrices. +*> +*> The diagonal pivoting method is used to factor A as +*> A = U * D * U**H, if UPLO = 'U', or +*> A = L * D * L**H, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is Hermitian and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then +*> used to solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the block diagonal matrix D and the +*> multipliers used to obtain the factor U or L from the +*> factorization A = U*D*U**H or A = L*D*L**H as computed by +*> ZHETRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, as +*> determined by ZHETRF. If IPIV(k) > 0, then rows and columns +*> k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +*> diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +*> then rows and columns k-1 and -IPIV(k) were interchanged and +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +*> IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +*> -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +*> diagonal block. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= 1, and for best performance +*> LWORK >= max(1,N*NB), where NB is the optimal blocksize for +*> ZHETRF. +*> for LWORK < N, TRS will be done with Level BLAS 2 +*> for LWORK >= N, TRS will be done with Level BLAS 3 +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEsolve +* +* ===================================================================== + SUBROUTINE ZHESV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHETRF, ZHETRS, ZHETRS2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'ZHETRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHESV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*D*U**H or A = L*D*L**H. +* + CALL ZHETRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + IF ( LWORK.LT.N ) THEN +* +* Solve with TRS ( Use Level BLAS 2) +* + CALL ZHETRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* + ELSE +* +* Solve with TRS2 ( Use Level BLAS 3) +* + CALL ZHETRS2( UPLO,N,NRHS,A,LDA,IPIV,B,LDB,WORK,INFO ) +* + END IF +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZHESV +* + END diff --git a/dspl/liblapack/SRC/zhesv_aa.f b/dspl/liblapack/SRC/zhesv_aa.f new file mode 100644 index 0000000..bbd0fdf --- /dev/null +++ b/dspl/liblapack/SRC/zhesv_aa.f @@ -0,0 +1,252 @@ +*> \brief ZHESV_AA computes the solution to system of linear equations A * X = B for HE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHESV_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHESV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHESV_AA computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Aasen's algorithm is used to factor A as +*> A = U * T * U**H, if UPLO = 'U', or +*> A = L * T * L**H, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is Hermitian and tridiagonal. The factored form +*> of A is then used to solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the tridiagonal matrix T and the +*> multipliers used to obtain the factor U or L from the +*> factorization A = U*T*U**H or A = L*T*L**H as computed by +*> ZHETRF_AA. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= MAX(1,2*N,3*N-2), and for best +*> performance LWORK >= max(1,N*NB), where NB is the optimal +*> blocksize for ZHETRF. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16HEsolve +* +* ===================================================================== + SUBROUTINE ZHESV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT, LWKOPT_HETRF, LWKOPT_HETRS +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHETRF_AA, ZHETRS_AA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + CALL ZHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, -1, INFO ) + LWKOPT_HETRF = INT( WORK(1) ) + CALL ZHETRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ -1, INFO ) + LWKOPT_HETRS = INT( WORK(1) ) + LWKOPT = MAX( LWKOPT_HETRF, LWKOPT_HETRS ) + WORK( 1 ) = LWKOPT + IF( LWORK.LT.LWKOPT .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHESV_AA ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*T*U**H or A = L*T*L**H. +* + CALL ZHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZHETRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZHESV_AA +* + END diff --git a/dspl/liblapack/SRC/zhesv_aa_2stage.f b/dspl/liblapack/SRC/zhesv_aa_2stage.f new file mode 100644 index 0000000..a344400 --- /dev/null +++ b/dspl/liblapack/SRC/zhesv_aa_2stage.f @@ -0,0 +1,284 @@ +*> \brief ZHESV_AA_2STAGE computes the solution to system of linear equations A * X = B for HE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHESV_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHESV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, +* IPIV, IPIV2, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX*16 A( LDA, * ), TB( * ), B( LDB, *), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHESV_AA_2STAGE computes the solution to a complex system of +*> linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Aasen's 2-stage algorithm is used to factor A as +*> A = U * T * U**H, if UPLO = 'U', or +*> A = L * T * L**H, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is Hermitian and band. The matrix T is +*> then LU-factored with partial pivoting. The factored form of A +*> is then used to solve the system of equations A * X = B. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX*16 array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16HEsolve +* +* ===================================================================== + SUBROUTINE ZHESV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LDB, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER I, J, K, I1, I2, TD + INTEGER LDTB, LWKOPT, NB, KB, NT, IINFO + COMPLEX PIV +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHETRF_AA_2STAGE, ZHETRS_AA_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + CALL ZHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, + $ IPIV2, WORK, -1, INFO ) + LWKOPT = INT( WORK(1) ) + IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHESV_AA_2STAGE', -INFO ) + RETURN + ELSE IF( WQUERY .OR. TQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*T*U**H or A = L*T*L**H. +* + CALL ZHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, IPIV2, + $ WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZHETRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, + $ IPIV2, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZHESV_AA_2STAGE +* + END diff --git a/dspl/liblapack/SRC/zhesv_rk.f b/dspl/liblapack/SRC/zhesv_rk.f new file mode 100644 index 0000000..da0e6f2 --- /dev/null +++ b/dspl/liblapack/SRC/zhesv_rk.f @@ -0,0 +1,317 @@ +*> \brief ZHESV_RK computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHESV_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHESV_RK( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), E( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> ZHESV_RK computes the solution to a complex system of linear +*> equations A * X = B, where A is an N-by-N Hermitian matrix +*> and X and B are N-by-NRHS matrices. +*> +*> The bounded Bunch-Kaufman (rook) diagonal pivoting method is used +*> to factor A as +*> A = P*U*D*(U**H)*(P**T), if UPLO = 'U', or +*> A = P*L*D*(L**H)*(P**T), if UPLO = 'L', +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**H (or L**H) is the conjugate of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is Hermitian and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> ZHETRF_RK is called to compute the factorization of a complex +*> Hermitian matrix. The factored form of A is then used to solve +*> the system of equations A * X = B by calling BLAS3 routine ZHETRS_3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, if INFO = 0, diagonal of the block diagonal +*> matrix D and factors U or L as computed by ZHETRF_RK: +*> a) ONLY diagonal elements of the Hermitian block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> For more info see the description of ZHETRF_RK routine. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N) +*> On exit, contains the output computed by the factorization +*> routine ZHETRF_RK, i.e. the superdiagonal (or subdiagonal) +*> elements of the Hermitian block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> +*> For more info see the description of ZHETRF_RK routine. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, +*> as determined by ZHETRF_RK. +*> +*> For more info see the description of ZHETRF_RK routine. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension ( MAX(1,LWORK) ). +*> Work array used in the factorization stage. +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= 1. For best performance +*> of factorization stage LWORK >= max(1,N*NB), where NB is +*> the optimal blocksize for ZHETRF_RK. +*> +*> If LWORK = -1, then a workspace query is assumed; +*> the routine only calculates the optimal size of the WORK +*> array for factorization stage, returns this value as +*> the first entry of the WORK array, and no error message +*> related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHESV_RK( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHETRF_RK, ZHETRS_3 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + CALL ZHETRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, -1, INFO ) + LWKOPT = WORK(1) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHESV_RK ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = P*U*D*(U**H)*(P**T) or +* A = P*U*D*(U**H)*(P**T). +* + CALL ZHETRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, INFO ) +* + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B with BLAS3 solver, overwriting B with X. +* + CALL ZHETRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZHESV_RK +* + END diff --git a/dspl/liblapack/SRC/zhesv_rook.f b/dspl/liblapack/SRC/zhesv_rook.f new file mode 100644 index 0000000..3922b43 --- /dev/null +++ b/dspl/liblapack/SRC/zhesv_rook.f @@ -0,0 +1,295 @@ +*> \brief \b ZHESV_ROOK computes the solution to a system of linear equations A * X = B for HE matrices using the bounded Bunch-Kaufman ("rook") diagonal pivoting method +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHESV_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHESV_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHESV_ROOK computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS +*> matrices. +*> +*> The bounded Bunch-Kaufman ("rook") diagonal pivoting method is used +*> to factor A as +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is Hermitian and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> ZHETRF_ROOK is called to compute the factorization of a complex +*> Hermition matrix A using the bounded Bunch-Kaufman ("rook") diagonal +*> pivoting method. +*> +*> The factored form of A is then used to solve the system +*> of equations A * X = B by calling ZHETRS_ROOK (uses BLAS 2). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the block diagonal matrix D and the +*> multipliers used to obtain the factor U or L from the +*> factorization A = U*D*U**H or A = L*D*L**H as computed by +*> ZHETRF_ROOK. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> Only the last KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> Only the first KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= 1, and for best performance +*> LWORK >= max(1,N*NB), where NB is the optimal blocksize for +*> ZHETRF_ROOK. +*> for LWORK < N, TRS will be done with Level BLAS 2 +*> for LWORK >= N, TRS will be done with Level BLAS 3 +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup complex16HEsolve +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* +* ===================================================================== + SUBROUTINE ZHESV_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHETRF_ROOK, ZHETRS_ROOK +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'ZHETRF_ROOK', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHESV_ROOK ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*D*U**H or A = L*D*L**H. +* + CALL ZHETRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* +* Solve with TRS ( Use Level BLAS 2) +* + CALL ZHETRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZHESV_ROOK +* + END diff --git a/dspl/liblapack/SRC/zhesvx.f b/dspl/liblapack/SRC/zhesvx.f new file mode 100644 index 0000000..d706326 --- /dev/null +++ b/dspl/liblapack/SRC/zhesvx.f @@ -0,0 +1,417 @@ +*> \brief ZHESVX computes the solution to system of linear equations A * X = B for HE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHESVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHESVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, +* LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, +* RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER FACT, UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHESVX uses the diagonal pivoting factorization to compute the +*> solution to a complex system of linear equations A * X = B, +*> where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'N', the diagonal pivoting method is used to factor A. +*> The form of the factorization is +*> A = U * D * U**H, if UPLO = 'U', or +*> A = L * D * L**H, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is Hermitian and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> 2. If some D(i,i)=0, so that D is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 3. The system of equations is solved for X using the factored form +*> of A. +*> +*> 4. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of A has been +*> supplied on entry. +*> = 'F': On entry, AF and IPIV contain the factored form +*> of A. A, AF and IPIV will not be modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The Hermitian matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**H or A = L*D*L**H as computed by ZHETRF. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**H or A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains details of the interchanges and the block structure +*> of D, as determined by ZHETRF. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains details of the interchanges and the block structure +*> of D, as determined by ZHETRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The estimate of the reciprocal condition number of the matrix +*> A. If RCOND is less than the machine precision (in +*> particular, if RCOND = 0), the matrix is singular to working +*> precision. This condition is indicated by a return code of +*> INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= max(1,2*N), and for best +*> performance, when FACT = 'N', LWORK >= max(1,2*N,N*NB), where +*> NB is the optimal blocksize for ZHETRF. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: D(i,i) is exactly zero. The factorization +*> has been completed but the factor D is exactly +*> singular, so the solution and error bounds could +*> not be computed. RCOND = 0 is returned. +*> = N+1: D is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16HEsolve +* +* ===================================================================== + SUBROUTINE ZHESVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, + $ LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, + $ RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOFACT + INTEGER LWKOPT, NB + DOUBLE PRECISION ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANHE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANHE +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHECON, ZHERFS, ZHETRF, ZHETRS, ZLACPY +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKOPT = MAX( 1, 2*N ) + IF( NOFACT ) THEN + NB = ILAENV( 1, 'ZHETRF', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( LWKOPT, N*NB ) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHESVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U**H or A = L*D*L**H. +* + CALL ZLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL ZHETRF( UPLO, N, AF, LDAF, IPIV, WORK, LWORK, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = ZLANHE( 'I', UPLO, N, A, LDA, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL ZHECON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, INFO ) +* +* Compute the solution vectors X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZHETRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL ZHERFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZHESVX +* + END diff --git a/dspl/liblapack/SRC/zhesvxx.f b/dspl/liblapack/SRC/zhesvxx.f new file mode 100644 index 0000000..375fc07 --- /dev/null +++ b/dspl/liblapack/SRC/zhesvxx.f @@ -0,0 +1,697 @@ +*> \brief ZHESVXX computes the solution to system of linear equations A * X = B for HE matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHESVXX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHESVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, +* EQUED, S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, +* N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, +* NPARAMS, PARAMS, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* DOUBLE PRECISION RCOND, RPVGRW +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* DOUBLE PRECISION S( * ), PARAMS( * ), BERR( * ), RWORK( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHESVXX uses the diagonal pivoting factorization to compute the +*> solution to a complex*16 system of linear equations A * X = B, where +*> A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> If requested, both normwise and maximum componentwise error bounds +*> are returned. ZHESVXX will return a solution with a tiny +*> guaranteed error (O(eps) where eps is the working machine +*> precision) unless the matrix is very ill-conditioned, in which +*> case a warning is returned. Relevant condition numbers also are +*> calculated and returned. +*> +*> ZHESVXX accepts user-provided factorizations and equilibration +*> factors; see the definitions of the FACT and EQUED options. +*> Solving with refinement and using a factorization from a previous +*> ZHESVXX call will also produce a solution with either O(eps) +*> errors or warnings, but we cannot make that claim for general +*> user-provided factorizations and equilibration factors if they +*> differ from what ZHESVXX would itself produce. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', double precision scaling factors are computed to equilibrate +*> the system: +*> +*> diag(S)*A*diag(S) *inv(diag(S))*X = diag(S)*B +*> +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor +*> the matrix A (after equilibration if FACT = 'E') as +*> +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> 3. If some D(i,i)=0, so that D is exactly singular, then the +*> routine returns with INFO = i. Otherwise, the factored form of A +*> is used to estimate the condition number of the matrix A (see +*> argument RCOND). If the reciprocal of the condition number is +*> less than machine precision, the routine still goes on to solve +*> for X and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), +*> the routine will use iterative refinement to try to get a small +*> error and error bounds. Refinement calculates the residual to at +*> least twice the working precision. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(R) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF and IPIV contain the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by S. +*> A, AF, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular +*> part of the matrix A, and the strictly lower triangular +*> part of A is not referenced. If UPLO = 'L', the leading +*> N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L from the factorization A = +*> U*D*U**T or A = L*D*L**T as computed by DSYTRF. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L from the factorization A = +*> U*D*U**T or A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains details of the interchanges and the block +*> structure of D, as determined by ZHETRF. If IPIV(k) > 0, +*> then rows and columns k and IPIV(k) were interchanged and +*> D(k,k) is a 1-by-1 diagonal block. If UPLO = 'U' and +*> IPIV(k) = IPIV(k-1) < 0, then rows and columns k-1 and +*> -IPIV(k) were interchanged and D(k-1:k,k-1:k) is a 2-by-2 +*> diagonal block. If UPLO = 'L' and IPIV(k) = IPIV(k+1) < 0, +*> then rows and columns k+1 and -IPIV(k) were interchanged +*> and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains details of the interchanges and the block +*> structure of D, as determined by ZHETRF. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Both row and column equilibration, i.e., A has been +*> replaced by diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A. If EQUED = 'Y', A is multiplied on +*> the left and right by diag(S). S is an input argument if FACT = +*> 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED +*> = 'Y', each element of S must be positive. If S is output, each +*> element of S is a power of the radix. If S is input, each element +*> of S should be a power of the radix to ensure a reliable solution +*> and error estimates. Scaling by powers of the radix does not cause +*> rounding errors unless the result underflows or overflows. +*> Rounding errors during scaling lead to refining with a matrix that +*> is not equivalent to the input matrix, producing error estimates +*> that may not be reliable. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if EQUED = 'Y', B is overwritten by diag(S)*B; +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X to the original +*> system of equations. Note that A and B are modified on exit if +*> EQUED .ne. 'N', and the solution to the equilibrated system is +*> inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] RPVGRW +*> \verbatim +*> RPVGRW is DOUBLE PRECISION +*> Reciprocal pivot growth. On exit, this contains the reciprocal +*> pivot growth factor norm(A)/norm(U). The "max absolute element" +*> norm is used. If this is much less than 1, then the stability of +*> the LU factorization of the (equilibrated) matrix A could be poor. +*> This also means that the solution X, estimated condition numbers, +*> and error bounds could be unreliable. If factorization fails with +*> 0 for the leading INFO columns of A. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is DOUBLE PRECISION array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0D+0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the extra-precise refinement algorithm. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16HEsolve +* +* ===================================================================== + SUBROUTINE ZHESVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ EQUED, S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, + $ N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, + $ NPARAMS, PARAMS, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + DOUBLE PRECISION RCOND, RPVGRW +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) + DOUBLE PRECISION S( * ), PARAMS( * ), BERR( * ), RWORK( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER INFEQU, J + DOUBLE PRECISION AMAX, BIGNUM, SMIN, SMAX, SCOND, SMLNUM +* .. +* .. External Functions .. + EXTERNAL LSAME, DLAMCH, ZLA_HERPVGRW + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLA_HERPVGRW +* .. +* .. External Subroutines .. + EXTERNAL ZHEEQUB, ZHETRF, ZHETRS, ZLACPY, + $ ZLAQHE, XERBLA, ZLASCL2, ZHERFSX +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + ENDIF +* +* Default is failure. If an input parameter is wrong or +* factorization fails, make everything look horrible. Only the +* pivot growth is set here, the rest is initialized in ZHERFSX. +* + RPVGRW = ZERO +* +* Test the input parameters. PARAMS is not tested until ZHERFSX. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT. + $ LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. + $ .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -9 + ELSE + IF ( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -10 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHESVXX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL ZHEEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL ZLAQHE( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) CALL ZLASCL2( N, NRHS, S, B, LDB ) +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LDL^T or UDU^T factorization of A. +* + CALL ZLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL ZHETRF( UPLO, N, AF, LDAF, IPIV, WORK, 5*MAX(1,N), INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Pivot in column INFO is exactly 0 +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + IF( N.GT.0 ) + $ RPVGRW = ZLA_HERPVGRW( UPLO, N, INFO, A, LDA, AF, LDAF, + $ IPIV, RWORK ) + RETURN + END IF + END IF +* +* Compute the reciprocal pivot growth factor RPVGRW. +* + IF( N.GT.0 ) + $ RPVGRW = ZLA_HERPVGRW( UPLO, N, INFO, A, LDA, AF, LDAF, IPIV, + $ RWORK ) +* +* Compute the solution matrix X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZHETRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL ZHERFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, RWORK, INFO ) +* +* Scale solutions. +* + IF ( RCEQU ) THEN + CALL ZLASCL2 ( N, NRHS, S, X, LDX ) + END IF +* + RETURN +* +* End of ZHESVXX +* + END diff --git a/dspl/liblapack/SRC/zheswapr.f b/dspl/liblapack/SRC/zheswapr.f new file mode 100644 index 0000000..1eec847 --- /dev/null +++ b/dspl/liblapack/SRC/zheswapr.f @@ -0,0 +1,203 @@ +*> \brief \b ZHESWAPR applies an elementary permutation on the rows and columns of a Hermitian matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHESWAPR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHESWAPR( UPLO, N, A, LDA, I1, I2) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER I1, I2, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, N ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHESWAPR applies an elementary permutation on the rows and the columns of +*> a hermitian matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the NB diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by CSYTRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] I1 +*> \verbatim +*> I1 is INTEGER +*> Index of the first row to swap +*> \endverbatim +*> +*> \param[in] I2 +*> \verbatim +*> I2 is INTEGER +*> Index of the second row to swap +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEauxiliary +* +* ===================================================================== + SUBROUTINE ZHESWAPR( UPLO, N, A, LDA, I1, I2) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER I1, I2, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, N ) +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + COMPLEX*16 TMP +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZSWAP +* .. +* .. Executable Statements .. +* + UPPER = LSAME( UPLO, 'U' ) + IF (UPPER) THEN +* +* UPPER +* first swap +* - swap column I1 and I2 from I1 to I1-1 + CALL ZSWAP( I1-1, A(1,I1), 1, A(1,I2), 1 ) +* +* second swap : +* - swap A(I1,I1) and A(I2,I2) +* - swap row I1 from I1+1 to I2-1 with col I2 from I1+1 to I2-1 +* - swap A(I2,I1) and A(I1,I2) + + TMP=A(I1,I1) + A(I1,I1)=A(I2,I2) + A(I2,I2)=TMP +* + DO I=1,I2-I1-1 + TMP=A(I1,I1+I) + A(I1,I1+I)=DCONJG(A(I1+I,I2)) + A(I1+I,I2)=DCONJG(TMP) + END DO +* + A(I1,I2)=DCONJG(A(I1,I2)) + +* +* third swap +* - swap row I1 and I2 from I2+1 to N + DO I=I2+1,N + TMP=A(I1,I) + A(I1,I)=A(I2,I) + A(I2,I)=TMP + END DO +* + ELSE +* +* LOWER +* first swap +* - swap row I1 and I2 from 1 to I1-1 + CALL ZSWAP ( I1-1, A(I1,1), LDA, A(I2,1), LDA ) +* +* second swap : +* - swap A(I1,I1) and A(I2,I2) +* - swap col I1 from I1+1 to I2-1 with row I2 from I1+1 to I2-1 +* - swap A(I2,I1) and A(I1,I2) + + TMP=A(I1,I1) + A(I1,I1)=A(I2,I2) + A(I2,I2)=TMP +* + DO I=1,I2-I1-1 + TMP=A(I1+I,I1) + A(I1+I,I1)=DCONJG(A(I2,I1+I)) + A(I2,I1+I)=DCONJG(TMP) + END DO +* + A(I2,I1)=DCONJG(A(I2,I1)) +* +* third swap +* - swap col I1 and I2 from I2+1 to N + DO I=I2+1,N + TMP=A(I,I1) + A(I,I1)=A(I,I2) + A(I,I2)=TMP + END DO +* + ENDIF + + END SUBROUTINE ZHESWAPR + diff --git a/dspl/liblapack/SRC/zhetd2.f b/dspl/liblapack/SRC/zhetd2.f new file mode 100644 index 0000000..6c5b8aa --- /dev/null +++ b/dspl/liblapack/SRC/zhetd2.f @@ -0,0 +1,334 @@ +*> \brief \b ZHETD2 reduces a Hermitian matrix to real symmetric tridiagonal form by an unitary similarity transformation (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETD2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETD2( UPLO, N, A, LDA, D, E, TAU, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* COMPLEX*16 A( LDA, * ), TAU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETD2 reduces a complex Hermitian matrix A to real symmetric +*> tridiagonal form T by a unitary similarity transformation: +*> Q**H * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the unitary +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the unitary matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n-1) . . . H(2) H(1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +*> A(1:i-1,i+1), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(n-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +*> and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( d e v2 v3 v4 ) ( d ) +*> ( d e v3 v4 ) ( e d ) +*> ( d e v4 ) ( v1 e d ) +*> ( d e ) ( v1 v2 e d ) +*> ( d ) ( v1 v2 v3 e d ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHETD2( UPLO, N, A, LDA, D, E, TAU, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + COMPLEX*16 A( LDA, * ), TAU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO, HALF + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ), + $ HALF = ( 0.5D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + COMPLEX*16 ALPHA, TAUI +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZHEMV, ZHER2, ZLARFG +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U') + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETD2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A +* + A( N, N ) = DBLE( A( N, N ) ) + DO 10 I = N - 1, 1, -1 +* +* Generate elementary reflector H(i) = I - tau * v * v**H +* to annihilate A(1:i-1,i+1) +* + ALPHA = A( I, I+1 ) + CALL ZLARFG( I, ALPHA, A( 1, I+1 ), 1, TAUI ) + E( I ) = ALPHA +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(1:i,1:i) +* + A( I, I+1 ) = ONE +* +* Compute x := tau * A * v storing x in TAU(1:i) +* + CALL ZHEMV( UPLO, I, TAUI, A, LDA, A( 1, I+1 ), 1, ZERO, + $ TAU, 1 ) +* +* Compute w := x - 1/2 * tau * (x**H * v) * v +* + ALPHA = -HALF*TAUI*ZDOTC( I, TAU, 1, A( 1, I+1 ), 1 ) + CALL ZAXPY( I, ALPHA, A( 1, I+1 ), 1, TAU, 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w**H - w * v**H +* + CALL ZHER2( UPLO, I, -ONE, A( 1, I+1 ), 1, TAU, 1, A, + $ LDA ) +* + ELSE + A( I, I ) = DBLE( A( I, I ) ) + END IF + A( I, I+1 ) = E( I ) + D( I+1 ) = A( I+1, I+1 ) + TAU( I ) = TAUI + 10 CONTINUE + D( 1 ) = A( 1, 1 ) + ELSE +* +* Reduce the lower triangle of A +* + A( 1, 1 ) = DBLE( A( 1, 1 ) ) + DO 20 I = 1, N - 1 +* +* Generate elementary reflector H(i) = I - tau * v * v**H +* to annihilate A(i+2:n,i) +* + ALPHA = A( I+1, I ) + CALL ZLARFG( N-I, ALPHA, A( MIN( I+2, N ), I ), 1, TAUI ) + E( I ) = ALPHA +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(i+1:n,i+1:n) +* + A( I+1, I ) = ONE +* +* Compute x := tau * A * v storing y in TAU(i:n-1) +* + CALL ZHEMV( UPLO, N-I, TAUI, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, TAU( I ), 1 ) +* +* Compute w := x - 1/2 * tau * (x**H * v) * v +* + ALPHA = -HALF*TAUI*ZDOTC( N-I, TAU( I ), 1, A( I+1, I ), + $ 1 ) + CALL ZAXPY( N-I, ALPHA, A( I+1, I ), 1, TAU( I ), 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w**H - w * v**H +* + CALL ZHER2( UPLO, N-I, -ONE, A( I+1, I ), 1, TAU( I ), 1, + $ A( I+1, I+1 ), LDA ) +* + ELSE + A( I+1, I+1 ) = DBLE( A( I+1, I+1 ) ) + END IF + A( I+1, I ) = E( I ) + D( I ) = A( I, I ) + TAU( I ) = TAUI + 20 CONTINUE + D( N ) = A( N, N ) + END IF +* + RETURN +* +* End of ZHETD2 +* + END diff --git a/dspl/liblapack/SRC/zhetf2.f b/dspl/liblapack/SRC/zhetf2.f new file mode 100644 index 0000000..612d9c5 --- /dev/null +++ b/dspl/liblapack/SRC/zhetf2.f @@ -0,0 +1,661 @@ +*> \brief \b ZHETF2 computes the factorization of a complex Hermitian matrix, using the diagonal pivoting method (unblocked algorithm, calling Level 2 BLAS). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETF2( UPLO, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETF2 computes the factorization of a complex Hermitian matrix A +*> using the Bunch-Kaufman diagonal pivoting method: +*> +*> A = U*D*U**H or A = L*D*L**H +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, U**H is the conjugate transpose of U, and D is +*> Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k-1) < 0, then rows and columns +*> k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k+1) < 0, then rows and columns +*> k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1) +*> is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup complex16HEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**H, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**H, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> 09-29-06 - patch from +*> Bobby Cheng, MathWorks +*> +*> Replace l.210 and l.393 +*> IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +*> by +*> IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) .OR. DISNAN(ABSAKK) ) THEN +*> +*> 01-01-96 - Based on modifications by +*> J. Lewis, Boeing Computer Services Company +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHETF2( UPLO, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KK, KP, KSTEP + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, D, D11, D22, R1, ROWMAX, + $ TT + COMPLEX*16 D12, D21, T, WK, WKM1, WKP1, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + INTEGER IZAMAX + DOUBLE PRECISION DLAPY2 + EXTERNAL LSAME, IZAMAX, DLAPY2, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZHER, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETF2', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**H using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 90 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( DBLE( A( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = IZAMAX( K-1, A( 1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) .OR. DISNAN(ABSAKK) ) THEN +* +* Column K is zero or underflow, or contains a NaN: +* set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = DBLE( A( K, K ) ) + ELSE +* +* ============================================================ +* +* Test for interchange +* + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine only ROWMAX. +* + JMAX = IMAX + IZAMAX( K-IMAX, A( IMAX, IMAX+1 ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + IF( IMAX.GT.1 ) THEN + JMAX = IZAMAX( IMAX-1, A( 1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE IF( ABS( DBLE( A( IMAX, IMAX ) ) ).GE.ALPHA*ROWMAX ) + $ THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF +* + END IF +* +* ============================================================ +* + KK = K - KSTEP + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL ZSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) + DO 20 J = KP + 1, KK - 1 + T = DCONJG( A( J, KK ) ) + A( J, KK ) = DCONJG( A( KP, J ) ) + A( KP, J ) = T + 20 CONTINUE + A( KP, KK ) = DCONJG( A( KP, KK ) ) + R1 = DBLE( A( KK, KK ) ) + A( KK, KK ) = DBLE( A( KP, KP ) ) + A( KP, KP ) = R1 + IF( KSTEP.EQ.2 ) THEN + A( K, K ) = DBLE( A( K, K ) ) + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + ELSE + A( K, K ) = DBLE( A( K, K ) ) + IF( KSTEP.EQ.2 ) + $ A( K-1, K-1 ) = DBLE( A( K-1, K-1 ) ) + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)**H = A - W(k)*1/D(k)*W(k)**H +* + R1 = ONE / DBLE( A( K, K ) ) + CALL ZHER( UPLO, K-1, -R1, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL ZDSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**H +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )**H +* + IF( K.GT.2 ) THEN +* + D = DLAPY2( DBLE( A( K-1, K ) ), + $ DIMAG( A( K-1, K ) ) ) + D22 = DBLE( A( K-1, K-1 ) ) / D + D11 = DBLE( A( K, K ) ) / D + TT = ONE / ( D11*D22-ONE ) + D12 = A( K-1, K ) / D + D = TT / D +* + DO 40 J = K - 2, 1, -1 + WKM1 = D*( D11*A( J, K-1 )-DCONJG( D12 )* + $ A( J, K ) ) + WK = D*( D22*A( J, K )-D12*A( J, K-1 ) ) + DO 30 I = J, 1, -1 + A( I, J ) = A( I, J ) - A( I, K )*DCONJG( WK ) - + $ A( I, K-1 )*DCONJG( WKM1 ) + 30 CONTINUE + A( J, K ) = WK + A( J, K-1 ) = WKM1 + A( J, J ) = DCMPLX( DBLE( A( J, J ) ), 0.0D+0 ) + 40 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**H using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 50 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 90 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( DBLE( A( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + IZAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) .OR. DISNAN(ABSAKK) ) THEN +* +* Column K is zero or underflow, or contains a NaN: +* set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = DBLE( A( K, K ) ) + ELSE +* +* ============================================================ +* +* Test for interchange +* + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine only ROWMAX. +* + JMAX = K - 1 + IZAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + IZAMAX( N-IMAX, A( IMAX+1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE IF( ABS( DBLE( A( IMAX, IMAX ) ) ).GE.ALPHA*ROWMAX ) + $ THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF +* + END IF +* +* ============================================================ +* + KK = K + KSTEP - 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) + DO 60 J = KK + 1, KP - 1 + T = DCONJG( A( J, KK ) ) + A( J, KK ) = DCONJG( A( KP, J ) ) + A( KP, J ) = T + 60 CONTINUE + A( KP, KK ) = DCONJG( A( KP, KK ) ) + R1 = DBLE( A( KK, KK ) ) + A( KK, KK ) = DBLE( A( KP, KP ) ) + A( KP, KP ) = R1 + IF( KSTEP.EQ.2 ) THEN + A( K, K ) = DBLE( A( K, K ) ) + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + ELSE + A( K, K ) = DBLE( A( K, K ) ) + IF( KSTEP.EQ.2 ) + $ A( K+1, K+1 ) = DBLE( A( K+1, K+1 ) ) + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)**H = A - W(k)*(1/D(k))*W(k)**H +* + R1 = ONE / DBLE( A( K, K ) ) + CALL ZHER( UPLO, N-K, -R1, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column K +* + CALL ZDSCAL( N-K, R1, A( K+1, K ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k) +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )**H +* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )**H +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D = DLAPY2( DBLE( A( K+1, K ) ), + $ DIMAG( A( K+1, K ) ) ) + D11 = DBLE( A( K+1, K+1 ) ) / D + D22 = DBLE( A( K, K ) ) / D + TT = ONE / ( D11*D22-ONE ) + D21 = A( K+1, K ) / D + D = TT / D +* + DO 80 J = K + 2, N + WK = D*( D11*A( J, K )-D21*A( J, K+1 ) ) + WKP1 = D*( D22*A( J, K+1 )-DCONJG( D21 )* + $ A( J, K ) ) + DO 70 I = J, N + A( I, J ) = A( I, J ) - A( I, K )*DCONJG( WK ) - + $ A( I, K+1 )*DCONJG( WKP1 ) + 70 CONTINUE + A( J, K ) = WK + A( J, K+1 ) = WKP1 + A( J, J ) = DCMPLX( DBLE( A( J, J ) ), 0.0D+0 ) + 80 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 50 +* + END IF +* + 90 CONTINUE + RETURN +* +* End of ZHETF2 +* + END diff --git a/dspl/liblapack/SRC/zhetf2_rk.f b/dspl/liblapack/SRC/zhetf2_rk.f new file mode 100644 index 0000000..84d3a02 --- /dev/null +++ b/dspl/liblapack/SRC/zhetf2_rk.f @@ -0,0 +1,1039 @@ +*> \brief \b ZHETF2_RK computes the factorization of a complex Hermitian indefinite matrix using the bounded Bunch-Kaufman (rook) diagonal pivoting method (BLAS2 unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETF2_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETF2_RK( UPLO, N, A, LDA, E, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), E ( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> ZHETF2_RK computes the factorization of a complex Hermitian matrix A +*> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: +*> +*> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**H (or L**H) is the conjugate of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is Hermitian and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> For more information see Further Details section. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, contains: +*> a) ONLY diagonal elements of the Hermitian block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N) +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the Hermitian block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> IPIV describes the permutation matrix P in the factorization +*> of matrix A as follows. The absolute value of IPIV(k) +*> represents the index of row and column that were +*> interchanged with the k-th row and column. The value of UPLO +*> describes the order in which the interchanges were applied. +*> Also, the sign of IPIV represents the block structure of +*> the Hermitian block diagonal matrix D with 1-by-1 or 2-by-2 +*> diagonal blocks which correspond to 1 or 2 interchanges +*> at each factorization step. For more info see Further +*> Details section. +*> +*> If UPLO = 'U', +*> ( in factorization order, k decreases from N to 1 ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N); +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k-1) < 0 means: +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k-1) != k-1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k-1) = k-1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) <= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> +*> If UPLO = 'L', +*> ( in factorization order, k increases from 1 to N ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N). +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k+1) < 0 means: +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k+1) != k+1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k+1) = k+1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) >= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> TODO: put further details +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> 01-01-96 - Based on modifications by +*> J. Lewis, Boeing Computer Services Company +*> A. Petitet, Computer Science Dept., +*> Univ. of Tenn., Knoxville abd , USA +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHETF2_RK( UPLO, N, A, LDA, E, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), E( * ) +* .. +* +* ====================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL DONE, UPPER + INTEGER I, II, IMAX, ITEMP, J, JMAX, K, KK, KP, KSTEP, + $ P + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, D, D11, D22, R1, DTEMP, + $ ROWMAX, TT, SFMIN + COMPLEX*16 D12, D21, T, WK, WKM1, WKP1, Z +* .. +* .. External Functions .. +* + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL LSAME, IZAMAX, DLAMCH, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZHER, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( DBLE( Z ) ) + ABS( DIMAG( Z ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETF2_RK', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = DLAMCH( 'S' ) +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**H using the upper triangle of A +* +* Initilize the first entry of array E, where superdiagonal +* elements of D are stored +* + E( 1 ) = CZERO +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 34 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( DBLE( A( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = IZAMAX( K-1, A( 1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( ( MAX( ABSAKK, COLMAX ).EQ.ZERO ) ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = DBLE( A( K, K ) ) +* +* Set E( K ) to zero +* + IF( K.GT.1 ) + $ E( K ) = CZERO +* + ELSE +* +* ============================================================ +* +* BEGIN pivot search +* +* Case(1) +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 12 CONTINUE +* +* BEGIN pivot search loop body +* +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + IZAMAX( K-IMAX, A( IMAX, IMAX+1 ), + $ LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = IZAMAX( IMAX-1, A( 1, IMAX ), 1 ) + DTEMP = CABS1( A( ITEMP, IMAX ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Case(2) +* Equivalent to testing for +* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABS( DBLE( A( IMAX, IMAX ) ) ) + $ .LT.ALPHA*ROWMAX ) ) THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Case(3) +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K-1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. +* +* Case(4) + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* END pivot search loop body +* + IF( .NOT.DONE ) GOTO 12 +* + END IF +* +* END pivot search +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K - KSTEP + 1 +* +* For only a 2x2 pivot, interchange rows and columns K and P +* in the leading submatrix A(1:k,1:k) +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* (1) Swap columnar parts + IF( P.GT.1 ) + $ CALL ZSWAP( P-1, A( 1, K ), 1, A( 1, P ), 1 ) +* (2) Swap and conjugate middle parts + DO 14 J = P + 1, K - 1 + T = DCONJG( A( J, K ) ) + A( J, K ) = DCONJG( A( P, J ) ) + A( P, J ) = T + 14 CONTINUE +* (3) Swap and conjugate corner elements at row-col interserction + A( P, K ) = DCONJG( A( P, K ) ) +* (4) Swap diagonal elements at row-col intersection + R1 = DBLE( A( K, K ) ) + A( K, K ) = DBLE( A( P, P ) ) + A( P, P ) = R1 +* +* Convert upper triangle of A into U form by applying +* the interchanges in columns k+1:N. +* + IF( K.LT.N ) + $ CALL ZSWAP( N-K, A( K, K+1 ), LDA, A( P, K+1 ), LDA ) +* + END IF +* +* For both 1x1 and 2x2 pivots, interchange rows and +* columns KK and KP in the leading submatrix A(1:k,1:k) +* + IF( KP.NE.KK ) THEN +* (1) Swap columnar parts + IF( KP.GT.1 ) + $ CALL ZSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) +* (2) Swap and conjugate middle parts + DO 15 J = KP + 1, KK - 1 + T = DCONJG( A( J, KK ) ) + A( J, KK ) = DCONJG( A( KP, J ) ) + A( KP, J ) = T + 15 CONTINUE +* (3) Swap and conjugate corner elements at row-col interserction + A( KP, KK ) = DCONJG( A( KP, KK ) ) +* (4) Swap diagonal elements at row-col intersection + R1 = DBLE( A( KK, KK ) ) + A( KK, KK ) = DBLE( A( KP, KP ) ) + A( KP, KP ) = R1 +* + IF( KSTEP.EQ.2 ) THEN +* (*) Make sure that diagonal element of pivot is real + A( K, K ) = DBLE( A( K, K ) ) +* (5) Swap row elements + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF +* +* Convert upper triangle of A into U form by applying +* the interchanges in columns k+1:N. +* + IF( K.LT.N ) + $ CALL ZSWAP( N-K, A( KK, K+1 ), LDA, A( KP, K+1 ), + $ LDA ) +* + ELSE +* (*) Make sure that diagonal element of pivot is real + A( K, K ) = DBLE( A( K, K ) ) + IF( KSTEP.EQ.2 ) + $ A( K-1, K-1 ) = DBLE( A( K-1, K-1 ) ) + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* + IF( K.GT.1 ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) and +* store U(k) in column k +* + IF( ABS( DBLE( A( K, K ) ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*1/D(k)*W(k)**T +* + D11 = ONE / DBLE( A( K, K ) ) + CALL ZHER( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL ZDSCAL( K-1, D11, A( 1, K ), 1 ) + ELSE +* +* Store L(k) in column K +* + D11 = DBLE( A( K, K ) ) + DO 16 II = 1, K - 1 + A( II, K ) = A( II, K ) / D11 + 16 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL ZHER( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) + END IF +* +* Store the superdiagonal element of D in array E +* + E( K ) = CZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T +* = A - ( ( A(k-1)A(k) )*inv(D(k)) ) * ( A(k-1)A(k) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.GT.2 ) THEN +* D = |A12| + D = DLAPY2( DBLE( A( K-1, K ) ), + $ DIMAG( A( K-1, K ) ) ) + D11 = A( K, K ) / D + D22 = A( K-1, K-1 ) / D + D12 = A( K-1, K ) / D + TT = ONE / ( D11*D22-ONE ) +* + DO 30 J = K - 2, 1, -1 +* +* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J +* + WKM1 = TT*( D11*A( J, K-1 )-DCONJG( D12 )* + $ A( J, K ) ) + WK = TT*( D22*A( J, K )-D12*A( J, K-1 ) ) +* +* Perform a rank-2 update of A(1:k-2,1:k-2) +* + DO 20 I = J, 1, -1 + A( I, J ) = A( I, J ) - + $ ( A( I, K ) / D )*DCONJG( WK ) - + $ ( A( I, K-1 ) / D )*DCONJG( WKM1 ) + 20 CONTINUE +* +* Store U(k) and U(k-1) in cols k and k-1 for row J +* + A( J, K ) = WK / D + A( J, K-1 ) = WKM1 / D +* (*) Make sure that diagonal element of pivot is real + A( J, J ) = DCMPLX( DBLE( A( J, J ) ), ZERO ) +* + 30 CONTINUE +* + END IF +* +* Copy superdiagonal elements of D(K) to E(K) and +* ZERO out superdiagonal entry of A +* + E( K ) = A( K-1, K ) + E( K-1 ) = CZERO + A( K-1, K ) = CZERO +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 34 CONTINUE +* + ELSE +* +* Factorize A as L*D*L**H using the lower triangle of A +* +* Initilize the unused last entry of the subdiagonal array E. +* + E( N ) = CZERO +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 64 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( DBLE( A( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + IZAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = DBLE( A( K, K ) ) +* +* Set E( K ) to zero +* + IF( K.LT.N ) + $ E( K ) = CZERO +* + ELSE +* +* ============================================================ +* +* BEGIN pivot search +* +* Case(1) +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 42 CONTINUE +* +* BEGIN pivot search loop body +* +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + IZAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + IZAMAX( N-IMAX, A( IMAX+1, IMAX ), + $ 1 ) + DTEMP = CABS1( A( ITEMP, IMAX ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Case(2) +* Equivalent to testing for +* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABS( DBLE( A( IMAX, IMAX ) ) ) + $ .LT.ALPHA*ROWMAX ) ) THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Case(3) +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. +* +* Case(4) + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* +* END pivot search loop body +* + IF( .NOT.DONE ) GOTO 42 +* + END IF +* +* END pivot search +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K + KSTEP - 1 +* +* For only a 2x2 pivot, interchange rows and columns K and P +* in the trailing submatrix A(k:n,k:n) +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* (1) Swap columnar parts + IF( P.LT.N ) + $ CALL ZSWAP( N-P, A( P+1, K ), 1, A( P+1, P ), 1 ) +* (2) Swap and conjugate middle parts + DO 44 J = K + 1, P - 1 + T = DCONJG( A( J, K ) ) + A( J, K ) = DCONJG( A( P, J ) ) + A( P, J ) = T + 44 CONTINUE +* (3) Swap and conjugate corner elements at row-col interserction + A( P, K ) = DCONJG( A( P, K ) ) +* (4) Swap diagonal elements at row-col intersection + R1 = DBLE( A( K, K ) ) + A( K, K ) = DBLE( A( P, P ) ) + A( P, P ) = R1 +* +* Convert lower triangle of A into L form by applying +* the interchanges in columns 1:k-1. +* + IF ( K.GT.1 ) + $ CALL ZSWAP( K-1, A( K, 1 ), LDA, A( P, 1 ), LDA ) +* + END IF +* +* For both 1x1 and 2x2 pivots, interchange rows and +* columns KK and KP in the trailing submatrix A(k:n,k:n) +* + IF( KP.NE.KK ) THEN +* (1) Swap columnar parts + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) +* (2) Swap and conjugate middle parts + DO 45 J = KK + 1, KP - 1 + T = DCONJG( A( J, KK ) ) + A( J, KK ) = DCONJG( A( KP, J ) ) + A( KP, J ) = T + 45 CONTINUE +* (3) Swap and conjugate corner elements at row-col interserction + A( KP, KK ) = DCONJG( A( KP, KK ) ) +* (4) Swap diagonal elements at row-col intersection + R1 = DBLE( A( KK, KK ) ) + A( KK, KK ) = DBLE( A( KP, KP ) ) + A( KP, KP ) = R1 +* + IF( KSTEP.EQ.2 ) THEN +* (*) Make sure that diagonal element of pivot is real + A( K, K ) = DBLE( A( K, K ) ) +* (5) Swap row elements + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF +* +* Convert lower triangle of A into L form by applying +* the interchanges in columns 1:k-1. +* + IF ( K.GT.1 ) + $ CALL ZSWAP( K-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) +* + ELSE +* (*) Make sure that diagonal element of pivot is real + A( K, K ) = DBLE( A( K, K ) ) + IF( KSTEP.EQ.2 ) + $ A( K+1, K+1 ) = DBLE( A( K+1, K+1 ) ) + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of A now holds +* +* W(k) = L(k)*D(k), +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) and +* store L(k) in column k +* +* Handle division by a small number +* + IF( ABS( DBLE( A( K, K ) ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* + D11 = ONE / DBLE( A( K, K ) ) + CALL ZHER( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column k +* + CALL ZDSCAL( N-K, D11, A( K+1, K ), 1 ) + ELSE +* +* Store L(k) in column k +* + D11 = DBLE( A( K, K ) ) + DO 46 II = K + 1, N + A( II, K ) = A( II, K ) / D11 + 46 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL ZHER( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) + END IF +* +* Store the subdiagonal element of D in array E +* + E( K ) = CZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) ) * D(k) * ( L(k) L(k+1) )**T +* = A - ( ( A(k)A(k+1) )*inv(D(k) ) * ( A(k)A(k+1) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.LT.N-1 ) THEN +* D = |A21| + D = DLAPY2( DBLE( A( K+1, K ) ), + $ DIMAG( A( K+1, K ) ) ) + D11 = DBLE( A( K+1, K+1 ) ) / D + D22 = DBLE( A( K, K ) ) / D + D21 = A( K+1, K ) / D + TT = ONE / ( D11*D22-ONE ) +* + DO 60 J = K + 2, N +* +* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J +* + WK = TT*( D11*A( J, K )-D21*A( J, K+1 ) ) + WKP1 = TT*( D22*A( J, K+1 )-DCONJG( D21 )* + $ A( J, K ) ) +* +* Perform a rank-2 update of A(k+2:n,k+2:n) +* + DO 50 I = J, N + A( I, J ) = A( I, J ) - + $ ( A( I, K ) / D )*DCONJG( WK ) - + $ ( A( I, K+1 ) / D )*DCONJG( WKP1 ) + 50 CONTINUE +* +* Store L(k) and L(k+1) in cols k and k+1 for row J +* + A( J, K ) = WK / D + A( J, K+1 ) = WKP1 / D +* (*) Make sure that diagonal element of pivot is real + A( J, J ) = DCMPLX( DBLE( A( J, J ) ), ZERO ) +* + 60 CONTINUE +* + END IF +* +* Copy subdiagonal elements of D(K) to E(K) and +* ZERO out subdiagonal entry of A +* + E( K ) = A( K+1, K ) + E( K+1 ) = CZERO + A( K+1, K ) = CZERO +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 40 +* + 64 CONTINUE +* + END IF +* + RETURN +* +* End of ZHETF2_RK +* + END diff --git a/dspl/liblapack/SRC/zhetf2_rook.f b/dspl/liblapack/SRC/zhetf2_rook.f new file mode 100644 index 0000000..7d524cd --- /dev/null +++ b/dspl/liblapack/SRC/zhetf2_rook.f @@ -0,0 +1,910 @@ +*> \brief \b ZHETF2_ROOK computes the factorization of a complex Hermitian indefinite matrix using the bounded Bunch-Kaufman ("rook") diagonal pivoting method (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETF2_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETF2_ROOK( UPLO, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETF2_ROOK computes the factorization of a complex Hermitian matrix A +*> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method: +*> +*> A = U*D*U**H or A = L*D*L**H +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, U**H is the conjugate transpose of U, and D is +*> Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup complex16HEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**H, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**H, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> 01-01-96 - Based on modifications by +*> J. Lewis, Boeing Computer Services Company +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHETF2_ROOK( UPLO, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ====================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE, UPPER + INTEGER I, II, IMAX, ITEMP, J, JMAX, K, KK, KP, KSTEP, + $ P + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, D, D11, D22, R1, DTEMP, + $ ROWMAX, TT, SFMIN + COMPLEX*16 D12, D21, T, WK, WKM1, WKP1, Z +* .. +* .. External Functions .. +* + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL LSAME, IZAMAX, DLAMCH, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZHER, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( DBLE( Z ) ) + ABS( DIMAG( Z ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETF2_ROOK', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = DLAMCH( 'S' ) +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**H using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 70 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( DBLE( A( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = IZAMAX( K-1, A( 1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( ( MAX( ABSAKK, COLMAX ).EQ.ZERO ) ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = DBLE( A( K, K ) ) + ELSE +* +* ============================================================ +* +* BEGIN pivot search +* +* Case(1) +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 12 CONTINUE +* +* BEGIN pivot search loop body +* +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + IZAMAX( K-IMAX, A( IMAX, IMAX+1 ), + $ LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = IZAMAX( IMAX-1, A( 1, IMAX ), 1 ) + DTEMP = CABS1( A( ITEMP, IMAX ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Case(2) +* Equivalent to testing for +* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABS( DBLE( A( IMAX, IMAX ) ) ) + $ .LT.ALPHA*ROWMAX ) ) THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Case(3) +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K-1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. +* +* Case(4) + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* END pivot search loop body +* + IF( .NOT.DONE ) GOTO 12 +* + END IF +* +* END pivot search +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K - KSTEP + 1 +* +* For only a 2x2 pivot, interchange rows and columns K and P +* in the leading submatrix A(1:k,1:k) +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* (1) Swap columnar parts + IF( P.GT.1 ) + $ CALL ZSWAP( P-1, A( 1, K ), 1, A( 1, P ), 1 ) +* (2) Swap and conjugate middle parts + DO 14 J = P + 1, K - 1 + T = DCONJG( A( J, K ) ) + A( J, K ) = DCONJG( A( P, J ) ) + A( P, J ) = T + 14 CONTINUE +* (3) Swap and conjugate corner elements at row-col interserction + A( P, K ) = DCONJG( A( P, K ) ) +* (4) Swap diagonal elements at row-col intersection + R1 = DBLE( A( K, K ) ) + A( K, K ) = DBLE( A( P, P ) ) + A( P, P ) = R1 + END IF +* +* For both 1x1 and 2x2 pivots, interchange rows and +* columns KK and KP in the leading submatrix A(1:k,1:k) +* + IF( KP.NE.KK ) THEN +* (1) Swap columnar parts + IF( KP.GT.1 ) + $ CALL ZSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) +* (2) Swap and conjugate middle parts + DO 15 J = KP + 1, KK - 1 + T = DCONJG( A( J, KK ) ) + A( J, KK ) = DCONJG( A( KP, J ) ) + A( KP, J ) = T + 15 CONTINUE +* (3) Swap and conjugate corner elements at row-col interserction + A( KP, KK ) = DCONJG( A( KP, KK ) ) +* (4) Swap diagonal elements at row-col intersection + R1 = DBLE( A( KK, KK ) ) + A( KK, KK ) = DBLE( A( KP, KP ) ) + A( KP, KP ) = R1 +* + IF( KSTEP.EQ.2 ) THEN +* (*) Make sure that diagonal element of pivot is real + A( K, K ) = DBLE( A( K, K ) ) +* (5) Swap row elements + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + ELSE +* (*) Make sure that diagonal element of pivot is real + A( K, K ) = DBLE( A( K, K ) ) + IF( KSTEP.EQ.2 ) + $ A( K-1, K-1 ) = DBLE( A( K-1, K-1 ) ) + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* + IF( K.GT.1 ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) and +* store U(k) in column k +* + IF( ABS( DBLE( A( K, K ) ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*1/D(k)*W(k)**T +* + D11 = ONE / DBLE( A( K, K ) ) + CALL ZHER( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL ZDSCAL( K-1, D11, A( 1, K ), 1 ) + ELSE +* +* Store L(k) in column K +* + D11 = DBLE( A( K, K ) ) + DO 16 II = 1, K - 1 + A( II, K ) = A( II, K ) / D11 + 16 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL ZHER( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T +* = A - ( ( A(k-1)A(k) )*inv(D(k)) ) * ( A(k-1)A(k) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.GT.2 ) THEN +* D = |A12| + D = DLAPY2( DBLE( A( K-1, K ) ), + $ DIMAG( A( K-1, K ) ) ) + D11 = A( K, K ) / D + D22 = A( K-1, K-1 ) / D + D12 = A( K-1, K ) / D + TT = ONE / ( D11*D22-ONE ) +* + DO 30 J = K - 2, 1, -1 +* +* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J +* + WKM1 = TT*( D11*A( J, K-1 )-DCONJG( D12 )* + $ A( J, K ) ) + WK = TT*( D22*A( J, K )-D12*A( J, K-1 ) ) +* +* Perform a rank-2 update of A(1:k-2,1:k-2) +* + DO 20 I = J, 1, -1 + A( I, J ) = A( I, J ) - + $ ( A( I, K ) / D )*DCONJG( WK ) - + $ ( A( I, K-1 ) / D )*DCONJG( WKM1 ) + 20 CONTINUE +* +* Store U(k) and U(k-1) in cols k and k-1 for row J +* + A( J, K ) = WK / D + A( J, K-1 ) = WKM1 / D +* (*) Make sure that diagonal element of pivot is real + A( J, J ) = DCMPLX( DBLE( A( J, J ) ), ZERO ) +* + 30 CONTINUE +* + END IF +* + END IF +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**H using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 70 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( DBLE( A( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + IZAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = DBLE( A( K, K ) ) + ELSE +* +* ============================================================ +* +* BEGIN pivot search +* +* Case(1) +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 42 CONTINUE +* +* BEGIN pivot search loop body +* +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + IZAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + IZAMAX( N-IMAX, A( IMAX+1, IMAX ), + $ 1 ) + DTEMP = CABS1( A( ITEMP, IMAX ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Case(2) +* Equivalent to testing for +* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABS( DBLE( A( IMAX, IMAX ) ) ) + $ .LT.ALPHA*ROWMAX ) ) THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Case(3) +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. +* +* Case(4) + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* +* END pivot search loop body +* + IF( .NOT.DONE ) GOTO 42 +* + END IF +* +* END pivot search +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K + KSTEP - 1 +* +* For only a 2x2 pivot, interchange rows and columns K and P +* in the trailing submatrix A(k:n,k:n) +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* (1) Swap columnar parts + IF( P.LT.N ) + $ CALL ZSWAP( N-P, A( P+1, K ), 1, A( P+1, P ), 1 ) +* (2) Swap and conjugate middle parts + DO 44 J = K + 1, P - 1 + T = DCONJG( A( J, K ) ) + A( J, K ) = DCONJG( A( P, J ) ) + A( P, J ) = T + 44 CONTINUE +* (3) Swap and conjugate corner elements at row-col interserction + A( P, K ) = DCONJG( A( P, K ) ) +* (4) Swap diagonal elements at row-col intersection + R1 = DBLE( A( K, K ) ) + A( K, K ) = DBLE( A( P, P ) ) + A( P, P ) = R1 + END IF +* +* For both 1x1 and 2x2 pivots, interchange rows and +* columns KK and KP in the trailing submatrix A(k:n,k:n) +* + IF( KP.NE.KK ) THEN +* (1) Swap columnar parts + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) +* (2) Swap and conjugate middle parts + DO 45 J = KK + 1, KP - 1 + T = DCONJG( A( J, KK ) ) + A( J, KK ) = DCONJG( A( KP, J ) ) + A( KP, J ) = T + 45 CONTINUE +* (3) Swap and conjugate corner elements at row-col interserction + A( KP, KK ) = DCONJG( A( KP, KK ) ) +* (4) Swap diagonal elements at row-col intersection + R1 = DBLE( A( KK, KK ) ) + A( KK, KK ) = DBLE( A( KP, KP ) ) + A( KP, KP ) = R1 +* + IF( KSTEP.EQ.2 ) THEN +* (*) Make sure that diagonal element of pivot is real + A( K, K ) = DBLE( A( K, K ) ) +* (5) Swap row elements + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + ELSE +* (*) Make sure that diagonal element of pivot is real + A( K, K ) = DBLE( A( K, K ) ) + IF( KSTEP.EQ.2 ) + $ A( K+1, K+1 ) = DBLE( A( K+1, K+1 ) ) + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of A now holds +* +* W(k) = L(k)*D(k), +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) and +* store L(k) in column k +* +* Handle division by a small number +* + IF( ABS( DBLE( A( K, K ) ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* + D11 = ONE / DBLE( A( K, K ) ) + CALL ZHER( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column k +* + CALL ZDSCAL( N-K, D11, A( K+1, K ), 1 ) + ELSE +* +* Store L(k) in column k +* + D11 = DBLE( A( K, K ) ) + DO 46 II = K + 1, N + A( II, K ) = A( II, K ) / D11 + 46 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL ZHER( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) ) * D(k) * ( L(k) L(k+1) )**T +* = A - ( ( A(k)A(k+1) )*inv(D(k) ) * ( A(k)A(k+1) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.LT.N-1 ) THEN +* D = |A21| + D = DLAPY2( DBLE( A( K+1, K ) ), + $ DIMAG( A( K+1, K ) ) ) + D11 = DBLE( A( K+1, K+1 ) ) / D + D22 = DBLE( A( K, K ) ) / D + D21 = A( K+1, K ) / D + TT = ONE / ( D11*D22-ONE ) +* + DO 60 J = K + 2, N +* +* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J +* + WK = TT*( D11*A( J, K )-D21*A( J, K+1 ) ) + WKP1 = TT*( D22*A( J, K+1 )-DCONJG( D21 )* + $ A( J, K ) ) +* +* Perform a rank-2 update of A(k+2:n,k+2:n) +* + DO 50 I = J, N + A( I, J ) = A( I, J ) - + $ ( A( I, K ) / D )*DCONJG( WK ) - + $ ( A( I, K+1 ) / D )*DCONJG( WKP1 ) + 50 CONTINUE +* +* Store L(k) and L(k+1) in cols k and k+1 for row J +* + A( J, K ) = WK / D + A( J, K+1 ) = WKP1 / D +* (*) Make sure that diagonal element of pivot is real + A( J, J ) = DCMPLX( DBLE( A( J, J ) ), ZERO ) +* + 60 CONTINUE +* + END IF +* + END IF +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 40 +* + END IF +* + 70 CONTINUE +* + RETURN +* +* End of ZHETF2_ROOK +* + END diff --git a/dspl/liblapack/SRC/zhetrd.f b/dspl/liblapack/SRC/zhetrd.f new file mode 100644 index 0000000..51c9fc2 --- /dev/null +++ b/dspl/liblapack/SRC/zhetrd.f @@ -0,0 +1,378 @@ +*> \brief \b ZHETRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRD reduces a complex Hermitian matrix A to real symmetric +*> tridiagonal form T by a unitary similarity transformation: +*> Q**H * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the unitary +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the unitary matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 1. +*> For optimum performance LWORK >= N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n-1) . . . H(2) H(1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +*> A(1:i-1,i+1), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(n-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +*> and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( d e v2 v3 v4 ) ( d ) +*> ( d e v3 v4 ) ( e d ) +*> ( d e v4 ) ( v1 e d ) +*> ( d e ) ( v1 v2 e d ) +*> ( d ) ( v1 v2 v3 e d ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHETRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, IWS, J, KK, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHER2K, ZHETD2, ZLATRD +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. +* + NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NX = N + IWS = 1 + IF( NB.GT.1 .AND. NB.LT.N ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code). +* + NX = MAX( NB, ILAENV( 3, 'ZHETRD', UPLO, N, -1, -1, -1 ) ) + IF( NX.LT.N ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code by setting NX = N. +* + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = ILAENV( 2, 'ZHETRD', UPLO, N, -1, -1, -1 ) + IF( NB.LT.NBMIN ) + $ NX = N + END IF + ELSE + NX = N + END IF + ELSE + NB = 1 + END IF +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A. +* Columns 1:kk are handled by the unblocked method. +* + KK = N - ( ( N-NX+NB-1 ) / NB )*NB + DO 20 I = N - NB + 1, KK + 1, -NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL ZLATRD( UPLO, I+NB-1, NB, A, LDA, E, TAU, WORK, + $ LDWORK ) +* +* Update the unreduced submatrix A(1:i-1,1:i-1), using an +* update of the form: A := A - V*W**H - W*V**H +* + CALL ZHER2K( UPLO, 'No transpose', I-1, NB, -CONE, + $ A( 1, I ), LDA, WORK, LDWORK, ONE, A, LDA ) +* +* Copy superdiagonal elements back into A, and diagonal +* elements into D +* + DO 10 J = I, I + NB - 1 + A( J-1, J ) = E( J-1 ) + D( J ) = A( J, J ) + 10 CONTINUE + 20 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL ZHETD2( UPLO, KK, A, LDA, D, E, TAU, IINFO ) + ELSE +* +* Reduce the lower triangle of A +* + DO 40 I = 1, N - NX, NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL ZLATRD( UPLO, N-I+1, NB, A( I, I ), LDA, E( I ), + $ TAU( I ), WORK, LDWORK ) +* +* Update the unreduced submatrix A(i+nb:n,i+nb:n), using +* an update of the form: A := A - V*W**H - W*V**H +* + CALL ZHER2K( UPLO, 'No transpose', N-I-NB+1, NB, -CONE, + $ A( I+NB, I ), LDA, WORK( NB+1 ), LDWORK, ONE, + $ A( I+NB, I+NB ), LDA ) +* +* Copy subdiagonal elements back into A, and diagonal +* elements into D +* + DO 30 J = I, I + NB - 1 + A( J+1, J ) = E( J ) + D( J ) = A( J, J ) + 30 CONTINUE + 40 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL ZHETD2( UPLO, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAU( I ), IINFO ) + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZHETRD +* + END diff --git a/dspl/liblapack/SRC/zhetrd_2stage.f b/dspl/liblapack/SRC/zhetrd_2stage.f new file mode 100644 index 0000000..9d6a426 --- /dev/null +++ b/dspl/liblapack/SRC/zhetrd_2stage.f @@ -0,0 +1,337 @@ +*> \brief \b ZHETRD_2STAGE +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRD_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU, +* HOUS2, LHOUS2, WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER VECT, UPLO +* INTEGER N, LDA, LWORK, LHOUS2, INFO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* COMPLEX*16 A( LDA, * ), TAU( * ), +* HOUS2( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRD_2STAGE reduces a complex Hermitian matrix A to real symmetric +*> tridiagonal form T by a unitary similarity transformation: +*> Q1**H Q2**H* A * Q2 * Q1 = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': No need for the Housholder representation, +*> in particular for the second stage (Band to +*> tridiagonal) and thus LHOUS2 is of size max(1, 4*N); +*> = 'V': the Householder representation is needed to +*> either generate Q1 Q2 or to apply Q1 Q2, +*> then LHOUS2 is to be queried and computed. +*> (NOT AVAILABLE IN THIS RELEASE). +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the band superdiagonal +*> of A are overwritten by the corresponding elements of the +*> internal band-diagonal matrix AB, and the elements above +*> the KD superdiagonal, with the array TAU, represent the unitary +*> matrix Q1 as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and band subdiagonal of A are over- +*> written by the corresponding elements of the internal band-diagonal +*> matrix AB, and the elements below the KD subdiagonal, with +*> the array TAU, represent the unitary matrix Q1 as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (N-KD) +*> The scalar factors of the elementary reflectors of +*> the first stage (see Further Details). +*> \endverbatim +*> +*> \param[out] HOUS2 +*> \verbatim +*> HOUS2 is COMPLEX*16 array, dimension LHOUS2, that +*> store the Householder representation of the stage2 +*> band to tridiagonal. +*> \endverbatim +*> +*> \param[in] LHOUS2 +*> \verbatim +*> LHOUS2 is INTEGER +*> The dimension of the array HOUS2. LHOUS2 = MAX(1, dimension) +*> If LWORK = -1, or LHOUS2=-1, +*> then a query is assumed; the routine +*> only calculates the optimal size of the HOUS2 array, returns +*> this value as the first entry of the HOUS2 array, and no error +*> message related to LHOUS2 is issued by XERBLA. +*> LHOUS2 = MAX(1, dimension) where +*> dimension = 4*N if VECT='N' +*> not available now if VECT='H' +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK = MAX(1, dimension) +*> If LWORK = -1, or LHOUS2=-1, +*> then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK = MAX(1, dimension) where +*> dimension = max(stage1,stage2) + (KD+1)*N +*> = N*KD + N*max(KD+1,FACTOPTNB) +*> + max(2*KD*KD, KD*NTHREADS) +*> + (KD+1)*N +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16HEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHETRD_2STAGE( VECT, UPLO, N, A, LDA, D, E, TAU, + $ HOUS2, LHOUS2, WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER VECT, UPLO + INTEGER N, LDA, LWORK, LHOUS2, INFO +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + COMPLEX*16 A( LDA, * ), TAU( * ), + $ HOUS2( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTQ + INTEGER KD, IB, LWMIN, LHMIN, LWRK, LDAB, WPOS, ABPOS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHETRD_HE2HB, ZHETRD_HB2ST +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV2STAGE + EXTERNAL LSAME, ILAENV2STAGE +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + WANTQ = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) .OR. ( LHOUS2.EQ.-1 ) +* +* Determine the block size, the workspace size and the hous size. +* + KD = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', VECT, N, -1, -1, -1 ) + IB = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', VECT, N, KD, -1, -1 ) + LHMIN = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', VECT, N, KD, IB, -1 ) + LWMIN = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', VECT, N, KD, IB, -1 ) +* WRITE(*,*),'ZHETRD_2STAGE N KD UPLO LHMIN LWMIN ',N, KD, UPLO, +* $ LHMIN, LWMIN +* + IF( .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LHOUS2.LT.LHMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + HOUS2( 1 ) = LHMIN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRD_2STAGE', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine pointer position +* + LDAB = KD+1 + LWRK = LWORK-LDAB*N + ABPOS = 1 + WPOS = ABPOS + LDAB*N + CALL ZHETRD_HE2HB( UPLO, N, KD, A, LDA, WORK( ABPOS ), LDAB, + $ TAU, WORK( WPOS ), LWRK, INFO ) + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRD_HE2HB', -INFO ) + RETURN + END IF + CALL ZHETRD_HB2ST( 'Y', VECT, UPLO, N, KD, + $ WORK( ABPOS ), LDAB, D, E, + $ HOUS2, LHOUS2, WORK( WPOS ), LWRK, INFO ) + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRD_HB2ST', -INFO ) + RETURN + END IF +* +* + HOUS2( 1 ) = LHMIN + WORK( 1 ) = LWMIN + RETURN +* +* End of ZHETRD_2STAGE +* + END diff --git a/dspl/liblapack/SRC/zhetrd_hb2st.F b/dspl/liblapack/SRC/zhetrd_hb2st.F new file mode 100644 index 0000000..0739062 --- /dev/null +++ b/dspl/liblapack/SRC/zhetrd_hb2st.F @@ -0,0 +1,587 @@ +*> \brief \b ZHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric tridiagonal form T +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRD_HB2ST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, +* D, E, HOUS, LHOUS, WORK, LWORK, INFO ) +* +* #if defined(_OPENMP) +* use omp_lib +* #endif +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER STAGE1, UPLO, VECT +* INTEGER N, KD, IB, LDAB, LHOUS, LWORK, INFO +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* COMPLEX*16 AB( LDAB, * ), HOUS( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric +*> tridiagonal form T by a unitary similarity transformation: +*> Q**H * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] STAGE +*> \verbatim +*> STAGE is CHARACTER*1 +*> = 'N': "No": to mention that the stage 1 of the reduction +*> from dense to band using the zhetrd_he2hb routine +*> was not called before this routine to reproduce AB. +*> In other term this routine is called as standalone. +*> = 'Y': "Yes": to mention that the stage 1 of the +*> reduction from dense to band using the zhetrd_he2hb +*> routine has been called to produce AB (e.g., AB is +*> the output of zhetrd_he2hb. +*> \endverbatim +*> +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'N': No need for the Housholder representation, +*> and thus LHOUS is of size max(1, 4*N); +*> = 'V': the Householder representation is needed to +*> either generate or to apply Q later on, +*> then LHOUS is to be queried and computed. +*> (NOT AVAILABLE IN THIS RELEASE). +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> On exit, the diagonal elements of AB are overwritten by the +*> diagonal elements of the tridiagonal matrix T; if KD > 0, the +*> elements on the first superdiagonal (if UPLO = 'U') or the +*> first subdiagonal (if UPLO = 'L') are overwritten by the +*> off-diagonal elements of T; the rest of AB is overwritten by +*> values generated during the reduction. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] HOUS +*> \verbatim +*> HOUS is COMPLEX*16 array, dimension LHOUS, that +*> store the Householder representation. +*> \endverbatim +*> +*> \param[in] LHOUS +*> \verbatim +*> LHOUS is INTEGER +*> The dimension of the array HOUS. LHOUS = MAX(1, dimension) +*> If LWORK = -1, or LHOUS=-1, +*> then a query is assumed; the routine +*> only calculates the optimal size of the HOUS array, returns +*> this value as the first entry of the HOUS array, and no error +*> message related to LHOUS is issued by XERBLA. +*> LHOUS = MAX(1, dimension) where +*> dimension = 4*N if VECT='N' +*> not available now if VECT='H' +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK = MAX(1, dimension) +*> If LWORK = -1, or LHOUS=-1, +*> then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK = MAX(1, dimension) where +*> dimension = (2KD+1)*N + KD*NTHREADS +*> where KD is the blocking size of the reduction, +*> FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice +*> NTHREADS is the number of threads used when +*> openMP compilation is enabled, otherwise =1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, + $ D, E, HOUS, LHOUS, WORK, LWORK, INFO ) +* +* +#if defined(_OPENMP) + use omp_lib +#endif +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER STAGE1, UPLO, VECT + INTEGER N, KD, LDAB, LHOUS, LWORK, INFO +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + COMPLEX*16 AB( LDAB, * ), HOUS( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION RZERO + COMPLEX*16 ZERO, ONE + PARAMETER ( RZERO = 0.0D+0, + $ ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTQ, UPPER, AFTERS1 + INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST, + $ ED, STIND, EDIND, BLKLASTIND, COLPT, THED, + $ STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID, + $ NBTILES, TTYPE, TID, NTHREADS, DEBUG, + $ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS, + $ INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU, + $ SIZEV, SIZETAU, LDV, LHMIN, LWMIN + DOUBLE PRECISION ABSTMP + COMPLEX*16 TMP +* .. +* .. External Subroutines .. + EXTERNAL ZHB2ST_KERNELS, ZLACPY, ZLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX, CEILING, DBLE, REAL +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Determine the minimal workspace size required. +* Test the input parameters +* + DEBUG = 0 + INFO = 0 + AFTERS1 = LSAME( STAGE1, 'Y' ) + WANTQ = LSAME( VECT, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) .OR. ( LHOUS.EQ.-1 ) +* +* Determine the block size, the workspace size and the hous size. +* + IB = ILAENV( 18, 'ZHETRD_HB2ST', VECT, N, KD, -1, -1 ) + LHMIN = ILAENV( 19, 'ZHETRD_HB2ST', VECT, N, KD, IB, -1 ) + LWMIN = ILAENV( 20, 'ZHETRD_HB2ST', VECT, N, KD, IB, -1 ) +* + IF( .NOT.AFTERS1 .AND. .NOT.LSAME( STAGE1, 'N' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( VECT, 'N' ) ) THEN + INFO = -2 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.(KD+1) ) THEN + INFO = -7 + ELSE IF( LHOUS.LT.LHMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + HOUS( 1 ) = LHMIN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRD_HB2ST', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + HOUS( 1 ) = 1 + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine pointer position +* + LDV = KD + IB + SIZETAU = 2 * N + SIZEV = 2 * N + INDTAU = 1 + INDV = INDTAU + SIZETAU + LDA = 2 * KD + 1 + SIZEA = LDA * N + INDA = 1 + INDW = INDA + SIZEA + NTHREADS = 1 + TID = 0 +* + IF( UPPER ) THEN + APOS = INDA + KD + AWPOS = INDA + DPOS = APOS + KD + OFDPOS = DPOS - 1 + ABDPOS = KD + 1 + ABOFDPOS = KD + ELSE + APOS = INDA + AWPOS = INDA + KD + 1 + DPOS = APOS + OFDPOS = DPOS + 1 + ABDPOS = 1 + ABOFDPOS = 2 + + ENDIF +* +* Case KD=0: +* The matrix is diagonal. We just copy it (convert to "real" for +* complex because D is double and the imaginary part should be 0) +* and store it in D. A sequential code here is better or +* in a parallel environment it might need two cores for D and E +* + IF( KD.EQ.0 ) THEN + DO 30 I = 1, N + D( I ) = DBLE( AB( ABDPOS, I ) ) + 30 CONTINUE + DO 40 I = 1, N-1 + E( I ) = RZERO + 40 CONTINUE +* + HOUS( 1 ) = 1 + WORK( 1 ) = 1 + RETURN + END IF +* +* Case KD=1: +* The matrix is already Tridiagonal. We have to make diagonal +* and offdiagonal elements real, and store them in D and E. +* For that, for real precision just copy the diag and offdiag +* to D and E while for the COMPLEX case the bulge chasing is +* performed to convert the hermetian tridiagonal to symmetric +* tridiagonal. A simpler coversion formula might be used, but then +* updating the Q matrix will be required and based if Q is generated +* or not this might complicate the story. +* + IF( KD.EQ.1 ) THEN + DO 50 I = 1, N + D( I ) = DBLE( AB( ABDPOS, I ) ) + 50 CONTINUE +* +* make off-diagonal elements real and copy them to E +* + IF( UPPER ) THEN + DO 60 I = 1, N - 1 + TMP = AB( ABOFDPOS, I+1 ) + ABSTMP = ABS( TMP ) + AB( ABOFDPOS, I+1 ) = ABSTMP + E( I ) = ABSTMP + IF( ABSTMP.NE.RZERO ) THEN + TMP = TMP / ABSTMP + ELSE + TMP = ONE + END IF + IF( I.LT.N-1 ) + $ AB( ABOFDPOS, I+2 ) = AB( ABOFDPOS, I+2 )*TMP +C IF( WANTZ ) THEN +C CALL ZSCAL( N, DCONJG( TMP ), Q( 1, I+1 ), 1 ) +C END IF + 60 CONTINUE + ELSE + DO 70 I = 1, N - 1 + TMP = AB( ABOFDPOS, I ) + ABSTMP = ABS( TMP ) + AB( ABOFDPOS, I ) = ABSTMP + E( I ) = ABSTMP + IF( ABSTMP.NE.RZERO ) THEN + TMP = TMP / ABSTMP + ELSE + TMP = ONE + END IF + IF( I.LT.N-1 ) + $ AB( ABOFDPOS, I+1 ) = AB( ABOFDPOS, I+1 )*TMP +C IF( WANTQ ) THEN +C CALL ZSCAL( N, TMP, Q( 1, I+1 ), 1 ) +C END IF + 70 CONTINUE + ENDIF +* + HOUS( 1 ) = 1 + WORK( 1 ) = 1 + RETURN + END IF +* +* Main code start here. +* Reduce the hermitian band of A to a tridiagonal matrix. +* + THGRSIZ = N + GRSIZ = 1 + SHIFT = 3 + NBTILES = CEILING( REAL(N)/REAL(KD) ) + STEPERCOL = CEILING( REAL(SHIFT)/REAL(GRSIZ) ) + THGRNB = CEILING( REAL(N-1)/REAL(THGRSIZ) ) +* + CALL ZLACPY( "A", KD+1, N, AB, LDAB, WORK( APOS ), LDA ) + CALL ZLASET( "A", KD, N, ZERO, ZERO, WORK( AWPOS ), LDA ) +* +* +* openMP parallelisation start here +* +#if defined(_OPENMP) +!$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND ) +!$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID ) +!$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND ) +!$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK) +!$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA ) +!$OMP$ SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT ) +!$OMP MASTER +#endif +* +* main bulge chasing loop +* + DO 100 THGRID = 1, THGRNB + STT = (THGRID-1)*THGRSIZ+1 + THED = MIN( (STT + THGRSIZ -1), (N-1)) + DO 110 I = STT, N-1 + ED = MIN( I, THED ) + IF( STT.GT.ED ) EXIT + DO 120 M = 1, STEPERCOL + ST = STT + DO 130 SWEEPID = ST, ED + DO 140 K = 1, GRSIZ + MYID = (I-SWEEPID)*(STEPERCOL*GRSIZ) + $ + (M-1)*GRSIZ + K + IF ( MYID.EQ.1 ) THEN + TTYPE = 1 + ELSE + TTYPE = MOD( MYID, 2 ) + 2 + ENDIF + + IF( TTYPE.EQ.2 ) THEN + COLPT = (MYID/2)*KD + SWEEPID + STIND = COLPT-KD+1 + EDIND = MIN(COLPT,N) + BLKLASTIND = COLPT + ELSE + COLPT = ((MYID+1)/2)*KD + SWEEPID + STIND = COLPT-KD+1 + EDIND = MIN(COLPT,N) + IF( ( STIND.GE.EDIND-1 ).AND. + $ ( EDIND.EQ.N ) ) THEN + BLKLASTIND = N + ELSE + BLKLASTIND = 0 + ENDIF + ENDIF +* +* Call the kernel +* +#if defined(_OPENMP) + IF( TTYPE.NE.1 ) THEN +!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) +!$OMP$ DEPEND(in:WORK(MYID-1)) +!$OMP$ DEPEND(out:WORK(MYID)) + TID = OMP_GET_THREAD_NUM() + CALL ZHB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +!$OMP END TASK + ELSE +!$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1)) +!$OMP$ DEPEND(out:WORK(MYID)) + TID = OMP_GET_THREAD_NUM() + CALL ZHB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +!$OMP END TASK + ENDIF +#else + CALL ZHB2ST_KERNELS( UPLO, WANTQ, TTYPE, + $ STIND, EDIND, SWEEPID, N, KD, IB, + $ WORK ( INDA ), LDA, + $ HOUS( INDV ), HOUS( INDTAU ), LDV, + $ WORK( INDW + TID*KD ) ) +#endif + IF ( BLKLASTIND.GE.(N-1) ) THEN + STT = STT + 1 + EXIT + ENDIF + 140 CONTINUE + 130 CONTINUE + 120 CONTINUE + 110 CONTINUE + 100 CONTINUE +* +#if defined(_OPENMP) +!$OMP END MASTER +!$OMP END PARALLEL +#endif +* +* Copy the diagonal from A to D. Note that D is REAL thus only +* the Real part is needed, the imaginary part should be zero. +* + DO 150 I = 1, N + D( I ) = DBLE( WORK( DPOS+(I-1)*LDA ) ) + 150 CONTINUE +* +* Copy the off diagonal from A to E. Note that E is REAL thus only +* the Real part is needed, the imaginary part should be zero. +* + IF( UPPER ) THEN + DO 160 I = 1, N-1 + E( I ) = DBLE( WORK( OFDPOS+I*LDA ) ) + 160 CONTINUE + ELSE + DO 170 I = 1, N-1 + E( I ) = DBLE( WORK( OFDPOS+(I-1)*LDA ) ) + 170 CONTINUE + ENDIF +* + HOUS( 1 ) = LHMIN + WORK( 1 ) = LWMIN + RETURN +* +* End of ZHETRD_HB2ST +* + END + diff --git a/dspl/liblapack/SRC/zhetrd_he2hb.f b/dspl/liblapack/SRC/zhetrd_he2hb.f new file mode 100644 index 0000000..e35578b --- /dev/null +++ b/dspl/liblapack/SRC/zhetrd_he2hb.f @@ -0,0 +1,517 @@ +*> \brief \b ZHETRD_HE2HB +* +* @precisions fortran z -> s d c +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRD_HE2HB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, +* WORK, LWORK, INFO ) +* +* IMPLICIT NONE +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDAB, LWORK, N, KD +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), AB( LDAB, * ), +* TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian +*> band-diagonal form AB by a unitary similarity transformation: +*> Q**H * A * Q = AB. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the reduced matrix if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> The reduced matrix is stored in the array AB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the unitary +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the unitary matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On exit, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (N-KD) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LWORK) +*> On exit, if INFO = 0, or if LWORK=-1, +*> WORK(1) returns the size of LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK which should be calculated +*> by a workspace query. LWORK = MAX(1, LWORK_QUERY) +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD +*> where FACTOPTNB is the blocking used by the QR or LQ +*> algorithm, usually FACTOPTNB=128 is a good choice otherwise +*> putting LWORK=-1 will provide the size of WORK. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16HEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Implemented by Azzam Haidar. +*> +*> All details are available on technical report, SC11, SC13 papers. +*> +*> Azzam Haidar, Hatem Ltaief, and Jack Dongarra. +*> Parallel reduction to condensed forms for symmetric eigenvalue problems +*> using aggregated fine-grained and memory-aware kernels. In Proceedings +*> of 2011 International Conference for High Performance Computing, +*> Networking, Storage and Analysis (SC '11), New York, NY, USA, +*> Article 8 , 11 pages. +*> http://doi.acm.org/10.1145/2063384.2063394 +*> +*> A. Haidar, J. Kurzak, P. Luszczek, 2013. +*> An improved parallel singular value algorithm and its implementation +*> for multicore hardware, In Proceedings of 2013 International Conference +*> for High Performance Computing, Networking, Storage and Analysis (SC '13). +*> Denver, Colorado, USA, 2013. +*> Article 90, 12 pages. +*> http://doi.acm.org/10.1145/2503210.2503292 +*> +*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. +*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure +*> calculations based on fine-grained memory aware tasks. +*> International Journal of High Performance Computing Applications. +*> Volume 28 Issue 2, Pages 196-209, May 2014. +*> http://hpc.sagepub.com/content/28/2/196 +*> +*> \endverbatim +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(k)**H . . . H(2)**H H(1)**H, where k = n-kd. +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in +*> A(i,i+kd+1:n), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(k), where k = n-kd. +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in +*> A(i+kd+2:n,i), and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( ab ab/v1 v1 v1 v1 ) ( ab ) +*> ( ab ab/v2 v2 v2 ) ( ab/v1 ab ) +*> ( ab ab/v3 v3 ) ( v1 ab/v2 ab ) +*> ( ab ab/v4 ) ( v1 v2 ab/v3 ab ) +*> ( ab ) ( v1 v2 v3 ab/v4 ab ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, + $ WORK, LWORK, INFO ) +* + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAB, LWORK, N, KD +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), AB( LDAB, * ), + $ TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION RONE + COMPLEX*16 ZERO, ONE, HALF + PARAMETER ( RONE = 1.0D+0, + $ ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ), + $ HALF = ( 0.5D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, J, IINFO, LWMIN, PN, PK, LK, + $ LDT, LDW, LDS2, LDS1, + $ LS2, LS1, LW, LT, + $ TPOS, WPOS, S2POS, S1POS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHER2K, ZHEMM, ZGEMM, ZCOPY, + $ ZLARFT, ZGELQF, ZGEQRF, ZLASET +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Determine the minimal workspace size required +* and test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + LWMIN = ILAENV( 20, 'ZHETRD_HE2HB', '', N, KD, -1, -1 ) + + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAB.LT.MAX( 1, KD+1 ) ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRD_HE2HB', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK( 1 ) = LWMIN + RETURN + END IF +* +* Quick return if possible +* Copy the upper/lower portion of A into AB +* + IF( N.LE.KD+1 ) THEN + IF( UPPER ) THEN + DO 100 I = 1, N + LK = MIN( KD+1, I ) + CALL ZCOPY( LK, A( I-LK+1, I ), 1, + $ AB( KD+1-LK+1, I ), 1 ) + 100 CONTINUE + ELSE + DO 110 I = 1, N + LK = MIN( KD+1, N-I+1 ) + CALL ZCOPY( LK, A( I, I ), 1, AB( 1, I ), 1 ) + 110 CONTINUE + ENDIF + WORK( 1 ) = 1 + RETURN + END IF +* +* Determine the pointer position for the workspace +* + LDT = KD + LDS1 = KD + LT = LDT*KD + LW = N*KD + LS1 = LDS1*KD + LS2 = LWMIN - LT - LW - LS1 +* LS2 = N*MAX(KD,FACTOPTNB) + TPOS = 1 + WPOS = TPOS + LT + S1POS = WPOS + LW + S2POS = S1POS + LS1 + IF( UPPER ) THEN + LDW = KD + LDS2 = KD + ELSE + LDW = N + LDS2 = N + ENDIF +* +* +* Set the workspace of the triangular matrix T to zero once such a +* way everytime T is generated the upper/lower portion will be always zero +* + CALL ZLASET( "A", LDT, KD, ZERO, ZERO, WORK( TPOS ), LDT ) +* + IF( UPPER ) THEN + DO 10 I = 1, N - KD, KD + PN = N-I-KD+1 + PK = MIN( N-I-KD+1, KD ) +* +* Compute the LQ factorization of the current block +* + CALL ZGELQF( KD, PN, A( I, I+KD ), LDA, + $ TAU( I ), WORK( S2POS ), LS2, IINFO ) +* +* Copy the upper portion of A into AB +* + DO 20 J = I, I+PK-1 + LK = MIN( KD, N-J ) + 1 + CALL ZCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 ) + 20 CONTINUE +* + CALL ZLASET( 'Lower', PK, PK, ZERO, ONE, + $ A( I, I+KD ), LDA ) +* +* Form the matrix T +* + CALL ZLARFT( 'Forward', 'Rowwise', PN, PK, + $ A( I, I+KD ), LDA, TAU( I ), + $ WORK( TPOS ), LDT ) +* +* Compute W: +* + CALL ZGEMM( 'Conjugate', 'No transpose', PK, PN, PK, + $ ONE, WORK( TPOS ), LDT, + $ A( I, I+KD ), LDA, + $ ZERO, WORK( S2POS ), LDS2 ) +* + CALL ZHEMM( 'Right', UPLO, PK, PN, + $ ONE, A( I+KD, I+KD ), LDA, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( WPOS ), LDW ) +* + CALL ZGEMM( 'No transpose', 'Conjugate', PK, PK, PN, + $ ONE, WORK( WPOS ), LDW, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( S1POS ), LDS1 ) +* + CALL ZGEMM( 'No transpose', 'No transpose', PK, PN, PK, + $ -HALF, WORK( S1POS ), LDS1, + $ A( I, I+KD ), LDA, + $ ONE, WORK( WPOS ), LDW ) +* +* +* Update the unreduced submatrix A(i+kd:n,i+kd:n), using +* an update of the form: A := A - V'*W - W'*V +* + CALL ZHER2K( UPLO, 'Conjugate', PN, PK, + $ -ONE, A( I, I+KD ), LDA, + $ WORK( WPOS ), LDW, + $ RONE, A( I+KD, I+KD ), LDA ) + 10 CONTINUE +* +* Copy the upper band to AB which is the band storage matrix +* + DO 30 J = N-KD+1, N + LK = MIN(KD, N-J) + 1 + CALL ZCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 ) + 30 CONTINUE +* + ELSE +* +* Reduce the lower triangle of A to lower band matrix +* + DO 40 I = 1, N - KD, KD + PN = N-I-KD+1 + PK = MIN( N-I-KD+1, KD ) +* +* Compute the QR factorization of the current block +* + CALL ZGEQRF( PN, KD, A( I+KD, I ), LDA, + $ TAU( I ), WORK( S2POS ), LS2, IINFO ) +* +* Copy the upper portion of A into AB +* + DO 50 J = I, I+PK-1 + LK = MIN( KD, N-J ) + 1 + CALL ZCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 ) + 50 CONTINUE +* + CALL ZLASET( 'Upper', PK, PK, ZERO, ONE, + $ A( I+KD, I ), LDA ) +* +* Form the matrix T +* + CALL ZLARFT( 'Forward', 'Columnwise', PN, PK, + $ A( I+KD, I ), LDA, TAU( I ), + $ WORK( TPOS ), LDT ) +* +* Compute W: +* + CALL ZGEMM( 'No transpose', 'No transpose', PN, PK, PK, + $ ONE, A( I+KD, I ), LDA, + $ WORK( TPOS ), LDT, + $ ZERO, WORK( S2POS ), LDS2 ) +* + CALL ZHEMM( 'Left', UPLO, PN, PK, + $ ONE, A( I+KD, I+KD ), LDA, + $ WORK( S2POS ), LDS2, + $ ZERO, WORK( WPOS ), LDW ) +* + CALL ZGEMM( 'Conjugate', 'No transpose', PK, PK, PN, + $ ONE, WORK( S2POS ), LDS2, + $ WORK( WPOS ), LDW, + $ ZERO, WORK( S1POS ), LDS1 ) +* + CALL ZGEMM( 'No transpose', 'No transpose', PN, PK, PK, + $ -HALF, A( I+KD, I ), LDA, + $ WORK( S1POS ), LDS1, + $ ONE, WORK( WPOS ), LDW ) +* +* +* Update the unreduced submatrix A(i+kd:n,i+kd:n), using +* an update of the form: A := A - V*W' - W*V' +* + CALL ZHER2K( UPLO, 'No transpose', PN, PK, + $ -ONE, A( I+KD, I ), LDA, + $ WORK( WPOS ), LDW, + $ RONE, A( I+KD, I+KD ), LDA ) +* ================================================================== +* RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED +* DO 45 J = I, I+PK-1 +* LK = MIN( KD, N-J ) + 1 +* CALL ZCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 ) +* 45 CONTINUE +* ================================================================== + 40 CONTINUE +* +* Copy the lower band to AB which is the band storage matrix +* + DO 60 J = N-KD+1, N + LK = MIN(KD, N-J) + 1 + CALL ZCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 ) + 60 CONTINUE + + END IF +* + WORK( 1 ) = LWMIN + RETURN +* +* End of ZHETRD_HE2HB +* + END diff --git a/dspl/liblapack/SRC/zhetrf.f b/dspl/liblapack/SRC/zhetrf.f new file mode 100644 index 0000000..3866abd --- /dev/null +++ b/dspl/liblapack/SRC/zhetrf.f @@ -0,0 +1,357 @@ +*> \brief \b ZHETRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRF computes the factorization of a complex Hermitian matrix A +*> using the Bunch-Kaufman diagonal pivoting method. The form of the +*> factorization is +*> +*> A = U*D*U**H or A = L*D*L**H +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is Hermitian and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=1. For best performance +*> LWORK >= N*NB, where NB is the block size returned by ILAENV. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**H, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**H, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHETRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHETF2, ZLAHEF +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'ZHETRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'ZHETRF', UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**H using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by ZLAHEF; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 40 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL ZLAHEF( UPLO, K, NB, KB, A, LDA, IPIV, WORK, N, IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL ZHETF2( UPLO, K, A, LDA, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**H using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by ZLAHEF; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL ZLAHEF( UPLO, N-K+1, NB, KB, A( K, K ), LDA, IPIV( K ), + $ WORK, N, IINFO ) + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL ZHETF2( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), IINFO ) + KB = N - K + 1 + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO 30 J = K, K + KB - 1 + IF( IPIV( J ).GT.0 ) THEN + IPIV( J ) = IPIV( J ) + K - 1 + ELSE + IPIV( J ) = IPIV( J ) - K + 1 + END IF + 30 CONTINUE +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* + END IF +* + 40 CONTINUE + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZHETRF +* + END diff --git a/dspl/liblapack/SRC/zhetrf_aa.f b/dspl/liblapack/SRC/zhetrf_aa.f new file mode 100644 index 0000000..e355aed --- /dev/null +++ b/dspl/liblapack/SRC/zhetrf_aa.f @@ -0,0 +1,469 @@ +*> \brief \b ZHETRF_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRF_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRF_AA computes the factorization of a complex hermitian matrix A +*> using the Aasen's algorithm. The form of the factorization is +*> +*> A = U*T*U**H or A = L*T*L**H +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is a hermitian tridiagonal matrix. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the tridiagonal matrix is stored in the diagonals +*> and the subdiagonals of A just below (or above) the diagonals, +*> and L is stored below (or above) the subdiaonals, when UPLO +*> is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= MAX(1,2*N). For optimum performance +*> LWORK >= N*(1+NB), where NB is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16HEcomputational +* +* ===================================================================== + SUBROUTINE ZHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = (0.0D+0, 0.0D+0), ONE = (1.0D+0, 0.0D+0) ) +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER J, LWKOPT + INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB + COMPLEX*16 ALPHA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL ZLAHEF_AA, ZGEMM, ZGEMV, ZCOPY, ZSCAL, ZSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Determine the block size +* + NB = ILAENV( 1, 'ZHETRF_AA', UPLO, N, -1, -1, -1 ) +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKOPT = (NB+1)*N + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRF_AA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return +* + IF ( N.EQ.0 ) THEN + RETURN + ENDIF + IPIV( 1 ) = 1 + IF ( N.EQ.1 ) THEN + A( 1, 1 ) = DBLE( A( 1, 1 ) ) + RETURN + END IF +* +* Adjust block size based on the workspace size +* + IF( LWORK.LT.((1+NB)*N) ) THEN + NB = ( LWORK-N ) / N + END IF +* + IF( UPPER ) THEN +* +* ..................................................... +* Factorize A as L*D*L**H using the upper triangle of A +* ..................................................... +* +* copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N)) +* + CALL ZCOPY( N, A( 1, 1 ), LDA, WORK( 1 ), 1 ) +* +* J is the main loop index, increasing from 1 to N in steps of +* JB, where JB is the number of columns factorized by ZLAHEF; +* JB is either NB, or N-J+1 for the last block +* + J = 0 + 10 CONTINUE + IF( J.GE.N ) + $ GO TO 20 +* +* each step of the main loop +* J is the last column of the previous panel +* J1 is the first column of the current panel +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 for the first panel, and +* K1=0 for the rest +* + J1 = J + 1 + JB = MIN( N-J1+1, NB ) + K1 = MAX(1, J)-J +* +* Panel factorization +* + CALL ZLAHEF_AA( UPLO, 2-K1, N-J, JB, + $ A( MAX(1, J), J+1 ), LDA, + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) +* +* Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) +* + DO J2 = J+2, MIN(N, J+JB+1) + IPIV( J2 ) = IPIV( J2 ) + J + IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN + CALL ZSWAP( J1-K1-2, A( 1, J2 ), 1, + $ A( 1, IPIV(J2) ), 1 ) + END IF + END DO + J = J + JB +* +* Trailing submatrix update, where +* the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and +* WORK stores the current block of the auxiriarly matrix H +* + IF( J.LT.N ) THEN +* +* if the first panel and JB=1 (NB=1), then nothing to do +* + IF( J1.GT.1 .OR. JB.GT.1 ) THEN +* +* Merge rank-1 update with BLAS-3 update +* + ALPHA = DCONJG( A( J, J+1 ) ) + A( J, J+1 ) = ONE + CALL ZCOPY( N-J, A( J-1, J+1 ), LDA, + $ WORK( (J+1-J1+1)+JB*N ), 1 ) + CALL ZSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 ) +* +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=0 and K2=1 for the first panel, +* and K1=1 and K2=0 for the rest +* + IF( J1.GT.1 ) THEN +* +* Not first panel +* + K2 = 1 + ELSE +* +* First panel +* + K2 = 0 +* +* First update skips the first column +* + JB = JB - 1 + END IF +* + DO J2 = J+1, N, NB + NJ = MIN( NB, N-J2+1 ) +* +* Update (J2, J2) diagonal block with ZGEMV +* + J3 = J2 + DO MJ = NJ-1, 1, -1 + CALL ZGEMM( 'Conjugate transpose', 'Transpose', + $ 1, MJ, JB+1, + $ -ONE, A( J1-K2, J3 ), LDA, + $ WORK( (J3-J1+1)+K1*N ), N, + $ ONE, A( J3, J3 ), LDA ) + J3 = J3 + 1 + END DO +* +* Update off-diagonal block of J2-th block row with ZGEMM +* + CALL ZGEMM( 'Conjugate transpose', 'Transpose', + $ NJ, N-J3+1, JB+1, + $ -ONE, A( J1-K2, J2 ), LDA, + $ WORK( (J3-J1+1)+K1*N ), N, + $ ONE, A( J2, J3 ), LDA ) + END DO +* +* Recover T( J, J+1 ) +* + A( J, J+1 ) = DCONJG( ALPHA ) + END IF +* +* WORK(J+1, 1) stores H(J+1, 1) +* + CALL ZCOPY( N-J, A( J+1, J+1 ), LDA, WORK( 1 ), 1 ) + END IF + GO TO 10 + ELSE +* +* ..................................................... +* Factorize A as L*D*L**H using the lower triangle of A +* ..................................................... +* +* copy first column A(1:N, 1) into H(1:N, 1) +* (stored in WORK(1:N)) +* + CALL ZCOPY( N, A( 1, 1 ), 1, WORK( 1 ), 1 ) +* +* J is the main loop index, increasing from 1 to N in steps of +* JB, where JB is the number of columns factorized by ZLAHEF; +* JB is either NB, or N-J+1 for the last block +* + J = 0 + 11 CONTINUE + IF( J.GE.N ) + $ GO TO 20 +* +* each step of the main loop +* J is the last column of the previous panel +* J1 is the first column of the current panel +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 for the first panel, and +* K1=0 for the rest +* + J1 = J+1 + JB = MIN( N-J1+1, NB ) + K1 = MAX(1, J)-J +* +* Panel factorization +* + CALL ZLAHEF_AA( UPLO, 2-K1, N-J, JB, + $ A( J+1, MAX(1, J) ), LDA, + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) +* +* Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) +* + DO J2 = J+2, MIN(N, J+JB+1) + IPIV( J2 ) = IPIV( J2 ) + J + IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN + CALL ZSWAP( J1-K1-2, A( J2, 1 ), LDA, + $ A( IPIV(J2), 1 ), LDA ) + END IF + END DO + J = J + JB +* +* Trailing submatrix update, where +* A(J2+1, J1-1) stores L(J2+1, J1) and +* WORK(J2+1, 1) stores H(J2+1, 1) +* + IF( J.LT.N ) THEN +* +* if the first panel and JB=1 (NB=1), then nothing to do +* + IF( J1.GT.1 .OR. JB.GT.1 ) THEN +* +* Merge rank-1 update with BLAS-3 update +* + ALPHA = DCONJG( A( J+1, J ) ) + A( J+1, J ) = ONE + CALL ZCOPY( N-J, A( J+1, J-1 ), 1, + $ WORK( (J+1-J1+1)+JB*N ), 1 ) + CALL ZSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 ) +* +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=0 and K2=1 for the first panel, +* and K1=1 and K2=0 for the rest +* + IF( J1.GT.1 ) THEN +* +* Not first panel +* + K2 = 1 + ELSE +* +* First panel +* + K2 = 0 +* +* First update skips the first column +* + JB = JB - 1 + END IF +* + DO J2 = J+1, N, NB + NJ = MIN( NB, N-J2+1 ) +* +* Update (J2, J2) diagonal block with ZGEMV +* + J3 = J2 + DO MJ = NJ-1, 1, -1 + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ MJ, 1, JB+1, + $ -ONE, WORK( (J3-J1+1)+K1*N ), N, + $ A( J3, J1-K2 ), LDA, + $ ONE, A( J3, J3 ), LDA ) + J3 = J3 + 1 + END DO +* +* Update off-diagonal block of J2-th block column with ZGEMM +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ N-J3+1, NJ, JB+1, + $ -ONE, WORK( (J3-J1+1)+K1*N ), N, + $ A( J2, J1-K2 ), LDA, + $ ONE, A( J3, J2 ), LDA ) + END DO +* +* Recover T( J+1, J ) +* + A( J+1, J ) = DCONJG( ALPHA ) + END IF +* +* WORK(J+1, 1) stores H(J+1, 1) +* + CALL ZCOPY( N-J, A( J+1, J+1 ), 1, WORK( 1 ), 1 ) + END IF + GO TO 11 + END IF +* + 20 CONTINUE + RETURN +* +* End of ZHETRF_AA +* + END diff --git a/dspl/liblapack/SRC/zhetrf_aa_2stage.f b/dspl/liblapack/SRC/zhetrf_aa_2stage.f new file mode 100644 index 0000000..4d62198 --- /dev/null +++ b/dspl/liblapack/SRC/zhetrf_aa_2stage.f @@ -0,0 +1,663 @@ +*> \brief \b ZHETRF_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRF_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, +* IPIV2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX*16 A( LDA, * ), TB( * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRF_AA_2STAGE computes the factorization of a double hermitian matrix A +*> using the Aasen's algorithm. The form of the factorization is +*> +*> A = U*T*U**T or A = L*T*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is a hermitian band matrix with the +*> bandwidth of NB (NB is internally selected and stored in TB( 1 ), and T is +*> LU factorized with partial pivoting). +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX array, dimension (LDA,N) +*> On entry, the hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, + $ IPIV2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX*16 A( LDA, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), + $ ONE = ( 1.0E+0, 0.0E+0 ) ) +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER I, J, K, I1, I2, TD + INTEGER LDTB, NB, KB, JB, NT, IINFO + COMPLEX*16 PIV +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZLACGV, ZLACPY, + $ ZLASET, ZGBTRF, ZGEMM, ZGETRF, + $ ZHEGST, ZSWAP, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MIN, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF ( LTB .LT. 4*N .AND. .NOT.TQUERY ) THEN + INFO = -6 + ELSE IF ( LWORK .LT. N .AND. .NOT.WQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRF_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Answer the query +* + NB = ILAENV( 1, 'ZHETRF_AA_2STAGE', UPLO, N, -1, -1, -1 ) + IF( INFO.EQ.0 ) THEN + IF( TQUERY ) THEN + TB( 1 ) = (3*NB+1)*N + END IF + IF( WQUERY ) THEN + WORK( 1 ) = N*NB + END IF + END IF + IF( TQUERY .OR. WQUERY ) THEN + RETURN + END IF +* +* Quick return +* + IF ( N.EQ.0 ) THEN + RETURN + ENDIF +* +* Determine the number of the block size +* + LDTB = LTB/N + IF( LDTB .LT. 3*NB+1 ) THEN + NB = (LDTB-1)/3 + END IF + IF( LWORK .LT. NB*N ) THEN + NB = LWORK/N + END IF +* +* Determine the number of the block columns +* + NT = (N+NB-1)/NB + TD = 2*NB + KB = MIN(NB, N) +* +* Initialize vectors/matrices +* + DO J = 1, KB + IPIV( J ) = J + END DO +* +* Save NB +* + TB( 1 ) = NB +* + IF( UPPER ) THEN +* +* ..................................................... +* Factorize A as L*D*L**T using the upper triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*U(I,J) + T(I+1,I)*U(I+1,J) + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( (I-1)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J) + IF( I .EQ. (J-1) ) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (I-2)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL ZLACPY( 'Upper', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = U(1:J,J)'*H(1:J) + CALL ZGEMM( 'Conjugate transpose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -ONE, A( 1, J*NB+1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += U(J,J)'*T(J,J-1)*U(J-1,J) + CALL ZGEMM( 'Conjugate transpose', 'NoTranspose', + $ KB, NB, KB, + $ ONE, A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ ZERO, WORK( 1 ), N ) + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB, + $ -ONE, WORK( 1 ), N, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF + IF( J.GT.0 ) THEN + CALL ZHEGST( 1, 'Upper', KB, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, IINFO ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + TB( TD+1 + (J*NB+I-1)*LDTB ) + $ = REAL( TB( TD+1 + (J*NB+I-1)*LDTB ) ) + DO K = I+1, KB + TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + $ = DCONJG( TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) ) + END DO + END DO +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, KB, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + ELSE + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB+KB, + $ ONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL ZGEMM( 'Conjugate transpose', 'NoTranspose', + $ NB, N-(J+1)*NB, J*NB, + $ -ONE, WORK( NB+1 ), N, + $ A( 1, (J+1)*NB+1 ), LDA, + $ ONE, A( J*NB+1, (J+1)*NB+1 ), LDA ) + END IF +* +* Copy panel to workspace to call ZGETRF +* + DO K = 1, NB + CALL ZCOPY( N-(J+1)*NB, + $ A( J*NB+K, (J+1)*NB+1 ), LDA, + $ WORK( 1+(K-1)*N ), 1 ) + END DO +* +* Factorize panel +* + CALL ZGETRF( N-(J+1)*NB, NB, + $ WORK, N, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Copy panel back +* + DO K = 1, NB +* +* Copy only L-factor +* + CALL ZCOPY( N-K-(J+1)*NB, + $ WORK( K+1+(K-1)*N ), 1, + $ A( J*NB+K, (J+1)*NB+K+1 ), LDA ) +* +* Transpose U-factor to be copied back into T(J+1, J) +* + CALL ZLACGV( K, WORK( 1+(K-1)*N ), 1 ) + END DO +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL ZLASET( 'Full', KB, NB, ZERO, ZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB) , LDTB-1 ) + CALL ZLACPY( 'Upper', KB, NB, + $ WORK, N, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL ZTRSM( 'R', 'U', 'N', 'U', KB, NB, ONE, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J,J+1) into T(J+1, J), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) + $ = DCONJG( TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) ) + END DO + END DO + CALL ZLASET( 'Lower', KB, NB, ZERO, ONE, + $ A( J*NB+1, (J+1)*NB+1), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL ZSWAP( K-1, A( (J+1)*NB+1, I1 ), 1, + $ A( (J+1)*NB+1, I2 ), 1 ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL ZSWAP( I2-I1-1, A( I1, I1+1 ), LDA, + $ A( I1+1, I2 ), 1 ) + CALL ZLACGV( I2-I1, A( I1, I1+1 ), LDA ) + CALL ZLACGV( I2-I1-1, A( I1+1, I2 ), 1 ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL ZSWAP( N-I2, A( I1, I2+1 ), LDA, + $ A( I2, I2+1 ), LDA ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL ZSWAP( J*NB, A( 1, I1 ), 1, + $ A( 1, I2 ), 1 ) + END IF + ENDIF + END DO + END IF + END DO + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*L(J,I)' + T(I+1,I)'*L(J,I+1)' + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL ZGEMM( 'NoTranspose', 'Conjugate transpose', + $ NB, KB, JB, + $ ONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (I-1)*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)' + IF( I .EQ. (J-1) ) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL ZGEMM( 'NoTranspose', 'Conjugate transpose', + $ NB, KB, JB, + $ ONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (I-2)*NB+1 ), LDA, + $ ZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL ZLACPY( 'Lower', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = L(J,1:J)*H(1:J) + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -ONE, A( J*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += L(J,J)*T(J,J-1)*L(J,J-1)' + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ KB, NB, KB, + $ ONE, A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ ZERO, WORK( 1 ), N ) + CALL ZGEMM( 'NoTranspose', 'Conjugate transpose', + $ KB, KB, NB, + $ -ONE, WORK( 1 ), N, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF + IF( J.GT.0 ) THEN + CALL ZHEGST( 1, 'Lower', KB, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, IINFO ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + TB( TD+1 + (J*NB+I-1)*LDTB ) + $ = REAL( TB( TD+1 + (J*NB+I-1)*LDTB ) ) + DO K = I+1, KB + TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + $ = DCONJG( TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) ) + END DO + END DO +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL ZGEMM( 'NoTranspose', 'Conjugate transpose', + $ KB, KB, KB, + $ ONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + ELSE + CALL ZGEMM( 'NoTranspose', 'Conjugate transpose', + $ KB, KB, NB+KB, + $ ONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ ZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ N-(J+1)*NB, NB, J*NB, + $ -ONE, A( (J+1)*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ ONE, A( (J+1)*NB+1, J*NB+1 ), LDA ) + END IF +* +* Factorize panel +* + CALL ZGETRF( N-(J+1)*NB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL ZLASET( 'Full', KB, NB, ZERO, ZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB) , LDTB-1 ) + CALL ZLACPY( 'Upper', KB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL ZTRSM( 'R', 'L', 'C', 'U', KB, NB, ONE, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J+1,J) into T(J, J+1), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) + $ = DCONJG( TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) ) + END DO + END DO + CALL ZLASET( 'Upper', KB, NB, ZERO, ONE, + $ A( (J+1)*NB+1, J*NB+1), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL ZSWAP( K-1, A( I1, (J+1)*NB+1 ), LDA, + $ A( I2, (J+1)*NB+1 ), LDA ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL ZSWAP( I2-I1-1, A( I1+1, I1 ), 1, + $ A( I2, I1+1 ), LDA ) + CALL ZLACGV( I2-I1, A( I1+1, I1 ), 1 ) + CALL ZLACGV( I2-I1-1, A( I2, I1+1 ), LDA ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL ZSWAP( N-I2, A( I2+1, I1 ), 1, + $ A( I2+1, I2 ), 1 ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL ZSWAP( J*NB, A( I1, 1 ), LDA, + $ A( I2, 1 ), LDA ) + END IF + ENDIF + END DO +* +* Apply pivots to previous columns of L +* +c CALL ZLASWP( J*NB, A( 1, 1 ), LDA, +c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) + END IF + END DO + END IF +* +* Factor the band matrix + CALL ZGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) +* +* End of ZHETRF_AA_2STAGE +* + END diff --git a/dspl/liblapack/SRC/zhetrf_rk.f b/dspl/liblapack/SRC/zhetrf_rk.f new file mode 100644 index 0000000..42b829e --- /dev/null +++ b/dspl/liblapack/SRC/zhetrf_rk.f @@ -0,0 +1,498 @@ +*> \brief \b ZHETRF_RK computes the factorization of a complex Hermitian indefinite matrix using the bounded Bunch-Kaufman (rook) diagonal pivoting method (BLAS3 blocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRF_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), E ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> ZHETRF_RK computes the factorization of a complex Hermitian matrix A +*> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: +*> +*> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**H (or L**H) is the conjugate of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is Hermitian and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> For more information see Further Details section. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, contains: +*> a) ONLY diagonal elements of the Hermitian block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N) +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the Hermitian block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> IPIV describes the permutation matrix P in the factorization +*> of matrix A as follows. The absolute value of IPIV(k) +*> represents the index of row and column that were +*> interchanged with the k-th row and column. The value of UPLO +*> describes the order in which the interchanges were applied. +*> Also, the sign of IPIV represents the block structure of +*> the Hermitian block diagonal matrix D with 1-by-1 or 2-by-2 +*> diagonal blocks which correspond to 1 or 2 interchanges +*> at each factorization step. For more info see Further +*> Details section. +*> +*> If UPLO = 'U', +*> ( in factorization order, k decreases from N to 1 ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N); +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k-1) < 0 means: +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k-1) != k-1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k-1) = k-1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) <= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> +*> If UPLO = 'L', +*> ( in factorization order, k increases from 1 to N ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N). +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k+1) < 0 means: +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k+1) != k+1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k+1) = k+1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) >= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension ( MAX(1,LWORK) ). +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=1. For best performance +*> LWORK >= N*NB, where NB is the block size returned +*> by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; +*> the routine only calculates the optimal size of the WORK +*> array, returns this value as the first entry of the WORK +*> array, and no error message related to LWORK is issued +*> by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> TODO: put correct description +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHETRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, IP, IWS, K, KB, LDWORK, LWKOPT, + $ NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL ZLAHEF_RK, ZHETF2_RK, ZSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'ZHETRF_RK', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRF_RK', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'ZHETRF_RK', + $ UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by ZLAHEF_RK; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 15 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL ZLAHEF_RK( UPLO, K, NB, KB, A, LDA, E, + $ IPIV, WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL ZHETF2_RK( UPLO, K, A, LDA, E, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* No need to adjust IPIV +* +* +* Apply permutations to the leading panel 1:k-1 +* +* Read IPIV from the last block factored, i.e. +* indices k-kb+1:k and apply row permutations to the +* last k+1 colunms k+1:N after that block +* (We can do the simple loop over IPIV with decrement -1, +* since the ABS value of IPIV( I ) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + IF( K.LT.N ) THEN + DO I = K, ( K - KB + 1 ), -1 + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + CALL ZSWAP( N-K, A( I, K+1 ), LDA, + $ A( IP, K+1 ), LDA ) + END IF + END DO + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* +* This label is the exit from main loop over K decreasing +* from N to 1 in steps of KB +* + 15 CONTINUE +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by ZLAHEF_RK; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 35 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL ZLAHEF_RK( UPLO, N-K+1, NB, KB, A( K, K ), LDA, E( K ), + $ IPIV( K ), WORK, LDWORK, IINFO ) + + + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL ZHETF2_RK( UPLO, N-K+1, A( K, K ), LDA, E( K ), + $ IPIV( K ), IINFO ) + KB = N - K + 1 +* + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO I = K, K + KB - 1 + IF( IPIV( I ).GT.0 ) THEN + IPIV( I ) = IPIV( I ) + K - 1 + ELSE + IPIV( I ) = IPIV( I ) - K + 1 + END IF + END DO +* +* Apply permutations to the leading panel 1:k-1 +* +* Read IPIV from the last block factored, i.e. +* indices k:k+kb-1 and apply row permutations to the +* first k-1 colunms 1:k-1 before that block +* (We can do the simple loop over IPIV with increment 1, +* since the ABS value of IPIV( I ) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + IF( K.GT.1 ) THEN + DO I = K, ( K + KB - 1 ), 1 + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + CALL ZSWAP( K-1, A( I, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + END DO + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* +* This label is the exit from main loop over K increasing +* from 1 to N in steps of KB +* + 35 CONTINUE +* +* End Lower +* + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZHETRF_RK +* + END diff --git a/dspl/liblapack/SRC/zhetrf_rook.f b/dspl/liblapack/SRC/zhetrf_rook.f new file mode 100644 index 0000000..afbad21 --- /dev/null +++ b/dspl/liblapack/SRC/zhetrf_rook.f @@ -0,0 +1,397 @@ +*> \brief \b ZHETRF_ROOK computes the factorization of a complex Hermitian indefinite matrix using the bounded Bunch-Kaufman ("rook") diagonal pivoting method (blocked algorithm, calling Level 3 BLAS). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRF_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRF_ROOK computes the factorization of a complex Hermitian matrix A +*> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. +*> The form of the factorization is +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is Hermitian and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> Only the last KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> Only the first KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)). +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=1. For best performance +*> LWORK >= N*NB, where NB is the block size returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16HEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> June 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHETRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.6.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL ZLAHEF_ROOK, ZHETF2_ROOK, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'ZHETRF_ROOK', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, N*NB ) + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRF_ROOK', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'ZHETRF_ROOK', + $ UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by ZLAHEF_ROOK; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 40 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL ZLAHEF_ROOK( UPLO, K, NB, KB, A, LDA, + $ IPIV, WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL ZHETF2_ROOK( UPLO, K, A, LDA, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* No need to adjust IPIV +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by ZLAHEF_ROOK; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL ZLAHEF_ROOK( UPLO, N-K+1, NB, KB, A( K, K ), LDA, + $ IPIV( K ), WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL ZHETF2_ROOK( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), + $ IINFO ) + KB = N - K + 1 + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO 30 J = K, K + KB - 1 + IF( IPIV( J ).GT.0 ) THEN + IPIV( J ) = IPIV( J ) + K - 1 + ELSE + IPIV( J ) = IPIV( J ) - K + 1 + END IF + 30 CONTINUE +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* + END IF +* + 40 CONTINUE + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZHETRF_ROOK +* + END diff --git a/dspl/liblapack/SRC/zhetri.f b/dspl/liblapack/SRC/zhetri.f new file mode 100644 index 0000000..020fe65 --- /dev/null +++ b/dspl/liblapack/SRC/zhetri.f @@ -0,0 +1,397 @@ +*> \brief \b ZHETRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRI computes the inverse of a complex Hermitian indefinite matrix +*> A using the factorization A = U*D*U**H or A = L*D*L**H computed by +*> ZHETRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by ZHETRF. +*> +*> On exit, if INFO = 0, the (Hermitian) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZHETRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEcomputational +* +* ===================================================================== + SUBROUTINE ZHETRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + COMPLEX*16 CONE, ZERO + PARAMETER ( ONE = 1.0D+0, CONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP, KSTEP + DOUBLE PRECISION AK, AKP1, D, T + COMPLEX*16 AKKP1, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZHEMV, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U**H. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / DBLE( A( K, K ) ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL ZCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - DBLE( ZDOTC( K-1, WORK, 1, A( 1, + $ K ), 1 ) ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K+1 ) ) + AK = DBLE( A( K, K ) ) / T + AKP1 = DBLE( A( K+1, K+1 ) ) / T + AKKP1 = A( K, K+1 ) / T + D = T*( AK*AKP1-ONE ) + A( K, K ) = AKP1 / D + A( K+1, K+1 ) = AK / D + A( K, K+1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL ZCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - DBLE( ZDOTC( K-1, WORK, 1, A( 1, + $ K ), 1 ) ) + A( K, K+1 ) = A( K, K+1 ) - + $ ZDOTC( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 ) + CALL ZCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K+1 ), 1 ) + A( K+1, K+1 ) = A( K+1, K+1 ) - + $ DBLE( ZDOTC( K-1, WORK, 1, A( 1, K+1 ), + $ 1 ) ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + CALL ZSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + DO 40 J = KP + 1, K - 1 + TEMP = DCONJG( A( J, K ) ) + A( J, K ) = DCONJG( A( KP, J ) ) + A( KP, J ) = TEMP + 40 CONTINUE + A( KP, K ) = DCONJG( A( KP, K ) ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K+1 ) + A( K, K+1 ) = A( KP, K+1 ) + A( KP, K+1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + GO TO 30 + 50 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L**H. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 60 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / DBLE( A( K, K ) ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL ZCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK, + $ 1, ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - DBLE( ZDOTC( N-K, WORK, 1, + $ A( K+1, K ), 1 ) ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K-1 ) ) + AK = DBLE( A( K-1, K-1 ) ) / T + AKP1 = DBLE( A( K, K ) ) / T + AKKP1 = A( K, K-1 ) / T + D = T*( AK*AKP1-ONE ) + A( K-1, K-1 ) = AKP1 / D + A( K, K ) = AK / D + A( K, K-1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL ZCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK, + $ 1, ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - DBLE( ZDOTC( N-K, WORK, 1, + $ A( K+1, K ), 1 ) ) + A( K, K-1 ) = A( K, K-1 ) - + $ ZDOTC( N-K, A( K+1, K ), 1, A( K+1, K-1 ), + $ 1 ) + CALL ZCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK, + $ 1, ZERO, A( K+1, K-1 ), 1 ) + A( K-1, K-1 ) = A( K-1, K-1 ) - + $ DBLE( ZDOTC( N-K, WORK, 1, A( K+1, K-1 ), + $ 1 ) ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + DO 70 J = K + 1, KP - 1 + TEMP = DCONJG( A( J, K ) ) + A( J, K ) = DCONJG( A( KP, J ) ) + A( KP, J ) = TEMP + 70 CONTINUE + A( KP, K ) = DCONJG( A( KP, K ) ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K-1 ) + A( K, K-1 ) = A( KP, K-1 ) + A( KP, K-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + GO TO 60 + 80 CONTINUE + END IF +* + RETURN +* +* End of ZHETRI +* + END diff --git a/dspl/liblapack/SRC/zhetri2.f b/dspl/liblapack/SRC/zhetri2.f new file mode 100644 index 0000000..a7acff4 --- /dev/null +++ b/dspl/liblapack/SRC/zhetri2.f @@ -0,0 +1,205 @@ +*> \brief \b ZHETRI2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRI2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRI2 computes the inverse of a COMPLEX*16 hermitian indefinite matrix +*> A using the factorization A = U*D*U**T or A = L*D*L**T computed by +*> ZHETRF. ZHETRI2 set the LEADING DIMENSION of the workspace +*> before calling ZHETRI2X that actually computes the inverse. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the NB diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by ZHETRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the NB structure of D +*> as determined by ZHETRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N+NB+1)*(NB+3) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> WORK is size >= (N+NB+1)*(NB+3) +*> If LWORK = -1, then a workspace query is assumed; the routine +*> calculates: +*> - the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, +*> - and no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16HEcomputational +* +* ===================================================================== + SUBROUTINE ZHETRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, LQUERY + INTEGER MINSIZE, NBMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL ZHETRI2X, ZHETRI, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* Get blocksize + NBMAX = ILAENV( 1, 'ZHETRF', UPLO, N, -1, -1, -1 ) + IF ( NBMAX .GE. N ) THEN + MINSIZE = N + ELSE + MINSIZE = (N+NBMAX+1)*(NBMAX+3) + END IF +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF (LWORK .LT. MINSIZE .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* +* Quick return if possible +* +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRI2', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK(1)=MINSIZE + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN + + IF( NBMAX .GE. N ) THEN + CALL ZHETRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) + ELSE + CALL ZHETRI2X( UPLO, N, A, LDA, IPIV, WORK, NBMAX, INFO ) + END IF + RETURN +* +* End of ZHETRI2 +* + END diff --git a/dspl/liblapack/SRC/zhetri2x.f b/dspl/liblapack/SRC/zhetri2x.f new file mode 100644 index 0000000..ab35e70 --- /dev/null +++ b/dspl/liblapack/SRC/zhetri2x.f @@ -0,0 +1,590 @@ +*> \brief \b ZHETRI2X +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRI2X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRI2X( UPLO, N, A, LDA, IPIV, WORK, NB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), WORK( N+NB+1,* ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRI2X computes the inverse of a COMPLEX*16 Hermitian indefinite matrix +*> A using the factorization A = U*D*U**H or A = L*D*L**H computed by +*> ZHETRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the NNB diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by ZHETRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the NNB structure of D +*> as determined by ZHETRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N+NB+1,NB+3) +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> Block size +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16HEcomputational +* +* ===================================================================== + SUBROUTINE ZHETRI2X( UPLO, N, A, LDA, IPIV, WORK, NB, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( N+NB+1,* ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + COMPLEX*16 CONE, ZERO + PARAMETER ( ONE = 1.0D+0, + $ CONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IINFO, IP, K, CUT, NNB + INTEGER COUNT + INTEGER J, U11, INVD + + COMPLEX*16 AK, AKKP1, AKP1, D, T + COMPLEX*16 U01_I_J, U01_IP1_J + COMPLEX*16 U11_I_J, U11_IP1_J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZSYCONV, XERBLA, ZTRTRI + EXTERNAL ZGEMM, ZTRMM, ZHESWAPR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF +* +* Quick return if possible +* +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRI2X', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN +* +* Convert A +* Workspace got Non-diag elements of D +* + CALL ZSYCONV( UPLO, 'C', N, A, LDA, IPIV, WORK, IINFO ) +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + END DO + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + END DO + END IF + INFO = 0 +* +* Splitting Workspace +* U01 is a block (N,NB+1) +* The first element of U01 is in WORK(1,1) +* U11 is a block (NB+1,NB+1) +* The first element of U11 is in WORK(N+1,1) + U11 = N +* INVD is a block (N,2) +* The first element of INVD is in WORK(1,INVD) + INVD = NB+2 + + IF( UPPER ) THEN +* +* invA = P * inv(U**H)*inv(D)*inv(U)*P**H. +* + CALL ZTRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D)*inv(U) +* + K=1 + DO WHILE ( K .LE. N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK(K,INVD) = ONE / REAL ( A( K, K ) ) + WORK(K,INVD+1) = 0 + K=K+1 + ELSE +* 2 x 2 diagonal NNB + T = ABS ( WORK(K+1,1) ) + AK = REAL ( A( K, K ) ) / T + AKP1 = REAL ( A( K+1, K+1 ) ) / T + AKKP1 = WORK(K+1,1) / T + D = T*( AK*AKP1-ONE ) + WORK(K,INVD) = AKP1 / D + WORK(K+1,INVD+1) = AK / D + WORK(K,INVD+1) = -AKKP1 / D + WORK(K+1,INVD) = DCONJG (WORK(K,INVD+1) ) + K=K+2 + END IF + END DO +* +* inv(U**H) = (inv(U))**H +* +* inv(U**H)*inv(D)*inv(U) +* + CUT=N + DO WHILE (CUT .GT. 0) + NNB=NB + IF (CUT .LE. NNB) THEN + NNB=CUT + ELSE + COUNT = 0 +* count negative elements, + DO I=CUT+1-NNB,CUT + IF (IPIV(I) .LT. 0) COUNT=COUNT+1 + END DO +* need a even number for a clear cut + IF (MOD(COUNT,2) .EQ. 1) NNB=NNB+1 + END IF + + CUT=CUT-NNB +* +* U01 Block +* + DO I=1,CUT + DO J=1,NNB + WORK(I,J)=A(I,CUT+J) + END DO + END DO +* +* U11 Block +* + DO I=1,NNB + WORK(U11+I,I)=CONE + DO J=1,I-1 + WORK(U11+I,J)=ZERO + END DO + DO J=I+1,NNB + WORK(U11+I,J)=A(CUT+I,CUT+J) + END DO + END DO +* +* invD*U01 +* + I=1 + DO WHILE (I .LE. CUT) + IF (IPIV(I) > 0) THEN + DO J=1,NNB + WORK(I,J)=WORK(I,INVD)*WORK(I,J) + END DO + I=I+1 + ELSE + DO J=1,NNB + U01_I_J = WORK(I,J) + U01_IP1_J = WORK(I+1,J) + WORK(I,J)=WORK(I,INVD)*U01_I_J+ + $ WORK(I,INVD+1)*U01_IP1_J + WORK(I+1,J)=WORK(I+1,INVD)*U01_I_J+ + $ WORK(I+1,INVD+1)*U01_IP1_J + END DO + I=I+2 + END IF + END DO +* +* invD1*U11 +* + I=1 + DO WHILE (I .LE. NNB) + IF (IPIV(CUT+I) > 0) THEN + DO J=I,NNB + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + END DO + I=I+1 + ELSE + DO J=I,NNB + U11_I_J = WORK(U11+I,J) + U11_IP1_J = WORK(U11+I+1,J) + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + + $ WORK(CUT+I,INVD+1)*WORK(U11+I+1,J) + WORK(U11+I+1,J)=WORK(CUT+I+1,INVD)*U11_I_J+ + $ WORK(CUT+I+1,INVD+1)*U11_IP1_J + END DO + I=I+2 + END IF + END DO +* +* U11**H*invD1*U11->U11 +* + CALL ZTRMM('L','U','C','U',NNB, NNB, + $ CONE,A(CUT+1,CUT+1),LDA,WORK(U11+1,1),N+NB+1) +* + DO I=1,NNB + DO J=I,NNB + A(CUT+I,CUT+J)=WORK(U11+I,J) + END DO + END DO +* +* U01**H*invD*U01->A(CUT+I,CUT+J) +* + CALL ZGEMM('C','N',NNB,NNB,CUT,CONE,A(1,CUT+1),LDA, + $ WORK,N+NB+1, ZERO, WORK(U11+1,1), N+NB+1) +* +* U11 = U11**H*invD1*U11 + U01**H*invD*U01 +* + DO I=1,NNB + DO J=I,NNB + A(CUT+I,CUT+J)=A(CUT+I,CUT+J)+WORK(U11+I,J) + END DO + END DO +* +* U01 = U00**H*invD0*U01 +* + CALL ZTRMM('L',UPLO,'C','U',CUT, NNB, + $ CONE,A,LDA,WORK,N+NB+1) + +* +* Update U01 +* + DO I=1,CUT + DO J=1,NNB + A(I,CUT+J)=WORK(I,J) + END DO + END DO +* +* Next Block +* + END DO +* +* Apply PERMUTATIONS P and P**H: P * inv(U**H)*inv(D)*inv(U) *P**H +* + I=1 + DO WHILE ( I .LE. N ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF (I .LT. IP) CALL ZHESWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL ZHESWAPR( UPLO, N, A, LDA, IP ,I ) + ELSE + IP=-IPIV(I) + I=I+1 + IF ( (I-1) .LT. IP) + $ CALL ZHESWAPR( UPLO, N, A, LDA, I-1 ,IP ) + IF ( (I-1) .GT. IP) + $ CALL ZHESWAPR( UPLO, N, A, LDA, IP ,I-1 ) + ENDIF + I=I+1 + END DO + ELSE +* +* LOWER... +* +* invA = P * inv(U**H)*inv(D)*inv(U)*P**H. +* + CALL ZTRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D)*inv(U) +* + K=N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK(K,INVD) = ONE / REAL ( A( K, K ) ) + WORK(K,INVD+1) = 0 + K=K-1 + ELSE +* 2 x 2 diagonal NNB + T = ABS ( WORK(K-1,1) ) + AK = REAL ( A( K-1, K-1 ) ) / T + AKP1 = REAL ( A( K, K ) ) / T + AKKP1 = WORK(K-1,1) / T + D = T*( AK*AKP1-ONE ) + WORK(K-1,INVD) = AKP1 / D + WORK(K,INVD) = AK / D + WORK(K,INVD+1) = -AKKP1 / D + WORK(K-1,INVD+1) = DCONJG (WORK(K,INVD+1) ) + K=K-2 + END IF + END DO +* +* inv(U**H) = (inv(U))**H +* +* inv(U**H)*inv(D)*inv(U) +* + CUT=0 + DO WHILE (CUT .LT. N) + NNB=NB + IF (CUT + NNB .GE. N) THEN + NNB=N-CUT + ELSE + COUNT = 0 +* count negative elements, + DO I=CUT+1,CUT+NNB + IF (IPIV(I) .LT. 0) COUNT=COUNT+1 + END DO +* need a even number for a clear cut + IF (MOD(COUNT,2) .EQ. 1) NNB=NNB+1 + END IF +* L21 Block + DO I=1,N-CUT-NNB + DO J=1,NNB + WORK(I,J)=A(CUT+NNB+I,CUT+J) + END DO + END DO +* L11 Block + DO I=1,NNB + WORK(U11+I,I)=CONE + DO J=I+1,NNB + WORK(U11+I,J)=ZERO + END DO + DO J=1,I-1 + WORK(U11+I,J)=A(CUT+I,CUT+J) + END DO + END DO +* +* invD*L21 +* + I=N-CUT-NNB + DO WHILE (I .GE. 1) + IF (IPIV(CUT+NNB+I) > 0) THEN + DO J=1,NNB + WORK(I,J)=WORK(CUT+NNB+I,INVD)*WORK(I,J) + END DO + I=I-1 + ELSE + DO J=1,NNB + U01_I_J = WORK(I,J) + U01_IP1_J = WORK(I-1,J) + WORK(I,J)=WORK(CUT+NNB+I,INVD)*U01_I_J+ + $ WORK(CUT+NNB+I,INVD+1)*U01_IP1_J + WORK(I-1,J)=WORK(CUT+NNB+I-1,INVD+1)*U01_I_J+ + $ WORK(CUT+NNB+I-1,INVD)*U01_IP1_J + END DO + I=I-2 + END IF + END DO +* +* invD1*L11 +* + I=NNB + DO WHILE (I .GE. 1) + IF (IPIV(CUT+I) > 0) THEN + DO J=1,NNB + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + END DO + I=I-1 + ELSE + DO J=1,NNB + U11_I_J = WORK(U11+I,J) + U11_IP1_J = WORK(U11+I-1,J) + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + + $ WORK(CUT+I,INVD+1)*U11_IP1_J + WORK(U11+I-1,J)=WORK(CUT+I-1,INVD+1)*U11_I_J+ + $ WORK(CUT+I-1,INVD)*U11_IP1_J + END DO + I=I-2 + END IF + END DO +* +* L11**H*invD1*L11->L11 +* + CALL ZTRMM('L',UPLO,'C','U',NNB, NNB, + $ CONE,A(CUT+1,CUT+1),LDA,WORK(U11+1,1),N+NB+1) +* + DO I=1,NNB + DO J=1,I + A(CUT+I,CUT+J)=WORK(U11+I,J) + END DO + END DO +* + IF ( (CUT+NNB) .LT. N ) THEN +* +* L21**H*invD2*L21->A(CUT+I,CUT+J) +* + CALL ZGEMM('C','N',NNB,NNB,N-NNB-CUT,CONE,A(CUT+NNB+1,CUT+1) + $ ,LDA,WORK,N+NB+1, ZERO, WORK(U11+1,1), N+NB+1) + +* +* L11 = L11**H*invD1*L11 + U01**H*invD*U01 +* + DO I=1,NNB + DO J=1,I + A(CUT+I,CUT+J)=A(CUT+I,CUT+J)+WORK(U11+I,J) + END DO + END DO +* +* L01 = L22**H*invD2*L21 +* + CALL ZTRMM('L',UPLO,'C','U', N-NNB-CUT, NNB, + $ CONE,A(CUT+NNB+1,CUT+NNB+1),LDA,WORK,N+NB+1) + +* Update L21 + DO I=1,N-CUT-NNB + DO J=1,NNB + A(CUT+NNB+I,CUT+J)=WORK(I,J) + END DO + END DO + ELSE +* +* L11 = L11**H*invD1*L11 +* + DO I=1,NNB + DO J=1,I + A(CUT+I,CUT+J)=WORK(U11+I,J) + END DO + END DO + END IF +* +* Next Block +* + CUT=CUT+NNB + END DO +* +* Apply PERMUTATIONS P and P**H: P * inv(U**H)*inv(D)*inv(U) *P**H +* + I=N + DO WHILE ( I .GE. 1 ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF (I .LT. IP) CALL ZHESWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL ZHESWAPR( UPLO, N, A, LDA, IP ,I ) + ELSE + IP=-IPIV(I) + IF ( I .LT. IP) CALL ZHESWAPR( UPLO, N, A, LDA, I ,IP ) + IF ( I .GT. IP) CALL ZHESWAPR( UPLO, N, A, LDA, IP ,I ) + I=I-1 + ENDIF + I=I-1 + END DO + END IF +* + RETURN +* +* End of ZHETRI2X +* + END + diff --git a/dspl/liblapack/SRC/zhetri_3.f b/dspl/liblapack/SRC/zhetri_3.f new file mode 100644 index 0000000..14be660 --- /dev/null +++ b/dspl/liblapack/SRC/zhetri_3.f @@ -0,0 +1,248 @@ +*> \brief \b ZHETRI_3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRI_3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRI_3( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), E( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> ZHETRI_3 computes the inverse of a complex Hermitian indefinite +*> matrix A using the factorization computed by ZHETRF_RK or ZHETRF_BK: +*> +*> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**H (or L**H) is the conjugate of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is Hermitian and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> ZHETRI_3 sets the leading dimension of the workspace before calling +*> ZHETRI_3X that actually computes the inverse. This is the blocked +*> version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix. +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, diagonal of the block diagonal matrix D and +*> factors U or L as computed by ZHETRF_RK and ZHETRF_BK: +*> a) ONLY diagonal elements of the Hermitian block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> On exit, if INFO = 0, the Hermitian inverse of the original +*> matrix. +*> If UPLO = 'U': the upper triangular part of the inverse +*> is formed and the part of A below the diagonal is not +*> referenced; +*> If UPLO = 'L': the lower triangular part of the inverse +*> is formed and the part of A above the diagonal is not +*> referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the Hermitian block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZHETRF_RK or ZHETRF_BK. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N+NB+1)*(NB+3). +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= (N+NB+1)*(NB+3). +*> +*> If LDWORK = -1, then a workspace query is assumed; +*> the routine only calculates the optimal size of the optimal +*> size of the WORK array, returns this value as the first +*> entry of the WORK array, and no error message related to +*> LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16HEcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> November 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHETRI_3( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, LQUERY + INTEGER LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL ZHETRI_3X, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* Determine the block size +* + NB = MAX( 1, ILAENV( 1, 'ZHETRI_3', UPLO, N, -1, -1, -1 ) ) + LWKOPT = ( N+NB+1 ) * ( NB+3 ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF ( LWORK .LT. LWKOPT .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRI_3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK( 1 ) = LWKOPT + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + CALL ZHETRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZHETRI_3 +* + END diff --git a/dspl/liblapack/SRC/zhetri_3x.f b/dspl/liblapack/SRC/zhetri_3x.f new file mode 100644 index 0000000..d7c00c8 --- /dev/null +++ b/dspl/liblapack/SRC/zhetri_3x.f @@ -0,0 +1,649 @@ +*> \brief \b ZHETRI_3X +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRI_3X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), E( * ), WORK( N+NB+1, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> ZHETRI_3X computes the inverse of a complex Hermitian indefinite +*> matrix A using the factorization computed by ZHETRF_RK or ZHETRF_BK: +*> +*> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**H (or L**H) is the conjugate of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is Hermitian and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix. +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, diagonal of the block diagonal matrix D and +*> factors U or L as computed by ZHETRF_RK and ZHETRF_BK: +*> a) ONLY diagonal elements of the Hermitian block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> On exit, if INFO = 0, the Hermitian inverse of the original +*> matrix. +*> If UPLO = 'U': the upper triangular part of the inverse +*> is formed and the part of A below the diagonal is not +*> referenced; +*> If UPLO = 'L': the lower triangular part of the inverse +*> is formed and the part of A above the diagonal is not +*> referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the Hermitian block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZHETRF_RK or ZHETRF_BK. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N+NB+1,NB+3). +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> Block size. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16HEcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> June 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHETRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), E( * ), WORK( N+NB+1, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + COMPLEX*16 CONE, CZERO + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), + $ CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11 + DOUBLE PRECISION AK, AKP1, T + COMPLEX*16 AKKP1, D, U01_I_J, U01_IP1_J, U11_I_J, + $ U11_IP1_J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZGEMM, ZHESWAPR, ZTRTRI, ZTRMM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DCONJG, DBLE, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF +* +* Quick return if possible +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRI_3X', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN +* +* Workspace got Non-diag elements of D +* + DO K = 1, N + WORK( K, 1 ) = E( K ) + END DO +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO ) + $ RETURN + END DO + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO ) + $ RETURN + END DO + END IF +* + INFO = 0 +* +* Splitting Workspace +* U01 is a block ( N, NB+1 ) +* The first element of U01 is in WORK( 1, 1 ) +* U11 is a block ( NB+1, NB+1 ) +* The first element of U11 is in WORK( N+1, 1 ) +* + U11 = N +* +* INVD is a block ( N, 2 ) +* The first element of INVD is in WORK( 1, INVD ) +* + INVD = NB + 2 + + IF( UPPER ) THEN +* +* Begin Upper +* +* invA = P * inv(U**H) * inv(D) * inv(U) * P**T. +* + CALL ZTRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D) * inv(U) +* + K = 1 + DO WHILE( K.LE.N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK( K, INVD ) = ONE / DBLE( A( K, K ) ) + WORK( K, INVD+1 ) = CZERO + ELSE +* 2 x 2 diagonal NNB + T = ABS( WORK( K+1, 1 ) ) + AK = DBLE( A( K, K ) ) / T + AKP1 = DBLE( A( K+1, K+1 ) ) / T + AKKP1 = WORK( K+1, 1 ) / T + D = T*( AK*AKP1-CONE ) + WORK( K, INVD ) = AKP1 / D + WORK( K+1, INVD+1 ) = AK / D + WORK( K, INVD+1 ) = -AKKP1 / D + WORK( K+1, INVD ) = DCONJG( WORK( K, INVD+1 ) ) + K = K + 1 + END IF + K = K + 1 + END DO +* +* inv(U**H) = (inv(U))**H +* +* inv(U**H) * inv(D) * inv(U) +* + CUT = N + DO WHILE( CUT.GT.0 ) + NNB = NB + IF( CUT.LE.NNB ) THEN + NNB = CUT + ELSE + ICOUNT = 0 +* count negative elements, + DO I = CUT+1-NNB, CUT + IF( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1 + END DO +* need a even number for a clear cut + IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1 + END IF + + CUT = CUT - NNB +* +* U01 Block +* + DO I = 1, CUT + DO J = 1, NNB + WORK( I, J ) = A( I, CUT+J ) + END DO + END DO +* +* U11 Block +* + DO I = 1, NNB + WORK( U11+I, I ) = CONE + DO J = 1, I-1 + WORK( U11+I, J ) = CZERO + END DO + DO J = I+1, NNB + WORK( U11+I, J ) = A( CUT+I, CUT+J ) + END DO + END DO +* +* invD * U01 +* + I = 1 + DO WHILE( I.LE.CUT ) + IF( IPIV( I ).GT.0 ) THEN + DO J = 1, NNB + WORK( I, J ) = WORK( I, INVD ) * WORK( I, J ) + END DO + ELSE + DO J = 1, NNB + U01_I_J = WORK( I, J ) + U01_IP1_J = WORK( I+1, J ) + WORK( I, J ) = WORK( I, INVD ) * U01_I_J + $ + WORK( I, INVD+1 ) * U01_IP1_J + WORK( I+1, J ) = WORK( I+1, INVD ) * U01_I_J + $ + WORK( I+1, INVD+1 ) * U01_IP1_J + END DO + I = I + 1 + END IF + I = I + 1 + END DO +* +* invD1 * U11 +* + I = 1 + DO WHILE ( I.LE.NNB ) + IF( IPIV( CUT+I ).GT.0 ) THEN + DO J = I, NNB + WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) + END DO + ELSE + DO J = I, NNB + U11_I_J = WORK(U11+I,J) + U11_IP1_J = WORK(U11+I+1,J) + WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) + $ + WORK(CUT+I,INVD+1) * WORK(U11+I+1,J) + WORK( U11+I+1, J ) = WORK(CUT+I+1,INVD) * U11_I_J + $ + WORK(CUT+I+1,INVD+1) * U11_IP1_J + END DO + I = I + 1 + END IF + I = I + 1 + END DO +* +* U11**H * invD1 * U11 -> U11 +* + CALL ZTRMM( 'L', 'U', 'C', 'U', NNB, NNB, + $ CONE, A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ), + $ N+NB+1 ) +* + DO I = 1, NNB + DO J = I, NNB + A( CUT+I, CUT+J ) = WORK( U11+I, J ) + END DO + END DO +* +* U01**H * invD * U01 -> A( CUT+I, CUT+J ) +* + CALL ZGEMM( 'C', 'N', NNB, NNB, CUT, CONE, A( 1, CUT+1 ), + $ LDA, WORK, N+NB+1, CZERO, WORK(U11+1,1), + $ N+NB+1 ) + +* +* U11 = U11**H * invD1 * U11 + U01**H * invD * U01 +* + DO I = 1, NNB + DO J = I, NNB + A( CUT+I, CUT+J ) = A( CUT+I, CUT+J ) + WORK(U11+I,J) + END DO + END DO +* +* U01 = U00**H * invD0 * U01 +* + CALL ZTRMM( 'L', UPLO, 'C', 'U', CUT, NNB, + $ CONE, A, LDA, WORK, N+NB+1 ) + +* +* Update U01 +* + DO I = 1, CUT + DO J = 1, NNB + A( I, CUT+J ) = WORK( I, J ) + END DO + END DO +* +* Next Block +* + END DO +* +* Apply PERMUTATIONS P and P**T: +* P * inv(U**H) * inv(D) * inv(U) * P**T. +* Interchange rows and columns I and IPIV(I) in reverse order +* from the formation order of IPIV vector for Upper case. +* +* ( We can use a loop over IPIV with increment 1, +* since the ABS value of IPIV(I) represents the row (column) +* index of the interchange with row (column) i in both 1x1 +* and 2x2 pivot cases, i.e. we don't need separate code branches +* for 1x1 and 2x2 pivot cases ) +* + DO I = 1, N + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + IF (I .LT. IP) CALL ZHESWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL ZHESWAPR( UPLO, N, A, LDA, IP ,I ) + END IF + END DO +* + ELSE +* +* Begin Lower +* +* inv A = P * inv(L**H) * inv(D) * inv(L) * P**T. +* + CALL ZTRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D) * inv(L) +* + K = N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK( K, INVD ) = ONE / DBLE( A( K, K ) ) + WORK( K, INVD+1 ) = CZERO + ELSE +* 2 x 2 diagonal NNB + T = ABS( WORK( K-1, 1 ) ) + AK = DBLE( A( K-1, K-1 ) ) / T + AKP1 = DBLE( A( K, K ) ) / T + AKKP1 = WORK( K-1, 1 ) / T + D = T*( AK*AKP1-CONE ) + WORK( K-1, INVD ) = AKP1 / D + WORK( K, INVD ) = AK / D + WORK( K, INVD+1 ) = -AKKP1 / D + WORK( K-1, INVD+1 ) = DCONJG( WORK( K, INVD+1 ) ) + K = K - 1 + END IF + K = K - 1 + END DO +* +* inv(L**H) = (inv(L))**H +* +* inv(L**H) * inv(D) * inv(L) +* + CUT = 0 + DO WHILE( CUT.LT.N ) + NNB = NB + IF( (CUT + NNB).GT.N ) THEN + NNB = N - CUT + ELSE + ICOUNT = 0 +* count negative elements, + DO I = CUT + 1, CUT+NNB + IF ( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1 + END DO +* need a even number for a clear cut + IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1 + END IF +* +* L21 Block +* + DO I = 1, N-CUT-NNB + DO J = 1, NNB + WORK( I, J ) = A( CUT+NNB+I, CUT+J ) + END DO + END DO +* +* L11 Block +* + DO I = 1, NNB + WORK( U11+I, I) = CONE + DO J = I+1, NNB + WORK( U11+I, J ) = CZERO + END DO + DO J = 1, I-1 + WORK( U11+I, J ) = A( CUT+I, CUT+J ) + END DO + END DO +* +* invD*L21 +* + I = N-CUT-NNB + DO WHILE( I.GE.1 ) + IF( IPIV( CUT+NNB+I ).GT.0 ) THEN + DO J = 1, NNB + WORK( I, J ) = WORK( CUT+NNB+I, INVD) * WORK( I, J) + END DO + ELSE + DO J = 1, NNB + U01_I_J = WORK(I,J) + U01_IP1_J = WORK(I-1,J) + WORK(I,J)=WORK(CUT+NNB+I,INVD)*U01_I_J+ + $ WORK(CUT+NNB+I,INVD+1)*U01_IP1_J + WORK(I-1,J)=WORK(CUT+NNB+I-1,INVD+1)*U01_I_J+ + $ WORK(CUT+NNB+I-1,INVD)*U01_IP1_J + END DO + I = I - 1 + END IF + I = I - 1 + END DO +* +* invD1*L11 +* + I = NNB + DO WHILE( I.GE.1 ) + IF( IPIV( CUT+I ).GT.0 ) THEN + DO J = 1, NNB + WORK( U11+I, J ) = WORK( CUT+I, INVD)*WORK(U11+I,J) + END DO + + ELSE + DO J = 1, NNB + U11_I_J = WORK( U11+I, J ) + U11_IP1_J = WORK( U11+I-1, J ) + WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) + $ + WORK(CUT+I,INVD+1) * U11_IP1_J + WORK( U11+I-1, J ) = WORK(CUT+I-1,INVD+1) * U11_I_J + $ + WORK(CUT+I-1,INVD) * U11_IP1_J + END DO + I = I - 1 + END IF + I = I - 1 + END DO +* +* L11**H * invD1 * L11 -> L11 +* + CALL ZTRMM( 'L', UPLO, 'C', 'U', NNB, NNB, CONE, + $ A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ), + $ N+NB+1 ) + +* + DO I = 1, NNB + DO J = 1, I + A( CUT+I, CUT+J ) = WORK( U11+I, J ) + END DO + END DO +* + IF( (CUT+NNB).LT.N ) THEN +* +* L21**H * invD2*L21 -> A( CUT+I, CUT+J ) +* + CALL ZGEMM( 'C', 'N', NNB, NNB, N-NNB-CUT, CONE, + $ A( CUT+NNB+1, CUT+1 ), LDA, WORK, N+NB+1, + $ CZERO, WORK( U11+1, 1 ), N+NB+1 ) + +* +* L11 = L11**H * invD1 * L11 + U01**H * invD * U01 +* + DO I = 1, NNB + DO J = 1, I + A( CUT+I, CUT+J ) = A( CUT+I, CUT+J )+WORK(U11+I,J) + END DO + END DO +* +* L01 = L22**H * invD2 * L21 +* + CALL ZTRMM( 'L', UPLO, 'C', 'U', N-NNB-CUT, NNB, CONE, + $ A( CUT+NNB+1, CUT+NNB+1 ), LDA, WORK, + $ N+NB+1 ) +* +* Update L21 +* + DO I = 1, N-CUT-NNB + DO J = 1, NNB + A( CUT+NNB+I, CUT+J ) = WORK( I, J ) + END DO + END DO +* + ELSE +* +* L11 = L11**H * invD1 * L11 +* + DO I = 1, NNB + DO J = 1, I + A( CUT+I, CUT+J ) = WORK( U11+I, J ) + END DO + END DO + END IF +* +* Next Block +* + CUT = CUT + NNB +* + END DO +* +* Apply PERMUTATIONS P and P**T: +* P * inv(L**H) * inv(D) * inv(L) * P**T. +* Interchange rows and columns I and IPIV(I) in reverse order +* from the formation order of IPIV vector for Lower case. +* +* ( We can use a loop over IPIV with increment -1, +* since the ABS value of IPIV(I) represents the row (column) +* index of the interchange with row (column) i in both 1x1 +* and 2x2 pivot cases, i.e. we don't need separate code branches +* for 1x1 and 2x2 pivot cases ) +* + DO I = N, 1, -1 + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + IF (I .LT. IP) CALL ZHESWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL ZHESWAPR( UPLO, N, A, LDA, IP ,I ) + END IF + END DO +* + END IF +* + RETURN +* +* End of ZHETRI_3X +* + END diff --git a/dspl/liblapack/SRC/zhetri_rook.f b/dspl/liblapack/SRC/zhetri_rook.f new file mode 100644 index 0000000..68b88ac --- /dev/null +++ b/dspl/liblapack/SRC/zhetri_rook.f @@ -0,0 +1,516 @@ +*> \brief \b ZHETRI_ROOK computes the inverse of HE matrix using the factorization obtained with the bounded Bunch-Kaufman ("rook") diagonal pivoting method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRI_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRI_ROOK( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRI_ROOK computes the inverse of a complex Hermitian indefinite matrix +*> A using the factorization A = U*D*U**H or A = L*D*L**H computed by +*> ZHETRF_ROOK. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by ZHETRF_ROOK. +*> +*> On exit, if INFO = 0, the (Hermitian) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZHETRF_ROOK. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup complex16HEcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHETRI_ROOK( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + COMPLEX*16 CONE, CZERO + PARAMETER ( ONE = 1.0D+0, CONE = ( 1.0D+0, 0.0D+0 ), + $ CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP, KSTEP + DOUBLE PRECISION AK, AKP1, D, T + COMPLEX*16 AKKP1, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. External Subroutines .. + EXTERNAL ZCOPY, ZHEMV, ZSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DCONJG, MAX, DBLE +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRI_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO ) + $ RETURN + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U**H. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 70 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / DBLE( A( K, K ) ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL ZCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, CZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - DBLE( ZDOTC( K-1, WORK, 1, A( 1, + $ K ), 1 ) ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K+1 ) ) + AK = DBLE( A( K, K ) ) / T + AKP1 = DBLE( A( K+1, K+1 ) ) / T + AKKP1 = A( K, K+1 ) / T + D = T*( AK*AKP1-ONE ) + A( K, K ) = AKP1 / D + A( K+1, K+1 ) = AK / D + A( K, K+1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL ZCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, CZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - DBLE( ZDOTC( K-1, WORK, 1, A( 1, + $ K ), 1 ) ) + A( K, K+1 ) = A( K, K+1 ) - + $ ZDOTC( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 ) + CALL ZCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, CZERO, + $ A( 1, K+1 ), 1 ) + A( K+1, K+1 ) = A( K+1, K+1 ) - + $ DBLE( ZDOTC( K-1, WORK, 1, A( 1, K+1 ), + $ 1 ) ) + END IF + KSTEP = 2 + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* Interchange rows and columns K and IPIV(K) in the leading +* submatrix A(1:k,1:k) +* + KP = IPIV( K ) + IF( KP.NE.K ) THEN +* + IF( KP.GT.1 ) + $ CALL ZSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) +* + DO 40 J = KP + 1, K - 1 + TEMP = DCONJG( A( J, K ) ) + A( J, K ) = DCONJG( A( KP, J ) ) + A( KP, J ) = TEMP + 40 CONTINUE +* + A( KP, K ) = DCONJG( A( KP, K ) ) +* + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + ELSE +* +* Interchange rows and columns K and K+1 with -IPIV(K) and +* -IPIV(K+1) in the leading submatrix A(k+1:n,k+1:n) +* +* (1) Interchange rows and columns K and -IPIV(K) +* + KP = -IPIV( K ) + IF( KP.NE.K ) THEN +* + IF( KP.GT.1 ) + $ CALL ZSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) +* + DO 50 J = KP + 1, K - 1 + TEMP = DCONJG( A( J, K ) ) + A( J, K ) = DCONJG( A( KP, J ) ) + A( KP, J ) = TEMP + 50 CONTINUE +* + A( KP, K ) = DCONJG( A( KP, K ) ) +* + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP +* + TEMP = A( K, K+1 ) + A( K, K+1 ) = A( KP, K+1 ) + A( KP, K+1 ) = TEMP + END IF +* +* (2) Interchange rows and columns K+1 and -IPIV(K+1) +* + K = K + 1 + KP = -IPIV( K ) + IF( KP.NE.K ) THEN +* + IF( KP.GT.1 ) + $ CALL ZSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) +* + DO 60 J = KP + 1, K - 1 + TEMP = DCONJG( A( J, K ) ) + A( J, K ) = DCONJG( A( KP, J ) ) + A( KP, J ) = TEMP + 60 CONTINUE +* + A( KP, K ) = DCONJG( A( KP, K ) ) +* + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + END IF +* + K = K + 1 + GO TO 30 + 70 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L**H. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 80 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 120 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / DBLE( A( K, K ) ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL ZCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK, + $ 1, CZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - DBLE( ZDOTC( N-K, WORK, 1, + $ A( K+1, K ), 1 ) ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( A( K, K-1 ) ) + AK = DBLE( A( K-1, K-1 ) ) / T + AKP1 = DBLE( A( K, K ) ) / T + AKKP1 = A( K, K-1 ) / T + D = T*( AK*AKP1-ONE ) + A( K-1, K-1 ) = AKP1 / D + A( K, K ) = AK / D + A( K, K-1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL ZCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK, + $ 1, CZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - DBLE( ZDOTC( N-K, WORK, 1, + $ A( K+1, K ), 1 ) ) + A( K, K-1 ) = A( K, K-1 ) - + $ ZDOTC( N-K, A( K+1, K ), 1, A( K+1, K-1 ), + $ 1 ) + CALL ZCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK, + $ 1, CZERO, A( K+1, K-1 ), 1 ) + A( K-1, K-1 ) = A( K-1, K-1 ) - + $ DBLE( ZDOTC( N-K, WORK, 1, A( K+1, K-1 ), + $ 1 ) ) + END IF + KSTEP = 2 + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* Interchange rows and columns K and IPIV(K) in the trailing +* submatrix A(k:n,k:n) +* + KP = IPIV( K ) + IF( KP.NE.K ) THEN +* + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) +* + DO 90 J = K + 1, KP - 1 + TEMP = DCONJG( A( J, K ) ) + A( J, K ) = DCONJG( A( KP, J ) ) + A( KP, J ) = TEMP + 90 CONTINUE +* + A( KP, K ) = DCONJG( A( KP, K ) ) +* + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + ELSE +* +* Interchange rows and columns K and K-1 with -IPIV(K) and +* -IPIV(K-1) in the trailing submatrix A(k-1:n,k-1:n) +* +* (1) Interchange rows and columns K and -IPIV(K) +* + KP = -IPIV( K ) + IF( KP.NE.K ) THEN +* + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) +* + DO 100 J = K + 1, KP - 1 + TEMP = DCONJG( A( J, K ) ) + A( J, K ) = DCONJG( A( KP, J ) ) + A( KP, J ) = TEMP + 100 CONTINUE +* + A( KP, K ) = DCONJG( A( KP, K ) ) +* + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP +* + TEMP = A( K, K-1 ) + A( K, K-1 ) = A( KP, K-1 ) + A( KP, K-1 ) = TEMP + END IF +* +* (2) Interchange rows and columns K-1 and -IPIV(K-1) +* + K = K - 1 + KP = -IPIV( K ) + IF( KP.NE.K ) THEN +* + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) +* + DO 110 J = K + 1, KP - 1 + TEMP = DCONJG( A( J, K ) ) + A( J, K ) = DCONJG( A( KP, J ) ) + A( KP, J ) = TEMP + 110 CONTINUE +* + A( KP, K ) = DCONJG( A( KP, K ) ) +* + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + END IF +* + K = K - 1 + GO TO 80 + 120 CONTINUE + END IF +* + RETURN +* +* End of ZHETRI_ROOK +* + END diff --git a/dspl/liblapack/SRC/zhetrs.f b/dspl/liblapack/SRC/zhetrs.f new file mode 100644 index 0000000..5af9542 --- /dev/null +++ b/dspl/liblapack/SRC/zhetrs.f @@ -0,0 +1,469 @@ +*> \brief \b ZHETRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRS solves a system of linear equations A*X = B with a complex +*> Hermitian matrix A using the factorization A = U*D*U**H or +*> A = L*D*L**H computed by ZHETRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZHETRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZHETRF. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEcomputational +* +* ===================================================================== + SUBROUTINE ZHETRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP + DOUBLE PRECISION S + COMPLEX*16 AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZGEMV, ZGERU, ZLACGV, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCONJG, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**H. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL ZGERU( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + S = DBLE( ONE ) / DBLE( A( K, K ) ) + CALL ZDSCAL( NRHS, S, B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL ZSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL ZGERU( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL ZGERU( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ), + $ LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K-1, K ) + AKM1 = A( K-1, K-1 ) / AKM1K + AK = A( K, K ) / DCONJG( AKM1K ) + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / DCONJG( AKM1K ) + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U**H *X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U**H(K)), where U(K) is the transformation +* stored in column K of A. +* + IF( K.GT.1 ) THEN + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + END IF +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U**H(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.GT.1 ) THEN + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) +* + CALL ZLACGV( NRHS, B( K+1, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K+1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L**H. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL ZGERU( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + S = DBLE( ONE ) / DBLE( A( K, K ) ) + CALL ZDSCAL( NRHS, S, B( K, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL ZSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL ZGERU( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL ZGERU( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K+1, K ) + AKM1 = A( K, K ) / DCONJG( AKM1K ) + AK = A( K+1, K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / DCONJG( AKM1K ) + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L**H *X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L**H(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) THEN + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, A( K+1, K ), 1, ONE, + $ B( K, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + END IF +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L**H(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, A( K+1, K ), 1, ONE, + $ B( K, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) +* + CALL ZLACGV( NRHS, B( K-1, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, A( K+1, K-1 ), 1, ONE, + $ B( K-1, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K-1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of ZHETRS +* + END diff --git a/dspl/liblapack/SRC/zhetrs2.f b/dspl/liblapack/SRC/zhetrs2.f new file mode 100644 index 0000000..77f60d4 --- /dev/null +++ b/dspl/liblapack/SRC/zhetrs2.f @@ -0,0 +1,359 @@ +*> \brief \b ZHETRS2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRS2( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRS2 solves a system of linear equations A*X = B with a complex +*> Hermitian matrix A using the factorization A = U*D*U**H or +*> A = L*D*L**H computed by ZHETRF and converted by ZSYCONV. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZHETRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZHETRF. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16HEcomputational +* +* ===================================================================== + SUBROUTINE ZHETRS2( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = (1.0D+0,0.0D+0) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IINFO, J, K, KP + DOUBLE PRECISION S + COMPLEX*16 AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZDSCAL, ZSYCONV, ZSWAP, ZTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCONJG, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRS2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Convert A +* + CALL ZSYCONV( UPLO, 'C', N, A, LDA, IPIV, WORK, IINFO ) +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**H. +* +* P**T * B + K=N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K-1 and -IPIV(K). + KP = -IPIV( K ) + IF( KP.EQ.-IPIV( K-1 ) ) + $ CALL ZSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-2 + END IF + END DO +* +* Compute (U \P**T * B) -> B [ (U \P**T * B) ] +* + CALL ZTRSM('L','U','N','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* Compute D \ B -> B [ D \ (U \P**T * B) ] +* + I=N + DO WHILE ( I .GE. 1 ) + IF( IPIV(I) .GT. 0 ) THEN + S = DBLE( ONE ) / DBLE( A( I, I ) ) + CALL ZDSCAL( NRHS, S, B( I, 1 ), LDB ) + ELSEIF ( I .GT. 1) THEN + IF ( IPIV(I-1) .EQ. IPIV(I) ) THEN + AKM1K = WORK(I) + AKM1 = A( I-1, I-1 ) / AKM1K + AK = A( I, I ) / DCONJG( AKM1K ) + DENOM = AKM1*AK - ONE + DO 15 J = 1, NRHS + BKM1 = B( I-1, J ) / AKM1K + BK = B( I, J ) / DCONJG( AKM1K ) + B( I-1, J ) = ( AK*BKM1-BK ) / DENOM + B( I, J ) = ( AKM1*BK-BKM1 ) / DENOM + 15 CONTINUE + I = I - 1 + ENDIF + ENDIF + I = I - 1 + END DO +* +* Compute (U**H \ B) -> B [ U**H \ (D \ (U \P**T * B) ) ] +* + CALL ZTRSM('L','U','C','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* P * B [ P * (U**H \ (D \ (U \P**T * B) )) ] +* + K=1 + DO WHILE ( K .LE. N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K-1 and -IPIV(K). + KP = -IPIV( K ) + IF( K .LT. N .AND. KP.EQ.-IPIV( K+1 ) ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+2 + ENDIF + END DO +* + ELSE +* +* Solve A*X = B, where A = L*D*L**H. +* +* P**T * B + K=1 + DO WHILE ( K .LE. N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K and -IPIV(K+1). + KP = -IPIV( K+1 ) + IF( KP.EQ.-IPIV( K ) ) + $ CALL ZSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+2 + ENDIF + END DO +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL ZTRSM('L','L','N','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* Compute D \ B -> B [ D \ (L \P**T * B) ] +* + I=1 + DO WHILE ( I .LE. N ) + IF( IPIV(I) .GT. 0 ) THEN + S = DBLE( ONE ) / DBLE( A( I, I ) ) + CALL ZDSCAL( NRHS, S, B( I, 1 ), LDB ) + ELSE + AKM1K = WORK(I) + AKM1 = A( I, I ) / DCONJG( AKM1K ) + AK = A( I+1, I+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 25 J = 1, NRHS + BKM1 = B( I, J ) / DCONJG( AKM1K ) + BK = B( I+1, J ) / AKM1K + B( I, J ) = ( AK*BKM1-BK ) / DENOM + B( I+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 25 CONTINUE + I = I + 1 + ENDIF + I = I + 1 + END DO +* +* Compute (L**H \ B) -> B [ L**H \ (D \ (L \P**T * B) ) ] +* + CALL ZTRSM('L','L','C','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* P * B [ P * (L**H \ (D \ (L \P**T * B) )) ] +* + K=N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K-1 and -IPIV(K). + KP = -IPIV( K ) + IF( K.GT.1 .AND. KP.EQ.-IPIV( K-1 ) ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-2 + ENDIF + END DO +* + END IF +* +* Revert A +* + CALL ZSYCONV( UPLO, 'R', N, A, LDA, IPIV, WORK, IINFO ) +* + RETURN +* +* End of ZHETRS2 +* + END diff --git a/dspl/liblapack/SRC/zhetrs_3.f b/dspl/liblapack/SRC/zhetrs_3.f new file mode 100644 index 0000000..937544e --- /dev/null +++ b/dspl/liblapack/SRC/zhetrs_3.f @@ -0,0 +1,374 @@ +*> \brief \b ZHETRS_3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRS_3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> ZHETRS_3 solves a system of linear equations A * X = B with a complex +*> Hermitian matrix A using the factorization computed +*> by ZHETRF_RK or ZHETRF_BK: +*> +*> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**H (or L**H) is the conjugate of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is Hermitian and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This algorithm is using Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix: +*> = 'U': Upper triangular, form is A = P*U*D*(U**H)*(P**T); +*> = 'L': Lower triangular, form is A = P*L*D*(L**H)*(P**T). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> Diagonal of the block diagonal matrix D and factors U or L +*> as computed by ZHETRF_RK and ZHETRF_BK: +*> a) ONLY diagonal elements of the Hermitian block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the Hermitian block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZHETRF_RK or ZHETRF_BK. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16HEcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> June 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHETRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0,0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J, K, KP + DOUBLE PRECISION S + COMPLEX*16 AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZDSCAL, ZSWAP, ZTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRS_3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Begin Upper +* +* Solve A*X = B, where A = U*D*U**H. +* +* P**T * B +* +* Interchange rows K and IPIV(K) of matrix B in the same order +* that the formation order of IPIV(I) vector for Upper case. +* +* (We can do the simple loop over IPIV with decrement -1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = N, 1, -1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* +* Compute (U \P**T * B) -> B [ (U \P**T * B) ] +* + CALL ZTRSM( 'L', 'U', 'N', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* Compute D \ B -> B [ D \ (U \P**T * B) ] +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN + S = DBLE( ONE ) / DBLE( A( I, I ) ) + CALL ZDSCAL( NRHS, S, B( I, 1 ), LDB ) + ELSE IF ( I.GT.1 ) THEN + AKM1K = E( I ) + AKM1 = A( I-1, I-1 ) / AKM1K + AK = A( I, I ) / DCONJG( AKM1K ) + DENOM = AKM1*AK - ONE + DO J = 1, NRHS + BKM1 = B( I-1, J ) / AKM1K + BK = B( I, J ) / DCONJG( AKM1K ) + B( I-1, J ) = ( AK*BKM1-BK ) / DENOM + B( I, J ) = ( AKM1*BK-BKM1 ) / DENOM + END DO + I = I - 1 + END IF + I = I - 1 + END DO +* +* Compute (U**H \ B) -> B [ U**H \ (D \ (U \P**T * B) ) ] +* + CALL ZTRSM( 'L', 'U', 'C', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* P * B [ P * (U**H \ (D \ (U \P**T * B) )) ] +* +* Interchange rows K and IPIV(K) of matrix B in reverse order +* from the formation order of IPIV(I) vector for Upper case. +* +* (We can do the simple loop over IPIV with increment 1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = 1, N, 1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* + ELSE +* +* Begin Lower +* +* Solve A*X = B, where A = L*D*L**H. +* +* P**T * B +* Interchange rows K and IPIV(K) of matrix B in the same order +* that the formation order of IPIV(I) vector for Lower case. +* +* (We can do the simple loop over IPIV with increment 1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = 1, N, 1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL ZTRSM( 'L', 'L', 'N', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* Compute D \ B -> B [ D \ (L \P**T * B) ] +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN + S = DBLE( ONE ) / DBLE( A( I, I ) ) + CALL ZDSCAL( NRHS, S, B( I, 1 ), LDB ) + ELSE IF( I.LT.N ) THEN + AKM1K = E( I ) + AKM1 = A( I, I ) / DCONJG( AKM1K ) + AK = A( I+1, I+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO J = 1, NRHS + BKM1 = B( I, J ) / DCONJG( AKM1K ) + BK = B( I+1, J ) / AKM1K + B( I, J ) = ( AK*BKM1-BK ) / DENOM + B( I+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + END DO + I = I + 1 + END IF + I = I + 1 + END DO +* +* Compute (L**H \ B) -> B [ L**H \ (D \ (L \P**T * B) ) ] +* + CALL ZTRSM('L', 'L', 'C', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* P * B [ P * (L**H \ (D \ (L \P**T * B) )) ] +* +* Interchange rows K and IPIV(K) of matrix B in reverse order +* from the formation order of IPIV(I) vector for Lower case. +* +* (We can do the simple loop over IPIV with decrement -1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = N, 1, -1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* +* END Lower +* + END IF +* + RETURN +* +* End of ZHETRS_3 +* + END diff --git a/dspl/liblapack/SRC/zhetrs_aa.f b/dspl/liblapack/SRC/zhetrs_aa.f new file mode 100644 index 0000000..9d302b9 --- /dev/null +++ b/dspl/liblapack/SRC/zhetrs_aa.f @@ -0,0 +1,288 @@ +*> \brief \b ZHETRS_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRS_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRS_AA solves a system of linear equations A*X = B with a complex +*> hermitian matrix A using the factorization A = U*T*U**H or +*> A = L*T*L**T computed by ZHETRF_AA. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*T*U**H; +*> = 'L': Lower triangular, form is A = L*T*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> Details of factors computed by ZHETRF_AA. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges as computed by ZHETRF_AA. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is DOUBLE array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER, LWORK >= MAX(1,3*N-2). +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16HEcomputational +* +* ===================================================================== + SUBROUTINE ZHETRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* + COMPLEX*16 ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER K, KP, LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZGTSV, ZSWAP, ZTRSM, ZLACGV, ZLACPY, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, 3*N-2 ) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRS_AA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + LWKOPT = (3*N-2) + WORK( 1 ) = LWKOPT + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*T*U**T. +* +* Pivot, P**T * B +* + DO K = 1, N + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* +* Compute (U \P**T * B) -> B [ (U \P**T * B) ] +* + CALL ZTRSM('L', 'U', 'C', 'U', N-1, NRHS, ONE, A( 1, 2 ), LDA, + $ B( 2, 1 ), LDB) +* +* Compute T \ B -> B [ T \ (U \P**T * B) ] +* + CALL ZLACPY( 'F', 1, N, A(1, 1), LDA+1, WORK(N), 1) + IF( N.GT.1 ) THEN + CALL ZLACPY( 'F', 1, N-1, A( 1, 2 ), LDA+1, WORK( 2*N ), 1) + CALL ZLACPY( 'F', 1, N-1, A( 1, 2 ), LDA+1, WORK( 1 ), 1) + CALL ZLACGV( N-1, WORK( 1 ), 1 ) + END IF + CALL ZGTSV(N, NRHS, WORK(1), WORK(N), WORK(2*N), B, LDB, + $ INFO) +* +* Compute (U**T \ B) -> B [ U**T \ (T \ (U \P**T * B) ) ] +* + CALL ZTRSM( 'L', 'U', 'N', 'U', N-1, NRHS, ONE, A( 1, 2 ), LDA, + $ B(2, 1), LDB) +* +* Pivot, P * B [ P * (U**T \ (T \ (U \P**T * B) )) ] +* + DO K = N, 1, -1 + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* + ELSE +* +* Solve A*X = B, where A = L*T*L**T. +* +* Pivot, P**T * B +* + DO K = 1, N + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL ZTRSM( 'L', 'L', 'N', 'U', N-1, NRHS, ONE, A( 2, 1 ), LDA, + $ B(2, 1), LDB) +* +* Compute T \ B -> B [ T \ (L \P**T * B) ] +* + CALL ZLACPY( 'F', 1, N, A(1, 1), LDA+1, WORK(N), 1) + IF( N.GT.1 ) THEN + CALL ZLACPY( 'F', 1, N-1, A( 2, 1 ), LDA+1, WORK( 1 ), 1) + CALL ZLACPY( 'F', 1, N-1, A( 2, 1 ), LDA+1, WORK( 2*N ), 1) + CALL ZLACGV( N-1, WORK( 2*N ), 1 ) + END IF + CALL ZGTSV(N, NRHS, WORK(1), WORK(N), WORK(2*N), B, LDB, + $ INFO) +* +* Compute (L**T \ B) -> B [ L**T \ (T \ (L \P**T * B) ) ] +* + CALL ZTRSM( 'L', 'L', 'C', 'U', N-1, NRHS, ONE, A( 2, 1 ), LDA, + $ B( 2, 1 ), LDB) +* +* Pivot, P * B [ P * (L**T \ (T \ (L \P**T * B) )) ] +* + DO K = N, 1, -1 + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* + END IF +* + RETURN +* +* End of ZHETRS_AA +* + END diff --git a/dspl/liblapack/SRC/zhetrs_aa_2stage.f b/dspl/liblapack/SRC/zhetrs_aa_2stage.f new file mode 100644 index 0000000..02e1747 --- /dev/null +++ b/dspl/liblapack/SRC/zhetrs_aa_2stage.f @@ -0,0 +1,283 @@ +*> \brief \b ZHETRS_AA_2STAGE +* +* @generated from SRC/dsytrs_aa_2stage.f, fortran d -> c, Mon Oct 30 11:59:02 2017 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRS_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, +* IPIV2, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX*16 A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRS_AA_2STAGE solves a system of linear equations A*X = B with a +*> hermitian matrix A using the factorization A = U*T*U**T or +*> A = L*T*L**T computed by ZHETRF_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*T*U**T; +*> = 'L': Lower triangular, form is A = L*T*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16array, dimension (LDA,N) +*> Details of factors computed by ZHETRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX*16array, dimension (LTB) +*> Details of factors computed by ZHETRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> ZHETRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] IPIV2 +*> \verbatim +*> IPIV2 is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> ZHETRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZHETRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX*16 A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER LDTB, NB + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZGBTRS, ZLASWP, ZTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LTB.LT.( 4*N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRS_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Read NB and compute LDTB +* + NB = INT( TB( 1 ) ) + LDTB = LTB/N +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*T*U**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL ZLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (U**T \P**T * B) -> B [ (U**T \P**T * B) ] +* + CALL ZTRSM( 'L', 'U', 'C', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (U**T \P**T * B) ] +* + CALL ZGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (U \ B) -> B [ U \ (T \ (U**T \P**T * B) ) ] +* + CALL ZTRSM( 'L', 'U', 'N', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (U \ (T \ (U**T \P**T * B) )) ] +* + CALL ZLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF +* + ELSE +* +* Solve A*X = B, where A = L*T*L**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL ZLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL ZTRSM( 'L', 'L', 'N', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (L \P**T * B) ] +* + CALL ZGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (L**T \ B) -> B [ L**T \ (T \ (L \P**T * B) ) ] +* + CALL ZTRSM( 'L', 'L', 'C', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (L**T \ (T \ (L \P**T * B) )) ] +* + CALL ZLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF + END IF +* + RETURN +* +* End of ZHETRS_AA_2STAGE +* + END diff --git a/dspl/liblapack/SRC/zhetrs_rook.f b/dspl/liblapack/SRC/zhetrs_rook.f new file mode 100644 index 0000000..f99697c --- /dev/null +++ b/dspl/liblapack/SRC/zhetrs_rook.f @@ -0,0 +1,503 @@ +*> \brief \b ZHETRS_ROOK computes the solution to a system of linear equations A * X = B for HE matrices using factorization obtained with one of the bounded diagonal pivoting methods (max 2 interchanges) +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRS_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRS_ROOK solves a system of linear equations A*X = B with a complex +*> Hermitian matrix A using the factorization A = U*D*U**H or +*> A = L*D*L**H computed by ZHETRF_ROOK. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZHETRF_ROOK. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZHETRF_ROOK. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup complex16HEcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZHETRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP + DOUBLE PRECISION S + COMPLEX*16 AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZGEMV, ZGERU, ZLACGV, ZDSCAL, ZSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, DBLE +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRS_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**H. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL ZGERU( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + S = DBLE( ONE ) / DBLE( A( K, K ) ) + CALL ZDSCAL( NRHS, S, B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K and -IPIV(K), then K-1 and -IPIV(K-1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K-1) + IF( KP.NE.K-1 ) + $ CALL ZSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL ZGERU( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL ZGERU( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ), + $ LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K-1, K ) + AKM1 = A( K-1, K-1 ) / AKM1K + AK = A( K, K ) / DCONJG( AKM1K ) + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / DCONJG( AKM1K ) + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U**H *X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U**H(K)), where U(K) is the transformation +* stored in column K of A. +* + IF( K.GT.1 ) THEN + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + END IF +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U**H(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.GT.1 ) THEN + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) +* + CALL ZLACGV( NRHS, B( K+1, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K+1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K), then K+1 and -IPIV(K+1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K+1 ) + IF( KP.NE.K+1 ) + $ CALL ZSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L**H. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL ZGERU( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + S = DBLE( ONE ) / DBLE( A( K, K ) ) + CALL ZDSCAL( NRHS, S, B( K, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K and -IPIV(K), then K+1 and -IPIV(K+1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K+1 ) + IF( KP.NE.K+1 ) + $ CALL ZSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL ZGERU( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL ZGERU( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K+1, K ) + AKM1 = A( K, K ) / DCONJG( AKM1K ) + AK = A( K+1, K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / DCONJG( AKM1K ) + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L**H *X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L**H(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) THEN + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, A( K+1, K ), 1, ONE, + $ B( K, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + END IF +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L**H(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, A( K+1, K ), 1, ONE, + $ B( K, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) +* + CALL ZLACGV( NRHS, B( K-1, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, A( K+1, K-1 ), 1, ONE, + $ B( K-1, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K-1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K), then K-1 and -IPIV(K-1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K-1 ) + IF( KP.NE.K-1 ) + $ CALL ZSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of ZHETRS_ROOK +* + END diff --git a/dspl/liblapack/SRC/zhfrk.f b/dspl/liblapack/SRC/zhfrk.f new file mode 100644 index 0000000..6440542 --- /dev/null +++ b/dspl/liblapack/SRC/zhfrk.f @@ -0,0 +1,552 @@ +*> \brief \b ZHFRK performs a Hermitian rank-k operation for matrix in RFP format. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHFRK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHFRK( TRANSR, UPLO, TRANS, N, K, ALPHA, A, LDA, BETA, +* C ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION ALPHA, BETA +* INTEGER K, LDA, N +* CHARACTER TRANS, TRANSR, UPLO +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), C( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Level 3 BLAS like routine for C in RFP Format. +*> +*> ZHFRK performs one of the Hermitian rank--k operations +*> +*> C := alpha*A*A**H + beta*C, +*> +*> or +*> +*> C := alpha*A**H*A + beta*C, +*> +*> where alpha and beta are real scalars, C is an n--by--n Hermitian +*> matrix and A is an n--by--k matrix in the first case and a k--by--n +*> matrix in the second case. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal Form of RFP A is stored; +*> = 'C': The Conjugate-transpose Form of RFP A is stored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array C is to be referenced as +*> follows: +*> +*> UPLO = 'U' or 'u' Only the upper triangular part of C +*> is to be referenced. +*> +*> UPLO = 'L' or 'l' Only the lower triangular part of C +*> is to be referenced. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> On entry, TRANS specifies the operation to be performed as +*> follows: +*> +*> TRANS = 'N' or 'n' C := alpha*A*A**H + beta*C. +*> +*> TRANS = 'C' or 'c' C := alpha*A**H*A + beta*C. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of the matrix C. N must be +*> at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> On entry with TRANS = 'N' or 'n', K specifies the number +*> of columns of the matrix A, and on entry with +*> TRANS = 'C' or 'c', K specifies the number of rows of the +*> matrix A. K must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,ka) +*> where KA +*> is K when TRANS = 'N' or 'n', and is N otherwise. Before +*> entry with TRANS = 'N' or 'n', the leading N--by--K part of +*> the array A must contain the matrix A, otherwise the leading +*> K--by--N part of the array A must contain the matrix A. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. When TRANS = 'N' or 'n' +*> then LDA must be at least max( 1, n ), otherwise LDA must +*> be at least max( 1, k ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION +*> On entry, BETA specifies the scalar beta. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the matrix A in RFP Format. RFP Format is +*> described by TRANSR, UPLO and N. Note that the imaginary +*> parts of the diagonal elements need not be set, they are +*> assumed to be zero, and on exit they are set to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZHFRK( TRANSR, UPLO, TRANS, N, K, ALPHA, A, LDA, BETA, + $ C ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER K, LDA, N + CHARACTER TRANS, TRANSR, UPLO +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + COMPLEX*16 CZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, NORMALTRANSR, NISODD, NOTRANS + INTEGER INFO, NROWA, J, NK, N1, N2 + COMPLEX*16 CALPHA, CBETA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMM, ZHERK +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, DCMPLX +* .. +* .. Executable Statements .. +* +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + NOTRANS = LSAME( TRANS, 'N' ) +* + IF( NOTRANS ) THEN + NROWA = N + ELSE + NROWA = K + END IF +* + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRANS .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NROWA ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHFRK ', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* +* The quick return case: ((ALPHA.EQ.0).AND.(BETA.NE.ZERO)) is not +* done (it is in ZHERK for example) and left in the general case. +* + IF( ( N.EQ.0 ) .OR. ( ( ( ALPHA.EQ.ZERO ) .OR. ( K.EQ.0 ) ) .AND. + $ ( BETA.EQ.ONE ) ) )RETURN +* + IF( ( ALPHA.EQ.ZERO ) .AND. ( BETA.EQ.ZERO ) ) THEN + DO J = 1, ( ( N*( N+1 ) ) / 2 ) + C( J ) = CZERO + END DO + RETURN + END IF +* + CALPHA = DCMPLX( ALPHA, ZERO ) + CBETA = DCMPLX( BETA, ZERO ) +* +* C is N-by-N. +* If N is odd, set NISODD = .TRUE., and N1 and N2. +* If N is even, NISODD = .FALSE., and NK. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + NISODD = .FALSE. + NK = N / 2 + ELSE + NISODD = .TRUE. + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF + END IF +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* N is odd, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* N is odd, TRANSR = 'N', UPLO = 'L', and TRANS = 'N' +* + CALL ZHERK( 'L', 'N', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 1 ), N ) + CALL ZHERK( 'U', 'N', N2, K, ALPHA, A( N1+1, 1 ), LDA, + $ BETA, C( N+1 ), N ) + CALL ZGEMM( 'N', 'C', N2, N1, K, CALPHA, A( N1+1, 1 ), + $ LDA, A( 1, 1 ), LDA, CBETA, C( N1+1 ), N ) +* + ELSE +* +* N is odd, TRANSR = 'N', UPLO = 'L', and TRANS = 'C' +* + CALL ZHERK( 'L', 'C', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 1 ), N ) + CALL ZHERK( 'U', 'C', N2, K, ALPHA, A( 1, N1+1 ), LDA, + $ BETA, C( N+1 ), N ) + CALL ZGEMM( 'C', 'N', N2, N1, K, CALPHA, A( 1, N1+1 ), + $ LDA, A( 1, 1 ), LDA, CBETA, C( N1+1 ), N ) +* + END IF +* + ELSE +* +* N is odd, TRANSR = 'N', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* N is odd, TRANSR = 'N', UPLO = 'U', and TRANS = 'N' +* + CALL ZHERK( 'L', 'N', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( N2+1 ), N ) + CALL ZHERK( 'U', 'N', N2, K, ALPHA, A( N2, 1 ), LDA, + $ BETA, C( N1+1 ), N ) + CALL ZGEMM( 'N', 'C', N1, N2, K, CALPHA, A( 1, 1 ), + $ LDA, A( N2, 1 ), LDA, CBETA, C( 1 ), N ) +* + ELSE +* +* N is odd, TRANSR = 'N', UPLO = 'U', and TRANS = 'C' +* + CALL ZHERK( 'L', 'C', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( N2+1 ), N ) + CALL ZHERK( 'U', 'C', N2, K, ALPHA, A( 1, N2 ), LDA, + $ BETA, C( N1+1 ), N ) + CALL ZGEMM( 'C', 'N', N1, N2, K, CALPHA, A( 1, 1 ), + $ LDA, A( 1, N2 ), LDA, CBETA, C( 1 ), N ) +* + END IF +* + END IF +* + ELSE +* +* N is odd, and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* N is odd, TRANSR = 'C', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* N is odd, TRANSR = 'C', UPLO = 'L', and TRANS = 'N' +* + CALL ZHERK( 'U', 'N', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 1 ), N1 ) + CALL ZHERK( 'L', 'N', N2, K, ALPHA, A( N1+1, 1 ), LDA, + $ BETA, C( 2 ), N1 ) + CALL ZGEMM( 'N', 'C', N1, N2, K, CALPHA, A( 1, 1 ), + $ LDA, A( N1+1, 1 ), LDA, CBETA, + $ C( N1*N1+1 ), N1 ) +* + ELSE +* +* N is odd, TRANSR = 'C', UPLO = 'L', and TRANS = 'C' +* + CALL ZHERK( 'U', 'C', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 1 ), N1 ) + CALL ZHERK( 'L', 'C', N2, K, ALPHA, A( 1, N1+1 ), LDA, + $ BETA, C( 2 ), N1 ) + CALL ZGEMM( 'C', 'N', N1, N2, K, CALPHA, A( 1, 1 ), + $ LDA, A( 1, N1+1 ), LDA, CBETA, + $ C( N1*N1+1 ), N1 ) +* + END IF +* + ELSE +* +* N is odd, TRANSR = 'C', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* N is odd, TRANSR = 'C', UPLO = 'U', and TRANS = 'N' +* + CALL ZHERK( 'U', 'N', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( N2*N2+1 ), N2 ) + CALL ZHERK( 'L', 'N', N2, K, ALPHA, A( N1+1, 1 ), LDA, + $ BETA, C( N1*N2+1 ), N2 ) + CALL ZGEMM( 'N', 'C', N2, N1, K, CALPHA, A( N1+1, 1 ), + $ LDA, A( 1, 1 ), LDA, CBETA, C( 1 ), N2 ) +* + ELSE +* +* N is odd, TRANSR = 'C', UPLO = 'U', and TRANS = 'C' +* + CALL ZHERK( 'U', 'C', N1, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( N2*N2+1 ), N2 ) + CALL ZHERK( 'L', 'C', N2, K, ALPHA, A( 1, N1+1 ), LDA, + $ BETA, C( N1*N2+1 ), N2 ) + CALL ZGEMM( 'C', 'N', N2, N1, K, CALPHA, A( 1, N1+1 ), + $ LDA, A( 1, 1 ), LDA, CBETA, C( 1 ), N2 ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* N is even, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* N is even, TRANSR = 'N', UPLO = 'L', and TRANS = 'N' +* + CALL ZHERK( 'L', 'N', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 2 ), N+1 ) + CALL ZHERK( 'U', 'N', NK, K, ALPHA, A( NK+1, 1 ), LDA, + $ BETA, C( 1 ), N+1 ) + CALL ZGEMM( 'N', 'C', NK, NK, K, CALPHA, A( NK+1, 1 ), + $ LDA, A( 1, 1 ), LDA, CBETA, C( NK+2 ), + $ N+1 ) +* + ELSE +* +* N is even, TRANSR = 'N', UPLO = 'L', and TRANS = 'C' +* + CALL ZHERK( 'L', 'C', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( 2 ), N+1 ) + CALL ZHERK( 'U', 'C', NK, K, ALPHA, A( 1, NK+1 ), LDA, + $ BETA, C( 1 ), N+1 ) + CALL ZGEMM( 'C', 'N', NK, NK, K, CALPHA, A( 1, NK+1 ), + $ LDA, A( 1, 1 ), LDA, CBETA, C( NK+2 ), + $ N+1 ) +* + END IF +* + ELSE +* +* N is even, TRANSR = 'N', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* N is even, TRANSR = 'N', UPLO = 'U', and TRANS = 'N' +* + CALL ZHERK( 'L', 'N', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK+2 ), N+1 ) + CALL ZHERK( 'U', 'N', NK, K, ALPHA, A( NK+1, 1 ), LDA, + $ BETA, C( NK+1 ), N+1 ) + CALL ZGEMM( 'N', 'C', NK, NK, K, CALPHA, A( 1, 1 ), + $ LDA, A( NK+1, 1 ), LDA, CBETA, C( 1 ), + $ N+1 ) +* + ELSE +* +* N is even, TRANSR = 'N', UPLO = 'U', and TRANS = 'C' +* + CALL ZHERK( 'L', 'C', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK+2 ), N+1 ) + CALL ZHERK( 'U', 'C', NK, K, ALPHA, A( 1, NK+1 ), LDA, + $ BETA, C( NK+1 ), N+1 ) + CALL ZGEMM( 'C', 'N', NK, NK, K, CALPHA, A( 1, 1 ), + $ LDA, A( 1, NK+1 ), LDA, CBETA, C( 1 ), + $ N+1 ) +* + END IF +* + END IF +* + ELSE +* +* N is even, and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* N is even, TRANSR = 'C', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* N is even, TRANSR = 'C', UPLO = 'L', and TRANS = 'N' +* + CALL ZHERK( 'U', 'N', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK+1 ), NK ) + CALL ZHERK( 'L', 'N', NK, K, ALPHA, A( NK+1, 1 ), LDA, + $ BETA, C( 1 ), NK ) + CALL ZGEMM( 'N', 'C', NK, NK, K, CALPHA, A( 1, 1 ), + $ LDA, A( NK+1, 1 ), LDA, CBETA, + $ C( ( ( NK+1 )*NK )+1 ), NK ) +* + ELSE +* +* N is even, TRANSR = 'C', UPLO = 'L', and TRANS = 'C' +* + CALL ZHERK( 'U', 'C', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK+1 ), NK ) + CALL ZHERK( 'L', 'C', NK, K, ALPHA, A( 1, NK+1 ), LDA, + $ BETA, C( 1 ), NK ) + CALL ZGEMM( 'C', 'N', NK, NK, K, CALPHA, A( 1, 1 ), + $ LDA, A( 1, NK+1 ), LDA, CBETA, + $ C( ( ( NK+1 )*NK )+1 ), NK ) +* + END IF +* + ELSE +* +* N is even, TRANSR = 'C', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* N is even, TRANSR = 'C', UPLO = 'U', and TRANS = 'N' +* + CALL ZHERK( 'U', 'N', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK*( NK+1 )+1 ), NK ) + CALL ZHERK( 'L', 'N', NK, K, ALPHA, A( NK+1, 1 ), LDA, + $ BETA, C( NK*NK+1 ), NK ) + CALL ZGEMM( 'N', 'C', NK, NK, K, CALPHA, A( NK+1, 1 ), + $ LDA, A( 1, 1 ), LDA, CBETA, C( 1 ), NK ) +* + ELSE +* +* N is even, TRANSR = 'C', UPLO = 'U', and TRANS = 'C' +* + CALL ZHERK( 'U', 'C', NK, K, ALPHA, A( 1, 1 ), LDA, + $ BETA, C( NK*( NK+1 )+1 ), NK ) + CALL ZHERK( 'L', 'C', NK, K, ALPHA, A( 1, NK+1 ), LDA, + $ BETA, C( NK*NK+1 ), NK ) + CALL ZGEMM( 'C', 'N', NK, NK, K, CALPHA, A( 1, NK+1 ), + $ LDA, A( 1, 1 ), LDA, CBETA, C( 1 ), NK ) +* + END IF +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of ZHFRK +* + END diff --git a/dspl/liblapack/SRC/zhgeqz.f b/dspl/liblapack/SRC/zhgeqz.f new file mode 100644 index 0000000..b51cba4 --- /dev/null +++ b/dspl/liblapack/SRC/zhgeqz.f @@ -0,0 +1,876 @@ +*> \brief \b ZHGEQZ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHGEQZ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, +* ALPHA, BETA, Q, LDQ, Z, LDZ, WORK, LWORK, +* RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ, COMPZ, JOB +* INTEGER IHI, ILO, INFO, LDH, LDQ, LDT, LDZ, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 ALPHA( * ), BETA( * ), H( LDH, * ), +* $ Q( LDQ, * ), T( LDT, * ), WORK( * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHGEQZ computes the eigenvalues of a complex matrix pair (H,T), +*> where H is an upper Hessenberg matrix and T is upper triangular, +*> using the single-shift QZ method. +*> Matrix pairs of this type are produced by the reduction to +*> generalized upper Hessenberg form of a complex matrix pair (A,B): +*> +*> A = Q1*H*Z1**H, B = Q1*T*Z1**H, +*> +*> as computed by ZGGHRD. +*> +*> If JOB='S', then the Hessenberg-triangular pair (H,T) is +*> also reduced to generalized Schur form, +*> +*> H = Q*S*Z**H, T = Q*P*Z**H, +*> +*> where Q and Z are unitary matrices and S and P are upper triangular. +*> +*> Optionally, the unitary matrix Q from the generalized Schur +*> factorization may be postmultiplied into an input matrix Q1, and the +*> unitary matrix Z may be postmultiplied into an input matrix Z1. +*> If Q1 and Z1 are the unitary matrices from ZGGHRD that reduced +*> the matrix pair (A,B) to generalized Hessenberg form, then the output +*> matrices Q1*Q and Z1*Z are the unitary factors from the generalized +*> Schur factorization of (A,B): +*> +*> A = (Q1*Q)*S*(Z1*Z)**H, B = (Q1*Q)*P*(Z1*Z)**H. +*> +*> To avoid overflow, eigenvalues of the matrix pair (H,T) +*> (equivalently, of (A,B)) are computed as a pair of complex values +*> (alpha,beta). If beta is nonzero, lambda = alpha / beta is an +*> eigenvalue of the generalized nonsymmetric eigenvalue problem (GNEP) +*> A*x = lambda*B*x +*> and if alpha is nonzero, mu = beta / alpha is an eigenvalue of the +*> alternate form of the GNEP +*> mu*A*y = B*y. +*> The values of alpha and beta for the i-th eigenvalue can be read +*> directly from the generalized Schur form: alpha = S(i,i), +*> beta = P(i,i). +*> +*> Ref: C.B. Moler & G.W. Stewart, "An Algorithm for Generalized Matrix +*> Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973), +*> pp. 241--256. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> = 'E': Compute eigenvalues only; +*> = 'S': Computer eigenvalues and the Schur form. +*> \endverbatim +*> +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> = 'N': Left Schur vectors (Q) are not computed; +*> = 'I': Q is initialized to the unit matrix and the matrix Q +*> of left Schur vectors of (H,T) is returned; +*> = 'V': Q must contain a unitary matrix Q1 on entry and +*> the product Q1*Q is returned. +*> \endverbatim +*> +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': Right Schur vectors (Z) are not computed; +*> = 'I': Q is initialized to the unit matrix and the matrix Z +*> of right Schur vectors of (H,T) is returned; +*> = 'V': Z must contain a unitary matrix Z1 on entry and +*> the product Z1*Z is returned. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices H, T, Q, and Z. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> ILO and IHI mark the rows and columns of H which are in +*> Hessenberg form. It is assumed that A is already upper +*> triangular in rows and columns 1:ILO-1 and IHI+1:N. +*> If N > 0, 1 <= ILO <= IHI <= N; if N = 0, ILO=1 and IHI=0. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is COMPLEX*16 array, dimension (LDH, N) +*> On entry, the N-by-N upper Hessenberg matrix H. +*> On exit, if JOB = 'S', H contains the upper triangular +*> matrix S from the generalized Schur factorization. +*> If JOB = 'E', the diagonal of H matches that of S, but +*> the rest of H is unspecified. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH >= max( 1, N ). +*> \endverbatim +*> +*> \param[in,out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT, N) +*> On entry, the N-by-N upper triangular matrix T. +*> On exit, if JOB = 'S', T contains the upper triangular +*> matrix P from the generalized Schur factorization. +*> If JOB = 'E', the diagonal of T matches that of P, but +*> the rest of T is unspecified. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max( 1, N ). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 array, dimension (N) +*> The complex scalars alpha that define the eigenvalues of +*> GNEP. ALPHA(i) = S(i,i) in the generalized Schur +*> factorization. +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is COMPLEX*16 array, dimension (N) +*> The real non-negative scalars beta that define the +*> eigenvalues of GNEP. BETA(i) = P(i,i) in the generalized +*> Schur factorization. +*> +*> Together, the quantities alpha = ALPHA(j) and beta = BETA(j) +*> represent the j-th eigenvalue of the matrix pair (A,B), in +*> one of the forms lambda = alpha/beta or mu = beta/alpha. +*> Since either lambda or mu may overflow, they should not, +*> in general, be computed. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ, N) +*> On entry, if COMPQ = 'V', the unitary matrix Q1 used in the +*> reduction of (A,B) to generalized Hessenberg form. +*> On exit, if COMPQ = 'I', the unitary matrix of left Schur +*> vectors of (H,T), and if COMPQ = 'V', the unitary matrix of +*> left Schur vectors of (A,B). +*> Not referenced if COMPQ = 'N'. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= 1. +*> If COMPQ='V' or 'I', then LDQ >= N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', the unitary matrix Z1 used in the +*> reduction of (A,B) to generalized Hessenberg form. +*> On exit, if COMPZ = 'I', the unitary matrix of right Schur +*> vectors of (H,T), and if COMPZ = 'V', the unitary matrix of +*> right Schur vectors of (A,B). +*> Not referenced if COMPZ = 'N'. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1. +*> If COMPZ='V' or 'I', then LDZ >= N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO >= 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> = 1,...,N: the QZ iteration did not converge. (H,T) is not +*> in Schur form, but ALPHA(i) and BETA(i), +*> i=INFO+1,...,N should be correct. +*> = N+1,...,2*N: the shift calculation failed. (H,T) is not +*> in Schur form, but ALPHA(i) and BETA(i), +*> i=INFO-N+1,...,N should be correct. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16GEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We assume that complex ABS works as long as its value is less than +*> overflow. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, H, LDH, T, LDT, + $ ALPHA, BETA, Q, LDQ, Z, LDZ, WORK, LWORK, + $ RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, COMPZ, JOB + INTEGER IHI, ILO, INFO, LDH, LDQ, LDT, LDZ, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 ALPHA( * ), BETA( * ), H( LDH, * ), + $ Q( LDQ, * ), T( LDT, * ), WORK( * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = 0.5D+0 ) +* .. +* .. Local Scalars .. + LOGICAL ILAZR2, ILAZRO, ILQ, ILSCHR, ILZ, LQUERY + INTEGER ICOMPQ, ICOMPZ, IFIRST, IFRSTM, IITER, ILAST, + $ ILASTM, IN, ISCHUR, ISTART, J, JC, JCH, JITER, + $ JR, MAXIT + DOUBLE PRECISION ABSB, ANORM, ASCALE, ATOL, BNORM, BSCALE, BTOL, + $ C, SAFMIN, TEMP, TEMP2, TEMPR, ULP + COMPLEX*16 ABI22, AD11, AD12, AD21, AD22, CTEMP, CTEMP2, + $ CTEMP3, ESHIFT, RTDISC, S, SHIFT, SIGNBC, T1, + $ U12, X +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHS + EXTERNAL LSAME, DLAMCH, ZLANHS +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARTG, ZLASET, ZROT, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN, + $ SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( DBLE( X ) ) + ABS( DIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode JOB, COMPQ, COMPZ +* + IF( LSAME( JOB, 'E' ) ) THEN + ILSCHR = .FALSE. + ISCHUR = 1 + ELSE IF( LSAME( JOB, 'S' ) ) THEN + ILSCHR = .TRUE. + ISCHUR = 2 + ELSE + ISCHUR = 0 + END IF +* + IF( LSAME( COMPQ, 'N' ) ) THEN + ILQ = .FALSE. + ICOMPQ = 1 + ELSE IF( LSAME( COMPQ, 'V' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 2 + ELSE IF( LSAME( COMPQ, 'I' ) ) THEN + ILQ = .TRUE. + ICOMPQ = 3 + ELSE + ICOMPQ = 0 + END IF +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ILZ = .FALSE. + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 2 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ILZ = .TRUE. + ICOMPZ = 3 + ELSE + ICOMPZ = 0 + END IF +* +* Check Argument Values +* + INFO = 0 + WORK( 1 ) = MAX( 1, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( ISCHUR.EQ.0 ) THEN + INFO = -1 + ELSE IF( ICOMPQ.EQ.0 ) THEN + INFO = -2 + ELSE IF( ICOMPZ.EQ.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( ILO.LT.1 ) THEN + INFO = -5 + ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN + INFO = -6 + ELSE IF( LDH.LT.N ) THEN + INFO = -8 + ELSE IF( LDT.LT.N ) THEN + INFO = -10 + ELSE IF( LDQ.LT.1 .OR. ( ILQ .AND. LDQ.LT.N ) ) THEN + INFO = -14 + ELSE IF( LDZ.LT.1 .OR. ( ILZ .AND. LDZ.LT.N ) ) THEN + INFO = -16 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHGEQZ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* +* WORK( 1 ) = CMPLX( 1 ) + IF( N.LE.0 ) THEN + WORK( 1 ) = DCMPLX( 1 ) + RETURN + END IF +* +* Initialize Q and Z +* + IF( ICOMPQ.EQ.3 ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) + IF( ICOMPZ.EQ.3 ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, Z, LDZ ) +* +* Machine Constants +* + IN = IHI + 1 - ILO + SAFMIN = DLAMCH( 'S' ) + ULP = DLAMCH( 'E' )*DLAMCH( 'B' ) + ANORM = ZLANHS( 'F', IN, H( ILO, ILO ), LDH, RWORK ) + BNORM = ZLANHS( 'F', IN, T( ILO, ILO ), LDT, RWORK ) + ATOL = MAX( SAFMIN, ULP*ANORM ) + BTOL = MAX( SAFMIN, ULP*BNORM ) + ASCALE = ONE / MAX( SAFMIN, ANORM ) + BSCALE = ONE / MAX( SAFMIN, BNORM ) +* +* +* Set Eigenvalues IHI+1:N +* + DO 10 J = IHI + 1, N + ABSB = ABS( T( J, J ) ) + IF( ABSB.GT.SAFMIN ) THEN + SIGNBC = DCONJG( T( J, J ) / ABSB ) + T( J, J ) = ABSB + IF( ILSCHR ) THEN + CALL ZSCAL( J-1, SIGNBC, T( 1, J ), 1 ) + CALL ZSCAL( J, SIGNBC, H( 1, J ), 1 ) + ELSE + CALL ZSCAL( 1, SIGNBC, H( J, J ), 1 ) + END IF + IF( ILZ ) + $ CALL ZSCAL( N, SIGNBC, Z( 1, J ), 1 ) + ELSE + T( J, J ) = CZERO + END IF + ALPHA( J ) = H( J, J ) + BETA( J ) = T( J, J ) + 10 CONTINUE +* +* If IHI < ILO, skip QZ steps +* + IF( IHI.LT.ILO ) + $ GO TO 190 +* +* MAIN QZ ITERATION LOOP +* +* Initialize dynamic indices +* +* Eigenvalues ILAST+1:N have been found. +* Column operations modify rows IFRSTM:whatever +* Row operations modify columns whatever:ILASTM +* +* If only eigenvalues are being computed, then +* IFRSTM is the row of the last splitting row above row ILAST; +* this is always at least ILO. +* IITER counts iterations since the last eigenvalue was found, +* to tell when to use an extraordinary shift. +* MAXIT is the maximum number of QZ sweeps allowed. +* + ILAST = IHI + IF( ILSCHR ) THEN + IFRSTM = 1 + ILASTM = N + ELSE + IFRSTM = ILO + ILASTM = IHI + END IF + IITER = 0 + ESHIFT = CZERO + MAXIT = 30*( IHI-ILO+1 ) +* + DO 170 JITER = 1, MAXIT +* +* Check for too many iterations. +* + IF( JITER.GT.MAXIT ) + $ GO TO 180 +* +* Split the matrix if possible. +* +* Two tests: +* 1: H(j,j-1)=0 or j=ILO +* 2: T(j,j)=0 +* +* Special case: j=ILAST +* + IF( ILAST.EQ.ILO ) THEN + GO TO 60 + ELSE + IF( ABS1( H( ILAST, ILAST-1 ) ).LE.ATOL ) THEN + H( ILAST, ILAST-1 ) = CZERO + GO TO 60 + END IF + END IF +* + IF( ABS( T( ILAST, ILAST ) ).LE.BTOL ) THEN + T( ILAST, ILAST ) = CZERO + GO TO 50 + END IF +* +* General case: j \brief \b ZHPCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHPCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHPCON estimates the reciprocal of the condition number of a complex +*> Hermitian packed matrix A using the factorization A = U*D*U**H or +*> A = L*D*L**H computed by ZHPTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZHPTRF, stored as a +*> packed triangular matrix. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZHPTRF. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZHPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IP, KASE + DOUBLE PRECISION AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHPTRS, ZLACN2 +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + IP = N*( N+1 ) / 2 + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP - I + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + IP = 1 + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP + N - I + 1 + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**H) or inv(U*D*U**H). +* + CALL ZHPTRS( UPLO, N, 1, AP, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of ZHPCON +* + END diff --git a/dspl/liblapack/SRC/zhpev.f b/dspl/liblapack/SRC/zhpev.f new file mode 100644 index 0000000..2ac1c93 --- /dev/null +++ b/dspl/liblapack/SRC/zhpev.f @@ -0,0 +1,276 @@ +*> \brief ZHPEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHPEV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 AP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHPEV computes all the eigenvalues and, optionally, eigenvectors of a +*> complex Hermitian matrix in packed storage. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, AP is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the diagonal +*> and first superdiagonal of the tridiagonal matrix T overwrite +*> the corresponding elements of A, and if UPLO = 'L', the +*> diagonal and first subdiagonal of T overwrite the +*> corresponding elements of A. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (max(1, 2*N-1)) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (max(1, 3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHEReigen +* +* ===================================================================== + SUBROUTINE ZHPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, RWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL WANTZ + INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWRK, + $ ISCALE + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHP + EXTERNAL LSAME, DLAMCH, ZLANHP +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZDSCAL, ZHPTRD, ZSTEQR, + $ ZUPGTR +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPEV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AP( 1 ) + RWORK( 1 ) = 1 + IF( WANTZ ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHP( 'M', UPLO, N, AP, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL ZDSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + END IF +* +* Call ZHPTRD to reduce Hermitian packed matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + CALL ZHPTRD( UPLO, N, AP, W, RWORK( INDE ), WORK( INDTAU ), + $ IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* ZUPGTR to generate the orthogonal matrix, then call ZSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + INDWRK = INDTAU + N + CALL ZUPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + INDRWK = INDE + N + CALL ZSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + RETURN +* +* End of ZHPEV +* + END diff --git a/dspl/liblapack/SRC/zhpevd.f b/dspl/liblapack/SRC/zhpevd.f new file mode 100644 index 0000000..83983ee --- /dev/null +++ b/dspl/liblapack/SRC/zhpevd.f @@ -0,0 +1,378 @@ +*> \brief ZHPEVD computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHPEVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, +* RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 AP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHPEVD computes all the eigenvalues and, optionally, eigenvectors of +*> a complex Hermitian matrix A in packed storage. If eigenvectors are +*> desired, it uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, AP is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the diagonal +*> and first superdiagonal of the tridiagonal matrix T overwrite +*> the corresponding elements of A, and if UPLO = 'L', the +*> diagonal and first subdiagonal of T overwrite the +*> corresponding elements of A. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal +*> eigenvectors of the matrix A, with the i-th column of Z +*> holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the required LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of array WORK. +*> If N <= 1, LWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LWORK must be at least N. +*> If JOBZ = 'V' and N > 1, LWORK must be at least 2*N. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the required sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) +*> On exit, if INFO = 0, RWORK(1) returns the required LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of array RWORK. +*> If N <= 1, LRWORK must be at least 1. +*> If JOBZ = 'N' and N > 1, LRWORK must be at least N. +*> If JOBZ = 'V' and N > 1, LRWORK must be at least +*> 1 + 5*N + 2*N**2. +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the required sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the required LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK must be at least 1. +*> If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the required sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16OTHEReigen +* +* ===================================================================== + SUBROUTINE ZHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, + $ RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDRWK, INDTAU, INDWRK, + $ ISCALE, LIWMIN, LLRWK, LLWRK, LRWMIN, LWMIN + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHP + EXTERNAL LSAME, DLAMCH, ZLANHP +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZDSCAL, ZHPTRD, ZSTEDC, + $ ZUPMTR +* .. +* .. Intrinsic Functions .. + INTRINSIC SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + LIWMIN = 1 + LRWMIN = 1 + ELSE + IF( WANTZ ) THEN + LWMIN = 2*N + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + LRWMIN = N + LIWMIN = 1 + END IF + END IF + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -9 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPEVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + W( 1 ) = AP( 1 ) + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHP( 'M', UPLO, N, AP, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL ZDSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + END IF +* +* Call ZHPTRD to reduce Hermitian packed matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + INDRWK = INDE + N + INDWRK = INDTAU + N + LLWRK = LWORK - INDWRK + 1 + LLRWK = LRWORK - INDRWK + 1 + CALL ZHPTRD( UPLO, N, AP, W, RWORK( INDE ), WORK( INDTAU ), + $ IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* ZUPGTR to generate the orthogonal matrix, then call ZSTEDC. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL ZSTEDC( 'I', N, W, RWORK( INDE ), Z, LDZ, WORK( INDWRK ), + $ LLWRK, RWORK( INDRWK ), LLRWK, IWORK, LIWORK, + $ INFO ) + CALL ZUPMTR( 'L', UPLO, 'N', N, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of ZHPEVD +* + END diff --git a/dspl/liblapack/SRC/zhpevx.f b/dspl/liblapack/SRC/zhpevx.f new file mode 100644 index 0000000..7e32a37 --- /dev/null +++ b/dspl/liblapack/SRC/zhpevx.f @@ -0,0 +1,507 @@ +*> \brief ZHPEVX computes the eigenvalues and, optionally, the left and/or right eigenvectors for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHPEVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, +* ABSTOL, M, W, Z, LDZ, WORK, RWORK, IWORK, +* IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, IU, LDZ, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 AP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHPEVX computes selected eigenvalues and, optionally, eigenvectors +*> of a complex Hermitian matrix A in packed storage. +*> Eigenvalues/vectors can be selected by specifying either a range of +*> values or a range of indices for the desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found; +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found; +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, AP is overwritten by values generated during the +*> reduction to tridiagonal form. If UPLO = 'U', the diagonal +*> and first superdiagonal of the tridiagonal matrix T overwrite +*> the corresponding elements of A, and if UPLO = 'L', the +*> diagonal and first subdiagonal of T overwrite the +*> corresponding elements of A. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing AP to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> +*> See "Computing Small Singular Values of Bidiagonal Matrices +*> with Guaranteed High Relative Accuracy," by Demmel and +*> Kahan, LAPACK Working Note #3. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the selected eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, max(1,M)) +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and +*> the index of the eigenvector is returned in IFAIL. +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge. +*> Their indices are stored in array IFAIL. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHEReigen +* +* ===================================================================== + SUBROUTINE ZHPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, WORK, RWORK, IWORK, + $ IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, IU, LDZ, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, TEST, VALEIG, WANTZ + CHARACTER ORDER + INTEGER I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL, + $ INDISP, INDIWK, INDRWK, INDTAU, INDWRK, ISCALE, + $ ITMP1, J, JJ, NSPLIT + DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, + $ SIGMA, SMLNUM, TMP1, VLL, VUU +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHP + EXTERNAL LSAME, DLAMCH, ZLANHP +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZDSCAL, + $ ZHPTRD, ZSTEIN, ZSTEQR, ZSWAP, ZUPGTR, ZUPMTR +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) ) + $ THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) + $ INFO = -7 + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -9 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) + $ INFO = -14 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPEVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = AP( 1 ) + ELSE + IF( VL.LT.DBLE( AP( 1 ) ) .AND. VU.GE.DBLE( AP( 1 ) ) ) THEN + M = 1 + W( 1 ) = AP( 1 ) + END IF + END IF + IF( WANTZ ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* +* Scale matrix to allowable range, if necessary. +* + ISCALE = 0 + ABSTLL = ABSTOL + IF( VALEIG ) THEN + VLL = VL + VUU = VU + ELSE + VLL = ZERO + VUU = ZERO + END IF + ANRM = ZLANHP( 'M', UPLO, N, AP, RWORK ) + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) THEN + CALL ZDSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 ) + IF( ABSTOL.GT.0 ) + $ ABSTLL = ABSTOL*SIGMA + IF( VALEIG ) THEN + VLL = VL*SIGMA + VUU = VU*SIGMA + END IF + END IF +* +* Call ZHPTRD to reduce Hermitian packed matrix to tridiagonal form. +* + INDD = 1 + INDE = INDD + N + INDRWK = INDE + N + INDTAU = 1 + INDWRK = INDTAU + N + CALL ZHPTRD( UPLO, N, AP, RWORK( INDD ), RWORK( INDE ), + $ WORK( INDTAU ), IINFO ) +* +* If all eigenvalues are desired and ABSTOL is less than or equal +* to zero, then call DSTERF or ZUPGTR and ZSTEQR. If this fails +* for some eigenvalue, then try DSTEBZ. +* + TEST = .FALSE. + IF (INDEIG) THEN + IF (IL.EQ.1 .AND. IU.EQ.N) THEN + TEST = .TRUE. + END IF + END IF + IF ((ALLEIG .OR. TEST) .AND. (ABSTOL.LE.ZERO)) THEN + CALL DCOPY( N, RWORK( INDD ), 1, W, 1 ) + INDEE = INDRWK + 2*N + IF( .NOT.WANTZ ) THEN + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL DSTERF( N, W, RWORK( INDEE ), INFO ) + ELSE + CALL ZUPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 ) + CALL ZSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ, + $ RWORK( INDRWK ), INFO ) + IF( INFO.EQ.0 ) THEN + DO 10 I = 1, N + IFAIL( I ) = 0 + 10 CONTINUE + END IF + END IF + IF( INFO.EQ.0 ) THEN + M = N + GO TO 20 + END IF + INFO = 0 + END IF +* +* Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN. +* + IF( WANTZ ) THEN + ORDER = 'B' + ELSE + ORDER = 'E' + END IF + INDIBL = 1 + INDISP = INDIBL + N + INDIWK = INDISP + N + CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL, + $ RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W, + $ IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ), + $ IWORK( INDIWK ), INFO ) +* + IF( WANTZ ) THEN + CALL ZSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W, + $ IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ, + $ RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO ) +* +* Apply unitary matrix used in reduction to tridiagonal +* form to eigenvectors returned by ZSTEIN. +* + INDWRK = INDTAU + N + CALL ZUPMTR( 'L', UPLO, 'N', N, M, AP, WORK( INDTAU ), Z, LDZ, + $ WORK( INDWRK ), IINFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + 20 CONTINUE + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = M + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* If eigenvalues are not in order, then sort them, along with +* eigenvectors. +* + IF( WANTZ ) THEN + DO 40 J = 1, M - 1 + I = 0 + TMP1 = W( J ) + DO 30 JJ = J + 1, M + IF( W( JJ ).LT.TMP1 ) THEN + I = JJ + TMP1 = W( JJ ) + END IF + 30 CONTINUE +* + IF( I.NE.0 ) THEN + ITMP1 = IWORK( INDIBL+I-1 ) + W( I ) = W( J ) + IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 ) + W( J ) = TMP1 + IWORK( INDIBL+J-1 ) = ITMP1 + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + IF( INFO.NE.0 ) THEN + ITMP1 = IFAIL( I ) + IFAIL( I ) = IFAIL( J ) + IFAIL( J ) = ITMP1 + END IF + END IF + 40 CONTINUE + END IF +* + RETURN +* +* End of ZHPEVX +* + END diff --git a/dspl/liblapack/SRC/zhpgst.f b/dspl/liblapack/SRC/zhpgst.f new file mode 100644 index 0000000..cf2c516 --- /dev/null +++ b/dspl/liblapack/SRC/zhpgst.f @@ -0,0 +1,281 @@ +*> \brief \b ZHPGST +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHPGST + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHPGST( ITYPE, UPLO, N, AP, BP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, ITYPE, N +* .. +* .. Array Arguments .. +* COMPLEX*16 AP( * ), BP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHPGST reduces a complex Hermitian-definite generalized +*> eigenproblem to standard form, using packed storage. +*> +*> If ITYPE = 1, the problem is A*x = lambda*B*x, +*> and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) +*> +*> If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or +*> B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. +*> +*> B must have been previously factorized as U**H*U or L*L**H by ZPPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H); +*> = 2 or 3: compute U*A*U**H or L**H*A*L. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored and B is factored as +*> U**H*U; +*> = 'L': Lower triangle of A is stored and B is factored as +*> L*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, if INFO = 0, the transformed matrix, stored in the +*> same format as A. +*> \endverbatim +*> +*> \param[in] BP +*> \verbatim +*> BP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The triangular factor from the Cholesky factorization of B, +*> stored in the same format as A, as returned by ZPPTRF. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZHPGST( ITYPE, UPLO, N, AP, BP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, ITYPE, N +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ), BP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, HALF + PARAMETER ( ONE = 1.0D+0, HALF = 0.5D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, J1, J1J1, JJ, K, K1, K1K1, KK + DOUBLE PRECISION AJJ, AKK, BJJ, BKK + COMPLEX*16 CT +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZDSCAL, ZHPMV, ZHPR2, ZTPMV, + $ ZTPSV +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPGST', -INFO ) + RETURN + END IF +* + IF( ITYPE.EQ.1 ) THEN + IF( UPPER ) THEN +* +* Compute inv(U**H)*A*inv(U) +* +* J1 and JJ are the indices of A(1,j) and A(j,j) +* + JJ = 0 + DO 10 J = 1, N + J1 = JJ + 1 + JJ = JJ + J +* +* Compute the j-th column of the upper triangle of A +* + AP( JJ ) = DBLE( AP( JJ ) ) + BJJ = BP( JJ ) + CALL ZTPSV( UPLO, 'Conjugate transpose', 'Non-unit', J, + $ BP, AP( J1 ), 1 ) + CALL ZHPMV( UPLO, J-1, -CONE, AP, BP( J1 ), 1, CONE, + $ AP( J1 ), 1 ) + CALL ZDSCAL( J-1, ONE / BJJ, AP( J1 ), 1 ) + AP( JJ ) = ( AP( JJ )-ZDOTC( J-1, AP( J1 ), 1, BP( J1 ), + $ 1 ) ) / BJJ + 10 CONTINUE + ELSE +* +* Compute inv(L)*A*inv(L**H) +* +* KK and K1K1 are the indices of A(k,k) and A(k+1,k+1) +* + KK = 1 + DO 20 K = 1, N + K1K1 = KK + N - K + 1 +* +* Update the lower triangle of A(k:n,k:n) +* + AKK = AP( KK ) + BKK = BP( KK ) + AKK = AKK / BKK**2 + AP( KK ) = AKK + IF( K.LT.N ) THEN + CALL ZDSCAL( N-K, ONE / BKK, AP( KK+1 ), 1 ) + CT = -HALF*AKK + CALL ZAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 ) + CALL ZHPR2( UPLO, N-K, -CONE, AP( KK+1 ), 1, + $ BP( KK+1 ), 1, AP( K1K1 ) ) + CALL ZAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 ) + CALL ZTPSV( UPLO, 'No transpose', 'Non-unit', N-K, + $ BP( K1K1 ), AP( KK+1 ), 1 ) + END IF + KK = K1K1 + 20 CONTINUE + END IF + ELSE + IF( UPPER ) THEN +* +* Compute U*A*U**H +* +* K1 and KK are the indices of A(1,k) and A(k,k) +* + KK = 0 + DO 30 K = 1, N + K1 = KK + 1 + KK = KK + K +* +* Update the upper triangle of A(1:k,1:k) +* + AKK = AP( KK ) + BKK = BP( KK ) + CALL ZTPMV( UPLO, 'No transpose', 'Non-unit', K-1, BP, + $ AP( K1 ), 1 ) + CT = HALF*AKK + CALL ZAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 ) + CALL ZHPR2( UPLO, K-1, CONE, AP( K1 ), 1, BP( K1 ), 1, + $ AP ) + CALL ZAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 ) + CALL ZDSCAL( K-1, BKK, AP( K1 ), 1 ) + AP( KK ) = AKK*BKK**2 + 30 CONTINUE + ELSE +* +* Compute L**H *A*L +* +* JJ and J1J1 are the indices of A(j,j) and A(j+1,j+1) +* + JJ = 1 + DO 40 J = 1, N + J1J1 = JJ + N - J + 1 +* +* Compute the j-th column of the lower triangle of A +* + AJJ = AP( JJ ) + BJJ = BP( JJ ) + AP( JJ ) = AJJ*BJJ + ZDOTC( N-J, AP( JJ+1 ), 1, + $ BP( JJ+1 ), 1 ) + CALL ZDSCAL( N-J, BJJ, AP( JJ+1 ), 1 ) + CALL ZHPMV( UPLO, N-J, CONE, AP( J1J1 ), BP( JJ+1 ), 1, + $ CONE, AP( JJ+1 ), 1 ) + CALL ZTPMV( UPLO, 'Conjugate transpose', 'Non-unit', + $ N-J+1, BP( JJ ), AP( JJ ), 1 ) + JJ = J1J1 + 40 CONTINUE + END IF + END IF + RETURN +* +* End of ZHPGST +* + END diff --git a/dspl/liblapack/SRC/zhpgv.f b/dspl/liblapack/SRC/zhpgv.f new file mode 100644 index 0000000..c6a9a6a --- /dev/null +++ b/dspl/liblapack/SRC/zhpgv.f @@ -0,0 +1,282 @@ +*> \brief \b ZHPGV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHPGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHPGV( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, +* RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDZ, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 AP( * ), BP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHPGV computes all the eigenvalues and, optionally, the eigenvectors +*> of a complex generalized Hermitian-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +*> Here A and B are assumed to be Hermitian, stored in packed format, +*> and B is also positive definite. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the contents of AP are destroyed. +*> \endverbatim +*> +*> \param[in,out] BP +*> \verbatim +*> BP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> B, packed columnwise in a linear array. The j-th column of B +*> is stored in the array BP as follows: +*> if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +*> if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +*> +*> On exit, the triangular factor U or L from the Cholesky +*> factorization B = U**H*U or B = L*L**H, in the same storage +*> format as B. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +*> eigenvectors. The eigenvectors are normalized as follows: +*> if ITYPE = 1 or 2, Z**H*B*Z = I; +*> if ITYPE = 3, Z**H*inv(B)*Z = I. +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (max(1, 2*N-1)) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (max(1, 3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: ZPPTRF or ZHPEV returned an error code: +*> <= N: if INFO = i, ZHPEV failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not convergeto zero; +*> > N: if INFO = N + i, for 1 <= i <= n, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHEReigen +* +* ===================================================================== + SUBROUTINE ZHPGV( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, + $ RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AP( * ), BP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, WANTZ + CHARACTER TRANS + INTEGER J, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHPEV, ZHPGST, ZPPTRF, ZTPMV, ZTPSV +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPGV ', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL ZPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL ZHPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL ZHPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, RWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + DO 10 J = 1, NEIG + CALL ZTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**H *y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, NEIG + CALL ZTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF + RETURN +* +* End of ZHPGV +* + END diff --git a/dspl/liblapack/SRC/zhpgvd.f b/dspl/liblapack/SRC/zhpgvd.f new file mode 100644 index 0000000..f0fad9e --- /dev/null +++ b/dspl/liblapack/SRC/zhpgvd.f @@ -0,0 +1,393 @@ +*> \brief \b ZHPGVD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHPGVD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, +* LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 AP( * ), BP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHPGVD computes all the eigenvalues and, optionally, the eigenvectors +*> of a complex generalized Hermitian-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +*> B are assumed to be Hermitian, stored in packed format, and B is also +*> positive definite. +*> If eigenvectors are desired, it uses a divide and conquer algorithm. +*> +*> The divide and conquer algorithm makes very mild assumptions about +*> floating point arithmetic. It will work on machines with a guard +*> digit in add/subtract, or on those binary machines without guard +*> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or +*> Cray-2. It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the contents of AP are destroyed. +*> \endverbatim +*> +*> \param[in,out] BP +*> \verbatim +*> BP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> B, packed columnwise in a linear array. The j-th column of B +*> is stored in the array BP as follows: +*> if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +*> if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +*> +*> On exit, the triangular factor U or L from the Cholesky +*> factorization B = U**H*U or B = L*L**H, in the same storage +*> format as B. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of +*> eigenvectors. The eigenvectors are normalized as follows: +*> if ITYPE = 1 or 2, Z**H*B*Z = I; +*> if ITYPE = 3, Z**H*inv(B)*Z = I. +*> If JOBZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the required LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If N <= 1, LWORK >= 1. +*> If JOBZ = 'N' and N > 1, LWORK >= N. +*> If JOBZ = 'V' and N > 1, LWORK >= 2*N. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the required sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) +*> On exit, if INFO = 0, RWORK(1) returns the required LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of array RWORK. +*> If N <= 1, LRWORK >= 1. +*> If JOBZ = 'N' and N > 1, LRWORK >= N. +*> If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the required sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the required LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of array IWORK. +*> If JOBZ = 'N' or N <= 1, LIWORK >= 1. +*> If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the required sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: ZPPTRF or ZHPEVD returned an error code: +*> <= N: if INFO = i, ZHPEVD failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not convergeto zero; +*> > N: if INFO = N + i, for 1 <= i <= n, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHEReigen +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE ZHPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK, + $ LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AP( * ), BP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER J, LIWMIN, LRWMIN, LWMIN, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHPEVD, ZHPGST, ZPPTRF, ZTPMV, ZTPSV +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.LE.1 ) THEN + LWMIN = 1 + LIWMIN = 1 + LRWMIN = 1 + ELSE + IF( WANTZ ) THEN + LWMIN = 2*N + LRWMIN = 1 + 5*N + 2*N**2 + LIWMIN = 3 + 5*N + ELSE + LWMIN = N + LRWMIN = N + LIWMIN = 1 + END IF + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -13 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -15 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPGVD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL ZPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL ZHPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL ZHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, RWORK, + $ LRWORK, IWORK, LIWORK, INFO ) + LWMIN = MAX( DBLE( LWMIN ), DBLE( WORK( 1 ) ) ) + LRWMIN = MAX( DBLE( LRWMIN ), DBLE( RWORK( 1 ) ) ) + LIWMIN = MAX( DBLE( LIWMIN ), DBLE( IWORK( 1 ) ) ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + DO 10 J = 1, NEIG + CALL ZTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**H *y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, NEIG + CALL ZTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF +* + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of ZHPGVD +* + END diff --git a/dspl/liblapack/SRC/zhpgvx.f b/dspl/liblapack/SRC/zhpgvx.f new file mode 100644 index 0000000..8e8ec13 --- /dev/null +++ b/dspl/liblapack/SRC/zhpgvx.f @@ -0,0 +1,421 @@ +*> \brief \b ZHPGVX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHPGVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHPGVX( ITYPE, JOBZ, RANGE, UPLO, N, AP, BP, VL, VU, +* IL, IU, ABSTOL, M, W, Z, LDZ, WORK, RWORK, +* IWORK, IFAIL, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE, UPLO +* INTEGER IL, INFO, ITYPE, IU, LDZ, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IFAIL( * ), IWORK( * ) +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 AP( * ), BP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHPGVX computes selected eigenvalues and, optionally, eigenvectors +*> of a complex generalized Hermitian-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and +*> B are assumed to be Hermitian, stored in packed format, and B is also +*> positive definite. Eigenvalues and eigenvectors can be selected by +*> specifying either a range of values or a range of indices for the +*> desired eigenvalues. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found; +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found; +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the contents of AP are destroyed. +*> \endverbatim +*> +*> \param[in,out] BP +*> \verbatim +*> BP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> B, packed columnwise in a linear array. The j-th column of B +*> is stored in the array BP as follows: +*> if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j; +*> if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n. +*> +*> On exit, the triangular factor U or L from the Cholesky +*> factorization B = U**H*U or B = L*L**H, in the same storage +*> format as B. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> The absolute error tolerance for the eigenvalues. +*> An approximate eigenvalue is accepted as converged +*> when it is determined to lie in an interval [a,b] +*> of width less than or equal to +*> +*> ABSTOL + EPS * max( |a|,|b| ) , +*> +*> where EPS is the machine precision. If ABSTOL is less than +*> or equal to zero, then EPS*|T| will be used in its place, +*> where |T| is the 1-norm of the tridiagonal matrix obtained +*> by reducing AP to tridiagonal form. +*> +*> Eigenvalues will be computed most accurately when ABSTOL is +*> set to twice the underflow threshold 2*DLAMCH('S'), not zero. +*> If this routine returns with INFO>0, indicating that some +*> eigenvectors did not converge, try setting ABSTOL to +*> 2*DLAMCH('S'). +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> On normal exit, the first M elements contain the selected +*> eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> If JOBZ = 'N', then Z is not referenced. +*> If JOBZ = 'V', then if INFO = 0, the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix A +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> The eigenvectors are normalized as follows: +*> if ITYPE = 1 or 2, Z**H*B*Z = I; +*> if ITYPE = 3, Z**H*inv(B)*Z = I. +*> +*> If an eigenvector fails to converge, then that column of Z +*> contains the latest approximation to the eigenvector, and the +*> index of the eigenvector is returned in IFAIL. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (N) +*> If JOBZ = 'V', then if INFO = 0, the first M elements of +*> IFAIL are zero. If INFO > 0, then IFAIL contains the +*> indices of the eigenvectors that failed to converge. +*> If JOBZ = 'N', then IFAIL is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: ZPPTRF or ZHPEVX returned an error code: +*> <= N: if INFO = i, ZHPEVX failed to converge; +*> i eigenvectors failed to converge. Their indices +*> are stored in array IFAIL. +*> > N: if INFO = N + i, for 1 <= i <= n, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHEReigen +* +*> \par Contributors: +* ================== +*> +*> Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA +* +* ===================================================================== + SUBROUTINE ZHPGVX( ITYPE, JOBZ, RANGE, UPLO, N, AP, BP, VL, VU, + $ IL, IU, ABSTOL, M, W, Z, LDZ, WORK, RWORK, + $ IWORK, IFAIL, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE, UPLO + INTEGER IL, INFO, ITYPE, IU, LDZ, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER IFAIL( * ), IWORK( * ) + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 AP( * ), BP( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, UPPER, VALEIG, WANTZ + CHARACTER TRANS + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHPEVX, ZHPGST, ZPPTRF, ZTPMV, ZTPSV +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -3 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE + IF( VALEIG ) THEN + IF( N.GT.0 .AND. VU.LE.VL ) THEN + INFO = -9 + END IF + ELSE IF( INDEIG ) THEN + IF( IL.LT.1 ) THEN + INFO = -10 + ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN + INFO = -11 + END IF + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -16 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPGVX', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL ZPPTRF( UPLO, N, BP, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL ZHPGST( ITYPE, UPLO, N, AP, BP, INFO ) + CALL ZHPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, ABSTOL, M, + $ W, Z, LDZ, WORK, RWORK, IWORK, IFAIL, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + IF( INFO.GT.0 ) + $ M = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'C' + END IF +* + DO 10 J = 1, M + CALL ZTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 10 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**H *y +* + IF( UPPER ) THEN + TRANS = 'C' + ELSE + TRANS = 'N' + END IF +* + DO 20 J = 1, M + CALL ZTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ), + $ 1 ) + 20 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHPGVX +* + END diff --git a/dspl/liblapack/SRC/zhprfs.f b/dspl/liblapack/SRC/zhprfs.f new file mode 100644 index 0000000..277a6e7 --- /dev/null +++ b/dspl/liblapack/SRC/zhprfs.f @@ -0,0 +1,438 @@ +*> \brief \b ZHPRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHPRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, +* FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX*16 AFP( * ), AP( * ), B( LDB, * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHPRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is Hermitian indefinite +*> and packed, and provides error bounds and backward error estimates +*> for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the Hermitian matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in] AFP +*> \verbatim +*> AFP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The factored form of the matrix A. AFP contains the block +*> diagonal matrix D and the multipliers used to obtain the +*> factor U or L from the factorization A = U*D*U**H or +*> A = L*D*L**H as computed by ZHPTRF, stored as a packed +*> triangular matrix. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZHPTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by ZHPTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZHPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, + $ FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IK, J, K, KASE, KK, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZHPMV, ZHPTRS, ZLACN2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL ZHPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + KK = 1 + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + IK = KK + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 40 CONTINUE + RWORK( K ) = RWORK( K ) + ABS( DBLE( AP( KK+K-1 ) ) )* + $ XK + S + KK = KK + K + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + ABS( DBLE( AP( KK ) ) )*XK + IK = KK + 1 + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + KK = KK + ( N-K+1 ) + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZHPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**H). +* + CALL ZHPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL ZHPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of ZHPRFS +* + END diff --git a/dspl/liblapack/SRC/zhpsv.f b/dspl/liblapack/SRC/zhpsv.f new file mode 100644 index 0000000..37ca126 --- /dev/null +++ b/dspl/liblapack/SRC/zhpsv.f @@ -0,0 +1,224 @@ +*> \brief ZHPSV computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHPSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHPSV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian matrix stored in packed format and X +*> and B are N-by-NRHS matrices. +*> +*> The diagonal pivoting method is used to factor A as +*> A = U * D * U**H, if UPLO = 'U', or +*> A = L * D * L**H, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, D is Hermitian and block diagonal with 1-by-1 +*> and 2-by-2 diagonal blocks. The factored form of A is then used to +*> solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**H or A = L*D*L**H as computed by ZHPTRF, stored as +*> a packed triangular matrix in the same storage format as A. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, as +*> determined by ZHPTRF. If IPIV(k) > 0, then rows and columns +*> k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +*> diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +*> then rows and columns k-1 and -IPIV(k) were interchanged and +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +*> IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +*> -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +*> diagonal block. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the Hermitian matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = conjg(aji)) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHPTRF, ZHPTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPSV ', -INFO ) + RETURN + END IF +* +* Compute the factorization A = U*D*U**H or A = L*D*L**H. +* + CALL ZHPTRF( UPLO, N, AP, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZHPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* + END IF + RETURN +* +* End of ZHPSV +* + END diff --git a/dspl/liblapack/SRC/zhpsvx.f b/dspl/liblapack/SRC/zhpsvx.f new file mode 100644 index 0000000..5dcb9f4 --- /dev/null +++ b/dspl/liblapack/SRC/zhpsvx.f @@ -0,0 +1,387 @@ +*> \brief ZHPSVX computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHPSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, +* LDX, RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER FACT, UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX*16 AFP( * ), AP( * ), B( LDB, * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHPSVX uses the diagonal pivoting factorization A = U*D*U**H or +*> A = L*D*L**H to compute the solution to a complex system of linear +*> equations A * X = B, where A is an N-by-N Hermitian matrix stored +*> in packed format and X and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'N', the diagonal pivoting method is used to factor A as +*> A = U * D * U**H, if UPLO = 'U', or +*> A = L * D * L**H, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices and D is Hermitian and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> 2. If some D(i,i)=0, so that D is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 3. The system of equations is solved for X using the factored form +*> of A. +*> +*> 4. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of A has been +*> supplied on entry. +*> = 'F': On entry, AFP and IPIV contain the factored form of +*> A. AFP and IPIV will not be modified. +*> = 'N': The matrix A will be copied to AFP and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the Hermitian matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> \endverbatim +*> +*> \param[in,out] AFP +*> \verbatim +*> AFP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> If FACT = 'F', then AFP is an input argument and on entry +*> contains the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**H or A = L*D*L**H as computed by ZHPTRF, stored as +*> a packed triangular matrix in the same storage format as A. +*> +*> If FACT = 'N', then AFP is an output argument and on exit +*> contains the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**H or A = L*D*L**H as computed by ZHPTRF, stored as +*> a packed triangular matrix in the same storage format as A. +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains details of the interchanges and the block structure +*> of D, as determined by ZHPTRF. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains details of the interchanges and the block structure +*> of D, as determined by ZHPTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The estimate of the reciprocal condition number of the matrix +*> A. If RCOND is less than the machine precision (in +*> particular, if RCOND = 0), the matrix is singular to working +*> precision. This condition is indicated by a return code of +*> INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: D(i,i) is exactly zero. The factorization +*> has been completed but the factor D is exactly +*> singular, so the solution and error bounds could +*> not be computed. RCOND = 0 is returned. +*> = N+1: D is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16OTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the Hermitian matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = conjg(aji)) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, + $ LDX, RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT + DOUBLE PRECISION ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHP + EXTERNAL LSAME, DLAMCH, ZLANHP +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZHPCON, ZHPRFS, ZHPTRF, ZHPTRS, + $ ZLACPY +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U**H or A = L*D*L**H. +* + CALL ZCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 ) + CALL ZHPTRF( UPLO, N, AFP, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = ZLANHP( 'I', UPLO, N, AP, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL ZHPCON( UPLO, N, AFP, IPIV, ANORM, RCOND, WORK, INFO ) +* +* Compute the solution vectors X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZHPTRS( UPLO, N, NRHS, AFP, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL ZHPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, FERR, + $ BERR, WORK, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of ZHPSVX +* + END diff --git a/dspl/liblapack/SRC/zhptrd.f b/dspl/liblapack/SRC/zhptrd.f new file mode 100644 index 0000000..a230e3d --- /dev/null +++ b/dspl/liblapack/SRC/zhptrd.f @@ -0,0 +1,310 @@ +*> \brief \b ZHPTRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHPTRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHPTRD( UPLO, N, AP, D, E, TAU, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* COMPLEX*16 AP( * ), TAU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHPTRD reduces a complex Hermitian matrix A stored in packed form to +*> real symmetric tridiagonal form T by a unitary similarity +*> transformation: Q**H * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the unitary +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the unitary matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n-1) . . . H(2) H(1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in AP, +*> overwriting A(1:i-1,i+1), and tau is stored in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(n-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in AP, +*> overwriting A(i+2:n,i), and tau is stored in TAU(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHPTRD( UPLO, N, AP, D, E, TAU, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + COMPLEX*16 AP( * ), TAU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO, HALF + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ), + $ HALF = ( 0.5D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, I1, I1I1, II + COMPLEX*16 ALPHA, TAUI +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZHPMV, ZHPR2, ZLARFG +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPTRD', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A. +* I1 is the index in AP of A(1,I+1). +* + I1 = N*( N-1 ) / 2 + 1 + AP( I1+N-1 ) = DBLE( AP( I1+N-1 ) ) + DO 10 I = N - 1, 1, -1 +* +* Generate elementary reflector H(i) = I - tau * v * v**H +* to annihilate A(1:i-1,i+1) +* + ALPHA = AP( I1+I-1 ) + CALL ZLARFG( I, ALPHA, AP( I1 ), 1, TAUI ) + E( I ) = ALPHA +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(1:i,1:i) +* + AP( I1+I-1 ) = ONE +* +* Compute y := tau * A * v storing y in TAU(1:i) +* + CALL ZHPMV( UPLO, I, TAUI, AP, AP( I1 ), 1, ZERO, TAU, + $ 1 ) +* +* Compute w := y - 1/2 * tau * (y**H *v) * v +* + ALPHA = -HALF*TAUI*ZDOTC( I, TAU, 1, AP( I1 ), 1 ) + CALL ZAXPY( I, ALPHA, AP( I1 ), 1, TAU, 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w**H - w * v**H +* + CALL ZHPR2( UPLO, I, -ONE, AP( I1 ), 1, TAU, 1, AP ) +* + END IF + AP( I1+I-1 ) = E( I ) + D( I+1 ) = AP( I1+I ) + TAU( I ) = TAUI + I1 = I1 - I + 10 CONTINUE + D( 1 ) = AP( 1 ) + ELSE +* +* Reduce the lower triangle of A. II is the index in AP of +* A(i,i) and I1I1 is the index of A(i+1,i+1). +* + II = 1 + AP( 1 ) = DBLE( AP( 1 ) ) + DO 20 I = 1, N - 1 + I1I1 = II + N - I + 1 +* +* Generate elementary reflector H(i) = I - tau * v * v**H +* to annihilate A(i+2:n,i) +* + ALPHA = AP( II+1 ) + CALL ZLARFG( N-I, ALPHA, AP( II+2 ), 1, TAUI ) + E( I ) = ALPHA +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(i+1:n,i+1:n) +* + AP( II+1 ) = ONE +* +* Compute y := tau * A * v storing y in TAU(i:n-1) +* + CALL ZHPMV( UPLO, N-I, TAUI, AP( I1I1 ), AP( II+1 ), 1, + $ ZERO, TAU( I ), 1 ) +* +* Compute w := y - 1/2 * tau * (y**H *v) * v +* + ALPHA = -HALF*TAUI*ZDOTC( N-I, TAU( I ), 1, AP( II+1 ), + $ 1 ) + CALL ZAXPY( N-I, ALPHA, AP( II+1 ), 1, TAU( I ), 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w**H - w * v**H +* + CALL ZHPR2( UPLO, N-I, -ONE, AP( II+1 ), 1, TAU( I ), 1, + $ AP( I1I1 ) ) +* + END IF + AP( II+1 ) = E( I ) + D( I ) = AP( II ) + TAU( I ) = TAUI + II = I1I1 + 20 CONTINUE + D( N ) = AP( II ) + END IF +* + RETURN +* +* End of ZHPTRD +* + END diff --git a/dspl/liblapack/SRC/zhptrf.f b/dspl/liblapack/SRC/zhptrf.f new file mode 100644 index 0000000..2fa2ee8 --- /dev/null +++ b/dspl/liblapack/SRC/zhptrf.f @@ -0,0 +1,650 @@ +*> \brief \b ZHPTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHPTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHPTRF( UPLO, N, AP, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHPTRF computes the factorization of a complex Hermitian packed +*> matrix A using the Bunch-Kaufman diagonal pivoting method: +*> +*> A = U*D*U**H or A = L*D*L**H +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is Hermitian and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L, stored as a packed triangular +*> matrix overwriting A (see below for further details). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**H, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**H, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> J. Lewis, Boeing Computer Services Company +* +* ===================================================================== + SUBROUTINE ZHPTRF( UPLO, N, AP, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KC, KK, KNC, KP, KPC, + $ KSTEP, KX, NPP + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, D, D11, D22, R1, ROWMAX, + $ TT + COMPLEX*16 D12, D21, T, WK, WKM1, WKP1, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAPY2 + EXTERNAL LSAME, IZAMAX, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZHPR, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPTRF', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**H using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + KC = ( N-1 )*N / 2 + 1 + 10 CONTINUE + KNC = KC +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( DBLE( AP( KC+K-1 ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = IZAMAX( K-1, AP( KC ), 1 ) + COLMAX = CABS1( AP( KC+IMAX-1 ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + AP( KC+K-1 ) = DBLE( AP( KC+K-1 ) ) + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + ROWMAX = ZERO + JMAX = IMAX + KX = IMAX*( IMAX+1 ) / 2 + IMAX + DO 20 J = IMAX + 1, K + IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = CABS1( AP( KX ) ) + JMAX = J + END IF + KX = KX + J + 20 CONTINUE + KPC = ( IMAX-1 )*IMAX / 2 + 1 + IF( IMAX.GT.1 ) THEN + JMAX = IZAMAX( IMAX-1, AP( KPC ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( DBLE( AP( KPC+IMAX-1 ) ) ).GE.ALPHA* + $ ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL ZSWAP( KP-1, AP( KNC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 30 J = KP + 1, KK - 1 + KX = KX + J - 1 + T = DCONJG( AP( KNC+J-1 ) ) + AP( KNC+J-1 ) = DCONJG( AP( KX ) ) + AP( KX ) = T + 30 CONTINUE + AP( KX+KK-1 ) = DCONJG( AP( KX+KK-1 ) ) + R1 = DBLE( AP( KNC+KK-1 ) ) + AP( KNC+KK-1 ) = DBLE( AP( KPC+KP-1 ) ) + AP( KPC+KP-1 ) = R1 + IF( KSTEP.EQ.2 ) THEN + AP( KC+K-1 ) = DBLE( AP( KC+K-1 ) ) + T = AP( KC+K-2 ) + AP( KC+K-2 ) = AP( KC+KP-1 ) + AP( KC+KP-1 ) = T + END IF + ELSE + AP( KC+K-1 ) = DBLE( AP( KC+K-1 ) ) + IF( KSTEP.EQ.2 ) + $ AP( KC-1 ) = DBLE( AP( KC-1 ) ) + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)**H = A - W(k)*1/D(k)*W(k)**H +* + R1 = ONE / DBLE( AP( KC+K-1 ) ) + CALL ZHPR( UPLO, K-1, -R1, AP( KC ), 1, AP ) +* +* Store U(k) in column k +* + CALL ZDSCAL( K-1, R1, AP( KC ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**H +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )**H +* + IF( K.GT.2 ) THEN +* + D = DLAPY2( DBLE( AP( K-1+( K-1 )*K / 2 ) ), + $ DIMAG( AP( K-1+( K-1 )*K / 2 ) ) ) + D22 = DBLE( AP( K-1+( K-2 )*( K-1 ) / 2 ) ) / D + D11 = DBLE( AP( K+( K-1 )*K / 2 ) ) / D + TT = ONE / ( D11*D22-ONE ) + D12 = AP( K-1+( K-1 )*K / 2 ) / D + D = TT / D +* + DO 50 J = K - 2, 1, -1 + WKM1 = D*( D11*AP( J+( K-2 )*( K-1 ) / 2 )- + $ DCONJG( D12 )*AP( J+( K-1 )*K / 2 ) ) + WK = D*( D22*AP( J+( K-1 )*K / 2 )-D12* + $ AP( J+( K-2 )*( K-1 ) / 2 ) ) + DO 40 I = J, 1, -1 + AP( I+( J-1 )*J / 2 ) = AP( I+( J-1 )*J / 2 ) - + $ AP( I+( K-1 )*K / 2 )*DCONJG( WK ) - + $ AP( I+( K-2 )*( K-1 ) / 2 )*DCONJG( WKM1 ) + 40 CONTINUE + AP( J+( K-1 )*K / 2 ) = WK + AP( J+( K-2 )*( K-1 ) / 2 ) = WKM1 + AP( J+( J-1 )*J / 2 ) = DCMPLX( DBLE( AP( J+( J- + $ 1 )*J / 2 ) ), 0.0D+0 ) + 50 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + KC = KNC - K + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**H using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + KC = 1 + NPP = N*( N+1 ) / 2 + 60 CONTINUE + KNC = KC +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( DBLE( AP( KC ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + IZAMAX( N-K, AP( KC+1 ), 1 ) + COLMAX = CABS1( AP( KC+IMAX-K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + AP( KC ) = DBLE( AP( KC ) ) + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + ROWMAX = ZERO + KX = KC + IMAX - K + DO 70 J = K, IMAX - 1 + IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = CABS1( AP( KX ) ) + JMAX = J + END IF + KX = KX + N - J + 70 CONTINUE + KPC = NPP - ( N-IMAX+1 )*( N-IMAX+2 ) / 2 + 1 + IF( IMAX.LT.N ) THEN + JMAX = IMAX + IZAMAX( N-IMAX, AP( KPC+1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( ABS( DBLE( AP( KPC ) ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC + N - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, AP( KNC+KP-KK+1 ), 1, AP( KPC+1 ), + $ 1 ) + KX = KNC + KP - KK + DO 80 J = KK + 1, KP - 1 + KX = KX + N - J + 1 + T = DCONJG( AP( KNC+J-KK ) ) + AP( KNC+J-KK ) = DCONJG( AP( KX ) ) + AP( KX ) = T + 80 CONTINUE + AP( KNC+KP-KK ) = DCONJG( AP( KNC+KP-KK ) ) + R1 = DBLE( AP( KNC ) ) + AP( KNC ) = DBLE( AP( KPC ) ) + AP( KPC ) = R1 + IF( KSTEP.EQ.2 ) THEN + AP( KC ) = DBLE( AP( KC ) ) + T = AP( KC+1 ) + AP( KC+1 ) = AP( KC+KP-K ) + AP( KC+KP-K ) = T + END IF + ELSE + AP( KC ) = DBLE( AP( KC ) ) + IF( KSTEP.EQ.2 ) + $ AP( KNC ) = DBLE( AP( KNC ) ) + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)**H = A - W(k)*(1/D(k))*W(k)**H +* + R1 = ONE / DBLE( AP( KC ) ) + CALL ZHPR( UPLO, N-K, -R1, AP( KC+1 ), 1, + $ AP( KC+N-K+1 ) ) +* +* Store L(k) in column K +* + CALL ZDSCAL( N-K, R1, AP( KC+1 ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k): columns K and K+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )**H +* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )**H +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D = DLAPY2( DBLE( AP( K+1+( K-1 )*( 2*N-K ) / 2 ) ), + $ DIMAG( AP( K+1+( K-1 )*( 2*N-K ) / 2 ) ) ) + D11 = DBLE( AP( K+1+K*( 2*N-K-1 ) / 2 ) ) / D + D22 = DBLE( AP( K+( K-1 )*( 2*N-K ) / 2 ) ) / D + TT = ONE / ( D11*D22-ONE ) + D21 = AP( K+1+( K-1 )*( 2*N-K ) / 2 ) / D + D = TT / D +* + DO 100 J = K + 2, N + WK = D*( D11*AP( J+( K-1 )*( 2*N-K ) / 2 )-D21* + $ AP( J+K*( 2*N-K-1 ) / 2 ) ) + WKP1 = D*( D22*AP( J+K*( 2*N-K-1 ) / 2 )- + $ DCONJG( D21 )*AP( J+( K-1 )*( 2*N-K ) / + $ 2 ) ) + DO 90 I = J, N + AP( I+( J-1 )*( 2*N-J ) / 2 ) = AP( I+( J-1 )* + $ ( 2*N-J ) / 2 ) - AP( I+( K-1 )*( 2*N-K ) / + $ 2 )*DCONJG( WK ) - AP( I+K*( 2*N-K-1 ) / 2 )* + $ DCONJG( WKP1 ) + 90 CONTINUE + AP( J+( K-1 )*( 2*N-K ) / 2 ) = WK + AP( J+K*( 2*N-K-1 ) / 2 ) = WKP1 + AP( J+( J-1 )*( 2*N-J ) / 2 ) + $ = DCMPLX( DBLE( AP( J+( J-1 )*( 2*N-J ) / 2 ) ), + $ 0.0D+0 ) + 100 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + KC = KNC + N - K + 2 + GO TO 60 +* + END IF +* + 110 CONTINUE + RETURN +* +* End of ZHPTRF +* + END diff --git a/dspl/liblapack/SRC/zhptri.f b/dspl/liblapack/SRC/zhptri.f new file mode 100644 index 0000000..4df455d --- /dev/null +++ b/dspl/liblapack/SRC/zhptri.f @@ -0,0 +1,410 @@ +*> \brief \b ZHPTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHPTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHPTRI( UPLO, N, AP, IPIV, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHPTRI computes the inverse of a complex Hermitian indefinite matrix +*> A in packed storage using the factorization A = U*D*U**H or +*> A = L*D*L**H computed by ZHPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by ZHPTRF, +*> stored as a packed triangular matrix. +*> +*> On exit, if INFO = 0, the (Hermitian) inverse of the original +*> matrix, stored as a packed triangular matrix. The j-th column +*> of inv(A) is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j; +*> if UPLO = 'L', +*> AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZHPTRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZHPTRI( UPLO, N, AP, IPIV, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + COMPLEX*16 CONE, ZERO + PARAMETER ( ONE = 1.0D+0, CONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KCNEXT, KP, KPC, KSTEP, KX, NPP + DOUBLE PRECISION AK, AKP1, D, T + COMPLEX*16 AKKP1, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZHPMV, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + KP = N*( N+1 ) / 2 + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP - INFO + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + KP = 1 + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP + N - INFO + 1 + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U**H. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + KCNEXT = KC + K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC+K-1 ) = ONE / DBLE( AP( KC+K-1 ) ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL ZCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL ZHPMV( UPLO, K-1, -CONE, AP, WORK, 1, ZERO, + $ AP( KC ), 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ DBLE( ZDOTC( K-1, WORK, 1, AP( KC ), 1 ) ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( AP( KCNEXT+K-1 ) ) + AK = DBLE( AP( KC+K-1 ) ) / T + AKP1 = DBLE( AP( KCNEXT+K ) ) / T + AKKP1 = AP( KCNEXT+K-1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KC+K-1 ) = AKP1 / D + AP( KCNEXT+K ) = AK / D + AP( KCNEXT+K-1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL ZCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL ZHPMV( UPLO, K-1, -CONE, AP, WORK, 1, ZERO, + $ AP( KC ), 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ DBLE( ZDOTC( K-1, WORK, 1, AP( KC ), 1 ) ) + AP( KCNEXT+K-1 ) = AP( KCNEXT+K-1 ) - + $ ZDOTC( K-1, AP( KC ), 1, AP( KCNEXT ), + $ 1 ) + CALL ZCOPY( K-1, AP( KCNEXT ), 1, WORK, 1 ) + CALL ZHPMV( UPLO, K-1, -CONE, AP, WORK, 1, ZERO, + $ AP( KCNEXT ), 1 ) + AP( KCNEXT+K ) = AP( KCNEXT+K ) - + $ DBLE( ZDOTC( K-1, WORK, 1, AP( KCNEXT ), + $ 1 ) ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT + K + 1 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + KPC = ( KP-1 )*KP / 2 + 1 + CALL ZSWAP( KP-1, AP( KC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 40 J = KP + 1, K - 1 + KX = KX + J - 1 + TEMP = DCONJG( AP( KC+J-1 ) ) + AP( KC+J-1 ) = DCONJG( AP( KX ) ) + AP( KX ) = TEMP + 40 CONTINUE + AP( KC+KP-1 ) = DCONJG( AP( KC+KP-1 ) ) + TEMP = AP( KC+K-1 ) + AP( KC+K-1 ) = AP( KPC+KP-1 ) + AP( KPC+KP-1 ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC+K+K-1 ) + AP( KC+K+K-1 ) = AP( KC+K+KP-1 ) + AP( KC+K+KP-1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + KC = KCNEXT + GO TO 30 + 50 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L**H. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + NPP = N*( N+1 ) / 2 + K = N + KC = NPP + 60 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 80 +* + KCNEXT = KC - ( N-K+2 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC ) = ONE / DBLE( AP( KC ) ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL ZCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL ZHPMV( UPLO, N-K, -CONE, AP( KC+N-K+1 ), WORK, 1, + $ ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - DBLE( ZDOTC( N-K, WORK, 1, + $ AP( KC+1 ), 1 ) ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = ABS( AP( KCNEXT+1 ) ) + AK = DBLE( AP( KCNEXT ) ) / T + AKP1 = DBLE( AP( KC ) ) / T + AKKP1 = AP( KCNEXT+1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KCNEXT ) = AKP1 / D + AP( KC ) = AK / D + AP( KCNEXT+1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL ZCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL ZHPMV( UPLO, N-K, -CONE, AP( KC+( N-K+1 ) ), WORK, + $ 1, ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - DBLE( ZDOTC( N-K, WORK, 1, + $ AP( KC+1 ), 1 ) ) + AP( KCNEXT+1 ) = AP( KCNEXT+1 ) - + $ ZDOTC( N-K, AP( KC+1 ), 1, + $ AP( KCNEXT+2 ), 1 ) + CALL ZCOPY( N-K, AP( KCNEXT+2 ), 1, WORK, 1 ) + CALL ZHPMV( UPLO, N-K, -CONE, AP( KC+( N-K+1 ) ), WORK, + $ 1, ZERO, AP( KCNEXT+2 ), 1 ) + AP( KCNEXT ) = AP( KCNEXT ) - + $ DBLE( ZDOTC( N-K, WORK, 1, AP( KCNEXT+2 ), + $ 1 ) ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT - ( N-K+3 ) + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + KPC = NPP - ( N-KP+1 )*( N-KP+2 ) / 2 + 1 + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, AP( KC+KP-K+1 ), 1, AP( KPC+1 ), 1 ) + KX = KC + KP - K + DO 70 J = K + 1, KP - 1 + KX = KX + N - J + 1 + TEMP = DCONJG( AP( KC+J-K ) ) + AP( KC+J-K ) = DCONJG( AP( KX ) ) + AP( KX ) = TEMP + 70 CONTINUE + AP( KC+KP-K ) = DCONJG( AP( KC+KP-K ) ) + TEMP = AP( KC ) + AP( KC ) = AP( KPC ) + AP( KPC ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC-N+K-1 ) + AP( KC-N+K-1 ) = AP( KC-N+KP-1 ) + AP( KC-N+KP-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + KC = KCNEXT + GO TO 60 + 80 CONTINUE + END IF +* + RETURN +* +* End of ZHPTRI +* + END diff --git a/dspl/liblapack/SRC/zhptrs.f b/dspl/liblapack/SRC/zhptrs.f new file mode 100644 index 0000000..71289d5 --- /dev/null +++ b/dspl/liblapack/SRC/zhptrs.f @@ -0,0 +1,474 @@ +*> \brief \b ZHPTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHPTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHPTRS solves a system of linear equations A*X = B with a complex +*> Hermitian matrix A stored in packed format using the factorization +*> A = U*D*U**H or A = L*D*L**H computed by ZHPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**H; +*> = 'L': Lower triangular, form is A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZHPTRF, stored as a +*> packed triangular matrix. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZHPTRF. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZHPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KP + DOUBLE PRECISION S + COMPLEX*16 AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZGEMV, ZGERU, ZLACGV, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCONJG, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**H. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + KC = KC - K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL ZGERU( K-1, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + S = DBLE( ONE ) / DBLE( AP( KC+K-1 ) ) + CALL ZDSCAL( NRHS, S, B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL ZSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL ZGERU( K-2, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL ZGERU( K-2, NRHS, -ONE, AP( KC-( K-1 ) ), 1, + $ B( K-1, 1 ), LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+K-2 ) + AKM1 = AP( KC-1 ) / AKM1K + AK = AP( KC+K-1 ) / DCONJG( AKM1K ) + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / DCONJG( AKM1K ) + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + KC = KC - K + 1 + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U**H *X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U**H(K)), where U(K) is the transformation +* stored in column K of A. +* + IF( K.GT.1 ) THEN + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, AP( KC ), 1, ONE, B( K, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + END IF +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + K + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U**H(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.GT.1 ) THEN + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, AP( KC ), 1, ONE, B( K, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) +* + CALL ZLACGV( NRHS, B( K+1, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B, + $ LDB, AP( KC+K ), 1, ONE, B( K+1, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K+1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + 2*K + 1 + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L**H. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL ZGERU( N-K, NRHS, -ONE, AP( KC+1 ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + S = DBLE( ONE ) / DBLE( AP( KC ) ) + CALL ZDSCAL( NRHS, S, B( K, 1 ), LDB ) + KC = KC + N - K + 1 + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL ZSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL ZGERU( N-K-1, NRHS, -ONE, AP( KC+2 ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL ZGERU( N-K-1, NRHS, -ONE, AP( KC+N-K+2 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+1 ) + AKM1 = AP( KC ) / DCONJG( AKM1K ) + AK = AP( KC+N-K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / DCONJG( AKM1K ) + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + KC = KC + 2*( N-K ) + 1 + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L**H *X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + KC = KC - ( N-K+1 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L**H(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) THEN + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, AP( KC+1 ), 1, ONE, + $ B( K, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + END IF +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L**H(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, AP( KC+1 ), 1, ONE, + $ B( K, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K, 1 ), LDB ) +* + CALL ZLACGV( NRHS, B( K-1, 1 ), LDB ) + CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE, + $ B( K+1, 1 ), LDB, AP( KC-( N-K ) ), 1, ONE, + $ B( K-1, 1 ), LDB ) + CALL ZLACGV( NRHS, B( K-1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC - ( N-K+2 ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of ZHPTRS +* + END diff --git a/dspl/liblapack/SRC/zhsein.f b/dspl/liblapack/SRC/zhsein.f new file mode 100644 index 0000000..652c7d7 --- /dev/null +++ b/dspl/liblapack/SRC/zhsein.f @@ -0,0 +1,468 @@ +*> \brief \b ZHSEIN +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHSEIN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHSEIN( SIDE, EIGSRC, INITV, SELECT, N, H, LDH, W, VL, +* LDVL, VR, LDVR, MM, M, WORK, RWORK, IFAILL, +* IFAILR, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EIGSRC, INITV, SIDE +* INTEGER INFO, LDH, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* INTEGER IFAILL( * ), IFAILR( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 H( LDH, * ), VL( LDVL, * ), VR( LDVR, * ), +* $ W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHSEIN uses inverse iteration to find specified right and/or left +*> eigenvectors of a complex upper Hessenberg matrix H. +*> +*> The right eigenvector x and the left eigenvector y of the matrix H +*> corresponding to an eigenvalue w are defined by: +*> +*> H * x = w * x, y**h * H = w * y**h +*> +*> where y**h denotes the conjugate transpose of the vector y. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': compute right eigenvectors only; +*> = 'L': compute left eigenvectors only; +*> = 'B': compute both right and left eigenvectors. +*> \endverbatim +*> +*> \param[in] EIGSRC +*> \verbatim +*> EIGSRC is CHARACTER*1 +*> Specifies the source of eigenvalues supplied in W: +*> = 'Q': the eigenvalues were found using ZHSEQR; thus, if +*> H has zero subdiagonal elements, and so is +*> block-triangular, then the j-th eigenvalue can be +*> assumed to be an eigenvalue of the block containing +*> the j-th row/column. This property allows ZHSEIN to +*> perform inverse iteration on just one diagonal block. +*> = 'N': no assumptions are made on the correspondence +*> between eigenvalues and diagonal blocks. In this +*> case, ZHSEIN must always perform inverse iteration +*> using the whole matrix H. +*> \endverbatim +*> +*> \param[in] INITV +*> \verbatim +*> INITV is CHARACTER*1 +*> = 'N': no initial vectors are supplied; +*> = 'U': user-supplied initial vectors are stored in the arrays +*> VL and/or VR. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> Specifies the eigenvectors to be computed. To select the +*> eigenvector corresponding to the eigenvalue W(j), +*> SELECT(j) must be set to .TRUE.. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N >= 0. +*> \endverbatim +*> +*> \param[in] H +*> \verbatim +*> H is COMPLEX*16 array, dimension (LDH,N) +*> The upper Hessenberg matrix H. +*> If a NaN is detected in H, the routine will return with INFO=-6. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] W +*> \verbatim +*> W is COMPLEX*16 array, dimension (N) +*> On entry, the eigenvalues of H. +*> On exit, the real parts of W may have been altered since +*> close eigenvalues are perturbed slightly in searching for +*> independent eigenvectors. +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is COMPLEX*16 array, dimension (LDVL,MM) +*> On entry, if INITV = 'U' and SIDE = 'L' or 'B', VL must +*> contain starting vectors for the inverse iteration for the +*> left eigenvectors; the starting vector for each eigenvector +*> must be in the same column in which the eigenvector will be +*> stored. +*> On exit, if SIDE = 'L' or 'B', the left eigenvectors +*> specified by SELECT will be stored consecutively in the +*> columns of VL, in the same order as their eigenvalues. +*> If SIDE = 'R', VL is not referenced. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. +*> LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] VR +*> \verbatim +*> VR is COMPLEX*16 array, dimension (LDVR,MM) +*> On entry, if INITV = 'U' and SIDE = 'R' or 'B', VR must +*> contain starting vectors for the inverse iteration for the +*> right eigenvectors; the starting vector for each eigenvector +*> must be in the same column in which the eigenvector will be +*> stored. +*> On exit, if SIDE = 'R' or 'B', the right eigenvectors +*> specified by SELECT will be stored consecutively in the +*> columns of VR, in the same order as their eigenvalues. +*> If SIDE = 'L', VR is not referenced. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. +*> LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of columns in the arrays VL and/or VR. MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of columns in the arrays VL and/or VR required to +*> store the eigenvectors (= the number of .TRUE. elements in +*> SELECT). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] IFAILL +*> \verbatim +*> IFAILL is INTEGER array, dimension (MM) +*> If SIDE = 'L' or 'B', IFAILL(i) = j > 0 if the left +*> eigenvector in the i-th column of VL (corresponding to the +*> eigenvalue w(j)) failed to converge; IFAILL(i) = 0 if the +*> eigenvector converged satisfactorily. +*> If SIDE = 'R', IFAILL is not referenced. +*> \endverbatim +*> +*> \param[out] IFAILR +*> \verbatim +*> IFAILR is INTEGER array, dimension (MM) +*> If SIDE = 'R' or 'B', IFAILR(i) = j > 0 if the right +*> eigenvector in the i-th column of VR (corresponding to the +*> eigenvalue w(j)) failed to converge; IFAILR(i) = 0 if the +*> eigenvector converged satisfactorily. +*> If SIDE = 'L', IFAILR is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, i is the number of eigenvectors which +*> failed to converge; see IFAILL and IFAILR for further +*> details. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Each eigenvector is normalized so that the element of largest +*> magnitude has magnitude 1; here the magnitude of a complex number +*> (x,y) is taken to be |x|+|y|. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHSEIN( SIDE, EIGSRC, INITV, SELECT, N, H, LDH, W, VL, + $ LDVL, VR, LDVR, MM, M, WORK, RWORK, IFAILL, + $ IFAILR, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EIGSRC, INITV, SIDE + INTEGER INFO, LDH, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IFAILL( * ), IFAILR( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 H( LDH, * ), VL( LDVL, * ), VR( LDVR, * ), + $ W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION RZERO + PARAMETER ( RZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL BOTHV, FROMQR, LEFTV, NOINIT, RIGHTV + INTEGER I, IINFO, K, KL, KLN, KR, KS, LDWORK + DOUBLE PRECISION EPS3, HNORM, SMLNUM, ULP, UNFL + COMPLEX*16 CDUM, WK +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + DOUBLE PRECISION DLAMCH, ZLANHS + EXTERNAL LSAME, DLAMCH, ZLANHS, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLAEIN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + FROMQR = LSAME( EIGSRC, 'Q' ) +* + NOINIT = LSAME( INITV, 'N' ) +* +* Set M to the number of columns required to store the selected +* eigenvectors. +* + M = 0 + DO 10 K = 1, N + IF( SELECT( K ) ) + $ M = M + 1 + 10 CONTINUE +* + INFO = 0 + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.FROMQR .AND. .NOT.LSAME( EIGSRC, 'N' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOINIT .AND. .NOT.LSAME( INITV, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDH.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -10 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -12 + ELSE IF( MM.LT.M ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHSEIN', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Set machine-dependent constants. +* + UNFL = DLAMCH( 'Safe minimum' ) + ULP = DLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) +* + LDWORK = N +* + KL = 1 + KLN = 0 + IF( FROMQR ) THEN + KR = 0 + ELSE + KR = N + END IF + KS = 1 +* + DO 100 K = 1, N + IF( SELECT( K ) ) THEN +* +* Compute eigenvector(s) corresponding to W(K). +* + IF( FROMQR ) THEN +* +* If affiliation of eigenvalues is known, check whether +* the matrix splits. +* +* Determine KL and KR such that 1 <= KL <= K <= KR <= N +* and H(KL,KL-1) and H(KR+1,KR) are zero (or KL = 1 or +* KR = N). +* +* Then inverse iteration can be performed with the +* submatrix H(KL:N,KL:N) for a left eigenvector, and with +* the submatrix H(1:KR,1:KR) for a right eigenvector. +* + DO 20 I = K, KL + 1, -1 + IF( H( I, I-1 ).EQ.ZERO ) + $ GO TO 30 + 20 CONTINUE + 30 CONTINUE + KL = I + IF( K.GT.KR ) THEN + DO 40 I = K, N - 1 + IF( H( I+1, I ).EQ.ZERO ) + $ GO TO 50 + 40 CONTINUE + 50 CONTINUE + KR = I + END IF + END IF +* + IF( KL.NE.KLN ) THEN + KLN = KL +* +* Compute infinity-norm of submatrix H(KL:KR,KL:KR) if it +* has not ben computed before. +* + HNORM = ZLANHS( 'I', KR-KL+1, H( KL, KL ), LDH, RWORK ) + IF( DISNAN( HNORM ) ) THEN + INFO = -6 + RETURN + ELSE IF( HNORM.GT.RZERO ) THEN + EPS3 = HNORM*ULP + ELSE + EPS3 = SMLNUM + END IF + END IF +* +* Perturb eigenvalue if it is close to any previous +* selected eigenvalues affiliated to the submatrix +* H(KL:KR,KL:KR). Close roots are modified by EPS3. +* + WK = W( K ) + 60 CONTINUE + DO 70 I = K - 1, KL, -1 + IF( SELECT( I ) .AND. CABS1( W( I )-WK ).LT.EPS3 ) THEN + WK = WK + EPS3 + GO TO 60 + END IF + 70 CONTINUE + W( K ) = WK +* + IF( LEFTV ) THEN +* +* Compute left eigenvector. +* + CALL ZLAEIN( .FALSE., NOINIT, N-KL+1, H( KL, KL ), LDH, + $ WK, VL( KL, KS ), WORK, LDWORK, RWORK, EPS3, + $ SMLNUM, IINFO ) + IF( IINFO.GT.0 ) THEN + INFO = INFO + 1 + IFAILL( KS ) = K + ELSE + IFAILL( KS ) = 0 + END IF + DO 80 I = 1, KL - 1 + VL( I, KS ) = ZERO + 80 CONTINUE + END IF + IF( RIGHTV ) THEN +* +* Compute right eigenvector. +* + CALL ZLAEIN( .TRUE., NOINIT, KR, H, LDH, WK, VR( 1, KS ), + $ WORK, LDWORK, RWORK, EPS3, SMLNUM, IINFO ) + IF( IINFO.GT.0 ) THEN + INFO = INFO + 1 + IFAILR( KS ) = K + ELSE + IFAILR( KS ) = 0 + END IF + DO 90 I = KR + 1, N + VR( I, KS ) = ZERO + 90 CONTINUE + END IF + KS = KS + 1 + END IF + 100 CONTINUE +* + RETURN +* +* End of ZHSEIN +* + END diff --git a/dspl/liblapack/SRC/zhseqr.f b/dspl/liblapack/SRC/zhseqr.f new file mode 100644 index 0000000..1e8134c --- /dev/null +++ b/dspl/liblapack/SRC/zhseqr.f @@ -0,0 +1,498 @@ +*> \brief \b ZHSEQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHSEQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, W, Z, LDZ, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, ILO, INFO, LDH, LDZ, LWORK, N +* CHARACTER COMPZ, JOB +* .. +* .. Array Arguments .. +* COMPLEX*16 H( LDH, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHSEQR computes the eigenvalues of a Hessenberg matrix H +*> and, optionally, the matrices T and Z from the Schur decomposition +*> H = Z T Z**H, where T is an upper triangular matrix (the +*> Schur form), and Z is the unitary matrix of Schur vectors. +*> +*> Optionally Z may be postmultiplied into an input unitary +*> matrix Q so that this routine can give the Schur factorization +*> of a matrix A which has been reduced to the Hessenberg form H +*> by the unitary matrix Q: A = Q*H*Q**H = (QZ)*T*(QZ)**H. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> = 'E': compute eigenvalues only; +*> = 'S': compute eigenvalues and the Schur form T. +*> \endverbatim +*> +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': no Schur vectors are computed; +*> = 'I': Z is initialized to the unit matrix and the matrix Z +*> of Schur vectors of H is returned; +*> = 'V': Z must contain an unitary matrix Q on entry, and +*> the product Q*Z is returned. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N .GE. 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> It is assumed that H is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally +*> set by a previous call to ZGEBAL, and then passed to ZGEHRD +*> when the matrix output by ZGEBAL is reduced to Hessenberg +*> form. Otherwise ILO and IHI should be set to 1 and N +*> respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. +*> If N = 0, then ILO = 1 and IHI = 0. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is COMPLEX*16 array, dimension (LDH,N) +*> On entry, the upper Hessenberg matrix H. +*> On exit, if INFO = 0 and JOB = 'S', H contains the upper +*> triangular matrix T from the Schur decomposition (the +*> Schur form). If INFO = 0 and JOB = 'E', the contents of +*> H are unspecified on exit. (The output value of H when +*> INFO.GT.0 is given under the description of INFO below.) +*> +*> Unlike earlier versions of ZHSEQR, this subroutine may +*> explicitly H(i,j) = 0 for i.GT.j and j = 1, 2, ... ILO-1 +*> or j = IHI+1, IHI+2, ... N. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH .GE. max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX*16 array, dimension (N) +*> The computed eigenvalues. If JOB = 'S', the eigenvalues are +*> stored in the same order as on the diagonal of the Schur +*> form returned in H, with W(i) = H(i,i). +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ,N) +*> If COMPZ = 'N', Z is not referenced. +*> If COMPZ = 'I', on entry Z need not be set and on exit, +*> if INFO = 0, Z contains the unitary matrix Z of the Schur +*> vectors of H. If COMPZ = 'V', on entry Z must contain an +*> N-by-N matrix Q, which is assumed to be equal to the unit +*> matrix except for the submatrix Z(ILO:IHI,ILO:IHI). On exit, +*> if INFO = 0, Z contains Q*Z. +*> Normally Q is the unitary matrix generated by ZUNGHR +*> after the call to ZGEHRD which formed the Hessenberg matrix +*> H. (The output value of Z when INFO.GT.0 is given under +*> the description of INFO below.) +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. if COMPZ = 'I' or +*> COMPZ = 'V', then LDZ.GE.MAX(1,N). Otherwize, LDZ.GE.1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns an estimate of +*> the optimal value for LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK .GE. max(1,N) +*> is sufficient and delivers very good and sometimes +*> optimal performance. However, LWORK as large as 11*N +*> may be required for optimal performance. A workspace +*> query is recommended to determine the optimal workspace +*> size. +*> +*> If LWORK = -1, then ZHSEQR does a workspace query. +*> In this case, ZHSEQR checks the input parameters and +*> estimates the optimal workspace size for the given +*> values of N, ILO and IHI. The estimate is returned +*> in WORK(1). No error message related to LWORK is +*> issued by XERBLA. Neither H nor Z are accessed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> .LT. 0: if INFO = -i, the i-th argument had an illegal +*> value +*> .GT. 0: if INFO = i, ZHSEQR failed to compute all of +*> the eigenvalues. Elements 1:ilo-1 and i+1:n of WR +*> and WI contain those eigenvalues which have been +*> successfully computed. (Failures are rare.) +*> +*> If INFO .GT. 0 and JOB = 'E', then on exit, the +*> remaining unconverged eigenvalues are the eigen- +*> values of the upper Hessenberg matrix rows and +*> columns ILO through INFO of the final, output +*> value of H. +*> +*> If INFO .GT. 0 and JOB = 'S', then on exit +*> +*> (*) (initial value of H)*U = U*(final value of H) +*> +*> where U is a unitary matrix. The final +*> value of H is upper Hessenberg and triangular in +*> rows and columns INFO+1 through IHI. +*> +*> If INFO .GT. 0 and COMPZ = 'V', then on exit +*> +*> (final value of Z) = (initial value of Z)*U +*> +*> where U is the unitary matrix in (*) (regard- +*> less of the value of JOB.) +*> +*> If INFO .GT. 0 and COMPZ = 'I', then on exit +*> (final value of Z) = U +*> where U is the unitary matrix in (*) (regard- +*> less of the value of JOB.) +*> +*> If INFO .GT. 0 and COMPZ = 'N', then Z is not +*> accessed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Default values supplied by +*> ILAENV(ISPEC,'ZHSEQR',JOB(:1)//COMPZ(:1),N,ILO,IHI,LWORK). +*> It is suggested that these defaults be adjusted in order +*> to attain best performance in each particular +*> computational environment. +*> +*> ISPEC=12: The ZLAHQR vs ZLAQR0 crossover point. +*> Default: 75. (Must be at least 11.) +*> +*> ISPEC=13: Recommended deflation window size. +*> This depends on ILO, IHI and NS. NS is the +*> number of simultaneous shifts returned +*> by ILAENV(ISPEC=15). (See ISPEC=15 below.) +*> The default for (IHI-ILO+1).LE.500 is NS. +*> The default for (IHI-ILO+1).GT.500 is 3*NS/2. +*> +*> ISPEC=14: Nibble crossover point. (See IPARMQ for +*> details.) Default: 14% of deflation window +*> size. +*> +*> ISPEC=15: Number of simultaneous shifts in a multishift +*> QR iteration. +*> +*> If IHI-ILO+1 is ... +*> +*> greater than ...but less ... the +*> or equal to ... than default is +*> +*> 1 30 NS = 2(+) +*> 30 60 NS = 4(+) +*> 60 150 NS = 10(+) +*> 150 590 NS = ** +*> 590 3000 NS = 64 +*> 3000 6000 NS = 128 +*> 6000 infinity NS = 256 +*> +*> (+) By default some or all matrices of this order +*> are passed to the implicit double shift routine +*> ZLAHQR and this parameter is ignored. See +*> ISPEC=12 above and comments in IPARMQ for +*> details. +*> +*> (**) The asterisks (**) indicate an ad-hoc +*> function of N increasing from 10 to 64. +*> +*> ISPEC=16: Select structured matrix multiply. +*> If the number of simultaneous shifts (specified +*> by ISPEC=15) is less than 14, then the default +*> for ISPEC=16 is 0. Otherwise the default for +*> ISPEC=16 is 2. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages +*> 929--947, 2002. +*> \n +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part II: Aggressive Early Deflation, SIAM Journal +*> of Matrix Analysis, volume 23, pages 948--973, 2002. +* +* ===================================================================== + SUBROUTINE ZHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, W, Z, LDZ, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDH, LDZ, LWORK, N + CHARACTER COMPZ, JOB +* .. +* .. Array Arguments .. + COMPLEX*16 H( LDH, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* +* ==== Matrices of order NTINY or smaller must be processed by +* . ZLAHQR because of insufficient subdiagonal scratch space. +* . (This is a hard limit.) ==== + INTEGER NTINY + PARAMETER ( NTINY = 11 ) +* +* ==== NL allocates some local workspace to help small matrices +* . through a rare ZLAHQR failure. NL .GT. NTINY = 11 is +* . required and NL .LE. NMIN = ILAENV(ISPEC=12,...) is recom- +* . mended. (The default value of NMIN is 75.) Using NL = 49 +* . allows up to six simultaneous shifts and a 16-by-16 +* . deflation window. ==== + INTEGER NL + PARAMETER ( NL = 49 ) + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0d0, 0.0d0 ), + $ ONE = ( 1.0d0, 0.0d0 ) ) + DOUBLE PRECISION RZERO + PARAMETER ( RZERO = 0.0d0 ) +* .. +* .. Local Arrays .. + COMPLEX*16 HL( NL, NL ), WORKL( NL ) +* .. +* .. Local Scalars .. + INTEGER KBOT, NMIN + LOGICAL INITZ, LQUERY, WANTT, WANTZ +* .. +* .. External Functions .. + INTEGER ILAENV + LOGICAL LSAME + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZLACPY, ZLAHQR, ZLAQR0, ZLASET +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, MAX, MIN +* .. +* .. Executable Statements .. +* +* ==== Decode and check the input parameters. ==== +* + WANTT = LSAME( JOB, 'S' ) + INITZ = LSAME( COMPZ, 'I' ) + WANTZ = INITZ .OR. LSAME( COMPZ, 'V' ) + WORK( 1 ) = DCMPLX( DBLE( MAX( 1, N ) ), RZERO ) + LQUERY = LWORK.EQ.-1 +* + INFO = 0 + IF( .NOT.LSAME( JOB, 'E' ) .AND. .NOT.WANTT ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPZ, 'N' ) .AND. .NOT.WANTZ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -5 + ELSE IF( LDH.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.MAX( 1, N ) ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.NE.0 ) THEN +* +* ==== Quick return in case of invalid argument. ==== +* + CALL XERBLA( 'ZHSEQR', -INFO ) + RETURN +* + ELSE IF( N.EQ.0 ) THEN +* +* ==== Quick return in case N = 0; nothing to do. ==== +* + RETURN +* + ELSE IF( LQUERY ) THEN +* +* ==== Quick return in case of a workspace query ==== +* + CALL ZLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILO, IHI, Z, + $ LDZ, WORK, LWORK, INFO ) +* ==== Ensure reported workspace size is backward-compatible with +* . previous LAPACK versions. ==== + WORK( 1 ) = DCMPLX( MAX( DBLE( WORK( 1 ) ), DBLE( MAX( 1, + $ N ) ) ), RZERO ) + RETURN +* + ELSE +* +* ==== copy eigenvalues isolated by ZGEBAL ==== +* + IF( ILO.GT.1 ) + $ CALL ZCOPY( ILO-1, H, LDH+1, W, 1 ) + IF( IHI.LT.N ) + $ CALL ZCOPY( N-IHI, H( IHI+1, IHI+1 ), LDH+1, W( IHI+1 ), 1 ) +* +* ==== Initialize Z, if requested ==== +* + IF( INITZ ) + $ CALL ZLASET( 'A', N, N, ZERO, ONE, Z, LDZ ) +* +* ==== Quick return if possible ==== +* + IF( ILO.EQ.IHI ) THEN + W( ILO ) = H( ILO, ILO ) + RETURN + END IF +* +* ==== ZLAHQR/ZLAQR0 crossover point ==== +* + NMIN = ILAENV( 12, 'ZHSEQR', JOB( : 1 ) // COMPZ( : 1 ), N, + $ ILO, IHI, LWORK ) + NMIN = MAX( NTINY, NMIN ) +* +* ==== ZLAQR0 for big matrices; ZLAHQR for small ones ==== +* + IF( N.GT.NMIN ) THEN + CALL ZLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILO, IHI, + $ Z, LDZ, WORK, LWORK, INFO ) + ELSE +* +* ==== Small matrix ==== +* + CALL ZLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILO, IHI, + $ Z, LDZ, INFO ) +* + IF( INFO.GT.0 ) THEN +* +* ==== A rare ZLAHQR failure! ZLAQR0 sometimes succeeds +* . when ZLAHQR fails. ==== +* + KBOT = INFO +* + IF( N.GE.NL ) THEN +* +* ==== Larger matrices have enough subdiagonal scratch +* . space to call ZLAQR0 directly. ==== +* + CALL ZLAQR0( WANTT, WANTZ, N, ILO, KBOT, H, LDH, W, + $ ILO, IHI, Z, LDZ, WORK, LWORK, INFO ) +* + ELSE +* +* ==== Tiny matrices don't have enough subdiagonal +* . scratch space to benefit from ZLAQR0. Hence, +* . tiny matrices must be copied into a larger +* . array before calling ZLAQR0. ==== +* + CALL ZLACPY( 'A', N, N, H, LDH, HL, NL ) + HL( N+1, N ) = ZERO + CALL ZLASET( 'A', NL, NL-N, ZERO, ZERO, HL( 1, N+1 ), + $ NL ) + CALL ZLAQR0( WANTT, WANTZ, NL, ILO, KBOT, HL, NL, W, + $ ILO, IHI, Z, LDZ, WORKL, NL, INFO ) + IF( WANTT .OR. INFO.NE.0 ) + $ CALL ZLACPY( 'A', N, N, HL, NL, H, LDH ) + END IF + END IF + END IF +* +* ==== Clear out the trash, if necessary. ==== +* + IF( ( WANTT .OR. INFO.NE.0 ) .AND. N.GT.2 ) + $ CALL ZLASET( 'L', N-2, N-2, ZERO, ZERO, H( 3, 1 ), LDH ) +* +* ==== Ensure reported workspace size is backward-compatible with +* . previous LAPACK versions. ==== +* + WORK( 1 ) = DCMPLX( MAX( DBLE( MAX( 1, N ) ), + $ DBLE( WORK( 1 ) ) ), RZERO ) + END IF +* +* ==== End of ZHSEQR ==== +* + END diff --git a/dspl/liblapack/SRC/zla_gbamv.f b/dspl/liblapack/SRC/zla_gbamv.f new file mode 100644 index 0000000..6ffaf8f --- /dev/null +++ b/dspl/liblapack/SRC/zla_gbamv.f @@ -0,0 +1,422 @@ +*> \brief \b ZLA_GBAMV performs a matrix-vector operation to calculate error bounds. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_GBAMV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLA_GBAMV( TRANS, M, N, KL, KU, ALPHA, AB, LDAB, X, +* INCX, BETA, Y, INCY ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION ALPHA, BETA +* INTEGER INCX, INCY, LDAB, M, N, KL, KU, TRANS +* .. +* .. Array Arguments .. +* COMPLEX*16 AB( LDAB, * ), X( * ) +* DOUBLE PRECISION Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_GBAMV performs one of the matrix-vector operations +*> +*> y := alpha*abs(A)*abs(x) + beta*abs(y), +*> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), +*> +*> where alpha and beta are scalars, x and y are vectors and A is an +*> m by n matrix. +*> +*> This function is primarily used in calculating error bounds. +*> To protect against underflow during evaluation, components in +*> the resulting vector are perturbed away from zero by (N+1) +*> times the underflow threshold. To prevent unnecessarily large +*> errors for block-structure embedded in general matrices, +*> "symbolically" zero components are not perturbed. A zero +*> entry is considered "symbolic" if all multiplications involved +*> in computing that entry have at least one zero multiplicand. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is INTEGER +*> On entry, TRANS specifies the operation to be performed as +*> follows: +*> +*> BLAS_NO_TRANS y := alpha*abs(A)*abs(x) + beta*abs(y) +*> BLAS_TRANS y := alpha*abs(A**T)*abs(x) + beta*abs(y) +*> BLAS_CONJ_TRANS y := alpha*abs(A**T)*abs(x) + beta*abs(y) +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the number of rows of the matrix A. +*> M must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension ( LDAB, n ) +*> Before entry, the leading m by n part of the array AB must +*> contain the matrix of coefficients. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> On entry, LDAB specifies the first dimension of AB as declared +*> in the calling (sub) program. LDAB must be at least +*> max( 1, m ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension +*> ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +*> and at least +*> ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +*> Before entry, the incremented array X must contain the +*> vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, dimension +*> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +*> and at least +*> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +*> Before entry with BETA non-zero, the incremented array Y +*> must contain the vector y. On exit, Y is overwritten by the +*> updated vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> Unchanged on exit. +*> +*> Level 2 Blas routine. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16GBcomputational +* +* ===================================================================== + SUBROUTINE ZLA_GBAMV( TRANS, M, N, KL, KU, ALPHA, AB, LDAB, X, + $ INCX, BETA, Y, INCY ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER INCX, INCY, LDAB, M, N, KL, KU, TRANS +* .. +* .. Array Arguments .. + COMPLEX*16 AB( LDAB, * ), X( * ) + DOUBLE PRECISION Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL SYMB_ZERO + DOUBLE PRECISION TEMP, SAFE1 + INTEGER I, INFO, IY, J, JX, KX, KY, LENX, LENY, KD, KE + COMPLEX*16 CDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DLAMCH + DOUBLE PRECISION DLAMCH +* .. +* .. External Functions .. + EXTERNAL ILATRANS + INTEGER ILATRANS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, ABS, REAL, DIMAG, SIGN +* .. +* .. Statement Functions + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.( ( TRANS.EQ.ILATRANS( 'N' ) ) + $ .OR. ( TRANS.EQ.ILATRANS( 'T' ) ) + $ .OR. ( TRANS.EQ.ILATRANS( 'C' ) ) ) ) THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( KL.LT.0 .OR. KL.GT.M-1 ) THEN + INFO = 4 + ELSE IF( KU.LT.0 .OR. KU.GT.N-1 ) THEN + INFO = 5 + ELSE IF( LDAB.LT.KL+KU+1 )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZLA_GBAMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Set SAFE1 essentially to be the underflow threshold times the +* number of additions in each row. +* + SAFE1 = DLAMCH( 'Safe minimum' ) + SAFE1 = (N+1)*SAFE1 +* +* Form y := alpha*abs(A)*abs(x) + beta*abs(y). +* +* The O(M*N) SYMB_ZERO tests could be replaced by O(N) queries to +* the inexact flag. Still doesn't help change the iteration order +* to per-column. +* + KD = KU + 1 + KE = KL + 1 + IY = KY + IF ( INCX.EQ.1 ) THEN + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + DO I = 1, LENY + IF ( BETA .EQ. 0.0D+0 ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. 0.0D+0 ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. 0.0D+0 ) THEN + DO J = MAX( I-KL, 1 ), MIN( I+KU, LENX ) + TEMP = CABS1( AB( KD+I-J, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, LENY + IF ( BETA .EQ. 0.0D+0 ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. 0.0D+0 ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. 0.0D+0 ) THEN + DO J = MAX( I-KL, 1 ), MIN( I+KU, LENX ) + TEMP = CABS1( AB( KE-I+J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + ELSE + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + DO I = 1, LENY + IF ( BETA .EQ. 0.0D+0 ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. 0.0D+0 ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. 0.0D+0 ) THEN + JX = KX + DO J = MAX( I-KL, 1 ), MIN( I+KU, LENX ) + TEMP = CABS1( AB( KD+I-J, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( JX ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, LENY + IF ( BETA .EQ. 0.0D+0 ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. 0.0D+0 ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. 0.0D+0 ) THEN + JX = KX + DO J = MAX( I-KL, 1 ), MIN( I+KU, LENX ) + TEMP = CABS1( AB( KE-I+J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( JX ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + + END IF +* + RETURN +* +* End of ZLA_GBAMV +* + END diff --git a/dspl/liblapack/SRC/zla_gbrcond_c.f b/dspl/liblapack/SRC/zla_gbrcond_c.f new file mode 100644 index 0000000..2010912 --- /dev/null +++ b/dspl/liblapack/SRC/zla_gbrcond_c.f @@ -0,0 +1,344 @@ +*> \brief \b ZLA_GBRCOND_C computes the infinity norm condition number of op(A)*inv(diag(c)) for general banded matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_GBRCOND_C + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLA_GBRCOND_C( TRANS, N, KL, KU, AB, +* LDAB, AFB, LDAFB, IPIV, +* C, CAPPLY, INFO, WORK, +* RWORK ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* LOGICAL CAPPLY +* INTEGER N, KL, KU, KD, KE, LDAB, LDAFB, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), WORK( * ) +* DOUBLE PRECISION C( * ), RWORK( * ) +* +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_GBRCOND_C Computes the infinity norm condition number of +*> op(A) * inv(diag(C)) where C is a DOUBLE PRECISION vector. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate Transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is COMPLEX*16 array, dimension (LDAFB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by ZGBTRF. U is stored as an upper triangular +*> band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +*> and the multipliers used during the factorization are stored +*> in rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from the factorization A = P*L*U +*> as computed by ZGBTRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The vector C in the formula op(A) * inv(diag(C)). +*> \endverbatim +*> +*> \param[in] CAPPLY +*> \verbatim +*> CAPPLY is LOGICAL +*> If .TRUE. then access the vector C in the formula above. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GBcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLA_GBRCOND_C( TRANS, N, KL, KU, AB, + $ LDAB, AFB, LDAFB, IPIV, + $ C, CAPPLY, INFO, WORK, + $ RWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + LOGICAL CAPPLY + INTEGER N, KL, KU, KD, KE, LDAB, LDAFB, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), WORK( * ) + DOUBLE PRECISION C( * ), RWORK( * ) +* +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRANS + INTEGER KASE, I, J + DOUBLE PRECISION AINVNM, ANORM, TMP + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLACN2, ZGBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. + ZLA_GBRCOND_C = 0.0D+0 +* + INFO = 0 + NOTRANS = LSAME( TRANS, 'N' ) + IF ( .NOT. NOTRANS .AND. .NOT. LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 .OR. KL.GT.N-1 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 .OR. KU.GT.N-1 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -6 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLA_GBRCOND_C', -INFO ) + RETURN + END IF +* +* Compute norm of op(A)*op2(C). +* + ANORM = 0.0D+0 + KD = KU + 1 + KE = KL + 1 + IF ( NOTRANS ) THEN + DO I = 1, N + TMP = 0.0D+0 + IF ( CAPPLY ) THEN + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + CABS1( AB( KD+I-J, J ) ) / C( J ) + END DO + ELSE + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + CABS1( AB( KD+I-J, J ) ) + END DO + END IF + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + ELSE + DO I = 1, N + TMP = 0.0D+0 + IF ( CAPPLY ) THEN + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + CABS1( AB( KE-I+J, I ) ) / C( J ) + END DO + ELSE + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + CABS1( AB( KE-I+J, I ) ) + END DO + END IF + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + ZLA_GBRCOND_C = 1.0D+0 + RETURN + ELSE IF( ANORM .EQ. 0.0D+0 ) THEN + RETURN + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0D+0 +* + KASE = 0 + 10 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO +* + IF ( NOTRANS ) THEN + CALL ZGBTRS( 'No transpose', N, KL, KU, 1, AFB, LDAFB, + $ IPIV, WORK, N, INFO ) + ELSE + CALL ZGBTRS( 'Conjugate transpose', N, KL, KU, 1, AFB, + $ LDAFB, IPIV, WORK, N, INFO ) + ENDIF +* +* Multiply by inv(C). +* + IF ( CAPPLY ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + ELSE +* +* Multiply by inv(C**H). +* + IF ( CAPPLY ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF +* + IF ( NOTRANS ) THEN + CALL ZGBTRS( 'Conjugate transpose', N, KL, KU, 1, AFB, + $ LDAFB, IPIV, WORK, N, INFO ) + ELSE + CALL ZGBTRS( 'No transpose', N, KL, KU, 1, AFB, LDAFB, + $ IPIV, WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0D+0 ) + $ ZLA_GBRCOND_C = 1.0D+0 / AINVNM +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/zla_gbrcond_x.f b/dspl/liblapack/SRC/zla_gbrcond_x.f new file mode 100644 index 0000000..7e6c12e --- /dev/null +++ b/dspl/liblapack/SRC/zla_gbrcond_x.f @@ -0,0 +1,321 @@ +*> \brief \b ZLA_GBRCOND_X computes the infinity norm condition number of op(A)*diag(x) for general banded matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_GBRCOND_X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLA_GBRCOND_X( TRANS, N, KL, KU, AB, +* LDAB, AFB, LDAFB, IPIV, +* X, INFO, WORK, RWORK ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER N, KL, KU, KD, KE, LDAB, LDAFB, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), WORK( * ), +* $ X( * ) +* DOUBLE PRECISION RWORK( * ) +* +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_GBRCOND_X Computes the infinity norm condition number of +*> op(A) * diag(X) where X is a COMPLEX*16 vector. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate Transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is COMPLEX*16 array, dimension (LDAFB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by ZGBTRF. U is stored as an upper triangular +*> band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +*> and the multipliers used during the factorization are stored +*> in rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from the factorization A = P*L*U +*> as computed by ZGBTRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (N) +*> The vector X in the formula op(A) * diag(X). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GBcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLA_GBRCOND_X( TRANS, N, KL, KU, AB, + $ LDAB, AFB, LDAFB, IPIV, + $ X, INFO, WORK, RWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER N, KL, KU, KD, KE, LDAB, LDAFB, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), WORK( * ), + $ X( * ) + DOUBLE PRECISION RWORK( * ) +* +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRANS + INTEGER KASE, I, J + DOUBLE PRECISION AINVNM, ANORM, TMP + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLACN2, ZGBTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + ZLA_GBRCOND_X = 0.0D+0 +* + INFO = 0 + NOTRANS = LSAME( TRANS, 'N' ) + IF ( .NOT. NOTRANS .AND. .NOT. LSAME(TRANS, 'T') .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KL.LT.0 .OR. KL.GT.N-1 ) THEN + INFO = -3 + ELSE IF( KU.LT.0 .OR. KU.GT.N-1 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KL+KU+1 ) THEN + INFO = -6 + ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLA_GBRCOND_X', -INFO ) + RETURN + END IF +* +* Compute norm of op(A)*op2(C). +* + KD = KU + 1 + KE = KL + 1 + ANORM = 0.0D+0 + IF ( NOTRANS ) THEN + DO I = 1, N + TMP = 0.0D+0 + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + CABS1( AB( KD+I-J, J) * X( J ) ) + END DO + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + ELSE + DO I = 1, N + TMP = 0.0D+0 + DO J = MAX( I-KL, 1 ), MIN( I+KU, N ) + TMP = TMP + CABS1( AB( KE-I+J, I ) * X( J ) ) + END DO + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + ZLA_GBRCOND_X = 1.0D+0 + RETURN + ELSE IF( ANORM .EQ. 0.0D+0 ) THEN + RETURN + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0D+0 +* + KASE = 0 + 10 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO +* + IF ( NOTRANS ) THEN + CALL ZGBTRS( 'No transpose', N, KL, KU, 1, AFB, LDAFB, + $ IPIV, WORK, N, INFO ) + ELSE + CALL ZGBTRS( 'Conjugate transpose', N, KL, KU, 1, AFB, + $ LDAFB, IPIV, WORK, N, INFO ) + ENDIF +* +* Multiply by inv(X). +* + DO I = 1, N + WORK( I ) = WORK( I ) / X( I ) + END DO + ELSE +* +* Multiply by inv(X**H). +* + DO I = 1, N + WORK( I ) = WORK( I ) / X( I ) + END DO +* + IF ( NOTRANS ) THEN + CALL ZGBTRS( 'Conjugate transpose', N, KL, KU, 1, AFB, + $ LDAFB, IPIV, WORK, N, INFO ) + ELSE + CALL ZGBTRS( 'No transpose', N, KL, KU, 1, AFB, LDAFB, + $ IPIV, WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0D+0 ) + $ ZLA_GBRCOND_X = 1.0D+0 / AINVNM +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/zla_gbrfsx_extended.f b/dspl/liblapack/SRC/zla_gbrfsx_extended.f new file mode 100644 index 0000000..7a850f1 --- /dev/null +++ b/dspl/liblapack/SRC/zla_gbrfsx_extended.f @@ -0,0 +1,713 @@ +*> \brief \b ZLA_GBRFSX_EXTENDED improves the computed solution to a system of linear equations for general banded matrices by performing extra-precise iterative refinement and provides error bounds and backward error estimates for the solution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_GBRFSX_EXTENDED + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLA_GBRFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, KL, KU, +* NRHS, AB, LDAB, AFB, LDAFB, IPIV, +* COLEQU, C, B, LDB, Y, LDY, +* BERR_OUT, N_NORMS, ERR_BNDS_NORM, +* ERR_BNDS_COMP, RES, AYB, DY, +* Y_TAIL, RCOND, ITHRESH, RTHRESH, +* DZ_UB, IGNORE_CWISE, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDAB, LDAFB, LDB, LDY, N, KL, KU, NRHS, +* $ PREC_TYPE, TRANS_TYPE, N_NORMS, ITHRESH +* LOGICAL COLEQU, IGNORE_CWISE +* DOUBLE PRECISION RTHRESH, DZ_UB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) +* DOUBLE PRECISION C( * ), AYB(*), RCOND, BERR_OUT( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_GBRFSX_EXTENDED improves the computed solution to a system of +*> linear equations by performing extra-precise iterative refinement +*> and provides error bounds and backward error estimates for the solution. +*> This subroutine is called by ZGBRFSX to perform iterative refinement. +*> In addition to normwise error bound, the code provides maximum +*> componentwise error bound if possible. See comments for ERR_BNDS_NORM +*> and ERR_BNDS_COMP for details of the error bounds. Note that this +*> subroutine is only resonsible for setting the second fields of +*> ERR_BNDS_NORM and ERR_BNDS_COMP. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] PREC_TYPE +*> \verbatim +*> PREC_TYPE is INTEGER +*> Specifies the intermediate precision to be used in refinement. +*> The value is defined by ILAPREC(P) where P is a CHARACTER and +*> P = 'S': Single +*> = 'D': Double +*> = 'I': Indigenous +*> = 'X', 'E': Extra +*> \endverbatim +*> +*> \param[in] TRANS_TYPE +*> \verbatim +*> TRANS_TYPE is INTEGER +*> Specifies the transposition operation on A. +*> The value is defined by ILATRANS(T) where T is a CHARACTER and +*> T = 'N': No transpose +*> = 'T': Transpose +*> = 'C': Conjugate transpose +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0 +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand-sides, i.e., the number of columns of the +*> matrix B. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array A. LDAB >= max(1,N). +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is COMPLEX*16 array, dimension (LDAF,N) +*> The factors L and U from the factorization +*> A = P*L*U as computed by ZGBTRF. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from the factorization A = P*L*U +*> as computed by ZGBTRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> \endverbatim +*> +*> \param[in] COLEQU +*> \verbatim +*> COLEQU is LOGICAL +*> If .TRUE. then column equilibration was done to A before calling +*> this routine. This is needed to compute the solution and error +*> bounds correctly. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If COLEQU = .FALSE., C +*> is not accessed. If C is input, each element of C should be a power +*> of the radix to ensure a reliable solution and error estimates. +*> Scaling by powers of the radix does not cause rounding errors unless +*> the result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right-hand-side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX*16 array, dimension (LDY,NRHS) +*> On entry, the solution matrix X, as computed by ZGBTRS. +*> On exit, the improved solution matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +*> +*> \param[out] BERR_OUT +*> \verbatim +*> BERR_OUT is DOUBLE PRECISION array, dimension (NRHS) +*> On exit, BERR_OUT(j) contains the componentwise relative backward +*> error for right-hand-side j from the formula +*> max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. This is computed by ZLA_LIN_BERR. +*> \endverbatim +*> +*> \param[in] N_NORMS +*> \verbatim +*> N_NORMS is INTEGER +*> Determines which error bounds to return (see ERR_BNDS_NORM +*> and ERR_BNDS_COMP). +*> If N_NORMS >= 1 return normwise error bounds. +*> If N_NORMS >= 2 return componentwise error bounds. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is COMPLEX*16 array, dimension (N) +*> Workspace to hold the intermediate residual. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is DOUBLE PRECISION array, dimension (N) +*> Workspace. +*> \endverbatim +*> +*> \param[in] DY +*> \verbatim +*> DY is COMPLEX*16 array, dimension (N) +*> Workspace to hold the intermediate solution. +*> \endverbatim +*> +*> \param[in] Y_TAIL +*> \verbatim +*> Y_TAIL is COMPLEX*16 array, dimension (N) +*> Workspace to hold the trailing bits of the intermediate solution. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[in] ITHRESH +*> \verbatim +*> ITHRESH is INTEGER +*> The maximum number of residual computations allowed for +*> refinement. The default is 10. For 'aggressive' set to 100 to +*> permit convergence using approximate factorizations or +*> factorizations other than LU. If the factorization uses a +*> technique other than Gaussian elimination, the guarantees in +*> ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. +*> \endverbatim +*> +*> \param[in] RTHRESH +*> \verbatim +*> RTHRESH is DOUBLE PRECISION +*> Determines when to stop refinement if the error estimate stops +*> decreasing. Refinement will stop when the next solution no longer +*> satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is +*> the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The +*> default value is 0.5. For 'aggressive' set to 0.9 to permit +*> convergence on extremely ill-conditioned matrices. See LAWN 165 +*> for more details. +*> \endverbatim +*> +*> \param[in] DZ_UB +*> \verbatim +*> DZ_UB is DOUBLE PRECISION +*> Determines when to start considering componentwise convergence. +*> Componentwise convergence is only considered after each component +*> of the solution Y is stable, which we definte as the relative +*> change in each component being less than DZ_UB. The default value +*> is 0.25, requiring the first bit to be stable. See LAWN 165 for +*> more details. +*> \endverbatim +*> +*> \param[in] IGNORE_CWISE +*> \verbatim +*> IGNORE_CWISE is LOGICAL +*> If .TRUE. then ignore componentwise convergence. Default value +*> is .FALSE.. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> < 0: if INFO = -i, the ith argument to ZGBTRS had an illegal +*> value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16GBcomputational +* +* ===================================================================== + SUBROUTINE ZLA_GBRFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, KL, KU, + $ NRHS, AB, LDAB, AFB, LDAFB, IPIV, + $ COLEQU, C, B, LDB, Y, LDY, + $ BERR_OUT, N_NORMS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, RES, AYB, DY, + $ Y_TAIL, RCOND, ITHRESH, RTHRESH, + $ DZ_UB, IGNORE_CWISE, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDAB, LDAFB, LDB, LDY, N, KL, KU, NRHS, + $ PREC_TYPE, TRANS_TYPE, N_NORMS, ITHRESH + LOGICAL COLEQU, IGNORE_CWISE + DOUBLE PRECISION RTHRESH, DZ_UB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) + DOUBLE PRECISION C( * ), AYB(*), RCOND, BERR_OUT( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + CHARACTER TRANS + INTEGER CNT, I, J, M, X_STATE, Z_STATE, Y_PREC_STATE + DOUBLE PRECISION YK, DYK, YMIN, NORMY, NORMX, NORMDX, DXRAT, + $ DZRAT, PREVNORMDX, PREV_DZ_Z, DXRATMAX, + $ DZRATMAX, DX_X, DZ_Z, FINAL_DX_X, FINAL_DZ_Z, + $ EPS, HUGEVAL, INCR_THRESH + LOGICAL INCR_PREC + COMPLEX*16 ZDUM +* .. +* .. Parameters .. + INTEGER UNSTABLE_STATE, WORKING_STATE, CONV_STATE, + $ NOPROG_STATE, BASE_RESIDUAL, EXTRA_RESIDUAL, + $ EXTRA_Y + PARAMETER ( UNSTABLE_STATE = 0, WORKING_STATE = 1, + $ CONV_STATE = 2, NOPROG_STATE = 3 ) + PARAMETER ( BASE_RESIDUAL = 0, EXTRA_RESIDUAL = 1, + $ EXTRA_Y = 2 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZCOPY, ZGBTRS, ZGBMV, BLAS_ZGBMV_X, + $ BLAS_ZGBMV2_X, ZLA_GBAMV, ZLA_WWADDW, DLAMCH, + $ CHLA_TRANSTYPE, ZLA_LIN_BERR + DOUBLE PRECISION DLAMCH + CHARACTER CHLA_TRANSTYPE +* .. +* .. Intrinsic Functions.. + INTRINSIC ABS, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + IF (INFO.NE.0) RETURN + TRANS = CHLA_TRANSTYPE(TRANS_TYPE) + EPS = DLAMCH( 'Epsilon' ) + HUGEVAL = DLAMCH( 'Overflow' ) +* Force HUGEVAL to Inf + HUGEVAL = HUGEVAL * HUGEVAL +* Using HUGEVAL may lead to spurious underflows. + INCR_THRESH = DBLE( N ) * EPS + M = KL+KU+1 + + DO J = 1, NRHS + Y_PREC_STATE = EXTRA_RESIDUAL + IF ( Y_PREC_STATE .EQ. EXTRA_Y ) THEN + DO I = 1, N + Y_TAIL( I ) = 0.0D+0 + END DO + END IF + + DXRAT = 0.0D+0 + DXRATMAX = 0.0D+0 + DZRAT = 0.0D+0 + DZRATMAX = 0.0D+0 + FINAL_DX_X = HUGEVAL + FINAL_DZ_Z = HUGEVAL + PREVNORMDX = HUGEVAL + PREV_DZ_Z = HUGEVAL + DZ_Z = HUGEVAL + DX_X = HUGEVAL + + X_STATE = WORKING_STATE + Z_STATE = UNSTABLE_STATE + INCR_PREC = .FALSE. + + DO CNT = 1, ITHRESH +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL ZCOPY( N, B( 1, J ), 1, RES, 1 ) + IF ( Y_PREC_STATE .EQ. BASE_RESIDUAL ) THEN + CALL ZGBMV( TRANS, M, N, KL, KU, (-1.0D+0,0.0D+0), AB, + $ LDAB, Y( 1, J ), 1, (1.0D+0,0.0D+0), RES, 1 ) + ELSE IF ( Y_PREC_STATE .EQ. EXTRA_RESIDUAL ) THEN + CALL BLAS_ZGBMV_X( TRANS_TYPE, N, N, KL, KU, + $ (-1.0D+0,0.0D+0), AB, LDAB, Y( 1, J ), 1, + $ (1.0D+0,0.0D+0), RES, 1, PREC_TYPE ) + ELSE + CALL BLAS_ZGBMV2_X( TRANS_TYPE, N, N, KL, KU, + $ (-1.0D+0,0.0D+0), AB, LDAB, Y( 1, J ), Y_TAIL, 1, + $ (1.0D+0,0.0D+0), RES, 1, PREC_TYPE ) + END IF + +! XXX: RES is no longer needed. + CALL ZCOPY( N, RES, 1, DY, 1 ) + CALL ZGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV, DY, N, + $ INFO ) +* +* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. +* + NORMX = 0.0D+0 + NORMY = 0.0D+0 + NORMDX = 0.0D+0 + DZ_Z = 0.0D+0 + YMIN = HUGEVAL + + DO I = 1, N + YK = CABS1( Y( I, J ) ) + DYK = CABS1( DY( I ) ) + + IF (YK .NE. 0.0D+0) THEN + DZ_Z = MAX( DZ_Z, DYK / YK ) + ELSE IF ( DYK .NE. 0.0D+0 ) THEN + DZ_Z = HUGEVAL + END IF + + YMIN = MIN( YMIN, YK ) + + NORMY = MAX( NORMY, YK ) + + IF ( COLEQU ) THEN + NORMX = MAX( NORMX, YK * C( I ) ) + NORMDX = MAX(NORMDX, DYK * C(I)) + ELSE + NORMX = NORMY + NORMDX = MAX( NORMDX, DYK ) + END IF + END DO + + IF ( NORMX .NE. 0.0D+0 ) THEN + DX_X = NORMDX / NORMX + ELSE IF ( NORMDX .EQ. 0.0D+0 ) THEN + DX_X = 0.0D+0 + ELSE + DX_X = HUGEVAL + END IF + + DXRAT = NORMDX / PREVNORMDX + DZRAT = DZ_Z / PREV_DZ_Z +* +* Check termination criteria. +* + IF (.NOT.IGNORE_CWISE + $ .AND. YMIN*RCOND .LT. INCR_THRESH*NORMY + $ .AND. Y_PREC_STATE .LT. EXTRA_Y ) + $ INCR_PREC = .TRUE. + + IF ( X_STATE .EQ. NOPROG_STATE .AND. DXRAT .LE. RTHRESH ) + $ X_STATE = WORKING_STATE + IF ( X_STATE .EQ. WORKING_STATE ) THEN + IF ( DX_X .LE. EPS ) THEN + X_STATE = CONV_STATE + ELSE IF ( DXRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + X_STATE = NOPROG_STATE + END IF + ELSE + IF ( DXRAT .GT. DXRATMAX ) DXRATMAX = DXRAT + END IF + IF ( X_STATE .GT. WORKING_STATE ) FINAL_DX_X = DX_X + END IF + + IF ( Z_STATE .EQ. UNSTABLE_STATE .AND. DZ_Z .LE. DZ_UB ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. NOPROG_STATE .AND. DZRAT .LE. RTHRESH ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. WORKING_STATE ) THEN + IF ( DZ_Z .LE. EPS ) THEN + Z_STATE = CONV_STATE + ELSE IF ( DZ_Z .GT. DZ_UB ) THEN + Z_STATE = UNSTABLE_STATE + DZRATMAX = 0.0D+0 + FINAL_DZ_Z = HUGEVAL + ELSE IF ( DZRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + Z_STATE = NOPROG_STATE + END IF + ELSE + IF ( DZRAT .GT. DZRATMAX ) DZRATMAX = DZRAT + END IF + IF ( Z_STATE .GT. WORKING_STATE ) FINAL_DZ_Z = DZ_Z + END IF +* +* Exit if both normwise and componentwise stopped working, +* but if componentwise is unstable, let it go at least two +* iterations. +* + IF ( X_STATE.NE.WORKING_STATE ) THEN + IF ( IGNORE_CWISE ) GOTO 666 + IF ( Z_STATE.EQ.NOPROG_STATE .OR. Z_STATE.EQ.CONV_STATE ) + $ GOTO 666 + IF ( Z_STATE.EQ.UNSTABLE_STATE .AND. CNT.GT.1 ) GOTO 666 + END IF + + IF ( INCR_PREC ) THEN + INCR_PREC = .FALSE. + Y_PREC_STATE = Y_PREC_STATE + 1 + DO I = 1, N + Y_TAIL( I ) = 0.0D+0 + END DO + END IF + + PREVNORMDX = NORMDX + PREV_DZ_Z = DZ_Z +* +* Update soluton. +* + IF ( Y_PREC_STATE .LT. EXTRA_Y ) THEN + CALL ZAXPY( N, (1.0D+0,0.0D+0), DY, 1, Y(1,J), 1 ) + ELSE + CALL ZLA_WWADDW( N, Y(1,J), Y_TAIL, DY ) + END IF + + END DO +* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. + 666 CONTINUE +* +* Set final_* when cnt hits ithresh. +* + IF ( X_STATE .EQ. WORKING_STATE ) FINAL_DX_X = DX_X + IF ( Z_STATE .EQ. WORKING_STATE ) FINAL_DZ_Z = DZ_Z +* +* Compute error bounds. +* + IF ( N_NORMS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = + $ FINAL_DX_X / (1 - DXRATMAX) + END IF + IF ( N_NORMS .GE. 2 ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = + $ FINAL_DZ_Z / (1 - DZRATMAX) + END IF +* +* Compute componentwise relative backward error from formula +* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL ZCOPY( N, B( 1, J ), 1, RES, 1 ) + CALL ZGBMV( TRANS, N, N, KL, KU, (-1.0D+0,0.0D+0), AB, LDAB, + $ Y(1,J), 1, (1.0D+0,0.0D+0), RES, 1 ) + + DO I = 1, N + AYB( I ) = CABS1( B( I, J ) ) + END DO +* +* Compute abs(op(A_s))*abs(Y) + abs(B_s). +* + CALL ZLA_GBAMV( TRANS_TYPE, N, N, KL, KU, 1.0D+0, + $ AB, LDAB, Y(1, J), 1, 1.0D+0, AYB, 1 ) + + CALL ZLA_LIN_BERR( N, N, 1, RES, AYB, BERR_OUT( J ) ) +* +* End of loop for each RHS. +* + END DO +* + RETURN + END diff --git a/dspl/liblapack/SRC/zla_gbrpvgrw.f b/dspl/liblapack/SRC/zla_gbrpvgrw.f new file mode 100644 index 0000000..b2ae02b --- /dev/null +++ b/dspl/liblapack/SRC/zla_gbrpvgrw.f @@ -0,0 +1,167 @@ +*> \brief \b ZLA_GBRPVGRW computes the reciprocal pivot growth factor norm(A)/norm(U) for a general banded matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_GBRPVGRW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLA_GBRPVGRW( N, KL, KU, NCOLS, AB, +* LDAB, AFB, LDAFB ) +* +* .. Scalar Arguments .. +* INTEGER N, KL, KU, NCOLS, LDAB, LDAFB +* .. +* .. Array Arguments .. +* COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_GBRPVGRW computes the reciprocal pivot growth factor +*> norm(A)/norm(U). The "max absolute element" norm is used. If this is +*> much less than 1, the stability of the LU factorization of the +*> (equilibrated) matrix A could be poor. This also means that the +*> solution X, estimated condition numbers, and error bounds could be +*> unreliable. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in] NCOLS +*> \verbatim +*> NCOLS is INTEGER +*> The number of columns of the matrix A. NCOLS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl) +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is COMPLEX*16 array, dimension (LDAFB,N) +*> Details of the LU factorization of the band matrix A, as +*> computed by ZGBTRF. U is stored as an upper triangular +*> band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, +*> and the multipliers used during the factorization are stored +*> in rows KL+KU+2 to 2*KL+KU+1. +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= 2*KL+KU+1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GBcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLA_GBRPVGRW( N, KL, KU, NCOLS, AB, + $ LDAB, AFB, LDAFB ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER N, KL, KU, NCOLS, LDAB, LDAFB +* .. +* .. Array Arguments .. + COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J, KD + DOUBLE PRECISION AMAX, UMAX, RPVGRW + COMPLEX*16 ZDUM +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, DIMAG +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + RPVGRW = 1.0D+0 + + KD = KU + 1 + DO J = 1, NCOLS + AMAX = 0.0D+0 + UMAX = 0.0D+0 + DO I = MAX( J-KU, 1 ), MIN( J+KL, N ) + AMAX = MAX( CABS1( AB( KD+I-J, J ) ), AMAX ) + END DO + DO I = MAX( J-KU, 1 ), J + UMAX = MAX( CABS1( AFB( KD+I-J, J ) ), UMAX ) + END DO + IF ( UMAX /= 0.0D+0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + ZLA_GBRPVGRW = RPVGRW + END diff --git a/dspl/liblapack/SRC/zla_geamv.f b/dspl/liblapack/SRC/zla_geamv.f new file mode 100644 index 0000000..221e0c3 --- /dev/null +++ b/dspl/liblapack/SRC/zla_geamv.f @@ -0,0 +1,406 @@ +*> \brief \b ZLA_GEAMV computes a matrix-vector product using a general matrix to calculate error bounds. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_GEAMV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLA_GEAMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, +* Y, INCY ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION ALPHA, BETA +* INTEGER INCX, INCY, LDA, M, N +* INTEGER TRANS +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), X( * ) +* DOUBLE PRECISION Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_GEAMV performs one of the matrix-vector operations +*> +*> y := alpha*abs(A)*abs(x) + beta*abs(y), +*> or y := alpha*abs(A)**T*abs(x) + beta*abs(y), +*> +*> where alpha and beta are scalars, x and y are vectors and A is an +*> m by n matrix. +*> +*> This function is primarily used in calculating error bounds. +*> To protect against underflow during evaluation, components in +*> the resulting vector are perturbed away from zero by (N+1) +*> times the underflow threshold. To prevent unnecessarily large +*> errors for block-structure embedded in general matrices, +*> "symbolically" zero components are not perturbed. A zero +*> entry is considered "symbolic" if all multiplications involved +*> in computing that entry have at least one zero multiplicand. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is INTEGER +*> On entry, TRANS specifies the operation to be performed as +*> follows: +*> +*> BLAS_NO_TRANS y := alpha*abs(A)*abs(x) + beta*abs(y) +*> BLAS_TRANS y := alpha*abs(A**T)*abs(x) + beta*abs(y) +*> BLAS_CONJ_TRANS y := alpha*abs(A**T)*abs(x) + beta*abs(y) +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the number of rows of the matrix A. +*> M must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension ( LDA, n ) +*> Before entry, the leading m by n part of the array A must +*> contain the matrix of coefficients. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, m ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension at least +*> ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +*> and at least +*> ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +*> Before entry, the incremented array X must contain the +*> vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, dimension +*> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +*> and at least +*> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +*> Before entry with BETA non-zero, the incremented array Y +*> must contain the vector y. On exit, Y is overwritten by the +*> updated vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> Unchanged on exit. +*> +*> Level 2 Blas routine. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16GEcomputational +* +* ===================================================================== + SUBROUTINE ZLA_GEAMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, + $ Y, INCY ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER INCX, INCY, LDA, M, N + INTEGER TRANS +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ) + DOUBLE PRECISION Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL SYMB_ZERO + DOUBLE PRECISION TEMP, SAFE1 + INTEGER I, INFO, IY, J, JX, KX, KY, LENX, LENY + COMPLEX*16 CDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DLAMCH + DOUBLE PRECISION DLAMCH +* .. +* .. External Functions .. + EXTERNAL ILATRANS + INTEGER ILATRANS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, ABS, REAL, DIMAG, SIGN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT.( ( TRANS.EQ.ILATRANS( 'N' ) ) + $ .OR. ( TRANS.EQ.ILATRANS( 'T' ) ) + $ .OR. ( TRANS.EQ.ILATRANS( 'C' ) ) ) ) THEN + INFO = 1 + ELSE IF( M.LT.0 )THEN + INFO = 2 + ELSE IF( N.LT.0 )THEN + INFO = 3 + ELSE IF( LDA.LT.MAX( 1, M ) )THEN + INFO = 6 + ELSE IF( INCX.EQ.0 )THEN + INFO = 8 + ELSE IF( INCY.EQ.0 )THEN + INFO = 11 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZLA_GEAMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR. + $ ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( LENX - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( LENY - 1 )*INCY + END IF +* +* Set SAFE1 essentially to be the underflow threshold times the +* number of additions in each row. +* + SAFE1 = DLAMCH( 'Safe minimum' ) + SAFE1 = (N+1)*SAFE1 +* +* Form y := alpha*abs(A)*abs(x) + beta*abs(y). +* +* The O(M*N) SYMB_ZERO tests could be replaced by O(N) queries to +* the inexact flag. Still doesn't help change the iteration order +* to per-column. +* + IY = KY + IF ( INCX.EQ.1 ) THEN + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + DO I = 1, LENY + IF ( BETA .EQ. 0.0D+0 ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. 0.0D+0 ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. 0.0D+0 ) THEN + DO J = 1, LENX + TEMP = CABS1( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) Y( IY ) = + $ Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, LENY + IF ( BETA .EQ. 0.0D+0 ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. 0.0D+0 ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. 0.0D+0 ) THEN + DO J = 1, LENX + TEMP = CABS1( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) Y( IY ) = + $ Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + ELSE + IF( TRANS.EQ.ILATRANS( 'N' ) )THEN + DO I = 1, LENY + IF ( BETA .EQ. 0.0D+0 ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. 0.0D+0 ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. 0.0D+0 ) THEN + JX = KX + DO J = 1, LENX + TEMP = CABS1( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( JX ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) Y( IY ) = + $ Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, LENY + IF ( BETA .EQ. 0.0D+0 ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. 0.0D+0 ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. 0.0D+0 ) THEN + JX = KX + DO J = 1, LENX + TEMP = CABS1( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( JX ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) Y( IY ) = + $ Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + + END IF +* + RETURN +* +* End of ZLA_GEAMV +* + END diff --git a/dspl/liblapack/SRC/zla_gercond_c.f b/dspl/liblapack/SRC/zla_gercond_c.f new file mode 100644 index 0000000..e629f90 --- /dev/null +++ b/dspl/liblapack/SRC/zla_gercond_c.f @@ -0,0 +1,318 @@ +*> \brief \b ZLA_GERCOND_C computes the infinity norm condition number of op(A)*inv(diag(c)) for general matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_GERCOND_C + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLA_GERCOND_C( TRANS, N, A, LDA, AF, +* LDAF, IPIV, C, CAPPLY, +* INFO, WORK, RWORK ) +* +* .. Scalar Aguments .. +* CHARACTER TRANS +* LOGICAL CAPPLY +* INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), WORK( * ) +* DOUBLE PRECISION C( * ), RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_GERCOND_C computes the infinity norm condition number of +*> op(A) * inv(diag(C)) where C is a DOUBLE PRECISION vector. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate Transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The factors L and U from the factorization +*> A = P*L*U as computed by ZGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from the factorization A = P*L*U +*> as computed by ZGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The vector C in the formula op(A) * inv(diag(C)). +*> \endverbatim +*> +*> \param[in] CAPPLY +*> \verbatim +*> CAPPLY is LOGICAL +*> If .TRUE. then access the vector C in the formula above. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLA_GERCOND_C( TRANS, N, A, LDA, AF, + $ LDAF, IPIV, C, CAPPLY, + $ INFO, WORK, RWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Aguments .. + CHARACTER TRANS + LOGICAL CAPPLY + INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), WORK( * ) + DOUBLE PRECISION C( * ), RWORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRANS + INTEGER KASE, I, J + DOUBLE PRECISION AINVNM, ANORM, TMP + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLACN2, ZGETRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL, DIMAG +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. + ZLA_GERCOND_C = 0.0D+0 +* + INFO = 0 + NOTRANS = LSAME( TRANS, 'N' ) + IF ( .NOT. NOTRANS .AND. .NOT. LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLA_GERCOND_C', -INFO ) + RETURN + END IF +* +* Compute norm of op(A)*op2(C). +* + ANORM = 0.0D+0 + IF ( NOTRANS ) THEN + DO I = 1, N + TMP = 0.0D+0 + IF ( CAPPLY ) THEN + DO J = 1, N + TMP = TMP + CABS1( A( I, J ) ) / C( J ) + END DO + ELSE + DO J = 1, N + TMP = TMP + CABS1( A( I, J ) ) + END DO + END IF + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + ELSE + DO I = 1, N + TMP = 0.0D+0 + IF ( CAPPLY ) THEN + DO J = 1, N + TMP = TMP + CABS1( A( J, I ) ) / C( J ) + END DO + ELSE + DO J = 1, N + TMP = TMP + CABS1( A( J, I ) ) + END DO + END IF + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + ZLA_GERCOND_C = 1.0D+0 + RETURN + ELSE IF( ANORM .EQ. 0.0D+0 ) THEN + RETURN + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0D+0 +* + KASE = 0 + 10 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO +* + IF (NOTRANS) THEN + CALL ZGETRS( 'No transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL ZGETRS( 'Conjugate transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ENDIF +* +* Multiply by inv(C). +* + IF ( CAPPLY ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + ELSE +* +* Multiply by inv(C**H). +* + IF ( CAPPLY ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF +* + IF ( NOTRANS ) THEN + CALL ZGETRS( 'Conjugate transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL ZGETRS( 'No transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0D+0 ) + $ ZLA_GERCOND_C = 1.0D+0 / AINVNM +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/zla_gercond_x.f b/dspl/liblapack/SRC/zla_gercond_x.f new file mode 100644 index 0000000..244bf58 --- /dev/null +++ b/dspl/liblapack/SRC/zla_gercond_x.f @@ -0,0 +1,294 @@ +*> \brief \b ZLA_GERCOND_X computes the infinity norm condition number of op(A)*diag(x) for general matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_GERCOND_X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLA_GERCOND_X( TRANS, N, A, LDA, AF, +* LDAF, IPIV, X, INFO, +* WORK, RWORK ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), WORK( * ), X( * ) +* DOUBLE PRECISION RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_GERCOND_X computes the infinity norm condition number of +*> op(A) * diag(X) where X is a COMPLEX*16 vector. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate Transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The factors L and U from the factorization +*> A = P*L*U as computed by ZGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from the factorization A = P*L*U +*> as computed by ZGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (N) +*> The vector X in the formula op(A) * diag(X). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLA_GERCOND_X( TRANS, N, A, LDA, AF, + $ LDAF, IPIV, X, INFO, + $ WORK, RWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), WORK( * ), X( * ) + DOUBLE PRECISION RWORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL NOTRANS + INTEGER KASE + DOUBLE PRECISION AINVNM, ANORM, TMP + INTEGER I, J + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLACN2, ZGETRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL, DIMAG +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + ZLA_GERCOND_X = 0.0D+0 +* + INFO = 0 + NOTRANS = LSAME( TRANS, 'N' ) + IF ( .NOT. NOTRANS .AND. .NOT. LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLA_GERCOND_X', -INFO ) + RETURN + END IF +* +* Compute norm of op(A)*op2(C). +* + ANORM = 0.0D+0 + IF ( NOTRANS ) THEN + DO I = 1, N + TMP = 0.0D+0 + DO J = 1, N + TMP = TMP + CABS1( A( I, J ) * X( J ) ) + END DO + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + ELSE + DO I = 1, N + TMP = 0.0D+0 + DO J = 1, N + TMP = TMP + CABS1( A( J, I ) * X( J ) ) + END DO + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + ZLA_GERCOND_X = 1.0D+0 + RETURN + ELSE IF( ANORM .EQ. 0.0D+0 ) THEN + RETURN + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0D+0 +* + KASE = 0 + 10 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* Multiply by R. + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO +* + IF ( NOTRANS ) THEN + CALL ZGETRS( 'No transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL ZGETRS( 'Conjugate transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ENDIF +* +* Multiply by inv(X). +* + DO I = 1, N + WORK( I ) = WORK( I ) / X( I ) + END DO + ELSE +* +* Multiply by inv(X**H). +* + DO I = 1, N + WORK( I ) = WORK( I ) / X( I ) + END DO +* + IF ( NOTRANS ) THEN + CALL ZGETRS( 'Conjugate transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL ZGETRS( 'No transpose', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0D+0 ) + $ ZLA_GERCOND_X = 1.0D+0 / AINVNM +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/zla_gerfsx_extended.f b/dspl/liblapack/SRC/zla_gerfsx_extended.f new file mode 100644 index 0000000..2e93e26 --- /dev/null +++ b/dspl/liblapack/SRC/zla_gerfsx_extended.f @@ -0,0 +1,697 @@ +*> \brief \b ZLA_GERFSX_EXTENDED +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_GERFSX_EXTENDED + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLA_GERFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, NRHS, A, +* LDA, AF, LDAF, IPIV, COLEQU, C, B, +* LDB, Y, LDY, BERR_OUT, N_NORMS, +* ERRS_N, ERRS_C, RES, AYB, DY, +* Y_TAIL, RCOND, ITHRESH, RTHRESH, +* DZ_UB, IGNORE_CWISE, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, +* $ TRANS_TYPE, N_NORMS +* LOGICAL COLEQU, IGNORE_CWISE +* INTEGER ITHRESH +* DOUBLE PRECISION RTHRESH, DZ_UB +* .. +* .. Array Arguments +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) +* DOUBLE PRECISION C( * ), AYB( * ), RCOND, BERR_OUT( * ), +* $ ERRS_N( NRHS, * ), ERRS_C( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_GERFSX_EXTENDED improves the computed solution to a system of +*> linear equations by performing extra-precise iterative refinement +*> and provides error bounds and backward error estimates for the solution. +*> This subroutine is called by ZGERFSX to perform iterative refinement. +*> In addition to normwise error bound, the code provides maximum +*> componentwise error bound if possible. See comments for ERRS_N +*> and ERRS_C for details of the error bounds. Note that this +*> subroutine is only resonsible for setting the second fields of +*> ERRS_N and ERRS_C. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] PREC_TYPE +*> \verbatim +*> PREC_TYPE is INTEGER +*> Specifies the intermediate precision to be used in refinement. +*> The value is defined by ILAPREC(P) where P is a CHARACTER and +*> P = 'S': Single +*> = 'D': Double +*> = 'I': Indigenous +*> = 'X', 'E': Extra +*> \endverbatim +*> +*> \param[in] TRANS_TYPE +*> \verbatim +*> TRANS_TYPE is INTEGER +*> Specifies the transposition operation on A. +*> The value is defined by ILATRANS(T) where T is a CHARACTER and +*> T = 'N': No transpose +*> = 'T': Transpose +*> = 'C': Conjugate transpose +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand-sides, i.e., the number of columns of the +*> matrix B. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The factors L and U from the factorization +*> A = P*L*U as computed by ZGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from the factorization A = P*L*U +*> as computed by ZGETRF; row i of the matrix was interchanged +*> with row IPIV(i). +*> \endverbatim +*> +*> \param[in] COLEQU +*> \verbatim +*> COLEQU is LOGICAL +*> If .TRUE. then column equilibration was done to A before calling +*> this routine. This is needed to compute the solution and error +*> bounds correctly. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If COLEQU = .FALSE., C +*> is not accessed. If C is input, each element of C should be a power +*> of the radix to ensure a reliable solution and error estimates. +*> Scaling by powers of the radix does not cause rounding errors unless +*> the result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right-hand-side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX*16 array, dimension (LDY,NRHS) +*> On entry, the solution matrix X, as computed by ZGETRS. +*> On exit, the improved solution matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +*> +*> \param[out] BERR_OUT +*> \verbatim +*> BERR_OUT is DOUBLE PRECISION array, dimension (NRHS) +*> On exit, BERR_OUT(j) contains the componentwise relative backward +*> error for right-hand-side j from the formula +*> max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. This is computed by ZLA_LIN_BERR. +*> \endverbatim +*> +*> \param[in] N_NORMS +*> \verbatim +*> N_NORMS is INTEGER +*> Determines which error bounds to return (see ERRS_N +*> and ERRS_C). +*> If N_NORMS >= 1 return normwise error bounds. +*> If N_NORMS >= 2 return componentwise error bounds. +*> \endverbatim +*> +*> \param[in,out] ERRS_N +*> \verbatim +*> ERRS_N is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERRS_N(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERRS_N(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in,out] ERRS_C +*> \verbatim +*> ERRS_C is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERRS_C is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERRS_C(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERRS_C(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is COMPLEX*16 array, dimension (N) +*> Workspace to hold the intermediate residual. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is DOUBLE PRECISION array, dimension (N) +*> Workspace. +*> \endverbatim +*> +*> \param[in] DY +*> \verbatim +*> DY is COMPLEX*16 array, dimension (N) +*> Workspace to hold the intermediate solution. +*> \endverbatim +*> +*> \param[in] Y_TAIL +*> \verbatim +*> Y_TAIL is COMPLEX*16 array, dimension (N) +*> Workspace to hold the trailing bits of the intermediate solution. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[in] ITHRESH +*> \verbatim +*> ITHRESH is INTEGER +*> The maximum number of residual computations allowed for +*> refinement. The default is 10. For 'aggressive' set to 100 to +*> permit convergence using approximate factorizations or +*> factorizations other than LU. If the factorization uses a +*> technique other than Gaussian elimination, the guarantees in +*> ERRS_N and ERRS_C may no longer be trustworthy. +*> \endverbatim +*> +*> \param[in] RTHRESH +*> \verbatim +*> RTHRESH is DOUBLE PRECISION +*> Determines when to stop refinement if the error estimate stops +*> decreasing. Refinement will stop when the next solution no longer +*> satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is +*> the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The +*> default value is 0.5. For 'aggressive' set to 0.9 to permit +*> convergence on extremely ill-conditioned matrices. See LAWN 165 +*> for more details. +*> \endverbatim +*> +*> \param[in] DZ_UB +*> \verbatim +*> DZ_UB is DOUBLE PRECISION +*> Determines when to start considering componentwise convergence. +*> Componentwise convergence is only considered after each component +*> of the solution Y is stable, which we definte as the relative +*> change in each component being less than DZ_UB. The default value +*> is 0.25, requiring the first bit to be stable. See LAWN 165 for +*> more details. +*> \endverbatim +*> +*> \param[in] IGNORE_CWISE +*> \verbatim +*> IGNORE_CWISE is LOGICAL +*> If .TRUE. then ignore componentwise convergence. Default value +*> is .FALSE.. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> < 0: if INFO = -i, the ith argument to ZGETRS had an illegal +*> value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16GEcomputational +* +* ===================================================================== + SUBROUTINE ZLA_GERFSX_EXTENDED( PREC_TYPE, TRANS_TYPE, N, NRHS, A, + $ LDA, AF, LDAF, IPIV, COLEQU, C, B, + $ LDB, Y, LDY, BERR_OUT, N_NORMS, + $ ERRS_N, ERRS_C, RES, AYB, DY, + $ Y_TAIL, RCOND, ITHRESH, RTHRESH, + $ DZ_UB, IGNORE_CWISE, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, + $ TRANS_TYPE, N_NORMS + LOGICAL COLEQU, IGNORE_CWISE + INTEGER ITHRESH + DOUBLE PRECISION RTHRESH, DZ_UB +* .. +* .. Array Arguments + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) + DOUBLE PRECISION C( * ), AYB( * ), RCOND, BERR_OUT( * ), + $ ERRS_N( NRHS, * ), ERRS_C( NRHS, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + CHARACTER TRANS + INTEGER CNT, I, J, X_STATE, Z_STATE, Y_PREC_STATE + DOUBLE PRECISION YK, DYK, YMIN, NORMY, NORMX, NORMDX, DXRAT, + $ DZRAT, PREVNORMDX, PREV_DZ_Z, DXRATMAX, + $ DZRATMAX, DX_X, DZ_Z, FINAL_DX_X, FINAL_DZ_Z, + $ EPS, HUGEVAL, INCR_THRESH + LOGICAL INCR_PREC + COMPLEX*16 ZDUM +* .. +* .. Parameters .. + INTEGER UNSTABLE_STATE, WORKING_STATE, CONV_STATE, + $ NOPROG_STATE, BASE_RESIDUAL, EXTRA_RESIDUAL, + $ EXTRA_Y + PARAMETER ( UNSTABLE_STATE = 0, WORKING_STATE = 1, + $ CONV_STATE = 2, + $ NOPROG_STATE = 3 ) + PARAMETER ( BASE_RESIDUAL = 0, EXTRA_RESIDUAL = 1, + $ EXTRA_Y = 2 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZCOPY, ZGETRS, ZGEMV, BLAS_ZGEMV_X, + $ BLAS_ZGEMV2_X, ZLA_GEAMV, ZLA_WWADDW, DLAMCH, + $ CHLA_TRANSTYPE, ZLA_LIN_BERR + DOUBLE PRECISION DLAMCH + CHARACTER CHLA_TRANSTYPE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + IF ( INFO.NE.0 ) RETURN + TRANS = CHLA_TRANSTYPE(TRANS_TYPE) + EPS = DLAMCH( 'Epsilon' ) + HUGEVAL = DLAMCH( 'Overflow' ) +* Force HUGEVAL to Inf + HUGEVAL = HUGEVAL * HUGEVAL +* Using HUGEVAL may lead to spurious underflows. + INCR_THRESH = DBLE( N ) * EPS +* + DO J = 1, NRHS + Y_PREC_STATE = EXTRA_RESIDUAL + IF ( Y_PREC_STATE .EQ. EXTRA_Y ) THEN + DO I = 1, N + Y_TAIL( I ) = 0.0D+0 + END DO + END IF + + DXRAT = 0.0D+0 + DXRATMAX = 0.0D+0 + DZRAT = 0.0D+0 + DZRATMAX = 0.0D+0 + FINAL_DX_X = HUGEVAL + FINAL_DZ_Z = HUGEVAL + PREVNORMDX = HUGEVAL + PREV_DZ_Z = HUGEVAL + DZ_Z = HUGEVAL + DX_X = HUGEVAL + + X_STATE = WORKING_STATE + Z_STATE = UNSTABLE_STATE + INCR_PREC = .FALSE. + + DO CNT = 1, ITHRESH +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL ZCOPY( N, B( 1, J ), 1, RES, 1 ) + IF ( Y_PREC_STATE .EQ. BASE_RESIDUAL ) THEN + CALL ZGEMV( TRANS, N, N, (-1.0D+0,0.0D+0), A, LDA, + $ Y( 1, J ), 1, (1.0D+0,0.0D+0), RES, 1) + ELSE IF (Y_PREC_STATE .EQ. EXTRA_RESIDUAL) THEN + CALL BLAS_ZGEMV_X( TRANS_TYPE, N, N, (-1.0D+0,0.0D+0), A, + $ LDA, Y( 1, J ), 1, (1.0D+0,0.0D+0), + $ RES, 1, PREC_TYPE ) + ELSE + CALL BLAS_ZGEMV2_X( TRANS_TYPE, N, N, (-1.0D+0,0.0D+0), + $ A, LDA, Y(1, J), Y_TAIL, 1, (1.0D+0,0.0D+0), RES, 1, + $ PREC_TYPE) + END IF + +! XXX: RES is no longer needed. + CALL ZCOPY( N, RES, 1, DY, 1 ) + CALL ZGETRS( TRANS, N, 1, AF, LDAF, IPIV, DY, N, INFO ) +* +* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. +* + NORMX = 0.0D+0 + NORMY = 0.0D+0 + NORMDX = 0.0D+0 + DZ_Z = 0.0D+0 + YMIN = HUGEVAL +* + DO I = 1, N + YK = CABS1( Y( I, J ) ) + DYK = CABS1( DY( I ) ) + + IF ( YK .NE. 0.0D+0 ) THEN + DZ_Z = MAX( DZ_Z, DYK / YK ) + ELSE IF ( DYK .NE. 0.0D+0 ) THEN + DZ_Z = HUGEVAL + END IF + + YMIN = MIN( YMIN, YK ) + + NORMY = MAX( NORMY, YK ) + + IF ( COLEQU ) THEN + NORMX = MAX( NORMX, YK * C( I ) ) + NORMDX = MAX( NORMDX, DYK * C( I ) ) + ELSE + NORMX = NORMY + NORMDX = MAX(NORMDX, DYK) + END IF + END DO + + IF ( NORMX .NE. 0.0D+0 ) THEN + DX_X = NORMDX / NORMX + ELSE IF ( NORMDX .EQ. 0.0D+0 ) THEN + DX_X = 0.0D+0 + ELSE + DX_X = HUGEVAL + END IF + + DXRAT = NORMDX / PREVNORMDX + DZRAT = DZ_Z / PREV_DZ_Z +* +* Check termination criteria +* + IF (.NOT.IGNORE_CWISE + $ .AND. YMIN*RCOND .LT. INCR_THRESH*NORMY + $ .AND. Y_PREC_STATE .LT. EXTRA_Y ) + $ INCR_PREC = .TRUE. + + IF ( X_STATE .EQ. NOPROG_STATE .AND. DXRAT .LE. RTHRESH ) + $ X_STATE = WORKING_STATE + IF ( X_STATE .EQ. WORKING_STATE ) THEN + IF (DX_X .LE. EPS) THEN + X_STATE = CONV_STATE + ELSE IF ( DXRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + X_STATE = NOPROG_STATE + END IF + ELSE + IF ( DXRAT .GT. DXRATMAX ) DXRATMAX = DXRAT + END IF + IF ( X_STATE .GT. WORKING_STATE ) FINAL_DX_X = DX_X + END IF + + IF ( Z_STATE .EQ. UNSTABLE_STATE .AND. DZ_Z .LE. DZ_UB ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. NOPROG_STATE .AND. DZRAT .LE. RTHRESH ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. WORKING_STATE ) THEN + IF ( DZ_Z .LE. EPS ) THEN + Z_STATE = CONV_STATE + ELSE IF ( DZ_Z .GT. DZ_UB ) THEN + Z_STATE = UNSTABLE_STATE + DZRATMAX = 0.0D+0 + FINAL_DZ_Z = HUGEVAL + ELSE IF ( DZRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + Z_STATE = NOPROG_STATE + END IF + ELSE + IF ( DZRAT .GT. DZRATMAX ) DZRATMAX = DZRAT + END IF + IF ( Z_STATE .GT. WORKING_STATE ) FINAL_DZ_Z = DZ_Z + END IF +* +* Exit if both normwise and componentwise stopped working, +* but if componentwise is unstable, let it go at least two +* iterations. +* + IF ( X_STATE.NE.WORKING_STATE ) THEN + IF ( IGNORE_CWISE ) GOTO 666 + IF ( Z_STATE.EQ.NOPROG_STATE .OR. Z_STATE.EQ.CONV_STATE ) + $ GOTO 666 + IF ( Z_STATE.EQ.UNSTABLE_STATE .AND. CNT.GT.1 ) GOTO 666 + END IF + + IF ( INCR_PREC ) THEN + INCR_PREC = .FALSE. + Y_PREC_STATE = Y_PREC_STATE + 1 + DO I = 1, N + Y_TAIL( I ) = 0.0D+0 + END DO + END IF + + PREVNORMDX = NORMDX + PREV_DZ_Z = DZ_Z +* +* Update soluton. +* + IF ( Y_PREC_STATE .LT. EXTRA_Y ) THEN + CALL ZAXPY( N, (1.0D+0,0.0D+0), DY, 1, Y(1,J), 1 ) + ELSE + CALL ZLA_WWADDW( N, Y( 1, J ), Y_TAIL, DY ) + END IF + + END DO +* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. + 666 CONTINUE +* +* Set final_* when cnt hits ithresh +* + IF ( X_STATE .EQ. WORKING_STATE ) FINAL_DX_X = DX_X + IF ( Z_STATE .EQ. WORKING_STATE ) FINAL_DZ_Z = DZ_Z +* +* Compute error bounds +* + IF (N_NORMS .GE. 1) THEN + ERRS_N( J, LA_LINRX_ERR_I ) = FINAL_DX_X / (1 - DXRATMAX) + + END IF + IF ( N_NORMS .GE. 2 ) THEN + ERRS_C( J, LA_LINRX_ERR_I ) = FINAL_DZ_Z / (1 - DZRATMAX) + END IF +* +* Compute componentwise relative backward error from formula +* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL ZCOPY( N, B( 1, J ), 1, RES, 1 ) + CALL ZGEMV( TRANS, N, N, (-1.0D+0,0.0D+0), A, LDA, Y(1,J), 1, + $ (1.0D+0,0.0D+0), RES, 1 ) + + DO I = 1, N + AYB( I ) = CABS1( B( I, J ) ) + END DO +* +* Compute abs(op(A_s))*abs(Y) + abs(B_s). +* + CALL ZLA_GEAMV ( TRANS_TYPE, N, N, 1.0D+0, + $ A, LDA, Y(1, J), 1, 1.0D+0, AYB, 1 ) + + CALL ZLA_LIN_BERR ( N, N, 1, RES, AYB, BERR_OUT( J ) ) +* +* End of loop for each RHS. +* + END DO +* + RETURN + END diff --git a/dspl/liblapack/SRC/zla_gerpvgrw.f b/dspl/liblapack/SRC/zla_gerpvgrw.f new file mode 100644 index 0000000..a5f5174 --- /dev/null +++ b/dspl/liblapack/SRC/zla_gerpvgrw.f @@ -0,0 +1,149 @@ +*> \brief \b ZLA_GERPVGRW multiplies a square real matrix by a complex matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_GERPVGRW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLA_GERPVGRW( N, NCOLS, A, LDA, AF, +* LDAF ) +* +* .. Scalar Arguments .. +* INTEGER N, NCOLS, LDA, LDAF +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> ZLA_GERPVGRW computes the reciprocal pivot growth factor +*> norm(A)/norm(U). The "max absolute element" norm is used. If this is +*> much less than 1, the stability of the LU factorization of the +*> (equilibrated) matrix A could be poor. This also means that the +*> solution X, estimated condition numbers, and error bounds could be +*> unreliable. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NCOLS +*> \verbatim +*> NCOLS is INTEGER +*> The number of columns of the matrix A. NCOLS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The factors L and U from the factorization +*> A = P*L*U as computed by ZGETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16GEcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLA_GERPVGRW( N, NCOLS, A, LDA, AF, + $ LDAF ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER N, NCOLS, LDA, LDAF +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), AF( LDAF, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION AMAX, UMAX, RPVGRW + COMPLEX*16 ZDUM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, ABS, REAL, DIMAG +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + RPVGRW = 1.0D+0 + + DO J = 1, NCOLS + AMAX = 0.0D+0 + UMAX = 0.0D+0 + DO I = 1, N + AMAX = MAX( CABS1( A( I, J ) ), AMAX ) + END DO + DO I = 1, J + UMAX = MAX( CABS1( AF( I, J ) ), UMAX ) + END DO + IF ( UMAX /= 0.0D+0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + ZLA_GERPVGRW = RPVGRW + END diff --git a/dspl/liblapack/SRC/zla_heamv.f b/dspl/liblapack/SRC/zla_heamv.f new file mode 100644 index 0000000..cd536ef --- /dev/null +++ b/dspl/liblapack/SRC/zla_heamv.f @@ -0,0 +1,426 @@ +*> \brief \b ZLA_HEAMV computes a matrix-vector product using a Hermitian indefinite matrix to calculate error bounds. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_HEAMV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLA_HEAMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, +* INCY ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION ALPHA, BETA +* INTEGER INCX, INCY, LDA, N, UPLO +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), X( * ) +* DOUBLE PRECISION Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_SYAMV performs the matrix-vector operation +*> +*> y := alpha*abs(A)*abs(x) + beta*abs(y), +*> +*> where alpha and beta are scalars, x and y are vectors and A is an +*> n by n symmetric matrix. +*> +*> This function is primarily used in calculating error bounds. +*> To protect against underflow during evaluation, components in +*> the resulting vector are perturbed away from zero by (N+1) +*> times the underflow threshold. To prevent unnecessarily large +*> errors for block-structure embedded in general matrices, +*> "symbolically" zero components are not perturbed. A zero +*> entry is considered "symbolic" if all multiplications involved +*> in computing that entry have at least one zero multiplicand. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is INTEGER +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array A is to be referenced as +*> follows: +*> +*> UPLO = BLAS_UPPER Only the upper triangular part of A +*> is to be referenced. +*> +*> UPLO = BLAS_LOWER Only the lower triangular part of A +*> is to be referenced. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION . +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension ( LDA, n ). +*> Before entry, the leading m by n part of the array A must +*> contain the matrix of coefficients. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, n ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension at least +*> ( 1 + ( n - 1 )*abs( INCX ) ) +*> Before entry, the incremented array X must contain the +*> vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION . +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, dimension +*> ( 1 + ( n - 1 )*abs( INCY ) ) +*> Before entry with BETA non-zero, the incremented array Y +*> must contain the vector y. On exit, Y is overwritten by the +*> updated vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> Unchanged on exit. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16HEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 2 Blas routine. +*> +*> -- Written on 22-October-1986. +*> Jack Dongarra, Argonne National Lab. +*> Jeremy Du Croz, Nag Central Office. +*> Sven Hammarling, Nag Central Office. +*> Richard Hanson, Sandia National Labs. +*> -- Modified for the absolute-value product, April 2006 +*> Jason Riedy, UC Berkeley +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLA_HEAMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, + $ INCY ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER INCX, INCY, LDA, N, UPLO +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ) + DOUBLE PRECISION Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL SYMB_ZERO + DOUBLE PRECISION TEMP, SAFE1 + INTEGER I, INFO, IY, J, JX, KX, KY + COMPLEX*16 ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DLAMCH + DOUBLE PRECISION DLAMCH +* .. +* .. External Functions .. + EXTERNAL ILAUPLO + INTEGER ILAUPLO +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, ABS, SIGN, REAL, DIMAG +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE ( ZDUM ) ) + ABS( DIMAG ( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( UPLO.NE.ILAUPLO( 'U' ) .AND. + $ UPLO.NE.ILAUPLO( 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 5 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + ELSE IF( INCY.EQ.0 )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'ZHEMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Set SAFE1 essentially to be the underflow threshold times the +* number of additions in each row. +* + SAFE1 = DLAMCH( 'Safe minimum' ) + SAFE1 = (N+1)*SAFE1 +* +* Form y := alpha*abs(A)*abs(x) + beta*abs(y). +* +* The O(N^2) SYMB_ZERO tests could be replaced by O(N) queries to +* the inexact flag. Still doesn't help change the iteration order +* to per-column. +* + IY = KY + IF ( INCX.EQ.1 ) THEN + IF ( UPLO .EQ. ILAUPLO( 'U' ) ) THEN + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = CABS1( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + DO J = I+1, N + TEMP = CABS1( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + END IF + + IF (.NOT.SYMB_ZERO) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = CABS1( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + DO J = I+1, N + TEMP = CABS1( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + END IF + + IF (.NOT.SYMB_ZERO) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + ELSE + IF ( UPLO .EQ. ILAUPLO( 'U' ) ) THEN + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + JX = KX + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = CABS1( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + DO J = I+1, N + TEMP = CABS1( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + JX = KX + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = CABS1( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + DO J = I+1, N + TEMP = CABS1( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + + END IF +* + RETURN +* +* End of ZLA_HEAMV +* + END diff --git a/dspl/liblapack/SRC/zla_hercond_c.f b/dspl/liblapack/SRC/zla_hercond_c.f new file mode 100644 index 0000000..61cfe95 --- /dev/null +++ b/dspl/liblapack/SRC/zla_hercond_c.f @@ -0,0 +1,329 @@ +*> \brief \b ZLA_HERCOND_C computes the infinity norm condition number of op(A)*inv(diag(c)) for Hermitian indefinite matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_HERCOND_C + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLA_HERCOND_C( UPLO, N, A, LDA, AF, +* LDAF, IPIV, C, CAPPLY, +* INFO, WORK, RWORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* LOGICAL CAPPLY +* INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), WORK( * ) +* DOUBLE PRECISION C ( * ), RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_HERCOND_C computes the infinity norm condition number of +*> op(A) * inv(diag(C)) where C is a DOUBLE PRECISION vector. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZHETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHETRF. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The vector C in the formula op(A) * inv(diag(C)). +*> \endverbatim +*> +*> \param[in] CAPPLY +*> \verbatim +*> CAPPLY is LOGICAL +*> If .TRUE. then access the vector C in the formula above. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLA_HERCOND_C( UPLO, N, A, LDA, AF, + $ LDAF, IPIV, C, CAPPLY, + $ INFO, WORK, RWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + LOGICAL CAPPLY + INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), WORK( * ) + DOUBLE PRECISION C ( * ), RWORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER KASE, I, J + DOUBLE PRECISION AINVNM, ANORM, TMP + LOGICAL UP, UPPER + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLACN2, ZHETRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + ZLA_HERCOND_C = 0.0D+0 +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLA_HERCOND_C', -INFO ) + RETURN + END IF + UP = .FALSE. + IF ( LSAME( UPLO, 'U' ) ) UP = .TRUE. +* +* Compute norm of op(A)*op2(C). +* + ANORM = 0.0D+0 + IF ( UP ) THEN + DO I = 1, N + TMP = 0.0D+0 + IF ( CAPPLY ) THEN + DO J = 1, I + TMP = TMP + CABS1( A( J, I ) ) / C( J ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( I, J ) ) / C( J ) + END DO + ELSE + DO J = 1, I + TMP = TMP + CABS1( A( J, I ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( I, J ) ) + END DO + END IF + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + ELSE + DO I = 1, N + TMP = 0.0D+0 + IF ( CAPPLY ) THEN + DO J = 1, I + TMP = TMP + CABS1( A( I, J ) ) / C( J ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( J, I ) ) / C( J ) + END DO + ELSE + DO J = 1, I + TMP = TMP + CABS1( A( I, J ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( J, I ) ) + END DO + END IF + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + ZLA_HERCOND_C = 1.0D+0 + RETURN + ELSE IF( ANORM .EQ. 0.0D+0 ) THEN + RETURN + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0D+0 +* + KASE = 0 + 10 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO +* + IF ( UP ) THEN + CALL ZHETRS( 'U', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL ZHETRS( 'L', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ENDIF +* +* Multiply by inv(C). +* + IF ( CAPPLY ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + ELSE +* +* Multiply by inv(C**H). +* + IF ( CAPPLY ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF +* + IF ( UP ) THEN + CALL ZHETRS( 'U', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL ZHETRS( 'L', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0D+0 ) + $ ZLA_HERCOND_C = 1.0D+0 / AINVNM +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/zla_hercond_x.f b/dspl/liblapack/SRC/zla_hercond_x.f new file mode 100644 index 0000000..9c19b48 --- /dev/null +++ b/dspl/liblapack/SRC/zla_hercond_x.f @@ -0,0 +1,299 @@ +*> \brief \b ZLA_HERCOND_X computes the infinity norm condition number of op(A)*diag(x) for Hermitian indefinite matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_HERCOND_X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLA_HERCOND_X( UPLO, N, A, LDA, AF, +* LDAF, IPIV, X, INFO, +* WORK, RWORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), WORK( * ), X( * ) +* DOUBLE PRECISION RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_HERCOND_X computes the infinity norm condition number of +*> op(A) * diag(X) where X is a COMPLEX*16 vector. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZHETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by CHETRF. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (N) +*> The vector X in the formula op(A) * diag(X). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLA_HERCOND_X( UPLO, N, A, LDA, AF, + $ LDAF, IPIV, X, INFO, + $ WORK, RWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), WORK( * ), X( * ) + DOUBLE PRECISION RWORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER KASE, I, J + DOUBLE PRECISION AINVNM, ANORM, TMP + LOGICAL UP, UPPER + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLACN2, ZHETRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + ZLA_HERCOND_X = 0.0D+0 +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF ( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLA_HERCOND_X', -INFO ) + RETURN + END IF + UP = .FALSE. + IF ( LSAME( UPLO, 'U' ) ) UP = .TRUE. +* +* Compute norm of op(A)*op2(C). +* + ANORM = 0.0D+0 + IF ( UP ) THEN + DO I = 1, N + TMP = 0.0D+0 + DO J = 1, I + TMP = TMP + CABS1( A( J, I ) * X( J ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( I, J ) * X( J ) ) + END DO + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + ELSE + DO I = 1, N + TMP = 0.0D+0 + DO J = 1, I + TMP = TMP + CABS1( A( I, J ) * X( J ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( J, I ) * X( J ) ) + END DO + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + ZLA_HERCOND_X = 1.0D+0 + RETURN + ELSE IF( ANORM .EQ. 0.0D+0 ) THEN + RETURN + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0D+0 +* + KASE = 0 + 10 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO +* + IF ( UP ) THEN + CALL ZHETRS( 'U', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL ZHETRS( 'L', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ENDIF +* +* Multiply by inv(X). +* + DO I = 1, N + WORK( I ) = WORK( I ) / X( I ) + END DO + ELSE +* +* Multiply by inv(X**H). +* + DO I = 1, N + WORK( I ) = WORK( I ) / X( I ) + END DO +* + IF ( UP ) THEN + CALL ZHETRS( 'U', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL ZHETRS( 'L', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0D+0 ) + $ ZLA_HERCOND_X = 1.0D+0 / AINVNM +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/zla_herfsx_extended.f b/dspl/liblapack/SRC/zla_herfsx_extended.f new file mode 100644 index 0000000..5b43a58 --- /dev/null +++ b/dspl/liblapack/SRC/zla_herfsx_extended.f @@ -0,0 +1,717 @@ +*> \brief \b ZLA_HERFSX_EXTENDED improves the computed solution to a system of linear equations for Hermitian indefinite matrices by performing extra-precise iterative refinement and provides error bounds and backward error estimates for the solution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_HERFSX_EXTENDED + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLA_HERFSX_EXTENDED( PREC_TYPE, UPLO, N, NRHS, A, LDA, +* AF, LDAF, IPIV, COLEQU, C, B, LDB, +* Y, LDY, BERR_OUT, N_NORMS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, RES, +* AYB, DY, Y_TAIL, RCOND, ITHRESH, +* RTHRESH, DZ_UB, IGNORE_CWISE, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, +* $ N_NORMS, ITHRESH +* CHARACTER UPLO +* LOGICAL COLEQU, IGNORE_CWISE +* DOUBLE PRECISION RTHRESH, DZ_UB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) +* DOUBLE PRECISION C( * ), AYB( * ), RCOND, BERR_OUT( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_HERFSX_EXTENDED improves the computed solution to a system of +*> linear equations by performing extra-precise iterative refinement +*> and provides error bounds and backward error estimates for the solution. +*> This subroutine is called by ZHERFSX to perform iterative refinement. +*> In addition to normwise error bound, the code provides maximum +*> componentwise error bound if possible. See comments for ERR_BNDS_NORM +*> and ERR_BNDS_COMP for details of the error bounds. Note that this +*> subroutine is only resonsible for setting the second fields of +*> ERR_BNDS_NORM and ERR_BNDS_COMP. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] PREC_TYPE +*> \verbatim +*> PREC_TYPE is INTEGER +*> Specifies the intermediate precision to be used in refinement. +*> The value is defined by ILAPREC(P) where P is a CHARACTER and +*> P = 'S': Single +*> = 'D': Double +*> = 'I': Indigenous +*> = 'X', 'E': Extra +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand-sides, i.e., the number of columns of the +*> matrix B. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZHETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZHETRF. +*> \endverbatim +*> +*> \param[in] COLEQU +*> \verbatim +*> COLEQU is LOGICAL +*> If .TRUE. then column equilibration was done to A before calling +*> this routine. This is needed to compute the solution and error +*> bounds correctly. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If COLEQU = .FALSE., C +*> is not accessed. If C is input, each element of C should be a power +*> of the radix to ensure a reliable solution and error estimates. +*> Scaling by powers of the radix does not cause rounding errors unless +*> the result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right-hand-side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX*16 array, dimension (LDY,NRHS) +*> On entry, the solution matrix X, as computed by ZHETRS. +*> On exit, the improved solution matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +*> +*> \param[out] BERR_OUT +*> \verbatim +*> BERR_OUT is DOUBLE PRECISION array, dimension (NRHS) +*> On exit, BERR_OUT(j) contains the componentwise relative backward +*> error for right-hand-side j from the formula +*> max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. This is computed by ZLA_LIN_BERR. +*> \endverbatim +*> +*> \param[in] N_NORMS +*> \verbatim +*> N_NORMS is INTEGER +*> Determines which error bounds to return (see ERR_BNDS_NORM +*> and ERR_BNDS_COMP). +*> If N_NORMS >= 1 return normwise error bounds. +*> If N_NORMS >= 2 return componentwise error bounds. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is COMPLEX*16 array, dimension (N) +*> Workspace to hold the intermediate residual. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is DOUBLE PRECISION array, dimension (N) +*> Workspace. +*> \endverbatim +*> +*> \param[in] DY +*> \verbatim +*> DY is COMPLEX*16 array, dimension (N) +*> Workspace to hold the intermediate solution. +*> \endverbatim +*> +*> \param[in] Y_TAIL +*> \verbatim +*> Y_TAIL is COMPLEX*16 array, dimension (N) +*> Workspace to hold the trailing bits of the intermediate solution. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[in] ITHRESH +*> \verbatim +*> ITHRESH is INTEGER +*> The maximum number of residual computations allowed for +*> refinement. The default is 10. For 'aggressive' set to 100 to +*> permit convergence using approximate factorizations or +*> factorizations other than LU. If the factorization uses a +*> technique other than Gaussian elimination, the guarantees in +*> ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. +*> \endverbatim +*> +*> \param[in] RTHRESH +*> \verbatim +*> RTHRESH is DOUBLE PRECISION +*> Determines when to stop refinement if the error estimate stops +*> decreasing. Refinement will stop when the next solution no longer +*> satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is +*> the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The +*> default value is 0.5. For 'aggressive' set to 0.9 to permit +*> convergence on extremely ill-conditioned matrices. See LAWN 165 +*> for more details. +*> \endverbatim +*> +*> \param[in] DZ_UB +*> \verbatim +*> DZ_UB is DOUBLE PRECISION +*> Determines when to start considering componentwise convergence. +*> Componentwise convergence is only considered after each component +*> of the solution Y is stable, which we definte as the relative +*> change in each component being less than DZ_UB. The default value +*> is 0.25, requiring the first bit to be stable. See LAWN 165 for +*> more details. +*> \endverbatim +*> +*> \param[in] IGNORE_CWISE +*> \verbatim +*> IGNORE_CWISE is LOGICAL +*> If .TRUE. then ignore componentwise convergence. Default value +*> is .FALSE.. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> < 0: if INFO = -i, the ith argument to ZLA_HERFSX_EXTENDED had an illegal +*> value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16HEcomputational +* +* ===================================================================== + SUBROUTINE ZLA_HERFSX_EXTENDED( PREC_TYPE, UPLO, N, NRHS, A, LDA, + $ AF, LDAF, IPIV, COLEQU, C, B, LDB, + $ Y, LDY, BERR_OUT, N_NORMS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, RES, + $ AYB, DY, Y_TAIL, RCOND, ITHRESH, + $ RTHRESH, DZ_UB, IGNORE_CWISE, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, + $ N_NORMS, ITHRESH + CHARACTER UPLO + LOGICAL COLEQU, IGNORE_CWISE + DOUBLE PRECISION RTHRESH, DZ_UB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) + DOUBLE PRECISION C( * ), AYB( * ), RCOND, BERR_OUT( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER UPLO2, CNT, I, J, X_STATE, Z_STATE, + $ Y_PREC_STATE + DOUBLE PRECISION YK, DYK, YMIN, NORMY, NORMX, NORMDX, DXRAT, + $ DZRAT, PREVNORMDX, PREV_DZ_Z, DXRATMAX, + $ DZRATMAX, DX_X, DZ_Z, FINAL_DX_X, FINAL_DZ_Z, + $ EPS, HUGEVAL, INCR_THRESH + LOGICAL INCR_PREC, UPPER + COMPLEX*16 ZDUM +* .. +* .. Parameters .. + INTEGER UNSTABLE_STATE, WORKING_STATE, CONV_STATE, + $ NOPROG_STATE, BASE_RESIDUAL, EXTRA_RESIDUAL, + $ EXTRA_Y + PARAMETER ( UNSTABLE_STATE = 0, WORKING_STATE = 1, + $ CONV_STATE = 2, NOPROG_STATE = 3 ) + PARAMETER ( BASE_RESIDUAL = 0, EXTRA_RESIDUAL = 1, + $ EXTRA_Y = 2 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL ILAUPLO + INTEGER ILAUPLO +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZCOPY, ZHETRS, ZHEMV, BLAS_ZHEMV_X, + $ BLAS_ZHEMV2_X, ZLA_HEAMV, ZLA_WWADDW, + $ ZLA_LIN_BERR + DOUBLE PRECISION DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDY.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLA_HERFSX_EXTENDED', -INFO ) + RETURN + END IF + EPS = DLAMCH( 'Epsilon' ) + HUGEVAL = DLAMCH( 'Overflow' ) +* Force HUGEVAL to Inf + HUGEVAL = HUGEVAL * HUGEVAL +* Using HUGEVAL may lead to spurious underflows. + INCR_THRESH = DBLE( N ) * EPS + + IF ( LSAME ( UPLO, 'L' ) ) THEN + UPLO2 = ILAUPLO( 'L' ) + ELSE + UPLO2 = ILAUPLO( 'U' ) + ENDIF + + DO J = 1, NRHS + Y_PREC_STATE = EXTRA_RESIDUAL + IF ( Y_PREC_STATE .EQ. EXTRA_Y ) THEN + DO I = 1, N + Y_TAIL( I ) = 0.0D+0 + END DO + END IF + + DXRAT = 0.0D+0 + DXRATMAX = 0.0D+0 + DZRAT = 0.0D+0 + DZRATMAX = 0.0D+0 + FINAL_DX_X = HUGEVAL + FINAL_DZ_Z = HUGEVAL + PREVNORMDX = HUGEVAL + PREV_DZ_Z = HUGEVAL + DZ_Z = HUGEVAL + DX_X = HUGEVAL + + X_STATE = WORKING_STATE + Z_STATE = UNSTABLE_STATE + INCR_PREC = .FALSE. + + DO CNT = 1, ITHRESH +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL ZCOPY( N, B( 1, J ), 1, RES, 1 ) + IF ( Y_PREC_STATE .EQ. BASE_RESIDUAL ) THEN + CALL ZHEMV( UPLO, N, DCMPLX(-1.0D+0), A, LDA, Y( 1, J ), + $ 1, DCMPLX(1.0D+0), RES, 1 ) + ELSE IF ( Y_PREC_STATE .EQ. EXTRA_RESIDUAL ) THEN + CALL BLAS_ZHEMV_X( UPLO2, N, DCMPLX(-1.0D+0), A, LDA, + $ Y( 1, J ), 1, DCMPLX(1.0D+0), RES, 1, PREC_TYPE) + ELSE + CALL BLAS_ZHEMV2_X(UPLO2, N, DCMPLX(-1.0D+0), A, LDA, + $ Y(1, J), Y_TAIL, 1, DCMPLX(1.0D+0), RES, 1, + $ PREC_TYPE) + END IF + +! XXX: RES is no longer needed. + CALL ZCOPY( N, RES, 1, DY, 1 ) + CALL ZHETRS( UPLO, N, 1, AF, LDAF, IPIV, DY, N, INFO ) +* +* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. +* + NORMX = 0.0D+0 + NORMY = 0.0D+0 + NORMDX = 0.0D+0 + DZ_Z = 0.0D+0 + YMIN = HUGEVAL + + DO I = 1, N + YK = CABS1( Y( I, J ) ) + DYK = CABS1( DY( I ) ) + + IF (YK .NE. 0.0D+0) THEN + DZ_Z = MAX( DZ_Z, DYK / YK ) + ELSE IF ( DYK .NE. 0.0D+0 ) THEN + DZ_Z = HUGEVAL + END IF + + YMIN = MIN( YMIN, YK ) + + NORMY = MAX( NORMY, YK ) + + IF ( COLEQU ) THEN + NORMX = MAX( NORMX, YK * C( I ) ) + NORMDX = MAX( NORMDX, DYK * C( I ) ) + ELSE + NORMX = NORMY + NORMDX = MAX( NORMDX, DYK ) + END IF + END DO + + IF ( NORMX .NE. 0.0D+0 ) THEN + DX_X = NORMDX / NORMX + ELSE IF ( NORMDX .EQ. 0.0D+0 ) THEN + DX_X = 0.0D+0 + ELSE + DX_X = HUGEVAL + END IF + + DXRAT = NORMDX / PREVNORMDX + DZRAT = DZ_Z / PREV_DZ_Z +* +* Check termination criteria. +* + IF ( YMIN*RCOND .LT. INCR_THRESH*NORMY + $ .AND. Y_PREC_STATE .LT. EXTRA_Y ) + $ INCR_PREC = .TRUE. + + IF ( X_STATE .EQ. NOPROG_STATE .AND. DXRAT .LE. RTHRESH ) + $ X_STATE = WORKING_STATE + IF ( X_STATE .EQ. WORKING_STATE ) THEN + IF ( DX_X .LE. EPS ) THEN + X_STATE = CONV_STATE + ELSE IF ( DXRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + X_STATE = NOPROG_STATE + END IF + ELSE + IF (DXRAT .GT. DXRATMAX) DXRATMAX = DXRAT + END IF + IF ( X_STATE .GT. WORKING_STATE ) FINAL_DX_X = DX_X + END IF + + IF ( Z_STATE .EQ. UNSTABLE_STATE .AND. DZ_Z .LE. DZ_UB ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. NOPROG_STATE .AND. DZRAT .LE. RTHRESH ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. WORKING_STATE ) THEN + IF ( DZ_Z .LE. EPS ) THEN + Z_STATE = CONV_STATE + ELSE IF ( DZ_Z .GT. DZ_UB ) THEN + Z_STATE = UNSTABLE_STATE + DZRATMAX = 0.0D+0 + FINAL_DZ_Z = HUGEVAL + ELSE IF ( DZRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + Z_STATE = NOPROG_STATE + END IF + ELSE + IF ( DZRAT .GT. DZRATMAX ) DZRATMAX = DZRAT + END IF + IF ( Z_STATE .GT. WORKING_STATE ) FINAL_DZ_Z = DZ_Z + END IF + + IF ( X_STATE.NE.WORKING_STATE.AND. + $ ( IGNORE_CWISE.OR.Z_STATE.NE.WORKING_STATE ) ) + $ GOTO 666 + + IF ( INCR_PREC ) THEN + INCR_PREC = .FALSE. + Y_PREC_STATE = Y_PREC_STATE + 1 + DO I = 1, N + Y_TAIL( I ) = 0.0D+0 + END DO + END IF + + PREVNORMDX = NORMDX + PREV_DZ_Z = DZ_Z +* +* Update soluton. +* + IF ( Y_PREC_STATE .LT. EXTRA_Y ) THEN + CALL ZAXPY( N, DCMPLX(1.0D+0), DY, 1, Y(1,J), 1 ) + ELSE + CALL ZLA_WWADDW( N, Y(1,J), Y_TAIL, DY ) + END IF + + END DO +* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. + 666 CONTINUE +* +* Set final_* when cnt hits ithresh. +* + IF ( X_STATE .EQ. WORKING_STATE ) FINAL_DX_X = DX_X + IF ( Z_STATE .EQ. WORKING_STATE ) FINAL_DZ_Z = DZ_Z +* +* Compute error bounds. +* + IF ( N_NORMS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = + $ FINAL_DX_X / (1 - DXRATMAX) + END IF + IF (N_NORMS .GE. 2) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = + $ FINAL_DZ_Z / (1 - DZRATMAX) + END IF +* +* Compute componentwise relative backward error from formula +* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL ZCOPY( N, B( 1, J ), 1, RES, 1 ) + CALL ZHEMV( UPLO, N, DCMPLX(-1.0D+0), A, LDA, Y(1,J), 1, + $ DCMPLX(1.0D+0), RES, 1 ) + + DO I = 1, N + AYB( I ) = CABS1( B( I, J ) ) + END DO +* +* Compute abs(op(A_s))*abs(Y) + abs(B_s). +* + CALL ZLA_HEAMV( UPLO2, N, 1.0D+0, + $ A, LDA, Y(1, J), 1, 1.0D+0, AYB, 1 ) + + CALL ZLA_LIN_BERR( N, N, 1, RES, AYB, BERR_OUT( J ) ) +* +* End of loop for each RHS. +* + END DO +* + RETURN + END diff --git a/dspl/liblapack/SRC/zla_herpvgrw.f b/dspl/liblapack/SRC/zla_herpvgrw.f new file mode 100644 index 0000000..557d6e8 --- /dev/null +++ b/dspl/liblapack/SRC/zla_herpvgrw.f @@ -0,0 +1,330 @@ +*> \brief \b ZLA_HERPVGRW +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_HERPVGRW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLA_HERPVGRW( UPLO, N, INFO, A, LDA, AF, +* LDAF, IPIV, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER*1 UPLO +* INTEGER N, INFO, LDA, LDAF +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ) +* DOUBLE PRECISION WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> ZLA_HERPVGRW computes the reciprocal pivot growth factor +*> norm(A)/norm(U). The "max absolute element" norm is used. If this is +*> much less than 1, the stability of the LU factorization of the +*> (equilibrated) matrix A could be poor. This also means that the +*> solution X, estimated condition numbers, and error bounds could be +*> unreliable. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] INFO +*> \verbatim +*> INFO is INTEGER +*> The value of INFO returned from ZHETRF, .i.e., the pivot in +*> column INFO is exactly 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZHETRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZHETRF. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16HEcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLA_HERPVGRW( UPLO, N, INFO, A, LDA, AF, + $ LDAF, IPIV, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER*1 UPLO + INTEGER N, INFO, LDA, LDAF +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ) + DOUBLE PRECISION WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER NCOLS, I, J, K, KP + DOUBLE PRECISION AMAX, UMAX, RPVGRW, TMP + LOGICAL UPPER, LSAME + COMPLEX*16 ZDUM +* .. +* .. External Functions .. + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, DIMAG, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE ( ZDUM ) ) + ABS( DIMAG ( ZDUM ) ) +* .. +* .. Executable Statements .. +* + UPPER = LSAME( 'Upper', UPLO ) + IF ( INFO.EQ.0 ) THEN + IF (UPPER) THEN + NCOLS = 1 + ELSE + NCOLS = N + END IF + ELSE + NCOLS = INFO + END IF + + RPVGRW = 1.0D+0 + DO I = 1, 2*N + WORK( I ) = 0.0D+0 + END DO +* +* Find the max magnitude entry of each column of A. Compute the max +* for all N columns so we can apply the pivot permutation while +* looping below. Assume a full factorization is the common case. +* + IF ( UPPER ) THEN + DO J = 1, N + DO I = 1, J + WORK( N+I ) = MAX( CABS1( A( I,J ) ), WORK( N+I ) ) + WORK( N+J ) = MAX( CABS1( A( I,J ) ), WORK( N+J ) ) + END DO + END DO + ELSE + DO J = 1, N + DO I = J, N + WORK( N+I ) = MAX( CABS1( A( I, J ) ), WORK( N+I ) ) + WORK( N+J ) = MAX( CABS1( A( I, J ) ), WORK( N+J ) ) + END DO + END DO + END IF +* +* Now find the max magnitude entry of each column of U or L. Also +* permute the magnitudes of A above so they're in the same order as +* the factor. +* +* The iteration orders and permutations were copied from zsytrs. +* Calls to SSWAP would be severe overkill. +* + IF ( UPPER ) THEN + K = N + DO WHILE ( K .LT. NCOLS .AND. K.GT.0 ) + IF ( IPIV( K ).GT.0 ) THEN +! 1x1 pivot + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + DO I = 1, K + WORK( K ) = MAX( CABS1( AF( I, K ) ), WORK( K ) ) + END DO + K = K - 1 + ELSE +! 2x2 pivot + KP = -IPIV( K ) + TMP = WORK( N+K-1 ) + WORK( N+K-1 ) = WORK( N+KP ) + WORK( N+KP ) = TMP + DO I = 1, K-1 + WORK( K ) = MAX( CABS1( AF( I, K ) ), WORK( K ) ) + WORK( K-1 ) = + $ MAX( CABS1( AF( I, K-1 ) ), WORK( K-1 ) ) + END DO + WORK( K ) = MAX( CABS1( AF( K, K ) ), WORK( K ) ) + K = K - 2 + END IF + END DO + K = NCOLS + DO WHILE ( K .LE. N ) + IF ( IPIV( K ).GT.0 ) THEN + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + K = K + 1 + ELSE + KP = -IPIV( K ) + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + K = K + 2 + END IF + END DO + ELSE + K = 1 + DO WHILE ( K .LE. NCOLS ) + IF ( IPIV( K ).GT.0 ) THEN +! 1x1 pivot + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + DO I = K, N + WORK( K ) = MAX( CABS1( AF( I, K ) ), WORK( K ) ) + END DO + K = K + 1 + ELSE +! 2x2 pivot + KP = -IPIV( K ) + TMP = WORK( N+K+1 ) + WORK( N+K+1 ) = WORK( N+KP ) + WORK( N+KP ) = TMP + DO I = K+1, N + WORK( K ) = MAX( CABS1( AF( I, K ) ), WORK( K ) ) + WORK( K+1 ) = + $ MAX( CABS1( AF( I, K+1 ) ) , WORK( K+1 ) ) + END DO + WORK(K) = MAX( CABS1( AF( K, K ) ), WORK( K ) ) + K = K + 2 + END IF + END DO + K = NCOLS + DO WHILE ( K .GE. 1 ) + IF ( IPIV( K ).GT.0 ) THEN + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + K = K - 1 + ELSE + KP = -IPIV( K ) + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + K = K - 2 + ENDIF + END DO + END IF +* +* Compute the *inverse* of the max element growth factor. Dividing +* by zero would imply the largest entry of the factor's column is +* zero. Than can happen when either the column of A is zero or +* massive pivots made the factor underflow to zero. Neither counts +* as growth in itself, so simply ignore terms with zero +* denominators. +* + IF ( UPPER ) THEN + DO I = NCOLS, N + UMAX = WORK( I ) + AMAX = WORK( N+I ) + IF ( UMAX /= 0.0D+0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + ELSE + DO I = 1, NCOLS + UMAX = WORK( I ) + AMAX = WORK( N+I ) + IF ( UMAX /= 0.0D+0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + END IF + + ZLA_HERPVGRW = RPVGRW + END diff --git a/dspl/liblapack/SRC/zla_lin_berr.f b/dspl/liblapack/SRC/zla_lin_berr.f new file mode 100644 index 0000000..91be519 --- /dev/null +++ b/dspl/liblapack/SRC/zla_lin_berr.f @@ -0,0 +1,160 @@ +*> \brief \b ZLA_LIN_BERR computes a component-wise relative backward error. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_LIN_BERR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLA_LIN_BERR ( N, NZ, NRHS, RES, AYB, BERR ) +* +* .. Scalar Arguments .. +* INTEGER N, NZ, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION AYB( N, NRHS ), BERR( NRHS ) +* COMPLEX*16 RES( N, NRHS ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_LIN_BERR computes componentwise relative backward error from +*> the formula +*> max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NZ +*> \verbatim +*> NZ is INTEGER +*> We add (NZ+1)*SLAMCH( 'Safe minimum' ) to R(i) in the numerator to +*> guard against spuriously zero residuals. Default value is N. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices AYB, RES, and BERR. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is COMPLEX*16 array, dimension (N,NRHS) +*> The residual matrix, i.e., the matrix R in the relative backward +*> error formula above. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is DOUBLE PRECISION array, dimension (N, NRHS) +*> The denominator in the relative backward error formula above, i.e., +*> the matrix abs(op(A_s))*abs(Y) + abs(B_s). The matrices A, Y, and B +*> are from iterative refinement (see zla_gerfsx_extended.f). +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error from the formula above. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZLA_LIN_BERR ( N, NZ, NRHS, RES, AYB, BERR ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER N, NZ, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION AYB( N, NRHS ), BERR( NRHS ) + COMPLEX*16 RES( N, NRHS ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + DOUBLE PRECISION TMP + INTEGER I, J + COMPLEX*16 CDUM +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, DIMAG, MAX +* .. +* .. External Functions .. + EXTERNAL DLAMCH + DOUBLE PRECISION DLAMCH + DOUBLE PRECISION SAFE1 +* .. +* .. Statement Functions .. + COMPLEX*16 CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Adding SAFE1 to the numerator guards against spuriously zero +* residuals. A similar safeguard is in the CLA_yyAMV routine used +* to compute AYB. +* + SAFE1 = DLAMCH( 'Safe minimum' ) + SAFE1 = (NZ+1)*SAFE1 + + DO J = 1, NRHS + BERR(J) = 0.0D+0 + DO I = 1, N + IF (AYB(I,J) .NE. 0.0D+0) THEN + TMP = (SAFE1 + CABS1(RES(I,J)))/AYB(I,J) + BERR(J) = MAX( BERR(J), TMP ) + END IF +* +* If AYB is exactly 0.0 (and if computed by CLA_yyAMV), then we know +* the true residual also must be exactly 0.0. +* + END DO + END DO + END diff --git a/dspl/liblapack/SRC/zla_porcond_c.f b/dspl/liblapack/SRC/zla_porcond_c.f new file mode 100644 index 0000000..a74295b --- /dev/null +++ b/dspl/liblapack/SRC/zla_porcond_c.f @@ -0,0 +1,321 @@ +*> \brief \b ZLA_PORCOND_C computes the infinity norm condition number of op(A)*inv(diag(c)) for Hermitian positive-definite matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_PORCOND_C + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLA_PORCOND_C( UPLO, N, A, LDA, AF, +* LDAF, C, CAPPLY, INFO, +* WORK, RWORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* LOGICAL CAPPLY +* INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), WORK( * ) +* DOUBLE PRECISION C( * ), RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_PORCOND_C Computes the infinity norm condition number of +*> op(A) * inv(diag(C)) where C is a DOUBLE PRECISION vector +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H*U or A = L*L**H, as computed by ZPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The vector C in the formula op(A) * inv(diag(C)). +*> \endverbatim +*> +*> \param[in] CAPPLY +*> \verbatim +*> CAPPLY is LOGICAL +*> If .TRUE. then access the vector C in the formula above. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16POcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLA_PORCOND_C( UPLO, N, A, LDA, AF, + $ LDAF, C, CAPPLY, INFO, + $ WORK, RWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + LOGICAL CAPPLY + INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), WORK( * ) + DOUBLE PRECISION C( * ), RWORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER KASE + DOUBLE PRECISION AINVNM, ANORM, TMP + INTEGER I, J + LOGICAL UP, UPPER + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLACN2, ZPOTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL, DIMAG +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + ZLA_PORCOND_C = 0.0D+0 +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLA_PORCOND_C', -INFO ) + RETURN + END IF + UP = .FALSE. + IF ( LSAME( UPLO, 'U' ) ) UP = .TRUE. +* +* Compute norm of op(A)*op2(C). +* + ANORM = 0.0D+0 + IF ( UP ) THEN + DO I = 1, N + TMP = 0.0D+0 + IF ( CAPPLY ) THEN + DO J = 1, I + TMP = TMP + CABS1( A( J, I ) ) / C( J ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( I, J ) ) / C( J ) + END DO + ELSE + DO J = 1, I + TMP = TMP + CABS1( A( J, I ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( I, J ) ) + END DO + END IF + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + ELSE + DO I = 1, N + TMP = 0.0D+0 + IF ( CAPPLY ) THEN + DO J = 1, I + TMP = TMP + CABS1( A( I, J ) ) / C( J ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( J, I ) ) / C( J ) + END DO + ELSE + DO J = 1, I + TMP = TMP + CABS1( A( I, J ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( J, I ) ) + END DO + END IF + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + ZLA_PORCOND_C = 1.0D+0 + RETURN + ELSE IF( ANORM .EQ. 0.0D+0 ) THEN + RETURN + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0D+0 +* + KASE = 0 + 10 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO +* + IF ( UP ) THEN + CALL ZPOTRS( 'U', N, 1, AF, LDAF, + $ WORK, N, INFO ) + ELSE + CALL ZPOTRS( 'L', N, 1, AF, LDAF, + $ WORK, N, INFO ) + ENDIF +* +* Multiply by inv(C). +* + IF ( CAPPLY ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + ELSE +* +* Multiply by inv(C**H). +* + IF ( CAPPLY ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF +* + IF ( UP ) THEN + CALL ZPOTRS( 'U', N, 1, AF, LDAF, + $ WORK, N, INFO ) + ELSE + CALL ZPOTRS( 'L', N, 1, AF, LDAF, + $ WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0D+0 ) + $ ZLA_PORCOND_C = 1.0D+0 / AINVNM +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/zla_porcond_x.f b/dspl/liblapack/SRC/zla_porcond_x.f new file mode 100644 index 0000000..0b2c84f --- /dev/null +++ b/dspl/liblapack/SRC/zla_porcond_x.f @@ -0,0 +1,290 @@ +*> \brief \b ZLA_PORCOND_X computes the infinity norm condition number of op(A)*diag(x) for Hermitian positive-definite matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_PORCOND_X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLA_PORCOND_X( UPLO, N, A, LDA, AF, +* LDAF, X, INFO, WORK, +* RWORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), WORK( * ), X( * ) +* DOUBLE PRECISION RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_PORCOND_X Computes the infinity norm condition number of +*> op(A) * diag(X) where X is a COMPLEX*16 vector. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H*U or A = L*L**H, as computed by ZPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (N) +*> The vector X in the formula op(A) * diag(X). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16POcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLA_PORCOND_X( UPLO, N, A, LDA, AF, + $ LDAF, X, INFO, WORK, + $ RWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), WORK( * ), X( * ) + DOUBLE PRECISION RWORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER KASE, I, J + DOUBLE PRECISION AINVNM, ANORM, TMP + LOGICAL UP, UPPER + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLACN2, ZPOTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, REAL, DIMAG +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + ZLA_PORCOND_X = 0.0D+0 +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF ( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLA_PORCOND_X', -INFO ) + RETURN + END IF + UP = .FALSE. + IF ( LSAME( UPLO, 'U' ) ) UP = .TRUE. +* +* Compute norm of op(A)*op2(C). +* + ANORM = 0.0D+0 + IF ( UP ) THEN + DO I = 1, N + TMP = 0.0D+0 + DO J = 1, I + TMP = TMP + CABS1( A( J, I ) * X( J ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( I, J ) * X( J ) ) + END DO + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + ELSE + DO I = 1, N + TMP = 0.0D+0 + DO J = 1, I + TMP = TMP + CABS1( A( I, J ) * X( J ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( J, I ) * X( J ) ) + END DO + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + ZLA_PORCOND_X = 1.0D+0 + RETURN + ELSE IF( ANORM .EQ. 0.0D+0 ) THEN + RETURN + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0D+0 +* + KASE = 0 + 10 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO +* + IF ( UP ) THEN + CALL ZPOTRS( 'U', N, 1, AF, LDAF, + $ WORK, N, INFO ) + ELSE + CALL ZPOTRS( 'L', N, 1, AF, LDAF, + $ WORK, N, INFO ) + ENDIF +* +* Multiply by inv(X). +* + DO I = 1, N + WORK( I ) = WORK( I ) / X( I ) + END DO + ELSE +* +* Multiply by inv(X**H). +* + DO I = 1, N + WORK( I ) = WORK( I ) / X( I ) + END DO +* + IF ( UP ) THEN + CALL ZPOTRS( 'U', N, 1, AF, LDAF, + $ WORK, N, INFO ) + ELSE + CALL ZPOTRS( 'L', N, 1, AF, LDAF, + $ WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0D+0 ) + $ ZLA_PORCOND_X = 1.0D+0 / AINVNM +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/zla_porfsx_extended.f b/dspl/liblapack/SRC/zla_porfsx_extended.f new file mode 100644 index 0000000..85dd427 --- /dev/null +++ b/dspl/liblapack/SRC/zla_porfsx_extended.f @@ -0,0 +1,688 @@ +*> \brief \b ZLA_PORFSX_EXTENDED improves the computed solution to a system of linear equations for symmetric or Hermitian positive-definite matrices by performing extra-precise iterative refinement and provides error bounds and backward error estimates for the solution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_PORFSX_EXTENDED + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLA_PORFSX_EXTENDED( PREC_TYPE, UPLO, N, NRHS, A, LDA, +* AF, LDAF, COLEQU, C, B, LDB, Y, +* LDY, BERR_OUT, N_NORMS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, RES, +* AYB, DY, Y_TAIL, RCOND, ITHRESH, +* RTHRESH, DZ_UB, IGNORE_CWISE, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, +* $ N_NORMS, ITHRESH +* CHARACTER UPLO +* LOGICAL COLEQU, IGNORE_CWISE +* DOUBLE PRECISION RTHRESH, DZ_UB +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) +* DOUBLE PRECISION C( * ), AYB( * ), RCOND, BERR_OUT( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_PORFSX_EXTENDED improves the computed solution to a system of +*> linear equations by performing extra-precise iterative refinement +*> and provides error bounds and backward error estimates for the solution. +*> This subroutine is called by ZPORFSX to perform iterative refinement. +*> In addition to normwise error bound, the code provides maximum +*> componentwise error bound if possible. See comments for ERR_BNDS_NORM +*> and ERR_BNDS_COMP for details of the error bounds. Note that this +*> subroutine is only resonsible for setting the second fields of +*> ERR_BNDS_NORM and ERR_BNDS_COMP. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] PREC_TYPE +*> \verbatim +*> PREC_TYPE is INTEGER +*> Specifies the intermediate precision to be used in refinement. +*> The value is defined by ILAPREC(P) where P is a CHARACTER and +*> P = 'S': Single +*> = 'D': Double +*> = 'I': Indigenous +*> = 'X', 'E': Extra +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand-sides, i.e., the number of columns of the +*> matrix B. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by ZPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] COLEQU +*> \verbatim +*> COLEQU is LOGICAL +*> If .TRUE. then column equilibration was done to A before calling +*> this routine. This is needed to compute the solution and error +*> bounds correctly. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If COLEQU = .FALSE., C +*> is not accessed. If C is input, each element of C should be a power +*> of the radix to ensure a reliable solution and error estimates. +*> Scaling by powers of the radix does not cause rounding errors unless +*> the result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right-hand-side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX*16 array, dimension (LDY,NRHS) +*> On entry, the solution matrix X, as computed by ZPOTRS. +*> On exit, the improved solution matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +*> +*> \param[out] BERR_OUT +*> \verbatim +*> BERR_OUT is DOUBLE PRECISION array, dimension (NRHS) +*> On exit, BERR_OUT(j) contains the componentwise relative backward +*> error for right-hand-side j from the formula +*> max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. This is computed by ZLA_LIN_BERR. +*> \endverbatim +*> +*> \param[in] N_NORMS +*> \verbatim +*> N_NORMS is INTEGER +*> Determines which error bounds to return (see ERR_BNDS_NORM +*> and ERR_BNDS_COMP). +*> If N_NORMS >= 1 return normwise error bounds. +*> If N_NORMS >= 2 return componentwise error bounds. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is COMPLEX*16 array, dimension (N) +*> Workspace to hold the intermediate residual. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is DOUBLE PRECISION array, dimension (N) +*> Workspace. +*> \endverbatim +*> +*> \param[in] DY +*> \verbatim +*> DY is COMPLEX*16 PRECISION array, dimension (N) +*> Workspace to hold the intermediate solution. +*> \endverbatim +*> +*> \param[in] Y_TAIL +*> \verbatim +*> Y_TAIL is COMPLEX*16 array, dimension (N) +*> Workspace to hold the trailing bits of the intermediate solution. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[in] ITHRESH +*> \verbatim +*> ITHRESH is INTEGER +*> The maximum number of residual computations allowed for +*> refinement. The default is 10. For 'aggressive' set to 100 to +*> permit convergence using approximate factorizations or +*> factorizations other than LU. If the factorization uses a +*> technique other than Gaussian elimination, the guarantees in +*> ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. +*> \endverbatim +*> +*> \param[in] RTHRESH +*> \verbatim +*> RTHRESH is DOUBLE PRECISION +*> Determines when to stop refinement if the error estimate stops +*> decreasing. Refinement will stop when the next solution no longer +*> satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is +*> the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The +*> default value is 0.5. For 'aggressive' set to 0.9 to permit +*> convergence on extremely ill-conditioned matrices. See LAWN 165 +*> for more details. +*> \endverbatim +*> +*> \param[in] DZ_UB +*> \verbatim +*> DZ_UB is DOUBLE PRECISION +*> Determines when to start considering componentwise convergence. +*> Componentwise convergence is only considered after each component +*> of the solution Y is stable, which we definte as the relative +*> change in each component being less than DZ_UB. The default value +*> is 0.25, requiring the first bit to be stable. See LAWN 165 for +*> more details. +*> \endverbatim +*> +*> \param[in] IGNORE_CWISE +*> \verbatim +*> IGNORE_CWISE is LOGICAL +*> If .TRUE. then ignore componentwise convergence. Default value +*> is .FALSE.. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> < 0: if INFO = -i, the ith argument to ZPOTRS had an illegal +*> value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16POcomputational +* +* ===================================================================== + SUBROUTINE ZLA_PORFSX_EXTENDED( PREC_TYPE, UPLO, N, NRHS, A, LDA, + $ AF, LDAF, COLEQU, C, B, LDB, Y, + $ LDY, BERR_OUT, N_NORMS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, RES, + $ AYB, DY, Y_TAIL, RCOND, ITHRESH, + $ RTHRESH, DZ_UB, IGNORE_CWISE, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, + $ N_NORMS, ITHRESH + CHARACTER UPLO + LOGICAL COLEQU, IGNORE_CWISE + DOUBLE PRECISION RTHRESH, DZ_UB +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) + DOUBLE PRECISION C( * ), AYB( * ), RCOND, BERR_OUT( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER UPLO2, CNT, I, J, X_STATE, Z_STATE, + $ Y_PREC_STATE + DOUBLE PRECISION YK, DYK, YMIN, NORMY, NORMX, NORMDX, DXRAT, + $ DZRAT, PREVNORMDX, PREV_DZ_Z, DXRATMAX, + $ DZRATMAX, DX_X, DZ_Z, FINAL_DX_X, FINAL_DZ_Z, + $ EPS, HUGEVAL, INCR_THRESH + LOGICAL INCR_PREC + COMPLEX*16 ZDUM +* .. +* .. Parameters .. + INTEGER UNSTABLE_STATE, WORKING_STATE, CONV_STATE, + $ NOPROG_STATE, BASE_RESIDUAL, EXTRA_RESIDUAL, + $ EXTRA_Y + PARAMETER ( UNSTABLE_STATE = 0, WORKING_STATE = 1, + $ CONV_STATE = 2, NOPROG_STATE = 3 ) + PARAMETER ( BASE_RESIDUAL = 0, EXTRA_RESIDUAL = 1, + $ EXTRA_Y = 2 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL ILAUPLO + INTEGER ILAUPLO +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZCOPY, ZPOTRS, ZHEMV, BLAS_ZHEMV_X, + $ BLAS_ZHEMV2_X, ZLA_HEAMV, ZLA_WWADDW, + $ ZLA_LIN_BERR, DLAMCH + DOUBLE PRECISION DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + IF (INFO.NE.0) RETURN + EPS = DLAMCH( 'Epsilon' ) + HUGEVAL = DLAMCH( 'Overflow' ) +* Force HUGEVAL to Inf + HUGEVAL = HUGEVAL * HUGEVAL +* Using HUGEVAL may lead to spurious underflows. + INCR_THRESH = DBLE(N) * EPS + + IF (LSAME (UPLO, 'L')) THEN + UPLO2 = ILAUPLO( 'L' ) + ELSE + UPLO2 = ILAUPLO( 'U' ) + ENDIF + + DO J = 1, NRHS + Y_PREC_STATE = EXTRA_RESIDUAL + IF (Y_PREC_STATE .EQ. EXTRA_Y) THEN + DO I = 1, N + Y_TAIL( I ) = 0.0D+0 + END DO + END IF + + DXRAT = 0.0D+0 + DXRATMAX = 0.0D+0 + DZRAT = 0.0D+0 + DZRATMAX = 0.0D+0 + FINAL_DX_X = HUGEVAL + FINAL_DZ_Z = HUGEVAL + PREVNORMDX = HUGEVAL + PREV_DZ_Z = HUGEVAL + DZ_Z = HUGEVAL + DX_X = HUGEVAL + + X_STATE = WORKING_STATE + Z_STATE = UNSTABLE_STATE + INCR_PREC = .FALSE. + + DO CNT = 1, ITHRESH +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL ZCOPY( N, B( 1, J ), 1, RES, 1 ) + IF (Y_PREC_STATE .EQ. BASE_RESIDUAL) THEN + CALL ZHEMV(UPLO, N, DCMPLX(-1.0D+0), A, LDA, Y(1,J), 1, + $ DCMPLX(1.0D+0), RES, 1) + ELSE IF (Y_PREC_STATE .EQ. EXTRA_RESIDUAL) THEN + CALL BLAS_ZHEMV_X(UPLO2, N, DCMPLX(-1.0D+0), A, LDA, + $ Y( 1, J ), 1, DCMPLX(1.0D+0), RES, 1, PREC_TYPE) + ELSE + CALL BLAS_ZHEMV2_X(UPLO2, N, DCMPLX(-1.0D+0), A, LDA, + $ Y(1, J), Y_TAIL, 1, DCMPLX(1.0D+0), RES, 1, + $ PREC_TYPE) + END IF + +! XXX: RES is no longer needed. + CALL ZCOPY( N, RES, 1, DY, 1 ) + CALL ZPOTRS( UPLO, N, 1, AF, LDAF, DY, N, INFO) +* +* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. +* + NORMX = 0.0D+0 + NORMY = 0.0D+0 + NORMDX = 0.0D+0 + DZ_Z = 0.0D+0 + YMIN = HUGEVAL + + DO I = 1, N + YK = CABS1(Y(I, J)) + DYK = CABS1(DY(I)) + + IF (YK .NE. 0.0D+0) THEN + DZ_Z = MAX( DZ_Z, DYK / YK ) + ELSE IF (DYK .NE. 0.0D+0) THEN + DZ_Z = HUGEVAL + END IF + + YMIN = MIN( YMIN, YK ) + + NORMY = MAX( NORMY, YK ) + + IF ( COLEQU ) THEN + NORMX = MAX(NORMX, YK * C(I)) + NORMDX = MAX(NORMDX, DYK * C(I)) + ELSE + NORMX = NORMY + NORMDX = MAX(NORMDX, DYK) + END IF + END DO + + IF (NORMX .NE. 0.0D+0) THEN + DX_X = NORMDX / NORMX + ELSE IF (NORMDX .EQ. 0.0D+0) THEN + DX_X = 0.0D+0 + ELSE + DX_X = HUGEVAL + END IF + + DXRAT = NORMDX / PREVNORMDX + DZRAT = DZ_Z / PREV_DZ_Z +* +* Check termination criteria. +* + IF (YMIN*RCOND .LT. INCR_THRESH*NORMY + $ .AND. Y_PREC_STATE .LT. EXTRA_Y) + $ INCR_PREC = .TRUE. + + IF (X_STATE .EQ. NOPROG_STATE .AND. DXRAT .LE. RTHRESH) + $ X_STATE = WORKING_STATE + IF (X_STATE .EQ. WORKING_STATE) THEN + IF (DX_X .LE. EPS) THEN + X_STATE = CONV_STATE + ELSE IF (DXRAT .GT. RTHRESH) THEN + IF (Y_PREC_STATE .NE. EXTRA_Y) THEN + INCR_PREC = .TRUE. + ELSE + X_STATE = NOPROG_STATE + END IF + ELSE + IF (DXRAT .GT. DXRATMAX) DXRATMAX = DXRAT + END IF + IF (X_STATE .GT. WORKING_STATE) FINAL_DX_X = DX_X + END IF + + IF (Z_STATE .EQ. UNSTABLE_STATE .AND. DZ_Z .LE. DZ_UB) + $ Z_STATE = WORKING_STATE + IF (Z_STATE .EQ. NOPROG_STATE .AND. DZRAT .LE. RTHRESH) + $ Z_STATE = WORKING_STATE + IF (Z_STATE .EQ. WORKING_STATE) THEN + IF (DZ_Z .LE. EPS) THEN + Z_STATE = CONV_STATE + ELSE IF (DZ_Z .GT. DZ_UB) THEN + Z_STATE = UNSTABLE_STATE + DZRATMAX = 0.0D+0 + FINAL_DZ_Z = HUGEVAL + ELSE IF (DZRAT .GT. RTHRESH) THEN + IF (Y_PREC_STATE .NE. EXTRA_Y) THEN + INCR_PREC = .TRUE. + ELSE + Z_STATE = NOPROG_STATE + END IF + ELSE + IF (DZRAT .GT. DZRATMAX) DZRATMAX = DZRAT + END IF + IF (Z_STATE .GT. WORKING_STATE) FINAL_DZ_Z = DZ_Z + END IF + + IF ( X_STATE.NE.WORKING_STATE.AND. + $ (IGNORE_CWISE.OR.Z_STATE.NE.WORKING_STATE) ) + $ GOTO 666 + + IF (INCR_PREC) THEN + INCR_PREC = .FALSE. + Y_PREC_STATE = Y_PREC_STATE + 1 + DO I = 1, N + Y_TAIL( I ) = 0.0D+0 + END DO + END IF + + PREVNORMDX = NORMDX + PREV_DZ_Z = DZ_Z +* +* Update soluton. +* + IF (Y_PREC_STATE .LT. EXTRA_Y) THEN + CALL ZAXPY( N, DCMPLX(1.0D+0), DY, 1, Y(1,J), 1 ) + ELSE + CALL ZLA_WWADDW(N, Y(1,J), Y_TAIL, DY) + END IF + + END DO +* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. + 666 CONTINUE +* +* Set final_* when cnt hits ithresh. +* + IF (X_STATE .EQ. WORKING_STATE) FINAL_DX_X = DX_X + IF (Z_STATE .EQ. WORKING_STATE) FINAL_DZ_Z = DZ_Z +* +* Compute error bounds. +* + IF (N_NORMS .GE. 1) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = + $ FINAL_DX_X / (1 - DXRATMAX) + END IF + IF (N_NORMS .GE. 2) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = + $ FINAL_DZ_Z / (1 - DZRATMAX) + END IF +* +* Compute componentwise relative backward error from formula +* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL ZCOPY( N, B( 1, J ), 1, RES, 1 ) + CALL ZHEMV(UPLO, N, DCMPLX(-1.0D+0), A, LDA, Y(1,J), 1, + $ DCMPLX(1.0D+0), RES, 1) + + DO I = 1, N + AYB( I ) = CABS1( B( I, J ) ) + END DO +* +* Compute abs(op(A_s))*abs(Y) + abs(B_s). +* + CALL ZLA_HEAMV (UPLO2, N, 1.0D+0, + $ A, LDA, Y(1, J), 1, 1.0D+0, AYB, 1) + + CALL ZLA_LIN_BERR (N, N, 1, RES, AYB, BERR_OUT(J)) +* +* End of loop for each RHS. +* + END DO +* + RETURN + END diff --git a/dspl/liblapack/SRC/zla_porpvgrw.f b/dspl/liblapack/SRC/zla_porpvgrw.f new file mode 100644 index 0000000..cd71635 --- /dev/null +++ b/dspl/liblapack/SRC/zla_porpvgrw.f @@ -0,0 +1,218 @@ +*> \brief \b ZLA_PORPVGRW computes the reciprocal pivot growth factor norm(A)/norm(U) for a symmetric or Hermitian positive-definite matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_PORPVGRW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLA_PORPVGRW( UPLO, NCOLS, A, LDA, AF, +* LDAF, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER*1 UPLO +* INTEGER NCOLS, LDA, LDAF +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ) +* DOUBLE PRECISION WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> ZLA_PORPVGRW computes the reciprocal pivot growth factor +*> norm(A)/norm(U). The "max absolute element" norm is used. If this is +*> much less than 1, the stability of the LU factorization of the +*> (equilibrated) matrix A could be poor. This also means that the +*> solution X, estimated condition numbers, and error bounds could be +*> unreliable. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] NCOLS +*> \verbatim +*> NCOLS is INTEGER +*> The number of columns of the matrix A. NCOLS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by ZPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16POcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLA_PORPVGRW( UPLO, NCOLS, A, LDA, AF, + $ LDAF, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER*1 UPLO + INTEGER NCOLS, LDA, LDAF +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), AF( LDAF, * ) + DOUBLE PRECISION WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION AMAX, UMAX, RPVGRW + LOGICAL UPPER + COMPLEX*16 ZDUM +* .. +* .. External Functions .. + EXTERNAL LSAME + LOGICAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, REAL, DIMAG +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. + UPPER = LSAME( 'Upper', UPLO ) +* +* DPOTRF will have factored only the NCOLSxNCOLS leading minor, so +* we restrict the growth search to that minor and use only the first +* 2*NCOLS workspace entries. +* + RPVGRW = 1.0D+0 + DO I = 1, 2*NCOLS + WORK( I ) = 0.0D+0 + END DO +* +* Find the max magnitude entry of each column. +* + IF ( UPPER ) THEN + DO J = 1, NCOLS + DO I = 1, J + WORK( NCOLS+J ) = + $ MAX( CABS1( A( I, J ) ), WORK( NCOLS+J ) ) + END DO + END DO + ELSE + DO J = 1, NCOLS + DO I = J, NCOLS + WORK( NCOLS+J ) = + $ MAX( CABS1( A( I, J ) ), WORK( NCOLS+J ) ) + END DO + END DO + END IF +* +* Now find the max magnitude entry of each column of the factor in +* AF. No pivoting, so no permutations. +* + IF ( LSAME( 'Upper', UPLO ) ) THEN + DO J = 1, NCOLS + DO I = 1, J + WORK( J ) = MAX( CABS1( AF( I, J ) ), WORK( J ) ) + END DO + END DO + ELSE + DO J = 1, NCOLS + DO I = J, NCOLS + WORK( J ) = MAX( CABS1( AF( I, J ) ), WORK( J ) ) + END DO + END DO + END IF +* +* Compute the *inverse* of the max element growth factor. Dividing +* by zero would imply the largest entry of the factor's column is +* zero. Than can happen when either the column of A is zero or +* massive pivots made the factor underflow to zero. Neither counts +* as growth in itself, so simply ignore terms with zero +* denominators. +* + IF ( LSAME( 'Upper', UPLO ) ) THEN + DO I = 1, NCOLS + UMAX = WORK( I ) + AMAX = WORK( NCOLS+I ) + IF ( UMAX /= 0.0D+0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + ELSE + DO I = 1, NCOLS + UMAX = WORK( I ) + AMAX = WORK( NCOLS+I ) + IF ( UMAX /= 0.0D+0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + END IF + + ZLA_PORPVGRW = RPVGRW + END diff --git a/dspl/liblapack/SRC/zla_syamv.f b/dspl/liblapack/SRC/zla_syamv.f new file mode 100644 index 0000000..02958be --- /dev/null +++ b/dspl/liblapack/SRC/zla_syamv.f @@ -0,0 +1,428 @@ +*> \brief \b ZLA_SYAMV computes a matrix-vector product using a symmetric indefinite matrix to calculate error bounds. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_SYAMV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLA_SYAMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, +* INCY ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION ALPHA, BETA +* INTEGER INCX, INCY, LDA, N +* INTEGER UPLO +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), X( * ) +* DOUBLE PRECISION Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_SYAMV performs the matrix-vector operation +*> +*> y := alpha*abs(A)*abs(x) + beta*abs(y), +*> +*> where alpha and beta are scalars, x and y are vectors and A is an +*> n by n symmetric matrix. +*> +*> This function is primarily used in calculating error bounds. +*> To protect against underflow during evaluation, components in +*> the resulting vector are perturbed away from zero by (N+1) +*> times the underflow threshold. To prevent unnecessarily large +*> errors for block-structure embedded in general matrices, +*> "symbolically" zero components are not perturbed. A zero +*> entry is considered "symbolic" if all multiplications involved +*> in computing that entry have at least one zero multiplicand. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is INTEGER +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array A is to be referenced as +*> follows: +*> +*> UPLO = BLAS_UPPER Only the upper triangular part of A +*> is to be referenced. +*> +*> UPLO = BLAS_LOWER Only the lower triangular part of A +*> is to be referenced. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION . +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension ( LDA, n ). +*> Before entry, the leading m by n part of the array A must +*> contain the matrix of coefficients. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, n ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension at least +*> ( 1 + ( n - 1 )*abs( INCX ) ) +*> Before entry, the incremented array X must contain the +*> vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION . +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is DOUBLE PRECISION array, dimension +*> ( 1 + ( n - 1 )*abs( INCY ) ) +*> Before entry with BETA non-zero, the incremented array Y +*> must contain the vector y. On exit, Y is overwritten by the +*> updated vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> Unchanged on exit. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16SYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 2 Blas routine. +*> +*> -- Written on 22-October-1986. +*> Jack Dongarra, Argonne National Lab. +*> Jeremy Du Croz, Nag Central Office. +*> Sven Hammarling, Nag Central Office. +*> Richard Hanson, Sandia National Labs. +*> -- Modified for the absolute-value product, April 2006 +*> Jason Riedy, UC Berkeley +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLA_SYAMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, + $ INCY ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + DOUBLE PRECISION ALPHA, BETA + INTEGER INCX, INCY, LDA, N + INTEGER UPLO +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ) + DOUBLE PRECISION Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL SYMB_ZERO + DOUBLE PRECISION TEMP, SAFE1 + INTEGER I, INFO, IY, J, JX, KX, KY + COMPLEX*16 ZDUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, DLAMCH + DOUBLE PRECISION DLAMCH +* .. +* .. External Functions .. + EXTERNAL ILAUPLO + INTEGER ILAUPLO +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, ABS, SIGN, REAL, DIMAG +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE ( ZDUM ) ) + ABS( DIMAG ( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( UPLO.NE.ILAUPLO( 'U' ) .AND. + $ UPLO.NE.ILAUPLO( 'L' ) )THEN + INFO = 1 + ELSE IF( N.LT.0 )THEN + INFO = 2 + ELSE IF( LDA.LT.MAX( 1, N ) )THEN + INFO = 5 + ELSE IF( INCX.EQ.0 )THEN + INFO = 7 + ELSE IF( INCY.EQ.0 )THEN + INFO = 10 + END IF + IF( INFO.NE.0 )THEN + CALL XERBLA( 'DSYMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 )THEN + KX = 1 + ELSE + KX = 1 - ( N - 1 )*INCX + END IF + IF( INCY.GT.0 )THEN + KY = 1 + ELSE + KY = 1 - ( N - 1 )*INCY + END IF +* +* Set SAFE1 essentially to be the underflow threshold times the +* number of additions in each row. +* + SAFE1 = DLAMCH( 'Safe minimum' ) + SAFE1 = (N+1)*SAFE1 +* +* Form y := alpha*abs(A)*abs(x) + beta*abs(y). +* +* The O(N^2) SYMB_ZERO tests could be replaced by O(N) queries to +* the inexact flag. Still doesn't help change the iteration order +* to per-column. +* + IY = KY + IF ( INCX.EQ.1 ) THEN + IF ( UPLO .EQ. ILAUPLO( 'U' ) ) THEN + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = CABS1( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + DO J = I+1, N + TEMP = CABS1( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = CABS1( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + DO J = I+1, N + TEMP = CABS1( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( J ) )*TEMP + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + ELSE + IF ( UPLO .EQ. ILAUPLO( 'U' ) ) THEN + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + JX = KX + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = CABS1( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + DO J = I+1, N + TEMP = CABS1( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + ELSE + DO I = 1, N + IF ( BETA .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + Y( IY ) = 0.0D+0 + ELSE IF ( Y( IY ) .EQ. ZERO ) THEN + SYMB_ZERO = .TRUE. + ELSE + SYMB_ZERO = .FALSE. + Y( IY ) = BETA * ABS( Y( IY ) ) + END IF + JX = KX + IF ( ALPHA .NE. ZERO ) THEN + DO J = 1, I + TEMP = CABS1( A( I, J ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + DO J = I+1, N + TEMP = CABS1( A( J, I ) ) + SYMB_ZERO = SYMB_ZERO .AND. + $ ( X( J ) .EQ. ZERO .OR. TEMP .EQ. ZERO ) + + Y( IY ) = Y( IY ) + ALPHA*CABS1( X( JX ) )*TEMP + JX = JX + INCX + END DO + END IF + + IF ( .NOT.SYMB_ZERO ) + $ Y( IY ) = Y( IY ) + SIGN( SAFE1, Y( IY ) ) + + IY = IY + INCY + END DO + END IF + + END IF +* + RETURN +* +* End of ZLA_SYAMV +* + END diff --git a/dspl/liblapack/SRC/zla_syrcond_c.f b/dspl/liblapack/SRC/zla_syrcond_c.f new file mode 100644 index 0000000..be9d14b --- /dev/null +++ b/dspl/liblapack/SRC/zla_syrcond_c.f @@ -0,0 +1,330 @@ +*> \brief \b ZLA_SYRCOND_C computes the infinity norm condition number of op(A)*inv(diag(c)) for symmetric indefinite matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_SYRCOND_C + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLA_SYRCOND_C( UPLO, N, A, LDA, AF, +* LDAF, IPIV, C, CAPPLY, +* INFO, WORK, RWORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* LOGICAL CAPPLY +* INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), WORK( * ) +* DOUBLE PRECISION C( * ), RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_SYRCOND_C Computes the infinity norm condition number of +*> op(A) * inv(diag(C)) where C is a DOUBLE PRECISION vector. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSYTRF. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The vector C in the formula op(A) * inv(diag(C)). +*> \endverbatim +*> +*> \param[in] CAPPLY +*> \verbatim +*> CAPPLY is LOGICAL +*> If .TRUE. then access the vector C in the formula above. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLA_SYRCOND_C( UPLO, N, A, LDA, AF, + $ LDAF, IPIV, C, CAPPLY, + $ INFO, WORK, RWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + LOGICAL CAPPLY + INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), WORK( * ) + DOUBLE PRECISION C( * ), RWORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER KASE + DOUBLE PRECISION AINVNM, ANORM, TMP + INTEGER I, J + LOGICAL UP, UPPER + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLACN2, ZSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + ZLA_SYRCOND_C = 0.0D+0 +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLA_SYRCOND_C', -INFO ) + RETURN + END IF + UP = .FALSE. + IF ( LSAME( UPLO, 'U' ) ) UP = .TRUE. +* +* Compute norm of op(A)*op2(C). +* + ANORM = 0.0D+0 + IF ( UP ) THEN + DO I = 1, N + TMP = 0.0D+0 + IF ( CAPPLY ) THEN + DO J = 1, I + TMP = TMP + CABS1( A( J, I ) ) / C( J ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( I, J ) ) / C( J ) + END DO + ELSE + DO J = 1, I + TMP = TMP + CABS1( A( J, I ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( I, J ) ) + END DO + END IF + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + ELSE + DO I = 1, N + TMP = 0.0D+0 + IF ( CAPPLY ) THEN + DO J = 1, I + TMP = TMP + CABS1( A( I, J ) ) / C( J ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( J, I ) ) / C( J ) + END DO + ELSE + DO J = 1, I + TMP = TMP + CABS1( A( I, J ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( J, I ) ) + END DO + END IF + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + ZLA_SYRCOND_C = 1.0D+0 + RETURN + ELSE IF( ANORM .EQ. 0.0D+0 ) THEN + RETURN + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0D+0 +* + KASE = 0 + 10 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO +* + IF ( UP ) THEN + CALL ZSYTRS( 'U', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL ZSYTRS( 'L', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ENDIF +* +* Multiply by inv(C). +* + IF ( CAPPLY ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF + ELSE +* +* Multiply by inv(C**T). +* + IF ( CAPPLY ) THEN + DO I = 1, N + WORK( I ) = WORK( I ) * C( I ) + END DO + END IF +* + IF ( UP ) THEN + CALL ZSYTRS( 'U', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL ZSYTRS( 'L', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0D+0 ) + $ ZLA_SYRCOND_C = 1.0D+0 / AINVNM +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/zla_syrcond_x.f b/dspl/liblapack/SRC/zla_syrcond_x.f new file mode 100644 index 0000000..2d02690 --- /dev/null +++ b/dspl/liblapack/SRC/zla_syrcond_x.f @@ -0,0 +1,300 @@ +*> \brief \b ZLA_SYRCOND_X computes the infinity norm condition number of op(A)*diag(x) for symmetric indefinite matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_SYRCOND_X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLA_SYRCOND_X( UPLO, N, A, LDA, AF, +* LDAF, IPIV, X, INFO, +* WORK, RWORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), WORK( * ), X( * ) +* DOUBLE PRECISION RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_SYRCOND_X Computes the infinity norm condition number of +*> op(A) * diag(X) where X is a COMPLEX*16 vector. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSYTRF. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (N) +*> The vector X in the formula op(A) * diag(X). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> i > 0: The ith argument is invalid. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N). +*> Workspace. +*> \endverbatim +*> +*> \param[in] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N). +*> Workspace. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLA_SYRCOND_X( UPLO, N, A, LDA, AF, + $ LDAF, IPIV, X, INFO, + $ WORK, RWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LDAF, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), WORK( * ), X( * ) + DOUBLE PRECISION RWORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER KASE + DOUBLE PRECISION AINVNM, ANORM, TMP + INTEGER I, J + LOGICAL UP, UPPER + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLACN2, ZSYTRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + ZLA_SYRCOND_X = 0.0D+0 +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLA_SYRCOND_X', -INFO ) + RETURN + END IF + UP = .FALSE. + IF ( LSAME( UPLO, 'U' ) ) UP = .TRUE. +* +* Compute norm of op(A)*op2(C). +* + ANORM = 0.0D+0 + IF ( UP ) THEN + DO I = 1, N + TMP = 0.0D+0 + DO J = 1, I + TMP = TMP + CABS1( A( J, I ) * X( J ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( I, J ) * X( J ) ) + END DO + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + ELSE + DO I = 1, N + TMP = 0.0D+0 + DO J = 1, I + TMP = TMP + CABS1( A( I, J ) * X( J ) ) + END DO + DO J = I+1, N + TMP = TMP + CABS1( A( J, I ) * X( J ) ) + END DO + RWORK( I ) = TMP + ANORM = MAX( ANORM, TMP ) + END DO + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + ZLA_SYRCOND_X = 1.0D+0 + RETURN + ELSE IF( ANORM .EQ. 0.0D+0 ) THEN + RETURN + END IF +* +* Estimate the norm of inv(op(A)). +* + AINVNM = 0.0D+0 +* + KASE = 0 + 10 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.2 ) THEN +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO +* + IF ( UP ) THEN + CALL ZSYTRS( 'U', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL ZSYTRS( 'L', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ENDIF +* +* Multiply by inv(X). +* + DO I = 1, N + WORK( I ) = WORK( I ) / X( I ) + END DO + ELSE +* +* Multiply by inv(X**T). +* + DO I = 1, N + WORK( I ) = WORK( I ) / X( I ) + END DO +* + IF ( UP ) THEN + CALL ZSYTRS( 'U', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + ELSE + CALL ZSYTRS( 'L', N, 1, AF, LDAF, IPIV, + $ WORK, N, INFO ) + END IF +* +* Multiply by R. +* + DO I = 1, N + WORK( I ) = WORK( I ) * RWORK( I ) + END DO + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM .NE. 0.0D+0 ) + $ ZLA_SYRCOND_X = 1.0D+0 / AINVNM +* + RETURN +* + END diff --git a/dspl/liblapack/SRC/zla_syrfsx_extended.f b/dspl/liblapack/SRC/zla_syrfsx_extended.f new file mode 100644 index 0000000..a9716fd --- /dev/null +++ b/dspl/liblapack/SRC/zla_syrfsx_extended.f @@ -0,0 +1,717 @@ +*> \brief \b ZLA_SYRFSX_EXTENDED improves the computed solution to a system of linear equations for symmetric indefinite matrices by performing extra-precise iterative refinement and provides error bounds and backward error estimates for the solution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_SYRFSX_EXTENDED + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLA_SYRFSX_EXTENDED( PREC_TYPE, UPLO, N, NRHS, A, LDA, +* AF, LDAF, IPIV, COLEQU, C, B, LDB, +* Y, LDY, BERR_OUT, N_NORMS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, RES, +* AYB, DY, Y_TAIL, RCOND, ITHRESH, +* RTHRESH, DZ_UB, IGNORE_CWISE, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, +* $ N_NORMS, ITHRESH +* CHARACTER UPLO +* LOGICAL COLEQU, IGNORE_CWISE +* DOUBLE PRECISION RTHRESH, DZ_UB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) +* DOUBLE PRECISION C( * ), AYB( * ), RCOND, BERR_OUT( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_SYRFSX_EXTENDED improves the computed solution to a system of +*> linear equations by performing extra-precise iterative refinement +*> and provides error bounds and backward error estimates for the solution. +*> This subroutine is called by ZSYRFSX to perform iterative refinement. +*> In addition to normwise error bound, the code provides maximum +*> componentwise error bound if possible. See comments for ERR_BNDS_NORM +*> and ERR_BNDS_COMP for details of the error bounds. Note that this +*> subroutine is only resonsible for setting the second fields of +*> ERR_BNDS_NORM and ERR_BNDS_COMP. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] PREC_TYPE +*> \verbatim +*> PREC_TYPE is INTEGER +*> Specifies the intermediate precision to be used in refinement. +*> The value is defined by ILAPREC(P) where P is a CHARACTER and +*> P = 'S': Single +*> = 'D': Double +*> = 'I': Indigenous +*> = 'X', 'E': Extra +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand-sides, i.e., the number of columns of the +*> matrix B. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSYTRF. +*> \endverbatim +*> +*> \param[in] COLEQU +*> \verbatim +*> COLEQU is LOGICAL +*> If .TRUE. then column equilibration was done to A before calling +*> this routine. This is needed to compute the solution and error +*> bounds correctly. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. If COLEQU = .FALSE., C +*> is not accessed. If C is input, each element of C should be a power +*> of the radix to ensure a reliable solution and error estimates. +*> Scaling by powers of the radix does not cause rounding errors unless +*> the result underflows or overflows. Rounding errors during scaling +*> lead to refining with a matrix that is not equivalent to the +*> input matrix, producing error estimates that may not be +*> reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right-hand-side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX*16 array, dimension (LDY,NRHS) +*> On entry, the solution matrix X, as computed by ZSYTRS. +*> On exit, the improved solution matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +*> +*> \param[out] BERR_OUT +*> \verbatim +*> BERR_OUT is DOUBLE PRECISION array, dimension (NRHS) +*> On exit, BERR_OUT(j) contains the componentwise relative backward +*> error for right-hand-side j from the formula +*> max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +*> where abs(Z) is the componentwise absolute value of the matrix +*> or vector Z. This is computed by ZLA_LIN_BERR. +*> \endverbatim +*> +*> \param[in] N_NORMS +*> \verbatim +*> N_NORMS is INTEGER +*> Determines which error bounds to return (see ERR_BNDS_NORM +*> and ERR_BNDS_COMP). +*> If N_NORMS >= 1 return normwise error bounds. +*> If N_NORMS >= 2 return componentwise error bounds. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in,out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * slamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * slamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * slamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> This subroutine is only responsible for setting the second field +*> above. +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] RES +*> \verbatim +*> RES is COMPLEX*16 array, dimension (N) +*> Workspace to hold the intermediate residual. +*> \endverbatim +*> +*> \param[in] AYB +*> \verbatim +*> AYB is DOUBLE PRECISION array, dimension (N) +*> Workspace. +*> \endverbatim +*> +*> \param[in] DY +*> \verbatim +*> DY is COMPLEX*16 array, dimension (N) +*> Workspace to hold the intermediate solution. +*> \endverbatim +*> +*> \param[in] Y_TAIL +*> \verbatim +*> Y_TAIL is COMPLEX*16 array, dimension (N) +*> Workspace to hold the trailing bits of the intermediate solution. +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[in] ITHRESH +*> \verbatim +*> ITHRESH is INTEGER +*> The maximum number of residual computations allowed for +*> refinement. The default is 10. For 'aggressive' set to 100 to +*> permit convergence using approximate factorizations or +*> factorizations other than LU. If the factorization uses a +*> technique other than Gaussian elimination, the guarantees in +*> ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. +*> \endverbatim +*> +*> \param[in] RTHRESH +*> \verbatim +*> RTHRESH is DOUBLE PRECISION +*> Determines when to stop refinement if the error estimate stops +*> decreasing. Refinement will stop when the next solution no longer +*> satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is +*> the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The +*> default value is 0.5. For 'aggressive' set to 0.9 to permit +*> convergence on extremely ill-conditioned matrices. See LAWN 165 +*> for more details. +*> \endverbatim +*> +*> \param[in] DZ_UB +*> \verbatim +*> DZ_UB is DOUBLE PRECISION +*> Determines when to start considering componentwise convergence. +*> Componentwise convergence is only considered after each component +*> of the solution Y is stable, which we definte as the relative +*> change in each component being less than DZ_UB. The default value +*> is 0.25, requiring the first bit to be stable. See LAWN 165 for +*> more details. +*> \endverbatim +*> +*> \param[in] IGNORE_CWISE +*> \verbatim +*> IGNORE_CWISE is LOGICAL +*> If .TRUE. then ignore componentwise convergence. Default value +*> is .FALSE.. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. +*> < 0: if INFO = -i, the ith argument to ZLA_HERFSX_EXTENDED had an illegal +*> value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZLA_SYRFSX_EXTENDED( PREC_TYPE, UPLO, N, NRHS, A, LDA, + $ AF, LDAF, IPIV, COLEQU, C, B, LDB, + $ Y, LDY, BERR_OUT, N_NORMS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, RES, + $ AYB, DY, Y_TAIL, RCOND, ITHRESH, + $ RTHRESH, DZ_UB, IGNORE_CWISE, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDAF, LDB, LDY, N, NRHS, PREC_TYPE, + $ N_NORMS, ITHRESH + CHARACTER UPLO + LOGICAL COLEQU, IGNORE_CWISE + DOUBLE PRECISION RTHRESH, DZ_UB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ Y( LDY, * ), RES( * ), DY( * ), Y_TAIL( * ) + DOUBLE PRECISION C( * ), AYB( * ), RCOND, BERR_OUT( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER UPLO2, CNT, I, J, X_STATE, Z_STATE, + $ Y_PREC_STATE + DOUBLE PRECISION YK, DYK, YMIN, NORMY, NORMX, NORMDX, DXRAT, + $ DZRAT, PREVNORMDX, PREV_DZ_Z, DXRATMAX, + $ DZRATMAX, DX_X, DZ_Z, FINAL_DX_X, FINAL_DZ_Z, + $ EPS, HUGEVAL, INCR_THRESH + LOGICAL INCR_PREC, UPPER + COMPLEX*16 ZDUM +* .. +* .. Parameters .. + INTEGER UNSTABLE_STATE, WORKING_STATE, CONV_STATE, + $ NOPROG_STATE, BASE_RESIDUAL, EXTRA_RESIDUAL, + $ EXTRA_Y + PARAMETER ( UNSTABLE_STATE = 0, WORKING_STATE = 1, + $ CONV_STATE = 2, NOPROG_STATE = 3 ) + PARAMETER ( BASE_RESIDUAL = 0, EXTRA_RESIDUAL = 1, + $ EXTRA_Y = 2 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL ILAUPLO + INTEGER ILAUPLO +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZCOPY, ZSYTRS, ZSYMV, BLAS_ZSYMV_X, + $ BLAS_ZSYMV2_X, ZLA_SYAMV, ZLA_WWADDW, + $ ZLA_LIN_BERR + DOUBLE PRECISION DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, DIMAG, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDY.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLA_HERFSX_EXTENDED', -INFO ) + RETURN + END IF + EPS = DLAMCH( 'Epsilon' ) + HUGEVAL = DLAMCH( 'Overflow' ) +* Force HUGEVAL to Inf + HUGEVAL = HUGEVAL * HUGEVAL +* Using HUGEVAL may lead to spurious underflows. + INCR_THRESH = DBLE( N ) * EPS + + IF ( LSAME ( UPLO, 'L' ) ) THEN + UPLO2 = ILAUPLO( 'L' ) + ELSE + UPLO2 = ILAUPLO( 'U' ) + ENDIF + + DO J = 1, NRHS + Y_PREC_STATE = EXTRA_RESIDUAL + IF ( Y_PREC_STATE .EQ. EXTRA_Y ) THEN + DO I = 1, N + Y_TAIL( I ) = 0.0D+0 + END DO + END IF + + DXRAT = 0.0D+0 + DXRATMAX = 0.0D+0 + DZRAT = 0.0D+0 + DZRATMAX = 0.0D+0 + FINAL_DX_X = HUGEVAL + FINAL_DZ_Z = HUGEVAL + PREVNORMDX = HUGEVAL + PREV_DZ_Z = HUGEVAL + DZ_Z = HUGEVAL + DX_X = HUGEVAL + + X_STATE = WORKING_STATE + Z_STATE = UNSTABLE_STATE + INCR_PREC = .FALSE. + + DO CNT = 1, ITHRESH +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL ZCOPY( N, B( 1, J ), 1, RES, 1 ) + IF ( Y_PREC_STATE .EQ. BASE_RESIDUAL ) THEN + CALL ZSYMV( UPLO, N, DCMPLX(-1.0D+0), A, LDA, Y(1,J), 1, + $ DCMPLX(1.0D+0), RES, 1 ) + ELSE IF ( Y_PREC_STATE .EQ. EXTRA_RESIDUAL ) THEN + CALL BLAS_ZSYMV_X( UPLO2, N, DCMPLX(-1.0D+0), A, LDA, + $ Y( 1, J ), 1, DCMPLX(1.0D+0), RES, 1, PREC_TYPE ) + ELSE + CALL BLAS_ZSYMV2_X(UPLO2, N, DCMPLX(-1.0D+0), A, LDA, + $ Y(1, J), Y_TAIL, 1, DCMPLX(1.0D+0), RES, 1, + $ PREC_TYPE) + END IF + +! XXX: RES is no longer needed. + CALL ZCOPY( N, RES, 1, DY, 1 ) + CALL ZSYTRS( UPLO, N, 1, AF, LDAF, IPIV, DY, N, INFO ) +* +* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. +* + NORMX = 0.0D+0 + NORMY = 0.0D+0 + NORMDX = 0.0D+0 + DZ_Z = 0.0D+0 + YMIN = HUGEVAL + + DO I = 1, N + YK = CABS1( Y( I, J ) ) + DYK = CABS1( DY( I ) ) + + IF ( YK .NE. 0.0D+0 ) THEN + DZ_Z = MAX( DZ_Z, DYK / YK ) + ELSE IF ( DYK .NE. 0.0D+0 ) THEN + DZ_Z = HUGEVAL + END IF + + YMIN = MIN( YMIN, YK ) + + NORMY = MAX( NORMY, YK ) + + IF ( COLEQU ) THEN + NORMX = MAX( NORMX, YK * C( I ) ) + NORMDX = MAX( NORMDX, DYK * C( I ) ) + ELSE + NORMX = NORMY + NORMDX = MAX( NORMDX, DYK ) + END IF + END DO + + IF ( NORMX .NE. 0.0D+0 ) THEN + DX_X = NORMDX / NORMX + ELSE IF ( NORMDX .EQ. 0.0D+0 ) THEN + DX_X = 0.0D+0 + ELSE + DX_X = HUGEVAL + END IF + + DXRAT = NORMDX / PREVNORMDX + DZRAT = DZ_Z / PREV_DZ_Z +* +* Check termination criteria. +* + IF ( YMIN*RCOND .LT. INCR_THRESH*NORMY + $ .AND. Y_PREC_STATE .LT. EXTRA_Y ) + $ INCR_PREC = .TRUE. + + IF ( X_STATE .EQ. NOPROG_STATE .AND. DXRAT .LE. RTHRESH ) + $ X_STATE = WORKING_STATE + IF ( X_STATE .EQ. WORKING_STATE ) THEN + IF ( DX_X .LE. EPS ) THEN + X_STATE = CONV_STATE + ELSE IF ( DXRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + X_STATE = NOPROG_STATE + END IF + ELSE + IF (DXRAT .GT. DXRATMAX) DXRATMAX = DXRAT + END IF + IF ( X_STATE .GT. WORKING_STATE ) FINAL_DX_X = DX_X + END IF + + IF ( Z_STATE .EQ. UNSTABLE_STATE .AND. DZ_Z .LE. DZ_UB ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. NOPROG_STATE .AND. DZRAT .LE. RTHRESH ) + $ Z_STATE = WORKING_STATE + IF ( Z_STATE .EQ. WORKING_STATE ) THEN + IF ( DZ_Z .LE. EPS ) THEN + Z_STATE = CONV_STATE + ELSE IF ( DZ_Z .GT. DZ_UB ) THEN + Z_STATE = UNSTABLE_STATE + DZRATMAX = 0.0D+0 + FINAL_DZ_Z = HUGEVAL + ELSE IF ( DZRAT .GT. RTHRESH ) THEN + IF ( Y_PREC_STATE .NE. EXTRA_Y ) THEN + INCR_PREC = .TRUE. + ELSE + Z_STATE = NOPROG_STATE + END IF + ELSE + IF ( DZRAT .GT. DZRATMAX ) DZRATMAX = DZRAT + END IF + IF ( Z_STATE .GT. WORKING_STATE ) FINAL_DZ_Z = DZ_Z + END IF + + IF ( X_STATE.NE.WORKING_STATE.AND. + $ ( IGNORE_CWISE.OR.Z_STATE.NE.WORKING_STATE ) ) + $ GOTO 666 + + IF ( INCR_PREC ) THEN + INCR_PREC = .FALSE. + Y_PREC_STATE = Y_PREC_STATE + 1 + DO I = 1, N + Y_TAIL( I ) = 0.0D+0 + END DO + END IF + + PREVNORMDX = NORMDX + PREV_DZ_Z = DZ_Z +* +* Update soluton. +* + IF ( Y_PREC_STATE .LT. EXTRA_Y ) THEN + CALL ZAXPY( N, DCMPLX(1.0D+0), DY, 1, Y(1,J), 1 ) + ELSE + CALL ZLA_WWADDW( N, Y(1,J), Y_TAIL, DY ) + END IF + + END DO +* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. + 666 CONTINUE +* +* Set final_* when cnt hits ithresh. +* + IF ( X_STATE .EQ. WORKING_STATE ) FINAL_DX_X = DX_X + IF ( Z_STATE .EQ. WORKING_STATE ) FINAL_DZ_Z = DZ_Z +* +* Compute error bounds. +* + IF ( N_NORMS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = + $ FINAL_DX_X / (1 - DXRATMAX) + END IF + IF ( N_NORMS .GE. 2 ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = + $ FINAL_DZ_Z / (1 - DZRATMAX) + END IF +* +* Compute componentwise relative backward error from formula +* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. +* +* Compute residual RES = B_s - op(A_s) * Y, +* op(A) = A, A**T, or A**H depending on TRANS (and type). +* + CALL ZCOPY( N, B( 1, J ), 1, RES, 1 ) + CALL ZSYMV( UPLO, N, DCMPLX(-1.0D+0), A, LDA, Y(1,J), 1, + $ DCMPLX(1.0D+0), RES, 1 ) + + DO I = 1, N + AYB( I ) = CABS1( B( I, J ) ) + END DO +* +* Compute abs(op(A_s))*abs(Y) + abs(B_s). +* + CALL ZLA_SYAMV ( UPLO2, N, 1.0D+0, + $ A, LDA, Y(1, J), 1, 1.0D+0, AYB, 1 ) + + CALL ZLA_LIN_BERR ( N, N, 1, RES, AYB, BERR_OUT( J ) ) +* +* End of loop for each RHS. +* + END DO +* + RETURN + END diff --git a/dspl/liblapack/SRC/zla_syrpvgrw.f b/dspl/liblapack/SRC/zla_syrpvgrw.f new file mode 100644 index 0000000..ccf4fc2 --- /dev/null +++ b/dspl/liblapack/SRC/zla_syrpvgrw.f @@ -0,0 +1,331 @@ +*> \brief \b ZLA_SYRPVGRW computes the reciprocal pivot growth factor norm(A)/norm(U) for a symmetric indefinite matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_SYRPVGRW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLA_SYRPVGRW( UPLO, N, INFO, A, LDA, AF, +* LDAF, IPIV, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER*1 UPLO +* INTEGER N, INFO, LDA, LDAF +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ) +* DOUBLE PRECISION WORK( * ) +* INTEGER IPIV( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> +*> ZLA_SYRPVGRW computes the reciprocal pivot growth factor +*> norm(A)/norm(U). The "max absolute element" norm is used. If this is +*> much less than 1, the stability of the LU factorization of the +*> (equilibrated) matrix A could be poor. This also means that the +*> solution X, estimated condition numbers, and error bounds could be +*> unreliable. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] INFO +*> \verbatim +*> INFO is INTEGER +*> The value of INFO returned from ZSYTRF, .i.e., the pivot in +*> column INFO is exactly 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSYTRF. +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLA_SYRPVGRW( UPLO, N, INFO, A, LDA, AF, + $ LDAF, IPIV, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER*1 UPLO + INTEGER N, INFO, LDA, LDAF +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), AF( LDAF, * ) + DOUBLE PRECISION WORK( * ) + INTEGER IPIV( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER NCOLS, I, J, K, KP + DOUBLE PRECISION AMAX, UMAX, RPVGRW, TMP + LOGICAL UPPER + COMPLEX*16 ZDUM +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, REAL, DIMAG, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL LSAME + LOGICAL LSAME +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE ( ZDUM ) ) + ABS( DIMAG ( ZDUM ) ) +* .. +* .. Executable Statements .. +* + UPPER = LSAME( 'Upper', UPLO ) + IF ( INFO.EQ.0 ) THEN + IF ( UPPER ) THEN + NCOLS = 1 + ELSE + NCOLS = N + END IF + ELSE + NCOLS = INFO + END IF + + RPVGRW = 1.0D+0 + DO I = 1, 2*N + WORK( I ) = 0.0D+0 + END DO +* +* Find the max magnitude entry of each column of A. Compute the max +* for all N columns so we can apply the pivot permutation while +* looping below. Assume a full factorization is the common case. +* + IF ( UPPER ) THEN + DO J = 1, N + DO I = 1, J + WORK( N+I ) = MAX( CABS1( A( I, J ) ), WORK( N+I ) ) + WORK( N+J ) = MAX( CABS1( A( I, J ) ), WORK( N+J ) ) + END DO + END DO + ELSE + DO J = 1, N + DO I = J, N + WORK( N+I ) = MAX( CABS1( A( I, J ) ), WORK( N+I ) ) + WORK( N+J ) = MAX( CABS1( A( I, J ) ), WORK( N+J ) ) + END DO + END DO + END IF +* +* Now find the max magnitude entry of each column of U or L. Also +* permute the magnitudes of A above so they're in the same order as +* the factor. +* +* The iteration orders and permutations were copied from zsytrs. +* Calls to SSWAP would be severe overkill. +* + IF ( UPPER ) THEN + K = N + DO WHILE ( K .LT. NCOLS .AND. K.GT.0 ) + IF ( IPIV( K ).GT.0 ) THEN +! 1x1 pivot + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + DO I = 1, K + WORK( K ) = MAX( CABS1( AF( I, K ) ), WORK( K ) ) + END DO + K = K - 1 + ELSE +! 2x2 pivot + KP = -IPIV( K ) + TMP = WORK( N+K-1 ) + WORK( N+K-1 ) = WORK( N+KP ) + WORK( N+KP ) = TMP + DO I = 1, K-1 + WORK( K ) = MAX( CABS1( AF( I, K ) ), WORK( K ) ) + WORK( K-1 ) = + $ MAX( CABS1( AF( I, K-1 ) ), WORK( K-1 ) ) + END DO + WORK( K ) = MAX( CABS1( AF( K, K ) ), WORK( K ) ) + K = K - 2 + END IF + END DO + K = NCOLS + DO WHILE ( K .LE. N ) + IF ( IPIV( K ).GT.0 ) THEN + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + K = K + 1 + ELSE + KP = -IPIV( K ) + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + K = K + 2 + END IF + END DO + ELSE + K = 1 + DO WHILE ( K .LE. NCOLS ) + IF ( IPIV( K ).GT.0 ) THEN +! 1x1 pivot + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + DO I = K, N + WORK( K ) = MAX( CABS1( AF( I, K ) ), WORK( K ) ) + END DO + K = K + 1 + ELSE +! 2x2 pivot + KP = -IPIV( K ) + TMP = WORK( N+K+1 ) + WORK( N+K+1 ) = WORK( N+KP ) + WORK( N+KP ) = TMP + DO I = K+1, N + WORK( K ) = MAX( CABS1( AF( I, K ) ), WORK( K ) ) + WORK( K+1 ) = + $ MAX( CABS1( AF( I, K+1 ) ), WORK( K+1 ) ) + END DO + WORK( K ) = MAX( CABS1( AF( K, K ) ), WORK( K ) ) + K = K + 2 + END IF + END DO + K = NCOLS + DO WHILE ( K .GE. 1 ) + IF ( IPIV( K ).GT.0 ) THEN + KP = IPIV( K ) + IF ( KP .NE. K ) THEN + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + END IF + K = K - 1 + ELSE + KP = -IPIV( K ) + TMP = WORK( N+K ) + WORK( N+K ) = WORK( N+KP ) + WORK( N+KP ) = TMP + K = K - 2 + ENDIF + END DO + END IF +* +* Compute the *inverse* of the max element growth factor. Dividing +* by zero would imply the largest entry of the factor's column is +* zero. Than can happen when either the column of A is zero or +* massive pivots made the factor underflow to zero. Neither counts +* as growth in itself, so simply ignore terms with zero +* denominators. +* + IF ( UPPER ) THEN + DO I = NCOLS, N + UMAX = WORK( I ) + AMAX = WORK( N+I ) + IF ( UMAX /= 0.0D+0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + ELSE + DO I = 1, NCOLS + UMAX = WORK( I ) + AMAX = WORK( N+I ) + IF ( UMAX /= 0.0D+0 ) THEN + RPVGRW = MIN( AMAX / UMAX, RPVGRW ) + END IF + END DO + END IF + + ZLA_SYRPVGRW = RPVGRW + END diff --git a/dspl/liblapack/SRC/zla_wwaddw.f b/dspl/liblapack/SRC/zla_wwaddw.f new file mode 100644 index 0000000..b4f9df3 --- /dev/null +++ b/dspl/liblapack/SRC/zla_wwaddw.f @@ -0,0 +1,110 @@ +*> \brief \b ZLA_WWADDW adds a vector into a doubled-single vector. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLA_WWADDW + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLA_WWADDW( N, X, Y, W ) +* +* .. Scalar Arguments .. +* INTEGER N +* .. +* .. Array Arguments .. +* COMPLEX*16 X( * ), Y( * ), W( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLA_WWADDW adds a vector W into a doubled-single vector (X, Y). +*> +*> This works for all extant IBM's hex and binary floating point +*> arithmetics, but not for decimal. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of vectors X, Y, and W. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (N) +*> The first part of the doubled-single accumulation vector. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX*16 array, dimension (N) +*> The second part of the doubled-single accumulation vector. +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is COMPLEX*16 array, dimension (N) +*> The vector to be added. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZLA_WWADDW( N, X, Y, W ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER N +* .. +* .. Array Arguments .. + COMPLEX*16 X( * ), Y( * ), W( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + COMPLEX*16 S + INTEGER I +* .. +* .. Executable Statements .. + DO 10 I = 1, N + S = X(I) + W(I) + S = (S + S) - S + Y(I) = ((X(I) - S) + W(I)) + Y(I) + X(I) = S + 10 CONTINUE + RETURN + END diff --git a/dspl/liblapack/SRC/zlabrd.f b/dspl/liblapack/SRC/zlabrd.f new file mode 100644 index 0000000..9078409 --- /dev/null +++ b/dspl/liblapack/SRC/zlabrd.f @@ -0,0 +1,420 @@ +*> \brief \b ZLABRD reduces the first nb rows and columns of a general matrix to a bidiagonal form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLABRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, +* LDY ) +* +* .. Scalar Arguments .. +* INTEGER LDA, LDX, LDY, M, N, NB +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* COMPLEX*16 A( LDA, * ), TAUP( * ), TAUQ( * ), X( LDX, * ), +* $ Y( LDY, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLABRD reduces the first NB rows and columns of a complex general +*> m by n matrix A to upper or lower real bidiagonal form by a unitary +*> transformation Q**H * A * P, and returns the matrices X and Y which +*> are needed to apply the transformation to the unreduced part of A. +*> +*> If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower +*> bidiagonal form. +*> +*> This is an auxiliary routine called by ZGEBRD +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows in the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in the matrix A. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of leading rows and columns of A to be reduced. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the m by n general matrix to be reduced. +*> On exit, the first NB rows and columns of the matrix are +*> overwritten; the rest of the array is unchanged. +*> If m >= n, elements on and below the diagonal in the first NB +*> columns, with the array TAUQ, represent the unitary +*> matrix Q as a product of elementary reflectors; and +*> elements above the diagonal in the first NB rows, with the +*> array TAUP, represent the unitary matrix P as a product +*> of elementary reflectors. +*> If m < n, elements below the diagonal in the first NB +*> columns, with the array TAUQ, represent the unitary +*> matrix Q as a product of elementary reflectors, and +*> elements on and above the diagonal in the first NB rows, +*> with the array TAUP, represent the unitary matrix P as +*> a product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (NB) +*> The diagonal elements of the first NB rows and columns of +*> the reduced matrix. D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (NB) +*> The off-diagonal elements of the first NB rows and columns of +*> the reduced matrix. +*> \endverbatim +*> +*> \param[out] TAUQ +*> \verbatim +*> TAUQ is COMPLEX*16 array, dimension (NB) +*> The scalar factors of the elementary reflectors which +*> represent the unitary matrix Q. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP +*> \verbatim +*> TAUP is COMPLEX*16 array, dimension (NB) +*> The scalar factors of the elementary reflectors which +*> represent the unitary matrix P. See Further Details. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NB) +*> The m-by-nb matrix X required to update the unreduced part +*> of A. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,M). +*> \endverbatim +*> +*> \param[out] Y +*> \verbatim +*> Y is COMPLEX*16 array, dimension (LDY,NB) +*> The n-by-nb matrix Y required to update the unreduced part +*> of A. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrices Q and P are represented as products of elementary +*> reflectors: +*> +*> Q = H(1) H(2) . . . H(nb) and P = G(1) G(2) . . . G(nb) +*> +*> Each H(i) and G(i) has the form: +*> +*> H(i) = I - tauq * v * v**H and G(i) = I - taup * u * u**H +*> +*> where tauq and taup are complex scalars, and v and u are complex +*> vectors. +*> +*> If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in +*> A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in +*> A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in +*> A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in +*> A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). +*> +*> The elements of the vectors v and u together form the m-by-nb matrix +*> V and the nb-by-n matrix U**H which are needed, with X and Y, to apply +*> the transformation to the unreduced part of the matrix, using a block +*> update of the form: A := A - V*Y**H - X*U**H. +*> +*> The contents of A on exit are illustrated by the following examples +*> with nb = 2: +*> +*> m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n): +*> +*> ( 1 1 u1 u1 u1 ) ( 1 u1 u1 u1 u1 u1 ) +*> ( v1 1 1 u2 u2 ) ( 1 1 u2 u2 u2 u2 ) +*> ( v1 v2 a a a ) ( v1 1 a a a a ) +*> ( v1 v2 a a a ) ( v1 v2 a a a a ) +*> ( v1 v2 a a a ) ( v1 v2 a a a a ) +*> ( v1 v2 a a a ) +*> +*> where a denotes an element of the original matrix which is unchanged, +*> vi denotes an element of the vector defining H(i), and ui an element +*> of the vector defining G(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y, + $ LDY ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER LDA, LDX, LDY, M, N, NB +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + COMPLEX*16 A( LDA, * ), TAUP( * ), TAUQ( * ), X( LDX, * ), + $ Y( LDY, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL ZGEMV, ZLACGV, ZLARFG, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( M.GE.N ) THEN +* +* Reduce to upper bidiagonal form +* + DO 10 I = 1, NB +* +* Update A(i:m,i) +* + CALL ZLACGV( I-1, Y( I, 1 ), LDY ) + CALL ZGEMV( 'No transpose', M-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I, I ), 1 ) + CALL ZLACGV( I-1, Y( I, 1 ), LDY ) + CALL ZGEMV( 'No transpose', M-I+1, I-1, -ONE, X( I, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+1:m,i) +* + ALPHA = A( I, I ) + CALL ZLARFG( M-I+1, ALPHA, A( MIN( I+1, M ), I ), 1, + $ TAUQ( I ) ) + D( I ) = ALPHA + IF( I.LT.N ) THEN + A( I, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL ZGEMV( 'Conjugate transpose', M-I+1, N-I, ONE, + $ A( I, I+1 ), LDA, A( I, I ), 1, ZERO, + $ Y( I+1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', M-I+1, I-1, ONE, + $ A( I, 1 ), LDA, A( I, I ), 1, ZERO, + $ Y( 1, I ), 1 ) + CALL ZGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', M-I+1, I-1, ONE, + $ X( I, 1 ), LDX, A( I, I ), 1, ZERO, + $ Y( 1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, Y( 1, I ), 1, ONE, + $ Y( I+1, I ), 1 ) + CALL ZSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) +* +* Update A(i,i+1:n) +* + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + CALL ZLACGV( I, A( I, 1 ), LDA ) + CALL ZGEMV( 'No transpose', N-I, I, -ONE, Y( I+1, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I+1 ), LDA ) + CALL ZLACGV( I, A( I, 1 ), LDA ) + CALL ZLACGV( I-1, X( I, 1 ), LDX ) + CALL ZGEMV( 'Conjugate transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, X( I, 1 ), LDX, ONE, + $ A( I, I+1 ), LDA ) + CALL ZLACGV( I-1, X( I, 1 ), LDX ) +* +* Generate reflection P(i) to annihilate A(i,i+2:n) +* + ALPHA = A( I, I+1 ) + CALL ZLARFG( N-I, ALPHA, A( I, MIN( I+2, N ) ), LDA, + $ TAUP( I ) ) + E( I ) = ALPHA + A( I, I+1 ) = ONE +* +* Compute X(i+1:m,i) +* + CALL ZGEMV( 'No transpose', M-I, N-I, ONE, A( I+1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( I+1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', N-I, I, ONE, + $ Y( I+1, 1 ), LDY, A( I, I+1 ), LDA, ZERO, + $ X( 1, I ), 1 ) + CALL ZGEMV( 'No transpose', M-I, I, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL ZGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 ) + CALL ZGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL ZSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Reduce to lower bidiagonal form +* + DO 20 I = 1, NB +* +* Update A(i,i:n) +* + CALL ZLACGV( N-I+1, A( I, I ), LDA ) + CALL ZLACGV( I-1, A( I, 1 ), LDA ) + CALL ZGEMV( 'No transpose', N-I+1, I-1, -ONE, Y( I, 1 ), + $ LDY, A( I, 1 ), LDA, ONE, A( I, I ), LDA ) + CALL ZLACGV( I-1, A( I, 1 ), LDA ) + CALL ZLACGV( I-1, X( I, 1 ), LDX ) + CALL ZGEMV( 'Conjugate transpose', I-1, N-I+1, -ONE, + $ A( 1, I ), LDA, X( I, 1 ), LDX, ONE, A( I, I ), + $ LDA ) + CALL ZLACGV( I-1, X( I, 1 ), LDX ) +* +* Generate reflection P(i) to annihilate A(i,i+1:n) +* + ALPHA = A( I, I ) + CALL ZLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA, + $ TAUP( I ) ) + D( I ) = ALPHA + IF( I.LT.M ) THEN + A( I, I ) = ONE +* +* Compute X(i+1:m,i) +* + CALL ZGEMV( 'No transpose', M-I, N-I+1, ONE, A( I+1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( I+1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', N-I+1, I-1, ONE, + $ Y( I, 1 ), LDY, A( I, I ), LDA, ZERO, + $ X( 1, I ), 1 ) + CALL ZGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL ZGEMV( 'No transpose', I-1, N-I+1, ONE, A( 1, I ), + $ LDA, A( I, I ), LDA, ZERO, X( 1, I ), 1 ) + CALL ZGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ), + $ LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 ) + CALL ZSCAL( M-I, TAUP( I ), X( I+1, I ), 1 ) + CALL ZLACGV( N-I+1, A( I, I ), LDA ) +* +* Update A(i+1:m,i) +* + CALL ZLACGV( I-1, Y( I, 1 ), LDY ) + CALL ZGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, Y( I, 1 ), LDY, ONE, A( I+1, I ), 1 ) + CALL ZLACGV( I-1, Y( I, 1 ), LDY ) + CALL ZGEMV( 'No transpose', M-I, I, -ONE, X( I+1, 1 ), + $ LDX, A( 1, I ), 1, ONE, A( I+1, I ), 1 ) +* +* Generate reflection Q(i) to annihilate A(i+2:m,i) +* + ALPHA = A( I+1, I ) + CALL ZLARFG( M-I, ALPHA, A( MIN( I+2, M ), I ), 1, + $ TAUQ( I ) ) + E( I ) = ALPHA + A( I+1, I ) = ONE +* +* Compute Y(i+1:n,i) +* + CALL ZGEMV( 'Conjugate transpose', M-I, N-I, ONE, + $ A( I+1, I+1 ), LDA, A( I+1, I ), 1, ZERO, + $ Y( I+1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', M-I, I-1, ONE, + $ A( I+1, 1 ), LDA, A( I+1, I ), 1, ZERO, + $ Y( 1, I ), 1 ) + CALL ZGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ), + $ LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', M-I, I, ONE, + $ X( I+1, 1 ), LDX, A( I+1, I ), 1, ZERO, + $ Y( 1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', I, N-I, -ONE, + $ A( 1, I+1 ), LDA, Y( 1, I ), 1, ONE, + $ Y( I+1, I ), 1 ) + CALL ZSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 ) + ELSE + CALL ZLACGV( N-I+1, A( I, I ), LDA ) + END IF + 20 CONTINUE + END IF + RETURN +* +* End of ZLABRD +* + END diff --git a/dspl/liblapack/SRC/zlacgv.f b/dspl/liblapack/SRC/zlacgv.f new file mode 100644 index 0000000..1e3ca6e --- /dev/null +++ b/dspl/liblapack/SRC/zlacgv.f @@ -0,0 +1,116 @@ +*> \brief \b ZLACGV conjugates a complex vector. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLACGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLACGV( N, X, INCX ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* .. +* .. Array Arguments .. +* COMPLEX*16 X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLACGV conjugates a complex vector of length N. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of the vector X. N >= 0. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension +*> (1+(N-1)*abs(INCX)) +*> On entry, the vector of length N to be conjugated. +*> On exit, X is overwritten with conjg(X). +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The spacing between successive elements of X. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLACGV( N, X, INCX ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCX, N +* .. +* .. Array Arguments .. + COMPLEX*16 X( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IOFF +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* + IF( INCX.EQ.1 ) THEN + DO 10 I = 1, N + X( I ) = DCONJG( X( I ) ) + 10 CONTINUE + ELSE + IOFF = 1 + IF( INCX.LT.0 ) + $ IOFF = 1 - ( N-1 )*INCX + DO 20 I = 1, N + X( IOFF ) = DCONJG( X( IOFF ) ) + IOFF = IOFF + INCX + 20 CONTINUE + END IF + RETURN +* +* End of ZLACGV +* + END diff --git a/dspl/liblapack/SRC/zlacn2.f b/dspl/liblapack/SRC/zlacn2.f new file mode 100644 index 0000000..9d92773 --- /dev/null +++ b/dspl/liblapack/SRC/zlacn2.f @@ -0,0 +1,298 @@ +*> \brief \b ZLACN2 estimates the 1-norm of a square matrix, using reverse communication for evaluating matrix-vector products. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLACN2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLACN2( N, V, X, EST, KASE, ISAVE ) +* +* .. Scalar Arguments .. +* INTEGER KASE, N +* DOUBLE PRECISION EST +* .. +* .. Array Arguments .. +* INTEGER ISAVE( 3 ) +* COMPLEX*16 V( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLACN2 estimates the 1-norm of a square, complex matrix A. +*> Reverse communication is used for evaluating matrix-vector products. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 1. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (N) +*> On the final return, V = A*W, where EST = norm(V)/norm(W) +*> (W is not returned). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (N) +*> On an intermediate return, X should be overwritten by +*> A * X, if KASE=1, +*> A**H * X, if KASE=2, +*> where A**H is the conjugate transpose of A, and ZLACN2 must be +*> re-called with all the other parameters unchanged. +*> \endverbatim +*> +*> \param[in,out] EST +*> \verbatim +*> EST is DOUBLE PRECISION +*> On entry with KASE = 1 or 2 and ISAVE(1) = 3, EST should be +*> unchanged from the previous call to ZLACN2. +*> On exit, EST is an estimate (a lower bound) for norm(A). +*> \endverbatim +*> +*> \param[in,out] KASE +*> \verbatim +*> KASE is INTEGER +*> On the initial call to ZLACN2, KASE should be 0. +*> On an intermediate return, KASE will be 1 or 2, indicating +*> whether X should be overwritten by A * X or A**H * X. +*> On the final return from ZLACN2, KASE will again be 0. +*> \endverbatim +*> +*> \param[in,out] ISAVE +*> \verbatim +*> ISAVE is INTEGER array, dimension (3) +*> ISAVE is used to save variables between calls to ZLACN2 +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Originally named CONEST, dated March 16, 1988. +*> +*> Last modified: April, 1999 +*> +*> This is a thread safe version of ZLACON, which uses the array ISAVE +*> in place of a SAVE statement, as follows: +*> +*> ZLACON ZLACN2 +*> JUMP ISAVE(1) +*> J ISAVE(2) +*> ITER ISAVE(3) +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Nick Higham, University of Manchester +* +*> \par References: +* ================ +*> +*> N.J. Higham, "FORTRAN codes for estimating the one-norm of +*> a real or complex matrix, with applications to condition estimation", +*> ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. +*> +* ===================================================================== + SUBROUTINE ZLACN2( N, V, X, EST, KASE, ISAVE ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER KASE, N + DOUBLE PRECISION EST +* .. +* .. Array Arguments .. + INTEGER ISAVE( 3 ) + COMPLEX*16 V( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ONE, TWO + PARAMETER ( ONE = 1.0D0, TWO = 2.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, JLAST + DOUBLE PRECISION ABSXI, ALTSGN, ESTOLD, SAFMIN, TEMP +* .. +* .. External Functions .. + INTEGER IZMAX1 + DOUBLE PRECISION DLAMCH, DZSUM1 + EXTERNAL IZMAX1, DLAMCH, DZSUM1 +* .. +* .. External Subroutines .. + EXTERNAL ZCOPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DIMAG +* .. +* .. Executable Statements .. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + IF( KASE.EQ.0 ) THEN + DO 10 I = 1, N + X( I ) = DCMPLX( ONE / DBLE( N ) ) + 10 CONTINUE + KASE = 1 + ISAVE( 1 ) = 1 + RETURN + END IF +* + GO TO ( 20, 40, 70, 90, 120 )ISAVE( 1 ) +* +* ................ ENTRY (ISAVE( 1 ) = 1) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. +* + 20 CONTINUE + IF( N.EQ.1 ) THEN + V( 1 ) = X( 1 ) + EST = ABS( V( 1 ) ) +* ... QUIT + GO TO 130 + END IF + EST = DZSUM1( N, X, 1 ) +* + DO 30 I = 1, N + ABSXI = ABS( X( I ) ) + IF( ABSXI.GT.SAFMIN ) THEN + X( I ) = DCMPLX( DBLE( X( I ) ) / ABSXI, + $ DIMAG( X( I ) ) / ABSXI ) + ELSE + X( I ) = CONE + END IF + 30 CONTINUE + KASE = 2 + ISAVE( 1 ) = 2 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 2) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY CTRANS(A)*X. +* + 40 CONTINUE + ISAVE( 2 ) = IZMAX1( N, X, 1 ) + ISAVE( 3 ) = 2 +* +* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. +* + 50 CONTINUE + DO 60 I = 1, N + X( I ) = CZERO + 60 CONTINUE + X( ISAVE( 2 ) ) = CONE + KASE = 1 + ISAVE( 1 ) = 3 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 3) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 70 CONTINUE + CALL ZCOPY( N, X, 1, V, 1 ) + ESTOLD = EST + EST = DZSUM1( N, V, 1 ) +* +* TEST FOR CYCLING. + IF( EST.LE.ESTOLD ) + $ GO TO 100 +* + DO 80 I = 1, N + ABSXI = ABS( X( I ) ) + IF( ABSXI.GT.SAFMIN ) THEN + X( I ) = DCMPLX( DBLE( X( I ) ) / ABSXI, + $ DIMAG( X( I ) ) / ABSXI ) + ELSE + X( I ) = CONE + END IF + 80 CONTINUE + KASE = 2 + ISAVE( 1 ) = 4 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 4) +* X HAS BEEN OVERWRITTEN BY CTRANS(A)*X. +* + 90 CONTINUE + JLAST = ISAVE( 2 ) + ISAVE( 2 ) = IZMAX1( N, X, 1 ) + IF( ( ABS( X( JLAST ) ).NE.ABS( X( ISAVE( 2 ) ) ) ) .AND. + $ ( ISAVE( 3 ).LT.ITMAX ) ) THEN + ISAVE( 3 ) = ISAVE( 3 ) + 1 + GO TO 50 + END IF +* +* ITERATION COMPLETE. FINAL STAGE. +* + 100 CONTINUE + ALTSGN = ONE + DO 110 I = 1, N + X( I ) = DCMPLX( ALTSGN*( ONE+DBLE( I-1 ) / DBLE( N-1 ) ) ) + ALTSGN = -ALTSGN + 110 CONTINUE + KASE = 1 + ISAVE( 1 ) = 5 + RETURN +* +* ................ ENTRY (ISAVE( 1 ) = 5) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 120 CONTINUE + TEMP = TWO*( DZSUM1( N, X, 1 ) / DBLE( 3*N ) ) + IF( TEMP.GT.EST ) THEN + CALL ZCOPY( N, X, 1, V, 1 ) + EST = TEMP + END IF +* + 130 CONTINUE + KASE = 0 + RETURN +* +* End of ZLACN2 +* + END diff --git a/dspl/liblapack/SRC/zlacon.f b/dspl/liblapack/SRC/zlacon.f new file mode 100644 index 0000000..c7c2f5f --- /dev/null +++ b/dspl/liblapack/SRC/zlacon.f @@ -0,0 +1,281 @@ +*> \brief \b ZLACON estimates the 1-norm of a square matrix, using reverse communication for evaluating matrix-vector products. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLACON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLACON( N, V, X, EST, KASE ) +* +* .. Scalar Arguments .. +* INTEGER KASE, N +* DOUBLE PRECISION EST +* .. +* .. Array Arguments .. +* COMPLEX*16 V( N ), X( N ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLACON estimates the 1-norm of a square, complex matrix A. +*> Reverse communication is used for evaluating matrix-vector products. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 1. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (N) +*> On the final return, V = A*W, where EST = norm(V)/norm(W) +*> (W is not returned). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (N) +*> On an intermediate return, X should be overwritten by +*> A * X, if KASE=1, +*> A**H * X, if KASE=2, +*> where A**H is the conjugate transpose of A, and ZLACON must be +*> re-called with all the other parameters unchanged. +*> \endverbatim +*> +*> \param[in,out] EST +*> \verbatim +*> EST is DOUBLE PRECISION +*> On entry with KASE = 1 or 2 and JUMP = 3, EST should be +*> unchanged from the previous call to ZLACON. +*> On exit, EST is an estimate (a lower bound) for norm(A). +*> \endverbatim +*> +*> \param[in,out] KASE +*> \verbatim +*> KASE is INTEGER +*> On the initial call to ZLACON, KASE should be 0. +*> On an intermediate return, KASE will be 1 or 2, indicating +*> whether X should be overwritten by A * X or A**H * X. +*> On the final return from ZLACON, KASE will again be 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> Originally named CONEST, dated March 16, 1988. \n +*> Last modified: April, 1999 +* +*> \par Contributors: +* ================== +*> +*> Nick Higham, University of Manchester +* +*> \par References: +* ================ +*> +*> N.J. Higham, "FORTRAN codes for estimating the one-norm of +*> a real or complex matrix, with applications to condition estimation", +*> ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988. +*> +* ===================================================================== + SUBROUTINE ZLACON( N, V, X, EST, KASE ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER KASE, N + DOUBLE PRECISION EST +* .. +* .. Array Arguments .. + COMPLEX*16 V( N ), X( N ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ONE, TWO + PARAMETER ( ONE = 1.0D0, TWO = 2.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, ITER, J, JLAST, JUMP + DOUBLE PRECISION ABSXI, ALTSGN, ESTOLD, SAFMIN, TEMP +* .. +* .. External Functions .. + INTEGER IZMAX1 + DOUBLE PRECISION DLAMCH, DZSUM1 + EXTERNAL IZMAX1, DLAMCH, DZSUM1 +* .. +* .. External Subroutines .. + EXTERNAL ZCOPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DIMAG +* .. +* .. Save statement .. + SAVE +* .. +* .. Executable Statements .. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + IF( KASE.EQ.0 ) THEN + DO 10 I = 1, N + X( I ) = DCMPLX( ONE / DBLE( N ) ) + 10 CONTINUE + KASE = 1 + JUMP = 1 + RETURN + END IF +* + GO TO ( 20, 40, 70, 90, 120 )JUMP +* +* ................ ENTRY (JUMP = 1) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY A*X. +* + 20 CONTINUE + IF( N.EQ.1 ) THEN + V( 1 ) = X( 1 ) + EST = ABS( V( 1 ) ) +* ... QUIT + GO TO 130 + END IF + EST = DZSUM1( N, X, 1 ) +* + DO 30 I = 1, N + ABSXI = ABS( X( I ) ) + IF( ABSXI.GT.SAFMIN ) THEN + X( I ) = DCMPLX( DBLE( X( I ) ) / ABSXI, + $ DIMAG( X( I ) ) / ABSXI ) + ELSE + X( I ) = CONE + END IF + 30 CONTINUE + KASE = 2 + JUMP = 2 + RETURN +* +* ................ ENTRY (JUMP = 2) +* FIRST ITERATION. X HAS BEEN OVERWRITTEN BY CTRANS(A)*X. +* + 40 CONTINUE + J = IZMAX1( N, X, 1 ) + ITER = 2 +* +* MAIN LOOP - ITERATIONS 2,3,...,ITMAX. +* + 50 CONTINUE + DO 60 I = 1, N + X( I ) = CZERO + 60 CONTINUE + X( J ) = CONE + KASE = 1 + JUMP = 3 + RETURN +* +* ................ ENTRY (JUMP = 3) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 70 CONTINUE + CALL ZCOPY( N, X, 1, V, 1 ) + ESTOLD = EST + EST = DZSUM1( N, V, 1 ) +* +* TEST FOR CYCLING. + IF( EST.LE.ESTOLD ) + $ GO TO 100 +* + DO 80 I = 1, N + ABSXI = ABS( X( I ) ) + IF( ABSXI.GT.SAFMIN ) THEN + X( I ) = DCMPLX( DBLE( X( I ) ) / ABSXI, + $ DIMAG( X( I ) ) / ABSXI ) + ELSE + X( I ) = CONE + END IF + 80 CONTINUE + KASE = 2 + JUMP = 4 + RETURN +* +* ................ ENTRY (JUMP = 4) +* X HAS BEEN OVERWRITTEN BY CTRANS(A)*X. +* + 90 CONTINUE + JLAST = J + J = IZMAX1( N, X, 1 ) + IF( ( ABS( X( JLAST ) ).NE.ABS( X( J ) ) ) .AND. + $ ( ITER.LT.ITMAX ) ) THEN + ITER = ITER + 1 + GO TO 50 + END IF +* +* ITERATION COMPLETE. FINAL STAGE. +* + 100 CONTINUE + ALTSGN = ONE + DO 110 I = 1, N + X( I ) = DCMPLX( ALTSGN*( ONE+DBLE( I-1 ) / DBLE( N-1 ) ) ) + ALTSGN = -ALTSGN + 110 CONTINUE + KASE = 1 + JUMP = 5 + RETURN +* +* ................ ENTRY (JUMP = 5) +* X HAS BEEN OVERWRITTEN BY A*X. +* + 120 CONTINUE + TEMP = TWO*( DZSUM1( N, X, 1 ) / DBLE( 3*N ) ) + IF( TEMP.GT.EST ) THEN + CALL ZCOPY( N, X, 1, V, 1 ) + EST = TEMP + END IF +* + 130 CONTINUE + KASE = 0 + RETURN +* +* End of ZLACON +* + END diff --git a/dspl/liblapack/SRC/zlacp2.f b/dspl/liblapack/SRC/zlacp2.f new file mode 100644 index 0000000..647e82a --- /dev/null +++ b/dspl/liblapack/SRC/zlacp2.f @@ -0,0 +1,161 @@ +*> \brief \b ZLACP2 copies all or part of a real two-dimensional array to a complex array. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLACP2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLACP2( UPLO, M, N, A, LDA, B, LDB ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ) +* COMPLEX*16 B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLACP2 copies all or part of a real two-dimensional matrix A to a +*> complex matrix B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies the part of the matrix A to be copied to B. +*> = 'U': Upper triangular part +*> = 'L': Lower triangular part +*> Otherwise: All of the matrix A +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The m by n matrix A. If UPLO = 'U', only the upper trapezium +*> is accessed; if UPLO = 'L', only the lower trapezium is +*> accessed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> On exit, B = A in the locations specified by UPLO. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLACP2( UPLO, M, N, A, LDA, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ) + COMPLEX*16 B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( J, M ) + B( I, J ) = A( I, J ) + 10 CONTINUE + 20 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN + DO 40 J = 1, N + DO 30 I = J, M + B( I, J ) = A( I, J ) + 30 CONTINUE + 40 CONTINUE +* + ELSE + DO 60 J = 1, N + DO 50 I = 1, M + B( I, J ) = A( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + RETURN +* +* End of ZLACP2 +* + END diff --git a/dspl/liblapack/SRC/zlacpy.f b/dspl/liblapack/SRC/zlacpy.f new file mode 100644 index 0000000..81932b8 --- /dev/null +++ b/dspl/liblapack/SRC/zlacpy.f @@ -0,0 +1,159 @@ +*> \brief \b ZLACPY copies all or part of one two-dimensional array to another. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLACPY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLACPY( UPLO, M, N, A, LDA, B, LDB ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLACPY copies all or part of a two-dimensional matrix A to another +*> matrix B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies the part of the matrix A to be copied to B. +*> = 'U': Upper triangular part +*> = 'L': Lower triangular part +*> Otherwise: All of the matrix A +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The m by n matrix A. If UPLO = 'U', only the upper trapezium +*> is accessed; if UPLO = 'L', only the lower trapezium is +*> accessed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> On exit, B = A in the locations specified by UPLO. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLACPY( UPLO, M, N, A, LDA, B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDB, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( J, M ) + B( I, J ) = A( I, J ) + 10 CONTINUE + 20 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN + DO 40 J = 1, N + DO 30 I = J, M + B( I, J ) = A( I, J ) + 30 CONTINUE + 40 CONTINUE +* + ELSE + DO 60 J = 1, N + DO 50 I = 1, M + B( I, J ) = A( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + RETURN +* +* End of ZLACPY +* + END diff --git a/dspl/liblapack/SRC/zlacrm.f b/dspl/liblapack/SRC/zlacrm.f new file mode 100644 index 0000000..a3f919d --- /dev/null +++ b/dspl/liblapack/SRC/zlacrm.f @@ -0,0 +1,185 @@ +*> \brief \b ZLACRM multiplies a complex matrix by a square real matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLACRM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLACRM( M, N, A, LDA, B, LDB, C, LDC, RWORK ) +* +* .. Scalar Arguments .. +* INTEGER LDA, LDB, LDC, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION B( LDB, * ), RWORK( * ) +* COMPLEX*16 A( LDA, * ), C( LDC, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLACRM performs a very simple matrix-matrix multiplication: +*> C := A * B, +*> where A is M by N and complex; B is N by N and real; +*> C is M by N and complex. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A and of the matrix C. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns and rows of the matrix B and +*> the number of columns of the matrix C. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, A contains the M by N matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >=max(1,M). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, N) +*> On entry, B contains the N by N matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >=max(1,N). +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC, N) +*> On exit, C contains the M by N matrix C. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >=max(1,N). +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*M*N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLACRM( M, N, A, LDA, B, LDB, C, LDC, RWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER LDA, LDB, LDC, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION B( LDB, * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), C( LDC, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DIMAG +* .. +* .. External Subroutines .. + EXTERNAL DGEMM +* .. +* .. Executable Statements .. +* +* Quick return if possible. +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN +* + DO 20 J = 1, N + DO 10 I = 1, M + RWORK( ( J-1 )*M+I ) = DBLE( A( I, J ) ) + 10 CONTINUE + 20 CONTINUE +* + L = M*N + 1 + CALL DGEMM( 'N', 'N', M, N, N, ONE, RWORK, M, B, LDB, ZERO, + $ RWORK( L ), M ) + DO 40 J = 1, N + DO 30 I = 1, M + C( I, J ) = RWORK( L+( J-1 )*M+I-1 ) + 30 CONTINUE + 40 CONTINUE +* + DO 60 J = 1, N + DO 50 I = 1, M + RWORK( ( J-1 )*M+I ) = DIMAG( A( I, J ) ) + 50 CONTINUE + 60 CONTINUE + CALL DGEMM( 'N', 'N', M, N, N, ONE, RWORK, M, B, LDB, ZERO, + $ RWORK( L ), M ) + DO 80 J = 1, N + DO 70 I = 1, M + C( I, J ) = DCMPLX( DBLE( C( I, J ) ), + $ RWORK( L+( J-1 )*M+I-1 ) ) + 70 CONTINUE + 80 CONTINUE +* + RETURN +* +* End of ZLACRM +* + END diff --git a/dspl/liblapack/SRC/zlacrt.f b/dspl/liblapack/SRC/zlacrt.f new file mode 100644 index 0000000..3e84998 --- /dev/null +++ b/dspl/liblapack/SRC/zlacrt.f @@ -0,0 +1,160 @@ +*> \brief \b ZLACRT performs a linear transformation of a pair of complex vectors. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLACRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLACRT( N, CX, INCX, CY, INCY, C, S ) +* +* .. Scalar Arguments .. +* INTEGER INCX, INCY, N +* COMPLEX*16 C, S +* .. +* .. Array Arguments .. +* COMPLEX*16 CX( * ), CY( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLACRT performs the operation +*> +*> ( c s )( x ) ==> ( x ) +*> ( -s c )( y ) ( y ) +*> +*> where c and s are complex and the vectors x and y are complex. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of elements in the vectors CX and CY. +*> \endverbatim +*> +*> \param[in,out] CX +*> \verbatim +*> CX is COMPLEX*16 array, dimension (N) +*> On input, the vector x. +*> On output, CX is overwritten with c*x + s*y. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive values of CX. INCX <> 0. +*> \endverbatim +*> +*> \param[in,out] CY +*> \verbatim +*> CY is COMPLEX*16 array, dimension (N) +*> On input, the vector y. +*> On output, CY is overwritten with -s*x + c*y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> The increment between successive values of CY. INCY <> 0. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is COMPLEX*16 +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is COMPLEX*16 +*> C and S define the matrix +*> [ C S ]. +*> [ -S C ] +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLACRT( N, CX, INCX, CY, INCY, C, S ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCX, INCY, N + COMPLEX*16 C, S +* .. +* .. Array Arguments .. + COMPLEX*16 CX( * ), CY( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IX, IY + COMPLEX*16 CTEMP +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) + $ RETURN + IF( INCX.EQ.1 .AND. INCY.EQ.1 ) + $ GO TO 20 +* +* Code for unequal increments or equal increments not equal to 1 +* + IX = 1 + IY = 1 + IF( INCX.LT.0 ) + $ IX = ( -N+1 )*INCX + 1 + IF( INCY.LT.0 ) + $ IY = ( -N+1 )*INCY + 1 + DO 10 I = 1, N + CTEMP = C*CX( IX ) + S*CY( IY ) + CY( IY ) = C*CY( IY ) - S*CX( IX ) + CX( IX ) = CTEMP + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* Code for both increments equal to 1 +* + 20 CONTINUE + DO 30 I = 1, N + CTEMP = C*CX( I ) + S*CY( I ) + CY( I ) = C*CY( I ) - S*CX( I ) + CX( I ) = CTEMP + 30 CONTINUE + RETURN + END diff --git a/dspl/liblapack/SRC/zladiv.f b/dspl/liblapack/SRC/zladiv.f new file mode 100644 index 0000000..0bf6ea8 --- /dev/null +++ b/dspl/liblapack/SRC/zladiv.f @@ -0,0 +1,97 @@ +*> \brief \b ZLADIV performs complex division in real arithmetic, avoiding unnecessary overflow. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLADIV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* COMPLEX*16 FUNCTION ZLADIV( X, Y ) +* +* .. Scalar Arguments .. +* COMPLEX*16 X, Y +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLADIV := X / Y, where X and Y are complex. The computation of X / Y +*> will not overflow on an intermediary step unless the results +*> overflows. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 +*> \endverbatim +*> +*> \param[in] Y +*> \verbatim +*> Y is COMPLEX*16 +*> The complex scalars X and Y. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + COMPLEX*16 FUNCTION ZLADIV( X, Y ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + COMPLEX*16 X, Y +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + DOUBLE PRECISION ZI, ZR +* .. +* .. External Subroutines .. + EXTERNAL DLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DIMAG +* .. +* .. Executable Statements .. +* + CALL DLADIV( DBLE( X ), DIMAG( X ), DBLE( Y ), DIMAG( Y ), ZR, + $ ZI ) + ZLADIV = DCMPLX( ZR, ZI ) +* + RETURN +* +* End of ZLADIV +* + END diff --git a/dspl/liblapack/SRC/zlaed0.f b/dspl/liblapack/SRC/zlaed0.f new file mode 100644 index 0000000..9267245 --- /dev/null +++ b/dspl/liblapack/SRC/zlaed0.f @@ -0,0 +1,371 @@ +*> \brief \b ZLAED0 used by sstedc. Computes all eigenvalues and corresponding eigenvectors of an unreduced symmetric tridiagonal matrix using the divide and conquer method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAED0 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAED0( QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS, RWORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDQ, LDQS, N, QSIZ +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), RWORK( * ) +* COMPLEX*16 Q( LDQ, * ), QSTORE( LDQS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Using the divide and conquer method, ZLAED0 computes all eigenvalues +*> of a symmetric tridiagonal matrix which is one diagonal block of +*> those from reducing a dense or band Hermitian matrix and +*> corresponding eigenvectors of the dense or band matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] QSIZ +*> \verbatim +*> QSIZ is INTEGER +*> The dimension of the unitary matrix used to reduce +*> the full matrix to tridiagonal form. QSIZ >= N if ICOMPQ = 1. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the diagonal elements of the tridiagonal matrix. +*> On exit, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> On entry, the off-diagonal elements of the tridiagonal matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ,N) +*> On entry, Q must contain an QSIZ x N matrix whose columns +*> unitarily orthonormal. It is a part of the unitary matrix +*> that reduces the full dense Hermitian matrix to a +*> (reducible) symmetric tridiagonal matrix. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, +*> the dimension of IWORK must be at least +*> 6 + 6*N + 5*N*lg N +*> ( lg( N ) = smallest integer k +*> such that 2^k >= N ) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, +*> dimension (1 + 3*N + 2*N*lg N + 3*N**2) +*> ( lg( N ) = smallest integer k +*> such that 2^k >= N ) +*> \endverbatim +*> +*> \param[out] QSTORE +*> \verbatim +*> QSTORE is COMPLEX*16 array, dimension (LDQS, N) +*> Used to store parts of +*> the eigenvector matrix when the updating matrix multiplies +*> take place. +*> \endverbatim +*> +*> \param[in] LDQS +*> \verbatim +*> LDQS is INTEGER +*> The leading dimension of the array QSTORE. +*> LDQS >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: The algorithm failed to compute an eigenvalue while +*> working on the submatrix lying in rows and columns +*> INFO/(N+1) through mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZLAED0( QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS, RWORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDQ, LDQS, N, QSIZ +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), RWORK( * ) + COMPLEX*16 Q( LDQ, * ), QSTORE( LDQS, * ) +* .. +* +* ===================================================================== +* +* Warning: N could be as big as QSIZ! +* +* .. Parameters .. + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.D+0 ) +* .. +* .. Local Scalars .. + INTEGER CURLVL, CURPRB, CURR, I, IGIVCL, IGIVNM, + $ IGIVPT, INDXQ, IPERM, IPRMPT, IQ, IQPTR, IWREM, + $ J, K, LGN, LL, MATSIZ, MSD2, SMLSIZ, SMM1, + $ SPM1, SPM2, SUBMAT, SUBPBS, TLVLS + DOUBLE PRECISION TEMP +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DSTEQR, XERBLA, ZCOPY, ZLACRM, ZLAED7 +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, LOG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* +* IF( ICOMPQ .LT. 0 .OR. ICOMPQ .GT. 2 ) THEN +* INFO = -1 +* ELSE IF( ( ICOMPQ .EQ. 1 ) .AND. ( QSIZ .LT. MAX( 0, N ) ) ) +* $ THEN + IF( QSIZ.LT.MAX( 0, N ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDQS.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLAED0', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + SMLSIZ = ILAENV( 9, 'ZLAED0', ' ', 0, 0, 0, 0 ) +* +* Determine the size and placement of the submatrices, and save in +* the leading elements of IWORK. +* + IWORK( 1 ) = N + SUBPBS = 1 + TLVLS = 0 + 10 CONTINUE + IF( IWORK( SUBPBS ).GT.SMLSIZ ) THEN + DO 20 J = SUBPBS, 1, -1 + IWORK( 2*J ) = ( IWORK( J )+1 ) / 2 + IWORK( 2*J-1 ) = IWORK( J ) / 2 + 20 CONTINUE + TLVLS = TLVLS + 1 + SUBPBS = 2*SUBPBS + GO TO 10 + END IF + DO 30 J = 2, SUBPBS + IWORK( J ) = IWORK( J ) + IWORK( J-1 ) + 30 CONTINUE +* +* Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1 +* using rank-1 modifications (cuts). +* + SPM1 = SUBPBS - 1 + DO 40 I = 1, SPM1 + SUBMAT = IWORK( I ) + 1 + SMM1 = SUBMAT - 1 + D( SMM1 ) = D( SMM1 ) - ABS( E( SMM1 ) ) + D( SUBMAT ) = D( SUBMAT ) - ABS( E( SMM1 ) ) + 40 CONTINUE +* + INDXQ = 4*N + 3 +* +* Set up workspaces for eigenvalues only/accumulate new vectors +* routine +* + TEMP = LOG( DBLE( N ) ) / LOG( TWO ) + LGN = INT( TEMP ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IPRMPT = INDXQ + N + 1 + IPERM = IPRMPT + N*LGN + IQPTR = IPERM + N*LGN + IGIVPT = IQPTR + N + 2 + IGIVCL = IGIVPT + N*LGN +* + IGIVNM = 1 + IQ = IGIVNM + 2*N*LGN + IWREM = IQ + N**2 + 1 +* Initialize pointers + DO 50 I = 0, SUBPBS + IWORK( IPRMPT+I ) = 1 + IWORK( IGIVPT+I ) = 1 + 50 CONTINUE + IWORK( IQPTR ) = 1 +* +* Solve each submatrix eigenproblem at the bottom of the divide and +* conquer tree. +* + CURR = 0 + DO 70 I = 0, SPM1 + IF( I.EQ.0 ) THEN + SUBMAT = 1 + MATSIZ = IWORK( 1 ) + ELSE + SUBMAT = IWORK( I ) + 1 + MATSIZ = IWORK( I+1 ) - IWORK( I ) + END IF + LL = IQ - 1 + IWORK( IQPTR+CURR ) + CALL DSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ), + $ RWORK( LL ), MATSIZ, RWORK, INFO ) + CALL ZLACRM( QSIZ, MATSIZ, Q( 1, SUBMAT ), LDQ, RWORK( LL ), + $ MATSIZ, QSTORE( 1, SUBMAT ), LDQS, + $ RWORK( IWREM ) ) + IWORK( IQPTR+CURR+1 ) = IWORK( IQPTR+CURR ) + MATSIZ**2 + CURR = CURR + 1 + IF( INFO.GT.0 ) THEN + INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1 + RETURN + END IF + K = 1 + DO 60 J = SUBMAT, IWORK( I+1 ) + IWORK( INDXQ+J ) = K + K = K + 1 + 60 CONTINUE + 70 CONTINUE +* +* Successively merge eigensystems of adjacent submatrices +* into eigensystem for the corresponding larger matrix. +* +* while ( SUBPBS > 1 ) +* + CURLVL = 1 + 80 CONTINUE + IF( SUBPBS.GT.1 ) THEN + SPM2 = SUBPBS - 2 + DO 90 I = 0, SPM2, 2 + IF( I.EQ.0 ) THEN + SUBMAT = 1 + MATSIZ = IWORK( 2 ) + MSD2 = IWORK( 1 ) + CURPRB = 0 + ELSE + SUBMAT = IWORK( I ) + 1 + MATSIZ = IWORK( I+2 ) - IWORK( I ) + MSD2 = MATSIZ / 2 + CURPRB = CURPRB + 1 + END IF +* +* Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2) +* into an eigensystem of size MATSIZ. ZLAED7 handles the case +* when the eigenvectors of a full or band Hermitian matrix (which +* was reduced to tridiagonal form) are desired. +* +* I am free to use Q as a valuable working space until Loop 150. +* + CALL ZLAED7( MATSIZ, MSD2, QSIZ, TLVLS, CURLVL, CURPRB, + $ D( SUBMAT ), QSTORE( 1, SUBMAT ), LDQS, + $ E( SUBMAT+MSD2-1 ), IWORK( INDXQ+SUBMAT ), + $ RWORK( IQ ), IWORK( IQPTR ), IWORK( IPRMPT ), + $ IWORK( IPERM ), IWORK( IGIVPT ), + $ IWORK( IGIVCL ), RWORK( IGIVNM ), + $ Q( 1, SUBMAT ), RWORK( IWREM ), + $ IWORK( SUBPBS+1 ), INFO ) + IF( INFO.GT.0 ) THEN + INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1 + RETURN + END IF + IWORK( I / 2+1 ) = IWORK( I+2 ) + 90 CONTINUE + SUBPBS = SUBPBS / 2 + CURLVL = CURLVL + 1 + GO TO 80 + END IF +* +* end while +* +* Re-merge the eigenvalues/vectors which were deflated at the final +* merge step. +* + DO 100 I = 1, N + J = IWORK( INDXQ+I ) + RWORK( I ) = D( J ) + CALL ZCOPY( QSIZ, QSTORE( 1, J ), 1, Q( 1, I ), 1 ) + 100 CONTINUE + CALL DCOPY( N, RWORK, 1, D, 1 ) +* + RETURN +* +* End of ZLAED0 +* + END diff --git a/dspl/liblapack/SRC/zlaed7.f b/dspl/liblapack/SRC/zlaed7.f new file mode 100644 index 0000000..90416af --- /dev/null +++ b/dspl/liblapack/SRC/zlaed7.f @@ -0,0 +1,385 @@ +*> \brief \b ZLAED7 used by sstedc. Computes the updated eigensystem of a diagonal matrix after modification by a rank-one symmetric matrix. Used when the original matrix is dense. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAED7 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAED7( N, CUTPNT, QSIZ, TLVLS, CURLVL, CURPBM, D, Q, +* LDQ, RHO, INDXQ, QSTORE, QPTR, PRMPTR, PERM, +* GIVPTR, GIVCOL, GIVNUM, WORK, RWORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER CURLVL, CURPBM, CUTPNT, INFO, LDQ, N, QSIZ, +* $ TLVLS +* DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( 2, * ), GIVPTR( * ), INDXQ( * ), +* $ IWORK( * ), PERM( * ), PRMPTR( * ), QPTR( * ) +* DOUBLE PRECISION D( * ), GIVNUM( 2, * ), QSTORE( * ), RWORK( * ) +* COMPLEX*16 Q( LDQ, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAED7 computes the updated eigensystem of a diagonal +*> matrix after modification by a rank-one symmetric matrix. This +*> routine is used only for the eigenproblem which requires all +*> eigenvalues and optionally eigenvectors of a dense or banded +*> Hermitian matrix that has been reduced to tridiagonal form. +*> +*> T = Q(in) ( D(in) + RHO * Z*Z**H ) Q**H(in) = Q(out) * D(out) * Q**H(out) +*> +*> where Z = Q**Hu, u is a vector of length N with ones in the +*> CUTPNT and CUTPNT + 1 th elements and zeros elsewhere. +*> +*> The eigenvectors of the original matrix are stored in Q, and the +*> eigenvalues are in D. The algorithm consists of three stages: +*> +*> The first stage consists of deflating the size of the problem +*> when there are multiple eigenvalues or if there is a zero in +*> the Z vector. For each such occurrence the dimension of the +*> secular equation problem is reduced by one. This stage is +*> performed by the routine DLAED2. +*> +*> The second stage consists of calculating the updated +*> eigenvalues. This is done by finding the roots of the secular +*> equation via the routine DLAED4 (as called by SLAED3). +*> This routine also calculates the eigenvectors of the current +*> problem. +*> +*> The final stage consists of computing the updated eigenvectors +*> directly using the updated eigenvalues. The eigenvectors for +*> the current problem are multiplied with the eigenvectors from +*> the overall problem. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] CUTPNT +*> \verbatim +*> CUTPNT is INTEGER +*> Contains the location of the last eigenvalue in the leading +*> sub-matrix. min(1,N) <= CUTPNT <= N. +*> \endverbatim +*> +*> \param[in] QSIZ +*> \verbatim +*> QSIZ is INTEGER +*> The dimension of the unitary matrix used to reduce +*> the full matrix to tridiagonal form. QSIZ >= N. +*> \endverbatim +*> +*> \param[in] TLVLS +*> \verbatim +*> TLVLS is INTEGER +*> The total number of merging levels in the overall divide and +*> conquer tree. +*> \endverbatim +*> +*> \param[in] CURLVL +*> \verbatim +*> CURLVL is INTEGER +*> The current level in the overall merge routine, +*> 0 <= curlvl <= tlvls. +*> \endverbatim +*> +*> \param[in] CURPBM +*> \verbatim +*> CURPBM is INTEGER +*> The current problem in the current level in the overall +*> merge routine (counting from upper left to lower right). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the eigenvalues of the rank-1-perturbed matrix. +*> On exit, the eigenvalues of the repaired matrix. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ,N) +*> On entry, the eigenvectors of the rank-1-perturbed matrix. +*> On exit, the eigenvectors of the repaired tridiagonal matrix. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> Contains the subdiagonal element used to create the rank-1 +*> modification. +*> \endverbatim +*> +*> \param[out] INDXQ +*> \verbatim +*> INDXQ is INTEGER array, dimension (N) +*> This contains the permutation which will reintegrate the +*> subproblem just solved back into sorted order, +*> ie. D( INDXQ( I = 1, N ) ) will be in ascending order. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, +*> dimension (3*N+2*QSIZ*N) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (QSIZ*N) +*> \endverbatim +*> +*> \param[in,out] QSTORE +*> \verbatim +*> QSTORE is DOUBLE PRECISION array, dimension (N**2+1) +*> Stores eigenvectors of submatrices encountered during +*> divide and conquer, packed together. QPTR points to +*> beginning of the submatrices. +*> \endverbatim +*> +*> \param[in,out] QPTR +*> \verbatim +*> QPTR is INTEGER array, dimension (N+2) +*> List of indices pointing to beginning of submatrices stored +*> in QSTORE. The submatrices are numbered starting at the +*> bottom left of the divide and conquer tree, from left to +*> right and bottom to top. +*> \endverbatim +*> +*> \param[in] PRMPTR +*> \verbatim +*> PRMPTR is INTEGER array, dimension (N lg N) +*> Contains a list of pointers which indicate where in PERM a +*> level's permutation is stored. PRMPTR(i+1) - PRMPTR(i) +*> indicates the size of the permutation and also the size of +*> the full, non-deflated problem. +*> \endverbatim +*> +*> \param[in] PERM +*> \verbatim +*> PERM is INTEGER array, dimension (N lg N) +*> Contains the permutations (from deflation and sorting) to be +*> applied to each eigenblock. +*> \endverbatim +*> +*> \param[in] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER array, dimension (N lg N) +*> Contains a list of pointers which indicate where in GIVCOL a +*> level's Givens rotations are stored. GIVPTR(i+1) - GIVPTR(i) +*> indicates the number of Givens rotations. +*> \endverbatim +*> +*> \param[in] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension (2, N lg N) +*> Each pair of numbers indicates a pair of columns to take place +*> in a Givens rotation. +*> \endverbatim +*> +*> \param[in] GIVNUM +*> \verbatim +*> GIVNUM is DOUBLE PRECISION array, dimension (2, N lg N) +*> Each number indicates the S value to be used in the +*> corresponding Givens rotation. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = 1, an eigenvalue did not converge +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZLAED7( N, CUTPNT, QSIZ, TLVLS, CURLVL, CURPBM, D, Q, + $ LDQ, RHO, INDXQ, QSTORE, QPTR, PRMPTR, PERM, + $ GIVPTR, GIVCOL, GIVNUM, WORK, RWORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER CURLVL, CURPBM, CUTPNT, INFO, LDQ, N, QSIZ, + $ TLVLS + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), GIVPTR( * ), INDXQ( * ), + $ IWORK( * ), PERM( * ), PRMPTR( * ), QPTR( * ) + DOUBLE PRECISION D( * ), GIVNUM( 2, * ), QSTORE( * ), RWORK( * ) + COMPLEX*16 Q( LDQ, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER COLTYP, CURR, I, IDLMDA, INDX, + $ INDXC, INDXP, IQ, IW, IZ, K, N1, N2, PTR +* .. +* .. External Subroutines .. + EXTERNAL DLAED9, DLAEDA, DLAMRG, XERBLA, ZLACRM, ZLAED8 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* +* IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN +* INFO = -1 +* ELSE IF( N.LT.0 ) THEN + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( MIN( 1, N ).GT.CUTPNT .OR. N.LT.CUTPNT ) THEN + INFO = -2 + ELSE IF( QSIZ.LT.N ) THEN + INFO = -3 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLAED7', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* The following values are for bookkeeping purposes only. They are +* integer pointers which indicate the portion of the workspace +* used by a particular array in DLAED2 and SLAED3. +* + IZ = 1 + IDLMDA = IZ + N + IW = IDLMDA + N + IQ = IW + N +* + INDX = 1 + INDXC = INDX + N + COLTYP = INDXC + N + INDXP = COLTYP + N +* +* Form the z-vector which consists of the last row of Q_1 and the +* first row of Q_2. +* + PTR = 1 + 2**TLVLS + DO 10 I = 1, CURLVL - 1 + PTR = PTR + 2**( TLVLS-I ) + 10 CONTINUE + CURR = PTR + CURPBM + CALL DLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR, + $ GIVCOL, GIVNUM, QSTORE, QPTR, RWORK( IZ ), + $ RWORK( IZ+N ), INFO ) +* +* When solving the final problem, we no longer need the stored data, +* so we will overwrite the data from this level onto the previously +* used storage space. +* + IF( CURLVL.EQ.TLVLS ) THEN + QPTR( CURR ) = 1 + PRMPTR( CURR ) = 1 + GIVPTR( CURR ) = 1 + END IF +* +* Sort and Deflate eigenvalues. +* + CALL ZLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, RWORK( IZ ), + $ RWORK( IDLMDA ), WORK, QSIZ, RWORK( IW ), + $ IWORK( INDXP ), IWORK( INDX ), INDXQ, + $ PERM( PRMPTR( CURR ) ), GIVPTR( CURR+1 ), + $ GIVCOL( 1, GIVPTR( CURR ) ), + $ GIVNUM( 1, GIVPTR( CURR ) ), INFO ) + PRMPTR( CURR+1 ) = PRMPTR( CURR ) + N + GIVPTR( CURR+1 ) = GIVPTR( CURR+1 ) + GIVPTR( CURR ) +* +* Solve Secular Equation. +* + IF( K.NE.0 ) THEN + CALL DLAED9( K, 1, K, N, D, RWORK( IQ ), K, RHO, + $ RWORK( IDLMDA ), RWORK( IW ), + $ QSTORE( QPTR( CURR ) ), K, INFO ) + CALL ZLACRM( QSIZ, K, WORK, QSIZ, QSTORE( QPTR( CURR ) ), K, Q, + $ LDQ, RWORK( IQ ) ) + QPTR( CURR+1 ) = QPTR( CURR ) + K**2 + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* Prepare the INDXQ sorting premutation. +* + N1 = K + N2 = N - K + CALL DLAMRG( N1, N2, D, 1, -1, INDXQ ) + ELSE + QPTR( CURR+1 ) = QPTR( CURR ) + DO 20 I = 1, N + INDXQ( I ) = I + 20 CONTINUE + END IF +* + RETURN +* +* End of ZLAED7 +* + END diff --git a/dspl/liblapack/SRC/zlaed8.f b/dspl/liblapack/SRC/zlaed8.f new file mode 100644 index 0000000..4805727 --- /dev/null +++ b/dspl/liblapack/SRC/zlaed8.f @@ -0,0 +1,486 @@ +*> \brief \b ZLAED8 used by sstedc. Merges eigenvalues and deflates secular equation. Used when the original matrix is dense. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAED8 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA, +* Q2, LDQ2, W, INDXP, INDX, INDXQ, PERM, GIVPTR, +* GIVCOL, GIVNUM, INFO ) +* +* .. Scalar Arguments .. +* INTEGER CUTPNT, GIVPTR, INFO, K, LDQ, LDQ2, N, QSIZ +* DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( 2, * ), INDX( * ), INDXP( * ), +* $ INDXQ( * ), PERM( * ) +* DOUBLE PRECISION D( * ), DLAMDA( * ), GIVNUM( 2, * ), W( * ), +* $ Z( * ) +* COMPLEX*16 Q( LDQ, * ), Q2( LDQ2, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAED8 merges the two sets of eigenvalues together into a single +*> sorted set. Then it tries to deflate the size of the problem. +*> There are two ways in which deflation can occur: when two or more +*> eigenvalues are close together or if there is a tiny element in the +*> Z vector. For each such occurrence the order of the related secular +*> equation problem is reduced by one. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[out] K +*> \verbatim +*> K is INTEGER +*> Contains the number of non-deflated eigenvalues. +*> This is the order of the related secular equation. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] QSIZ +*> \verbatim +*> QSIZ is INTEGER +*> The dimension of the unitary matrix used to reduce +*> the dense or band matrix to tridiagonal form. +*> QSIZ >= N if ICOMPQ = 1. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ,N) +*> On entry, Q contains the eigenvectors of the partially solved +*> system which has been previously updated in matrix +*> multiplies with other partially solved eigensystems. +*> On exit, Q contains the trailing (N-K) updated eigenvectors +*> (those which were deflated) in its last N-K columns. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max( 1, N ). +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, D contains the eigenvalues of the two submatrices to +*> be combined. On exit, D contains the trailing (N-K) updated +*> eigenvalues (those which were deflated) sorted into increasing +*> order. +*> \endverbatim +*> +*> \param[in,out] RHO +*> \verbatim +*> RHO is DOUBLE PRECISION +*> Contains the off diagonal element associated with the rank-1 +*> cut which originally split the two submatrices which are now +*> being recombined. RHO is modified during the computation to +*> the value required by DLAED3. +*> \endverbatim +*> +*> \param[in] CUTPNT +*> \verbatim +*> CUTPNT is INTEGER +*> Contains the location of the last eigenvalue in the leading +*> sub-matrix. MIN(1,N) <= CUTPNT <= N. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension (N) +*> On input this vector contains the updating vector (the last +*> row of the first sub-eigenvector matrix and the first row of +*> the second sub-eigenvector matrix). The contents of Z are +*> destroyed during the updating process. +*> \endverbatim +*> +*> \param[out] DLAMDA +*> \verbatim +*> DLAMDA is DOUBLE PRECISION array, dimension (N) +*> Contains a copy of the first K eigenvalues which will be used +*> by DLAED3 to form the secular equation. +*> \endverbatim +*> +*> \param[out] Q2 +*> \verbatim +*> Q2 is COMPLEX*16 array, dimension (LDQ2,N) +*> If ICOMPQ = 0, Q2 is not referenced. Otherwise, +*> Contains a copy of the first K eigenvectors which will be used +*> by DLAED7 in a matrix multiply (DGEMM) to update the new +*> eigenvectors. +*> \endverbatim +*> +*> \param[in] LDQ2 +*> \verbatim +*> LDQ2 is INTEGER +*> The leading dimension of the array Q2. LDQ2 >= max( 1, N ). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> This will hold the first k values of the final +*> deflation-altered z-vector and will be passed to DLAED3. +*> \endverbatim +*> +*> \param[out] INDXP +*> \verbatim +*> INDXP is INTEGER array, dimension (N) +*> This will contain the permutation used to place deflated +*> values of D at the end of the array. On output INDXP(1:K) +*> points to the nondeflated D-values and INDXP(K+1:N) +*> points to the deflated eigenvalues. +*> \endverbatim +*> +*> \param[out] INDX +*> \verbatim +*> INDX is INTEGER array, dimension (N) +*> This will contain the permutation used to sort the contents of +*> D into ascending order. +*> \endverbatim +*> +*> \param[in] INDXQ +*> \verbatim +*> INDXQ is INTEGER array, dimension (N) +*> This contains the permutation which separately sorts the two +*> sub-problems in D into ascending order. Note that elements in +*> the second half of this permutation must first have CUTPNT +*> added to their values in order to be accurate. +*> \endverbatim +*> +*> \param[out] PERM +*> \verbatim +*> PERM is INTEGER array, dimension (N) +*> Contains the permutations (from deflation and sorting) to be +*> applied to each eigenblock. +*> \endverbatim +*> +*> \param[out] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER +*> Contains the number of Givens rotations which took place in +*> this subproblem. +*> \endverbatim +*> +*> \param[out] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension (2, N) +*> Each pair of numbers indicates a pair of columns to take place +*> in a Givens rotation. +*> \endverbatim +*> +*> \param[out] GIVNUM +*> \verbatim +*> GIVNUM is DOUBLE PRECISION array, dimension (2, N) +*> Each number indicates the S value to be used in the +*> corresponding Givens rotation. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA, + $ Q2, LDQ2, W, INDXP, INDX, INDXQ, PERM, GIVPTR, + $ GIVCOL, GIVNUM, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER CUTPNT, GIVPTR, INFO, K, LDQ, LDQ2, N, QSIZ + DOUBLE PRECISION RHO +* .. +* .. Array Arguments .. + INTEGER GIVCOL( 2, * ), INDX( * ), INDXP( * ), + $ INDXQ( * ), PERM( * ) + DOUBLE PRECISION D( * ), DLAMDA( * ), GIVNUM( 2, * ), W( * ), + $ Z( * ) + COMPLEX*16 Q( LDQ, * ), Q2( LDQ2, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION MONE, ZERO, ONE, TWO, EIGHT + PARAMETER ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0, + $ TWO = 2.0D0, EIGHT = 8.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, IMAX, J, JLAM, JMAX, JP, K2, N1, N1P1, N2 + DOUBLE PRECISION C, EPS, S, T, TAU, TOL +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL IDAMAX, DLAMCH, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAMRG, DSCAL, XERBLA, ZCOPY, ZDROT, + $ ZLACPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( QSIZ.LT.N ) THEN + INFO = -3 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( CUTPNT.LT.MIN( 1, N ) .OR. CUTPNT.GT.N ) THEN + INFO = -8 + ELSE IF( LDQ2.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLAED8', -INFO ) + RETURN + END IF +* +* Need to initialize GIVPTR to O here in case of quick exit +* to prevent an unspecified code behavior (usually sigfault) +* when IWORK array on entry to *stedc is not zeroed +* (or at least some IWORK entries which used in *laed7 for GIVPTR). +* + GIVPTR = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + N1 = CUTPNT + N2 = N - N1 + N1P1 = N1 + 1 +* + IF( RHO.LT.ZERO ) THEN + CALL DSCAL( N2, MONE, Z( N1P1 ), 1 ) + END IF +* +* Normalize z so that norm(z) = 1 +* + T = ONE / SQRT( TWO ) + DO 10 J = 1, N + INDX( J ) = J + 10 CONTINUE + CALL DSCAL( N, T, Z, 1 ) + RHO = ABS( TWO*RHO ) +* +* Sort the eigenvalues into increasing order +* + DO 20 I = CUTPNT + 1, N + INDXQ( I ) = INDXQ( I ) + CUTPNT + 20 CONTINUE + DO 30 I = 1, N + DLAMDA( I ) = D( INDXQ( I ) ) + W( I ) = Z( INDXQ( I ) ) + 30 CONTINUE + I = 1 + J = CUTPNT + 1 + CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDX ) + DO 40 I = 1, N + D( I ) = DLAMDA( INDX( I ) ) + Z( I ) = W( INDX( I ) ) + 40 CONTINUE +* +* Calculate the allowable deflation tolerance +* + IMAX = IDAMAX( N, Z, 1 ) + JMAX = IDAMAX( N, D, 1 ) + EPS = DLAMCH( 'Epsilon' ) + TOL = EIGHT*EPS*ABS( D( JMAX ) ) +* +* If the rank-1 modifier is small enough, no more needs to be done +* -- except to reorganize Q so that its columns correspond with the +* elements in D. +* + IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN + K = 0 + DO 50 J = 1, N + PERM( J ) = INDXQ( INDX( J ) ) + CALL ZCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 ) + 50 CONTINUE + CALL ZLACPY( 'A', QSIZ, N, Q2( 1, 1 ), LDQ2, Q( 1, 1 ), LDQ ) + RETURN + END IF +* +* If there are multiple eigenvalues then the problem deflates. Here +* the number of equal eigenvalues are found. As each equal +* eigenvalue is found, an elementary reflector is computed to rotate +* the corresponding eigensubspace so that the corresponding +* components of Z are zero in this new basis. +* + K = 0 + K2 = N + 1 + DO 60 J = 1, N + IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + INDXP( K2 ) = J + IF( J.EQ.N ) + $ GO TO 100 + ELSE + JLAM = J + GO TO 70 + END IF + 60 CONTINUE + 70 CONTINUE + J = J + 1 + IF( J.GT.N ) + $ GO TO 90 + IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN +* +* Deflate due to small z component. +* + K2 = K2 - 1 + INDXP( K2 ) = J + ELSE +* +* Check if eigenvalues are close enough to allow deflation. +* + S = Z( JLAM ) + C = Z( J ) +* +* Find sqrt(a**2+b**2) without overflow or +* destructive underflow. +* + TAU = DLAPY2( C, S ) + T = D( J ) - D( JLAM ) + C = C / TAU + S = -S / TAU + IF( ABS( T*C*S ).LE.TOL ) THEN +* +* Deflation is possible. +* + Z( J ) = TAU + Z( JLAM ) = ZERO +* +* Record the appropriate Givens rotation +* + GIVPTR = GIVPTR + 1 + GIVCOL( 1, GIVPTR ) = INDXQ( INDX( JLAM ) ) + GIVCOL( 2, GIVPTR ) = INDXQ( INDX( J ) ) + GIVNUM( 1, GIVPTR ) = C + GIVNUM( 2, GIVPTR ) = S + CALL ZDROT( QSIZ, Q( 1, INDXQ( INDX( JLAM ) ) ), 1, + $ Q( 1, INDXQ( INDX( J ) ) ), 1, C, S ) + T = D( JLAM )*C*C + D( J )*S*S + D( J ) = D( JLAM )*S*S + D( J )*C*C + D( JLAM ) = T + K2 = K2 - 1 + I = 1 + 80 CONTINUE + IF( K2+I.LE.N ) THEN + IF( D( JLAM ).LT.D( INDXP( K2+I ) ) ) THEN + INDXP( K2+I-1 ) = INDXP( K2+I ) + INDXP( K2+I ) = JLAM + I = I + 1 + GO TO 80 + ELSE + INDXP( K2+I-1 ) = JLAM + END IF + ELSE + INDXP( K2+I-1 ) = JLAM + END IF + JLAM = J + ELSE + K = K + 1 + W( K ) = Z( JLAM ) + DLAMDA( K ) = D( JLAM ) + INDXP( K ) = JLAM + JLAM = J + END IF + END IF + GO TO 70 + 90 CONTINUE +* +* Record the last eigenvalue. +* + K = K + 1 + W( K ) = Z( JLAM ) + DLAMDA( K ) = D( JLAM ) + INDXP( K ) = JLAM +* + 100 CONTINUE +* +* Sort the eigenvalues and corresponding eigenvectors into DLAMDA +* and Q2 respectively. The eigenvalues/vectors which were not +* deflated go into the first K slots of DLAMDA and Q2 respectively, +* while those which were deflated go into the last N - K slots. +* + DO 110 J = 1, N + JP = INDXP( J ) + DLAMDA( J ) = D( JP ) + PERM( J ) = INDXQ( INDX( JP ) ) + CALL ZCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 ) + 110 CONTINUE +* +* The deflated eigenvalues and their corresponding vectors go back +* into the last N - K slots of D and Q respectively. +* + IF( K.LT.N ) THEN + CALL DCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 ) + CALL ZLACPY( 'A', QSIZ, N-K, Q2( 1, K+1 ), LDQ2, Q( 1, K+1 ), + $ LDQ ) + END IF +* + RETURN +* +* End of ZLAED8 +* + END diff --git a/dspl/liblapack/SRC/zlaein.f b/dspl/liblapack/SRC/zlaein.f new file mode 100644 index 0000000..0376e60 --- /dev/null +++ b/dspl/liblapack/SRC/zlaein.f @@ -0,0 +1,354 @@ +*> \brief \b ZLAEIN computes a specified right or left eigenvector of an upper Hessenberg matrix by inverse iteration. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAEIN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAEIN( RIGHTV, NOINIT, N, H, LDH, W, V, B, LDB, RWORK, +* EPS3, SMLNUM, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL NOINIT, RIGHTV +* INTEGER INFO, LDB, LDH, N +* DOUBLE PRECISION EPS3, SMLNUM +* COMPLEX*16 W +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 B( LDB, * ), H( LDH, * ), V( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAEIN uses inverse iteration to find a right or left eigenvector +*> corresponding to the eigenvalue W of a complex upper Hessenberg +*> matrix H. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] RIGHTV +*> \verbatim +*> RIGHTV is LOGICAL +*> = .TRUE. : compute right eigenvector; +*> = .FALSE.: compute left eigenvector. +*> \endverbatim +*> +*> \param[in] NOINIT +*> \verbatim +*> NOINIT is LOGICAL +*> = .TRUE. : no initial vector supplied in V +*> = .FALSE.: initial vector supplied in V. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N >= 0. +*> \endverbatim +*> +*> \param[in] H +*> \verbatim +*> H is COMPLEX*16 array, dimension (LDH,N) +*> The upper Hessenberg matrix H. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH >= max(1,N). +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is COMPLEX*16 +*> The eigenvalue of H whose corresponding right or left +*> eigenvector is to be computed. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (N) +*> On entry, if NOINIT = .FALSE., V must contain a starting +*> vector for inverse iteration; otherwise V need not be set. +*> On exit, V contains the computed eigenvector, normalized so +*> that the component of largest magnitude has magnitude 1; here +*> the magnitude of a complex number (x,y) is taken to be +*> |x| + |y|. +*> \endverbatim +*> +*> \param[out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[in] EPS3 +*> \verbatim +*> EPS3 is DOUBLE PRECISION +*> A small machine-dependent value which is used to perturb +*> close eigenvalues, and to replace zero pivots. +*> \endverbatim +*> +*> \param[in] SMLNUM +*> \verbatim +*> SMLNUM is DOUBLE PRECISION +*> A machine-dependent value close to the underflow threshold. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> = 1: inverse iteration did not converge; V is set to the +*> last iterate. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLAEIN( RIGHTV, NOINIT, N, H, LDH, W, V, B, LDB, RWORK, + $ EPS3, SMLNUM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL NOINIT, RIGHTV + INTEGER INFO, LDB, LDH, N + DOUBLE PRECISION EPS3, SMLNUM + COMPLEX*16 W +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 B( LDB, * ), H( LDH, * ), V( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, TENTH + PARAMETER ( ONE = 1.0D+0, TENTH = 1.0D-1 ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + CHARACTER NORMIN, TRANS + INTEGER I, IERR, ITS, J + DOUBLE PRECISION GROWTO, NRMSML, ROOTN, RTEMP, SCALE, VNORM + COMPLEX*16 CDUM, EI, EJ, TEMP, X +* .. +* .. External Functions .. + INTEGER IZAMAX + DOUBLE PRECISION DZASUM, DZNRM2 + COMPLEX*16 ZLADIV + EXTERNAL IZAMAX, DZASUM, DZNRM2, ZLADIV +* .. +* .. External Subroutines .. + EXTERNAL ZDSCAL, ZLATRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* GROWTO is the threshold used in the acceptance test for an +* eigenvector. +* + ROOTN = SQRT( DBLE( N ) ) + GROWTO = TENTH / ROOTN + NRMSML = MAX( ONE, EPS3*ROOTN )*SMLNUM +* +* Form B = H - W*I (except that the subdiagonal elements are not +* stored). +* + DO 20 J = 1, N + DO 10 I = 1, J - 1 + B( I, J ) = H( I, J ) + 10 CONTINUE + B( J, J ) = H( J, J ) - W + 20 CONTINUE +* + IF( NOINIT ) THEN +* +* Initialize V. +* + DO 30 I = 1, N + V( I ) = EPS3 + 30 CONTINUE + ELSE +* +* Scale supplied initial vector. +* + VNORM = DZNRM2( N, V, 1 ) + CALL ZDSCAL( N, ( EPS3*ROOTN ) / MAX( VNORM, NRMSML ), V, 1 ) + END IF +* + IF( RIGHTV ) THEN +* +* LU decomposition with partial pivoting of B, replacing zero +* pivots by EPS3. +* + DO 60 I = 1, N - 1 + EI = H( I+1, I ) + IF( CABS1( B( I, I ) ).LT.CABS1( EI ) ) THEN +* +* Interchange rows and eliminate. +* + X = ZLADIV( B( I, I ), EI ) + B( I, I ) = EI + DO 40 J = I + 1, N + TEMP = B( I+1, J ) + B( I+1, J ) = B( I, J ) - X*TEMP + B( I, J ) = TEMP + 40 CONTINUE + ELSE +* +* Eliminate without interchange. +* + IF( B( I, I ).EQ.ZERO ) + $ B( I, I ) = EPS3 + X = ZLADIV( EI, B( I, I ) ) + IF( X.NE.ZERO ) THEN + DO 50 J = I + 1, N + B( I+1, J ) = B( I+1, J ) - X*B( I, J ) + 50 CONTINUE + END IF + END IF + 60 CONTINUE + IF( B( N, N ).EQ.ZERO ) + $ B( N, N ) = EPS3 +* + TRANS = 'N' +* + ELSE +* +* UL decomposition with partial pivoting of B, replacing zero +* pivots by EPS3. +* + DO 90 J = N, 2, -1 + EJ = H( J, J-1 ) + IF( CABS1( B( J, J ) ).LT.CABS1( EJ ) ) THEN +* +* Interchange columns and eliminate. +* + X = ZLADIV( B( J, J ), EJ ) + B( J, J ) = EJ + DO 70 I = 1, J - 1 + TEMP = B( I, J-1 ) + B( I, J-1 ) = B( I, J ) - X*TEMP + B( I, J ) = TEMP + 70 CONTINUE + ELSE +* +* Eliminate without interchange. +* + IF( B( J, J ).EQ.ZERO ) + $ B( J, J ) = EPS3 + X = ZLADIV( EJ, B( J, J ) ) + IF( X.NE.ZERO ) THEN + DO 80 I = 1, J - 1 + B( I, J-1 ) = B( I, J-1 ) - X*B( I, J ) + 80 CONTINUE + END IF + END IF + 90 CONTINUE + IF( B( 1, 1 ).EQ.ZERO ) + $ B( 1, 1 ) = EPS3 +* + TRANS = 'C' +* + END IF +* + NORMIN = 'N' + DO 110 ITS = 1, N +* +* Solve U*x = scale*v for a right eigenvector +* or U**H *x = scale*v for a left eigenvector, +* overwriting x on v. +* + CALL ZLATRS( 'Upper', TRANS, 'Nonunit', NORMIN, N, B, LDB, V, + $ SCALE, RWORK, IERR ) + NORMIN = 'Y' +* +* Test for sufficient growth in the norm of v. +* + VNORM = DZASUM( N, V, 1 ) + IF( VNORM.GE.GROWTO*SCALE ) + $ GO TO 120 +* +* Choose new orthogonal starting vector and try again. +* + RTEMP = EPS3 / ( ROOTN+ONE ) + V( 1 ) = EPS3 + DO 100 I = 2, N + V( I ) = RTEMP + 100 CONTINUE + V( N-ITS+1 ) = V( N-ITS+1 ) - EPS3*ROOTN + 110 CONTINUE +* +* Failure to find eigenvector in N iterations. +* + INFO = 1 +* + 120 CONTINUE +* +* Normalize eigenvector. +* + I = IZAMAX( N, V, 1 ) + CALL ZDSCAL( N, ONE / CABS1( V( I ) ), V, 1 ) +* + RETURN +* +* End of ZLAEIN +* + END diff --git a/dspl/liblapack/SRC/zlaesy.f b/dspl/liblapack/SRC/zlaesy.f new file mode 100644 index 0000000..2ff9b35 --- /dev/null +++ b/dspl/liblapack/SRC/zlaesy.f @@ -0,0 +1,221 @@ +*> \brief \b ZLAESY computes the eigenvalues and eigenvectors of a 2-by-2 complex symmetric matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAESY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAESY( A, B, C, RT1, RT2, EVSCAL, CS1, SN1 ) +* +* .. Scalar Arguments .. +* COMPLEX*16 A, B, C, CS1, EVSCAL, RT1, RT2, SN1 +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAESY computes the eigendecomposition of a 2-by-2 symmetric matrix +*> ( ( A, B );( B, C ) ) +*> provided the norm of the matrix of eigenvectors is larger than +*> some threshold value. +*> +*> RT1 is the eigenvalue of larger absolute value, and RT2 of +*> smaller absolute value. If the eigenvectors are computed, then +*> on return ( CS1, SN1 ) is the unit eigenvector for RT1, hence +*> +*> [ CS1 SN1 ] . [ A B ] . [ CS1 -SN1 ] = [ RT1 0 ] +*> [ -SN1 CS1 ] [ B C ] [ SN1 CS1 ] [ 0 RT2 ] +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 +*> The ( 1, 1 ) element of input matrix. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 +*> The ( 1, 2 ) element of input matrix. The ( 2, 1 ) element +*> is also given by B, since the 2-by-2 matrix is symmetric. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is COMPLEX*16 +*> The ( 2, 2 ) element of input matrix. +*> \endverbatim +*> +*> \param[out] RT1 +*> \verbatim +*> RT1 is COMPLEX*16 +*> The eigenvalue of larger modulus. +*> \endverbatim +*> +*> \param[out] RT2 +*> \verbatim +*> RT2 is COMPLEX*16 +*> The eigenvalue of smaller modulus. +*> \endverbatim +*> +*> \param[out] EVSCAL +*> \verbatim +*> EVSCAL is COMPLEX*16 +*> The complex value by which the eigenvector matrix was scaled +*> to make it orthonormal. If EVSCAL is zero, the eigenvectors +*> were not computed. This means one of two things: the 2-by-2 +*> matrix could not be diagonalized, or the norm of the matrix +*> of eigenvectors before scaling was larger than the threshold +*> value THRESH (set below). +*> \endverbatim +*> +*> \param[out] CS1 +*> \verbatim +*> CS1 is COMPLEX*16 +*> \endverbatim +*> +*> \param[out] SN1 +*> \verbatim +*> SN1 is COMPLEX*16 +*> If EVSCAL .NE. 0, ( CS1, SN1 ) is the unit right eigenvector +*> for RT1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYauxiliary +* +* ===================================================================== + SUBROUTINE ZLAESY( A, B, C, RT1, RT2, EVSCAL, CS1, SN1 ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + COMPLEX*16 A, B, C, CS1, EVSCAL, RT1, RT2, SN1 +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D0, 0.0D0 ) ) + DOUBLE PRECISION HALF + PARAMETER ( HALF = 0.5D0 ) + DOUBLE PRECISION THRESH + PARAMETER ( THRESH = 0.1D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION BABS, EVNORM, TABS, Z + COMPLEX*16 S, T, TMP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* +* +* Special case: The matrix is actually diagonal. +* To avoid divide by zero later, we treat this case separately. +* + IF( ABS( B ).EQ.ZERO ) THEN + RT1 = A + RT2 = C + IF( ABS( RT1 ).LT.ABS( RT2 ) ) THEN + TMP = RT1 + RT1 = RT2 + RT2 = TMP + CS1 = ZERO + SN1 = ONE + ELSE + CS1 = ONE + SN1 = ZERO + END IF + ELSE +* +* Compute the eigenvalues and eigenvectors. +* The characteristic equation is +* lambda **2 - (A+C) lambda + (A*C - B*B) +* and we solve it using the quadratic formula. +* + S = ( A+C )*HALF + T = ( A-C )*HALF +* +* Take the square root carefully to avoid over/under flow. +* + BABS = ABS( B ) + TABS = ABS( T ) + Z = MAX( BABS, TABS ) + IF( Z.GT.ZERO ) + $ T = Z*SQRT( ( T / Z )**2+( B / Z )**2 ) +* +* Compute the two eigenvalues. RT1 and RT2 are exchanged +* if necessary so that RT1 will have the greater magnitude. +* + RT1 = S + T + RT2 = S - T + IF( ABS( RT1 ).LT.ABS( RT2 ) ) THEN + TMP = RT1 + RT1 = RT2 + RT2 = TMP + END IF +* +* Choose CS1 = 1 and SN1 to satisfy the first equation, then +* scale the components of this eigenvector so that the matrix +* of eigenvectors X satisfies X * X**T = I . (No scaling is +* done if the norm of the eigenvalue matrix is less than THRESH.) +* + SN1 = ( RT1-A ) / B + TABS = ABS( SN1 ) + IF( TABS.GT.ONE ) THEN + T = TABS*SQRT( ( ONE / TABS )**2+( SN1 / TABS )**2 ) + ELSE + T = SQRT( CONE+SN1*SN1 ) + END IF + EVNORM = ABS( T ) + IF( EVNORM.GE.THRESH ) THEN + EVSCAL = CONE / T + CS1 = EVSCAL + SN1 = SN1*EVSCAL + ELSE + EVSCAL = ZERO + END IF + END IF + RETURN +* +* End of ZLAESY +* + END diff --git a/dspl/liblapack/SRC/zlaev2.f b/dspl/liblapack/SRC/zlaev2.f new file mode 100644 index 0000000..d93dd60 --- /dev/null +++ b/dspl/liblapack/SRC/zlaev2.f @@ -0,0 +1,165 @@ +*> \brief \b ZLAEV2 computes the eigenvalues and eigenvectors of a 2-by-2 symmetric/Hermitian matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAEV2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAEV2( A, B, C, RT1, RT2, CS1, SN1 ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION CS1, RT1, RT2 +* COMPLEX*16 A, B, C, SN1 +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAEV2 computes the eigendecomposition of a 2-by-2 Hermitian matrix +*> [ A B ] +*> [ CONJG(B) C ]. +*> On return, RT1 is the eigenvalue of larger absolute value, RT2 is the +*> eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right +*> eigenvector for RT1, giving the decomposition +*> +*> [ CS1 CONJG(SN1) ] [ A B ] [ CS1 -CONJG(SN1) ] = [ RT1 0 ] +*> [-SN1 CS1 ] [ CONJG(B) C ] [ SN1 CS1 ] [ 0 RT2 ]. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 +*> The (1,1) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 +*> The (1,2) element and the conjugate of the (2,1) element of +*> the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is COMPLEX*16 +*> The (2,2) element of the 2-by-2 matrix. +*> \endverbatim +*> +*> \param[out] RT1 +*> \verbatim +*> RT1 is DOUBLE PRECISION +*> The eigenvalue of larger absolute value. +*> \endverbatim +*> +*> \param[out] RT2 +*> \verbatim +*> RT2 is DOUBLE PRECISION +*> The eigenvalue of smaller absolute value. +*> \endverbatim +*> +*> \param[out] CS1 +*> \verbatim +*> CS1 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] SN1 +*> \verbatim +*> SN1 is COMPLEX*16 +*> The vector (CS1, SN1) is a unit right eigenvector for RT1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> RT1 is accurate to a few ulps barring over/underflow. +*> +*> RT2 may be inaccurate if there is massive cancellation in the +*> determinant A*C-B*B; higher precision or correctly rounded or +*> correctly truncated arithmetic would be needed to compute RT2 +*> accurately in all cases. +*> +*> CS1 and SN1 are accurate to a few ulps barring over/underflow. +*> +*> Overflow is possible only if RT1 is within a factor of 5 of overflow. +*> Underflow is harmless if the input data is 0 or exceeds +*> underflow_threshold / macheps. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLAEV2( A, B, C, RT1, RT2, CS1, SN1 ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + DOUBLE PRECISION CS1, RT1, RT2 + COMPLEX*16 A, B, C, SN1 +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION T + COMPLEX*16 W +* .. +* .. External Subroutines .. + EXTERNAL DLAEV2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG +* .. +* .. Executable Statements .. +* + IF( ABS( B ).EQ.ZERO ) THEN + W = ONE + ELSE + W = DCONJG( B ) / ABS( B ) + END IF + CALL DLAEV2( DBLE( A ), ABS( B ), DBLE( C ), RT1, RT2, CS1, T ) + SN1 = W*T + RETURN +* +* End of ZLAEV2 +* + END diff --git a/dspl/liblapack/SRC/zlag2c.f b/dspl/liblapack/SRC/zlag2c.f new file mode 100644 index 0000000..f1fc59a --- /dev/null +++ b/dspl/liblapack/SRC/zlag2c.f @@ -0,0 +1,157 @@ +*> \brief \b ZLAG2C converts a complex double precision matrix to a complex single precision matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAG2C + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAG2C( M, N, A, LDA, SA, LDSA, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDSA, M, N +* .. +* .. Array Arguments .. +* COMPLEX SA( LDSA, * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAG2C converts a COMPLEX*16 matrix, SA, to a COMPLEX matrix, A. +*> +*> RMAX is the overflow for the SINGLE PRECISION arithmetic +*> ZLAG2C checks that all the entries of A are between -RMAX and +*> RMAX. If not the conversion is aborted and a flag is raised. +*> +*> This is an auxiliary routine so there is no argument checking. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of lines of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N coefficient matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] SA +*> \verbatim +*> SA is COMPLEX array, dimension (LDSA,N) +*> On exit, if INFO=0, the M-by-N coefficient matrix SA; if +*> INFO>0, the content of SA is unspecified. +*> \endverbatim +*> +*> \param[in] LDSA +*> \verbatim +*> LDSA is INTEGER +*> The leading dimension of the array SA. LDSA >= max(1,M). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> = 1: an entry of the matrix A is greater than the SINGLE +*> PRECISION overflow threshold, in this case, the content +*> of SA in exit is unspecified. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLAG2C( M, N, A, LDA, SA, LDSA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDSA, M, N +* .. +* .. Array Arguments .. + COMPLEX SA( LDSA, * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION RMAX +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DIMAG +* .. +* .. External Functions .. + REAL SLAMCH + EXTERNAL SLAMCH +* .. +* .. Executable Statements .. +* + RMAX = SLAMCH( 'O' ) + DO 20 J = 1, N + DO 10 I = 1, M + IF( ( DBLE( A( I, J ) ).LT.-RMAX ) .OR. + $ ( DBLE( A( I, J ) ).GT.RMAX ) .OR. + $ ( DIMAG( A( I, J ) ).LT.-RMAX ) .OR. + $ ( DIMAG( A( I, J ) ).GT.RMAX ) ) THEN + INFO = 1 + GO TO 30 + END IF + SA( I, J ) = A( I, J ) + 10 CONTINUE + 20 CONTINUE + INFO = 0 + 30 CONTINUE + RETURN +* +* End of ZLAG2C +* + END diff --git a/dspl/liblapack/SRC/zlags2.f b/dspl/liblapack/SRC/zlags2.f new file mode 100644 index 0000000..d00cd4e --- /dev/null +++ b/dspl/liblapack/SRC/zlags2.f @@ -0,0 +1,399 @@ +*> \brief \b ZLAGS2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAGS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, CSV, +* SNV, CSQ, SNQ ) +* +* .. Scalar Arguments .. +* LOGICAL UPPER +* DOUBLE PRECISION A1, A3, B1, B3, CSQ, CSU, CSV +* COMPLEX*16 A2, B2, SNQ, SNU, SNV +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAGS2 computes 2-by-2 unitary matrices U, V and Q, such +*> that if ( UPPER ) then +*> +*> U**H *A*Q = U**H *( A1 A2 )*Q = ( x 0 ) +*> ( 0 A3 ) ( x x ) +*> and +*> V**H*B*Q = V**H *( B1 B2 )*Q = ( x 0 ) +*> ( 0 B3 ) ( x x ) +*> +*> or if ( .NOT.UPPER ) then +*> +*> U**H *A*Q = U**H *( A1 0 )*Q = ( x x ) +*> ( A2 A3 ) ( 0 x ) +*> and +*> V**H *B*Q = V**H *( B1 0 )*Q = ( x x ) +*> ( B2 B3 ) ( 0 x ) +*> where +*> +*> U = ( CSU SNU ), V = ( CSV SNV ), +*> ( -SNU**H CSU ) ( -SNV**H CSV ) +*> +*> Q = ( CSQ SNQ ) +*> ( -SNQ**H CSQ ) +*> +*> The rows of the transformed A and B are parallel. Moreover, if the +*> input 2-by-2 matrix A is not zero, then the transformed (1,1) entry +*> of A is not zero. If the input matrices A and B are both not zero, +*> then the transformed (2,2) element of B is not zero, except when the +*> first rows of input A and B are parallel and the second rows are +*> zero. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPPER +*> \verbatim +*> UPPER is LOGICAL +*> = .TRUE.: the input matrices A and B are upper triangular. +*> = .FALSE.: the input matrices A and B are lower triangular. +*> \endverbatim +*> +*> \param[in] A1 +*> \verbatim +*> A1 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] A2 +*> \verbatim +*> A2 is COMPLEX*16 +*> \endverbatim +*> +*> \param[in] A3 +*> \verbatim +*> A3 is DOUBLE PRECISION +*> On entry, A1, A2 and A3 are elements of the input 2-by-2 +*> upper (lower) triangular matrix A. +*> \endverbatim +*> +*> \param[in] B1 +*> \verbatim +*> B1 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] B2 +*> \verbatim +*> B2 is COMPLEX*16 +*> \endverbatim +*> +*> \param[in] B3 +*> \verbatim +*> B3 is DOUBLE PRECISION +*> On entry, B1, B2 and B3 are elements of the input 2-by-2 +*> upper (lower) triangular matrix B. +*> \endverbatim +*> +*> \param[out] CSU +*> \verbatim +*> CSU is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] SNU +*> \verbatim +*> SNU is COMPLEX*16 +*> The desired unitary matrix U. +*> \endverbatim +*> +*> \param[out] CSV +*> \verbatim +*> CSV is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] SNV +*> \verbatim +*> SNV is COMPLEX*16 +*> The desired unitary matrix V. +*> \endverbatim +*> +*> \param[out] CSQ +*> \verbatim +*> CSQ is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] SNQ +*> \verbatim +*> SNQ is COMPLEX*16 +*> The desired unitary matrix Q. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, CSV, + $ SNV, CSQ, SNQ ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL UPPER + DOUBLE PRECISION A1, A3, B1, B3, CSQ, CSU, CSV + COMPLEX*16 A2, B2, SNQ, SNU, SNV +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION A, AUA11, AUA12, AUA21, AUA22, AVB12, AVB11, + $ AVB21, AVB22, CSL, CSR, D, FB, FC, S1, S2, + $ SNL, SNR, UA11R, UA22R, VB11R, VB22R + COMPLEX*16 B, C, D1, R, T, UA11, UA12, UA21, UA22, VB11, + $ VB12, VB21, VB22 +* .. +* .. External Subroutines .. + EXTERNAL DLASV2, ZLARTG +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG +* .. +* .. Statement Functions .. + DOUBLE PRECISION ABS1 +* .. +* .. Statement Function definitions .. + ABS1( T ) = ABS( DBLE( T ) ) + ABS( DIMAG( T ) ) +* .. +* .. Executable Statements .. +* + IF( UPPER ) THEN +* +* Input matrices A and B are upper triangular matrices +* +* Form matrix C = A*adj(B) = ( a b ) +* ( 0 d ) +* + A = A1*B3 + D = A3*B1 + B = A2*B1 - A1*B2 + FB = ABS( B ) +* +* Transform complex 2-by-2 matrix C to real matrix by unitary +* diagonal matrix diag(1,D1). +* + D1 = ONE + IF( FB.NE.ZERO ) + $ D1 = B / FB +* +* The SVD of real 2 by 2 triangular C +* +* ( CSL -SNL )*( A B )*( CSR SNR ) = ( R 0 ) +* ( SNL CSL ) ( 0 D ) ( -SNR CSR ) ( 0 T ) +* + CALL DLASV2( A, FB, D, S1, S2, SNR, CSR, SNL, CSL ) +* + IF( ABS( CSL ).GE.ABS( SNL ) .OR. ABS( CSR ).GE.ABS( SNR ) ) + $ THEN +* +* Compute the (1,1) and (1,2) elements of U**H *A and V**H *B, +* and (1,2) element of |U|**H *|A| and |V|**H *|B|. +* + UA11R = CSL*A1 + UA12 = CSL*A2 + D1*SNL*A3 +* + VB11R = CSR*B1 + VB12 = CSR*B2 + D1*SNR*B3 +* + AUA12 = ABS( CSL )*ABS1( A2 ) + ABS( SNL )*ABS( A3 ) + AVB12 = ABS( CSR )*ABS1( B2 ) + ABS( SNR )*ABS( B3 ) +* +* zero (1,2) elements of U**H *A and V**H *B +* + IF( ( ABS( UA11R )+ABS1( UA12 ) ).EQ.ZERO ) THEN + CALL ZLARTG( -DCMPLX( VB11R ), DCONJG( VB12 ), CSQ, SNQ, + $ R ) + ELSE IF( ( ABS( VB11R )+ABS1( VB12 ) ).EQ.ZERO ) THEN + CALL ZLARTG( -DCMPLX( UA11R ), DCONJG( UA12 ), CSQ, SNQ, + $ R ) + ELSE IF( AUA12 / ( ABS( UA11R )+ABS1( UA12 ) ).LE.AVB12 / + $ ( ABS( VB11R )+ABS1( VB12 ) ) ) THEN + CALL ZLARTG( -DCMPLX( UA11R ), DCONJG( UA12 ), CSQ, SNQ, + $ R ) + ELSE + CALL ZLARTG( -DCMPLX( VB11R ), DCONJG( VB12 ), CSQ, SNQ, + $ R ) + END IF +* + CSU = CSL + SNU = -D1*SNL + CSV = CSR + SNV = -D1*SNR +* + ELSE +* +* Compute the (2,1) and (2,2) elements of U**H *A and V**H *B, +* and (2,2) element of |U|**H *|A| and |V|**H *|B|. +* + UA21 = -DCONJG( D1 )*SNL*A1 + UA22 = -DCONJG( D1 )*SNL*A2 + CSL*A3 +* + VB21 = -DCONJG( D1 )*SNR*B1 + VB22 = -DCONJG( D1 )*SNR*B2 + CSR*B3 +* + AUA22 = ABS( SNL )*ABS1( A2 ) + ABS( CSL )*ABS( A3 ) + AVB22 = ABS( SNR )*ABS1( B2 ) + ABS( CSR )*ABS( B3 ) +* +* zero (2,2) elements of U**H *A and V**H *B, and then swap. +* + IF( ( ABS1( UA21 )+ABS1( UA22 ) ).EQ.ZERO ) THEN + CALL ZLARTG( -DCONJG( VB21 ), DCONJG( VB22 ), CSQ, SNQ, + $ R ) + ELSE IF( ( ABS1( VB21 )+ABS( VB22 ) ).EQ.ZERO ) THEN + CALL ZLARTG( -DCONJG( UA21 ), DCONJG( UA22 ), CSQ, SNQ, + $ R ) + ELSE IF( AUA22 / ( ABS1( UA21 )+ABS1( UA22 ) ).LE.AVB22 / + $ ( ABS1( VB21 )+ABS1( VB22 ) ) ) THEN + CALL ZLARTG( -DCONJG( UA21 ), DCONJG( UA22 ), CSQ, SNQ, + $ R ) + ELSE + CALL ZLARTG( -DCONJG( VB21 ), DCONJG( VB22 ), CSQ, SNQ, + $ R ) + END IF +* + CSU = SNL + SNU = D1*CSL + CSV = SNR + SNV = D1*CSR +* + END IF +* + ELSE +* +* Input matrices A and B are lower triangular matrices +* +* Form matrix C = A*adj(B) = ( a 0 ) +* ( c d ) +* + A = A1*B3 + D = A3*B1 + C = A2*B3 - A3*B2 + FC = ABS( C ) +* +* Transform complex 2-by-2 matrix C to real matrix by unitary +* diagonal matrix diag(d1,1). +* + D1 = ONE + IF( FC.NE.ZERO ) + $ D1 = C / FC +* +* The SVD of real 2 by 2 triangular C +* +* ( CSL -SNL )*( A 0 )*( CSR SNR ) = ( R 0 ) +* ( SNL CSL ) ( C D ) ( -SNR CSR ) ( 0 T ) +* + CALL DLASV2( A, FC, D, S1, S2, SNR, CSR, SNL, CSL ) +* + IF( ABS( CSR ).GE.ABS( SNR ) .OR. ABS( CSL ).GE.ABS( SNL ) ) + $ THEN +* +* Compute the (2,1) and (2,2) elements of U**H *A and V**H *B, +* and (2,1) element of |U|**H *|A| and |V|**H *|B|. +* + UA21 = -D1*SNR*A1 + CSR*A2 + UA22R = CSR*A3 +* + VB21 = -D1*SNL*B1 + CSL*B2 + VB22R = CSL*B3 +* + AUA21 = ABS( SNR )*ABS( A1 ) + ABS( CSR )*ABS1( A2 ) + AVB21 = ABS( SNL )*ABS( B1 ) + ABS( CSL )*ABS1( B2 ) +* +* zero (2,1) elements of U**H *A and V**H *B. +* + IF( ( ABS1( UA21 )+ABS( UA22R ) ).EQ.ZERO ) THEN + CALL ZLARTG( DCMPLX( VB22R ), VB21, CSQ, SNQ, R ) + ELSE IF( ( ABS1( VB21 )+ABS( VB22R ) ).EQ.ZERO ) THEN + CALL ZLARTG( DCMPLX( UA22R ), UA21, CSQ, SNQ, R ) + ELSE IF( AUA21 / ( ABS1( UA21 )+ABS( UA22R ) ).LE.AVB21 / + $ ( ABS1( VB21 )+ABS( VB22R ) ) ) THEN + CALL ZLARTG( DCMPLX( UA22R ), UA21, CSQ, SNQ, R ) + ELSE + CALL ZLARTG( DCMPLX( VB22R ), VB21, CSQ, SNQ, R ) + END IF +* + CSU = CSR + SNU = -DCONJG( D1 )*SNR + CSV = CSL + SNV = -DCONJG( D1 )*SNL +* + ELSE +* +* Compute the (1,1) and (1,2) elements of U**H *A and V**H *B, +* and (1,1) element of |U|**H *|A| and |V|**H *|B|. +* + UA11 = CSR*A1 + DCONJG( D1 )*SNR*A2 + UA12 = DCONJG( D1 )*SNR*A3 +* + VB11 = CSL*B1 + DCONJG( D1 )*SNL*B2 + VB12 = DCONJG( D1 )*SNL*B3 +* + AUA11 = ABS( CSR )*ABS( A1 ) + ABS( SNR )*ABS1( A2 ) + AVB11 = ABS( CSL )*ABS( B1 ) + ABS( SNL )*ABS1( B2 ) +* +* zero (1,1) elements of U**H *A and V**H *B, and then swap. +* + IF( ( ABS1( UA11 )+ABS1( UA12 ) ).EQ.ZERO ) THEN + CALL ZLARTG( VB12, VB11, CSQ, SNQ, R ) + ELSE IF( ( ABS1( VB11 )+ABS1( VB12 ) ).EQ.ZERO ) THEN + CALL ZLARTG( UA12, UA11, CSQ, SNQ, R ) + ELSE IF( AUA11 / ( ABS1( UA11 )+ABS1( UA12 ) ).LE.AVB11 / + $ ( ABS1( VB11 )+ABS1( VB12 ) ) ) THEN + CALL ZLARTG( UA12, UA11, CSQ, SNQ, R ) + ELSE + CALL ZLARTG( VB12, VB11, CSQ, SNQ, R ) + END IF +* + CSU = SNR + SNU = DCONJG( D1 )*CSR + CSV = SNL + SNV = DCONJG( D1 )*CSL +* + END IF +* + END IF +* + RETURN +* +* End of ZLAGS2 +* + END diff --git a/dspl/liblapack/SRC/zlagtm.f b/dspl/liblapack/SRC/zlagtm.f new file mode 100644 index 0000000..5c3e99a --- /dev/null +++ b/dspl/liblapack/SRC/zlagtm.f @@ -0,0 +1,321 @@ +*> \brief \b ZLAGTM performs a matrix-matrix product of the form C = αAB+βC, where A is a tridiagonal matrix, B and C are rectangular matrices, and α and β are scalars, which may be 0, 1, or -1. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAGTM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAGTM( TRANS, N, NRHS, ALPHA, DL, D, DU, X, LDX, BETA, +* B, LDB ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER LDB, LDX, N, NRHS +* DOUBLE PRECISION ALPHA, BETA +* .. +* .. Array Arguments .. +* COMPLEX*16 B( LDB, * ), D( * ), DL( * ), DU( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAGTM performs a matrix-vector product of the form +*> +*> B := alpha * A * X + beta * B +*> +*> where A is a tridiagonal matrix of order N, B and X are N by NRHS +*> matrices, and alpha and beta are real scalars, each of which may be +*> 0., 1., or -1. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the operation applied to A. +*> = 'N': No transpose, B := alpha * A * X + beta * B +*> = 'T': Transpose, B := alpha * A**T * X + beta * B +*> = 'C': Conjugate transpose, B := alpha * A**H * X + beta * B +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices X and B. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION +*> The scalar alpha. ALPHA must be 0., 1., or -1.; otherwise, +*> it is assumed to be 0. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is COMPLEX*16 array, dimension (N-1) +*> The (n-1) sub-diagonal elements of T. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is COMPLEX*16 array, dimension (N) +*> The diagonal elements of T. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is COMPLEX*16 array, dimension (N-1) +*> The (n-1) super-diagonal elements of T. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> The N by NRHS matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(N,1). +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION +*> The scalar beta. BETA must be 0., 1., or -1.; otherwise, +*> it is assumed to be 1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N by NRHS matrix B. +*> On exit, B is overwritten by the matrix expression +*> B := alpha * A * X + beta * B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(N,1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLAGTM( TRANS, N, NRHS, ALPHA, DL, D, DU, X, LDX, BETA, + $ B, LDB ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER LDB, LDX, N, NRHS + DOUBLE PRECISION ALPHA, BETA +* .. +* .. Array Arguments .. + COMPLEX*16 B( LDB, * ), D( * ), DL( * ), DU( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) + $ RETURN +* +* Multiply B by BETA if BETA.NE.1. +* + IF( BETA.EQ.ZERO ) THEN + DO 20 J = 1, NRHS + DO 10 I = 1, N + B( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE IF( BETA.EQ.-ONE ) THEN + DO 40 J = 1, NRHS + DO 30 I = 1, N + B( I, J ) = -B( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF +* + IF( ALPHA.EQ.ONE ) THEN + IF( LSAME( TRANS, 'N' ) ) THEN +* +* Compute B := B + A*X +* + DO 60 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + + $ DU( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) + DL( N-1 )*X( N-1, J ) + + $ D( N )*X( N, J ) + DO 50 I = 2, N - 1 + B( I, J ) = B( I, J ) + DL( I-1 )*X( I-1, J ) + + $ D( I )*X( I, J ) + DU( I )*X( I+1, J ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Compute B := B + A**T * X +* + DO 80 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) + + $ DL( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) + DU( N-1 )*X( N-1, J ) + + $ D( N )*X( N, J ) + DO 70 I = 2, N - 1 + B( I, J ) = B( I, J ) + DU( I-1 )*X( I-1, J ) + + $ D( I )*X( I, J ) + DL( I )*X( I+1, J ) + 70 CONTINUE + END IF + 80 CONTINUE + ELSE IF( LSAME( TRANS, 'C' ) ) THEN +* +* Compute B := B + A**H * X +* + DO 100 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) + DCONJG( D( 1 ) )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) + DCONJG( D( 1 ) )*X( 1, J ) + + $ DCONJG( DL( 1 ) )*X( 2, J ) + B( N, J ) = B( N, J ) + DCONJG( DU( N-1 ) )* + $ X( N-1, J ) + DCONJG( D( N ) )*X( N, J ) + DO 90 I = 2, N - 1 + B( I, J ) = B( I, J ) + DCONJG( DU( I-1 ) )* + $ X( I-1, J ) + DCONJG( D( I ) )* + $ X( I, J ) + DCONJG( DL( I ) )* + $ X( I+1, J ) + 90 CONTINUE + END IF + 100 CONTINUE + END IF + ELSE IF( ALPHA.EQ.-ONE ) THEN + IF( LSAME( TRANS, 'N' ) ) THEN +* +* Compute B := B - A*X +* + DO 120 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) - + $ DU( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) - DL( N-1 )*X( N-1, J ) - + $ D( N )*X( N, J ) + DO 110 I = 2, N - 1 + B( I, J ) = B( I, J ) - DL( I-1 )*X( I-1, J ) - + $ D( I )*X( I, J ) - DU( I )*X( I+1, J ) + 110 CONTINUE + END IF + 120 CONTINUE + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Compute B := B - A**T *X +* + DO 140 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) - + $ DL( 1 )*X( 2, J ) + B( N, J ) = B( N, J ) - DU( N-1 )*X( N-1, J ) - + $ D( N )*X( N, J ) + DO 130 I = 2, N - 1 + B( I, J ) = B( I, J ) - DU( I-1 )*X( I-1, J ) - + $ D( I )*X( I, J ) - DL( I )*X( I+1, J ) + 130 CONTINUE + END IF + 140 CONTINUE + ELSE IF( LSAME( TRANS, 'C' ) ) THEN +* +* Compute B := B - A**H *X +* + DO 160 J = 1, NRHS + IF( N.EQ.1 ) THEN + B( 1, J ) = B( 1, J ) - DCONJG( D( 1 ) )*X( 1, J ) + ELSE + B( 1, J ) = B( 1, J ) - DCONJG( D( 1 ) )*X( 1, J ) - + $ DCONJG( DL( 1 ) )*X( 2, J ) + B( N, J ) = B( N, J ) - DCONJG( DU( N-1 ) )* + $ X( N-1, J ) - DCONJG( D( N ) )*X( N, J ) + DO 150 I = 2, N - 1 + B( I, J ) = B( I, J ) - DCONJG( DU( I-1 ) )* + $ X( I-1, J ) - DCONJG( D( I ) )* + $ X( I, J ) - DCONJG( DL( I ) )* + $ X( I+1, J ) + 150 CONTINUE + END IF + 160 CONTINUE + END IF + END IF + RETURN +* +* End of ZLAGTM +* + END diff --git a/dspl/liblapack/SRC/zlahef.f b/dspl/liblapack/SRC/zlahef.f new file mode 100644 index 0000000..0c8484d --- /dev/null +++ b/dspl/liblapack/SRC/zlahef.f @@ -0,0 +1,972 @@ +*> \brief \b ZLAHEF computes a partial factorization of a complex Hermitian indefinite matrix using the Bunch-Kaufman diagonal pivoting method (blocked algorithm, calling Level 3 BLAS). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAHEF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAHEF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAHEF computes a partial factorization of a complex Hermitian +*> matrix A using the Bunch-Kaufman diagonal pivoting method. The +*> partial factorization has the form: +*> +*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +*> ( 0 U22 ) ( 0 D ) ( U12**H U22**H ) +*> +*> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L' +*> ( L21 I ) ( 0 A22 ) ( 0 I ) +*> +*> where the order of D is at most NB. The actual order is returned in +*> the argument KB, and is either NB or NB-1, or N if N <= NB. +*> Note that U**H denotes the conjugate transpose of U. +*> +*> ZLAHEF is an auxiliary routine called by ZHETRF. It uses blocked code +*> (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or +*> A22 (if UPLO = 'L'). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The maximum number of columns of the matrix A that should be +*> factored. NB should be at least 2 to allow for 2-by-2 pivot +*> blocks. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns of A that were actually factored. +*> KB is either NB-1 or NB, or N if N <= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, A contains details of the partial factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> Only the last KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k-1) < 0, then rows and columns +*> k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> Only the first KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k+1) < 0, then rows and columns +*> k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1) +*> is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX*16 array, dimension (LDW,NB) +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZLAHEF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER IMAX, J, JB, JJ, JMAX, JP, K, KK, KKW, KP, + $ KSTEP, KW + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, R1, ROWMAX, T + COMPLEX*16 D11, D21, D22, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + EXTERNAL LSAME, IZAMAX +* .. +* .. External Subroutines .. + EXTERNAL ZCOPY, ZDSCAL, ZGEMM, ZGEMV, ZLACGV, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( DBLE( Z ) ) + ABS( DIMAG( Z ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 (note that conjg(W) is actually stored) +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* +* KW is the column of W which corresponds to column K of A +* + K = N + 10 CONTINUE + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* + KSTEP = 1 +* +* Copy column K of A to column KW of W and update it +* + CALL ZCOPY( K-1, A( 1, K ), 1, W( 1, KW ), 1 ) + W( K, KW ) = DBLE( A( K, K ) ) + IF( K.LT.N ) THEN + CALL ZGEMV( 'No transpose', K, N-K, -CONE, A( 1, K+1 ), LDA, + $ W( K, KW+1 ), LDW, CONE, W( 1, KW ), 1 ) + W( K, KW ) = DBLE( W( K, KW ) ) + END IF +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( DBLE( W( K, KW ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = IZAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = CABS1( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = DBLE( A( K, K ) ) + ELSE +* +* ============================================================ +* +* BEGIN pivot search +* +* Case(1) + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* BEGIN pivot search along IMAX row +* +* +* Copy column IMAX to column KW-1 of W and update it +* + CALL ZCOPY( IMAX-1, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 ) + W( IMAX, KW-1 ) = DBLE( A( IMAX, IMAX ) ) + CALL ZCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) + CALL ZLACGV( K-IMAX, W( IMAX+1, KW-1 ), 1 ) + IF( K.LT.N ) THEN + CALL ZGEMV( 'No transpose', K, N-K, -CONE, + $ A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW, + $ CONE, W( 1, KW-1 ), 1 ) + W( IMAX, KW-1 ) = DBLE( W( IMAX, KW-1 ) ) + END IF +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine only ROWMAX. +* + JMAX = IMAX + IZAMAX( K-IMAX, W( IMAX+1, KW-1 ), 1 ) + ROWMAX = CABS1( W( JMAX, KW-1 ) ) + IF( IMAX.GT.1 ) THEN + JMAX = IZAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, KW-1 ) ) ) + END IF +* +* Case(2) + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* +* Case(3) + ELSE IF( ABS( DBLE( W( IMAX, KW-1 ) ) ).GE.ALPHA*ROWMAX ) + $ THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW of W +* + CALL ZCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* +* Case(4) + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF +* +* +* END pivot search along IMAX row +* + END IF +* +* END pivot search +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K - KSTEP + 1 +* +* KKW is the column of W which corresponds to column KK of A +* + KKW = NB + KK - N +* +* Interchange rows and columns KP and KK. +* Updated column KP is already stored in column KKW of W. +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP of submatrix A +* at step K. No need to copy element into column K +* (or K and K-1 for 2-by-2 pivot) of A, since these columns +* will be later overwritten. +* + A( KP, KP ) = DBLE( A( KK, KK ) ) + CALL ZCOPY( KK-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + CALL ZLACGV( KK-1-KP, A( KP, KP+1 ), LDA ) + IF( KP.GT.1 ) + $ CALL ZCOPY( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last K+1 to N columns of A +* (columns K (or K and K-1 for 2-by-2 pivot) of A will be +* later overwritten). Interchange rows KK and KP +* in last KKW to NB columns of W. +* + IF( K.LT.N ) + $ CALL ZSWAP( N-K, A( KK, K+1 ), LDA, A( KP, K+1 ), + $ LDA ) + CALL ZSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column kw of W now holds +* +* W(kw) = U(k)*D(k), +* +* where U(k) is the k-th column of U +* +* (1) Store subdiag. elements of column U(k) +* and 1-by-1 block D(k) in column k of A. +* (NOTE: Diagonal element U(k,k) is a UNIT element +* and not stored) +* A(k,k) := D(k,k) = W(k,kw) +* A(1:k-1,k) := U(1:k-1,k) = W(1:k-1,kw)/D(k,k) +* +* (NOTE: No need to use for Hermitian matrix +* A( K, K ) = DBLE( W( K, K) ) to separately copy diagonal +* element D(k,k) from W (potentially saves only one load)) + CALL ZCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + IF( K.GT.1 ) THEN +* +* (NOTE: No need to check if A(k,k) is NOT ZERO, +* since that was ensured earlier in pivot search: +* case A(k,k) = 0 falls into 2x2 pivot case(4)) +* + R1 = ONE / DBLE( A( K, K ) ) + CALL ZDSCAL( K-1, R1, A( 1, K ), 1 ) +* +* (2) Conjugate column W(kw) +* + CALL ZLACGV( K-1, W( 1, KW ), 1 ) + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns kw and kw-1 of W now hold +* +* ( W(kw-1) W(kw) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* (1) Store U(1:k-2,k-1) and U(1:k-2,k) and 2-by-2 +* block D(k-1:k,k-1:k) in columns k-1 and k of A. +* (NOTE: 2-by-2 diagonal block U(k-1:k,k-1:k) is a UNIT +* block and not stored) +* A(k-1:k,k-1:k) := D(k-1:k,k-1:k) = W(k-1:k,kw-1:kw) +* A(1:k-2,k-1:k) := U(1:k-2,k:k-1:k) = +* = W(1:k-2,kw-1:kw) * ( D(k-1:k,k-1:k)**(-1) ) +* + IF( K.GT.2 ) THEN +* +* Factor out the columns of the inverse of 2-by-2 pivot +* block D, so that each column contains 1, to reduce the +* number of FLOPS when we multiply panel +* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1). +* +* D**(-1) = ( d11 cj(d21) )**(-1) = +* ( d21 d22 ) +* +* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = +* ( (-d21) ( d11 ) ) +* +* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * +* +* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = +* ( ( -1 ) ( d11/conj(d21) ) ) +* +* = 1/(|d21|**2) * 1/(D22*D11-1) * +* +* * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = ( conj(D21)*( D11 ) D21*( -1 ) ) +* ( ( -1 ) ( D22 ) ), +* +* where D11 = d22/d21, +* D22 = d11/conj(d21), +* D21 = T/d21, +* T = 1/(D22*D11-1). +* +* (NOTE: No need to check for division by ZERO, +* since that was ensured earlier in pivot search: +* (a) d21 != 0, since in 2x2 pivot case(4) +* |d21| should be larger than |d11| and |d22|; +* (b) (D22*D11 - 1) != 0, since from (a), +* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.) +* + D21 = W( K-1, KW ) + D11 = W( K, KW ) / DCONJG( D21 ) + D22 = W( K-1, KW-1 ) / D21 + T = ONE / ( DBLE( D11*D22 )-ONE ) + D21 = T / D21 +* +* Update elements in columns A(k-1) and A(k) as +* dot products of rows of ( W(kw-1) W(kw) ) and columns +* of D**(-1) +* + DO 20 J = 1, K - 2 + A( J, K-1 ) = D21*( D11*W( J, KW-1 )-W( J, KW ) ) + A( J, K ) = DCONJG( D21 )* + $ ( D22*W( J, KW )-W( J, KW-1 ) ) + 20 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = W( K-1, KW ) + A( K, K ) = W( K, KW ) +* +* (2) Conjugate columns W(kw) and W(kw-1) +* + CALL ZLACGV( K-1, W( 1, KW ), 1 ) + CALL ZLACGV( K-2, W( 1, KW-1 ), 1 ) +* + END IF +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12**H = A11 - U12*W**H +* +* computing blocks of NB columns at a time (note that conjg(W) is +* actually stored) +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + A( JJ, JJ ) = DBLE( A( JJ, JJ ) ) + CALL ZGEMV( 'No transpose', JJ-J+1, N-K, -CONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, CONE, + $ A( J, JJ ), 1 ) + A( JJ, JJ ) = DBLE( A( JJ, JJ ) ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + CALL ZGEMM( 'No transpose', 'Transpose', J-1, JB, N-K, + $ -CONE, A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, + $ CONE, A( 1, J ), LDA ) + 50 CONTINUE +* +* Put U12 in standard form by partially undoing the interchanges +* in columns k+1:n looping backwards from k+1 to n +* + J = K + 1 + 60 CONTINUE +* +* Undo the interchanges (if any) of rows JJ and JP at each +* step J +* +* (Here, J is a diagonal index) + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP +* (Here, J is a diagonal index) + J = J + 1 + END IF +* (NOTE: Here, J is used to determine row length. Length N-J+1 +* of the rows to swap back doesn't include diagonal element) + J = J + 1 + IF( JP.NE.JJ .AND. J.LE.N ) + $ CALL ZSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA ) + IF( J.LT.N ) + $ GO TO 60 +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 (note that conjg(W) is actually stored) +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* + KSTEP = 1 +* +* Copy column K of A to column K of W and update it +* + W( K, K ) = DBLE( A( K, K ) ) + IF( K.LT.N ) + $ CALL ZCOPY( N-K, A( K+1, K ), 1, W( K+1, K ), 1 ) + CALL ZGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), LDA, + $ W( K, 1 ), LDW, CONE, W( K, K ), 1 ) + W( K, K ) = DBLE( W( K, K ) ) +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( DBLE( W( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + IZAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = CABS1( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = DBLE( A( K, K ) ) + ELSE +* +* ============================================================ +* +* BEGIN pivot search +* +* Case(1) + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* BEGIN pivot search along IMAX row +* +* +* Copy column IMAX to column K+1 of W and update it +* + CALL ZCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1 ) + CALL ZLACGV( IMAX-K, W( K, K+1 ), 1 ) + W( IMAX, K+1 ) = DBLE( A( IMAX, IMAX ) ) + IF( IMAX.LT.N ) + $ CALL ZCOPY( N-IMAX, A( IMAX+1, IMAX ), 1, + $ W( IMAX+1, K+1 ), 1 ) + CALL ZGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), + $ LDA, W( IMAX, 1 ), LDW, CONE, W( K, K+1 ), + $ 1 ) + W( IMAX, K+1 ) = DBLE( W( IMAX, K+1 ) ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine only ROWMAX. +* + JMAX = K - 1 + IZAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = CABS1( W( JMAX, K+1 ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + IZAMAX( N-IMAX, W( IMAX+1, K+1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, K+1 ) ) ) + END IF +* +* Case(2) + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* +* Case(3) + ELSE IF( ABS( DBLE( W( IMAX, K+1 ) ) ).GE.ALPHA*ROWMAX ) + $ THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K of W +* + CALL ZCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* +* Case(4) + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF +* +* +* END pivot search along IMAX row +* + END IF +* +* END pivot search +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K + KSTEP - 1 +* +* Interchange rows and columns KP and KK. +* Updated column KP is already stored in column KK of W. +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP of submatrix A +* at step K. No need to copy element into column K +* (or K and K+1 for 2-by-2 pivot) of A, since these columns +* will be later overwritten. +* + A( KP, KP ) = DBLE( A( KK, KK ) ) + CALL ZCOPY( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + CALL ZLACGV( KP-KK-1, A( KP, KK+1 ), LDA ) + IF( KP.LT.N ) + $ CALL ZCOPY( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) +* +* Interchange rows KK and KP in first K-1 columns of A +* (columns K (or K and K+1 for 2-by-2 pivot) of A will be +* later overwritten). Interchange rows KK and KP +* in first KK columns of W. +* + IF( K.GT.1 ) + $ CALL ZSWAP( K-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL ZSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k), +* +* where L(k) is the k-th column of L +* +* (1) Store subdiag. elements of column L(k) +* and 1-by-1 block D(k) in column k of A. +* (NOTE: Diagonal element L(k,k) is a UNIT element +* and not stored) +* A(k,k) := D(k,k) = W(k,k) +* A(k+1:N,k) := L(k+1:N,k) = W(k+1:N,k)/D(k,k) +* +* (NOTE: No need to use for Hermitian matrix +* A( K, K ) = DBLE( W( K, K) ) to separately copy diagonal +* element D(k,k) from W (potentially saves only one load)) + CALL ZCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN +* +* (NOTE: No need to check if A(k,k) is NOT ZERO, +* since that was ensured earlier in pivot search: +* case A(k,k) = 0 falls into 2x2 pivot case(4)) +* + R1 = ONE / DBLE( A( K, K ) ) + CALL ZDSCAL( N-K, R1, A( K+1, K ), 1 ) +* +* (2) Conjugate column W(k) +* + CALL ZLACGV( N-K, W( K+1, K ), 1 ) + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* (1) Store L(k+2:N,k) and L(k+2:N,k+1) and 2-by-2 +* block D(k:k+1,k:k+1) in columns k and k+1 of A. +* (NOTE: 2-by-2 diagonal block L(k:k+1,k:k+1) is a UNIT +* block and not stored) +* A(k:k+1,k:k+1) := D(k:k+1,k:k+1) = W(k:k+1,k:k+1) +* A(k+2:N,k:k+1) := L(k+2:N,k:k+1) = +* = W(k+2:N,k:k+1) * ( D(k:k+1,k:k+1)**(-1) ) +* + IF( K.LT.N-1 ) THEN +* +* Factor out the columns of the inverse of 2-by-2 pivot +* block D, so that each column contains 1, to reduce the +* number of FLOPS when we multiply panel +* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1). +* +* D**(-1) = ( d11 cj(d21) )**(-1) = +* ( d21 d22 ) +* +* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = +* ( (-d21) ( d11 ) ) +* +* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * +* +* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = +* ( ( -1 ) ( d11/conj(d21) ) ) +* +* = 1/(|d21|**2) * 1/(D22*D11-1) * +* +* * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = ( conj(D21)*( D11 ) D21*( -1 ) ) +* ( ( -1 ) ( D22 ) ), +* +* where D11 = d22/d21, +* D22 = d11/conj(d21), +* D21 = T/d21, +* T = 1/(D22*D11-1). +* +* (NOTE: No need to check for division by ZERO, +* since that was ensured earlier in pivot search: +* (a) d21 != 0, since in 2x2 pivot case(4) +* |d21| should be larger than |d11| and |d22|; +* (b) (D22*D11 - 1) != 0, since from (a), +* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.) +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / DCONJG( D21 ) + T = ONE / ( DBLE( D11*D22 )-ONE ) + D21 = T / D21 +* +* Update elements in columns A(k) and A(k+1) as +* dot products of rows of ( W(k) W(k+1) ) and columns +* of D**(-1) +* + DO 80 J = K + 2, N + A( J, K ) = DCONJG( D21 )* + $ ( D11*W( J, K )-W( J, K+1 ) ) + A( J, K+1 ) = D21*( D22*W( J, K+1 )-W( J, K ) ) + 80 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = W( K+1, K ) + A( K+1, K+1 ) = W( K+1, K+1 ) +* +* (2) Conjugate columns W(k) and W(k+1) +* + CALL ZLACGV( N-K, W( K+1, K ), 1 ) + CALL ZLACGV( N-K-1, W( K+2, K+1 ), 1 ) +* + END IF +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21**H = A22 - L21*W**H +* +* computing blocks of NB columns at a time (note that conjg(W) is +* actually stored) +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + A( JJ, JJ ) = DBLE( A( JJ, JJ ) ) + CALL ZGEMV( 'No transpose', J+JB-JJ, K-1, -CONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, CONE, + $ A( JJ, JJ ), 1 ) + A( JJ, JJ ) = DBLE( A( JJ, JJ ) ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL ZGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -CONE, A( J+JB, 1 ), LDA, W( J, 1 ), + $ LDW, CONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Put L21 in standard form by partially undoing the interchanges +* of rows in columns 1:k-1 looping backwards from k-1 to 1 +* + J = K - 1 + 120 CONTINUE +* +* Undo the interchanges (if any) of rows JJ and JP at each +* step J +* +* (Here, J is a diagonal index) + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP +* (Here, J is a diagonal index) + J = J - 1 + END IF +* (NOTE: Here, J is used to determine row length. Length J +* of the rows to swap back doesn't include diagonal element) + J = J - 1 + IF( JP.NE.JJ .AND. J.GE.1 ) + $ CALL ZSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA ) + IF( J.GT.1 ) + $ GO TO 120 +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF + RETURN +* +* End of ZLAHEF +* + END diff --git a/dspl/liblapack/SRC/zlahef_aa.f b/dspl/liblapack/SRC/zlahef_aa.f new file mode 100644 index 0000000..8bad4ab --- /dev/null +++ b/dspl/liblapack/SRC/zlahef_aa.f @@ -0,0 +1,501 @@ +*> \brief \b ZLAHEF_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAHEF_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAHEF_AA( UPLO, J1, M, NB, A, LDA, IPIV, +* H, LDH, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER J1, M, NB, LDA, LDH +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), H( LDH, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAHEF_AA factorizes a panel of a complex hermitian matrix A using +*> the Aasen's algorithm. The panel consists of a set of NB rows of A +*> when UPLO is U, or a set of NB columns when UPLO is L. +*> +*> In order to factorize the panel, the Aasen's algorithm requires the +*> last row, or column, of the previous panel. The first row, or column, +*> of A is set to be the first row, or column, of an identity matrix, +*> which is used to factorize the first panel. +*> +*> The resulting J-th row of U, or J-th column of L, is stored in the +*> (J-1)-th row, or column, of A (without the unit diagonals), while +*> the diagonal and subdiagonal of A are overwritten by those of T. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] J1 +*> \verbatim +*> J1 is INTEGER +*> The location of the first row, or column, of the panel +*> within the submatrix of A, passed to this routine, e.g., +*> when called by ZHETRF_AA, for the first panel, J1 is 1, +*> while for the remaining panels, J1 is 2. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The dimension of the submatrix. M >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The dimension of the panel to be facotorized. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,M) for +*> the first panel, while dimension (LDA,M+1) for the +*> remaining panels. +*> +*> On entry, A contains the last row, or column, of +*> the previous panel, and the trailing submatrix of A +*> to be factorized, except for the first panel, only +*> the panel is passed. +*> +*> On exit, the leading panel is factorized. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the row and column interchanges, +*> the row and column k were interchanged with the row and +*> column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is COMPLEX*16 workspace, dimension (LDH,NB). +*> +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the workspace H. LDH >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 workspace, dimension (M). +*> \endverbatim +*> +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16HEcomputational +* +* ===================================================================== + SUBROUTINE ZLAHEF_AA( UPLO, J1, M, NB, A, LDA, IPIV, + $ H, LDH, WORK ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER M, NB, J1, LDA, LDH +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), H( LDH, * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = (0.0D+0, 0.0D+0), ONE = (1.0D+0, 0.0D+0) ) +* +* .. Local Scalars .. + INTEGER J, K, K1, I1, I2, MJ + COMPLEX*16 PIV, ALPHA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX, ILAENV + EXTERNAL LSAME, ILAENV, IZAMAX +* .. +* .. External Subroutines .. + EXTERNAL ZGEMM, ZGEMV, ZAXPY, ZLACGV, ZCOPY, ZSCAL, ZSWAP, + $ ZLASET, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCONJG, MAX +* .. +* .. Executable Statements .. +* + J = 1 +* +* K1 is the first column of the panel to be factorized +* i.e., K1 is 2 for the first block column, and 1 for the rest of the blocks +* + K1 = (2-J1)+1 +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* ..................................................... +* Factorize A as U**T*D*U using the upper triangle of A +* ..................................................... +* + 10 CONTINUE + IF ( J.GT.MIN(M, NB) ) + $ GO TO 20 +* +* K is the column to be factorized +* when being called from ZHETRF_AA, +* > for the first block column, J1 is 1, hence J1+J-1 is J, +* > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, +* + K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF +* +* H(J:N, J) := A(J, J:N) - H(J:N, 1:(J-1)) * L(J1:(J-1), J), +* where H(J:N, J) has been initialized to be A(J, J:N) +* + IF( K.GT.2 ) THEN +* +* K is the column to be factorized +* > for the first block column, K is J, skipping the first two +* columns +* > for the rest of the columns, K is J+1, skipping only the +* first column +* + CALL ZLACGV( J-K1, A( 1, J ), 1 ) + CALL ZGEMV( 'No transpose', MJ, J-K1, + $ -ONE, H( J, K1 ), LDH, + $ A( 1, J ), 1, + $ ONE, H( J, J ), 1 ) + CALL ZLACGV( J-K1, A( 1, J ), 1 ) + END IF +* +* Copy H(i:n, i) into WORK +* + CALL ZCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) +* + IF( J.GT.K1 ) THEN +* +* Compute WORK := WORK - L(J-1, J:N) * T(J-1,J), +* where A(J-1, J) stores T(J-1, J) and A(J-2, J:N) stores U(J-1, J:N) +* + ALPHA = -DCONJG( A( K-1, J ) ) + CALL ZAXPY( MJ, ALPHA, A( K-2, J ), LDA, WORK( 1 ), 1 ) + END IF +* +* Set A(J, J) = T(J, J) +* + A( K, J ) = DBLE( WORK( 1 ) ) +* + IF( J.LT.M ) THEN +* +* Compute WORK(2:N) = T(J, J) L(J, (J+1):N) +* where A(J, J) stores T(J, J) and A(J-1, (J+1):N) stores U(J, (J+1):N) +* + IF( K.GT.1 ) THEN + ALPHA = -A( K, J ) + CALL ZAXPY( M-J, ALPHA, A( K-1, J+1 ), LDA, + $ WORK( 2 ), 1 ) + ENDIF +* +* Find max(|WORK(2:n)|) +* + I2 = IZAMAX( M-J, WORK( 2 ), 1 ) + 1 + PIV = WORK( I2 ) +* +* Apply hermitian pivot +* + IF( (I2.NE.2) .AND. (PIV.NE.0) ) THEN +* +* Swap WORK(I1) and WORK(I2) +* + I1 = 2 + WORK( I2 ) = WORK( I1 ) + WORK( I1 ) = PIV +* +* Swap A(I1, I1+1:N) with A(I1+1:N, I2) +* + I1 = I1+J-1 + I2 = I2+J-1 + CALL ZSWAP( I2-I1-1, A( J1+I1-1, I1+1 ), LDA, + $ A( J1+I1, I2 ), 1 ) + CALL ZLACGV( I2-I1, A( J1+I1-1, I1+1 ), LDA ) + CALL ZLACGV( I2-I1-1, A( J1+I1, I2 ), 1 ) +* +* Swap A(I1, I2+1:N) with A(I2, I2+1:N) +* + CALL ZSWAP( M-I2, A( J1+I1-1, I2+1 ), LDA, + $ A( J1+I2-1, I2+1 ), LDA ) +* +* Swap A(I1, I1) with A(I2,I2) +* + PIV = A( I1+J1-1, I1 ) + A( J1+I1-1, I1 ) = A( J1+I2-1, I2 ) + A( J1+I2-1, I2 ) = PIV +* +* Swap H(I1, 1:J1) with H(I2, 1:J1) +* + CALL ZSWAP( I1-1, H( I1, 1 ), LDH, H( I2, 1 ), LDH ) + IPIV( I1 ) = I2 +* + IF( I1.GT.(K1-1) ) THEN +* +* Swap L(1:I1-1, I1) with L(1:I1-1, I2), +* skipping the first column +* + CALL ZSWAP( I1-K1+1, A( 1, I1 ), 1, + $ A( 1, I2 ), 1 ) + END IF + ELSE + IPIV( J+1 ) = J+1 + ENDIF +* +* Set A(J, J+1) = T(J, J+1) +* + A( K, J+1 ) = WORK( 2 ) +* + IF( J.LT.NB ) THEN +* +* Copy A(J+1:N, J+1) into H(J:N, J), +* + CALL ZCOPY( M-J, A( K+1, J+1 ), LDA, + $ H( J+1, J+1 ), 1 ) + END IF +* +* Compute L(J+2, J+1) = WORK( 3:N ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:N, J) = L(J+2:N, J+1) +* + IF( A( K, J+1 ).NE.ZERO ) THEN + ALPHA = ONE / A( K, J+1 ) + CALL ZCOPY( M-J-1, WORK( 3 ), 1, A( K, J+2 ), LDA ) + CALL ZSCAL( M-J-1, ALPHA, A( K, J+2 ), LDA ) + ELSE + CALL ZLASET( 'Full', 1, M-J-1, ZERO, ZERO, + $ A( K, J+2 ), LDA) + END IF + END IF + J = J + 1 + GO TO 10 + 20 CONTINUE +* + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* + 30 CONTINUE + IF( J.GT.MIN( M, NB ) ) + $ GO TO 40 +* +* K is the column to be factorized +* when being called from ZHETRF_AA, +* > for the first block column, J1 is 1, hence J1+J-1 is J, +* > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, +* + K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF +* +* H(J:N, J) := A(J:N, J) - H(J:N, 1:(J-1)) * L(J, J1:(J-1))^T, +* where H(J:N, J) has been initialized to be A(J:N, J) +* + IF( K.GT.2 ) THEN +* +* K is the column to be factorized +* > for the first block column, K is J, skipping the first two +* columns +* > for the rest of the columns, K is J+1, skipping only the +* first column +* + CALL ZLACGV( J-K1, A( J, 1 ), LDA ) + CALL ZGEMV( 'No transpose', MJ, J-K1, + $ -ONE, H( J, K1 ), LDH, + $ A( J, 1 ), LDA, + $ ONE, H( J, J ), 1 ) + CALL ZLACGV( J-K1, A( J, 1 ), LDA ) + END IF +* +* Copy H(J:N, J) into WORK +* + CALL ZCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) +* + IF( J.GT.K1 ) THEN +* +* Compute WORK := WORK - L(J:N, J-1) * T(J-1,J), +* where A(J-1, J) = T(J-1, J) and A(J, J-2) = L(J, J-1) +* + ALPHA = -DCONJG( A( J, K-1 ) ) + CALL ZAXPY( MJ, ALPHA, A( J, K-2 ), 1, WORK( 1 ), 1 ) + END IF +* +* Set A(J, J) = T(J, J) +* + A( J, K ) = DBLE( WORK( 1 ) ) +* + IF( J.LT.M ) THEN +* +* Compute WORK(2:N) = T(J, J) L((J+1):N, J) +* where A(J, J) = T(J, J) and A((J+1):N, J-1) = L((J+1):N, J) +* + IF( K.GT.1 ) THEN + ALPHA = -A( J, K ) + CALL ZAXPY( M-J, ALPHA, A( J+1, K-1 ), 1, + $ WORK( 2 ), 1 ) + ENDIF +* +* Find max(|WORK(2:n)|) +* + I2 = IZAMAX( M-J, WORK( 2 ), 1 ) + 1 + PIV = WORK( I2 ) +* +* Apply hermitian pivot +* + IF( (I2.NE.2) .AND. (PIV.NE.0) ) THEN +* +* Swap WORK(I1) and WORK(I2) +* + I1 = 2 + WORK( I2 ) = WORK( I1 ) + WORK( I1 ) = PIV +* +* Swap A(I1+1:N, I1) with A(I2, I1+1:N) +* + I1 = I1+J-1 + I2 = I2+J-1 + CALL ZSWAP( I2-I1-1, A( I1+1, J1+I1-1 ), 1, + $ A( I2, J1+I1 ), LDA ) + CALL ZLACGV( I2-I1, A( I1+1, J1+I1-1 ), 1 ) + CALL ZLACGV( I2-I1-1, A( I2, J1+I1 ), LDA ) +* +* Swap A(I2+1:N, I1) with A(I2+1:N, I2) +* + CALL ZSWAP( M-I2, A( I2+1, J1+I1-1 ), 1, + $ A( I2+1, J1+I2-1 ), 1 ) +* +* Swap A(I1, I1) with A(I2, I2) +* + PIV = A( I1, J1+I1-1 ) + A( I1, J1+I1-1 ) = A( I2, J1+I2-1 ) + A( I2, J1+I2-1 ) = PIV +* +* Swap H(I1, I1:J1) with H(I2, I2:J1) +* + CALL ZSWAP( I1-1, H( I1, 1 ), LDH, H( I2, 1 ), LDH ) + IPIV( I1 ) = I2 +* + IF( I1.GT.(K1-1) ) THEN +* +* Swap L(1:I1-1, I1) with L(1:I1-1, I2), +* skipping the first column +* + CALL ZSWAP( I1-K1+1, A( I1, 1 ), LDA, + $ A( I2, 1 ), LDA ) + END IF + ELSE + IPIV( J+1 ) = J+1 + ENDIF +* +* Set A(J+1, J) = T(J+1, J) +* + A( J+1, K ) = WORK( 2 ) +* + IF( J.LT.NB ) THEN +* +* Copy A(J+1:N, J+1) into H(J+1:N, J), +* + CALL ZCOPY( M-J, A( J+1, K+1 ), 1, + $ H( J+1, J+1 ), 1 ) + END IF +* +* Compute L(J+2, J+1) = WORK( 3:N ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:N, J) = L(J+2:N, J+1) +* + IF( A( J+1, K ).NE.ZERO ) THEN + ALPHA = ONE / A( J+1, K ) + CALL ZCOPY( M-J-1, WORK( 3 ), 1, A( J+2, K ), 1 ) + CALL ZSCAL( M-J-1, ALPHA, A( J+2, K ), 1 ) + ELSE + CALL ZLASET( 'Full', M-J-1, 1, ZERO, ZERO, + $ A( J+2, K ), LDA ) + END IF + END IF + J = J + 1 + GO TO 30 + 40 CONTINUE + END IF + RETURN +* +* End of ZLAHEF_AA +* + END diff --git a/dspl/liblapack/SRC/zlahef_rk.f b/dspl/liblapack/SRC/zlahef_rk.f new file mode 100644 index 0000000..d8d54f4 --- /dev/null +++ b/dspl/liblapack/SRC/zlahef_rk.f @@ -0,0 +1,1234 @@ +*> \brief \b ZLAHEF_RK computes a partial factorization of a complex Hermitian indefinite matrix using bounded Bunch-Kaufman (rook) diagonal pivoting method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAHEF_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAHEF_RK( UPLO, N, NB, KB, A, LDA, E, IPIV, W, LDW, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), E( * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> ZLAHEF_RK computes a partial factorization of a complex Hermitian +*> matrix A using the bounded Bunch-Kaufman (rook) diagonal +*> pivoting method. The partial factorization has the form: +*> +*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +*> ( 0 U22 ) ( 0 D ) ( U12**H U22**H ) +*> +*> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L', +*> ( L21 I ) ( 0 A22 ) ( 0 I ) +*> +*> where the order of D is at most NB. The actual order is returned in +*> the argument KB, and is either NB or NB-1, or N if N <= NB. +*> +*> ZLAHEF_RK is an auxiliary routine called by ZHETRF_RK. It uses +*> blocked code (calling Level 3 BLAS) to update the submatrix +*> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The maximum number of columns of the matrix A that should be +*> factored. NB should be at least 2 to allow for 2-by-2 pivot +*> blocks. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns of A that were actually factored. +*> KB is either NB-1 or NB, or N if N <= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, contains: +*> a) ONLY diagonal elements of the Hermitian block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N) +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the Hermitian block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> IPIV describes the permutation matrix P in the factorization +*> of matrix A as follows. The absolute value of IPIV(k) +*> represents the index of row and column that were +*> interchanged with the k-th row and column. The value of UPLO +*> describes the order in which the interchanges were applied. +*> Also, the sign of IPIV represents the block structure of +*> the Hermitian block diagonal matrix D with 1-by-1 or 2-by-2 +*> diagonal blocks which correspond to 1 or 2 interchanges +*> at each factorization step. +*> +*> If UPLO = 'U', +*> ( in factorization order, k decreases from N to 1 ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the submatrix A(1:N,N-KB+1:N); +*> If IPIV(k) = k, no interchange occurred. +*> +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k-1) < 0 means: +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,N-KB+1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k-1) != k-1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the submatrix A(1:N,N-KB+1:N). +*> If -IPIV(k-1) = k-1, no interchange occurred. +*> +*> c) In both cases a) and b) is always ABS( IPIV(k) ) <= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> +*> If UPLO = 'L', +*> ( in factorization order, k increases from 1 to N ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the submatrix A(1:N,1:KB). +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k+1) < 0 means: +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the submatrix A(1:N,1:KB). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k+1) != k+1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the submatrix A(1:N,1:KB). +*> If -IPIV(k+1) = k+1, no interchange occurred. +*> +*> c) In both cases a) and b) is always ABS( IPIV(k) ) >= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX*16 array, dimension (LDW,NB) +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZLAHEF_RK( UPLO, N, NB, KB, A, LDA, E, IPIV, W, LDW, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), W( LDW, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER IMAX, ITEMP, II, J, JB, JJ, JMAX, K, KK, KKW, + $ KP, KSTEP, KW, P + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, DTEMP, R1, ROWMAX, T, + $ SFMIN + COMPLEX*16 D11, D21, D22, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IZAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL ZCOPY, ZDSCAL, ZGEMM, ZGEMV, ZLACGV, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( DBLE( Z ) ) + ABS( DIMAG( Z ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = DLAMCH( 'S' ) +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 (note that conjg(W) is actually stored) +* Initilize the first entry of array E, where superdiagonal +* elements of D are stored +* + E( 1 ) = CZERO +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* + K = N + 10 CONTINUE +* +* KW is the column of W which corresponds to column K of A +* + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column KW of W and update it +* + IF( K.GT.1 ) + $ CALL ZCOPY( K-1, A( 1, K ), 1, W( 1, KW ), 1 ) + W( K, KW ) = DBLE( A( K, K ) ) + IF( K.LT.N ) THEN + CALL ZGEMV( 'No transpose', K, N-K, -CONE, A( 1, K+1 ), LDA, + $ W( K, KW+1 ), LDW, CONE, W( 1, KW ), 1 ) + W( K, KW ) = DBLE( W( K, KW ) ) + END IF +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( DBLE( W( K, KW ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = IZAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = CABS1( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = DBLE( W( K, KW ) ) + IF( K.GT.1 ) + $ CALL ZCOPY( K-1, W( 1, KW ), 1, A( 1, K ), 1 ) +* +* Set E( K ) to zero +* + IF( K.GT.1 ) + $ E( K ) = CZERO +* + ELSE +* +* ============================================================ +* +* BEGIN pivot search +* +* Case(1) +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* +* Lop until pivot found +* + DONE = .FALSE. +* + 12 CONTINUE +* +* BEGIN pivot search loop body +* +* +* Copy column IMAX to column KW-1 of W and update it +* + IF( IMAX.GT.1 ) + $ CALL ZCOPY( IMAX-1, A( 1, IMAX ), 1, W( 1, KW-1 ), + $ 1 ) + W( IMAX, KW-1 ) = DBLE( A( IMAX, IMAX ) ) +* + CALL ZCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) + CALL ZLACGV( K-IMAX, W( IMAX+1, KW-1 ), 1 ) +* + IF( K.LT.N ) THEN + CALL ZGEMV( 'No transpose', K, N-K, -CONE, + $ A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW, + $ CONE, W( 1, KW-1 ), 1 ) + W( IMAX, KW-1 ) = DBLE( W( IMAX, KW-1 ) ) + END IF +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + IZAMAX( K-IMAX, W( IMAX+1, KW-1 ), + $ 1 ) + ROWMAX = CABS1( W( JMAX, KW-1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = IZAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + DTEMP = CABS1( W( ITEMP, KW-1 ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Case(2) +* Equivalent to testing for +* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABS( DBLE( W( IMAX,KW-1 ) ) ) + $ .LT.ALPHA*ROWMAX ) ) THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW of W +* + CALL ZCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + DONE = .TRUE. +* +* Case(3) +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K-1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. +* +* Case(4) + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL ZCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + END IF +* +* +* END pivot search loop body +* + IF( .NOT.DONE ) GOTO 12 +* + END IF +* +* END pivot search +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K - KSTEP + 1 +* +* KKW is the column of W which corresponds to column KK of A +* + KKW = NB + KK - N +* +* Interchange rows and columns P and K. +* Updated column P is already stored in column KW of W. +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column K to column P of submatrix A +* at step K. No need to copy element into columns +* K and K-1 of A for 2-by-2 pivot, since these columns +* will be later overwritten. +* + A( P, P ) = DBLE( A( K, K ) ) + CALL ZCOPY( K-1-P, A( P+1, K ), 1, A( P, P+1 ), + $ LDA ) + CALL ZLACGV( K-1-P, A( P, P+1 ), LDA ) + IF( P.GT.1 ) + $ CALL ZCOPY( P-1, A( 1, K ), 1, A( 1, P ), 1 ) +* +* Interchange rows K and P in the last K+1 to N columns of A +* (columns K and K-1 of A for 2-by-2 pivot will be +* later overwritten). Interchange rows K and P +* in last KKW to NB columns of W. +* + IF( K.LT.N ) + $ CALL ZSWAP( N-K, A( K, K+1 ), LDA, A( P, K+1 ), + $ LDA ) + CALL ZSWAP( N-KK+1, W( K, KKW ), LDW, W( P, KKW ), + $ LDW ) + END IF +* +* Interchange rows and columns KP and KK. +* Updated column KP is already stored in column KKW of W. +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP of submatrix A +* at step K. No need to copy element into column K +* (or K and K-1 for 2-by-2 pivot) of A, since these columns +* will be later overwritten. +* + A( KP, KP ) = DBLE( A( KK, KK ) ) + CALL ZCOPY( KK-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + CALL ZLACGV( KK-1-KP, A( KP, KP+1 ), LDA ) + IF( KP.GT.1 ) + $ CALL ZCOPY( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last K+1 to N columns of A +* (columns K (or K and K-1 for 2-by-2 pivot) of A will be +* later overwritten). Interchange rows KK and KP +* in last KKW to NB columns of W. +* + IF( K.LT.N ) + $ CALL ZSWAP( N-K, A( KK, K+1 ), LDA, A( KP, K+1 ), + $ LDA ) + CALL ZSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column kw of W now holds +* +* W(kw) = U(k)*D(k), +* +* where U(k) is the k-th column of U +* +* (1) Store subdiag. elements of column U(k) +* and 1-by-1 block D(k) in column k of A. +* (NOTE: Diagonal element U(k,k) is a UNIT element +* and not stored) +* A(k,k) := D(k,k) = W(k,kw) +* A(1:k-1,k) := U(1:k-1,k) = W(1:k-1,kw)/D(k,k) +* +* (NOTE: No need to use for Hermitian matrix +* A( K, K ) = REAL( W( K, K) ) to separately copy diagonal +* element D(k,k) from W (potentially saves only one load)) + CALL ZCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + IF( K.GT.1 ) THEN +* +* (NOTE: No need to check if A(k,k) is NOT ZERO, +* since that was ensured earlier in pivot search: +* case A(k,k) = 0 falls into 2x2 pivot case(3)) +* +* Handle division by a small number +* + T = DBLE( A( K, K ) ) + IF( ABS( T ).GE.SFMIN ) THEN + R1 = ONE / T + CALL ZDSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE + DO 14 II = 1, K-1 + A( II, K ) = A( II, K ) / T + 14 CONTINUE + END IF +* +* (2) Conjugate column W(kw) +* + CALL ZLACGV( K-1, W( 1, KW ), 1 ) +* +* Store the superdiagonal element of D in array E +* + E( K ) = CZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns kw and kw-1 of W now hold +* +* ( W(kw-1) W(kw) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* (1) Store U(1:k-2,k-1) and U(1:k-2,k) and 2-by-2 +* block D(k-1:k,k-1:k) in columns k-1 and k of A. +* (NOTE: 2-by-2 diagonal block U(k-1:k,k-1:k) is a UNIT +* block and not stored) +* A(k-1:k,k-1:k) := D(k-1:k,k-1:k) = W(k-1:k,kw-1:kw) +* A(1:k-2,k-1:k) := U(1:k-2,k:k-1:k) = +* = W(1:k-2,kw-1:kw) * ( D(k-1:k,k-1:k)**(-1) ) +* + IF( K.GT.2 ) THEN +* +* Factor out the columns of the inverse of 2-by-2 pivot +* block D, so that each column contains 1, to reduce the +* number of FLOPS when we multiply panel +* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1). +* +* D**(-1) = ( d11 cj(d21) )**(-1) = +* ( d21 d22 ) +* +* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = +* ( (-d21) ( d11 ) ) +* +* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * +* +* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = +* ( ( -1 ) ( d11/conj(d21) ) ) +* +* = 1/(|d21|**2) * 1/(D22*D11-1) * +* +* * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* Handle division by a small number. (NOTE: order of +* operations is important) +* +* = ( T*(( D11 )/conj(D21)) T*(( -1 )/D21 ) ) +* ( (( -1 ) ) (( D22 ) ) ), +* +* where D11 = d22/d21, +* D22 = d11/conj(d21), +* D21 = d21, +* T = 1/(D22*D11-1). +* +* (NOTE: No need to check for division by ZERO, +* since that was ensured earlier in pivot search: +* (a) d21 != 0 in 2x2 pivot case(4), +* since |d21| should be larger than |d11| and |d22|; +* (b) (D22*D11 - 1) != 0, since from (a), +* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.) +* + D21 = W( K-1, KW ) + D11 = W( K, KW ) / DCONJG( D21 ) + D22 = W( K-1, KW-1 ) / D21 + T = ONE / ( DBLE( D11*D22 )-ONE ) +* +* Update elements in columns A(k-1) and A(k) as +* dot products of rows of ( W(kw-1) W(kw) ) and columns +* of D**(-1) +* + DO 20 J = 1, K - 2 + A( J, K-1 ) = T*( ( D11*W( J, KW-1 )-W( J, KW ) ) / + $ D21 ) + A( J, K ) = T*( ( D22*W( J, KW )-W( J, KW-1 ) ) / + $ DCONJG( D21 ) ) + 20 CONTINUE + END IF +* +* Copy diagonal elements of D(K) to A, +* copy superdiagonal element of D(K) to E(K) and +* ZERO out superdiagonal entry of A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = CZERO + A( K, K ) = W( K, KW ) + E( K ) = W( K-1, KW ) + E( K-1 ) = CZERO +* +* (2) Conjugate columns W(kw) and W(kw-1) +* + CALL ZLACGV( K-1, W( 1, KW ), 1 ) + CALL ZLACGV( K-2, W( 1, KW-1 ), 1 ) +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12**H = A11 - U12*W**H +* +* computing blocks of NB columns at a time (note that conjg(W) is +* actually stored) +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + A( JJ, JJ ) = DBLE( A( JJ, JJ ) ) + CALL ZGEMV( 'No transpose', JJ-J+1, N-K, -CONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, CONE, + $ A( J, JJ ), 1 ) + A( JJ, JJ ) = DBLE( A( JJ, JJ ) ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + IF( J.GE.2 ) + $ CALL ZGEMM( 'No transpose', 'Transpose', J-1, JB, N-K, + $ -CONE, A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, + $ CONE, A( 1, J ), LDA ) + 50 CONTINUE +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 (note that conjg(W) is actually stored) +* +* Initilize the unused last entry of the subdiagonal array E. +* + E( N ) = CZERO +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column K of W and update column K of W +* + W( K, K ) = DBLE( A( K, K ) ) + IF( K.LT.N ) + $ CALL ZCOPY( N-K, A( K+1, K ), 1, W( K+1, K ), 1 ) + IF( K.GT.1 ) THEN + CALL ZGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), + $ LDA, W( K, 1 ), LDW, CONE, W( K, K ), 1 ) + W( K, K ) = DBLE( W( K, K ) ) + END IF +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( DBLE( W( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + IZAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = CABS1( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = DBLE( W( K, K ) ) + IF( K.LT.N ) + $ CALL ZCOPY( N-K, W( K+1, K ), 1, A( K+1, K ), 1 ) +* +* Set E( K ) to zero +* + IF( K.LT.N ) + $ E( K ) = CZERO +* + ELSE +* +* ============================================================ +* +* BEGIN pivot search +* +* Case(1) +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 72 CONTINUE +* +* BEGIN pivot search loop body +* +* +* Copy column IMAX to column k+1 of W and update it +* + CALL ZCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1) + CALL ZLACGV( IMAX-K, W( K, K+1 ), 1 ) + W( IMAX, K+1 ) = DBLE( A( IMAX, IMAX ) ) +* + IF( IMAX.LT.N ) + $ CALL ZCOPY( N-IMAX, A( IMAX+1, IMAX ), 1, + $ W( IMAX+1, K+1 ), 1 ) +* + IF( K.GT.1 ) THEN + CALL ZGEMV( 'No transpose', N-K+1, K-1, -CONE, + $ A( K, 1 ), LDA, W( IMAX, 1 ), LDW, + $ CONE, W( K, K+1 ), 1 ) + W( IMAX, K+1 ) = DBLE( W( IMAX, K+1 ) ) + END IF +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + IZAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = CABS1( W( JMAX, K+1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + IZAMAX( N-IMAX, W( IMAX+1, K+1 ), 1) + DTEMP = CABS1( W( ITEMP, K+1 ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Case(2) +* Equivalent to testing for +* ABS( REAL( W( IMAX,K+1 ) ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABS( DBLE( W( IMAX,K+1 ) ) ) + $ .LT.ALPHA*ROWMAX ) ) THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K of W +* + CALL ZCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + DONE = .TRUE. +* +* Case(3) +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. +* +* Case(4) + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL ZCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + END IF +* +* +* End pivot search loop body +* + IF( .NOT.DONE ) GOTO 72 +* + END IF +* +* END pivot search +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K + KSTEP - 1 +* +* Interchange rows and columns P and K (only for 2-by-2 pivot). +* Updated column P is already stored in column K of W. +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column KK-1 to column P of submatrix A +* at step K. No need to copy element into columns +* K and K+1 of A for 2-by-2 pivot, since these columns +* will be later overwritten. +* + A( P, P ) = DBLE( A( K, K ) ) + CALL ZCOPY( P-K-1, A( K+1, K ), 1, A( P, K+1 ), LDA ) + CALL ZLACGV( P-K-1, A( P, K+1 ), LDA ) + IF( P.LT.N ) + $ CALL ZCOPY( N-P, A( P+1, K ), 1, A( P+1, P ), 1 ) +* +* Interchange rows K and P in first K-1 columns of A +* (columns K and K+1 of A for 2-by-2 pivot will be +* later overwritten). Interchange rows K and P +* in first KK columns of W. +* + IF( K.GT.1 ) + $ CALL ZSWAP( K-1, A( K, 1 ), LDA, A( P, 1 ), LDA ) + CALL ZSWAP( KK, W( K, 1 ), LDW, W( P, 1 ), LDW ) + END IF +* +* Interchange rows and columns KP and KK. +* Updated column KP is already stored in column KK of W. +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP of submatrix A +* at step K. No need to copy element into column K +* (or K and K+1 for 2-by-2 pivot) of A, since these columns +* will be later overwritten. +* + A( KP, KP ) = DBLE( A( KK, KK ) ) + CALL ZCOPY( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + CALL ZLACGV( KP-KK-1, A( KP, KK+1 ), LDA ) + IF( KP.LT.N ) + $ CALL ZCOPY( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) +* +* Interchange rows KK and KP in first K-1 columns of A +* (column K (or K and K+1 for 2-by-2 pivot) of A will be +* later overwritten). Interchange rows KK and KP +* in first KK columns of W. +* + IF( K.GT.1 ) + $ CALL ZSWAP( K-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL ZSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k), +* +* where L(k) is the k-th column of L +* +* (1) Store subdiag. elements of column L(k) +* and 1-by-1 block D(k) in column k of A. +* (NOTE: Diagonal element L(k,k) is a UNIT element +* and not stored) +* A(k,k) := D(k,k) = W(k,k) +* A(k+1:N,k) := L(k+1:N,k) = W(k+1:N,k)/D(k,k) +* +* (NOTE: No need to use for Hermitian matrix +* A( K, K ) = REAL( W( K, K) ) to separately copy diagonal +* element D(k,k) from W (potentially saves only one load)) + CALL ZCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN +* +* (NOTE: No need to check if A(k,k) is NOT ZERO, +* since that was ensured earlier in pivot search: +* case A(k,k) = 0 falls into 2x2 pivot case(3)) +* +* Handle division by a small number +* + T = DBLE( A( K, K ) ) + IF( ABS( T ).GE.SFMIN ) THEN + R1 = ONE / T + CALL ZDSCAL( N-K, R1, A( K+1, K ), 1 ) + ELSE + DO 74 II = K + 1, N + A( II, K ) = A( II, K ) / T + 74 CONTINUE + END IF +* +* (2) Conjugate column W(k) +* + CALL ZLACGV( N-K, W( K+1, K ), 1 ) +* +* Store the subdiagonal element of D in array E +* + E( K ) = CZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* (1) Store L(k+2:N,k) and L(k+2:N,k+1) and 2-by-2 +* block D(k:k+1,k:k+1) in columns k and k+1 of A. +* NOTE: 2-by-2 diagonal block L(k:k+1,k:k+1) is a UNIT +* block and not stored. +* A(k:k+1,k:k+1) := D(k:k+1,k:k+1) = W(k:k+1,k:k+1) +* A(k+2:N,k:k+1) := L(k+2:N,k:k+1) = +* = W(k+2:N,k:k+1) * ( D(k:k+1,k:k+1)**(-1) ) +* + IF( K.LT.N-1 ) THEN +* +* Factor out the columns of the inverse of 2-by-2 pivot +* block D, so that each column contains 1, to reduce the +* number of FLOPS when we multiply panel +* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1). +* +* D**(-1) = ( d11 cj(d21) )**(-1) = +* ( d21 d22 ) +* +* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = +* ( (-d21) ( d11 ) ) +* +* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * +* +* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = +* ( ( -1 ) ( d11/conj(d21) ) ) +* +* = 1/(|d21|**2) * 1/(D22*D11-1) * +* +* * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* Handle division by a small number. (NOTE: order of +* operations is important) +* +* = ( T*(( D11 )/conj(D21)) T*(( -1 )/D21 ) ) +* ( (( -1 ) ) (( D22 ) ) ), +* +* where D11 = d22/d21, +* D22 = d11/conj(d21), +* D21 = d21, +* T = 1/(D22*D11-1). +* +* (NOTE: No need to check for division by ZERO, +* since that was ensured earlier in pivot search: +* (a) d21 != 0 in 2x2 pivot case(4), +* since |d21| should be larger than |d11| and |d22|; +* (b) (D22*D11 - 1) != 0, since from (a), +* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.) +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / DCONJG( D21 ) + T = ONE / ( DBLE( D11*D22 )-ONE ) +* +* Update elements in columns A(k) and A(k+1) as +* dot products of rows of ( W(k) W(k+1) ) and columns +* of D**(-1) +* + DO 80 J = K + 2, N + A( J, K ) = T*( ( D11*W( J, K )-W( J, K+1 ) ) / + $ DCONJG( D21 ) ) + A( J, K+1 ) = T*( ( D22*W( J, K+1 )-W( J, K ) ) / + $ D21 ) + 80 CONTINUE + END IF +* +* Copy diagonal elements of D(K) to A, +* copy subdiagonal element of D(K) to E(K) and +* ZERO out subdiagonal entry of A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = CZERO + A( K+1, K+1 ) = W( K+1, K+1 ) + E( K ) = W( K+1, K ) + E( K+1 ) = CZERO +* +* (2) Conjugate columns W(k) and W(k+1) +* + CALL ZLACGV( N-K, W( K+1, K ), 1 ) + CALL ZLACGV( N-K-1, W( K+2, K+1 ), 1 ) +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21**H = A22 - L21*W**H +* +* computing blocks of NB columns at a time (note that conjg(W) is +* actually stored) +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + A( JJ, JJ ) = DBLE( A( JJ, JJ ) ) + CALL ZGEMV( 'No transpose', J+JB-JJ, K-1, -CONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, CONE, + $ A( JJ, JJ ), 1 ) + A( JJ, JJ ) = DBLE( A( JJ, JJ ) ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL ZGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -CONE, A( J+JB, 1 ), LDA, W( J, 1 ), + $ LDW, CONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF + RETURN +* +* End of ZLAHEF_RK +* + END diff --git a/dspl/liblapack/SRC/zlahef_rook.f b/dspl/liblapack/SRC/zlahef_rook.f new file mode 100644 index 0000000..1a344a3 --- /dev/null +++ b/dspl/liblapack/SRC/zlahef_rook.f @@ -0,0 +1,1176 @@ +* \brief \b ZLAHEF_ROOK computes a partial factorization of a complex Hermitian indefinite matrix using the bounded Bunch-Kaufman ("rook") diagonal pivoting method (blocked algorithm, calling Level 3 BLAS). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAHEF_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAHEF_ROOK( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAHEF_ROOK computes a partial factorization of a complex Hermitian +*> matrix A using the bounded Bunch-Kaufman ("rook") diagonal pivoting +*> method. The partial factorization has the form: +*> +*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +*> ( 0 U22 ) ( 0 D ) ( U12**H U22**H ) +*> +*> A = ( L11 0 ) ( D 0 ) ( L11**H L21**H ) if UPLO = 'L' +*> ( L21 I ) ( 0 A22 ) ( 0 I ) +*> +*> where the order of D is at most NB. The actual order is returned in +*> the argument KB, and is either NB or NB-1, or N if N <= NB. +*> Note that U**H denotes the conjugate transpose of U. +*> +*> ZLAHEF_ROOK is an auxiliary routine called by ZHETRF_ROOK. It uses +*> blocked code (calling Level 3 BLAS) to update the submatrix +*> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The maximum number of columns of the matrix A that should be +*> factored. NB should be at least 2 to allow for 2-by-2 pivot +*> blocks. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns of A that were actually factored. +*> KB is either NB-1 or NB, or N if N <= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, A contains details of the partial factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> Only the last KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> Only the first KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX*16 array, dimension (LDW,NB) +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup complex16HEcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZLAHEF_ROOK( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, + $ INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER IMAX, ITEMP, II, J, JB, JJ, JMAX, JP1, JP2, K, + $ KK, KKW, KP, KSTEP, KW, P + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, DTEMP, R1, ROWMAX, T, + $ SFMIN + COMPLEX*16 D11, D21, D22, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IZAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL ZCOPY, ZDSCAL, ZGEMM, ZGEMV, ZLACGV, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( DBLE( Z ) ) + ABS( DIMAG( Z ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = DLAMCH( 'S' ) +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 (note that conjg(W) is actually stored) +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* + K = N + 10 CONTINUE +* +* KW is the column of W which corresponds to column K of A +* + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column KW of W and update it +* + IF( K.GT.1 ) + $ CALL ZCOPY( K-1, A( 1, K ), 1, W( 1, KW ), 1 ) + W( K, KW ) = DBLE( A( K, K ) ) + IF( K.LT.N ) THEN + CALL ZGEMV( 'No transpose', K, N-K, -CONE, A( 1, K+1 ), LDA, + $ W( K, KW+1 ), LDW, CONE, W( 1, KW ), 1 ) + W( K, KW ) = DBLE( W( K, KW ) ) + END IF +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( DBLE( W( K, KW ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = IZAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = CABS1( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = DBLE( W( K, KW ) ) + IF( K.GT.1 ) + $ CALL ZCOPY( K-1, W( 1, KW ), 1, A( 1, K ), 1 ) + ELSE +* +* ============================================================ +* +* BEGIN pivot search +* +* Case(1) +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* +* Lop until pivot found +* + DONE = .FALSE. +* + 12 CONTINUE +* +* BEGIN pivot search loop body +* +* +* Copy column IMAX to column KW-1 of W and update it +* + IF( IMAX.GT.1 ) + $ CALL ZCOPY( IMAX-1, A( 1, IMAX ), 1, W( 1, KW-1 ), + $ 1 ) + W( IMAX, KW-1 ) = DBLE( A( IMAX, IMAX ) ) +* + CALL ZCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) + CALL ZLACGV( K-IMAX, W( IMAX+1, KW-1 ), 1 ) +* + IF( K.LT.N ) THEN + CALL ZGEMV( 'No transpose', K, N-K, -CONE, + $ A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW, + $ CONE, W( 1, KW-1 ), 1 ) + W( IMAX, KW-1 ) = DBLE( W( IMAX, KW-1 ) ) + END IF +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + IZAMAX( K-IMAX, W( IMAX+1, KW-1 ), + $ 1 ) + ROWMAX = CABS1( W( JMAX, KW-1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = IZAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + DTEMP = CABS1( W( ITEMP, KW-1 ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Case(2) +* Equivalent to testing for +* ABS( REAL( W( IMAX,KW-1 ) ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABS( DBLE( W( IMAX,KW-1 ) ) ) + $ .LT.ALPHA*ROWMAX ) ) THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW of W +* + CALL ZCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + DONE = .TRUE. +* +* Case(3) +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K-1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. +* +* Case(4) + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL ZCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + END IF +* +* +* END pivot search loop body +* + IF( .NOT.DONE ) GOTO 12 +* + END IF +* +* END pivot search +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K - KSTEP + 1 +* +* KKW is the column of W which corresponds to column KK of A +* + KKW = NB + KK - N +* +* Interchange rows and columns P and K. +* Updated column P is already stored in column KW of W. +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column K to column P of submatrix A +* at step K. No need to copy element into columns +* K and K-1 of A for 2-by-2 pivot, since these columns +* will be later overwritten. +* + A( P, P ) = DBLE( A( K, K ) ) + CALL ZCOPY( K-1-P, A( P+1, K ), 1, A( P, P+1 ), + $ LDA ) + CALL ZLACGV( K-1-P, A( P, P+1 ), LDA ) + IF( P.GT.1 ) + $ CALL ZCOPY( P-1, A( 1, K ), 1, A( 1, P ), 1 ) +* +* Interchange rows K and P in the last K+1 to N columns of A +* (columns K and K-1 of A for 2-by-2 pivot will be +* later overwritten). Interchange rows K and P +* in last KKW to NB columns of W. +* + IF( K.LT.N ) + $ CALL ZSWAP( N-K, A( K, K+1 ), LDA, A( P, K+1 ), + $ LDA ) + CALL ZSWAP( N-KK+1, W( K, KKW ), LDW, W( P, KKW ), + $ LDW ) + END IF +* +* Interchange rows and columns KP and KK. +* Updated column KP is already stored in column KKW of W. +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP of submatrix A +* at step K. No need to copy element into column K +* (or K and K-1 for 2-by-2 pivot) of A, since these columns +* will be later overwritten. +* + A( KP, KP ) = DBLE( A( KK, KK ) ) + CALL ZCOPY( KK-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + CALL ZLACGV( KK-1-KP, A( KP, KP+1 ), LDA ) + IF( KP.GT.1 ) + $ CALL ZCOPY( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last K+1 to N columns of A +* (columns K (or K and K-1 for 2-by-2 pivot) of A will be +* later overwritten). Interchange rows KK and KP +* in last KKW to NB columns of W. +* + IF( K.LT.N ) + $ CALL ZSWAP( N-K, A( KK, K+1 ), LDA, A( KP, K+1 ), + $ LDA ) + CALL ZSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column kw of W now holds +* +* W(kw) = U(k)*D(k), +* +* where U(k) is the k-th column of U +* +* (1) Store subdiag. elements of column U(k) +* and 1-by-1 block D(k) in column k of A. +* (NOTE: Diagonal element U(k,k) is a UNIT element +* and not stored) +* A(k,k) := D(k,k) = W(k,kw) +* A(1:k-1,k) := U(1:k-1,k) = W(1:k-1,kw)/D(k,k) +* +* (NOTE: No need to use for Hermitian matrix +* A( K, K ) = REAL( W( K, K) ) to separately copy diagonal +* element D(k,k) from W (potentially saves only one load)) + CALL ZCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + IF( K.GT.1 ) THEN +* +* (NOTE: No need to check if A(k,k) is NOT ZERO, +* since that was ensured earlier in pivot search: +* case A(k,k) = 0 falls into 2x2 pivot case(3)) +* +* Handle division by a small number +* + T = DBLE( A( K, K ) ) + IF( ABS( T ).GE.SFMIN ) THEN + R1 = ONE / T + CALL ZDSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE + DO 14 II = 1, K-1 + A( II, K ) = A( II, K ) / T + 14 CONTINUE + END IF +* +* (2) Conjugate column W(kw) +* + CALL ZLACGV( K-1, W( 1, KW ), 1 ) + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns kw and kw-1 of W now hold +* +* ( W(kw-1) W(kw) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* (1) Store U(1:k-2,k-1) and U(1:k-2,k) and 2-by-2 +* block D(k-1:k,k-1:k) in columns k-1 and k of A. +* (NOTE: 2-by-2 diagonal block U(k-1:k,k-1:k) is a UNIT +* block and not stored) +* A(k-1:k,k-1:k) := D(k-1:k,k-1:k) = W(k-1:k,kw-1:kw) +* A(1:k-2,k-1:k) := U(1:k-2,k:k-1:k) = +* = W(1:k-2,kw-1:kw) * ( D(k-1:k,k-1:k)**(-1) ) +* + IF( K.GT.2 ) THEN +* +* Factor out the columns of the inverse of 2-by-2 pivot +* block D, so that each column contains 1, to reduce the +* number of FLOPS when we multiply panel +* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1). +* +* D**(-1) = ( d11 cj(d21) )**(-1) = +* ( d21 d22 ) +* +* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = +* ( (-d21) ( d11 ) ) +* +* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * +* +* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = +* ( ( -1 ) ( d11/conj(d21) ) ) +* +* = 1/(|d21|**2) * 1/(D22*D11-1) * +* +* * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* Handle division by a small number. (NOTE: order of +* operations is important) +* +* = ( T*(( D11 )/conj(D21)) T*(( -1 )/D21 ) ) +* ( (( -1 ) ) (( D22 ) ) ), +* +* where D11 = d22/d21, +* D22 = d11/conj(d21), +* D21 = d21, +* T = 1/(D22*D11-1). +* +* (NOTE: No need to check for division by ZERO, +* since that was ensured earlier in pivot search: +* (a) d21 != 0 in 2x2 pivot case(4), +* since |d21| should be larger than |d11| and |d22|; +* (b) (D22*D11 - 1) != 0, since from (a), +* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.) +* + D21 = W( K-1, KW ) + D11 = W( K, KW ) / DCONJG( D21 ) + D22 = W( K-1, KW-1 ) / D21 + T = ONE / ( DBLE( D11*D22 )-ONE ) +* +* Update elements in columns A(k-1) and A(k) as +* dot products of rows of ( W(kw-1) W(kw) ) and columns +* of D**(-1) +* + DO 20 J = 1, K - 2 + A( J, K-1 ) = T*( ( D11*W( J, KW-1 )-W( J, KW ) ) / + $ D21 ) + A( J, K ) = T*( ( D22*W( J, KW )-W( J, KW-1 ) ) / + $ DCONJG( D21 ) ) + 20 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = W( K-1, KW ) + A( K, K ) = W( K, KW ) +* +* (2) Conjugate columns W(kw) and W(kw-1) +* + CALL ZLACGV( K-1, W( 1, KW ), 1 ) + CALL ZLACGV( K-2, W( 1, KW-1 ), 1 ) +* + END IF +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12**H = A11 - U12*W**H +* +* computing blocks of NB columns at a time (note that conjg(W) is +* actually stored) +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + A( JJ, JJ ) = DBLE( A( JJ, JJ ) ) + CALL ZGEMV( 'No transpose', JJ-J+1, N-K, -CONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, CONE, + $ A( J, JJ ), 1 ) + A( JJ, JJ ) = DBLE( A( JJ, JJ ) ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + IF( J.GE.2 ) + $ CALL ZGEMM( 'No transpose', 'Transpose', J-1, JB, N-K, + $ -CONE, A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, + $ CONE, A( 1, J ), LDA ) + 50 CONTINUE +* +* Put U12 in standard form by partially undoing the interchanges +* in of rows in columns k+1:n looping backwards from k+1 to n +* + J = K + 1 + 60 CONTINUE +* +* Undo the interchanges (if any) of rows J and JP2 +* (or J and JP2, and J+1 and JP1) at each step J +* + KSTEP = 1 + JP1 = 1 +* (Here, J is a diagonal index) + JJ = J + JP2 = IPIV( J ) + IF( JP2.LT.0 ) THEN + JP2 = -JP2 +* (Here, J is a diagonal index) + J = J + 1 + JP1 = -IPIV( J ) + KSTEP = 2 + END IF +* (NOTE: Here, J is used to determine row length. Length N-J+1 +* of the rows to swap back doesn't include diagonal element) + J = J + 1 + IF( JP2.NE.JJ .AND. J.LE.N ) + $ CALL ZSWAP( N-J+1, A( JP2, J ), LDA, A( JJ, J ), LDA ) + JJ = JJ + 1 + IF( KSTEP.EQ.2 .AND. JP1.NE.JJ .AND. J.LE.N ) + $ CALL ZSWAP( N-J+1, A( JP1, J ), LDA, A( JJ, J ), LDA ) + IF( J.LT.N ) + $ GO TO 60 +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 (note that conjg(W) is actually stored) +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column K of W and update column K of W +* + W( K, K ) = DBLE( A( K, K ) ) + IF( K.LT.N ) + $ CALL ZCOPY( N-K, A( K+1, K ), 1, W( K+1, K ), 1 ) + IF( K.GT.1 ) THEN + CALL ZGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), + $ LDA, W( K, 1 ), LDW, CONE, W( K, K ), 1 ) + W( K, K ) = DBLE( W( K, K ) ) + END IF +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = ABS( DBLE( W( K, K ) ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + IZAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = CABS1( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + A( K, K ) = DBLE( W( K, K ) ) + IF( K.LT.N ) + $ CALL ZCOPY( N-K, W( K+1, K ), 1, A( K+1, K ), 1 ) + ELSE +* +* ============================================================ +* +* BEGIN pivot search +* +* Case(1) +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 72 CONTINUE +* +* BEGIN pivot search loop body +* +* +* Copy column IMAX to column k+1 of W and update it +* + CALL ZCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1) + CALL ZLACGV( IMAX-K, W( K, K+1 ), 1 ) + W( IMAX, K+1 ) = DBLE( A( IMAX, IMAX ) ) +* + IF( IMAX.LT.N ) + $ CALL ZCOPY( N-IMAX, A( IMAX+1, IMAX ), 1, + $ W( IMAX+1, K+1 ), 1 ) +* + IF( K.GT.1 ) THEN + CALL ZGEMV( 'No transpose', N-K+1, K-1, -CONE, + $ A( K, 1 ), LDA, W( IMAX, 1 ), LDW, + $ CONE, W( K, K+1 ), 1 ) + W( IMAX, K+1 ) = DBLE( W( IMAX, K+1 ) ) + END IF +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + IZAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = CABS1( W( JMAX, K+1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + IZAMAX( N-IMAX, W( IMAX+1, K+1 ), 1) + DTEMP = CABS1( W( ITEMP, K+1 ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Case(2) +* Equivalent to testing for +* ABS( REAL( W( IMAX,K+1 ) ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABS( DBLE( W( IMAX,K+1 ) ) ) + $ .LT.ALPHA*ROWMAX ) ) THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K of W +* + CALL ZCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + DONE = .TRUE. +* +* Case(3) +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. +* +* Case(4) + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL ZCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + END IF +* +* +* End pivot search loop body +* + IF( .NOT.DONE ) GOTO 72 +* + END IF +* +* END pivot search +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K + KSTEP - 1 +* +* Interchange rows and columns P and K (only for 2-by-2 pivot). +* Updated column P is already stored in column K of W. +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column KK-1 to column P of submatrix A +* at step K. No need to copy element into columns +* K and K+1 of A for 2-by-2 pivot, since these columns +* will be later overwritten. +* + A( P, P ) = DBLE( A( K, K ) ) + CALL ZCOPY( P-K-1, A( K+1, K ), 1, A( P, K+1 ), LDA ) + CALL ZLACGV( P-K-1, A( P, K+1 ), LDA ) + IF( P.LT.N ) + $ CALL ZCOPY( N-P, A( P+1, K ), 1, A( P+1, P ), 1 ) +* +* Interchange rows K and P in first K-1 columns of A +* (columns K and K+1 of A for 2-by-2 pivot will be +* later overwritten). Interchange rows K and P +* in first KK columns of W. +* + IF( K.GT.1 ) + $ CALL ZSWAP( K-1, A( K, 1 ), LDA, A( P, 1 ), LDA ) + CALL ZSWAP( KK, W( K, 1 ), LDW, W( P, 1 ), LDW ) + END IF +* +* Interchange rows and columns KP and KK. +* Updated column KP is already stored in column KK of W. +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP of submatrix A +* at step K. No need to copy element into column K +* (or K and K+1 for 2-by-2 pivot) of A, since these columns +* will be later overwritten. +* + A( KP, KP ) = DBLE( A( KK, KK ) ) + CALL ZCOPY( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + CALL ZLACGV( KP-KK-1, A( KP, KK+1 ), LDA ) + IF( KP.LT.N ) + $ CALL ZCOPY( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) +* +* Interchange rows KK and KP in first K-1 columns of A +* (column K (or K and K+1 for 2-by-2 pivot) of A will be +* later overwritten). Interchange rows KK and KP +* in first KK columns of W. +* + IF( K.GT.1 ) + $ CALL ZSWAP( K-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL ZSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k), +* +* where L(k) is the k-th column of L +* +* (1) Store subdiag. elements of column L(k) +* and 1-by-1 block D(k) in column k of A. +* (NOTE: Diagonal element L(k,k) is a UNIT element +* and not stored) +* A(k,k) := D(k,k) = W(k,k) +* A(k+1:N,k) := L(k+1:N,k) = W(k+1:N,k)/D(k,k) +* +* (NOTE: No need to use for Hermitian matrix +* A( K, K ) = REAL( W( K, K) ) to separately copy diagonal +* element D(k,k) from W (potentially saves only one load)) + CALL ZCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN +* +* (NOTE: No need to check if A(k,k) is NOT ZERO, +* since that was ensured earlier in pivot search: +* case A(k,k) = 0 falls into 2x2 pivot case(3)) +* +* Handle division by a small number +* + T = DBLE( A( K, K ) ) + IF( ABS( T ).GE.SFMIN ) THEN + R1 = ONE / T + CALL ZDSCAL( N-K, R1, A( K+1, K ), 1 ) + ELSE + DO 74 II = K + 1, N + A( II, K ) = A( II, K ) / T + 74 CONTINUE + END IF +* +* (2) Conjugate column W(k) +* + CALL ZLACGV( N-K, W( K+1, K ), 1 ) + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* (1) Store L(k+2:N,k) and L(k+2:N,k+1) and 2-by-2 +* block D(k:k+1,k:k+1) in columns k and k+1 of A. +* NOTE: 2-by-2 diagonal block L(k:k+1,k:k+1) is a UNIT +* block and not stored. +* A(k:k+1,k:k+1) := D(k:k+1,k:k+1) = W(k:k+1,k:k+1) +* A(k+2:N,k:k+1) := L(k+2:N,k:k+1) = +* = W(k+2:N,k:k+1) * ( D(k:k+1,k:k+1)**(-1) ) +* + IF( K.LT.N-1 ) THEN +* +* Factor out the columns of the inverse of 2-by-2 pivot +* block D, so that each column contains 1, to reduce the +* number of FLOPS when we multiply panel +* ( W(kw-1) W(kw) ) by this inverse, i.e. by D**(-1). +* +* D**(-1) = ( d11 cj(d21) )**(-1) = +* ( d21 d22 ) +* +* = 1/(d11*d22-|d21|**2) * ( ( d22) (-cj(d21) ) ) = +* ( (-d21) ( d11 ) ) +* +* = 1/(|d21|**2) * 1/((d11/cj(d21))*(d22/d21)-1) * +* +* * ( d21*( d22/d21 ) conj(d21)*( - 1 ) ) = +* ( ( -1 ) ( d11/conj(d21) ) ) +* +* = 1/(|d21|**2) * 1/(D22*D11-1) * +* +* * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = (1/|d21|**2) * T * ( d21*( D11 ) conj(d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = ( (T/conj(d21))*( D11 ) (T/d21)*( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* Handle division by a small number. (NOTE: order of +* operations is important) +* +* = ( T*(( D11 )/conj(D21)) T*(( -1 )/D21 ) ) +* ( (( -1 ) ) (( D22 ) ) ), +* +* where D11 = d22/d21, +* D22 = d11/conj(d21), +* D21 = d21, +* T = 1/(D22*D11-1). +* +* (NOTE: No need to check for division by ZERO, +* since that was ensured earlier in pivot search: +* (a) d21 != 0 in 2x2 pivot case(4), +* since |d21| should be larger than |d11| and |d22|; +* (b) (D22*D11 - 1) != 0, since from (a), +* both |D11| < 1, |D22| < 1, hence |D22*D11| << 1.) +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / DCONJG( D21 ) + T = ONE / ( DBLE( D11*D22 )-ONE ) +* +* Update elements in columns A(k) and A(k+1) as +* dot products of rows of ( W(k) W(k+1) ) and columns +* of D**(-1) +* + DO 80 J = K + 2, N + A( J, K ) = T*( ( D11*W( J, K )-W( J, K+1 ) ) / + $ DCONJG( D21 ) ) + A( J, K+1 ) = T*( ( D22*W( J, K+1 )-W( J, K ) ) / + $ D21 ) + 80 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = W( K+1, K ) + A( K+1, K+1 ) = W( K+1, K+1 ) +* +* (2) Conjugate columns W(k) and W(k+1) +* + CALL ZLACGV( N-K, W( K+1, K ), 1 ) + CALL ZLACGV( N-K-1, W( K+2, K+1 ), 1 ) +* + END IF +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21**H = A22 - L21*W**H +* +* computing blocks of NB columns at a time (note that conjg(W) is +* actually stored) +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + A( JJ, JJ ) = DBLE( A( JJ, JJ ) ) + CALL ZGEMV( 'No transpose', J+JB-JJ, K-1, -CONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, CONE, + $ A( JJ, JJ ), 1 ) + A( JJ, JJ ) = DBLE( A( JJ, JJ ) ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL ZGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -CONE, A( J+JB, 1 ), LDA, W( J, 1 ), + $ LDW, CONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Put L21 in standard form by partially undoing the interchanges +* of rows in columns 1:k-1 looping backwards from k-1 to 1 +* + J = K - 1 + 120 CONTINUE +* +* Undo the interchanges (if any) of rows J and JP2 +* (or J and JP2, and J-1 and JP1) at each step J +* + KSTEP = 1 + JP1 = 1 +* (Here, J is a diagonal index) + JJ = J + JP2 = IPIV( J ) + IF( JP2.LT.0 ) THEN + JP2 = -JP2 +* (Here, J is a diagonal index) + J = J - 1 + JP1 = -IPIV( J ) + KSTEP = 2 + END IF +* (NOTE: Here, J is used to determine row length. Length J +* of the rows to swap back doesn't include diagonal element) + J = J - 1 + IF( JP2.NE.JJ .AND. J.GE.1 ) + $ CALL ZSWAP( J, A( JP2, 1 ), LDA, A( JJ, 1 ), LDA ) + JJ = JJ -1 + IF( KSTEP.EQ.2 .AND. JP1.NE.JJ .AND. J.GE.1 ) + $ CALL ZSWAP( J, A( JP1, 1 ), LDA, A( JJ, 1 ), LDA ) + IF( J.GT.1 ) + $ GO TO 120 +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF + RETURN +* +* End of ZLAHEF_ROOK +* + END diff --git a/dspl/liblapack/SRC/zlahqr.f b/dspl/liblapack/SRC/zlahqr.f new file mode 100644 index 0000000..19015b3 --- /dev/null +++ b/dspl/liblapack/SRC/zlahqr.f @@ -0,0 +1,571 @@ +*> \brief \b ZLAHQR computes the eigenvalues and Schur factorization of an upper Hessenberg matrix, using the double-shift/single-shift QR algorithm. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAHQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, +* IHIZ, Z, LDZ, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* COMPLEX*16 H( LDH, * ), W( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAHQR is an auxiliary routine called by CHSEQR to update the +*> eigenvalues and Schur decomposition already computed by CHSEQR, by +*> dealing with the Hessenberg submatrix in rows and columns ILO to +*> IHI. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> = .TRUE. : the full Schur form T is required; +*> = .FALSE.: only eigenvalues are required. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> = .TRUE. : the matrix of Schur vectors Z is required; +*> = .FALSE.: Schur vectors are not required. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> It is assumed that H is already upper triangular in rows and +*> columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1). +*> ZLAHQR works primarily with the Hessenberg submatrix in rows +*> and columns ILO to IHI, but applies transformations to all of +*> H if WANTT is .TRUE.. +*> 1 <= ILO <= max(1,IHI); IHI <= N. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is COMPLEX*16 array, dimension (LDH,N) +*> On entry, the upper Hessenberg matrix H. +*> On exit, if INFO is zero and if WANTT is .TRUE., then H +*> is upper triangular in rows and columns ILO:IHI. If INFO +*> is zero and if WANTT is .FALSE., then the contents of H +*> are unspecified on exit. The output state of H in case +*> INF is positive is below under the description of INFO. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX*16 array, dimension (N) +*> The computed eigenvalues ILO to IHI are stored in the +*> corresponding elements of W. If WANTT is .TRUE., the +*> eigenvalues are stored in the same order as on the diagonal +*> of the Schur form returned in H, with W(i) = H(i,i). +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. +*> 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ,N) +*> If WANTZ is .TRUE., on entry Z must contain the current +*> matrix Z of transformations accumulated by CHSEQR, and on +*> exit Z has been updated; transformations are applied only to +*> the submatrix Z(ILOZ:IHIZ,ILO:IHI). +*> If WANTZ is .FALSE., Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> .GT. 0: if INFO = i, ZLAHQR failed to compute all the +*> eigenvalues ILO to IHI in a total of 30 iterations +*> per eigenvalue; elements i+1:ihi of W contain +*> those eigenvalues which have been successfully +*> computed. +*> +*> If INFO .GT. 0 and WANTT is .FALSE., then on exit, +*> the remaining unconverged eigenvalues are the +*> eigenvalues of the upper Hessenberg matrix +*> rows and columns ILO thorugh INFO of the final, +*> output value of H. +*> +*> If INFO .GT. 0 and WANTT is .TRUE., then on exit +*> (*) (initial value of H)*U = U*(final value of H) +*> where U is an orthognal matrix. The final +*> value of H is upper Hessenberg and triangular in +*> rows and columns INFO+1 through IHI. +*> +*> If INFO .GT. 0 and WANTZ is .TRUE., then on exit +*> (final value of Z) = (initial value of Z)*U +*> where U is the orthogonal matrix in (*) +*> (regardless of the value of WANTT.) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> 02-96 Based on modifications by +*> David Day, Sandia National Laboratory, USA +*> +*> 12-04 Further modifications by +*> Ralph Byers, University of Kansas, USA +*> This is a modified version of ZLAHQR from LAPACK version 3.0. +*> It is (1) more robust against overflow and underflow and +*> (2) adopts the more conservative Ahues & Tisseur stopping +*> criterion (LAWN 122, 1997). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, + $ IHIZ, Z, LDZ, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + COMPLEX*16 H( LDH, * ), W( * ), Z( LDZ, * ) +* .. +* +* ========================================================= +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0d0, 0.0d0 ), + $ ONE = ( 1.0d0, 0.0d0 ) ) + DOUBLE PRECISION RZERO, RONE, HALF + PARAMETER ( RZERO = 0.0d0, RONE = 1.0d0, HALF = 0.5d0 ) + DOUBLE PRECISION DAT1 + PARAMETER ( DAT1 = 3.0d0 / 4.0d0 ) +* .. +* .. Local Scalars .. + COMPLEX*16 CDUM, H11, H11S, H22, SC, SUM, T, T1, TEMP, U, + $ V2, X, Y + DOUBLE PRECISION AA, AB, BA, BB, H10, H21, RTEMP, S, SAFMAX, + $ SAFMIN, SMLNUM, SX, T2, TST, ULP + INTEGER I, I1, I2, ITS, ITMAX, J, JHI, JLO, K, L, M, + $ NH, NZ +* .. +* .. Local Arrays .. + COMPLEX*16 V( 2 ) +* .. +* .. External Functions .. + COMPLEX*16 ZLADIV + DOUBLE PRECISION DLAMCH + EXTERNAL ZLADIV, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, ZCOPY, ZLARFG, ZSCAL +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN, SQRT +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( ILO.EQ.IHI ) THEN + W( ILO ) = H( ILO, ILO ) + RETURN + END IF +* +* ==== clear out the trash ==== + DO 10 J = ILO, IHI - 3 + H( J+2, J ) = ZERO + H( J+3, J ) = ZERO + 10 CONTINUE + IF( ILO.LE.IHI-2 ) + $ H( IHI, IHI-2 ) = ZERO +* ==== ensure that subdiagonal entries are real ==== + IF( WANTT ) THEN + JLO = 1 + JHI = N + ELSE + JLO = ILO + JHI = IHI + END IF + DO 20 I = ILO + 1, IHI + IF( DIMAG( H( I, I-1 ) ).NE.RZERO ) THEN +* ==== The following redundant normalization +* . avoids problems with both gradual and +* . sudden underflow in ABS(H(I,I-1)) ==== + SC = H( I, I-1 ) / CABS1( H( I, I-1 ) ) + SC = DCONJG( SC ) / ABS( SC ) + H( I, I-1 ) = ABS( H( I, I-1 ) ) + CALL ZSCAL( JHI-I+1, SC, H( I, I ), LDH ) + CALL ZSCAL( MIN( JHI, I+1 )-JLO+1, DCONJG( SC ), + $ H( JLO, I ), 1 ) + IF( WANTZ ) + $ CALL ZSCAL( IHIZ-ILOZ+1, DCONJG( SC ), Z( ILOZ, I ), 1 ) + END IF + 20 CONTINUE +* + NH = IHI - ILO + 1 + NZ = IHIZ - ILOZ + 1 +* +* Set machine-dependent constants for the stopping criterion. +* + SAFMIN = DLAMCH( 'SAFE MINIMUM' ) + SAFMAX = RONE / SAFMIN + CALL DLABAD( SAFMIN, SAFMAX ) + ULP = DLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( DBLE( NH ) / ULP ) +* +* I1 and I2 are the indices of the first row and last column of H +* to which transformations must be applied. If eigenvalues only are +* being computed, I1 and I2 are set inside the main loop. +* + IF( WANTT ) THEN + I1 = 1 + I2 = N + END IF +* +* ITMAX is the total number of QR iterations allowed. +* + ITMAX = 30 * MAX( 10, NH ) +* +* The main loop begins here. I is the loop index and decreases from +* IHI to ILO in steps of 1. Each iteration of the loop works +* with the active submatrix in rows and columns L to I. +* Eigenvalues I+1 to IHI have already converged. Either L = ILO, or +* H(L,L-1) is negligible so that the matrix splits. +* + I = IHI + 30 CONTINUE + IF( I.LT.ILO ) + $ GO TO 150 +* +* Perform QR iterations on rows and columns ILO to I until a +* submatrix of order 1 splits off at the bottom because a +* subdiagonal element has become negligible. +* + L = ILO + DO 130 ITS = 0, ITMAX +* +* Look for a single small subdiagonal element. +* + DO 40 K = I, L + 1, -1 + IF( CABS1( H( K, K-1 ) ).LE.SMLNUM ) + $ GO TO 50 + TST = CABS1( H( K-1, K-1 ) ) + CABS1( H( K, K ) ) + IF( TST.EQ.ZERO ) THEN + IF( K-2.GE.ILO ) + $ TST = TST + ABS( DBLE( H( K-1, K-2 ) ) ) + IF( K+1.LE.IHI ) + $ TST = TST + ABS( DBLE( H( K+1, K ) ) ) + END IF +* ==== The following is a conservative small subdiagonal +* . deflation criterion due to Ahues & Tisseur (LAWN 122, +* . 1997). It has better mathematical foundation and +* . improves accuracy in some examples. ==== + IF( ABS( DBLE( H( K, K-1 ) ) ).LE.ULP*TST ) THEN + AB = MAX( CABS1( H( K, K-1 ) ), CABS1( H( K-1, K ) ) ) + BA = MIN( CABS1( H( K, K-1 ) ), CABS1( H( K-1, K ) ) ) + AA = MAX( CABS1( H( K, K ) ), + $ CABS1( H( K-1, K-1 )-H( K, K ) ) ) + BB = MIN( CABS1( H( K, K ) ), + $ CABS1( H( K-1, K-1 )-H( K, K ) ) ) + S = AA + AB + IF( BA*( AB / S ).LE.MAX( SMLNUM, + $ ULP*( BB*( AA / S ) ) ) )GO TO 50 + END IF + 40 CONTINUE + 50 CONTINUE + L = K + IF( L.GT.ILO ) THEN +* +* H(L,L-1) is negligible +* + H( L, L-1 ) = ZERO + END IF +* +* Exit from loop if a submatrix of order 1 has split off. +* + IF( L.GE.I ) + $ GO TO 140 +* +* Now the active submatrix is in rows and columns L to I. If +* eigenvalues only are being computed, only the active submatrix +* need be transformed. +* + IF( .NOT.WANTT ) THEN + I1 = L + I2 = I + END IF +* + IF( ITS.EQ.10 ) THEN +* +* Exceptional shift. +* + S = DAT1*ABS( DBLE( H( L+1, L ) ) ) + T = S + H( L, L ) + ELSE IF( ITS.EQ.20 ) THEN +* +* Exceptional shift. +* + S = DAT1*ABS( DBLE( H( I, I-1 ) ) ) + T = S + H( I, I ) + ELSE +* +* Wilkinson's shift. +* + T = H( I, I ) + U = SQRT( H( I-1, I ) )*SQRT( H( I, I-1 ) ) + S = CABS1( U ) + IF( S.NE.RZERO ) THEN + X = HALF*( H( I-1, I-1 )-T ) + SX = CABS1( X ) + S = MAX( S, CABS1( X ) ) + Y = S*SQRT( ( X / S )**2+( U / S )**2 ) + IF( SX.GT.RZERO ) THEN + IF( DBLE( X / SX )*DBLE( Y )+DIMAG( X / SX )* + $ DIMAG( Y ).LT.RZERO )Y = -Y + END IF + T = T - U*ZLADIV( U, ( X+Y ) ) + END IF + END IF +* +* Look for two consecutive small subdiagonal elements. +* + DO 60 M = I - 1, L + 1, -1 +* +* Determine the effect of starting the single-shift QR +* iteration at row M, and see if this would make H(M,M-1) +* negligible. +* + H11 = H( M, M ) + H22 = H( M+1, M+1 ) + H11S = H11 - T + H21 = DBLE( H( M+1, M ) ) + S = CABS1( H11S ) + ABS( H21 ) + H11S = H11S / S + H21 = H21 / S + V( 1 ) = H11S + V( 2 ) = H21 + H10 = DBLE( H( M, M-1 ) ) + IF( ABS( H10 )*ABS( H21 ).LE.ULP* + $ ( CABS1( H11S )*( CABS1( H11 )+CABS1( H22 ) ) ) ) + $ GO TO 70 + 60 CONTINUE + H11 = H( L, L ) + H22 = H( L+1, L+1 ) + H11S = H11 - T + H21 = DBLE( H( L+1, L ) ) + S = CABS1( H11S ) + ABS( H21 ) + H11S = H11S / S + H21 = H21 / S + V( 1 ) = H11S + V( 2 ) = H21 + 70 CONTINUE +* +* Single-shift QR step +* + DO 120 K = M, I - 1 +* +* The first iteration of this loop determines a reflection G +* from the vector V and applies it from left and right to H, +* thus creating a nonzero bulge below the subdiagonal. +* +* Each subsequent iteration determines a reflection G to +* restore the Hessenberg form in the (K-1)th column, and thus +* chases the bulge one step toward the bottom of the active +* submatrix. +* +* V(2) is always real before the call to ZLARFG, and hence +* after the call T2 ( = T1*V(2) ) is also real. +* + IF( K.GT.M ) + $ CALL ZCOPY( 2, H( K, K-1 ), 1, V, 1 ) + CALL ZLARFG( 2, V( 1 ), V( 2 ), 1, T1 ) + IF( K.GT.M ) THEN + H( K, K-1 ) = V( 1 ) + H( K+1, K-1 ) = ZERO + END IF + V2 = V( 2 ) + T2 = DBLE( T1*V2 ) +* +* Apply G from the left to transform the rows of the matrix +* in columns K to I2. +* + DO 80 J = K, I2 + SUM = DCONJG( T1 )*H( K, J ) + T2*H( K+1, J ) + H( K, J ) = H( K, J ) - SUM + H( K+1, J ) = H( K+1, J ) - SUM*V2 + 80 CONTINUE +* +* Apply G from the right to transform the columns of the +* matrix in rows I1 to min(K+2,I). +* + DO 90 J = I1, MIN( K+2, I ) + SUM = T1*H( J, K ) + T2*H( J, K+1 ) + H( J, K ) = H( J, K ) - SUM + H( J, K+1 ) = H( J, K+1 ) - SUM*DCONJG( V2 ) + 90 CONTINUE +* + IF( WANTZ ) THEN +* +* Accumulate transformations in the matrix Z +* + DO 100 J = ILOZ, IHIZ + SUM = T1*Z( J, K ) + T2*Z( J, K+1 ) + Z( J, K ) = Z( J, K ) - SUM + Z( J, K+1 ) = Z( J, K+1 ) - SUM*DCONJG( V2 ) + 100 CONTINUE + END IF +* + IF( K.EQ.M .AND. M.GT.L ) THEN +* +* If the QR step was started at row M > L because two +* consecutive small subdiagonals were found, then extra +* scaling must be performed to ensure that H(M,M-1) remains +* real. +* + TEMP = ONE - T1 + TEMP = TEMP / ABS( TEMP ) + H( M+1, M ) = H( M+1, M )*DCONJG( TEMP ) + IF( M+2.LE.I ) + $ H( M+2, M+1 ) = H( M+2, M+1 )*TEMP + DO 110 J = M, I + IF( J.NE.M+1 ) THEN + IF( I2.GT.J ) + $ CALL ZSCAL( I2-J, TEMP, H( J, J+1 ), LDH ) + CALL ZSCAL( J-I1, DCONJG( TEMP ), H( I1, J ), 1 ) + IF( WANTZ ) THEN + CALL ZSCAL( NZ, DCONJG( TEMP ), Z( ILOZ, J ), + $ 1 ) + END IF + END IF + 110 CONTINUE + END IF + 120 CONTINUE +* +* Ensure that H(I,I-1) is real. +* + TEMP = H( I, I-1 ) + IF( DIMAG( TEMP ).NE.RZERO ) THEN + RTEMP = ABS( TEMP ) + H( I, I-1 ) = RTEMP + TEMP = TEMP / RTEMP + IF( I2.GT.I ) + $ CALL ZSCAL( I2-I, DCONJG( TEMP ), H( I, I+1 ), LDH ) + CALL ZSCAL( I-I1, TEMP, H( I1, I ), 1 ) + IF( WANTZ ) THEN + CALL ZSCAL( NZ, TEMP, Z( ILOZ, I ), 1 ) + END IF + END IF +* + 130 CONTINUE +* +* Failure to converge in remaining number of iterations +* + INFO = I + RETURN +* + 140 CONTINUE +* +* H(I,I-1) is negligible: one eigenvalue has converged. +* + W( I ) = H( I, I ) +* +* return to start of the main loop with new value of I. +* + I = L - 1 + GO TO 30 +* + 150 CONTINUE + RETURN +* +* End of ZLAHQR +* + END diff --git a/dspl/liblapack/SRC/zlahr2.f b/dspl/liblapack/SRC/zlahr2.f new file mode 100644 index 0000000..063b515 --- /dev/null +++ b/dspl/liblapack/SRC/zlahr2.f @@ -0,0 +1,328 @@ +*> \brief \b ZLAHR2 reduces the specified number of first columns of a general rectangular matrix A so that elements below the specified subdiagonal are zero, and returns auxiliary matrices which are needed to apply the transformation to the unreduced part of A. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAHR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAHR2( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* .. Scalar Arguments .. +* INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), T( LDT, NB ), TAU( NB ), +* $ Y( LDY, NB ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAHR2 reduces the first NB columns of A complex general n-BY-(n-k+1) +*> matrix A so that elements below the k-th subdiagonal are zero. The +*> reduction is performed by an unitary similarity transformation +*> Q**H * A * Q. The routine returns the matrices V and T which determine +*> Q as a block reflector I - V*T*V**H, and also the matrix Y = A * V * T. +*> +*> This is an auxiliary routine called by ZGEHRD. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The offset for the reduction. Elements below the k-th +*> subdiagonal in the first NB columns are reduced to zero. +*> K < N. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of columns to be reduced. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N-K+1) +*> On entry, the n-by-(n-k+1) general matrix A. +*> On exit, the elements on and above the k-th subdiagonal in +*> the first NB columns are overwritten with the corresponding +*> elements of the reduced matrix; the elements below the k-th +*> subdiagonal, with the array TAU, represent the matrix Q as a +*> product of elementary reflectors. The other columns of A are +*> unchanged. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (NB) +*> The scalar factors of the elementary reflectors. See Further +*> Details. +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,NB) +*> The upper triangular matrix T. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[out] Y +*> \verbatim +*> Y is COMPLEX*16 array, dimension (LDY,NB) +*> The n-by-nb matrix Y. +*> \endverbatim +*> +*> \param[in] LDY +*> \verbatim +*> LDY is INTEGER +*> The leading dimension of the array Y. LDY >= N. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix Q is represented as a product of nb elementary reflectors +*> +*> Q = H(1) H(2) . . . H(nb). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in +*> A(i+k+1:n,i), and tau in TAU(i). +*> +*> The elements of the vectors v together form the (n-k+1)-by-nb matrix +*> V which is needed, with T and Y, to apply the transformation to the +*> unreduced part of the matrix, using an update of the form: +*> A := (I - V*T*V**H) * (A - Y*V**H). +*> +*> The contents of A on exit are illustrated by the following example +*> with n = 7, k = 3 and nb = 2: +*> +*> ( a a a a a ) +*> ( a a a a a ) +*> ( a a a a a ) +*> ( h h a a a ) +*> ( v1 h a a a ) +*> ( v1 v2 a a a ) +*> ( v1 v2 a a a ) +*> +*> where a denotes an element of the original matrix A, h denotes a +*> modified element of the upper Hessenberg matrix H, and vi denotes an +*> element of the vector defining H(i). +*> +*> This subroutine is a slight modification of LAPACK-3.0's DLAHRD +*> incorporating improvements proposed by Quintana-Orti and Van de +*> Gejin. Note that the entries of A(1:K,2:NB) differ from those +*> returned by the original LAPACK-3.0's DLAHRD routine. (This +*> subroutine is not backward compatible with LAPACK-3.0's DLAHRD.) +*> \endverbatim +* +*> \par References: +* ================ +*> +*> Gregorio Quintana-Orti and Robert van de Geijn, "Improving the +*> performance of reduction to Hessenberg form," ACM Transactions on +*> Mathematical Software, 32(2):180-194, June 2006. +*> +* ===================================================================== + SUBROUTINE ZLAHR2( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER K, LDA, LDT, LDY, N, NB +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), T( LDT, NB ), TAU( NB ), + $ Y( LDY, NB ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX*16 EI +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZCOPY, ZGEMM, ZGEMV, ZLACPY, + $ ZLARFG, ZSCAL, ZTRMM, ZTRMV, ZLACGV +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, NB + IF( I.GT.1 ) THEN +* +* Update A(K+1:N,I) +* +* Update I-th column of A - Y * V**H +* + CALL ZLACGV( I-1, A( K+I-1, 1 ), LDA ) + CALL ZGEMV( 'NO TRANSPOSE', N-K, I-1, -ONE, Y(K+1,1), LDY, + $ A( K+I-1, 1 ), LDA, ONE, A( K+1, I ), 1 ) + CALL ZLACGV( I-1, A( K+I-1, 1 ), LDA ) +* +* Apply I - V * T**H * V**H to this column (call it b) from the +* left, using the last column of T as workspace +* +* Let V = ( V1 ) and b = ( b1 ) (first I-1 rows) +* ( V2 ) ( b2 ) +* +* where V1 is unit lower triangular +* +* w := V1**H * b1 +* + CALL ZCOPY( I-1, A( K+1, I ), 1, T( 1, NB ), 1 ) + CALL ZTRMV( 'Lower', 'Conjugate transpose', 'UNIT', + $ I-1, A( K+1, 1 ), + $ LDA, T( 1, NB ), 1 ) +* +* w := w + V2**H * b2 +* + CALL ZGEMV( 'Conjugate transpose', N-K-I+1, I-1, + $ ONE, A( K+I, 1 ), + $ LDA, A( K+I, I ), 1, ONE, T( 1, NB ), 1 ) +* +* w := T**H * w +* + CALL ZTRMV( 'Upper', 'Conjugate transpose', 'NON-UNIT', + $ I-1, T, LDT, + $ T( 1, NB ), 1 ) +* +* b2 := b2 - V2*w +* + CALL ZGEMV( 'NO TRANSPOSE', N-K-I+1, I-1, -ONE, + $ A( K+I, 1 ), + $ LDA, T( 1, NB ), 1, ONE, A( K+I, I ), 1 ) +* +* b1 := b1 - V1*w +* + CALL ZTRMV( 'Lower', 'NO TRANSPOSE', + $ 'UNIT', I-1, + $ A( K+1, 1 ), LDA, T( 1, NB ), 1 ) + CALL ZAXPY( I-1, -ONE, T( 1, NB ), 1, A( K+1, I ), 1 ) +* + A( K+I-1, I-1 ) = EI + END IF +* +* Generate the elementary reflector H(I) to annihilate +* A(K+I+1:N,I) +* + CALL ZLARFG( N-K-I+1, A( K+I, I ), A( MIN( K+I+1, N ), I ), 1, + $ TAU( I ) ) + EI = A( K+I, I ) + A( K+I, I ) = ONE +* +* Compute Y(K+1:N,I) +* + CALL ZGEMV( 'NO TRANSPOSE', N-K, N-K-I+1, + $ ONE, A( K+1, I+1 ), + $ LDA, A( K+I, I ), 1, ZERO, Y( K+1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', N-K-I+1, I-1, + $ ONE, A( K+I, 1 ), LDA, + $ A( K+I, I ), 1, ZERO, T( 1, I ), 1 ) + CALL ZGEMV( 'NO TRANSPOSE', N-K, I-1, -ONE, + $ Y( K+1, 1 ), LDY, + $ T( 1, I ), 1, ONE, Y( K+1, I ), 1 ) + CALL ZSCAL( N-K, TAU( I ), Y( K+1, I ), 1 ) +* +* Compute T(1:I,I) +* + CALL ZSCAL( I-1, -TAU( I ), T( 1, I ), 1 ) + CALL ZTRMV( 'Upper', 'No Transpose', 'NON-UNIT', + $ I-1, T, LDT, + $ T( 1, I ), 1 ) + T( I, I ) = TAU( I ) +* + 10 CONTINUE + A( K+NB, NB ) = EI +* +* Compute Y(1:K,1:NB) +* + CALL ZLACPY( 'ALL', K, NB, A( 1, 2 ), LDA, Y, LDY ) + CALL ZTRMM( 'RIGHT', 'Lower', 'NO TRANSPOSE', + $ 'UNIT', K, NB, + $ ONE, A( K+1, 1 ), LDA, Y, LDY ) + IF( N.GT.K+NB ) + $ CALL ZGEMM( 'NO TRANSPOSE', 'NO TRANSPOSE', K, + $ NB, N-K-NB, ONE, + $ A( 1, 2+NB ), LDA, A( K+1+NB, 1 ), LDA, ONE, Y, + $ LDY ) + CALL ZTRMM( 'RIGHT', 'Upper', 'NO TRANSPOSE', + $ 'NON-UNIT', K, NB, + $ ONE, T, LDT, Y, LDY ) +* + RETURN +* +* End of ZLAHR2 +* + END diff --git a/dspl/liblapack/SRC/zlaic1.f b/dspl/liblapack/SRC/zlaic1.f new file mode 100644 index 0000000..1000922 --- /dev/null +++ b/dspl/liblapack/SRC/zlaic1.f @@ -0,0 +1,371 @@ +*> \brief \b ZLAIC1 applies one step of incremental condition estimation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAIC1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C ) +* +* .. Scalar Arguments .. +* INTEGER J, JOB +* DOUBLE PRECISION SEST, SESTPR +* COMPLEX*16 C, GAMMA, S +* .. +* .. Array Arguments .. +* COMPLEX*16 W( J ), X( J ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAIC1 applies one step of incremental condition estimation in +*> its simplest version: +*> +*> Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j +*> lower triangular matrix L, such that +*> twonorm(L*x) = sest +*> Then ZLAIC1 computes sestpr, s, c such that +*> the vector +*> [ s*x ] +*> xhat = [ c ] +*> is an approximate singular vector of +*> [ L 0 ] +*> Lhat = [ w**H gamma ] +*> in the sense that +*> twonorm(Lhat*xhat) = sestpr. +*> +*> Depending on JOB, an estimate for the largest or smallest singular +*> value is computed. +*> +*> Note that [s c]**H and sestpr**2 is an eigenpair of the system +*> +*> diag(sest*sest, 0) + [alpha gamma] * [ conjg(alpha) ] +*> [ conjg(gamma) ] +*> +*> where alpha = x**H * w. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is INTEGER +*> = 1: an estimate for the largest singular value is computed. +*> = 2: an estimate for the smallest singular value is computed. +*> \endverbatim +*> +*> \param[in] J +*> \verbatim +*> J is INTEGER +*> Length of X and W +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (J) +*> The j-vector x. +*> \endverbatim +*> +*> \param[in] SEST +*> \verbatim +*> SEST is DOUBLE PRECISION +*> Estimated singular value of j by j matrix L +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is COMPLEX*16 array, dimension (J) +*> The j-vector w. +*> \endverbatim +*> +*> \param[in] GAMMA +*> \verbatim +*> GAMMA is COMPLEX*16 +*> The diagonal element gamma. +*> \endverbatim +*> +*> \param[out] SESTPR +*> \verbatim +*> SESTPR is DOUBLE PRECISION +*> Estimated singular value of (j+1) by (j+1) matrix Lhat. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is COMPLEX*16 +*> Sine needed in forming xhat. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is COMPLEX*16 +*> Cosine needed in forming xhat. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER J, JOB + DOUBLE PRECISION SEST, SESTPR + COMPLEX*16 C, GAMMA, S +* .. +* .. Array Arguments .. + COMPLEX*16 W( J ), X( J ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 ) + DOUBLE PRECISION HALF, FOUR + PARAMETER ( HALF = 0.5D0, FOUR = 4.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION ABSALP, ABSEST, ABSGAM, B, EPS, NORMA, S1, S2, + $ SCL, T, TEST, TMP, ZETA1, ZETA2 + COMPLEX*16 ALPHA, COSINE, SINE +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DCONJG, MAX, SQRT +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + COMPLEX*16 ZDOTC + EXTERNAL DLAMCH, ZDOTC +* .. +* .. Executable Statements .. +* + EPS = DLAMCH( 'Epsilon' ) + ALPHA = ZDOTC( J, X, 1, W, 1 ) +* + ABSALP = ABS( ALPHA ) + ABSGAM = ABS( GAMMA ) + ABSEST = ABS( SEST ) +* + IF( JOB.EQ.1 ) THEN +* +* Estimating largest singular value +* +* special cases +* + IF( SEST.EQ.ZERO ) THEN + S1 = MAX( ABSGAM, ABSALP ) + IF( S1.EQ.ZERO ) THEN + S = ZERO + C = ONE + SESTPR = ZERO + ELSE + S = ALPHA / S1 + C = GAMMA / S1 + TMP = SQRT( S*DCONJG( S )+C*DCONJG( C ) ) + S = S / TMP + C = C / TMP + SESTPR = S1*TMP + END IF + RETURN + ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN + S = ONE + C = ZERO + TMP = MAX( ABSEST, ABSALP ) + S1 = ABSEST / TMP + S2 = ABSALP / TMP + SESTPR = TMP*SQRT( S1*S1+S2*S2 ) + RETURN + ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN + S1 = ABSGAM + S2 = ABSEST + IF( S1.LE.S2 ) THEN + S = ONE + C = ZERO + SESTPR = S2 + ELSE + S = ZERO + C = ONE + SESTPR = S1 + END IF + RETURN + ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN + S1 = ABSGAM + S2 = ABSALP + IF( S1.LE.S2 ) THEN + TMP = S1 / S2 + SCL = SQRT( ONE+TMP*TMP ) + SESTPR = S2*SCL + S = ( ALPHA / S2 ) / SCL + C = ( GAMMA / S2 ) / SCL + ELSE + TMP = S2 / S1 + SCL = SQRT( ONE+TMP*TMP ) + SESTPR = S1*SCL + S = ( ALPHA / S1 ) / SCL + C = ( GAMMA / S1 ) / SCL + END IF + RETURN + ELSE +* +* normal case +* + ZETA1 = ABSALP / ABSEST + ZETA2 = ABSGAM / ABSEST +* + B = ( ONE-ZETA1*ZETA1-ZETA2*ZETA2 )*HALF + C = ZETA1*ZETA1 + IF( B.GT.ZERO ) THEN + T = C / ( B+SQRT( B*B+C ) ) + ELSE + T = SQRT( B*B+C ) - B + END IF +* + SINE = -( ALPHA / ABSEST ) / T + COSINE = -( GAMMA / ABSEST ) / ( ONE+T ) + TMP = SQRT( SINE*DCONJG( SINE )+COSINE*DCONJG( COSINE ) ) + S = SINE / TMP + C = COSINE / TMP + SESTPR = SQRT( T+ONE )*ABSEST + RETURN + END IF +* + ELSE IF( JOB.EQ.2 ) THEN +* +* Estimating smallest singular value +* +* special cases +* + IF( SEST.EQ.ZERO ) THEN + SESTPR = ZERO + IF( MAX( ABSGAM, ABSALP ).EQ.ZERO ) THEN + SINE = ONE + COSINE = ZERO + ELSE + SINE = -DCONJG( GAMMA ) + COSINE = DCONJG( ALPHA ) + END IF + S1 = MAX( ABS( SINE ), ABS( COSINE ) ) + S = SINE / S1 + C = COSINE / S1 + TMP = SQRT( S*DCONJG( S )+C*DCONJG( C ) ) + S = S / TMP + C = C / TMP + RETURN + ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN + S = ZERO + C = ONE + SESTPR = ABSGAM + RETURN + ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN + S1 = ABSGAM + S2 = ABSEST + IF( S1.LE.S2 ) THEN + S = ZERO + C = ONE + SESTPR = S1 + ELSE + S = ONE + C = ZERO + SESTPR = S2 + END IF + RETURN + ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN + S1 = ABSGAM + S2 = ABSALP + IF( S1.LE.S2 ) THEN + TMP = S1 / S2 + SCL = SQRT( ONE+TMP*TMP ) + SESTPR = ABSEST*( TMP / SCL ) + S = -( DCONJG( GAMMA ) / S2 ) / SCL + C = ( DCONJG( ALPHA ) / S2 ) / SCL + ELSE + TMP = S2 / S1 + SCL = SQRT( ONE+TMP*TMP ) + SESTPR = ABSEST / SCL + S = -( DCONJG( GAMMA ) / S1 ) / SCL + C = ( DCONJG( ALPHA ) / S1 ) / SCL + END IF + RETURN + ELSE +* +* normal case +* + ZETA1 = ABSALP / ABSEST + ZETA2 = ABSGAM / ABSEST +* + NORMA = MAX( ONE+ZETA1*ZETA1+ZETA1*ZETA2, + $ ZETA1*ZETA2+ZETA2*ZETA2 ) +* +* See if root is closer to zero or to ONE +* + TEST = ONE + TWO*( ZETA1-ZETA2 )*( ZETA1+ZETA2 ) + IF( TEST.GE.ZERO ) THEN +* +* root is close to zero, compute directly +* + B = ( ZETA1*ZETA1+ZETA2*ZETA2+ONE )*HALF + C = ZETA2*ZETA2 + T = C / ( B+SQRT( ABS( B*B-C ) ) ) + SINE = ( ALPHA / ABSEST ) / ( ONE-T ) + COSINE = -( GAMMA / ABSEST ) / T + SESTPR = SQRT( T+FOUR*EPS*EPS*NORMA )*ABSEST + ELSE +* +* root is closer to ONE, shift by that amount +* + B = ( ZETA2*ZETA2+ZETA1*ZETA1-ONE )*HALF + C = ZETA1*ZETA1 + IF( B.GE.ZERO ) THEN + T = -C / ( B+SQRT( B*B+C ) ) + ELSE + T = B - SQRT( B*B+C ) + END IF + SINE = -( ALPHA / ABSEST ) / T + COSINE = -( GAMMA / ABSEST ) / ( ONE+T ) + SESTPR = SQRT( ONE+T+FOUR*EPS*EPS*NORMA )*ABSEST + END IF + TMP = SQRT( SINE*DCONJG( SINE )+COSINE*DCONJG( COSINE ) ) + S = SINE / TMP + C = COSINE / TMP + RETURN +* + END IF + END IF + RETURN +* +* End of ZLAIC1 +* + END diff --git a/dspl/liblapack/SRC/zlals0.f b/dspl/liblapack/SRC/zlals0.f new file mode 100644 index 0000000..1946ad9 --- /dev/null +++ b/dspl/liblapack/SRC/zlals0.f @@ -0,0 +1,556 @@ +*> \brief \b ZLALS0 applies back multiplying factors in solving the least squares problem using divide and conquer SVD approach. Used by sgelsd. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLALS0 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX, +* PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, +* POLES, DIFL, DIFR, Z, K, C, S, RWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL, +* $ LDGNUM, NL, NR, NRHS, SQRE +* DOUBLE PRECISION C, S +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( LDGCOL, * ), PERM( * ) +* DOUBLE PRECISION DIFL( * ), DIFR( LDGNUM, * ), +* $ GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ), +* $ RWORK( * ), Z( * ) +* COMPLEX*16 B( LDB, * ), BX( LDBX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLALS0 applies back the multiplying factors of either the left or the +*> right singular vector matrix of a diagonal matrix appended by a row +*> to the right hand side matrix B in solving the least squares problem +*> using the divide-and-conquer SVD approach. +*> +*> For the left singular vector matrix, three types of orthogonal +*> matrices are involved: +*> +*> (1L) Givens rotations: the number of such rotations is GIVPTR; the +*> pairs of columns/rows they were applied to are stored in GIVCOL; +*> and the C- and S-values of these rotations are stored in GIVNUM. +*> +*> (2L) Permutation. The (NL+1)-st row of B is to be moved to the first +*> row, and for J=2:N, PERM(J)-th row of B is to be moved to the +*> J-th row. +*> +*> (3L) The left singular vector matrix of the remaining matrix. +*> +*> For the right singular vector matrix, four types of orthogonal +*> matrices are involved: +*> +*> (1R) The right singular vector matrix of the remaining matrix. +*> +*> (2R) If SQRE = 1, one extra Givens rotation to generate the right +*> null space. +*> +*> (3R) The inverse transformation of (2L). +*> +*> (4R) The inverse transformation of (1L). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> Specifies whether singular vectors are to be computed in +*> factored form: +*> = 0: Left singular vector matrix. +*> = 1: Right singular vector matrix. +*> \endverbatim +*> +*> \param[in] NL +*> \verbatim +*> NL is INTEGER +*> The row dimension of the upper block. NL >= 1. +*> \endverbatim +*> +*> \param[in] NR +*> \verbatim +*> NR is INTEGER +*> The row dimension of the lower block. NR >= 1. +*> \endverbatim +*> +*> \param[in] SQRE +*> \verbatim +*> SQRE is INTEGER +*> = 0: the lower block is an NR-by-NR square matrix. +*> = 1: the lower block is an NR-by-(NR+1) rectangular matrix. +*> +*> The bidiagonal matrix has row dimension N = NL + NR + 1, +*> and column dimension M = N + SQRE. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of columns of B and BX. NRHS must be at least 1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension ( LDB, NRHS ) +*> On input, B contains the right hand sides of the least +*> squares problem in rows 1 through M. On output, B contains +*> the solution X in rows 1 through N. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B. LDB must be at least +*> max(1,MAX( M, N ) ). +*> \endverbatim +*> +*> \param[out] BX +*> \verbatim +*> BX is COMPLEX*16 array, dimension ( LDBX, NRHS ) +*> \endverbatim +*> +*> \param[in] LDBX +*> \verbatim +*> LDBX is INTEGER +*> The leading dimension of BX. +*> \endverbatim +*> +*> \param[in] PERM +*> \verbatim +*> PERM is INTEGER array, dimension ( N ) +*> The permutations (from deflation and sorting) applied +*> to the two blocks. +*> \endverbatim +*> +*> \param[in] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER +*> The number of Givens rotations which took place in this +*> subproblem. +*> \endverbatim +*> +*> \param[in] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension ( LDGCOL, 2 ) +*> Each pair of numbers indicates a pair of rows/columns +*> involved in a Givens rotation. +*> \endverbatim +*> +*> \param[in] LDGCOL +*> \verbatim +*> LDGCOL is INTEGER +*> The leading dimension of GIVCOL, must be at least N. +*> \endverbatim +*> +*> \param[in] GIVNUM +*> \verbatim +*> GIVNUM is DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) +*> Each number indicates the C or S value used in the +*> corresponding Givens rotation. +*> \endverbatim +*> +*> \param[in] LDGNUM +*> \verbatim +*> LDGNUM is INTEGER +*> The leading dimension of arrays DIFR, POLES and +*> GIVNUM, must be at least K. +*> \endverbatim +*> +*> \param[in] POLES +*> \verbatim +*> POLES is DOUBLE PRECISION array, dimension ( LDGNUM, 2 ) +*> On entry, POLES(1:K, 1) contains the new singular +*> values obtained from solving the secular equation, and +*> POLES(1:K, 2) is an array containing the poles in the secular +*> equation. +*> \endverbatim +*> +*> \param[in] DIFL +*> \verbatim +*> DIFL is DOUBLE PRECISION array, dimension ( K ). +*> On entry, DIFL(I) is the distance between I-th updated +*> (undeflated) singular value and the I-th (undeflated) old +*> singular value. +*> \endverbatim +*> +*> \param[in] DIFR +*> \verbatim +*> DIFR is DOUBLE PRECISION array, dimension ( LDGNUM, 2 ). +*> On entry, DIFR(I, 1) contains the distances between I-th +*> updated (undeflated) singular value and the I+1-th +*> (undeflated) old singular value. And DIFR(I, 2) is the +*> normalizing factor for the I-th right singular vector. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension ( K ) +*> Contain the components of the deflation-adjusted updating row +*> vector. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> Contains the dimension of the non-deflated matrix, +*> This is the order of the related secular equation. 1 <= K <=N. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION +*> C contains garbage if SQRE =0 and the C-value of a Givens +*> rotation related to the right null space if SQRE = 1. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is DOUBLE PRECISION +*> S contains garbage if SQRE =0 and the S-value of a Givens +*> rotation related to the right null space if SQRE = 1. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension +*> ( K*(1+NRHS) + 2*NRHS ) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Ren-Cang Li, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +* +* ===================================================================== + SUBROUTINE ZLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX, + $ PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, + $ POLES, DIFL, DIFR, Z, K, C, S, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL, + $ LDGNUM, NL, NR, NRHS, SQRE + DOUBLE PRECISION C, S +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), PERM( * ) + DOUBLE PRECISION DIFL( * ), DIFR( LDGNUM, * ), + $ GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ), + $ RWORK( * ), Z( * ) + COMPLEX*16 B( LDB, * ), BX( LDBX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO, NEGONE + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0, NEGONE = -1.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JCOL, JROW, M, N, NLP1 + DOUBLE PRECISION DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, TEMP +* .. +* .. External Subroutines .. + EXTERNAL DGEMV, XERBLA, ZCOPY, ZDROT, ZDSCAL, ZLACPY, + $ ZLASCL +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMC3, DNRM2 + EXTERNAL DLAMC3, DNRM2 +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DIMAG, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + N = NL + NR + 1 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( NL.LT.1 ) THEN + INFO = -2 + ELSE IF( NR.LT.1 ) THEN + INFO = -3 + ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN + INFO = -4 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -5 + ELSE IF( LDB.LT.N ) THEN + INFO = -7 + ELSE IF( LDBX.LT.N ) THEN + INFO = -9 + ELSE IF( GIVPTR.LT.0 ) THEN + INFO = -11 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -13 + ELSE IF( LDGNUM.LT.N ) THEN + INFO = -15 + ELSE IF( K.LT.1 ) THEN + INFO = -20 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLALS0', -INFO ) + RETURN + END IF +* + M = N + SQRE + NLP1 = NL + 1 +* + IF( ICOMPQ.EQ.0 ) THEN +* +* Apply back orthogonal transformations from the left. +* +* Step (1L): apply back the Givens rotations performed. +* + DO 10 I = 1, GIVPTR + CALL ZDROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB, + $ B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ), + $ GIVNUM( I, 1 ) ) + 10 CONTINUE +* +* Step (2L): permute rows of B. +* + CALL ZCOPY( NRHS, B( NLP1, 1 ), LDB, BX( 1, 1 ), LDBX ) + DO 20 I = 2, N + CALL ZCOPY( NRHS, B( PERM( I ), 1 ), LDB, BX( I, 1 ), LDBX ) + 20 CONTINUE +* +* Step (3L): apply the inverse of the left singular vector +* matrix to BX. +* + IF( K.EQ.1 ) THEN + CALL ZCOPY( NRHS, BX, LDBX, B, LDB ) + IF( Z( 1 ).LT.ZERO ) THEN + CALL ZDSCAL( NRHS, NEGONE, B, LDB ) + END IF + ELSE + DO 100 J = 1, K + DIFLJ = DIFL( J ) + DJ = POLES( J, 1 ) + DSIGJ = -POLES( J, 2 ) + IF( J.LT.K ) THEN + DIFRJ = -DIFR( J, 1 ) + DSIGJP = -POLES( J+1, 2 ) + END IF + IF( ( Z( J ).EQ.ZERO ) .OR. ( POLES( J, 2 ).EQ.ZERO ) ) + $ THEN + RWORK( J ) = ZERO + ELSE + RWORK( J ) = -POLES( J, 2 )*Z( J ) / DIFLJ / + $ ( POLES( J, 2 )+DJ ) + END IF + DO 30 I = 1, J - 1 + IF( ( Z( I ).EQ.ZERO ) .OR. + $ ( POLES( I, 2 ).EQ.ZERO ) ) THEN + RWORK( I ) = ZERO + ELSE + RWORK( I ) = POLES( I, 2 )*Z( I ) / + $ ( DLAMC3( POLES( I, 2 ), DSIGJ )- + $ DIFLJ ) / ( POLES( I, 2 )+DJ ) + END IF + 30 CONTINUE + DO 40 I = J + 1, K + IF( ( Z( I ).EQ.ZERO ) .OR. + $ ( POLES( I, 2 ).EQ.ZERO ) ) THEN + RWORK( I ) = ZERO + ELSE + RWORK( I ) = POLES( I, 2 )*Z( I ) / + $ ( DLAMC3( POLES( I, 2 ), DSIGJP )+ + $ DIFRJ ) / ( POLES( I, 2 )+DJ ) + END IF + 40 CONTINUE + RWORK( 1 ) = NEGONE + TEMP = DNRM2( K, RWORK, 1 ) +* +* Since B and BX are complex, the following call to DGEMV +* is performed in two steps (real and imaginary parts). +* +* CALL DGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO, +* $ B( J, 1 ), LDB ) +* + I = K + NRHS*2 + DO 60 JCOL = 1, NRHS + DO 50 JROW = 1, K + I = I + 1 + RWORK( I ) = DBLE( BX( JROW, JCOL ) ) + 50 CONTINUE + 60 CONTINUE + CALL DGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K, + $ RWORK( 1 ), 1, ZERO, RWORK( 1+K ), 1 ) + I = K + NRHS*2 + DO 80 JCOL = 1, NRHS + DO 70 JROW = 1, K + I = I + 1 + RWORK( I ) = DIMAG( BX( JROW, JCOL ) ) + 70 CONTINUE + 80 CONTINUE + CALL DGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K, + $ RWORK( 1 ), 1, ZERO, RWORK( 1+K+NRHS ), 1 ) + DO 90 JCOL = 1, NRHS + B( J, JCOL ) = DCMPLX( RWORK( JCOL+K ), + $ RWORK( JCOL+K+NRHS ) ) + 90 CONTINUE + CALL ZLASCL( 'G', 0, 0, TEMP, ONE, 1, NRHS, B( J, 1 ), + $ LDB, INFO ) + 100 CONTINUE + END IF +* +* Move the deflated rows of BX to B also. +* + IF( K.LT.MAX( M, N ) ) + $ CALL ZLACPY( 'A', N-K, NRHS, BX( K+1, 1 ), LDBX, + $ B( K+1, 1 ), LDB ) + ELSE +* +* Apply back the right orthogonal transformations. +* +* Step (1R): apply back the new right singular vector matrix +* to B. +* + IF( K.EQ.1 ) THEN + CALL ZCOPY( NRHS, B, LDB, BX, LDBX ) + ELSE + DO 180 J = 1, K + DSIGJ = POLES( J, 2 ) + IF( Z( J ).EQ.ZERO ) THEN + RWORK( J ) = ZERO + ELSE + RWORK( J ) = -Z( J ) / DIFL( J ) / + $ ( DSIGJ+POLES( J, 1 ) ) / DIFR( J, 2 ) + END IF + DO 110 I = 1, J - 1 + IF( Z( J ).EQ.ZERO ) THEN + RWORK( I ) = ZERO + ELSE + RWORK( I ) = Z( J ) / ( DLAMC3( DSIGJ, -POLES( I+1, + $ 2 ) )-DIFR( I, 1 ) ) / + $ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 ) + END IF + 110 CONTINUE + DO 120 I = J + 1, K + IF( Z( J ).EQ.ZERO ) THEN + RWORK( I ) = ZERO + ELSE + RWORK( I ) = Z( J ) / ( DLAMC3( DSIGJ, -POLES( I, + $ 2 ) )-DIFL( I ) ) / + $ ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 ) + END IF + 120 CONTINUE +* +* Since B and BX are complex, the following call to DGEMV +* is performed in two steps (real and imaginary parts). +* +* CALL DGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO, +* $ BX( J, 1 ), LDBX ) +* + I = K + NRHS*2 + DO 140 JCOL = 1, NRHS + DO 130 JROW = 1, K + I = I + 1 + RWORK( I ) = DBLE( B( JROW, JCOL ) ) + 130 CONTINUE + 140 CONTINUE + CALL DGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K, + $ RWORK( 1 ), 1, ZERO, RWORK( 1+K ), 1 ) + I = K + NRHS*2 + DO 160 JCOL = 1, NRHS + DO 150 JROW = 1, K + I = I + 1 + RWORK( I ) = DIMAG( B( JROW, JCOL ) ) + 150 CONTINUE + 160 CONTINUE + CALL DGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K, + $ RWORK( 1 ), 1, ZERO, RWORK( 1+K+NRHS ), 1 ) + DO 170 JCOL = 1, NRHS + BX( J, JCOL ) = DCMPLX( RWORK( JCOL+K ), + $ RWORK( JCOL+K+NRHS ) ) + 170 CONTINUE + 180 CONTINUE + END IF +* +* Step (2R): if SQRE = 1, apply back the rotation that is +* related to the right null space of the subproblem. +* + IF( SQRE.EQ.1 ) THEN + CALL ZCOPY( NRHS, B( M, 1 ), LDB, BX( M, 1 ), LDBX ) + CALL ZDROT( NRHS, BX( 1, 1 ), LDBX, BX( M, 1 ), LDBX, C, S ) + END IF + IF( K.LT.MAX( M, N ) ) + $ CALL ZLACPY( 'A', N-K, NRHS, B( K+1, 1 ), LDB, BX( K+1, 1 ), + $ LDBX ) +* +* Step (3R): permute rows of B. +* + CALL ZCOPY( NRHS, BX( 1, 1 ), LDBX, B( NLP1, 1 ), LDB ) + IF( SQRE.EQ.1 ) THEN + CALL ZCOPY( NRHS, BX( M, 1 ), LDBX, B( M, 1 ), LDB ) + END IF + DO 190 I = 2, N + CALL ZCOPY( NRHS, BX( I, 1 ), LDBX, B( PERM( I ), 1 ), LDB ) + 190 CONTINUE +* +* Step (4R): apply back the Givens rotations performed. +* + DO 200 I = GIVPTR, 1, -1 + CALL ZDROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB, + $ B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ), + $ -GIVNUM( I, 1 ) ) + 200 CONTINUE + END IF +* + RETURN +* +* End of ZLALS0 +* + END diff --git a/dspl/liblapack/SRC/zlalsa.f b/dspl/liblapack/SRC/zlalsa.f new file mode 100644 index 0000000..ed5845b --- /dev/null +++ b/dspl/liblapack/SRC/zlalsa.f @@ -0,0 +1,635 @@ +*> \brief \b ZLALSA computes the SVD of the coefficient matrix in compact form. Used by sgelsd. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLALSA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U, +* LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR, +* GIVCOL, LDGCOL, PERM, GIVNUM, C, S, RWORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS, +* $ SMLSIZ +* .. +* .. Array Arguments .. +* INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), +* $ K( * ), PERM( LDGCOL, * ) +* DOUBLE PRECISION C( * ), DIFL( LDU, * ), DIFR( LDU, * ), +* $ GIVNUM( LDU, * ), POLES( LDU, * ), RWORK( * ), +* $ S( * ), U( LDU, * ), VT( LDU, * ), Z( LDU, * ) +* COMPLEX*16 B( LDB, * ), BX( LDBX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLALSA is an itermediate step in solving the least squares problem +*> by computing the SVD of the coefficient matrix in compact form (The +*> singular vectors are computed as products of simple orthorgonal +*> matrices.). +*> +*> If ICOMPQ = 0, ZLALSA applies the inverse of the left singular vector +*> matrix of an upper bidiagonal matrix to the right hand side; and if +*> ICOMPQ = 1, ZLALSA applies the right singular vector matrix to the +*> right hand side. The singular vector matrices were generated in +*> compact form by ZLALSA. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ICOMPQ +*> \verbatim +*> ICOMPQ is INTEGER +*> Specifies whether the left or the right singular vector +*> matrix is involved. +*> = 0: Left singular vector matrix +*> = 1: Right singular vector matrix +*> \endverbatim +*> +*> \param[in] SMLSIZ +*> \verbatim +*> SMLSIZ is INTEGER +*> The maximum size of the subproblems at the bottom of the +*> computation tree. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The row and column dimensions of the upper bidiagonal matrix. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of columns of B and BX. NRHS must be at least 1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension ( LDB, NRHS ) +*> On input, B contains the right hand sides of the least +*> squares problem in rows 1 through M. +*> On output, B contains the solution X in rows 1 through N. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B in the calling subprogram. +*> LDB must be at least max(1,MAX( M, N ) ). +*> \endverbatim +*> +*> \param[out] BX +*> \verbatim +*> BX is COMPLEX*16 array, dimension ( LDBX, NRHS ) +*> On exit, the result of applying the left or right singular +*> vector matrix to B. +*> \endverbatim +*> +*> \param[in] LDBX +*> \verbatim +*> LDBX is INTEGER +*> The leading dimension of BX. +*> \endverbatim +*> +*> \param[in] U +*> \verbatim +*> U is DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ). +*> On entry, U contains the left singular vector matrices of all +*> subproblems at the bottom level. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER, LDU = > N. +*> The leading dimension of arrays U, VT, DIFL, DIFR, +*> POLES, GIVNUM, and Z. +*> \endverbatim +*> +*> \param[in] VT +*> \verbatim +*> VT is DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ). +*> On entry, VT**H contains the right singular vector matrices of +*> all subproblems at the bottom level. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER array, dimension ( N ). +*> \endverbatim +*> +*> \param[in] DIFL +*> \verbatim +*> DIFL is DOUBLE PRECISION array, dimension ( LDU, NLVL ). +*> where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1. +*> \endverbatim +*> +*> \param[in] DIFR +*> \verbatim +*> DIFR is DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). +*> On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record +*> distances between singular values on the I-th level and +*> singular values on the (I -1)-th level, and DIFR(*, 2 * I) +*> record the normalizing factors of the right singular vectors +*> matrices of subproblems on I-th level. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION array, dimension ( LDU, NLVL ). +*> On entry, Z(1, I) contains the components of the deflation- +*> adjusted updating row vector for subproblems on the I-th +*> level. +*> \endverbatim +*> +*> \param[in] POLES +*> \verbatim +*> POLES is DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). +*> On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old +*> singular values involved in the secular equations on the I-th +*> level. +*> \endverbatim +*> +*> \param[in] GIVPTR +*> \verbatim +*> GIVPTR is INTEGER array, dimension ( N ). +*> On entry, GIVPTR( I ) records the number of Givens +*> rotations performed on the I-th problem on the computation +*> tree. +*> \endverbatim +*> +*> \param[in] GIVCOL +*> \verbatim +*> GIVCOL is INTEGER array, dimension ( LDGCOL, 2 * NLVL ). +*> On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the +*> locations of Givens rotations performed on the I-th level on +*> the computation tree. +*> \endverbatim +*> +*> \param[in] LDGCOL +*> \verbatim +*> LDGCOL is INTEGER, LDGCOL = > N. +*> The leading dimension of arrays GIVCOL and PERM. +*> \endverbatim +*> +*> \param[in] PERM +*> \verbatim +*> PERM is INTEGER array, dimension ( LDGCOL, NLVL ). +*> On entry, PERM(*, I) records permutations done on the I-th +*> level of the computation tree. +*> \endverbatim +*> +*> \param[in] GIVNUM +*> \verbatim +*> GIVNUM is DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ). +*> On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S- +*> values of Givens rotations performed on the I-th level on the +*> computation tree. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension ( N ). +*> On entry, if the I-th subproblem is not square, +*> C( I ) contains the C-value of a Givens rotation related to +*> the right null space of the I-th subproblem. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension ( N ). +*> On entry, if the I-th subproblem is not square, +*> S( I ) contains the S-value of a Givens rotation related to +*> the right null space of the I-th subproblem. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension at least +*> MAX( (SMLSZ+1)*NRHS*3, N*(1+NRHS) + 2*NRHS ). +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (3*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Ren-Cang Li, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +* +* ===================================================================== + SUBROUTINE ZLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U, + $ LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR, + $ GIVCOL, LDGCOL, PERM, GIVNUM, C, S, RWORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS, + $ SMLSIZ +* .. +* .. Array Arguments .. + INTEGER GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ), + $ K( * ), PERM( LDGCOL, * ) + DOUBLE PRECISION C( * ), DIFL( LDU, * ), DIFR( LDU, * ), + $ GIVNUM( LDU, * ), POLES( LDU, * ), RWORK( * ), + $ S( * ), U( LDU, * ), VT( LDU, * ), Z( LDU, * ) + COMPLEX*16 B( LDB, * ), BX( LDBX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, I1, IC, IM1, INODE, J, JCOL, JIMAG, JREAL, + $ JROW, LF, LL, LVL, LVL2, ND, NDB1, NDIML, + $ NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQRE +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLASDT, XERBLA, ZCOPY, ZLALS0 +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DIMAG +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN + INFO = -1 + ELSE IF( SMLSIZ.LT.3 ) THEN + INFO = -2 + ELSE IF( N.LT.SMLSIZ ) THEN + INFO = -3 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -4 + ELSE IF( LDB.LT.N ) THEN + INFO = -6 + ELSE IF( LDBX.LT.N ) THEN + INFO = -8 + ELSE IF( LDU.LT.N ) THEN + INFO = -10 + ELSE IF( LDGCOL.LT.N ) THEN + INFO = -19 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLALSA', -INFO ) + RETURN + END IF +* +* Book-keeping and setting up the computation tree. +* + INODE = 1 + NDIML = INODE + N + NDIMR = NDIML + N +* + CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ), + $ IWORK( NDIMR ), SMLSIZ ) +* +* The following code applies back the left singular vector factors. +* For applying back the right singular vector factors, go to 170. +* + IF( ICOMPQ.EQ.1 ) THEN + GO TO 170 + END IF +* +* The nodes on the bottom level of the tree were solved +* by DLASDQ. The corresponding left and right singular vector +* matrices are in explicit form. First apply back the left +* singular vector matrices. +* + NDB1 = ( ND+1 ) / 2 + DO 130 I = NDB1, ND +* +* IC : center row of each node +* NL : number of rows of left subproblem +* NR : number of rows of right subproblem +* NLF: starting row of the left subproblem +* NRF: starting row of the right subproblem +* + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NR = IWORK( NDIMR+I1 ) + NLF = IC - NL + NRF = IC + 1 +* +* Since B and BX are complex, the following call to DGEMM +* is performed in two steps (real and imaginary parts). +* +* CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, +* $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) +* + J = NL*NRHS*2 + DO 20 JCOL = 1, NRHS + DO 10 JROW = NLF, NLF + NL - 1 + J = J + 1 + RWORK( J ) = DBLE( B( JROW, JCOL ) ) + 10 CONTINUE + 20 CONTINUE + CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, + $ RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1 ), NL ) + J = NL*NRHS*2 + DO 40 JCOL = 1, NRHS + DO 30 JROW = NLF, NLF + NL - 1 + J = J + 1 + RWORK( J ) = DIMAG( B( JROW, JCOL ) ) + 30 CONTINUE + 40 CONTINUE + CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU, + $ RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1+NL*NRHS ), + $ NL ) + JREAL = 0 + JIMAG = NL*NRHS + DO 60 JCOL = 1, NRHS + DO 50 JROW = NLF, NLF + NL - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 50 CONTINUE + 60 CONTINUE +* +* Since B and BX are complex, the following call to DGEMM +* is performed in two steps (real and imaginary parts). +* +* CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, +* $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) +* + J = NR*NRHS*2 + DO 80 JCOL = 1, NRHS + DO 70 JROW = NRF, NRF + NR - 1 + J = J + 1 + RWORK( J ) = DBLE( B( JROW, JCOL ) ) + 70 CONTINUE + 80 CONTINUE + CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, + $ RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1 ), NR ) + J = NR*NRHS*2 + DO 100 JCOL = 1, NRHS + DO 90 JROW = NRF, NRF + NR - 1 + J = J + 1 + RWORK( J ) = DIMAG( B( JROW, JCOL ) ) + 90 CONTINUE + 100 CONTINUE + CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU, + $ RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1+NR*NRHS ), + $ NR ) + JREAL = 0 + JIMAG = NR*NRHS + DO 120 JCOL = 1, NRHS + DO 110 JROW = NRF, NRF + NR - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 110 CONTINUE + 120 CONTINUE +* + 130 CONTINUE +* +* Next copy the rows of B that correspond to unchanged rows +* in the bidiagonal matrix to BX. +* + DO 140 I = 1, ND + IC = IWORK( INODE+I-1 ) + CALL ZCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX ) + 140 CONTINUE +* +* Finally go through the left singular vector matrices of all +* the other subproblems bottom-up on the tree. +* + J = 2**NLVL + SQRE = 0 +* + DO 160 LVL = NLVL, 1, -1 + LVL2 = 2*LVL - 1 +* +* find the first node LF and last node LL on +* the current level LVL +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 150 I = LF, LL + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + J = J - 1 + CALL ZLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX, + $ B( NLF, 1 ), LDB, PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ), + $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ), + $ Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK, + $ INFO ) + 150 CONTINUE + 160 CONTINUE + GO TO 330 +* +* ICOMPQ = 1: applying back the right singular vector factors. +* + 170 CONTINUE +* +* First now go through the right singular vector matrices of all +* the tree nodes top-down. +* + J = 0 + DO 190 LVL = 1, NLVL + LVL2 = 2*LVL - 1 +* +* Find the first node LF and last node LL on +* the current level LVL. +* + IF( LVL.EQ.1 ) THEN + LF = 1 + LL = 1 + ELSE + LF = 2**( LVL-1 ) + LL = 2*LF - 1 + END IF + DO 180 I = LL, LF, -1 + IM1 = I - 1 + IC = IWORK( INODE+IM1 ) + NL = IWORK( NDIML+IM1 ) + NR = IWORK( NDIMR+IM1 ) + NLF = IC - NL + NRF = IC + 1 + IF( I.EQ.LL ) THEN + SQRE = 0 + ELSE + SQRE = 1 + END IF + J = J + 1 + CALL ZLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB, + $ BX( NLF, 1 ), LDBX, PERM( NLF, LVL ), + $ GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL, + $ GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ), + $ DIFL( NLF, LVL ), DIFR( NLF, LVL2 ), + $ Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK, + $ INFO ) + 180 CONTINUE + 190 CONTINUE +* +* The nodes on the bottom level of the tree were solved +* by DLASDQ. The corresponding right singular vector +* matrices are in explicit form. Apply them back. +* + NDB1 = ( ND+1 ) / 2 + DO 320 I = NDB1, ND + I1 = I - 1 + IC = IWORK( INODE+I1 ) + NL = IWORK( NDIML+I1 ) + NR = IWORK( NDIMR+I1 ) + NLP1 = NL + 1 + IF( I.EQ.ND ) THEN + NRP1 = NR + ELSE + NRP1 = NR + 1 + END IF + NLF = IC - NL + NRF = IC + 1 +* +* Since B and BX are complex, the following call to DGEMM is +* performed in two steps (real and imaginary parts). +* +* CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, +* $ B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX ) +* + J = NLP1*NRHS*2 + DO 210 JCOL = 1, NRHS + DO 200 JROW = NLF, NLF + NLP1 - 1 + J = J + 1 + RWORK( J ) = DBLE( B( JROW, JCOL ) ) + 200 CONTINUE + 210 CONTINUE + CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, + $ RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO, RWORK( 1 ), + $ NLP1 ) + J = NLP1*NRHS*2 + DO 230 JCOL = 1, NRHS + DO 220 JROW = NLF, NLF + NLP1 - 1 + J = J + 1 + RWORK( J ) = DIMAG( B( JROW, JCOL ) ) + 220 CONTINUE + 230 CONTINUE + CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU, + $ RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO, + $ RWORK( 1+NLP1*NRHS ), NLP1 ) + JREAL = 0 + JIMAG = NLP1*NRHS + DO 250 JCOL = 1, NRHS + DO 240 JROW = NLF, NLF + NLP1 - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 240 CONTINUE + 250 CONTINUE +* +* Since B and BX are complex, the following call to DGEMM is +* performed in two steps (real and imaginary parts). +* +* CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, +* $ B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX ) +* + J = NRP1*NRHS*2 + DO 270 JCOL = 1, NRHS + DO 260 JROW = NRF, NRF + NRP1 - 1 + J = J + 1 + RWORK( J ) = DBLE( B( JROW, JCOL ) ) + 260 CONTINUE + 270 CONTINUE + CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, + $ RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO, RWORK( 1 ), + $ NRP1 ) + J = NRP1*NRHS*2 + DO 290 JCOL = 1, NRHS + DO 280 JROW = NRF, NRF + NRP1 - 1 + J = J + 1 + RWORK( J ) = DIMAG( B( JROW, JCOL ) ) + 280 CONTINUE + 290 CONTINUE + CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU, + $ RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO, + $ RWORK( 1+NRP1*NRHS ), NRP1 ) + JREAL = 0 + JIMAG = NRP1*NRHS + DO 310 JCOL = 1, NRHS + DO 300 JROW = NRF, NRF + NRP1 - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 300 CONTINUE + 310 CONTINUE +* + 320 CONTINUE +* + 330 CONTINUE +* + RETURN +* +* End of ZLALSA +* + END diff --git a/dspl/liblapack/SRC/zlalsd.f b/dspl/liblapack/SRC/zlalsd.f new file mode 100644 index 0000000..409ebe3 --- /dev/null +++ b/dspl/liblapack/SRC/zlalsd.f @@ -0,0 +1,693 @@ +*> \brief \b ZLALSD uses the singular value decomposition of A to solve the least squares problem. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLALSD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND, +* RANK, WORK, RWORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS, RANK, SMLSIZ +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), RWORK( * ) +* COMPLEX*16 B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLALSD uses the singular value decomposition of A to solve the least +*> squares problem of finding X to minimize the Euclidean norm of each +*> column of A*X-B, where A is N-by-N upper bidiagonal, and X and B +*> are N-by-NRHS. The solution X overwrites B. +*> +*> The singular values of A smaller than RCOND times the largest +*> singular value are treated as zero in solving the least squares +*> problem; in this case a minimum norm solution is returned. +*> The actual singular values are returned in D in ascending order. +*> +*> This code makes very mild assumptions about floating point +*> arithmetic. It will work on machines with a guard digit in +*> add/subtract, or on those binary machines without guard digits +*> which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. +*> It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': D and E define an upper bidiagonal matrix. +*> = 'L': D and E define a lower bidiagonal matrix. +*> \endverbatim +*> +*> \param[in] SMLSIZ +*> \verbatim +*> SMLSIZ is INTEGER +*> The maximum size of the subproblems at the bottom of the +*> computation tree. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the bidiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of columns of B. NRHS must be at least 1. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry D contains the main diagonal of the bidiagonal +*> matrix. On exit, if INFO = 0, D contains its singular values. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> Contains the super-diagonal entries of the bidiagonal matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On input, B contains the right hand sides of the least +*> squares problem. On output, B contains the solution X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of B in the calling subprogram. +*> LDB must be at least max(1,N). +*> \endverbatim +*> +*> \param[in] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The singular values of A less than or equal to RCOND times +*> the largest singular value are treated as zero in solving +*> the least squares problem. If RCOND is negative, +*> machine precision is used instead. +*> For example, if diag(S)*X=B were the least squares problem, +*> where diag(S) is a diagonal matrix of singular values, the +*> solution would be X(i) = B(i) / S(i) if S(i) is greater than +*> RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to +*> RCOND*max(S). +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The number of singular values of A greater than RCOND times +*> the largest singular value. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N * NRHS) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension at least +*> (9*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + +*> MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS ), +*> where +*> NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension at least +*> (3*N*NLVL + 11*N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: The algorithm failed to compute a singular value while +*> working on the submatrix lying in rows and columns +*> INFO/(N+1) through MOD(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Ming Gu and Ren-Cang Li, Computer Science Division, University of +*> California at Berkeley, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +* +* ===================================================================== + SUBROUTINE ZLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND, + $ RANK, WORK, RWORK, IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS, RANK, SMLSIZ + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), RWORK( * ) + COMPLEX*16 B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + INTEGER BX, BXST, C, DIFL, DIFR, GIVCOL, GIVNUM, + $ GIVPTR, I, ICMPQ1, ICMPQ2, IRWB, IRWIB, IRWRB, + $ IRWU, IRWVT, IRWWRK, IWK, J, JCOL, JIMAG, + $ JREAL, JROW, K, NLVL, NM1, NRWORK, NSIZE, NSUB, + $ PERM, POLES, S, SIZEI, SMLSZP, SQRE, ST, ST1, + $ U, VT, Z + DOUBLE PRECISION CS, EPS, ORGNRM, RCND, R, SN, TOL +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL IDAMAX, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DGEMM, DLARTG, DLASCL, DLASDA, DLASDQ, DLASET, + $ DLASRT, XERBLA, ZCOPY, ZDROT, ZLACPY, ZLALSA, + $ ZLASCL, ZLASET +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DIMAG, INT, LOG, SIGN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.1 ) THEN + INFO = -4 + ELSE IF( ( LDB.LT.1 ) .OR. ( LDB.LT.N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLALSD', -INFO ) + RETURN + END IF +* + EPS = DLAMCH( 'Epsilon' ) +* +* Set up the tolerance. +* + IF( ( RCOND.LE.ZERO ) .OR. ( RCOND.GE.ONE ) ) THEN + RCND = EPS + ELSE + RCND = RCOND + END IF +* + RANK = 0 +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + IF( D( 1 ).EQ.ZERO ) THEN + CALL ZLASET( 'A', 1, NRHS, CZERO, CZERO, B, LDB ) + ELSE + RANK = 1 + CALL ZLASCL( 'G', 0, 0, D( 1 ), ONE, 1, NRHS, B, LDB, INFO ) + D( 1 ) = ABS( D( 1 ) ) + END IF + RETURN + END IF +* +* Rotate the matrix if it is lower bidiagonal. +* + IF( UPLO.EQ.'L' ) THEN + DO 10 I = 1, N - 1 + CALL DLARTG( D( I ), E( I ), CS, SN, R ) + D( I ) = R + E( I ) = SN*D( I+1 ) + D( I+1 ) = CS*D( I+1 ) + IF( NRHS.EQ.1 ) THEN + CALL ZDROT( 1, B( I, 1 ), 1, B( I+1, 1 ), 1, CS, SN ) + ELSE + RWORK( I*2-1 ) = CS + RWORK( I*2 ) = SN + END IF + 10 CONTINUE + IF( NRHS.GT.1 ) THEN + DO 30 I = 1, NRHS + DO 20 J = 1, N - 1 + CS = RWORK( J*2-1 ) + SN = RWORK( J*2 ) + CALL ZDROT( 1, B( J, I ), 1, B( J+1, I ), 1, CS, SN ) + 20 CONTINUE + 30 CONTINUE + END IF + END IF +* +* Scale. +* + NM1 = N - 1 + ORGNRM = DLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) THEN + CALL ZLASET( 'A', N, NRHS, CZERO, CZERO, B, LDB ) + RETURN + END IF +* + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO ) + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, INFO ) +* +* If N is smaller than the minimum divide size SMLSIZ, then solve +* the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN + IRWU = 1 + IRWVT = IRWU + N*N + IRWWRK = IRWVT + N*N + IRWRB = IRWWRK + IRWIB = IRWRB + N*NRHS + IRWB = IRWIB + N*NRHS + CALL DLASET( 'A', N, N, ZERO, ONE, RWORK( IRWU ), N ) + CALL DLASET( 'A', N, N, ZERO, ONE, RWORK( IRWVT ), N ) + CALL DLASDQ( 'U', 0, N, N, N, 0, D, E, RWORK( IRWVT ), N, + $ RWORK( IRWU ), N, RWORK( IRWWRK ), 1, + $ RWORK( IRWWRK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* In the real version, B is passed to DLASDQ and multiplied +* internally by Q**H. Here B is complex and that product is +* computed below in two steps (real and imaginary parts). +* + J = IRWB - 1 + DO 50 JCOL = 1, NRHS + DO 40 JROW = 1, N + J = J + 1 + RWORK( J ) = DBLE( B( JROW, JCOL ) ) + 40 CONTINUE + 50 CONTINUE + CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWU ), N, + $ RWORK( IRWB ), N, ZERO, RWORK( IRWRB ), N ) + J = IRWB - 1 + DO 70 JCOL = 1, NRHS + DO 60 JROW = 1, N + J = J + 1 + RWORK( J ) = DIMAG( B( JROW, JCOL ) ) + 60 CONTINUE + 70 CONTINUE + CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWU ), N, + $ RWORK( IRWB ), N, ZERO, RWORK( IRWIB ), N ) + JREAL = IRWRB - 1 + JIMAG = IRWIB - 1 + DO 90 JCOL = 1, NRHS + DO 80 JROW = 1, N + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + B( JROW, JCOL ) = DCMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 80 CONTINUE + 90 CONTINUE +* + TOL = RCND*ABS( D( IDAMAX( N, D, 1 ) ) ) + DO 100 I = 1, N + IF( D( I ).LE.TOL ) THEN + CALL ZLASET( 'A', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB ) + ELSE + CALL ZLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, B( I, 1 ), + $ LDB, INFO ) + RANK = RANK + 1 + END IF + 100 CONTINUE +* +* Since B is complex, the following call to DGEMM is performed +* in two steps (real and imaginary parts). That is for V * B +* (in the real version of the code V**H is stored in WORK). +* +* CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO, +* $ WORK( NWORK ), N ) +* + J = IRWB - 1 + DO 120 JCOL = 1, NRHS + DO 110 JROW = 1, N + J = J + 1 + RWORK( J ) = DBLE( B( JROW, JCOL ) ) + 110 CONTINUE + 120 CONTINUE + CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWVT ), N, + $ RWORK( IRWB ), N, ZERO, RWORK( IRWRB ), N ) + J = IRWB - 1 + DO 140 JCOL = 1, NRHS + DO 130 JROW = 1, N + J = J + 1 + RWORK( J ) = DIMAG( B( JROW, JCOL ) ) + 130 CONTINUE + 140 CONTINUE + CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWVT ), N, + $ RWORK( IRWB ), N, ZERO, RWORK( IRWIB ), N ) + JREAL = IRWRB - 1 + JIMAG = IRWIB - 1 + DO 160 JCOL = 1, NRHS + DO 150 JROW = 1, N + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + B( JROW, JCOL ) = DCMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 150 CONTINUE + 160 CONTINUE +* +* Unscale. +* + CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) + CALL DLASRT( 'D', N, D, INFO ) + CALL ZLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO ) +* + RETURN + END IF +* +* Book-keeping and setting up some constants. +* + NLVL = INT( LOG( DBLE( N ) / DBLE( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1 +* + SMLSZP = SMLSIZ + 1 +* + U = 1 + VT = 1 + SMLSIZ*N + DIFL = VT + SMLSZP*N + DIFR = DIFL + NLVL*N + Z = DIFR + NLVL*N*2 + C = Z + NLVL*N + S = C + N + POLES = S + N + GIVNUM = POLES + 2*NLVL*N + NRWORK = GIVNUM + 2*NLVL*N + BX = 1 +* + IRWRB = NRWORK + IRWIB = IRWRB + SMLSIZ*NRHS + IRWB = IRWIB + SMLSIZ*NRHS +* + SIZEI = 1 + N + K = SIZEI + N + GIVPTR = K + N + PERM = GIVPTR + N + GIVCOL = PERM + NLVL*N + IWK = GIVCOL + NLVL*N*2 +* + ST = 1 + SQRE = 0 + ICMPQ1 = 1 + ICMPQ2 = 0 + NSUB = 0 +* + DO 170 I = 1, N + IF( ABS( D( I ) ).LT.EPS ) THEN + D( I ) = SIGN( EPS, D( I ) ) + END IF + 170 CONTINUE +* + DO 240 I = 1, NM1 + IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN + NSUB = NSUB + 1 + IWORK( NSUB ) = ST +* +* Subproblem found. First determine its size and then +* apply divide and conquer on it. +* + IF( I.LT.NM1 ) THEN +* +* A subproblem with E(I) small for I < NM1. +* + NSIZE = I - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + ELSE IF( ABS( E( I ) ).GE.EPS ) THEN +* +* A subproblem with E(NM1) not too small but I = NM1. +* + NSIZE = N - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + ELSE +* +* A subproblem with E(NM1) small. This implies an +* 1-by-1 subproblem at D(N), which is not solved +* explicitly. +* + NSIZE = I - ST + 1 + IWORK( SIZEI+NSUB-1 ) = NSIZE + NSUB = NSUB + 1 + IWORK( NSUB ) = N + IWORK( SIZEI+NSUB-1 ) = 1 + CALL ZCOPY( NRHS, B( N, 1 ), LDB, WORK( BX+NM1 ), N ) + END IF + ST1 = ST - 1 + IF( NSIZE.EQ.1 ) THEN +* +* This is a 1-by-1 subproblem and is not solved +* explicitly. +* + CALL ZCOPY( NRHS, B( ST, 1 ), LDB, WORK( BX+ST1 ), N ) + ELSE IF( NSIZE.LE.SMLSIZ ) THEN +* +* This is a small subproblem and is solved by DLASDQ. +* + CALL DLASET( 'A', NSIZE, NSIZE, ZERO, ONE, + $ RWORK( VT+ST1 ), N ) + CALL DLASET( 'A', NSIZE, NSIZE, ZERO, ONE, + $ RWORK( U+ST1 ), N ) + CALL DLASDQ( 'U', 0, NSIZE, NSIZE, NSIZE, 0, D( ST ), + $ E( ST ), RWORK( VT+ST1 ), N, RWORK( U+ST1 ), + $ N, RWORK( NRWORK ), 1, RWORK( NRWORK ), + $ INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF +* +* In the real version, B is passed to DLASDQ and multiplied +* internally by Q**H. Here B is complex and that product is +* computed below in two steps (real and imaginary parts). +* + J = IRWB - 1 + DO 190 JCOL = 1, NRHS + DO 180 JROW = ST, ST + NSIZE - 1 + J = J + 1 + RWORK( J ) = DBLE( B( JROW, JCOL ) ) + 180 CONTINUE + 190 CONTINUE + CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( U+ST1 ), N, RWORK( IRWB ), NSIZE, + $ ZERO, RWORK( IRWRB ), NSIZE ) + J = IRWB - 1 + DO 210 JCOL = 1, NRHS + DO 200 JROW = ST, ST + NSIZE - 1 + J = J + 1 + RWORK( J ) = DIMAG( B( JROW, JCOL ) ) + 200 CONTINUE + 210 CONTINUE + CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( U+ST1 ), N, RWORK( IRWB ), NSIZE, + $ ZERO, RWORK( IRWIB ), NSIZE ) + JREAL = IRWRB - 1 + JIMAG = IRWIB - 1 + DO 230 JCOL = 1, NRHS + DO 220 JROW = ST, ST + NSIZE - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + B( JROW, JCOL ) = DCMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 220 CONTINUE + 230 CONTINUE +* + CALL ZLACPY( 'A', NSIZE, NRHS, B( ST, 1 ), LDB, + $ WORK( BX+ST1 ), N ) + ELSE +* +* A large problem. Solve it using divide and conquer. +* + CALL DLASDA( ICMPQ1, SMLSIZ, NSIZE, SQRE, D( ST ), + $ E( ST ), RWORK( U+ST1 ), N, RWORK( VT+ST1 ), + $ IWORK( K+ST1 ), RWORK( DIFL+ST1 ), + $ RWORK( DIFR+ST1 ), RWORK( Z+ST1 ), + $ RWORK( POLES+ST1 ), IWORK( GIVPTR+ST1 ), + $ IWORK( GIVCOL+ST1 ), N, IWORK( PERM+ST1 ), + $ RWORK( GIVNUM+ST1 ), RWORK( C+ST1 ), + $ RWORK( S+ST1 ), RWORK( NRWORK ), + $ IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + BXST = BX + ST1 + CALL ZLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, B( ST, 1 ), + $ LDB, WORK( BXST ), N, RWORK( U+ST1 ), N, + $ RWORK( VT+ST1 ), IWORK( K+ST1 ), + $ RWORK( DIFL+ST1 ), RWORK( DIFR+ST1 ), + $ RWORK( Z+ST1 ), RWORK( POLES+ST1 ), + $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N, + $ IWORK( PERM+ST1 ), RWORK( GIVNUM+ST1 ), + $ RWORK( C+ST1 ), RWORK( S+ST1 ), + $ RWORK( NRWORK ), IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + ST = I + 1 + END IF + 240 CONTINUE +* +* Apply the singular values and treat the tiny ones as zero. +* + TOL = RCND*ABS( D( IDAMAX( N, D, 1 ) ) ) +* + DO 250 I = 1, N +* +* Some of the elements in D can be negative because 1-by-1 +* subproblems were not solved explicitly. +* + IF( ABS( D( I ) ).LE.TOL ) THEN + CALL ZLASET( 'A', 1, NRHS, CZERO, CZERO, WORK( BX+I-1 ), N ) + ELSE + RANK = RANK + 1 + CALL ZLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, + $ WORK( BX+I-1 ), N, INFO ) + END IF + D( I ) = ABS( D( I ) ) + 250 CONTINUE +* +* Now apply back the right singular vectors. +* + ICMPQ2 = 1 + DO 320 I = 1, NSUB + ST = IWORK( I ) + ST1 = ST - 1 + NSIZE = IWORK( SIZEI+I-1 ) + BXST = BX + ST1 + IF( NSIZE.EQ.1 ) THEN + CALL ZCOPY( NRHS, WORK( BXST ), N, B( ST, 1 ), LDB ) + ELSE IF( NSIZE.LE.SMLSIZ ) THEN +* +* Since B and BX are complex, the following call to DGEMM +* is performed in two steps (real and imaginary parts). +* +* CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, +* $ RWORK( VT+ST1 ), N, RWORK( BXST ), N, ZERO, +* $ B( ST, 1 ), LDB ) +* + J = BXST - N - 1 + JREAL = IRWB - 1 + DO 270 JCOL = 1, NRHS + J = J + N + DO 260 JROW = 1, NSIZE + JREAL = JREAL + 1 + RWORK( JREAL ) = DBLE( WORK( J+JROW ) ) + 260 CONTINUE + 270 CONTINUE + CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( VT+ST1 ), N, RWORK( IRWB ), NSIZE, ZERO, + $ RWORK( IRWRB ), NSIZE ) + J = BXST - N - 1 + JIMAG = IRWB - 1 + DO 290 JCOL = 1, NRHS + J = J + N + DO 280 JROW = 1, NSIZE + JIMAG = JIMAG + 1 + RWORK( JIMAG ) = DIMAG( WORK( J+JROW ) ) + 280 CONTINUE + 290 CONTINUE + CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE, + $ RWORK( VT+ST1 ), N, RWORK( IRWB ), NSIZE, ZERO, + $ RWORK( IRWIB ), NSIZE ) + JREAL = IRWRB - 1 + JIMAG = IRWIB - 1 + DO 310 JCOL = 1, NRHS + DO 300 JROW = ST, ST + NSIZE - 1 + JREAL = JREAL + 1 + JIMAG = JIMAG + 1 + B( JROW, JCOL ) = DCMPLX( RWORK( JREAL ), + $ RWORK( JIMAG ) ) + 300 CONTINUE + 310 CONTINUE + ELSE + CALL ZLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, WORK( BXST ), N, + $ B( ST, 1 ), LDB, RWORK( U+ST1 ), N, + $ RWORK( VT+ST1 ), IWORK( K+ST1 ), + $ RWORK( DIFL+ST1 ), RWORK( DIFR+ST1 ), + $ RWORK( Z+ST1 ), RWORK( POLES+ST1 ), + $ IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N, + $ IWORK( PERM+ST1 ), RWORK( GIVNUM+ST1 ), + $ RWORK( C+ST1 ), RWORK( S+ST1 ), + $ RWORK( NRWORK ), IWORK( IWK ), INFO ) + IF( INFO.NE.0 ) THEN + RETURN + END IF + END IF + 320 CONTINUE +* +* Unscale and sort the singular values. +* + CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO ) + CALL DLASRT( 'D', N, D, INFO ) + CALL ZLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO ) +* + RETURN +* +* End of ZLALSD +* + END diff --git a/dspl/liblapack/SRC/zlamswlq.f b/dspl/liblapack/SRC/zlamswlq.f new file mode 100644 index 0000000..0e0b0a1 --- /dev/null +++ b/dspl/liblapack/SRC/zlamswlq.f @@ -0,0 +1,419 @@ +* +* Definition: +* =========== +* +* SUBROUTINE ZLAMSWLQ( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, +* $ LDT, C, LDC, WORK, LWORK, INFO ) +* +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), WORK( * ), C(LDC, * ), +* $ T( LDT, * ) +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAMQRTS overwrites the general real M-by-N matrix C with +*> +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> where Q is a real orthogonal matrix defined as the product of blocked +*> elementary reflectors computed by short wide LQ +*> factorization (ZLASWLQ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Conjugate Transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >=0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> M >= K >= 0; +*> +*> \endverbatim +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The row block size to be used in the blocked QR. +*> M >= MB >= 1 +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The column block size to be used in the blocked QR. +*> NB > M. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The block size to be used in the blocked QR. +*> MB > M. +*> +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the blocked +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> ZLASWLQ in the first k rows of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX*16 array, dimension +*> ( M * Number of blocks(CEIL(N-K/NB-K)), +*> The blocked upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See below +*> for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,NB) * MB; +*> if SIDE = 'R', LWORK >= max(1,M) * MB. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> Short-Wide LQ (SWLQ) performs LQ by a sequence of orthogonal transformations, +*> representing Q as a product of other orthogonal matrices +*> Q = Q(1) * Q(2) * . . . * Q(k) +*> where each Q(i) zeros out upper diagonal entries of a block of NB rows of A: +*> Q(1) zeros out the upper diagonal entries of rows 1:NB of A +*> Q(2) zeros out the bottom MB-N rows of rows [1:M,NB+1:2*NB-M] of A +*> Q(3) zeros out the bottom MB-N rows of rows [1:M,2*NB-M+1:3*NB-2*M] of A +*> . . . +*> +*> Q(1) is computed by GELQT, which represents Q(1) by Householder vectors +*> stored under the diagonal of rows 1:MB of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,1:N). +*> For more information see Further Details in GELQT. +*> +*> Q(i) for i>1 is computed by TPLQT, which represents Q(i) by Householder vectors +*> stored in columns [(i-1)*(NB-M)+M+1:i*(NB-M)+M] of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,(i-1)*M+1:i*M). +*> The last Q(k) may use fewer rows. +*> For more information see Further Details in TPQRT. +*> +*> For more details of the overall algorithm, see the description of +*> Sequential TSQR in Section 2.2 of [1]. +*> +*> [1] “Communication-Optimal Parallel and Sequential QR and LU Factorizations,” +*> J. Demmel, L. Grigori, M. Hoemmen, J. Langou, +*> SIAM J. Sci. Comput, vol. 34, no. 1, 2012 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLAMSWLQ( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, + $ LDT, C, LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), WORK( * ), C(LDC, * ), + $ T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN, LQUERY + INTEGER I, II, KK, LW, CTR +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL ZTPMLQT, ZGEMLQT, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + LQUERY = LWORK.LT.0 + NOTRAN = LSAME( TRANS, 'N' ) + TRAN = LSAME( TRANS, 'C' ) + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + IF (LEFT) THEN + LW = N * MB + ELSE + LW = M * MB + END IF +* + INFO = 0 + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -9 + ELSE IF( LDT.LT.MAX( 1, MB) ) THEN + INFO = -11 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -13 + ELSE IF(( LWORK.LT.MAX(1,LW)).AND.(.NOT.LQUERY)) THEN + INFO = -15 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLAMSWLQ', -INFO ) + WORK(1) = LW + RETURN + ELSE IF (LQUERY) THEN + WORK(1) = LW + RETURN + END IF +* +* Quick return if possible +* + IF( MIN(M,N,K).EQ.0 ) THEN + RETURN + END IF +* + IF((NB.LE.K).OR.(NB.GE.MAX(M,N,K))) THEN + CALL ZGEMLQT( SIDE, TRANS, M, N, K, MB, A, LDA, + $ T, LDT, C, LDC, WORK, INFO) + RETURN + END IF +* + IF(LEFT.AND.TRAN) THEN +* +* Multiply Q to the last block of C +* + KK = MOD((M-K),(NB-K)) + CTR = (M-K)/(NB-K) +* + IF (KK.GT.0) THEN + II=M-KK+1 + CALL ZTPMLQT('L','C',KK , N, K, 0, MB, A(1,II), LDA, + $ T(1,CTR*K+1), LDT, C(1,1), LDC, + $ C(II,1), LDC, WORK, INFO ) + ELSE + II=M+1 + END IF +* + DO I=II-(NB-K),NB+1,-(NB-K) +* +* Multiply Q to the current block of C (1:M,I:I+NB) +* + CTR = CTR - 1 + CALL ZTPMLQT('L','C',NB-K , N, K, 0,MB, A(1,I), LDA, + $ T(1,CTR*K+1),LDT, C(1,1), LDC, + $ C(I,1), LDC, WORK, INFO ) + + END DO +* +* Multiply Q to the first block of C (1:M,1:NB) +* + CALL ZGEMLQT('L','C',NB , N, K, MB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + ELSE IF (LEFT.AND.NOTRAN) THEN +* +* Multiply Q to the first block of C +* + KK = MOD((M-K),(NB-K)) + II=M-KK+1 + CTR = 1 + CALL ZGEMLQT('L','N',NB , N, K, MB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + DO I=NB+1,II-NB+K,(NB-K) +* +* Multiply Q to the current block of C (I:I+NB,1:N) +* + CALL ZTPMLQT('L','N',NB-K , N, K, 0,MB, A(1,I), LDA, + $ T(1, CTR * K + 1), LDT, C(1,1), LDC, + $ C(I,1), LDC, WORK, INFO ) + CTR = CTR + 1 +* + END DO + IF(II.LE.M) THEN +* +* Multiply Q to the last block of C +* + CALL ZTPMLQT('L','N',KK , N, K, 0, MB, A(1,II), LDA, + $ T(1, CTR * K + 1), LDT, C(1,1), LDC, + $ C(II,1), LDC, WORK, INFO ) +* + END IF +* + ELSE IF(RIGHT.AND.NOTRAN) THEN +* +* Multiply Q to the last block of C +* + KK = MOD((N-K),(NB-K)) + CTR = (N-K)/(NB-K) + IF (KK.GT.0) THEN + II=N-KK+1 + CALL ZTPMLQT('R','N',M , KK, K, 0, MB, A(1, II), LDA, + $ T(1, CTR * K + 1), LDT, C(1,1), LDC, + $ C(1,II), LDC, WORK, INFO ) + ELSE + II=N+1 + END IF +* + DO I=II-(NB-K),NB+1,-(NB-K) +* +* Multiply Q to the current block of C (1:M,I:I+MB) +* + CTR = CTR - 1 + CALL ZTPMLQT('R','N', M, NB-K, K, 0, MB, A(1, I), LDA, + $ T(1, CTR * K + 1), LDT, C(1,1), LDC, + $ C(1,I), LDC, WORK, INFO ) + + END DO +* +* Multiply Q to the first block of C (1:M,1:MB) +* + CALL ZGEMLQT('R','N',M , NB, K, MB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + ELSE IF (RIGHT.AND.TRAN) THEN +* +* Multiply Q to the first block of C +* + KK = MOD((N-K),(NB-K)) + II=N-KK+1 + CALL ZGEMLQT('R','C',M , NB, K, MB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) + CTR = 1 +* + DO I=NB+1,II-NB+K,(NB-K) +* +* Multiply Q to the current block of C (1:M,I:I+MB) +* + CALL ZTPMLQT('R','C',M , NB-K, K, 0,MB, A(1,I), LDA, + $ T(1,CTR *K+1), LDT, C(1,1), LDC, + $ C(1,I), LDC, WORK, INFO ) + CTR = CTR + 1 +* + END DO + IF(II.LE.N) THEN +* +* Multiply Q to the last block of C +* + CALL ZTPMLQT('R','C',M , KK, K, 0,MB, A(1,II), LDA, + $ T(1, CTR * K + 1),LDT, C(1,1), LDC, + $ C(1,II), LDC, WORK, INFO ) +* + END IF +* + END IF +* + WORK(1) = LW + RETURN +* +* End of ZLAMSWLQ +* + END diff --git a/dspl/liblapack/SRC/zlamtsqr.f b/dspl/liblapack/SRC/zlamtsqr.f new file mode 100644 index 0000000..1ee7324 --- /dev/null +++ b/dspl/liblapack/SRC/zlamtsqr.f @@ -0,0 +1,415 @@ +* +* Definition: +* =========== +* +* SUBROUTINE ZLAMTSQR( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, +* $ LDT, C, LDC, WORK, LWORK, INFO ) +* +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), WORK( * ), C(LDC, * ), +* $ T( LDT, * ) +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAMTSQR overwrites the general complex M-by-N matrix C with +*> +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> where Q is a real orthogonal matrix defined as the product +*> of blocked elementary reflectors computed by tall skinny +*> QR factorization (ZLATSQR) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Conjugate Transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >=0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> N >= K >= 0; +*> +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The block size to be used in the blocked QR. +*> MB > N. (must be the same as DLATSQR) +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The column block size to be used in the blocked QR. +*> N >= NB >= 1. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> blockedelementary reflector H(i), for i = 1,2,...,k, as +*> returned by DLATSQR in the first k columns of +*> its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX*16 array, dimension +*> ( N * Number of blocks(CEIL(M-K/MB-K)), +*> The blocked upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See below +*> for further details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> +*> \endverbatim +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If SIDE = 'L', LWORK >= max(1,N)*NB; +*> if SIDE = 'R', LWORK >= max(1,MB)*NB. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> +*> \endverbatim +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> Tall-Skinny QR (TSQR) performs QR by a sequence of orthogonal transformations, +*> representing Q as a product of other orthogonal matrices +*> Q = Q(1) * Q(2) * . . . * Q(k) +*> where each Q(i) zeros out subdiagonal entries of a block of MB rows of A: +*> Q(1) zeros out the subdiagonal entries of rows 1:MB of A +*> Q(2) zeros out the bottom MB-N rows of rows [1:N,MB+1:2*MB-N] of A +*> Q(3) zeros out the bottom MB-N rows of rows [1:N,2*MB-N+1:3*MB-2*N] of A +*> . . . +*> +*> Q(1) is computed by GEQRT, which represents Q(1) by Householder vectors +*> stored under the diagonal of rows 1:MB of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,1:N). +*> For more information see Further Details in GEQRT. +*> +*> Q(i) for i>1 is computed by TPQRT, which represents Q(i) by Householder vectors +*> stored in rows [(i-1)*(MB-N)+N+1:i*(MB-N)+N] of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,(i-1)*N+1:i*N). +*> The last Q(k) may use fewer rows. +*> For more information see Further Details in TPQRT. +*> +*> For more details of the overall algorithm, see the description of +*> Sequential TSQR in Section 2.2 of [1]. +*> +*> [1] “Communication-Optimal Parallel and Sequential QR and LU Factorizations,” +*> J. Demmel, L. Grigori, M. Hoemmen, J. Langou, +*> SIAM J. Sci. Comput, vol. 34, no. 1, 2012 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLAMTSQR( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T, + $ LDT, C, LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), WORK( * ), C(LDC, * ), + $ T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN, LQUERY + INTEGER I, II, KK, LW, CTR +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. External Subroutines .. + EXTERNAL ZGEMQRT, ZTPMQRT, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + LQUERY = LWORK.LT.0 + NOTRAN = LSAME( TRANS, 'N' ) + TRAN = LSAME( TRANS, 'C' ) + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + IF (LEFT) THEN + LW = N * NB + ELSE + LW = M * NB + END IF +* + INFO = 0 + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -9 + ELSE IF( LDT.LT.MAX( 1, NB) ) THEN + INFO = -11 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -13 + ELSE IF(( LWORK.LT.MAX(1,LW)).AND.(.NOT.LQUERY)) THEN + INFO = -15 + END IF +* +* Determine the block size if it is tall skinny or short and wide +* + IF( INFO.EQ.0) THEN + WORK(1) = LW + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLAMTSQR', -INFO ) + RETURN + ELSE IF (LQUERY) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN(M,N,K).EQ.0 ) THEN + RETURN + END IF +* + IF((MB.LE.K).OR.(MB.GE.MAX(M,N,K))) THEN + CALL ZGEMQRT( SIDE, TRANS, M, N, K, NB, A, LDA, + $ T, LDT, C, LDC, WORK, INFO) + RETURN + END IF +* + IF(LEFT.AND.NOTRAN) THEN +* +* Multiply Q to the last block of C +* + KK = MOD((M-K),(MB-K)) + CTR = (M-K)/(MB-K) + IF (KK.GT.0) THEN + II=M-KK+1 + CALL ZTPMQRT('L','N',KK , N, K, 0, NB, A(II,1), LDA, + $ T(1, CTR * K + 1),LDT , C(1,1), LDC, + $ C(II,1), LDC, WORK, INFO ) + ELSE + II=M+1 + END IF +* + DO I=II-(MB-K),MB+1,-(MB-K) +* +* Multiply Q to the current block of C (I:I+MB,1:N) +* + CTR = CTR - 1 + CALL ZTPMQRT('L','N',MB-K , N, K, 0,NB, A(I,1), LDA, + $ T(1,CTR * K + 1),LDT, C(1,1), LDC, + $ C(I,1), LDC, WORK, INFO ) + + END DO +* +* Multiply Q to the first block of C (1:MB,1:N) +* + CALL ZGEMQRT('L','N',MB , N, K, NB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + ELSE IF (LEFT.AND.TRAN) THEN +* +* Multiply Q to the first block of C +* + KK = MOD((M-K),(MB-K)) + II=M-KK+1 + CTR = 1 + CALL ZGEMQRT('L','C',MB , N, K, NB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + DO I=MB+1,II-MB+K,(MB-K) +* +* Multiply Q to the current block of C (I:I+MB,1:N) +* + CALL ZTPMQRT('L','C',MB-K , N, K, 0,NB, A(I,1), LDA, + $ T(1,CTR * K + 1),LDT, C(1,1), LDC, + $ C(I,1), LDC, WORK, INFO ) + CTR = CTR + 1 +* + END DO + IF(II.LE.M) THEN +* +* Multiply Q to the last block of C +* + CALL ZTPMQRT('L','C',KK , N, K, 0,NB, A(II,1), LDA, + $ T(1, CTR * K + 1), LDT, C(1,1), LDC, + $ C(II,1), LDC, WORK, INFO ) +* + END IF +* + ELSE IF(RIGHT.AND.TRAN) THEN +* +* Multiply Q to the last block of C +* + KK = MOD((N-K),(MB-K)) + CTR = (N-K)/(MB-K) + IF (KK.GT.0) THEN + II=N-KK+1 + CALL ZTPMQRT('R','C',M , KK, K, 0, NB, A(II,1), LDA, + $ T(1,CTR * K + 1), LDT, C(1,1), LDC, + $ C(1,II), LDC, WORK, INFO ) + ELSE + II=N+1 + END IF +* + DO I=II-(MB-K),MB+1,-(MB-K) +* +* Multiply Q to the current block of C (1:M,I:I+MB) +* + CTR = CTR - 1 + CALL ZTPMQRT('R','C',M , MB-K, K, 0,NB, A(I,1), LDA, + $ T(1, CTR * K + 1), LDT, C(1,1), LDC, + $ C(1,I), LDC, WORK, INFO ) + + END DO +* +* Multiply Q to the first block of C (1:M,1:MB) +* + CALL ZGEMQRT('R','C',M , MB, K, NB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + ELSE IF (RIGHT.AND.NOTRAN) THEN +* +* Multiply Q to the first block of C +* + KK = MOD((N-K),(MB-K)) + II=N-KK+1 + CTR = 1 + CALL ZGEMQRT('R','N', M, MB , K, NB, A(1,1), LDA, T + $ ,LDT ,C(1,1), LDC, WORK, INFO ) +* + DO I=MB+1,II-MB+K,(MB-K) +* +* Multiply Q to the current block of C (1:M,I:I+MB) +* + CALL ZTPMQRT('R','N', M, MB-K, K, 0,NB, A(I,1), LDA, + $ T(1, CTR * K + 1),LDT, C(1,1), LDC, + $ C(1,I), LDC, WORK, INFO ) + CTR = CTR + 1 +* + END DO + IF(II.LE.N) THEN +* +* Multiply Q to the last block of C +* + CALL ZTPMQRT('R','N', M, KK , K, 0,NB, A(II,1), LDA, + $ T(1,CTR * K + 1),LDT, C(1,1), LDC, + $ C(1,II), LDC, WORK, INFO ) +* + END IF +* + END IF +* + WORK(1) = LW + RETURN +* +* End of ZLAMTSQR +* + END diff --git a/dspl/liblapack/SRC/zlangb.f b/dspl/liblapack/SRC/zlangb.f new file mode 100644 index 0000000..949bb2c --- /dev/null +++ b/dspl/liblapack/SRC/zlangb.f @@ -0,0 +1,226 @@ +*> \brief \b ZLANGB returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of general band matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLANGB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLANGB( NORM, N, KL, KU, AB, LDAB, +* WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER KL, KU, LDAB, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION WORK( * ) +* COMPLEX*16 AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLANGB returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of an +*> n by n band matrix A, with kl sub-diagonals and ku super-diagonals. +*> \endverbatim +*> +*> \return ZLANGB +*> \verbatim +*> +*> ZLANGB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in ZLANGB as described +*> above. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, ZLANGB is +*> set to zero. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of sub-diagonals of the matrix A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of super-diagonals of the matrix A. KU >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> The band matrix A, stored in rows 1 to KL+KU+1. The j-th +*> column of A is stored in the j-th column of the array AB as +*> follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KL+KU+1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GBauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLANGB( NORM, N, KL, KU, AB, LDAB, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER KL, KU, LDAB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, K, L + DOUBLE PRECISION SCALE, SUM, VALUE, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) + TEMP = ABS( AB( I, J ) ) + IF( VALUE.LT.TEMP .OR. DISNAN( TEMP ) ) VALUE = TEMP + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 30 CONTINUE + IF( VALUE.LT.SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, N + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + K = KU + 1 - J + DO 60 I = MAX( 1, J-KU ), MIN( N, J+KL ) + WORK( I ) = WORK( I ) + ABS( AB( K+I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, N + TEMP = WORK( I ) + IF( VALUE.LT.TEMP .OR. DISNAN( TEMP ) ) VALUE = TEMP + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + L = MAX( 1, J-KU ) + K = KU + 1 - J + L + CALL ZLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANGB = VALUE + RETURN +* +* End of ZLANGB +* + END diff --git a/dspl/liblapack/SRC/zlange.f b/dspl/liblapack/SRC/zlange.f new file mode 100644 index 0000000..5407dec --- /dev/null +++ b/dspl/liblapack/SRC/zlange.f @@ -0,0 +1,213 @@ +*> \brief \b ZLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLANGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLANGE( NORM, M, N, A, LDA, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION WORK( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLANGE returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> complex matrix A. +*> \endverbatim +*> +*> \return ZLANGE +*> \verbatim +*> +*> ZLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in ZLANGE as described +*> above. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. When M = 0, +*> ZLANGE is set to zero. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. When N = 0, +*> ZLANGE is set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(M,1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= M when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLANGE( NORM, M, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION SCALE, SUM, VALUE, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, M + TEMP = ABS( A( I, J ) ) + IF( VALUE.LT.TEMP .OR. DISNAN( TEMP ) ) VALUE = TEMP + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, M + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + IF( VALUE.LT.SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, M + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, M + TEMP = WORK( I ) + IF( VALUE.LT.TEMP .OR. DISNAN( TEMP ) ) VALUE = TEMP + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL ZLASSQ( M, A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANGE = VALUE + RETURN +* +* End of ZLANGE +* + END diff --git a/dspl/liblapack/SRC/zlangt.f b/dspl/liblapack/SRC/zlangt.f new file mode 100644 index 0000000..f55904a --- /dev/null +++ b/dspl/liblapack/SRC/zlangt.f @@ -0,0 +1,208 @@ +*> \brief \b ZLANGT returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of a general tridiagonal matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLANGT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLANGT( NORM, N, DL, D, DU ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER N +* .. +* .. Array Arguments .. +* COMPLEX*16 D( * ), DL( * ), DU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLANGT returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> complex tridiagonal matrix A. +*> \endverbatim +*> +*> \return ZLANGT +*> \verbatim +*> +*> ZLANGT = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in ZLANGT as described +*> above. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, ZLANGT is +*> set to zero. +*> \endverbatim +*> +*> \param[in] DL +*> \verbatim +*> DL is COMPLEX*16 array, dimension (N-1) +*> The (n-1) sub-diagonal elements of A. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is COMPLEX*16 array, dimension (N) +*> The diagonal elements of A. +*> \endverbatim +*> +*> \param[in] DU +*> \verbatim +*> DU is COMPLEX*16 array, dimension (N-1) +*> The (n-1) super-diagonal elements of A. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLANGT( NORM, N, DL, D, DU ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER N +* .. +* .. Array Arguments .. + COMPLEX*16 D( * ), DL( * ), DU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION ANORM, SCALE, SUM, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + ANORM = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + ANORM = ABS( D( N ) ) + DO 10 I = 1, N - 1 + IF( ANORM.LT.ABS( DL( I ) ) .OR. DISNAN( ABS( DL( I ) ) ) ) + $ ANORM = ABS(DL(I)) + IF( ANORM.LT.ABS( D( I ) ) .OR. DISNAN( ABS( D( I ) ) ) ) + $ ANORM = ABS(D(I)) + IF( ANORM.LT.ABS( DU( I ) ) .OR. DISNAN (ABS( DU( I ) ) ) ) + $ ANORM = ABS(DU(I)) + 10 CONTINUE + ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN +* +* Find norm1(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = ABS( D( 1 ) )+ABS( DL( 1 ) ) + TEMP = ABS( D( N ) )+ABS( DU( N-1 ) ) + IF( ANORM .LT. TEMP .OR. DISNAN( TEMP ) ) ANORM = TEMP + DO 20 I = 2, N - 1 + TEMP = ABS( D( I ) )+ABS( DL( I ) )+ABS( DU( I-1 ) ) + IF( ANORM .LT. TEMP .OR. DISNAN( TEMP ) ) ANORM = TEMP + 20 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = ABS( D( 1 ) )+ABS( DU( 1 ) ) + TEMP = ABS( D( N ) )+ABS( DL( N-1 ) ) + IF( ANORM .LT. TEMP .OR. DISNAN( TEMP ) ) ANORM = TEMP + DO 30 I = 2, N - 1 + TEMP = ABS( D( I ) )+ABS( DU( I ) )+ABS( DL( I-1 ) ) + IF( ANORM .LT. TEMP .OR. DISNAN( TEMP ) ) ANORM = TEMP + 30 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + CALL ZLASSQ( N, D, 1, SCALE, SUM ) + IF( N.GT.1 ) THEN + CALL ZLASSQ( N-1, DL, 1, SCALE, SUM ) + CALL ZLASSQ( N-1, DU, 1, SCALE, SUM ) + END IF + ANORM = SCALE*SQRT( SUM ) + END IF +* + ZLANGT = ANORM + RETURN +* +* End of ZLANGT +* + END diff --git a/dspl/liblapack/SRC/zlanhb.f b/dspl/liblapack/SRC/zlanhb.f new file mode 100644 index 0000000..b371780 --- /dev/null +++ b/dspl/liblapack/SRC/zlanhb.f @@ -0,0 +1,276 @@ +*> \brief \b ZLANHB returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a Hermitian band matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLANHB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLANHB( NORM, UPLO, N, K, AB, LDAB, +* WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, UPLO +* INTEGER K, LDAB, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION WORK( * ) +* COMPLEX*16 AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLANHB returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of an +*> n by n hermitian band matrix A, with k super-diagonals. +*> \endverbatim +*> +*> \return ZLANHB +*> \verbatim +*> +*> ZLANHB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in ZLANHB as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> band matrix A is supplied. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, ZLANHB is +*> set to zero. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of super-diagonals or sub-diagonals of the +*> band matrix A. K >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> The upper or lower triangle of the hermitian band matrix A, +*> stored in the first K+1 rows of AB. The j-th column of A is +*> stored in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). +*> Note that the imaginary parts of the diagonal elements need +*> not be set and are assumed to be zero. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= K+1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLANHB( NORM, UPLO, N, K, AB, LDAB, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER K, LDAB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L + DOUBLE PRECISION ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = MAX( K+2-J, 1 ), K + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + SUM = ABS( DBLE( AB( K+1, J ) ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 20 CONTINUE + ELSE + DO 40 J = 1, N + SUM = ABS( DBLE( AB( 1, J ) ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + DO 30 I = 2, MIN( N+1-J, K+1 ) + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is hermitian). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + L = K + 1 - J + DO 50 I = MAX( 1, J-K ), J - 1 + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( DBLE( AB( K+1, J ) ) ) + 60 CONTINUE + DO 70 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( DBLE( AB( 1, J ) ) ) + L = 1 - J + DO 90 I = J + 1, MIN( N, J+K ) + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( K.GT.0 ) THEN + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL ZLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ), + $ 1, SCALE, SUM ) + 110 CONTINUE + L = K + 1 + ELSE + DO 120 J = 1, N - 1 + CALL ZLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, + $ SUM ) + 120 CONTINUE + L = 1 + END IF + SUM = 2*SUM + ELSE + L = 1 + END IF + DO 130 J = 1, N + IF( DBLE( AB( L, J ) ).NE.ZERO ) THEN + ABSA = ABS( DBLE( AB( L, J ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANHB = VALUE + RETURN +* +* End of ZLANHB +* + END diff --git a/dspl/liblapack/SRC/zlanhe.f b/dspl/liblapack/SRC/zlanhe.f new file mode 100644 index 0000000..7c7f7f3 --- /dev/null +++ b/dspl/liblapack/SRC/zlanhe.f @@ -0,0 +1,258 @@ +*> \brief \b ZLANHE returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a complex Hermitian matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLANHE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLANHE( NORM, UPLO, N, A, LDA, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, UPLO +* INTEGER LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION WORK( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLANHE returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> complex hermitian matrix A. +*> \endverbatim +*> +*> \return ZLANHE +*> \verbatim +*> +*> ZLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in ZLANHE as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> hermitian matrix A is to be referenced. +*> = 'U': Upper triangular part of A is referenced +*> = 'L': Lower triangular part of A is referenced +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, ZLANHE is +*> set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The hermitian matrix A. If UPLO = 'U', the leading n by n +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading n by n lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. Note that the imaginary parts of the diagonal +*> elements need not be set and are assumed to be zero. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(N,1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLANHE( NORM, UPLO, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, J - 1 + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + SUM = ABS( DBLE( A( J, J ) ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 20 CONTINUE + ELSE + DO 40 J = 1, N + SUM = ABS( DBLE( A( J, J ) ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + DO 30 I = J + 1, N + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is hermitian). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( DBLE( A( J, J ) ) ) + 60 CONTINUE + DO 70 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( DBLE( A( J, J ) ) ) + DO 90 I = J + 1, N + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL ZLASSQ( J-1, A( 1, J ), 1, SCALE, SUM ) + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL ZLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM ) + 120 CONTINUE + END IF + SUM = 2*SUM + DO 130 I = 1, N + IF( DBLE( A( I, I ) ).NE.ZERO ) THEN + ABSA = ABS( DBLE( A( I, I ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANHE = VALUE + RETURN +* +* End of ZLANHE +* + END diff --git a/dspl/liblapack/SRC/zlanhf.f b/dspl/liblapack/SRC/zlanhf.f new file mode 100644 index 0000000..0e73c86 --- /dev/null +++ b/dspl/liblapack/SRC/zlanhf.f @@ -0,0 +1,1573 @@ +*> \brief \b ZLANHF returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a Hermitian matrix in RFP format. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLANHF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLANHF( NORM, TRANSR, UPLO, N, A, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, TRANSR, UPLO +* INTEGER N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION WORK( 0: * ) +* COMPLEX*16 A( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLANHF returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> complex Hermitian matrix A in RFP format. +*> \endverbatim +*> +*> \return ZLANHF +*> \verbatim +*> +*> ZLANHF = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER +*> Specifies the value to be returned in ZLANHF as described +*> above. +*> \endverbatim +*> +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER +*> Specifies whether the RFP format of A is normal or +*> conjugate-transposed format. +*> = 'N': RFP format is Normal +*> = 'C': RFP format is Conjugate-transposed +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER +*> On entry, UPLO specifies whether the RFP matrix A came from +*> an upper or lower triangular matrix as follows: +*> +*> UPLO = 'U' or 'u' RFP A came from an upper triangular +*> matrix +*> +*> UPLO = 'L' or 'l' RFP A came from a lower triangular +*> matrix +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, ZLANHF is +*> set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension ( N*(N+1)/2 ); +*> On entry, the matrix A in RFP Format. +*> RFP Format is described by TRANSR, UPLO and N as follows: +*> If TRANSR='N' then RFP A is (0:N,0:K-1) when N is even; +*> K=N/2. RFP A is (0:N-1,0:K) when N is odd; K=N/2. If +*> TRANSR = 'C' then RFP is the Conjugate-transpose of RFP A +*> as defined when TRANSR = 'N'. The contents of RFP A are +*> defined by UPLO as follows: If UPLO = 'U' the RFP A +*> contains the ( N*(N+1)/2 ) elements of upper packed A +*> either in normal or conjugate-transpose Format. If +*> UPLO = 'L' the RFP A contains the ( N*(N+1) /2 ) elements +*> of lower packed A either in normal or conjugate-transpose +*> Format. The LDA of RFP A is (N+1)/2 when TRANSR = 'C'. When +*> TRANSR is 'N' the LDA is N+1 when N is even and is N when +*> is odd. See the Note below for more details. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Standard Packed Format when N is even. +*> We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> conjugate-transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> conjugate-transpose of the last three columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- -- +*> 03 04 05 33 43 53 +*> -- -- +*> 13 14 15 00 44 54 +*> -- +*> 23 24 25 10 11 55 +*> +*> 33 34 35 20 21 22 +*> -- +*> 00 44 45 30 31 32 +*> -- -- +*> 01 11 55 40 41 42 +*> -- -- -- +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- -- +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We next consider Standard Packed Format when N is odd. +*> We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> conjugate-transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> conjugate-transpose of the last two columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- +*> 02 03 04 00 33 43 +*> -- +*> 12 13 14 10 11 44 +*> +*> 22 23 24 20 21 22 +*> -- +*> 00 33 34 30 31 32 +*> -- -- +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- +*> 02 12 22 00 01 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 11 33 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLANHF( NORM, TRANSR, UPLO, N, A, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, TRANSR, UPLO + INTEGER N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( 0: * ) + COMPLEX*16 A( 0: * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, IFM, ILU, NOE, N1, K, L, LDA + DOUBLE PRECISION SCALE, S, VALUE, AA, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + ZLANHF = ZERO + RETURN + ELSE IF( N.EQ.1 ) THEN + ZLANHF = ABS(DBLE(A(0))) + RETURN + END IF +* +* set noe = 1 if n is odd. if n is even set noe=0 +* + NOE = 1 + IF( MOD( N, 2 ).EQ.0 ) + $ NOE = 0 +* +* set ifm = 0 when form='C' or 'c' and 1 otherwise +* + IFM = 1 + IF( LSAME( TRANSR, 'C' ) ) + $ IFM = 0 +* +* set ilu = 0 when uplo='U or 'u' and 1 otherwise +* + ILU = 1 + IF( LSAME( UPLO, 'U' ) ) + $ ILU = 0 +* +* set lda = (n+1)/2 when ifm = 0 +* set lda = n when ifm = 1 and noe = 1 +* set lda = n+1 when ifm = 1 and noe = 0 +* + IF( IFM.EQ.1 ) THEN + IF( NOE.EQ.1 ) THEN + LDA = N + ELSE +* noe=0 + LDA = N + 1 + END IF + ELSE +* ifm=0 + LDA = ( N+1 ) / 2 + END IF +* + IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + K = ( N+1 ) / 2 + VALUE = ZERO + IF( NOE.EQ.1 ) THEN +* n is odd & n = k + k - 1 + IF( IFM.EQ.1 ) THEN +* A is n by k + IF( ILU.EQ.1 ) THEN +* uplo ='L' + J = 0 +* -> L(0,0) + TEMP = ABS( DBLE( A( J+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = 1, N - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + DO J = 1, K - 1 + DO I = 0, J - 2 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = J - 1 +* L(k+j,k+j) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + I = J +* -> L(j,j) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = J + 1, N - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + ELSE +* uplo = 'U' + DO J = 0, K - 2 + DO I = 0, K + J - 2 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = K + J - 1 +* -> U(i,i) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + I = I + 1 +* =k+j; i -> U(j,j) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = K + J + 1, N - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + DO I = 0, N - 2 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP +* j=k-1 + END DO +* i=n-1 -> U(n-1,n-1) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END IF + ELSE +* xpose case; A is k by n + IF( ILU.EQ.1 ) THEN +* uplo ='L' + DO J = 0, K - 2 + DO I = 0, J - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = J +* L(i,i) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + I = J + 1 +* L(j+k,j+k) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = J + 2, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + J = K - 1 + DO I = 0, K - 2 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = K - 1 +* -> L(i,i) is at A(i,j) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + DO J = K, N - 1 + DO I = 0, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + ELSE +* uplo = 'U' + DO J = 0, K - 2 + DO I = 0, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + J = K - 1 +* -> U(j,j) is at A(0,j) + TEMP = ABS( DBLE( A( 0+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = 1, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + DO J = K, N - 1 + DO I = 0, J - K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = J - K +* -> U(i,i) at A(i,j) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + I = J - K + 1 +* U(j,j) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = J - K + 2, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + END IF + END IF + ELSE +* n is even & k = n/2 + IF( IFM.EQ.1 ) THEN +* A is n+1 by k + IF( ILU.EQ.1 ) THEN +* uplo ='L' + J = 0 +* -> L(k,k) & j=1 -> L(0,0) + TEMP = ABS( DBLE( A( J+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + TEMP = ABS( DBLE( A( J+1+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = 2, N + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + DO J = 1, K - 1 + DO I = 0, J - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = J +* L(k+j,k+j) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + I = J + 1 +* -> L(j,j) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = J + 2, N + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + ELSE +* uplo = 'U' + DO J = 0, K - 2 + DO I = 0, K + J - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = K + J +* -> U(i,i) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + I = I + 1 +* =k+j+1; i -> U(j,j) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = K + J + 2, N + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + DO I = 0, N - 2 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP +* j=k-1 + END DO +* i=n-1 -> U(n-1,n-1) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + I = N +* -> U(k-1,k-1) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END IF + ELSE +* xpose case; A is k by n+1 + IF( ILU.EQ.1 ) THEN +* uplo ='L' + J = 0 +* -> L(k,k) at A(0,0) + TEMP = ABS( DBLE( A( J+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = 1, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + DO J = 1, K - 1 + DO I = 0, J - 2 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = J - 1 +* L(i,i) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + I = J +* L(j+k,j+k) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = J + 1, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + J = K + DO I = 0, K - 2 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = K - 1 +* -> L(i,i) is at A(i,j) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + DO J = K + 1, N + DO I = 0, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + ELSE +* uplo = 'U' + DO J = 0, K - 1 + DO I = 0, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + J = K +* -> U(j,j) is at A(0,j) + TEMP = ABS( DBLE( A( 0+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = 1, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + DO J = K + 1, N - 1 + DO I = 0, J - K - 2 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = J - K - 1 +* -> U(i,i) at A(i,j) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + I = J - K +* U(j,j) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + DO I = J - K + 1, K - 1 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END DO + J = N + DO I = 0, K - 2 + TEMP = ABS( A( I+J*LDA ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + I = K - 1 +* U(k,k) at A(i,j) + TEMP = ABS( DBLE( A( I+J*LDA ) ) ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END IF + END IF + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is Hermitian). +* + IF( IFM.EQ.1 ) THEN +* A is 'N' + K = N / 2 + IF( NOE.EQ.1 ) THEN +* n is odd & A is n by (n+1)/2 + IF( ILU.EQ.0 ) THEN +* uplo = 'U' + DO I = 0, K - 1 + WORK( I ) = ZERO + END DO + DO J = 0, K + S = ZERO + DO I = 0, K + J - 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(i,j+k) + S = S + AA + WORK( I ) = WORK( I ) + AA + END DO + AA = ABS( DBLE( A( I+J*LDA ) ) ) +* -> A(j+k,j+k) + WORK( J+K ) = S + AA + IF( I.EQ.K+K ) + $ GO TO 10 + I = I + 1 + AA = ABS( DBLE( A( I+J*LDA ) ) ) +* -> A(j,j) + WORK( J ) = WORK( J ) + AA + S = ZERO + DO L = J + 1, K - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(l,j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + 10 CONTINUE + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + ELSE +* ilu = 1 & uplo = 'L' + K = K + 1 +* k=(n+1)/2 for n odd and ilu=1 + DO I = K, N - 1 + WORK( I ) = ZERO + END DO + DO J = K - 1, 0, -1 + S = ZERO + DO I = 0, J - 2 + AA = ABS( A( I+J*LDA ) ) +* -> A(j+k,i+k) + S = S + AA + WORK( I+K ) = WORK( I+K ) + AA + END DO + IF( J.GT.0 ) THEN + AA = ABS( DBLE( A( I+J*LDA ) ) ) +* -> A(j+k,j+k) + S = S + AA + WORK( I+K ) = WORK( I+K ) + S +* i=j + I = I + 1 + END IF + AA = ABS( DBLE( A( I+J*LDA ) ) ) +* -> A(j,j) + WORK( J ) = AA + S = ZERO + DO L = J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(l,j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END IF + ELSE +* n is even & A is n+1 by k = n/2 + IF( ILU.EQ.0 ) THEN +* uplo = 'U' + DO I = 0, K - 1 + WORK( I ) = ZERO + END DO + DO J = 0, K - 1 + S = ZERO + DO I = 0, K + J - 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(i,j+k) + S = S + AA + WORK( I ) = WORK( I ) + AA + END DO + AA = ABS( DBLE( A( I+J*LDA ) ) ) +* -> A(j+k,j+k) + WORK( J+K ) = S + AA + I = I + 1 + AA = ABS( DBLE( A( I+J*LDA ) ) ) +* -> A(j,j) + WORK( J ) = WORK( J ) + AA + S = ZERO + DO L = J + 1, K - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(l,j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + ELSE +* ilu = 1 & uplo = 'L' + DO I = K, N - 1 + WORK( I ) = ZERO + END DO + DO J = K - 1, 0, -1 + S = ZERO + DO I = 0, J - 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(j+k,i+k) + S = S + AA + WORK( I+K ) = WORK( I+K ) + AA + END DO + AA = ABS( DBLE( A( I+J*LDA ) ) ) +* -> A(j+k,j+k) + S = S + AA + WORK( I+K ) = WORK( I+K ) + S +* i=j + I = I + 1 + AA = ABS( DBLE( A( I+J*LDA ) ) ) +* -> A(j,j) + WORK( J ) = AA + S = ZERO + DO L = J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* -> A(l,j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END IF + END IF + ELSE +* ifm=0 + K = N / 2 + IF( NOE.EQ.1 ) THEN +* n is odd & A is (n+1)/2 by n + IF( ILU.EQ.0 ) THEN +* uplo = 'U' + N1 = K +* n/2 + K = K + 1 +* k is the row size and lda + DO I = N1, N - 1 + WORK( I ) = ZERO + END DO + DO J = 0, N1 - 1 + S = ZERO + DO I = 0, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,n1+i) + WORK( I+N1 ) = WORK( I+N1 ) + AA + S = S + AA + END DO + WORK( J ) = S + END DO +* j=n1=k-1 is special + S = ABS( DBLE( A( 0+J*LDA ) ) ) +* A(k-1,k-1) + DO I = 1, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(k-1,i+n1) + WORK( I+N1 ) = WORK( I+N1 ) + AA + S = S + AA + END DO + WORK( J ) = WORK( J ) + S + DO J = K, N - 1 + S = ZERO + DO I = 0, J - K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(i,j-k) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO +* i=j-k + AA = ABS( DBLE( A( I+J*LDA ) ) ) +* A(j-k,j-k) + S = S + AA + WORK( J-K ) = WORK( J-K ) + S + I = I + 1 + S = ABS( DBLE( A( I+J*LDA ) ) ) +* A(j,j) + DO L = J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,l) + WORK( L ) = WORK( L ) + AA + S = S + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + ELSE +* ilu=1 & uplo = 'L' + K = K + 1 +* k=(n+1)/2 for n odd and ilu=1 + DO I = K, N - 1 + WORK( I ) = ZERO + END DO + DO J = 0, K - 2 +* process + S = ZERO + DO I = 0, J - 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO + AA = ABS( DBLE( A( I+J*LDA ) ) ) +* i=j so process of A(j,j) + S = S + AA + WORK( J ) = S +* is initialised here + I = I + 1 +* i=j process A(j+k,j+k) + AA = ABS( DBLE( A( I+J*LDA ) ) ) + S = AA + DO L = K + J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* A(l,k+j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( K+J ) = WORK( K+J ) + S + END DO +* j=k-1 is special :process col A(k-1,0:k-1) + S = ZERO + DO I = 0, K - 2 + AA = ABS( A( I+J*LDA ) ) +* A(k,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO +* i=k-1 + AA = ABS( DBLE( A( I+J*LDA ) ) ) +* A(k-1,k-1) + S = S + AA + WORK( I ) = S +* done with col j=k+1 + DO J = K, N - 1 +* process col j of A = A(j,0:k-1) + S = ZERO + DO I = 0, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO + WORK( J ) = WORK( J ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END IF + ELSE +* n is even & A is k=n/2 by n+1 + IF( ILU.EQ.0 ) THEN +* uplo = 'U' + DO I = K, N - 1 + WORK( I ) = ZERO + END DO + DO J = 0, K - 1 + S = ZERO + DO I = 0, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,i+k) + WORK( I+K ) = WORK( I+K ) + AA + S = S + AA + END DO + WORK( J ) = S + END DO +* j=k + AA = ABS( DBLE( A( 0+J*LDA ) ) ) +* A(k,k) + S = AA + DO I = 1, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(k,k+i) + WORK( I+K ) = WORK( I+K ) + AA + S = S + AA + END DO + WORK( J ) = WORK( J ) + S + DO J = K + 1, N - 1 + S = ZERO + DO I = 0, J - 2 - K + AA = ABS( A( I+J*LDA ) ) +* A(i,j-k-1) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO +* i=j-1-k + AA = ABS( DBLE( A( I+J*LDA ) ) ) +* A(j-k-1,j-k-1) + S = S + AA + WORK( J-K-1 ) = WORK( J-K-1 ) + S + I = I + 1 + AA = ABS( DBLE( A( I+J*LDA ) ) ) +* A(j,j) + S = AA + DO L = J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* A(j,l) + WORK( L ) = WORK( L ) + AA + S = S + AA + END DO + WORK( J ) = WORK( J ) + S + END DO +* j=n + S = ZERO + DO I = 0, K - 2 + AA = ABS( A( I+J*LDA ) ) +* A(i,k-1) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO +* i=k-1 + AA = ABS( DBLE( A( I+J*LDA ) ) ) +* A(k-1,k-1) + S = S + AA + WORK( I ) = WORK( I ) + S + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + ELSE +* ilu=1 & uplo = 'L' + DO I = K, N - 1 + WORK( I ) = ZERO + END DO +* j=0 is special :process col A(k:n-1,k) + S = ABS( DBLE( A( 0 ) ) ) +* A(k,k) + DO I = 1, K - 1 + AA = ABS( A( I ) ) +* A(k+i,k) + WORK( I+K ) = WORK( I+K ) + AA + S = S + AA + END DO + WORK( K ) = WORK( K ) + S + DO J = 1, K - 1 +* process + S = ZERO + DO I = 0, J - 2 + AA = ABS( A( I+J*LDA ) ) +* A(j-1,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO + AA = ABS( DBLE( A( I+J*LDA ) ) ) +* i=j-1 so process of A(j-1,j-1) + S = S + AA + WORK( J-1 ) = S +* is initialised here + I = I + 1 +* i=j process A(j+k,j+k) + AA = ABS( DBLE( A( I+J*LDA ) ) ) + S = AA + DO L = K + J + 1, N - 1 + I = I + 1 + AA = ABS( A( I+J*LDA ) ) +* A(l,k+j) + S = S + AA + WORK( L ) = WORK( L ) + AA + END DO + WORK( K+J ) = WORK( K+J ) + S + END DO +* j=k is special :process col A(k,0:k-1) + S = ZERO + DO I = 0, K - 2 + AA = ABS( A( I+J*LDA ) ) +* A(k,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO +* +* i=k-1 + AA = ABS( DBLE( A( I+J*LDA ) ) ) +* A(k-1,k-1) + S = S + AA + WORK( I ) = S +* done with col j=k+1 + DO J = K + 1, N +* +* process col j-1 of A = A(j-1,0:k-1) + S = ZERO + DO I = 0, K - 1 + AA = ABS( A( I+J*LDA ) ) +* A(j-1,i) + WORK( I ) = WORK( I ) + AA + S = S + AA + END DO + WORK( J-1 ) = WORK( J-1 ) + S + END DO + VALUE = WORK( 0 ) + DO I = 1, N-1 + TEMP = WORK( I ) + IF( VALUE .LT. TEMP .OR. DISNAN( TEMP ) ) + $ VALUE = TEMP + END DO + END IF + END IF + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + K = ( N+1 ) / 2 + SCALE = ZERO + S = ONE + IF( NOE.EQ.1 ) THEN +* n is odd + IF( IFM.EQ.1 ) THEN +* A is normal & A is n by k + IF( ILU.EQ.0 ) THEN +* A is upper + DO J = 0, K - 3 + CALL ZLASSQ( K-J-2, A( K+J+1+J*LDA ), 1, SCALE, S ) +* L at A(k,0) + END DO + DO J = 0, K - 1 + CALL ZLASSQ( K+J-1, A( 0+J*LDA ), 1, SCALE, S ) +* trap U at A(0,0) + END DO + S = S + S +* double s for the off diagonal elements + L = K - 1 +* -> U(k,k) at A(k-1,0) + DO I = 0, K - 2 + AA = DBLE( A( L ) ) +* U(k+i,k+i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + AA = DBLE( A( L+1 ) ) +* U(i,i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = L + LDA + 1 + END DO + AA = DBLE( A( L ) ) +* U(n-1,n-1) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + ELSE +* ilu=1 & A is lower + DO J = 0, K - 1 + CALL ZLASSQ( N-J-1, A( J+1+J*LDA ), 1, SCALE, S ) +* trap L at A(0,0) + END DO + DO J = 1, K - 2 + CALL ZLASSQ( J, A( 0+( 1+J )*LDA ), 1, SCALE, S ) +* U at A(0,1) + END DO + S = S + S +* double s for the off diagonal elements + AA = DBLE( A( 0 ) ) +* L(0,0) at A(0,0) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = LDA +* -> L(k,k) at A(0,1) + DO I = 1, K - 1 + AA = DBLE( A( L ) ) +* L(k-1+i,k-1+i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + AA = DBLE( A( L+1 ) ) +* L(i,i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = L + LDA + 1 + END DO + END IF + ELSE +* A is xpose & A is k by n + IF( ILU.EQ.0 ) THEN +* A**H is upper + DO J = 1, K - 2 + CALL ZLASSQ( J, A( 0+( K+J )*LDA ), 1, SCALE, S ) +* U at A(0,k) + END DO + DO J = 0, K - 2 + CALL ZLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) +* k by k-1 rect. at A(0,0) + END DO + DO J = 0, K - 2 + CALL ZLASSQ( K-J-1, A( J+1+( J+K-1 )*LDA ), 1, + $ SCALE, S ) +* L at A(0,k-1) + END DO + S = S + S +* double s for the off diagonal elements + L = 0 + K*LDA - LDA +* -> U(k-1,k-1) at A(0,k-1) + AA = DBLE( A( L ) ) +* U(k-1,k-1) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = L + LDA +* -> U(0,0) at A(0,k) + DO J = K, N - 1 + AA = DBLE( A( L ) ) +* -> U(j-k,j-k) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + AA = DBLE( A( L+1 ) ) +* -> U(j,j) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = L + LDA + 1 + END DO + ELSE +* A**H is lower + DO J = 1, K - 1 + CALL ZLASSQ( J, A( 0+J*LDA ), 1, SCALE, S ) +* U at A(0,0) + END DO + DO J = K, N - 1 + CALL ZLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) +* k by k-1 rect. at A(0,k) + END DO + DO J = 0, K - 3 + CALL ZLASSQ( K-J-2, A( J+2+J*LDA ), 1, SCALE, S ) +* L at A(1,0) + END DO + S = S + S +* double s for the off diagonal elements + L = 0 +* -> L(0,0) at A(0,0) + DO I = 0, K - 2 + AA = DBLE( A( L ) ) +* L(i,i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + AA = DBLE( A( L+1 ) ) +* L(k+i,k+i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = L + LDA + 1 + END DO +* L-> k-1 + (k-1)*lda or L(k-1,k-1) at A(k-1,k-1) + AA = DBLE( A( L ) ) +* L(k-1,k-1) at A(k-1,k-1) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + END IF + END IF + ELSE +* n is even + IF( IFM.EQ.1 ) THEN +* A is normal + IF( ILU.EQ.0 ) THEN +* A is upper + DO J = 0, K - 2 + CALL ZLASSQ( K-J-1, A( K+J+2+J*LDA ), 1, SCALE, S ) +* L at A(k+1,0) + END DO + DO J = 0, K - 1 + CALL ZLASSQ( K+J, A( 0+J*LDA ), 1, SCALE, S ) +* trap U at A(0,0) + END DO + S = S + S +* double s for the off diagonal elements + L = K +* -> U(k,k) at A(k,0) + DO I = 0, K - 1 + AA = DBLE( A( L ) ) +* U(k+i,k+i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + AA = DBLE( A( L+1 ) ) +* U(i,i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = L + LDA + 1 + END DO + ELSE +* ilu=1 & A is lower + DO J = 0, K - 1 + CALL ZLASSQ( N-J-1, A( J+2+J*LDA ), 1, SCALE, S ) +* trap L at A(1,0) + END DO + DO J = 1, K - 1 + CALL ZLASSQ( J, A( 0+J*LDA ), 1, SCALE, S ) +* U at A(0,0) + END DO + S = S + S +* double s for the off diagonal elements + L = 0 +* -> L(k,k) at A(0,0) + DO I = 0, K - 1 + AA = DBLE( A( L ) ) +* L(k-1+i,k-1+i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + AA = DBLE( A( L+1 ) ) +* L(i,i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = L + LDA + 1 + END DO + END IF + ELSE +* A is xpose + IF( ILU.EQ.0 ) THEN +* A**H is upper + DO J = 1, K - 1 + CALL ZLASSQ( J, A( 0+( K+1+J )*LDA ), 1, SCALE, S ) +* U at A(0,k+1) + END DO + DO J = 0, K - 1 + CALL ZLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) +* k by k rect. at A(0,0) + END DO + DO J = 0, K - 2 + CALL ZLASSQ( K-J-1, A( J+1+( J+K )*LDA ), 1, SCALE, + $ S ) +* L at A(0,k) + END DO + S = S + S +* double s for the off diagonal elements + L = 0 + K*LDA +* -> U(k,k) at A(0,k) + AA = DBLE( A( L ) ) +* U(k,k) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = L + LDA +* -> U(0,0) at A(0,k+1) + DO J = K + 1, N - 1 + AA = DBLE( A( L ) ) +* -> U(j-k-1,j-k-1) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + AA = DBLE( A( L+1 ) ) +* -> U(j,j) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = L + LDA + 1 + END DO +* L=k-1+n*lda +* -> U(k-1,k-1) at A(k-1,n) + AA = DBLE( A( L ) ) +* U(k,k) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + ELSE +* A**H is lower + DO J = 1, K - 1 + CALL ZLASSQ( J, A( 0+( J+1 )*LDA ), 1, SCALE, S ) +* U at A(0,1) + END DO + DO J = K + 1, N + CALL ZLASSQ( K, A( 0+J*LDA ), 1, SCALE, S ) +* k by k rect. at A(0,k+1) + END DO + DO J = 0, K - 2 + CALL ZLASSQ( K-J-1, A( J+1+J*LDA ), 1, SCALE, S ) +* L at A(0,0) + END DO + S = S + S +* double s for the off diagonal elements + L = 0 +* -> L(k,k) at A(0,0) + AA = DBLE( A( L ) ) +* L(k,k) at A(0,0) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = LDA +* -> L(0,0) at A(0,1) + DO I = 0, K - 2 + AA = DBLE( A( L ) ) +* L(i,i) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + AA = DBLE( A( L+1 ) ) +* L(k+i+1,k+i+1) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + L = L + LDA + 1 + END DO +* L-> k - 1 + k*lda or L(k-1,k-1) at A(k-1,k) + AA = DBLE( A( L ) ) +* L(k-1,k-1) at A(k-1,k) + IF( AA.NE.ZERO ) THEN + IF( SCALE.LT.AA ) THEN + S = ONE + S*( SCALE / AA )**2 + SCALE = AA + ELSE + S = S + ( AA / SCALE )**2 + END IF + END IF + END IF + END IF + END IF + VALUE = SCALE*SQRT( S ) + END IF +* + ZLANHF = VALUE + RETURN +* +* End of ZLANHF +* + END diff --git a/dspl/liblapack/SRC/zlanhp.f b/dspl/liblapack/SRC/zlanhp.f new file mode 100644 index 0000000..9ded607 --- /dev/null +++ b/dspl/liblapack/SRC/zlanhp.f @@ -0,0 +1,269 @@ +*> \brief \b ZLANHP returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a complex Hermitian matrix supplied in packed form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLANHP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLANHP( NORM, UPLO, N, AP, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, UPLO +* INTEGER N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION WORK( * ) +* COMPLEX*16 AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLANHP returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> complex hermitian matrix A, supplied in packed form. +*> \endverbatim +*> +*> \return ZLANHP +*> \verbatim +*> +*> ZLANHP = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in ZLANHP as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> hermitian matrix A is supplied. +*> = 'U': Upper triangular part of A is supplied +*> = 'L': Lower triangular part of A is supplied +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, ZLANHP is +*> set to zero. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the hermitian matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> Note that the imaginary parts of the diagonal elements need +*> not be set and are assumed to be zero. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLANHP( NORM, UPLO, N, AP, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, K + DOUBLE PRECISION ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + K = 0 + DO 20 J = 1, N + DO 10 I = K + 1, K + J - 1 + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + K = K + J + SUM = ABS( DBLE( AP( K ) ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 20 CONTINUE + ELSE + K = 1 + DO 40 J = 1, N + SUM = ABS( DBLE( AP( K ) ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + DO 30 I = K + 1, K + N - J + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + K = K + N - J + 1 + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is hermitian). +* + VALUE = ZERO + K = 1 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 50 CONTINUE + WORK( J ) = SUM + ABS( DBLE( AP( K ) ) ) + K = K + 1 + 60 CONTINUE + DO 70 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( DBLE( AP( K ) ) ) + K = K + 1 + DO 90 I = J + 1, N + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 90 CONTINUE + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + K = 2 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL ZLASSQ( J-1, AP( K ), 1, SCALE, SUM ) + K = K + J + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL ZLASSQ( N-J, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 120 CONTINUE + END IF + SUM = 2*SUM + K = 1 + DO 130 I = 1, N + IF( DBLE( AP( K ) ).NE.ZERO ) THEN + ABSA = ABS( DBLE( AP( K ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + IF( LSAME( UPLO, 'U' ) ) THEN + K = K + I + 1 + ELSE + K = K + N - I + 1 + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANHP = VALUE + RETURN +* +* End of ZLANHP +* + END diff --git a/dspl/liblapack/SRC/zlanhs.f b/dspl/liblapack/SRC/zlanhs.f new file mode 100644 index 0000000..f2d36b3 --- /dev/null +++ b/dspl/liblapack/SRC/zlanhs.f @@ -0,0 +1,207 @@ +*> \brief \b ZLANHS returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value of any element of an upper Hessenberg matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLANHS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLANHS( NORM, N, A, LDA, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION WORK( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLANHS returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> Hessenberg matrix A. +*> \endverbatim +*> +*> \return ZLANHS +*> \verbatim +*> +*> ZLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in ZLANHS as described +*> above. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, ZLANHS is +*> set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The n by n upper Hessenberg matrix A; the part of A below the +*> first sub-diagonal is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(N,1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLANHS( NORM, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + DO 20 J = 1, N + DO 10 I = 1, MIN( N, J+1 ) + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + DO 40 J = 1, N + SUM = ZERO + DO 30 I = 1, MIN( N, J+1 ) + SUM = SUM + ABS( A( I, J ) ) + 30 CONTINUE + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 40 CONTINUE + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + DO 50 I = 1, N + WORK( I ) = ZERO + 50 CONTINUE + DO 70 J = 1, N + DO 60 I = 1, MIN( N, J+1 ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 60 CONTINUE + 70 CONTINUE + VALUE = ZERO + DO 80 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 80 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + DO 90 J = 1, N + CALL ZLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM ) + 90 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANHS = VALUE + RETURN +* +* End of ZLANHS +* + END diff --git a/dspl/liblapack/SRC/zlanht.f b/dspl/liblapack/SRC/zlanht.f new file mode 100644 index 0000000..70f9e3c --- /dev/null +++ b/dspl/liblapack/SRC/zlanht.f @@ -0,0 +1,188 @@ +*> \brief \b ZLANHT returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a complex Hermitian tridiagonal matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLANHT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLANHT( NORM, N, D, E ) +* +* .. Scalar Arguments .. +* CHARACTER NORM +* INTEGER N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ) +* COMPLEX*16 E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLANHT returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> complex Hermitian tridiagonal matrix A. +*> \endverbatim +*> +*> \return ZLANHT +*> \verbatim +*> +*> ZLANHT = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in ZLANHT as described +*> above. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, ZLANHT is +*> set to zero. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of A. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N-1) +*> The (n-1) sub-diagonal or super-diagonal elements of A. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLANHT( NORM, N, D, E ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM + INTEGER N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ) + COMPLEX*16 E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION ANORM, SCALE, SUM +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL DLASSQ, ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + ANORM = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + ANORM = ABS( D( N ) ) + DO 10 I = 1, N - 1 + SUM = ABS( D( I ) ) + IF( ANORM .LT. SUM .OR. DISNAN( SUM ) ) ANORM = SUM + SUM = ABS( E( I ) ) + IF( ANORM .LT. SUM .OR. DISNAN( SUM ) ) ANORM = SUM + 10 CONTINUE + ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' .OR. + $ LSAME( NORM, 'I' ) ) THEN +* +* Find norm1(A). +* + IF( N.EQ.1 ) THEN + ANORM = ABS( D( 1 ) ) + ELSE + ANORM = ABS( D( 1 ) )+ABS( E( 1 ) ) + SUM = ABS( E( N-1 ) )+ABS( D( N ) ) + IF( ANORM .LT. SUM .OR. DISNAN( SUM ) ) ANORM = SUM + DO 20 I = 2, N - 1 + SUM = ABS( D( I ) )+ABS( E( I ) )+ABS( E( I-1 ) ) + IF( ANORM .LT. SUM .OR. DISNAN( SUM ) ) ANORM = SUM + 20 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( N.GT.1 ) THEN + CALL ZLASSQ( N-1, E, 1, SCALE, SUM ) + SUM = 2*SUM + END IF + CALL DLASSQ( N, D, 1, SCALE, SUM ) + ANORM = SCALE*SQRT( SUM ) + END IF +* + ZLANHT = ANORM + RETURN +* +* End of ZLANHT +* + END diff --git a/dspl/liblapack/SRC/zlansb.f b/dspl/liblapack/SRC/zlansb.f new file mode 100644 index 0000000..3468c49 --- /dev/null +++ b/dspl/liblapack/SRC/zlansb.f @@ -0,0 +1,260 @@ +*> \brief \b ZLANSB returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a symmetric band matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLANSB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLANSB( NORM, UPLO, N, K, AB, LDAB, +* WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, UPLO +* INTEGER K, LDAB, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION WORK( * ) +* COMPLEX*16 AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLANSB returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of an +*> n by n symmetric band matrix A, with k super-diagonals. +*> \endverbatim +*> +*> \return ZLANSB +*> \verbatim +*> +*> ZLANSB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in ZLANSB as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> band matrix A is supplied. +*> = 'U': Upper triangular part is supplied +*> = 'L': Lower triangular part is supplied +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, ZLANSB is +*> set to zero. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of super-diagonals or sub-diagonals of the +*> band matrix A. K >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> The upper or lower triangle of the symmetric band matrix A, +*> stored in the first K+1 rows of AB. The j-th column of A is +*> stored in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= K+1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLANSB( NORM, UPLO, N, K, AB, LDAB, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER K, LDAB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L + DOUBLE PRECISION ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = MAX( K+2-J, 1 ), K + 1 + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = 1, MIN( N+1-J, K+1 ) + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + L = K + 1 - J + DO 50 I = MAX( 1, J-K ), J - 1 + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( AB( K+1, J ) ) + 60 CONTINUE + DO 70 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( AB( 1, J ) ) + L = 1 - J + DO 90 I = J + 1, MIN( N, J+K ) + ABSA = ABS( AB( L+I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( K.GT.0 ) THEN + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL ZLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ), + $ 1, SCALE, SUM ) + 110 CONTINUE + L = K + 1 + ELSE + DO 120 J = 1, N - 1 + CALL ZLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, + $ SUM ) + 120 CONTINUE + L = 1 + END IF + SUM = 2*SUM + ELSE + L = 1 + END IF + CALL ZLASSQ( N, AB( L, 1 ), LDAB, SCALE, SUM ) + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANSB = VALUE + RETURN +* +* End of ZLANSB +* + END diff --git a/dspl/liblapack/SRC/zlansp.f b/dspl/liblapack/SRC/zlansp.f new file mode 100644 index 0000000..84fb972 --- /dev/null +++ b/dspl/liblapack/SRC/zlansp.f @@ -0,0 +1,272 @@ +*> \brief \b ZLANSP returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a symmetric matrix supplied in packed form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLANSP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLANSP( NORM, UPLO, N, AP, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, UPLO +* INTEGER N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION WORK( * ) +* COMPLEX*16 AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLANSP returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> complex symmetric matrix A, supplied in packed form. +*> \endverbatim +*> +*> \return ZLANSP +*> \verbatim +*> +*> ZLANSP = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in ZLANSP as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is supplied. +*> = 'U': Upper triangular part of A is supplied +*> = 'L': Lower triangular part of A is supplied +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, ZLANSP is +*> set to zero. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the symmetric matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLANSP( NORM, UPLO, N, AP, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, K + DOUBLE PRECISION ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + K = 1 + DO 20 J = 1, N + DO 10 I = K, K + J - 1 + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + K = K + J + 20 CONTINUE + ELSE + K = 1 + DO 40 J = 1, N + DO 30 I = K, K + N - J + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + K = K + N - J + 1 + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + K = 1 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 50 CONTINUE + WORK( J ) = SUM + ABS( AP( K ) ) + K = K + 1 + 60 CONTINUE + DO 70 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( AP( K ) ) + K = K + 1 + DO 90 I = J + 1, N + ABSA = ABS( AP( K ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + K = K + 1 + 90 CONTINUE + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + K = 2 + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL ZLASSQ( J-1, AP( K ), 1, SCALE, SUM ) + K = K + J + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL ZLASSQ( N-J, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 120 CONTINUE + END IF + SUM = 2*SUM + K = 1 + DO 130 I = 1, N + IF( DBLE( AP( K ) ).NE.ZERO ) THEN + ABSA = ABS( DBLE( AP( K ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + IF( DIMAG( AP( K ) ).NE.ZERO ) THEN + ABSA = ABS( DIMAG( AP( K ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + IF( LSAME( UPLO, 'U' ) ) THEN + K = K + I + 1 + ELSE + K = K + N - I + 1 + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANSP = VALUE + RETURN +* +* End of ZLANSP +* + END diff --git a/dspl/liblapack/SRC/zlansy.f b/dspl/liblapack/SRC/zlansy.f new file mode 100644 index 0000000..58269a9 --- /dev/null +++ b/dspl/liblapack/SRC/zlansy.f @@ -0,0 +1,243 @@ +*> \brief \b ZLANSY returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a complex symmetric matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLANSY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLANSY( NORM, UPLO, N, A, LDA, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, UPLO +* INTEGER LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION WORK( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLANSY returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> complex symmetric matrix A. +*> \endverbatim +*> +*> \return ZLANSY +*> \verbatim +*> +*> ZLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in ZLANSY as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is to be referenced. +*> = 'U': Upper triangular part of A is referenced +*> = 'L': Lower triangular part of A is referenced +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, ZLANSY is +*> set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading n by n +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading n by n lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(N,1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLANSY( NORM, UPLO, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, J + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J, N + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is symmetric). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( A( J, J ) ) + 60 CONTINUE + DO 70 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( A( J, J ) ) + DO 90 I = J + 1, N + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL ZLASSQ( J-1, A( 1, J ), 1, SCALE, SUM ) + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL ZLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM ) + 120 CONTINUE + END IF + SUM = 2*SUM + CALL ZLASSQ( N, A, LDA+1, SCALE, SUM ) + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANSY = VALUE + RETURN +* +* End of ZLANSY +* + END diff --git a/dspl/liblapack/SRC/zlantb.f b/dspl/liblapack/SRC/zlantb.f new file mode 100644 index 0000000..3077ba1 --- /dev/null +++ b/dspl/liblapack/SRC/zlantb.f @@ -0,0 +1,363 @@ +*> \brief \b ZLANTB returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a triangular band matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLANTB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLANTB( NORM, UPLO, DIAG, N, K, AB, +* LDAB, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER K, LDAB, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION WORK( * ) +* COMPLEX*16 AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLANTB returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of an +*> n by n triangular band matrix A, with ( k + 1 ) diagonals. +*> \endverbatim +*> +*> \return ZLANTB +*> \verbatim +*> +*> ZLANTB = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in ZLANTB as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, ZLANTB is +*> set to zero. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of super-diagonals of the matrix A if UPLO = 'U', +*> or the number of sub-diagonals of the matrix A if UPLO = 'L'. +*> K >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> The upper or lower triangular band matrix A, stored in the +*> first k+1 rows of AB. The j-th column of A is stored +*> in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+k). +*> Note that when DIAG = 'U', the elements of the array AB +*> corresponding to the diagonal elements of the matrix A are +*> not referenced, but are assumed to be one. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= K+1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLANTB( NORM, UPLO, DIAG, N, K, AB, + $ LDAB, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER K, LDAB, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J, L + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = MAX( K+2-J, 1 ), K + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = 2, MIN( N+1-J, K+1 ) + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = MAX( K+2-J, 1 ), K + 1 + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = 1, MIN( N+1-J, K+1 ) + SUM = ABS( AB( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 90 I = MAX( K+2-J, 1 ), K + SUM = SUM + ABS( AB( I, J ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = MAX( K+2-J, 1 ), K + 1 + SUM = SUM + ABS( AB( I, J ) ) + 100 CONTINUE + END IF + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = 2, MIN( N+1-J, K+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = 1, MIN( N+1-J, K+1 ) + SUM = SUM + ABS( AB( I, J ) ) + 130 CONTINUE + END IF + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, N + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + L = K + 1 - J + DO 160 I = MAX( 1, J-K ), J - 1 + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 180 I = 1, N + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + L = K + 1 - J + DO 190 I = MAX( 1, J-K ), J + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 230 J = 1, N + L = 1 - J + DO 220 I = J + 1, MIN( N, J+K ) + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 220 CONTINUE + 230 CONTINUE + ELSE + DO 240 I = 1, N + WORK( I ) = ZERO + 240 CONTINUE + DO 260 J = 1, N + L = 1 - J + DO 250 I = J, MIN( N, J+K ) + WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) ) + 250 CONTINUE + 260 CONTINUE + END IF + END IF + DO 270 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 270 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + IF( K.GT.0 ) THEN + DO 280 J = 2, N + CALL ZLASSQ( MIN( J-1, K ), + $ AB( MAX( K+2-J, 1 ), J ), 1, SCALE, + $ SUM ) + 280 CONTINUE + END IF + ELSE + SCALE = ZERO + SUM = ONE + DO 290 J = 1, N + CALL ZLASSQ( MIN( J, K+1 ), AB( MAX( K+2-J, 1 ), J ), + $ 1, SCALE, SUM ) + 290 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + IF( K.GT.0 ) THEN + DO 300 J = 1, N - 1 + CALL ZLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE, + $ SUM ) + 300 CONTINUE + END IF + ELSE + SCALE = ZERO + SUM = ONE + DO 310 J = 1, N + CALL ZLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1, SCALE, + $ SUM ) + 310 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANTB = VALUE + RETURN +* +* End of ZLANTB +* + END diff --git a/dspl/liblapack/SRC/zlantp.f b/dspl/liblapack/SRC/zlantp.f new file mode 100644 index 0000000..69dbaa5 --- /dev/null +++ b/dspl/liblapack/SRC/zlantp.f @@ -0,0 +1,357 @@ +*> \brief \b ZLANTP returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a triangular matrix supplied in packed form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLANTP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLANTP( NORM, UPLO, DIAG, N, AP, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION WORK( * ) +* COMPLEX*16 AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLANTP returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> triangular matrix A, supplied in packed form. +*> \endverbatim +*> +*> \return ZLANTP +*> \verbatim +*> +*> ZLANTP = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in ZLANTP as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, ZLANTP is +*> set to zero. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The upper or lower triangular matrix A, packed columnwise in +*> a linear array. The j-th column of A is stored in the array +*> AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> Note that when DIAG = 'U', the elements of the array AP +*> corresponding to the diagonal elements of the matrix A are +*> not referenced, but are assumed to be one. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLANTP( NORM, UPLO, DIAG, N, AP, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J, K + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + K = 1 + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = K, K + J - 2 + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + K = K + J + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = K + 1, K + N - J + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + K = K + N - J + 1 + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = K, K + J - 1 + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 50 CONTINUE + K = K + J + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = K, K + N - J + SUM = ABS( AP( I ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + K = K + N - J + 1 + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + K = 1 + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 90 I = K, K + J - 2 + SUM = SUM + ABS( AP( I ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = K, K + J - 1 + SUM = SUM + ABS( AP( I ) ) + 100 CONTINUE + END IF + K = K + J + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = K + 1, K + N - J + SUM = SUM + ABS( AP( I ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = K, K + N - J + SUM = SUM + ABS( AP( I ) ) + 130 CONTINUE + END IF + K = K + N - J + 1 + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + K = 1 + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, N + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + DO 160 I = 1, J - 1 + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 160 CONTINUE + K = K + 1 + 170 CONTINUE + ELSE + DO 180 I = 1, N + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + DO 190 I = 1, J + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 230 J = 1, N + K = K + 1 + DO 220 I = J + 1, N + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 220 CONTINUE + 230 CONTINUE + ELSE + DO 240 I = 1, N + WORK( I ) = ZERO + 240 CONTINUE + DO 260 J = 1, N + DO 250 I = J, N + WORK( I ) = WORK( I ) + ABS( AP( K ) ) + K = K + 1 + 250 CONTINUE + 260 CONTINUE + END IF + END IF + VALUE = ZERO + DO 270 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 270 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + K = 2 + DO 280 J = 2, N + CALL ZLASSQ( J-1, AP( K ), 1, SCALE, SUM ) + K = K + J + 280 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + K = 1 + DO 290 J = 1, N + CALL ZLASSQ( J, AP( K ), 1, SCALE, SUM ) + K = K + J + 290 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = N + K = 2 + DO 300 J = 1, N - 1 + CALL ZLASSQ( N-J, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 300 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + K = 1 + DO 310 J = 1, N + CALL ZLASSQ( N-J+1, AP( K ), 1, SCALE, SUM ) + K = K + N - J + 1 + 310 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANTP = VALUE + RETURN +* +* End of ZLANTP +* + END diff --git a/dspl/liblapack/SRC/zlantr.f b/dspl/liblapack/SRC/zlantr.f new file mode 100644 index 0000000..04ee482 --- /dev/null +++ b/dspl/liblapack/SRC/zlantr.f @@ -0,0 +1,355 @@ +*> \brief \b ZLANTR returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a trapezoidal or triangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLANTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLANTR( NORM, UPLO, DIAG, M, N, A, LDA, +* WORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION WORK( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLANTR returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> trapezoidal or triangular matrix A. +*> \endverbatim +*> +*> \return ZLANTR +*> \verbatim +*> +*> ZLANTR = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in ZLANTR as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower trapezoidal. +*> = 'U': Upper trapezoidal +*> = 'L': Lower trapezoidal +*> Note that A is triangular instead of trapezoidal if M = N. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A has unit diagonal. +*> = 'N': Non-unit diagonal +*> = 'U': Unit diagonal +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0, and if +*> UPLO = 'U', M <= N. When M = 0, ZLANTR is set to zero. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0, and if +*> UPLO = 'L', N <= M. When N = 0, ZLANTR is set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The trapezoidal matrix A (A is triangular if M = N). +*> If UPLO = 'U', the leading m by n upper trapezoidal part of +*> the array A contains the upper trapezoidal matrix, and the +*> strictly lower triangular part of A is not referenced. +*> If UPLO = 'L', the leading m by n lower trapezoidal part of +*> the array A contains the lower trapezoidal matrix, and the +*> strictly upper triangular part of A is not referenced. Note +*> that when DIAG = 'U', the diagonal elements of A are not +*> referenced and are assumed to be one. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(M,1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= M when NORM = 'I'; otherwise, WORK is not +*> referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLANTR( NORM, UPLO, DIAG, M, N, A, LDA, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UDIAG + INTEGER I, J + DOUBLE PRECISION SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MIN, SQRT +* .. +* .. Executable Statements .. +* + IF( MIN( M, N ).EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + IF( LSAME( DIAG, 'U' ) ) THEN + VALUE = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, MIN( M, J-1 ) + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J + 1, M + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + DO 50 I = 1, MIN( M, J ) + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1, N + DO 70 I = J, M + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + 80 CONTINUE + END IF + END IF + ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN +* +* Find norm1(A). +* + VALUE = ZERO + UDIAG = LSAME( DIAG, 'U' ) + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 1, N + IF( ( UDIAG ) .AND. ( J.LE.M ) ) THEN + SUM = ONE + DO 90 I = 1, J - 1 + SUM = SUM + ABS( A( I, J ) ) + 90 CONTINUE + ELSE + SUM = ZERO + DO 100 I = 1, MIN( M, J ) + SUM = SUM + ABS( A( I, J ) ) + 100 CONTINUE + END IF + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 110 CONTINUE + ELSE + DO 140 J = 1, N + IF( UDIAG ) THEN + SUM = ONE + DO 120 I = J + 1, M + SUM = SUM + ABS( A( I, J ) ) + 120 CONTINUE + ELSE + SUM = ZERO + DO 130 I = J, M + SUM = SUM + ABS( A( I, J ) ) + 130 CONTINUE + END IF + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 140 CONTINUE + END IF + ELSE IF( LSAME( NORM, 'I' ) ) THEN +* +* Find normI(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + DO 150 I = 1, M + WORK( I ) = ONE + 150 CONTINUE + DO 170 J = 1, N + DO 160 I = 1, MIN( M, J-1 ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 160 CONTINUE + 170 CONTINUE + ELSE + DO 180 I = 1, M + WORK( I ) = ZERO + 180 CONTINUE + DO 200 J = 1, N + DO 190 I = 1, MIN( M, J ) + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 190 CONTINUE + 200 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + DO 210 I = 1, N + WORK( I ) = ONE + 210 CONTINUE + DO 220 I = N + 1, M + WORK( I ) = ZERO + 220 CONTINUE + DO 240 J = 1, N + DO 230 I = J + 1, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 230 CONTINUE + 240 CONTINUE + ELSE + DO 250 I = 1, M + WORK( I ) = ZERO + 250 CONTINUE + DO 270 J = 1, N + DO 260 I = J, M + WORK( I ) = WORK( I ) + ABS( A( I, J ) ) + 260 CONTINUE + 270 CONTINUE + END IF + END IF + VALUE = ZERO + DO 280 I = 1, M + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 280 CONTINUE + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + IF( LSAME( UPLO, 'U' ) ) THEN + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = MIN( M, N ) + DO 290 J = 2, N + CALL ZLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM ) + 290 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + DO 300 J = 1, N + CALL ZLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM ) + 300 CONTINUE + END IF + ELSE + IF( LSAME( DIAG, 'U' ) ) THEN + SCALE = ONE + SUM = MIN( M, N ) + DO 310 J = 1, N + CALL ZLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE, + $ SUM ) + 310 CONTINUE + ELSE + SCALE = ZERO + SUM = ONE + DO 320 J = 1, N + CALL ZLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM ) + 320 CONTINUE + END IF + END IF + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANTR = VALUE + RETURN +* +* End of ZLANTR +* + END diff --git a/dspl/liblapack/SRC/zlapll.f b/dspl/liblapack/SRC/zlapll.f new file mode 100644 index 0000000..2eebe88 --- /dev/null +++ b/dspl/liblapack/SRC/zlapll.f @@ -0,0 +1,169 @@ +*> \brief \b ZLAPLL measures the linear dependence of two vectors. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAPLL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAPLL( N, X, INCX, Y, INCY, SSMIN ) +* +* .. Scalar Arguments .. +* INTEGER INCX, INCY, N +* DOUBLE PRECISION SSMIN +* .. +* .. Array Arguments .. +* COMPLEX*16 X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Given two column vectors X and Y, let +*> +*> A = ( X Y ). +*> +*> The subroutine first computes the QR factorization of A = Q*R, +*> and then computes the SVD of the 2-by-2 upper triangular matrix R. +*> The smaller singular value of R is returned in SSMIN, which is used +*> as the measurement of the linear dependency of the vectors X and Y. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of the vectors X and Y. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (1+(N-1)*INCX) +*> On entry, X contains the N-vector X. +*> On exit, X is overwritten. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX*16 array, dimension (1+(N-1)*INCY) +*> On entry, Y contains the N-vector Y. +*> On exit, Y is overwritten. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> The increment between successive elements of Y. INCY > 0. +*> \endverbatim +*> +*> \param[out] SSMIN +*> \verbatim +*> SSMIN is DOUBLE PRECISION +*> The smallest singular value of the N-by-2 matrix A = ( X Y ). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLAPLL( N, X, INCX, Y, INCY, SSMIN ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCX, INCY, N + DOUBLE PRECISION SSMIN +* .. +* .. Array Arguments .. + COMPLEX*16 X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION SSMAX + COMPLEX*16 A11, A12, A22, C, TAU +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DCONJG +* .. +* .. External Functions .. + COMPLEX*16 ZDOTC + EXTERNAL ZDOTC +* .. +* .. External Subroutines .. + EXTERNAL DLAS2, ZAXPY, ZLARFG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + SSMIN = ZERO + RETURN + END IF +* +* Compute the QR factorization of the N-by-2 matrix ( X Y ) +* + CALL ZLARFG( N, X( 1 ), X( 1+INCX ), INCX, TAU ) + A11 = X( 1 ) + X( 1 ) = CONE +* + C = -DCONJG( TAU )*ZDOTC( N, X, INCX, Y, INCY ) + CALL ZAXPY( N, C, X, INCX, Y, INCY ) +* + CALL ZLARFG( N-1, Y( 1+INCY ), Y( 1+2*INCY ), INCY, TAU ) +* + A12 = Y( 1 ) + A22 = Y( 1+INCY ) +* +* Compute the SVD of 2-by-2 Upper triangular matrix. +* + CALL DLAS2( ABS( A11 ), ABS( A12 ), ABS( A22 ), SSMIN, SSMAX ) +* + RETURN +* +* End of ZLAPLL +* + END diff --git a/dspl/liblapack/SRC/zlapmr.f b/dspl/liblapack/SRC/zlapmr.f new file mode 100644 index 0000000..757a959 --- /dev/null +++ b/dspl/liblapack/SRC/zlapmr.f @@ -0,0 +1,204 @@ +*> \brief \b ZLAPMR rearranges rows of a matrix as specified by a permutation vector. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAPMR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAPMR( FORWRD, M, N, X, LDX, K ) +* +* .. Scalar Arguments .. +* LOGICAL FORWRD +* INTEGER LDX, M, N +* .. +* .. Array Arguments .. +* INTEGER K( * ) +* COMPLEX*16 X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAPMR rearranges the rows of the M by N matrix X as specified +*> by the permutation K(1),K(2),...,K(M) of the integers 1,...,M. +*> If FORWRD = .TRUE., forward permutation: +*> +*> X(K(I),*) is moved X(I,*) for I = 1,2,...,M. +*> +*> If FORWRD = .FALSE., backward permutation: +*> +*> X(I,*) is moved to X(K(I),*) for I = 1,2,...,M. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FORWRD +*> \verbatim +*> FORWRD is LOGICAL +*> = .TRUE., forward permutation +*> = .FALSE., backward permutation +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix X. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix X. N >= 0. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,N) +*> On entry, the M by N matrix X. +*> On exit, X contains the permuted matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X, LDX >= MAX(1,M). +*> \endverbatim +*> +*> \param[in,out] K +*> \verbatim +*> K is INTEGER array, dimension (M) +*> On entry, K contains the permutation vector. K is used as +*> internal workspace, but reset to its original value on +*> output. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLAPMR( FORWRD, M, N, X, LDX, K ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL FORWRD + INTEGER LDX, M, N +* .. +* .. Array Arguments .. + INTEGER K( * ) + COMPLEX*16 X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IN, J, JJ + COMPLEX*16 TEMP +* .. +* .. Executable Statements .. +* + IF( M.LE.1 ) + $ RETURN +* + DO 10 I = 1, M + K( I ) = -K( I ) + 10 CONTINUE +* + IF( FORWRD ) THEN +* +* Forward permutation +* + DO 50 I = 1, M +* + IF( K( I ).GT.0 ) + $ GO TO 40 +* + J = I + K( J ) = -K( J ) + IN = K( J ) +* + 20 CONTINUE + IF( K( IN ).GT.0 ) + $ GO TO 40 +* + DO 30 JJ = 1, N + TEMP = X( J, JJ ) + X( J, JJ ) = X( IN, JJ ) + X( IN, JJ ) = TEMP + 30 CONTINUE +* + K( IN ) = -K( IN ) + J = IN + IN = K( IN ) + GO TO 20 +* + 40 CONTINUE +* + 50 CONTINUE +* + ELSE +* +* Backward permutation +* + DO 90 I = 1, M +* + IF( K( I ).GT.0 ) + $ GO TO 80 +* + K( I ) = -K( I ) + J = K( I ) + 60 CONTINUE + IF( J.EQ.I ) + $ GO TO 80 +* + DO 70 JJ = 1, N + TEMP = X( I, JJ ) + X( I, JJ ) = X( J, JJ ) + X( J, JJ ) = TEMP + 70 CONTINUE +* + K( J ) = -K( J ) + J = K( J ) + GO TO 60 +* + 80 CONTINUE +* + 90 CONTINUE +* + END IF +* + RETURN +* +* End of ZLAPMT +* + END + diff --git a/dspl/liblapack/SRC/zlapmt.f b/dspl/liblapack/SRC/zlapmt.f new file mode 100644 index 0000000..963ced2 --- /dev/null +++ b/dspl/liblapack/SRC/zlapmt.f @@ -0,0 +1,203 @@ +*> \brief \b ZLAPMT performs a forward or backward permutation of the columns of a matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAPMT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAPMT( FORWRD, M, N, X, LDX, K ) +* +* .. Scalar Arguments .. +* LOGICAL FORWRD +* INTEGER LDX, M, N +* .. +* .. Array Arguments .. +* INTEGER K( * ) +* COMPLEX*16 X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAPMT rearranges the columns of the M by N matrix X as specified +*> by the permutation K(1),K(2),...,K(N) of the integers 1,...,N. +*> If FORWRD = .TRUE., forward permutation: +*> +*> X(*,K(J)) is moved X(*,J) for J = 1,2,...,N. +*> +*> If FORWRD = .FALSE., backward permutation: +*> +*> X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FORWRD +*> \verbatim +*> FORWRD is LOGICAL +*> = .TRUE., forward permutation +*> = .FALSE., backward permutation +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix X. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix X. N >= 0. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,N) +*> On entry, the M by N matrix X. +*> On exit, X contains the permuted matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X, LDX >= MAX(1,M). +*> \endverbatim +*> +*> \param[in,out] K +*> \verbatim +*> K is INTEGER array, dimension (N) +*> On entry, K contains the permutation vector. K is used as +*> internal workspace, but reset to its original value on +*> output. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLAPMT( FORWRD, M, N, X, LDX, K ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL FORWRD + INTEGER LDX, M, N +* .. +* .. Array Arguments .. + INTEGER K( * ) + COMPLEX*16 X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, II, IN, J + COMPLEX*16 TEMP +* .. +* .. Executable Statements .. +* + IF( N.LE.1 ) + $ RETURN +* + DO 10 I = 1, N + K( I ) = -K( I ) + 10 CONTINUE +* + IF( FORWRD ) THEN +* +* Forward permutation +* + DO 50 I = 1, N +* + IF( K( I ).GT.0 ) + $ GO TO 40 +* + J = I + K( J ) = -K( J ) + IN = K( J ) +* + 20 CONTINUE + IF( K( IN ).GT.0 ) + $ GO TO 40 +* + DO 30 II = 1, M + TEMP = X( II, J ) + X( II, J ) = X( II, IN ) + X( II, IN ) = TEMP + 30 CONTINUE +* + K( IN ) = -K( IN ) + J = IN + IN = K( IN ) + GO TO 20 +* + 40 CONTINUE +* + 50 CONTINUE +* + ELSE +* +* Backward permutation +* + DO 90 I = 1, N +* + IF( K( I ).GT.0 ) + $ GO TO 80 +* + K( I ) = -K( I ) + J = K( I ) + 60 CONTINUE + IF( J.EQ.I ) + $ GO TO 80 +* + DO 70 II = 1, M + TEMP = X( II, I ) + X( II, I ) = X( II, J ) + X( II, J ) = TEMP + 70 CONTINUE +* + K( J ) = -K( J ) + J = K( J ) + GO TO 60 +* + 80 CONTINUE +* + 90 CONTINUE +* + END IF +* + RETURN +* +* End of ZLAPMT +* + END diff --git a/dspl/liblapack/SRC/zlaqgb.f b/dspl/liblapack/SRC/zlaqgb.f new file mode 100644 index 0000000..9aaa941 --- /dev/null +++ b/dspl/liblapack/SRC/zlaqgb.f @@ -0,0 +1,258 @@ +*> \brief \b ZLAQGB scales a general band matrix, using row and column scaling factors computed by sgbequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAQGB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAQGB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, +* AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED +* INTEGER KL, KU, LDAB, M, N +* DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( * ), R( * ) +* COMPLEX*16 AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAQGB equilibrates a general M by N band matrix A with KL +*> subdiagonals and KU superdiagonals using the row and scaling factors +*> in the vectors R and C. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The number of subdiagonals within the band of A. KL >= 0. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The number of superdiagonals within the band of A. KU >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the matrix A in band storage, in rows 1 to KL+KU+1. +*> The j-th column of A is stored in the j-th column of the +*> array AB as follows: +*> AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) +*> +*> On exit, the equilibrated matrix, in the same storage format +*> as A. See EQUED for the form of the equilibrated matrix. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDA >= KL+KU+1. +*> \endverbatim +*> +*> \param[in] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (M) +*> The row scale factors for A. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. +*> \endverbatim +*> +*> \param[in] ROWCND +*> \verbatim +*> ROWCND is DOUBLE PRECISION +*> Ratio of the smallest R(i) to the largest R(i). +*> \endverbatim +*> +*> \param[in] COLCND +*> \verbatim +*> COLCND is DOUBLE PRECISION +*> Ratio of the smallest C(i) to the largest C(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if row or column scaling +*> should be done based on the ratio of the row or column scaling +*> factors. If ROWCND < THRESH, row scaling is done, and if +*> COLCND < THRESH, column scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if row scaling +*> should be done based on the absolute size of the largest matrix +*> element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GBauxiliary +* +* ===================================================================== + SUBROUTINE ZLAQGB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND, + $ AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED + INTEGER KL, KU, LDAB, M, N + DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ), R( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) + $ THEN +* +* No row scaling +* + IF( COLCND.GE.THRESH ) THEN +* +* No column scaling +* + EQUED = 'N' + ELSE +* +* Column scaling +* + DO 20 J = 1, N + CJ = C( J ) + DO 10 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = CJ*AB( KU+1+I-J, J ) + 10 CONTINUE + 20 CONTINUE + EQUED = 'C' + END IF + ELSE IF( COLCND.GE.THRESH ) THEN +* +* Row scaling, no column scaling +* + DO 40 J = 1, N + DO 30 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = R( I )*AB( KU+1+I-J, J ) + 30 CONTINUE + 40 CONTINUE + EQUED = 'R' + ELSE +* +* Row and column scaling +* + DO 60 J = 1, N + CJ = C( J ) + DO 50 I = MAX( 1, J-KU ), MIN( M, J+KL ) + AB( KU+1+I-J, J ) = CJ*R( I )*AB( KU+1+I-J, J ) + 50 CONTINUE + 60 CONTINUE + EQUED = 'B' + END IF +* + RETURN +* +* End of ZLAQGB +* + END diff --git a/dspl/liblapack/SRC/zlaqge.f b/dspl/liblapack/SRC/zlaqge.f new file mode 100644 index 0000000..c7e2cc1 --- /dev/null +++ b/dspl/liblapack/SRC/zlaqge.f @@ -0,0 +1,238 @@ +*> \brief \b ZLAQGE scales a general rectangular matrix, using row and column scaling factors computed by sgeequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAQGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAQGE( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, +* EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED +* INTEGER LDA, M, N +* DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( * ), R( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAQGE equilibrates a general M by N matrix A using the row and +*> column scaling factors in the vectors R and C. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M by N matrix A. +*> On exit, the equilibrated matrix. See EQUED for the form of +*> the equilibrated matrix. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(M,1). +*> \endverbatim +*> +*> \param[in] R +*> \verbatim +*> R is DOUBLE PRECISION array, dimension (M) +*> The row scale factors for A. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (N) +*> The column scale factors for A. +*> \endverbatim +*> +*> \param[in] ROWCND +*> \verbatim +*> ROWCND is DOUBLE PRECISION +*> Ratio of the smallest R(i) to the largest R(i). +*> \endverbatim +*> +*> \param[in] COLCND +*> \verbatim +*> COLCND is DOUBLE PRECISION +*> Ratio of the smallest C(i) to the largest C(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration +*> = 'R': Row equilibration, i.e., A has been premultiplied by +*> diag(R). +*> = 'C': Column equilibration, i.e., A has been postmultiplied +*> by diag(C). +*> = 'B': Both row and column equilibration, i.e., A has been +*> replaced by diag(R) * A * diag(C). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if row or column scaling +*> should be done based on the ratio of the row or column scaling +*> factors. If ROWCND < THRESH, row scaling is done, and if +*> COLCND < THRESH, column scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if row scaling +*> should be done based on the absolute size of the largest matrix +*> element. If AMAX > LARGE or AMAX < SMALL, row scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEauxiliary +* +* ===================================================================== + SUBROUTINE ZLAQGE( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX, + $ EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED + INTEGER LDA, M, N + DOUBLE PRECISION AMAX, COLCND, ROWCND +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ), R( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) + $ THEN +* +* No row scaling +* + IF( COLCND.GE.THRESH ) THEN +* +* No column scaling +* + EQUED = 'N' + ELSE +* +* Column scaling +* + DO 20 J = 1, N + CJ = C( J ) + DO 10 I = 1, M + A( I, J ) = CJ*A( I, J ) + 10 CONTINUE + 20 CONTINUE + EQUED = 'C' + END IF + ELSE IF( COLCND.GE.THRESH ) THEN +* +* Row scaling, no column scaling +* + DO 40 J = 1, N + DO 30 I = 1, M + A( I, J ) = R( I )*A( I, J ) + 30 CONTINUE + 40 CONTINUE + EQUED = 'R' + ELSE +* +* Row and column scaling +* + DO 60 J = 1, N + CJ = C( J ) + DO 50 I = 1, M + A( I, J ) = CJ*R( I )*A( I, J ) + 50 CONTINUE + 60 CONTINUE + EQUED = 'B' + END IF +* + RETURN +* +* End of ZLAQGE +* + END diff --git a/dspl/liblapack/SRC/zlaqhb.f b/dspl/liblapack/SRC/zlaqhb.f new file mode 100644 index 0000000..8d2a53c --- /dev/null +++ b/dspl/liblapack/SRC/zlaqhb.f @@ -0,0 +1,230 @@ +*> \brief \b ZLAQHB scales a Hermitian band matrix, using scaling factors computed by cpbequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAQHB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAQHB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, UPLO +* INTEGER KD, LDAB, N +* DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION S( * ) +* COMPLEX*16 AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAQHB equilibrates a Hermitian band matrix A +*> using the scaling factors in the vector S. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of super-diagonals of the matrix A if UPLO = 'U', +*> or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**H *U or A = L*L**H of the band +*> matrix A, in the same storage format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A. +*> \endverbatim +*> +*> \param[in] SCOND +*> \verbatim +*> SCOND is DOUBLE PRECISION +*> Ratio of the smallest S(i) to the largest S(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies whether or not equilibration was done. +*> = 'N': No equilibration. +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if scaling should be done +*> based on the ratio of the scaling factors. If SCOND < THRESH, +*> scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if scaling should +*> be done based on the absolute size of the largest matrix element. +*> If AMAX > LARGE or AMAX < SMALL, scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLAQHB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER KD, LDAB, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION S( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored in band format. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = MAX( 1, J-KD ), J - 1 + AB( KD+1+I-J, J ) = CJ*S( I )*AB( KD+1+I-J, J ) + 10 CONTINUE + AB( KD+1, J ) = CJ*CJ*DBLE( AB( KD+1, J ) ) + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + AB( 1, J ) = CJ*CJ*DBLE( AB( 1, J ) ) + DO 30 I = J + 1, MIN( N, J+KD ) + AB( 1+I-J, J ) = CJ*S( I )*AB( 1+I-J, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of ZLAQHB +* + END diff --git a/dspl/liblapack/SRC/zlaqhe.f b/dspl/liblapack/SRC/zlaqhe.f new file mode 100644 index 0000000..5e6520b --- /dev/null +++ b/dspl/liblapack/SRC/zlaqhe.f @@ -0,0 +1,223 @@ +*> \brief \b ZLAQHE scales a Hermitian matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAQHE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAQHE( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, UPLO +* INTEGER LDA, N +* DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION S( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAQHE equilibrates a Hermitian matrix A using the scaling factors +*> in the vector S. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if EQUED = 'Y', the equilibrated matrix: +*> diag(S) * A * diag(S). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(N,1). +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A. +*> \endverbatim +*> +*> \param[in] SCOND +*> \verbatim +*> SCOND is DOUBLE PRECISION +*> Ratio of the smallest S(i) to the largest S(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies whether or not equilibration was done. +*> = 'N': No equilibration. +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if scaling should be done +*> based on the ratio of the scaling factors. If SCOND < THRESH, +*> scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if scaling should +*> be done based on the absolute size of the largest matrix element. +*> If AMAX > LARGE or AMAX < SMALL, scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16HEauxiliary +* +* ===================================================================== + SUBROUTINE ZLAQHE( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER LDA, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION S( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J - 1 + A( I, J ) = CJ*S( I )*A( I, J ) + 10 CONTINUE + A( J, J ) = CJ*CJ*DBLE( A( J, J ) ) + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + A( J, J ) = CJ*CJ*DBLE( A( J, J ) ) + DO 30 I = J + 1, N + A( I, J ) = CJ*S( I )*A( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of ZLAQHE +* + END diff --git a/dspl/liblapack/SRC/zlaqhp.f b/dspl/liblapack/SRC/zlaqhp.f new file mode 100644 index 0000000..03f2ac2 --- /dev/null +++ b/dspl/liblapack/SRC/zlaqhp.f @@ -0,0 +1,219 @@ +*> \brief \b ZLAQHP scales a Hermitian matrix stored in packed form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAQHP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAQHP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, UPLO +* INTEGER N +* DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION S( * ) +* COMPLEX*16 AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAQHP equilibrates a Hermitian matrix A using the scaling factors +*> in the vector S. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the equilibrated matrix: diag(S) * A * diag(S), in +*> the same storage format as A. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A. +*> \endverbatim +*> +*> \param[in] SCOND +*> \verbatim +*> SCOND is DOUBLE PRECISION +*> Ratio of the smallest S(i) to the largest S(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies whether or not equilibration was done. +*> = 'N': No equilibration. +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if scaling should be done +*> based on the ratio of the scaling factors. If SCOND < THRESH, +*> scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if scaling should +*> be done based on the absolute size of the largest matrix element. +*> If AMAX > LARGE or AMAX < SMALL, scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLAQHP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION S( * ) + COMPLEX*16 AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JC + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + JC = 1 + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J - 1 + AP( JC+I-1 ) = CJ*S( I )*AP( JC+I-1 ) + 10 CONTINUE + AP( JC+J-1 ) = CJ*CJ*DBLE( AP( JC+J-1 ) ) + JC = JC + J + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + JC = 1 + DO 40 J = 1, N + CJ = S( J ) + AP( JC ) = CJ*CJ*DBLE( AP( JC ) ) + DO 30 I = J + 1, N + AP( JC+I-J ) = CJ*S( I )*AP( JC+I-J ) + 30 CONTINUE + JC = JC + N - J + 1 + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of ZLAQHP +* + END diff --git a/dspl/liblapack/SRC/zlaqp2.f b/dspl/liblapack/SRC/zlaqp2.f new file mode 100644 index 0000000..e7bb15f --- /dev/null +++ b/dspl/liblapack/SRC/zlaqp2.f @@ -0,0 +1,266 @@ +*> \brief \b ZLAQP2 computes a QR factorization with column pivoting of the matrix block. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAQP2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, +* WORK ) +* +* .. Scalar Arguments .. +* INTEGER LDA, M, N, OFFSET +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* DOUBLE PRECISION VN1( * ), VN2( * ) +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAQP2 computes a QR factorization with column pivoting of +*> the block A(OFFSET+1:M,1:N). +*> The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] OFFSET +*> \verbatim +*> OFFSET is INTEGER +*> The number of rows of the matrix A that must be pivoted +*> but no factorized. OFFSET >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the upper triangle of block A(OFFSET+1:M,1:N) is +*> the triangular factor obtained; the elements in block +*> A(OFFSET+1:M,1:N) below the diagonal, together with the +*> array TAU, represent the orthogonal matrix Q as a product of +*> elementary reflectors. Block A(1:OFFSET,1:N) has been +*> accordingly pivoted, but no factorized. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted +*> to the front of A*P (a leading column); if JPVT(i) = 0, +*> the i-th column of A is a free column. +*> On exit, if JPVT(i) = k, then the i-th column of A*P +*> was the k-th column of A. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (min(M,N)) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[in,out] VN1 +*> \verbatim +*> VN1 is DOUBLE PRECISION array, dimension (N) +*> The vector with the partial column norms. +*> \endverbatim +*> +*> \param[in,out] VN2 +*> \verbatim +*> VN2 is DOUBLE PRECISION array, dimension (N) +*> The vector with the exact column norms. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +*> X. Sun, Computer Science Dept., Duke University, USA +*> \n +*> Partial column norm updating strategy modified on April 2011 +*> Z. Drmac and Z. Bujanovic, Dept. of Mathematics, +*> University of Zagreb, Croatia. +* +*> \par References: +* ================ +*> +*> LAPACK Working Note 176 +* +*> \htmlonly +*> [PDF] +*> \endhtmlonly +* +* ===================================================================== + SUBROUTINE ZLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2, + $ WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER LDA, M, N, OFFSET +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION VN1( * ), VN2( * ) + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + COMPLEX*16 CONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, ITEMP, J, MN, OFFPI, PVT + DOUBLE PRECISION TEMP, TEMP2, TOL3Z + COMPLEX*16 AII +* .. +* .. External Subroutines .. + EXTERNAL ZLARF, ZLARFG, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DCONJG, MAX, MIN, SQRT +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DZNRM2 + EXTERNAL IDAMAX, DLAMCH, DZNRM2 +* .. +* .. Executable Statements .. +* + MN = MIN( M-OFFSET, N ) + TOL3Z = SQRT(DLAMCH('Epsilon')) +* +* Compute factorization. +* + DO 20 I = 1, MN +* + OFFPI = OFFSET + I +* +* Determine ith pivot column and swap if necessary. +* + PVT = ( I-1 ) + IDAMAX( N-I+1, VN1( I ), 1 ) +* + IF( PVT.NE.I ) THEN + CALL ZSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( I ) + JPVT( I ) = ITEMP + VN1( PVT ) = VN1( I ) + VN2( PVT ) = VN2( I ) + END IF +* +* Generate elementary reflector H(i). +* + IF( OFFPI.LT.M ) THEN + CALL ZLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), 1, + $ TAU( I ) ) + ELSE + CALL ZLARFG( 1, A( M, I ), A( M, I ), 1, TAU( I ) ) + END IF +* + IF( I.LT.N ) THEN +* +* Apply H(i)**H to A(offset+i:m,i+1:n) from the left. +* + AII = A( OFFPI, I ) + A( OFFPI, I ) = CONE + CALL ZLARF( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1, + $ DCONJG( TAU( I ) ), A( OFFPI, I+1 ), LDA, + $ WORK( 1 ) ) + A( OFFPI, I ) = AII + END IF +* +* Update partial column norms. +* + DO 10 J = I + 1, N + IF( VN1( J ).NE.ZERO ) THEN +* +* NOTE: The following 4 lines follow from the analysis in +* Lapack Working Note 176. +* + TEMP = ONE - ( ABS( A( OFFPI, J ) ) / VN1( J ) )**2 + TEMP = MAX( TEMP, ZERO ) + TEMP2 = TEMP*( VN1( J ) / VN2( J ) )**2 + IF( TEMP2 .LE. TOL3Z ) THEN + IF( OFFPI.LT.M ) THEN + VN1( J ) = DZNRM2( M-OFFPI, A( OFFPI+1, J ), 1 ) + VN2( J ) = VN1( J ) + ELSE + VN1( J ) = ZERO + VN2( J ) = ZERO + END IF + ELSE + VN1( J ) = VN1( J )*SQRT( TEMP ) + END IF + END IF + 10 CONTINUE +* + 20 CONTINUE +* + RETURN +* +* End of ZLAQP2 +* + END diff --git a/dspl/liblapack/SRC/zlaqps.f b/dspl/liblapack/SRC/zlaqps.f new file mode 100644 index 0000000..c142e8c --- /dev/null +++ b/dspl/liblapack/SRC/zlaqps.f @@ -0,0 +1,370 @@ +*> \brief \b ZLAQPS computes a step of QR factorization with column pivoting of a real m-by-n matrix A by using BLAS level 3. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAQPS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1, +* VN2, AUXV, F, LDF ) +* +* .. Scalar Arguments .. +* INTEGER KB, LDA, LDF, M, N, NB, OFFSET +* .. +* .. Array Arguments .. +* INTEGER JPVT( * ) +* DOUBLE PRECISION VN1( * ), VN2( * ) +* COMPLEX*16 A( LDA, * ), AUXV( * ), F( LDF, * ), TAU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAQPS computes a step of QR factorization with column pivoting +*> of a complex M-by-N matrix A by using Blas-3. It tries to factorize +*> NB columns from A starting from the row OFFSET+1, and updates all +*> of the matrix with Blas-3 xGEMM. +*> +*> In some cases, due to catastrophic cancellations, it cannot +*> factorize NB columns. Hence, the actual number of factorized +*> columns is returned in KB. +*> +*> Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0 +*> \endverbatim +*> +*> \param[in] OFFSET +*> \verbatim +*> OFFSET is INTEGER +*> The number of rows of A that have been factorized in +*> previous steps. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of columns to factorize. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns actually factorized. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, block A(OFFSET+1:M,1:KB) is the triangular +*> factor obtained and block A(1:OFFSET,1:N) has been +*> accordingly pivoted, but no factorized. +*> The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has +*> been updated. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] JPVT +*> \verbatim +*> JPVT is INTEGER array, dimension (N) +*> JPVT(I) = K <==> Column K of the full matrix A has been +*> permuted into position I in AP. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (KB) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[in,out] VN1 +*> \verbatim +*> VN1 is DOUBLE PRECISION array, dimension (N) +*> The vector with the partial column norms. +*> \endverbatim +*> +*> \param[in,out] VN2 +*> \verbatim +*> VN2 is DOUBLE PRECISION array, dimension (N) +*> The vector with the exact column norms. +*> \endverbatim +*> +*> \param[in,out] AUXV +*> \verbatim +*> AUXV is COMPLEX*16 array, dimension (NB) +*> Auxiliar vector. +*> \endverbatim +*> +*> \param[in,out] F +*> \verbatim +*> F is COMPLEX*16 array, dimension (LDF,NB) +*> Matrix F**H = L * Y**H * A. +*> \endverbatim +*> +*> \param[in] LDF +*> \verbatim +*> LDF is INTEGER +*> The leading dimension of the array F. LDF >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain +*> X. Sun, Computer Science Dept., Duke University, USA +*> \n +*> Partial column norm updating strategy modified on April 2011 +*> Z. Drmac and Z. Bujanovic, Dept. of Mathematics, +*> University of Zagreb, Croatia. +* +*> \par References: +* ================ +*> +*> LAPACK Working Note 176 +* +*> \htmlonly +*> [PDF] +*> \endhtmlonly +* +* ===================================================================== + SUBROUTINE ZLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1, + $ VN2, AUXV, F, LDF ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER KB, LDA, LDF, M, N, NB, OFFSET +* .. +* .. Array Arguments .. + INTEGER JPVT( * ) + DOUBLE PRECISION VN1( * ), VN2( * ) + COMPLEX*16 A( LDA, * ), AUXV( * ), F( LDF, * ), TAU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + COMPLEX*16 CZERO, CONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, + $ CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER ITEMP, J, K, LASTRK, LSTICC, PVT, RK + DOUBLE PRECISION TEMP, TEMP2, TOL3Z + COMPLEX*16 AKK +* .. +* .. External Subroutines .. + EXTERNAL ZGEMM, ZGEMV, ZLARFG, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG, MAX, MIN, NINT, SQRT +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DZNRM2 + EXTERNAL IDAMAX, DLAMCH, DZNRM2 +* .. +* .. Executable Statements .. +* + LASTRK = MIN( M, N+OFFSET ) + LSTICC = 0 + K = 0 + TOL3Z = SQRT(DLAMCH('Epsilon')) +* +* Beginning of while loop. +* + 10 CONTINUE + IF( ( K.LT.NB ) .AND. ( LSTICC.EQ.0 ) ) THEN + K = K + 1 + RK = OFFSET + K +* +* Determine ith pivot column and swap if necessary +* + PVT = ( K-1 ) + IDAMAX( N-K+1, VN1( K ), 1 ) + IF( PVT.NE.K ) THEN + CALL ZSWAP( M, A( 1, PVT ), 1, A( 1, K ), 1 ) + CALL ZSWAP( K-1, F( PVT, 1 ), LDF, F( K, 1 ), LDF ) + ITEMP = JPVT( PVT ) + JPVT( PVT ) = JPVT( K ) + JPVT( K ) = ITEMP + VN1( PVT ) = VN1( K ) + VN2( PVT ) = VN2( K ) + END IF +* +* Apply previous Householder reflectors to column K: +* A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)**H. +* + IF( K.GT.1 ) THEN + DO 20 J = 1, K - 1 + F( K, J ) = DCONJG( F( K, J ) ) + 20 CONTINUE + CALL ZGEMV( 'No transpose', M-RK+1, K-1, -CONE, A( RK, 1 ), + $ LDA, F( K, 1 ), LDF, CONE, A( RK, K ), 1 ) + DO 30 J = 1, K - 1 + F( K, J ) = DCONJG( F( K, J ) ) + 30 CONTINUE + END IF +* +* Generate elementary reflector H(k). +* + IF( RK.LT.M ) THEN + CALL ZLARFG( M-RK+1, A( RK, K ), A( RK+1, K ), 1, TAU( K ) ) + ELSE + CALL ZLARFG( 1, A( RK, K ), A( RK, K ), 1, TAU( K ) ) + END IF +* + AKK = A( RK, K ) + A( RK, K ) = CONE +* +* Compute Kth column of F: +* +* Compute F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)**H*A(RK:M,K). +* + IF( K.LT.N ) THEN + CALL ZGEMV( 'Conjugate transpose', M-RK+1, N-K, TAU( K ), + $ A( RK, K+1 ), LDA, A( RK, K ), 1, CZERO, + $ F( K+1, K ), 1 ) + END IF +* +* Padding F(1:K,K) with zeros. +* + DO 40 J = 1, K + F( J, K ) = CZERO + 40 CONTINUE +* +* Incremental updating of F: +* F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)**H +* *A(RK:M,K). +* + IF( K.GT.1 ) THEN + CALL ZGEMV( 'Conjugate transpose', M-RK+1, K-1, -TAU( K ), + $ A( RK, 1 ), LDA, A( RK, K ), 1, CZERO, + $ AUXV( 1 ), 1 ) +* + CALL ZGEMV( 'No transpose', N, K-1, CONE, F( 1, 1 ), LDF, + $ AUXV( 1 ), 1, CONE, F( 1, K ), 1 ) + END IF +* +* Update the current row of A: +* A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)**H. +* + IF( K.LT.N ) THEN + CALL ZGEMM( 'No transpose', 'Conjugate transpose', 1, N-K, + $ K, -CONE, A( RK, 1 ), LDA, F( K+1, 1 ), LDF, + $ CONE, A( RK, K+1 ), LDA ) + END IF +* +* Update partial column norms. +* + IF( RK.LT.LASTRK ) THEN + DO 50 J = K + 1, N + IF( VN1( J ).NE.ZERO ) THEN +* +* NOTE: The following 4 lines follow from the analysis in +* Lapack Working Note 176. +* + TEMP = ABS( A( RK, J ) ) / VN1( J ) + TEMP = MAX( ZERO, ( ONE+TEMP )*( ONE-TEMP ) ) + TEMP2 = TEMP*( VN1( J ) / VN2( J ) )**2 + IF( TEMP2 .LE. TOL3Z ) THEN + VN2( J ) = DBLE( LSTICC ) + LSTICC = J + ELSE + VN1( J ) = VN1( J )*SQRT( TEMP ) + END IF + END IF + 50 CONTINUE + END IF +* + A( RK, K ) = AKK +* +* End of while loop. +* + GO TO 10 + END IF + KB = K + RK = OFFSET + KB +* +* Apply the block reflector to the rest of the matrix: +* A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) - +* A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)**H. +* + IF( KB.LT.MIN( N, M-OFFSET ) ) THEN + CALL ZGEMM( 'No transpose', 'Conjugate transpose', M-RK, N-KB, + $ KB, -CONE, A( RK+1, 1 ), LDA, F( KB+1, 1 ), LDF, + $ CONE, A( RK+1, KB+1 ), LDA ) + END IF +* +* Recomputation of difficult columns. +* + 60 CONTINUE + IF( LSTICC.GT.0 ) THEN + ITEMP = NINT( VN2( LSTICC ) ) + VN1( LSTICC ) = DZNRM2( M-RK, A( RK+1, LSTICC ), 1 ) +* +* NOTE: The computation of VN1( LSTICC ) relies on the fact that +* SNRM2 does not fail on vectors with norm below the value of +* SQRT(DLAMCH('S')) +* + VN2( LSTICC ) = VN1( LSTICC ) + LSTICC = ITEMP + GO TO 60 + END IF +* + RETURN +* +* End of ZLAQPS +* + END diff --git a/dspl/liblapack/SRC/zlaqr0.f b/dspl/liblapack/SRC/zlaqr0.f new file mode 100644 index 0000000..59b8ed7 --- /dev/null +++ b/dspl/liblapack/SRC/zlaqr0.f @@ -0,0 +1,703 @@ +*> \brief \b ZLAQR0 computes the eigenvalues of a Hessenberg matrix, and optionally the matrices from the Schur decomposition. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAQR0 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, +* IHIZ, Z, LDZ, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* COMPLEX*16 H( LDH, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAQR0 computes the eigenvalues of a Hessenberg matrix H +*> and, optionally, the matrices T and Z from the Schur decomposition +*> H = Z T Z**H, where T is an upper triangular matrix (the +*> Schur form), and Z is the unitary matrix of Schur vectors. +*> +*> Optionally Z may be postmultiplied into an input unitary +*> matrix Q so that this routine can give the Schur factorization +*> of a matrix A which has been reduced to the Hessenberg form H +*> by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> = .TRUE. : the full Schur form T is required; +*> = .FALSE.: only eigenvalues are required. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> = .TRUE. : the matrix of Schur vectors Z is required; +*> = .FALSE.: Schur vectors are not required. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N .GE. 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> It is assumed that H is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, +*> H(ILO,ILO-1) is zero. ILO and IHI are normally set by a +*> previous call to ZGEBAL, and then passed to ZGEHRD when the +*> matrix output by ZGEBAL is reduced to Hessenberg form. +*> Otherwise, ILO and IHI should be set to 1 and N, +*> respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. +*> If N = 0, then ILO = 1 and IHI = 0. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is COMPLEX*16 array, dimension (LDH,N) +*> On entry, the upper Hessenberg matrix H. +*> On exit, if INFO = 0 and WANTT is .TRUE., then H +*> contains the upper triangular matrix T from the Schur +*> decomposition (the Schur form). If INFO = 0 and WANT is +*> .FALSE., then the contents of H are unspecified on exit. +*> (The output value of H when INFO.GT.0 is given under the +*> description of INFO below.) +*> +*> This subroutine may explicitly set H(i,j) = 0 for i.GT.j and +*> j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH .GE. max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX*16 array, dimension (N) +*> The computed eigenvalues of H(ILO:IHI,ILO:IHI) are stored +*> in W(ILO:IHI). If WANTT is .TRUE., then the eigenvalues are +*> stored in the same order as on the diagonal of the Schur +*> form returned in H, with W(i) = H(i,i). +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. +*> 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ,IHI) +*> If WANTZ is .FALSE., then Z is not referenced. +*> If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is +*> replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the +*> orthogonal Schur factor of H(ILO:IHI,ILO:IHI). +*> (The output value of Z when INFO.GT.0 is given under +*> the description of INFO below.) +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. if WANTZ is .TRUE. +*> then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension LWORK +*> On exit, if LWORK = -1, WORK(1) returns an estimate of +*> the optimal value for LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK .GE. max(1,N) +*> is sufficient, but LWORK typically as large as 6*N may +*> be required for optimal performance. A workspace query +*> to determine the optimal workspace size is recommended. +*> +*> If LWORK = -1, then ZLAQR0 does a workspace query. +*> In this case, ZLAQR0 checks the input parameters and +*> estimates the optimal workspace size for the given +*> values of N, ILO and IHI. The estimate is returned +*> in WORK(1). No error message related to LWORK is +*> issued by XERBLA. Neither H nor Z are accessed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> .GT. 0: if INFO = i, ZLAQR0 failed to compute all of +*> the eigenvalues. Elements 1:ilo-1 and i+1:n of WR +*> and WI contain those eigenvalues which have been +*> successfully computed. (Failures are rare.) +*> +*> If INFO .GT. 0 and WANT is .FALSE., then on exit, +*> the remaining unconverged eigenvalues are the eigen- +*> values of the upper Hessenberg matrix rows and +*> columns ILO through INFO of the final, output +*> value of H. +*> +*> If INFO .GT. 0 and WANTT is .TRUE., then on exit +*> +*> (*) (initial value of H)*U = U*(final value of H) +*> +*> where U is a unitary matrix. The final +*> value of H is upper Hessenberg and triangular in +*> rows and columns INFO+1 through IHI. +*> +*> If INFO .GT. 0 and WANTZ is .TRUE., then on exit +*> +*> (final value of Z(ILO:IHI,ILOZ:IHIZ) +*> = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U +*> +*> where U is the unitary matrix in (*) (regard- +*> less of the value of WANTT.) +*> +*> If INFO .GT. 0 and WANTZ is .FALSE., then Z is not +*> accessed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +* +*> \par References: +* ================ +*> +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages +*> 929--947, 2002. +*> \n +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part II: Aggressive Early Deflation, SIAM Journal +*> of Matrix Analysis, volume 23, pages 948--973, 2002. +*> +* ===================================================================== + SUBROUTINE ZLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, + $ IHIZ, Z, LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + COMPLEX*16 H( LDH, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ================================================================ +* +* .. Parameters .. +* +* ==== Matrices of order NTINY or smaller must be processed by +* . ZLAHQR because of insufficient subdiagonal scratch space. +* . (This is a hard limit.) ==== + INTEGER NTINY + PARAMETER ( NTINY = 11 ) +* +* ==== Exceptional deflation windows: try to cure rare +* . slow convergence by varying the size of the +* . deflation window after KEXNW iterations. ==== + INTEGER KEXNW + PARAMETER ( KEXNW = 5 ) +* +* ==== Exceptional shifts: try to cure rare slow convergence +* . with ad-hoc exceptional shifts every KEXSH iterations. +* . ==== + INTEGER KEXSH + PARAMETER ( KEXSH = 6 ) +* +* ==== The constant WILK1 is used to form the exceptional +* . shifts. ==== + DOUBLE PRECISION WILK1 + PARAMETER ( WILK1 = 0.75d0 ) + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0d0, 0.0d0 ), + $ ONE = ( 1.0d0, 0.0d0 ) ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0d0 ) +* .. +* .. Local Scalars .. + COMPLEX*16 AA, BB, CC, CDUM, DD, DET, RTDISC, SWAP, TR2 + DOUBLE PRECISION S + INTEGER I, INF, IT, ITMAX, K, KACC22, KBOT, KDU, KS, + $ KT, KTOP, KU, KV, KWH, KWTOP, KWV, LD, LS, + $ LWKOPT, NDEC, NDFL, NH, NHO, NIBBLE, NMIN, NS, + $ NSMAX, NSR, NVE, NW, NWMAX, NWR, NWUPBD + LOGICAL SORTED + CHARACTER JBCMPZ*2 +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Local Arrays .. + COMPLEX*16 ZDUM( 1, 1 ) +* .. +* .. External Subroutines .. + EXTERNAL ZLACPY, ZLAHQR, ZLAQR3, ZLAQR4, ZLAQR5 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DIMAG, INT, MAX, MIN, MOD, + $ SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. + INFO = 0 +* +* ==== Quick return for N = 0: nothing to do. ==== +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = ONE + RETURN + END IF +* + IF( N.LE.NTINY ) THEN +* +* ==== Tiny matrices must use ZLAHQR. ==== +* + LWKOPT = 1 + IF( LWORK.NE.-1 ) + $ CALL ZLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, + $ IHIZ, Z, LDZ, INFO ) + ELSE +* +* ==== Use small bulge multi-shift QR with aggressive early +* . deflation on larger-than-tiny matrices. ==== +* +* ==== Hope for the best. ==== +* + INFO = 0 +* +* ==== Set up job flags for ILAENV. ==== +* + IF( WANTT ) THEN + JBCMPZ( 1: 1 ) = 'S' + ELSE + JBCMPZ( 1: 1 ) = 'E' + END IF + IF( WANTZ ) THEN + JBCMPZ( 2: 2 ) = 'V' + ELSE + JBCMPZ( 2: 2 ) = 'N' + END IF +* +* ==== NWR = recommended deflation window size. At this +* . point, N .GT. NTINY = 11, so there is enough +* . subdiagonal workspace for NWR.GE.2 as required. +* . (In fact, there is enough subdiagonal space for +* . NWR.GE.3.) ==== +* + NWR = ILAENV( 13, 'ZLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NWR = MAX( 2, NWR ) + NWR = MIN( IHI-ILO+1, ( N-1 ) / 3, NWR ) +* +* ==== NSR = recommended number of simultaneous shifts. +* . At this point N .GT. NTINY = 11, so there is at +* . enough subdiagonal workspace for NSR to be even +* . and greater than or equal to two as required. ==== +* + NSR = ILAENV( 15, 'ZLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NSR = MIN( NSR, ( N+6 ) / 9, IHI-ILO ) + NSR = MAX( 2, NSR-MOD( NSR, 2 ) ) +* +* ==== Estimate optimal workspace ==== +* +* ==== Workspace query call to ZLAQR3 ==== +* + CALL ZLAQR3( WANTT, WANTZ, N, ILO, IHI, NWR+1, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, W, H, LDH, N, H, LDH, N, H, + $ LDH, WORK, -1 ) +* +* ==== Optimal workspace = MAX(ZLAQR5, ZLAQR3) ==== +* + LWKOPT = MAX( 3*NSR / 2, INT( WORK( 1 ) ) ) +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = DCMPLX( LWKOPT, 0 ) + RETURN + END IF +* +* ==== ZLAHQR/ZLAQR0 crossover point ==== +* + NMIN = ILAENV( 12, 'ZLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NMIN = MAX( NTINY, NMIN ) +* +* ==== Nibble crossover point ==== +* + NIBBLE = ILAENV( 14, 'ZLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + NIBBLE = MAX( 0, NIBBLE ) +* +* ==== Accumulate reflections during ttswp? Use block +* . 2-by-2 structure during matrix-matrix multiply? ==== +* + KACC22 = ILAENV( 16, 'ZLAQR0', JBCMPZ, N, ILO, IHI, LWORK ) + KACC22 = MAX( 0, KACC22 ) + KACC22 = MIN( 2, KACC22 ) +* +* ==== NWMAX = the largest possible deflation window for +* . which there is sufficient workspace. ==== +* + NWMAX = MIN( ( N-1 ) / 3, LWORK / 2 ) + NW = NWMAX +* +* ==== NSMAX = the Largest number of simultaneous shifts +* . for which there is sufficient workspace. ==== +* + NSMAX = MIN( ( N+6 ) / 9, 2*LWORK / 3 ) + NSMAX = NSMAX - MOD( NSMAX, 2 ) +* +* ==== NDFL: an iteration count restarted at deflation. ==== +* + NDFL = 1 +* +* ==== ITMAX = iteration limit ==== +* + ITMAX = MAX( 30, 2*KEXSH )*MAX( 10, ( IHI-ILO+1 ) ) +* +* ==== Last row and column in the active block ==== +* + KBOT = IHI +* +* ==== Main Loop ==== +* + DO 70 IT = 1, ITMAX +* +* ==== Done when KBOT falls below ILO ==== +* + IF( KBOT.LT.ILO ) + $ GO TO 80 +* +* ==== Locate active block ==== +* + DO 10 K = KBOT, ILO + 1, -1 + IF( H( K, K-1 ).EQ.ZERO ) + $ GO TO 20 + 10 CONTINUE + K = ILO + 20 CONTINUE + KTOP = K +* +* ==== Select deflation window size: +* . Typical Case: +* . If possible and advisable, nibble the entire +* . active block. If not, use size MIN(NWR,NWMAX) +* . or MIN(NWR+1,NWMAX) depending upon which has +* . the smaller corresponding subdiagonal entry +* . (a heuristic). +* . +* . Exceptional Case: +* . If there have been no deflations in KEXNW or +* . more iterations, then vary the deflation window +* . size. At first, because, larger windows are, +* . in general, more powerful than smaller ones, +* . rapidly increase the window to the maximum possible. +* . Then, gradually reduce the window size. ==== +* + NH = KBOT - KTOP + 1 + NWUPBD = MIN( NH, NWMAX ) + IF( NDFL.LT.KEXNW ) THEN + NW = MIN( NWUPBD, NWR ) + ELSE + NW = MIN( NWUPBD, 2*NW ) + END IF + IF( NW.LT.NWMAX ) THEN + IF( NW.GE.NH-1 ) THEN + NW = NH + ELSE + KWTOP = KBOT - NW + 1 + IF( CABS1( H( KWTOP, KWTOP-1 ) ).GT. + $ CABS1( H( KWTOP-1, KWTOP-2 ) ) )NW = NW + 1 + END IF + END IF + IF( NDFL.LT.KEXNW ) THEN + NDEC = -1 + ELSE IF( NDEC.GE.0 .OR. NW.GE.NWUPBD ) THEN + NDEC = NDEC + 1 + IF( NW-NDEC.LT.2 ) + $ NDEC = 0 + NW = NW - NDEC + END IF +* +* ==== Aggressive early deflation: +* . split workspace under the subdiagonal into +* . - an nw-by-nw work array V in the lower +* . left-hand-corner, +* . - an NW-by-at-least-NW-but-more-is-better +* . (NW-by-NHO) horizontal work array along +* . the bottom edge, +* . - an at-least-NW-but-more-is-better (NHV-by-NW) +* . vertical work array along the left-hand-edge. +* . ==== +* + KV = N - NW + 1 + KT = NW + 1 + NHO = ( N-NW-1 ) - KT + 1 + KWV = NW + 2 + NVE = ( N-NW ) - KWV + 1 +* +* ==== Aggressive early deflation ==== +* + CALL ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, W, H( KV, 1 ), LDH, NHO, + $ H( KV, KT ), LDH, NVE, H( KWV, 1 ), LDH, WORK, + $ LWORK ) +* +* ==== Adjust KBOT accounting for new deflations. ==== +* + KBOT = KBOT - LD +* +* ==== KS points to the shifts. ==== +* + KS = KBOT - LS + 1 +* +* ==== Skip an expensive QR sweep if there is a (partly +* . heuristic) reason to expect that many eigenvalues +* . will deflate without it. Here, the QR sweep is +* . skipped if many eigenvalues have just been deflated +* . or if the remaining active block is small. +* + IF( ( LD.EQ.0 ) .OR. ( ( 100*LD.LE.NW*NIBBLE ) .AND. ( KBOT- + $ KTOP+1.GT.MIN( NMIN, NWMAX ) ) ) ) THEN +* +* ==== NS = nominal number of simultaneous shifts. +* . This may be lowered (slightly) if ZLAQR3 +* . did not provide that many shifts. ==== +* + NS = MIN( NSMAX, NSR, MAX( 2, KBOT-KTOP ) ) + NS = NS - MOD( NS, 2 ) +* +* ==== If there have been no deflations +* . in a multiple of KEXSH iterations, +* . then try exceptional shifts. +* . Otherwise use shifts provided by +* . ZLAQR3 above or from the eigenvalues +* . of a trailing principal submatrix. ==== +* + IF( MOD( NDFL, KEXSH ).EQ.0 ) THEN + KS = KBOT - NS + 1 + DO 30 I = KBOT, KS + 1, -2 + W( I ) = H( I, I ) + WILK1*CABS1( H( I, I-1 ) ) + W( I-1 ) = W( I ) + 30 CONTINUE + ELSE +* +* ==== Got NS/2 or fewer shifts? Use ZLAQR4 or +* . ZLAHQR on a trailing principal submatrix to +* . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, +* . there is enough space below the subdiagonal +* . to fit an NS-by-NS scratch array.) ==== +* + IF( KBOT-KS+1.LE.NS / 2 ) THEN + KS = KBOT - NS + 1 + KT = N - NS + 1 + CALL ZLACPY( 'A', NS, NS, H( KS, KS ), LDH, + $ H( KT, 1 ), LDH ) + IF( NS.GT.NMIN ) THEN + CALL ZLAQR4( .false., .false., NS, 1, NS, + $ H( KT, 1 ), LDH, W( KS ), 1, 1, + $ ZDUM, 1, WORK, LWORK, INF ) + ELSE + CALL ZLAHQR( .false., .false., NS, 1, NS, + $ H( KT, 1 ), LDH, W( KS ), 1, 1, + $ ZDUM, 1, INF ) + END IF + KS = KS + INF +* +* ==== In case of a rare QR failure use +* . eigenvalues of the trailing 2-by-2 +* . principal submatrix. Scale to avoid +* . overflows, underflows and subnormals. +* . (The scale factor S can not be zero, +* . because H(KBOT,KBOT-1) is nonzero.) ==== +* + IF( KS.GE.KBOT ) THEN + S = CABS1( H( KBOT-1, KBOT-1 ) ) + + $ CABS1( H( KBOT, KBOT-1 ) ) + + $ CABS1( H( KBOT-1, KBOT ) ) + + $ CABS1( H( KBOT, KBOT ) ) + AA = H( KBOT-1, KBOT-1 ) / S + CC = H( KBOT, KBOT-1 ) / S + BB = H( KBOT-1, KBOT ) / S + DD = H( KBOT, KBOT ) / S + TR2 = ( AA+DD ) / TWO + DET = ( AA-TR2 )*( DD-TR2 ) - BB*CC + RTDISC = SQRT( -DET ) + W( KBOT-1 ) = ( TR2+RTDISC )*S + W( KBOT ) = ( TR2-RTDISC )*S +* + KS = KBOT - 1 + END IF + END IF +* + IF( KBOT-KS+1.GT.NS ) THEN +* +* ==== Sort the shifts (Helps a little) ==== +* + SORTED = .false. + DO 50 K = KBOT, KS + 1, -1 + IF( SORTED ) + $ GO TO 60 + SORTED = .true. + DO 40 I = KS, K - 1 + IF( CABS1( W( I ) ).LT.CABS1( W( I+1 ) ) ) + $ THEN + SORTED = .false. + SWAP = W( I ) + W( I ) = W( I+1 ) + W( I+1 ) = SWAP + END IF + 40 CONTINUE + 50 CONTINUE + 60 CONTINUE + END IF + END IF +* +* ==== If there are only two shifts, then use +* . only one. ==== +* + IF( KBOT-KS+1.EQ.2 ) THEN + IF( CABS1( W( KBOT )-H( KBOT, KBOT ) ).LT. + $ CABS1( W( KBOT-1 )-H( KBOT, KBOT ) ) ) THEN + W( KBOT-1 ) = W( KBOT ) + ELSE + W( KBOT ) = W( KBOT-1 ) + END IF + END IF +* +* ==== Use up to NS of the the smallest magnatiude +* . shifts. If there aren't NS shifts available, +* . then use them all, possibly dropping one to +* . make the number of shifts even. ==== +* + NS = MIN( NS, KBOT-KS+1 ) + NS = NS - MOD( NS, 2 ) + KS = KBOT - NS + 1 +* +* ==== Small-bulge multi-shift QR sweep: +* . split workspace under the subdiagonal into +* . - a KDU-by-KDU work array U in the lower +* . left-hand-corner, +* . - a KDU-by-at-least-KDU-but-more-is-better +* . (KDU-by-NHo) horizontal work array WH along +* . the bottom edge, +* . - and an at-least-KDU-but-more-is-better-by-KDU +* . (NVE-by-KDU) vertical work WV arrow along +* . the left-hand-edge. ==== +* + KDU = 3*NS - 3 + KU = N - KDU + 1 + KWH = KDU + 1 + NHO = ( N-KDU+1-4 ) - ( KDU+1 ) + 1 + KWV = KDU + 4 + NVE = N - KDU - KWV + 1 +* +* ==== Small-bulge multi-shift QR sweep ==== +* + CALL ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NS, + $ W( KS ), H, LDH, ILOZ, IHIZ, Z, LDZ, WORK, + $ 3, H( KU, 1 ), LDH, NVE, H( KWV, 1 ), LDH, + $ NHO, H( KU, KWH ), LDH ) + END IF +* +* ==== Note progress (or the lack of it). ==== +* + IF( LD.GT.0 ) THEN + NDFL = 1 + ELSE + NDFL = NDFL + 1 + END IF +* +* ==== End of main loop ==== + 70 CONTINUE +* +* ==== Iteration limit exceeded. Set INFO to show where +* . the problem occurred and exit. ==== +* + INFO = KBOT + 80 CONTINUE + END IF +* +* ==== Return the optimal value of LWORK. ==== +* + WORK( 1 ) = DCMPLX( LWKOPT, 0 ) +* +* ==== End of ZLAQR0 ==== +* + END diff --git a/dspl/liblapack/SRC/zlaqr1.f b/dspl/liblapack/SRC/zlaqr1.f new file mode 100644 index 0000000..03afb87 --- /dev/null +++ b/dspl/liblapack/SRC/zlaqr1.f @@ -0,0 +1,172 @@ +*> \brief \b ZLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and specified shifts. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAQR1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAQR1( N, H, LDH, S1, S2, V ) +* +* .. Scalar Arguments .. +* COMPLEX*16 S1, S2 +* INTEGER LDH, N +* .. +* .. Array Arguments .. +* COMPLEX*16 H( LDH, * ), V( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Given a 2-by-2 or 3-by-3 matrix H, ZLAQR1 sets v to a +*> scalar multiple of the first column of the product +*> +*> (*) K = (H - s1*I)*(H - s2*I) +*> +*> scaling to avoid overflows and most underflows. +*> +*> This is useful for starting double implicit shift bulges +*> in the QR algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> Order of the matrix H. N must be either 2 or 3. +*> \endverbatim +*> +*> \param[in] H +*> \verbatim +*> H is COMPLEX*16 array, dimension (LDH,N) +*> The 2-by-2 or 3-by-3 matrix H in (*). +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of H as declared in +*> the calling procedure. LDH.GE.N +*> \endverbatim +*> +*> \param[in] S1 +*> \verbatim +*> S1 is COMPLEX*16 +*> \endverbatim +*> +*> \param[in] S2 +*> \verbatim +*> S2 is COMPLEX*16 +*> +*> S1 and S2 are the shifts defining K in (*) above. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (N) +*> A scalar multiple of the first column of the +*> matrix K in (*). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +*> +* ===================================================================== + SUBROUTINE ZLAQR1( N, H, LDH, S1, S2, V ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + COMPLEX*16 S1, S2 + INTEGER LDH, N +* .. +* .. Array Arguments .. + COMPLEX*16 H( LDH, * ), V( * ) +* .. +* +* ================================================================ +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0d0, 0.0d0 ) ) + DOUBLE PRECISION RZERO + PARAMETER ( RZERO = 0.0d0 ) +* .. +* .. Local Scalars .. + COMPLEX*16 CDUM, H21S, H31S + DOUBLE PRECISION S +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. + IF( N.EQ.2 ) THEN + S = CABS1( H( 1, 1 )-S2 ) + CABS1( H( 2, 1 ) ) + IF( S.EQ.RZERO ) THEN + V( 1 ) = ZERO + V( 2 ) = ZERO + ELSE + H21S = H( 2, 1 ) / S + V( 1 ) = H21S*H( 1, 2 ) + ( H( 1, 1 )-S1 )* + $ ( ( H( 1, 1 )-S2 ) / S ) + V( 2 ) = H21S*( H( 1, 1 )+H( 2, 2 )-S1-S2 ) + END IF + ELSE + S = CABS1( H( 1, 1 )-S2 ) + CABS1( H( 2, 1 ) ) + + $ CABS1( H( 3, 1 ) ) + IF( S.EQ.ZERO ) THEN + V( 1 ) = ZERO + V( 2 ) = ZERO + V( 3 ) = ZERO + ELSE + H21S = H( 2, 1 ) / S + H31S = H( 3, 1 ) / S + V( 1 ) = ( H( 1, 1 )-S1 )*( ( H( 1, 1 )-S2 ) / S ) + + $ H( 1, 2 )*H21S + H( 1, 3 )*H31S + V( 2 ) = H21S*( H( 1, 1 )+H( 2, 2 )-S1-S2 ) + H( 2, 3 )*H31S + V( 3 ) = H31S*( H( 1, 1 )+H( 3, 3 )-S1-S2 ) + H21S*H( 3, 2 ) + END IF + END IF + END diff --git a/dspl/liblapack/SRC/zlaqr2.f b/dspl/liblapack/SRC/zlaqr2.f new file mode 100644 index 0000000..e6e2ea4 --- /dev/null +++ b/dspl/liblapack/SRC/zlaqr2.f @@ -0,0 +1,567 @@ +*> \brief \b ZLAQR2 performs the unitary similarity transformation of a Hessenberg matrix to detect and deflate fully converged eigenvalues from a trailing principal submatrix (aggressive early deflation). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAQR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, +* IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT, +* NV, WV, LDWV, WORK, LWORK ) +* +* .. Scalar Arguments .. +* INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, +* $ LDZ, LWORK, N, ND, NH, NS, NV, NW +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* COMPLEX*16 H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ), +* $ WORK( * ), WV( LDWV, * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAQR2 is identical to ZLAQR3 except that it avoids +*> recursion by calling ZLAHQR instead of ZLAQR4. +*> +*> Aggressive early deflation: +*> +*> ZLAQR2 accepts as input an upper Hessenberg matrix +*> H and performs an unitary similarity transformation +*> designed to detect and deflate fully converged eigenvalues from +*> a trailing principal submatrix. On output H has been over- +*> written by a new Hessenberg matrix that is a perturbation of +*> an unitary similarity transformation of H. It is to be +*> hoped that the final version of H has many zero subdiagonal +*> entries. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> If .TRUE., then the Hessenberg matrix H is fully updated +*> so that the triangular Schur factor may be +*> computed (in cooperation with the calling subroutine). +*> If .FALSE., then only enough of H is updated to preserve +*> the eigenvalues. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> If .TRUE., then the unitary matrix Z is updated so +*> so that the unitary Schur factor may be computed +*> (in cooperation with the calling subroutine). +*> If .FALSE., then Z is not referenced. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H and (if WANTZ is .TRUE.) the +*> order of the unitary matrix Z. +*> \endverbatim +*> +*> \param[in] KTOP +*> \verbatim +*> KTOP is INTEGER +*> It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. +*> KBOT and KTOP together determine an isolated block +*> along the diagonal of the Hessenberg matrix. +*> \endverbatim +*> +*> \param[in] KBOT +*> \verbatim +*> KBOT is INTEGER +*> It is assumed without a check that either +*> KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together +*> determine an isolated block along the diagonal of the +*> Hessenberg matrix. +*> \endverbatim +*> +*> \param[in] NW +*> \verbatim +*> NW is INTEGER +*> Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is COMPLEX*16 array, dimension (LDH,N) +*> On input the initial N-by-N section of H stores the +*> Hessenberg matrix undergoing aggressive early deflation. +*> On output H has been transformed by a unitary +*> similarity transformation, perturbed, and the returned +*> to Hessenberg form that (it is to be hoped) has some +*> zero subdiagonal entries. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> Leading dimension of H just as declared in the calling +*> subroutine. N .LE. LDH +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ,N) +*> IF WANTZ is .TRUE., then on output, the unitary +*> similarity transformation mentioned above has been +*> accumulated into Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right. +*> If WANTZ is .FALSE., then Z is unreferenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of Z just as declared in the +*> calling subroutine. 1 .LE. LDZ. +*> \endverbatim +*> +*> \param[out] NS +*> \verbatim +*> NS is INTEGER +*> The number of unconverged (ie approximate) eigenvalues +*> returned in SR and SI that may be used as shifts by the +*> calling subroutine. +*> \endverbatim +*> +*> \param[out] ND +*> \verbatim +*> ND is INTEGER +*> The number of converged eigenvalues uncovered by this +*> subroutine. +*> \endverbatim +*> +*> \param[out] SH +*> \verbatim +*> SH is COMPLEX*16 array, dimension (KBOT) +*> On output, approximate eigenvalues that may +*> be used for shifts are stored in SH(KBOT-ND-NS+1) +*> through SR(KBOT-ND). Converged eigenvalues are +*> stored in SH(KBOT-ND+1) through SH(KBOT). +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (LDV,NW) +*> An NW-by-NW work array. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of V just as declared in the +*> calling subroutine. NW .LE. LDV +*> \endverbatim +*> +*> \param[in] NH +*> \verbatim +*> NH is INTEGER +*> The number of columns of T. NH.GE.NW. +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,NW) +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of T just as declared in the +*> calling subroutine. NW .LE. LDT +*> \endverbatim +*> +*> \param[in] NV +*> \verbatim +*> NV is INTEGER +*> The number of rows of work array WV available for +*> workspace. NV.GE.NW. +*> \endverbatim +*> +*> \param[out] WV +*> \verbatim +*> WV is COMPLEX*16 array, dimension (LDWV,NW) +*> \endverbatim +*> +*> \param[in] LDWV +*> \verbatim +*> LDWV is INTEGER +*> The leading dimension of W just as declared in the +*> calling subroutine. NW .LE. LDV +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LWORK) +*> On exit, WORK(1) is set to an estimate of the optimal value +*> of LWORK for the given values of N, NW, KTOP and KBOT. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the work array WORK. LWORK = 2*NW +*> suffices, but greater efficiency may result from larger +*> values of LWORK. +*> +*> If LWORK = -1, then a workspace query is assumed; ZLAQR2 +*> only estimates the optimal workspace size for the given +*> values of N, NW, KTOP and KBOT. The estimate is returned +*> in WORK(1). No error message related to LWORK is issued +*> by XERBLA. Neither H nor Z are accessed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +*> +* ===================================================================== + SUBROUTINE ZLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT, + $ NV, WV, LDWV, WORK, LWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, + $ LDZ, LWORK, N, ND, NH, NS, NV, NW + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + COMPLEX*16 H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ), + $ WORK( * ), WV( LDWV, * ), Z( LDZ, * ) +* .. +* +* ================================================================ +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0d0, 0.0d0 ), + $ ONE = ( 1.0d0, 0.0d0 ) ) + DOUBLE PRECISION RZERO, RONE + PARAMETER ( RZERO = 0.0d0, RONE = 1.0d0 ) +* .. +* .. Local Scalars .. + COMPLEX*16 BETA, CDUM, S, TAU + DOUBLE PRECISION FOO, SAFMAX, SAFMIN, SMLNUM, ULP + INTEGER I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN, + $ KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWKOPT +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, ZCOPY, ZGEHRD, ZGEMM, ZLACPY, ZLAHQR, + $ ZLARF, ZLARFG, ZLASET, ZTREXC, ZUNMHR +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* ==== Estimate optimal workspace. ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + IF( JW.LE.2 ) THEN + LWKOPT = 1 + ELSE +* +* ==== Workspace query call to ZGEHRD ==== +* + CALL ZGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO ) + LWK1 = INT( WORK( 1 ) ) +* +* ==== Workspace query call to ZUNMHR ==== +* + CALL ZUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV, + $ WORK, -1, INFO ) + LWK2 = INT( WORK( 1 ) ) +* +* ==== Optimal workspace ==== +* + LWKOPT = JW + MAX( LWK1, LWK2 ) + END IF +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = DCMPLX( LWKOPT, 0 ) + RETURN + END IF +* +* ==== Nothing to do ... +* ... for an empty active block ... ==== + NS = 0 + ND = 0 + WORK( 1 ) = ONE + IF( KTOP.GT.KBOT ) + $ RETURN +* ... nor for an empty deflation window. ==== + IF( NW.LT.1 ) + $ RETURN +* +* ==== Machine constants ==== +* + SAFMIN = DLAMCH( 'SAFE MINIMUM' ) + SAFMAX = RONE / SAFMIN + CALL DLABAD( SAFMIN, SAFMAX ) + ULP = DLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( DBLE( N ) / ULP ) +* +* ==== Setup deflation window ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + KWTOP = KBOT - JW + 1 + IF( KWTOP.EQ.KTOP ) THEN + S = ZERO + ELSE + S = H( KWTOP, KWTOP-1 ) + END IF +* + IF( KBOT.EQ.KWTOP ) THEN +* +* ==== 1-by-1 deflation window: not much to do ==== +* + SH( KWTOP ) = H( KWTOP, KWTOP ) + NS = 1 + ND = 0 + IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP, + $ KWTOP ) ) ) ) THEN + NS = 0 + ND = 1 + IF( KWTOP.GT.KTOP ) + $ H( KWTOP, KWTOP-1 ) = ZERO + END IF + WORK( 1 ) = ONE + RETURN + END IF +* +* ==== Convert to spike-triangular form. (In case of a +* . rare QR failure, this routine continues to do +* . aggressive early deflation using that part of +* . the deflation window that converged using INFQR +* . here and there to keep track.) ==== +* + CALL ZLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) + CALL ZCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) +* + CALL ZLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) + CALL ZLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1, + $ JW, V, LDV, INFQR ) +* +* ==== Deflation detection loop ==== +* + NS = JW + ILST = INFQR + 1 + DO 10 KNT = INFQR + 1, JW +* +* ==== Small spike tip deflation test ==== +* + FOO = CABS1( T( NS, NS ) ) + IF( FOO.EQ.RZERO ) + $ FOO = CABS1( S ) + IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) + $ THEN +* +* ==== One more converged eigenvalue ==== +* + NS = NS - 1 + ELSE +* +* ==== One undeflatable eigenvalue. Move it up out of the +* . way. (ZTREXC can not fail in this case.) ==== +* + IFST = NS + CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO ) + ILST = ILST + 1 + END IF + 10 CONTINUE +* +* ==== Return to Hessenberg form ==== +* + IF( NS.EQ.0 ) + $ S = ZERO +* + IF( NS.LT.JW ) THEN +* +* ==== sorting the diagonal of T improves accuracy for +* . graded matrices. ==== +* + DO 30 I = INFQR + 1, NS + IFST = I + DO 20 J = I + 1, NS + IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) ) + $ IFST = J + 20 CONTINUE + ILST = I + IF( IFST.NE.ILST ) + $ CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO ) + 30 CONTINUE + END IF +* +* ==== Restore shift/eigenvalue array from T ==== +* + DO 40 I = INFQR + 1, JW + SH( KWTOP+I-1 ) = T( I, I ) + 40 CONTINUE +* +* + IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN + IF( NS.GT.1 .AND. S.NE.ZERO ) THEN +* +* ==== Reflect spike back into lower triangle ==== +* + CALL ZCOPY( NS, V, LDV, WORK, 1 ) + DO 50 I = 1, NS + WORK( I ) = DCONJG( WORK( I ) ) + 50 CONTINUE + BETA = WORK( 1 ) + CALL ZLARFG( NS, BETA, WORK( 2 ), 1, TAU ) + WORK( 1 ) = ONE +* + CALL ZLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) +* + CALL ZLARF( 'L', NS, JW, WORK, 1, DCONJG( TAU ), T, LDT, + $ WORK( JW+1 ) ) + CALL ZLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL ZLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, + $ WORK( JW+1 ) ) +* + CALL ZGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), + $ LWORK-JW, INFO ) + END IF +* +* ==== Copy updated reduced window into place ==== +* + IF( KWTOP.GT.1 ) + $ H( KWTOP, KWTOP-1 ) = S*DCONJG( V( 1, 1 ) ) + CALL ZLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH ) + CALL ZCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ), + $ LDH+1 ) +* +* ==== Accumulate orthogonal matrix in order update +* . H and Z, if requested. ==== +* + IF( NS.GT.1 .AND. S.NE.ZERO ) + $ CALL ZUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV, + $ WORK( JW+1 ), LWORK-JW, INFO ) +* +* ==== Update vertical slab in H ==== +* + IF( WANTT ) THEN + LTOP = 1 + ELSE + LTOP = KTOP + END IF + DO 60 KROW = LTOP, KWTOP - 1, NV + KLN = MIN( NV, KWTOP-KROW ) + CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), + $ LDH, V, LDV, ZERO, WV, LDWV ) + CALL ZLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) + 60 CONTINUE +* +* ==== Update horizontal slab in H ==== +* + IF( WANTT ) THEN + DO 70 KCOL = KBOT + 1, N, NH + KLN = MIN( NH, N-KCOL+1 ) + CALL ZGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV, + $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT ) + CALL ZLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ), + $ LDH ) + 70 CONTINUE + END IF +* +* ==== Update vertical slab in Z ==== +* + IF( WANTZ ) THEN + DO 80 KROW = ILOZ, IHIZ, NV + KLN = MIN( NV, IHIZ-KROW+1 ) + CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), + $ LDZ, V, LDV, ZERO, WV, LDWV ) + CALL ZLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), + $ LDZ ) + 80 CONTINUE + END IF + END IF +* +* ==== Return the number of deflations ... ==== +* + ND = JW - NS +* +* ==== ... and the number of shifts. (Subtracting +* . INFQR from the spike length takes care +* . of the case of a rare QR failure while +* . calculating eigenvalues of the deflation +* . window.) ==== +* + NS = NS - INFQR +* +* ==== Return optimal workspace. ==== +* + WORK( 1 ) = DCMPLX( LWKOPT, 0 ) +* +* ==== End of ZLAQR2 ==== +* + END diff --git a/dspl/liblapack/SRC/zlaqr3.f b/dspl/liblapack/SRC/zlaqr3.f new file mode 100644 index 0000000..64ab59f --- /dev/null +++ b/dspl/liblapack/SRC/zlaqr3.f @@ -0,0 +1,578 @@ +*> \brief \b ZLAQR3 performs the unitary similarity transformation of a Hessenberg matrix to detect and deflate fully converged eigenvalues from a trailing principal submatrix (aggressive early deflation). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAQR3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, +* IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT, +* NV, WV, LDWV, WORK, LWORK ) +* +* .. Scalar Arguments .. +* INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, +* $ LDZ, LWORK, N, ND, NH, NS, NV, NW +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* COMPLEX*16 H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ), +* $ WORK( * ), WV( LDWV, * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Aggressive early deflation: +*> +*> ZLAQR3 accepts as input an upper Hessenberg matrix +*> H and performs an unitary similarity transformation +*> designed to detect and deflate fully converged eigenvalues from +*> a trailing principal submatrix. On output H has been over- +*> written by a new Hessenberg matrix that is a perturbation of +*> an unitary similarity transformation of H. It is to be +*> hoped that the final version of H has many zero subdiagonal +*> entries. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> If .TRUE., then the Hessenberg matrix H is fully updated +*> so that the triangular Schur factor may be +*> computed (in cooperation with the calling subroutine). +*> If .FALSE., then only enough of H is updated to preserve +*> the eigenvalues. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> If .TRUE., then the unitary matrix Z is updated so +*> so that the unitary Schur factor may be computed +*> (in cooperation with the calling subroutine). +*> If .FALSE., then Z is not referenced. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H and (if WANTZ is .TRUE.) the +*> order of the unitary matrix Z. +*> \endverbatim +*> +*> \param[in] KTOP +*> \verbatim +*> KTOP is INTEGER +*> It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. +*> KBOT and KTOP together determine an isolated block +*> along the diagonal of the Hessenberg matrix. +*> \endverbatim +*> +*> \param[in] KBOT +*> \verbatim +*> KBOT is INTEGER +*> It is assumed without a check that either +*> KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together +*> determine an isolated block along the diagonal of the +*> Hessenberg matrix. +*> \endverbatim +*> +*> \param[in] NW +*> \verbatim +*> NW is INTEGER +*> Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is COMPLEX*16 array, dimension (LDH,N) +*> On input the initial N-by-N section of H stores the +*> Hessenberg matrix undergoing aggressive early deflation. +*> On output H has been transformed by a unitary +*> similarity transformation, perturbed, and the returned +*> to Hessenberg form that (it is to be hoped) has some +*> zero subdiagonal entries. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> Leading dimension of H just as declared in the calling +*> subroutine. N .LE. LDH +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ,N) +*> IF WANTZ is .TRUE., then on output, the unitary +*> similarity transformation mentioned above has been +*> accumulated into Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right. +*> If WANTZ is .FALSE., then Z is unreferenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of Z just as declared in the +*> calling subroutine. 1 .LE. LDZ. +*> \endverbatim +*> +*> \param[out] NS +*> \verbatim +*> NS is INTEGER +*> The number of unconverged (ie approximate) eigenvalues +*> returned in SR and SI that may be used as shifts by the +*> calling subroutine. +*> \endverbatim +*> +*> \param[out] ND +*> \verbatim +*> ND is INTEGER +*> The number of converged eigenvalues uncovered by this +*> subroutine. +*> \endverbatim +*> +*> \param[out] SH +*> \verbatim +*> SH is COMPLEX*16 array, dimension (KBOT) +*> On output, approximate eigenvalues that may +*> be used for shifts are stored in SH(KBOT-ND-NS+1) +*> through SR(KBOT-ND). Converged eigenvalues are +*> stored in SH(KBOT-ND+1) through SH(KBOT). +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (LDV,NW) +*> An NW-by-NW work array. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of V just as declared in the +*> calling subroutine. NW .LE. LDV +*> \endverbatim +*> +*> \param[in] NH +*> \verbatim +*> NH is INTEGER +*> The number of columns of T. NH.GE.NW. +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,NW) +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of T just as declared in the +*> calling subroutine. NW .LE. LDT +*> \endverbatim +*> +*> \param[in] NV +*> \verbatim +*> NV is INTEGER +*> The number of rows of work array WV available for +*> workspace. NV.GE.NW. +*> \endverbatim +*> +*> \param[out] WV +*> \verbatim +*> WV is COMPLEX*16 array, dimension (LDWV,NW) +*> \endverbatim +*> +*> \param[in] LDWV +*> \verbatim +*> LDWV is INTEGER +*> The leading dimension of W just as declared in the +*> calling subroutine. NW .LE. LDV +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LWORK) +*> On exit, WORK(1) is set to an estimate of the optimal value +*> of LWORK for the given values of N, NW, KTOP and KBOT. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the work array WORK. LWORK = 2*NW +*> suffices, but greater efficiency may result from larger +*> values of LWORK. +*> +*> If LWORK = -1, then a workspace query is assumed; ZLAQR3 +*> only estimates the optimal workspace size for the given +*> values of N, NW, KTOP and KBOT. The estimate is returned +*> in WORK(1). No error message related to LWORK is issued +*> by XERBLA. Neither H nor Z are accessed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +*> +* ===================================================================== + SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT, + $ NV, WV, LDWV, WORK, LWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, + $ LDZ, LWORK, N, ND, NH, NS, NV, NW + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + COMPLEX*16 H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ), + $ WORK( * ), WV( LDWV, * ), Z( LDZ, * ) +* .. +* +* ================================================================ +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0d0, 0.0d0 ), + $ ONE = ( 1.0d0, 0.0d0 ) ) + DOUBLE PRECISION RZERO, RONE + PARAMETER ( RZERO = 0.0d0, RONE = 1.0d0 ) +* .. +* .. Local Scalars .. + COMPLEX*16 BETA, CDUM, S, TAU + DOUBLE PRECISION FOO, SAFMAX, SAFMIN, SMLNUM, ULP + INTEGER I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN, + $ KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3, + $ LWKOPT, NMIN +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + INTEGER ILAENV + EXTERNAL DLAMCH, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, ZCOPY, ZGEHRD, ZGEMM, ZLACPY, ZLAHQR, + $ ZLAQR4, ZLARF, ZLARFG, ZLASET, ZTREXC, ZUNMHR +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* ==== Estimate optimal workspace. ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + IF( JW.LE.2 ) THEN + LWKOPT = 1 + ELSE +* +* ==== Workspace query call to ZGEHRD ==== +* + CALL ZGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO ) + LWK1 = INT( WORK( 1 ) ) +* +* ==== Workspace query call to ZUNMHR ==== +* + CALL ZUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV, + $ WORK, -1, INFO ) + LWK2 = INT( WORK( 1 ) ) +* +* ==== Workspace query call to ZLAQR4 ==== +* + CALL ZLAQR4( .true., .true., JW, 1, JW, T, LDT, SH, 1, JW, V, + $ LDV, WORK, -1, INFQR ) + LWK3 = INT( WORK( 1 ) ) +* +* ==== Optimal workspace ==== +* + LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 ) + END IF +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = DCMPLX( LWKOPT, 0 ) + RETURN + END IF +* +* ==== Nothing to do ... +* ... for an empty active block ... ==== + NS = 0 + ND = 0 + WORK( 1 ) = ONE + IF( KTOP.GT.KBOT ) + $ RETURN +* ... nor for an empty deflation window. ==== + IF( NW.LT.1 ) + $ RETURN +* +* ==== Machine constants ==== +* + SAFMIN = DLAMCH( 'SAFE MINIMUM' ) + SAFMAX = RONE / SAFMIN + CALL DLABAD( SAFMIN, SAFMAX ) + ULP = DLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( DBLE( N ) / ULP ) +* +* ==== Setup deflation window ==== +* + JW = MIN( NW, KBOT-KTOP+1 ) + KWTOP = KBOT - JW + 1 + IF( KWTOP.EQ.KTOP ) THEN + S = ZERO + ELSE + S = H( KWTOP, KWTOP-1 ) + END IF +* + IF( KBOT.EQ.KWTOP ) THEN +* +* ==== 1-by-1 deflation window: not much to do ==== +* + SH( KWTOP ) = H( KWTOP, KWTOP ) + NS = 1 + ND = 0 + IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP, + $ KWTOP ) ) ) ) THEN + NS = 0 + ND = 1 + IF( KWTOP.GT.KTOP ) + $ H( KWTOP, KWTOP-1 ) = ZERO + END IF + WORK( 1 ) = ONE + RETURN + END IF +* +* ==== Convert to spike-triangular form. (In case of a +* . rare QR failure, this routine continues to do +* . aggressive early deflation using that part of +* . the deflation window that converged using INFQR +* . here and there to keep track.) ==== +* + CALL ZLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) + CALL ZCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) +* + CALL ZLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) + NMIN = ILAENV( 12, 'ZLAQR3', 'SV', JW, 1, JW, LWORK ) + IF( JW.GT.NMIN ) THEN + CALL ZLAQR4( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1, + $ JW, V, LDV, WORK, LWORK, INFQR ) + ELSE + CALL ZLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1, + $ JW, V, LDV, INFQR ) + END IF +* +* ==== Deflation detection loop ==== +* + NS = JW + ILST = INFQR + 1 + DO 10 KNT = INFQR + 1, JW +* +* ==== Small spike tip deflation test ==== +* + FOO = CABS1( T( NS, NS ) ) + IF( FOO.EQ.RZERO ) + $ FOO = CABS1( S ) + IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) + $ THEN +* +* ==== One more converged eigenvalue ==== +* + NS = NS - 1 + ELSE +* +* ==== One undeflatable eigenvalue. Move it up out of the +* . way. (ZTREXC can not fail in this case.) ==== +* + IFST = NS + CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO ) + ILST = ILST + 1 + END IF + 10 CONTINUE +* +* ==== Return to Hessenberg form ==== +* + IF( NS.EQ.0 ) + $ S = ZERO +* + IF( NS.LT.JW ) THEN +* +* ==== sorting the diagonal of T improves accuracy for +* . graded matrices. ==== +* + DO 30 I = INFQR + 1, NS + IFST = I + DO 20 J = I + 1, NS + IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) ) + $ IFST = J + 20 CONTINUE + ILST = I + IF( IFST.NE.ILST ) + $ CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO ) + 30 CONTINUE + END IF +* +* ==== Restore shift/eigenvalue array from T ==== +* + DO 40 I = INFQR + 1, JW + SH( KWTOP+I-1 ) = T( I, I ) + 40 CONTINUE +* +* + IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN + IF( NS.GT.1 .AND. S.NE.ZERO ) THEN +* +* ==== Reflect spike back into lower triangle ==== +* + CALL ZCOPY( NS, V, LDV, WORK, 1 ) + DO 50 I = 1, NS + WORK( I ) = DCONJG( WORK( I ) ) + 50 CONTINUE + BETA = WORK( 1 ) + CALL ZLARFG( NS, BETA, WORK( 2 ), 1, TAU ) + WORK( 1 ) = ONE +* + CALL ZLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) +* + CALL ZLARF( 'L', NS, JW, WORK, 1, DCONJG( TAU ), T, LDT, + $ WORK( JW+1 ) ) + CALL ZLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, + $ WORK( JW+1 ) ) + CALL ZLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, + $ WORK( JW+1 ) ) +* + CALL ZGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), + $ LWORK-JW, INFO ) + END IF +* +* ==== Copy updated reduced window into place ==== +* + IF( KWTOP.GT.1 ) + $ H( KWTOP, KWTOP-1 ) = S*DCONJG( V( 1, 1 ) ) + CALL ZLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH ) + CALL ZCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ), + $ LDH+1 ) +* +* ==== Accumulate orthogonal matrix in order update +* . H and Z, if requested. ==== +* + IF( NS.GT.1 .AND. S.NE.ZERO ) + $ CALL ZUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV, + $ WORK( JW+1 ), LWORK-JW, INFO ) +* +* ==== Update vertical slab in H ==== +* + IF( WANTT ) THEN + LTOP = 1 + ELSE + LTOP = KTOP + END IF + DO 60 KROW = LTOP, KWTOP - 1, NV + KLN = MIN( NV, KWTOP-KROW ) + CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), + $ LDH, V, LDV, ZERO, WV, LDWV ) + CALL ZLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) + 60 CONTINUE +* +* ==== Update horizontal slab in H ==== +* + IF( WANTT ) THEN + DO 70 KCOL = KBOT + 1, N, NH + KLN = MIN( NH, N-KCOL+1 ) + CALL ZGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV, + $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT ) + CALL ZLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ), + $ LDH ) + 70 CONTINUE + END IF +* +* ==== Update vertical slab in Z ==== +* + IF( WANTZ ) THEN + DO 80 KROW = ILOZ, IHIZ, NV + KLN = MIN( NV, IHIZ-KROW+1 ) + CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), + $ LDZ, V, LDV, ZERO, WV, LDWV ) + CALL ZLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), + $ LDZ ) + 80 CONTINUE + END IF + END IF +* +* ==== Return the number of deflations ... ==== +* + ND = JW - NS +* +* ==== ... and the number of shifts. (Subtracting +* . INFQR from the spike length takes care +* . of the case of a rare QR failure while +* . calculating eigenvalues of the deflation +* . window.) ==== +* + NS = NS - INFQR +* +* ==== Return optimal workspace. ==== +* + WORK( 1 ) = DCMPLX( LWKOPT, 0 ) +* +* ==== End of ZLAQR3 ==== +* + END diff --git a/dspl/liblapack/SRC/zlaqr4.f b/dspl/liblapack/SRC/zlaqr4.f new file mode 100644 index 0000000..012fa37 --- /dev/null +++ b/dspl/liblapack/SRC/zlaqr4.f @@ -0,0 +1,703 @@ +*> \brief \b ZLAQR4 computes the eigenvalues of a Hessenberg matrix, and optionally the matrices from the Schur decomposition. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAQR4 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAQR4( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, +* IHIZ, Z, LDZ, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* COMPLEX*16 H( LDH, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAQR4 implements one level of recursion for ZLAQR0. +*> It is a complete implementation of the small bulge multi-shift +*> QR algorithm. It may be called by ZLAQR0 and, for large enough +*> deflation window size, it may be called by ZLAQR3. This +*> subroutine is identical to ZLAQR0 except that it calls ZLAQR2 +*> instead of ZLAQR3. +*> +*> ZLAQR4 computes the eigenvalues of a Hessenberg matrix H +*> and, optionally, the matrices T and Z from the Schur decomposition +*> H = Z T Z**H, where T is an upper triangular matrix (the +*> Schur form), and Z is the unitary matrix of Schur vectors. +*> +*> Optionally Z may be postmultiplied into an input unitary +*> matrix Q so that this routine can give the Schur factorization +*> of a matrix A which has been reduced to the Hessenberg form H +*> by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> = .TRUE. : the full Schur form T is required; +*> = .FALSE.: only eigenvalues are required. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> = .TRUE. : the matrix of Schur vectors Z is required; +*> = .FALSE.: Schur vectors are not required. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix H. N .GE. 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> It is assumed that H is already upper triangular in rows +*> and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, +*> H(ILO,ILO-1) is zero. ILO and IHI are normally set by a +*> previous call to ZGEBAL, and then passed to ZGEHRD when the +*> matrix output by ZGEBAL is reduced to Hessenberg form. +*> Otherwise, ILO and IHI should be set to 1 and N, +*> respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. +*> If N = 0, then ILO = 1 and IHI = 0. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is COMPLEX*16 array, dimension (LDH,N) +*> On entry, the upper Hessenberg matrix H. +*> On exit, if INFO = 0 and WANTT is .TRUE., then H +*> contains the upper triangular matrix T from the Schur +*> decomposition (the Schur form). If INFO = 0 and WANT is +*> .FALSE., then the contents of H are unspecified on exit. +*> (The output value of H when INFO.GT.0 is given under the +*> description of INFO below.) +*> +*> This subroutine may explicitly set H(i,j) = 0 for i.GT.j and +*> j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the array H. LDH .GE. max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX*16 array, dimension (N) +*> The computed eigenvalues of H(ILO:IHI,ILO:IHI) are stored +*> in W(ILO:IHI). If WANTT is .TRUE., then the eigenvalues are +*> stored in the same order as on the diagonal of the Schur +*> form returned in H, with W(i) = H(i,i). +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. +*> 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ,IHI) +*> If WANTZ is .FALSE., then Z is not referenced. +*> If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is +*> replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the +*> orthogonal Schur factor of H(ILO:IHI,ILO:IHI). +*> (The output value of Z when INFO.GT.0 is given under +*> the description of INFO below.) +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. if WANTZ is .TRUE. +*> then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension LWORK +*> On exit, if LWORK = -1, WORK(1) returns an estimate of +*> the optimal value for LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK .GE. max(1,N) +*> is sufficient, but LWORK typically as large as 6*N may +*> be required for optimal performance. A workspace query +*> to determine the optimal workspace size is recommended. +*> +*> If LWORK = -1, then ZLAQR4 does a workspace query. +*> In this case, ZLAQR4 checks the input parameters and +*> estimates the optimal workspace size for the given +*> values of N, ILO and IHI. The estimate is returned +*> in WORK(1). No error message related to LWORK is +*> issued by XERBLA. Neither H nor Z are accessed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> .GT. 0: if INFO = i, ZLAQR4 failed to compute all of +*> the eigenvalues. Elements 1:ilo-1 and i+1:n of WR +*> and WI contain those eigenvalues which have been +*> successfully computed. (Failures are rare.) +*> +*> If INFO .GT. 0 and WANT is .FALSE., then on exit, +*> the remaining unconverged eigenvalues are the eigen- +*> values of the upper Hessenberg matrix rows and +*> columns ILO through INFO of the final, output +*> value of H. +*> +*> If INFO .GT. 0 and WANTT is .TRUE., then on exit +*> +*> (*) (initial value of H)*U = U*(final value of H) +*> +*> where U is a unitary matrix. The final +*> value of H is upper Hessenberg and triangular in +*> rows and columns INFO+1 through IHI. +*> +*> If INFO .GT. 0 and WANTZ is .TRUE., then on exit +*> +*> (final value of Z(ILO:IHI,ILOZ:IHIZ) +*> = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U +*> +*> where U is the unitary matrix in (*) (regard- +*> less of the value of WANTT.) +*> +*> If INFO .GT. 0 and WANTZ is .FALSE., then Z is not +*> accessed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +* +*> \par References: +* ================ +*> +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages +*> 929--947, 2002. +*> \n +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part II: Aggressive Early Deflation, SIAM Journal +*> of Matrix Analysis, volume 23, pages 948--973, 2002. +*> +* ===================================================================== + SUBROUTINE ZLAQR4( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, + $ IHIZ, Z, LDZ, WORK, LWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, LWORK, N + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + COMPLEX*16 H( LDH, * ), W( * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ================================================================ +* +* .. Parameters .. +* +* ==== Matrices of order NTINY or smaller must be processed by +* . ZLAHQR because of insufficient subdiagonal scratch space. +* . (This is a hard limit.) ==== + INTEGER NTINY + PARAMETER ( NTINY = 11 ) +* +* ==== Exceptional deflation windows: try to cure rare +* . slow convergence by varying the size of the +* . deflation window after KEXNW iterations. ==== + INTEGER KEXNW + PARAMETER ( KEXNW = 5 ) +* +* ==== Exceptional shifts: try to cure rare slow convergence +* . with ad-hoc exceptional shifts every KEXSH iterations. +* . ==== + INTEGER KEXSH + PARAMETER ( KEXSH = 6 ) +* +* ==== The constant WILK1 is used to form the exceptional +* . shifts. ==== + DOUBLE PRECISION WILK1 + PARAMETER ( WILK1 = 0.75d0 ) + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0d0, 0.0d0 ), + $ ONE = ( 1.0d0, 0.0d0 ) ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0d0 ) +* .. +* .. Local Scalars .. + COMPLEX*16 AA, BB, CC, CDUM, DD, DET, RTDISC, SWAP, TR2 + DOUBLE PRECISION S + INTEGER I, INF, IT, ITMAX, K, KACC22, KBOT, KDU, KS, + $ KT, KTOP, KU, KV, KWH, KWTOP, KWV, LD, LS, + $ LWKOPT, NDEC, NDFL, NH, NHO, NIBBLE, NMIN, NS, + $ NSMAX, NSR, NVE, NW, NWMAX, NWR, NWUPBD + LOGICAL SORTED + CHARACTER JBCMPZ*2 +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Local Arrays .. + COMPLEX*16 ZDUM( 1, 1 ) +* .. +* .. External Subroutines .. + EXTERNAL ZLACPY, ZLAHQR, ZLAQR2, ZLAQR5 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DIMAG, INT, MAX, MIN, MOD, + $ SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. + INFO = 0 +* +* ==== Quick return for N = 0: nothing to do. ==== +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = ONE + RETURN + END IF +* + IF( N.LE.NTINY ) THEN +* +* ==== Tiny matrices must use ZLAHQR. ==== +* + LWKOPT = 1 + IF( LWORK.NE.-1 ) + $ CALL ZLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ, + $ IHIZ, Z, LDZ, INFO ) + ELSE +* +* ==== Use small bulge multi-shift QR with aggressive early +* . deflation on larger-than-tiny matrices. ==== +* +* ==== Hope for the best. ==== +* + INFO = 0 +* +* ==== Set up job flags for ILAENV. ==== +* + IF( WANTT ) THEN + JBCMPZ( 1: 1 ) = 'S' + ELSE + JBCMPZ( 1: 1 ) = 'E' + END IF + IF( WANTZ ) THEN + JBCMPZ( 2: 2 ) = 'V' + ELSE + JBCMPZ( 2: 2 ) = 'N' + END IF +* +* ==== NWR = recommended deflation window size. At this +* . point, N .GT. NTINY = 11, so there is enough +* . subdiagonal workspace for NWR.GE.2 as required. +* . (In fact, there is enough subdiagonal space for +* . NWR.GE.3.) ==== +* + NWR = ILAENV( 13, 'ZLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NWR = MAX( 2, NWR ) + NWR = MIN( IHI-ILO+1, ( N-1 ) / 3, NWR ) +* +* ==== NSR = recommended number of simultaneous shifts. +* . At this point N .GT. NTINY = 11, so there is at +* . enough subdiagonal workspace for NSR to be even +* . and greater than or equal to two as required. ==== +* + NSR = ILAENV( 15, 'ZLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NSR = MIN( NSR, ( N+6 ) / 9, IHI-ILO ) + NSR = MAX( 2, NSR-MOD( NSR, 2 ) ) +* +* ==== Estimate optimal workspace ==== +* +* ==== Workspace query call to ZLAQR2 ==== +* + CALL ZLAQR2( WANTT, WANTZ, N, ILO, IHI, NWR+1, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, W, H, LDH, N, H, LDH, N, H, + $ LDH, WORK, -1 ) +* +* ==== Optimal workspace = MAX(ZLAQR5, ZLAQR2) ==== +* + LWKOPT = MAX( 3*NSR / 2, INT( WORK( 1 ) ) ) +* +* ==== Quick return in case of workspace query. ==== +* + IF( LWORK.EQ.-1 ) THEN + WORK( 1 ) = DCMPLX( LWKOPT, 0 ) + RETURN + END IF +* +* ==== ZLAHQR/ZLAQR0 crossover point ==== +* + NMIN = ILAENV( 12, 'ZLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NMIN = MAX( NTINY, NMIN ) +* +* ==== Nibble crossover point ==== +* + NIBBLE = ILAENV( 14, 'ZLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + NIBBLE = MAX( 0, NIBBLE ) +* +* ==== Accumulate reflections during ttswp? Use block +* . 2-by-2 structure during matrix-matrix multiply? ==== +* + KACC22 = ILAENV( 16, 'ZLAQR4', JBCMPZ, N, ILO, IHI, LWORK ) + KACC22 = MAX( 0, KACC22 ) + KACC22 = MIN( 2, KACC22 ) +* +* ==== NWMAX = the largest possible deflation window for +* . which there is sufficient workspace. ==== +* + NWMAX = MIN( ( N-1 ) / 3, LWORK / 2 ) + NW = NWMAX +* +* ==== NSMAX = the Largest number of simultaneous shifts +* . for which there is sufficient workspace. ==== +* + NSMAX = MIN( ( N+6 ) / 9, 2*LWORK / 3 ) + NSMAX = NSMAX - MOD( NSMAX, 2 ) +* +* ==== NDFL: an iteration count restarted at deflation. ==== +* + NDFL = 1 +* +* ==== ITMAX = iteration limit ==== +* + ITMAX = MAX( 30, 2*KEXSH )*MAX( 10, ( IHI-ILO+1 ) ) +* +* ==== Last row and column in the active block ==== +* + KBOT = IHI +* +* ==== Main Loop ==== +* + DO 70 IT = 1, ITMAX +* +* ==== Done when KBOT falls below ILO ==== +* + IF( KBOT.LT.ILO ) + $ GO TO 80 +* +* ==== Locate active block ==== +* + DO 10 K = KBOT, ILO + 1, -1 + IF( H( K, K-1 ).EQ.ZERO ) + $ GO TO 20 + 10 CONTINUE + K = ILO + 20 CONTINUE + KTOP = K +* +* ==== Select deflation window size: +* . Typical Case: +* . If possible and advisable, nibble the entire +* . active block. If not, use size MIN(NWR,NWMAX) +* . or MIN(NWR+1,NWMAX) depending upon which has +* . the smaller corresponding subdiagonal entry +* . (a heuristic). +* . +* . Exceptional Case: +* . If there have been no deflations in KEXNW or +* . more iterations, then vary the deflation window +* . size. At first, because, larger windows are, +* . in general, more powerful than smaller ones, +* . rapidly increase the window to the maximum possible. +* . Then, gradually reduce the window size. ==== +* + NH = KBOT - KTOP + 1 + NWUPBD = MIN( NH, NWMAX ) + IF( NDFL.LT.KEXNW ) THEN + NW = MIN( NWUPBD, NWR ) + ELSE + NW = MIN( NWUPBD, 2*NW ) + END IF + IF( NW.LT.NWMAX ) THEN + IF( NW.GE.NH-1 ) THEN + NW = NH + ELSE + KWTOP = KBOT - NW + 1 + IF( CABS1( H( KWTOP, KWTOP-1 ) ).GT. + $ CABS1( H( KWTOP-1, KWTOP-2 ) ) )NW = NW + 1 + END IF + END IF + IF( NDFL.LT.KEXNW ) THEN + NDEC = -1 + ELSE IF( NDEC.GE.0 .OR. NW.GE.NWUPBD ) THEN + NDEC = NDEC + 1 + IF( NW-NDEC.LT.2 ) + $ NDEC = 0 + NW = NW - NDEC + END IF +* +* ==== Aggressive early deflation: +* . split workspace under the subdiagonal into +* . - an nw-by-nw work array V in the lower +* . left-hand-corner, +* . - an NW-by-at-least-NW-but-more-is-better +* . (NW-by-NHO) horizontal work array along +* . the bottom edge, +* . - an at-least-NW-but-more-is-better (NHV-by-NW) +* . vertical work array along the left-hand-edge. +* . ==== +* + KV = N - NW + 1 + KT = NW + 1 + NHO = ( N-NW-1 ) - KT + 1 + KWV = NW + 2 + NVE = ( N-NW ) - KWV + 1 +* +* ==== Aggressive early deflation ==== +* + CALL ZLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, + $ IHIZ, Z, LDZ, LS, LD, W, H( KV, 1 ), LDH, NHO, + $ H( KV, KT ), LDH, NVE, H( KWV, 1 ), LDH, WORK, + $ LWORK ) +* +* ==== Adjust KBOT accounting for new deflations. ==== +* + KBOT = KBOT - LD +* +* ==== KS points to the shifts. ==== +* + KS = KBOT - LS + 1 +* +* ==== Skip an expensive QR sweep if there is a (partly +* . heuristic) reason to expect that many eigenvalues +* . will deflate without it. Here, the QR sweep is +* . skipped if many eigenvalues have just been deflated +* . or if the remaining active block is small. +* + IF( ( LD.EQ.0 ) .OR. ( ( 100*LD.LE.NW*NIBBLE ) .AND. ( KBOT- + $ KTOP+1.GT.MIN( NMIN, NWMAX ) ) ) ) THEN +* +* ==== NS = nominal number of simultaneous shifts. +* . This may be lowered (slightly) if ZLAQR2 +* . did not provide that many shifts. ==== +* + NS = MIN( NSMAX, NSR, MAX( 2, KBOT-KTOP ) ) + NS = NS - MOD( NS, 2 ) +* +* ==== If there have been no deflations +* . in a multiple of KEXSH iterations, +* . then try exceptional shifts. +* . Otherwise use shifts provided by +* . ZLAQR2 above or from the eigenvalues +* . of a trailing principal submatrix. ==== +* + IF( MOD( NDFL, KEXSH ).EQ.0 ) THEN + KS = KBOT - NS + 1 + DO 30 I = KBOT, KS + 1, -2 + W( I ) = H( I, I ) + WILK1*CABS1( H( I, I-1 ) ) + W( I-1 ) = W( I ) + 30 CONTINUE + ELSE +* +* ==== Got NS/2 or fewer shifts? Use ZLAHQR +* . on a trailing principal submatrix to +* . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, +* . there is enough space below the subdiagonal +* . to fit an NS-by-NS scratch array.) ==== +* + IF( KBOT-KS+1.LE.NS / 2 ) THEN + KS = KBOT - NS + 1 + KT = N - NS + 1 + CALL ZLACPY( 'A', NS, NS, H( KS, KS ), LDH, + $ H( KT, 1 ), LDH ) + CALL ZLAHQR( .false., .false., NS, 1, NS, + $ H( KT, 1 ), LDH, W( KS ), 1, 1, ZDUM, + $ 1, INF ) + KS = KS + INF +* +* ==== In case of a rare QR failure use +* . eigenvalues of the trailing 2-by-2 +* . principal submatrix. Scale to avoid +* . overflows, underflows and subnormals. +* . (The scale factor S can not be zero, +* . because H(KBOT,KBOT-1) is nonzero.) ==== +* + IF( KS.GE.KBOT ) THEN + S = CABS1( H( KBOT-1, KBOT-1 ) ) + + $ CABS1( H( KBOT, KBOT-1 ) ) + + $ CABS1( H( KBOT-1, KBOT ) ) + + $ CABS1( H( KBOT, KBOT ) ) + AA = H( KBOT-1, KBOT-1 ) / S + CC = H( KBOT, KBOT-1 ) / S + BB = H( KBOT-1, KBOT ) / S + DD = H( KBOT, KBOT ) / S + TR2 = ( AA+DD ) / TWO + DET = ( AA-TR2 )*( DD-TR2 ) - BB*CC + RTDISC = SQRT( -DET ) + W( KBOT-1 ) = ( TR2+RTDISC )*S + W( KBOT ) = ( TR2-RTDISC )*S +* + KS = KBOT - 1 + END IF + END IF +* + IF( KBOT-KS+1.GT.NS ) THEN +* +* ==== Sort the shifts (Helps a little) ==== +* + SORTED = .false. + DO 50 K = KBOT, KS + 1, -1 + IF( SORTED ) + $ GO TO 60 + SORTED = .true. + DO 40 I = KS, K - 1 + IF( CABS1( W( I ) ).LT.CABS1( W( I+1 ) ) ) + $ THEN + SORTED = .false. + SWAP = W( I ) + W( I ) = W( I+1 ) + W( I+1 ) = SWAP + END IF + 40 CONTINUE + 50 CONTINUE + 60 CONTINUE + END IF + END IF +* +* ==== If there are only two shifts, then use +* . only one. ==== +* + IF( KBOT-KS+1.EQ.2 ) THEN + IF( CABS1( W( KBOT )-H( KBOT, KBOT ) ).LT. + $ CABS1( W( KBOT-1 )-H( KBOT, KBOT ) ) ) THEN + W( KBOT-1 ) = W( KBOT ) + ELSE + W( KBOT ) = W( KBOT-1 ) + END IF + END IF +* +* ==== Use up to NS of the the smallest magnatiude +* . shifts. If there aren't NS shifts available, +* . then use them all, possibly dropping one to +* . make the number of shifts even. ==== +* + NS = MIN( NS, KBOT-KS+1 ) + NS = NS - MOD( NS, 2 ) + KS = KBOT - NS + 1 +* +* ==== Small-bulge multi-shift QR sweep: +* . split workspace under the subdiagonal into +* . - a KDU-by-KDU work array U in the lower +* . left-hand-corner, +* . - a KDU-by-at-least-KDU-but-more-is-better +* . (KDU-by-NHo) horizontal work array WH along +* . the bottom edge, +* . - and an at-least-KDU-but-more-is-better-by-KDU +* . (NVE-by-KDU) vertical work WV arrow along +* . the left-hand-edge. ==== +* + KDU = 3*NS - 3 + KU = N - KDU + 1 + KWH = KDU + 1 + NHO = ( N-KDU+1-4 ) - ( KDU+1 ) + 1 + KWV = KDU + 4 + NVE = N - KDU - KWV + 1 +* +* ==== Small-bulge multi-shift QR sweep ==== +* + CALL ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NS, + $ W( KS ), H, LDH, ILOZ, IHIZ, Z, LDZ, WORK, + $ 3, H( KU, 1 ), LDH, NVE, H( KWV, 1 ), LDH, + $ NHO, H( KU, KWH ), LDH ) + END IF +* +* ==== Note progress (or the lack of it). ==== +* + IF( LD.GT.0 ) THEN + NDFL = 1 + ELSE + NDFL = NDFL + 1 + END IF +* +* ==== End of main loop ==== + 70 CONTINUE +* +* ==== Iteration limit exceeded. Set INFO to show where +* . the problem occurred and exit. ==== +* + INFO = KBOT + 80 CONTINUE + END IF +* +* ==== Return the optimal value of LWORK. ==== +* + WORK( 1 ) = DCMPLX( LWKOPT, 0 ) +* +* ==== End of ZLAQR4 ==== +* + END diff --git a/dspl/liblapack/SRC/zlaqr5.f b/dspl/liblapack/SRC/zlaqr5.f new file mode 100644 index 0000000..0dfbce8 --- /dev/null +++ b/dspl/liblapack/SRC/zlaqr5.f @@ -0,0 +1,908 @@ +*> \brief \b ZLAQR5 performs a single small-bulge multi-shift QR sweep. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAQR5 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, +* H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, +* WV, LDWV, NH, WH, LDWH ) +* +* .. Scalar Arguments .. +* INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV, +* $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV +* LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. +* COMPLEX*16 H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ), +* $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAQR5, called by ZLAQR0, performs a +*> single small-bulge multi-shift QR sweep. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTT +*> \verbatim +*> WANTT is LOGICAL +*> WANTT = .true. if the triangular Schur factor +*> is being computed. WANTT is set to .false. otherwise. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> WANTZ = .true. if the unitary Schur factor is being +*> computed. WANTZ is set to .false. otherwise. +*> \endverbatim +*> +*> \param[in] KACC22 +*> \verbatim +*> KACC22 is INTEGER with value 0, 1, or 2. +*> Specifies the computation mode of far-from-diagonal +*> orthogonal updates. +*> = 0: ZLAQR5 does not accumulate reflections and does not +*> use matrix-matrix multiply to update far-from-diagonal +*> matrix entries. +*> = 1: ZLAQR5 accumulates reflections and uses matrix-matrix +*> multiply to update the far-from-diagonal matrix entries. +*> = 2: ZLAQR5 accumulates reflections, uses matrix-matrix +*> multiply to update the far-from-diagonal matrix entries, +*> and takes advantage of 2-by-2 block structure during +*> matrix multiplies. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> N is the order of the Hessenberg matrix H upon which this +*> subroutine operates. +*> \endverbatim +*> +*> \param[in] KTOP +*> \verbatim +*> KTOP is INTEGER +*> \endverbatim +*> +*> \param[in] KBOT +*> \verbatim +*> KBOT is INTEGER +*> These are the first and last rows and columns of an +*> isolated diagonal block upon which the QR sweep is to be +*> applied. It is assumed without a check that +*> either KTOP = 1 or H(KTOP,KTOP-1) = 0 +*> and +*> either KBOT = N or H(KBOT+1,KBOT) = 0. +*> \endverbatim +*> +*> \param[in] NSHFTS +*> \verbatim +*> NSHFTS is INTEGER +*> NSHFTS gives the number of simultaneous shifts. NSHFTS +*> must be positive and even. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is COMPLEX*16 array, dimension (NSHFTS) +*> S contains the shifts of origin that define the multi- +*> shift QR sweep. On output S may be reordered. +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is COMPLEX*16 array, dimension (LDH,N) +*> On input H contains a Hessenberg matrix. On output a +*> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied +*> to the isolated diagonal block in rows and columns KTOP +*> through KBOT. +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> LDH is the leading dimension of H just as declared in the +*> calling procedure. LDH.GE.MAX(1,N). +*> \endverbatim +*> +*> \param[in] ILOZ +*> \verbatim +*> ILOZ is INTEGER +*> \endverbatim +*> +*> \param[in] IHIZ +*> \verbatim +*> IHIZ is INTEGER +*> Specify the rows of Z to which transformations must be +*> applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ,IHIZ) +*> If WANTZ = .TRUE., then the QR Sweep unitary +*> similarity transformation is accumulated into +*> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right. +*> If WANTZ = .FALSE., then Z is unreferenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> LDA is the leading dimension of Z just as declared in +*> the calling procedure. LDZ.GE.N. +*> \endverbatim +*> +*> \param[out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (LDV,NSHFTS/2) +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> LDV is the leading dimension of V as declared in the +*> calling procedure. LDV.GE.3. +*> \endverbatim +*> +*> \param[out] U +*> \verbatim +*> U is COMPLEX*16 array, dimension (LDU,3*NSHFTS-3) +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> LDU is the leading dimension of U just as declared in the +*> in the calling subroutine. LDU.GE.3*NSHFTS-3. +*> \endverbatim +*> +*> \param[in] NH +*> \verbatim +*> NH is INTEGER +*> NH is the number of columns in array WH available for +*> workspace. NH.GE.1. +*> \endverbatim +*> +*> \param[out] WH +*> \verbatim +*> WH is COMPLEX*16 array, dimension (LDWH,NH) +*> \endverbatim +*> +*> \param[in] LDWH +*> \verbatim +*> LDWH is INTEGER +*> Leading dimension of WH just as declared in the +*> calling procedure. LDWH.GE.3*NSHFTS-3. +*> \endverbatim +*> +*> \param[in] NV +*> \verbatim +*> NV is INTEGER +*> NV is the number of rows in WV agailable for workspace. +*> NV.GE.1. +*> \endverbatim +*> +*> \param[out] WV +*> \verbatim +*> WV is COMPLEX*16 array, dimension (LDWV,3*NSHFTS-3) +*> \endverbatim +*> +*> \param[in] LDWV +*> \verbatim +*> LDWV is INTEGER +*> LDWV is the leading dimension of WV as declared in the +*> in the calling subroutine. LDWV.GE.NV. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Karen Braman and Ralph Byers, Department of Mathematics, +*> University of Kansas, USA +* +*> \par References: +* ================ +*> +*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR +*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 +*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages +*> 929--947, 2002. +*> +* ===================================================================== + SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S, + $ H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, + $ WV, LDWV, NH, WH, LDWH ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV, + $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV + LOGICAL WANTT, WANTZ +* .. +* .. Array Arguments .. + COMPLEX*16 H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ), + $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * ) +* .. +* +* ================================================================ +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0d0, 0.0d0 ), + $ ONE = ( 1.0d0, 0.0d0 ) ) + DOUBLE PRECISION RZERO, RONE + PARAMETER ( RZERO = 0.0d0, RONE = 1.0d0 ) +* .. +* .. Local Scalars .. + COMPLEX*16 ALPHA, BETA, CDUM, REFSUM + DOUBLE PRECISION H11, H12, H21, H22, SAFMAX, SAFMIN, SCL, + $ SMLNUM, TST1, TST2, ULP + INTEGER I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN, + $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS, + $ M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL, + $ NS, NU + LOGICAL ACCUM, BLK22, BMP22 +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Intrinsic Functions .. +* + INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, MIN, MOD +* .. +* .. Local Arrays .. + COMPLEX*16 VT( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, ZGEMM, ZLACPY, ZLAQR1, ZLARFG, ZLASET, + $ ZTRMM +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* ==== If there are no shifts, then there is nothing to do. ==== +* + IF( NSHFTS.LT.2 ) + $ RETURN +* +* ==== If the active block is empty or 1-by-1, then there +* . is nothing to do. ==== +* + IF( KTOP.GE.KBOT ) + $ RETURN +* +* ==== NSHFTS is supposed to be even, but if it is odd, +* . then simply reduce it by one. ==== +* + NS = NSHFTS - MOD( NSHFTS, 2 ) +* +* ==== Machine constants for deflation ==== +* + SAFMIN = DLAMCH( 'SAFE MINIMUM' ) + SAFMAX = RONE / SAFMIN + CALL DLABAD( SAFMIN, SAFMAX ) + ULP = DLAMCH( 'PRECISION' ) + SMLNUM = SAFMIN*( DBLE( N ) / ULP ) +* +* ==== Use accumulated reflections to update far-from-diagonal +* . entries ? ==== +* + ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 ) +* +* ==== If so, exploit the 2-by-2 block structure? ==== +* + BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 ) +* +* ==== clear trash ==== +* + IF( KTOP+2.LE.KBOT ) + $ H( KTOP+2, KTOP ) = ZERO +* +* ==== NBMPS = number of 2-shift bulges in the chain ==== +* + NBMPS = NS / 2 +* +* ==== KDU = width of slab ==== +* + KDU = 6*NBMPS - 3 +* +* ==== Create and chase chains of NBMPS bulges ==== +* + DO 210 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2 + NDCOL = INCOL + KDU + IF( ACCUM ) + $ CALL ZLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU ) +* +* ==== Near-the-diagonal bulge chase. The following loop +* . performs the near-the-diagonal part of a small bulge +* . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal +* . chunk extends from column INCOL to column NDCOL +* . (including both column INCOL and column NDCOL). The +* . following loop chases a 3*NBMPS column long chain of +* . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL +* . may be less than KTOP and and NDCOL may be greater than +* . KBOT indicating phantom columns from which to chase +* . bulges before they are actually introduced or to which +* . to chase bulges beyond column KBOT.) ==== +* + DO 140 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 ) +* +* ==== Bulges number MTOP to MBOT are active double implicit +* . shift bulges. There may or may not also be small +* . 2-by-2 bulge, if there is room. The inactive bulges +* . (if any) must wait until the active bulges have moved +* . down the diagonal to make room. The phantom matrix +* . paradigm described above helps keep track. ==== +* + MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 ) + MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 ) + M22 = MBOT + 1 + BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ. + $ ( KBOT-2 ) +* +* ==== Generate reflections to chase the chain right +* . one column. (The minimum value of K is KTOP-1.) ==== +* + DO 10 M = MTOP, MBOT + K = KRCOL + 3*( M-1 ) + IF( K.EQ.KTOP-1 ) THEN + CALL ZLAQR1( 3, H( KTOP, KTOP ), LDH, S( 2*M-1 ), + $ S( 2*M ), V( 1, M ) ) + ALPHA = V( 1, M ) + CALL ZLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) ) + ELSE + BETA = H( K+1, K ) + V( 2, M ) = H( K+2, K ) + V( 3, M ) = H( K+3, K ) + CALL ZLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) ) +* +* ==== A Bulge may collapse because of vigilant +* . deflation or destructive underflow. In the +* . underflow case, try the two-small-subdiagonals +* . trick to try to reinflate the bulge. ==== +* + IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE. + $ ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN +* +* ==== Typical case: not collapsed (yet). ==== +* + H( K+1, K ) = BETA + H( K+2, K ) = ZERO + H( K+3, K ) = ZERO + ELSE +* +* ==== Atypical case: collapsed. Attempt to +* . reintroduce ignoring H(K+1,K) and H(K+2,K). +* . If the fill resulting from the new +* . reflector is too large, then abandon it. +* . Otherwise, use the new one. ==== +* + CALL ZLAQR1( 3, H( K+1, K+1 ), LDH, S( 2*M-1 ), + $ S( 2*M ), VT ) + ALPHA = VT( 1 ) + CALL ZLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) ) + REFSUM = DCONJG( VT( 1 ) )* + $ ( H( K+1, K )+DCONJG( VT( 2 ) )* + $ H( K+2, K ) ) +* + IF( CABS1( H( K+2, K )-REFSUM*VT( 2 ) )+ + $ CABS1( REFSUM*VT( 3 ) ).GT.ULP* + $ ( CABS1( H( K, K ) )+CABS1( H( K+1, + $ K+1 ) )+CABS1( H( K+2, K+2 ) ) ) ) THEN +* +* ==== Starting a new bulge here would +* . create non-negligible fill. Use +* . the old one with trepidation. ==== +* + H( K+1, K ) = BETA + H( K+2, K ) = ZERO + H( K+3, K ) = ZERO + ELSE +* +* ==== Stating a new bulge here would +* . create only negligible fill. +* . Replace the old reflector with +* . the new one. ==== +* + H( K+1, K ) = H( K+1, K ) - REFSUM + H( K+2, K ) = ZERO + H( K+3, K ) = ZERO + V( 1, M ) = VT( 1 ) + V( 2, M ) = VT( 2 ) + V( 3, M ) = VT( 3 ) + END IF + END IF + END IF + 10 CONTINUE +* +* ==== Generate a 2-by-2 reflection, if needed. ==== +* + K = KRCOL + 3*( M22-1 ) + IF( BMP22 ) THEN + IF( K.EQ.KTOP-1 ) THEN + CALL ZLAQR1( 2, H( K+1, K+1 ), LDH, S( 2*M22-1 ), + $ S( 2*M22 ), V( 1, M22 ) ) + BETA = V( 1, M22 ) + CALL ZLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) + ELSE + BETA = H( K+1, K ) + V( 2, M22 ) = H( K+2, K ) + CALL ZLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) ) + H( K+1, K ) = BETA + H( K+2, K ) = ZERO + END IF + END IF +* +* ==== Multiply H by reflections from the left ==== +* + IF( ACCUM ) THEN + JBOT = MIN( NDCOL, KBOT ) + ELSE IF( WANTT ) THEN + JBOT = N + ELSE + JBOT = KBOT + END IF + DO 30 J = MAX( KTOP, KRCOL ), JBOT + MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 ) + DO 20 M = MTOP, MEND + K = KRCOL + 3*( M-1 ) + REFSUM = DCONJG( V( 1, M ) )* + $ ( H( K+1, J )+DCONJG( V( 2, M ) )* + $ H( K+2, J )+DCONJG( V( 3, M ) )*H( K+3, J ) ) + H( K+1, J ) = H( K+1, J ) - REFSUM + H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M ) + H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M ) + 20 CONTINUE + 30 CONTINUE + IF( BMP22 ) THEN + K = KRCOL + 3*( M22-1 ) + DO 40 J = MAX( K+1, KTOP ), JBOT + REFSUM = DCONJG( V( 1, M22 ) )* + $ ( H( K+1, J )+DCONJG( V( 2, M22 ) )* + $ H( K+2, J ) ) + H( K+1, J ) = H( K+1, J ) - REFSUM + H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 ) + 40 CONTINUE + END IF +* +* ==== Multiply H by reflections from the right. +* . Delay filling in the last row until the +* . vigilant deflation check is complete. ==== +* + IF( ACCUM ) THEN + JTOP = MAX( KTOP, INCOL ) + ELSE IF( WANTT ) THEN + JTOP = 1 + ELSE + JTOP = KTOP + END IF + DO 80 M = MTOP, MBOT + IF( V( 1, M ).NE.ZERO ) THEN + K = KRCOL + 3*( M-1 ) + DO 50 J = JTOP, MIN( KBOT, K+3 ) + REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )* + $ H( J, K+2 )+V( 3, M )*H( J, K+3 ) ) + H( J, K+1 ) = H( J, K+1 ) - REFSUM + H( J, K+2 ) = H( J, K+2 ) - + $ REFSUM*DCONJG( V( 2, M ) ) + H( J, K+3 ) = H( J, K+3 ) - + $ REFSUM*DCONJG( V( 3, M ) ) + 50 CONTINUE +* + IF( ACCUM ) THEN +* +* ==== Accumulate U. (If necessary, update Z later +* . with with an efficient matrix-matrix +* . multiply.) ==== +* + KMS = K - INCOL + DO 60 J = MAX( 1, KTOP-INCOL ), KDU + REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )* + $ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) ) + U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM + U( J, KMS+2 ) = U( J, KMS+2 ) - + $ REFSUM*DCONJG( V( 2, M ) ) + U( J, KMS+3 ) = U( J, KMS+3 ) - + $ REFSUM*DCONJG( V( 3, M ) ) + 60 CONTINUE + ELSE IF( WANTZ ) THEN +* +* ==== U is not accumulated, so update Z +* . now by multiplying by reflections +* . from the right. ==== +* + DO 70 J = ILOZ, IHIZ + REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )* + $ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) ) + Z( J, K+1 ) = Z( J, K+1 ) - REFSUM + Z( J, K+2 ) = Z( J, K+2 ) - + $ REFSUM*DCONJG( V( 2, M ) ) + Z( J, K+3 ) = Z( J, K+3 ) - + $ REFSUM*DCONJG( V( 3, M ) ) + 70 CONTINUE + END IF + END IF + 80 CONTINUE +* +* ==== Special case: 2-by-2 reflection (if needed) ==== +* + K = KRCOL + 3*( M22-1 ) + IF( BMP22 ) THEN + IF ( V( 1, M22 ).NE.ZERO ) THEN + DO 90 J = JTOP, MIN( KBOT, K+3 ) + REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )* + $ H( J, K+2 ) ) + H( J, K+1 ) = H( J, K+1 ) - REFSUM + H( J, K+2 ) = H( J, K+2 ) - + $ REFSUM*DCONJG( V( 2, M22 ) ) + 90 CONTINUE +* + IF( ACCUM ) THEN + KMS = K - INCOL + DO 100 J = MAX( 1, KTOP-INCOL ), KDU + REFSUM = V( 1, M22 )*( U( J, KMS+1 )+ + $ V( 2, M22 )*U( J, KMS+2 ) ) + U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM + U( J, KMS+2 ) = U( J, KMS+2 ) - + $ REFSUM*DCONJG( V( 2, M22 ) ) + 100 CONTINUE + ELSE IF( WANTZ ) THEN + DO 110 J = ILOZ, IHIZ + REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )* + $ Z( J, K+2 ) ) + Z( J, K+1 ) = Z( J, K+1 ) - REFSUM + Z( J, K+2 ) = Z( J, K+2 ) - + $ REFSUM*DCONJG( V( 2, M22 ) ) + 110 CONTINUE + END IF + END IF + END IF +* +* ==== Vigilant deflation check ==== +* + MSTART = MTOP + IF( KRCOL+3*( MSTART-1 ).LT.KTOP ) + $ MSTART = MSTART + 1 + MEND = MBOT + IF( BMP22 ) + $ MEND = MEND + 1 + IF( KRCOL.EQ.KBOT-2 ) + $ MEND = MEND + 1 + DO 120 M = MSTART, MEND + K = MIN( KBOT-1, KRCOL+3*( M-1 ) ) +* +* ==== The following convergence test requires that +* . the tradition small-compared-to-nearby-diagonals +* . criterion and the Ahues & Tisseur (LAWN 122, 1997) +* . criteria both be satisfied. The latter improves +* . accuracy in some examples. Falling back on an +* . alternate convergence criterion when TST1 or TST2 +* . is zero (as done here) is traditional but probably +* . unnecessary. ==== +* + IF( H( K+1, K ).NE.ZERO ) THEN + TST1 = CABS1( H( K, K ) ) + CABS1( H( K+1, K+1 ) ) + IF( TST1.EQ.RZERO ) THEN + IF( K.GE.KTOP+1 ) + $ TST1 = TST1 + CABS1( H( K, K-1 ) ) + IF( K.GE.KTOP+2 ) + $ TST1 = TST1 + CABS1( H( K, K-2 ) ) + IF( K.GE.KTOP+3 ) + $ TST1 = TST1 + CABS1( H( K, K-3 ) ) + IF( K.LE.KBOT-2 ) + $ TST1 = TST1 + CABS1( H( K+2, K+1 ) ) + IF( K.LE.KBOT-3 ) + $ TST1 = TST1 + CABS1( H( K+3, K+1 ) ) + IF( K.LE.KBOT-4 ) + $ TST1 = TST1 + CABS1( H( K+4, K+1 ) ) + END IF + IF( CABS1( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) ) + $ THEN + H12 = MAX( CABS1( H( K+1, K ) ), + $ CABS1( H( K, K+1 ) ) ) + H21 = MIN( CABS1( H( K+1, K ) ), + $ CABS1( H( K, K+1 ) ) ) + H11 = MAX( CABS1( H( K+1, K+1 ) ), + $ CABS1( H( K, K )-H( K+1, K+1 ) ) ) + H22 = MIN( CABS1( H( K+1, K+1 ) ), + $ CABS1( H( K, K )-H( K+1, K+1 ) ) ) + SCL = H11 + H12 + TST2 = H22*( H11 / SCL ) +* + IF( TST2.EQ.RZERO .OR. H21*( H12 / SCL ).LE. + $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO + END IF + END IF + 120 CONTINUE +* +* ==== Fill in the last row of each bulge. ==== +* + MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 ) + DO 130 M = MTOP, MEND + K = KRCOL + 3*( M-1 ) + REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 ) + H( K+4, K+1 ) = -REFSUM + H( K+4, K+2 ) = -REFSUM*DCONJG( V( 2, M ) ) + H( K+4, K+3 ) = H( K+4, K+3 ) - + $ REFSUM*DCONJG( V( 3, M ) ) + 130 CONTINUE +* +* ==== End of near-the-diagonal bulge chase. ==== +* + 140 CONTINUE +* +* ==== Use U (if accumulated) to update far-from-diagonal +* . entries in H. If required, use U to update Z as +* . well. ==== +* + IF( ACCUM ) THEN + IF( WANTT ) THEN + JTOP = 1 + JBOT = N + ELSE + JTOP = KTOP + JBOT = KBOT + END IF + IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR. + $ ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN +* +* ==== Updates not exploiting the 2-by-2 block +* . structure of U. K1 and NU keep track of +* . the location and size of U in the special +* . cases of introducing bulges and chasing +* . bulges off the bottom. In these special +* . cases and in case the number of shifts +* . is NS = 2, there is no 2-by-2 block +* . structure to exploit. ==== +* + K1 = MAX( 1, KTOP-INCOL ) + NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1 +* +* ==== Horizontal Multiply ==== +* + DO 150 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH + JLEN = MIN( NH, JBOT-JCOL+1 ) + CALL ZGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ), + $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH, + $ LDWH ) + CALL ZLACPY( 'ALL', NU, JLEN, WH, LDWH, + $ H( INCOL+K1, JCOL ), LDH ) + 150 CONTINUE +* +* ==== Vertical multiply ==== +* + DO 160 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV + JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW ) + CALL ZGEMM( 'N', 'N', JLEN, NU, NU, ONE, + $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ), + $ LDU, ZERO, WV, LDWV ) + CALL ZLACPY( 'ALL', JLEN, NU, WV, LDWV, + $ H( JROW, INCOL+K1 ), LDH ) + 160 CONTINUE +* +* ==== Z multiply (also vertical) ==== +* + IF( WANTZ ) THEN + DO 170 JROW = ILOZ, IHIZ, NV + JLEN = MIN( NV, IHIZ-JROW+1 ) + CALL ZGEMM( 'N', 'N', JLEN, NU, NU, ONE, + $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ), + $ LDU, ZERO, WV, LDWV ) + CALL ZLACPY( 'ALL', JLEN, NU, WV, LDWV, + $ Z( JROW, INCOL+K1 ), LDZ ) + 170 CONTINUE + END IF + ELSE +* +* ==== Updates exploiting U's 2-by-2 block structure. +* . (I2, I4, J2, J4 are the last rows and columns +* . of the blocks.) ==== +* + I2 = ( KDU+1 ) / 2 + I4 = KDU + J2 = I4 - I2 + J4 = KDU +* +* ==== KZS and KNZ deal with the band of zeros +* . along the diagonal of one of the triangular +* . blocks. ==== +* + KZS = ( J4-J2 ) - ( NS+1 ) + KNZ = NS + 1 +* +* ==== Horizontal multiply ==== +* + DO 180 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH + JLEN = MIN( NH, JBOT-JCOL+1 ) +* +* ==== Copy bottom of H to top+KZS of scratch ==== +* (The first KZS rows get multiplied by zero.) ==== +* + CALL ZLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ), + $ LDH, WH( KZS+1, 1 ), LDWH ) +* +* ==== Multiply by U21**H ==== +* + CALL ZLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH ) + CALL ZTRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE, + $ U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ), + $ LDWH ) +* +* ==== Multiply top of H by U11**H ==== +* + CALL ZGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU, + $ H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH ) +* +* ==== Copy top of H to bottom of WH ==== +* + CALL ZLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH, + $ WH( I2+1, 1 ), LDWH ) +* +* ==== Multiply by U21**H ==== +* + CALL ZTRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE, + $ U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH ) +* +* ==== Multiply by U22 ==== +* + CALL ZGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE, + $ U( J2+1, I2+1 ), LDU, + $ H( INCOL+1+J2, JCOL ), LDH, ONE, + $ WH( I2+1, 1 ), LDWH ) +* +* ==== Copy it back ==== +* + CALL ZLACPY( 'ALL', KDU, JLEN, WH, LDWH, + $ H( INCOL+1, JCOL ), LDH ) + 180 CONTINUE +* +* ==== Vertical multiply ==== +* + DO 190 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV + JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW ) +* +* ==== Copy right of H to scratch (the first KZS +* . columns get multiplied by zero) ==== +* + CALL ZLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ), + $ LDH, WV( 1, 1+KZS ), LDWV ) +* +* ==== Multiply by U21 ==== +* + CALL ZLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV ) + CALL ZTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE, + $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ), + $ LDWV ) +* +* ==== Multiply by U11 ==== +* + CALL ZGEMM( 'N', 'N', JLEN, I2, J2, ONE, + $ H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV, + $ LDWV ) +* +* ==== Copy left of H to right of scratch ==== +* + CALL ZLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH, + $ WV( 1, 1+I2 ), LDWV ) +* +* ==== Multiply by U21 ==== +* + CALL ZTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE, + $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV ) +* +* ==== Multiply by U22 ==== +* + CALL ZGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE, + $ H( JROW, INCOL+1+J2 ), LDH, + $ U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ), + $ LDWV ) +* +* ==== Copy it back ==== +* + CALL ZLACPY( 'ALL', JLEN, KDU, WV, LDWV, + $ H( JROW, INCOL+1 ), LDH ) + 190 CONTINUE +* +* ==== Multiply Z (also vertical) ==== +* + IF( WANTZ ) THEN + DO 200 JROW = ILOZ, IHIZ, NV + JLEN = MIN( NV, IHIZ-JROW+1 ) +* +* ==== Copy right of Z to left of scratch (first +* . KZS columns get multiplied by zero) ==== +* + CALL ZLACPY( 'ALL', JLEN, KNZ, + $ Z( JROW, INCOL+1+J2 ), LDZ, + $ WV( 1, 1+KZS ), LDWV ) +* +* ==== Multiply by U12 ==== +* + CALL ZLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, + $ LDWV ) + CALL ZTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE, + $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ), + $ LDWV ) +* +* ==== Multiply by U11 ==== +* + CALL ZGEMM( 'N', 'N', JLEN, I2, J2, ONE, + $ Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE, + $ WV, LDWV ) +* +* ==== Copy left of Z to right of scratch ==== +* + CALL ZLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ), + $ LDZ, WV( 1, 1+I2 ), LDWV ) +* +* ==== Multiply by U21 ==== +* + CALL ZTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE, + $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), + $ LDWV ) +* +* ==== Multiply by U22 ==== +* + CALL ZGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE, + $ Z( JROW, INCOL+1+J2 ), LDZ, + $ U( J2+1, I2+1 ), LDU, ONE, + $ WV( 1, 1+I2 ), LDWV ) +* +* ==== Copy the result back to Z ==== +* + CALL ZLACPY( 'ALL', JLEN, KDU, WV, LDWV, + $ Z( JROW, INCOL+1 ), LDZ ) + 200 CONTINUE + END IF + END IF + END IF + 210 CONTINUE +* +* ==== End of ZLAQR5 ==== +* + END diff --git a/dspl/liblapack/SRC/zlaqsb.f b/dspl/liblapack/SRC/zlaqsb.f new file mode 100644 index 0000000..3d3912c --- /dev/null +++ b/dspl/liblapack/SRC/zlaqsb.f @@ -0,0 +1,228 @@ +*> \brief \b ZLAQSB scales a symmetric/Hermitian band matrix, using scaling factors computed by spbequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAQSB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, UPLO +* INTEGER KD, LDAB, N +* DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION S( * ) +* COMPLEX*16 AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAQSB equilibrates a symmetric band matrix A using the scaling +*> factors in the vector S. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of super-diagonals of the matrix A if UPLO = 'U', +*> or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the symmetric band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**H *U or A = L*L**H of the band +*> matrix A, in the same storage format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A. +*> \endverbatim +*> +*> \param[in] SCOND +*> \verbatim +*> SCOND is DOUBLE PRECISION +*> Ratio of the smallest S(i) to the largest S(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies whether or not equilibration was done. +*> = 'N': No equilibration. +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if scaling should be done +*> based on the ratio of the scaling factors. If SCOND < THRESH, +*> scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if scaling should +*> be done based on the absolute size of the largest matrix element. +*> If AMAX > LARGE or AMAX < SMALL, scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER KD, LDAB, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION S( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored in band format. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = MAX( 1, J-KD ), J + AB( KD+1+I-J, J ) = CJ*S( I )*AB( KD+1+I-J, J ) + 10 CONTINUE + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, MIN( N, J+KD ) + AB( 1+I-J, J ) = CJ*S( I )*AB( 1+I-J, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of ZLAQSB +* + END diff --git a/dspl/liblapack/SRC/zlaqsp.f b/dspl/liblapack/SRC/zlaqsp.f new file mode 100644 index 0000000..339408f --- /dev/null +++ b/dspl/liblapack/SRC/zlaqsp.f @@ -0,0 +1,214 @@ +*> \brief \b ZLAQSP scales a symmetric/Hermitian matrix in packed storage, using scaling factors computed by sppequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAQSP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, UPLO +* INTEGER N +* DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION S( * ) +* COMPLEX*16 AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAQSP equilibrates a symmetric matrix A using the scaling factors +*> in the vector S. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the equilibrated matrix: diag(S) * A * diag(S), in +*> the same storage format as A. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A. +*> \endverbatim +*> +*> \param[in] SCOND +*> \verbatim +*> SCOND is DOUBLE PRECISION +*> Ratio of the smallest S(i) to the largest S(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies whether or not equilibration was done. +*> = 'N': No equilibration. +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if scaling should be done +*> based on the ratio of the scaling factors. If SCOND < THRESH, +*> scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if scaling should +*> be done based on the absolute size of the largest matrix element. +*> If AMAX > LARGE or AMAX < SMALL, scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION S( * ) + COMPLEX*16 AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, JC + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + JC = 1 + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J + AP( JC+I-1 ) = CJ*S( I )*AP( JC+I-1 ) + 10 CONTINUE + JC = JC + J + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + JC = 1 + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, N + AP( JC+I-J ) = CJ*S( I )*AP( JC+I-J ) + 30 CONTINUE + JC = JC + N - J + 1 + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of ZLAQSP +* + END diff --git a/dspl/liblapack/SRC/zlaqsy.f b/dspl/liblapack/SRC/zlaqsy.f new file mode 100644 index 0000000..0118201 --- /dev/null +++ b/dspl/liblapack/SRC/zlaqsy.f @@ -0,0 +1,218 @@ +*> \brief \b ZLAQSY scales a symmetric/Hermitian matrix, using scaling factors computed by spoequ. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAQSY + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, UPLO +* INTEGER LDA, N +* DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION S( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAQSY equilibrates a symmetric matrix A using the scaling factors +*> in the vector S. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if EQUED = 'Y', the equilibrated matrix: +*> diag(S) * A * diag(S). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(N,1). +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A. +*> \endverbatim +*> +*> \param[in] SCOND +*> \verbatim +*> SCOND is DOUBLE PRECISION +*> Ratio of the smallest S(i) to the largest S(i). +*> \endverbatim +*> +*> \param[in] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix entry. +*> \endverbatim +*> +*> \param[out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies whether or not equilibration was done. +*> = 'N': No equilibration. +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> THRESH is a threshold value used to decide if scaling should be done +*> based on the ratio of the scaling factors. If SCOND < THRESH, +*> scaling is done. +*> +*> LARGE and SMALL are threshold values used to decide if scaling should +*> be done based on the absolute size of the largest matrix element. +*> If AMAX > LARGE or AMAX < SMALL, scaling is done. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYauxiliary +* +* ===================================================================== + SUBROUTINE ZLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER EQUED, UPLO + INTEGER LDA, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION S( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, THRESH + PARAMETER ( ONE = 1.0D+0, THRESH = 0.1D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION CJ, LARGE, SMALL +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + EQUED = 'N' + RETURN + END IF +* +* Initialize LARGE and SMALL. +* + SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' ) + LARGE = ONE / SMALL +* + IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN +* +* No equilibration +* + EQUED = 'N' + ELSE +* +* Replace A by diag(S) * A * diag(S). +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Upper triangle of A is stored. +* + DO 20 J = 1, N + CJ = S( J ) + DO 10 I = 1, J + A( I, J ) = CJ*S( I )*A( I, J ) + 10 CONTINUE + 20 CONTINUE + ELSE +* +* Lower triangle of A is stored. +* + DO 40 J = 1, N + CJ = S( J ) + DO 30 I = J, N + A( I, J ) = CJ*S( I )*A( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + EQUED = 'Y' + END IF +* + RETURN +* +* End of ZLAQSY +* + END diff --git a/dspl/liblapack/SRC/zlar1v.f b/dspl/liblapack/SRC/zlar1v.f new file mode 100644 index 0000000..bb3a66e --- /dev/null +++ b/dspl/liblapack/SRC/zlar1v.f @@ -0,0 +1,488 @@ +*> \brief \b ZLAR1V computes the (scaled) r-th column of the inverse of the submatrix in rows b1 through bn of the tridiagonal matrix LDLT - λI. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAR1V + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAR1V( N, B1, BN, LAMBDA, D, L, LD, LLD, +* PIVMIN, GAPTOL, Z, WANTNC, NEGCNT, ZTZ, MINGMA, +* R, ISUPPZ, NRMINV, RESID, RQCORR, WORK ) +* +* .. Scalar Arguments .. +* LOGICAL WANTNC +* INTEGER B1, BN, N, NEGCNT, R +* DOUBLE PRECISION GAPTOL, LAMBDA, MINGMA, NRMINV, PIVMIN, RESID, +* $ RQCORR, ZTZ +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ) +* DOUBLE PRECISION D( * ), L( * ), LD( * ), LLD( * ), +* $ WORK( * ) +* COMPLEX*16 Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAR1V computes the (scaled) r-th column of the inverse of +*> the sumbmatrix in rows B1 through BN of the tridiagonal matrix +*> L D L**T - sigma I. When sigma is close to an eigenvalue, the +*> computed vector is an accurate eigenvector. Usually, r corresponds +*> to the index where the eigenvector is largest in magnitude. +*> The following steps accomplish this computation : +*> (a) Stationary qd transform, L D L**T - sigma I = L(+) D(+) L(+)**T, +*> (b) Progressive qd transform, L D L**T - sigma I = U(-) D(-) U(-)**T, +*> (c) Computation of the diagonal elements of the inverse of +*> L D L**T - sigma I by combining the above transforms, and choosing +*> r as the index where the diagonal of the inverse is (one of the) +*> largest in magnitude. +*> (d) Computation of the (scaled) r-th column of the inverse using the +*> twisted factorization obtained by combining the top part of the +*> the stationary and the bottom part of the progressive transform. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix L D L**T. +*> \endverbatim +*> +*> \param[in] B1 +*> \verbatim +*> B1 is INTEGER +*> First index of the submatrix of L D L**T. +*> \endverbatim +*> +*> \param[in] BN +*> \verbatim +*> BN is INTEGER +*> Last index of the submatrix of L D L**T. +*> \endverbatim +*> +*> \param[in] LAMBDA +*> \verbatim +*> LAMBDA is DOUBLE PRECISION +*> The shift. In order to compute an accurate eigenvector, +*> LAMBDA should be a good approximation to an eigenvalue +*> of L D L**T. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) subdiagonal elements of the unit bidiagonal matrix +*> L, in elements 1 to N-1. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the diagonal matrix D. +*> \endverbatim +*> +*> \param[in] LD +*> \verbatim +*> LD is DOUBLE PRECISION array, dimension (N-1) +*> The n-1 elements L(i)*D(i). +*> \endverbatim +*> +*> \param[in] LLD +*> \verbatim +*> LLD is DOUBLE PRECISION array, dimension (N-1) +*> The n-1 elements L(i)*L(i)*D(i). +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is DOUBLE PRECISION +*> The minimum pivot in the Sturm sequence. +*> \endverbatim +*> +*> \param[in] GAPTOL +*> \verbatim +*> GAPTOL is DOUBLE PRECISION +*> Tolerance that indicates when eigenvector entries are negligible +*> w.r.t. their contribution to the residual. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (N) +*> On input, all entries of Z must be set to 0. +*> On output, Z contains the (scaled) r-th column of the +*> inverse. The scaling is such that Z(R) equals 1. +*> \endverbatim +*> +*> \param[in] WANTNC +*> \verbatim +*> WANTNC is LOGICAL +*> Specifies whether NEGCNT has to be computed. +*> \endverbatim +*> +*> \param[out] NEGCNT +*> \verbatim +*> NEGCNT is INTEGER +*> If WANTNC is .TRUE. then NEGCNT = the number of pivots < pivmin +*> in the matrix factorization L D L**T, and NEGCNT = -1 otherwise. +*> \endverbatim +*> +*> \param[out] ZTZ +*> \verbatim +*> ZTZ is DOUBLE PRECISION +*> The square of the 2-norm of Z. +*> \endverbatim +*> +*> \param[out] MINGMA +*> \verbatim +*> MINGMA is DOUBLE PRECISION +*> The reciprocal of the largest (in magnitude) diagonal +*> element of the inverse of L D L**T - sigma I. +*> \endverbatim +*> +*> \param[in,out] R +*> \verbatim +*> R is INTEGER +*> The twist index for the twisted factorization used to +*> compute Z. +*> On input, 0 <= R <= N. If R is input as 0, R is set to +*> the index where (L D L**T - sigma I)^{-1} is largest +*> in magnitude. If 1 <= R <= N, R is unchanged. +*> On output, R contains the twist index used to compute Z. +*> Ideally, R designates the position of the maximum entry in the +*> eigenvector. +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension (2) +*> The support of the vector in Z, i.e., the vector Z is +*> nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). +*> \endverbatim +*> +*> \param[out] NRMINV +*> \verbatim +*> NRMINV is DOUBLE PRECISION +*> NRMINV = 1/SQRT( ZTZ ) +*> \endverbatim +*> +*> \param[out] RESID +*> \verbatim +*> RESID is DOUBLE PRECISION +*> The residual of the FP vector. +*> RESID = ABS( MINGMA )/SQRT( ZTZ ) +*> \endverbatim +*> +*> \param[out] RQCORR +*> \verbatim +*> RQCORR is DOUBLE PRECISION +*> The Rayleigh Quotient correction to LAMBDA. +*> RQCORR = MINGMA*TMP +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE ZLAR1V( N, B1, BN, LAMBDA, D, L, LD, LLD, + $ PIVMIN, GAPTOL, Z, WANTNC, NEGCNT, ZTZ, MINGMA, + $ R, ISUPPZ, NRMINV, RESID, RQCORR, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + LOGICAL WANTNC + INTEGER B1, BN, N, NEGCNT, R + DOUBLE PRECISION GAPTOL, LAMBDA, MINGMA, NRMINV, PIVMIN, RESID, + $ RQCORR, ZTZ +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ) + DOUBLE PRECISION D( * ), L( * ), LD( * ), LLD( * ), + $ WORK( * ) + COMPLEX*16 Z( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D0, 0.0D0 ) ) + +* .. +* .. Local Scalars .. + LOGICAL SAWNAN1, SAWNAN2 + INTEGER I, INDLPL, INDP, INDS, INDUMN, NEG1, NEG2, R1, + $ R2 + DOUBLE PRECISION DMINUS, DPLUS, EPS, S, TMP +* .. +* .. External Functions .. + LOGICAL DISNAN + DOUBLE PRECISION DLAMCH + EXTERNAL DISNAN, DLAMCH +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE +* .. +* .. Executable Statements .. +* + EPS = DLAMCH( 'Precision' ) + + + IF( R.EQ.0 ) THEN + R1 = B1 + R2 = BN + ELSE + R1 = R + R2 = R + END IF + +* Storage for LPLUS + INDLPL = 0 +* Storage for UMINUS + INDUMN = N + INDS = 2*N + 1 + INDP = 3*N + 1 + + IF( B1.EQ.1 ) THEN + WORK( INDS ) = ZERO + ELSE + WORK( INDS+B1-1 ) = LLD( B1-1 ) + END IF + +* +* Compute the stationary transform (using the differential form) +* until the index R2. +* + SAWNAN1 = .FALSE. + NEG1 = 0 + S = WORK( INDS+B1-1 ) - LAMBDA + DO 50 I = B1, R1 - 1 + DPLUS = D( I ) + S + WORK( INDLPL+I ) = LD( I ) / DPLUS + IF(DPLUS.LT.ZERO) NEG1 = NEG1 + 1 + WORK( INDS+I ) = S*WORK( INDLPL+I )*L( I ) + S = WORK( INDS+I ) - LAMBDA + 50 CONTINUE + SAWNAN1 = DISNAN( S ) + IF( SAWNAN1 ) GOTO 60 + DO 51 I = R1, R2 - 1 + DPLUS = D( I ) + S + WORK( INDLPL+I ) = LD( I ) / DPLUS + WORK( INDS+I ) = S*WORK( INDLPL+I )*L( I ) + S = WORK( INDS+I ) - LAMBDA + 51 CONTINUE + SAWNAN1 = DISNAN( S ) +* + 60 CONTINUE + IF( SAWNAN1 ) THEN +* Runs a slower version of the above loop if a NaN is detected + NEG1 = 0 + S = WORK( INDS+B1-1 ) - LAMBDA + DO 70 I = B1, R1 - 1 + DPLUS = D( I ) + S + IF(ABS(DPLUS).LT.PIVMIN) DPLUS = -PIVMIN + WORK( INDLPL+I ) = LD( I ) / DPLUS + IF(DPLUS.LT.ZERO) NEG1 = NEG1 + 1 + WORK( INDS+I ) = S*WORK( INDLPL+I )*L( I ) + IF( WORK( INDLPL+I ).EQ.ZERO ) + $ WORK( INDS+I ) = LLD( I ) + S = WORK( INDS+I ) - LAMBDA + 70 CONTINUE + DO 71 I = R1, R2 - 1 + DPLUS = D( I ) + S + IF(ABS(DPLUS).LT.PIVMIN) DPLUS = -PIVMIN + WORK( INDLPL+I ) = LD( I ) / DPLUS + WORK( INDS+I ) = S*WORK( INDLPL+I )*L( I ) + IF( WORK( INDLPL+I ).EQ.ZERO ) + $ WORK( INDS+I ) = LLD( I ) + S = WORK( INDS+I ) - LAMBDA + 71 CONTINUE + END IF +* +* Compute the progressive transform (using the differential form) +* until the index R1 +* + SAWNAN2 = .FALSE. + NEG2 = 0 + WORK( INDP+BN-1 ) = D( BN ) - LAMBDA + DO 80 I = BN - 1, R1, -1 + DMINUS = LLD( I ) + WORK( INDP+I ) + TMP = D( I ) / DMINUS + IF(DMINUS.LT.ZERO) NEG2 = NEG2 + 1 + WORK( INDUMN+I ) = L( I )*TMP + WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - LAMBDA + 80 CONTINUE + TMP = WORK( INDP+R1-1 ) + SAWNAN2 = DISNAN( TMP ) + + IF( SAWNAN2 ) THEN +* Runs a slower version of the above loop if a NaN is detected + NEG2 = 0 + DO 100 I = BN-1, R1, -1 + DMINUS = LLD( I ) + WORK( INDP+I ) + IF(ABS(DMINUS).LT.PIVMIN) DMINUS = -PIVMIN + TMP = D( I ) / DMINUS + IF(DMINUS.LT.ZERO) NEG2 = NEG2 + 1 + WORK( INDUMN+I ) = L( I )*TMP + WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - LAMBDA + IF( TMP.EQ.ZERO ) + $ WORK( INDP+I-1 ) = D( I ) - LAMBDA + 100 CONTINUE + END IF +* +* Find the index (from R1 to R2) of the largest (in magnitude) +* diagonal element of the inverse +* + MINGMA = WORK( INDS+R1-1 ) + WORK( INDP+R1-1 ) + IF( MINGMA.LT.ZERO ) NEG1 = NEG1 + 1 + IF( WANTNC ) THEN + NEGCNT = NEG1 + NEG2 + ELSE + NEGCNT = -1 + ENDIF + IF( ABS(MINGMA).EQ.ZERO ) + $ MINGMA = EPS*WORK( INDS+R1-1 ) + R = R1 + DO 110 I = R1, R2 - 1 + TMP = WORK( INDS+I ) + WORK( INDP+I ) + IF( TMP.EQ.ZERO ) + $ TMP = EPS*WORK( INDS+I ) + IF( ABS( TMP ).LE.ABS( MINGMA ) ) THEN + MINGMA = TMP + R = I + 1 + END IF + 110 CONTINUE +* +* Compute the FP vector: solve N^T v = e_r +* + ISUPPZ( 1 ) = B1 + ISUPPZ( 2 ) = BN + Z( R ) = CONE + ZTZ = ONE +* +* Compute the FP vector upwards from R +* + IF( .NOT.SAWNAN1 .AND. .NOT.SAWNAN2 ) THEN + DO 210 I = R-1, B1, -1 + Z( I ) = -( WORK( INDLPL+I )*Z( I+1 ) ) + IF( (ABS(Z(I))+ABS(Z(I+1)))* ABS(LD(I)).LT.GAPTOL ) + $ THEN + Z( I ) = ZERO + ISUPPZ( 1 ) = I + 1 + GOTO 220 + ENDIF + ZTZ = ZTZ + DBLE( Z( I )*Z( I ) ) + 210 CONTINUE + 220 CONTINUE + ELSE +* Run slower loop if NaN occurred. + DO 230 I = R - 1, B1, -1 + IF( Z( I+1 ).EQ.ZERO ) THEN + Z( I ) = -( LD( I+1 ) / LD( I ) )*Z( I+2 ) + ELSE + Z( I ) = -( WORK( INDLPL+I )*Z( I+1 ) ) + END IF + IF( (ABS(Z(I))+ABS(Z(I+1)))* ABS(LD(I)).LT.GAPTOL ) + $ THEN + Z( I ) = ZERO + ISUPPZ( 1 ) = I + 1 + GO TO 240 + END IF + ZTZ = ZTZ + DBLE( Z( I )*Z( I ) ) + 230 CONTINUE + 240 CONTINUE + ENDIF + +* Compute the FP vector downwards from R in blocks of size BLKSIZ + IF( .NOT.SAWNAN1 .AND. .NOT.SAWNAN2 ) THEN + DO 250 I = R, BN-1 + Z( I+1 ) = -( WORK( INDUMN+I )*Z( I ) ) + IF( (ABS(Z(I))+ABS(Z(I+1)))* ABS(LD(I)).LT.GAPTOL ) + $ THEN + Z( I+1 ) = ZERO + ISUPPZ( 2 ) = I + GO TO 260 + END IF + ZTZ = ZTZ + DBLE( Z( I+1 )*Z( I+1 ) ) + 250 CONTINUE + 260 CONTINUE + ELSE +* Run slower loop if NaN occurred. + DO 270 I = R, BN - 1 + IF( Z( I ).EQ.ZERO ) THEN + Z( I+1 ) = -( LD( I-1 ) / LD( I ) )*Z( I-1 ) + ELSE + Z( I+1 ) = -( WORK( INDUMN+I )*Z( I ) ) + END IF + IF( (ABS(Z(I))+ABS(Z(I+1)))* ABS(LD(I)).LT.GAPTOL ) + $ THEN + Z( I+1 ) = ZERO + ISUPPZ( 2 ) = I + GO TO 280 + END IF + ZTZ = ZTZ + DBLE( Z( I+1 )*Z( I+1 ) ) + 270 CONTINUE + 280 CONTINUE + END IF +* +* Compute quantities for convergence test +* + TMP = ONE / ZTZ + NRMINV = SQRT( TMP ) + RESID = ABS( MINGMA )*NRMINV + RQCORR = MINGMA*TMP +* +* + RETURN +* +* End of ZLAR1V +* + END diff --git a/dspl/liblapack/SRC/zlar2v.f b/dspl/liblapack/SRC/zlar2v.f new file mode 100644 index 0000000..0f7fe63 --- /dev/null +++ b/dspl/liblapack/SRC/zlar2v.f @@ -0,0 +1,169 @@ +*> \brief \b ZLAR2V applies a vector of plane rotations with real cosines and complex sines from both sides to a sequence of 2-by-2 symmetric/Hermitian matrices. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAR2V + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAR2V( N, X, Y, Z, INCX, C, S, INCC ) +* +* .. Scalar Arguments .. +* INTEGER INCC, INCX, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( * ) +* COMPLEX*16 S( * ), X( * ), Y( * ), Z( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAR2V applies a vector of complex plane rotations with real cosines +*> from both sides to a sequence of 2-by-2 complex Hermitian matrices, +*> defined by the elements of the vectors x, y and z. For i = 1,2,...,n +*> +*> ( x(i) z(i) ) := +*> ( conjg(z(i)) y(i) ) +*> +*> ( c(i) conjg(s(i)) ) ( x(i) z(i) ) ( c(i) -conjg(s(i)) ) +*> ( -s(i) c(i) ) ( conjg(z(i)) y(i) ) ( s(i) c(i) ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of plane rotations to be applied. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (1+(N-1)*INCX) +*> The vector x; the elements of x are assumed to be real. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX*16 array, dimension (1+(N-1)*INCX) +*> The vector y; the elements of y are assumed to be real. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (1+(N-1)*INCX) +*> The vector z. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X, Y and Z. INCX > 0. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (1+(N-1)*INCC) +*> The cosines of the plane rotations. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is COMPLEX*16 array, dimension (1+(N-1)*INCC) +*> The sines of the plane rotations. +*> \endverbatim +*> +*> \param[in] INCC +*> \verbatim +*> INCC is INTEGER +*> The increment between elements of C and S. INCC > 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLAR2V( N, X, Y, Z, INCX, C, S, INCC ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ) + COMPLEX*16 S( * ), X( * ), Y( * ), Z( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IC, IX + DOUBLE PRECISION CI, SII, SIR, T1I, T1R, T5, T6, XI, YI, ZII, + $ ZIR + COMPLEX*16 SI, T2, T3, T4, ZI +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DCONJG, DIMAG +* .. +* .. Executable Statements .. +* + IX = 1 + IC = 1 + DO 10 I = 1, N + XI = DBLE( X( IX ) ) + YI = DBLE( Y( IX ) ) + ZI = Z( IX ) + ZIR = DBLE( ZI ) + ZII = DIMAG( ZI ) + CI = C( IC ) + SI = S( IC ) + SIR = DBLE( SI ) + SII = DIMAG( SI ) + T1R = SIR*ZIR - SII*ZII + T1I = SIR*ZII + SII*ZIR + T2 = CI*ZI + T3 = T2 - DCONJG( SI )*XI + T4 = DCONJG( T2 ) + SI*YI + T5 = CI*XI + T1R + T6 = CI*YI - T1R + X( IX ) = CI*T5 + ( SIR*DBLE( T4 )+SII*DIMAG( T4 ) ) + Y( IX ) = CI*T6 - ( SIR*DBLE( T3 )-SII*DIMAG( T3 ) ) + Z( IX ) = CI*T3 + DCONJG( SI )*DCMPLX( T6, T1I ) + IX = IX + INCX + IC = IC + INCC + 10 CONTINUE + RETURN +* +* End of ZLAR2V +* + END diff --git a/dspl/liblapack/SRC/zlarcm.f b/dspl/liblapack/SRC/zlarcm.f new file mode 100644 index 0000000..77a709f --- /dev/null +++ b/dspl/liblapack/SRC/zlarcm.f @@ -0,0 +1,185 @@ +*> \brief \b ZLARCM copies all or part of a real two-dimensional array to a complex array. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARCM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARCM( M, N, A, LDA, B, LDB, C, LDC, RWORK ) +* +* .. Scalar Arguments .. +* INTEGER LDA, LDB, LDC, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), RWORK( * ) +* COMPLEX*16 B( LDB, * ), C( LDC, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARCM performs a very simple matrix-matrix multiplication: +*> C := A * B, +*> where A is M by M and real; B is M by N and complex; +*> C is M by N and complex. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A and of the matrix C. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns and rows of the matrix B and +*> the number of columns of the matrix C. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, M) +*> On entry, A contains the M by M matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >=max(1,M). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB, N) +*> On entry, B contains the M by N matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >=max(1,M). +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC, N) +*> On exit, C contains the M by N matrix C. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >=max(1,M). +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*M*N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLARCM( M, N, A, LDA, B, LDB, C, LDC, RWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER LDA, LDB, LDC, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), RWORK( * ) + COMPLEX*16 B( LDB, * ), C( LDC, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DIMAG +* .. +* .. External Subroutines .. + EXTERNAL DGEMM +* .. +* .. Executable Statements .. +* +* Quick return if possible. +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN +* + DO 20 J = 1, N + DO 10 I = 1, M + RWORK( ( J-1 )*M+I ) = DBLE( B( I, J ) ) + 10 CONTINUE + 20 CONTINUE +* + L = M*N + 1 + CALL DGEMM( 'N', 'N', M, N, M, ONE, A, LDA, RWORK, M, ZERO, + $ RWORK( L ), M ) + DO 40 J = 1, N + DO 30 I = 1, M + C( I, J ) = RWORK( L+( J-1 )*M+I-1 ) + 30 CONTINUE + 40 CONTINUE +* + DO 60 J = 1, N + DO 50 I = 1, M + RWORK( ( J-1 )*M+I ) = DIMAG( B( I, J ) ) + 50 CONTINUE + 60 CONTINUE + CALL DGEMM( 'N', 'N', M, N, M, ONE, A, LDA, RWORK, M, ZERO, + $ RWORK( L ), M ) + DO 80 J = 1, N + DO 70 I = 1, M + C( I, J ) = DCMPLX( DBLE( C( I, J ) ), + $ RWORK( L+( J-1 )*M+I-1 ) ) + 70 CONTINUE + 80 CONTINUE +* + RETURN +* +* End of ZLARCM +* + END diff --git a/dspl/liblapack/SRC/zlarf.f b/dspl/liblapack/SRC/zlarf.f new file mode 100644 index 0000000..f1be80d --- /dev/null +++ b/dspl/liblapack/SRC/zlarf.f @@ -0,0 +1,232 @@ +*> \brief \b ZLARF applies an elementary reflector to a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* COMPLEX*16 TAU +* .. +* .. Array Arguments .. +* COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARF applies a complex elementary reflector H to a complex M-by-N +*> matrix C, from either the left or the right. H is represented in the +*> form +*> +*> H = I - tau * v * v**H +*> +*> where tau is a complex scalar and v is a complex vector. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> +*> To apply H**H, supply conjg(tau) instead +*> tau. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of H. V is not used if +*> TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + COMPLEX*16 TAU +* .. +* .. Array Arguments .. + COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL APPLYLEFT + INTEGER I, LASTV, LASTC +* .. +* .. External Subroutines .. + EXTERNAL ZGEMV, ZGERC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAZLR, ILAZLC + EXTERNAL LSAME, ILAZLR, ILAZLC +* .. +* .. Executable Statements .. +* + APPLYLEFT = LSAME( SIDE, 'L' ) + LASTV = 0 + LASTC = 0 + IF( TAU.NE.ZERO ) THEN +* Set up variables for scanning V. LASTV begins pointing to the end +* of V. + IF( APPLYLEFT ) THEN + LASTV = M + ELSE + LASTV = N + END IF + IF( INCV.GT.0 ) THEN + I = 1 + (LASTV-1) * INCV + ELSE + I = 1 + END IF +* Look for the last non-zero row in V. + DO WHILE( LASTV.GT.0 .AND. V( I ).EQ.ZERO ) + LASTV = LASTV - 1 + I = I - INCV + END DO + IF( APPLYLEFT ) THEN +* Scan for the last non-zero column in C(1:lastv,:). + LASTC = ILAZLC(LASTV, N, C, LDC) + ELSE +* Scan for the last non-zero row in C(:,1:lastv). + LASTC = ILAZLR(M, LASTV, C, LDC) + END IF + END IF +* Note that lastc.eq.0 renders the BLAS operations null; no special +* case is needed at this level. + IF( APPLYLEFT ) THEN +* +* Form H * C +* + IF( LASTV.GT.0 ) THEN +* +* w(1:lastc,1) := C(1:lastv,1:lastc)**H * v(1:lastv,1) +* + CALL ZGEMV( 'Conjugate transpose', LASTV, LASTC, ONE, + $ C, LDC, V, INCV, ZERO, WORK, 1 ) +* +* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)**H +* + CALL ZGERC( LASTV, LASTC, -TAU, V, INCV, WORK, 1, C, LDC ) + END IF + ELSE +* +* Form C * H +* + IF( LASTV.GT.0 ) THEN +* +* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) +* + CALL ZGEMV( 'No transpose', LASTC, LASTV, ONE, C, LDC, + $ V, INCV, ZERO, WORK, 1 ) +* +* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)**H +* + CALL ZGERC( LASTC, LASTV, -TAU, WORK, 1, V, INCV, C, LDC ) + END IF + END IF + RETURN +* +* End of ZLARF +* + END diff --git a/dspl/liblapack/SRC/zlarfb.f b/dspl/liblapack/SRC/zlarfb.f new file mode 100644 index 0000000..b4a2b4d --- /dev/null +++ b/dspl/liblapack/SRC/zlarfb.f @@ -0,0 +1,731 @@ +*> \brief \b ZLARFB applies a block reflector or its conjugate-transpose to a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARFB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, +* T, LDT, C, LDC, WORK, LDWORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, SIDE, STOREV, TRANS +* INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 C( LDC, * ), T( LDT, * ), V( LDV, * ), +* $ WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARFB applies a complex block reflector H or its transpose H**H to a +*> complex M-by-N matrix C, from either the left or the right. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply H or H**H from the Left +*> = 'R': apply H or H**H from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply H (No transpose) +*> = 'C': apply H**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Indicates how H is formed from a product of elementary +*> reflectors +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Indicates how the vectors which define the elementary +*> reflectors are stored: +*> = 'C': Columnwise +*> = 'R': Rowwise +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the matrix T (= the number of elementary +*> reflectors whose product defines the block reflector). +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,M) if STOREV = 'R' and SIDE = 'L' +*> (LDV,N) if STOREV = 'R' and SIDE = 'R' +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); +*> if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); +*> if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,K) +*> The triangular K-by-K matrix T in the representation of the +*> block reflector. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by H*C or H**H*C or C*H or C*H**H. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LDWORK,K) +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. +*> If SIDE = 'L', LDWORK >= max(1,N); +*> if SIDE = 'R', LDWORK >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2013 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored; the corresponding +*> array elements are modified but restored on exit. The rest of the +*> array is not used. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, + $ T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2013 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZCOPY, ZGEMM, ZLACGV, ZTRMM +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( STOREV, 'C' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 ) (first K rows) +* ( V2 ) +* where V1 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C where C = ( C1 ) +* ( C2 ) +* +* W := C**H * V = (C1**H * V1 + C2**H * V2) (stored in WORK) +* +* W := C1**H +* + DO 10 J = 1, K + CALL ZCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL ZLACGV( N, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W := W * V1 +* + CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N, + $ K, ONE, V, LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C2**H * V2 +* + CALL ZGEMM( 'Conjugate transpose', 'No transpose', N, + $ K, M-K, ONE, C( K+1, 1 ), LDC, + $ V( K+1, 1 ), LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T**H or W * T +* + CALL ZTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W**H +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2 * W**H +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ M-K, N, K, -ONE, V( K+1, 1 ), LDV, WORK, + $ LDWORK, ONE, C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1**H +* + CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', N, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W**H +* + DO 30 J = 1, K + DO 20 I = 1, N + C( J, I ) = C( J, I ) - DCONJG( WORK( I, J ) ) + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H where C = ( C1 C2 ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C1 +* + DO 40 J = 1, K + CALL ZCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W := W * V1 +* + CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M, + $ K, ONE, V, LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C2 * V2 +* + CALL ZGEMM( 'No transpose', 'No transpose', M, K, N-K, + $ ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**H +* + CALL ZTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V**H +* + IF( N.GT.K ) THEN +* +* C2 := C2 - W * V2**H +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', M, + $ N-K, K, -ONE, WORK, LDWORK, V( K+1, 1 ), + $ LDV, ONE, C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1**H +* + CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', M, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 60 J = 1, K + DO 50 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + ELSE +* +* Let V = ( V1 ) +* ( V2 ) (last K rows) +* where V2 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C where C = ( C1 ) +* ( C2 ) +* +* W := C**H * V = (C1**H * V1 + C2**H * V2) (stored in WORK) +* +* W := C2**H +* + DO 70 J = 1, K + CALL ZCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL ZLACGV( N, WORK( 1, J ), 1 ) + 70 CONTINUE +* +* W := W * V2 +* + CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N, + $ K, ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1**H * V1 +* + CALL ZGEMM( 'Conjugate transpose', 'No transpose', N, + $ K, M-K, ONE, C, LDC, V, LDV, ONE, WORK, + $ LDWORK ) + END IF +* +* W := W * T**H or W * T +* + CALL ZTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W**H +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1 * W**H +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ M-K, N, K, -ONE, V, LDV, WORK, LDWORK, + $ ONE, C, LDC ) + END IF +* +* W := W * V2**H +* + CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', N, K, ONE, V( M-K+1, 1 ), LDV, WORK, + $ LDWORK ) +* +* C2 := C2 - W**H +* + DO 90 J = 1, K + DO 80 I = 1, N + C( M-K+J, I ) = C( M-K+J, I ) - + $ DCONJG( WORK( I, J ) ) + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H where C = ( C1 C2 ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C2 +* + DO 100 J = 1, K + CALL ZCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 100 CONTINUE +* +* W := W * V2 +* + CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M, + $ K, ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1 +* + CALL ZGEMM( 'No transpose', 'No transpose', M, K, N-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**H +* + CALL ZTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V**H +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1**H +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', M, + $ N-K, K, -ONE, WORK, LDWORK, V, LDV, ONE, + $ C, LDC ) + END IF +* +* W := W * V2**H +* + CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', M, K, ONE, V( N-K+1, 1 ), LDV, WORK, + $ LDWORK ) +* +* C2 := C2 - W +* + DO 120 J = 1, K + DO 110 I = 1, M + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* + ELSE IF( LSAME( STOREV, 'R' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 V2 ) (V1: first K columns) +* where V1 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C where C = ( C1 ) +* ( C2 ) +* +* W := C**H * V**H = (C1**H * V1**H + C2**H * V2**H) (stored in WORK) +* +* W := C1**H +* + DO 130 J = 1, K + CALL ZCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL ZLACGV( N, WORK( 1, J ), 1 ) + 130 CONTINUE +* +* W := W * V1**H +* + CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', N, K, ONE, V, LDV, WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C2**H * V2**H +* + CALL ZGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', N, K, M-K, ONE, + $ C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, ONE, + $ WORK, LDWORK ) + END IF +* +* W := W * T**H or W * T +* + CALL ZTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V**H * W**H +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2**H * W**H +* + CALL ZGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', M-K, N, K, -ONE, + $ V( 1, K+1 ), LDV, WORK, LDWORK, ONE, + $ C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N, + $ K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W**H +* + DO 150 J = 1, K + DO 140 I = 1, N + C( J, I ) = C( J, I ) - DCONJG( WORK( I, J ) ) + 140 CONTINUE + 150 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H where C = ( C1 C2 ) +* +* W := C * V**H = (C1*V1**H + C2*V2**H) (stored in WORK) +* +* W := C1 +* + DO 160 J = 1, K + CALL ZCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 160 CONTINUE +* +* W := W * V1**H +* + CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', M, K, ONE, V, LDV, WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C2 * V2**H +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', M, + $ K, N-K, ONE, C( 1, K+1 ), LDC, + $ V( 1, K+1 ), LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**H +* + CALL ZTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C2 := C2 - W * V2 +* + CALL ZGEMM( 'No transpose', 'No transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, ONE, + $ C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M, + $ K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 180 J = 1, K + DO 170 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 170 CONTINUE + 180 CONTINUE +* + END IF +* + ELSE +* +* Let V = ( V1 V2 ) (V2: last K columns) +* where V2 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C where C = ( C1 ) +* ( C2 ) +* +* W := C**H * V**H = (C1**H * V1**H + C2**H * V2**H) (stored in WORK) +* +* W := C2**H +* + DO 190 J = 1, K + CALL ZCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL ZLACGV( N, WORK( 1, J ), 1 ) + 190 CONTINUE +* +* W := W * V2**H +* + CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', N, K, ONE, V( 1, M-K+1 ), LDV, WORK, + $ LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1**H * V1**H +* + CALL ZGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', N, K, M-K, ONE, C, + $ LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T**H or W * T +* + CALL ZTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V**H * W**H +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1**H * W**H +* + CALL ZGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', M-K, N, K, -ONE, V, + $ LDV, WORK, LDWORK, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N, + $ K, ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK ) +* +* C2 := C2 - W**H +* + DO 210 J = 1, K + DO 200 I = 1, N + C( M-K+J, I ) = C( M-K+J, I ) - + $ DCONJG( WORK( I, J ) ) + 200 CONTINUE + 210 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H where C = ( C1 C2 ) +* +* W := C * V**H = (C1*V1**H + C2*V2**H) (stored in WORK) +* +* W := C2 +* + DO 220 J = 1, K + CALL ZCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 ) + 220 CONTINUE +* +* W := W * V2**H +* + CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', M, K, ONE, V( 1, N-K+1 ), LDV, WORK, + $ LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1**H +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', M, + $ K, N-K, ONE, C, LDC, V, LDV, ONE, WORK, + $ LDWORK ) + END IF +* +* W := W * T or W * T**H +* + CALL ZTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, + $ ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1 +* + CALL ZGEMM( 'No transpose', 'No transpose', M, N-K, K, + $ -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M, + $ K, ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 240 J = 1, K + DO 230 I = 1, M + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 230 CONTINUE + 240 CONTINUE +* + END IF +* + END IF + END IF +* + RETURN +* +* End of ZLARFB +* + END diff --git a/dspl/liblapack/SRC/zlarfg.f b/dspl/liblapack/SRC/zlarfg.f new file mode 100644 index 0000000..081d391 --- /dev/null +++ b/dspl/liblapack/SRC/zlarfg.f @@ -0,0 +1,203 @@ +*> \brief \b ZLARFG generates an elementary reflector (Householder matrix). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARFG + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARFG( N, ALPHA, X, INCX, TAU ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* COMPLEX*16 ALPHA, TAU +* .. +* .. Array Arguments .. +* COMPLEX*16 X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARFG generates a complex elementary reflector H of order n, such +*> that +*> +*> H**H * ( alpha ) = ( beta ), H**H * H = I. +*> ( x ) ( 0 ) +*> +*> where alpha and beta are scalars, with beta real, and x is an +*> (n-1)-element complex vector. H is represented in the form +*> +*> H = I - tau * ( 1 ) * ( 1 v**H ) , +*> ( v ) +*> +*> where tau is a complex scalar and v is a complex (n-1)-element +*> vector. Note that H is not hermitian. +*> +*> If the elements of x are all zero and alpha is real, then tau = 0 +*> and H is taken to be the unit matrix. +*> +*> Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the elementary reflector. +*> \endverbatim +*> +*> \param[in,out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 +*> On entry, the value alpha. +*> On exit, it is overwritten with the value beta. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension +*> (1+(N-2)*abs(INCX)) +*> On entry, the vector x. +*> On exit, it is overwritten with the vector v. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 +*> The value tau. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLARFG( N, ALPHA, X, INCX, TAU ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INCX, N + COMPLEX*16 ALPHA, TAU +* .. +* .. Array Arguments .. + COMPLEX*16 X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER J, KNT + DOUBLE PRECISION ALPHI, ALPHR, BETA, RSAFMN, SAFMIN, XNORM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY3, DZNRM2 + COMPLEX*16 ZLADIV + EXTERNAL DLAMCH, DLAPY3, DZNRM2, ZLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DIMAG, SIGN +* .. +* .. External Subroutines .. + EXTERNAL ZDSCAL, ZSCAL +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + TAU = ZERO + RETURN + END IF +* + XNORM = DZNRM2( N-1, X, INCX ) + ALPHR = DBLE( ALPHA ) + ALPHI = DIMAG( ALPHA ) +* + IF( XNORM.EQ.ZERO .AND. ALPHI.EQ.ZERO ) THEN +* +* H = I +* + TAU = ZERO + ELSE +* +* general case +* + BETA = -SIGN( DLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) + SAFMIN = DLAMCH( 'S' ) / DLAMCH( 'E' ) + RSAFMN = ONE / SAFMIN +* + KNT = 0 + IF( ABS( BETA ).LT.SAFMIN ) THEN +* +* XNORM, BETA may be inaccurate; scale X and recompute them +* + 10 CONTINUE + KNT = KNT + 1 + CALL ZDSCAL( N-1, RSAFMN, X, INCX ) + BETA = BETA*RSAFMN + ALPHI = ALPHI*RSAFMN + ALPHR = ALPHR*RSAFMN + IF( (ABS( BETA ).LT.SAFMIN) .AND. (KNT .LT. 20) ) + $ GO TO 10 +* +* New BETA is at most 1, at least SAFMIN +* + XNORM = DZNRM2( N-1, X, INCX ) + ALPHA = DCMPLX( ALPHR, ALPHI ) + BETA = -SIGN( DLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) + END IF + TAU = DCMPLX( ( BETA-ALPHR ) / BETA, -ALPHI / BETA ) + ALPHA = ZLADIV( DCMPLX( ONE ), ALPHA-BETA ) + CALL ZSCAL( N-1, ALPHA, X, INCX ) +* +* If ALPHA is subnormal, it may lose relative accuracy +* + DO 20 J = 1, KNT + BETA = BETA*SAFMIN + 20 CONTINUE + ALPHA = BETA + END IF +* + RETURN +* +* End of ZLARFG +* + END diff --git a/dspl/liblapack/SRC/zlarfgp.f b/dspl/liblapack/SRC/zlarfgp.f new file mode 100644 index 0000000..c9e55a5 --- /dev/null +++ b/dspl/liblapack/SRC/zlarfgp.f @@ -0,0 +1,272 @@ +*> \brief \b ZLARFGP generates an elementary reflector (Householder matrix) with non-negative beta. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARFGP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARFGP( N, ALPHA, X, INCX, TAU ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* COMPLEX*16 ALPHA, TAU +* .. +* .. Array Arguments .. +* COMPLEX*16 X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARFGP generates a complex elementary reflector H of order n, such +*> that +*> +*> H**H * ( alpha ) = ( beta ), H**H * H = I. +*> ( x ) ( 0 ) +*> +*> where alpha and beta are scalars, beta is real and non-negative, and +*> x is an (n-1)-element complex vector. H is represented in the form +*> +*> H = I - tau * ( 1 ) * ( 1 v**H ) , +*> ( v ) +*> +*> where tau is a complex scalar and v is a complex (n-1)-element +*> vector. Note that H is not hermitian. +*> +*> If the elements of x are all zero and alpha is real, then tau = 0 +*> and H is taken to be the unit matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the elementary reflector. +*> \endverbatim +*> +*> \param[in,out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 +*> On entry, the value alpha. +*> On exit, it is overwritten with the value beta. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension +*> (1+(N-2)*abs(INCX)) +*> On entry, the vector x. +*> On exit, it is overwritten with the vector v. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 +*> The value tau. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLARFGP( N, ALPHA, X, INCX, TAU ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INCX, N + COMPLEX*16 ALPHA, TAU +* .. +* .. Array Arguments .. + COMPLEX*16 X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION TWO, ONE, ZERO + PARAMETER ( TWO = 2.0D+0, ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER J, KNT + DOUBLE PRECISION ALPHI, ALPHR, BETA, BIGNUM, SMLNUM, XNORM + COMPLEX*16 SAVEALPHA +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY3, DLAPY2, DZNRM2 + COMPLEX*16 ZLADIV + EXTERNAL DLAMCH, DLAPY3, DLAPY2, DZNRM2, ZLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DIMAG, SIGN +* .. +* .. External Subroutines .. + EXTERNAL ZDSCAL, ZSCAL +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + TAU = ZERO + RETURN + END IF +* + XNORM = DZNRM2( N-1, X, INCX ) + ALPHR = DBLE( ALPHA ) + ALPHI = DIMAG( ALPHA ) +* + IF( XNORM.EQ.ZERO ) THEN +* +* H = [1-alpha/abs(alpha) 0; 0 I], sign chosen so ALPHA >= 0. +* + IF( ALPHI.EQ.ZERO ) THEN + IF( ALPHR.GE.ZERO ) THEN +* When TAU.eq.ZERO, the vector is special-cased to be +* all zeros in the application routines. We do not need +* to clear it. + TAU = ZERO + ELSE +* However, the application routines rely on explicit +* zero checks when TAU.ne.ZERO, and we must clear X. + TAU = TWO + DO J = 1, N-1 + X( 1 + (J-1)*INCX ) = ZERO + END DO + ALPHA = -ALPHA + END IF + ELSE +* Only "reflecting" the diagonal entry to be real and non-negative. + XNORM = DLAPY2( ALPHR, ALPHI ) + TAU = DCMPLX( ONE - ALPHR / XNORM, -ALPHI / XNORM ) + DO J = 1, N-1 + X( 1 + (J-1)*INCX ) = ZERO + END DO + ALPHA = XNORM + END IF + ELSE +* +* general case +* + BETA = SIGN( DLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) + SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'E' ) + BIGNUM = ONE / SMLNUM +* + KNT = 0 + IF( ABS( BETA ).LT.SMLNUM ) THEN +* +* XNORM, BETA may be inaccurate; scale X and recompute them +* + 10 CONTINUE + KNT = KNT + 1 + CALL ZDSCAL( N-1, BIGNUM, X, INCX ) + BETA = BETA*BIGNUM + ALPHI = ALPHI*BIGNUM + ALPHR = ALPHR*BIGNUM + IF( (ABS( BETA ).LT.SMLNUM) .AND. (KNT .LT. 20) ) + $ GO TO 10 +* +* New BETA is at most 1, at least SMLNUM +* + XNORM = DZNRM2( N-1, X, INCX ) + ALPHA = DCMPLX( ALPHR, ALPHI ) + BETA = SIGN( DLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) + END IF + SAVEALPHA = ALPHA + ALPHA = ALPHA + BETA + IF( BETA.LT.ZERO ) THEN + BETA = -BETA + TAU = -ALPHA / BETA + ELSE + ALPHR = ALPHI * (ALPHI/DBLE( ALPHA )) + ALPHR = ALPHR + XNORM * (XNORM/DBLE( ALPHA )) + TAU = DCMPLX( ALPHR/BETA, -ALPHI/BETA ) + ALPHA = DCMPLX( -ALPHR, ALPHI ) + END IF + ALPHA = ZLADIV( DCMPLX( ONE ), ALPHA ) +* + IF ( ABS(TAU).LE.SMLNUM ) THEN +* +* In the case where the computed TAU ends up being a denormalized number, +* it loses relative accuracy. This is a BIG problem. Solution: flush TAU +* to ZERO (or TWO or whatever makes a nonnegative real number for BETA). +* +* (Bug report provided by Pat Quillen from MathWorks on Jul 29, 2009.) +* (Thanks Pat. Thanks MathWorks.) +* + ALPHR = DBLE( SAVEALPHA ) + ALPHI = DIMAG( SAVEALPHA ) + IF( ALPHI.EQ.ZERO ) THEN + IF( ALPHR.GE.ZERO ) THEN + TAU = ZERO + ELSE + TAU = TWO + DO J = 1, N-1 + X( 1 + (J-1)*INCX ) = ZERO + END DO + BETA = -SAVEALPHA + END IF + ELSE + XNORM = DLAPY2( ALPHR, ALPHI ) + TAU = DCMPLX( ONE - ALPHR / XNORM, -ALPHI / XNORM ) + DO J = 1, N-1 + X( 1 + (J-1)*INCX ) = ZERO + END DO + BETA = XNORM + END IF +* + ELSE +* +* This is the general case. +* + CALL ZSCAL( N-1, ALPHA, X, INCX ) +* + END IF +* +* If BETA is subnormal, it may lose relative accuracy +* + DO 20 J = 1, KNT + BETA = BETA*SMLNUM + 20 CONTINUE + ALPHA = BETA + END IF +* + RETURN +* +* End of ZLARFGP +* + END diff --git a/dspl/liblapack/SRC/zlarft.f b/dspl/liblapack/SRC/zlarft.f new file mode 100644 index 0000000..78ad2f1 --- /dev/null +++ b/dspl/liblapack/SRC/zlarft.f @@ -0,0 +1,327 @@ +*> \brief \b ZLARFT forms the triangular factor T of a block reflector H = I - vtvH +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARFT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARFT forms the triangular factor T of a complex block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**H +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**H * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL ZGEMV, ZTRMV, ZGEMM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( PREVLASTV, I ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * CONJG( V( I , J ) ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) +* + CALL ZGEMV( 'Conjugate transpose', J-I, I-1, + $ -TAU( I ), V( I+1, 1 ), LDV, + $ V( I+1, I ), 1, ONE, T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H +* + CALL ZGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, + $ ONE, T( 1, I ), LDT ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) +* + CALL ZGEMV( 'Conjugate transpose', N-K+I-J, K-I, + $ -TAU( I ), V( J, I+1 ), LDV, V( J, I ), + $ 1, ONE, T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H +* + CALL ZGEMM( 'N', 'C', K-I, 1, N-K+I-J, -TAU( I ), + $ V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), LDT ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of ZLARFT +* + END diff --git a/dspl/liblapack/SRC/zlarfx.f b/dspl/liblapack/SRC/zlarfx.f new file mode 100644 index 0000000..685d164 --- /dev/null +++ b/dspl/liblapack/SRC/zlarfx.f @@ -0,0 +1,700 @@ +*> \brief \b ZLARFX applies an elementary reflector to a general rectangular matrix, with loop unrolling when the reflector has order ≤ 10. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARFX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARFX( SIDE, M, N, V, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER LDC, M, N +* COMPLEX*16 TAU +* .. +* .. Array Arguments .. +* COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARFX applies a complex elementary reflector H to a complex m by n +*> matrix C, from either the left or the right. H is represented in the +*> form +*> +*> H = I - tau * v * v**H +*> +*> where tau is a complex scalar and v is a complex vector. +*> +*> If tau = 0, then H is taken to be the unit matrix +*> +*> This version uses inline code if H has order < 11. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (M) if SIDE = 'L' +*> or (N) if SIDE = 'R' +*> The vector v in the representation of H. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the m by n matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> WORK is not referenced if H has order < 11. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLARFX( SIDE, M, N, V, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER LDC, M, N + COMPLEX*16 TAU +* .. +* .. Array Arguments .. + COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER J + COMPLEX*16 SUM, T1, T10, T2, T3, T4, T5, T6, T7, T8, T9, + $ V1, V10, V2, V3, V4, V5, V6, V7, V8, V9 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLARF +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* + IF( TAU.EQ.ZERO ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C, where H has order m. +* + GO TO ( 10, 30, 50, 70, 90, 110, 130, 150, + $ 170, 190 )M +* +* Code for general M +* + CALL ZLARF( SIDE, M, N, V, 1, TAU, C, LDC, WORK ) + GO TO 410 + 10 CONTINUE +* +* Special code for 1 x 1 Householder +* + T1 = ONE - TAU*V( 1 )*DCONJG( V( 1 ) ) + DO 20 J = 1, N + C( 1, J ) = T1*C( 1, J ) + 20 CONTINUE + GO TO 410 + 30 CONTINUE +* +* Special code for 2 x 2 Householder +* + V1 = DCONJG( V( 1 ) ) + T1 = TAU*DCONJG( V1 ) + V2 = DCONJG( V( 2 ) ) + T2 = TAU*DCONJG( V2 ) + DO 40 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + 40 CONTINUE + GO TO 410 + 50 CONTINUE +* +* Special code for 3 x 3 Householder +* + V1 = DCONJG( V( 1 ) ) + T1 = TAU*DCONJG( V1 ) + V2 = DCONJG( V( 2 ) ) + T2 = TAU*DCONJG( V2 ) + V3 = DCONJG( V( 3 ) ) + T3 = TAU*DCONJG( V3 ) + DO 60 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + 60 CONTINUE + GO TO 410 + 70 CONTINUE +* +* Special code for 4 x 4 Householder +* + V1 = DCONJG( V( 1 ) ) + T1 = TAU*DCONJG( V1 ) + V2 = DCONJG( V( 2 ) ) + T2 = TAU*DCONJG( V2 ) + V3 = DCONJG( V( 3 ) ) + T3 = TAU*DCONJG( V3 ) + V4 = DCONJG( V( 4 ) ) + T4 = TAU*DCONJG( V4 ) + DO 80 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + 80 CONTINUE + GO TO 410 + 90 CONTINUE +* +* Special code for 5 x 5 Householder +* + V1 = DCONJG( V( 1 ) ) + T1 = TAU*DCONJG( V1 ) + V2 = DCONJG( V( 2 ) ) + T2 = TAU*DCONJG( V2 ) + V3 = DCONJG( V( 3 ) ) + T3 = TAU*DCONJG( V3 ) + V4 = DCONJG( V( 4 ) ) + T4 = TAU*DCONJG( V4 ) + V5 = DCONJG( V( 5 ) ) + T5 = TAU*DCONJG( V5 ) + DO 100 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + 100 CONTINUE + GO TO 410 + 110 CONTINUE +* +* Special code for 6 x 6 Householder +* + V1 = DCONJG( V( 1 ) ) + T1 = TAU*DCONJG( V1 ) + V2 = DCONJG( V( 2 ) ) + T2 = TAU*DCONJG( V2 ) + V3 = DCONJG( V( 3 ) ) + T3 = TAU*DCONJG( V3 ) + V4 = DCONJG( V( 4 ) ) + T4 = TAU*DCONJG( V4 ) + V5 = DCONJG( V( 5 ) ) + T5 = TAU*DCONJG( V5 ) + V6 = DCONJG( V( 6 ) ) + T6 = TAU*DCONJG( V6 ) + DO 120 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + 120 CONTINUE + GO TO 410 + 130 CONTINUE +* +* Special code for 7 x 7 Householder +* + V1 = DCONJG( V( 1 ) ) + T1 = TAU*DCONJG( V1 ) + V2 = DCONJG( V( 2 ) ) + T2 = TAU*DCONJG( V2 ) + V3 = DCONJG( V( 3 ) ) + T3 = TAU*DCONJG( V3 ) + V4 = DCONJG( V( 4 ) ) + T4 = TAU*DCONJG( V4 ) + V5 = DCONJG( V( 5 ) ) + T5 = TAU*DCONJG( V5 ) + V6 = DCONJG( V( 6 ) ) + T6 = TAU*DCONJG( V6 ) + V7 = DCONJG( V( 7 ) ) + T7 = TAU*DCONJG( V7 ) + DO 140 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + 140 CONTINUE + GO TO 410 + 150 CONTINUE +* +* Special code for 8 x 8 Householder +* + V1 = DCONJG( V( 1 ) ) + T1 = TAU*DCONJG( V1 ) + V2 = DCONJG( V( 2 ) ) + T2 = TAU*DCONJG( V2 ) + V3 = DCONJG( V( 3 ) ) + T3 = TAU*DCONJG( V3 ) + V4 = DCONJG( V( 4 ) ) + T4 = TAU*DCONJG( V4 ) + V5 = DCONJG( V( 5 ) ) + T5 = TAU*DCONJG( V5 ) + V6 = DCONJG( V( 6 ) ) + T6 = TAU*DCONJG( V6 ) + V7 = DCONJG( V( 7 ) ) + T7 = TAU*DCONJG( V7 ) + V8 = DCONJG( V( 8 ) ) + T8 = TAU*DCONJG( V8 ) + DO 160 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + 160 CONTINUE + GO TO 410 + 170 CONTINUE +* +* Special code for 9 x 9 Householder +* + V1 = DCONJG( V( 1 ) ) + T1 = TAU*DCONJG( V1 ) + V2 = DCONJG( V( 2 ) ) + T2 = TAU*DCONJG( V2 ) + V3 = DCONJG( V( 3 ) ) + T3 = TAU*DCONJG( V3 ) + V4 = DCONJG( V( 4 ) ) + T4 = TAU*DCONJG( V4 ) + V5 = DCONJG( V( 5 ) ) + T5 = TAU*DCONJG( V5 ) + V6 = DCONJG( V( 6 ) ) + T6 = TAU*DCONJG( V6 ) + V7 = DCONJG( V( 7 ) ) + T7 = TAU*DCONJG( V7 ) + V8 = DCONJG( V( 8 ) ) + T8 = TAU*DCONJG( V8 ) + V9 = DCONJG( V( 9 ) ) + T9 = TAU*DCONJG( V9 ) + DO 180 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + C( 9, J ) = C( 9, J ) - SUM*T9 + 180 CONTINUE + GO TO 410 + 190 CONTINUE +* +* Special code for 10 x 10 Householder +* + V1 = DCONJG( V( 1 ) ) + T1 = TAU*DCONJG( V1 ) + V2 = DCONJG( V( 2 ) ) + T2 = TAU*DCONJG( V2 ) + V3 = DCONJG( V( 3 ) ) + T3 = TAU*DCONJG( V3 ) + V4 = DCONJG( V( 4 ) ) + T4 = TAU*DCONJG( V4 ) + V5 = DCONJG( V( 5 ) ) + T5 = TAU*DCONJG( V5 ) + V6 = DCONJG( V( 6 ) ) + T6 = TAU*DCONJG( V6 ) + V7 = DCONJG( V( 7 ) ) + T7 = TAU*DCONJG( V7 ) + V8 = DCONJG( V( 8 ) ) + T8 = TAU*DCONJG( V8 ) + V9 = DCONJG( V( 9 ) ) + T9 = TAU*DCONJG( V9 ) + V10 = DCONJG( V( 10 ) ) + T10 = TAU*DCONJG( V10 ) + DO 200 J = 1, N + SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) + + $ V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) + + $ V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) + + $ V10*C( 10, J ) + C( 1, J ) = C( 1, J ) - SUM*T1 + C( 2, J ) = C( 2, J ) - SUM*T2 + C( 3, J ) = C( 3, J ) - SUM*T3 + C( 4, J ) = C( 4, J ) - SUM*T4 + C( 5, J ) = C( 5, J ) - SUM*T5 + C( 6, J ) = C( 6, J ) - SUM*T6 + C( 7, J ) = C( 7, J ) - SUM*T7 + C( 8, J ) = C( 8, J ) - SUM*T8 + C( 9, J ) = C( 9, J ) - SUM*T9 + C( 10, J ) = C( 10, J ) - SUM*T10 + 200 CONTINUE + GO TO 410 + ELSE +* +* Form C * H, where H has order n. +* + GO TO ( 210, 230, 250, 270, 290, 310, 330, 350, + $ 370, 390 )N +* +* Code for general N +* + CALL ZLARF( SIDE, M, N, V, 1, TAU, C, LDC, WORK ) + GO TO 410 + 210 CONTINUE +* +* Special code for 1 x 1 Householder +* + T1 = ONE - TAU*V( 1 )*DCONJG( V( 1 ) ) + DO 220 J = 1, M + C( J, 1 ) = T1*C( J, 1 ) + 220 CONTINUE + GO TO 410 + 230 CONTINUE +* +* Special code for 2 x 2 Householder +* + V1 = V( 1 ) + T1 = TAU*DCONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*DCONJG( V2 ) + DO 240 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + 240 CONTINUE + GO TO 410 + 250 CONTINUE +* +* Special code for 3 x 3 Householder +* + V1 = V( 1 ) + T1 = TAU*DCONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*DCONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*DCONJG( V3 ) + DO 260 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + 260 CONTINUE + GO TO 410 + 270 CONTINUE +* +* Special code for 4 x 4 Householder +* + V1 = V( 1 ) + T1 = TAU*DCONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*DCONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*DCONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*DCONJG( V4 ) + DO 280 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + 280 CONTINUE + GO TO 410 + 290 CONTINUE +* +* Special code for 5 x 5 Householder +* + V1 = V( 1 ) + T1 = TAU*DCONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*DCONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*DCONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*DCONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*DCONJG( V5 ) + DO 300 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + 300 CONTINUE + GO TO 410 + 310 CONTINUE +* +* Special code for 6 x 6 Householder +* + V1 = V( 1 ) + T1 = TAU*DCONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*DCONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*DCONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*DCONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*DCONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*DCONJG( V6 ) + DO 320 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + 320 CONTINUE + GO TO 410 + 330 CONTINUE +* +* Special code for 7 x 7 Householder +* + V1 = V( 1 ) + T1 = TAU*DCONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*DCONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*DCONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*DCONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*DCONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*DCONJG( V6 ) + V7 = V( 7 ) + T7 = TAU*DCONJG( V7 ) + DO 340 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + 340 CONTINUE + GO TO 410 + 350 CONTINUE +* +* Special code for 8 x 8 Householder +* + V1 = V( 1 ) + T1 = TAU*DCONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*DCONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*DCONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*DCONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*DCONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*DCONJG( V6 ) + V7 = V( 7 ) + T7 = TAU*DCONJG( V7 ) + V8 = V( 8 ) + T8 = TAU*DCONJG( V8 ) + DO 360 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + 360 CONTINUE + GO TO 410 + 370 CONTINUE +* +* Special code for 9 x 9 Householder +* + V1 = V( 1 ) + T1 = TAU*DCONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*DCONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*DCONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*DCONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*DCONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*DCONJG( V6 ) + V7 = V( 7 ) + T7 = TAU*DCONJG( V7 ) + V8 = V( 8 ) + T8 = TAU*DCONJG( V8 ) + V9 = V( 9 ) + T9 = TAU*DCONJG( V9 ) + DO 380 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + C( J, 9 ) = C( J, 9 ) - SUM*T9 + 380 CONTINUE + GO TO 410 + 390 CONTINUE +* +* Special code for 10 x 10 Householder +* + V1 = V( 1 ) + T1 = TAU*DCONJG( V1 ) + V2 = V( 2 ) + T2 = TAU*DCONJG( V2 ) + V3 = V( 3 ) + T3 = TAU*DCONJG( V3 ) + V4 = V( 4 ) + T4 = TAU*DCONJG( V4 ) + V5 = V( 5 ) + T5 = TAU*DCONJG( V5 ) + V6 = V( 6 ) + T6 = TAU*DCONJG( V6 ) + V7 = V( 7 ) + T7 = TAU*DCONJG( V7 ) + V8 = V( 8 ) + T8 = TAU*DCONJG( V8 ) + V9 = V( 9 ) + T9 = TAU*DCONJG( V9 ) + V10 = V( 10 ) + T10 = TAU*DCONJG( V10 ) + DO 400 J = 1, M + SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) + + $ V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) + + $ V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) + + $ V10*C( J, 10 ) + C( J, 1 ) = C( J, 1 ) - SUM*T1 + C( J, 2 ) = C( J, 2 ) - SUM*T2 + C( J, 3 ) = C( J, 3 ) - SUM*T3 + C( J, 4 ) = C( J, 4 ) - SUM*T4 + C( J, 5 ) = C( J, 5 ) - SUM*T5 + C( J, 6 ) = C( J, 6 ) - SUM*T6 + C( J, 7 ) = C( J, 7 ) - SUM*T7 + C( J, 8 ) = C( J, 8 ) - SUM*T8 + C( J, 9 ) = C( J, 9 ) - SUM*T9 + C( J, 10 ) = C( J, 10 ) - SUM*T10 + 400 CONTINUE + GO TO 410 + END IF + 410 CONTINUE + RETURN +* +* End of ZLARFX +* + END diff --git a/dspl/liblapack/SRC/zlarfy.f b/dspl/liblapack/SRC/zlarfy.f new file mode 100644 index 0000000..5760573 --- /dev/null +++ b/dspl/liblapack/SRC/zlarfy.f @@ -0,0 +1,163 @@ +*> \brief \b ZLARFY +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZLARFY( UPLO, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INCV, LDC, N +* COMPLEX*16 TAU +* .. +* .. Array Arguments .. +* COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARFY applies an elementary reflector, or Householder matrix, H, +*> to an n x n Hermitian matrix C, from both the left and the right. +*> +*> H is represented in the form +*> +*> H = I - tau * v * v' +*> +*> where tau is a scalar and v is a vector. +*> +*> If tau is zero, then H is taken to be the unit matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix C is stored. +*> = 'U': Upper triangle +*> = 'L': Lower triangle +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (1 + (N-1)*abs(INCV)) +*> The vector v as described above. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between successive elements of v. INCV must +*> not be zero. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 +*> The value tau as described above. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC, N) +*> On entry, the matrix C. +*> On exit, C is overwritten by H * C * H'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max( 1, N ). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16_eig +* +* ===================================================================== + SUBROUTINE ZLARFY( UPLO, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK test routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCV, LDC, N + COMPLEX*16 TAU +* .. +* .. Array Arguments .. + COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO, HALF + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ), + $ HALF = ( 0.5D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZHEMV, ZHER2 +* .. +* .. External Functions .. + COMPLEX*16 ZDOTC + EXTERNAL ZDOTC +* .. +* .. Executable Statements .. +* + IF( TAU.EQ.ZERO ) + $ RETURN +* +* Form w:= C * v +* + CALL ZHEMV( UPLO, N, ONE, C, LDC, V, INCV, ZERO, WORK, 1 ) +* + ALPHA = -HALF*TAU*ZDOTC( N, WORK, 1, V, INCV ) + CALL ZAXPY( N, ALPHA, V, INCV, WORK, 1 ) +* +* C := C - v * w' - w * v' +* + CALL ZHER2( UPLO, N, -TAU, V, INCV, WORK, 1, C, LDC ) +* + RETURN +* +* End of ZLARFY +* + END diff --git a/dspl/liblapack/SRC/zlargv.f b/dspl/liblapack/SRC/zlargv.f new file mode 100644 index 0000000..1e17983 --- /dev/null +++ b/dspl/liblapack/SRC/zlargv.f @@ -0,0 +1,300 @@ +*> \brief \b ZLARGV generates a vector of plane rotations with real cosines and complex sines. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARGV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARGV( N, X, INCX, Y, INCY, C, INCC ) +* +* .. Scalar Arguments .. +* INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( * ) +* COMPLEX*16 X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARGV generates a vector of complex plane rotations with real +*> cosines, determined by elements of the complex vectors x and y. +*> For i = 1,2,...,n +*> +*> ( c(i) s(i) ) ( x(i) ) = ( r(i) ) +*> ( -conjg(s(i)) c(i) ) ( y(i) ) = ( 0 ) +*> +*> where c(i)**2 + ABS(s(i))**2 = 1 +*> +*> The following conventions are used (these are the same as in ZLARTG, +*> but differ from the BLAS1 routine ZROTG): +*> If y(i)=0, then c(i)=1 and s(i)=0. +*> If x(i)=0, then c(i)=0 and s(i) is chosen so that r(i) is real. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of plane rotations to be generated. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (1+(N-1)*INCX) +*> On entry, the vector x. +*> On exit, x(i) is overwritten by r(i), for i = 1,...,n. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX*16 array, dimension (1+(N-1)*INCY) +*> On entry, the vector y. +*> On exit, the sines of the plane rotations. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> The increment between elements of Y. INCY > 0. +*> \endverbatim +*> +*> \param[out] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (1+(N-1)*INCC) +*> The cosines of the plane rotations. +*> \endverbatim +*> +*> \param[in] INCC +*> \verbatim +*> INCC is INTEGER +*> The increment between elements of C. INCC > 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> 6-6-96 - Modified with a new algorithm by W. Kahan and J. Demmel +*> +*> This version has a few statements commented out for thread safety +*> (machine parameters are computed on each entry). 10 feb 03, SJH. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLARGV( N, X, INCX, Y, INCY, C, INCC ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ) + COMPLEX*16 X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION TWO, ONE, ZERO + PARAMETER ( TWO = 2.0D+0, ONE = 1.0D+0, ZERO = 0.0D+0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. +* LOGICAL FIRST + + INTEGER COUNT, I, IC, IX, IY, J + DOUBLE PRECISION CS, D, DI, DR, EPS, F2, F2S, G2, G2S, SAFMIN, + $ SAFMN2, SAFMX2, SCALE + COMPLEX*16 F, FF, FS, G, GS, R, SN +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY2 + EXTERNAL DLAMCH, DLAPY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, LOG, + $ MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION ABS1, ABSSQ +* .. +* .. Save statement .. +* SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 +* .. +* .. Data statements .. +* DATA FIRST / .TRUE. / +* .. +* .. Statement Function definitions .. + ABS1( FF ) = MAX( ABS( DBLE( FF ) ), ABS( DIMAG( FF ) ) ) + ABSSQ( FF ) = DBLE( FF )**2 + DIMAG( FF )**2 +* .. +* .. Executable Statements .. +* +* IF( FIRST ) THEN +* FIRST = .FALSE. + SAFMIN = DLAMCH( 'S' ) + EPS = DLAMCH( 'E' ) + SAFMN2 = DLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) / + $ LOG( DLAMCH( 'B' ) ) / TWO ) + SAFMX2 = ONE / SAFMN2 +* END IF + IX = 1 + IY = 1 + IC = 1 + DO 60 I = 1, N + F = X( IX ) + G = Y( IY ) +* +* Use identical algorithm as in ZLARTG +* + SCALE = MAX( ABS1( F ), ABS1( G ) ) + FS = F + GS = G + COUNT = 0 + IF( SCALE.GE.SAFMX2 ) THEN + 10 CONTINUE + COUNT = COUNT + 1 + FS = FS*SAFMN2 + GS = GS*SAFMN2 + SCALE = SCALE*SAFMN2 + IF( SCALE.GE.SAFMX2 ) + $ GO TO 10 + ELSE IF( SCALE.LE.SAFMN2 ) THEN + IF( G.EQ.CZERO ) THEN + CS = ONE + SN = CZERO + R = F + GO TO 50 + END IF + 20 CONTINUE + COUNT = COUNT - 1 + FS = FS*SAFMX2 + GS = GS*SAFMX2 + SCALE = SCALE*SAFMX2 + IF( SCALE.LE.SAFMN2 ) + $ GO TO 20 + END IF + F2 = ABSSQ( FS ) + G2 = ABSSQ( GS ) + IF( F2.LE.MAX( G2, ONE )*SAFMIN ) THEN +* +* This is a rare case: F is very small. +* + IF( F.EQ.CZERO ) THEN + CS = ZERO + R = DLAPY2( DBLE( G ), DIMAG( G ) ) +* Do complex/real division explicitly with two real +* divisions + D = DLAPY2( DBLE( GS ), DIMAG( GS ) ) + SN = DCMPLX( DBLE( GS ) / D, -DIMAG( GS ) / D ) + GO TO 50 + END IF + F2S = DLAPY2( DBLE( FS ), DIMAG( FS ) ) +* G2 and G2S are accurate +* G2 is at least SAFMIN, and G2S is at least SAFMN2 + G2S = SQRT( G2 ) +* Error in CS from underflow in F2S is at most +* UNFL / SAFMN2 .lt. sqrt(UNFL*EPS) .lt. EPS +* If MAX(G2,ONE)=G2, then F2 .lt. G2*SAFMIN, +* and so CS .lt. sqrt(SAFMIN) +* If MAX(G2,ONE)=ONE, then F2 .lt. SAFMIN +* and so CS .lt. sqrt(SAFMIN)/SAFMN2 = sqrt(EPS) +* Therefore, CS = F2S/G2S / sqrt( 1 + (F2S/G2S)**2 ) = F2S/G2S + CS = F2S / G2S +* Make sure abs(FF) = 1 +* Do complex/real division explicitly with 2 real divisions + IF( ABS1( F ).GT.ONE ) THEN + D = DLAPY2( DBLE( F ), DIMAG( F ) ) + FF = DCMPLX( DBLE( F ) / D, DIMAG( F ) / D ) + ELSE + DR = SAFMX2*DBLE( F ) + DI = SAFMX2*DIMAG( F ) + D = DLAPY2( DR, DI ) + FF = DCMPLX( DR / D, DI / D ) + END IF + SN = FF*DCMPLX( DBLE( GS ) / G2S, -DIMAG( GS ) / G2S ) + R = CS*F + SN*G + ELSE +* +* This is the most common case. +* Neither F2 nor F2/G2 are less than SAFMIN +* F2S cannot overflow, and it is accurate +* + F2S = SQRT( ONE+G2 / F2 ) +* Do the F2S(real)*FS(complex) multiply with two real +* multiplies + R = DCMPLX( F2S*DBLE( FS ), F2S*DIMAG( FS ) ) + CS = ONE / F2S + D = F2 + G2 +* Do complex/real division explicitly with two real divisions + SN = DCMPLX( DBLE( R ) / D, DIMAG( R ) / D ) + SN = SN*DCONJG( GS ) + IF( COUNT.NE.0 ) THEN + IF( COUNT.GT.0 ) THEN + DO 30 J = 1, COUNT + R = R*SAFMX2 + 30 CONTINUE + ELSE + DO 40 J = 1, -COUNT + R = R*SAFMN2 + 40 CONTINUE + END IF + END IF + END IF + 50 CONTINUE + C( IC ) = CS + Y( IY ) = SN + X( IX ) = R + IC = IC + INCC + IY = IY + INCY + IX = IX + INCX + 60 CONTINUE + RETURN +* +* End of ZLARGV +* + END diff --git a/dspl/liblapack/SRC/zlarnv.f b/dspl/liblapack/SRC/zlarnv.f new file mode 100644 index 0000000..7541652 --- /dev/null +++ b/dspl/liblapack/SRC/zlarnv.f @@ -0,0 +1,193 @@ +*> \brief \b ZLARNV returns a vector of random numbers from a uniform or normal distribution. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARNV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARNV( IDIST, ISEED, N, X ) +* +* .. Scalar Arguments .. +* INTEGER IDIST, N +* .. +* .. Array Arguments .. +* INTEGER ISEED( 4 ) +* COMPLEX*16 X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARNV returns a vector of n random complex numbers from a uniform or +*> normal distribution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] IDIST +*> \verbatim +*> IDIST is INTEGER +*> Specifies the distribution of the random numbers: +*> = 1: real and imaginary parts each uniform (0,1) +*> = 2: real and imaginary parts each uniform (-1,1) +*> = 3: real and imaginary parts each normal (0,1) +*> = 4: uniformly distributed on the disc abs(z) < 1 +*> = 5: uniformly distributed on the circle abs(z) = 1 +*> \endverbatim +*> +*> \param[in,out] ISEED +*> \verbatim +*> ISEED is INTEGER array, dimension (4) +*> On entry, the seed of the random number generator; the array +*> elements must be between 0 and 4095, and ISEED(4) must be +*> odd. +*> On exit, the seed is updated. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of random numbers to be generated. +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (N) +*> The generated random numbers. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> This routine calls the auxiliary routine DLARUV to generate random +*> real numbers from a uniform (0,1) distribution, in batches of up to +*> 128 using vectorisable code. The Box-Muller method is used to +*> transform numbers from a uniform to a normal distribution. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLARNV( IDIST, ISEED, N, X ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IDIST, N +* .. +* .. Array Arguments .. + INTEGER ISEED( 4 ) + COMPLEX*16 X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 ) + INTEGER LV + PARAMETER ( LV = 128 ) + DOUBLE PRECISION TWOPI + PARAMETER ( TWOPI = 6.2831853071795864769252867663D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IL, IV +* .. +* .. Local Arrays .. + DOUBLE PRECISION U( LV ) +* .. +* .. Intrinsic Functions .. + INTRINSIC DCMPLX, EXP, LOG, MIN, SQRT +* .. +* .. External Subroutines .. + EXTERNAL DLARUV +* .. +* .. Executable Statements .. +* + DO 60 IV = 1, N, LV / 2 + IL = MIN( LV / 2, N-IV+1 ) +* +* Call DLARUV to generate 2*IL real numbers from a uniform (0,1) +* distribution (2*IL <= LV) +* + CALL DLARUV( ISEED, 2*IL, U ) +* + IF( IDIST.EQ.1 ) THEN +* +* Copy generated numbers +* + DO 10 I = 1, IL + X( IV+I-1 ) = DCMPLX( U( 2*I-1 ), U( 2*I ) ) + 10 CONTINUE + ELSE IF( IDIST.EQ.2 ) THEN +* +* Convert generated numbers to uniform (-1,1) distribution +* + DO 20 I = 1, IL + X( IV+I-1 ) = DCMPLX( TWO*U( 2*I-1 )-ONE, + $ TWO*U( 2*I )-ONE ) + 20 CONTINUE + ELSE IF( IDIST.EQ.3 ) THEN +* +* Convert generated numbers to normal (0,1) distribution +* + DO 30 I = 1, IL + X( IV+I-1 ) = SQRT( -TWO*LOG( U( 2*I-1 ) ) )* + $ EXP( DCMPLX( ZERO, TWOPI*U( 2*I ) ) ) + 30 CONTINUE + ELSE IF( IDIST.EQ.4 ) THEN +* +* Convert generated numbers to complex numbers uniformly +* distributed on the unit disk +* + DO 40 I = 1, IL + X( IV+I-1 ) = SQRT( U( 2*I-1 ) )* + $ EXP( DCMPLX( ZERO, TWOPI*U( 2*I ) ) ) + 40 CONTINUE + ELSE IF( IDIST.EQ.5 ) THEN +* +* Convert generated numbers to complex numbers uniformly +* distributed on the unit circle +* + DO 50 I = 1, IL + X( IV+I-1 ) = EXP( DCMPLX( ZERO, TWOPI*U( 2*I ) ) ) + 50 CONTINUE + END IF + 60 CONTINUE + RETURN +* +* End of ZLARNV +* + END diff --git a/dspl/liblapack/SRC/zlarrv.f b/dspl/liblapack/SRC/zlarrv.f new file mode 100644 index 0000000..67a6758 --- /dev/null +++ b/dspl/liblapack/SRC/zlarrv.f @@ -0,0 +1,1060 @@ +*> \brief \b ZLARRV computes the eigenvectors of the tridiagonal matrix T = L D LT given L, D and the eigenvalues of L D LT. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARRV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARRV( N, VL, VU, D, L, PIVMIN, +* ISPLIT, M, DOL, DOU, MINRGP, +* RTOL1, RTOL2, W, WERR, WGAP, +* IBLOCK, INDEXW, GERS, Z, LDZ, ISUPPZ, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER DOL, DOU, INFO, LDZ, M, N +* DOUBLE PRECISION MINRGP, PIVMIN, RTOL1, RTOL2, VL, VU +* .. +* .. Array Arguments .. +* INTEGER IBLOCK( * ), INDEXW( * ), ISPLIT( * ), +* $ ISUPPZ( * ), IWORK( * ) +* DOUBLE PRECISION D( * ), GERS( * ), L( * ), W( * ), WERR( * ), +* $ WGAP( * ), WORK( * ) +* COMPLEX*16 Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARRV computes the eigenvectors of the tridiagonal matrix +*> T = L D L**T given L, D and APPROXIMATIONS to the eigenvalues of L D L**T. +*> The input eigenvalues should have been computed by DLARRE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> Lower bound of the interval that contains the desired +*> eigenvalues. VL < VU. Needed to compute gaps on the left or right +*> end of the extremal eigenvalues in the desired RANGE. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> Upper bound of the interval that contains the desired +*> eigenvalues. VL < VU. Needed to compute gaps on the left or right +*> end of the extremal eigenvalues in the desired RANGE. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the N diagonal elements of the diagonal matrix D. +*> On exit, D may be overwritten. +*> \endverbatim +*> +*> \param[in,out] L +*> \verbatim +*> L is DOUBLE PRECISION array, dimension (N) +*> On entry, the (N-1) subdiagonal elements of the unit +*> bidiagonal matrix L are in elements 1 to N-1 of L +*> (if the matrix is not split.) At the end of each block +*> is stored the corresponding shift as given by DLARRE. +*> On exit, L is overwritten. +*> \endverbatim +*> +*> \param[in] PIVMIN +*> \verbatim +*> PIVMIN is DOUBLE PRECISION +*> The minimum pivot allowed in the Sturm sequence. +*> \endverbatim +*> +*> \param[in] ISPLIT +*> \verbatim +*> ISPLIT is INTEGER array, dimension (N) +*> The splitting points, at which T breaks up into blocks. +*> The first block consists of rows/columns 1 to +*> ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 +*> through ISPLIT( 2 ), etc. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The total number of input eigenvalues. 0 <= M <= N. +*> \endverbatim +*> +*> \param[in] DOL +*> \verbatim +*> DOL is INTEGER +*> \endverbatim +*> +*> \param[in] DOU +*> \verbatim +*> DOU is INTEGER +*> If the user wants to compute only selected eigenvectors from all +*> the eigenvalues supplied, he can specify an index range DOL:DOU. +*> Or else the setting DOL=1, DOU=M should be applied. +*> Note that DOL and DOU refer to the order in which the eigenvalues +*> are stored in W. +*> If the user wants to compute only selected eigenpairs, then +*> the columns DOL-1 to DOU+1 of the eigenvector space Z contain the +*> computed eigenvectors. All other columns of Z are set to zero. +*> \endverbatim +*> +*> \param[in] MINRGP +*> \verbatim +*> MINRGP is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] RTOL1 +*> \verbatim +*> RTOL1 is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] RTOL2 +*> \verbatim +*> RTOL2 is DOUBLE PRECISION +*> Parameters for bisection. +*> An interval [LEFT,RIGHT] has converged if +*> RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) +*> \endverbatim +*> +*> \param[in,out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements of W contain the APPROXIMATE eigenvalues for +*> which eigenvectors are to be computed. The eigenvalues +*> should be grouped by split-off block and ordered from +*> smallest to largest within the block ( The output array +*> W from DLARRE is expected here ). Furthermore, they are with +*> respect to the shift of the corresponding root representation +*> for their block. On exit, W holds the eigenvalues of the +*> UNshifted matrix. +*> \endverbatim +*> +*> \param[in,out] WERR +*> \verbatim +*> WERR is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the semiwidth of the uncertainty +*> interval of the corresponding eigenvalue in W +*> \endverbatim +*> +*> \param[in,out] WGAP +*> \verbatim +*> WGAP is DOUBLE PRECISION array, dimension (N) +*> The separation from the right neighbor eigenvalue in W. +*> \endverbatim +*> +*> \param[in] IBLOCK +*> \verbatim +*> IBLOCK is INTEGER array, dimension (N) +*> The indices of the blocks (submatrices) associated with the +*> corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue +*> W(i) belongs to the first block from the top, =2 if W(i) +*> belongs to the second block, etc. +*> \endverbatim +*> +*> \param[in] INDEXW +*> \verbatim +*> INDEXW is INTEGER array, dimension (N) +*> The indices of the eigenvalues within each block (submatrix); +*> for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the +*> i-th eigenvalue W(i) is the 10-th eigenvalue in the second block. +*> \endverbatim +*> +*> \param[in] GERS +*> \verbatim +*> GERS is DOUBLE PRECISION array, dimension (2*N) +*> The N Gerschgorin intervals (the i-th Gerschgorin interval +*> is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should +*> be computed from the original UNshifted matrix. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, max(1,M) ) +*> If INFO = 0, the first M columns of Z contain the +*> orthonormal eigenvectors of the matrix T +*> corresponding to the input eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The I-th eigenvector +*> is nonzero only in elements ISUPPZ( 2*I-1 ) through +*> ISUPPZ( 2*I ). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (12*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (7*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> > 0: A problem occurred in ZLARRV. +*> < 0: One of the called subroutines signaled an internal problem. +*> Needs inspection of the corresponding parameter IINFO +*> for further information. +*> +*> =-1: Problem in DLARRB when refining a child's eigenvalues. +*> =-2: Problem in DLARRF when computing the RRR of a child. +*> When a child is inside a tight cluster, it can be difficult +*> to find an RRR. A partial remedy from the user's point of +*> view is to make the parameter MINRGP smaller and recompile. +*> However, as the orthogonality of the computed vectors is +*> proportional to 1/MINRGP, the user should be aware that +*> he might be trading in precision when he decreases MINRGP. +*> =-3: Problem in DLARRB when refining a single eigenvalue +*> after the Rayleigh correction was rejected. +*> = 5: The Rayleigh Quotient Iteration failed to converge to +*> full accuracy in MAXITR steps. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA +* +* ===================================================================== + SUBROUTINE ZLARRV( N, VL, VU, D, L, PIVMIN, + $ ISPLIT, M, DOL, DOU, MINRGP, + $ RTOL1, RTOL2, W, WERR, WGAP, + $ IBLOCK, INDEXW, GERS, Z, LDZ, ISUPPZ, + $ WORK, IWORK, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER DOL, DOU, INFO, LDZ, M, N + DOUBLE PRECISION MINRGP, PIVMIN, RTOL1, RTOL2, VL, VU +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), INDEXW( * ), ISPLIT( * ), + $ ISUPPZ( * ), IWORK( * ) + DOUBLE PRECISION D( * ), GERS( * ), L( * ), W( * ), WERR( * ), + $ WGAP( * ), WORK( * ) + COMPLEX*16 Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXITR + PARAMETER ( MAXITR = 10 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ) ) + DOUBLE PRECISION ZERO, ONE, TWO, THREE, FOUR, HALF + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, + $ TWO = 2.0D0, THREE = 3.0D0, + $ FOUR = 4.0D0, HALF = 0.5D0) +* .. +* .. Local Scalars .. + LOGICAL ESKIP, NEEDBS, STP2II, TRYRQC, USEDBS, USEDRQ + INTEGER DONE, I, IBEGIN, IDONE, IEND, II, IINDC1, + $ IINDC2, IINDR, IINDWK, IINFO, IM, IN, INDEIG, + $ INDLD, INDLLD, INDWRK, ISUPMN, ISUPMX, ITER, + $ ITMP1, J, JBLK, K, MINIWSIZE, MINWSIZE, NCLUS, + $ NDEPTH, NEGCNT, NEWCLS, NEWFST, NEWFTT, NEWLST, + $ NEWSIZ, OFFSET, OLDCLS, OLDFST, OLDIEN, OLDLST, + $ OLDNCL, P, PARITY, Q, WBEGIN, WEND, WINDEX, + $ WINDMN, WINDPL, ZFROM, ZTO, ZUSEDL, ZUSEDU, + $ ZUSEDW + INTEGER INDIN1, INDIN2 + DOUBLE PRECISION BSTRES, BSTW, EPS, FUDGE, GAP, GAPTOL, GL, GU, + $ LAMBDA, LEFT, LGAP, MINGMA, NRMINV, RESID, + $ RGAP, RIGHT, RQCORR, RQTOL, SAVGAP, SGNDEF, + $ SIGMA, SPDIAM, SSIGMA, TAU, TMP, TOL, ZTZ +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLARRB, DLARRF, ZDSCAL, ZLAR1V, + $ ZLASET +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, MAX, MIN + INTRINSIC DCMPLX +* .. +* .. Executable Statements .. +* .. + + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* +* The first N entries of WORK are reserved for the eigenvalues + INDLD = N+1 + INDLLD= 2*N+1 + INDIN1 = 3*N + 1 + INDIN2 = 4*N + 1 + INDWRK = 5*N + 1 + MINWSIZE = 12 * N + + DO 5 I= 1,MINWSIZE + WORK( I ) = ZERO + 5 CONTINUE + +* IWORK(IINDR+1:IINDR+N) hold the twist indices R for the +* factorization used to compute the FP vector + IINDR = 0 +* IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current +* layer and the one above. + IINDC1 = N + IINDC2 = 2*N + IINDWK = 3*N + 1 + + MINIWSIZE = 7 * N + DO 10 I= 1,MINIWSIZE + IWORK( I ) = 0 + 10 CONTINUE + + ZUSEDL = 1 + IF(DOL.GT.1) THEN +* Set lower bound for use of Z + ZUSEDL = DOL-1 + ENDIF + ZUSEDU = M + IF(DOU.LT.M) THEN +* Set lower bound for use of Z + ZUSEDU = DOU+1 + ENDIF +* The width of the part of Z that is used + ZUSEDW = ZUSEDU - ZUSEDL + 1 + + + CALL ZLASET( 'Full', N, ZUSEDW, CZERO, CZERO, + $ Z(1,ZUSEDL), LDZ ) + + EPS = DLAMCH( 'Precision' ) + RQTOL = TWO * EPS +* +* Set expert flags for standard code. + TRYRQC = .TRUE. + + IF((DOL.EQ.1).AND.(DOU.EQ.M)) THEN + ELSE +* Only selected eigenpairs are computed. Since the other evalues +* are not refined by RQ iteration, bisection has to compute to full +* accuracy. + RTOL1 = FOUR * EPS + RTOL2 = FOUR * EPS + ENDIF + +* The entries WBEGIN:WEND in W, WERR, WGAP correspond to the +* desired eigenvalues. The support of the nonzero eigenvector +* entries is contained in the interval IBEGIN:IEND. +* Remark that if k eigenpairs are desired, then the eigenvectors +* are stored in k contiguous columns of Z. + +* DONE is the number of eigenvectors already computed + DONE = 0 + IBEGIN = 1 + WBEGIN = 1 + DO 170 JBLK = 1, IBLOCK( M ) + IEND = ISPLIT( JBLK ) + SIGMA = L( IEND ) +* Find the eigenvectors of the submatrix indexed IBEGIN +* through IEND. + WEND = WBEGIN - 1 + 15 CONTINUE + IF( WEND.LT.M ) THEN + IF( IBLOCK( WEND+1 ).EQ.JBLK ) THEN + WEND = WEND + 1 + GO TO 15 + END IF + END IF + IF( WEND.LT.WBEGIN ) THEN + IBEGIN = IEND + 1 + GO TO 170 + ELSEIF( (WEND.LT.DOL).OR.(WBEGIN.GT.DOU) ) THEN + IBEGIN = IEND + 1 + WBEGIN = WEND + 1 + GO TO 170 + END IF + +* Find local spectral diameter of the block + GL = GERS( 2*IBEGIN-1 ) + GU = GERS( 2*IBEGIN ) + DO 20 I = IBEGIN+1 , IEND + GL = MIN( GERS( 2*I-1 ), GL ) + GU = MAX( GERS( 2*I ), GU ) + 20 CONTINUE + SPDIAM = GU - GL + +* OLDIEN is the last index of the previous block + OLDIEN = IBEGIN - 1 +* Calculate the size of the current block + IN = IEND - IBEGIN + 1 +* The number of eigenvalues in the current block + IM = WEND - WBEGIN + 1 + +* This is for a 1x1 block + IF( IBEGIN.EQ.IEND ) THEN + DONE = DONE+1 + Z( IBEGIN, WBEGIN ) = DCMPLX( ONE, ZERO ) + ISUPPZ( 2*WBEGIN-1 ) = IBEGIN + ISUPPZ( 2*WBEGIN ) = IBEGIN + W( WBEGIN ) = W( WBEGIN ) + SIGMA + WORK( WBEGIN ) = W( WBEGIN ) + IBEGIN = IEND + 1 + WBEGIN = WBEGIN + 1 + GO TO 170 + END IF + +* The desired (shifted) eigenvalues are stored in W(WBEGIN:WEND) +* Note that these can be approximations, in this case, the corresp. +* entries of WERR give the size of the uncertainty interval. +* The eigenvalue approximations will be refined when necessary as +* high relative accuracy is required for the computation of the +* corresponding eigenvectors. + CALL DCOPY( IM, W( WBEGIN ), 1, + $ WORK( WBEGIN ), 1 ) + +* We store in W the eigenvalue approximations w.r.t. the original +* matrix T. + DO 30 I=1,IM + W(WBEGIN+I-1) = W(WBEGIN+I-1)+SIGMA + 30 CONTINUE + + +* NDEPTH is the current depth of the representation tree + NDEPTH = 0 +* PARITY is either 1 or 0 + PARITY = 1 +* NCLUS is the number of clusters for the next level of the +* representation tree, we start with NCLUS = 1 for the root + NCLUS = 1 + IWORK( IINDC1+1 ) = 1 + IWORK( IINDC1+2 ) = IM + +* IDONE is the number of eigenvectors already computed in the current +* block + IDONE = 0 +* loop while( IDONE.LT.IM ) +* generate the representation tree for the current block and +* compute the eigenvectors + 40 CONTINUE + IF( IDONE.LT.IM ) THEN +* This is a crude protection against infinitely deep trees + IF( NDEPTH.GT.M ) THEN + INFO = -2 + RETURN + ENDIF +* breadth first processing of the current level of the representation +* tree: OLDNCL = number of clusters on current level + OLDNCL = NCLUS +* reset NCLUS to count the number of child clusters + NCLUS = 0 +* + PARITY = 1 - PARITY + IF( PARITY.EQ.0 ) THEN + OLDCLS = IINDC1 + NEWCLS = IINDC2 + ELSE + OLDCLS = IINDC2 + NEWCLS = IINDC1 + END IF +* Process the clusters on the current level + DO 150 I = 1, OLDNCL + J = OLDCLS + 2*I +* OLDFST, OLDLST = first, last index of current cluster. +* cluster indices start with 1 and are relative +* to WBEGIN when accessing W, WGAP, WERR, Z + OLDFST = IWORK( J-1 ) + OLDLST = IWORK( J ) + IF( NDEPTH.GT.0 ) THEN +* Retrieve relatively robust representation (RRR) of cluster +* that has been computed at the previous level +* The RRR is stored in Z and overwritten once the eigenvectors +* have been computed or when the cluster is refined + + IF((DOL.EQ.1).AND.(DOU.EQ.M)) THEN +* Get representation from location of the leftmost evalue +* of the cluster + J = WBEGIN + OLDFST - 1 + ELSE + IF(WBEGIN+OLDFST-1.LT.DOL) THEN +* Get representation from the left end of Z array + J = DOL - 1 + ELSEIF(WBEGIN+OLDFST-1.GT.DOU) THEN +* Get representation from the right end of Z array + J = DOU + ELSE + J = WBEGIN + OLDFST - 1 + ENDIF + ENDIF + DO 45 K = 1, IN - 1 + D( IBEGIN+K-1 ) = DBLE( Z( IBEGIN+K-1, + $ J ) ) + L( IBEGIN+K-1 ) = DBLE( Z( IBEGIN+K-1, + $ J+1 ) ) + 45 CONTINUE + D( IEND ) = DBLE( Z( IEND, J ) ) + SIGMA = DBLE( Z( IEND, J+1 ) ) + +* Set the corresponding entries in Z to zero + CALL ZLASET( 'Full', IN, 2, CZERO, CZERO, + $ Z( IBEGIN, J), LDZ ) + END IF + +* Compute DL and DLL of current RRR + DO 50 J = IBEGIN, IEND-1 + TMP = D( J )*L( J ) + WORK( INDLD-1+J ) = TMP + WORK( INDLLD-1+J ) = TMP*L( J ) + 50 CONTINUE + + IF( NDEPTH.GT.0 ) THEN +* P and Q are index of the first and last eigenvalue to compute +* within the current block + P = INDEXW( WBEGIN-1+OLDFST ) + Q = INDEXW( WBEGIN-1+OLDLST ) +* Offset for the arrays WORK, WGAP and WERR, i.e., the P-OFFSET +* through the Q-OFFSET elements of these arrays are to be used. +* OFFSET = P-OLDFST + OFFSET = INDEXW( WBEGIN ) - 1 +* perform limited bisection (if necessary) to get approximate +* eigenvalues to the precision needed. + CALL DLARRB( IN, D( IBEGIN ), + $ WORK(INDLLD+IBEGIN-1), + $ P, Q, RTOL1, RTOL2, OFFSET, + $ WORK(WBEGIN),WGAP(WBEGIN),WERR(WBEGIN), + $ WORK( INDWRK ), IWORK( IINDWK ), + $ PIVMIN, SPDIAM, IN, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = -1 + RETURN + ENDIF +* We also recompute the extremal gaps. W holds all eigenvalues +* of the unshifted matrix and must be used for computation +* of WGAP, the entries of WORK might stem from RRRs with +* different shifts. The gaps from WBEGIN-1+OLDFST to +* WBEGIN-1+OLDLST are correctly computed in DLARRB. +* However, we only allow the gaps to become greater since +* this is what should happen when we decrease WERR + IF( OLDFST.GT.1) THEN + WGAP( WBEGIN+OLDFST-2 ) = + $ MAX(WGAP(WBEGIN+OLDFST-2), + $ W(WBEGIN+OLDFST-1)-WERR(WBEGIN+OLDFST-1) + $ - W(WBEGIN+OLDFST-2)-WERR(WBEGIN+OLDFST-2) ) + ENDIF + IF( WBEGIN + OLDLST -1 .LT. WEND ) THEN + WGAP( WBEGIN+OLDLST-1 ) = + $ MAX(WGAP(WBEGIN+OLDLST-1), + $ W(WBEGIN+OLDLST)-WERR(WBEGIN+OLDLST) + $ - W(WBEGIN+OLDLST-1)-WERR(WBEGIN+OLDLST-1) ) + ENDIF +* Each time the eigenvalues in WORK get refined, we store +* the newly found approximation with all shifts applied in W + DO 53 J=OLDFST,OLDLST + W(WBEGIN+J-1) = WORK(WBEGIN+J-1)+SIGMA + 53 CONTINUE + END IF + +* Process the current node. + NEWFST = OLDFST + DO 140 J = OLDFST, OLDLST + IF( J.EQ.OLDLST ) THEN +* we are at the right end of the cluster, this is also the +* boundary of the child cluster + NEWLST = J + ELSE IF ( WGAP( WBEGIN + J -1).GE. + $ MINRGP* ABS( WORK(WBEGIN + J -1) ) ) THEN +* the right relative gap is big enough, the child cluster +* (NEWFST,..,NEWLST) is well separated from the following + NEWLST = J + ELSE +* inside a child cluster, the relative gap is not +* big enough. + GOTO 140 + END IF + +* Compute size of child cluster found + NEWSIZ = NEWLST - NEWFST + 1 + +* NEWFTT is the place in Z where the new RRR or the computed +* eigenvector is to be stored + IF((DOL.EQ.1).AND.(DOU.EQ.M)) THEN +* Store representation at location of the leftmost evalue +* of the cluster + NEWFTT = WBEGIN + NEWFST - 1 + ELSE + IF(WBEGIN+NEWFST-1.LT.DOL) THEN +* Store representation at the left end of Z array + NEWFTT = DOL - 1 + ELSEIF(WBEGIN+NEWFST-1.GT.DOU) THEN +* Store representation at the right end of Z array + NEWFTT = DOU + ELSE + NEWFTT = WBEGIN + NEWFST - 1 + ENDIF + ENDIF + + IF( NEWSIZ.GT.1) THEN +* +* Current child is not a singleton but a cluster. +* Compute and store new representation of child. +* +* +* Compute left and right cluster gap. +* +* LGAP and RGAP are not computed from WORK because +* the eigenvalue approximations may stem from RRRs +* different shifts. However, W hold all eigenvalues +* of the unshifted matrix. Still, the entries in WGAP +* have to be computed from WORK since the entries +* in W might be of the same order so that gaps are not +* exhibited correctly for very close eigenvalues. + IF( NEWFST.EQ.1 ) THEN + LGAP = MAX( ZERO, + $ W(WBEGIN)-WERR(WBEGIN) - VL ) + ELSE + LGAP = WGAP( WBEGIN+NEWFST-2 ) + ENDIF + RGAP = WGAP( WBEGIN+NEWLST-1 ) +* +* Compute left- and rightmost eigenvalue of child +* to high precision in order to shift as close +* as possible and obtain as large relative gaps +* as possible +* + DO 55 K =1,2 + IF(K.EQ.1) THEN + P = INDEXW( WBEGIN-1+NEWFST ) + ELSE + P = INDEXW( WBEGIN-1+NEWLST ) + ENDIF + OFFSET = INDEXW( WBEGIN ) - 1 + CALL DLARRB( IN, D(IBEGIN), + $ WORK( INDLLD+IBEGIN-1 ),P,P, + $ RQTOL, RQTOL, OFFSET, + $ WORK(WBEGIN),WGAP(WBEGIN), + $ WERR(WBEGIN),WORK( INDWRK ), + $ IWORK( IINDWK ), PIVMIN, SPDIAM, + $ IN, IINFO ) + 55 CONTINUE +* + IF((WBEGIN+NEWLST-1.LT.DOL).OR. + $ (WBEGIN+NEWFST-1.GT.DOU)) THEN +* if the cluster contains no desired eigenvalues +* skip the computation of that branch of the rep. tree +* +* We could skip before the refinement of the extremal +* eigenvalues of the child, but then the representation +* tree could be different from the one when nothing is +* skipped. For this reason we skip at this place. + IDONE = IDONE + NEWLST - NEWFST + 1 + GOTO 139 + ENDIF +* +* Compute RRR of child cluster. +* Note that the new RRR is stored in Z +* +* DLARRF needs LWORK = 2*N + CALL DLARRF( IN, D( IBEGIN ), L( IBEGIN ), + $ WORK(INDLD+IBEGIN-1), + $ NEWFST, NEWLST, WORK(WBEGIN), + $ WGAP(WBEGIN), WERR(WBEGIN), + $ SPDIAM, LGAP, RGAP, PIVMIN, TAU, + $ WORK( INDIN1 ), WORK( INDIN2 ), + $ WORK( INDWRK ), IINFO ) +* In the complex case, DLARRF cannot write +* the new RRR directly into Z and needs an intermediate +* workspace + DO 56 K = 1, IN-1 + Z( IBEGIN+K-1, NEWFTT ) = + $ DCMPLX( WORK( INDIN1+K-1 ), ZERO ) + Z( IBEGIN+K-1, NEWFTT+1 ) = + $ DCMPLX( WORK( INDIN2+K-1 ), ZERO ) + 56 CONTINUE + Z( IEND, NEWFTT ) = + $ DCMPLX( WORK( INDIN1+IN-1 ), ZERO ) + IF( IINFO.EQ.0 ) THEN +* a new RRR for the cluster was found by DLARRF +* update shift and store it + SSIGMA = SIGMA + TAU + Z( IEND, NEWFTT+1 ) = DCMPLX( SSIGMA, ZERO ) +* WORK() are the midpoints and WERR() the semi-width +* Note that the entries in W are unchanged. + DO 116 K = NEWFST, NEWLST + FUDGE = + $ THREE*EPS*ABS(WORK(WBEGIN+K-1)) + WORK( WBEGIN + K - 1 ) = + $ WORK( WBEGIN + K - 1) - TAU + FUDGE = FUDGE + + $ FOUR*EPS*ABS(WORK(WBEGIN+K-1)) +* Fudge errors + WERR( WBEGIN + K - 1 ) = + $ WERR( WBEGIN + K - 1 ) + FUDGE +* Gaps are not fudged. Provided that WERR is small +* when eigenvalues are close, a zero gap indicates +* that a new representation is needed for resolving +* the cluster. A fudge could lead to a wrong decision +* of judging eigenvalues 'separated' which in +* reality are not. This could have a negative impact +* on the orthogonality of the computed eigenvectors. + 116 CONTINUE + + NCLUS = NCLUS + 1 + K = NEWCLS + 2*NCLUS + IWORK( K-1 ) = NEWFST + IWORK( K ) = NEWLST + ELSE + INFO = -2 + RETURN + ENDIF + ELSE +* +* Compute eigenvector of singleton +* + ITER = 0 +* + TOL = FOUR * LOG(DBLE(IN)) * EPS +* + K = NEWFST + WINDEX = WBEGIN + K - 1 + WINDMN = MAX(WINDEX - 1,1) + WINDPL = MIN(WINDEX + 1,M) + LAMBDA = WORK( WINDEX ) + DONE = DONE + 1 +* Check if eigenvector computation is to be skipped + IF((WINDEX.LT.DOL).OR. + $ (WINDEX.GT.DOU)) THEN + ESKIP = .TRUE. + GOTO 125 + ELSE + ESKIP = .FALSE. + ENDIF + LEFT = WORK( WINDEX ) - WERR( WINDEX ) + RIGHT = WORK( WINDEX ) + WERR( WINDEX ) + INDEIG = INDEXW( WINDEX ) +* Note that since we compute the eigenpairs for a child, +* all eigenvalue approximations are w.r.t the same shift. +* In this case, the entries in WORK should be used for +* computing the gaps since they exhibit even very small +* differences in the eigenvalues, as opposed to the +* entries in W which might "look" the same. + + IF( K .EQ. 1) THEN +* In the case RANGE='I' and with not much initial +* accuracy in LAMBDA and VL, the formula +* LGAP = MAX( ZERO, (SIGMA - VL) + LAMBDA ) +* can lead to an overestimation of the left gap and +* thus to inadequately early RQI 'convergence'. +* Prevent this by forcing a small left gap. + LGAP = EPS*MAX(ABS(LEFT),ABS(RIGHT)) + ELSE + LGAP = WGAP(WINDMN) + ENDIF + IF( K .EQ. IM) THEN +* In the case RANGE='I' and with not much initial +* accuracy in LAMBDA and VU, the formula +* can lead to an overestimation of the right gap and +* thus to inadequately early RQI 'convergence'. +* Prevent this by forcing a small right gap. + RGAP = EPS*MAX(ABS(LEFT),ABS(RIGHT)) + ELSE + RGAP = WGAP(WINDEX) + ENDIF + GAP = MIN( LGAP, RGAP ) + IF(( K .EQ. 1).OR.(K .EQ. IM)) THEN +* The eigenvector support can become wrong +* because significant entries could be cut off due to a +* large GAPTOL parameter in LAR1V. Prevent this. + GAPTOL = ZERO + ELSE + GAPTOL = GAP * EPS + ENDIF + ISUPMN = IN + ISUPMX = 1 +* Update WGAP so that it holds the minimum gap +* to the left or the right. This is crucial in the +* case where bisection is used to ensure that the +* eigenvalue is refined up to the required precision. +* The correct value is restored afterwards. + SAVGAP = WGAP(WINDEX) + WGAP(WINDEX) = GAP +* We want to use the Rayleigh Quotient Correction +* as often as possible since it converges quadratically +* when we are close enough to the desired eigenvalue. +* However, the Rayleigh Quotient can have the wrong sign +* and lead us away from the desired eigenvalue. In this +* case, the best we can do is to use bisection. + USEDBS = .FALSE. + USEDRQ = .FALSE. +* Bisection is initially turned off unless it is forced + NEEDBS = .NOT.TRYRQC + 120 CONTINUE +* Check if bisection should be used to refine eigenvalue + IF(NEEDBS) THEN +* Take the bisection as new iterate + USEDBS = .TRUE. + ITMP1 = IWORK( IINDR+WINDEX ) + OFFSET = INDEXW( WBEGIN ) - 1 + CALL DLARRB( IN, D(IBEGIN), + $ WORK(INDLLD+IBEGIN-1),INDEIG,INDEIG, + $ ZERO, TWO*EPS, OFFSET, + $ WORK(WBEGIN),WGAP(WBEGIN), + $ WERR(WBEGIN),WORK( INDWRK ), + $ IWORK( IINDWK ), PIVMIN, SPDIAM, + $ ITMP1, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = -3 + RETURN + ENDIF + LAMBDA = WORK( WINDEX ) +* Reset twist index from inaccurate LAMBDA to +* force computation of true MINGMA + IWORK( IINDR+WINDEX ) = 0 + ENDIF +* Given LAMBDA, compute the eigenvector. + CALL ZLAR1V( IN, 1, IN, LAMBDA, D( IBEGIN ), + $ L( IBEGIN ), WORK(INDLD+IBEGIN-1), + $ WORK(INDLLD+IBEGIN-1), + $ PIVMIN, GAPTOL, Z( IBEGIN, WINDEX ), + $ .NOT.USEDBS, NEGCNT, ZTZ, MINGMA, + $ IWORK( IINDR+WINDEX ), ISUPPZ( 2*WINDEX-1 ), + $ NRMINV, RESID, RQCORR, WORK( INDWRK ) ) + IF(ITER .EQ. 0) THEN + BSTRES = RESID + BSTW = LAMBDA + ELSEIF(RESID.LT.BSTRES) THEN + BSTRES = RESID + BSTW = LAMBDA + ENDIF + ISUPMN = MIN(ISUPMN,ISUPPZ( 2*WINDEX-1 )) + ISUPMX = MAX(ISUPMX,ISUPPZ( 2*WINDEX )) + ITER = ITER + 1 + +* sin alpha <= |resid|/gap +* Note that both the residual and the gap are +* proportional to the matrix, so ||T|| doesn't play +* a role in the quotient + +* +* Convergence test for Rayleigh-Quotient iteration +* (omitted when Bisection has been used) +* + IF( RESID.GT.TOL*GAP .AND. ABS( RQCORR ).GT. + $ RQTOL*ABS( LAMBDA ) .AND. .NOT. USEDBS) + $ THEN +* We need to check that the RQCORR update doesn't +* move the eigenvalue away from the desired one and +* towards a neighbor. -> protection with bisection + IF(INDEIG.LE.NEGCNT) THEN +* The wanted eigenvalue lies to the left + SGNDEF = -ONE + ELSE +* The wanted eigenvalue lies to the right + SGNDEF = ONE + ENDIF +* We only use the RQCORR if it improves the +* the iterate reasonably. + IF( ( RQCORR*SGNDEF.GE.ZERO ) + $ .AND.( LAMBDA + RQCORR.LE. RIGHT) + $ .AND.( LAMBDA + RQCORR.GE. LEFT) + $ ) THEN + USEDRQ = .TRUE. +* Store new midpoint of bisection interval in WORK + IF(SGNDEF.EQ.ONE) THEN +* The current LAMBDA is on the left of the true +* eigenvalue + LEFT = LAMBDA +* We prefer to assume that the error estimate +* is correct. We could make the interval not +* as a bracket but to be modified if the RQCORR +* chooses to. In this case, the RIGHT side should +* be modified as follows: +* RIGHT = MAX(RIGHT, LAMBDA + RQCORR) + ELSE +* The current LAMBDA is on the right of the true +* eigenvalue + RIGHT = LAMBDA +* See comment about assuming the error estimate is +* correct above. +* LEFT = MIN(LEFT, LAMBDA + RQCORR) + ENDIF + WORK( WINDEX ) = + $ HALF * (RIGHT + LEFT) +* Take RQCORR since it has the correct sign and +* improves the iterate reasonably + LAMBDA = LAMBDA + RQCORR +* Update width of error interval + WERR( WINDEX ) = + $ HALF * (RIGHT-LEFT) + ELSE + NEEDBS = .TRUE. + ENDIF + IF(RIGHT-LEFT.LT.RQTOL*ABS(LAMBDA)) THEN +* The eigenvalue is computed to bisection accuracy +* compute eigenvector and stop + USEDBS = .TRUE. + GOTO 120 + ELSEIF( ITER.LT.MAXITR ) THEN + GOTO 120 + ELSEIF( ITER.EQ.MAXITR ) THEN + NEEDBS = .TRUE. + GOTO 120 + ELSE + INFO = 5 + RETURN + END IF + ELSE + STP2II = .FALSE. + IF(USEDRQ .AND. USEDBS .AND. + $ BSTRES.LE.RESID) THEN + LAMBDA = BSTW + STP2II = .TRUE. + ENDIF + IF (STP2II) THEN +* improve error angle by second step + CALL ZLAR1V( IN, 1, IN, LAMBDA, + $ D( IBEGIN ), L( IBEGIN ), + $ WORK(INDLD+IBEGIN-1), + $ WORK(INDLLD+IBEGIN-1), + $ PIVMIN, GAPTOL, Z( IBEGIN, WINDEX ), + $ .NOT.USEDBS, NEGCNT, ZTZ, MINGMA, + $ IWORK( IINDR+WINDEX ), + $ ISUPPZ( 2*WINDEX-1 ), + $ NRMINV, RESID, RQCORR, WORK( INDWRK ) ) + ENDIF + WORK( WINDEX ) = LAMBDA + END IF +* +* Compute FP-vector support w.r.t. whole matrix +* + ISUPPZ( 2*WINDEX-1 ) = ISUPPZ( 2*WINDEX-1 )+OLDIEN + ISUPPZ( 2*WINDEX ) = ISUPPZ( 2*WINDEX )+OLDIEN + ZFROM = ISUPPZ( 2*WINDEX-1 ) + ZTO = ISUPPZ( 2*WINDEX ) + ISUPMN = ISUPMN + OLDIEN + ISUPMX = ISUPMX + OLDIEN +* Ensure vector is ok if support in the RQI has changed + IF(ISUPMN.LT.ZFROM) THEN + DO 122 II = ISUPMN,ZFROM-1 + Z( II, WINDEX ) = ZERO + 122 CONTINUE + ENDIF + IF(ISUPMX.GT.ZTO) THEN + DO 123 II = ZTO+1,ISUPMX + Z( II, WINDEX ) = ZERO + 123 CONTINUE + ENDIF + CALL ZDSCAL( ZTO-ZFROM+1, NRMINV, + $ Z( ZFROM, WINDEX ), 1 ) + 125 CONTINUE +* Update W + W( WINDEX ) = LAMBDA+SIGMA +* Recompute the gaps on the left and right +* But only allow them to become larger and not +* smaller (which can only happen through "bad" +* cancellation and doesn't reflect the theory +* where the initial gaps are underestimated due +* to WERR being too crude.) + IF(.NOT.ESKIP) THEN + IF( K.GT.1) THEN + WGAP( WINDMN ) = MAX( WGAP(WINDMN), + $ W(WINDEX)-WERR(WINDEX) + $ - W(WINDMN)-WERR(WINDMN) ) + ENDIF + IF( WINDEX.LT.WEND ) THEN + WGAP( WINDEX ) = MAX( SAVGAP, + $ W( WINDPL )-WERR( WINDPL ) + $ - W( WINDEX )-WERR( WINDEX) ) + ENDIF + ENDIF + IDONE = IDONE + 1 + ENDIF +* here ends the code for the current child +* + 139 CONTINUE +* Proceed to any remaining child nodes + NEWFST = J + 1 + 140 CONTINUE + 150 CONTINUE + NDEPTH = NDEPTH + 1 + GO TO 40 + END IF + IBEGIN = IEND + 1 + WBEGIN = WEND + 1 + 170 CONTINUE +* + + RETURN +* +* End of ZLARRV +* + END diff --git a/dspl/liblapack/SRC/zlarscl2.f b/dspl/liblapack/SRC/zlarscl2.f new file mode 100644 index 0000000..0d7cb1e --- /dev/null +++ b/dspl/liblapack/SRC/zlarscl2.f @@ -0,0 +1,122 @@ +*> \brief \b ZLARSCL2 performs reciprocal diagonal scaling on a vector. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARSCL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARSCL2 ( M, N, D, X, LDX ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDX +* .. +* .. Array Arguments .. +* COMPLEX*16 X( LDX, * ) +* DOUBLE PRECISION D( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARSCL2 performs a reciprocal diagonal scaling on an vector: +*> x <-- inv(D) * x +*> where the DOUBLE PRECISION diagonal matrix D is stored as a vector. +*> +*> Eventually to be replaced by BLAS_zge_diag_scale in the new BLAS +*> standard. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of D and X. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of X. N >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, length M +*> Diagonal matrix D, stored as a vector of length M. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,N) +*> On entry, the vector X to be scaled by D. +*> On exit, the scaled vector. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the vector X. LDX >= M. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZLARSCL2 ( M, N, D, X, LDX ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER M, N, LDX +* .. +* .. Array Arguments .. + COMPLEX*16 X( LDX, * ) + DOUBLE PRECISION D( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. Executable Statements .. +* + DO J = 1, N + DO I = 1, M + X( I, J ) = X( I, J ) / D( I ) + END DO + END DO + + RETURN + END + diff --git a/dspl/liblapack/SRC/zlartg.f b/dspl/liblapack/SRC/zlartg.f new file mode 100644 index 0000000..8989bb8 --- /dev/null +++ b/dspl/liblapack/SRC/zlartg.f @@ -0,0 +1,250 @@ +*> \brief \b ZLARTG generates a plane rotation with real cosine and complex sine. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARTG + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARTG( F, G, CS, SN, R ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION CS +* COMPLEX*16 F, G, R, SN +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARTG generates a plane rotation so that +*> +*> [ CS SN ] [ F ] [ R ] +*> [ __ ] . [ ] = [ ] where CS**2 + |SN|**2 = 1. +*> [ -SN CS ] [ G ] [ 0 ] +*> +*> This is a faster version of the BLAS1 routine ZROTG, except for +*> the following differences: +*> F and G are unchanged on return. +*> If G=0, then CS=1 and SN=0. +*> If F=0, then CS=0 and SN is chosen so that R is real. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] F +*> \verbatim +*> F is COMPLEX*16 +*> The first component of vector to be rotated. +*> \endverbatim +*> +*> \param[in] G +*> \verbatim +*> G is COMPLEX*16 +*> The second component of vector to be rotated. +*> \endverbatim +*> +*> \param[out] CS +*> \verbatim +*> CS is DOUBLE PRECISION +*> The cosine of the rotation. +*> \endverbatim +*> +*> \param[out] SN +*> \verbatim +*> SN is COMPLEX*16 +*> The sine of the rotation. +*> \endverbatim +*> +*> \param[out] R +*> \verbatim +*> R is COMPLEX*16 +*> The nonzero component of the rotated vector. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> 3-5-96 - Modified with a new algorithm by W. Kahan and J. Demmel +*> +*> This version has a few statements commented out for thread safety +*> (machine parameters are computed on each entry). 10 feb 03, SJH. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLARTG( F, G, CS, SN, R ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + DOUBLE PRECISION CS + COMPLEX*16 F, G, R, SN +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION TWO, ONE, ZERO + PARAMETER ( TWO = 2.0D+0, ONE = 1.0D+0, ZERO = 0.0D+0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. +* LOGICAL FIRST + INTEGER COUNT, I + DOUBLE PRECISION D, DI, DR, EPS, F2, F2S, G2, G2S, SAFMIN, + $ SAFMN2, SAFMX2, SCALE + COMPLEX*16 FF, FS, GS +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY2 + LOGICAL DISNAN + EXTERNAL DLAMCH, DLAPY2, DISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, LOG, + $ MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION ABS1, ABSSQ +* .. +* .. Statement Function definitions .. + ABS1( FF ) = MAX( ABS( DBLE( FF ) ), ABS( DIMAG( FF ) ) ) + ABSSQ( FF ) = DBLE( FF )**2 + DIMAG( FF )**2 +* .. +* .. Executable Statements .. +* + SAFMIN = DLAMCH( 'S' ) + EPS = DLAMCH( 'E' ) + SAFMN2 = DLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) / + $ LOG( DLAMCH( 'B' ) ) / TWO ) + SAFMX2 = ONE / SAFMN2 + SCALE = MAX( ABS1( F ), ABS1( G ) ) + FS = F + GS = G + COUNT = 0 + IF( SCALE.GE.SAFMX2 ) THEN + 10 CONTINUE + COUNT = COUNT + 1 + FS = FS*SAFMN2 + GS = GS*SAFMN2 + SCALE = SCALE*SAFMN2 + IF( SCALE.GE.SAFMX2 ) + $ GO TO 10 + ELSE IF( SCALE.LE.SAFMN2 ) THEN + IF( G.EQ.CZERO.OR.DISNAN( ABS( G ) ) ) THEN + CS = ONE + SN = CZERO + R = F + RETURN + END IF + 20 CONTINUE + COUNT = COUNT - 1 + FS = FS*SAFMX2 + GS = GS*SAFMX2 + SCALE = SCALE*SAFMX2 + IF( SCALE.LE.SAFMN2 ) + $ GO TO 20 + END IF + F2 = ABSSQ( FS ) + G2 = ABSSQ( GS ) + IF( F2.LE.MAX( G2, ONE )*SAFMIN ) THEN +* +* This is a rare case: F is very small. +* + IF( F.EQ.CZERO ) THEN + CS = ZERO + R = DLAPY2( DBLE( G ), DIMAG( G ) ) +* Do complex/real division explicitly with two real divisions + D = DLAPY2( DBLE( GS ), DIMAG( GS ) ) + SN = DCMPLX( DBLE( GS ) / D, -DIMAG( GS ) / D ) + RETURN + END IF + F2S = DLAPY2( DBLE( FS ), DIMAG( FS ) ) +* G2 and G2S are accurate +* G2 is at least SAFMIN, and G2S is at least SAFMN2 + G2S = SQRT( G2 ) +* Error in CS from underflow in F2S is at most +* UNFL / SAFMN2 .lt. sqrt(UNFL*EPS) .lt. EPS +* If MAX(G2,ONE)=G2, then F2 .lt. G2*SAFMIN, +* and so CS .lt. sqrt(SAFMIN) +* If MAX(G2,ONE)=ONE, then F2 .lt. SAFMIN +* and so CS .lt. sqrt(SAFMIN)/SAFMN2 = sqrt(EPS) +* Therefore, CS = F2S/G2S / sqrt( 1 + (F2S/G2S)**2 ) = F2S/G2S + CS = F2S / G2S +* Make sure abs(FF) = 1 +* Do complex/real division explicitly with 2 real divisions + IF( ABS1( F ).GT.ONE ) THEN + D = DLAPY2( DBLE( F ), DIMAG( F ) ) + FF = DCMPLX( DBLE( F ) / D, DIMAG( F ) / D ) + ELSE + DR = SAFMX2*DBLE( F ) + DI = SAFMX2*DIMAG( F ) + D = DLAPY2( DR, DI ) + FF = DCMPLX( DR / D, DI / D ) + END IF + SN = FF*DCMPLX( DBLE( GS ) / G2S, -DIMAG( GS ) / G2S ) + R = CS*F + SN*G + ELSE +* +* This is the most common case. +* Neither F2 nor F2/G2 are less than SAFMIN +* F2S cannot overflow, and it is accurate +* + F2S = SQRT( ONE+G2 / F2 ) +* Do the F2S(real)*FS(complex) multiply with two real multiplies + R = DCMPLX( F2S*DBLE( FS ), F2S*DIMAG( FS ) ) + CS = ONE / F2S + D = F2 + G2 +* Do complex/real division explicitly with two real divisions + SN = DCMPLX( DBLE( R ) / D, DIMAG( R ) / D ) + SN = SN*DCONJG( GS ) + IF( COUNT.NE.0 ) THEN + IF( COUNT.GT.0 ) THEN + DO 30 I = 1, COUNT + R = R*SAFMX2 + 30 CONTINUE + ELSE + DO 40 I = 1, -COUNT + R = R*SAFMN2 + 40 CONTINUE + END IF + END IF + END IF + RETURN +* +* End of ZLARTG +* + END diff --git a/dspl/liblapack/SRC/zlartv.f b/dspl/liblapack/SRC/zlartv.f new file mode 100644 index 0000000..d6c7ee7 --- /dev/null +++ b/dspl/liblapack/SRC/zlartv.f @@ -0,0 +1,150 @@ +*> \brief \b ZLARTV applies a vector of plane rotations with real cosines and complex sines to the elements of a pair of vectors. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARTV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARTV( N, X, INCX, Y, INCY, C, S, INCC ) +* +* .. Scalar Arguments .. +* INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( * ) +* COMPLEX*16 S( * ), X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARTV applies a vector of complex plane rotations with real cosines +*> to elements of the complex vectors x and y. For i = 1,2,...,n +*> +*> ( x(i) ) := ( c(i) s(i) ) ( x(i) ) +*> ( y(i) ) ( -conjg(s(i)) c(i) ) ( y(i) ) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of plane rotations to be applied. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (1+(N-1)*INCX) +*> The vector x. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX*16 array, dimension (1+(N-1)*INCY) +*> The vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> The increment between elements of Y. INCY > 0. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension (1+(N-1)*INCC) +*> The cosines of the plane rotations. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is COMPLEX*16 array, dimension (1+(N-1)*INCC) +*> The sines of the plane rotations. +*> \endverbatim +*> +*> \param[in] INCC +*> \verbatim +*> INCC is INTEGER +*> The increment between elements of C and S. INCC > 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLARTV( N, X, INCX, Y, INCY, C, S, INCC ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCC, INCX, INCY, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ) + COMPLEX*16 S( * ), X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IC, IX, IY + COMPLEX*16 XI, YI +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* + IX = 1 + IY = 1 + IC = 1 + DO 10 I = 1, N + XI = X( IX ) + YI = Y( IY ) + X( IX ) = C( IC )*XI + S( IC )*YI + Y( IY ) = C( IC )*YI - DCONJG( S( IC ) )*XI + IX = IX + INCX + IY = IY + INCY + IC = IC + INCC + 10 CONTINUE + RETURN +* +* End of ZLARTV +* + END diff --git a/dspl/liblapack/SRC/zlarz.f b/dspl/liblapack/SRC/zlarz.f new file mode 100644 index 0000000..5c28cee --- /dev/null +++ b/dspl/liblapack/SRC/zlarz.f @@ -0,0 +1,241 @@ +*> \brief \b ZLARZ applies an elementary reflector (as returned by stzrzf) to a general matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARZ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARZ( SIDE, M, N, L, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, L, LDC, M, N +* COMPLEX*16 TAU +* .. +* .. Array Arguments .. +* COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARZ applies a complex elementary reflector H to a complex +*> M-by-N matrix C, from either the left or the right. H is represented +*> in the form +*> +*> H = I - tau * v * v**H +*> +*> where tau is a complex scalar and v is a complex vector. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> +*> To apply H**H (the conjugate transpose of H), supply conjg(tau) instead +*> tau. +*> +*> H is a product of k elementary reflectors as returned by ZTZRZF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of entries of the vector V containing +*> the meaningful part of the Householder vectors. +*> If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (1+(L-1)*abs(INCV)) +*> The vector v in the representation of H as returned by +*> ZTZRZF. V is not used if TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLARZ( SIDE, M, N, L, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, L, LDC, M, N + COMPLEX*16 TAU +* .. +* .. Array Arguments .. + COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZCOPY, ZGEMV, ZGERC, ZGERU, ZLACGV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C +* + IF( TAU.NE.ZERO ) THEN +* +* w( 1:n ) = conjg( C( 1, 1:n ) ) +* + CALL ZCOPY( N, C, LDC, WORK, 1 ) + CALL ZLACGV( N, WORK, 1 ) +* +* w( 1:n ) = conjg( w( 1:n ) + C( m-l+1:m, 1:n )**H * v( 1:l ) ) +* + CALL ZGEMV( 'Conjugate transpose', L, N, ONE, C( M-L+1, 1 ), + $ LDC, V, INCV, ONE, WORK, 1 ) + CALL ZLACGV( N, WORK, 1 ) +* +* C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n ) +* + CALL ZAXPY( N, -TAU, WORK, 1, C, LDC ) +* +* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... +* tau * v( 1:l ) * w( 1:n )**H +* + CALL ZGERU( L, N, -TAU, V, INCV, WORK, 1, C( M-L+1, 1 ), + $ LDC ) + END IF +* + ELSE +* +* Form C * H +* + IF( TAU.NE.ZERO ) THEN +* +* w( 1:m ) = C( 1:m, 1 ) +* + CALL ZCOPY( M, C, 1, WORK, 1 ) +* +* w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n, 1:n ) * v( 1:l ) +* + CALL ZGEMV( 'No transpose', M, L, ONE, C( 1, N-L+1 ), LDC, + $ V, INCV, ONE, WORK, 1 ) +* +* C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m ) +* + CALL ZAXPY( M, -TAU, WORK, 1, C, 1 ) +* +* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... +* tau * w( 1:m ) * v( 1:l )**H +* + CALL ZGERC( M, L, -TAU, WORK, 1, V, INCV, C( 1, N-L+1 ), + $ LDC ) +* + END IF +* + END IF +* + RETURN +* +* End of ZLARZ +* + END diff --git a/dspl/liblapack/SRC/zlarzb.f b/dspl/liblapack/SRC/zlarzb.f new file mode 100644 index 0000000..e27c7e0 --- /dev/null +++ b/dspl/liblapack/SRC/zlarzb.f @@ -0,0 +1,337 @@ +*> \brief \b ZLARZB applies a block reflector or its conjugate-transpose to a general matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARZB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, +* LDV, T, LDT, C, LDC, WORK, LDWORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, SIDE, STOREV, TRANS +* INTEGER K, L, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 C( LDC, * ), T( LDT, * ), V( LDV, * ), +* $ WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARZB applies a complex block reflector H or its transpose H**H +*> to a complex distributed M-by-N C from the left or the right. +*> +*> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply H or H**H from the Left +*> = 'R': apply H or H**H from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply H (No transpose) +*> = 'C': apply H**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Indicates how H is formed from a product of elementary +*> reflectors +*> = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Indicates how the vectors which define the elementary +*> reflectors are stored: +*> = 'C': Columnwise (not supported yet) +*> = 'R': Rowwise +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the matrix T (= the number of elementary +*> reflectors whose product defines the block reflector). +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of columns of the matrix V containing the +*> meaningful part of the Householder reflectors. +*> If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (LDV,NV). +*> If STOREV = 'C', NV = K; if STOREV = 'R', NV = L. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= L; if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,K) +*> The triangular K-by-K matrix T in the representation of the +*> block reflector. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by H*C or H**H*C or C*H or C*H**H. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LDWORK,K) +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. +*> If SIDE = 'L', LDWORK >= max(1,N); +*> if SIDE = 'R', LDWORK >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V, + $ LDV, T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, L, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, INFO, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZGEMM, ZLACGV, ZTRMM +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* +* Check for currently supported options +* + INFO = 0 + IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLARZB', -INFO ) + RETURN + END IF +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C +* +* W( 1:n, 1:k ) = C( 1:k, 1:n )**H +* + DO 10 J = 1, K + CALL ZCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W( 1:n, 1:k ) = W( 1:n, 1:k ) + ... +* C( m-l+1:m, 1:n )**H * V( 1:k, 1:l )**T +* + IF( L.GT.0 ) + $ CALL ZGEMM( 'Transpose', 'Conjugate transpose', N, K, L, + $ ONE, C( M-L+1, 1 ), LDC, V, LDV, ONE, WORK, + $ LDWORK ) +* +* W( 1:n, 1:k ) = W( 1:n, 1:k ) * T**T or W( 1:m, 1:k ) * T +* + CALL ZTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, ONE, T, + $ LDT, WORK, LDWORK ) +* +* C( 1:k, 1:n ) = C( 1:k, 1:n ) - W( 1:n, 1:k )**H +* + DO 30 J = 1, N + DO 20 I = 1, K + C( I, J ) = C( I, J ) - WORK( J, I ) + 20 CONTINUE + 30 CONTINUE +* +* C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... +* V( 1:k, 1:l )**H * W( 1:n, 1:k )**H +* + IF( L.GT.0 ) + $ CALL ZGEMM( 'Transpose', 'Transpose', L, N, K, -ONE, V, LDV, + $ WORK, LDWORK, ONE, C( M-L+1, 1 ), LDC ) +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H +* +* W( 1:m, 1:k ) = C( 1:m, 1:k ) +* + DO 40 J = 1, K + CALL ZCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W( 1:m, 1:k ) = W( 1:m, 1:k ) + ... +* C( 1:m, n-l+1:n ) * V( 1:k, 1:l )**H +* + IF( L.GT.0 ) + $ CALL ZGEMM( 'No transpose', 'Transpose', M, K, L, ONE, + $ C( 1, N-L+1 ), LDC, V, LDV, ONE, WORK, LDWORK ) +* +* W( 1:m, 1:k ) = W( 1:m, 1:k ) * conjg( T ) or +* W( 1:m, 1:k ) * T**H +* + DO 50 J = 1, K + CALL ZLACGV( K-J+1, T( J, J ), 1 ) + 50 CONTINUE + CALL ZTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, ONE, T, + $ LDT, WORK, LDWORK ) + DO 60 J = 1, K + CALL ZLACGV( K-J+1, T( J, J ), 1 ) + 60 CONTINUE +* +* C( 1:m, 1:k ) = C( 1:m, 1:k ) - W( 1:m, 1:k ) +* + DO 80 J = 1, K + DO 70 I = 1, M + C( I, J ) = C( I, J ) - WORK( I, J ) + 70 CONTINUE + 80 CONTINUE +* +* C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... +* W( 1:m, 1:k ) * conjg( V( 1:k, 1:l ) ) +* + DO 90 J = 1, L + CALL ZLACGV( K, V( 1, J ), 1 ) + 90 CONTINUE + IF( L.GT.0 ) + $ CALL ZGEMM( 'No transpose', 'No transpose', M, L, K, -ONE, + $ WORK, LDWORK, V, LDV, ONE, C( 1, N-L+1 ), LDC ) + DO 100 J = 1, L + CALL ZLACGV( K, V( 1, J ), 1 ) + 100 CONTINUE +* + END IF +* + RETURN +* +* End of ZLARZB +* + END diff --git a/dspl/liblapack/SRC/zlarzt.f b/dspl/liblapack/SRC/zlarzt.f new file mode 100644 index 0000000..ccd8c4b --- /dev/null +++ b/dspl/liblapack/SRC/zlarzt.f @@ -0,0 +1,266 @@ +*> \brief \b ZLARZT forms the triangular factor T of a block reflector H = I - vtvH. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARZT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARZT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARZT forms the triangular factor T of a complex block reflector +*> H of order > n, which is defined as a product of k elementary +*> reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**H +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**H * T * V +*> +*> Currently, only STOREV = 'R' and DIRECT = 'B' are supported. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise (not supported yet) +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored; the corresponding +*> array elements are modified but restored on exit. The rest of the +*> array is not used. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> ______V_____ +*> ( v1 v2 v3 ) / \ +*> ( v1 v2 v3 ) ( v1 v1 v1 v1 v1 . . . . 1 ) +*> V = ( v1 v2 v3 ) ( v2 v2 v2 v2 v2 . . . 1 ) +*> ( v1 v2 v3 ) ( v3 v3 v3 v3 v3 . . 1 ) +*> ( v1 v2 v3 ) +*> . . . +*> . . . +*> 1 . . +*> 1 . +*> 1 +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> ______V_____ +*> 1 / \ +*> . 1 ( 1 . . . . v1 v1 v1 v1 v1 ) +*> . . 1 ( . 1 . . . v2 v2 v2 v2 v2 ) +*> . . . ( . . 1 . . v3 v3 v3 v3 v3 ) +*> . . . +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> V = ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLARZT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMV, ZLACGV, ZTRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Check for currently supported options +* + INFO = 0 + IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLARZT', -INFO ) + RETURN + END IF +* + DO 20 I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO 10 J = I, K + T( J, I ) = ZERO + 10 CONTINUE + ELSE +* +* general case +* + IF( I.LT.K ) THEN +* +* T(i+1:k,i) = - tau(i) * V(i+1:k,1:n) * V(i,1:n)**H +* + CALL ZLACGV( N, V( I, 1 ), LDV ) + CALL ZGEMV( 'No transpose', K-I, N, -TAU( I ), + $ V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO, + $ T( I+1, I ), 1 ) + CALL ZLACGV( N, V( I, 1 ), LDV ) +* +* T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + END IF + T( I, I ) = TAU( I ) + END IF + 20 CONTINUE + RETURN +* +* End of ZLARZT +* + END diff --git a/dspl/liblapack/SRC/zlascl.f b/dspl/liblapack/SRC/zlascl.f new file mode 100644 index 0000000..c53c6f5 --- /dev/null +++ b/dspl/liblapack/SRC/zlascl.f @@ -0,0 +1,368 @@ +*> \brief \b ZLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLASCL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TYPE +* INTEGER INFO, KL, KU, LDA, M, N +* DOUBLE PRECISION CFROM, CTO +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLASCL multiplies the M by N complex matrix A by the real scalar +*> CTO/CFROM. This is done without over/underflow as long as the final +*> result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that +*> A may be full, upper triangular, lower triangular, upper Hessenberg, +*> or banded. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TYPE +*> \verbatim +*> TYPE is CHARACTER*1 +*> TYPE indices the storage type of the input matrix. +*> = 'G': A is a full matrix. +*> = 'L': A is a lower triangular matrix. +*> = 'U': A is an upper triangular matrix. +*> = 'H': A is an upper Hessenberg matrix. +*> = 'B': A is a symmetric band matrix with lower bandwidth KL +*> and upper bandwidth KU and with the only the lower +*> half stored. +*> = 'Q': A is a symmetric band matrix with lower bandwidth KL +*> and upper bandwidth KU and with the only the upper +*> half stored. +*> = 'Z': A is a band matrix with lower bandwidth KL and upper +*> bandwidth KU. See ZGBTRF for storage details. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The lower bandwidth of A. Referenced only if TYPE = 'B', +*> 'Q' or 'Z'. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The upper bandwidth of A. Referenced only if TYPE = 'B', +*> 'Q' or 'Z'. +*> \endverbatim +*> +*> \param[in] CFROM +*> \verbatim +*> CFROM is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] CTO +*> \verbatim +*> CTO is DOUBLE PRECISION +*> +*> The matrix A is multiplied by CTO/CFROM. A(I,J) is computed +*> without over/underflow if the final result CTO*A(I,J)/CFROM +*> can be represented without over/underflow. CFROM must be +*> nonzero. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The matrix to be multiplied by CTO/CFROM. See TYPE for the +*> storage type. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If TYPE = 'G', 'L', 'U', 'H', LDA >= max(1,M); +*> TYPE = 'B', LDA >= KL+1; +*> TYPE = 'Q', LDA >= KU+1; +*> TYPE = 'Z', LDA >= 2*KL+KU+1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> 0 - successful exit +*> <0 - if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER TYPE + INTEGER INFO, KL, KU, LDA, M, N + DOUBLE PRECISION CFROM, CTO +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER I, ITYPE, J, K1, K2, K3, K4 + DOUBLE PRECISION BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH, DISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 +* + IF( LSAME( TYPE, 'G' ) ) THEN + ITYPE = 0 + ELSE IF( LSAME( TYPE, 'L' ) ) THEN + ITYPE = 1 + ELSE IF( LSAME( TYPE, 'U' ) ) THEN + ITYPE = 2 + ELSE IF( LSAME( TYPE, 'H' ) ) THEN + ITYPE = 3 + ELSE IF( LSAME( TYPE, 'B' ) ) THEN + ITYPE = 4 + ELSE IF( LSAME( TYPE, 'Q' ) ) THEN + ITYPE = 5 + ELSE IF( LSAME( TYPE, 'Z' ) ) THEN + ITYPE = 6 + ELSE + ITYPE = -1 + END IF +* + IF( ITYPE.EQ.-1 ) THEN + INFO = -1 + ELSE IF( CFROM.EQ.ZERO .OR. DISNAN(CFROM) ) THEN + INFO = -4 + ELSE IF( DISNAN(CTO) ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -6 + ELSE IF( N.LT.0 .OR. ( ITYPE.EQ.4 .AND. N.NE.M ) .OR. + $ ( ITYPE.EQ.5 .AND. N.NE.M ) ) THEN + INFO = -7 + ELSE IF( ITYPE.LE.3 .AND. LDA.LT.MAX( 1, M ) ) THEN + INFO = -9 + ELSE IF( ITYPE.GE.4 ) THEN + IF( KL.LT.0 .OR. KL.GT.MAX( M-1, 0 ) ) THEN + INFO = -2 + ELSE IF( KU.LT.0 .OR. KU.GT.MAX( N-1, 0 ) .OR. + $ ( ( ITYPE.EQ.4 .OR. ITYPE.EQ.5 ) .AND. KL.NE.KU ) ) + $ THEN + INFO = -3 + ELSE IF( ( ITYPE.EQ.4 .AND. LDA.LT.KL+1 ) .OR. + $ ( ITYPE.EQ.5 .AND. LDA.LT.KU+1 ) .OR. + $ ( ITYPE.EQ.6 .AND. LDA.LT.2*KL+KU+1 ) ) THEN + INFO = -9 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLASCL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* + CFROMC = CFROM + CTOC = CTO +* + 10 CONTINUE + CFROM1 = CFROMC*SMLNUM + IF( CFROM1.EQ.CFROMC ) THEN +! CFROMC is an inf. Multiply by a correctly signed zero for +! finite CTOC, or a NaN if CTOC is infinite. + MUL = CTOC / CFROMC + DONE = .TRUE. + CTO1 = CTOC + ELSE + CTO1 = CTOC / BIGNUM + IF( CTO1.EQ.CTOC ) THEN +! CTOC is either 0 or an inf. In both cases, CTOC itself +! serves as the correct multiplication factor. + MUL = CTOC + DONE = .TRUE. + CFROMC = ONE + ELSE IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN + MUL = SMLNUM + DONE = .FALSE. + CFROMC = CFROM1 + ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN + MUL = BIGNUM + DONE = .FALSE. + CTOC = CTO1 + ELSE + MUL = CTOC / CFROMC + DONE = .TRUE. + END IF + END IF +* + IF( ITYPE.EQ.0 ) THEN +* +* Full matrix +* + DO 30 J = 1, N + DO 20 I = 1, M + A( I, J ) = A( I, J )*MUL + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( ITYPE.EQ.1 ) THEN +* +* Lower triangular matrix +* + DO 50 J = 1, N + DO 40 I = J, M + A( I, J ) = A( I, J )*MUL + 40 CONTINUE + 50 CONTINUE +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Upper triangular matrix +* + DO 70 J = 1, N + DO 60 I = 1, MIN( J, M ) + A( I, J ) = A( I, J )*MUL + 60 CONTINUE + 70 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* Upper Hessenberg matrix +* + DO 90 J = 1, N + DO 80 I = 1, MIN( J+1, M ) + A( I, J ) = A( I, J )*MUL + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Lower half of a symmetric band matrix +* + K3 = KL + 1 + K4 = N + 1 + DO 110 J = 1, N + DO 100 I = 1, MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 100 CONTINUE + 110 CONTINUE +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Upper half of a symmetric band matrix +* + K1 = KU + 2 + K3 = KU + 1 + DO 130 J = 1, N + DO 120 I = MAX( K1-J, 1 ), K3 + A( I, J ) = A( I, J )*MUL + 120 CONTINUE + 130 CONTINUE +* + ELSE IF( ITYPE.EQ.6 ) THEN +* +* Band matrix +* + K1 = KL + KU + 2 + K2 = KL + 1 + K3 = 2*KL + KU + 1 + K4 = KL + KU + 1 + M + DO 150 J = 1, N + DO 140 I = MAX( K1-J, K2 ), MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 140 CONTINUE + 150 CONTINUE +* + END IF +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of ZLASCL +* + END diff --git a/dspl/liblapack/SRC/zlascl2.f b/dspl/liblapack/SRC/zlascl2.f new file mode 100644 index 0000000..e1a0f97 --- /dev/null +++ b/dspl/liblapack/SRC/zlascl2.f @@ -0,0 +1,122 @@ +*> \brief \b ZLASCL2 performs diagonal scaling on a vector. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLASCL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLASCL2 ( M, N, D, X, LDX ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDX +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ) +* COMPLEX*16 X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLASCL2 performs a diagonal scaling on a vector: +*> x <-- D * x +*> where the DOUBLE PRECISION diagonal matrix D is stored as a vector. +*> +*> Eventually to be replaced by BLAS_zge_diag_scale in the new BLAS +*> standard. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of D and X. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of X. N >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, length M +*> Diagonal matrix D, stored as a vector of length M. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,N) +*> On entry, the vector X to be scaled by D. +*> On exit, the scaled vector. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the vector X. LDX >= M. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZLASCL2 ( M, N, D, X, LDX ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER M, N, LDX +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ) + COMPLEX*16 X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. Executable Statements .. +* + DO J = 1, N + DO I = 1, M + X( I, J ) = X( I, J ) * D( I ) + END DO + END DO + + RETURN + END + diff --git a/dspl/liblapack/SRC/zlaset.f b/dspl/liblapack/SRC/zlaset.f new file mode 100644 index 0000000..7966782 --- /dev/null +++ b/dspl/liblapack/SRC/zlaset.f @@ -0,0 +1,184 @@ +*> \brief \b ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLASET + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLASET( UPLO, M, N, ALPHA, BETA, A, LDA ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER LDA, M, N +* COMPLEX*16 ALPHA, BETA +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLASET initializes a 2-D array A to BETA on the diagonal and +*> ALPHA on the offdiagonals. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies the part of the matrix A to be set. +*> = 'U': Upper triangular part is set. The lower triangle +*> is unchanged. +*> = 'L': Lower triangular part is set. The upper triangle +*> is unchanged. +*> Otherwise: All of the matrix A is set. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the number of rows of A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of A. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 +*> All the offdiagonal array elements are set to ALPHA. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is COMPLEX*16 +*> All the diagonal array elements are set to BETA. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j; +*> A(i,i) = BETA , 1 <= i <= min(m,n) +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLASET( UPLO, M, N, ALPHA, BETA, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, M, N + COMPLEX*16 ALPHA, BETA +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Set the diagonal to BETA and the strictly upper triangular +* part of the array to ALPHA. +* + DO 20 J = 2, N + DO 10 I = 1, MIN( J-1, M ) + A( I, J ) = ALPHA + 10 CONTINUE + 20 CONTINUE + DO 30 I = 1, MIN( N, M ) + A( I, I ) = BETA + 30 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN +* +* Set the diagonal to BETA and the strictly lower triangular +* part of the array to ALPHA. +* + DO 50 J = 1, MIN( M, N ) + DO 40 I = J + 1, M + A( I, J ) = ALPHA + 40 CONTINUE + 50 CONTINUE + DO 60 I = 1, MIN( N, M ) + A( I, I ) = BETA + 60 CONTINUE +* + ELSE +* +* Set the array to BETA on the diagonal and ALPHA on the +* offdiagonal. +* + DO 80 J = 1, N + DO 70 I = 1, M + A( I, J ) = ALPHA + 70 CONTINUE + 80 CONTINUE + DO 90 I = 1, MIN( M, N ) + A( I, I ) = BETA + 90 CONTINUE + END IF +* + RETURN +* +* End of ZLASET +* + END diff --git a/dspl/liblapack/SRC/zlasr.f b/dspl/liblapack/SRC/zlasr.f new file mode 100644 index 0000000..69891ba --- /dev/null +++ b/dspl/liblapack/SRC/zlasr.f @@ -0,0 +1,439 @@ +*> \brief \b ZLASR applies a sequence of plane rotations to a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLASR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, PIVOT, SIDE +* INTEGER LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( * ), S( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLASR applies a sequence of real plane rotations to a complex matrix +*> A, from either the left or the right. +*> +*> When SIDE = 'L', the transformation takes the form +*> +*> A := P*A +*> +*> and when SIDE = 'R', the transformation takes the form +*> +*> A := A*P**T +*> +*> where P is an orthogonal matrix consisting of a sequence of z plane +*> rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', +*> and P**T is the transpose of P. +*> +*> When DIRECT = 'F' (Forward sequence), then +*> +*> P = P(z-1) * ... * P(2) * P(1) +*> +*> and when DIRECT = 'B' (Backward sequence), then +*> +*> P = P(1) * P(2) * ... * P(z-1) +*> +*> where P(k) is a plane rotation matrix defined by the 2-by-2 rotation +*> +*> R(k) = ( c(k) s(k) ) +*> = ( -s(k) c(k) ). +*> +*> When PIVOT = 'V' (Variable pivot), the rotation is performed +*> for the plane (k,k+1), i.e., P(k) has the form +*> +*> P(k) = ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( c(k) s(k) ) +*> ( -s(k) c(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> +*> where R(k) appears as a rank-2 modification to the identity matrix in +*> rows and columns k and k+1. +*> +*> When PIVOT = 'T' (Top pivot), the rotation is performed for the +*> plane (1,k+1), so P(k) has the form +*> +*> P(k) = ( c(k) s(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( -s(k) c(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> +*> where R(k) appears in rows and columns 1 and k+1. +*> +*> Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is +*> performed for the plane (k,z), giving P(k) the form +*> +*> P(k) = ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( c(k) s(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( -s(k) c(k) ) +*> +*> where R(k) appears in rows and columns k and z. The rotations are +*> performed without ever forming P(k) explicitly. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> Specifies whether the plane rotation matrix P is applied to +*> A on the left or the right. +*> = 'L': Left, compute A := P*A +*> = 'R': Right, compute A:= A*P**T +*> \endverbatim +*> +*> \param[in] PIVOT +*> \verbatim +*> PIVOT is CHARACTER*1 +*> Specifies the plane for which P(k) is a plane rotation +*> matrix. +*> = 'V': Variable pivot, the plane (k,k+1) +*> = 'T': Top pivot, the plane (1,k+1) +*> = 'B': Bottom pivot, the plane (k,z) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies whether P is a forward or backward sequence of +*> plane rotations. +*> = 'F': Forward, P = P(z-1)*...*P(2)*P(1) +*> = 'B': Backward, P = P(1)*P(2)*...*P(z-1) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. If m <= 1, an immediate +*> return is effected. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. If n <= 1, an +*> immediate return is effected. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> The cosines c(k) of the plane rotations. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> The sines s(k) of the plane rotations. The 2-by-2 plane +*> rotation part of the matrix P(k), R(k), has the form +*> R(k) = ( c(k) s(k) ) +*> ( -s(k) c(k) ). +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The M-by-N matrix A. On exit, A is overwritten by P*A if +*> SIDE = 'R' or by A*P**T if SIDE = 'L'. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, PIVOT, SIDE + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ), S( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J + DOUBLE PRECISION CTEMP, STEMP + COMPLEX*16 TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.( LSAME( SIDE, 'L' ) .OR. LSAME( SIDE, 'R' ) ) ) THEN + INFO = 1 + ELSE IF( .NOT.( LSAME( PIVOT, 'V' ) .OR. LSAME( PIVOT, + $ 'T' ) .OR. LSAME( PIVOT, 'B' ) ) ) THEN + INFO = 2 + ELSE IF( .NOT.( LSAME( DIRECT, 'F' ) .OR. LSAME( DIRECT, 'B' ) ) ) + $ THEN + INFO = 3 + ELSE IF( M.LT.0 ) THEN + INFO = 4 + ELSE IF( N.LT.0 ) THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = 9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLASR ', INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form P * A +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 20 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 10 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 40 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 30 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 30 CONTINUE + END IF + 40 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 60 J = 2, M + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 50 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 80 J = M, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 70 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 70 CONTINUE + END IF + 80 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 100 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 90 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 120 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 110 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 110 CONTINUE + END IF + 120 CONTINUE + END IF + END IF + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form A * P**T +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 140 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 130 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 130 CONTINUE + END IF + 140 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 160 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 150 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 150 CONTINUE + END IF + 160 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 180 J = 2, N + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 170 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 170 CONTINUE + END IF + 180 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 200 J = N, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 190 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 190 CONTINUE + END IF + 200 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 220 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 210 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 210 CONTINUE + END IF + 220 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 240 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 230 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 230 CONTINUE + END IF + 240 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZLASR +* + END diff --git a/dspl/liblapack/SRC/zlassq.f b/dspl/liblapack/SRC/zlassq.f new file mode 100644 index 0000000..fd13811 --- /dev/null +++ b/dspl/liblapack/SRC/zlassq.f @@ -0,0 +1,168 @@ +*> \brief \b ZLASSQ updates a sum of squares represented in scaled form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLASSQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLASSQ( N, X, INCX, SCALE, SUMSQ ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* DOUBLE PRECISION SCALE, SUMSQ +* .. +* .. Array Arguments .. +* COMPLEX*16 X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLASSQ returns the values scl and ssq such that +*> +*> ( scl**2 )*ssq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, +*> +*> where x( i ) = abs( X( 1 + ( i - 1 )*INCX ) ). The value of sumsq is +*> assumed to be at least unity and the value of ssq will then satisfy +*> +*> 1.0 .le. ssq .le. ( sumsq + 2*n ). +*> +*> scale is assumed to be non-negative and scl returns the value +*> +*> scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ), +*> i +*> +*> scale and sumsq must be supplied in SCALE and SUMSQ respectively. +*> SCALE and SUMSQ are overwritten by scl and ssq respectively. +*> +*> The routine makes only one pass through the vector X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of elements to be used from the vector X. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (N) +*> The vector x as described above. +*> x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive values of the vector X. +*> INCX > 0. +*> \endverbatim +*> +*> \param[in,out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> On entry, the value scale in the equation above. +*> On exit, SCALE is overwritten with the value scl . +*> \endverbatim +*> +*> \param[in,out] SUMSQ +*> \verbatim +*> SUMSQ is DOUBLE PRECISION +*> On entry, the value sumsq in the equation above. +*> On exit, SUMSQ is overwritten with the value ssq . +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLASSQ( N, X, INCX, SCALE, SUMSQ ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCX, N + DOUBLE PRECISION SCALE, SUMSQ +* .. +* .. Array Arguments .. + COMPLEX*16 X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER IX + DOUBLE PRECISION TEMP1 +* .. +* .. External Functions .. + LOGICAL DISNAN + EXTERNAL DISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG +* .. +* .. Executable Statements .. +* + IF( N.GT.0 ) THEN + DO 10 IX = 1, 1 + ( N-1 )*INCX, INCX + TEMP1 = ABS( DBLE( X( IX ) ) ) + IF( TEMP1.GT.ZERO.OR.DISNAN( TEMP1 ) ) THEN + IF( SCALE.LT.TEMP1 ) THEN + SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2 + SCALE = TEMP1 + ELSE + SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 + END IF + END IF + TEMP1 = ABS( DIMAG( X( IX ) ) ) + IF( TEMP1.GT.ZERO.OR.DISNAN( TEMP1 ) ) THEN + IF( SCALE.LT.TEMP1 ) THEN + SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2 + SCALE = TEMP1 + ELSE + SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 + END IF + END IF + 10 CONTINUE + END IF +* + RETURN +* +* End of ZLASSQ +* + END diff --git a/dspl/liblapack/SRC/zlaswlq.f b/dspl/liblapack/SRC/zlaswlq.f new file mode 100644 index 0000000..24dd41d --- /dev/null +++ b/dspl/liblapack/SRC/zlaswlq.f @@ -0,0 +1,258 @@ +* +* Definition: +* =========== +* +* SUBROUTINE ZLASWLQ( M, N, MB, NB, A, LDA, T, LDT, WORK, +* LWORK, INFO) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, MB, NB, LDT, LWORK +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLASWLQ computes a blocked Short-Wide LQ factorization of a +*> M-by-N matrix A, where N >= M: +*> A = L * Q +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= M >= 0. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The row block size to be used in the blocked QR. +*> M >= MB >= 1 +*> \endverbatim +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The column block size to be used in the blocked QR. +*> NB > M. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and below the diagonal +*> of the array contain the N-by-N lower triangular matrix L; +*> the elements above the diagonal represent Q by the rows +*> of blocked V (see Further Details). +*> +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, +*> dimension (LDT, N * Number_of_row_blocks) +*> where Number_of_row_blocks = CEIL((N-M)/(NB-M)) +*> The blocked upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. +*> See Further Details below. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> +*> \endverbatim +*> \param[in] LWORK +*> \verbatim +*> The dimension of the array WORK. LWORK >= MB*M. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> +*> \endverbatim +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> Short-Wide LQ (SWLQ) performs LQ by a sequence of orthogonal transformations, +*> representing Q as a product of other orthogonal matrices +*> Q = Q(1) * Q(2) * . . . * Q(k) +*> where each Q(i) zeros out upper diagonal entries of a block of NB rows of A: +*> Q(1) zeros out the upper diagonal entries of rows 1:NB of A +*> Q(2) zeros out the bottom MB-N rows of rows [1:M,NB+1:2*NB-M] of A +*> Q(3) zeros out the bottom MB-N rows of rows [1:M,2*NB-M+1:3*NB-2*M] of A +*> . . . +*> +*> Q(1) is computed by GELQT, which represents Q(1) by Householder vectors +*> stored under the diagonal of rows 1:MB of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,1:N). +*> For more information see Further Details in GELQT. +*> +*> Q(i) for i>1 is computed by TPLQT, which represents Q(i) by Householder vectors +*> stored in columns [(i-1)*(NB-M)+M+1:i*(NB-M)+M] of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,(i-1)*M+1:i*M). +*> The last Q(k) may use fewer rows. +*> For more information see Further Details in TPQRT. +*> +*> For more details of the overall algorithm, see the description of +*> Sequential TSQR in Section 2.2 of [1]. +*> +*> [1] “Communication-Optimal Parallel and Sequential QR and LU Factorizations,” +*> J. Demmel, L. Grigori, M. Hoemmen, J. Langou, +*> SIAM J. Sci. Comput, vol. 34, no. 1, 2012 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLASWLQ( M, N, MB, NB, A, LDA, T, LDT, WORK, LWORK, + $ INFO) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, MB, NB, LWORK, LDT +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), WORK( * ), T( LDT, *) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, II, KK, CTR +* .. +* .. EXTERNAL FUNCTIONS .. + LOGICAL LSAME + EXTERNAL LSAME +* .. EXTERNAL SUBROUTINES .. + EXTERNAL ZGELQT, ZTPLQT, XERBLA +* .. INTRINSIC FUNCTIONS .. + INTRINSIC MAX, MIN, MOD +* .. +* .. EXECUTABLE STATEMENTS .. +* +* TEST THE INPUT ARGUMENTS +* + INFO = 0 +* + LQUERY = ( LWORK.EQ.-1 ) +* + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.LT.M ) THEN + INFO = -2 + ELSE IF( MB.LT.1 .OR. ( MB.GT.M .AND. M.GT.0 )) THEN + INFO = -3 + ELSE IF( NB.LE.M ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDT.LT.MB ) THEN + INFO = -8 + ELSE IF( ( LWORK.LT.M*MB) .AND. (.NOT.LQUERY) ) THEN + INFO = -10 + END IF + IF( INFO.EQ.0) THEN + WORK(1) = MB*M + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLASWLQ', -INFO ) + RETURN + ELSE IF (LQUERY) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN(M,N).EQ.0 ) THEN + RETURN + END IF +* +* The LQ Decomposition +* + IF((M.GE.N).OR.(NB.LE.M).OR.(NB.GE.N)) THEN + CALL ZGELQT( M, N, MB, A, LDA, T, LDT, WORK, INFO) + RETURN + END IF +* + KK = MOD((N-M),(NB-M)) + II=N-KK+1 +* +* Compute the LQ factorization of the first block A(1:M,1:NB) +* + CALL ZGELQT( M, NB, MB, A(1,1), LDA, T, LDT, WORK, INFO) + CTR = 1 +* + DO I = NB+1, II-NB+M , (NB-M) +* +* Compute the QR factorization of the current block A(1:M,I:I+NB-M) +* + CALL ZTPLQT( M, NB-M, 0, MB, A(1,1), LDA, A( 1, I ), + $ LDA, T(1, CTR * M + 1), + $ LDT, WORK, INFO ) + CTR = CTR + 1 + END DO +* +* Compute the QR factorization of the last block A(1:M,II:N) +* + IF (II.LE.N) THEN + CALL ZTPLQT( M, KK, 0, MB, A(1,1), LDA, A( 1, II ), + $ LDA, T(1, CTR * M + 1), LDT, + $ WORK, INFO ) + END IF +* + WORK( 1 ) = M * MB + RETURN +* +* End of ZLASWLQ +* + END diff --git a/dspl/liblapack/SRC/zlaswp.f b/dspl/liblapack/SRC/zlaswp.f new file mode 100644 index 0000000..13c0f9b --- /dev/null +++ b/dspl/liblapack/SRC/zlaswp.f @@ -0,0 +1,193 @@ +*> \brief \b ZLASWP performs a series of row interchanges on a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLASWP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLASWP( N, A, LDA, K1, K2, IPIV, INCX ) +* +* .. Scalar Arguments .. +* INTEGER INCX, K1, K2, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLASWP performs a series of row interchanges on the matrix A. +*> One row interchange is initiated for each of rows K1 through K2 of A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the matrix of column dimension N to which the row +*> interchanges will be applied. +*> On exit, the permuted matrix. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> \endverbatim +*> +*> \param[in] K1 +*> \verbatim +*> K1 is INTEGER +*> The first element of IPIV for which a row interchange will +*> be done. +*> \endverbatim +*> +*> \param[in] K2 +*> \verbatim +*> K2 is INTEGER +*> (K2-K1+1) is the number of elements of IPIV for which a row +*> interchange will be done. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (K1+(K2-K1)*abs(INCX)) +*> The vector of pivot indices. Only the elements in positions +*> K1 through K1+(K2-K1)*abs(INCX) of IPIV are accessed. +*> IPIV(K1+(K-K1)*abs(INCX)) = L implies rows K and L are to be +*> interchanged. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive values of IPIV. If INCX +*> is negative, the pivots are applied in reverse order. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Modified by +*> R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLASWP( N, A, LDA, K1, K2, IPIV, INCX ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INCX, K1, K2, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, I1, I2, INC, IP, IX, IX0, J, K, N32 + COMPLEX*16 TEMP +* .. +* .. Executable Statements .. +* +* Interchange row I with row IPIV(K1+(I-K1)*abs(INCX)) for each of rows +* K1 through K2. +* + IF( INCX.GT.0 ) THEN + IX0 = K1 + I1 = K1 + I2 = K2 + INC = 1 + ELSE IF( INCX.LT.0 ) THEN + IX0 = K1 + ( K1-K2 )*INCX + I1 = K2 + I2 = K1 + INC = -1 + ELSE + RETURN + END IF +* + N32 = ( N / 32 )*32 + IF( N32.NE.0 ) THEN + DO 30 J = 1, N32, 32 + IX = IX0 + DO 20 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 10 K = J, J + 31 + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 10 CONTINUE + END IF + IX = IX + INCX + 20 CONTINUE + 30 CONTINUE + END IF + IF( N32.NE.N ) THEN + N32 = N32 + 1 + IX = IX0 + DO 50 I = I1, I2, INC + IP = IPIV( IX ) + IF( IP.NE.I ) THEN + DO 40 K = N32, N + TEMP = A( I, K ) + A( I, K ) = A( IP, K ) + A( IP, K ) = TEMP + 40 CONTINUE + END IF + IX = IX + INCX + 50 CONTINUE + END IF +* + RETURN +* +* End of ZLASWP +* + END diff --git a/dspl/liblapack/SRC/zlasyf.f b/dspl/liblapack/SRC/zlasyf.f new file mode 100644 index 0000000..b0f48fb --- /dev/null +++ b/dspl/liblapack/SRC/zlasyf.f @@ -0,0 +1,830 @@ +*> \brief \b ZLASYF computes a partial factorization of a complex symmetric matrix using the Bunch-Kaufman diagonal pivoting method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLASYF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLASYF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLASYF computes a partial factorization of a complex symmetric matrix +*> A using the Bunch-Kaufman diagonal pivoting method. The partial +*> factorization has the form: +*> +*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +*> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) +*> +*> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L' +*> ( L21 I ) ( 0 A22 ) ( 0 I ) +*> +*> where the order of D is at most NB. The actual order is returned in +*> the argument KB, and is either NB or NB-1, or N if N <= NB. +*> Note that U**T denotes the transpose of U. +*> +*> ZLASYF is an auxiliary routine called by ZSYTRF. It uses blocked code +*> (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or +*> A22 (if UPLO = 'L'). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The maximum number of columns of the matrix A that should be +*> factored. NB should be at least 2 to allow for 2-by-2 pivot +*> blocks. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns of A that were actually factored. +*> KB is either NB-1 or NB, or N if N <= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, A contains details of the partial factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> Only the last KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k-1) < 0, then rows and columns +*> k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> Only the first KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k+1) < 0, then rows and columns +*> k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1) +*> is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX*16 array, dimension (LDW,NB) +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup complex16SYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZLASYF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER IMAX, J, JB, JJ, JMAX, JP, K, KK, KKW, KP, + $ KSTEP, KW + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, ROWMAX + COMPLEX*16 D11, D21, D22, R1, T, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + EXTERNAL LSAME, IZAMAX +* .. +* .. External Subroutines .. + EXTERNAL ZCOPY, ZGEMM, ZGEMV, ZSCAL, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, MIN, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( DBLE( Z ) ) + ABS( DIMAG( Z ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* +* KW is the column of W which corresponds to column K of A +* + K = N + 10 CONTINUE + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* +* Copy column K of A to column KW of W and update it +* + CALL ZCOPY( K, A( 1, K ), 1, W( 1, KW ), 1 ) + IF( K.LT.N ) + $ CALL ZGEMV( 'No transpose', K, N-K, -CONE, A( 1, K+1 ), LDA, + $ W( K, KW+1 ), LDW, CONE, W( 1, KW ), 1 ) +* + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( W( K, KW ) ) +* +* IMAX is the row-index of the largest off-diagonal element in + +* + IF( K.GT.1 ) THEN + IMAX = IZAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = CABS1( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* Copy column IMAX to column KW-1 of W and update it +* + CALL ZCOPY( IMAX, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 ) + CALL ZCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) + IF( K.LT.N ) + $ CALL ZGEMV( 'No transpose', K, N-K, -CONE, + $ A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW, + $ CONE, W( 1, KW-1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + IZAMAX( K-IMAX, W( IMAX+1, KW-1 ), 1 ) + ROWMAX = CABS1( W( JMAX, KW-1 ) ) + IF( IMAX.GT.1 ) THEN + JMAX = IZAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, KW-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( W( IMAX, KW-1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW of W +* + CALL ZCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K - KSTEP + 1 +* +* KKW is the column of W which corresponds to column KK of A +* + KKW = NB + KK - N +* +* Interchange rows and columns KP and KK. +* Updated column KP is already stored in column KKW of W. +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP of submatrix A +* at step K. No need to copy element into column K +* (or K and K-1 for 2-by-2 pivot) of A, since these columns +* will be later overwritten. +* + A( KP, KP ) = A( KK, KK ) + CALL ZCOPY( KK-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + IF( KP.GT.1 ) + $ CALL ZCOPY( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last K+1 to N columns of A +* (columns K (or K and K-1 for 2-by-2 pivot) of A will be +* later overwritten). Interchange rows KK and KP +* in last KKW to NB columns of W. +* + IF( K.LT.N ) + $ CALL ZSWAP( N-K, A( KK, K+1 ), LDA, A( KP, K+1 ), + $ LDA ) + CALL ZSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column kw of W now holds +* +* W(kw) = U(k)*D(k), +* +* where U(k) is the k-th column of U +* +* Store subdiag. elements of column U(k) +* and 1-by-1 block D(k) in column k of A. +* NOTE: Diagonal element U(k,k) is a UNIT element +* and not stored. +* A(k,k) := D(k,k) = W(k,kw) +* A(1:k-1,k) := U(1:k-1,k) = W(1:k-1,kw)/D(k,k) +* + CALL ZCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + R1 = CONE / A( K, K ) + CALL ZSCAL( K-1, R1, A( 1, K ), 1 ) +* + ELSE +* +* 2-by-2 pivot block D(k): columns kw and kw-1 of W now hold +* +* ( W(kw-1) W(kw) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Store U(1:k-2,k-1) and U(1:k-2,k) and 2-by-2 +* block D(k-1:k,k-1:k) in columns k-1 and k of A. +* NOTE: 2-by-2 diagonal block U(k-1:k,k-1:k) is a UNIT +* block and not stored. +* A(k-1:k,k-1:k) := D(k-1:k,k-1:k) = W(k-1:k,kw-1:kw) +* A(1:k-2,k-1:k) := U(1:k-2,k:k-1:k) = +* = W(1:k-2,kw-1:kw) * ( D(k-1:k,k-1:k)**(-1) ) +* + IF( K.GT.2 ) THEN +* +* Compose the columns of the inverse of 2-by-2 pivot +* block D in the following way to reduce the number +* of FLOPS when we myltiply panel ( W(kw-1) W(kw) ) by +* this inverse +* +* D**(-1) = ( d11 d21 )**(-1) = +* ( d21 d22 ) +* +* = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) = +* ( (-d21 ) ( d11 ) ) +* +* = 1/d21 * 1/((d11/d21)*(d22/d21)-1) * +* +* * ( ( d22/d21 ) ( -1 ) ) = +* ( ( -1 ) ( d11/d21 ) ) +* +* = 1/d21 * 1/(D22*D11-1) * ( ( D11 ) ( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = 1/d21 * T * ( ( D11 ) ( -1 ) ) +* ( ( -1 ) ( D22 ) ) +* +* = D21 * ( ( D11 ) ( -1 ) ) +* ( ( -1 ) ( D22 ) ) +* + D21 = W( K-1, KW ) + D11 = W( K, KW ) / D21 + D22 = W( K-1, KW-1 ) / D21 + T = CONE / ( D11*D22-CONE ) + D21 = T / D21 +* +* Update elements in columns A(k-1) and A(k) as +* dot products of rows of ( W(kw-1) W(kw) ) and columns +* of D**(-1) +* + DO 20 J = 1, K - 2 + A( J, K-1 ) = D21*( D11*W( J, KW-1 )-W( J, KW ) ) + A( J, K ) = D21*( D22*W( J, KW )-W( J, KW-1 ) ) + 20 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = W( K-1, KW ) + A( K, K ) = W( K, KW ) +* + END IF +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12**T = A11 - U12*W**T +* +* computing blocks of NB columns at a time +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + CALL ZGEMV( 'No transpose', JJ-J+1, N-K, -CONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, CONE, + $ A( J, JJ ), 1 ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + CALL ZGEMM( 'No transpose', 'Transpose', J-1, JB, N-K, + $ -CONE, A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, + $ CONE, A( 1, J ), LDA ) + 50 CONTINUE +* +* Put U12 in standard form by partially undoing the interchanges +* in columns k+1:n looping backwards from k+1 to n +* + J = K + 1 + 60 CONTINUE +* +* Undo the interchanges (if any) of rows JJ and JP at each +* step J +* +* (Here, J is a diagonal index) + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP +* (Here, J is a diagonal index) + J = J + 1 + END IF +* (NOTE: Here, J is used to determine row length. Length N-J+1 +* of the rows to swap back doesn't include diagonal element) + J = J + 1 + IF( JP.NE.JJ .AND. J.LE.N ) + $ CALL ZSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA ) + IF( J.LT.N ) + $ GO TO 60 +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* +* Copy column K of A to column K of W and update it +* + CALL ZCOPY( N-K+1, A( K, K ), 1, W( K, K ), 1 ) + CALL ZGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), LDA, + $ W( K, 1 ), LDW, CONE, W( K, K ), 1 ) +* + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( W( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in + +* + IF( K.LT.N ) THEN + IMAX = K + IZAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = CABS1( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* Copy column IMAX to column K+1 of W and update it +* + CALL ZCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1 ) + CALL ZCOPY( N-IMAX+1, A( IMAX, IMAX ), 1, W( IMAX, K+1 ), + $ 1 ) + CALL ZGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), + $ LDA, W( IMAX, 1 ), LDW, CONE, W( K, K+1 ), + $ 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + IZAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = CABS1( W( JMAX, K+1 ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + IZAMAX( N-IMAX, W( IMAX+1, K+1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, K+1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( W( IMAX, K+1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K of W +* + CALL ZCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* +* ============================================================ +* +* KK is the column of A where pivoting step stopped +* + KK = K + KSTEP - 1 +* +* Interchange rows and columns KP and KK. +* Updated column KP is already stored in column KK of W. +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP of submatrix A +* at step K. No need to copy element into column K +* (or K and K+1 for 2-by-2 pivot) of A, since these columns +* will be later overwritten. +* + A( KP, KP ) = A( KK, KK ) + CALL ZCOPY( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + IF( KP.LT.N ) + $ CALL ZCOPY( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) +* +* Interchange rows KK and KP in first K-1 columns of A +* (columns K (or K and K+1 for 2-by-2 pivot) of A will be +* later overwritten). Interchange rows KK and KP +* in first KK columns of W. +* + IF( K.GT.1 ) + $ CALL ZSWAP( K-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL ZSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k), +* +* where L(k) is the k-th column of L +* +* Store subdiag. elements of column L(k) +* and 1-by-1 block D(k) in column k of A. +* (NOTE: Diagonal element L(k,k) is a UNIT element +* and not stored) +* A(k,k) := D(k,k) = W(k,k) +* A(k+1:N,k) := L(k+1:N,k) = W(k+1:N,k)/D(k,k) +* + CALL ZCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN + R1 = CONE / A( K, K ) + CALL ZSCAL( N-K, R1, A( K+1, K ), 1 ) + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* Store L(k+2:N,k) and L(k+2:N,k+1) and 2-by-2 +* block D(k:k+1,k:k+1) in columns k and k+1 of A. +* (NOTE: 2-by-2 diagonal block L(k:k+1,k:k+1) is a UNIT +* block and not stored) +* A(k:k+1,k:k+1) := D(k:k+1,k:k+1) = W(k:k+1,k:k+1) +* A(k+2:N,k:k+1) := L(k+2:N,k:k+1) = +* = W(k+2:N,k:k+1) * ( D(k:k+1,k:k+1)**(-1) ) +* + IF( K.LT.N-1 ) THEN +* +* Compose the columns of the inverse of 2-by-2 pivot +* block D in the following way to reduce the number +* of FLOPS when we myltiply panel ( W(k) W(k+1) ) by +* this inverse +* +* D**(-1) = ( d11 d21 )**(-1) = +* ( d21 d22 ) +* +* = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) = +* ( (-d21 ) ( d11 ) ) +* +* = 1/d21 * 1/((d11/d21)*(d22/d21)-1) * +* +* * ( ( d22/d21 ) ( -1 ) ) = +* ( ( -1 ) ( d11/d21 ) ) +* +* = 1/d21 * 1/(D22*D11-1) * ( ( D11 ) ( -1 ) ) = +* ( ( -1 ) ( D22 ) ) +* +* = 1/d21 * T * ( ( D11 ) ( -1 ) ) +* ( ( -1 ) ( D22 ) ) +* +* = D21 * ( ( D11 ) ( -1 ) ) +* ( ( -1 ) ( D22 ) ) +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / D21 + T = CONE / ( D11*D22-CONE ) + D21 = T / D21 +* +* Update elements in columns A(k) and A(k+1) as +* dot products of rows of ( W(k) W(k+1) ) and columns +* of D**(-1) +* + DO 80 J = K + 2, N + A( J, K ) = D21*( D11*W( J, K )-W( J, K+1 ) ) + A( J, K+1 ) = D21*( D22*W( J, K+1 )-W( J, K ) ) + 80 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = W( K+1, K ) + A( K+1, K+1 ) = W( K+1, K+1 ) +* + END IF +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21**T = A22 - L21*W**T +* +* computing blocks of NB columns at a time +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + CALL ZGEMV( 'No transpose', J+JB-JJ, K-1, -CONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, CONE, + $ A( JJ, JJ ), 1 ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL ZGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -CONE, A( J+JB, 1 ), LDA, W( J, 1 ), + $ LDW, CONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Put L21 in standard form by partially undoing the interchanges +* of rows in columns 1:k-1 looping backwards from k-1 to 1 +* + J = K - 1 + 120 CONTINUE +* +* Undo the interchanges (if any) of rows JJ and JP at each +* step J +* +* (Here, J is a diagonal index) + JJ = J + JP = IPIV( J ) + IF( JP.LT.0 ) THEN + JP = -JP +* (Here, J is a diagonal index) + J = J - 1 + END IF +* (NOTE: Here, J is used to determine row length. Length J +* of the rows to swap back doesn't include diagonal element) + J = J - 1 + IF( JP.NE.JJ .AND. J.GE.1 ) + $ CALL ZSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA ) + IF( J.GT.1 ) + $ GO TO 120 +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF + RETURN +* +* End of ZLASYF +* + END diff --git a/dspl/liblapack/SRC/zlasyf_aa.f b/dspl/liblapack/SRC/zlasyf_aa.f new file mode 100644 index 0000000..f321b72 --- /dev/null +++ b/dspl/liblapack/SRC/zlasyf_aa.f @@ -0,0 +1,493 @@ +*> \brief \b ZLASYF_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLASYF_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLASYF_AA( UPLO, J1, M, NB, A, LDA, IPIV, +* H, LDH, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER J1, M, NB, LDA, LDH +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), H( LDH, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLATRF_AA factorizes a panel of a complex symmetric matrix A using +*> the Aasen's algorithm. The panel consists of a set of NB rows of A +*> when UPLO is U, or a set of NB columns when UPLO is L. +*> +*> In order to factorize the panel, the Aasen's algorithm requires the +*> last row, or column, of the previous panel. The first row, or column, +*> of A is set to be the first row, or column, of an identity matrix, +*> which is used to factorize the first panel. +*> +*> The resulting J-th row of U, or J-th column of L, is stored in the +*> (J-1)-th row, or column, of A (without the unit diagonals), while +*> the diagonal and subdiagonal of A are overwritten by those of T. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] J1 +*> \verbatim +*> J1 is INTEGER +*> The location of the first row, or column, of the panel +*> within the submatrix of A, passed to this routine, e.g., +*> when called by ZSYTRF_AA, for the first panel, J1 is 1, +*> while for the remaining panels, J1 is 2. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The dimension of the submatrix. M >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The dimension of the panel to be facotorized. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,M) for +*> the first panel, while dimension (LDA,M+1) for the +*> remaining panels. +*> +*> On entry, A contains the last row, or column, of +*> the previous panel, and the trailing submatrix of A +*> to be factorized, except for the first panel, only +*> the panel is passed. +*> +*> On exit, the leading panel is factorized. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (M) +*> Details of the row and column interchanges, +*> the row and column k were interchanged with the row and +*> column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] H +*> \verbatim +*> H is COMPLEX*16 workspace, dimension (LDH,NB). +*> +*> \endverbatim +*> +*> \param[in] LDH +*> \verbatim +*> LDH is INTEGER +*> The leading dimension of the workspace H. LDH >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 workspace, dimension (M). +*> \endverbatim +*> +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZLASYF_AA( UPLO, J1, M, NB, A, LDA, IPIV, + $ H, LDH, WORK ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER M, NB, J1, LDA, LDH +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), H( LDH, * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* +* .. Local Scalars .. + INTEGER J, K, K1, I1, I2, MJ + COMPLEX*16 PIV, ALPHA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX, ILAENV + EXTERNAL LSAME, ILAENV, IZAMAX +* .. +* .. External Subroutines .. + EXTERNAL ZGEMV, ZAXPY, ZSCAL, ZCOPY, ZSWAP, ZLASET, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + J = 1 +* +* K1 is the first column of the panel to be factorized +* i.e., K1 is 2 for the first block column, and 1 for the rest of the blocks +* + K1 = (2-J1)+1 +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* ..................................................... +* Factorize A as U**T*D*U using the upper triangle of A +* ..................................................... +* + 10 CONTINUE + IF ( J.GT.MIN(M, NB) ) + $ GO TO 20 +* +* K is the column to be factorized +* when being called from ZSYTRF_AA, +* > for the first block column, J1 is 1, hence J1+J-1 is J, +* > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, +* + K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF +* +* H(J:M, J) := A(J, J:M) - H(J:M, 1:(J-1)) * L(J1:(J-1), J), +* where H(J:M, J) has been initialized to be A(J, J:M) +* + IF( K.GT.2 ) THEN +* +* K is the column to be factorized +* > for the first block column, K is J, skipping the first two +* columns +* > for the rest of the columns, K is J+1, skipping only the +* first column +* + CALL ZGEMV( 'No transpose', MJ, J-K1, + $ -ONE, H( J, K1 ), LDH, + $ A( 1, J ), 1, + $ ONE, H( J, J ), 1 ) + END IF +* +* Copy H(i:M, i) into WORK +* + CALL ZCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) +* + IF( J.GT.K1 ) THEN +* +* Compute WORK := WORK - L(J-1, J:M) * T(J-1,J), +* where A(J-1, J) stores T(J-1, J) and A(J-2, J:M) stores U(J-1, J:M) +* + ALPHA = -A( K-1, J ) + CALL ZAXPY( MJ, ALPHA, A( K-2, J ), LDA, WORK( 1 ), 1 ) + END IF +* +* Set A(J, J) = T(J, J) +* + A( K, J ) = WORK( 1 ) +* + IF( J.LT.M ) THEN +* +* Compute WORK(2:M) = T(J, J) L(J, (J+1):M) +* where A(J, J) stores T(J, J) and A(J-1, (J+1):M) stores U(J, (J+1):M) +* + IF( K.GT.1 ) THEN + ALPHA = -A( K, J ) + CALL ZAXPY( M-J, ALPHA, A( K-1, J+1 ), LDA, + $ WORK( 2 ), 1 ) + ENDIF +* +* Find max(|WORK(2:M)|) +* + I2 = IZAMAX( M-J, WORK( 2 ), 1 ) + 1 + PIV = WORK( I2 ) +* +* Apply symmetric pivot +* + IF( (I2.NE.2) .AND. (PIV.NE.0) ) THEN +* +* Swap WORK(I1) and WORK(I2) +* + I1 = 2 + WORK( I2 ) = WORK( I1 ) + WORK( I1 ) = PIV +* +* Swap A(I1, I1+1:M) with A(I1+1:M, I2) +* + I1 = I1+J-1 + I2 = I2+J-1 + CALL ZSWAP( I2-I1-1, A( J1+I1-1, I1+1 ), LDA, + $ A( J1+I1, I2 ), 1 ) +* +* Swap A(I1, I2+1:M) with A(I2, I2+1:M) +* + CALL ZSWAP( M-I2, A( J1+I1-1, I2+1 ), LDA, + $ A( J1+I2-1, I2+1 ), LDA ) +* +* Swap A(I1, I1) with A(I2,I2) +* + PIV = A( I1+J1-1, I1 ) + A( J1+I1-1, I1 ) = A( J1+I2-1, I2 ) + A( J1+I2-1, I2 ) = PIV +* +* Swap H(I1, 1:J1) with H(I2, 1:J1) +* + CALL ZSWAP( I1-1, H( I1, 1 ), LDH, H( I2, 1 ), LDH ) + IPIV( I1 ) = I2 +* + IF( I1.GT.(K1-1) ) THEN +* +* Swap L(1:I1-1, I1) with L(1:I1-1, I2), +* skipping the first column +* + CALL ZSWAP( I1-K1+1, A( 1, I1 ), 1, + $ A( 1, I2 ), 1 ) + END IF + ELSE + IPIV( J+1 ) = J+1 + ENDIF +* +* Set A(J, J+1) = T(J, J+1) +* + A( K, J+1 ) = WORK( 2 ) +* + IF( J.LT.NB ) THEN +* +* Copy A(J+1:M, J+1) into H(J:M, J), +* + CALL ZCOPY( M-J, A( K+1, J+1 ), LDA, + $ H( J+1, J+1 ), 1 ) + END IF +* +* Compute L(J+2, J+1) = WORK( 3:M ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:M, J) = L(J+2:M, J+1) +* + IF( A( K, J+1 ).NE.ZERO ) THEN + ALPHA = ONE / A( K, J+1 ) + CALL ZCOPY( M-J-1, WORK( 3 ), 1, A( K, J+2 ), LDA ) + CALL ZSCAL( M-J-1, ALPHA, A( K, J+2 ), LDA ) + ELSE + CALL ZLASET( 'Full', 1, M-J-1, ZERO, ZERO, + $ A( K, J+2 ), LDA) + END IF + END IF + J = J + 1 + GO TO 10 + 20 CONTINUE +* + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* + 30 CONTINUE + IF( J.GT.MIN( M, NB ) ) + $ GO TO 40 +* +* K is the column to be factorized +* when being called from ZSYTRF_AA, +* > for the first block column, J1 is 1, hence J1+J-1 is J, +* > for the rest of the columns, J1 is 2, and J1+J-1 is J+1, +* + K = J1+J-1 + IF( J.EQ.M ) THEN +* +* Only need to compute T(J, J) +* + MJ = 1 + ELSE + MJ = M-J+1 + END IF +* +* H(J:M, J) := A(J:M, J) - H(J:M, 1:(J-1)) * L(J, J1:(J-1))^T, +* where H(J:M, J) has been initialized to be A(J:M, J) +* + IF( K.GT.2 ) THEN +* +* K is the column to be factorized +* > for the first block column, K is J, skipping the first two +* columns +* > for the rest of the columns, K is J+1, skipping only the +* first column +* + CALL ZGEMV( 'No transpose', MJ, J-K1, + $ -ONE, H( J, K1 ), LDH, + $ A( J, 1 ), LDA, + $ ONE, H( J, J ), 1 ) + END IF +* +* Copy H(J:M, J) into WORK +* + CALL ZCOPY( MJ, H( J, J ), 1, WORK( 1 ), 1 ) +* + IF( J.GT.K1 ) THEN +* +* Compute WORK := WORK - L(J:M, J-1) * T(J-1,J), +* where A(J-1, J) = T(J-1, J) and A(J, J-2) = L(J, J-1) +* + ALPHA = -A( J, K-1 ) + CALL ZAXPY( MJ, ALPHA, A( J, K-2 ), 1, WORK( 1 ), 1 ) + END IF +* +* Set A(J, J) = T(J, J) +* + A( J, K ) = WORK( 1 ) +* + IF( J.LT.M ) THEN +* +* Compute WORK(2:M) = T(J, J) L((J+1):M, J) +* where A(J, J) = T(J, J) and A((J+1):M, J-1) = L((J+1):M, J) +* + IF( K.GT.1 ) THEN + ALPHA = -A( J, K ) + CALL ZAXPY( M-J, ALPHA, A( J+1, K-1 ), 1, + $ WORK( 2 ), 1 ) + ENDIF +* +* Find max(|WORK(2:M)|) +* + I2 = IZAMAX( M-J, WORK( 2 ), 1 ) + 1 + PIV = WORK( I2 ) +* +* Apply symmetric pivot +* + IF( (I2.NE.2) .AND. (PIV.NE.0) ) THEN +* +* Swap WORK(I1) and WORK(I2) +* + I1 = 2 + WORK( I2 ) = WORK( I1 ) + WORK( I1 ) = PIV +* +* Swap A(I1+1:M, I1) with A(I2, I1+1:M) +* + I1 = I1+J-1 + I2 = I2+J-1 + CALL ZSWAP( I2-I1-1, A( I1+1, J1+I1-1 ), 1, + $ A( I2, J1+I1 ), LDA ) +* +* Swap A(I2+1:M, I1) with A(I2+1:M, I2) +* + CALL ZSWAP( M-I2, A( I2+1, J1+I1-1 ), 1, + $ A( I2+1, J1+I2-1 ), 1 ) +* +* Swap A(I1, I1) with A(I2, I2) +* + PIV = A( I1, J1+I1-1 ) + A( I1, J1+I1-1 ) = A( I2, J1+I2-1 ) + A( I2, J1+I2-1 ) = PIV +* +* Swap H(I1, I1:J1) with H(I2, I2:J1) +* + CALL ZSWAP( I1-1, H( I1, 1 ), LDH, H( I2, 1 ), LDH ) + IPIV( I1 ) = I2 +* + IF( I1.GT.(K1-1) ) THEN +* +* Swap L(1:I1-1, I1) with L(1:I1-1, I2), +* skipping the first column +* + CALL ZSWAP( I1-K1+1, A( I1, 1 ), LDA, + $ A( I2, 1 ), LDA ) + END IF + ELSE + IPIV( J+1 ) = J+1 + ENDIF +* +* Set A(J+1, J) = T(J+1, J) +* + A( J+1, K ) = WORK( 2 ) +* + IF( J.LT.NB ) THEN +* +* Copy A(J+1:M, J+1) into H(J+1:M, J), +* + CALL ZCOPY( M-J, A( J+1, K+1 ), 1, + $ H( J+1, J+1 ), 1 ) + END IF +* +* Compute L(J+2, J+1) = WORK( 3:M ) / T(J, J+1), +* where A(J, J+1) = T(J, J+1) and A(J+2:M, J) = L(J+2:M, J+1) +* + IF( A( J+1, K ).NE.ZERO ) THEN + ALPHA = ONE / A( J+1, K ) + CALL ZCOPY( M-J-1, WORK( 3 ), 1, A( J+2, K ), 1 ) + CALL ZSCAL( M-J-1, ALPHA, A( J+2, K ), 1 ) + ELSE + CALL ZLASET( 'Full', M-J-1, 1, ZERO, ZERO, + $ A( J+2, K ), LDA ) + END IF + END IF + J = J + 1 + GO TO 30 + 40 CONTINUE + END IF + RETURN +* +* End of ZLASYF_AA +* + END diff --git a/dspl/liblapack/SRC/zlasyf_rk.f b/dspl/liblapack/SRC/zlasyf_rk.f new file mode 100644 index 0000000..664ed93 --- /dev/null +++ b/dspl/liblapack/SRC/zlasyf_rk.f @@ -0,0 +1,974 @@ +*> \brief \b ZLASYF_RK computes a partial factorization of a complex symmetric indefinite matrix using bounded Bunch-Kaufman (rook) diagonal pivoting method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLASYF_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLASYF_RK( UPLO, N, NB, KB, A, LDA, E, IPIV, W, LDW, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), E( * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> ZLASYF_RK computes a partial factorization of a complex symmetric +*> matrix A using the bounded Bunch-Kaufman (rook) diagonal +*> pivoting method. The partial factorization has the form: +*> +*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +*> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) +*> +*> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L', +*> ( L21 I ) ( 0 A22 ) ( 0 I ) +*> +*> where the order of D is at most NB. The actual order is returned in +*> the argument KB, and is either NB or NB-1, or N if N <= NB. +*> +*> ZLASYF_RK is an auxiliary routine called by ZSYTRF_RK. It uses +*> blocked code (calling Level 3 BLAS) to update the submatrix +*> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The maximum number of columns of the matrix A that should be +*> factored. NB should be at least 2 to allow for 2-by-2 pivot +*> blocks. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns of A that were actually factored. +*> KB is either NB-1 or NB, or N if N <= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, contains: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N) +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> IPIV describes the permutation matrix P in the factorization +*> of matrix A as follows. The absolute value of IPIV(k) +*> represents the index of row and column that were +*> interchanged with the k-th row and column. The value of UPLO +*> describes the order in which the interchanges were applied. +*> Also, the sign of IPIV represents the block structure of +*> the symmetric block diagonal matrix D with 1-by-1 or 2-by-2 +*> diagonal blocks which correspond to 1 or 2 interchanges +*> at each factorization step. +*> +*> If UPLO = 'U', +*> ( in factorization order, k decreases from N to 1 ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the submatrix A(1:N,N-KB+1:N); +*> If IPIV(k) = k, no interchange occurred. +*> +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k-1) < 0 means: +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,N-KB+1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k-1) != k-1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the submatrix A(1:N,N-KB+1:N). +*> If -IPIV(k-1) = k-1, no interchange occurred. +*> +*> c) In both cases a) and b) is always ABS( IPIV(k) ) <= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> +*> If UPLO = 'L', +*> ( in factorization order, k increases from 1 to N ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the submatrix A(1:N,1:KB). +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k+1) < 0 means: +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the submatrix A(1:N,1:KB). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k+1) != k+1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the submatrix A(1:N,1:KB). +*> If -IPIV(k+1) = k+1, no interchange occurred. +*> +*> c) In both cases a) and b) is always ABS( IPIV(k) ) >= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX*16 array, dimension (LDW,NB) +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZLASYF_RK( UPLO, N, NB, KB, A, LDA, E, IPIV, W, LDW, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), E( * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) + COMPLEX*16 CONE, CZERO + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), + $ CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER IMAX, ITEMP, J, JB, JJ, JMAX, K, KK, KW, KKW, + $ KP, KSTEP, P, II + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, ROWMAX, SFMIN, DTEMP + COMPLEX*16 D11, D12, D21, D22, R1, T, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IZAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL ZCOPY, ZGEMM, ZGEMV, ZSCAL, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, MIN, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( DBLE( Z ) ) + ABS( DIMAG( Z ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = DLAMCH( 'S' ) +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 +* +* Initilize the first entry of array E, where superdiagonal +* elements of D are stored +* + E( 1 ) = CZERO +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* + K = N + 10 CONTINUE +* +* KW is the column of W which corresponds to column K of A +* + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column KW of W and update it +* + CALL ZCOPY( K, A( 1, K ), 1, W( 1, KW ), 1 ) + IF( K.LT.N ) + $ CALL ZGEMV( 'No transpose', K, N-K, -CONE, A( 1, K+1 ), + $ LDA, W( K, KW+1 ), LDW, CONE, W( 1, KW ), 1 ) +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( W( K, KW ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = IZAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = CABS1( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + CALL ZCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) +* +* Set E( K ) to zero +* + IF( K.GT.1 ) + $ E( K ) = CZERO +* + ELSE +* +* ============================================================ +* +* Test for interchange +* +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 12 CONTINUE +* +* Begin pivot search loop body +* +* +* Copy column IMAX to column KW-1 of W and update it +* + CALL ZCOPY( IMAX, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 ) + CALL ZCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) +* + IF( K.LT.N ) + $ CALL ZGEMV( 'No transpose', K, N-K, -CONE, + $ A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW, + $ CONE, W( 1, KW-1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + IZAMAX( K-IMAX, W( IMAX+1, KW-1 ), + $ 1 ) + ROWMAX = CABS1( W( JMAX, KW-1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = IZAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + DTEMP = CABS1( W( ITEMP, KW-1 ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for +* CABS1( W( IMAX, KW-1 ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.(CABS1( W( IMAX, KW-1 ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW of W +* + CALL ZCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K-1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL ZCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 12 +* + END IF +* +* ============================================================ +* + KK = K - KSTEP + 1 +* +* KKW is the column of W which corresponds to column KK of A +* + KKW = NB + KK - N +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column K to column P +* + CALL ZCOPY( K-P, A( P+1, K ), 1, A( P, P+1 ), LDA ) + CALL ZCOPY( P, A( 1, K ), 1, A( 1, P ), 1 ) +* +* Interchange rows K and P in last N-K+1 columns of A +* and last N-K+2 columns of W +* + CALL ZSWAP( N-K+1, A( K, K ), LDA, A( P, K ), LDA ) + CALL ZSWAP( N-KK+1, W( K, KKW ), LDW, W( P, KKW ), LDW ) + END IF +* +* Updated column KP is already stored in column KKW of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL ZCOPY( K-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + CALL ZCOPY( KP, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last N-KK+1 columns +* of A and W +* + CALL ZSWAP( N-KK+1, A( KK, KK ), LDA, A( KP, KK ), LDA ) + CALL ZSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column KW of W now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Store U(k) in column k of A +* + CALL ZCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + IF( K.GT.1 ) THEN + IF( CABS1( A( K, K ) ).GE.SFMIN ) THEN + R1 = CONE / A( K, K ) + CALL ZSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE IF( A( K, K ).NE.CZERO ) THEN + DO 14 II = 1, K - 1 + A( II, K ) = A( II, K ) / A( K, K ) + 14 CONTINUE + END IF +* +* Store the superdiagonal element of D in array E +* + E( K ) = CZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns KW and KW-1 of W now +* hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* + IF( K.GT.2 ) THEN +* +* Store U(k) and U(k-1) in columns k and k-1 of A +* + D12 = W( K-1, KW ) + D11 = W( K, KW ) / D12 + D22 = W( K-1, KW-1 ) / D12 + T = CONE / ( D11*D22-CONE ) + DO 20 J = 1, K - 2 + A( J, K-1 ) = T*( (D11*W( J, KW-1 )-W( J, KW ) ) / + $ D12 ) + A( J, K ) = T*( ( D22*W( J, KW )-W( J, KW-1 ) ) / + $ D12 ) + 20 CONTINUE + END IF +* +* Copy diagonal elements of D(K) to A, +* copy superdiagonal element of D(K) to E(K) and +* ZERO out superdiagonal entry of A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = CZERO + A( K, K ) = W( K, KW ) + E( K ) = W( K-1, KW ) + E( K-1 ) = CZERO +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12**T = A11 - U12*W**T +* +* computing blocks of NB columns at a time +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + CALL ZGEMV( 'No transpose', JJ-J+1, N-K, -CONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, CONE, + $ A( J, JJ ), 1 ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + IF( J.GE.2 ) + $ CALL ZGEMM( 'No transpose', 'Transpose', J-1, JB, + $ N-K, -CONE, A( 1, K+1 ), LDA, W( J, KW+1 ), + $ LDW, CONE, A( 1, J ), LDA ) + 50 CONTINUE +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 +* +* Initilize the unused last entry of the subdiagonal array E. +* + E( N ) = CZERO +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column K of W and update it +* + CALL ZCOPY( N-K+1, A( K, K ), 1, W( K, K ), 1 ) + IF( K.GT.1 ) + $ CALL ZGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), + $ LDA, W( K, 1 ), LDW, CONE, W( K, K ), 1 ) +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( W( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + IZAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = CABS1( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + CALL ZCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) +* +* Set E( K ) to zero +* + IF( K.LT.N ) + $ E( K ) = CZERO +* + ELSE +* +* ============================================================ +* +* Test for interchange +* +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 72 CONTINUE +* +* Begin pivot search loop body +* +* +* Copy column IMAX to column K+1 of W and update it +* + CALL ZCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1) + CALL ZCOPY( N-IMAX+1, A( IMAX, IMAX ), 1, + $ W( IMAX, K+1 ), 1 ) + IF( K.GT.1 ) + $ CALL ZGEMV( 'No transpose', N-K+1, K-1, -CONE, + $ A( K, 1 ), LDA, W( IMAX, 1 ), LDW, + $ CONE, W( K, K+1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + IZAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = CABS1( W( JMAX, K+1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + IZAMAX( N-IMAX, W( IMAX+1, K+1 ), 1) + DTEMP = CABS1( W( ITEMP, K+1 ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for +* CABS1( W( IMAX, K+1 ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( CABS1( W( IMAX, K+1 ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K of W +* + CALL ZCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL ZCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 72 +* + END IF +* +* ============================================================ +* + KK = K + KSTEP - 1 +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column K to column P +* + CALL ZCOPY( P-K, A( K, K ), 1, A( P, K ), LDA ) + CALL ZCOPY( N-P+1, A( P, K ), 1, A( P, P ), 1 ) +* +* Interchange rows K and P in first K columns of A +* and first K+1 columns of W +* + CALL ZSWAP( K, A( K, 1 ), LDA, A( P, 1 ), LDA ) + CALL ZSWAP( KK, W( K, 1 ), LDW, W( P, 1 ), LDW ) + END IF +* +* Updated column KP is already stored in column KK of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL ZCOPY( KP-K-1, A( K+1, KK ), 1, A( KP, K+1 ), LDA ) + CALL ZCOPY( N-KP+1, A( KP, KK ), 1, A( KP, KP ), 1 ) +* +* Interchange rows KK and KP in first KK columns of A and W +* + CALL ZSWAP( KK, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL ZSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* +* Store L(k) in column k of A +* + CALL ZCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN + IF( CABS1( A( K, K ) ).GE.SFMIN ) THEN + R1 = CONE / A( K, K ) + CALL ZSCAL( N-K, R1, A( K+1, K ), 1 ) + ELSE IF( A( K, K ).NE.CZERO ) THEN + DO 74 II = K + 1, N + A( II, K ) = A( II, K ) / A( K, K ) + 74 CONTINUE + END IF +* +* Store the subdiagonal element of D in array E +* + E( K ) = CZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Store L(k) and L(k+1) in columns k and k+1 of A +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / D21 + T = CONE / ( D11*D22-CONE ) + DO 80 J = K + 2, N + A( J, K ) = T*( ( D11*W( J, K )-W( J, K+1 ) ) / + $ D21 ) + A( J, K+1 ) = T*( ( D22*W( J, K+1 )-W( J, K ) ) / + $ D21 ) + 80 CONTINUE + END IF +* +* Copy diagonal elements of D(K) to A, +* copy subdiagonal element of D(K) to E(K) and +* ZERO out subdiagonal entry of A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = CZERO + A( K+1, K+1 ) = W( K+1, K+1 ) + E( K ) = W( K+1, K ) + E( K+1 ) = CZERO +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21**T = A22 - L21*W**T +* +* computing blocks of NB columns at a time +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + CALL ZGEMV( 'No transpose', J+JB-JJ, K-1, -CONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, CONE, + $ A( JJ, JJ ), 1 ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL ZGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -CONE, A( J+JB, 1 ), LDA, W( J, 1 ), + $ LDW, CONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF +* + RETURN +* +* End of ZLASYF_RK +* + END diff --git a/dspl/liblapack/SRC/zlasyf_rook.f b/dspl/liblapack/SRC/zlasyf_rook.f new file mode 100644 index 0000000..108c03e --- /dev/null +++ b/dspl/liblapack/SRC/zlasyf_rook.f @@ -0,0 +1,900 @@ +*> \brief \b ZLASYF_ROOK computes a partial factorization of a complex symmetric matrix using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLASYF_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLASYF_ROOK( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLASYF_ROOK computes a partial factorization of a complex symmetric +*> matrix A using the bounded Bunch-Kaufman ("rook") diagonal +*> pivoting method. The partial factorization has the form: +*> +*> A = ( I U12 ) ( A11 0 ) ( I 0 ) if UPLO = 'U', or: +*> ( 0 U22 ) ( 0 D ) ( U12**T U22**T ) +*> +*> A = ( L11 0 ) ( D 0 ) ( L11**T L21**T ) if UPLO = 'L' +*> ( L21 I ) ( 0 A22 ) ( 0 I ) +*> +*> where the order of D is at most NB. The actual order is returned in +*> the argument KB, and is either NB or NB-1, or N if N <= NB. +*> +*> ZLASYF_ROOK is an auxiliary routine called by ZSYTRF_ROOK. It uses +*> blocked code (calling Level 3 BLAS) to update the submatrix +*> A11 (if UPLO = 'U') or A22 (if UPLO = 'L'). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The maximum number of columns of the matrix A that should be +*> factored. NB should be at least 2 to allow for 2-by-2 pivot +*> blocks. +*> \endverbatim +*> +*> \param[out] KB +*> \verbatim +*> KB is INTEGER +*> The number of columns of A that were actually factored. +*> KB is either NB-1 or NB, or N if N <= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, A contains details of the partial factorization. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> Only the last KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> Only the first KB elements of IPIV are set. +*> +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX*16 array, dimension (LDW,NB) +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup complex16SYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZLASYF_ROOK( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, + $ INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KB, LDA, LDW, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) + COMPLEX*16 CONE, CZERO + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), + $ CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER IMAX, ITEMP, J, JB, JJ, JMAX, JP1, JP2, K, KK, + $ KW, KKW, KP, KSTEP, P, II + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, ROWMAX, DTEMP, SFMIN + COMPLEX*16 D11, D12, D21, D22, R1, T, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IZAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL ZCOPY, ZGEMM, ZGEMV, ZSCAL, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN, SQRT, DIMAG, DBLE +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( DBLE( Z ) ) + ABS( DIMAG( Z ) ) +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = DLAMCH( 'S' ) +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Factorize the trailing columns of A using the upper triangle +* of A and working backwards, and compute the matrix W = U12*D +* for use in updating A11 +* +* K is the main loop index, decreasing from N in steps of 1 or 2 +* + K = N + 10 CONTINUE +* +* KW is the column of W which corresponds to column K of A +* + KW = NB + K - N +* +* Exit from loop +* + IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 ) + $ GO TO 30 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column KW of W and update it +* + CALL ZCOPY( K, A( 1, K ), 1, W( 1, KW ), 1 ) + IF( K.LT.N ) + $ CALL ZGEMV( 'No transpose', K, N-K, -CONE, A( 1, K+1 ), + $ LDA, W( K, KW+1 ), LDW, CONE, W( 1, KW ), 1 ) +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( W( K, KW ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = IZAMAX( K-1, W( 1, KW ), 1 ) + COLMAX = CABS1( W( IMAX, KW ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + CALL ZCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + ELSE +* +* ============================================================ +* +* Test for interchange +* +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 12 CONTINUE +* +* Begin pivot search loop body +* +* +* Copy column IMAX to column KW-1 of W and update it +* + CALL ZCOPY( IMAX, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 ) + CALL ZCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA, + $ W( IMAX+1, KW-1 ), 1 ) +* + IF( K.LT.N ) + $ CALL ZGEMV( 'No transpose', K, N-K, -CONE, + $ A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW, + $ CONE, W( 1, KW-1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + IZAMAX( K-IMAX, W( IMAX+1, KW-1 ), + $ 1 ) + ROWMAX = CABS1( W( JMAX, KW-1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = IZAMAX( IMAX-1, W( 1, KW-1 ), 1 ) + DTEMP = CABS1( W( ITEMP, KW-1 ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for +* CABS1( W( IMAX, KW-1 ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.(CABS1( W( IMAX, KW-1 ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column KW-1 of W to column KW of W +* + CALL ZCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K-1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL ZCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 ) +* + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 12 +* + END IF +* +* ============================================================ +* + KK = K - KSTEP + 1 +* +* KKW is the column of W which corresponds to column KK of A +* + KKW = NB + KK - N +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column K to column P +* + CALL ZCOPY( K-P, A( P+1, K ), 1, A( P, P+1 ), LDA ) + CALL ZCOPY( P, A( 1, K ), 1, A( 1, P ), 1 ) +* +* Interchange rows K and P in last N-K+1 columns of A +* and last N-K+2 columns of W +* + CALL ZSWAP( N-K+1, A( K, K ), LDA, A( P, K ), LDA ) + CALL ZSWAP( N-KK+1, W( K, KKW ), LDW, W( P, KKW ), LDW ) + END IF +* +* Updated column KP is already stored in column KKW of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL ZCOPY( K-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + CALL ZCOPY( KP, A( 1, KK ), 1, A( 1, KP ), 1 ) +* +* Interchange rows KK and KP in last N-KK+1 columns +* of A and W +* + CALL ZSWAP( N-KK+1, A( KK, KK ), LDA, A( KP, KK ), LDA ) + CALL ZSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ), + $ LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column KW of W now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Store U(k) in column k of A +* + CALL ZCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 ) + IF( K.GT.1 ) THEN + IF( CABS1( A( K, K ) ).GE.SFMIN ) THEN + R1 = CONE / A( K, K ) + CALL ZSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE IF( A( K, K ).NE.CZERO ) THEN + DO 14 II = 1, K - 1 + A( II, K ) = A( II, K ) / A( K, K ) + 14 CONTINUE + END IF + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns KW and KW-1 of W now +* hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* + IF( K.GT.2 ) THEN +* +* Store U(k) and U(k-1) in columns k and k-1 of A +* + D12 = W( K-1, KW ) + D11 = W( K, KW ) / D12 + D22 = W( K-1, KW-1 ) / D12 + T = CONE / ( D11*D22-CONE ) + DO 20 J = 1, K - 2 + A( J, K-1 ) = T*( (D11*W( J, KW-1 )-W( J, KW ) ) / + $ D12 ) + A( J, K ) = T*( ( D22*W( J, KW )-W( J, KW-1 ) ) / + $ D12 ) + 20 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K-1, K-1 ) = W( K-1, KW-1 ) + A( K-1, K ) = W( K-1, KW ) + A( K, K ) = W( K, KW ) + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 30 CONTINUE +* +* Update the upper triangle of A11 (= A(1:k,1:k)) as +* +* A11 := A11 - U12*D*U12**T = A11 - U12*W**T +* +* computing blocks of NB columns at a time +* + DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB + JB = MIN( NB, K-J+1 ) +* +* Update the upper triangle of the diagonal block +* + DO 40 JJ = J, J + JB - 1 + CALL ZGEMV( 'No transpose', JJ-J+1, N-K, -CONE, + $ A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, CONE, + $ A( J, JJ ), 1 ) + 40 CONTINUE +* +* Update the rectangular superdiagonal block +* + IF( J.GE.2 ) + $ CALL ZGEMM( 'No transpose', 'Transpose', J-1, JB, + $ N-K, -CONE, A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, + $ CONE, A( 1, J ), LDA ) + 50 CONTINUE +* +* Put U12 in standard form by partially undoing the interchanges +* in columns k+1:n +* + J = K + 1 + 60 CONTINUE +* + KSTEP = 1 + JP1 = 1 + JJ = J + JP2 = IPIV( J ) + IF( JP2.LT.0 ) THEN + JP2 = -JP2 + J = J + 1 + JP1 = -IPIV( J ) + KSTEP = 2 + END IF +* + J = J + 1 + IF( JP2.NE.JJ .AND. J.LE.N ) + $ CALL ZSWAP( N-J+1, A( JP2, J ), LDA, A( JJ, J ), LDA ) + JJ = J - 1 + IF( JP1.NE.JJ .AND. KSTEP.EQ.2 ) + $ CALL ZSWAP( N-J+1, A( JP1, J ), LDA, A( JJ, J ), LDA ) + IF( J.LE.N ) + $ GO TO 60 +* +* Set KB to the number of columns factorized +* + KB = N - K +* + ELSE +* +* Factorize the leading columns of A using the lower triangle +* of A and working forwards, and compute the matrix W = L21*D +* for use in updating A22 +* +* K is the main loop index, increasing from 1 in steps of 1 or 2 +* + K = 1 + 70 CONTINUE +* +* Exit from loop +* + IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N ) + $ GO TO 90 +* + KSTEP = 1 + P = K +* +* Copy column K of A to column K of W and update it +* + CALL ZCOPY( N-K+1, A( K, K ), 1, W( K, K ), 1 ) + IF( K.GT.1 ) + $ CALL ZGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), + $ LDA, W( K, 1 ), LDW, CONE, W( K, K ), 1 ) +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( W( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + IZAMAX( N-K, W( K+1, K ), 1 ) + COLMAX = CABS1( W( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + CALL ZCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + ELSE +* +* ============================================================ +* +* Test for interchange +* +* Equivalent to testing for ABSAKK.GE.ALPHA*COLMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 72 CONTINUE +* +* Begin pivot search loop body +* +* +* Copy column IMAX to column K+1 of W and update it +* + CALL ZCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1) + CALL ZCOPY( N-IMAX+1, A( IMAX, IMAX ), 1, + $ W( IMAX, K+1 ), 1 ) + IF( K.GT.1 ) + $ CALL ZGEMV( 'No transpose', N-K+1, K-1, -CONE, + $ A( K, 1 ), LDA, W( IMAX, 1 ), LDW, + $ CONE, W( K, K+1 ), 1 ) +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + IZAMAX( IMAX-K, W( K, K+1 ), 1 ) + ROWMAX = CABS1( W( JMAX, K+1 ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + IZAMAX( N-IMAX, W( IMAX+1, K+1 ), 1) + DTEMP = CABS1( W( ITEMP, K+1 ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for +* CABS1( W( IMAX, K+1 ) ).GE.ALPHA*ROWMAX +* (used to handle NaN and Inf) +* + IF( .NOT.( CABS1( W( IMAX, K+1 ) ).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX +* +* copy column K+1 of W to column K of W +* + CALL ZCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX.EQ.COLMAX, +* (used to handle NaN and Inf) +* + ELSE IF( ( P.EQ.JMAX ) .OR. ( ROWMAX.LE.COLMAX ) ) + $ THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot not found: set params and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX +* +* Copy updated JMAXth (next IMAXth) column to Kth of W +* + CALL ZCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 ) +* + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 72 +* + END IF +* +* ============================================================ +* + KK = K + KSTEP - 1 +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Copy non-updated column K to column P +* + CALL ZCOPY( P-K, A( K, K ), 1, A( P, K ), LDA ) + CALL ZCOPY( N-P+1, A( P, K ), 1, A( P, P ), 1 ) +* +* Interchange rows K and P in first K columns of A +* and first K+1 columns of W +* + CALL ZSWAP( K, A( K, 1 ), LDA, A( P, 1 ), LDA ) + CALL ZSWAP( KK, W( K, 1 ), LDW, W( P, 1 ), LDW ) + END IF +* +* Updated column KP is already stored in column KK of W +* + IF( KP.NE.KK ) THEN +* +* Copy non-updated column KK to column KP +* + A( KP, K ) = A( KK, K ) + CALL ZCOPY( KP-K-1, A( K+1, KK ), 1, A( KP, K+1 ), LDA ) + CALL ZCOPY( N-KP+1, A( KP, KK ), 1, A( KP, KP ), 1 ) +* +* Interchange rows KK and KP in first KK columns of A and W +* + CALL ZSWAP( KK, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) + CALL ZSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW ) + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k of W now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* +* Store L(k) in column k of A +* + CALL ZCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 ) + IF( K.LT.N ) THEN + IF( CABS1( A( K, K ) ).GE.SFMIN ) THEN + R1 = CONE / A( K, K ) + CALL ZSCAL( N-K, R1, A( K+1, K ), 1 ) + ELSE IF( A( K, K ).NE.CZERO ) THEN + DO 74 II = K + 1, N + A( II, K ) = A( II, K ) / A( K, K ) + 74 CONTINUE + END IF + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 of W now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Store L(k) and L(k+1) in columns k and k+1 of A +* + D21 = W( K+1, K ) + D11 = W( K+1, K+1 ) / D21 + D22 = W( K, K ) / D21 + T = CONE / ( D11*D22-CONE ) + DO 80 J = K + 2, N + A( J, K ) = T*( ( D11*W( J, K )-W( J, K+1 ) ) / + $ D21 ) + A( J, K+1 ) = T*( ( D22*W( J, K+1 )-W( J, K ) ) / + $ D21 ) + 80 CONTINUE + END IF +* +* Copy D(k) to A +* + A( K, K ) = W( K, K ) + A( K+1, K ) = W( K+1, K ) + A( K+1, K+1 ) = W( K+1, K+1 ) + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 70 +* + 90 CONTINUE +* +* Update the lower triangle of A22 (= A(k:n,k:n)) as +* +* A22 := A22 - L21*D*L21**T = A22 - L21*W**T +* +* computing blocks of NB columns at a time +* + DO 110 J = K, N, NB + JB = MIN( NB, N-J+1 ) +* +* Update the lower triangle of the diagonal block +* + DO 100 JJ = J, J + JB - 1 + CALL ZGEMV( 'No transpose', J+JB-JJ, K-1, -CONE, + $ A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, CONE, + $ A( JJ, JJ ), 1 ) + 100 CONTINUE +* +* Update the rectangular subdiagonal block +* + IF( J+JB.LE.N ) + $ CALL ZGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB, + $ K-1, -CONE, A( J+JB, 1 ), LDA, W( J, 1 ), LDW, + $ CONE, A( J+JB, J ), LDA ) + 110 CONTINUE +* +* Put L21 in standard form by partially undoing the interchanges +* in columns 1:k-1 +* + J = K - 1 + 120 CONTINUE +* + KSTEP = 1 + JP1 = 1 + JJ = J + JP2 = IPIV( J ) + IF( JP2.LT.0 ) THEN + JP2 = -JP2 + J = J - 1 + JP1 = -IPIV( J ) + KSTEP = 2 + END IF +* + J = J - 1 + IF( JP2.NE.JJ .AND. J.GE.1 ) + $ CALL ZSWAP( J, A( JP2, 1 ), LDA, A( JJ, 1 ), LDA ) + JJ = J + 1 + IF( JP1.NE.JJ .AND. KSTEP.EQ.2 ) + $ CALL ZSWAP( J, A( JP1, 1 ), LDA, A( JJ, 1 ), LDA ) + IF( J.GE.1 ) + $ GO TO 120 +* +* Set KB to the number of columns factorized +* + KB = K - 1 +* + END IF + RETURN +* +* End of ZLASYF_ROOK +* + END diff --git a/dspl/liblapack/SRC/zlat2c.f b/dspl/liblapack/SRC/zlat2c.f new file mode 100644 index 0000000..dad8fa8 --- /dev/null +++ b/dspl/liblapack/SRC/zlat2c.f @@ -0,0 +1,180 @@ +*> \brief \b ZLAT2C converts a double complex triangular matrix to a complex triangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAT2C + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAT2C( UPLO, N, A, LDA, SA, LDSA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDSA, N +* .. +* .. Array Arguments .. +* COMPLEX SA( LDSA, * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAT2C converts a COMPLEX*16 triangular matrix, SA, to a COMPLEX +*> triangular matrix, A. +*> +*> RMAX is the overflow for the SINGLE PRECISION arithmetic +*> ZLAT2C checks that all the entries of A are between -RMAX and +*> RMAX. If not the conversion is aborted and a flag is raised. +*> +*> This is an auxiliary routine so there is no argument checking. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of rows and columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the N-by-N triangular coefficient matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] SA +*> \verbatim +*> SA is COMPLEX array, dimension (LDSA,N) +*> Only the UPLO part of SA is referenced. On exit, if INFO=0, +*> the N-by-N coefficient matrix SA; if INFO>0, the content of +*> the UPLO part of SA is unspecified. +*> \endverbatim +*> +*> \param[in] LDSA +*> \verbatim +*> LDSA is INTEGER +*> The leading dimension of the array SA. LDSA >= max(1,M). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> = 1: an entry of the matrix A is greater than the SINGLE +*> PRECISION overflow threshold, in this case, the content +*> of the UPLO part of SA in exit is unspecified. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLAT2C( UPLO, N, A, LDA, SA, LDSA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDSA, N +* .. +* .. Array Arguments .. + COMPLEX SA( LDSA, * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION RMAX + LOGICAL UPPER +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DIMAG +* .. +* .. External Functions .. + REAL SLAMCH + LOGICAL LSAME + EXTERNAL SLAMCH, LSAME +* .. +* .. Executable Statements .. +* + RMAX = SLAMCH( 'O' ) + UPPER = LSAME( UPLO, 'U' ) + IF( UPPER ) THEN + DO 20 J = 1, N + DO 10 I = 1, J + IF( ( DBLE( A( I, J ) ).LT.-RMAX ) .OR. + $ ( DBLE( A( I, J ) ).GT.RMAX ) .OR. + $ ( DIMAG( A( I, J ) ).LT.-RMAX ) .OR. + $ ( DIMAG( A( I, J ) ).GT.RMAX ) ) THEN + INFO = 1 + GO TO 50 + END IF + SA( I, J ) = A( I, J ) + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1, N + DO 30 I = J, N + IF( ( DBLE( A( I, J ) ).LT.-RMAX ) .OR. + $ ( DBLE( A( I, J ) ).GT.RMAX ) .OR. + $ ( DIMAG( A( I, J ) ).LT.-RMAX ) .OR. + $ ( DIMAG( A( I, J ) ).GT.RMAX ) ) THEN + INFO = 1 + GO TO 50 + END IF + SA( I, J ) = A( I, J ) + 30 CONTINUE + 40 CONTINUE + END IF + 50 CONTINUE +* + RETURN +* +* End of ZLAT2C +* + END diff --git a/dspl/liblapack/SRC/zlatbs.f b/dspl/liblapack/SRC/zlatbs.f new file mode 100644 index 0000000..c9d672a --- /dev/null +++ b/dspl/liblapack/SRC/zlatbs.f @@ -0,0 +1,998 @@ +*> \brief \b ZLATBS solves a triangular banded system of equations. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLATBS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLATBS( UPLO, TRANS, DIAG, NORMIN, N, KD, AB, LDAB, X, +* SCALE, CNORM, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORMIN, TRANS, UPLO +* INTEGER INFO, KD, LDAB, N +* DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. +* DOUBLE PRECISION CNORM( * ) +* COMPLEX*16 AB( LDAB, * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLATBS solves one of the triangular systems +*> +*> A * x = s*b, A**T * x = s*b, or A**H * x = s*b, +*> +*> with scaling to prevent overflow, where A is an upper or lower +*> triangular band matrix. Here A**T denotes the transpose of A, x and b +*> are n-element vectors, and s is a scaling factor, usually less than +*> or equal to 1, chosen so that the components of x will be less than +*> the overflow threshold. If the unscaled problem will not cause +*> overflow, the Level 2 BLAS routine ZTBSV is called. If the matrix A +*> is singular (A(j,j) = 0 for some j), then s is set to 0 and a +*> non-trivial solution to A*x = 0 is returned. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the operation applied to A. +*> = 'N': Solve A * x = s*b (No transpose) +*> = 'T': Solve A**T * x = s*b (Transpose) +*> = 'C': Solve A**H * x = s*b (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] NORMIN +*> \verbatim +*> NORMIN is CHARACTER*1 +*> Specifies whether CNORM has been set or not. +*> = 'Y': CNORM contains the column norms on entry +*> = 'N': CNORM is not set on entry. On exit, the norms will +*> be computed and stored in CNORM. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of subdiagonals or superdiagonals in the +*> triangular matrix A. KD >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> The upper or lower triangular band matrix A, stored in the +*> first KD+1 rows of the array. The j-th column of A is stored +*> in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (N) +*> On entry, the right hand side b of the triangular system. +*> On exit, X is overwritten by the solution vector x. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> The scaling factor s for the triangular system +*> A * x = s*b, A**T * x = s*b, or A**H * x = s*b. +*> If SCALE = 0, the matrix A is singular or badly scaled, and +*> the vector x is an exact or approximate solution to A*x = 0. +*> \endverbatim +*> +*> \param[in,out] CNORM +*> \verbatim +*> CNORM is DOUBLE PRECISION array, dimension (N) +*> +*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +*> contains the norm of the off-diagonal part of the j-th column +*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal +*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +*> must be greater than or equal to the 1-norm. +*> +*> If NORMIN = 'N', CNORM is an output argument and CNORM(j) +*> returns the 1-norm of the offdiagonal part of the j-th column +*> of A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> A rough bound on x is computed; if that is less than overflow, ZTBSV +*> is called, otherwise, specific code is used which checks for possible +*> overflow or divide-by-zero at every operation. +*> +*> A columnwise scheme is used for solving A*x = b. The basic algorithm +*> if A is lower triangular is +*> +*> x[1:n] := b[1:n] +*> for j = 1, ..., n +*> x(j) := x(j) / A(j,j) +*> x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +*> end +*> +*> Define bounds on the components of x after j iterations of the loop: +*> M(j) = bound on x[1:j] +*> G(j) = bound on x[j+1:n] +*> Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +*> +*> Then for iteration j+1 we have +*> M(j+1) <= G(j) / | A(j+1,j+1) | +*> G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +*> <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +*> +*> where CNORM(j+1) is greater than or equal to the infinity-norm of +*> column j+1 of A, not counting the diagonal. Hence +*> +*> G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +*> 1<=i<=j +*> and +*> +*> |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +*> 1<=i< j +*> +*> Since |x(j)| <= M(j), we use the Level 2 BLAS routine ZTBSV if the +*> reciprocal of the largest M(j), j=1,..,n, is larger than +*> max(underflow, 1/overflow). +*> +*> The bound on x(j) is also used to determine when a step in the +*> columnwise method can be performed without fear of overflow. If +*> the computed bound is greater than a large constant, x is scaled to +*> prevent overflow, but if the bound overflows, x is set to 0, x(j) to +*> 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +*> +*> Similarly, a row-wise scheme is used to solve A**T *x = b or +*> A**H *x = b. The basic algorithm for A upper triangular is +*> +*> for j = 1, ..., n +*> x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +*> end +*> +*> We simultaneously compute two bounds +*> G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +*> M(j) = bound on x(i), 1<=i<=j +*> +*> The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +*> add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +*> Then the bound on x(j) is +*> +*> M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +*> +*> <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +*> 1<=i<=j +*> +*> and we can safely call ZTBSV if 1/M(n) and 1/G(n) are both greater +*> than max(underflow, 1/overflow). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLATBS( UPLO, TRANS, DIAG, NORMIN, N, KD, AB, LDAB, X, + $ SCALE, CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, KD, LDAB, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + DOUBLE PRECISION CNORM( * ) + COMPLEX*16 AB( LDAB, * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0, + $ TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST, JLEN, MAIND + DOUBLE PRECISION BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL, + $ XBND, XJ, XMAX + COMPLEX*16 CSUMJ, TJJS, USCAL, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX, IZAMAX + DOUBLE PRECISION DLAMCH, DZASUM + COMPLEX*16 ZDOTC, ZDOTU, ZLADIV + EXTERNAL LSAME, IDAMAX, IZAMAX, DLAMCH, DZASUM, ZDOTC, + $ ZDOTU, ZLADIV +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, XERBLA, ZAXPY, ZDSCAL, ZTBSV, DLABAD +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1, CABS2 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) + CABS2( ZDUM ) = ABS( DBLE( ZDUM ) / 2.D0 ) + + $ ABS( DIMAG( ZDUM ) / 2.D0 ) +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( KD.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLATBS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM / DLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + DO 10 J = 1, N + JLEN = MIN( KD, J-1 ) + CNORM( J ) = DZASUM( JLEN, AB( KD+1-JLEN, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) THEN + CNORM( J ) = DZASUM( JLEN, AB( 2, J ), 1 ) + ELSE + CNORM( J ) = ZERO + END IF + 20 CONTINUE + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM/2. +* + IMAX = IDAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM*HALF ) THEN + TSCAL = ONE + ELSE + TSCAL = HALF / ( SMLNUM*TMAX ) + CALL DSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine ZTBSV can be used. +* + XMAX = ZERO + DO 30 J = 1, N + XMAX = MAX( XMAX, CABS2( X( J ) ) ) + 30 CONTINUE + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + MAIND = KD + 1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + MAIND = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 60 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* + TJJS = AB( MAIND, J ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = G(j-1) / abs(A(j,j)) +* + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF +* + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 40 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 50 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 50 CONTINUE + END IF + 60 CONTINUE +* + ELSE +* +* Compute the growth in A**T * x = b or A**H * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + MAIND = KD + 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + MAIND = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 90 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* + TJJS = AB( MAIND, J ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF + 70 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 80 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 80 CONTINUE + END IF + 90 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL ZTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM*HALF ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = ( BIGNUM*HALF ) / XMAX + CALL ZDSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + ELSE + XMAX = XMAX*TWO + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 120 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 110 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 100 I = 1, N + X( I ) = ZERO + 100 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 110 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL ZDSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - +* x(j)* A(max(1,j-kd):j-1,j) +* + JLEN = MIN( KD, J-1 ) + CALL ZAXPY( JLEN, -X( J )*TSCAL, + $ AB( KD+1-JLEN, J ), 1, X( J-JLEN ), 1 ) + I = IZAMAX( J-1, X, 1 ) + XMAX = CABS1( X( I ) ) + END IF + ELSE IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) - +* x(j) * A(j+1:min(j+kd,n),j) +* + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.0 ) + $ CALL ZAXPY( JLEN, -X( J )*TSCAL, AB( 2, J ), 1, + $ X( J+1 ), 1 ) + I = J + IZAMAX( N-J, X( J+1 ), 1 ) + XMAX = CABS1( X( I ) ) + END IF + 120 CONTINUE +* + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Solve A**T * x = b +* + DO 170 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = ZLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.DCMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call ZDOTU to perform the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + CSUMJ = ZDOTU( JLEN, AB( KD+1-JLEN, J ), 1, + $ X( J-JLEN ), 1 ) + ELSE + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.1 ) + $ CSUMJ = ZDOTU( JLEN, AB( 2, J ), 1, X( J+1 ), + $ 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + DO 130 I = 1, JLEN + CSUMJ = CSUMJ + ( AB( KD+I-JLEN, J )*USCAL )* + $ X( J-JLEN-1+I ) + 130 CONTINUE + ELSE + JLEN = MIN( KD, N-J ) + DO 140 I = 1, JLEN + CSUMJ = CSUMJ + ( AB( I+1, J )*USCAL )*X( J+I ) + 140 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = AB( MAIND, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 160 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**T *x = 0. +* + DO 150 I = 1, N + X( I ) = ZERO + 150 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 160 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + 170 CONTINUE +* + ELSE +* +* Solve A**H * x = b +* + DO 220 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = DCONJG( AB( MAIND, J ) )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = ZLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.DCMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call ZDOTC to perform the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + CSUMJ = ZDOTC( JLEN, AB( KD+1-JLEN, J ), 1, + $ X( J-JLEN ), 1 ) + ELSE + JLEN = MIN( KD, N-J ) + IF( JLEN.GT.1 ) + $ CSUMJ = ZDOTC( JLEN, AB( 2, J ), 1, X( J+1 ), + $ 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + JLEN = MIN( KD, J-1 ) + DO 180 I = 1, JLEN + CSUMJ = CSUMJ + ( DCONJG( AB( KD+I-JLEN, J ) )* + $ USCAL )*X( J-JLEN-1+I ) + 180 CONTINUE + ELSE + JLEN = MIN( KD, N-J ) + DO 190 I = 1, JLEN + CSUMJ = CSUMJ + ( DCONJG( AB( I+1, J ) )*USCAL ) + $ *X( J+I ) + 190 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = DCONJG( AB( MAIND, J ) )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 210 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**H *x = 0. +* + DO 200 I = 1, N + X( I ) = ZERO + 200 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 210 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + 220 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL DSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of ZLATBS +* + END diff --git a/dspl/liblapack/SRC/zlatdf.f b/dspl/liblapack/SRC/zlatdf.f new file mode 100644 index 0000000..ab88570 --- /dev/null +++ b/dspl/liblapack/SRC/zlatdf.f @@ -0,0 +1,323 @@ +*> \brief \b ZLATDF uses the LU factorization of the n-by-n matrix computed by sgetc2 and computes a contribution to the reciprocal Dif-estimate. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLATDF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLATDF( IJOB, N, Z, LDZ, RHS, RDSUM, RDSCAL, IPIV, +* JPIV ) +* +* .. Scalar Arguments .. +* INTEGER IJOB, LDZ, N +* DOUBLE PRECISION RDSCAL, RDSUM +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), JPIV( * ) +* COMPLEX*16 RHS( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLATDF computes the contribution to the reciprocal Dif-estimate +*> by solving for x in Z * x = b, where b is chosen such that the norm +*> of x is as large as possible. It is assumed that LU decomposition +*> of Z has been computed by ZGETC2. On entry RHS = f holds the +*> contribution from earlier solved sub-systems, and on return RHS = x. +*> +*> The factorization of Z returned by ZGETC2 has the form +*> Z = P * L * U * Q, where P and Q are permutation matrices. L is lower +*> triangular with unit diagonal elements and U is upper triangular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] IJOB +*> \verbatim +*> IJOB is INTEGER +*> IJOB = 2: First compute an approximative null-vector e +*> of Z using ZGECON, e is normalized and solve for +*> Zx = +-e - f with the sign giving the greater value of +*> 2-norm(x). About 5 times as expensive as Default. +*> IJOB .ne. 2: Local look ahead strategy where +*> all entries of the r.h.s. b is chosen as either +1 or +*> -1. Default. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Z. +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> On entry, the LU part of the factorization of the n-by-n +*> matrix Z computed by ZGETC2: Z = P * L * U * Q +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDA >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] RHS +*> \verbatim +*> RHS is COMPLEX*16 array, dimension (N). +*> On entry, RHS contains contributions from other subsystems. +*> On exit, RHS contains the solution of the subsystem with +*> entries according to the value of IJOB (see above). +*> \endverbatim +*> +*> \param[in,out] RDSUM +*> \verbatim +*> RDSUM is DOUBLE PRECISION +*> On entry, the sum of squares of computed contributions to +*> the Dif-estimate under computation by ZTGSYL, where the +*> scaling factor RDSCAL (see below) has been factored out. +*> On exit, the corresponding sum of squares updated with the +*> contributions from the current sub-system. +*> If TRANS = 'T' RDSUM is not touched. +*> NOTE: RDSUM only makes sense when ZTGSY2 is called by CTGSYL. +*> \endverbatim +*> +*> \param[in,out] RDSCAL +*> \verbatim +*> RDSCAL is DOUBLE PRECISION +*> On entry, scaling factor used to prevent overflow in RDSUM. +*> On exit, RDSCAL is updated w.r.t. the current contributions +*> in RDSUM. +*> If TRANS = 'T', RDSCAL is not touched. +*> NOTE: RDSCAL only makes sense when ZTGSY2 is called by +*> ZTGSYL. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N). +*> The pivot indices; for 1 <= i <= N, row i of the +*> matrix has been interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in] JPIV +*> \verbatim +*> JPIV is INTEGER array, dimension (N). +*> The pivot indices; for 1 <= j <= N, column j of the +*> matrix has been interchanged with column JPIV(j). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> This routine is a further developed implementation of algorithm +*> BSOLVE in [1] using complete pivoting in the LU factorization. +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> [1] Bo Kagstrom and Lars Westin, +*> Generalized Schur Methods with Condition Estimators for +*> Solving the Generalized Sylvester Equation, IEEE Transactions +*> on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751. +*>\n +*> [2] Peter Poromaa, +*> On Efficient and Robust Estimators for the Separation +*> between two Regular Matrix Pairs with Applications in +*> Condition Estimation. Report UMINF-95.05, Department of +*> Computing Science, Umea University, S-901 87 Umea, Sweden, +*> 1995. +* +* ===================================================================== + SUBROUTINE ZLATDF( IJOB, N, Z, LDZ, RHS, RDSUM, RDSCAL, IPIV, + $ JPIV ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER IJOB, LDZ, N + DOUBLE PRECISION RDSCAL, RDSUM +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), JPIV( * ) + COMPLEX*16 RHS( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXDIM + PARAMETER ( MAXDIM = 2 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J, K + DOUBLE PRECISION RTEMP, SCALE, SMINU, SPLUS + COMPLEX*16 BM, BP, PMONE, TEMP +* .. +* .. Local Arrays .. + DOUBLE PRECISION RWORK( MAXDIM ) + COMPLEX*16 WORK( 4*MAXDIM ), XM( MAXDIM ), XP( MAXDIM ) +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZCOPY, ZGECON, ZGESC2, ZLASSQ, ZLASWP, + $ ZSCAL +* .. +* .. External Functions .. + DOUBLE PRECISION DZASUM + COMPLEX*16 ZDOTC + EXTERNAL DZASUM, ZDOTC +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, SQRT +* .. +* .. Executable Statements .. +* + IF( IJOB.NE.2 ) THEN +* +* Apply permutations IPIV to RHS +* + CALL ZLASWP( 1, RHS, LDZ, 1, N-1, IPIV, 1 ) +* +* Solve for L-part choosing RHS either to +1 or -1. +* + PMONE = -CONE + DO 10 J = 1, N - 1 + BP = RHS( J ) + CONE + BM = RHS( J ) - CONE + SPLUS = ONE +* +* Lockahead for L- part RHS(1:N-1) = +-1 +* SPLUS and SMIN computed more efficiently than in BSOLVE[1]. +* + SPLUS = SPLUS + DBLE( ZDOTC( N-J, Z( J+1, J ), 1, Z( J+1, + $ J ), 1 ) ) + SMINU = DBLE( ZDOTC( N-J, Z( J+1, J ), 1, RHS( J+1 ), 1 ) ) + SPLUS = SPLUS*DBLE( RHS( J ) ) + IF( SPLUS.GT.SMINU ) THEN + RHS( J ) = BP + ELSE IF( SMINU.GT.SPLUS ) THEN + RHS( J ) = BM + ELSE +* +* In this case the updating sums are equal and we can +* choose RHS(J) +1 or -1. The first time this happens we +* choose -1, thereafter +1. This is a simple way to get +* good estimates of matrices like Byers well-known example +* (see [1]). (Not done in BSOLVE.) +* + RHS( J ) = RHS( J ) + PMONE + PMONE = CONE + END IF +* +* Compute the remaining r.h.s. +* + TEMP = -RHS( J ) + CALL ZAXPY( N-J, TEMP, Z( J+1, J ), 1, RHS( J+1 ), 1 ) + 10 CONTINUE +* +* Solve for U- part, lockahead for RHS(N) = +-1. This is not done +* In BSOLVE and will hopefully give us a better estimate because +* any ill-conditioning of the original matrix is transfered to U +* and not to L. U(N, N) is an approximation to sigma_min(LU). +* + CALL ZCOPY( N-1, RHS, 1, WORK, 1 ) + WORK( N ) = RHS( N ) + CONE + RHS( N ) = RHS( N ) - CONE + SPLUS = ZERO + SMINU = ZERO + DO 30 I = N, 1, -1 + TEMP = CONE / Z( I, I ) + WORK( I ) = WORK( I )*TEMP + RHS( I ) = RHS( I )*TEMP + DO 20 K = I + 1, N + WORK( I ) = WORK( I ) - WORK( K )*( Z( I, K )*TEMP ) + RHS( I ) = RHS( I ) - RHS( K )*( Z( I, K )*TEMP ) + 20 CONTINUE + SPLUS = SPLUS + ABS( WORK( I ) ) + SMINU = SMINU + ABS( RHS( I ) ) + 30 CONTINUE + IF( SPLUS.GT.SMINU ) + $ CALL ZCOPY( N, WORK, 1, RHS, 1 ) +* +* Apply the permutations JPIV to the computed solution (RHS) +* + CALL ZLASWP( 1, RHS, LDZ, 1, N-1, JPIV, -1 ) +* +* Compute the sum of squares +* + CALL ZLASSQ( N, RHS, 1, RDSCAL, RDSUM ) + RETURN + END IF +* +* ENTRY IJOB = 2 +* +* Compute approximate nullvector XM of Z +* + CALL ZGECON( 'I', N, Z, LDZ, ONE, RTEMP, WORK, RWORK, INFO ) + CALL ZCOPY( N, WORK( N+1 ), 1, XM, 1 ) +* +* Compute RHS +* + CALL ZLASWP( 1, XM, LDZ, 1, N-1, IPIV, -1 ) + TEMP = CONE / SQRT( ZDOTC( N, XM, 1, XM, 1 ) ) + CALL ZSCAL( N, TEMP, XM, 1 ) + CALL ZCOPY( N, XM, 1, XP, 1 ) + CALL ZAXPY( N, CONE, RHS, 1, XP, 1 ) + CALL ZAXPY( N, -CONE, XM, 1, RHS, 1 ) + CALL ZGESC2( N, Z, LDZ, RHS, IPIV, JPIV, SCALE ) + CALL ZGESC2( N, Z, LDZ, XP, IPIV, JPIV, SCALE ) + IF( DZASUM( N, XP, 1 ).GT.DZASUM( N, RHS, 1 ) ) + $ CALL ZCOPY( N, XP, 1, RHS, 1 ) +* +* Compute the sum of squares +* + CALL ZLASSQ( N, RHS, 1, RDSCAL, RDSUM ) + RETURN +* +* End of ZLATDF +* + END diff --git a/dspl/liblapack/SRC/zlatps.f b/dspl/liblapack/SRC/zlatps.f new file mode 100644 index 0000000..ac148ca --- /dev/null +++ b/dspl/liblapack/SRC/zlatps.f @@ -0,0 +1,978 @@ +*> \brief \b ZLATPS solves a triangular system of equations with the matrix held in packed storage. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLATPS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLATPS( UPLO, TRANS, DIAG, NORMIN, N, AP, X, SCALE, +* CNORM, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORMIN, TRANS, UPLO +* INTEGER INFO, N +* DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. +* DOUBLE PRECISION CNORM( * ) +* COMPLEX*16 AP( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLATPS solves one of the triangular systems +*> +*> A * x = s*b, A**T * x = s*b, or A**H * x = s*b, +*> +*> with scaling to prevent overflow, where A is an upper or lower +*> triangular matrix stored in packed form. Here A**T denotes the +*> transpose of A, A**H denotes the conjugate transpose of A, x and b +*> are n-element vectors, and s is a scaling factor, usually less than +*> or equal to 1, chosen so that the components of x will be less than +*> the overflow threshold. If the unscaled problem will not cause +*> overflow, the Level 2 BLAS routine ZTPSV is called. If the matrix A +*> is singular (A(j,j) = 0 for some j), then s is set to 0 and a +*> non-trivial solution to A*x = 0 is returned. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the operation applied to A. +*> = 'N': Solve A * x = s*b (No transpose) +*> = 'T': Solve A**T * x = s*b (Transpose) +*> = 'C': Solve A**H * x = s*b (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] NORMIN +*> \verbatim +*> NORMIN is CHARACTER*1 +*> Specifies whether CNORM has been set or not. +*> = 'Y': CNORM contains the column norms on entry +*> = 'N': CNORM is not set on entry. On exit, the norms will +*> be computed and stored in CNORM. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The upper or lower triangular matrix A, packed columnwise in +*> a linear array. The j-th column of A is stored in the array +*> AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (N) +*> On entry, the right hand side b of the triangular system. +*> On exit, X is overwritten by the solution vector x. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> The scaling factor s for the triangular system +*> A * x = s*b, A**T * x = s*b, or A**H * x = s*b. +*> If SCALE = 0, the matrix A is singular or badly scaled, and +*> the vector x is an exact or approximate solution to A*x = 0. +*> \endverbatim +*> +*> \param[in,out] CNORM +*> \verbatim +*> CNORM is DOUBLE PRECISION array, dimension (N) +*> +*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +*> contains the norm of the off-diagonal part of the j-th column +*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal +*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +*> must be greater than or equal to the 1-norm. +*> +*> If NORMIN = 'N', CNORM is an output argument and CNORM(j) +*> returns the 1-norm of the offdiagonal part of the j-th column +*> of A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> A rough bound on x is computed; if that is less than overflow, ZTPSV +*> is called, otherwise, specific code is used which checks for possible +*> overflow or divide-by-zero at every operation. +*> +*> A columnwise scheme is used for solving A*x = b. The basic algorithm +*> if A is lower triangular is +*> +*> x[1:n] := b[1:n] +*> for j = 1, ..., n +*> x(j) := x(j) / A(j,j) +*> x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +*> end +*> +*> Define bounds on the components of x after j iterations of the loop: +*> M(j) = bound on x[1:j] +*> G(j) = bound on x[j+1:n] +*> Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +*> +*> Then for iteration j+1 we have +*> M(j+1) <= G(j) / | A(j+1,j+1) | +*> G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +*> <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +*> +*> where CNORM(j+1) is greater than or equal to the infinity-norm of +*> column j+1 of A, not counting the diagonal. Hence +*> +*> G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +*> 1<=i<=j +*> and +*> +*> |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +*> 1<=i< j +*> +*> Since |x(j)| <= M(j), we use the Level 2 BLAS routine ZTPSV if the +*> reciprocal of the largest M(j), j=1,..,n, is larger than +*> max(underflow, 1/overflow). +*> +*> The bound on x(j) is also used to determine when a step in the +*> columnwise method can be performed without fear of overflow. If +*> the computed bound is greater than a large constant, x is scaled to +*> prevent overflow, but if the bound overflows, x is set to 0, x(j) to +*> 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +*> +*> Similarly, a row-wise scheme is used to solve A**T *x = b or +*> A**H *x = b. The basic algorithm for A upper triangular is +*> +*> for j = 1, ..., n +*> x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +*> end +*> +*> We simultaneously compute two bounds +*> G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +*> M(j) = bound on x(i), 1<=i<=j +*> +*> The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +*> add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +*> Then the bound on x(j) is +*> +*> M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +*> +*> <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +*> 1<=i<=j +*> +*> and we can safely call ZTPSV if 1/M(n) and 1/G(n) are both greater +*> than max(underflow, 1/overflow). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLATPS( UPLO, TRANS, DIAG, NORMIN, N, AP, X, SCALE, + $ CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + DOUBLE PRECISION CNORM( * ) + COMPLEX*16 AP( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0, + $ TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, IP, J, JFIRST, JINC, JLAST, JLEN + DOUBLE PRECISION BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL, + $ XBND, XJ, XMAX + COMPLEX*16 CSUMJ, TJJS, USCAL, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX, IZAMAX + DOUBLE PRECISION DLAMCH, DZASUM + COMPLEX*16 ZDOTC, ZDOTU, ZLADIV + EXTERNAL LSAME, IDAMAX, IZAMAX, DLAMCH, DZASUM, ZDOTC, + $ ZDOTU, ZLADIV +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, XERBLA, ZAXPY, ZDSCAL, ZTPSV, DLABAD +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1, CABS2 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) + CABS2( ZDUM ) = ABS( DBLE( ZDUM ) / 2.D0 ) + + $ ABS( DIMAG( ZDUM ) / 2.D0 ) +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLATPS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM / DLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + IP = 1 + DO 10 J = 1, N + CNORM( J ) = DZASUM( J-1, AP( IP ), 1 ) + IP = IP + J + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + IP = 1 + DO 20 J = 1, N - 1 + CNORM( J ) = DZASUM( N-J, AP( IP+1 ), 1 ) + IP = IP + N - J + 1 + 20 CONTINUE + CNORM( N ) = ZERO + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM/2. +* + IMAX = IDAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM*HALF ) THEN + TSCAL = ONE + ELSE + TSCAL = HALF / ( SMLNUM*TMAX ) + CALL DSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine ZTPSV can be used. +* + XMAX = ZERO + DO 30 J = 1, N + XMAX = MAX( XMAX, CABS2( X( J ) ) ) + 30 CONTINUE + XBND = XMAX + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 60 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = N + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* + TJJS = AP( IP ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = G(j-1) / abs(A(j,j)) +* + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF +* + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + IP = IP + JINC*JLEN + JLEN = JLEN - 1 + 40 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 50 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 50 CONTINUE + END IF + 60 CONTINUE +* + ELSE +* +* Compute the growth in A**T * x = b or A**H * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 90 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = 1 + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* + TJJS = AP( IP ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF + JLEN = JLEN + 1 + IP = IP + JINC*JLEN + 70 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 80 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 80 CONTINUE + END IF + 90 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL ZTPSV( UPLO, TRANS, DIAG, N, AP, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM*HALF ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = ( BIGNUM*HALF ) / XMAX + CALL ZDSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + ELSE + XMAX = XMAX*TWO + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + IP = JFIRST*( JFIRST+1 ) / 2 + DO 120 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 110 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 100 I = 1, N + X( I ) = ZERO + 100 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 110 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL ZDSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +* + CALL ZAXPY( J-1, -X( J )*TSCAL, AP( IP-J+1 ), 1, X, + $ 1 ) + I = IZAMAX( J-1, X, 1 ) + XMAX = CABS1( X( I ) ) + END IF + IP = IP - J + ELSE + IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +* + CALL ZAXPY( N-J, -X( J )*TSCAL, AP( IP+1 ), 1, + $ X( J+1 ), 1 ) + I = J + IZAMAX( N-J, X( J+1 ), 1 ) + XMAX = CABS1( X( I ) ) + END IF + IP = IP + N - J + 1 + END IF + 120 CONTINUE +* + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Solve A**T * x = b +* + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = 1 + DO 170 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = ZLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.DCMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call ZDOTU to perform the dot product. +* + IF( UPPER ) THEN + CSUMJ = ZDOTU( J-1, AP( IP-J+1 ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + CSUMJ = ZDOTU( N-J, AP( IP+1 ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 130 I = 1, J - 1 + CSUMJ = CSUMJ + ( AP( IP-J+I )*USCAL )*X( I ) + 130 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 140 I = 1, N - J + CSUMJ = CSUMJ + ( AP( IP+I )*USCAL )*X( J+I ) + 140 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = AP( IP )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 160 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**T *x = 0. +* + DO 150 I = 1, N + X( I ) = ZERO + 150 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 160 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + JLEN = JLEN + 1 + IP = IP + JINC*JLEN + 170 CONTINUE +* + ELSE +* +* Solve A**H * x = b +* + IP = JFIRST*( JFIRST+1 ) / 2 + JLEN = 1 + DO 220 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = DCONJG( AP( IP ) )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = ZLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.DCMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call ZDOTC to perform the dot product. +* + IF( UPPER ) THEN + CSUMJ = ZDOTC( J-1, AP( IP-J+1 ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + CSUMJ = ZDOTC( N-J, AP( IP+1 ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 180 I = 1, J - 1 + CSUMJ = CSUMJ + ( DCONJG( AP( IP-J+I ) )*USCAL ) + $ *X( I ) + 180 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 190 I = 1, N - J + CSUMJ = CSUMJ + ( DCONJG( AP( IP+I ) )*USCAL )* + $ X( J+I ) + 190 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJS = DCONJG( AP( IP ) )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 210 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**H *x = 0. +* + DO 200 I = 1, N + X( I ) = ZERO + 200 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 210 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + JLEN = JLEN + 1 + IP = IP + JINC*JLEN + 220 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL DSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of ZLATPS +* + END diff --git a/dspl/liblapack/SRC/zlatrd.f b/dspl/liblapack/SRC/zlatrd.f new file mode 100644 index 0000000..ccc0409 --- /dev/null +++ b/dspl/liblapack/SRC/zlatrd.f @@ -0,0 +1,358 @@ +*> \brief \b ZLATRD reduces the first nb rows and columns of a symmetric/Hermitian matrix A to real tridiagonal form by an unitary similarity transformation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLATRD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* DOUBLE PRECISION E( * ) +* COMPLEX*16 A( LDA, * ), TAU( * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLATRD reduces NB rows and columns of a complex Hermitian matrix A to +*> Hermitian tridiagonal form by a unitary similarity +*> transformation Q**H * A * Q, and returns the matrices V and W which are +*> needed to apply the transformation to the unreduced part of A. +*> +*> If UPLO = 'U', ZLATRD reduces the last NB rows and columns of a +*> matrix, of which the upper triangle is supplied; +*> if UPLO = 'L', ZLATRD reduces the first NB rows and columns of a +*> matrix, of which the lower triangle is supplied. +*> +*> This is an auxiliary routine called by ZHETRD. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of rows and columns to be reduced. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit: +*> if UPLO = 'U', the last NB columns have been reduced to +*> tridiagonal form, with the diagonal elements overwriting +*> the diagonal elements of A; the elements above the diagonal +*> with the array TAU, represent the unitary matrix Q as a +*> product of elementary reflectors; +*> if UPLO = 'L', the first NB columns have been reduced to +*> tridiagonal form, with the diagonal elements overwriting +*> the diagonal elements of A; the elements below the diagonal +*> with the array TAU, represent the unitary matrix Q as a +*> product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal +*> elements of the last NB columns of the reduced matrix; +*> if UPLO = 'L', E(1:nb) contains the subdiagonal elements of +*> the first NB columns of the reduced matrix. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (N-1) +*> The scalar factors of the elementary reflectors, stored in +*> TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. +*> See Further Details. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX*16 array, dimension (LDW,NB) +*> The n-by-nb matrix W required to update the unreduced part +*> of A. +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n) H(n-1) . . . H(n-nb+1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), +*> and tau in TAU(i-1). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(nb). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), +*> and tau in TAU(i). +*> +*> The elements of the vectors v together form the n-by-nb matrix V +*> which is needed, with W, to apply the transformation to the unreduced +*> part of the matrix, using a Hermitian rank-2k update of the form: +*> A := A - V*W**H - W*V**H. +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5 and nb = 2: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( a a a v4 v5 ) ( d ) +*> ( a a v4 v5 ) ( 1 d ) +*> ( a 1 v5 ) ( v1 1 a ) +*> ( d 1 ) ( v1 v2 a a ) +*> ( d ) ( v1 v2 a a a ) +*> +*> where d denotes a diagonal element of the reduced matrix, a denotes +*> an element of the original matrix that is unchanged, and vi denotes +*> an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDW, N, NB +* .. +* .. Array Arguments .. + DOUBLE PRECISION E( * ) + COMPLEX*16 A( LDA, * ), TAU( * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE, HALF + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ), + $ HALF = ( 0.5D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, IW + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZGEMV, ZHEMV, ZLACGV, ZLARFG, ZSCAL +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Reduce last NB columns of upper triangle +* + DO 10 I = N, N - NB + 1, -1 + IW = I - N + NB + IF( I.LT.N ) THEN +* +* Update A(1:i,i) +* + A( I, I ) = DBLE( A( I, I ) ) + CALL ZLACGV( N-I, W( I, IW+1 ), LDW ) + CALL ZGEMV( 'No transpose', I, N-I, -ONE, A( 1, I+1 ), + $ LDA, W( I, IW+1 ), LDW, ONE, A( 1, I ), 1 ) + CALL ZLACGV( N-I, W( I, IW+1 ), LDW ) + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + CALL ZGEMV( 'No transpose', I, N-I, -ONE, W( 1, IW+1 ), + $ LDW, A( I, I+1 ), LDA, ONE, A( 1, I ), 1 ) + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + A( I, I ) = DBLE( A( I, I ) ) + END IF + IF( I.GT.1 ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(1:i-2,i) +* + ALPHA = A( I-1, I ) + CALL ZLARFG( I-1, ALPHA, A( 1, I ), 1, TAU( I-1 ) ) + E( I-1 ) = ALPHA + A( I-1, I ) = ONE +* +* Compute W(1:i-1,i) +* + CALL ZHEMV( 'Upper', I-1, ONE, A, LDA, A( 1, I ), 1, + $ ZERO, W( 1, IW ), 1 ) + IF( I.LT.N ) THEN + CALL ZGEMV( 'Conjugate transpose', I-1, N-I, ONE, + $ W( 1, IW+1 ), LDW, A( 1, I ), 1, ZERO, + $ W( I+1, IW ), 1 ) + CALL ZGEMV( 'No transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + CALL ZGEMV( 'Conjugate transpose', I-1, N-I, ONE, + $ A( 1, I+1 ), LDA, A( 1, I ), 1, ZERO, + $ W( I+1, IW ), 1 ) + CALL ZGEMV( 'No transpose', I-1, N-I, -ONE, + $ W( 1, IW+1 ), LDW, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + END IF + CALL ZSCAL( I-1, TAU( I-1 ), W( 1, IW ), 1 ) + ALPHA = -HALF*TAU( I-1 )*ZDOTC( I-1, W( 1, IW ), 1, + $ A( 1, I ), 1 ) + CALL ZAXPY( I-1, ALPHA, A( 1, I ), 1, W( 1, IW ), 1 ) + END IF +* + 10 CONTINUE + ELSE +* +* Reduce first NB columns of lower triangle +* + DO 20 I = 1, NB +* +* Update A(i:n,i) +* + A( I, I ) = DBLE( A( I, I ) ) + CALL ZLACGV( I-1, W( I, 1 ), LDW ) + CALL ZGEMV( 'No transpose', N-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, W( I, 1 ), LDW, ONE, A( I, I ), 1 ) + CALL ZLACGV( I-1, W( I, 1 ), LDW ) + CALL ZLACGV( I-1, A( I, 1 ), LDA ) + CALL ZGEMV( 'No transpose', N-I+1, I-1, -ONE, W( I, 1 ), + $ LDW, A( I, 1 ), LDA, ONE, A( I, I ), 1 ) + CALL ZLACGV( I-1, A( I, 1 ), LDA ) + A( I, I ) = DBLE( A( I, I ) ) + IF( I.LT.N ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:n,i) +* + ALPHA = A( I+1, I ) + CALL ZLARFG( N-I, ALPHA, A( MIN( I+2, N ), I ), 1, + $ TAU( I ) ) + E( I ) = ALPHA + A( I+1, I ) = ONE +* +* Compute W(i+1:n,i) +* + CALL ZHEMV( 'Lower', N-I, ONE, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, W( I+1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', N-I, I-1, ONE, + $ W( I+1, 1 ), LDW, A( I+1, I ), 1, ZERO, + $ W( 1, I ), 1 ) + CALL ZGEMV( 'No transpose', N-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', N-I, I-1, ONE, + $ A( I+1, 1 ), LDA, A( I+1, I ), 1, ZERO, + $ W( 1, I ), 1 ) + CALL ZGEMV( 'No transpose', N-I, I-1, -ONE, W( I+1, 1 ), + $ LDW, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL ZSCAL( N-I, TAU( I ), W( I+1, I ), 1 ) + ALPHA = -HALF*TAU( I )*ZDOTC( N-I, W( I+1, I ), 1, + $ A( I+1, I ), 1 ) + CALL ZAXPY( N-I, ALPHA, A( I+1, I ), 1, W( I+1, I ), 1 ) + END IF +* + 20 CONTINUE + END IF +* + RETURN +* +* End of ZLATRD +* + END diff --git a/dspl/liblapack/SRC/zlatrs.f b/dspl/liblapack/SRC/zlatrs.f new file mode 100644 index 0000000..f06ac4a --- /dev/null +++ b/dspl/liblapack/SRC/zlatrs.f @@ -0,0 +1,966 @@ +*> \brief \b ZLATRS solves a triangular system of equations with the scale factor set to prevent overflow. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLATRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE, +* CNORM, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORMIN, TRANS, UPLO +* INTEGER INFO, LDA, N +* DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. +* DOUBLE PRECISION CNORM( * ) +* COMPLEX*16 A( LDA, * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLATRS solves one of the triangular systems +*> +*> A * x = s*b, A**T * x = s*b, or A**H * x = s*b, +*> +*> with scaling to prevent overflow. Here A is an upper or lower +*> triangular matrix, A**T denotes the transpose of A, A**H denotes the +*> conjugate transpose of A, x and b are n-element vectors, and s is a +*> scaling factor, usually less than or equal to 1, chosen so that the +*> components of x will be less than the overflow threshold. If the +*> unscaled problem will not cause overflow, the Level 2 BLAS routine +*> ZTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j), +*> then s is set to 0 and a non-trivial solution to A*x = 0 is returned. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the operation applied to A. +*> = 'N': Solve A * x = s*b (No transpose) +*> = 'T': Solve A**T * x = s*b (Transpose) +*> = 'C': Solve A**H * x = s*b (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] NORMIN +*> \verbatim +*> NORMIN is CHARACTER*1 +*> Specifies whether CNORM has been set or not. +*> = 'Y': CNORM contains the column norms on entry +*> = 'N': CNORM is not set on entry. On exit, the norms will +*> be computed and stored in CNORM. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The triangular matrix A. If UPLO = 'U', the leading n by n +*> upper triangular part of the array A contains the upper +*> triangular matrix, and the strictly lower triangular part of +*> A is not referenced. If UPLO = 'L', the leading n by n lower +*> triangular part of the array A contains the lower triangular +*> matrix, and the strictly upper triangular part of A is not +*> referenced. If DIAG = 'U', the diagonal elements of A are +*> also not referenced and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max (1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (N) +*> On entry, the right hand side b of the triangular system. +*> On exit, X is overwritten by the solution vector x. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> The scaling factor s for the triangular system +*> A * x = s*b, A**T * x = s*b, or A**H * x = s*b. +*> If SCALE = 0, the matrix A is singular or badly scaled, and +*> the vector x is an exact or approximate solution to A*x = 0. +*> \endverbatim +*> +*> \param[in,out] CNORM +*> \verbatim +*> CNORM is DOUBLE PRECISION array, dimension (N) +*> +*> If NORMIN = 'Y', CNORM is an input argument and CNORM(j) +*> contains the norm of the off-diagonal part of the j-th column +*> of A. If TRANS = 'N', CNORM(j) must be greater than or equal +*> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) +*> must be greater than or equal to the 1-norm. +*> +*> If NORMIN = 'N', CNORM is an output argument and CNORM(j) +*> returns the 1-norm of the offdiagonal part of the j-th column +*> of A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> A rough bound on x is computed; if that is less than overflow, ZTRSV +*> is called, otherwise, specific code is used which checks for possible +*> overflow or divide-by-zero at every operation. +*> +*> A columnwise scheme is used for solving A*x = b. The basic algorithm +*> if A is lower triangular is +*> +*> x[1:n] := b[1:n] +*> for j = 1, ..., n +*> x(j) := x(j) / A(j,j) +*> x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] +*> end +*> +*> Define bounds on the components of x after j iterations of the loop: +*> M(j) = bound on x[1:j] +*> G(j) = bound on x[j+1:n] +*> Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. +*> +*> Then for iteration j+1 we have +*> M(j+1) <= G(j) / | A(j+1,j+1) | +*> G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | +*> <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) +*> +*> where CNORM(j+1) is greater than or equal to the infinity-norm of +*> column j+1 of A, not counting the diagonal. Hence +*> +*> G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) +*> 1<=i<=j +*> and +*> +*> |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) +*> 1<=i< j +*> +*> Since |x(j)| <= M(j), we use the Level 2 BLAS routine ZTRSV if the +*> reciprocal of the largest M(j), j=1,..,n, is larger than +*> max(underflow, 1/overflow). +*> +*> The bound on x(j) is also used to determine when a step in the +*> columnwise method can be performed without fear of overflow. If +*> the computed bound is greater than a large constant, x is scaled to +*> prevent overflow, but if the bound overflows, x is set to 0, x(j) to +*> 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. +*> +*> Similarly, a row-wise scheme is used to solve A**T *x = b or +*> A**H *x = b. The basic algorithm for A upper triangular is +*> +*> for j = 1, ..., n +*> x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) +*> end +*> +*> We simultaneously compute two bounds +*> G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j +*> M(j) = bound on x(i), 1<=i<=j +*> +*> The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we +*> add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. +*> Then the bound on x(j) is +*> +*> M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | +*> +*> <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) +*> 1<=i<=j +*> +*> and we can safely call ZTRSV if 1/M(n) and 1/G(n) are both greater +*> than max(underflow, 1/overflow). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE, + $ CNORM, INFO ) +* +* -- LAPACK auxiliary routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORMIN, TRANS, UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + DOUBLE PRECISION CNORM( * ) + COMPLEX*16 A( LDA, * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, HALF, ONE, TWO + PARAMETER ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0, + $ TWO = 2.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + INTEGER I, IMAX, J, JFIRST, JINC, JLAST + DOUBLE PRECISION BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL, + $ XBND, XJ, XMAX + COMPLEX*16 CSUMJ, TJJS, USCAL, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX, IZAMAX + DOUBLE PRECISION DLAMCH, DZASUM + COMPLEX*16 ZDOTC, ZDOTU, ZLADIV + EXTERNAL LSAME, IDAMAX, IZAMAX, DLAMCH, DZASUM, ZDOTC, + $ ZDOTU, ZLADIV +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, XERBLA, ZAXPY, ZDSCAL, ZTRSV, DLABAD +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1, CABS2 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) + CABS2( ZDUM ) = ABS( DBLE( ZDUM ) / 2.D0 ) + + $ ABS( DIMAG( ZDUM ) / 2.D0 ) +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* +* Test the input parameters. +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT. + $ LSAME( NORMIN, 'N' ) ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLATRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine machine dependent parameters to control overflow. +* + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM / DLAMCH( 'Precision' ) + BIGNUM = ONE / SMLNUM + SCALE = ONE +* + IF( LSAME( NORMIN, 'N' ) ) THEN +* +* Compute the 1-norm of each column, not including the diagonal. +* + IF( UPPER ) THEN +* +* A is upper triangular. +* + DO 10 J = 1, N + CNORM( J ) = DZASUM( J-1, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* A is lower triangular. +* + DO 20 J = 1, N - 1 + CNORM( J ) = DZASUM( N-J, A( J+1, J ), 1 ) + 20 CONTINUE + CNORM( N ) = ZERO + END IF + END IF +* +* Scale the column norms by TSCAL if the maximum element in CNORM is +* greater than BIGNUM/2. +* + IMAX = IDAMAX( N, CNORM, 1 ) + TMAX = CNORM( IMAX ) + IF( TMAX.LE.BIGNUM*HALF ) THEN + TSCAL = ONE + ELSE + TSCAL = HALF / ( SMLNUM*TMAX ) + CALL DSCAL( N, TSCAL, CNORM, 1 ) + END IF +* +* Compute a bound on the computed solution vector to see if the +* Level 2 BLAS routine ZTRSV can be used. +* + XMAX = ZERO + DO 30 J = 1, N + XMAX = MAX( XMAX, CABS2( X( J ) ) ) + 30 CONTINUE + XBND = XMAX +* + IF( NOTRAN ) THEN +* +* Compute the growth in A * x = b. +* + IF( UPPER ) THEN + JFIRST = N + JLAST = 1 + JINC = -1 + ELSE + JFIRST = 1 + JLAST = N + JINC = 1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 60 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, G(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + DO 40 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* + TJJS = A( J, J ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = G(j-1) / abs(A(j,j)) +* + XBND = MIN( XBND, MIN( ONE, TJJ )*GROW ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF +* + IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN +* +* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) +* + GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) ) + ELSE +* +* G(j) could overflow, set GROW to 0. +* + GROW = ZERO + END IF + 40 CONTINUE + GROW = XBND + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 50 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 60 +* +* G(j) = G(j-1)*( 1 + CNORM(j) ) +* + GROW = GROW*( ONE / ( ONE+CNORM( J ) ) ) + 50 CONTINUE + END IF + 60 CONTINUE +* + ELSE +* +* Compute the growth in A**T * x = b or A**H * x = b. +* + IF( UPPER ) THEN + JFIRST = 1 + JLAST = N + JINC = 1 + ELSE + JFIRST = N + JLAST = 1 + JINC = -1 + END IF +* + IF( TSCAL.NE.ONE ) THEN + GROW = ZERO + GO TO 90 + END IF +* + IF( NOUNIT ) THEN +* +* A is non-unit triangular. +* +* Compute GROW = 1/G(j) and XBND = 1/M(j). +* Initially, M(0) = max{x(i), i=1,...,n}. +* + GROW = HALF / MAX( XBND, SMLNUM ) + XBND = GROW + DO 70 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) +* + XJ = ONE + CNORM( J ) + GROW = MIN( GROW, XBND / XJ ) +* + TJJS = A( J, J ) + TJJ = CABS1( TJJS ) +* + IF( TJJ.GE.SMLNUM ) THEN +* +* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) +* + IF( XJ.GT.TJJ ) + $ XBND = XBND*( TJJ / XJ ) + ELSE +* +* M(j) could overflow, set XBND to 0. +* + XBND = ZERO + END IF + 70 CONTINUE + GROW = MIN( GROW, XBND ) + ELSE +* +* A is unit triangular. +* +* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. +* + GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) ) + DO 80 J = JFIRST, JLAST, JINC +* +* Exit the loop if the growth factor is too small. +* + IF( GROW.LE.SMLNUM ) + $ GO TO 90 +* +* G(j) = ( 1 + CNORM(j) )*G(j-1) +* + XJ = ONE + CNORM( J ) + GROW = GROW / XJ + 80 CONTINUE + END IF + 90 CONTINUE + END IF +* + IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN +* +* Use the Level 2 BLAS solve if the reciprocal of the bound on +* elements of X is not too small. +* + CALL ZTRSV( UPLO, TRANS, DIAG, N, A, LDA, X, 1 ) + ELSE +* +* Use a Level 1 BLAS solve, scaling intermediate results. +* + IF( XMAX.GT.BIGNUM*HALF ) THEN +* +* Scale X so that its components are less than or equal to +* BIGNUM in absolute value. +* + SCALE = ( BIGNUM*HALF ) / XMAX + CALL ZDSCAL( N, SCALE, X, 1 ) + XMAX = BIGNUM + ELSE + XMAX = XMAX*TWO + END IF +* + IF( NOTRAN ) THEN +* +* Solve A * x = b +* + DO 120 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) / A(j,j), scaling x if necessary. +* + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 110 + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by 1/b(j). +* + REC = ONE / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM +* to avoid overflow when dividing by A(j,j). +* + REC = ( TJJ*BIGNUM ) / XJ + IF( CNORM( J ).GT.ONE ) THEN +* +* Scale by 1/CNORM(j) to avoid overflow when +* multiplying x(j) times column j. +* + REC = REC / CNORM( J ) + END IF + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + XJ = CABS1( X( J ) ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0, and compute a solution to A*x = 0. +* + DO 100 I = 1, N + X( I ) = ZERO + 100 CONTINUE + X( J ) = ONE + XJ = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 110 CONTINUE +* +* Scale x if necessary to avoid overflow when adding a +* multiple of column j of A. +* + IF( XJ.GT.ONE ) THEN + REC = ONE / XJ + IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN +* +* Scale x by 1/(2*abs(x(j))). +* + REC = REC*HALF + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + END IF + ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN +* +* Scale x by 1/2. +* + CALL ZDSCAL( N, HALF, X, 1 ) + SCALE = SCALE*HALF + END IF +* + IF( UPPER ) THEN + IF( J.GT.1 ) THEN +* +* Compute the update +* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) +* + CALL ZAXPY( J-1, -X( J )*TSCAL, A( 1, J ), 1, X, + $ 1 ) + I = IZAMAX( J-1, X, 1 ) + XMAX = CABS1( X( I ) ) + END IF + ELSE + IF( J.LT.N ) THEN +* +* Compute the update +* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) +* + CALL ZAXPY( N-J, -X( J )*TSCAL, A( J+1, J ), 1, + $ X( J+1 ), 1 ) + I = J + IZAMAX( N-J, X( J+1 ), 1 ) + XMAX = CABS1( X( I ) ) + END IF + END IF + 120 CONTINUE +* + ELSE IF( LSAME( TRANS, 'T' ) ) THEN +* +* Solve A**T * x = b +* + DO 170 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = ZLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.DCMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call ZDOTU to perform the dot product. +* + IF( UPPER ) THEN + CSUMJ = ZDOTU( J-1, A( 1, J ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + CSUMJ = ZDOTU( N-J, A( J+1, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 130 I = 1, J - 1 + CSUMJ = CSUMJ + ( A( I, J )*USCAL )*X( I ) + 130 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 140 I = J + 1, N + CSUMJ = CSUMJ + ( A( I, J )*USCAL )*X( I ) + 140 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = A( J, J )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 160 + END IF +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**T *x = 0. +* + DO 150 I = 1, N + X( I ) = ZERO + 150 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 160 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + 170 CONTINUE +* + ELSE +* +* Solve A**H * x = b +* + DO 220 J = JFIRST, JLAST, JINC +* +* Compute x(j) = b(j) - sum A(k,j)*x(k). +* k<>j +* + XJ = CABS1( X( J ) ) + USCAL = TSCAL + REC = ONE / MAX( XMAX, ONE ) + IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN +* +* If x(j) could overflow, scale x by 1/(2*XMAX). +* + REC = REC*HALF + IF( NOUNIT ) THEN + TJJS = DCONJG( A( J, J ) )*TSCAL + ELSE + TJJS = TSCAL + END IF + TJJ = CABS1( TJJS ) + IF( TJJ.GT.ONE ) THEN +* +* Divide by A(j,j) when scaling x if A(j,j) > 1. +* + REC = MIN( ONE, REC*TJJ ) + USCAL = ZLADIV( USCAL, TJJS ) + END IF + IF( REC.LT.ONE ) THEN + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF +* + CSUMJ = ZERO + IF( USCAL.EQ.DCMPLX( ONE ) ) THEN +* +* If the scaling needed for A in the dot product is 1, +* call ZDOTC to perform the dot product. +* + IF( UPPER ) THEN + CSUMJ = ZDOTC( J-1, A( 1, J ), 1, X, 1 ) + ELSE IF( J.LT.N ) THEN + CSUMJ = ZDOTC( N-J, A( J+1, J ), 1, X( J+1 ), 1 ) + END IF + ELSE +* +* Otherwise, use in-line code for the dot product. +* + IF( UPPER ) THEN + DO 180 I = 1, J - 1 + CSUMJ = CSUMJ + ( DCONJG( A( I, J ) )*USCAL )* + $ X( I ) + 180 CONTINUE + ELSE IF( J.LT.N ) THEN + DO 190 I = J + 1, N + CSUMJ = CSUMJ + ( DCONJG( A( I, J ) )*USCAL )* + $ X( I ) + 190 CONTINUE + END IF + END IF +* + IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN +* +* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) +* was not used to scale the dotproduct. +* + X( J ) = X( J ) - CSUMJ + XJ = CABS1( X( J ) ) + IF( NOUNIT ) THEN + TJJS = DCONJG( A( J, J ) )*TSCAL + ELSE + TJJS = TSCAL + IF( TSCAL.EQ.ONE ) + $ GO TO 210 + END IF +* +* Compute x(j) = x(j) / A(j,j), scaling if necessary. +* + TJJ = CABS1( TJJS ) + IF( TJJ.GT.SMLNUM ) THEN +* +* abs(A(j,j)) > SMLNUM: +* + IF( TJJ.LT.ONE ) THEN + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale X by 1/abs(x(j)). +* + REC = ONE / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE IF( TJJ.GT.ZERO ) THEN +* +* 0 < abs(A(j,j)) <= SMLNUM: +* + IF( XJ.GT.TJJ*BIGNUM ) THEN +* +* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. +* + REC = ( TJJ*BIGNUM ) / XJ + CALL ZDSCAL( N, REC, X, 1 ) + SCALE = SCALE*REC + XMAX = XMAX*REC + END IF + X( J ) = ZLADIV( X( J ), TJJS ) + ELSE +* +* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and +* scale = 0 and compute a solution to A**H *x = 0. +* + DO 200 I = 1, N + X( I ) = ZERO + 200 CONTINUE + X( J ) = ONE + SCALE = ZERO + XMAX = ZERO + END IF + 210 CONTINUE + ELSE +* +* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot +* product has already been divided by 1/A(j,j). +* + X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ + END IF + XMAX = MAX( XMAX, CABS1( X( J ) ) ) + 220 CONTINUE + END IF + SCALE = SCALE / TSCAL + END IF +* +* Scale the column norms by 1/TSCAL for return. +* + IF( TSCAL.NE.ONE ) THEN + CALL DSCAL( N, ONE / TSCAL, CNORM, 1 ) + END IF +* + RETURN +* +* End of ZLATRS +* + END diff --git a/dspl/liblapack/SRC/zlatrz.f b/dspl/liblapack/SRC/zlatrz.f new file mode 100644 index 0000000..5d415fe --- /dev/null +++ b/dspl/liblapack/SRC/zlatrz.f @@ -0,0 +1,206 @@ +*> \brief \b ZLATRZ factors an upper trapezoidal matrix by means of unitary transformations. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLATRZ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLATRZ( M, N, L, A, LDA, TAU, WORK ) +* +* .. Scalar Arguments .. +* INTEGER L, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLATRZ factors the M-by-(M+L) complex upper trapezoidal matrix +*> [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R 0 ) * Z by means +*> of unitary transformations, where Z is an (M+L)-by-(M+L) unitary +*> matrix and, R and A1 are M-by-M upper triangular matrices. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of columns of the matrix A containing the +*> meaningful part of the Householder vectors. N-M >= L >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the leading M-by-N upper trapezoidal part of the +*> array A must contain the matrix to be factorized. +*> On exit, the leading M-by-M upper triangular part of A +*> contains the upper triangular matrix R, and elements N-L+1 to +*> N of the first M rows of A, with the array TAU, represent the +*> unitary matrix Z as a product of M elementary reflectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (M) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (M) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The factorization is obtained by Householder's method. The kth +*> transformation matrix, Z( k ), which is used to introduce zeros into +*> the ( m - k + 1 )th row of A, is given in the form +*> +*> Z( k ) = ( I 0 ), +*> ( 0 T( k ) ) +*> +*> where +*> +*> T( k ) = I - tau*u( k )*u( k )**H, u( k ) = ( 1 ), +*> ( 0 ) +*> ( z( k ) ) +*> +*> tau is a scalar and z( k ) is an l element vector. tau and z( k ) +*> are chosen to annihilate the elements of the kth row of A2. +*> +*> The scalar tau is returned in the kth element of TAU and the vector +*> u( k ) in the kth row of A2, such that the elements of z( k ) are +*> in a( k, l + 1 ), ..., a( k, n ). The elements of R are returned in +*> the upper triangular part of A1. +*> +*> Z is given by +*> +*> Z = Z( 1 ) * Z( 2 ) * ... * Z( m ). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLATRZ( M, N, L, A, LDA, TAU, WORK ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER L, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL ZLACGV, ZLARFG, ZLARZ +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.EQ.0 ) THEN + RETURN + ELSE IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + RETURN + END IF +* + DO 20 I = M, 1, -1 +* +* Generate elementary reflector H(i) to annihilate +* [ A(i,i) A(i,n-l+1:n) ] +* + CALL ZLACGV( L, A( I, N-L+1 ), LDA ) + ALPHA = DCONJG( A( I, I ) ) + CALL ZLARFG( L+1, ALPHA, A( I, N-L+1 ), LDA, TAU( I ) ) + TAU( I ) = DCONJG( TAU( I ) ) +* +* Apply H(i) to A(1:i-1,i:n) from the right +* + CALL ZLARZ( 'Right', I-1, N-I+1, L, A( I, N-L+1 ), LDA, + $ DCONJG( TAU( I ) ), A( 1, I ), LDA, WORK ) + A( I, I ) = DCONJG( ALPHA ) +* + 20 CONTINUE +* + RETURN +* +* End of ZLATRZ +* + END diff --git a/dspl/liblapack/SRC/zlatsqr.f b/dspl/liblapack/SRC/zlatsqr.f new file mode 100644 index 0000000..1fdf3be --- /dev/null +++ b/dspl/liblapack/SRC/zlatsqr.f @@ -0,0 +1,255 @@ +* +* Definition: +* =========== +* +* SUBROUTINE ZLATSQR( M, N, MB, NB, A, LDA, T, LDT, WORK, +* LWORK, INFO) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, M, N, MB, NB, LDT, LWORK +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> SLATSQR computes a blocked Tall-Skinny QR factorization of +*> an M-by-N matrix A, where M >= N: +*> A = Q * R . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The row block size to be used in the blocked QR. +*> MB > N. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The column block size to be used in the blocked QR. +*> N >= NB >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, the elements on and above the diagonal +*> of the array contain the N-by-N upper triangular matrix R; +*> the elements below the diagonal represent Q by the columns +*> of blocked V (see Further Details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, +*> dimension (LDT, N * Number_of_row_blocks) +*> where Number_of_row_blocks = CEIL((M-N)/(MB-N)) +*> The blocked upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. +*> See Further Details below. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The dimension of the array WORK. LWORK >= NB*N. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> Tall-Skinny QR (TSQR) performs QR by a sequence of orthogonal transformations, +*> representing Q as a product of other orthogonal matrices +*> Q = Q(1) * Q(2) * . . . * Q(k) +*> where each Q(i) zeros out subdiagonal entries of a block of MB rows of A: +*> Q(1) zeros out the subdiagonal entries of rows 1:MB of A +*> Q(2) zeros out the bottom MB-N rows of rows [1:N,MB+1:2*MB-N] of A +*> Q(3) zeros out the bottom MB-N rows of rows [1:N,2*MB-N+1:3*MB-2*N] of A +*> . . . +*> +*> Q(1) is computed by GEQRT, which represents Q(1) by Householder vectors +*> stored under the diagonal of rows 1:MB of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,1:N). +*> For more information see Further Details in GEQRT. +*> +*> Q(i) for i>1 is computed by TPQRT, which represents Q(i) by Householder vectors +*> stored in rows [(i-1)*(MB-N)+N+1:i*(MB-N)+N] of A, and by upper triangular +*> block reflectors, stored in array T(1:LDT,(i-1)*N+1:i*N). +*> The last Q(k) may use fewer rows. +*> For more information see Further Details in TPQRT. +*> +*> For more details of the overall algorithm, see the description of +*> Sequential TSQR in Section 2.2 of [1]. +*> +*> [1] “Communication-Optimal Parallel and Sequential QR and LU Factorizations,” +*> J. Demmel, L. Grigori, M. Hoemmen, J. Langou, +*> SIAM J. Sci. Comput, vol. 34, no. 1, 2012 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLATSQR( M, N, MB, NB, A, LDA, T, LDT, WORK, + $ LWORK, INFO) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. -- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, M, N, MB, NB, LDT, LWORK +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), WORK( * ), T(LDT, *) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, II, KK, CTR +* .. +* .. EXTERNAL FUNCTIONS .. + LOGICAL LSAME + EXTERNAL LSAME +* .. EXTERNAL SUBROUTINES .. + EXTERNAL ZGEQRT, ZTPQRT, XERBLA +* .. INTRINSIC FUNCTIONS .. + INTRINSIC MAX, MIN, MOD +* .. +* .. EXECUTABLE STATEMENTS .. +* +* TEST THE INPUT ARGUMENTS +* + INFO = 0 +* + LQUERY = ( LWORK.EQ.-1 ) +* + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. M.LT.N ) THEN + INFO = -2 + ELSE IF( MB.LE.N ) THEN + INFO = -3 + ELSE IF( NB.LT.1 .OR. ( NB.GT.N .AND. N.GT.0 )) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDT.LT.NB ) THEN + INFO = -8 + ELSE IF( LWORK.LT.(N*NB) .AND. (.NOT.LQUERY) ) THEN + INFO = -10 + END IF + IF( INFO.EQ.0) THEN + WORK(1) = NB*N + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLATSQR', -INFO ) + RETURN + ELSE IF (LQUERY) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( MIN(M,N).EQ.0 ) THEN + RETURN + END IF +* +* The QR Decomposition +* + IF ((MB.LE.N).OR.(MB.GE.M)) THEN + CALL ZGEQRT( M, N, NB, A, LDA, T, LDT, WORK, INFO) + RETURN + END IF + KK = MOD((M-N),(MB-N)) + II=M-KK+1 +* +* Compute the QR factorization of the first block A(1:MB,1:N) +* + CALL ZGEQRT( MB, N, NB, A(1,1), LDA, T, LDT, WORK, INFO ) + CTR = 1 +* + DO I = MB+1, II-MB+N , (MB-N) +* +* Compute the QR factorization of the current block A(I:I+MB-N,1:N) +* + CALL ZTPQRT( MB-N, N, 0, NB, A(1,1), LDA, A( I, 1 ), LDA, + $ T(1, CTR * N + 1), + $ LDT, WORK, INFO ) + CTR = CTR + 1 + END DO +* +* Compute the QR factorization of the last block A(II:M,1:N) +* + IF (II.LE.M) THEN + CALL ZTPQRT( KK, N, 0, NB, A(1,1), LDA, A( II, 1 ), LDA, + $ T(1,CTR * N + 1), LDT, + $ WORK, INFO ) + END IF +* + work( 1 ) = N*NB + RETURN +* +* End of ZLATSQR +* + END diff --git a/dspl/liblapack/SRC/zlauu2.f b/dspl/liblapack/SRC/zlauu2.f new file mode 100644 index 0000000..3e3623f --- /dev/null +++ b/dspl/liblapack/SRC/zlauu2.f @@ -0,0 +1,206 @@ +*> \brief \b ZLAUU2 computes the product UUH or LHL, where U and L are upper or lower triangular matrices (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAUU2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAUU2( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAUU2 computes the product U * U**H or L**H * L, where the triangular +*> factor U or L is stored in the upper or lower triangular part of +*> the array A. +*> +*> If UPLO = 'U' or 'u' then the upper triangle of the result is stored, +*> overwriting the factor U in A. +*> If UPLO = 'L' or 'l' then the lower triangle of the result is stored, +*> overwriting the factor L in A. +*> +*> This is the unblocked form of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the triangular factor stored in the array A +*> is upper or lower triangular: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the triangular factor U or L. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the triangular factor U or L. +*> On exit, if UPLO = 'U', the upper triangle of A is +*> overwritten with the upper triangle of the product U * U**H; +*> if UPLO = 'L', the lower triangle of A is overwritten with +*> the lower triangle of the product L**H * L. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLAUU2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + DOUBLE PRECISION AII +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZGEMV, ZLACGV +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLAUU2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the product U * U**H. +* + DO 10 I = 1, N + AII = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = AII*AII + DBLE( ZDOTC( N-I, A( I, I+1 ), LDA, + $ A( I, I+1 ), LDA ) ) + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + CALL ZGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ), + $ LDA, A( I, I+1 ), LDA, DCMPLX( AII ), + $ A( 1, I ), 1 ) + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + ELSE + CALL ZDSCAL( I, AII, A( 1, I ), 1 ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the product L**H * L. +* + DO 20 I = 1, N + AII = A( I, I ) + IF( I.LT.N ) THEN + A( I, I ) = AII*AII + DBLE( ZDOTC( N-I, A( I+1, I ), 1, + $ A( I+1, I ), 1 ) ) + CALL ZLACGV( I-1, A( I, 1 ), LDA ) + CALL ZGEMV( 'Conjugate transpose', N-I, I-1, ONE, + $ A( I+1, 1 ), LDA, A( I+1, I ), 1, + $ DCMPLX( AII ), A( I, 1 ), LDA ) + CALL ZLACGV( I-1, A( I, 1 ), LDA ) + ELSE + CALL ZDSCAL( I, AII, A( I, 1 ), LDA ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of ZLAUU2 +* + END diff --git a/dspl/liblapack/SRC/zlauum.f b/dspl/liblapack/SRC/zlauum.f new file mode 100644 index 0000000..5afadda --- /dev/null +++ b/dspl/liblapack/SRC/zlauum.f @@ -0,0 +1,223 @@ +*> \brief \b ZLAUUM computes the product UUH or LHL, where U and L are upper or lower triangular matrices (blocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLAUUM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLAUUM( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLAUUM computes the product U * U**H or L**H * L, where the triangular +*> factor U or L is stored in the upper or lower triangular part of +*> the array A. +*> +*> If UPLO = 'U' or 'u' then the upper triangle of the result is stored, +*> overwriting the factor U in A. +*> If UPLO = 'L' or 'l' then the lower triangle of the result is stored, +*> overwriting the factor L in A. +*> +*> This is the blocked form of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the triangular factor stored in the array A +*> is upper or lower triangular: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the triangular factor U or L. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the triangular factor U or L. +*> On exit, if UPLO = 'U', the upper triangle of A is +*> overwritten with the upper triangle of the product U * U**H; +*> if UPLO = 'L', the lower triangle of A is overwritten with +*> the lower triangle of the product L**H * L. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLAUUM( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMM, ZHERK, ZLAUU2, ZTRMM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLAUUM', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'ZLAUUM', UPLO, N, -1, -1, -1 ) +* + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL ZLAUU2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute the product U * U**H. +* + DO 10 I = 1, N, NB + IB = MIN( NB, N-I+1 ) + CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Non-unit', I-1, IB, CONE, A( I, I ), LDA, + $ A( 1, I ), LDA ) + CALL ZLAUU2( 'Upper', IB, A( I, I ), LDA, INFO ) + IF( I+IB.LE.N ) THEN + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ I-1, IB, N-I-IB+1, CONE, A( 1, I+IB ), + $ LDA, A( I, I+IB ), LDA, CONE, A( 1, I ), + $ LDA ) + CALL ZHERK( 'Upper', 'No transpose', IB, N-I-IB+1, + $ ONE, A( I, I+IB ), LDA, ONE, A( I, I ), + $ LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute the product L**H * L. +* + DO 20 I = 1, N, NB + IB = MIN( NB, N-I+1 ) + CALL ZTRMM( 'Left', 'Lower', 'Conjugate transpose', + $ 'Non-unit', IB, I-1, CONE, A( I, I ), LDA, + $ A( I, 1 ), LDA ) + CALL ZLAUU2( 'Lower', IB, A( I, I ), LDA, INFO ) + IF( I+IB.LE.N ) THEN + CALL ZGEMM( 'Conjugate transpose', 'No transpose', IB, + $ I-1, N-I-IB+1, CONE, A( I+IB, I ), LDA, + $ A( I+IB, 1 ), LDA, CONE, A( I, 1 ), LDA ) + CALL ZHERK( 'Lower', 'Conjugate transpose', IB, + $ N-I-IB+1, ONE, A( I+IB, I ), LDA, ONE, + $ A( I, I ), LDA ) + END IF + 20 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZLAUUM +* + END diff --git a/dspl/liblapack/SRC/zpbcon.f b/dspl/liblapack/SRC/zpbcon.f new file mode 100644 index 0000000..debbcae --- /dev/null +++ b/dspl/liblapack/SRC/zpbcon.f @@ -0,0 +1,277 @@ +*> \brief \b ZPBCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPBCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPBCON( UPLO, N, KD, AB, LDAB, ANORM, RCOND, WORK, +* RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 AB( LDAB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPBCON estimates the reciprocal of the condition number (in the +*> 1-norm) of a complex Hermitian positive definite band matrix using +*> the Cholesky factorization A = U**H*U or A = L*L**H computed by +*> ZPBTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular factor stored in AB; +*> = 'L': Lower triangular factor stored in AB. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H*U or A = L*L**H of the band matrix A, stored in the +*> first KD+1 rows of the array. The j-th column of U or L is +*> stored in the j-th column of the array AB as follows: +*> if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The 1-norm (or infinity-norm) of the Hermitian band matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZPBCON( UPLO, N, KD, AB, LDAB, ANORM, RCOND, WORK, + $ RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 AB( LDAB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + DOUBLE PRECISION AINVNM, SCALE, SCALEL, SCALEU, SMLNUM + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IZAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDRSCL, ZLACN2, ZLATBS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U**H). +* + CALL ZLATBS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, KD, AB, LDAB, WORK, SCALEL, RWORK, + $ INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL ZLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEU, RWORK, INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL ZLATBS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ KD, AB, LDAB, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L**H). +* + CALL ZLATBS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, KD, AB, LDAB, WORK, SCALEU, RWORK, + $ INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = IZAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL ZDRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE +* + RETURN +* +* End of ZPBCON +* + END diff --git a/dspl/liblapack/SRC/zpbequ.f b/dspl/liblapack/SRC/zpbequ.f new file mode 100644 index 0000000..d05fd82 --- /dev/null +++ b/dspl/liblapack/SRC/zpbequ.f @@ -0,0 +1,244 @@ +*> \brief \b ZPBEQU +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPBEQU + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, N +* DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION S( * ) +* COMPLEX*16 AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPBEQU computes row and column scalings intended to equilibrate a +*> Hermitian positive definite band matrix A and reduce its condition +*> number (with respect to the two-norm). S contains the scale factors, +*> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with +*> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This +*> choice of S puts the condition number of B within a factor N of the +*> smallest possible condition number over all possible diagonal +*> scalings. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular of A is stored; +*> = 'L': Lower triangular of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> The upper or lower triangle of the Hermitian band matrix A, +*> stored in the first KD+1 rows of the array. The j-th column +*> of A is stored in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array A. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is DOUBLE PRECISION +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION S( * ) + COMPLEX*16 AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J + DOUBLE PRECISION SMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPBEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* + IF( UPPER ) THEN + J = KD + 1 + ELSE + J = 1 + END IF +* +* Initialize SMIN and AMAX. +* + S( 1 ) = DBLE( AB( J, 1 ) ) + SMIN = S( 1 ) + AMAX = S( 1 ) +* +* Find the minimum and maximum diagonal elements. +* + DO 10 I = 2, N + S( I ) = DBLE( AB( J, I ) ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 20 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 20 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 30 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 30 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of ZPBEQU +* + END diff --git a/dspl/liblapack/SRC/zpbrfs.f b/dspl/liblapack/SRC/zpbrfs.f new file mode 100644 index 0000000..4e62245 --- /dev/null +++ b/dspl/liblapack/SRC/zpbrfs.f @@ -0,0 +1,448 @@ +*> \brief \b ZPBRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPBRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, +* LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPBRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is Hermitian positive definite +*> and banded, and provides error bounds and backward error estimates +*> for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> The upper or lower triangle of the Hermitian band matrix A, +*> stored in the first KD+1 rows of the array. The j-th column +*> of A is stored in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in] AFB +*> \verbatim +*> AFB is COMPLEX*16 array, dimension (LDAFB,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H*U or A = L*L**H of the band matrix A as computed by +*> ZPBTRF, in the same storage format as A (see AB). +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= KD+1. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by ZPBTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, + $ LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, L, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZHBMV, ZLACN2, ZPBTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDAFB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = MIN( N+1, 2*KD+2 ) + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL ZHBMV( UPLO, N, KD, -ONE, AB, LDAB, X( 1, J ), 1, ONE, + $ WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + L = KD + 1 - K + DO 40 I = MAX( 1, K-KD ), K - 1 + RWORK( I ) = RWORK( I ) + CABS1( AB( L+I, K ) )*XK + S = S + CABS1( AB( L+I, K ) )*CABS1( X( I, J ) ) + 40 CONTINUE + RWORK( K ) = RWORK( K ) + ABS( DBLE( AB( KD+1, K ) ) )* + $ XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + ABS( DBLE( AB( 1, K ) ) )*XK + L = 1 - K + DO 60 I = K + 1, MIN( N, K+KD ) + RWORK( I ) = RWORK( I ) + CABS1( AB( L+I, K ) )*XK + S = S + CABS1( AB( L+I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK, N, INFO ) + CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**H). +* + CALL ZPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL ZPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of ZPBRFS +* + END diff --git a/dspl/liblapack/SRC/zpbstf.f b/dspl/liblapack/SRC/zpbstf.f new file mode 100644 index 0000000..b448210 --- /dev/null +++ b/dspl/liblapack/SRC/zpbstf.f @@ -0,0 +1,332 @@ +*> \brief \b ZPBSTF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPBSTF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPBSTF( UPLO, N, KD, AB, LDAB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. +* COMPLEX*16 AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPBSTF computes a split Cholesky factorization of a complex +*> Hermitian positive definite band matrix A. +*> +*> This routine is designed to be used in conjunction with ZHBGST. +*> +*> The factorization has the form A = S**H*S where S is a band matrix +*> of the same bandwidth as A and the following structure: +*> +*> S = ( U ) +*> ( M L ) +*> +*> where U is upper triangular of order m = (n+kd)/2, and L is lower +*> triangular of order n-m. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first kd+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, if INFO = 0, the factor S from the split Cholesky +*> factorization A = S**H*S. See Further Details. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the factorization could not be completed, +*> because the updated element a(i,i) was negative; the +*> matrix A is not positive definite. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> N = 7, KD = 2: +*> +*> S = ( s11 s12 s13 ) +*> ( s22 s23 s24 ) +*> ( s33 s34 ) +*> ( s44 ) +*> ( s53 s54 s55 ) +*> ( s64 s65 s66 ) +*> ( s75 s76 s77 ) +*> +*> If UPLO = 'U', the array AB holds: +*> +*> on entry: on exit: +*> +*> * * a13 a24 a35 a46 a57 * * s13 s24 s53**H s64**H s75**H +*> * a12 a23 a34 a45 a56 a67 * s12 s23 s34 s54**H s65**H s76**H +*> a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 +*> +*> If UPLO = 'L', the array AB holds: +*> +*> on entry: on exit: +*> +*> a11 a22 a33 a44 a55 a66 a77 s11 s22 s33 s44 s55 s66 s77 +*> a21 a32 a43 a54 a65 a76 * s12**H s23**H s34**H s54 s65 s76 * +*> a31 a42 a53 a64 a64 * * s13**H s24**H s53 s64 s75 * * +*> +*> Array elements marked * are not used by the routine; s12**H denotes +*> conjg(s12); the diagonal elements of S are real. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZPBSTF( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + COMPLEX*16 AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, KLD, KM, M + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZHER, ZLACGV +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPBSTF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + KLD = MAX( 1, LDAB-1 ) +* +* Set the splitting point m. +* + M = ( N+KD ) / 2 +* + IF( UPPER ) THEN +* +* Factorize A(m+1:n,m+1:n) as L**H*L, and update A(1:m,1:m). +* + DO 10 J = N, M + 1, -1 +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = DBLE( AB( KD+1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( KD+1, J ) = AJJ + GO TO 50 + END IF + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ + KM = MIN( J-1, KD ) +* +* Compute elements j-km:j-1 of the j-th column and update the +* the leading submatrix within the band. +* + CALL ZDSCAL( KM, ONE / AJJ, AB( KD+1-KM, J ), 1 ) + CALL ZHER( 'Upper', KM, -ONE, AB( KD+1-KM, J ), 1, + $ AB( KD+1, J-KM ), KLD ) + 10 CONTINUE +* +* Factorize the updated submatrix A(1:m,1:m) as U**H*U. +* + DO 20 J = 1, M +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = DBLE( AB( KD+1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( KD+1, J ) = AJJ + GO TO 50 + END IF + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ + KM = MIN( KD, M-J ) +* +* Compute elements j+1:j+km of the j-th row and update the +* trailing submatrix within the band. +* + IF( KM.GT.0 ) THEN + CALL ZDSCAL( KM, ONE / AJJ, AB( KD, J+1 ), KLD ) + CALL ZLACGV( KM, AB( KD, J+1 ), KLD ) + CALL ZHER( 'Upper', KM, -ONE, AB( KD, J+1 ), KLD, + $ AB( KD+1, J+1 ), KLD ) + CALL ZLACGV( KM, AB( KD, J+1 ), KLD ) + END IF + 20 CONTINUE + ELSE +* +* Factorize A(m+1:n,m+1:n) as L**H*L, and update A(1:m,1:m). +* + DO 30 J = N, M + 1, -1 +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = DBLE( AB( 1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( 1, J ) = AJJ + GO TO 50 + END IF + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ + KM = MIN( J-1, KD ) +* +* Compute elements j-km:j-1 of the j-th row and update the +* trailing submatrix within the band. +* + CALL ZDSCAL( KM, ONE / AJJ, AB( KM+1, J-KM ), KLD ) + CALL ZLACGV( KM, AB( KM+1, J-KM ), KLD ) + CALL ZHER( 'Lower', KM, -ONE, AB( KM+1, J-KM ), KLD, + $ AB( 1, J-KM ), KLD ) + CALL ZLACGV( KM, AB( KM+1, J-KM ), KLD ) + 30 CONTINUE +* +* Factorize the updated submatrix A(1:m,1:m) as U**H*U. +* + DO 40 J = 1, M +* +* Compute s(j,j) and test for non-positive-definiteness. +* + AJJ = DBLE( AB( 1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( 1, J ) = AJJ + GO TO 50 + END IF + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ + KM = MIN( KD, M-J ) +* +* Compute elements j+1:j+km of the j-th column and update the +* trailing submatrix within the band. +* + IF( KM.GT.0 ) THEN + CALL ZDSCAL( KM, ONE / AJJ, AB( 2, J ), 1 ) + CALL ZHER( 'Lower', KM, -ONE, AB( 2, J ), 1, + $ AB( 1, J+1 ), KLD ) + END IF + 40 CONTINUE + END IF + RETURN +* + 50 CONTINUE + INFO = J + RETURN +* +* End of ZPBSTF +* + END diff --git a/dspl/liblapack/SRC/zpbsv.f b/dspl/liblapack/SRC/zpbsv.f new file mode 100644 index 0000000..4daf6b3 --- /dev/null +++ b/dspl/liblapack/SRC/zpbsv.f @@ -0,0 +1,229 @@ +*> \brief ZPBSV computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPBSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPBSV( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX*16 AB( LDAB, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPBSV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian positive definite band matrix and X +*> and B are N-by-NRHS matrices. +*> +*> The Cholesky decomposition is used to factor A as +*> A = U**H * U, if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular band matrix, and L is a lower +*> triangular band matrix, with the same number of superdiagonals or +*> subdiagonals as A. The factored form of A is then used to solve the +*> system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(N,j+KD). +*> See below for further details. +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**H *U or A = L*L**H of the band +*> matrix A, in the same storage format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i of A is not +*> positive definite, so the factorization could not be +*> completed, and the solution has not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> N = 6, KD = 2, and UPLO = 'U': +*> +*> On entry: On exit: +*> +*> * * a13 a24 a35 a46 * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> +*> Similarly, if UPLO = 'L' the format of A is as follows: +*> +*> On entry: On exit: +*> +*> a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +*> a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +*> a31 a42 a53 a64 * * l31 l42 l53 l64 * * +*> +*> Array elements marked * are not used by the routine. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZPBSV( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 AB( LDAB, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZPBTRF, ZPBTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPBSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U**H *U or A = L*L**H. +* + CALL ZPBTRF( UPLO, N, KD, AB, LDAB, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* + END IF + RETURN +* +* End of ZPBSV +* + END diff --git a/dspl/liblapack/SRC/zpbsvx.f b/dspl/liblapack/SRC/zpbsvx.f new file mode 100644 index 0000000..bff6f36 --- /dev/null +++ b/dspl/liblapack/SRC/zpbsvx.f @@ -0,0 +1,543 @@ +*> \brief ZPBSVX computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPBSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPBSVX( FACT, UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, +* EQUED, S, B, LDB, X, LDX, RCOND, FERR, BERR, +* WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ), S( * ) +* COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPBSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to +*> compute the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian positive definite band matrix and X +*> and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +*> factor the matrix A (after equilibration if FACT = 'E') as +*> A = U**H * U, if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular band matrix, and L is a lower +*> triangular band matrix. +*> +*> 3. If the leading i-by-i principal minor is not positive definite, +*> then the routine returns with INFO = i. Otherwise, the factored +*> form of A is used to estimate the condition number of the matrix +*> A. If the reciprocal of the condition number is less than machine +*> precision, INFO = N+1 is returned as a warning, but the routine +*> still goes on to solve for X and compute error bounds as +*> described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(S) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AFB contains the factored form of A. +*> If EQUED = 'Y', the matrix A has been equilibrated +*> with scaling factors given by S. AB and AFB will not +*> be modified. +*> = 'N': The matrix A will be copied to AFB and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AFB and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right-hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array, except +*> if FACT = 'F' and EQUED = 'Y', then A must contain the +*> equilibrated matrix diag(S)*A*diag(S). The j-th column of A +*> is stored in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(N,j+KD). +*> See below for further details. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array A. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] AFB +*> \verbatim +*> AFB is COMPLEX*16 array, dimension (LDAFB,N) +*> If FACT = 'F', then AFB is an input argument and on entry +*> contains the triangular factor U or L from the Cholesky +*> factorization A = U**H *U or A = L*L**H of the band matrix +*> A, in the same storage format as A (see AB). If EQUED = 'Y', +*> then AFB is the factored form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AFB is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**H *U or A = L*L**H. +*> +*> If FACT = 'E', then AFB is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**H *U or A = L*L**H of the equilibrated +*> matrix A (see the description of A for the form of the +*> equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAFB +*> \verbatim +*> LDAFB is INTEGER +*> The leading dimension of the array AFB. LDAFB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A; not accessed if EQUED = 'N'. S is +*> an input argument if FACT = 'F'; otherwise, S is an output +*> argument. If FACT = 'F' and EQUED = 'Y', each element of S +*> must be positive. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +*> B is overwritten by diag(S) * B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +*> the original system of equations. Note that if EQUED = 'Y', +*> A and B are modified on exit, and the solution to the +*> equilibrated system is inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The estimate of the reciprocal condition number of the matrix +*> A after equilibration (if done). If RCOND is less than the +*> machine precision (in particular, if RCOND = 0), the matrix +*> is singular to working precision. This condition is +*> indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: the leading minor of order i of A is +*> not positive definite, so the factorization +*> could not be completed, and the solution has not +*> been computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16OTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> N = 6, KD = 2, and UPLO = 'U': +*> +*> Two-dimensional storage of the Hermitian matrix A: +*> +*> a11 a12 a13 +*> a22 a23 a24 +*> a33 a34 a35 +*> a44 a45 a46 +*> a55 a56 +*> (aij=conjg(aji)) a66 +*> +*> Band storage of the upper triangle of A: +*> +*> * * a13 a24 a35 a46 +*> * a12 a23 a34 a45 a56 +*> a11 a22 a33 a44 a55 a66 +*> +*> Similarly, if UPLO = 'L' the format of A is as follows: +*> +*> a11 a22 a33 a44 a55 a66 +*> a21 a32 a43 a54 a65 * +*> a31 a42 a53 a64 * * +*> +*> Array elements marked * are not used by the routine. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZPBSVX( FACT, UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, + $ EQUED, S, B, LDB, X, LDX, RCOND, FERR, BERR, + $ WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ), S( * ) + COMPLEX*16 AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU, UPPER + INTEGER I, INFEQU, J, J1, J2 + DOUBLE PRECISION AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHB + EXTERNAL LSAME, DLAMCH, ZLANHB +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZLACPY, ZLAQHB, ZPBCON, ZPBEQU, + $ ZPBRFS, ZPBTRF, ZPBTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + UPPER = LSAME( UPLO, 'U' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( KD.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + ELSE IF( LDAFB.LT.KD+1 ) THEN + INFO = -9 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPBSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL ZPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL ZLAQHB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U**H *U or A = L*L**H. +* + IF( UPPER ) THEN + DO 40 J = 1, N + J1 = MAX( J-KD, 1 ) + CALL ZCOPY( J-J1+1, AB( KD+1-J+J1, J ), 1, + $ AFB( KD+1-J+J1, J ), 1 ) + 40 CONTINUE + ELSE + DO 50 J = 1, N + J2 = MIN( J+KD, N ) + CALL ZCOPY( J2-J+1, AB( 1, J ), 1, AFB( 1, J ), 1 ) + 50 CONTINUE + END IF +* + CALL ZPBTRF( UPLO, N, KD, AFB, LDAFB, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = ZLANHB( '1', UPLO, N, KD, AB, LDAB, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL ZPBCON( UPLO, N, KD, AFB, LDAFB, ANORM, RCOND, WORK, RWORK, + $ INFO ) +* +* Compute the solution matrix X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZPBTRS( UPLO, N, KD, NRHS, AFB, LDAFB, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL ZPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 70 J = 1, NRHS + DO 60 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 60 CONTINUE + 70 CONTINUE + DO 80 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 80 CONTINUE + END IF +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of ZPBSVX +* + END diff --git a/dspl/liblapack/SRC/zpbtf2.f b/dspl/liblapack/SRC/zpbtf2.f new file mode 100644 index 0000000..b39f1de --- /dev/null +++ b/dspl/liblapack/SRC/zpbtf2.f @@ -0,0 +1,269 @@ +*> \brief \b ZPBTF2 computes the Cholesky factorization of a symmetric/Hermitian positive definite band matrix (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPBTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPBTF2( UPLO, N, KD, AB, LDAB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. +* COMPLEX*16 AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPBTF2 computes the Cholesky factorization of a complex Hermitian +*> positive definite band matrix A. +*> +*> The factorization has the form +*> A = U**H * U , if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix, U**H is the conjugate transpose +*> of U, and L is lower triangular. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of super-diagonals of the matrix A if UPLO = 'U', +*> or the number of sub-diagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**H *U or A = L*L**H of the band +*> matrix A, in the same storage format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, the leading minor of order k is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> N = 6, KD = 2, and UPLO = 'U': +*> +*> On entry: On exit: +*> +*> * * a13 a24 a35 a46 * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> +*> Similarly, if UPLO = 'L' the format of A is as follows: +*> +*> On entry: On exit: +*> +*> a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +*> a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +*> a31 a42 a53 a64 * * l31 l42 l53 l64 * * +*> +*> Array elements marked * are not used by the routine. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZPBTF2( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + COMPLEX*16 AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, KLD, KN + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZHER, ZLACGV +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPBTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + KLD = MAX( 1, LDAB-1 ) +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U**H * U. +* + DO 10 J = 1, N +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = DBLE( AB( KD+1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( KD+1, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + AB( KD+1, J ) = AJJ +* +* Compute elements J+1:J+KN of row J and update the +* trailing submatrix within the band. +* + KN = MIN( KD, N-J ) + IF( KN.GT.0 ) THEN + CALL ZDSCAL( KN, ONE / AJJ, AB( KD, J+1 ), KLD ) + CALL ZLACGV( KN, AB( KD, J+1 ), KLD ) + CALL ZHER( 'Upper', KN, -ONE, AB( KD, J+1 ), KLD, + $ AB( KD+1, J+1 ), KLD ) + CALL ZLACGV( KN, AB( KD, J+1 ), KLD ) + END IF + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L**H. +* + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = DBLE( AB( 1, J ) ) + IF( AJJ.LE.ZERO ) THEN + AB( 1, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + AB( 1, J ) = AJJ +* +* Compute elements J+1:J+KN of column J and update the +* trailing submatrix within the band. +* + KN = MIN( KD, N-J ) + IF( KN.GT.0 ) THEN + CALL ZDSCAL( KN, ONE / AJJ, AB( 2, J ), 1 ) + CALL ZHER( 'Lower', KN, -ONE, AB( 2, J ), 1, + $ AB( 1, J+1 ), KLD ) + END IF + 20 CONTINUE + END IF + RETURN +* + 30 CONTINUE + INFO = J + RETURN +* +* End of ZPBTF2 +* + END diff --git a/dspl/liblapack/SRC/zpbtrf.f b/dspl/liblapack/SRC/zpbtrf.f new file mode 100644 index 0000000..0eee3b6 --- /dev/null +++ b/dspl/liblapack/SRC/zpbtrf.f @@ -0,0 +1,442 @@ +*> \brief \b ZPBTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPBTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPBTRF( UPLO, N, KD, AB, LDAB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. +* COMPLEX*16 AB( LDAB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPBTRF computes the Cholesky factorization of a complex Hermitian +*> positive definite band matrix A. +*> +*> The factorization has the form +*> A = U**H * U, if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in,out] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> On entry, the upper or lower triangle of the Hermitian band +*> matrix A, stored in the first KD+1 rows of the array. The +*> j-th column of A is stored in the j-th column of the array AB +*> as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**H*U or A = L*L**H of the band +*> matrix A, in the same storage format as A. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The band storage scheme is illustrated by the following example, when +*> N = 6, KD = 2, and UPLO = 'U': +*> +*> On entry: On exit: +*> +*> * * a13 a24 a35 a46 * * u13 u24 u35 u46 +*> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 +*> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 +*> +*> Similarly, if UPLO = 'L' the format of A is as follows: +*> +*> On entry: On exit: +*> +*> a11 a22 a33 a44 a55 a66 l11 l22 l33 l44 l55 l66 +*> a21 a32 a43 a54 a65 * l21 l32 l43 l54 l65 * +*> a31 a42 a53 a64 * * l31 l42 l53 l64 * * +*> +*> Array elements marked * are not used by the routine. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989 +* +* ===================================================================== + SUBROUTINE ZPBTRF( UPLO, N, KD, AB, LDAB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, N +* .. +* .. Array Arguments .. + COMPLEX*16 AB( LDAB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) + INTEGER NBMAX, LDWORK + PARAMETER ( NBMAX = 32, LDWORK = NBMAX+1 ) +* .. +* .. Local Scalars .. + INTEGER I, I2, I3, IB, II, J, JJ, NB +* .. +* .. Local Arrays .. + COMPLEX*16 WORK( LDWORK, NBMAX ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMM, ZHERK, ZPBTF2, ZPOTF2, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( ( .NOT.LSAME( UPLO, 'U' ) ) .AND. + $ ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPBTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment +* + NB = ILAENV( 1, 'ZPBTRF', UPLO, N, KD, -1, -1 ) +* +* The block size must not exceed the semi-bandwidth KD, and must not +* exceed the limit set by the size of the local array WORK. +* + NB = MIN( NB, NBMAX ) +* + IF( NB.LE.1 .OR. NB.GT.KD ) THEN +* +* Use unblocked code +* + CALL ZPBTF2( UPLO, N, KD, AB, LDAB, INFO ) + ELSE +* +* Use blocked code +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Compute the Cholesky factorization of a Hermitian band +* matrix, given the upper triangle of the matrix in band +* storage. +* +* Zero the upper triangle of the work array. +* + DO 20 J = 1, NB + DO 10 I = 1, J - 1 + WORK( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE +* +* Process the band matrix one diagonal block at a time. +* + DO 70 I = 1, N, NB + IB = MIN( NB, N-I+1 ) +* +* Factorize the diagonal block +* + CALL ZPOTF2( UPLO, IB, AB( KD+1, I ), LDAB-1, II ) + IF( II.NE.0 ) THEN + INFO = I + II - 1 + GO TO 150 + END IF + IF( I+IB.LE.N ) THEN +* +* Update the relevant part of the trailing submatrix. +* If A11 denotes the diagonal block which has just been +* factorized, then we need to update the remaining +* blocks in the diagram: +* +* A11 A12 A13 +* A22 A23 +* A33 +* +* The numbers of rows and columns in the partitioning +* are IB, I2, I3 respectively. The blocks A12, A22 and +* A23 are empty if IB = KD. The upper triangle of A13 +* lies outside the band. +* + I2 = MIN( KD-IB, N-I-IB+1 ) + I3 = MIN( IB, N-I-KD+1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A12 +* + CALL ZTRSM( 'Left', 'Upper', 'Conjugate transpose', + $ 'Non-unit', IB, I2, CONE, + $ AB( KD+1, I ), LDAB-1, + $ AB( KD+1-IB, I+IB ), LDAB-1 ) +* +* Update A22 +* + CALL ZHERK( 'Upper', 'Conjugate transpose', I2, IB, + $ -ONE, AB( KD+1-IB, I+IB ), LDAB-1, ONE, + $ AB( KD+1, I+IB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Copy the lower triangle of A13 into the work array. +* + DO 40 JJ = 1, I3 + DO 30 II = JJ, IB + WORK( II, JJ ) = AB( II-JJ+1, JJ+I+KD-1 ) + 30 CONTINUE + 40 CONTINUE +* +* Update A13 (in the work array). +* + CALL ZTRSM( 'Left', 'Upper', 'Conjugate transpose', + $ 'Non-unit', IB, I3, CONE, + $ AB( KD+1, I ), LDAB-1, WORK, LDWORK ) +* +* Update A23 +* + IF( I2.GT.0 ) + $ CALL ZGEMM( 'Conjugate transpose', + $ 'No transpose', I2, I3, IB, -CONE, + $ AB( KD+1-IB, I+IB ), LDAB-1, WORK, + $ LDWORK, CONE, AB( 1+IB, I+KD ), + $ LDAB-1 ) +* +* Update A33 +* + CALL ZHERK( 'Upper', 'Conjugate transpose', I3, IB, + $ -ONE, WORK, LDWORK, ONE, + $ AB( KD+1, I+KD ), LDAB-1 ) +* +* Copy the lower triangle of A13 back into place. +* + DO 60 JJ = 1, I3 + DO 50 II = JJ, IB + AB( II-JJ+1, JJ+I+KD-1 ) = WORK( II, JJ ) + 50 CONTINUE + 60 CONTINUE + END IF + END IF + 70 CONTINUE + ELSE +* +* Compute the Cholesky factorization of a Hermitian band +* matrix, given the lower triangle of the matrix in band +* storage. +* +* Zero the lower triangle of the work array. +* + DO 90 J = 1, NB + DO 80 I = J + 1, NB + WORK( I, J ) = ZERO + 80 CONTINUE + 90 CONTINUE +* +* Process the band matrix one diagonal block at a time. +* + DO 140 I = 1, N, NB + IB = MIN( NB, N-I+1 ) +* +* Factorize the diagonal block +* + CALL ZPOTF2( UPLO, IB, AB( 1, I ), LDAB-1, II ) + IF( II.NE.0 ) THEN + INFO = I + II - 1 + GO TO 150 + END IF + IF( I+IB.LE.N ) THEN +* +* Update the relevant part of the trailing submatrix. +* If A11 denotes the diagonal block which has just been +* factorized, then we need to update the remaining +* blocks in the diagram: +* +* A11 +* A21 A22 +* A31 A32 A33 +* +* The numbers of rows and columns in the partitioning +* are IB, I2, I3 respectively. The blocks A21, A22 and +* A32 are empty if IB = KD. The lower triangle of A31 +* lies outside the band. +* + I2 = MIN( KD-IB, N-I-IB+1 ) + I3 = MIN( IB, N-I-KD+1 ) +* + IF( I2.GT.0 ) THEN +* +* Update A21 +* + CALL ZTRSM( 'Right', 'Lower', + $ 'Conjugate transpose', 'Non-unit', I2, + $ IB, CONE, AB( 1, I ), LDAB-1, + $ AB( 1+IB, I ), LDAB-1 ) +* +* Update A22 +* + CALL ZHERK( 'Lower', 'No transpose', I2, IB, -ONE, + $ AB( 1+IB, I ), LDAB-1, ONE, + $ AB( 1, I+IB ), LDAB-1 ) + END IF +* + IF( I3.GT.0 ) THEN +* +* Copy the upper triangle of A31 into the work array. +* + DO 110 JJ = 1, IB + DO 100 II = 1, MIN( JJ, I3 ) + WORK( II, JJ ) = AB( KD+1-JJ+II, JJ+I-1 ) + 100 CONTINUE + 110 CONTINUE +* +* Update A31 (in the work array). +* + CALL ZTRSM( 'Right', 'Lower', + $ 'Conjugate transpose', 'Non-unit', I3, + $ IB, CONE, AB( 1, I ), LDAB-1, WORK, + $ LDWORK ) +* +* Update A32 +* + IF( I2.GT.0 ) + $ CALL ZGEMM( 'No transpose', + $ 'Conjugate transpose', I3, I2, IB, + $ -CONE, WORK, LDWORK, AB( 1+IB, I ), + $ LDAB-1, CONE, AB( 1+KD-IB, I+IB ), + $ LDAB-1 ) +* +* Update A33 +* + CALL ZHERK( 'Lower', 'No transpose', I3, IB, -ONE, + $ WORK, LDWORK, ONE, AB( 1, I+KD ), + $ LDAB-1 ) +* +* Copy the upper triangle of A31 back into place. +* + DO 130 JJ = 1, IB + DO 120 II = 1, MIN( JJ, I3 ) + AB( KD+1-JJ+II, JJ+I-1 ) = WORK( II, JJ ) + 120 CONTINUE + 130 CONTINUE + END IF + END IF + 140 CONTINUE + END IF + END IF + RETURN +* + 150 CONTINUE + RETURN +* +* End of ZPBTRF +* + END diff --git a/dspl/liblapack/SRC/zpbtrs.f b/dspl/liblapack/SRC/zpbtrs.f new file mode 100644 index 0000000..a5a3db0 --- /dev/null +++ b/dspl/liblapack/SRC/zpbtrs.f @@ -0,0 +1,220 @@ +*> \brief \b ZPBTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPBTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX*16 AB( LDAB, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPBTRS solves a system of linear equations A*X = B with a Hermitian +*> positive definite band matrix A using the Cholesky factorization +*> A = U**H *U or A = L*L**H computed by ZPBTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular factor stored in AB; +*> = 'L': Lower triangular factor stored in AB. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals of the matrix A if UPLO = 'U', +*> or the number of subdiagonals if UPLO = 'L'. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H *U or A = L*L**H of the band matrix A, stored in the +*> first KD+1 rows of the array. The j-th column of U or L is +*> stored in the j-th column of the array AB as follows: +*> if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO ='L', AB(1+i-j,j) = L(i,j) for j<=i<=min(n,j+kd). +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 AB( LDAB, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZTBSV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( KD.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U**H *U. +* + DO 10 J = 1, NRHS +* +* Solve U**H *X = B, overwriting B with X. +* + CALL ZTBSV( 'Upper', 'Conjugate transpose', 'Non-unit', N, + $ KD, AB, LDAB, B( 1, J ), 1 ) +* +* Solve U*X = B, overwriting B with X. +* + CALL ZTBSV( 'Upper', 'No transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Solve A*X = B where A = L*L**H. +* + DO 20 J = 1, NRHS +* +* Solve L*X = B, overwriting B with X. +* + CALL ZTBSV( 'Lower', 'No transpose', 'Non-unit', N, KD, AB, + $ LDAB, B( 1, J ), 1 ) +* +* Solve L**H *X = B, overwriting B with X. +* + CALL ZTBSV( 'Lower', 'Conjugate transpose', 'Non-unit', N, + $ KD, AB, LDAB, B( 1, J ), 1 ) + 20 CONTINUE + END IF +* + RETURN +* +* End of ZPBTRS +* + END diff --git a/dspl/liblapack/SRC/zpftrf.f b/dspl/liblapack/SRC/zpftrf.f new file mode 100644 index 0000000..fd30330 --- /dev/null +++ b/dspl/liblapack/SRC/zpftrf.f @@ -0,0 +1,471 @@ +*> \brief \b ZPFTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPFTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPFTRF( TRANSR, UPLO, N, A, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER N, INFO +* .. +* .. Array Arguments .. +* COMPLEX*16 A( 0: * ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPFTRF computes the Cholesky factorization of a complex Hermitian +*> positive definite matrix A. +*> +*> The factorization has the form +*> A = U**H * U, if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> +*> This is the block version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal TRANSR of RFP A is stored; +*> = 'C': The Conjugate-transpose TRANSR of RFP A is stored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of RFP A is stored; +*> = 'L': Lower triangle of RFP A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension ( N*(N+1)/2 ); +*> On entry, the Hermitian matrix A in RFP format. RFP format is +*> described by TRANSR, UPLO, and N as follows: If TRANSR = 'N' +*> then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is +*> (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'C' then RFP is +*> the Conjugate-transpose of RFP A as defined when +*> TRANSR = 'N'. The contents of RFP A are defined by UPLO as +*> follows: If UPLO = 'U' the RFP A contains the nt elements of +*> upper packed A. If UPLO = 'L' the RFP A contains the elements +*> of lower packed A. The LDA of RFP A is (N+1)/2 when TRANSR = +*> 'C'. When TRANSR is 'N' the LDA is N+1 when N is even and N +*> is odd. See the Note below for more details. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization RFP A = U**H*U or RFP A = L*L**H. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> +*> Further Notes on RFP Format: +*> ============================ +*> +*> We first consider Standard Packed Format when N is even. +*> We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> conjugate-transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> conjugate-transpose of the last three columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- -- +*> 03 04 05 33 43 53 +*> -- -- +*> 13 14 15 00 44 54 +*> -- +*> 23 24 25 10 11 55 +*> +*> 33 34 35 20 21 22 +*> -- +*> 00 44 45 30 31 32 +*> -- -- +*> 01 11 55 40 41 42 +*> -- -- -- +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- -- +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> We next consider Standard Packed Format when N is odd. +*> We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> conjugate-transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> conjugate-transpose of the last two columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- +*> 02 03 04 00 33 43 +*> -- +*> 12 13 14 10 11 44 +*> +*> 22 23 24 20 21 22 +*> -- +*> 00 33 34 30 31 32 +*> -- -- +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- +*> 02 12 22 00 01 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 11 33 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZPFTRF( TRANSR, UPLO, N, A, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER N, INFO +* .. +* .. Array Arguments .. + COMPLEX*16 A( 0: * ) +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + COMPLEX*16 CONE + PARAMETER ( ONE = 1.0D+0, CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHERK, ZPOTRF, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPFTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* If N is odd, set NISODD = .TRUE. +* If N is even, set K = N/2 and NISODD = .FALSE. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + ELSE + NISODD = .TRUE. + END IF +* +* Set N1 and N2 depending on LOWER +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* start execution: there are eight cases +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) +* T1 -> a(0), T2 -> a(n), S -> a(n1) +* + CALL ZPOTRF( 'L', N1, A( 0 ), N, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRSM( 'R', 'L', 'C', 'N', N2, N1, CONE, A( 0 ), N, + $ A( N1 ), N ) + CALL ZHERK( 'U', 'N', N2, N1, -ONE, A( N1 ), N, ONE, + $ A( N ), N ) + CALL ZPOTRF( 'U', N2, A( N ), N, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) +* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) +* T1 -> a(n2), T2 -> a(n1), S -> a(0) +* + CALL ZPOTRF( 'L', N1, A( N2 ), N, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRSM( 'L', 'L', 'N', 'N', N1, N2, CONE, A( N2 ), N, + $ A( 0 ), N ) + CALL ZHERK( 'U', 'C', N2, N1, -ONE, A( 0 ), N, ONE, + $ A( N1 ), N ) + CALL ZPOTRF( 'U', N2, A( N1 ), N, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is odd +* T1 -> A(0,0) , T2 -> A(1,0) , S -> A(0,n1) +* T1 -> a(0+0) , T2 -> a(1+0) , S -> a(0+n1*n1); lda=n1 +* + CALL ZPOTRF( 'U', N1, A( 0 ), N1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRSM( 'L', 'U', 'C', 'N', N1, N2, CONE, A( 0 ), N1, + $ A( N1*N1 ), N1 ) + CALL ZHERK( 'L', 'C', N2, N1, -ONE, A( N1*N1 ), N1, ONE, + $ A( 1 ), N1 ) + CALL ZPOTRF( 'L', N2, A( 1 ), N1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is odd +* T1 -> A(0,n1+1), T2 -> A(0,n1), S -> A(0,0) +* T1 -> a(n2*n2), T2 -> a(n1*n2), S -> a(0); lda = n2 +* + CALL ZPOTRF( 'U', N1, A( N2*N2 ), N2, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRSM( 'R', 'U', 'N', 'N', N2, N1, CONE, A( N2*N2 ), + $ N2, A( 0 ), N2 ) + CALL ZHERK( 'L', 'N', N2, N1, -ONE, A( 0 ), N2, ONE, + $ A( N1*N2 ), N2 ) + CALL ZPOTRF( 'L', N2, A( N1*N2 ), N2, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1) +* + CALL ZPOTRF( 'L', K, A( 1 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRSM( 'R', 'L', 'C', 'N', K, K, CONE, A( 1 ), N+1, + $ A( K+1 ), N+1 ) + CALL ZHERK( 'U', 'N', K, K, -ONE, A( K+1 ), N+1, ONE, + $ A( 0 ), N+1 ) + CALL ZPOTRF( 'U', K, A( 0 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0) +* + CALL ZPOTRF( 'L', K, A( K+1 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRSM( 'L', 'L', 'N', 'N', K, K, CONE, A( K+1 ), + $ N+1, A( 0 ), N+1 ) + CALL ZHERK( 'U', 'C', K, K, -ONE, A( 0 ), N+1, ONE, + $ A( K ), N+1 ) + CALL ZPOTRF( 'U', K, A( K ), N+1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K +* + END IF +* + ELSE +* +* N is even and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) +* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k +* + CALL ZPOTRF( 'U', K, A( 0+K ), K, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRSM( 'L', 'U', 'C', 'N', K, K, CONE, A( K ), N1, + $ A( K*( K+1 ) ), K ) + CALL ZHERK( 'L', 'C', K, K, -ONE, A( K*( K+1 ) ), K, ONE, + $ A( 0 ), K ) + CALL ZPOTRF( 'L', K, A( 0 ), K, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0) +* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k +* + CALL ZPOTRF( 'U', K, A( K*( K+1 ) ), K, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRSM( 'R', 'U', 'N', 'N', K, K, CONE, + $ A( K*( K+1 ) ), K, A( 0 ), K ) + CALL ZHERK( 'L', 'N', K, K, -ONE, A( 0 ), K, ONE, + $ A( K*K ), K ) + CALL ZPOTRF( 'L', K, A( K*K ), K, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of ZPFTRF +* + END diff --git a/dspl/liblapack/SRC/zpftri.f b/dspl/liblapack/SRC/zpftri.f new file mode 100644 index 0000000..e63846b --- /dev/null +++ b/dspl/liblapack/SRC/zpftri.f @@ -0,0 +1,445 @@ +*> \brief \b ZPFTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPFTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPFTRI( TRANSR, UPLO, N, A, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, N +* .. Array Arguments .. +* COMPLEX*16 A( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPFTRI computes the inverse of a complex Hermitian positive definite +*> matrix A using the Cholesky factorization A = U**H*U or A = L*L**H +*> computed by ZPFTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal TRANSR of RFP A is stored; +*> = 'C': The Conjugate-transpose TRANSR of RFP A is stored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension ( N*(N+1)/2 ); +*> On entry, the Hermitian matrix A in RFP format. RFP format is +*> described by TRANSR, UPLO, and N as follows: If TRANSR = 'N' +*> then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is +*> (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'C' then RFP is +*> the Conjugate-transpose of RFP A as defined when +*> TRANSR = 'N'. The contents of RFP A are defined by UPLO as +*> follows: If UPLO = 'U' the RFP A contains the nt elements of +*> upper packed A. If UPLO = 'L' the RFP A contains the elements +*> of lower packed A. The LDA of RFP A is (N+1)/2 when TRANSR = +*> 'C'. When TRANSR is 'N' the LDA is N+1 when N is even and N +*> is odd. See the Note below for more details. +*> +*> On exit, the Hermitian inverse of the original matrix, in the +*> same storage format. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the (i,i) element of the factor U or L is +*> zero, and the inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Standard Packed Format when N is even. +*> We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> conjugate-transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> conjugate-transpose of the last three columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- -- +*> 03 04 05 33 43 53 +*> -- -- +*> 13 14 15 00 44 54 +*> -- +*> 23 24 25 10 11 55 +*> +*> 33 34 35 20 21 22 +*> -- +*> 00 44 45 30 31 32 +*> -- -- +*> 01 11 55 40 41 42 +*> -- -- -- +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- -- +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We next consider Standard Packed Format when N is odd. +*> We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> conjugate-transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> conjugate-transpose of the last two columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- +*> 02 03 04 00 33 43 +*> -- +*> 12 13 14 10 11 44 +*> +*> 22 23 24 20 21 22 +*> -- +*> 00 33 34 30 31 32 +*> -- -- +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- +*> 02 12 22 00 01 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 11 33 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZPFTRI( TRANSR, UPLO, N, A, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, N +* .. Array Arguments .. + COMPLEX*16 A( 0: * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + COMPLEX*16 CONE + PARAMETER ( ONE = 1.D0, CONE = ( 1.D0, 0.D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZTFTRI, ZLAUUM, ZTRMM, ZHERK +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPFTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL ZTFTRI( TRANSR, UPLO, 'N', N, A, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* +* If N is odd, set NISODD = .TRUE. +* If N is even, set K = N/2 and NISODD = .FALSE. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + ELSE + NISODD = .TRUE. + END IF +* +* Set N1 and N2 depending on LOWER +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* Start execution of triangular matrix multiply: inv(U)*inv(U)^C or +* inv(L)^C*inv(L). There are eight cases. +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:N1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(N1,0) +* T1 -> a(0), T2 -> a(n), S -> a(N1) +* + CALL ZLAUUM( 'L', N1, A( 0 ), N, INFO ) + CALL ZHERK( 'L', 'C', N1, N2, ONE, A( N1 ), N, ONE, + $ A( 0 ), N ) + CALL ZTRMM( 'L', 'U', 'N', 'N', N2, N1, CONE, A( N ), N, + $ A( N1 ), N ) + CALL ZLAUUM( 'U', N2, A( N ), N, INFO ) +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:N2-1) +* T1 -> a(N1+1,0), T2 -> a(N1,0), S -> a(0,0) +* T1 -> a(N2), T2 -> a(N1), S -> a(0) +* + CALL ZLAUUM( 'L', N1, A( N2 ), N, INFO ) + CALL ZHERK( 'L', 'N', N1, N2, ONE, A( 0 ), N, ONE, + $ A( N2 ), N ) + CALL ZTRMM( 'R', 'U', 'C', 'N', N1, N2, CONE, A( N1 ), N, + $ A( 0 ), N ) + CALL ZLAUUM( 'U', N2, A( N1 ), N, INFO ) +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE, and N is odd +* T1 -> a(0), T2 -> a(1), S -> a(0+N1*N1) +* + CALL ZLAUUM( 'U', N1, A( 0 ), N1, INFO ) + CALL ZHERK( 'U', 'N', N1, N2, ONE, A( N1*N1 ), N1, ONE, + $ A( 0 ), N1 ) + CALL ZTRMM( 'R', 'L', 'N', 'N', N1, N2, CONE, A( 1 ), N1, + $ A( N1*N1 ), N1 ) + CALL ZLAUUM( 'L', N2, A( 1 ), N1, INFO ) +* + ELSE +* +* SRPA for UPPER, TRANSPOSE, and N is odd +* T1 -> a(0+N2*N2), T2 -> a(0+N1*N2), S -> a(0) +* + CALL ZLAUUM( 'U', N1, A( N2*N2 ), N2, INFO ) + CALL ZHERK( 'U', 'C', N1, N2, ONE, A( 0 ), N2, ONE, + $ A( N2*N2 ), N2 ) + CALL ZTRMM( 'L', 'L', 'C', 'N', N2, N1, CONE, A( N1*N2 ), + $ N2, A( 0 ), N2 ) + CALL ZLAUUM( 'L', N2, A( N1*N2 ), N2, INFO ) +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1) +* + CALL ZLAUUM( 'L', K, A( 1 ), N+1, INFO ) + CALL ZHERK( 'L', 'C', K, K, ONE, A( K+1 ), N+1, ONE, + $ A( 1 ), N+1 ) + CALL ZTRMM( 'L', 'U', 'N', 'N', K, K, CONE, A( 0 ), N+1, + $ A( K+1 ), N+1 ) + CALL ZLAUUM( 'U', K, A( 0 ), N+1, INFO ) +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0) +* + CALL ZLAUUM( 'L', K, A( K+1 ), N+1, INFO ) + CALL ZHERK( 'L', 'N', K, K, ONE, A( 0 ), N+1, ONE, + $ A( K+1 ), N+1 ) + CALL ZTRMM( 'R', 'U', 'C', 'N', K, K, CONE, A( K ), N+1, + $ A( 0 ), N+1 ) + CALL ZLAUUM( 'U', K, A( K ), N+1, INFO ) +* + END IF +* + ELSE +* +* N is even and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE, and N is even (see paper) +* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1), +* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k +* + CALL ZLAUUM( 'U', K, A( K ), K, INFO ) + CALL ZHERK( 'U', 'N', K, K, ONE, A( K*( K+1 ) ), K, ONE, + $ A( K ), K ) + CALL ZTRMM( 'R', 'L', 'N', 'N', K, K, CONE, A( 0 ), K, + $ A( K*( K+1 ) ), K ) + CALL ZLAUUM( 'L', K, A( 0 ), K, INFO ) +* + ELSE +* +* SRPA for UPPER, TRANSPOSE, and N is even (see paper) +* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0), +* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k +* + CALL ZLAUUM( 'U', K, A( K*( K+1 ) ), K, INFO ) + CALL ZHERK( 'U', 'C', K, K, ONE, A( 0 ), K, ONE, + $ A( K*( K+1 ) ), K ) + CALL ZTRMM( 'L', 'L', 'C', 'N', K, K, CONE, A( K*K ), K, + $ A( 0 ), K ) + CALL ZLAUUM( 'L', K, A( K*K ), K, INFO ) +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of ZPFTRI +* + END diff --git a/dspl/liblapack/SRC/zpftrs.f b/dspl/liblapack/SRC/zpftrs.f new file mode 100644 index 0000000..b045f89 --- /dev/null +++ b/dspl/liblapack/SRC/zpftrs.f @@ -0,0 +1,301 @@ +*> \brief \b ZPFTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPFTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPFTRS( TRANSR, UPLO, N, NRHS, A, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX*16 A( 0: * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPFTRS solves a system of linear equations A*X = B with a Hermitian +*> positive definite matrix A using the Cholesky factorization +*> A = U**H*U or A = L*L**H computed by ZPFTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal TRANSR of RFP A is stored; +*> = 'C': The Conjugate-transpose TRANSR of RFP A is stored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of RFP A is stored; +*> = 'L': Lower triangle of RFP A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension ( N*(N+1)/2 ); +*> The triangular factor U or L from the Cholesky factorization +*> of RFP A = U**H*U or RFP A = L*L**H, as computed by ZPFTRF. +*> See note below for more details about RFP A. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Standard Packed Format when N is even. +*> We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> conjugate-transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> conjugate-transpose of the last three columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- -- +*> 03 04 05 33 43 53 +*> -- -- +*> 13 14 15 00 44 54 +*> -- +*> 23 24 25 10 11 55 +*> +*> 33 34 35 20 21 22 +*> -- +*> 00 44 45 30 31 32 +*> -- -- +*> 01 11 55 40 41 42 +*> -- -- -- +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- -- +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We next consider Standard Packed Format when N is odd. +*> We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> conjugate-transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> conjugate-transpose of the last two columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- +*> 02 03 04 00 33 43 +*> -- +*> 12 13 14 10 11 44 +*> +*> 22 23 24 20 21 22 +*> -- +*> 00 33 34 30 31 32 +*> -- -- +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- +*> 02 12 22 00 01 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 11 33 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZPFTRS( TRANSR, UPLO, N, NRHS, A, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 A( 0: * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, NORMALTRANSR +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZTFSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPFTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* start execution: there are two triangular solves +* + IF( LOWER ) THEN + CALL ZTFSM( TRANSR, 'L', UPLO, 'N', 'N', N, NRHS, CONE, A, B, + $ LDB ) + CALL ZTFSM( TRANSR, 'L', UPLO, 'C', 'N', N, NRHS, CONE, A, B, + $ LDB ) + ELSE + CALL ZTFSM( TRANSR, 'L', UPLO, 'C', 'N', N, NRHS, CONE, A, B, + $ LDB ) + CALL ZTFSM( TRANSR, 'L', UPLO, 'N', 'N', N, NRHS, CONE, A, B, + $ LDB ) + END IF +* + RETURN +* +* End of ZPFTRS +* + END diff --git a/dspl/liblapack/SRC/zpocon.f b/dspl/liblapack/SRC/zpocon.f new file mode 100644 index 0000000..ba22de0 --- /dev/null +++ b/dspl/liblapack/SRC/zpocon.f @@ -0,0 +1,260 @@ +*> \brief \b ZPOCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPOCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPOCON( UPLO, N, A, LDA, ANORM, RCOND, WORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPOCON estimates the reciprocal of the condition number (in the +*> 1-norm) of a complex Hermitian positive definite matrix using the +*> Cholesky factorization A = U**H*U or A = L*L**H computed by ZPOTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H*U or A = L*L**H, as computed by ZPOTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The 1-norm (or infinity-norm) of the Hermitian matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16POcomputational +* +* ===================================================================== + SUBROUTINE ZPOCON( UPLO, N, A, LDA, ANORM, RCOND, WORK, RWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + DOUBLE PRECISION AINVNM, SCALE, SCALEL, SCALEU, SMLNUM + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IZAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDRSCL, ZLACN2, ZLATRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of inv(A). +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U**H). +* + CALL ZLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, A, LDA, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL ZLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SCALEU, RWORK, INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL ZLATRS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ A, LDA, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L**H). +* + CALL ZLATRS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, A, LDA, WORK, SCALEU, RWORK, INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = IZAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL ZDRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of ZPOCON +* + END diff --git a/dspl/liblapack/SRC/zpoequ.f b/dspl/liblapack/SRC/zpoequ.f new file mode 100644 index 0000000..99b94f3 --- /dev/null +++ b/dspl/liblapack/SRC/zpoequ.f @@ -0,0 +1,207 @@ +*> \brief \b ZPOEQU +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPOEQU + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPOEQU( N, A, LDA, S, SCOND, AMAX, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, N +* DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION S( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPOEQU computes row and column scalings intended to equilibrate a +*> Hermitian positive definite matrix A and reduce its condition number +*> (with respect to the two-norm). S contains the scale factors, +*> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with +*> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This +*> choice of S puts the condition number of B within a factor N of the +*> smallest possible condition number over all possible diagonal +*> scalings. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The N-by-N Hermitian positive definite matrix whose scaling +*> factors are to be computed. Only the diagonal elements of A +*> are referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is DOUBLE PRECISION +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16POcomputational +* +* ===================================================================== + SUBROUTINE ZPOEQU( N, A, LDA, S, SCOND, AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION S( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION SMIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* +* Find the minimum and maximum diagonal elements. +* + S( 1 ) = DBLE( A( 1, 1 ) ) + SMIN = S( 1 ) + AMAX = S( 1 ) + DO 10 I = 2, N + S( I ) = DBLE( A( I, I ) ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 20 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 20 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 30 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 30 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of ZPOEQU +* + END diff --git a/dspl/liblapack/SRC/zpoequb.f b/dspl/liblapack/SRC/zpoequb.f new file mode 100644 index 0000000..6902bdb --- /dev/null +++ b/dspl/liblapack/SRC/zpoequb.f @@ -0,0 +1,223 @@ +*> \brief \b ZPOEQUB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPOEQUB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPOEQUB( N, A, LDA, S, SCOND, AMAX, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, N +* DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* DOUBLE PRECISION S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPOEQUB computes row and column scalings intended to equilibrate a +*> Hermitian positive definite matrix A and reduce its condition number +*> (with respect to the two-norm). S contains the scale factors, +*> S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with +*> elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal. This +*> choice of S puts the condition number of B within a factor N of the +*> smallest possible condition number over all possible diagonal +*> scalings. +*> +*> This routine differs from ZPOEQU by restricting the scaling factors +*> to a power of the radix. Barring over- and underflow, scaling by +*> these factors introduces no additional rounding errors. However, the +*> scaled diagonal entries are no longer approximately 1 but lie +*> between sqrt(radix) and 1/sqrt(radix). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The N-by-N Hermitian positive definite matrix whose scaling +*> factors are to be computed. Only the diagonal elements of A +*> are referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is DOUBLE PRECISION +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16POcomputational +* +* ===================================================================== + SUBROUTINE ZPOEQUB( N, A, LDA, S, SCOND, AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) + DOUBLE PRECISION S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION SMIN, BASE, TMP +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT, LOG, INT, REAL, DIMAG +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* +* Positive definite only performs 1 pass of equilibration. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOEQUB', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF + + BASE = DLAMCH( 'B' ) + TMP = -0.5D+0 / LOG ( BASE ) +* +* Find the minimum and maximum diagonal elements. +* + S( 1 ) = A( 1, 1 ) + SMIN = S( 1 ) + AMAX = S( 1 ) + DO 10 I = 2, N + S( I ) = A( I, I ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 20 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 20 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 30 I = 1, N + S( I ) = BASE ** INT( TMP * LOG( S( I ) ) ) + 30 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)). +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF +* + RETURN +* +* End of ZPOEQUB +* + END diff --git a/dspl/liblapack/SRC/zporfs.f b/dspl/liblapack/SRC/zporfs.f new file mode 100644 index 0000000..3b9753e --- /dev/null +++ b/dspl/liblapack/SRC/zporfs.f @@ -0,0 +1,436 @@ +*> \brief \b ZPORFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPORFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, +* LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPORFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is Hermitian positive definite, +*> and provides error bounds and backward error estimates for the +*> solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The Hermitian matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H*U or A = L*L**H, as computed by ZPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by ZPOTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16POcomputational +* +* ===================================================================== + SUBROUTINE ZPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ==================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZHEMV, ZLACN2, ZPOTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPORFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL ZHEMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 40 CONTINUE + RWORK( K ) = RWORK( K ) + ABS( DBLE( A( K, K ) ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + ABS( DBLE( A( K, K ) ) )*XK + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZPOTRS( UPLO, N, 1, AF, LDAF, WORK, N, INFO ) + CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**H). +* + CALL ZPOTRS( UPLO, N, 1, AF, LDAF, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL ZPOTRS( UPLO, N, 1, AF, LDAF, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of ZPORFS +* + END diff --git a/dspl/liblapack/SRC/zporfsx.f b/dspl/liblapack/SRC/zporfsx.f new file mode 100644 index 0000000..ee8cfbc --- /dev/null +++ b/dspl/liblapack/SRC/zporfsx.f @@ -0,0 +1,693 @@ +*> \brief \b ZPORFSX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPORFSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPORFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, S, B, +* LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, +* WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, EQUED +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX, * ), WORK( * ) +* DOUBLE PRECISION RWORK( * ), S( * ), PARAMS(*), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPORFSX improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric positive +*> definite, and provides error bounds and backward error estimates +*> for the solution. In addition to normwise error bound, the code +*> provides maximum componentwise error bound if possible. See +*> comments for ERR_BNDS_NORM and ERR_BNDS_COMP for details of the +*> error bounds. +*> +*> The original system of linear equations may have been equilibrated +*> before calling this routine, as described by arguments EQUED and S +*> below. In this case, the solution and error bounds returned are +*> for the original unequilibrated system. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done to A +*> before calling this routine. This is needed to compute +*> the solution and error bounds correctly. +*> = 'N': No equilibration +*> = 'Y': Both row and column equilibration, i.e., A has been +*> replaced by diag(S) * A * diag(S). +*> The right hand side B has been changed accordingly. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**T*U or A = L*L**T, as computed by DPOTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The row scale factors for A. If EQUED = 'Y', A is multiplied on +*> the left and right by diag(S). S is an input argument if FACT = +*> 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED +*> = 'Y', each element of S must be positive. If S is output, each +*> element of S is a power of the radix. If S is input, each element +*> of S should be a power of the radix to ensure a reliable solution +*> and error estimates. Scaling by powers of the radix does not cause +*> rounding errors unless the result underflows or overflows. +*> Rounding errors during scaling lead to refining with a matrix that +*> is not equivalent to the input matrix, producing error estimates +*> that may not be reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by DGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is DOUBLE PRECISION array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0D+0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16POcomputational +* +* ===================================================================== + SUBROUTINE ZPORFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, S, B, + $ LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO, EQUED + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX, * ), WORK( * ) + DOUBLE PRECISION RWORK( * ), S( * ), PARAMS(*), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION ITREF_DEFAULT, ITHRESH_DEFAULT + DOUBLE PRECISION COMPONENTWISE_DEFAULT, RTHRESH_DEFAULT + DOUBLE PRECISION DZTHRESH_DEFAULT + PARAMETER ( ITREF_DEFAULT = 1.0D+0 ) + PARAMETER ( ITHRESH_DEFAULT = 10.0D+0 ) + PARAMETER ( COMPONENTWISE_DEFAULT = 1.0D+0 ) + PARAMETER ( RTHRESH_DEFAULT = 0.5D+0 ) + PARAMETER ( DZTHRESH_DEFAULT = 0.25D+0 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. Local Scalars .. + CHARACTER(1) NORM + LOGICAL RCEQU + INTEGER J, PREC_TYPE, REF_TYPE + INTEGER N_NORMS + DOUBLE PRECISION ANORM, RCOND_TMP + DOUBLE PRECISION ILLRCOND_THRESH, ERR_LBND, CWISE_WRONG + LOGICAL IGNORE_CWISE + INTEGER ITHRESH + DOUBLE PRECISION RTHRESH, UNSTABLE_THRESH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZPOCON, ZLA_PORFSX_EXTENDED +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT, TRANSFER +* .. +* .. External Functions .. + EXTERNAL LSAME, ILAPREC + EXTERNAL DLAMCH, ZLANHE, ZLA_PORCOND_X, ZLA_PORCOND_C + DOUBLE PRECISION DLAMCH, ZLANHE, ZLA_PORCOND_X, ZLA_PORCOND_C + LOGICAL LSAME + INTEGER ILAPREC +* .. +* .. Executable Statements .. +* +* Check the input parameters. +* + INFO = 0 + REF_TYPE = INT( ITREF_DEFAULT ) + IF ( NPARAMS .GE. LA_LINRX_ITREF_I ) THEN + IF ( PARAMS( LA_LINRX_ITREF_I ) .LT. 0.0D+0 ) THEN + PARAMS( LA_LINRX_ITREF_I ) = ITREF_DEFAULT + ELSE + REF_TYPE = PARAMS( LA_LINRX_ITREF_I ) + END IF + END IF +* +* Set default parameters. +* + ILLRCOND_THRESH = DBLE( N ) * DLAMCH( 'Epsilon' ) + ITHRESH = INT( ITHRESH_DEFAULT ) + RTHRESH = RTHRESH_DEFAULT + UNSTABLE_THRESH = DZTHRESH_DEFAULT + IGNORE_CWISE = COMPONENTWISE_DEFAULT .EQ. 0.0D+0 +* + IF ( NPARAMS.GE.LA_LINRX_ITHRESH_I ) THEN + IF ( PARAMS(LA_LINRX_ITHRESH_I ).LT.0.0D+0 ) THEN + PARAMS( LA_LINRX_ITHRESH_I ) = ITHRESH + ELSE + ITHRESH = INT( PARAMS( LA_LINRX_ITHRESH_I ) ) + END IF + END IF + IF ( NPARAMS.GE.LA_LINRX_CWISE_I ) THEN + IF ( PARAMS(LA_LINRX_CWISE_I ).LT.0.0D+0 ) THEN + IF ( IGNORE_CWISE ) THEN + PARAMS( LA_LINRX_CWISE_I ) = 0.0D+0 + ELSE + PARAMS( LA_LINRX_CWISE_I ) = 1.0D+0 + END IF + ELSE + IGNORE_CWISE = PARAMS( LA_LINRX_CWISE_I ) .EQ. 0.0D+0 + END IF + END IF + IF ( REF_TYPE .EQ. 0 .OR. N_ERR_BNDS .EQ. 0 ) THEN + N_NORMS = 0 + ELSE IF ( IGNORE_CWISE ) THEN + N_NORMS = 1 + ELSE + N_NORMS = 2 + END IF +* + RCEQU = LSAME( EQUED, 'Y' ) +* +* Test input parameters. +* + IF (.NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RCEQU .AND. .NOT.LSAME( EQUED, 'N' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPORFSX', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + RCOND = 1.0D+0 + DO J = 1, NRHS + BERR( J ) = 0.0D+0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 0.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 0.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 1.0D+0 + END IF + END DO + RETURN + END IF +* +* Default to failure. +* + RCOND = 0.0D+0 + DO J = 1, NRHS + BERR( J ) = 1.0D+0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 0.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 0.0D+0 + END IF + END DO +* +* Compute the norm of A and the reciprocal of the condition +* number of A. +* + NORM = 'I' + ANORM = ZLANHE( NORM, UPLO, N, A, LDA, RWORK ) + CALL ZPOCON( UPLO, N, AF, LDAF, ANORM, RCOND, WORK, RWORK, + $ INFO ) +* +* Perform refinement on each right-hand side +* + IF ( REF_TYPE .NE. 0 ) THEN + + PREC_TYPE = ILAPREC( 'E' ) + + CALL ZLA_PORFSX_EXTENDED( PREC_TYPE, UPLO, N, + $ NRHS, A, LDA, AF, LDAF, RCEQU, S, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, ERR_BNDS_COMP, + $ WORK, RWORK, WORK(N+1), + $ TRANSFER (RWORK(1:2*N), (/ (ZERO, ZERO) /), N), RCOND, + $ ITHRESH, RTHRESH, UNSTABLE_THRESH, IGNORE_CWISE, + $ INFO ) + END IF + + ERR_LBND = MAX( 10.0D+0, SQRT( DBLE( N ) ) ) * DLAMCH( 'Epsilon' ) + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 1 ) THEN +* +* Compute scaled normwise condition number cond(A*C). +* + IF ( RCEQU ) THEN + RCOND_TMP = ZLA_PORCOND_C( UPLO, N, A, LDA, AF, LDAF, + $ S, .TRUE., INFO, WORK, RWORK ) + ELSE + RCOND_TMP = ZLA_PORCOND_C( UPLO, N, A, LDA, AF, LDAF, + $ S, .FALSE., INFO, WORK, RWORK ) + END IF + DO J = 1, NRHS +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .GT. 1.0D+0 ) + $ ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 0.0D+0 + IF ( INFO .LE. N ) INFO = N + J + ELSE IF ( ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .LT. ERR_LBND ) + $ THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF + + IF (N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 2) THEN +* +* Compute componentwise condition number cond(A*diag(Y(:,J))) for +* each right-hand side using the current solution as an estimate of +* the true solution. If the componentwise error estimate is too +* large, then the solution is a lousy estimate of truth and the +* estimated RCOND may be too optimistic. To avoid misleading users, +* the inverse condition number is set to 0.0 when the estimated +* cwise error is at least CWISE_WRONG. +* + CWISE_WRONG = SQRT( DLAMCH( 'Epsilon' ) ) + DO J = 1, NRHS + IF (ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .LT. CWISE_WRONG ) + $ THEN + RCOND_TMP = ZLA_PORCOND_X( UPLO, N, A, LDA, AF, LDAF, + $ X(1,J), INFO, WORK, RWORK ) + ELSE + RCOND_TMP = 0.0D+0 + END IF +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .GT. 1.0D+0 ) + $ ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 +* +* Threshold the error (see LAWN). +* + IF (RCOND_TMP .LT. ILLRCOND_THRESH) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 0.0D+0 + IF ( PARAMS( LA_LINRX_CWISE_I ) .EQ. 1.0D+0 + $ .AND. INFO.LT.N + J ) INFO = N + J + ELSE IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) + $ .LT. ERR_LBND ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF +* + RETURN +* +* End of ZPORFSX +* + END diff --git a/dspl/liblapack/SRC/zposv.f b/dspl/liblapack/SRC/zposv.f new file mode 100644 index 0000000..07ee5b1 --- /dev/null +++ b/dspl/liblapack/SRC/zposv.f @@ -0,0 +1,193 @@ +*> \brief ZPOSV computes the solution to system of linear equations A * X = B for PO matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPOSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPOSV( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPOSV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian positive definite matrix and X and B +*> are N-by-NRHS matrices. +*> +*> The Cholesky decomposition is used to factor A as +*> A = U**H* U, if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is a lower triangular +*> matrix. The factored form of A is then used to solve the system of +*> equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**H *U or A = L*L**H. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i of A is not +*> positive definite, so the factorization could not be +*> completed, and the solution has not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16POsolve +* +* ===================================================================== + SUBROUTINE ZPOSV( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZPOTRF, ZPOTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U**H *U or A = L*L**H. +* + CALL ZPOTRF( UPLO, N, A, LDA, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* + END IF + RETURN +* +* End of ZPOSV +* + END diff --git a/dspl/liblapack/SRC/zposvx.f b/dspl/liblapack/SRC/zposvx.f new file mode 100644 index 0000000..5f67bb8 --- /dev/null +++ b/dspl/liblapack/SRC/zposvx.f @@ -0,0 +1,492 @@ +*> \brief ZPOSVX computes the solution to system of linear equations A * X = B for PO matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPOSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPOSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED, +* S, B, LDB, X, LDX, RCOND, FERR, BERR, WORK, +* RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ), S( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPOSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to +*> compute the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian positive definite matrix and X and B +*> are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +*> factor the matrix A (after equilibration if FACT = 'E') as +*> A = U**H* U, if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is a lower triangular +*> matrix. +*> +*> 3. If the leading i-by-i principal minor is not positive definite, +*> then the routine returns with INFO = i. Otherwise, the factored +*> form of A is used to estimate the condition number of the matrix +*> A. If the reciprocal of the condition number is less than machine +*> precision, INFO = N+1 is returned as a warning, but the routine +*> still goes on to solve for X and compute error bounds as +*> described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(S) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF contains the factored form of A. +*> If EQUED = 'Y', the matrix A has been equilibrated +*> with scaling factors given by S. A and AF will not +*> be modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A, except if FACT = 'F' and +*> EQUED = 'Y', then A must contain the equilibrated matrix +*> diag(S)*A*diag(S). If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. A is not modified if +*> FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the triangular factor U or L from the Cholesky +*> factorization A = U**H *U or A = L*L**H, in the same storage +*> format as A. If EQUED .ne. 'N', then AF is the factored form +*> of the equilibrated matrix diag(S)*A*diag(S). +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**H *U or A = L*L**H of the original +*> matrix A. +*> +*> If FACT = 'E', then AF is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**H *U or A = L*L**H of the equilibrated +*> matrix A (see the description of A for the form of the +*> equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A; not accessed if EQUED = 'N'. S is +*> an input argument if FACT = 'F'; otherwise, S is an output +*> argument. If FACT = 'F' and EQUED = 'Y', each element of S +*> must be positive. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS righthand side matrix B. +*> On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +*> B is overwritten by diag(S) * B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +*> the original system of equations. Note that if EQUED = 'Y', +*> A and B are modified on exit, and the solution to the +*> equilibrated system is inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The estimate of the reciprocal condition number of the matrix +*> A after equilibration (if done). If RCOND is less than the +*> machine precision (in particular, if RCOND = 0), the matrix +*> is singular to working precision. This condition is +*> indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: the leading minor of order i of A is +*> not positive definite, so the factorization +*> could not be completed, and the solution has not +*> been computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16POsolve +* +* ===================================================================== + SUBROUTINE ZPOSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED, + $ S, B, LDB, X, LDX, RCOND, FERR, BERR, WORK, + $ RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ), S( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER I, INFEQU, J + DOUBLE PRECISION AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHE + EXTERNAL LSAME, DLAMCH, ZLANHE +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACPY, ZLAQHE, ZPOCON, ZPOEQU, ZPORFS, + $ ZPOTRF, ZPOTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -9 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -10 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL ZPOEQU( N, A, LDA, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL ZLAQHE( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U**H *U or A = L*L**H. +* + CALL ZLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL ZPOTRF( UPLO, N, AF, LDAF, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = ZLANHE( '1', UPLO, N, A, LDA, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL ZPOCON( UPLO, N, AF, LDAF, ANORM, RCOND, WORK, RWORK, INFO ) +* +* Compute the solution matrix X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZPOTRS( UPLO, N, NRHS, AF, LDAF, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL ZPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, LDX, + $ FERR, BERR, WORK, RWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 50 J = 1, NRHS + DO 40 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 40 CONTINUE + 50 CONTINUE + DO 60 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 60 CONTINUE + END IF +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of ZPOSVX +* + END diff --git a/dspl/liblapack/SRC/zposvxx.f b/dspl/liblapack/SRC/zposvxx.f new file mode 100644 index 0000000..8126f14 --- /dev/null +++ b/dspl/liblapack/SRC/zposvxx.f @@ -0,0 +1,680 @@ +*> \brief ZPOSVXX computes the solution to system of linear equations A * X = B for PO matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPOSVXX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPOSVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED, +* S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, +* N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, +* NPARAMS, PARAMS, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* DOUBLE PRECISION RCOND, RPVGRW +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* DOUBLE PRECISION S( * ), PARAMS( * ), BERR( * ), RWORK( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPOSVXX uses the Cholesky factorization A = U**T*U or A = L*L**T +*> to compute the solution to a complex*16 system of linear equations +*> A * X = B, where A is an N-by-N symmetric positive definite matrix +*> and X and B are N-by-NRHS matrices. +*> +*> If requested, both normwise and maximum componentwise error bounds +*> are returned. ZPOSVXX will return a solution with a tiny +*> guaranteed error (O(eps) where eps is the working machine +*> precision) unless the matrix is very ill-conditioned, in which +*> case a warning is returned. Relevant condition numbers also are +*> calculated and returned. +*> +*> ZPOSVXX accepts user-provided factorizations and equilibration +*> factors; see the definitions of the FACT and EQUED options. +*> Solving with refinement and using a factorization from a previous +*> ZPOSVXX call will also produce a solution with either O(eps) +*> errors or warnings, but we cannot make that claim for general +*> user-provided factorizations and equilibration factors if they +*> differ from what ZPOSVXX would itself produce. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', double precision scaling factors are computed to equilibrate +*> the system: +*> +*> diag(S)*A*diag(S) *inv(diag(S))*X = diag(S)*B +*> +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +*> factor the matrix A (after equilibration if FACT = 'E') as +*> A = U**T* U, if UPLO = 'U', or +*> A = L * L**T, if UPLO = 'L', +*> where U is an upper triangular matrix and L is a lower triangular +*> matrix. +*> +*> 3. If the leading i-by-i principal minor is not positive definite, +*> then the routine returns with INFO = i. Otherwise, the factored +*> form of A is used to estimate the condition number of the matrix +*> A (see argument RCOND). If the reciprocal of the condition number +*> is less than machine precision, the routine still goes on to solve +*> for X and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), +*> the routine will use iterative refinement to try to get a small +*> error and error bounds. Refinement calculates the residual to at +*> least twice the working precision. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(S) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF contains the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by S. +*> A and AF are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A, except if FACT = 'F' and EQUED = +*> 'Y', then A must contain the equilibrated matrix +*> diag(S)*A*diag(S). If UPLO = 'U', the leading N-by-N upper +*> triangular part of A contains the upper triangular part of the +*> matrix A, and the strictly lower triangular part of A is not +*> referenced. If UPLO = 'L', the leading N-by-N lower triangular +*> part of A contains the lower triangular part of the matrix A, and +*> the strictly upper triangular part of A is not referenced. A is +*> not modified if FACT = 'F' or 'N', or if FACT = 'E' and EQUED = +*> 'N' on exit. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T, in the same storage +*> format as A. If EQUED .ne. 'N', then AF is the factored +*> form of the equilibrated matrix diag(S)*A*diag(S). +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T of the original +*> matrix A. +*> +*> If FACT = 'E', then AF is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**T*U or A = L*L**T of the equilibrated +*> matrix A (see the description of A for the form of the +*> equilibrated matrix). +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Both row and column equilibration, i.e., A has been +*> replaced by diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The row scale factors for A. If EQUED = 'Y', A is multiplied on +*> the left and right by diag(S). S is an input argument if FACT = +*> 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED +*> = 'Y', each element of S must be positive. If S is output, each +*> element of S is a power of the radix. If S is input, each element +*> of S should be a power of the radix to ensure a reliable solution +*> and error estimates. Scaling by powers of the radix does not cause +*> rounding errors unless the result underflows or overflows. +*> Rounding errors during scaling lead to refining with a matrix that +*> is not equivalent to the input matrix, producing error estimates +*> that may not be reliable. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if EQUED = 'Y', B is overwritten by diag(S)*B; +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X to the original +*> system of equations. Note that A and B are modified on exit if +*> EQUED .ne. 'N', and the solution to the equilibrated system is +*> inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] RPVGRW +*> \verbatim +*> RPVGRW is DOUBLE PRECISION +*> Reciprocal pivot growth. On exit, this contains the reciprocal +*> pivot growth factor norm(A)/norm(U). The "max absolute element" +*> norm is used. If this is much less than 1, then the stability of +*> the LU factorization of the (equilibrated) matrix A could be poor. +*> This also means that the solution X, estimated condition numbers, +*> and error bounds could be unreliable. If factorization fails with +*> 0 for the leading INFO columns of A. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is DOUBLE PRECISION array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0D+0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the extra-precise refinement algorithm. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16POsolve +* +* ===================================================================== + SUBROUTINE ZPOSVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED, + $ S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, + $ N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, + $ NPARAMS, PARAMS, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + DOUBLE PRECISION RCOND, RPVGRW +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) + DOUBLE PRECISION S( * ), PARAMS( * ), BERR( * ), RWORK( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER INFEQU, J + DOUBLE PRECISION AMAX, BIGNUM, SMIN, SMAX, SCOND, SMLNUM +* .. +* .. External Functions .. + EXTERNAL LSAME, DLAMCH, ZLA_PORPVGRW + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLA_PORPVGRW +* .. +* .. External Subroutines .. + EXTERNAL ZPOEQUB, ZPOTRF, ZPOTRS, ZLACPY, + $ ZLAQHE, XERBLA, ZLASCL2, ZPORFSX +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + ENDIF +* +* Default is failure. If an input parameter is wrong or +* factorization fails, make everything look horrible. Only the +* pivot growth is set here, the rest is initialized in ZPORFSX. +* + RPVGRW = ZERO +* +* Test the input parameters. PARAMS is not tested until ZPORFSX. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT. + $ LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. + $ .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -9 + ELSE + IF ( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -10 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOSVXX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL ZPOEQUB( N, A, LDA, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL ZLAQHE( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) CALL ZLASCL2( N, NRHS, S, B, LDB ) +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization of A. +* + CALL ZLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL ZPOTRF( UPLO, N, AF, LDAF, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Pivot in column INFO is exactly 0 +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + RPVGRW = ZLA_PORPVGRW( UPLO, N, A, LDA, AF, LDAF, RWORK ) + RETURN + END IF + END IF +* +* Compute the reciprocal pivot growth factor RPVGRW. +* + RPVGRW = ZLA_PORPVGRW( UPLO, N, A, LDA, AF, LDAF, RWORK ) +* +* Compute the solution matrix X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZPOTRS( UPLO, N, NRHS, AF, LDAF, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL ZPORFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, + $ S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, RWORK, INFO ) + +* +* Scale solutions. +* + IF ( RCEQU ) THEN + CALL ZLASCL2( N, NRHS, S, X, LDX ) + END IF +* + RETURN +* +* End of ZPOSVXX +* + END diff --git a/dspl/liblapack/SRC/zpotf2.f b/dspl/liblapack/SRC/zpotf2.f new file mode 100644 index 0000000..0aa457f --- /dev/null +++ b/dspl/liblapack/SRC/zpotf2.f @@ -0,0 +1,237 @@ +*> \brief \b ZPOTF2 computes the Cholesky factorization of a symmetric/Hermitian positive definite matrix (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPOTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPOTF2( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPOTF2 computes the Cholesky factorization of a complex Hermitian +*> positive definite matrix A. +*> +*> The factorization has the form +*> A = U**H * U , if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**H *U or A = L*L**H. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, the leading minor of order k is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16POcomputational +* +* ===================================================================== + SUBROUTINE ZPOTF2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZGEMV, ZLACGV +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U**H *U. +* + DO 10 J = 1, N +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = DBLE( A( J, J ) ) - ZDOTC( J-1, A( 1, J ), 1, + $ A( 1, J ), 1 ) + IF( AJJ.LE.ZERO.OR.DISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of row J. +* + IF( J.LT.N ) THEN + CALL ZLACGV( J-1, A( 1, J ), 1 ) + CALL ZGEMV( 'Transpose', J-1, N-J, -CONE, A( 1, J+1 ), + $ LDA, A( 1, J ), 1, CONE, A( J, J+1 ), LDA ) + CALL ZLACGV( J-1, A( 1, J ), 1 ) + CALL ZDSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA ) + END IF + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L*L**H. +* + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = DBLE( A( J, J ) ) - ZDOTC( J-1, A( J, 1 ), LDA, + $ A( J, 1 ), LDA ) + IF( AJJ.LE.ZERO.OR.DISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of column J. +* + IF( J.LT.N ) THEN + CALL ZLACGV( J-1, A( J, 1 ), LDA ) + CALL ZGEMV( 'No transpose', N-J, J-1, -CONE, A( J+1, 1 ), + $ LDA, A( J, 1 ), LDA, CONE, A( J+1, J ), 1 ) + CALL ZLACGV( J-1, A( J, 1 ), LDA ) + CALL ZDSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF + GO TO 40 +* + 30 CONTINUE + INFO = J +* + 40 CONTINUE + RETURN +* +* End of ZPOTF2 +* + END diff --git a/dspl/liblapack/SRC/zpotrf.f b/dspl/liblapack/SRC/zpotrf.f new file mode 100644 index 0000000..044cb90 --- /dev/null +++ b/dspl/liblapack/SRC/zpotrf.f @@ -0,0 +1,249 @@ +*> \brief \b ZPOTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPOTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPOTRF( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPOTRF computes the Cholesky factorization of a complex Hermitian +*> positive definite matrix A. +*> +*> The factorization has the form +*> A = U**H * U, if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> +*> This is the block version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**H *U or A = L*L**H. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16POcomputational +* +* ===================================================================== + SUBROUTINE ZPOTRF( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + COMPLEX*16 CONE + PARAMETER ( ONE = 1.0D+0, CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JB, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMM, ZHERK, ZPOTRF2, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'ZPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code. +* + CALL ZPOTRF2( UPLO, N, A, LDA, INFO ) + ELSE +* +* Use blocked code. +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U**H *U. +* + DO 10 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + CALL ZHERK( 'Upper', 'Conjugate transpose', JB, J-1, + $ -ONE, A( 1, J ), LDA, ONE, A( J, J ), LDA ) + CALL ZPOTRF2( 'Upper', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( J+JB.LE.N ) THEN +* +* Compute the current block row. +* + CALL ZGEMM( 'Conjugate transpose', 'No transpose', JB, + $ N-J-JB+1, J-1, -CONE, A( 1, J ), LDA, + $ A( 1, J+JB ), LDA, CONE, A( J, J+JB ), + $ LDA ) + CALL ZTRSM( 'Left', 'Upper', 'Conjugate transpose', + $ 'Non-unit', JB, N-J-JB+1, CONE, A( J, J ), + $ LDA, A( J, J+JB ), LDA ) + END IF + 10 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization A = L*L**H. +* + DO 20 J = 1, N, NB +* +* Update and factorize the current diagonal block and test +* for non-positive-definiteness. +* + JB = MIN( NB, N-J+1 ) + CALL ZHERK( 'Lower', 'No transpose', JB, J-1, -ONE, + $ A( J, 1 ), LDA, ONE, A( J, J ), LDA ) + CALL ZPOTRF2( 'Lower', JB, A( J, J ), LDA, INFO ) + IF( INFO.NE.0 ) + $ GO TO 30 + IF( J+JB.LE.N ) THEN +* +* Compute the current block column. +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ N-J-JB+1, JB, J-1, -CONE, A( J+JB, 1 ), + $ LDA, A( J, 1 ), LDA, CONE, A( J+JB, J ), + $ LDA ) + CALL ZTRSM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Non-unit', N-J-JB+1, JB, CONE, A( J, J ), + $ LDA, A( J+JB, J ), LDA ) + END IF + 20 CONTINUE + END IF + END IF + GO TO 40 +* + 30 CONTINUE + INFO = INFO + J - 1 +* + 40 CONTINUE + RETURN +* +* End of ZPOTRF +* + END diff --git a/dspl/liblapack/SRC/zpotrf2.f b/dspl/liblapack/SRC/zpotrf2.f new file mode 100644 index 0000000..e37c9f6 --- /dev/null +++ b/dspl/liblapack/SRC/zpotrf2.f @@ -0,0 +1,241 @@ +*> \brief \b ZPOTRF2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE ZPOTRF2( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPOTRF2 computes the Cholesky factorization of a real symmetric +*> positive definite matrix A using the recursive algorithm. +*> +*> The factorization has the form +*> A = U**H * U, if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> +*> This is the recursive version of the algorithm. It divides +*> the matrix into four submatrices: +*> +*> [ A11 | A12 ] where A11 is n1 by n1 and A22 is n2 by n2 +*> A = [ -----|----- ] with n1 = n/2 +*> [ A21 | A22 ] n2 = n-n1 +*> +*> The subroutine calls itself to factor A11. Update and scale A21 +*> or A12, update A22 then call itself to factor A22. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**H*U or A = L*L**H. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16POcomputational +* +* ===================================================================== + RECURSIVE SUBROUTINE ZPOTRF2( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = (1.0D+0, 0.0D+0) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER N1, N2, IINFO + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL ZHERK, ZTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, DBLE, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOTRF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* N=1 case +* + IF( N.EQ.1 ) THEN +* +* Test for non-positive-definiteness +* + AJJ = DBLE( A( 1, 1 ) ) + IF( AJJ.LE.ZERO.OR.DISNAN( AJJ ) ) THEN + INFO = 1 + RETURN + END IF +* +* Factor +* + A( 1, 1 ) = SQRT( AJJ ) +* +* Use recursive code +* + ELSE + N1 = N/2 + N2 = N-N1 +* +* Factor A11 +* + CALL ZPOTRF2( UPLO, N1, A( 1, 1 ), LDA, IINFO ) + IF ( IINFO.NE.0 ) THEN + INFO = IINFO + RETURN + END IF +* +* Compute the Cholesky factorization A = U**H*U +* + IF( UPPER ) THEN +* +* Update and scale A12 +* + CALL ZTRSM( 'L', 'U', 'C', 'N', N1, N2, CONE, + $ A( 1, 1 ), LDA, A( 1, N1+1 ), LDA ) +* +* Update and factor A22 +* + CALL ZHERK( UPLO, 'C', N2, N1, -ONE, A( 1, N1+1 ), LDA, + $ ONE, A( N1+1, N1+1 ), LDA ) + CALL ZPOTRF2( UPLO, N2, A( N1+1, N1+1 ), LDA, IINFO ) + IF ( IINFO.NE.0 ) THEN + INFO = IINFO + N1 + RETURN + END IF +* +* Compute the Cholesky factorization A = L*L**H +* + ELSE +* +* Update and scale A21 +* + CALL ZTRSM( 'R', 'L', 'C', 'N', N2, N1, CONE, + $ A( 1, 1 ), LDA, A( N1+1, 1 ), LDA ) +* +* Update and factor A22 +* + CALL ZHERK( UPLO, 'N', N2, N1, -ONE, A( N1+1, 1 ), LDA, + $ ONE, A( N1+1, N1+1 ), LDA ) + CALL ZPOTRF2( UPLO, N2, A( N1+1, N1+1 ), LDA, IINFO ) + IF ( IINFO.NE.0 ) THEN + INFO = IINFO + N1 + RETURN + END IF + END IF + END IF + RETURN +* +* End of ZPOTRF2 +* + END diff --git a/dspl/liblapack/SRC/zpotri.f b/dspl/liblapack/SRC/zpotri.f new file mode 100644 index 0000000..68f61e8 --- /dev/null +++ b/dspl/liblapack/SRC/zpotri.f @@ -0,0 +1,159 @@ +*> \brief \b ZPOTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPOTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPOTRI( UPLO, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPOTRI computes the inverse of a complex Hermitian positive definite +*> matrix A using the Cholesky factorization A = U**H*U or A = L*L**H +*> computed by ZPOTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the triangular factor U or L from the Cholesky +*> factorization A = U**H*U or A = L*L**H, as computed by +*> ZPOTRF. +*> On exit, the upper or lower triangle of the (Hermitian) +*> inverse of A, overwriting the input factor U or L. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the (i,i) element of the factor U or L is +*> zero, and the inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16POcomputational +* +* ===================================================================== + SUBROUTINE ZPOTRI( UPLO, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLAUUM, ZTRTRI +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL ZTRTRI( UPLO, 'Non-unit', N, A, LDA, INFO ) + IF( INFO.GT.0 ) + $ RETURN +* +* Form inv(U) * inv(U)**H or inv(L)**H * inv(L). +* + CALL ZLAUUM( UPLO, N, A, LDA, INFO ) +* + RETURN +* +* End of ZPOTRI +* + END diff --git a/dspl/liblapack/SRC/zpotrs.f b/dspl/liblapack/SRC/zpotrs.f new file mode 100644 index 0000000..b476bcb --- /dev/null +++ b/dspl/liblapack/SRC/zpotrs.f @@ -0,0 +1,204 @@ +*> \brief \b ZPOTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPOTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPOTRS solves a system of linear equations A*X = B with a Hermitian +*> positive definite matrix A using the Cholesky factorization +*> A = U**H * U or A = L * L**H computed by ZPOTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H * U or A = L * L**H, as computed by ZPOTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16POcomputational +* +* ===================================================================== + SUBROUTINE ZPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPOTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U**H *U. +* +* Solve U**H *X = B, overwriting B with X. +* + CALL ZTRSM( 'Left', 'Upper', 'Conjugate transpose', 'Non-unit', + $ N, NRHS, ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL ZTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A*X = B where A = L*L**H. +* +* Solve L*X = B, overwriting B with X. +* + CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) +* +* Solve L**H *X = B, overwriting B with X. +* + CALL ZTRSM( 'Left', 'Lower', 'Conjugate transpose', 'Non-unit', + $ N, NRHS, ONE, A, LDA, B, LDB ) + END IF +* + RETURN +* +* End of ZPOTRS +* + END diff --git a/dspl/liblapack/SRC/zppcon.f b/dspl/liblapack/SRC/zppcon.f new file mode 100644 index 0000000..44d32e0 --- /dev/null +++ b/dspl/liblapack/SRC/zppcon.f @@ -0,0 +1,255 @@ +*> \brief \b ZPPCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPPCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPPCON( UPLO, N, AP, ANORM, RCOND, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPPCON estimates the reciprocal of the condition number (in the +*> 1-norm) of a complex Hermitian positive definite packed matrix using +*> the Cholesky factorization A = U**H*U or A = L*L**H computed by +*> ZPPTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H*U or A = L*L**H, packed columnwise in a linear +*> array. The j-th column of U or L is stored in the array AP +*> as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The 1-norm (or infinity-norm) of the Hermitian matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZPPCON( UPLO, N, AP, ANORM, RCOND, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + CHARACTER NORMIN + INTEGER IX, KASE + DOUBLE PRECISION AINVNM, SCALE, SCALEL, SCALEU, SMLNUM + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IZAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDRSCL, ZLACN2, ZLATPS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* + SMLNUM = DLAMCH( 'Safe minimum' ) +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + NORMIN = 'N' + 10 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( UPPER ) THEN +* +* Multiply by inv(U**H). +* + CALL ZLATPS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, AP, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(U). +* + CALL ZLATPS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEU, RWORK, INFO ) + ELSE +* +* Multiply by inv(L). +* + CALL ZLATPS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N, + $ AP, WORK, SCALEL, RWORK, INFO ) + NORMIN = 'Y' +* +* Multiply by inv(L**H). +* + CALL ZLATPS( 'Lower', 'Conjugate transpose', 'Non-unit', + $ NORMIN, N, AP, WORK, SCALEU, RWORK, INFO ) + END IF +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + SCALE = SCALEL*SCALEU + IF( SCALE.NE.ONE ) THEN + IX = IZAMAX( N, WORK, 1 ) + IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL ZDRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + 20 CONTINUE + RETURN +* +* End of ZPPCON +* + END diff --git a/dspl/liblapack/SRC/zppequ.f b/dspl/liblapack/SRC/zppequ.f new file mode 100644 index 0000000..424e338 --- /dev/null +++ b/dspl/liblapack/SRC/zppequ.f @@ -0,0 +1,240 @@ +*> \brief \b ZPPEQU +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPPEQU + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION S( * ) +* COMPLEX*16 AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPPEQU computes row and column scalings intended to equilibrate a +*> Hermitian positive definite matrix A in packed storage and reduce +*> its condition number (with respect to the two-norm). S contains the +*> scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix +*> B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal. +*> This choice of S puts the condition number of B within a factor N of +*> the smallest possible condition number over all possible diagonal +*> scalings. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the Hermitian matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is DOUBLE PRECISION +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Absolute value of largest matrix element. If AMAX is very +*> close to overflow or very close to underflow, the matrix +*> should be scaled. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + DOUBLE PRECISION AMAX, SCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION S( * ) + COMPLEX*16 AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, JJ + DOUBLE PRECISION SMIN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPPEQU', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + SCOND = ONE + AMAX = ZERO + RETURN + END IF +* +* Initialize SMIN and AMAX. +* + S( 1 ) = DBLE( AP( 1 ) ) + SMIN = S( 1 ) + AMAX = S( 1 ) +* + IF( UPPER ) THEN +* +* UPLO = 'U': Upper triangle of A is stored. +* Find the minimum and maximum diagonal elements. +* + JJ = 1 + DO 10 I = 2, N + JJ = JJ + I + S( I ) = DBLE( AP( JJ ) ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 10 CONTINUE +* + ELSE +* +* UPLO = 'L': Lower triangle of A is stored. +* Find the minimum and maximum diagonal elements. +* + JJ = 1 + DO 20 I = 2, N + JJ = JJ + N - I + 2 + S( I ) = DBLE( AP( JJ ) ) + SMIN = MIN( SMIN, S( I ) ) + AMAX = MAX( AMAX, S( I ) ) + 20 CONTINUE + END IF +* + IF( SMIN.LE.ZERO ) THEN +* +* Find the first non-positive diagonal element and return. +* + DO 30 I = 1, N + IF( S( I ).LE.ZERO ) THEN + INFO = I + RETURN + END IF + 30 CONTINUE + ELSE +* +* Set the scale factors to the reciprocals +* of the diagonal elements. +* + DO 40 I = 1, N + S( I ) = ONE / SQRT( S( I ) ) + 40 CONTINUE +* +* Compute SCOND = min(S(I)) / max(S(I)) +* + SCOND = SQRT( SMIN ) / SQRT( AMAX ) + END IF + RETURN +* +* End of ZPPEQU +* + END diff --git a/dspl/liblapack/SRC/zpprfs.f b/dspl/liblapack/SRC/zpprfs.f new file mode 100644 index 0000000..ae6a76e --- /dev/null +++ b/dspl/liblapack/SRC/zpprfs.f @@ -0,0 +1,428 @@ +*> \brief \b ZPPRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPPRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, +* BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX*16 AFP( * ), AP( * ), B( LDB, * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPPRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is Hermitian positive definite +*> and packed, and provides error bounds and backward error estimates +*> for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the Hermitian matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in] AFP +*> \verbatim +*> AFP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H*U or A = L*L**H, as computed by DPPTRF/ZPPTRF, +*> packed columnwise in a linear array in the same format as A +*> (see AP). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by ZPPTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, + $ BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ==================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IK, J, K, KASE, KK, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZHPMV, ZLACN2, ZPPTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL ZHPMV( UPLO, N, -CONE, AP, X( 1, J ), 1, CONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + KK = 1 + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + IK = KK + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 40 CONTINUE + RWORK( K ) = RWORK( K ) + ABS( DBLE( AP( KK+K-1 ) ) )* + $ XK + S + KK = KK + K + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + ABS( DBLE( AP( KK ) ) )*XK + IK = KK + 1 + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + KK = KK + ( N-K+1 ) + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZPPTRS( UPLO, N, 1, AFP, WORK, N, INFO ) + CALL ZAXPY( N, CONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**H). +* + CALL ZPPTRS( UPLO, N, 1, AFP, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL ZPPTRS( UPLO, N, 1, AFP, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of ZPPRFS +* + END diff --git a/dspl/liblapack/SRC/zppsv.f b/dspl/liblapack/SRC/zppsv.f new file mode 100644 index 0000000..49b2e0b --- /dev/null +++ b/dspl/liblapack/SRC/zppsv.f @@ -0,0 +1,205 @@ +*> \brief ZPPSV computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPPSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPPSV( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPPSV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian positive definite matrix stored in +*> packed format and X and B are N-by-NRHS matrices. +*> +*> The Cholesky decomposition is used to factor A as +*> A = U**H * U, if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is a lower triangular +*> matrix. The factored form of A is then used to solve the system of +*> equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization A = U**H*U or A = L*L**H, in the same storage +*> format as A. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i of A is not +*> positive definite, so the factorization could not be +*> completed, and the solution has not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the Hermitian matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = conjg(aji)) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZPPSV( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZPPTRF, ZPPTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPPSV ', -INFO ) + RETURN + END IF +* +* Compute the Cholesky factorization A = U**H *U or A = L*L**H. +* + CALL ZPPTRF( UPLO, N, AP, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO ) +* + END IF + RETURN +* +* End of ZPPSV +* + END diff --git a/dspl/liblapack/SRC/zppsvx.f b/dspl/liblapack/SRC/zppsvx.f new file mode 100644 index 0000000..d527748 --- /dev/null +++ b/dspl/liblapack/SRC/zppsvx.f @@ -0,0 +1,493 @@ +*> \brief ZPPSVX computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPPSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPPSVX( FACT, UPLO, N, NRHS, AP, AFP, EQUED, S, B, LDB, +* X, LDX, RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ), S( * ) +* COMPLEX*16 AFP( * ), AP( * ), B( LDB, * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPPSVX uses the Cholesky factorization A = U**H * U or A = L * L**H to +*> compute the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N Hermitian positive definite matrix stored in +*> packed format and X and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', real scaling factors are computed to equilibrate +*> the system: +*> diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to +*> factor the matrix A (after equilibration if FACT = 'E') as +*> A = U**H * U , if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix, L is a lower triangular +*> matrix, and **H indicates conjugate transpose. +*> +*> 3. If the leading i-by-i principal minor is not positive definite, +*> then the routine returns with INFO = i. Otherwise, the factored +*> form of A is used to estimate the condition number of the matrix +*> A. If the reciprocal of the condition number is less than machine +*> precision, INFO = N+1 is returned as a warning, but the routine +*> still goes on to solve for X and compute error bounds as +*> described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(S) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AFP contains the factored form of A. +*> If EQUED = 'Y', the matrix A has been equilibrated +*> with scaling factors given by S. AP and AFP will not +*> be modified. +*> = 'N': The matrix A will be copied to AFP and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AFP and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array, except if FACT = 'F' +*> and EQUED = 'Y', then A must contain the equilibrated matrix +*> diag(S)*A*diag(S). The j-th column of A is stored in the +*> array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. A is not modified if +*> FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in,out] AFP +*> \verbatim +*> AFP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> If FACT = 'F', then AFP is an input argument and on entry +*> contains the triangular factor U or L from the Cholesky +*> factorization A = U**H*U or A = L*L**H, in the same storage +*> format as A. If EQUED .ne. 'N', then AFP is the factored +*> form of the equilibrated matrix A. +*> +*> If FACT = 'N', then AFP is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**H * U or A = L * L**H of the original +*> matrix A. +*> +*> If FACT = 'E', then AFP is an output argument and on exit +*> returns the triangular factor U or L from the Cholesky +*> factorization A = U**H * U or A = L * L**H of the equilibrated +*> matrix A (see the description of AP for the form of the +*> equilibrated matrix). +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Equilibration was done, i.e., A has been replaced by +*> diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A; not accessed if EQUED = 'N'. S is +*> an input argument if FACT = 'F'; otherwise, S is an output +*> argument. If FACT = 'F' and EQUED = 'Y', each element of S +*> must be positive. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', +*> B is overwritten by diag(S) * B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to +*> the original system of equations. Note that if EQUED = 'Y', +*> A and B are modified on exit, and the solution to the +*> equilibrated system is inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The estimate of the reciprocal condition number of the matrix +*> A after equilibration (if done). If RCOND is less than the +*> machine precision (in particular, if RCOND = 0), the matrix +*> is singular to working precision. This condition is +*> indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: the leading minor of order i of A is +*> not positive definite, so the factorization +*> could not be completed, and the solution has not +*> been computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16OTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the Hermitian matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = conjg(aji)) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZPPSVX( FACT, UPLO, N, NRHS, AP, AFP, EQUED, S, B, LDB, + $ X, LDX, RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ), S( * ) + COMPLEX*16 AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER I, INFEQU, J + DOUBLE PRECISION AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHP + EXTERNAL LSAME, DLAMCH, ZLANHP +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZLACPY, ZLAQHP, ZPPCON, ZPPEQU, + $ ZPPRFS, ZPPTRF, ZPPTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + END IF +* +* Test the input parameters. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -7 + ELSE + IF( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -8 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPPSVX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL ZPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL ZLAQHP( UPLO, N, AP, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + END IF +* +* Scale the right-hand side. +* + IF( RCEQU ) THEN + DO 30 J = 1, NRHS + DO 20 I = 1, N + B( I, J ) = S( I )*B( I, J ) + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the Cholesky factorization A = U**H * U or A = L * L**H. +* + CALL ZCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 ) + CALL ZPPTRF( UPLO, N, AFP, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = ZLANHP( 'I', UPLO, N, AP, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL ZPPCON( UPLO, N, AFP, ANORM, RCOND, WORK, RWORK, INFO ) +* +* Compute the solution matrix X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZPPTRS( UPLO, N, NRHS, AFP, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL ZPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, BERR, + $ WORK, RWORK, INFO ) +* +* Transform the solution matrix X to a solution of the original +* system. +* + IF( RCEQU ) THEN + DO 50 J = 1, NRHS + DO 40 I = 1, N + X( I, J ) = S( I )*X( I, J ) + 40 CONTINUE + 50 CONTINUE + DO 60 J = 1, NRHS + FERR( J ) = FERR( J ) / SCOND + 60 CONTINUE + END IF +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of ZPPSVX +* + END diff --git a/dspl/liblapack/SRC/zpptrf.f b/dspl/liblapack/SRC/zpptrf.f new file mode 100644 index 0000000..6e50b46 --- /dev/null +++ b/dspl/liblapack/SRC/zpptrf.f @@ -0,0 +1,241 @@ +*> \brief \b ZPPTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPPTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPPTRF( UPLO, N, AP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* COMPLEX*16 AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPPTRF computes the Cholesky factorization of a complex Hermitian +*> positive definite matrix A stored in packed format. +*> +*> The factorization has the form +*> A = U**H * U, if UPLO = 'U', or +*> A = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the Hermitian matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> +*> On exit, if INFO = 0, the triangular factor U or L from the +*> Cholesky factorization A = U**H*U or A = L*L**H, in the same +*> storage format as A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the factorization could not be +*> completed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the Hermitian matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = conjg(aji)) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZPPTRF( UPLO, N, AP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JC, JJ + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZHPR, ZTPSV +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPPTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization A = U**H * U. +* + JJ = 0 + DO 10 J = 1, N + JC = JJ + 1 + JJ = JJ + J +* +* Compute elements 1:J-1 of column J. +* + IF( J.GT.1 ) + $ CALL ZTPSV( 'Upper', 'Conjugate transpose', 'Non-unit', + $ J-1, AP, AP( JC ), 1 ) +* +* Compute U(J,J) and test for non-positive-definiteness. +* + AJJ = DBLE( AP( JJ ) ) - ZDOTC( J-1, AP( JC ), 1, AP( JC ), + $ 1 ) + IF( AJJ.LE.ZERO ) THEN + AP( JJ ) = AJJ + GO TO 30 + END IF + AP( JJ ) = SQRT( AJJ ) + 10 CONTINUE + ELSE +* +* Compute the Cholesky factorization A = L * L**H. +* + JJ = 1 + DO 20 J = 1, N +* +* Compute L(J,J) and test for non-positive-definiteness. +* + AJJ = DBLE( AP( JJ ) ) + IF( AJJ.LE.ZERO ) THEN + AP( JJ ) = AJJ + GO TO 30 + END IF + AJJ = SQRT( AJJ ) + AP( JJ ) = AJJ +* +* Compute elements J+1:N of column J and update the trailing +* submatrix. +* + IF( J.LT.N ) THEN + CALL ZDSCAL( N-J, ONE / AJJ, AP( JJ+1 ), 1 ) + CALL ZHPR( 'Lower', N-J, -ONE, AP( JJ+1 ), 1, + $ AP( JJ+N-J+1 ) ) + JJ = JJ + N - J + 1 + END IF + 20 CONTINUE + END IF + GO TO 40 +* + 30 CONTINUE + INFO = J +* + 40 CONTINUE + RETURN +* +* End of ZPPTRF +* + END diff --git a/dspl/liblapack/SRC/zpptri.f b/dspl/liblapack/SRC/zpptri.f new file mode 100644 index 0000000..cde2f6d --- /dev/null +++ b/dspl/liblapack/SRC/zpptri.f @@ -0,0 +1,190 @@ +*> \brief \b ZPPTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPPTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPPTRI( UPLO, N, AP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* COMPLEX*16 AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPPTRI computes the inverse of a complex Hermitian positive definite +*> matrix A using the Cholesky factorization A = U**H*U or A = L*L**H +*> computed by ZPPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular factor is stored in AP; +*> = 'L': Lower triangular factor is stored in AP. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the triangular factor U or L from the Cholesky +*> factorization A = U**H*U or A = L*L**H, packed columnwise as +*> a linear array. The j-th column of U or L is stored in the +*> array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +*> +*> On exit, the upper or lower triangle of the (Hermitian) +*> inverse of A, overwriting the input factor U or L. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the (i,i) element of the factor U or L is +*> zero, and the inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZPPTRI( UPLO, N, AP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, JC, JJ, JJN + DOUBLE PRECISION AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDSCAL, ZHPR, ZTPMV, ZTPTRI +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPPTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Invert the triangular Cholesky factor U or L. +* + CALL ZTPTRI( UPLO, 'Non-unit', N, AP, INFO ) + IF( INFO.GT.0 ) + $ RETURN + IF( UPPER ) THEN +* +* Compute the product inv(U) * inv(U)**H. +* + JJ = 0 + DO 10 J = 1, N + JC = JJ + 1 + JJ = JJ + J + IF( J.GT.1 ) + $ CALL ZHPR( 'Upper', J-1, ONE, AP( JC ), 1, AP ) + AJJ = AP( JJ ) + CALL ZDSCAL( J, AJJ, AP( JC ), 1 ) + 10 CONTINUE +* + ELSE +* +* Compute the product inv(L)**H * inv(L). +* + JJ = 1 + DO 20 J = 1, N + JJN = JJ + N - J + 1 + AP( JJ ) = DBLE( ZDOTC( N-J+1, AP( JJ ), 1, AP( JJ ), 1 ) ) + IF( J.LT.N ) + $ CALL ZTPMV( 'Lower', 'Conjugate transpose', 'Non-unit', + $ N-J, AP( JJN ), AP( JJ+1 ), 1 ) + JJ = JJN + 20 CONTINUE + END IF +* + RETURN +* +* End of ZPPTRI +* + END diff --git a/dspl/liblapack/SRC/zpptrs.f b/dspl/liblapack/SRC/zpptrs.f new file mode 100644 index 0000000..4856134 --- /dev/null +++ b/dspl/liblapack/SRC/zpptrs.f @@ -0,0 +1,203 @@ +*> \brief \b ZPPTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPPTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPPTRS solves a system of linear equations A*X = B with a Hermitian +*> positive definite matrix A in packed storage using the Cholesky +*> factorization A = U**H * U or A = L * L**H computed by ZPPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The triangular factor U or L from the Cholesky factorization +*> A = U**H * U or A = L * L**H, packed columnwise in a linear +*> array. The j-th column of U or L is stored in the array AP +*> as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZTPSV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B where A = U**H * U. +* + DO 10 I = 1, NRHS +* +* Solve U**H *X = B, overwriting B with X. +* + CALL ZTPSV( 'Upper', 'Conjugate transpose', 'Non-unit', N, + $ AP, B( 1, I ), 1 ) +* +* Solve U*X = B, overwriting B with X. +* + CALL ZTPSV( 'Upper', 'No transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) + 10 CONTINUE + ELSE +* +* Solve A*X = B where A = L * L**H. +* + DO 20 I = 1, NRHS +* +* Solve L*Y = B, overwriting B with X. +* + CALL ZTPSV( 'Lower', 'No transpose', 'Non-unit', N, AP, + $ B( 1, I ), 1 ) +* +* Solve L**H *X = Y, overwriting B with X. +* + CALL ZTPSV( 'Lower', 'Conjugate transpose', 'Non-unit', N, + $ AP, B( 1, I ), 1 ) + 20 CONTINUE + END IF +* + RETURN +* +* End of ZPPTRS +* + END diff --git a/dspl/liblapack/SRC/zpstf2.f b/dspl/liblapack/SRC/zpstf2.f new file mode 100644 index 0000000..696d83f --- /dev/null +++ b/dspl/liblapack/SRC/zpstf2.f @@ -0,0 +1,406 @@ +*> \brief \b ZPSTF2 computes the Cholesky factorization with complete pivoting of a complex Hermitian positive semidefinite matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPSTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPSTF2( UPLO, N, A, LDA, PIV, RANK, TOL, WORK, INFO ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION TOL +* INTEGER INFO, LDA, N, RANK +* CHARACTER UPLO +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* DOUBLE PRECISION WORK( 2*N ) +* INTEGER PIV( N ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPSTF2 computes the Cholesky factorization with complete +*> pivoting of a complex Hermitian positive semidefinite matrix A. +*> +*> The factorization has the form +*> P**T * A * P = U**H * U , if UPLO = 'U', +*> P**T * A * P = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular, and +*> P is stored as vector PIV. +*> +*> This algorithm does not attempt to check that A is positive +*> semidefinite. This version of the algorithm calls level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization as above. +*> \endverbatim +*> +*> \param[out] PIV +*> \verbatim +*> PIV is INTEGER array, dimension (N) +*> PIV is such that the nonzero entries are P( PIV(K), K ) = 1. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The rank of A given by the number of steps the algorithm +*> completed. +*> \endverbatim +*> +*> \param[in] TOL +*> \verbatim +*> TOL is DOUBLE PRECISION +*> User defined tolerance. If TOL < 0, then N*U*MAX( A( K,K ) ) +*> will be used. The algorithm terminates at the (K-1)st step +*> if the pivot <= TOL. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> Work space. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> < 0: If INFO = -K, the K-th argument had an illegal value, +*> = 0: algorithm completed successfully, and +*> > 0: the matrix A is either rank deficient with computed rank +*> as returned in RANK, or is not positive semidefinite. See +*> Section 7 of LAPACK Working Note #161 for further +*> information. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZPSTF2( UPLO, N, A, LDA, PIV, RANK, TOL, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + DOUBLE PRECISION TOL + INTEGER INFO, LDA, N, RANK + CHARACTER UPLO +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) + DOUBLE PRECISION WORK( 2*N ) + INTEGER PIV( N ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + COMPLEX*16 ZTEMP + DOUBLE PRECISION AJJ, DSTOP, DTEMP + INTEGER I, ITEMP, J, PVT + LOGICAL UPPER +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + LOGICAL LSAME, DISNAN + EXTERNAL DLAMCH, LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL ZDSCAL, ZGEMV, ZLACGV, ZSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCONJG, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPSTF2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Initialize PIV +* + DO 100 I = 1, N + PIV( I ) = I + 100 CONTINUE +* +* Compute stopping value +* + DO 110 I = 1, N + WORK( I ) = DBLE( A( I, I ) ) + 110 CONTINUE + PVT = MAXLOC( WORK( 1:N ), 1 ) + AJJ = DBLE( A( PVT, PVT ) ) + IF( AJJ.LE.ZERO.OR.DISNAN( AJJ ) ) THEN + RANK = 0 + INFO = 1 + GO TO 200 + END IF +* +* Compute stopping value if not supplied +* + IF( TOL.LT.ZERO ) THEN + DSTOP = N * DLAMCH( 'Epsilon' ) * AJJ + ELSE + DSTOP = TOL + END IF +* +* Set first half of WORK to zero, holds dot products +* + DO 120 I = 1, N + WORK( I ) = 0 + 120 CONTINUE +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization P**T * A * P = U**H* U +* + DO 150 J = 1, N +* +* Find pivot, test for exit, else swap rows and columns +* Update dot products, compute possible pivots which are +* stored in the second half of WORK +* + DO 130 I = J, N +* + IF( J.GT.1 ) THEN + WORK( I ) = WORK( I ) + + $ DBLE( DCONJG( A( J-1, I ) )* + $ A( J-1, I ) ) + END IF + WORK( N+I ) = DBLE( A( I, I ) ) - WORK( I ) +* + 130 CONTINUE +* + IF( J.GT.1 ) THEN + ITEMP = MAXLOC( WORK( (N+J):(2*N) ), 1 ) + PVT = ITEMP + J - 1 + AJJ = WORK( N+PVT ) + IF( AJJ.LE.DSTOP.OR.DISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 190 + END IF + END IF +* + IF( J.NE.PVT ) THEN +* +* Pivot OK, so can now swap pivot rows and columns +* + A( PVT, PVT ) = A( J, J ) + CALL ZSWAP( J-1, A( 1, J ), 1, A( 1, PVT ), 1 ) + IF( PVT.LT.N ) + $ CALL ZSWAP( N-PVT, A( J, PVT+1 ), LDA, + $ A( PVT, PVT+1 ), LDA ) + DO 140 I = J + 1, PVT - 1 + ZTEMP = DCONJG( A( J, I ) ) + A( J, I ) = DCONJG( A( I, PVT ) ) + A( I, PVT ) = ZTEMP + 140 CONTINUE + A( J, PVT ) = DCONJG( A( J, PVT ) ) +* +* Swap dot products and PIV +* + DTEMP = WORK( J ) + WORK( J ) = WORK( PVT ) + WORK( PVT ) = DTEMP + ITEMP = PIV( PVT ) + PIV( PVT ) = PIV( J ) + PIV( J ) = ITEMP + END IF +* + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of row J +* + IF( J.LT.N ) THEN + CALL ZLACGV( J-1, A( 1, J ), 1 ) + CALL ZGEMV( 'Trans', J-1, N-J, -CONE, A( 1, J+1 ), LDA, + $ A( 1, J ), 1, CONE, A( J, J+1 ), LDA ) + CALL ZLACGV( J-1, A( 1, J ), 1 ) + CALL ZDSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA ) + END IF +* + 150 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization P**T * A * P = L * L**H +* + DO 180 J = 1, N +* +* Find pivot, test for exit, else swap rows and columns +* Update dot products, compute possible pivots which are +* stored in the second half of WORK +* + DO 160 I = J, N +* + IF( J.GT.1 ) THEN + WORK( I ) = WORK( I ) + + $ DBLE( DCONJG( A( I, J-1 ) )* + $ A( I, J-1 ) ) + END IF + WORK( N+I ) = DBLE( A( I, I ) ) - WORK( I ) +* + 160 CONTINUE +* + IF( J.GT.1 ) THEN + ITEMP = MAXLOC( WORK( (N+J):(2*N) ), 1 ) + PVT = ITEMP + J - 1 + AJJ = WORK( N+PVT ) + IF( AJJ.LE.DSTOP.OR.DISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 190 + END IF + END IF +* + IF( J.NE.PVT ) THEN +* +* Pivot OK, so can now swap pivot rows and columns +* + A( PVT, PVT ) = A( J, J ) + CALL ZSWAP( J-1, A( J, 1 ), LDA, A( PVT, 1 ), LDA ) + IF( PVT.LT.N ) + $ CALL ZSWAP( N-PVT, A( PVT+1, J ), 1, A( PVT+1, PVT ), + $ 1 ) + DO 170 I = J + 1, PVT - 1 + ZTEMP = DCONJG( A( I, J ) ) + A( I, J ) = DCONJG( A( PVT, I ) ) + A( PVT, I ) = ZTEMP + 170 CONTINUE + A( PVT, J ) = DCONJG( A( PVT, J ) ) +* +* Swap dot products and PIV +* + DTEMP = WORK( J ) + WORK( J ) = WORK( PVT ) + WORK( PVT ) = DTEMP + ITEMP = PIV( PVT ) + PIV( PVT ) = PIV( J ) + PIV( J ) = ITEMP + END IF +* + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of column J +* + IF( J.LT.N ) THEN + CALL ZLACGV( J-1, A( J, 1 ), LDA ) + CALL ZGEMV( 'No Trans', N-J, J-1, -CONE, A( J+1, 1 ), + $ LDA, A( J, 1 ), LDA, CONE, A( J+1, J ), 1 ) + CALL ZLACGV( J-1, A( J, 1 ), LDA ) + CALL ZDSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 ) + END IF +* + 180 CONTINUE +* + END IF +* +* Ran to completion, A has full rank +* + RANK = N +* + GO TO 200 + 190 CONTINUE +* +* Rank is number of steps completed. Set INFO = 1 to signal +* that the factorization cannot be used to solve a system. +* + RANK = J - 1 + INFO = 1 +* + 200 CONTINUE + RETURN +* +* End of ZPSTF2 +* + END diff --git a/dspl/liblapack/SRC/zpstrf.f b/dspl/liblapack/SRC/zpstrf.f new file mode 100644 index 0000000..2a2937c --- /dev/null +++ b/dspl/liblapack/SRC/zpstrf.f @@ -0,0 +1,464 @@ +*> \brief \b ZPSTRF computes the Cholesky factorization with complete pivoting of a complex Hermitian positive semidefinite matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPSTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPSTRF( UPLO, N, A, LDA, PIV, RANK, TOL, WORK, INFO ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION TOL +* INTEGER INFO, LDA, N, RANK +* CHARACTER UPLO +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* DOUBLE PRECISION WORK( 2*N ) +* INTEGER PIV( N ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPSTRF computes the Cholesky factorization with complete +*> pivoting of a complex Hermitian positive semidefinite matrix A. +*> +*> The factorization has the form +*> P**T * A * P = U**H * U , if UPLO = 'U', +*> P**T * A * P = L * L**H, if UPLO = 'L', +*> where U is an upper triangular matrix and L is lower triangular, and +*> P is stored as vector PIV. +*> +*> This algorithm does not attempt to check that A is positive +*> semidefinite. This version of the algorithm calls level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n by n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the factor U or L from the Cholesky +*> factorization as above. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] PIV +*> \verbatim +*> PIV is INTEGER array, dimension (N) +*> PIV is such that the nonzero entries are P( PIV(K), K ) = 1. +*> \endverbatim +*> +*> \param[out] RANK +*> \verbatim +*> RANK is INTEGER +*> The rank of A given by the number of steps the algorithm +*> completed. +*> \endverbatim +*> +*> \param[in] TOL +*> \verbatim +*> TOL is DOUBLE PRECISION +*> User defined tolerance. If TOL < 0, then N*U*MAX( A(K,K) ) +*> will be used. The algorithm terminates at the (K-1)st step +*> if the pivot <= TOL. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (2*N) +*> Work space. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> < 0: If INFO = -K, the K-th argument had an illegal value, +*> = 0: algorithm completed successfully, and +*> > 0: the matrix A is either rank deficient with computed rank +*> as returned in RANK, or is not positive semidefinite. See +*> Section 7 of LAPACK Working Note #161 for further +*> information. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZPSTRF( UPLO, N, A, LDA, PIV, RANK, TOL, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + DOUBLE PRECISION TOL + INTEGER INFO, LDA, N, RANK + CHARACTER UPLO +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) + DOUBLE PRECISION WORK( 2*N ) + INTEGER PIV( N ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + COMPLEX*16 ZTEMP + DOUBLE PRECISION AJJ, DSTOP, DTEMP + INTEGER I, ITEMP, J, JB, K, NB, PVT + LOGICAL UPPER +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + INTEGER ILAENV + LOGICAL LSAME, DISNAN + EXTERNAL DLAMCH, ILAENV, LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL ZDSCAL, ZGEMV, ZHERK, ZLACGV, ZPSTF2, ZSWAP, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCONJG, MAX, MIN, SQRT, MAXLOC +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPSTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get block size +* + NB = ILAENV( 1, 'ZPOTRF', UPLO, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL ZPSTF2( UPLO, N, A( 1, 1 ), LDA, PIV, RANK, TOL, WORK, + $ INFO ) + GO TO 230 +* + ELSE +* +* Initialize PIV +* + DO 100 I = 1, N + PIV( I ) = I + 100 CONTINUE +* +* Compute stopping value +* + DO 110 I = 1, N + WORK( I ) = DBLE( A( I, I ) ) + 110 CONTINUE + PVT = MAXLOC( WORK( 1:N ), 1 ) + AJJ = DBLE( A( PVT, PVT ) ) + IF( AJJ.LE.ZERO.OR.DISNAN( AJJ ) ) THEN + RANK = 0 + INFO = 1 + GO TO 230 + END IF +* +* Compute stopping value if not supplied +* + IF( TOL.LT.ZERO ) THEN + DSTOP = N * DLAMCH( 'Epsilon' ) * AJJ + ELSE + DSTOP = TOL + END IF +* +* + IF( UPPER ) THEN +* +* Compute the Cholesky factorization P**T * A * P = U**H * U +* + DO 160 K = 1, N, NB +* +* Account for last block not being NB wide +* + JB = MIN( NB, N-K+1 ) +* +* Set relevant part of first half of WORK to zero, +* holds dot products +* + DO 120 I = K, N + WORK( I ) = 0 + 120 CONTINUE +* + DO 150 J = K, K + JB - 1 +* +* Find pivot, test for exit, else swap rows and columns +* Update dot products, compute possible pivots which are +* stored in the second half of WORK +* + DO 130 I = J, N +* + IF( J.GT.K ) THEN + WORK( I ) = WORK( I ) + + $ DBLE( DCONJG( A( J-1, I ) )* + $ A( J-1, I ) ) + END IF + WORK( N+I ) = DBLE( A( I, I ) ) - WORK( I ) +* + 130 CONTINUE +* + IF( J.GT.1 ) THEN + ITEMP = MAXLOC( WORK( (N+J):(2*N) ), 1 ) + PVT = ITEMP + J - 1 + AJJ = WORK( N+PVT ) + IF( AJJ.LE.DSTOP.OR.DISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 220 + END IF + END IF +* + IF( J.NE.PVT ) THEN +* +* Pivot OK, so can now swap pivot rows and columns +* + A( PVT, PVT ) = A( J, J ) + CALL ZSWAP( J-1, A( 1, J ), 1, A( 1, PVT ), 1 ) + IF( PVT.LT.N ) + $ CALL ZSWAP( N-PVT, A( J, PVT+1 ), LDA, + $ A( PVT, PVT+1 ), LDA ) + DO 140 I = J + 1, PVT - 1 + ZTEMP = DCONJG( A( J, I ) ) + A( J, I ) = DCONJG( A( I, PVT ) ) + A( I, PVT ) = ZTEMP + 140 CONTINUE + A( J, PVT ) = DCONJG( A( J, PVT ) ) +* +* Swap dot products and PIV +* + DTEMP = WORK( J ) + WORK( J ) = WORK( PVT ) + WORK( PVT ) = DTEMP + ITEMP = PIV( PVT ) + PIV( PVT ) = PIV( J ) + PIV( J ) = ITEMP + END IF +* + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of row J. +* + IF( J.LT.N ) THEN + CALL ZLACGV( J-1, A( 1, J ), 1 ) + CALL ZGEMV( 'Trans', J-K, N-J, -CONE, A( K, J+1 ), + $ LDA, A( K, J ), 1, CONE, A( J, J+1 ), + $ LDA ) + CALL ZLACGV( J-1, A( 1, J ), 1 ) + CALL ZDSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA ) + END IF +* + 150 CONTINUE +* +* Update trailing matrix, J already incremented +* + IF( K+JB.LE.N ) THEN + CALL ZHERK( 'Upper', 'Conj Trans', N-J+1, JB, -ONE, + $ A( K, J ), LDA, ONE, A( J, J ), LDA ) + END IF +* + 160 CONTINUE +* + ELSE +* +* Compute the Cholesky factorization P**T * A * P = L * L**H +* + DO 210 K = 1, N, NB +* +* Account for last block not being NB wide +* + JB = MIN( NB, N-K+1 ) +* +* Set relevant part of first half of WORK to zero, +* holds dot products +* + DO 170 I = K, N + WORK( I ) = 0 + 170 CONTINUE +* + DO 200 J = K, K + JB - 1 +* +* Find pivot, test for exit, else swap rows and columns +* Update dot products, compute possible pivots which are +* stored in the second half of WORK +* + DO 180 I = J, N +* + IF( J.GT.K ) THEN + WORK( I ) = WORK( I ) + + $ DBLE( DCONJG( A( I, J-1 ) )* + $ A( I, J-1 ) ) + END IF + WORK( N+I ) = DBLE( A( I, I ) ) - WORK( I ) +* + 180 CONTINUE +* + IF( J.GT.1 ) THEN + ITEMP = MAXLOC( WORK( (N+J):(2*N) ), 1 ) + PVT = ITEMP + J - 1 + AJJ = WORK( N+PVT ) + IF( AJJ.LE.DSTOP.OR.DISNAN( AJJ ) ) THEN + A( J, J ) = AJJ + GO TO 220 + END IF + END IF +* + IF( J.NE.PVT ) THEN +* +* Pivot OK, so can now swap pivot rows and columns +* + A( PVT, PVT ) = A( J, J ) + CALL ZSWAP( J-1, A( J, 1 ), LDA, A( PVT, 1 ), LDA ) + IF( PVT.LT.N ) + $ CALL ZSWAP( N-PVT, A( PVT+1, J ), 1, + $ A( PVT+1, PVT ), 1 ) + DO 190 I = J + 1, PVT - 1 + ZTEMP = DCONJG( A( I, J ) ) + A( I, J ) = DCONJG( A( PVT, I ) ) + A( PVT, I ) = ZTEMP + 190 CONTINUE + A( PVT, J ) = DCONJG( A( PVT, J ) ) +* +* +* Swap dot products and PIV +* + DTEMP = WORK( J ) + WORK( J ) = WORK( PVT ) + WORK( PVT ) = DTEMP + ITEMP = PIV( PVT ) + PIV( PVT ) = PIV( J ) + PIV( J ) = ITEMP + END IF +* + AJJ = SQRT( AJJ ) + A( J, J ) = AJJ +* +* Compute elements J+1:N of column J. +* + IF( J.LT.N ) THEN + CALL ZLACGV( J-1, A( J, 1 ), LDA ) + CALL ZGEMV( 'No Trans', N-J, J-K, -CONE, + $ A( J+1, K ), LDA, A( J, K ), LDA, CONE, + $ A( J+1, J ), 1 ) + CALL ZLACGV( J-1, A( J, 1 ), LDA ) + CALL ZDSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 ) + END IF +* + 200 CONTINUE +* +* Update trailing matrix, J already incremented +* + IF( K+JB.LE.N ) THEN + CALL ZHERK( 'Lower', 'No Trans', N-J+1, JB, -ONE, + $ A( J, K ), LDA, ONE, A( J, J ), LDA ) + END IF +* + 210 CONTINUE +* + END IF + END IF +* +* Ran to completion, A has full rank +* + RANK = N +* + GO TO 230 + 220 CONTINUE +* +* Rank is the number of steps completed. Set INFO = 1 to signal +* that the factorization cannot be used to solve a system. +* + RANK = J - 1 + INFO = 1 +* + 230 CONTINUE + RETURN +* +* End of ZPSTRF +* + END diff --git a/dspl/liblapack/SRC/zptcon.f b/dspl/liblapack/SRC/zptcon.f new file mode 100644 index 0000000..d2b560f --- /dev/null +++ b/dspl/liblapack/SRC/zptcon.f @@ -0,0 +1,223 @@ +*> \brief \b ZPTCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPTCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPTCON( N, D, E, ANORM, RCOND, RWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), RWORK( * ) +* COMPLEX*16 E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPTCON computes the reciprocal of the condition number (in the +*> 1-norm) of a complex Hermitian positive definite tridiagonal matrix +*> using the factorization A = L*D*L**H or A = U**H*D*U computed by +*> ZPTTRF. +*> +*> Norm(inv(A)) is computed by a direct method, and the reciprocal of +*> the condition number is computed as +*> RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the diagonal matrix D from the +*> factorization of A, as computed by ZPTTRF. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N-1) +*> The (n-1) off-diagonal elements of the unit bidiagonal factor +*> U or L from the factorization of A, as computed by ZPTTRF. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is the +*> 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16PTcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The method used is described in Nicholas J. Higham, "Efficient +*> Algorithms for Computing the Condition Number of a Tridiagonal +*> Matrix", SIAM J. Sci. Stat. Comput., Vol. 7, No. 1, January 1986. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZPTCON( N, D, E, ANORM, RCOND, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), RWORK( * ) + COMPLEX*16 E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, IX + DOUBLE PRECISION AINVNM +* .. +* .. External Functions .. + INTEGER IDAMAX + EXTERNAL IDAMAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPTCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.EQ.ZERO ) THEN + RETURN + END IF +* +* Check that D(1:N) is positive. +* + DO 10 I = 1, N + IF( D( I ).LE.ZERO ) + $ RETURN + 10 CONTINUE +* +* Solve M(A) * x = e, where M(A) = (m(i,j)) is given by +* +* m(i,j) = abs(A(i,j)), i = j, +* m(i,j) = -abs(A(i,j)), i .ne. j, +* +* and e = [ 1, 1, ..., 1 ]**T. Note M(A) = M(L)*D*M(L)**H. +* +* Solve M(L) * x = e. +* + RWORK( 1 ) = ONE + DO 20 I = 2, N + RWORK( I ) = ONE + RWORK( I-1 )*ABS( E( I-1 ) ) + 20 CONTINUE +* +* Solve D * M(L)**H * x = b. +* + RWORK( N ) = RWORK( N ) / D( N ) + DO 30 I = N - 1, 1, -1 + RWORK( I ) = RWORK( I ) / D( I ) + RWORK( I+1 )*ABS( E( I ) ) + 30 CONTINUE +* +* Compute AINVNM = max(x(i)), 1<=i<=n. +* + IX = IDAMAX( N, RWORK, 1 ) + AINVNM = ABS( RWORK( IX ) ) +* +* Compute the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of ZPTCON +* + END diff --git a/dspl/liblapack/SRC/zpteqr.f b/dspl/liblapack/SRC/zpteqr.f new file mode 100644 index 0000000..0b443e1 --- /dev/null +++ b/dspl/liblapack/SRC/zpteqr.f @@ -0,0 +1,263 @@ +*> \brief \b ZPTEQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPTEQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPZ +* INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ), WORK( * ) +* COMPLEX*16 Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPTEQR computes all eigenvalues and, optionally, eigenvectors of a +*> symmetric positive definite tridiagonal matrix by first factoring the +*> matrix using DPTTRF and then calling ZBDSQR to compute the singular +*> values of the bidiagonal factor. +*> +*> This routine computes the eigenvalues of the positive definite +*> tridiagonal matrix to high relative accuracy. This means that if the +*> eigenvalues range over many orders of magnitude in size, then the +*> small eigenvalues and corresponding eigenvectors will be computed +*> more accurately than, for example, with the standard QR method. +*> +*> The eigenvectors of a full or band positive definite Hermitian matrix +*> can also be found if ZHETRD, ZHPTRD, or ZHBTRD has been used to +*> reduce this matrix to tridiagonal form. (The reduction to +*> tridiagonal form, however, may preclude the possibility of obtaining +*> high relative accuracy in the small eigenvalues of the original +*> matrix, if these eigenvalues range over many orders of magnitude.) +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only. +*> = 'V': Compute eigenvectors of original Hermitian +*> matrix also. Array Z contains the unitary matrix +*> used to reduce the original matrix to tridiagonal +*> form. +*> = 'I': Compute eigenvectors of tridiagonal matrix also. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix. +*> On normal exit, D contains the eigenvalues, in descending +*> order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', the unitary matrix used in the +*> reduction to tridiagonal form. +*> On exit, if COMPZ = 'V', the orthonormal eigenvectors of the +*> original Hermitian matrix; +*> if COMPZ = 'I', the orthonormal eigenvectors of the +*> tridiagonal matrix. +*> If INFO > 0 on exit, Z contains the eigenvectors associated +*> with only the stored eigenvalues. +*> If COMPZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> COMPZ = 'V' or 'I', LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (4*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, and i is: +*> <= N the Cholesky factorization of the matrix could +*> not be performed because the i-th principal minor +*> was not positive definite. +*> > N the SVD algorithm failed to converge; +*> if INFO = N+i, i off-diagonal elements of the +*> bidiagonal factor did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16PTcomputational +* +* ===================================================================== + SUBROUTINE ZPTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), WORK( * ) + COMPLEX*16 Z( LDZ, * ) +* .. +* +* ==================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DPTTRF, XERBLA, ZBDSQR, ZLASET +* .. +* .. Local Arrays .. + COMPLEX*16 C( 1, 1 ), VT( 1, 1 ) +* .. +* .. Local Scalars .. + INTEGER I, ICOMPZ, NRU +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPTEQR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ICOMPZ.GT.0 ) + $ Z( 1, 1 ) = CONE + RETURN + END IF + IF( ICOMPZ.EQ.2 ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, Z, LDZ ) +* +* Call DPTTRF to factor the matrix. +* + CALL DPTTRF( N, D, E, INFO ) + IF( INFO.NE.0 ) + $ RETURN + DO 10 I = 1, N + D( I ) = SQRT( D( I ) ) + 10 CONTINUE + DO 20 I = 1, N - 1 + E( I ) = E( I )*D( I ) + 20 CONTINUE +* +* Call ZBDSQR to compute the singular values/vectors of the +* bidiagonal factor. +* + IF( ICOMPZ.GT.0 ) THEN + NRU = N + ELSE + NRU = 0 + END IF + CALL ZBDSQR( 'Lower', N, 0, NRU, 0, D, E, VT, 1, Z, LDZ, C, 1, + $ WORK, INFO ) +* +* Square the singular values. +* + IF( INFO.EQ.0 ) THEN + DO 30 I = 1, N + D( I ) = D( I )*D( I ) + 30 CONTINUE + ELSE + INFO = N + INFO + END IF +* + RETURN +* +* End of ZPTEQR +* + END diff --git a/dspl/liblapack/SRC/zptrfs.f b/dspl/liblapack/SRC/zptrfs.f new file mode 100644 index 0000000..f12ac31 --- /dev/null +++ b/dspl/liblapack/SRC/zptrfs.f @@ -0,0 +1,468 @@ +*> \brief \b ZPTRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPTRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPTRFS( UPLO, N, NRHS, D, E, DF, EF, B, LDB, X, LDX, +* FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION BERR( * ), D( * ), DF( * ), FERR( * ), +* $ RWORK( * ) +* COMPLEX*16 B( LDB, * ), E( * ), EF( * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPTRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is Hermitian positive definite +*> and tridiagonal, and provides error bounds and backward error +*> estimates for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the superdiagonal or the subdiagonal of the +*> tridiagonal matrix A is stored and the form of the +*> factorization: +*> = 'U': E is the superdiagonal of A, and A = U**H*D*U; +*> = 'L': E is the subdiagonal of A, and A = L*D*L**H. +*> (The two forms are equivalent if A is real.) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n real diagonal elements of the tridiagonal matrix A. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N-1) +*> The (n-1) off-diagonal elements of the tridiagonal matrix A +*> (see UPLO). +*> \endverbatim +*> +*> \param[in] DF +*> \verbatim +*> DF is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the diagonal matrix D from +*> the factorization computed by ZPTTRF. +*> \endverbatim +*> +*> \param[in] EF +*> \verbatim +*> EF is COMPLEX*16 array, dimension (N-1) +*> The (n-1) off-diagonal elements of the unit bidiagonal +*> factor U or L from the factorization computed by ZPTTRF +*> (see UPLO). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by ZPTTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16PTcomputational +* +* ===================================================================== + SUBROUTINE ZPTRFS( UPLO, N, NRHS, D, E, DF, EF, B, LDB, X, LDX, + $ FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), D( * ), DF( * ), FERR( * ), + $ RWORK( * ) + COMPLEX*16 B( LDB, * ), E( * ), EF( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IX, J, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN + COMPLEX*16 BI, CX, DX, EX, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IDAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZPTTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPTRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = 4 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 100 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X. Also compute +* abs(A)*abs(x) + abs(b) for use in the backward error bound. +* + IF( UPPER ) THEN + IF( N.EQ.1 ) THEN + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + WORK( 1 ) = BI - DX + RWORK( 1 ) = CABS1( BI ) + CABS1( DX ) + ELSE + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + EX = E( 1 )*X( 2, J ) + WORK( 1 ) = BI - DX - EX + RWORK( 1 ) = CABS1( BI ) + CABS1( DX ) + + $ CABS1( E( 1 ) )*CABS1( X( 2, J ) ) + DO 30 I = 2, N - 1 + BI = B( I, J ) + CX = DCONJG( E( I-1 ) )*X( I-1, J ) + DX = D( I )*X( I, J ) + EX = E( I )*X( I+1, J ) + WORK( I ) = BI - CX - DX - EX + RWORK( I ) = CABS1( BI ) + + $ CABS1( E( I-1 ) )*CABS1( X( I-1, J ) ) + + $ CABS1( DX ) + CABS1( E( I ) )* + $ CABS1( X( I+1, J ) ) + 30 CONTINUE + BI = B( N, J ) + CX = DCONJG( E( N-1 ) )*X( N-1, J ) + DX = D( N )*X( N, J ) + WORK( N ) = BI - CX - DX + RWORK( N ) = CABS1( BI ) + CABS1( E( N-1 ) )* + $ CABS1( X( N-1, J ) ) + CABS1( DX ) + END IF + ELSE + IF( N.EQ.1 ) THEN + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + WORK( 1 ) = BI - DX + RWORK( 1 ) = CABS1( BI ) + CABS1( DX ) + ELSE + BI = B( 1, J ) + DX = D( 1 )*X( 1, J ) + EX = DCONJG( E( 1 ) )*X( 2, J ) + WORK( 1 ) = BI - DX - EX + RWORK( 1 ) = CABS1( BI ) + CABS1( DX ) + + $ CABS1( E( 1 ) )*CABS1( X( 2, J ) ) + DO 40 I = 2, N - 1 + BI = B( I, J ) + CX = E( I-1 )*X( I-1, J ) + DX = D( I )*X( I, J ) + EX = DCONJG( E( I ) )*X( I+1, J ) + WORK( I ) = BI - CX - DX - EX + RWORK( I ) = CABS1( BI ) + + $ CABS1( E( I-1 ) )*CABS1( X( I-1, J ) ) + + $ CABS1( DX ) + CABS1( E( I ) )* + $ CABS1( X( I+1, J ) ) + 40 CONTINUE + BI = B( N, J ) + CX = E( N-1 )*X( N-1, J ) + DX = D( N )*X( N, J ) + WORK( N ) = BI - CX - DX + RWORK( N ) = CABS1( BI ) + CABS1( E( N-1 ) )* + $ CABS1( X( N-1, J ) ) + CABS1( DX ) + END IF + END IF +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + S = ZERO + DO 50 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 50 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZPTTRS( UPLO, N, 1, DF, EF, WORK, N, INFO ) + CALL ZAXPY( N, DCMPLX( ONE ), WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* + DO 60 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 60 CONTINUE + IX = IDAMAX( N, RWORK, 1 ) + FERR( J ) = RWORK( IX ) +* +* Estimate the norm of inv(A). +* +* Solve M(A) * x = e, where M(A) = (m(i,j)) is given by +* +* m(i,j) = abs(A(i,j)), i = j, +* m(i,j) = -abs(A(i,j)), i .ne. j, +* +* and e = [ 1, 1, ..., 1 ]**T. Note M(A) = M(L)*D*M(L)**H. +* +* Solve M(L) * x = e. +* + RWORK( 1 ) = ONE + DO 70 I = 2, N + RWORK( I ) = ONE + RWORK( I-1 )*ABS( EF( I-1 ) ) + 70 CONTINUE +* +* Solve D * M(L)**H * x = b. +* + RWORK( N ) = RWORK( N ) / DF( N ) + DO 80 I = N - 1, 1, -1 + RWORK( I ) = RWORK( I ) / DF( I ) + + $ RWORK( I+1 )*ABS( EF( I ) ) + 80 CONTINUE +* +* Compute norm(inv(A)) = max(x(i)), 1<=i<=n. +* + IX = IDAMAX( N, RWORK, 1 ) + FERR( J ) = FERR( J )*ABS( RWORK( IX ) ) +* +* Normalize error. +* + LSTRES = ZERO + DO 90 I = 1, N + LSTRES = MAX( LSTRES, ABS( X( I, J ) ) ) + 90 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 100 CONTINUE +* + RETURN +* +* End of ZPTRFS +* + END diff --git a/dspl/liblapack/SRC/zptsv.f b/dspl/liblapack/SRC/zptsv.f new file mode 100644 index 0000000..4ae2fcf --- /dev/null +++ b/dspl/liblapack/SRC/zptsv.f @@ -0,0 +1,169 @@ +*> \brief ZPTSV computes the solution to system of linear equations A * X = B for PT matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPTSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPTSV( N, NRHS, D, E, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ) +* COMPLEX*16 B( LDB, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPTSV computes the solution to a complex system of linear equations +*> A*X = B, where A is an N-by-N Hermitian positive definite tridiagonal +*> matrix, and X and B are N-by-NRHS matrices. +*> +*> A is factored as A = L*D*L**H, and the factored form of A is then +*> used to solve the system of equations. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix +*> A. On exit, the n diagonal elements of the diagonal matrix +*> D from the factorization A = L*D*L**H. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix A. On exit, the (n-1) subdiagonal elements of the +*> unit bidiagonal factor L from the L*D*L**H factorization of +*> A. E can also be regarded as the superdiagonal of the unit +*> bidiagonal factor U from the U**H*D*U factorization of A. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the leading minor of order i is not +*> positive definite, and the solution has not been +*> computed. The factorization has not been completed +*> unless i = N. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16PTsolve +* +* ===================================================================== + SUBROUTINE ZPTSV( N, NRHS, D, E, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ) + COMPLEX*16 B( LDB, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL XERBLA, ZPTTRF, ZPTTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPTSV ', -INFO ) + RETURN + END IF +* +* Compute the L*D*L**H (or U**H*D*U) factorization of A. +* + CALL ZPTTRF( N, D, E, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZPTTRS( 'Lower', N, NRHS, D, E, B, LDB, INFO ) + END IF + RETURN +* +* End of ZPTSV +* + END diff --git a/dspl/liblapack/SRC/zptsvx.f b/dspl/liblapack/SRC/zptsvx.f new file mode 100644 index 0000000..085fb2e --- /dev/null +++ b/dspl/liblapack/SRC/zptsvx.f @@ -0,0 +1,343 @@ +*> \brief ZPTSVX computes the solution to system of linear equations A * X = B for PT matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPTSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPTSVX( FACT, N, NRHS, D, E, DF, EF, B, LDB, X, LDX, +* RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER FACT +* INTEGER INFO, LDB, LDX, N, NRHS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION BERR( * ), D( * ), DF( * ), FERR( * ), +* $ RWORK( * ) +* COMPLEX*16 B( LDB, * ), E( * ), EF( * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPTSVX uses the factorization A = L*D*L**H to compute the solution +*> to a complex system of linear equations A*X = B, where A is an +*> N-by-N Hermitian positive definite tridiagonal matrix and X and B +*> are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'N', the matrix A is factored as A = L*D*L**H, where L +*> is a unit lower bidiagonal matrix and D is diagonal. The +*> factorization can also be regarded as having the form +*> A = U**H*D*U. +*> +*> 2. If the leading i-by-i principal minor is not positive definite, +*> then the routine returns with INFO = i. Otherwise, the factored +*> form of A is used to estimate the condition number of the matrix +*> A. If the reciprocal of the condition number is less than machine +*> precision, INFO = N+1 is returned as a warning, but the routine +*> still goes on to solve for X and compute error bounds as +*> described below. +*> +*> 3. The system of equations is solved for X using the factored form +*> of A. +*> +*> 4. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix +*> A is supplied on entry. +*> = 'F': On entry, DF and EF contain the factored form of A. +*> D, E, DF, and EF will not be modified. +*> = 'N': The matrix A will be copied to DF and EF and +*> factored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the tridiagonal matrix A. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N-1) +*> The (n-1) subdiagonal elements of the tridiagonal matrix A. +*> \endverbatim +*> +*> \param[in,out] DF +*> \verbatim +*> DF is DOUBLE PRECISION array, dimension (N) +*> If FACT = 'F', then DF is an input argument and on entry +*> contains the n diagonal elements of the diagonal matrix D +*> from the L*D*L**H factorization of A. +*> If FACT = 'N', then DF is an output argument and on exit +*> contains the n diagonal elements of the diagonal matrix D +*> from the L*D*L**H factorization of A. +*> \endverbatim +*> +*> \param[in,out] EF +*> \verbatim +*> EF is COMPLEX*16 array, dimension (N-1) +*> If FACT = 'F', then EF is an input argument and on entry +*> contains the (n-1) subdiagonal elements of the unit +*> bidiagonal factor L from the L*D*L**H factorization of A. +*> If FACT = 'N', then EF is an output argument and on exit +*> contains the (n-1) subdiagonal elements of the unit +*> bidiagonal factor L from the L*D*L**H factorization of A. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal condition number of the matrix A. If RCOND +*> is less than the machine precision (in particular, if +*> RCOND = 0), the matrix is singular to working precision. +*> This condition is indicated by a return code of INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in any +*> element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: the leading minor of order i of A is +*> not positive definite, so the factorization +*> could not be completed, and the solution has not +*> been computed. RCOND = 0 is returned. +*> = N+1: U is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16PTsolve +* +* ===================================================================== + SUBROUTINE ZPTSVX( FACT, N, NRHS, D, E, DF, EF, B, LDB, X, LDX, + $ RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER FACT + INTEGER INFO, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), D( * ), DF( * ), FERR( * ), + $ RWORK( * ) + COMPLEX*16 B( LDB, * ), E( * ), EF( * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT + DOUBLE PRECISION ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANHT + EXTERNAL LSAME, DLAMCH, ZLANHT +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, XERBLA, ZCOPY, ZLACPY, ZPTCON, ZPTRFS, + $ ZPTTRF, ZPTTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPTSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the L*D*L**H (or U**H*D*U) factorization of A. +* + CALL DCOPY( N, D, 1, DF, 1 ) + IF( N.GT.1 ) + $ CALL ZCOPY( N-1, E, 1, EF, 1 ) + CALL ZPTTRF( N, DF, EF, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = ZLANHT( '1', N, D, E ) +* +* Compute the reciprocal of the condition number of A. +* + CALL ZPTCON( N, DF, EF, ANORM, RCOND, RWORK, INFO ) +* +* Compute the solution vectors X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZPTTRS( 'Lower', N, NRHS, DF, EF, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL ZPTRFS( 'Lower', N, NRHS, D, E, DF, EF, B, LDB, X, LDX, FERR, + $ BERR, WORK, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of ZPTSVX +* + END diff --git a/dspl/liblapack/SRC/zpttrf.f b/dspl/liblapack/SRC/zpttrf.f new file mode 100644 index 0000000..d46a1c3 --- /dev/null +++ b/dspl/liblapack/SRC/zpttrf.f @@ -0,0 +1,228 @@ +*> \brief \b ZPTTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPTTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPTTRF( N, D, E, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ) +* COMPLEX*16 E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPTTRF computes the L*D*L**H factorization of a complex Hermitian +*> positive definite tridiagonal matrix A. The factorization may also +*> be regarded as having the form A = U**H *D*U. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the n diagonal elements of the tridiagonal matrix +*> A. On exit, the n diagonal elements of the diagonal matrix +*> D from the L*D*L**H factorization of A. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix A. On exit, the (n-1) subdiagonal elements of the +*> unit bidiagonal factor L from the L*D*L**H factorization of A. +*> E can also be regarded as the superdiagonal of the unit +*> bidiagonal factor U from the U**H *D*U factorization of A. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, the leading minor of order k is not +*> positive definite; if k < N, the factorization could not +*> be completed, while if k = N, the factorization was +*> completed, but D(N) <= 0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16PTcomputational +* +* ===================================================================== + SUBROUTINE ZPTTRF( N, D, E, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ) + COMPLEX*16 E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, I4 + DOUBLE PRECISION EII, EIR, F, G +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DIMAG, MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + CALL XERBLA( 'ZPTTRF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Compute the L*D*L**H (or U**H *D*U) factorization of A. +* + I4 = MOD( N-1, 4 ) + DO 10 I = 1, I4 + IF( D( I ).LE.ZERO ) THEN + INFO = I + GO TO 30 + END IF + EIR = DBLE( E( I ) ) + EII = DIMAG( E( I ) ) + F = EIR / D( I ) + G = EII / D( I ) + E( I ) = DCMPLX( F, G ) + D( I+1 ) = D( I+1 ) - F*EIR - G*EII + 10 CONTINUE +* + DO 20 I = I4 + 1, N - 4, 4 +* +* Drop out of the loop if d(i) <= 0: the matrix is not positive +* definite. +* + IF( D( I ).LE.ZERO ) THEN + INFO = I + GO TO 30 + END IF +* +* Solve for e(i) and d(i+1). +* + EIR = DBLE( E( I ) ) + EII = DIMAG( E( I ) ) + F = EIR / D( I ) + G = EII / D( I ) + E( I ) = DCMPLX( F, G ) + D( I+1 ) = D( I+1 ) - F*EIR - G*EII +* + IF( D( I+1 ).LE.ZERO ) THEN + INFO = I + 1 + GO TO 30 + END IF +* +* Solve for e(i+1) and d(i+2). +* + EIR = DBLE( E( I+1 ) ) + EII = DIMAG( E( I+1 ) ) + F = EIR / D( I+1 ) + G = EII / D( I+1 ) + E( I+1 ) = DCMPLX( F, G ) + D( I+2 ) = D( I+2 ) - F*EIR - G*EII +* + IF( D( I+2 ).LE.ZERO ) THEN + INFO = I + 2 + GO TO 30 + END IF +* +* Solve for e(i+2) and d(i+3). +* + EIR = DBLE( E( I+2 ) ) + EII = DIMAG( E( I+2 ) ) + F = EIR / D( I+2 ) + G = EII / D( I+2 ) + E( I+2 ) = DCMPLX( F, G ) + D( I+3 ) = D( I+3 ) - F*EIR - G*EII +* + IF( D( I+3 ).LE.ZERO ) THEN + INFO = I + 3 + GO TO 30 + END IF +* +* Solve for e(i+3) and d(i+4). +* + EIR = DBLE( E( I+3 ) ) + EII = DIMAG( E( I+3 ) ) + F = EIR / D( I+3 ) + G = EII / D( I+3 ) + E( I+3 ) = DCMPLX( F, G ) + D( I+4 ) = D( I+4 ) - F*EIR - G*EII + 20 CONTINUE +* +* Check d(n) for positive definiteness. +* + IF( D( N ).LE.ZERO ) + $ INFO = N +* + 30 CONTINUE + RETURN +* +* End of ZPTTRF +* + END diff --git a/dspl/liblapack/SRC/zpttrs.f b/dspl/liblapack/SRC/zpttrs.f new file mode 100644 index 0000000..146a8e2 --- /dev/null +++ b/dspl/liblapack/SRC/zpttrs.f @@ -0,0 +1,208 @@ +*> \brief \b ZPTTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPTTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPTTRS( UPLO, N, NRHS, D, E, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ) +* COMPLEX*16 B( LDB, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPTTRS solves a tridiagonal system of the form +*> A * X = B +*> using the factorization A = U**H *D* U or A = L*D*L**H computed by ZPTTRF. +*> D is a diagonal matrix specified in the vector D, U (or L) is a unit +*> bidiagonal matrix whose superdiagonal (subdiagonal) is specified in +*> the vector E, and X and B are N by NRHS matrices. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies the form of the factorization and whether the +*> vector E is the superdiagonal of the upper bidiagonal factor +*> U or the subdiagonal of the lower bidiagonal factor L. +*> = 'U': A = U**H *D*U, E is the superdiagonal of U +*> = 'L': A = L*D*L**H, E is the subdiagonal of L +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the tridiagonal matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the diagonal matrix D from the +*> factorization A = U**H *D*U or A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N-1) +*> If UPLO = 'U', the (n-1) superdiagonal elements of the unit +*> bidiagonal factor U from the factorization A = U**H*D*U. +*> If UPLO = 'L', the (n-1) subdiagonal elements of the unit +*> bidiagonal factor L from the factorization A = L*D*L**H. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side vectors B for the system of +*> linear equations. +*> On exit, the solution vectors, X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16PTcomputational +* +* ===================================================================== + SUBROUTINE ZPTTRS( UPLO, N, NRHS, D, E, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ) + COMPLEX*16 B( LDB, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER + INTEGER IUPLO, J, JB, NB +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZPTTS2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments. +* + INFO = 0 + UPPER = ( UPLO.EQ.'U' .OR. UPLO.EQ.'u' ) + IF( .NOT.UPPER .AND. .NOT.( UPLO.EQ.'L' .OR. UPLO.EQ.'l' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZPTTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Determine the number of right-hand sides to solve at a time. +* + IF( NRHS.EQ.1 ) THEN + NB = 1 + ELSE + NB = MAX( 1, ILAENV( 1, 'ZPTTRS', UPLO, N, NRHS, -1, -1 ) ) + END IF +* +* Decode UPLO +* + IF( UPPER ) THEN + IUPLO = 1 + ELSE + IUPLO = 0 + END IF +* + IF( NB.GE.NRHS ) THEN + CALL ZPTTS2( IUPLO, N, NRHS, D, E, B, LDB ) + ELSE + DO 10 J = 1, NRHS, NB + JB = MIN( NRHS-J+1, NB ) + CALL ZPTTS2( IUPLO, N, JB, D, E, B( 1, J ), LDB ) + 10 CONTINUE + END IF +* + RETURN +* +* End of ZPTTRS +* + END diff --git a/dspl/liblapack/SRC/zptts2.f b/dspl/liblapack/SRC/zptts2.f new file mode 100644 index 0000000..0d40d03 --- /dev/null +++ b/dspl/liblapack/SRC/zptts2.f @@ -0,0 +1,245 @@ +*> \brief \b ZPTTS2 solves a tridiagonal system of the form AX=B using the L D LH factorization computed by spttrf. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZPTTS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZPTTS2( IUPLO, N, NRHS, D, E, B, LDB ) +* +* .. Scalar Arguments .. +* INTEGER IUPLO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ) +* COMPLEX*16 B( LDB, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZPTTS2 solves a tridiagonal system of the form +*> A * X = B +*> using the factorization A = U**H *D*U or A = L*D*L**H computed by ZPTTRF. +*> D is a diagonal matrix specified in the vector D, U (or L) is a unit +*> bidiagonal matrix whose superdiagonal (subdiagonal) is specified in +*> the vector E, and X and B are N by NRHS matrices. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] IUPLO +*> \verbatim +*> IUPLO is INTEGER +*> Specifies the form of the factorization and whether the +*> vector E is the superdiagonal of the upper bidiagonal factor +*> U or the subdiagonal of the lower bidiagonal factor L. +*> = 1: A = U**H *D*U, E is the superdiagonal of U +*> = 0: A = L*D*L**H, E is the subdiagonal of L +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the tridiagonal matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the diagonal matrix D from the +*> factorization A = U**H *D*U or A = L*D*L**H. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N-1) +*> If IUPLO = 1, the (n-1) superdiagonal elements of the unit +*> bidiagonal factor U from the factorization A = U**H*D*U. +*> If IUPLO = 0, the (n-1) subdiagonal elements of the unit +*> bidiagonal factor L from the factorization A = L*D*L**H. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side vectors B for the system of +*> linear equations. +*> On exit, the solution vectors, X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16PTcomputational +* +* ===================================================================== + SUBROUTINE ZPTTS2( IUPLO, N, NRHS, D, E, B, LDB ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + INTEGER IUPLO, LDB, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ) + COMPLEX*16 B( LDB, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Subroutines .. + EXTERNAL ZDSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + IF( N.EQ.1 ) + $ CALL ZDSCAL( NRHS, 1.D0 / D( 1 ), B, LDB ) + RETURN + END IF +* + IF( IUPLO.EQ.1 ) THEN +* +* Solve A * X = B using the factorization A = U**H *D*U, +* overwriting each right hand side vector with its solution. +* + IF( NRHS.LE.2 ) THEN + J = 1 + 10 CONTINUE +* +* Solve U**H * x = b. +* + DO 20 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*DCONJG( E( I-1 ) ) + 20 CONTINUE +* +* Solve D * U * x = b. +* + DO 30 I = 1, N + B( I, J ) = B( I, J ) / D( I ) + 30 CONTINUE + DO 40 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) - B( I+1, J )*E( I ) + 40 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 10 + END IF + ELSE + DO 70 J = 1, NRHS +* +* Solve U**H * x = b. +* + DO 50 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*DCONJG( E( I-1 ) ) + 50 CONTINUE +* +* Solve D * U * x = b. +* + B( N, J ) = B( N, J ) / D( N ) + DO 60 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) / D( I ) - B( I+1, J )*E( I ) + 60 CONTINUE + 70 CONTINUE + END IF + ELSE +* +* Solve A * X = B using the factorization A = L*D*L**H, +* overwriting each right hand side vector with its solution. +* + IF( NRHS.LE.2 ) THEN + J = 1 + 80 CONTINUE +* +* Solve L * x = b. +* + DO 90 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 ) + 90 CONTINUE +* +* Solve D * L**H * x = b. +* + DO 100 I = 1, N + B( I, J ) = B( I, J ) / D( I ) + 100 CONTINUE + DO 110 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) - B( I+1, J )*DCONJG( E( I ) ) + 110 CONTINUE + IF( J.LT.NRHS ) THEN + J = J + 1 + GO TO 80 + END IF + ELSE + DO 140 J = 1, NRHS +* +* Solve L * x = b. +* + DO 120 I = 2, N + B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 ) + 120 CONTINUE +* +* Solve D * L**H * x = b. +* + B( N, J ) = B( N, J ) / D( N ) + DO 130 I = N - 1, 1, -1 + B( I, J ) = B( I, J ) / D( I ) - + $ B( I+1, J )*DCONJG( E( I ) ) + 130 CONTINUE + 140 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZPTTS2 +* + END diff --git a/dspl/liblapack/SRC/zrot.f b/dspl/liblapack/SRC/zrot.f new file mode 100644 index 0000000..f372df0 --- /dev/null +++ b/dspl/liblapack/SRC/zrot.f @@ -0,0 +1,162 @@ +*> \brief \b ZROT applies a plane rotation with real cosine and complex sine to a pair of complex vectors. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZROT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZROT( N, CX, INCX, CY, INCY, C, S ) +* +* .. Scalar Arguments .. +* INTEGER INCX, INCY, N +* DOUBLE PRECISION C +* COMPLEX*16 S +* .. +* .. Array Arguments .. +* COMPLEX*16 CX( * ), CY( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZROT applies a plane rotation, where the cos (C) is real and the +*> sin (S) is complex, and the vectors CX and CY are complex. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of elements in the vectors CX and CY. +*> \endverbatim +*> +*> \param[in,out] CX +*> \verbatim +*> CX is COMPLEX*16 array, dimension (N) +*> On input, the vector X. +*> On output, CX is overwritten with C*X + S*Y. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive values of CY. INCX <> 0. +*> \endverbatim +*> +*> \param[in,out] CY +*> \verbatim +*> CY is COMPLEX*16 array, dimension (N) +*> On input, the vector Y. +*> On output, CY is overwritten with -CONJG(S)*X + C*Y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> The increment between successive values of CY. INCX <> 0. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is COMPLEX*16 +*> C and S define a rotation +*> [ C S ] +*> [ -conjg(S) C ] +*> where C*C + S*CONJG(S) = 1.0. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZROT( N, CX, INCX, CY, INCY, C, S ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INCX, INCY, N + DOUBLE PRECISION C + COMPLEX*16 S +* .. +* .. Array Arguments .. + COMPLEX*16 CX( * ), CY( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IX, IY + COMPLEX*16 STEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) + $ RETURN + IF( INCX.EQ.1 .AND. INCY.EQ.1 ) + $ GO TO 20 +* +* Code for unequal increments or equal increments not equal to 1 +* + IX = 1 + IY = 1 + IF( INCX.LT.0 ) + $ IX = ( -N+1 )*INCX + 1 + IF( INCY.LT.0 ) + $ IY = ( -N+1 )*INCY + 1 + DO 10 I = 1, N + STEMP = C*CX( IX ) + S*CY( IY ) + CY( IY ) = C*CY( IY ) - DCONJG( S )*CX( IX ) + CX( IX ) = STEMP + IX = IX + INCX + IY = IY + INCY + 10 CONTINUE + RETURN +* +* Code for both increments equal to 1 +* + 20 CONTINUE + DO 30 I = 1, N + STEMP = C*CX( I ) + S*CY( I ) + CY( I ) = C*CY( I ) - DCONJG( S )*CX( I ) + CX( I ) = STEMP + 30 CONTINUE + RETURN + END diff --git a/dspl/liblapack/SRC/zspcon.f b/dspl/liblapack/SRC/zspcon.f new file mode 100644 index 0000000..eae8f9e --- /dev/null +++ b/dspl/liblapack/SRC/zspcon.f @@ -0,0 +1,231 @@ +*> \brief \b ZSPCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSPCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSPCON estimates the reciprocal of the condition number (in the +*> 1-norm) of a complex symmetric packed matrix A using the +*> factorization A = U*D*U**T or A = L*D*L**T computed by ZSPTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZSPTRF, stored as a +*> packed triangular matrix. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSPTRF. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZSPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IP, KASE + DOUBLE PRECISION AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACN2, ZSPTRS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + IP = N*( N+1 ) / 2 + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP - I + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + IP = 1 + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO ) + $ RETURN + IP = IP + N - I + 1 + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**T) or inv(U*D*U**T). +* + CALL ZSPTRS( UPLO, N, 1, AP, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of ZSPCON +* + END diff --git a/dspl/liblapack/SRC/zspmv.f b/dspl/liblapack/SRC/zspmv.f new file mode 100644 index 0000000..9442801 --- /dev/null +++ b/dspl/liblapack/SRC/zspmv.f @@ -0,0 +1,340 @@ +*> \brief \b ZSPMV computes a matrix-vector product for complex vectors using a complex symmetric packed matrix +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSPMV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSPMV( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INCX, INCY, N +* COMPLEX*16 ALPHA, BETA +* .. +* .. Array Arguments .. +* COMPLEX*16 AP( * ), X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSPMV performs the matrix-vector operation +*> +*> y := alpha*A*x + beta*y, +*> +*> where alpha and beta are scalars, x and y are n element vectors and +*> A is an n by n symmetric matrix, supplied in packed form. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the matrix A is supplied in the packed +*> array AP as follows: +*> +*> UPLO = 'U' or 'u' The upper triangular part of A is +*> supplied in AP. +*> +*> UPLO = 'L' or 'l' The lower triangular part of A is +*> supplied in AP. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension at least +*> ( ( N*( N + 1 ) )/2 ). +*> Before entry, with UPLO = 'U' or 'u', the array AP must +*> contain the upper triangular part of the symmetric matrix +*> packed sequentially, column by column, so that AP( 1 ) +*> contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +*> and a( 2, 2 ) respectively, and so on. +*> Before entry, with UPLO = 'L' or 'l', the array AP must +*> contain the lower triangular part of the symmetric matrix +*> packed sequentially, column by column, so that AP( 1 ) +*> contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +*> and a( 3, 1 ) respectively, and so on. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension at least +*> ( 1 + ( N - 1 )*abs( INCX ) ). +*> Before entry, the incremented array X must contain the N- +*> element vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is COMPLEX*16 +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX*16 array, dimension at least +*> ( 1 + ( N - 1 )*abs( INCY ) ). +*> Before entry, the incremented array Y must contain the n +*> element vector y. On exit, Y is overwritten by the updated +*> vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> Unchanged on exit. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZSPMV( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCX, INCY, N + COMPLEX*16 ALPHA, BETA +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ), X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY + COMPLEX*16 TEMP1, TEMP2 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = 1 + ELSE IF( N.LT.0 ) THEN + INFO = 2 + ELSE IF( INCX.EQ.0 ) THEN + INFO = 6 + ELSE IF( INCY.EQ.0 ) THEN + INFO = 9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSPMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ) .OR. ( ( ALPHA.EQ.ZERO ) .AND. ( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 ) THEN + KX = 1 + ELSE + KX = 1 - ( N-1 )*INCX + END IF + IF( INCY.GT.0 ) THEN + KY = 1 + ELSE + KY = 1 - ( N-1 )*INCY + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE ) THEN + IF( INCY.EQ.1 ) THEN + IF( BETA.EQ.ZERO ) THEN + DO 10 I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20 I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO ) THEN + DO 30 I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40 I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + KK = 1 + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Form y when AP contains the upper triangle. +* + IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN + DO 60 J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + K = KK + DO 50 I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( I ) + K = K + 1 + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*AP( KK+J-1 ) + ALPHA*TEMP2 + KK = KK + J + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80 J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70 K = KK, KK + J - 2 + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*AP( KK+J-1 ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + J + 80 CONTINUE + END IF + ELSE +* +* Form y when AP contains the lower triangle. +* + IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN + DO 100 J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*AP( KK ) + K = KK + 1 + DO 90 I = J + 1, N + Y( I ) = Y( I ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( I ) + K = K + 1 + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + KK = KK + ( N-J+1 ) + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120 J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*AP( KK ) + IX = JX + IY = JY + DO 110 K = KK + 1, KK + N - J + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*AP( K ) + TEMP2 = TEMP2 + AP( K )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + KK = KK + ( N-J+1 ) + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZSPMV +* + END diff --git a/dspl/liblapack/SRC/zspr.f b/dspl/liblapack/SRC/zspr.f new file mode 100644 index 0000000..f91a99a --- /dev/null +++ b/dspl/liblapack/SRC/zspr.f @@ -0,0 +1,280 @@ +*> \brief \b ZSPR performs the symmetrical rank-1 update of a complex symmetric packed matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSPR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSPR( UPLO, N, ALPHA, X, INCX, AP ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INCX, N +* COMPLEX*16 ALPHA +* .. +* .. Array Arguments .. +* COMPLEX*16 AP( * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSPR performs the symmetric rank 1 operation +*> +*> A := alpha*x*x**H + A, +*> +*> where alpha is a complex scalar, x is an n element vector and A is an +*> n by n symmetric matrix, supplied in packed form. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the matrix A is supplied in the packed +*> array AP as follows: +*> +*> UPLO = 'U' or 'u' The upper triangular part of A is +*> supplied in AP. +*> +*> UPLO = 'L' or 'l' The lower triangular part of A is +*> supplied in AP. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension at least +*> ( 1 + ( N - 1 )*abs( INCX ) ). +*> Before entry, the incremented array X must contain the N- +*> element vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension at least +*> ( ( N*( N + 1 ) )/2 ). +*> Before entry, with UPLO = 'U' or 'u', the array AP must +*> contain the upper triangular part of the symmetric matrix +*> packed sequentially, column by column, so that AP( 1 ) +*> contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) +*> and a( 2, 2 ) respectively, and so on. On exit, the array +*> AP is overwritten by the upper triangular part of the +*> updated matrix. +*> Before entry, with UPLO = 'L' or 'l', the array AP must +*> contain the lower triangular part of the symmetric matrix +*> packed sequentially, column by column, so that AP( 1 ) +*> contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) +*> and a( 3, 1 ) respectively, and so on. On exit, the array +*> AP is overwritten by the lower triangular part of the +*> updated matrix. +*> Note that the imaginary parts of the diagonal elements need +*> not be set, they are assumed to be zero, and on exit they +*> are set to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZSPR( UPLO, N, ALPHA, X, INCX, AP ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCX, N + COMPLEX*16 ALPHA +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, IX, J, JX, K, KK, KX + COMPLEX*16 TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = 1 + ELSE IF( N.LT.0 ) THEN + INFO = 2 + ELSE IF( INCX.EQ.0 ) THEN + INFO = 5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSPR ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ) .OR. ( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 ) THEN + KX = 1 - ( N-1 )*INCX + ELSE IF( INCX.NE.1 ) THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of the array AP +* are accessed sequentially with one pass through AP. +* + KK = 1 + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Form A when upper triangle is stored in AP. +* + IF( INCX.EQ.1 ) THEN + DO 20 J = 1, N + IF( X( J ).NE.ZERO ) THEN + TEMP = ALPHA*X( J ) + K = KK + DO 10 I = 1, J - 1 + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 10 CONTINUE + AP( KK+J-1 ) = AP( KK+J-1 ) + X( J )*TEMP + ELSE + AP( KK+J-1 ) = AP( KK+J-1 ) + END IF + KK = KK + J + 20 CONTINUE + ELSE + JX = KX + DO 40 J = 1, N + IF( X( JX ).NE.ZERO ) THEN + TEMP = ALPHA*X( JX ) + IX = KX + DO 30 K = KK, KK + J - 2 + AP( K ) = AP( K ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + AP( KK+J-1 ) = AP( KK+J-1 ) + X( JX )*TEMP + ELSE + AP( KK+J-1 ) = AP( KK+J-1 ) + END IF + JX = JX + INCX + KK = KK + J + 40 CONTINUE + END IF + ELSE +* +* Form A when lower triangle is stored in AP. +* + IF( INCX.EQ.1 ) THEN + DO 60 J = 1, N + IF( X( J ).NE.ZERO ) THEN + TEMP = ALPHA*X( J ) + AP( KK ) = AP( KK ) + TEMP*X( J ) + K = KK + 1 + DO 50 I = J + 1, N + AP( K ) = AP( K ) + X( I )*TEMP + K = K + 1 + 50 CONTINUE + ELSE + AP( KK ) = AP( KK ) + END IF + KK = KK + N - J + 1 + 60 CONTINUE + ELSE + JX = KX + DO 80 J = 1, N + IF( X( JX ).NE.ZERO ) THEN + TEMP = ALPHA*X( JX ) + AP( KK ) = AP( KK ) + TEMP*X( JX ) + IX = JX + DO 70 K = KK + 1, KK + N - J + IX = IX + INCX + AP( K ) = AP( K ) + X( IX )*TEMP + 70 CONTINUE + ELSE + AP( KK ) = AP( KK ) + END IF + JX = JX + INCX + KK = KK + N - J + 1 + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZSPR +* + END diff --git a/dspl/liblapack/SRC/zsprfs.f b/dspl/liblapack/SRC/zsprfs.f new file mode 100644 index 0000000..3474d75 --- /dev/null +++ b/dspl/liblapack/SRC/zsprfs.f @@ -0,0 +1,437 @@ +*> \brief \b ZSPRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSPRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, +* FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX*16 AFP( * ), AP( * ), B( LDB, * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSPRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric indefinite +*> and packed, and provides error bounds and backward error estimates +*> for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the symmetric matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in] AFP +*> \verbatim +*> AFP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The factored form of the matrix A. AFP contains the block +*> diagonal matrix D and the multipliers used to obtain the +*> factor U or L from the factorization A = U*D*U**T or +*> A = L*D*L**T as computed by ZSPTRF, stored as a packed +*> triangular matrix. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSPTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by ZSPTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, + $ FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, IK, J, K, KASE, KK, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZLACN2, ZSPMV, ZSPTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL ZSPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + KK = 1 + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + IK = KK + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 40 CONTINUE + RWORK( K ) = RWORK( K ) + CABS1( AP( KK+K-1 ) )*XK + S + KK = KK + K + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + CABS1( AP( KK ) )*XK + IK = KK + 1 + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK + S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) ) + IK = IK + 1 + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + KK = KK + ( N-K+1 ) + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZSPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**T). +* + CALL ZSPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL ZSPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of ZSPRFS +* + END diff --git a/dspl/liblapack/SRC/zspsv.f b/dspl/liblapack/SRC/zspsv.f new file mode 100644 index 0000000..f4c79db --- /dev/null +++ b/dspl/liblapack/SRC/zspsv.f @@ -0,0 +1,224 @@ +*> \brief ZSPSV computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSPSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSPSV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix stored in packed format and X +*> and B are N-by-NRHS matrices. +*> +*> The diagonal pivoting method is used to factor A as +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, D is symmetric and block diagonal with 1-by-1 +*> and 2-by-2 diagonal blocks. The factored form of A is then used to +*> solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**T or A = L*D*L**T as computed by ZSPTRF, stored as +*> a packed triangular matrix in the same storage format as A. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, as +*> determined by ZSPTRF. If IPIV(k) > 0, then rows and columns +*> k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +*> diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +*> then rows and columns k-1 and -IPIV(k) were interchanged and +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +*> IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +*> -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +*> diagonal block. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be +*> computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the symmetric matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = aji) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZSPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZSPTRF, ZSPTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSPSV ', -INFO ) + RETURN + END IF +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL ZSPTRF( UPLO, N, AP, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* + END IF + RETURN +* +* End of ZSPSV +* + END diff --git a/dspl/liblapack/SRC/zspsvx.f b/dspl/liblapack/SRC/zspsvx.f new file mode 100644 index 0000000..8c4dc10 --- /dev/null +++ b/dspl/liblapack/SRC/zspsvx.f @@ -0,0 +1,387 @@ +*> \brief ZSPSVX computes the solution to system of linear equations A * X = B for OTHER matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSPSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, +* LDX, RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER FACT, UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX*16 AFP( * ), AP( * ), B( LDB, * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSPSVX uses the diagonal pivoting factorization A = U*D*U**T or +*> A = L*D*L**T to compute the solution to a complex system of linear +*> equations A * X = B, where A is an N-by-N symmetric matrix stored +*> in packed format and X and B are N-by-NRHS matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'N', the diagonal pivoting method is used to factor A as +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> 2. If some D(i,i)=0, so that D is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 3. The system of equations is solved for X using the factored form +*> of A. +*> +*> 4. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of A has been +*> supplied on entry. +*> = 'F': On entry, AFP and IPIV contain the factored form +*> of A. AP, AFP and IPIV will not be modified. +*> = 'N': The matrix A will be copied to AFP and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The upper or lower triangle of the symmetric matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> \endverbatim +*> +*> \param[in,out] AFP +*> \verbatim +*> AFP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> If FACT = 'F', then AFP is an input argument and on entry +*> contains the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**T or A = L*D*L**T as computed by ZSPTRF, stored as +*> a packed triangular matrix in the same storage format as A. +*> +*> If FACT = 'N', then AFP is an output argument and on exit +*> contains the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**T or A = L*D*L**T as computed by ZSPTRF, stored as +*> a packed triangular matrix in the same storage format as A. +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains details of the interchanges and the block structure +*> of D, as determined by ZSPTRF. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains details of the interchanges and the block structure +*> of D, as determined by ZSPTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The estimate of the reciprocal condition number of the matrix +*> A. If RCOND is less than the machine precision (in +*> particular, if RCOND = 0), the matrix is singular to working +*> precision. This condition is indicated by a return code of +*> INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: D(i,i) is exactly zero. The factorization +*> has been completed but the factor D is exactly +*> singular, so the solution and error bounds could +*> not be computed. RCOND = 0 is returned. +*> = N+1: D is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16OTHERsolve +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The packed storage scheme is illustrated by the following example +*> when N = 4, UPLO = 'U': +*> +*> Two-dimensional storage of the symmetric matrix A: +*> +*> a11 a12 a13 a14 +*> a22 a23 a24 +*> a33 a34 (aij = aji) +*> a44 +*> +*> Packed storage of the upper triangle of A: +*> +*> AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ] +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZSPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, + $ LDX, RCOND, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDB, LDX, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 AFP( * ), AP( * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOFACT + DOUBLE PRECISION ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANSP + EXTERNAL LSAME, DLAMCH, ZLANSP +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZLACPY, ZSPCON, ZSPRFS, ZSPTRF, + $ ZSPTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSPSVX', -INFO ) + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL ZCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 ) + CALL ZSPTRF( UPLO, N, AFP, IPIV, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = ZLANSP( 'I', UPLO, N, AP, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL ZSPCON( UPLO, N, AFP, IPIV, ANORM, RCOND, WORK, INFO ) +* +* Compute the solution vectors X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZSPTRS( UPLO, N, NRHS, AFP, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL ZSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, FERR, + $ BERR, WORK, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + RETURN +* +* End of ZSPSVX +* + END diff --git a/dspl/liblapack/SRC/zsptrf.f b/dspl/liblapack/SRC/zsptrf.f new file mode 100644 index 0000000..07c6eec --- /dev/null +++ b/dspl/liblapack/SRC/zsptrf.f @@ -0,0 +1,619 @@ +*> \brief \b ZSPTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSPTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSPTRF( UPLO, N, AP, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSPTRF computes the factorization of a complex symmetric matrix A +*> stored in packed format using the Bunch-Kaufman diagonal pivoting +*> method: +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangle of the symmetric matrix +*> A, packed columnwise in a linear array. The j-th column of A +*> is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L, stored as a packed triangular +*> matrix overwriting A (see below for further details). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> 5-96 - Based on modifications by J. Lewis, Boeing Computer Services +*> Company +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZSPTRF( UPLO, N, AP, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KC, KK, KNC, KP, KPC, + $ KSTEP, KX, NPP + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, ROWMAX + COMPLEX*16 D11, D12, D21, D22, R1, T, WK, WKM1, WKP1, ZDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + EXTERNAL LSAME, IZAMAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZSCAL, ZSPR, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSPTRF', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + KC = ( N-1 )*N / 2 + 1 + 10 CONTINUE + KNC = KC +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( AP( KC+K-1 ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.GT.1 ) THEN + IMAX = IZAMAX( K-1, AP( KC ), 1 ) + COLMAX = CABS1( AP( KC+IMAX-1 ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* + ROWMAX = ZERO + JMAX = IMAX + KX = IMAX*( IMAX+1 ) / 2 + IMAX + DO 20 J = IMAX + 1, K + IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = CABS1( AP( KX ) ) + JMAX = J + END IF + KX = KX + J + 20 CONTINUE + KPC = ( IMAX-1 )*IMAX / 2 + 1 + IF( IMAX.GT.1 ) THEN + JMAX = IZAMAX( IMAX-1, AP( KPC ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-1 ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( AP( KPC+IMAX-1 ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL ZSWAP( KP-1, AP( KNC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 30 J = KP + 1, KK - 1 + KX = KX + J - 1 + T = AP( KNC+J-1 ) + AP( KNC+J-1 ) = AP( KX ) + AP( KX ) = T + 30 CONTINUE + T = AP( KNC+KK-1 ) + AP( KNC+KK-1 ) = AP( KPC+KP-1 ) + AP( KPC+KP-1 ) = T + IF( KSTEP.EQ.2 ) THEN + T = AP( KC+K-2 ) + AP( KC+K-2 ) = AP( KC+KP-1 ) + AP( KC+KP-1 ) = T + END IF + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)**T = A - W(k)*1/D(k)*W(k)**T +* + R1 = CONE / AP( KC+K-1 ) + CALL ZSPR( UPLO, K-1, -R1, AP( KC ), 1, AP ) +* +* Store U(k) in column k +* + CALL ZSCAL( K-1, R1, AP( KC ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )**T +* + IF( K.GT.2 ) THEN +* + D12 = AP( K-1+( K-1 )*K / 2 ) + D22 = AP( K-1+( K-2 )*( K-1 ) / 2 ) / D12 + D11 = AP( K+( K-1 )*K / 2 ) / D12 + T = CONE / ( D11*D22-CONE ) + D12 = T / D12 +* + DO 50 J = K - 2, 1, -1 + WKM1 = D12*( D11*AP( J+( K-2 )*( K-1 ) / 2 )- + $ AP( J+( K-1 )*K / 2 ) ) + WK = D12*( D22*AP( J+( K-1 )*K / 2 )- + $ AP( J+( K-2 )*( K-1 ) / 2 ) ) + DO 40 I = J, 1, -1 + AP( I+( J-1 )*J / 2 ) = AP( I+( J-1 )*J / 2 ) - + $ AP( I+( K-1 )*K / 2 )*WK - + $ AP( I+( K-2 )*( K-1 ) / 2 )*WKM1 + 40 CONTINUE + AP( J+( K-1 )*K / 2 ) = WK + AP( J+( K-2 )*( K-1 ) / 2 ) = WKM1 + 50 CONTINUE +* + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + KC = KNC - K + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + KC = 1 + NPP = N*( N+1 ) / 2 + 60 CONTINUE + KNC = KC +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 110 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( AP( KC ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value +* + IF( K.LT.N ) THEN + IMAX = K + IZAMAX( N-K, AP( KC+1 ), 1 ) + COLMAX = CABS1( AP( KC+IMAX-K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +* +* Column K is zero: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + ROWMAX = ZERO + KX = KC + IMAX - K + DO 70 J = K, IMAX - 1 + IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN + ROWMAX = CABS1( AP( KX ) ) + JMAX = J + END IF + KX = KX + N - J + 70 CONTINUE + KPC = NPP - ( N-IMAX+1 )*( N-IMAX+2 ) / 2 + 1 + IF( IMAX.LT.N ) THEN + JMAX = IMAX + IZAMAX( N-IMAX, AP( KPC+1 ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( AP( KPC ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KSTEP.EQ.2 ) + $ KNC = KNC + N - K + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, AP( KNC+KP-KK+1 ), 1, AP( KPC+1 ), + $ 1 ) + KX = KNC + KP - KK + DO 80 J = KK + 1, KP - 1 + KX = KX + N - J + 1 + T = AP( KNC+J-KK ) + AP( KNC+J-KK ) = AP( KX ) + AP( KX ) = T + 80 CONTINUE + T = AP( KNC ) + AP( KNC ) = AP( KPC ) + AP( KPC ) = T + IF( KSTEP.EQ.2 ) THEN + T = AP( KC+1 ) + AP( KC+1 ) = AP( KC+KP-K ) + AP( KC+KP-K ) = T + END IF + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)**T = A - W(k)*(1/D(k))*W(k)**T +* + R1 = CONE / AP( KC ) + CALL ZSPR( UPLO, N-K, -R1, AP( KC+1 ), 1, + $ AP( KC+N-K+1 ) ) +* +* Store L(k) in column K +* + CALL ZSCAL( N-K, R1, AP( KC+1 ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k): columns K and K+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )**T +* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )**T +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D21 = AP( K+1+( K-1 )*( 2*N-K ) / 2 ) + D11 = AP( K+1+K*( 2*N-K-1 ) / 2 ) / D21 + D22 = AP( K+( K-1 )*( 2*N-K ) / 2 ) / D21 + T = CONE / ( D11*D22-CONE ) + D21 = T / D21 +* + DO 100 J = K + 2, N + WK = D21*( D11*AP( J+( K-1 )*( 2*N-K ) / 2 )- + $ AP( J+K*( 2*N-K-1 ) / 2 ) ) + WKP1 = D21*( D22*AP( J+K*( 2*N-K-1 ) / 2 )- + $ AP( J+( K-1 )*( 2*N-K ) / 2 ) ) + DO 90 I = J, N + AP( I+( J-1 )*( 2*N-J ) / 2 ) = AP( I+( J-1 )* + $ ( 2*N-J ) / 2 ) - AP( I+( K-1 )*( 2*N-K ) / + $ 2 )*WK - AP( I+K*( 2*N-K-1 ) / 2 )*WKP1 + 90 CONTINUE + AP( J+( K-1 )*( 2*N-K ) / 2 ) = WK + AP( J+K*( 2*N-K-1 ) / 2 ) = WKP1 + 100 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + KC = KNC + N - K + 2 + GO TO 60 +* + END IF +* + 110 CONTINUE + RETURN +* +* End of ZSPTRF +* + END diff --git a/dspl/liblapack/SRC/zsptri.f b/dspl/liblapack/SRC/zsptri.f new file mode 100644 index 0000000..cb9efc8 --- /dev/null +++ b/dspl/liblapack/SRC/zsptri.f @@ -0,0 +1,404 @@ +*> \brief \b ZSPTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSPTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSPTRI( UPLO, N, AP, IPIV, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSPTRI computes the inverse of a complex symmetric indefinite matrix +*> A in packed storage using the factorization A = U*D*U**T or +*> A = L*D*L**T computed by ZSPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by ZSPTRF, +*> stored as a packed triangular matrix. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix, stored as a packed triangular matrix. The j-th column +*> of inv(A) is stored in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j; +*> if UPLO = 'L', +*> AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSPTRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZSPTRI( UPLO, N, AP, IPIV, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KCNEXT, KP, KPC, KSTEP, KX, NPP + COMPLEX*16 AK, AKKP1, AKP1, D, T, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTU + EXTERNAL LSAME, ZDOTU +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZSPMV, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSPTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + KP = N*( N+1 ) / 2 + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP - INFO + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + KP = 1 + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO ) + $ RETURN + KP = KP + N - INFO + 1 + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + KCNEXT = KC + K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC+K-1 ) = ONE / AP( KC+K-1 ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL ZCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL ZSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ), + $ 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ ZDOTU( K-1, WORK, 1, AP( KC ), 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = AP( KCNEXT+K-1 ) + AK = AP( KC+K-1 ) / T + AKP1 = AP( KCNEXT+K ) / T + AKKP1 = AP( KCNEXT+K-1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KC+K-1 ) = AKP1 / D + AP( KCNEXT+K ) = AK / D + AP( KCNEXT+K-1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL ZCOPY( K-1, AP( KC ), 1, WORK, 1 ) + CALL ZSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ), + $ 1 ) + AP( KC+K-1 ) = AP( KC+K-1 ) - + $ ZDOTU( K-1, WORK, 1, AP( KC ), 1 ) + AP( KCNEXT+K-1 ) = AP( KCNEXT+K-1 ) - + $ ZDOTU( K-1, AP( KC ), 1, AP( KCNEXT ), + $ 1 ) + CALL ZCOPY( K-1, AP( KCNEXT ), 1, WORK, 1 ) + CALL ZSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, + $ AP( KCNEXT ), 1 ) + AP( KCNEXT+K ) = AP( KCNEXT+K ) - + $ ZDOTU( K-1, WORK, 1, AP( KCNEXT ), 1 ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT + K + 1 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + KPC = ( KP-1 )*KP / 2 + 1 + CALL ZSWAP( KP-1, AP( KC ), 1, AP( KPC ), 1 ) + KX = KPC + KP - 1 + DO 40 J = KP + 1, K - 1 + KX = KX + J - 1 + TEMP = AP( KC+J-1 ) + AP( KC+J-1 ) = AP( KX ) + AP( KX ) = TEMP + 40 CONTINUE + TEMP = AP( KC+K-1 ) + AP( KC+K-1 ) = AP( KPC+KP-1 ) + AP( KPC+KP-1 ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC+K+K-1 ) + AP( KC+K+K-1 ) = AP( KC+K+KP-1 ) + AP( KC+K+KP-1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + KC = KCNEXT + GO TO 30 + 50 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + NPP = N*( N+1 ) / 2 + K = N + KC = NPP + 60 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 80 +* + KCNEXT = KC - ( N-K+2 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + AP( KC ) = ONE / AP( KC ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL ZCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL ZSPMV( UPLO, N-K, -ONE, AP( KC+N-K+1 ), WORK, 1, + $ ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - ZDOTU( N-K, WORK, 1, AP( KC+1 ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = AP( KCNEXT+1 ) + AK = AP( KCNEXT ) / T + AKP1 = AP( KC ) / T + AKKP1 = AP( KCNEXT+1 ) / T + D = T*( AK*AKP1-ONE ) + AP( KCNEXT ) = AKP1 / D + AP( KC ) = AK / D + AP( KCNEXT+1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL ZCOPY( N-K, AP( KC+1 ), 1, WORK, 1 ) + CALL ZSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1, + $ ZERO, AP( KC+1 ), 1 ) + AP( KC ) = AP( KC ) - ZDOTU( N-K, WORK, 1, AP( KC+1 ), + $ 1 ) + AP( KCNEXT+1 ) = AP( KCNEXT+1 ) - + $ ZDOTU( N-K, AP( KC+1 ), 1, + $ AP( KCNEXT+2 ), 1 ) + CALL ZCOPY( N-K, AP( KCNEXT+2 ), 1, WORK, 1 ) + CALL ZSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1, + $ ZERO, AP( KCNEXT+2 ), 1 ) + AP( KCNEXT ) = AP( KCNEXT ) - + $ ZDOTU( N-K, WORK, 1, AP( KCNEXT+2 ), 1 ) + END IF + KSTEP = 2 + KCNEXT = KCNEXT - ( N-K+3 ) + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + KPC = NPP - ( N-KP+1 )*( N-KP+2 ) / 2 + 1 + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, AP( KC+KP-K+1 ), 1, AP( KPC+1 ), 1 ) + KX = KC + KP - K + DO 70 J = K + 1, KP - 1 + KX = KX + N - J + 1 + TEMP = AP( KC+J-K ) + AP( KC+J-K ) = AP( KX ) + AP( KX ) = TEMP + 70 CONTINUE + TEMP = AP( KC ) + AP( KC ) = AP( KPC ) + AP( KPC ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = AP( KC-N+K-1 ) + AP( KC-N+K-1 ) = AP( KC-N+KP-1 ) + AP( KC-N+KP-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + KC = KCNEXT + GO TO 60 + 80 CONTINUE + END IF +* + RETURN +* +* End of ZSPTRI +* + END diff --git a/dspl/liblapack/SRC/zsptrs.f b/dspl/liblapack/SRC/zsptrs.f new file mode 100644 index 0000000..feb29bc --- /dev/null +++ b/dspl/liblapack/SRC/zsptrs.f @@ -0,0 +1,450 @@ +*> \brief \b ZSPTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSPTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSPTRS solves a system of linear equations A*X = B with a complex +*> symmetric matrix A stored in packed format using the factorization +*> A = U*D*U**T or A = L*D*L**T computed by ZSPTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZSPTRF, stored as a +*> packed triangular matrix. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSPTRF. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KC, KP + COMPLEX*16 AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMV, ZGERU, ZSCAL, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**T. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + KC = KC - K + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL ZGERU( K-1, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL ZSCAL( NRHS, ONE / AP( KC+K-1 ), B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL ZSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL ZGERU( K-2, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL ZGERU( K-2, NRHS, -ONE, AP( KC-( K-1 ) ), 1, + $ B( K-1, 1 ), LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+K-2 ) + AKM1 = AP( KC-1 ) / AKM1K + AK = AP( KC+K-1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / AKM1K + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + KC = KC - K + 1 + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U**T*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U**T(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL ZGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ), + $ 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + K + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U**T(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + CALL ZGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ), + $ 1, ONE, B( K, 1 ), LDB ) + CALL ZGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, + $ AP( KC+K ), 1, ONE, B( K+1, 1 ), LDB ) +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC + 2*K + 1 + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L**T. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + KC = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL ZGERU( N-K, NRHS, -ONE, AP( KC+1 ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL ZSCAL( NRHS, ONE / AP( KC ), B( K, 1 ), LDB ) + KC = KC + N - K + 1 + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL ZSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL ZGERU( N-K-1, NRHS, -ONE, AP( KC+2 ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL ZGERU( N-K-1, NRHS, -ONE, AP( KC+N-K+2 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = AP( KC+1 ) + AKM1 = AP( KC ) / AKM1K + AK = AP( KC+N-K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / AKM1K + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + KC = KC + 2*( N-K ) + 1 + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L**T*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + KC = N*( N+1 ) / 2 + 1 + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + KC = KC - ( N-K+1 ) + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L**T(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL ZGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L**T(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL ZGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB ) + CALL ZGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, AP( KC-( N-K ) ), 1, ONE, B( K-1, 1 ), + $ LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + KC = KC - ( N-K+2 ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of ZSPTRS +* + END diff --git a/dspl/liblapack/SRC/zstedc.f b/dspl/liblapack/SRC/zstedc.f new file mode 100644 index 0000000..b6be431 --- /dev/null +++ b/dspl/liblapack/SRC/zstedc.f @@ -0,0 +1,486 @@ +*> \brief \b ZSTEDC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSTEDC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, RWORK, +* LRWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPZ +* INTEGER INFO, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), RWORK( * ) +* COMPLEX*16 WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSTEDC computes all eigenvalues and, optionally, eigenvectors of a +*> symmetric tridiagonal matrix using the divide and conquer method. +*> The eigenvectors of a full or band complex Hermitian matrix can also +*> be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this +*> matrix to tridiagonal form. +*> +*> This code makes very mild assumptions about floating point +*> arithmetic. It will work on machines with a guard digit in +*> add/subtract, or on those binary machines without guard digits +*> which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. +*> It could conceivably fail on hexadecimal or decimal machines +*> without guard digits, but we know of none. See DLAED3 for details. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only. +*> = 'I': Compute eigenvectors of tridiagonal matrix also. +*> = 'V': Compute eigenvectors of original Hermitian matrix +*> also. On entry, Z contains the unitary matrix used +*> to reduce the original matrix to tridiagonal form. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The dimension of the symmetric tridiagonal matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the diagonal elements of the tridiagonal matrix. +*> On exit, if INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> On entry, the subdiagonal elements of the tridiagonal matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ,N) +*> On entry, if COMPZ = 'V', then Z contains the unitary +*> matrix used in the reduction to tridiagonal form. +*> On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +*> orthonormal eigenvectors of the original Hermitian matrix, +*> and if COMPZ = 'I', Z contains the orthonormal eigenvectors +*> of the symmetric tridiagonal matrix. +*> If COMPZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1. +*> If eigenvectors are desired, then LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1. +*> If COMPZ = 'V' and N > 1, LWORK must be at least N*N. +*> Note that for COMPZ = 'V', then if N is less than or +*> equal to the minimum divide size, usually 25, then LWORK need +*> only be 1. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal sizes of the WORK, RWORK and +*> IWORK arrays, returns these values as the first entries of +*> the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of the array RWORK. +*> If COMPZ = 'N' or N <= 1, LRWORK must be at least 1. +*> If COMPZ = 'V' and N > 1, LRWORK must be at least +*> 1 + 3*N + 2*N*lg N + 4*N**2 , +*> where lg( N ) = smallest integer k such +*> that 2**k >= N. +*> If COMPZ = 'I' and N > 1, LRWORK must be at least +*> 1 + 4*N + 2*N**2 . +*> Note that for COMPZ = 'I' or 'V', then if N is less than or +*> equal to the minimum divide size, usually 25, then LRWORK +*> need only be max(1,2*(N-1)). +*> +*> If LRWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. +*> If COMPZ = 'N' or N <= 1, LIWORK must be at least 1. +*> If COMPZ = 'V' or N > 1, LIWORK must be at least +*> 6 + 6*N + 5*N*lg N. +*> If COMPZ = 'I' or N > 1, LIWORK must be at least +*> 3 + 5*N . +*> Note that for COMPZ = 'I' or 'V', then if N is less than or +*> equal to the minimum divide size, usually 25, then LIWORK +*> need only be 1. +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal sizes of the WORK, RWORK +*> and IWORK arrays, returns these values as the first entries +*> of the WORK, RWORK and IWORK arrays, and no error message +*> related to LWORK or LRWORK or LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: The algorithm failed to compute an eigenvalue while +*> working on the submatrix lying in rows and columns +*> INFO/(N+1) through mod(INFO,N+1). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Jeff Rutter, Computer Science Division, University of California +*> at Berkeley, USA +* +* ===================================================================== + SUBROUTINE ZSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, RWORK, + $ LRWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, LIWORK, LRWORK, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), RWORK( * ) + COMPLEX*16 WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER FINISH, I, ICOMPZ, II, J, K, LGN, LIWMIN, LL, + $ LRWMIN, LWMIN, M, SMLSIZ, START + DOUBLE PRECISION EPS, ORGNRM, P, TINY +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, ILAENV, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DLASCL, DLASET, DSTEDC, DSTEQR, DSTERF, XERBLA, + $ ZLACPY, ZLACRM, ZLAED0, ZSTEQR, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, INT, LOG, MAX, MOD, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. + $ ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, N ) ) ) THEN + INFO = -6 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + SMLSIZ = ILAENV( 9, 'ZSTEDC', ' ', 0, 0, 0, 0 ) + IF( N.LE.1 .OR. ICOMPZ.EQ.0 ) THEN + LWMIN = 1 + LIWMIN = 1 + LRWMIN = 1 + ELSE IF( N.LE.SMLSIZ ) THEN + LWMIN = 1 + LIWMIN = 1 + LRWMIN = 2*( N - 1 ) + ELSE IF( ICOMPZ.EQ.1 ) THEN + LGN = INT( LOG( DBLE( N ) ) / LOG( TWO ) ) + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + IF( 2**LGN.LT.N ) + $ LGN = LGN + 1 + LWMIN = N*N + LRWMIN = 1 + 3*N + 2*N*LGN + 4*N**2 + LIWMIN = 6 + 6*N + 5*N*LGN + ELSE IF( ICOMPZ.EQ.2 ) THEN + LWMIN = 1 + LRWMIN = 1 + 4*N + 2*N**2 + LIWMIN = 3 + 5*N + END IF + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -8 + ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN + INFO = -10 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSTEDC', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN + IF( N.EQ.1 ) THEN + IF( ICOMPZ.NE.0 ) + $ Z( 1, 1 ) = ONE + RETURN + END IF +* +* If the following conditional clause is removed, then the routine +* will use the Divide and Conquer routine to compute only the +* eigenvalues, which requires (3N + 3N**2) real workspace and +* (2 + 5N + 2N lg(N)) integer workspace. +* Since on many architectures DSTERF is much faster than any other +* algorithm for finding eigenvalues only, it is used here +* as the default. If the conditional clause is removed, then +* information on the size of workspace needs to be changed. +* +* If COMPZ = 'N', use DSTERF to compute the eigenvalues. +* + IF( ICOMPZ.EQ.0 ) THEN + CALL DSTERF( N, D, E, INFO ) + GO TO 70 + END IF +* +* If N is smaller than the minimum divide size (SMLSIZ+1), then +* solve the problem with another solver. +* + IF( N.LE.SMLSIZ ) THEN +* + CALL ZSTEQR( COMPZ, N, D, E, Z, LDZ, RWORK, INFO ) +* + ELSE +* +* If COMPZ = 'I', we simply call DSTEDC instead. +* + IF( ICOMPZ.EQ.2 ) THEN + CALL DLASET( 'Full', N, N, ZERO, ONE, RWORK, N ) + LL = N*N + 1 + CALL DSTEDC( 'I', N, D, E, RWORK, N, + $ RWORK( LL ), LRWORK-LL+1, IWORK, LIWORK, INFO ) + DO 20 J = 1, N + DO 10 I = 1, N + Z( I, J ) = RWORK( ( J-1 )*N+I ) + 10 CONTINUE + 20 CONTINUE + GO TO 70 + END IF +* +* From now on, only option left to be handled is COMPZ = 'V', +* i.e. ICOMPZ = 1. +* +* Scale. +* + ORGNRM = DLANST( 'M', N, D, E ) + IF( ORGNRM.EQ.ZERO ) + $ GO TO 70 +* + EPS = DLAMCH( 'Epsilon' ) +* + START = 1 +* +* while ( START <= N ) +* + 30 CONTINUE + IF( START.LE.N ) THEN +* +* Let FINISH be the position of the next subdiagonal entry +* such that E( FINISH ) <= TINY or FINISH = N if no such +* subdiagonal exists. The matrix identified by the elements +* between START and FINISH constitutes an independent +* sub-problem. +* + FINISH = START + 40 CONTINUE + IF( FINISH.LT.N ) THEN + TINY = EPS*SQRT( ABS( D( FINISH ) ) )* + $ SQRT( ABS( D( FINISH+1 ) ) ) + IF( ABS( E( FINISH ) ).GT.TINY ) THEN + FINISH = FINISH + 1 + GO TO 40 + END IF + END IF +* +* (Sub) Problem determined. Compute its size and solve it. +* + M = FINISH - START + 1 + IF( M.GT.SMLSIZ ) THEN +* +* Scale. +* + ORGNRM = DLANST( 'M', M, D( START ), E( START ) ) + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, M, 1, D( START ), M, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, M-1, 1, E( START ), + $ M-1, INFO ) +* + CALL ZLAED0( N, M, D( START ), E( START ), Z( 1, START ), + $ LDZ, WORK, N, RWORK, IWORK, INFO ) + IF( INFO.GT.0 ) THEN + INFO = ( INFO / ( M+1 )+START-1 )*( N+1 ) + + $ MOD( INFO, ( M+1 ) ) + START - 1 + GO TO 70 + END IF +* +* Scale back. +* + CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, M, 1, D( START ), M, + $ INFO ) +* + ELSE + CALL DSTEQR( 'I', M, D( START ), E( START ), RWORK, M, + $ RWORK( M*M+1 ), INFO ) + CALL ZLACRM( N, M, Z( 1, START ), LDZ, RWORK, M, WORK, N, + $ RWORK( M*M+1 ) ) + CALL ZLACPY( 'A', N, M, WORK, N, Z( 1, START ), LDZ ) + IF( INFO.GT.0 ) THEN + INFO = START*( N+1 ) + FINISH + GO TO 70 + END IF + END IF +* + START = FINISH + 1 + GO TO 30 + END IF +* +* endwhile +* +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 60 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 50 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 50 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 60 CONTINUE + END IF +* + 70 CONTINUE + WORK( 1 ) = LWMIN + RWORK( 1 ) = LRWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of ZSTEDC +* + END diff --git a/dspl/liblapack/SRC/zstegr.f b/dspl/liblapack/SRC/zstegr.f new file mode 100644 index 0000000..5ad9810 --- /dev/null +++ b/dspl/liblapack/SRC/zstegr.f @@ -0,0 +1,302 @@ +*> \brief \b ZSTEGR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSTEGR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSTEGR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, +* ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, +* LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE +* INTEGER IL, INFO, IU, LDZ, LIWORK, LWORK, M, N +* DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ) +* COMPLEX*16 Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSTEGR computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has +*> a well defined set of pairwise different real eigenvalues, the corresponding +*> real eigenvectors are pairwise orthogonal. +*> +*> The spectrum may be computed either completely or partially by specifying +*> either an interval (VL,VU] or a range of indices IL:IU for the desired +*> eigenvalues. +*> +*> ZSTEGR is a compatibility wrapper around the improved ZSTEMR routine. +*> See DSTEMR for further details. +*> +*> One important change is that the ABSTOL parameter no longer provides any +*> benefit and hence is no longer used. +*> +*> Note : ZSTEGR and ZSTEMR work only on machines which follow +*> IEEE-754 floating-point standard in their handling of infinities and +*> NaNs. Normal execution may create these exceptiona values and hence +*> may abort due to a floating point exception in environments which +*> do not conform to the IEEE-754 standard. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the N diagonal elements of the tridiagonal matrix +*> T. On exit, D is overwritten. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> On entry, the (N-1) subdiagonal elements of the tridiagonal +*> matrix T in elements 1 to N-1 of E. E(N) need not be set on +*> input, but is used internally as workspace. +*> On exit, E is overwritten. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] ABSTOL +*> \verbatim +*> ABSTOL is DOUBLE PRECISION +*> Unused. Was the absolute error tolerance for the +*> eigenvalues/eigenvectors in previous versions. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, max(1,M) ) +*> If JOBZ = 'V', and if INFO = 0, then the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix T +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and an upper bound must be used. +*> Supplying N columns is always safe. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', then LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th computed eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). This is relevant in the case when the matrix +*> is split. ISUPPZ is only accessed when JOBZ is 'V' and N > 0. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal +*> (and minimal) LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,18*N) +*> if JOBZ = 'V', and LWORK >= max(1,12*N) if JOBZ = 'N'. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (LIWORK) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N) +*> if the eigenvectors are desired, and LIWORK >= max(1,8*N) +*> if only the eigenvalues are to be computed. +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> On exit, INFO +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = 1X, internal error in DLARRE, +*> if INFO = 2X, internal error in ZLARRV. +*> Here, the digit X = ABS( IINFO ) < 10, where IINFO is +*> the nonzero error code returned by DLARRE or +*> ZLARRV, respectively. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Inderjit Dhillon, IBM Almaden, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, LBNL/NERSC, USA \n +* +* ===================================================================== + SUBROUTINE ZSTEGR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, + $ ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, + $ LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE + INTEGER IL, INFO, IU, LDZ, LIWORK, LWORK, M, N + DOUBLE PRECISION ABSTOL, VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ) + COMPLEX*16 Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL TRYRAC +* .. +* .. External Subroutines .. + EXTERNAL ZSTEMR +* .. +* .. Executable Statements .. + INFO = 0 + TRYRAC = .FALSE. + + CALL ZSTEMR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, + $ M, W, Z, LDZ, N, ISUPPZ, TRYRAC, WORK, LWORK, + $ IWORK, LIWORK, INFO ) +* +* End of ZSTEGR +* + END diff --git a/dspl/liblapack/SRC/zstein.f b/dspl/liblapack/SRC/zstein.f new file mode 100644 index 0000000..ff8384a --- /dev/null +++ b/dspl/liblapack/SRC/zstein.f @@ -0,0 +1,469 @@ +*> \brief \b ZSTEIN +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSTEIN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, Z, LDZ, WORK, +* IWORK, IFAIL, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDZ, M, N +* .. +* .. Array Arguments .. +* INTEGER IBLOCK( * ), IFAIL( * ), ISPLIT( * ), +* $ IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ) +* COMPLEX*16 Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSTEIN computes the eigenvectors of a real symmetric tridiagonal +*> matrix T corresponding to specified eigenvalues, using inverse +*> iteration. +*> +*> The maximum number of iterations allowed for each eigenvector is +*> specified by an internal parameter MAXITS (currently set to 5). +*> +*> Although the eigenvectors are real, they are stored in a complex +*> array, which may be passed to ZUNMTR or ZUPMTR for back +*> transformation to the eigenvectors of a complex Hermitian matrix +*> which was reduced to tridiagonal form. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The n diagonal elements of the tridiagonal matrix T. +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The (n-1) subdiagonal elements of the tridiagonal matrix +*> T, stored in elements 1 to N-1. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of eigenvectors to be found. 0 <= M <= N. +*> \endverbatim +*> +*> \param[in] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements of W contain the eigenvalues for +*> which eigenvectors are to be computed. The eigenvalues +*> should be grouped by split-off block and ordered from +*> smallest to largest within the block. ( The output array +*> W from DSTEBZ with ORDER = 'B' is expected here. ) +*> \endverbatim +*> +*> \param[in] IBLOCK +*> \verbatim +*> IBLOCK is INTEGER array, dimension (N) +*> The submatrix indices associated with the corresponding +*> eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to +*> the first submatrix from the top, =2 if W(i) belongs to +*> the second submatrix, etc. ( The output array IBLOCK +*> from DSTEBZ is expected here. ) +*> \endverbatim +*> +*> \param[in] ISPLIT +*> \verbatim +*> ISPLIT is INTEGER array, dimension (N) +*> The splitting points, at which T breaks up into submatrices. +*> The first submatrix consists of rows/columns 1 to +*> ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 +*> through ISPLIT( 2 ), etc. +*> ( The output array ISPLIT from DSTEBZ is expected here. ) +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, M) +*> The computed eigenvectors. The eigenvector associated +*> with the eigenvalue W(i) is stored in the i-th column of +*> Z. Any vector which fails to converge is set to its current +*> iterate after MAXITS iterations. +*> The imaginary parts of the eigenvectors are set to zero. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (5*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] IFAIL +*> \verbatim +*> IFAIL is INTEGER array, dimension (M) +*> On normal exit, all elements of IFAIL are zero. +*> If one or more eigenvectors fail to converge after +*> MAXITS iterations, then their indices are stored in +*> array IFAIL. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, then i eigenvectors failed to converge +*> in MAXITS iterations. Their indices are stored in +*> array IFAIL. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> MAXITS INTEGER, default = 5 +*> The maximum number of iterations performed. +*> +*> EXTRA INTEGER, default = 2 +*> The number of iterations performed after norm growth +*> criterion is satisfied, should be at least 1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, Z, LDZ, WORK, + $ IWORK, IFAIL, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDZ, M, N +* .. +* .. Array Arguments .. + INTEGER IBLOCK( * ), IFAIL( * ), ISPLIT( * ), + $ IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ) + COMPLEX*16 Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION ZERO, ONE, TEN, ODM3, ODM1 + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TEN = 1.0D+1, + $ ODM3 = 1.0D-3, ODM1 = 1.0D-1 ) + INTEGER MAXITS, EXTRA + PARAMETER ( MAXITS = 5, EXTRA = 2 ) +* .. +* .. Local Scalars .. + INTEGER B1, BLKSIZ, BN, GPIND, I, IINFO, INDRV1, + $ INDRV2, INDRV3, INDRV4, INDRV5, ITS, J, J1, + $ JBLK, JMAX, JR, NBLK, NRMCHK + DOUBLE PRECISION DTPCRT, EPS, EPS1, NRM, ONENRM, ORTOL, PERTOL, + $ SCL, SEP, TOL, XJ, XJM, ZTR +* .. +* .. Local Arrays .. + INTEGER ISEED( 4 ) +* .. +* .. External Functions .. + INTEGER IDAMAX + DOUBLE PRECISION DLAMCH, DNRM2 + EXTERNAL IDAMAX, DLAMCH, DNRM2 +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAGTF, DLAGTS, DLARNV, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + DO 10 I = 1, M + IFAIL( I ) = 0 + 10 CONTINUE +* + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( M.LT.0 .OR. M.GT.N ) THEN + INFO = -4 + ELSE IF( LDZ.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE + DO 20 J = 2, M + IF( IBLOCK( J ).LT.IBLOCK( J-1 ) ) THEN + INFO = -6 + GO TO 30 + END IF + IF( IBLOCK( J ).EQ.IBLOCK( J-1 ) .AND. W( J ).LT.W( J-1 ) ) + $ THEN + INFO = -5 + GO TO 30 + END IF + 20 CONTINUE + 30 CONTINUE + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSTEIN', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) THEN + RETURN + ELSE IF( N.EQ.1 ) THEN + Z( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + EPS = DLAMCH( 'Precision' ) +* +* Initialize seed for random number generator DLARNV. +* + DO 40 I = 1, 4 + ISEED( I ) = 1 + 40 CONTINUE +* +* Initialize pointers. +* + INDRV1 = 0 + INDRV2 = INDRV1 + N + INDRV3 = INDRV2 + N + INDRV4 = INDRV3 + N + INDRV5 = INDRV4 + N +* +* Compute eigenvectors of matrix blocks. +* + J1 = 1 + DO 180 NBLK = 1, IBLOCK( M ) +* +* Find starting and ending indices of block nblk. +* + IF( NBLK.EQ.1 ) THEN + B1 = 1 + ELSE + B1 = ISPLIT( NBLK-1 ) + 1 + END IF + BN = ISPLIT( NBLK ) + BLKSIZ = BN - B1 + 1 + IF( BLKSIZ.EQ.1 ) + $ GO TO 60 + GPIND = J1 +* +* Compute reorthogonalization criterion and stopping criterion. +* + ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) ) + ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) ) + DO 50 I = B1 + 1, BN - 1 + ONENRM = MAX( ONENRM, ABS( D( I ) )+ABS( E( I-1 ) )+ + $ ABS( E( I ) ) ) + 50 CONTINUE + ORTOL = ODM3*ONENRM +* + DTPCRT = SQRT( ODM1 / BLKSIZ ) +* +* Loop through eigenvalues of block nblk. +* + 60 CONTINUE + JBLK = 0 + DO 170 J = J1, M + IF( IBLOCK( J ).NE.NBLK ) THEN + J1 = J + GO TO 180 + END IF + JBLK = JBLK + 1 + XJ = W( J ) +* +* Skip all the work if the block size is one. +* + IF( BLKSIZ.EQ.1 ) THEN + WORK( INDRV1+1 ) = ONE + GO TO 140 + END IF +* +* If eigenvalues j and j-1 are too close, add a relatively +* small perturbation. +* + IF( JBLK.GT.1 ) THEN + EPS1 = ABS( EPS*XJ ) + PERTOL = TEN*EPS1 + SEP = XJ - XJM + IF( SEP.LT.PERTOL ) + $ XJ = XJM + PERTOL + END IF +* + ITS = 0 + NRMCHK = 0 +* +* Get random starting vector. +* + CALL DLARNV( 2, ISEED, BLKSIZ, WORK( INDRV1+1 ) ) +* +* Copy the matrix T so it won't be destroyed in factorization. +* + CALL DCOPY( BLKSIZ, D( B1 ), 1, WORK( INDRV4+1 ), 1 ) + CALL DCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV2+2 ), 1 ) + CALL DCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV3+1 ), 1 ) +* +* Compute LU factors with partial pivoting ( PT = LU ) +* + TOL = ZERO + CALL DLAGTF( BLKSIZ, WORK( INDRV4+1 ), XJ, WORK( INDRV2+2 ), + $ WORK( INDRV3+1 ), TOL, WORK( INDRV5+1 ), IWORK, + $ IINFO ) +* +* Update iteration count. +* + 70 CONTINUE + ITS = ITS + 1 + IF( ITS.GT.MAXITS ) + $ GO TO 120 +* +* Normalize and scale the righthand side vector Pb. +* + JMAX = IDAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) + SCL = BLKSIZ*ONENRM*MAX( EPS, + $ ABS( WORK( INDRV4+BLKSIZ ) ) ) / + $ ABS( WORK( INDRV1+JMAX ) ) + CALL DSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 ) +* +* Solve the system LU = Pb. +* + CALL DLAGTS( -1, BLKSIZ, WORK( INDRV4+1 ), WORK( INDRV2+2 ), + $ WORK( INDRV3+1 ), WORK( INDRV5+1 ), IWORK, + $ WORK( INDRV1+1 ), TOL, IINFO ) +* +* Reorthogonalize by modified Gram-Schmidt if eigenvalues are +* close enough. +* + IF( JBLK.EQ.1 ) + $ GO TO 110 + IF( ABS( XJ-XJM ).GT.ORTOL ) + $ GPIND = J + IF( GPIND.NE.J ) THEN + DO 100 I = GPIND, J - 1 + ZTR = ZERO + DO 80 JR = 1, BLKSIZ + ZTR = ZTR + WORK( INDRV1+JR )* + $ DBLE( Z( B1-1+JR, I ) ) + 80 CONTINUE + DO 90 JR = 1, BLKSIZ + WORK( INDRV1+JR ) = WORK( INDRV1+JR ) - + $ ZTR*DBLE( Z( B1-1+JR, I ) ) + 90 CONTINUE + 100 CONTINUE + END IF +* +* Check the infinity norm of the iterate. +* + 110 CONTINUE + JMAX = IDAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) + NRM = ABS( WORK( INDRV1+JMAX ) ) +* +* Continue for additional iterations after norm reaches +* stopping criterion. +* + IF( NRM.LT.DTPCRT ) + $ GO TO 70 + NRMCHK = NRMCHK + 1 + IF( NRMCHK.LT.EXTRA+1 ) + $ GO TO 70 +* + GO TO 130 +* +* If stopping criterion was not satisfied, update info and +* store eigenvector number in array ifail. +* + 120 CONTINUE + INFO = INFO + 1 + IFAIL( INFO ) = J +* +* Accept iterate as jth eigenvector. +* + 130 CONTINUE + SCL = ONE / DNRM2( BLKSIZ, WORK( INDRV1+1 ), 1 ) + JMAX = IDAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 ) + IF( WORK( INDRV1+JMAX ).LT.ZERO ) + $ SCL = -SCL + CALL DSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 ) + 140 CONTINUE + DO 150 I = 1, N + Z( I, J ) = CZERO + 150 CONTINUE + DO 160 I = 1, BLKSIZ + Z( B1+I-1, J ) = DCMPLX( WORK( INDRV1+I ), ZERO ) + 160 CONTINUE +* +* Save the shift to check eigenvalue spacing at next +* iteration. +* + XJM = XJ +* + 170 CONTINUE + 180 CONTINUE +* + RETURN +* +* End of ZSTEIN +* + END diff --git a/dspl/liblapack/SRC/zstemr.f b/dspl/liblapack/SRC/zstemr.f new file mode 100644 index 0000000..ac7552a --- /dev/null +++ b/dspl/liblapack/SRC/zstemr.f @@ -0,0 +1,791 @@ +*> \brief \b ZSTEMR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSTEMR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSTEMR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, +* M, W, Z, LDZ, NZC, ISUPPZ, TRYRAC, WORK, LWORK, +* IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, RANGE +* LOGICAL TRYRAC +* INTEGER IL, INFO, IU, LDZ, NZC, LIWORK, LWORK, M, N +* DOUBLE PRECISION VL, VU +* .. +* .. Array Arguments .. +* INTEGER ISUPPZ( * ), IWORK( * ) +* DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ) +* COMPLEX*16 Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSTEMR computes selected eigenvalues and, optionally, eigenvectors +*> of a real symmetric tridiagonal matrix T. Any such unreduced matrix has +*> a well defined set of pairwise different real eigenvalues, the corresponding +*> real eigenvectors are pairwise orthogonal. +*> +*> The spectrum may be computed either completely or partially by specifying +*> either an interval (VL,VU] or a range of indices IL:IU for the desired +*> eigenvalues. +*> +*> Depending on the number of desired eigenvalues, these are computed either +*> by bisection or the dqds algorithm. Numerically orthogonal eigenvectors are +*> computed by the use of various suitable L D L^T factorizations near clusters +*> of close eigenvalues (referred to as RRRs, Relatively Robust +*> Representations). An informal sketch of the algorithm follows. +*> +*> For each unreduced block (submatrix) of T, +*> (a) Compute T - sigma I = L D L^T, so that L and D +*> define all the wanted eigenvalues to high relative accuracy. +*> This means that small relative changes in the entries of D and L +*> cause only small relative changes in the eigenvalues and +*> eigenvectors. The standard (unfactored) representation of the +*> tridiagonal matrix T does not have this property in general. +*> (b) Compute the eigenvalues to suitable accuracy. +*> If the eigenvectors are desired, the algorithm attains full +*> accuracy of the computed eigenvalues only right before +*> the corresponding vectors have to be computed, see steps c) and d). +*> (c) For each cluster of close eigenvalues, select a new +*> shift close to the cluster, find a new factorization, and refine +*> the shifted eigenvalues to suitable accuracy. +*> (d) For each eigenvalue with a large enough relative separation compute +*> the corresponding eigenvector by forming a rank revealing twisted +*> factorization. Go back to (c) for any clusters that remain. +*> +*> For more details, see: +*> - Inderjit S. Dhillon and Beresford N. Parlett: "Multiple representations +*> to compute orthogonal eigenvectors of symmetric tridiagonal matrices," +*> Linear Algebra and its Applications, 387(1), pp. 1-28, August 2004. +*> - Inderjit Dhillon and Beresford Parlett: "Orthogonal Eigenvectors and +*> Relative Gaps," SIAM Journal on Matrix Analysis and Applications, Vol. 25, +*> 2004. Also LAPACK Working Note 154. +*> - Inderjit Dhillon: "A new O(n^2) algorithm for the symmetric +*> tridiagonal eigenvalue/eigenvector problem", +*> Computer Science Division Technical Report No. UCB/CSD-97-971, +*> UC Berkeley, May 1997. +*> +*> Further Details +*> 1.ZSTEMR works only on machines which follow IEEE-754 +*> floating-point standard in their handling of infinities and NaNs. +*> This permits the use of efficient inner loops avoiding a check for +*> zero divisors. +*> +*> 2. LAPACK routines can be used to reduce a complex Hermitean matrix to +*> real symmetric tridiagonal form. +*> +*> (Any complex Hermitean tridiagonal matrix has real values on its diagonal +*> and potentially complex numbers on its off-diagonals. By applying a +*> similarity transform with an appropriate diagonal matrix +*> diag(1,e^{i \phy_1}, ... , e^{i \phy_{n-1}}), the complex Hermitean +*> matrix can be transformed into a real symmetric matrix and complex +*> arithmetic can be entirely avoided.) +*> +*> While the eigenvectors of the real symmetric tridiagonal matrix are real, +*> the eigenvectors of original complex Hermitean matrix have complex entries +*> in general. +*> Since LAPACK drivers overwrite the matrix data with the eigenvectors, +*> ZSTEMR accepts complex workspace to facilitate interoperability +*> with ZUNMTR or ZUPMTR. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] RANGE +*> \verbatim +*> RANGE is CHARACTER*1 +*> = 'A': all eigenvalues will be found. +*> = 'V': all eigenvalues in the half-open interval (VL,VU] +*> will be found. +*> = 'I': the IL-th through IU-th eigenvalues will be found. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the N diagonal elements of the tridiagonal matrix +*> T. On exit, D is overwritten. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N) +*> On entry, the (N-1) subdiagonal elements of the tridiagonal +*> matrix T in elements 1 to N-1 of E. E(N) need not be set on +*> input, but is used internally as workspace. +*> On exit, E is overwritten. +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is DOUBLE PRECISION +*> +*> If RANGE='V', the lower bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] VU +*> \verbatim +*> VU is DOUBLE PRECISION +*> +*> If RANGE='V', the upper bound of the interval to +*> be searched for eigenvalues. VL < VU. +*> Not referenced if RANGE = 'A' or 'I'. +*> \endverbatim +*> +*> \param[in] IL +*> \verbatim +*> IL is INTEGER +*> +*> If RANGE='I', the index of the +*> smallest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[in] IU +*> \verbatim +*> IU is INTEGER +*> +*> If RANGE='I', the index of the +*> largest eigenvalue to be returned. +*> 1 <= IL <= IU <= N, if N > 0. +*> Not referenced if RANGE = 'A' or 'V'. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The total number of eigenvalues found. 0 <= M <= N. +*> If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> The first M elements contain the selected eigenvalues in +*> ascending order. +*> \endverbatim +*> +*> \param[out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, max(1,M) ) +*> If JOBZ = 'V', and if INFO = 0, then the first M columns of Z +*> contain the orthonormal eigenvectors of the matrix T +*> corresponding to the selected eigenvalues, with the i-th +*> column of Z holding the eigenvector associated with W(i). +*> If JOBZ = 'N', then Z is not referenced. +*> Note: the user must ensure that at least max(1,M) columns are +*> supplied in the array Z; if RANGE = 'V', the exact value of M +*> is not known in advance and can be computed with a workspace +*> query by setting NZC = -1, see below. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> JOBZ = 'V', then LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[in] NZC +*> \verbatim +*> NZC is INTEGER +*> The number of eigenvectors to be held in the array Z. +*> If RANGE = 'A', then NZC >= max(1,N). +*> If RANGE = 'V', then NZC >= the number of eigenvalues in (VL,VU]. +*> If RANGE = 'I', then NZC >= IU-IL+1. +*> If NZC = -1, then a workspace query is assumed; the +*> routine calculates the number of columns of the array Z that +*> are needed to hold the eigenvectors. +*> This value is returned as the first entry of the Z array, and +*> no error message related to NZC is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] ISUPPZ +*> \verbatim +*> ISUPPZ is INTEGER array, dimension ( 2*max(1,M) ) +*> The support of the eigenvectors in Z, i.e., the indices +*> indicating the nonzero elements in Z. The i-th computed eigenvector +*> is nonzero only in elements ISUPPZ( 2*i-1 ) through +*> ISUPPZ( 2*i ). This is relevant in the case when the matrix +*> is split. ISUPPZ is only accessed when JOBZ is 'V' and N > 0. +*> \endverbatim +*> +*> \param[in,out] TRYRAC +*> \verbatim +*> TRYRAC is LOGICAL +*> If TRYRAC.EQ..TRUE., indicates that the code should check whether +*> the tridiagonal matrix defines its eigenvalues to high relative +*> accuracy. If so, the code uses relative-accuracy preserving +*> algorithms that might be (a bit) slower depending on the matrix. +*> If the matrix does not define its eigenvalues to high relative +*> accuracy, the code can uses possibly faster algorithms. +*> If TRYRAC.EQ..FALSE., the code is not required to guarantee +*> relatively accurate eigenvalues and can use the fastest possible +*> techniques. +*> On exit, a .TRUE. TRYRAC will be set to .FALSE. if the matrix +*> does not define its eigenvalues to high relative accuracy. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (LWORK) +*> On exit, if INFO = 0, WORK(1) returns the optimal +*> (and minimal) LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,18*N) +*> if JOBZ = 'V', and LWORK >= max(1,12*N) if JOBZ = 'N'. +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (LIWORK) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= max(1,10*N) +*> if the eigenvectors are desired, and LIWORK >= max(1,8*N) +*> if only the eigenvalues are to be computed. +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> On exit, INFO +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = 1X, internal error in DLARRE, +*> if INFO = 2X, internal error in ZLARRV. +*> Here, the digit X = ABS( IINFO ) < 10, where IINFO is +*> the nonzero error code returned by DLARRE or +*> ZLARRV, respectively. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> Beresford Parlett, University of California, Berkeley, USA \n +*> Jim Demmel, University of California, Berkeley, USA \n +*> Inderjit Dhillon, University of Texas, Austin, USA \n +*> Osni Marques, LBNL/NERSC, USA \n +*> Christof Voemel, University of California, Berkeley, USA \n +* +* ===================================================================== + SUBROUTINE ZSTEMR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, + $ M, W, Z, LDZ, NZC, ISUPPZ, TRYRAC, WORK, LWORK, + $ IWORK, LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, RANGE + LOGICAL TRYRAC + INTEGER IL, INFO, IU, LDZ, NZC, LIWORK, LWORK, M, N + DOUBLE PRECISION VL, VU +* .. +* .. Array Arguments .. + INTEGER ISUPPZ( * ), IWORK( * ) + DOUBLE PRECISION D( * ), E( * ), W( * ), WORK( * ) + COMPLEX*16 Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, FOUR, MINRGP + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, + $ FOUR = 4.0D0, + $ MINRGP = 1.0D-3 ) +* .. +* .. Local Scalars .. + LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ, ZQUERY + INTEGER I, IBEGIN, IEND, IFIRST, IIL, IINDBL, IINDW, + $ IINDWK, IINFO, IINSPL, IIU, ILAST, IN, INDD, + $ INDE2, INDERR, INDGP, INDGRS, INDWRK, ITMP, + $ ITMP2, J, JBLK, JJ, LIWMIN, LWMIN, NSPLIT, + $ NZCMIN, OFFSET, WBEGIN, WEND + DOUBLE PRECISION BIGNUM, CS, EPS, PIVMIN, R1, R2, RMAX, RMIN, + $ RTOL1, RTOL2, SAFMIN, SCALE, SMLNUM, SN, + $ THRESH, TMP, TNRM, WL, WU +* .. +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANST + EXTERNAL LSAME, DLAMCH, DLANST +* .. +* .. External Subroutines .. + EXTERNAL DCOPY, DLAE2, DLAEV2, DLARRC, DLARRE, DLARRJ, + $ DLARRR, DLASRT, DSCAL, XERBLA, ZLARRV, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN, SQRT + + +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + ALLEIG = LSAME( RANGE, 'A' ) + VALEIG = LSAME( RANGE, 'V' ) + INDEIG = LSAME( RANGE, 'I' ) +* + LQUERY = ( ( LWORK.EQ.-1 ).OR.( LIWORK.EQ.-1 ) ) + ZQUERY = ( NZC.EQ.-1 ) + +* DSTEMR needs WORK of size 6*N, IWORK of size 3*N. +* In addition, DLARRE needs WORK of size 6*N, IWORK of size 5*N. +* Furthermore, ZLARRV needs WORK of size 12*N, IWORK of size 7*N. + IF( WANTZ ) THEN + LWMIN = 18*N + LIWMIN = 10*N + ELSE +* need less workspace if only the eigenvalues are wanted + LWMIN = 12*N + LIWMIN = 8*N + ENDIF + + WL = ZERO + WU = ZERO + IIL = 0 + IIU = 0 + NSPLIT = 0 + + IF( VALEIG ) THEN +* We do not reference VL, VU in the cases RANGE = 'I','A' +* The interval (WL, WU] contains all the wanted eigenvalues. +* It is either given by the user or computed in DLARRE. + WL = VL + WU = VU + ELSEIF( INDEIG ) THEN +* We do not reference IL, IU in the cases RANGE = 'V','A' + IIL = IL + IIU = IU + ENDIF +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( VALEIG .AND. N.GT.0 .AND. WU.LE.WL ) THEN + INFO = -7 + ELSE IF( INDEIG .AND. ( IIL.LT.1 .OR. IIL.GT.N ) ) THEN + INFO = -8 + ELSE IF( INDEIG .AND. ( IIU.LT.IIL .OR. IIU.GT.N ) ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -17 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) ) +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( WANTZ .AND. ALLEIG ) THEN + NZCMIN = N + ELSE IF( WANTZ .AND. VALEIG ) THEN + CALL DLARRC( 'T', N, VL, VU, D, E, SAFMIN, + $ NZCMIN, ITMP, ITMP2, INFO ) + ELSE IF( WANTZ .AND. INDEIG ) THEN + NZCMIN = IIU-IIL+1 + ELSE +* WANTZ .EQ. FALSE. + NZCMIN = 0 + ENDIF + IF( ZQUERY .AND. INFO.EQ.0 ) THEN + Z( 1,1 ) = NZCMIN + ELSE IF( NZC.LT.NZCMIN .AND. .NOT.ZQUERY ) THEN + INFO = -14 + END IF + END IF + + IF( INFO.NE.0 ) THEN +* + CALL XERBLA( 'ZSTEMR', -INFO ) +* + RETURN + ELSE IF( LQUERY .OR. ZQUERY ) THEN + RETURN + END IF +* +* Handle N = 0, 1, and 2 cases immediately +* + M = 0 + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ALLEIG .OR. INDEIG ) THEN + M = 1 + W( 1 ) = D( 1 ) + ELSE + IF( WL.LT.D( 1 ) .AND. WU.GE.D( 1 ) ) THEN + M = 1 + W( 1 ) = D( 1 ) + END IF + END IF + IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN + Z( 1, 1 ) = ONE + ISUPPZ(1) = 1 + ISUPPZ(2) = 1 + END IF + RETURN + END IF +* + IF( N.EQ.2 ) THEN + IF( .NOT.WANTZ ) THEN + CALL DLAE2( D(1), E(1), D(2), R1, R2 ) + ELSE IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN + CALL DLAEV2( D(1), E(1), D(2), R1, R2, CS, SN ) + END IF + IF( ALLEIG.OR. + $ (VALEIG.AND.(R2.GT.WL).AND. + $ (R2.LE.WU)).OR. + $ (INDEIG.AND.(IIL.EQ.1)) ) THEN + M = M+1 + W( M ) = R2 + IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN + Z( 1, M ) = -SN + Z( 2, M ) = CS +* Note: At most one of SN and CS can be zero. + IF (SN.NE.ZERO) THEN + IF (CS.NE.ZERO) THEN + ISUPPZ(2*M-1) = 1 + ISUPPZ(2*M) = 2 + ELSE + ISUPPZ(2*M-1) = 1 + ISUPPZ(2*M) = 1 + END IF + ELSE + ISUPPZ(2*M-1) = 2 + ISUPPZ(2*M) = 2 + END IF + ENDIF + ENDIF + IF( ALLEIG.OR. + $ (VALEIG.AND.(R1.GT.WL).AND. + $ (R1.LE.WU)).OR. + $ (INDEIG.AND.(IIU.EQ.2)) ) THEN + M = M+1 + W( M ) = R1 + IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN + Z( 1, M ) = CS + Z( 2, M ) = SN +* Note: At most one of SN and CS can be zero. + IF (SN.NE.ZERO) THEN + IF (CS.NE.ZERO) THEN + ISUPPZ(2*M-1) = 1 + ISUPPZ(2*M) = 2 + ELSE + ISUPPZ(2*M-1) = 1 + ISUPPZ(2*M) = 1 + END IF + ELSE + ISUPPZ(2*M-1) = 2 + ISUPPZ(2*M) = 2 + END IF + ENDIF + ENDIF + ELSE + +* Continue with general N + + INDGRS = 1 + INDERR = 2*N + 1 + INDGP = 3*N + 1 + INDD = 4*N + 1 + INDE2 = 5*N + 1 + INDWRK = 6*N + 1 +* + IINSPL = 1 + IINDBL = N + 1 + IINDW = 2*N + 1 + IINDWK = 3*N + 1 +* +* Scale matrix to allowable range, if necessary. +* The allowable range is related to the PIVMIN parameter; see the +* comments in DLARRD. The preference for scaling small values +* up is heuristic; we expect users' matrices not to be close to the +* RMAX threshold. +* + SCALE = ONE + TNRM = DLANST( 'M', N, D, E ) + IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN + SCALE = RMIN / TNRM + ELSE IF( TNRM.GT.RMAX ) THEN + SCALE = RMAX / TNRM + END IF + IF( SCALE.NE.ONE ) THEN + CALL DSCAL( N, SCALE, D, 1 ) + CALL DSCAL( N-1, SCALE, E, 1 ) + TNRM = TNRM*SCALE + IF( VALEIG ) THEN +* If eigenvalues in interval have to be found, +* scale (WL, WU] accordingly + WL = WL*SCALE + WU = WU*SCALE + ENDIF + END IF +* +* Compute the desired eigenvalues of the tridiagonal after splitting +* into smaller subblocks if the corresponding off-diagonal elements +* are small +* THRESH is the splitting parameter for DLARRE +* A negative THRESH forces the old splitting criterion based on the +* size of the off-diagonal. A positive THRESH switches to splitting +* which preserves relative accuracy. +* + IF( TRYRAC ) THEN +* Test whether the matrix warrants the more expensive relative approach. + CALL DLARRR( N, D, E, IINFO ) + ELSE +* The user does not care about relative accurately eigenvalues + IINFO = -1 + ENDIF +* Set the splitting criterion + IF (IINFO.EQ.0) THEN + THRESH = EPS + ELSE + THRESH = -EPS +* relative accuracy is desired but T does not guarantee it + TRYRAC = .FALSE. + ENDIF +* + IF( TRYRAC ) THEN +* Copy original diagonal, needed to guarantee relative accuracy + CALL DCOPY(N,D,1,WORK(INDD),1) + ENDIF +* Store the squares of the offdiagonal values of T + DO 5 J = 1, N-1 + WORK( INDE2+J-1 ) = E(J)**2 + 5 CONTINUE + +* Set the tolerance parameters for bisection + IF( .NOT.WANTZ ) THEN +* DLARRE computes the eigenvalues to full precision. + RTOL1 = FOUR * EPS + RTOL2 = FOUR * EPS + ELSE +* DLARRE computes the eigenvalues to less than full precision. +* ZLARRV will refine the eigenvalue approximations, and we only +* need less accurate initial bisection in DLARRE. +* Note: these settings do only affect the subset case and DLARRE + RTOL1 = SQRT(EPS) + RTOL2 = MAX( SQRT(EPS)*5.0D-3, FOUR * EPS ) + ENDIF + CALL DLARRE( RANGE, N, WL, WU, IIL, IIU, D, E, + $ WORK(INDE2), RTOL1, RTOL2, THRESH, NSPLIT, + $ IWORK( IINSPL ), M, W, WORK( INDERR ), + $ WORK( INDGP ), IWORK( IINDBL ), + $ IWORK( IINDW ), WORK( INDGRS ), PIVMIN, + $ WORK( INDWRK ), IWORK( IINDWK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 10 + ABS( IINFO ) + RETURN + END IF +* Note that if RANGE .NE. 'V', DLARRE computes bounds on the desired +* part of the spectrum. All desired eigenvalues are contained in +* (WL,WU] + + + IF( WANTZ ) THEN +* +* Compute the desired eigenvectors corresponding to the computed +* eigenvalues +* + CALL ZLARRV( N, WL, WU, D, E, + $ PIVMIN, IWORK( IINSPL ), M, + $ 1, M, MINRGP, RTOL1, RTOL2, + $ W, WORK( INDERR ), WORK( INDGP ), IWORK( IINDBL ), + $ IWORK( IINDW ), WORK( INDGRS ), Z, LDZ, + $ ISUPPZ, WORK( INDWRK ), IWORK( IINDWK ), IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 20 + ABS( IINFO ) + RETURN + END IF + ELSE +* DLARRE computes eigenvalues of the (shifted) root representation +* ZLARRV returns the eigenvalues of the unshifted matrix. +* However, if the eigenvectors are not desired by the user, we need +* to apply the corresponding shifts from DLARRE to obtain the +* eigenvalues of the original matrix. + DO 20 J = 1, M + ITMP = IWORK( IINDBL+J-1 ) + W( J ) = W( J ) + E( IWORK( IINSPL+ITMP-1 ) ) + 20 CONTINUE + END IF +* + + IF ( TRYRAC ) THEN +* Refine computed eigenvalues so that they are relatively accurate +* with respect to the original matrix T. + IBEGIN = 1 + WBEGIN = 1 + DO 39 JBLK = 1, IWORK( IINDBL+M-1 ) + IEND = IWORK( IINSPL+JBLK-1 ) + IN = IEND - IBEGIN + 1 + WEND = WBEGIN - 1 +* check if any eigenvalues have to be refined in this block + 36 CONTINUE + IF( WEND.LT.M ) THEN + IF( IWORK( IINDBL+WEND ).EQ.JBLK ) THEN + WEND = WEND + 1 + GO TO 36 + END IF + END IF + IF( WEND.LT.WBEGIN ) THEN + IBEGIN = IEND + 1 + GO TO 39 + END IF + + OFFSET = IWORK(IINDW+WBEGIN-1)-1 + IFIRST = IWORK(IINDW+WBEGIN-1) + ILAST = IWORK(IINDW+WEND-1) + RTOL2 = FOUR * EPS + CALL DLARRJ( IN, + $ WORK(INDD+IBEGIN-1), WORK(INDE2+IBEGIN-1), + $ IFIRST, ILAST, RTOL2, OFFSET, W(WBEGIN), + $ WORK( INDERR+WBEGIN-1 ), + $ WORK( INDWRK ), IWORK( IINDWK ), PIVMIN, + $ TNRM, IINFO ) + IBEGIN = IEND + 1 + WBEGIN = WEND + 1 + 39 CONTINUE + ENDIF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( SCALE.NE.ONE ) THEN + CALL DSCAL( M, ONE / SCALE, W, 1 ) + END IF + END IF +* +* If eigenvalues are not in increasing order, then sort them, +* possibly along with eigenvectors. +* + IF( NSPLIT.GT.1 .OR. N.EQ.2 ) THEN + IF( .NOT. WANTZ ) THEN + CALL DLASRT( 'I', M, W, IINFO ) + IF( IINFO.NE.0 ) THEN + INFO = 3 + RETURN + END IF + ELSE + DO 60 J = 1, M - 1 + I = 0 + TMP = W( J ) + DO 50 JJ = J + 1, M + IF( W( JJ ).LT.TMP ) THEN + I = JJ + TMP = W( JJ ) + END IF + 50 CONTINUE + IF( I.NE.0 ) THEN + W( I ) = W( J ) + W( J ) = TMP + IF( WANTZ ) THEN + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 ) + ITMP = ISUPPZ( 2*I-1 ) + ISUPPZ( 2*I-1 ) = ISUPPZ( 2*J-1 ) + ISUPPZ( 2*J-1 ) = ITMP + ITMP = ISUPPZ( 2*I ) + ISUPPZ( 2*I ) = ISUPPZ( 2*J ) + ISUPPZ( 2*J ) = ITMP + END IF + END IF + 60 CONTINUE + END IF + ENDIF +* +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN + RETURN +* +* End of ZSTEMR +* + END diff --git a/dspl/liblapack/SRC/zsteqr.f b/dspl/liblapack/SRC/zsteqr.f new file mode 100644 index 0000000..ac47890 --- /dev/null +++ b/dspl/liblapack/SRC/zsteqr.f @@ -0,0 +1,576 @@ +*> \brief \b ZSTEQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSTEQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPZ +* INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ), WORK( * ) +* COMPLEX*16 Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSTEQR computes all eigenvalues and, optionally, eigenvectors of a +*> symmetric tridiagonal matrix using the implicit QL or QR method. +*> The eigenvectors of a full or band complex Hermitian matrix can also +*> be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this +*> matrix to tridiagonal form. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only. +*> = 'V': Compute eigenvalues and eigenvectors of the original +*> Hermitian matrix. On entry, Z must contain the +*> unitary matrix used to reduce the original matrix +*> to tridiagonal form. +*> = 'I': Compute eigenvalues and eigenvectors of the +*> tridiagonal matrix. Z is initialized to the identity +*> matrix. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the diagonal elements of the tridiagonal matrix. +*> On exit, if INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', then Z contains the unitary +*> matrix used in the reduction to tridiagonal form. +*> On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +*> orthonormal eigenvectors of the original Hermitian matrix, +*> and if COMPZ = 'I', Z contains the orthonormal eigenvectors +*> of the symmetric tridiagonal matrix. +*> If COMPZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> eigenvectors are desired, then LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (max(1,2*N-2)) +*> If COMPZ = 'N', then WORK is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: the algorithm has failed to find all the eigenvalues in +*> a total of 30*N iterations; if INFO = i, then i +*> elements of E have not converged to zero; on exit, D +*> and E contain the elements of a symmetric tridiagonal +*> matrix which is unitarily similar to the original +*> matrix. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), WORK( * ) + COMPLEX*16 Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, THREE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ THREE = 3.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) +* .. +* .. Local Scalars .. + INTEGER I, ICOMPZ, II, ISCALE, J, JTOT, K, L, L1, LEND, + $ LENDM1, LENDP1, LENDSV, LM1, LSV, M, MM, MM1, + $ NM1, NMAXIT + DOUBLE PRECISION ANORM, B, C, EPS, EPS2, F, G, P, R, RT1, RT2, + $ S, SAFMAX, SAFMIN, SSFMAX, SSFMIN, TST +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANST, DLAPY2 + EXTERNAL LSAME, DLAMCH, DLANST, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL DLAE2, DLAEV2, DLARTG, DLASCL, DLASRT, XERBLA, + $ ZLASET, ZLASR, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSTEQR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ICOMPZ.EQ.2 ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Determine the unit roundoff and over/underflow thresholds. +* + EPS = DLAMCH( 'E' ) + EPS2 = EPS**2 + SAFMIN = DLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + SSFMAX = SQRT( SAFMAX ) / THREE + SSFMIN = SQRT( SAFMIN ) / EPS2 +* +* Compute the eigenvalues and eigenvectors of the tridiagonal +* matrix. +* + IF( ICOMPZ.EQ.2 ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, Z, LDZ ) +* + NMAXIT = N*MAXIT + JTOT = 0 +* +* Determine where the matrix splits and choose QL or QR iteration +* for each block, according to whether top or bottom diagonal +* element is smaller. +* + L1 = 1 + NM1 = N - 1 +* + 10 CONTINUE + IF( L1.GT.N ) + $ GO TO 160 + IF( L1.GT.1 ) + $ E( L1-1 ) = ZERO + IF( L1.LE.NM1 ) THEN + DO 20 M = L1, NM1 + TST = ABS( E( M ) ) + IF( TST.EQ.ZERO ) + $ GO TO 30 + IF( TST.LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+ + $ 1 ) ) ) )*EPS ) THEN + E( M ) = ZERO + GO TO 30 + END IF + 20 CONTINUE + END IF + M = N +* + 30 CONTINUE + L = L1 + LSV = L + LEND = M + LENDSV = LEND + L1 = M + 1 + IF( LEND.EQ.L ) + $ GO TO 10 +* +* Scale submatrix in rows and columns L to LEND +* + ANORM = DLANST( 'I', LEND-L+1, D( L ), E( L ) ) + ISCALE = 0 + IF( ANORM.EQ.ZERO ) + $ GO TO 10 + IF( ANORM.GT.SSFMAX ) THEN + ISCALE = 1 + CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, + $ INFO ) + ELSE IF( ANORM.LT.SSFMIN ) THEN + ISCALE = 2 + CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, + $ INFO ) + END IF +* +* Choose between QL and QR iteration +* + IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN + LEND = LSV + L = LENDSV + END IF +* + IF( LEND.GT.L ) THEN +* +* QL Iteration +* +* Look for small subdiagonal element. +* + 40 CONTINUE + IF( L.NE.LEND ) THEN + LENDM1 = LEND - 1 + DO 50 M = L, LENDM1 + TST = ABS( E( M ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M+1 ) )+ + $ SAFMIN )GO TO 60 + 50 CONTINUE + END IF +* + M = LEND +* + 60 CONTINUE + IF( M.LT.LEND ) + $ E( M ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 80 +* +* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 +* to compute its eigensystem. +* + IF( M.EQ.L+1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL DLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S ) + WORK( L ) = C + WORK( N-1+L ) = S + CALL ZLASR( 'R', 'V', 'B', N, 2, WORK( L ), + $ WORK( N-1+L ), Z( 1, L ), LDZ ) + ELSE + CALL DLAE2( D( L ), E( L ), D( L+1 ), RT1, RT2 ) + END IF + D( L ) = RT1 + D( L+1 ) = RT2 + E( L ) = ZERO + L = L + 2 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L+1 )-P ) / ( TWO*E( L ) ) + R = DLAPY2( G, ONE ) + G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + MM1 = M - 1 + DO 70 I = MM1, L, -1 + F = S*E( I ) + B = C*E( I ) + CALL DLARTG( G, F, C, S, R ) + IF( I.NE.M-1 ) + $ E( I+1 ) = R + G = D( I+1 ) - P + R = ( D( I )-G )*S + TWO*C*B + P = S*R + D( I+1 ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = -S + END IF +* + 70 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = M - L + 1 + CALL ZLASR( 'R', 'V', 'B', N, MM, WORK( L ), WORK( N-1+L ), + $ Z( 1, L ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( L ) = G + GO TO 40 +* +* Eigenvalue found. +* + 80 CONTINUE + D( L ) = P +* + L = L + 1 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 +* + ELSE +* +* QR Iteration +* +* Look for small superdiagonal element. +* + 90 CONTINUE + IF( L.NE.LEND ) THEN + LENDP1 = LEND + 1 + DO 100 M = L, LENDP1, -1 + TST = ABS( E( M-1 ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M-1 ) )+ + $ SAFMIN )GO TO 110 + 100 CONTINUE + END IF +* + M = LEND +* + 110 CONTINUE + IF( M.GT.LEND ) + $ E( M-1 ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 130 +* +* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 +* to compute its eigensystem. +* + IF( M.EQ.L-1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL DLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S ) + WORK( M ) = C + WORK( N-1+M ) = S + CALL ZLASR( 'R', 'V', 'F', N, 2, WORK( M ), + $ WORK( N-1+M ), Z( 1, L-1 ), LDZ ) + ELSE + CALL DLAE2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2 ) + END IF + D( L-1 ) = RT1 + D( L ) = RT2 + E( L-1 ) = ZERO + L = L - 2 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) ) + R = DLAPY2( G, ONE ) + G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + LM1 = L - 1 + DO 120 I = M, LM1 + F = S*E( I ) + B = C*E( I ) + CALL DLARTG( G, F, C, S, R ) + IF( I.NE.M ) + $ E( I-1 ) = R + G = D( I ) - P + R = ( D( I+1 )-G )*S + TWO*C*B + P = S*R + D( I ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = S + END IF +* + 120 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = L - M + 1 + CALL ZLASR( 'R', 'V', 'F', N, MM, WORK( M ), WORK( N-1+M ), + $ Z( 1, M ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( LM1 ) = G + GO TO 90 +* +* Eigenvalue found. +* + 130 CONTINUE + D( L ) = P +* + L = L - 1 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 +* + END IF +* +* Undo scaling if necessary +* + 140 CONTINUE + IF( ISCALE.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + ELSE IF( ISCALE.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + END IF +* +* Check for no convergence to an eigenvalue after a total +* of N*MAXIT iterations. +* + IF( JTOT.EQ.NMAXIT ) THEN + DO 150 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 150 CONTINUE + RETURN + END IF + GO TO 10 +* +* Order eigenvalues and eigenvectors. +* + 160 CONTINUE + IF( ICOMPZ.EQ.0 ) THEN +* +* Use Quick Sort +* + CALL DLASRT( 'I', N, D, INFO ) +* + ELSE +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 180 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 170 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 170 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 180 CONTINUE + END IF + RETURN +* +* End of ZSTEQR +* + END diff --git a/dspl/liblapack/SRC/zsycon.f b/dspl/liblapack/SRC/zsycon.f new file mode 100644 index 0000000..98ec83e --- /dev/null +++ b/dspl/liblapack/SRC/zsycon.f @@ -0,0 +1,239 @@ +*> \brief \b ZSYCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYCON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYCON estimates the reciprocal of the condition number (in the +*> 1-norm) of a complex symmetric matrix A using the factorization +*> A = U*D*U**T or A = L*D*L**T computed by ZSYTRF. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSYTRF. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZSYCON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + DOUBLE PRECISION AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACN2, ZSYTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**T) or inv(U*D*U**T). +* + CALL ZSYTRS( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of ZSYCON +* + END diff --git a/dspl/liblapack/SRC/zsycon_3.f b/dspl/liblapack/SRC/zsycon_3.f new file mode 100644 index 0000000..8568459 --- /dev/null +++ b/dspl/liblapack/SRC/zsycon_3.f @@ -0,0 +1,287 @@ +*> \brief \b ZSYCON_3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYCON_3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYCON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, +* WORK, IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IWORK( * ) +* COMPLEX*16 A( LDA, * ), E ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> ZSYCON_3 estimates the reciprocal of the condition number (in the +*> 1-norm) of a complex symmetric matrix A using the factorization +*> computed by ZSYTRF_RK or ZSYTRF_BK: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> This routine uses BLAS3 solver ZSYTRS_3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix: +*> = 'U': Upper triangular, form is A = P*U*D*(U**T)*(P**T); +*> = 'L': Lower triangular, form is A = P*L*D*(L**T)*(P**T). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> Diagonal of the block diagonal matrix D and factors U or L +*> as computed by ZSYTRF_RK and ZSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSYTRF_RK or ZSYTRF_BK. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16SYcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> June 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZSYCON_3( UPLO, N, A, LDA, E, IPIV, ANORM, RCOND, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + DOUBLE PRECISION AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLACN2, ZSYTRS_3, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYCON_3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.CZERO ) + $ RETURN + END DO + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.CZERO ) + $ RETURN + END DO + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**T) or inv(U*D*U**T). +* + CALL ZSYTRS_3( UPLO, N, 1, A, LDA, E, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of ZSYCON_3 +* + END diff --git a/dspl/liblapack/SRC/zsycon_rook.f b/dspl/liblapack/SRC/zsycon_rook.f new file mode 100644 index 0000000..c7c0c43 --- /dev/null +++ b/dspl/liblapack/SRC/zsycon_rook.f @@ -0,0 +1,255 @@ +*> \brief \b ZSYCON_ROOK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYCON_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYCON_ROOK( UPLO, N, A, LDA, IPIV, ANORM, RCOND, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYCON_ROOK estimates the reciprocal of the condition number (in the +*> 1-norm) of a complex symmetric matrix A using the factorization +*> A = U*D*U**T or A = L*D*L**T computed by ZSYTRF_ROOK. +*> +*> An estimate is obtained for norm(inv(A)), and the reciprocal of the +*> condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZSYTRF_ROOK. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSYTRF_ROOK. +*> \endverbatim +*> +*> \param[in] ANORM +*> \verbatim +*> ANORM is DOUBLE PRECISION +*> The 1-norm of the original matrix A. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an +*> estimate of the 1-norm of inv(A) computed in this routine. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZSYCON_ROOK( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION ANORM, RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, KASE + DOUBLE PRECISION AINVNM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLACN2, ZSYTRS_ROOK, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( ANORM.LT.ZERO ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYCON_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + RCOND = ZERO + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + ELSE IF( ANORM.LE.ZERO ) THEN + RETURN + END IF +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 I = N, 1, -1 + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.CZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 I = 1, N + IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.CZERO ) + $ RETURN + 20 CONTINUE + END IF +* +* Estimate the 1-norm of the inverse. +* + KASE = 0 + 30 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN +* +* Multiply by inv(L*D*L**T) or inv(U*D*U**T). +* + CALL ZSYTRS_ROOK( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO ) + GO TO 30 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / AINVNM ) / ANORM +* + RETURN +* +* End of ZSYCON_ROOK +* + END diff --git a/dspl/liblapack/SRC/zsyconv.f b/dspl/liblapack/SRC/zsyconv.f new file mode 100644 index 0000000..94235ef --- /dev/null +++ b/dspl/liblapack/SRC/zsyconv.f @@ -0,0 +1,366 @@ +*> \brief \b ZSYCONV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYCONV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYCONV( UPLO, WAY, N, A, LDA, IPIV, E, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, WAY +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYCONV converts A given by ZHETRF into L and D or vice-versa. +*> Get nondiagonal elements of D (returned in workspace) and +*> apply or reverse permutation done in TRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] WAY +*> \verbatim +*> WAY is CHARACTER*1 +*> = 'C': Convert +*> = 'R': Revert +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSYTRF. +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N) +*> E stores the supdiagonal/subdiagonal of the symmetric 1-by-1 +*> or 2-by-2 block diagonal matrix D in LDLT. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZSYCONV( UPLO, WAY, N, A, LDA, IPIV, E, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO, WAY + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = (0.0D+0,0.0D+0) ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* +* .. External Subroutines .. + EXTERNAL XERBLA +* .. Local Scalars .. + LOGICAL UPPER, CONVERT + INTEGER I, IP, J + COMPLEX*16 TEMP +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + CONVERT = LSAME( WAY, 'C' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.CONVERT .AND. .NOT.LSAME( WAY, 'R' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYCONV', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* A is UPPER +* + IF ( CONVERT ) THEN +* +* Convert A (A is upper) +* +* Convert VALUE +* + I=N + E(1)=ZERO + DO WHILE ( I .GT. 1 ) + IF( IPIV(I) .LT. 0 ) THEN + E(I)=A(I-1,I) + E(I-1)=ZERO + A(I-1,I)=ZERO + I=I-1 + ELSE + E(I)=ZERO + ENDIF + I=I-1 + END DO +* +* Convert PERMUTATIONS +* + I=N + DO WHILE ( I .GE. 1 ) + IF( IPIV(I) .GT. 0) THEN + IP=IPIV(I) + IF( I .LT. N) THEN + DO 12 J= I+1,N + TEMP=A(IP,J) + A(IP,J)=A(I,J) + A(I,J)=TEMP + 12 CONTINUE + ENDIF + ELSE + IP=-IPIV(I) + IF( I .LT. N) THEN + DO 13 J= I+1,N + TEMP=A(IP,J) + A(IP,J)=A(I-1,J) + A(I-1,J)=TEMP + 13 CONTINUE + ENDIF + I=I-1 + ENDIF + I=I-1 + END DO +* + ELSE +* +* Revert A (A is upper) +* +* Revert PERMUTATIONS +* + I=1 + DO WHILE ( I .LE. N ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF( I .LT. N) THEN + DO J= I+1,N + TEMP=A(IP,J) + A(IP,J)=A(I,J) + A(I,J)=TEMP + END DO + ENDIF + ELSE + IP=-IPIV(I) + I=I+1 + IF( I .LT. N) THEN + DO J= I+1,N + TEMP=A(IP,J) + A(IP,J)=A(I-1,J) + A(I-1,J)=TEMP + END DO + ENDIF + ENDIF + I=I+1 + END DO +* +* Revert VALUE +* + I=N + DO WHILE ( I .GT. 1 ) + IF( IPIV(I) .LT. 0 ) THEN + A(I-1,I)=E(I) + I=I-1 + ENDIF + I=I-1 + END DO + END IF +* + ELSE +* +* A is LOWER +* + IF ( CONVERT ) THEN +* +* Convert A (A is lower) +* +* Convert VALUE +* + I=1 + E(N)=ZERO + DO WHILE ( I .LE. N ) + IF( I.LT.N .AND. IPIV(I) .LT. 0 ) THEN + E(I)=A(I+1,I) + E(I+1)=ZERO + A(I+1,I)=ZERO + I=I+1 + ELSE + E(I)=ZERO + ENDIF + I=I+1 + END DO +* +* Convert PERMUTATIONS +* + I=1 + DO WHILE ( I .LE. N ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF (I .GT. 1) THEN + DO 22 J= 1,I-1 + TEMP=A(IP,J) + A(IP,J)=A(I,J) + A(I,J)=TEMP + 22 CONTINUE + ENDIF + ELSE + IP=-IPIV(I) + IF (I .GT. 1) THEN + DO 23 J= 1,I-1 + TEMP=A(IP,J) + A(IP,J)=A(I+1,J) + A(I+1,J)=TEMP + 23 CONTINUE + ENDIF + I=I+1 + ENDIF + I=I+1 + END DO +* + ELSE +* +* Revert A (A is lower) +* +* Revert PERMUTATIONS +* + I=N + DO WHILE ( I .GE. 1 ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF (I .GT. 1) THEN + DO J= 1,I-1 + TEMP=A(I,J) + A(I,J)=A(IP,J) + A(IP,J)=TEMP + END DO + ENDIF + ELSE + IP=-IPIV(I) + I=I-1 + IF (I .GT. 1) THEN + DO J= 1,I-1 + TEMP=A(I+1,J) + A(I+1,J)=A(IP,J) + A(IP,J)=TEMP + END DO + ENDIF + ENDIF + I=I-1 + END DO +* +* Revert VALUE +* + I=1 + DO WHILE ( I .LE. N-1 ) + IF( IPIV(I) .LT. 0 ) THEN + A(I+1,I)=E(I) + I=I+1 + ENDIF + I=I+1 + END DO + END IF + END IF +* + RETURN +* +* End of ZSYCONV +* + END diff --git a/dspl/liblapack/SRC/zsyconvf.f b/dspl/liblapack/SRC/zsyconvf.f new file mode 100644 index 0000000..b26bfd6 --- /dev/null +++ b/dspl/liblapack/SRC/zsyconvf.f @@ -0,0 +1,562 @@ +*> \brief \b ZSYCONVF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYCONVF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYCONVF( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, WAY +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> If parameter WAY = 'C': +*> ZSYCONVF converts the factorization output format used in +*> ZSYTRF provided on entry in parameter A into the factorization +*> output format used in ZSYTRF_RK (or ZSYTRF_BK) that is stored +*> on exit in parameters A and E. It also coverts in place details of +*> the intechanges stored in IPIV from the format used in ZSYTRF into +*> the format used in ZSYTRF_RK (or ZSYTRF_BK). +*> +*> If parameter WAY = 'R': +*> ZSYCONVF performs the conversion in reverse direction, i.e. +*> converts the factorization output format used in ZSYTRF_RK +*> (or ZSYTRF_BK) provided on entry in parameters A and E into +*> the factorization output format used in ZSYTRF that is stored +*> on exit in parameter A. It also coverts in place details of +*> the intechanges stored in IPIV from the format used in ZSYTRF_RK +*> (or ZSYTRF_BK) into the format used in ZSYTRF. +*> +*> ZSYCONVF can also convert in Hermitian matrix case, i.e. between +*> formats used in ZHETRF and ZHETRF_RK (or ZHETRF_BK). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix A. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] WAY +*> \verbatim +*> WAY is CHARACTER*1 +*> = 'C': Convert +*> = 'R': Revert +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> +*> 1) If WAY ='C': +*> +*> On entry, contains factorization details in format used in +*> ZSYTRF: +*> a) all elements of the symmetric block diagonal +*> matrix D on the diagonal of A and on superdiagonal +*> (or subdiagonal) of A, and +*> b) If UPLO = 'U': multipliers used to obtain factor U +*> in the superdiagonal part of A. +*> If UPLO = 'L': multipliers used to obtain factor L +*> in the superdiagonal part of A. +*> +*> On exit, contains factorization details in format used in +*> ZSYTRF_RK or ZSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> 2) If WAY = 'R': +*> +*> On entry, contains factorization details in format used in +*> ZSYTRF_RK or ZSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> On exit, contains factorization details in format used in +*> ZSYTRF: +*> a) all elements of the symmetric block diagonal +*> matrix D on the diagonal of A and on superdiagonal +*> (or subdiagonal) of A, and +*> b) If UPLO = 'U': multipliers used to obtain factor U +*> in the superdiagonal part of A. +*> If UPLO = 'L': multipliers used to obtain factor L +*> in the superdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N) +*> +*> 1) If WAY ='C': +*> +*> On entry, just a workspace. +*> +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> 2) If WAY = 'R': +*> +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> On exit, is not changed +*> \endverbatim +*. +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> +*> 1) If WAY ='C': +*> On entry, details of the interchanges and the block +*> structure of D in the format used in ZSYTRF. +*> On exit, details of the interchanges and the block +*> structure of D in the format used in ZSYTRF_RK +*> ( or ZSYTRF_BK). +*> +*> 1) If WAY ='R': +*> On entry, details of the interchanges and the block +*> structure of D in the format used in ZSYTRF_RK +*> ( or ZSYTRF_BK). +*> On exit, details of the interchanges and the block +*> structure of D in the format used in ZSYTRF. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16SYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* ===================================================================== + SUBROUTINE ZSYCONVF( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO, WAY + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* +* .. External Subroutines .. + EXTERNAL ZSWAP, XERBLA +* .. Local Scalars .. + LOGICAL UPPER, CONVERT + INTEGER I, IP +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + CONVERT = LSAME( WAY, 'C' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.CONVERT .AND. .NOT.LSAME( WAY, 'R' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYCONVF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Begin A is UPPER +* + IF ( CONVERT ) THEN +* +* Convert A (A is upper) +* +* +* Convert VALUE +* +* Assign superdiagonal entries of D to array E and zero out +* corresponding entries in input storage A +* + I = N + E( 1 ) = ZERO + DO WHILE ( I.GT.1 ) + IF( IPIV( I ).LT.0 ) THEN + E( I ) = A( I-1, I ) + E( I-1 ) = ZERO + A( I-1, I ) = ZERO + I = I - 1 + ELSE + E( I ) = ZERO + END IF + I = I - 1 + END DO +* +* Convert PERMUTATIONS and IPIV +* +* Apply permutaions to submatrices of upper part of A +* in factorization order where i decreases from N to 1 +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(1:i,N-i:N) +* + IP = IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.I ) THEN + CALL ZSWAP( N-I, A( I, I+1 ), LDA, + $ A( IP, I+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i-1 and IPIV(i) in A(1:i,N-i:N) +* + IP = -IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.(I-1) ) THEN + CALL ZSWAP( N-I, A( I-1, I+1 ), LDA, + $ A( IP, I+1 ), LDA ) + END IF + END IF +* +* Convert IPIV +* There is no interchnge of rows i and and IPIV(i), +* so this should be reflected in IPIV format for +* *SYTRF_RK ( or *SYTRF_BK) +* + IPIV( I ) = I +* + I = I - 1 +* + END IF + I = I - 1 + END DO +* + ELSE +* +* Revert A (A is upper) +* +* +* Revert PERMUTATIONS and IPIV +* +* Apply permutaions to submatrices of upper part of A +* in reverse factorization order where i increases from 1 to N +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(1:i,N-i:N) +* + IP = IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.I ) THEN + CALL ZSWAP( N-I, A( IP, I+1 ), LDA, + $ A( I, I+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i-1 and IPIV(i) in A(1:i,N-i:N) +* + I = I + 1 + IP = -IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.(I-1) ) THEN + CALL ZSWAP( N-I, A( IP, I+1 ), LDA, + $ A( I-1, I+1 ), LDA ) + END IF + END IF +* +* Convert IPIV +* There is one interchange of rows i-1 and IPIV(i-1), +* so this should be recorded in two consecutive entries +* in IPIV format for *SYTRF +* + IPIV( I ) = IPIV( I-1 ) +* + END IF + I = I + 1 + END DO +* +* Revert VALUE +* Assign superdiagonal entries of D from array E to +* superdiagonal entries of A. +* + I = N + DO WHILE ( I.GT.1 ) + IF( IPIV( I ).LT.0 ) THEN + A( I-1, I ) = E( I ) + I = I - 1 + END IF + I = I - 1 + END DO +* +* End A is UPPER +* + END IF +* + ELSE +* +* Begin A is LOWER +* + IF ( CONVERT ) THEN +* +* Convert A (A is lower) +* +* +* Convert VALUE +* Assign subdiagonal entries of D to array E and zero out +* corresponding entries in input storage A +* + I = 1 + E( N ) = ZERO + DO WHILE ( I.LE.N ) + IF( I.LT.N .AND. IPIV(I).LT.0 ) THEN + E( I ) = A( I+1, I ) + E( I+1 ) = ZERO + A( I+1, I ) = ZERO + I = I + 1 + ELSE + E( I ) = ZERO + END IF + I = I + 1 + END DO +* +* Convert PERMUTATIONS and IPIV +* +* Apply permutaions to submatrices of lower part of A +* in factorization order where k increases from 1 to N +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(i:N,1:i-1) +* + IP = IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.I ) THEN + CALL ZSWAP( I-1, A( I, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i+1 and IPIV(i) in A(i:N,1:i-1) +* + IP = -IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.(I+1) ) THEN + CALL ZSWAP( I-1, A( I+1, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + END IF +* +* Convert IPIV +* There is no interchnge of rows i and and IPIV(i), +* so this should be reflected in IPIV format for +* *SYTRF_RK ( or *SYTRF_BK) +* + IPIV( I ) = I +* + I = I + 1 +* + END IF + I = I + 1 + END DO +* + ELSE +* +* Revert A (A is lower) +* +* +* Revert PERMUTATIONS and IPIV +* +* Apply permutaions to submatrices of lower part of A +* in reverse factorization order where i decreases from N to 1 +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(i:N,1:i-1) +* + IP = IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.I ) THEN + CALL ZSWAP( I-1, A( IP, 1 ), LDA, + $ A( I, 1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i+1 and IPIV(i) in A(i:N,1:i-1) +* + I = I - 1 + IP = -IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.(I+1) ) THEN + CALL ZSWAP( I-1, A( IP, 1 ), LDA, + $ A( I+1, 1 ), LDA ) + END IF + END IF +* +* Convert IPIV +* There is one interchange of rows i+1 and IPIV(i+1), +* so this should be recorded in consecutive entries +* in IPIV format for *SYTRF +* + IPIV( I ) = IPIV( I+1 ) +* + END IF + I = I - 1 + END DO +* +* Revert VALUE +* Assign subdiagonal entries of D from array E to +* subgiagonal entries of A. +* + I = 1 + DO WHILE ( I.LE.N-1 ) + IF( IPIV( I ).LT.0 ) THEN + A( I + 1, I ) = E( I ) + I = I + 1 + END IF + I = I + 1 + END DO +* + END IF +* +* End A is LOWER +* + END IF + + RETURN +* +* End of ZSYCONVF +* + END diff --git a/dspl/liblapack/SRC/zsyconvf_rook.f b/dspl/liblapack/SRC/zsyconvf_rook.f new file mode 100644 index 0000000..5c36f4b --- /dev/null +++ b/dspl/liblapack/SRC/zsyconvf_rook.f @@ -0,0 +1,547 @@ +*> \brief \b ZSYCONVF_ROOK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYCONVF_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYCONVF_ROOK( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, WAY +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> If parameter WAY = 'C': +*> ZSYCONVF_ROOK converts the factorization output format used in +*> ZSYTRF_ROOK provided on entry in parameter A into the factorization +*> output format used in ZSYTRF_RK (or ZSYTRF_BK) that is stored +*> on exit in parameters A and E. IPIV format for ZSYTRF_ROOK and +*> ZSYTRF_RK (or ZSYTRF_BK) is the same and is not converted. +*> +*> If parameter WAY = 'R': +*> ZSYCONVF_ROOK performs the conversion in reverse direction, i.e. +*> converts the factorization output format used in ZSYTRF_RK +*> (or ZSYTRF_BK) provided on entry in parameters A and E into +*> the factorization output format used in ZSYTRF_ROOK that is stored +*> on exit in parameter A. IPIV format for ZSYTRF_ROOK and +*> ZSYTRF_RK (or ZSYTRF_BK) is the same and is not converted. +*> +*> ZSYCONVF_ROOK can also convert in Hermitian matrix case, i.e. between +*> formats used in ZHETRF_ROOK and ZHETRF_RK (or ZHETRF_BK). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix A. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] WAY +*> \verbatim +*> WAY is CHARACTER*1 +*> = 'C': Convert +*> = 'R': Revert +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> +*> 1) If WAY ='C': +*> +*> On entry, contains factorization details in format used in +*> ZSYTRF_ROOK: +*> a) all elements of the symmetric block diagonal +*> matrix D on the diagonal of A and on superdiagonal +*> (or subdiagonal) of A, and +*> b) If UPLO = 'U': multipliers used to obtain factor U +*> in the superdiagonal part of A. +*> If UPLO = 'L': multipliers used to obtain factor L +*> in the superdiagonal part of A. +*> +*> On exit, contains factorization details in format used in +*> ZSYTRF_RK or ZSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> 2) If WAY = 'R': +*> +*> On entry, contains factorization details in format used in +*> ZSYTRF_RK or ZSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> On exit, contains factorization details in format used in +*> ZSYTRF_ROOK: +*> a) all elements of the symmetric block diagonal +*> matrix D on the diagonal of A and on superdiagonal +*> (or subdiagonal) of A, and +*> b) If UPLO = 'U': multipliers used to obtain factor U +*> in the superdiagonal part of A. +*> If UPLO = 'L': multipliers used to obtain factor L +*> in the superdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N) +*> +*> 1) If WAY ='C': +*> +*> On entry, just a workspace. +*> +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> 2) If WAY = 'R': +*> +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> On exit, is not changed +*> \endverbatim +*. +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On entry, details of the interchanges and the block +*> structure of D as determined: +*> 1) by ZSYTRF_ROOK, if WAY ='C'; +*> 2) by ZSYTRF_RK (or ZSYTRF_BK), if WAY ='R'. +*> The IPIV format is the same for all these routines. +*> +*> On exit, is not changed. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16SYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* ===================================================================== + SUBROUTINE ZSYCONVF_ROOK( UPLO, WAY, N, A, LDA, E, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO, WAY + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* +* .. External Subroutines .. + EXTERNAL ZSWAP, XERBLA +* .. Local Scalars .. + LOGICAL UPPER, CONVERT + INTEGER I, IP, IP2 +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + CONVERT = LSAME( WAY, 'C' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.CONVERT .AND. .NOT.LSAME( WAY, 'R' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYCONVF_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Begin A is UPPER +* + IF ( CONVERT ) THEN +* +* Convert A (A is upper) +* +* +* Convert VALUE +* +* Assign superdiagonal entries of D to array E and zero out +* corresponding entries in input storage A +* + I = N + E( 1 ) = ZERO + DO WHILE ( I.GT.1 ) + IF( IPIV( I ).LT.0 ) THEN + E( I ) = A( I-1, I ) + E( I-1 ) = ZERO + A( I-1, I ) = ZERO + I = I - 1 + ELSE + E( I ) = ZERO + END IF + I = I - 1 + END DO +* +* Convert PERMUTATIONS +* +* Apply permutaions to submatrices of upper part of A +* in factorization order where i decreases from N to 1 +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(1:i,N-i:N) +* + IP = IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.I ) THEN + CALL ZSWAP( N-I, A( I, I+1 ), LDA, + $ A( IP, I+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i and IPIV(i) and i-1 and IPIV(i-1) +* in A(1:i,N-i:N) +* + IP = -IPIV( I ) + IP2 = -IPIV( I-1 ) + IF( I.LT.N ) THEN + IF( IP.NE.I ) THEN + CALL ZSWAP( N-I, A( I, I+1 ), LDA, + $ A( IP, I+1 ), LDA ) + END IF + IF( IP2.NE.(I-1) ) THEN + CALL ZSWAP( N-I, A( I-1, I+1 ), LDA, + $ A( IP2, I+1 ), LDA ) + END IF + END IF + I = I - 1 +* + END IF + I = I - 1 + END DO +* + ELSE +* +* Revert A (A is upper) +* +* +* Revert PERMUTATIONS +* +* Apply permutaions to submatrices of upper part of A +* in reverse factorization order where i increases from 1 to N +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(1:i,N-i:N) +* + IP = IPIV( I ) + IF( I.LT.N ) THEN + IF( IP.NE.I ) THEN + CALL ZSWAP( N-I, A( IP, I+1 ), LDA, + $ A( I, I+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i-1 and IPIV(i-1) and i and IPIV(i) +* in A(1:i,N-i:N) +* + I = I + 1 + IP = -IPIV( I ) + IP2 = -IPIV( I-1 ) + IF( I.LT.N ) THEN + IF( IP2.NE.(I-1) ) THEN + CALL ZSWAP( N-I, A( IP2, I+1 ), LDA, + $ A( I-1, I+1 ), LDA ) + END IF + IF( IP.NE.I ) THEN + CALL ZSWAP( N-I, A( IP, I+1 ), LDA, + $ A( I, I+1 ), LDA ) + END IF + END IF +* + END IF + I = I + 1 + END DO +* +* Revert VALUE +* Assign superdiagonal entries of D from array E to +* superdiagonal entries of A. +* + I = N + DO WHILE ( I.GT.1 ) + IF( IPIV( I ).LT.0 ) THEN + A( I-1, I ) = E( I ) + I = I - 1 + END IF + I = I - 1 + END DO +* +* End A is UPPER +* + END IF +* + ELSE +* +* Begin A is LOWER +* + IF ( CONVERT ) THEN +* +* Convert A (A is lower) +* +* +* Convert VALUE +* Assign subdiagonal entries of D to array E and zero out +* corresponding entries in input storage A +* + I = 1 + E( N ) = ZERO + DO WHILE ( I.LE.N ) + IF( I.LT.N .AND. IPIV(I).LT.0 ) THEN + E( I ) = A( I+1, I ) + E( I+1 ) = ZERO + A( I+1, I ) = ZERO + I = I + 1 + ELSE + E( I ) = ZERO + END IF + I = I + 1 + END DO +* +* Convert PERMUTATIONS +* +* Apply permutaions to submatrices of lower part of A +* in factorization order where i increases from 1 to N +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(i:N,1:i-1) +* + IP = IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.I ) THEN + CALL ZSWAP( I-1, A( I, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i and IPIV(i) and i+1 and IPIV(i+1) +* in A(i:N,1:i-1) +* + IP = -IPIV( I ) + IP2 = -IPIV( I+1 ) + IF ( I.GT.1 ) THEN + IF( IP.NE.I ) THEN + CALL ZSWAP( I-1, A( I, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + IF( IP2.NE.(I+1) ) THEN + CALL ZSWAP( I-1, A( I+1, 1 ), LDA, + $ A( IP2, 1 ), LDA ) + END IF + END IF + I = I + 1 +* + END IF + I = I + 1 + END DO +* + ELSE +* +* Revert A (A is lower) +* +* +* Revert PERMUTATIONS +* +* Apply permutaions to submatrices of lower part of A +* in reverse factorization order where i decreases from N to 1 +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN +* +* 1-by-1 pivot interchange +* +* Swap rows i and IPIV(i) in A(i:N,1:i-1) +* + IP = IPIV( I ) + IF ( I.GT.1 ) THEN + IF( IP.NE.I ) THEN + CALL ZSWAP( I-1, A( IP, 1 ), LDA, + $ A( I, 1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot interchange +* +* Swap rows i+1 and IPIV(i+1) and i and IPIV(i) +* in A(i:N,1:i-1) +* + I = I - 1 + IP = -IPIV( I ) + IP2 = -IPIV( I+1 ) + IF ( I.GT.1 ) THEN + IF( IP2.NE.(I+1) ) THEN + CALL ZSWAP( I-1, A( IP2, 1 ), LDA, + $ A( I+1, 1 ), LDA ) + END IF + IF( IP.NE.I ) THEN + CALL ZSWAP( I-1, A( IP, 1 ), LDA, + $ A( I, 1 ), LDA ) + END IF + END IF +* + END IF + I = I - 1 + END DO +* +* Revert VALUE +* Assign subdiagonal entries of D from array E to +* subgiagonal entries of A. +* + I = 1 + DO WHILE ( I.LE.N-1 ) + IF( IPIV( I ).LT.0 ) THEN + A( I + 1, I ) = E( I ) + I = I + 1 + END IF + I = I + 1 + END DO +* + END IF +* +* End A is LOWER +* + END IF + + RETURN +* +* End of ZSYCONVF_ROOK +* + END diff --git a/dspl/liblapack/SRC/zsyequb.f b/dspl/liblapack/SRC/zsyequb.f new file mode 100644 index 0000000..1cf4119 --- /dev/null +++ b/dspl/liblapack/SRC/zsyequb.f @@ -0,0 +1,343 @@ +*> \brief \b ZSYEQUB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYEQUB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, N +* DOUBLE PRECISION AMAX, SCOND +* CHARACTER UPLO +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), WORK( * ) +* DOUBLE PRECISION S( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYEQUB computes row and column scalings intended to equilibrate a +*> symmetric matrix A (with respect to the Euclidean norm) and reduce +*> its condition number. The scale factors S are computed by the BIN +*> algorithm (see references) so that the scaled matrix B with elements +*> B(i,j) = S(i)*A(i,j)*S(j) has a condition number within a factor N of +*> the smallest possible condition number over all possible diagonal +*> scalings. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The N-by-N symmetric matrix whose scaling factors are to be +*> computed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, S contains the scale factors for A. +*> \endverbatim +*> +*> \param[out] SCOND +*> \verbatim +*> SCOND is DOUBLE PRECISION +*> If INFO = 0, S contains the ratio of the smallest S(i) to +*> the largest S(i). If SCOND >= 0.1 and AMAX is neither too +*> large nor too small, it is not worth scaling by S. +*> \endverbatim +*> +*> \param[out] AMAX +*> \verbatim +*> AMAX is DOUBLE PRECISION +*> Largest absolute value of any matrix element. If AMAX is +*> very close to overflow or very close to underflow, the +*> matrix should be scaled. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element is nonpositive. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16SYcomputational +* +*> \par References: +* ================ +*> +*> Livne, O.E. and Golub, G.H., "Scaling by Binormalization", \n +*> Numerical Algorithms, vol. 35, no. 1, pp. 97-120, January 2004. \n +*> DOI 10.1023/B:NUMA.0000016606.32820.69 \n +*> Tech report version: http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.3.1679 +*> +* ===================================================================== + SUBROUTINE ZSYEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, N + DOUBLE PRECISION AMAX, SCOND + CHARACTER UPLO +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), WORK( * ) + DOUBLE PRECISION S( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 ) + INTEGER MAX_ITER + PARAMETER ( MAX_ITER = 100 ) +* .. +* .. Local Scalars .. + INTEGER I, J, ITER + DOUBLE PRECISION AVG, STD, TOL, C0, C1, C2, T, U, SI, D, BASE, + $ SMIN, SMAX, SMLNUM, BIGNUM, SCALE, SUMSQ + LOGICAL UP + COMPLEX*16 ZDUM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + LOGICAL LSAME + EXTERNAL DLAMCH, LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, INT, LOG, MAX, MIN, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function Definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF ( .NOT. ( LSAME( UPLO, 'U' ) .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -1 + ELSE IF ( N .LT. 0 ) THEN + INFO = -2 + ELSE IF ( LDA .LT. MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF ( INFO .NE. 0 ) THEN + CALL XERBLA( 'ZSYEQUB', -INFO ) + RETURN + END IF + + UP = LSAME( UPLO, 'U' ) + AMAX = ZERO +* +* Quick return if possible. +* + IF ( N .EQ. 0 ) THEN + SCOND = ONE + RETURN + END IF + + DO I = 1, N + S( I ) = ZERO + END DO + + AMAX = ZERO + IF ( UP ) THEN + DO J = 1, N + DO I = 1, J-1 + S( I ) = MAX( S( I ), CABS1( A( I, J ) ) ) + S( J ) = MAX( S( J ), CABS1( A( I, J ) ) ) + AMAX = MAX( AMAX, CABS1( A( I, J ) ) ) + END DO + S( J ) = MAX( S( J ), CABS1( A( J, J ) ) ) + AMAX = MAX( AMAX, CABS1( A( J, J ) ) ) + END DO + ELSE + DO J = 1, N + S( J ) = MAX( S( J ), CABS1( A( J, J ) ) ) + AMAX = MAX( AMAX, CABS1( A( J, J ) ) ) + DO I = J+1, N + S( I ) = MAX( S( I ), CABS1( A( I, J ) ) ) + S( J ) = MAX( S( J ), CABS1( A( I, J ) ) ) + AMAX = MAX( AMAX, CABS1( A( I, J ) ) ) + END DO + END DO + END IF + DO J = 1, N + S( J ) = 1.0D0 / S( J ) + END DO + + TOL = ONE / SQRT( 2.0D0 * N ) + + DO ITER = 1, MAX_ITER + SCALE = 0.0D0 + SUMSQ = 0.0D0 +* beta = |A|s + DO I = 1, N + WORK( I ) = ZERO + END DO + IF ( UP ) THEN + DO J = 1, N + DO I = 1, J-1 + WORK( I ) = WORK( I ) + CABS1( A( I, J ) ) * S( J ) + WORK( J ) = WORK( J ) + CABS1( A( I, J ) ) * S( I ) + END DO + WORK( J ) = WORK( J ) + CABS1( A( J, J ) ) * S( J ) + END DO + ELSE + DO J = 1, N + WORK( J ) = WORK( J ) + CABS1( A( J, J ) ) * S( J ) + DO I = J+1, N + WORK( I ) = WORK( I ) + CABS1( A( I, J ) ) * S( J ) + WORK( J ) = WORK( J ) + CABS1( A( I, J ) ) * S( I ) + END DO + END DO + END IF + +* avg = s^T beta / n + AVG = 0.0D0 + DO I = 1, N + AVG = AVG + S( I )*WORK( I ) + END DO + AVG = AVG / N + + STD = 0.0D0 + DO I = N+1, 2*N + WORK( I ) = S( I-N ) * WORK( I-N ) - AVG + END DO + CALL ZLASSQ( N, WORK( N+1 ), 1, SCALE, SUMSQ ) + STD = SCALE * SQRT( SUMSQ / N ) + + IF ( STD .LT. TOL * AVG ) GOTO 999 + + DO I = 1, N + T = CABS1( A( I, I ) ) + SI = S( I ) + C2 = ( N-1 ) * T + C1 = ( N-2 ) * ( WORK( I ) - T*SI ) + C0 = -(T*SI)*SI + 2*WORK( I )*SI - N*AVG + D = C1*C1 - 4*C0*C2 + + IF ( D .LE. 0 ) THEN + INFO = -1 + RETURN + END IF + SI = -2*C0 / ( C1 + SQRT( D ) ) + + D = SI - S( I ) + U = ZERO + IF ( UP ) THEN + DO J = 1, I + T = CABS1( A( J, I ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + DO J = I+1,N + T = CABS1( A( I, J ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + ELSE + DO J = 1, I + T = CABS1( A( I, J ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + DO J = I+1,N + T = CABS1( A( J, I ) ) + U = U + S( J )*T + WORK( J ) = WORK( J ) + D*T + END DO + END IF + + AVG = AVG + ( U + WORK( I ) ) * D / N + S( I ) = SI + END DO + END DO + + 999 CONTINUE + + SMLNUM = DLAMCH( 'SAFEMIN' ) + BIGNUM = ONE / SMLNUM + SMIN = BIGNUM + SMAX = ZERO + T = ONE / SQRT( AVG ) + BASE = DLAMCH( 'B' ) + U = ONE / LOG( BASE ) + DO I = 1, N + S( I ) = BASE ** INT( U * LOG( S( I ) * T ) ) + SMIN = MIN( SMIN, S( I ) ) + SMAX = MAX( SMAX, S( I ) ) + END DO + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) +* + END diff --git a/dspl/liblapack/SRC/zsymv.f b/dspl/liblapack/SRC/zsymv.f new file mode 100644 index 0000000..926a05f --- /dev/null +++ b/dspl/liblapack/SRC/zsymv.f @@ -0,0 +1,343 @@ +*> \brief \b ZSYMV computes a matrix-vector product for a complex symmetric matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYMV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INCX, INCY, LDA, N +* COMPLEX*16 ALPHA, BETA +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), X( * ), Y( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYMV performs the matrix-vector operation +*> +*> y := alpha*A*x + beta*y, +*> +*> where alpha and beta are scalars, x and y are n element vectors and +*> A is an n by n symmetric matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array A is to be referenced as +*> follows: +*> +*> UPLO = 'U' or 'u' Only the upper triangular part of A +*> is to be referenced. +*> +*> UPLO = 'L' or 'l' Only the lower triangular part of A +*> is to be referenced. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension ( LDA, N ) +*> Before entry, with UPLO = 'U' or 'u', the leading n by n +*> upper triangular part of the array A must contain the upper +*> triangular part of the symmetric matrix and the strictly +*> lower triangular part of A is not referenced. +*> Before entry, with UPLO = 'L' or 'l', the leading n by n +*> lower triangular part of the array A must contain the lower +*> triangular part of the symmetric matrix and the strictly +*> upper triangular part of A is not referenced. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, N ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension at least +*> ( 1 + ( N - 1 )*abs( INCX ) ). +*> Before entry, the incremented array X must contain the N- +*> element vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is COMPLEX*16 +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX*16 array, dimension at least +*> ( 1 + ( N - 1 )*abs( INCY ) ). +*> Before entry, the incremented array Y must contain the n +*> element vector y. On exit, Y is overwritten by the updated +*> vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> Unchanged on exit. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYauxiliary +* +* ===================================================================== + SUBROUTINE ZSYMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCX, INCY, LDA, N + COMPLEX*16 ALPHA, BETA +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ), Y( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, IX, IY, J, JX, JY, KX, KY + COMPLEX*16 TEMP1, TEMP2 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = 1 + ELSE IF( N.LT.0 ) THEN + INFO = 2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = 5 + ELSE IF( INCX.EQ.0 ) THEN + INFO = 7 + ELSE IF( INCY.EQ.0 ) THEN + INFO = 10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYMV ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ) .OR. ( ( ALPHA.EQ.ZERO ) .AND. ( BETA.EQ.ONE ) ) ) + $ RETURN +* +* Set up the start points in X and Y. +* + IF( INCX.GT.0 ) THEN + KX = 1 + ELSE + KX = 1 - ( N-1 )*INCX + END IF + IF( INCY.GT.0 ) THEN + KY = 1 + ELSE + KY = 1 - ( N-1 )*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* +* First form y := beta*y. +* + IF( BETA.NE.ONE ) THEN + IF( INCY.EQ.1 ) THEN + IF( BETA.EQ.ZERO ) THEN + DO 10 I = 1, N + Y( I ) = ZERO + 10 CONTINUE + ELSE + DO 20 I = 1, N + Y( I ) = BETA*Y( I ) + 20 CONTINUE + END IF + ELSE + IY = KY + IF( BETA.EQ.ZERO ) THEN + DO 30 I = 1, N + Y( IY ) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40 I = 1, N + Y( IY ) = BETA*Y( IY ) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF( ALPHA.EQ.ZERO ) + $ RETURN + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Form y when A is stored in upper triangle. +* + IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN + DO 60 J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + DO 50 I = 1, J - 1 + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( I ) + 50 CONTINUE + Y( J ) = Y( J ) + TEMP1*A( J, J ) + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80 J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70 I = 1, J - 1 + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( IX ) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y when A is stored in lower triangle. +* + IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN + DO 100 J = 1, N + TEMP1 = ALPHA*X( J ) + TEMP2 = ZERO + Y( J ) = Y( J ) + TEMP1*A( J, J ) + DO 90 I = J + 1, N + Y( I ) = Y( I ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( I ) + 90 CONTINUE + Y( J ) = Y( J ) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120 J = 1, N + TEMP1 = ALPHA*X( JX ) + TEMP2 = ZERO + Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + IX = JX + IY = JY + DO 110 I = J + 1, N + IX = IX + INCX + IY = IY + INCY + Y( IY ) = Y( IY ) + TEMP1*A( I, J ) + TEMP2 = TEMP2 + A( I, J )*X( IX ) + 110 CONTINUE + Y( JY ) = Y( JY ) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZSYMV +* + END diff --git a/dspl/liblapack/SRC/zsyr.f b/dspl/liblapack/SRC/zsyr.f new file mode 100644 index 0000000..2bed663 --- /dev/null +++ b/dspl/liblapack/SRC/zsyr.f @@ -0,0 +1,268 @@ +*> \brief \b ZSYR performs the symmetric rank-1 update of a complex symmetric matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYR( UPLO, N, ALPHA, X, INCX, A, LDA ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INCX, LDA, N +* COMPLEX*16 ALPHA +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYR performs the symmetric rank 1 operation +*> +*> A := alpha*x*x**H + A, +*> +*> where alpha is a complex scalar, x is an n element vector and A is an +*> n by n symmetric matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array A is to be referenced as +*> follows: +*> +*> UPLO = 'U' or 'u' Only the upper triangular part of A +*> is to be referenced. +*> +*> UPLO = 'L' or 'l' Only the lower triangular part of A +*> is to be referenced. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of the matrix A. +*> N must be at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 +*> On entry, ALPHA specifies the scalar alpha. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension at least +*> ( 1 + ( N - 1 )*abs( INCX ) ). +*> Before entry, the incremented array X must contain the N- +*> element vector x. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension ( LDA, N ) +*> Before entry, with UPLO = 'U' or 'u', the leading n by n +*> upper triangular part of the array A must contain the upper +*> triangular part of the symmetric matrix and the strictly +*> lower triangular part of A is not referenced. On exit, the +*> upper triangular part of the array A is overwritten by the +*> upper triangular part of the updated matrix. +*> Before entry, with UPLO = 'L' or 'l', the leading n by n +*> lower triangular part of the array A must contain the lower +*> triangular part of the symmetric matrix and the strictly +*> upper triangular part of A is not referenced. On exit, the +*> lower triangular part of the array A is overwritten by the +*> lower triangular part of the updated matrix. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, N ). +*> Unchanged on exit. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYauxiliary +* +* ===================================================================== + SUBROUTINE ZSYR( UPLO, N, ALPHA, X, INCX, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INCX, LDA, N + COMPLEX*16 ALPHA +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, IX, J, JX, KX + COMPLEX*16 TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = 1 + ELSE IF( N.LT.0 ) THEN + INFO = 2 + ELSE IF( INCX.EQ.0 ) THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = 7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYR ', INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( ( N.EQ.0 ) .OR. ( ALPHA.EQ.ZERO ) ) + $ RETURN +* +* Set the start point in X if the increment is not unity. +* + IF( INCX.LE.0 ) THEN + KX = 1 - ( N-1 )*INCX + ELSE IF( INCX.NE.1 ) THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Form A when A is stored in upper triangle. +* + IF( INCX.EQ.1 ) THEN + DO 20 J = 1, N + IF( X( J ).NE.ZERO ) THEN + TEMP = ALPHA*X( J ) + DO 10 I = 1, J + A( I, J ) = A( I, J ) + X( I )*TEMP + 10 CONTINUE + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40 J = 1, N + IF( X( JX ).NE.ZERO ) THEN + TEMP = ALPHA*X( JX ) + IX = KX + DO 30 I = 1, J + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in lower triangle. +* + IF( INCX.EQ.1 ) THEN + DO 60 J = 1, N + IF( X( J ).NE.ZERO ) THEN + TEMP = ALPHA*X( J ) + DO 50 I = J, N + A( I, J ) = A( I, J ) + X( I )*TEMP + 50 CONTINUE + END IF + 60 CONTINUE + ELSE + JX = KX + DO 80 J = 1, N + IF( X( JX ).NE.ZERO ) THEN + TEMP = ALPHA*X( JX ) + IX = JX + DO 70 I = J, N + A( I, J ) = A( I, J ) + X( IX )*TEMP + IX = IX + INCX + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZSYR +* + END diff --git a/dspl/liblapack/SRC/zsyrfs.f b/dspl/liblapack/SRC/zsyrfs.f new file mode 100644 index 0000000..c114f40 --- /dev/null +++ b/dspl/liblapack/SRC/zsyrfs.f @@ -0,0 +1,446 @@ +*> \brief \b ZSYRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, +* X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYRFS improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric indefinite, and +*> provides error bounds and backward error estimates for the solution. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The factored form of the matrix A. AF contains the block +*> diagonal matrix D and the multipliers used to obtain the +*> factor U or L from the factorization A = U*D*U**T or +*> A = L*D*L**T as computed by ZSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSYTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by ZSYTRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> ITMAX is the maximum number of steps of iterative refinement. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, + $ X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER ITMAX + PARAMETER ( ITMAX = 5 ) + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION TWO + PARAMETER ( TWO = 2.0D+0 ) + DOUBLE PRECISION THREE + PARAMETER ( THREE = 3.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER COUNT, I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZLACN2, ZSYMV, ZSYTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 140 J = 1, NRHS +* + COUNT = 1 + LSTRES = THREE + 20 CONTINUE +* +* Loop until stopping criterion is satisfied. +* +* Compute residual R = B - A * X +* + CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 ) + CALL ZSYMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 30 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 30 CONTINUE +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + DO 50 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + DO 40 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 40 CONTINUE + RWORK( K ) = RWORK( K ) + CABS1( A( K, K ) )*XK + S + 50 CONTINUE + ELSE + DO 70 K = 1, N + S = ZERO + XK = CABS1( X( K, J ) ) + RWORK( K ) = RWORK( K ) + CABS1( A( K, K ) )*XK + DO 60 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 60 CONTINUE + RWORK( K ) = RWORK( K ) + S + 70 CONTINUE + END IF + S = ZERO + DO 80 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 80 CONTINUE + BERR( J ) = S +* +* Test stopping criterion. Continue iterating if +* 1) The residual BERR(J) is larger than machine epsilon, and +* 2) BERR(J) decreased by at least a factor of 2 during the +* last iteration, and +* 3) At most ITMAX iterations tried. +* + IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND. + $ COUNT.LE.ITMAX ) THEN +* +* Update solution and try again. +* + CALL ZSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 ) + LSTRES = BERR( J ) + COUNT = COUNT + 1 + GO TO 20 + END IF +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(A))* +* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(A) is the inverse of A +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(A)*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACN2 to estimate the infinity-norm of the matrix +* inv(A) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) +* + DO 90 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 90 CONTINUE +* + KASE = 0 + 100 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(A**T). +* + CALL ZSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + DO 110 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 110 CONTINUE + ELSE IF( KASE.EQ.2 ) THEN +* +* Multiply by inv(A)*diag(W). +* + DO 120 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 120 CONTINUE + CALL ZSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO ) + END IF + GO TO 100 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 130 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 130 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 140 CONTINUE +* + RETURN +* +* End of ZSYRFS +* + END diff --git a/dspl/liblapack/SRC/zsyrfsx.f b/dspl/liblapack/SRC/zsyrfsx.f new file mode 100644 index 0000000..3420d70 --- /dev/null +++ b/dspl/liblapack/SRC/zsyrfsx.f @@ -0,0 +1,703 @@ +*> \brief \b ZSYRFSX +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYRFSX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYRFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, +* S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, +* ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, +* WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO, EQUED +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX, * ), WORK( * ) +* DOUBLE PRECISION S( * ), PARAMS( * ), BERR( * ), RWORK( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYRFSX improves the computed solution to a system of linear +*> equations when the coefficient matrix is symmetric indefinite, and +*> provides error bounds and backward error estimates for the +*> solution. In addition to normwise error bound, the code provides +*> maximum componentwise error bound if possible. See comments for +*> ERR_BNDS_NORM and ERR_BNDS_COMP for details of the error bounds. +*> +*> The original system of linear equations may have been equilibrated +*> before calling this routine, as described by arguments EQUED and S +*> below. In this case, the solution and error bounds returned are +*> for the original unequilibrated system. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done to A +*> before calling this routine. This is needed to compute +*> the solution and error bounds correctly. +*> = 'N': No equilibration +*> = 'Y': Both row and column equilibration, i.e., A has been +*> replaced by diag(S) * A * diag(S). +*> The right hand side B has been changed accordingly. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular +*> part of the matrix A, and the strictly lower triangular +*> part of A is not referenced. If UPLO = 'L', the leading +*> N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> The factored form of the matrix A. AF contains the block +*> diagonal matrix D and the multipliers used to obtain the +*> factor U or L from the factorization A = U*D*U**T or A = +*> L*D*L**T as computed by DSYTRF. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by DSYTRF. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A. If EQUED = 'Y', A is multiplied on +*> the left and right by diag(S). S is an input argument if FACT = +*> 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED +*> = 'Y', each element of S must be positive. If S is output, each +*> element of S is a power of the radix. If S is input, each element +*> of S should be a power of the radix to ensure a reliable solution +*> and error estimates. Scaling by powers of the radix does not cause +*> rounding errors unless the result underflows or overflows. +*> Rounding errors during scaling lead to refining with a matrix that +*> is not equivalent to the input matrix, producing error estimates +*> that may not be reliable. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> On entry, the solution matrix X, as computed by DGETRS. +*> On exit, the improved solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is DOUBLE PRECISION array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0D+0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the double-precision refinement algorithm, +*> possibly with doubled-single computations if the +*> compilation environment does not support DOUBLE +*> PRECISION. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZSYRFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, + $ ERR_BNDS_NORM, ERR_BNDS_COMP, NPARAMS, PARAMS, + $ WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO, EQUED + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX, * ), WORK( * ) + DOUBLE PRECISION S( * ), PARAMS( * ), BERR( * ), RWORK( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION ITREF_DEFAULT, ITHRESH_DEFAULT + DOUBLE PRECISION COMPONENTWISE_DEFAULT, RTHRESH_DEFAULT + DOUBLE PRECISION DZTHRESH_DEFAULT + PARAMETER ( ITREF_DEFAULT = 1.0D+0 ) + PARAMETER ( ITHRESH_DEFAULT = 10.0D+0 ) + PARAMETER ( COMPONENTWISE_DEFAULT = 1.0D+0 ) + PARAMETER ( RTHRESH_DEFAULT = 0.5D+0 ) + PARAMETER ( DZTHRESH_DEFAULT = 0.25D+0 ) + INTEGER LA_LINRX_ITREF_I, LA_LINRX_ITHRESH_I, + $ LA_LINRX_CWISE_I + PARAMETER ( LA_LINRX_ITREF_I = 1, + $ LA_LINRX_ITHRESH_I = 2 ) + PARAMETER ( LA_LINRX_CWISE_I = 3 ) + INTEGER LA_LINRX_TRUST_I, LA_LINRX_ERR_I, + $ LA_LINRX_RCOND_I + PARAMETER ( LA_LINRX_TRUST_I = 1, LA_LINRX_ERR_I = 2 ) + PARAMETER ( LA_LINRX_RCOND_I = 3 ) +* .. +* .. Local Scalars .. + CHARACTER(1) NORM + LOGICAL RCEQU + INTEGER J, PREC_TYPE, REF_TYPE + INTEGER N_NORMS + DOUBLE PRECISION ANORM, RCOND_TMP + DOUBLE PRECISION ILLRCOND_THRESH, ERR_LBND, CWISE_WRONG + LOGICAL IGNORE_CWISE + INTEGER ITHRESH + DOUBLE PRECISION RTHRESH, UNSTABLE_THRESH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZSYCON, ZLA_SYRFSX_EXTENDED +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT, TRANSFER +* .. +* .. External Functions .. + EXTERNAL LSAME, ILAPREC + EXTERNAL DLAMCH, ZLANSY, ZLA_SYRCOND_X, ZLA_SYRCOND_C + DOUBLE PRECISION DLAMCH, ZLANSY, ZLA_SYRCOND_X, ZLA_SYRCOND_C + LOGICAL LSAME + INTEGER ILAPREC +* .. +* .. Executable Statements .. +* +* Check the input parameters. +* + INFO = 0 + REF_TYPE = INT( ITREF_DEFAULT ) + IF ( NPARAMS .GE. LA_LINRX_ITREF_I ) THEN + IF ( PARAMS( LA_LINRX_ITREF_I ) .LT. 0.0D+0 ) THEN + PARAMS( LA_LINRX_ITREF_I ) = ITREF_DEFAULT + ELSE + REF_TYPE = PARAMS( LA_LINRX_ITREF_I ) + END IF + END IF +* +* Set default parameters. +* + ILLRCOND_THRESH = DBLE( N ) * DLAMCH( 'Epsilon' ) + ITHRESH = INT( ITHRESH_DEFAULT ) + RTHRESH = RTHRESH_DEFAULT + UNSTABLE_THRESH = DZTHRESH_DEFAULT + IGNORE_CWISE = COMPONENTWISE_DEFAULT .EQ. 0.0D+0 +* + IF ( NPARAMS.GE.LA_LINRX_ITHRESH_I ) THEN + IF ( PARAMS( LA_LINRX_ITHRESH_I ).LT.0.0D+0 ) THEN + PARAMS( LA_LINRX_ITHRESH_I ) = ITHRESH + ELSE + ITHRESH = INT( PARAMS( LA_LINRX_ITHRESH_I ) ) + END IF + END IF + IF ( NPARAMS.GE.LA_LINRX_CWISE_I ) THEN + IF ( PARAMS( LA_LINRX_CWISE_I ).LT.0.0D+0 ) THEN + IF ( IGNORE_CWISE ) THEN + PARAMS( LA_LINRX_CWISE_I ) = 0.0D+0 + ELSE + PARAMS( LA_LINRX_CWISE_I ) = 1.0D+0 + END IF + ELSE + IGNORE_CWISE = PARAMS( LA_LINRX_CWISE_I ) .EQ. 0.0D+0 + END IF + END IF + IF ( REF_TYPE .EQ. 0 .OR. N_ERR_BNDS .EQ. 0 ) THEN + N_NORMS = 0 + ELSE IF ( IGNORE_CWISE ) THEN + N_NORMS = 1 + ELSE + N_NORMS = 2 + END IF +* + RCEQU = LSAME( EQUED, 'Y' ) +* +* Test input parameters. +* + IF ( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.RCEQU .AND. .NOT.LSAME( EQUED, 'N' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -12 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -14 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYRFSX', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + RCOND = 1.0D+0 + DO J = 1, NRHS + BERR( J ) = 0.0D+0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 0.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 0.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 1.0D+0 + END IF + END DO + RETURN + END IF +* +* Default to failure. +* + RCOND = 0.0D+0 + DO J = 1, NRHS + BERR( J ) = 1.0D+0 + IF ( N_ERR_BNDS .GE. 1 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 2 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 + END IF + IF ( N_ERR_BNDS .GE. 3 ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = 0.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = 0.0D+0 + END IF + END DO +* +* Compute the norm of A and the reciprocal of the condition +* number of A. +* + NORM = 'I' + ANORM = ZLANSY( NORM, UPLO, N, A, LDA, RWORK ) + CALL ZSYCON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, + $ INFO ) +* +* Perform refinement on each right-hand side +* + IF ( REF_TYPE .NE. 0 ) THEN + + PREC_TYPE = ILAPREC( 'E' ) + + CALL ZLA_SYRFSX_EXTENDED( PREC_TYPE, UPLO, N, + $ NRHS, A, LDA, AF, LDAF, IPIV, RCEQU, S, B, + $ LDB, X, LDX, BERR, N_NORMS, ERR_BNDS_NORM, ERR_BNDS_COMP, + $ WORK, RWORK, WORK(N+1), + $ TRANSFER (RWORK(1:2*N), (/ (ZERO, ZERO) /), N), RCOND, + $ ITHRESH, RTHRESH, UNSTABLE_THRESH, IGNORE_CWISE, + $ INFO ) + END IF + + ERR_LBND = MAX( 10.0D+0, SQRT( DBLE( N ) ) ) * DLAMCH( 'Epsilon' ) + IF (N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 1) THEN +* +* Compute scaled normwise condition number cond(A*C). +* + IF ( RCEQU ) THEN + RCOND_TMP = ZLA_SYRCOND_C( UPLO, N, A, LDA, AF, LDAF, IPIV, + $ S, .TRUE., INFO, WORK, RWORK ) + ELSE + RCOND_TMP = ZLA_SYRCOND_C( UPLO, N, A, LDA, AF, LDAF, IPIV, + $ S, .FALSE., INFO, WORK, RWORK ) + END IF + DO J = 1, NRHS +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .GT. 1.0D+0 ) + $ ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 0.0D+0 + IF ( INFO .LE. N ) INFO = N + J + ELSE IF ( ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) .LT. ERR_LBND ) + $ THEN + ERR_BNDS_NORM( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_NORM( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_NORM( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + END DO + END IF + + IF ( N_ERR_BNDS .GE. 1 .AND. N_NORMS .GE. 2 ) THEN +* +* Compute componentwise condition number cond(A*diag(Y(:,J))) for +* each right-hand side using the current solution as an estimate of +* the true solution. If the componentwise error estimate is too +* large, then the solution is a lousy estimate of truth and the +* estimated RCOND may be too optimistic. To avoid misleading users, +* the inverse condition number is set to 0.0 when the estimated +* cwise error is at least CWISE_WRONG. +* + CWISE_WRONG = SQRT( DLAMCH( 'Epsilon' ) ) + DO J = 1, NRHS + IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .LT. CWISE_WRONG ) + $ THEN + RCOND_TMP = ZLA_SYRCOND_X( UPLO, N, A, LDA, AF, LDAF, + $ IPIV, X(1,J), INFO, WORK, RWORK ) + ELSE + RCOND_TMP = 0.0D+0 + END IF +* +* Cap the error at 1.0. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_ERR_I + $ .AND. ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) .GT. 1.0D+0 ) + $ ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 + +* +* Threshold the error (see LAWN). +* + IF ( RCOND_TMP .LT. ILLRCOND_THRESH ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = 1.0D+0 + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 0.0D+0 + IF (.NOT. IGNORE_CWISE + $ .AND. INFO.LT.N + J ) INFO = N + J + ELSE IF ( ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) + $ .LT. ERR_LBND ) THEN + ERR_BNDS_COMP( J, LA_LINRX_ERR_I ) = ERR_LBND + ERR_BNDS_COMP( J, LA_LINRX_TRUST_I ) = 1.0D+0 + END IF +* +* Save the condition number. +* + IF ( N_ERR_BNDS .GE. LA_LINRX_RCOND_I ) THEN + ERR_BNDS_COMP( J, LA_LINRX_RCOND_I ) = RCOND_TMP + END IF + + END DO + END IF +* + RETURN +* +* End of ZSYRFSX +* + END diff --git a/dspl/liblapack/SRC/zsysv.f b/dspl/liblapack/SRC/zsysv.f new file mode 100644 index 0000000..2ff196c --- /dev/null +++ b/dspl/liblapack/SRC/zsysv.f @@ -0,0 +1,270 @@ +*> \brief ZSYSV computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYSV + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYSV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> The diagonal pivoting method is used to factor A as +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. The factored form of A is then +*> used to solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the block diagonal matrix D and the +*> multipliers used to obtain the factor U or L from the +*> factorization A = U*D*U**T or A = L*D*L**T as computed by +*> ZSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, as +*> determined by ZSYTRF. If IPIV(k) > 0, then rows and columns +*> k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1 +*> diagonal block. If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, +*> then rows and columns k-1 and -IPIV(k) were interchanged and +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. If UPLO = 'L' and +*> IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and +*> -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2 +*> diagonal block. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= 1, and for best performance +*> LWORK >= max(1,N*NB), where NB is the optimal blocksize for +*> ZSYTRF. +*> for LWORK < N, TRS will be done with Level BLAS 2 +*> for LWORK >= N, TRS will be done with Level BLAS 3 +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYsolve +* +* ===================================================================== + SUBROUTINE ZSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZSYTRF, ZSYTRS, ZSYTRS2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + CALL ZSYTRF( UPLO, N, A, LDA, IPIV, WORK, -1, INFO ) + LWKOPT = WORK(1) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYSV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL ZSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + IF ( LWORK.LT.N ) THEN +* +* Solve with TRS ( Use Level BLAS 2) +* + CALL ZSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* + ELSE +* +* Solve with TRS2 ( Use Level BLAS 3) +* + CALL ZSYTRS2( UPLO,N,NRHS,A,LDA,IPIV,B,LDB,WORK,INFO ) +* + END IF +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZSYSV +* + END diff --git a/dspl/liblapack/SRC/zsysv_aa.f b/dspl/liblapack/SRC/zsysv_aa.f new file mode 100644 index 0000000..10693c7 --- /dev/null +++ b/dspl/liblapack/SRC/zsysv_aa.f @@ -0,0 +1,254 @@ +*> \brief ZSYSV_AA computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYSV_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYSV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYSV computes the solution to a complex system of linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Aasen's algorithm is used to factor A as +*> A = U * T * U**T, if UPLO = 'U', or +*> A = L * T * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is symmetric tridiagonal. The factored +*> form of A is then used to solve the system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the tridiagonal matrix T and the +*> multipliers used to obtain the factor U or L from the +*> factorization A = U*T*U**T or A = L*T*L**T as computed by +*> ZSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= MAX(1,2*N,3*N-2), and for +*> the best performance, LWORK >= MAX(1,N*NB), where NB is +*> the optimal blocksize for ZSYTRF_AA. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16SYsolve +* +* ===================================================================== + SUBROUTINE ZSYSV_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT, LWKOPT_SYTRF, LWKOPT_SYTRS +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL ILAENV, LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZSYTRF_AA, ZSYTRS_AA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX(2*N, 3*N-2) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + CALL ZSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, -1, INFO ) + LWKOPT_SYTRF = INT( WORK(1) ) + CALL ZSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ -1, INFO ) + LWKOPT_SYTRS = INT( WORK(1) ) + LWKOPT = MAX( LWKOPT_SYTRF, LWKOPT_SYTRS ) + WORK( 1 ) = LWKOPT + IF( LWORK.LT.LWKOPT .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYSV_AA ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*T*U**T or A = L*T*L**T. +* + CALL ZSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZSYSV_AA +* + END diff --git a/dspl/liblapack/SRC/zsysv_aa_2stage.f b/dspl/liblapack/SRC/zsysv_aa_2stage.f new file mode 100644 index 0000000..fcf9bc8 --- /dev/null +++ b/dspl/liblapack/SRC/zsysv_aa_2stage.f @@ -0,0 +1,276 @@ +*> \brief ZSYSV_AA_2STAGE computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYSV_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYSV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, +* IPIV, IPIV2, B, LDB, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX*16 A( LDA, * ), TB( * ), B( LDB, *), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYSV_AA_2STAGE computes the solution to a complex system of +*> linear equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Aasen's 2-stage algorithm is used to factor A as +*> A = U * T * U**H, if UPLO = 'U', or +*> A = L * T * L**H, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is symmetric and band. The matrix T is +*> then LU-factored with partial pivoting. The factored form of A +*> is then used to solve the system of equations A * X = B. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX*16 array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZSYSV_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LDB, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZSYTRF_AA_2STAGE, + $ ZSYTRS_AA_2STAGE +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + CALL ZSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, + $ IPIV2, WORK, -1, INFO ) + LWKOPT = INT( WORK(1) ) + IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN + INFO = -7 + ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN + INFO = -13 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYSV_AA_2STAGE', -INFO ) + RETURN + ELSE IF( WQUERY .OR. TQUERY ) THEN + RETURN + END IF +* +* +* Compute the factorization A = U*T*U**H or A = L*T*L**H. +* + CALL ZSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, IPIV2, + $ WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL ZSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, + $ IPIV2, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* +* End of ZSYSV_AA_2STAGE +* + END diff --git a/dspl/liblapack/SRC/zsysv_rk.f b/dspl/liblapack/SRC/zsysv_rk.f new file mode 100644 index 0000000..8cc79a7 --- /dev/null +++ b/dspl/liblapack/SRC/zsysv_rk.f @@ -0,0 +1,317 @@ +*> \brief ZSYSV_RK computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYSV_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYSV_RK( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), E( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> ZSYSV_RK computes the solution to a complex system of linear +*> equations A * X = B, where A is an N-by-N symmetric matrix +*> and X and B are N-by-NRHS matrices. +*> +*> The bounded Bunch-Kaufman (rook) diagonal pivoting method is used +*> to factor A as +*> A = P*U*D*(U**T)*(P**T), if UPLO = 'U', or +*> A = P*L*D*(L**T)*(P**T), if UPLO = 'L', +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> ZSYTRF_RK is called to compute the factorization of a complex +*> symmetric matrix. The factored form of A is then used to solve +*> the system of equations A * X = B by calling BLAS3 routine ZSYTRS_3. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, if INFO = 0, diagonal of the block diagonal +*> matrix D and factors U or L as computed by ZSYTRF_RK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> For more info see the description of ZSYTRF_RK routine. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N) +*> On exit, contains the output computed by the factorization +*> routine ZSYTRF_RK, i.e. the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> +*> For more info see the description of ZSYTRF_RK routine. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, +*> as determined by ZSYTRF_RK. +*> +*> For more info see the description of ZSYTRF_RK routine. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension ( MAX(1,LWORK) ). +*> Work array used in the factorization stage. +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= 1. For best performance +*> of factorization stage LWORK >= max(1,N*NB), where NB is +*> the optimal blocksize for ZSYTRF_RK. +*> +*> If LWORK = -1, then a workspace query is assumed; +*> the routine only calculates the optimal size of the WORK +*> array for factorization stage, returns this value as +*> the first entry of the WORK array, and no error message +*> related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZSYSV_RK( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZSYTRF_RK, ZSYTRS_3 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + CALL ZSYTRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, -1, INFO ) + LWKOPT = WORK(1) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYSV_RK ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = P*U*D*(U**T)*(P**T) or +* A = P*U*D*(U**T)*(P**T). +* + CALL ZSYTRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, INFO ) +* + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B with BLAS3 solver, overwriting B with X. +* + CALL ZSYTRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZSYSV_RK +* + END diff --git a/dspl/liblapack/SRC/zsysv_rook.f b/dspl/liblapack/SRC/zsysv_rook.f new file mode 100644 index 0000000..f1448e9 --- /dev/null +++ b/dspl/liblapack/SRC/zsysv_rook.f @@ -0,0 +1,293 @@ +*> \brief ZSYSV_ROOK computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYSV_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYSV_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYSV_ROOK computes the solution to a complex system of linear +*> equations +*> A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> The diagonal pivoting method is used to factor A as +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> ZSYTRF_ROOK is called to compute the factorization of a complex +*> symmetric matrix A using the bounded Bunch-Kaufman ("rook") diagonal +*> pivoting method. +*> +*> The factored form of A is then used to solve the system +*> of equations A * X = B by calling ZSYTRS_ROOK. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if INFO = 0, the block diagonal matrix D and the +*> multipliers used to obtain the factor U or L from the +*> factorization A = U*D*U**T or A = L*D*L**T as computed by +*> ZSYTRF_ROOK. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D, +*> as determined by ZSYTRF_ROOK. +*> +*> If UPLO = 'U': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= 1, and for best performance +*> LWORK >= max(1,N*NB), where NB is the optimal blocksize for +*> ZSYTRF_ROOK. +*> +*> TRS will be done with Level 2 BLAS +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYsolve +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZSYSV_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, LWORK, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZSYTRF_ROOK, ZSYTRS_ROOK +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + CALL ZSYTRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, -1, INFO ) + LWKOPT = WORK(1) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYSV_ROOK ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL ZSYTRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* +* Solve with TRS_ROOK ( Use Level 2 BLAS) +* + CALL ZSYTRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZSYSV_ROOK +* + END diff --git a/dspl/liblapack/SRC/zsysvx.f b/dspl/liblapack/SRC/zsysvx.f new file mode 100644 index 0000000..ebebe2e --- /dev/null +++ b/dspl/liblapack/SRC/zsysvx.f @@ -0,0 +1,417 @@ +*> \brief ZSYSVX computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYSVX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, +* LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, +* RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER FACT, UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYSVX uses the diagonal pivoting factorization to compute the +*> solution to a complex system of linear equations A * X = B, +*> where A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> Error bounds on the solution and a condition estimate are also +*> provided. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'N', the diagonal pivoting method is used to factor A. +*> The form of the factorization is +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> 2. If some D(i,i)=0, so that D is exactly singular, then the routine +*> returns with INFO = i. Otherwise, the factored form of A is used +*> to estimate the condition number of the matrix A. If the +*> reciprocal of the condition number is less than machine precision, +*> INFO = N+1 is returned as a warning, but the routine still goes on +*> to solve for X and compute error bounds as described below. +*> +*> 3. The system of equations is solved for X using the factored form +*> of A. +*> +*> 4. Iterative refinement is applied to improve the computed solution +*> matrix and calculate error bounds and backward error estimates +*> for it. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of A has been +*> supplied on entry. +*> = 'F': On entry, AF and IPIV contain the factored form +*> of A. A, AF and IPIV will not be modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**T or A = L*D*L**T as computed by ZSYTRF. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L from the factorization +*> A = U*D*U**T or A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains details of the interchanges and the block structure +*> of D, as determined by ZSYTRF. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains details of the interchanges and the block structure +*> of D, as determined by ZSYTRF. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The N-by-NRHS right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The estimate of the reciprocal condition number of the matrix +*> A. If RCOND is less than the machine precision (in +*> particular, if RCOND = 0), the matrix is singular to working +*> precision. This condition is indicated by a return code of +*> INFO > 0. +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= max(1,2*N), and for best +*> performance, when FACT = 'N', LWORK >= max(1,2*N,N*NB), where +*> NB is the optimal blocksize for ZSYTRF. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, and i is +*> <= N: D(i,i) is exactly zero. The factorization +*> has been completed but the factor D is exactly +*> singular, so the solution and error bounds could +*> not be computed. RCOND = 0 is returned. +*> = N+1: D is nonsingular, but RCOND is less than machine +*> precision, meaning that the matrix is singular +*> to working precision. Nevertheless, the +*> solution and error bounds are computed because +*> there are a number of situations where the +*> computed solution can be more accurate than the +*> value of RCOND would suggest. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16SYsolve +* +* ===================================================================== + SUBROUTINE ZSYSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, + $ LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK, + $ RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOFACT + INTEGER LWKOPT, NB + DOUBLE PRECISION ANORM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANSY + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANSY +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACPY, ZSYCON, ZSYRFS, ZSYTRF, ZSYTRS +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) + $ THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKOPT = MAX( 1, 2*N ) + IF( NOFACT ) THEN + NB = ILAENV( 1, 'ZSYTRF', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( LWKOPT, N*NB ) + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYSVX', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + IF( NOFACT ) THEN +* +* Compute the factorization A = U*D*U**T or A = L*D*L**T. +* + CALL ZLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL ZSYTRF( UPLO, N, AF, LDAF, IPIV, WORK, LWORK, INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 )THEN + RCOND = ZERO + RETURN + END IF + END IF +* +* Compute the norm of the matrix A. +* + ANORM = ZLANSY( 'I', UPLO, N, A, LDA, RWORK ) +* +* Compute the reciprocal of the condition number of A. +* + CALL ZSYCON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, INFO ) +* +* Compute the solution vectors X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZSYTRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solutions and +* compute error bounds and backward error estimates for them. +* + CALL ZSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* Set INFO = N+1 if the matrix is singular to working precision. +* + IF( RCOND.LT.DLAMCH( 'Epsilon' ) ) + $ INFO = N + 1 +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZSYSVX +* + END diff --git a/dspl/liblapack/SRC/zsysvxx.f b/dspl/liblapack/SRC/zsysvxx.f new file mode 100644 index 0000000..ef44d09 --- /dev/null +++ b/dspl/liblapack/SRC/zsysvxx.f @@ -0,0 +1,698 @@ +*> \brief ZSYSVXX computes the solution to system of linear equations A * X = B for SY matrices +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYSVXX + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYSVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, +* EQUED, S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, +* N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, +* NPARAMS, PARAMS, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER EQUED, FACT, UPLO +* INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, +* $ N_ERR_BNDS +* DOUBLE PRECISION RCOND, RPVGRW +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), +* $ X( LDX, * ), WORK( * ) +* DOUBLE PRECISION S( * ), PARAMS( * ), BERR( * ), +* $ ERR_BNDS_NORM( NRHS, * ), +* $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYSVXX uses the diagonal pivoting factorization to compute the +*> solution to a complex*16 system of linear equations A * X = B, where +*> A is an N-by-N symmetric matrix and X and B are N-by-NRHS +*> matrices. +*> +*> If requested, both normwise and maximum componentwise error bounds +*> are returned. ZSYSVXX will return a solution with a tiny +*> guaranteed error (O(eps) where eps is the working machine +*> precision) unless the matrix is very ill-conditioned, in which +*> case a warning is returned. Relevant condition numbers also are +*> calculated and returned. +*> +*> ZSYSVXX accepts user-provided factorizations and equilibration +*> factors; see the definitions of the FACT and EQUED options. +*> Solving with refinement and using a factorization from a previous +*> ZSYSVXX call will also produce a solution with either O(eps) +*> errors or warnings, but we cannot make that claim for general +*> user-provided factorizations and equilibration factors if they +*> differ from what ZSYSVXX would itself produce. +*> \endverbatim +* +*> \par Description: +* ================= +*> +*> \verbatim +*> +*> The following steps are performed: +*> +*> 1. If FACT = 'E', double precision scaling factors are computed to equilibrate +*> the system: +*> +*> diag(S)*A*diag(S) *inv(diag(S))*X = diag(S)*B +*> +*> Whether or not the system will be equilibrated depends on the +*> scaling of the matrix A, but if equilibration is used, A is +*> overwritten by diag(S)*A*diag(S) and B by diag(S)*B. +*> +*> 2. If FACT = 'N' or 'E', the LU decomposition is used to factor +*> the matrix A (after equilibration if FACT = 'E') as +*> +*> A = U * D * U**T, if UPLO = 'U', or +*> A = L * D * L**T, if UPLO = 'L', +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> 3. If some D(i,i)=0, so that D is exactly singular, then the +*> routine returns with INFO = i. Otherwise, the factored form of A +*> is used to estimate the condition number of the matrix A (see +*> argument RCOND). If the reciprocal of the condition number is +*> less than machine precision, the routine still goes on to solve +*> for X and compute error bounds as described below. +*> +*> 4. The system of equations is solved for X using the factored form +*> of A. +*> +*> 5. By default (unless PARAMS(LA_LINRX_ITREF_I) is set to zero), +*> the routine will use iterative refinement to try to get a small +*> error and error bounds. Refinement calculates the residual to at +*> least twice the working precision. +*> +*> 6. If equilibration was used, the matrix X is premultiplied by +*> diag(R) so that it solves the original system before +*> equilibration. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \verbatim +*> Some optional parameters are bundled in the PARAMS array. These +*> settings determine how refinement is performed, but often the +*> defaults are acceptable. If the defaults are acceptable, users +*> can pass NPARAMS = 0 which prevents the source code from accessing +*> the PARAMS argument. +*> \endverbatim +*> +*> \param[in] FACT +*> \verbatim +*> FACT is CHARACTER*1 +*> Specifies whether or not the factored form of the matrix A is +*> supplied on entry, and if not, whether the matrix A should be +*> equilibrated before it is factored. +*> = 'F': On entry, AF and IPIV contain the factored form of A. +*> If EQUED is not 'N', the matrix A has been +*> equilibrated with scaling factors given by S. +*> A, AF, and IPIV are not modified. +*> = 'N': The matrix A will be copied to AF and factored. +*> = 'E': The matrix A will be equilibrated if necessary, then +*> copied to AF and factored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The symmetric matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of A contains the upper triangular +*> part of the matrix A, and the strictly lower triangular +*> part of A is not referenced. If UPLO = 'L', the leading +*> N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by +*> diag(S)*A*diag(S). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] AF +*> \verbatim +*> AF is COMPLEX*16 array, dimension (LDAF,N) +*> If FACT = 'F', then AF is an input argument and on entry +*> contains the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L from the factorization A = +*> U*D*U**T or A = L*D*L**T as computed by DSYTRF. +*> +*> If FACT = 'N', then AF is an output argument and on exit +*> returns the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L from the factorization A = +*> U*D*U**T or A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] LDAF +*> \verbatim +*> LDAF is INTEGER +*> The leading dimension of the array AF. LDAF >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> If FACT = 'F', then IPIV is an input argument and on entry +*> contains details of the interchanges and the block +*> structure of D, as determined by DSYTRF. If IPIV(k) > 0, +*> then rows and columns k and IPIV(k) were interchanged and +*> D(k,k) is a 1-by-1 diagonal block. If UPLO = 'U' and +*> IPIV(k) = IPIV(k-1) < 0, then rows and columns k-1 and +*> -IPIV(k) were interchanged and D(k-1:k,k-1:k) is a 2-by-2 +*> diagonal block. If UPLO = 'L' and IPIV(k) = IPIV(k+1) < 0, +*> then rows and columns k+1 and -IPIV(k) were interchanged +*> and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> +*> If FACT = 'N', then IPIV is an output argument and on exit +*> contains details of the interchanges and the block +*> structure of D, as determined by DSYTRF. +*> \endverbatim +*> +*> \param[in,out] EQUED +*> \verbatim +*> EQUED is CHARACTER*1 +*> Specifies the form of equilibration that was done. +*> = 'N': No equilibration (always true if FACT = 'N'). +*> = 'Y': Both row and column equilibration, i.e., A has been +*> replaced by diag(S) * A * diag(S). +*> EQUED is an input argument if FACT = 'F'; otherwise, it is an +*> output argument. +*> \endverbatim +*> +*> \param[in,out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (N) +*> The scale factors for A. If EQUED = 'Y', A is multiplied on +*> the left and right by diag(S). S is an input argument if FACT = +*> 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED +*> = 'Y', each element of S must be positive. If S is output, each +*> element of S is a power of the radix. If S is input, each element +*> of S should be a power of the radix to ensure a reliable solution +*> and error estimates. Scaling by powers of the radix does not cause +*> rounding errors unless the result underflows or overflows. +*> Rounding errors during scaling lead to refining with a matrix that +*> is not equivalent to the input matrix, producing error estimates +*> that may not be reliable. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS right hand side matrix B. +*> On exit, +*> if EQUED = 'N', B is not modified; +*> if EQUED = 'Y', B is overwritten by diag(S)*B; +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> If INFO = 0, the N-by-NRHS solution matrix X to the original +*> system of equations. Note that A and B are modified on exit if +*> EQUED .ne. 'N', and the solution to the equilibrated system is +*> inv(diag(S))*X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> Reciprocal scaled condition number. This is an estimate of the +*> reciprocal Skeel condition number of the matrix A after +*> equilibration (if done). If this is less than the machine +*> precision (in particular, if it is zero), the matrix is singular +*> to working precision. Note that the error may still be small even +*> if this number is very small and the matrix appears ill- +*> conditioned. +*> \endverbatim +*> +*> \param[out] RPVGRW +*> \verbatim +*> RPVGRW is DOUBLE PRECISION +*> Reciprocal pivot growth. On exit, this contains the reciprocal +*> pivot growth factor norm(A)/norm(U). The "max absolute element" +*> norm is used. If this is much less than 1, then the stability of +*> the LU factorization of the (equilibrated) matrix A could be poor. +*> This also means that the solution X, estimated condition numbers, +*> and error bounds could be unreliable. If factorization fails with +*> 0 for the leading INFO columns of A. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> Componentwise relative backward error. This is the +*> componentwise relative backward error of each solution vector X(j) +*> (i.e., the smallest relative change in any element of A or B that +*> makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[in] N_ERR_BNDS +*> \verbatim +*> N_ERR_BNDS is INTEGER +*> Number of error bounds to return for each right hand side +*> and each type (normwise or componentwise). See ERR_BNDS_NORM and +*> ERR_BNDS_COMP below. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_NORM +*> \verbatim +*> ERR_BNDS_NORM is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> normwise relative error, which is defined as follows: +*> +*> Normwise relative error in the ith solution vector: +*> max_j (abs(XTRUE(j,i) - X(j,i))) +*> ------------------------------ +*> max_j abs(X(j,i)) +*> +*> The array is indexed by the type of error information as described +*> below. There currently are up to three pieces of information +*> returned. +*> +*> The first index in ERR_BNDS_NORM(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_NORM(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated normwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*A, where S scales each row by a power of the +*> radix so all absolute row sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[out] ERR_BNDS_COMP +*> \verbatim +*> ERR_BNDS_COMP is DOUBLE PRECISION array, dimension (NRHS, N_ERR_BNDS) +*> For each right-hand side, this array contains information about +*> various error bounds and condition numbers corresponding to the +*> componentwise relative error, which is defined as follows: +*> +*> Componentwise relative error in the ith solution vector: +*> abs(XTRUE(j,i) - X(j,i)) +*> max_j ---------------------- +*> abs(X(j,i)) +*> +*> The array is indexed by the right-hand side i (on which the +*> componentwise relative error depends), and the type of error +*> information as described below. There currently are up to three +*> pieces of information returned for each right-hand side. If +*> componentwise accuracy is not requested (PARAMS(3) = 0.0), then +*> ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most +*> the first (:,N_ERR_BNDS) entries are returned. +*> +*> The first index in ERR_BNDS_COMP(i,:) corresponds to the ith +*> right-hand side. +*> +*> The second index in ERR_BNDS_COMP(:,err) contains the following +*> three fields: +*> err = 1 "Trust/don't trust" boolean. Trust the answer if the +*> reciprocal condition number is less than the threshold +*> sqrt(n) * dlamch('Epsilon'). +*> +*> err = 2 "Guaranteed" error bound: The estimated forward error, +*> almost certainly within a factor of 10 of the true error +*> so long as the next entry is greater than the threshold +*> sqrt(n) * dlamch('Epsilon'). This error bound should only +*> be trusted if the previous boolean is true. +*> +*> err = 3 Reciprocal condition number: Estimated componentwise +*> reciprocal condition number. Compared with the threshold +*> sqrt(n) * dlamch('Epsilon') to determine if the error +*> estimate is "guaranteed". These reciprocal condition +*> numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some +*> appropriately scaled matrix Z. +*> Let Z = S*(A*diag(x)), where x is the solution for the +*> current right-hand side and S scales each row of +*> A*diag(x) by a power of the radix so all absolute row +*> sums of Z are approximately 1. +*> +*> See Lapack Working Note 165 for further details and extra +*> cautions. +*> \endverbatim +*> +*> \param[in] NPARAMS +*> \verbatim +*> NPARAMS is INTEGER +*> Specifies the number of parameters set in PARAMS. If .LE. 0, the +*> PARAMS array is never referenced and default values are used. +*> \endverbatim +*> +*> \param[in,out] PARAMS +*> \verbatim +*> PARAMS is DOUBLE PRECISION array, dimension NPARAMS +*> Specifies algorithm parameters. If an entry is .LT. 0.0, then +*> that entry will be filled with default value used for that +*> parameter. Only positions up to NPARAMS are accessed; defaults +*> are used for higher-numbered parameters. +*> +*> PARAMS(LA_LINRX_ITREF_I = 1) : Whether to perform iterative +*> refinement or not. +*> Default: 1.0D+0 +*> = 0.0 : No refinement is performed, and no error bounds are +*> computed. +*> = 1.0 : Use the extra-precise refinement algorithm. +*> (other values are reserved for future use) +*> +*> PARAMS(LA_LINRX_ITHRESH_I = 2) : Maximum number of residual +*> computations allowed for refinement. +*> Default: 10 +*> Aggressive: Set to 100 to permit convergence using approximate +*> factorizations or factorizations other than LU. If +*> the factorization uses a technique other than +*> Gaussian elimination, the guarantees in +*> err_bnds_norm and err_bnds_comp may no longer be +*> trustworthy. +*> +*> PARAMS(LA_LINRX_CWISE_I = 3) : Flag determining if the code +*> will attempt to find a solution with small componentwise +*> relative error in the double-precision algorithm. Positive +*> is true, 0.0 is false. +*> Default: 1.0 (attempt componentwise convergence) +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit. The solution to every right-hand side is +*> guaranteed. +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> > 0 and <= N: U(INFO,INFO) is exactly zero. The factorization +*> has been completed, but the factor U is exactly singular, so +*> the solution and error bounds could not be computed. RCOND = 0 +*> is returned. +*> = N+J: The solution corresponding to the Jth right-hand side is +*> not guaranteed. The solutions corresponding to other right- +*> hand sides K with K > J may not be guaranteed as well, but +*> only the first such right-hand side is reported. If a small +*> componentwise error is not requested (PARAMS(3) = 0.0) then +*> the Jth right-hand side is the first with a normwise error +*> bound that is not guaranteed (the smallest J such +*> that ERR_BNDS_NORM(J,1) = 0.0). By default (PARAMS(3) = 1.0) +*> the Jth right-hand side is the first with either a normwise or +*> componentwise error bound that is not guaranteed (the smallest +*> J such that either ERR_BNDS_NORM(J,1) = 0.0 or +*> ERR_BNDS_COMP(J,1) = 0.0). See the definition of +*> ERR_BNDS_NORM(:,1) and ERR_BNDS_COMP(:,1). To get information +*> about all of the right-hand sides check ERR_BNDS_NORM or +*> ERR_BNDS_COMP. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16SYsolve +* +* ===================================================================== + SUBROUTINE ZSYSVXX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ EQUED, S, B, LDB, X, LDX, RCOND, RPVGRW, BERR, + $ N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, + $ NPARAMS, PARAMS, WORK, RWORK, INFO ) +* +* -- LAPACK driver routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER EQUED, FACT, UPLO + INTEGER INFO, LDA, LDAF, LDB, LDX, N, NRHS, NPARAMS, + $ N_ERR_BNDS + DOUBLE PRECISION RCOND, RPVGRW +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), AF( LDAF, * ), B( LDB, * ), + $ X( LDX, * ), WORK( * ) + DOUBLE PRECISION S( * ), PARAMS( * ), BERR( * ), + $ ERR_BNDS_NORM( NRHS, * ), + $ ERR_BNDS_COMP( NRHS, * ), RWORK( * ) +* .. +* +* ================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + INTEGER FINAL_NRM_ERR_I, FINAL_CMP_ERR_I, BERR_I + INTEGER RCOND_I, NRM_RCOND_I, NRM_ERR_I, CMP_RCOND_I + INTEGER CMP_ERR_I, PIV_GROWTH_I + PARAMETER ( FINAL_NRM_ERR_I = 1, FINAL_CMP_ERR_I = 2, + $ BERR_I = 3 ) + PARAMETER ( RCOND_I = 4, NRM_RCOND_I = 5, NRM_ERR_I = 6 ) + PARAMETER ( CMP_RCOND_I = 7, CMP_ERR_I = 8, + $ PIV_GROWTH_I = 9 ) +* .. +* .. Local Scalars .. + LOGICAL EQUIL, NOFACT, RCEQU + INTEGER INFEQU, J + DOUBLE PRECISION AMAX, BIGNUM, SMIN, SMAX, SCOND, SMLNUM +* .. +* .. External Functions .. + EXTERNAL LSAME, DLAMCH, ZLA_SYRPVGRW + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLA_SYRPVGRW +* .. +* .. External Subroutines .. + EXTERNAL ZSYEQUB, ZSYTRF, ZSYTRS, ZLACPY, + $ ZLAQSY, XERBLA, ZLASCL2, ZSYRFSX +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* + INFO = 0 + NOFACT = LSAME( FACT, 'N' ) + EQUIL = LSAME( FACT, 'E' ) + SMLNUM = DLAMCH( 'Safe minimum' ) + BIGNUM = ONE / SMLNUM + IF( NOFACT .OR. EQUIL ) THEN + EQUED = 'N' + RCEQU = .FALSE. + ELSE + RCEQU = LSAME( EQUED, 'Y' ) + ENDIF +* +* Default is failure. If an input parameter is wrong or +* factorization fails, make everything look horrible. Only the +* pivot growth is set here, the rest is initialized in ZSYRFSX. +* + RPVGRW = ZERO +* +* Test the input parameters. PARAMS is not tested until ZSYRFSX. +* + IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT. + $ LSAME( FACT, 'F' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME(UPLO, 'U') .AND. + $ .NOT.LSAME(UPLO, 'L') ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT. + $ ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN + INFO = -10 + ELSE + IF ( RCEQU ) THEN + SMIN = BIGNUM + SMAX = ZERO + DO 10 J = 1, N + SMIN = MIN( SMIN, S( J ) ) + SMAX = MAX( SMAX, S( J ) ) + 10 CONTINUE + IF( SMIN.LE.ZERO ) THEN + INFO = -11 + ELSE IF( N.GT.0 ) THEN + SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM ) + ELSE + SCOND = ONE + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -13 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -15 + END IF + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYSVXX', -INFO ) + RETURN + END IF +* + IF( EQUIL ) THEN +* +* Compute row and column scalings to equilibrate the matrix A. +* + CALL ZSYEQUB( UPLO, N, A, LDA, S, SCOND, AMAX, WORK, INFEQU ) + IF( INFEQU.EQ.0 ) THEN +* +* Equilibrate the matrix. +* + CALL ZLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED ) + RCEQU = LSAME( EQUED, 'Y' ) + END IF + + END IF +* +* Scale the right hand-side. +* + IF( RCEQU ) CALL ZLASCL2( N, NRHS, S, B, LDB ) +* + IF( NOFACT .OR. EQUIL ) THEN +* +* Compute the LDL^T or UDU^T factorization of A. +* + CALL ZLACPY( UPLO, N, N, A, LDA, AF, LDAF ) + CALL ZSYTRF( UPLO, N, AF, LDAF, IPIV, WORK, 5*MAX(1,N), INFO ) +* +* Return if INFO is non-zero. +* + IF( INFO.GT.0 ) THEN +* +* Pivot in column INFO is exactly 0 +* Compute the reciprocal pivot growth factor of the +* leading rank-deficient INFO columns of A. +* + IF ( N.GT.0 ) + $ RPVGRW = ZLA_SYRPVGRW( UPLO, N, INFO, A, LDA, AF, + $ LDAF, IPIV, RWORK ) + RETURN + END IF + END IF +* +* Compute the reciprocal pivot growth factor RPVGRW. +* + IF ( N.GT.0 ) + $ RPVGRW = ZLA_SYRPVGRW( UPLO, N, INFO, A, LDA, AF, LDAF, + $ IPIV, RWORK ) +* +* Compute the solution matrix X. +* + CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX ) + CALL ZSYTRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO ) +* +* Use iterative refinement to improve the computed solution and +* compute error bounds and backward error estimates for it. +* + CALL ZSYRFSX( UPLO, EQUED, N, NRHS, A, LDA, AF, LDAF, IPIV, + $ S, B, LDB, X, LDX, RCOND, BERR, N_ERR_BNDS, ERR_BNDS_NORM, + $ ERR_BNDS_COMP, NPARAMS, PARAMS, WORK, RWORK, INFO ) +* +* Scale solutions. +* + IF ( RCEQU ) THEN + CALL ZLASCL2 (N, NRHS, S, X, LDX ) + END IF +* + RETURN +* +* End of ZSYSVXX +* + END diff --git a/dspl/liblapack/SRC/zsyswapr.f b/dspl/liblapack/SRC/zsyswapr.f new file mode 100644 index 0000000..ea79c8f --- /dev/null +++ b/dspl/liblapack/SRC/zsyswapr.f @@ -0,0 +1,193 @@ +*> \brief \b ZSYSWAPR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYSWAPR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYSWAPR( UPLO, N, A, LDA, I1, I2) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER I1, I2, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, N ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYSWAPR applies an elementary permutation on the rows and the columns of +*> a symmetric matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the NB diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by ZSYTRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] I1 +*> \verbatim +*> I1 is INTEGER +*> Index of the first row to swap +*> \endverbatim +*> +*> \param[in] I2 +*> \verbatim +*> I2 is INTEGER +*> Index of the second row to swap +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYauxiliary +* +* ===================================================================== + SUBROUTINE ZSYSWAPR( UPLO, N, A, LDA, I1, I2) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER I1, I2, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, N ) +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + COMPLEX*16 TMP +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZSWAP +* .. +* .. Executable Statements .. +* + UPPER = LSAME( UPLO, 'U' ) + IF (UPPER) THEN +* +* UPPER +* first swap +* - swap column I1 and I2 from I1 to I1-1 + CALL ZSWAP( I1-1, A(1,I1), 1, A(1,I2), 1 ) +* +* second swap : +* - swap A(I1,I1) and A(I2,I2) +* - swap row I1 from I1+1 to I2-1 with col I2 from I1+1 to I2-1 + TMP=A(I1,I1) + A(I1,I1)=A(I2,I2) + A(I2,I2)=TMP +* + DO I=1,I2-I1-1 + TMP=A(I1,I1+I) + A(I1,I1+I)=A(I1+I,I2) + A(I1+I,I2)=TMP + END DO +* +* third swap +* - swap row I1 and I2 from I2+1 to N + DO I=I2+1,N + TMP=A(I1,I) + A(I1,I)=A(I2,I) + A(I2,I)=TMP + END DO +* + ELSE +* +* LOWER +* first swap +* - swap row I1 and I2 from I1 to I1-1 + CALL ZSWAP( I1-1, A(I1,1), LDA, A(I2,1), LDA ) +* +* second swap : +* - swap A(I1,I1) and A(I2,I2) +* - swap col I1 from I1+1 to I2-1 with row I2 from I1+1 to I2-1 + TMP=A(I1,I1) + A(I1,I1)=A(I2,I2) + A(I2,I2)=TMP +* + DO I=1,I2-I1-1 + TMP=A(I1+I,I1) + A(I1+I,I1)=A(I2,I1+I) + A(I2,I1+I)=TMP + END DO +* +* third swap +* - swap col I1 and I2 from I2+1 to N + DO I=I2+1,N + TMP=A(I,I1) + A(I,I1)=A(I,I2) + A(I,I2)=TMP + END DO +* + ENDIF + END SUBROUTINE ZSYSWAPR + diff --git a/dspl/liblapack/SRC/zsytf2.f b/dspl/liblapack/SRC/zsytf2.f new file mode 100644 index 0000000..766d61c --- /dev/null +++ b/dspl/liblapack/SRC/zsytf2.f @@ -0,0 +1,611 @@ +*> \brief \b ZSYTF2 computes the factorization of a real symmetric indefinite matrix, using the diagonal pivoting method (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTF2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTF2( UPLO, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYTF2 computes the factorization of a complex symmetric matrix A +*> using the Bunch-Kaufman diagonal pivoting method: +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, U**T is the transpose of U, and D is symmetric and +*> block diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k-1) < 0, then rows and columns +*> k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) = IPIV(k+1) < 0, then rows and columns +*> k+1 and -IPIV(k) were interchanged and D(k:k+1,k:k+1) +*> is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> 09-29-06 - patch from +*> Bobby Cheng, MathWorks +*> +*> Replace l.209 and l.377 +*> IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN +*> by +*> IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) .OR. DISNAN(ABSAKK) ) THEN +*> +*> 1-96 - Based on modifications by J. Lewis, Boeing Computer Services +*> Company +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZSYTF2( UPLO, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IMAX, J, JMAX, K, KK, KP, KSTEP + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, ROWMAX + COMPLEX*16 D11, D12, D21, D22, R1, T, WK, WKM1, WKP1, Z +* .. +* .. External Functions .. + LOGICAL DISNAN, LSAME + INTEGER IZAMAX + EXTERNAL DISNAN, LSAME, IZAMAX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZSCAL, ZSWAP, ZSYR +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, SQRT +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( DBLE( Z ) ) + ABS( DIMAG( Z ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTF2', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 70 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = IZAMAX( K-1, A( 1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO .OR. DISNAN(ABSAKK) ) THEN +* +* Column K is zero or underflow, or contains a NaN: +* set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = IMAX + IZAMAX( K-IMAX, A( IMAX, IMAX+1 ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + IF( IMAX.GT.1 ) THEN + JMAX = IZAMAX( IMAX-1, A( 1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K-1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K - KSTEP + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + CALL ZSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) + CALL ZSWAP( KK-KP-1, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* +* A := A - U(k)*D(k)*U(k)**T = A - W(k)*1/D(k)*W(k)**T +* + R1 = CONE / A( K, K ) + CALL ZSYR( UPLO, K-1, -R1, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL ZSCAL( K-1, R1, A( 1, K ), 1 ) + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T +* = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )**T +* + IF( K.GT.2 ) THEN +* + D12 = A( K-1, K ) + D22 = A( K-1, K-1 ) / D12 + D11 = A( K, K ) / D12 + T = CONE / ( D11*D22-CONE ) + D12 = T / D12 +* + DO 30 J = K - 2, 1, -1 + WKM1 = D12*( D11*A( J, K-1 )-A( J, K ) ) + WK = D12*( D22*A( J, K )-A( J, K-1 ) ) + DO 20 I = J, 1, -1 + A( I, J ) = A( I, J ) - A( I, K )*WK - + $ A( I, K-1 )*WKM1 + 20 CONTINUE + A( J, K ) = WK + A( J, K-1 ) = WKM1 + 30 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 70 + KSTEP = 1 +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + IZAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( MAX( ABSAKK, COLMAX ).EQ.ZERO .OR. DISNAN(ABSAKK) ) THEN +* +* Column K is zero or underflow, or contains a NaN: +* set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE + IF( ABSAKK.GE.ALPHA*COLMAX ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value +* + JMAX = K - 1 + IZAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + IF( IMAX.LT.N ) THEN + JMAX = IMAX + IZAMAX( N-IMAX, A( IMAX+1, IMAX ), 1 ) + ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) ) + END IF +* + IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE IF( CABS1( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN +* +* interchange rows and columns K and IMAX, use 1-by-1 +* pivot block +* + KP = IMAX + ELSE +* +* interchange rows and columns K+1 and IMAX, use 2-by-2 +* pivot block +* + KP = IMAX + KSTEP = 2 + END IF + END IF +* + KK = K + KSTEP - 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) + CALL ZSWAP( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* +* A := A - L(k)*D(k)*L(k)**T = A - W(k)*(1/D(k))*W(k)**T +* + R1 = CONE / A( K, K ) + CALL ZSYR( UPLO, N-K, -R1, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column K +* + CALL ZSCAL( N-K, R1, A( K+1, K ), 1 ) + END IF + ELSE +* +* 2-by-2 pivot block D(k) +* + IF( K.LT.N-1 ) THEN +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )**T +* = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )**T +* +* where L(k) and L(k+1) are the k-th and (k+1)-th +* columns of L +* + D21 = A( K+1, K ) + D11 = A( K+1, K+1 ) / D21 + D22 = A( K, K ) / D21 + T = CONE / ( D11*D22-CONE ) + D21 = T / D21 +* + DO 60 J = K + 2, N + WK = D21*( D11*A( J, K )-A( J, K+1 ) ) + WKP1 = D21*( D22*A( J, K+1 )-A( J, K ) ) + DO 50 I = J, N + A( I, J ) = A( I, J ) - A( I, K )*WK - + $ A( I, K+1 )*WKP1 + 50 CONTINUE + A( J, K ) = WK + A( J, K+1 ) = WKP1 + 60 CONTINUE + END IF + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -KP + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 40 +* + END IF +* + 70 CONTINUE + RETURN +* +* End of ZSYTF2 +* + END diff --git a/dspl/liblapack/SRC/zsytf2_rk.f b/dspl/liblapack/SRC/zsytf2_rk.f new file mode 100644 index 0000000..b1a02f4 --- /dev/null +++ b/dspl/liblapack/SRC/zsytf2_rk.f @@ -0,0 +1,952 @@ +*> \brief \b ZSYTF2_RK computes the factorization of a complex symmetric indefinite matrix using the bounded Bunch-Kaufman (rook) diagonal pivoting method (BLAS2 unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTF2_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTF2_RK( UPLO, N, A, LDA, E, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), E ( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> ZSYTF2_RK computes the factorization of a complex symmetric matrix A +*> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> For more information see Further Details section. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, contains: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N) +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> IPIV describes the permutation matrix P in the factorization +*> of matrix A as follows. The absolute value of IPIV(k) +*> represents the index of row and column that were +*> interchanged with the k-th row and column. The value of UPLO +*> describes the order in which the interchanges were applied. +*> Also, the sign of IPIV represents the block structure of +*> the symmetric block diagonal matrix D with 1-by-1 or 2-by-2 +*> diagonal blocks which correspond to 1 or 2 interchanges +*> at each factorization step. For more info see Further +*> Details section. +*> +*> If UPLO = 'U', +*> ( in factorization order, k decreases from N to 1 ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N); +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k-1) < 0 means: +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k-1) != k-1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k-1) = k-1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) <= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> +*> If UPLO = 'L', +*> ( in factorization order, k increases from 1 to N ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N). +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k+1) < 0 means: +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k+1) != k+1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k+1) = k+1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) >= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> TODO: put further details +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> 01-01-96 - Based on modifications by +*> J. Lewis, Boeing Computer Services Company +*> A. Petitet, Computer Science Dept., +*> Univ. of Tenn., Knoxville abd , USA +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZSYTF2_RK( UPLO, N, A, LDA, E, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) + COMPLEX*16 CONE, CZERO + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), + $ CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER, DONE + INTEGER I, IMAX, J, JMAX, ITEMP, K, KK, KP, KSTEP, + $ P, II + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, ROWMAX, DTEMP, SFMIN + COMPLEX*16 D11, D12, D21, D22, T, WK, WKM1, WKP1, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IZAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL ZSCAL, ZSWAP, ZSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT, DIMAG, DBLE +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( DBLE( Z ) ) + ABS( DIMAG( Z ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTF2_RK', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = DLAMCH( 'S' ) +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* Initilize the first entry of array E, where superdiagonal +* elements of D are stored +* + E( 1 ) = CZERO +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 34 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = IZAMAX( K-1, A( 1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K +* +* Set E( K ) to zero +* + IF( K.GT.1 ) + $ E( K ) = CZERO +* + ELSE +* +* Test for interchange +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABSAKK.GE.ALPHA*COLMAX +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, +* use 1-by-1 pivot block +* + KP = K + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 12 CONTINUE +* +* Begin pivot search loop body +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + IZAMAX( K-IMAX, A( IMAX, IMAX+1 ), + $ LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = IZAMAX( IMAX-1, A( 1, IMAX ), 1 ) + DTEMP = CABS1( A( ITEMP, IMAX ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX +* + IF( .NOT.( CABS1( A( IMAX, IMAX ) ).LT.ALPHA*ROWMAX )) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX .EQ. COLMAX, +* used to handle NaN and Inf +* + ELSE IF( ( P.EQ.JMAX ).OR.( ROWMAX.LE.COLMAX ) ) THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot NOT found, set variables and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 12 +* + END IF +* +* Swap TWO rows and TWO columns +* +* First swap +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Interchange rows and column K and P in the leading +* submatrix A(1:k,1:k) if we have a 2-by-2 pivot +* + IF( P.GT.1 ) + $ CALL ZSWAP( P-1, A( 1, K ), 1, A( 1, P ), 1 ) + IF( P.LT.(K-1) ) + $ CALL ZSWAP( K-P-1, A( P+1, K ), 1, A( P, P+1 ), + $ LDA ) + T = A( K, K ) + A( K, K ) = A( P, P ) + A( P, P ) = T +* +* Convert upper triangle of A into U form by applying +* the interchanges in columns k+1:N. +* + IF( K.LT.N ) + $ CALL ZSWAP( N-K, A( K, K+1 ), LDA, A( P, K+1 ), LDA ) +* + END IF +* +* Second swap +* + KK = K - KSTEP + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + IF( KP.GT.1 ) + $ CALL ZSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) + IF( ( KK.GT.1 ) .AND. ( KP.LT.(KK-1) ) ) + $ CALL ZSWAP( KK-KP-1, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF +* +* Convert upper triangle of A into U form by applying +* the interchanges in columns k+1:N. +* + IF( K.LT.N ) + $ CALL ZSWAP( N-K, A( KK, K+1 ), LDA, A( KP, K+1 ), + $ LDA ) +* + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* + IF( K.GT.1 ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) and +* store U(k) in column k +* + IF( CABS1( A( K, K ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*1/D(k)*W(k)**T +* + D11 = CONE / A( K, K ) + CALL ZSYR( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL ZSCAL( K-1, D11, A( 1, K ), 1 ) + ELSE +* +* Store L(k) in column K +* + D11 = A( K, K ) + DO 16 II = 1, K - 1 + A( II, K ) = A( II, K ) / D11 + 16 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL ZSYR( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) + END IF +* +* Store the superdiagonal element of D in array E +* + E( K ) = CZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T +* = A - ( ( A(k-1)A(k) )*inv(D(k)) ) * ( A(k-1)A(k) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.GT.2 ) THEN +* + D12 = A( K-1, K ) + D22 = A( K-1, K-1 ) / D12 + D11 = A( K, K ) / D12 + T = CONE / ( D11*D22-CONE ) +* + DO 30 J = K - 2, 1, -1 +* + WKM1 = T*( D11*A( J, K-1 )-A( J, K ) ) + WK = T*( D22*A( J, K )-A( J, K-1 ) ) +* + DO 20 I = J, 1, -1 + A( I, J ) = A( I, J ) - (A( I, K ) / D12 )*WK - + $ ( A( I, K-1 ) / D12 )*WKM1 + 20 CONTINUE +* +* Store U(k) and U(k-1) in cols k and k-1 for row J +* + A( J, K ) = WK / D12 + A( J, K-1 ) = WKM1 / D12 +* + 30 CONTINUE +* + END IF +* +* Copy superdiagonal elements of D(K) to E(K) and +* ZERO out superdiagonal entry of A +* + E( K ) = A( K-1, K ) + E( K-1 ) = CZERO + A( K-1, K ) = CZERO +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + 34 CONTINUE +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* Initilize the unused last entry of the subdiagonal array E. +* + E( N ) = CZERO +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 64 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + IZAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( ( MAX( ABSAKK, COLMAX ).EQ.ZERO ) ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K +* +* Set E( K ) to zero +* + IF( K.LT.N ) + $ E( K ) = CZERO +* + ELSE +* +* Test for interchange +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABSAKK.GE.ALPHA*COLMAX +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K +* + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 42 CONTINUE +* +* Begin pivot search loop body +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + IZAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + IZAMAX( N-IMAX, A( IMAX+1, IMAX ), + $ 1 ) + DTEMP = CABS1( A( ITEMP, IMAX ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX +* + IF( .NOT.( CABS1( A( IMAX, IMAX ) ).LT.ALPHA*ROWMAX )) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX .EQ. COLMAX, +* used to handle NaN and Inf +* + ELSE IF( ( P.EQ.JMAX ).OR.( ROWMAX.LE.COLMAX ) ) THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot NOT found, set variables and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 42 +* + END IF +* +* Swap TWO rows and TWO columns +* +* First swap +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Interchange rows and column K and P in the trailing +* submatrix A(k:n,k:n) if we have a 2-by-2 pivot +* + IF( P.LT.N ) + $ CALL ZSWAP( N-P, A( P+1, K ), 1, A( P+1, P ), 1 ) + IF( P.GT.(K+1) ) + $ CALL ZSWAP( P-K-1, A( K+1, K ), 1, A( P, K+1 ), LDA ) + T = A( K, K ) + A( K, K ) = A( P, P ) + A( P, P ) = T +* +* Convert lower triangle of A into L form by applying +* the interchanges in columns 1:k-1. +* + IF ( K.GT.1 ) + $ CALL ZSWAP( K-1, A( K, 1 ), LDA, A( P, 1 ), LDA ) +* + END IF +* +* Second swap +* + KK = K + KSTEP - 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) + IF( ( KK.LT.N ) .AND. ( KP.GT.(KK+1) ) ) + $ CALL ZSWAP( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF +* +* Convert lower triangle of A into L form by applying +* the interchanges in columns 1:k-1. +* + IF ( K.GT.1 ) + $ CALL ZSWAP( K-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA ) +* + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) and +* store L(k) in column k +* + IF( CABS1( A( K, K ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* + D11 = CONE / A( K, K ) + CALL ZSYR( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column k +* + CALL ZSCAL( N-K, D11, A( K+1, K ), 1 ) + ELSE +* +* Store L(k) in column k +* + D11 = A( K, K ) + DO 46 II = K + 1, N + A( II, K ) = A( II, K ) / D11 + 46 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL ZSYR( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) + END IF +* +* Store the subdiagonal element of D in array E +* + E( K ) = CZERO +* + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) ) * D(k) * ( L(k) L(k+1) )**T +* = A - ( ( A(k)A(k+1) )*inv(D(k) ) * ( A(k)A(k+1) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.LT.N-1 ) THEN +* + D21 = A( K+1, K ) + D11 = A( K+1, K+1 ) / D21 + D22 = A( K, K ) / D21 + T = CONE / ( D11*D22-CONE ) +* + DO 60 J = K + 2, N +* +* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J +* + WK = T*( D11*A( J, K )-A( J, K+1 ) ) + WKP1 = T*( D22*A( J, K+1 )-A( J, K ) ) +* +* Perform a rank-2 update of A(k+2:n,k+2:n) +* + DO 50 I = J, N + A( I, J ) = A( I, J ) - ( A( I, K ) / D21 )*WK - + $ ( A( I, K+1 ) / D21 )*WKP1 + 50 CONTINUE +* +* Store L(k) and L(k+1) in cols k and k+1 for row J +* + A( J, K ) = WK / D21 + A( J, K+1 ) = WKP1 / D21 +* + 60 CONTINUE +* + END IF +* +* Copy subdiagonal elements of D(K) to E(K) and +* ZERO out subdiagonal entry of A +* + E( K ) = A( K+1, K ) + E( K+1 ) = CZERO + A( K+1, K ) = CZERO +* + END IF +* +* End column K is nonsingular +* + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 40 +* + 64 CONTINUE +* + END IF +* + RETURN +* +* End of ZSYTF2_RK +* + END diff --git a/dspl/liblapack/SRC/zsytf2_rook.f b/dspl/liblapack/SRC/zsytf2_rook.f new file mode 100644 index 0000000..cf964ae --- /dev/null +++ b/dspl/liblapack/SRC/zsytf2_rook.f @@ -0,0 +1,821 @@ +*> \brief \b ZSYTF2_ROOK computes the factorization of a complex symmetric indefinite matrix using the bounded Bunch-Kaufman ("rook") diagonal pivoting method (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTF2_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTF2_ROOK( UPLO, N, A, LDA, IPIV, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYTF2_ROOK computes the factorization of a complex symmetric matrix A +*> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method: +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, U**T is the transpose of U, and D is symmetric and +*> block diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the unblocked version of the algorithm, calling Level 2 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> > 0: if INFO = k, D(k,k) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2013 +* +*> \ingroup complex16SYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> November 2013, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> 01-01-96 - Based on modifications by +*> J. Lewis, Boeing Computer Services Company +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville abd , USA +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZSYTF2_ROOK( UPLO, N, A, LDA, IPIV, INFO ) +* +* -- LAPACK computational routine (version 3.5.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2013 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + DOUBLE PRECISION EIGHT, SEVTEN + PARAMETER ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER, DONE + INTEGER I, IMAX, J, JMAX, ITEMP, K, KK, KP, KSTEP, + $ P, II + DOUBLE PRECISION ABSAKK, ALPHA, COLMAX, ROWMAX, DTEMP, SFMIN + COMPLEX*16 D11, D12, D21, D22, T, WK, WKM1, WKP1, Z +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, IZAMAX, DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL ZSCAL, ZSWAP, ZSYR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT, DIMAG, DBLE +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( Z ) = ABS( DBLE( Z ) ) + ABS( DIMAG( Z ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTF2_ROOK', -INFO ) + RETURN + END IF +* +* Initialize ALPHA for use in choosing pivot block size. +* + ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT +* +* Compute machine safe minimum +* + SFMIN = DLAMCH( 'S' ) +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2 +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 70 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.GT.1 ) THEN + IMAX = IZAMAX( K-1, A( 1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( (MAX( ABSAKK, COLMAX ).EQ.ZERO) ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE +* +* Test for interchange +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABSAKK.GE.ALPHA*COLMAX +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, +* use 1-by-1 pivot block +* + KP = K + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 12 CONTINUE +* +* Begin pivot search loop body +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = IMAX + IZAMAX( K-IMAX, A( IMAX, IMAX+1 ), + $ LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.GT.1 ) THEN + ITEMP = IZAMAX( IMAX-1, A( 1, IMAX ), 1 ) + DTEMP = CABS1( A( ITEMP, IMAX ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for (used to handle NaN and Inf) +* CABS1( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX +* + IF( .NOT.( CABS1(A( IMAX, IMAX )).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX .EQ. COLMAX, +* used to handle NaN and Inf +* + ELSE IF( ( P.EQ.JMAX ).OR.( ROWMAX.LE.COLMAX ) ) THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot NOT found, set variables and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 12 +* + END IF +* +* Swap TWO rows and TWO columns +* +* First swap +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Interchange rows and column K and P in the leading +* submatrix A(1:k,1:k) if we have a 2-by-2 pivot +* + IF( P.GT.1 ) + $ CALL ZSWAP( P-1, A( 1, K ), 1, A( 1, P ), 1 ) + IF( P.LT.(K-1) ) + $ CALL ZSWAP( K-P-1, A( P+1, K ), 1, A( P, P+1 ), + $ LDA ) + T = A( K, K ) + A( K, K ) = A( P, P ) + A( P, P ) = T + END IF +* +* Second swap +* + KK = K - KSTEP + 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the leading +* submatrix A(1:k,1:k) +* + IF( KP.GT.1 ) + $ CALL ZSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 ) + IF( ( KK.GT.1 ) .AND. ( KP.LT.(KK-1) ) ) + $ CALL ZSWAP( KK-KP-1, A( KP+1, KK ), 1, A( KP, KP+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K-1, K ) + A( K-1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the leading submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = U(k)*D(k) +* +* where U(k) is the k-th column of U +* + IF( K.GT.1 ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) and +* store U(k) in column k +* + IF( CABS1( A( K, K ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(1:k-1,1:k-1) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*1/D(k)*W(k)**T +* + D11 = CONE / A( K, K ) + CALL ZSYR( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) +* +* Store U(k) in column k +* + CALL ZSCAL( K-1, D11, A( 1, K ), 1 ) + ELSE +* +* Store L(k) in column K +* + D11 = A( K, K ) + DO 16 II = 1, K - 1 + A( II, K ) = A( II, K ) / D11 + 16 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - U(k)*D(k)*U(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL ZSYR( UPLO, K-1, -D11, A( 1, K ), 1, A, LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k-1 now hold +* +* ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k) +* +* where U(k) and U(k-1) are the k-th and (k-1)-th columns +* of U +* +* Perform a rank-2 update of A(1:k-2,1:k-2) as +* +* A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )**T +* = A - ( ( A(k-1)A(k) )*inv(D(k)) ) * ( A(k-1)A(k) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.GT.2 ) THEN +* + D12 = A( K-1, K ) + D22 = A( K-1, K-1 ) / D12 + D11 = A( K, K ) / D12 + T = CONE / ( D11*D22-CONE ) +* + DO 30 J = K - 2, 1, -1 +* + WKM1 = T*( D11*A( J, K-1 )-A( J, K ) ) + WK = T*( D22*A( J, K )-A( J, K-1 ) ) +* + DO 20 I = J, 1, -1 + A( I, J ) = A( I, J ) - (A( I, K ) / D12 )*WK - + $ ( A( I, K-1 ) / D12 )*WKM1 + 20 CONTINUE +* +* Store U(k) and U(k-1) in cols k and k-1 for row J +* + A( J, K ) = WK / D12 + A( J, K-1 ) = WKM1 / D12 +* + 30 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K-1 ) = -KP + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KSTEP + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2 +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 70 + KSTEP = 1 + P = K +* +* Determine rows and columns to be interchanged and whether +* a 1-by-1 or 2-by-2 pivot block will be used +* + ABSAKK = CABS1( A( K, K ) ) +* +* IMAX is the row-index of the largest off-diagonal element in +* column K, and COLMAX is its absolute value. +* Determine both COLMAX and IMAX. +* + IF( K.LT.N ) THEN + IMAX = K + IZAMAX( N-K, A( K+1, K ), 1 ) + COLMAX = CABS1( A( IMAX, K ) ) + ELSE + COLMAX = ZERO + END IF +* + IF( ( MAX( ABSAKK, COLMAX ).EQ.ZERO ) ) THEN +* +* Column K is zero or underflow: set INFO and continue +* + IF( INFO.EQ.0 ) + $ INFO = K + KP = K + ELSE +* +* Test for interchange +* +* Equivalent to testing for (used to handle NaN and Inf) +* ABSAKK.GE.ALPHA*COLMAX +* + IF( .NOT.( ABSAKK.LT.ALPHA*COLMAX ) ) THEN +* +* no interchange, use 1-by-1 pivot block +* + KP = K + ELSE +* + DONE = .FALSE. +* +* Loop until pivot found +* + 42 CONTINUE +* +* Begin pivot search loop body +* +* JMAX is the column-index of the largest off-diagonal +* element in row IMAX, and ROWMAX is its absolute value. +* Determine both ROWMAX and JMAX. +* + IF( IMAX.NE.K ) THEN + JMAX = K - 1 + IZAMAX( IMAX-K, A( IMAX, K ), LDA ) + ROWMAX = CABS1( A( IMAX, JMAX ) ) + ELSE + ROWMAX = ZERO + END IF +* + IF( IMAX.LT.N ) THEN + ITEMP = IMAX + IZAMAX( N-IMAX, A( IMAX+1, IMAX ), + $ 1 ) + DTEMP = CABS1( A( ITEMP, IMAX ) ) + IF( DTEMP.GT.ROWMAX ) THEN + ROWMAX = DTEMP + JMAX = ITEMP + END IF + END IF +* +* Equivalent to testing for (used to handle NaN and Inf) +* CABS1( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX +* + IF( .NOT.( CABS1(A( IMAX, IMAX )).LT.ALPHA*ROWMAX ) ) + $ THEN +* +* interchange rows and columns K and IMAX, +* use 1-by-1 pivot block +* + KP = IMAX + DONE = .TRUE. +* +* Equivalent to testing for ROWMAX .EQ. COLMAX, +* used to handle NaN and Inf +* + ELSE IF( ( P.EQ.JMAX ).OR.( ROWMAX.LE.COLMAX ) ) THEN +* +* interchange rows and columns K+1 and IMAX, +* use 2-by-2 pivot block +* + KP = IMAX + KSTEP = 2 + DONE = .TRUE. + ELSE +* +* Pivot NOT found, set variables and repeat +* + P = IMAX + COLMAX = ROWMAX + IMAX = JMAX + END IF +* +* End pivot search loop body +* + IF( .NOT. DONE ) GOTO 42 +* + END IF +* +* Swap TWO rows and TWO columns +* +* First swap +* + IF( ( KSTEP.EQ.2 ) .AND. ( P.NE.K ) ) THEN +* +* Interchange rows and column K and P in the trailing +* submatrix A(k:n,k:n) if we have a 2-by-2 pivot +* + IF( P.LT.N ) + $ CALL ZSWAP( N-P, A( P+1, K ), 1, A( P+1, P ), 1 ) + IF( P.GT.(K+1) ) + $ CALL ZSWAP( P-K-1, A( K+1, K ), 1, A( P, K+1 ), LDA ) + T = A( K, K ) + A( K, K ) = A( P, P ) + A( P, P ) = T + END IF +* +* Second swap +* + KK = K + KSTEP - 1 + IF( KP.NE.KK ) THEN +* +* Interchange rows and columns KK and KP in the trailing +* submatrix A(k:n,k:n) +* + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 ) + IF( ( KK.LT.N ) .AND. ( KP.GT.(KK+1) ) ) + $ CALL ZSWAP( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ), + $ LDA ) + T = A( KK, KK ) + A( KK, KK ) = A( KP, KP ) + A( KP, KP ) = T + IF( KSTEP.EQ.2 ) THEN + T = A( K+1, K ) + A( K+1, K ) = A( KP, K ) + A( KP, K ) = T + END IF + END IF +* +* Update the trailing submatrix +* + IF( KSTEP.EQ.1 ) THEN +* +* 1-by-1 pivot block D(k): column k now holds +* +* W(k) = L(k)*D(k) +* +* where L(k) is the k-th column of L +* + IF( K.LT.N ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) and +* store L(k) in column k +* + IF( CABS1( A( K, K ) ).GE.SFMIN ) THEN +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* + D11 = CONE / A( K, K ) + CALL ZSYR( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) +* +* Store L(k) in column k +* + CALL ZSCAL( N-K, D11, A( K+1, K ), 1 ) + ELSE +* +* Store L(k) in column k +* + D11 = A( K, K ) + DO 46 II = K + 1, N + A( II, K ) = A( II, K ) / D11 + 46 CONTINUE +* +* Perform a rank-1 update of A(k+1:n,k+1:n) as +* A := A - L(k)*D(k)*L(k)**T +* = A - W(k)*(1/D(k))*W(k)**T +* = A - (W(k)/D(k))*(D(k))*(W(k)/D(K))**T +* + CALL ZSYR( UPLO, N-K, -D11, A( K+1, K ), 1, + $ A( K+1, K+1 ), LDA ) + END IF + END IF +* + ELSE +* +* 2-by-2 pivot block D(k): columns k and k+1 now hold +* +* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) +* +* where L(k) and L(k+1) are the k-th and (k+1)-th columns +* of L +* +* +* Perform a rank-2 update of A(k+2:n,k+2:n) as +* +* A := A - ( L(k) L(k+1) ) * D(k) * ( L(k) L(k+1) )**T +* = A - ( ( A(k)A(k+1) )*inv(D(k) ) * ( A(k)A(k+1) )**T +* +* and store L(k) and L(k+1) in columns k and k+1 +* + IF( K.LT.N-1 ) THEN +* + D21 = A( K+1, K ) + D11 = A( K+1, K+1 ) / D21 + D22 = A( K, K ) / D21 + T = CONE / ( D11*D22-CONE ) +* + DO 60 J = K + 2, N +* +* Compute D21 * ( W(k)W(k+1) ) * inv(D(k)) for row J +* + WK = T*( D11*A( J, K )-A( J, K+1 ) ) + WKP1 = T*( D22*A( J, K+1 )-A( J, K ) ) +* +* Perform a rank-2 update of A(k+2:n,k+2:n) +* + DO 50 I = J, N + A( I, J ) = A( I, J ) - ( A( I, K ) / D21 )*WK - + $ ( A( I, K+1 ) / D21 )*WKP1 + 50 CONTINUE +* +* Store L(k) and L(k+1) in cols k and k+1 for row J +* + A( J, K ) = WK / D21 + A( J, K+1 ) = WKP1 / D21 +* + 60 CONTINUE +* + END IF +* + END IF + END IF +* +* Store details of the interchanges in IPIV +* + IF( KSTEP.EQ.1 ) THEN + IPIV( K ) = KP + ELSE + IPIV( K ) = -P + IPIV( K+1 ) = -KP + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KSTEP + GO TO 40 +* + END IF +* + 70 CONTINUE +* + RETURN +* +* End of ZSYTF2_ROOK +* + END diff --git a/dspl/liblapack/SRC/zsytrf.f b/dspl/liblapack/SRC/zsytrf.f new file mode 100644 index 0000000..663199c --- /dev/null +++ b/dspl/liblapack/SRC/zsytrf.f @@ -0,0 +1,362 @@ +*> \brief \b ZSYTRF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYTRF computes the factorization of a complex symmetric matrix A +*> using the Bunch-Kaufman diagonal pivoting method. The form of the +*> factorization is +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) were +*> interchanged and D(k,k) is a 1-by-1 diagonal block. +*> If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and +*> columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) +*> is a 2-by-2 diagonal block. If UPLO = 'L' and IPIV(k) = +*> IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were +*> interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=1. For best performance +*> LWORK >= N*NB, where NB is the block size returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLASYF, ZSYTF2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'ZSYTRF', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'ZSYTRF', UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by ZLASYF; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 40 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL ZLASYF( UPLO, K, NB, KB, A, LDA, IPIV, WORK, N, IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL ZSYTF2( UPLO, K, A, LDA, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by ZLASYF; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL ZLASYF( UPLO, N-K+1, NB, KB, A( K, K ), LDA, IPIV( K ), + $ WORK, N, IINFO ) + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL ZSYTF2( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), IINFO ) + KB = N - K + 1 + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO 30 J = K, K + KB - 1 + IF( IPIV( J ).GT.0 ) THEN + IPIV( J ) = IPIV( J ) + K - 1 + ELSE + IPIV( J ) = IPIV( J ) - K + 1 + END IF + 30 CONTINUE +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* + END IF +* + 40 CONTINUE + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZSYTRF +* + END diff --git a/dspl/liblapack/SRC/zsytrf_aa.f b/dspl/liblapack/SRC/zsytrf_aa.f new file mode 100644 index 0000000..b25b1fb --- /dev/null +++ b/dspl/liblapack/SRC/zsytrf_aa.f @@ -0,0 +1,467 @@ +*> \brief \b ZSYTRF_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRF_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYTRF_AA computes the factorization of a complex symmetric matrix A +*> using the Aasen's algorithm. The form of the factorization is +*> +*> A = U*T*U**T or A = L*T*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is a complex symmetric tridiagonal matrix. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the tridiagonal matrix is stored in the diagonals +*> and the subdiagonals of A just below (or above) the diagonals, +*> and L is stored below (or above) the subdiaonals, when UPLO +*> is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=MAX(1,2*N). For optimum performance +*> LWORK >= N*(1+NB), where NB is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER J, LWKOPT + INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB + COMPLEX*16 ALPHA +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL ZLASYF_AA, ZGEMM, ZGEMV, ZSCAL, ZCOPY, + $ ZSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Determine the block size +* + NB = ILAENV( 1, 'ZSYTRF_AA', UPLO, N, -1, -1, -1 ) +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKOPT = (NB+1)*N + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRF_AA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return +* + IF ( N.EQ.0 ) THEN + RETURN + ENDIF + IPIV( 1 ) = 1 + IF ( N.EQ.1 ) THEN + RETURN + END IF +* +* Adjust block size based on the workspace size +* + IF( LWORK.LT.((1+NB)*N) ) THEN + NB = ( LWORK-N ) / N + END IF +* + IF( UPPER ) THEN +* +* ..................................................... +* Factorize A as L*D*L**T using the upper triangle of A +* ..................................................... +* +* Copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N)) +* + CALL ZCOPY( N, A( 1, 1 ), LDA, WORK( 1 ), 1 ) +* +* J is the main loop index, increasing from 1 to N in steps of +* JB, where JB is the number of columns factorized by ZLASYF; +* JB is either NB, or N-J+1 for the last block +* + J = 0 + 10 CONTINUE + IF( J.GE.N ) + $ GO TO 20 +* +* each step of the main loop +* J is the last column of the previous panel +* J1 is the first column of the current panel +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 for the first panel, and +* K1=0 for the rest +* + J1 = J + 1 + JB = MIN( N-J1+1, NB ) + K1 = MAX(1, J)-J +* +* Panel factorization +* + CALL ZLASYF_AA( UPLO, 2-K1, N-J, JB, + $ A( MAX(1, J), J+1 ), LDA, + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) +* +* Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) +* + DO J2 = J+2, MIN(N, J+JB+1) + IPIV( J2 ) = IPIV( J2 ) + J + IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN + CALL ZSWAP( J1-K1-2, A( 1, J2 ), 1, + $ A( 1, IPIV(J2) ), 1 ) + END IF + END DO + J = J + JB +* +* Trailing submatrix update, where +* the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and +* WORK stores the current block of the auxiriarly matrix H +* + IF( J.LT.N ) THEN +* +* If first panel and JB=1 (NB=1), then nothing to do +* + IF( J1.GT.1 .OR. JB.GT.1 ) THEN +* +* Merge rank-1 update with BLAS-3 update +* + ALPHA = A( J, J+1 ) + A( J, J+1 ) = ONE + CALL ZCOPY( N-J, A( J-1, J+1 ), LDA, + $ WORK( (J+1-J1+1)+JB*N ), 1 ) + CALL ZSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 ) +* +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 and K2= 0 for the first panel, +* while K1=0 and K2=1 for the rest +* + IF( J1.GT.1 ) THEN +* +* Not first panel +* + K2 = 1 + ELSE +* +* First panel +* + K2 = 0 +* +* First update skips the first column +* + JB = JB - 1 + END IF +* + DO J2 = J+1, N, NB + NJ = MIN( NB, N-J2+1 ) +* +* Update (J2, J2) diagonal block with ZGEMV +* + J3 = J2 + DO MJ = NJ-1, 1, -1 + CALL ZGEMV( 'No transpose', MJ, JB+1, + $ -ONE, WORK( J3-J1+1+K1*N ), N, + $ A( J1-K2, J3 ), 1, + $ ONE, A( J3, J3 ), LDA ) + J3 = J3 + 1 + END DO +* +* Update off-diagonal block of J2-th block row with ZGEMM +* + CALL ZGEMM( 'Transpose', 'Transpose', + $ NJ, N-J3+1, JB+1, + $ -ONE, A( J1-K2, J2 ), LDA, + $ WORK( J3-J1+1+K1*N ), N, + $ ONE, A( J2, J3 ), LDA ) + END DO +* +* Recover T( J, J+1 ) +* + A( J, J+1 ) = ALPHA + END IF +* +* WORK(J+1, 1) stores H(J+1, 1) +* + CALL ZCOPY( N-J, A( J+1, J+1 ), LDA, WORK( 1 ), 1 ) + END IF + GO TO 10 + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* +* copy first column A(1:N, 1) into H(1:N, 1) +* (stored in WORK(1:N)) +* + CALL ZCOPY( N, A( 1, 1 ), 1, WORK( 1 ), 1 ) +* +* J is the main loop index, increasing from 1 to N in steps of +* JB, where JB is the number of columns factorized by ZLASYF; +* JB is either NB, or N-J+1 for the last block +* + J = 0 + 11 CONTINUE + IF( J.GE.N ) + $ GO TO 20 +* +* each step of the main loop +* J is the last column of the previous panel +* J1 is the first column of the current panel +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 for the first panel, and +* K1=0 for the rest +* + J1 = J+1 + JB = MIN( N-J1+1, NB ) + K1 = MAX(1, J)-J +* +* Panel factorization +* + CALL ZLASYF_AA( UPLO, 2-K1, N-J, JB, + $ A( J+1, MAX(1, J) ), LDA, + $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) ) +* +* Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot) +* + DO J2 = J+2, MIN(N, J+JB+1) + IPIV( J2 ) = IPIV( J2 ) + J + IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN + CALL ZSWAP( J1-K1-2, A( J2, 1 ), LDA, + $ A( IPIV(J2), 1 ), LDA ) + END IF + END DO + J = J + JB +* +* Trailing submatrix update, where +* A(J2+1, J1-1) stores L(J2+1, J1) and +* WORK(J2+1, 1) stores H(J2+1, 1) +* + IF( J.LT.N ) THEN +* +* if first panel and JB=1 (NB=1), then nothing to do +* + IF( J1.GT.1 .OR. JB.GT.1 ) THEN +* +* Merge rank-1 update with BLAS-3 update +* + ALPHA = A( J+1, J ) + A( J+1, J ) = ONE + CALL ZCOPY( N-J, A( J+1, J-1 ), 1, + $ WORK( (J+1-J1+1)+JB*N ), 1 ) + CALL ZSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 ) +* +* K1 identifies if the previous column of the panel has been +* explicitly stored, e.g., K1=1 and K2= 0 for the first panel, +* while K1=0 and K2=1 for the rest +* + IF( J1.GT.1 ) THEN +* +* Not first panel +* + K2 = 1 + ELSE +* +* First panel +* + K2 = 0 +* +* First update skips the first column +* + JB = JB - 1 + END IF +* + DO J2 = J+1, N, NB + NJ = MIN( NB, N-J2+1 ) +* +* Update (J2, J2) diagonal block with ZGEMV +* + J3 = J2 + DO MJ = NJ-1, 1, -1 + CALL ZGEMV( 'No transpose', MJ, JB+1, + $ -ONE, WORK( J3-J1+1+K1*N ), N, + $ A( J3, J1-K2 ), LDA, + $ ONE, A( J3, J3 ), 1 ) + J3 = J3 + 1 + END DO +* +* Update off-diagonal block in J2-th block column with ZGEMM +* + CALL ZGEMM( 'No transpose', 'Transpose', + $ N-J3+1, NJ, JB+1, + $ -ONE, WORK( J3-J1+1+K1*N ), N, + $ A( J2, J1-K2 ), LDA, + $ ONE, A( J3, J2 ), LDA ) + END DO +* +* Recover T( J+1, J ) +* + A( J+1, J ) = ALPHA + END IF +* +* WORK(J+1, 1) stores H(J+1, 1) +* + CALL ZCOPY( N-J, A( J+1, J+1 ), 1, WORK( 1 ), 1 ) + END IF + GO TO 11 + END IF +* + 20 CONTINUE + RETURN +* +* End of ZSYTRF_AA +* + END diff --git a/dspl/liblapack/SRC/zsytrf_aa_2stage.f b/dspl/liblapack/SRC/zsytrf_aa_2stage.f new file mode 100644 index 0000000..1f91672 --- /dev/null +++ b/dspl/liblapack/SRC/zsytrf_aa_2stage.f @@ -0,0 +1,668 @@ +*> \brief \b ZSYTRF_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRF_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, +* IPIV2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX*16 A( LDA, * ), TB( * ), WORK( * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYTRF_AA_2STAGE computes the factorization of a complex symmetric matrix A +*> using the Aasen's algorithm. The form of the factorization is +*> +*> A = U*T*U**T or A = L*T*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and T is a complex symmetric band matrix with the +*> bandwidth of NB (NB is internally selected and stored in TB( 1 ), and T is +*> LU factorized with partial pivoting). +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, L is stored below (or above) the subdiaonal blocks, +*> when UPLO is 'L' (or 'U'). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX*16 array, dimension (LTB) +*> On exit, details of the LU factorization of the band matrix. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N, internally +*> used to select NB such that LTB >= (3*NB+1)*N. +*> +*> If LTB = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of LTB, +*> returns this value as the first entry of TB, and +*> no error message related to LTB is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of A were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] IPIV2 +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> On exit, it contains the details of the interchanges, i.e., +*> the row and column k of T were interchanged with the +*> row and column IPIV(k). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 workspace of size LWORK +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> The size of WORK. LWORK >= N, internally used to select NB +*> such that LWORK >= N*NB. +*> +*> If LWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the WORK array, +*> returns this value as the first entry of the WORK array, and +*> no error message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: if INFO = i, band LU factorization failed on i-th column +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV, + $ IPIV2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, LDA, LTB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX*16 A( LDA, * ), TB( * ), WORK( * ) +* .. +* +* ===================================================================== +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* +* .. Local Scalars .. + LOGICAL UPPER, TQUERY, WQUERY + INTEGER I, J, K, I1, I2, TD + INTEGER LDTB, NB, KB, JB, NT, IINFO + COMPLEX*16 PIV +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZGBTRF, ZGEMM, ZGETRF, + $ ZLACPY, ZLASET, ZLASWP, ZTRSM, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + WQUERY = ( LWORK.EQ.-1 ) + TQUERY = ( LTB.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF ( LTB .LT. 4*N .AND. .NOT.TQUERY ) THEN + INFO = -6 + ELSE IF ( LWORK .LT. N .AND. .NOT.WQUERY ) THEN + INFO = -10 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRF_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Answer the query +* + NB = ILAENV( 1, 'ZSYTRF_AA_2STAGE', UPLO, N, -1, -1, -1 ) + IF( INFO.EQ.0 ) THEN + IF( TQUERY ) THEN + TB( 1 ) = (3*NB+1)*N + END IF + IF( WQUERY ) THEN + WORK( 1 ) = N*NB + END IF + END IF + IF( TQUERY .OR. WQUERY ) THEN + RETURN + END IF +* +* Quick return +* + IF ( N.EQ.0 ) THEN + RETURN + ENDIF +* +* Determine the number of the block size +* + LDTB = LTB/N + IF( LDTB .LT. 3*NB+1 ) THEN + NB = (LDTB-1)/3 + END IF + IF( LWORK .LT. NB*N ) THEN + NB = LWORK/N + END IF +* +* Determine the number of the block columns +* + NT = (N+NB-1)/NB + TD = 2*NB + KB = MIN(NB, N) +* +* Initialize vectors/matrices +* + DO J = 1, KB + IPIV( J ) = J + END DO +* +* Save NB +* + TB( 1 ) = NB +* + IF( UPPER ) THEN +* +* ..................................................... +* Factorize A as L*D*L**T using the upper triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*U(I,J) + T(I+1,I)*U(I+1,J) + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ CONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( (I-1)*NB+1, J*NB+1 ), LDA, + $ CZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J) + IF( I .EQ. (J-1) ) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ NB, KB, JB, + $ CONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (I-2)*NB+1, J*NB+1 ), LDA, + $ CZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL ZLACPY( 'Upper', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = U(1:J,J)'*H(1:J) + CALL ZGEMM( 'Transpose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -CONE, A( 1, J*NB+1 ), LDA, + $ WORK( NB+1 ), N, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += U(J,J)'*T(J,J-1)*U(J-1,J) + CALL ZGEMM( 'Transpose', 'NoTranspose', + $ KB, NB, KB, + $ CONE, A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ CZERO, WORK( 1 ), N ) + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB, + $ -CONE, WORK( 1 ), N, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + DO K = I+1, KB + TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + $ = TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + END DO + END DO + IF( J.GT.0 ) THEN +c CALL CHEGST( 1, 'Upper', KB, +c $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, +c $ A( (J-1)*NB+1, J*NB+1 ), LDA, IINFO ) + CALL ZTRSM( 'L', 'U', 'T', 'N', KB, KB, CONE, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + CALL ZTRSM( 'R', 'U', 'N', 'N', KB, KB, CONE, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, KB, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ CZERO, WORK( J*NB+1 ), N ) + ELSE + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, NB+KB, + $ CONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( (J-2)*NB+1, J*NB+1 ), LDA, + $ CZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL ZGEMM( 'Transpose', 'NoTranspose', + $ NB, N-(J+1)*NB, J*NB, + $ -CONE, WORK( NB+1 ), N, + $ A( 1, (J+1)*NB+1 ), LDA, + $ CONE, A( J*NB+1, (J+1)*NB+1 ), LDA ) + END IF +* +* Copy panel to workspace to call ZGETRF +* + DO K = 1, NB + CALL ZCOPY( N-(J+1)*NB, + $ A( J*NB+K, (J+1)*NB+1 ), LDA, + $ WORK( 1+(K-1)*N ), 1 ) + END DO +* +* Factorize panel +* + CALL ZGETRF( N-(J+1)*NB, NB, + $ WORK, N, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Copy panel back +* + DO K = 1, NB + CALL ZCOPY( N-(J+1)*NB, + $ WORK( 1+(K-1)*N ), 1, + $ A( J*NB+K, (J+1)*NB+1 ), LDA ) + END DO +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL ZLASET( 'Full', KB, NB, CZERO, CZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 ) + CALL ZLACPY( 'Upper', KB, NB, + $ WORK, N, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL ZTRSM( 'R', 'U', 'N', 'U', KB, NB, CONE, + $ A( (J-1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J,J+1) into T(J+1, J), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) + $ = TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) + END DO + END DO + CALL ZLASET( 'Lower', KB, NB, CZERO, CONE, + $ A( J*NB+1, (J+1)*NB+1), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL ZSWAP( K-1, A( (J+1)*NB+1, I1 ), 1, + $ A( (J+1)*NB+1, I2 ), 1 ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL ZSWAP( I2-I1-1, A( I1, I1+1 ), LDA, + $ A( I1+1, I2 ), 1 ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL ZSWAP( N-I2, A( I1, I2+1 ), LDA, + $ A( I2, I2+1 ), LDA ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL ZSWAP( J*NB, A( 1, I1 ), 1, + $ A( 1, I2 ), 1 ) + END IF + ENDIF + END DO + END IF + END DO + ELSE +* +* ..................................................... +* Factorize A as L*D*L**T using the lower triangle of A +* ..................................................... +* + DO J = 0, NT-1 +* +* Generate Jth column of W and H +* + KB = MIN(NB, N-J*NB) + DO I = 1, J-1 + IF( I.EQ.1 ) THEN +* H(I,J) = T(I,I)*L(J,I)' + T(I+1,I)'*L(J,I+1)' + IF( I .EQ. (J-1) ) THEN + JB = NB+KB + ELSE + JB = 2*NB + END IF + CALL ZGEMM( 'NoTranspose', 'Transpose', + $ NB, KB, JB, + $ CONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (I-1)*NB+1 ), LDA, + $ CZERO, WORK( I*NB+1 ), N ) + ELSE +* H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)' + IF( I .EQ. (J-1) ) THEN + JB = 2*NB+KB + ELSE + JB = 3*NB + END IF + CALL ZGEMM( 'NoTranspose', 'Transpose', + $ NB, KB, JB, + $ CONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (I-2)*NB+1 ), LDA, + $ CZERO, WORK( I*NB+1 ), N ) + END IF + END DO +* +* Compute T(J,J) +* + CALL ZLACPY( 'Lower', KB, KB, A( J*NB+1, J*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.1 ) THEN +* T(J,J) = L(J,1:J)*H(1:J) + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ KB, KB, (J-1)*NB, + $ -CONE, A( J*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) +* T(J,J) += L(J,J)*T(J,J-1)*L(J,J-1)' + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ KB, NB, KB, + $ CONE, A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1, + $ CZERO, WORK( 1 ), N ) + CALL ZGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, NB, + $ -CONE, WORK( 1 ), N, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Expand T(J,J) into full format +* + DO I = 1, KB + DO K = I+1, KB + TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + $ = TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + END DO + END DO + IF( J.GT.0 ) THEN +c CALL CHEGST( 1, 'Lower', KB, +c $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1, +c $ A( J*NB+1, (J-1)*NB+1 ), LDA, IINFO ) + CALL ZTRSM( 'L', 'L', 'N', 'N', KB, KB, CONE, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + CALL ZTRSM( 'R', 'L', 'T', 'N', KB, KB, CONE, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Symmetrize T(J,J) +* + DO I = 1, KB + DO K = I+1, KB + TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB ) + $ = TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB ) + END DO + END DO +* + IF( J.LT.NT-1 ) THEN + IF( J.GT.0 ) THEN +* +* Compute H(J,J) +* + IF( J.EQ.1 ) THEN + CALL ZGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, KB, + $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ CZERO, WORK( J*NB+1 ), N ) + ELSE + CALL ZGEMM( 'NoTranspose', 'Transpose', + $ KB, KB, NB+KB, + $ CONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ), + $ LDTB-1, + $ A( J*NB+1, (J-2)*NB+1 ), LDA, + $ CZERO, WORK( J*NB+1 ), N ) + END IF +* +* Update with the previous column +* + CALL ZGEMM( 'NoTranspose', 'NoTranspose', + $ N-(J+1)*NB, NB, J*NB, + $ -CONE, A( (J+1)*NB+1, 1 ), LDA, + $ WORK( NB+1 ), N, + $ CONE, A( (J+1)*NB+1, J*NB+1 ), LDA ) + END IF +* +* Factorize panel +* + CALL ZGETRF( N-(J+1)*NB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ IPIV( (J+1)*NB+1 ), IINFO ) +c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN +c INFO = IINFO+(J+1)*NB +c END IF +* +* Compute T(J+1, J), zero out for GEMM update +* + KB = MIN(NB, N-(J+1)*NB) + CALL ZLASET( 'Full', KB, NB, CZERO, CZERO, + $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 ) + CALL ZLACPY( 'Upper', KB, NB, + $ A( (J+1)*NB+1, J*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + IF( J.GT.0 ) THEN + CALL ZTRSM( 'R', 'L', 'T', 'U', KB, NB, CONE, + $ A( J*NB+1, (J-1)*NB+1 ), LDA, + $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 ) + END IF +* +* Copy T(J+1,J) into T(J, J+1), both upper/lower for GEMM +* updates +* + DO K = 1, NB + DO I = 1, KB + TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) = + $ TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB ) + END DO + END DO + CALL ZLASET( 'Upper', KB, NB, CZERO, CONE, + $ A( (J+1)*NB+1, J*NB+1 ), LDA ) +* +* Apply pivots to trailing submatrix of A +* + DO K = 1, KB +* > Adjust ipiv + IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB +* + I1 = (J+1)*NB+K + I2 = IPIV( (J+1)*NB+K ) + IF( I1.NE.I2 ) THEN +* > Apply pivots to previous columns of L + CALL ZSWAP( K-1, A( I1, (J+1)*NB+1 ), LDA, + $ A( I2, (J+1)*NB+1 ), LDA ) +* > Swap A(I1+1:M, I1) with A(I2, I1+1:M) + CALL ZSWAP( I2-I1-1, A( I1+1, I1 ), 1, + $ A( I2, I1+1 ), LDA ) +* > Swap A(I2+1:M, I1) with A(I2+1:M, I2) + CALL ZSWAP( N-I2, A( I2+1, I1 ), 1, + $ A( I2+1, I2 ), 1 ) +* > Swap A(I1, I1) with A(I2, I2) + PIV = A( I1, I1 ) + A( I1, I1 ) = A( I2, I2 ) + A( I2, I2 ) = PIV +* > Apply pivots to previous columns of L + IF( J.GT.0 ) THEN + CALL ZSWAP( J*NB, A( I1, 1 ), LDA, + $ A( I2, 1 ), LDA ) + END IF + ENDIF + END DO +* +* Apply pivots to previous columns of L +* +c CALL ZLASWP( J*NB, A( 1, 1 ), LDA, +c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) + END IF + END DO + END IF +* +* Factor the band matrix + CALL ZGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) +* +* End of ZSYTRF_AA_2STAGE +* + END diff --git a/dspl/liblapack/SRC/zsytrf_rk.f b/dspl/liblapack/SRC/zsytrf_rk.f new file mode 100644 index 0000000..2fabf9d --- /dev/null +++ b/dspl/liblapack/SRC/zsytrf_rk.f @@ -0,0 +1,498 @@ +*> \brief \b ZSYTRF_RK computes the factorization of a complex symmetric indefinite matrix using the bounded Bunch-Kaufman (rook) diagonal pivoting method (BLAS3 blocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRF_RK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), E ( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> ZSYTRF_RK computes the factorization of a complex symmetric matrix A +*> using the bounded Bunch-Kaufman (rook) diagonal pivoting method: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> For more information see Further Details section. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> symmetric matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. +*> If UPLO = 'U': the leading N-by-N upper triangular part +*> of A contains the upper triangular part of the matrix A, +*> and the strictly lower triangular part of A is not +*> referenced. +*> +*> If UPLO = 'L': the leading N-by-N lower triangular part +*> of A contains the lower triangular part of the matrix A, +*> and the strictly upper triangular part of A is not +*> referenced. +*> +*> On exit, contains: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> are stored on exit in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N) +*> On exit, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) is set to 0; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) is set to 0. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is set to 0 in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> IPIV describes the permutation matrix P in the factorization +*> of matrix A as follows. The absolute value of IPIV(k) +*> represents the index of row and column that were +*> interchanged with the k-th row and column. The value of UPLO +*> describes the order in which the interchanges were applied. +*> Also, the sign of IPIV represents the block structure of +*> the symmetric block diagonal matrix D with 1-by-1 or 2-by-2 +*> diagonal blocks which correspond to 1 or 2 interchanges +*> at each factorization step. For more info see Further +*> Details section. +*> +*> If UPLO = 'U', +*> ( in factorization order, k decreases from N to 1 ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N); +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k-1) < 0 means: +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k-1) != k-1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k-1) = k-1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) <= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> +*> If UPLO = 'L', +*> ( in factorization order, k increases from 1 to N ): +*> a) A single positive entry IPIV(k) > 0 means: +*> D(k,k) is a 1-by-1 diagonal block. +*> If IPIV(k) != k, rows and columns k and IPIV(k) were +*> interchanged in the matrix A(1:N,1:N). +*> If IPIV(k) = k, no interchange occurred. +*> +*> b) A pair of consecutive negative entries +*> IPIV(k) < 0 and IPIV(k+1) < 0 means: +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> (NOTE: negative entries in IPIV appear ONLY in pairs). +*> 1) If -IPIV(k) != k, rows and columns +*> k and -IPIV(k) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k) = k, no interchange occurred. +*> 2) If -IPIV(k+1) != k+1, rows and columns +*> k-1 and -IPIV(k-1) were interchanged +*> in the matrix A(1:N,1:N). +*> If -IPIV(k+1) = k+1, no interchange occurred. +*> +*> c) In both cases a) and b), always ABS( IPIV(k) ) >= k. +*> +*> d) NOTE: Any entry IPIV(k) is always NONZERO on output. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension ( MAX(1,LWORK) ). +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=1. For best performance +*> LWORK >= N*NB, where NB is the block size returned +*> by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; +*> the routine only calculates the optimal size of the WORK +*> array, returns this value as the first entry of the WORK +*> array, and no error message related to LWORK is issued +*> by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> +*> < 0: If INFO = -k, the k-th argument had an illegal value +*> +*> > 0: If INFO = k, the matrix A is singular, because: +*> If UPLO = 'U': column k in the upper +*> triangular part of A contains all zeros. +*> If UPLO = 'L': column k in the lower +*> triangular part of A contains all zeros. +*> +*> Therefore D(k,k) is exactly zero, and superdiagonal +*> elements of column k of U (or subdiagonal elements of +*> column k of L ) are all zeros. The factorization has +*> been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if +*> it is used to solve a system of equations. +*> +*> NOTE: INFO only stores the first occurrence of +*> a singularity, any subsequent occurrence of singularity +*> is not stored in INFO even though the factorization +*> always completes. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> TODO: put correct description +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZSYTRF_RK( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, IP, IWS, K, KB, LDWORK, LWKOPT, + $ NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL ZLASYF_RK, ZSYTF2_RK, ZSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'ZSYTRF_RK', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRF_RK', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'ZSYTRF_RK', + $ UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by ZLASYF_RK; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 15 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL ZLASYF_RK( UPLO, K, NB, KB, A, LDA, E, + $ IPIV, WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL ZSYTF2_RK( UPLO, K, A, LDA, E, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* No need to adjust IPIV +* +* +* Apply permutations to the leading panel 1:k-1 +* +* Read IPIV from the last block factored, i.e. +* indices k-kb+1:k and apply row permutations to the +* last k+1 colunms k+1:N after that block +* (We can do the simple loop over IPIV with decrement -1, +* since the ABS value of IPIV( I ) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + IF( K.LT.N ) THEN + DO I = K, ( K - KB + 1 ), -1 + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + CALL ZSWAP( N-K, A( I, K+1 ), LDA, + $ A( IP, K+1 ), LDA ) + END IF + END DO + END IF +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* +* This label is the exit from main loop over K decreasing +* from N to 1 in steps of KB +* + 15 CONTINUE +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by ZLASYF_RK; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 35 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL ZLASYF_RK( UPLO, N-K+1, NB, KB, A( K, K ), LDA, E( K ), + $ IPIV( K ), WORK, LDWORK, IINFO ) + + + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL ZSYTF2_RK( UPLO, N-K+1, A( K, K ), LDA, E( K ), + $ IPIV( K ), IINFO ) + KB = N - K + 1 +* + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO I = K, K + KB - 1 + IF( IPIV( I ).GT.0 ) THEN + IPIV( I ) = IPIV( I ) + K - 1 + ELSE + IPIV( I ) = IPIV( I ) - K + 1 + END IF + END DO +* +* Apply permutations to the leading panel 1:k-1 +* +* Read IPIV from the last block factored, i.e. +* indices k:k+kb-1 and apply row permutations to the +* first k-1 colunms 1:k-1 before that block +* (We can do the simple loop over IPIV with increment 1, +* since the ABS value of IPIV( I ) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + IF( K.GT.1 ) THEN + DO I = K, ( K + KB - 1 ), 1 + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + CALL ZSWAP( K-1, A( I, 1 ), LDA, + $ A( IP, 1 ), LDA ) + END IF + END DO + END IF +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* +* This label is the exit from main loop over K increasing +* from 1 to N in steps of KB +* + 35 CONTINUE +* +* End Lower +* + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZSYTRF_RK +* + END diff --git a/dspl/liblapack/SRC/zsytrf_rook.f b/dspl/liblapack/SRC/zsytrf_rook.f new file mode 100644 index 0000000..3fe69b3 --- /dev/null +++ b/dspl/liblapack/SRC/zsytrf_rook.f @@ -0,0 +1,393 @@ +*> \brief \b ZSYTRF_ROOK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRF_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYTRF_ROOK computes the factorization of a complex symmetric matrix A +*> using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. +*> The form of the factorization is +*> +*> A = U*D*U**T or A = L*D*L**T +*> +*> where U (or L) is a product of permutation and unit upper (lower) +*> triangular matrices, and D is symmetric and block diagonal with +*> 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> +*> On exit, the block diagonal matrix D and the multipliers used +*> to obtain the factor U or L (see below for further details). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D. +*> +*> If UPLO = 'U': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k-1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k-1 and -IPIV(k-1) were inerchaged, +*> D(k-1:k,k-1:k) is a 2-by-2 diagonal block. +*> +*> If UPLO = 'L': +*> If IPIV(k) > 0, then rows and columns k and IPIV(k) +*> were interchanged and D(k,k) is a 1-by-1 diagonal block. +*> +*> If IPIV(k) < 0 and IPIV(k+1) < 0, then rows and +*> columns k and -IPIV(k) were interchanged and rows and +*> columns k+1 and -IPIV(k+1) were inerchaged, +*> D(k:k+1,k:k+1) is a 2-by-2 diagonal block. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)). +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >=1. For best performance +*> LWORK >= N*NB, where NB is the block size returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) is exactly zero. The factorization +*> has been completed, but the block diagonal matrix D is +*> exactly singular, and division by zero will occur if it +*> is used to solve a system of equations. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16SYcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', then A = U*D*U**T, where +*> U = P(n)*U(n)* ... *P(k)U(k)* ..., +*> i.e., U is a product of terms P(k)*U(k), where k decreases from n to +*> 1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and U(k) is a unit upper triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I v 0 ) k-s +*> U(k) = ( 0 I 0 ) s +*> ( 0 0 I ) n-k +*> k-s s n-k +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k). +*> If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), +*> and A(k,k), and v overwrites A(1:k-2,k-1:k). +*> +*> If UPLO = 'L', then A = L*D*L**T, where +*> L = P(1)*L(1)* ... *P(k)*L(k)* ..., +*> i.e., L is a product of terms P(k)*L(k), where k increases from 1 to +*> n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1 +*> and 2-by-2 diagonal blocks D(k). P(k) is a permutation matrix as +*> defined by IPIV(k), and L(k) is a unit lower triangular matrix, such +*> that if the diagonal block D(k) is of order s (s = 1 or 2), then +*> +*> ( I 0 0 ) k-1 +*> L(k) = ( 0 I 0 ) s +*> ( 0 v I ) n-k-s+1 +*> k-1 s n-k-s+1 +*> +*> If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k). +*> If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k), +*> and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1). +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> June 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZSYTRF_ROOK( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL ZLASYF_ROOK, ZSYTF2_ROOK, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size +* + NB = ILAENV( 1, 'ZSYTRF_ROOK', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, N*NB ) + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRF_ROOK', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = N + IF( NB.GT.1 .AND. NB.LT.N ) THEN + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = MAX( 2, ILAENV( 2, 'ZSYTRF_ROOK', + $ UPLO, N, -1, -1, -1 ) ) + END IF + ELSE + IWS = 1 + END IF + IF( NB.LT.NBMIN ) + $ NB = N +* + IF( UPPER ) THEN +* +* Factorize A as U*D*U**T using the upper triangle of A +* +* K is the main loop index, decreasing from N to 1 in steps of +* KB, where KB is the number of columns factorized by ZLASYF_ROOK; +* KB is either NB or NB-1, or K for the last block +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop +* + IF( K.LT.1 ) + $ GO TO 40 +* + IF( K.GT.NB ) THEN +* +* Factorize columns k-kb+1:k of A and use blocked code to +* update columns 1:k-kb +* + CALL ZLASYF_ROOK( UPLO, K, NB, KB, A, LDA, + $ IPIV, WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns 1:k of A +* + CALL ZSYTF2_ROOK( UPLO, K, A, LDA, IPIV, IINFO ) + KB = K + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO +* +* No need to adjust IPIV +* +* Decrease K and return to the start of the main loop +* + K = K - KB + GO TO 10 +* + ELSE +* +* Factorize A as L*D*L**T using the lower triangle of A +* +* K is the main loop index, increasing from 1 to N in steps of +* KB, where KB is the number of columns factorized by ZLASYF_ROOK; +* KB is either NB or NB-1, or N-K+1 for the last block +* + K = 1 + 20 CONTINUE +* +* If K > N, exit from loop +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( K.LE.N-NB ) THEN +* +* Factorize columns k:k+kb-1 of A and use blocked code to +* update columns k+kb:n +* + CALL ZLASYF_ROOK( UPLO, N-K+1, NB, KB, A( K, K ), LDA, + $ IPIV( K ), WORK, LDWORK, IINFO ) + ELSE +* +* Use unblocked code to factorize columns k:n of A +* + CALL ZSYTF2_ROOK( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), + $ IINFO ) + KB = N - K + 1 + END IF +* +* Set INFO on the first occurrence of a zero pivot +* + IF( INFO.EQ.0 .AND. IINFO.GT.0 ) + $ INFO = IINFO + K - 1 +* +* Adjust IPIV +* + DO 30 J = K, K + KB - 1 + IF( IPIV( J ).GT.0 ) THEN + IPIV( J ) = IPIV( J ) + K - 1 + ELSE + IPIV( J ) = IPIV( J ) - K + 1 + END IF + 30 CONTINUE +* +* Increase K and return to the start of the main loop +* + K = K + KB + GO TO 20 +* + END IF +* + 40 CONTINUE + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZSYTRF_ROOK +* + END diff --git a/dspl/liblapack/SRC/zsytri.f b/dspl/liblapack/SRC/zsytri.f new file mode 100644 index 0000000..233026a --- /dev/null +++ b/dspl/liblapack/SRC/zsytri.f @@ -0,0 +1,383 @@ +*> \brief \b ZSYTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYTRI computes the inverse of a complex symmetric indefinite matrix +*> A using the factorization A = U*D*U**T or A = L*D*L**T computed by +*> ZSYTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by ZSYTRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSYTRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KP, KSTEP + COMPLEX*16 AK, AKKP1, AKP1, D, T, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTU + EXTERNAL LSAME, ZDOTU +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZSWAP, ZSYMV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL ZCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL ZSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - ZDOTU( K-1, WORK, 1, A( 1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = A( K, K+1 ) + AK = A( K, K ) / T + AKP1 = A( K+1, K+1 ) / T + AKKP1 = A( K, K+1 ) / T + D = T*( AK*AKP1-ONE ) + A( K, K ) = AKP1 / D + A( K+1, K+1 ) = AK / D + A( K, K+1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL ZCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL ZSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - ZDOTU( K-1, WORK, 1, A( 1, K ), + $ 1 ) + A( K, K+1 ) = A( K, K+1 ) - + $ ZDOTU( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 ) + CALL ZCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 ) + CALL ZSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO, + $ A( 1, K+1 ), 1 ) + A( K+1, K+1 ) = A( K+1, K+1 ) - + $ ZDOTU( K-1, WORK, 1, A( 1, K+1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the leading +* submatrix A(1:k+1,1:k+1) +* + CALL ZSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + CALL ZSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K+1 ) + A( K, K+1 ) = A( KP, K+1 ) + A( KP, K+1 ) = TEMP + END IF + END IF +* + K = K + KSTEP + GO TO 30 + 40 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 50 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 60 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = ONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL ZCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL ZSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - ZDOTU( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = A( K, K-1 ) + AK = A( K-1, K-1 ) / T + AKP1 = A( K, K ) / T + AKKP1 = A( K, K-1 ) / T + D = T*( AK*AKP1-ONE ) + A( K-1, K-1 ) = AKP1 / D + A( K, K ) = AK / D + A( K, K-1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL ZCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL ZSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - ZDOTU( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + A( K, K-1 ) = A( K, K-1 ) - + $ ZDOTU( N-K, A( K+1, K ), 1, A( K+1, K-1 ), + $ 1 ) + CALL ZCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 ) + CALL ZSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1, + $ ZERO, A( K+1, K-1 ), 1 ) + A( K-1, K-1 ) = A( K-1, K-1 ) - + $ ZDOTU( N-K, WORK, 1, A( K+1, K-1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN +* +* Interchange rows and columns K and KP in the trailing +* submatrix A(k-1:n,k-1:n) +* + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + CALL ZSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + IF( KSTEP.EQ.2 ) THEN + TEMP = A( K, K-1 ) + A( K, K-1 ) = A( KP, K-1 ) + A( KP, K-1 ) = TEMP + END IF + END IF +* + K = K - KSTEP + GO TO 50 + 60 CONTINUE + END IF +* + RETURN +* +* End of ZSYTRI +* + END diff --git a/dspl/liblapack/SRC/zsytri2.f b/dspl/liblapack/SRC/zsytri2.f new file mode 100644 index 0000000..d5aabd4 --- /dev/null +++ b/dspl/liblapack/SRC/zsytri2.f @@ -0,0 +1,205 @@ +*> \brief \b ZSYTRI2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRI2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYTRI2 computes the inverse of a COMPLEX*16 symmetric indefinite matrix +*> A using the factorization A = U*D*U**T or A = L*D*L**T computed by +*> ZSYTRF. ZSYTRI2 sets the LEADING DIMENSION of the workspace +*> before calling ZSYTRI2X that actually computes the inverse. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the NB diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by ZSYTRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the NB structure of D +*> as determined by ZSYTRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N+NB+1)*(NB+3) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> WORK is size >= (N+NB+1)*(NB+3) +*> If LDWORK = -1, then a workspace query is assumed; the routine +*> calculates: +*> - the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, +*> - and no error message related to LDWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZSYTRI2( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, LQUERY + INTEGER MINSIZE, NBMAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL ZSYTRI, ZSYTRI2X, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* Get blocksize + NBMAX = ILAENV( 1, 'ZSYTRF', UPLO, N, -1, -1, -1 ) + IF ( NBMAX .GE. N ) THEN + MINSIZE = N + ELSE + MINSIZE = (N+NBMAX+1)*(NBMAX+3) + END IF +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF (LWORK .LT. MINSIZE .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* +* Quick return if possible +* +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRI2', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK(1)=MINSIZE + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN + + IF( NBMAX .GE. N ) THEN + CALL ZSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO ) + ELSE + CALL ZSYTRI2X( UPLO, N, A, LDA, IPIV, WORK, NBMAX, INFO ) + END IF + RETURN +* +* End of ZSYTRI2 +* + END diff --git a/dspl/liblapack/SRC/zsytri2x.f b/dspl/liblapack/SRC/zsytri2x.f new file mode 100644 index 0000000..4feb4d5 --- /dev/null +++ b/dspl/liblapack/SRC/zsytri2x.f @@ -0,0 +1,589 @@ +*> \brief \b ZSYTRI2X +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRI2X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRI2X( UPLO, N, A, LDA, IPIV, WORK, NB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), WORK( N+NB+1,* ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYTRI2X computes the inverse of a complex symmetric indefinite matrix +*> A using the factorization A = U*D*U**T or A = L*D*L**T computed by +*> ZSYTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the NNB diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by ZSYTRF. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the NNB structure of D +*> as determined by ZSYTRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N+NB+1,NB+3) +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> Block size +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZSYTRI2X( UPLO, N, A, LDA, IPIV, WORK, NB, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( N+NB+1,* ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IINFO, IP, K, CUT, NNB + INTEGER COUNT + INTEGER J, U11, INVD + + COMPLEX*16 AK, AKKP1, AKP1, D, T + COMPLEX*16 U01_I_J, U01_IP1_J + COMPLEX*16 U11_I_J, U11_IP1_J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZSYCONV, XERBLA, ZTRTRI + EXTERNAL ZGEMM, ZTRMM, ZSYSWAPR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF +* +* Quick return if possible +* +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRI2X', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN +* +* Convert A +* Workspace got Non-diag elements of D +* + CALL ZSYCONV( UPLO, 'C', N, A, LDA, IPIV, WORK, IINFO ) +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + END DO + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO ) + $ RETURN + END DO + END IF + INFO = 0 +* +* Splitting Workspace +* U01 is a block (N,NB+1) +* The first element of U01 is in WORK(1,1) +* U11 is a block (NB+1,NB+1) +* The first element of U11 is in WORK(N+1,1) + U11 = N +* INVD is a block (N,2) +* The first element of INVD is in WORK(1,INVD) + INVD = NB+2 + + IF( UPPER ) THEN +* +* invA = P * inv(U**T)*inv(D)*inv(U)*P**T. +* + CALL ZTRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D)*inv(U) +* + K=1 + DO WHILE ( K .LE. N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK(K,INVD) = 1/ A( K, K ) + WORK(K,INVD+1) = 0 + K=K+1 + ELSE +* 2 x 2 diagonal NNB + T = WORK(K+1,1) + AK = A( K, K ) / T + AKP1 = A( K+1, K+1 ) / T + AKKP1 = WORK(K+1,1) / T + D = T*( AK*AKP1-ONE ) + WORK(K,INVD) = AKP1 / D + WORK(K+1,INVD+1) = AK / D + WORK(K,INVD+1) = -AKKP1 / D + WORK(K+1,INVD) = -AKKP1 / D + K=K+2 + END IF + END DO +* +* inv(U**T) = (inv(U))**T +* +* inv(U**T)*inv(D)*inv(U) +* + CUT=N + DO WHILE (CUT .GT. 0) + NNB=NB + IF (CUT .LE. NNB) THEN + NNB=CUT + ELSE + COUNT = 0 +* count negative elements, + DO I=CUT+1-NNB,CUT + IF (IPIV(I) .LT. 0) COUNT=COUNT+1 + END DO +* need a even number for a clear cut + IF (MOD(COUNT,2) .EQ. 1) NNB=NNB+1 + END IF + + CUT=CUT-NNB +* +* U01 Block +* + DO I=1,CUT + DO J=1,NNB + WORK(I,J)=A(I,CUT+J) + END DO + END DO +* +* U11 Block +* + DO I=1,NNB + WORK(U11+I,I)=ONE + DO J=1,I-1 + WORK(U11+I,J)=ZERO + END DO + DO J=I+1,NNB + WORK(U11+I,J)=A(CUT+I,CUT+J) + END DO + END DO +* +* invD*U01 +* + I=1 + DO WHILE (I .LE. CUT) + IF (IPIV(I) > 0) THEN + DO J=1,NNB + WORK(I,J)=WORK(I,INVD)*WORK(I,J) + END DO + I=I+1 + ELSE + DO J=1,NNB + U01_I_J = WORK(I,J) + U01_IP1_J = WORK(I+1,J) + WORK(I,J)=WORK(I,INVD)*U01_I_J+ + $ WORK(I,INVD+1)*U01_IP1_J + WORK(I+1,J)=WORK(I+1,INVD)*U01_I_J+ + $ WORK(I+1,INVD+1)*U01_IP1_J + END DO + I=I+2 + END IF + END DO +* +* invD1*U11 +* + I=1 + DO WHILE (I .LE. NNB) + IF (IPIV(CUT+I) > 0) THEN + DO J=I,NNB + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + END DO + I=I+1 + ELSE + DO J=I,NNB + U11_I_J = WORK(U11+I,J) + U11_IP1_J = WORK(U11+I+1,J) + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + + $ WORK(CUT+I,INVD+1)*WORK(U11+I+1,J) + WORK(U11+I+1,J)=WORK(CUT+I+1,INVD)*U11_I_J+ + $ WORK(CUT+I+1,INVD+1)*U11_IP1_J + END DO + I=I+2 + END IF + END DO +* +* U11**T*invD1*U11->U11 +* + CALL ZTRMM('L','U','T','U',NNB, NNB, + $ ONE,A(CUT+1,CUT+1),LDA,WORK(U11+1,1),N+NB+1) +* + DO I=1,NNB + DO J=I,NNB + A(CUT+I,CUT+J)=WORK(U11+I,J) + END DO + END DO +* +* U01**T*invD*U01->A(CUT+I,CUT+J) +* + CALL ZGEMM('T','N',NNB,NNB,CUT,ONE,A(1,CUT+1),LDA, + $ WORK,N+NB+1, ZERO, WORK(U11+1,1), N+NB+1) +* +* U11 = U11**T*invD1*U11 + U01**T*invD*U01 +* + DO I=1,NNB + DO J=I,NNB + A(CUT+I,CUT+J)=A(CUT+I,CUT+J)+WORK(U11+I,J) + END DO + END DO +* +* U01 = U00**T*invD0*U01 +* + CALL ZTRMM('L',UPLO,'T','U',CUT, NNB, + $ ONE,A,LDA,WORK,N+NB+1) + +* +* Update U01 +* + DO I=1,CUT + DO J=1,NNB + A(I,CUT+J)=WORK(I,J) + END DO + END DO +* +* Next Block +* + END DO +* +* Apply PERMUTATIONS P and P**T: P * inv(U**T)*inv(D)*inv(U) *P**T +* + I=1 + DO WHILE ( I .LE. N ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF (I .LT. IP) CALL ZSYSWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL ZSYSWAPR( UPLO, N, A, LDA, IP ,I ) + ELSE + IP=-IPIV(I) + I=I+1 + IF ( (I-1) .LT. IP) + $ CALL ZSYSWAPR( UPLO, N, A, LDA, I-1 ,IP ) + IF ( (I-1) .GT. IP) + $ CALL ZSYSWAPR( UPLO, N, A, LDA, IP ,I-1 ) + ENDIF + I=I+1 + END DO + ELSE +* +* LOWER... +* +* invA = P * inv(U**T)*inv(D)*inv(U)*P**T. +* + CALL ZTRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D)*inv(U) +* + K=N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK(K,INVD) = 1/ A( K, K ) + WORK(K,INVD+1) = 0 + K=K-1 + ELSE +* 2 x 2 diagonal NNB + T = WORK(K-1,1) + AK = A( K-1, K-1 ) / T + AKP1 = A( K, K ) / T + AKKP1 = WORK(K-1,1) / T + D = T*( AK*AKP1-ONE ) + WORK(K-1,INVD) = AKP1 / D + WORK(K,INVD) = AK / D + WORK(K,INVD+1) = -AKKP1 / D + WORK(K-1,INVD+1) = -AKKP1 / D + K=K-2 + END IF + END DO +* +* inv(U**T) = (inv(U))**T +* +* inv(U**T)*inv(D)*inv(U) +* + CUT=0 + DO WHILE (CUT .LT. N) + NNB=NB + IF (CUT + NNB .GE. N) THEN + NNB=N-CUT + ELSE + COUNT = 0 +* count negative elements, + DO I=CUT+1,CUT+NNB + IF (IPIV(I) .LT. 0) COUNT=COUNT+1 + END DO +* need a even number for a clear cut + IF (MOD(COUNT,2) .EQ. 1) NNB=NNB+1 + END IF +* L21 Block + DO I=1,N-CUT-NNB + DO J=1,NNB + WORK(I,J)=A(CUT+NNB+I,CUT+J) + END DO + END DO +* L11 Block + DO I=1,NNB + WORK(U11+I,I)=ONE + DO J=I+1,NNB + WORK(U11+I,J)=ZERO + END DO + DO J=1,I-1 + WORK(U11+I,J)=A(CUT+I,CUT+J) + END DO + END DO +* +* invD*L21 +* + I=N-CUT-NNB + DO WHILE (I .GE. 1) + IF (IPIV(CUT+NNB+I) > 0) THEN + DO J=1,NNB + WORK(I,J)=WORK(CUT+NNB+I,INVD)*WORK(I,J) + END DO + I=I-1 + ELSE + DO J=1,NNB + U01_I_J = WORK(I,J) + U01_IP1_J = WORK(I-1,J) + WORK(I,J)=WORK(CUT+NNB+I,INVD)*U01_I_J+ + $ WORK(CUT+NNB+I,INVD+1)*U01_IP1_J + WORK(I-1,J)=WORK(CUT+NNB+I-1,INVD+1)*U01_I_J+ + $ WORK(CUT+NNB+I-1,INVD)*U01_IP1_J + END DO + I=I-2 + END IF + END DO +* +* invD1*L11 +* + I=NNB + DO WHILE (I .GE. 1) + IF (IPIV(CUT+I) > 0) THEN + DO J=1,NNB + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + END DO + I=I-1 + ELSE + DO J=1,NNB + U11_I_J = WORK(U11+I,J) + U11_IP1_J = WORK(U11+I-1,J) + WORK(U11+I,J)=WORK(CUT+I,INVD)*WORK(U11+I,J) + + $ WORK(CUT+I,INVD+1)*U11_IP1_J + WORK(U11+I-1,J)=WORK(CUT+I-1,INVD+1)*U11_I_J+ + $ WORK(CUT+I-1,INVD)*U11_IP1_J + END DO + I=I-2 + END IF + END DO +* +* L11**T*invD1*L11->L11 +* + CALL ZTRMM('L',UPLO,'T','U',NNB, NNB, + $ ONE,A(CUT+1,CUT+1),LDA,WORK(U11+1,1),N+NB+1) +* + DO I=1,NNB + DO J=1,I + A(CUT+I,CUT+J)=WORK(U11+I,J) + END DO + END DO +* + + IF ( (CUT+NNB) .LT. N ) THEN +* +* L21**T*invD2*L21->A(CUT+I,CUT+J) +* + CALL ZGEMM('T','N',NNB,NNB,N-NNB-CUT,ONE,A(CUT+NNB+1,CUT+1) + $ ,LDA,WORK,N+NB+1, ZERO, WORK(U11+1,1), N+NB+1) + +* +* L11 = L11**T*invD1*L11 + U01**T*invD*U01 +* + DO I=1,NNB + DO J=1,I + A(CUT+I,CUT+J)=A(CUT+I,CUT+J)+WORK(U11+I,J) + END DO + END DO +* +* U01 = L22**T*invD2*L21 +* + CALL ZTRMM('L',UPLO,'T','U', N-NNB-CUT, NNB, + $ ONE,A(CUT+NNB+1,CUT+NNB+1),LDA,WORK,N+NB+1) + +* Update L21 + DO I=1,N-CUT-NNB + DO J=1,NNB + A(CUT+NNB+I,CUT+J)=WORK(I,J) + END DO + END DO + ELSE +* +* L11 = L11**T*invD1*L11 +* + DO I=1,NNB + DO J=1,I + A(CUT+I,CUT+J)=WORK(U11+I,J) + END DO + END DO + END IF +* +* Next Block +* + CUT=CUT+NNB + END DO +* +* Apply PERMUTATIONS P and P**T: P * inv(U**T)*inv(D)*inv(U) *P**T +* + I=N + DO WHILE ( I .GE. 1 ) + IF( IPIV(I) .GT. 0 ) THEN + IP=IPIV(I) + IF (I .LT. IP) CALL ZSYSWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL ZSYSWAPR( UPLO, N, A, LDA, IP ,I ) + ELSE + IP=-IPIV(I) + IF ( I .LT. IP) CALL ZSYSWAPR( UPLO, N, A, LDA, I ,IP ) + IF ( I .GT. IP) CALL ZSYSWAPR( UPLO, N, A, LDA, IP ,I ) + I=I-1 + ENDIF + I=I-1 + END DO + END IF +* + RETURN +* +* End of ZSYTRI2X +* + END + diff --git a/dspl/liblapack/SRC/zsytri_3.f b/dspl/liblapack/SRC/zsytri_3.f new file mode 100644 index 0000000..dc38e85 --- /dev/null +++ b/dspl/liblapack/SRC/zsytri_3.f @@ -0,0 +1,248 @@ +*> \brief \b ZSYTRI_3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRI_3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRI_3( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), E( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> ZSYTRI_3 computes the inverse of a complex symmetric indefinite +*> matrix A using the factorization computed by ZSYTRF_RK or ZSYTRF_BK: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> ZSYTRI_3 sets the leading dimension of the workspace before calling +*> ZSYTRI_3X that actually computes the inverse. This is the blocked +*> version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix. +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, diagonal of the block diagonal matrix D and +*> factors U or L as computed by ZSYTRF_RK and ZSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> On exit, if INFO = 0, the symmetric inverse of the original +*> matrix. +*> If UPLO = 'U': the upper triangular part of the inverse +*> is formed and the part of A below the diagonal is not +*> referenced; +*> If UPLO = 'L': the lower triangular part of the inverse +*> is formed and the part of A above the diagonal is not +*> referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSYTRF_RK or ZSYTRF_BK. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N+NB+1)*(NB+3). +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of WORK. LWORK >= (N+NB+1)*(NB+3). +*> +*> If LDWORK = -1, then a workspace query is assumed; +*> the routine only calculates the optimal size of the optimal +*> size of the WORK array, returns this value as the first +*> entry of the WORK array, and no error message related to +*> LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16SYcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> November 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZSYTRI_3( UPLO, N, A, LDA, E, IPIV, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), E( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL UPPER, LQUERY + INTEGER LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL ZSYTRI_3X, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* Determine the block size +* + NB = MAX( 1, ILAENV( 1, 'ZSYTRI_3', UPLO, N, -1, -1, -1 ) ) + LWKOPT = ( N+NB+1 ) * ( NB+3 ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF ( LWORK .LT. LWKOPT .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRI_3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK( 1 ) = LWKOPT + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + CALL ZSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZSYTRI_3 +* + END diff --git a/dspl/liblapack/SRC/zsytri_3x.f b/dspl/liblapack/SRC/zsytri_3x.f new file mode 100644 index 0000000..a943929 --- /dev/null +++ b/dspl/liblapack/SRC/zsytri_3x.f @@ -0,0 +1,647 @@ +*> \brief \b ZSYTRI_3X +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRI_3X + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), E( * ), WORK( N+NB+1, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> ZSYTRI_3X computes the inverse of a complex symmetric indefinite +*> matrix A using the factorization computed by ZSYTRF_RK or ZSYTRF_BK: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This is the blocked version of the algorithm, calling Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix. +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, diagonal of the block diagonal matrix D and +*> factors U or L as computed by ZSYTRF_RK and ZSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> +*> On exit, if INFO = 0, the symmetric inverse of the original +*> matrix. +*> If UPLO = 'U': the upper triangular part of the inverse +*> is formed and the part of A below the diagonal is not +*> referenced; +*> If UPLO = 'L': the lower triangular part of the inverse +*> is formed and the part of A above the diagonal is not +*> referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSYTRF_RK or ZSYTRF_BK. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N+NB+1,NB+3). +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> Block size. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16SYcomputational +* +*> \par Contributors: +* ================== +*> \verbatim +*> +*> June 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N, NB +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), E( * ), WORK( N+NB+1, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE, CZERO + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), + $ CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11 + COMPLEX*16 AK, AKKP1, AKP1, D, T, U01_I_J, U01_IP1_J, + $ U11_I_J, U11_IP1_J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZGEMM, ZSYSWAPR, ZTRTRI, ZTRMM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF +* +* Quick return if possible +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRI_3X', -INFO ) + RETURN + END IF + IF( N.EQ.0 ) + $ RETURN +* +* Workspace got Non-diag elements of D +* + DO K = 1, N + WORK( K, 1 ) = E( K ) + END DO +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO ) + $ RETURN + END DO + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO ) + $ RETURN + END DO + END IF +* + INFO = 0 +* +* Splitting Workspace +* U01 is a block ( N, NB+1 ) +* The first element of U01 is in WORK( 1, 1 ) +* U11 is a block ( NB+1, NB+1 ) +* The first element of U11 is in WORK( N+1, 1 ) +* + U11 = N +* +* INVD is a block ( N, 2 ) +* The first element of INVD is in WORK( 1, INVD ) +* + INVD = NB + 2 + + IF( UPPER ) THEN +* +* Begin Upper +* +* invA = P * inv(U**T) * inv(D) * inv(U) * P**T. +* + CALL ZTRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D) * inv(U) +* + K = 1 + DO WHILE( K.LE.N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK( K, INVD ) = CONE / A( K, K ) + WORK( K, INVD+1 ) = CZERO + ELSE +* 2 x 2 diagonal NNB + T = WORK( K+1, 1 ) + AK = A( K, K ) / T + AKP1 = A( K+1, K+1 ) / T + AKKP1 = WORK( K+1, 1 ) / T + D = T*( AK*AKP1-CONE ) + WORK( K, INVD ) = AKP1 / D + WORK( K+1, INVD+1 ) = AK / D + WORK( K, INVD+1 ) = -AKKP1 / D + WORK( K+1, INVD ) = WORK( K, INVD+1 ) + K = K + 1 + END IF + K = K + 1 + END DO +* +* inv(U**T) = (inv(U))**T +* +* inv(U**T) * inv(D) * inv(U) +* + CUT = N + DO WHILE( CUT.GT.0 ) + NNB = NB + IF( CUT.LE.NNB ) THEN + NNB = CUT + ELSE + ICOUNT = 0 +* count negative elements, + DO I = CUT+1-NNB, CUT + IF( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1 + END DO +* need a even number for a clear cut + IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1 + END IF + + CUT = CUT - NNB +* +* U01 Block +* + DO I = 1, CUT + DO J = 1, NNB + WORK( I, J ) = A( I, CUT+J ) + END DO + END DO +* +* U11 Block +* + DO I = 1, NNB + WORK( U11+I, I ) = CONE + DO J = 1, I-1 + WORK( U11+I, J ) = CZERO + END DO + DO J = I+1, NNB + WORK( U11+I, J ) = A( CUT+I, CUT+J ) + END DO + END DO +* +* invD * U01 +* + I = 1 + DO WHILE( I.LE.CUT ) + IF( IPIV( I ).GT.0 ) THEN + DO J = 1, NNB + WORK( I, J ) = WORK( I, INVD ) * WORK( I, J ) + END DO + ELSE + DO J = 1, NNB + U01_I_J = WORK( I, J ) + U01_IP1_J = WORK( I+1, J ) + WORK( I, J ) = WORK( I, INVD ) * U01_I_J + $ + WORK( I, INVD+1 ) * U01_IP1_J + WORK( I+1, J ) = WORK( I+1, INVD ) * U01_I_J + $ + WORK( I+1, INVD+1 ) * U01_IP1_J + END DO + I = I + 1 + END IF + I = I + 1 + END DO +* +* invD1 * U11 +* + I = 1 + DO WHILE ( I.LE.NNB ) + IF( IPIV( CUT+I ).GT.0 ) THEN + DO J = I, NNB + WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) + END DO + ELSE + DO J = I, NNB + U11_I_J = WORK(U11+I,J) + U11_IP1_J = WORK(U11+I+1,J) + WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) + $ + WORK(CUT+I,INVD+1) * WORK(U11+I+1,J) + WORK( U11+I+1, J ) = WORK(CUT+I+1,INVD) * U11_I_J + $ + WORK(CUT+I+1,INVD+1) * U11_IP1_J + END DO + I = I + 1 + END IF + I = I + 1 + END DO +* +* U11**T * invD1 * U11 -> U11 +* + CALL ZTRMM( 'L', 'U', 'T', 'U', NNB, NNB, + $ CONE, A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ), + $ N+NB+1 ) +* + DO I = 1, NNB + DO J = I, NNB + A( CUT+I, CUT+J ) = WORK( U11+I, J ) + END DO + END DO +* +* U01**T * invD * U01 -> A( CUT+I, CUT+J ) +* + CALL ZGEMM( 'T', 'N', NNB, NNB, CUT, CONE, A( 1, CUT+1 ), + $ LDA, WORK, N+NB+1, CZERO, WORK(U11+1,1), + $ N+NB+1 ) + +* +* U11 = U11**T * invD1 * U11 + U01**T * invD * U01 +* + DO I = 1, NNB + DO J = I, NNB + A( CUT+I, CUT+J ) = A( CUT+I, CUT+J ) + WORK(U11+I,J) + END DO + END DO +* +* U01 = U00**T * invD0 * U01 +* + CALL ZTRMM( 'L', UPLO, 'T', 'U', CUT, NNB, + $ CONE, A, LDA, WORK, N+NB+1 ) + +* +* Update U01 +* + DO I = 1, CUT + DO J = 1, NNB + A( I, CUT+J ) = WORK( I, J ) + END DO + END DO +* +* Next Block +* + END DO +* +* Apply PERMUTATIONS P and P**T: +* P * inv(U**T) * inv(D) * inv(U) * P**T. +* Interchange rows and columns I and IPIV(I) in reverse order +* from the formation order of IPIV vector for Upper case. +* +* ( We can use a loop over IPIV with increment 1, +* since the ABS value of IPIV(I) represents the row (column) +* index of the interchange with row (column) i in both 1x1 +* and 2x2 pivot cases, i.e. we don't need separate code branches +* for 1x1 and 2x2 pivot cases ) +* + DO I = 1, N + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + IF (I .LT. IP) CALL ZSYSWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL ZSYSWAPR( UPLO, N, A, LDA, IP ,I ) + END IF + END DO +* + ELSE +* +* Begin Lower +* +* inv A = P * inv(L**T) * inv(D) * inv(L) * P**T. +* + CALL ZTRTRI( UPLO, 'U', N, A, LDA, INFO ) +* +* inv(D) and inv(D) * inv(L) +* + K = N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal NNB + WORK( K, INVD ) = CONE / A( K, K ) + WORK( K, INVD+1 ) = CZERO + ELSE +* 2 x 2 diagonal NNB + T = WORK( K-1, 1 ) + AK = A( K-1, K-1 ) / T + AKP1 = A( K, K ) / T + AKKP1 = WORK( K-1, 1 ) / T + D = T*( AK*AKP1-CONE ) + WORK( K-1, INVD ) = AKP1 / D + WORK( K, INVD ) = AK / D + WORK( K, INVD+1 ) = -AKKP1 / D + WORK( K-1, INVD+1 ) = WORK( K, INVD+1 ) + K = K - 1 + END IF + K = K - 1 + END DO +* +* inv(L**T) = (inv(L))**T +* +* inv(L**T) * inv(D) * inv(L) +* + CUT = 0 + DO WHILE( CUT.LT.N ) + NNB = NB + IF( (CUT + NNB).GT.N ) THEN + NNB = N - CUT + ELSE + ICOUNT = 0 +* count negative elements, + DO I = CUT + 1, CUT+NNB + IF ( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1 + END DO +* need a even number for a clear cut + IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1 + END IF +* +* L21 Block +* + DO I = 1, N-CUT-NNB + DO J = 1, NNB + WORK( I, J ) = A( CUT+NNB+I, CUT+J ) + END DO + END DO +* +* L11 Block +* + DO I = 1, NNB + WORK( U11+I, I) = CONE + DO J = I+1, NNB + WORK( U11+I, J ) = CZERO + END DO + DO J = 1, I-1 + WORK( U11+I, J ) = A( CUT+I, CUT+J ) + END DO + END DO +* +* invD*L21 +* + I = N-CUT-NNB + DO WHILE( I.GE.1 ) + IF( IPIV( CUT+NNB+I ).GT.0 ) THEN + DO J = 1, NNB + WORK( I, J ) = WORK( CUT+NNB+I, INVD) * WORK( I, J) + END DO + ELSE + DO J = 1, NNB + U01_I_J = WORK(I,J) + U01_IP1_J = WORK(I-1,J) + WORK(I,J)=WORK(CUT+NNB+I,INVD)*U01_I_J+ + $ WORK(CUT+NNB+I,INVD+1)*U01_IP1_J + WORK(I-1,J)=WORK(CUT+NNB+I-1,INVD+1)*U01_I_J+ + $ WORK(CUT+NNB+I-1,INVD)*U01_IP1_J + END DO + I = I - 1 + END IF + I = I - 1 + END DO +* +* invD1*L11 +* + I = NNB + DO WHILE( I.GE.1 ) + IF( IPIV( CUT+I ).GT.0 ) THEN + DO J = 1, NNB + WORK( U11+I, J ) = WORK( CUT+I, INVD)*WORK(U11+I,J) + END DO + + ELSE + DO J = 1, NNB + U11_I_J = WORK( U11+I, J ) + U11_IP1_J = WORK( U11+I-1, J ) + WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J) + $ + WORK(CUT+I,INVD+1) * U11_IP1_J + WORK( U11+I-1, J ) = WORK(CUT+I-1,INVD+1) * U11_I_J + $ + WORK(CUT+I-1,INVD) * U11_IP1_J + END DO + I = I - 1 + END IF + I = I - 1 + END DO +* +* L11**T * invD1 * L11 -> L11 +* + CALL ZTRMM( 'L', UPLO, 'T', 'U', NNB, NNB, CONE, + $ A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ), + $ N+NB+1 ) + +* + DO I = 1, NNB + DO J = 1, I + A( CUT+I, CUT+J ) = WORK( U11+I, J ) + END DO + END DO +* + IF( (CUT+NNB).LT.N ) THEN +* +* L21**T * invD2*L21 -> A( CUT+I, CUT+J ) +* + CALL ZGEMM( 'T', 'N', NNB, NNB, N-NNB-CUT, CONE, + $ A( CUT+NNB+1, CUT+1 ), LDA, WORK, N+NB+1, + $ CZERO, WORK( U11+1, 1 ), N+NB+1 ) + +* +* L11 = L11**T * invD1 * L11 + U01**T * invD * U01 +* + DO I = 1, NNB + DO J = 1, I + A( CUT+I, CUT+J ) = A( CUT+I, CUT+J )+WORK(U11+I,J) + END DO + END DO +* +* L01 = L22**T * invD2 * L21 +* + CALL ZTRMM( 'L', UPLO, 'T', 'U', N-NNB-CUT, NNB, CONE, + $ A( CUT+NNB+1, CUT+NNB+1 ), LDA, WORK, + $ N+NB+1 ) +* +* Update L21 +* + DO I = 1, N-CUT-NNB + DO J = 1, NNB + A( CUT+NNB+I, CUT+J ) = WORK( I, J ) + END DO + END DO +* + ELSE +* +* L11 = L11**T * invD1 * L11 +* + DO I = 1, NNB + DO J = 1, I + A( CUT+I, CUT+J ) = WORK( U11+I, J ) + END DO + END DO + END IF +* +* Next Block +* + CUT = CUT + NNB +* + END DO +* +* Apply PERMUTATIONS P and P**T: +* P * inv(L**T) * inv(D) * inv(L) * P**T. +* Interchange rows and columns I and IPIV(I) in reverse order +* from the formation order of IPIV vector for Lower case. +* +* ( We can use a loop over IPIV with increment -1, +* since the ABS value of IPIV(I) represents the row (column) +* index of the interchange with row (column) i in both 1x1 +* and 2x2 pivot cases, i.e. we don't need separate code branches +* for 1x1 and 2x2 pivot cases ) +* + DO I = N, 1, -1 + IP = ABS( IPIV( I ) ) + IF( IP.NE.I ) THEN + IF (I .LT. IP) CALL ZSYSWAPR( UPLO, N, A, LDA, I ,IP ) + IF (I .GT. IP) CALL ZSYSWAPR( UPLO, N, A, LDA, IP ,I ) + END IF + END DO +* + END IF +* + RETURN +* +* End of ZSYTRI_3X +* + END + diff --git a/dspl/liblapack/SRC/zsytri_rook.f b/dspl/liblapack/SRC/zsytri_rook.f new file mode 100644 index 0000000..77aba25 --- /dev/null +++ b/dspl/liblapack/SRC/zsytri_rook.f @@ -0,0 +1,451 @@ +*> \brief \b ZSYTRI_ROOK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRI_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRI_ROOK( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYTRI_ROOK computes the inverse of a complex symmetric +*> matrix A using the factorization A = U*D*U**T or A = L*D*L**T +*> computed by ZSYTRF_ROOK. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the block diagonal matrix D and the multipliers +*> used to obtain the factor U or L as computed by ZSYTRF_ROOK. +*> +*> On exit, if INFO = 0, the (symmetric) inverse of the original +*> matrix. If UPLO = 'U', the upper triangular part of the +*> inverse is formed and the part of A below the diagonal is not +*> referenced; if UPLO = 'L' the lower triangular part of the +*> inverse is formed and the part of A above the diagonal is +*> not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSYTRF_ROOK. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its +*> inverse could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZSYTRI_ROOK( UPLO, N, A, LDA, IPIV, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE, CZERO + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), + $ CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER K, KP, KSTEP + COMPLEX*16 AK, AKKP1, AKP1, D, T, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTU + EXTERNAL LSAME, ZDOTU +* .. +* .. External Subroutines .. + EXTERNAL ZCOPY, ZSWAP, ZSYMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRI_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check that the diagonal matrix D is nonsingular. +* + IF( UPPER ) THEN +* +* Upper triangular storage: examine D from bottom to top +* + DO 10 INFO = N, 1, -1 + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO ) + $ RETURN + 10 CONTINUE + ELSE +* +* Lower triangular storage: examine D from top to bottom. +* + DO 20 INFO = 1, N + IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO ) + $ RETURN + 20 CONTINUE + END IF + INFO = 0 +* + IF( UPPER ) THEN +* +* Compute inv(A) from the factorization A = U*D*U**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 30 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 40 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = CONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.GT.1 ) THEN + CALL ZCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL ZSYMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, CZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - ZDOTU( K-1, WORK, 1, A( 1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = A( K, K+1 ) + AK = A( K, K ) / T + AKP1 = A( K+1, K+1 ) / T + AKKP1 = A( K, K+1 ) / T + D = T*( AK*AKP1-CONE ) + A( K, K ) = AKP1 / D + A( K+1, K+1 ) = AK / D + A( K, K+1 ) = -AKKP1 / D +* +* Compute columns K and K+1 of the inverse. +* + IF( K.GT.1 ) THEN + CALL ZCOPY( K-1, A( 1, K ), 1, WORK, 1 ) + CALL ZSYMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, CZERO, + $ A( 1, K ), 1 ) + A( K, K ) = A( K, K ) - ZDOTU( K-1, WORK, 1, A( 1, K ), + $ 1 ) + A( K, K+1 ) = A( K, K+1 ) - + $ ZDOTU( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 ) + CALL ZCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 ) + CALL ZSYMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, CZERO, + $ A( 1, K+1 ), 1 ) + A( K+1, K+1 ) = A( K+1, K+1 ) - + $ ZDOTU( K-1, WORK, 1, A( 1, K+1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* Interchange rows and columns K and IPIV(K) in the leading +* submatrix A(1:k+1,1:k+1) +* + KP = IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.GT.1 ) + $ CALL ZSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + CALL ZSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + ELSE +* +* Interchange rows and columns K and K+1 with -IPIV(K) and +* -IPIV(K+1)in the leading submatrix A(1:k+1,1:k+1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.GT.1 ) + $ CALL ZSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + CALL ZSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA ) +* + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + TEMP = A( K, K+1 ) + A( K, K+1 ) = A( KP, K+1 ) + A( KP, K+1 ) = TEMP + END IF +* + K = K + 1 + KP = -IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.GT.1 ) + $ CALL ZSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 ) + CALL ZSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + END IF +* + K = K + 1 + GO TO 30 + 40 CONTINUE +* + ELSE +* +* Compute inv(A) from the factorization A = L*D*L**T. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 50 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 60 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Invert the diagonal block. +* + A( K, K ) = CONE / A( K, K ) +* +* Compute column K of the inverse. +* + IF( K.LT.N ) THEN + CALL ZCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL ZSYMV( UPLO, N-K,-CONE, A( K+1, K+1 ), LDA, WORK, 1, + $ CZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - ZDOTU( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + END IF + KSTEP = 1 + ELSE +* +* 2 x 2 diagonal block +* +* Invert the diagonal block. +* + T = A( K, K-1 ) + AK = A( K-1, K-1 ) / T + AKP1 = A( K, K ) / T + AKKP1 = A( K, K-1 ) / T + D = T*( AK*AKP1-CONE ) + A( K-1, K-1 ) = AKP1 / D + A( K, K ) = AK / D + A( K, K-1 ) = -AKKP1 / D +* +* Compute columns K-1 and K of the inverse. +* + IF( K.LT.N ) THEN + CALL ZCOPY( N-K, A( K+1, K ), 1, WORK, 1 ) + CALL ZSYMV( UPLO, N-K,-CONE, A( K+1, K+1 ), LDA, WORK, 1, + $ CZERO, A( K+1, K ), 1 ) + A( K, K ) = A( K, K ) - ZDOTU( N-K, WORK, 1, A( K+1, K ), + $ 1 ) + A( K, K-1 ) = A( K, K-1 ) - + $ ZDOTU( N-K, A( K+1, K ), 1, A( K+1, K-1 ), + $ 1 ) + CALL ZCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 ) + CALL ZSYMV( UPLO, N-K,-CONE, A( K+1, K+1 ), LDA, WORK, 1, + $ CZERO, A( K+1, K-1 ), 1 ) + A( K-1, K-1 ) = A( K-1, K-1 ) - + $ ZDOTU( N-K, WORK, 1, A( K+1, K-1 ), 1 ) + END IF + KSTEP = 2 + END IF +* + IF( KSTEP.EQ.1 ) THEN +* +* Interchange rows and columns K and IPIV(K) in the trailing +* submatrix A(k-1:n,k-1:n) +* + KP = IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + CALL ZSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + ELSE +* +* Interchange rows and columns K and K-1 with -IPIV(K) and +* -IPIV(K-1) in the trailing submatrix A(k-1:n,k-1:n) +* + KP = -IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + CALL ZSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA ) +* + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + TEMP = A( K, K-1 ) + A( K, K-1 ) = A( KP, K-1 ) + A( KP, K-1 ) = TEMP + END IF +* + K = K - 1 + KP = -IPIV( K ) + IF( KP.NE.K ) THEN + IF( KP.LT.N ) + $ CALL ZSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 ) + CALL ZSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA ) + TEMP = A( K, K ) + A( K, K ) = A( KP, KP ) + A( KP, KP ) = TEMP + END IF + END IF +* + K = K - 1 + GO TO 50 + 60 CONTINUE + END IF +* + RETURN +* +* End of ZSYTRI_ROOK +* + END diff --git a/dspl/liblapack/SRC/zsytrs.f b/dspl/liblapack/SRC/zsytrs.f new file mode 100644 index 0000000..c9fcb4a --- /dev/null +++ b/dspl/liblapack/SRC/zsytrs.f @@ -0,0 +1,445 @@ +*> \brief \b ZSYTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYTRS solves a system of linear equations A*X = B with a complex +*> symmetric matrix A using the factorization A = U*D*U**T or +*> A = L*D*L**T computed by ZSYTRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZSYTRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSYTRF. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP + COMPLEX*16 AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMV, ZGERU, ZSCAL, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**T. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL ZGERU( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL ZSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K-1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K-1 ) + $ CALL ZSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + CALL ZGERU( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) + CALL ZGERU( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ), + $ LDB, B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K-1, K ) + AKM1 = A( K-1, K-1 ) / AKM1K + AK = A( K, K ) / AKM1K + DENOM = AKM1*AK - ONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / AKM1K + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U**T *X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U**T(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL ZGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ), + $ 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U**T(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + CALL ZGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ), + $ 1, ONE, B( K, 1 ), LDB ) + CALL ZGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, + $ A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB ) +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L**T. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL ZGERU( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL ZSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K+1 and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K+1 ) + $ CALL ZSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL ZGERU( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL ZGERU( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K+1, K ) + AKM1 = A( K, K ) / AKM1K + AK = A( K+1, K+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / AKM1K + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L**T *X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L**T(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL ZGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L**T(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL ZGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB ) + CALL ZGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ), + $ LDB, A( K+1, K-1 ), 1, ONE, B( K-1, 1 ), + $ LDB ) + END IF +* +* Interchange rows K and -IPIV(K). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of ZSYTRS +* + END diff --git a/dspl/liblapack/SRC/zsytrs2.f b/dspl/liblapack/SRC/zsytrs2.f new file mode 100644 index 0000000..c0ee206 --- /dev/null +++ b/dspl/liblapack/SRC/zsytrs2.f @@ -0,0 +1,361 @@ +*> \brief \b ZSYTRS2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRS2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRS2( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYTRS2 solves a system of linear equations A*X = B with a real +*> symmetric matrix A using the factorization A = U*D*U**T or +*> A = L*D*L**T computed by ZSYTRF and converted by ZSYCONV. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZSYTRF. +*> Note that A is input / output. This might be counter-intuitive, +*> and one may think that A is input only. A is input / output. This +*> is because, at the start of the subroutine, we permute A in a +*> "better" form and then we permute A back to its original form at +*> the end. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSYTRF. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZSYTRS2( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = (1.0D+0,0.0D+0) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IINFO, J, K, KP + COMPLEX*16 AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZSCAL, ZSYCONV, ZSWAP, ZTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRS2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Convert A +* + CALL ZSYCONV( UPLO, 'C', N, A, LDA, IPIV, WORK, IINFO ) +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**T. +* +* P**T * B + K=N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K-1 and -IPIV(K). + KP = -IPIV( K ) + IF( KP.EQ.-IPIV( K-1 ) ) + $ CALL ZSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-2 + END IF + END DO +* +* Compute (U \P**T * B) -> B [ (U \P**T * B) ] +* + CALL ZTRSM('L','U','N','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* Compute D \ B -> B [ D \ (U \P**T * B) ] +* + I=N + DO WHILE ( I .GE. 1 ) + IF( IPIV(I) .GT. 0 ) THEN + CALL ZSCAL( NRHS, ONE / A( I, I ), B( I, 1 ), LDB ) + ELSEIF ( I .GT. 1) THEN + IF ( IPIV(I-1) .EQ. IPIV(I) ) THEN + AKM1K = WORK(I) + AKM1 = A( I-1, I-1 ) / AKM1K + AK = A( I, I ) / AKM1K + DENOM = AKM1*AK - ONE + DO 15 J = 1, NRHS + BKM1 = B( I-1, J ) / AKM1K + BK = B( I, J ) / AKM1K + B( I-1, J ) = ( AK*BKM1-BK ) / DENOM + B( I, J ) = ( AKM1*BK-BKM1 ) / DENOM + 15 CONTINUE + I = I - 1 + ENDIF + ENDIF + I = I - 1 + END DO +* +* Compute (U**T \ B) -> B [ U**T \ (D \ (U \P**T * B) ) ] +* + CALL ZTRSM('L','U','T','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* P * B [ P * (U**T \ (D \ (U \P**T * B) )) ] +* + K=1 + DO WHILE ( K .LE. N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K-1 and -IPIV(K). + KP = -IPIV( K ) + IF( K .LT. N .AND. KP.EQ.-IPIV( K+1 ) ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+2 + ENDIF + END DO +* + ELSE +* +* Solve A*X = B, where A = L*D*L**T. +* +* P**T * B + K=1 + DO WHILE ( K .LE. N ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K and -IPIV(K+1). + KP = -IPIV( K+1 ) + IF( KP.EQ.-IPIV( K ) ) + $ CALL ZSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) + K=K+2 + ENDIF + END DO +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL ZTRSM('L','L','N','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* Compute D \ B -> B [ D \ (L \P**T * B) ] +* + I=1 + DO WHILE ( I .LE. N ) + IF( IPIV(I) .GT. 0 ) THEN + CALL ZSCAL( NRHS, ONE / A( I, I ), B( I, 1 ), LDB ) + ELSE + AKM1K = WORK(I) + AKM1 = A( I, I ) / AKM1K + AK = A( I+1, I+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO 25 J = 1, NRHS + BKM1 = B( I, J ) / AKM1K + BK = B( I+1, J ) / AKM1K + B( I, J ) = ( AK*BKM1-BK ) / DENOM + B( I+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 25 CONTINUE + I = I + 1 + ENDIF + I = I + 1 + END DO +* +* Compute (L**T \ B) -> B [ L**T \ (D \ (L \P**T * B) ) ] +* + CALL ZTRSM('L','L','T','U',N,NRHS,ONE,A,LDA,B,LDB) +* +* P * B [ P * (L**T \ (D \ (L \P**T * B) )) ] +* + K=N + DO WHILE ( K .GE. 1 ) + IF( IPIV( K ).GT.0 ) THEN +* 1 x 1 diagonal block +* Interchange rows K and IPIV(K). + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-1 + ELSE +* 2 x 2 diagonal block +* Interchange rows K-1 and -IPIV(K). + KP = -IPIV( K ) + IF( K.GT.1 .AND. KP.EQ.-IPIV( K-1 ) ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K=K-2 + ENDIF + END DO +* + END IF +* +* Revert A +* + CALL ZSYCONV( UPLO, 'R', N, A, LDA, IPIV, WORK, IINFO ) +* + RETURN +* +* End of ZSYTRS2 +* + END diff --git a/dspl/liblapack/SRC/zsytrs_3.f b/dspl/liblapack/SRC/zsytrs_3.f new file mode 100644 index 0000000..6736875 --- /dev/null +++ b/dspl/liblapack/SRC/zsytrs_3.f @@ -0,0 +1,371 @@ +*> \brief \b ZSYTRS_3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRS_3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), E( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> ZSYTRS_3 solves a system of linear equations A * X = B with a complex +*> symmetric matrix A using the factorization computed +*> by ZSYTRF_RK or ZSYTRF_BK: +*> +*> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T), +*> +*> where U (or L) is unit upper (or lower) triangular matrix, +*> U**T (or L**T) is the transpose of U (or L), P is a permutation +*> matrix, P**T is the transpose of P, and D is symmetric and block +*> diagonal with 1-by-1 and 2-by-2 diagonal blocks. +*> +*> This algorithm is using Level 3 BLAS. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are +*> stored as an upper or lower triangular matrix: +*> = 'U': Upper triangular, form is A = P*U*D*(U**T)*(P**T); +*> = 'L': Lower triangular, form is A = P*L*D*(L**T)*(P**T). +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> Diagonal of the block diagonal matrix D and factors U or L +*> as computed by ZSYTRF_RK and ZSYTRF_BK: +*> a) ONLY diagonal elements of the symmetric block diagonal +*> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k); +*> (superdiagonal (or subdiagonal) elements of D +*> should be provided on entry in array E), and +*> b) If UPLO = 'U': factor U in the superdiagonal part of A. +*> If UPLO = 'L': factor L in the subdiagonal part of A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (N) +*> On entry, contains the superdiagonal (or subdiagonal) +*> elements of the symmetric block diagonal matrix D +*> with 1-by-1 or 2-by-2 diagonal blocks, where +*> If UPLO = 'U': E(i) = D(i-1,i),i=2:N, E(1) not referenced; +*> If UPLO = 'L': E(i) = D(i+1,i),i=1:N-1, E(N) not referenced. +*> +*> NOTE: For 1-by-1 diagonal block D(k), where +*> 1 <= k <= N, the element E(k) is not referenced in both +*> UPLO = 'U' or UPLO = 'L' cases. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSYTRF_RK or ZSYTRF_BK. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16SYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> June 2017, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZSYTRS_3( UPLO, N, NRHS, A, LDA, E, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), E( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0,0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, J, K, KP + COMPLEX*16 AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZSCAL, ZSWAP, ZTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRS_3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Begin Upper +* +* Solve A*X = B, where A = U*D*U**T. +* +* P**T * B +* +* Interchange rows K and IPIV(K) of matrix B in the same order +* that the formation order of IPIV(I) vector for Upper case. +* +* (We can do the simple loop over IPIV with decrement -1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = N, 1, -1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* +* Compute (U \P**T * B) -> B [ (U \P**T * B) ] +* + CALL ZTRSM( 'L', 'U', 'N', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* Compute D \ B -> B [ D \ (U \P**T * B) ] +* + I = N + DO WHILE ( I.GE.1 ) + IF( IPIV( I ).GT.0 ) THEN + CALL ZSCAL( NRHS, ONE / A( I, I ), B( I, 1 ), LDB ) + ELSE IF ( I.GT.1 ) THEN + AKM1K = E( I ) + AKM1 = A( I-1, I-1 ) / AKM1K + AK = A( I, I ) / AKM1K + DENOM = AKM1*AK - ONE + DO J = 1, NRHS + BKM1 = B( I-1, J ) / AKM1K + BK = B( I, J ) / AKM1K + B( I-1, J ) = ( AK*BKM1-BK ) / DENOM + B( I, J ) = ( AKM1*BK-BKM1 ) / DENOM + END DO + I = I - 1 + END IF + I = I - 1 + END DO +* +* Compute (U**T \ B) -> B [ U**T \ (D \ (U \P**T * B) ) ] +* + CALL ZTRSM( 'L', 'U', 'T', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* P * B [ P * (U**T \ (D \ (U \P**T * B) )) ] +* +* Interchange rows K and IPIV(K) of matrix B in reverse order +* from the formation order of IPIV(I) vector for Upper case. +* +* (We can do the simple loop over IPIV with increment 1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = 1, N, 1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* + ELSE +* +* Begin Lower +* +* Solve A*X = B, where A = L*D*L**T. +* +* P**T * B +* Interchange rows K and IPIV(K) of matrix B in the same order +* that the formation order of IPIV(I) vector for Lower case. +* +* (We can do the simple loop over IPIV with increment 1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = 1, N, 1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL ZTRSM( 'L', 'L', 'N', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* Compute D \ B -> B [ D \ (L \P**T * B) ] +* + I = 1 + DO WHILE ( I.LE.N ) + IF( IPIV( I ).GT.0 ) THEN + CALL ZSCAL( NRHS, ONE / A( I, I ), B( I, 1 ), LDB ) + ELSE IF( I.LT.N ) THEN + AKM1K = E( I ) + AKM1 = A( I, I ) / AKM1K + AK = A( I+1, I+1 ) / AKM1K + DENOM = AKM1*AK - ONE + DO J = 1, NRHS + BKM1 = B( I, J ) / AKM1K + BK = B( I+1, J ) / AKM1K + B( I, J ) = ( AK*BKM1-BK ) / DENOM + B( I+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + END DO + I = I + 1 + END IF + I = I + 1 + END DO +* +* Compute (L**T \ B) -> B [ L**T \ (D \ (L \P**T * B) ) ] +* + CALL ZTRSM('L', 'L', 'T', 'U', N, NRHS, ONE, A, LDA, B, LDB ) +* +* P * B [ P * (L**T \ (D \ (L \P**T * B) )) ] +* +* Interchange rows K and IPIV(K) of matrix B in reverse order +* from the formation order of IPIV(I) vector for Lower case. +* +* (We can do the simple loop over IPIV with decrement -1, +* since the ABS value of IPIV(I) represents the row index +* of the interchange with row i in both 1x1 and 2x2 pivot cases) +* + DO K = N, 1, -1 + KP = ABS( IPIV( K ) ) + IF( KP.NE.K ) THEN + CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END IF + END DO +* +* END Lower +* + END IF +* + RETURN +* +* End of ZSYTRS_3 +* + END diff --git a/dspl/liblapack/SRC/zsytrs_aa.f b/dspl/liblapack/SRC/zsytrs_aa.f new file mode 100644 index 0000000..e62e9e4 --- /dev/null +++ b/dspl/liblapack/SRC/zsytrs_aa.f @@ -0,0 +1,285 @@ +*> \brief \b ZSYTRS_AA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRS_AA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYTRS_AA solves a system of linear equations A*X = B with a complex +*> symmetric matrix A using the factorization A = U*T*U**T or +*> A = L*T*L**T computed by ZSYTRF_AA. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*T*U**T; +*> = 'L': Lower triangular, form is A = L*T*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> Details of factors computed by ZSYTRF_AA. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges as computed by ZSYTRF_AA. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] WORK +*> \verbatim +*> WORK is DOUBLE array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER, LWORK >= MAX(1,3*N-2). +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZSYTRS_AA( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LDB, LWORK, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ) +* .. +* +* ===================================================================== +* + COMPLEX*16 ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER K, KP, LWKOPT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZGTSV, ZSWAP, ZLACPY, ZTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.MAX( 1, 3*N-2 ) .AND. .NOT.LQUERY ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRS_AA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + LWKOPT = (3*N-2) + WORK( 1 ) = LWKOPT + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*T*U**T. +* +* Pivot, P**T * B +* + DO K = 1, N + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* +* Compute (U \P**T * B) -> B [ (U \P**T * B) ] +* + CALL ZTRSM('L', 'U', 'T', 'U', N-1, NRHS, ONE, A( 1, 2 ), LDA, + $ B( 2, 1 ), LDB) +* +* Compute T \ B -> B [ T \ (U \P**T * B) ] +* + CALL ZLACPY( 'F', 1, N, A( 1, 1 ), LDA+1, WORK( N ), 1) + IF( N.GT.1 ) THEN + CALL ZLACPY( 'F', 1, N-1, A( 1, 2 ), LDA+1, WORK( 1 ), 1 ) + CALL ZLACPY( 'F', 1, N-1, A( 1, 2 ), LDA+1, WORK( 2*N ), 1 ) + END IF + CALL ZGTSV( N, NRHS, WORK( 1 ), WORK( N ), WORK( 2*N ), B, LDB, + $ INFO ) +* +* Compute (U**T \ B) -> B [ U**T \ (T \ (U \P**T * B) ) ] +* + CALL ZTRSM( 'L', 'U', 'N', 'U', N-1, NRHS, ONE, A( 1, 2 ), LDA, + $ B( 2, 1 ), LDB) +* +* Pivot, P * B [ P * (U**T \ (T \ (U \P**T * B) )) ] +* + DO K = N, 1, -1 + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* + ELSE +* +* Solve A*X = B, where A = L*T*L**T. +* +* Pivot, P**T * B +* + DO K = 1, N + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL ZTRSM( 'L', 'L', 'N', 'U', N-1, NRHS, ONE, A( 2, 1 ), LDA, + $ B( 2, 1 ), LDB) +* +* Compute T \ B -> B [ T \ (L \P**T * B) ] +* + CALL ZLACPY( 'F', 1, N, A(1, 1), LDA+1, WORK(N), 1) + IF( N.GT.1 ) THEN + CALL ZLACPY( 'F', 1, N-1, A( 2, 1 ), LDA+1, WORK( 1 ), 1 ) + CALL ZLACPY( 'F', 1, N-1, A( 2, 1 ), LDA+1, WORK( 2*N ), 1 ) + END IF + CALL ZGTSV( N, NRHS, WORK( 1 ), WORK(N), WORK( 2*N ), B, LDB, + $ INFO) +* +* Compute (L**T \ B) -> B [ L**T \ (T \ (L \P**T * B) ) ] +* + CALL ZTRSM( 'L', 'L', 'T', 'U', N-1, NRHS, ONE, A( 2, 1 ), LDA, + $ B( 2, 1 ), LDB) +* +* Pivot, P * B [ P * (L**T \ (T \ (L \P**T * B) )) ] +* + DO K = N, 1, -1 + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + END DO +* + END IF +* + RETURN +* +* End of ZSYTRS_AA +* + END diff --git a/dspl/liblapack/SRC/zsytrs_aa_2stage.f b/dspl/liblapack/SRC/zsytrs_aa_2stage.f new file mode 100644 index 0000000..c5d8947 --- /dev/null +++ b/dspl/liblapack/SRC/zsytrs_aa_2stage.f @@ -0,0 +1,281 @@ +*> \brief \b ZSYTRS_AA_2STAGE +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRS_AA_2STAGE + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, IPIV, +* IPIV2, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ), IPIV2( * ) +* COMPLEX*16 A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYTRS_AA_2STAGE solves a system of linear equations A*X = B with a complex +*> symmetric matrix A using the factorization A = U*T*U**T or +*> A = L*T*L**T computed by ZSYTRF_AA_2STAGE. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*T*U**T; +*> = 'L': Lower triangular, form is A = L*T*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> Details of factors computed by ZSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] TB +*> \verbatim +*> TB is COMPLEX*16 array, dimension (LTB) +*> Details of factors computed by ZSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] LTB +*> \verbatim +*> The size of the array TB. LTB >= 4*N. +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> ZSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in] IPIV2 +*> \verbatim +*> IPIV2 is INTEGER array, dimension (N) +*> Details of the interchanges as computed by +*> ZSYTRF_AA_2STAGE. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZSYTRS_AA_2STAGE( UPLO, N, NRHS, A, LDA, TB, LTB, + $ IPIV, IPIV2, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER N, NRHS, LDA, LTB, LDB, INFO +* .. +* .. Array Arguments .. + INTEGER IPIV( * ), IPIV2( * ) + COMPLEX*16 A( LDA, * ), TB( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ) ) +* .. +* .. Local Scalars .. + INTEGER LDTB, NB + LOGICAL UPPER +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZGBTRS, ZLASWP, ZTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LTB.LT.( 4*N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRS_AA_2STAGE', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* +* Read NB and compute LDTB +* + NB = INT( TB( 1 ) ) + LDTB = LTB/N +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*T*U**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL ZLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (U**T \P**T * B) -> B [ (U**T \P**T * B) ] +* + CALL ZTRSM( 'L', 'U', 'T', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (U**T \P**T * B) ] +* + CALL ZGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (U \ B) -> B [ U \ (T \ (U**T \P**T * B) ) ] +* + CALL ZTRSM( 'L', 'U', 'N', 'U', N-NB, NRHS, ONE, A(1, NB+1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (U \ (T \ (U**T \P**T * B) )) ] +* + CALL ZLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF +* + ELSE +* +* Solve A*X = B, where A = L*T*L**T. +* + IF( N.GT.NB ) THEN +* +* Pivot, P**T * B +* + CALL ZLASWP( NRHS, B, LDB, NB+1, N, IPIV, 1 ) +* +* Compute (L \P**T * B) -> B [ (L \P**T * B) ] +* + CALL ZTRSM( 'L', 'L', 'N', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* + END IF +* +* Compute T \ B -> B [ T \ (L \P**T * B) ] +* + CALL ZGBTRS( 'N', N, NB, NB, NRHS, TB, LDTB, IPIV2, B, LDB, + $ INFO) + IF( N.GT.NB ) THEN +* +* Compute (L**T \ B) -> B [ L**T \ (T \ (L \P**T * B) ) ] +* + CALL ZTRSM( 'L', 'L', 'T', 'U', N-NB, NRHS, ONE, A(NB+1, 1), + $ LDA, B(NB+1, 1), LDB) +* +* Pivot, P * B [ P * (L**T \ (T \ (L \P**T * B) )) ] +* + CALL ZLASWP( NRHS, B, LDB, NB+1, N, IPIV, -1 ) +* + END IF + END IF +* + RETURN +* +* End of ZSYTRS_AA_2STAGE +* + END diff --git a/dspl/liblapack/SRC/zsytrs_rook.f b/dspl/liblapack/SRC/zsytrs_rook.f new file mode 100644 index 0000000..0934336 --- /dev/null +++ b/dspl/liblapack/SRC/zsytrs_rook.f @@ -0,0 +1,484 @@ +*> \brief \b ZSYTRS_ROOK +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSYTRS_ROOK + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSYTRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSYTRS_ROOK solves a system of linear equations A*X = B with +*> a complex symmetric matrix A using the factorization A = U*D*U**T or +*> A = L*D*L**T computed by ZSYTRF_ROOK. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the details of the factorization are stored +*> as an upper or lower triangular matrix. +*> = 'U': Upper triangular, form is A = U*D*U**T; +*> = 'L': Lower triangular, form is A = L*D*L**T. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The block diagonal matrix D and the multipliers used to +*> obtain the factor U or L as computed by ZSYTRF_ROOK. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> Details of the interchanges and the block structure of D +*> as determined by ZSYTRF_ROOK. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYcomputational +* +*> \par Contributors: +* ================== +*> +*> \verbatim +*> +*> December 2016, Igor Kozachenko, +*> Computer Science Division, +*> University of California, Berkeley +*> +*> September 2007, Sven Hammarling, Nicholas J. Higham, Craig Lucas, +*> School of Mathematics, +*> University of Manchester +*> +*> \endverbatim +* +* ===================================================================== + SUBROUTINE ZSYTRS_ROOK( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER J, K, KP + COMPLEX*16 AK, AKM1, AKM1K, BK, BKM1, DENOM +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZGEMV, ZGERU, ZSCAL, ZSWAP, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSYTRS_ROOK', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Solve A*X = B, where A = U*D*U**T. +* +* First solve U*D*X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 10 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 30 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in column K of A. +* + CALL ZGERU( K-1, NRHS, -CONE, A( 1, K ), 1, B( K, 1 ), LDB, + $ B( 1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL ZSCAL( NRHS, CONE / A( K, K ), B( K, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K and -IPIV(K) THEN K-1 and -IPIV(K-1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K-1 ) + IF( KP.NE.K-1 ) + $ CALL ZSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(U(K)), where U(K) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.GT.2 ) THEN + CALL ZGERU( K-2, NRHS,-CONE, A( 1, K ), 1, B( K, 1 ), + $ LDB, B( 1, 1 ), LDB ) + CALL ZGERU( K-2, NRHS,-CONE, A( 1, K-1 ), 1, B( K-1, 1 ), + $ LDB, B( 1, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K-1, K ) + AKM1 = A( K-1, K-1 ) / AKM1K + AK = A( K, K ) / AKM1K + DENOM = AKM1*AK - CONE + DO 20 J = 1, NRHS + BKM1 = B( K-1, J ) / AKM1K + BK = B( K, J ) / AKM1K + B( K-1, J ) = ( AK*BKM1-BK ) / DENOM + B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM + 20 CONTINUE + K = K - 2 + END IF +* + GO TO 10 + 30 CONTINUE +* +* Next solve U**T *X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 40 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 50 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(U**T(K)), where U(K) is the transformation +* stored in column K of A. +* + IF( K.GT.1 ) + $ CALL ZGEMV( 'Transpose', K-1, NRHS, -CONE, B, + $ LDB, A( 1, K ), 1, CONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(U**T(K+1)), where U(K+1) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.GT.1 ) THEN + CALL ZGEMV( 'Transpose', K-1, NRHS, -CONE, B, + $ LDB, A( 1, K ), 1, CONE, B( K, 1 ), LDB ) + CALL ZGEMV( 'Transpose', K-1, NRHS, -CONE, B, + $ LDB, A( 1, K+1 ), 1, CONE, B( K+1, 1 ), LDB ) + END IF +* +* Interchange rows K and -IPIV(K) THEN K+1 and -IPIV(K+1). +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K+1 ) + IF( KP.NE.K+1 ) + $ CALL ZSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* + K = K + 2 + END IF +* + GO TO 40 + 50 CONTINUE +* + ELSE +* +* Solve A*X = B, where A = L*D*L**T. +* +* First solve L*D*X = B, overwriting B with X. +* +* K is the main loop index, increasing from 1 to N in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = 1 + 60 CONTINUE +* +* If K > N, exit from loop. +* + IF( K.GT.N ) + $ GO TO 80 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL ZGERU( N-K, NRHS, -CONE, A( K+1, K ), 1, B( K, 1 ), + $ LDB, B( K+1, 1 ), LDB ) +* +* Multiply by the inverse of the diagonal block. +* + CALL ZSCAL( NRHS, CONE / A( K, K ), B( K, 1 ), LDB ) + K = K + 1 + ELSE +* +* 2 x 2 diagonal block +* +* Interchange rows K and -IPIV(K) THEN K+1 and -IPIV(K+1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K+1 ) + IF( KP.NE.K+1 ) + $ CALL ZSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB ) +* +* Multiply by inv(L(K)), where L(K) is the transformation +* stored in columns K and K+1 of A. +* + IF( K.LT.N-1 ) THEN + CALL ZGERU( N-K-1, NRHS,-CONE, A( K+2, K ), 1, B( K, 1 ), + $ LDB, B( K+2, 1 ), LDB ) + CALL ZGERU( N-K-1, NRHS,-CONE, A( K+2, K+1 ), 1, + $ B( K+1, 1 ), LDB, B( K+2, 1 ), LDB ) + END IF +* +* Multiply by the inverse of the diagonal block. +* + AKM1K = A( K+1, K ) + AKM1 = A( K, K ) / AKM1K + AK = A( K+1, K+1 ) / AKM1K + DENOM = AKM1*AK - CONE + DO 70 J = 1, NRHS + BKM1 = B( K, J ) / AKM1K + BK = B( K+1, J ) / AKM1K + B( K, J ) = ( AK*BKM1-BK ) / DENOM + B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM + 70 CONTINUE + K = K + 2 + END IF +* + GO TO 60 + 80 CONTINUE +* +* Next solve L**T *X = B, overwriting B with X. +* +* K is the main loop index, decreasing from N to 1 in steps of +* 1 or 2, depending on the size of the diagonal blocks. +* + K = N + 90 CONTINUE +* +* If K < 1, exit from loop. +* + IF( K.LT.1 ) + $ GO TO 100 +* + IF( IPIV( K ).GT.0 ) THEN +* +* 1 x 1 diagonal block +* +* Multiply by inv(L**T(K)), where L(K) is the transformation +* stored in column K of A. +* + IF( K.LT.N ) + $ CALL ZGEMV( 'Transpose', N-K, NRHS, -CONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, CONE, B( K, 1 ), LDB ) +* +* Interchange rows K and IPIV(K). +* + KP = IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) + K = K - 1 + ELSE +* +* 2 x 2 diagonal block +* +* Multiply by inv(L**T(K-1)), where L(K-1) is the transformation +* stored in columns K-1 and K of A. +* + IF( K.LT.N ) THEN + CALL ZGEMV( 'Transpose', N-K, NRHS, -CONE, B( K+1, 1 ), + $ LDB, A( K+1, K ), 1, CONE, B( K, 1 ), LDB ) + CALL ZGEMV( 'Transpose', N-K, NRHS, -CONE, B( K+1, 1 ), + $ LDB, A( K+1, K-1 ), 1, CONE, B( K-1, 1 ), + $ LDB ) + END IF +* +* Interchange rows K and -IPIV(K) THEN K-1 and -IPIV(K-1) +* + KP = -IPIV( K ) + IF( KP.NE.K ) + $ CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB ) +* + KP = -IPIV( K-1 ) + IF( KP.NE.K-1 ) + $ CALL ZSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB ) +* + K = K - 2 + END IF +* + GO TO 90 + 100 CONTINUE + END IF +* + RETURN +* +* End of ZSYTRS_ROOK +* + END diff --git a/dspl/liblapack/SRC/ztbcon.f b/dspl/liblapack/SRC/ztbcon.f new file mode 100644 index 0000000..ceff2b9 --- /dev/null +++ b/dspl/liblapack/SRC/ztbcon.f @@ -0,0 +1,291 @@ +*> \brief \b ZTBCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTBCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTBCON( NORM, UPLO, DIAG, N, KD, AB, LDAB, RCOND, WORK, +* RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER INFO, KD, LDAB, N +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 AB( LDAB, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTBCON estimates the reciprocal of the condition number of a +*> triangular band matrix A, in either the 1-norm or the infinity-norm. +*> +*> The norm of A is computed and an estimate is obtained for +*> norm(inv(A)), then the reciprocal of the condition number is +*> computed as +*> RCOND = 1 / ( norm(A) * norm(inv(A)) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals or subdiagonals of the +*> triangular band matrix A. KD >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> The upper or lower triangular band matrix A, stored in the +*> first kd+1 rows of the array. The j-th column of A is stored +*> in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> If DIAG = 'U', the diagonal elements of A are not referenced +*> and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(norm(A) * norm(inv(A))). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZTBCON( NORM, UPLO, DIAG, N, KD, AB, LDAB, RCOND, WORK, + $ RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, KD, LDAB, N + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 AB( LDAB, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + DOUBLE PRECISION AINVNM, ANORM, SCALE, SMLNUM, XNORM + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH, ZLANTB + EXTERNAL LSAME, IZAMAX, DLAMCH, ZLANTB +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDRSCL, ZLACN2, ZLATBS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTBCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = DLAMCH( 'Safe minimum' )*DBLE( MAX( N, 1 ) ) +* +* Compute the 1-norm of the triangular matrix A or A**H. +* + ANORM = ZLANTB( NORM, UPLO, DIAG, N, KD, AB, LDAB, RWORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the 1-norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL ZLATBS( UPLO, 'No transpose', DIAG, NORMIN, N, KD, + $ AB, LDAB, WORK, SCALE, RWORK, INFO ) + ELSE +* +* Multiply by inv(A**H). +* + CALL ZLATBS( UPLO, 'Conjugate transpose', DIAG, NORMIN, + $ N, KD, AB, LDAB, WORK, SCALE, RWORK, INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = IZAMAX( N, WORK, 1 ) + XNORM = CABS1( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL ZDRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of ZTBCON +* + END diff --git a/dspl/liblapack/SRC/ztbrfs.f b/dspl/liblapack/SRC/ztbrfs.f new file mode 100644 index 0000000..50d9a57 --- /dev/null +++ b/dspl/liblapack/SRC/ztbrfs.f @@ -0,0 +1,497 @@ +*> \brief \b ZTBRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTBRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTBRFS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, +* LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, KD, LDAB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX*16 AB( LDAB, * ), B( LDB, * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTBRFS provides error bounds and backward error estimates for the +*> solution to a system of linear equations with a triangular band +*> coefficient matrix. +*> +*> The solution matrix X must be computed by ZTBTRS or some other +*> means before entering this routine. ZTBRFS does not do iterative +*> refinement because doing so cannot improve the backward error. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals or subdiagonals of the +*> triangular band matrix A. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> The upper or lower triangular band matrix A, stored in the +*> first kd+1 rows of the array. The j-th column of A is stored +*> in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> If DIAG = 'U', the diagonal elements of A are not referenced +*> and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> The solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZTBRFS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, + $ LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, KD, LDAB, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 AB( LDAB, * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANSN, TRANST + INTEGER I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZLACN2, ZTBMV, ZTBSV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX, MIN +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -12 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTBRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = KD + 2 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL ZCOPY( N, X( 1, J ), 1, WORK, 1 ) + CALL ZTBMV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, WORK, 1 ) + CALL ZAXPY( N, -ONE, B( 1, J ), 1, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = CABS1( X( K, J ) ) + DO 30 I = MAX( 1, K-KD ), K + RWORK( I ) = RWORK( I ) + + $ CABS1( AB( KD+1+I-K, K ) )*XK + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = CABS1( X( K, J ) ) + DO 50 I = MAX( 1, K-KD ), K - 1 + RWORK( I ) = RWORK( I ) + + $ CABS1( AB( KD+1+I-K, K ) )*XK + 50 CONTINUE + RWORK( K ) = RWORK( K ) + XK + 60 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = CABS1( X( K, J ) ) + DO 70 I = K, MIN( N, K+KD ) + RWORK( I ) = RWORK( I ) + + $ CABS1( AB( 1+I-K, K ) )*XK + 70 CONTINUE + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = CABS1( X( K, J ) ) + DO 90 I = K + 1, MIN( N, K+KD ) + RWORK( I ) = RWORK( I ) + + $ CABS1( AB( 1+I-K, K ) )*XK + 90 CONTINUE + RWORK( K ) = RWORK( K ) + XK + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A**H)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = MAX( 1, K-KD ), K + S = S + CABS1( AB( KD+1+I-K, K ) )* + $ CABS1( X( I, J ) ) + 110 CONTINUE + RWORK( K ) = RWORK( K ) + S + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = CABS1( X( K, J ) ) + DO 130 I = MAX( 1, K-KD ), K - 1 + S = S + CABS1( AB( KD+1+I-K, K ) )* + $ CABS1( X( I, J ) ) + 130 CONTINUE + RWORK( K ) = RWORK( K ) + S + 140 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, MIN( N, K+KD ) + S = S + CABS1( AB( 1+I-K, K ) )* + $ CABS1( X( I, J ) ) + 150 CONTINUE + RWORK( K ) = RWORK( K ) + S + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = CABS1( X( K, J ) ) + DO 170 I = K + 1, MIN( N, K+KD ) + S = S + CABS1( AB( 1+I-K, K ) )* + $ CABS1( X( I, J ) ) + 170 CONTINUE + RWORK( K ) = RWORK( K ) + S + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL ZTBSV( UPLO, TRANST, DIAG, N, KD, AB, LDAB, WORK, + $ 1 ) + DO 220 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 230 CONTINUE + CALL ZTBSV( UPLO, TRANSN, DIAG, N, KD, AB, LDAB, WORK, + $ 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of ZTBRFS +* + END diff --git a/dspl/liblapack/SRC/ztbtrs.f b/dspl/liblapack/SRC/ztbtrs.f new file mode 100644 index 0000000..06de7d7 --- /dev/null +++ b/dspl/liblapack/SRC/ztbtrs.f @@ -0,0 +1,244 @@ +*> \brief \b ZTBTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTBTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTBTRS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, +* LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX*16 AB( LDAB, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTBTRS solves a triangular system of the form +*> +*> A * X = B, A**T * X = B, or A**H * X = B, +*> +*> where A is a triangular band matrix of order N, and B is an +*> N-by-NRHS matrix. A check is made to verify that A is nonsingular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] KD +*> \verbatim +*> KD is INTEGER +*> The number of superdiagonals or subdiagonals of the +*> triangular band matrix A. KD >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AB +*> \verbatim +*> AB is COMPLEX*16 array, dimension (LDAB,N) +*> The upper or lower triangular band matrix A, stored in the +*> first kd+1 rows of AB. The j-th column of A is stored +*> in the j-th column of the array AB as follows: +*> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; +*> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). +*> If DIAG = 'U', the diagonal elements of A are not referenced +*> and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDAB +*> \verbatim +*> LDAB is INTEGER +*> The leading dimension of the array AB. LDAB >= KD+1. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, if INFO = 0, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of A is zero, +*> indicating that the matrix is singular and the +*> solutions X have not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZTBTRS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B, + $ LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, KD, LDAB, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 AB( LDAB, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZTBSV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOUNIT = LSAME( DIAG, 'N' ) + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( KD.LT.0 ) THEN + INFO = -5 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -6 + ELSE IF( LDAB.LT.KD+1 ) THEN + INFO = -8 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTBTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + DO 10 INFO = 1, N + IF( AB( KD+1, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE + DO 20 INFO = 1, N + IF( AB( 1, INFO ).EQ.ZERO ) + $ RETURN + 20 CONTINUE + END IF + END IF + INFO = 0 +* +* Solve A * X = B, A**T * X = B, or A**H * X = B. +* + DO 30 J = 1, NRHS + CALL ZTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, B( 1, J ), 1 ) + 30 CONTINUE +* + RETURN +* +* End of ZTBTRS +* + END diff --git a/dspl/liblapack/SRC/ztfsm.f b/dspl/liblapack/SRC/ztfsm.f new file mode 100644 index 0000000..8d4af93 --- /dev/null +++ b/dspl/liblapack/SRC/ztfsm.f @@ -0,0 +1,1026 @@ +*> \brief \b ZTFSM solves a matrix equation (one operand is a triangular matrix in RFP format). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTFSM + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTFSM( TRANSR, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, +* B, LDB ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, DIAG, SIDE, TRANS, UPLO +* INTEGER LDB, M, N +* COMPLEX*16 ALPHA +* .. +* .. Array Arguments .. +* COMPLEX*16 A( 0: * ), B( 0: LDB-1, 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> Level 3 BLAS like routine for A in RFP Format. +*> +*> ZTFSM solves the matrix equation +*> +*> op( A )*X = alpha*B or X*op( A ) = alpha*B +*> +*> where alpha is a scalar, X and B are m by n matrices, A is a unit, or +*> non-unit, upper or lower triangular matrix and op( A ) is one of +*> +*> op( A ) = A or op( A ) = A**H. +*> +*> A is in Rectangular Full Packed (RFP) Format. +*> +*> The matrix X is overwritten on B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal Form of RFP A is stored; +*> = 'C': The Conjugate-transpose Form of RFP A is stored. +*> \endverbatim +*> +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> On entry, SIDE specifies whether op( A ) appears on the left +*> or right of X as follows: +*> +*> SIDE = 'L' or 'l' op( A )*X = alpha*B. +*> +*> SIDE = 'R' or 'r' X*op( A ) = alpha*B. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the RFP matrix A came from +*> an upper or lower triangular matrix as follows: +*> UPLO = 'U' or 'u' RFP A came from an upper triangular matrix +*> UPLO = 'L' or 'l' RFP A came from a lower triangular matrix +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> On entry, TRANS specifies the form of op( A ) to be used +*> in the matrix multiplication as follows: +*> +*> TRANS = 'N' or 'n' op( A ) = A. +*> +*> TRANS = 'C' or 'c' op( A ) = conjg( A' ). +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> On entry, DIAG specifies whether or not RFP A is unit +*> triangular as follows: +*> +*> DIAG = 'U' or 'u' A is assumed to be unit triangular. +*> +*> DIAG = 'N' or 'n' A is not assumed to be unit +*> triangular. +*> +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the number of rows of B. M must be at +*> least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of B. N must be +*> at least zero. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 +*> On entry, ALPHA specifies the scalar alpha. When alpha is +*> zero then A is not referenced and B need not be set before +*> entry. +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (N*(N+1)/2) +*> NT = N*(N+1)/2. On entry, the matrix A in RFP Format. +*> RFP Format is described by TRANSR, UPLO and N as follows: +*> If TRANSR='N' then RFP A is (0:N,0:K-1) when N is even; +*> K=N/2. RFP A is (0:N-1,0:K) when N is odd; K=N/2. If +*> TRANSR = 'C' then RFP is the Conjugate-transpose of RFP A as +*> defined when TRANSR = 'N'. The contents of RFP A are defined +*> by UPLO as follows: If UPLO = 'U' the RFP A contains the NT +*> elements of upper packed A either in normal or +*> conjugate-transpose Format. If UPLO = 'L' the RFP A contains +*> the NT elements of lower packed A either in normal or +*> conjugate-transpose Format. The LDA of RFP A is (N+1)/2 when +*> TRANSR = 'C'. When TRANSR is 'N' the LDA is N+1 when N is +*> even and is N when is odd. +*> See the Note below for more details. Unchanged on exit. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> Before entry, the leading m by n part of the array B must +*> contain the right-hand side matrix B, and on exit is +*> overwritten by the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> On entry, LDB specifies the first dimension of B as declared +*> in the calling (sub) program. LDB must be at least +*> max( 1, m ). +*> Unchanged on exit. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Standard Packed Format when N is even. +*> We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> conjugate-transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> conjugate-transpose of the last three columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- -- +*> 03 04 05 33 43 53 +*> -- -- +*> 13 14 15 00 44 54 +*> -- +*> 23 24 25 10 11 55 +*> +*> 33 34 35 20 21 22 +*> -- +*> 00 44 45 30 31 32 +*> -- -- +*> 01 11 55 40 41 42 +*> -- -- -- +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- -- +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We next consider Standard Packed Format when N is odd. +*> We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> conjugate-transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> conjugate-transpose of the last two columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- +*> 02 03 04 00 33 43 +*> -- +*> 12 13 14 10 11 44 +*> +*> 22 23 24 20 21 22 +*> -- +*> 00 33 34 30 31 32 +*> -- -- +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- +*> 02 12 22 00 01 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 11 33 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTFSM( TRANSR, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, + $ B, LDB ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, DIAG, SIDE, TRANS, UPLO + INTEGER LDB, M, N + COMPLEX*16 ALPHA +* .. +* .. Array Arguments .. + COMPLEX*16 A( 0: * ), B( 0: LDB-1, 0: * ) +* .. +* +* ===================================================================== +* .. +* .. Parameters .. + COMPLEX*16 CONE, CZERO + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ), + $ CZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LSIDE, MISODD, NISODD, NORMALTRANSR, + $ NOTRANS + INTEGER M1, M2, N1, N2, K, INFO, I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMM, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LSIDE = LSAME( SIDE, 'L' ) + LOWER = LSAME( UPLO, 'L' ) + NOTRANS = LSAME( TRANS, 'N' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSIDE .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -2 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -3 + ELSE IF( .NOT.NOTRANS .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -4 + ELSE IF( .NOT.LSAME( DIAG, 'N' ) .AND. .NOT.LSAME( DIAG, 'U' ) ) + $ THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -6 + ELSE IF( N.LT.0 ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTFSM ', -INFO ) + RETURN + END IF +* +* Quick return when ( (N.EQ.0).OR.(M.EQ.0) ) +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN +* +* Quick return when ALPHA.EQ.(0D+0,0D+0) +* + IF( ALPHA.EQ.CZERO ) THEN + DO 20 J = 0, N - 1 + DO 10 I = 0, M - 1 + B( I, J ) = CZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* + IF( LSIDE ) THEN +* +* SIDE = 'L' +* +* A is M-by-M. +* If M is odd, set NISODD = .TRUE., and M1 and M2. +* If M is even, NISODD = .FALSE., and M. +* + IF( MOD( M, 2 ).EQ.0 ) THEN + MISODD = .FALSE. + K = M / 2 + ELSE + MISODD = .TRUE. + IF( LOWER ) THEN + M2 = M / 2 + M1 = M - M2 + ELSE + M1 = M / 2 + M2 = M - M1 + END IF + END IF +* + IF( MISODD ) THEN +* +* SIDE = 'L' and N is odd +* + IF( NORMALTRANSR ) THEN +* +* SIDE = 'L', N is odd, and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'L', and +* TRANS = 'N' +* + IF( M.EQ.1 ) THEN + CALL ZTRSM( 'L', 'L', 'N', DIAG, M1, N, ALPHA, + $ A, M, B, LDB ) + ELSE + CALL ZTRSM( 'L', 'L', 'N', DIAG, M1, N, ALPHA, + $ A( 0 ), M, B, LDB ) + CALL ZGEMM( 'N', 'N', M2, N, M1, -CONE, A( M1 ), + $ M, B, LDB, ALPHA, B( M1, 0 ), LDB ) + CALL ZTRSM( 'L', 'U', 'C', DIAG, M2, N, CONE, + $ A( M ), M, B( M1, 0 ), LDB ) + END IF +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'L', and +* TRANS = 'C' +* + IF( M.EQ.1 ) THEN + CALL ZTRSM( 'L', 'L', 'C', DIAG, M1, N, ALPHA, + $ A( 0 ), M, B, LDB ) + ELSE + CALL ZTRSM( 'L', 'U', 'N', DIAG, M2, N, ALPHA, + $ A( M ), M, B( M1, 0 ), LDB ) + CALL ZGEMM( 'C', 'N', M1, N, M2, -CONE, A( M1 ), + $ M, B( M1, 0 ), LDB, ALPHA, B, LDB ) + CALL ZTRSM( 'L', 'L', 'C', DIAG, M1, N, CONE, + $ A( 0 ), M, B, LDB ) + END IF +* + END IF +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'N', and UPLO = 'U' +* + IF( .NOT.NOTRANS ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'U', and +* TRANS = 'N' +* + CALL ZTRSM( 'L', 'L', 'N', DIAG, M1, N, ALPHA, + $ A( M2 ), M, B, LDB ) + CALL ZGEMM( 'C', 'N', M2, N, M1, -CONE, A( 0 ), M, + $ B, LDB, ALPHA, B( M1, 0 ), LDB ) + CALL ZTRSM( 'L', 'U', 'C', DIAG, M2, N, CONE, + $ A( M1 ), M, B( M1, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'U', and +* TRANS = 'C' +* + CALL ZTRSM( 'L', 'U', 'N', DIAG, M2, N, ALPHA, + $ A( M1 ), M, B( M1, 0 ), LDB ) + CALL ZGEMM( 'N', 'N', M1, N, M2, -CONE, A( 0 ), M, + $ B( M1, 0 ), LDB, ALPHA, B, LDB ) + CALL ZTRSM( 'L', 'L', 'C', DIAG, M1, N, CONE, + $ A( M2 ), M, B, LDB ) +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'L', N is odd, and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'C', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'C', UPLO = 'L', and +* TRANS = 'N' +* + IF( M.EQ.1 ) THEN + CALL ZTRSM( 'L', 'U', 'C', DIAG, M1, N, ALPHA, + $ A( 0 ), M1, B, LDB ) + ELSE + CALL ZTRSM( 'L', 'U', 'C', DIAG, M1, N, ALPHA, + $ A( 0 ), M1, B, LDB ) + CALL ZGEMM( 'C', 'N', M2, N, M1, -CONE, + $ A( M1*M1 ), M1, B, LDB, ALPHA, + $ B( M1, 0 ), LDB ) + CALL ZTRSM( 'L', 'L', 'N', DIAG, M2, N, CONE, + $ A( 1 ), M1, B( M1, 0 ), LDB ) + END IF +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'C', UPLO = 'L', and +* TRANS = 'C' +* + IF( M.EQ.1 ) THEN + CALL ZTRSM( 'L', 'U', 'N', DIAG, M1, N, ALPHA, + $ A( 0 ), M1, B, LDB ) + ELSE + CALL ZTRSM( 'L', 'L', 'C', DIAG, M2, N, ALPHA, + $ A( 1 ), M1, B( M1, 0 ), LDB ) + CALL ZGEMM( 'N', 'N', M1, N, M2, -CONE, + $ A( M1*M1 ), M1, B( M1, 0 ), LDB, + $ ALPHA, B, LDB ) + CALL ZTRSM( 'L', 'U', 'N', DIAG, M1, N, CONE, + $ A( 0 ), M1, B, LDB ) + END IF +* + END IF +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'C', and UPLO = 'U' +* + IF( .NOT.NOTRANS ) THEN +* +* SIDE ='L', N is odd, TRANSR = 'C', UPLO = 'U', and +* TRANS = 'N' +* + CALL ZTRSM( 'L', 'U', 'C', DIAG, M1, N, ALPHA, + $ A( M2*M2 ), M2, B, LDB ) + CALL ZGEMM( 'N', 'N', M2, N, M1, -CONE, A( 0 ), M2, + $ B, LDB, ALPHA, B( M1, 0 ), LDB ) + CALL ZTRSM( 'L', 'L', 'N', DIAG, M2, N, CONE, + $ A( M1*M2 ), M2, B( M1, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is odd, TRANSR = 'C', UPLO = 'U', and +* TRANS = 'C' +* + CALL ZTRSM( 'L', 'L', 'C', DIAG, M2, N, ALPHA, + $ A( M1*M2 ), M2, B( M1, 0 ), LDB ) + CALL ZGEMM( 'C', 'N', M1, N, M2, -CONE, A( 0 ), M2, + $ B( M1, 0 ), LDB, ALPHA, B, LDB ) + CALL ZTRSM( 'L', 'U', 'N', DIAG, M1, N, CONE, + $ A( M2*M2 ), M2, B, LDB ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'L' and N is even +* + IF( NORMALTRANSR ) THEN +* +* SIDE = 'L', N is even, and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SIDE ='L', N is even, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'L', +* and TRANS = 'N' +* + CALL ZTRSM( 'L', 'L', 'N', DIAG, K, N, ALPHA, + $ A( 1 ), M+1, B, LDB ) + CALL ZGEMM( 'N', 'N', K, N, K, -CONE, A( K+1 ), + $ M+1, B, LDB, ALPHA, B( K, 0 ), LDB ) + CALL ZTRSM( 'L', 'U', 'C', DIAG, K, N, CONE, + $ A( 0 ), M+1, B( K, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'L', +* and TRANS = 'C' +* + CALL ZTRSM( 'L', 'U', 'N', DIAG, K, N, ALPHA, + $ A( 0 ), M+1, B( K, 0 ), LDB ) + CALL ZGEMM( 'C', 'N', K, N, K, -CONE, A( K+1 ), + $ M+1, B( K, 0 ), LDB, ALPHA, B, LDB ) + CALL ZTRSM( 'L', 'L', 'C', DIAG, K, N, CONE, + $ A( 1 ), M+1, B, LDB ) +* + END IF +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'N', and UPLO = 'U' +* + IF( .NOT.NOTRANS ) THEN +* +* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'U', +* and TRANS = 'N' +* + CALL ZTRSM( 'L', 'L', 'N', DIAG, K, N, ALPHA, + $ A( K+1 ), M+1, B, LDB ) + CALL ZGEMM( 'C', 'N', K, N, K, -CONE, A( 0 ), M+1, + $ B, LDB, ALPHA, B( K, 0 ), LDB ) + CALL ZTRSM( 'L', 'U', 'C', DIAG, K, N, CONE, + $ A( K ), M+1, B( K, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'U', +* and TRANS = 'C' + CALL ZTRSM( 'L', 'U', 'N', DIAG, K, N, ALPHA, + $ A( K ), M+1, B( K, 0 ), LDB ) + CALL ZGEMM( 'N', 'N', K, N, K, -CONE, A( 0 ), M+1, + $ B( K, 0 ), LDB, ALPHA, B, LDB ) + CALL ZTRSM( 'L', 'L', 'C', DIAG, K, N, CONE, + $ A( K+1 ), M+1, B, LDB ) +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'L', N is even, and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SIDE ='L', N is even, TRANSR = 'C', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='L', N is even, TRANSR = 'C', UPLO = 'L', +* and TRANS = 'N' +* + CALL ZTRSM( 'L', 'U', 'C', DIAG, K, N, ALPHA, + $ A( K ), K, B, LDB ) + CALL ZGEMM( 'C', 'N', K, N, K, -CONE, + $ A( K*( K+1 ) ), K, B, LDB, ALPHA, + $ B( K, 0 ), LDB ) + CALL ZTRSM( 'L', 'L', 'N', DIAG, K, N, CONE, + $ A( 0 ), K, B( K, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'C', UPLO = 'L', +* and TRANS = 'C' +* + CALL ZTRSM( 'L', 'L', 'C', DIAG, K, N, ALPHA, + $ A( 0 ), K, B( K, 0 ), LDB ) + CALL ZGEMM( 'N', 'N', K, N, K, -CONE, + $ A( K*( K+1 ) ), K, B( K, 0 ), LDB, + $ ALPHA, B, LDB ) + CALL ZTRSM( 'L', 'U', 'N', DIAG, K, N, CONE, + $ A( K ), K, B, LDB ) +* + END IF +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'C', and UPLO = 'U' +* + IF( .NOT.NOTRANS ) THEN +* +* SIDE ='L', N is even, TRANSR = 'C', UPLO = 'U', +* and TRANS = 'N' +* + CALL ZTRSM( 'L', 'U', 'C', DIAG, K, N, ALPHA, + $ A( K*( K+1 ) ), K, B, LDB ) + CALL ZGEMM( 'N', 'N', K, N, K, -CONE, A( 0 ), K, B, + $ LDB, ALPHA, B( K, 0 ), LDB ) + CALL ZTRSM( 'L', 'L', 'N', DIAG, K, N, CONE, + $ A( K*K ), K, B( K, 0 ), LDB ) +* + ELSE +* +* SIDE ='L', N is even, TRANSR = 'C', UPLO = 'U', +* and TRANS = 'C' +* + CALL ZTRSM( 'L', 'L', 'C', DIAG, K, N, ALPHA, + $ A( K*K ), K, B( K, 0 ), LDB ) + CALL ZGEMM( 'C', 'N', K, N, K, -CONE, A( 0 ), K, + $ B( K, 0 ), LDB, ALPHA, B, LDB ) + CALL ZTRSM( 'L', 'U', 'N', DIAG, K, N, CONE, + $ A( K*( K+1 ) ), K, B, LDB ) +* + END IF +* + END IF +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'R' +* +* A is N-by-N. +* If N is odd, set NISODD = .TRUE., and N1 and N2. +* If N is even, NISODD = .FALSE., and K. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + NISODD = .FALSE. + K = N / 2 + ELSE + NISODD = .TRUE. + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF + END IF +* + IF( NISODD ) THEN +* +* SIDE = 'R' and N is odd +* + IF( NORMALTRANSR ) THEN +* +* SIDE = 'R', N is odd, and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'L', and +* TRANS = 'N' +* + CALL ZTRSM( 'R', 'U', 'C', DIAG, M, N2, ALPHA, + $ A( N ), N, B( 0, N1 ), LDB ) + CALL ZGEMM( 'N', 'N', M, N1, N2, -CONE, B( 0, N1 ), + $ LDB, A( N1 ), N, ALPHA, B( 0, 0 ), + $ LDB ) + CALL ZTRSM( 'R', 'L', 'N', DIAG, M, N1, CONE, + $ A( 0 ), N, B( 0, 0 ), LDB ) +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'L', and +* TRANS = 'C' +* + CALL ZTRSM( 'R', 'L', 'C', DIAG, M, N1, ALPHA, + $ A( 0 ), N, B( 0, 0 ), LDB ) + CALL ZGEMM( 'N', 'C', M, N2, N1, -CONE, B( 0, 0 ), + $ LDB, A( N1 ), N, ALPHA, B( 0, N1 ), + $ LDB ) + CALL ZTRSM( 'R', 'U', 'N', DIAG, M, N2, CONE, + $ A( N ), N, B( 0, N1 ), LDB ) +* + END IF +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'N', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'U', and +* TRANS = 'N' +* + CALL ZTRSM( 'R', 'L', 'C', DIAG, M, N1, ALPHA, + $ A( N2 ), N, B( 0, 0 ), LDB ) + CALL ZGEMM( 'N', 'N', M, N2, N1, -CONE, B( 0, 0 ), + $ LDB, A( 0 ), N, ALPHA, B( 0, N1 ), + $ LDB ) + CALL ZTRSM( 'R', 'U', 'N', DIAG, M, N2, CONE, + $ A( N1 ), N, B( 0, N1 ), LDB ) +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'U', and +* TRANS = 'C' +* + CALL ZTRSM( 'R', 'U', 'C', DIAG, M, N2, ALPHA, + $ A( N1 ), N, B( 0, N1 ), LDB ) + CALL ZGEMM( 'N', 'C', M, N1, N2, -CONE, B( 0, N1 ), + $ LDB, A( 0 ), N, ALPHA, B( 0, 0 ), LDB ) + CALL ZTRSM( 'R', 'L', 'N', DIAG, M, N1, CONE, + $ A( N2 ), N, B( 0, 0 ), LDB ) +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'R', N is odd, and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'C', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'C', UPLO = 'L', and +* TRANS = 'N' +* + CALL ZTRSM( 'R', 'L', 'N', DIAG, M, N2, ALPHA, + $ A( 1 ), N1, B( 0, N1 ), LDB ) + CALL ZGEMM( 'N', 'C', M, N1, N2, -CONE, B( 0, N1 ), + $ LDB, A( N1*N1 ), N1, ALPHA, B( 0, 0 ), + $ LDB ) + CALL ZTRSM( 'R', 'U', 'C', DIAG, M, N1, CONE, + $ A( 0 ), N1, B( 0, 0 ), LDB ) +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'C', UPLO = 'L', and +* TRANS = 'C' +* + CALL ZTRSM( 'R', 'U', 'N', DIAG, M, N1, ALPHA, + $ A( 0 ), N1, B( 0, 0 ), LDB ) + CALL ZGEMM( 'N', 'N', M, N2, N1, -CONE, B( 0, 0 ), + $ LDB, A( N1*N1 ), N1, ALPHA, B( 0, N1 ), + $ LDB ) + CALL ZTRSM( 'R', 'L', 'C', DIAG, M, N2, CONE, + $ A( 1 ), N1, B( 0, N1 ), LDB ) +* + END IF +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'C', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is odd, TRANSR = 'C', UPLO = 'U', and +* TRANS = 'N' +* + CALL ZTRSM( 'R', 'U', 'N', DIAG, M, N1, ALPHA, + $ A( N2*N2 ), N2, B( 0, 0 ), LDB ) + CALL ZGEMM( 'N', 'C', M, N2, N1, -CONE, B( 0, 0 ), + $ LDB, A( 0 ), N2, ALPHA, B( 0, N1 ), + $ LDB ) + CALL ZTRSM( 'R', 'L', 'C', DIAG, M, N2, CONE, + $ A( N1*N2 ), N2, B( 0, N1 ), LDB ) +* + ELSE +* +* SIDE ='R', N is odd, TRANSR = 'C', UPLO = 'U', and +* TRANS = 'C' +* + CALL ZTRSM( 'R', 'L', 'N', DIAG, M, N2, ALPHA, + $ A( N1*N2 ), N2, B( 0, N1 ), LDB ) + CALL ZGEMM( 'N', 'N', M, N1, N2, -CONE, B( 0, N1 ), + $ LDB, A( 0 ), N2, ALPHA, B( 0, 0 ), + $ LDB ) + CALL ZTRSM( 'R', 'U', 'C', DIAG, M, N1, CONE, + $ A( N2*N2 ), N2, B( 0, 0 ), LDB ) +* + END IF +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'R' and N is even +* + IF( NORMALTRANSR ) THEN +* +* SIDE = 'R', N is even, and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SIDE ='R', N is even, TRANSR = 'N', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'L', +* and TRANS = 'N' +* + CALL ZTRSM( 'R', 'U', 'C', DIAG, M, K, ALPHA, + $ A( 0 ), N+1, B( 0, K ), LDB ) + CALL ZGEMM( 'N', 'N', M, K, K, -CONE, B( 0, K ), + $ LDB, A( K+1 ), N+1, ALPHA, B( 0, 0 ), + $ LDB ) + CALL ZTRSM( 'R', 'L', 'N', DIAG, M, K, CONE, + $ A( 1 ), N+1, B( 0, 0 ), LDB ) +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'L', +* and TRANS = 'C' +* + CALL ZTRSM( 'R', 'L', 'C', DIAG, M, K, ALPHA, + $ A( 1 ), N+1, B( 0, 0 ), LDB ) + CALL ZGEMM( 'N', 'C', M, K, K, -CONE, B( 0, 0 ), + $ LDB, A( K+1 ), N+1, ALPHA, B( 0, K ), + $ LDB ) + CALL ZTRSM( 'R', 'U', 'N', DIAG, M, K, CONE, + $ A( 0 ), N+1, B( 0, K ), LDB ) +* + END IF +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'N', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'U', +* and TRANS = 'N' +* + CALL ZTRSM( 'R', 'L', 'C', DIAG, M, K, ALPHA, + $ A( K+1 ), N+1, B( 0, 0 ), LDB ) + CALL ZGEMM( 'N', 'N', M, K, K, -CONE, B( 0, 0 ), + $ LDB, A( 0 ), N+1, ALPHA, B( 0, K ), + $ LDB ) + CALL ZTRSM( 'R', 'U', 'N', DIAG, M, K, CONE, + $ A( K ), N+1, B( 0, K ), LDB ) +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'U', +* and TRANS = 'C' +* + CALL ZTRSM( 'R', 'U', 'C', DIAG, M, K, ALPHA, + $ A( K ), N+1, B( 0, K ), LDB ) + CALL ZGEMM( 'N', 'C', M, K, K, -CONE, B( 0, K ), + $ LDB, A( 0 ), N+1, ALPHA, B( 0, 0 ), + $ LDB ) + CALL ZTRSM( 'R', 'L', 'N', DIAG, M, K, CONE, + $ A( K+1 ), N+1, B( 0, 0 ), LDB ) +* + END IF +* + END IF +* + ELSE +* +* SIDE = 'R', N is even, and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SIDE ='R', N is even, TRANSR = 'C', and UPLO = 'L' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is even, TRANSR = 'C', UPLO = 'L', +* and TRANS = 'N' +* + CALL ZTRSM( 'R', 'L', 'N', DIAG, M, K, ALPHA, + $ A( 0 ), K, B( 0, K ), LDB ) + CALL ZGEMM( 'N', 'C', M, K, K, -CONE, B( 0, K ), + $ LDB, A( ( K+1 )*K ), K, ALPHA, + $ B( 0, 0 ), LDB ) + CALL ZTRSM( 'R', 'U', 'C', DIAG, M, K, CONE, + $ A( K ), K, B( 0, 0 ), LDB ) +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'C', UPLO = 'L', +* and TRANS = 'C' +* + CALL ZTRSM( 'R', 'U', 'N', DIAG, M, K, ALPHA, + $ A( K ), K, B( 0, 0 ), LDB ) + CALL ZGEMM( 'N', 'N', M, K, K, -CONE, B( 0, 0 ), + $ LDB, A( ( K+1 )*K ), K, ALPHA, + $ B( 0, K ), LDB ) + CALL ZTRSM( 'R', 'L', 'C', DIAG, M, K, CONE, + $ A( 0 ), K, B( 0, K ), LDB ) +* + END IF +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'C', and UPLO = 'U' +* + IF( NOTRANS ) THEN +* +* SIDE ='R', N is even, TRANSR = 'C', UPLO = 'U', +* and TRANS = 'N' +* + CALL ZTRSM( 'R', 'U', 'N', DIAG, M, K, ALPHA, + $ A( ( K+1 )*K ), K, B( 0, 0 ), LDB ) + CALL ZGEMM( 'N', 'C', M, K, K, -CONE, B( 0, 0 ), + $ LDB, A( 0 ), K, ALPHA, B( 0, K ), LDB ) + CALL ZTRSM( 'R', 'L', 'C', DIAG, M, K, CONE, + $ A( K*K ), K, B( 0, K ), LDB ) +* + ELSE +* +* SIDE ='R', N is even, TRANSR = 'C', UPLO = 'U', +* and TRANS = 'C' +* + CALL ZTRSM( 'R', 'L', 'N', DIAG, M, K, ALPHA, + $ A( K*K ), K, B( 0, K ), LDB ) + CALL ZGEMM( 'N', 'N', M, K, K, -CONE, B( 0, K ), + $ LDB, A( 0 ), K, ALPHA, B( 0, 0 ), LDB ) + CALL ZTRSM( 'R', 'U', 'C', DIAG, M, K, CONE, + $ A( ( K+1 )*K ), K, B( 0, 0 ), LDB ) +* + END IF +* + END IF +* + END IF +* + END IF + END IF +* + RETURN +* +* End of ZTFSM +* + END diff --git a/dspl/liblapack/SRC/ztftri.f b/dspl/liblapack/SRC/ztftri.f new file mode 100644 index 0000000..4bc5cfe --- /dev/null +++ b/dspl/liblapack/SRC/ztftri.f @@ -0,0 +1,492 @@ +*> \brief \b ZTFTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTFTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTFTRI( TRANSR, UPLO, DIAG, N, A, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO, DIAG +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTFTRI computes the inverse of a triangular matrix A stored in RFP +*> format. +*> +*> This is a Level 3 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': The Normal TRANSR of RFP A is stored; +*> = 'C': The Conjugate-transpose TRANSR of RFP A is stored. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension ( N*(N+1)/2 ); +*> On entry, the triangular matrix A in RFP format. RFP format +*> is described by TRANSR, UPLO, and N as follows: If TRANSR = +*> 'N' then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is +*> (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'C' then RFP is +*> the Conjugate-transpose of RFP A as defined when +*> TRANSR = 'N'. The contents of RFP A are defined by UPLO as +*> follows: If UPLO = 'U' the RFP A contains the nt elements of +*> upper packed A; If UPLO = 'L' the RFP A contains the nt +*> elements of lower packed A. The LDA of RFP A is (N+1)/2 when +*> TRANSR = 'C'. When TRANSR is 'N' the LDA is N+1 when N is +*> even and N is odd. See the Note below for more details. +*> +*> On exit, the (triangular) inverse of the original matrix, in +*> the same storage format. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, A(i,i) is exactly zero. The triangular +*> matrix is singular and its inverse can not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Standard Packed Format when N is even. +*> We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> conjugate-transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> conjugate-transpose of the last three columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- -- +*> 03 04 05 33 43 53 +*> -- -- +*> 13 14 15 00 44 54 +*> -- +*> 23 24 25 10 11 55 +*> +*> 33 34 35 20 21 22 +*> -- +*> 00 44 45 30 31 32 +*> -- -- +*> 01 11 55 40 41 42 +*> -- -- -- +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- -- +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We next consider Standard Packed Format when N is odd. +*> We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> conjugate-transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> conjugate-transpose of the last two columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- +*> 02 03 04 00 33 43 +*> -- +*> 12 13 14 10 11 44 +*> +*> 22 23 24 20 21 22 +*> -- +*> 00 33 34 30 31 32 +*> -- -- +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- +*> 02 12 22 00 01 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 11 33 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTFTRI( TRANSR, UPLO, DIAG, N, A, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO, DIAG + INTEGER INFO, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( 0: * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZTRMM, ZTRTRI +* .. +* .. Intrinsic Functions .. + INTRINSIC MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( .NOT.LSAME( DIAG, 'N' ) .AND. .NOT.LSAME( DIAG, 'U' ) ) + $ THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTFTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* If N is odd, set NISODD = .TRUE. +* If N is even, set K = N/2 and NISODD = .FALSE. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + ELSE + NISODD = .TRUE. + END IF +* +* Set N1 and N2 depending on LOWER +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* +* start execution: there are eight cases +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) +* T1 -> a(0), T2 -> a(n), S -> a(n1) +* + CALL ZTRTRI( 'L', DIAG, N1, A( 0 ), N, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRMM( 'R', 'L', 'N', DIAG, N2, N1, -CONE, A( 0 ), + $ N, A( N1 ), N ) + CALL ZTRTRI( 'U', DIAG, N2, A( N ), N, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRMM( 'L', 'U', 'C', DIAG, N2, N1, CONE, A( N ), N, + $ A( N1 ), N ) +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) +* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) +* T1 -> a(n2), T2 -> a(n1), S -> a(0) +* + CALL ZTRTRI( 'L', DIAG, N1, A( N2 ), N, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRMM( 'L', 'L', 'C', DIAG, N1, N2, -CONE, A( N2 ), + $ N, A( 0 ), N ) + CALL ZTRTRI( 'U', DIAG, N2, A( N1 ), N, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRMM( 'R', 'U', 'N', DIAG, N1, N2, CONE, A( N1 ), + $ N, A( 0 ), N ) +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is odd +* T1 -> a(0), T2 -> a(1), S -> a(0+n1*n1) +* + CALL ZTRTRI( 'U', DIAG, N1, A( 0 ), N1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRMM( 'L', 'U', 'N', DIAG, N1, N2, -CONE, A( 0 ), + $ N1, A( N1*N1 ), N1 ) + CALL ZTRTRI( 'L', DIAG, N2, A( 1 ), N1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRMM( 'R', 'L', 'C', DIAG, N1, N2, CONE, A( 1 ), + $ N1, A( N1*N1 ), N1 ) +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is odd +* T1 -> a(0+n2*n2), T2 -> a(0+n1*n2), S -> a(0) +* + CALL ZTRTRI( 'U', DIAG, N1, A( N2*N2 ), N2, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRMM( 'R', 'U', 'C', DIAG, N2, N1, -CONE, + $ A( N2*N2 ), N2, A( 0 ), N2 ) + CALL ZTRTRI( 'L', DIAG, N2, A( N1*N2 ), N2, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + N1 + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRMM( 'L', 'L', 'N', DIAG, N2, N1, CONE, + $ A( N1*N2 ), N2, A( 0 ), N2 ) + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1) +* + CALL ZTRTRI( 'L', DIAG, K, A( 1 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRMM( 'R', 'L', 'N', DIAG, K, K, -CONE, A( 1 ), + $ N+1, A( K+1 ), N+1 ) + CALL ZTRTRI( 'U', DIAG, K, A( 0 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRMM( 'L', 'U', 'C', DIAG, K, K, CONE, A( 0 ), N+1, + $ A( K+1 ), N+1 ) +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0) +* + CALL ZTRTRI( 'L', DIAG, K, A( K+1 ), N+1, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRMM( 'L', 'L', 'C', DIAG, K, K, -CONE, A( K+1 ), + $ N+1, A( 0 ), N+1 ) + CALL ZTRTRI( 'U', DIAG, K, A( K ), N+1, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRMM( 'R', 'U', 'N', DIAG, K, K, CONE, A( K ), N+1, + $ A( 0 ), N+1 ) + END IF + ELSE +* +* N is even and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) +* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k +* + CALL ZTRTRI( 'U', DIAG, K, A( K ), K, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRMM( 'L', 'U', 'N', DIAG, K, K, -CONE, A( K ), K, + $ A( K*( K+1 ) ), K ) + CALL ZTRTRI( 'L', DIAG, K, A( 0 ), K, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRMM( 'R', 'L', 'C', DIAG, K, K, CONE, A( 0 ), K, + $ A( K*( K+1 ) ), K ) + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0) +* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k +* + CALL ZTRTRI( 'U', DIAG, K, A( K*( K+1 ) ), K, INFO ) + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRMM( 'R', 'U', 'C', DIAG, K, K, -CONE, + $ A( K*( K+1 ) ), K, A( 0 ), K ) + CALL ZTRTRI( 'L', DIAG, K, A( K*K ), K, INFO ) + IF( INFO.GT.0 ) + $ INFO = INFO + K + IF( INFO.GT.0 ) + $ RETURN + CALL ZTRMM( 'L', 'L', 'N', DIAG, K, K, CONE, A( K*K ), K, + $ A( 0 ), K ) + END IF + END IF + END IF +* + RETURN +* +* End of ZTFTRI +* + END diff --git a/dspl/liblapack/SRC/ztfttp.f b/dspl/liblapack/SRC/ztfttp.f new file mode 100644 index 0000000..f402ca1 --- /dev/null +++ b/dspl/liblapack/SRC/ztfttp.f @@ -0,0 +1,543 @@ +*> \brief \b ZTFTTP copies a triangular matrix from the rectangular full packed format (TF) to the standard packed format (TP). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTFTTP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTFTTP( TRANSR, UPLO, N, ARF, AP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* COMPLEX*16 AP( 0: * ), ARF( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTFTTP copies a triangular matrix A from rectangular full packed +*> format (TF) to standard packed format (TP). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': ARF is in Normal format; +*> = 'C': ARF is in Conjugate-transpose format; +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] ARF +*> \verbatim +*> ARF is COMPLEX*16 array, dimension ( N*(N+1)/2 ), +*> On entry, the upper or lower triangular matrix A stored in +*> RFP format. For a further discussion see Notes below. +*> \endverbatim +*> +*> \param[out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension ( N*(N+1)/2 ), +*> On exit, the upper or lower triangular matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Standard Packed Format when N is even. +*> We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> conjugate-transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> conjugate-transpose of the last three columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- -- +*> 03 04 05 33 43 53 +*> -- -- +*> 13 14 15 00 44 54 +*> -- +*> 23 24 25 10 11 55 +*> +*> 33 34 35 20 21 22 +*> -- +*> 00 44 45 30 31 32 +*> -- -- +*> 01 11 55 40 41 42 +*> -- -- -- +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- -- +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We next consider Standard Packed Format when N is odd. +*> We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> conjugate-transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> conjugate-transpose of the last two columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- +*> 02 03 04 00 33 43 +*> -- +*> 12 13 14 10 11 44 +*> +*> 22 23 24 20 21 22 +*> -- +*> 00 33 34 30 31 32 +*> -- -- +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- +*> 02 12 22 00 01 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 11 33 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTFTTP( TRANSR, UPLO, N, ARF, AP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + COMPLEX*16 AP( 0: * ), ARF( 0: * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K, NT + INTEGER I, J, IJ + INTEGER IJP, JP, LDA, JS +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Intrinsic Functions .. +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTFTTP', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( NORMALTRANSR ) THEN + AP( 0 ) = ARF( 0 ) + ELSE + AP( 0 ) = DCONJG( ARF( 0 ) ) + END IF + RETURN + END IF +* +* Size of array ARF(0:NT-1) +* + NT = N*( N+1 ) / 2 +* +* Set N1 and N2 depending on LOWER +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* If N is odd, set NISODD = .TRUE. +* If N is even, set K = N/2 and NISODD = .FALSE. +* +* set lda of ARF^C; ARF^C is (0:(N+1)/2-1,0:N-noe) +* where noe = 0 if n is even, noe = 1 if n is odd +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + LDA = N + 1 + ELSE + NISODD = .TRUE. + LDA = N + END IF +* +* ARF^C has lda rows and n+1-noe cols +* + IF( .NOT.NORMALTRANSR ) + $ LDA = ( N+1 ) / 2 +* +* start execution: there are eight cases +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) +* T1 -> a(0), T2 -> a(n), S -> a(n1); lda = n +* + IJP = 0 + JP = 0 + DO J = 0, N2 + DO I = J, N - 1 + IJ = I + JP + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JP = JP + LDA + END DO + DO I = 0, N2 - 1 + DO J = 1 + I, N2 + IJ = I + J*LDA + AP( IJP ) = DCONJG( ARF( IJ ) ) + IJP = IJP + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) +* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) +* T1 -> a(n2), T2 -> a(n1), S -> a(0) +* + IJP = 0 + DO J = 0, N1 - 1 + IJ = N2 + J + DO I = 0, J + AP( IJP ) = DCONJG( ARF( IJ ) ) + IJP = IJP + 1 + IJ = IJ + LDA + END DO + END DO + JS = 0 + DO J = N1, N - 1 + IJ = JS + DO IJ = JS, JS + J + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is odd +* T1 -> A(0,0) , T2 -> A(1,0) , S -> A(0,n1) +* T1 -> a(0+0) , T2 -> a(1+0) , S -> a(0+n1*n1); lda=n1 +* + IJP = 0 + DO I = 0, N2 + DO IJ = I*( LDA+1 ), N*LDA - 1, LDA + AP( IJP ) = DCONJG( ARF( IJ ) ) + IJP = IJP + 1 + END DO + END DO + JS = 1 + DO J = 0, N2 - 1 + DO IJ = JS, JS + N2 - J - 1 + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + 1 + END DO +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is odd +* T1 -> A(0,n1+1), T2 -> A(0,n1), S -> A(0,0) +* T1 -> a(n2*n2), T2 -> a(n1*n2), S -> a(0); lda = n2 +* + IJP = 0 + JS = N2*LDA + DO J = 0, N1 - 1 + DO IJ = JS, JS + J + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO + DO I = 0, N1 + DO IJ = I, I + ( N1+I )*LDA, LDA + AP( IJP ) = DCONJG( ARF( IJ ) ) + IJP = IJP + 1 + END DO + END DO +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1) +* + IJP = 0 + JP = 0 + DO J = 0, K - 1 + DO I = J, N - 1 + IJ = 1 + I + JP + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JP = JP + LDA + END DO + DO I = 0, K - 1 + DO J = I, K - 1 + IJ = I + J*LDA + AP( IJP ) = DCONJG( ARF( IJ ) ) + IJP = IJP + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0) +* + IJP = 0 + DO J = 0, K - 1 + IJ = K + 1 + J + DO I = 0, J + AP( IJP ) = DCONJG( ARF( IJ ) ) + IJP = IJP + 1 + IJ = IJ + LDA + END DO + END DO + JS = 0 + DO J = K, N - 1 + IJ = JS + DO IJ = JS, JS + J + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO +* + END IF +* + ELSE +* +* N is even and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) +* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k +* + IJP = 0 + DO I = 0, K - 1 + DO IJ = I + ( I+1 )*LDA, ( N+1 )*LDA - 1, LDA + AP( IJP ) = DCONJG( ARF( IJ ) ) + IJP = IJP + 1 + END DO + END DO + JS = 0 + DO J = 0, K - 1 + DO IJ = JS, JS + K - J - 1 + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + 1 + END DO +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0) +* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k +* + IJP = 0 + JS = ( K+1 )*LDA + DO J = 0, K - 1 + DO IJ = JS, JS + J + AP( IJP ) = ARF( IJ ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO + DO I = 0, K - 1 + DO IJ = I, I + ( K+I )*LDA, LDA + AP( IJP ) = DCONJG( ARF( IJ ) ) + IJP = IJP + 1 + END DO + END DO +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of ZTFTTP +* + END diff --git a/dspl/liblapack/SRC/ztfttr.f b/dspl/liblapack/SRC/ztfttr.f new file mode 100644 index 0000000..39c7e54 --- /dev/null +++ b/dspl/liblapack/SRC/ztfttr.f @@ -0,0 +1,538 @@ +*> \brief \b ZTFTTR copies a triangular matrix from the rectangular full packed format (TF) to the standard full format (TR). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTFTTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTFTTR( TRANSR, UPLO, N, ARF, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. +* COMPLEX*16 A( 0: LDA-1, 0: * ), ARF( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTFTTR copies a triangular matrix A from rectangular full packed +*> format (TF) to standard full format (TR). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': ARF is in Normal format; +*> = 'C': ARF is in Conjugate-transpose format; +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] ARF +*> \verbatim +*> ARF is COMPLEX*16 array, dimension ( N*(N+1)/2 ), +*> On entry, the upper or lower triangular matrix A stored in +*> RFP format. For a further discussion see Notes below. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension ( LDA, N ) +*> On exit, the triangular matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of the array A contains +*> the upper triangular matrix, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of the array A contains +*> the lower triangular matrix, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Standard Packed Format when N is even. +*> We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> conjugate-transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> conjugate-transpose of the last three columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- -- +*> 03 04 05 33 43 53 +*> -- -- +*> 13 14 15 00 44 54 +*> -- +*> 23 24 25 10 11 55 +*> +*> 33 34 35 20 21 22 +*> -- +*> 00 44 45 30 31 32 +*> -- -- +*> 01 11 55 40 41 42 +*> -- -- -- +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- -- +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We next consider Standard Packed Format when N is odd. +*> We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> conjugate-transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> conjugate-transpose of the last two columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- +*> 02 03 04 00 33 43 +*> -- +*> 12 13 14 10 11 44 +*> +*> 22 23 24 20 21 22 +*> -- +*> 00 33 34 30 31 32 +*> -- -- +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- +*> 02 12 22 00 01 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 11 33 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTFTTR( TRANSR, UPLO, N, ARF, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. + COMPLEX*16 A( 0: LDA-1, 0: * ), ARF( 0: * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K, NT, NX2, NP1X2 + INTEGER I, J, L, IJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTFTTR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + IF( N.EQ.1 ) THEN + IF( NORMALTRANSR ) THEN + A( 0, 0 ) = ARF( 0 ) + ELSE + A( 0, 0 ) = DCONJG( ARF( 0 ) ) + END IF + END IF + RETURN + END IF +* +* Size of array ARF(1:2,0:nt-1) +* + NT = N*( N+1 ) / 2 +* +* set N1 and N2 depending on LOWER: for N even N1=N2=K +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* If N is odd, set NISODD = .TRUE., LDA=N+1 and A is (N+1)--by--K2. +* If N is even, set K = N/2 and NISODD = .FALSE., LDA=N and A is +* N--by--(N+1)/2. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + IF( .NOT.LOWER ) + $ NP1X2 = N + N + 2 + ELSE + NISODD = .TRUE. + IF( .NOT.LOWER ) + $ NX2 = N + N + END IF +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) +* T1 -> a(0), T2 -> a(n), S -> a(n1); lda=n +* + IJ = 0 + DO J = 0, N2 + DO I = N1, N2 + J + A( N2+J, I ) = DCONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + DO I = J, N - 1 + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) +* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) +* T1 -> a(n2), T2 -> a(n1), S -> a(0); lda=n +* + IJ = NT - N + DO J = N - 1, N1, -1 + DO I = 0, J + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO L = J - N1, N1 - 1 + A( J-N1, L ) = DCONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + IJ = IJ - NX2 + END DO +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is odd +* T1 -> A(0,0) , T2 -> A(1,0) , S -> A(0,n1) +* T1 -> A(0+0) , T2 -> A(1+0) , S -> A(0+n1*n1); lda=n1 +* + IJ = 0 + DO J = 0, N2 - 1 + DO I = 0, J + A( J, I ) = DCONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + DO I = N1 + J, N - 1 + A( I, N1+J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO + DO J = N2, N - 1 + DO I = 0, N1 - 1 + A( J, I ) = DCONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is odd +* T1 -> A(0,n1+1), T2 -> A(0,n1), S -> A(0,0) +* T1 -> A(n2*n2), T2 -> A(n1*n2), S -> A(0); lda = n2 +* + IJ = 0 + DO J = 0, N1 + DO I = N1, N - 1 + A( J, I ) = DCONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + END DO + DO J = 0, N1 - 1 + DO I = 0, J + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO L = N2 + J, N - 1 + A( N2+J, L ) = DCONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + END DO +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1); lda=n+1 +* + IJ = 0 + DO J = 0, K - 1 + DO I = K, K + J + A( K+J, I ) = DCONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + DO I = J, N - 1 + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0); lda=n+1 +* + IJ = NT - N - 1 + DO J = N - 1, K, -1 + DO I = 0, J + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO L = J - K, K - 1 + A( J-K, L ) = DCONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + IJ = IJ - NP1X2 + END DO +* + END IF +* + ELSE +* +* N is even and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is even (see paper, A=B) +* T1 -> A(0,1) , T2 -> A(0,0) , S -> A(0,k+1) : +* T1 -> A(0+k) , T2 -> A(0+0) , S -> A(0+k*(k+1)); lda=k +* + IJ = 0 + J = K + DO I = K, N - 1 + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO J = 0, K - 2 + DO I = 0, J + A( J, I ) = DCONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + DO I = K + 1 + J, N - 1 + A( I, K+1+J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + END DO + DO J = K - 1, N - 1 + DO I = 0, K - 1 + A( J, I ) = DCONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is even (see paper, A=B) +* T1 -> A(0,k+1) , T2 -> A(0,k) , S -> A(0,0) +* T1 -> A(0+k*(k+1)) , T2 -> A(0+k*k) , S -> A(0+0)); lda=k +* + IJ = 0 + DO J = 0, K + DO I = K, N - 1 + A( J, I ) = DCONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + END DO + DO J = 0, K - 2 + DO I = 0, J + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO + DO L = K + 1 + J, N - 1 + A( K+1+J, L ) = DCONJG( ARF( IJ ) ) + IJ = IJ + 1 + END DO + END DO +* +* Note that here J = K-1 +* + DO I = 0, J + A( I, J ) = ARF( IJ ) + IJ = IJ + 1 + END DO +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of ZTFTTR +* + END diff --git a/dspl/liblapack/SRC/ztgevc.f b/dspl/liblapack/SRC/ztgevc.f new file mode 100644 index 0000000..447fc1a --- /dev/null +++ b/dspl/liblapack/SRC/ztgevc.f @@ -0,0 +1,737 @@ +*> \brief \b ZTGEVC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTGEVC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTGEVC( SIDE, HOWMNY, SELECT, N, S, LDS, P, LDP, VL, +* LDVL, VR, LDVR, MM, M, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER HOWMNY, SIDE +* INTEGER INFO, LDP, LDS, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 P( LDP, * ), S( LDS, * ), VL( LDVL, * ), +* $ VR( LDVR, * ), WORK( * ) +* .. +* +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTGEVC computes some or all of the right and/or left eigenvectors of +*> a pair of complex matrices (S,P), where S and P are upper triangular. +*> Matrix pairs of this type are produced by the generalized Schur +*> factorization of a complex matrix pair (A,B): +*> +*> A = Q*S*Z**H, B = Q*P*Z**H +*> +*> as computed by ZGGHRD + ZHGEQZ. +*> +*> The right eigenvector x and the left eigenvector y of (S,P) +*> corresponding to an eigenvalue w are defined by: +*> +*> S*x = w*P*x, (y**H)*S = w*(y**H)*P, +*> +*> where y**H denotes the conjugate tranpose of y. +*> The eigenvalues are not input to this routine, but are computed +*> directly from the diagonal elements of S and P. +*> +*> This routine returns the matrices X and/or Y of right and left +*> eigenvectors of (S,P), or the products Z*X and/or Q*Y, +*> where Z and Q are input matrices. +*> If Q and Z are the unitary factors from the generalized Schur +*> factorization of a matrix pair (A,B), then Z*X and Q*Y +*> are the matrices of right and left eigenvectors of (A,B). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': compute right eigenvectors only; +*> = 'L': compute left eigenvectors only; +*> = 'B': compute both right and left eigenvectors. +*> \endverbatim +*> +*> \param[in] HOWMNY +*> \verbatim +*> HOWMNY is CHARACTER*1 +*> = 'A': compute all right and/or left eigenvectors; +*> = 'B': compute all right and/or left eigenvectors, +*> backtransformed by the matrices in VR and/or VL; +*> = 'S': compute selected right and/or left eigenvectors, +*> specified by the logical array SELECT. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> If HOWMNY='S', SELECT specifies the eigenvectors to be +*> computed. The eigenvector corresponding to the j-th +*> eigenvalue is computed if SELECT(j) = .TRUE.. +*> Not referenced if HOWMNY = 'A' or 'B'. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices S and P. N >= 0. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is COMPLEX*16 array, dimension (LDS,N) +*> The upper triangular matrix S from a generalized Schur +*> factorization, as computed by ZHGEQZ. +*> \endverbatim +*> +*> \param[in] LDS +*> \verbatim +*> LDS is INTEGER +*> The leading dimension of array S. LDS >= max(1,N). +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is COMPLEX*16 array, dimension (LDP,N) +*> The upper triangular matrix P from a generalized Schur +*> factorization, as computed by ZHGEQZ. P must have real +*> diagonal elements. +*> \endverbatim +*> +*> \param[in] LDP +*> \verbatim +*> LDP is INTEGER +*> The leading dimension of array P. LDP >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is COMPLEX*16 array, dimension (LDVL,MM) +*> On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +*> contain an N-by-N matrix Q (usually the unitary matrix Q +*> of left Schur vectors returned by ZHGEQZ). +*> On exit, if SIDE = 'L' or 'B', VL contains: +*> if HOWMNY = 'A', the matrix Y of left eigenvectors of (S,P); +*> if HOWMNY = 'B', the matrix Q*Y; +*> if HOWMNY = 'S', the left eigenvectors of (S,P) specified by +*> SELECT, stored consecutively in the columns of +*> VL, in the same order as their eigenvalues. +*> Not referenced if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of array VL. LDVL >= 1, and if +*> SIDE = 'L' or 'l' or 'B' or 'b', LDVL >= N. +*> \endverbatim +*> +*> \param[in,out] VR +*> \verbatim +*> VR is COMPLEX*16 array, dimension (LDVR,MM) +*> On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +*> contain an N-by-N matrix Q (usually the unitary matrix Z +*> of right Schur vectors returned by ZHGEQZ). +*> On exit, if SIDE = 'R' or 'B', VR contains: +*> if HOWMNY = 'A', the matrix X of right eigenvectors of (S,P); +*> if HOWMNY = 'B', the matrix Z*X; +*> if HOWMNY = 'S', the right eigenvectors of (S,P) specified by +*> SELECT, stored consecutively in the columns of +*> VR, in the same order as their eigenvalues. +*> Not referenced if SIDE = 'L'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. LDVR >= 1, and if +*> SIDE = 'R' or 'B', LDVR >= N. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of columns in the arrays VL and/or VR. MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of columns in the arrays VL and/or VR actually +*> used to store the eigenvectors. If HOWMNY = 'A' or 'B', M +*> is set to N. Each selected eigenvector occupies one column. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16GEcomputational +* +* ===================================================================== + SUBROUTINE ZTGEVC( SIDE, HOWMNY, SELECT, N, S, LDS, P, LDP, VL, + $ LDVL, VR, LDVR, MM, M, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDP, LDS, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 P( LDP, * ), S( LDS, * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL COMPL, COMPR, ILALL, ILBACK, ILBBAD, ILCOMP, + $ LSA, LSB + INTEGER I, IBEG, IEIG, IEND, IHWMNY, IM, ISIDE, ISRC, + $ J, JE, JR + DOUBLE PRECISION ACOEFA, ACOEFF, ANORM, ASCALE, BCOEFA, BIG, + $ BIGNUM, BNORM, BSCALE, DMIN, SAFMIN, SBETA, + $ SCALE, SMALL, TEMP, ULP, XMAX + COMPLEX*16 BCOEFF, CA, CB, D, SALPHA, SUM, SUMA, SUMB, X +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + COMPLEX*16 ZLADIV + EXTERNAL LSAME, DLAMCH, ZLADIV +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, XERBLA, ZGEMV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN +* .. +* .. Statement Functions .. + DOUBLE PRECISION ABS1 +* .. +* .. Statement Function definitions .. + ABS1( X ) = ABS( DBLE( X ) ) + ABS( DIMAG( X ) ) +* .. +* .. Executable Statements .. +* +* Decode and Test the input parameters +* + IF( LSAME( HOWMNY, 'A' ) ) THEN + IHWMNY = 1 + ILALL = .TRUE. + ILBACK = .FALSE. + ELSE IF( LSAME( HOWMNY, 'S' ) ) THEN + IHWMNY = 2 + ILALL = .FALSE. + ILBACK = .FALSE. + ELSE IF( LSAME( HOWMNY, 'B' ) ) THEN + IHWMNY = 3 + ILALL = .TRUE. + ILBACK = .TRUE. + ELSE + IHWMNY = -1 + END IF +* + IF( LSAME( SIDE, 'R' ) ) THEN + ISIDE = 1 + COMPL = .FALSE. + COMPR = .TRUE. + ELSE IF( LSAME( SIDE, 'L' ) ) THEN + ISIDE = 2 + COMPL = .TRUE. + COMPR = .FALSE. + ELSE IF( LSAME( SIDE, 'B' ) ) THEN + ISIDE = 3 + COMPL = .TRUE. + COMPR = .TRUE. + ELSE + ISIDE = -1 + END IF +* + INFO = 0 + IF( ISIDE.LT.0 ) THEN + INFO = -1 + ELSE IF( IHWMNY.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDS.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDP.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTGEVC', -INFO ) + RETURN + END IF +* +* Count the number of eigenvectors +* + IF( .NOT.ILALL ) THEN + IM = 0 + DO 10 J = 1, N + IF( SELECT( J ) ) + $ IM = IM + 1 + 10 CONTINUE + ELSE + IM = N + END IF +* +* Check diagonal of B +* + ILBBAD = .FALSE. + DO 20 J = 1, N + IF( DIMAG( P( J, J ) ).NE.ZERO ) + $ ILBBAD = .TRUE. + 20 CONTINUE +* + IF( ILBBAD ) THEN + INFO = -7 + ELSE IF( COMPL .AND. LDVL.LT.N .OR. LDVL.LT.1 ) THEN + INFO = -10 + ELSE IF( COMPR .AND. LDVR.LT.N .OR. LDVR.LT.1 ) THEN + INFO = -12 + ELSE IF( MM.LT.IM ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTGEVC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + M = IM + IF( N.EQ.0 ) + $ RETURN +* +* Machine Constants +* + SAFMIN = DLAMCH( 'Safe minimum' ) + BIG = ONE / SAFMIN + CALL DLABAD( SAFMIN, BIG ) + ULP = DLAMCH( 'Epsilon' )*DLAMCH( 'Base' ) + SMALL = SAFMIN*N / ULP + BIG = ONE / SMALL + BIGNUM = ONE / ( SAFMIN*N ) +* +* Compute the 1-norm of each column of the strictly upper triangular +* part of A and B to check for possible overflow in the triangular +* solver. +* + ANORM = ABS1( S( 1, 1 ) ) + BNORM = ABS1( P( 1, 1 ) ) + RWORK( 1 ) = ZERO + RWORK( N+1 ) = ZERO + DO 40 J = 2, N + RWORK( J ) = ZERO + RWORK( N+J ) = ZERO + DO 30 I = 1, J - 1 + RWORK( J ) = RWORK( J ) + ABS1( S( I, J ) ) + RWORK( N+J ) = RWORK( N+J ) + ABS1( P( I, J ) ) + 30 CONTINUE + ANORM = MAX( ANORM, RWORK( J )+ABS1( S( J, J ) ) ) + BNORM = MAX( BNORM, RWORK( N+J )+ABS1( P( J, J ) ) ) + 40 CONTINUE +* + ASCALE = ONE / MAX( ANORM, SAFMIN ) + BSCALE = ONE / MAX( BNORM, SAFMIN ) +* +* Left eigenvectors +* + IF( COMPL ) THEN + IEIG = 0 +* +* Main loop over eigenvalues +* + DO 140 JE = 1, N + IF( ILALL ) THEN + ILCOMP = .TRUE. + ELSE + ILCOMP = SELECT( JE ) + END IF + IF( ILCOMP ) THEN + IEIG = IEIG + 1 +* + IF( ABS1( S( JE, JE ) ).LE.SAFMIN .AND. + $ ABS( DBLE( P( JE, JE ) ) ).LE.SAFMIN ) THEN +* +* Singular matrix pencil -- return unit eigenvector +* + DO 50 JR = 1, N + VL( JR, IEIG ) = CZERO + 50 CONTINUE + VL( IEIG, IEIG ) = CONE + GO TO 140 + END IF +* +* Non-singular eigenvalue: +* Compute coefficients a and b in +* H +* y ( a A - b B ) = 0 +* + TEMP = ONE / MAX( ABS1( S( JE, JE ) )*ASCALE, + $ ABS( DBLE( P( JE, JE ) ) )*BSCALE, SAFMIN ) + SALPHA = ( TEMP*S( JE, JE ) )*ASCALE + SBETA = ( TEMP*DBLE( P( JE, JE ) ) )*BSCALE + ACOEFF = SBETA*ASCALE + BCOEFF = SALPHA*BSCALE +* +* Scale to avoid underflow +* + LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEFF ).LT.SMALL + LSB = ABS1( SALPHA ).GE.SAFMIN .AND. ABS1( BCOEFF ).LT. + $ SMALL +* + SCALE = ONE + IF( LSA ) + $ SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG ) + IF( LSB ) + $ SCALE = MAX( SCALE, ( SMALL / ABS1( SALPHA ) )* + $ MIN( BNORM, BIG ) ) + IF( LSA .OR. LSB ) THEN + SCALE = MIN( SCALE, ONE / + $ ( SAFMIN*MAX( ONE, ABS( ACOEFF ), + $ ABS1( BCOEFF ) ) ) ) + IF( LSA ) THEN + ACOEFF = ASCALE*( SCALE*SBETA ) + ELSE + ACOEFF = SCALE*ACOEFF + END IF + IF( LSB ) THEN + BCOEFF = BSCALE*( SCALE*SALPHA ) + ELSE + BCOEFF = SCALE*BCOEFF + END IF + END IF +* + ACOEFA = ABS( ACOEFF ) + BCOEFA = ABS1( BCOEFF ) + XMAX = ONE + DO 60 JR = 1, N + WORK( JR ) = CZERO + 60 CONTINUE + WORK( JE ) = CONE + DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN ) +* +* H +* Triangular solve of (a A - b B) y = 0 +* +* H +* (rowwise in (a A - b B) , or columnwise in a A - b B) +* + DO 100 J = JE + 1, N +* +* Compute +* j-1 +* SUM = sum conjg( a*S(k,j) - b*P(k,j) )*x(k) +* k=je +* (Scale if necessary) +* + TEMP = ONE / XMAX + IF( ACOEFA*RWORK( J )+BCOEFA*RWORK( N+J ).GT.BIGNUM* + $ TEMP ) THEN + DO 70 JR = JE, J - 1 + WORK( JR ) = TEMP*WORK( JR ) + 70 CONTINUE + XMAX = ONE + END IF + SUMA = CZERO + SUMB = CZERO +* + DO 80 JR = JE, J - 1 + SUMA = SUMA + DCONJG( S( JR, J ) )*WORK( JR ) + SUMB = SUMB + DCONJG( P( JR, J ) )*WORK( JR ) + 80 CONTINUE + SUM = ACOEFF*SUMA - DCONJG( BCOEFF )*SUMB +* +* Form x(j) = - SUM / conjg( a*S(j,j) - b*P(j,j) ) +* +* with scaling and perturbation of the denominator +* + D = DCONJG( ACOEFF*S( J, J )-BCOEFF*P( J, J ) ) + IF( ABS1( D ).LE.DMIN ) + $ D = DCMPLX( DMIN ) +* + IF( ABS1( D ).LT.ONE ) THEN + IF( ABS1( SUM ).GE.BIGNUM*ABS1( D ) ) THEN + TEMP = ONE / ABS1( SUM ) + DO 90 JR = JE, J - 1 + WORK( JR ) = TEMP*WORK( JR ) + 90 CONTINUE + XMAX = TEMP*XMAX + SUM = TEMP*SUM + END IF + END IF + WORK( J ) = ZLADIV( -SUM, D ) + XMAX = MAX( XMAX, ABS1( WORK( J ) ) ) + 100 CONTINUE +* +* Back transform eigenvector if HOWMNY='B'. +* + IF( ILBACK ) THEN + CALL ZGEMV( 'N', N, N+1-JE, CONE, VL( 1, JE ), LDVL, + $ WORK( JE ), 1, CZERO, WORK( N+1 ), 1 ) + ISRC = 2 + IBEG = 1 + ELSE + ISRC = 1 + IBEG = JE + END IF +* +* Copy and scale eigenvector into column of VL +* + XMAX = ZERO + DO 110 JR = IBEG, N + XMAX = MAX( XMAX, ABS1( WORK( ( ISRC-1 )*N+JR ) ) ) + 110 CONTINUE +* + IF( XMAX.GT.SAFMIN ) THEN + TEMP = ONE / XMAX + DO 120 JR = IBEG, N + VL( JR, IEIG ) = TEMP*WORK( ( ISRC-1 )*N+JR ) + 120 CONTINUE + ELSE + IBEG = N + 1 + END IF +* + DO 130 JR = 1, IBEG - 1 + VL( JR, IEIG ) = CZERO + 130 CONTINUE +* + END IF + 140 CONTINUE + END IF +* +* Right eigenvectors +* + IF( COMPR ) THEN + IEIG = IM + 1 +* +* Main loop over eigenvalues +* + DO 250 JE = N, 1, -1 + IF( ILALL ) THEN + ILCOMP = .TRUE. + ELSE + ILCOMP = SELECT( JE ) + END IF + IF( ILCOMP ) THEN + IEIG = IEIG - 1 +* + IF( ABS1( S( JE, JE ) ).LE.SAFMIN .AND. + $ ABS( DBLE( P( JE, JE ) ) ).LE.SAFMIN ) THEN +* +* Singular matrix pencil -- return unit eigenvector +* + DO 150 JR = 1, N + VR( JR, IEIG ) = CZERO + 150 CONTINUE + VR( IEIG, IEIG ) = CONE + GO TO 250 + END IF +* +* Non-singular eigenvalue: +* Compute coefficients a and b in +* +* ( a A - b B ) x = 0 +* + TEMP = ONE / MAX( ABS1( S( JE, JE ) )*ASCALE, + $ ABS( DBLE( P( JE, JE ) ) )*BSCALE, SAFMIN ) + SALPHA = ( TEMP*S( JE, JE ) )*ASCALE + SBETA = ( TEMP*DBLE( P( JE, JE ) ) )*BSCALE + ACOEFF = SBETA*ASCALE + BCOEFF = SALPHA*BSCALE +* +* Scale to avoid underflow +* + LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEFF ).LT.SMALL + LSB = ABS1( SALPHA ).GE.SAFMIN .AND. ABS1( BCOEFF ).LT. + $ SMALL +* + SCALE = ONE + IF( LSA ) + $ SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG ) + IF( LSB ) + $ SCALE = MAX( SCALE, ( SMALL / ABS1( SALPHA ) )* + $ MIN( BNORM, BIG ) ) + IF( LSA .OR. LSB ) THEN + SCALE = MIN( SCALE, ONE / + $ ( SAFMIN*MAX( ONE, ABS( ACOEFF ), + $ ABS1( BCOEFF ) ) ) ) + IF( LSA ) THEN + ACOEFF = ASCALE*( SCALE*SBETA ) + ELSE + ACOEFF = SCALE*ACOEFF + END IF + IF( LSB ) THEN + BCOEFF = BSCALE*( SCALE*SALPHA ) + ELSE + BCOEFF = SCALE*BCOEFF + END IF + END IF +* + ACOEFA = ABS( ACOEFF ) + BCOEFA = ABS1( BCOEFF ) + XMAX = ONE + DO 160 JR = 1, N + WORK( JR ) = CZERO + 160 CONTINUE + WORK( JE ) = CONE + DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN ) +* +* Triangular solve of (a A - b B) x = 0 (columnwise) +* +* WORK(1:j-1) contains sums w, +* WORK(j+1:JE) contains x +* + DO 170 JR = 1, JE - 1 + WORK( JR ) = ACOEFF*S( JR, JE ) - BCOEFF*P( JR, JE ) + 170 CONTINUE + WORK( JE ) = CONE +* + DO 210 J = JE - 1, 1, -1 +* +* Form x(j) := - w(j) / d +* with scaling and perturbation of the denominator +* + D = ACOEFF*S( J, J ) - BCOEFF*P( J, J ) + IF( ABS1( D ).LE.DMIN ) + $ D = DCMPLX( DMIN ) +* + IF( ABS1( D ).LT.ONE ) THEN + IF( ABS1( WORK( J ) ).GE.BIGNUM*ABS1( D ) ) THEN + TEMP = ONE / ABS1( WORK( J ) ) + DO 180 JR = 1, JE + WORK( JR ) = TEMP*WORK( JR ) + 180 CONTINUE + END IF + END IF +* + WORK( J ) = ZLADIV( -WORK( J ), D ) +* + IF( J.GT.1 ) THEN +* +* w = w + x(j)*(a S(*,j) - b P(*,j) ) with scaling +* + IF( ABS1( WORK( J ) ).GT.ONE ) THEN + TEMP = ONE / ABS1( WORK( J ) ) + IF( ACOEFA*RWORK( J )+BCOEFA*RWORK( N+J ).GE. + $ BIGNUM*TEMP ) THEN + DO 190 JR = 1, JE + WORK( JR ) = TEMP*WORK( JR ) + 190 CONTINUE + END IF + END IF +* + CA = ACOEFF*WORK( J ) + CB = BCOEFF*WORK( J ) + DO 200 JR = 1, J - 1 + WORK( JR ) = WORK( JR ) + CA*S( JR, J ) - + $ CB*P( JR, J ) + 200 CONTINUE + END IF + 210 CONTINUE +* +* Back transform eigenvector if HOWMNY='B'. +* + IF( ILBACK ) THEN + CALL ZGEMV( 'N', N, JE, CONE, VR, LDVR, WORK, 1, + $ CZERO, WORK( N+1 ), 1 ) + ISRC = 2 + IEND = N + ELSE + ISRC = 1 + IEND = JE + END IF +* +* Copy and scale eigenvector into column of VR +* + XMAX = ZERO + DO 220 JR = 1, IEND + XMAX = MAX( XMAX, ABS1( WORK( ( ISRC-1 )*N+JR ) ) ) + 220 CONTINUE +* + IF( XMAX.GT.SAFMIN ) THEN + TEMP = ONE / XMAX + DO 230 JR = 1, IEND + VR( JR, IEIG ) = TEMP*WORK( ( ISRC-1 )*N+JR ) + 230 CONTINUE + ELSE + IEND = 0 + END IF +* + DO 240 JR = IEND + 1, N + VR( JR, IEIG ) = CZERO + 240 CONTINUE +* + END IF + 250 CONTINUE + END IF +* + RETURN +* +* End of ZTGEVC +* + END diff --git a/dspl/liblapack/SRC/ztgex2.f b/dspl/liblapack/SRC/ztgex2.f new file mode 100644 index 0000000..ba80d71 --- /dev/null +++ b/dspl/liblapack/SRC/ztgex2.f @@ -0,0 +1,367 @@ +*> \brief \b ZTGEX2 swaps adjacent diagonal blocks in an upper (quasi) triangular matrix pair by an unitary equivalence transformation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTGEX2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, +* LDZ, J1, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL WANTQ, WANTZ +* INTEGER INFO, J1, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTGEX2 swaps adjacent diagonal 1 by 1 blocks (A11,B11) and (A22,B22) +*> in an upper triangular matrix pair (A, B) by an unitary equivalence +*> transformation. +*> +*> (A, B) must be in generalized Schur canonical form, that is, A and +*> B are both upper triangular. +*> +*> Optionally, the matrices Q and Z of generalized Schur vectors are +*> updated. +*> +*> Q(in) * A(in) * Z(in)**H = Q(out) * A(out) * Z(out)**H +*> Q(in) * B(in) * Z(in)**H = Q(out) * B(out) * Z(out)**H +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTQ +*> \verbatim +*> WANTQ is LOGICAL +*> .TRUE. : update the left transformation matrix Q; +*> .FALSE.: do not update Q. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> .TRUE. : update the right transformation matrix Z; +*> .FALSE.: do not update Z. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimensions (LDA,N) +*> On entry, the matrix A in the pair (A, B). +*> On exit, the updated matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimensions (LDB,N) +*> On entry, the matrix B in the pair (A, B). +*> On exit, the updated matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ,N) +*> If WANTQ = .TRUE, on entry, the unitary matrix Q. On exit, +*> the updated matrix Q. +*> Not referenced if WANTQ = .FALSE.. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= 1; +*> If WANTQ = .TRUE., LDQ >= N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ,N) +*> If WANTZ = .TRUE, on entry, the unitary matrix Z. On exit, +*> the updated matrix Z. +*> Not referenced if WANTZ = .FALSE.. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1; +*> If WANTZ = .TRUE., LDZ >= N. +*> \endverbatim +*> +*> \param[in] J1 +*> \verbatim +*> J1 is INTEGER +*> The index to the first block (A11, B11). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> =0: Successful exit. +*> =1: The transformed matrix pair (A, B) would be too far +*> from generalized Schur form; the problem is ill- +*> conditioned. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16GEauxiliary +* +*> \par Further Details: +* ===================== +*> +*> In the current code both weak and strong stability tests are +*> performed. The user can omit the strong stability test by changing +*> the internal logical parameter WANDS to .FALSE.. See ref. [2] for +*> details. +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +*> Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +*> M.S. Moonen et al (eds), Linear Algebra for Large Scale and +*> Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +*> \n +*> [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +*> Eigenvalues of a Regular Matrix Pair (A, B) and Condition +*> Estimation: Theory, Algorithms and Software, Report UMINF-94.04, +*> Department of Computing Science, Umea University, S-901 87 Umea, +*> Sweden, 1994. Also as LAPACK Working Note 87. To appear in +*> Numerical Algorithms, 1996. +*> +* ===================================================================== + SUBROUTINE ZTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, J1, INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER INFO, J1, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) + DOUBLE PRECISION TWENTY + PARAMETER ( TWENTY = 2.0D+1 ) + INTEGER LDST + PARAMETER ( LDST = 2 ) + LOGICAL WANDS + PARAMETER ( WANDS = .TRUE. ) +* .. +* .. Local Scalars .. + LOGICAL DTRONG, WEAK + INTEGER I, M + DOUBLE PRECISION CQ, CZ, EPS, SA, SB, SCALE, SMLNUM, SS, SUM, + $ THRESH, WS + COMPLEX*16 CDUM, F, G, SQ, SZ +* .. +* .. Local Arrays .. + COMPLEX*16 S( LDST, LDST ), T( LDST, LDST ), WORK( 8 ) +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. External Subroutines .. + EXTERNAL ZLACPY, ZLARTG, ZLASSQ, ZROT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG, MAX, SQRT +* .. +* .. Executable Statements .. +* + INFO = 0 +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN +* + M = LDST + WEAK = .FALSE. + DTRONG = .FALSE. +* +* Make a local copy of selected block in (A, B) +* + CALL ZLACPY( 'Full', M, M, A( J1, J1 ), LDA, S, LDST ) + CALL ZLACPY( 'Full', M, M, B( J1, J1 ), LDB, T, LDST ) +* +* Compute the threshold for testing the acceptance of swapping. +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + SCALE = DBLE( CZERO ) + SUM = DBLE( CONE ) + CALL ZLACPY( 'Full', M, M, S, LDST, WORK, M ) + CALL ZLACPY( 'Full', M, M, T, LDST, WORK( M*M+1 ), M ) + CALL ZLASSQ( 2*M*M, WORK, 1, SCALE, SUM ) + SA = SCALE*SQRT( SUM ) +* +* THRES has been changed from +* THRESH = MAX( TEN*EPS*SA, SMLNUM ) +* to +* THRESH = MAX( TWENTY*EPS*SA, SMLNUM ) +* on 04/01/10. +* "Bug" reported by Ondra Kamenik, confirmed by Julie Langou, fixed by +* Jim Demmel and Guillaume Revy. See forum post 1783. +* + THRESH = MAX( TWENTY*EPS*SA, SMLNUM ) +* +* Compute unitary QL and RQ that swap 1-by-1 and 1-by-1 blocks +* using Givens rotations and perform the swap tentatively. +* + F = S( 2, 2 )*T( 1, 1 ) - T( 2, 2 )*S( 1, 1 ) + G = S( 2, 2 )*T( 1, 2 ) - T( 2, 2 )*S( 1, 2 ) + SA = ABS( S( 2, 2 ) ) + SB = ABS( T( 2, 2 ) ) + CALL ZLARTG( G, F, CZ, SZ, CDUM ) + SZ = -SZ + CALL ZROT( 2, S( 1, 1 ), 1, S( 1, 2 ), 1, CZ, DCONJG( SZ ) ) + CALL ZROT( 2, T( 1, 1 ), 1, T( 1, 2 ), 1, CZ, DCONJG( SZ ) ) + IF( SA.GE.SB ) THEN + CALL ZLARTG( S( 1, 1 ), S( 2, 1 ), CQ, SQ, CDUM ) + ELSE + CALL ZLARTG( T( 1, 1 ), T( 2, 1 ), CQ, SQ, CDUM ) + END IF + CALL ZROT( 2, S( 1, 1 ), LDST, S( 2, 1 ), LDST, CQ, SQ ) + CALL ZROT( 2, T( 1, 1 ), LDST, T( 2, 1 ), LDST, CQ, SQ ) +* +* Weak stability test: |S21| + |T21| <= O(EPS F-norm((S, T))) +* + WS = ABS( S( 2, 1 ) ) + ABS( T( 2, 1 ) ) + WEAK = WS.LE.THRESH + IF( .NOT.WEAK ) + $ GO TO 20 +* + IF( WANDS ) THEN +* +* Strong stability test: +* F-norm((A-QL**H*S*QR, B-QL**H*T*QR)) <= O(EPS*F-norm((A, B))) +* + CALL ZLACPY( 'Full', M, M, S, LDST, WORK, M ) + CALL ZLACPY( 'Full', M, M, T, LDST, WORK( M*M+1 ), M ) + CALL ZROT( 2, WORK, 1, WORK( 3 ), 1, CZ, -DCONJG( SZ ) ) + CALL ZROT( 2, WORK( 5 ), 1, WORK( 7 ), 1, CZ, -DCONJG( SZ ) ) + CALL ZROT( 2, WORK, 2, WORK( 2 ), 2, CQ, -SQ ) + CALL ZROT( 2, WORK( 5 ), 2, WORK( 6 ), 2, CQ, -SQ ) + DO 10 I = 1, 2 + WORK( I ) = WORK( I ) - A( J1+I-1, J1 ) + WORK( I+2 ) = WORK( I+2 ) - A( J1+I-1, J1+1 ) + WORK( I+4 ) = WORK( I+4 ) - B( J1+I-1, J1 ) + WORK( I+6 ) = WORK( I+6 ) - B( J1+I-1, J1+1 ) + 10 CONTINUE + SCALE = DBLE( CZERO ) + SUM = DBLE( CONE ) + CALL ZLASSQ( 2*M*M, WORK, 1, SCALE, SUM ) + SS = SCALE*SQRT( SUM ) + DTRONG = SS.LE.THRESH + IF( .NOT.DTRONG ) + $ GO TO 20 + END IF +* +* If the swap is accepted ("weakly" and "strongly"), apply the +* equivalence transformations to the original matrix pair (A,B) +* + CALL ZROT( J1+1, A( 1, J1 ), 1, A( 1, J1+1 ), 1, CZ, + $ DCONJG( SZ ) ) + CALL ZROT( J1+1, B( 1, J1 ), 1, B( 1, J1+1 ), 1, CZ, + $ DCONJG( SZ ) ) + CALL ZROT( N-J1+1, A( J1, J1 ), LDA, A( J1+1, J1 ), LDA, CQ, SQ ) + CALL ZROT( N-J1+1, B( J1, J1 ), LDB, B( J1+1, J1 ), LDB, CQ, SQ ) +* +* Set N1 by N2 (2,1) blocks to 0 +* + A( J1+1, J1 ) = CZERO + B( J1+1, J1 ) = CZERO +* +* Accumulate transformations into Q and Z if requested. +* + IF( WANTZ ) + $ CALL ZROT( N, Z( 1, J1 ), 1, Z( 1, J1+1 ), 1, CZ, + $ DCONJG( SZ ) ) + IF( WANTQ ) + $ CALL ZROT( N, Q( 1, J1 ), 1, Q( 1, J1+1 ), 1, CQ, + $ DCONJG( SQ ) ) +* +* Exit with INFO = 0 if swap was successfully performed. +* + RETURN +* +* Exit with INFO = 1 if swap was rejected. +* + 20 CONTINUE + INFO = 1 + RETURN +* +* End of ZTGEX2 +* + END diff --git a/dspl/liblapack/SRC/ztgexc.f b/dspl/liblapack/SRC/ztgexc.f new file mode 100644 index 0000000..7eac54c --- /dev/null +++ b/dspl/liblapack/SRC/ztgexc.f @@ -0,0 +1,300 @@ +*> \brief \b ZTGEXC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTGEXC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, +* LDZ, IFST, ILST, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL WANTQ, WANTZ +* INTEGER IFST, ILST, INFO, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTGEXC reorders the generalized Schur decomposition of a complex +*> matrix pair (A,B), using an unitary equivalence transformation +*> (A, B) := Q * (A, B) * Z**H, so that the diagonal block of (A, B) with +*> row index IFST is moved to row ILST. +*> +*> (A, B) must be in generalized Schur canonical form, that is, A and +*> B are both upper triangular. +*> +*> Optionally, the matrices Q and Z of generalized Schur vectors are +*> updated. +*> +*> Q(in) * A(in) * Z(in)**H = Q(out) * A(out) * Z(out)**H +*> Q(in) * B(in) * Z(in)**H = Q(out) * B(out) * Z(out)**H +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] WANTQ +*> \verbatim +*> WANTQ is LOGICAL +*> .TRUE. : update the left transformation matrix Q; +*> .FALSE.: do not update Q. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> .TRUE. : update the right transformation matrix Z; +*> .FALSE.: do not update Z. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the upper triangular matrix A in the pair (A, B). +*> On exit, the updated matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> On entry, the upper triangular matrix B in the pair (A, B). +*> On exit, the updated matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ,N) +*> On entry, if WANTQ = .TRUE., the unitary matrix Q. +*> On exit, the updated matrix Q. +*> If WANTQ = .FALSE., Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= 1; +*> If WANTQ = .TRUE., LDQ >= N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ,N) +*> On entry, if WANTZ = .TRUE., the unitary matrix Z. +*> On exit, the updated matrix Z. +*> If WANTZ = .FALSE., Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1; +*> If WANTZ = .TRUE., LDZ >= N. +*> \endverbatim +*> +*> \param[in] IFST +*> \verbatim +*> IFST is INTEGER +*> \endverbatim +*> +*> \param[in,out] ILST +*> \verbatim +*> ILST is INTEGER +*> Specify the reordering of the diagonal blocks of (A, B). +*> The block with row index IFST is moved to row ILST, by a +*> sequence of swapping between adjacent blocks. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> =0: Successful exit. +*> <0: if INFO = -i, the i-th argument had an illegal value. +*> =1: The transformed matrix pair (A, B) would be too far +*> from generalized Schur form; the problem is ill- +*> conditioned. (A, B) may have been partially reordered, +*> and ILST points to the first row of the current +*> position of the block being moved. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16GEcomputational +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +*> Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +*> M.S. Moonen et al (eds), Linear Algebra for Large Scale and +*> Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +*> \n +*> [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +*> Eigenvalues of a Regular Matrix Pair (A, B) and Condition +*> Estimation: Theory, Algorithms and Software, Report +*> UMINF - 94.04, Department of Computing Science, Umea University, +*> S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87. +*> To appear in Numerical Algorithms, 1996. +*> \n +*> [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +*> for Solving the Generalized Sylvester Equation and Estimating the +*> Separation between Regular Matrix Pairs, Report UMINF - 93.23, +*> Department of Computing Science, Umea University, S-901 87 Umea, +*> Sweden, December 1993, Revised April 1994, Also as LAPACK working +*> Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1, +*> 1996. +*> +* ===================================================================== + SUBROUTINE ZTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, IFST, ILST, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER IFST, ILST, INFO, LDA, LDB, LDQ, LDZ, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER HERE +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZTGEX2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Decode and test input arguments. + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. ( LDQ.LT.MAX( 1, N ) ) ) THEN + INFO = -9 + ELSE IF( LDZ.LT.1 .OR. WANTZ .AND. ( LDZ.LT.MAX( 1, N ) ) ) THEN + INFO = -11 + ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN + INFO = -12 + ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN + INFO = -13 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTGEXC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) + $ RETURN + IF( IFST.EQ.ILST ) + $ RETURN +* + IF( IFST.LT.ILST ) THEN +* + HERE = IFST +* + 10 CONTINUE +* +* Swap with next one below +* + CALL ZTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, LDZ, + $ HERE, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE + 1 + IF( HERE.LT.ILST ) + $ GO TO 10 + HERE = HERE - 1 + ELSE + HERE = IFST - 1 +* + 20 CONTINUE +* +* Swap with next one above +* + CALL ZTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, LDZ, + $ HERE, INFO ) + IF( INFO.NE.0 ) THEN + ILST = HERE + RETURN + END IF + HERE = HERE - 1 + IF( HERE.GE.ILST ) + $ GO TO 20 + HERE = HERE + 1 + END IF + ILST = HERE + RETURN +* +* End of ZTGEXC +* + END diff --git a/dspl/liblapack/SRC/ztgsen.f b/dspl/liblapack/SRC/ztgsen.f new file mode 100644 index 0000000..cb4afd5 --- /dev/null +++ b/dspl/liblapack/SRC/ztgsen.f @@ -0,0 +1,787 @@ +*> \brief \b ZTGSEN +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTGSEN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, +* ALPHA, BETA, Q, LDQ, Z, LDZ, M, PL, PR, DIF, +* WORK, LWORK, IWORK, LIWORK, INFO ) +* +* .. Scalar Arguments .. +* LOGICAL WANTQ, WANTZ +* INTEGER IJOB, INFO, LDA, LDB, LDQ, LDZ, LIWORK, LWORK, +* $ M, N +* DOUBLE PRECISION PL, PR +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* INTEGER IWORK( * ) +* DOUBLE PRECISION DIF( * ) +* COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), +* $ BETA( * ), Q( LDQ, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTGSEN reorders the generalized Schur decomposition of a complex +*> matrix pair (A, B) (in terms of an unitary equivalence trans- +*> formation Q**H * (A, B) * Z), so that a selected cluster of eigenvalues +*> appears in the leading diagonal blocks of the pair (A,B). The leading +*> columns of Q and Z form unitary bases of the corresponding left and +*> right eigenspaces (deflating subspaces). (A, B) must be in +*> generalized Schur canonical form, that is, A and B are both upper +*> triangular. +*> +*> ZTGSEN also computes the generalized eigenvalues +*> +*> w(j)= ALPHA(j) / BETA(j) +*> +*> of the reordered matrix pair (A, B). +*> +*> Optionally, the routine computes estimates of reciprocal condition +*> numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11), +*> (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s) +*> between the matrix pairs (A11, B11) and (A22,B22) that correspond to +*> the selected cluster and the eigenvalues outside the cluster, resp., +*> and norms of "projections" onto left and right eigenspaces w.r.t. +*> the selected cluster in the (1,1)-block. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] IJOB +*> \verbatim +*> IJOB is INTEGER +*> Specifies whether condition numbers are required for the +*> cluster of eigenvalues (PL and PR) or the deflating subspaces +*> (Difu and Difl): +*> =0: Only reorder w.r.t. SELECT. No extras. +*> =1: Reciprocal of norms of "projections" onto left and right +*> eigenspaces w.r.t. the selected cluster (PL and PR). +*> =2: Upper bounds on Difu and Difl. F-norm-based estimate +*> (DIF(1:2)). +*> =3: Estimate of Difu and Difl. 1-norm-based estimate +*> (DIF(1:2)). +*> About 5 times as expensive as IJOB = 2. +*> =4: Compute PL, PR and DIF (i.e. 0, 1 and 2 above): Economic +*> version to get it all. +*> =5: Compute PL, PR and DIF (i.e. 0, 1 and 3 above) +*> \endverbatim +*> +*> \param[in] WANTQ +*> \verbatim +*> WANTQ is LOGICAL +*> .TRUE. : update the left transformation matrix Q; +*> .FALSE.: do not update Q. +*> \endverbatim +*> +*> \param[in] WANTZ +*> \verbatim +*> WANTZ is LOGICAL +*> .TRUE. : update the right transformation matrix Z; +*> .FALSE.: do not update Z. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> SELECT specifies the eigenvalues in the selected cluster. To +*> select an eigenvalue w(j), SELECT(j) must be set to +*> .TRUE.. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension(LDA,N) +*> On entry, the upper triangular matrix A, in generalized +*> Schur canonical form. +*> On exit, A is overwritten by the reordered matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension(LDB,N) +*> On entry, the upper triangular matrix B, in generalized +*> Schur canonical form. +*> On exit, B is overwritten by the reordered matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is COMPLEX*16 array, dimension (N) +*> +*> The diagonal elements of A and B, respectively, +*> when the pair (A,B) has been reduced to generalized Schur +*> form. ALPHA(i)/BETA(i) i=1,...,N are the generalized +*> eigenvalues. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ,N) +*> On entry, if WANTQ = .TRUE., Q is an N-by-N matrix. +*> On exit, Q has been postmultiplied by the left unitary +*> transformation matrix which reorder (A, B); The leading M +*> columns of Q form orthonormal bases for the specified pair of +*> left eigenspaces (deflating subspaces). +*> If WANTQ = .FALSE., Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= 1. +*> If WANTQ = .TRUE., LDQ >= N. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ,N) +*> On entry, if WANTZ = .TRUE., Z is an N-by-N matrix. +*> On exit, Z has been postmultiplied by the left unitary +*> transformation matrix which reorder (A, B); The leading M +*> columns of Z form orthonormal bases for the specified pair of +*> left eigenspaces (deflating subspaces). +*> If WANTZ = .FALSE., Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1. +*> If WANTZ = .TRUE., LDZ >= N. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The dimension of the specified pair of left and right +*> eigenspaces, (deflating subspaces) 0 <= M <= N. +*> \endverbatim +*> +*> \param[out] PL +*> \verbatim +*> PL is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] PR +*> \verbatim +*> PR is DOUBLE PRECISION +*> +*> If IJOB = 1, 4 or 5, PL, PR are lower bounds on the +*> reciprocal of the norm of "projections" onto left and right +*> eigenspace with respect to the selected cluster. +*> 0 < PL, PR <= 1. +*> If M = 0 or M = N, PL = PR = 1. +*> If IJOB = 0, 2 or 3 PL, PR are not referenced. +*> \endverbatim +*> +*> \param[out] DIF +*> \verbatim +*> DIF is DOUBLE PRECISION array, dimension (2). +*> If IJOB >= 2, DIF(1:2) store the estimates of Difu and Difl. +*> If IJOB = 2 or 4, DIF(1:2) are F-norm-based upper bounds on +*> Difu and Difl. If IJOB = 3 or 5, DIF(1:2) are 1-norm-based +*> estimates of Difu and Difl, computed using reversed +*> communication with ZLACN2. +*> If M = 0 or N, DIF(1:2) = F-norm([A, B]). +*> If IJOB = 0 or 1, DIF is not referenced. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 1 +*> If IJOB = 1, 2 or 4, LWORK >= 2*M*(N-M) +*> If IJOB = 3 or 5, LWORK >= 4*M*(N-M) +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (MAX(1,LIWORK)) +*> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. +*> \endverbatim +*> +*> \param[in] LIWORK +*> \verbatim +*> LIWORK is INTEGER +*> The dimension of the array IWORK. LIWORK >= 1. +*> If IJOB = 1, 2 or 4, LIWORK >= N+2; +*> If IJOB = 3 or 5, LIWORK >= MAX(N+2, 2*M*(N-M)); +*> +*> If LIWORK = -1, then a workspace query is assumed; the +*> routine only calculates the optimal size of the IWORK array, +*> returns this value as the first entry of the IWORK array, and +*> no error message related to LIWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> =0: Successful exit. +*> <0: If INFO = -i, the i-th argument had an illegal value. +*> =1: Reordering of (A, B) failed because the transformed +*> matrix pair (A, B) would be too far from generalized +*> Schur form; the problem is very ill-conditioned. +*> (A, B) may have been partially reordered. +*> If requested, 0 is returned in DIF(*), PL and PR. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> ZTGSEN first collects the selected eigenvalues by computing unitary +*> U and W that move them to the top left corner of (A, B). In other +*> words, the selected eigenvalues are the eigenvalues of (A11, B11) in +*> +*> U**H*(A, B)*W = (A11 A12) (B11 B12) n1 +*> ( 0 A22),( 0 B22) n2 +*> n1 n2 n1 n2 +*> +*> where N = n1+n2 and U**H means the conjugate transpose of U. The first +*> n1 columns of U and W span the specified pair of left and right +*> eigenspaces (deflating subspaces) of (A, B). +*> +*> If (A, B) has been obtained from the generalized real Schur +*> decomposition of a matrix pair (C, D) = Q*(A, B)*Z**H, then the +*> reordered generalized Schur form of (C, D) is given by +*> +*> (C, D) = (Q*U)*(U**H *(A, B)*W)*(Z*W)**H, +*> +*> and the first n1 columns of Q*U and Z*W span the corresponding +*> deflating subspaces of (C, D) (Q and Z store Q*U and Z*W, resp.). +*> +*> Note that if the selected eigenvalue is sufficiently ill-conditioned, +*> then its value may differ significantly from its value before +*> reordering. +*> +*> The reciprocal condition numbers of the left and right eigenspaces +*> spanned by the first n1 columns of U and W (or Q*U and Z*W) may +*> be returned in DIF(1:2), corresponding to Difu and Difl, resp. +*> +*> The Difu and Difl are defined as: +*> +*> Difu[(A11, B11), (A22, B22)] = sigma-min( Zu ) +*> and +*> Difl[(A11, B11), (A22, B22)] = Difu[(A22, B22), (A11, B11)], +*> +*> where sigma-min(Zu) is the smallest singular value of the +*> (2*n1*n2)-by-(2*n1*n2) matrix +*> +*> Zu = [ kron(In2, A11) -kron(A22**H, In1) ] +*> [ kron(In2, B11) -kron(B22**H, In1) ]. +*> +*> Here, Inx is the identity matrix of size nx and A22**H is the +*> conjugate transpose of A22. kron(X, Y) is the Kronecker product between +*> the matrices X and Y. +*> +*> When DIF(2) is small, small changes in (A, B) can cause large changes +*> in the deflating subspace. An approximate (asymptotic) bound on the +*> maximum angular error in the computed deflating subspaces is +*> +*> EPS * norm((A, B)) / DIF(2), +*> +*> where EPS is the machine precision. +*> +*> The reciprocal norm of the projectors on the left and right +*> eigenspaces associated with (A11, B11) may be returned in PL and PR. +*> They are computed as follows. First we compute L and R so that +*> P*(A, B)*Q is block diagonal, where +*> +*> P = ( I -L ) n1 Q = ( I R ) n1 +*> ( 0 I ) n2 and ( 0 I ) n2 +*> n1 n2 n1 n2 +*> +*> and (L, R) is the solution to the generalized Sylvester equation +*> +*> A11*R - L*A22 = -A12 +*> B11*R - L*B22 = -B12 +*> +*> Then PL = (F-norm(L)**2+1)**(-1/2) and PR = (F-norm(R)**2+1)**(-1/2). +*> An approximate (asymptotic) bound on the average absolute error of +*> the selected eigenvalues is +*> +*> EPS * norm((A, B)) / PL. +*> +*> There are also global error bounds which valid for perturbations up +*> to a certain restriction: A lower bound (x) on the smallest +*> F-norm(E,F) for which an eigenvalue of (A11, B11) may move and +*> coalesce with an eigenvalue of (A22, B22) under perturbation (E,F), +*> (i.e. (A + E, B + F), is +*> +*> x = min(Difu,Difl)/((1/(PL*PL)+1/(PR*PR))**(1/2)+2*max(1/PL,1/PR)). +*> +*> An approximate bound on x can be computed from DIF(1:2), PL and PR. +*> +*> If y = ( F-norm(E,F) / x) <= 1, the angles between the perturbed +*> (L', R') and unperturbed (L, R) left and right deflating subspaces +*> associated with the selected cluster in the (1,1)-blocks can be +*> bounded as +*> +*> max-angle(L, L') <= arctan( y * PL / (1 - y * (1 - PL * PL)**(1/2)) +*> max-angle(R, R') <= arctan( y * PR / (1 - y * (1 - PR * PR)**(1/2)) +*> +*> See LAPACK User's Guide section 4.11 or the following references +*> for more information. +*> +*> Note that if the default method for computing the Frobenius-norm- +*> based estimate DIF is not wanted (see ZLATDF), then the parameter +*> IDIFJB (see below) should be changed from 3 to 4 (routine ZLATDF +*> (IJOB = 2 will be used)). See ZTGSYL for more details. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +*> Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +*> M.S. Moonen et al (eds), Linear Algebra for Large Scale and +*> Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +*> \n +*> [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +*> Eigenvalues of a Regular Matrix Pair (A, B) and Condition +*> Estimation: Theory, Algorithms and Software, Report +*> UMINF - 94.04, Department of Computing Science, Umea University, +*> S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87. +*> To appear in Numerical Algorithms, 1996. +*> \n +*> [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +*> for Solving the Generalized Sylvester Equation and Estimating the +*> Separation between Regular Matrix Pairs, Report UMINF - 93.23, +*> Department of Computing Science, Umea University, S-901 87 Umea, +*> Sweden, December 1993, Revised April 1994, Also as LAPACK working +*> Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1, +*> 1996. +*> +* ===================================================================== + SUBROUTINE ZTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB, + $ ALPHA, BETA, Q, LDQ, Z, LDZ, M, PL, PR, DIF, + $ WORK, LWORK, IWORK, LIWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2016 +* +* .. Scalar Arguments .. + LOGICAL WANTQ, WANTZ + INTEGER IJOB, INFO, LDA, LDB, LDQ, LDZ, LIWORK, LWORK, + $ M, N + DOUBLE PRECISION PL, PR +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION DIF( * ) + COMPLEX*16 A( LDA, * ), ALPHA( * ), B( LDB, * ), + $ BETA( * ), Q( LDQ, * ), WORK( * ), Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER IDIFJB + PARAMETER ( IDIFJB = 3 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SWAP, WANTD, WANTD1, WANTD2, WANTP + INTEGER I, IERR, IJB, K, KASE, KS, LIWMIN, LWMIN, MN2, + $ N1, N2 + DOUBLE PRECISION DSCALE, DSUM, RDSCAL, SAFMIN + COMPLEX*16 TEMP1, TEMP2 +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACN2, ZLACPY, ZLASSQ, ZSCAL, ZTGEXC, + $ ZTGSYL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DCMPLX, DCONJG, MAX, SQRT +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + EXTERNAL DLAMCH +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 ) +* + IF( IJOB.LT.0 .OR. IJOB.GT.5 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -13 + ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN + INFO = -15 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTGSEN', -INFO ) + RETURN + END IF +* + IERR = 0 +* + WANTP = IJOB.EQ.1 .OR. IJOB.GE.4 + WANTD1 = IJOB.EQ.2 .OR. IJOB.EQ.4 + WANTD2 = IJOB.EQ.3 .OR. IJOB.EQ.5 + WANTD = WANTD1 .OR. WANTD2 +* +* Set M to the dimension of the specified pair of deflating +* subspaces. +* + M = 0 + IF( .NOT.LQUERY .OR. IJOB.NE.0 ) THEN + DO 10 K = 1, N + ALPHA( K ) = A( K, K ) + BETA( K ) = B( K, K ) + IF( K.LT.N ) THEN + IF( SELECT( K ) ) + $ M = M + 1 + ELSE + IF( SELECT( N ) ) + $ M = M + 1 + END IF + 10 CONTINUE + END IF +* + IF( IJOB.EQ.1 .OR. IJOB.EQ.2 .OR. IJOB.EQ.4 ) THEN + LWMIN = MAX( 1, 2*M*( N-M ) ) + LIWMIN = MAX( 1, N+2 ) + ELSE IF( IJOB.EQ.3 .OR. IJOB.EQ.5 ) THEN + LWMIN = MAX( 1, 4*M*( N-M ) ) + LIWMIN = MAX( 1, 2*M*( N-M ), N+2 ) + ELSE + LWMIN = 1 + LIWMIN = 1 + END IF +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -21 + ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN + INFO = -23 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTGSEN', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( M.EQ.N .OR. M.EQ.0 ) THEN + IF( WANTP ) THEN + PL = ONE + PR = ONE + END IF + IF( WANTD ) THEN + DSCALE = ZERO + DSUM = ONE + DO 20 I = 1, N + CALL ZLASSQ( N, A( 1, I ), 1, DSCALE, DSUM ) + CALL ZLASSQ( N, B( 1, I ), 1, DSCALE, DSUM ) + 20 CONTINUE + DIF( 1 ) = DSCALE*SQRT( DSUM ) + DIF( 2 ) = DIF( 1 ) + END IF + GO TO 70 + END IF +* +* Get machine constant +* + SAFMIN = DLAMCH( 'S' ) +* +* Collect the selected blocks at the top-left corner of (A, B). +* + KS = 0 + DO 30 K = 1, N + SWAP = SELECT( K ) + IF( SWAP ) THEN + KS = KS + 1 +* +* Swap the K-th block to position KS. Compute unitary Q +* and Z that will swap adjacent diagonal blocks in (A, B). +* + IF( K.NE.KS ) + $ CALL ZTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, + $ LDZ, K, KS, IERR ) +* + IF( IERR.GT.0 ) THEN +* +* Swap is rejected: exit. +* + INFO = 1 + IF( WANTP ) THEN + PL = ZERO + PR = ZERO + END IF + IF( WANTD ) THEN + DIF( 1 ) = ZERO + DIF( 2 ) = ZERO + END IF + GO TO 70 + END IF + END IF + 30 CONTINUE + IF( WANTP ) THEN +* +* Solve generalized Sylvester equation for R and L: +* A11 * R - L * A22 = A12 +* B11 * R - L * B22 = B12 +* + N1 = M + N2 = N - M + I = N1 + 1 + CALL ZLACPY( 'Full', N1, N2, A( 1, I ), LDA, WORK, N1 ) + CALL ZLACPY( 'Full', N1, N2, B( 1, I ), LDB, WORK( N1*N2+1 ), + $ N1 ) + IJB = 0 + CALL ZTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, + $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), N1, + $ DSCALE, DIF( 1 ), WORK( N1*N2*2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) +* +* Estimate the reciprocal of norms of "projections" onto +* left and right eigenspaces +* + RDSCAL = ZERO + DSUM = ONE + CALL ZLASSQ( N1*N2, WORK, 1, RDSCAL, DSUM ) + PL = RDSCAL*SQRT( DSUM ) + IF( PL.EQ.ZERO ) THEN + PL = ONE + ELSE + PL = DSCALE / ( SQRT( DSCALE*DSCALE / PL+PL )*SQRT( PL ) ) + END IF + RDSCAL = ZERO + DSUM = ONE + CALL ZLASSQ( N1*N2, WORK( N1*N2+1 ), 1, RDSCAL, DSUM ) + PR = RDSCAL*SQRT( DSUM ) + IF( PR.EQ.ZERO ) THEN + PR = ONE + ELSE + PR = DSCALE / ( SQRT( DSCALE*DSCALE / PR+PR )*SQRT( PR ) ) + END IF + END IF + IF( WANTD ) THEN +* +* Compute estimates Difu and Difl. +* + IF( WANTD1 ) THEN + N1 = M + N2 = N - M + I = N1 + 1 + IJB = IDIFJB +* +* Frobenius norm-based Difu estimate. +* + CALL ZTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK, + $ N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), + $ N1, DSCALE, DIF( 1 ), WORK( N1*N2*2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) +* +* Frobenius norm-based Difl estimate. +* + CALL ZTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, WORK, + $ N2, B( I, I ), LDB, B, LDB, WORK( N1*N2+1 ), + $ N2, DSCALE, DIF( 2 ), WORK( N1*N2*2+1 ), + $ LWORK-2*N1*N2, IWORK, IERR ) + ELSE +* +* Compute 1-norm-based estimates of Difu and Difl using +* reversed communication with ZLACN2. In each step a +* generalized Sylvester equation or a transposed variant +* is solved. +* + KASE = 0 + N1 = M + N2 = N - M + I = N1 + 1 + IJB = 0 + MN2 = 2*N1*N2 +* +* 1-norm-based estimate of Difu. +* + 40 CONTINUE + CALL ZLACN2( MN2, WORK( MN2+1 ), WORK, DIF( 1 ), KASE, + $ ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve generalized Sylvester equation +* + CALL ZTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, + $ WORK, N1, B, LDB, B( I, I ), LDB, + $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), + $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + ELSE +* +* Solve the transposed variant. +* + CALL ZTGSYL( 'C', IJB, N1, N2, A, LDA, A( I, I ), LDA, + $ WORK, N1, B, LDB, B( I, I ), LDB, + $ WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ), + $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + END IF + GO TO 40 + END IF + DIF( 1 ) = DSCALE / DIF( 1 ) +* +* 1-norm-based estimate of Difl. +* + 50 CONTINUE + CALL ZLACN2( MN2, WORK( MN2+1 ), WORK, DIF( 2 ), KASE, + $ ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve generalized Sylvester equation +* + CALL ZTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, + $ WORK, N2, B( I, I ), LDB, B, LDB, + $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), + $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + ELSE +* +* Solve the transposed variant. +* + CALL ZTGSYL( 'C', IJB, N2, N1, A( I, I ), LDA, A, LDA, + $ WORK, N2, B, LDB, B( I, I ), LDB, + $ WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ), + $ WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK, + $ IERR ) + END IF + GO TO 50 + END IF + DIF( 2 ) = DSCALE / DIF( 2 ) + END IF + END IF +* +* If B(K,K) is complex, make it real and positive (normalization +* of the generalized Schur form) and Store the generalized +* eigenvalues of reordered pair (A, B) +* + DO 60 K = 1, N + DSCALE = ABS( B( K, K ) ) + IF( DSCALE.GT.SAFMIN ) THEN + TEMP1 = DCONJG( B( K, K ) / DSCALE ) + TEMP2 = B( K, K ) / DSCALE + B( K, K ) = DSCALE + CALL ZSCAL( N-K, TEMP1, B( K, K+1 ), LDB ) + CALL ZSCAL( N-K+1, TEMP1, A( K, K ), LDA ) + IF( WANTQ ) + $ CALL ZSCAL( N, TEMP2, Q( 1, K ), 1 ) + ELSE + B( K, K ) = DCMPLX( ZERO, ZERO ) + END IF +* + ALPHA( K ) = A( K, K ) + BETA( K ) = B( K, K ) +* + 60 CONTINUE +* + 70 CONTINUE +* + WORK( 1 ) = LWMIN + IWORK( 1 ) = LIWMIN +* + RETURN +* +* End of ZTGSEN +* + END diff --git a/dspl/liblapack/SRC/ztgsja.f b/dspl/liblapack/SRC/ztgsja.f new file mode 100644 index 0000000..851f650 --- /dev/null +++ b/dspl/liblapack/SRC/ztgsja.f @@ -0,0 +1,666 @@ +*> \brief \b ZTGSJA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTGSJA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, +* LDB, TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, +* Q, LDQ, WORK, NCYCLE, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBQ, JOBU, JOBV +* INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, +* $ NCYCLE, P +* DOUBLE PRECISION TOLA, TOLB +* .. +* .. Array Arguments .. +* DOUBLE PRECISION ALPHA( * ), BETA( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), +* $ U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTGSJA computes the generalized singular value decomposition (GSVD) +*> of two complex upper triangular (or trapezoidal) matrices A and B. +*> +*> On entry, it is assumed that matrices A and B have the following +*> forms, which may be obtained by the preprocessing subroutine ZGGSVP +*> from a general M-by-N matrix A and P-by-N matrix B: +*> +*> N-K-L K L +*> A = K ( 0 A12 A13 ) if M-K-L >= 0; +*> L ( 0 0 A23 ) +*> M-K-L ( 0 0 0 ) +*> +*> N-K-L K L +*> A = K ( 0 A12 A13 ) if M-K-L < 0; +*> M-K ( 0 0 A23 ) +*> +*> N-K-L K L +*> B = L ( 0 0 B13 ) +*> P-L ( 0 0 0 ) +*> +*> where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular +*> upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, +*> otherwise A23 is (M-K)-by-L upper trapezoidal. +*> +*> On exit, +*> +*> U**H *A*Q = D1*( 0 R ), V**H *B*Q = D2*( 0 R ), +*> +*> where U, V and Q are unitary matrices. +*> R is a nonsingular upper triangular matrix, and D1 +*> and D2 are ``diagonal'' matrices, which are of the following +*> structures: +*> +*> If M-K-L >= 0, +*> +*> K L +*> D1 = K ( I 0 ) +*> L ( 0 C ) +*> M-K-L ( 0 0 ) +*> +*> K L +*> D2 = L ( 0 S ) +*> P-L ( 0 0 ) +*> +*> N-K-L K L +*> ( 0 R ) = K ( 0 R11 R12 ) K +*> L ( 0 0 R22 ) L +*> +*> where +*> +*> C = diag( ALPHA(K+1), ... , ALPHA(K+L) ), +*> S = diag( BETA(K+1), ... , BETA(K+L) ), +*> C**2 + S**2 = I. +*> +*> R is stored in A(1:K+L,N-K-L+1:N) on exit. +*> +*> If M-K-L < 0, +*> +*> K M-K K+L-M +*> D1 = K ( I 0 0 ) +*> M-K ( 0 C 0 ) +*> +*> K M-K K+L-M +*> D2 = M-K ( 0 S 0 ) +*> K+L-M ( 0 0 I ) +*> P-L ( 0 0 0 ) +*> +*> N-K-L K M-K K+L-M +*> ( 0 R ) = K ( 0 R11 R12 R13 ) +*> M-K ( 0 0 R22 R23 ) +*> K+L-M ( 0 0 0 R33 ) +*> +*> where +*> C = diag( ALPHA(K+1), ... , ALPHA(M) ), +*> S = diag( BETA(K+1), ... , BETA(M) ), +*> C**2 + S**2 = I. +*> +*> R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored +*> ( 0 R22 R23 ) +*> in B(M-K+1:L,N+M-K-L+1:N) on exit. +*> +*> The computation of the unitary transformation matrices U, V or Q +*> is optional. These matrices may either be formed explicitly, or they +*> may be postmultiplied into input matrices U1, V1, or Q1. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU +*> \verbatim +*> JOBU is CHARACTER*1 +*> = 'U': U must contain a unitary matrix U1 on entry, and +*> the product U1*U is returned; +*> = 'I': U is initialized to the unit matrix, and the +*> unitary matrix U is returned; +*> = 'N': U is not computed. +*> \endverbatim +*> +*> \param[in] JOBV +*> \verbatim +*> JOBV is CHARACTER*1 +*> = 'V': V must contain a unitary matrix V1 on entry, and +*> the product V1*V is returned; +*> = 'I': V is initialized to the unit matrix, and the +*> unitary matrix V is returned; +*> = 'N': V is not computed. +*> \endverbatim +*> +*> \param[in] JOBQ +*> \verbatim +*> JOBQ is CHARACTER*1 +*> = 'Q': Q must contain a unitary matrix Q1 on entry, and +*> the product Q1*Q is returned; +*> = 'I': Q is initialized to the unit matrix, and the +*> unitary matrix Q is returned; +*> = 'N': Q is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows of the matrix B. P >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> +*> K and L specify the subblocks in the input matrices A and B: +*> A23 = A(K+1:MIN(K+L,M),N-L+1:N) and B13 = B(1:L,,N-L+1:N) +*> of A and B, whose GSVD is going to be computed by ZTGSJA. +*> See Further Details. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the M-by-N matrix A. +*> On exit, A(N-K+1:N,1:MIN(K+L,M) ) contains the triangular +*> matrix R or part of R. See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> On entry, the P-by-N matrix B. +*> On exit, if necessary, B(M-K+1:L,N+M-K-L+1:N) contains +*> a part of R. See Purpose for details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,P). +*> \endverbatim +*> +*> \param[in] TOLA +*> \verbatim +*> TOLA is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] TOLB +*> \verbatim +*> TOLB is DOUBLE PRECISION +*> +*> TOLA and TOLB are the convergence criteria for the Jacobi- +*> Kogbetliantz iteration procedure. Generally, they are the +*> same as used in the preprocessing step, say +*> TOLA = MAX(M,N)*norm(A)*MAZHEPS, +*> TOLB = MAX(P,N)*norm(B)*MAZHEPS. +*> \endverbatim +*> +*> \param[out] ALPHA +*> \verbatim +*> ALPHA is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION array, dimension (N) +*> +*> On exit, ALPHA and BETA contain the generalized singular +*> value pairs of A and B; +*> ALPHA(1:K) = 1, +*> BETA(1:K) = 0, +*> and if M-K-L >= 0, +*> ALPHA(K+1:K+L) = diag(C), +*> BETA(K+1:K+L) = diag(S), +*> or if M-K-L < 0, +*> ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0 +*> BETA(K+1:M) = S, BETA(M+1:K+L) = 1. +*> Furthermore, if K+L < N, +*> ALPHA(K+L+1:N) = 0 and +*> BETA(K+L+1:N) = 0. +*> \endverbatim +*> +*> \param[in,out] U +*> \verbatim +*> U is COMPLEX*16 array, dimension (LDU,M) +*> On entry, if JOBU = 'U', U must contain a matrix U1 (usually +*> the unitary matrix returned by ZGGSVP). +*> On exit, +*> if JOBU = 'I', U contains the unitary matrix U; +*> if JOBU = 'U', U contains the product U1*U. +*> If JOBU = 'N', U is not referenced. +*> \endverbatim +*> +*> \param[in] LDU +*> \verbatim +*> LDU is INTEGER +*> The leading dimension of the array U. LDU >= max(1,M) if +*> JOBU = 'U'; LDU >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (LDV,P) +*> On entry, if JOBV = 'V', V must contain a matrix V1 (usually +*> the unitary matrix returned by ZGGSVP). +*> On exit, +*> if JOBV = 'I', V contains the unitary matrix V; +*> if JOBV = 'V', V contains the product V1*V. +*> If JOBV = 'N', V is not referenced. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. LDV >= max(1,P) if +*> JOBV = 'V'; LDV >= 1 otherwise. +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ,N) +*> On entry, if JOBQ = 'Q', Q must contain a matrix Q1 (usually +*> the unitary matrix returned by ZGGSVP). +*> On exit, +*> if JOBQ = 'I', Q contains the unitary matrix Q; +*> if JOBQ = 'Q', Q contains the product Q1*Q. +*> If JOBQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N) if +*> JOBQ = 'Q'; LDQ >= 1 otherwise. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] NCYCLE +*> \verbatim +*> NCYCLE is INTEGER +*> The number of cycles required for convergence. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> = 1: the procedure does not converge after MAXIT cycles. +*> \endverbatim +* +*> \par Internal Parameters: +* ========================= +*> +*> \verbatim +*> MAXIT INTEGER +*> MAXIT specifies the total loops that the iterative procedure +*> may take. If after MAXIT cycles, the routine fails to +*> converge, we return INFO = 1. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> ZTGSJA essentially uses a variant of Kogbetliantz algorithm to reduce +*> min(L,M-K)-by-L triangular (or trapezoidal) matrix A23 and L-by-L +*> matrix B13 to the form: +*> +*> U1**H *A13*Q1 = C1*R1; V1**H *B13*Q1 = S1*R1, +*> +*> where U1, V1 and Q1 are unitary matrix. +*> C1 and S1 are diagonal matrices satisfying +*> +*> C1**2 + S1**2 = I, +*> +*> and R1 is an L-by-L nonsingular upper triangular matrix. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, + $ LDB, TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, + $ Q, LDQ, WORK, NCYCLE, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER JOBQ, JOBU, JOBV + INTEGER INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, + $ NCYCLE, P + DOUBLE PRECISION TOLA, TOLB +* .. +* .. Array Arguments .. + DOUBLE PRECISION ALPHA( * ), BETA( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), Q( LDQ, * ), + $ U( LDU, * ), V( LDV, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER MAXIT + PARAMETER ( MAXIT = 40 ) + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. +* + LOGICAL INITQ, INITU, INITV, UPPER, WANTQ, WANTU, WANTV + INTEGER I, J, KCYCLE + DOUBLE PRECISION A1, A3, B1, B3, CSQ, CSU, CSV, ERROR, GAMMA, + $ RWK, SSMIN + COMPLEX*16 A2, B2, SNQ, SNU, SNV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLARTG, XERBLA, ZCOPY, ZDSCAL, ZLAGS2, ZLAPLL, + $ ZLASET, ZROT +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCONJG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + INITU = LSAME( JOBU, 'I' ) + WANTU = INITU .OR. LSAME( JOBU, 'U' ) +* + INITV = LSAME( JOBV, 'I' ) + WANTV = INITV .OR. LSAME( JOBV, 'V' ) +* + INITQ = LSAME( JOBQ, 'I' ) + WANTQ = INITQ .OR. LSAME( JOBQ, 'Q' ) +* + INFO = 0 + IF( .NOT.( INITU .OR. WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( INITV .OR. WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( INITQ .OR. WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( P.LT.0 ) THEN + INFO = -5 + ELSE IF( N.LT.0 ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDB.LT.MAX( 1, P ) ) THEN + INFO = -12 + ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN + INFO = -18 + ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN + INFO = -20 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -22 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTGSJA', -INFO ) + RETURN + END IF +* +* Initialize U, V and Q, if necessary +* + IF( INITU ) + $ CALL ZLASET( 'Full', M, M, CZERO, CONE, U, LDU ) + IF( INITV ) + $ CALL ZLASET( 'Full', P, P, CZERO, CONE, V, LDV ) + IF( INITQ ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, Q, LDQ ) +* +* Loop until convergence +* + UPPER = .FALSE. + DO 40 KCYCLE = 1, MAXIT +* + UPPER = .NOT.UPPER +* + DO 20 I = 1, L - 1 + DO 10 J = I + 1, L +* + A1 = ZERO + A2 = CZERO + A3 = ZERO + IF( K+I.LE.M ) + $ A1 = DBLE( A( K+I, N-L+I ) ) + IF( K+J.LE.M ) + $ A3 = DBLE( A( K+J, N-L+J ) ) +* + B1 = DBLE( B( I, N-L+I ) ) + B3 = DBLE( B( J, N-L+J ) ) +* + IF( UPPER ) THEN + IF( K+I.LE.M ) + $ A2 = A( K+I, N-L+J ) + B2 = B( I, N-L+J ) + ELSE + IF( K+J.LE.M ) + $ A2 = A( K+J, N-L+I ) + B2 = B( J, N-L+I ) + END IF +* + CALL ZLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, + $ CSV, SNV, CSQ, SNQ ) +* +* Update (K+I)-th and (K+J)-th rows of matrix A: U**H *A +* + IF( K+J.LE.M ) + $ CALL ZROT( L, A( K+J, N-L+1 ), LDA, A( K+I, N-L+1 ), + $ LDA, CSU, DCONJG( SNU ) ) +* +* Update I-th and J-th rows of matrix B: V**H *B +* + CALL ZROT( L, B( J, N-L+1 ), LDB, B( I, N-L+1 ), LDB, + $ CSV, DCONJG( SNV ) ) +* +* Update (N-L+I)-th and (N-L+J)-th columns of matrices +* A and B: A*Q and B*Q +* + CALL ZROT( MIN( K+L, M ), A( 1, N-L+J ), 1, + $ A( 1, N-L+I ), 1, CSQ, SNQ ) +* + CALL ZROT( L, B( 1, N-L+J ), 1, B( 1, N-L+I ), 1, CSQ, + $ SNQ ) +* + IF( UPPER ) THEN + IF( K+I.LE.M ) + $ A( K+I, N-L+J ) = CZERO + B( I, N-L+J ) = CZERO + ELSE + IF( K+J.LE.M ) + $ A( K+J, N-L+I ) = CZERO + B( J, N-L+I ) = CZERO + END IF +* +* Ensure that the diagonal elements of A and B are real. +* + IF( K+I.LE.M ) + $ A( K+I, N-L+I ) = DBLE( A( K+I, N-L+I ) ) + IF( K+J.LE.M ) + $ A( K+J, N-L+J ) = DBLE( A( K+J, N-L+J ) ) + B( I, N-L+I ) = DBLE( B( I, N-L+I ) ) + B( J, N-L+J ) = DBLE( B( J, N-L+J ) ) +* +* Update unitary matrices U, V, Q, if desired. +* + IF( WANTU .AND. K+J.LE.M ) + $ CALL ZROT( M, U( 1, K+J ), 1, U( 1, K+I ), 1, CSU, + $ SNU ) +* + IF( WANTV ) + $ CALL ZROT( P, V( 1, J ), 1, V( 1, I ), 1, CSV, SNV ) +* + IF( WANTQ ) + $ CALL ZROT( N, Q( 1, N-L+J ), 1, Q( 1, N-L+I ), 1, CSQ, + $ SNQ ) +* + 10 CONTINUE + 20 CONTINUE +* + IF( .NOT.UPPER ) THEN +* +* The matrices A13 and B13 were lower triangular at the start +* of the cycle, and are now upper triangular. +* +* Convergence test: test the parallelism of the corresponding +* rows of A and B. +* + ERROR = ZERO + DO 30 I = 1, MIN( L, M-K ) + CALL ZCOPY( L-I+1, A( K+I, N-L+I ), LDA, WORK, 1 ) + CALL ZCOPY( L-I+1, B( I, N-L+I ), LDB, WORK( L+1 ), 1 ) + CALL ZLAPLL( L-I+1, WORK, 1, WORK( L+1 ), 1, SSMIN ) + ERROR = MAX( ERROR, SSMIN ) + 30 CONTINUE +* + IF( ABS( ERROR ).LE.MIN( TOLA, TOLB ) ) + $ GO TO 50 + END IF +* +* End of cycle loop +* + 40 CONTINUE +* +* The algorithm has not converged after MAXIT cycles. +* + INFO = 1 + GO TO 100 +* + 50 CONTINUE +* +* If ERROR <= MIN(TOLA,TOLB), then the algorithm has converged. +* Compute the generalized singular value pairs (ALPHA, BETA), and +* set the triangular matrix R to array A. +* + DO 60 I = 1, K + ALPHA( I ) = ONE + BETA( I ) = ZERO + 60 CONTINUE +* + DO 70 I = 1, MIN( L, M-K ) +* + A1 = DBLE( A( K+I, N-L+I ) ) + B1 = DBLE( B( I, N-L+I ) ) +* + IF( A1.NE.ZERO ) THEN + GAMMA = B1 / A1 +* + IF( GAMMA.LT.ZERO ) THEN + CALL ZDSCAL( L-I+1, -ONE, B( I, N-L+I ), LDB ) + IF( WANTV ) + $ CALL ZDSCAL( P, -ONE, V( 1, I ), 1 ) + END IF +* + CALL DLARTG( ABS( GAMMA ), ONE, BETA( K+I ), ALPHA( K+I ), + $ RWK ) +* + IF( ALPHA( K+I ).GE.BETA( K+I ) ) THEN + CALL ZDSCAL( L-I+1, ONE / ALPHA( K+I ), A( K+I, N-L+I ), + $ LDA ) + ELSE + CALL ZDSCAL( L-I+1, ONE / BETA( K+I ), B( I, N-L+I ), + $ LDB ) + CALL ZCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ), + $ LDA ) + END IF +* + ELSE +* + ALPHA( K+I ) = ZERO + BETA( K+I ) = ONE + CALL ZCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ), + $ LDA ) + END IF + 70 CONTINUE +* +* Post-assignment +* + DO 80 I = M + 1, K + L + ALPHA( I ) = ZERO + BETA( I ) = ONE + 80 CONTINUE +* + IF( K+L.LT.N ) THEN + DO 90 I = K + L + 1, N + ALPHA( I ) = ZERO + BETA( I ) = ZERO + 90 CONTINUE + END IF +* + 100 CONTINUE + NCYCLE = KCYCLE +* + RETURN +* +* End of ZTGSJA +* + END diff --git a/dspl/liblapack/SRC/ztgsna.f b/dspl/liblapack/SRC/ztgsna.f new file mode 100644 index 0000000..77cbdcd --- /dev/null +++ b/dspl/liblapack/SRC/ztgsna.f @@ -0,0 +1,519 @@ +*> \brief \b ZTGSNA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTGSNA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTGSNA( JOB, HOWMNY, SELECT, N, A, LDA, B, LDB, VL, +* LDVL, VR, LDVR, S, DIF, MM, M, WORK, LWORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER HOWMNY, JOB +* INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* INTEGER IWORK( * ) +* DOUBLE PRECISION DIF( * ), S( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), VL( LDVL, * ), +* $ VR( LDVR, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTGSNA estimates reciprocal condition numbers for specified +*> eigenvalues and/or eigenvectors of a matrix pair (A, B). +*> +*> (A, B) must be in generalized Schur canonical form, that is, A and +*> B are both upper triangular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies whether condition numbers are required for +*> eigenvalues (S) or eigenvectors (DIF): +*> = 'E': for eigenvalues only (S); +*> = 'V': for eigenvectors only (DIF); +*> = 'B': for both eigenvalues and eigenvectors (S and DIF). +*> \endverbatim +*> +*> \param[in] HOWMNY +*> \verbatim +*> HOWMNY is CHARACTER*1 +*> = 'A': compute condition numbers for all eigenpairs; +*> = 'S': compute condition numbers for selected eigenpairs +*> specified by the array SELECT. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> If HOWMNY = 'S', SELECT specifies the eigenpairs for which +*> condition numbers are required. To select condition numbers +*> for the corresponding j-th eigenvalue and/or eigenvector, +*> SELECT(j) must be set to .TRUE.. +*> If HOWMNY = 'A', SELECT is not referenced. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the square matrix pair (A, B). N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The upper triangular matrix A in the pair (A,B). +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> The upper triangular matrix B in the pair (A, B). +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is COMPLEX*16 array, dimension (LDVL,M) +*> IF JOB = 'E' or 'B', VL must contain left eigenvectors of +*> (A, B), corresponding to the eigenpairs specified by HOWMNY +*> and SELECT. The eigenvectors must be stored in consecutive +*> columns of VL, as returned by ZTGEVC. +*> If JOB = 'V', VL is not referenced. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. LDVL >= 1; and +*> If JOB = 'E' or 'B', LDVL >= N. +*> \endverbatim +*> +*> \param[in] VR +*> \verbatim +*> VR is COMPLEX*16 array, dimension (LDVR,M) +*> IF JOB = 'E' or 'B', VR must contain right eigenvectors of +*> (A, B), corresponding to the eigenpairs specified by HOWMNY +*> and SELECT. The eigenvectors must be stored in consecutive +*> columns of VR, as returned by ZTGEVC. +*> If JOB = 'V', VR is not referenced. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. LDVR >= 1; +*> If JOB = 'E' or 'B', LDVR >= N. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (MM) +*> If JOB = 'E' or 'B', the reciprocal condition numbers of the +*> selected eigenvalues, stored in consecutive elements of the +*> array. +*> If JOB = 'V', S is not referenced. +*> \endverbatim +*> +*> \param[out] DIF +*> \verbatim +*> DIF is DOUBLE PRECISION array, dimension (MM) +*> If JOB = 'V' or 'B', the estimated reciprocal condition +*> numbers of the selected eigenvectors, stored in consecutive +*> elements of the array. +*> If the eigenvalues cannot be reordered to compute DIF(j), +*> DIF(j) is set to 0; this can only occur when the true value +*> would be very small anyway. +*> For each eigenvalue/vector specified by SELECT, DIF stores +*> a Frobenius norm-based estimate of Difl. +*> If JOB = 'E', DIF is not referenced. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of elements in the arrays S and DIF. MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of elements of the arrays S and DIF used to store +*> the specified condition numbers; for each selected eigenvalue +*> one element is used. If HOWMNY = 'A', M is set to N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> If JOB = 'V' or 'B', LWORK >= max(1,2*N*N). +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (N+2) +*> If JOB = 'E', IWORK is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: Successful exit +*> < 0: If INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The reciprocal of the condition number of the i-th generalized +*> eigenvalue w = (a, b) is defined as +*> +*> S(I) = (|v**HAu|**2 + |v**HBu|**2)**(1/2) / (norm(u)*norm(v)) +*> +*> where u and v are the right and left eigenvectors of (A, B) +*> corresponding to w; |z| denotes the absolute value of the complex +*> number, and norm(u) denotes the 2-norm of the vector u. The pair +*> (a, b) corresponds to an eigenvalue w = a/b (= v**HAu/v**HBu) of the +*> matrix pair (A, B). If both a and b equal zero, then (A,B) is +*> singular and S(I) = -1 is returned. +*> +*> An approximate error bound on the chordal distance between the i-th +*> computed generalized eigenvalue w and the corresponding exact +*> eigenvalue lambda is +*> +*> chord(w, lambda) <= EPS * norm(A, B) / S(I), +*> +*> where EPS is the machine precision. +*> +*> The reciprocal of the condition number of the right eigenvector u +*> and left eigenvector v corresponding to the generalized eigenvalue w +*> is defined as follows. Suppose +*> +*> (A, B) = ( a * ) ( b * ) 1 +*> ( 0 A22 ),( 0 B22 ) n-1 +*> 1 n-1 1 n-1 +*> +*> Then the reciprocal condition number DIF(I) is +*> +*> Difl[(a, b), (A22, B22)] = sigma-min( Zl ) +*> +*> where sigma-min(Zl) denotes the smallest singular value of +*> +*> Zl = [ kron(a, In-1) -kron(1, A22) ] +*> [ kron(b, In-1) -kron(1, B22) ]. +*> +*> Here In-1 is the identity matrix of size n-1 and X**H is the conjugate +*> transpose of X. kron(X, Y) is the Kronecker product between the +*> matrices X and Y. +*> +*> We approximate the smallest singular value of Zl with an upper +*> bound. This is done by ZLATDF. +*> +*> An approximate error bound for a computed eigenvector VL(i) or +*> VR(i) is given by +*> +*> EPS * norm(A, B) / DIF(i). +*> +*> See ref. [2-3] for more details and further references. +*> \endverbatim +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> \verbatim +*> +*> [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the +*> Generalized Real Schur Form of a Regular Matrix Pair (A, B), in +*> M.S. Moonen et al (eds), Linear Algebra for Large Scale and +*> Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. +*> +*> [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified +*> Eigenvalues of a Regular Matrix Pair (A, B) and Condition +*> Estimation: Theory, Algorithms and Software, Report +*> UMINF - 94.04, Department of Computing Science, Umea University, +*> S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87. +*> To appear in Numerical Algorithms, 1996. +*> +*> [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +*> for Solving the Generalized Sylvester Equation and Estimating the +*> Separation between Regular Matrix Pairs, Report UMINF - 93.23, +*> Department of Computing Science, Umea University, S-901 87 Umea, +*> Sweden, December 1993, Revised April 1994, Also as LAPACK Working +*> Note 75. +*> To appear in ACM Trans. on Math. Software, Vol 22, No 1, 1996. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTGSNA( JOB, HOWMNY, SELECT, N, A, LDA, B, LDB, VL, + $ LDVL, VR, LDVR, S, DIF, MM, M, WORK, LWORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, JOB + INTEGER INFO, LDA, LDB, LDVL, LDVR, LWORK, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + INTEGER IWORK( * ) + DOUBLE PRECISION DIF( * ), S( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), VL( LDVL, * ), + $ VR( LDVR, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + INTEGER IDIFJB + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, IDIFJB = 3 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, SOMCON, WANTBH, WANTDF, WANTS + INTEGER I, IERR, IFST, ILST, K, KS, LWMIN, N1, N2 + DOUBLE PRECISION BIGNUM, COND, EPS, LNRM, RNRM, SCALE, SMLNUM + COMPLEX*16 YHAX, YHBX +* .. +* .. Local Arrays .. + COMPLEX*16 DUMMY( 1 ), DUMMY1( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLAPY2, DZNRM2 + COMPLEX*16 ZDOTC + EXTERNAL LSAME, DLAMCH, DLAPY2, DZNRM2, ZDOTC +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, XERBLA, ZGEMV, ZLACPY, ZTGEXC, ZTGSYL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DCMPLX, MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTDF = LSAME( JOB, 'V' ) .OR. WANTBH +* + SOMCON = LSAME( HOWMNY, 'S' ) +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.WANTS .AND. .NOT.WANTDF ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( WANTS .AND. LDVL.LT.N ) THEN + INFO = -10 + ELSE IF( WANTS .AND. LDVR.LT.N ) THEN + INFO = -12 + ELSE +* +* Set M to the number of eigenpairs for which condition numbers +* are required, and test MM. +* + IF( SOMCON ) THEN + M = 0 + DO 10 K = 1, N + IF( SELECT( K ) ) + $ M = M + 1 + 10 CONTINUE + ELSE + M = N + END IF +* + IF( N.EQ.0 ) THEN + LWMIN = 1 + ELSE IF( LSAME( JOB, 'V' ) .OR. LSAME( JOB, 'B' ) ) THEN + LWMIN = 2*N*N + ELSE + LWMIN = N + END IF + WORK( 1 ) = LWMIN +* + IF( MM.LT.M ) THEN + INFO = -15 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -18 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTGSNA', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + KS = 0 + DO 20 K = 1, N +* +* Determine whether condition numbers are required for the k-th +* eigenpair. +* + IF( SOMCON ) THEN + IF( .NOT.SELECT( K ) ) + $ GO TO 20 + END IF +* + KS = KS + 1 +* + IF( WANTS ) THEN +* +* Compute the reciprocal condition number of the k-th +* eigenvalue. +* + RNRM = DZNRM2( N, VR( 1, KS ), 1 ) + LNRM = DZNRM2( N, VL( 1, KS ), 1 ) + CALL ZGEMV( 'N', N, N, DCMPLX( ONE, ZERO ), A, LDA, + $ VR( 1, KS ), 1, DCMPLX( ZERO, ZERO ), WORK, 1 ) + YHAX = ZDOTC( N, WORK, 1, VL( 1, KS ), 1 ) + CALL ZGEMV( 'N', N, N, DCMPLX( ONE, ZERO ), B, LDB, + $ VR( 1, KS ), 1, DCMPLX( ZERO, ZERO ), WORK, 1 ) + YHBX = ZDOTC( N, WORK, 1, VL( 1, KS ), 1 ) + COND = DLAPY2( ABS( YHAX ), ABS( YHBX ) ) + IF( COND.EQ.ZERO ) THEN + S( KS ) = -ONE + ELSE + S( KS ) = COND / ( RNRM*LNRM ) + END IF + END IF +* + IF( WANTDF ) THEN + IF( N.EQ.1 ) THEN + DIF( KS ) = DLAPY2( ABS( A( 1, 1 ) ), ABS( B( 1, 1 ) ) ) + ELSE +* +* Estimate the reciprocal condition number of the k-th +* eigenvectors. +* +* Copy the matrix (A, B) to the array WORK and move the +* (k,k)th pair to the (1,1) position. +* + CALL ZLACPY( 'Full', N, N, A, LDA, WORK, N ) + CALL ZLACPY( 'Full', N, N, B, LDB, WORK( N*N+1 ), N ) + IFST = K + ILST = 1 +* + CALL ZTGEXC( .FALSE., .FALSE., N, WORK, N, WORK( N*N+1 ), + $ N, DUMMY, 1, DUMMY1, 1, IFST, ILST, IERR ) +* + IF( IERR.GT.0 ) THEN +* +* Ill-conditioned problem - swap rejected. +* + DIF( KS ) = ZERO + ELSE +* +* Reordering successful, solve generalized Sylvester +* equation for R and L, +* A22 * R - L * A11 = A12 +* B22 * R - L * B11 = B12, +* and compute estimate of Difl[(A11,B11), (A22, B22)]. +* + N1 = 1 + N2 = N - N1 + I = N*N + 1 + CALL ZTGSYL( 'N', IDIFJB, N2, N1, WORK( N*N1+N1+1 ), + $ N, WORK, N, WORK( N1+1 ), N, + $ WORK( N*N1+N1+I ), N, WORK( I ), N, + $ WORK( N1+I ), N, SCALE, DIF( KS ), DUMMY, + $ 1, IWORK, IERR ) + END IF + END IF + END IF +* + 20 CONTINUE + WORK( 1 ) = LWMIN + RETURN +* +* End of ZTGSNA +* + END diff --git a/dspl/liblapack/SRC/ztgsy2.f b/dspl/liblapack/SRC/ztgsy2.f new file mode 100644 index 0000000..f89effd --- /dev/null +++ b/dspl/liblapack/SRC/ztgsy2.f @@ -0,0 +1,472 @@ +*> \brief \b ZTGSY2 solves the generalized Sylvester equation (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTGSY2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTGSY2( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, +* LDD, E, LDE, F, LDF, SCALE, RDSUM, RDSCAL, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, M, N +* DOUBLE PRECISION RDSCAL, RDSUM, SCALE +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ), +* $ D( LDD, * ), E( LDE, * ), F( LDF, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTGSY2 solves the generalized Sylvester equation +*> +*> A * R - L * B = scale * C (1) +*> D * R - L * E = scale * F +*> +*> using Level 1 and 2 BLAS, where R and L are unknown M-by-N matrices, +*> (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M, +*> N-by-N and M-by-N, respectively. A, B, D and E are upper triangular +*> (i.e., (A,D) and (B,E) in generalized Schur form). +*> +*> The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output +*> scaling factor chosen to avoid overflow. +*> +*> In matrix notation solving equation (1) corresponds to solve +*> Zx = scale * b, where Z is defined as +*> +*> Z = [ kron(In, A) -kron(B**H, Im) ] (2) +*> [ kron(In, D) -kron(E**H, Im) ], +*> +*> Ik is the identity matrix of size k and X**H is the conjuguate transpose of X. +*> kron(X, Y) is the Kronecker product between the matrices X and Y. +*> +*> If TRANS = 'C', y in the conjugate transposed system Z**H*y = scale*b +*> is solved for, which is equivalent to solve for R and L in +*> +*> A**H * R + D**H * L = scale * C (3) +*> R * B**H + L * E**H = scale * -F +*> +*> This case is used to compute an estimate of Dif[(A, D), (B, E)] = +*> = sigma_min(Z) using reverse communicaton with ZLACON. +*> +*> ZTGSY2 also (IJOB >= 1) contributes to the computation in ZTGSYL +*> of an upper bound on the separation between to matrix pairs. Then +*> the input (A, D), (B, E) are sub-pencils of two matrix pairs in +*> ZTGSYL. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N', solve the generalized Sylvester equation (1). +*> = 'T': solve the 'transposed' system (3). +*> \endverbatim +*> +*> \param[in] IJOB +*> \verbatim +*> IJOB is INTEGER +*> Specifies what kind of functionality to be performed. +*> =0: solve (1) only. +*> =1: A contribution from this subsystem to a Frobenius +*> norm-based estimate of the separation between two matrix +*> pairs is computed. (look ahead strategy is used). +*> =2: A contribution from this subsystem to a Frobenius +*> norm-based estimate of the separation between two matrix +*> pairs is computed. (DGECON on sub-systems is used.) +*> Not referenced if TRANS = 'T'. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the order of A and D, and the row +*> dimension of C, F, R and L. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of B and E, and the column +*> dimension of C, F, R and L. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, M) +*> On entry, A contains an upper triangular matrix. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the matrix A. LDA >= max(1, M). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB, N) +*> On entry, B contains an upper triangular matrix. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the matrix B. LDB >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC, N) +*> On entry, C contains the right-hand-side of the first matrix +*> equation in (1). +*> On exit, if IJOB = 0, C has been overwritten by the solution +*> R. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the matrix C. LDC >= max(1, M). +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is COMPLEX*16 array, dimension (LDD, M) +*> On entry, D contains an upper triangular matrix. +*> \endverbatim +*> +*> \param[in] LDD +*> \verbatim +*> LDD is INTEGER +*> The leading dimension of the matrix D. LDD >= max(1, M). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (LDE, N) +*> On entry, E contains an upper triangular matrix. +*> \endverbatim +*> +*> \param[in] LDE +*> \verbatim +*> LDE is INTEGER +*> The leading dimension of the matrix E. LDE >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] F +*> \verbatim +*> F is COMPLEX*16 array, dimension (LDF, N) +*> On entry, F contains the right-hand-side of the second matrix +*> equation in (1). +*> On exit, if IJOB = 0, F has been overwritten by the solution +*> L. +*> \endverbatim +*> +*> \param[in] LDF +*> \verbatim +*> LDF is INTEGER +*> The leading dimension of the matrix F. LDF >= max(1, M). +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> On exit, 0 <= SCALE <= 1. If 0 < SCALE < 1, the solutions +*> R and L (C and F on entry) will hold the solutions to a +*> slightly perturbed system but the input matrices A, B, D and +*> E have not been changed. If SCALE = 0, R and L will hold the +*> solutions to the homogeneous system with C = F = 0. +*> Normally, SCALE = 1. +*> \endverbatim +*> +*> \param[in,out] RDSUM +*> \verbatim +*> RDSUM is DOUBLE PRECISION +*> On entry, the sum of squares of computed contributions to +*> the Dif-estimate under computation by ZTGSYL, where the +*> scaling factor RDSCAL (see below) has been factored out. +*> On exit, the corresponding sum of squares updated with the +*> contributions from the current sub-system. +*> If TRANS = 'T' RDSUM is not touched. +*> NOTE: RDSUM only makes sense when ZTGSY2 is called by +*> ZTGSYL. +*> \endverbatim +*> +*> \param[in,out] RDSCAL +*> \verbatim +*> RDSCAL is DOUBLE PRECISION +*> On entry, scaling factor used to prevent overflow in RDSUM. +*> On exit, RDSCAL is updated w.r.t. the current contributions +*> in RDSUM. +*> If TRANS = 'T', RDSCAL is not touched. +*> NOTE: RDSCAL only makes sense when ZTGSY2 is called by +*> ZTGSYL. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> On exit, if INFO is set to +*> =0: Successful exit +*> <0: If INFO = -i, input argument number i is illegal. +*> >0: The matrix pairs (A, D) and (B, E) have common or very +*> close eigenvalues. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYauxiliary +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +* ===================================================================== + SUBROUTINE ZTGSY2( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, RDSUM, RDSCAL, + $ INFO ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, M, N + DOUBLE PRECISION RDSCAL, RDSUM, SCALE +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ D( LDD, * ), E( LDE, * ), F( LDF, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + INTEGER LDZ + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, LDZ = 2 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN + INTEGER I, IERR, J, K + DOUBLE PRECISION SCALOC + COMPLEX*16 ALPHA +* .. +* .. Local Arrays .. + INTEGER IPIV( LDZ ), JPIV( LDZ ) + COMPLEX*16 RHS( LDZ ), Z( LDZ, LDZ ) +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZGESC2, ZGETC2, ZLATDF, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC DCMPLX, DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Decode and test input parameters +* + INFO = 0 + IERR = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( NOTRAN ) THEN + IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.2 ) ) THEN + INFO = -2 + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( M.LE.0 ) THEN + INFO = -3 + ELSE IF( N.LE.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDD.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDE.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDF.LT.MAX( 1, M ) ) THEN + INFO = -16 + END IF + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTGSY2', -INFO ) + RETURN + END IF +* + IF( NOTRAN ) THEN +* +* Solve (I, J) - system +* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) +* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) +* for I = M, M - 1, ..., 1; J = 1, 2, ..., N +* + SCALE = ONE + SCALOC = ONE + DO 30 J = 1, N + DO 20 I = M, 1, -1 +* +* Build 2 by 2 system +* + Z( 1, 1 ) = A( I, I ) + Z( 2, 1 ) = D( I, I ) + Z( 1, 2 ) = -B( J, J ) + Z( 2, 2 ) = -E( J, J ) +* +* Set up right hand side(s) +* + RHS( 1 ) = C( I, J ) + RHS( 2 ) = F( I, J ) +* +* Solve Z * x = RHS +* + CALL ZGETC2( LDZ, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR + IF( IJOB.EQ.0 ) THEN + CALL ZGESC2( LDZ, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 10 K = 1, N + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), + $ C( 1, K ), 1 ) + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), + $ F( 1, K ), 1 ) + 10 CONTINUE + SCALE = SCALE*SCALOC + END IF + ELSE + CALL ZLATDF( IJOB, LDZ, Z, LDZ, RHS, RDSUM, RDSCAL, + $ IPIV, JPIV ) + END IF +* +* Unpack solution vector(s) +* + C( I, J ) = RHS( 1 ) + F( I, J ) = RHS( 2 ) +* +* Substitute R(I, J) and L(I, J) into remaining equation. +* + IF( I.GT.1 ) THEN + ALPHA = -RHS( 1 ) + CALL ZAXPY( I-1, ALPHA, A( 1, I ), 1, C( 1, J ), 1 ) + CALL ZAXPY( I-1, ALPHA, D( 1, I ), 1, F( 1, J ), 1 ) + END IF + IF( J.LT.N ) THEN + CALL ZAXPY( N-J, RHS( 2 ), B( J, J+1 ), LDB, + $ C( I, J+1 ), LDC ) + CALL ZAXPY( N-J, RHS( 2 ), E( J, J+1 ), LDE, + $ F( I, J+1 ), LDF ) + END IF +* + 20 CONTINUE + 30 CONTINUE + ELSE +* +* Solve transposed (I, J) - system: +* A(I, I)**H * R(I, J) + D(I, I)**H * L(J, J) = C(I, J) +* R(I, I) * B(J, J) + L(I, J) * E(J, J) = -F(I, J) +* for I = 1, 2, ..., M, J = N, N - 1, ..., 1 +* + SCALE = ONE + SCALOC = ONE + DO 80 I = 1, M + DO 70 J = N, 1, -1 +* +* Build 2 by 2 system Z**H +* + Z( 1, 1 ) = DCONJG( A( I, I ) ) + Z( 2, 1 ) = -DCONJG( B( J, J ) ) + Z( 1, 2 ) = DCONJG( D( I, I ) ) + Z( 2, 2 ) = -DCONJG( E( J, J ) ) +* +* +* Set up right hand side(s) +* + RHS( 1 ) = C( I, J ) + RHS( 2 ) = F( I, J ) +* +* Solve Z**H * x = RHS +* + CALL ZGETC2( LDZ, Z, LDZ, IPIV, JPIV, IERR ) + IF( IERR.GT.0 ) + $ INFO = IERR + CALL ZGESC2( LDZ, Z, LDZ, RHS, IPIV, JPIV, SCALOC ) + IF( SCALOC.NE.ONE ) THEN + DO 40 K = 1, N + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), C( 1, K ), + $ 1 ) + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), F( 1, K ), + $ 1 ) + 40 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Unpack solution vector(s) +* + C( I, J ) = RHS( 1 ) + F( I, J ) = RHS( 2 ) +* +* Substitute R(I, J) and L(I, J) into remaining equation. +* + DO 50 K = 1, J - 1 + F( I, K ) = F( I, K ) + RHS( 1 )*DCONJG( B( K, J ) ) + + $ RHS( 2 )*DCONJG( E( K, J ) ) + 50 CONTINUE + DO 60 K = I + 1, M + C( K, J ) = C( K, J ) - DCONJG( A( I, K ) )*RHS( 1 ) - + $ DCONJG( D( I, K ) )*RHS( 2 ) + 60 CONTINUE +* + 70 CONTINUE + 80 CONTINUE + END IF + RETURN +* +* End of ZTGSY2 +* + END diff --git a/dspl/liblapack/SRC/ztgsyl.f b/dspl/liblapack/SRC/ztgsyl.f new file mode 100644 index 0000000..2122a09 --- /dev/null +++ b/dspl/liblapack/SRC/ztgsyl.f @@ -0,0 +1,697 @@ +*> \brief \b ZTGSYL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTGSYL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTGSYL( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, +* LDD, E, LDE, F, LDF, SCALE, DIF, WORK, LWORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, +* $ LWORK, M, N +* DOUBLE PRECISION DIF, SCALE +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ), +* $ D( LDD, * ), E( LDE, * ), F( LDF, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTGSYL solves the generalized Sylvester equation: +*> +*> A * R - L * B = scale * C (1) +*> D * R - L * E = scale * F +*> +*> where R and L are unknown m-by-n matrices, (A, D), (B, E) and +*> (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n, +*> respectively, with complex entries. A, B, D and E are upper +*> triangular (i.e., (A,D) and (B,E) in generalized Schur form). +*> +*> The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 +*> is an output scaling factor chosen to avoid overflow. +*> +*> In matrix notation (1) is equivalent to solve Zx = scale*b, where Z +*> is defined as +*> +*> Z = [ kron(In, A) -kron(B**H, Im) ] (2) +*> [ kron(In, D) -kron(E**H, Im) ], +*> +*> Here Ix is the identity matrix of size x and X**H is the conjugate +*> transpose of X. Kron(X, Y) is the Kronecker product between the +*> matrices X and Y. +*> +*> If TRANS = 'C', y in the conjugate transposed system Z**H *y = scale*b +*> is solved for, which is equivalent to solve for R and L in +*> +*> A**H * R + D**H * L = scale * C (3) +*> R * B**H + L * E**H = scale * -F +*> +*> This case (TRANS = 'C') is used to compute an one-norm-based estimate +*> of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D) +*> and (B,E), using ZLACON. +*> +*> If IJOB >= 1, ZTGSYL computes a Frobenius norm-based estimate of +*> Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the +*> reciprocal of the smallest singular value of Z. +*> +*> This is a level-3 BLAS algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': solve the generalized sylvester equation (1). +*> = 'C': solve the "conjugate transposed" system (3). +*> \endverbatim +*> +*> \param[in] IJOB +*> \verbatim +*> IJOB is INTEGER +*> Specifies what kind of functionality to be performed. +*> =0: solve (1) only. +*> =1: The functionality of 0 and 3. +*> =2: The functionality of 0 and 4. +*> =3: Only an estimate of Dif[(A,D), (B,E)] is computed. +*> (look ahead strategy is used). +*> =4: Only an estimate of Dif[(A,D), (B,E)] is computed. +*> (ZGECON on sub-systems is used). +*> Not referenced if TRANS = 'C'. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The order of the matrices A and D, and the row dimension of +*> the matrices C, F, R and L. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices B and E, and the column dimension +*> of the matrices C, F, R and L. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, M) +*> The upper triangular matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1, M). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB, N) +*> The upper triangular matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC, N) +*> On entry, C contains the right-hand-side of the first matrix +*> equation in (1) or (3). +*> On exit, if IJOB = 0, 1 or 2, C has been overwritten by +*> the solution R. If IJOB = 3 or 4 and TRANS = 'N', C holds R, +*> the solution achieved during the computation of the +*> Dif-estimate. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1, M). +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is COMPLEX*16 array, dimension (LDD, M) +*> The upper triangular matrix D. +*> \endverbatim +*> +*> \param[in] LDD +*> \verbatim +*> LDD is INTEGER +*> The leading dimension of the array D. LDD >= max(1, M). +*> \endverbatim +*> +*> \param[in] E +*> \verbatim +*> E is COMPLEX*16 array, dimension (LDE, N) +*> The upper triangular matrix E. +*> \endverbatim +*> +*> \param[in] LDE +*> \verbatim +*> LDE is INTEGER +*> The leading dimension of the array E. LDE >= max(1, N). +*> \endverbatim +*> +*> \param[in,out] F +*> \verbatim +*> F is COMPLEX*16 array, dimension (LDF, N) +*> On entry, F contains the right-hand-side of the second matrix +*> equation in (1) or (3). +*> On exit, if IJOB = 0, 1 or 2, F has been overwritten by +*> the solution L. If IJOB = 3 or 4 and TRANS = 'N', F holds L, +*> the solution achieved during the computation of the +*> Dif-estimate. +*> \endverbatim +*> +*> \param[in] LDF +*> \verbatim +*> LDF is INTEGER +*> The leading dimension of the array F. LDF >= max(1, M). +*> \endverbatim +*> +*> \param[out] DIF +*> \verbatim +*> DIF is DOUBLE PRECISION +*> On exit DIF is the reciprocal of a lower bound of the +*> reciprocal of the Dif-function, i.e. DIF is an upper bound of +*> Dif[(A,D), (B,E)] = sigma-min(Z), where Z as in (2). +*> IF IJOB = 0 or TRANS = 'C', DIF is not referenced. +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> On exit SCALE is the scaling factor in (1) or (3). +*> If 0 < SCALE < 1, C and F hold the solutions R and L, resp., +*> to a slightly perturbed system but the input matrices A, B, +*> D and E have not been changed. If SCALE = 0, R and L will +*> hold the solutions to the homogenious system with C = F = 0. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK > = 1. +*> If IJOB = 1 or 2 and TRANS = 'N', LWORK >= max(1,2*M*N). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (M+N+2) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> =0: successful exit +*> <0: If INFO = -i, the i-th argument had an illegal value. +*> >0: (A, D) and (B, E) have common or very close +*> eigenvalues. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYcomputational +* +*> \par Contributors: +* ================== +*> +*> Bo Kagstrom and Peter Poromaa, Department of Computing Science, +*> Umea University, S-901 87 Umea, Sweden. +* +*> \par References: +* ================ +*> +*> [1] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software +*> for Solving the Generalized Sylvester Equation and Estimating the +*> Separation between Regular Matrix Pairs, Report UMINF - 93.23, +*> Department of Computing Science, Umea University, S-901 87 Umea, +*> Sweden, December 1993, Revised April 1994, Also as LAPACK Working +*> Note 75. To appear in ACM Trans. on Math. Software, Vol 22, +*> No 1, 1996. +*> \n +*> [2] B. Kagstrom, A Perturbation Analysis of the Generalized Sylvester +*> Equation (AR - LB, DR - LE ) = (C, F), SIAM J. Matrix Anal. +*> Appl., 15(4):1045-1060, 1994. +*> \n +*> [3] B. Kagstrom and L. Westin, Generalized Schur Methods with +*> Condition Estimators for Solving the Generalized Sylvester +*> Equation, IEEE Transactions on Automatic Control, Vol. 34, No. 7, +*> July 1989, pp 745-751. +*> +* ===================================================================== + SUBROUTINE ZTGSYL( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, DIF, WORK, LWORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, + $ LWORK, M, N + DOUBLE PRECISION DIF, SCALE +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ), + $ D( LDD, * ), E( LDE, * ), F( LDF, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* Replaced various illegal calls to CCOPY by calls to CLASET. +* Sven Hammarling, 1/5/02. +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO + PARAMETER ( CZERO = (0.0D+0, 0.0D+0) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, NOTRAN + INTEGER I, IE, IFUNC, IROUND, IS, ISOLVE, J, JE, JS, K, + $ LINFO, LWMIN, MB, NB, P, PQ, Q + DOUBLE PRECISION DSCALE, DSUM, SCALE2, SCALOC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZGEMM, ZLACPY, ZLASET, ZSCAL, ZTGSY2 +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test input parameters +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( NOTRAN ) THEN + IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.4 ) ) THEN + INFO = -2 + END IF + END IF + IF( INFO.EQ.0 ) THEN + IF( M.LE.0 ) THEN + INFO = -3 + ELSE IF( N.LE.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LDD.LT.MAX( 1, M ) ) THEN + INFO = -12 + ELSE IF( LDE.LT.MAX( 1, N ) ) THEN + INFO = -14 + ELSE IF( LDF.LT.MAX( 1, M ) ) THEN + INFO = -16 + END IF + END IF +* + IF( INFO.EQ.0 ) THEN + IF( NOTRAN ) THEN + IF( IJOB.EQ.1 .OR. IJOB.EQ.2 ) THEN + LWMIN = MAX( 1, 2*M*N ) + ELSE + LWMIN = 1 + END IF + ELSE + LWMIN = 1 + END IF + WORK( 1 ) = LWMIN +* + IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -20 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTGSYL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + SCALE = 1 + IF( NOTRAN ) THEN + IF( IJOB.NE.0 ) THEN + DIF = 0 + END IF + END IF + RETURN + END IF +* +* Determine optimal block sizes MB and NB +* + MB = ILAENV( 2, 'ZTGSYL', TRANS, M, N, -1, -1 ) + NB = ILAENV( 5, 'ZTGSYL', TRANS, M, N, -1, -1 ) +* + ISOLVE = 1 + IFUNC = 0 + IF( NOTRAN ) THEN + IF( IJOB.GE.3 ) THEN + IFUNC = IJOB - 2 + CALL ZLASET( 'F', M, N, CZERO, CZERO, C, LDC ) + CALL ZLASET( 'F', M, N, CZERO, CZERO, F, LDF ) + ELSE IF( IJOB.GE.1 .AND. NOTRAN ) THEN + ISOLVE = 2 + END IF + END IF +* + IF( ( MB.LE.1 .AND. NB.LE.1 ) .OR. ( MB.GE.M .AND. NB.GE.N ) ) + $ THEN +* +* Use unblocked Level 2 solver +* + DO 30 IROUND = 1, ISOLVE +* + SCALE = ONE + DSCALE = ZERO + DSUM = ONE + PQ = M*N + CALL ZTGSY2( TRANS, IFUNC, M, N, A, LDA, B, LDB, C, LDC, D, + $ LDD, E, LDE, F, LDF, SCALE, DSUM, DSCALE, + $ INFO ) + IF( DSCALE.NE.ZERO ) THEN + IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN + DIF = SQRT( DBLE( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) ) + ELSE + DIF = SQRT( DBLE( PQ ) ) / ( DSCALE*SQRT( DSUM ) ) + END IF + END IF + IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN + IF( NOTRAN ) THEN + IFUNC = IJOB + END IF + SCALE2 = SCALE + CALL ZLACPY( 'F', M, N, C, LDC, WORK, M ) + CALL ZLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M ) + CALL ZLASET( 'F', M, N, CZERO, CZERO, C, LDC ) + CALL ZLASET( 'F', M, N, CZERO, CZERO, F, LDF ) + ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN + CALL ZLACPY( 'F', M, N, WORK, M, C, LDC ) + CALL ZLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF ) + SCALE = SCALE2 + END IF + 30 CONTINUE +* + RETURN +* + END IF +* +* Determine block structure of A +* + P = 0 + I = 1 + 40 CONTINUE + IF( I.GT.M ) + $ GO TO 50 + P = P + 1 + IWORK( P ) = I + I = I + MB + IF( I.GE.M ) + $ GO TO 50 + GO TO 40 + 50 CONTINUE + IWORK( P+1 ) = M + 1 + IF( IWORK( P ).EQ.IWORK( P+1 ) ) + $ P = P - 1 +* +* Determine block structure of B +* + Q = P + 1 + J = 1 + 60 CONTINUE + IF( J.GT.N ) + $ GO TO 70 +* + Q = Q + 1 + IWORK( Q ) = J + J = J + NB + IF( J.GE.N ) + $ GO TO 70 + GO TO 60 +* + 70 CONTINUE + IWORK( Q+1 ) = N + 1 + IF( IWORK( Q ).EQ.IWORK( Q+1 ) ) + $ Q = Q - 1 +* + IF( NOTRAN ) THEN + DO 150 IROUND = 1, ISOLVE +* +* Solve (I, J) - subsystem +* A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) +* D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) +* for I = P, P - 1, ..., 1; J = 1, 2, ..., Q +* + PQ = 0 + SCALE = ONE + DSCALE = ZERO + DSUM = ONE + DO 130 J = P + 2, Q + JS = IWORK( J ) + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + DO 120 I = P, 1, -1 + IS = IWORK( I ) + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + CALL ZTGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA, + $ B( JS, JS ), LDB, C( IS, JS ), LDC, + $ D( IS, IS ), LDD, E( JS, JS ), LDE, + $ F( IS, JS ), LDF, SCALOC, DSUM, DSCALE, + $ LINFO ) + IF( LINFO.GT.0 ) + $ INFO = LINFO + PQ = PQ + MB*NB + IF( SCALOC.NE.ONE ) THEN + DO 80 K = 1, JS - 1 + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), + $ C( 1, K ), 1 ) + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), + $ F( 1, K ), 1 ) + 80 CONTINUE + DO 90 K = JS, JE + CALL ZSCAL( IS-1, DCMPLX( SCALOC, ZERO ), + $ C( 1, K ), 1 ) + CALL ZSCAL( IS-1, DCMPLX( SCALOC, ZERO ), + $ F( 1, K ), 1 ) + 90 CONTINUE + DO 100 K = JS, JE + CALL ZSCAL( M-IE, DCMPLX( SCALOC, ZERO ), + $ C( IE+1, K ), 1 ) + CALL ZSCAL( M-IE, DCMPLX( SCALOC, ZERO ), + $ F( IE+1, K ), 1 ) + 100 CONTINUE + DO 110 K = JE + 1, N + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), + $ C( 1, K ), 1 ) + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), + $ F( 1, K ), 1 ) + 110 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Substitute R(I,J) and L(I,J) into remaining equation. +* + IF( I.GT.1 ) THEN + CALL ZGEMM( 'N', 'N', IS-1, NB, MB, + $ DCMPLX( -ONE, ZERO ), A( 1, IS ), LDA, + $ C( IS, JS ), LDC, DCMPLX( ONE, ZERO ), + $ C( 1, JS ), LDC ) + CALL ZGEMM( 'N', 'N', IS-1, NB, MB, + $ DCMPLX( -ONE, ZERO ), D( 1, IS ), LDD, + $ C( IS, JS ), LDC, DCMPLX( ONE, ZERO ), + $ F( 1, JS ), LDF ) + END IF + IF( J.LT.Q ) THEN + CALL ZGEMM( 'N', 'N', MB, N-JE, NB, + $ DCMPLX( ONE, ZERO ), F( IS, JS ), LDF, + $ B( JS, JE+1 ), LDB, + $ DCMPLX( ONE, ZERO ), C( IS, JE+1 ), + $ LDC ) + CALL ZGEMM( 'N', 'N', MB, N-JE, NB, + $ DCMPLX( ONE, ZERO ), F( IS, JS ), LDF, + $ E( JS, JE+1 ), LDE, + $ DCMPLX( ONE, ZERO ), F( IS, JE+1 ), + $ LDF ) + END IF + 120 CONTINUE + 130 CONTINUE + IF( DSCALE.NE.ZERO ) THEN + IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN + DIF = SQRT( DBLE( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) ) + ELSE + DIF = SQRT( DBLE( PQ ) ) / ( DSCALE*SQRT( DSUM ) ) + END IF + END IF + IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN + IF( NOTRAN ) THEN + IFUNC = IJOB + END IF + SCALE2 = SCALE + CALL ZLACPY( 'F', M, N, C, LDC, WORK, M ) + CALL ZLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M ) + CALL ZLASET( 'F', M, N, CZERO, CZERO, C, LDC ) + CALL ZLASET( 'F', M, N, CZERO, CZERO, F, LDF ) + ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN + CALL ZLACPY( 'F', M, N, WORK, M, C, LDC ) + CALL ZLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF ) + SCALE = SCALE2 + END IF + 150 CONTINUE + ELSE +* +* Solve transposed (I, J)-subsystem +* A(I, I)**H * R(I, J) + D(I, I)**H * L(I, J) = C(I, J) +* R(I, J) * B(J, J) + L(I, J) * E(J, J) = -F(I, J) +* for I = 1,2,..., P; J = Q, Q-1,..., 1 +* + SCALE = ONE + DO 210 I = 1, P + IS = IWORK( I ) + IE = IWORK( I+1 ) - 1 + MB = IE - IS + 1 + DO 200 J = Q, P + 2, -1 + JS = IWORK( J ) + JE = IWORK( J+1 ) - 1 + NB = JE - JS + 1 + CALL ZTGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA, + $ B( JS, JS ), LDB, C( IS, JS ), LDC, + $ D( IS, IS ), LDD, E( JS, JS ), LDE, + $ F( IS, JS ), LDF, SCALOC, DSUM, DSCALE, + $ LINFO ) + IF( LINFO.GT.0 ) + $ INFO = LINFO + IF( SCALOC.NE.ONE ) THEN + DO 160 K = 1, JS - 1 + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), C( 1, K ), + $ 1 ) + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), F( 1, K ), + $ 1 ) + 160 CONTINUE + DO 170 K = JS, JE + CALL ZSCAL( IS-1, DCMPLX( SCALOC, ZERO ), + $ C( 1, K ), 1 ) + CALL ZSCAL( IS-1, DCMPLX( SCALOC, ZERO ), + $ F( 1, K ), 1 ) + 170 CONTINUE + DO 180 K = JS, JE + CALL ZSCAL( M-IE, DCMPLX( SCALOC, ZERO ), + $ C( IE+1, K ), 1 ) + CALL ZSCAL( M-IE, DCMPLX( SCALOC, ZERO ), + $ F( IE+1, K ), 1 ) + 180 CONTINUE + DO 190 K = JE + 1, N + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), C( 1, K ), + $ 1 ) + CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), F( 1, K ), + $ 1 ) + 190 CONTINUE + SCALE = SCALE*SCALOC + END IF +* +* Substitute R(I,J) and L(I,J) into remaining equation. +* + IF( J.GT.P+2 ) THEN + CALL ZGEMM( 'N', 'C', MB, JS-1, NB, + $ DCMPLX( ONE, ZERO ), C( IS, JS ), LDC, + $ B( 1, JS ), LDB, DCMPLX( ONE, ZERO ), + $ F( IS, 1 ), LDF ) + CALL ZGEMM( 'N', 'C', MB, JS-1, NB, + $ DCMPLX( ONE, ZERO ), F( IS, JS ), LDF, + $ E( 1, JS ), LDE, DCMPLX( ONE, ZERO ), + $ F( IS, 1 ), LDF ) + END IF + IF( I.LT.P ) THEN + CALL ZGEMM( 'C', 'N', M-IE, NB, MB, + $ DCMPLX( -ONE, ZERO ), A( IS, IE+1 ), LDA, + $ C( IS, JS ), LDC, DCMPLX( ONE, ZERO ), + $ C( IE+1, JS ), LDC ) + CALL ZGEMM( 'C', 'N', M-IE, NB, MB, + $ DCMPLX( -ONE, ZERO ), D( IS, IE+1 ), LDD, + $ F( IS, JS ), LDF, DCMPLX( ONE, ZERO ), + $ C( IE+1, JS ), LDC ) + END IF + 200 CONTINUE + 210 CONTINUE + END IF +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of ZTGSYL +* + END diff --git a/dspl/liblapack/SRC/ztpcon.f b/dspl/liblapack/SRC/ztpcon.f new file mode 100644 index 0000000..41f1ae4 --- /dev/null +++ b/dspl/liblapack/SRC/ztpcon.f @@ -0,0 +1,274 @@ +*> \brief \b ZTPCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTPCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTPCON( NORM, UPLO, DIAG, N, AP, RCOND, WORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER INFO, N +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 AP( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTPCON estimates the reciprocal of the condition number of a packed +*> triangular matrix A, in either the 1-norm or the infinity-norm. +*> +*> The norm of A is computed and an estimate is obtained for +*> norm(inv(A)), then the reciprocal of the condition number is +*> computed as +*> RCOND = 1 / ( norm(A) * norm(inv(A)) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The upper or lower triangular matrix A, packed columnwise in +*> a linear array. The j-th column of A is stored in the array +*> AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> If DIAG = 'U', the diagonal elements of A are not referenced +*> and are assumed to be 1. +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(norm(A) * norm(inv(A))). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZTPCON( NORM, UPLO, DIAG, N, AP, RCOND, WORK, RWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, N + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 AP( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + DOUBLE PRECISION AINVNM, ANORM, SCALE, SMLNUM, XNORM + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH, ZLANTP + EXTERNAL LSAME, IZAMAX, DLAMCH, ZLANTP +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDRSCL, ZLACN2, ZLATPS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTPCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = DLAMCH( 'Safe minimum' )*DBLE( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = ZLANTP( NORM, UPLO, DIAG, N, AP, RWORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL ZLATPS( UPLO, 'No transpose', DIAG, NORMIN, N, AP, + $ WORK, SCALE, RWORK, INFO ) + ELSE +* +* Multiply by inv(A**H). +* + CALL ZLATPS( UPLO, 'Conjugate transpose', DIAG, NORMIN, + $ N, AP, WORK, SCALE, RWORK, INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = IZAMAX( N, WORK, 1 ) + XNORM = CABS1( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL ZDRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of ZTPCON +* + END diff --git a/dspl/liblapack/SRC/ztplqt.f b/dspl/liblapack/SRC/ztplqt.f new file mode 100644 index 0000000..bbf0010 --- /dev/null +++ b/dspl/liblapack/SRC/ztplqt.f @@ -0,0 +1,270 @@ +*> \brief \b ZTPLQT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DTPQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTPLQT( M, N, L, MB, A, LDA, B, LDB, T, LDT, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LDT, N, M, L, MB +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DTPLQT computes a blocked LQ factorization of a complex +*> "triangular-pentagonal" matrix C, which is composed of a +*> triangular block A and pentagonal block B, using the compact +*> WY representation for Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B, and the order of the +*> triangular matrix A. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of rows of the lower trapezoidal part of B. +*> MIN(M,N) >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The block size to be used in the blocked QR. M >= MB >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,M) +*> On entry, the lower triangular M-by-M matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the lower triangular matrix L. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> On entry, the pentagonal M-by-N matrix B. The first N-L columns +*> are rectangular, and the last L columns are lower trapezoidal. +*> On exit, B contains the pentagonal matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,N) +*> The lower triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See Further Details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MB*M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The input matrix C is a M-by-(M+N) matrix +*> +*> C = [ A ] [ B ] +*> +*> +*> where A is an lower triangular M-by-M matrix, and B is M-by-N pentagonal +*> matrix consisting of a M-by-(N-L) rectangular matrix B1 on left of a M-by-L +*> upper trapezoidal matrix B2: +*> [ B ] = [ B1 ] [ B2 ] +*> [ B1 ] <- M-by-(N-L) rectangular +*> [ B2 ] <- M-by-L lower trapezoidal. +*> +*> The lower trapezoidal matrix B2 consists of the first L columns of a +*> M-by-M lower triangular matrix, where 0 <= L <= MIN(M,N). If L=0, +*> B is rectangular M-by-N; if M=L=N, B is lower triangular. +*> +*> The matrix W stores the elementary reflectors H(i) in the i-th row +*> above the diagonal (of A) in the M-by-(M+N) input matrix C +*> [ C ] = [ A ] [ B ] +*> [ A ] <- lower triangular M-by-M +*> [ B ] <- M-by-N pentagonal +*> +*> so that W can be represented as +*> [ W ] = [ I ] [ V ] +*> [ I ] <- identity, M-by-M +*> [ V ] <- M-by-N, same form as B. +*> +*> Thus, all of information needed for W is contained on exit in B, which +*> we call V above. Note that V has the same form as B; that is, +*> [ V ] = [ V1 ] [ V2 ] +*> [ V1 ] <- M-by-(N-L) rectangular +*> [ V2 ] <- M-by-L lower trapezoidal. +*> +*> The rows of V represent the vectors which define the H(i)'s. +*> +*> The number of blocks is B = ceiling(M/MB), where each +*> block is of order MB except for the last block, which is of order +*> IB = M - (M-1)*MB. For each of the B blocks, a upper triangular block +*> reflector factor is computed: T1, T2, ..., TB. The MB-by-MB (and IB-by-IB +*> for the last block) T's are stored in the MB-by-N matrix T as +*> +*> T = [T1 T2 ... TB]. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTPLQT( M, N, L, MB, A, LDA, B, LDB, T, LDT, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LDT, N, M, L, MB +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + INTEGER I, IB, LB, NB, IINFO +* .. +* .. External Subroutines .. + EXTERNAL ZTPLQT2, ZTPRFB, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( L.LT.0 .OR. (L.GT.MIN(M,N) .AND. MIN(M,N).GE.0)) THEN + INFO = -3 + ELSE IF( MB.LT.1 .OR. (MB.GT.M .AND. M.GT.0)) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -8 + ELSE IF( LDT.LT.MB ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTPLQT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) RETURN +* + DO I = 1, M, MB +* +* Compute the QR factorization of the current block +* + IB = MIN( M-I+1, MB ) + NB = MIN( N-L+I+IB-1, N ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = NB-N+L-I+1 + END IF +* + CALL ZTPLQT2( IB, NB, LB, A(I,I), LDA, B( I, 1 ), LDB, + $ T(1, I ), LDT, IINFO ) +* +* Update by applying H**T to B(I+IB:M,:) from the right +* + IF( I+IB.LE.M ) THEN + CALL ZTPRFB( 'R', 'N', 'F', 'R', M-I-IB+1, NB, IB, LB, + $ B( I, 1 ), LDB, T( 1, I ), LDT, + $ A( I+IB, I ), LDA, B( I+IB, 1 ), LDB, + $ WORK, M-I-IB+1) + END IF + END DO + RETURN +* +* End of ZTPLQT +* + END diff --git a/dspl/liblapack/SRC/ztplqt2.f b/dspl/liblapack/SRC/ztplqt2.f new file mode 100644 index 0000000..9fecfdd --- /dev/null +++ b/dspl/liblapack/SRC/ztplqt2.f @@ -0,0 +1,333 @@ +*> \brief \b ZTPLQT2 computes a LQ factorization of a real or complex "triangular-pentagonal" matrix, which is composed of a triangular block and a pentagonal block, using the compact WY representation for Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTPLQT2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTPLQT2( M, N, L, A, LDA, B, LDB, T, LDT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LDT, N, M, L +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTPLQT2 computes a LQ a factorization of a complex "triangular-pentagonal" +*> matrix C, which is composed of a triangular block A and pentagonal block B, +*> using the compact WY representation for Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The total number of rows of the matrix B. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B, and the order of +*> the triangular matrix A. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of rows of the lower trapezoidal part of B. +*> MIN(M,N) >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,M) +*> On entry, the lower triangular M-by-M matrix A. +*> On exit, the elements on and below the diagonal of the array +*> contain the lower triangular matrix L. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> On entry, the pentagonal M-by-N matrix B. The first N-L columns +*> are rectangular, and the last L columns are lower trapezoidal. +*> On exit, B contains the pentagonal matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,M) +*> The N-by-N upper triangular factor T of the block reflector. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The input matrix C is a M-by-(M+N) matrix +*> +*> C = [ A ][ B ] +*> +*> +*> where A is an lower triangular M-by-M matrix, and B is M-by-N pentagonal +*> matrix consisting of a M-by-(N-L) rectangular matrix B1 left of a M-by-L +*> upper trapezoidal matrix B2: +*> +*> B = [ B1 ][ B2 ] +*> [ B1 ] <- M-by-(N-L) rectangular +*> [ B2 ] <- M-by-L lower trapezoidal. +*> +*> The lower trapezoidal matrix B2 consists of the first L columns of a +*> N-by-N lower triangular matrix, where 0 <= L <= MIN(M,N). If L=0, +*> B is rectangular M-by-N; if M=L=N, B is lower triangular. +*> +*> The matrix W stores the elementary reflectors H(i) in the i-th row +*> above the diagonal (of A) in the M-by-(M+N) input matrix C +*> +*> C = [ A ][ B ] +*> [ A ] <- lower triangular M-by-M +*> [ B ] <- M-by-N pentagonal +*> +*> so that W can be represented as +*> +*> W = [ I ][ V ] +*> [ I ] <- identity, M-by-M +*> [ V ] <- M-by-N, same form as B. +*> +*> Thus, all of information needed for W is contained on exit in B, which +*> we call V above. Note that V has the same form as B; that is, +*> +*> W = [ V1 ][ V2 ] +*> [ V1 ] <- M-by-(N-L) rectangular +*> [ V2 ] <- M-by-L lower trapezoidal. +*> +*> The rows of V represent the vectors which define the H(i)'s. +*> The (M+N)-by-(M+N) block reflector H is then given by +*> +*> H = I - W**T * T * W +*> +*> where W^H is the conjugate transpose of W and T is the upper triangular +*> factor of the block reflector. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTPLQT2( M, N, L, A, LDA, B, LDB, T, LDT, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LDT, N, M, L +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER( ZERO = ( 0.0D+0, 0.0D+0 ),ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, P, MP, NP + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL ZLARFG, ZGEMV, ZGERC, ZTRMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( L.LT.0 .OR. L.GT.MIN(M,N) ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDT.LT.MAX( 1, M ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTPLQT2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) RETURN +* + DO I = 1, M +* +* Generate elementary reflector H(I) to annihilate B(I,:) +* + P = N-L+MIN( L, I ) + CALL ZLARFG( P+1, A( I, I ), B( I, 1 ), LDB, T( 1, I ) ) + T(1,I)=CONJG(T(1,I)) + IF( I.LT.M ) THEN + DO J = 1, P + B( I, J ) = CONJG(B(I,J)) + END DO +* +* W(M-I:1) := C(I+1:M,I:N) * C(I,I:N) [use W = T(M,:)] +* + DO J = 1, M-I + T( M, J ) = (A( I+J, I )) + END DO + CALL ZGEMV( 'N', M-I, P, ONE, B( I+1, 1 ), LDB, + $ B( I, 1 ), LDB, ONE, T( M, 1 ), LDT ) +* +* C(I+1:M,I:N) = C(I+1:M,I:N) + alpha * C(I,I:N)*W(M-1:1)^H +* + ALPHA = -(T( 1, I )) + DO J = 1, M-I + A( I+J, I ) = A( I+J, I ) + ALPHA*(T( M, J )) + END DO + CALL ZGERC( M-I, P, (ALPHA), T( M, 1 ), LDT, + $ B( I, 1 ), LDB, B( I+1, 1 ), LDB ) + DO J = 1, P + B( I, J ) = CONJG(B(I,J)) + END DO + END IF + END DO +* + DO I = 2, M +* +* T(I,1:I-1) := C(I:I-1,1:N)**H * (alpha * C(I,I:N)) +* + ALPHA = -(T( 1, I )) + DO J = 1, I-1 + T( I, J ) = ZERO + END DO + P = MIN( I-1, L ) + NP = MIN( N-L+1, N ) + MP = MIN( P+1, M ) + DO J = 1, N-L+P + B(I,J)=CONJG(B(I,J)) + END DO +* +* Triangular part of B2 +* + DO J = 1, P + T( I, J ) = (ALPHA*B( I, N-L+J )) + END DO + CALL ZTRMV( 'L', 'N', 'N', P, B( 1, NP ), LDB, + $ T( I, 1 ), LDT ) +* +* Rectangular part of B2 +* + CALL ZGEMV( 'N', I-1-P, L, ALPHA, B( MP, NP ), LDB, + $ B( I, NP ), LDB, ZERO, T( I,MP ), LDT ) +* +* B1 + +* + CALL ZGEMV( 'N', I-1, N-L, ALPHA, B, LDB, B( I, 1 ), LDB, + $ ONE, T( I, 1 ), LDT ) +* + +* +* T(1:I-1,I) := T(1:I-1,1:I-1) * T(I,1:I-1) +* + DO J = 1, I-1 + T(I,J)=CONJG(T(I,J)) + END DO + CALL ZTRMV( 'L', 'C', 'N', I-1, T, LDT, T( I, 1 ), LDT ) + DO J = 1, I-1 + T(I,J)=CONJG(T(I,J)) + END DO + DO J = 1, N-L+P + B(I,J)=CONJG(B(I,J)) + END DO +* +* T(I,I) = tau(I) +* + T( I, I ) = T( 1, I ) + T( 1, I ) = ZERO + END DO + DO I=1,M + DO J= I+1,M + T(I,J)=(T(J,I)) + T(J,I)=ZERO + END DO + END DO + +* +* End of ZTPLQT2 +* + END diff --git a/dspl/liblapack/SRC/ztpmlqt.f b/dspl/liblapack/SRC/ztpmlqt.f new file mode 100644 index 0000000..6a67e44 --- /dev/null +++ b/dspl/liblapack/SRC/ztpmlqt.f @@ -0,0 +1,366 @@ +*> \brief \b ZTPMLQT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTPMLQT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTPMLQT( SIDE, TRANS, M, N, K, L, MB, V, LDV, T, LDT, +* A, LDA, B, LDB, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDV, LDA, LDB, M, N, L, MB, LDT +* .. +* .. Array Arguments .. +* COMPLEX*16 V( LDV, * ), A( LDA, * ), B( LDB, * ), +* $ T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTPMLQT applies a complex orthogonal matrix Q obtained from a +*> "triangular-pentagonal" complex block reflector H to a general +*> complex matrix C, which consists of two blocks A and B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The order of the trapezoidal part of V. +*> K >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in] MB +*> \verbatim +*> MB is INTEGER +*> The block size used for the storage of T. K >= MB >= 1. +*> This must be the same value of MB used to generate T +*> in DTPLQT. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (LDA,K) +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> DTPLQT in B. See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If SIDE = 'L', LDV >= max(1,M); +*> if SIDE = 'R', LDV >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,K) +*> The upper triangular factors of the block reflectors +*> as returned by DTPLQT, stored as a MB-by-K matrix. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= MB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension +*> (LDA,N) if SIDE = 'L' or +*> (LDA,K) if SIDE = 'R' +*> On entry, the K-by-N or M-by-K matrix A. +*> On exit, A is overwritten by the corresponding block of +*> Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDC >= max(1,K); +*> If SIDE = 'R', LDC >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> On entry, the M-by-N matrix B. +*> On exit, B is overwritten by the corresponding block of +*> Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. +*> LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array. The dimension of WORK is +*> N*MB if SIDE = 'L', or M*MB if SIDE = 'R'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup doubleOTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The columns of the pentagonal matrix V contain the elementary reflectors +*> H(1), H(2), ..., H(K); V is composed of a rectangular block V1 and a +*> trapezoidal block V2: +*> +*> V = [V1] [V2]. +*> +*> +*> The size of the trapezoidal block V2 is determined by the parameter L, +*> where 0 <= L <= K; V2 is lower trapezoidal, consisting of the first L +*> rows of a K-by-K upper triangular matrix. If L=K, V2 is lower triangular; +*> if L=0, there is no trapezoidal block, hence V = V1 is rectangular. +*> +*> If SIDE = 'L': C = [A] where A is K-by-N, B is M-by-N and V is K-by-M. +*> [B] +*> +*> If SIDE = 'R': C = [A B] where A is M-by-K, B is M-by-N and V is K-by-N. +*> +*> The real orthogonal matrix Q is formed from V and T. +*> +*> If TRANS='N' and SIDE='L', C is on exit replaced with Q * C. +*> +*> If TRANS='C' and SIDE='L', C is on exit replaced with Q**H * C. +*> +*> If TRANS='N' and SIDE='R', C is on exit replaced with C * Q. +*> +*> If TRANS='C' and SIDE='R', C is on exit replaced with C * Q**H. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTPMLQT( SIDE, TRANS, M, N, K, L, MB, V, LDV, T, LDT, + $ A, LDA, B, LDB, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDV, LDA, LDB, M, N, L, MB, LDT +* .. +* .. Array Arguments .. + COMPLEX*16 V( LDV, * ), A( LDA, * ), B( LDB, * ), + $ T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN + INTEGER I, IB, NB, LB, KF, LDAQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZTPRFB +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* .. Test the input arguments .. +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + TRAN = LSAME( TRANS, 'C' ) + NOTRAN = LSAME( TRANS, 'N' ) +* + IF ( LEFT ) THEN + LDAQ = MAX( 1, K ) + ELSE IF ( RIGHT ) THEN + LDAQ = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. L.GT.K ) THEN + INFO = -6 + ELSE IF( MB.LT.1 .OR. (MB.GT.K .AND. K.GT.0) ) THEN + INFO = -7 + ELSE IF( LDV.LT.K ) THEN + INFO = -9 + ELSE IF( LDT.LT.MB ) THEN + INFO = -11 + ELSE IF( LDA.LT.LDAQ ) THEN + INFO = -13 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -15 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTPMLQT', -INFO ) + RETURN + END IF +* +* .. Quick return if possible .. +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) RETURN +* + IF( LEFT .AND. NOTRAN ) THEN +* + DO I = 1, K, MB + IB = MIN( MB, K-I+1 ) + NB = MIN( M-L+I+IB-1, M ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = 0 + END IF + CALL ZTPRFB( 'L', 'C', 'F', 'R', NB, N, IB, LB, + $ V( I, 1 ), LDV, T( 1, I ), LDT, + $ A( I, 1 ), LDA, B, LDB, WORK, IB ) + END DO +* + ELSE IF( RIGHT .AND. TRAN ) THEN +* + DO I = 1, K, MB + IB = MIN( MB, K-I+1 ) + NB = MIN( N-L+I+IB-1, N ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = NB-N+L-I+1 + END IF + CALL ZTPRFB( 'R', 'N', 'F', 'R', M, NB, IB, LB, + $ V( I, 1 ), LDV, T( 1, I ), LDT, + $ A( 1, I ), LDA, B, LDB, WORK, M ) + END DO +* + ELSE IF( LEFT .AND. TRAN ) THEN +* + KF = ((K-1)/MB)*MB+1 + DO I = KF, 1, -MB + IB = MIN( MB, K-I+1 ) + NB = MIN( M-L+I+IB-1, M ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = 0 + END IF + CALL ZTPRFB( 'L', 'N', 'F', 'R', NB, N, IB, LB, + $ V( I, 1 ), LDV, T( 1, I ), LDT, + $ A( I, 1 ), LDA, B, LDB, WORK, IB ) + END DO +* + ELSE IF( RIGHT .AND. NOTRAN ) THEN +* + KF = ((K-1)/MB)*MB+1 + DO I = KF, 1, -MB + IB = MIN( MB, K-I+1 ) + NB = MIN( N-L+I+IB-1, N ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = NB-N+L-I+1 + END IF + CALL ZTPRFB( 'R', 'C', 'F', 'R', M, NB, IB, LB, + $ V( I, 1 ), LDV, T( 1, I ), LDT, + $ A( 1, I ), LDA, B, LDB, WORK, M ) + END DO +* + END IF +* + RETURN +* +* End of ZTPMLQT +* + END diff --git a/dspl/liblapack/SRC/ztpmqrt.f b/dspl/liblapack/SRC/ztpmqrt.f new file mode 100644 index 0000000..aca7ff0 --- /dev/null +++ b/dspl/liblapack/SRC/ztpmqrt.f @@ -0,0 +1,368 @@ +*> \brief \b ZTPMQRT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTPMQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTPMQRT( SIDE, TRANS, M, N, K, L, NB, V, LDV, T, LDT, +* A, LDA, B, LDB, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDV, LDA, LDB, M, N, L, NB, LDT +* .. +* .. Array Arguments .. +* COMPLEX*16 V( LDV, * ), A( LDA, * ), B( LDB, * ), T( LDT, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTPMQRT applies a complex orthogonal matrix Q obtained from a +*> "triangular-pentagonal" complex block reflector H to a general +*> complex matrix C, which consists of two blocks A and B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The order of the trapezoidal part of V. +*> K >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The block size used for the storage of T. K >= NB >= 1. +*> This must be the same value of NB used to generate T +*> in CTPQRT. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> CTPQRT in B. See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If SIDE = 'L', LDV >= max(1,M); +*> if SIDE = 'R', LDV >= max(1,N). +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,K) +*> The upper triangular factors of the block reflectors +*> as returned by CTPQRT, stored as a NB-by-K matrix. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension +*> (LDA,N) if SIDE = 'L' or +*> (LDA,K) if SIDE = 'R' +*> On entry, the K-by-N or M-by-K matrix A. +*> On exit, A is overwritten by the corresponding block of +*> Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDC >= max(1,K); +*> If SIDE = 'R', LDC >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> On entry, the M-by-N matrix B. +*> On exit, B is overwritten by the corresponding block of +*> Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. +*> LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array. The dimension of WORK is +*> N*NB if SIDE = 'L', or M*NB if SIDE = 'R'. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The columns of the pentagonal matrix V contain the elementary reflectors +*> H(1), H(2), ..., H(K); V is composed of a rectangular block V1 and a +*> trapezoidal block V2: +*> +*> V = [V1] +*> [V2]. +*> +*> The size of the trapezoidal block V2 is determined by the parameter L, +*> where 0 <= L <= K; V2 is upper trapezoidal, consisting of the first L +*> rows of a K-by-K upper triangular matrix. If L=K, V2 is upper triangular; +*> if L=0, there is no trapezoidal block, hence V = V1 is rectangular. +*> +*> If SIDE = 'L': C = [A] where A is K-by-N, B is M-by-N and V is M-by-K. +*> [B] +*> +*> If SIDE = 'R': C = [A B] where A is M-by-K, B is M-by-N and V is N-by-K. +*> +*> The complex orthogonal matrix Q is formed from V and T. +*> +*> If TRANS='N' and SIDE='L', C is on exit replaced with Q * C. +*> +*> If TRANS='C' and SIDE='L', C is on exit replaced with Q**H * C. +*> +*> If TRANS='N' and SIDE='R', C is on exit replaced with C * Q. +*> +*> If TRANS='C' and SIDE='R', C is on exit replaced with C * Q**H. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTPMQRT( SIDE, TRANS, M, N, K, L, NB, V, LDV, T, LDT, + $ A, LDA, B, LDB, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDV, LDA, LDB, M, N, L, NB, LDT +* .. +* .. Array Arguments .. + COMPLEX*16 V( LDV, * ), A( LDA, * ), B( LDB, * ), T( LDT, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + LOGICAL LEFT, RIGHT, TRAN, NOTRAN + INTEGER I, IB, MB, LB, KF, LDAQ, LDVQ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZTPRFB, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* .. Test the input arguments .. +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + RIGHT = LSAME( SIDE, 'R' ) + TRAN = LSAME( TRANS, 'C' ) + NOTRAN = LSAME( TRANS, 'N' ) +* + IF ( LEFT ) THEN + LDVQ = MAX( 1, M ) + LDAQ = MAX( 1, K ) + ELSE IF ( RIGHT ) THEN + LDVQ = MAX( 1, N ) + LDAQ = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN + INFO = -1 + ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. L.GT.K ) THEN + INFO = -6 + ELSE IF( NB.LT.1 .OR. (NB.GT.K .AND. K.GT.0) ) THEN + INFO = -7 + ELSE IF( LDV.LT.LDVQ ) THEN + INFO = -9 + ELSE IF( LDT.LT.NB ) THEN + INFO = -11 + ELSE IF( LDA.LT.LDAQ ) THEN + INFO = -13 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -15 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTPMQRT', -INFO ) + RETURN + END IF +* +* .. Quick return if possible .. +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) RETURN +* + IF( LEFT .AND. TRAN ) THEN +* + DO I = 1, K, NB + IB = MIN( NB, K-I+1 ) + MB = MIN( M-L+I+IB-1, M ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = MB-M+L-I+1 + END IF + CALL ZTPRFB( 'L', 'C', 'F', 'C', MB, N, IB, LB, + $ V( 1, I ), LDV, T( 1, I ), LDT, + $ A( I, 1 ), LDA, B, LDB, WORK, IB ) + END DO +* + ELSE IF( RIGHT .AND. NOTRAN ) THEN +* + DO I = 1, K, NB + IB = MIN( NB, K-I+1 ) + MB = MIN( N-L+I+IB-1, N ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = MB-N+L-I+1 + END IF + CALL ZTPRFB( 'R', 'N', 'F', 'C', M, MB, IB, LB, + $ V( 1, I ), LDV, T( 1, I ), LDT, + $ A( 1, I ), LDA, B, LDB, WORK, M ) + END DO +* + ELSE IF( LEFT .AND. NOTRAN ) THEN +* + KF = ((K-1)/NB)*NB+1 + DO I = KF, 1, -NB + IB = MIN( NB, K-I+1 ) + MB = MIN( M-L+I+IB-1, M ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = MB-M+L-I+1 + END IF + CALL ZTPRFB( 'L', 'N', 'F', 'C', MB, N, IB, LB, + $ V( 1, I ), LDV, T( 1, I ), LDT, + $ A( I, 1 ), LDA, B, LDB, WORK, IB ) + END DO +* + ELSE IF( RIGHT .AND. TRAN ) THEN +* + KF = ((K-1)/NB)*NB+1 + DO I = KF, 1, -NB + IB = MIN( NB, K-I+1 ) + MB = MIN( N-L+I+IB-1, N ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = MB-N+L-I+1 + END IF + CALL ZTPRFB( 'R', 'C', 'F', 'C', M, MB, IB, LB, + $ V( 1, I ), LDV, T( 1, I ), LDT, + $ A( 1, I ), LDA, B, LDB, WORK, M ) + END DO +* + END IF +* + RETURN +* +* End of ZTPMQRT +* + END diff --git a/dspl/liblapack/SRC/ztpqrt.f b/dspl/liblapack/SRC/ztpqrt.f new file mode 100644 index 0000000..c6f186b --- /dev/null +++ b/dspl/liblapack/SRC/ztpqrt.f @@ -0,0 +1,270 @@ +*> \brief \b ZTPQRT +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTPQRT + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTPQRT( M, N, L, NB, A, LDA, B, LDB, T, LDT, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LDT, N, M, L, NB +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), T( LDT, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTPQRT computes a blocked QR factorization of a complex +*> "triangular-pentagonal" matrix C, which is composed of a +*> triangular block A and pentagonal block B, using the compact +*> WY representation for Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B, and the order of the +*> triangular matrix A. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of rows of the upper trapezoidal part of B. +*> MIN(M,N) >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The block size to be used in the blocked QR. N >= NB >= 1. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the upper triangular N-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the upper triangular matrix R. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> On entry, the pentagonal M-by-N matrix B. The first M-L rows +*> are rectangular, and the last L rows are upper trapezoidal. +*> On exit, B contains the pentagonal matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,N) +*> The upper triangular block reflectors stored in compact form +*> as a sequence of upper triangular blocks. See Further Details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= NB. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (NB*N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The input matrix C is a (N+M)-by-N matrix +*> +*> C = [ A ] +*> [ B ] +*> +*> where A is an upper triangular N-by-N matrix, and B is M-by-N pentagonal +*> matrix consisting of a (M-L)-by-N rectangular matrix B1 on top of a L-by-N +*> upper trapezoidal matrix B2: +*> +*> B = [ B1 ] <- (M-L)-by-N rectangular +*> [ B2 ] <- L-by-N upper trapezoidal. +*> +*> The upper trapezoidal matrix B2 consists of the first L rows of a +*> N-by-N upper triangular matrix, where 0 <= L <= MIN(M,N). If L=0, +*> B is rectangular M-by-N; if M=L=N, B is upper triangular. +*> +*> The matrix W stores the elementary reflectors H(i) in the i-th column +*> below the diagonal (of A) in the (N+M)-by-N input matrix C +*> +*> C = [ A ] <- upper triangular N-by-N +*> [ B ] <- M-by-N pentagonal +*> +*> so that W can be represented as +*> +*> W = [ I ] <- identity, N-by-N +*> [ V ] <- M-by-N, same form as B. +*> +*> Thus, all of information needed for W is contained on exit in B, which +*> we call V above. Note that V has the same form as B; that is, +*> +*> V = [ V1 ] <- (M-L)-by-N rectangular +*> [ V2 ] <- L-by-N upper trapezoidal. +*> +*> The columns of V represent the vectors which define the H(i)'s. +*> +*> The number of blocks is B = ceiling(N/NB), where each +*> block is of order NB except for the last block, which is of order +*> IB = N - (B-1)*NB. For each of the B blocks, a upper triangular block +*> reflector factor is computed: T1, T2, ..., TB. The NB-by-NB (and IB-by-IB +*> for the last block) T's are stored in the NB-by-N matrix T as +*> +*> T = [T1 T2 ... TB]. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTPQRT( M, N, L, NB, A, LDA, B, LDB, T, LDT, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LDT, N, M, L, NB +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), T( LDT, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. +* .. Local Scalars .. + INTEGER I, IB, LB, MB, IINFO +* .. +* .. External Subroutines .. + EXTERNAL ZTPQRT2, ZTPRFB, XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( L.LT.0 .OR. (L.GT.MIN(M,N) .AND. MIN(M,N).GE.0)) THEN + INFO = -3 + ELSE IF( NB.LT.1 .OR. (NB.GT.N .AND. N.GT.0)) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -8 + ELSE IF( LDT.LT.NB ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTPQRT', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) RETURN +* + DO I = 1, N, NB +* +* Compute the QR factorization of the current block +* + IB = MIN( N-I+1, NB ) + MB = MIN( M-L+I+IB-1, M ) + IF( I.GE.L ) THEN + LB = 0 + ELSE + LB = MB-M+L-I+1 + END IF +* + CALL ZTPQRT2( MB, IB, LB, A(I,I), LDA, B( 1, I ), LDB, + $ T(1, I ), LDT, IINFO ) +* +* Update by applying H**H to B(:,I+IB:N) from the left +* + IF( I+IB.LE.N ) THEN + CALL ZTPRFB( 'L', 'C', 'F', 'C', MB, N-I-IB+1, IB, LB, + $ B( 1, I ), LDB, T( 1, I ), LDT, + $ A( I, I+IB ), LDA, B( 1, I+IB ), LDB, + $ WORK, IB ) + END IF + END DO + RETURN +* +* End of ZTPQRT +* + END diff --git a/dspl/liblapack/SRC/ztpqrt2.f b/dspl/liblapack/SRC/ztpqrt2.f new file mode 100644 index 0000000..2d9300b --- /dev/null +++ b/dspl/liblapack/SRC/ztpqrt2.f @@ -0,0 +1,302 @@ +*> \brief \b ZTPQRT2 computes a QR factorization of a real or complex "triangular-pentagonal" matrix, which is composed of a triangular block and a pentagonal block, using the compact WY representation for Q. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTPQRT2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTPQRT2( M, N, L, A, LDA, B, LDB, T, LDT, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, LDT, N, M, L +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTPQRT2 computes a QR factorization of a complex "triangular-pentagonal" +*> matrix C, which is composed of a triangular block A and pentagonal block B, +*> using the compact WY representation for Q. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The total number of rows of the matrix B. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B, and the order of +*> the triangular matrix A. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of rows of the upper trapezoidal part of B. +*> MIN(M,N) >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the upper triangular N-by-N matrix A. +*> On exit, the elements on and above the diagonal of the array +*> contain the upper triangular matrix R. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> On entry, the pentagonal M-by-N matrix B. The first M-L rows +*> are rectangular, and the last L rows are upper trapezoidal. +*> On exit, B contains the pentagonal matrix V. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,N) +*> The N-by-N upper triangular factor T of the block reflector. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The input matrix C is a (N+M)-by-N matrix +*> +*> C = [ A ] +*> [ B ] +*> +*> where A is an upper triangular N-by-N matrix, and B is M-by-N pentagonal +*> matrix consisting of a (M-L)-by-N rectangular matrix B1 on top of a L-by-N +*> upper trapezoidal matrix B2: +*> +*> B = [ B1 ] <- (M-L)-by-N rectangular +*> [ B2 ] <- L-by-N upper trapezoidal. +*> +*> The upper trapezoidal matrix B2 consists of the first L rows of a +*> N-by-N upper triangular matrix, where 0 <= L <= MIN(M,N). If L=0, +*> B is rectangular M-by-N; if M=L=N, B is upper triangular. +*> +*> The matrix W stores the elementary reflectors H(i) in the i-th column +*> below the diagonal (of A) in the (N+M)-by-N input matrix C +*> +*> C = [ A ] <- upper triangular N-by-N +*> [ B ] <- M-by-N pentagonal +*> +*> so that W can be represented as +*> +*> W = [ I ] <- identity, N-by-N +*> [ V ] <- M-by-N, same form as B. +*> +*> Thus, all of information needed for W is contained on exit in B, which +*> we call V above. Note that V has the same form as B; that is, +*> +*> V = [ V1 ] <- (M-L)-by-N rectangular +*> [ V2 ] <- L-by-N upper trapezoidal. +*> +*> The columns of V represent the vectors which define the H(i)'s. +*> The (M+N)-by-(M+N) block reflector H is then given by +*> +*> H = I - W * T * W**H +*> +*> where W**H is the conjugate transpose of W and T is the upper triangular +*> factor of the block reflector. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTPQRT2( M, N, L, A, LDA, B, LDB, T, LDT, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, LDT, N, M, L +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER( ONE = (1.0,0.0), ZERO = (0.0,0.0) ) +* .. +* .. Local Scalars .. + INTEGER I, J, P, MP, NP + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL ZLARFG, ZGEMV, ZGERC, ZTRMV, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( L.LT.0 .OR. L.GT.MIN(M,N) ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTPQRT2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) RETURN +* + DO I = 1, N +* +* Generate elementary reflector H(I) to annihilate B(:,I) +* + P = M-L+MIN( L, I ) + CALL ZLARFG( P+1, A( I, I ), B( 1, I ), 1, T( I, 1 ) ) + IF( I.LT.N ) THEN +* +* W(1:N-I) := C(I:M,I+1:N)**H * C(I:M,I) [use W = T(:,N)] +* + DO J = 1, N-I + T( J, N ) = CONJG(A( I, I+J )) + END DO + CALL ZGEMV( 'C', P, N-I, ONE, B( 1, I+1 ), LDB, + $ B( 1, I ), 1, ONE, T( 1, N ), 1 ) +* +* C(I:M,I+1:N) = C(I:m,I+1:N) + alpha*C(I:M,I)*W(1:N-1)**H +* + ALPHA = -CONJG(T( I, 1 )) + DO J = 1, N-I + A( I, I+J ) = A( I, I+J ) + ALPHA*CONJG(T( J, N )) + END DO + CALL ZGERC( P, N-I, ALPHA, B( 1, I ), 1, + $ T( 1, N ), 1, B( 1, I+1 ), LDB ) + END IF + END DO +* + DO I = 2, N +* +* T(1:I-1,I) := C(I:M,1:I-1)**H * (alpha * C(I:M,I)) +* + ALPHA = -T( I, 1 ) + + DO J = 1, I-1 + T( J, I ) = ZERO + END DO + P = MIN( I-1, L ) + MP = MIN( M-L+1, M ) + NP = MIN( P+1, N ) +* +* Triangular part of B2 +* + DO J = 1, P + T( J, I ) = ALPHA*B( M-L+J, I ) + END DO + CALL ZTRMV( 'U', 'C', 'N', P, B( MP, 1 ), LDB, + $ T( 1, I ), 1 ) +* +* Rectangular part of B2 +* + CALL ZGEMV( 'C', L, I-1-P, ALPHA, B( MP, NP ), LDB, + $ B( MP, I ), 1, ZERO, T( NP, I ), 1 ) +* +* B1 +* + CALL ZGEMV( 'C', M-L, I-1, ALPHA, B, LDB, B( 1, I ), 1, + $ ONE, T( 1, I ), 1 ) +* +* T(1:I-1,I) := T(1:I-1,1:I-1) * T(1:I-1,I) +* + CALL ZTRMV( 'U', 'N', 'N', I-1, T, LDT, T( 1, I ), 1 ) +* +* T(I,I) = tau(I) +* + T( I, I ) = T( I, 1 ) + T( I, 1 ) = ZERO + END DO + +* +* End of ZTPQRT2 +* + END diff --git a/dspl/liblapack/SRC/ztprfb.f b/dspl/liblapack/SRC/ztprfb.f new file mode 100644 index 0000000..1a62829 --- /dev/null +++ b/dspl/liblapack/SRC/ztprfb.f @@ -0,0 +1,814 @@ +*> \brief \b ZTPRFB applies a real or complex "triangular-pentagonal" blocked reflector to a real or complex matrix, which is composed of two blocks. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTPRFB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTPRFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, +* V, LDV, T, LDT, A, LDA, B, LDB, WORK, LDWORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, SIDE, STOREV, TRANS +* INTEGER K, L, LDA, LDB, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), T( LDT, * ), +* $ V( LDV, * ), WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTPRFB applies a complex "triangular-pentagonal" block reflector H or its +*> conjugate transpose H**H to a complex matrix C, which is composed of two +*> blocks A and B, either from the left or right. +*> +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply H or H**H from the Left +*> = 'R': apply H or H**H from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply H (No transpose) +*> = 'C': apply H**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Indicates how H is formed from a product of elementary +*> reflectors +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Indicates how the vectors which define the elementary +*> reflectors are stored: +*> = 'C': Columns +*> = 'R': Rows +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix B. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix B. +*> N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the matrix T, i.e. the number of elementary +*> reflectors whose product defines the block reflector. +*> K >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The order of the trapezoidal part of V. +*> K >= L >= 0. See Further Details. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,M) if STOREV = 'R' and SIDE = 'L' +*> (LDV,N) if STOREV = 'R' and SIDE = 'R' +*> The pentagonal matrix V, which contains the elementary reflectors +*> H(1), H(2), ..., H(K). See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); +*> if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); +*> if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,K) +*> The triangular K-by-K matrix T in the representation of the +*> block reflector. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. +*> LDT >= K. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension +*> (LDA,N) if SIDE = 'L' or (LDA,K) if SIDE = 'R' +*> On entry, the K-by-N or M-by-K matrix A. +*> On exit, A is overwritten by the corresponding block of +*> H*C or H**H*C or C*H or C*H**H. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDC >= max(1,K); +*> If SIDE = 'R', LDC >= max(1,M). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> On entry, the M-by-N matrix B. +*> On exit, B is overwritten by the corresponding block of +*> H*C or H**H*C or C*H or C*H**H. See Further Details. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. +*> LDB >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension +*> (LDWORK,N) if SIDE = 'L', +*> (LDWORK,K) if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. +*> If SIDE = 'L', LDWORK >= K; +*> if SIDE = 'R', LDWORK >= M. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The matrix C is a composite matrix formed from blocks A and B. +*> The block B is of size M-by-N; if SIDE = 'R', A is of size M-by-K, +*> and if SIDE = 'L', A is of size K-by-N. +*> +*> If SIDE = 'R' and DIRECT = 'F', C = [A B]. +*> +*> If SIDE = 'L' and DIRECT = 'F', C = [A] +*> [B]. +*> +*> If SIDE = 'R' and DIRECT = 'B', C = [B A]. +*> +*> If SIDE = 'L' and DIRECT = 'B', C = [B] +*> [A]. +*> +*> The pentagonal matrix V is composed of a rectangular block V1 and a +*> trapezoidal block V2. The size of the trapezoidal block is determined by +*> the parameter L, where 0<=L<=K. If L=K, the V2 block of V is triangular; +*> if L=0, there is no trapezoidal block, thus V = V1 is rectangular. +*> +*> If DIRECT = 'F' and STOREV = 'C': V = [V1] +*> [V2] +*> - V2 is upper trapezoidal (first L rows of K-by-K upper triangular) +*> +*> If DIRECT = 'F' and STOREV = 'R': V = [V1 V2] +*> +*> - V2 is lower trapezoidal (first L columns of K-by-K lower triangular) +*> +*> If DIRECT = 'B' and STOREV = 'C': V = [V2] +*> [V1] +*> - V2 is lower trapezoidal (last L rows of K-by-K lower triangular) +*> +*> If DIRECT = 'B' and STOREV = 'R': V = [V2 V1] +*> +*> - V2 is upper trapezoidal (last L columns of K-by-K upper triangular) +*> +*> If STOREV = 'C' and SIDE = 'L', V is M-by-K with V2 L-by-K. +*> +*> If STOREV = 'C' and SIDE = 'R', V is N-by-K with V2 L-by-K. +*> +*> If STOREV = 'R' and SIDE = 'L', V is K-by-M with V2 K-by-L. +*> +*> If STOREV = 'R' and SIDE = 'R', V is K-by-N with V2 K-by-L. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTPRFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, + $ V, LDV, T, LDT, A, LDA, B, LDB, WORK, LDWORK ) +* +* -- LAPACK auxiliary routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, L, LDA, LDB, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), T( LDT, * ), + $ V( LDV, * ), WORK( LDWORK, * ) +* .. +* +* ========================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = (1.0,0.0), ZERO = (0.0,0.0) ) +* .. +* .. Local Scalars .. + INTEGER I, J, MP, NP, KP + LOGICAL LEFT, FORWARD, COLUMN, RIGHT, BACKWARD, ROW +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZGEMM, ZTRMM +* .. +* .. Intrinsic Functions .. + INTRINSIC CONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 .OR. K.LE.0 .OR. L.LT.0 ) RETURN +* + IF( LSAME( STOREV, 'C' ) ) THEN + COLUMN = .TRUE. + ROW = .FALSE. + ELSE IF ( LSAME( STOREV, 'R' ) ) THEN + COLUMN = .FALSE. + ROW = .TRUE. + ELSE + COLUMN = .FALSE. + ROW = .FALSE. + END IF +* + IF( LSAME( SIDE, 'L' ) ) THEN + LEFT = .TRUE. + RIGHT = .FALSE. + ELSE IF( LSAME( SIDE, 'R' ) ) THEN + LEFT = .FALSE. + RIGHT = .TRUE. + ELSE + LEFT = .FALSE. + RIGHT = .FALSE. + END IF +* + IF( LSAME( DIRECT, 'F' ) ) THEN + FORWARD = .TRUE. + BACKWARD = .FALSE. + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + FORWARD = .FALSE. + BACKWARD = .TRUE. + ELSE + FORWARD = .FALSE. + BACKWARD = .FALSE. + END IF +* +* --------------------------------------------------------------------------- +* + IF( COLUMN .AND. FORWARD .AND. LEFT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ I ] (K-by-K) +* [ V ] (M-by-K) +* +* Form H C or H**H C where C = [ A ] (K-by-N) +* [ B ] (M-by-N) +* +* H = I - W T W**H or H**H = I - W T**H W**H +* +* A = A - T (A + V**H B) or A = A - T**H (A + V**H B) +* B = B - V T (A + V**H B) or B = B - V T**H (A + V**H B) +* +* --------------------------------------------------------------------------- +* + MP = MIN( M-L+1, M ) + KP = MIN( L+1, K ) +* + DO J = 1, N + DO I = 1, L + WORK( I, J ) = B( M-L+I, J ) + END DO + END DO + CALL ZTRMM( 'L', 'U', 'C', 'N', L, N, ONE, V( MP, 1 ), LDV, + $ WORK, LDWORK ) + CALL ZGEMM( 'C', 'N', L, N, M-L, ONE, V, LDV, B, LDB, + $ ONE, WORK, LDWORK ) + CALL ZGEMM( 'C', 'N', K-L, N, M, ONE, V( 1, KP ), LDV, + $ B, LDB, ZERO, WORK( KP, 1 ), LDWORK ) +* + DO J = 1, N + DO I = 1, K + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL ZTRMM( 'L', 'U', TRANS, 'N', K, N, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL ZGEMM( 'N', 'N', M-L, N, K, -ONE, V, LDV, WORK, LDWORK, + $ ONE, B, LDB ) + CALL ZGEMM( 'N', 'N', L, N, K-L, -ONE, V( MP, KP ), LDV, + $ WORK( KP, 1 ), LDWORK, ONE, B( MP, 1 ), LDB ) + CALL ZTRMM( 'L', 'U', 'N', 'N', L, N, ONE, V( MP, 1 ), LDV, + $ WORK, LDWORK ) + DO J = 1, N + DO I = 1, L + B( M-L+I, J ) = B( M-L+I, J ) - WORK( I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( COLUMN .AND. FORWARD .AND. RIGHT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ I ] (K-by-K) +* [ V ] (N-by-K) +* +* Form C H or C H**H where C = [ A B ] (A is M-by-K, B is M-by-N) +* +* H = I - W T W**H or H**H = I - W T**H W**H +* +* A = A - (A + B V) T or A = A - (A + B V) T**H +* B = B - (A + B V) T V**H or B = B - (A + B V) T**H V**H +* +* --------------------------------------------------------------------------- +* + NP = MIN( N-L+1, N ) + KP = MIN( L+1, K ) +* + DO J = 1, L + DO I = 1, M + WORK( I, J ) = B( I, N-L+J ) + END DO + END DO + CALL ZTRMM( 'R', 'U', 'N', 'N', M, L, ONE, V( NP, 1 ), LDV, + $ WORK, LDWORK ) + CALL ZGEMM( 'N', 'N', M, L, N-L, ONE, B, LDB, + $ V, LDV, ONE, WORK, LDWORK ) + CALL ZGEMM( 'N', 'N', M, K-L, N, ONE, B, LDB, + $ V( 1, KP ), LDV, ZERO, WORK( 1, KP ), LDWORK ) +* + DO J = 1, K + DO I = 1, M + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL ZTRMM( 'R', 'U', TRANS, 'N', M, K, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL ZGEMM( 'N', 'C', M, N-L, K, -ONE, WORK, LDWORK, + $ V, LDV, ONE, B, LDB ) + CALL ZGEMM( 'N', 'C', M, L, K-L, -ONE, WORK( 1, KP ), LDWORK, + $ V( NP, KP ), LDV, ONE, B( 1, NP ), LDB ) + CALL ZTRMM( 'R', 'U', 'C', 'N', M, L, ONE, V( NP, 1 ), LDV, + $ WORK, LDWORK ) + DO J = 1, L + DO I = 1, M + B( I, N-L+J ) = B( I, N-L+J ) - WORK( I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( COLUMN .AND. BACKWARD .AND. LEFT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ V ] (M-by-K) +* [ I ] (K-by-K) +* +* Form H C or H**H C where C = [ B ] (M-by-N) +* [ A ] (K-by-N) +* +* H = I - W T W**H or H**H = I - W T**H W**H +* +* A = A - T (A + V**H B) or A = A - T**H (A + V**H B) +* B = B - V T (A + V**H B) or B = B - V T**H (A + V**H B) +* +* --------------------------------------------------------------------------- +* + MP = MIN( L+1, M ) + KP = MIN( K-L+1, K ) +* + DO J = 1, N + DO I = 1, L + WORK( K-L+I, J ) = B( I, J ) + END DO + END DO +* + CALL ZTRMM( 'L', 'L', 'C', 'N', L, N, ONE, V( 1, KP ), LDV, + $ WORK( KP, 1 ), LDWORK ) + CALL ZGEMM( 'C', 'N', L, N, M-L, ONE, V( MP, KP ), LDV, + $ B( MP, 1 ), LDB, ONE, WORK( KP, 1 ), LDWORK ) + CALL ZGEMM( 'C', 'N', K-L, N, M, ONE, V, LDV, + $ B, LDB, ZERO, WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL ZTRMM( 'L', 'L', TRANS, 'N', K, N, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL ZGEMM( 'N', 'N', M-L, N, K, -ONE, V( MP, 1 ), LDV, + $ WORK, LDWORK, ONE, B( MP, 1 ), LDB ) + CALL ZGEMM( 'N', 'N', L, N, K-L, -ONE, V, LDV, + $ WORK, LDWORK, ONE, B, LDB ) + CALL ZTRMM( 'L', 'L', 'N', 'N', L, N, ONE, V( 1, KP ), LDV, + $ WORK( KP, 1 ), LDWORK ) + DO J = 1, N + DO I = 1, L + B( I, J ) = B( I, J ) - WORK( K-L+I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( COLUMN .AND. BACKWARD .AND. RIGHT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ V ] (N-by-K) +* [ I ] (K-by-K) +* +* Form C H or C H**H where C = [ B A ] (B is M-by-N, A is M-by-K) +* +* H = I - W T W**H or H**H = I - W T**H W**H +* +* A = A - (A + B V) T or A = A - (A + B V) T**H +* B = B - (A + B V) T V**H or B = B - (A + B V) T**H V**H +* +* --------------------------------------------------------------------------- +* + NP = MIN( L+1, N ) + KP = MIN( K-L+1, K ) +* + DO J = 1, L + DO I = 1, M + WORK( I, K-L+J ) = B( I, J ) + END DO + END DO + CALL ZTRMM( 'R', 'L', 'N', 'N', M, L, ONE, V( 1, KP ), LDV, + $ WORK( 1, KP ), LDWORK ) + CALL ZGEMM( 'N', 'N', M, L, N-L, ONE, B( 1, NP ), LDB, + $ V( NP, KP ), LDV, ONE, WORK( 1, KP ), LDWORK ) + CALL ZGEMM( 'N', 'N', M, K-L, N, ONE, B, LDB, + $ V, LDV, ZERO, WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL ZTRMM( 'R', 'L', TRANS, 'N', M, K, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL ZGEMM( 'N', 'C', M, N-L, K, -ONE, WORK, LDWORK, + $ V( NP, 1 ), LDV, ONE, B( 1, NP ), LDB ) + CALL ZGEMM( 'N', 'C', M, L, K-L, -ONE, WORK, LDWORK, + $ V, LDV, ONE, B, LDB ) + CALL ZTRMM( 'R', 'L', 'C', 'N', M, L, ONE, V( 1, KP ), LDV, + $ WORK( 1, KP ), LDWORK ) + DO J = 1, L + DO I = 1, M + B( I, J ) = B( I, J ) - WORK( I, K-L+J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( ROW .AND. FORWARD .AND. LEFT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ I V ] ( I is K-by-K, V is K-by-M ) +* +* Form H C or H**H C where C = [ A ] (K-by-N) +* [ B ] (M-by-N) +* +* H = I - W**H T W or H**H = I - W**H T**H W +* +* A = A - T (A + V B) or A = A - T**H (A + V B) +* B = B - V**H T (A + V B) or B = B - V**H T**H (A + V B) +* +* --------------------------------------------------------------------------- +* + MP = MIN( M-L+1, M ) + KP = MIN( L+1, K ) +* + DO J = 1, N + DO I = 1, L + WORK( I, J ) = B( M-L+I, J ) + END DO + END DO + CALL ZTRMM( 'L', 'L', 'N', 'N', L, N, ONE, V( 1, MP ), LDV, + $ WORK, LDB ) + CALL ZGEMM( 'N', 'N', L, N, M-L, ONE, V, LDV,B, LDB, + $ ONE, WORK, LDWORK ) + CALL ZGEMM( 'N', 'N', K-L, N, M, ONE, V( KP, 1 ), LDV, + $ B, LDB, ZERO, WORK( KP, 1 ), LDWORK ) +* + DO J = 1, N + DO I = 1, K + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL ZTRMM( 'L', 'U', TRANS, 'N', K, N, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL ZGEMM( 'C', 'N', M-L, N, K, -ONE, V, LDV, WORK, LDWORK, + $ ONE, B, LDB ) + CALL ZGEMM( 'C', 'N', L, N, K-L, -ONE, V( KP, MP ), LDV, + $ WORK( KP, 1 ), LDWORK, ONE, B( MP, 1 ), LDB ) + CALL ZTRMM( 'L', 'L', 'C', 'N', L, N, ONE, V( 1, MP ), LDV, + $ WORK, LDWORK ) + DO J = 1, N + DO I = 1, L + B( M-L+I, J ) = B( M-L+I, J ) - WORK( I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( ROW .AND. FORWARD .AND. RIGHT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ I V ] ( I is K-by-K, V is K-by-N ) +* +* Form C H or C H**H where C = [ A B ] (A is M-by-K, B is M-by-N) +* +* H = I - W**H T W or H**H = I - W**H T**H W +* +* A = A - (A + B V**H) T or A = A - (A + B V**H) T**H +* B = B - (A + B V**H) T V or B = B - (A + B V**H) T**H V +* +* --------------------------------------------------------------------------- +* + NP = MIN( N-L+1, N ) + KP = MIN( L+1, K ) +* + DO J = 1, L + DO I = 1, M + WORK( I, J ) = B( I, N-L+J ) + END DO + END DO + CALL ZTRMM( 'R', 'L', 'C', 'N', M, L, ONE, V( 1, NP ), LDV, + $ WORK, LDWORK ) + CALL ZGEMM( 'N', 'C', M, L, N-L, ONE, B, LDB, V, LDV, + $ ONE, WORK, LDWORK ) + CALL ZGEMM( 'N', 'C', M, K-L, N, ONE, B, LDB, + $ V( KP, 1 ), LDV, ZERO, WORK( 1, KP ), LDWORK ) +* + DO J = 1, K + DO I = 1, M + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL ZTRMM( 'R', 'U', TRANS, 'N', M, K, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL ZGEMM( 'N', 'N', M, N-L, K, -ONE, WORK, LDWORK, + $ V, LDV, ONE, B, LDB ) + CALL ZGEMM( 'N', 'N', M, L, K-L, -ONE, WORK( 1, KP ), LDWORK, + $ V( KP, NP ), LDV, ONE, B( 1, NP ), LDB ) + CALL ZTRMM( 'R', 'L', 'N', 'N', M, L, ONE, V( 1, NP ), LDV, + $ WORK, LDWORK ) + DO J = 1, L + DO I = 1, M + B( I, N-L+J ) = B( I, N-L+J ) - WORK( I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( ROW .AND. BACKWARD .AND. LEFT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ V I ] ( I is K-by-K, V is K-by-M ) +* +* Form H C or H**H C where C = [ B ] (M-by-N) +* [ A ] (K-by-N) +* +* H = I - W**H T W or H**H = I - W**H T**H W +* +* A = A - T (A + V B) or A = A - T**H (A + V B) +* B = B - V**H T (A + V B) or B = B - V**H T**H (A + V B) +* +* --------------------------------------------------------------------------- +* + MP = MIN( L+1, M ) + KP = MIN( K-L+1, K ) +* + DO J = 1, N + DO I = 1, L + WORK( K-L+I, J ) = B( I, J ) + END DO + END DO + CALL ZTRMM( 'L', 'U', 'N', 'N', L, N, ONE, V( KP, 1 ), LDV, + $ WORK( KP, 1 ), LDWORK ) + CALL ZGEMM( 'N', 'N', L, N, M-L, ONE, V( KP, MP ), LDV, + $ B( MP, 1 ), LDB, ONE, WORK( KP, 1 ), LDWORK ) + CALL ZGEMM( 'N', 'N', K-L, N, M, ONE, V, LDV, B, LDB, + $ ZERO, WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL ZTRMM( 'L', 'L ', TRANS, 'N', K, N, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, N + DO I = 1, K + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL ZGEMM( 'C', 'N', M-L, N, K, -ONE, V( 1, MP ), LDV, + $ WORK, LDWORK, ONE, B( MP, 1 ), LDB ) + CALL ZGEMM( 'C', 'N', L, N, K-L, -ONE, V, LDV, + $ WORK, LDWORK, ONE, B, LDB ) + CALL ZTRMM( 'L', 'U', 'C', 'N', L, N, ONE, V( KP, 1 ), LDV, + $ WORK( KP, 1 ), LDWORK ) + DO J = 1, N + DO I = 1, L + B( I, J ) = B( I, J ) - WORK( K-L+I, J ) + END DO + END DO +* +* --------------------------------------------------------------------------- +* + ELSE IF( ROW .AND. BACKWARD .AND. RIGHT ) THEN +* +* --------------------------------------------------------------------------- +* +* Let W = [ V I ] ( I is K-by-K, V is K-by-N ) +* +* Form C H or C H**H where C = [ B A ] (A is M-by-K, B is M-by-N) +* +* H = I - W**H T W or H**H = I - W**H T**H W +* +* A = A - (A + B V**H) T or A = A - (A + B V**H) T**H +* B = B - (A + B V**H) T V or B = B - (A + B V**H) T**H V +* +* --------------------------------------------------------------------------- +* + NP = MIN( L+1, N ) + KP = MIN( K-L+1, K ) +* + DO J = 1, L + DO I = 1, M + WORK( I, K-L+J ) = B( I, J ) + END DO + END DO + CALL ZTRMM( 'R', 'U', 'C', 'N', M, L, ONE, V( KP, 1 ), LDV, + $ WORK( 1, KP ), LDWORK ) + CALL ZGEMM( 'N', 'C', M, L, N-L, ONE, B( 1, NP ), LDB, + $ V( KP, NP ), LDV, ONE, WORK( 1, KP ), LDWORK ) + CALL ZGEMM( 'N', 'C', M, K-L, N, ONE, B, LDB, V, LDV, + $ ZERO, WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + WORK( I, J ) = WORK( I, J ) + A( I, J ) + END DO + END DO +* + CALL ZTRMM( 'R', 'L', TRANS, 'N', M, K, ONE, T, LDT, + $ WORK, LDWORK ) +* + DO J = 1, K + DO I = 1, M + A( I, J ) = A( I, J ) - WORK( I, J ) + END DO + END DO +* + CALL ZGEMM( 'N', 'N', M, N-L, K, -ONE, WORK, LDWORK, + $ V( 1, NP ), LDV, ONE, B( 1, NP ), LDB ) + CALL ZGEMM( 'N', 'N', M, L, K-L , -ONE, WORK, LDWORK, + $ V, LDV, ONE, B, LDB ) + CALL ZTRMM( 'R', 'U', 'N', 'N', M, L, ONE, V( KP, 1 ), LDV, + $ WORK( 1, KP ), LDWORK ) + DO J = 1, L + DO I = 1, M + B( I, J ) = B( I, J ) - WORK( I, K-L+J ) + END DO + END DO +* + END IF +* + RETURN +* +* End of ZTPRFB +* + END diff --git a/dspl/liblapack/SRC/ztprfs.f b/dspl/liblapack/SRC/ztprfs.f new file mode 100644 index 0000000..d6a29d4 --- /dev/null +++ b/dspl/liblapack/SRC/ztprfs.f @@ -0,0 +1,484 @@ +*> \brief \b ZTPRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTPRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTPRFS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, X, LDX, +* FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX*16 AP( * ), B( LDB, * ), WORK( * ), X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTPRFS provides error bounds and backward error estimates for the +*> solution to a system of linear equations with a triangular packed +*> coefficient matrix. +*> +*> The solution matrix X must be computed by ZTPTRS or some other +*> means before entering this routine. ZTPRFS does not do iterative +*> refinement because doing so cannot improve the backward error. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The upper or lower triangular matrix A, packed columnwise in +*> a linear array. The j-th column of A is stored in the array +*> AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> If DIAG = 'U', the diagonal elements of A are not referenced +*> and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> The solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZTPRFS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, X, LDX, + $ FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 AP( * ), B( LDB, * ), WORK( * ), X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANSN, TRANST + INTEGER I, J, K, KASE, KC, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZLACN2, ZTPMV, ZTPSV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTPRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL ZCOPY( N, X( 1, J ), 1, WORK, 1 ) + CALL ZTPMV( UPLO, TRANS, DIAG, N, AP, WORK, 1 ) + CALL ZAXPY( N, -ONE, B( 1, J ), 1, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + KC = 1 + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = CABS1( X( K, J ) ) + DO 30 I = 1, K + RWORK( I ) = RWORK( I ) + + $ CABS1( AP( KC+I-1 ) )*XK + 30 CONTINUE + KC = KC + K + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = CABS1( X( K, J ) ) + DO 50 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + + $ CABS1( AP( KC+I-1 ) )*XK + 50 CONTINUE + RWORK( K ) = RWORK( K ) + XK + KC = KC + K + 60 CONTINUE + END IF + ELSE + KC = 1 + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = CABS1( X( K, J ) ) + DO 70 I = K, N + RWORK( I ) = RWORK( I ) + + $ CABS1( AP( KC+I-K ) )*XK + 70 CONTINUE + KC = KC + N - K + 1 + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = CABS1( X( K, J ) ) + DO 90 I = K + 1, N + RWORK( I ) = RWORK( I ) + + $ CABS1( AP( KC+I-K ) )*XK + 90 CONTINUE + RWORK( K ) = RWORK( K ) + XK + KC = KC + N - K + 1 + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A**H)*abs(X) + abs(B). +* + IF( UPPER ) THEN + KC = 1 + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = 1, K + S = S + CABS1( AP( KC+I-1 ) )*CABS1( X( I, J ) ) + 110 CONTINUE + RWORK( K ) = RWORK( K ) + S + KC = KC + K + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = CABS1( X( K, J ) ) + DO 130 I = 1, K - 1 + S = S + CABS1( AP( KC+I-1 ) )*CABS1( X( I, J ) ) + 130 CONTINUE + RWORK( K ) = RWORK( K ) + S + KC = KC + K + 140 CONTINUE + END IF + ELSE + KC = 1 + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, N + S = S + CABS1( AP( KC+I-K ) )*CABS1( X( I, J ) ) + 150 CONTINUE + RWORK( K ) = RWORK( K ) + S + KC = KC + N - K + 1 + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = CABS1( X( K, J ) ) + DO 170 I = K + 1, N + S = S + CABS1( AP( KC+I-K ) )*CABS1( X( I, J ) ) + 170 CONTINUE + RWORK( K ) = RWORK( K ) + S + KC = KC + N - K + 1 + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL ZTPSV( UPLO, TRANST, DIAG, N, AP, WORK, 1 ) + DO 220 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 230 CONTINUE + CALL ZTPSV( UPLO, TRANSN, DIAG, N, AP, WORK, 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of ZTPRFS +* + END diff --git a/dspl/liblapack/SRC/ztptri.f b/dspl/liblapack/SRC/ztptri.f new file mode 100644 index 0000000..3538819 --- /dev/null +++ b/dspl/liblapack/SRC/ztptri.f @@ -0,0 +1,242 @@ +*> \brief \b ZTPTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTPTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTPTRI( UPLO, DIAG, N, AP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* COMPLEX*16 AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTPTRI computes the inverse of a complex upper or lower triangular +*> matrix A stored in packed format. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> On entry, the upper or lower triangular matrix A, stored +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*((2*n-j)/2) = A(i,j) for j<=i<=n. +*> See below for further details. +*> On exit, the (triangular) inverse of the original matrix, in +*> the same packed storage format. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, A(i,i) is exactly zero. The triangular +*> matrix is singular and its inverse can not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> A triangular matrix A can be transferred to packed storage using one +*> of the following program segments: +*> +*> UPLO = 'U': UPLO = 'L': +*> +*> JC = 1 JC = 1 +*> DO 2 J = 1, N DO 2 J = 1, N +*> DO 1 I = 1, J DO 1 I = J, N +*> AP(JC+I-1) = A(I,J) AP(JC+I-J) = A(I,J) +*> 1 CONTINUE 1 CONTINUE +*> JC = JC + J JC = JC + N - J + 1 +*> 2 CONTINUE 2 CONTINUE +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTPTRI( UPLO, DIAG, N, AP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JC, JCLAST, JJ + COMPLEX*16 AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZSCAL, ZTPMV +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTPTRI', -INFO ) + RETURN + END IF +* +* Check for singularity if non-unit. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + JJ = 0 + DO 10 INFO = 1, N + JJ = JJ + INFO + IF( AP( JJ ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + ELSE + JJ = 1 + DO 20 INFO = 1, N + IF( AP( JJ ).EQ.ZERO ) + $ RETURN + JJ = JJ + N - INFO + 1 + 20 CONTINUE + END IF + INFO = 0 + END IF +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix. +* + JC = 1 + DO 30 J = 1, N + IF( NOUNIT ) THEN + AP( JC+J-1 ) = ONE / AP( JC+J-1 ) + AJJ = -AP( JC+J-1 ) + ELSE + AJJ = -ONE + END IF +* +* Compute elements 1:j-1 of j-th column. +* + CALL ZTPMV( 'Upper', 'No transpose', DIAG, J-1, AP, + $ AP( JC ), 1 ) + CALL ZSCAL( J-1, AJJ, AP( JC ), 1 ) + JC = JC + J + 30 CONTINUE +* + ELSE +* +* Compute inverse of lower triangular matrix. +* + JC = N*( N+1 ) / 2 + DO 40 J = N, 1, -1 + IF( NOUNIT ) THEN + AP( JC ) = ONE / AP( JC ) + AJJ = -AP( JC ) + ELSE + AJJ = -ONE + END IF + IF( J.LT.N ) THEN +* +* Compute elements j+1:n of j-th column. +* + CALL ZTPMV( 'Lower', 'No transpose', DIAG, N-J, + $ AP( JCLAST ), AP( JC+1 ), 1 ) + CALL ZSCAL( N-J, AJJ, AP( JC+1 ), 1 ) + END IF + JCLAST = JC + JC = JC - N + J - 2 + 40 CONTINUE + END IF +* + RETURN +* +* End of ZTPTRI +* + END diff --git a/dspl/liblapack/SRC/ztptrs.f b/dspl/liblapack/SRC/ztptrs.f new file mode 100644 index 0000000..c1a852c --- /dev/null +++ b/dspl/liblapack/SRC/ztptrs.f @@ -0,0 +1,228 @@ +*> \brief \b ZTPTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTPTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTPTRS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTPTRS solves a triangular system of the form +*> +*> A * X = B, A**T * X = B, or A**H * X = B, +*> +*> where A is a triangular matrix of order N stored in packed format, +*> and B is an N-by-NRHS matrix. A check is made to verify that A is +*> nonsingular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The upper or lower triangular matrix A, packed columnwise in +*> a linear array. The j-th column of A is stored in the array +*> AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, if INFO = 0, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of A is zero, +*> indicating that the matrix is singular and the +*> solutions X have not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZTPTRS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JC +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZTPSV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTPTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + IF( UPPER ) THEN + JC = 1 + DO 10 INFO = 1, N + IF( AP( JC+INFO-1 ).EQ.ZERO ) + $ RETURN + JC = JC + INFO + 10 CONTINUE + ELSE + JC = 1 + DO 20 INFO = 1, N + IF( AP( JC ).EQ.ZERO ) + $ RETURN + JC = JC + N - INFO + 1 + 20 CONTINUE + END IF + END IF + INFO = 0 +* +* Solve A * x = b, A**T * x = b, or A**H * x = b. +* + DO 30 J = 1, NRHS + CALL ZTPSV( UPLO, TRANS, DIAG, N, AP, B( 1, J ), 1 ) + 30 CONTINUE +* + RETURN +* +* End of ZTPTRS +* + END diff --git a/dspl/liblapack/SRC/ztpttf.f b/dspl/liblapack/SRC/ztpttf.f new file mode 100644 index 0000000..9dfc795 --- /dev/null +++ b/dspl/liblapack/SRC/ztpttf.f @@ -0,0 +1,539 @@ +*> \brief \b ZTPTTF copies a triangular matrix from the standard packed format (TP) to the rectangular full packed format (TF). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTPTTF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTPTTF( TRANSR, UPLO, N, AP, ARF, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, N +* .. +* .. Array Arguments .. +* COMPLEX*16 AP( 0: * ), ARF( 0: * ) +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTPTTF copies a triangular matrix A from standard packed format (TP) +*> to rectangular full packed format (TF). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': ARF in Normal format is wanted; +*> = 'C': ARF in Conjugate-transpose format is wanted. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension ( N*(N+1)/2 ), +*> On entry, the upper or lower triangular matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] ARF +*> \verbatim +*> ARF is COMPLEX*16 array, dimension ( N*(N+1)/2 ), +*> On exit, the upper or lower triangular matrix A stored in +*> RFP format. For a further discussion see Notes below. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Standard Packed Format when N is even. +*> We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> conjugate-transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> conjugate-transpose of the last three columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- -- +*> 03 04 05 33 43 53 +*> -- -- +*> 13 14 15 00 44 54 +*> -- +*> 23 24 25 10 11 55 +*> +*> 33 34 35 20 21 22 +*> -- +*> 00 44 45 30 31 32 +*> -- -- +*> 01 11 55 40 41 42 +*> -- -- -- +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- -- +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We next consider Standard Packed Format when N is odd. +*> We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> conjugate-transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> conjugate-transpose of the last two columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- +*> 02 03 04 00 33 43 +*> -- +*> 12 13 14 10 11 44 +*> +*> 22 23 24 20 21 22 +*> -- +*> 00 33 34 30 31 32 +*> -- -- +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- +*> 02 12 22 00 01 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 11 33 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTPTTF( TRANSR, UPLO, N, AP, ARF, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, N +* .. +* .. Array Arguments .. + COMPLEX*16 AP( 0: * ), ARF( 0: * ) +* +* ===================================================================== +* +* .. Parameters .. +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER N1, N2, K, NT + INTEGER I, J, IJ + INTEGER IJP, JP, LDA, JS +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTPTTF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( NORMALTRANSR ) THEN + ARF( 0 ) = AP( 0 ) + ELSE + ARF( 0 ) = DCONJG( AP( 0 ) ) + END IF + RETURN + END IF +* +* Size of array ARF(0:NT-1) +* + NT = N*( N+1 ) / 2 +* +* Set N1 and N2 depending on LOWER +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* If N is odd, set NISODD = .TRUE. +* If N is even, set K = N/2 and NISODD = .FALSE. +* +* set lda of ARF^C; ARF^C is (0:(N+1)/2-1,0:N-noe) +* where noe = 0 if n is even, noe = 1 if n is odd +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + LDA = N + 1 + ELSE + NISODD = .TRUE. + LDA = N + END IF +* +* ARF^C has lda rows and n+1-noe cols +* + IF( .NOT.NORMALTRANSR ) + $ LDA = ( N+1 ) / 2 +* +* start execution: there are eight cases +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) +* T1 -> a(0), T2 -> a(n), S -> a(n1); lda = n +* + IJP = 0 + JP = 0 + DO J = 0, N2 + DO I = J, N - 1 + IJ = I + JP + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JP = JP + LDA + END DO + DO I = 0, N2 - 1 + DO J = 1 + I, N2 + IJ = I + J*LDA + ARF( IJ ) = DCONJG( AP( IJP ) ) + IJP = IJP + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) +* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) +* T1 -> a(n2), T2 -> a(n1), S -> a(0) +* + IJP = 0 + DO J = 0, N1 - 1 + IJ = N2 + J + DO I = 0, J + ARF( IJ ) = DCONJG( AP( IJP ) ) + IJP = IJP + 1 + IJ = IJ + LDA + END DO + END DO + JS = 0 + DO J = N1, N - 1 + IJ = JS + DO IJ = JS, JS + J + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is odd +* T1 -> A(0,0) , T2 -> A(1,0) , S -> A(0,n1) +* T1 -> a(0+0) , T2 -> a(1+0) , S -> a(0+n1*n1); lda=n1 +* + IJP = 0 + DO I = 0, N2 + DO IJ = I*( LDA+1 ), N*LDA - 1, LDA + ARF( IJ ) = DCONJG( AP( IJP ) ) + IJP = IJP + 1 + END DO + END DO + JS = 1 + DO J = 0, N2 - 1 + DO IJ = JS, JS + N2 - J - 1 + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + 1 + END DO +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is odd +* T1 -> A(0,n1+1), T2 -> A(0,n1), S -> A(0,0) +* T1 -> a(n2*n2), T2 -> a(n1*n2), S -> a(0); lda = n2 +* + IJP = 0 + JS = N2*LDA + DO J = 0, N1 - 1 + DO IJ = JS, JS + J + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO + DO I = 0, N1 + DO IJ = I, I + ( N1+I )*LDA, LDA + ARF( IJ ) = DCONJG( AP( IJP ) ) + IJP = IJP + 1 + END DO + END DO +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1) +* + IJP = 0 + JP = 0 + DO J = 0, K - 1 + DO I = J, N - 1 + IJ = 1 + I + JP + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JP = JP + LDA + END DO + DO I = 0, K - 1 + DO J = I, K - 1 + IJ = I + J*LDA + ARF( IJ ) = DCONJG( AP( IJP ) ) + IJP = IJP + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0) +* + IJP = 0 + DO J = 0, K - 1 + IJ = K + 1 + J + DO I = 0, J + ARF( IJ ) = DCONJG( AP( IJP ) ) + IJP = IJP + 1 + IJ = IJ + LDA + END DO + END DO + JS = 0 + DO J = K, N - 1 + IJ = JS + DO IJ = JS, JS + J + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO +* + END IF +* + ELSE +* +* N is even and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) +* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k +* + IJP = 0 + DO I = 0, K - 1 + DO IJ = I + ( I+1 )*LDA, ( N+1 )*LDA - 1, LDA + ARF( IJ ) = DCONJG( AP( IJP ) ) + IJP = IJP + 1 + END DO + END DO + JS = 0 + DO J = 0, K - 1 + DO IJ = JS, JS + K - J - 1 + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + 1 + END DO +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is even (see paper) +* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0) +* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k +* + IJP = 0 + JS = ( K+1 )*LDA + DO J = 0, K - 1 + DO IJ = JS, JS + J + ARF( IJ ) = AP( IJP ) + IJP = IJP + 1 + END DO + JS = JS + LDA + END DO + DO I = 0, K - 1 + DO IJ = I, I + ( K+I )*LDA, LDA + ARF( IJ ) = DCONJG( AP( IJP ) ) + IJP = IJP + 1 + END DO + END DO +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of ZTPTTF +* + END diff --git a/dspl/liblapack/SRC/ztpttr.f b/dspl/liblapack/SRC/ztpttr.f new file mode 100644 index 0000000..17ebc0a --- /dev/null +++ b/dspl/liblapack/SRC/ztpttr.f @@ -0,0 +1,176 @@ +*> \brief \b ZTPTTR copies a triangular matrix from the standard packed format (TP) to the standard full format (TR). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTPTTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTPTTR( UPLO, N, AP, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTPTTR copies a triangular matrix A from standard packed format (TP) +*> to standard full format (TR). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular. +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension ( N*(N+1)/2 ), +*> On entry, the upper or lower triangular matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension ( LDA, N ) +*> On exit, the triangular matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZTPTTR( UPLO, N, AP, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* .. +* .. Local Scalars .. + LOGICAL LOWER + INTEGER I, J, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTPTTR', -INFO ) + RETURN + END IF +* + IF( LOWER ) THEN + K = 0 + DO J = 1, N + DO I = J, N + K = K + 1 + A( I, J ) = AP( K ) + END DO + END DO + ELSE + K = 0 + DO J = 1, N + DO I = 1, J + K = K + 1 + A( I, J ) = AP( K ) + END DO + END DO + END IF +* +* + RETURN +* +* End of ZTPTTR +* + END diff --git a/dspl/liblapack/SRC/ztrcon.f b/dspl/liblapack/SRC/ztrcon.f new file mode 100644 index 0000000..e7cc196 --- /dev/null +++ b/dspl/liblapack/SRC/ztrcon.f @@ -0,0 +1,283 @@ +*> \brief \b ZTRCON +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTRCON + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK, +* RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, NORM, UPLO +* INTEGER INFO, LDA, N +* DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTRCON estimates the reciprocal of the condition number of a +*> triangular matrix A, in either the 1-norm or the infinity-norm. +*> +*> The norm of A is computed and an estimate is obtained for +*> norm(inv(A)), then the reciprocal of the condition number is +*> computed as +*> RCOND = 1 / ( norm(A) * norm(inv(A)) ). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies whether the 1-norm condition number or the +*> infinity-norm condition number is required: +*> = '1' or 'O': 1-norm; +*> = 'I': Infinity-norm. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The triangular matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of the array A contains the upper +*> triangular matrix, and the strictly lower triangular part of +*> A is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of the array A contains the lower triangular +*> matrix, and the strictly upper triangular part of A is not +*> referenced. If DIAG = 'U', the diagonal elements of A are +*> also not referenced and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] RCOND +*> \verbatim +*> RCOND is DOUBLE PRECISION +*> The reciprocal of the condition number of the matrix A, +*> computed as RCOND = 1/(norm(A) * norm(inv(A))). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZTRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK, + $ RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, NORM, UPLO + INTEGER INFO, LDA, N + DOUBLE PRECISION RCOND +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, ONENRM, UPPER + CHARACTER NORMIN + INTEGER IX, KASE, KASE1 + DOUBLE PRECISION AINVNM, ANORM, SCALE, SMLNUM, XNORM + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH, ZLANTR + EXTERNAL LSAME, IZAMAX, DLAMCH, ZLANTR +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDRSCL, ZLACN2, ZLATRS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTRCON', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RCOND = ONE + RETURN + END IF +* + RCOND = ZERO + SMLNUM = DLAMCH( 'Safe minimum' )*DBLE( MAX( 1, N ) ) +* +* Compute the norm of the triangular matrix A. +* + ANORM = ZLANTR( NORM, UPLO, DIAG, N, N, A, LDA, RWORK ) +* +* Continue only if ANORM > 0. +* + IF( ANORM.GT.ZERO ) THEN +* +* Estimate the norm of the inverse of A. +* + AINVNM = ZERO + NORMIN = 'N' + IF( ONENRM ) THEN + KASE1 = 1 + ELSE + KASE1 = 2 + END IF + KASE = 0 + 10 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, AINVNM, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.KASE1 ) THEN +* +* Multiply by inv(A). +* + CALL ZLATRS( UPLO, 'No transpose', DIAG, NORMIN, N, A, + $ LDA, WORK, SCALE, RWORK, INFO ) + ELSE +* +* Multiply by inv(A**H). +* + CALL ZLATRS( UPLO, 'Conjugate transpose', DIAG, NORMIN, + $ N, A, LDA, WORK, SCALE, RWORK, INFO ) + END IF + NORMIN = 'Y' +* +* Multiply by 1/SCALE if doing so will not cause overflow. +* + IF( SCALE.NE.ONE ) THEN + IX = IZAMAX( N, WORK, 1 ) + XNORM = CABS1( WORK( IX ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 20 + CALL ZDRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 10 + END IF +* +* Compute the estimate of the reciprocal condition number. +* + IF( AINVNM.NE.ZERO ) + $ RCOND = ( ONE / ANORM ) / AINVNM + END IF +* + 20 CONTINUE + RETURN +* +* End of ZTRCON +* + END diff --git a/dspl/liblapack/SRC/ztrevc.f b/dspl/liblapack/SRC/ztrevc.f new file mode 100644 index 0000000..ea925dd --- /dev/null +++ b/dspl/liblapack/SRC/ztrevc.f @@ -0,0 +1,486 @@ +*> \brief \b ZTREVC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTREVC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, +* LDVR, MM, M, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER HOWMNY, SIDE +* INTEGER INFO, LDT, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTREVC computes some or all of the right and/or left eigenvectors of +*> a complex upper triangular matrix T. +*> Matrices of this type are produced by the Schur factorization of +*> a complex general matrix: A = Q*T*Q**H, as computed by ZHSEQR. +*> +*> The right eigenvector x and the left eigenvector y of T corresponding +*> to an eigenvalue w are defined by: +*> +*> T*x = w*x, (y**H)*T = w*(y**H) +*> +*> where y**H denotes the conjugate transpose of the vector y. +*> The eigenvalues are not input to this routine, but are read directly +*> from the diagonal of T. +*> +*> This routine returns the matrices X and/or Y of right and left +*> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an +*> input matrix. If Q is the unitary factor that reduces a matrix A to +*> Schur form T, then Q*X and Q*Y are the matrices of right and left +*> eigenvectors of A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': compute right eigenvectors only; +*> = 'L': compute left eigenvectors only; +*> = 'B': compute both right and left eigenvectors. +*> \endverbatim +*> +*> \param[in] HOWMNY +*> \verbatim +*> HOWMNY is CHARACTER*1 +*> = 'A': compute all right and/or left eigenvectors; +*> = 'B': compute all right and/or left eigenvectors, +*> backtransformed using the matrices supplied in +*> VR and/or VL; +*> = 'S': compute selected right and/or left eigenvectors, +*> as indicated by the logical array SELECT. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> If HOWMNY = 'S', SELECT specifies the eigenvectors to be +*> computed. +*> The eigenvector corresponding to the j-th eigenvalue is +*> computed if SELECT(j) = .TRUE.. +*> Not referenced if HOWMNY = 'A' or 'B'. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in,out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,N) +*> The upper triangular matrix T. T is modified, but restored +*> on exit. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is COMPLEX*16 array, dimension (LDVL,MM) +*> On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +*> contain an N-by-N matrix Q (usually the unitary matrix Q of +*> Schur vectors returned by ZHSEQR). +*> On exit, if SIDE = 'L' or 'B', VL contains: +*> if HOWMNY = 'A', the matrix Y of left eigenvectors of T; +*> if HOWMNY = 'B', the matrix Q*Y; +*> if HOWMNY = 'S', the left eigenvectors of T specified by +*> SELECT, stored consecutively in the columns +*> of VL, in the same order as their +*> eigenvalues. +*> Not referenced if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. LDVL >= 1, and if +*> SIDE = 'L' or 'B', LDVL >= N. +*> \endverbatim +*> +*> \param[in,out] VR +*> \verbatim +*> VR is COMPLEX*16 array, dimension (LDVR,MM) +*> On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +*> contain an N-by-N matrix Q (usually the unitary matrix Q of +*> Schur vectors returned by ZHSEQR). +*> On exit, if SIDE = 'R' or 'B', VR contains: +*> if HOWMNY = 'A', the matrix X of right eigenvectors of T; +*> if HOWMNY = 'B', the matrix Q*X; +*> if HOWMNY = 'S', the right eigenvectors of T specified by +*> SELECT, stored consecutively in the columns +*> of VR, in the same order as their +*> eigenvalues. +*> Not referenced if SIDE = 'L'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. LDVR >= 1, and if +*> SIDE = 'R' or 'B'; LDVR >= N. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of columns in the arrays VL and/or VR. MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of columns in the arrays VL and/or VR actually +*> used to store the eigenvectors. If HOWMNY = 'A' or 'B', M +*> is set to N. Each selected eigenvector occupies one +*> column. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The algorithm used in this program is basically backward (forward) +*> substitution, with scaling to make the the code robust against +*> possible overflow. +*> +*> Each eigenvector is normalized so that the element of largest +*> magnitude has magnitude 1; here the magnitude of a complex number +*> (x,y) is taken to be |x| + |y|. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, + $ LDVR, MM, M, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDT, LDVL, LDVR, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CMZERO, CMONE + PARAMETER ( CMZERO = ( 0.0D+0, 0.0D+0 ), + $ CMONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL ALLV, BOTHV, LEFTV, OVER, RIGHTV, SOMEV + INTEGER I, II, IS, J, K, KI + DOUBLE PRECISION OVFL, REMAX, SCALE, SMIN, SMLNUM, ULP, UNFL + COMPLEX*16 CDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH, DZASUM + EXTERNAL LSAME, IZAMAX, DLAMCH, DZASUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZDSCAL, ZGEMV, ZLATRS, DLABAD +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + ALLV = LSAME( HOWMNY, 'A' ) + OVER = LSAME( HOWMNY, 'B' ) + SOMEV = LSAME( HOWMNY, 'S' ) +* +* Set M to the number of columns required to store the selected +* eigenvectors. +* + IF( SOMEV ) THEN + M = 0 + DO 10 J = 1, N + IF( SELECT( J ) ) + $ M = M + 1 + 10 CONTINUE + ELSE + M = N + END IF +* + INFO = 0 + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE IF( MM.LT.M ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTREVC', -INFO ) + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Set the constants to control overflow. +* + UNFL = DLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL DLABAD( UNFL, OVFL ) + ULP = DLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) +* +* Store the diagonal elements of T in working array WORK. +* + DO 20 I = 1, N + WORK( I+N ) = T( I, I ) + 20 CONTINUE +* +* Compute 1-norm of each column of strictly upper triangular +* part of T to control overflow in triangular solver. +* + RWORK( 1 ) = ZERO + DO 30 J = 2, N + RWORK( J ) = DZASUM( J-1, T( 1, J ), 1 ) + 30 CONTINUE +* + IF( RIGHTV ) THEN +* +* Compute right eigenvectors. +* + IS = M + DO 80 KI = N, 1, -1 +* + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 80 + END IF + SMIN = MAX( ULP*( CABS1( T( KI, KI ) ) ), SMLNUM ) +* + WORK( 1 ) = CMONE +* +* Form right-hand side. +* + DO 40 K = 1, KI - 1 + WORK( K ) = -T( K, KI ) + 40 CONTINUE +* +* Solve the triangular system: +* (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. +* + DO 50 K = 1, KI - 1 + T( K, K ) = T( K, K ) - T( KI, KI ) + IF( CABS1( T( K, K ) ).LT.SMIN ) + $ T( K, K ) = SMIN + 50 CONTINUE +* + IF( KI.GT.1 ) THEN + CALL ZLATRS( 'Upper', 'No transpose', 'Non-unit', 'Y', + $ KI-1, T, LDT, WORK( 1 ), SCALE, RWORK, + $ INFO ) + WORK( KI ) = SCALE + END IF +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN + CALL ZCOPY( KI, WORK( 1 ), 1, VR( 1, IS ), 1 ) +* + II = IZAMAX( KI, VR( 1, IS ), 1 ) + REMAX = ONE / CABS1( VR( II, IS ) ) + CALL ZDSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 60 K = KI + 1, N + VR( K, IS ) = CMZERO + 60 CONTINUE + ELSE + IF( KI.GT.1 ) + $ CALL ZGEMV( 'N', N, KI-1, CMONE, VR, LDVR, WORK( 1 ), + $ 1, DCMPLX( SCALE ), VR( 1, KI ), 1 ) +* + II = IZAMAX( N, VR( 1, KI ), 1 ) + REMAX = ONE / CABS1( VR( II, KI ) ) + CALL ZDSCAL( N, REMAX, VR( 1, KI ), 1 ) + END IF +* +* Set back the original diagonal elements of T. +* + DO 70 K = 1, KI - 1 + T( K, K ) = WORK( K+N ) + 70 CONTINUE +* + IS = IS - 1 + 80 CONTINUE + END IF +* + IF( LEFTV ) THEN +* +* Compute left eigenvectors. +* + IS = 1 + DO 130 KI = 1, N +* + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 130 + END IF + SMIN = MAX( ULP*( CABS1( T( KI, KI ) ) ), SMLNUM ) +* + WORK( N ) = CMONE +* +* Form right-hand side. +* + DO 90 K = KI + 1, N + WORK( K ) = -DCONJG( T( KI, K ) ) + 90 CONTINUE +* +* Solve the triangular system: +* (T(KI+1:N,KI+1:N) - T(KI,KI))**H * X = SCALE*WORK. +* + DO 100 K = KI + 1, N + T( K, K ) = T( K, K ) - T( KI, KI ) + IF( CABS1( T( K, K ) ).LT.SMIN ) + $ T( K, K ) = SMIN + 100 CONTINUE +* + IF( KI.LT.N ) THEN + CALL ZLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ 'Y', N-KI, T( KI+1, KI+1 ), LDT, + $ WORK( KI+1 ), SCALE, RWORK, INFO ) + WORK( KI ) = SCALE + END IF +* +* Copy the vector x or Q*x to VL and normalize. +* + IF( .NOT.OVER ) THEN + CALL ZCOPY( N-KI+1, WORK( KI ), 1, VL( KI, IS ), 1 ) +* + II = IZAMAX( N-KI+1, VL( KI, IS ), 1 ) + KI - 1 + REMAX = ONE / CABS1( VL( II, IS ) ) + CALL ZDSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) +* + DO 110 K = 1, KI - 1 + VL( K, IS ) = CMZERO + 110 CONTINUE + ELSE + IF( KI.LT.N ) + $ CALL ZGEMV( 'N', N, N-KI, CMONE, VL( 1, KI+1 ), LDVL, + $ WORK( KI+1 ), 1, DCMPLX( SCALE ), + $ VL( 1, KI ), 1 ) +* + II = IZAMAX( N, VL( 1, KI ), 1 ) + REMAX = ONE / CABS1( VL( II, KI ) ) + CALL ZDSCAL( N, REMAX, VL( 1, KI ), 1 ) + END IF +* +* Set back the original diagonal elements of T. +* + DO 120 K = KI + 1, N + T( K, K ) = WORK( K+N ) + 120 CONTINUE +* + IS = IS + 1 + 130 CONTINUE + END IF +* + RETURN +* +* End of ZTREVC +* + END diff --git a/dspl/liblapack/SRC/ztrevc3.f b/dspl/liblapack/SRC/ztrevc3.f new file mode 100644 index 0000000..36f7d82 --- /dev/null +++ b/dspl/liblapack/SRC/ztrevc3.f @@ -0,0 +1,631 @@ +*> \brief \b ZTREVC3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTREVC3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTREVC3( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, +* $ LDVR, MM, M, WORK, LWORK, RWORK, LRWORK, INFO) +* +* .. Scalar Arguments .. +* CHARACTER HOWMNY, SIDE +* INTEGER INFO, LDT, LDVL, LDVR, LWORK, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTREVC3 computes some or all of the right and/or left eigenvectors of +*> a complex upper triangular matrix T. +*> Matrices of this type are produced by the Schur factorization of +*> a complex general matrix: A = Q*T*Q**H, as computed by ZHSEQR. +*> +*> The right eigenvector x and the left eigenvector y of T corresponding +*> to an eigenvalue w are defined by: +*> +*> T*x = w*x, (y**H)*T = w*(y**H) +*> +*> where y**H denotes the conjugate transpose of the vector y. +*> The eigenvalues are not input to this routine, but are read directly +*> from the diagonal of T. +*> +*> This routine returns the matrices X and/or Y of right and left +*> eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an +*> input matrix. If Q is the unitary factor that reduces a matrix A to +*> Schur form T, then Q*X and Q*Y are the matrices of right and left +*> eigenvectors of A. +*> +*> This uses a Level 3 BLAS version of the back transformation. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'R': compute right eigenvectors only; +*> = 'L': compute left eigenvectors only; +*> = 'B': compute both right and left eigenvectors. +*> \endverbatim +*> +*> \param[in] HOWMNY +*> \verbatim +*> HOWMNY is CHARACTER*1 +*> = 'A': compute all right and/or left eigenvectors; +*> = 'B': compute all right and/or left eigenvectors, +*> backtransformed using the matrices supplied in +*> VR and/or VL; +*> = 'S': compute selected right and/or left eigenvectors, +*> as indicated by the logical array SELECT. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> If HOWMNY = 'S', SELECT specifies the eigenvectors to be +*> computed. +*> The eigenvector corresponding to the j-th eigenvalue is +*> computed if SELECT(j) = .TRUE.. +*> Not referenced if HOWMNY = 'A' or 'B'. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in,out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,N) +*> The upper triangular matrix T. T is modified, but restored +*> on exit. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] VL +*> \verbatim +*> VL is COMPLEX*16 array, dimension (LDVL,MM) +*> On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must +*> contain an N-by-N matrix Q (usually the unitary matrix Q of +*> Schur vectors returned by ZHSEQR). +*> On exit, if SIDE = 'L' or 'B', VL contains: +*> if HOWMNY = 'A', the matrix Y of left eigenvectors of T; +*> if HOWMNY = 'B', the matrix Q*Y; +*> if HOWMNY = 'S', the left eigenvectors of T specified by +*> SELECT, stored consecutively in the columns +*> of VL, in the same order as their +*> eigenvalues. +*> Not referenced if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. +*> LDVL >= 1, and if SIDE = 'L' or 'B', LDVL >= N. +*> \endverbatim +*> +*> \param[in,out] VR +*> \verbatim +*> VR is COMPLEX*16 array, dimension (LDVR,MM) +*> On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must +*> contain an N-by-N matrix Q (usually the unitary matrix Q of +*> Schur vectors returned by ZHSEQR). +*> On exit, if SIDE = 'R' or 'B', VR contains: +*> if HOWMNY = 'A', the matrix X of right eigenvectors of T; +*> if HOWMNY = 'B', the matrix Q*X; +*> if HOWMNY = 'S', the right eigenvectors of T specified by +*> SELECT, stored consecutively in the columns +*> of VR, in the same order as their +*> eigenvalues. +*> Not referenced if SIDE = 'L'. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. +*> LDVR >= 1, and if SIDE = 'R' or 'B', LDVR >= N. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of columns in the arrays VL and/or VR. MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of columns in the arrays VL and/or VR actually +*> used to store the eigenvectors. +*> If HOWMNY = 'A' or 'B', M is set to N. +*> Each selected eigenvector occupies one column. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of array WORK. LWORK >= max(1,2*N). +*> For optimum performance, LWORK >= N + 2*N*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (LRWORK) +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of array RWORK. LRWORK >= max(1,N). +*> +*> If LRWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the RWORK array, returns +*> this value as the first entry of the RWORK array, and no error +*> message related to LRWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +* @precisions fortran z -> c +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The algorithm used in this program is basically backward (forward) +*> substitution, with scaling to make the the code robust against +*> possible overflow. +*> +*> Each eigenvector is normalized so that the element of largest +*> magnitude has magnitude 1; here the magnitude of a complex number +*> (x,y) is taken to be |x| + |y|. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTREVC3( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, + $ LDVR, MM, M, WORK, LWORK, RWORK, LRWORK, INFO) + IMPLICIT NONE +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, SIDE + INTEGER INFO, LDT, LDVL, LDVR, LWORK, LRWORK, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) + INTEGER NBMIN, NBMAX + PARAMETER ( NBMIN = 8, NBMAX = 128 ) +* .. +* .. Local Scalars .. + LOGICAL ALLV, BOTHV, LEFTV, LQUERY, OVER, RIGHTV, SOMEV + INTEGER I, II, IS, J, K, KI, IV, MAXWRK, NB + DOUBLE PRECISION OVFL, REMAX, SCALE, SMIN, SMLNUM, ULP, UNFL + COMPLEX*16 CDUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV, IZAMAX + DOUBLE PRECISION DLAMCH, DZASUM + EXTERNAL LSAME, ILAENV, IZAMAX, DLAMCH, DZASUM +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZCOPY, ZDSCAL, ZGEMV, ZLATRS, + $ ZGEMM, DLABAD, ZLASET, ZLACPY +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, CONJG, AIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( AIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + BOTHV = LSAME( SIDE, 'B' ) + RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV + LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV +* + ALLV = LSAME( HOWMNY, 'A' ) + OVER = LSAME( HOWMNY, 'B' ) + SOMEV = LSAME( HOWMNY, 'S' ) +* +* Set M to the number of columns required to store the selected +* eigenvectors. +* + IF( SOMEV ) THEN + M = 0 + DO 10 J = 1, N + IF( SELECT( J ) ) + $ M = M + 1 + 10 CONTINUE + ELSE + M = N + END IF +* + INFO = 0 + NB = ILAENV( 1, 'ZTREVC', SIDE // HOWMNY, N, -1, -1, -1 ) + MAXWRK = N + 2*N*NB + WORK(1) = MAXWRK + RWORK(1) = N + LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 ) + IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN + INFO = -1 + ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE IF( MM.LT.M ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN + INFO = -14 + ELSE IF ( LRWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTREVC3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible. +* + IF( N.EQ.0 ) + $ RETURN +* +* Use blocked version of back-transformation if sufficient workspace. +* Zero-out the workspace to avoid potential NaN propagation. +* + IF( OVER .AND. LWORK .GE. N + 2*N*NBMIN ) THEN + NB = (LWORK - N) / (2*N) + NB = MIN( NB, NBMAX ) + CALL ZLASET( 'F', N, 1+2*NB, CZERO, CZERO, WORK, N ) + ELSE + NB = 1 + END IF +* +* Set the constants to control overflow. +* + UNFL = DLAMCH( 'Safe minimum' ) + OVFL = ONE / UNFL + CALL DLABAD( UNFL, OVFL ) + ULP = DLAMCH( 'Precision' ) + SMLNUM = UNFL*( N / ULP ) +* +* Store the diagonal elements of T in working array WORK. +* + DO 20 I = 1, N + WORK( I ) = T( I, I ) + 20 CONTINUE +* +* Compute 1-norm of each column of strictly upper triangular +* part of T to control overflow in triangular solver. +* + RWORK( 1 ) = ZERO + DO 30 J = 2, N + RWORK( J ) = DZASUM( J-1, T( 1, J ), 1 ) + 30 CONTINUE +* + IF( RIGHTV ) THEN +* +* ============================================================ +* Compute right eigenvectors. +* +* IV is index of column in current block. +* Non-blocked version always uses IV=NB=1; +* blocked version starts with IV=NB, goes down to 1. +* (Note the "0-th" column is used to store the original diagonal.) + IV = NB + IS = M + DO 80 KI = N, 1, -1 + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 80 + END IF + SMIN = MAX( ULP*( CABS1( T( KI, KI ) ) ), SMLNUM ) +* +* -------------------------------------------------------- +* Complex right eigenvector +* + WORK( KI + IV*N ) = CONE +* +* Form right-hand side. +* + DO 40 K = 1, KI - 1 + WORK( K + IV*N ) = -T( K, KI ) + 40 CONTINUE +* +* Solve upper triangular system: +* [ T(1:KI-1,1:KI-1) - T(KI,KI) ]*X = SCALE*WORK. +* + DO 50 K = 1, KI - 1 + T( K, K ) = T( K, K ) - T( KI, KI ) + IF( CABS1( T( K, K ) ).LT.SMIN ) + $ T( K, K ) = SMIN + 50 CONTINUE +* + IF( KI.GT.1 ) THEN + CALL ZLATRS( 'Upper', 'No transpose', 'Non-unit', 'Y', + $ KI-1, T, LDT, WORK( 1 + IV*N ), SCALE, + $ RWORK, INFO ) + WORK( KI + IV*N ) = SCALE + END IF +* +* Copy the vector x or Q*x to VR and normalize. +* + IF( .NOT.OVER ) THEN +* ------------------------------ +* no back-transform: copy x to VR and normalize. + CALL ZCOPY( KI, WORK( 1 + IV*N ), 1, VR( 1, IS ), 1 ) +* + II = IZAMAX( KI, VR( 1, IS ), 1 ) + REMAX = ONE / CABS1( VR( II, IS ) ) + CALL ZDSCAL( KI, REMAX, VR( 1, IS ), 1 ) +* + DO 60 K = KI + 1, N + VR( K, IS ) = CZERO + 60 CONTINUE +* + ELSE IF( NB.EQ.1 ) THEN +* ------------------------------ +* version 1: back-transform each vector with GEMV, Q*x. + IF( KI.GT.1 ) + $ CALL ZGEMV( 'N', N, KI-1, CONE, VR, LDVR, + $ WORK( 1 + IV*N ), 1, DCMPLX( SCALE ), + $ VR( 1, KI ), 1 ) +* + II = IZAMAX( N, VR( 1, KI ), 1 ) + REMAX = ONE / CABS1( VR( II, KI ) ) + CALL ZDSCAL( N, REMAX, VR( 1, KI ), 1 ) +* + ELSE +* ------------------------------ +* version 2: back-transform block of vectors with GEMM +* zero out below vector + DO K = KI + 1, N + WORK( K + IV*N ) = CZERO + END DO +* +* Columns IV:NB of work are valid vectors. +* When the number of vectors stored reaches NB, +* or if this was last vector, do the GEMM + IF( (IV.EQ.1) .OR. (KI.EQ.1) ) THEN + CALL ZGEMM( 'N', 'N', N, NB-IV+1, KI+NB-IV, CONE, + $ VR, LDVR, + $ WORK( 1 + (IV)*N ), N, + $ CZERO, + $ WORK( 1 + (NB+IV)*N ), N ) +* normalize vectors + DO K = IV, NB + II = IZAMAX( N, WORK( 1 + (NB+K)*N ), 1 ) + REMAX = ONE / CABS1( WORK( II + (NB+K)*N ) ) + CALL ZDSCAL( N, REMAX, WORK( 1 + (NB+K)*N ), 1 ) + END DO + CALL ZLACPY( 'F', N, NB-IV+1, + $ WORK( 1 + (NB+IV)*N ), N, + $ VR( 1, KI ), LDVR ) + IV = NB + ELSE + IV = IV - 1 + END IF + END IF +* +* Restore the original diagonal elements of T. +* + DO 70 K = 1, KI - 1 + T( K, K ) = WORK( K ) + 70 CONTINUE +* + IS = IS - 1 + 80 CONTINUE + END IF +* + IF( LEFTV ) THEN +* +* ============================================================ +* Compute left eigenvectors. +* +* IV is index of column in current block. +* Non-blocked version always uses IV=1; +* blocked version starts with IV=1, goes up to NB. +* (Note the "0-th" column is used to store the original diagonal.) + IV = 1 + IS = 1 + DO 130 KI = 1, N +* + IF( SOMEV ) THEN + IF( .NOT.SELECT( KI ) ) + $ GO TO 130 + END IF + SMIN = MAX( ULP*( CABS1( T( KI, KI ) ) ), SMLNUM ) +* +* -------------------------------------------------------- +* Complex left eigenvector +* + WORK( KI + IV*N ) = CONE +* +* Form right-hand side. +* + DO 90 K = KI + 1, N + WORK( K + IV*N ) = -CONJG( T( KI, K ) ) + 90 CONTINUE +* +* Solve conjugate-transposed triangular system: +* [ T(KI+1:N,KI+1:N) - T(KI,KI) ]**H * X = SCALE*WORK. +* + DO 100 K = KI + 1, N + T( K, K ) = T( K, K ) - T( KI, KI ) + IF( CABS1( T( K, K ) ).LT.SMIN ) + $ T( K, K ) = SMIN + 100 CONTINUE +* + IF( KI.LT.N ) THEN + CALL ZLATRS( 'Upper', 'Conjugate transpose', 'Non-unit', + $ 'Y', N-KI, T( KI+1, KI+1 ), LDT, + $ WORK( KI+1 + IV*N ), SCALE, RWORK, INFO ) + WORK( KI + IV*N ) = SCALE + END IF +* +* Copy the vector x or Q*x to VL and normalize. +* + IF( .NOT.OVER ) THEN +* ------------------------------ +* no back-transform: copy x to VL and normalize. + CALL ZCOPY( N-KI+1, WORK( KI + IV*N ), 1, VL(KI,IS), 1 ) +* + II = IZAMAX( N-KI+1, VL( KI, IS ), 1 ) + KI - 1 + REMAX = ONE / CABS1( VL( II, IS ) ) + CALL ZDSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 ) +* + DO 110 K = 1, KI - 1 + VL( K, IS ) = CZERO + 110 CONTINUE +* + ELSE IF( NB.EQ.1 ) THEN +* ------------------------------ +* version 1: back-transform each vector with GEMV, Q*x. + IF( KI.LT.N ) + $ CALL ZGEMV( 'N', N, N-KI, CONE, VL( 1, KI+1 ), LDVL, + $ WORK( KI+1 + IV*N ), 1, DCMPLX( SCALE ), + $ VL( 1, KI ), 1 ) +* + II = IZAMAX( N, VL( 1, KI ), 1 ) + REMAX = ONE / CABS1( VL( II, KI ) ) + CALL ZDSCAL( N, REMAX, VL( 1, KI ), 1 ) +* + ELSE +* ------------------------------ +* version 2: back-transform block of vectors with GEMM +* zero out above vector +* could go from KI-NV+1 to KI-1 + DO K = 1, KI - 1 + WORK( K + IV*N ) = CZERO + END DO +* +* Columns 1:IV of work are valid vectors. +* When the number of vectors stored reaches NB, +* or if this was last vector, do the GEMM + IF( (IV.EQ.NB) .OR. (KI.EQ.N) ) THEN + CALL ZGEMM( 'N', 'N', N, IV, N-KI+IV, CONE, + $ VL( 1, KI-IV+1 ), LDVL, + $ WORK( KI-IV+1 + (1)*N ), N, + $ CZERO, + $ WORK( 1 + (NB+1)*N ), N ) +* normalize vectors + DO K = 1, IV + II = IZAMAX( N, WORK( 1 + (NB+K)*N ), 1 ) + REMAX = ONE / CABS1( WORK( II + (NB+K)*N ) ) + CALL ZDSCAL( N, REMAX, WORK( 1 + (NB+K)*N ), 1 ) + END DO + CALL ZLACPY( 'F', N, IV, + $ WORK( 1 + (NB+1)*N ), N, + $ VL( 1, KI-IV+1 ), LDVL ) + IV = 1 + ELSE + IV = IV + 1 + END IF + END IF +* +* Restore the original diagonal elements of T. +* + DO 120 K = KI + 1, N + T( K, K ) = WORK( K ) + 120 CONTINUE +* + IS = IS + 1 + 130 CONTINUE + END IF +* + RETURN +* +* End of ZTREVC3 +* + END diff --git a/dspl/liblapack/SRC/ztrexc.f b/dspl/liblapack/SRC/ztrexc.f new file mode 100644 index 0000000..4cf352e --- /dev/null +++ b/dspl/liblapack/SRC/ztrexc.f @@ -0,0 +1,241 @@ +*> \brief \b ZTREXC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTREXC + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTREXC( COMPQ, N, T, LDT, Q, LDQ, IFST, ILST, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ +* INTEGER IFST, ILST, INFO, LDQ, LDT, N +* .. +* .. Array Arguments .. +* COMPLEX*16 Q( LDQ, * ), T( LDT, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTREXC reorders the Schur factorization of a complex matrix +*> A = Q*T*Q**H, so that the diagonal element of T with row index IFST +*> is moved to row ILST. +*> +*> The Schur form T is reordered by a unitary similarity transformation +*> Z**H*T*Z, and optionally the matrix Q of Schur vectors is updated by +*> postmultplying it with Z. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> = 'V': update the matrix Q of Schur vectors; +*> = 'N': do not update Q. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> If N == 0 arguments ILST and IFST may be any value. +*> \endverbatim +*> +*> \param[in,out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,N) +*> On entry, the upper triangular matrix T. +*> On exit, the reordered upper triangular matrix. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ,N) +*> On entry, if COMPQ = 'V', the matrix Q of Schur vectors. +*> On exit, if COMPQ = 'V', Q has been postmultiplied by the +*> unitary transformation matrix Z which reorders T. +*> If COMPQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= 1, and if +*> COMPQ = 'V', LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[in] IFST +*> \verbatim +*> IFST is INTEGER +*> \endverbatim +*> +*> \param[in] ILST +*> \verbatim +*> ILST is INTEGER +*> +*> Specify the reordering of the diagonal elements of T: +*> The element with row index IFST is moved to row ILST by a +*> sequence of transpositions between adjacent elements. +*> 1 <= IFST <= N; 1 <= ILST <= N. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZTREXC( COMPQ, N, T, LDT, Q, LDQ, IFST, ILST, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPQ + INTEGER IFST, ILST, INFO, LDQ, LDT, N +* .. +* .. Array Arguments .. + COMPLEX*16 Q( LDQ, * ), T( LDT, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL WANTQ + INTEGER K, M1, M2, M3 + DOUBLE PRECISION CS + COMPLEX*16 SN, T11, T22, TEMP +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARTG, ZROT +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + INFO = 0 + WANTQ = LSAME( COMPQ, 'V' ) + IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.MAX( 1, N ) ) ) THEN + INFO = -6 + ELSE IF(( IFST.LT.1 .OR. IFST.GT.N ).AND.( N.GT.0 )) THEN + INFO = -7 + ELSE IF(( ILST.LT.1 .OR. ILST.GT.N ).AND.( N.GT.0 )) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTREXC', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 .OR. IFST.EQ.ILST ) + $ RETURN +* + IF( IFST.LT.ILST ) THEN +* +* Move the IFST-th diagonal element forward down the diagonal. +* + M1 = 0 + M2 = -1 + M3 = 1 + ELSE +* +* Move the IFST-th diagonal element backward up the diagonal. +* + M1 = -1 + M2 = 0 + M3 = -1 + END IF +* + DO 10 K = IFST + M1, ILST + M2, M3 +* +* Interchange the k-th and (k+1)-th diagonal elements. +* + T11 = T( K, K ) + T22 = T( K+1, K+1 ) +* +* Determine the transformation to perform the interchange. +* + CALL ZLARTG( T( K, K+1 ), T22-T11, CS, SN, TEMP ) +* +* Apply transformation to the matrix T. +* + IF( K+2.LE.N ) + $ CALL ZROT( N-K-1, T( K, K+2 ), LDT, T( K+1, K+2 ), LDT, CS, + $ SN ) + CALL ZROT( K-1, T( 1, K ), 1, T( 1, K+1 ), 1, CS, + $ DCONJG( SN ) ) +* + T( K, K ) = T22 + T( K+1, K+1 ) = T11 +* + IF( WANTQ ) THEN +* +* Accumulate transformation in the matrix Q. +* + CALL ZROT( N, Q( 1, K ), 1, Q( 1, K+1 ), 1, CS, + $ DCONJG( SN ) ) + END IF +* + 10 CONTINUE +* + RETURN +* +* End of ZTREXC +* + END diff --git a/dspl/liblapack/SRC/ztrrfs.f b/dspl/liblapack/SRC/ztrrfs.f new file mode 100644 index 0000000..42c2592 --- /dev/null +++ b/dspl/liblapack/SRC/ztrrfs.f @@ -0,0 +1,479 @@ +*> \brief \b ZTRRFS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTRRFS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, X, +* LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, LDA, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. +* DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) +* COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ), +* $ X( LDX, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTRRFS provides error bounds and backward error estimates for the +*> solution to a system of linear equations with a triangular +*> coefficient matrix. +*> +*> The solution matrix X must be computed by ZTRTRS or some other +*> means before entering this routine. ZTRRFS does not do iterative +*> refinement because doing so cannot improve the backward error. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrices B and X. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The triangular matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of the array A contains the upper +*> triangular matrix, and the strictly lower triangular part of +*> A is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of the array A contains the lower triangular +*> matrix, and the strictly upper triangular part of A is not +*> referenced. If DIAG = 'U', the diagonal elements of A are +*> also not referenced and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> The right hand side matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (LDX,NRHS) +*> The solution matrix X. +*> \endverbatim +*> +*> \param[in] LDX +*> \verbatim +*> LDX is INTEGER +*> The leading dimension of the array X. LDX >= max(1,N). +*> \endverbatim +*> +*> \param[out] FERR +*> \verbatim +*> FERR is DOUBLE PRECISION array, dimension (NRHS) +*> The estimated forward error bound for each solution vector +*> X(j) (the j-th column of the solution matrix X). +*> If XTRUE is the true solution corresponding to X(j), FERR(j) +*> is an estimated upper bound for the magnitude of the largest +*> element in (X(j) - XTRUE) divided by the magnitude of the +*> largest element in X(j). The estimate is as reliable as +*> the estimate for RCOND, and is almost always a slight +*> overestimate of the true error. +*> \endverbatim +*> +*> \param[out] BERR +*> \verbatim +*> BERR is DOUBLE PRECISION array, dimension (NRHS) +*> The componentwise relative backward error of each solution +*> vector X(j) (i.e., the smallest relative change in +*> any element of A or B that makes X(j) an exact solution). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (2*N) +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, X, + $ LDX, FERR, BERR, WORK, RWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDA, LDB, LDX, N, NRHS +* .. +* .. Array Arguments .. + DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * ) + COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * ), + $ X( LDX, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN, NOUNIT, UPPER + CHARACTER TRANSN, TRANST + INTEGER I, J, K, KASE, NZ + DOUBLE PRECISION EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK + COMPLEX*16 ZDUM +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZCOPY, ZLACN2, ZTRMV, ZTRSV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) ) +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOTRAN = LSAME( TRANS, 'N' ) + NOUNIT = LSAME( DIAG, 'N' ) +* + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDX.LT.MAX( 1, N ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTRRFS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN + DO 10 J = 1, NRHS + FERR( J ) = ZERO + BERR( J ) = ZERO + 10 CONTINUE + RETURN + END IF +* + IF( NOTRAN ) THEN + TRANSN = 'N' + TRANST = 'C' + ELSE + TRANSN = 'C' + TRANST = 'N' + END IF +* +* NZ = maximum number of nonzero elements in each row of A, plus 1 +* + NZ = N + 1 + EPS = DLAMCH( 'Epsilon' ) + SAFMIN = DLAMCH( 'Safe minimum' ) + SAFE1 = NZ*SAFMIN + SAFE2 = SAFE1 / EPS +* +* Do for each right hand side +* + DO 250 J = 1, NRHS +* +* Compute residual R = B - op(A) * X, +* where op(A) = A, A**T, or A**H, depending on TRANS. +* + CALL ZCOPY( N, X( 1, J ), 1, WORK, 1 ) + CALL ZTRMV( UPLO, TRANS, DIAG, N, A, LDA, WORK, 1 ) + CALL ZAXPY( N, -ONE, B( 1, J ), 1, WORK, 1 ) +* +* Compute componentwise relative backward error from formula +* +* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) +* +* where abs(Z) is the componentwise absolute value of the matrix +* or vector Z. If the i-th component of the denominator is less +* than SAFE2, then SAFE1 is added to the i-th components of the +* numerator and denominator before dividing. +* + DO 20 I = 1, N + RWORK( I ) = CABS1( B( I, J ) ) + 20 CONTINUE +* + IF( NOTRAN ) THEN +* +* Compute abs(A)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 40 K = 1, N + XK = CABS1( X( K, J ) ) + DO 30 I = 1, K + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + 30 CONTINUE + 40 CONTINUE + ELSE + DO 60 K = 1, N + XK = CABS1( X( K, J ) ) + DO 50 I = 1, K - 1 + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + 50 CONTINUE + RWORK( K ) = RWORK( K ) + XK + 60 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 80 K = 1, N + XK = CABS1( X( K, J ) ) + DO 70 I = K, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + 70 CONTINUE + 80 CONTINUE + ELSE + DO 100 K = 1, N + XK = CABS1( X( K, J ) ) + DO 90 I = K + 1, N + RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK + 90 CONTINUE + RWORK( K ) = RWORK( K ) + XK + 100 CONTINUE + END IF + END IF + ELSE +* +* Compute abs(A**H)*abs(X) + abs(B). +* + IF( UPPER ) THEN + IF( NOUNIT ) THEN + DO 120 K = 1, N + S = ZERO + DO 110 I = 1, K + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 110 CONTINUE + RWORK( K ) = RWORK( K ) + S + 120 CONTINUE + ELSE + DO 140 K = 1, N + S = CABS1( X( K, J ) ) + DO 130 I = 1, K - 1 + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 130 CONTINUE + RWORK( K ) = RWORK( K ) + S + 140 CONTINUE + END IF + ELSE + IF( NOUNIT ) THEN + DO 160 K = 1, N + S = ZERO + DO 150 I = K, N + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 150 CONTINUE + RWORK( K ) = RWORK( K ) + S + 160 CONTINUE + ELSE + DO 180 K = 1, N + S = CABS1( X( K, J ) ) + DO 170 I = K + 1, N + S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) ) + 170 CONTINUE + RWORK( K ) = RWORK( K ) + S + 180 CONTINUE + END IF + END IF + END IF + S = ZERO + DO 190 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) ) + ELSE + S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) / + $ ( RWORK( I )+SAFE1 ) ) + END IF + 190 CONTINUE + BERR( J ) = S +* +* Bound error from formula +* +* norm(X - XTRUE) / norm(X) .le. FERR = +* norm( abs(inv(op(A)))* +* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) +* +* where +* norm(Z) is the magnitude of the largest component of Z +* inv(op(A)) is the inverse of op(A) +* abs(Z) is the componentwise absolute value of the matrix or +* vector Z +* NZ is the maximum number of nonzeros in any row of A, plus 1 +* EPS is machine epsilon +* +* The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) +* is incremented by SAFE1 if the i-th component of +* abs(op(A))*abs(X) + abs(B) is less than SAFE2. +* +* Use ZLACN2 to estimate the infinity-norm of the matrix +* inv(op(A)) * diag(W), +* where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) +* + DO 200 I = 1, N + IF( RWORK( I ).GT.SAFE2 ) THEN + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + ELSE + RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) + + $ SAFE1 + END IF + 200 CONTINUE +* + KASE = 0 + 210 CONTINUE + CALL ZLACN2( N, WORK( N+1 ), WORK, FERR( J ), KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Multiply by diag(W)*inv(op(A)**H). +* + CALL ZTRSV( UPLO, TRANST, DIAG, N, A, LDA, WORK, 1 ) + DO 220 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 220 CONTINUE + ELSE +* +* Multiply by inv(op(A))*diag(W). +* + DO 230 I = 1, N + WORK( I ) = RWORK( I )*WORK( I ) + 230 CONTINUE + CALL ZTRSV( UPLO, TRANSN, DIAG, N, A, LDA, WORK, 1 ) + END IF + GO TO 210 + END IF +* +* Normalize error. +* + LSTRES = ZERO + DO 240 I = 1, N + LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) ) + 240 CONTINUE + IF( LSTRES.NE.ZERO ) + $ FERR( J ) = FERR( J ) / LSTRES +* + 250 CONTINUE +* + RETURN +* +* End of ZTRRFS +* + END diff --git a/dspl/liblapack/SRC/ztrsen.f b/dspl/liblapack/SRC/ztrsen.f new file mode 100644 index 0000000..e033ca5 --- /dev/null +++ b/dspl/liblapack/SRC/ztrsen.f @@ -0,0 +1,456 @@ +*> \brief \b ZTRSEN +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTRSEN + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTRSEN( JOB, COMPQ, SELECT, N, T, LDT, Q, LDQ, W, M, S, +* SEP, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPQ, JOB +* INTEGER INFO, LDQ, LDT, LWORK, M, N +* DOUBLE PRECISION S, SEP +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* COMPLEX*16 Q( LDQ, * ), T( LDT, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTRSEN reorders the Schur factorization of a complex matrix +*> A = Q*T*Q**H, so that a selected cluster of eigenvalues appears in +*> the leading positions on the diagonal of the upper triangular matrix +*> T, and the leading columns of Q form an orthonormal basis of the +*> corresponding right invariant subspace. +*> +*> Optionally the routine computes the reciprocal condition numbers of +*> the cluster of eigenvalues and/or the invariant subspace. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies whether condition numbers are required for the +*> cluster of eigenvalues (S) or the invariant subspace (SEP): +*> = 'N': none; +*> = 'E': for eigenvalues only (S); +*> = 'V': for invariant subspace only (SEP); +*> = 'B': for both eigenvalues and invariant subspace (S and +*> SEP). +*> \endverbatim +*> +*> \param[in] COMPQ +*> \verbatim +*> COMPQ is CHARACTER*1 +*> = 'V': update the matrix Q of Schur vectors; +*> = 'N': do not update Q. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> SELECT specifies the eigenvalues in the selected cluster. To +*> select the j-th eigenvalue, SELECT(j) must be set to .TRUE.. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in,out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,N) +*> On entry, the upper triangular matrix T. +*> On exit, T is overwritten by the reordered matrix T, with the +*> selected eigenvalues as the leading diagonal elements. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ,N) +*> On entry, if COMPQ = 'V', the matrix Q of Schur vectors. +*> On exit, if COMPQ = 'V', Q has been postmultiplied by the +*> unitary transformation matrix which reorders T; the leading M +*> columns of Q form an orthonormal basis for the specified +*> invariant subspace. +*> If COMPQ = 'N', Q is not referenced. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= 1; and if COMPQ = 'V', LDQ >= N. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX*16 array, dimension (N) +*> The reordered eigenvalues of T, in the same order as they +*> appear on the diagonal of T. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The dimension of the specified invariant subspace. +*> 0 <= M <= N. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION +*> If JOB = 'E' or 'B', S is a lower bound on the reciprocal +*> condition number for the selected cluster of eigenvalues. +*> S cannot underestimate the true reciprocal condition number +*> by more than a factor of sqrt(N). If M = 0 or N, S = 1. +*> If JOB = 'N' or 'V', S is not referenced. +*> \endverbatim +*> +*> \param[out] SEP +*> \verbatim +*> SEP is DOUBLE PRECISION +*> If JOB = 'V' or 'B', SEP is the estimated reciprocal +*> condition number of the specified invariant subspace. If +*> M = 0 or N, SEP = norm(T). +*> If JOB = 'N' or 'E', SEP is not referenced. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If JOB = 'N', LWORK >= 1; +*> if JOB = 'E', LWORK = max(1,M*(N-M)); +*> if JOB = 'V' or 'B', LWORK >= max(1,2*M*(N-M)). +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> ZTRSEN first collects the selected eigenvalues by computing a unitary +*> transformation Z to move them to the top left corner of T. In other +*> words, the selected eigenvalues are the eigenvalues of T11 in: +*> +*> Z**H * T * Z = ( T11 T12 ) n1 +*> ( 0 T22 ) n2 +*> n1 n2 +*> +*> where N = n1+n2. The first +*> n1 columns of Z span the specified invariant subspace of T. +*> +*> If T has been obtained from the Schur factorization of a matrix +*> A = Q*T*Q**H, then the reordered Schur factorization of A is given by +*> A = (Q*Z)*(Z**H*T*Z)*(Q*Z)**H, and the first n1 columns of Q*Z span the +*> corresponding invariant subspace of A. +*> +*> The reciprocal condition number of the average of the eigenvalues of +*> T11 may be returned in S. S lies between 0 (very badly conditioned) +*> and 1 (very well conditioned). It is computed as follows. First we +*> compute R so that +*> +*> P = ( I R ) n1 +*> ( 0 0 ) n2 +*> n1 n2 +*> +*> is the projector on the invariant subspace associated with T11. +*> R is the solution of the Sylvester equation: +*> +*> T11*R - R*T22 = T12. +*> +*> Let F-norm(M) denote the Frobenius-norm of M and 2-norm(M) denote +*> the two-norm of M. Then S is computed as the lower bound +*> +*> (1 + F-norm(R)**2)**(-1/2) +*> +*> on the reciprocal of 2-norm(P), the true reciprocal condition number. +*> S cannot underestimate 1 / 2-norm(P) by more than a factor of +*> sqrt(N). +*> +*> An approximate error bound for the computed average of the +*> eigenvalues of T11 is +*> +*> EPS * norm(T) / S +*> +*> where EPS is the machine precision. +*> +*> The reciprocal condition number of the right invariant subspace +*> spanned by the first n1 columns of Z (or of Q*Z) is returned in SEP. +*> SEP is defined as the separation of T11 and T22: +*> +*> sep( T11, T22 ) = sigma-min( C ) +*> +*> where sigma-min(C) is the smallest singular value of the +*> n1*n2-by-n1*n2 matrix +*> +*> C = kprod( I(n2), T11 ) - kprod( transpose(T22), I(n1) ) +*> +*> I(m) is an m by m identity matrix, and kprod denotes the Kronecker +*> product. We estimate sigma-min(C) by the reciprocal of an estimate of +*> the 1-norm of inverse(C). The true reciprocal 1-norm of inverse(C) +*> cannot differ from sigma-min(C) by more than a factor of sqrt(n1*n2). +*> +*> When SEP is small, small changes in T can cause large changes in +*> the invariant subspace. An approximate bound on the maximum angular +*> error in the computed right invariant subspace is +*> +*> EPS * norm(T) / SEP +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTRSEN( JOB, COMPQ, SELECT, N, T, LDT, Q, LDQ, W, M, S, + $ SEP, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER COMPQ, JOB + INTEGER INFO, LDQ, LDT, LWORK, M, N + DOUBLE PRECISION S, SEP +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + COMPLEX*16 Q( LDQ, * ), T( LDT, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTBH, WANTQ, WANTS, WANTSP + INTEGER IERR, K, KASE, KS, LWMIN, N1, N2, NN + DOUBLE PRECISION EST, RNORM, SCALE +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) + DOUBLE PRECISION RWORK( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION ZLANGE + EXTERNAL LSAME, ZLANGE +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACN2, ZLACPY, ZTREXC, ZTRSYL +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters. +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH + WANTQ = LSAME( COMPQ, 'V' ) +* +* Set M to the number of selected eigenvalues. +* + M = 0 + DO 10 K = 1, N + IF( SELECT( K ) ) + $ M = M + 1 + 10 CONTINUE +* + N1 = M + N2 = N - M + NN = N1*N2 +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) +* + IF( WANTSP ) THEN + LWMIN = MAX( 1, 2*NN ) + ELSE IF( LSAME( JOB, 'N' ) ) THEN + LWMIN = 1 + ELSE IF( LSAME( JOB, 'E' ) ) THEN + LWMIN = MAX( 1, NN ) + END IF +* + IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.WANTS .AND. .NOT.WANTSP ) + $ THEN + INFO = -1 + ELSE IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN + INFO = -8 + ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = LWMIN + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTRSEN', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.N .OR. M.EQ.0 ) THEN + IF( WANTS ) + $ S = ONE + IF( WANTSP ) + $ SEP = ZLANGE( '1', N, N, T, LDT, RWORK ) + GO TO 40 + END IF +* +* Collect the selected eigenvalues at the top left corner of T. +* + KS = 0 + DO 20 K = 1, N + IF( SELECT( K ) ) THEN + KS = KS + 1 +* +* Swap the K-th eigenvalue to position KS. +* + IF( K.NE.KS ) + $ CALL ZTREXC( COMPQ, N, T, LDT, Q, LDQ, K, KS, IERR ) + END IF + 20 CONTINUE +* + IF( WANTS ) THEN +* +* Solve the Sylvester equation for R: +* +* T11*R - R*T22 = scale*T12 +* + CALL ZLACPY( 'F', N1, N2, T( 1, N1+1 ), LDT, WORK, N1 ) + CALL ZTRSYL( 'N', 'N', -1, N1, N2, T, LDT, T( N1+1, N1+1 ), + $ LDT, WORK, N1, SCALE, IERR ) +* +* Estimate the reciprocal of the condition number of the cluster +* of eigenvalues. +* + RNORM = ZLANGE( 'F', N1, N2, WORK, N1, RWORK ) + IF( RNORM.EQ.ZERO ) THEN + S = ONE + ELSE + S = SCALE / ( SQRT( SCALE*SCALE / RNORM+RNORM )* + $ SQRT( RNORM ) ) + END IF + END IF +* + IF( WANTSP ) THEN +* +* Estimate sep(T11,T22). +* + EST = ZERO + KASE = 0 + 30 CONTINUE + CALL ZLACN2( NN, WORK( NN+1 ), WORK, EST, KASE, ISAVE ) + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve T11*R - R*T22 = scale*X. +* + CALL ZTRSYL( 'N', 'N', -1, N1, N2, T, LDT, + $ T( N1+1, N1+1 ), LDT, WORK, N1, SCALE, + $ IERR ) + ELSE +* +* Solve T11**H*R - R*T22**H = scale*X. +* + CALL ZTRSYL( 'C', 'C', -1, N1, N2, T, LDT, + $ T( N1+1, N1+1 ), LDT, WORK, N1, SCALE, + $ IERR ) + END IF + GO TO 30 + END IF +* + SEP = SCALE / EST + END IF +* + 40 CONTINUE +* +* Copy reordered eigenvalues to W. +* + DO 50 K = 1, N + W( K ) = T( K, K ) + 50 CONTINUE +* + WORK( 1 ) = LWMIN +* + RETURN +* +* End of ZTRSEN +* + END diff --git a/dspl/liblapack/SRC/ztrsna.f b/dspl/liblapack/SRC/ztrsna.f new file mode 100644 index 0000000..97a42a5 --- /dev/null +++ b/dspl/liblapack/SRC/ztrsna.f @@ -0,0 +1,464 @@ +*> \brief \b ZTRSNA +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTRSNA + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTRSNA( JOB, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, +* LDVR, S, SEP, MM, M, WORK, LDWORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER HOWMNY, JOB +* INTEGER INFO, LDT, LDVL, LDVR, LDWORK, M, MM, N +* .. +* .. Array Arguments .. +* LOGICAL SELECT( * ) +* DOUBLE PRECISION RWORK( * ), S( * ), SEP( * ) +* COMPLEX*16 T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), +* $ WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTRSNA estimates reciprocal condition numbers for specified +*> eigenvalues and/or right eigenvectors of a complex upper triangular +*> matrix T (or of any matrix Q*T*Q**H with Q unitary). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOB +*> \verbatim +*> JOB is CHARACTER*1 +*> Specifies whether condition numbers are required for +*> eigenvalues (S) or eigenvectors (SEP): +*> = 'E': for eigenvalues only (S); +*> = 'V': for eigenvectors only (SEP); +*> = 'B': for both eigenvalues and eigenvectors (S and SEP). +*> \endverbatim +*> +*> \param[in] HOWMNY +*> \verbatim +*> HOWMNY is CHARACTER*1 +*> = 'A': compute condition numbers for all eigenpairs; +*> = 'S': compute condition numbers for selected eigenpairs +*> specified by the array SELECT. +*> \endverbatim +*> +*> \param[in] SELECT +*> \verbatim +*> SELECT is LOGICAL array, dimension (N) +*> If HOWMNY = 'S', SELECT specifies the eigenpairs for which +*> condition numbers are required. To select condition numbers +*> for the j-th eigenpair, SELECT(j) must be set to .TRUE.. +*> If HOWMNY = 'A', SELECT is not referenced. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix T. N >= 0. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,N) +*> The upper triangular matrix T. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= max(1,N). +*> \endverbatim +*> +*> \param[in] VL +*> \verbatim +*> VL is COMPLEX*16 array, dimension (LDVL,M) +*> If JOB = 'E' or 'B', VL must contain left eigenvectors of T +*> (or of any Q*T*Q**H with Q unitary), corresponding to the +*> eigenpairs specified by HOWMNY and SELECT. The eigenvectors +*> must be stored in consecutive columns of VL, as returned by +*> ZHSEIN or ZTREVC. +*> If JOB = 'V', VL is not referenced. +*> \endverbatim +*> +*> \param[in] LDVL +*> \verbatim +*> LDVL is INTEGER +*> The leading dimension of the array VL. +*> LDVL >= 1; and if JOB = 'E' or 'B', LDVL >= N. +*> \endverbatim +*> +*> \param[in] VR +*> \verbatim +*> VR is COMPLEX*16 array, dimension (LDVR,M) +*> If JOB = 'E' or 'B', VR must contain right eigenvectors of T +*> (or of any Q*T*Q**H with Q unitary), corresponding to the +*> eigenpairs specified by HOWMNY and SELECT. The eigenvectors +*> must be stored in consecutive columns of VR, as returned by +*> ZHSEIN or ZTREVC. +*> If JOB = 'V', VR is not referenced. +*> \endverbatim +*> +*> \param[in] LDVR +*> \verbatim +*> LDVR is INTEGER +*> The leading dimension of the array VR. +*> LDVR >= 1; and if JOB = 'E' or 'B', LDVR >= N. +*> \endverbatim +*> +*> \param[out] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension (MM) +*> If JOB = 'E' or 'B', the reciprocal condition numbers of the +*> selected eigenvalues, stored in consecutive elements of the +*> array. Thus S(j), SEP(j), and the j-th columns of VL and VR +*> all correspond to the same eigenpair (but not in general the +*> j-th eigenpair, unless all eigenpairs are selected). +*> If JOB = 'V', S is not referenced. +*> \endverbatim +*> +*> \param[out] SEP +*> \verbatim +*> SEP is DOUBLE PRECISION array, dimension (MM) +*> If JOB = 'V' or 'B', the estimated reciprocal condition +*> numbers of the selected eigenvectors, stored in consecutive +*> elements of the array. +*> If JOB = 'E', SEP is not referenced. +*> \endverbatim +*> +*> \param[in] MM +*> \verbatim +*> MM is INTEGER +*> The number of elements in the arrays S (if JOB = 'E' or 'B') +*> and/or SEP (if JOB = 'V' or 'B'). MM >= M. +*> \endverbatim +*> +*> \param[out] M +*> \verbatim +*> M is INTEGER +*> The number of elements of the arrays S and/or SEP actually +*> used to store the estimated condition numbers. +*> If HOWMNY = 'A', M is set to N. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LDWORK,N+6) +*> If JOB = 'E', WORK is not referenced. +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. +*> LDWORK >= 1; and if JOB = 'V' or 'B', LDWORK >= N. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (N) +*> If JOB = 'E', RWORK is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2017 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The reciprocal of the condition number of an eigenvalue lambda is +*> defined as +*> +*> S(lambda) = |v**H*u| / (norm(u)*norm(v)) +*> +*> where u and v are the right and left eigenvectors of T corresponding +*> to lambda; v**H denotes the conjugate transpose of v, and norm(u) +*> denotes the Euclidean norm. These reciprocal condition numbers always +*> lie between zero (very badly conditioned) and one (very well +*> conditioned). If n = 1, S(lambda) is defined to be 1. +*> +*> An approximate error bound for a computed eigenvalue W(i) is given by +*> +*> EPS * norm(T) / S(i) +*> +*> where EPS is the machine precision. +*> +*> The reciprocal of the condition number of the right eigenvector u +*> corresponding to lambda is defined as follows. Suppose +*> +*> T = ( lambda c ) +*> ( 0 T22 ) +*> +*> Then the reciprocal condition number is +*> +*> SEP( lambda, T22 ) = sigma-min( T22 - lambda*I ) +*> +*> where sigma-min denotes the smallest singular value. We approximate +*> the smallest singular value by the reciprocal of an estimate of the +*> one-norm of the inverse of T22 - lambda*I. If n = 1, SEP(1) is +*> defined to be abs(T(1,1)). +*> +*> An approximate error bound for a computed right eigenvector VR(i) +*> is given by +*> +*> EPS * norm(T) / SEP(i) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTRSNA( JOB, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR, + $ LDVR, S, SEP, MM, M, WORK, LDWORK, RWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2017 +* +* .. Scalar Arguments .. + CHARACTER HOWMNY, JOB + INTEGER INFO, LDT, LDVL, LDVR, LDWORK, M, MM, N +* .. +* .. Array Arguments .. + LOGICAL SELECT( * ) + DOUBLE PRECISION RWORK( * ), S( * ), SEP( * ) + COMPLEX*16 T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), + $ WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D0+0 ) +* .. +* .. Local Scalars .. + LOGICAL SOMCON, WANTBH, WANTS, WANTSP + CHARACTER NORMIN + INTEGER I, IERR, IX, J, K, KASE, KS + DOUBLE PRECISION BIGNUM, EPS, EST, LNRM, RNRM, SCALE, SMLNUM, + $ XNORM + COMPLEX*16 CDUM, PROD +* .. +* .. Local Arrays .. + INTEGER ISAVE( 3 ) + COMPLEX*16 DUMMY( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER IZAMAX + DOUBLE PRECISION DLAMCH, DZNRM2 + COMPLEX*16 ZDOTC + EXTERNAL LSAME, IZAMAX, DLAMCH, DZNRM2, ZDOTC +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZDRSCL, ZLACN2, ZLACPY, ZLATRS, ZTREXC, + $ DLABAD +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG, MAX +* .. +* .. Statement Functions .. + DOUBLE PRECISION CABS1 +* .. +* .. Statement Function definitions .. + CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) +* .. +* .. Executable Statements .. +* +* Decode and test the input parameters +* + WANTBH = LSAME( JOB, 'B' ) + WANTS = LSAME( JOB, 'E' ) .OR. WANTBH + WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH +* + SOMCON = LSAME( HOWMNY, 'S' ) +* +* Set M to the number of eigenpairs for which condition numbers are +* to be computed. +* + IF( SOMCON ) THEN + M = 0 + DO 10 J = 1, N + IF( SELECT( J ) ) + $ M = M + 1 + 10 CONTINUE + ELSE + M = N + END IF +* + INFO = 0 + IF( .NOT.WANTS .AND. .NOT.WANTSP ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDT.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDVL.LT.1 .OR. ( WANTS .AND. LDVL.LT.N ) ) THEN + INFO = -8 + ELSE IF( LDVR.LT.1 .OR. ( WANTS .AND. LDVR.LT.N ) ) THEN + INFO = -10 + ELSE IF( MM.LT.M ) THEN + INFO = -13 + ELSE IF( LDWORK.LT.1 .OR. ( WANTSP .AND. LDWORK.LT.N ) ) THEN + INFO = -16 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTRSNA', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( SOMCON ) THEN + IF( .NOT.SELECT( 1 ) ) + $ RETURN + END IF + IF( WANTS ) + $ S( 1 ) = ONE + IF( WANTSP ) + $ SEP( 1 ) = ABS( T( 1, 1 ) ) + RETURN + END IF +* +* Get machine constants +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) / EPS + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) +* + KS = 1 + DO 50 K = 1, N +* + IF( SOMCON ) THEN + IF( .NOT.SELECT( K ) ) + $ GO TO 50 + END IF +* + IF( WANTS ) THEN +* +* Compute the reciprocal condition number of the k-th +* eigenvalue. +* + PROD = ZDOTC( N, VR( 1, KS ), 1, VL( 1, KS ), 1 ) + RNRM = DZNRM2( N, VR( 1, KS ), 1 ) + LNRM = DZNRM2( N, VL( 1, KS ), 1 ) + S( KS ) = ABS( PROD ) / ( RNRM*LNRM ) +* + END IF +* + IF( WANTSP ) THEN +* +* Estimate the reciprocal condition number of the k-th +* eigenvector. +* +* Copy the matrix T to the array WORK and swap the k-th +* diagonal element to the (1,1) position. +* + CALL ZLACPY( 'Full', N, N, T, LDT, WORK, LDWORK ) + CALL ZTREXC( 'No Q', N, WORK, LDWORK, DUMMY, 1, K, 1, IERR ) +* +* Form C = T22 - lambda*I in WORK(2:N,2:N). +* + DO 20 I = 2, N + WORK( I, I ) = WORK( I, I ) - WORK( 1, 1 ) + 20 CONTINUE +* +* Estimate a lower bound for the 1-norm of inv(C**H). The 1st +* and (N+1)th columns of WORK are used to store work vectors. +* + SEP( KS ) = ZERO + EST = ZERO + KASE = 0 + NORMIN = 'N' + 30 CONTINUE + CALL ZLACN2( N-1, WORK( 1, N+1 ), WORK, EST, KASE, ISAVE ) +* + IF( KASE.NE.0 ) THEN + IF( KASE.EQ.1 ) THEN +* +* Solve C**H*x = scale*b +* + CALL ZLATRS( 'Upper', 'Conjugate transpose', + $ 'Nonunit', NORMIN, N-1, WORK( 2, 2 ), + $ LDWORK, WORK, SCALE, RWORK, IERR ) + ELSE +* +* Solve C*x = scale*b +* + CALL ZLATRS( 'Upper', 'No transpose', 'Nonunit', + $ NORMIN, N-1, WORK( 2, 2 ), LDWORK, WORK, + $ SCALE, RWORK, IERR ) + END IF + NORMIN = 'Y' + IF( SCALE.NE.ONE ) THEN +* +* Multiply by 1/SCALE if doing so will not cause +* overflow. +* + IX = IZAMAX( N-1, WORK, 1 ) + XNORM = CABS1( WORK( IX, 1 ) ) + IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO ) + $ GO TO 40 + CALL ZDRSCL( N, SCALE, WORK, 1 ) + END IF + GO TO 30 + END IF +* + SEP( KS ) = ONE / MAX( EST, SMLNUM ) + END IF +* + 40 CONTINUE + KS = KS + 1 + 50 CONTINUE + RETURN +* +* End of ZTRSNA +* + END diff --git a/dspl/liblapack/SRC/ztrsyl.f b/dspl/liblapack/SRC/ztrsyl.f new file mode 100644 index 0000000..6fd0354 --- /dev/null +++ b/dspl/liblapack/SRC/ztrsyl.f @@ -0,0 +1,454 @@ +*> \brief \b ZTRSYL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTRSYL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, +* LDC, SCALE, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANA, TRANB +* INTEGER INFO, ISGN, LDA, LDB, LDC, M, N +* DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTRSYL solves the complex Sylvester matrix equation: +*> +*> op(A)*X + X*op(B) = scale*C or +*> op(A)*X - X*op(B) = scale*C, +*> +*> where op(A) = A or A**H, and A and B are both upper triangular. A is +*> M-by-M and B is N-by-N; the right hand side C and the solution X are +*> M-by-N; and scale is an output scale factor, set <= 1 to avoid +*> overflow in X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANA +*> \verbatim +*> TRANA is CHARACTER*1 +*> Specifies the option op(A): +*> = 'N': op(A) = A (No transpose) +*> = 'C': op(A) = A**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] TRANB +*> \verbatim +*> TRANB is CHARACTER*1 +*> Specifies the option op(B): +*> = 'N': op(B) = B (No transpose) +*> = 'C': op(B) = B**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] ISGN +*> \verbatim +*> ISGN is INTEGER +*> Specifies the sign in the equation: +*> = +1: solve op(A)*X + X*op(B) = scale*C +*> = -1: solve op(A)*X - X*op(B) = scale*C +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The order of the matrix A, and the number of rows in the +*> matrices X and C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix B, and the number of columns in the +*> matrices X and C. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,M) +*> The upper triangular matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,N) +*> The upper triangular matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N right hand side matrix C. +*> On exit, C is overwritten by the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M) +*> \endverbatim +*> +*> \param[out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> The scale factor, scale, set <= 1 to avoid overflow in X. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> = 1: A and B have common or very close eigenvalues; perturbed +*> values were used to solve the equation (but the matrices +*> A and B are unchanged). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16SYcomputational +* +* ===================================================================== + SUBROUTINE ZTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C, + $ LDC, SCALE, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANA, TRANB + INTEGER INFO, ISGN, LDA, LDB, LDC, M, N + DOUBLE PRECISION SCALE +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ), C( LDC, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRNA, NOTRNB + INTEGER J, K, L + DOUBLE PRECISION BIGNUM, DA11, DB, EPS, SCALOC, SGN, SMIN, + $ SMLNUM + COMPLEX*16 A11, SUML, SUMR, VEC, X11 +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUM( 1 ) +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, ZLANGE + COMPLEX*16 ZDOTC, ZDOTU, ZLADIV + EXTERNAL LSAME, DLAMCH, ZLANGE, ZDOTC, ZDOTU, ZLADIV +* .. +* .. External Subroutines .. + EXTERNAL DLABAD, XERBLA, ZDSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN +* .. +* .. Executable Statements .. +* +* Decode and Test input parameters +* + NOTRNA = LSAME( TRANA, 'N' ) + NOTRNB = LSAME( TRANB, 'N' ) +* + INFO = 0 + IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'C' ) ) THEN + INFO = -2 + ELSE IF( ISGN.NE.1 .AND. ISGN.NE.-1 ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTRSYL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + SCALE = ONE + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* +* Set constants to control overflow +* + EPS = DLAMCH( 'P' ) + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM + CALL DLABAD( SMLNUM, BIGNUM ) + SMLNUM = SMLNUM*DBLE( M*N ) / EPS + BIGNUM = ONE / SMLNUM + SMIN = MAX( SMLNUM, EPS*ZLANGE( 'M', M, M, A, LDA, DUM ), + $ EPS*ZLANGE( 'M', N, N, B, LDB, DUM ) ) + SGN = ISGN +* + IF( NOTRNA .AND. NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* bottom-left corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* M L-1 +* R(K,L) = SUM [A(K,I)*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)]. +* I=K+1 J=1 +* + DO 30 L = 1, N + DO 20 K = M, 1, -1 +* + SUML = ZDOTU( M-K, A( K, MIN( K+1, M ) ), LDA, + $ C( MIN( K+1, M ), L ), 1 ) + SUMR = ZDOTU( L-1, C( K, 1 ), LDC, B( 1, L ), 1 ) + VEC = C( K, L ) - ( SUML+SGN*SUMR ) +* + SCALOC = ONE + A11 = A( K, K ) + SGN*B( L, L ) + DA11 = ABS( DBLE( A11 ) ) + ABS( DIMAG( A11 ) ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( DBLE( VEC ) ) + ABS( DIMAG( VEC ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF + X11 = ZLADIV( VEC*DCMPLX( SCALOC ), A11 ) +* + IF( SCALOC.NE.ONE ) THEN + DO 10 J = 1, N + CALL ZDSCAL( M, SCALOC, C( 1, J ), 1 ) + 10 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K, L ) = X11 +* + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( .NOT.NOTRNA .AND. NOTRNB ) THEN +* +* Solve A**H *X + ISGN*X*B = scale*C. +* +* The (K,L)th block of X is determined starting from +* upper-left corner column by column by +* +* A**H(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L) +* +* Where +* K-1 L-1 +* R(K,L) = SUM [A**H(I,K)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)] +* I=1 J=1 +* + DO 60 L = 1, N + DO 50 K = 1, M +* + SUML = ZDOTC( K-1, A( 1, K ), 1, C( 1, L ), 1 ) + SUMR = ZDOTU( L-1, C( K, 1 ), LDC, B( 1, L ), 1 ) + VEC = C( K, L ) - ( SUML+SGN*SUMR ) +* + SCALOC = ONE + A11 = DCONJG( A( K, K ) ) + SGN*B( L, L ) + DA11 = ABS( DBLE( A11 ) ) + ABS( DIMAG( A11 ) ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( DBLE( VEC ) ) + ABS( DIMAG( VEC ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF +* + X11 = ZLADIV( VEC*DCMPLX( SCALOC ), A11 ) +* + IF( SCALOC.NE.ONE ) THEN + DO 40 J = 1, N + CALL ZDSCAL( M, SCALOC, C( 1, J ), 1 ) + 40 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K, L ) = X11 +* + 50 CONTINUE + 60 CONTINUE +* + ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A**H*X + ISGN*X*B**H = C. +* +* The (K,L)th block of X is determined starting from +* upper-right corner column by column by +* +* A**H(K,K)*X(K,L) + ISGN*X(K,L)*B**H(L,L) = C(K,L) - R(K,L) +* +* Where +* K-1 +* R(K,L) = SUM [A**H(I,K)*X(I,L)] + +* I=1 +* N +* ISGN*SUM [X(K,J)*B**H(L,J)]. +* J=L+1 +* + DO 90 L = N, 1, -1 + DO 80 K = 1, M +* + SUML = ZDOTC( K-1, A( 1, K ), 1, C( 1, L ), 1 ) + SUMR = ZDOTC( N-L, C( K, MIN( L+1, N ) ), LDC, + $ B( L, MIN( L+1, N ) ), LDB ) + VEC = C( K, L ) - ( SUML+SGN*DCONJG( SUMR ) ) +* + SCALOC = ONE + A11 = DCONJG( A( K, K )+SGN*B( L, L ) ) + DA11 = ABS( DBLE( A11 ) ) + ABS( DIMAG( A11 ) ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( DBLE( VEC ) ) + ABS( DIMAG( VEC ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF +* + X11 = ZLADIV( VEC*DCMPLX( SCALOC ), A11 ) +* + IF( SCALOC.NE.ONE ) THEN + DO 70 J = 1, N + CALL ZDSCAL( M, SCALOC, C( 1, J ), 1 ) + 70 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K, L ) = X11 +* + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( NOTRNA .AND. .NOT.NOTRNB ) THEN +* +* Solve A*X + ISGN*X*B**H = C. +* +* The (K,L)th block of X is determined starting from +* bottom-left corner column by column by +* +* A(K,K)*X(K,L) + ISGN*X(K,L)*B**H(L,L) = C(K,L) - R(K,L) +* +* Where +* M N +* R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B**H(L,J)] +* I=K+1 J=L+1 +* + DO 120 L = N, 1, -1 + DO 110 K = M, 1, -1 +* + SUML = ZDOTU( M-K, A( K, MIN( K+1, M ) ), LDA, + $ C( MIN( K+1, M ), L ), 1 ) + SUMR = ZDOTC( N-L, C( K, MIN( L+1, N ) ), LDC, + $ B( L, MIN( L+1, N ) ), LDB ) + VEC = C( K, L ) - ( SUML+SGN*DCONJG( SUMR ) ) +* + SCALOC = ONE + A11 = A( K, K ) + SGN*DCONJG( B( L, L ) ) + DA11 = ABS( DBLE( A11 ) ) + ABS( DIMAG( A11 ) ) + IF( DA11.LE.SMIN ) THEN + A11 = SMIN + DA11 = SMIN + INFO = 1 + END IF + DB = ABS( DBLE( VEC ) ) + ABS( DIMAG( VEC ) ) + IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN + IF( DB.GT.BIGNUM*DA11 ) + $ SCALOC = ONE / DB + END IF +* + X11 = ZLADIV( VEC*DCMPLX( SCALOC ), A11 ) +* + IF( SCALOC.NE.ONE ) THEN + DO 100 J = 1, N + CALL ZDSCAL( M, SCALOC, C( 1, J ), 1 ) + 100 CONTINUE + SCALE = SCALE*SCALOC + END IF + C( K, L ) = X11 +* + 110 CONTINUE + 120 CONTINUE +* + END IF +* + RETURN +* +* End of ZTRSYL +* + END diff --git a/dspl/liblapack/SRC/ztrti2.f b/dspl/liblapack/SRC/ztrti2.f new file mode 100644 index 0000000..87b8717 --- /dev/null +++ b/dspl/liblapack/SRC/ztrti2.f @@ -0,0 +1,212 @@ +*> \brief \b ZTRTI2 computes the inverse of a triangular matrix (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTRTI2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTRTI2( UPLO, DIAG, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTRTI2 computes the inverse of a complex upper or lower triangular +*> matrix. +*> +*> This is the Level 2 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the matrix A is upper or lower triangular. +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> Specifies whether or not the matrix A is unit triangular. +*> = 'N': Non-unit triangular +*> = 'U': Unit triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the triangular matrix A. If UPLO = 'U', the +*> leading n by n upper triangular part of the array A contains +*> the upper triangular matrix, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n by n lower triangular part of the array A contains +*> the lower triangular matrix, and the strictly upper +*> triangular part of A is not referenced. If DIAG = 'U', the +*> diagonal elements of A are also not referenced and are +*> assumed to be 1. +*> +*> On exit, the (triangular) inverse of the original matrix, in +*> the same storage format. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -k, the k-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZTRTI2( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J + COMPLEX*16 AJJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZSCAL, ZTRMV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTRTI2', -INFO ) + RETURN + END IF +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix. +* + DO 10 J = 1, N + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF +* +* Compute elements 1:j-1 of j-th column. +* + CALL ZTRMV( 'Upper', 'No transpose', DIAG, J-1, A, LDA, + $ A( 1, J ), 1 ) + CALL ZSCAL( J-1, AJJ, A( 1, J ), 1 ) + 10 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix. +* + DO 20 J = N, 1, -1 + IF( NOUNIT ) THEN + A( J, J ) = ONE / A( J, J ) + AJJ = -A( J, J ) + ELSE + AJJ = -ONE + END IF + IF( J.LT.N ) THEN +* +* Compute elements j+1:n of j-th column. +* + CALL ZTRMV( 'Lower', 'No transpose', DIAG, N-J, + $ A( J+1, J+1 ), LDA, A( J+1, J ), 1 ) + CALL ZSCAL( N-J, AJJ, A( J+1, J ), 1 ) + END IF + 20 CONTINUE + END IF +* + RETURN +* +* End of ZTRTI2 +* + END diff --git a/dspl/liblapack/SRC/ztrtri.f b/dspl/liblapack/SRC/ztrtri.f new file mode 100644 index 0000000..cb5bd45 --- /dev/null +++ b/dspl/liblapack/SRC/ztrtri.f @@ -0,0 +1,243 @@ +*> \brief \b ZTRTRI +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTRTRI + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTRTRI( UPLO, DIAG, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTRTRI computes the inverse of a complex upper or lower triangular +*> matrix A. +*> +*> This is the Level 3 BLAS version of the algorithm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the triangular matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of the array A contains +*> the upper triangular matrix, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of the array A contains +*> the lower triangular matrix, and the strictly upper +*> triangular part of A is not referenced. If DIAG = 'U', the +*> diagonal elements of A are also not referenced and are +*> assumed to be 1. +*> On exit, the (triangular) inverse of the original matrix, in +*> the same storage format. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, A(i,i) is exactly zero. The triangular +*> matrix is singular and its inverse can not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZTRTRI( UPLO, DIAG, N, A, LDA, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT, UPPER + INTEGER J, JB, NB, NN +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZTRMM, ZTRSM, ZTRTI2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTRTRI', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity if non-unit. +* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + INFO = 0 + END IF +* +* Determine the block size for this environment. +* + NB = ILAENV( 1, 'ZTRTRI', UPLO // DIAG, N, -1, -1, -1 ) + IF( NB.LE.1 .OR. NB.GE.N ) THEN +* +* Use unblocked code +* + CALL ZTRTI2( UPLO, DIAG, N, A, LDA, INFO ) + ELSE +* +* Use blocked code +* + IF( UPPER ) THEN +* +* Compute inverse of upper triangular matrix +* + DO 20 J = 1, N, NB + JB = MIN( NB, N-J+1 ) +* +* Compute rows 1:j-1 of current block column +* + CALL ZTRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1, + $ JB, ONE, A, LDA, A( 1, J ), LDA ) + CALL ZTRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1, + $ JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA ) +* +* Compute inverse of current diagonal block +* + CALL ZTRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO ) + 20 CONTINUE + ELSE +* +* Compute inverse of lower triangular matrix +* + NN = ( ( N-1 ) / NB )*NB + 1 + DO 30 J = NN, 1, -NB + JB = MIN( NB, N-J+1 ) + IF( J+JB.LE.N ) THEN +* +* Compute rows j+jb:n of current block column +* + CALL ZTRMM( 'Left', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA, + $ A( J+JB, J ), LDA ) + CALL ZTRSM( 'Right', 'Lower', 'No transpose', DIAG, + $ N-J-JB+1, JB, -ONE, A( J, J ), LDA, + $ A( J+JB, J ), LDA ) + END IF +* +* Compute inverse of current diagonal block +* + CALL ZTRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO ) + 30 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZTRTRI +* + END diff --git a/dspl/liblapack/SRC/ztrtrs.f b/dspl/liblapack/SRC/ztrtrs.f new file mode 100644 index 0000000..a5c36bc --- /dev/null +++ b/dspl/liblapack/SRC/ztrtrs.f @@ -0,0 +1,227 @@ +*> \brief \b ZTRTRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTRTRS + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER DIAG, TRANS, UPLO +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTRTRS solves a triangular system of the form +*> +*> A * X = B, A**T * X = B, or A**H * X = B, +*> +*> where A is a triangular matrix of order N, and B is an N-by-NRHS +*> matrix. A check is made to verify that A is nonsingular. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T * X = B (Transpose) +*> = 'C': A**H * X = B (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> = 'N': A is non-unit triangular; +*> = 'U': A is unit triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The triangular matrix A. If UPLO = 'U', the leading N-by-N +*> upper triangular part of the array A contains the upper +*> triangular matrix, and the strictly lower triangular part of +*> A is not referenced. If UPLO = 'L', the leading N-by-N lower +*> triangular part of the array A contains the lower triangular +*> matrix, and the strictly upper triangular part of A is not +*> referenced. If DIAG = 'U', the diagonal elements of A are +*> also not referenced and are assumed to be 1. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is COMPLEX*16 array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, if INFO = 0, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the i-th diagonal element of A is zero, +*> indicating that the matrix is singular and the solutions +*> X have not been computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER DIAG, TRANS, UPLO + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL NOUNIT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZTRSM +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOUNIT = LSAME( DIAG, 'N' ) + IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT. + $ LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -7 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTRTRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Check for singularity. +* + IF( NOUNIT ) THEN + DO 10 INFO = 1, N + IF( A( INFO, INFO ).EQ.ZERO ) + $ RETURN + 10 CONTINUE + END IF + INFO = 0 +* +* Solve A * x = b, A**T * x = b, or A**H * x = b. +* + CALL ZTRSM( 'Left', UPLO, TRANS, DIAG, N, NRHS, ONE, A, LDA, B, + $ LDB ) +* + RETURN +* +* End of ZTRTRS +* + END diff --git a/dspl/liblapack/SRC/ztrttf.f b/dspl/liblapack/SRC/ztrttf.f new file mode 100644 index 0000000..f18312b --- /dev/null +++ b/dspl/liblapack/SRC/ztrttf.f @@ -0,0 +1,537 @@ +*> \brief \b ZTRTTF copies a triangular matrix from the standard full format (TR) to the rectangular full packed format (TF). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTRTTF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTRTTF( TRANSR, UPLO, N, A, LDA, ARF, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANSR, UPLO +* INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. +* COMPLEX*16 A( 0: LDA-1, 0: * ), ARF( 0: * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTRTTF copies a triangular matrix A from standard full format (TR) +*> to rectangular full packed format (TF) . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSR +*> \verbatim +*> TRANSR is CHARACTER*1 +*> = 'N': ARF in Normal mode is wanted; +*> = 'C': ARF in Conjugate Transpose mode is wanted; +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension ( LDA, N ) +*> On entry, the triangular matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of the array A contains +*> the upper triangular matrix, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of the array A contains +*> the lower triangular matrix, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the matrix A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] ARF +*> \verbatim +*> ARF is COMPLEX*16 array, dimension ( N*(N+1)/2 ), +*> On exit, the upper or lower triangular matrix A stored in +*> RFP format. For a further discussion see Notes below. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> We first consider Standard Packed Format when N is even. +*> We give an example where N = 6. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 05 00 +*> 11 12 13 14 15 10 11 +*> 22 23 24 25 20 21 22 +*> 33 34 35 30 31 32 33 +*> 44 45 40 41 42 43 44 +*> 55 50 51 52 53 54 55 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(4:6,0:2) consists of +*> conjugate-transpose of the first three columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:2,0:2) consists of +*> conjugate-transpose of the last three columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N even and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- -- +*> 03 04 05 33 43 53 +*> -- -- +*> 13 14 15 00 44 54 +*> -- +*> 23 24 25 10 11 55 +*> +*> 33 34 35 20 21 22 +*> -- +*> 00 44 45 30 31 32 +*> -- -- +*> 01 11 55 40 41 42 +*> -- -- -- +*> 02 12 22 50 51 52 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 00 01 02 33 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 11 12 43 44 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- -- +*> 05 15 25 35 45 55 22 53 54 55 22 32 42 52 +*> +*> +*> We next consider Standard Packed Format when N is odd. +*> We give an example where N = 5. +*> +*> AP is Upper AP is Lower +*> +*> 00 01 02 03 04 00 +*> 11 12 13 14 10 11 +*> 22 23 24 20 21 22 +*> 33 34 30 31 32 33 +*> 44 40 41 42 43 44 +*> +*> +*> Let TRANSR = 'N'. RFP holds AP as follows: +*> For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last +*> three columns of AP upper. The lower triangle A(3:4,0:1) consists of +*> conjugate-transpose of the first two columns of AP upper. +*> For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first +*> three columns of AP lower. The upper triangle A(0:1,1:2) consists of +*> conjugate-transpose of the last two columns of AP lower. +*> To denote conjugate we place -- above the element. This covers the +*> case N odd and TRANSR = 'N'. +*> +*> RFP A RFP A +*> +*> -- -- +*> 02 03 04 00 33 43 +*> -- +*> 12 13 14 10 11 44 +*> +*> 22 23 24 20 21 22 +*> -- +*> 00 33 34 30 31 32 +*> -- -- +*> 01 11 44 40 41 42 +*> +*> Now let TRANSR = 'C'. RFP A in both UPLO cases is just the conjugate- +*> transpose of RFP A above. One therefore gets: +*> +*> +*> RFP A RFP A +*> +*> -- -- -- -- -- -- -- -- -- +*> 02 12 22 00 01 00 10 20 30 40 50 +*> -- -- -- -- -- -- -- -- -- +*> 03 13 23 33 11 33 11 21 31 41 51 +*> -- -- -- -- -- -- -- -- -- +*> 04 14 24 34 44 43 44 22 32 42 52 +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTRTTF( TRANSR, UPLO, N, A, LDA, ARF, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER TRANSR, UPLO + INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. + COMPLEX*16 A( 0: LDA-1, 0: * ), ARF( 0: * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* .. +* .. Local Scalars .. + LOGICAL LOWER, NISODD, NORMALTRANSR + INTEGER I, IJ, J, K, L, N1, N2, NT, NX2, NP1X2 +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX, MOD +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NORMALTRANSR = LSAME( TRANSR, 'N' ) + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.NORMALTRANSR .AND. .NOT.LSAME( TRANSR, 'C' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTRTTF', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.1 ) THEN + IF( N.EQ.1 ) THEN + IF( NORMALTRANSR ) THEN + ARF( 0 ) = A( 0, 0 ) + ELSE + ARF( 0 ) = DCONJG( A( 0, 0 ) ) + END IF + END IF + RETURN + END IF +* +* Size of array ARF(1:2,0:nt-1) +* + NT = N*( N+1 ) / 2 +* +* set N1 and N2 depending on LOWER: for N even N1=N2=K +* + IF( LOWER ) THEN + N2 = N / 2 + N1 = N - N2 + ELSE + N1 = N / 2 + N2 = N - N1 + END IF +* +* If N is odd, set NISODD = .TRUE., LDA=N+1 and A is (N+1)--by--K2. +* If N is even, set K = N/2 and NISODD = .FALSE., LDA=N and A is +* N--by--(N+1)/2. +* + IF( MOD( N, 2 ).EQ.0 ) THEN + K = N / 2 + NISODD = .FALSE. + IF( .NOT.LOWER ) + $ NP1X2 = N + N + 2 + ELSE + NISODD = .TRUE. + IF( .NOT.LOWER ) + $ NX2 = N + N + END IF +* + IF( NISODD ) THEN +* +* N is odd +* + IF( NORMALTRANSR ) THEN +* +* N is odd and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) +* T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) +* T1 -> a(0), T2 -> a(n), S -> a(n1); lda=n +* + IJ = 0 + DO J = 0, N2 + DO I = N1, N2 + J + ARF( IJ ) = DCONJG( A( N2+J, I ) ) + IJ = IJ + 1 + END DO + DO I = J, N - 1 + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) +* T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) +* T1 -> a(n2), T2 -> a(n1), S -> a(0); lda=n +* + IJ = NT - N + DO J = N - 1, N1, -1 + DO I = 0, J + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + DO L = J - N1, N1 - 1 + ARF( IJ ) = DCONJG( A( J-N1, L ) ) + IJ = IJ + 1 + END DO + IJ = IJ - NX2 + END DO +* + END IF +* + ELSE +* +* N is odd and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is odd +* T1 -> A(0,0) , T2 -> A(1,0) , S -> A(0,n1) +* T1 -> A(0+0) , T2 -> A(1+0) , S -> A(0+n1*n1); lda=n1 +* + IJ = 0 + DO J = 0, N2 - 1 + DO I = 0, J + ARF( IJ ) = DCONJG( A( J, I ) ) + IJ = IJ + 1 + END DO + DO I = N1 + J, N - 1 + ARF( IJ ) = A( I, N1+J ) + IJ = IJ + 1 + END DO + END DO + DO J = N2, N - 1 + DO I = 0, N1 - 1 + ARF( IJ ) = DCONJG( A( J, I ) ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is odd +* T1 -> A(0,n1+1), T2 -> A(0,n1), S -> A(0,0) +* T1 -> A(n2*n2), T2 -> A(n1*n2), S -> A(0); lda=n2 +* + IJ = 0 + DO J = 0, N1 + DO I = N1, N - 1 + ARF( IJ ) = DCONJG( A( J, I ) ) + IJ = IJ + 1 + END DO + END DO + DO J = 0, N1 - 1 + DO I = 0, J + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + DO L = N2 + J, N - 1 + ARF( IJ ) = DCONJG( A( N2+J, L ) ) + IJ = IJ + 1 + END DO + END DO +* + END IF +* + END IF +* + ELSE +* +* N is even +* + IF( NORMALTRANSR ) THEN +* +* N is even and TRANSR = 'N' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) +* T1 -> a(1), T2 -> a(0), S -> a(k+1); lda=n+1 +* + IJ = 0 + DO J = 0, K - 1 + DO I = K, K + J + ARF( IJ ) = DCONJG( A( K+J, I ) ) + IJ = IJ + 1 + END DO + DO I = J, N - 1 + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) +* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) +* T1 -> a(k+1), T2 -> a(k), S -> a(0); lda=n+1 +* + IJ = NT - N - 1 + DO J = N - 1, K, -1 + DO I = 0, J + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + DO L = J - K, K - 1 + ARF( IJ ) = DCONJG( A( J-K, L ) ) + IJ = IJ + 1 + END DO + IJ = IJ - NP1X2 + END DO +* + END IF +* + ELSE +* +* N is even and TRANSR = 'C' +* + IF( LOWER ) THEN +* +* SRPA for LOWER, TRANSPOSE and N is even (see paper, A=B) +* T1 -> A(0,1) , T2 -> A(0,0) , S -> A(0,k+1) : +* T1 -> A(0+k) , T2 -> A(0+0) , S -> A(0+k*(k+1)); lda=k +* + IJ = 0 + J = K + DO I = K, N - 1 + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + DO J = 0, K - 2 + DO I = 0, J + ARF( IJ ) = DCONJG( A( J, I ) ) + IJ = IJ + 1 + END DO + DO I = K + 1 + J, N - 1 + ARF( IJ ) = A( I, K+1+J ) + IJ = IJ + 1 + END DO + END DO + DO J = K - 1, N - 1 + DO I = 0, K - 1 + ARF( IJ ) = DCONJG( A( J, I ) ) + IJ = IJ + 1 + END DO + END DO +* + ELSE +* +* SRPA for UPPER, TRANSPOSE and N is even (see paper, A=B) +* T1 -> A(0,k+1) , T2 -> A(0,k) , S -> A(0,0) +* T1 -> A(0+k*(k+1)) , T2 -> A(0+k*k) , S -> A(0+0)); lda=k +* + IJ = 0 + DO J = 0, K + DO I = K, N - 1 + ARF( IJ ) = DCONJG( A( J, I ) ) + IJ = IJ + 1 + END DO + END DO + DO J = 0, K - 2 + DO I = 0, J + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO + DO L = K + 1 + J, N - 1 + ARF( IJ ) = DCONJG( A( K+1+J, L ) ) + IJ = IJ + 1 + END DO + END DO +* +* Note that here J = K-1 +* + DO I = 0, J + ARF( IJ ) = A( I, J ) + IJ = IJ + 1 + END DO +* + END IF +* + END IF +* + END IF +* + RETURN +* +* End of ZTRTTF +* + END diff --git a/dspl/liblapack/SRC/ztrttp.f b/dspl/liblapack/SRC/ztrttp.f new file mode 100644 index 0000000..52663b5 --- /dev/null +++ b/dspl/liblapack/SRC/ztrttp.f @@ -0,0 +1,176 @@ +*> \brief \b ZTRTTP copies a triangular matrix from the standard full format (TR) to the standard packed format (TP). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTRTTP + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTRTTP( UPLO, N, A, LDA, AP, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), AP( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTRTTP copies a triangular matrix A from full format (TR) to standard +*> packed format (TP). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': A is upper triangular; +*> = 'L': A is lower triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices AP and A. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the triangular matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension ( N*(N+1)/2 ), +*> On exit, the upper or lower triangular matrix A, packed +*> columnwise in a linear array. The j-th column of A is stored +*> in the array AP as follows: +*> if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; +*> if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZTRTTP( UPLO, N, A, LDA, AP, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, N, LDA +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), AP( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. +* .. +* .. Local Scalars .. + LOGICAL LOWER + INTEGER I, J, K +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + LOWER = LSAME( UPLO, 'L' ) + IF( .NOT.LOWER .AND. .NOT.LSAME( UPLO, 'U' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTRTTP', -INFO ) + RETURN + END IF +* + IF( LOWER ) THEN + K = 0 + DO J = 1, N + DO I = J, N + K = K + 1 + AP( K ) = A( I, J ) + END DO + END DO + ELSE + K = 0 + DO J = 1, N + DO I = 1, J + K = K + 1 + AP( K ) = A( I, J ) + END DO + END DO + END IF +* +* + RETURN +* +* End of ZTRTTP +* + END diff --git a/dspl/liblapack/SRC/ztzrzf.f b/dspl/liblapack/SRC/ztzrzf.f new file mode 100644 index 0000000..d806dc0 --- /dev/null +++ b/dspl/liblapack/SRC/ztzrzf.f @@ -0,0 +1,313 @@ +*> \brief \b ZTZRZF +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZTZRZF + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZTZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTZRZF reduces the M-by-N ( M<=N ) complex upper trapezoidal matrix A +*> to upper triangular form by means of unitary transformations. +*> +*> The upper trapezoidal matrix A is factored as +*> +*> A = ( R 0 ) * Z, +*> +*> where Z is an N-by-N unitary matrix and R is an M-by-M upper +*> triangular matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= M. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the leading M-by-N upper trapezoidal part of the +*> array A must contain the matrix to be factorized. +*> On exit, the leading M-by-M upper triangular part of A +*> contains the upper triangular matrix R, and elements M+1 to +*> N of the first M rows of A, with the array TAU, represent the +*> unitary matrix Z as a product of M elementary reflectors. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (M) +*> The scalar factors of the elementary reflectors. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The N-by-N matrix Z can be computed by +*> +*> Z = Z(1)*Z(2)* ... *Z(M) +*> +*> where each N-by-N Z(k) is given by +*> +*> Z(k) = I - tau(k)*v(k)*v(k)**H +*> +*> with v(k) is the kth row vector of the M-by-N matrix +*> +*> V = ( I A(:,M+1:N) ) +*> +*> I is the M-by-M identity matrix, A(:,M+1:N) +*> is the output stored in A on exit from DTZRZF, +*> and tau(k) is the kth element of the array TAU. +*> +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IWS, KI, KK, LDWORK, LWKMIN, LWKOPT, + $ M1, MU, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARZB, ZLARZT, ZLATRZ +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -4 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( M.EQ.0 .OR. M.EQ.N ) THEN + LWKOPT = 1 + LWKMIN = 1 + ELSE +* +* Determine the block size. +* + NB = ILAENV( 1, 'ZGERQF', ' ', M, N, -1, -1 ) + LWKOPT = M*NB + LWKMIN = MAX( 1, M ) + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZTZRZF', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 ) THEN + RETURN + ELSE IF( M.EQ.N ) THEN + DO 10 I = 1, N + TAU( I ) = ZERO + 10 CONTINUE + RETURN + END IF +* + NBMIN = 2 + NX = 1 + IWS = M + IF( NB.GT.1 .AND. NB.LT.M ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZGERQF', ' ', M, N, -1, -1 ) ) + IF( NX.LT.M ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZGERQF', ' ', M, N, -1, + $ -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.M .AND. NX.LT.M ) THEN +* +* Use blocked code initially. +* The last kk rows are handled by the block method. +* + M1 = MIN( M+1, N ) + KI = ( ( M-NX-1 ) / NB )*NB + KK = MIN( M, KI+NB ) +* + DO 20 I = M - KK + KI + 1, M - KK + 1, -NB + IB = MIN( M-I+1, NB ) +* +* Compute the TZ factorization of the current block +* A(i:i+ib-1,i:n) +* + CALL ZLATRZ( IB, N-I+1, N-M, A( I, I ), LDA, TAU( I ), + $ WORK ) + IF( I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL ZLARZT( 'Backward', 'Rowwise', N-M, IB, A( I, M1 ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:i-1,i:n) from the right +* + CALL ZLARZB( 'Right', 'No transpose', 'Backward', + $ 'Rowwise', I-1, N-I+1, IB, N-M, A( I, M1 ), + $ LDA, WORK, LDWORK, A( 1, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF + 20 CONTINUE + MU = I + NB - 1 + ELSE + MU = M + END IF +* +* Use unblocked code to factor the last or only block +* + IF( MU.GT.0 ) + $ CALL ZLATRZ( MU, N, N-M, A, LDA, TAU, WORK ) +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZTZRZF +* + END diff --git a/dspl/liblapack/SRC/zunbdb.f b/dspl/liblapack/SRC/zunbdb.f new file mode 100644 index 0000000..d06dacd --- /dev/null +++ b/dspl/liblapack/SRC/zunbdb.f @@ -0,0 +1,689 @@ +*> \brief \b ZUNBDB +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNBDB + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, +* X21, LDX21, X22, LDX22, THETA, PHI, TAUP1, +* TAUP2, TAUQ1, TAUQ2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIGNS, TRANS +* INTEGER INFO, LDX11, LDX12, LDX21, LDX22, LWORK, M, P, +* $ Q +* .. +* .. Array Arguments .. +* DOUBLE PRECISION PHI( * ), THETA( * ) +* COMPLEX*16 TAUP1( * ), TAUP2( * ), TAUQ1( * ), TAUQ2( * ), +* $ WORK( * ), X11( LDX11, * ), X12( LDX12, * ), +* $ X21( LDX21, * ), X22( LDX22, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNBDB simultaneously bidiagonalizes the blocks of an M-by-M +*> partitioned unitary matrix X: +*> +*> [ B11 | B12 0 0 ] +*> [ X11 | X12 ] [ P1 | ] [ 0 | 0 -I 0 ] [ Q1 | ]**H +*> X = [-----------] = [---------] [----------------] [---------] . +*> [ X21 | X22 ] [ | P2 ] [ B21 | B22 0 0 ] [ | Q2 ] +*> [ 0 | 0 0 I ] +*> +*> X11 is P-by-Q. Q must be no larger than P, M-P, or M-Q. (If this is +*> not the case, then X must be transposed and/or permuted. This can be +*> done in constant time using the TRANS and SIGNS options. See ZUNCSD +*> for details.) +*> +*> The unitary matrices P1, P2, Q1, and Q2 are P-by-P, (M-P)-by- +*> (M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. They are +*> represented implicitly by Householder vectors. +*> +*> B11, B12, B21, and B22 are Q-by-Q bidiagonal matrices represented +*> implicitly by angles THETA, PHI. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER +*> = 'T': X, U1, U2, V1T, and V2T are stored in row-major +*> order; +*> otherwise: X, U1, U2, V1T, and V2T are stored in column- +*> major order. +*> \endverbatim +*> +*> \param[in] SIGNS +*> \verbatim +*> SIGNS is CHARACTER +*> = 'O': The lower-left block is made nonpositive (the +*> "other" convention); +*> otherwise: The upper-right block is made nonpositive (the +*> "default" convention). +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows and columns in X. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11 and X12. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= +*> MIN(P,M-P,M-Q). +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is COMPLEX*16 array, dimension (LDX11,Q) +*> On entry, the top-left block of the unitary matrix to be +*> reduced. On exit, the form depends on TRANS: +*> If TRANS = 'N', then +*> the columns of tril(X11) specify reflectors for P1, +*> the rows of triu(X11,1) specify reflectors for Q1; +*> else TRANS = 'T', and +*> the rows of triu(X11) specify reflectors for P1, +*> the columns of tril(X11,-1) specify reflectors for Q1. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. If TRANS = 'N', then LDX11 >= +*> P; else LDX11 >= Q. +*> \endverbatim +*> +*> \param[in,out] X12 +*> \verbatim +*> X12 is COMPLEX*16 array, dimension (LDX12,M-Q) +*> On entry, the top-right block of the unitary matrix to +*> be reduced. On exit, the form depends on TRANS: +*> If TRANS = 'N', then +*> the rows of triu(X12) specify the first P reflectors for +*> Q2; +*> else TRANS = 'T', and +*> the columns of tril(X12) specify the first P reflectors +*> for Q2. +*> \endverbatim +*> +*> \param[in] LDX12 +*> \verbatim +*> LDX12 is INTEGER +*> The leading dimension of X12. If TRANS = 'N', then LDX12 >= +*> P; else LDX11 >= M-Q. +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is COMPLEX*16 array, dimension (LDX21,Q) +*> On entry, the bottom-left block of the unitary matrix to +*> be reduced. On exit, the form depends on TRANS: +*> If TRANS = 'N', then +*> the columns of tril(X21) specify reflectors for P2; +*> else TRANS = 'T', and +*> the rows of triu(X21) specify reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. If TRANS = 'N', then LDX21 >= +*> M-P; else LDX21 >= Q. +*> \endverbatim +*> +*> \param[in,out] X22 +*> \verbatim +*> X22 is COMPLEX*16 array, dimension (LDX22,M-Q) +*> On entry, the bottom-right block of the unitary matrix to +*> be reduced. On exit, the form depends on TRANS: +*> If TRANS = 'N', then +*> the rows of triu(X22(Q+1:M-P,P+1:M-Q)) specify the last +*> M-P-Q reflectors for Q2, +*> else TRANS = 'T', and +*> the columns of tril(X22(P+1:M-Q,Q+1:M-P)) specify the last +*> M-P-Q reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX22 +*> \verbatim +*> LDX22 is INTEGER +*> The leading dimension of X22. If TRANS = 'N', then LDX22 >= +*> M-P; else LDX22 >= M-Q. +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is DOUBLE PRECISION array, dimension (Q) +*> The entries of the bidiagonal blocks B11, B12, B21, B22 can +*> be computed from the angles THETA and PHI. See Further +*> Details. +*> \endverbatim +*> +*> \param[out] PHI +*> \verbatim +*> PHI is DOUBLE PRECISION array, dimension (Q-1) +*> The entries of the bidiagonal blocks B11, B12, B21, B22 can +*> be computed from the angles THETA and PHI. See Further +*> Details. +*> \endverbatim +*> +*> \param[out] TAUP1 +*> \verbatim +*> TAUP1 is COMPLEX*16 array, dimension (P) +*> The scalar factors of the elementary reflectors that define +*> P1. +*> \endverbatim +*> +*> \param[out] TAUP2 +*> \verbatim +*> TAUP2 is COMPLEX*16 array, dimension (M-P) +*> The scalar factors of the elementary reflectors that define +*> P2. +*> \endverbatim +*> +*> \param[out] TAUQ1 +*> \verbatim +*> TAUQ1 is COMPLEX*16 array, dimension (Q) +*> The scalar factors of the elementary reflectors that define +*> Q1. +*> \endverbatim +*> +*> \param[out] TAUQ2 +*> \verbatim +*> TAUQ2 is COMPLEX*16 array, dimension (M-Q) +*> The scalar factors of the elementary reflectors that define +*> Q2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= M-Q. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The bidiagonal blocks B11, B12, B21, and B22 are represented +*> implicitly by angles THETA(1), ..., THETA(Q) and PHI(1), ..., +*> PHI(Q-1). B11 and B21 are upper bidiagonal, while B21 and B22 are +*> lower bidiagonal. Every entry in each bidiagonal band is a product +*> of a sine or cosine of a THETA with a sine or cosine of a PHI. See +*> [1] or ZUNCSD for details. +*> +*> P1, P2, Q1, and Q2 are represented as products of elementary +*> reflectors. See ZUNCSD for details on generating P1, P2, Q1, and Q2 +*> using ZUNGQR and ZUNGLQ. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +*> +* ===================================================================== + SUBROUTINE ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, + $ X21, LDX21, X22, LDX22, THETA, PHI, TAUP1, + $ TAUP2, TAUQ1, TAUQ2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIGNS, TRANS + INTEGER INFO, LDX11, LDX12, LDX21, LDX22, LWORK, M, P, + $ Q +* .. +* .. Array Arguments .. + DOUBLE PRECISION PHI( * ), THETA( * ) + COMPLEX*16 TAUP1( * ), TAUP2( * ), TAUQ1( * ), TAUQ2( * ), + $ WORK( * ), X11( LDX11, * ), X12( LDX12, * ), + $ X21( LDX21, * ), X22( LDX22, * ) +* .. +* +* ==================================================================== +* +* .. Parameters .. + DOUBLE PRECISION REALONE + PARAMETER ( REALONE = 1.0D0 ) + COMPLEX*16 ONE + PARAMETER ( ONE = (1.0D0,0.0D0) ) +* .. +* .. Local Scalars .. + LOGICAL COLMAJOR, LQUERY + INTEGER I, LWORKMIN, LWORKOPT + DOUBLE PRECISION Z1, Z2, Z3, Z4 +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZLARF, ZLARFGP, ZSCAL, XERBLA + EXTERNAL ZLACGV +* +* .. +* .. External Functions .. + DOUBLE PRECISION DZNRM2 + LOGICAL LSAME + EXTERNAL DZNRM2, LSAME +* .. +* .. Intrinsic Functions + INTRINSIC ATAN2, COS, MAX, MIN, SIN + INTRINSIC DCMPLX, DCONJG +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + COLMAJOR = .NOT. LSAME( TRANS, 'T' ) + IF( .NOT. LSAME( SIGNS, 'O' ) ) THEN + Z1 = REALONE + Z2 = REALONE + Z3 = REALONE + Z4 = REALONE + ELSE + Z1 = REALONE + Z2 = -REALONE + Z3 = REALONE + Z4 = -REALONE + END IF + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -3 + ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN + INFO = -4 + ELSE IF( Q .LT. 0 .OR. Q .GT. P .OR. Q .GT. M-P .OR. + $ Q .GT. M-Q ) THEN + INFO = -5 + ELSE IF( COLMAJOR .AND. LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -7 + ELSE IF( .NOT.COLMAJOR .AND. LDX11 .LT. MAX( 1, Q ) ) THEN + INFO = -7 + ELSE IF( COLMAJOR .AND. LDX12 .LT. MAX( 1, P ) ) THEN + INFO = -9 + ELSE IF( .NOT.COLMAJOR .AND. LDX12 .LT. MAX( 1, M-Q ) ) THEN + INFO = -9 + ELSE IF( COLMAJOR .AND. LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -11 + ELSE IF( .NOT.COLMAJOR .AND. LDX21 .LT. MAX( 1, Q ) ) THEN + INFO = -11 + ELSE IF( COLMAJOR .AND. LDX22 .LT. MAX( 1, M-P ) ) THEN + INFO = -13 + ELSE IF( .NOT.COLMAJOR .AND. LDX22 .LT. MAX( 1, M-Q ) ) THEN + INFO = -13 + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + LWORKOPT = M - Q + LWORKMIN = M - Q + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT. LQUERY ) THEN + INFO = -21 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'xORBDB', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Handle column-major and row-major separately +* + IF( COLMAJOR ) THEN +* +* Reduce columns 1, ..., Q of X11, X12, X21, and X22 +* + DO I = 1, Q +* + IF( I .EQ. 1 ) THEN + CALL ZSCAL( P-I+1, DCMPLX( Z1, 0.0D0 ), X11(I,I), 1 ) + ELSE + CALL ZSCAL( P-I+1, DCMPLX( Z1*COS(PHI(I-1)), 0.0D0 ), + $ X11(I,I), 1 ) + CALL ZAXPY( P-I+1, DCMPLX( -Z1*Z3*Z4*SIN(PHI(I-1)), + $ 0.0D0 ), X12(I,I-1), 1, X11(I,I), 1 ) + END IF + IF( I .EQ. 1 ) THEN + CALL ZSCAL( M-P-I+1, DCMPLX( Z2, 0.0D0 ), X21(I,I), 1 ) + ELSE + CALL ZSCAL( M-P-I+1, DCMPLX( Z2*COS(PHI(I-1)), 0.0D0 ), + $ X21(I,I), 1 ) + CALL ZAXPY( M-P-I+1, DCMPLX( -Z2*Z3*Z4*SIN(PHI(I-1)), + $ 0.0D0 ), X22(I,I-1), 1, X21(I,I), 1 ) + END IF +* + THETA(I) = ATAN2( DZNRM2( M-P-I+1, X21(I,I), 1 ), + $ DZNRM2( P-I+1, X11(I,I), 1 ) ) +* + IF( P .GT. I ) THEN + CALL ZLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) + ELSE IF ( P .EQ. I ) THEN + CALL ZLARFGP( P-I+1, X11(I,I), X11(I,I), 1, TAUP1(I) ) + END IF + X11(I,I) = ONE + IF ( M-P .GT. I ) THEN + CALL ZLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, + $ TAUP2(I) ) + ELSE IF ( M-P .EQ. I ) THEN + CALL ZLARFGP( M-P-I+1, X21(I,I), X21(I,I), 1, + $ TAUP2(I) ) + END IF + X21(I,I) = ONE +* + IF ( Q .GT. I ) THEN + CALL ZLARF( 'L', P-I+1, Q-I, X11(I,I), 1, + $ DCONJG(TAUP1(I)), X11(I,I+1), LDX11, WORK ) + CALL ZLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, + $ DCONJG(TAUP2(I)), X21(I,I+1), LDX21, WORK ) + END IF + IF ( M-Q+1 .GT. I ) THEN + CALL ZLARF( 'L', P-I+1, M-Q-I+1, X11(I,I), 1, + $ DCONJG(TAUP1(I)), X12(I,I), LDX12, WORK ) + CALL ZLARF( 'L', M-P-I+1, M-Q-I+1, X21(I,I), 1, + $ DCONJG(TAUP2(I)), X22(I,I), LDX22, WORK ) + END IF +* + IF( I .LT. Q ) THEN + CALL ZSCAL( Q-I, DCMPLX( -Z1*Z3*SIN(THETA(I)), 0.0D0 ), + $ X11(I,I+1), LDX11 ) + CALL ZAXPY( Q-I, DCMPLX( Z2*Z3*COS(THETA(I)), 0.0D0 ), + $ X21(I,I+1), LDX21, X11(I,I+1), LDX11 ) + END IF + CALL ZSCAL( M-Q-I+1, DCMPLX( -Z1*Z4*SIN(THETA(I)), 0.0D0 ), + $ X12(I,I), LDX12 ) + CALL ZAXPY( M-Q-I+1, DCMPLX( Z2*Z4*COS(THETA(I)), 0.0D0 ), + $ X22(I,I), LDX22, X12(I,I), LDX12 ) +* + IF( I .LT. Q ) + $ PHI(I) = ATAN2( DZNRM2( Q-I, X11(I,I+1), LDX11 ), + $ DZNRM2( M-Q-I+1, X12(I,I), LDX12 ) ) +* + IF( I .LT. Q ) THEN + CALL ZLACGV( Q-I, X11(I,I+1), LDX11 ) + IF ( I .EQ. Q-1 ) THEN + CALL ZLARFGP( Q-I, X11(I,I+1), X11(I,I+1), LDX11, + $ TAUQ1(I) ) + ELSE + CALL ZLARFGP( Q-I, X11(I,I+1), X11(I,I+2), LDX11, + $ TAUQ1(I) ) + END IF + X11(I,I+1) = ONE + END IF + IF ( M-Q+1 .GT. I ) THEN + CALL ZLACGV( M-Q-I+1, X12(I,I), LDX12 ) + IF ( M-Q .EQ. I ) THEN + CALL ZLARFGP( M-Q-I+1, X12(I,I), X12(I,I), LDX12, + $ TAUQ2(I) ) + ELSE + CALL ZLARFGP( M-Q-I+1, X12(I,I), X12(I,I+1), LDX12, + $ TAUQ2(I) ) + END IF + END IF + X12(I,I) = ONE +* + IF( I .LT. Q ) THEN + CALL ZLARF( 'R', P-I, Q-I, X11(I,I+1), LDX11, TAUQ1(I), + $ X11(I+1,I+1), LDX11, WORK ) + CALL ZLARF( 'R', M-P-I, Q-I, X11(I,I+1), LDX11, TAUQ1(I), + $ X21(I+1,I+1), LDX21, WORK ) + END IF + IF ( P .GT. I ) THEN + CALL ZLARF( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, TAUQ2(I), + $ X12(I+1,I), LDX12, WORK ) + END IF + IF ( M-P .GT. I ) THEN + CALL ZLARF( 'R', M-P-I, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), X22(I+1,I), LDX22, WORK ) + END IF +* + IF( I .LT. Q ) + $ CALL ZLACGV( Q-I, X11(I,I+1), LDX11 ) + CALL ZLACGV( M-Q-I+1, X12(I,I), LDX12 ) +* + END DO +* +* Reduce columns Q + 1, ..., P of X12, X22 +* + DO I = Q + 1, P +* + CALL ZSCAL( M-Q-I+1, DCMPLX( -Z1*Z4, 0.0D0 ), X12(I,I), + $ LDX12 ) + CALL ZLACGV( M-Q-I+1, X12(I,I), LDX12 ) + IF ( I .GE. M-Q ) THEN + CALL ZLARFGP( M-Q-I+1, X12(I,I), X12(I,I), LDX12, + $ TAUQ2(I) ) + ELSE + CALL ZLARFGP( M-Q-I+1, X12(I,I), X12(I,I+1), LDX12, + $ TAUQ2(I) ) + END IF + X12(I,I) = ONE +* + IF ( P .GT. I ) THEN + CALL ZLARF( 'R', P-I, M-Q-I+1, X12(I,I), LDX12, TAUQ2(I), + $ X12(I+1,I), LDX12, WORK ) + END IF + IF( M-P-Q .GE. 1 ) + $ CALL ZLARF( 'R', M-P-Q, M-Q-I+1, X12(I,I), LDX12, + $ TAUQ2(I), X22(Q+1,I), LDX22, WORK ) +* + CALL ZLACGV( M-Q-I+1, X12(I,I), LDX12 ) +* + END DO +* +* Reduce columns P + 1, ..., M - Q of X12, X22 +* + DO I = 1, M - P - Q +* + CALL ZSCAL( M-P-Q-I+1, DCMPLX( Z2*Z4, 0.0D0 ), + $ X22(Q+I,P+I), LDX22 ) + CALL ZLACGV( M-P-Q-I+1, X22(Q+I,P+I), LDX22 ) + CALL ZLARFGP( M-P-Q-I+1, X22(Q+I,P+I), X22(Q+I,P+I+1), + $ LDX22, TAUQ2(P+I) ) + X22(Q+I,P+I) = ONE + CALL ZLARF( 'R', M-P-Q-I, M-P-Q-I+1, X22(Q+I,P+I), LDX22, + $ TAUQ2(P+I), X22(Q+I+1,P+I), LDX22, WORK ) +* + CALL ZLACGV( M-P-Q-I+1, X22(Q+I,P+I), LDX22 ) +* + END DO +* + ELSE +* +* Reduce columns 1, ..., Q of X11, X12, X21, X22 +* + DO I = 1, Q +* + IF( I .EQ. 1 ) THEN + CALL ZSCAL( P-I+1, DCMPLX( Z1, 0.0D0 ), X11(I,I), + $ LDX11 ) + ELSE + CALL ZSCAL( P-I+1, DCMPLX( Z1*COS(PHI(I-1)), 0.0D0 ), + $ X11(I,I), LDX11 ) + CALL ZAXPY( P-I+1, DCMPLX( -Z1*Z3*Z4*SIN(PHI(I-1)), + $ 0.0D0 ), X12(I-1,I), LDX12, X11(I,I), LDX11 ) + END IF + IF( I .EQ. 1 ) THEN + CALL ZSCAL( M-P-I+1, DCMPLX( Z2, 0.0D0 ), X21(I,I), + $ LDX21 ) + ELSE + CALL ZSCAL( M-P-I+1, DCMPLX( Z2*COS(PHI(I-1)), 0.0D0 ), + $ X21(I,I), LDX21 ) + CALL ZAXPY( M-P-I+1, DCMPLX( -Z2*Z3*Z4*SIN(PHI(I-1)), + $ 0.0D0 ), X22(I-1,I), LDX22, X21(I,I), LDX21 ) + END IF +* + THETA(I) = ATAN2( DZNRM2( M-P-I+1, X21(I,I), LDX21 ), + $ DZNRM2( P-I+1, X11(I,I), LDX11 ) ) +* + CALL ZLACGV( P-I+1, X11(I,I), LDX11 ) + CALL ZLACGV( M-P-I+1, X21(I,I), LDX21 ) +* + CALL ZLARFGP( P-I+1, X11(I,I), X11(I,I+1), LDX11, TAUP1(I) ) + X11(I,I) = ONE + IF ( I .EQ. M-P ) THEN + CALL ZLARFGP( M-P-I+1, X21(I,I), X21(I,I), LDX21, + $ TAUP2(I) ) + ELSE + CALL ZLARFGP( M-P-I+1, X21(I,I), X21(I,I+1), LDX21, + $ TAUP2(I) ) + END IF + X21(I,I) = ONE +* + CALL ZLARF( 'R', Q-I, P-I+1, X11(I,I), LDX11, TAUP1(I), + $ X11(I+1,I), LDX11, WORK ) + CALL ZLARF( 'R', M-Q-I+1, P-I+1, X11(I,I), LDX11, TAUP1(I), + $ X12(I,I), LDX12, WORK ) + CALL ZLARF( 'R', Q-I, M-P-I+1, X21(I,I), LDX21, TAUP2(I), + $ X21(I+1,I), LDX21, WORK ) + CALL ZLARF( 'R', M-Q-I+1, M-P-I+1, X21(I,I), LDX21, + $ TAUP2(I), X22(I,I), LDX22, WORK ) +* + CALL ZLACGV( P-I+1, X11(I,I), LDX11 ) + CALL ZLACGV( M-P-I+1, X21(I,I), LDX21 ) +* + IF( I .LT. Q ) THEN + CALL ZSCAL( Q-I, DCMPLX( -Z1*Z3*SIN(THETA(I)), 0.0D0 ), + $ X11(I+1,I), 1 ) + CALL ZAXPY( Q-I, DCMPLX( Z2*Z3*COS(THETA(I)), 0.0D0 ), + $ X21(I+1,I), 1, X11(I+1,I), 1 ) + END IF + CALL ZSCAL( M-Q-I+1, DCMPLX( -Z1*Z4*SIN(THETA(I)), 0.0D0 ), + $ X12(I,I), 1 ) + CALL ZAXPY( M-Q-I+1, DCMPLX( Z2*Z4*COS(THETA(I)), 0.0D0 ), + $ X22(I,I), 1, X12(I,I), 1 ) +* + IF( I .LT. Q ) + $ PHI(I) = ATAN2( DZNRM2( Q-I, X11(I+1,I), 1 ), + $ DZNRM2( M-Q-I+1, X12(I,I), 1 ) ) +* + IF( I .LT. Q ) THEN + CALL ZLARFGP( Q-I, X11(I+1,I), X11(I+2,I), 1, TAUQ1(I) ) + X11(I+1,I) = ONE + END IF + CALL ZLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, TAUQ2(I) ) + X12(I,I) = ONE +* + IF( I .LT. Q ) THEN + CALL ZLARF( 'L', Q-I, P-I, X11(I+1,I), 1, + $ DCONJG(TAUQ1(I)), X11(I+1,I+1), LDX11, WORK ) + CALL ZLARF( 'L', Q-I, M-P-I, X11(I+1,I), 1, + $ DCONJG(TAUQ1(I)), X21(I+1,I+1), LDX21, WORK ) + END IF + CALL ZLARF( 'L', M-Q-I+1, P-I, X12(I,I), 1, + $ DCONJG(TAUQ2(I)), X12(I,I+1), LDX12, WORK ) + IF ( M-P .GT. I ) THEN + CALL ZLARF( 'L', M-Q-I+1, M-P-I, X12(I,I), 1, + $ DCONJG(TAUQ2(I)), X22(I,I+1), LDX22, WORK ) + END IF +* + END DO +* +* Reduce columns Q + 1, ..., P of X12, X22 +* + DO I = Q + 1, P +* + CALL ZSCAL( M-Q-I+1, DCMPLX( -Z1*Z4, 0.0D0 ), X12(I,I), 1 ) + CALL ZLARFGP( M-Q-I+1, X12(I,I), X12(I+1,I), 1, TAUQ2(I) ) + X12(I,I) = ONE +* + IF ( P .GT. I ) THEN + CALL ZLARF( 'L', M-Q-I+1, P-I, X12(I,I), 1, + $ DCONJG(TAUQ2(I)), X12(I,I+1), LDX12, WORK ) + END IF + IF( M-P-Q .GE. 1 ) + $ CALL ZLARF( 'L', M-Q-I+1, M-P-Q, X12(I,I), 1, + $ DCONJG(TAUQ2(I)), X22(I,Q+1), LDX22, WORK ) +* + END DO +* +* Reduce columns P + 1, ..., M - Q of X12, X22 +* + DO I = 1, M - P - Q +* + CALL ZSCAL( M-P-Q-I+1, DCMPLX( Z2*Z4, 0.0D0 ), + $ X22(P+I,Q+I), 1 ) + CALL ZLARFGP( M-P-Q-I+1, X22(P+I,Q+I), X22(P+I+1,Q+I), 1, + $ TAUQ2(P+I) ) + X22(P+I,Q+I) = ONE +* + IF ( M-P-Q .NE. I ) THEN + CALL ZLARF( 'L', M-P-Q-I+1, M-P-Q-I, X22(P+I,Q+I), 1, + $ DCONJG(TAUQ2(P+I)), X22(P+I,Q+I+1), LDX22, + $ WORK ) + END IF +* + END DO +* + END IF +* + RETURN +* +* End of ZUNBDB +* + END + diff --git a/dspl/liblapack/SRC/zunbdb1.f b/dspl/liblapack/SRC/zunbdb1.f new file mode 100644 index 0000000..039d160 --- /dev/null +++ b/dspl/liblapack/SRC/zunbdb1.f @@ -0,0 +1,327 @@ +*> \brief \b ZUNBDB1 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNBDB1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, +* TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. +* DOUBLE PRECISION PHI(*), THETA(*) +* COMPLEX*16 TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), +* $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> ZUNBDB1 simultaneously bidiagonalizes the blocks of a tall and skinny +*> matrix X with orthonomal columns: +*> +*> [ B11 ] +*> [ X11 ] [ P1 | ] [ 0 ] +*> [-----] = [---------] [-----] Q1**T . +*> [ X21 ] [ | P2 ] [ B21 ] +*> [ 0 ] +*> +*> X11 is P-by-Q, and X21 is (M-P)-by-Q. Q must be no larger than P, +*> M-P, or M-Q. Routines ZUNBDB2, ZUNBDB3, and ZUNBDB4 handle cases in +*> which Q is not the minimum dimension. +*> +*> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), +*> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by +*> Householder vectors. +*> +*> B11 and B12 are Q-by-Q bidiagonal matrices represented implicitly by +*> angles THETA, PHI. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows X11 plus the number of rows in X21. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= +*> MIN(P,M-P,M-Q). +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is COMPLEX*16 array, dimension (LDX11,Q) +*> On entry, the top block of the matrix X to be reduced. On +*> exit, the columns of tril(X11) specify reflectors for P1 and +*> the rows of triu(X11,1) specify reflectors for Q1. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= P. +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is COMPLEX*16 array, dimension (LDX21,Q) +*> On entry, the bottom block of the matrix X to be reduced. On +*> exit, the columns of tril(X21) specify reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. LDX21 >= M-P. +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is DOUBLE PRECISION array, dimension (Q) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] PHI +*> \verbatim +*> PHI is DOUBLE PRECISION array, dimension (Q-1) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP1 +*> \verbatim +*> TAUP1 is COMPLEX*16 array, dimension (P) +*> The scalar factors of the elementary reflectors that define +*> P1. +*> \endverbatim +*> +*> \param[out] TAUP2 +*> \verbatim +*> TAUP2 is COMPLEX*16 array, dimension (M-P) +*> The scalar factors of the elementary reflectors that define +*> P2. +*> \endverbatim +*> +*> \param[out] TAUQ1 +*> \verbatim +*> TAUQ1 is COMPLEX*16 array, dimension (Q) +*> The scalar factors of the elementary reflectors that define +*> Q1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= M-Q. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +*> +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The upper-bidiagonal blocks B11, B21 are represented implicitly by +*> angles THETA(1), ..., THETA(Q) and PHI(1), ..., PHI(Q-1). Every entry +*> in each bidiagonal band is a product of a sine or cosine of a THETA +*> with a sine or cosine of a PHI. See [1] or ZUNCSD for details. +*> +*> P1, P2, and Q1 are represented as products of elementary reflectors. +*> See ZUNCSD2BY1 for details on generating P1, P2, and Q1 using ZUNGQR +*> and ZUNGLQ. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +*> +* ===================================================================== + SUBROUTINE ZUNBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. + DOUBLE PRECISION PHI(*), THETA(*) + COMPLEX*16 TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), + $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* ==================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = (1.0D0,0.0D0) ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION C, S + INTEGER CHILDINFO, I, ILARF, IORBDB5, LLARF, LORBDB5, + $ LWORKMIN, LWORKOPT + LOGICAL LQUERY +* .. +* .. External Subroutines .. + EXTERNAL ZLARF, ZLARFGP, ZUNBDB5, ZDROT, XERBLA + EXTERNAL ZLACGV +* .. +* .. External Functions .. + DOUBLE PRECISION DZNRM2 + EXTERNAL DZNRM2 +* .. +* .. Intrinsic Function .. + INTRINSIC ATAN2, COS, MAX, SIN, SQRT +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -1 + ELSE IF( P .LT. Q .OR. M-P .LT. Q ) THEN + INFO = -2 + ELSE IF( Q .LT. 0 .OR. M-Q .LT. Q ) THEN + INFO = -3 + ELSE IF( LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -5 + ELSE IF( LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + ILARF = 2 + LLARF = MAX( P-1, M-P-1, Q-1 ) + IORBDB5 = 2 + LORBDB5 = Q-2 + LWORKOPT = MAX( ILARF+LLARF-1, IORBDB5+LORBDB5-1 ) + LWORKMIN = LWORKOPT + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'ZUNBDB1', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Reduce columns 1, ..., Q of X11 and X21 +* + DO I = 1, Q +* + CALL ZLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) + CALL ZLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, TAUP2(I) ) + THETA(I) = ATAN2( DBLE( X21(I,I) ), DBLE( X11(I,I) ) ) + C = COS( THETA(I) ) + S = SIN( THETA(I) ) + X11(I,I) = ONE + X21(I,I) = ONE + CALL ZLARF( 'L', P-I+1, Q-I, X11(I,I), 1, DCONJG(TAUP1(I)), + $ X11(I,I+1), LDX11, WORK(ILARF) ) + CALL ZLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, DCONJG(TAUP2(I)), + $ X21(I,I+1), LDX21, WORK(ILARF) ) +* + IF( I .LT. Q ) THEN + CALL ZDROT( Q-I, X11(I,I+1), LDX11, X21(I,I+1), LDX21, C, + $ S ) + CALL ZLACGV( Q-I, X21(I,I+1), LDX21 ) + CALL ZLARFGP( Q-I, X21(I,I+1), X21(I,I+2), LDX21, TAUQ1(I) ) + S = DBLE( X21(I,I+1) ) + X21(I,I+1) = ONE + CALL ZLARF( 'R', P-I, Q-I, X21(I,I+1), LDX21, TAUQ1(I), + $ X11(I+1,I+1), LDX11, WORK(ILARF) ) + CALL ZLARF( 'R', M-P-I, Q-I, X21(I,I+1), LDX21, TAUQ1(I), + $ X21(I+1,I+1), LDX21, WORK(ILARF) ) + CALL ZLACGV( Q-I, X21(I,I+1), LDX21 ) + C = SQRT( DZNRM2( P-I, X11(I+1,I+1), 1 )**2 + $ + DZNRM2( M-P-I, X21(I+1,I+1), 1 )**2 ) + PHI(I) = ATAN2( S, C ) + CALL ZUNBDB5( P-I, M-P-I, Q-I-1, X11(I+1,I+1), 1, + $ X21(I+1,I+1), 1, X11(I+1,I+2), LDX11, + $ X21(I+1,I+2), LDX21, WORK(IORBDB5), LORBDB5, + $ CHILDINFO ) + END IF +* + END DO +* + RETURN +* +* End of ZUNBDB1 +* + END + diff --git a/dspl/liblapack/SRC/zunbdb2.f b/dspl/liblapack/SRC/zunbdb2.f new file mode 100644 index 0000000..8bfca46 --- /dev/null +++ b/dspl/liblapack/SRC/zunbdb2.f @@ -0,0 +1,337 @@ +*> \brief \b ZUNBDB2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNBDB2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, +* TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. +* DOUBLE PRECISION PHI(*), THETA(*) +* COMPLEX*16 TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), +* $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> ZUNBDB2 simultaneously bidiagonalizes the blocks of a tall and skinny +*> matrix X with orthonomal columns: +*> +*> [ B11 ] +*> [ X11 ] [ P1 | ] [ 0 ] +*> [-----] = [---------] [-----] Q1**T . +*> [ X21 ] [ | P2 ] [ B21 ] +*> [ 0 ] +*> +*> X11 is P-by-Q, and X21 is (M-P)-by-Q. P must be no larger than M-P, +*> Q, or M-Q. Routines ZUNBDB1, ZUNBDB3, and ZUNBDB4 handle cases in +*> which P is not the minimum dimension. +*> +*> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), +*> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by +*> Householder vectors. +*> +*> B11 and B12 are P-by-P bidiagonal matrices represented implicitly by +*> angles THETA, PHI. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows X11 plus the number of rows in X21. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11. 0 <= P <= min(M-P,Q,M-Q). +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= M. +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is COMPLEX*16 array, dimension (LDX11,Q) +*> On entry, the top block of the matrix X to be reduced. On +*> exit, the columns of tril(X11) specify reflectors for P1 and +*> the rows of triu(X11,1) specify reflectors for Q1. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= P. +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is COMPLEX*16 array, dimension (LDX21,Q) +*> On entry, the bottom block of the matrix X to be reduced. On +*> exit, the columns of tril(X21) specify reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. LDX21 >= M-P. +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is DOUBLE PRECISION array, dimension (Q) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] PHI +*> \verbatim +*> PHI is DOUBLE PRECISION array, dimension (Q-1) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP1 +*> \verbatim +*> TAUP1 is COMPLEX*16 array, dimension (P) +*> The scalar factors of the elementary reflectors that define +*> P1. +*> \endverbatim +*> +*> \param[out] TAUP2 +*> \verbatim +*> TAUP2 is COMPLEX*16 array, dimension (M-P) +*> The scalar factors of the elementary reflectors that define +*> P2. +*> \endverbatim +*> +*> \param[out] TAUQ1 +*> \verbatim +*> TAUQ1 is COMPLEX*16 array, dimension (Q) +*> The scalar factors of the elementary reflectors that define +*> Q1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= M-Q. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The upper-bidiagonal blocks B11, B21 are represented implicitly by +*> angles THETA(1), ..., THETA(Q) and PHI(1), ..., PHI(Q-1). Every entry +*> in each bidiagonal band is a product of a sine or cosine of a THETA +*> with a sine or cosine of a PHI. See [1] or ZUNCSD for details. +*> +*> P1, P2, and Q1 are represented as products of elementary reflectors. +*> See ZUNCSD2BY1 for details on generating P1, P2, and Q1 using ZUNGQR +*> and ZUNGLQ. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +*> +* ===================================================================== + SUBROUTINE ZUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. + DOUBLE PRECISION PHI(*), THETA(*) + COMPLEX*16 TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), + $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* ==================================================================== +* +* .. Parameters .. + COMPLEX*16 NEGONE, ONE + PARAMETER ( NEGONE = (-1.0D0,0.0D0), + $ ONE = (1.0D0,0.0D0) ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION C, S + INTEGER CHILDINFO, I, ILARF, IORBDB5, LLARF, LORBDB5, + $ LWORKMIN, LWORKOPT + LOGICAL LQUERY +* .. +* .. External Subroutines .. + EXTERNAL ZLARF, ZLARFGP, ZUNBDB5, ZDROT, ZSCAL, ZLACGV, + $ XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DZNRM2 + EXTERNAL DZNRM2 +* .. +* .. Intrinsic Function .. + INTRINSIC ATAN2, COS, MAX, SIN, SQRT +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -1 + ELSE IF( P .LT. 0 .OR. P .GT. M-P ) THEN + INFO = -2 + ELSE IF( Q .LT. 0 .OR. Q .LT. P .OR. M-Q .LT. P ) THEN + INFO = -3 + ELSE IF( LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -5 + ELSE IF( LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + ILARF = 2 + LLARF = MAX( P-1, M-P, Q-1 ) + IORBDB5 = 2 + LORBDB5 = Q-1 + LWORKOPT = MAX( ILARF+LLARF-1, IORBDB5+LORBDB5-1 ) + LWORKMIN = LWORKOPT + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'ZUNBDB2', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Reduce rows 1, ..., P of X11 and X21 +* + DO I = 1, P +* + IF( I .GT. 1 ) THEN + CALL ZDROT( Q-I+1, X11(I,I), LDX11, X21(I-1,I), LDX21, C, + $ S ) + END IF + CALL ZLACGV( Q-I+1, X11(I,I), LDX11 ) + CALL ZLARFGP( Q-I+1, X11(I,I), X11(I,I+1), LDX11, TAUQ1(I) ) + C = DBLE( X11(I,I) ) + X11(I,I) = ONE + CALL ZLARF( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL ZLARF( 'R', M-P-I+1, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X21(I,I), LDX21, WORK(ILARF) ) + CALL ZLACGV( Q-I+1, X11(I,I), LDX11 ) + S = SQRT( DZNRM2( P-I, X11(I+1,I), 1 )**2 + $ + DZNRM2( M-P-I+1, X21(I,I), 1 )**2 ) + THETA(I) = ATAN2( S, C ) +* + CALL ZUNBDB5( P-I, M-P-I+1, Q-I, X11(I+1,I), 1, X21(I,I), 1, + $ X11(I+1,I+1), LDX11, X21(I,I+1), LDX21, + $ WORK(IORBDB5), LORBDB5, CHILDINFO ) + CALL ZSCAL( P-I, NEGONE, X11(I+1,I), 1 ) + CALL ZLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, TAUP2(I) ) + IF( I .LT. P ) THEN + CALL ZLARFGP( P-I, X11(I+1,I), X11(I+2,I), 1, TAUP1(I) ) + PHI(I) = ATAN2( DBLE( X11(I+1,I) ), DBLE( X21(I,I) ) ) + C = COS( PHI(I) ) + S = SIN( PHI(I) ) + X11(I+1,I) = ONE + CALL ZLARF( 'L', P-I, Q-I, X11(I+1,I), 1, DCONJG(TAUP1(I)), + $ X11(I+1,I+1), LDX11, WORK(ILARF) ) + END IF + X21(I,I) = ONE + CALL ZLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, DCONJG(TAUP2(I)), + $ X21(I,I+1), LDX21, WORK(ILARF) ) +* + END DO +* +* Reduce the bottom-right portion of X21 to the identity matrix +* + DO I = P + 1, Q + CALL ZLARFGP( M-P-I+1, X21(I,I), X21(I+1,I), 1, TAUP2(I) ) + X21(I,I) = ONE + CALL ZLARF( 'L', M-P-I+1, Q-I, X21(I,I), 1, DCONJG(TAUP2(I)), + $ X21(I,I+1), LDX21, WORK(ILARF) ) + END DO +* + RETURN +* +* End of ZUNBDB2 +* + END + diff --git a/dspl/liblapack/SRC/zunbdb3.f b/dspl/liblapack/SRC/zunbdb3.f new file mode 100644 index 0000000..523aee5 --- /dev/null +++ b/dspl/liblapack/SRC/zunbdb3.f @@ -0,0 +1,336 @@ +*> \brief \b ZUNBDB3 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNBDB3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, +* TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. +* DOUBLE PRECISION PHI(*), THETA(*) +* COMPLEX*16 TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), +* $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> ZUNBDB3 simultaneously bidiagonalizes the blocks of a tall and skinny +*> matrix X with orthonomal columns: +*> +*> [ B11 ] +*> [ X11 ] [ P1 | ] [ 0 ] +*> [-----] = [---------] [-----] Q1**T . +*> [ X21 ] [ | P2 ] [ B21 ] +*> [ 0 ] +*> +*> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-P must be no larger than P, +*> Q, or M-Q. Routines ZUNBDB1, ZUNBDB2, and ZUNBDB4 handle cases in +*> which M-P is not the minimum dimension. +*> +*> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), +*> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by +*> Householder vectors. +*> +*> B11 and B12 are (M-P)-by-(M-P) bidiagonal matrices represented +*> implicitly by angles THETA, PHI. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows X11 plus the number of rows in X21. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11. 0 <= P <= M. M-P <= min(P,Q,M-Q). +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= M. +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is COMPLEX*16 array, dimension (LDX11,Q) +*> On entry, the top block of the matrix X to be reduced. On +*> exit, the columns of tril(X11) specify reflectors for P1 and +*> the rows of triu(X11,1) specify reflectors for Q1. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= P. +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is COMPLEX*16 array, dimension (LDX21,Q) +*> On entry, the bottom block of the matrix X to be reduced. On +*> exit, the columns of tril(X21) specify reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. LDX21 >= M-P. +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is DOUBLE PRECISION array, dimension (Q) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] PHI +*> \verbatim +*> PHI is DOUBLE PRECISION array, dimension (Q-1) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP1 +*> \verbatim +*> TAUP1 is COMPLEX*16 array, dimension (P) +*> The scalar factors of the elementary reflectors that define +*> P1. +*> \endverbatim +*> +*> \param[out] TAUP2 +*> \verbatim +*> TAUP2 is COMPLEX*16 array, dimension (M-P) +*> The scalar factors of the elementary reflectors that define +*> P2. +*> \endverbatim +*> +*> \param[out] TAUQ1 +*> \verbatim +*> TAUQ1 is COMPLEX*16 array, dimension (Q) +*> The scalar factors of the elementary reflectors that define +*> Q1. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= M-Q. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The upper-bidiagonal blocks B11, B21 are represented implicitly by +*> angles THETA(1), ..., THETA(Q) and PHI(1), ..., PHI(Q-1). Every entry +*> in each bidiagonal band is a product of a sine or cosine of a THETA +*> with a sine or cosine of a PHI. See [1] or ZUNCSD for details. +*> +*> P1, P2, and Q1 are represented as products of elementary reflectors. +*> See ZUNCSD2BY1 for details on generating P1, P2, and Q1 using ZUNGQR +*> and ZUNGLQ. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +*> +* ===================================================================== + SUBROUTINE ZUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + $ TAUP1, TAUP2, TAUQ1, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. + DOUBLE PRECISION PHI(*), THETA(*) + COMPLEX*16 TAUP1(*), TAUP2(*), TAUQ1(*), WORK(*), + $ X11(LDX11,*), X21(LDX21,*) +* .. +* +* ==================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = (1.0D0,0.0D0) ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION C, S + INTEGER CHILDINFO, I, ILARF, IORBDB5, LLARF, LORBDB5, + $ LWORKMIN, LWORKOPT + LOGICAL LQUERY +* .. +* .. External Subroutines .. + EXTERNAL ZLARF, ZLARFGP, ZUNBDB5, ZDROT, ZLACGV, XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DZNRM2 + EXTERNAL DZNRM2 +* .. +* .. Intrinsic Function .. + INTRINSIC ATAN2, COS, MAX, SIN, SQRT +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -1 + ELSE IF( 2*P .LT. M .OR. P .GT. M ) THEN + INFO = -2 + ELSE IF( Q .LT. M-P .OR. M-Q .LT. M-P ) THEN + INFO = -3 + ELSE IF( LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -5 + ELSE IF( LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + ILARF = 2 + LLARF = MAX( P, M-P-1, Q-1 ) + IORBDB5 = 2 + LORBDB5 = Q-1 + LWORKOPT = MAX( ILARF+LLARF-1, IORBDB5+LORBDB5-1 ) + LWORKMIN = LWORKOPT + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'ZUNBDB3', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Reduce rows 1, ..., M-P of X11 and X21 +* + DO I = 1, M-P +* + IF( I .GT. 1 ) THEN + CALL ZDROT( Q-I+1, X11(I-1,I), LDX11, X21(I,I), LDX11, C, + $ S ) + END IF +* + CALL ZLACGV( Q-I+1, X21(I,I), LDX21 ) + CALL ZLARFGP( Q-I+1, X21(I,I), X21(I,I+1), LDX21, TAUQ1(I) ) + S = DBLE( X21(I,I) ) + X21(I,I) = ONE + CALL ZLARF( 'R', P-I+1, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X11(I,I), LDX11, WORK(ILARF) ) + CALL ZLARF( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X21(I+1,I), LDX21, WORK(ILARF) ) + CALL ZLACGV( Q-I+1, X21(I,I), LDX21 ) + C = SQRT( DZNRM2( P-I+1, X11(I,I), 1 )**2 + $ + DZNRM2( M-P-I, X21(I+1,I), 1 )**2 ) + THETA(I) = ATAN2( S, C ) +* + CALL ZUNBDB5( P-I+1, M-P-I, Q-I, X11(I,I), 1, X21(I+1,I), 1, + $ X11(I,I+1), LDX11, X21(I+1,I+1), LDX21, + $ WORK(IORBDB5), LORBDB5, CHILDINFO ) + CALL ZLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) + IF( I .LT. M-P ) THEN + CALL ZLARFGP( M-P-I, X21(I+1,I), X21(I+2,I), 1, TAUP2(I) ) + PHI(I) = ATAN2( DBLE( X21(I+1,I) ), DBLE( X11(I,I) ) ) + C = COS( PHI(I) ) + S = SIN( PHI(I) ) + X21(I+1,I) = ONE + CALL ZLARF( 'L', M-P-I, Q-I, X21(I+1,I), 1, + $ DCONJG(TAUP2(I)), X21(I+1,I+1), LDX21, + $ WORK(ILARF) ) + END IF + X11(I,I) = ONE + CALL ZLARF( 'L', P-I+1, Q-I, X11(I,I), 1, DCONJG(TAUP1(I)), + $ X11(I,I+1), LDX11, WORK(ILARF) ) +* + END DO +* +* Reduce the bottom-right portion of X11 to the identity matrix +* + DO I = M-P + 1, Q + CALL ZLARFGP( P-I+1, X11(I,I), X11(I+1,I), 1, TAUP1(I) ) + X11(I,I) = ONE + CALL ZLARF( 'L', P-I+1, Q-I, X11(I,I), 1, DCONJG(TAUP1(I)), + $ X11(I,I+1), LDX11, WORK(ILARF) ) + END DO +* + RETURN +* +* End of ZUNBDB3 +* + END + diff --git a/dspl/liblapack/SRC/zunbdb4.f b/dspl/liblapack/SRC/zunbdb4.f new file mode 100644 index 0000000..026f5a5 --- /dev/null +++ b/dspl/liblapack/SRC/zunbdb4.f @@ -0,0 +1,385 @@ +*> \brief \b ZUNBDB4 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNBDB4 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, +* TAUP1, TAUP2, TAUQ1, PHANTOM, WORK, LWORK, +* INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. +* DOUBLE PRECISION PHI(*), THETA(*) +* COMPLEX*16 PHANTOM(*), TAUP1(*), TAUP2(*), TAUQ1(*), +* $ WORK(*), X11(LDX11,*), X21(LDX21,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> ZUNBDB4 simultaneously bidiagonalizes the blocks of a tall and skinny +*> matrix X with orthonomal columns: +*> +*> [ B11 ] +*> [ X11 ] [ P1 | ] [ 0 ] +*> [-----] = [---------] [-----] Q1**T . +*> [ X21 ] [ | P2 ] [ B21 ] +*> [ 0 ] +*> +*> X11 is P-by-Q, and X21 is (M-P)-by-Q. M-Q must be no larger than P, +*> M-P, or Q. Routines ZUNBDB1, ZUNBDB2, and ZUNBDB3 handle cases in +*> which M-Q is not the minimum dimension. +*> +*> The unitary matrices P1, P2, and Q1 are P-by-P, (M-P)-by-(M-P), +*> and (M-Q)-by-(M-Q), respectively. They are represented implicitly by +*> Householder vectors. +*> +*> B11 and B12 are (M-Q)-by-(M-Q) bidiagonal matrices represented +*> implicitly by angles THETA, PHI. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows X11 plus the number of rows in X21. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= M and +*> M-Q <= min(P,M-P,Q). +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is COMPLEX*16 array, dimension (LDX11,Q) +*> On entry, the top block of the matrix X to be reduced. On +*> exit, the columns of tril(X11) specify reflectors for P1 and +*> the rows of triu(X11,1) specify reflectors for Q1. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= P. +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is COMPLEX*16 array, dimension (LDX21,Q) +*> On entry, the bottom block of the matrix X to be reduced. On +*> exit, the columns of tril(X21) specify reflectors for P2. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. LDX21 >= M-P. +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is DOUBLE PRECISION array, dimension (Q) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] PHI +*> \verbatim +*> PHI is DOUBLE PRECISION array, dimension (Q-1) +*> The entries of the bidiagonal blocks B11, B21 are defined by +*> THETA and PHI. See Further Details. +*> \endverbatim +*> +*> \param[out] TAUP1 +*> \verbatim +*> TAUP1 is COMPLEX*16 array, dimension (P) +*> The scalar factors of the elementary reflectors that define +*> P1. +*> \endverbatim +*> +*> \param[out] TAUP2 +*> \verbatim +*> TAUP2 is COMPLEX*16 array, dimension (M-P) +*> The scalar factors of the elementary reflectors that define +*> P2. +*> \endverbatim +*> +*> \param[out] TAUQ1 +*> \verbatim +*> TAUQ1 is COMPLEX*16 array, dimension (Q) +*> The scalar factors of the elementary reflectors that define +*> Q1. +*> \endverbatim +*> +*> \param[out] PHANTOM +*> \verbatim +*> PHANTOM is COMPLEX*16 array, dimension (M) +*> The routine computes an M-by-1 column vector Y that is +*> orthogonal to the columns of [ X11; X21 ]. PHANTOM(1:P) and +*> PHANTOM(P+1:M) contain Householder vectors for Y(1:P) and +*> Y(P+1:M), respectively. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= M-Q. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The upper-bidiagonal blocks B11, B21 are represented implicitly by +*> angles THETA(1), ..., THETA(Q) and PHI(1), ..., PHI(Q-1). Every entry +*> in each bidiagonal band is a product of a sine or cosine of a THETA +*> with a sine or cosine of a PHI. See [1] or ZUNCSD for details. +*> +*> P1, P2, and Q1 are represented as products of elementary reflectors. +*> See ZUNCSD2BY1 for details on generating P1, P2, and Q1 using ZUNGQR +*> and ZUNGLQ. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +*> +* ===================================================================== + SUBROUTINE ZUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, PHI, + $ TAUP1, TAUP2, TAUQ1, PHANTOM, WORK, LWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.8.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, LWORK, M, P, Q, LDX11, LDX21 +* .. +* .. Array Arguments .. + DOUBLE PRECISION PHI(*), THETA(*) + COMPLEX*16 PHANTOM(*), TAUP1(*), TAUP2(*), TAUQ1(*), + $ WORK(*), X11(LDX11,*), X21(LDX21,*) +* .. +* +* ==================================================================== +* +* .. Parameters .. + COMPLEX*16 NEGONE, ONE, ZERO + PARAMETER ( NEGONE = (-1.0D0,0.0D0), ONE = (1.0D0,0.0D0), + $ ZERO = (0.0D0,0.0D0) ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION C, S + INTEGER CHILDINFO, I, ILARF, IORBDB5, J, LLARF, + $ LORBDB5, LWORKMIN, LWORKOPT + LOGICAL LQUERY +* .. +* .. External Subroutines .. + EXTERNAL ZLARF, ZLARFGP, ZUNBDB5, ZDROT, ZSCAL, ZLACGV, + $ XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DZNRM2 + EXTERNAL DZNRM2 +* .. +* .. Intrinsic Function .. + INTRINSIC ATAN2, COS, MAX, SIN, SQRT +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -1 + ELSE IF( P .LT. M-Q .OR. M-P .LT. M-Q ) THEN + INFO = -2 + ELSE IF( Q .LT. M-Q .OR. Q .GT. M ) THEN + INFO = -3 + ELSE IF( LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -5 + ELSE IF( LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -7 + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN + ILARF = 2 + LLARF = MAX( Q-1, P-1, M-P-1 ) + IORBDB5 = 2 + LORBDB5 = Q + LWORKOPT = ILARF + LLARF - 1 + LWORKOPT = MAX( LWORKOPT, IORBDB5 + LORBDB5 - 1 ) + LWORKMIN = LWORKOPT + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN + INFO = -14 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'ZUNBDB4', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Reduce columns 1, ..., M-Q of X11 and X21 +* + DO I = 1, M-Q +* + IF( I .EQ. 1 ) THEN + DO J = 1, M + PHANTOM(J) = ZERO + END DO + CALL ZUNBDB5( P, M-P, Q, PHANTOM(1), 1, PHANTOM(P+1), 1, + $ X11, LDX11, X21, LDX21, WORK(IORBDB5), + $ LORBDB5, CHILDINFO ) + CALL ZSCAL( P, NEGONE, PHANTOM(1), 1 ) + CALL ZLARFGP( P, PHANTOM(1), PHANTOM(2), 1, TAUP1(1) ) + CALL ZLARFGP( M-P, PHANTOM(P+1), PHANTOM(P+2), 1, TAUP2(1) ) + THETA(I) = ATAN2( DBLE( PHANTOM(1) ), DBLE( PHANTOM(P+1) ) ) + C = COS( THETA(I) ) + S = SIN( THETA(I) ) + PHANTOM(1) = ONE + PHANTOM(P+1) = ONE + CALL ZLARF( 'L', P, Q, PHANTOM(1), 1, DCONJG(TAUP1(1)), X11, + $ LDX11, WORK(ILARF) ) + CALL ZLARF( 'L', M-P, Q, PHANTOM(P+1), 1, DCONJG(TAUP2(1)), + $ X21, LDX21, WORK(ILARF) ) + ELSE + CALL ZUNBDB5( P-I+1, M-P-I+1, Q-I+1, X11(I,I-1), 1, + $ X21(I,I-1), 1, X11(I,I), LDX11, X21(I,I), + $ LDX21, WORK(IORBDB5), LORBDB5, CHILDINFO ) + CALL ZSCAL( P-I+1, NEGONE, X11(I,I-1), 1 ) + CALL ZLARFGP( P-I+1, X11(I,I-1), X11(I+1,I-1), 1, TAUP1(I) ) + CALL ZLARFGP( M-P-I+1, X21(I,I-1), X21(I+1,I-1), 1, + $ TAUP2(I) ) + THETA(I) = ATAN2( DBLE( X11(I,I-1) ), DBLE( X21(I,I-1) ) ) + C = COS( THETA(I) ) + S = SIN( THETA(I) ) + X11(I,I-1) = ONE + X21(I,I-1) = ONE + CALL ZLARF( 'L', P-I+1, Q-I+1, X11(I,I-1), 1, + $ DCONJG(TAUP1(I)), X11(I,I), LDX11, WORK(ILARF) ) + CALL ZLARF( 'L', M-P-I+1, Q-I+1, X21(I,I-1), 1, + $ DCONJG(TAUP2(I)), X21(I,I), LDX21, WORK(ILARF) ) + END IF +* + CALL ZDROT( Q-I+1, X11(I,I), LDX11, X21(I,I), LDX21, S, -C ) + CALL ZLACGV( Q-I+1, X21(I,I), LDX21 ) + CALL ZLARFGP( Q-I+1, X21(I,I), X21(I,I+1), LDX21, TAUQ1(I) ) + C = DBLE( X21(I,I) ) + X21(I,I) = ONE + CALL ZLARF( 'R', P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL ZLARF( 'R', M-P-I, Q-I+1, X21(I,I), LDX21, TAUQ1(I), + $ X21(I+1,I), LDX21, WORK(ILARF) ) + CALL ZLACGV( Q-I+1, X21(I,I), LDX21 ) + IF( I .LT. M-Q ) THEN + S = SQRT( DZNRM2( P-I, X11(I+1,I), 1 )**2 + $ + DZNRM2( M-P-I, X21(I+1,I), 1 )**2 ) + PHI(I) = ATAN2( S, C ) + END IF +* + END DO +* +* Reduce the bottom-right portion of X11 to [ I 0 ] +* + DO I = M - Q + 1, P + CALL ZLACGV( Q-I+1, X11(I,I), LDX11 ) + CALL ZLARFGP( Q-I+1, X11(I,I), X11(I,I+1), LDX11, TAUQ1(I) ) + X11(I,I) = ONE + CALL ZLARF( 'R', P-I, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X11(I+1,I), LDX11, WORK(ILARF) ) + CALL ZLARF( 'R', Q-P, Q-I+1, X11(I,I), LDX11, TAUQ1(I), + $ X21(M-Q+1,I), LDX21, WORK(ILARF) ) + CALL ZLACGV( Q-I+1, X11(I,I), LDX11 ) + END DO +* +* Reduce the bottom-right portion of X21 to [ 0 I ] +* + DO I = P + 1, Q + CALL ZLACGV( Q-I+1, X21(M-Q+I-P,I), LDX21 ) + CALL ZLARFGP( Q-I+1, X21(M-Q+I-P,I), X21(M-Q+I-P,I+1), LDX21, + $ TAUQ1(I) ) + X21(M-Q+I-P,I) = ONE + CALL ZLARF( 'R', Q-I, Q-I+1, X21(M-Q+I-P,I), LDX21, TAUQ1(I), + $ X21(M-Q+I-P+1,I), LDX21, WORK(ILARF) ) + CALL ZLACGV( Q-I+1, X21(M-Q+I-P,I), LDX21 ) + END DO +* + RETURN +* +* End of ZUNBDB4 +* + END + diff --git a/dspl/liblapack/SRC/zunbdb5.f b/dspl/liblapack/SRC/zunbdb5.f new file mode 100644 index 0000000..be708b7 --- /dev/null +++ b/dspl/liblapack/SRC/zunbdb5.f @@ -0,0 +1,274 @@ +*> \brief \b ZUNBDB5 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNBDB5 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, +* LDQ2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INCX1, INCX2, INFO, LDQ1, LDQ2, LWORK, M1, M2, +* $ N +* .. +* .. Array Arguments .. +* COMPLEX*16 Q1(LDQ1,*), Q2(LDQ2,*), WORK(*), X1(*), X2(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> ZUNBDB5 orthogonalizes the column vector +*> X = [ X1 ] +*> [ X2 ] +*> with respect to the columns of +*> Q = [ Q1 ] . +*> [ Q2 ] +*> The columns of Q must be orthonormal. +*> +*> If the projection is zero according to Kahan's "twice is enough" +*> criterion, then some other vector from the orthogonal complement +*> is returned. This vector is chosen in an arbitrary but deterministic +*> way. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M1 +*> \verbatim +*> M1 is INTEGER +*> The dimension of X1 and the number of rows in Q1. 0 <= M1. +*> \endverbatim +*> +*> \param[in] M2 +*> \verbatim +*> M2 is INTEGER +*> The dimension of X2 and the number of rows in Q2. 0 <= M2. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in Q1 and Q2. 0 <= N. +*> \endverbatim +*> +*> \param[in,out] X1 +*> \verbatim +*> X1 is COMPLEX*16 array, dimension (M1) +*> On entry, the top part of the vector to be orthogonalized. +*> On exit, the top part of the projected vector. +*> \endverbatim +*> +*> \param[in] INCX1 +*> \verbatim +*> INCX1 is INTEGER +*> Increment for entries of X1. +*> \endverbatim +*> +*> \param[in,out] X2 +*> \verbatim +*> X2 is COMPLEX*16 array, dimension (M2) +*> On entry, the bottom part of the vector to be +*> orthogonalized. On exit, the bottom part of the projected +*> vector. +*> \endverbatim +*> +*> \param[in] INCX2 +*> \verbatim +*> INCX2 is INTEGER +*> Increment for entries of X2. +*> \endverbatim +*> +*> \param[in] Q1 +*> \verbatim +*> Q1 is COMPLEX*16 array, dimension (LDQ1, N) +*> The top part of the orthonormal basis matrix. +*> \endverbatim +*> +*> \param[in] LDQ1 +*> \verbatim +*> LDQ1 is INTEGER +*> The leading dimension of Q1. LDQ1 >= M1. +*> \endverbatim +*> +*> \param[in] Q2 +*> \verbatim +*> Q2 is COMPLEX*16 array, dimension (LDQ2, N) +*> The bottom part of the orthonormal basis matrix. +*> \endverbatim +*> +*> \param[in] LDQ2 +*> \verbatim +*> LDQ2 is INTEGER +*> The leading dimension of Q2. LDQ2 >= M2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= N. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNBDB5( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INCX1, INCX2, INFO, LDQ1, LDQ2, LWORK, M1, M2, + $ N +* .. +* .. Array Arguments .. + COMPLEX*16 Q1(LDQ1,*), Q2(LDQ2,*), WORK(*), X1(*), X2(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = (1.0D0,0.0D0), ZERO = (0.0D0,0.0D0) ) +* .. +* .. Local Scalars .. + INTEGER CHILDINFO, I, J +* .. +* .. External Subroutines .. + EXTERNAL ZUNBDB6, XERBLA +* .. +* .. External Functions .. + DOUBLE PRECISION DZNRM2 + EXTERNAL DZNRM2 +* .. +* .. Intrinsic Function .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + IF( M1 .LT. 0 ) THEN + INFO = -1 + ELSE IF( M2 .LT. 0 ) THEN + INFO = -2 + ELSE IF( N .LT. 0 ) THEN + INFO = -3 + ELSE IF( INCX1 .LT. 1 ) THEN + INFO = -5 + ELSE IF( INCX2 .LT. 1 ) THEN + INFO = -7 + ELSE IF( LDQ1 .LT. MAX( 1, M1 ) ) THEN + INFO = -9 + ELSE IF( LDQ2 .LT. MAX( 1, M2 ) ) THEN + INFO = -11 + ELSE IF( LWORK .LT. N ) THEN + INFO = -13 + END IF +* + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'ZUNBDB5', -INFO ) + RETURN + END IF +* +* Project X onto the orthogonal complement of Q +* + CALL ZUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, LDQ2, + $ WORK, LWORK, CHILDINFO ) +* +* If the projection is nonzero, then return +* + IF( DZNRM2(M1,X1,INCX1) .NE. ZERO + $ .OR. DZNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + RETURN + END IF +* +* Project each standard basis vector e_1,...,e_M1 in turn, stopping +* when a nonzero projection is found +* + DO I = 1, M1 + DO J = 1, M1 + X1(J) = ZERO + END DO + X1(I) = ONE + DO J = 1, M2 + X2(J) = ZERO + END DO + CALL ZUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, CHILDINFO ) + IF( DZNRM2(M1,X1,INCX1) .NE. ZERO + $ .OR. DZNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + RETURN + END IF + END DO +* +* Project each standard basis vector e_(M1+1),...,e_(M1+M2) in turn, +* stopping when a nonzero projection is found +* + DO I = 1, M2 + DO J = 1, M1 + X1(J) = ZERO + END DO + DO J = 1, M2 + X2(J) = ZERO + END DO + X2(I) = ONE + CALL ZUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, CHILDINFO ) + IF( DZNRM2(M1,X1,INCX1) .NE. ZERO + $ .OR. DZNRM2(M2,X2,INCX2) .NE. ZERO ) THEN + RETURN + END IF + END DO +* + RETURN +* +* End of ZUNBDB5 +* + END + diff --git a/dspl/liblapack/SRC/zunbdb6.f b/dspl/liblapack/SRC/zunbdb6.f new file mode 100644 index 0000000..c4ccbfb --- /dev/null +++ b/dspl/liblapack/SRC/zunbdb6.f @@ -0,0 +1,313 @@ +*> \brief \b ZUNBDB6 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNBDB6 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, +* LDQ2, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INCX1, INCX2, INFO, LDQ1, LDQ2, LWORK, M1, M2, +* $ N +* .. +* .. Array Arguments .. +* COMPLEX*16 Q1(LDQ1,*), Q2(LDQ2,*), WORK(*), X1(*), X2(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> ZUNBDB6 orthogonalizes the column vector +*> X = [ X1 ] +*> [ X2 ] +*> with respect to the columns of +*> Q = [ Q1 ] . +*> [ Q2 ] +*> The columns of Q must be orthonormal. +*> +*> If the projection is zero according to Kahan's "twice is enough" +*> criterion, then the zero vector is returned. +*> +*>\endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M1 +*> \verbatim +*> M1 is INTEGER +*> The dimension of X1 and the number of rows in Q1. 0 <= M1. +*> \endverbatim +*> +*> \param[in] M2 +*> \verbatim +*> M2 is INTEGER +*> The dimension of X2 and the number of rows in Q2. 0 <= M2. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns in Q1 and Q2. 0 <= N. +*> \endverbatim +*> +*> \param[in,out] X1 +*> \verbatim +*> X1 is COMPLEX*16 array, dimension (M1) +*> On entry, the top part of the vector to be orthogonalized. +*> On exit, the top part of the projected vector. +*> \endverbatim +*> +*> \param[in] INCX1 +*> \verbatim +*> INCX1 is INTEGER +*> Increment for entries of X1. +*> \endverbatim +*> +*> \param[in,out] X2 +*> \verbatim +*> X2 is COMPLEX*16 array, dimension (M2) +*> On entry, the bottom part of the vector to be +*> orthogonalized. On exit, the bottom part of the projected +*> vector. +*> \endverbatim +*> +*> \param[in] INCX2 +*> \verbatim +*> INCX2 is INTEGER +*> Increment for entries of X2. +*> \endverbatim +*> +*> \param[in] Q1 +*> \verbatim +*> Q1 is COMPLEX*16 array, dimension (LDQ1, N) +*> The top part of the orthonormal basis matrix. +*> \endverbatim +*> +*> \param[in] LDQ1 +*> \verbatim +*> LDQ1 is INTEGER +*> The leading dimension of Q1. LDQ1 >= M1. +*> \endverbatim +*> +*> \param[in] Q2 +*> \verbatim +*> Q2 is COMPLEX*16 array, dimension (LDQ2, N) +*> The bottom part of the orthonormal basis matrix. +*> \endverbatim +*> +*> \param[in] LDQ2 +*> \verbatim +*> LDQ2 is INTEGER +*> The leading dimension of Q2. LDQ2 >= M2. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LWORK) +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= N. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNBDB6( M1, M2, N, X1, INCX1, X2, INCX2, Q1, LDQ1, Q2, + $ LDQ2, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + INTEGER INCX1, INCX2, INFO, LDQ1, LDQ2, LWORK, M1, M2, + $ N +* .. +* .. Array Arguments .. + COMPLEX*16 Q1(LDQ1,*), Q2(LDQ2,*), WORK(*), X1(*), X2(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ALPHASQ, REALONE, REALZERO + PARAMETER ( ALPHASQ = 0.01D0, REALONE = 1.0D0, + $ REALZERO = 0.0D0 ) + COMPLEX*16 NEGONE, ONE, ZERO + PARAMETER ( NEGONE = (-1.0D0,0.0D0), ONE = (1.0D0,0.0D0), + $ ZERO = (0.0D0,0.0D0) ) +* .. +* .. Local Scalars .. + INTEGER I + DOUBLE PRECISION NORMSQ1, NORMSQ2, SCL1, SCL2, SSQ1, SSQ2 +* .. +* .. External Subroutines .. + EXTERNAL ZGEMV, ZLASSQ, XERBLA +* .. +* .. Intrinsic Function .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + IF( M1 .LT. 0 ) THEN + INFO = -1 + ELSE IF( M2 .LT. 0 ) THEN + INFO = -2 + ELSE IF( N .LT. 0 ) THEN + INFO = -3 + ELSE IF( INCX1 .LT. 1 ) THEN + INFO = -5 + ELSE IF( INCX2 .LT. 1 ) THEN + INFO = -7 + ELSE IF( LDQ1 .LT. MAX( 1, M1 ) ) THEN + INFO = -9 + ELSE IF( LDQ2 .LT. MAX( 1, M2 ) ) THEN + INFO = -11 + ELSE IF( LWORK .LT. N ) THEN + INFO = -13 + END IF +* + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'ZUNBDB6', -INFO ) + RETURN + END IF +* +* First, project X onto the orthogonal complement of Q's column +* space +* + SCL1 = REALZERO + SSQ1 = REALONE + CALL ZLASSQ( M1, X1, INCX1, SCL1, SSQ1 ) + SCL2 = REALZERO + SSQ2 = REALONE + CALL ZLASSQ( M2, X2, INCX2, SCL2, SSQ2 ) + NORMSQ1 = SCL1**2*SSQ1 + SCL2**2*SSQ2 +* + IF( M1 .EQ. 0 ) THEN + DO I = 1, N + WORK(I) = ZERO + END DO + ELSE + CALL ZGEMV( 'C', M1, N, ONE, Q1, LDQ1, X1, INCX1, ZERO, WORK, + $ 1 ) + END IF +* + CALL ZGEMV( 'C', M2, N, ONE, Q2, LDQ2, X2, INCX2, ONE, WORK, 1 ) +* + CALL ZGEMV( 'N', M1, N, NEGONE, Q1, LDQ1, WORK, 1, ONE, X1, + $ INCX1 ) + CALL ZGEMV( 'N', M2, N, NEGONE, Q2, LDQ2, WORK, 1, ONE, X2, + $ INCX2 ) +* + SCL1 = REALZERO + SSQ1 = REALONE + CALL ZLASSQ( M1, X1, INCX1, SCL1, SSQ1 ) + SCL2 = REALZERO + SSQ2 = REALONE + CALL ZLASSQ( M2, X2, INCX2, SCL2, SSQ2 ) + NORMSQ2 = SCL1**2*SSQ1 + SCL2**2*SSQ2 +* +* If projection is sufficiently large in norm, then stop. +* If projection is zero, then stop. +* Otherwise, project again. +* + IF( NORMSQ2 .GE. ALPHASQ*NORMSQ1 ) THEN + RETURN + END IF +* + IF( NORMSQ2 .EQ. ZERO ) THEN + RETURN + END IF +* + NORMSQ1 = NORMSQ2 +* + DO I = 1, N + WORK(I) = ZERO + END DO +* + IF( M1 .EQ. 0 ) THEN + DO I = 1, N + WORK(I) = ZERO + END DO + ELSE + CALL ZGEMV( 'C', M1, N, ONE, Q1, LDQ1, X1, INCX1, ZERO, WORK, + $ 1 ) + END IF +* + CALL ZGEMV( 'C', M2, N, ONE, Q2, LDQ2, X2, INCX2, ONE, WORK, 1 ) +* + CALL ZGEMV( 'N', M1, N, NEGONE, Q1, LDQ1, WORK, 1, ONE, X1, + $ INCX1 ) + CALL ZGEMV( 'N', M2, N, NEGONE, Q2, LDQ2, WORK, 1, ONE, X2, + $ INCX2 ) +* + SCL1 = REALZERO + SSQ1 = REALONE + CALL ZLASSQ( M1, X1, INCX1, SCL1, SSQ1 ) + SCL2 = REALZERO + SSQ2 = REALONE + CALL ZLASSQ( M1, X1, INCX1, SCL1, SSQ1 ) + NORMSQ2 = SCL1**2*SSQ1 + SCL2**2*SSQ2 +* +* If second projection is sufficiently large in norm, then do +* nothing more. Alternatively, if it shrunk significantly, then +* truncate it to zero. +* + IF( NORMSQ2 .LT. ALPHASQ*NORMSQ1 ) THEN + DO I = 1, M1 + X1(I) = ZERO + END DO + DO I = 1, M2 + X2(I) = ZERO + END DO + END IF +* + RETURN +* +* End of ZUNBDB6 +* + END + diff --git a/dspl/liblapack/SRC/zuncsd.f b/dspl/liblapack/SRC/zuncsd.f new file mode 100644 index 0000000..db53f93 --- /dev/null +++ b/dspl/liblapack/SRC/zuncsd.f @@ -0,0 +1,658 @@ +*> \brief \b ZUNCSD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNCSD + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* RECURSIVE SUBROUTINE ZUNCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, +* SIGNS, M, P, Q, X11, LDX11, X12, +* LDX12, X21, LDX21, X22, LDX22, THETA, +* U1, LDU1, U2, LDU2, V1T, LDV1T, V2T, +* LDV2T, WORK, LWORK, RWORK, LRWORK, +* IWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, SIGNS, TRANS +* INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LDX11, LDX12, +* $ LDX21, LDX22, LRWORK, LWORK, M, P, Q +* .. +* .. Array Arguments .. +* INTEGER IWORK( * ) +* DOUBLE PRECISION THETA( * ) +* DOUBLE PRECISION RWORK( * ) +* COMPLEX*16 U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ), +* $ V2T( LDV2T, * ), WORK( * ), X11( LDX11, * ), +* $ X12( LDX12, * ), X21( LDX21, * ), X22( LDX22, +* $ * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNCSD computes the CS decomposition of an M-by-M partitioned +*> unitary matrix X: +*> +*> [ I 0 0 | 0 0 0 ] +*> [ 0 C 0 | 0 -S 0 ] +*> [ X11 | X12 ] [ U1 | ] [ 0 0 0 | 0 0 -I ] [ V1 | ]**H +*> X = [-----------] = [---------] [---------------------] [---------] . +*> [ X21 | X22 ] [ | U2 ] [ 0 0 0 | I 0 0 ] [ | V2 ] +*> [ 0 S 0 | 0 C 0 ] +*> [ 0 0 I | 0 0 0 ] +*> +*> X11 is P-by-Q. The unitary matrices U1, U2, V1, and V2 are P-by-P, +*> (M-P)-by-(M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. C and S are +*> R-by-R nonnegative diagonal matrices satisfying C^2 + S^2 = I, in +*> which R = MIN(P,M-P,Q,M-Q). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU1 +*> \verbatim +*> JOBU1 is CHARACTER +*> = 'Y': U1 is computed; +*> otherwise: U1 is not computed. +*> \endverbatim +*> +*> \param[in] JOBU2 +*> \verbatim +*> JOBU2 is CHARACTER +*> = 'Y': U2 is computed; +*> otherwise: U2 is not computed. +*> \endverbatim +*> +*> \param[in] JOBV1T +*> \verbatim +*> JOBV1T is CHARACTER +*> = 'Y': V1T is computed; +*> otherwise: V1T is not computed. +*> \endverbatim +*> +*> \param[in] JOBV2T +*> \verbatim +*> JOBV2T is CHARACTER +*> = 'Y': V2T is computed; +*> otherwise: V2T is not computed. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER +*> = 'T': X, U1, U2, V1T, and V2T are stored in row-major +*> order; +*> otherwise: X, U1, U2, V1T, and V2T are stored in column- +*> major order. +*> \endverbatim +*> +*> \param[in] SIGNS +*> \verbatim +*> SIGNS is CHARACTER +*> = 'O': The lower-left block is made nonpositive (the +*> "other" convention); +*> otherwise: The upper-right block is made nonpositive (the +*> "default" convention). +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows and columns in X. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11 and X12. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= M. +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is COMPLEX*16 array, dimension (LDX11,Q) +*> On entry, part of the unitary matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= MAX(1,P). +*> \endverbatim +*> +*> \param[in,out] X12 +*> \verbatim +*> X12 is COMPLEX*16 array, dimension (LDX12,M-Q) +*> On entry, part of the unitary matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX12 +*> \verbatim +*> LDX12 is INTEGER +*> The leading dimension of X12. LDX12 >= MAX(1,P). +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is COMPLEX*16 array, dimension (LDX21,Q) +*> On entry, part of the unitary matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X11. LDX21 >= MAX(1,M-P). +*> \endverbatim +*> +*> \param[in,out] X22 +*> \verbatim +*> X22 is COMPLEX*16 array, dimension (LDX22,M-Q) +*> On entry, part of the unitary matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX22 +*> \verbatim +*> LDX22 is INTEGER +*> The leading dimension of X11. LDX22 >= MAX(1,M-P). +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is DOUBLE PRECISION array, dimension (R), in which R = +*> MIN(P,M-P,Q,M-Q). +*> C = DIAG( COS(THETA(1)), ... , COS(THETA(R)) ) and +*> S = DIAG( SIN(THETA(1)), ... , SIN(THETA(R)) ). +*> \endverbatim +*> +*> \param[out] U1 +*> \verbatim +*> U1 is COMPLEX*16 array, dimension (LDU1,P) +*> If JOBU1 = 'Y', U1 contains the P-by-P unitary matrix U1. +*> \endverbatim +*> +*> \param[in] LDU1 +*> \verbatim +*> LDU1 is INTEGER +*> The leading dimension of U1. If JOBU1 = 'Y', LDU1 >= +*> MAX(1,P). +*> \endverbatim +*> +*> \param[out] U2 +*> \verbatim +*> U2 is COMPLEX*16 array, dimension (LDU2,M-P) +*> If JOBU2 = 'Y', U2 contains the (M-P)-by-(M-P) unitary +*> matrix U2. +*> \endverbatim +*> +*> \param[in] LDU2 +*> \verbatim +*> LDU2 is INTEGER +*> The leading dimension of U2. If JOBU2 = 'Y', LDU2 >= +*> MAX(1,M-P). +*> \endverbatim +*> +*> \param[out] V1T +*> \verbatim +*> V1T is COMPLEX*16 array, dimension (LDV1T,Q) +*> If JOBV1T = 'Y', V1T contains the Q-by-Q matrix unitary +*> matrix V1**H. +*> \endverbatim +*> +*> \param[in] LDV1T +*> \verbatim +*> LDV1T is INTEGER +*> The leading dimension of V1T. If JOBV1T = 'Y', LDV1T >= +*> MAX(1,Q). +*> \endverbatim +*> +*> \param[out] V2T +*> \verbatim +*> V2T is COMPLEX*16 array, dimension (LDV2T,M-Q) +*> If JOBV2T = 'Y', V2T contains the (M-Q)-by-(M-Q) unitary +*> matrix V2**H. +*> \endverbatim +*> +*> \param[in] LDV2T +*> \verbatim +*> LDV2T is INTEGER +*> The leading dimension of V2T. If JOBV2T = 'Y', LDV2T >= +*> MAX(1,M-Q). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the work array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension MAX(1,LRWORK) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> If INFO > 0 on exit, RWORK(2:R) contains the values PHI(1), +*> ..., PHI(R-1) that, together with THETA(1), ..., THETA(R), +*> define the matrix in intermediate bidiagonal-block form +*> remaining after nonconvergence. INFO specifies the number +*> of nonzero PHI's. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of the array RWORK. +*> +*> If LRWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the RWORK array, returns +*> this value as the first entry of the work array, and no error +*> message related to LRWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (M-MIN(P,M-P,Q,M-Q)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: ZBBCSD did not converge. See the description of RWORK +*> above for details. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date June 2017 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + RECURSIVE SUBROUTINE ZUNCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, + $ SIGNS, M, P, Q, X11, LDX11, X12, + $ LDX12, X21, LDX21, X22, LDX22, THETA, + $ U1, LDU1, U2, LDU2, V1T, LDV1T, V2T, + $ LDV2T, WORK, LWORK, RWORK, LRWORK, + $ IWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* June 2017 +* +* .. Scalar Arguments .. + CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, SIGNS, TRANS + INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LDX11, LDX12, + $ LDX21, LDX22, LRWORK, LWORK, M, P, Q +* .. +* .. Array Arguments .. + INTEGER IWORK( * ) + DOUBLE PRECISION THETA( * ) + DOUBLE PRECISION RWORK( * ) + COMPLEX*16 U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ), + $ V2T( LDV2T, * ), WORK( * ), X11( LDX11, * ), + $ X12( LDX12, * ), X21( LDX21, * ), X22( LDX22, + $ * ) +* .. +* +* =================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = (1.0D0,0.0D0), + $ ZERO = (0.0D0,0.0D0) ) +* .. +* .. Local Scalars .. + CHARACTER TRANST, SIGNST + INTEGER CHILDINFO, I, IB11D, IB11E, IB12D, IB12E, + $ IB21D, IB21E, IB22D, IB22E, IBBCSD, IORBDB, + $ IORGLQ, IORGQR, IPHI, ITAUP1, ITAUP2, ITAUQ1, + $ ITAUQ2, J, LBBCSDWORK, LBBCSDWORKMIN, + $ LBBCSDWORKOPT, LORBDBWORK, LORBDBWORKMIN, + $ LORBDBWORKOPT, LORGLQWORK, LORGLQWORKMIN, + $ LORGLQWORKOPT, LORGQRWORK, LORGQRWORKMIN, + $ LORGQRWORKOPT, LWORKMIN, LWORKOPT, P1, Q1 + LOGICAL COLMAJOR, DEFAULTSIGNS, LQUERY, WANTU1, WANTU2, + $ WANTV1T, WANTV2T + INTEGER LRWORKMIN, LRWORKOPT + LOGICAL LRQUERY +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZBBCSD, ZLACPY, ZLAPMR, ZLAPMT, + $ ZUNBDB, ZUNGLQ, ZUNGQR +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + WANTU1 = LSAME( JOBU1, 'Y' ) + WANTU2 = LSAME( JOBU2, 'Y' ) + WANTV1T = LSAME( JOBV1T, 'Y' ) + WANTV2T = LSAME( JOBV2T, 'Y' ) + COLMAJOR = .NOT. LSAME( TRANS, 'T' ) + DEFAULTSIGNS = .NOT. LSAME( SIGNS, 'O' ) + LQUERY = LWORK .EQ. -1 + LRQUERY = LRWORK .EQ. -1 + IF( M .LT. 0 ) THEN + INFO = -7 + ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN + INFO = -8 + ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN + INFO = -9 + ELSE IF ( COLMAJOR .AND. LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -11 + ELSE IF (.NOT. COLMAJOR .AND. LDX11 .LT. MAX( 1, Q ) ) THEN + INFO = -11 + ELSE IF (COLMAJOR .AND. LDX12 .LT. MAX( 1, P ) ) THEN + INFO = -13 + ELSE IF (.NOT. COLMAJOR .AND. LDX12 .LT. MAX( 1, M-Q ) ) THEN + INFO = -13 + ELSE IF (COLMAJOR .AND. LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -15 + ELSE IF (.NOT. COLMAJOR .AND. LDX21 .LT. MAX( 1, Q ) ) THEN + INFO = -15 + ELSE IF (COLMAJOR .AND. LDX22 .LT. MAX( 1, M-P ) ) THEN + INFO = -17 + ELSE IF (.NOT. COLMAJOR .AND. LDX22 .LT. MAX( 1, M-Q ) ) THEN + INFO = -17 + ELSE IF( WANTU1 .AND. LDU1 .LT. P ) THEN + INFO = -20 + ELSE IF( WANTU2 .AND. LDU2 .LT. M-P ) THEN + INFO = -22 + ELSE IF( WANTV1T .AND. LDV1T .LT. Q ) THEN + INFO = -24 + ELSE IF( WANTV2T .AND. LDV2T .LT. M-Q ) THEN + INFO = -26 + END IF +* +* Work with transpose if convenient +* + IF( INFO .EQ. 0 .AND. MIN( P, M-P ) .LT. MIN( Q, M-Q ) ) THEN + IF( COLMAJOR ) THEN + TRANST = 'T' + ELSE + TRANST = 'N' + END IF + IF( DEFAULTSIGNS ) THEN + SIGNST = 'O' + ELSE + SIGNST = 'D' + END IF + CALL ZUNCSD( JOBV1T, JOBV2T, JOBU1, JOBU2, TRANST, SIGNST, M, + $ Q, P, X11, LDX11, X21, LDX21, X12, LDX12, X22, + $ LDX22, THETA, V1T, LDV1T, V2T, LDV2T, U1, LDU1, + $ U2, LDU2, WORK, LWORK, RWORK, LRWORK, IWORK, + $ INFO ) + RETURN + END IF +* +* Work with permutation [ 0 I; I 0 ] * X * [ 0 I; I 0 ] if +* convenient +* + IF( INFO .EQ. 0 .AND. M-Q .LT. Q ) THEN + IF( DEFAULTSIGNS ) THEN + SIGNST = 'O' + ELSE + SIGNST = 'D' + END IF + CALL ZUNCSD( JOBU2, JOBU1, JOBV2T, JOBV1T, TRANS, SIGNST, M, + $ M-P, M-Q, X22, LDX22, X21, LDX21, X12, LDX12, X11, + $ LDX11, THETA, U2, LDU2, U1, LDU1, V2T, LDV2T, V1T, + $ LDV1T, WORK, LWORK, RWORK, LRWORK, IWORK, INFO ) + RETURN + END IF +* +* Compute workspace +* + IF( INFO .EQ. 0 ) THEN +* +* Real workspace +* + IPHI = 2 + IB11D = IPHI + MAX( 1, Q - 1 ) + IB11E = IB11D + MAX( 1, Q ) + IB12D = IB11E + MAX( 1, Q - 1 ) + IB12E = IB12D + MAX( 1, Q ) + IB21D = IB12E + MAX( 1, Q - 1 ) + IB21E = IB21D + MAX( 1, Q ) + IB22D = IB21E + MAX( 1, Q - 1 ) + IB22E = IB22D + MAX( 1, Q ) + IBBCSD = IB22E + MAX( 1, Q - 1 ) + CALL ZBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, + $ THETA, THETA, U1, LDU1, U2, LDU2, V1T, LDV1T, + $ V2T, LDV2T, THETA, THETA, THETA, THETA, THETA, + $ THETA, THETA, THETA, RWORK, -1, CHILDINFO ) + LBBCSDWORKOPT = INT( RWORK(1) ) + LBBCSDWORKMIN = LBBCSDWORKOPT + LRWORKOPT = IBBCSD + LBBCSDWORKOPT - 1 + LRWORKMIN = IBBCSD + LBBCSDWORKMIN - 1 + RWORK(1) = LRWORKOPT +* +* Complex workspace +* + ITAUP1 = 2 + ITAUP2 = ITAUP1 + MAX( 1, P ) + ITAUQ1 = ITAUP2 + MAX( 1, M - P ) + ITAUQ2 = ITAUQ1 + MAX( 1, Q ) + IORGQR = ITAUQ2 + MAX( 1, M - Q ) + CALL ZUNGQR( M-Q, M-Q, M-Q, U1, MAX(1,M-Q), U1, WORK, -1, + $ CHILDINFO ) + LORGQRWORKOPT = INT( WORK(1) ) + LORGQRWORKMIN = MAX( 1, M - Q ) + IORGLQ = ITAUQ2 + MAX( 1, M - Q ) + CALL ZUNGLQ( M-Q, M-Q, M-Q, U1, MAX(1,M-Q), U1, WORK, -1, + $ CHILDINFO ) + LORGLQWORKOPT = INT( WORK(1) ) + LORGLQWORKMIN = MAX( 1, M - Q ) + IORBDB = ITAUQ2 + MAX( 1, M - Q ) + CALL ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, + $ X21, LDX21, X22, LDX22, THETA, THETA, U1, U2, + $ V1T, V2T, WORK, -1, CHILDINFO ) + LORBDBWORKOPT = INT( WORK(1) ) + LORBDBWORKMIN = LORBDBWORKOPT + LWORKOPT = MAX( IORGQR + LORGQRWORKOPT, IORGLQ + LORGLQWORKOPT, + $ IORBDB + LORBDBWORKOPT ) - 1 + LWORKMIN = MAX( IORGQR + LORGQRWORKMIN, IORGLQ + LORGLQWORKMIN, + $ IORBDB + LORBDBWORKMIN ) - 1 + WORK(1) = MAX(LWORKOPT,LWORKMIN) +* + IF( LWORK .LT. LWORKMIN + $ .AND. .NOT. ( LQUERY .OR. LRQUERY ) ) THEN + INFO = -22 + ELSE IF( LRWORK .LT. LRWORKMIN + $ .AND. .NOT. ( LQUERY .OR. LRQUERY ) ) THEN + INFO = -24 + ELSE + LORGQRWORK = LWORK - IORGQR + 1 + LORGLQWORK = LWORK - IORGLQ + 1 + LORBDBWORK = LWORK - IORBDB + 1 + LBBCSDWORK = LRWORK - IBBCSD + 1 + END IF + END IF +* +* Abort if any illegal arguments +* + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'ZUNCSD', -INFO ) + RETURN + ELSE IF( LQUERY .OR. LRQUERY ) THEN + RETURN + END IF +* +* Transform to bidiagonal block form +* + CALL ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, X21, + $ LDX21, X22, LDX22, THETA, RWORK(IPHI), WORK(ITAUP1), + $ WORK(ITAUP2), WORK(ITAUQ1), WORK(ITAUQ2), + $ WORK(IORBDB), LORBDBWORK, CHILDINFO ) +* +* Accumulate Householder reflectors +* + IF( COLMAJOR ) THEN + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL ZLACPY( 'L', P, Q, X11, LDX11, U1, LDU1 ) + CALL ZUNGQR( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGQR), + $ LORGQRWORK, INFO) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL ZLACPY( 'L', M-P, Q, X21, LDX21, U2, LDU2 ) + CALL ZUNGQR( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2), + $ WORK(IORGQR), LORGQRWORK, INFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL ZLACPY( 'U', Q-1, Q-1, X11(1,2), LDX11, V1T(2,2), + $ LDV1T ) + V1T(1, 1) = ONE + DO J = 2, Q + V1T(1,J) = ZERO + V1T(J,1) = ZERO + END DO + CALL ZUNGLQ( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1), + $ WORK(IORGLQ), LORGLQWORK, INFO ) + END IF + IF( WANTV2T .AND. M-Q .GT. 0 ) THEN + CALL ZLACPY( 'U', P, M-Q, X12, LDX12, V2T, LDV2T ) + IF( M-P .GT. Q) THEN + CALL ZLACPY( 'U', M-P-Q, M-P-Q, X22(Q+1,P+1), LDX22, + $ V2T(P+1,P+1), LDV2T ) + END IF + IF( M .GT. Q ) THEN + CALL ZUNGLQ( M-Q, M-Q, M-Q, V2T, LDV2T, WORK(ITAUQ2), + $ WORK(IORGLQ), LORGLQWORK, INFO ) + END IF + END IF + ELSE + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL ZLACPY( 'U', Q, P, X11, LDX11, U1, LDU1 ) + CALL ZUNGLQ( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGLQ), + $ LORGLQWORK, INFO) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL ZLACPY( 'U', Q, M-P, X21, LDX21, U2, LDU2 ) + CALL ZUNGLQ( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2), + $ WORK(IORGLQ), LORGLQWORK, INFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL ZLACPY( 'L', Q-1, Q-1, X11(2,1), LDX11, V1T(2,2), + $ LDV1T ) + V1T(1, 1) = ONE + DO J = 2, Q + V1T(1,J) = ZERO + V1T(J,1) = ZERO + END DO + CALL ZUNGQR( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1), + $ WORK(IORGQR), LORGQRWORK, INFO ) + END IF + IF( WANTV2T .AND. M-Q .GT. 0 ) THEN + P1 = MIN( P+1, M ) + Q1 = MIN( Q+1, M ) + CALL ZLACPY( 'L', M-Q, P, X12, LDX12, V2T, LDV2T ) + IF( M .GT. P+Q ) THEN + CALL ZLACPY( 'L', M-P-Q, M-P-Q, X22(P1,Q1), LDX22, + $ V2T(P+1,P+1), LDV2T ) + END IF + CALL ZUNGQR( M-Q, M-Q, M-Q, V2T, LDV2T, WORK(ITAUQ2), + $ WORK(IORGQR), LORGQRWORK, INFO ) + END IF + END IF +* +* Compute the CSD of the matrix in bidiagonal-block form +* + CALL ZBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, THETA, + $ RWORK(IPHI), U1, LDU1, U2, LDU2, V1T, LDV1T, V2T, + $ LDV2T, RWORK(IB11D), RWORK(IB11E), RWORK(IB12D), + $ RWORK(IB12E), RWORK(IB21D), RWORK(IB21E), + $ RWORK(IB22D), RWORK(IB22E), RWORK(IBBCSD), + $ LBBCSDWORK, INFO ) +* +* Permute rows and columns to place identity submatrices in top- +* left corner of (1,1)-block and/or bottom-right corner of (1,2)- +* block and/or bottom-right corner of (2,1)-block and/or top-left +* corner of (2,2)-block +* + IF( Q .GT. 0 .AND. WANTU2 ) THEN + DO I = 1, Q + IWORK(I) = M - P - Q + I + END DO + DO I = Q + 1, M - P + IWORK(I) = I - Q + END DO + IF( COLMAJOR ) THEN + CALL ZLAPMT( .FALSE., M-P, M-P, U2, LDU2, IWORK ) + ELSE + CALL ZLAPMR( .FALSE., M-P, M-P, U2, LDU2, IWORK ) + END IF + END IF + IF( M .GT. 0 .AND. WANTV2T ) THEN + DO I = 1, P + IWORK(I) = M - P - Q + I + END DO + DO I = P + 1, M - Q + IWORK(I) = I - P + END DO + IF( .NOT. COLMAJOR ) THEN + CALL ZLAPMT( .FALSE., M-Q, M-Q, V2T, LDV2T, IWORK ) + ELSE + CALL ZLAPMR( .FALSE., M-Q, M-Q, V2T, LDV2T, IWORK ) + END IF + END IF +* + RETURN +* +* End ZUNCSD +* + END + diff --git a/dspl/liblapack/SRC/zuncsd2by1.f b/dspl/liblapack/SRC/zuncsd2by1.f new file mode 100644 index 0000000..56285a1 --- /dev/null +++ b/dspl/liblapack/SRC/zuncsd2by1.f @@ -0,0 +1,772 @@ +*> \brief \b ZUNCSD2BY1 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNCSD2BY1 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNCSD2BY1( JOBU1, JOBU2, JOBV1T, M, P, Q, X11, LDX11, +* X21, LDX21, THETA, U1, LDU1, U2, LDU2, V1T, +* LDV1T, WORK, LWORK, RWORK, LRWORK, IWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBU1, JOBU2, JOBV1T +* INTEGER INFO, LDU1, LDU2, LDV1T, LWORK, LDX11, LDX21, +* $ M, P, Q +* INTEGER LRWORK, LRWORKMIN, LRWORKOPT +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK(*) +* DOUBLE PRECISION THETA(*) +* COMPLEX*16 U1(LDU1,*), U2(LDU2,*), V1T(LDV1T,*), WORK(*), +* $ X11(LDX11,*), X21(LDX21,*) +* INTEGER IWORK(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*>\verbatim +*> +*> ZUNCSD2BY1 computes the CS decomposition of an M-by-Q matrix X with +*> orthonormal columns that has been partitioned into a 2-by-1 block +*> structure: +*> +*> [ I1 0 0 ] +*> [ 0 C 0 ] +*> [ X11 ] [ U1 | ] [ 0 0 0 ] +*> X = [-----] = [---------] [----------] V1**T . +*> [ X21 ] [ | U2 ] [ 0 0 0 ] +*> [ 0 S 0 ] +*> [ 0 0 I2] +*> +*> X11 is P-by-Q. The unitary matrices U1, U2, and V1 are P-by-P, +*> (M-P)-by-(M-P), and Q-by-Q, respectively. C and S are R-by-R +*> nonnegative diagonal matrices satisfying C^2 + S^2 = I, in which +*> R = MIN(P,M-P,Q,M-Q). I1 is a K1-by-K1 identity matrix and I2 is a +*> K2-by-K2 identity matrix, where K1 = MAX(Q+P-M,0), K2 = MAX(Q-P,0). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBU1 +*> \verbatim +*> JOBU1 is CHARACTER +*> = 'Y': U1 is computed; +*> otherwise: U1 is not computed. +*> \endverbatim +*> +*> \param[in] JOBU2 +*> \verbatim +*> JOBU2 is CHARACTER +*> = 'Y': U2 is computed; +*> otherwise: U2 is not computed. +*> \endverbatim +*> +*> \param[in] JOBV1T +*> \verbatim +*> JOBV1T is CHARACTER +*> = 'Y': V1T is computed; +*> otherwise: V1T is not computed. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows in X. +*> \endverbatim +*> +*> \param[in] P +*> \verbatim +*> P is INTEGER +*> The number of rows in X11. 0 <= P <= M. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is INTEGER +*> The number of columns in X11 and X21. 0 <= Q <= M. +*> \endverbatim +*> +*> \param[in,out] X11 +*> \verbatim +*> X11 is COMPLEX*16 array, dimension (LDX11,Q) +*> On entry, part of the unitary matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX11 +*> \verbatim +*> LDX11 is INTEGER +*> The leading dimension of X11. LDX11 >= MAX(1,P). +*> \endverbatim +*> +*> \param[in,out] X21 +*> \verbatim +*> X21 is COMPLEX*16 array, dimension (LDX21,Q) +*> On entry, part of the unitary matrix whose CSD is desired. +*> \endverbatim +*> +*> \param[in] LDX21 +*> \verbatim +*> LDX21 is INTEGER +*> The leading dimension of X21. LDX21 >= MAX(1,M-P). +*> \endverbatim +*> +*> \param[out] THETA +*> \verbatim +*> THETA is DOUBLE PRECISION array, dimension (R), in which R = +*> MIN(P,M-P,Q,M-Q). +*> C = DIAG( COS(THETA(1)), ... , COS(THETA(R)) ) and +*> S = DIAG( SIN(THETA(1)), ... , SIN(THETA(R)) ). +*> \endverbatim +*> +*> \param[out] U1 +*> \verbatim +*> U1 is COMPLEX*16 array, dimension (P) +*> If JOBU1 = 'Y', U1 contains the P-by-P unitary matrix U1. +*> \endverbatim +*> +*> \param[in] LDU1 +*> \verbatim +*> LDU1 is INTEGER +*> The leading dimension of U1. If JOBU1 = 'Y', LDU1 >= +*> MAX(1,P). +*> \endverbatim +*> +*> \param[out] U2 +*> \verbatim +*> U2 is COMPLEX*16 array, dimension (M-P) +*> If JOBU2 = 'Y', U2 contains the (M-P)-by-(M-P) unitary +*> matrix U2. +*> \endverbatim +*> +*> \param[in] LDU2 +*> \verbatim +*> LDU2 is INTEGER +*> The leading dimension of U2. If JOBU2 = 'Y', LDU2 >= +*> MAX(1,M-P). +*> \endverbatim +*> +*> \param[out] V1T +*> \verbatim +*> V1T is COMPLEX*16 array, dimension (Q) +*> If JOBV1T = 'Y', V1T contains the Q-by-Q matrix unitary +*> matrix V1**T. +*> \endverbatim +*> +*> \param[in] LDV1T +*> \verbatim +*> LDV1T is INTEGER +*> The leading dimension of V1T. If JOBV1T = 'Y', LDV1T >= +*> MAX(1,Q). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the work array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) +*> On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. +*> If INFO > 0 on exit, RWORK(2:R) contains the values PHI(1), +*> ..., PHI(R-1) that, together with THETA(1), ..., THETA(R), +*> define the matrix in intermediate bidiagonal-block form +*> remaining after nonconvergence. INFO specifies the number +*> of nonzero PHI's. +*> \endverbatim +*> +*> \param[in] LRWORK +*> \verbatim +*> LRWORK is INTEGER +*> The dimension of the array RWORK. +*> +*> If LRWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the RWORK array, returns +*> this value as the first entry of the work array, and no error +*> message related to LRWORK is issued by XERBLA. +*> \endverbatim +* +*> \param[out] IWORK +*> \verbatim +*> IWORK is INTEGER array, dimension (M-MIN(P,M-P,Q,M-Q)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit. +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> > 0: ZBBCSD did not converge. See the description of WORK +*> above for details. +*> \endverbatim +* +*> \par References: +* ================ +*> +*> [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. +*> Algorithms, 50(1):33-65, 2009. +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date July 2012 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNCSD2BY1( JOBU1, JOBU2, JOBV1T, M, P, Q, X11, LDX11, + $ X21, LDX21, THETA, U1, LDU1, U2, LDU2, V1T, + $ LDV1T, WORK, LWORK, RWORK, LRWORK, IWORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* July 2012 +* +* .. Scalar Arguments .. + CHARACTER JOBU1, JOBU2, JOBV1T + INTEGER INFO, LDU1, LDU2, LDV1T, LWORK, LDX11, LDX21, + $ M, P, Q + INTEGER LRWORK, LRWORKMIN, LRWORKOPT +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK(*) + DOUBLE PRECISION THETA(*) + COMPLEX*16 U1(LDU1,*), U2(LDU2,*), V1T(LDV1T,*), WORK(*), + $ X11(LDX11,*), X21(LDX21,*) + INTEGER IWORK(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = (1.0D0,0.0D0), ZERO = (0.0D0,0.0D0) ) +* .. +* .. Local Scalars .. + INTEGER CHILDINFO, I, IB11D, IB11E, IB12D, IB12E, + $ IB21D, IB21E, IB22D, IB22E, IBBCSD, IORBDB, + $ IORGLQ, IORGQR, IPHI, ITAUP1, ITAUP2, ITAUQ1, + $ J, LBBCSD, LORBDB, LORGLQ, LORGLQMIN, + $ LORGLQOPT, LORGQR, LORGQRMIN, LORGQROPT, + $ LWORKMIN, LWORKOPT, R + LOGICAL LQUERY, WANTU1, WANTU2, WANTV1T +* .. +* .. Local Arrays .. + DOUBLE PRECISION DUM( 1 ) + COMPLEX*16 CDUM( 1, 1 ) +* .. +* .. External Subroutines .. + EXTERNAL ZBBCSD, ZCOPY, ZLACPY, ZLAPMR, ZLAPMT, ZUNBDB1, + $ ZUNBDB2, ZUNBDB3, ZUNBDB4, ZUNGLQ, ZUNGQR, + $ XERBLA +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Function .. + INTRINSIC INT, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test input arguments +* + INFO = 0 + WANTU1 = LSAME( JOBU1, 'Y' ) + WANTU2 = LSAME( JOBU2, 'Y' ) + WANTV1T = LSAME( JOBV1T, 'Y' ) + LQUERY = LWORK .EQ. -1 +* + IF( M .LT. 0 ) THEN + INFO = -4 + ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN + INFO = -5 + ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN + INFO = -6 + ELSE IF( LDX11 .LT. MAX( 1, P ) ) THEN + INFO = -8 + ELSE IF( LDX21 .LT. MAX( 1, M-P ) ) THEN + INFO = -10 + ELSE IF( WANTU1 .AND. LDU1 .LT. MAX( 1, P ) ) THEN + INFO = -13 + ELSE IF( WANTU2 .AND. LDU2 .LT. MAX( 1, M - P ) ) THEN + INFO = -15 + ELSE IF( WANTV1T .AND. LDV1T .LT. MAX( 1, Q ) ) THEN + INFO = -17 + END IF +* + R = MIN( P, M-P, Q, M-Q ) +* +* Compute workspace +* +* WORK layout: +* |-----------------------------------------| +* | LWORKOPT (1) | +* |-----------------------------------------| +* | TAUP1 (MAX(1,P)) | +* | TAUP2 (MAX(1,M-P)) | +* | TAUQ1 (MAX(1,Q)) | +* |-----------------------------------------| +* | ZUNBDB WORK | ZUNGQR WORK | ZUNGLQ WORK | +* | | | | +* | | | | +* | | | | +* | | | | +* |-----------------------------------------| +* RWORK layout: +* |------------------| +* | LRWORKOPT (1) | +* |------------------| +* | PHI (MAX(1,R-1)) | +* |------------------| +* | B11D (R) | +* | B11E (R-1) | +* | B12D (R) | +* | B12E (R-1) | +* | B21D (R) | +* | B21E (R-1) | +* | B22D (R) | +* | B22E (R-1) | +* | ZBBCSD RWORK | +* |------------------| +* + IF( INFO .EQ. 0 ) THEN + IPHI = 2 + IB11D = IPHI + MAX( 1, R-1 ) + IB11E = IB11D + MAX( 1, R ) + IB12D = IB11E + MAX( 1, R - 1 ) + IB12E = IB12D + MAX( 1, R ) + IB21D = IB12E + MAX( 1, R - 1 ) + IB21E = IB21D + MAX( 1, R ) + IB22D = IB21E + MAX( 1, R - 1 ) + IB22E = IB22D + MAX( 1, R ) + IBBCSD = IB22E + MAX( 1, R - 1 ) + ITAUP1 = 2 + ITAUP2 = ITAUP1 + MAX( 1, P ) + ITAUQ1 = ITAUP2 + MAX( 1, M-P ) + IORBDB = ITAUQ1 + MAX( 1, Q ) + IORGQR = ITAUQ1 + MAX( 1, Q ) + IORGLQ = ITAUQ1 + MAX( 1, Q ) + LORGQRMIN = 1 + LORGQROPT = 1 + LORGLQMIN = 1 + LORGLQOPT = 1 + IF( R .EQ. Q ) THEN + CALL ZUNBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, DUM, + $ CDUM, CDUM, CDUM, WORK, -1, CHILDINFO ) + LORBDB = INT( WORK(1) ) + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL ZUNGQR( P, P, Q, U1, LDU1, CDUM, WORK(1), -1, + $ CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + ENDIF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL ZUNGQR( M-P, M-P, Q, U2, LDU2, CDUM, WORK(1), -1, + $ CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, M-P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL ZUNGLQ( Q-1, Q-1, Q-1, V1T, LDV1T, + $ CDUM, WORK(1), -1, CHILDINFO ) + LORGLQMIN = MAX( LORGLQMIN, Q-1 ) + LORGLQOPT = MAX( LORGLQOPT, INT( WORK(1) ) ) + END IF + CALL ZBBCSD( JOBU1, JOBU2, JOBV1T, 'N', 'N', M, P, Q, THETA, + $ DUM, U1, LDU1, U2, LDU2, V1T, LDV1T, CDUM, 1, + $ DUM, DUM, DUM, DUM, DUM, DUM, DUM, DUM, + $ RWORK(1), -1, CHILDINFO ) + LBBCSD = INT( RWORK(1) ) + ELSE IF( R .EQ. P ) THEN + CALL ZUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, DUM, + $ CDUM, CDUM, CDUM, WORK(1), -1, CHILDINFO ) + LORBDB = INT( WORK(1) ) + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL ZUNGQR( P-1, P-1, P-1, U1(2,2), LDU1, CDUM, WORK(1), + $ -1, CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, P-1 ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL ZUNGQR( M-P, M-P, Q, U2, LDU2, CDUM, WORK(1), -1, + $ CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, M-P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL ZUNGLQ( Q, Q, R, V1T, LDV1T, CDUM, WORK(1), -1, + $ CHILDINFO ) + LORGLQMIN = MAX( LORGLQMIN, Q ) + LORGLQOPT = MAX( LORGLQOPT, INT( WORK(1) ) ) + END IF + CALL ZBBCSD( JOBV1T, 'N', JOBU1, JOBU2, 'T', M, Q, P, THETA, + $ DUM, V1T, LDV1T, CDUM, 1, U1, LDU1, U2, LDU2, + $ DUM, DUM, DUM, DUM, DUM, DUM, DUM, DUM, + $ RWORK(1), -1, CHILDINFO ) + LBBCSD = INT( RWORK(1) ) + ELSE IF( R .EQ. M-P ) THEN + CALL ZUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, DUM, + $ CDUM, CDUM, CDUM, WORK(1), -1, CHILDINFO ) + LORBDB = INT( WORK(1) ) + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL ZUNGQR( P, P, Q, U1, LDU1, CDUM, WORK(1), -1, + $ CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL ZUNGQR( M-P-1, M-P-1, M-P-1, U2(2,2), LDU2, CDUM, + $ WORK(1), -1, CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, M-P-1 ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL ZUNGLQ( Q, Q, R, V1T, LDV1T, CDUM, WORK(1), -1, + $ CHILDINFO ) + LORGLQMIN = MAX( LORGLQMIN, Q ) + LORGLQOPT = MAX( LORGLQOPT, INT( WORK(1) ) ) + END IF + CALL ZBBCSD( 'N', JOBV1T, JOBU2, JOBU1, 'T', M, M-Q, M-P, + $ THETA, DUM, CDUM, 1, V1T, LDV1T, U2, LDU2, U1, + $ LDU1, DUM, DUM, DUM, DUM, DUM, DUM, DUM, DUM, + $ RWORK(1), -1, CHILDINFO ) + LBBCSD = INT( RWORK(1) ) + ELSE + CALL ZUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, DUM, + $ CDUM, CDUM, CDUM, CDUM, WORK(1), -1, CHILDINFO + $ ) + LORBDB = M + INT( WORK(1) ) + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL ZUNGQR( P, P, M-Q, U1, LDU1, CDUM, WORK(1), -1, + $ CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL ZUNGQR( M-P, M-P, M-Q, U2, LDU2, CDUM, WORK(1), -1, + $ CHILDINFO ) + LORGQRMIN = MAX( LORGQRMIN, M-P ) + LORGQROPT = MAX( LORGQROPT, INT( WORK(1) ) ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL ZUNGLQ( Q, Q, Q, V1T, LDV1T, CDUM, WORK(1), -1, + $ CHILDINFO ) + LORGLQMIN = MAX( LORGLQMIN, Q ) + LORGLQOPT = MAX( LORGLQOPT, INT( WORK(1) ) ) + END IF + CALL ZBBCSD( JOBU2, JOBU1, 'N', JOBV1T, 'N', M, M-P, M-Q, + $ THETA, DUM, U2, LDU2, U1, LDU1, CDUM, 1, V1T, + $ LDV1T, DUM, DUM, DUM, DUM, DUM, DUM, DUM, DUM, + $ RWORK(1), -1, CHILDINFO ) + LBBCSD = INT( RWORK(1) ) + END IF + LRWORKMIN = IBBCSD+LBBCSD-1 + LRWORKOPT = LRWORKMIN + RWORK(1) = LRWORKOPT + LWORKMIN = MAX( IORBDB+LORBDB-1, + $ IORGQR+LORGQRMIN-1, + $ IORGLQ+LORGLQMIN-1 ) + LWORKOPT = MAX( IORBDB+LORBDB-1, + $ IORGQR+LORGQROPT-1, + $ IORGLQ+LORGLQOPT-1 ) + WORK(1) = LWORKOPT + IF( LWORK .LT. LWORKMIN .AND. .NOT.LQUERY ) THEN + INFO = -19 + END IF + END IF + IF( INFO .NE. 0 ) THEN + CALL XERBLA( 'ZUNCSD2BY1', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF + LORGQR = LWORK-IORGQR+1 + LORGLQ = LWORK-IORGLQ+1 +* +* Handle four cases separately: R = Q, R = P, R = M-P, and R = M-Q, +* in which R = MIN(P,M-P,Q,M-Q) +* + IF( R .EQ. Q ) THEN +* +* Case 1: R = Q +* +* Simultaneously bidiagonalize X11 and X21 +* + CALL ZUNBDB1( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ RWORK(IPHI), WORK(ITAUP1), WORK(ITAUP2), + $ WORK(ITAUQ1), WORK(IORBDB), LORBDB, CHILDINFO ) +* +* Accumulate Householder reflectors +* + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL ZLACPY( 'L', P, Q, X11, LDX11, U1, LDU1 ) + CALL ZUNGQR( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGQR), + $ LORGQR, CHILDINFO ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL ZLACPY( 'L', M-P, Q, X21, LDX21, U2, LDU2 ) + CALL ZUNGQR( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2), + $ WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + V1T(1,1) = ONE + DO J = 2, Q + V1T(1,J) = ZERO + V1T(J,1) = ZERO + END DO + CALL ZLACPY( 'U', Q-1, Q-1, X21(1,2), LDX21, V1T(2,2), + $ LDV1T ) + CALL ZUNGLQ( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1), + $ WORK(IORGLQ), LORGLQ, CHILDINFO ) + END IF +* +* Simultaneously diagonalize X11 and X21. +* + CALL ZBBCSD( JOBU1, JOBU2, JOBV1T, 'N', 'N', M, P, Q, THETA, + $ RWORK(IPHI), U1, LDU1, U2, LDU2, V1T, LDV1T, CDUM, + $ 1, RWORK(IB11D), RWORK(IB11E), RWORK(IB12D), + $ RWORK(IB12E), RWORK(IB21D), RWORK(IB21E), + $ RWORK(IB22D), RWORK(IB22E), RWORK(IBBCSD), LBBCSD, + $ CHILDINFO ) +* +* Permute rows and columns to place zero submatrices in +* preferred positions +* + IF( Q .GT. 0 .AND. WANTU2 ) THEN + DO I = 1, Q + IWORK(I) = M - P - Q + I + END DO + DO I = Q + 1, M - P + IWORK(I) = I - Q + END DO + CALL ZLAPMT( .FALSE., M-P, M-P, U2, LDU2, IWORK ) + END IF + ELSE IF( R .EQ. P ) THEN +* +* Case 2: R = P +* +* Simultaneously bidiagonalize X11 and X21 +* + CALL ZUNBDB2( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ RWORK(IPHI), WORK(ITAUP1), WORK(ITAUP2), + $ WORK(ITAUQ1), WORK(IORBDB), LORBDB, CHILDINFO ) +* +* Accumulate Householder reflectors +* + IF( WANTU1 .AND. P .GT. 0 ) THEN + U1(1,1) = ONE + DO J = 2, P + U1(1,J) = ZERO + U1(J,1) = ZERO + END DO + CALL ZLACPY( 'L', P-1, P-1, X11(2,1), LDX11, U1(2,2), LDU1 ) + CALL ZUNGQR( P-1, P-1, P-1, U1(2,2), LDU1, WORK(ITAUP1), + $ WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL ZLACPY( 'L', M-P, Q, X21, LDX21, U2, LDU2 ) + CALL ZUNGQR( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2), + $ WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL ZLACPY( 'U', P, Q, X11, LDX11, V1T, LDV1T ) + CALL ZUNGLQ( Q, Q, R, V1T, LDV1T, WORK(ITAUQ1), + $ WORK(IORGLQ), LORGLQ, CHILDINFO ) + END IF +* +* Simultaneously diagonalize X11 and X21. +* + CALL ZBBCSD( JOBV1T, 'N', JOBU1, JOBU2, 'T', M, Q, P, THETA, + $ RWORK(IPHI), V1T, LDV1T, CDUM, 1, U1, LDU1, U2, + $ LDU2, RWORK(IB11D), RWORK(IB11E), RWORK(IB12D), + $ RWORK(IB12E), RWORK(IB21D), RWORK(IB21E), + $ RWORK(IB22D), RWORK(IB22E), RWORK(IBBCSD), LBBCSD, + $ CHILDINFO ) +* +* Permute rows and columns to place identity submatrices in +* preferred positions +* + IF( Q .GT. 0 .AND. WANTU2 ) THEN + DO I = 1, Q + IWORK(I) = M - P - Q + I + END DO + DO I = Q + 1, M - P + IWORK(I) = I - Q + END DO + CALL ZLAPMT( .FALSE., M-P, M-P, U2, LDU2, IWORK ) + END IF + ELSE IF( R .EQ. M-P ) THEN +* +* Case 3: R = M-P +* +* Simultaneously bidiagonalize X11 and X21 +* + CALL ZUNBDB3( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ RWORK(IPHI), WORK(ITAUP1), WORK(ITAUP2), + $ WORK(ITAUQ1), WORK(IORBDB), LORBDB, CHILDINFO ) +* +* Accumulate Householder reflectors +* + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL ZLACPY( 'L', P, Q, X11, LDX11, U1, LDU1 ) + CALL ZUNGQR( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGQR), + $ LORGQR, CHILDINFO ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + U2(1,1) = ONE + DO J = 2, M-P + U2(1,J) = ZERO + U2(J,1) = ZERO + END DO + CALL ZLACPY( 'L', M-P-1, M-P-1, X21(2,1), LDX21, U2(2,2), + $ LDU2 ) + CALL ZUNGQR( M-P-1, M-P-1, M-P-1, U2(2,2), LDU2, + $ WORK(ITAUP2), WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL ZLACPY( 'U', M-P, Q, X21, LDX21, V1T, LDV1T ) + CALL ZUNGLQ( Q, Q, R, V1T, LDV1T, WORK(ITAUQ1), + $ WORK(IORGLQ), LORGLQ, CHILDINFO ) + END IF +* +* Simultaneously diagonalize X11 and X21. +* + CALL ZBBCSD( 'N', JOBV1T, JOBU2, JOBU1, 'T', M, M-Q, M-P, + $ THETA, RWORK(IPHI), CDUM, 1, V1T, LDV1T, U2, LDU2, + $ U1, LDU1, RWORK(IB11D), RWORK(IB11E), + $ RWORK(IB12D), RWORK(IB12E), RWORK(IB21D), + $ RWORK(IB21E), RWORK(IB22D), RWORK(IB22E), + $ RWORK(IBBCSD), LBBCSD, CHILDINFO ) +* +* Permute rows and columns to place identity submatrices in +* preferred positions +* + IF( Q .GT. R ) THEN + DO I = 1, R + IWORK(I) = Q - R + I + END DO + DO I = R + 1, Q + IWORK(I) = I - R + END DO + IF( WANTU1 ) THEN + CALL ZLAPMT( .FALSE., P, Q, U1, LDU1, IWORK ) + END IF + IF( WANTV1T ) THEN + CALL ZLAPMR( .FALSE., Q, Q, V1T, LDV1T, IWORK ) + END IF + END IF + ELSE +* +* Case 4: R = M-Q +* +* Simultaneously bidiagonalize X11 and X21 +* + CALL ZUNBDB4( M, P, Q, X11, LDX11, X21, LDX21, THETA, + $ RWORK(IPHI), WORK(ITAUP1), WORK(ITAUP2), + $ WORK(ITAUQ1), WORK(IORBDB), WORK(IORBDB+M), + $ LORBDB-M, CHILDINFO ) +* +* Accumulate Householder reflectors +* + IF( WANTU1 .AND. P .GT. 0 ) THEN + CALL ZCOPY( P, WORK(IORBDB), 1, U1, 1 ) + DO J = 2, P + U1(1,J) = ZERO + END DO + CALL ZLACPY( 'L', P-1, M-Q-1, X11(2,1), LDX11, U1(2,2), + $ LDU1 ) + CALL ZUNGQR( P, P, M-Q, U1, LDU1, WORK(ITAUP1), + $ WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTU2 .AND. M-P .GT. 0 ) THEN + CALL ZCOPY( M-P, WORK(IORBDB+P), 1, U2, 1 ) + DO J = 2, M-P + U2(1,J) = ZERO + END DO + CALL ZLACPY( 'L', M-P-1, M-Q-1, X21(2,1), LDX21, U2(2,2), + $ LDU2 ) + CALL ZUNGQR( M-P, M-P, M-Q, U2, LDU2, WORK(ITAUP2), + $ WORK(IORGQR), LORGQR, CHILDINFO ) + END IF + IF( WANTV1T .AND. Q .GT. 0 ) THEN + CALL ZLACPY( 'U', M-Q, Q, X21, LDX21, V1T, LDV1T ) + CALL ZLACPY( 'U', P-(M-Q), Q-(M-Q), X11(M-Q+1,M-Q+1), LDX11, + $ V1T(M-Q+1,M-Q+1), LDV1T ) + CALL ZLACPY( 'U', -P+Q, Q-P, X21(M-Q+1,P+1), LDX21, + $ V1T(P+1,P+1), LDV1T ) + CALL ZUNGLQ( Q, Q, Q, V1T, LDV1T, WORK(ITAUQ1), + $ WORK(IORGLQ), LORGLQ, CHILDINFO ) + END IF +* +* Simultaneously diagonalize X11 and X21. +* + CALL ZBBCSD( JOBU2, JOBU1, 'N', JOBV1T, 'N', M, M-P, M-Q, + $ THETA, RWORK(IPHI), U2, LDU2, U1, LDU1, CDUM, 1, + $ V1T, LDV1T, RWORK(IB11D), RWORK(IB11E), + $ RWORK(IB12D), RWORK(IB12E), RWORK(IB21D), + $ RWORK(IB21E), RWORK(IB22D), RWORK(IB22E), + $ RWORK(IBBCSD), LBBCSD, CHILDINFO ) +* +* Permute rows and columns to place identity submatrices in +* preferred positions +* + IF( P .GT. R ) THEN + DO I = 1, R + IWORK(I) = P - R + I + END DO + DO I = R + 1, P + IWORK(I) = I - R + END DO + IF( WANTU1 ) THEN + CALL ZLAPMT( .FALSE., P, P, U1, LDU1, IWORK ) + END IF + IF( WANTV1T ) THEN + CALL ZLAPMR( .FALSE., P, Q, V1T, LDV1T, IWORK ) + END IF + END IF + END IF +* + RETURN +* +* End of ZUNCSD2BY1 +* + END + diff --git a/dspl/liblapack/SRC/zung2l.f b/dspl/liblapack/SRC/zung2l.f new file mode 100644 index 0000000..1a48c4d --- /dev/null +++ b/dspl/liblapack/SRC/zung2l.f @@ -0,0 +1,199 @@ +*> \brief \b ZUNG2L generates all or part of the unitary matrix Q from a QL factorization determined by cgeqlf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNG2L + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNG2L( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNG2L generates an m by n complex matrix Q with orthonormal columns, +*> which is defined as the last n columns of a product of k elementary +*> reflectors of order m +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by ZGEQLF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the (n-k+i)-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by ZGEQLF in the last k columns of its array +*> argument A. +*> On exit, the m-by-n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGEQLF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNG2L( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, II, J, L +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARF, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNG2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns 1:n-k to columns of the unit matrix +* + DO 20 J = 1, N - K + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( M-N+J, J ) = ONE + 20 CONTINUE +* + DO 40 I = 1, K + II = N - K + I +* +* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left +* + A( M-N+II, II ) = ONE + CALL ZLARF( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), A, + $ LDA, WORK ) + CALL ZSCAL( M-N+II-1, -TAU( I ), A( 1, II ), 1 ) + A( M-N+II, II ) = ONE - TAU( I ) +* +* Set A(m-k+i+1:m,n-k+i) to zero +* + DO 30 L = M - N + II + 1, M + A( L, II ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of ZUNG2L +* + END diff --git a/dspl/liblapack/SRC/zung2r.f b/dspl/liblapack/SRC/zung2r.f new file mode 100644 index 0000000..4a3fed0 --- /dev/null +++ b/dspl/liblapack/SRC/zung2r.f @@ -0,0 +1,201 @@ +*> \brief \b ZUNG2R +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNG2R + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNG2R( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNG2R generates an m by n complex matrix Q with orthonormal columns, +*> which is defined as the first n columns of a product of k elementary +*> reflectors of order m +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by ZGEQRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the i-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by ZGEQRF in the first k columns of its array +*> argument A. +*> On exit, the m by n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGEQRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNG2R( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARF, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNG2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns k+1:n to columns of the unit matrix +* + DO 20 J = K + 1, N + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( J, J ) = ONE + 20 CONTINUE +* + DO 40 I = K, 1, -1 +* +* Apply H(i) to A(i:m,i:n) from the left +* + IF( I.LT.N ) THEN + A( I, I ) = ONE + CALL ZLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) + END IF + IF( I.LT.M ) + $ CALL ZSCAL( M-I, -TAU( I ), A( I+1, I ), 1 ) + A( I, I ) = ONE - TAU( I ) +* +* Set A(1:i-1,i) to zero +* + DO 30 L = 1, I - 1 + A( L, I ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of ZUNG2R +* + END diff --git a/dspl/liblapack/SRC/zungbr.f b/dspl/liblapack/SRC/zungbr.f new file mode 100644 index 0000000..3cdb812 --- /dev/null +++ b/dspl/liblapack/SRC/zungbr.f @@ -0,0 +1,338 @@ +*> \brief \b ZUNGBR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNGBR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER VECT +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNGBR generates one of the complex unitary matrices Q or P**H +*> determined by ZGEBRD when reducing a complex matrix A to bidiagonal +*> form: A = Q * B * P**H. Q and P**H are defined as products of +*> elementary reflectors H(i) or G(i) respectively. +*> +*> If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q +*> is of order M: +*> if m >= k, Q = H(1) H(2) . . . H(k) and ZUNGBR returns the first n +*> columns of Q, where m >= n >= k; +*> if m < k, Q = H(1) H(2) . . . H(m-1) and ZUNGBR returns Q as an +*> M-by-M matrix. +*> +*> If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H +*> is of order N: +*> if k < n, P**H = G(k) . . . G(2) G(1) and ZUNGBR returns the first m +*> rows of P**H, where n >= m >= k; +*> if k >= n, P**H = G(n-1) . . . G(2) G(1) and ZUNGBR returns P**H as +*> an N-by-N matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> Specifies whether the matrix Q or the matrix P**H is +*> required, as defined in the transformation applied by ZGEBRD: +*> = 'Q': generate Q; +*> = 'P': generate P**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q or P**H to be returned. +*> M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q or P**H to be returned. +*> N >= 0. +*> If VECT = 'Q', M >= N >= min(M,K); +*> if VECT = 'P', N >= M >= min(N,K). +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> If VECT = 'Q', the number of columns in the original M-by-K +*> matrix reduced by ZGEBRD. +*> If VECT = 'P', the number of rows in the original K-by-N +*> matrix reduced by ZGEBRD. +*> K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the vectors which define the elementary reflectors, +*> as returned by ZGEBRD. +*> On exit, the M-by-N matrix Q or P**H. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= M. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension +*> (min(M,K)) if VECT = 'Q' +*> (min(N,K)) if VECT = 'P' +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i) or G(i), which determines Q or P**H, as +*> returned by ZGEBRD in its array argument TAUQ or TAUP. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,min(M,N)). +*> For optimum performance LWORK >= min(M,N)*NB, where NB +*> is the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date April 2012 +* +*> \ingroup complex16GBcomputational +* +* ===================================================================== + SUBROUTINE ZUNGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* April 2012 +* +* .. Scalar Arguments .. + CHARACTER VECT + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, WANTQ + INTEGER I, IINFO, J, LWKOPT, MN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZUNGLQ, ZUNGQR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + WANTQ = LSAME( VECT, 'Q' ) + MN = MIN( M, N ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( M.LT.0 ) THEN + INFO = -2 + ELSE IF( N.LT.0 .OR. ( WANTQ .AND. ( N.GT.M .OR. N.LT.MIN( M, + $ K ) ) ) .OR. ( .NOT.WANTQ .AND. ( M.GT.N .OR. M.LT. + $ MIN( N, K ) ) ) ) THEN + INFO = -3 + ELSE IF( K.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -6 + ELSE IF( LWORK.LT.MAX( 1, MN ) .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN + WORK( 1 ) = 1 + IF( WANTQ ) THEN + IF( M.GE.K ) THEN + CALL ZUNGQR( M, N, K, A, LDA, TAU, WORK, -1, IINFO ) + ELSE + IF( M.GT.1 ) THEN + CALL ZUNGQR( M-1, M-1, M-1, A( 2, 2 ), LDA, TAU, WORK, + $ -1, IINFO ) + END IF + END IF + ELSE + IF( K.LT.N ) THEN + CALL ZUNGLQ( M, N, K, A, LDA, TAU, WORK, -1, IINFO ) + ELSE + IF( N.GT.1 ) THEN + CALL ZUNGLQ( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ -1, IINFO ) + END IF + END IF + END IF + LWKOPT = WORK( 1 ) + LWKOPT = MAX (LWKOPT, MN) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + WORK( 1 ) = LWKOPT + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( WANTQ ) THEN +* +* Form Q, determined by a call to ZGEBRD to reduce an m-by-k +* matrix +* + IF( M.GE.K ) THEN +* +* If m >= k, assume m >= n >= k +* + CALL ZUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If m < k, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q +* to those of the unit matrix +* + DO 20 J = M, 2, -1 + A( 1, J ) = ZERO + DO 10 I = J + 1, M + A( I, J ) = A( I, J-1 ) + 10 CONTINUE + 20 CONTINUE + A( 1, 1 ) = ONE + DO 30 I = 2, M + A( I, 1 ) = ZERO + 30 CONTINUE + IF( M.GT.1 ) THEN +* +* Form Q(2:m,2:m) +* + CALL ZUNGQR( M-1, M-1, M-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + ELSE +* +* Form P**H, determined by a call to ZGEBRD to reduce a k-by-n +* matrix +* + IF( K.LT.N ) THEN +* +* If k < n, assume k <= m <= n +* + CALL ZUNGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* If k >= n, assume m = n +* +* Shift the vectors which define the elementary reflectors one +* row downward, and set the first row and column of P**H to +* those of the unit matrix +* + A( 1, 1 ) = ONE + DO 40 I = 2, N + A( I, 1 ) = ZERO + 40 CONTINUE + DO 60 J = 2, N + DO 50 I = J - 1, 2, -1 + A( I, J ) = A( I-1, J ) + 50 CONTINUE + A( 1, J ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Form P**H(2:n,2:n) +* + CALL ZUNGLQ( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNGBR +* + END diff --git a/dspl/liblapack/SRC/zunghr.f b/dspl/liblapack/SRC/zunghr.f new file mode 100644 index 0000000..084eceb --- /dev/null +++ b/dspl/liblapack/SRC/zunghr.f @@ -0,0 +1,241 @@ +*> \brief \b ZUNGHR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNGHR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNGHR( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNGHR generates a complex unitary matrix Q which is defined as the +*> product of IHI-ILO elementary reflectors of order N, as returned by +*> ZGEHRD: +*> +*> Q = H(ilo) H(ilo+1) . . . H(ihi-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix Q. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> ILO and IHI must have the same values as in the previous call +*> of ZGEHRD. Q is equal to the unit matrix except in the +*> submatrix Q(ilo+1:ihi,ilo+1:ihi). +*> 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the vectors which define the elementary reflectors, +*> as returned by ZGEHRD. +*> On exit, the N-by-N unitary matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (N-1) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGEHRD. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= IHI-ILO. +*> For optimum performance LWORK >= (IHI-ILO)*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNGHR( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER IHI, ILO, INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IINFO, J, LWKOPT, NB, NH +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZUNGQR +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NH = IHI - ILO + LQUERY = ( LWORK.EQ.-1 ) + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN + INFO = -2 + ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, NH ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'ZUNGQR', ' ', NH, NH, NH, -1 ) + LWKOPT = MAX( 1, NH )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGHR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first ilo and the last n-ihi +* rows and columns to those of the unit matrix +* + DO 40 J = IHI, ILO + 1, -1 + DO 10 I = 1, J - 1 + A( I, J ) = ZERO + 10 CONTINUE + DO 20 I = J + 1, IHI + A( I, J ) = A( I, J-1 ) + 20 CONTINUE + DO 30 I = IHI + 1, N + A( I, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + DO 60 J = 1, ILO + DO 50 I = 1, N + A( I, J ) = ZERO + 50 CONTINUE + A( J, J ) = ONE + 60 CONTINUE + DO 80 J = IHI + 1, N + DO 70 I = 1, N + A( I, J ) = ZERO + 70 CONTINUE + A( J, J ) = ONE + 80 CONTINUE +* + IF( NH.GT.0 ) THEN +* +* Generate Q(ilo+1:ihi,ilo+1:ihi) +* + CALL ZUNGQR( NH, NH, NH, A( ILO+1, ILO+1 ), LDA, TAU( ILO ), + $ WORK, LWORK, IINFO ) + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNGHR +* + END diff --git a/dspl/liblapack/SRC/zungl2.f b/dspl/liblapack/SRC/zungl2.f new file mode 100644 index 0000000..0774cc4 --- /dev/null +++ b/dspl/liblapack/SRC/zungl2.f @@ -0,0 +1,207 @@ +*> \brief \b ZUNGL2 generates all or part of the unitary matrix Q from an LQ factorization determined by cgelqf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNGL2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNGL2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNGL2 generates an m-by-n complex matrix Q with orthonormal rows, +*> which is defined as the first m rows of a product of k elementary +*> reflectors of order n +*> +*> Q = H(k)**H . . . H(2)**H H(1)**H +*> +*> as returned by ZGELQF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. M >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the i-th row must contain the vector which defines +*> the elementary reflector H(i), for i = 1,2,...,k, as returned +*> by ZGELQF in the first k rows of its array argument A. +*> On exit, the m by n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGELQF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNGL2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACGV, ZLARF, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGL2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) + $ RETURN +* + IF( K.LT.M ) THEN +* +* Initialise rows k+1:m to rows of the unit matrix +* + DO 20 J = 1, N + DO 10 L = K + 1, M + A( L, J ) = ZERO + 10 CONTINUE + IF( J.GT.K .AND. J.LE.M ) + $ A( J, J ) = ONE + 20 CONTINUE + END IF +* + DO 40 I = K, 1, -1 +* +* Apply H(i)**H to A(i:m,i:n) from the right +* + IF( I.LT.N ) THEN + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + IF( I.LT.M ) THEN + A( I, I ) = ONE + CALL ZLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ DCONJG( TAU( I ) ), A( I+1, I ), LDA, WORK ) + END IF + CALL ZSCAL( N-I, -TAU( I ), A( I, I+1 ), LDA ) + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + END IF + A( I, I ) = ONE - DCONJG( TAU( I ) ) +* +* Set A(i,1:i-1) to zero +* + DO 30 L = 1, I - 1 + A( I, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of ZUNGL2 +* + END diff --git a/dspl/liblapack/SRC/zunglq.f b/dspl/liblapack/SRC/zunglq.f new file mode 100644 index 0000000..b0e5028 --- /dev/null +++ b/dspl/liblapack/SRC/zunglq.f @@ -0,0 +1,289 @@ +*> \brief \b ZUNGLQ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNGLQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNGLQ generates an M-by-N complex matrix Q with orthonormal rows, +*> which is defined as the first M rows of a product of K elementary +*> reflectors of order N +*> +*> Q = H(k)**H . . . H(2)**H H(1)**H +*> +*> as returned by ZGELQF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. M >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the i-th row must contain the vector which defines +*> the elementary reflector H(i), for i = 1,2,...,k, as returned +*> by ZGELQF in the first k rows of its array argument A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGELQF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit; +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARFB, ZLARFT, ZUNGL2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'ZUNGLQ', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, M )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZUNGLQ', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZUNGLQ', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk rows are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(kk+1:m,1:kk) to zero. +* + DO 20 J = 1, KK + DO 10 I = KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.M ) + $ CALL ZUNGL2( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.M ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL ZLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**H to A(i+ib:m,i:n) from the right +* + CALL ZLARFB( 'Right', 'Conjugate transpose', 'Forward', + $ 'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ), + $ LDA, WORK, LDWORK, A( I+IB, I ), LDA, + $ WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H**H to columns i:n of current block +* + CALL ZUNGL2( IB, N-I+1, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set columns 1:i-1 of current block to zero +* + DO 40 J = 1, I - 1 + DO 30 L = I, I + IB - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of ZUNGLQ +* + END diff --git a/dspl/liblapack/SRC/zungql.f b/dspl/liblapack/SRC/zungql.f new file mode 100644 index 0000000..c63a47d --- /dev/null +++ b/dspl/liblapack/SRC/zungql.f @@ -0,0 +1,296 @@ +*> \brief \b ZUNGQL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNGQL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNGQL generates an M-by-N complex matrix Q with orthonormal columns, +*> which is defined as the last N columns of a product of K elementary +*> reflectors of order M +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by ZGEQLF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the (n-k+i)-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by ZGEQLF in the last k columns of its array +*> argument A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGEQLF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KK, L, LDWORK, LWKOPT, + $ NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARFB, ZLARFT, ZUNG2L +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'ZUNGQL', ' ', M, N, K, -1 ) + LWKOPT = N*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZUNGQL', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZUNGQL', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the first block. +* The last kk columns are handled by the block method. +* + KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB ) +* +* Set A(m-kk+1:m,1:n-kk) to zero. +* + DO 20 J = 1, N - KK + DO 10 I = M - KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the first or only block. +* + CALL ZUNG2L( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = K - KK + 1, K, NB + IB = MIN( NB, K-I+1 ) + IF( N-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL ZLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left +* + CALL ZLARFB( 'Left', 'No transpose', 'Backward', + $ 'Columnwise', M-K+I+IB-1, N-K+I-1, IB, + $ A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows 1:m-k+i+ib-1 of current block +* + CALL ZUNG2L( M-K+I+IB-1, IB, IB, A( 1, N-K+I ), LDA, + $ TAU( I ), WORK, IINFO ) +* +* Set rows m-k+i+ib:m of current block to zero +* + DO 40 J = N - K + I, N - K + I + IB - 1 + DO 30 L = M - K + I + IB, M + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of ZUNGQL +* + END diff --git a/dspl/liblapack/SRC/zungqr.f b/dspl/liblapack/SRC/zungqr.f new file mode 100644 index 0000000..5f95b64 --- /dev/null +++ b/dspl/liblapack/SRC/zungqr.f @@ -0,0 +1,290 @@ +*> \brief \b ZUNGQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNGQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNGQR generates an M-by-N complex matrix Q with orthonormal columns, +*> which is defined as the first N columns of a product of K elementary +*> reflectors of order M +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by ZGEQRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the i-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by ZGEQRF in the first k columns of its array +*> argument A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGEQRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARFB, ZLARFT, ZUNG2R +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'ZUNGQR', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, N )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZUNGQR', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZUNGQR', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk columns are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(1:kk,kk+1:n) to zero. +* + DO 20 J = KK + 1, N + DO 10 I = 1, KK + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.N ) + $ CALL ZUNG2R( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL ZLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i:m,i+ib:n) from the left +* + CALL ZLARFB( 'Left', 'No transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows i:m of current block +* + CALL ZUNG2R( M-I+1, IB, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set rows 1:i-1 of current block to zero +* + DO 40 J = I, I + IB - 1 + DO 30 L = 1, I - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of ZUNGQR +* + END diff --git a/dspl/liblapack/SRC/zungr2.f b/dspl/liblapack/SRC/zungr2.f new file mode 100644 index 0000000..c65149d --- /dev/null +++ b/dspl/liblapack/SRC/zungr2.f @@ -0,0 +1,205 @@ +*> \brief \b ZUNGR2 generates all or part of the unitary matrix Q from an RQ factorization determined by cgerqf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNGR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNGR2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNGR2 generates an m by n complex matrix Q with orthonormal rows, +*> which is defined as the last m rows of a product of k elementary +*> reflectors of order n +*> +*> Q = H(1)**H H(2)**H . . . H(k)**H +*> +*> as returned by ZGERQF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. M >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the (m-k+i)-th row must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by ZGERQF in the last k rows of its array argument +*> A. +*> On exit, the m-by-n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGERQF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNGR2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, II, J, L +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACGV, ZLARF, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGR2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) + $ RETURN +* + IF( K.LT.M ) THEN +* +* Initialise rows 1:m-k to rows of the unit matrix +* + DO 20 J = 1, N + DO 10 L = 1, M - K + A( L, J ) = ZERO + 10 CONTINUE + IF( J.GT.N-M .AND. J.LE.N-K ) + $ A( M-N+J, J ) = ONE + 20 CONTINUE + END IF +* + DO 40 I = 1, K + II = M - K + I +* +* Apply H(i)**H to A(1:m-k+i,1:n-k+i) from the right +* + CALL ZLACGV( N-M+II-1, A( II, 1 ), LDA ) + A( II, N-M+II ) = ONE + CALL ZLARF( 'Right', II-1, N-M+II, A( II, 1 ), LDA, + $ DCONJG( TAU( I ) ), A, LDA, WORK ) + CALL ZSCAL( N-M+II-1, -TAU( I ), A( II, 1 ), LDA ) + CALL ZLACGV( N-M+II-1, A( II, 1 ), LDA ) + A( II, N-M+II ) = ONE - DCONJG( TAU( I ) ) +* +* Set A(m-k+i,n-k+i+1:n) to zero +* + DO 30 L = N - M + II + 1, N + A( II, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of ZUNGR2 +* + END diff --git a/dspl/liblapack/SRC/zungrq.f b/dspl/liblapack/SRC/zungrq.f new file mode 100644 index 0000000..56cd327 --- /dev/null +++ b/dspl/liblapack/SRC/zungrq.f @@ -0,0 +1,297 @@ +*> \brief \b ZUNGRQ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNGRQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNGRQ generates an M-by-N complex matrix Q with orthonormal rows, +*> which is defined as the last M rows of a product of K elementary +*> reflectors of order N +*> +*> Q = H(1)**H H(2)**H . . . H(k)**H +*> +*> as returned by ZGERQF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. M >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the (m-k+i)-th row must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by ZGERQF in the last k rows of its array argument +*> A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGERQF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,M). +*> For optimum performance LWORK >= M*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, II, IINFO, IWS, J, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARFB, ZLARFT, ZUNGR2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( M.LE.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'ZUNGRQ', ' ', M, N, K, -1 ) + LWKOPT = M*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGRQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = M + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZUNGRQ', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = M + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZUNGRQ', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the first block. +* The last kk rows are handled by the block method. +* + KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB ) +* +* Set A(1:m-kk,n-kk+1:n) to zero. +* + DO 20 J = N - KK + 1, N + DO 10 I = 1, M - KK + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the first or only block. +* + CALL ZUNGR2( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = K - KK + 1, K, NB + IB = MIN( NB, K-I+1 ) + II = M - K + I + IF( II.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL ZLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB, + $ A( II, 1 ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H**H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right +* + CALL ZLARFB( 'Right', 'Conjugate transpose', 'Backward', + $ 'Rowwise', II-1, N-K+I+IB-1, IB, A( II, 1 ), + $ LDA, WORK, LDWORK, A, LDA, WORK( IB+1 ), + $ LDWORK ) + END IF +* +* Apply H**H to columns 1:n-k+i+ib-1 of current block +* + CALL ZUNGR2( IB, N-K+I+IB-1, IB, A( II, 1 ), LDA, TAU( I ), + $ WORK, IINFO ) +* +* Set columns n-k+i+ib:n of current block to zero +* + DO 40 L = N - K + I + IB, N + DO 30 J = II, II + IB - 1 + A( J, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of ZUNGRQ +* + END diff --git a/dspl/liblapack/SRC/zungtr.f b/dspl/liblapack/SRC/zungtr.f new file mode 100644 index 0000000..7288543 --- /dev/null +++ b/dspl/liblapack/SRC/zungtr.f @@ -0,0 +1,256 @@ +*> \brief \b ZUNGTR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNGTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNGTR generates a complex unitary matrix Q which is defined as the +*> product of n-1 elementary reflectors of order N, as returned by +*> ZHETRD: +*> +*> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), +*> +*> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A contains elementary reflectors +*> from ZHETRD; +*> = 'L': Lower triangle of A contains elementary reflectors +*> from ZHETRD. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix Q. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the vectors which define the elementary reflectors, +*> as returned by ZHETRD. +*> On exit, the N-by-N unitary matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= N. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (N-1) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZHETRD. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= N-1. +*> For optimum performance LWORK >= (N-1)*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, J, LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZUNGQL, ZUNGQR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( UPPER ) THEN + NB = ILAENV( 1, 'ZUNGQL', ' ', N-1, N-1, N-1, -1 ) + ELSE + NB = ILAENV( 1, 'ZUNGQR', ' ', N-1, N-1, N-1, -1 ) + END IF + LWKOPT = MAX( 1, N-1 )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to ZHETRD with UPLO = 'U' +* +* Shift the vectors which define the elementary reflectors one +* column to the left, and set the last row and column of Q to +* those of the unit matrix +* + DO 20 J = 1, N - 1 + DO 10 I = 1, J - 1 + A( I, J ) = A( I, J+1 ) + 10 CONTINUE + A( N, J ) = ZERO + 20 CONTINUE + DO 30 I = 1, N - 1 + A( I, N ) = ZERO + 30 CONTINUE + A( N, N ) = ONE +* +* Generate Q(1:n-1,1:n-1) +* + CALL ZUNGQL( N-1, N-1, N-1, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* Q was determined by a call to ZHETRD with UPLO = 'L'. +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q to +* those of the unit matrix +* + DO 50 J = N, 2, -1 + A( 1, J ) = ZERO + DO 40 I = J + 1, N + A( I, J ) = A( I, J-1 ) + 40 CONTINUE + 50 CONTINUE + A( 1, 1 ) = ONE + DO 60 I = 2, N + A( I, 1 ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Generate Q(2:n,2:n) +* + CALL ZUNGQR( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNGTR +* + END diff --git a/dspl/liblapack/SRC/zunm22.f b/dspl/liblapack/SRC/zunm22.f new file mode 100644 index 0000000..f612e8f --- /dev/null +++ b/dspl/liblapack/SRC/zunm22.f @@ -0,0 +1,440 @@ +*> \brief \b ZUNM22 multiplies a general matrix by a banded unitary matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNM22 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNM22( SIDE, TRANS, M, N, N1, N2, Q, LDQ, C, LDC, +* $ WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER M, N, N1, N2, LDQ, LDC, LWORK, INFO +* .. +* .. Array Arguments .. +* COMPLEX*16 Q( LDQ, * ), C( LDC, * ), WORK( * ) +* .. +* +*> \par Purpose +* ============ +*> +*> \verbatim +*> +*> ZUNM22 overwrites the general complex M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> +*> where Q is a complex unitary matrix of order NQ, with NQ = M if +*> SIDE = 'L' and NQ = N if SIDE = 'R'. +*> The unitary matrix Q processes a 2-by-2 block structure +*> +*> [ Q11 Q12 ] +*> Q = [ ] +*> [ Q21 Q22 ], +*> +*> where Q12 is an N1-by-N1 lower triangular matrix and Q21 is an +*> N2-by-N2 upper triangular matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose); +*> = 'C': apply Q**H (Conjugate transpose). +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] N1 +*> \param[in] N2 +*> \verbatim +*> N1 is INTEGER +*> N2 is INTEGER +*> The dimension of Q12 and Q21, respectively. N1, N2 >= 0. +*> The following requirement must be satisfied: +*> N1 + N2 = M if SIDE = 'L' and N1 + N2 = N if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension +*> (LDQ,M) if SIDE = 'L' +*> (LDQ,N) if SIDE = 'R' +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. +*> LDQ >= max(1,M) if SIDE = 'L'; LDQ >= max(1,N) if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For optimum performance LWORK >= M*N. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date January 2015 +* +*> \ingroup complexOTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNM22( SIDE, TRANS, M, N, N1, N2, Q, LDQ, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.1) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* January 2015 +* + IMPLICIT NONE +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER M, N, N1, N2, LDQ, LDC, LWORK, INFO +* .. +* .. Array Arguments .. + COMPLEX*16 Q( LDQ, * ), C( LDC, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, LDWORK, LEN, LWKOPT, NB, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL ZGEMM, ZLACPY, ZTRMM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DCMPLX, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q; +* NW is the minimum dimension of WORK. +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + NW = NQ + IF( N1.EQ.0 .OR. N2.EQ.0 ) NW = 1 + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) + $ THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( N1.LT.0 .OR. N1+N2.NE.NQ ) THEN + INFO = -5 + ELSE IF( N2.LT.0 ) THEN + INFO = -6 + ELSE IF( LDQ.LT.MAX( 1, NQ ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKOPT = M*N + WORK( 1 ) = DCMPLX( LWKOPT ) + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNM22', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* +* Degenerate cases (N1 = 0 or N2 = 0) are handled using ZTRMM. +* + IF( N1.EQ.0 ) THEN + CALL ZTRMM( SIDE, 'Upper', TRANS, 'Non-Unit', M, N, ONE, + $ Q, LDQ, C, LDC ) + WORK( 1 ) = ONE + RETURN + ELSE IF( N2.EQ.0 ) THEN + CALL ZTRMM( SIDE, 'Lower', TRANS, 'Non-Unit', M, N, ONE, + $ Q, LDQ, C, LDC ) + WORK( 1 ) = ONE + RETURN + END IF +* +* Compute the largest chunk size available from the workspace. +* + NB = MAX( 1, MIN( LWORK, LWKOPT ) / NQ ) +* + IF( LEFT ) THEN + IF( NOTRAN ) THEN + DO I = 1, N, NB + LEN = MIN( NB, N-I+1 ) + LDWORK = M +* +* Multiply bottom part of C by Q12. +* + CALL ZLACPY( 'All', N1, LEN, C( N2+1, I ), LDC, WORK, + $ LDWORK ) + CALL ZTRMM( 'Left', 'Lower', 'No Transpose', 'Non-Unit', + $ N1, LEN, ONE, Q( 1, N2+1 ), LDQ, WORK, + $ LDWORK ) +* +* Multiply top part of C by Q11. +* + CALL ZGEMM( 'No Transpose', 'No Transpose', N1, LEN, N2, + $ ONE, Q, LDQ, C( 1, I ), LDC, ONE, WORK, + $ LDWORK ) +* +* Multiply top part of C by Q21. +* + CALL ZLACPY( 'All', N2, LEN, C( 1, I ), LDC, + $ WORK( N1+1 ), LDWORK ) + CALL ZTRMM( 'Left', 'Upper', 'No Transpose', 'Non-Unit', + $ N2, LEN, ONE, Q( N1+1, 1 ), LDQ, + $ WORK( N1+1 ), LDWORK ) +* +* Multiply bottom part of C by Q22. +* + CALL ZGEMM( 'No Transpose', 'No Transpose', N2, LEN, N1, + $ ONE, Q( N1+1, N2+1 ), LDQ, C( N2+1, I ), LDC, + $ ONE, WORK( N1+1 ), LDWORK ) +* +* Copy everything back. +* + CALL ZLACPY( 'All', M, LEN, WORK, LDWORK, C( 1, I ), + $ LDC ) + END DO + ELSE + DO I = 1, N, NB + LEN = MIN( NB, N-I+1 ) + LDWORK = M +* +* Multiply bottom part of C by Q21**H. +* + CALL ZLACPY( 'All', N2, LEN, C( N1+1, I ), LDC, WORK, + $ LDWORK ) + CALL ZTRMM( 'Left', 'Upper', 'Conjugate', 'Non-Unit', + $ N2, LEN, ONE, Q( N1+1, 1 ), LDQ, WORK, + $ LDWORK ) +* +* Multiply top part of C by Q11**H. +* + CALL ZGEMM( 'Conjugate', 'No Transpose', N2, LEN, N1, + $ ONE, Q, LDQ, C( 1, I ), LDC, ONE, WORK, + $ LDWORK ) +* +* Multiply top part of C by Q12**H. +* + CALL ZLACPY( 'All', N1, LEN, C( 1, I ), LDC, + $ WORK( N2+1 ), LDWORK ) + CALL ZTRMM( 'Left', 'Lower', 'Conjugate', 'Non-Unit', + $ N1, LEN, ONE, Q( 1, N2+1 ), LDQ, + $ WORK( N2+1 ), LDWORK ) +* +* Multiply bottom part of C by Q22**H. +* + CALL ZGEMM( 'Conjugate', 'No Transpose', N1, LEN, N2, + $ ONE, Q( N1+1, N2+1 ), LDQ, C( N1+1, I ), LDC, + $ ONE, WORK( N2+1 ), LDWORK ) +* +* Copy everything back. +* + CALL ZLACPY( 'All', M, LEN, WORK, LDWORK, C( 1, I ), + $ LDC ) + END DO + END IF + ELSE + IF( NOTRAN ) THEN + DO I = 1, M, NB + LEN = MIN( NB, M-I+1 ) + LDWORK = LEN +* +* Multiply right part of C by Q21. +* + CALL ZLACPY( 'All', LEN, N2, C( I, N1+1 ), LDC, WORK, + $ LDWORK ) + CALL ZTRMM( 'Right', 'Upper', 'No Transpose', 'Non-Unit', + $ LEN, N2, ONE, Q( N1+1, 1 ), LDQ, WORK, + $ LDWORK ) +* +* Multiply left part of C by Q11. +* + CALL ZGEMM( 'No Transpose', 'No Transpose', LEN, N2, N1, + $ ONE, C( I, 1 ), LDC, Q, LDQ, ONE, WORK, + $ LDWORK ) +* +* Multiply left part of C by Q12. +* + CALL ZLACPY( 'All', LEN, N1, C( I, 1 ), LDC, + $ WORK( 1 + N2*LDWORK ), LDWORK ) + CALL ZTRMM( 'Right', 'Lower', 'No Transpose', 'Non-Unit', + $ LEN, N1, ONE, Q( 1, N2+1 ), LDQ, + $ WORK( 1 + N2*LDWORK ), LDWORK ) +* +* Multiply right part of C by Q22. +* + CALL ZGEMM( 'No Transpose', 'No Transpose', LEN, N1, N2, + $ ONE, C( I, N1+1 ), LDC, Q( N1+1, N2+1 ), LDQ, + $ ONE, WORK( 1 + N2*LDWORK ), LDWORK ) +* +* Copy everything back. +* + CALL ZLACPY( 'All', LEN, N, WORK, LDWORK, C( I, 1 ), + $ LDC ) + END DO + ELSE + DO I = 1, M, NB + LEN = MIN( NB, M-I+1 ) + LDWORK = LEN +* +* Multiply right part of C by Q12**H. +* + CALL ZLACPY( 'All', LEN, N1, C( I, N2+1 ), LDC, WORK, + $ LDWORK ) + CALL ZTRMM( 'Right', 'Lower', 'Conjugate', 'Non-Unit', + $ LEN, N1, ONE, Q( 1, N2+1 ), LDQ, WORK, + $ LDWORK ) +* +* Multiply left part of C by Q11**H. +* + CALL ZGEMM( 'No Transpose', 'Conjugate', LEN, N1, N2, + $ ONE, C( I, 1 ), LDC, Q, LDQ, ONE, WORK, + $ LDWORK ) +* +* Multiply left part of C by Q21**H. +* + CALL ZLACPY( 'All', LEN, N2, C( I, 1 ), LDC, + $ WORK( 1 + N1*LDWORK ), LDWORK ) + CALL ZTRMM( 'Right', 'Upper', 'Conjugate', 'Non-Unit', + $ LEN, N2, ONE, Q( N1+1, 1 ), LDQ, + $ WORK( 1 + N1*LDWORK ), LDWORK ) +* +* Multiply right part of C by Q22**H. +* + CALL ZGEMM( 'No Transpose', 'Conjugate', LEN, N2, N1, + $ ONE, C( I, N2+1 ), LDC, Q( N1+1, N2+1 ), LDQ, + $ ONE, WORK( 1 + N1*LDWORK ), LDWORK ) +* +* Copy everything back. +* + CALL ZLACPY( 'All', LEN, N, WORK, LDWORK, C( I, 1 ), + $ LDC ) + END DO + END IF + END IF +* + WORK( 1 ) = DCMPLX( LWKOPT ) + RETURN +* +* End of ZUNM22 +* + END diff --git a/dspl/liblapack/SRC/zunm2l.f b/dspl/liblapack/SRC/zunm2l.f new file mode 100644 index 0000000..7e9a5cb --- /dev/null +++ b/dspl/liblapack/SRC/zunm2l.f @@ -0,0 +1,281 @@ +*> \brief \b ZUNM2L multiplies a general matrix by the unitary matrix from a QL factorization determined by cgeqlf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNM2L + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNM2L overwrites the general complex m-by-n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**H* C if SIDE = 'L' and TRANS = 'C', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**H if SIDE = 'R' and TRANS = 'C', +*> +*> where Q is a complex unitary matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by ZGEQLF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left +*> = 'R': apply Q or Q**H from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'C': apply Q**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> ZGEQLF in the last k columns of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGEQLF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the m-by-n matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, MI, NI, NQ + COMPLEX*16 AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARF +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNM2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)**H is applied to C(1:m-k+i,1:n) +* + MI = M - K + I + ELSE +* +* H(i) or H(i)**H is applied to C(1:m,1:n-k+i) +* + NI = N - K + I + END IF +* +* Apply H(i) or H(i)**H +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = DCONJG( TAU( I ) ) + END IF + AII = A( NQ-K+I, I ) + A( NQ-K+I, I ) = ONE + CALL ZLARF( SIDE, MI, NI, A( 1, I ), 1, TAUI, C, LDC, WORK ) + A( NQ-K+I, I ) = AII + 10 CONTINUE + RETURN +* +* End of ZUNM2L +* + END diff --git a/dspl/liblapack/SRC/zunm2r.f b/dspl/liblapack/SRC/zunm2r.f new file mode 100644 index 0000000..e59caf9 --- /dev/null +++ b/dspl/liblapack/SRC/zunm2r.f @@ -0,0 +1,286 @@ +*> \brief \b ZUNM2R multiplies a general matrix by the unitary matrix from a QR factorization determined by cgeqrf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNM2R + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNM2R overwrites the general complex m-by-n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**H* C if SIDE = 'L' and TRANS = 'C', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**H if SIDE = 'R' and TRANS = 'C', +*> +*> where Q is a complex unitary matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by ZGEQRF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left +*> = 'R': apply Q or Q**H from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'C': apply Q**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> ZGEQRF in the first k columns of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGEQRF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the m-by-n matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + COMPLEX*16 AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARF +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNM2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)**H is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) or H(i)**H is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) or H(i)**H +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = DCONJG( TAU( I ) ) + END IF + AII = A( I, I ) + A( I, I ) = ONE + CALL ZLARF( SIDE, MI, NI, A( I, I ), 1, TAUI, C( IC, JC ), LDC, + $ WORK ) + A( I, I ) = AII + 10 CONTINUE + RETURN +* +* End of ZUNM2R +* + END diff --git a/dspl/liblapack/SRC/zunmbr.f b/dspl/liblapack/SRC/zunmbr.f new file mode 100644 index 0000000..727aaeb --- /dev/null +++ b/dspl/liblapack/SRC/zunmbr.f @@ -0,0 +1,379 @@ +*> \brief \b ZUNMBR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNMBR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, +* LDC, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS, VECT +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> If VECT = 'Q', ZUNMBR overwrites the general complex M-by-N matrix C +*> with +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> +*> If VECT = 'P', ZUNMBR overwrites the general complex M-by-N matrix C +*> with +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': P * C C * P +*> TRANS = 'C': P**H * C C * P**H +*> +*> Here Q and P**H are the unitary matrices determined by ZGEBRD when +*> reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q +*> and P**H are defined as products of elementary reflectors H(i) and +*> G(i) respectively. +*> +*> Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the +*> order of the unitary matrix Q or P**H that is applied. +*> +*> If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: +*> if nq >= k, Q = H(1) H(2) . . . H(k); +*> if nq < k, Q = H(1) H(2) . . . H(nq-1). +*> +*> If VECT = 'P', A is assumed to have been a K-by-NQ matrix: +*> if k < nq, P = G(1) G(2) . . . G(k); +*> if k >= nq, P = G(1) G(2) . . . G(nq-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] VECT +*> \verbatim +*> VECT is CHARACTER*1 +*> = 'Q': apply Q or Q**H; +*> = 'P': apply P or P**H. +*> \endverbatim +*> +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q, Q**H, P or P**H from the Left; +*> = 'R': apply Q, Q**H, P or P**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q or P; +*> = 'C': Conjugate transpose, apply Q**H or P**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> If VECT = 'Q', the number of columns in the original +*> matrix reduced by ZGEBRD. +*> If VECT = 'P', the number of rows in the original +*> matrix reduced by ZGEBRD. +*> K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension +*> (LDA,min(nq,K)) if VECT = 'Q' +*> (LDA,nq) if VECT = 'P' +*> The vectors which define the elementary reflectors H(i) and +*> G(i), whose products determine the matrices Q and P, as +*> returned by ZGEBRD. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If VECT = 'Q', LDA >= max(1,nq); +*> if VECT = 'P', LDA >= max(1,min(nq,K)). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (min(nq,K)) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i) or G(i) which determines Q or P, as returned +*> by ZGEBRD in the array argument TAUQ or TAUP. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q +*> or P*C or P**H*C or C*P or C*P**H. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M); +*> if N = 0 or M = 0, LWORK >= 1. +*> For optimum performance LWORK >= max(1,N*NB) if SIDE = 'L', +*> and LWORK >= max(1,M*NB) if SIDE = 'R', where NB is the +*> optimal blocksize. (NB = 0 if M = 0 or N = 0.) +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, + $ LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, VECT + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL APPLYQ, LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZUNMLQ, ZUNMQR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + APPLYQ = LSAME( VECT, 'Q' ) + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q or P and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + NW = 0 + END IF + IF( .NOT.APPLYQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( K.LT.0 ) THEN + INFO = -6 + ELSE IF( ( APPLYQ .AND. LDA.LT.MAX( 1, NQ ) ) .OR. + $ ( .NOT.APPLYQ .AND. LDA.LT.MAX( 1, MIN( NQ, K ) ) ) ) + $ THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( NW.GT.0 ) THEN + IF( APPLYQ ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'ZUNMQR', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'ZUNMQR', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + ELSE + IF( LEFT ) THEN + NB = ILAENV( 1, 'ZUNMLQ', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'ZUNMLQ', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + END IF + LWKOPT = MAX( 1, NW*NB ) + ELSE + LWKOPT = 1 + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNMBR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + IF( APPLYQ ) THEN +* +* Apply Q +* + IF( NQ.GE.K ) THEN +* +* Q was determined by a call to ZGEBRD with nq >= k +* + CALL ZUNMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* Q was determined by a call to ZGEBRD with nq < k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL ZUNMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, + $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + ELSE +* +* Apply P +* + IF( NOTRAN ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF + IF( NQ.GT.K ) THEN +* +* P was determined by a call to ZGEBRD with nq > k +* + CALL ZUNMLQ( SIDE, TRANST, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, IINFO ) + ELSE IF( NQ.GT.1 ) THEN +* +* P was determined by a call to ZGEBRD with nq <= k +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + I1 = 2 + I2 = 1 + ELSE + MI = M + NI = N - 1 + I1 = 1 + I2 = 2 + END IF + CALL ZUNMLQ( SIDE, TRANST, MI, NI, NQ-1, A( 1, 2 ), LDA, + $ TAU, C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNMBR +* + END diff --git a/dspl/liblapack/SRC/zunmhr.f b/dspl/liblapack/SRC/zunmhr.f new file mode 100644 index 0000000..49b7943 --- /dev/null +++ b/dspl/liblapack/SRC/zunmhr.f @@ -0,0 +1,294 @@ +*> \brief \b ZUNMHR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNMHR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNMHR( SIDE, TRANS, M, N, ILO, IHI, A, LDA, TAU, C, +* LDC, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER IHI, ILO, INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNMHR overwrites the general complex M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> +*> where Q is a complex unitary matrix of order nq, with nq = m if +*> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +*> IHI-ILO elementary reflectors, as returned by ZGEHRD: +*> +*> Q = H(ilo) H(ilo+1) . . . H(ihi-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'C': apply Q**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] ILO +*> \verbatim +*> ILO is INTEGER +*> \endverbatim +*> +*> \param[in] IHI +*> \verbatim +*> IHI is INTEGER +*> +*> ILO and IHI must have the same values as in the previous call +*> of ZGEHRD. Q is equal to the unit matrix except in the +*> submatrix Q(ilo+1:ihi,ilo+1:ihi). +*> If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and +*> ILO = 1 and IHI = 0, if M = 0; +*> if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and +*> ILO = 1 and IHI = 0, if N = 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension +*> (LDA,M) if SIDE = 'L' +*> (LDA,N) if SIDE = 'R' +*> The vectors which define the elementary reflectors, as +*> returned by ZGEHRD. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGEHRD. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For optimum performance LWORK >= N*NB if SIDE = 'L', and +*> LWORK >= M*NB if SIDE = 'R', where NB is the optimal +*> blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNMHR( SIDE, TRANS, M, N, ILO, IHI, A, LDA, TAU, C, + $ LDC, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER IHI, ILO, INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NH, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZUNMQR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NH = IHI - ILO + LEFT = LSAME( SIDE, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) + $ THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, NQ ) ) THEN + INFO = -5 + ELSE IF( IHI.LT.MIN( ILO, NQ ) .OR. IHI.GT.NQ ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'ZUNMQR', SIDE // TRANS, NH, N, NH, -1 ) + ELSE + NB = ILAENV( 1, 'ZUNMQR', SIDE // TRANS, M, NH, NH, -1 ) + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNMHR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. NH.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( LEFT ) THEN + MI = NH + NI = N + I1 = ILO + 1 + I2 = 1 + ELSE + MI = M + NI = NH + I1 = 1 + I2 = ILO + 1 + END IF +* + CALL ZUNMQR( SIDE, TRANS, MI, NI, NH, A( ILO+1, ILO ), LDA, + $ TAU( ILO ), C( I1, I2 ), LDC, WORK, LWORK, IINFO ) +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNMHR +* + END diff --git a/dspl/liblapack/SRC/zunml2.f b/dspl/liblapack/SRC/zunml2.f new file mode 100644 index 0000000..cb1eaf2 --- /dev/null +++ b/dspl/liblapack/SRC/zunml2.f @@ -0,0 +1,290 @@ +*> \brief \b ZUNML2 multiplies a general matrix by the unitary matrix from a LQ factorization determined by cgelqf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNML2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNML2 overwrites the general complex m-by-n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**H* C if SIDE = 'L' and TRANS = 'C', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**H if SIDE = 'R' and TRANS = 'C', +*> +*> where Q is a complex unitary matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(k)**H . . . H(2)**H H(1)**H +*> +*> as returned by ZGELQF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left +*> = 'R': apply Q or Q**H from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'C': apply Q**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> ZGELQF in the first k rows of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGELQF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the m-by-n matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JC, MI, NI, NQ + COMPLEX*16 AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACGV, ZLARF +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNML2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)**H is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) or H(i)**H is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) or H(i)**H +* + IF( NOTRAN ) THEN + TAUI = DCONJG( TAU( I ) ) + ELSE + TAUI = TAU( I ) + END IF + IF( I.LT.NQ ) + $ CALL ZLACGV( NQ-I, A( I, I+1 ), LDA ) + AII = A( I, I ) + A( I, I ) = ONE + CALL ZLARF( SIDE, MI, NI, A( I, I ), LDA, TAUI, C( IC, JC ), + $ LDC, WORK ) + A( I, I ) = AII + IF( I.LT.NQ ) + $ CALL ZLACGV( NQ-I, A( I, I+1 ), LDA ) + 10 CONTINUE + RETURN +* +* End of ZUNML2 +* + END diff --git a/dspl/liblapack/SRC/zunmlq.f b/dspl/liblapack/SRC/zunmlq.f new file mode 100644 index 0000000..c12c063 --- /dev/null +++ b/dspl/liblapack/SRC/zunmlq.f @@ -0,0 +1,347 @@ +*> \brief \b ZUNMLQ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNMLQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNMLQ overwrites the general complex M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> +*> where Q is a complex unitary matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(k)**H . . . H(2)**H H(1)**H +*> +*> as returned by ZGELQF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Conjugate transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> ZGELQF in the first k rows of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGELQF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARFB, ZLARFT, ZUNML2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + NB = MIN( NBMAX, ILAENV( 1, 'ZUNMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + TSIZE + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNMLQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZUNMLQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL ZUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IWT = 1 + NW*NB + IF( ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL ZLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK( IWT ), LDT ) + IF( LEFT ) THEN +* +* H or H**H is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H**H is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H**H +* + CALL ZLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB, + $ A( I, I ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNMLQ +* + END diff --git a/dspl/liblapack/SRC/zunmql.f b/dspl/liblapack/SRC/zunmql.f new file mode 100644 index 0000000..abdc0e5 --- /dev/null +++ b/dspl/liblapack/SRC/zunmql.f @@ -0,0 +1,339 @@ +*> \brief \b ZUNMQL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNMQL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNMQL overwrites the general complex M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> +*> where Q is a complex unitary matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by ZGEQLF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> ZGEQLF in the last k columns of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGEQLF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For good performance, LWORK should genreally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT, + $ MI, NB, NBMIN, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARFB, ZLARFT, ZUNM2L +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = MAX( 1, N ) + ELSE + NQ = N + NW = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = MIN( NBMAX, ILAENV( 1, 'ZUNMQL', SIDE // TRANS, M, N, + $ K, -1 ) ) + LWKOPT = NW*NB + TSIZE + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNMQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZUNMQL', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL ZUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IWT = 1 + NW*NB + IF( ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL ZLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB, + $ A( 1, I ), LDA, TAU( I ), WORK( IWT ), LDT ) + IF( LEFT ) THEN +* +* H or H**H is applied to C(1:m-k+i+ib-1,1:n) +* + MI = M - K + I + IB - 1 + ELSE +* +* H or H**H is applied to C(1:m,1:n-k+i+ib-1) +* + NI = N - K + I + IB - 1 + END IF +* +* Apply H or H**H +* + CALL ZLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI, + $ IB, A( 1, I ), LDA, WORK( IWT ), LDT, C, LDC, + $ WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNMQL +* + END diff --git a/dspl/liblapack/SRC/zunmqr.f b/dspl/liblapack/SRC/zunmqr.f new file mode 100644 index 0000000..e60f10a --- /dev/null +++ b/dspl/liblapack/SRC/zunmqr.f @@ -0,0 +1,340 @@ +*> \brief \b ZUNMQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNMQR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNMQR overwrites the general complex M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> +*> where Q is a complex unitary matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by ZGEQRF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Conjugate transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,K) +*> The i-th column must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> ZGEQRF in the first k columns of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> If SIDE = 'L', LDA >= max(1,M); +*> if SIDE = 'R', LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGEQRF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JC, LDWORK, + $ LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARFB, ZLARFT, ZUNM2R +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + NB = MIN( NBMAX, ILAENV( 1, 'ZUNMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + LWKOPT = MAX( 1, NW )*NB + TSIZE + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNMQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZUNMQR', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL ZUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IWT = 1 + NW*NB + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL ZLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ), + $ LDA, TAU( I ), WORK( IWT ), LDT ) + IF( LEFT ) THEN +* +* H or H**H is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H**H is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H**H +* + CALL ZLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI, + $ IB, A( I, I ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNMQR +* + END diff --git a/dspl/liblapack/SRC/zunmr2.f b/dspl/liblapack/SRC/zunmr2.f new file mode 100644 index 0000000..0a77773 --- /dev/null +++ b/dspl/liblapack/SRC/zunmr2.f @@ -0,0 +1,283 @@ +*> \brief \b ZUNMR2 multiplies a general matrix by the unitary matrix from a RQ factorization determined by cgerqf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNMR2 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNMR2 overwrites the general complex m-by-n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**H* C if SIDE = 'L' and TRANS = 'C', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**H if SIDE = 'R' and TRANS = 'C', +*> +*> where Q is a complex unitary matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1)**H H(2)**H . . . H(k)**H +*> +*> as returned by ZGERQF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left +*> = 'R': apply Q or Q**H from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'C': apply Q**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> ZGERQF in the last k rows of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGERQF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the m-by-n matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, MI, NI, NQ + COMPLEX*16 AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACGV, ZLARF +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNMR2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)**H is applied to C(1:m-k+i,1:n) +* + MI = M - K + I + ELSE +* +* H(i) or H(i)**H is applied to C(1:m,1:n-k+i) +* + NI = N - K + I + END IF +* +* Apply H(i) or H(i)**H +* + IF( NOTRAN ) THEN + TAUI = DCONJG( TAU( I ) ) + ELSE + TAUI = TAU( I ) + END IF + CALL ZLACGV( NQ-K+I-1, A( I, 1 ), LDA ) + AII = A( I, NQ-K+I ) + A( I, NQ-K+I ) = ONE + CALL ZLARF( SIDE, MI, NI, A( I, 1 ), LDA, TAUI, C, LDC, WORK ) + A( I, NQ-K+I ) = AII + CALL ZLACGV( NQ-K+I-1, A( I, 1 ), LDA ) + 10 CONTINUE + RETURN +* +* End of ZUNMR2 +* + END diff --git a/dspl/liblapack/SRC/zunmr3.f b/dspl/liblapack/SRC/zunmr3.f new file mode 100644 index 0000000..138a3c0 --- /dev/null +++ b/dspl/liblapack/SRC/zunmr3.f @@ -0,0 +1,305 @@ +*> \brief \b ZUNMR3 multiplies a general matrix by the unitary matrix from a RZ factorization determined by ctzrzf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNMR3 + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, +* WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, L, LDA, LDC, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNMR3 overwrites the general complex m by n matrix C with +*> +*> Q * C if SIDE = 'L' and TRANS = 'N', or +*> +*> Q**H* C if SIDE = 'L' and TRANS = 'C', or +*> +*> C * Q if SIDE = 'R' and TRANS = 'N', or +*> +*> C * Q**H if SIDE = 'R' and TRANS = 'C', +*> +*> where Q is a complex unitary matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by ZTZRZF. Q is of order m if SIDE = 'L' and of order n +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left +*> = 'R': apply Q or Q**H from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply Q (No transpose) +*> = 'C': apply Q**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of columns of the matrix A containing +*> the meaningful part of the Householder reflectors. +*> If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> ZTZRZF in the last k rows of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZTZRZF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the m-by-n matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension +*> (N) if SIDE = 'L', +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZUNMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, L, LDA, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, NOTRAN + INTEGER I, I1, I2, I3, IC, JA, JC, MI, NI, NQ + COMPLEX*16 TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARZ +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR. + $ ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNMR3', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) + $ RETURN +* + IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = 1 + ELSE + I1 = K + I2 = 1 + I3 = -1 + END IF +* + IF( LEFT ) THEN + NI = N + JA = M - L + 1 + JC = 1 + ELSE + MI = M + JA = N - L + 1 + IC = 1 + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)**H is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H(i) or H(i)**H is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H(i) or H(i)**H +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = DCONJG( TAU( I ) ) + END IF + CALL ZLARZ( SIDE, MI, NI, L, A( I, JA ), LDA, TAUI, + $ C( IC, JC ), LDC, WORK ) +* + 10 CONTINUE +* + RETURN +* +* End of ZUNMR3 +* + END diff --git a/dspl/liblapack/SRC/zunmrq.f b/dspl/liblapack/SRC/zunmrq.f new file mode 100644 index 0000000..0d246de --- /dev/null +++ b/dspl/liblapack/SRC/zunmrq.f @@ -0,0 +1,346 @@ +*> \brief \b ZUNMRQ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNMRQ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNMRQ overwrites the general complex M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> +*> where Q is a complex unitary matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1)**H H(2)**H . . . H(k)**H +*> +*> as returned by ZGERQF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> ZGERQF in the last k rows of its array argument A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGERQF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IINFO, IWT, LDWORK, LWKOPT, + $ MI, NB, NBMIN, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARFB, ZLARFT, ZUNMR2 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = MAX( 1, N ) + ELSE + NQ = N + NW = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.NW .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = MIN( NBMAX, ILAENV( 1, 'ZUNMRQ', SIDE // TRANS, M, N, + $ K, -1 ) ) + LWKOPT = NW*NB + TSIZE + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNMRQ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZUNMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL ZUNMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, + $ IINFO ) + ELSE +* +* Use blocked code +* + IWT = 1 + NW*NB + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + IF( NOTRAN ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL ZLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB, + $ A( I, 1 ), LDA, TAU( I ), WORK( IWT ), LDT ) + IF( LEFT ) THEN +* +* H or H**H is applied to C(1:m-k+i+ib-1,1:n) +* + MI = M - K + I + IB - 1 + ELSE +* +* H or H**H is applied to C(1:m,1:n-k+i+ib-1) +* + NI = N - K + I + IB - 1 + END IF +* +* Apply H or H**H +* + CALL ZLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, + $ IB, A( I, 1 ), LDA, WORK( IWT ), LDT, C, LDC, + $ WORK, LDWORK ) + 10 CONTINUE + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNMRQ +* + END diff --git a/dspl/liblapack/SRC/zunmrz.f b/dspl/liblapack/SRC/zunmrz.f new file mode 100644 index 0000000..71609f3 --- /dev/null +++ b/dspl/liblapack/SRC/zunmrz.f @@ -0,0 +1,384 @@ +*> \brief \b ZUNMRZ +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNMRZ + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNMRZ( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS +* INTEGER INFO, K, L, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNMRZ overwrites the general complex M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> +*> where Q is a complex unitary matrix defined as the product of k +*> elementary reflectors +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by ZTZRZF. Q is of order M if SIDE = 'L' and of order N +*> if SIDE = 'R'. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Conjugate transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines +*> the matrix Q. +*> If SIDE = 'L', M >= K >= 0; +*> if SIDE = 'R', N >= K >= 0. +*> \endverbatim +*> +*> \param[in] L +*> \verbatim +*> L is INTEGER +*> The number of columns of the matrix A containing +*> the meaningful part of the Householder reflectors. +*> If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension +*> (LDA,M) if SIDE = 'L', +*> (LDA,N) if SIDE = 'R' +*> The i-th row must contain the vector which defines the +*> elementary reflector H(i), for i = 1,2,...,k, as returned by +*> ZTZRZF in the last k rows of its array argument A. +*> A is modified by the routine but restored on exit. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,K). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZTZRZF. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For good performance, LWORK should generally be larger. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +*> \par Contributors: +* ================== +*> +*> A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZUNMRZ( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS + INTEGER INFO, K, L, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + INTEGER NBMAX, LDT, TSIZE + PARAMETER ( NBMAX = 64, LDT = NBMAX+1, + $ TSIZE = LDT*NBMAX ) +* .. +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, NOTRAN + CHARACTER TRANST + INTEGER I, I1, I2, I3, IB, IC, IINFO, IWT, JA, JC, + $ LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARZB, ZLARZT, ZUNMR3 +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = MAX( 1, N ) + ELSE + NQ = N + NW = MAX( 1, M ) + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -2 + ELSE IF( M.LT.0 ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN + INFO = -5 + ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR. + $ ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN + INFO = -6 + ELSE IF( LDA.LT.MAX( 1, K ) ) THEN + INFO = -8 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -11 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -13 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Compute the workspace requirements +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = MIN( NBMAX, ILAENV( 1, 'ZUNMRQ', SIDE // TRANS, M, N, + $ K, -1 ) ) + LWKOPT = NW*NB + TSIZE + END IF + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNMRZ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) THEN + RETURN + END IF +* +* Determine the block size. NB may be at most NBMAX, where NBMAX +* is used to define the local array T. +* + NB = MIN( NBMAX, ILAENV( 1, 'ZUNMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + NBMIN = 2 + LDWORK = NW + IF( NB.GT.1 .AND. NB.LT.K ) THEN + IF( LWORK.LT.NW*NB+TSIZE ) THEN + NB = (LWORK-TSIZE) / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZUNMRQ', SIDE // TRANS, M, N, K, + $ -1 ) ) + END IF + END IF +* + IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN +* +* Use unblocked code +* + CALL ZUNMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC, + $ WORK, IINFO ) + ELSE +* +* Use blocked code +* + IWT = 1 + NW*NB + IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) ) THEN + I1 = 1 + I2 = K + I3 = NB + ELSE + I1 = ( ( K-1 ) / NB )*NB + 1 + I2 = 1 + I3 = -NB + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + JA = M - L + 1 + ELSE + MI = M + IC = 1 + JA = N - L + 1 + END IF +* + IF( NOTRAN ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + DO 10 I = I1, I2, I3 + IB = MIN( NB, K-I+1 ) +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL ZLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA, + $ TAU( I ), WORK( IWT ), LDT ) +* + IF( LEFT ) THEN +* +* H or H**H is applied to C(i:m,1:n) +* + MI = M - I + 1 + IC = I + ELSE +* +* H or H**H is applied to C(1:m,i:n) +* + NI = N - I + 1 + JC = I + END IF +* +* Apply H or H**H +* + CALL ZLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI, + $ IB, L, A( I, JA ), LDA, WORK( IWT ), LDT, + $ C( IC, JC ), LDC, WORK, LDWORK ) + 10 CONTINUE +* + END IF +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZUNMRZ +* + END diff --git a/dspl/liblapack/SRC/zunmtr.f b/dspl/liblapack/SRC/zunmtr.f new file mode 100644 index 0000000..1c85703 --- /dev/null +++ b/dspl/liblapack/SRC/zunmtr.f @@ -0,0 +1,310 @@ +*> \brief \b ZUNMTR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNMTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, +* WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS, UPLO +* INTEGER INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNMTR overwrites the general complex M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> +*> where Q is a complex unitary matrix of order nq, with nq = m if +*> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +*> nq-1 elementary reflectors, as returned by ZHETRD: +*> +*> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); +*> +*> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A contains elementary reflectors +*> from ZHETRD; +*> = 'L': Lower triangle of A contains elementary reflectors +*> from ZHETRD. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Conjugate transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension +*> (LDA,M) if SIDE = 'L' +*> (LDA,N) if SIDE = 'R' +*> The vectors which define the elementary reflectors, as +*> returned by ZHETRD. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. +*> LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZHETRD. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. +*> If SIDE = 'L', LWORK >= max(1,N); +*> if SIDE = 'R', LWORK >= max(1,M). +*> For optimum performance LWORK >= N*NB if SIDE = 'L', and +*> LWORK >=M*NB if SIDE = 'R', where NB is the optimal +*> blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC, + $ WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, UPLO + INTEGER INFO, LDA, LDC, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + LOGICAL LEFT, LQUERY, UPPER + INTEGER I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZUNMQL, ZUNMQR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* +* NQ is the order of Q and NW is the minimum dimension of WORK +* + IF( LEFT ) THEN + NQ = M + NW = N + ELSE + NQ = N + NW = M + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) + $ THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN + INFO = -7 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -10 + ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN + INFO = -12 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( UPPER ) THEN + IF( LEFT ) THEN + NB = ILAENV( 1, 'ZUNMQL', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'ZUNMQL', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + ELSE + IF( LEFT ) THEN + NB = ILAENV( 1, 'ZUNMQR', SIDE // TRANS, M-1, N, M-1, + $ -1 ) + ELSE + NB = ILAENV( 1, 'ZUNMQR', SIDE // TRANS, M, N-1, N-1, + $ -1 ) + END IF + END IF + LWKOPT = MAX( 1, NW )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNMTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( LEFT ) THEN + MI = M - 1 + NI = N + ELSE + MI = M + NI = N - 1 + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to ZHETRD with UPLO = 'U' +* + CALL ZUNMQL( SIDE, TRANS, MI, NI, NQ-1, A( 1, 2 ), LDA, TAU, C, + $ LDC, WORK, LWORK, IINFO ) + ELSE +* +* Q was determined by a call to ZHETRD with UPLO = 'L' +* + IF( LEFT ) THEN + I1 = 2 + I2 = 1 + ELSE + I1 = 1 + I2 = 2 + END IF + CALL ZUNMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU, + $ C( I1, I2 ), LDC, WORK, LWORK, IINFO ) + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNMTR +* + END diff --git a/dspl/liblapack/SRC/zupgtr.f b/dspl/liblapack/SRC/zupgtr.f new file mode 100644 index 0000000..eeff364 --- /dev/null +++ b/dspl/liblapack/SRC/zupgtr.f @@ -0,0 +1,233 @@ +*> \brief \b ZUPGTR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUPGTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUPGTR( UPLO, N, AP, TAU, Q, LDQ, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDQ, N +* .. +* .. Array Arguments .. +* COMPLEX*16 AP( * ), Q( LDQ, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUPGTR generates a complex unitary matrix Q which is defined as the +*> product of n-1 elementary reflectors H(i) of order n, as returned by +*> ZHPTRD using packed storage: +*> +*> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), +*> +*> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular packed storage used in previous +*> call to ZHPTRD; +*> = 'L': Lower triangular packed storage used in previous +*> call to ZHPTRD. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix Q. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension (N*(N+1)/2) +*> The vectors which define the elementary reflectors, as +*> returned by ZHPTRD. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (N-1) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZHPTRD. +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is COMPLEX*16 array, dimension (LDQ,N) +*> The N-by-N unitary matrix Q. +*> \endverbatim +*> +*> \param[in] LDQ +*> \verbatim +*> LDQ is INTEGER +*> The leading dimension of the array Q. LDQ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N-1) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUPGTR( UPLO, N, AP, TAU, Q, LDQ, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDQ, N +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ), Q( LDQ, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ), + $ CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I, IINFO, IJ, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZUNG2L, ZUNG2R +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUPGTR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Q was determined by a call to ZHPTRD with UPLO = 'U' +* +* Unpack the vectors which define the elementary reflectors and +* set the last row and column of Q equal to those of the unit +* matrix +* + IJ = 2 + DO 20 J = 1, N - 1 + DO 10 I = 1, J - 1 + Q( I, J ) = AP( IJ ) + IJ = IJ + 1 + 10 CONTINUE + IJ = IJ + 2 + Q( N, J ) = CZERO + 20 CONTINUE + DO 30 I = 1, N - 1 + Q( I, N ) = CZERO + 30 CONTINUE + Q( N, N ) = CONE +* +* Generate Q(1:n-1,1:n-1) +* + CALL ZUNG2L( N-1, N-1, N-1, Q, LDQ, TAU, WORK, IINFO ) +* + ELSE +* +* Q was determined by a call to ZHPTRD with UPLO = 'L'. +* +* Unpack the vectors which define the elementary reflectors and +* set the first row and column of Q equal to those of the unit +* matrix +* + Q( 1, 1 ) = CONE + DO 40 I = 2, N + Q( I, 1 ) = CZERO + 40 CONTINUE + IJ = 3 + DO 60 J = 2, N + Q( 1, J ) = CZERO + DO 50 I = J + 1, N + Q( I, J ) = AP( IJ ) + IJ = IJ + 1 + 50 CONTINUE + IJ = IJ + 2 + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Generate Q(2:n,2:n) +* + CALL ZUNG2R( N-1, N-1, N-1, Q( 2, 2 ), LDQ, TAU, WORK, + $ IINFO ) + END IF + END IF + RETURN +* +* End of ZUPGTR +* + END diff --git a/dspl/liblapack/SRC/zupmtr.f b/dspl/liblapack/SRC/zupmtr.f new file mode 100644 index 0000000..a2efa6e --- /dev/null +++ b/dspl/liblapack/SRC/zupmtr.f @@ -0,0 +1,349 @@ +*> \brief \b ZUPMTR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUPMTR + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE, TRANS, UPLO +* INTEGER INFO, LDC, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 AP( * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUPMTR overwrites the general complex M-by-N matrix C with +*> +*> SIDE = 'L' SIDE = 'R' +*> TRANS = 'N': Q * C C * Q +*> TRANS = 'C': Q**H * C C * Q**H +*> +*> where Q is a complex unitary matrix of order nq, with nq = m if +*> SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of +*> nq-1 elementary reflectors, as returned by ZHPTRD using packed +*> storage: +*> +*> if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1); +*> +*> if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply Q or Q**H from the Left; +*> = 'R': apply Q or Q**H from the Right. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangular packed storage used in previous +*> call to ZHPTRD; +*> = 'L': Lower triangular packed storage used in previous +*> call to ZHPTRD. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': No transpose, apply Q; +*> = 'C': Conjugate transpose, apply Q**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. N >= 0. +*> \endverbatim +*> +*> \param[in] AP +*> \verbatim +*> AP is COMPLEX*16 array, dimension +*> (M*(M+1)/2) if SIDE = 'L' +*> (N*(N+1)/2) if SIDE = 'R' +*> The vectors which define the elementary reflectors, as +*> returned by ZHPTRD. AP is modified by the routine but +*> restored on exit. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (M-1) if SIDE = 'L' +*> or (N-1) if SIDE = 'R' +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZHPTRD. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension +*> (N) if SIDE = 'L' +*> (M) if SIDE = 'R' +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date December 2016 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK, + $ INFO ) +* +* -- LAPACK computational routine (version 3.7.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* December 2016 +* +* .. Scalar Arguments .. + CHARACTER SIDE, TRANS, UPLO + INTEGER INFO, LDC, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 AP( * ), C( LDC, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL FORWRD, LEFT, NOTRAN, UPPER + INTEGER I, I1, I2, I3, IC, II, JC, MI, NI, NQ + COMPLEX*16 AII, TAUI +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARF +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LEFT = LSAME( SIDE, 'L' ) + NOTRAN = LSAME( TRANS, 'N' ) + UPPER = LSAME( UPLO, 'U' ) +* +* NQ is the order of Q +* + IF( LEFT ) THEN + NQ = M + ELSE + NQ = N + END IF + IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN + INFO = -1 + ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -2 + ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN + INFO = -3 + ELSE IF( M.LT.0 ) THEN + INFO = -4 + ELSE IF( N.LT.0 ) THEN + INFO = -5 + ELSE IF( LDC.LT.MAX( 1, M ) ) THEN + INFO = -9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUPMTR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.EQ.0 .OR. N.EQ.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Q was determined by a call to ZHPTRD with UPLO = 'U' +* + FORWRD = ( LEFT .AND. NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. .NOT.NOTRAN ) +* + IF( FORWRD ) THEN + I1 = 1 + I2 = NQ - 1 + I3 = 1 + II = 2 + ELSE + I1 = NQ - 1 + I2 = 1 + I3 = -1 + II = NQ*( NQ+1 ) / 2 - 1 + END IF +* + IF( LEFT ) THEN + NI = N + ELSE + MI = M + END IF +* + DO 10 I = I1, I2, I3 + IF( LEFT ) THEN +* +* H(i) or H(i)**H is applied to C(1:i,1:n) +* + MI = I + ELSE +* +* H(i) or H(i)**H is applied to C(1:m,1:i) +* + NI = I + END IF +* +* Apply H(i) or H(i)**H +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = DCONJG( TAU( I ) ) + END IF + AII = AP( II ) + AP( II ) = ONE + CALL ZLARF( SIDE, MI, NI, AP( II-I+1 ), 1, TAUI, C, LDC, + $ WORK ) + AP( II ) = AII +* + IF( FORWRD ) THEN + II = II + I + 2 + ELSE + II = II - I - 1 + END IF + 10 CONTINUE + ELSE +* +* Q was determined by a call to ZHPTRD with UPLO = 'L'. +* + FORWRD = ( LEFT .AND. .NOT.NOTRAN ) .OR. + $ ( .NOT.LEFT .AND. NOTRAN ) +* + IF( FORWRD ) THEN + I1 = 1 + I2 = NQ - 1 + I3 = 1 + II = 2 + ELSE + I1 = NQ - 1 + I2 = 1 + I3 = -1 + II = NQ*( NQ+1 ) / 2 - 1 + END IF +* + IF( LEFT ) THEN + NI = N + JC = 1 + ELSE + MI = M + IC = 1 + END IF +* + DO 20 I = I1, I2, I3 + AII = AP( II ) + AP( II ) = ONE + IF( LEFT ) THEN +* +* H(i) or H(i)**H is applied to C(i+1:m,1:n) +* + MI = M - I + IC = I + 1 + ELSE +* +* H(i) or H(i)**H is applied to C(1:m,i+1:n) +* + NI = N - I + JC = I + 1 + END IF +* +* Apply H(i) or H(i)**H +* + IF( NOTRAN ) THEN + TAUI = TAU( I ) + ELSE + TAUI = DCONJG( TAU( I ) ) + END IF + CALL ZLARF( SIDE, MI, NI, AP( II ), 1, TAUI, C( IC, JC ), + $ LDC, WORK ) + AP( II ) = AII +* + IF( FORWRD ) THEN + II = II + NQ - I + 1 + ELSE + II = II - NQ + I - 2 + END IF + 20 CONTINUE + END IF + RETURN +* +* End of ZUPMTR +* + END diff --git a/dspl/liblapack/make.inc b/dspl/liblapack/make.inc new file mode 100644 index 0000000..eef22c3 --- /dev/null +++ b/dspl/liblapack/make.inc @@ -0,0 +1,85 @@ +#################################################################### +# LAPACK make include file. # +# LAPACK, Version 3.8.0 # +# November 2017 # +#################################################################### + +SHELL = /bin/sh + +# CC is the C compiler, normally invoked with options CFLAGS. +# +CC = gcc +CFLAGS = -O3 + +# Modify the FORTRAN and OPTS definitions to refer to the compiler +# and desired compiler options for your machine. NOOPT refers to +# the compiler options desired when NO OPTIMIZATION is selected. +# +# Note: During a regular execution, LAPACK might create NaN and Inf +# and handle these quantities appropriately. As a consequence, one +# should not compile LAPACK with flags such as -ffpe-trap=overflow. +# +FORTRAN = gfortran +OPTS = -O3 -frecursive +DRVOPTS = $(OPTS) +NOOPT = -O0 -frecursive + +# Define LOADER and LOADOPTS to refer to the loader and desired +# load options for your machine. +# +LOADER = gfortran +LOADOPTS = + +# The archiver and the flag(s) to use when building an archive +# (library). If your system has no ranlib, set RANLIB = echo. +# +ARCH = ar +ARCHFLAGS = cr +RANLIB = ranlib + +# Timer for the SECOND and DSECND routines +# +# Default: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME +#TIMER = EXT_ETIME +# For RS6K: SECOND and DSECND will use a call to the +# EXTERNAL FUNCTION ETIME_ +#TIMER = EXT_ETIME_ +# For gfortran compiler: SECOND and DSECND will use a call to the +# INTERNAL FUNCTION ETIME +TIMER = INT_ETIME +# If your Fortran compiler does not provide etime (like Nag Fortran +# Compiler, etc...) SECOND and DSECND will use a call to the +# INTERNAL FUNCTION CPU_TIME +#TIMER = INT_CPU_TIME +# If none of these work, you can use the NONE value. +# In that case, SECOND and DSECND will always return 0. +#TIMER = NONE + +# Uncomment the following line to include deprecated routines in +# the LAPACK library. +# +#BUILD_DEPRECATED = Yes + +# LAPACKE has the interface to some routines from tmglib. +# If LAPACKE_WITH_TMG is defined, add those routines to LAPACKE. +# +#LAPACKE_WITH_TMG = Yes + +# Location of the extended-precision BLAS (XBLAS) Fortran library +# used for building and testing extended-precision routines. The +# relevant routines will be compiled and XBLAS will be linked only +# if USEXBLAS is defined. +# +#USEXBLAS = Yes +#XBLASLIB = -lxblas + +# The location of the libraries to which you will link. (The +# machine-specific, optimized BLAS library should be used whenever +# possible.) +# +BLASLIB = ../../librefblas.a +CBLASLIB = ../../libcblas.a +LAPACKLIB = liblapack.a +TMGLIB = libtmglib.a +LAPACKELIB = liblapacke.a diff --git a/make.inc b/make.inc new file mode 100644 index 0000000..f7d3b22 --- /dev/null +++ b/make.inc @@ -0,0 +1,59 @@ +CC = gcc +FORTRAN = gfortran +AR = ar + + +LIB_DIR = dspl + +# DSPL source and obj file path +DSPL_SRC_DIR = $(LIB_DIR)/dspl_src +DSPL_OBJ_DIR = $(LIB_DIR)/dspl_obj + +# BLAS source path +BLAS_LIB_DIR = $(LIB_DIR)/libblas/ +BLAS_SRC_DIR = $(BLAS_LIB_DIR)/SRC +BLAS_LIB_NAME = $(BLAS_LIB_DIR)/libblas.a + + +# LAPACK source path +LAPACK_LIB_DIR = $(LIB_DIR)/liblapack/ +LAPACK_SRC_DIR = $(LAPACK_LIB_DIR)/SRC +LAPACK_LIB_NAME = $(LAPACK_LIB_DIR)/liblapack.a + +#common source for DSPL and examples +COMMON_SRC_DIR = $(COMMON_DIR)/src + +INC_DIR = include +RELEASE_DIR = release/lib + +ifeq ($(OS),Windows_NT) + DSPL_LIBNAME = libdspl.dll + DEF_OS = WIN_OS + LFLAGS = -lm +else + UNAME_S := $(shell uname -s) + UNAME_P := $(shell uname -p) + ifeq ($(UNAME_S),Linux) + DSPL_LIBNAME = libdspl.so + DEF_OS = LINUX_OS + LFLAGS = -lm -ldl + else ifeq ($(UNAME_S),Darwin) + DSPL_LIBNAME = libdspl.so + DEF_OS = LINUX_OS + LFLAGS = -lm -ldl + endif +endif + +LIB_NAME = $(DSPL_LIBNAME) + + +ifeq ($(OS),Windows_NT) + MAKE = mingw32-make +else + UNAME_S := $(shell uname -s) + ifeq ($(UNAME_S),Linux) + MAKE = make + else ifeq ($(UNAME_S),Darwin) + MAKE = make + endif +endif \ No newline at end of file